aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--ObsoleteFiles.inc3
-rw-r--r--crypto/openssl/CHANGES111
-rwxr-xr-xcrypto/openssl/Configure5
-rw-r--r--crypto/openssl/INSTALL29
-rw-r--r--crypto/openssl/NEWS19
-rw-r--r--crypto/openssl/README2
-rw-r--r--crypto/openssl/apps/apps.c9
-rw-r--r--crypto/openssl/apps/apps.h4
-rw-r--r--crypto/openssl/apps/ca.c4
-rw-r--r--crypto/openssl/apps/dgst.c4
-rw-r--r--crypto/openssl/apps/enc.c2
-rw-r--r--crypto/openssl/apps/ocsp.c6
-rw-r--r--crypto/openssl/apps/openssl.c3
-rw-r--r--crypto/openssl/apps/pkcs12.c2
-rw-r--r--crypto/openssl/apps/req.c16
-rw-r--r--crypto/openssl/apps/s_apps.h20
-rw-r--r--crypto/openssl/apps/s_cb.c3
-rw-r--r--crypto/openssl/apps/s_client.c2
-rw-r--r--crypto/openssl/apps/speed.c2
-rw-r--r--crypto/openssl/apps/storeutl.c4
-rwxr-xr-xcrypto/openssl/config8
-rwxr-xr-xcrypto/openssl/crypto/aes/asm/aes-586.pl3000
-rwxr-xr-xcrypto/openssl/crypto/aes/asm/aes-s390x.pl6
-rwxr-xr-xcrypto/openssl/crypto/aes/asm/aes-x86_64.pl2916
-rwxr-xr-xcrypto/openssl/crypto/aes/asm/bsaes-x86_64.pl3239
-rw-r--r--crypto/openssl/crypto/asn1/a_time.c47
-rw-r--r--crypto/openssl/crypto/asn1/a_type.c10
-rw-r--r--crypto/openssl/crypto/asn1/x_bignum.c19
-rw-r--r--crypto/openssl/crypto/bio/b_addr.c26
-rw-r--r--crypto/openssl/crypto/bio/bss_dgram.c4
-rw-r--r--crypto/openssl/crypto/bio/bss_file.c61
-rw-r--r--crypto/openssl/crypto/bio/bss_mem.c4
-rwxr-xr-xcrypto/openssl/crypto/bn/asm/mips.pl2
-rw-r--r--crypto/openssl/crypto/bn/bn_div.c4
-rw-r--r--crypto/openssl/crypto/bn/bn_lcl.h4
-rw-r--r--crypto/openssl/crypto/bn/bn_lib.c101
-rw-r--r--crypto/openssl/crypto/bn/bn_prime.c8
-rw-r--r--crypto/openssl/crypto/bn/bn_rand.c7
-rw-r--r--crypto/openssl/crypto/bn/bn_sqrt.c5
-rw-r--r--crypto/openssl/crypto/cms/cms_att.c136
-rw-r--r--crypto/openssl/crypto/cms/cms_env.c20
-rw-r--r--crypto/openssl/crypto/cms/cms_err.c5
-rw-r--r--crypto/openssl/crypto/cms/cms_lcl.h9
-rw-r--r--crypto/openssl/crypto/cms/cms_sd.c38
-rw-r--r--crypto/openssl/crypto/cms/cms_smime.c6
-rw-r--r--crypto/openssl/crypto/conf/conf_sap.c2
-rw-r--r--crypto/openssl/crypto/ctype.c8
-rw-r--r--crypto/openssl/crypto/dh/dh_check.c37
-rw-r--r--crypto/openssl/crypto/dh/dh_gen.c52
-rw-r--r--crypto/openssl/crypto/dh/dh_key.c13
-rw-r--r--crypto/openssl/crypto/dh/dh_lib.c6
-rw-r--r--crypto/openssl/crypto/dsa/dsa_ameth.c4
-rw-r--r--crypto/openssl/crypto/dsa/dsa_err.c4
-rw-r--r--crypto/openssl/crypto/dsa/dsa_ossl.c10
-rw-r--r--crypto/openssl/crypto/dso/dso_dlfcn.c5
-rwxr-xr-xcrypto/openssl/crypto/ec/asm/ecp_nistz256-sparcv9.pl3
-rwxr-xr-xcrypto/openssl/crypto/ec/asm/ecp_nistz256-x86_64.pl2
-rwxr-xr-xcrypto/openssl/crypto/ec/asm/x25519-ppc64.pl6
-rw-r--r--crypto/openssl/crypto/ec/ec_asn1.c73
-rw-r--r--crypto/openssl/crypto/ec/ec_curve.c114
-rw-r--r--crypto/openssl/crypto/ec/ec_lcl.h4
-rw-r--r--crypto/openssl/crypto/ec/ec_lib.c103
-rw-r--r--crypto/openssl/crypto/ec/ecdh_ossl.c2
-rw-r--r--crypto/openssl/crypto/ec/ecdsa_ossl.c16
-rw-r--r--crypto/openssl/crypto/ec/ecp_nistp224.c62
-rw-r--r--crypto/openssl/crypto/ec/ecp_nistp256.c63
-rw-r--r--crypto/openssl/crypto/ec/ecp_nistp521.c65
-rw-r--r--crypto/openssl/crypto/ec/ecp_nistputil.c26
-rw-r--r--crypto/openssl/crypto/ec/ecx_meth.c2
-rw-r--r--crypto/openssl/crypto/engine/eng_devcrypto.c2
-rw-r--r--crypto/openssl/crypto/engine/eng_openssl.c8
-rw-r--r--crypto/openssl/crypto/err/err.c16
-rw-r--r--crypto/openssl/crypto/err/openssl.txt12
-rw-r--r--crypto/openssl/crypto/evp/bio_ok.c6
-rw-r--r--crypto/openssl/crypto/evp/e_aes.c91
-rw-r--r--crypto/openssl/crypto/evp/e_aria.c13
-rw-r--r--crypto/openssl/crypto/evp/e_chacha20_poly1305.c7
-rw-r--r--crypto/openssl/crypto/evp/e_rc5.c6
-rw-r--r--crypto/openssl/crypto/evp/evp_err.c11
-rw-r--r--crypto/openssl/crypto/evp/evp_lib.c9
-rw-r--r--crypto/openssl/crypto/evp/m_sha3.c5
-rw-r--r--crypto/openssl/crypto/include/internal/ctype.h4
-rw-r--r--crypto/openssl/crypto/include/internal/rand_int.h6
-rw-r--r--crypto/openssl/crypto/include/internal/sm2err.h6
-rw-r--r--crypto/openssl/crypto/init.c3
-rw-r--r--crypto/openssl/crypto/lhash/lhash.c8
-rw-r--r--crypto/openssl/crypto/o_str.c2
-rw-r--r--crypto/openssl/crypto/pem/pvkfmt.c5
-rw-r--r--crypto/openssl/crypto/pkcs7/pk7_doit.c14
-rw-r--r--crypto/openssl/crypto/rand/drbg_lib.c13
-rw-r--r--crypto/openssl/crypto/rand/rand_err.c3
-rw-r--r--crypto/openssl/crypto/rand/rand_lcl.h46
-rw-r--r--crypto/openssl/crypto/rand/rand_lib.c148
-rw-r--r--crypto/openssl/crypto/rand/rand_unix.c185
-rw-r--r--crypto/openssl/crypto/rsa/rsa_ameth.c13
-rw-r--r--crypto/openssl/crypto/rsa/rsa_err.c4
-rw-r--r--crypto/openssl/crypto/rsa/rsa_gen.c2
-rw-r--r--crypto/openssl/crypto/rsa/rsa_lib.c17
-rw-r--r--crypto/openssl/crypto/rsa/rsa_ossl.c10
-rw-r--r--crypto/openssl/crypto/s390xcap.c12
-rwxr-xr-xcrypto/openssl/crypto/sha/asm/keccak1600-armv4.pl224
-rwxr-xr-xcrypto/openssl/crypto/sha/asm/keccak1600-armv8.pl4
-rwxr-xr-xcrypto/openssl/crypto/sha/asm/sha512-sparcv9.pl4
-rw-r--r--crypto/openssl/crypto/sm2/sm2_sign.c6
-rw-r--r--crypto/openssl/crypto/store/loader_file.c18
-rw-r--r--crypto/openssl/crypto/store/store_lib.c4
-rw-r--r--crypto/openssl/crypto/threads_none.c15
-rw-r--r--crypto/openssl/crypto/threads_pthread.c12
-rw-r--r--crypto/openssl/crypto/ui/ui_lib.c4
-rw-r--r--crypto/openssl/crypto/ui/ui_openssl.c2
-rw-r--r--crypto/openssl/crypto/uid.c12
-rw-r--r--crypto/openssl/crypto/whrlpool/wp_block.c3
-rw-r--r--crypto/openssl/crypto/x509/by_dir.c6
-rw-r--r--crypto/openssl/crypto/x509/t_req.c6
-rw-r--r--crypto/openssl/crypto/x509/x509_att.c6
-rw-r--r--crypto/openssl/crypto/x509/x509_cmp.c12
-rw-r--r--crypto/openssl/crypto/x509/x509_err.c4
-rw-r--r--crypto/openssl/crypto/x509/x509_lu.c113
-rw-r--r--crypto/openssl/crypto/x509/x509_vfy.c12
-rw-r--r--crypto/openssl/crypto/x509v3/v3_alt.c11
-rw-r--r--crypto/openssl/crypto/x509v3/v3_purp.c16
-rw-r--r--crypto/openssl/doc/HOWTO/proxy_certificates.txt2
-rw-r--r--crypto/openssl/doc/man1/engine.pod4
-rw-r--r--crypto/openssl/doc/man1/errstr.pod4
-rw-r--r--crypto/openssl/doc/man1/pkcs12.pod3
-rw-r--r--crypto/openssl/doc/man1/pkeyparam.pod4
-rw-r--r--crypto/openssl/doc/man1/s_client.pod2
-rw-r--r--crypto/openssl/doc/man1/s_server.pod2
-rw-r--r--crypto/openssl/doc/man3/ADMISSIONS.pod4
-rw-r--r--crypto/openssl/doc/man3/ASYNC_start_job.pod4
-rw-r--r--crypto/openssl/doc/man3/BIO_connect.pod2
-rw-r--r--crypto/openssl/doc/man3/BIO_f_ssl.pod38
-rw-r--r--crypto/openssl/doc/man3/BIO_find_type.pod4
-rw-r--r--crypto/openssl/doc/man3/BIO_new.pod4
-rw-r--r--crypto/openssl/doc/man3/BIO_s_accept.pod4
-rw-r--r--crypto/openssl/doc/man3/BIO_s_bio.pod6
-rw-r--r--crypto/openssl/doc/man3/BIO_s_connect.pod4
-rw-r--r--crypto/openssl/doc/man3/BIO_s_fd.pod4
-rw-r--r--crypto/openssl/doc/man3/BIO_s_mem.pod48
-rw-r--r--crypto/openssl/doc/man3/BIO_set_callback.pod12
-rw-r--r--crypto/openssl/doc/man3/BN_generate_prime.pod14
-rw-r--r--crypto/openssl/doc/man3/BN_mod_mul_montgomery.pod4
-rw-r--r--crypto/openssl/doc/man3/BN_new.pod4
-rw-r--r--crypto/openssl/doc/man3/CMS_final.pod4
-rw-r--r--crypto/openssl/doc/man3/CRYPTO_THREAD_run_once.pod4
-rw-r--r--crypto/openssl/doc/man3/CRYPTO_memcmp.pod39
-rw-r--r--crypto/openssl/doc/man3/DES_random_key.pod8
-rw-r--r--crypto/openssl/doc/man3/DSA_generate_key.pod6
-rw-r--r--crypto/openssl/doc/man3/DSA_sign.pod9
-rw-r--r--crypto/openssl/doc/man3/ECDSA_SIG_new.pod25
-rw-r--r--crypto/openssl/doc/man3/EVP_DigestInit.pod4
-rw-r--r--crypto/openssl/doc/man3/EVP_DigestSignInit.pod12
-rw-r--r--crypto/openssl/doc/man3/EVP_DigestVerifyInit.pod10
-rw-r--r--crypto/openssl/doc/man3/EVP_PKEY_CTX_set_hkdf_md.pod4
-rw-r--r--crypto/openssl/doc/man3/EVP_PKEY_CTX_set_tls1_prf_md.pod4
-rw-r--r--crypto/openssl/doc/man3/EVP_PKEY_decrypt.pod4
-rw-r--r--crypto/openssl/doc/man3/EVP_PKEY_derive.pod4
-rw-r--r--crypto/openssl/doc/man3/EVP_PKEY_encrypt.pod4
-rw-r--r--crypto/openssl/doc/man3/EVP_PKEY_sign.pod4
-rw-r--r--crypto/openssl/doc/man3/EVP_PKEY_verify.pod4
-rw-r--r--crypto/openssl/doc/man3/EVP_PKEY_verify_recover.pod4
-rw-r--r--crypto/openssl/doc/man3/EVP_SealInit.pod9
-rw-r--r--crypto/openssl/doc/man3/EVP_SignInit.pod9
-rw-r--r--crypto/openssl/doc/man3/EVP_VerifyInit.pod4
-rw-r--r--crypto/openssl/doc/man3/EVP_aria.pod4
-rw-r--r--crypto/openssl/doc/man3/EVP_md5.pod4
-rw-r--r--crypto/openssl/doc/man3/EVP_rc5_32_12_16_cbc.pod27
-rw-r--r--crypto/openssl/doc/man3/OCSP_REQUEST_new.pod4
-rw-r--r--crypto/openssl/doc/man3/OPENSSL_fork_prepare.pod6
-rw-r--r--crypto/openssl/doc/man3/OSSL_STORE_LOADER.pod4
-rw-r--r--crypto/openssl/doc/man3/OSSL_STORE_expect.pod4
-rw-r--r--crypto/openssl/doc/man3/PKCS12_newpass.pod4
-rw-r--r--crypto/openssl/doc/man3/RAND_DRBG_set_callbacks.pod2
-rw-r--r--crypto/openssl/doc/man3/RAND_set_rand_method.pod9
-rw-r--r--crypto/openssl/doc/man3/RSA_blinding_on.pod5
-rw-r--r--crypto/openssl/doc/man3/RSA_generate_key.pod15
-rw-r--r--crypto/openssl/doc/man3/RSA_padding_add_PKCS1_type_1.pod7
-rw-r--r--crypto/openssl/doc/man3/RSA_public_encrypt.pod2
-rw-r--r--crypto/openssl/doc/man3/RSA_sign_ASN1_OCTET_STRING.pod10
-rw-r--r--crypto/openssl/doc/man3/SSL_CTX_config.pod4
-rw-r--r--crypto/openssl/doc/man3/SSL_CTX_dane_enable.pod4
-rw-r--r--crypto/openssl/doc/man3/SSL_CTX_get0_param.pod16
-rw-r--r--crypto/openssl/doc/man3/SSL_CTX_new.pod19
-rw-r--r--crypto/openssl/doc/man3/SSL_CTX_set_cipher_list.pod4
-rw-r--r--crypto/openssl/doc/man3/SSL_CTX_set_generate_session_id.pod4
-rw-r--r--crypto/openssl/doc/man3/SSL_CTX_set_session_id_context.pod6
-rw-r--r--crypto/openssl/doc/man3/SSL_CTX_set_verify.pod12
-rw-r--r--crypto/openssl/doc/man3/SSL_SESSION_get0_hostname.pod4
-rw-r--r--crypto/openssl/doc/man3/SSL_get_error.pod6
-rw-r--r--crypto/openssl/doc/man3/SSL_library_init.pod4
-rw-r--r--crypto/openssl/doc/man3/SSL_set1_host.pod4
-rw-r--r--crypto/openssl/doc/man3/SSL_write.pod2
-rw-r--r--crypto/openssl/doc/man3/X509_STORE_CTX_get_error.pod10
-rw-r--r--crypto/openssl/doc/man3/X509_STORE_CTX_set_verify_cb.pod4
-rw-r--r--crypto/openssl/doc/man3/X509_STORE_add_cert.pod6
-rw-r--r--crypto/openssl/doc/man3/X509_STORE_new.pod4
-rw-r--r--crypto/openssl/doc/man3/X509_VERIFY_PARAM_set_flags.pod4
-rw-r--r--crypto/openssl/doc/man3/X509_cmp.pod80
-rw-r--r--crypto/openssl/doc/man3/X509_get_extension_flags.pod14
-rw-r--r--crypto/openssl/doc/man3/d2i_X509.pod20
-rw-r--r--crypto/openssl/doc/man5/x509v3_config.pod5
-rw-r--r--crypto/openssl/doc/man7/Ed25519.pod4
-rw-r--r--crypto/openssl/doc/man7/RAND.pod8
-rw-r--r--crypto/openssl/doc/man7/SM2.pod4
-rw-r--r--crypto/openssl/doc/man7/X25519.pod4
-rw-r--r--crypto/openssl/doc/man7/bio.pod4
-rw-r--r--crypto/openssl/doc/man7/scrypt.pod4
-rw-r--r--crypto/openssl/e_os.h30
-rw-r--r--crypto/openssl/engines/build.info15
-rw-r--r--crypto/openssl/engines/e_afalg.c16
-rw-r--r--crypto/openssl/include/internal/constant_time_locl.h64
-rw-r--r--crypto/openssl/include/internal/cryptlib.h1
-rw-r--r--crypto/openssl/include/internal/dsoerr.h4
-rw-r--r--crypto/openssl/include/internal/refcount.h2
-rw-r--r--crypto/openssl/include/internal/thread_once.h2
-rw-r--r--crypto/openssl/include/internal/tsan_assist.h2
-rw-r--r--crypto/openssl/include/openssl/asn1err.h6
-rw-r--r--crypto/openssl/include/openssl/asyncerr.h6
-rw-r--r--crypto/openssl/include/openssl/bio.h6
-rw-r--r--crypto/openssl/include/openssl/bioerr.h6
-rw-r--r--crypto/openssl/include/openssl/bnerr.h6
-rw-r--r--crypto/openssl/include/openssl/buffererr.h6
-rw-r--r--crypto/openssl/include/openssl/cms.h5
-rw-r--r--crypto/openssl/include/openssl/cmserr.h8
-rw-r--r--crypto/openssl/include/openssl/comperr.h6
-rw-r--r--crypto/openssl/include/openssl/conferr.h6
-rw-r--r--crypto/openssl/include/openssl/cryptoerr.h9
-rw-r--r--crypto/openssl/include/openssl/cterr.h6
-rw-r--r--crypto/openssl/include/openssl/dherr.h6
-rw-r--r--crypto/openssl/include/openssl/dsaerr.h7
-rw-r--r--crypto/openssl/include/openssl/ec.h7
-rw-r--r--crypto/openssl/include/openssl/ecerr.h4
-rw-r--r--crypto/openssl/include/openssl/engineerr.h6
-rw-r--r--crypto/openssl/include/openssl/evp.h4
-rw-r--r--crypto/openssl/include/openssl/evperr.h12
-rw-r--r--crypto/openssl/include/openssl/kdferr.h6
-rw-r--r--crypto/openssl/include/openssl/objectserr.h6
-rw-r--r--crypto/openssl/include/openssl/ocsperr.h6
-rw-r--r--crypto/openssl/include/openssl/opensslv.h4
-rw-r--r--crypto/openssl/include/openssl/pemerr.h6
-rw-r--r--crypto/openssl/include/openssl/pkcs12err.h6
-rw-r--r--crypto/openssl/include/openssl/pkcs7err.h6
-rw-r--r--crypto/openssl/include/openssl/randerr.h5
-rw-r--r--crypto/openssl/include/openssl/rsaerr.h7
-rw-r--r--crypto/openssl/include/openssl/ssl.h60
-rw-r--r--crypto/openssl/include/openssl/sslerr.h4
-rw-r--r--crypto/openssl/include/openssl/store.h6
-rw-r--r--crypto/openssl/include/openssl/storeerr.h6
-rw-r--r--crypto/openssl/include/openssl/tls1.h4
-rw-r--r--crypto/openssl/include/openssl/tserr.h6
-rw-r--r--crypto/openssl/include/openssl/uierr.h6
-rw-r--r--crypto/openssl/include/openssl/x509err.h7
-rw-r--r--crypto/openssl/include/openssl/x509v3.h2
-rw-r--r--crypto/openssl/include/openssl/x509v3err.h6
-rw-r--r--crypto/openssl/ssl/d1_msg.c5
-rw-r--r--crypto/openssl/ssl/record/rec_layer_s3.c12
-rw-r--r--crypto/openssl/ssl/s3_lib.c14
-rw-r--r--crypto/openssl/ssl/ssl_cert.c5
-rw-r--r--crypto/openssl/ssl/ssl_ciph.c13
-rw-r--r--crypto/openssl/ssl/ssl_lib.c20
-rw-r--r--crypto/openssl/ssl/ssl_locl.h37
-rw-r--r--crypto/openssl/ssl/ssl_sess.c39
-rw-r--r--crypto/openssl/ssl/statem/extensions.c21
-rw-r--r--crypto/openssl/ssl/statem/extensions_clnt.c16
-rw-r--r--crypto/openssl/ssl/statem/extensions_srvr.c20
-rw-r--r--crypto/openssl/ssl/statem/statem_clnt.c6
-rw-r--r--crypto/openssl/ssl/statem/statem_lib.c19
-rw-r--r--crypto/openssl/ssl/statem/statem_srvr.c54
-rw-r--r--crypto/openssl/ssl/t1_lib.c275
-rw-r--r--crypto/openssl/ssl/tls13_enc.c53
-rw-r--r--secure/lib/libcrypto/Makefile14
-rw-r--r--secure/lib/libcrypto/Makefile.asm7
-rw-r--r--secure/lib/libcrypto/Makefile.inc10
-rw-r--r--secure/lib/libcrypto/Makefile.man14
-rw-r--r--secure/lib/libcrypto/Version.map8
-rw-r--r--secure/lib/libcrypto/aarch64/keccak1600-armv8.S26
-rw-r--r--secure/lib/libcrypto/amd64/aes-x86_64.S2651
-rw-r--r--secure/lib/libcrypto/amd64/bsaes-x86_64.S2597
-rw-r--r--secure/lib/libcrypto/arm/keccak1600-armv4.S222
-rw-r--r--secure/lib/libcrypto/i386/aes-586.S6491
-rw-r--r--secure/lib/libcrypto/man/ADMISSIONS.38
-rw-r--r--secure/lib/libcrypto/man/ASN1_INTEGER_get_int64.34
-rw-r--r--secure/lib/libcrypto/man/ASN1_ITEM_lookup.34
-rw-r--r--secure/lib/libcrypto/man/ASN1_OBJECT_new.34
-rw-r--r--secure/lib/libcrypto/man/ASN1_STRING_TABLE_add.34
-rw-r--r--secure/lib/libcrypto/man/ASN1_STRING_length.34
-rw-r--r--secure/lib/libcrypto/man/ASN1_STRING_new.34
-rw-r--r--secure/lib/libcrypto/man/ASN1_STRING_print_ex.34
-rw-r--r--secure/lib/libcrypto/man/ASN1_TIME_set.34
-rw-r--r--secure/lib/libcrypto/man/ASN1_TYPE_get.34
-rw-r--r--secure/lib/libcrypto/man/ASN1_generate_nconf.34
-rw-r--r--secure/lib/libcrypto/man/ASYNC_WAIT_CTX_new.34
-rw-r--r--secure/lib/libcrypto/man/ASYNC_start_job.310
-rw-r--r--secure/lib/libcrypto/man/BF_encrypt.34
-rw-r--r--secure/lib/libcrypto/man/BIO_ADDR.34
-rw-r--r--secure/lib/libcrypto/man/BIO_ADDRINFO.34
-rw-r--r--secure/lib/libcrypto/man/BIO_connect.36
-rw-r--r--secure/lib/libcrypto/man/BIO_ctrl.34
-rw-r--r--secure/lib/libcrypto/man/BIO_f_base64.34
-rw-r--r--secure/lib/libcrypto/man/BIO_f_buffer.34
-rw-r--r--secure/lib/libcrypto/man/BIO_f_cipher.34
-rw-r--r--secure/lib/libcrypto/man/BIO_f_md.34
-rw-r--r--secure/lib/libcrypto/man/BIO_f_null.34
-rw-r--r--secure/lib/libcrypto/man/BIO_f_ssl.342
-rw-r--r--secure/lib/libcrypto/man/BIO_find_type.310
-rw-r--r--secure/lib/libcrypto/man/BIO_get_data.34
-rw-r--r--secure/lib/libcrypto/man/BIO_get_ex_new_index.34
-rw-r--r--secure/lib/libcrypto/man/BIO_meth_new.34
-rw-r--r--secure/lib/libcrypto/man/BIO_new.310
-rw-r--r--secure/lib/libcrypto/man/BIO_new_CMS.34
-rw-r--r--secure/lib/libcrypto/man/BIO_parse_hostserv.34
-rw-r--r--secure/lib/libcrypto/man/BIO_printf.34
-rw-r--r--secure/lib/libcrypto/man/BIO_push.34
-rw-r--r--secure/lib/libcrypto/man/BIO_read.34
-rw-r--r--secure/lib/libcrypto/man/BIO_s_accept.310
-rw-r--r--secure/lib/libcrypto/man/BIO_s_bio.314
-rw-r--r--secure/lib/libcrypto/man/BIO_s_connect.310
-rw-r--r--secure/lib/libcrypto/man/BIO_s_fd.310
-rw-r--r--secure/lib/libcrypto/man/BIO_s_file.34
-rw-r--r--secure/lib/libcrypto/man/BIO_s_mem.353
-rw-r--r--secure/lib/libcrypto/man/BIO_s_null.34
-rw-r--r--secure/lib/libcrypto/man/BIO_s_socket.34
-rw-r--r--secure/lib/libcrypto/man/BIO_set_callback.314
-rw-r--r--secure/lib/libcrypto/man/BIO_should_retry.34
-rw-r--r--secure/lib/libcrypto/man/BN_BLINDING_new.34
-rw-r--r--secure/lib/libcrypto/man/BN_CTX_new.34
-rw-r--r--secure/lib/libcrypto/man/BN_CTX_start.34
-rw-r--r--secure/lib/libcrypto/man/BN_add.34
-rw-r--r--secure/lib/libcrypto/man/BN_add_word.34
-rw-r--r--secure/lib/libcrypto/man/BN_bn2bin.34
-rw-r--r--secure/lib/libcrypto/man/BN_cmp.34
-rw-r--r--secure/lib/libcrypto/man/BN_copy.34
-rw-r--r--secure/lib/libcrypto/man/BN_generate_prime.318
-rw-r--r--secure/lib/libcrypto/man/BN_mod_inverse.34
-rw-r--r--secure/lib/libcrypto/man/BN_mod_mul_montgomery.310
-rw-r--r--secure/lib/libcrypto/man/BN_mod_mul_reciprocal.34
-rw-r--r--secure/lib/libcrypto/man/BN_new.36
-rw-r--r--secure/lib/libcrypto/man/BN_num_bytes.34
-rw-r--r--secure/lib/libcrypto/man/BN_rand.34
-rw-r--r--secure/lib/libcrypto/man/BN_security_bits.34
-rw-r--r--secure/lib/libcrypto/man/BN_set_bit.34
-rw-r--r--secure/lib/libcrypto/man/BN_swap.34
-rw-r--r--secure/lib/libcrypto/man/BN_zero.34
-rw-r--r--secure/lib/libcrypto/man/BUF_MEM_new.34
-rw-r--r--secure/lib/libcrypto/man/CMS_add0_cert.34
-rw-r--r--secure/lib/libcrypto/man/CMS_add1_recipient_cert.34
-rw-r--r--secure/lib/libcrypto/man/CMS_add1_signer.34
-rw-r--r--secure/lib/libcrypto/man/CMS_compress.34
-rw-r--r--secure/lib/libcrypto/man/CMS_decrypt.34
-rw-r--r--secure/lib/libcrypto/man/CMS_encrypt.34
-rw-r--r--secure/lib/libcrypto/man/CMS_final.38
-rw-r--r--secure/lib/libcrypto/man/CMS_get0_RecipientInfos.34
-rw-r--r--secure/lib/libcrypto/man/CMS_get0_SignerInfos.34
-rw-r--r--secure/lib/libcrypto/man/CMS_get0_type.34
-rw-r--r--secure/lib/libcrypto/man/CMS_get1_ReceiptRequest.34
-rw-r--r--secure/lib/libcrypto/man/CMS_sign.34
-rw-r--r--secure/lib/libcrypto/man/CMS_sign_receipt.34
-rw-r--r--secure/lib/libcrypto/man/CMS_uncompress.34
-rw-r--r--secure/lib/libcrypto/man/CMS_verify.34
-rw-r--r--secure/lib/libcrypto/man/CMS_verify_receipt.34
-rw-r--r--secure/lib/libcrypto/man/CONF_modules_free.34
-rw-r--r--secure/lib/libcrypto/man/CONF_modules_load_file.34
-rw-r--r--secure/lib/libcrypto/man/CRYPTO_THREAD_run_once.310
-rw-r--r--secure/lib/libcrypto/man/CRYPTO_get_ex_new_index.34
-rw-r--r--secure/lib/libcrypto/man/CRYPTO_memcmp.3171
-rw-r--r--secure/lib/libcrypto/man/CTLOG_STORE_get0_log_by_id.34
-rw-r--r--secure/lib/libcrypto/man/CTLOG_STORE_new.34
-rw-r--r--secure/lib/libcrypto/man/CTLOG_new.34
-rw-r--r--secure/lib/libcrypto/man/CT_POLICY_EVAL_CTX_new.34
-rw-r--r--secure/lib/libcrypto/man/DEFINE_STACK_OF.34
-rw-r--r--secure/lib/libcrypto/man/DES_random_key.312
-rw-r--r--secure/lib/libcrypto/man/DH_generate_key.34
-rw-r--r--secure/lib/libcrypto/man/DH_generate_parameters.34
-rw-r--r--secure/lib/libcrypto/man/DH_get0_pqg.34
-rw-r--r--secure/lib/libcrypto/man/DH_get_1024_160.34
-rw-r--r--secure/lib/libcrypto/man/DH_meth_new.34
-rw-r--r--secure/lib/libcrypto/man/DH_new.34
-rw-r--r--secure/lib/libcrypto/man/DH_new_by_nid.34
-rw-r--r--secure/lib/libcrypto/man/DH_set_method.34
-rw-r--r--secure/lib/libcrypto/man/DH_size.34
-rw-r--r--secure/lib/libcrypto/man/DSA_SIG_new.34
-rw-r--r--secure/lib/libcrypto/man/DSA_do_sign.34
-rw-r--r--secure/lib/libcrypto/man/DSA_dup_DH.34
-rw-r--r--secure/lib/libcrypto/man/DSA_generate_key.310
-rw-r--r--secure/lib/libcrypto/man/DSA_generate_parameters.34
-rw-r--r--secure/lib/libcrypto/man/DSA_get0_pqg.34
-rw-r--r--secure/lib/libcrypto/man/DSA_meth_new.34
-rw-r--r--secure/lib/libcrypto/man/DSA_new.34
-rw-r--r--secure/lib/libcrypto/man/DSA_set_method.34
-rw-r--r--secure/lib/libcrypto/man/DSA_sign.313
-rw-r--r--secure/lib/libcrypto/man/DSA_size.34
-rw-r--r--secure/lib/libcrypto/man/DTLS_get_data_mtu.34
-rw-r--r--secure/lib/libcrypto/man/DTLS_set_timer_cb.34
-rw-r--r--secure/lib/libcrypto/man/DTLSv1_listen.34
-rw-r--r--secure/lib/libcrypto/man/ECDSA_SIG_new.323
-rw-r--r--secure/lib/libcrypto/man/ECPKParameters_print.34
-rw-r--r--secure/lib/libcrypto/man/EC_GFp_simple_method.34
-rw-r--r--secure/lib/libcrypto/man/EC_GROUP_copy.34
-rw-r--r--secure/lib/libcrypto/man/EC_GROUP_new.34
-rw-r--r--secure/lib/libcrypto/man/EC_KEY_get_enc_flags.34
-rw-r--r--secure/lib/libcrypto/man/EC_KEY_new.34
-rw-r--r--secure/lib/libcrypto/man/EC_POINT_add.34
-rw-r--r--secure/lib/libcrypto/man/EC_POINT_new.34
-rw-r--r--secure/lib/libcrypto/man/ENGINE_add.34
-rw-r--r--secure/lib/libcrypto/man/ERR_GET_LIB.34
-rw-r--r--secure/lib/libcrypto/man/ERR_clear_error.34
-rw-r--r--secure/lib/libcrypto/man/ERR_error_string.34
-rw-r--r--secure/lib/libcrypto/man/ERR_get_error.34
-rw-r--r--secure/lib/libcrypto/man/ERR_load_crypto_strings.34
-rw-r--r--secure/lib/libcrypto/man/ERR_load_strings.34
-rw-r--r--secure/lib/libcrypto/man/ERR_print_errors.34
-rw-r--r--secure/lib/libcrypto/man/ERR_put_error.34
-rw-r--r--secure/lib/libcrypto/man/ERR_remove_state.34
-rw-r--r--secure/lib/libcrypto/man/ERR_set_mark.34
-rw-r--r--secure/lib/libcrypto/man/EVP_BytesToKey.34
-rw-r--r--secure/lib/libcrypto/man/EVP_CIPHER_CTX_get_cipher_data.34
-rw-r--r--secure/lib/libcrypto/man/EVP_CIPHER_meth_new.34
-rw-r--r--secure/lib/libcrypto/man/EVP_DigestInit.310
-rw-r--r--secure/lib/libcrypto/man/EVP_DigestSignInit.316
-rw-r--r--secure/lib/libcrypto/man/EVP_DigestVerifyInit.314
-rw-r--r--secure/lib/libcrypto/man/EVP_EncodeInit.34
-rw-r--r--secure/lib/libcrypto/man/EVP_EncryptInit.34
-rw-r--r--secure/lib/libcrypto/man/EVP_MD_meth_new.34
-rw-r--r--secure/lib/libcrypto/man/EVP_OpenInit.34
-rw-r--r--secure/lib/libcrypto/man/EVP_PKEY_ASN1_METHOD.34
-rw-r--r--secure/lib/libcrypto/man/EVP_PKEY_CTX_ctrl.34
-rw-r--r--secure/lib/libcrypto/man/EVP_PKEY_CTX_new.34
-rw-r--r--secure/lib/libcrypto/man/EVP_PKEY_CTX_set1_pbe_pass.34
-rw-r--r--secure/lib/libcrypto/man/EVP_PKEY_CTX_set_hkdf_md.310
-rw-r--r--secure/lib/libcrypto/man/EVP_PKEY_CTX_set_rsa_pss_keygen_md.34
-rw-r--r--secure/lib/libcrypto/man/EVP_PKEY_CTX_set_scrypt_N.34
-rw-r--r--secure/lib/libcrypto/man/EVP_PKEY_CTX_set_tls1_prf_md.310
-rw-r--r--secure/lib/libcrypto/man/EVP_PKEY_asn1_get_count.34
-rw-r--r--secure/lib/libcrypto/man/EVP_PKEY_cmp.34
-rw-r--r--secure/lib/libcrypto/man/EVP_PKEY_decrypt.310
-rw-r--r--secure/lib/libcrypto/man/EVP_PKEY_derive.310
-rw-r--r--secure/lib/libcrypto/man/EVP_PKEY_encrypt.310
-rw-r--r--secure/lib/libcrypto/man/EVP_PKEY_get_default_digest_nid.34
-rw-r--r--secure/lib/libcrypto/man/EVP_PKEY_keygen.34
-rw-r--r--secure/lib/libcrypto/man/EVP_PKEY_meth_get_count.34
-rw-r--r--secure/lib/libcrypto/man/EVP_PKEY_meth_new.34
-rw-r--r--secure/lib/libcrypto/man/EVP_PKEY_new.34
-rw-r--r--secure/lib/libcrypto/man/EVP_PKEY_print_private.34
-rw-r--r--secure/lib/libcrypto/man/EVP_PKEY_set1_RSA.34
-rw-r--r--secure/lib/libcrypto/man/EVP_PKEY_sign.310
-rw-r--r--secure/lib/libcrypto/man/EVP_PKEY_verify.310
-rw-r--r--secure/lib/libcrypto/man/EVP_PKEY_verify_recover.310
-rw-r--r--secure/lib/libcrypto/man/EVP_SealInit.313
-rw-r--r--secure/lib/libcrypto/man/EVP_SignInit.313
-rw-r--r--secure/lib/libcrypto/man/EVP_VerifyInit.38
-rw-r--r--secure/lib/libcrypto/man/EVP_aes.34
-rw-r--r--secure/lib/libcrypto/man/EVP_aria.38
-rw-r--r--secure/lib/libcrypto/man/EVP_bf_cbc.34
-rw-r--r--secure/lib/libcrypto/man/EVP_blake2b512.34
-rw-r--r--secure/lib/libcrypto/man/EVP_camellia.34
-rw-r--r--secure/lib/libcrypto/man/EVP_cast5_cbc.34
-rw-r--r--secure/lib/libcrypto/man/EVP_chacha20.34
-rw-r--r--secure/lib/libcrypto/man/EVP_des.34
-rw-r--r--secure/lib/libcrypto/man/EVP_desx_cbc.34
-rw-r--r--secure/lib/libcrypto/man/EVP_idea_cbc.34
-rw-r--r--secure/lib/libcrypto/man/EVP_md2.34
-rw-r--r--secure/lib/libcrypto/man/EVP_md4.34
-rw-r--r--secure/lib/libcrypto/man/EVP_md5.38
-rw-r--r--secure/lib/libcrypto/man/EVP_mdc2.34
-rw-r--r--secure/lib/libcrypto/man/EVP_rc2_cbc.34
-rw-r--r--secure/lib/libcrypto/man/EVP_rc4.34
-rw-r--r--secure/lib/libcrypto/man/EVP_rc5_32_12_16_cbc.329
-rw-r--r--secure/lib/libcrypto/man/EVP_ripemd160.34
-rw-r--r--secure/lib/libcrypto/man/EVP_seed_cbc.34
-rw-r--r--secure/lib/libcrypto/man/EVP_sha1.34
-rw-r--r--secure/lib/libcrypto/man/EVP_sha224.34
-rw-r--r--secure/lib/libcrypto/man/EVP_sha3_224.34
-rw-r--r--secure/lib/libcrypto/man/EVP_sm3.34
-rw-r--r--secure/lib/libcrypto/man/EVP_sm4_cbc.34
-rw-r--r--secure/lib/libcrypto/man/EVP_whirlpool.34
-rw-r--r--secure/lib/libcrypto/man/HMAC.34
-rw-r--r--secure/lib/libcrypto/man/MD5.34
-rw-r--r--secure/lib/libcrypto/man/MDC2_Init.34
-rw-r--r--secure/lib/libcrypto/man/OBJ_nid2obj.34
-rw-r--r--secure/lib/libcrypto/man/OCSP_REQUEST_new.310
-rw-r--r--secure/lib/libcrypto/man/OCSP_cert_to_id.34
-rw-r--r--secure/lib/libcrypto/man/OCSP_request_add1_nonce.34
-rw-r--r--secure/lib/libcrypto/man/OCSP_resp_find_status.34
-rw-r--r--secure/lib/libcrypto/man/OCSP_response_status.34
-rw-r--r--secure/lib/libcrypto/man/OCSP_sendreq_new.34
-rw-r--r--secure/lib/libcrypto/man/OPENSSL_Applink.34
-rw-r--r--secure/lib/libcrypto/man/OPENSSL_LH_COMPFUNC.34
-rw-r--r--secure/lib/libcrypto/man/OPENSSL_LH_stats.34
-rw-r--r--secure/lib/libcrypto/man/OPENSSL_VERSION_NUMBER.34
-rw-r--r--secure/lib/libcrypto/man/OPENSSL_config.34
-rw-r--r--secure/lib/libcrypto/man/OPENSSL_fork_prepare.310
-rw-r--r--secure/lib/libcrypto/man/OPENSSL_ia32cap.34
-rw-r--r--secure/lib/libcrypto/man/OPENSSL_init_crypto.34
-rw-r--r--secure/lib/libcrypto/man/OPENSSL_init_ssl.34
-rw-r--r--secure/lib/libcrypto/man/OPENSSL_instrument_bus.34
-rw-r--r--secure/lib/libcrypto/man/OPENSSL_load_builtin_modules.34
-rw-r--r--secure/lib/libcrypto/man/OPENSSL_malloc.34
-rw-r--r--secure/lib/libcrypto/man/OPENSSL_secure_malloc.34
-rw-r--r--secure/lib/libcrypto/man/OSSL_STORE_INFO.34
-rw-r--r--secure/lib/libcrypto/man/OSSL_STORE_LOADER.38
-rw-r--r--secure/lib/libcrypto/man/OSSL_STORE_SEARCH.34
-rw-r--r--secure/lib/libcrypto/man/OSSL_STORE_expect.38
-rw-r--r--secure/lib/libcrypto/man/OSSL_STORE_open.34
-rw-r--r--secure/lib/libcrypto/man/OpenSSL_add_all_algorithms.34
-rw-r--r--secure/lib/libcrypto/man/PEM_bytes_read_bio.34
-rw-r--r--secure/lib/libcrypto/man/PEM_read.34
-rw-r--r--secure/lib/libcrypto/man/PEM_read_CMS.34
-rw-r--r--secure/lib/libcrypto/man/PEM_read_bio_PrivateKey.34
-rw-r--r--secure/lib/libcrypto/man/PEM_read_bio_ex.34
-rw-r--r--secure/lib/libcrypto/man/PEM_write_bio_CMS_stream.34
-rw-r--r--secure/lib/libcrypto/man/PEM_write_bio_PKCS7_stream.34
-rw-r--r--secure/lib/libcrypto/man/PKCS12_create.34
-rw-r--r--secure/lib/libcrypto/man/PKCS12_newpass.310
-rw-r--r--secure/lib/libcrypto/man/PKCS12_parse.34
-rw-r--r--secure/lib/libcrypto/man/PKCS5_PBKDF2_HMAC.34
-rw-r--r--secure/lib/libcrypto/man/PKCS7_decrypt.34
-rw-r--r--secure/lib/libcrypto/man/PKCS7_encrypt.34
-rw-r--r--secure/lib/libcrypto/man/PKCS7_sign.34
-rw-r--r--secure/lib/libcrypto/man/PKCS7_sign_add_signer.34
-rw-r--r--secure/lib/libcrypto/man/PKCS7_verify.34
-rw-r--r--secure/lib/libcrypto/man/RAND_DRBG_generate.34
-rw-r--r--secure/lib/libcrypto/man/RAND_DRBG_get0_master.34
-rw-r--r--secure/lib/libcrypto/man/RAND_DRBG_new.34
-rw-r--r--secure/lib/libcrypto/man/RAND_DRBG_reseed.34
-rw-r--r--secure/lib/libcrypto/man/RAND_DRBG_set_callbacks.36
-rw-r--r--secure/lib/libcrypto/man/RAND_DRBG_set_ex_data.34
-rw-r--r--secure/lib/libcrypto/man/RAND_add.34
-rw-r--r--secure/lib/libcrypto/man/RAND_bytes.34
-rw-r--r--secure/lib/libcrypto/man/RAND_cleanup.34
-rw-r--r--secure/lib/libcrypto/man/RAND_egd.34
-rw-r--r--secure/lib/libcrypto/man/RAND_load_file.34
-rw-r--r--secure/lib/libcrypto/man/RAND_set_rand_method.313
-rw-r--r--secure/lib/libcrypto/man/RC4_set_key.34
-rw-r--r--secure/lib/libcrypto/man/RIPEMD160_Init.34
-rw-r--r--secure/lib/libcrypto/man/RSA_blinding_on.39
-rw-r--r--secure/lib/libcrypto/man/RSA_check_key.34
-rw-r--r--secure/lib/libcrypto/man/RSA_generate_key.319
-rw-r--r--secure/lib/libcrypto/man/RSA_get0_key.34
-rw-r--r--secure/lib/libcrypto/man/RSA_meth_new.34
-rw-r--r--secure/lib/libcrypto/man/RSA_new.34
-rw-r--r--secure/lib/libcrypto/man/RSA_padding_add_PKCS1_type_1.313
-rw-r--r--secure/lib/libcrypto/man/RSA_print.34
-rw-r--r--secure/lib/libcrypto/man/RSA_private_encrypt.34
-rw-r--r--secure/lib/libcrypto/man/RSA_public_encrypt.38
-rw-r--r--secure/lib/libcrypto/man/RSA_set_method.34
-rw-r--r--secure/lib/libcrypto/man/RSA_sign.34
-rw-r--r--secure/lib/libcrypto/man/RSA_sign_ASN1_OCTET_STRING.314
-rw-r--r--secure/lib/libcrypto/man/RSA_size.34
-rw-r--r--secure/lib/libcrypto/man/SCT_new.34
-rw-r--r--secure/lib/libcrypto/man/SCT_print.34
-rw-r--r--secure/lib/libcrypto/man/SCT_validate.34
-rw-r--r--secure/lib/libcrypto/man/SHA256_Init.34
-rw-r--r--secure/lib/libcrypto/man/SMIME_read_CMS.34
-rw-r--r--secure/lib/libcrypto/man/SMIME_read_PKCS7.34
-rw-r--r--secure/lib/libcrypto/man/SMIME_write_CMS.34
-rw-r--r--secure/lib/libcrypto/man/SMIME_write_PKCS7.34
-rw-r--r--secure/lib/libcrypto/man/SSL_CIPHER_get_name.34
-rw-r--r--secure/lib/libcrypto/man/SSL_COMP_add_compression_method.34
-rw-r--r--secure/lib/libcrypto/man/SSL_CONF_CTX_new.34
-rw-r--r--secure/lib/libcrypto/man/SSL_CONF_CTX_set1_prefix.34
-rw-r--r--secure/lib/libcrypto/man/SSL_CONF_CTX_set_flags.34
-rw-r--r--secure/lib/libcrypto/man/SSL_CONF_CTX_set_ssl_ctx.34
-rw-r--r--secure/lib/libcrypto/man/SSL_CONF_cmd.34
-rw-r--r--secure/lib/libcrypto/man/SSL_CONF_cmd_argv.34
-rw-r--r--secure/lib/libcrypto/man/SSL_CTX_add1_chain_cert.34
-rw-r--r--secure/lib/libcrypto/man/SSL_CTX_add_extra_chain_cert.34
-rw-r--r--secure/lib/libcrypto/man/SSL_CTX_add_session.34
-rw-r--r--secure/lib/libcrypto/man/SSL_CTX_config.310
-rw-r--r--secure/lib/libcrypto/man/SSL_CTX_ctrl.34
-rw-r--r--secure/lib/libcrypto/man/SSL_CTX_dane_enable.310
-rw-r--r--secure/lib/libcrypto/man/SSL_CTX_flush_sessions.34
-rw-r--r--secure/lib/libcrypto/man/SSL_CTX_free.34
-rw-r--r--secure/lib/libcrypto/man/SSL_CTX_get0_param.322
-rw-r--r--secure/lib/libcrypto/man/SSL_CTX_get_verify_mode.34
-rw-r--r--secure/lib/libcrypto/man/SSL_CTX_has_client_custom_ext.34
-rw-r--r--secure/lib/libcrypto/man/SSL_CTX_load_verify_locations.34
-rw-r--r--secure/lib/libcrypto/man/SSL_CTX_new.323
-rw-r--r--secure/lib/libcrypto/man/SSL_CTX_sess_number.34
-rw-r--r--secure/lib/libcrypto/man/SSL_CTX_sess_set_cache_size.34
-rw-r--r--secure/lib/libcrypto/man/SSL_CTX_sess_set_get_cb.34
-rw-r--r--secure/lib/libcrypto/man/SSL_CTX_sessions.34
-rw-r--r--secure/lib/libcrypto/man/SSL_CTX_set0_CA_list.34
-rw-r--r--secure/lib/libcrypto/man/SSL_CTX_set1_curves.34
-rw-r--r--secure/lib/libcrypto/man/SSL_CTX_set1_sigalgs.34
-rw-r--r--secure/lib/libcrypto/man/SSL_CTX_set1_verify_cert_store.34
-rw-r--r--secure/lib/libcrypto/man/SSL_CTX_set_alpn_select_cb.34
-rw-r--r--secure/lib/libcrypto/man/SSL_CTX_set_cert_cb.34
-rw-r--r--secure/lib/libcrypto/man/SSL_CTX_set_cert_store.34
-rw-r--r--secure/lib/libcrypto/man/SSL_CTX_set_cert_verify_callback.34
-rw-r--r--secure/lib/libcrypto/man/SSL_CTX_set_cipher_list.38
-rw-r--r--secure/lib/libcrypto/man/SSL_CTX_set_client_cert_cb.34
-rw-r--r--secure/lib/libcrypto/man/SSL_CTX_set_client_hello_cb.34
-rw-r--r--secure/lib/libcrypto/man/SSL_CTX_set_ct_validation_callback.34
-rw-r--r--secure/lib/libcrypto/man/SSL_CTX_set_ctlog_list_file.34
-rw-r--r--secure/lib/libcrypto/man/SSL_CTX_set_default_passwd_cb.34
-rw-r--r--secure/lib/libcrypto/man/SSL_CTX_set_ex_data.34
-rw-r--r--secure/lib/libcrypto/man/SSL_CTX_set_generate_session_id.38
-rw-r--r--secure/lib/libcrypto/man/SSL_CTX_set_info_callback.34
-rw-r--r--secure/lib/libcrypto/man/SSL_CTX_set_keylog_callback.34
-rw-r--r--secure/lib/libcrypto/man/SSL_CTX_set_max_cert_list.34
-rw-r--r--secure/lib/libcrypto/man/SSL_CTX_set_min_proto_version.34
-rw-r--r--secure/lib/libcrypto/man/SSL_CTX_set_mode.34
-rw-r--r--secure/lib/libcrypto/man/SSL_CTX_set_msg_callback.34
-rw-r--r--secure/lib/libcrypto/man/SSL_CTX_set_num_tickets.34
-rw-r--r--secure/lib/libcrypto/man/SSL_CTX_set_options.34
-rw-r--r--secure/lib/libcrypto/man/SSL_CTX_set_psk_client_callback.34
-rw-r--r--secure/lib/libcrypto/man/SSL_CTX_set_quiet_shutdown.34
-rw-r--r--secure/lib/libcrypto/man/SSL_CTX_set_read_ahead.34
-rw-r--r--secure/lib/libcrypto/man/SSL_CTX_set_record_padding_callback.34
-rw-r--r--secure/lib/libcrypto/man/SSL_CTX_set_security_level.34
-rw-r--r--secure/lib/libcrypto/man/SSL_CTX_set_session_cache_mode.34
-rw-r--r--secure/lib/libcrypto/man/SSL_CTX_set_session_id_context.310
-rw-r--r--secure/lib/libcrypto/man/SSL_CTX_set_session_ticket_cb.34
-rw-r--r--secure/lib/libcrypto/man/SSL_CTX_set_split_send_fragment.34
-rw-r--r--secure/lib/libcrypto/man/SSL_CTX_set_ssl_version.34
-rw-r--r--secure/lib/libcrypto/man/SSL_CTX_set_stateless_cookie_generate_cb.34
-rw-r--r--secure/lib/libcrypto/man/SSL_CTX_set_timeout.34
-rw-r--r--secure/lib/libcrypto/man/SSL_CTX_set_tlsext_servername_callback.34
-rw-r--r--secure/lib/libcrypto/man/SSL_CTX_set_tlsext_status_cb.34
-rw-r--r--secure/lib/libcrypto/man/SSL_CTX_set_tlsext_ticket_key_cb.34
-rw-r--r--secure/lib/libcrypto/man/SSL_CTX_set_tlsext_use_srtp.34
-rw-r--r--secure/lib/libcrypto/man/SSL_CTX_set_tmp_dh_callback.34
-rw-r--r--secure/lib/libcrypto/man/SSL_CTX_set_verify.316
-rw-r--r--secure/lib/libcrypto/man/SSL_CTX_use_certificate.34
-rw-r--r--secure/lib/libcrypto/man/SSL_CTX_use_psk_identity_hint.34
-rw-r--r--secure/lib/libcrypto/man/SSL_CTX_use_serverinfo.34
-rw-r--r--secure/lib/libcrypto/man/SSL_SESSION_free.34
-rw-r--r--secure/lib/libcrypto/man/SSL_SESSION_get0_cipher.34
-rw-r--r--secure/lib/libcrypto/man/SSL_SESSION_get0_hostname.38
-rw-r--r--secure/lib/libcrypto/man/SSL_SESSION_get0_id_context.34
-rw-r--r--secure/lib/libcrypto/man/SSL_SESSION_get0_peer.34
-rw-r--r--secure/lib/libcrypto/man/SSL_SESSION_get_compress_id.34
-rw-r--r--secure/lib/libcrypto/man/SSL_SESSION_get_ex_data.34
-rw-r--r--secure/lib/libcrypto/man/SSL_SESSION_get_protocol_version.34
-rw-r--r--secure/lib/libcrypto/man/SSL_SESSION_get_time.34
-rw-r--r--secure/lib/libcrypto/man/SSL_SESSION_has_ticket.34
-rw-r--r--secure/lib/libcrypto/man/SSL_SESSION_is_resumable.34
-rw-r--r--secure/lib/libcrypto/man/SSL_SESSION_print.34
-rw-r--r--secure/lib/libcrypto/man/SSL_SESSION_set1_id.34
-rw-r--r--secure/lib/libcrypto/man/SSL_accept.34
-rw-r--r--secure/lib/libcrypto/man/SSL_alert_type_string.34
-rw-r--r--secure/lib/libcrypto/man/SSL_alloc_buffers.34
-rw-r--r--secure/lib/libcrypto/man/SSL_check_chain.34
-rw-r--r--secure/lib/libcrypto/man/SSL_clear.34
-rw-r--r--secure/lib/libcrypto/man/SSL_connect.34
-rw-r--r--secure/lib/libcrypto/man/SSL_do_handshake.34
-rw-r--r--secure/lib/libcrypto/man/SSL_export_keying_material.34
-rw-r--r--secure/lib/libcrypto/man/SSL_extension_supported.34
-rw-r--r--secure/lib/libcrypto/man/SSL_free.34
-rw-r--r--secure/lib/libcrypto/man/SSL_get0_peer_scts.34
-rw-r--r--secure/lib/libcrypto/man/SSL_get_SSL_CTX.34
-rw-r--r--secure/lib/libcrypto/man/SSL_get_all_async_fds.34
-rw-r--r--secure/lib/libcrypto/man/SSL_get_ciphers.34
-rw-r--r--secure/lib/libcrypto/man/SSL_get_client_random.34
-rw-r--r--secure/lib/libcrypto/man/SSL_get_current_cipher.34
-rw-r--r--secure/lib/libcrypto/man/SSL_get_default_timeout.34
-rw-r--r--secure/lib/libcrypto/man/SSL_get_error.310
-rw-r--r--secure/lib/libcrypto/man/SSL_get_extms_support.34
-rw-r--r--secure/lib/libcrypto/man/SSL_get_fd.34
-rw-r--r--secure/lib/libcrypto/man/SSL_get_peer_cert_chain.34
-rw-r--r--secure/lib/libcrypto/man/SSL_get_peer_certificate.34
-rw-r--r--secure/lib/libcrypto/man/SSL_get_peer_signature_nid.34
-rw-r--r--secure/lib/libcrypto/man/SSL_get_peer_tmp_key.34
-rw-r--r--secure/lib/libcrypto/man/SSL_get_psk_identity.34
-rw-r--r--secure/lib/libcrypto/man/SSL_get_rbio.34
-rw-r--r--secure/lib/libcrypto/man/SSL_get_session.34
-rw-r--r--secure/lib/libcrypto/man/SSL_get_shared_sigalgs.34
-rw-r--r--secure/lib/libcrypto/man/SSL_get_verify_result.34
-rw-r--r--secure/lib/libcrypto/man/SSL_get_version.34
-rw-r--r--secure/lib/libcrypto/man/SSL_in_init.34
-rw-r--r--secure/lib/libcrypto/man/SSL_key_update.34
-rw-r--r--secure/lib/libcrypto/man/SSL_library_init.310
-rw-r--r--secure/lib/libcrypto/man/SSL_load_client_CA_file.34
-rw-r--r--secure/lib/libcrypto/man/SSL_new.34
-rw-r--r--secure/lib/libcrypto/man/SSL_pending.34
-rw-r--r--secure/lib/libcrypto/man/SSL_read.34
-rw-r--r--secure/lib/libcrypto/man/SSL_read_early_data.34
-rw-r--r--secure/lib/libcrypto/man/SSL_rstate_string.34
-rw-r--r--secure/lib/libcrypto/man/SSL_session_reused.34
-rw-r--r--secure/lib/libcrypto/man/SSL_set1_host.310
-rw-r--r--secure/lib/libcrypto/man/SSL_set_bio.34
-rw-r--r--secure/lib/libcrypto/man/SSL_set_connect_state.34
-rw-r--r--secure/lib/libcrypto/man/SSL_set_fd.34
-rw-r--r--secure/lib/libcrypto/man/SSL_set_session.34
-rw-r--r--secure/lib/libcrypto/man/SSL_set_shutdown.34
-rw-r--r--secure/lib/libcrypto/man/SSL_set_verify_result.34
-rw-r--r--secure/lib/libcrypto/man/SSL_shutdown.34
-rw-r--r--secure/lib/libcrypto/man/SSL_state_string.34
-rw-r--r--secure/lib/libcrypto/man/SSL_want.34
-rw-r--r--secure/lib/libcrypto/man/SSL_write.38
-rw-r--r--secure/lib/libcrypto/man/UI_STRING.34
-rw-r--r--secure/lib/libcrypto/man/UI_UTIL_read_pw.34
-rw-r--r--secure/lib/libcrypto/man/UI_create_method.34
-rw-r--r--secure/lib/libcrypto/man/UI_new.34
-rw-r--r--secure/lib/libcrypto/man/X509V3_get_d2i.34
-rw-r--r--secure/lib/libcrypto/man/X509_ALGOR_dup.34
-rw-r--r--secure/lib/libcrypto/man/X509_CRL_get0_by_serial.34
-rw-r--r--secure/lib/libcrypto/man/X509_EXTENSION_set_object.34
-rw-r--r--secure/lib/libcrypto/man/X509_LOOKUP_hash_dir.34
-rw-r--r--secure/lib/libcrypto/man/X509_LOOKUP_meth_new.34
-rw-r--r--secure/lib/libcrypto/man/X509_NAME_ENTRY_get_object.34
-rw-r--r--secure/lib/libcrypto/man/X509_NAME_add_entry_by_txt.34
-rw-r--r--secure/lib/libcrypto/man/X509_NAME_get0_der.34
-rw-r--r--secure/lib/libcrypto/man/X509_NAME_get_index_by_NID.34
-rw-r--r--secure/lib/libcrypto/man/X509_NAME_print_ex.34
-rw-r--r--secure/lib/libcrypto/man/X509_PUBKEY_new.34
-rw-r--r--secure/lib/libcrypto/man/X509_SIG_get0.34
-rw-r--r--secure/lib/libcrypto/man/X509_STORE_CTX_get_error.314
-rw-r--r--secure/lib/libcrypto/man/X509_STORE_CTX_new.34
-rw-r--r--secure/lib/libcrypto/man/X509_STORE_CTX_set_verify_cb.310
-rw-r--r--secure/lib/libcrypto/man/X509_STORE_add_cert.310
-rw-r--r--secure/lib/libcrypto/man/X509_STORE_get0_param.34
-rw-r--r--secure/lib/libcrypto/man/X509_STORE_new.38
-rw-r--r--secure/lib/libcrypto/man/X509_STORE_set_verify_cb_func.34
-rw-r--r--secure/lib/libcrypto/man/X509_VERIFY_PARAM_set_flags.310
-rw-r--r--secure/lib/libcrypto/man/X509_check_ca.34
-rw-r--r--secure/lib/libcrypto/man/X509_check_host.34
-rw-r--r--secure/lib/libcrypto/man/X509_check_issued.34
-rw-r--r--secure/lib/libcrypto/man/X509_check_private_key.34
-rw-r--r--secure/lib/libcrypto/man/X509_cmp.3209
-rw-r--r--secure/lib/libcrypto/man/X509_cmp_time.34
-rw-r--r--secure/lib/libcrypto/man/X509_digest.34
-rw-r--r--secure/lib/libcrypto/man/X509_dup.34
-rw-r--r--secure/lib/libcrypto/man/X509_get0_notBefore.34
-rw-r--r--secure/lib/libcrypto/man/X509_get0_signature.34
-rw-r--r--secure/lib/libcrypto/man/X509_get0_uids.34
-rw-r--r--secure/lib/libcrypto/man/X509_get_extension_flags.318
-rw-r--r--secure/lib/libcrypto/man/X509_get_pubkey.34
-rw-r--r--secure/lib/libcrypto/man/X509_get_serialNumber.34
-rw-r--r--secure/lib/libcrypto/man/X509_get_subject_name.34
-rw-r--r--secure/lib/libcrypto/man/X509_get_version.34
-rw-r--r--secure/lib/libcrypto/man/X509_new.34
-rw-r--r--secure/lib/libcrypto/man/X509_sign.34
-rw-r--r--secure/lib/libcrypto/man/X509_verify_cert.34
-rw-r--r--secure/lib/libcrypto/man/X509v3_get_ext_by_NID.34
-rw-r--r--secure/lib/libcrypto/man/d2i_DHparams.34
-rw-r--r--secure/lib/libcrypto/man/d2i_PKCS8PrivateKey_bio.34
-rw-r--r--secure/lib/libcrypto/man/d2i_PrivateKey.34
-rw-r--r--secure/lib/libcrypto/man/d2i_SSL_SESSION.34
-rw-r--r--secure/lib/libcrypto/man/d2i_X509.323
-rw-r--r--secure/lib/libcrypto/man/i2d_CMS_bio_stream.34
-rw-r--r--secure/lib/libcrypto/man/i2d_PKCS7_bio_stream.34
-rw-r--r--secure/lib/libcrypto/man/i2d_re_X509_tbs.34
-rw-r--r--secure/lib/libcrypto/man/o2i_SCT_LIST.34
-rw-r--r--secure/usr.bin/openssl/man/CA.pl.14
-rw-r--r--secure/usr.bin/openssl/man/asn1parse.14
-rw-r--r--secure/usr.bin/openssl/man/ca.14
-rw-r--r--secure/usr.bin/openssl/man/ciphers.14
-rw-r--r--secure/usr.bin/openssl/man/cms.14
-rw-r--r--secure/usr.bin/openssl/man/crl.14
-rw-r--r--secure/usr.bin/openssl/man/crl2pkcs7.14
-rw-r--r--secure/usr.bin/openssl/man/dgst.14
-rw-r--r--secure/usr.bin/openssl/man/dhparam.14
-rw-r--r--secure/usr.bin/openssl/man/dsa.14
-rw-r--r--secure/usr.bin/openssl/man/dsaparam.14
-rw-r--r--secure/usr.bin/openssl/man/ec.14
-rw-r--r--secure/usr.bin/openssl/man/ecparam.14
-rw-r--r--secure/usr.bin/openssl/man/enc.14
-rw-r--r--secure/usr.bin/openssl/man/engine.110
-rw-r--r--secure/usr.bin/openssl/man/errstr.110
-rw-r--r--secure/usr.bin/openssl/man/gendsa.14
-rw-r--r--secure/usr.bin/openssl/man/genpkey.14
-rw-r--r--secure/usr.bin/openssl/man/genrsa.14
-rw-r--r--secure/usr.bin/openssl/man/list.14
-rw-r--r--secure/usr.bin/openssl/man/nseq.14
-rw-r--r--secure/usr.bin/openssl/man/ocsp.14
-rw-r--r--secure/usr.bin/openssl/man/openssl.14
-rw-r--r--secure/usr.bin/openssl/man/passwd.14
-rw-r--r--secure/usr.bin/openssl/man/pkcs12.17
-rw-r--r--secure/usr.bin/openssl/man/pkcs7.14
-rw-r--r--secure/usr.bin/openssl/man/pkcs8.14
-rw-r--r--secure/usr.bin/openssl/man/pkey.14
-rw-r--r--secure/usr.bin/openssl/man/pkeyparam.110
-rw-r--r--secure/usr.bin/openssl/man/pkeyutl.14
-rw-r--r--secure/usr.bin/openssl/man/prime.14
-rw-r--r--secure/usr.bin/openssl/man/rand.14
-rw-r--r--secure/usr.bin/openssl/man/req.14
-rw-r--r--secure/usr.bin/openssl/man/rsa.14
-rw-r--r--secure/usr.bin/openssl/man/rsautl.14
-rw-r--r--secure/usr.bin/openssl/man/s_client.16
-rw-r--r--secure/usr.bin/openssl/man/s_server.16
-rw-r--r--secure/usr.bin/openssl/man/s_time.14
-rw-r--r--secure/usr.bin/openssl/man/sess_id.14
-rw-r--r--secure/usr.bin/openssl/man/smime.14
-rw-r--r--secure/usr.bin/openssl/man/speed.14
-rw-r--r--secure/usr.bin/openssl/man/spkac.14
-rw-r--r--secure/usr.bin/openssl/man/srp.14
-rw-r--r--secure/usr.bin/openssl/man/storeutl.14
-rw-r--r--secure/usr.bin/openssl/man/ts.14
-rw-r--r--secure/usr.bin/openssl/man/tsget.14
-rw-r--r--secure/usr.bin/openssl/man/verify.14
-rw-r--r--secure/usr.bin/openssl/man/version.14
-rw-r--r--secure/usr.bin/openssl/man/x509.14
793 files changed, 4911 insertions, 23725 deletions
diff --git a/ObsoleteFiles.inc b/ObsoleteFiles.inc
index f881181aa3f6..0ac61ecb4485 100644
--- a/ObsoleteFiles.inc
+++ b/ObsoleteFiles.inc
@@ -38,6 +38,9 @@
# xargs -n1 | sort | uniq -d;
# done
+# 20190910: OpenSSL 1.1.1d
+OLD_FILES+=usr/share/openssl/man/man3/d2i_ECDSA_SIG.3.gz
+OLD_FILES+=usr/share/openssl/man/man3/i2d_ECDSA_SIG.3.gz
# 20190811: sys/pwm.h renamed to dev/pwmc.h and pwm(9) removed
OLD_FILES+=usr/include/sys/pwm.h usr/share/man/man9/pwm.9
# 20190723: new clang import which bumps version from 8.0.0 to 8.0.1.
diff --git a/crypto/openssl/CHANGES b/crypto/openssl/CHANGES
index c8662c392a7f..c6ca3439480e 100644
--- a/crypto/openssl/CHANGES
+++ b/crypto/openssl/CHANGES
@@ -7,6 +7,101 @@
https://github.com/openssl/openssl/commits/ and pick the appropriate
release branch.
+ Changes between 1.1.1c and 1.1.1d [10 Sep 2019]
+
+ *) Fixed a fork protection issue. OpenSSL 1.1.1 introduced a rewritten random
+ number generator (RNG). This was intended to include protection in the
+ event of a fork() system call in order to ensure that the parent and child
+ processes did not share the same RNG state. However this protection was not
+ being used in the default case.
+
+ A partial mitigation for this issue is that the output from a high
+ precision timer is mixed into the RNG state so the likelihood of a parent
+ and child process sharing state is significantly reduced.
+
+ If an application already calls OPENSSL_init_crypto() explicitly using
+ OPENSSL_INIT_ATFORK then this problem does not occur at all.
+ (CVE-2019-1549)
+ [Matthias St. Pierre]
+
+ *) For built-in EC curves, ensure an EC_GROUP built from the curve name is
+ used even when parsing explicit parameters, when loading a serialized key
+ or calling `EC_GROUP_new_from_ecpkparameters()`/
+ `EC_GROUP_new_from_ecparameters()`.
+ This prevents bypass of security hardening and performance gains,
+ especially for curves with specialized EC_METHODs.
+ By default, if a key encoded with explicit parameters is loaded and later
+ serialized, the output is still encoded with explicit parameters, even if
+ internally a "named" EC_GROUP is used for computation.
+ [Nicola Tuveri]
+
+ *) Compute ECC cofactors if not provided during EC_GROUP construction. Before
+ this change, EC_GROUP_set_generator would accept order and/or cofactor as
+ NULL. After this change, only the cofactor parameter can be NULL. It also
+ does some minimal sanity checks on the passed order.
+ (CVE-2019-1547)
+ [Billy Bob Brumley]
+
+ *) Fixed a padding oracle in PKCS7_dataDecode and CMS_decrypt_set1_pkey.
+ An attack is simple, if the first CMS_recipientInfo is valid but the
+ second CMS_recipientInfo is chosen ciphertext. If the second
+ recipientInfo decodes to PKCS #1 v1.5 form plaintext, the correct
+ encryption key will be replaced by garbage, and the message cannot be
+ decoded, but if the RSA decryption fails, the correct encryption key is
+ used and the recipient will not notice the attack.
+ As a work around for this potential attack the length of the decrypted
+ key must be equal to the cipher default key length, in case the
+ certificate is not given and all recipientInfo are tried out.
+ The old behaviour can be re-enabled in the CMS code by setting the
+ CMS_DEBUG_DECRYPT flag.
+ (CVE-2019-1563)
+ [Bernd Edlinger]
+
+ *) Early start up entropy quality from the DEVRANDOM seed source has been
+ improved for older Linux systems. The RAND subsystem will wait for
+ /dev/random to be producing output before seeding from /dev/urandom.
+ The seeded state is stored for future library initialisations using
+ a system global shared memory segment. The shared memory identifier
+ can be configured by defining OPENSSL_RAND_SEED_DEVRANDOM_SHM_ID to
+ the desired value. The default identifier is 114.
+ [Paul Dale]
+
+ *) Correct the extended master secret constant on EBCDIC systems. Without this
+ fix TLS connections between an EBCDIC system and a non-EBCDIC system that
+ negotiate EMS will fail. Unfortunately this also means that TLS connections
+ between EBCDIC systems with this fix, and EBCDIC systems without this
+ fix will fail if they negotiate EMS.
+ [Matt Caswell]
+
+ *) Use Windows installation paths in the mingw builds
+
+ Mingw isn't a POSIX environment per se, which means that Windows
+ paths should be used for installation.
+ (CVE-2019-1552)
+ [Richard Levitte]
+
+ *) Changed DH_check to accept parameters with order q and 2q subgroups.
+ With order 2q subgroups the bit 0 of the private key is not secret
+ but DH_generate_key works around that by clearing bit 0 of the
+ private key for those. This avoids leaking bit 0 of the private key.
+ [Bernd Edlinger]
+
+ *) Significantly reduce secure memory usage by the randomness pools.
+ [Paul Dale]
+
+ *) Revert the DEVRANDOM_WAIT feature for Linux systems
+
+ The DEVRANDOM_WAIT feature added a select() call to wait for the
+ /dev/random device to become readable before reading from the
+ /dev/urandom device.
+
+ It turned out that this change had negative side effects on
+ performance which were not acceptable. After some discussion it
+ was decided to revert this feature and leave it up to the OS
+ or the platform maintainer to ensure a proper initialization
+ during early boot time.
+ [Matthias St. Pierre]
+
Changes between 1.1.1b and 1.1.1c [28 May 2019]
*) Add build tests for C++. These are generated files that only do one
@@ -75,6 +170,16 @@
(CVE-2019-1543)
[Matt Caswell]
+ *) Add DEVRANDOM_WAIT feature for Linux systems
+
+ On older Linux systems where the getrandom() system call is not available,
+ OpenSSL normally uses the /dev/urandom device for seeding its CSPRNG.
+ Contrary to getrandom(), the /dev/urandom device will not block during
+ early boot when the kernel CSPRNG has not been seeded yet.
+
+ To mitigate this known weakness, use select() to wait for /dev/random to
+ become readable before reading from /dev/urandom.
+
*) Ensure that SM2 only uses SM3 as digest algorithm
[Paul Yang]
@@ -322,7 +427,7 @@
SSL_set_ciphersuites()
[Matt Caswell]
- *) Memory allocation failures consistenly add an error to the error
+ *) Memory allocation failures consistently add an error to the error
stack.
[Rich Salz]
@@ -6860,7 +6965,7 @@
reason texts, thereby removing some of the footprint that may not
be interesting if those errors aren't displayed anyway.
- NOTE: it's still possible for any application or module to have it's
+ NOTE: it's still possible for any application or module to have its
own set of error texts inserted. The routines are there, just not
used by default when no-err is given.
[Richard Levitte]
@@ -8826,7 +8931,7 @@ des-cbc 3624.96k 5258.21k 5530.91k 5624.30k 5628.26k
Changes between 0.9.6g and 0.9.6h [5 Dec 2002]
*) New function OPENSSL_cleanse(), which is used to cleanse a section of
- memory from it's contents. This is done with a counter that will
+ memory from its contents. This is done with a counter that will
place alternating values in each byte. This can be used to solve
two issues: 1) the removal of calls to memset() by highly optimizing
compilers, and 2) cleansing with other values than 0, since those can
diff --git a/crypto/openssl/Configure b/crypto/openssl/Configure
index 254b04c95d95..5a699836f32a 100755
--- a/crypto/openssl/Configure
+++ b/crypto/openssl/Configure
@@ -87,9 +87,6 @@ my $usage="Usage: Configure [no-<cipher> ...] [enable-<cipher> ...] [-Dxxx] [-lx
# linked openssl executable has rather debugging value than
# production quality.
#
-# DEBUG_SAFESTACK use type-safe stacks to enforce type-safety on stack items
-# provided to stack calls. Generates unique stack functions for
-# each possible stack type.
# BN_LLONG use the type 'long long' in crypto/bn/bn.h
# RC4_CHAR use 'char' instead of 'int' for RC4_INT in crypto/rc4/rc4.h
# Following are set automatically by this script
@@ -145,13 +142,13 @@ my @gcc_devteam_warn = qw(
# -Wunused-macros -- no, too tricky for BN and _XOPEN_SOURCE etc
# -Wextended-offsetof -- no, needed in CMS ASN1 code
my @clang_devteam_warn = qw(
+ -Wno-unknown-warning-option
-Wswitch-default
-Wno-parentheses-equality
-Wno-language-extension-token
-Wno-extended-offsetof
-Wconditional-uninitialized
-Wincompatible-pointer-types-discards-qualifiers
- -Wno-unknown-warning-option
-Wmissing-variable-declarations
);
diff --git a/crypto/openssl/INSTALL b/crypto/openssl/INSTALL
index 0b6a3fd1ec39..2119cbae9e59 100644
--- a/crypto/openssl/INSTALL
+++ b/crypto/openssl/INSTALL
@@ -98,6 +98,9 @@
$ nmake test
$ nmake install
+ Note that in order to perform the install step above you need to have
+ appropriate permissions to write to the installation directory.
+
If any of these steps fails, see section Installation in Detail below.
This will build and install OpenSSL in the default location, which is:
@@ -107,6 +110,12 @@
OpenSSL version number with underscores instead of periods.
Windows: C:\Program Files\OpenSSL or C:\Program Files (x86)\OpenSSL
+ The installation directory should be appropriately protected to ensure
+ unprivileged users cannot make changes to OpenSSL binaries or files, or install
+ engines. If you already have a pre-installed version of OpenSSL as part of
+ your Operating System it is recommended that you do not overwrite the system
+ version and instead install to somewhere else.
+
If you want to install it anywhere else, run config like this:
On Unix:
@@ -135,7 +144,10 @@
Don't build with support for deprecated APIs below the
specified version number. For example "--api=1.1.0" will
remove support for all APIS that were deprecated in OpenSSL
- version 1.1.0 or below.
+ version 1.1.0 or below. This is a rather specialized option
+ for developers. If you just intend to remove all deprecated
+ APIs entirely (up to the current version), it is easier
+ to add the 'no-deprecated' option instead (see below).
--cross-compile-prefix=PREFIX
The PREFIX to include in front of commands for your
@@ -229,7 +241,7 @@
source exists.
getrandom: Use the L<getrandom(2)> or equivalent system
call.
- devrandom: Use the the first device from the DEVRANDOM list
+ devrandom: Use the first device from the DEVRANDOM list
which can be opened to read random bytes. The
DEVRANDOM preprocessor constant expands to
"/dev/urandom","/dev/random","/dev/srandom" on
@@ -908,8 +920,11 @@
$ mms install ! OpenVMS
$ nmake install # Windows
- This will install all the software components in this directory
- tree under PREFIX (the directory given with --prefix or its
+ Note that in order to perform the install step above you need to have
+ appropriate permissions to write to the installation directory.
+
+ The above commands will install all the software components in this
+ directory tree under PREFIX (the directory given with --prefix or its
default):
Unix:
@@ -965,6 +980,12 @@
for private key files.
misc Various scripts.
+ The installation directory should be appropriately protected to ensure
+ unprivileged users cannot make changes to OpenSSL binaries or files, or
+ install engines. If you already have a pre-installed version of OpenSSL as
+ part of your Operating System it is recommended that you do not overwrite
+ the system version and instead install to somewhere else.
+
Package builders who want to configure the library for standard
locations, but have the package installed somewhere else so that
it can easily be packaged, can use
diff --git a/crypto/openssl/NEWS b/crypto/openssl/NEWS
index 0e38f69dbbaa..1c88dee35389 100644
--- a/crypto/openssl/NEWS
+++ b/crypto/openssl/NEWS
@@ -5,6 +5,23 @@
This file gives a brief overview of the major changes between each OpenSSL
release. For more details please read the CHANGES file.
+ Major changes between OpenSSL 1.1.1c and OpenSSL 1.1.1d [10 Sep 2019]
+
+ o Fixed a fork protection issue (CVE-2019-1549)
+ o Fixed a padding oracle in PKCS7_dataDecode and CMS_decrypt_set1_pkey
+ (CVE-2019-1563)
+ o For built-in EC curves, ensure an EC_GROUP built from the curve name is
+ used even when parsing explicit parameters
+ o Compute ECC cofactors if not provided during EC_GROUP construction
+ (CVE-2019-1547)
+ o Early start up entropy quality from the DEVRANDOM seed source has been
+ improved for older Linux systems
+ o Correct the extended master secret constant on EBCDIC systems
+ o Use Windows installation paths in the mingw builds (CVE-2019-1552)
+ o Changed DH_check to accept parameters with order q and 2q subgroups
+ o Significantly reduce secure memory usage by the randomness pools
+ o Revert the DEVRANDOM_WAIT feature for Linux systems
+
Major changes between OpenSSL 1.1.1b and OpenSSL 1.1.1c [28 May 2019]
o Prevent over long nonces in ChaCha20-Poly1305 (CVE-2019-1543)
@@ -601,7 +618,7 @@
Major changes between OpenSSL 0.9.7h and OpenSSL 0.9.7i [14 Oct 2005]:
- o Give EVP_MAX_MD_SIZE it's old value, except for a FIPS build.
+ o Give EVP_MAX_MD_SIZE its old value, except for a FIPS build.
Major changes between OpenSSL 0.9.7g and OpenSSL 0.9.7h [11 Oct 2005]:
diff --git a/crypto/openssl/README b/crypto/openssl/README
index 29757157c384..51bb4789be2a 100644
--- a/crypto/openssl/README
+++ b/crypto/openssl/README
@@ -1,5 +1,5 @@
- OpenSSL 1.1.1c 28 May 2019
+ OpenSSL 1.1.1d 10 Sep 2019
Copyright (c) 1998-2019 The OpenSSL Project
Copyright (c) 1995-1998 Eric A. Young, Tim J. Hudson
diff --git a/crypto/openssl/apps/apps.c b/crypto/openssl/apps/apps.c
index 36cb0b278337..7177c5d98266 100644
--- a/crypto/openssl/apps/apps.c
+++ b/crypto/openssl/apps/apps.c
@@ -40,7 +40,6 @@
#endif
#include <openssl/bn.h>
#include <openssl/ssl.h>
-#include "s_apps.h"
#include "apps.h"
#ifdef _WIN32
@@ -48,6 +47,14 @@ static int WIN32_rename(const char *from, const char *to);
# define rename(from,to) WIN32_rename((from),(to))
#endif
+#if defined(OPENSSL_SYS_WINDOWS) || defined(OPENSSL_SYS_MSDOS)
+# include <conio.h>
+#endif
+
+#if defined(OPENSSL_SYS_MSDOS) && !defined(_WIN32)
+# define _kbhit kbhit
+#endif
+
typedef struct {
const char *name;
unsigned long flag;
diff --git a/crypto/openssl/apps/apps.h b/crypto/openssl/apps/apps.h
index d9eb650eb211..4a3e1a88573e 100644
--- a/crypto/openssl/apps/apps.h
+++ b/crypto/openssl/apps/apps.h
@@ -1,5 +1,5 @@
/*
- * Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 1995-2019 The OpenSSL Project Authors. All Rights Reserved.
*
* Licensed under the OpenSSL license (the "License"). You may not use
* this file except in compliance with the License. You can obtain a copy
@@ -444,11 +444,9 @@ void destroy_ui_method(void);
const UI_METHOD *get_ui_method(void);
int chopup_args(ARGS *arg, char *buf);
-# ifdef HEADER_X509_H
int dump_cert_text(BIO *out, X509 *x);
void print_name(BIO *out, const char *title, X509_NAME *nm,
unsigned long lflags);
-# endif
void print_bignum_var(BIO *, const BIGNUM *, const char*,
int, unsigned char *);
void print_array(BIO *, const char *, int, const unsigned char *);
diff --git a/crypto/openssl/apps/ca.c b/crypto/openssl/apps/ca.c
index 69207c0662ed..a4a77126f2f2 100644
--- a/crypto/openssl/apps/ca.c
+++ b/crypto/openssl/apps/ca.c
@@ -1,5 +1,5 @@
/*
- * Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 1995-2019 The OpenSSL Project Authors. All Rights Reserved.
*
* Licensed under the OpenSSL license (the "License"). You may not use
* this file except in compliance with the License. You can obtain a copy
@@ -722,7 +722,7 @@ end_of_options:
/*****************************************************************/
if (req || gencrl) {
- if (spkac_file != NULL) {
+ if (spkac_file != NULL && outfile != NULL) {
output_der = 1;
batch = 1;
}
diff --git a/crypto/openssl/apps/dgst.c b/crypto/openssl/apps/dgst.c
index d158a0ccb284..d6f5a0e2e712 100644
--- a/crypto/openssl/apps/dgst.c
+++ b/crypto/openssl/apps/dgst.c
@@ -1,5 +1,5 @@
/*
- * Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 1995-2019 The OpenSSL Project Authors. All Rights Reserved.
*
* Licensed under the OpenSSL license (the "License"). You may not use
* this file except in compliance with the License. You can obtain a copy
@@ -421,7 +421,7 @@ int do_fp(BIO *out, unsigned char *buf, BIO *bp, int sep, int binout,
size_t len;
int i;
- for (;;) {
+ while (BIO_pending(bp) || !BIO_eof(bp)) {
i = BIO_read(bp, (char *)buf, BUFSIZE);
if (i < 0) {
BIO_printf(bio_err, "Read Error in %s\n", file);
diff --git a/crypto/openssl/apps/enc.c b/crypto/openssl/apps/enc.c
index de33e57a5f49..d1772f3eb9f2 100644
--- a/crypto/openssl/apps/enc.c
+++ b/crypto/openssl/apps/enc.c
@@ -586,7 +586,7 @@ int enc_main(int argc, char **argv)
if (benc != NULL)
wbio = BIO_push(benc, wbio);
- for (;;) {
+ while (BIO_pending(rbio) || !BIO_eof(rbio)) {
inl = BIO_read(rbio, (char *)buff, bsize);
if (inl <= 0)
break;
diff --git a/crypto/openssl/apps/ocsp.c b/crypto/openssl/apps/ocsp.c
index 066a2e43af2b..5d2391816995 100644
--- a/crypto/openssl/apps/ocsp.c
+++ b/crypto/openssl/apps/ocsp.c
@@ -1416,9 +1416,11 @@ static int do_responder(OCSP_REQUEST **preq, BIO **pcbio, BIO *acbio,
*q = '\0';
/*
- * Skip "GET / HTTP..." requests often used by load-balancers
+ * Skip "GET / HTTP..." requests often used by load-balancers. Note:
+ * 'p' was incremented above to point to the first byte *after* the
+ * leading slash, so with 'GET / ' it is now an empty string.
*/
- if (p[1] == '\0')
+ if (p[0] == '\0')
goto out;
len = urldecode(p);
diff --git a/crypto/openssl/apps/openssl.c b/crypto/openssl/apps/openssl.c
index a872e2c5eeec..9648f35b0284 100644
--- a/crypto/openssl/apps/openssl.c
+++ b/crypto/openssl/apps/openssl.c
@@ -1,5 +1,5 @@
/*
- * Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 1995-2019 The OpenSSL Project Authors. All Rights Reserved.
*
* Licensed under the OpenSSL license (the "License"). You may not use
* this file except in compliance with the License. You can obtain a copy
@@ -22,7 +22,6 @@
# include <openssl/engine.h>
#endif
#include <openssl/err.h>
-#include "s_apps.h"
/* Needed to get the other O_xxx flags. */
#ifdef OPENSSL_SYS_VMS
# include <unixio.h>
diff --git a/crypto/openssl/apps/pkcs12.c b/crypto/openssl/apps/pkcs12.c
index 719a309a860c..d0600b376043 100644
--- a/crypto/openssl/apps/pkcs12.c
+++ b/crypto/openssl/apps/pkcs12.c
@@ -838,7 +838,7 @@ static int alg_print(const X509_ALGOR *alg)
goto done;
}
BIO_printf(bio_err, ", Salt length: %d, Cost(N): %ld, "
- "Block size(r): %ld, Paralelizm(p): %ld",
+ "Block size(r): %ld, Parallelism(p): %ld",
ASN1_STRING_length(kdf->salt),
ASN1_INTEGER_get(kdf->costParameter),
ASN1_INTEGER_get(kdf->blockSize),
diff --git a/crypto/openssl/apps/req.c b/crypto/openssl/apps/req.c
index 6fd28a2aba24..f9d6fba1094d 100644
--- a/crypto/openssl/apps/req.c
+++ b/crypto/openssl/apps/req.c
@@ -1,5 +1,5 @@
/*
- * Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 1995-2019 The OpenSSL Project Authors. All Rights Reserved.
*
* Licensed under the OpenSSL license (the "License"). You may not use
* this file except in compliance with the License. You can obtain a copy
@@ -881,9 +881,19 @@ int req_main(int argc, char **argv)
if (text) {
if (x509)
- X509_print_ex(out, x509ss, get_nameopt(), reqflag);
+ ret = X509_print_ex(out, x509ss, get_nameopt(), reqflag);
else
- X509_REQ_print_ex(out, req, get_nameopt(), reqflag);
+ ret = X509_REQ_print_ex(out, req, get_nameopt(), reqflag);
+
+ if (ret == 0) {
+ if (x509)
+ BIO_printf(bio_err, "Error printing certificate\n");
+ else
+ BIO_printf(bio_err, "Error printing certificate request\n");
+
+ ERR_print_errors(bio_err);
+ goto end;
+ }
}
if (subject) {
diff --git a/crypto/openssl/apps/s_apps.h b/crypto/openssl/apps/s_apps.h
index 0a3bc962808b..f94e659e71e3 100644
--- a/crypto/openssl/apps/s_apps.h
+++ b/crypto/openssl/apps/s_apps.h
@@ -1,5 +1,5 @@
/*
- * Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 1995-2019 The OpenSSL Project Authors. All Rights Reserved.
*
* Licensed under the OpenSSL license (the "License"). You may not use
* this file except in compliance with the License. You can obtain a copy
@@ -9,13 +9,7 @@
#include <openssl/opensslconf.h>
-#if defined(OPENSSL_SYS_WINDOWS) || defined(OPENSSL_SYS_MSDOS)
-# include <conio.h>
-#endif
-
-#if defined(OPENSSL_SYS_MSDOS) && !defined(_WIN32)
-# define _kbhit kbhit
-#endif
+#include <openssl/ssl.h>
#define PORT "4433"
#define PROTOCOL "tcp"
@@ -24,17 +18,15 @@ typedef int (*do_server_cb)(int s, int stype, int prot, unsigned char *context);
int do_server(int *accept_sock, const char *host, const char *port,
int family, int type, int protocol, do_server_cb cb,
unsigned char *context, int naccept, BIO *bio_s_out);
-#ifdef HEADER_X509_H
+
int verify_callback(int ok, X509_STORE_CTX *ctx);
-#endif
-#ifdef HEADER_SSL_H
+
int set_cert_stuff(SSL_CTX *ctx, char *cert_file, char *key_file);
int set_cert_key_stuff(SSL_CTX *ctx, X509 *cert, EVP_PKEY *key,
STACK_OF(X509) *chain, int build_chain);
int ssl_print_sigalgs(BIO *out, SSL *s);
int ssl_print_point_formats(BIO *out, SSL *s);
int ssl_print_groups(BIO *out, SSL *s, int noshared);
-#endif
int ssl_print_tmp_key(BIO *out, SSL *s);
int init_client(int *sock, const char *host, const char *port,
const char *bindhost, const char *bindport,
@@ -44,13 +36,11 @@ int should_retry(int i);
long bio_dump_callback(BIO *bio, int cmd, const char *argp,
int argi, long argl, long ret);
-#ifdef HEADER_SSL_H
void apps_ssl_info_callback(const SSL *s, int where, int ret);
void msg_cb(int write_p, int version, int content_type, const void *buf,
size_t len, SSL *ssl, void *arg);
void tlsext_cb(SSL *s, int client_server, int type, const unsigned char *data,
int len, void *arg);
-#endif
int generate_cookie_callback(SSL *ssl, unsigned char *cookie,
unsigned int *cookie_len);
@@ -75,7 +65,6 @@ int args_excert(int option, SSL_EXCERT **pexc);
int load_excert(SSL_EXCERT **pexc);
void print_verify_detail(SSL *s, BIO *bio);
void print_ssl_summary(SSL *s);
-#ifdef HEADER_SSL_H
int config_ctx(SSL_CONF_CTX *cctx, STACK_OF(OPENSSL_STRING) *str, SSL_CTX *ctx);
int ssl_ctx_add_crls(SSL_CTX *ctx, STACK_OF(X509_CRL) *crls,
int crl_download);
@@ -86,4 +75,3 @@ int ssl_load_stores(SSL_CTX *ctx, const char *vfyCApath,
void ssl_ctx_security_debug(SSL_CTX *ctx, int verbose);
int set_keylog_file(SSL_CTX *ctx, const char *keylog_file);
void print_ca_names(BIO *bio, SSL *s);
-#endif
diff --git a/crypto/openssl/apps/s_cb.c b/crypto/openssl/apps/s_cb.c
index 520323c8b05c..5dc1d03f2883 100644
--- a/crypto/openssl/apps/s_cb.c
+++ b/crypto/openssl/apps/s_cb.c
@@ -1525,7 +1525,8 @@ void print_ca_names(BIO *bio, SSL *s)
int i;
if (sk == NULL || sk_X509_NAME_num(sk) == 0) {
- BIO_printf(bio, "---\nNo %s certificate CA names sent\n", cs);
+ if (!SSL_is_server(s))
+ BIO_printf(bio, "---\nNo %s certificate CA names sent\n", cs);
return;
}
diff --git a/crypto/openssl/apps/s_client.c b/crypto/openssl/apps/s_client.c
index b85339a0b870..26a6789d811a 100644
--- a/crypto/openssl/apps/s_client.c
+++ b/crypto/openssl/apps/s_client.c
@@ -2345,7 +2345,7 @@ int s_client_main(int argc, char **argv)
(void)BIO_flush(fbio);
/*
* The first line is the HTTP response. According to RFC 7230,
- * it's formated exactly like this:
+ * it's formatted exactly like this:
*
* HTTP/d.d ddd Reason text\r\n
*/
diff --git a/crypto/openssl/apps/speed.c b/crypto/openssl/apps/speed.c
index 8d4b1695d851..20149506cc74 100644
--- a/crypto/openssl/apps/speed.c
+++ b/crypto/openssl/apps/speed.c
@@ -1790,7 +1790,7 @@ int speed_main(int argc, char **argv)
}
buflen = lengths[size_num - 1];
- if (buflen < 36) /* size of random vector in RSA bencmark */
+ if (buflen < 36) /* size of random vector in RSA benchmark */
buflen = 36;
buflen += MAX_MISALIGNMENT + 1;
loopargs[i].buf_malloc = app_malloc(buflen, "input buffer");
diff --git a/crypto/openssl/apps/storeutl.c b/crypto/openssl/apps/storeutl.c
index 50007f6e8b69..644fe28499d6 100644
--- a/crypto/openssl/apps/storeutl.c
+++ b/crypto/openssl/apps/storeutl.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2016-2018 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 2016-2019 The OpenSSL Project Authors. All Rights Reserved.
*
* Licensed under the OpenSSL license (the "License"). You may not use
* this file except in compliance with the License. You can obtain a copy
@@ -125,7 +125,7 @@ int storeutl_main(int argc, char *argv[])
}
/*
* If expected wasn't set at this point, it means the map
- * isn't syncronised with the possible options leading here.
+ * isn't synchronised with the possible options leading here.
*/
OPENSSL_assert(expected != 0);
}
diff --git a/crypto/openssl/config b/crypto/openssl/config
index d0e31b6512ef..c1a94d4e42fa 100755
--- a/crypto/openssl/config
+++ b/crypto/openssl/config
@@ -498,12 +498,12 @@ case "$GUESSOS" in
OUT="darwin64-x86_64-cc"
fi ;;
armv6+7-*-iphoneos)
- __CNF_CFLAGS="$__CNF_CFLAGS -arch%20armv6 -arch%20armv7"
- __CNF_CXXFLAGS="$__CNF_CXXFLAGS -arch%20armv6 -arch%20armv7"
+ __CNF_CFLAGS="$__CNF_CFLAGS -arch armv6 -arch armv7"
+ __CNF_CXXFLAGS="$__CNF_CXXFLAGS -arch armv6 -arch armv7"
OUT="iphoneos-cross" ;;
*-*-iphoneos)
- __CNF_CFLAGS="$__CNF_CFLAGS -arch%20${MACHINE}"
- __CNF_CXXFLAGS="$__CNF_CXXFLAGS -arch%20${MACHINE}"
+ __CNF_CFLAGS="$__CNF_CFLAGS -arch ${MACHINE}"
+ __CNF_CXXFLAGS="$__CNF_CXXFLAGS -arch ${MACHINE}"
OUT="iphoneos-cross" ;;
arm64-*-iphoneos|*-*-ios64)
OUT="ios64-cross" ;;
diff --git a/crypto/openssl/crypto/aes/asm/aes-586.pl b/crypto/openssl/crypto/aes/asm/aes-586.pl
deleted file mode 100755
index 29059edf8b7a..000000000000
--- a/crypto/openssl/crypto/aes/asm/aes-586.pl
+++ /dev/null
@@ -1,3000 +0,0 @@
-#! /usr/bin/env perl
-# Copyright 2004-2016 The OpenSSL Project Authors. All Rights Reserved.
-#
-# Licensed under the OpenSSL license (the "License"). You may not use
-# this file except in compliance with the License. You can obtain a copy
-# in the file LICENSE in the source distribution or at
-# https://www.openssl.org/source/license.html
-
-#
-# ====================================================================
-# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
-# project. The module is, however, dual licensed under OpenSSL and
-# CRYPTOGAMS licenses depending on where you obtain it. For further
-# details see http://www.openssl.org/~appro/cryptogams/.
-# ====================================================================
-#
-# Version 4.3.
-#
-# You might fail to appreciate this module performance from the first
-# try. If compared to "vanilla" linux-ia32-icc target, i.e. considered
-# to be *the* best Intel C compiler without -KPIC, performance appears
-# to be virtually identical... But try to re-configure with shared
-# library support... Aha! Intel compiler "suddenly" lags behind by 30%
-# [on P4, more on others]:-) And if compared to position-independent
-# code generated by GNU C, this code performs *more* than *twice* as
-# fast! Yes, all this buzz about PIC means that unlike other hand-
-# coded implementations, this one was explicitly designed to be safe
-# to use even in shared library context... This also means that this
-# code isn't necessarily absolutely fastest "ever," because in order
-# to achieve position independence an extra register has to be
-# off-loaded to stack, which affects the benchmark result.
-#
-# Special note about instruction choice. Do you recall RC4_INT code
-# performing poorly on P4? It might be the time to figure out why.
-# RC4_INT code implies effective address calculations in base+offset*4
-# form. Trouble is that it seems that offset scaling turned to be
-# critical path... At least eliminating scaling resulted in 2.8x RC4
-# performance improvement [as you might recall]. As AES code is hungry
-# for scaling too, I [try to] avoid the latter by favoring off-by-2
-# shifts and masking the result with 0xFF<<2 instead of "boring" 0xFF.
-#
-# As was shown by Dean Gaudet, the above note turned out to be
-# void. Performance improvement with off-by-2 shifts was observed on
-# intermediate implementation, which was spilling yet another register
-# to stack... Final offset*4 code below runs just a tad faster on P4,
-# but exhibits up to 10% improvement on other cores.
-#
-# Second version is "monolithic" replacement for aes_core.c, which in
-# addition to AES_[de|en]crypt implements AES_set_[de|en]cryption_key.
-# This made it possible to implement little-endian variant of the
-# algorithm without modifying the base C code. Motivating factor for
-# the undertaken effort was that it appeared that in tight IA-32
-# register window little-endian flavor could achieve slightly higher
-# Instruction Level Parallelism, and it indeed resulted in up to 15%
-# better performance on most recent µ-archs...
-#
-# Third version adds AES_cbc_encrypt implementation, which resulted in
-# up to 40% performance improvement of CBC benchmark results. 40% was
-# observed on P4 core, where "overall" improvement coefficient, i.e. if
-# compared to PIC generated by GCC and in CBC mode, was observed to be
-# as large as 4x:-) CBC performance is virtually identical to ECB now
-# and on some platforms even better, e.g. 17.6 "small" cycles/byte on
-# Opteron, because certain function prologues and epilogues are
-# effectively taken out of the loop...
-#
-# Version 3.2 implements compressed tables and prefetch of these tables
-# in CBC[!] mode. Former means that 3/4 of table references are now
-# misaligned, which unfortunately has negative impact on elder IA-32
-# implementations, Pentium suffered 30% penalty, PIII - 10%.
-#
-# Version 3.3 avoids L1 cache aliasing between stack frame and
-# S-boxes, and 3.4 - L1 cache aliasing even between key schedule. The
-# latter is achieved by copying the key schedule to controlled place in
-# stack. This unfortunately has rather strong impact on small block CBC
-# performance, ~2x deterioration on 16-byte block if compared to 3.3.
-#
-# Version 3.5 checks if there is L1 cache aliasing between user-supplied
-# key schedule and S-boxes and abstains from copying the former if
-# there is no. This allows end-user to consciously retain small block
-# performance by aligning key schedule in specific manner.
-#
-# Version 3.6 compresses Td4 to 256 bytes and prefetches it in ECB.
-#
-# Current ECB performance numbers for 128-bit key in CPU cycles per
-# processed byte [measure commonly used by AES benchmarkers] are:
-#
-# small footprint fully unrolled
-# P4 24 22
-# AMD K8 20 19
-# PIII 25 23
-# Pentium 81 78
-#
-# Version 3.7 reimplements outer rounds as "compact." Meaning that
-# first and last rounds reference compact 256 bytes S-box. This means
-# that first round consumes a lot more CPU cycles and that encrypt
-# and decrypt performance becomes asymmetric. Encrypt performance
-# drops by 10-12%, while decrypt - by 20-25%:-( 256 bytes S-box is
-# aggressively pre-fetched.
-#
-# Version 4.0 effectively rolls back to 3.6 and instead implements
-# additional set of functions, _[x86|sse]_AES_[en|de]crypt_compact,
-# which use exclusively 256 byte S-box. These functions are to be
-# called in modes not concealing plain text, such as ECB, or when
-# we're asked to process smaller amount of data [or unconditionally
-# on hyper-threading CPU]. Currently it's called unconditionally from
-# AES_[en|de]crypt, which affects all modes, but CBC. CBC routine
-# still needs to be modified to switch between slower and faster
-# mode when appropriate... But in either case benchmark landscape
-# changes dramatically and below numbers are CPU cycles per processed
-# byte for 128-bit key.
-#
-# ECB encrypt ECB decrypt CBC large chunk
-# P4 52[54] 83[95] 23
-# AMD K8 46[41] 66[70] 18
-# PIII 41[50] 60[77] 24
-# Core 2 31[36] 45[64] 18.5
-# Atom 76[100] 96[138] 60
-# Pentium 115 150 77
-#
-# Version 4.1 switches to compact S-box even in key schedule setup.
-#
-# Version 4.2 prefetches compact S-box in every SSE round or in other
-# words every cache-line is *guaranteed* to be accessed within ~50
-# cycles window. Why just SSE? Because it's needed on hyper-threading
-# CPU! Which is also why it's prefetched with 64 byte stride. Best
-# part is that it has no negative effect on performance:-)
-#
-# Version 4.3 implements switch between compact and non-compact block
-# functions in AES_cbc_encrypt depending on how much data was asked
-# to be processed in one stroke.
-#
-######################################################################
-# Timing attacks are classified in two classes: synchronous when
-# attacker consciously initiates cryptographic operation and collects
-# timing data of various character afterwards, and asynchronous when
-# malicious code is executed on same CPU simultaneously with AES,
-# instruments itself and performs statistical analysis of this data.
-#
-# As far as synchronous attacks go the root to the AES timing
-# vulnerability is twofold. Firstly, of 256 S-box elements at most 160
-# are referred to in single 128-bit block operation. Well, in C
-# implementation with 4 distinct tables it's actually as little as 40
-# references per 256 elements table, but anyway... Secondly, even
-# though S-box elements are clustered into smaller amount of cache-
-# lines, smaller than 160 and even 40, it turned out that for certain
-# plain-text pattern[s] or simply put chosen plain-text and given key
-# few cache-lines remain unaccessed during block operation. Now, if
-# attacker can figure out this access pattern, he can deduct the key
-# [or at least part of it]. The natural way to mitigate this kind of
-# attacks is to minimize the amount of cache-lines in S-box and/or
-# prefetch them to ensure that every one is accessed for more uniform
-# timing. But note that *if* plain-text was concealed in such way that
-# input to block function is distributed *uniformly*, then attack
-# wouldn't apply. Now note that some encryption modes, most notably
-# CBC, do mask the plain-text in this exact way [secure cipher output
-# is distributed uniformly]. Yes, one still might find input that
-# would reveal the information about given key, but if amount of
-# candidate inputs to be tried is larger than amount of possible key
-# combinations then attack becomes infeasible. This is why revised
-# AES_cbc_encrypt "dares" to switch to larger S-box when larger chunk
-# of data is to be processed in one stroke. The current size limit of
-# 512 bytes is chosen to provide same [diminishingly low] probability
-# for cache-line to remain untouched in large chunk operation with
-# large S-box as for single block operation with compact S-box and
-# surely needs more careful consideration...
-#
-# As for asynchronous attacks. There are two flavours: attacker code
-# being interleaved with AES on hyper-threading CPU at *instruction*
-# level, and two processes time sharing single core. As for latter.
-# Two vectors. 1. Given that attacker process has higher priority,
-# yield execution to process performing AES just before timer fires
-# off the scheduler, immediately regain control of CPU and analyze the
-# cache state. For this attack to be efficient attacker would have to
-# effectively slow down the operation by several *orders* of magnitude,
-# by ratio of time slice to duration of handful of AES rounds, which
-# unlikely to remain unnoticed. Not to mention that this also means
-# that he would spend correspondingly more time to collect enough
-# statistical data to mount the attack. It's probably appropriate to
-# say that if adversary reckons that this attack is beneficial and
-# risks to be noticed, you probably have larger problems having him
-# mere opportunity. In other words suggested code design expects you
-# to preclude/mitigate this attack by overall system security design.
-# 2. Attacker manages to make his code interrupt driven. In order for
-# this kind of attack to be feasible, interrupt rate has to be high
-# enough, again comparable to duration of handful of AES rounds. But
-# is there interrupt source of such rate? Hardly, not even 1Gbps NIC
-# generates interrupts at such raging rate...
-#
-# And now back to the former, hyper-threading CPU or more specifically
-# Intel P4. Recall that asynchronous attack implies that malicious
-# code instruments itself. And naturally instrumentation granularity
-# has be noticeably lower than duration of codepath accessing S-box.
-# Given that all cache-lines are accessed during that time that is.
-# Current implementation accesses *all* cache-lines within ~50 cycles
-# window, which is actually *less* than RDTSC latency on Intel P4!
-
-$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
-push(@INC,"${dir}","${dir}../../perlasm");
-require "x86asm.pl";
-
-$output = pop;
-open OUT,">$output";
-*STDOUT=*OUT;
-
-&asm_init($ARGV[0],$x86only = $ARGV[$#ARGV] eq "386");
-&static_label("AES_Te");
-&static_label("AES_Td");
-
-$s0="eax";
-$s1="ebx";
-$s2="ecx";
-$s3="edx";
-$key="edi";
-$acc="esi";
-$tbl="ebp";
-
-# stack frame layout in _[x86|sse]_AES_* routines, frame is allocated
-# by caller
-$__ra=&DWP(0,"esp"); # return address
-$__s0=&DWP(4,"esp"); # s0 backing store
-$__s1=&DWP(8,"esp"); # s1 backing store
-$__s2=&DWP(12,"esp"); # s2 backing store
-$__s3=&DWP(16,"esp"); # s3 backing store
-$__key=&DWP(20,"esp"); # pointer to key schedule
-$__end=&DWP(24,"esp"); # pointer to end of key schedule
-$__tbl=&DWP(28,"esp"); # %ebp backing store
-
-# stack frame layout in AES_[en|crypt] routines, which differs from
-# above by 4 and overlaps by %ebp backing store
-$_tbl=&DWP(24,"esp");
-$_esp=&DWP(28,"esp");
-
-sub _data_word() { my $i; while(defined($i=shift)) { &data_word($i,$i); } }
-
-$speed_limit=512; # chunks smaller than $speed_limit are
- # processed with compact routine in CBC mode
-$small_footprint=1; # $small_footprint=1 code is ~5% slower [on
- # recent µ-archs], but ~5 times smaller!
- # I favor compact code to minimize cache
- # contention and in hope to "collect" 5% back
- # in real-life applications...
-
-$vertical_spin=0; # shift "vertically" defaults to 0, because of
- # its proof-of-concept status...
-# Note that there is no decvert(), as well as last encryption round is
-# performed with "horizontal" shifts. This is because this "vertical"
-# implementation [one which groups shifts on a given $s[i] to form a
-# "column," unlike "horizontal" one, which groups shifts on different
-# $s[i] to form a "row"] is work in progress. It was observed to run
-# few percents faster on Intel cores, but not AMD. On AMD K8 core it's
-# whole 12% slower:-( So we face a trade-off... Shall it be resolved
-# some day? Till then the code is considered experimental and by
-# default remains dormant...
-
-sub encvert()
-{ my ($te,@s) = @_;
- my ($v0,$v1) = ($acc,$key);
-
- &mov ($v0,$s[3]); # copy s3
- &mov (&DWP(4,"esp"),$s[2]); # save s2
- &mov ($v1,$s[0]); # copy s0
- &mov (&DWP(8,"esp"),$s[1]); # save s1
-
- &movz ($s[2],&HB($s[0]));
- &and ($s[0],0xFF);
- &mov ($s[0],&DWP(0,$te,$s[0],8)); # s0>>0
- &shr ($v1,16);
- &mov ($s[3],&DWP(3,$te,$s[2],8)); # s0>>8
- &movz ($s[1],&HB($v1));
- &and ($v1,0xFF);
- &mov ($s[2],&DWP(2,$te,$v1,8)); # s0>>16
- &mov ($v1,$v0);
- &mov ($s[1],&DWP(1,$te,$s[1],8)); # s0>>24
-
- &and ($v0,0xFF);
- &xor ($s[3],&DWP(0,$te,$v0,8)); # s3>>0
- &movz ($v0,&HB($v1));
- &shr ($v1,16);
- &xor ($s[2],&DWP(3,$te,$v0,8)); # s3>>8
- &movz ($v0,&HB($v1));
- &and ($v1,0xFF);
- &xor ($s[1],&DWP(2,$te,$v1,8)); # s3>>16
- &mov ($v1,&DWP(4,"esp")); # restore s2
- &xor ($s[0],&DWP(1,$te,$v0,8)); # s3>>24
-
- &mov ($v0,$v1);
- &and ($v1,0xFF);
- &xor ($s[2],&DWP(0,$te,$v1,8)); # s2>>0
- &movz ($v1,&HB($v0));
- &shr ($v0,16);
- &xor ($s[1],&DWP(3,$te,$v1,8)); # s2>>8
- &movz ($v1,&HB($v0));
- &and ($v0,0xFF);
- &xor ($s[0],&DWP(2,$te,$v0,8)); # s2>>16
- &mov ($v0,&DWP(8,"esp")); # restore s1
- &xor ($s[3],&DWP(1,$te,$v1,8)); # s2>>24
-
- &mov ($v1,$v0);
- &and ($v0,0xFF);
- &xor ($s[1],&DWP(0,$te,$v0,8)); # s1>>0
- &movz ($v0,&HB($v1));
- &shr ($v1,16);
- &xor ($s[0],&DWP(3,$te,$v0,8)); # s1>>8
- &movz ($v0,&HB($v1));
- &and ($v1,0xFF);
- &xor ($s[3],&DWP(2,$te,$v1,8)); # s1>>16
- &mov ($key,$__key); # reincarnate v1 as key
- &xor ($s[2],&DWP(1,$te,$v0,8)); # s1>>24
-}
-
-# Another experimental routine, which features "horizontal spin," but
-# eliminates one reference to stack. Strangely enough runs slower...
-sub enchoriz()
-{ my ($v0,$v1) = ($key,$acc);
-
- &movz ($v0,&LB($s0)); # 3, 2, 1, 0*
- &rotr ($s2,8); # 8,11,10, 9
- &mov ($v1,&DWP(0,$te,$v0,8)); # 0
- &movz ($v0,&HB($s1)); # 7, 6, 5*, 4
- &rotr ($s3,16); # 13,12,15,14
- &xor ($v1,&DWP(3,$te,$v0,8)); # 5
- &movz ($v0,&HB($s2)); # 8,11,10*, 9
- &rotr ($s0,16); # 1, 0, 3, 2
- &xor ($v1,&DWP(2,$te,$v0,8)); # 10
- &movz ($v0,&HB($s3)); # 13,12,15*,14
- &xor ($v1,&DWP(1,$te,$v0,8)); # 15, t[0] collected
- &mov ($__s0,$v1); # t[0] saved
-
- &movz ($v0,&LB($s1)); # 7, 6, 5, 4*
- &shr ($s1,16); # -, -, 7, 6
- &mov ($v1,&DWP(0,$te,$v0,8)); # 4
- &movz ($v0,&LB($s3)); # 13,12,15,14*
- &xor ($v1,&DWP(2,$te,$v0,8)); # 14
- &movz ($v0,&HB($s0)); # 1, 0, 3*, 2
- &and ($s3,0xffff0000); # 13,12, -, -
- &xor ($v1,&DWP(1,$te,$v0,8)); # 3
- &movz ($v0,&LB($s2)); # 8,11,10, 9*
- &or ($s3,$s1); # 13,12, 7, 6
- &xor ($v1,&DWP(3,$te,$v0,8)); # 9, t[1] collected
- &mov ($s1,$v1); # s[1]=t[1]
-
- &movz ($v0,&LB($s0)); # 1, 0, 3, 2*
- &shr ($s2,16); # -, -, 8,11
- &mov ($v1,&DWP(2,$te,$v0,8)); # 2
- &movz ($v0,&HB($s3)); # 13,12, 7*, 6
- &xor ($v1,&DWP(1,$te,$v0,8)); # 7
- &movz ($v0,&HB($s2)); # -, -, 8*,11
- &xor ($v1,&DWP(0,$te,$v0,8)); # 8
- &mov ($v0,$s3);
- &shr ($v0,24); # 13
- &xor ($v1,&DWP(3,$te,$v0,8)); # 13, t[2] collected
-
- &movz ($v0,&LB($s2)); # -, -, 8,11*
- &shr ($s0,24); # 1*
- &mov ($s2,&DWP(1,$te,$v0,8)); # 11
- &xor ($s2,&DWP(3,$te,$s0,8)); # 1
- &mov ($s0,$__s0); # s[0]=t[0]
- &movz ($v0,&LB($s3)); # 13,12, 7, 6*
- &shr ($s3,16); # , ,13,12
- &xor ($s2,&DWP(2,$te,$v0,8)); # 6
- &mov ($key,$__key); # reincarnate v0 as key
- &and ($s3,0xff); # , ,13,12*
- &mov ($s3,&DWP(0,$te,$s3,8)); # 12
- &xor ($s3,$s2); # s[2]=t[3] collected
- &mov ($s2,$v1); # s[2]=t[2]
-}
-
-# More experimental code... SSE one... Even though this one eliminates
-# *all* references to stack, it's not faster...
-sub sse_encbody()
-{
- &movz ($acc,&LB("eax")); # 0
- &mov ("ecx",&DWP(0,$tbl,$acc,8)); # 0
- &pshufw ("mm2","mm0",0x0d); # 7, 6, 3, 2
- &movz ("edx",&HB("eax")); # 1
- &mov ("edx",&DWP(3,$tbl,"edx",8)); # 1
- &shr ("eax",16); # 5, 4
-
- &movz ($acc,&LB("ebx")); # 10
- &xor ("ecx",&DWP(2,$tbl,$acc,8)); # 10
- &pshufw ("mm6","mm4",0x08); # 13,12, 9, 8
- &movz ($acc,&HB("ebx")); # 11
- &xor ("edx",&DWP(1,$tbl,$acc,8)); # 11
- &shr ("ebx",16); # 15,14
-
- &movz ($acc,&HB("eax")); # 5
- &xor ("ecx",&DWP(3,$tbl,$acc,8)); # 5
- &movq ("mm3",QWP(16,$key));
- &movz ($acc,&HB("ebx")); # 15
- &xor ("ecx",&DWP(1,$tbl,$acc,8)); # 15
- &movd ("mm0","ecx"); # t[0] collected
-
- &movz ($acc,&LB("eax")); # 4
- &mov ("ecx",&DWP(0,$tbl,$acc,8)); # 4
- &movd ("eax","mm2"); # 7, 6, 3, 2
- &movz ($acc,&LB("ebx")); # 14
- &xor ("ecx",&DWP(2,$tbl,$acc,8)); # 14
- &movd ("ebx","mm6"); # 13,12, 9, 8
-
- &movz ($acc,&HB("eax")); # 3
- &xor ("ecx",&DWP(1,$tbl,$acc,8)); # 3
- &movz ($acc,&HB("ebx")); # 9
- &xor ("ecx",&DWP(3,$tbl,$acc,8)); # 9
- &movd ("mm1","ecx"); # t[1] collected
-
- &movz ($acc,&LB("eax")); # 2
- &mov ("ecx",&DWP(2,$tbl,$acc,8)); # 2
- &shr ("eax",16); # 7, 6
- &punpckldq ("mm0","mm1"); # t[0,1] collected
- &movz ($acc,&LB("ebx")); # 8
- &xor ("ecx",&DWP(0,$tbl,$acc,8)); # 8
- &shr ("ebx",16); # 13,12
-
- &movz ($acc,&HB("eax")); # 7
- &xor ("ecx",&DWP(1,$tbl,$acc,8)); # 7
- &pxor ("mm0","mm3");
- &movz ("eax",&LB("eax")); # 6
- &xor ("edx",&DWP(2,$tbl,"eax",8)); # 6
- &pshufw ("mm1","mm0",0x08); # 5, 4, 1, 0
- &movz ($acc,&HB("ebx")); # 13
- &xor ("ecx",&DWP(3,$tbl,$acc,8)); # 13
- &xor ("ecx",&DWP(24,$key)); # t[2]
- &movd ("mm4","ecx"); # t[2] collected
- &movz ("ebx",&LB("ebx")); # 12
- &xor ("edx",&DWP(0,$tbl,"ebx",8)); # 12
- &shr ("ecx",16);
- &movd ("eax","mm1"); # 5, 4, 1, 0
- &mov ("ebx",&DWP(28,$key)); # t[3]
- &xor ("ebx","edx");
- &movd ("mm5","ebx"); # t[3] collected
- &and ("ebx",0xffff0000);
- &or ("ebx","ecx");
-
- &punpckldq ("mm4","mm5"); # t[2,3] collected
-}
-
-######################################################################
-# "Compact" block function
-######################################################################
-
-sub enccompact()
-{ my $Fn = \&mov;
- while ($#_>5) { pop(@_); $Fn=sub{}; }
- my ($i,$te,@s)=@_;
- my $tmp = $key;
- my $out = $i==3?$s[0]:$acc;
-
- # $Fn is used in first compact round and its purpose is to
- # void restoration of some values from stack, so that after
- # 4xenccompact with extra argument $key value is left there...
- if ($i==3) { &$Fn ($key,$__key); }##%edx
- else { &mov ($out,$s[0]); }
- &and ($out,0xFF);
- if ($i==1) { &shr ($s[0],16); }#%ebx[1]
- if ($i==2) { &shr ($s[0],24); }#%ecx[2]
- &movz ($out,&BP(-128,$te,$out,1));
-
- if ($i==3) { $tmp=$s[1]; }##%eax
- &movz ($tmp,&HB($s[1]));
- &movz ($tmp,&BP(-128,$te,$tmp,1));
- &shl ($tmp,8);
- &xor ($out,$tmp);
-
- if ($i==3) { $tmp=$s[2]; &mov ($s[1],$__s0); }##%ebx
- else { &mov ($tmp,$s[2]);
- &shr ($tmp,16); }
- if ($i==2) { &and ($s[1],0xFF); }#%edx[2]
- &and ($tmp,0xFF);
- &movz ($tmp,&BP(-128,$te,$tmp,1));
- &shl ($tmp,16);
- &xor ($out,$tmp);
-
- if ($i==3) { $tmp=$s[3]; &mov ($s[2],$__s1); }##%ecx
- elsif($i==2){ &movz ($tmp,&HB($s[3])); }#%ebx[2]
- else { &mov ($tmp,$s[3]);
- &shr ($tmp,24); }
- &movz ($tmp,&BP(-128,$te,$tmp,1));
- &shl ($tmp,24);
- &xor ($out,$tmp);
- if ($i<2) { &mov (&DWP(4+4*$i,"esp"),$out); }
- if ($i==3) { &mov ($s[3],$acc); }
- &comment();
-}
-
-sub enctransform()
-{ my @s = ($s0,$s1,$s2,$s3);
- my $i = shift;
- my $tmp = $tbl;
- my $r2 = $key ;
-
- &and ($tmp,$s[$i]);
- &lea ($r2,&DWP(0,$s[$i],$s[$i]));
- &mov ($acc,$tmp);
- &shr ($tmp,7);
- &and ($r2,0xfefefefe);
- &sub ($acc,$tmp);
- &mov ($tmp,$s[$i]);
- &and ($acc,0x1b1b1b1b);
- &rotr ($tmp,16);
- &xor ($acc,$r2); # r2
- &mov ($r2,$s[$i]);
-
- &xor ($s[$i],$acc); # r0 ^ r2
- &rotr ($r2,16+8);
- &xor ($acc,$tmp);
- &rotl ($s[$i],24);
- &xor ($acc,$r2);
- &mov ($tmp,0x80808080) if ($i!=1);
- &xor ($s[$i],$acc); # ROTATE(r2^r0,24) ^ r2
-}
-
-&function_begin_B("_x86_AES_encrypt_compact");
- # note that caller is expected to allocate stack frame for me!
- &mov ($__key,$key); # save key
-
- &xor ($s0,&DWP(0,$key)); # xor with key
- &xor ($s1,&DWP(4,$key));
- &xor ($s2,&DWP(8,$key));
- &xor ($s3,&DWP(12,$key));
-
- &mov ($acc,&DWP(240,$key)); # load key->rounds
- &lea ($acc,&DWP(-2,$acc,$acc));
- &lea ($acc,&DWP(0,$key,$acc,8));
- &mov ($__end,$acc); # end of key schedule
-
- # prefetch Te4
- &mov ($key,&DWP(0-128,$tbl));
- &mov ($acc,&DWP(32-128,$tbl));
- &mov ($key,&DWP(64-128,$tbl));
- &mov ($acc,&DWP(96-128,$tbl));
- &mov ($key,&DWP(128-128,$tbl));
- &mov ($acc,&DWP(160-128,$tbl));
- &mov ($key,&DWP(192-128,$tbl));
- &mov ($acc,&DWP(224-128,$tbl));
-
- &set_label("loop",16);
-
- &enccompact(0,$tbl,$s0,$s1,$s2,$s3,1);
- &enccompact(1,$tbl,$s1,$s2,$s3,$s0,1);
- &enccompact(2,$tbl,$s2,$s3,$s0,$s1,1);
- &enccompact(3,$tbl,$s3,$s0,$s1,$s2,1);
- &mov ($tbl,0x80808080);
- &enctransform(2);
- &enctransform(3);
- &enctransform(0);
- &enctransform(1);
- &mov ($key,$__key);
- &mov ($tbl,$__tbl);
- &add ($key,16); # advance rd_key
- &xor ($s0,&DWP(0,$key));
- &xor ($s1,&DWP(4,$key));
- &xor ($s2,&DWP(8,$key));
- &xor ($s3,&DWP(12,$key));
-
- &cmp ($key,$__end);
- &mov ($__key,$key);
- &jb (&label("loop"));
-
- &enccompact(0,$tbl,$s0,$s1,$s2,$s3);
- &enccompact(1,$tbl,$s1,$s2,$s3,$s0);
- &enccompact(2,$tbl,$s2,$s3,$s0,$s1);
- &enccompact(3,$tbl,$s3,$s0,$s1,$s2);
-
- &xor ($s0,&DWP(16,$key));
- &xor ($s1,&DWP(20,$key));
- &xor ($s2,&DWP(24,$key));
- &xor ($s3,&DWP(28,$key));
-
- &ret ();
-&function_end_B("_x86_AES_encrypt_compact");
-
-######################################################################
-# "Compact" SSE block function.
-######################################################################
-#
-# Performance is not actually extraordinary in comparison to pure
-# x86 code. In particular encrypt performance is virtually the same.
-# Decrypt performance on the other hand is 15-20% better on newer
-# µ-archs [but we're thankful for *any* improvement here], and ~50%
-# better on PIII:-) And additionally on the pros side this code
-# eliminates redundant references to stack and thus relieves/
-# minimizes the pressure on the memory bus.
-#
-# MMX register layout lsb
-# +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
-# | mm4 | mm0 |
-# +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
-# | s3 | s2 | s1 | s0 |
-# +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
-# |15|14|13|12|11|10| 9| 8| 7| 6| 5| 4| 3| 2| 1| 0|
-# +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
-#
-# Indexes translate as s[N/4]>>(8*(N%4)), e.g. 5 means s1>>8.
-# In this terms encryption and decryption "compact" permutation
-# matrices can be depicted as following:
-#
-# encryption lsb # decryption lsb
-# +----++----+----+----+----+ # +----++----+----+----+----+
-# | t0 || 15 | 10 | 5 | 0 | # | t0 || 7 | 10 | 13 | 0 |
-# +----++----+----+----+----+ # +----++----+----+----+----+
-# | t1 || 3 | 14 | 9 | 4 | # | t1 || 11 | 14 | 1 | 4 |
-# +----++----+----+----+----+ # +----++----+----+----+----+
-# | t2 || 7 | 2 | 13 | 8 | # | t2 || 15 | 2 | 5 | 8 |
-# +----++----+----+----+----+ # +----++----+----+----+----+
-# | t3 || 11 | 6 | 1 | 12 | # | t3 || 3 | 6 | 9 | 12 |
-# +----++----+----+----+----+ # +----++----+----+----+----+
-#
-######################################################################
-# Why not xmm registers? Short answer. It was actually tested and
-# was not any faster, but *contrary*, most notably on Intel CPUs.
-# Longer answer. Main advantage of using mm registers is that movd
-# latency is lower, especially on Intel P4. While arithmetic
-# instructions are twice as many, they can be scheduled every cycle
-# and not every second one when they are operating on xmm register,
-# so that "arithmetic throughput" remains virtually the same. And
-# finally the code can be executed even on elder SSE-only CPUs:-)
-
-sub sse_enccompact()
-{
- &pshufw ("mm1","mm0",0x08); # 5, 4, 1, 0
- &pshufw ("mm5","mm4",0x0d); # 15,14,11,10
- &movd ("eax","mm1"); # 5, 4, 1, 0
- &movd ("ebx","mm5"); # 15,14,11,10
- &mov ($__key,$key);
-
- &movz ($acc,&LB("eax")); # 0
- &movz ("edx",&HB("eax")); # 1
- &pshufw ("mm2","mm0",0x0d); # 7, 6, 3, 2
- &movz ("ecx",&BP(-128,$tbl,$acc,1)); # 0
- &movz ($key,&LB("ebx")); # 10
- &movz ("edx",&BP(-128,$tbl,"edx",1)); # 1
- &shr ("eax",16); # 5, 4
- &shl ("edx",8); # 1
-
- &movz ($acc,&BP(-128,$tbl,$key,1)); # 10
- &movz ($key,&HB("ebx")); # 11
- &shl ($acc,16); # 10
- &pshufw ("mm6","mm4",0x08); # 13,12, 9, 8
- &or ("ecx",$acc); # 10
- &movz ($acc,&BP(-128,$tbl,$key,1)); # 11
- &movz ($key,&HB("eax")); # 5
- &shl ($acc,24); # 11
- &shr ("ebx",16); # 15,14
- &or ("edx",$acc); # 11
-
- &movz ($acc,&BP(-128,$tbl,$key,1)); # 5
- &movz ($key,&HB("ebx")); # 15
- &shl ($acc,8); # 5
- &or ("ecx",$acc); # 5
- &movz ($acc,&BP(-128,$tbl,$key,1)); # 15
- &movz ($key,&LB("eax")); # 4
- &shl ($acc,24); # 15
- &or ("ecx",$acc); # 15
-
- &movz ($acc,&BP(-128,$tbl,$key,1)); # 4
- &movz ($key,&LB("ebx")); # 14
- &movd ("eax","mm2"); # 7, 6, 3, 2
- &movd ("mm0","ecx"); # t[0] collected
- &movz ("ecx",&BP(-128,$tbl,$key,1)); # 14
- &movz ($key,&HB("eax")); # 3
- &shl ("ecx",16); # 14
- &movd ("ebx","mm6"); # 13,12, 9, 8
- &or ("ecx",$acc); # 14
-
- &movz ($acc,&BP(-128,$tbl,$key,1)); # 3
- &movz ($key,&HB("ebx")); # 9
- &shl ($acc,24); # 3
- &or ("ecx",$acc); # 3
- &movz ($acc,&BP(-128,$tbl,$key,1)); # 9
- &movz ($key,&LB("ebx")); # 8
- &shl ($acc,8); # 9
- &shr ("ebx",16); # 13,12
- &or ("ecx",$acc); # 9
-
- &movz ($acc,&BP(-128,$tbl,$key,1)); # 8
- &movz ($key,&LB("eax")); # 2
- &shr ("eax",16); # 7, 6
- &movd ("mm1","ecx"); # t[1] collected
- &movz ("ecx",&BP(-128,$tbl,$key,1)); # 2
- &movz ($key,&HB("eax")); # 7
- &shl ("ecx",16); # 2
- &and ("eax",0xff); # 6
- &or ("ecx",$acc); # 2
-
- &punpckldq ("mm0","mm1"); # t[0,1] collected
-
- &movz ($acc,&BP(-128,$tbl,$key,1)); # 7
- &movz ($key,&HB("ebx")); # 13
- &shl ($acc,24); # 7
- &and ("ebx",0xff); # 12
- &movz ("eax",&BP(-128,$tbl,"eax",1)); # 6
- &or ("ecx",$acc); # 7
- &shl ("eax",16); # 6
- &movz ($acc,&BP(-128,$tbl,$key,1)); # 13
- &or ("edx","eax"); # 6
- &shl ($acc,8); # 13
- &movz ("ebx",&BP(-128,$tbl,"ebx",1)); # 12
- &or ("ecx",$acc); # 13
- &or ("edx","ebx"); # 12
- &mov ($key,$__key);
- &movd ("mm4","ecx"); # t[2] collected
- &movd ("mm5","edx"); # t[3] collected
-
- &punpckldq ("mm4","mm5"); # t[2,3] collected
-}
-
- if (!$x86only) { # this MMX/SSE routine is generated only when $x86only is false
-&function_begin_B("_sse_AES_encrypt_compact"); # block is held in mm0 (bytes 0-7) and mm4 (bytes 8-15), key schedule pointer in $key
- &pxor ("mm0",&QWP(0,$key)); # 7, 6, 5, 4, 3, 2, 1, 0
- &pxor ("mm4",&QWP(8,$key)); # 15,14,13,12,11,10, 9, 8
-
- # note that caller is expected to allocate stack frame for me!
- &mov ($acc,&DWP(240,$key)); # load key->rounds
- &lea ($acc,&DWP(-2,$acc,$acc)); # $acc = 2*rounds-2
- &lea ($acc,&DWP(0,$key,$acc,8)); # $acc = $key + 8*(2*rounds-2) = $key + 16*(rounds-1)
- &mov ($__end,$acc); # end of key schedule
-
- &mov ($s0,0x1b1b1b1b); # magic constant
- &mov (&DWP(8,"esp"),$s0); # the two stores build a qword at 8(%esp) with 0x1b in every byte;
- &mov (&DWP(12,"esp"),$s0); # it is read back with movq inside the loop
-
- # prefetch Te4: one dword is read at every 32-byte step from $tbl-128 to $tbl+96; the loaded values are overwritten without being used
- &mov ($s0,&DWP(0-128,$tbl));
- &mov ($s1,&DWP(32-128,$tbl));
- &mov ($s2,&DWP(64-128,$tbl));
- &mov ($s3,&DWP(96-128,$tbl));
- &mov ($s0,&DWP(128-128,$tbl));
- &mov ($s1,&DWP(160-128,$tbl));
- &mov ($s2,&DWP(192-128,$tbl));
- &mov ($s3,&DWP(224-128,$tbl));
-
- &set_label("loop",16);
- &sse_enccompact(); # inline the table-lookup sequence emitted by sub sse_enccompact
- &add ($key,16); # step to the next 16-byte round key
- &cmp ($key,$__end);
- &ja (&label("out")); # $key is past the end marker: leave without the mixing step below
-
- &movq ("mm2",&QWP(8,"esp")); # 0x1b in every byte (stored above)
- &pxor ("mm3","mm3"); &pxor ("mm7","mm7");
- &movq ("mm1","mm0"); &movq ("mm5","mm4"); # r0
- &pcmpgtb("mm3","mm0"); &pcmpgtb("mm7","mm4"); # 0xff in every byte whose top bit is set (signed 0 > byte)
- &pand ("mm3","mm2"); &pand ("mm7","mm2"); # 0x1b in those bytes, 0 elsewhere
- &pshufw ("mm2","mm0",0xb1); &pshufw ("mm6","mm4",0xb1);# ROTATE(r0,16)
- &paddb ("mm0","mm0"); &paddb ("mm4","mm4"); # double every byte
- &pxor ("mm0","mm3"); &pxor ("mm4","mm7"); # = r2
- &pshufw ("mm3","mm2",0xb1); &pshufw ("mm7","mm6",0xb1);# r0
- &pxor ("mm1","mm0"); &pxor ("mm5","mm4"); # r0^r2
- &pxor ("mm0","mm2"); &pxor ("mm4","mm6"); # ^= ROTATE(r0,16)
-
- &movq ("mm2","mm3"); &movq ("mm6","mm7");
- &pslld ("mm3",8); &pslld ("mm7",8);
- &psrld ("mm2",24); &psrld ("mm6",24);
- &pxor ("mm0","mm3"); &pxor ("mm4","mm7"); # ^= r0<<8
- &pxor ("mm0","mm2"); &pxor ("mm4","mm6"); # ^= r0>>24
-
- &movq ("mm3","mm1"); &movq ("mm7","mm5"); # second copy of r0^r2 for the opposite shift
- &movq ("mm2",&QWP(0,$key)); &movq ("mm6",&QWP(8,$key)); # fetch the round key at the advanced $key
- &psrld ("mm1",8); &psrld ("mm5",8);
- &mov ($s0,&DWP(0-128,$tbl)); # table reads interleaved with the MMX work; NOTE(review): presumably cache warming, the values are not used in the visible code - confirm
- &pslld ("mm3",24); &pslld ("mm7",24);
- &mov ($s1,&DWP(64-128,$tbl));
- &pxor ("mm0","mm1"); &pxor ("mm4","mm5"); # ^= (r2^r0)<<8
- &mov ($s2,&DWP(128-128,$tbl));
- &pxor ("mm0","mm3"); &pxor ("mm4","mm7"); # ^= (r2^r0)>>24
- &mov ($s3,&DWP(192-128,$tbl));
-
- &pxor ("mm0","mm2"); &pxor ("mm4","mm6"); # xor in the round key fetched above
- &jmp (&label("loop"));
-
- &set_label("out",16);
- &pxor ("mm0",&QWP(0,$key)); # last round: only the round-key xor remains
- &pxor ("mm4",&QWP(8,$key));
-
- &ret ();
-&function_end_B("_sse_AES_encrypt_compact");
- }
-
-######################################################################
-# Vanilla block function.
-######################################################################
-
-sub encstep() # emits the code for one output word of a full table-driven round; callers use &encstep(...), which bypasses the empty prototype
-{ my ($i,$te,@s) = @_; # $i: output word 0..3, $te: table base register, @s: the four state registers, rotated by the caller for each $i
- my $tmp = $key; # $key serves as the scratch index register; it is reloaded from $__key when $i==3
- my $out = $i==3?$s[0]:$acc; # words 0..2 are accumulated in $acc, word 3 directly in $s[0]
-
- # lines marked with #%e?x[i] denote "reordered" instructions...
- if ($i==3) { &mov ($key,$__key); }##%edx
- else { &mov ($out,$s[0]);
- &and ($out,0xFF); }
- if ($i==1) { &shr ($s[0],16); }#%ebx[1]
- if ($i==2) { &shr ($s[0],24); }#%ecx[2]
- &mov ($out,&DWP(0,$te,$out,8)); # table entries are addressed with an 8-byte stride
-
- if ($i==3) { $tmp=$s[1]; }##%eax
- &movz ($tmp,&HB($s[1]));
- &xor ($out,&DWP(3,$te,$tmp,8)); # dword read at byte offset 3 of the indexed entry
-
- if ($i==3) { $tmp=$s[2]; &mov ($s[1],$__s0); }##%ebx
- else { &mov ($tmp,$s[2]);
- &shr ($tmp,16); }
- if ($i==2) { &and ($s[1],0xFF); }#%edx[2]
- &and ($tmp,0xFF);
- &xor ($out,&DWP(2,$te,$tmp,8)); # byte offset 2
-
- if ($i==3) { $tmp=$s[3]; &mov ($s[2],$__s1); }##%ecx
- elsif($i==2){ &movz ($tmp,&HB($s[3])); }#%ebx[2]
- else { &mov ($tmp,$s[3]);
- &shr ($tmp,24) }
- &xor ($out,&DWP(1,$te,$tmp,8)); # byte offset 1
- if ($i<2) { &mov (&DWP(4+4*$i,"esp"),$out); } # words 0 and 1 are parked at 4(%esp) and 8(%esp)
- if ($i==3) { &mov ($s[3],$acc); } # the word computed for $i==2 is still in $acc
- &comment();
-}
-
-sub enclast() # emits one output word of the final encryption round: four table reads, each masked down to a single byte lane
-{ my ($i,$te,@s)=@_; # same argument convention as encstep: word index, table base register, rotated state registers
- my $tmp = $key; # $key is the scratch register; reloaded from $__key when $i==3
- my $out = $i==3?$s[0]:$acc; # words 0..2 are built in $acc, word 3 in $s[0]
-
- if ($i==3) { &mov ($key,$__key); }##%edx
- else { &mov ($out,$s[0]); }
- &and ($out,0xFF);
- if ($i==1) { &shr ($s[0],16); }#%ebx[1]
- if ($i==2) { &shr ($s[0],24); }#%ecx[2]
- &mov ($out,&DWP(2,$te,$out,8)); # dword at byte offset 2 of the 8-byte-strided entry
- &and ($out,0x000000ff); # keep byte lane 0 only
-
- if ($i==3) { $tmp=$s[1]; }##%eax
- &movz ($tmp,&HB($s[1]));
- &mov ($tmp,&DWP(0,$te,$tmp,8));
- &and ($tmp,0x0000ff00); # keep byte lane 1 only
- &xor ($out,$tmp);
-
- if ($i==3) { $tmp=$s[2]; &mov ($s[1],$__s0); }##%ebx
- else { &mov ($tmp,$s[2]);
- &shr ($tmp,16); }
- if ($i==2) { &and ($s[1],0xFF); }#%edx[2]
- &and ($tmp,0xFF);
- &mov ($tmp,&DWP(0,$te,$tmp,8));
- &and ($tmp,0x00ff0000); # keep byte lane 2 only
- &xor ($out,$tmp);
-
- if ($i==3) { $tmp=$s[3]; &mov ($s[2],$__s1); }##%ecx
- elsif($i==2){ &movz ($tmp,&HB($s[3])); }#%ebx[2]
- else { &mov ($tmp,$s[3]);
- &shr ($tmp,24); }
- &mov ($tmp,&DWP(2,$te,$tmp,8));
- &and ($tmp,0xff000000); # keep byte lane 3 only
- &xor ($out,$tmp);
- if ($i<2) { &mov (&DWP(4+4*$i,"esp"),$out); } # words 0 and 1 are parked at 4(%esp) and 8(%esp)
- if ($i==3) { &mov ($s[3],$acc); } # the word computed for $i==2 is still in $acc
-}
-
-&function_begin_B("_x86_AES_encrypt"); # full-table block encryption: state in $s0..$s3, key schedule pointer in $key, table base in $tbl
- if ($vertical_spin) {
- # I need high parts of volatile registers to be accessible...
- &exch ($s1="edi",$key="ebx"); # the assignments also rebind the Perl-side register names used by everything emitted below
- &mov ($s2="esi",$acc="ecx");
- }
-
- # note that caller is expected to allocate stack frame for me!
- &mov ($__key,$key); # save key
-
- &xor ($s0,&DWP(0,$key)); # xor with key
- &xor ($s1,&DWP(4,$key));
- &xor ($s2,&DWP(8,$key));
- &xor ($s3,&DWP(12,$key));
-
- &mov ($acc,&DWP(240,$key)); # load key->rounds
-
- if ($small_footprint) { # generate one round body and loop over it
- &lea ($acc,&DWP(-2,$acc,$acc)); # $acc = 2*rounds-2
- &lea ($acc,&DWP(0,$key,$acc,8)); # $acc = $key + 16*(rounds-1)
- &mov ($__end,$acc); # end of key schedule
-
- &set_label("loop",16);
- if ($vertical_spin) {
- &encvert($tbl,$s0,$s1,$s2,$s3);
- } else {
- &encstep(0,$tbl,$s0,$s1,$s2,$s3);
- &encstep(1,$tbl,$s1,$s2,$s3,$s0);
- &encstep(2,$tbl,$s2,$s3,$s0,$s1);
- &encstep(3,$tbl,$s3,$s0,$s1,$s2);
- }
- &add ($key,16); # advance rd_key
- &xor ($s0,&DWP(0,$key));
- &xor ($s1,&DWP(4,$key));
- &xor ($s2,&DWP(8,$key));
- &xor ($s3,&DWP(12,$key));
- &cmp ($key,$__end);
- &mov ($__key,$key);
- &jb (&label("loop"));
- }
- else { # generate fully unrolled rounds and enter at the label matching key->rounds
- &cmp ($acc,10);
- &jle (&label("10rounds"));
- &cmp ($acc,12);
- &jle (&label("12rounds"));
-
- &set_label("14rounds",4); # fall-through case: two extra rounds, then continue into "12rounds"
- for ($i=1;$i<3;$i++) {
- if ($vertical_spin) {
- &encvert($tbl,$s0,$s1,$s2,$s3);
- } else {
- &encstep(0,$tbl,$s0,$s1,$s2,$s3);
- &encstep(1,$tbl,$s1,$s2,$s3,$s0);
- &encstep(2,$tbl,$s2,$s3,$s0,$s1);
- &encstep(3,$tbl,$s3,$s0,$s1,$s2);
- }
- &xor ($s0,&DWP(16*$i+0,$key));
- &xor ($s1,&DWP(16*$i+4,$key));
- &xor ($s2,&DWP(16*$i+8,$key));
- &xor ($s3,&DWP(16*$i+12,$key));
- }
- &add ($key,32); # two round keys consumed
- &mov ($__key,$key); # advance rd_key
- &set_label("12rounds",4); # two more rounds, then continue into "10rounds"
- for ($i=1;$i<3;$i++) {
- if ($vertical_spin) {
- &encvert($tbl,$s0,$s1,$s2,$s3);
- } else {
- &encstep(0,$tbl,$s0,$s1,$s2,$s3);
- &encstep(1,$tbl,$s1,$s2,$s3,$s0);
- &encstep(2,$tbl,$s2,$s3,$s0,$s1);
- &encstep(3,$tbl,$s3,$s0,$s1,$s2);
- }
- &xor ($s0,&DWP(16*$i+0,$key));
- &xor ($s1,&DWP(16*$i+4,$key));
- &xor ($s2,&DWP(16*$i+8,$key));
- &xor ($s3,&DWP(16*$i+12,$key));
- }
- &add ($key,32); # two round keys consumed
- &mov ($__key,$key); # advance rd_key
- &set_label("10rounds",4); # nine full rounds; $key is not advanced here, the round keys are addressed by displacement
- for ($i=1;$i<10;$i++) {
- if ($vertical_spin) {
- &encvert($tbl,$s0,$s1,$s2,$s3);
- } else {
- &encstep(0,$tbl,$s0,$s1,$s2,$s3);
- &encstep(1,$tbl,$s1,$s2,$s3,$s0);
- &encstep(2,$tbl,$s2,$s3,$s0,$s1);
- &encstep(3,$tbl,$s3,$s0,$s1,$s2);
- }
- &xor ($s0,&DWP(16*$i+0,$key));
- &xor ($s1,&DWP(16*$i+4,$key));
- &xor ($s2,&DWP(16*$i+8,$key));
- &xor ($s3,&DWP(16*$i+12,$key));
- }
- }
-
- if ($vertical_spin) {
- # "reincarnate" some registers for "horizontal" spin...
- &mov ($s1="ebx",$key="edi");
- &mov ($s2="ecx",$acc="esi");
- }
- &enclast(0,$tbl,$s0,$s1,$s2,$s3); # final round, one output word per call
- &enclast(1,$tbl,$s1,$s2,$s3,$s0);
- &enclast(2,$tbl,$s2,$s3,$s0,$s1);
- &enclast(3,$tbl,$s3,$s0,$s1,$s2);
-
- &add ($key,$small_footprint?16:160); # step to the last round key: one slot after the loop, ten slots (160 bytes) after the unrolled code
- &xor ($s0,&DWP(0,$key));
- &xor ($s1,&DWP(4,$key));
- &xor ($s2,&DWP(8,$key));
- &xor ($s3,&DWP(12,$key));
-
- &ret ();
-
-&set_label("AES_Te",64); # Yes! I keep it in the code segment!
- &_data_word(0xa56363c6, 0x847c7cf8, 0x997777ee, 0x8d7b7bf6);
- &_data_word(0x0df2f2ff, 0xbd6b6bd6, 0xb16f6fde, 0x54c5c591);
- &_data_word(0x50303060, 0x03010102, 0xa96767ce, 0x7d2b2b56);
- &_data_word(0x19fefee7, 0x62d7d7b5, 0xe6abab4d, 0x9a7676ec);
- &_data_word(0x45caca8f, 0x9d82821f, 0x40c9c989, 0x877d7dfa);
- &_data_word(0x15fafaef, 0xeb5959b2, 0xc947478e, 0x0bf0f0fb);
- &_data_word(0xecadad41, 0x67d4d4b3, 0xfda2a25f, 0xeaafaf45);
- &_data_word(0xbf9c9c23, 0xf7a4a453, 0x967272e4, 0x5bc0c09b);
- &_data_word(0xc2b7b775, 0x1cfdfde1, 0xae93933d, 0x6a26264c);
- &_data_word(0x5a36366c, 0x413f3f7e, 0x02f7f7f5, 0x4fcccc83);
- &_data_word(0x5c343468, 0xf4a5a551, 0x34e5e5d1, 0x08f1f1f9);
- &_data_word(0x937171e2, 0x73d8d8ab, 0x53313162, 0x3f15152a);
- &_data_word(0x0c040408, 0x52c7c795, 0x65232346, 0x5ec3c39d);
- &_data_word(0x28181830, 0xa1969637, 0x0f05050a, 0xb59a9a2f);
- &_data_word(0x0907070e, 0x36121224, 0x9b80801b, 0x3de2e2df);
- &_data_word(0x26ebebcd, 0x6927274e, 0xcdb2b27f, 0x9f7575ea);
- &_data_word(0x1b090912, 0x9e83831d, 0x742c2c58, 0x2e1a1a34);
- &_data_word(0x2d1b1b36, 0xb26e6edc, 0xee5a5ab4, 0xfba0a05b);
- &_data_word(0xf65252a4, 0x4d3b3b76, 0x61d6d6b7, 0xceb3b37d);
- &_data_word(0x7b292952, 0x3ee3e3dd, 0x712f2f5e, 0x97848413);
- &_data_word(0xf55353a6, 0x68d1d1b9, 0x00000000, 0x2cededc1);
- &_data_word(0x60202040, 0x1ffcfce3, 0xc8b1b179, 0xed5b5bb6);
- &_data_word(0xbe6a6ad4, 0x46cbcb8d, 0xd9bebe67, 0x4b393972);
- &_data_word(0xde4a4a94, 0xd44c4c98, 0xe85858b0, 0x4acfcf85);
- &_data_word(0x6bd0d0bb, 0x2aefefc5, 0xe5aaaa4f, 0x16fbfbed);
- &_data_word(0xc5434386, 0xd74d4d9a, 0x55333366, 0x94858511);
- &_data_word(0xcf45458a, 0x10f9f9e9, 0x06020204, 0x817f7ffe);
- &_data_word(0xf05050a0, 0x443c3c78, 0xba9f9f25, 0xe3a8a84b);
- &_data_word(0xf35151a2, 0xfea3a35d, 0xc0404080, 0x8a8f8f05);
- &_data_word(0xad92923f, 0xbc9d9d21, 0x48383870, 0x04f5f5f1);
- &_data_word(0xdfbcbc63, 0xc1b6b677, 0x75dadaaf, 0x63212142);
- &_data_word(0x30101020, 0x1affffe5, 0x0ef3f3fd, 0x6dd2d2bf);
- &_data_word(0x4ccdcd81, 0x140c0c18, 0x35131326, 0x2fececc3);
- &_data_word(0xe15f5fbe, 0xa2979735, 0xcc444488, 0x3917172e);
- &_data_word(0x57c4c493, 0xf2a7a755, 0x827e7efc, 0x473d3d7a);
- &_data_word(0xac6464c8, 0xe75d5dba, 0x2b191932, 0x957373e6);
- &_data_word(0xa06060c0, 0x98818119, 0xd14f4f9e, 0x7fdcdca3);
- &_data_word(0x66222244, 0x7e2a2a54, 0xab90903b, 0x8388880b);
- &_data_word(0xca46468c, 0x29eeeec7, 0xd3b8b86b, 0x3c141428);
- &_data_word(0x79dedea7, 0xe25e5ebc, 0x1d0b0b16, 0x76dbdbad);
- &_data_word(0x3be0e0db, 0x56323264, 0x4e3a3a74, 0x1e0a0a14);
- &_data_word(0xdb494992, 0x0a06060c, 0x6c242448, 0xe45c5cb8);
- &_data_word(0x5dc2c29f, 0x6ed3d3bd, 0xefacac43, 0xa66262c4);
- &_data_word(0xa8919139, 0xa4959531, 0x37e4e4d3, 0x8b7979f2);
- &_data_word(0x32e7e7d5, 0x43c8c88b, 0x5937376e, 0xb76d6dda);
- &_data_word(0x8c8d8d01, 0x64d5d5b1, 0xd24e4e9c, 0xe0a9a949);
- &_data_word(0xb46c6cd8, 0xfa5656ac, 0x07f4f4f3, 0x25eaeacf);
- &_data_word(0xaf6565ca, 0x8e7a7af4, 0xe9aeae47, 0x18080810);
- &_data_word(0xd5baba6f, 0x887878f0, 0x6f25254a, 0x722e2e5c);
- &_data_word(0x241c1c38, 0xf1a6a657, 0xc7b4b473, 0x51c6c697);
- &_data_word(0x23e8e8cb, 0x7cdddda1, 0x9c7474e8, 0x211f1f3e);
- &_data_word(0xdd4b4b96, 0xdcbdbd61, 0x868b8b0d, 0x858a8a0f);
- &_data_word(0x907070e0, 0x423e3e7c, 0xc4b5b571, 0xaa6666cc);
- &_data_word(0xd8484890, 0x05030306, 0x01f6f6f7, 0x120e0e1c);
- &_data_word(0xa36161c2, 0x5f35356a, 0xf95757ae, 0xd0b9b969);
- &_data_word(0x91868617, 0x58c1c199, 0x271d1d3a, 0xb99e9e27);
- &_data_word(0x38e1e1d9, 0x13f8f8eb, 0xb398982b, 0x33111122);
- &_data_word(0xbb6969d2, 0x70d9d9a9, 0x898e8e07, 0xa7949433);
- &_data_word(0xb69b9b2d, 0x221e1e3c, 0x92878715, 0x20e9e9c9);
- &_data_word(0x49cece87, 0xff5555aa, 0x78282850, 0x7adfdfa5);
- &_data_word(0x8f8c8c03, 0xf8a1a159, 0x80898909, 0x170d0d1a);
- &_data_word(0xdabfbf65, 0x31e6e6d7, 0xc6424284, 0xb86868d0);
- &_data_word(0xc3414182, 0xb0999929, 0x772d2d5a, 0x110f0f1e);
- &_data_word(0xcbb0b07b, 0xfc5454a8, 0xd6bbbb6d, 0x3a16162c);
-
-#Te4 # four copies of Te4 to choose from to avoid L1 aliasing
- &data_byte(0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5);
- &data_byte(0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76);
- &data_byte(0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0);
- &data_byte(0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0);
- &data_byte(0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc);
- &data_byte(0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15);
- &data_byte(0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a);
- &data_byte(0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75);
- &data_byte(0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0);
- &data_byte(0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84);
- &data_byte(0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b);
- &data_byte(0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf);
- &data_byte(0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85);
- &data_byte(0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8);
- &data_byte(0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5);
- &data_byte(0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2);
- &data_byte(0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17);
- &data_byte(0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73);
- &data_byte(0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88);
- &data_byte(0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb);
- &data_byte(0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c);
- &data_byte(0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79);
- &data_byte(0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9);
- &data_byte(0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08);
- &data_byte(0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6);
- &data_byte(0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a);
- &data_byte(0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e);
- &data_byte(0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e);
- &data_byte(0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94);
- &data_byte(0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf);
- &data_byte(0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68);
- &data_byte(0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16);
-
- &data_byte(0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5);
- &data_byte(0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76);
- &data_byte(0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0);
- &data_byte(0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0);
- &data_byte(0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc);
- &data_byte(0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15);
- &data_byte(0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a);
- &data_byte(0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75);
- &data_byte(0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0);
- &data_byte(0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84);
- &data_byte(0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b);
- &data_byte(0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf);
- &data_byte(0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85);
- &data_byte(0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8);
- &data_byte(0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5);
- &data_byte(0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2);
- &data_byte(0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17);
- &data_byte(0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73);
- &data_byte(0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88);
- &data_byte(0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb);
- &data_byte(0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c);
- &data_byte(0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79);
- &data_byte(0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9);
- &data_byte(0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08);
- &data_byte(0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6);
- &data_byte(0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a);
- &data_byte(0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e);
- &data_byte(0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e);
- &data_byte(0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94);
- &data_byte(0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf);
- &data_byte(0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68);
- &data_byte(0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16);
-
- &data_byte(0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5);
- &data_byte(0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76);
- &data_byte(0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0);
- &data_byte(0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0);
- &data_byte(0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc);
- &data_byte(0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15);
- &data_byte(0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a);
- &data_byte(0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75);
- &data_byte(0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0);
- &data_byte(0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84);
- &data_byte(0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b);
- &data_byte(0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf);
- &data_byte(0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85);
- &data_byte(0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8);
- &data_byte(0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5);
- &data_byte(0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2);
- &data_byte(0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17);
- &data_byte(0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73);
- &data_byte(0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88);
- &data_byte(0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb);
- &data_byte(0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c);
- &data_byte(0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79);
- &data_byte(0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9);
- &data_byte(0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08);
- &data_byte(0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6);
- &data_byte(0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a);
- &data_byte(0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e);
- &data_byte(0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e);
- &data_byte(0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94);
- &data_byte(0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf);
- &data_byte(0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68);
- &data_byte(0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16);
-
- &data_byte(0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5);
- &data_byte(0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76);
- &data_byte(0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0);
- &data_byte(0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0);
- &data_byte(0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc);
- &data_byte(0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15);
- &data_byte(0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a);
- &data_byte(0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75);
- &data_byte(0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0);
- &data_byte(0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84);
- &data_byte(0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b);
- &data_byte(0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf);
- &data_byte(0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85);
- &data_byte(0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8);
- &data_byte(0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5);
- &data_byte(0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2);
- &data_byte(0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17);
- &data_byte(0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73);
- &data_byte(0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88);
- &data_byte(0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb);
- &data_byte(0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c);
- &data_byte(0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79);
- &data_byte(0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9);
- &data_byte(0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08);
- &data_byte(0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6);
- &data_byte(0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a);
- &data_byte(0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e);
- &data_byte(0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e);
- &data_byte(0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94);
- &data_byte(0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf);
- &data_byte(0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68);
- &data_byte(0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16);
-#rcon:
- &data_word(0x00000001, 0x00000002, 0x00000004, 0x00000008);
- &data_word(0x00000010, 0x00000020, 0x00000040, 0x00000080);
- &data_word(0x0000001b, 0x00000036, 0x00000000, 0x00000000);
- &data_word(0x00000000, 0x00000000, 0x00000000, 0x00000000);
-&function_end_B("_x86_AES_encrypt");
-
-# void AES_encrypt (const void *inp,void *out,const AES_KEY *key);
-&function_begin("AES_encrypt"); # public entry point: builds an aligned stack frame, picks a table copy, then calls the SSE or the integer compact routine
- &mov ($acc,&wparam(0)); # load inp
- &mov ($key,&wparam(2)); # load key
-
- &mov ($s0,"esp"); # keep the incoming stack pointer; it is stored in the new frame below
- &sub ("esp",36);
- &and ("esp",-64); # align to cache-line
-
- # place stack frame just "above" the key schedule
- &lea ($s1,&DWP(-64-63,$key));
- &sub ($s1,"esp");
- &neg ($s1);
- &and ($s1,0x3C0); # modulo 1024, but aligned to cache-line
- &sub ("esp",$s1);
- &add ("esp",4); # 4 is reserved for caller's return address
- &mov ($_esp,$s0); # save stack pointer
-
- &call (&label("pic_point")); # make it PIC!
- &set_label("pic_point");
- &blindpop($tbl); # $tbl = run-time address of pic_point
- &picmeup($s0,"OPENSSL_ia32cap_P",$tbl,&label("pic_point")) if (!$x86only); # leaves a pointer to OPENSSL_ia32cap_P in $s0; the bt below reads through it
- &lea ($tbl,&DWP(&label("AES_Te")."-".&label("pic_point"),$tbl)); # $tbl = run-time address of AES_Te
-
- # pick Te4 copy which can't "overlap" with stack frame or key schedule
- &lea ($s1,&DWP(768-4,"esp"));
- &sub ($s1,$tbl);
- &and ($s1,0x300); # 0, 256, 512 or 768: offset of one of the four 256-byte copies
- &lea ($tbl,&DWP(2048+128,$tbl,$s1)); # skip 2048 bytes to reach the byte tables and bias by 128 (the compact code indexes with a -128 displacement)
-
- if (!$x86only) {
- &bt (&DWP(0,$s0),25); # check for SSE bit
- &jnc (&label("x86")); # bit clear: use the integer-only path
-
- &movq ("mm0",&QWP(0,$acc)); # load the 16 input bytes into mm0/mm4
- &movq ("mm4",&QWP(8,$acc));
- &call ("_sse_AES_encrypt_compact");
- &mov ("esp",$_esp); # restore stack pointer
- &mov ($acc,&wparam(1)); # load out
- &movq (&QWP(0,$acc),"mm0"); # write output data
- &movq (&QWP(8,$acc),"mm4");
- &emms ();
- &function_end_A(); # NOTE(review): presumably emits the epilogue and return for the SSE path - confirm against perlasm
- }
- &set_label("x86",16);
- &mov ($_tbl,$tbl); # keep the table pointer in the stack frame
- &mov ($s0,&DWP(0,$acc)); # load input data
- &mov ($s1,&DWP(4,$acc));
- &mov ($s2,&DWP(8,$acc));
- &mov ($s3,&DWP(12,$acc));
- &call ("_x86_AES_encrypt_compact"); # the compact routine is used here, not the full-table _x86_AES_encrypt
- &mov ("esp",$_esp); # restore stack pointer
- &mov ($acc,&wparam(1)); # load out
- &mov (&DWP(0,$acc),$s0); # write output data
- &mov (&DWP(4,$acc),$s1);
- &mov (&DWP(8,$acc),$s2);
- &mov (&DWP(12,$acc),$s3);
-&function_end("AES_encrypt");
-
-#--------------------------------------------------------------------#
-
-######################################################################
-# "Compact" block function
-######################################################################
-
-sub deccompact() # emits one output word of a compact decryption round: four byte lookups at -128($td) combined by shift and xor
-{ my $Fn = \&mov; # by default the stack reloads below are emitted as real mov instructions
- while ($#_>5) { pop(@_); $Fn=sub{}; } # every argument beyond the sixth is dropped and turns those reloads into no-ops
- my ($i,$td,@s)=@_; # $i: output word 0..3, $td: table base register, @s: state registers in the order chosen by the caller
- my $tmp = $key; # $key is the scratch register
- my $out = $i==3?$s[0]:$acc; # words 0..2 are built in $acc, word 3 in $s[0]
-
- # $Fn wraps the reloads of saved values from the stack frame.
- # When the caller passes an extra argument, $Fn emits nothing,
- # so that after 4xdeccompact the $key, $s0 and $s1 values are
- # left on the stack instead of being restored...
- if($i==3) { &$Fn ($key,$__key); }
- else { &mov ($out,$s[0]); }
- &and ($out,0xFF);
- &movz ($out,&BP(-128,$td,$out,1)); # byte lane 0
-
- if ($i==3) { $tmp=$s[1]; }
- &movz ($tmp,&HB($s[1]));
- &movz ($tmp,&BP(-128,$td,$tmp,1));
- &shl ($tmp,8); # byte lane 1
- &xor ($out,$tmp);
-
- if ($i==3) { $tmp=$s[2]; &mov ($s[1],$acc); }
- else { mov ($tmp,$s[2]); } # NOTE(review): called without the & sigil used everywhere else in this sub
- &shr ($tmp,16);
- &and ($tmp,0xFF);
- &movz ($tmp,&BP(-128,$td,$tmp,1));
- &shl ($tmp,16); # byte lane 2
- &xor ($out,$tmp);
-
- if ($i==3) { $tmp=$s[3]; &$Fn ($s[2],$__s1); }
- else { &mov ($tmp,$s[3]); }
- &shr ($tmp,24);
- &movz ($tmp,&BP(-128,$td,$tmp,1));
- &shl ($tmp,24); # byte lane 3
- &xor ($out,$tmp);
- if ($i<2) { &mov (&DWP(4+4*$i,"esp"),$out); } # words 0 and 1 are parked at 4(%esp) and 8(%esp)
- if ($i==3) { &$Fn ($s[3],$__s0); }
-}
-
-# must be called with 2,3,0,1 as argument sequence!!!
-sub dectransform() # emits the per-word mixing step for state word $s[$i], working on all four bytes of the register at once
-{ my @s = ($s0,$s1,$s2,$s3);
- my $i = shift; # index of the state word to transform
- my $tmp = $key; # $key and $acc are scratch inside this sub
- my $tp2 = @s[($i+2)%4]; $tp2 = @s[2] if ($i==1); # scratch registers are borrowed from two other state words;
- my $tp4 = @s[($i+3)%4]; $tp4 = @s[3] if ($i==1); # for $i==1 they are forced to $s2 and $s3
- my $tp8 = $tbl; # the table register is used as scratch too, so its previous value is lost
-
- &mov ($tmp,0x80808080);
- &and ($tmp,$s[$i]); # top bit of every byte of tp1
- &mov ($acc,$tmp);
- &shr ($tmp,7);
- &lea ($tp2,&DWP(0,$s[$i],$s[$i])); # tp1+tp1
- &sub ($acc,$tmp); # 0x7f in every byte whose top bit was set
- &and ($tp2,0xfefefefe); # clear the bit carried in from the byte below
- &and ($acc,0x1b1b1b1b); # 0x1b in every byte whose top bit was set
- &xor ($tp2,$acc); # tp2 = tp1 doubled byte-wise with the 0x1b reduction
- &mov ($tmp,0x80808080);
-
- &and ($tmp,$tp2); # same doubling sequence, tp2 -> tp4
- &mov ($acc,$tmp);
- &shr ($tmp,7);
- &lea ($tp4,&DWP(0,$tp2,$tp2));
- &sub ($acc,$tmp);
- &and ($tp4,0xfefefefe);
- &and ($acc,0x1b1b1b1b);
- &xor ($tp2,$s[$i]); # tp2^tp1
- &xor ($tp4,$acc);
- &mov ($tmp,0x80808080);
-
- &and ($tmp,$tp4); # same doubling sequence, tp4 -> tp8
- &mov ($acc,$tmp);
- &shr ($tmp,7);
- &lea ($tp8,&DWP(0,$tp4,$tp4));
- &sub ($acc,$tmp);
- &and ($tp8,0xfefefefe);
- &and ($acc,0x1b1b1b1b);
- &xor ($tp4,$s[$i]); # tp4^tp1
- &rotl ($s[$i],8); # = ROTATE(tp1,8)
- &xor ($tp8,$acc);
-
- &xor ($s[$i],$tp2);
- &xor ($tp2,$tp8);
- &xor ($s[$i],$tp4);
- &xor ($tp4,$tp8);
- &rotl ($tp2,24);
- &xor ($s[$i],$tp8); # ^= tp8^(tp4^tp1)^(tp2^tp1)
- &rotl ($tp4,16);
- &xor ($s[$i],$tp2); # ^= ROTATE(tp8^tp2^tp1,24)
- &rotl ($tp8,8);
- &xor ($s[$i],$tp4); # ^= ROTATE(tp8^tp4^tp1,16)
- &mov ($s[0],$__s0) if($i==2); #prefetch $s0
- &mov ($s[1],$__s1) if($i==3); #prefetch $s1
- &mov ($s[2],$__s2) if($i==1); # reload $s2, which served as $tp2 for this call
- &xor ($s[$i],$tp8); # ^= ROTATE(tp8,8)
-
- &mov ($s[3],$__s3) if($i==1); # reload $s3, which served as $tp4 for this call
- &mov (&DWP(4+4*$i,"esp"),$s[$i]) if($i>=2); # words 2 and 3 are parked at 12(%esp) and 16(%esp)
-}
-
-&function_begin_B("_x86_AES_decrypt_compact"); # integer-only compact block decryption: state in $s0..$s3, key schedule pointer in $key, byte table at -128($tbl)
- # note that caller is expected to allocate stack frame for me!
- &mov ($__key,$key); # save key
-
- &xor ($s0,&DWP(0,$key)); # xor with key
- &xor ($s1,&DWP(4,$key));
- &xor ($s2,&DWP(8,$key));
- &xor ($s3,&DWP(12,$key));
-
- &mov ($acc,&DWP(240,$key)); # load key->rounds
-
- &lea ($acc,&DWP(-2,$acc,$acc)); # $acc = 2*rounds-2
- &lea ($acc,&DWP(0,$key,$acc,8)); # $acc = $key + 16*(rounds-1)
- &mov ($__end,$acc); # end of key schedule
-
- # prefetch Td4: one dword is read at every 32-byte step from $tbl-128 to $tbl+96; the loaded values are overwritten without being used
- &mov ($key,&DWP(0-128,$tbl));
- &mov ($acc,&DWP(32-128,$tbl));
- &mov ($key,&DWP(64-128,$tbl));
- &mov ($acc,&DWP(96-128,$tbl));
- &mov ($key,&DWP(128-128,$tbl));
- &mov ($acc,&DWP(160-128,$tbl));
- &mov ($key,&DWP(192-128,$tbl));
- &mov ($acc,&DWP(224-128,$tbl));
-
- &set_label("loop",16);
-
- &deccompact(0,$tbl,$s0,$s3,$s2,$s1,1); # the trailing 1 is the extra argument that suppresses deccompact's stack reloads
- &deccompact(1,$tbl,$s1,$s0,$s3,$s2,1);
- &deccompact(2,$tbl,$s2,$s1,$s0,$s3,1);
- &deccompact(3,$tbl,$s3,$s2,$s1,$s0,1);
- &dectransform(2); # required call order: 2,3,0,1
- &dectransform(3);
- &dectransform(0);
- &dectransform(1);
- &mov ($key,$__key); # $key was used as scratch above
- &mov ($tbl,$__tbl); # restore the table pointer (dectransform uses $tbl as scratch)
- &add ($key,16); # advance rd_key
- &xor ($s0,&DWP(0,$key));
- &xor ($s1,&DWP(4,$key));
- &xor ($s2,&DWP(8,$key));
- &xor ($s3,&DWP(12,$key));
-
- &cmp ($key,$__end);
- &mov ($__key,$key);
- &jb (&label("loop"));
-
- &deccompact(0,$tbl,$s0,$s3,$s2,$s1); # final round: no extra argument, so the stack reloads are emitted
- &deccompact(1,$tbl,$s1,$s0,$s3,$s2);
- &deccompact(2,$tbl,$s2,$s1,$s0,$s3);
- &deccompact(3,$tbl,$s3,$s2,$s1,$s0);
-
- &xor ($s0,&DWP(16,$key)); # last round key sits 16 bytes past the one $key points at
- &xor ($s1,&DWP(20,$key));
- &xor ($s2,&DWP(24,$key));
- &xor ($s3,&DWP(28,$key));
-
- &ret ();
-&function_end_B("_x86_AES_decrypt_compact");
-
-######################################################################
-# "Compact" SSE block function.
-######################################################################
-
-sub sse_deccompact() # emits the byte-lookup part of one SSE decryption round: input bytes in mm0/mm4, result collected back into mm0/mm4
-{ # the numbers in the trailing comments name the input byte (0..15) being processed
- &pshufw ("mm1","mm0",0x0c); # 7, 6, 1, 0
- &pshufw ("mm5","mm4",0x09); # 13,12,11,10
- &movd ("eax","mm1"); # 7, 6, 1, 0
- &movd ("ebx","mm5"); # 13,12,11,10
- &mov ($__key,$key); # $key is used as an index register below; saved here, reloaded at the end
-
- &movz ($acc,&LB("eax")); # 0
- &movz ("edx",&HB("eax")); # 1
- &pshufw ("mm2","mm0",0x06); # 3, 2, 5, 4
- &movz ("ecx",&BP(-128,$tbl,$acc,1)); # 0
- &movz ($key,&LB("ebx")); # 10
- &movz ("edx",&BP(-128,$tbl,"edx",1)); # 1
- &shr ("eax",16); # 7, 6
- &shl ("edx",8); # 1
-
- &movz ($acc,&BP(-128,$tbl,$key,1)); # 10
- &movz ($key,&HB("ebx")); # 11
- &shl ($acc,16); # 10
- &pshufw ("mm6","mm4",0x03); # 9, 8,15,14
- &or ("ecx",$acc); # 10
- &movz ($acc,&BP(-128,$tbl,$key,1)); # 11
- &movz ($key,&HB("eax")); # 7
- &shl ($acc,24); # 11
- &shr ("ebx",16); # 13,12
- &or ("edx",$acc); # 11
-
- &movz ($acc,&BP(-128,$tbl,$key,1)); # 7
- &movz ($key,&HB("ebx")); # 13
- &shl ($acc,24); # 7
- &or ("ecx",$acc); # 7
- &movz ($acc,&BP(-128,$tbl,$key,1)); # 13
- &movz ($key,&LB("eax")); # 6
- &shl ($acc,8); # 13
- &movd ("eax","mm2"); # 3, 2, 5, 4
- &or ("ecx",$acc); # 13
-
- &movz ($acc,&BP(-128,$tbl,$key,1)); # 6
- &movz ($key,&LB("ebx")); # 12
- &shl ($acc,16); # 6
- &movd ("ebx","mm6"); # 9, 8,15,14
- &movd ("mm0","ecx"); # t[0] collected
- &movz ("ecx",&BP(-128,$tbl,$key,1)); # 12
- &movz ($key,&LB("eax")); # 4
- &or ("ecx",$acc); # 12
-
- &movz ($acc,&BP(-128,$tbl,$key,1)); # 4
- &movz ($key,&LB("ebx")); # 14
- &or ("edx",$acc); # 4
- &movz ($acc,&BP(-128,$tbl,$key,1)); # 14
- &movz ($key,&HB("eax")); # 5
- &shl ($acc,16); # 14
- &shr ("eax",16); # 3, 2
- &or ("edx",$acc); # 14
-
- &movz ($acc,&BP(-128,$tbl,$key,1)); # 5
- &movz ($key,&HB("ebx")); # 15
- &shr ("ebx",16); # 9, 8
- &shl ($acc,8); # 5
- &movd ("mm1","edx"); # t[1] collected
- &movz ("edx",&BP(-128,$tbl,$key,1)); # 15
- &movz ($key,&HB("ebx")); # 9
- &shl ("edx",24); # 15
- &and ("ebx",0xff); # 8
- &or ("edx",$acc); # 15
-
- &punpckldq ("mm0","mm1"); # t[0,1] collected
-
- &movz ($acc,&BP(-128,$tbl,$key,1)); # 9
- &movz ($key,&LB("eax")); # 2
- &shl ($acc,8); # 9
- &movz ("eax",&HB("eax")); # 3
- &movz ("ebx",&BP(-128,$tbl,"ebx",1)); # 8
- &or ("ecx",$acc); # 9
- &movz ($acc,&BP(-128,$tbl,$key,1)); # 2
- &or ("edx","ebx"); # 8
- &shl ($acc,16); # 2
- &movz ("eax",&BP(-128,$tbl,"eax",1)); # 3
- &or ("edx",$acc); # 2
- &shl ("eax",24); # 3
- &or ("ecx","eax"); # 3
- &mov ($key,$__key); # restore the key schedule pointer saved at the top
- &movd ("mm4","edx"); # t[2] collected
- &movd ("mm5","ecx"); # t[3] collected
-
- &punpckldq ("mm4","mm5"); # t[2,3] collected
-}
-
- if (!$x86only) { # this MMX/SSE routine is generated only when $x86only is false
-&function_begin_B("_sse_AES_decrypt_compact"); # block is held in mm0 (bytes 0-7) and mm4 (bytes 8-15), key schedule pointer in $key
- &pxor ("mm0",&QWP(0,$key)); # 7, 6, 5, 4, 3, 2, 1, 0
- &pxor ("mm4",&QWP(8,$key)); # 15,14,13,12,11,10, 9, 8
-
- # note that caller is expected to allocate stack frame for me!
- &mov ($acc,&DWP(240,$key)); # load key->rounds
- &lea ($acc,&DWP(-2,$acc,$acc)); # $acc = 2*rounds-2
- &lea ($acc,&DWP(0,$key,$acc,8)); # $acc = $key + 16*(rounds-1)
- &mov ($__end,$acc); # end of key schedule
-
- &mov ($s0,0x1b1b1b1b); # magic constant
- &mov (&DWP(8,"esp"),$s0); # the two stores build a qword at 8(%esp) with 0x1b in every byte;
- &mov (&DWP(12,"esp"),$s0); # it is read back with movq inside the loop
-
- # prefetch Td4: one dword is read at every 32-byte step from $tbl-128 to $tbl+96; the loaded values are overwritten without being used
- &mov ($s0,&DWP(0-128,$tbl));
- &mov ($s1,&DWP(32-128,$tbl));
- &mov ($s2,&DWP(64-128,$tbl));
- &mov ($s3,&DWP(96-128,$tbl));
- &mov ($s0,&DWP(128-128,$tbl));
- &mov ($s1,&DWP(160-128,$tbl));
- &mov ($s2,&DWP(192-128,$tbl));
- &mov ($s3,&DWP(224-128,$tbl));
-
- &set_label("loop",16);
- &sse_deccompact(); # inline the byte-lookup sequence emitted by sub sse_deccompact
- &add ($key,16); # step to the next 16-byte round key
- &cmp ($key,$__end);
- &ja (&label("out")); # $key is past the end marker: leave without the mixing step below
-
- # ROTATE(x^y,N) == ROTATE(x,N)^ROTATE(y,N)
- &movq ("mm3","mm0"); &movq ("mm7","mm4");
- &movq ("mm2","mm0",1); &movq ("mm6","mm4",1); # NOTE(review): the meaning of the extra third argument is not visible here - confirm in perlasm
- &movq ("mm1","mm0"); &movq ("mm5","mm4");
- &pshufw ("mm0","mm0",0xb1); &pshufw ("mm4","mm4",0xb1);# = ROTATE(tp0,16)
- &pslld ("mm2",8); &pslld ("mm6",8);
- &psrld ("mm3",8); &psrld ("mm7",8);
- &pxor ("mm0","mm2"); &pxor ("mm4","mm6"); # ^= tp0<<8
- &pxor ("mm0","mm3"); &pxor ("mm4","mm7"); # ^= tp0>>8
- &pslld ("mm2",16); &pslld ("mm6",16);
- &psrld ("mm3",16); &psrld ("mm7",16);
- &pxor ("mm0","mm2"); &pxor ("mm4","mm6"); # ^= tp0<<24
- &pxor ("mm0","mm3"); &pxor ("mm4","mm7"); # ^= tp0>>24
-
- &movq ("mm3",&QWP(8,"esp")); # 0x1b in every byte (stored above)
- &pxor ("mm2","mm2"); &pxor ("mm6","mm6");
- &pcmpgtb("mm2","mm1"); &pcmpgtb("mm6","mm5"); # 0xff in every byte whose top bit is set (signed 0 > byte)
- &pand ("mm2","mm3"); &pand ("mm6","mm3"); # 0x1b in those bytes, 0 elsewhere
- &paddb ("mm1","mm1"); &paddb ("mm5","mm5"); # double every byte
- &pxor ("mm1","mm2"); &pxor ("mm5","mm6"); # tp2
- &movq ("mm3","mm1"); &movq ("mm7","mm5");
- &movq ("mm2","mm1"); &movq ("mm6","mm5");
- &pxor ("mm0","mm1"); &pxor ("mm4","mm5"); # ^= tp2
- &pslld ("mm3",24); &pslld ("mm7",24);
- &psrld ("mm2",8); &psrld ("mm6",8);
- &pxor ("mm0","mm3"); &pxor ("mm4","mm7"); # ^= tp2<<24
- &pxor ("mm0","mm2"); &pxor ("mm4","mm6"); # ^= tp2>>8
-
- &movq ("mm2",&QWP(8,"esp")); # 0x1b in every byte again, kept in mm2 for the next two doublings
- &pxor ("mm3","mm3"); &pxor ("mm7","mm7");
- &pcmpgtb("mm3","mm1"); &pcmpgtb("mm7","mm5");
- &pand ("mm3","mm2"); &pand ("mm7","mm2");
- &paddb ("mm1","mm1"); &paddb ("mm5","mm5");
- &pxor ("mm1","mm3"); &pxor ("mm5","mm7"); # tp4
- &pshufw ("mm3","mm1",0xb1); &pshufw ("mm7","mm5",0xb1);
- &pxor ("mm0","mm1"); &pxor ("mm4","mm5"); # ^= tp4
- &pxor ("mm0","mm3"); &pxor ("mm4","mm7"); # ^= ROTATE(tp4,16)
-
- &pxor ("mm3","mm3"); &pxor ("mm7","mm7");
- &pcmpgtb("mm3","mm1"); &pcmpgtb("mm7","mm5");
- &pand ("mm3","mm2"); &pand ("mm7","mm2");
- &paddb ("mm1","mm1"); &paddb ("mm5","mm5");
- &pxor ("mm1","mm3"); &pxor ("mm5","mm7"); # tp8
- &pxor ("mm0","mm1"); &pxor ("mm4","mm5"); # ^= tp8
- &movq ("mm3","mm1"); &movq ("mm7","mm5");
- &pshufw ("mm2","mm1",0xb1); &pshufw ("mm6","mm5",0xb1);
- &pxor ("mm0","mm2"); &pxor ("mm4","mm6"); # ^= ROTATE(tp8,16)
- &pslld ("mm1",8); &pslld ("mm5",8);
- &psrld ("mm3",8); &psrld ("mm7",8);
- &movq ("mm2",&QWP(0,$key)); &movq ("mm6",&QWP(8,$key)); # fetch the round key at the advanced $key
- &pxor ("mm0","mm1"); &pxor ("mm4","mm5"); # ^= tp8<<8
- &pxor ("mm0","mm3"); &pxor ("mm4","mm7"); # ^= tp8>>8
- &mov ($s0,&DWP(0-128,$tbl)); # table reads interleaved with the MMX work; NOTE(review): presumably cache warming, the values are not used in the visible code - confirm
- &pslld ("mm1",16); &pslld ("mm5",16);
- &mov ($s1,&DWP(64-128,$tbl));
- &psrld ("mm3",16); &psrld ("mm7",16);
- &mov ($s2,&DWP(128-128,$tbl));
- &pxor ("mm0","mm1"); &pxor ("mm4","mm5"); # ^= tp8<<24
- &mov ($s3,&DWP(192-128,$tbl));
- &pxor ("mm0","mm3"); &pxor ("mm4","mm7"); # ^= tp8>>24
-
- &pxor ("mm0","mm2"); &pxor ("mm4","mm6"); # xor in the round key fetched above
- &jmp (&label("loop"));
-
- &set_label("out",16);
- &pxor ("mm0",&QWP(0,$key)); # last round: only the round-key xor remains
- &pxor ("mm4",&QWP(8,$key));
-
- &ret ();
-&function_end_B("_sse_AES_decrypt_compact");
- }
-
-######################################################################
-# Vanilla block function.
-######################################################################
-
-sub decstep() # emits the code for one output word of a full table-driven decryption round; callers use &decstep(...), which bypasses the empty prototype
-{ my ($i,$td,@s) = @_; # $i: output word 0..3, $td: table base register, @s: state registers in the order chosen by the caller
- my $tmp = $key; # $key is the scratch index register; reloaded from $__key when $i==3
- my $out = $i==3?$s[0]:$acc; # words 0..2 are accumulated in $acc, word 3 directly in $s[0]
-
- # no instructions are reordered, as performance appears
- # optimal... or rather that all attempts to reorder didn't
- # result in better performance [which by the way is not a
- # bit lower than encryption].
- if($i==3) { &mov ($key,$__key); }
- else { &mov ($out,$s[0]); }
- &and ($out,0xFF);
- &mov ($out,&DWP(0,$td,$out,8)); # table entries are addressed with an 8-byte stride
-
- if ($i==3) { $tmp=$s[1]; }
- &movz ($tmp,&HB($s[1]));
- &xor ($out,&DWP(3,$td,$tmp,8)); # dword read at byte offset 3 of the indexed entry
-
- if ($i==3) { $tmp=$s[2]; &mov ($s[1],$acc); } # the word computed for $i==2 is still in $acc
- else { &mov ($tmp,$s[2]); }
- &shr ($tmp,16);
- &and ($tmp,0xFF);
- &xor ($out,&DWP(2,$td,$tmp,8)); # byte offset 2
-
- if ($i==3) { $tmp=$s[3]; &mov ($s[2],$__s1); }
- else { &mov ($tmp,$s[3]); }
- &shr ($tmp,24);
- &xor ($out,&DWP(1,$td,$tmp,8)); # byte offset 1
- if ($i<2) { &mov (&DWP(4+4*$i,"esp"),$out); } # words 0 and 1 are parked at 4(%esp) and 8(%esp)
- if ($i==3) { &mov ($s[3],$__s0); }
- &comment();
-}
-
-sub declast() # emits the code that computes one output word of the final round using single-byte table lookups
-{ my ($i,$td,@s)=@_; # $i: step number 0..3; $td: register holding the table base; @s: four state registers, $s[0] supplying byte 0
- my $tmp = $key; # steps 0..2 use the key register as scratch
- my $out = $i==3?$s[0]:$acc; # step 3 builds its word in place in $s[0]; earlier steps build it in $acc
- # word = B[byte0($s[0])] | B[byte1($s[1])]<<8 | B[byte2($s[2])]<<16 | B[byte3($s[3])]<<24, B being the byte table at $td+2048
- if($i==0) { &lea ($td,&DWP(2048+128,$td)); # step 0 only: move $td to the middle of the 256-byte table
- &mov ($tmp,&DWP(0-128,$td)); # eight loads at 32-byte intervals span the whole byte table;
- &mov ($acc,&DWP(32-128,$td)); # the loaded values are overwritten before being used, so the
- &mov ($tmp,&DWP(64-128,$td)); # loads only serve to touch the table memory
- &mov ($acc,&DWP(96-128,$td));
- &mov ($tmp,&DWP(128-128,$td));
- &mov ($acc,&DWP(160-128,$td));
- &mov ($tmp,&DWP(192-128,$td));
- &mov ($acc,&DWP(224-128,$td));
- &lea ($td,&DWP(-128,$td)); } # $td now points at the start of the byte table (original $td + 2048)
- if($i==3) { &mov ($key,$__key); } # step 3 stops using $key as scratch and reloads it from $__key
- else { &mov ($out,$s[0]); }
- &and ($out,0xFF); # bits 0..7 as the table index
- &movz ($out,&BP(0,$td,$out,1)); # zero-extended byte lookup
-
- if ($i==3) { $tmp=$s[1]; } # from here on step 3 uses the source register itself as scratch
- &movz ($tmp,&HB($s[1])); # &HB presumably names the bits 8..15 sub-register of $s[1] -- helper defined outside this chunk
- &movz ($tmp,&BP(0,$td,$tmp,1));
- &shl ($tmp,8); # place the looked-up byte in bits 8..15
- &xor ($out,$tmp);
-
- if ($i==3) { $tmp=$s[2]; &mov ($s[1],$acc); } # $s[1] is fully consumed: overwrite it with the word still held in $acc
- else { &mov ($tmp,$s[2]); }
- &shr ($tmp,16);
- &and ($tmp,0xFF); # bits 16..23 as the table index
- &movz ($tmp,&BP(0,$td,$tmp,1));
- &shl ($tmp,16); # place the looked-up byte in bits 16..23
- &xor ($out,$tmp);
-
- if ($i==3) { $tmp=$s[3]; &mov ($s[2],$__s1); } # $s[2] is consumed: reload it from $__s1 (presumably the step-1 spill slot; defined outside this chunk)
- else { &mov ($tmp,$s[3]); }
- &shr ($tmp,24); # bits 24..31 as the table index
- &movz ($tmp,&BP(0,$td,$tmp,1));
- &shl ($tmp,24); # place the looked-up byte in bits 24..31
- &xor ($out,$tmp);
- if ($i<2) { &mov (&DWP(4+4*$i,"esp"),$out); } # steps 0 and 1 spill their word to 4(esp) and 8(esp); step 2 leaves it in $acc
- if ($i==3) { &mov ($s[3],$__s0); # $s[3] is consumed: reload it from $__s0
- &lea ($td,&DWP(-2048,$td)); } # undo the +2048 applied in step 0 so $td is the table base again
-}
-
-&function_begin_B("_x86_AES_decrypt"); # decrypts the block held in $s0..$s3 in place; $key points at the key schedule, $tbl at the lookup table
- # note that caller is expected to allocate stack frame for me!
- &mov ($__key,$key); # save key
-
- &xor ($s0,&DWP(0,$key)); # xor with key
- &xor ($s1,&DWP(4,$key));
- &xor ($s2,&DWP(8,$key));
- &xor ($s3,&DWP(12,$key));
-
- &mov ($acc,&DWP(240,$key)); # load key->rounds
-
- if ($small_footprint) { # looped variant: one round body is emitted and iterated
- &lea ($acc,&DWP(-2,$acc,$acc)); # acc = 2*rounds - 2
- &lea ($acc,&DWP(0,$key,$acc,8)); # acc = key + 8*acc = key + 16*(rounds-1)
- &mov ($__end,$acc); # end of key schedule
- &set_label("loop",16);
- &decstep(0,$tbl,$s0,$s3,$s2,$s1);
- &decstep(1,$tbl,$s1,$s0,$s3,$s2);
- &decstep(2,$tbl,$s2,$s1,$s0,$s3);
- &decstep(3,$tbl,$s3,$s2,$s1,$s0);
- &add ($key,16); # advance rd_key
- &xor ($s0,&DWP(0,$key));
- &xor ($s1,&DWP(4,$key));
- &xor ($s2,&DWP(8,$key));
- &xor ($s3,&DWP(12,$key));
- &cmp ($key,$__end);
- &mov ($__key,$key); # store the advanced pointer; mov leaves the flags set by cmp intact for the jb below
- &jb (&label("loop"));
- }
- else { # unrolled variant: jump to the label matching the round count
- &cmp ($acc,10);
- &jle (&label("10rounds"));
- &cmp ($acc,12);
- &jle (&label("12rounds"));
-
- &set_label("14rounds",4); # more than 12 rounds: two extra rounds, then fall through into 12rounds
- for ($i=1;$i<3;$i++) {
- &decstep(0,$tbl,$s0,$s3,$s2,$s1);
- &decstep(1,$tbl,$s1,$s0,$s3,$s2);
- &decstep(2,$tbl,$s2,$s1,$s0,$s3);
- &decstep(3,$tbl,$s3,$s2,$s1,$s0);
- &xor ($s0,&DWP(16*$i+0,$key));
- &xor ($s1,&DWP(16*$i+4,$key));
- &xor ($s2,&DWP(16*$i+8,$key));
- &xor ($s3,&DWP(16*$i+12,$key));
- }
- &add ($key,32); # skip the two round keys just consumed
- &mov ($__key,$key); # advance rd_key
- &set_label("12rounds",4); # two extra rounds, then fall through into 10rounds
- for ($i=1;$i<3;$i++) {
- &decstep(0,$tbl,$s0,$s3,$s2,$s1);
- &decstep(1,$tbl,$s1,$s0,$s3,$s2);
- &decstep(2,$tbl,$s2,$s1,$s0,$s3);
- &decstep(3,$tbl,$s3,$s2,$s1,$s0);
- &xor ($s0,&DWP(16*$i+0,$key));
- &xor ($s1,&DWP(16*$i+4,$key));
- &xor ($s2,&DWP(16*$i+8,$key));
- &xor ($s3,&DWP(16*$i+12,$key));
- }
- &add ($key,32); # skip the two round keys just consumed
- &mov ($__key,$key); # advance rd_key
- &set_label("10rounds",4);
- for ($i=1;$i<10;$i++) { # nine full rounds using the round keys at 16..144($key)
- &decstep(0,$tbl,$s0,$s3,$s2,$s1);
- &decstep(1,$tbl,$s1,$s0,$s3,$s2);
- &decstep(2,$tbl,$s2,$s1,$s0,$s3);
- &decstep(3,$tbl,$s3,$s2,$s1,$s0);
- &xor ($s0,&DWP(16*$i+0,$key));
- &xor ($s1,&DWP(16*$i+4,$key));
- &xor ($s2,&DWP(16*$i+8,$key));
- &xor ($s3,&DWP(16*$i+12,$key));
- }
- }
- # final round: declast uses byte lookups at $tbl+2048 instead of the dword table
- &declast(0,$tbl,$s0,$s3,$s2,$s1);
- &declast(1,$tbl,$s1,$s0,$s3,$s2);
- &declast(2,$tbl,$s2,$s1,$s0,$s3);
- &declast(3,$tbl,$s3,$s2,$s1,$s0);
-
- &add ($key,$small_footprint?16:160); # step to the last round key: +16 when the loop already advanced $key per round, +160 past the nine unrolled rounds otherwise
- &xor ($s0,&DWP(0,$key));
- &xor ($s1,&DWP(4,$key));
- &xor ($s2,&DWP(8,$key));
- &xor ($s3,&DWP(12,$key));
-
- &ret ();
-
-&set_label("AES_Td",64); # Yes! I keep it in the code segment!
- &_data_word(0x50a7f451, 0x5365417e, 0xc3a4171a, 0x965e273a);
- &_data_word(0xcb6bab3b, 0xf1459d1f, 0xab58faac, 0x9303e34b);
- &_data_word(0x55fa3020, 0xf66d76ad, 0x9176cc88, 0x254c02f5);
- &_data_word(0xfcd7e54f, 0xd7cb2ac5, 0x80443526, 0x8fa362b5);
- &_data_word(0x495ab1de, 0x671bba25, 0x980eea45, 0xe1c0fe5d);
- &_data_word(0x02752fc3, 0x12f04c81, 0xa397468d, 0xc6f9d36b);
- &_data_word(0xe75f8f03, 0x959c9215, 0xeb7a6dbf, 0xda595295);
- &_data_word(0x2d83bed4, 0xd3217458, 0x2969e049, 0x44c8c98e);
- &_data_word(0x6a89c275, 0x78798ef4, 0x6b3e5899, 0xdd71b927);
- &_data_word(0xb64fe1be, 0x17ad88f0, 0x66ac20c9, 0xb43ace7d);
- &_data_word(0x184adf63, 0x82311ae5, 0x60335197, 0x457f5362);
- &_data_word(0xe07764b1, 0x84ae6bbb, 0x1ca081fe, 0x942b08f9);
- &_data_word(0x58684870, 0x19fd458f, 0x876cde94, 0xb7f87b52);
- &_data_word(0x23d373ab, 0xe2024b72, 0x578f1fe3, 0x2aab5566);
- &_data_word(0x0728ebb2, 0x03c2b52f, 0x9a7bc586, 0xa50837d3);
- &_data_word(0xf2872830, 0xb2a5bf23, 0xba6a0302, 0x5c8216ed);
- &_data_word(0x2b1ccf8a, 0x92b479a7, 0xf0f207f3, 0xa1e2694e);
- &_data_word(0xcdf4da65, 0xd5be0506, 0x1f6234d1, 0x8afea6c4);
- &_data_word(0x9d532e34, 0xa055f3a2, 0x32e18a05, 0x75ebf6a4);
- &_data_word(0x39ec830b, 0xaaef6040, 0x069f715e, 0x51106ebd);
- &_data_word(0xf98a213e, 0x3d06dd96, 0xae053edd, 0x46bde64d);
- &_data_word(0xb58d5491, 0x055dc471, 0x6fd40604, 0xff155060);
- &_data_word(0x24fb9819, 0x97e9bdd6, 0xcc434089, 0x779ed967);
- &_data_word(0xbd42e8b0, 0x888b8907, 0x385b19e7, 0xdbeec879);
- &_data_word(0x470a7ca1, 0xe90f427c, 0xc91e84f8, 0x00000000);
- &_data_word(0x83868009, 0x48ed2b32, 0xac70111e, 0x4e725a6c);
- &_data_word(0xfbff0efd, 0x5638850f, 0x1ed5ae3d, 0x27392d36);
- &_data_word(0x64d90f0a, 0x21a65c68, 0xd1545b9b, 0x3a2e3624);
- &_data_word(0xb1670a0c, 0x0fe75793, 0xd296eeb4, 0x9e919b1b);
- &_data_word(0x4fc5c080, 0xa220dc61, 0x694b775a, 0x161a121c);
- &_data_word(0x0aba93e2, 0xe52aa0c0, 0x43e0223c, 0x1d171b12);
- &_data_word(0x0b0d090e, 0xadc78bf2, 0xb9a8b62d, 0xc8a91e14);
- &_data_word(0x8519f157, 0x4c0775af, 0xbbdd99ee, 0xfd607fa3);
- &_data_word(0x9f2601f7, 0xbcf5725c, 0xc53b6644, 0x347efb5b);
- &_data_word(0x7629438b, 0xdcc623cb, 0x68fcedb6, 0x63f1e4b8);
- &_data_word(0xcadc31d7, 0x10856342, 0x40229713, 0x2011c684);
- &_data_word(0x7d244a85, 0xf83dbbd2, 0x1132f9ae, 0x6da129c7);
- &_data_word(0x4b2f9e1d, 0xf330b2dc, 0xec52860d, 0xd0e3c177);
- &_data_word(0x6c16b32b, 0x99b970a9, 0xfa489411, 0x2264e947);
- &_data_word(0xc48cfca8, 0x1a3ff0a0, 0xd82c7d56, 0xef903322);
- &_data_word(0xc74e4987, 0xc1d138d9, 0xfea2ca8c, 0x360bd498);
- &_data_word(0xcf81f5a6, 0x28de7aa5, 0x268eb7da, 0xa4bfad3f);
- &_data_word(0xe49d3a2c, 0x0d927850, 0x9bcc5f6a, 0x62467e54);
- &_data_word(0xc2138df6, 0xe8b8d890, 0x5ef7392e, 0xf5afc382);
- &_data_word(0xbe805d9f, 0x7c93d069, 0xa92dd56f, 0xb31225cf);
- &_data_word(0x3b99acc8, 0xa77d1810, 0x6e639ce8, 0x7bbb3bdb);
- &_data_word(0x097826cd, 0xf418596e, 0x01b79aec, 0xa89a4f83);
- &_data_word(0x656e95e6, 0x7ee6ffaa, 0x08cfbc21, 0xe6e815ef);
- &_data_word(0xd99be7ba, 0xce366f4a, 0xd4099fea, 0xd67cb029);
- &_data_word(0xafb2a431, 0x31233f2a, 0x3094a5c6, 0xc066a235);
- &_data_word(0x37bc4e74, 0xa6ca82fc, 0xb0d090e0, 0x15d8a733);
- &_data_word(0x4a9804f1, 0xf7daec41, 0x0e50cd7f, 0x2ff69117);
- &_data_word(0x8dd64d76, 0x4db0ef43, 0x544daacc, 0xdf0496e4);
- &_data_word(0xe3b5d19e, 0x1b886a4c, 0xb81f2cc1, 0x7f516546);
- &_data_word(0x04ea5e9d, 0x5d358c01, 0x737487fa, 0x2e410bfb);
- &_data_word(0x5a1d67b3, 0x52d2db92, 0x335610e9, 0x1347d66d);
- &_data_word(0x8c61d79a, 0x7a0ca137, 0x8e14f859, 0x893c13eb);
- &_data_word(0xee27a9ce, 0x35c961b7, 0xede51ce1, 0x3cb1477a);
- &_data_word(0x59dfd29c, 0x3f73f255, 0x79ce1418, 0xbf37c773);
- &_data_word(0xeacdf753, 0x5baafd5f, 0x146f3ddf, 0x86db4478);
- &_data_word(0x81f3afca, 0x3ec468b9, 0x2c342438, 0x5f40a3c2);
- &_data_word(0x72c31d16, 0x0c25e2bc, 0x8b493c28, 0x41950dff);
- &_data_word(0x7101a839, 0xdeb30c08, 0x9ce4b4d8, 0x90c15664);
- &_data_word(0x6184cb7b, 0x70b632d5, 0x745c6c48, 0x4257b8d0);
-
-#Td4: # four copies of Td4 to choose from to avoid L1 aliasing
- &data_byte(0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38);
- &data_byte(0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb);
- &data_byte(0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87);
- &data_byte(0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb);
- &data_byte(0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d);
- &data_byte(0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e);
- &data_byte(0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2);
- &data_byte(0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25);
- &data_byte(0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16);
- &data_byte(0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92);
- &data_byte(0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda);
- &data_byte(0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84);
- &data_byte(0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a);
- &data_byte(0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06);
- &data_byte(0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02);
- &data_byte(0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b);
- &data_byte(0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea);
- &data_byte(0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73);
- &data_byte(0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85);
- &data_byte(0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e);
- &data_byte(0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89);
- &data_byte(0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b);
- &data_byte(0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20);
- &data_byte(0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4);
- &data_byte(0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31);
- &data_byte(0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f);
- &data_byte(0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d);
- &data_byte(0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef);
- &data_byte(0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0);
- &data_byte(0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61);
- &data_byte(0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26);
- &data_byte(0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d);
-
- &data_byte(0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38);
- &data_byte(0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb);
- &data_byte(0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87);
- &data_byte(0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb);
- &data_byte(0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d);
- &data_byte(0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e);
- &data_byte(0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2);
- &data_byte(0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25);
- &data_byte(0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16);
- &data_byte(0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92);
- &data_byte(0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda);
- &data_byte(0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84);
- &data_byte(0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a);
- &data_byte(0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06);
- &data_byte(0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02);
- &data_byte(0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b);
- &data_byte(0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea);
- &data_byte(0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73);
- &data_byte(0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85);
- &data_byte(0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e);
- &data_byte(0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89);
- &data_byte(0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b);
- &data_byte(0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20);
- &data_byte(0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4);
- &data_byte(0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31);
- &data_byte(0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f);
- &data_byte(0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d);
- &data_byte(0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef);
- &data_byte(0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0);
- &data_byte(0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61);
- &data_byte(0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26);
- &data_byte(0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d);
-
- &data_byte(0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38);
- &data_byte(0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb);
- &data_byte(0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87);
- &data_byte(0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb);
- &data_byte(0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d);
- &data_byte(0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e);
- &data_byte(0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2);
- &data_byte(0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25);
- &data_byte(0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16);
- &data_byte(0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92);
- &data_byte(0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda);
- &data_byte(0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84);
- &data_byte(0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a);
- &data_byte(0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06);
- &data_byte(0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02);
- &data_byte(0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b);
- &data_byte(0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea);
- &data_byte(0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73);
- &data_byte(0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85);
- &data_byte(0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e);
- &data_byte(0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89);
- &data_byte(0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b);
- &data_byte(0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20);
- &data_byte(0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4);
- &data_byte(0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31);
- &data_byte(0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f);
- &data_byte(0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d);
- &data_byte(0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef);
- &data_byte(0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0);
- &data_byte(0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61);
- &data_byte(0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26);
- &data_byte(0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d);
-
- &data_byte(0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38);
- &data_byte(0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb);
- &data_byte(0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87);
- &data_byte(0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb);
- &data_byte(0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d);
- &data_byte(0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e);
- &data_byte(0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2);
- &data_byte(0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25);
- &data_byte(0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16);
- &data_byte(0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92);
- &data_byte(0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda);
- &data_byte(0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84);
- &data_byte(0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a);
- &data_byte(0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06);
- &data_byte(0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02);
- &data_byte(0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b);
- &data_byte(0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea);
- &data_byte(0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73);
- &data_byte(0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85);
- &data_byte(0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e);
- &data_byte(0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89);
- &data_byte(0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b);
- &data_byte(0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20);
- &data_byte(0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4);
- &data_byte(0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31);
- &data_byte(0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f);
- &data_byte(0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d);
- &data_byte(0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef);
- &data_byte(0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0);
- &data_byte(0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61);
- &data_byte(0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26);
- &data_byte(0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d);
-&function_end_B("_x86_AES_decrypt");
-
-# void AES_decrypt (const void *inp,void *out,const AES_KEY *key);
-&function_begin("AES_decrypt"); # public entry point: decrypts the 16 bytes at inp into out using the key schedule at key
- &mov ($acc,&wparam(0)); # load inp
- &mov ($key,&wparam(2)); # load key
-
- &mov ($s0,"esp"); # remember the incoming stack pointer; it is stored to $_esp once the frame is placed
- &sub ("esp",36);
- &and ("esp",-64); # align to cache-line
-
- # place stack frame just "above" the key schedule
- &lea ($s1,&DWP(-64-63,$key));
- &sub ($s1,"esp");
- &neg ($s1); # s1 = esp - (key-127)
- &and ($s1,0x3C0); # modulo 1024, but aligned to cache-line
- &sub ("esp",$s1);
- &add ("esp",4); # 4 is reserved for caller's return address
- &mov ($_esp,$s0); # save stack pointer
-
- &call (&label("pic_point")); # make it PIC!
- &set_label("pic_point");
- &blindpop($tbl); # $tbl = run-time address of pic_point (the return address pushed by the call)
- &picmeup($s0,"OPENSSL_ia32cap_P",$tbl,&label("pic_point")) if(!$x86only); # presumably leaves the address of OPENSSL_ia32cap_P in $s0; it is dereferenced by the bt below
- &lea ($tbl,&DWP(&label("AES_Td")."-".&label("pic_point"),$tbl)); # $tbl = run-time address of AES_Td
-
- # pick Td4 copy which can't "overlap" with stack frame or key schedule
- &lea ($s1,&DWP(768-4,"esp"));
- &sub ($s1,$tbl);
- &and ($s1,0x300); # one of 0, 0x100, 0x200, 0x300: selects one of the four 256-byte copies
- &lea ($tbl,&DWP(2048+128,$tbl,$s1)); # $tbl = AES_Td + 2048 + 128 + selected copy offset
-
- if (!$x86only) { # MMX/SSE path, emitted only when the generator is not restricted to plain x86
- &bt (&DWP(0,$s0),25); # check for SSE bit
- &jnc (&label("x86")); # bit clear: use the integer-only path
-
- &movq ("mm0",&QWP(0,$acc)); # load input data
- &movq ("mm4",&QWP(8,$acc));
- &call ("_sse_AES_decrypt_compact");
- &mov ("esp",$_esp); # restore stack pointer
- &mov ($acc,&wparam(1)); # load out
- &movq (&QWP(0,$acc),"mm0"); # write output data
- &movq (&QWP(8,$acc),"mm4");
- &emms ();
- &function_end_A(); # presumably emits the epilogue and return for this path; helper defined outside this chunk
- }
- &set_label("x86",16);
- &mov ($_tbl,$tbl); # save the table pointer in the stack frame
- &mov ($s0,&DWP(0,$acc)); # load input data
- &mov ($s1,&DWP(4,$acc));
- &mov ($s2,&DWP(8,$acc));
- &mov ($s3,&DWP(12,$acc));
- &call ("_x86_AES_decrypt_compact"); # the compact routine, not _x86_AES_decrypt
- &mov ("esp",$_esp); # restore stack pointer
- &mov ($acc,&wparam(1)); # load out
- &mov (&DWP(0,$acc),$s0); # write output data
- &mov (&DWP(4,$acc),$s1);
- &mov (&DWP(8,$acc),$s2);
- &mov (&DWP(12,$acc),$s3);
-&function_end("AES_decrypt");
-
-# void AES_cbc_encrypt (const unsigned char *inp, unsigned char *out,
-# size_t length, const AES_KEY *key,
-# unsigned char *ivp,const int enc);
-{
-# stack frame layout
-# -4(%esp) # return address 0(%esp)
-# 0(%esp) # s0 backing store 4(%esp)
-# 4(%esp) # s1 backing store 8(%esp)
-# 8(%esp) # s2 backing store 12(%esp)
-# 12(%esp) # s3 backing store 16(%esp)
-# 16(%esp) # key backup 20(%esp)
-# 20(%esp) # end of key schedule 24(%esp)
-# 24(%esp) # %ebp backup 28(%esp)
-# 28(%esp) # %esp backup
-my $_inp=&DWP(32,"esp"); # copy of wparam(0)
-my $_out=&DWP(36,"esp"); # copy of wparam(1)
-my $_len=&DWP(40,"esp"); # copy of wparam(2)
-my $_key=&DWP(44,"esp"); # copy of wparam(3)
-my $_ivp=&DWP(48,"esp"); # copy of wparam(4)
-my $_tmp=&DWP(52,"esp"); # volatile variable
-#
-my $ivec=&DWP(60,"esp"); # ivec[16]
-my $aes_key=&DWP(76,"esp"); # copy of aes_key
-my $mark=&DWP(76+240,"esp"); # copy of aes_key->rounds
-
-&function_begin("AES_cbc_encrypt");
- &mov ($s2 eq "ecx"? $s2 : "",&wparam(2)); # load len
- &cmp ($s2,0);
- &je (&label("drop_out"));
-
- &call (&label("pic_point")); # make it PIC!
- &set_label("pic_point");
- &blindpop($tbl);
- &picmeup($s0,"OPENSSL_ia32cap_P",$tbl,&label("pic_point")) if(!$x86only);
-
- &cmp (&wparam(5),0);
- &lea ($tbl,&DWP(&label("AES_Te")."-".&label("pic_point"),$tbl));
- &jne (&label("picked_te"));
- &lea ($tbl,&DWP(&label("AES_Td")."-".&label("AES_Te"),$tbl));
- &set_label("picked_te");
-
- # one can argue if this is required
- &pushf ();
- &cld ();
-
- &cmp ($s2,$speed_limit);
- &jb (&label("slow_way"));
- &test ($s2,15);
- &jnz (&label("slow_way"));
- if (!$x86only) {
- &bt (&DWP(0,$s0),28); # check for hyper-threading bit
- &jc (&label("slow_way"));
- }
- # pre-allocate aligned stack frame...
- &lea ($acc,&DWP(-80-244,"esp"));
- &and ($acc,-64);
-
- # ... and make sure it doesn't alias with $tbl modulo 4096
- &mov ($s0,$tbl);
- &lea ($s1,&DWP(2048+256,$tbl));
- &mov ($s3,$acc);
- &and ($s0,0xfff); # s = %ebp&0xfff
- &and ($s1,0xfff); # e = (%ebp+2048+256)&0xfff
- &and ($s3,0xfff); # p = %esp&0xfff
-
- &cmp ($s3,$s1); # if (p>=e) %esp =- (p-e);
- &jb (&label("tbl_break_out"));
- &sub ($s3,$s1);
- &sub ($acc,$s3);
- &jmp (&label("tbl_ok"));
- &set_label("tbl_break_out",4); # else %esp -= (p-s)&0xfff + framesz;
- &sub ($s3,$s0);
- &and ($s3,0xfff);
- &add ($s3,384);
- &sub ($acc,$s3);
- &set_label("tbl_ok",4);
-
- &lea ($s3,&wparam(0)); # obtain pointer to parameter block
- &exch ("esp",$acc); # allocate stack frame
- &add ("esp",4); # reserve for return address!
- &mov ($_tbl,$tbl); # save %ebp
- &mov ($_esp,$acc); # save %esp
-
- &mov ($s0,&DWP(0,$s3)); # load inp
- &mov ($s1,&DWP(4,$s3)); # load out
- #&mov ($s2,&DWP(8,$s3)); # load len
- &mov ($key,&DWP(12,$s3)); # load key
- &mov ($acc,&DWP(16,$s3)); # load ivp
- &mov ($s3,&DWP(20,$s3)); # load enc flag
-
- &mov ($_inp,$s0); # save copy of inp
- &mov ($_out,$s1); # save copy of out
- &mov ($_len,$s2); # save copy of len
- &mov ($_key,$key); # save copy of key
- &mov ($_ivp,$acc); # save copy of ivp
-
- &mov ($mark,0); # copy of aes_key->rounds = 0;
- # do we copy key schedule to stack?
- &mov ($s1 eq "ebx" ? $s1 : "",$key);
- &mov ($s2 eq "ecx" ? $s2 : "",244/4);
- &sub ($s1,$tbl);
- &mov ("esi",$key);
- &and ($s1,0xfff);
- &lea ("edi",$aes_key);
- &cmp ($s1,2048+256);
- &jb (&label("do_copy"));
- &cmp ($s1,4096-244);
- &jb (&label("skip_copy"));
- &set_label("do_copy",4);
- &mov ($_key,"edi");
- &data_word(0xA5F3F689); # rep movsd
- &set_label("skip_copy");
-
- &mov ($key,16);
- &set_label("prefetch_tbl",4);
- &mov ($s0,&DWP(0,$tbl));
- &mov ($s1,&DWP(32,$tbl));
- &mov ($s2,&DWP(64,$tbl));
- &mov ($acc,&DWP(96,$tbl));
- &lea ($tbl,&DWP(128,$tbl));
- &sub ($key,1);
- &jnz (&label("prefetch_tbl"));
- &sub ($tbl,2048);
-
- &mov ($acc,$_inp);
- &mov ($key,$_ivp);
-
- &cmp ($s3,0);
- &je (&label("fast_decrypt"));
-
-#----------------------------- ENCRYPT -----------------------------#
- &mov ($s0,&DWP(0,$key)); # load iv
- &mov ($s1,&DWP(4,$key));
-
- &set_label("fast_enc_loop",16);
- &mov ($s2,&DWP(8,$key));
- &mov ($s3,&DWP(12,$key));
-
- &xor ($s0,&DWP(0,$acc)); # xor input data
- &xor ($s1,&DWP(4,$acc));
- &xor ($s2,&DWP(8,$acc));
- &xor ($s3,&DWP(12,$acc));
-
- &mov ($key,$_key); # load key
- &call ("_x86_AES_encrypt");
-
- &mov ($acc,$_inp); # load inp
- &mov ($key,$_out); # load out
-
- &mov (&DWP(0,$key),$s0); # save output data
- &mov (&DWP(4,$key),$s1);
- &mov (&DWP(8,$key),$s2);
- &mov (&DWP(12,$key),$s3);
-
- &lea ($acc,&DWP(16,$acc)); # advance inp
- &mov ($s2,$_len); # load len
- &mov ($_inp,$acc); # save inp
- &lea ($s3,&DWP(16,$key)); # advance out
- &mov ($_out,$s3); # save out
- &sub ($s2,16); # decrease len
- &mov ($_len,$s2); # save len
- &jnz (&label("fast_enc_loop"));
- &mov ($acc,$_ivp); # load ivp
- &mov ($s2,&DWP(8,$key)); # restore last 2 dwords
- &mov ($s3,&DWP(12,$key));
- &mov (&DWP(0,$acc),$s0); # save ivec
- &mov (&DWP(4,$acc),$s1);
- &mov (&DWP(8,$acc),$s2);
- &mov (&DWP(12,$acc),$s3);
-
- &cmp ($mark,0); # was the key schedule copied?
- &mov ("edi",$_key);
- &je (&label("skip_ezero"));
- # zero copy of key schedule
- &mov ("ecx",240/4);
- &xor ("eax","eax");
- &align (4);
- &data_word(0xABF3F689); # rep stosd
- &set_label("skip_ezero");
- &mov ("esp",$_esp);
- &popf ();
- &set_label("drop_out");
- &function_end_A();
- &pushf (); # kludge, never executed
-
-#----------------------------- DECRYPT -----------------------------#
-&set_label("fast_decrypt",16);
-
- &cmp ($acc,$_out);
- &je (&label("fast_dec_in_place")); # in-place processing...
-
- &mov ($_tmp,$key);
-
- &align (4);
- &set_label("fast_dec_loop",16);
- &mov ($s0,&DWP(0,$acc)); # read input
- &mov ($s1,&DWP(4,$acc));
- &mov ($s2,&DWP(8,$acc));
- &mov ($s3,&DWP(12,$acc));
-
- &mov ($key,$_key); # load key
- &call ("_x86_AES_decrypt");
-
- &mov ($key,$_tmp); # load ivp
- &mov ($acc,$_len); # load len
- &xor ($s0,&DWP(0,$key)); # xor iv
- &xor ($s1,&DWP(4,$key));
- &xor ($s2,&DWP(8,$key));
- &xor ($s3,&DWP(12,$key));
-
- &mov ($key,$_out); # load out
- &mov ($acc,$_inp); # load inp
-
- &mov (&DWP(0,$key),$s0); # write output
- &mov (&DWP(4,$key),$s1);
- &mov (&DWP(8,$key),$s2);
- &mov (&DWP(12,$key),$s3);
-
- &mov ($s2,$_len); # load len
- &mov ($_tmp,$acc); # save ivp
- &lea ($acc,&DWP(16,$acc)); # advance inp
- &mov ($_inp,$acc); # save inp
- &lea ($key,&DWP(16,$key)); # advance out
- &mov ($_out,$key); # save out
- &sub ($s2,16); # decrease len
- &mov ($_len,$s2); # save len
- &jnz (&label("fast_dec_loop"));
- &mov ($key,$_tmp); # load temp ivp
- &mov ($acc,$_ivp); # load user ivp
- &mov ($s0,&DWP(0,$key)); # load iv
- &mov ($s1,&DWP(4,$key));
- &mov ($s2,&DWP(8,$key));
- &mov ($s3,&DWP(12,$key));
- &mov (&DWP(0,$acc),$s0); # copy back to user
- &mov (&DWP(4,$acc),$s1);
- &mov (&DWP(8,$acc),$s2);
- &mov (&DWP(12,$acc),$s3);
- &jmp (&label("fast_dec_out"));
-
- &set_label("fast_dec_in_place",16);
- &set_label("fast_dec_in_place_loop");
- &mov ($s0,&DWP(0,$acc)); # read input
- &mov ($s1,&DWP(4,$acc));
- &mov ($s2,&DWP(8,$acc));
- &mov ($s3,&DWP(12,$acc));
-
- &lea ($key,$ivec);
- &mov (&DWP(0,$key),$s0); # copy to temp
- &mov (&DWP(4,$key),$s1);
- &mov (&DWP(8,$key),$s2);
- &mov (&DWP(12,$key),$s3);
-
- &mov ($key,$_key); # load key
- &call ("_x86_AES_decrypt");
-
- &mov ($key,$_ivp); # load ivp
- &mov ($acc,$_out); # load out
- &xor ($s0,&DWP(0,$key)); # xor iv
- &xor ($s1,&DWP(4,$key));
- &xor ($s2,&DWP(8,$key));
- &xor ($s3,&DWP(12,$key));
-
- &mov (&DWP(0,$acc),$s0); # write output
- &mov (&DWP(4,$acc),$s1);
- &mov (&DWP(8,$acc),$s2);
- &mov (&DWP(12,$acc),$s3);
-
- &lea ($acc,&DWP(16,$acc)); # advance out
- &mov ($_out,$acc); # save out
-
- &lea ($acc,$ivec);
- &mov ($s0,&DWP(0,$acc)); # read temp
- &mov ($s1,&DWP(4,$acc));
- &mov ($s2,&DWP(8,$acc));
- &mov ($s3,&DWP(12,$acc));
-
- &mov (&DWP(0,$key),$s0); # copy iv
- &mov (&DWP(4,$key),$s1);
- &mov (&DWP(8,$key),$s2);
- &mov (&DWP(12,$key),$s3);
-
- &mov ($acc,$_inp); # load inp
- &mov ($s2,$_len); # load len
- &lea ($acc,&DWP(16,$acc)); # advance inp
- &mov ($_inp,$acc); # save inp
- &sub ($s2,16); # decrease len
- &mov ($_len,$s2); # save len
- &jnz (&label("fast_dec_in_place_loop"));
-
- &set_label("fast_dec_out",4);
- &cmp ($mark,0); # was the key schedule copied?
- &mov ("edi",$_key);
- &je (&label("skip_dzero"));
- # zero copy of key schedule
- &mov ("ecx",240/4);
- &xor ("eax","eax");
- &align (4);
- &data_word(0xABF3F689); # rep stosd
- &set_label("skip_dzero");
- &mov ("esp",$_esp);
- &popf ();
- &function_end_A();
- &pushf (); # kludge, never executed
-
-#--------------------------- SLOW ROUTINE ---------------------------#
-&set_label("slow_way",16);
-
- &mov ($s0,&DWP(0,$s0)) if (!$x86only);# load OPENSSL_ia32cap
- &mov ($key,&wparam(3)); # load key
-
- # pre-allocate aligned stack frame...
- &lea ($acc,&DWP(-80,"esp"));
- &and ($acc,-64);
-
- # ... and make sure it doesn't alias with $key modulo 1024
- &lea ($s1,&DWP(-80-63,$key));
- &sub ($s1,$acc);
- &neg ($s1);
- &and ($s1,0x3C0); # modulo 1024, but aligned to cache-line
- &sub ($acc,$s1);
-
- # pick S-box copy which can't overlap with stack frame or $key
- &lea ($s1,&DWP(768,$acc));
- &sub ($s1,$tbl);
- &and ($s1,0x300);
- &lea ($tbl,&DWP(2048+128,$tbl,$s1));
-
- &lea ($s3,&wparam(0)); # pointer to parameter block
-
- &exch ("esp",$acc);
- &add ("esp",4); # reserve for return address!
- &mov ($_tbl,$tbl); # save %ebp
- &mov ($_esp,$acc); # save %esp
- &mov ($_tmp,$s0); # save OPENSSL_ia32cap
-
- &mov ($s0,&DWP(0,$s3)); # load inp
- &mov ($s1,&DWP(4,$s3)); # load out
- #&mov ($s2,&DWP(8,$s3)); # load len
- #&mov ($key,&DWP(12,$s3)); # load key
- &mov ($acc,&DWP(16,$s3)); # load ivp
- &mov ($s3,&DWP(20,$s3)); # load enc flag
-
- &mov ($_inp,$s0); # save copy of inp
- &mov ($_out,$s1); # save copy of out
- &mov ($_len,$s2); # save copy of len
- &mov ($_key,$key); # save copy of key
- &mov ($_ivp,$acc); # save copy of ivp
-
- &mov ($key,$acc);
- &mov ($acc,$s0);
-
- &cmp ($s3,0);
- &je (&label("slow_decrypt"));
-
-#--------------------------- SLOW ENCRYPT ---------------------------#
- &cmp ($s2,16);
- &mov ($s3,$s1);
- &jb (&label("slow_enc_tail"));
-
- if (!$x86only) {
- &bt ($_tmp,25); # check for SSE bit
- &jnc (&label("slow_enc_x86"));
-
- &movq ("mm0",&QWP(0,$key)); # load iv
- &movq ("mm4",&QWP(8,$key));
-
- &set_label("slow_enc_loop_sse",16);
- &pxor ("mm0",&QWP(0,$acc)); # xor input data
- &pxor ("mm4",&QWP(8,$acc));
-
- &mov ($key,$_key);
- &call ("_sse_AES_encrypt_compact");
-
- &mov ($acc,$_inp); # load inp
- &mov ($key,$_out); # load out
- &mov ($s2,$_len); # load len
-
- &movq (&QWP(0,$key),"mm0"); # save output data
- &movq (&QWP(8,$key),"mm4");
-
- &lea ($acc,&DWP(16,$acc)); # advance inp
- &mov ($_inp,$acc); # save inp
- &lea ($s3,&DWP(16,$key)); # advance out
- &mov ($_out,$s3); # save out
- &sub ($s2,16); # decrease len
- &cmp ($s2,16);
- &mov ($_len,$s2); # save len
- &jae (&label("slow_enc_loop_sse"));
- &test ($s2,15);
- &jnz (&label("slow_enc_tail"));
- &mov ($acc,$_ivp); # load ivp
- &movq (&QWP(0,$acc),"mm0"); # save ivec
- &movq (&QWP(8,$acc),"mm4");
- &emms ();
- &mov ("esp",$_esp);
- &popf ();
- &function_end_A();
- &pushf (); # kludge, never executed
- }
- &set_label("slow_enc_x86",16);
- &mov ($s0,&DWP(0,$key)); # load iv
- &mov ($s1,&DWP(4,$key));
-
- &set_label("slow_enc_loop_x86",4);
- &mov ($s2,&DWP(8,$key));
- &mov ($s3,&DWP(12,$key));
-
- &xor ($s0,&DWP(0,$acc)); # xor input data
- &xor ($s1,&DWP(4,$acc));
- &xor ($s2,&DWP(8,$acc));
- &xor ($s3,&DWP(12,$acc));
-
- &mov ($key,$_key); # load key
- &call ("_x86_AES_encrypt_compact");
-
- &mov ($acc,$_inp); # load inp
- &mov ($key,$_out); # load out
-
- &mov (&DWP(0,$key),$s0); # save output data
- &mov (&DWP(4,$key),$s1);
- &mov (&DWP(8,$key),$s2);
- &mov (&DWP(12,$key),$s3);
-
- &mov ($s2,$_len); # load len
- &lea ($acc,&DWP(16,$acc)); # advance inp
- &mov ($_inp,$acc); # save inp
- &lea ($s3,&DWP(16,$key)); # advance out
- &mov ($_out,$s3); # save out
- &sub ($s2,16); # decrease len
- &cmp ($s2,16);
- &mov ($_len,$s2); # save len
- &jae (&label("slow_enc_loop_x86"));
- &test ($s2,15);
- &jnz (&label("slow_enc_tail"));
- &mov ($acc,$_ivp); # load ivp
- &mov ($s2,&DWP(8,$key)); # restore last dwords
- &mov ($s3,&DWP(12,$key));
- &mov (&DWP(0,$acc),$s0); # save ivec
- &mov (&DWP(4,$acc),$s1);
- &mov (&DWP(8,$acc),$s2);
- &mov (&DWP(12,$acc),$s3);
-
- &mov ("esp",$_esp);
- &popf ();
- &function_end_A();
- &pushf (); # kludge, never executed
-
- &set_label("slow_enc_tail",16);
- &emms () if (!$x86only);
- &mov ($key eq "edi"? $key:"",$s3); # load out to edi
- &mov ($s1,16);
- &sub ($s1,$s2);
- &cmp ($key,$acc eq "esi"? $acc:""); # compare with inp
- &je (&label("enc_in_place"));
- &align (4);
- &data_word(0xA4F3F689); # rep movsb # copy input
- &jmp (&label("enc_skip_in_place"));
- &set_label("enc_in_place");
- &lea ($key,&DWP(0,$key,$s2));
- &set_label("enc_skip_in_place");
- &mov ($s2,$s1);
- &xor ($s0,$s0);
- &align (4);
- &data_word(0xAAF3F689); # rep stosb # zero tail
-
- &mov ($key,$_ivp); # restore ivp
- &mov ($acc,$s3); # output as input
- &mov ($s0,&DWP(0,$key));
- &mov ($s1,&DWP(4,$key));
- &mov ($_len,16); # len=16
- &jmp (&label("slow_enc_loop_x86")); # one more spin...
-
-#--------------------------- SLOW DECRYPT ---------------------------#
-&set_label("slow_decrypt",16);
- if (!$x86only) {
- &bt ($_tmp,25); # check for SSE bit
- &jnc (&label("slow_dec_loop_x86"));
-
- &set_label("slow_dec_loop_sse",4);
- &movq ("mm0",&QWP(0,$acc)); # read input
- &movq ("mm4",&QWP(8,$acc));
-
- &mov ($key,$_key);
- &call ("_sse_AES_decrypt_compact");
-
- &mov ($acc,$_inp); # load inp
- &lea ($s0,$ivec);
- &mov ($s1,$_out); # load out
- &mov ($s2,$_len); # load len
- &mov ($key,$_ivp); # load ivp
-
- &movq ("mm1",&QWP(0,$acc)); # re-read input
- &movq ("mm5",&QWP(8,$acc));
-
- &pxor ("mm0",&QWP(0,$key)); # xor iv
- &pxor ("mm4",&QWP(8,$key));
-
- &movq (&QWP(0,$key),"mm1"); # copy input to iv
- &movq (&QWP(8,$key),"mm5");
-
- &sub ($s2,16); # decrease len
- &jc (&label("slow_dec_partial_sse"));
-
- &movq (&QWP(0,$s1),"mm0"); # write output
- &movq (&QWP(8,$s1),"mm4");
-
- &lea ($s1,&DWP(16,$s1)); # advance out
- &mov ($_out,$s1); # save out
- &lea ($acc,&DWP(16,$acc)); # advance inp
- &mov ($_inp,$acc); # save inp
- &mov ($_len,$s2); # save len
- &jnz (&label("slow_dec_loop_sse"));
- &emms ();
- &mov ("esp",$_esp);
- &popf ();
- &function_end_A();
- &pushf (); # kludge, never executed
-
- &set_label("slow_dec_partial_sse",16);
- &movq (&QWP(0,$s0),"mm0"); # save output to temp
- &movq (&QWP(8,$s0),"mm4");
- &emms ();
-
- &add ($s2 eq "ecx" ? "ecx":"",16);
- &mov ("edi",$s1); # out
- &mov ("esi",$s0); # temp
- &align (4);
- &data_word(0xA4F3F689); # rep movsb # copy partial output
-
- &mov ("esp",$_esp);
- &popf ();
- &function_end_A();
- &pushf (); # kludge, never executed
- }
- &set_label("slow_dec_loop_x86",16);
- &mov ($s0,&DWP(0,$acc)); # read input
- &mov ($s1,&DWP(4,$acc));
- &mov ($s2,&DWP(8,$acc));
- &mov ($s3,&DWP(12,$acc));
-
- &lea ($key,$ivec);
- &mov (&DWP(0,$key),$s0); # copy to temp
- &mov (&DWP(4,$key),$s1);
- &mov (&DWP(8,$key),$s2);
- &mov (&DWP(12,$key),$s3);
-
- &mov ($key,$_key); # load key
- &call ("_x86_AES_decrypt_compact");
-
- &mov ($key,$_ivp); # load ivp
- &mov ($acc,$_len); # load len
- &xor ($s0,&DWP(0,$key)); # xor iv
- &xor ($s1,&DWP(4,$key));
- &xor ($s2,&DWP(8,$key));
- &xor ($s3,&DWP(12,$key));
-
- &sub ($acc,16);
- &jc (&label("slow_dec_partial_x86"));
-
- &mov ($_len,$acc); # save len
- &mov ($acc,$_out); # load out
-
- &mov (&DWP(0,$acc),$s0); # write output
- &mov (&DWP(4,$acc),$s1);
- &mov (&DWP(8,$acc),$s2);
- &mov (&DWP(12,$acc),$s3);
-
- &lea ($acc,&DWP(16,$acc)); # advance out
- &mov ($_out,$acc); # save out
-
- &lea ($acc,$ivec);
- &mov ($s0,&DWP(0,$acc)); # read temp
- &mov ($s1,&DWP(4,$acc));
- &mov ($s2,&DWP(8,$acc));
- &mov ($s3,&DWP(12,$acc));
-
- &mov (&DWP(0,$key),$s0); # copy it to iv
- &mov (&DWP(4,$key),$s1);
- &mov (&DWP(8,$key),$s2);
- &mov (&DWP(12,$key),$s3);
-
- &mov ($acc,$_inp); # load inp
- &lea ($acc,&DWP(16,$acc)); # advance inp
- &mov ($_inp,$acc); # save inp
- &jnz (&label("slow_dec_loop_x86"));
- &mov ("esp",$_esp);
- &popf ();
- &function_end_A();
- &pushf (); # kludge, never executed
-
- &set_label("slow_dec_partial_x86",16);
- &lea ($acc,$ivec);
- &mov (&DWP(0,$acc),$s0); # save output to temp
- &mov (&DWP(4,$acc),$s1);
- &mov (&DWP(8,$acc),$s2);
- &mov (&DWP(12,$acc),$s3);
-
- &mov ($acc,$_inp);
- &mov ($s0,&DWP(0,$acc)); # re-read input
- &mov ($s1,&DWP(4,$acc));
- &mov ($s2,&DWP(8,$acc));
- &mov ($s3,&DWP(12,$acc));
-
- &mov (&DWP(0,$key),$s0); # copy it to iv
- &mov (&DWP(4,$key),$s1);
- &mov (&DWP(8,$key),$s2);
- &mov (&DWP(12,$key),$s3);
-
- &mov ("ecx",$_len);
- &mov ("edi",$_out);
- &lea ("esi",$ivec);
- &align (4);
- &data_word(0xA4F3F689); # rep movsb # copy partial output
-
- &mov ("esp",$_esp);
- &popf ();
-&function_end("AES_cbc_encrypt");
-}
-
-#------------------------------------------------------------------#
-
-sub enckey()
-{
- &movz ("esi",&LB("edx")); # rk[i]>>0
- &movz ("ebx",&BP(-128,$tbl,"esi",1));
- &movz ("esi",&HB("edx")); # rk[i]>>8
- &shl ("ebx",24);
- &xor ("eax","ebx");
-
- &movz ("ebx",&BP(-128,$tbl,"esi",1));
- &shr ("edx",16);
- &movz ("esi",&LB("edx")); # rk[i]>>16
- &xor ("eax","ebx");
-
- &movz ("ebx",&BP(-128,$tbl,"esi",1));
- &movz ("esi",&HB("edx")); # rk[i]>>24
- &shl ("ebx",8);
- &xor ("eax","ebx");
-
- &movz ("ebx",&BP(-128,$tbl,"esi",1));
- &shl ("ebx",16);
- &xor ("eax","ebx");
-
- &xor ("eax",&DWP(1024-128,$tbl,"ecx",4)); # rcon
-}
-
-&function_begin("_x86_AES_set_encrypt_key");
- &mov ("esi",&wparam(1)); # user supplied key
- &mov ("edi",&wparam(3)); # private key schedule
-
- &test ("esi",-1);
- &jz (&label("badpointer"));
- &test ("edi",-1);
- &jz (&label("badpointer"));
-
- &call (&label("pic_point"));
- &set_label("pic_point");
- &blindpop($tbl);
- &lea ($tbl,&DWP(&label("AES_Te")."-".&label("pic_point"),$tbl));
- &lea ($tbl,&DWP(2048+128,$tbl));
-
- # prefetch Te4
- &mov ("eax",&DWP(0-128,$tbl));
- &mov ("ebx",&DWP(32-128,$tbl));
- &mov ("ecx",&DWP(64-128,$tbl));
- &mov ("edx",&DWP(96-128,$tbl));
- &mov ("eax",&DWP(128-128,$tbl));
- &mov ("ebx",&DWP(160-128,$tbl));
- &mov ("ecx",&DWP(192-128,$tbl));
- &mov ("edx",&DWP(224-128,$tbl));
-
- &mov ("ecx",&wparam(2)); # number of bits in key
- &cmp ("ecx",128);
- &je (&label("10rounds"));
- &cmp ("ecx",192);
- &je (&label("12rounds"));
- &cmp ("ecx",256);
- &je (&label("14rounds"));
- &mov ("eax",-2); # invalid number of bits
- &jmp (&label("exit"));
-
- &set_label("10rounds");
- &mov ("eax",&DWP(0,"esi")); # copy first 4 dwords
- &mov ("ebx",&DWP(4,"esi"));
- &mov ("ecx",&DWP(8,"esi"));
- &mov ("edx",&DWP(12,"esi"));
- &mov (&DWP(0,"edi"),"eax");
- &mov (&DWP(4,"edi"),"ebx");
- &mov (&DWP(8,"edi"),"ecx");
- &mov (&DWP(12,"edi"),"edx");
-
- &xor ("ecx","ecx");
- &jmp (&label("10shortcut"));
-
- &align (4);
- &set_label("10loop");
- &mov ("eax",&DWP(0,"edi")); # rk[0]
- &mov ("edx",&DWP(12,"edi")); # rk[3]
- &set_label("10shortcut");
- &enckey ();
-
- &mov (&DWP(16,"edi"),"eax"); # rk[4]
- &xor ("eax",&DWP(4,"edi"));
- &mov (&DWP(20,"edi"),"eax"); # rk[5]
- &xor ("eax",&DWP(8,"edi"));
- &mov (&DWP(24,"edi"),"eax"); # rk[6]
- &xor ("eax",&DWP(12,"edi"));
- &mov (&DWP(28,"edi"),"eax"); # rk[7]
- &inc ("ecx");
- &add ("edi",16);
- &cmp ("ecx",10);
- &jl (&label("10loop"));
-
- &mov (&DWP(80,"edi"),10); # setup number of rounds
- &xor ("eax","eax");
- &jmp (&label("exit"));
-
- &set_label("12rounds");
- &mov ("eax",&DWP(0,"esi")); # copy first 6 dwords
- &mov ("ebx",&DWP(4,"esi"));
- &mov ("ecx",&DWP(8,"esi"));
- &mov ("edx",&DWP(12,"esi"));
- &mov (&DWP(0,"edi"),"eax");
- &mov (&DWP(4,"edi"),"ebx");
- &mov (&DWP(8,"edi"),"ecx");
- &mov (&DWP(12,"edi"),"edx");
- &mov ("ecx",&DWP(16,"esi"));
- &mov ("edx",&DWP(20,"esi"));
- &mov (&DWP(16,"edi"),"ecx");
- &mov (&DWP(20,"edi"),"edx");
-
- &xor ("ecx","ecx");
- &jmp (&label("12shortcut"));
-
- &align (4);
- &set_label("12loop");
- &mov ("eax",&DWP(0,"edi")); # rk[0]
- &mov ("edx",&DWP(20,"edi")); # rk[5]
- &set_label("12shortcut");
- &enckey ();
-
- &mov (&DWP(24,"edi"),"eax"); # rk[6]
- &xor ("eax",&DWP(4,"edi"));
- &mov (&DWP(28,"edi"),"eax"); # rk[7]
- &xor ("eax",&DWP(8,"edi"));
- &mov (&DWP(32,"edi"),"eax"); # rk[8]
- &xor ("eax",&DWP(12,"edi"));
- &mov (&DWP(36,"edi"),"eax"); # rk[9]
-
- &cmp ("ecx",7);
- &je (&label("12break"));
- &inc ("ecx");
-
- &xor ("eax",&DWP(16,"edi"));
- &mov (&DWP(40,"edi"),"eax"); # rk[10]
- &xor ("eax",&DWP(20,"edi"));
- &mov (&DWP(44,"edi"),"eax"); # rk[11]
-
- &add ("edi",24);
- &jmp (&label("12loop"));
-
- &set_label("12break");
- &mov (&DWP(72,"edi"),12); # setup number of rounds
- &xor ("eax","eax");
- &jmp (&label("exit"));
-
- &set_label("14rounds");
- &mov ("eax",&DWP(0,"esi")); # copy first 8 dwords
- &mov ("ebx",&DWP(4,"esi"));
- &mov ("ecx",&DWP(8,"esi"));
- &mov ("edx",&DWP(12,"esi"));
- &mov (&DWP(0,"edi"),"eax");
- &mov (&DWP(4,"edi"),"ebx");
- &mov (&DWP(8,"edi"),"ecx");
- &mov (&DWP(12,"edi"),"edx");
- &mov ("eax",&DWP(16,"esi"));
- &mov ("ebx",&DWP(20,"esi"));
- &mov ("ecx",&DWP(24,"esi"));
- &mov ("edx",&DWP(28,"esi"));
- &mov (&DWP(16,"edi"),"eax");
- &mov (&DWP(20,"edi"),"ebx");
- &mov (&DWP(24,"edi"),"ecx");
- &mov (&DWP(28,"edi"),"edx");
-
- &xor ("ecx","ecx");
- &jmp (&label("14shortcut"));
-
- &align (4);
- &set_label("14loop");
- &mov ("edx",&DWP(28,"edi")); # rk[7]
- &set_label("14shortcut");
- &mov ("eax",&DWP(0,"edi")); # rk[0]
-
- &enckey ();
-
- &mov (&DWP(32,"edi"),"eax"); # rk[8]
- &xor ("eax",&DWP(4,"edi"));
- &mov (&DWP(36,"edi"),"eax"); # rk[9]
- &xor ("eax",&DWP(8,"edi"));
- &mov (&DWP(40,"edi"),"eax"); # rk[10]
- &xor ("eax",&DWP(12,"edi"));
- &mov (&DWP(44,"edi"),"eax"); # rk[11]
-
- &cmp ("ecx",6);
- &je (&label("14break"));
- &inc ("ecx");
-
- &mov ("edx","eax");
- &mov ("eax",&DWP(16,"edi")); # rk[4]
- &movz ("esi",&LB("edx")); # rk[11]>>0
- &movz ("ebx",&BP(-128,$tbl,"esi",1));
- &movz ("esi",&HB("edx")); # rk[11]>>8
- &xor ("eax","ebx");
-
- &movz ("ebx",&BP(-128,$tbl,"esi",1));
- &shr ("edx",16);
- &shl ("ebx",8);
- &movz ("esi",&LB("edx")); # rk[11]>>16
- &xor ("eax","ebx");
-
- &movz ("ebx",&BP(-128,$tbl,"esi",1));
- &movz ("esi",&HB("edx")); # rk[11]>>24
- &shl ("ebx",16);
- &xor ("eax","ebx");
-
- &movz ("ebx",&BP(-128,$tbl,"esi",1));
- &shl ("ebx",24);
- &xor ("eax","ebx");
-
- &mov (&DWP(48,"edi"),"eax"); # rk[12]
- &xor ("eax",&DWP(20,"edi"));
- &mov (&DWP(52,"edi"),"eax"); # rk[13]
- &xor ("eax",&DWP(24,"edi"));
- &mov (&DWP(56,"edi"),"eax"); # rk[14]
- &xor ("eax",&DWP(28,"edi"));
- &mov (&DWP(60,"edi"),"eax"); # rk[15]
-
- &add ("edi",32);
- &jmp (&label("14loop"));
-
- &set_label("14break");
- &mov (&DWP(48,"edi"),14); # setup number of rounds
- &xor ("eax","eax");
- &jmp (&label("exit"));
-
- &set_label("badpointer");
- &mov ("eax",-1);
- &set_label("exit");
-&function_end("_x86_AES_set_encrypt_key");
-
-# int AES_set_encrypt_key(const unsigned char *userKey, const int bits,
-# AES_KEY *key)
-&function_begin_B("AES_set_encrypt_key");
- &call ("_x86_AES_set_encrypt_key");
- &ret ();
-&function_end_B("AES_set_encrypt_key");
-
-sub deckey()
-{ my ($i,$key,$tp1,$tp2,$tp4,$tp8) = @_;
- my $tmp = $tbl;
-
- &mov ($tmp,0x80808080);
- &and ($tmp,$tp1);
- &lea ($tp2,&DWP(0,$tp1,$tp1));
- &mov ($acc,$tmp);
- &shr ($tmp,7);
- &sub ($acc,$tmp);
- &and ($tp2,0xfefefefe);
- &and ($acc,0x1b1b1b1b);
- &xor ($tp2,$acc);
- &mov ($tmp,0x80808080);
-
- &and ($tmp,$tp2);
- &lea ($tp4,&DWP(0,$tp2,$tp2));
- &mov ($acc,$tmp);
- &shr ($tmp,7);
- &sub ($acc,$tmp);
- &and ($tp4,0xfefefefe);
- &and ($acc,0x1b1b1b1b);
- &xor ($tp2,$tp1); # tp2^tp1
- &xor ($tp4,$acc);
- &mov ($tmp,0x80808080);
-
- &and ($tmp,$tp4);
- &lea ($tp8,&DWP(0,$tp4,$tp4));
- &mov ($acc,$tmp);
- &shr ($tmp,7);
- &xor ($tp4,$tp1); # tp4^tp1
- &sub ($acc,$tmp);
- &and ($tp8,0xfefefefe);
- &and ($acc,0x1b1b1b1b);
- &rotl ($tp1,8); # = ROTATE(tp1,8)
- &xor ($tp8,$acc);
-
- &mov ($tmp,&DWP(4*($i+1),$key)); # modulo-scheduled load
-
- &xor ($tp1,$tp2);
- &xor ($tp2,$tp8);
- &xor ($tp1,$tp4);
- &rotl ($tp2,24);
- &xor ($tp4,$tp8);
- &xor ($tp1,$tp8); # ^= tp8^(tp4^tp1)^(tp2^tp1)
- &rotl ($tp4,16);
- &xor ($tp1,$tp2); # ^= ROTATE(tp8^tp2^tp1,24)
- &rotl ($tp8,8);
- &xor ($tp1,$tp4); # ^= ROTATE(tp8^tp4^tp1,16)
- &mov ($tp2,$tmp);
- &xor ($tp1,$tp8); # ^= ROTATE(tp8,8)
-
- &mov (&DWP(4*$i,$key),$tp1);
-}
-
-# int AES_set_decrypt_key(const unsigned char *userKey, const int bits,
-# AES_KEY *key)
-&function_begin_B("AES_set_decrypt_key");
- &call ("_x86_AES_set_encrypt_key");
- &cmp ("eax",0);
- &je (&label("proceed"));
- &ret ();
-
- &set_label("proceed");
- &push ("ebp");
- &push ("ebx");
- &push ("esi");
- &push ("edi");
-
- &mov ("esi",&wparam(2));
- &mov ("ecx",&DWP(240,"esi")); # pull number of rounds
- &lea ("ecx",&DWP(0,"","ecx",4));
- &lea ("edi",&DWP(0,"esi","ecx",4)); # pointer to last chunk
-
- &set_label("invert",4); # invert order of chunks
- &mov ("eax",&DWP(0,"esi"));
- &mov ("ebx",&DWP(4,"esi"));
- &mov ("ecx",&DWP(0,"edi"));
- &mov ("edx",&DWP(4,"edi"));
- &mov (&DWP(0,"edi"),"eax");
- &mov (&DWP(4,"edi"),"ebx");
- &mov (&DWP(0,"esi"),"ecx");
- &mov (&DWP(4,"esi"),"edx");
- &mov ("eax",&DWP(8,"esi"));
- &mov ("ebx",&DWP(12,"esi"));
- &mov ("ecx",&DWP(8,"edi"));
- &mov ("edx",&DWP(12,"edi"));
- &mov (&DWP(8,"edi"),"eax");
- &mov (&DWP(12,"edi"),"ebx");
- &mov (&DWP(8,"esi"),"ecx");
- &mov (&DWP(12,"esi"),"edx");
- &add ("esi",16);
- &sub ("edi",16);
- &cmp ("esi","edi");
- &jne (&label("invert"));
-
- &mov ($key,&wparam(2));
- &mov ($acc,&DWP(240,$key)); # pull number of rounds
- &lea ($acc,&DWP(-2,$acc,$acc));
- &lea ($acc,&DWP(0,$key,$acc,8));
- &mov (&wparam(2),$acc);
-
- &mov ($s0,&DWP(16,$key)); # modulo-scheduled load
- &set_label("permute",4); # permute the key schedule
- &add ($key,16);
- &deckey (0,$key,$s0,$s1,$s2,$s3);
- &deckey (1,$key,$s1,$s2,$s3,$s0);
- &deckey (2,$key,$s2,$s3,$s0,$s1);
- &deckey (3,$key,$s3,$s0,$s1,$s2);
- &cmp ($key,&wparam(2));
- &jb (&label("permute"));
-
- &xor ("eax","eax"); # return success
-&function_end("AES_set_decrypt_key");
-&asciz("AES for x86, CRYPTOGAMS by <appro\@openssl.org>");
-
-&asm_finish();
-
-close STDOUT;
diff --git a/crypto/openssl/crypto/aes/asm/aes-s390x.pl b/crypto/openssl/crypto/aes/asm/aes-s390x.pl
index 0c4005906650..815fde8fcd7e 100755
--- a/crypto/openssl/crypto/aes/asm/aes-s390x.pl
+++ b/crypto/openssl/crypto/aes/asm/aes-s390x.pl
@@ -1,5 +1,5 @@
#! /usr/bin/env perl
-# Copyright 2007-2018 The OpenSSL Project Authors. All Rights Reserved.
+# Copyright 2007-2019 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the OpenSSL license (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
@@ -38,14 +38,14 @@
# Implement AES_set_[en|de]crypt_key. Key schedule setup is avoided
# for 128-bit keys, if hardware support is detected.
-# Januray 2009.
+# January 2009.
#
# Add support for hardware AES192/256 and reschedule instructions to
# minimize/avoid Address Generation Interlock hazard and to favour
# dual-issue z10 pipeline. This gave ~25% improvement on z10 and
# almost 50% on z9. The gain is smaller on z10, because being dual-
# issue z10 makes it impossible to eliminate the interlock condition:
-# critial path is not long enough. Yet it spends ~24 cycles per byte
+# critical path is not long enough. Yet it spends ~24 cycles per byte
# processed with 128-bit key.
#
# Unlike previous version hardware support detection takes place only
diff --git a/crypto/openssl/crypto/aes/asm/aes-x86_64.pl b/crypto/openssl/crypto/aes/asm/aes-x86_64.pl
deleted file mode 100755
index d87e20114771..000000000000
--- a/crypto/openssl/crypto/aes/asm/aes-x86_64.pl
+++ /dev/null
@@ -1,2916 +0,0 @@
-#! /usr/bin/env perl
-# Copyright 2005-2019 The OpenSSL Project Authors. All Rights Reserved.
-#
-# Licensed under the OpenSSL license (the "License"). You may not use
-# this file except in compliance with the License. You can obtain a copy
-# in the file LICENSE in the source distribution or at
-# https://www.openssl.org/source/license.html
-
-#
-# ====================================================================
-# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
-# project. The module is, however, dual licensed under OpenSSL and
-# CRYPTOGAMS licenses depending on where you obtain it. For further
-# details see http://www.openssl.org/~appro/cryptogams/.
-# ====================================================================
-#
-# Version 2.1.
-#
-# aes-*-cbc benchmarks are improved by >70% [compared to gcc 3.3.2 on
-# Opteron 240 CPU] plus all the bells-n-whistles from 32-bit version
-# [you'll notice a lot of resemblance], such as compressed S-boxes
-# in little-endian byte order, prefetch of these tables in CBC mode,
-# as well as avoiding L1 cache aliasing between stack frame and key
-# schedule and already mentioned tables, compressed Td4...
-#
-# Performance in number of cycles per processed byte for 128-bit key:
-#
-# ECB encrypt ECB decrypt CBC large chunk
-# AMD64 33 43 13.0
-# EM64T 38 56 18.6(*)
-# Core 2 30 42 14.5(*)
-# Atom 65 86 32.1(*)
-#
-# (*) with hyper-threading off
-
-$flavour = shift;
-$output = shift;
-if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
-
-$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);
-
-$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
-( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
-( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
-die "can't locate x86_64-xlate.pl";
-
-open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\"";
-*STDOUT=*OUT;
-
-$verticalspin=1; # unlike 32-bit version $verticalspin performs
- # ~15% better on both AMD and Intel cores
-$speed_limit=512; # see aes-586.pl for details
-
-$code=".text\n";
-
-$s0="%eax";
-$s1="%ebx";
-$s2="%ecx";
-$s3="%edx";
-$acc0="%esi"; $mask80="%rsi";
-$acc1="%edi"; $maskfe="%rdi";
-$acc2="%ebp"; $mask1b="%rbp";
-$inp="%r8";
-$out="%r9";
-$t0="%r10d";
-$t1="%r11d";
-$t2="%r12d";
-$rnds="%r13d";
-$sbox="%r14";
-$key="%r15";
-
-sub hi() { my $r=shift; $r =~ s/%[er]([a-d])x/%\1h/; $r; }
-sub lo() { my $r=shift; $r =~ s/%[er]([a-d])x/%\1l/;
- $r =~ s/%[er]([sd]i)/%\1l/;
- $r =~ s/%(r[0-9]+)[d]?/%\1b/; $r; }
-sub LO() { my $r=shift; $r =~ s/%r([a-z]+)/%e\1/;
- $r =~ s/%r([0-9]+)/%r\1d/; $r; }
-sub _data_word()
-{ my $i;
- while(defined($i=shift)) { $code.=sprintf".long\t0x%08x,0x%08x\n",$i,$i; }
-}
-sub data_word()
-{ my $i;
- my $last=pop(@_);
- $code.=".long\t";
- while(defined($i=shift)) { $code.=sprintf"0x%08x,",$i; }
- $code.=sprintf"0x%08x\n",$last;
-}
-
-sub data_byte()
-{ my $i;
- my $last=pop(@_);
- $code.=".byte\t";
- while(defined($i=shift)) { $code.=sprintf"0x%02x,",$i&0xff; }
- $code.=sprintf"0x%02x\n",$last&0xff;
-}
-
-sub encvert()
-{ my $t3="%r8d"; # zaps $inp!
-
-$code.=<<___;
- # favor 3-way issue Opteron pipeline...
- movzb `&lo("$s0")`,$acc0
- movzb `&lo("$s1")`,$acc1
- movzb `&lo("$s2")`,$acc2
- mov 0($sbox,$acc0,8),$t0
- mov 0($sbox,$acc1,8),$t1
- mov 0($sbox,$acc2,8),$t2
-
- movzb `&hi("$s1")`,$acc0
- movzb `&hi("$s2")`,$acc1
- movzb `&lo("$s3")`,$acc2
- xor 3($sbox,$acc0,8),$t0
- xor 3($sbox,$acc1,8),$t1
- mov 0($sbox,$acc2,8),$t3
-
- movzb `&hi("$s3")`,$acc0
- shr \$16,$s2
- movzb `&hi("$s0")`,$acc2
- xor 3($sbox,$acc0,8),$t2
- shr \$16,$s3
- xor 3($sbox,$acc2,8),$t3
-
- shr \$16,$s1
- lea 16($key),$key
- shr \$16,$s0
-
- movzb `&lo("$s2")`,$acc0
- movzb `&lo("$s3")`,$acc1
- movzb `&lo("$s0")`,$acc2
- xor 2($sbox,$acc0,8),$t0
- xor 2($sbox,$acc1,8),$t1
- xor 2($sbox,$acc2,8),$t2
-
- movzb `&hi("$s3")`,$acc0
- movzb `&hi("$s0")`,$acc1
- movzb `&lo("$s1")`,$acc2
- xor 1($sbox,$acc0,8),$t0
- xor 1($sbox,$acc1,8),$t1
- xor 2($sbox,$acc2,8),$t3
-
- mov 12($key),$s3
- movzb `&hi("$s1")`,$acc1
- movzb `&hi("$s2")`,$acc2
- mov 0($key),$s0
- xor 1($sbox,$acc1,8),$t2
- xor 1($sbox,$acc2,8),$t3
-
- mov 4($key),$s1
- mov 8($key),$s2
- xor $t0,$s0
- xor $t1,$s1
- xor $t2,$s2
- xor $t3,$s3
-___
-}
-
-sub enclastvert()
-{ my $t3="%r8d"; # zaps $inp!
-
-$code.=<<___;
- movzb `&lo("$s0")`,$acc0
- movzb `&lo("$s1")`,$acc1
- movzb `&lo("$s2")`,$acc2
- movzb 2($sbox,$acc0,8),$t0
- movzb 2($sbox,$acc1,8),$t1
- movzb 2($sbox,$acc2,8),$t2
-
- movzb `&lo("$s3")`,$acc0
- movzb `&hi("$s1")`,$acc1
- movzb `&hi("$s2")`,$acc2
- movzb 2($sbox,$acc0,8),$t3
- mov 0($sbox,$acc1,8),$acc1 #$t0
- mov 0($sbox,$acc2,8),$acc2 #$t1
-
- and \$0x0000ff00,$acc1
- and \$0x0000ff00,$acc2
-
- xor $acc1,$t0
- xor $acc2,$t1
- shr \$16,$s2
-
- movzb `&hi("$s3")`,$acc0
- movzb `&hi("$s0")`,$acc1
- shr \$16,$s3
- mov 0($sbox,$acc0,8),$acc0 #$t2
- mov 0($sbox,$acc1,8),$acc1 #$t3
-
- and \$0x0000ff00,$acc0
- and \$0x0000ff00,$acc1
- shr \$16,$s1
- xor $acc0,$t2
- xor $acc1,$t3
- shr \$16,$s0
-
- movzb `&lo("$s2")`,$acc0
- movzb `&lo("$s3")`,$acc1
- movzb `&lo("$s0")`,$acc2
- mov 0($sbox,$acc0,8),$acc0 #$t0
- mov 0($sbox,$acc1,8),$acc1 #$t1
- mov 0($sbox,$acc2,8),$acc2 #$t2
-
- and \$0x00ff0000,$acc0
- and \$0x00ff0000,$acc1
- and \$0x00ff0000,$acc2
-
- xor $acc0,$t0
- xor $acc1,$t1
- xor $acc2,$t2
-
- movzb `&lo("$s1")`,$acc0
- movzb `&hi("$s3")`,$acc1
- movzb `&hi("$s0")`,$acc2
- mov 0($sbox,$acc0,8),$acc0 #$t3
- mov 2($sbox,$acc1,8),$acc1 #$t0
- mov 2($sbox,$acc2,8),$acc2 #$t1
-
- and \$0x00ff0000,$acc0
- and \$0xff000000,$acc1
- and \$0xff000000,$acc2
-
- xor $acc0,$t3
- xor $acc1,$t0
- xor $acc2,$t1
-
- movzb `&hi("$s1")`,$acc0
- movzb `&hi("$s2")`,$acc1
- mov 16+12($key),$s3
- mov 2($sbox,$acc0,8),$acc0 #$t2
- mov 2($sbox,$acc1,8),$acc1 #$t3
- mov 16+0($key),$s0
-
- and \$0xff000000,$acc0
- and \$0xff000000,$acc1
-
- xor $acc0,$t2
- xor $acc1,$t3
-
- mov 16+4($key),$s1
- mov 16+8($key),$s2
- xor $t0,$s0
- xor $t1,$s1
- xor $t2,$s2
- xor $t3,$s3
-___
-}
-
-sub encstep()
-{ my ($i,@s) = @_;
- my $tmp0=$acc0;
- my $tmp1=$acc1;
- my $tmp2=$acc2;
- my $out=($t0,$t1,$t2,$s[0])[$i];
-
- if ($i==3) {
- $tmp0=$s[1];
- $tmp1=$s[2];
- $tmp2=$s[3];
- }
- $code.=" movzb ".&lo($s[0]).",$out\n";
- $code.=" mov $s[2],$tmp1\n" if ($i!=3);
- $code.=" lea 16($key),$key\n" if ($i==0);
-
- $code.=" movzb ".&hi($s[1]).",$tmp0\n";
- $code.=" mov 0($sbox,$out,8),$out\n";
-
- $code.=" shr \$16,$tmp1\n";
- $code.=" mov $s[3],$tmp2\n" if ($i!=3);
- $code.=" xor 3($sbox,$tmp0,8),$out\n";
-
- $code.=" movzb ".&lo($tmp1).",$tmp1\n";
- $code.=" shr \$24,$tmp2\n";
- $code.=" xor 4*$i($key),$out\n";
-
- $code.=" xor 2($sbox,$tmp1,8),$out\n";
- $code.=" xor 1($sbox,$tmp2,8),$out\n";
-
- $code.=" mov $t0,$s[1]\n" if ($i==3);
- $code.=" mov $t1,$s[2]\n" if ($i==3);
- $code.=" mov $t2,$s[3]\n" if ($i==3);
- $code.="\n";
-}
-
-sub enclast()
-{ my ($i,@s)=@_;
- my $tmp0=$acc0;
- my $tmp1=$acc1;
- my $tmp2=$acc2;
- my $out=($t0,$t1,$t2,$s[0])[$i];
-
- if ($i==3) {
- $tmp0=$s[1];
- $tmp1=$s[2];
- $tmp2=$s[3];
- }
- $code.=" movzb ".&lo($s[0]).",$out\n";
- $code.=" mov $s[2],$tmp1\n" if ($i!=3);
-
- $code.=" mov 2($sbox,$out,8),$out\n";
- $code.=" shr \$16,$tmp1\n";
- $code.=" mov $s[3],$tmp2\n" if ($i!=3);
-
- $code.=" and \$0x000000ff,$out\n";
- $code.=" movzb ".&hi($s[1]).",$tmp0\n";
- $code.=" movzb ".&lo($tmp1).",$tmp1\n";
- $code.=" shr \$24,$tmp2\n";
-
- $code.=" mov 0($sbox,$tmp0,8),$tmp0\n";
- $code.=" mov 0($sbox,$tmp1,8),$tmp1\n";
- $code.=" mov 2($sbox,$tmp2,8),$tmp2\n";
-
- $code.=" and \$0x0000ff00,$tmp0\n";
- $code.=" and \$0x00ff0000,$tmp1\n";
- $code.=" and \$0xff000000,$tmp2\n";
-
- $code.=" xor $tmp0,$out\n";
- $code.=" mov $t0,$s[1]\n" if ($i==3);
- $code.=" xor $tmp1,$out\n";
- $code.=" mov $t1,$s[2]\n" if ($i==3);
- $code.=" xor $tmp2,$out\n";
- $code.=" mov $t2,$s[3]\n" if ($i==3);
- $code.="\n";
-}
-
-$code.=<<___;
-.type _x86_64_AES_encrypt,\@abi-omnipotent
-.align 16
-_x86_64_AES_encrypt:
- xor 0($key),$s0 # xor with key
- xor 4($key),$s1
- xor 8($key),$s2
- xor 12($key),$s3
-
- mov 240($key),$rnds # load key->rounds
- sub \$1,$rnds
- jmp .Lenc_loop
-.align 16
-.Lenc_loop:
-___
- if ($verticalspin) { &encvert(); }
- else { &encstep(0,$s0,$s1,$s2,$s3);
- &encstep(1,$s1,$s2,$s3,$s0);
- &encstep(2,$s2,$s3,$s0,$s1);
- &encstep(3,$s3,$s0,$s1,$s2);
- }
-$code.=<<___;
- sub \$1,$rnds
- jnz .Lenc_loop
-___
- if ($verticalspin) { &enclastvert(); }
- else { &enclast(0,$s0,$s1,$s2,$s3);
- &enclast(1,$s1,$s2,$s3,$s0);
- &enclast(2,$s2,$s3,$s0,$s1);
- &enclast(3,$s3,$s0,$s1,$s2);
- $code.=<<___;
- xor 16+0($key),$s0 # xor with key
- xor 16+4($key),$s1
- xor 16+8($key),$s2
- xor 16+12($key),$s3
-___
- }
-$code.=<<___;
- .byte 0xf3,0xc3 # rep ret
-.size _x86_64_AES_encrypt,.-_x86_64_AES_encrypt
-___
-
-# it's possible to implement this by shifting tN by 8, filling least
-# significant byte with byte load and finally bswap-ing at the end,
-# but such partial register load kills Core 2...
-sub enccompactvert()
-{ my ($t3,$t4,$t5)=("%r8d","%r9d","%r13d");
-
-$code.=<<___;
- movzb `&lo("$s0")`,$t0
- movzb `&lo("$s1")`,$t1
- movzb `&lo("$s2")`,$t2
- movzb `&lo("$s3")`,$t3
- movzb `&hi("$s1")`,$acc0
- movzb `&hi("$s2")`,$acc1
- shr \$16,$s2
- movzb `&hi("$s3")`,$acc2
- movzb ($sbox,$t0,1),$t0
- movzb ($sbox,$t1,1),$t1
- movzb ($sbox,$t2,1),$t2
- movzb ($sbox,$t3,1),$t3
-
- movzb ($sbox,$acc0,1),$t4 #$t0
- movzb `&hi("$s0")`,$acc0
- movzb ($sbox,$acc1,1),$t5 #$t1
- movzb `&lo("$s2")`,$acc1
- movzb ($sbox,$acc2,1),$acc2 #$t2
- movzb ($sbox,$acc0,1),$acc0 #$t3
-
- shl \$8,$t4
- shr \$16,$s3
- shl \$8,$t5
- xor $t4,$t0
- shr \$16,$s0
- movzb `&lo("$s3")`,$t4
- shr \$16,$s1
- xor $t5,$t1
- shl \$8,$acc2
- movzb `&lo("$s0")`,$t5
- movzb ($sbox,$acc1,1),$acc1 #$t0
- xor $acc2,$t2
-
- shl \$8,$acc0
- movzb `&lo("$s1")`,$acc2
- shl \$16,$acc1
- xor $acc0,$t3
- movzb ($sbox,$t4,1),$t4 #$t1
- movzb `&hi("$s3")`,$acc0
- movzb ($sbox,$t5,1),$t5 #$t2
- xor $acc1,$t0
-
- shr \$8,$s2
- movzb `&hi("$s0")`,$acc1
- shl \$16,$t4
- shr \$8,$s1
- shl \$16,$t5
- xor $t4,$t1
- movzb ($sbox,$acc2,1),$acc2 #$t3
- movzb ($sbox,$acc0,1),$acc0 #$t0
- movzb ($sbox,$acc1,1),$acc1 #$t1
- movzb ($sbox,$s2,1),$s3 #$t3
- movzb ($sbox,$s1,1),$s2 #$t2
-
- shl \$16,$acc2
- xor $t5,$t2
- shl \$24,$acc0
- xor $acc2,$t3
- shl \$24,$acc1
- xor $acc0,$t0
- shl \$24,$s3
- xor $acc1,$t1
- shl \$24,$s2
- mov $t0,$s0
- mov $t1,$s1
- xor $t2,$s2
- xor $t3,$s3
-___
-}
-
-sub enctransform_ref()
-{ my $sn = shift;
- my ($acc,$r2,$tmp)=("%r8d","%r9d","%r13d");
-
-$code.=<<___;
- mov $sn,$acc
- and \$0x80808080,$acc
- mov $acc,$tmp
- shr \$7,$tmp
- lea ($sn,$sn),$r2
- sub $tmp,$acc
- and \$0xfefefefe,$r2
- and \$0x1b1b1b1b,$acc
- mov $sn,$tmp
- xor $acc,$r2
-
- xor $r2,$sn
- rol \$24,$sn
- xor $r2,$sn
- ror \$16,$tmp
- xor $tmp,$sn
- ror \$8,$tmp
- xor $tmp,$sn
-___
-}
-
-# unlike decrypt case it does not pay off to parallelize enctransform
-sub enctransform()
-{ my ($t3,$r20,$r21)=($acc2,"%r8d","%r9d");
-
-$code.=<<___;
- mov \$0x80808080,$t0
- mov \$0x80808080,$t1
- and $s0,$t0
- and $s1,$t1
- mov $t0,$acc0
- mov $t1,$acc1
- shr \$7,$t0
- lea ($s0,$s0),$r20
- shr \$7,$t1
- lea ($s1,$s1),$r21
- sub $t0,$acc0
- sub $t1,$acc1
- and \$0xfefefefe,$r20
- and \$0xfefefefe,$r21
- and \$0x1b1b1b1b,$acc0
- and \$0x1b1b1b1b,$acc1
- mov $s0,$t0
- mov $s1,$t1
- xor $acc0,$r20
- xor $acc1,$r21
-
- xor $r20,$s0
- xor $r21,$s1
- mov \$0x80808080,$t2
- rol \$24,$s0
- mov \$0x80808080,$t3
- rol \$24,$s1
- and $s2,$t2
- and $s3,$t3
- xor $r20,$s0
- xor $r21,$s1
- mov $t2,$acc0
- ror \$16,$t0
- mov $t3,$acc1
- ror \$16,$t1
- lea ($s2,$s2),$r20
- shr \$7,$t2
- xor $t0,$s0
- shr \$7,$t3
- xor $t1,$s1
- ror \$8,$t0
- lea ($s3,$s3),$r21
- ror \$8,$t1
- sub $t2,$acc0
- sub $t3,$acc1
- xor $t0,$s0
- xor $t1,$s1
-
- and \$0xfefefefe,$r20
- and \$0xfefefefe,$r21
- and \$0x1b1b1b1b,$acc0
- and \$0x1b1b1b1b,$acc1
- mov $s2,$t2
- mov $s3,$t3
- xor $acc0,$r20
- xor $acc1,$r21
-
- ror \$16,$t2
- xor $r20,$s2
- ror \$16,$t3
- xor $r21,$s3
- rol \$24,$s2
- mov 0($sbox),$acc0 # prefetch Te4
- rol \$24,$s3
- xor $r20,$s2
- mov 64($sbox),$acc1
- xor $r21,$s3
- mov 128($sbox),$r20
- xor $t2,$s2
- ror \$8,$t2
- xor $t3,$s3
- ror \$8,$t3
- xor $t2,$s2
- mov 192($sbox),$r21
- xor $t3,$s3
-___
-}
-
-$code.=<<___;
-.type _x86_64_AES_encrypt_compact,\@abi-omnipotent
-.align 16
-_x86_64_AES_encrypt_compact:
-.cfi_startproc
- lea 128($sbox),$inp # size optimization
- mov 0-128($inp),$acc1 # prefetch Te4
- mov 32-128($inp),$acc2
- mov 64-128($inp),$t0
- mov 96-128($inp),$t1
- mov 128-128($inp),$acc1
- mov 160-128($inp),$acc2
- mov 192-128($inp),$t0
- mov 224-128($inp),$t1
- jmp .Lenc_loop_compact
-.align 16
-.Lenc_loop_compact:
- xor 0($key),$s0 # xor with key
- xor 4($key),$s1
- xor 8($key),$s2
- xor 12($key),$s3
- lea 16($key),$key
-___
- &enccompactvert();
-$code.=<<___;
- cmp 16(%rsp),$key
- je .Lenc_compact_done
-___
- &enctransform();
-$code.=<<___;
- jmp .Lenc_loop_compact
-.align 16
-.Lenc_compact_done:
- xor 0($key),$s0
- xor 4($key),$s1
- xor 8($key),$s2
- xor 12($key),$s3
- .byte 0xf3,0xc3 # rep ret
-.cfi_endproc
-.size _x86_64_AES_encrypt_compact,.-_x86_64_AES_encrypt_compact
-___
-
-# void AES_encrypt (const void *inp,void *out,const AES_KEY *key);
-$code.=<<___;
-.globl AES_encrypt
-.type AES_encrypt,\@function,3
-.align 16
-.globl asm_AES_encrypt
-.hidden asm_AES_encrypt
-asm_AES_encrypt:
-AES_encrypt:
-.cfi_startproc
- mov %rsp,%rax
-.cfi_def_cfa_register %rax
- push %rbx
-.cfi_push %rbx
- push %rbp
-.cfi_push %rbp
- push %r12
-.cfi_push %r12
- push %r13
-.cfi_push %r13
- push %r14
-.cfi_push %r14
- push %r15
-.cfi_push %r15
-
- # allocate frame "above" key schedule
- lea -63(%rdx),%rcx # %rdx is key argument
- and \$-64,%rsp
- sub %rsp,%rcx
- neg %rcx
- and \$0x3c0,%rcx
- sub %rcx,%rsp
- sub \$32,%rsp
-
- mov %rsi,16(%rsp) # save out
- mov %rax,24(%rsp) # save original stack pointer
-.cfi_cfa_expression %rsp+24,deref,+8
-.Lenc_prologue:
-
- mov %rdx,$key
- mov 240($key),$rnds # load rounds
-
- mov 0(%rdi),$s0 # load input vector
- mov 4(%rdi),$s1
- mov 8(%rdi),$s2
- mov 12(%rdi),$s3
-
- shl \$4,$rnds
- lea ($key,$rnds),%rbp
- mov $key,(%rsp) # key schedule
- mov %rbp,8(%rsp) # end of key schedule
-
- # pick Te4 copy which can't "overlap" with stack frame or key schedule
- lea .LAES_Te+2048(%rip),$sbox
- lea 768(%rsp),%rbp
- sub $sbox,%rbp
- and \$0x300,%rbp
- lea ($sbox,%rbp),$sbox
-
- call _x86_64_AES_encrypt_compact
-
- mov 16(%rsp),$out # restore out
- mov 24(%rsp),%rsi # restore saved stack pointer
-.cfi_def_cfa %rsi,8
- mov $s0,0($out) # write output vector
- mov $s1,4($out)
- mov $s2,8($out)
- mov $s3,12($out)
-
- mov -48(%rsi),%r15
-.cfi_restore %r15
- mov -40(%rsi),%r14
-.cfi_restore %r14
- mov -32(%rsi),%r13
-.cfi_restore %r13
- mov -24(%rsi),%r12
-.cfi_restore %r12
- mov -16(%rsi),%rbp
-.cfi_restore %rbp
- mov -8(%rsi),%rbx
-.cfi_restore %rbx
- lea (%rsi),%rsp
-.cfi_def_cfa_register %rsp
-.Lenc_epilogue:
- ret
-.cfi_endproc
-.size AES_encrypt,.-AES_encrypt
-___
-
-#------------------------------------------------------------------#
-
-sub decvert()
-{ my $t3="%r8d"; # zaps $inp!
-
-$code.=<<___;
- # favor 3-way issue Opteron pipeline...
- movzb `&lo("$s0")`,$acc0
- movzb `&lo("$s1")`,$acc1
- movzb `&lo("$s2")`,$acc2
- mov 0($sbox,$acc0,8),$t0
- mov 0($sbox,$acc1,8),$t1
- mov 0($sbox,$acc2,8),$t2
-
- movzb `&hi("$s3")`,$acc0
- movzb `&hi("$s0")`,$acc1
- movzb `&lo("$s3")`,$acc2
- xor 3($sbox,$acc0,8),$t0
- xor 3($sbox,$acc1,8),$t1
- mov 0($sbox,$acc2,8),$t3
-
- movzb `&hi("$s1")`,$acc0
- shr \$16,$s0
- movzb `&hi("$s2")`,$acc2
- xor 3($sbox,$acc0,8),$t2
- shr \$16,$s3
- xor 3($sbox,$acc2,8),$t3
-
- shr \$16,$s1
- lea 16($key),$key
- shr \$16,$s2
-
- movzb `&lo("$s2")`,$acc0
- movzb `&lo("$s3")`,$acc1
- movzb `&lo("$s0")`,$acc2
- xor 2($sbox,$acc0,8),$t0
- xor 2($sbox,$acc1,8),$t1
- xor 2($sbox,$acc2,8),$t2
-
- movzb `&hi("$s1")`,$acc0
- movzb `&hi("$s2")`,$acc1
- movzb `&lo("$s1")`,$acc2
- xor 1($sbox,$acc0,8),$t0
- xor 1($sbox,$acc1,8),$t1
- xor 2($sbox,$acc2,8),$t3
-
- movzb `&hi("$s3")`,$acc0
- mov 12($key),$s3
- movzb `&hi("$s0")`,$acc2
- xor 1($sbox,$acc0,8),$t2
- mov 0($key),$s0
- xor 1($sbox,$acc2,8),$t3
-
- xor $t0,$s0
- mov 4($key),$s1
- mov 8($key),$s2
- xor $t2,$s2
- xor $t1,$s1
- xor $t3,$s3
-___
-}
-
-sub declastvert()
-{ my $t3="%r8d"; # zaps $inp!
-
-$code.=<<___;
- lea 2048($sbox),$sbox # size optimization
- movzb `&lo("$s0")`,$acc0
- movzb `&lo("$s1")`,$acc1
- movzb `&lo("$s2")`,$acc2
- movzb ($sbox,$acc0,1),$t0
- movzb ($sbox,$acc1,1),$t1
- movzb ($sbox,$acc2,1),$t2
-
- movzb `&lo("$s3")`,$acc0
- movzb `&hi("$s3")`,$acc1
- movzb `&hi("$s0")`,$acc2
- movzb ($sbox,$acc0,1),$t3
- movzb ($sbox,$acc1,1),$acc1 #$t0
- movzb ($sbox,$acc2,1),$acc2 #$t1
-
- shl \$8,$acc1
- shl \$8,$acc2
-
- xor $acc1,$t0
- xor $acc2,$t1
- shr \$16,$s3
-
- movzb `&hi("$s1")`,$acc0
- movzb `&hi("$s2")`,$acc1
- shr \$16,$s0
- movzb ($sbox,$acc0,1),$acc0 #$t2
- movzb ($sbox,$acc1,1),$acc1 #$t3
-
- shl \$8,$acc0
- shl \$8,$acc1
- shr \$16,$s1
- xor $acc0,$t2
- xor $acc1,$t3
- shr \$16,$s2
-
- movzb `&lo("$s2")`,$acc0
- movzb `&lo("$s3")`,$acc1
- movzb `&lo("$s0")`,$acc2
- movzb ($sbox,$acc0,1),$acc0 #$t0
- movzb ($sbox,$acc1,1),$acc1 #$t1
- movzb ($sbox,$acc2,1),$acc2 #$t2
-
- shl \$16,$acc0
- shl \$16,$acc1
- shl \$16,$acc2
-
- xor $acc0,$t0
- xor $acc1,$t1
- xor $acc2,$t2
-
- movzb `&lo("$s1")`,$acc0
- movzb `&hi("$s1")`,$acc1
- movzb `&hi("$s2")`,$acc2
- movzb ($sbox,$acc0,1),$acc0 #$t3
- movzb ($sbox,$acc1,1),$acc1 #$t0
- movzb ($sbox,$acc2,1),$acc2 #$t1
-
- shl \$16,$acc0
- shl \$24,$acc1
- shl \$24,$acc2
-
- xor $acc0,$t3
- xor $acc1,$t0
- xor $acc2,$t1
-
- movzb `&hi("$s3")`,$acc0
- movzb `&hi("$s0")`,$acc1
- mov 16+12($key),$s3
- movzb ($sbox,$acc0,1),$acc0 #$t2
- movzb ($sbox,$acc1,1),$acc1 #$t3
- mov 16+0($key),$s0
-
- shl \$24,$acc0
- shl \$24,$acc1
-
- xor $acc0,$t2
- xor $acc1,$t3
-
- mov 16+4($key),$s1
- mov 16+8($key),$s2
- lea -2048($sbox),$sbox
- xor $t0,$s0
- xor $t1,$s1
- xor $t2,$s2
- xor $t3,$s3
-___
-}
-
-sub decstep() # appends to $code the instructions for one output word ($i = 0..3) of a table-driven decryption round
-{ my ($i,@s) = @_; # @s: four state registers; byte 0 of $s[0], byte 1 of $s[1], byte 2 of $s[2], byte 3 of $s[3] are consumed
- my $tmp0=$acc0;
- my $tmp1=$acc1;
- my $tmp2=$acc2;
- my $out=($t0,$t1,$t2,$s[0])[$i]; # words 0..2 are collected in $t0..$t2; word 3 is built in place in $s[0]
-
- $code.=" mov $s[0],$out\n" if ($i!=3);
- $tmp1=$s[2] if ($i==3); # for the last word the inputs are no longer needed, so they are used as scratch directly
- $code.=" mov $s[2],$tmp1\n" if ($i!=3);
- $code.=" and \$0xFF,$out\n";
-
- $code.=" mov 0($sbox,$out,8),$out\n"; # table entries are indexed with an 8-byte stride
- $code.=" shr \$16,$tmp1\n";
- $tmp2=$s[3] if ($i==3);
- $code.=" mov $s[3],$tmp2\n" if ($i!=3);
-
- $tmp0=$s[1] if ($i==3);
- $code.=" movzb ".&hi($s[1]).",$tmp0\n";
- $code.=" and \$0xFF,$tmp1\n";
- $code.=" shr \$24,$tmp2\n";
-
- $code.=" xor 3($sbox,$tmp0,8),$out\n"; # the other three lookups read the same entries at byte offsets 3, 2 and 1
- $code.=" xor 2($sbox,$tmp1,8),$out\n";
- $code.=" xor 1($sbox,$tmp2,8),$out\n";
-
- $code.=" mov $t2,$s[1]\n" if ($i==3); # after the last word, copy the three saved results back into state registers
- $code.=" mov $t1,$s[2]\n" if ($i==3);
- $code.=" mov $t0,$s[3]\n" if ($i==3);
- $code.="\n";
-}
-
-sub declast() # appends to $code the instructions for one output word ($i = 0..3) of the final decryption round
-{ my ($i,@s)=@_; # @s: four state registers; byte 0 of $s[0], byte 1 of $s[1], byte 2 of $s[2], byte 3 of $s[3] are consumed
- my $tmp0=$acc0;
- my $tmp1=$acc1;
- my $tmp2=$acc2;
- my $out=($t0,$t1,$t2,$s[0])[$i]; # words 0..2 are collected in $t0..$t2; word 3 is built in place in $s[0]
-
- $code.=" mov $s[0],$out\n" if ($i!=3);
- $tmp1=$s[2] if ($i==3); # for the last word the inputs are used as scratch directly
- $code.=" mov $s[2],$tmp1\n" if ($i!=3);
- $code.=" and \$0xFF,$out\n";
-
- $code.=" movzb 2048($sbox,$out,1),$out\n"; # single-byte lookups in the table located 2048 bytes past $sbox
- $code.=" shr \$16,$tmp1\n";
- $tmp2=$s[3] if ($i==3);
- $code.=" mov $s[3],$tmp2\n" if ($i!=3);
-
- $tmp0=$s[1] if ($i==3);
- $code.=" movzb ".&hi($s[1]).",$tmp0\n";
- $code.=" and \$0xFF,$tmp1\n";
- $code.=" shr \$24,$tmp2\n";
-
- $code.=" movzb 2048($sbox,$tmp0,1),$tmp0\n";
- $code.=" movzb 2048($sbox,$tmp1,1),$tmp1\n";
- $code.=" movzb 2048($sbox,$tmp2,1),$tmp2\n";
-
- $code.=" shl \$8,$tmp0\n"; # move each looked-up byte to its lane before combining
- $code.=" shl \$16,$tmp1\n";
- $code.=" shl \$24,$tmp2\n";
-
- $code.=" xor $tmp0,$out\n";
- $code.=" mov $t2,$s[1]\n" if ($i==3); # copy-back of the saved results is interleaved with the last xors
- $code.=" xor $tmp1,$out\n";
- $code.=" mov $t1,$s[2]\n" if ($i==3);
- $code.=" xor $tmp2,$out\n";
- $code.=" mov $t0,$s[3]\n" if ($i==3);
- $code.="\n";
-}
-
-$code.=<<___; # _x86_64_AES_decrypt: XOR the state with the first key words, load the round count from 240($key), enter the loop
-.type _x86_64_AES_decrypt,\@abi-omnipotent
-.align 16
-_x86_64_AES_decrypt:
- xor 0($key),$s0 # xor with key
- xor 4($key),$s1
- xor 8($key),$s2
- xor 12($key),$s3
-
- mov 240($key),$rnds # load key->rounds
- sub \$1,$rnds
- jmp .Ldec_loop
-.align 16
-.Ldec_loop:
-___
- if ($verticalspin) { &decvert(); } # loop body: one full round, either the "vertical" form or four per-word steps
- else { &decstep(0,$s0,$s3,$s2,$s1);
- &decstep(1,$s1,$s0,$s3,$s2);
- &decstep(2,$s2,$s1,$s0,$s3);
- &decstep(3,$s3,$s2,$s1,$s0);
- $code.=<<___; # per-word form only: advance $key by 16 and XOR in the next round key
- lea 16($key),$key
- xor 0($key),$s0 # xor with key
- xor 4($key),$s1
- xor 8($key),$s2
- xor 12($key),$s3
-___
- }
-$code.=<<___; # repeat until the round counter reaches zero
- sub \$1,$rnds
- jnz .Ldec_loop
-___
- if ($verticalspin) { &declastvert(); } # final round; the per-word form adds the last round-key XOR explicitly
- else { &declast(0,$s0,$s3,$s2,$s1);
- &declast(1,$s1,$s0,$s3,$s2);
- &declast(2,$s2,$s1,$s0,$s3);
- &declast(3,$s3,$s2,$s1,$s0);
- $code.=<<___;
- xor 16+0($key),$s0 # xor with key
- xor 16+4($key),$s1
- xor 16+8($key),$s2
- xor 16+12($key),$s3
-___
- }
-$code.=<<___; # return and close the symbol
- .byte 0xf3,0xc3 # rep ret
-.size _x86_64_AES_decrypt,.-_x86_64_AES_decrypt
-___
-
-sub deccompactvert() # emits one "compact" round: every state byte is looked up via ($sbox,reg,1); results end up in $s0..$s3
-{ my ($t3,$t4,$t5)=("%r8d","%r9d","%r13d"); # three extra scratch registers used only inside this sequence
-
-$code.=<<___;
- movzb `&lo("$s0")`,$t0
- movzb `&lo("$s1")`,$t1
- movzb `&lo("$s2")`,$t2
- movzb `&lo("$s3")`,$t3
- movzb `&hi("$s3")`,$acc0
- movzb `&hi("$s0")`,$acc1
- shr \$16,$s3
- movzb `&hi("$s1")`,$acc2
- movzb ($sbox,$t0,1),$t0
- movzb ($sbox,$t1,1),$t1
- movzb ($sbox,$t2,1),$t2
- movzb ($sbox,$t3,1),$t3
-
- movzb ($sbox,$acc0,1),$t4 #$t0
- movzb `&hi("$s2")`,$acc0
- movzb ($sbox,$acc1,1),$t5 #$t1
- movzb ($sbox,$acc2,1),$acc2 #$t2
- movzb ($sbox,$acc0,1),$acc0 #$t3
-
- shr \$16,$s2
- shl \$8,$t5
- shl \$8,$t4
- movzb `&lo("$s2")`,$acc1
- shr \$16,$s0
- xor $t4,$t0
- shr \$16,$s1
- movzb `&lo("$s3")`,$t4
-
- shl \$8,$acc2
- xor $t5,$t1
- shl \$8,$acc0
- movzb `&lo("$s0")`,$t5
- movzb ($sbox,$acc1,1),$acc1 #$t0
- xor $acc2,$t2
- movzb `&lo("$s1")`,$acc2
-
- shl \$16,$acc1
- xor $acc0,$t3
- movzb ($sbox,$t4,1),$t4 #$t1
- movzb `&hi("$s1")`,$acc0
- movzb ($sbox,$acc2,1),$acc2 #$t3
- xor $acc1,$t0
- movzb ($sbox,$t5,1),$t5 #$t2
- movzb `&hi("$s2")`,$acc1
-
- shl \$16,$acc2
- shl \$16,$t4
- shl \$16,$t5
- xor $acc2,$t3
- movzb `&hi("$s3")`,$acc2
- xor $t4,$t1
- shr \$8,$s0
- xor $t5,$t2
-
- movzb ($sbox,$acc0,1),$acc0 #$t0
- movzb ($sbox,$acc1,1),$s1 #$t1
- movzb ($sbox,$acc2,1),$s2 #$t2
- movzb ($sbox,$s0,1),$s3 #$t3
-
- mov $t0,$s0
- shl \$24,$acc0
- shl \$24,$s1
- shl \$24,$s2
- xor $acc0,$s0
- shl \$24,$s3
- xor $t1,$s1
- xor $t2,$s2
- xor $t3,$s3
-___
-}
-
-# parallelized version! input is pair of 64-bit values: %rax=s1.s0
-# and %rcx=s3.s2, output is four 32-bit values in %eax=s0, %ebx=s1,
-# %ecx=s2 and %edx=s3.
-sub dectransform() # expects $mask80, $maskfe and $mask1b to have been loaded by the caller before the emitted sequence runs
-{ my ($tp10,$tp20,$tp40,$tp80,$acc0)=("%rax","%r8", "%r9", "%r10","%rbx");
- my ($tp18,$tp28,$tp48,$tp88,$acc8)=("%rcx","%r11","%r12","%r13","%rdx");
- my $prefetch = shift; # optional flag: when true, loads from 0..256($sbox) are emitted near the end, overwriting the mask registers
-
-$code.=<<___;
- mov $mask80,$tp40
- mov $mask80,$tp48
- and $tp10,$tp40
- and $tp18,$tp48
- mov $tp40,$acc0
- mov $tp48,$acc8
- shr \$7,$tp40
- lea ($tp10,$tp10),$tp20
- shr \$7,$tp48
- lea ($tp18,$tp18),$tp28
- sub $tp40,$acc0
- sub $tp48,$acc8
- and $maskfe,$tp20
- and $maskfe,$tp28
- and $mask1b,$acc0
- and $mask1b,$acc8
- xor $acc0,$tp20
- xor $acc8,$tp28
- mov $mask80,$tp80
- mov $mask80,$tp88
-
- and $tp20,$tp80
- and $tp28,$tp88
- mov $tp80,$acc0
- mov $tp88,$acc8
- shr \$7,$tp80
- lea ($tp20,$tp20),$tp40
- shr \$7,$tp88
- lea ($tp28,$tp28),$tp48
- sub $tp80,$acc0
- sub $tp88,$acc8
- and $maskfe,$tp40
- and $maskfe,$tp48
- and $mask1b,$acc0
- and $mask1b,$acc8
- xor $acc0,$tp40
- xor $acc8,$tp48
- mov $mask80,$tp80
- mov $mask80,$tp88
-
- and $tp40,$tp80
- and $tp48,$tp88
- mov $tp80,$acc0
- mov $tp88,$acc8
- shr \$7,$tp80
- xor $tp10,$tp20 # tp2^=tp1
- shr \$7,$tp88
- xor $tp18,$tp28 # tp2^=tp1
- sub $tp80,$acc0
- sub $tp88,$acc8
- lea ($tp40,$tp40),$tp80
- lea ($tp48,$tp48),$tp88
- xor $tp10,$tp40 # tp4^=tp1
- xor $tp18,$tp48 # tp4^=tp1
- and $maskfe,$tp80
- and $maskfe,$tp88
- and $mask1b,$acc0
- and $mask1b,$acc8
- xor $acc0,$tp80
- xor $acc8,$tp88
-
- xor $tp80,$tp10 # tp1^=tp8
- xor $tp88,$tp18 # tp1^=tp8
- xor $tp80,$tp20 # tp2^tp1^=tp8
- xor $tp88,$tp28 # tp2^tp1^=tp8
- mov $tp10,$acc0
- mov $tp18,$acc8
- xor $tp80,$tp40 # tp4^tp1^=tp8
- shr \$32,$acc0
- xor $tp88,$tp48 # tp4^tp1^=tp8
- shr \$32,$acc8
- xor $tp20,$tp80 # tp8^=tp8^tp2^tp1=tp2^tp1
- rol \$8,`&LO("$tp10")` # ROTATE(tp1^tp8,8)
- xor $tp28,$tp88 # tp8^=tp8^tp2^tp1=tp2^tp1
- rol \$8,`&LO("$tp18")` # ROTATE(tp1^tp8,8)
- xor $tp40,$tp80 # tp2^tp1^=tp8^tp4^tp1=tp8^tp4^tp2
- rol \$8,`&LO("$acc0")` # ROTATE(tp1^tp8,8)
- xor $tp48,$tp88 # tp2^tp1^=tp8^tp4^tp1=tp8^tp4^tp2
-
- rol \$8,`&LO("$acc8")` # ROTATE(tp1^tp8,8)
- xor `&LO("$tp80")`,`&LO("$tp10")`
- shr \$32,$tp80
- xor `&LO("$tp88")`,`&LO("$tp18")`
- shr \$32,$tp88
- xor `&LO("$tp80")`,`&LO("$acc0")`
- xor `&LO("$tp88")`,`&LO("$acc8")`
-
- mov $tp20,$tp80
- rol \$24,`&LO("$tp20")` # ROTATE(tp2^tp1^tp8,24)
- mov $tp28,$tp88
- rol \$24,`&LO("$tp28")` # ROTATE(tp2^tp1^tp8,24)
- shr \$32,$tp80
- xor `&LO("$tp20")`,`&LO("$tp10")`
- shr \$32,$tp88
- xor `&LO("$tp28")`,`&LO("$tp18")`
- rol \$24,`&LO("$tp80")` # ROTATE(tp2^tp1^tp8,24)
- mov $tp40,$tp20
- rol \$24,`&LO("$tp88")` # ROTATE(tp2^tp1^tp8,24)
- mov $tp48,$tp28
- shr \$32,$tp20
- xor `&LO("$tp80")`,`&LO("$acc0")`
- shr \$32,$tp28
- xor `&LO("$tp88")`,`&LO("$acc8")`
-
- `"mov 0($sbox),$mask80" if ($prefetch)`
- rol \$16,`&LO("$tp40")` # ROTATE(tp4^tp1^tp8,16)
- `"mov 64($sbox),$maskfe" if ($prefetch)`
- rol \$16,`&LO("$tp48")` # ROTATE(tp4^tp1^tp8,16)
- `"mov 128($sbox),$mask1b" if ($prefetch)`
- rol \$16,`&LO("$tp20")` # ROTATE(tp4^tp1^tp8,16)
- `"mov 192($sbox),$tp80" if ($prefetch)`
- xor `&LO("$tp40")`,`&LO("$tp10")`
- rol \$16,`&LO("$tp28")` # ROTATE(tp4^tp1^tp8,16)
- xor `&LO("$tp48")`,`&LO("$tp18")`
- `"mov 256($sbox),$tp88" if ($prefetch)`
- xor `&LO("$tp20")`,`&LO("$acc0")`
- xor `&LO("$tp28")`,`&LO("$acc8")`
-___
-}
-
-$code.=<<___; # _x86_64_AES_decrypt_compact: read eight spots of the table at $sbox, then start each round with a round-key XOR
-.type _x86_64_AES_decrypt_compact,\@abi-omnipotent
-.align 16
-_x86_64_AES_decrypt_compact:
-.cfi_startproc
- lea 128($sbox),$inp # size optimization
- mov 0-128($inp),$acc1 # prefetch Td4
- mov 32-128($inp),$acc2
- mov 64-128($inp),$t0
- mov 96-128($inp),$t1
- mov 128-128($inp),$acc1
- mov 160-128($inp),$acc2
- mov 192-128($inp),$t0
- mov 224-128($inp),$t1
- jmp .Ldec_loop_compact
-
-.align 16
-.Ldec_loop_compact:
- xor 0($key),$s0 # xor with key
- xor 4($key),$s1
- xor 8($key),$s2
- xor 12($key),$s3
- lea 16($key),$key
-___
- &deccompactvert(); # byte-lookup part of the round
-$code.=<<___; # exit once $key equals the pointer the caller left at 16(%rsp); else pack the state into %rax/%rcx and reload the masks
- cmp 16(%rsp),$key
- je .Ldec_compact_done
-
- mov 256+0($sbox),$mask80
- shl \$32,%rbx
- shl \$32,%rdx
- mov 256+8($sbox),$maskfe
- or %rbx,%rax
- or %rdx,%rcx
- mov 256+16($sbox),$mask1b
-___
- &dectransform(1); # 1 = also emit the interleaved table loads
-$code.=<<___; # loop back; at .Ldec_compact_done apply the last round key and return
- jmp .Ldec_loop_compact
-.align 16
-.Ldec_compact_done:
- xor 0($key),$s0
- xor 4($key),$s1
- xor 8($key),$s2
- xor 12($key),$s3
- .byte 0xf3,0xc3 # rep ret
-.cfi_endproc
-.size _x86_64_AES_decrypt_compact,.-_x86_64_AES_decrypt_compact
-___
-
-# void AES_decrypt (const void *inp,void *out,const AES_KEY *key);
-$code.=<<___; # public entry point: saves six callee-saved registers, builds a 64-byte-aligned frame, runs the compact routine
-.globl AES_decrypt
-.type AES_decrypt,\@function,3
-.align 16
-.globl asm_AES_decrypt
-.hidden asm_AES_decrypt
-asm_AES_decrypt:
-AES_decrypt:
-.cfi_startproc
- mov %rsp,%rax
-.cfi_def_cfa_register %rax
- push %rbx
-.cfi_push %rbx
- push %rbp
-.cfi_push %rbp
- push %r12
-.cfi_push %r12
- push %r13
-.cfi_push %r13
- push %r14
-.cfi_push %r14
- push %r15
-.cfi_push %r15
-
- # allocate frame "above" key schedule
- lea -63(%rdx),%rcx # %rdx is key argument
- and \$-64,%rsp
- sub %rsp,%rcx
- neg %rcx
- and \$0x3c0,%rcx
- sub %rcx,%rsp
- sub \$32,%rsp
-
- mov %rsi,16(%rsp) # save out
- mov %rax,24(%rsp) # save original stack pointer
-.cfi_cfa_expression %rsp+24,deref,+8
-.Ldec_prologue:
-
- mov %rdx,$key
- mov 240($key),$rnds # load rounds
-
- mov 0(%rdi),$s0 # load input vector
- mov 4(%rdi),$s1
- mov 8(%rdi),$s2
- mov 12(%rdi),$s3
-
- shl \$4,$rnds
- lea ($key,$rnds),%rbp
- mov $key,(%rsp) # key schedule
- mov %rbp,8(%rsp) # end of key schedule
-
- # pick Td4 copy which can't "overlap" with stack frame or key schedule
- lea .LAES_Td+2048(%rip),$sbox
- lea 768(%rsp),%rbp
- sub $sbox,%rbp
- and \$0x300,%rbp
- lea ($sbox,%rbp),$sbox
- shr \$3,%rbp # recall "magic" constants!
- add %rbp,$sbox
-
- call _x86_64_AES_decrypt_compact
-
- mov 16(%rsp),$out # restore out
- mov 24(%rsp),%rsi # restore saved stack pointer
-.cfi_def_cfa %rsi,8
- mov $s0,0($out) # write output vector
- mov $s1,4($out)
- mov $s2,8($out)
- mov $s3,12($out)
-
- mov -48(%rsi),%r15
-.cfi_restore %r15
- mov -40(%rsi),%r14
-.cfi_restore %r14
- mov -32(%rsi),%r13
-.cfi_restore %r13
- mov -24(%rsi),%r12
-.cfi_restore %r12
- mov -16(%rsi),%rbp
-.cfi_restore %rbp
- mov -8(%rsi),%rbx
-.cfi_restore %rbx
- lea (%rsi),%rsp
-.cfi_def_cfa_register %rsp
-.Ldec_epilogue:
- ret
-.cfi_endproc
-.size AES_decrypt,.-AES_decrypt
-___
-#------------------------------------------------------------------#
-
-sub enckey() # emits the key-schedule core: four byte lookups driven by %edx are XORed into %eax, then the rcon word indexed by %rcx
-{
-$code.=<<___; # lookups go through -128(%rbp,...); %ebx and %esi are scratch and %edx is shifted right by 16 along the way
- movz %dl,%esi # rk[i]>>0
- movzb -128(%rbp,%rsi),%ebx
- movz %dh,%esi # rk[i]>>8
- shl \$24,%ebx
- xor %ebx,%eax
-
- movzb -128(%rbp,%rsi),%ebx
- shr \$16,%edx
- movz %dl,%esi # rk[i]>>16
- xor %ebx,%eax
-
- movzb -128(%rbp,%rsi),%ebx
- movz %dh,%esi # rk[i]>>24
- shl \$8,%ebx
- xor %ebx,%eax
-
- movzb -128(%rbp,%rsi),%ebx
- shl \$16,%ebx
- xor %ebx,%eax
-
- xor 1024-128(%rbp,%rcx,4),%eax # rcon
-___
-}
-
-# int AES_set_encrypt_key(const unsigned char *userKey, const int bits,
-# AES_KEY *key)
-$code.=<<___; # public wrapper plus worker; the worker returns 0 on success, -1 for a null pointer, -2 for a bit count other than 128/192/256
-.globl AES_set_encrypt_key
-.type AES_set_encrypt_key,\@function,3
-.align 16
-AES_set_encrypt_key:
-.cfi_startproc
- push %rbx
-.cfi_push %rbx
- push %rbp
-.cfi_push %rbp
- push %r12 # redundant, but allows to share
-.cfi_push %r12
- push %r13 # exception handler...
-.cfi_push %r13
- push %r14
-.cfi_push %r14
- push %r15
-.cfi_push %r15
- sub \$8,%rsp
-.cfi_adjust_cfa_offset 8
-.Lenc_key_prologue:
-
- call _x86_64_AES_set_encrypt_key
-
- mov 40(%rsp),%rbp
-.cfi_restore %rbp
- mov 48(%rsp),%rbx
-.cfi_restore %rbx
- add \$56,%rsp
-.cfi_adjust_cfa_offset -56
-.Lenc_key_epilogue:
- ret
-.cfi_endproc
-.size AES_set_encrypt_key,.-AES_set_encrypt_key
-
-.type _x86_64_AES_set_encrypt_key,\@abi-omnipotent
-.align 16
-_x86_64_AES_set_encrypt_key:
-.cfi_startproc
- mov %esi,%ecx # %ecx=bits
- mov %rdi,%rsi # %rsi=userKey
- mov %rdx,%rdi # %rdi=key
-
- test \$-1,%rsi
- jz .Lbadpointer
- test \$-1,%rdi
- jz .Lbadpointer
-
- lea .LAES_Te(%rip),%rbp
- lea 2048+128(%rbp),%rbp
-
- # prefetch Te4
- mov 0-128(%rbp),%eax
- mov 32-128(%rbp),%ebx
- mov 64-128(%rbp),%r8d
- mov 96-128(%rbp),%edx
- mov 128-128(%rbp),%eax
- mov 160-128(%rbp),%ebx
- mov 192-128(%rbp),%r8d
- mov 224-128(%rbp),%edx
-
- cmp \$128,%ecx
- je .L10rounds
- cmp \$192,%ecx
- je .L12rounds
- cmp \$256,%ecx
- je .L14rounds
- mov \$-2,%rax # invalid number of bits
- jmp .Lexit
-
-.L10rounds:
- mov 0(%rsi),%rax # copy first 4 dwords
- mov 8(%rsi),%rdx
- mov %rax,0(%rdi)
- mov %rdx,8(%rdi)
-
- shr \$32,%rdx
- xor %ecx,%ecx
- jmp .L10shortcut
-.align 4
-.L10loop:
- mov 0(%rdi),%eax # rk[0]
- mov 12(%rdi),%edx # rk[3]
-.L10shortcut:
-___
- &enckey (); # 128-bit path: schedule core applied to the last dword of the current 16-byte group
-$code.=<<___; # derive four new dwords, advance %rdi by 16, repeat 10 times, then store the round count 10
- mov %eax,16(%rdi) # rk[4]
- xor 4(%rdi),%eax
- mov %eax,20(%rdi) # rk[5]
- xor 8(%rdi),%eax
- mov %eax,24(%rdi) # rk[6]
- xor 12(%rdi),%eax
- mov %eax,28(%rdi) # rk[7]
- add \$1,%ecx
- lea 16(%rdi),%rdi
- cmp \$10,%ecx
- jl .L10loop
-
- movl \$10,80(%rdi) # setup number of rounds
- xor %rax,%rax
- jmp .Lexit
-
-.L12rounds:
- mov 0(%rsi),%rax # copy first 6 dwords
- mov 8(%rsi),%rbx
- mov 16(%rsi),%rdx
- mov %rax,0(%rdi)
- mov %rbx,8(%rdi)
- mov %rdx,16(%rdi)
-
- shr \$32,%rdx
- xor %ecx,%ecx
- jmp .L12shortcut
-.align 4
-.L12loop:
- mov 0(%rdi),%eax # rk[0]
- mov 20(%rdi),%edx # rk[5]
-.L12shortcut:
-___
- &enckey (); # 192-bit path: schedule core applied to the last dword of the current 24-byte group
-$code.=<<___; # derive up to six new dwords per pass, advancing %rdi by 24; the pass with %ecx==7 stops after four and stores the round count 12
- mov %eax,24(%rdi) # rk[6]
- xor 4(%rdi),%eax
- mov %eax,28(%rdi) # rk[7]
- xor 8(%rdi),%eax
- mov %eax,32(%rdi) # rk[8]
- xor 12(%rdi),%eax
- mov %eax,36(%rdi) # rk[9]
-
- cmp \$7,%ecx
- je .L12break
- add \$1,%ecx
-
- xor 16(%rdi),%eax
- mov %eax,40(%rdi) # rk[10]
- xor 20(%rdi),%eax
- mov %eax,44(%rdi) # rk[11]
-
- lea 24(%rdi),%rdi
- jmp .L12loop
-.L12break:
- movl \$12,72(%rdi) # setup number of rounds
- xor %rax,%rax
- jmp .Lexit
-
-.L14rounds:
- mov 0(%rsi),%rax # copy first 8 dwords
- mov 8(%rsi),%rbx
- mov 16(%rsi),%rcx
- mov 24(%rsi),%rdx
- mov %rax,0(%rdi)
- mov %rbx,8(%rdi)
- mov %rcx,16(%rdi)
- mov %rdx,24(%rdi)
-
- shr \$32,%rdx
- xor %ecx,%ecx
- jmp .L14shortcut
-.align 4
-.L14loop:
- mov 0(%rdi),%eax # rk[0]
- mov 28(%rdi),%edx # rk[4]
-.L14shortcut:
-___
- &enckey (); # 256-bit path; NOTE(review): the "rk[4]" note on the 28(%rdi) load just above looks stale, byte offset 28 is dword 7
-$code.=<<___; # four dwords via the core, then four more via a second lookup pass without rcon; the pass with %ecx==6 stops early and stores 14
- mov %eax,32(%rdi) # rk[8]
- xor 4(%rdi),%eax
- mov %eax,36(%rdi) # rk[9]
- xor 8(%rdi),%eax
- mov %eax,40(%rdi) # rk[10]
- xor 12(%rdi),%eax
- mov %eax,44(%rdi) # rk[11]
-
- cmp \$6,%ecx
- je .L14break
- add \$1,%ecx
-
- mov %eax,%edx
- mov 16(%rdi),%eax # rk[4]
- movz %dl,%esi # rk[11]>>0
- movzb -128(%rbp,%rsi),%ebx
- movz %dh,%esi # rk[11]>>8
- xor %ebx,%eax
-
- movzb -128(%rbp,%rsi),%ebx
- shr \$16,%edx
- shl \$8,%ebx
- movz %dl,%esi # rk[11]>>16
- xor %ebx,%eax
-
- movzb -128(%rbp,%rsi),%ebx
- movz %dh,%esi # rk[11]>>24
- shl \$16,%ebx
- xor %ebx,%eax
-
- movzb -128(%rbp,%rsi),%ebx
- shl \$24,%ebx
- xor %ebx,%eax
-
- mov %eax,48(%rdi) # rk[12]
- xor 20(%rdi),%eax
- mov %eax,52(%rdi) # rk[13]
- xor 24(%rdi),%eax
- mov %eax,56(%rdi) # rk[14]
- xor 28(%rdi),%eax
- mov %eax,60(%rdi) # rk[15]
-
- lea 32(%rdi),%rdi
- jmp .L14loop
-.L14break:
- movl \$14,48(%rdi) # setup number of rounds
- xor %rax,%rax
- jmp .Lexit
-
-.Lbadpointer:
- mov \$-1,%rax
-.Lexit:
- .byte 0xf3,0xc3 # rep ret
-.cfi_endproc
-.size _x86_64_AES_set_encrypt_key,.-_x86_64_AES_set_encrypt_key
-___
-
-sub deckey_ref() # emits an in-place transform of the 32-bit word at $i($ptr); no call to it is visible in this part of the file
-{ my ($i,$ptr,$te,$td) = @_; # $i: byte offset, $ptr: base register; $te and $td are accepted but not used in the body
- my ($tp1,$tp2,$tp4,$tp8,$acc)=("%eax","%ebx","%edi","%edx","%r8d"); # registers overwritten by the emitted sequence
-$code.=<<___;
- mov $i($ptr),$tp1
- mov $tp1,$acc
- and \$0x80808080,$acc
- mov $acc,$tp4
- shr \$7,$tp4
- lea 0($tp1,$tp1),$tp2
- sub $tp4,$acc
- and \$0xfefefefe,$tp2
- and \$0x1b1b1b1b,$acc
- xor $tp2,$acc
- mov $acc,$tp2
-
- and \$0x80808080,$acc
- mov $acc,$tp8
- shr \$7,$tp8
- lea 0($tp2,$tp2),$tp4
- sub $tp8,$acc
- and \$0xfefefefe,$tp4
- and \$0x1b1b1b1b,$acc
- xor $tp1,$tp2 # tp2^tp1
- xor $tp4,$acc
- mov $acc,$tp4
-
- and \$0x80808080,$acc
- mov $acc,$tp8
- shr \$7,$tp8
- sub $tp8,$acc
- lea 0($tp4,$tp4),$tp8
- xor $tp1,$tp4 # tp4^tp1
- and \$0xfefefefe,$tp8
- and \$0x1b1b1b1b,$acc
- xor $acc,$tp8
-
- xor $tp8,$tp1 # tp1^tp8
- rol \$8,$tp1 # ROTATE(tp1^tp8,8)
- xor $tp8,$tp2 # tp2^tp1^tp8
- xor $tp8,$tp4 # tp4^tp1^tp8
- xor $tp2,$tp8
- xor $tp4,$tp8 # tp8^(tp8^tp4^tp1)^(tp8^tp2^tp1)=tp8^tp4^tp2
-
- xor $tp8,$tp1
- rol \$24,$tp2 # ROTATE(tp2^tp1^tp8,24)
- xor $tp2,$tp1
- rol \$16,$tp4 # ROTATE(tp4^tp1^tp8,16)
- xor $tp4,$tp1
-
- mov $tp1,$i($ptr)
-___
-}
-
-# int AES_set_decrypt_key(const unsigned char *userKey, const int bits,
-# AES_KEY *key)
-$code.=<<___; # builds the encryption schedule, swaps its 16-byte round keys end-for-end (.Linvert), then loads the masks for .Lpermute
-.globl AES_set_decrypt_key
-.type AES_set_decrypt_key,\@function,3
-.align 16
-AES_set_decrypt_key:
-.cfi_startproc
- push %rbx
-.cfi_push %rbx
- push %rbp
-.cfi_push %rbp
- push %r12
-.cfi_push %r12
- push %r13
-.cfi_push %r13
- push %r14
-.cfi_push %r14
- push %r15
-.cfi_push %r15
- push %rdx # save key schedule
-.cfi_adjust_cfa_offset 8
-.Ldec_key_prologue:
-
- call _x86_64_AES_set_encrypt_key
- mov (%rsp),%r8 # restore key schedule
- cmp \$0,%eax
- jne .Labort
-
- mov 240(%r8),%r14d # pull number of rounds
- xor %rdi,%rdi
- lea (%rdi,%r14d,4),%rcx
- mov %r8,%rsi
- lea (%r8,%rcx,4),%rdi # pointer to last chunk
-.align 4
-.Linvert:
- mov 0(%rsi),%rax
- mov 8(%rsi),%rbx
- mov 0(%rdi),%rcx
- mov 8(%rdi),%rdx
- mov %rax,0(%rdi)
- mov %rbx,8(%rdi)
- mov %rcx,0(%rsi)
- mov %rdx,8(%rsi)
- lea 16(%rsi),%rsi
- lea -16(%rdi),%rdi
- cmp %rsi,%rdi
- jne .Linvert
-
- lea .LAES_Te+2048+1024(%rip),%rax # rcon
-
- mov 40(%rax),$mask80
- mov 48(%rax),$maskfe
- mov 56(%rax),$mask1b
-
- mov %r8,$key
- sub \$1,%r14d
-.align 4
-.Lpermute:
- lea 16($key),$key
- mov 0($key),%rax
- mov 8($key),%rcx
-___
- &dectransform (); # called without the flag, so no table loads are interleaved and the masks stay intact across iterations
-$code.=<<___; # store the four transformed dwords, loop rounds-1 times (the first round key is skipped), return 0; .Labort keeps the error code
- mov %eax,0($key)
- mov %ebx,4($key)
- mov %ecx,8($key)
- mov %edx,12($key)
- sub \$1,%r14d
- jnz .Lpermute
-
- xor %rax,%rax
-.Labort:
- mov 8(%rsp),%r15
-.cfi_restore %r15
- mov 16(%rsp),%r14
-.cfi_restore %r14
- mov 24(%rsp),%r13
-.cfi_restore %r13
- mov 32(%rsp),%r12
-.cfi_restore %r12
- mov 40(%rsp),%rbp
-.cfi_restore %rbp
- mov 48(%rsp),%rbx
-.cfi_restore %rbx
- add \$56,%rsp
-.cfi_adjust_cfa_offset -56
-.Ldec_key_epilogue:
- ret
-.cfi_endproc
-.size AES_set_decrypt_key,.-AES_set_decrypt_key
-___
-
-# void AES_cbc_encrypt (const unsigned char *inp, unsigned char *out,
-# size_t length, const AES_KEY *key,
-# unsigned char *ivp,const int enc);
-{ # NOTE(review): inside the heredoc, "e = ($sbox+2048)" annotates a 2304 offset and "(p=>e) %rsp =-" presumably means "(p>=e) ... -="
-# stack frame layout
-# -8(%rsp) return address
-my $keyp="0(%rsp)"; # one to pass as $key
-my $keyend="8(%rsp)"; # &(keyp->rd_key[4*keyp->rounds])
-my $_rsp="16(%rsp)"; # saved %rsp
-my $_inp="24(%rsp)"; # copy of 1st parameter, inp
-my $_out="32(%rsp)"; # copy of 2nd parameter, out
-my $_len="40(%rsp)"; # copy of 3rd parameter, length
-my $_key="48(%rsp)"; # copy of 4th parameter, key
-my $_ivp="56(%rsp)"; # copy of 5th parameter, ivp
-my $ivec="64(%rsp)"; # ivec[16]
-my $aes_key="80(%rsp)"; # copy of aes_key
-my $mark="80+240(%rsp)"; # copy of aes_key->rounds
-
-$code.=<<___; # fast path needs length >= $speed_limit, a multiple of 16, and bit 28 of OPENSSL_ia32cap_P clear; otherwise the compact "slow" path runs
-.globl AES_cbc_encrypt
-.type AES_cbc_encrypt,\@function,6
-.align 16
-.extern OPENSSL_ia32cap_P
-.globl asm_AES_cbc_encrypt
-.hidden asm_AES_cbc_encrypt
-asm_AES_cbc_encrypt:
-AES_cbc_encrypt:
-.cfi_startproc
- cmp \$0,%rdx # check length
- je .Lcbc_epilogue
- pushfq
-# This could be .cfi_push 49, but libunwind fails on registers it does not
-# recognize. See https://bugzilla.redhat.com/show_bug.cgi?id=217087.
-.cfi_adjust_cfa_offset 8
- push %rbx
-.cfi_push %rbx
- push %rbp
-.cfi_push %rbp
- push %r12
-.cfi_push %r12
- push %r13
-.cfi_push %r13
- push %r14
-.cfi_push %r14
- push %r15
-.cfi_push %r15
-.Lcbc_prologue:
-
- cld
- mov %r9d,%r9d # clear upper half of enc
-
- lea .LAES_Te(%rip),$sbox
- lea .LAES_Td(%rip),%r10
- cmp \$0,%r9
- cmoveq %r10,$sbox
-
-.cfi_remember_state
- mov OPENSSL_ia32cap_P(%rip),%r10d
- cmp \$$speed_limit,%rdx
- jb .Lcbc_slow_prologue
- test \$15,%rdx
- jnz .Lcbc_slow_prologue
- bt \$28,%r10d
- jc .Lcbc_slow_prologue
-
- # allocate aligned stack frame...
- lea -88-248(%rsp),$key
- and \$-64,$key
-
- # ... and make sure it doesn't alias with AES_T[ed] modulo 4096
- mov $sbox,%r10
- lea 2304($sbox),%r11
- mov $key,%r12
- and \$0xFFF,%r10 # s = $sbox&0xfff
- and \$0xFFF,%r11 # e = ($sbox+2048)&0xfff
- and \$0xFFF,%r12 # p = %rsp&0xfff
-
- cmp %r11,%r12 # if (p=>e) %rsp =- (p-e);
- jb .Lcbc_te_break_out
- sub %r11,%r12
- sub %r12,$key
- jmp .Lcbc_te_ok
-.Lcbc_te_break_out: # else %rsp -= (p-s)&0xfff + framesz
- sub %r10,%r12
- and \$0xFFF,%r12
- add \$320,%r12
- sub %r12,$key
-.align 4
-.Lcbc_te_ok:
-
- xchg %rsp,$key
-.cfi_def_cfa_register $key
- #add \$8,%rsp # reserve for return address!
- mov $key,$_rsp # save %rsp
-.cfi_cfa_expression $_rsp,deref,+64
-.Lcbc_fast_body:
- mov %rdi,$_inp # save copy of inp
- mov %rsi,$_out # save copy of out
- mov %rdx,$_len # save copy of len
- mov %rcx,$_key # save copy of key
- mov %r8,$_ivp # save copy of ivp
- movl \$0,$mark # copy of aes_key->rounds = 0;
- mov %r8,%rbp # rearrange input arguments
- mov %r9,%rbx
- mov %rsi,$out
- mov %rdi,$inp
- mov %rcx,$key
-
- mov 240($key),%eax # key->rounds
- # do we copy key schedule to stack?
- mov $key,%r10
- sub $sbox,%r10
- and \$0xfff,%r10
- cmp \$2304,%r10
- jb .Lcbc_do_ecopy
- cmp \$4096-248,%r10
- jb .Lcbc_skip_ecopy
-.align 4
-.Lcbc_do_ecopy:
- mov $key,%rsi
- lea $aes_key,%rdi
- lea $aes_key,$key
- mov \$240/8,%ecx
- .long 0x90A548F3 # rep movsq
- mov %eax,(%rdi) # copy aes_key->rounds
-.Lcbc_skip_ecopy:
- mov $key,$keyp # save key pointer
-
- mov \$18,%ecx
-.align 4
-.Lcbc_prefetch_te:
- mov 0($sbox),%r10
- mov 32($sbox),%r11
- mov 64($sbox),%r12
- mov 96($sbox),%r13
- lea 128($sbox),$sbox
- sub \$1,%ecx
- jnz .Lcbc_prefetch_te
- lea -2304($sbox),$sbox
-
- cmp \$0,%rbx
- je .LFAST_DECRYPT
-
-#----------------------------- ENCRYPT -----------------------------#
- mov 0(%rbp),$s0 # load iv
- mov 4(%rbp),$s1
- mov 8(%rbp),$s2
- mov 12(%rbp),$s3
-
-.align 4
-.Lcbc_fast_enc_loop:
- xor 0($inp),$s0
- xor 4($inp),$s1
- xor 8($inp),$s2
- xor 12($inp),$s3
- mov $keyp,$key # restore key
- mov $inp,$_inp # if ($verticalspin) save inp
-
- call _x86_64_AES_encrypt
-
- mov $_inp,$inp # if ($verticalspin) restore inp
- mov $_len,%r10
- mov $s0,0($out)
- mov $s1,4($out)
- mov $s2,8($out)
- mov $s3,12($out)
-
- lea 16($inp),$inp
- lea 16($out),$out
- sub \$16,%r10
- test \$-16,%r10
- mov %r10,$_len
- jnz .Lcbc_fast_enc_loop
- mov $_ivp,%rbp # restore ivp
- mov $s0,0(%rbp) # save ivec
- mov $s1,4(%rbp)
- mov $s2,8(%rbp)
- mov $s3,12(%rbp)
-
- jmp .Lcbc_fast_cleanup
-
-#----------------------------- DECRYPT -----------------------------#
-.align 16
-.LFAST_DECRYPT:
- cmp $inp,$out
- je .Lcbc_fast_dec_in_place
-
- mov %rbp,$ivec
-.align 4
-.Lcbc_fast_dec_loop:
- mov 0($inp),$s0 # read input
- mov 4($inp),$s1
- mov 8($inp),$s2
- mov 12($inp),$s3
- mov $keyp,$key # restore key
- mov $inp,$_inp # if ($verticalspin) save inp
-
- call _x86_64_AES_decrypt
-
- mov $ivec,%rbp # load ivp
- mov $_inp,$inp # if ($verticalspin) restore inp
- mov $_len,%r10 # load len
- xor 0(%rbp),$s0 # xor iv
- xor 4(%rbp),$s1
- xor 8(%rbp),$s2
- xor 12(%rbp),$s3
- mov $inp,%rbp # current input, next iv
-
- sub \$16,%r10
- mov %r10,$_len # update len
- mov %rbp,$ivec # update ivp
-
- mov $s0,0($out) # write output
- mov $s1,4($out)
- mov $s2,8($out)
- mov $s3,12($out)
-
- lea 16($inp),$inp
- lea 16($out),$out
- jnz .Lcbc_fast_dec_loop
- mov $_ivp,%r12 # load user ivp
- mov 0(%rbp),%r10 # load iv
- mov 8(%rbp),%r11
- mov %r10,0(%r12) # copy back to user
- mov %r11,8(%r12)
- jmp .Lcbc_fast_cleanup
-
-.align 16
-.Lcbc_fast_dec_in_place:
- mov 0(%rbp),%r10 # copy iv to stack
- mov 8(%rbp),%r11
- mov %r10,0+$ivec
- mov %r11,8+$ivec
-.align 4
-.Lcbc_fast_dec_in_place_loop:
- mov 0($inp),$s0 # load input
- mov 4($inp),$s1
- mov 8($inp),$s2
- mov 12($inp),$s3
- mov $keyp,$key # restore key
- mov $inp,$_inp # if ($verticalspin) save inp
-
- call _x86_64_AES_decrypt
-
- mov $_inp,$inp # if ($verticalspin) restore inp
- mov $_len,%r10
- xor 0+$ivec,$s0
- xor 4+$ivec,$s1
- xor 8+$ivec,$s2
- xor 12+$ivec,$s3
-
- mov 0($inp),%r11 # load input
- mov 8($inp),%r12
- sub \$16,%r10
- jz .Lcbc_fast_dec_in_place_done
-
- mov %r11,0+$ivec # copy input to iv
- mov %r12,8+$ivec
-
- mov $s0,0($out) # save output [zaps input]
- mov $s1,4($out)
- mov $s2,8($out)
- mov $s3,12($out)
-
- lea 16($inp),$inp
- lea 16($out),$out
- mov %r10,$_len
- jmp .Lcbc_fast_dec_in_place_loop
-.Lcbc_fast_dec_in_place_done:
- mov $_ivp,%rdi
- mov %r11,0(%rdi) # copy iv back to user
- mov %r12,8(%rdi)
-
- mov $s0,0($out) # save output [zaps input]
- mov $s1,4($out)
- mov $s2,8($out)
- mov $s3,12($out)
-
-.align 4
-.Lcbc_fast_cleanup:
- cmpl \$0,$mark # was the key schedule copied?
- lea $aes_key,%rdi
- je .Lcbc_exit
- mov \$240/8,%ecx
- xor %rax,%rax
- .long 0x90AB48F3 # rep stosq
-
- jmp .Lcbc_exit
-
-#--------------------------- SLOW ROUTINE ---------------------------#
-.align 16
-.Lcbc_slow_prologue:
-.cfi_restore_state
- # allocate aligned stack frame...
- lea -88(%rsp),%rbp
- and \$-64,%rbp
- # ... just "above" key schedule
- lea -88-63(%rcx),%r10
- sub %rbp,%r10
- neg %r10
- and \$0x3c0,%r10
- sub %r10,%rbp
-
- xchg %rsp,%rbp
-.cfi_def_cfa_register %rbp
- #add \$8,%rsp # reserve for return address!
- mov %rbp,$_rsp # save %rsp
-.cfi_cfa_expression $_rsp,deref,+64
-.Lcbc_slow_body:
- #mov %rdi,$_inp # save copy of inp
- #mov %rsi,$_out # save copy of out
- #mov %rdx,$_len # save copy of len
- #mov %rcx,$_key # save copy of key
- mov %r8,$_ivp # save copy of ivp
- mov %r8,%rbp # rearrange input arguments
- mov %r9,%rbx
- mov %rsi,$out
- mov %rdi,$inp
- mov %rcx,$key
- mov %rdx,%r10
-
- mov 240($key),%eax
- mov $key,$keyp # save key pointer
- shl \$4,%eax
- lea ($key,%rax),%rax
- mov %rax,$keyend
-
- # pick Te4 copy which can't "overlap" with stack frame or key schedule
- lea 2048($sbox),$sbox
- lea 768-8(%rsp),%rax
- sub $sbox,%rax
- and \$0x300,%rax
- lea ($sbox,%rax),$sbox
-
- cmp \$0,%rbx
- je .LSLOW_DECRYPT
-
-#--------------------------- SLOW ENCRYPT ---------------------------#
- test \$-16,%r10 # check upon length
- mov 0(%rbp),$s0 # load iv
- mov 4(%rbp),$s1
- mov 8(%rbp),$s2
- mov 12(%rbp),$s3
- jz .Lcbc_slow_enc_tail # short input...
-
-.align 4
-.Lcbc_slow_enc_loop:
- xor 0($inp),$s0
- xor 4($inp),$s1
- xor 8($inp),$s2
- xor 12($inp),$s3
- mov $keyp,$key # restore key
- mov $inp,$_inp # save inp
- mov $out,$_out # save out
- mov %r10,$_len # save len
-
- call _x86_64_AES_encrypt_compact
-
- mov $_inp,$inp # restore inp
- mov $_out,$out # restore out
- mov $_len,%r10 # restore len
- mov $s0,0($out)
- mov $s1,4($out)
- mov $s2,8($out)
- mov $s3,12($out)
-
- lea 16($inp),$inp
- lea 16($out),$out
- sub \$16,%r10
- test \$-16,%r10
- jnz .Lcbc_slow_enc_loop
- test \$15,%r10
- jnz .Lcbc_slow_enc_tail
- mov $_ivp,%rbp # restore ivp
- mov $s0,0(%rbp) # save ivec
- mov $s1,4(%rbp)
- mov $s2,8(%rbp)
- mov $s3,12(%rbp)
-
- jmp .Lcbc_exit
-
-.align 4
-.Lcbc_slow_enc_tail:
- mov %rax,%r11
- mov %rcx,%r12
- mov %r10,%rcx
- mov $inp,%rsi
- mov $out,%rdi
- .long 0x9066A4F3 # rep movsb
- mov \$16,%rcx # zero tail
- sub %r10,%rcx
- xor %rax,%rax
- .long 0x9066AAF3 # rep stosb
- mov $out,$inp # this is not a mistake!
- mov \$16,%r10 # len=16
- mov %r11,%rax
- mov %r12,%rcx
- jmp .Lcbc_slow_enc_loop # one more spin...
-#--------------------------- SLOW DECRYPT ---------------------------#
-.align 16
-.LSLOW_DECRYPT:
- shr \$3,%rax
- add %rax,$sbox # recall "magic" constants!
-
- mov 0(%rbp),%r11 # copy iv to stack
- mov 8(%rbp),%r12
- mov %r11,0+$ivec
- mov %r12,8+$ivec
-
-.align 4
-.Lcbc_slow_dec_loop:
- mov 0($inp),$s0 # load input
- mov 4($inp),$s1
- mov 8($inp),$s2
- mov 12($inp),$s3
- mov $keyp,$key # restore key
- mov $inp,$_inp # save inp
- mov $out,$_out # save out
- mov %r10,$_len # save len
-
- call _x86_64_AES_decrypt_compact
-
- mov $_inp,$inp # restore inp
- mov $_out,$out # restore out
- mov $_len,%r10
- xor 0+$ivec,$s0
- xor 4+$ivec,$s1
- xor 8+$ivec,$s2
- xor 12+$ivec,$s3
-
- mov 0($inp),%r11 # load input
- mov 8($inp),%r12
- sub \$16,%r10
- jc .Lcbc_slow_dec_partial
- jz .Lcbc_slow_dec_done
-
- mov %r11,0+$ivec # copy input to iv
- mov %r12,8+$ivec
-
- mov $s0,0($out) # save output [can zap input]
- mov $s1,4($out)
- mov $s2,8($out)
- mov $s3,12($out)
-
- lea 16($inp),$inp
- lea 16($out),$out
- jmp .Lcbc_slow_dec_loop
-.Lcbc_slow_dec_done:
- mov $_ivp,%rdi
- mov %r11,0(%rdi) # copy iv back to user
- mov %r12,8(%rdi)
-
- mov $s0,0($out) # save output [can zap input]
- mov $s1,4($out)
- mov $s2,8($out)
- mov $s3,12($out)
-
- jmp .Lcbc_exit
-
-.align 4
-.Lcbc_slow_dec_partial:
- mov $_ivp,%rdi
- mov %r11,0(%rdi) # copy iv back to user
- mov %r12,8(%rdi)
-
- mov $s0,0+$ivec # save output to stack
- mov $s1,4+$ivec
- mov $s2,8+$ivec
- mov $s3,12+$ivec
-
- mov $out,%rdi
- lea $ivec,%rsi
- lea 16(%r10),%rcx
- .long 0x9066A4F3 # rep movsb
- jmp .Lcbc_exit
-
-.align 16
-.Lcbc_exit:
- mov $_rsp,%rsi
-.cfi_def_cfa %rsi,64
- mov (%rsi),%r15
-.cfi_restore %r15
- mov 8(%rsi),%r14
-.cfi_restore %r14
- mov 16(%rsi),%r13
-.cfi_restore %r13
- mov 24(%rsi),%r12
-.cfi_restore %r12
- mov 32(%rsi),%rbp
-.cfi_restore %rbp
- mov 40(%rsi),%rbx
-.cfi_restore %rbx
- lea 48(%rsi),%rsp
-.cfi_def_cfa %rsp,16
-.Lcbc_popfq:
- popfq
-# This could be .cfi_pop 49, but libunwind fails on registers it does not
-# recognize. See https://bugzilla.redhat.com/show_bug.cgi?id=217087.
-.cfi_adjust_cfa_offset -8
-.Lcbc_epilogue:
- ret
-.cfi_endproc
-.size AES_cbc_encrypt,.-AES_cbc_encrypt
-___
-}
-
-$code.=<<___;
-.align 64
-.LAES_Te:
-___
- &_data_word(0xa56363c6, 0x847c7cf8, 0x997777ee, 0x8d7b7bf6);
- &_data_word(0x0df2f2ff, 0xbd6b6bd6, 0xb16f6fde, 0x54c5c591);
- &_data_word(0x50303060, 0x03010102, 0xa96767ce, 0x7d2b2b56);
- &_data_word(0x19fefee7, 0x62d7d7b5, 0xe6abab4d, 0x9a7676ec);
- &_data_word(0x45caca8f, 0x9d82821f, 0x40c9c989, 0x877d7dfa);
- &_data_word(0x15fafaef, 0xeb5959b2, 0xc947478e, 0x0bf0f0fb);
- &_data_word(0xecadad41, 0x67d4d4b3, 0xfda2a25f, 0xeaafaf45);
- &_data_word(0xbf9c9c23, 0xf7a4a453, 0x967272e4, 0x5bc0c09b);
- &_data_word(0xc2b7b775, 0x1cfdfde1, 0xae93933d, 0x6a26264c);
- &_data_word(0x5a36366c, 0x413f3f7e, 0x02f7f7f5, 0x4fcccc83);
- &_data_word(0x5c343468, 0xf4a5a551, 0x34e5e5d1, 0x08f1f1f9);
- &_data_word(0x937171e2, 0x73d8d8ab, 0x53313162, 0x3f15152a);
- &_data_word(0x0c040408, 0x52c7c795, 0x65232346, 0x5ec3c39d);
- &_data_word(0x28181830, 0xa1969637, 0x0f05050a, 0xb59a9a2f);
- &_data_word(0x0907070e, 0x36121224, 0x9b80801b, 0x3de2e2df);
- &_data_word(0x26ebebcd, 0x6927274e, 0xcdb2b27f, 0x9f7575ea);
- &_data_word(0x1b090912, 0x9e83831d, 0x742c2c58, 0x2e1a1a34);
- &_data_word(0x2d1b1b36, 0xb26e6edc, 0xee5a5ab4, 0xfba0a05b);
- &_data_word(0xf65252a4, 0x4d3b3b76, 0x61d6d6b7, 0xceb3b37d);
- &_data_word(0x7b292952, 0x3ee3e3dd, 0x712f2f5e, 0x97848413);
- &_data_word(0xf55353a6, 0x68d1d1b9, 0x00000000, 0x2cededc1);
- &_data_word(0x60202040, 0x1ffcfce3, 0xc8b1b179, 0xed5b5bb6);
- &_data_word(0xbe6a6ad4, 0x46cbcb8d, 0xd9bebe67, 0x4b393972);
- &_data_word(0xde4a4a94, 0xd44c4c98, 0xe85858b0, 0x4acfcf85);
- &_data_word(0x6bd0d0bb, 0x2aefefc5, 0xe5aaaa4f, 0x16fbfbed);
- &_data_word(0xc5434386, 0xd74d4d9a, 0x55333366, 0x94858511);
- &_data_word(0xcf45458a, 0x10f9f9e9, 0x06020204, 0x817f7ffe);
- &_data_word(0xf05050a0, 0x443c3c78, 0xba9f9f25, 0xe3a8a84b);
- &_data_word(0xf35151a2, 0xfea3a35d, 0xc0404080, 0x8a8f8f05);
- &_data_word(0xad92923f, 0xbc9d9d21, 0x48383870, 0x04f5f5f1);
- &_data_word(0xdfbcbc63, 0xc1b6b677, 0x75dadaaf, 0x63212142);
- &_data_word(0x30101020, 0x1affffe5, 0x0ef3f3fd, 0x6dd2d2bf);
- &_data_word(0x4ccdcd81, 0x140c0c18, 0x35131326, 0x2fececc3);
- &_data_word(0xe15f5fbe, 0xa2979735, 0xcc444488, 0x3917172e);
- &_data_word(0x57c4c493, 0xf2a7a755, 0x827e7efc, 0x473d3d7a);
- &_data_word(0xac6464c8, 0xe75d5dba, 0x2b191932, 0x957373e6);
- &_data_word(0xa06060c0, 0x98818119, 0xd14f4f9e, 0x7fdcdca3);
- &_data_word(0x66222244, 0x7e2a2a54, 0xab90903b, 0x8388880b);
- &_data_word(0xca46468c, 0x29eeeec7, 0xd3b8b86b, 0x3c141428);
- &_data_word(0x79dedea7, 0xe25e5ebc, 0x1d0b0b16, 0x76dbdbad);
- &_data_word(0x3be0e0db, 0x56323264, 0x4e3a3a74, 0x1e0a0a14);
- &_data_word(0xdb494992, 0x0a06060c, 0x6c242448, 0xe45c5cb8);
- &_data_word(0x5dc2c29f, 0x6ed3d3bd, 0xefacac43, 0xa66262c4);
- &_data_word(0xa8919139, 0xa4959531, 0x37e4e4d3, 0x8b7979f2);
- &_data_word(0x32e7e7d5, 0x43c8c88b, 0x5937376e, 0xb76d6dda);
- &_data_word(0x8c8d8d01, 0x64d5d5b1, 0xd24e4e9c, 0xe0a9a949);
- &_data_word(0xb46c6cd8, 0xfa5656ac, 0x07f4f4f3, 0x25eaeacf);
- &_data_word(0xaf6565ca, 0x8e7a7af4, 0xe9aeae47, 0x18080810);
- &_data_word(0xd5baba6f, 0x887878f0, 0x6f25254a, 0x722e2e5c);
- &_data_word(0x241c1c38, 0xf1a6a657, 0xc7b4b473, 0x51c6c697);
- &_data_word(0x23e8e8cb, 0x7cdddda1, 0x9c7474e8, 0x211f1f3e);
- &_data_word(0xdd4b4b96, 0xdcbdbd61, 0x868b8b0d, 0x858a8a0f);
- &_data_word(0x907070e0, 0x423e3e7c, 0xc4b5b571, 0xaa6666cc);
- &_data_word(0xd8484890, 0x05030306, 0x01f6f6f7, 0x120e0e1c);
- &_data_word(0xa36161c2, 0x5f35356a, 0xf95757ae, 0xd0b9b969);
- &_data_word(0x91868617, 0x58c1c199, 0x271d1d3a, 0xb99e9e27);
- &_data_word(0x38e1e1d9, 0x13f8f8eb, 0xb398982b, 0x33111122);
- &_data_word(0xbb6969d2, 0x70d9d9a9, 0x898e8e07, 0xa7949433);
- &_data_word(0xb69b9b2d, 0x221e1e3c, 0x92878715, 0x20e9e9c9);
- &_data_word(0x49cece87, 0xff5555aa, 0x78282850, 0x7adfdfa5);
- &_data_word(0x8f8c8c03, 0xf8a1a159, 0x80898909, 0x170d0d1a);
- &_data_word(0xdabfbf65, 0x31e6e6d7, 0xc6424284, 0xb86868d0);
- &_data_word(0xc3414182, 0xb0999929, 0x772d2d5a, 0x110f0f1e);
- &_data_word(0xcbb0b07b, 0xfc5454a8, 0xd6bbbb6d, 0x3a16162c);
-
-#Te4 # four copies of Te4 to choose from to avoid L1 aliasing
- &data_byte(0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5);
- &data_byte(0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76);
- &data_byte(0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0);
- &data_byte(0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0);
- &data_byte(0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc);
- &data_byte(0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15);
- &data_byte(0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a);
- &data_byte(0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75);
- &data_byte(0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0);
- &data_byte(0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84);
- &data_byte(0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b);
- &data_byte(0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf);
- &data_byte(0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85);
- &data_byte(0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8);
- &data_byte(0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5);
- &data_byte(0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2);
- &data_byte(0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17);
- &data_byte(0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73);
- &data_byte(0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88);
- &data_byte(0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb);
- &data_byte(0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c);
- &data_byte(0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79);
- &data_byte(0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9);
- &data_byte(0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08);
- &data_byte(0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6);
- &data_byte(0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a);
- &data_byte(0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e);
- &data_byte(0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e);
- &data_byte(0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94);
- &data_byte(0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf);
- &data_byte(0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68);
- &data_byte(0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16);
-
- &data_byte(0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5);
- &data_byte(0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76);
- &data_byte(0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0);
- &data_byte(0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0);
- &data_byte(0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc);
- &data_byte(0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15);
- &data_byte(0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a);
- &data_byte(0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75);
- &data_byte(0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0);
- &data_byte(0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84);
- &data_byte(0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b);
- &data_byte(0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf);
- &data_byte(0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85);
- &data_byte(0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8);
- &data_byte(0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5);
- &data_byte(0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2);
- &data_byte(0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17);
- &data_byte(0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73);
- &data_byte(0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88);
- &data_byte(0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb);
- &data_byte(0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c);
- &data_byte(0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79);
- &data_byte(0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9);
- &data_byte(0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08);
- &data_byte(0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6);
- &data_byte(0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a);
- &data_byte(0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e);
- &data_byte(0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e);
- &data_byte(0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94);
- &data_byte(0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf);
- &data_byte(0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68);
- &data_byte(0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16);
-
- &data_byte(0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5);
- &data_byte(0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76);
- &data_byte(0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0);
- &data_byte(0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0);
- &data_byte(0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc);
- &data_byte(0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15);
- &data_byte(0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a);
- &data_byte(0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75);
- &data_byte(0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0);
- &data_byte(0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84);
- &data_byte(0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b);
- &data_byte(0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf);
- &data_byte(0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85);
- &data_byte(0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8);
- &data_byte(0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5);
- &data_byte(0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2);
- &data_byte(0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17);
- &data_byte(0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73);
- &data_byte(0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88);
- &data_byte(0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb);
- &data_byte(0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c);
- &data_byte(0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79);
- &data_byte(0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9);
- &data_byte(0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08);
- &data_byte(0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6);
- &data_byte(0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a);
- &data_byte(0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e);
- &data_byte(0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e);
- &data_byte(0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94);
- &data_byte(0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf);
- &data_byte(0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68);
- &data_byte(0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16);
-
- &data_byte(0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5);
- &data_byte(0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76);
- &data_byte(0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0);
- &data_byte(0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0);
- &data_byte(0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc);
- &data_byte(0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15);
- &data_byte(0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a);
- &data_byte(0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75);
- &data_byte(0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0);
- &data_byte(0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84);
- &data_byte(0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b);
- &data_byte(0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf);
- &data_byte(0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85);
- &data_byte(0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8);
- &data_byte(0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5);
- &data_byte(0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2);
- &data_byte(0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17);
- &data_byte(0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73);
- &data_byte(0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88);
- &data_byte(0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb);
- &data_byte(0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c);
- &data_byte(0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79);
- &data_byte(0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9);
- &data_byte(0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08);
- &data_byte(0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6);
- &data_byte(0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a);
- &data_byte(0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e);
- &data_byte(0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e);
- &data_byte(0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94);
- &data_byte(0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf);
- &data_byte(0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68);
- &data_byte(0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16);
-#rcon:
-$code.=<<___;
- .long 0x00000001, 0x00000002, 0x00000004, 0x00000008
- .long 0x00000010, 0x00000020, 0x00000040, 0x00000080
- .long 0x0000001b, 0x00000036, 0x80808080, 0x80808080
- .long 0xfefefefe, 0xfefefefe, 0x1b1b1b1b, 0x1b1b1b1b
-___
-$code.=<<___;
-.align 64
-.LAES_Td:
-___
- &_data_word(0x50a7f451, 0x5365417e, 0xc3a4171a, 0x965e273a);
- &_data_word(0xcb6bab3b, 0xf1459d1f, 0xab58faac, 0x9303e34b);
- &_data_word(0x55fa3020, 0xf66d76ad, 0x9176cc88, 0x254c02f5);
- &_data_word(0xfcd7e54f, 0xd7cb2ac5, 0x80443526, 0x8fa362b5);
- &_data_word(0x495ab1de, 0x671bba25, 0x980eea45, 0xe1c0fe5d);
- &_data_word(0x02752fc3, 0x12f04c81, 0xa397468d, 0xc6f9d36b);
- &_data_word(0xe75f8f03, 0x959c9215, 0xeb7a6dbf, 0xda595295);
- &_data_word(0x2d83bed4, 0xd3217458, 0x2969e049, 0x44c8c98e);
- &_data_word(0x6a89c275, 0x78798ef4, 0x6b3e5899, 0xdd71b927);
- &_data_word(0xb64fe1be, 0x17ad88f0, 0x66ac20c9, 0xb43ace7d);
- &_data_word(0x184adf63, 0x82311ae5, 0x60335197, 0x457f5362);
- &_data_word(0xe07764b1, 0x84ae6bbb, 0x1ca081fe, 0x942b08f9);
- &_data_word(0x58684870, 0x19fd458f, 0x876cde94, 0xb7f87b52);
- &_data_word(0x23d373ab, 0xe2024b72, 0x578f1fe3, 0x2aab5566);
- &_data_word(0x0728ebb2, 0x03c2b52f, 0x9a7bc586, 0xa50837d3);
- &_data_word(0xf2872830, 0xb2a5bf23, 0xba6a0302, 0x5c8216ed);
- &_data_word(0x2b1ccf8a, 0x92b479a7, 0xf0f207f3, 0xa1e2694e);
- &_data_word(0xcdf4da65, 0xd5be0506, 0x1f6234d1, 0x8afea6c4);
- &_data_word(0x9d532e34, 0xa055f3a2, 0x32e18a05, 0x75ebf6a4);
- &_data_word(0x39ec830b, 0xaaef6040, 0x069f715e, 0x51106ebd);
- &_data_word(0xf98a213e, 0x3d06dd96, 0xae053edd, 0x46bde64d);
- &_data_word(0xb58d5491, 0x055dc471, 0x6fd40604, 0xff155060);
- &_data_word(0x24fb9819, 0x97e9bdd6, 0xcc434089, 0x779ed967);
- &_data_word(0xbd42e8b0, 0x888b8907, 0x385b19e7, 0xdbeec879);
- &_data_word(0x470a7ca1, 0xe90f427c, 0xc91e84f8, 0x00000000);
- &_data_word(0x83868009, 0x48ed2b32, 0xac70111e, 0x4e725a6c);
- &_data_word(0xfbff0efd, 0x5638850f, 0x1ed5ae3d, 0x27392d36);
- &_data_word(0x64d90f0a, 0x21a65c68, 0xd1545b9b, 0x3a2e3624);
- &_data_word(0xb1670a0c, 0x0fe75793, 0xd296eeb4, 0x9e919b1b);
- &_data_word(0x4fc5c080, 0xa220dc61, 0x694b775a, 0x161a121c);
- &_data_word(0x0aba93e2, 0xe52aa0c0, 0x43e0223c, 0x1d171b12);
- &_data_word(0x0b0d090e, 0xadc78bf2, 0xb9a8b62d, 0xc8a91e14);
- &_data_word(0x8519f157, 0x4c0775af, 0xbbdd99ee, 0xfd607fa3);
- &_data_word(0x9f2601f7, 0xbcf5725c, 0xc53b6644, 0x347efb5b);
- &_data_word(0x7629438b, 0xdcc623cb, 0x68fcedb6, 0x63f1e4b8);
- &_data_word(0xcadc31d7, 0x10856342, 0x40229713, 0x2011c684);
- &_data_word(0x7d244a85, 0xf83dbbd2, 0x1132f9ae, 0x6da129c7);
- &_data_word(0x4b2f9e1d, 0xf330b2dc, 0xec52860d, 0xd0e3c177);
- &_data_word(0x6c16b32b, 0x99b970a9, 0xfa489411, 0x2264e947);
- &_data_word(0xc48cfca8, 0x1a3ff0a0, 0xd82c7d56, 0xef903322);
- &_data_word(0xc74e4987, 0xc1d138d9, 0xfea2ca8c, 0x360bd498);
- &_data_word(0xcf81f5a6, 0x28de7aa5, 0x268eb7da, 0xa4bfad3f);
- &_data_word(0xe49d3a2c, 0x0d927850, 0x9bcc5f6a, 0x62467e54);
- &_data_word(0xc2138df6, 0xe8b8d890, 0x5ef7392e, 0xf5afc382);
- &_data_word(0xbe805d9f, 0x7c93d069, 0xa92dd56f, 0xb31225cf);
- &_data_word(0x3b99acc8, 0xa77d1810, 0x6e639ce8, 0x7bbb3bdb);
- &_data_word(0x097826cd, 0xf418596e, 0x01b79aec, 0xa89a4f83);
- &_data_word(0x656e95e6, 0x7ee6ffaa, 0x08cfbc21, 0xe6e815ef);
- &_data_word(0xd99be7ba, 0xce366f4a, 0xd4099fea, 0xd67cb029);
- &_data_word(0xafb2a431, 0x31233f2a, 0x3094a5c6, 0xc066a235);
- &_data_word(0x37bc4e74, 0xa6ca82fc, 0xb0d090e0, 0x15d8a733);
- &_data_word(0x4a9804f1, 0xf7daec41, 0x0e50cd7f, 0x2ff69117);
- &_data_word(0x8dd64d76, 0x4db0ef43, 0x544daacc, 0xdf0496e4);
- &_data_word(0xe3b5d19e, 0x1b886a4c, 0xb81f2cc1, 0x7f516546);
- &_data_word(0x04ea5e9d, 0x5d358c01, 0x737487fa, 0x2e410bfb);
- &_data_word(0x5a1d67b3, 0x52d2db92, 0x335610e9, 0x1347d66d);
- &_data_word(0x8c61d79a, 0x7a0ca137, 0x8e14f859, 0x893c13eb);
- &_data_word(0xee27a9ce, 0x35c961b7, 0xede51ce1, 0x3cb1477a);
- &_data_word(0x59dfd29c, 0x3f73f255, 0x79ce1418, 0xbf37c773);
- &_data_word(0xeacdf753, 0x5baafd5f, 0x146f3ddf, 0x86db4478);
- &_data_word(0x81f3afca, 0x3ec468b9, 0x2c342438, 0x5f40a3c2);
- &_data_word(0x72c31d16, 0x0c25e2bc, 0x8b493c28, 0x41950dff);
- &_data_word(0x7101a839, 0xdeb30c08, 0x9ce4b4d8, 0x90c15664);
- &_data_word(0x6184cb7b, 0x70b632d5, 0x745c6c48, 0x4257b8d0);
-
-#Td4: # four copies of Td4 to choose from to avoid L1 aliasing
- &data_byte(0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38);
- &data_byte(0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb);
- &data_byte(0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87);
- &data_byte(0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb);
- &data_byte(0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d);
- &data_byte(0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e);
- &data_byte(0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2);
- &data_byte(0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25);
- &data_byte(0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16);
- &data_byte(0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92);
- &data_byte(0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda);
- &data_byte(0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84);
- &data_byte(0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a);
- &data_byte(0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06);
- &data_byte(0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02);
- &data_byte(0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b);
- &data_byte(0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea);
- &data_byte(0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73);
- &data_byte(0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85);
- &data_byte(0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e);
- &data_byte(0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89);
- &data_byte(0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b);
- &data_byte(0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20);
- &data_byte(0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4);
- &data_byte(0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31);
- &data_byte(0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f);
- &data_byte(0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d);
- &data_byte(0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef);
- &data_byte(0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0);
- &data_byte(0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61);
- &data_byte(0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26);
- &data_byte(0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d);
-$code.=<<___;
- .long 0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe
- .long 0x1b1b1b1b, 0x1b1b1b1b, 0, 0
-___
- &data_byte(0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38);
- &data_byte(0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb);
- &data_byte(0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87);
- &data_byte(0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb);
- &data_byte(0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d);
- &data_byte(0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e);
- &data_byte(0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2);
- &data_byte(0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25);
- &data_byte(0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16);
- &data_byte(0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92);
- &data_byte(0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda);
- &data_byte(0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84);
- &data_byte(0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a);
- &data_byte(0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06);
- &data_byte(0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02);
- &data_byte(0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b);
- &data_byte(0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea);
- &data_byte(0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73);
- &data_byte(0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85);
- &data_byte(0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e);
- &data_byte(0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89);
- &data_byte(0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b);
- &data_byte(0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20);
- &data_byte(0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4);
- &data_byte(0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31);
- &data_byte(0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f);
- &data_byte(0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d);
- &data_byte(0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef);
- &data_byte(0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0);
- &data_byte(0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61);
- &data_byte(0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26);
- &data_byte(0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d);
-$code.=<<___;
- .long 0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe
- .long 0x1b1b1b1b, 0x1b1b1b1b, 0, 0
-___
- &data_byte(0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38);
- &data_byte(0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb);
- &data_byte(0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87);
- &data_byte(0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb);
- &data_byte(0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d);
- &data_byte(0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e);
- &data_byte(0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2);
- &data_byte(0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25);
- &data_byte(0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16);
- &data_byte(0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92);
- &data_byte(0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda);
- &data_byte(0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84);
- &data_byte(0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a);
- &data_byte(0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06);
- &data_byte(0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02);
- &data_byte(0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b);
- &data_byte(0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea);
- &data_byte(0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73);
- &data_byte(0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85);
- &data_byte(0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e);
- &data_byte(0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89);
- &data_byte(0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b);
- &data_byte(0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20);
- &data_byte(0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4);
- &data_byte(0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31);
- &data_byte(0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f);
- &data_byte(0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d);
- &data_byte(0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef);
- &data_byte(0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0);
- &data_byte(0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61);
- &data_byte(0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26);
- &data_byte(0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d);
-$code.=<<___;
- .long 0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe
- .long 0x1b1b1b1b, 0x1b1b1b1b, 0, 0
-___
- &data_byte(0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38);
- &data_byte(0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb);
- &data_byte(0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87);
- &data_byte(0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb);
- &data_byte(0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d);
- &data_byte(0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e);
- &data_byte(0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2);
- &data_byte(0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25);
- &data_byte(0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16);
- &data_byte(0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92);
- &data_byte(0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda);
- &data_byte(0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84);
- &data_byte(0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a);
- &data_byte(0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06);
- &data_byte(0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02);
- &data_byte(0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b);
- &data_byte(0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea);
- &data_byte(0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73);
- &data_byte(0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85);
- &data_byte(0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e);
- &data_byte(0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89);
- &data_byte(0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b);
- &data_byte(0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20);
- &data_byte(0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4);
- &data_byte(0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31);
- &data_byte(0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f);
- &data_byte(0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d);
- &data_byte(0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef);
- &data_byte(0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0);
- &data_byte(0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61);
- &data_byte(0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26);
- &data_byte(0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d);
-$code.=<<___;
- .long 0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe
- .long 0x1b1b1b1b, 0x1b1b1b1b, 0, 0
-.asciz "AES for x86_64, CRYPTOGAMS by <appro\@openssl.org>"
-.align 64
-___
-
-# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame,
-# CONTEXT *context,DISPATCHER_CONTEXT *disp)
-if ($win64) {
-$rec="%rcx";
-$frame="%rdx";
-$context="%r8";
-$disp="%r9";
-
-$code.=<<___;
-.extern __imp_RtlVirtualUnwind
-.type block_se_handler,\@abi-omnipotent
-.align 16
-block_se_handler:
- push %rsi
- push %rdi
- push %rbx
- push %rbp
- push %r12
- push %r13
- push %r14
- push %r15
- pushfq
- sub \$64,%rsp
-
- mov 120($context),%rax # pull context->Rax
- mov 248($context),%rbx # pull context->Rip
-
- mov 8($disp),%rsi # disp->ImageBase
- mov 56($disp),%r11 # disp->HandlerData
-
- mov 0(%r11),%r10d # HandlerData[0]
- lea (%rsi,%r10),%r10 # prologue label
- cmp %r10,%rbx # context->Rip<prologue label
- jb .Lin_block_prologue
-
- mov 152($context),%rax # pull context->Rsp
-
- mov 4(%r11),%r10d # HandlerData[1]
- lea (%rsi,%r10),%r10 # epilogue label
- cmp %r10,%rbx # context->Rip>=epilogue label
- jae .Lin_block_prologue
-
- mov 24(%rax),%rax # pull saved real stack pointer
-
- mov -8(%rax),%rbx
- mov -16(%rax),%rbp
- mov -24(%rax),%r12
- mov -32(%rax),%r13
- mov -40(%rax),%r14
- mov -48(%rax),%r15
- mov %rbx,144($context) # restore context->Rbx
- mov %rbp,160($context) # restore context->Rbp
- mov %r12,216($context) # restore context->R12
- mov %r13,224($context) # restore context->R13
- mov %r14,232($context) # restore context->R14
- mov %r15,240($context) # restore context->R15
-
-.Lin_block_prologue:
- mov 8(%rax),%rdi
- mov 16(%rax),%rsi
- mov %rax,152($context) # restore context->Rsp
- mov %rsi,168($context) # restore context->Rsi
- mov %rdi,176($context) # restore context->Rdi
-
- jmp .Lcommon_seh_exit
-.size block_se_handler,.-block_se_handler
-
-.type key_se_handler,\@abi-omnipotent
-.align 16
-key_se_handler:
- push %rsi
- push %rdi
- push %rbx
- push %rbp
- push %r12
- push %r13
- push %r14
- push %r15
- pushfq
- sub \$64,%rsp
-
- mov 120($context),%rax # pull context->Rax
- mov 248($context),%rbx # pull context->Rip
-
- mov 8($disp),%rsi # disp->ImageBase
- mov 56($disp),%r11 # disp->HandlerData
-
- mov 0(%r11),%r10d # HandlerData[0]
- lea (%rsi,%r10),%r10 # prologue label
- cmp %r10,%rbx # context->Rip<prologue label
- jb .Lin_key_prologue
-
- mov 152($context),%rax # pull context->Rsp
-
- mov 4(%r11),%r10d # HandlerData[1]
- lea (%rsi,%r10),%r10 # epilogue label
- cmp %r10,%rbx # context->Rip>=epilogue label
- jae .Lin_key_prologue
-
- lea 56(%rax),%rax
-
- mov -8(%rax),%rbx
- mov -16(%rax),%rbp
- mov -24(%rax),%r12
- mov -32(%rax),%r13
- mov -40(%rax),%r14
- mov -48(%rax),%r15
- mov %rbx,144($context) # restore context->Rbx
- mov %rbp,160($context) # restore context->Rbp
- mov %r12,216($context) # restore context->R12
- mov %r13,224($context) # restore context->R13
- mov %r14,232($context) # restore context->R14
- mov %r15,240($context) # restore context->R15
-
-.Lin_key_prologue:
- mov 8(%rax),%rdi
- mov 16(%rax),%rsi
- mov %rax,152($context) # restore context->Rsp
- mov %rsi,168($context) # restore context->Rsi
- mov %rdi,176($context) # restore context->Rdi
-
- jmp .Lcommon_seh_exit
-.size key_se_handler,.-key_se_handler
-
-.type cbc_se_handler,\@abi-omnipotent
-.align 16
-cbc_se_handler:
- push %rsi
- push %rdi
- push %rbx
- push %rbp
- push %r12
- push %r13
- push %r14
- push %r15
- pushfq
- sub \$64,%rsp
-
- mov 120($context),%rax # pull context->Rax
- mov 248($context),%rbx # pull context->Rip
-
- lea .Lcbc_prologue(%rip),%r10
- cmp %r10,%rbx # context->Rip<.Lcbc_prologue
- jb .Lin_cbc_prologue
-
- lea .Lcbc_fast_body(%rip),%r10
- cmp %r10,%rbx # context->Rip<.Lcbc_fast_body
- jb .Lin_cbc_frame_setup
-
- lea .Lcbc_slow_prologue(%rip),%r10
- cmp %r10,%rbx # context->Rip<.Lcbc_slow_prologue
- jb .Lin_cbc_body
-
- lea .Lcbc_slow_body(%rip),%r10
- cmp %r10,%rbx # context->Rip<.Lcbc_slow_body
- jb .Lin_cbc_frame_setup
-
-.Lin_cbc_body:
- mov 152($context),%rax # pull context->Rsp
-
- lea .Lcbc_epilogue(%rip),%r10
- cmp %r10,%rbx # context->Rip>=.Lcbc_epilogue
- jae .Lin_cbc_prologue
-
- lea 8(%rax),%rax
-
- lea .Lcbc_popfq(%rip),%r10
- cmp %r10,%rbx # context->Rip>=.Lcbc_popfq
- jae .Lin_cbc_prologue
-
- mov `16-8`(%rax),%rax # biased $_rsp
- lea 56(%rax),%rax
-
-.Lin_cbc_frame_setup:
- mov -16(%rax),%rbx
- mov -24(%rax),%rbp
- mov -32(%rax),%r12
- mov -40(%rax),%r13
- mov -48(%rax),%r14
- mov -56(%rax),%r15
- mov %rbx,144($context) # restore context->Rbx
- mov %rbp,160($context) # restore context->Rbp
- mov %r12,216($context) # restore context->R12
- mov %r13,224($context) # restore context->R13
- mov %r14,232($context) # restore context->R14
- mov %r15,240($context) # restore context->R15
-
-.Lin_cbc_prologue:
- mov 8(%rax),%rdi
- mov 16(%rax),%rsi
- mov %rax,152($context) # restore context->Rsp
- mov %rsi,168($context) # restore context->Rsi
- mov %rdi,176($context) # restore context->Rdi
-
-.Lcommon_seh_exit:
-
- mov 40($disp),%rdi # disp->ContextRecord
- mov $context,%rsi # context
- mov \$`1232/8`,%ecx # sizeof(CONTEXT)
- .long 0xa548f3fc # cld; rep movsq
-
- mov $disp,%rsi
- xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER
- mov 8(%rsi),%rdx # arg2, disp->ImageBase
- mov 0(%rsi),%r8 # arg3, disp->ControlPc
- mov 16(%rsi),%r9 # arg4, disp->FunctionEntry
- mov 40(%rsi),%r10 # disp->ContextRecord
- lea 56(%rsi),%r11 # &disp->HandlerData
- lea 24(%rsi),%r12 # &disp->EstablisherFrame
- mov %r10,32(%rsp) # arg5
- mov %r11,40(%rsp) # arg6
- mov %r12,48(%rsp) # arg7
- mov %rcx,56(%rsp) # arg8, (NULL)
- call *__imp_RtlVirtualUnwind(%rip)
-
- mov \$1,%eax # ExceptionContinueSearch
- add \$64,%rsp
- popfq
- pop %r15
- pop %r14
- pop %r13
- pop %r12
- pop %rbp
- pop %rbx
- pop %rdi
- pop %rsi
- ret
-.size cbc_se_handler,.-cbc_se_handler
-
-.section .pdata
-.align 4
- .rva .LSEH_begin_AES_encrypt
- .rva .LSEH_end_AES_encrypt
- .rva .LSEH_info_AES_encrypt
-
- .rva .LSEH_begin_AES_decrypt
- .rva .LSEH_end_AES_decrypt
- .rva .LSEH_info_AES_decrypt
-
- .rva .LSEH_begin_AES_set_encrypt_key
- .rva .LSEH_end_AES_set_encrypt_key
- .rva .LSEH_info_AES_set_encrypt_key
-
- .rva .LSEH_begin_AES_set_decrypt_key
- .rva .LSEH_end_AES_set_decrypt_key
- .rva .LSEH_info_AES_set_decrypt_key
-
- .rva .LSEH_begin_AES_cbc_encrypt
- .rva .LSEH_end_AES_cbc_encrypt
- .rva .LSEH_info_AES_cbc_encrypt
-
-.section .xdata
-.align 8
-.LSEH_info_AES_encrypt:
- .byte 9,0,0,0
- .rva block_se_handler
- .rva .Lenc_prologue,.Lenc_epilogue # HandlerData[]
-.LSEH_info_AES_decrypt:
- .byte 9,0,0,0
- .rva block_se_handler
- .rva .Ldec_prologue,.Ldec_epilogue # HandlerData[]
-.LSEH_info_AES_set_encrypt_key:
- .byte 9,0,0,0
- .rva key_se_handler
- .rva .Lenc_key_prologue,.Lenc_key_epilogue # HandlerData[]
-.LSEH_info_AES_set_decrypt_key:
- .byte 9,0,0,0
- .rva key_se_handler
- .rva .Ldec_key_prologue,.Ldec_key_epilogue # HandlerData[]
-.LSEH_info_AES_cbc_encrypt:
- .byte 9,0,0,0
- .rva cbc_se_handler
-___
-}
-
-$code =~ s/\`([^\`]*)\`/eval($1)/gem;
-
-print $code;
-
-close STDOUT;
diff --git a/crypto/openssl/crypto/aes/asm/bsaes-x86_64.pl b/crypto/openssl/crypto/aes/asm/bsaes-x86_64.pl
deleted file mode 100755
index e62342729e7f..000000000000
--- a/crypto/openssl/crypto/aes/asm/bsaes-x86_64.pl
+++ /dev/null
@@ -1,3239 +0,0 @@
-#! /usr/bin/env perl
-# Copyright 2011-2019 The OpenSSL Project Authors. All Rights Reserved.
-#
-# Licensed under the OpenSSL license (the "License"). You may not use
-# this file except in compliance with the License. You can obtain a copy
-# in the file LICENSE in the source distribution or at
-# https://www.openssl.org/source/license.html
-
-
-###################################################################
-### AES-128 [originally in CTR mode] ###
-### bitsliced implementation for Intel Core 2 processors ###
-### requires support of SSE extensions up to SSSE3 ###
-### Author: Emilia Käsper and Peter Schwabe ###
-### Date: 2009-03-19 ###
-### Public domain ###
-### ###
-### See http://homes.esat.kuleuven.be/~ekasper/#software for ###
-### further information. ###
-###################################################################
-#
-# September 2011.
-#
-# Started as transliteration to "perlasm" the original code has
-# undergone following changes:
-#
-# - code was made position-independent;
-# - rounds were folded into a loop resulting in >5x size reduction
-# from 12.5KB to 2.2KB;
-# - above was possibile thanks to mixcolumns() modification that
-# allowed to feed its output back to aesenc[last], this was
-# achieved at cost of two additional inter-registers moves;
-# - some instruction reordering and interleaving;
-# - this module doesn't implement key setup subroutine, instead it
-# relies on conversion of "conventional" key schedule as returned
-# by AES_set_encrypt_key (see discussion below);
-# - first and last round keys are treated differently, which allowed
-# to skip one shiftrows(), reduce bit-sliced key schedule and
-# speed-up conversion by 22%;
-# - support for 192- and 256-bit keys was added;
-#
-# Resulting performance in CPU cycles spent to encrypt one byte out
-# of 4096-byte buffer with 128-bit key is:
-#
-# Emilia's this(*) difference
-#
-# Core 2 9.30 8.69 +7%
-# Nehalem(**) 7.63 6.88 +11%
-# Atom 17.1 16.4 +4%
-# Silvermont - 12.9
-# Goldmont - 8.85
-#
-# (*) Comparison is not completely fair, because "this" is ECB,
-# i.e. no extra processing such as counter values calculation
-# and xor-ing input as in Emilia's CTR implementation is
-# performed. However, the CTR calculations stand for not more
-# than 1% of total time, so comparison is *rather* fair.
-#
-# (**) Results were collected on Westmere, which is considered to
-# be equivalent to Nehalem for this code.
-#
-# As for key schedule conversion subroutine. Interface to OpenSSL
-# relies on per-invocation on-the-fly conversion. This naturally
-# has impact on performance, especially for short inputs. Conversion
-# time in CPU cycles and its ratio to CPU cycles spent in 8x block
-# function is:
-#
-# conversion conversion/8x block
-# Core 2 240 0.22
-# Nehalem 180 0.20
-# Atom 430 0.20
-#
-# The ratio values mean that 128-byte blocks will be processed
-# 16-18% slower, 256-byte blocks - 9-10%, 384-byte blocks - 6-7%,
-# etc. Then keep in mind that input sizes not divisible by 128 are
-# *effectively* slower, especially shortest ones, e.g. consecutive
-# 144-byte blocks are processed 44% slower than one would expect,
-# 272 - 29%, 400 - 22%, etc. Yet, despite all these "shortcomings"
-# it's still faster than ["hyper-threading-safe" code path in]
-# aes-x86_64.pl on all lengths above 64 bytes...
-#
-# October 2011.
-#
-# Add decryption procedure. Performance in CPU cycles spent to decrypt
-# one byte out of 4096-byte buffer with 128-bit key is:
-#
-# Core 2 9.98
-# Nehalem 7.80
-# Atom 17.9
-# Silvermont 14.0
-# Goldmont 10.2
-#
-# November 2011.
-#
-# Add bsaes_xts_[en|de]crypt. Less-than-80-bytes-block performance is
-# suboptimal, but XTS is meant to be used with larger blocks...
-#
-# <appro@openssl.org>
-
-$flavour = shift;
-$output = shift;
-if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
-
-$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);
-
-$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
-( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
-( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
-die "can't locate x86_64-xlate.pl";
-
-open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\"";
-*STDOUT=*OUT;
-
-my ($inp,$out,$len,$key,$ivp)=("%rdi","%rsi","%rdx","%rcx");
-my @XMM=map("%xmm$_",(15,0..14)); # best on Atom, +10% over (0..15)
-my $ecb=0; # suppress unreferenced ECB subroutines, spare some space...
-
-{
-my ($key,$rounds,$const)=("%rax","%r10d","%r11");
-
-sub Sbox {
-# input in lsb > [b0, b1, b2, b3, b4, b5, b6, b7] < msb
-# output in lsb > [b0, b1, b4, b6, b3, b7, b2, b5] < msb
-my @b=@_[0..7];
-my @t=@_[8..11];
-my @s=@_[12..15];
- &InBasisChange (@b);
- &Inv_GF256 (@b[6,5,0,3,7,1,4,2],@t,@s);
- &OutBasisChange (@b[7,1,4,2,6,5,0,3]);
-}
-
-sub InBasisChange {
-# input in lsb > [b0, b1, b2, b3, b4, b5, b6, b7] < msb
-# output in lsb > [b6, b5, b0, b3, b7, b1, b4, b2] < msb
-my @b=@_[0..7];
-$code.=<<___;
- pxor @b[6], @b[5]
- pxor @b[1], @b[2]
- pxor @b[0], @b[3]
- pxor @b[2], @b[6]
- pxor @b[0], @b[5]
-
- pxor @b[3], @b[6]
- pxor @b[7], @b[3]
- pxor @b[5], @b[7]
- pxor @b[4], @b[3]
- pxor @b[5], @b[4]
- pxor @b[1], @b[3]
-
- pxor @b[7], @b[2]
- pxor @b[5], @b[1]
-___
-}
-
-sub OutBasisChange {
-# input in lsb > [b0, b1, b2, b3, b4, b5, b6, b7] < msb
-# output in lsb > [b6, b1, b2, b4, b7, b0, b3, b5] < msb
-my @b=@_[0..7];
-$code.=<<___;
- pxor @b[6], @b[0]
- pxor @b[4], @b[1]
- pxor @b[0], @b[2]
- pxor @b[6], @b[4]
- pxor @b[1], @b[6]
-
- pxor @b[5], @b[1]
- pxor @b[3], @b[5]
- pxor @b[7], @b[3]
- pxor @b[5], @b[7]
- pxor @b[5], @b[2]
-
- pxor @b[7], @b[4]
-___
-}
-
-sub InvSbox {
-# input in lsb > [b0, b1, b2, b3, b4, b5, b6, b7] < msb
-# output in lsb > [b0, b1, b6, b4, b2, b7, b3, b5] < msb
-my @b=@_[0..7];
-my @t=@_[8..11];
-my @s=@_[12..15];
- &InvInBasisChange (@b);
- &Inv_GF256 (@b[5,1,2,6,3,7,0,4],@t,@s);
- &InvOutBasisChange (@b[3,7,0,4,5,1,2,6]);
-}
-
-sub InvInBasisChange { # OutBasisChange in reverse
-my @b=@_[5,1,2,6,3,7,0,4];
-$code.=<<___
- pxor @b[7], @b[4]
-
- pxor @b[5], @b[7]
- pxor @b[5], @b[2]
- pxor @b[7], @b[3]
- pxor @b[3], @b[5]
- pxor @b[5], @b[1]
-
- pxor @b[1], @b[6]
- pxor @b[0], @b[2]
- pxor @b[6], @b[4]
- pxor @b[6], @b[0]
- pxor @b[4], @b[1]
-___
-}
-
-sub InvOutBasisChange { # InBasisChange in reverse
-my @b=@_[2,5,7,3,6,1,0,4];
-$code.=<<___;
- pxor @b[5], @b[1]
- pxor @b[7], @b[2]
-
- pxor @b[1], @b[3]
- pxor @b[5], @b[4]
- pxor @b[5], @b[7]
- pxor @b[4], @b[3]
- pxor @b[0], @b[5]
- pxor @b[7], @b[3]
- pxor @b[2], @b[6]
- pxor @b[1], @b[2]
- pxor @b[3], @b[6]
-
- pxor @b[0], @b[3]
- pxor @b[6], @b[5]
-___
-}
-
-sub Mul_GF4 {
-#;*************************************************************
-#;* Mul_GF4: Input x0-x1,y0-y1 Output x0-x1 Temp t0 (8) *
-#;*************************************************************
-my ($x0,$x1,$y0,$y1,$t0)=@_;
-$code.=<<___;
- movdqa $y0, $t0
- pxor $y1, $t0
- pand $x0, $t0
- pxor $x1, $x0
- pand $y0, $x1
- pand $y1, $x0
- pxor $x1, $x0
- pxor $t0, $x1
-___
-}
-
-sub Mul_GF4_N { # not used, see next subroutine
-# multiply and scale by N
-my ($x0,$x1,$y0,$y1,$t0)=@_;
-$code.=<<___;
- movdqa $y0, $t0
- pxor $y1, $t0
- pand $x0, $t0
- pxor $x1, $x0
- pand $y0, $x1
- pand $y1, $x0
- pxor $x0, $x1
- pxor $t0, $x0
-___
-}
-
-sub Mul_GF4_N_GF4 {
-# interleaved Mul_GF4_N and Mul_GF4
-my ($x0,$x1,$y0,$y1,$t0,
- $x2,$x3,$y2,$y3,$t1)=@_;
-$code.=<<___;
- movdqa $y0, $t0
- movdqa $y2, $t1
- pxor $y1, $t0
- pxor $y3, $t1
- pand $x0, $t0
- pand $x2, $t1
- pxor $x1, $x0
- pxor $x3, $x2
- pand $y0, $x1
- pand $y2, $x3
- pand $y1, $x0
- pand $y3, $x2
- pxor $x0, $x1
- pxor $x3, $x2
- pxor $t0, $x0
- pxor $t1, $x3
-___
-}
-sub Mul_GF16_2 {
-my @x=@_[0..7];
-my @y=@_[8..11];
-my @t=@_[12..15];
-$code.=<<___;
- movdqa @x[0], @t[0]
- movdqa @x[1], @t[1]
-___
- &Mul_GF4 (@x[0], @x[1], @y[0], @y[1], @t[2]);
-$code.=<<___;
- pxor @x[2], @t[0]
- pxor @x[3], @t[1]
- pxor @y[2], @y[0]
- pxor @y[3], @y[1]
-___
- Mul_GF4_N_GF4 (@t[0], @t[1], @y[0], @y[1], @t[3],
- @x[2], @x[3], @y[2], @y[3], @t[2]);
-$code.=<<___;
- pxor @t[0], @x[0]
- pxor @t[0], @x[2]
- pxor @t[1], @x[1]
- pxor @t[1], @x[3]
-
- movdqa @x[4], @t[0]
- movdqa @x[5], @t[1]
- pxor @x[6], @t[0]
- pxor @x[7], @t[1]
-___
- &Mul_GF4_N_GF4 (@t[0], @t[1], @y[0], @y[1], @t[3],
- @x[6], @x[7], @y[2], @y[3], @t[2]);
-$code.=<<___;
- pxor @y[2], @y[0]
- pxor @y[3], @y[1]
-___
- &Mul_GF4 (@x[4], @x[5], @y[0], @y[1], @t[3]);
-$code.=<<___;
- pxor @t[0], @x[4]
- pxor @t[0], @x[6]
- pxor @t[1], @x[5]
- pxor @t[1], @x[7]
-___
-}
-sub Inv_GF256 {
-#;********************************************************************
-#;* Inv_GF256: Input x0-x7 Output x0-x7 Temp t0-t3,s0-s3 (144) *
-#;********************************************************************
-my @x=@_[0..7];
-my @t=@_[8..11];
-my @s=@_[12..15];
-# direct optimizations from hardware
-$code.=<<___;
- movdqa @x[4], @t[3]
- movdqa @x[5], @t[2]
- movdqa @x[1], @t[1]
- movdqa @x[7], @s[1]
- movdqa @x[0], @s[0]
-
- pxor @x[6], @t[3]
- pxor @x[7], @t[2]
- pxor @x[3], @t[1]
- movdqa @t[3], @s[2]
- pxor @x[6], @s[1]
- movdqa @t[2], @t[0]
- pxor @x[2], @s[0]
- movdqa @t[3], @s[3]
-
- por @t[1], @t[2]
- por @s[0], @t[3]
- pxor @t[0], @s[3]
- pand @s[0], @s[2]
- pxor @t[1], @s[0]
- pand @t[1], @t[0]
- pand @s[0], @s[3]
- movdqa @x[3], @s[0]
- pxor @x[2], @s[0]
- pand @s[0], @s[1]
- pxor @s[1], @t[3]
- pxor @s[1], @t[2]
- movdqa @x[4], @s[1]
- movdqa @x[1], @s[0]
- pxor @x[5], @s[1]
- pxor @x[0], @s[0]
- movdqa @s[1], @t[1]
- pand @s[0], @s[1]
- por @s[0], @t[1]
- pxor @s[1], @t[0]
- pxor @s[3], @t[3]
- pxor @s[2], @t[2]
- pxor @s[3], @t[1]
- movdqa @x[7], @s[0]
- pxor @s[2], @t[0]
- movdqa @x[6], @s[1]
- pxor @s[2], @t[1]
- movdqa @x[5], @s[2]
- pand @x[3], @s[0]
- movdqa @x[4], @s[3]
- pand @x[2], @s[1]
- pand @x[1], @s[2]
- por @x[0], @s[3]
- pxor @s[0], @t[3]
- pxor @s[1], @t[2]
- pxor @s[2], @t[1]
- pxor @s[3], @t[0]
-
- #Inv_GF16 \t0, \t1, \t2, \t3, \s0, \s1, \s2, \s3
-
- # new smaller inversion
-
- movdqa @t[3], @s[0]
- pand @t[1], @t[3]
- pxor @t[2], @s[0]
-
- movdqa @t[0], @s[2]
- movdqa @s[0], @s[3]
- pxor @t[3], @s[2]
- pand @s[2], @s[3]
-
- movdqa @t[1], @s[1]
- pxor @t[2], @s[3]
- pxor @t[0], @s[1]
-
- pxor @t[2], @t[3]
-
- pand @t[3], @s[1]
-
- movdqa @s[2], @t[2]
- pxor @t[0], @s[1]
-
- pxor @s[1], @t[2]
- pxor @s[1], @t[1]
-
- pand @t[0], @t[2]
-
- pxor @t[2], @s[2]
- pxor @t[2], @t[1]
-
- pand @s[3], @s[2]
-
- pxor @s[0], @s[2]
-___
-# output in s3, s2, s1, t1
-
-# Mul_GF16_2 \x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \t2, \t3, \t0, \t1, \s0, \s1, \s2, \s3
-
-# Mul_GF16_2 \x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \s3, \s2, \s1, \t1, \s0, \t0, \t2, \t3
- &Mul_GF16_2(@x,@s[3,2,1],@t[1],@s[0],@t[0,2,3]);
-
-### output msb > [x3,x2,x1,x0,x7,x6,x5,x4] < lsb
-}
-
-# AES linear components
-
-sub ShiftRows {
-my @x=@_[0..7];
-my $mask=pop;
-$code.=<<___;
- pxor 0x00($key),@x[0]
- pxor 0x10($key),@x[1]
- pxor 0x20($key),@x[2]
- pxor 0x30($key),@x[3]
- pshufb $mask,@x[0]
- pshufb $mask,@x[1]
- pxor 0x40($key),@x[4]
- pxor 0x50($key),@x[5]
- pshufb $mask,@x[2]
- pshufb $mask,@x[3]
- pxor 0x60($key),@x[6]
- pxor 0x70($key),@x[7]
- pshufb $mask,@x[4]
- pshufb $mask,@x[5]
- pshufb $mask,@x[6]
- pshufb $mask,@x[7]
- lea 0x80($key),$key
-___
-}
-
-sub MixColumns {
-# modified to emit output in order suitable for feeding back to aesenc[last]
-my @x=@_[0..7];
-my @t=@_[8..15];
-my $inv=@_[16]; # optional
-$code.=<<___;
- pshufd \$0x93, @x[0], @t[0] # x0 <<< 32
- pshufd \$0x93, @x[1], @t[1]
- pxor @t[0], @x[0] # x0 ^ (x0 <<< 32)
- pshufd \$0x93, @x[2], @t[2]
- pxor @t[1], @x[1]
- pshufd \$0x93, @x[3], @t[3]
- pxor @t[2], @x[2]
- pshufd \$0x93, @x[4], @t[4]
- pxor @t[3], @x[3]
- pshufd \$0x93, @x[5], @t[5]
- pxor @t[4], @x[4]
- pshufd \$0x93, @x[6], @t[6]
- pxor @t[5], @x[5]
- pshufd \$0x93, @x[7], @t[7]
- pxor @t[6], @x[6]
- pxor @t[7], @x[7]
-
- pxor @x[0], @t[1]
- pxor @x[7], @t[0]
- pxor @x[7], @t[1]
- pshufd \$0x4E, @x[0], @x[0] # (x0 ^ (x0 <<< 32)) <<< 64)
- pxor @x[1], @t[2]
- pshufd \$0x4E, @x[1], @x[1]
- pxor @x[4], @t[5]
- pxor @t[0], @x[0]
- pxor @x[5], @t[6]
- pxor @t[1], @x[1]
- pxor @x[3], @t[4]
- pshufd \$0x4E, @x[4], @t[0]
- pxor @x[6], @t[7]
- pshufd \$0x4E, @x[5], @t[1]
- pxor @x[2], @t[3]
- pshufd \$0x4E, @x[3], @x[4]
- pxor @x[7], @t[3]
- pshufd \$0x4E, @x[7], @x[5]
- pxor @x[7], @t[4]
- pshufd \$0x4E, @x[6], @x[3]
- pxor @t[4], @t[0]
- pshufd \$0x4E, @x[2], @x[6]
- pxor @t[5], @t[1]
-___
-$code.=<<___ if (!$inv);
- pxor @t[3], @x[4]
- pxor @t[7], @x[5]
- pxor @t[6], @x[3]
- movdqa @t[0], @x[2]
- pxor @t[2], @x[6]
- movdqa @t[1], @x[7]
-___
-$code.=<<___ if ($inv);
- pxor @x[4], @t[3]
- pxor @t[7], @x[5]
- pxor @x[3], @t[6]
- movdqa @t[0], @x[3]
- pxor @t[2], @x[6]
- movdqa @t[6], @x[2]
- movdqa @t[1], @x[7]
- movdqa @x[6], @x[4]
- movdqa @t[3], @x[6]
-___
-}
-
-sub InvMixColumns_orig {
-my @x=@_[0..7];
-my @t=@_[8..15];
-
-$code.=<<___;
- # multiplication by 0x0e
- pshufd \$0x93, @x[7], @t[7]
- movdqa @x[2], @t[2]
- pxor @x[5], @x[7] # 7 5
- pxor @x[5], @x[2] # 2 5
- pshufd \$0x93, @x[0], @t[0]
- movdqa @x[5], @t[5]
- pxor @x[0], @x[5] # 5 0 [1]
- pxor @x[1], @x[0] # 0 1
- pshufd \$0x93, @x[1], @t[1]
- pxor @x[2], @x[1] # 1 25
- pxor @x[6], @x[0] # 01 6 [2]
- pxor @x[3], @x[1] # 125 3 [4]
- pshufd \$0x93, @x[3], @t[3]
- pxor @x[0], @x[2] # 25 016 [3]
- pxor @x[7], @x[3] # 3 75
- pxor @x[6], @x[7] # 75 6 [0]
- pshufd \$0x93, @x[6], @t[6]
- movdqa @x[4], @t[4]
- pxor @x[4], @x[6] # 6 4
- pxor @x[3], @x[4] # 4 375 [6]
- pxor @x[7], @x[3] # 375 756=36
- pxor @t[5], @x[6] # 64 5 [7]
- pxor @t[2], @x[3] # 36 2
- pxor @t[4], @x[3] # 362 4 [5]
- pshufd \$0x93, @t[5], @t[5]
-___
- my @y = @x[7,5,0,2,1,3,4,6];
-$code.=<<___;
- # multiplication by 0x0b
- pxor @y[0], @y[1]
- pxor @t[0], @y[0]
- pxor @t[1], @y[1]
- pshufd \$0x93, @t[2], @t[2]
- pxor @t[5], @y[0]
- pxor @t[6], @y[1]
- pxor @t[7], @y[0]
- pshufd \$0x93, @t[4], @t[4]
- pxor @t[6], @t[7] # clobber t[7]
- pxor @y[0], @y[1]
-
- pxor @t[0], @y[3]
- pshufd \$0x93, @t[0], @t[0]
- pxor @t[1], @y[2]
- pxor @t[1], @y[4]
- pxor @t[2], @y[2]
- pshufd \$0x93, @t[1], @t[1]
- pxor @t[2], @y[3]
- pxor @t[2], @y[5]
- pxor @t[7], @y[2]
- pshufd \$0x93, @t[2], @t[2]
- pxor @t[3], @y[3]
- pxor @t[3], @y[6]
- pxor @t[3], @y[4]
- pshufd \$0x93, @t[3], @t[3]
- pxor @t[4], @y[7]
- pxor @t[4], @y[5]
- pxor @t[7], @y[7]
- pxor @t[5], @y[3]
- pxor @t[4], @y[4]
- pxor @t[5], @t[7] # clobber t[7] even more
-
- pxor @t[7], @y[5]
- pshufd \$0x93, @t[4], @t[4]
- pxor @t[7], @y[6]
- pxor @t[7], @y[4]
-
- pxor @t[5], @t[7]
- pshufd \$0x93, @t[5], @t[5]
- pxor @t[6], @t[7] # restore t[7]
-
- # multiplication by 0x0d
- pxor @y[7], @y[4]
- pxor @t[4], @y[7]
- pshufd \$0x93, @t[6], @t[6]
- pxor @t[0], @y[2]
- pxor @t[5], @y[7]
- pxor @t[2], @y[2]
- pshufd \$0x93, @t[7], @t[7]
-
- pxor @y[1], @y[3]
- pxor @t[1], @y[1]
- pxor @t[0], @y[0]
- pxor @t[0], @y[3]
- pxor @t[5], @y[1]
- pxor @t[5], @y[0]
- pxor @t[7], @y[1]
- pshufd \$0x93, @t[0], @t[0]
- pxor @t[6], @y[0]
- pxor @y[1], @y[3]
- pxor @t[1], @y[4]
- pshufd \$0x93, @t[1], @t[1]
-
- pxor @t[7], @y[7]
- pxor @t[2], @y[4]
- pxor @t[2], @y[5]
- pshufd \$0x93, @t[2], @t[2]
- pxor @t[6], @y[2]
- pxor @t[3], @t[6] # clobber t[6]
- pxor @y[7], @y[4]
- pxor @t[6], @y[3]
-
- pxor @t[6], @y[6]
- pxor @t[5], @y[5]
- pxor @t[4], @y[6]
- pshufd \$0x93, @t[4], @t[4]
- pxor @t[6], @y[5]
- pxor @t[7], @y[6]
- pxor @t[3], @t[6] # restore t[6]
-
- pshufd \$0x93, @t[5], @t[5]
- pshufd \$0x93, @t[6], @t[6]
- pshufd \$0x93, @t[7], @t[7]
- pshufd \$0x93, @t[3], @t[3]
-
- # multiplication by 0x09
- pxor @y[1], @y[4]
- pxor @y[1], @t[1] # t[1]=y[1]
- pxor @t[5], @t[0] # clobber t[0]
- pxor @t[5], @t[1]
- pxor @t[0], @y[3]
- pxor @y[0], @t[0] # t[0]=y[0]
- pxor @t[6], @t[1]
- pxor @t[7], @t[6] # clobber t[6]
- pxor @t[1], @y[4]
- pxor @t[4], @y[7]
- pxor @y[4], @t[4] # t[4]=y[4]
- pxor @t[3], @y[6]
- pxor @y[3], @t[3] # t[3]=y[3]
- pxor @t[2], @y[5]
- pxor @y[2], @t[2] # t[2]=y[2]
- pxor @t[7], @t[3]
- pxor @y[5], @t[5] # t[5]=y[5]
- pxor @t[6], @t[2]
- pxor @t[6], @t[5]
- pxor @y[6], @t[6] # t[6]=y[6]
- pxor @y[7], @t[7] # t[7]=y[7]
-
- movdqa @t[0],@XMM[0]
- movdqa @t[1],@XMM[1]
- movdqa @t[2],@XMM[2]
- movdqa @t[3],@XMM[3]
- movdqa @t[4],@XMM[4]
- movdqa @t[5],@XMM[5]
- movdqa @t[6],@XMM[6]
- movdqa @t[7],@XMM[7]
-___
-}
-
-sub InvMixColumns {
-my @x=@_[0..7];
-my @t=@_[8..15];
-
-# Thanks to Jussi Kivilinna for providing pointer to
-#
-# | 0e 0b 0d 09 | | 02 03 01 01 | | 05 00 04 00 |
-# | 09 0e 0b 0d | = | 01 02 03 01 | x | 00 05 00 04 |
-# | 0d 09 0e 0b | | 01 01 02 03 | | 04 00 05 00 |
-# | 0b 0d 09 0e | | 03 01 01 02 | | 00 04 00 05 |
-
-$code.=<<___;
- # multiplication by 0x05-0x00-0x04-0x00
- pshufd \$0x4E, @x[0], @t[0]
- pshufd \$0x4E, @x[6], @t[6]
- pxor @x[0], @t[0]
- pshufd \$0x4E, @x[7], @t[7]
- pxor @x[6], @t[6]
- pshufd \$0x4E, @x[1], @t[1]
- pxor @x[7], @t[7]
- pshufd \$0x4E, @x[2], @t[2]
- pxor @x[1], @t[1]
- pshufd \$0x4E, @x[3], @t[3]
- pxor @x[2], @t[2]
- pxor @t[6], @x[0]
- pxor @t[6], @x[1]
- pshufd \$0x4E, @x[4], @t[4]
- pxor @x[3], @t[3]
- pxor @t[0], @x[2]
- pxor @t[1], @x[3]
- pshufd \$0x4E, @x[5], @t[5]
- pxor @x[4], @t[4]
- pxor @t[7], @x[1]
- pxor @t[2], @x[4]
- pxor @x[5], @t[5]
-
- pxor @t[7], @x[2]
- pxor @t[6], @x[3]
- pxor @t[6], @x[4]
- pxor @t[3], @x[5]
- pxor @t[4], @x[6]
- pxor @t[7], @x[4]
- pxor @t[7], @x[5]
- pxor @t[5], @x[7]
-___
- &MixColumns (@x,@t,1); # flipped 2<->3 and 4<->6
-}
-
-sub aesenc { # not used
-my @b=@_[0..7];
-my @t=@_[8..15];
-$code.=<<___;
- movdqa 0x30($const),@t[0] # .LSR
-___
- &ShiftRows (@b,@t[0]);
- &Sbox (@b,@t);
- &MixColumns (@b[0,1,4,6,3,7,2,5],@t);
-}
-
-sub aesenclast { # not used
-my @b=@_[0..7];
-my @t=@_[8..15];
-$code.=<<___;
- movdqa 0x40($const),@t[0] # .LSRM0
-___
- &ShiftRows (@b,@t[0]);
- &Sbox (@b,@t);
-$code.=<<___
- pxor 0x00($key),@b[0]
- pxor 0x10($key),@b[1]
- pxor 0x20($key),@b[4]
- pxor 0x30($key),@b[6]
- pxor 0x40($key),@b[3]
- pxor 0x50($key),@b[7]
- pxor 0x60($key),@b[2]
- pxor 0x70($key),@b[5]
-___
-}
-
-sub swapmove {
-my ($a,$b,$n,$mask,$t)=@_;
-$code.=<<___;
- movdqa $b,$t
- psrlq \$$n,$b
- pxor $a,$b
- pand $mask,$b
- pxor $b,$a
- psllq \$$n,$b
- pxor $t,$b
-___
-}
-sub swapmove2x {
-my ($a0,$b0,$a1,$b1,$n,$mask,$t0,$t1)=@_;
-$code.=<<___;
- movdqa $b0,$t0
- psrlq \$$n,$b0
- movdqa $b1,$t1
- psrlq \$$n,$b1
- pxor $a0,$b0
- pxor $a1,$b1
- pand $mask,$b0
- pand $mask,$b1
- pxor $b0,$a0
- psllq \$$n,$b0
- pxor $b1,$a1
- psllq \$$n,$b1
- pxor $t0,$b0
- pxor $t1,$b1
-___
-}
-
-sub bitslice {
-my @x=reverse(@_[0..7]);
-my ($t0,$t1,$t2,$t3)=@_[8..11];
-$code.=<<___;
- movdqa 0x00($const),$t0 # .LBS0
- movdqa 0x10($const),$t1 # .LBS1
-___
- &swapmove2x(@x[0,1,2,3],1,$t0,$t2,$t3);
- &swapmove2x(@x[4,5,6,7],1,$t0,$t2,$t3);
-$code.=<<___;
- movdqa 0x20($const),$t0 # .LBS2
-___
- &swapmove2x(@x[0,2,1,3],2,$t1,$t2,$t3);
- &swapmove2x(@x[4,6,5,7],2,$t1,$t2,$t3);
-
- &swapmove2x(@x[0,4,1,5],4,$t0,$t2,$t3);
- &swapmove2x(@x[2,6,3,7],4,$t0,$t2,$t3);
-}
-
-$code.=<<___;
-.text
-
-.extern asm_AES_encrypt
-.extern asm_AES_decrypt
-
-.type _bsaes_encrypt8,\@abi-omnipotent
-.align 64
-_bsaes_encrypt8:
-.cfi_startproc
- lea .LBS0(%rip), $const # constants table
-
- movdqa ($key), @XMM[9] # round 0 key
- lea 0x10($key), $key
- movdqa 0x50($const), @XMM[8] # .LM0SR
- pxor @XMM[9], @XMM[0] # xor with round0 key
- pxor @XMM[9], @XMM[1]
- pxor @XMM[9], @XMM[2]
- pxor @XMM[9], @XMM[3]
- pshufb @XMM[8], @XMM[0]
- pshufb @XMM[8], @XMM[1]
- pxor @XMM[9], @XMM[4]
- pxor @XMM[9], @XMM[5]
- pshufb @XMM[8], @XMM[2]
- pshufb @XMM[8], @XMM[3]
- pxor @XMM[9], @XMM[6]
- pxor @XMM[9], @XMM[7]
- pshufb @XMM[8], @XMM[4]
- pshufb @XMM[8], @XMM[5]
- pshufb @XMM[8], @XMM[6]
- pshufb @XMM[8], @XMM[7]
-_bsaes_encrypt8_bitslice:
-___
- &bitslice (@XMM[0..7, 8..11]);
-$code.=<<___;
- dec $rounds
- jmp .Lenc_sbox
-.align 16
-.Lenc_loop:
-___
- &ShiftRows (@XMM[0..7, 8]);
-$code.=".Lenc_sbox:\n";
- &Sbox (@XMM[0..7, 8..15]);
-$code.=<<___;
- dec $rounds
- jl .Lenc_done
-___
- &MixColumns (@XMM[0,1,4,6,3,7,2,5, 8..15]);
-$code.=<<___;
- movdqa 0x30($const), @XMM[8] # .LSR
- jnz .Lenc_loop
- movdqa 0x40($const), @XMM[8] # .LSRM0
- jmp .Lenc_loop
-.align 16
-.Lenc_done:
-___
- # output in lsb > [t0, t1, t4, t6, t3, t7, t2, t5] < msb
- &bitslice (@XMM[0,1,4,6,3,7,2,5, 8..11]);
-$code.=<<___;
- movdqa ($key), @XMM[8] # last round key
- pxor @XMM[8], @XMM[4]
- pxor @XMM[8], @XMM[6]
- pxor @XMM[8], @XMM[3]
- pxor @XMM[8], @XMM[7]
- pxor @XMM[8], @XMM[2]
- pxor @XMM[8], @XMM[5]
- pxor @XMM[8], @XMM[0]
- pxor @XMM[8], @XMM[1]
- ret
-.cfi_endproc
-.size _bsaes_encrypt8,.-_bsaes_encrypt8
-
-.type _bsaes_decrypt8,\@abi-omnipotent
-.align 64
-_bsaes_decrypt8:
-.cfi_startproc
- lea .LBS0(%rip), $const # constants table
-
- movdqa ($key), @XMM[9] # round 0 key
- lea 0x10($key), $key
- movdqa -0x30($const), @XMM[8] # .LM0ISR
- pxor @XMM[9], @XMM[0] # xor with round0 key
- pxor @XMM[9], @XMM[1]
- pxor @XMM[9], @XMM[2]
- pxor @XMM[9], @XMM[3]
- pshufb @XMM[8], @XMM[0]
- pshufb @XMM[8], @XMM[1]
- pxor @XMM[9], @XMM[4]
- pxor @XMM[9], @XMM[5]
- pshufb @XMM[8], @XMM[2]
- pshufb @XMM[8], @XMM[3]
- pxor @XMM[9], @XMM[6]
- pxor @XMM[9], @XMM[7]
- pshufb @XMM[8], @XMM[4]
- pshufb @XMM[8], @XMM[5]
- pshufb @XMM[8], @XMM[6]
- pshufb @XMM[8], @XMM[7]
-___
- &bitslice (@XMM[0..7, 8..11]);
-$code.=<<___;
- dec $rounds
- jmp .Ldec_sbox
-.align 16
-.Ldec_loop:
-___
- &ShiftRows (@XMM[0..7, 8]);
-$code.=".Ldec_sbox:\n";
- &InvSbox (@XMM[0..7, 8..15]);
-$code.=<<___;
- dec $rounds
- jl .Ldec_done
-___
- &InvMixColumns (@XMM[0,1,6,4,2,7,3,5, 8..15]);
-$code.=<<___;
- movdqa -0x10($const), @XMM[8] # .LISR
- jnz .Ldec_loop
- movdqa -0x20($const), @XMM[8] # .LISRM0
- jmp .Ldec_loop
-.align 16
-.Ldec_done:
-___
- &bitslice (@XMM[0,1,6,4,2,7,3,5, 8..11]);
-$code.=<<___;
- movdqa ($key), @XMM[8] # last round key
- pxor @XMM[8], @XMM[6]
- pxor @XMM[8], @XMM[4]
- pxor @XMM[8], @XMM[2]
- pxor @XMM[8], @XMM[7]
- pxor @XMM[8], @XMM[3]
- pxor @XMM[8], @XMM[5]
- pxor @XMM[8], @XMM[0]
- pxor @XMM[8], @XMM[1]
- ret
-.cfi_endproc
-.size _bsaes_decrypt8,.-_bsaes_decrypt8
-___
-}
-{
-my ($out,$inp,$rounds,$const)=("%rax","%rcx","%r10d","%r11");
-
-sub bitslice_key {
-my @x=reverse(@_[0..7]);
-my ($bs0,$bs1,$bs2,$t2,$t3)=@_[8..12];
-
- &swapmove (@x[0,1],1,$bs0,$t2,$t3);
-$code.=<<___;
- #&swapmove(@x[2,3],1,$t0,$t2,$t3);
- movdqa @x[0], @x[2]
- movdqa @x[1], @x[3]
-___
- #&swapmove2x(@x[4,5,6,7],1,$t0,$t2,$t3);
-
- &swapmove2x (@x[0,2,1,3],2,$bs1,$t2,$t3);
-$code.=<<___;
- #&swapmove2x(@x[4,6,5,7],2,$t1,$t2,$t3);
- movdqa @x[0], @x[4]
- movdqa @x[2], @x[6]
- movdqa @x[1], @x[5]
- movdqa @x[3], @x[7]
-___
- &swapmove2x (@x[0,4,1,5],4,$bs2,$t2,$t3);
- &swapmove2x (@x[2,6,3,7],4,$bs2,$t2,$t3);
-}
-
-$code.=<<___;
-.type _bsaes_key_convert,\@abi-omnipotent
-.align 16
-_bsaes_key_convert:
-.cfi_startproc
- lea .Lmasks(%rip), $const
- movdqu ($inp), %xmm7 # load round 0 key
- lea 0x10($inp), $inp
- movdqa 0x00($const), %xmm0 # 0x01...
- movdqa 0x10($const), %xmm1 # 0x02...
- movdqa 0x20($const), %xmm2 # 0x04...
- movdqa 0x30($const), %xmm3 # 0x08...
- movdqa 0x40($const), %xmm4 # .LM0
- pcmpeqd %xmm5, %xmm5 # .LNOT
-
- movdqu ($inp), %xmm6 # load round 1 key
- movdqa %xmm7, ($out) # save round 0 key
- lea 0x10($out), $out
- dec $rounds
- jmp .Lkey_loop
-.align 16
-.Lkey_loop:
- pshufb %xmm4, %xmm6 # .LM0
-
- movdqa %xmm0, %xmm8
- movdqa %xmm1, %xmm9
-
- pand %xmm6, %xmm8
- pand %xmm6, %xmm9
- movdqa %xmm2, %xmm10
- pcmpeqb %xmm0, %xmm8
- psllq \$4, %xmm0 # 0x10...
- movdqa %xmm3, %xmm11
- pcmpeqb %xmm1, %xmm9
- psllq \$4, %xmm1 # 0x20...
-
- pand %xmm6, %xmm10
- pand %xmm6, %xmm11
- movdqa %xmm0, %xmm12
- pcmpeqb %xmm2, %xmm10
- psllq \$4, %xmm2 # 0x40...
- movdqa %xmm1, %xmm13
- pcmpeqb %xmm3, %xmm11
- psllq \$4, %xmm3 # 0x80...
-
- movdqa %xmm2, %xmm14
- movdqa %xmm3, %xmm15
- pxor %xmm5, %xmm8 # "pnot"
- pxor %xmm5, %xmm9
-
- pand %xmm6, %xmm12
- pand %xmm6, %xmm13
- movdqa %xmm8, 0x00($out) # write bit-sliced round key
- pcmpeqb %xmm0, %xmm12
- psrlq \$4, %xmm0 # 0x01...
- movdqa %xmm9, 0x10($out)
- pcmpeqb %xmm1, %xmm13
- psrlq \$4, %xmm1 # 0x02...
- lea 0x10($inp), $inp
-
- pand %xmm6, %xmm14
- pand %xmm6, %xmm15
- movdqa %xmm10, 0x20($out)
- pcmpeqb %xmm2, %xmm14
- psrlq \$4, %xmm2 # 0x04...
- movdqa %xmm11, 0x30($out)
- pcmpeqb %xmm3, %xmm15
- psrlq \$4, %xmm3 # 0x08...
- movdqu ($inp), %xmm6 # load next round key
-
- pxor %xmm5, %xmm13 # "pnot"
- pxor %xmm5, %xmm14
- movdqa %xmm12, 0x40($out)
- movdqa %xmm13, 0x50($out)
- movdqa %xmm14, 0x60($out)
- movdqa %xmm15, 0x70($out)
- lea 0x80($out),$out
- dec $rounds
- jnz .Lkey_loop
-
- movdqa 0x50($const), %xmm7 # .L63
- #movdqa %xmm6, ($out) # don't save last round key
- ret
-.cfi_endproc
-.size _bsaes_key_convert,.-_bsaes_key_convert
-___
-}
-
-if (0 && !$win64) { # following four functions are unsupported interface
- # used for benchmarking...
-$code.=<<___;
-.globl bsaes_enc_key_convert
-.type bsaes_enc_key_convert,\@function,2
-.align 16
-bsaes_enc_key_convert:
- mov 240($inp),%r10d # pass rounds
- mov $inp,%rcx # pass key
- mov $out,%rax # pass key schedule
- call _bsaes_key_convert
- pxor %xmm6,%xmm7 # fix up last round key
- movdqa %xmm7,(%rax) # save last round key
- ret
-.size bsaes_enc_key_convert,.-bsaes_enc_key_convert
-
-.globl bsaes_encrypt_128
-.type bsaes_encrypt_128,\@function,4
-.align 16
-bsaes_encrypt_128:
-.Lenc128_loop:
- movdqu 0x00($inp), @XMM[0] # load input
- movdqu 0x10($inp), @XMM[1]
- movdqu 0x20($inp), @XMM[2]
- movdqu 0x30($inp), @XMM[3]
- movdqu 0x40($inp), @XMM[4]
- movdqu 0x50($inp), @XMM[5]
- movdqu 0x60($inp), @XMM[6]
- movdqu 0x70($inp), @XMM[7]
- mov $key, %rax # pass the $key
- lea 0x80($inp), $inp
- mov \$10,%r10d
-
- call _bsaes_encrypt8
-
- movdqu @XMM[0], 0x00($out) # write output
- movdqu @XMM[1], 0x10($out)
- movdqu @XMM[4], 0x20($out)
- movdqu @XMM[6], 0x30($out)
- movdqu @XMM[3], 0x40($out)
- movdqu @XMM[7], 0x50($out)
- movdqu @XMM[2], 0x60($out)
- movdqu @XMM[5], 0x70($out)
- lea 0x80($out), $out
- sub \$0x80,$len
- ja .Lenc128_loop
- ret
-.size bsaes_encrypt_128,.-bsaes_encrypt_128
-
-.globl bsaes_dec_key_convert
-.type bsaes_dec_key_convert,\@function,2
-.align 16
-bsaes_dec_key_convert:
- mov 240($inp),%r10d # pass rounds
- mov $inp,%rcx # pass key
- mov $out,%rax # pass key schedule
- call _bsaes_key_convert
- pxor ($out),%xmm7 # fix up round 0 key
- movdqa %xmm6,(%rax) # save last round key
- movdqa %xmm7,($out)
- ret
-.size bsaes_dec_key_convert,.-bsaes_dec_key_convert
-
-.globl bsaes_decrypt_128
-.type bsaes_decrypt_128,\@function,4
-.align 16
-bsaes_decrypt_128:
-.Ldec128_loop:
- movdqu 0x00($inp), @XMM[0] # load input
- movdqu 0x10($inp), @XMM[1]
- movdqu 0x20($inp), @XMM[2]
- movdqu 0x30($inp), @XMM[3]
- movdqu 0x40($inp), @XMM[4]
- movdqu 0x50($inp), @XMM[5]
- movdqu 0x60($inp), @XMM[6]
- movdqu 0x70($inp), @XMM[7]
- mov $key, %rax # pass the $key
- lea 0x80($inp), $inp
- mov \$10,%r10d
-
- call _bsaes_decrypt8
-
- movdqu @XMM[0], 0x00($out) # write output
- movdqu @XMM[1], 0x10($out)
- movdqu @XMM[6], 0x20($out)
- movdqu @XMM[4], 0x30($out)
- movdqu @XMM[2], 0x40($out)
- movdqu @XMM[7], 0x50($out)
- movdqu @XMM[3], 0x60($out)
- movdqu @XMM[5], 0x70($out)
- lea 0x80($out), $out
- sub \$0x80,$len
- ja .Ldec128_loop
- ret
-.size bsaes_decrypt_128,.-bsaes_decrypt_128
-___
-}
-{
-######################################################################
-#
-# OpenSSL interface
-#
-my ($arg1,$arg2,$arg3,$arg4,$arg5,$arg6)=$win64 ? ("%rcx","%rdx","%r8","%r9","%r10","%r11d")
- : ("%rdi","%rsi","%rdx","%rcx","%r8","%r9d");
-my ($inp,$out,$len,$key)=("%r12","%r13","%r14","%r15");
-
-if ($ecb) {
-$code.=<<___;
-.globl bsaes_ecb_encrypt_blocks
-.type bsaes_ecb_encrypt_blocks,\@abi-omnipotent
-.align 16
-bsaes_ecb_encrypt_blocks:
-.cfi_startproc
- mov %rsp, %rax
-.Lecb_enc_prologue:
- push %rbp
-.cfi_push %rbp
- push %rbx
-.cfi_push %rbx
- push %r12
-.cfi_push %r12
- push %r13
-.cfi_push %r13
- push %r14
-.cfi_push %r14
- push %r15
-.cfi_push %r15
- lea -0x48(%rsp),%rsp
-.cfi_adjust_cfa_offset 0x48
-___
-$code.=<<___ if ($win64);
- lea -0xa0(%rsp), %rsp
- movaps %xmm6, 0x40(%rsp)
- movaps %xmm7, 0x50(%rsp)
- movaps %xmm8, 0x60(%rsp)
- movaps %xmm9, 0x70(%rsp)
- movaps %xmm10, 0x80(%rsp)
- movaps %xmm11, 0x90(%rsp)
- movaps %xmm12, 0xa0(%rsp)
- movaps %xmm13, 0xb0(%rsp)
- movaps %xmm14, 0xc0(%rsp)
- movaps %xmm15, 0xd0(%rsp)
-.Lecb_enc_body:
-___
-$code.=<<___;
- mov %rsp,%rbp # backup %rsp
-.cfi_def_cfa_register %rbp
- mov 240($arg4),%eax # rounds
- mov $arg1,$inp # backup arguments
- mov $arg2,$out
- mov $arg3,$len
- mov $arg4,$key
- cmp \$8,$arg3
- jb .Lecb_enc_short
-
- mov %eax,%ebx # backup rounds
- shl \$7,%rax # 128 bytes per inner round key
- sub \$`128-32`,%rax # size of bit-sliced key schedule
- sub %rax,%rsp
- mov %rsp,%rax # pass key schedule
- mov $key,%rcx # pass key
- mov %ebx,%r10d # pass rounds
- call _bsaes_key_convert
- pxor %xmm6,%xmm7 # fix up last round key
- movdqa %xmm7,(%rax) # save last round key
-
- sub \$8,$len
-.Lecb_enc_loop:
- movdqu 0x00($inp), @XMM[0] # load input
- movdqu 0x10($inp), @XMM[1]
- movdqu 0x20($inp), @XMM[2]
- movdqu 0x30($inp), @XMM[3]
- movdqu 0x40($inp), @XMM[4]
- movdqu 0x50($inp), @XMM[5]
- mov %rsp, %rax # pass key schedule
- movdqu 0x60($inp), @XMM[6]
- mov %ebx,%r10d # pass rounds
- movdqu 0x70($inp), @XMM[7]
- lea 0x80($inp), $inp
-
- call _bsaes_encrypt8
-
- movdqu @XMM[0], 0x00($out) # write output
- movdqu @XMM[1], 0x10($out)
- movdqu @XMM[4], 0x20($out)
- movdqu @XMM[6], 0x30($out)
- movdqu @XMM[3], 0x40($out)
- movdqu @XMM[7], 0x50($out)
- movdqu @XMM[2], 0x60($out)
- movdqu @XMM[5], 0x70($out)
- lea 0x80($out), $out
- sub \$8,$len
- jnc .Lecb_enc_loop
-
- add \$8,$len
- jz .Lecb_enc_done
-
- movdqu 0x00($inp), @XMM[0] # load input
- mov %rsp, %rax # pass key schedule
- mov %ebx,%r10d # pass rounds
- cmp \$2,$len
- jb .Lecb_enc_one
- movdqu 0x10($inp), @XMM[1]
- je .Lecb_enc_two
- movdqu 0x20($inp), @XMM[2]
- cmp \$4,$len
- jb .Lecb_enc_three
- movdqu 0x30($inp), @XMM[3]
- je .Lecb_enc_four
- movdqu 0x40($inp), @XMM[4]
- cmp \$6,$len
- jb .Lecb_enc_five
- movdqu 0x50($inp), @XMM[5]
- je .Lecb_enc_six
- movdqu 0x60($inp), @XMM[6]
- call _bsaes_encrypt8
- movdqu @XMM[0], 0x00($out) # write output
- movdqu @XMM[1], 0x10($out)
- movdqu @XMM[4], 0x20($out)
- movdqu @XMM[6], 0x30($out)
- movdqu @XMM[3], 0x40($out)
- movdqu @XMM[7], 0x50($out)
- movdqu @XMM[2], 0x60($out)
- jmp .Lecb_enc_done
-.align 16
-.Lecb_enc_six:
- call _bsaes_encrypt8
- movdqu @XMM[0], 0x00($out) # write output
- movdqu @XMM[1], 0x10($out)
- movdqu @XMM[4], 0x20($out)
- movdqu @XMM[6], 0x30($out)
- movdqu @XMM[3], 0x40($out)
- movdqu @XMM[7], 0x50($out)
- jmp .Lecb_enc_done
-.align 16
-.Lecb_enc_five:
- call _bsaes_encrypt8
- movdqu @XMM[0], 0x00($out) # write output
- movdqu @XMM[1], 0x10($out)
- movdqu @XMM[4], 0x20($out)
- movdqu @XMM[6], 0x30($out)
- movdqu @XMM[3], 0x40($out)
- jmp .Lecb_enc_done
-.align 16
-.Lecb_enc_four:
- call _bsaes_encrypt8
- movdqu @XMM[0], 0x00($out) # write output
- movdqu @XMM[1], 0x10($out)
- movdqu @XMM[4], 0x20($out)
- movdqu @XMM[6], 0x30($out)
- jmp .Lecb_enc_done
-.align 16
-.Lecb_enc_three:
- call _bsaes_encrypt8
- movdqu @XMM[0], 0x00($out) # write output
- movdqu @XMM[1], 0x10($out)
- movdqu @XMM[4], 0x20($out)
- jmp .Lecb_enc_done
-.align 16
-.Lecb_enc_two:
- call _bsaes_encrypt8
- movdqu @XMM[0], 0x00($out) # write output
- movdqu @XMM[1], 0x10($out)
- jmp .Lecb_enc_done
-.align 16
-.Lecb_enc_one:
- call _bsaes_encrypt8
- movdqu @XMM[0], 0x00($out) # write output
- jmp .Lecb_enc_done
-.align 16
-.Lecb_enc_short:
- lea ($inp), $arg1
- lea ($out), $arg2
- lea ($key), $arg3
- call asm_AES_encrypt
- lea 16($inp), $inp
- lea 16($out), $out
- dec $len
- jnz .Lecb_enc_short
-
-.Lecb_enc_done:
- lea (%rsp),%rax
- pxor %xmm0, %xmm0
-.Lecb_enc_bzero: # wipe key schedule [if any]
- movdqa %xmm0, 0x00(%rax)
- movdqa %xmm0, 0x10(%rax)
- lea 0x20(%rax), %rax
- cmp %rax, %rbp
- jb .Lecb_enc_bzero
-
- lea 0x78(%rbp),%rax
-.cfi_def_cfa %rax,8
-___
-$code.=<<___ if ($win64);
- movaps 0x40(%rbp), %xmm6
- movaps 0x50(%rbp), %xmm7
- movaps 0x60(%rbp), %xmm8
- movaps 0x70(%rbp), %xmm9
- movaps 0x80(%rbp), %xmm10
- movaps 0x90(%rbp), %xmm11
- movaps 0xa0(%rbp), %xmm12
- movaps 0xb0(%rbp), %xmm13
- movaps 0xc0(%rbp), %xmm14
- movaps 0xd0(%rbp), %xmm15
- lea 0xa0(%rax), %rax
-.Lecb_enc_tail:
-___
-$code.=<<___;
- mov -48(%rax), %r15
-.cfi_restore %r15
- mov -40(%rax), %r14
-.cfi_restore %r14
- mov -32(%rax), %r13
-.cfi_restore %r13
- mov -24(%rax), %r12
-.cfi_restore %r12
- mov -16(%rax), %rbx
-.cfi_restore %rbx
- mov -8(%rax), %rbp
-.cfi_restore %rbp
- lea (%rax), %rsp # restore %rsp
-.cfi_def_cfa_register %rsp
-.Lecb_enc_epilogue:
- ret
-.cfi_endproc
-.size bsaes_ecb_encrypt_blocks,.-bsaes_ecb_encrypt_blocks
-
-.globl bsaes_ecb_decrypt_blocks
-.type bsaes_ecb_decrypt_blocks,\@abi-omnipotent
-.align 16
-bsaes_ecb_decrypt_blocks:
-.cfi_startproc
- mov %rsp, %rax
-.Lecb_dec_prologue:
- push %rbp
-.cfi_push %rbp
- push %rbx
-.cfi_push %rbx
- push %r12
-.cfi_push %r12
- push %r13
-.cfi_push %r13
- push %r14
-.cfi_push %r14
- push %r15
-.cfi_push %r15
- lea -0x48(%rsp),%rsp
-.cfi_adjust_cfa_offset 0x48
-___
-$code.=<<___ if ($win64);
- lea -0xa0(%rsp), %rsp
- movaps %xmm6, 0x40(%rsp)
- movaps %xmm7, 0x50(%rsp)
- movaps %xmm8, 0x60(%rsp)
- movaps %xmm9, 0x70(%rsp)
- movaps %xmm10, 0x80(%rsp)
- movaps %xmm11, 0x90(%rsp)
- movaps %xmm12, 0xa0(%rsp)
- movaps %xmm13, 0xb0(%rsp)
- movaps %xmm14, 0xc0(%rsp)
- movaps %xmm15, 0xd0(%rsp)
-.Lecb_dec_body:
-___
-$code.=<<___;
- mov %rsp,%rbp # backup %rsp
-.cfi_def_cfa_register %rbp
- mov 240($arg4),%eax # rounds
- mov $arg1,$inp # backup arguments
- mov $arg2,$out
- mov $arg3,$len
- mov $arg4,$key
- cmp \$8,$arg3
- jb .Lecb_dec_short
-
- mov %eax,%ebx # backup rounds
- shl \$7,%rax # 128 bytes per inner round key
- sub \$`128-32`,%rax # size of bit-sliced key schedule
- sub %rax,%rsp
- mov %rsp,%rax # pass key schedule
- mov $key,%rcx # pass key
- mov %ebx,%r10d # pass rounds
- call _bsaes_key_convert
- pxor (%rsp),%xmm7 # fix up 0 round key
- movdqa %xmm6,(%rax) # save last round key
- movdqa %xmm7,(%rsp)
-
- sub \$8,$len
-.Lecb_dec_loop:
- movdqu 0x00($inp), @XMM[0] # load input
- movdqu 0x10($inp), @XMM[1]
- movdqu 0x20($inp), @XMM[2]
- movdqu 0x30($inp), @XMM[3]
- movdqu 0x40($inp), @XMM[4]
- movdqu 0x50($inp), @XMM[5]
- mov %rsp, %rax # pass key schedule
- movdqu 0x60($inp), @XMM[6]
- mov %ebx,%r10d # pass rounds
- movdqu 0x70($inp), @XMM[7]
- lea 0x80($inp), $inp
-
- call _bsaes_decrypt8
-
- movdqu @XMM[0], 0x00($out) # write output
- movdqu @XMM[1], 0x10($out)
- movdqu @XMM[6], 0x20($out)
- movdqu @XMM[4], 0x30($out)
- movdqu @XMM[2], 0x40($out)
- movdqu @XMM[7], 0x50($out)
- movdqu @XMM[3], 0x60($out)
- movdqu @XMM[5], 0x70($out)
- lea 0x80($out), $out
- sub \$8,$len
- jnc .Lecb_dec_loop
-
- add \$8,$len
- jz .Lecb_dec_done
-
- movdqu 0x00($inp), @XMM[0] # load input
- mov %rsp, %rax # pass key schedule
- mov %ebx,%r10d # pass rounds
- cmp \$2,$len
- jb .Lecb_dec_one
- movdqu 0x10($inp), @XMM[1]
- je .Lecb_dec_two
- movdqu 0x20($inp), @XMM[2]
- cmp \$4,$len
- jb .Lecb_dec_three
- movdqu 0x30($inp), @XMM[3]
- je .Lecb_dec_four
- movdqu 0x40($inp), @XMM[4]
- cmp \$6,$len
- jb .Lecb_dec_five
- movdqu 0x50($inp), @XMM[5]
- je .Lecb_dec_six
- movdqu 0x60($inp), @XMM[6]
- call _bsaes_decrypt8
- movdqu @XMM[0], 0x00($out) # write output
- movdqu @XMM[1], 0x10($out)
- movdqu @XMM[6], 0x20($out)
- movdqu @XMM[4], 0x30($out)
- movdqu @XMM[2], 0x40($out)
- movdqu @XMM[7], 0x50($out)
- movdqu @XMM[3], 0x60($out)
- jmp .Lecb_dec_done
-.align 16
-.Lecb_dec_six:
- call _bsaes_decrypt8
- movdqu @XMM[0], 0x00($out) # write output
- movdqu @XMM[1], 0x10($out)
- movdqu @XMM[6], 0x20($out)
- movdqu @XMM[4], 0x30($out)
- movdqu @XMM[2], 0x40($out)
- movdqu @XMM[7], 0x50($out)
- jmp .Lecb_dec_done
-.align 16
-.Lecb_dec_five:
- call _bsaes_decrypt8
- movdqu @XMM[0], 0x00($out) # write output
- movdqu @XMM[1], 0x10($out)
- movdqu @XMM[6], 0x20($out)
- movdqu @XMM[4], 0x30($out)
- movdqu @XMM[2], 0x40($out)
- jmp .Lecb_dec_done
-.align 16
-.Lecb_dec_four:
- call _bsaes_decrypt8
- movdqu @XMM[0], 0x00($out) # write output
- movdqu @XMM[1], 0x10($out)
- movdqu @XMM[6], 0x20($out)
- movdqu @XMM[4], 0x30($out)
- jmp .Lecb_dec_done
-.align 16
-.Lecb_dec_three:
- call _bsaes_decrypt8
- movdqu @XMM[0], 0x00($out) # write output
- movdqu @XMM[1], 0x10($out)
- movdqu @XMM[6], 0x20($out)
- jmp .Lecb_dec_done
-.align 16
-.Lecb_dec_two:
- call _bsaes_decrypt8
- movdqu @XMM[0], 0x00($out) # write output
- movdqu @XMM[1], 0x10($out)
- jmp .Lecb_dec_done
-.align 16
-.Lecb_dec_one:
- call _bsaes_decrypt8
- movdqu @XMM[0], 0x00($out) # write output
- jmp .Lecb_dec_done
-.align 16
-.Lecb_dec_short:
- lea ($inp), $arg1
- lea ($out), $arg2
- lea ($key), $arg3
- call asm_AES_decrypt
- lea 16($inp), $inp
- lea 16($out), $out
- dec $len
- jnz .Lecb_dec_short
-
-.Lecb_dec_done:
- lea (%rsp),%rax
- pxor %xmm0, %xmm0
-.Lecb_dec_bzero: # wipe key schedule [if any]
- movdqa %xmm0, 0x00(%rax)
- movdqa %xmm0, 0x10(%rax)
- lea 0x20(%rax), %rax
- cmp %rax, %rbp
- jb .Lecb_dec_bzero
-
- lea 0x78(%rbp),%rax
-.cfi_def_cfa %rax,8
-___
-$code.=<<___ if ($win64);
- movaps 0x40(%rbp), %xmm6
- movaps 0x50(%rbp), %xmm7
- movaps 0x60(%rbp), %xmm8
- movaps 0x70(%rbp), %xmm9
- movaps 0x80(%rbp), %xmm10
- movaps 0x90(%rbp), %xmm11
- movaps 0xa0(%rbp), %xmm12
- movaps 0xb0(%rbp), %xmm13
- movaps 0xc0(%rbp), %xmm14
- movaps 0xd0(%rbp), %xmm15
- lea 0xa0(%rax), %rax
-.Lecb_dec_tail:
-___
-$code.=<<___;
- mov -48(%rax), %r15
-.cfi_restore %r15
- mov -40(%rax), %r14
-.cfi_restore %r14
- mov -32(%rax), %r13
-.cfi_restore %r13
- mov -24(%rax), %r12
-.cfi_restore %r12
- mov -16(%rax), %rbx
-.cfi_restore %rbx
- mov -8(%rax), %rbp
-.cfi_restore %rbp
- lea (%rax), %rsp # restore %rsp
-.cfi_def_cfa_register %rsp
-.Lecb_dec_epilogue:
- ret
-.cfi_endproc
-.size bsaes_ecb_decrypt_blocks,.-bsaes_ecb_decrypt_blocks
-___
-}
-$code.=<<___;
-.extern asm_AES_cbc_encrypt
-.globl bsaes_cbc_encrypt
-.type bsaes_cbc_encrypt,\@abi-omnipotent
-.align 16
-bsaes_cbc_encrypt:
-.cfi_startproc
-___
-$code.=<<___ if ($win64);
- mov 48(%rsp),$arg6 # pull direction flag
-___
-$code.=<<___;
- cmp \$0,$arg6
- jne asm_AES_cbc_encrypt
- cmp \$128,$arg3
- jb asm_AES_cbc_encrypt
-
- mov %rsp, %rax
-.Lcbc_dec_prologue:
- push %rbp
-.cfi_push %rbp
- push %rbx
-.cfi_push %rbx
- push %r12
-.cfi_push %r12
- push %r13
-.cfi_push %r13
- push %r14
-.cfi_push %r14
- push %r15
-.cfi_push %r15
- lea -0x48(%rsp), %rsp
-.cfi_adjust_cfa_offset 0x48
-___
-$code.=<<___ if ($win64);
- mov 0xa0(%rsp),$arg5 # pull ivp
- lea -0xa0(%rsp), %rsp
- movaps %xmm6, 0x40(%rsp)
- movaps %xmm7, 0x50(%rsp)
- movaps %xmm8, 0x60(%rsp)
- movaps %xmm9, 0x70(%rsp)
- movaps %xmm10, 0x80(%rsp)
- movaps %xmm11, 0x90(%rsp)
- movaps %xmm12, 0xa0(%rsp)
- movaps %xmm13, 0xb0(%rsp)
- movaps %xmm14, 0xc0(%rsp)
- movaps %xmm15, 0xd0(%rsp)
-.Lcbc_dec_body:
-___
-$code.=<<___;
- mov %rsp, %rbp # backup %rsp
-.cfi_def_cfa_register %rbp
- mov 240($arg4), %eax # rounds
- mov $arg1, $inp # backup arguments
- mov $arg2, $out
- mov $arg3, $len
- mov $arg4, $key
- mov $arg5, %rbx
- shr \$4, $len # bytes to blocks
-
- mov %eax, %edx # rounds
- shl \$7, %rax # 128 bytes per inner round key
- sub \$`128-32`, %rax # size of bit-sliced key schedule
- sub %rax, %rsp
-
- mov %rsp, %rax # pass key schedule
- mov $key, %rcx # pass key
- mov %edx, %r10d # pass rounds
- call _bsaes_key_convert
- pxor (%rsp),%xmm7 # fix up 0 round key
- movdqa %xmm6,(%rax) # save last round key
- movdqa %xmm7,(%rsp)
-
- movdqu (%rbx), @XMM[15] # load IV
- sub \$8,$len
-.Lcbc_dec_loop:
- movdqu 0x00($inp), @XMM[0] # load input
- movdqu 0x10($inp), @XMM[1]
- movdqu 0x20($inp), @XMM[2]
- movdqu 0x30($inp), @XMM[3]
- movdqu 0x40($inp), @XMM[4]
- movdqu 0x50($inp), @XMM[5]
- mov %rsp, %rax # pass key schedule
- movdqu 0x60($inp), @XMM[6]
- mov %edx,%r10d # pass rounds
- movdqu 0x70($inp), @XMM[7]
- movdqa @XMM[15], 0x20(%rbp) # put aside IV
-
- call _bsaes_decrypt8
-
- pxor 0x20(%rbp), @XMM[0] # ^= IV
- movdqu 0x00($inp), @XMM[8] # re-load input
- movdqu 0x10($inp), @XMM[9]
- pxor @XMM[8], @XMM[1]
- movdqu 0x20($inp), @XMM[10]
- pxor @XMM[9], @XMM[6]
- movdqu 0x30($inp), @XMM[11]
- pxor @XMM[10], @XMM[4]
- movdqu 0x40($inp), @XMM[12]
- pxor @XMM[11], @XMM[2]
- movdqu 0x50($inp), @XMM[13]
- pxor @XMM[12], @XMM[7]
- movdqu 0x60($inp), @XMM[14]
- pxor @XMM[13], @XMM[3]
- movdqu 0x70($inp), @XMM[15] # IV
- pxor @XMM[14], @XMM[5]
- movdqu @XMM[0], 0x00($out) # write output
- lea 0x80($inp), $inp
- movdqu @XMM[1], 0x10($out)
- movdqu @XMM[6], 0x20($out)
- movdqu @XMM[4], 0x30($out)
- movdqu @XMM[2], 0x40($out)
- movdqu @XMM[7], 0x50($out)
- movdqu @XMM[3], 0x60($out)
- movdqu @XMM[5], 0x70($out)
- lea 0x80($out), $out
- sub \$8,$len
- jnc .Lcbc_dec_loop
-
- add \$8,$len
- jz .Lcbc_dec_done
-
- movdqu 0x00($inp), @XMM[0] # load input
- mov %rsp, %rax # pass key schedule
- mov %edx, %r10d # pass rounds
- cmp \$2,$len
- jb .Lcbc_dec_one
- movdqu 0x10($inp), @XMM[1]
- je .Lcbc_dec_two
- movdqu 0x20($inp), @XMM[2]
- cmp \$4,$len
- jb .Lcbc_dec_three
- movdqu 0x30($inp), @XMM[3]
- je .Lcbc_dec_four
- movdqu 0x40($inp), @XMM[4]
- cmp \$6,$len
- jb .Lcbc_dec_five
- movdqu 0x50($inp), @XMM[5]
- je .Lcbc_dec_six
- movdqu 0x60($inp), @XMM[6]
- movdqa @XMM[15], 0x20(%rbp) # put aside IV
- call _bsaes_decrypt8
- pxor 0x20(%rbp), @XMM[0] # ^= IV
- movdqu 0x00($inp), @XMM[8] # re-load input
- movdqu 0x10($inp), @XMM[9]
- pxor @XMM[8], @XMM[1]
- movdqu 0x20($inp), @XMM[10]
- pxor @XMM[9], @XMM[6]
- movdqu 0x30($inp), @XMM[11]
- pxor @XMM[10], @XMM[4]
- movdqu 0x40($inp), @XMM[12]
- pxor @XMM[11], @XMM[2]
- movdqu 0x50($inp), @XMM[13]
- pxor @XMM[12], @XMM[7]
- movdqu 0x60($inp), @XMM[15] # IV
- pxor @XMM[13], @XMM[3]
- movdqu @XMM[0], 0x00($out) # write output
- movdqu @XMM[1], 0x10($out)
- movdqu @XMM[6], 0x20($out)
- movdqu @XMM[4], 0x30($out)
- movdqu @XMM[2], 0x40($out)
- movdqu @XMM[7], 0x50($out)
- movdqu @XMM[3], 0x60($out)
- jmp .Lcbc_dec_done
-.align 16
-.Lcbc_dec_six:
- movdqa @XMM[15], 0x20(%rbp) # put aside IV
- call _bsaes_decrypt8
- pxor 0x20(%rbp), @XMM[0] # ^= IV
- movdqu 0x00($inp), @XMM[8] # re-load input
- movdqu 0x10($inp), @XMM[9]
- pxor @XMM[8], @XMM[1]
- movdqu 0x20($inp), @XMM[10]
- pxor @XMM[9], @XMM[6]
- movdqu 0x30($inp), @XMM[11]
- pxor @XMM[10], @XMM[4]
- movdqu 0x40($inp), @XMM[12]
- pxor @XMM[11], @XMM[2]
- movdqu 0x50($inp), @XMM[15] # IV
- pxor @XMM[12], @XMM[7]
- movdqu @XMM[0], 0x00($out) # write output
- movdqu @XMM[1], 0x10($out)
- movdqu @XMM[6], 0x20($out)
- movdqu @XMM[4], 0x30($out)
- movdqu @XMM[2], 0x40($out)
- movdqu @XMM[7], 0x50($out)
- jmp .Lcbc_dec_done
-.align 16
-.Lcbc_dec_five:
- movdqa @XMM[15], 0x20(%rbp) # put aside IV
- call _bsaes_decrypt8
- pxor 0x20(%rbp), @XMM[0] # ^= IV
- movdqu 0x00($inp), @XMM[8] # re-load input
- movdqu 0x10($inp), @XMM[9]
- pxor @XMM[8], @XMM[1]
- movdqu 0x20($inp), @XMM[10]
- pxor @XMM[9], @XMM[6]
- movdqu 0x30($inp), @XMM[11]
- pxor @XMM[10], @XMM[4]
- movdqu 0x40($inp), @XMM[15] # IV
- pxor @XMM[11], @XMM[2]
- movdqu @XMM[0], 0x00($out) # write output
- movdqu @XMM[1], 0x10($out)
- movdqu @XMM[6], 0x20($out)
- movdqu @XMM[4], 0x30($out)
- movdqu @XMM[2], 0x40($out)
- jmp .Lcbc_dec_done
-.align 16
-.Lcbc_dec_four:
- movdqa @XMM[15], 0x20(%rbp) # put aside IV
- call _bsaes_decrypt8
- pxor 0x20(%rbp), @XMM[0] # ^= IV
- movdqu 0x00($inp), @XMM[8] # re-load input
- movdqu 0x10($inp), @XMM[9]
- pxor @XMM[8], @XMM[1]
- movdqu 0x20($inp), @XMM[10]
- pxor @XMM[9], @XMM[6]
- movdqu 0x30($inp), @XMM[15] # IV
- pxor @XMM[10], @XMM[4]
- movdqu @XMM[0], 0x00($out) # write output
- movdqu @XMM[1], 0x10($out)
- movdqu @XMM[6], 0x20($out)
- movdqu @XMM[4], 0x30($out)
- jmp .Lcbc_dec_done
-.align 16
-.Lcbc_dec_three:
- movdqa @XMM[15], 0x20(%rbp) # put aside IV
- call _bsaes_decrypt8
- pxor 0x20(%rbp), @XMM[0] # ^= IV
- movdqu 0x00($inp), @XMM[8] # re-load input
- movdqu 0x10($inp), @XMM[9]
- pxor @XMM[8], @XMM[1]
- movdqu 0x20($inp), @XMM[15] # IV
- pxor @XMM[9], @XMM[6]
- movdqu @XMM[0], 0x00($out) # write output
- movdqu @XMM[1], 0x10($out)
- movdqu @XMM[6], 0x20($out)
- jmp .Lcbc_dec_done
-.align 16
-.Lcbc_dec_two:
- movdqa @XMM[15], 0x20(%rbp) # put aside IV
- call _bsaes_decrypt8
- pxor 0x20(%rbp), @XMM[0] # ^= IV
- movdqu 0x00($inp), @XMM[8] # re-load input
- movdqu 0x10($inp), @XMM[15] # IV
- pxor @XMM[8], @XMM[1]
- movdqu @XMM[0], 0x00($out) # write output
- movdqu @XMM[1], 0x10($out)
- jmp .Lcbc_dec_done
-.align 16
-.Lcbc_dec_one:
- lea ($inp), $arg1
- lea 0x20(%rbp), $arg2 # buffer output
- lea ($key), $arg3
- call asm_AES_decrypt # doesn't touch %xmm
- pxor 0x20(%rbp), @XMM[15] # ^= IV
- movdqu @XMM[15], ($out) # write output
- movdqa @XMM[0], @XMM[15] # IV
-
-.Lcbc_dec_done:
- movdqu @XMM[15], (%rbx) # return IV
- lea (%rsp), %rax
- pxor %xmm0, %xmm0
-.Lcbc_dec_bzero: # wipe key schedule [if any]
- movdqa %xmm0, 0x00(%rax)
- movdqa %xmm0, 0x10(%rax)
- lea 0x20(%rax), %rax
- cmp %rax, %rbp
- ja .Lcbc_dec_bzero
-
- lea 0x78(%rbp),%rax
-.cfi_def_cfa %rax,8
-___
-$code.=<<___ if ($win64);
- movaps 0x40(%rbp), %xmm6
- movaps 0x50(%rbp), %xmm7
- movaps 0x60(%rbp), %xmm8
- movaps 0x70(%rbp), %xmm9
- movaps 0x80(%rbp), %xmm10
- movaps 0x90(%rbp), %xmm11
- movaps 0xa0(%rbp), %xmm12
- movaps 0xb0(%rbp), %xmm13
- movaps 0xc0(%rbp), %xmm14
- movaps 0xd0(%rbp), %xmm15
- lea 0xa0(%rax), %rax
-.Lcbc_dec_tail:
-___
-$code.=<<___;
- mov -48(%rax), %r15
-.cfi_restore %r15
- mov -40(%rax), %r14
-.cfi_restore %r14
- mov -32(%rax), %r13
-.cfi_restore %r13
- mov -24(%rax), %r12
-.cfi_restore %r12
- mov -16(%rax), %rbx
-.cfi_restore %rbx
- mov -8(%rax), %rbp
-.cfi_restore %rbp
- lea (%rax), %rsp # restore %rsp
-.cfi_def_cfa_register %rsp
-.Lcbc_dec_epilogue:
- ret
-.cfi_endproc
-.size bsaes_cbc_encrypt,.-bsaes_cbc_encrypt
-
-.globl bsaes_ctr32_encrypt_blocks
-.type bsaes_ctr32_encrypt_blocks,\@abi-omnipotent
-.align 16
-bsaes_ctr32_encrypt_blocks:
-.cfi_startproc
- mov %rsp, %rax
-.Lctr_enc_prologue:
- push %rbp
-.cfi_push %rbp
- push %rbx
-.cfi_push %rbx
- push %r12
-.cfi_push %r12
- push %r13
-.cfi_push %r13
- push %r14
-.cfi_push %r14
- push %r15
-.cfi_push %r15
- lea -0x48(%rsp), %rsp
-.cfi_adjust_cfa_offset 0x48
-___
-$code.=<<___ if ($win64);
- mov 0xa0(%rsp),$arg5 # pull ivp
- lea -0xa0(%rsp), %rsp
- movaps %xmm6, 0x40(%rsp)
- movaps %xmm7, 0x50(%rsp)
- movaps %xmm8, 0x60(%rsp)
- movaps %xmm9, 0x70(%rsp)
- movaps %xmm10, 0x80(%rsp)
- movaps %xmm11, 0x90(%rsp)
- movaps %xmm12, 0xa0(%rsp)
- movaps %xmm13, 0xb0(%rsp)
- movaps %xmm14, 0xc0(%rsp)
- movaps %xmm15, 0xd0(%rsp)
-.Lctr_enc_body:
-___
-$code.=<<___;
- mov %rsp, %rbp # backup %rsp
-.cfi_def_cfa_register %rbp
- movdqu ($arg5), %xmm0 # load counter
- mov 240($arg4), %eax # rounds
- mov $arg1, $inp # backup arguments
- mov $arg2, $out
- mov $arg3, $len
- mov $arg4, $key
- movdqa %xmm0, 0x20(%rbp) # copy counter
- cmp \$8, $arg3
- jb .Lctr_enc_short
-
- mov %eax, %ebx # rounds
- shl \$7, %rax # 128 bytes per inner round key
- sub \$`128-32`, %rax # size of bit-sliced key schedule
- sub %rax, %rsp
-
- mov %rsp, %rax # pass key schedule
- mov $key, %rcx # pass key
- mov %ebx, %r10d # pass rounds
- call _bsaes_key_convert
- pxor %xmm6,%xmm7 # fix up last round key
- movdqa %xmm7,(%rax) # save last round key
-
- movdqa (%rsp), @XMM[9] # load round0 key
- lea .LADD1(%rip), %r11
- movdqa 0x20(%rbp), @XMM[0] # counter copy
- movdqa -0x20(%r11), @XMM[8] # .LSWPUP
- pshufb @XMM[8], @XMM[9] # byte swap upper part
- pshufb @XMM[8], @XMM[0]
- movdqa @XMM[9], (%rsp) # save adjusted round0 key
- jmp .Lctr_enc_loop
-.align 16
-.Lctr_enc_loop:
- movdqa @XMM[0], 0x20(%rbp) # save counter
- movdqa @XMM[0], @XMM[1] # prepare 8 counter values
- movdqa @XMM[0], @XMM[2]
- paddd 0x00(%r11), @XMM[1] # .LADD1
- movdqa @XMM[0], @XMM[3]
- paddd 0x10(%r11), @XMM[2] # .LADD2
- movdqa @XMM[0], @XMM[4]
- paddd 0x20(%r11), @XMM[3] # .LADD3
- movdqa @XMM[0], @XMM[5]
- paddd 0x30(%r11), @XMM[4] # .LADD4
- movdqa @XMM[0], @XMM[6]
- paddd 0x40(%r11), @XMM[5] # .LADD5
- movdqa @XMM[0], @XMM[7]
- paddd 0x50(%r11), @XMM[6] # .LADD6
- paddd 0x60(%r11), @XMM[7] # .LADD7
-
- # Borrow prologue from _bsaes_encrypt8 to use the opportunity
- # to flip byte order in 32-bit counter
- movdqa (%rsp), @XMM[9] # round 0 key
- lea 0x10(%rsp), %rax # pass key schedule
- movdqa -0x10(%r11), @XMM[8] # .LSWPUPM0SR
- pxor @XMM[9], @XMM[0] # xor with round0 key
- pxor @XMM[9], @XMM[1]
- pxor @XMM[9], @XMM[2]
- pxor @XMM[9], @XMM[3]
- pshufb @XMM[8], @XMM[0]
- pshufb @XMM[8], @XMM[1]
- pxor @XMM[9], @XMM[4]
- pxor @XMM[9], @XMM[5]
- pshufb @XMM[8], @XMM[2]
- pshufb @XMM[8], @XMM[3]
- pxor @XMM[9], @XMM[6]
- pxor @XMM[9], @XMM[7]
- pshufb @XMM[8], @XMM[4]
- pshufb @XMM[8], @XMM[5]
- pshufb @XMM[8], @XMM[6]
- pshufb @XMM[8], @XMM[7]
- lea .LBS0(%rip), %r11 # constants table
- mov %ebx,%r10d # pass rounds
-
- call _bsaes_encrypt8_bitslice
-
- sub \$8,$len
- jc .Lctr_enc_loop_done
-
- movdqu 0x00($inp), @XMM[8] # load input
- movdqu 0x10($inp), @XMM[9]
- movdqu 0x20($inp), @XMM[10]
- movdqu 0x30($inp), @XMM[11]
- movdqu 0x40($inp), @XMM[12]
- movdqu 0x50($inp), @XMM[13]
- movdqu 0x60($inp), @XMM[14]
- movdqu 0x70($inp), @XMM[15]
- lea 0x80($inp),$inp
- pxor @XMM[0], @XMM[8]
- movdqa 0x20(%rbp), @XMM[0] # load counter
- pxor @XMM[9], @XMM[1]
- movdqu @XMM[8], 0x00($out) # write output
- pxor @XMM[10], @XMM[4]
- movdqu @XMM[1], 0x10($out)
- pxor @XMM[11], @XMM[6]
- movdqu @XMM[4], 0x20($out)
- pxor @XMM[12], @XMM[3]
- movdqu @XMM[6], 0x30($out)
- pxor @XMM[13], @XMM[7]
- movdqu @XMM[3], 0x40($out)
- pxor @XMM[14], @XMM[2]
- movdqu @XMM[7], 0x50($out)
- pxor @XMM[15], @XMM[5]
- movdqu @XMM[2], 0x60($out)
- lea .LADD1(%rip), %r11
- movdqu @XMM[5], 0x70($out)
- lea 0x80($out), $out
- paddd 0x70(%r11), @XMM[0] # .LADD8
- jnz .Lctr_enc_loop
-
- jmp .Lctr_enc_done
-.align 16
-.Lctr_enc_loop_done:
- add \$8, $len
- movdqu 0x00($inp), @XMM[8] # load input
- pxor @XMM[8], @XMM[0]
- movdqu @XMM[0], 0x00($out) # write output
- cmp \$2,$len
- jb .Lctr_enc_done
- movdqu 0x10($inp), @XMM[9]
- pxor @XMM[9], @XMM[1]
- movdqu @XMM[1], 0x10($out)
- je .Lctr_enc_done
- movdqu 0x20($inp), @XMM[10]
- pxor @XMM[10], @XMM[4]
- movdqu @XMM[4], 0x20($out)
- cmp \$4,$len
- jb .Lctr_enc_done
- movdqu 0x30($inp), @XMM[11]
- pxor @XMM[11], @XMM[6]
- movdqu @XMM[6], 0x30($out)
- je .Lctr_enc_done
- movdqu 0x40($inp), @XMM[12]
- pxor @XMM[12], @XMM[3]
- movdqu @XMM[3], 0x40($out)
- cmp \$6,$len
- jb .Lctr_enc_done
- movdqu 0x50($inp), @XMM[13]
- pxor @XMM[13], @XMM[7]
- movdqu @XMM[7], 0x50($out)
- je .Lctr_enc_done
- movdqu 0x60($inp), @XMM[14]
- pxor @XMM[14], @XMM[2]
- movdqu @XMM[2], 0x60($out)
- jmp .Lctr_enc_done
-
-.align 16
-.Lctr_enc_short:
- lea 0x20(%rbp), $arg1
- lea 0x30(%rbp), $arg2
- lea ($key), $arg3
- call asm_AES_encrypt
- movdqu ($inp), @XMM[1]
- lea 16($inp), $inp
- mov 0x2c(%rbp), %eax # load 32-bit counter
- bswap %eax
- pxor 0x30(%rbp), @XMM[1]
- inc %eax # increment
- movdqu @XMM[1], ($out)
- bswap %eax
- lea 16($out), $out
- mov %eax, 0x2c(%rsp) # save 32-bit counter
- dec $len
- jnz .Lctr_enc_short
-
-.Lctr_enc_done:
- lea (%rsp), %rax
- pxor %xmm0, %xmm0
-.Lctr_enc_bzero: # wipe key schedule [if any]
- movdqa %xmm0, 0x00(%rax)
- movdqa %xmm0, 0x10(%rax)
- lea 0x20(%rax), %rax
- cmp %rax, %rbp
- ja .Lctr_enc_bzero
-
- lea 0x78(%rbp),%rax
-.cfi_def_cfa %rax,8
-___
-$code.=<<___ if ($win64);
- movaps 0x40(%rbp), %xmm6
- movaps 0x50(%rbp), %xmm7
- movaps 0x60(%rbp), %xmm8
- movaps 0x70(%rbp), %xmm9
- movaps 0x80(%rbp), %xmm10
- movaps 0x90(%rbp), %xmm11
- movaps 0xa0(%rbp), %xmm12
- movaps 0xb0(%rbp), %xmm13
- movaps 0xc0(%rbp), %xmm14
- movaps 0xd0(%rbp), %xmm15
- lea 0xa0(%rax), %rax
-.Lctr_enc_tail:
-___
-$code.=<<___;
- mov -48(%rax), %r15
-.cfi_restore %r15
- mov -40(%rax), %r14
-.cfi_restore %r14
- mov -32(%rax), %r13
-.cfi_restore %r13
- mov -24(%rax), %r12
-.cfi_restore %r12
- mov -16(%rax), %rbx
-.cfi_restore %rbx
- mov -8(%rax), %rbp
-.cfi_restore %rbp
- lea (%rax), %rsp # restore %rsp
-.cfi_def_cfa_register %rsp
-.Lctr_enc_epilogue:
- ret
-.cfi_endproc
-.size bsaes_ctr32_encrypt_blocks,.-bsaes_ctr32_encrypt_blocks
-___
-######################################################################
-# void bsaes_xts_[en|de]crypt(const char *inp,char *out,size_t len,
-# const AES_KEY *key1, const AES_KEY *key2,
-# const unsigned char iv[16]);
-#
-my ($twmask,$twres,$twtmp)=@XMM[13..15];
-$arg6=~s/d$//;
-
-$code.=<<___;
-.globl bsaes_xts_encrypt
-.type bsaes_xts_encrypt,\@abi-omnipotent
-.align 16
-bsaes_xts_encrypt:
-.cfi_startproc
- mov %rsp, %rax
-.Lxts_enc_prologue:
- push %rbp
-.cfi_push %rbp
- push %rbx
-.cfi_push %rbx
- push %r12
-.cfi_push %r12
- push %r13
-.cfi_push %r13
- push %r14
-.cfi_push %r14
- push %r15
-.cfi_push %r15
- lea -0x48(%rsp), %rsp
-.cfi_adjust_cfa_offset 0x48
-___
-$code.=<<___ if ($win64);
- mov 0xa0(%rsp),$arg5 # pull key2
- mov 0xa8(%rsp),$arg6 # pull ivp
- lea -0xa0(%rsp), %rsp
- movaps %xmm6, 0x40(%rsp)
- movaps %xmm7, 0x50(%rsp)
- movaps %xmm8, 0x60(%rsp)
- movaps %xmm9, 0x70(%rsp)
- movaps %xmm10, 0x80(%rsp)
- movaps %xmm11, 0x90(%rsp)
- movaps %xmm12, 0xa0(%rsp)
- movaps %xmm13, 0xb0(%rsp)
- movaps %xmm14, 0xc0(%rsp)
- movaps %xmm15, 0xd0(%rsp)
-.Lxts_enc_body:
-___
-$code.=<<___;
- mov %rsp, %rbp # backup %rsp
-.cfi_def_cfa_register %rbp
- mov $arg1, $inp # backup arguments
- mov $arg2, $out
- mov $arg3, $len
- mov $arg4, $key
-
- lea ($arg6), $arg1
- lea 0x20(%rbp), $arg2
- lea ($arg5), $arg3
- call asm_AES_encrypt # generate initial tweak
-
- mov 240($key), %eax # rounds
- mov $len, %rbx # backup $len
-
- mov %eax, %edx # rounds
- shl \$7, %rax # 128 bytes per inner round key
- sub \$`128-32`, %rax # size of bit-sliced key schedule
- sub %rax, %rsp
-
- mov %rsp, %rax # pass key schedule
- mov $key, %rcx # pass key
- mov %edx, %r10d # pass rounds
- call _bsaes_key_convert
- pxor %xmm6, %xmm7 # fix up last round key
- movdqa %xmm7, (%rax) # save last round key
-
- and \$-16, $len
- sub \$0x80, %rsp # place for tweak[8]
- movdqa 0x20(%rbp), @XMM[7] # initial tweak
-
- pxor $twtmp, $twtmp
- movdqa .Lxts_magic(%rip), $twmask
- pcmpgtd @XMM[7], $twtmp # broadcast upper bits
-
- sub \$0x80, $len
- jc .Lxts_enc_short
- jmp .Lxts_enc_loop
-
-.align 16
-.Lxts_enc_loop:
-___
- for ($i=0;$i<7;$i++) {
- $code.=<<___;
- pshufd \$0x13, $twtmp, $twres
- pxor $twtmp, $twtmp
- movdqa @XMM[7], @XMM[$i]
- movdqa @XMM[7], `0x10*$i`(%rsp)# save tweak[$i]
- paddq @XMM[7], @XMM[7] # psllq 1,$tweak
- pand $twmask, $twres # isolate carry and residue
- pcmpgtd @XMM[7], $twtmp # broadcast upper bits
- pxor $twres, @XMM[7]
-___
- $code.=<<___ if ($i>=1);
- movdqu `0x10*($i-1)`($inp), @XMM[8+$i-1]
-___
- $code.=<<___ if ($i>=2);
- pxor @XMM[8+$i-2], @XMM[$i-2]# input[] ^ tweak[]
-___
- }
-$code.=<<___;
- movdqu 0x60($inp), @XMM[8+6]
- pxor @XMM[8+5], @XMM[5]
- movdqu 0x70($inp), @XMM[8+7]
- lea 0x80($inp), $inp
- movdqa @XMM[7], 0x70(%rsp)
- pxor @XMM[8+6], @XMM[6]
- lea 0x80(%rsp), %rax # pass key schedule
- pxor @XMM[8+7], @XMM[7]
- mov %edx, %r10d # pass rounds
-
- call _bsaes_encrypt8
-
- pxor 0x00(%rsp), @XMM[0] # ^= tweak[]
- pxor 0x10(%rsp), @XMM[1]
- movdqu @XMM[0], 0x00($out) # write output
- pxor 0x20(%rsp), @XMM[4]
- movdqu @XMM[1], 0x10($out)
- pxor 0x30(%rsp), @XMM[6]
- movdqu @XMM[4], 0x20($out)
- pxor 0x40(%rsp), @XMM[3]
- movdqu @XMM[6], 0x30($out)
- pxor 0x50(%rsp), @XMM[7]
- movdqu @XMM[3], 0x40($out)
- pxor 0x60(%rsp), @XMM[2]
- movdqu @XMM[7], 0x50($out)
- pxor 0x70(%rsp), @XMM[5]
- movdqu @XMM[2], 0x60($out)
- movdqu @XMM[5], 0x70($out)
- lea 0x80($out), $out
-
- movdqa 0x70(%rsp), @XMM[7] # prepare next iteration tweak
- pxor $twtmp, $twtmp
- movdqa .Lxts_magic(%rip), $twmask
- pcmpgtd @XMM[7], $twtmp
- pshufd \$0x13, $twtmp, $twres
- pxor $twtmp, $twtmp
- paddq @XMM[7], @XMM[7] # psllq 1,$tweak
- pand $twmask, $twres # isolate carry and residue
- pcmpgtd @XMM[7], $twtmp # broadcast upper bits
- pxor $twres, @XMM[7]
-
- sub \$0x80,$len
- jnc .Lxts_enc_loop
-
-.Lxts_enc_short:
- add \$0x80, $len
- jz .Lxts_enc_done
-___
- for ($i=0;$i<7;$i++) {
- $code.=<<___;
- pshufd \$0x13, $twtmp, $twres
- pxor $twtmp, $twtmp
- movdqa @XMM[7], @XMM[$i]
- movdqa @XMM[7], `0x10*$i`(%rsp)# save tweak[$i]
- paddq @XMM[7], @XMM[7] # psllq 1,$tweak
- pand $twmask, $twres # isolate carry and residue
- pcmpgtd @XMM[7], $twtmp # broadcast upper bits
- pxor $twres, @XMM[7]
-___
- $code.=<<___ if ($i>=1);
- movdqu `0x10*($i-1)`($inp), @XMM[8+$i-1]
- cmp \$`0x10*$i`,$len
- je .Lxts_enc_$i
-___
- $code.=<<___ if ($i>=2);
- pxor @XMM[8+$i-2], @XMM[$i-2]# input[] ^ tweak[]
-___
- }
-$code.=<<___;
- movdqu 0x60($inp), @XMM[8+6]
- pxor @XMM[8+5], @XMM[5]
- movdqa @XMM[7], 0x70(%rsp)
- lea 0x70($inp), $inp
- pxor @XMM[8+6], @XMM[6]
- lea 0x80(%rsp), %rax # pass key schedule
- mov %edx, %r10d # pass rounds
-
- call _bsaes_encrypt8
-
- pxor 0x00(%rsp), @XMM[0] # ^= tweak[]
- pxor 0x10(%rsp), @XMM[1]
- movdqu @XMM[0], 0x00($out) # write output
- pxor 0x20(%rsp), @XMM[4]
- movdqu @XMM[1], 0x10($out)
- pxor 0x30(%rsp), @XMM[6]
- movdqu @XMM[4], 0x20($out)
- pxor 0x40(%rsp), @XMM[3]
- movdqu @XMM[6], 0x30($out)
- pxor 0x50(%rsp), @XMM[7]
- movdqu @XMM[3], 0x40($out)
- pxor 0x60(%rsp), @XMM[2]
- movdqu @XMM[7], 0x50($out)
- movdqu @XMM[2], 0x60($out)
- lea 0x70($out), $out
-
- movdqa 0x70(%rsp), @XMM[7] # next iteration tweak
- jmp .Lxts_enc_done
-.align 16
-.Lxts_enc_6:
- pxor @XMM[8+4], @XMM[4]
- lea 0x60($inp), $inp
- pxor @XMM[8+5], @XMM[5]
- lea 0x80(%rsp), %rax # pass key schedule
- mov %edx, %r10d # pass rounds
-
- call _bsaes_encrypt8
-
- pxor 0x00(%rsp), @XMM[0] # ^= tweak[]
- pxor 0x10(%rsp), @XMM[1]
- movdqu @XMM[0], 0x00($out) # write output
- pxor 0x20(%rsp), @XMM[4]
- movdqu @XMM[1], 0x10($out)
- pxor 0x30(%rsp), @XMM[6]
- movdqu @XMM[4], 0x20($out)
- pxor 0x40(%rsp), @XMM[3]
- movdqu @XMM[6], 0x30($out)
- pxor 0x50(%rsp), @XMM[7]
- movdqu @XMM[3], 0x40($out)
- movdqu @XMM[7], 0x50($out)
- lea 0x60($out), $out
-
- movdqa 0x60(%rsp), @XMM[7] # next iteration tweak
- jmp .Lxts_enc_done
-.align 16
-.Lxts_enc_5:
- pxor @XMM[8+3], @XMM[3]
- lea 0x50($inp), $inp
- pxor @XMM[8+4], @XMM[4]
- lea 0x80(%rsp), %rax # pass key schedule
- mov %edx, %r10d # pass rounds
-
- call _bsaes_encrypt8
-
- pxor 0x00(%rsp), @XMM[0] # ^= tweak[]
- pxor 0x10(%rsp), @XMM[1]
- movdqu @XMM[0], 0x00($out) # write output
- pxor 0x20(%rsp), @XMM[4]
- movdqu @XMM[1], 0x10($out)
- pxor 0x30(%rsp), @XMM[6]
- movdqu @XMM[4], 0x20($out)
- pxor 0x40(%rsp), @XMM[3]
- movdqu @XMM[6], 0x30($out)
- movdqu @XMM[3], 0x40($out)
- lea 0x50($out), $out
-
- movdqa 0x50(%rsp), @XMM[7] # next iteration tweak
- jmp .Lxts_enc_done
-.align 16
-.Lxts_enc_4:
- pxor @XMM[8+2], @XMM[2]
- lea 0x40($inp), $inp
- pxor @XMM[8+3], @XMM[3]
- lea 0x80(%rsp), %rax # pass key schedule
- mov %edx, %r10d # pass rounds
-
- call _bsaes_encrypt8
-
- pxor 0x00(%rsp), @XMM[0] # ^= tweak[]
- pxor 0x10(%rsp), @XMM[1]
- movdqu @XMM[0], 0x00($out) # write output
- pxor 0x20(%rsp), @XMM[4]
- movdqu @XMM[1], 0x10($out)
- pxor 0x30(%rsp), @XMM[6]
- movdqu @XMM[4], 0x20($out)
- movdqu @XMM[6], 0x30($out)
- lea 0x40($out), $out
-
- movdqa 0x40(%rsp), @XMM[7] # next iteration tweak
- jmp .Lxts_enc_done
-.align 16
-.Lxts_enc_3:
- pxor @XMM[8+1], @XMM[1]
- lea 0x30($inp), $inp
- pxor @XMM[8+2], @XMM[2]
- lea 0x80(%rsp), %rax # pass key schedule
- mov %edx, %r10d # pass rounds
-
- call _bsaes_encrypt8
-
- pxor 0x00(%rsp), @XMM[0] # ^= tweak[]
- pxor 0x10(%rsp), @XMM[1]
- movdqu @XMM[0], 0x00($out) # write output
- pxor 0x20(%rsp), @XMM[4]
- movdqu @XMM[1], 0x10($out)
- movdqu @XMM[4], 0x20($out)
- lea 0x30($out), $out
-
- movdqa 0x30(%rsp), @XMM[7] # next iteration tweak
- jmp .Lxts_enc_done
-.align 16
-.Lxts_enc_2:
- pxor @XMM[8+0], @XMM[0]
- lea 0x20($inp), $inp
- pxor @XMM[8+1], @XMM[1]
- lea 0x80(%rsp), %rax # pass key schedule
- mov %edx, %r10d # pass rounds
-
- call _bsaes_encrypt8
-
- pxor 0x00(%rsp), @XMM[0] # ^= tweak[]
- pxor 0x10(%rsp), @XMM[1]
- movdqu @XMM[0], 0x00($out) # write output
- movdqu @XMM[1], 0x10($out)
- lea 0x20($out), $out
-
- movdqa 0x20(%rsp), @XMM[7] # next iteration tweak
- jmp .Lxts_enc_done
-.align 16
-.Lxts_enc_1:
- pxor @XMM[0], @XMM[8]
- lea 0x10($inp), $inp
- movdqa @XMM[8], 0x20(%rbp)
- lea 0x20(%rbp), $arg1
- lea 0x20(%rbp), $arg2
- lea ($key), $arg3
- call asm_AES_encrypt # doesn't touch %xmm
- pxor 0x20(%rbp), @XMM[0] # ^= tweak[]
- #pxor @XMM[8], @XMM[0]
- #lea 0x80(%rsp), %rax # pass key schedule
- #mov %edx, %r10d # pass rounds
- #call _bsaes_encrypt8
- #pxor 0x00(%rsp), @XMM[0] # ^= tweak[]
- movdqu @XMM[0], 0x00($out) # write output
- lea 0x10($out), $out
-
- movdqa 0x10(%rsp), @XMM[7] # next iteration tweak
-
-.Lxts_enc_done:
- and \$15, %ebx
- jz .Lxts_enc_ret
- mov $out, %rdx
-
-.Lxts_enc_steal:
- movzb ($inp), %eax
- movzb -16(%rdx), %ecx
- lea 1($inp), $inp
- mov %al, -16(%rdx)
- mov %cl, 0(%rdx)
- lea 1(%rdx), %rdx
- sub \$1,%ebx
- jnz .Lxts_enc_steal
-
- movdqu -16($out), @XMM[0]
- lea 0x20(%rbp), $arg1
- pxor @XMM[7], @XMM[0]
- lea 0x20(%rbp), $arg2
- movdqa @XMM[0], 0x20(%rbp)
- lea ($key), $arg3
- call asm_AES_encrypt # doesn't touch %xmm
- pxor 0x20(%rbp), @XMM[7]
- movdqu @XMM[7], -16($out)
-
-.Lxts_enc_ret:
- lea (%rsp), %rax
- pxor %xmm0, %xmm0
-.Lxts_enc_bzero: # wipe key schedule [if any]
- movdqa %xmm0, 0x00(%rax)
- movdqa %xmm0, 0x10(%rax)
- lea 0x20(%rax), %rax
- cmp %rax, %rbp
- ja .Lxts_enc_bzero
-
- lea 0x78(%rbp),%rax
-.cfi_def_cfa %rax,8
-___
-$code.=<<___ if ($win64);
- movaps 0x40(%rbp), %xmm6
- movaps 0x50(%rbp), %xmm7
- movaps 0x60(%rbp), %xmm8
- movaps 0x70(%rbp), %xmm9
- movaps 0x80(%rbp), %xmm10
- movaps 0x90(%rbp), %xmm11
- movaps 0xa0(%rbp), %xmm12
- movaps 0xb0(%rbp), %xmm13
- movaps 0xc0(%rbp), %xmm14
- movaps 0xd0(%rbp), %xmm15
- lea 0xa0(%rax), %rax
-.Lxts_enc_tail:
-___
-$code.=<<___;
- mov -48(%rax), %r15
-.cfi_restore %r15
- mov -40(%rax), %r14
-.cfi_restore %r14
- mov -32(%rax), %r13
-.cfi_restore %r13
- mov -24(%rax), %r12
-.cfi_restore %r12
- mov -16(%rax), %rbx
-.cfi_restore %rbx
- mov -8(%rax), %rbp
-.cfi_restore %rbp
- lea (%rax), %rsp # restore %rsp
-.cfi_def_cfa_register %rsp
-.Lxts_enc_epilogue:
- ret
-.cfi_endproc
-.size bsaes_xts_encrypt,.-bsaes_xts_encrypt
-
-.globl bsaes_xts_decrypt
-.type bsaes_xts_decrypt,\@abi-omnipotent
-.align 16
-bsaes_xts_decrypt:
-.cfi_startproc
- mov %rsp, %rax
-.Lxts_dec_prologue:
- push %rbp
-.cfi_push %rbp
- push %rbx
-.cfi_push %rbx
- push %r12
-.cfi_push %r12
- push %r13
-.cfi_push %r13
- push %r14
-.cfi_push %r14
- push %r15
-.cfi_push %r15
- lea -0x48(%rsp), %rsp
-.cfi_adjust_cfa_offset 0x48
-___
-$code.=<<___ if ($win64);
- mov 0xa0(%rsp),$arg5 # pull key2
- mov 0xa8(%rsp),$arg6 # pull ivp
- lea -0xa0(%rsp), %rsp
- movaps %xmm6, 0x40(%rsp)
- movaps %xmm7, 0x50(%rsp)
- movaps %xmm8, 0x60(%rsp)
- movaps %xmm9, 0x70(%rsp)
- movaps %xmm10, 0x80(%rsp)
- movaps %xmm11, 0x90(%rsp)
- movaps %xmm12, 0xa0(%rsp)
- movaps %xmm13, 0xb0(%rsp)
- movaps %xmm14, 0xc0(%rsp)
- movaps %xmm15, 0xd0(%rsp)
-.Lxts_dec_body:
-___
-$code.=<<___;
- mov %rsp, %rbp # backup %rsp
- mov $arg1, $inp # backup arguments
- mov $arg2, $out
- mov $arg3, $len
- mov $arg4, $key
-
- lea ($arg6), $arg1
- lea 0x20(%rbp), $arg2
- lea ($arg5), $arg3
- call asm_AES_encrypt # generate initial tweak
-
- mov 240($key), %eax # rounds
- mov $len, %rbx # backup $len
-
- mov %eax, %edx # rounds
- shl \$7, %rax # 128 bytes per inner round key
- sub \$`128-32`, %rax # size of bit-sliced key schedule
- sub %rax, %rsp
-
- mov %rsp, %rax # pass key schedule
- mov $key, %rcx # pass key
- mov %edx, %r10d # pass rounds
- call _bsaes_key_convert
- pxor (%rsp), %xmm7 # fix up round 0 key
- movdqa %xmm6, (%rax) # save last round key
- movdqa %xmm7, (%rsp)
-
- xor %eax, %eax # if ($len%16) len-=16;
- and \$-16, $len
- test \$15, %ebx
- setnz %al
- shl \$4, %rax
- sub %rax, $len
-
- sub \$0x80, %rsp # place for tweak[8]
- movdqa 0x20(%rbp), @XMM[7] # initial tweak
-
- pxor $twtmp, $twtmp
- movdqa .Lxts_magic(%rip), $twmask
- pcmpgtd @XMM[7], $twtmp # broadcast upper bits
-
- sub \$0x80, $len
- jc .Lxts_dec_short
- jmp .Lxts_dec_loop
-
-.align 16
-.Lxts_dec_loop:
-___
- for ($i=0;$i<7;$i++) {
- $code.=<<___;
- pshufd \$0x13, $twtmp, $twres
- pxor $twtmp, $twtmp
- movdqa @XMM[7], @XMM[$i]
- movdqa @XMM[7], `0x10*$i`(%rsp)# save tweak[$i]
- paddq @XMM[7], @XMM[7] # psllq 1,$tweak
- pand $twmask, $twres # isolate carry and residue
- pcmpgtd @XMM[7], $twtmp # broadcast upper bits
- pxor $twres, @XMM[7]
-___
- $code.=<<___ if ($i>=1);
- movdqu `0x10*($i-1)`($inp), @XMM[8+$i-1]
-___
- $code.=<<___ if ($i>=2);
- pxor @XMM[8+$i-2], @XMM[$i-2]# input[] ^ tweak[]
-___
- }
-$code.=<<___;
- movdqu 0x60($inp), @XMM[8+6]
- pxor @XMM[8+5], @XMM[5]
- movdqu 0x70($inp), @XMM[8+7]
- lea 0x80($inp), $inp
- movdqa @XMM[7], 0x70(%rsp)
- pxor @XMM[8+6], @XMM[6]
- lea 0x80(%rsp), %rax # pass key schedule
- pxor @XMM[8+7], @XMM[7]
- mov %edx, %r10d # pass rounds
-
- call _bsaes_decrypt8
-
- pxor 0x00(%rsp), @XMM[0] # ^= tweak[]
- pxor 0x10(%rsp), @XMM[1]
- movdqu @XMM[0], 0x00($out) # write output
- pxor 0x20(%rsp), @XMM[6]
- movdqu @XMM[1], 0x10($out)
- pxor 0x30(%rsp), @XMM[4]
- movdqu @XMM[6], 0x20($out)
- pxor 0x40(%rsp), @XMM[2]
- movdqu @XMM[4], 0x30($out)
- pxor 0x50(%rsp), @XMM[7]
- movdqu @XMM[2], 0x40($out)
- pxor 0x60(%rsp), @XMM[3]
- movdqu @XMM[7], 0x50($out)
- pxor 0x70(%rsp), @XMM[5]
- movdqu @XMM[3], 0x60($out)
- movdqu @XMM[5], 0x70($out)
- lea 0x80($out), $out
-
- movdqa 0x70(%rsp), @XMM[7] # prepare next iteration tweak
- pxor $twtmp, $twtmp
- movdqa .Lxts_magic(%rip), $twmask
- pcmpgtd @XMM[7], $twtmp
- pshufd \$0x13, $twtmp, $twres
- pxor $twtmp, $twtmp
- paddq @XMM[7], @XMM[7] # psllq 1,$tweak
- pand $twmask, $twres # isolate carry and residue
- pcmpgtd @XMM[7], $twtmp # broadcast upper bits
- pxor $twres, @XMM[7]
-
- sub \$0x80,$len
- jnc .Lxts_dec_loop
-
-.Lxts_dec_short:
- add \$0x80, $len
- jz .Lxts_dec_done
-___
- for ($i=0;$i<7;$i++) {
- $code.=<<___;
- pshufd \$0x13, $twtmp, $twres
- pxor $twtmp, $twtmp
- movdqa @XMM[7], @XMM[$i]
- movdqa @XMM[7], `0x10*$i`(%rsp)# save tweak[$i]
- paddq @XMM[7], @XMM[7] # psllq 1,$tweak
- pand $twmask, $twres # isolate carry and residue
- pcmpgtd @XMM[7], $twtmp # broadcast upper bits
- pxor $twres, @XMM[7]
-___
- $code.=<<___ if ($i>=1);
- movdqu `0x10*($i-1)`($inp), @XMM[8+$i-1]
- cmp \$`0x10*$i`,$len
- je .Lxts_dec_$i
-___
- $code.=<<___ if ($i>=2);
- pxor @XMM[8+$i-2], @XMM[$i-2]# input[] ^ tweak[]
-___
- }
-$code.=<<___;
- movdqu 0x60($inp), @XMM[8+6]
- pxor @XMM[8+5], @XMM[5]
- movdqa @XMM[7], 0x70(%rsp)
- lea 0x70($inp), $inp
- pxor @XMM[8+6], @XMM[6]
- lea 0x80(%rsp), %rax # pass key schedule
- mov %edx, %r10d # pass rounds
-
- call _bsaes_decrypt8
-
- pxor 0x00(%rsp), @XMM[0] # ^= tweak[]
- pxor 0x10(%rsp), @XMM[1]
- movdqu @XMM[0], 0x00($out) # write output
- pxor 0x20(%rsp), @XMM[6]
- movdqu @XMM[1], 0x10($out)
- pxor 0x30(%rsp), @XMM[4]
- movdqu @XMM[6], 0x20($out)
- pxor 0x40(%rsp), @XMM[2]
- movdqu @XMM[4], 0x30($out)
- pxor 0x50(%rsp), @XMM[7]
- movdqu @XMM[2], 0x40($out)
- pxor 0x60(%rsp), @XMM[3]
- movdqu @XMM[7], 0x50($out)
- movdqu @XMM[3], 0x60($out)
- lea 0x70($out), $out
-
- movdqa 0x70(%rsp), @XMM[7] # next iteration tweak
- jmp .Lxts_dec_done
-.align 16
-.Lxts_dec_6:
- pxor @XMM[8+4], @XMM[4]
- lea 0x60($inp), $inp
- pxor @XMM[8+5], @XMM[5]
- lea 0x80(%rsp), %rax # pass key schedule
- mov %edx, %r10d # pass rounds
-
- call _bsaes_decrypt8
-
- pxor 0x00(%rsp), @XMM[0] # ^= tweak[]
- pxor 0x10(%rsp), @XMM[1]
- movdqu @XMM[0], 0x00($out) # write output
- pxor 0x20(%rsp), @XMM[6]
- movdqu @XMM[1], 0x10($out)
- pxor 0x30(%rsp), @XMM[4]
- movdqu @XMM[6], 0x20($out)
- pxor 0x40(%rsp), @XMM[2]
- movdqu @XMM[4], 0x30($out)
- pxor 0x50(%rsp), @XMM[7]
- movdqu @XMM[2], 0x40($out)
- movdqu @XMM[7], 0x50($out)
- lea 0x60($out), $out
-
- movdqa 0x60(%rsp), @XMM[7] # next iteration tweak
- jmp .Lxts_dec_done
-.align 16
-.Lxts_dec_5:
- pxor @XMM[8+3], @XMM[3]
- lea 0x50($inp), $inp
- pxor @XMM[8+4], @XMM[4]
- lea 0x80(%rsp), %rax # pass key schedule
- mov %edx, %r10d # pass rounds
-
- call _bsaes_decrypt8
-
- pxor 0x00(%rsp), @XMM[0] # ^= tweak[]
- pxor 0x10(%rsp), @XMM[1]
- movdqu @XMM[0], 0x00($out) # write output
- pxor 0x20(%rsp), @XMM[6]
- movdqu @XMM[1], 0x10($out)
- pxor 0x30(%rsp), @XMM[4]
- movdqu @XMM[6], 0x20($out)
- pxor 0x40(%rsp), @XMM[2]
- movdqu @XMM[4], 0x30($out)
- movdqu @XMM[2], 0x40($out)
- lea 0x50($out), $out
-
- movdqa 0x50(%rsp), @XMM[7] # next iteration tweak
- jmp .Lxts_dec_done
-.align 16
-.Lxts_dec_4:
- pxor @XMM[8+2], @XMM[2]
- lea 0x40($inp), $inp
- pxor @XMM[8+3], @XMM[3]
- lea 0x80(%rsp), %rax # pass key schedule
- mov %edx, %r10d # pass rounds
-
- call _bsaes_decrypt8
-
- pxor 0x00(%rsp), @XMM[0] # ^= tweak[]
- pxor 0x10(%rsp), @XMM[1]
- movdqu @XMM[0], 0x00($out) # write output
- pxor 0x20(%rsp), @XMM[6]
- movdqu @XMM[1], 0x10($out)
- pxor 0x30(%rsp), @XMM[4]
- movdqu @XMM[6], 0x20($out)
- movdqu @XMM[4], 0x30($out)
- lea 0x40($out), $out
-
- movdqa 0x40(%rsp), @XMM[7] # next iteration tweak
- jmp .Lxts_dec_done
-.align 16
-.Lxts_dec_3:
- pxor @XMM[8+1], @XMM[1]
- lea 0x30($inp), $inp
- pxor @XMM[8+2], @XMM[2]
- lea 0x80(%rsp), %rax # pass key schedule
- mov %edx, %r10d # pass rounds
-
- call _bsaes_decrypt8
-
- pxor 0x00(%rsp), @XMM[0] # ^= tweak[]
- pxor 0x10(%rsp), @XMM[1]
- movdqu @XMM[0], 0x00($out) # write output
- pxor 0x20(%rsp), @XMM[6]
- movdqu @XMM[1], 0x10($out)
- movdqu @XMM[6], 0x20($out)
- lea 0x30($out), $out
-
- movdqa 0x30(%rsp), @XMM[7] # next iteration tweak
- jmp .Lxts_dec_done
-.align 16
-.Lxts_dec_2:
- pxor @XMM[8+0], @XMM[0]
- lea 0x20($inp), $inp
- pxor @XMM[8+1], @XMM[1]
- lea 0x80(%rsp), %rax # pass key schedule
- mov %edx, %r10d # pass rounds
-
- call _bsaes_decrypt8
-
- pxor 0x00(%rsp), @XMM[0] # ^= tweak[]
- pxor 0x10(%rsp), @XMM[1]
- movdqu @XMM[0], 0x00($out) # write output
- movdqu @XMM[1], 0x10($out)
- lea 0x20($out), $out
-
- movdqa 0x20(%rsp), @XMM[7] # next iteration tweak
- jmp .Lxts_dec_done
-.align 16
-.Lxts_dec_1:
- pxor @XMM[0], @XMM[8]
- lea 0x10($inp), $inp
- movdqa @XMM[8], 0x20(%rbp)
- lea 0x20(%rbp), $arg1
- lea 0x20(%rbp), $arg2
- lea ($key), $arg3
- call asm_AES_decrypt # doesn't touch %xmm
- pxor 0x20(%rbp), @XMM[0] # ^= tweak[]
- #pxor @XMM[8], @XMM[0]
- #lea 0x80(%rsp), %rax # pass key schedule
- #mov %edx, %r10d # pass rounds
- #call _bsaes_decrypt8
- #pxor 0x00(%rsp), @XMM[0] # ^= tweak[]
- movdqu @XMM[0], 0x00($out) # write output
- lea 0x10($out), $out
-
- movdqa 0x10(%rsp), @XMM[7] # next iteration tweak
-
-.Lxts_dec_done:
- and \$15, %ebx
- jz .Lxts_dec_ret
-
- pxor $twtmp, $twtmp
- movdqa .Lxts_magic(%rip), $twmask
- pcmpgtd @XMM[7], $twtmp
- pshufd \$0x13, $twtmp, $twres
- movdqa @XMM[7], @XMM[6]
- paddq @XMM[7], @XMM[7] # psllq 1,$tweak
- pand $twmask, $twres # isolate carry and residue
- movdqu ($inp), @XMM[0]
- pxor $twres, @XMM[7]
-
- lea 0x20(%rbp), $arg1
- pxor @XMM[7], @XMM[0]
- lea 0x20(%rbp), $arg2
- movdqa @XMM[0], 0x20(%rbp)
- lea ($key), $arg3
- call asm_AES_decrypt # doesn't touch %xmm
- pxor 0x20(%rbp), @XMM[7]
- mov $out, %rdx
- movdqu @XMM[7], ($out)
-
-.Lxts_dec_steal:
- movzb 16($inp), %eax
- movzb (%rdx), %ecx
- lea 1($inp), $inp
- mov %al, (%rdx)
- mov %cl, 16(%rdx)
- lea 1(%rdx), %rdx
- sub \$1,%ebx
- jnz .Lxts_dec_steal
-
- movdqu ($out), @XMM[0]
- lea 0x20(%rbp), $arg1
- pxor @XMM[6], @XMM[0]
- lea 0x20(%rbp), $arg2
- movdqa @XMM[0], 0x20(%rbp)
- lea ($key), $arg3
- call asm_AES_decrypt # doesn't touch %xmm
- pxor 0x20(%rbp), @XMM[6]
- movdqu @XMM[6], ($out)
-
-.Lxts_dec_ret:
- lea (%rsp), %rax
- pxor %xmm0, %xmm0
-.Lxts_dec_bzero: # wipe key schedule [if any]
- movdqa %xmm0, 0x00(%rax)
- movdqa %xmm0, 0x10(%rax)
- lea 0x20(%rax), %rax
- cmp %rax, %rbp
- ja .Lxts_dec_bzero
-
- lea 0x78(%rbp),%rax
-.cfi_def_cfa %rax,8
-___
-$code.=<<___ if ($win64);
- movaps 0x40(%rbp), %xmm6
- movaps 0x50(%rbp), %xmm7
- movaps 0x60(%rbp), %xmm8
- movaps 0x70(%rbp), %xmm9
- movaps 0x80(%rbp), %xmm10
- movaps 0x90(%rbp), %xmm11
- movaps 0xa0(%rbp), %xmm12
- movaps 0xb0(%rbp), %xmm13
- movaps 0xc0(%rbp), %xmm14
- movaps 0xd0(%rbp), %xmm15
- lea 0xa0(%rax), %rax
-.Lxts_dec_tail:
-___
-$code.=<<___;
- mov -48(%rax), %r15
-.cfi_restore %r15
- mov -40(%rax), %r14
-.cfi_restore %r14
- mov -32(%rax), %r13
-.cfi_restore %r13
- mov -24(%rax), %r12
-.cfi_restore %r12
- mov -16(%rax), %rbx
-.cfi_restore %rbx
- mov -8(%rax), %rbp
-.cfi_restore %rbp
- lea (%rax), %rsp # restore %rsp
-.cfi_def_cfa_register %rsp
-.Lxts_dec_epilogue:
- ret
-.cfi_endproc
-.size bsaes_xts_decrypt,.-bsaes_xts_decrypt
-___
-}
-$code.=<<___;
-.type _bsaes_const,\@object
-.align 64
-_bsaes_const:
-.LM0ISR: # InvShiftRows constants
- .quad 0x0a0e0206070b0f03, 0x0004080c0d010509
-.LISRM0:
- .quad 0x01040b0e0205080f, 0x0306090c00070a0d
-.LISR:
- .quad 0x0504070602010003, 0x0f0e0d0c080b0a09
-.LBS0: # bit-slice constants
- .quad 0x5555555555555555, 0x5555555555555555
-.LBS1:
- .quad 0x3333333333333333, 0x3333333333333333
-.LBS2:
- .quad 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f
-.LSR: # shiftrows constants
- .quad 0x0504070600030201, 0x0f0e0d0c0a09080b
-.LSRM0:
- .quad 0x0304090e00050a0f, 0x01060b0c0207080d
-.LM0SR:
- .quad 0x0a0e02060f03070b, 0x0004080c05090d01
-.LSWPUP: # byte-swap upper dword
- .quad 0x0706050403020100, 0x0c0d0e0f0b0a0908
-.LSWPUPM0SR:
- .quad 0x0a0d02060c03070b, 0x0004080f05090e01
-.LADD1: # counter increment constants
- .quad 0x0000000000000000, 0x0000000100000000
-.LADD2:
- .quad 0x0000000000000000, 0x0000000200000000
-.LADD3:
- .quad 0x0000000000000000, 0x0000000300000000
-.LADD4:
- .quad 0x0000000000000000, 0x0000000400000000
-.LADD5:
- .quad 0x0000000000000000, 0x0000000500000000
-.LADD6:
- .quad 0x0000000000000000, 0x0000000600000000
-.LADD7:
- .quad 0x0000000000000000, 0x0000000700000000
-.LADD8:
- .quad 0x0000000000000000, 0x0000000800000000
-.Lxts_magic:
- .long 0x87,0,1,0
-.Lmasks:
- .quad 0x0101010101010101, 0x0101010101010101
- .quad 0x0202020202020202, 0x0202020202020202
- .quad 0x0404040404040404, 0x0404040404040404
- .quad 0x0808080808080808, 0x0808080808080808
-.LM0:
- .quad 0x02060a0e03070b0f, 0x0004080c0105090d
-.L63:
- .quad 0x6363636363636363, 0x6363636363636363
-.asciz "Bit-sliced AES for x86_64/SSSE3, Emilia Käsper, Peter Schwabe, Andy Polyakov"
-.align 64
-.size _bsaes_const,.-_bsaes_const
-___
-
-# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame,
-# CONTEXT *context,DISPATCHER_CONTEXT *disp)
-if ($win64) {
-$rec="%rcx";
-$frame="%rdx";
-$context="%r8";
-$disp="%r9";
-
-$code.=<<___;
-.extern __imp_RtlVirtualUnwind
-.type se_handler,\@abi-omnipotent
-.align 16
-se_handler:
- push %rsi
- push %rdi
- push %rbx
- push %rbp
- push %r12
- push %r13
- push %r14
- push %r15
- pushfq
- sub \$64,%rsp
-
- mov 120($context),%rax # pull context->Rax
- mov 248($context),%rbx # pull context->Rip
-
- mov 8($disp),%rsi # disp->ImageBase
- mov 56($disp),%r11 # disp->HandlerData
-
- mov 0(%r11),%r10d # HandlerData[0]
- lea (%rsi,%r10),%r10 # prologue label
- cmp %r10,%rbx # context->Rip<=prologue label
- jbe .Lin_prologue
-
- mov 4(%r11),%r10d # HandlerData[1]
- lea (%rsi,%r10),%r10 # epilogue label
- cmp %r10,%rbx # context->Rip>=epilogue label
- jae .Lin_prologue
-
- mov 8(%r11),%r10d # HandlerData[2]
- lea (%rsi,%r10),%r10 # epilogue label
- cmp %r10,%rbx # context->Rip>=tail label
- jae .Lin_tail
-
- mov 160($context),%rax # pull context->Rbp
-
- lea 0x40(%rax),%rsi # %xmm save area
- lea 512($context),%rdi # &context.Xmm6
- mov \$20,%ecx # 10*sizeof(%xmm0)/sizeof(%rax)
- .long 0xa548f3fc # cld; rep movsq
- lea 0xa0+0x78(%rax),%rax # adjust stack pointer
-
-.Lin_tail:
- mov -48(%rax),%rbp
- mov -40(%rax),%rbx
- mov -32(%rax),%r12
- mov -24(%rax),%r13
- mov -16(%rax),%r14
- mov -8(%rax),%r15
- mov %rbx,144($context) # restore context->Rbx
- mov %rbp,160($context) # restore context->Rbp
- mov %r12,216($context) # restore context->R12
- mov %r13,224($context) # restore context->R13
- mov %r14,232($context) # restore context->R14
- mov %r15,240($context) # restore context->R15
-
-.Lin_prologue:
- mov %rax,152($context) # restore context->Rsp
-
- mov 40($disp),%rdi # disp->ContextRecord
- mov $context,%rsi # context
- mov \$`1232/8`,%ecx # sizeof(CONTEXT)
- .long 0xa548f3fc # cld; rep movsq
-
- mov $disp,%rsi
- xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER
- mov 8(%rsi),%rdx # arg2, disp->ImageBase
- mov 0(%rsi),%r8 # arg3, disp->ControlPc
- mov 16(%rsi),%r9 # arg4, disp->FunctionEntry
- mov 40(%rsi),%r10 # disp->ContextRecord
- lea 56(%rsi),%r11 # &disp->HandlerData
- lea 24(%rsi),%r12 # &disp->EstablisherFrame
- mov %r10,32(%rsp) # arg5
- mov %r11,40(%rsp) # arg6
- mov %r12,48(%rsp) # arg7
- mov %rcx,56(%rsp) # arg8, (NULL)
- call *__imp_RtlVirtualUnwind(%rip)
-
- mov \$1,%eax # ExceptionContinueSearch
- add \$64,%rsp
- popfq
- pop %r15
- pop %r14
- pop %r13
- pop %r12
- pop %rbp
- pop %rbx
- pop %rdi
- pop %rsi
- ret
-.size se_handler,.-se_handler
-
-.section .pdata
-.align 4
-___
-$code.=<<___ if ($ecb);
- .rva .Lecb_enc_prologue
- .rva .Lecb_enc_epilogue
- .rva .Lecb_enc_info
-
- .rva .Lecb_dec_prologue
- .rva .Lecb_dec_epilogue
- .rva .Lecb_dec_info
-___
-$code.=<<___;
- .rva .Lcbc_dec_prologue
- .rva .Lcbc_dec_epilogue
- .rva .Lcbc_dec_info
-
- .rva .Lctr_enc_prologue
- .rva .Lctr_enc_epilogue
- .rva .Lctr_enc_info
-
- .rva .Lxts_enc_prologue
- .rva .Lxts_enc_epilogue
- .rva .Lxts_enc_info
-
- .rva .Lxts_dec_prologue
- .rva .Lxts_dec_epilogue
- .rva .Lxts_dec_info
-
-.section .xdata
-.align 8
-___
-$code.=<<___ if ($ecb);
-.Lecb_enc_info:
- .byte 9,0,0,0
- .rva se_handler
- .rva .Lecb_enc_body,.Lecb_enc_epilogue # HandlerData[]
- .rva .Lecb_enc_tail
- .long 0
-.Lecb_dec_info:
- .byte 9,0,0,0
- .rva se_handler
- .rva .Lecb_dec_body,.Lecb_dec_epilogue # HandlerData[]
- .rva .Lecb_dec_tail
- .long 0
-___
-$code.=<<___;
-.Lcbc_dec_info:
- .byte 9,0,0,0
- .rva se_handler
- .rva .Lcbc_dec_body,.Lcbc_dec_epilogue # HandlerData[]
- .rva .Lcbc_dec_tail
- .long 0
-.Lctr_enc_info:
- .byte 9,0,0,0
- .rva se_handler
- .rva .Lctr_enc_body,.Lctr_enc_epilogue # HandlerData[]
- .rva .Lctr_enc_tail
- .long 0
-.Lxts_enc_info:
- .byte 9,0,0,0
- .rva se_handler
- .rva .Lxts_enc_body,.Lxts_enc_epilogue # HandlerData[]
- .rva .Lxts_enc_tail
- .long 0
-.Lxts_dec_info:
- .byte 9,0,0,0
- .rva se_handler
- .rva .Lxts_dec_body,.Lxts_dec_epilogue # HandlerData[]
- .rva .Lxts_dec_tail
- .long 0
-___
-}
-
-$code =~ s/\`([^\`]*)\`/eval($1)/gem;
-
-print $code;
-
-close STDOUT;
diff --git a/crypto/openssl/crypto/asn1/a_time.c b/crypto/openssl/crypto/asn1/a_time.c
index 1babb9636054..c36dd9500169 100644
--- a/crypto/openssl/crypto/asn1/a_time.c
+++ b/crypto/openssl/crypto/asn1/a_time.c
@@ -1,5 +1,5 @@
/*
- * Copyright 1999-2017 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 1999-2019 The OpenSSL Project Authors. All Rights Reserved.
*
* Licensed under the OpenSSL license (the "License"). You may not use
* this file except in compliance with the License. You can obtain a copy
@@ -67,7 +67,7 @@ static void determine_days(struct tm *tm)
}
c = y / 100;
y %= 100;
- /* Zeller's congruance */
+ /* Zeller's congruence */
tm->tm_wday = (d + (13 * m) / 5 + y + y / 4 + c / 4 + 5 * c + 6) % 7;
}
@@ -79,7 +79,11 @@ int asn1_time_to_tm(struct tm *tm, const ASN1_TIME *d)
char *a;
int n, i, i2, l, o, min_l = 11, strict = 0, end = 6, btz = 5, md;
struct tm tmp;
-
+#if defined(CHARSET_EBCDIC)
+ const char upper_z = 0x5A, num_zero = 0x30, period = 0x2E, minus = 0x2D, plus = 0x2B;
+#else
+ const char upper_z = 'Z', num_zero = '0', period = '.', minus = '-', plus = '+';
+#endif
/*
* ASN1_STRING_FLAG_X509_TIME is used to enforce RFC 5280
* time string format, in which:
@@ -120,20 +124,20 @@ int asn1_time_to_tm(struct tm *tm, const ASN1_TIME *d)
if (l < min_l)
goto err;
for (i = 0; i < end; i++) {
- if (!strict && (i == btz) && ((a[o] == 'Z') || (a[o] == '+') || (a[o] == '-'))) {
+ if (!strict && (i == btz) && ((a[o] == upper_z) || (a[o] == plus) || (a[o] == minus))) {
i++;
break;
}
- if (!ossl_isdigit(a[o]))
+ if (!ascii_isdigit(a[o]))
goto err;
- n = a[o] - '0';
+ n = a[o] - num_zero;
/* incomplete 2-digital number */
if (++o == l)
goto err;
- if (!ossl_isdigit(a[o]))
+ if (!ascii_isdigit(a[o]))
goto err;
- n = (n * 10) + a[o] - '0';
+ n = (n * 10) + a[o] - num_zero;
/* no more bytes to read, but we haven't seen time-zone yet */
if (++o == l)
goto err;
@@ -185,14 +189,14 @@ int asn1_time_to_tm(struct tm *tm, const ASN1_TIME *d)
* Optional fractional seconds: decimal point followed by one or more
* digits.
*/
- if (d->type == V_ASN1_GENERALIZEDTIME && a[o] == '.') {
+ if (d->type == V_ASN1_GENERALIZEDTIME && a[o] == period) {
if (strict)
/* RFC 5280 forbids fractional seconds */
goto err;
if (++o == l)
goto err;
i = o;
- while ((o < l) && ossl_isdigit(a[o]))
+ while ((o < l) && ascii_isdigit(a[o]))
o++;
/* Must have at least one digit after decimal point */
if (i == o)
@@ -207,10 +211,10 @@ int asn1_time_to_tm(struct tm *tm, const ASN1_TIME *d)
* 'o' can point to '\0' is either the subsequent if or the first
* else if is true.
*/
- if (a[o] == 'Z') {
+ if (a[o] == upper_z) {
o++;
- } else if (!strict && ((a[o] == '+') || (a[o] == '-'))) {
- int offsign = a[o] == '-' ? 1 : -1;
+ } else if (!strict && ((a[o] == plus) || (a[o] == minus))) {
+ int offsign = a[o] == minus ? 1 : -1;
int offset = 0;
o++;
@@ -223,13 +227,13 @@ int asn1_time_to_tm(struct tm *tm, const ASN1_TIME *d)
if (o + 4 != l)
goto err;
for (i = end; i < end + 2; i++) {
- if (!ossl_isdigit(a[o]))
+ if (!ascii_isdigit(a[o]))
goto err;
- n = a[o] - '0';
+ n = a[o] - num_zero;
o++;
- if (!ossl_isdigit(a[o]))
+ if (!ascii_isdigit(a[o]))
goto err;
- n = (n * 10) + a[o] - '0';
+ n = (n * 10) + a[o] - num_zero;
i2 = (d->type == V_ASN1_UTCTIME) ? i + 1 : i;
if ((n < min[i2]) || (n > max[i2]))
goto err;
@@ -300,7 +304,7 @@ ASN1_TIME *asn1_time_from_tm(ASN1_TIME *s, struct tm *ts, int type)
ts->tm_mday, ts->tm_hour, ts->tm_min,
ts->tm_sec);
-#ifdef CHARSET_EBCDIC_not
+#ifdef CHARSET_EBCDIC
ebcdic2ascii(tmps->data, tmps->data, tmps->length);
#endif
return tmps;
@@ -467,6 +471,7 @@ int ASN1_TIME_print(BIO *bp, const ASN1_TIME *tm)
char *v;
int gmt = 0, l;
struct tm stm;
+ const char upper_z = 0x5A, period = 0x2E;
if (!asn1_time_to_tm(&stm, tm)) {
/* asn1_time_to_tm will check the time type */
@@ -475,7 +480,7 @@ int ASN1_TIME_print(BIO *bp, const ASN1_TIME *tm)
l = tm->length;
v = (char *)tm->data;
- if (v[l - 1] == 'Z')
+ if (v[l - 1] == upper_z)
gmt = 1;
if (tm->type == V_ASN1_GENERALIZEDTIME) {
@@ -486,10 +491,10 @@ int ASN1_TIME_print(BIO *bp, const ASN1_TIME *tm)
* Try to parse fractional seconds. '14' is the place of
* 'fraction point' in a GeneralizedTime string.
*/
- if (tm->length > 15 && v[14] == '.') {
+ if (tm->length > 15 && v[14] == period) {
f = &v[14];
f_len = 1;
- while (14 + f_len < l && ossl_isdigit(f[f_len]))
+ while (14 + f_len < l && ascii_isdigit(f[f_len]))
++f_len;
}
diff --git a/crypto/openssl/crypto/asn1/a_type.c b/crypto/openssl/crypto/asn1/a_type.c
index 0c7aebe3076b..732328e05049 100644
--- a/crypto/openssl/crypto/asn1/a_type.c
+++ b/crypto/openssl/crypto/asn1/a_type.c
@@ -1,5 +1,5 @@
/*
- * Copyright 1995-2016 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 1995-2019 The OpenSSL Project Authors. All Rights Reserved.
*
* Licensed under the OpenSSL license (the "License"). You may not use
* this file except in compliance with the License. You can obtain a copy
@@ -15,7 +15,9 @@
int ASN1_TYPE_get(const ASN1_TYPE *a)
{
- if ((a->value.ptr != NULL) || (a->type == V_ASN1_NULL))
+ if (a->type == V_ASN1_BOOLEAN
+ || a->type == V_ASN1_NULL
+ || a->value.ptr != NULL)
return a->type;
else
return 0;
@@ -23,7 +25,9 @@ int ASN1_TYPE_get(const ASN1_TYPE *a)
void ASN1_TYPE_set(ASN1_TYPE *a, int type, void *value)
{
- if (a->value.ptr != NULL) {
+ if (a->type != V_ASN1_BOOLEAN
+ && a->type != V_ASN1_NULL
+ && a->value.ptr != NULL) {
ASN1_TYPE **tmp_a = &a;
asn1_primitive_free((ASN1_VALUE **)tmp_a, NULL, 0);
}
diff --git a/crypto/openssl/crypto/asn1/x_bignum.c b/crypto/openssl/crypto/asn1/x_bignum.c
index da57e77a7aa8..6c93ea7510da 100644
--- a/crypto/openssl/crypto/asn1/x_bignum.c
+++ b/crypto/openssl/crypto/asn1/x_bignum.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2000-2016 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 2000-2019 The OpenSSL Project Authors. All Rights Reserved.
*
* Licensed under the OpenSSL license (the "License"). You may not use
* this file except in compliance with the License. You can obtain a copy
@@ -130,9 +130,20 @@ static int bn_c2i(ASN1_VALUE **pval, const unsigned char *cont, int len,
static int bn_secure_c2i(ASN1_VALUE **pval, const unsigned char *cont, int len,
int utype, char *free_cont, const ASN1_ITEM *it)
{
- if (!*pval)
- bn_secure_new(pval, it);
- return bn_c2i(pval, cont, len, utype, free_cont, it);
+ int ret;
+ BIGNUM *bn;
+
+ if (!*pval && !bn_secure_new(pval, it))
+ return 0;
+
+ ret = bn_c2i(pval, cont, len, utype, free_cont, it);
+ if (!ret)
+ return 0;
+
+ /* Set constant-time flag for all secure BIGNUMS */
+ bn = (BIGNUM *)*pval;
+ BN_set_flags(bn, BN_FLG_CONSTTIME);
+ return ret;
}
static int bn_print(BIO *out, ASN1_VALUE **pval, const ASN1_ITEM *it,
diff --git a/crypto/openssl/crypto/bio/b_addr.c b/crypto/openssl/crypto/bio/b_addr.c
index f295b766fa73..dd5008e636a4 100644
--- a/crypto/openssl/crypto/bio/b_addr.c
+++ b/crypto/openssl/crypto/bio/b_addr.c
@@ -675,7 +675,7 @@ int BIO_lookup_ex(const char *host, const char *service, int lookup_type,
if (1) {
#ifdef AI_PASSIVE
- int gai_ret = 0;
+ int gai_ret = 0, old_ret = 0;
struct addrinfo hints;
memset(&hints, 0, sizeof(hints));
@@ -683,12 +683,12 @@ int BIO_lookup_ex(const char *host, const char *service, int lookup_type,
hints.ai_family = family;
hints.ai_socktype = socktype;
hints.ai_protocol = protocol;
-#ifdef AI_ADDRCONFIG
-#ifdef AF_UNSPEC
+# ifdef AI_ADDRCONFIG
+# ifdef AF_UNSPEC
if (family == AF_UNSPEC)
-#endif
+# endif
hints.ai_flags |= AI_ADDRCONFIG;
-#endif
+# endif
if (lookup_type == BIO_LOOKUP_SERVER)
hints.ai_flags |= AI_PASSIVE;
@@ -696,6 +696,7 @@ int BIO_lookup_ex(const char *host, const char *service, int lookup_type,
/* Note that |res| SHOULD be a 'struct addrinfo **' thanks to
* macro magic in bio_lcl.h
*/
+ retry:
switch ((gai_ret = getaddrinfo(host, service, &hints, res))) {
# ifdef EAI_SYSTEM
case EAI_SYSTEM:
@@ -703,12 +704,25 @@ int BIO_lookup_ex(const char *host, const char *service, int lookup_type,
BIOerr(BIO_F_BIO_LOOKUP_EX, ERR_R_SYS_LIB);
break;
# endif
+# ifdef EAI_MEMORY
+ case EAI_MEMORY:
+ BIOerr(BIO_F_BIO_LOOKUP_EX, ERR_R_MALLOC_FAILURE);
+ break;
+# endif
case 0:
ret = 1; /* Success */
break;
default:
+# if defined(AI_ADDRCONFIG) && defined(AI_NUMERICHOST)
+ if (hints.ai_flags & AI_ADDRCONFIG) {
+ hints.ai_flags &= ~AI_ADDRCONFIG;
+ hints.ai_flags |= AI_NUMERICHOST;
+ old_ret = gai_ret;
+ goto retry;
+ }
+# endif
BIOerr(BIO_F_BIO_LOOKUP_EX, ERR_R_SYS_LIB);
- ERR_add_error_data(1, gai_strerror(gai_ret));
+ ERR_add_error_data(1, gai_strerror(old_ret ? old_ret : gai_ret));
break;
}
} else {
diff --git a/crypto/openssl/crypto/bio/bss_dgram.c b/crypto/openssl/crypto/bio/bss_dgram.c
index d5fe5bb5a8a2..551821609f73 100644
--- a/crypto/openssl/crypto/bio/bss_dgram.c
+++ b/crypto/openssl/crypto/bio/bss_dgram.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2005-2018 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 2005-2019 The OpenSSL Project Authors. All Rights Reserved.
*
* Licensed under the OpenSSL license (the "License"). You may not use
* this file except in compliance with the License. You can obtain a copy
@@ -784,7 +784,7 @@ static long dgram_ctrl(BIO *b, int cmd, long num, void *ptr)
* reasons. When BIO_CTRL_DGRAM_SET_PEEK_MODE was first defined its value
* was incorrectly clashing with BIO_CTRL_DGRAM_SCTP_SET_IN_HANDSHAKE. The
* value has been updated to a non-clashing value. However to preserve
- * binary compatiblity we now respond to both the old value and the new one
+ * binary compatibility we now respond to both the old value and the new one
*/
case BIO_CTRL_DGRAM_SCTP_SET_IN_HANDSHAKE:
case BIO_CTRL_DGRAM_SET_PEEK_MODE:
diff --git a/crypto/openssl/crypto/bio/bss_file.c b/crypto/openssl/crypto/bio/bss_file.c
index 057344783d61..a21020559760 100644
--- a/crypto/openssl/crypto/bio/bss_file.c
+++ b/crypto/openssl/crypto/bio/bss_file.c
@@ -7,10 +7,7 @@
* https://www.openssl.org/source/license.html
*/
-#ifndef HEADER_BSS_FILE_C
-# define HEADER_BSS_FILE_C
-
-# if defined(__linux) || defined(__sun) || defined(__hpux)
+#if defined(__linux) || defined(__sun) || defined(__hpux)
/*
* Following definition aliases fopen to fopen64 on above mentioned
* platforms. This makes it possible to open and sequentially access files
@@ -23,17 +20,17 @@
* of 32-bit platforms which allow for sequential access of large files
* without extra "magic" comprise *BSD, Darwin, IRIX...
*/
-# ifndef _FILE_OFFSET_BITS
-# define _FILE_OFFSET_BITS 64
-# endif
+# ifndef _FILE_OFFSET_BITS
+# define _FILE_OFFSET_BITS 64
# endif
+#endif
-# include <stdio.h>
-# include <errno.h>
-# include "bio_lcl.h"
-# include <openssl/err.h>
+#include <stdio.h>
+#include <errno.h>
+#include "bio_lcl.h"
+#include <openssl/err.h>
-# if !defined(OPENSSL_NO_STDIO)
+#if !defined(OPENSSL_NO_STDIO)
static int file_write(BIO *h, const char *buf, int num);
static int file_read(BIO *h, char *buf, int size);
@@ -72,9 +69,9 @@ BIO *BIO_new_file(const char *filename, const char *mode)
SYSerr(SYS_F_FOPEN, get_last_sys_error());
ERR_add_error_data(5, "fopen('", filename, "','", mode, "')");
if (errno == ENOENT
-# ifdef ENXIO
+#ifdef ENXIO
|| errno == ENXIO
-# endif
+#endif
)
BIOerr(BIO_F_BIO_NEW_FILE, BIO_R_NO_SUCH_FILE);
else
@@ -212,33 +209,33 @@ static long file_ctrl(BIO *b, int cmd, long num, void *ptr)
b->shutdown = (int)num & BIO_CLOSE;
b->ptr = ptr;
b->init = 1;
-# if BIO_FLAGS_UPLINK!=0
-# if defined(__MINGW32__) && defined(__MSVCRT__) && !defined(_IOB_ENTRIES)
-# define _IOB_ENTRIES 20
-# endif
+# if BIO_FLAGS_UPLINK!=0
+# if defined(__MINGW32__) && defined(__MSVCRT__) && !defined(_IOB_ENTRIES)
+# define _IOB_ENTRIES 20
+# endif
/* Safety net to catch purely internal BIO_set_fp calls */
-# if defined(_MSC_VER) && _MSC_VER>=1900
+# if defined(_MSC_VER) && _MSC_VER>=1900
if (ptr == stdin || ptr == stdout || ptr == stderr)
BIO_clear_flags(b, BIO_FLAGS_UPLINK);
-# elif defined(_IOB_ENTRIES)
+# elif defined(_IOB_ENTRIES)
if ((size_t)ptr >= (size_t)stdin &&
(size_t)ptr < (size_t)(stdin + _IOB_ENTRIES))
BIO_clear_flags(b, BIO_FLAGS_UPLINK);
-# endif
# endif
-# ifdef UP_fsetmod
+# endif
+# ifdef UP_fsetmod
if (b->flags & BIO_FLAGS_UPLINK)
UP_fsetmod(b->ptr, (char)((num & BIO_FP_TEXT) ? 't' : 'b'));
else
-# endif
+# endif
{
-# if defined(OPENSSL_SYS_WINDOWS)
+# if defined(OPENSSL_SYS_WINDOWS)
int fd = _fileno((FILE *)ptr);
if (num & BIO_FP_TEXT)
_setmode(fd, _O_TEXT);
else
_setmode(fd, _O_BINARY);
-# elif defined(OPENSSL_SYS_MSDOS)
+# elif defined(OPENSSL_SYS_MSDOS)
int fd = fileno((FILE *)ptr);
/* Set correct text/binary mode */
if (num & BIO_FP_TEXT)
@@ -251,11 +248,11 @@ static long file_ctrl(BIO *b, int cmd, long num, void *ptr)
} else
_setmode(fd, _O_BINARY);
}
-# elif defined(OPENSSL_SYS_WIN32_CYGWIN)
+# elif defined(OPENSSL_SYS_WIN32_CYGWIN)
int fd = fileno((FILE *)ptr);
if (!(num & BIO_FP_TEXT))
setmode(fd, O_BINARY);
-# endif
+# endif
}
break;
case BIO_C_SET_FILENAME:
@@ -277,15 +274,15 @@ static long file_ctrl(BIO *b, int cmd, long num, void *ptr)
ret = 0;
break;
}
-# if defined(OPENSSL_SYS_MSDOS) || defined(OPENSSL_SYS_WINDOWS)
+# if defined(OPENSSL_SYS_MSDOS) || defined(OPENSSL_SYS_WINDOWS)
if (!(num & BIO_FP_TEXT))
OPENSSL_strlcat(p, "b", sizeof(p));
else
OPENSSL_strlcat(p, "t", sizeof(p));
-# elif defined(OPENSSL_SYS_WIN32_CYGWIN)
+# elif defined(OPENSSL_SYS_WIN32_CYGWIN)
if (!(num & BIO_FP_TEXT))
OPENSSL_strlcat(p, "b", sizeof(p));
-# endif
+# endif
fp = openssl_fopen(ptr, p);
if (fp == NULL) {
SYSerr(SYS_F_FOPEN, get_last_sys_error());
@@ -422,6 +419,4 @@ BIO *BIO_new_file(const char *filename, const char *mode)
return NULL;
}
-# endif /* OPENSSL_NO_STDIO */
-
-#endif /* HEADER_BSS_FILE_C */
+#endif /* OPENSSL_NO_STDIO */
diff --git a/crypto/openssl/crypto/bio/bss_mem.c b/crypto/openssl/crypto/bio/bss_mem.c
index 8c621d6c1e88..2d536e9db05f 100644
--- a/crypto/openssl/crypto/bio/bss_mem.c
+++ b/crypto/openssl/crypto/bio/bss_mem.c
@@ -259,9 +259,7 @@ static long mem_ctrl(BIO *b, int cmd, long num, void *ptr)
bm = bbm->buf;
if (bm->data != NULL) {
if (!(b->flags & BIO_FLAGS_MEM_RDONLY)) {
- if (b->flags & BIO_FLAGS_NONCLEAR_RST) {
- bm->length = bm->max;
- } else {
+ if (!(b->flags & BIO_FLAGS_NONCLEAR_RST)) {
memset(bm->data, 0, bm->max);
bm->length = 0;
}
diff --git a/crypto/openssl/crypto/bn/asm/mips.pl b/crypto/openssl/crypto/bn/asm/mips.pl
index 38b796e375fe..a205189eb684 100755
--- a/crypto/openssl/crypto/bn/asm/mips.pl
+++ b/crypto/openssl/crypto/bn/asm/mips.pl
@@ -801,7 +801,7 @@ $code.=<<___;
#if 0
/*
* The bn_div_3_words entry point is re-used for constant-time interface.
- * Implementation is retained as hystorical reference.
+ * Implementation is retained as historical reference.
*/
.align 5
.globl bn_div_3_words
diff --git a/crypto/openssl/crypto/bn/bn_div.c b/crypto/openssl/crypto/bn/bn_div.c
index 3a6fa0a1b194..7fc0132830a1 100644
--- a/crypto/openssl/crypto/bn/bn_div.c
+++ b/crypto/openssl/crypto/bn/bn_div.c
@@ -1,5 +1,5 @@
/*
- * Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 1995-2019 The OpenSSL Project Authors. All Rights Reserved.
*
* Licensed under the OpenSSL license (the "License"). You may not use
* this file except in compliance with the License. You can obtain a copy
@@ -258,7 +258,7 @@ int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor,
*
* - availability of constant-time bn_div_3_words;
* - dividend is at least as "wide" as divisor, limb-wise, zero-padded
- * if so requied, which shouldn't be a privacy problem, because
+ * if so required, which shouldn't be a privacy problem, because
* divisor's length is considered public;
*/
int bn_div_fixed_top(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num,
diff --git a/crypto/openssl/crypto/bn/bn_lcl.h b/crypto/openssl/crypto/bn/bn_lcl.h
index 8a36db2e8b67..7f823a6178a5 100644
--- a/crypto/openssl/crypto/bn/bn_lcl.h
+++ b/crypto/openssl/crypto/bn/bn_lcl.h
@@ -1,5 +1,5 @@
/*
- * Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 1995-2019 The OpenSSL Project Authors. All Rights Reserved.
*
* Licensed under the OpenSSL license (the "License"). You may not use
* this file except in compliance with the License. You can obtain a copy
@@ -295,7 +295,7 @@ struct bn_gencb_st {
(b) > 23 ? 3 : 1)
/*
- * BN_mod_exp_mont_conttime is based on the assumption that the L1 data cache
+ * BN_mod_exp_mont_consttime is based on the assumption that the L1 data cache
* line width of the target processor is at least the following value.
*/
# define MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH ( 64 )
diff --git a/crypto/openssl/crypto/bn/bn_lib.c b/crypto/openssl/crypto/bn/bn_lib.c
index f93bbcfcc71f..254069ff3819 100644
--- a/crypto/openssl/crypto/bn/bn_lib.c
+++ b/crypto/openssl/crypto/bn/bn_lib.c
@@ -132,20 +132,66 @@ int BN_num_bits_word(BN_ULONG l)
return bits;
}
+/*
+ * This function still leaks `a->dmax`: it's caller's responsibility to
+ * expand the input `a` in advance to a public length.
+ */
+static ossl_inline
+int bn_num_bits_consttime(const BIGNUM *a)
+{
+ int j, ret;
+ unsigned int mask, past_i;
+ int i = a->top - 1;
+ bn_check_top(a);
+
+ for (j = 0, past_i = 0, ret = 0; j < a->dmax; j++) {
+ mask = constant_time_eq_int(i, j); /* 0xff..ff if i==j, 0x0 otherwise */
+
+ ret += BN_BITS2 & (~mask & ~past_i);
+ ret += BN_num_bits_word(a->d[j]) & mask;
+
+ past_i |= mask; /* past_i will become 0xff..ff after i==j */
+ }
+
+ /*
+ * if BN_is_zero(a) => i is -1 and ret contains garbage, so we mask the
+ * final result.
+ */
+ mask = ~(constant_time_eq_int(i, ((int)-1)));
+
+ return ret & mask;
+}
+
int BN_num_bits(const BIGNUM *a)
{
int i = a->top - 1;
bn_check_top(a);
+ if (a->flags & BN_FLG_CONSTTIME) {
+ /*
+ * We assume that BIGNUMs flagged as CONSTTIME have also been expanded
+ * so that a->dmax is not leaking secret information.
+ *
+ * In other words, it's the caller's responsibility to ensure `a` has
+ * been preallocated in advance to a public length if we hit this
+ * branch.
+ *
+ */
+ return bn_num_bits_consttime(a);
+ }
+
if (BN_is_zero(a))
return 0;
+
return ((i * BN_BITS2) + BN_num_bits_word(a->d[i]));
}
-static void bn_free_d(BIGNUM *a)
+static void bn_free_d(BIGNUM *a, int clear)
{
if (BN_get_flags(a, BN_FLG_SECURE))
- OPENSSL_secure_free(a->d);
+ OPENSSL_secure_clear_free(a->d, a->dmax * sizeof(a->d[0]));
+ else if (clear != 0)
+ OPENSSL_clear_free(a->d, a->dmax * sizeof(a->d[0]));
else
OPENSSL_free(a->d);
}
@@ -155,10 +201,8 @@ void BN_clear_free(BIGNUM *a)
{
if (a == NULL)
return;
- if (a->d != NULL && !BN_get_flags(a, BN_FLG_STATIC_DATA)) {
- OPENSSL_cleanse(a->d, a->dmax * sizeof(a->d[0]));
- bn_free_d(a);
- }
+ if (a->d != NULL && !BN_get_flags(a, BN_FLG_STATIC_DATA))
+ bn_free_d(a, 1);
if (BN_get_flags(a, BN_FLG_MALLOCED)) {
OPENSSL_cleanse(a, sizeof(*a));
OPENSSL_free(a);
@@ -170,7 +214,7 @@ void BN_free(BIGNUM *a)
if (a == NULL)
return;
if (!BN_get_flags(a, BN_FLG_STATIC_DATA))
- bn_free_d(a);
+ bn_free_d(a, 0);
if (a->flags & BN_FLG_MALLOCED)
OPENSSL_free(a);
}
@@ -248,10 +292,8 @@ BIGNUM *bn_expand2(BIGNUM *b, int words)
BN_ULONG *a = bn_expand_internal(b, words);
if (!a)
return NULL;
- if (b->d) {
- OPENSSL_cleanse(b->d, b->dmax * sizeof(b->d[0]));
- bn_free_d(b);
- }
+ if (b->d != NULL)
+ bn_free_d(b, 1);
b->d = a;
b->dmax = words;
}
@@ -416,8 +458,11 @@ BIGNUM *BN_bin2bn(const unsigned char *s, int len, BIGNUM *ret)
return ret;
}
+typedef enum {big, little} endianess_t;
+
/* ignore negative */
-static int bn2binpad(const BIGNUM *a, unsigned char *to, int tolen)
+static
+int bn2binpad(const BIGNUM *a, unsigned char *to, int tolen, endianess_t endianess)
{
int n;
size_t i, lasti, j, atop, mask;
@@ -449,10 +494,17 @@ static int bn2binpad(const BIGNUM *a, unsigned char *to, int tolen)
lasti = atop - 1;
atop = a->top * BN_BYTES;
- for (i = 0, j = 0, to += tolen; j < (size_t)tolen; j++) {
+ if (endianess == big)
+ to += tolen; /* start from the end of the buffer */
+ for (i = 0, j = 0; j < (size_t)tolen; j++) {
+ unsigned char val;
l = a->d[i / BN_BYTES];
mask = 0 - ((j - atop) >> (8 * sizeof(i) - 1));
- *--to = (unsigned char)(l >> (8 * (i % BN_BYTES)) & mask);
+ val = (unsigned char)(l >> (8 * (i % BN_BYTES)) & mask);
+ if (endianess == big)
+ *--to = val;
+ else
+ *to++ = val;
i += (i - lasti) >> (8 * sizeof(i) - 1); /* stay on last limb */
}
@@ -463,12 +515,12 @@ int BN_bn2binpad(const BIGNUM *a, unsigned char *to, int tolen)
{
if (tolen < 0)
return -1;
- return bn2binpad(a, to, tolen);
+ return bn2binpad(a, to, tolen, big);
}
int BN_bn2bin(const BIGNUM *a, unsigned char *to)
{
- return bn2binpad(a, to, -1);
+ return bn2binpad(a, to, -1, big);
}
BIGNUM *BN_lebin2bn(const unsigned char *s, int len, BIGNUM *ret)
@@ -520,22 +572,9 @@ BIGNUM *BN_lebin2bn(const unsigned char *s, int len, BIGNUM *ret)
int BN_bn2lebinpad(const BIGNUM *a, unsigned char *to, int tolen)
{
- int i;
- BN_ULONG l;
- bn_check_top(a);
- i = BN_num_bytes(a);
- if (tolen < i)
+ if (tolen < 0)
return -1;
- /* Add trailing zeroes if necessary */
- if (tolen > i)
- memset(to + i, 0, tolen - i);
- to += i;
- while (i--) {
- l = a->d[i / BN_BYTES];
- to--;
- *to = (unsigned char)(l >> (8 * (i % BN_BYTES))) & 0xff;
- }
- return tolen;
+ return bn2binpad(a, to, tolen, little);
}
int BN_ucmp(const BIGNUM *a, const BIGNUM *b)
diff --git a/crypto/openssl/crypto/bn/bn_prime.c b/crypto/openssl/crypto/bn/bn_prime.c
index 4bbd7c881031..19b081f38eb7 100644
--- a/crypto/openssl/crypto/bn/bn_prime.c
+++ b/crypto/openssl/crypto/bn/bn_prime.c
@@ -63,8 +63,12 @@ int BN_generate_prime_ex(BIGNUM *ret, int bits, int safe,
/* There are no prime numbers this small. */
BNerr(BN_F_BN_GENERATE_PRIME_EX, BN_R_BITS_TOO_SMALL);
return 0;
- } else if (bits == 2 && safe) {
- /* The smallest safe prime (7) is three bits. */
+ } else if (add == NULL && safe && bits < 6 && bits != 3) {
+ /*
+ * The smallest safe prime (7) is three bits.
+ * But the following two safe primes with less than 6 bits (11, 23)
+ * are unreachable for BN_rand with BN_RAND_TOP_TWO.
+ */
BNerr(BN_F_BN_GENERATE_PRIME_EX, BN_R_BITS_TOO_SMALL);
return 0;
}
diff --git a/crypto/openssl/crypto/bn/bn_rand.c b/crypto/openssl/crypto/bn/bn_rand.c
index c0d1a32292ba..051f29e34305 100644
--- a/crypto/openssl/crypto/bn/bn_rand.c
+++ b/crypto/openssl/crypto/bn/bn_rand.c
@@ -1,5 +1,5 @@
/*
- * Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 1995-2019 The OpenSSL Project Authors. All Rights Reserved.
*
* Licensed under the OpenSSL license (the "License"). You may not use
* this file except in compliance with the License. You can obtain a copy
@@ -225,8 +225,7 @@ int BN_generate_dsa_nonce(BIGNUM *out, const BIGNUM *range,
goto err;
/* We copy |priv| into a local buffer to avoid exposing its length. */
- todo = sizeof(priv->d[0]) * priv->top;
- if (todo > sizeof(private_bytes)) {
+ if (BN_bn2binpad(priv, private_bytes, sizeof(private_bytes)) < 0) {
/*
* No reasonable DSA or ECDSA key should have a private key this
* large and we don't handle this case in order to avoid leaking the
@@ -235,8 +234,6 @@ int BN_generate_dsa_nonce(BIGNUM *out, const BIGNUM *range,
BNerr(BN_F_BN_GENERATE_DSA_NONCE, BN_R_PRIVATE_KEY_TOO_LARGE);
goto err;
}
- memcpy(private_bytes, priv->d, todo);
- memset(private_bytes + todo, 0, sizeof(private_bytes) - todo);
for (done = 0; done < num_k_bytes;) {
if (RAND_priv_bytes(random_bytes, sizeof(random_bytes)) != 1)
diff --git a/crypto/openssl/crypto/bn/bn_sqrt.c b/crypto/openssl/crypto/bn/bn_sqrt.c
index b97d8ca43ba2..c3e66b033bde 100644
--- a/crypto/openssl/crypto/bn/bn_sqrt.c
+++ b/crypto/openssl/crypto/bn/bn_sqrt.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2000-2018 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 2000-2019 The OpenSSL Project Authors. All Rights Reserved.
*
* Licensed under the OpenSSL license (the "License"). You may not use
* this file except in compliance with the License. You can obtain a copy
@@ -125,7 +125,8 @@ BIGNUM *BN_mod_sqrt(BIGNUM *in, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
* = a.
*
* (This is due to A.O.L. Atkin,
- * <URL: http://listserv.nodak.edu/scripts/wa.exe?A2=ind9211&L=nmbrthry&O=T&P=562>,
+ * Subject: Square Roots and Cognate Matters modulo p=8n+5.
+ * URL: https://listserv.nodak.edu/cgi-bin/wa.exe?A2=ind9211&L=NMBRTHRY&P=4026
* November 1992.)
*/
diff --git a/crypto/openssl/crypto/cms/cms_att.c b/crypto/openssl/crypto/cms/cms_att.c
index 664e64971b0a..0566019753bd 100644
--- a/crypto/openssl/crypto/cms/cms_att.c
+++ b/crypto/openssl/crypto/cms/cms_att.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2008-2016 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 2008-2019 The OpenSSL Project Authors. All Rights Reserved.
*
* Licensed under the OpenSSL license (the "License"). You may not use
* this file except in compliance with the License. You can obtain a copy
@@ -13,6 +13,56 @@
#include <openssl/err.h>
#include <openssl/cms.h>
#include "cms_lcl.h"
+#include "internal/nelem.h"
+
+/*-
+ * Attribute flags.
+ * CMS attribute restrictions are discussed in
+ * - RFC 5652 Section 11.
+ * ESS attribute restrictions are discussed in
+ * - RFC 2634 Section 1.3.4 AND
+ * - RFC 5035 Section 5.4
+ */
+/* This is a signed attribute */
+#define CMS_ATTR_F_SIGNED 0x01
+/* This is an unsigned attribute */
+#define CMS_ATTR_F_UNSIGNED 0x02
+/* Must be present if there are any other attributes of the same type */
+#define CMS_ATTR_F_REQUIRED_COND 0x10
+/* There can only be one instance of this attribute */
+#define CMS_ATTR_F_ONLY_ONE 0x20
+/* The Attribute's value must have exactly one entry */
+#define CMS_ATTR_F_ONE_ATTR_VALUE 0x40
+
+/* Attributes rules for different attributes */
+static const struct {
+ int nid; /* The attribute id */
+ int flags;
+} cms_attribute_properties[] = {
+ /* See RFC 5652 Section 11 */
+ { NID_pkcs9_contentType, CMS_ATTR_F_SIGNED
+ | CMS_ATTR_F_ONLY_ONE
+ | CMS_ATTR_F_ONE_ATTR_VALUE
+ | CMS_ATTR_F_REQUIRED_COND },
+ { NID_pkcs9_messageDigest, CMS_ATTR_F_SIGNED
+ | CMS_ATTR_F_ONLY_ONE
+ | CMS_ATTR_F_ONE_ATTR_VALUE
+ | CMS_ATTR_F_REQUIRED_COND },
+ { NID_pkcs9_signingTime, CMS_ATTR_F_SIGNED
+ | CMS_ATTR_F_ONLY_ONE
+ | CMS_ATTR_F_ONE_ATTR_VALUE },
+ { NID_pkcs9_countersignature, CMS_ATTR_F_UNSIGNED },
+ /* ESS */
+ { NID_id_smime_aa_signingCertificate, CMS_ATTR_F_SIGNED
+ | CMS_ATTR_F_ONLY_ONE
+ | CMS_ATTR_F_ONE_ATTR_VALUE },
+ { NID_id_smime_aa_signingCertificateV2, CMS_ATTR_F_SIGNED
+ | CMS_ATTR_F_ONLY_ONE
+ | CMS_ATTR_F_ONE_ATTR_VALUE },
+ { NID_id_smime_aa_receiptRequest, CMS_ATTR_F_SIGNED
+ | CMS_ATTR_F_ONLY_ONE
+ | CMS_ATTR_F_ONE_ATTR_VALUE }
+};
/* CMS SignedData Attribute utilities */
@@ -149,4 +199,86 @@ void *CMS_unsigned_get0_data_by_OBJ(CMS_SignerInfo *si, ASN1_OBJECT *oid,
return X509at_get0_data_by_OBJ(si->unsignedAttrs, oid, lastpos, type);
}
-/* Specific attribute cases */
+/*
+ * Retrieve an attribute by nid from a stack of attributes starting at index
+ * *lastpos + 1.
+ * Returns the attribute or NULL if there is no attribute.
+ * If an attribute was found *lastpos returns the index of the found attribute.
+ */
+static X509_ATTRIBUTE *cms_attrib_get(int nid,
+ const STACK_OF(X509_ATTRIBUTE) *attrs,
+ int *lastpos)
+{
+ X509_ATTRIBUTE *at;
+ int loc;
+
+ loc = X509at_get_attr_by_NID(attrs, nid, *lastpos);
+ if (loc < 0)
+ return NULL;
+
+ at = X509at_get_attr(attrs, loc);
+ *lastpos = loc;
+ return at;
+}
+
+static int cms_check_attribute(int nid, int flags, int type,
+ const STACK_OF(X509_ATTRIBUTE) *attrs,
+ int have_attrs)
+{
+ int lastpos = -1;
+ X509_ATTRIBUTE *at = cms_attrib_get(nid, attrs, &lastpos);
+
+ if (at != NULL) {
+ int count = X509_ATTRIBUTE_count(at);
+
+ /* Is this attribute allowed? */
+ if (((flags & type) == 0)
+ /* check if multiple attributes of the same type are allowed */
+ || (((flags & CMS_ATTR_F_ONLY_ONE) != 0)
+ && cms_attrib_get(nid, attrs, &lastpos) != NULL)
+ /* Check if attribute should have exactly one value in its set */
+ || (((flags & CMS_ATTR_F_ONE_ATTR_VALUE) != 0)
+ && count != 1)
+ /* There should be at least one value */
+ || count == 0)
+ return 0;
+ } else {
+ /* fail if a required attribute is missing */
+ if (have_attrs
+ && ((flags & CMS_ATTR_F_REQUIRED_COND) != 0)
+ && (flags & type) != 0)
+ return 0;
+ }
+ return 1;
+}
+
+/*
+ * Check that the signerinfo attributes obey the attribute rules which includes
+ * the following checks
+ * - If any signed attributes exist then there must be a Content Type
+ * and Message Digest attribute in the signed attributes.
+ * - The countersignature attribute is an optional unsigned attribute only.
+ * - Content Type, Message Digest, and Signing time attributes are signed
+ * attributes. Only one instance of each is allowed, with each of these
+ * attributes containing a single attribute value in its set.
+ */
+int CMS_si_check_attributes(const CMS_SignerInfo *si)
+{
+ int i;
+ int have_signed_attrs = (CMS_signed_get_attr_count(si) > 0);
+ int have_unsigned_attrs = (CMS_unsigned_get_attr_count(si) > 0);
+
+ for (i = 0; i < (int)OSSL_NELEM(cms_attribute_properties); ++i) {
+ int nid = cms_attribute_properties[i].nid;
+ int flags = cms_attribute_properties[i].flags;
+
+ if (!cms_check_attribute(nid, flags, CMS_ATTR_F_SIGNED,
+ si->signedAttrs, have_signed_attrs)
+ || !cms_check_attribute(nid, flags, CMS_ATTR_F_UNSIGNED,
+ si->unsignedAttrs, have_unsigned_attrs)) {
+ CMSerr(CMS_F_CMS_SI_CHECK_ATTRIBUTES, CMS_R_ATTRIBUTE_ERROR);
+ return 0;
+ }
+ }
+ return 1;
+}
diff --git a/crypto/openssl/crypto/cms/cms_env.c b/crypto/openssl/crypto/cms/cms_env.c
index bb95af75e3e1..26fb81f79ab1 100644
--- a/crypto/openssl/crypto/cms/cms_env.c
+++ b/crypto/openssl/crypto/cms/cms_env.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2008-2018 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 2008-2019 The OpenSSL Project Authors. All Rights Reserved.
*
* Licensed under the OpenSSL license (the "License"). You may not use
* this file except in compliance with the License. You can obtain a copy
@@ -363,6 +363,7 @@ static int cms_RecipientInfo_ktri_decrypt(CMS_ContentInfo *cms,
unsigned char *ek = NULL;
size_t eklen;
int ret = 0;
+ size_t fixlen = 0;
CMS_EncryptedContentInfo *ec;
ec = cms->d.envelopedData->encryptedContentInfo;
@@ -371,6 +372,19 @@ static int cms_RecipientInfo_ktri_decrypt(CMS_ContentInfo *cms,
return 0;
}
+ if (cms->d.envelopedData->encryptedContentInfo->havenocert
+ && !cms->d.envelopedData->encryptedContentInfo->debug) {
+ X509_ALGOR *calg = ec->contentEncryptionAlgorithm;
+ const EVP_CIPHER *ciph = EVP_get_cipherbyobj(calg->algorithm);
+
+ if (ciph == NULL) {
+ CMSerr(CMS_F_CMS_RECIPIENTINFO_KTRI_DECRYPT, CMS_R_UNKNOWN_CIPHER);
+ return 0;
+ }
+
+ fixlen = EVP_CIPHER_key_length(ciph);
+ }
+
ktri->pctx = EVP_PKEY_CTX_new(pkey, NULL);
if (ktri->pctx == NULL)
return 0;
@@ -401,7 +415,9 @@ static int cms_RecipientInfo_ktri_decrypt(CMS_ContentInfo *cms,
if (EVP_PKEY_decrypt(ktri->pctx, ek, &eklen,
ktri->encryptedKey->data,
- ktri->encryptedKey->length) <= 0) {
+ ktri->encryptedKey->length) <= 0
+ || eklen == 0
+ || (fixlen != 0 && eklen != fixlen)) {
CMSerr(CMS_F_CMS_RECIPIENTINFO_KTRI_DECRYPT, CMS_R_CMS_LIB);
goto err;
}
diff --git a/crypto/openssl/crypto/cms/cms_err.c b/crypto/openssl/crypto/cms/cms_err.c
index 4432b471ee76..a211f4954ce1 100644
--- a/crypto/openssl/crypto/cms/cms_err.c
+++ b/crypto/openssl/crypto/cms/cms_err.c
@@ -1,6 +1,6 @@
/*
* Generated by util/mkerr.pl DO NOT EDIT
- * Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 1995-2019 The OpenSSL Project Authors. All Rights Reserved.
*
* Licensed under the OpenSSL license (the "License"). You may not use
* this file except in compliance with the License. You can obtain a copy
@@ -146,6 +146,8 @@ static const ERR_STRING_DATA CMS_str_functs[] = {
{ERR_PACK(ERR_LIB_CMS, CMS_F_CMS_SIGNERINFO_VERIFY_CONTENT, 0),
"CMS_SignerInfo_verify_content"},
{ERR_PACK(ERR_LIB_CMS, CMS_F_CMS_SIGN_RECEIPT, 0), "CMS_sign_receipt"},
+ {ERR_PACK(ERR_LIB_CMS, CMS_F_CMS_SI_CHECK_ATTRIBUTES, 0),
+ "CMS_si_check_attributes"},
{ERR_PACK(ERR_LIB_CMS, CMS_F_CMS_STREAM, 0), "CMS_stream"},
{ERR_PACK(ERR_LIB_CMS, CMS_F_CMS_UNCOMPRESS, 0), "CMS_uncompress"},
{ERR_PACK(ERR_LIB_CMS, CMS_F_CMS_VERIFY, 0), "CMS_verify"},
@@ -155,6 +157,7 @@ static const ERR_STRING_DATA CMS_str_functs[] = {
static const ERR_STRING_DATA CMS_str_reasons[] = {
{ERR_PACK(ERR_LIB_CMS, 0, CMS_R_ADD_SIGNER_ERROR), "add signer error"},
+ {ERR_PACK(ERR_LIB_CMS, 0, CMS_R_ATTRIBUTE_ERROR), "attribute error"},
{ERR_PACK(ERR_LIB_CMS, 0, CMS_R_CERTIFICATE_ALREADY_PRESENT),
"certificate already present"},
{ERR_PACK(ERR_LIB_CMS, 0, CMS_R_CERTIFICATE_HAS_NO_KEYID),
diff --git a/crypto/openssl/crypto/cms/cms_lcl.h b/crypto/openssl/crypto/cms/cms_lcl.h
index 916fcbfbe190..68aa01271bc2 100644
--- a/crypto/openssl/crypto/cms/cms_lcl.h
+++ b/crypto/openssl/crypto/cms/cms_lcl.h
@@ -1,5 +1,5 @@
/*
- * Copyright 2008-2018 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 2008-2019 The OpenSSL Project Authors. All Rights Reserved.
*
* Licensed under the OpenSSL license (the "License"). You may not use
* this file except in compliance with the License. You can obtain a copy
@@ -125,6 +125,8 @@ struct CMS_EncryptedContentInfo_st {
size_t keylen;
/* Set to 1 if we are debugging decrypt and don't fake keys for MMA */
int debug;
+ /* Set to 1 if we have no cert and need extra safety measures for MMA */
+ int havenocert;
};
struct CMS_RecipientInfo_st {
@@ -317,8 +319,6 @@ struct CMS_OtherKeyAttribute_st {
/* ESS structures */
-# ifdef HEADER_X509V3_H
-
struct CMS_ReceiptRequest_st {
ASN1_OCTET_STRING *signedContentIdentifier;
CMS_ReceiptsFrom *receiptsFrom;
@@ -332,7 +332,6 @@ struct CMS_ReceiptsFrom_st {
STACK_OF(GENERAL_NAMES) *receiptList;
} d;
};
-# endif
struct CMS_Receipt_st {
int32_t version;
@@ -416,6 +415,8 @@ int cms_RecipientInfo_kari_encrypt(CMS_ContentInfo *cms,
/* PWRI routines */
int cms_RecipientInfo_pwri_crypt(CMS_ContentInfo *cms, CMS_RecipientInfo *ri,
int en_de);
+/* SignerInfo routines */
+int CMS_si_check_attributes(const CMS_SignerInfo *si);
DECLARE_ASN1_ITEM(CMS_CertificateChoices)
DECLARE_ASN1_ITEM(CMS_DigestedData)
diff --git a/crypto/openssl/crypto/cms/cms_sd.c b/crypto/openssl/crypto/cms/cms_sd.c
index ff2d540b6a30..3841513f8bd2 100644
--- a/crypto/openssl/crypto/cms/cms_sd.c
+++ b/crypto/openssl/crypto/cms/cms_sd.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2008-2016 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 2008-2019 The OpenSSL Project Authors. All Rights Reserved.
*
* Licensed under the OpenSSL license (the "License"). You may not use
* this file except in compliance with the License. You can obtain a copy
@@ -109,6 +109,27 @@ static void cms_sd_set_version(CMS_SignedData *sd)
}
+/*
+ * RFC 5652 Section 11.1 Content Type
+ * The content-type attribute within signed-data MUST
+ * 1) be present if there are signed attributes
+ * 2) match the content type in the signed-data,
+ * 3) be a signed attribute.
+ * 4) not have more than one copy of the attribute.
+ *
+ * Note that since the CMS_SignerInfo_sign() always adds the "signing time"
+ * attribute, the content type attribute MUST be added also.
+ * Assumptions: This assumes that the attribute does not already exist.
+ */
+static int cms_set_si_contentType_attr(CMS_ContentInfo *cms, CMS_SignerInfo *si)
+{
+ ASN1_OBJECT *ctype = cms->d.signedData->encapContentInfo->eContentType;
+
+ /* Add the contentType attribute */
+ return CMS_signed_add1_attr_by_NID(si, NID_pkcs9_contentType,
+ V_ASN1_OBJECT, ctype, -1) > 0;
+}
+
/* Copy an existing messageDigest value */
static int cms_copy_messageDigest(CMS_ContentInfo *cms, CMS_SignerInfo *si)
@@ -328,6 +349,8 @@ CMS_SignerInfo *CMS_add1_signer(CMS_ContentInfo *cms,
if (flags & CMS_REUSE_DIGEST) {
if (!cms_copy_messageDigest(cms, si))
goto err;
+ if (!cms_set_si_contentType_attr(cms, si))
+ goto err;
if (!(flags & (CMS_PARTIAL | CMS_KEY_PARAM)) &&
!CMS_SignerInfo_sign(si))
goto err;
@@ -558,8 +581,6 @@ static int cms_SignerInfo_content_sign(CMS_ContentInfo *cms,
*/
if (CMS_signed_get_attr_count(si) >= 0) {
- ASN1_OBJECT *ctype =
- cms->d.signedData->encapContentInfo->eContentType;
unsigned char md[EVP_MAX_MD_SIZE];
unsigned int mdlen;
if (!EVP_DigestFinal_ex(mctx, md, &mdlen))
@@ -568,9 +589,9 @@ static int cms_SignerInfo_content_sign(CMS_ContentInfo *cms,
V_ASN1_OCTET_STRING, md, mdlen))
goto err;
/* Copy content type across */
- if (CMS_signed_add1_attr_by_NID(si, NID_pkcs9_contentType,
- V_ASN1_OBJECT, ctype, -1) <= 0)
+ if (!cms_set_si_contentType_attr(cms, si))
goto err;
+
if (!CMS_SignerInfo_sign(si))
goto err;
} else if (si->pctx) {
@@ -650,6 +671,9 @@ int CMS_SignerInfo_sign(CMS_SignerInfo *si)
goto err;
}
+ if (!CMS_si_check_attributes(si))
+ goto err;
+
if (si->pctx)
pctx = si->pctx;
else {
@@ -696,7 +720,6 @@ int CMS_SignerInfo_sign(CMS_SignerInfo *si)
OPENSSL_free(abuf);
EVP_MD_CTX_reset(mctx);
return 0;
-
}
int CMS_SignerInfo_verify(CMS_SignerInfo *si)
@@ -711,6 +734,9 @@ int CMS_SignerInfo_verify(CMS_SignerInfo *si)
return -1;
}
+ if (!CMS_si_check_attributes(si))
+ return -1;
+
md = EVP_get_digestbyobj(si->digestAlgorithm->algorithm);
if (md == NULL)
return -1;
diff --git a/crypto/openssl/crypto/cms/cms_smime.c b/crypto/openssl/crypto/cms/cms_smime.c
index 5dcf803f4bd3..10815639f811 100644
--- a/crypto/openssl/crypto/cms/cms_smime.c
+++ b/crypto/openssl/crypto/cms/cms_smime.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2008-2018 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 2008-2019 The OpenSSL Project Authors. All Rights Reserved.
*
* Licensed under the OpenSSL license (the "License"). You may not use
* this file except in compliance with the License. You can obtain a copy
@@ -743,6 +743,10 @@ int CMS_decrypt(CMS_ContentInfo *cms, EVP_PKEY *pk, X509 *cert,
cms->d.envelopedData->encryptedContentInfo->debug = 1;
else
cms->d.envelopedData->encryptedContentInfo->debug = 0;
+ if (!cert)
+ cms->d.envelopedData->encryptedContentInfo->havenocert = 1;
+ else
+ cms->d.envelopedData->encryptedContentInfo->havenocert = 0;
if (!pk && !cert && !dcont && !out)
return 1;
if (pk && !CMS_decrypt_set1_pkey(cms, pk, cert))
diff --git a/crypto/openssl/crypto/conf/conf_sap.c b/crypto/openssl/crypto/conf/conf_sap.c
index 3805c426d802..82105de748ed 100644
--- a/crypto/openssl/crypto/conf/conf_sap.c
+++ b/crypto/openssl/crypto/conf/conf_sap.c
@@ -42,7 +42,7 @@ void OPENSSL_config(const char *appname)
int openssl_config_int(const OPENSSL_INIT_SETTINGS *settings)
{
- int ret;
+ int ret = 0;
const char *filename;
const char *appname;
unsigned long flags;
diff --git a/crypto/openssl/crypto/ctype.c b/crypto/openssl/crypto/ctype.c
index 813be25a0741..e05f84cd4086 100644
--- a/crypto/openssl/crypto/ctype.c
+++ b/crypto/openssl/crypto/ctype.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2017 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 2017-2019 The OpenSSL Project Authors. All Rights Reserved.
*
* Licensed under the OpenSSL license (the "License"). You may not use
* this file except in compliance with the License. You can obtain a copy
@@ -272,3 +272,9 @@ int ossl_toupper(int c)
{
return ossl_islower(c) ? c ^ case_change : c;
}
+
+int ascii_isdigit(const char inchar) {
+ if (inchar > 0x2F && inchar < 0x3A)
+ return 1;
+ return 0;
+}
diff --git a/crypto/openssl/crypto/dh/dh_check.c b/crypto/openssl/crypto/dh/dh_check.c
index c7e1dbf4ac0f..d13d8206ce50 100644
--- a/crypto/openssl/crypto/dh/dh_check.c
+++ b/crypto/openssl/crypto/dh/dh_check.c
@@ -24,7 +24,8 @@ int DH_check_params_ex(const DH *dh)
{
int errflags = 0;
- (void)DH_check_params(dh, &errflags);
+ if (!DH_check_params(dh, &errflags))
+ return 0;
if ((errflags & DH_CHECK_P_NOT_PRIME) != 0)
DHerr(DH_F_DH_CHECK_PARAMS_EX, DH_R_CHECK_P_NOT_PRIME);
@@ -67,18 +68,14 @@ int DH_check_params(const DH *dh, int *ret)
/*-
* Check that p is a safe prime and
- * if g is 2, 3 or 5, check that it is a suitable generator
- * where
- * for 2, p mod 24 == 11
- * for 3, p mod 12 == 5
- * for 5, p mod 10 == 3 or 7
- * should hold.
+ * g is a suitable generator.
*/
int DH_check_ex(const DH *dh)
{
int errflags = 0;
- (void)DH_check(dh, &errflags);
+ if (!DH_check(dh, &errflags))
+ return 0;
if ((errflags & DH_NOT_SUITABLE_GENERATOR) != 0)
DHerr(DH_F_DH_CHECK_EX, DH_R_NOT_SUITABLE_GENERATOR);
@@ -102,10 +99,11 @@ int DH_check(const DH *dh, int *ret)
{
int ok = 0, r;
BN_CTX *ctx = NULL;
- BN_ULONG l;
BIGNUM *t1 = NULL, *t2 = NULL;
- *ret = 0;
+ if (!DH_check_params(dh, ret))
+ return 0;
+
ctx = BN_CTX_new();
if (ctx == NULL)
goto err;
@@ -139,21 +137,7 @@ int DH_check(const DH *dh, int *ret)
*ret |= DH_CHECK_INVALID_Q_VALUE;
if (dh->j && BN_cmp(dh->j, t1))
*ret |= DH_CHECK_INVALID_J_VALUE;
-
- } else if (BN_is_word(dh->g, DH_GENERATOR_2)) {
- l = BN_mod_word(dh->p, 24);
- if (l == (BN_ULONG)-1)
- goto err;
- if (l != 11)
- *ret |= DH_NOT_SUITABLE_GENERATOR;
- } else if (BN_is_word(dh->g, DH_GENERATOR_5)) {
- l = BN_mod_word(dh->p, 10);
- if (l == (BN_ULONG)-1)
- goto err;
- if ((l != 3) && (l != 7))
- *ret |= DH_NOT_SUITABLE_GENERATOR;
- } else
- *ret |= DH_UNABLE_TO_CHECK_GENERATOR;
+ }
r = BN_is_prime_ex(dh->p, DH_NUMBER_ITERATIONS_FOR_PRIME, ctx, NULL);
if (r < 0)
@@ -180,7 +164,8 @@ int DH_check_pub_key_ex(const DH *dh, const BIGNUM *pub_key)
{
int errflags = 0;
- (void)DH_check(dh, &errflags);
+ if (!DH_check_pub_key(dh, pub_key, &errflags))
+ return 0;
if ((errflags & DH_CHECK_PUBKEY_TOO_SMALL) != 0)
DHerr(DH_F_DH_CHECK_PUB_KEY_EX, DH_R_CHECK_PUBKEY_TOO_SMALL);
diff --git a/crypto/openssl/crypto/dh/dh_gen.c b/crypto/openssl/crypto/dh/dh_gen.c
index 887fc4c3aede..d293835eb22b 100644
--- a/crypto/openssl/crypto/dh/dh_gen.c
+++ b/crypto/openssl/crypto/dh/dh_gen.c
@@ -30,30 +30,33 @@ int DH_generate_parameters_ex(DH *ret, int prime_len, int generator,
/*-
* We generate DH parameters as follows
- * find a prime q which is prime_len/2 bits long.
- * p=(2*q)+1 or (p-1)/2 = q
- * For this case, g is a generator if
- * g^((p-1)/q) mod p != 1 for values of q which are the factors of p-1.
- * Since the factors of p-1 are q and 2, we just need to check
- * g^2 mod p != 1 and g^q mod p != 1.
+ * find a prime p which is prime_len bits long,
+ * where q=(p-1)/2 is also prime.
+ * In the following we assume that g is not 0, 1 or p-1, since it
+ * would generate only trivial subgroups.
+ * For this case, g is a generator of the order-q subgroup if
+ * g^q mod p == 1.
+ * Or in terms of the Legendre symbol: (g/p) == 1.
*
* Having said all that,
* there is another special case method for the generators 2, 3 and 5.
- * for 2, p mod 24 == 11
- * for 3, p mod 12 == 5 <<<<< does not work for safe primes.
- * for 5, p mod 10 == 3 or 7
+ * Using the quadratic reciprocity law it is possible to solve
+ * (g/p) == 1 for the special values 2, 3, 5:
+ * (2/p) == 1 if p mod 8 == 1 or 7.
+ * (3/p) == 1 if p mod 12 == 1 or 11.
+ * (5/p) == 1 if p mod 5 == 1 or 4.
+ * See for instance: https://en.wikipedia.org/wiki/Legendre_symbol
*
- * Thanks to Phil Karn for the pointers about the
- * special generators and for answering some of my questions.
+ * Since all safe primes > 7 must satisfy p mod 12 == 11
+ * and all safe primes > 11 must satisfy p mod 5 != 1
+ * we can further improve the condition for g = 2, 3 and 5:
+ * for 2, p mod 24 == 23
+ * for 3, p mod 12 == 11
+ * for 5, p mod 60 == 59
*
- * I've implemented the second simple method :-).
- * Since DH should be using a safe prime (both p and q are prime),
- * this generator function can take a very very long time to run.
- */
-/*
- * Actually there is no reason to insist that 'generator' be a generator.
- * It's just as OK (and in some sense better) to use a generator of the
- * order-q subgroup.
+ * However for compatibility with previous versions we use:
+ * for 2, p mod 24 == 11
+ * for 5, p mod 60 == 23
*/
static int dh_builtin_genparams(DH *ret, int prime_len, int generator,
BN_GENCB *cb)
@@ -88,13 +91,10 @@ static int dh_builtin_genparams(DH *ret, int prime_len, int generator,
goto err;
g = 2;
} else if (generator == DH_GENERATOR_5) {
- if (!BN_set_word(t1, 10))
+ if (!BN_set_word(t1, 60))
goto err;
- if (!BN_set_word(t2, 3))
+ if (!BN_set_word(t2, 23))
goto err;
- /*
- * BN_set_word(t3,7); just have to miss out on these ones :-(
- */
g = 5;
} else {
/*
@@ -102,9 +102,9 @@ static int dh_builtin_genparams(DH *ret, int prime_len, int generator,
* not: since we are using safe primes, it will generate either an
* order-q or an order-2q group, which both is OK
*/
- if (!BN_set_word(t1, 2))
+ if (!BN_set_word(t1, 12))
goto err;
- if (!BN_set_word(t2, 1))
+ if (!BN_set_word(t2, 11))
goto err;
g = generator;
}
diff --git a/crypto/openssl/crypto/dh/dh_key.c b/crypto/openssl/crypto/dh/dh_key.c
index 99c00e5a05d1..718aa422d935 100644
--- a/crypto/openssl/crypto/dh/dh_key.c
+++ b/crypto/openssl/crypto/dh/dh_key.c
@@ -125,6 +125,15 @@ static int generate_key(DH *dh)
l = dh->length ? dh->length : BN_num_bits(dh->p) - 1;
if (!BN_priv_rand(priv_key, l, BN_RAND_TOP_ONE, BN_RAND_BOTTOM_ANY))
goto err;
+ /*
+ * We handle just one known case where g is a quadratic non-residue:
+ * for g = 2: p % 8 == 3
+ */
+ if (BN_is_word(dh->g, DH_GENERATOR_2) && !BN_is_bit_set(dh->p, 2)) {
+ /* clear bit 0, since it won't be a secret anyway */
+ if (!BN_clear_bit(priv_key, 0))
+ goto err;
+ }
}
}
@@ -136,11 +145,11 @@ static int generate_key(DH *dh)
BN_with_flags(prk, priv_key, BN_FLG_CONSTTIME);
if (!dh->meth->bn_mod_exp(dh, pub_key, dh->g, prk, dh->p, ctx, mont)) {
- BN_free(prk);
+ BN_clear_free(prk);
goto err;
}
/* We MUST free prk before any further use of priv_key */
- BN_free(prk);
+ BN_clear_free(prk);
}
dh->pub_key = pub_key;
diff --git a/crypto/openssl/crypto/dh/dh_lib.c b/crypto/openssl/crypto/dh/dh_lib.c
index 962f864deec6..e7e7ef08e9e3 100644
--- a/crypto/openssl/crypto/dh/dh_lib.c
+++ b/crypto/openssl/crypto/dh/dh_lib.c
@@ -1,5 +1,5 @@
/*
- * Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 1995-2019 The OpenSSL Project Authors. All Rights Reserved.
*
* Licensed under the OpenSSL license (the "License"). You may not use
* this file except in compliance with the License. You can obtain a copy
@@ -234,11 +234,11 @@ void DH_get0_key(const DH *dh, const BIGNUM **pub_key, const BIGNUM **priv_key)
int DH_set0_key(DH *dh, BIGNUM *pub_key, BIGNUM *priv_key)
{
if (pub_key != NULL) {
- BN_free(dh->pub_key);
+ BN_clear_free(dh->pub_key);
dh->pub_key = pub_key;
}
if (priv_key != NULL) {
- BN_free(dh->priv_key);
+ BN_clear_free(dh->priv_key);
dh->priv_key = priv_key;
}
diff --git a/crypto/openssl/crypto/dsa/dsa_ameth.c b/crypto/openssl/crypto/dsa/dsa_ameth.c
index 9c5b8aa02e9d..49aa1ae23bab 100644
--- a/crypto/openssl/crypto/dsa/dsa_ameth.c
+++ b/crypto/openssl/crypto/dsa/dsa_ameth.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2006-2016 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 2006-2019 The OpenSSL Project Authors. All Rights Reserved.
*
* Licensed under the OpenSSL license (the "License"). You may not use
* this file except in compliance with the License. You can obtain a copy
@@ -503,7 +503,7 @@ static int dsa_pkey_ctrl(EVP_PKEY *pkey, int op, long arg1, void *arg2)
case ASN1_PKEY_CTRL_DEFAULT_MD_NID:
*(int *)arg2 = NID_sha256;
- return 2;
+ return 1;
default:
return -2;
diff --git a/crypto/openssl/crypto/dsa/dsa_err.c b/crypto/openssl/crypto/dsa/dsa_err.c
index 8f97f6f3f9ee..8dcf0548ac76 100644
--- a/crypto/openssl/crypto/dsa/dsa_err.c
+++ b/crypto/openssl/crypto/dsa/dsa_err.c
@@ -1,6 +1,6 @@
/*
* Generated by util/mkerr.pl DO NOT EDIT
- * Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 1995-2019 The OpenSSL Project Authors. All Rights Reserved.
*
* Licensed under the OpenSSL license (the "License"). You may not use
* this file except in compliance with the License. You can obtain a copy
@@ -52,6 +52,8 @@ static const ERR_STRING_DATA DSA_str_reasons[] = {
"invalid digest type"},
{ERR_PACK(ERR_LIB_DSA, 0, DSA_R_INVALID_PARAMETERS), "invalid parameters"},
{ERR_PACK(ERR_LIB_DSA, 0, DSA_R_MISSING_PARAMETERS), "missing parameters"},
+ {ERR_PACK(ERR_LIB_DSA, 0, DSA_R_MISSING_PRIVATE_KEY),
+ "missing private key"},
{ERR_PACK(ERR_LIB_DSA, 0, DSA_R_MODULUS_TOO_LARGE), "modulus too large"},
{ERR_PACK(ERR_LIB_DSA, 0, DSA_R_NO_PARAMETERS_SET), "no parameters set"},
{ERR_PACK(ERR_LIB_DSA, 0, DSA_R_PARAMETER_ENCODING_ERROR),
diff --git a/crypto/openssl/crypto/dsa/dsa_ossl.c b/crypto/openssl/crypto/dsa/dsa_ossl.c
index cefda5a450fa..16161dcadf22 100644
--- a/crypto/openssl/crypto/dsa/dsa_ossl.c
+++ b/crypto/openssl/crypto/dsa/dsa_ossl.c
@@ -72,6 +72,10 @@ static DSA_SIG *dsa_do_sign(const unsigned char *dgst, int dlen, DSA *dsa)
reason = DSA_R_MISSING_PARAMETERS;
goto err;
}
+ if (dsa->priv_key == NULL) {
+ reason = DSA_R_MISSING_PRIVATE_KEY;
+ goto err;
+ }
ret = DSA_SIG_new();
if (ret == NULL)
@@ -195,6 +199,10 @@ static int dsa_sign_setup(DSA *dsa, BN_CTX *ctx_in,
DSAerr(DSA_F_DSA_SIGN_SETUP, DSA_R_INVALID_PARAMETERS);
return 0;
}
+ if (dsa->priv_key == NULL) {
+ DSAerr(DSA_F_DSA_SIGN_SETUP, DSA_R_MISSING_PRIVATE_KEY);
+ return 0;
+ }
k = BN_new();
l = BN_new();
@@ -248,7 +256,7 @@ static int dsa_sign_setup(DSA *dsa, BN_CTX *ctx_in,
* one bit longer than the modulus.
*
* There are some concerns about the efficacy of doing this. More
- * specificly refer to the discussion starting with:
+ * specifically refer to the discussion starting with:
* https://github.com/openssl/openssl/pull/7486#discussion_r228323705
* The fix is to rework BN so these gymnastics aren't required.
*/
diff --git a/crypto/openssl/crypto/dso/dso_dlfcn.c b/crypto/openssl/crypto/dso/dso_dlfcn.c
index 4240f5f5e30c..ba3b55fcbffa 100644
--- a/crypto/openssl/crypto/dso/dso_dlfcn.c
+++ b/crypto/openssl/crypto/dso/dso_dlfcn.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2000-2018 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 2000-2019 The OpenSSL Project Authors. All Rights Reserved.
*
* Licensed under the OpenSSL license (the "License"). You may not use
* this file except in compliance with the License. You can obtain a copy
@@ -27,8 +27,7 @@
# endif
# include <dlfcn.h>
# define HAVE_DLINFO 1
-# if defined(__CYGWIN__) || \
- defined(__SCO_VERSION__) || defined(_SCO_ELF) || \
+# if defined(__SCO_VERSION__) || defined(_SCO_ELF) || \
(defined(__osf__) && !defined(RTLD_NEXT)) || \
(defined(__OpenBSD__) && !defined(RTLD_SELF)) || \
defined(__ANDROID__)
diff --git a/crypto/openssl/crypto/ec/asm/ecp_nistz256-sparcv9.pl b/crypto/openssl/crypto/ec/asm/ecp_nistz256-sparcv9.pl
index 0a4def6e2bf6..4383bea4a7be 100755
--- a/crypto/openssl/crypto/ec/asm/ecp_nistz256-sparcv9.pl
+++ b/crypto/openssl/crypto/ec/asm/ecp_nistz256-sparcv9.pl
@@ -1,5 +1,5 @@
#! /usr/bin/env perl
-# Copyright 2015-2018 The OpenSSL Project Authors. All Rights Reserved.
+# Copyright 2015-2019 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the OpenSSL license (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
@@ -2301,7 +2301,6 @@ my ($Z1sqr, $Z2sqr) = ($Hsqr, $Rsqr);
# !in1infty, !in2infty and result of check for zero.
$code.=<<___;
-.globl ecp_nistz256_point_add_vis3
.align 32
ecp_nistz256_point_add_vis3:
save %sp,-STACK64_FRAME-32*18-32,%sp
diff --git a/crypto/openssl/crypto/ec/asm/ecp_nistz256-x86_64.pl b/crypto/openssl/crypto/ec/asm/ecp_nistz256-x86_64.pl
index 87149e7f680d..10ccc6414a49 100755
--- a/crypto/openssl/crypto/ec/asm/ecp_nistz256-x86_64.pl
+++ b/crypto/openssl/crypto/ec/asm/ecp_nistz256-x86_64.pl
@@ -1301,7 +1301,7 @@ ecp_nistz256_ord_mul_montx:
################################# reduction
mulx 8*0+128(%r14), $t0, $t1
- adcx $t0, $acc3 # guranteed to be zero
+ adcx $t0, $acc3 # guaranteed to be zero
adox $t1, $acc4
mulx 8*1+128(%r14), $t0, $t1
diff --git a/crypto/openssl/crypto/ec/asm/x25519-ppc64.pl b/crypto/openssl/crypto/ec/asm/x25519-ppc64.pl
index 3773cb27cd65..6e8b36420f53 100755
--- a/crypto/openssl/crypto/ec/asm/x25519-ppc64.pl
+++ b/crypto/openssl/crypto/ec/asm/x25519-ppc64.pl
@@ -1,5 +1,5 @@
#! /usr/bin/env perl
-# Copyright 2018 The OpenSSL Project Authors. All Rights Reserved.
+# Copyright 2018-2019 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the OpenSSL license (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
@@ -451,7 +451,7 @@ x25519_fe64_tobytes:
and $t0,$t0,$t1
sldi $a3,$a3,1
add $t0,$t0,$t1 # compare to modulus in the same go
- srdi $a3,$a3,1 # most signifcant bit cleared
+ srdi $a3,$a3,1 # most significant bit cleared
addc $a0,$a0,$t0
addze $a1,$a1
@@ -462,7 +462,7 @@ x25519_fe64_tobytes:
sradi $t0,$a3,63 # most significant bit -> mask
sldi $a3,$a3,1
andc $t0,$t1,$t0
- srdi $a3,$a3,1 # most signifcant bit cleared
+ srdi $a3,$a3,1 # most significant bit cleared
subi $rp,$rp,1
subfc $a0,$t0,$a0
diff --git a/crypto/openssl/crypto/ec/ec_asn1.c b/crypto/openssl/crypto/ec/ec_asn1.c
index 13c56a621dd7..1ce1181fc10a 100644
--- a/crypto/openssl/crypto/ec/ec_asn1.c
+++ b/crypto/openssl/crypto/ec/ec_asn1.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2002-2018 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 2002-2019 The OpenSSL Project Authors. All Rights Reserved.
*
* Licensed under the OpenSSL license (the "License"). You may not use
* this file except in compliance with the License. You can obtain a copy
@@ -568,10 +568,12 @@ ECPKPARAMETERS *EC_GROUP_get_ecpkparameters(const EC_GROUP *group,
EC_GROUP *EC_GROUP_new_from_ecparameters(const ECPARAMETERS *params)
{
int ok = 0, tmp;
- EC_GROUP *ret = NULL;
+ EC_GROUP *ret = NULL, *dup = NULL;
BIGNUM *p = NULL, *a = NULL, *b = NULL;
EC_POINT *point = NULL;
long field_bits;
+ int curve_name = NID_undef;
+ BN_CTX *ctx = NULL;
if (!params->fieldID || !params->fieldID->fieldType ||
!params->fieldID->p.ptr) {
@@ -789,18 +791,79 @@ EC_GROUP *EC_GROUP_new_from_ecparameters(const ECPARAMETERS *params)
goto err;
}
+ /*
+ * Check if the explicit parameters group just created matches one of the
+ * built-in curves.
+ *
+ * We create a copy of the group just built, so that we can remove optional
+ * fields for the lookup: we do this to avoid the possibility that one of
+ * the optional parameters is used to force the library into using a less
+ * performant and less secure EC_METHOD instead of the specialized one.
+ * In any case, `seed` is not really used in any computation, while a
+ * cofactor different from the one in the built-in table is just
+ * mathematically wrong anyway and should not be used.
+ */
+ if ((ctx = BN_CTX_new()) == NULL) {
+ ECerr(EC_F_EC_GROUP_NEW_FROM_ECPARAMETERS, ERR_R_BN_LIB);
+ goto err;
+ }
+ if ((dup = EC_GROUP_dup(ret)) == NULL
+ || EC_GROUP_set_seed(dup, NULL, 0) != 1
+ || !EC_GROUP_set_generator(dup, point, a, NULL)) {
+ ECerr(EC_F_EC_GROUP_NEW_FROM_ECPARAMETERS, ERR_R_EC_LIB);
+ goto err;
+ }
+ if ((curve_name = ec_curve_nid_from_params(dup, ctx)) != NID_undef) {
+ /*
+ * The input explicit parameters successfully matched one of the
+ * built-in curves: often for built-in curves we have specialized
+ * methods with better performance and hardening.
+ *
+ * In this case we replace the `EC_GROUP` created through explicit
+ * parameters with one created from a named group.
+ */
+ EC_GROUP *named_group = NULL;
+
+#ifndef OPENSSL_NO_EC_NISTP_64_GCC_128
+ /*
+ * NID_wap_wsg_idm_ecid_wtls12 and NID_secp224r1 are both aliases for
+ * the same curve, we prefer the SECP nid when matching explicit
+ * parameters as that is associated with a specialized EC_METHOD.
+ */
+ if (curve_name == NID_wap_wsg_idm_ecid_wtls12)
+ curve_name = NID_secp224r1;
+#endif /* !def(OPENSSL_NO_EC_NISTP_64_GCC_128) */
+
+ if ((named_group = EC_GROUP_new_by_curve_name(curve_name)) == NULL) {
+ ECerr(EC_F_EC_GROUP_NEW_FROM_ECPARAMETERS, ERR_R_EC_LIB);
+ goto err;
+ }
+ EC_GROUP_free(ret);
+ ret = named_group;
+
+ /*
+ * Set the flag so that EC_GROUPs created from explicit parameters are
+ * serialized using explicit parameters by default.
+ */
+ EC_GROUP_set_asn1_flag(ret, OPENSSL_EC_EXPLICIT_CURVE);
+ }
+
ok = 1;
err:
if (!ok) {
- EC_GROUP_clear_free(ret);
+ EC_GROUP_free(ret);
ret = NULL;
}
+ EC_GROUP_free(dup);
BN_free(p);
BN_free(a);
BN_free(b);
EC_POINT_free(point);
+
+ BN_CTX_free(ctx);
+
return ret;
}
@@ -861,7 +924,7 @@ EC_GROUP *d2i_ECPKParameters(EC_GROUP **a, const unsigned char **in, long len)
}
if (a) {
- EC_GROUP_clear_free(*a);
+ EC_GROUP_free(*a);
*a = group;
}
@@ -909,7 +972,7 @@ EC_KEY *d2i_ECPrivateKey(EC_KEY **a, const unsigned char **in, long len)
ret = *a;
if (priv_key->parameters) {
- EC_GROUP_clear_free(ret->group);
+ EC_GROUP_free(ret->group);
ret->group = EC_GROUP_new_from_ecpkparameters(priv_key->parameters);
}
diff --git a/crypto/openssl/crypto/ec/ec_curve.c b/crypto/openssl/crypto/ec/ec_curve.c
index bb1ce196d0fa..477349d4413e 100644
--- a/crypto/openssl/crypto/ec/ec_curve.c
+++ b/crypto/openssl/crypto/ec/ec_curve.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2002-2018 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 2002-2019 The OpenSSL Project Authors. All Rights Reserved.
* Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved
*
* Licensed under the OpenSSL license (the "License"). You may not use
@@ -3197,3 +3197,115 @@ int EC_curve_nist2nid(const char *name)
}
return NID_undef;
}
+
+#define NUM_BN_FIELDS 6
+/*
+ * Looks up the domain parameters of |group| in the built-in curve table,
+ * e.g. for a curve loaded explicitly (without a curve name) or to validate
+ * that domain parameters have not been modified. Temporaries come from |ctx|.
+ *
+ * Returns: The nid associated with the found named curve, or NID_undef
+ * if not found. If there was an error it returns -1.
+ */
+int ec_curve_nid_from_params(const EC_GROUP *group, BN_CTX *ctx)
+{
+ int ret = -1, nid, len, field_type, param_len;
+ size_t i, seed_len;
+ const unsigned char *seed, *params_seed, *params;
+ unsigned char *param_bytes = NULL;
+ const EC_CURVE_DATA *data;
+ const EC_POINT *generator = NULL;
+ const EC_METHOD *meth;
+ const BIGNUM *cofactor = NULL;
+ /* An array of BIGNUMs for (p, a, b, x, y, order) */
+ BIGNUM *bn[NUM_BN_FIELDS] = {NULL, NULL, NULL, NULL, NULL, NULL};
+
+ meth = EC_GROUP_method_of(group);
+ if (meth == NULL)
+ return -1;
+ /* The group's curve nid, when set (> 0), narrows the search below */
+ nid = EC_GROUP_get_curve_name(group);
+ field_type = EC_METHOD_get_field_type(meth);
+ seed_len = EC_GROUP_get_seed_len(group);
+ seed = EC_GROUP_get0_seed(group);
+ cofactor = EC_GROUP_get0_cofactor(group);
+
+ BN_CTX_start(ctx);
+
+ /*
+ * The built-in curves contain data fields (p, a, b, x, y, order) that are
+ * all zero-padded to be the same size. The size of the padding is
+ * determined by either the number of bytes in the field modulus (p) or the
+ * EC group order, whichever is larger.
+ */
+ param_len = BN_num_bytes(group->order);
+ len = BN_num_bytes(group->field);
+ if (len > param_len)
+ param_len = len;
+
+ /* Allocate space to store the padded data for (p, a, b, x, y, order) */
+ param_bytes = OPENSSL_malloc(param_len * NUM_BN_FIELDS);
+ if (param_bytes == NULL)
+ goto end;
+
+ /* Take the temporary bignums from ctx; BN_CTX_end() below releases them */
+ for (i = 0; i < NUM_BN_FIELDS; ++i) {
+ if ((bn[i] = BN_CTX_get(ctx)) == NULL)
+ goto end;
+ }
+ /*
+ * Fill in the bn array with the same values as the internal curves
+ * i.e. the values are p, a, b, x, y, order.
+ */
+ /* Get p, a & b */
+ if (!(EC_GROUP_get_curve(group, bn[0], bn[1], bn[2], ctx)
+ && ((generator = EC_GROUP_get0_generator(group)) != NULL)
+ /* Get x & y */
+ && EC_POINT_get_affine_coordinates(group, generator, bn[3], bn[4], ctx)
+ /* Get order */
+ && EC_GROUP_get_order(group, bn[5], ctx)))
+ goto end;
+
+ /*
+ * Convert the bignum array to bytes that are joined together to form
+ * a single buffer that contains data for all fields.
+ * (p, a, b, x, y, order) are all zero padded to be the same size.
+ */
+ for (i = 0; i < NUM_BN_FIELDS; ++i) {
+ if (BN_bn2binpad(bn[i], &param_bytes[i*param_len], param_len) <= 0)
+ goto end;
+ }
+
+ for (i = 0; i < curve_list_length; i++) {
+ const ec_list_element curve = curve_list[i];
+
+ data = curve.data;
+ /* Seed bytes follow the header; the parameter bytes follow the seed */
+ params_seed = (const unsigned char *)(data + 1); /* skip header */
+ params = params_seed + data->seed_len;
+
+ /* Look for unique fields in the fixed curve data */
+ if (data->field_type == field_type
+ && param_len == data->param_len
+ && (nid <= 0 || nid == curve.nid)
+ /* check the optional cofactor (ignore if it is zero) */
+ && (BN_is_zero(cofactor)
+ || BN_is_word(cofactor, (const BN_ULONG)curve.data->cofactor))
+ /* Check the optional seed (ignore if either side has no seed) */
+ && (data->seed_len == 0 || seed_len == 0
+ || ((size_t)data->seed_len == seed_len
+ && memcmp(params_seed, seed, seed_len) == 0))
+ /* Check that the group's params match the built-in curve params */
+ && memcmp(param_bytes, params, param_len * NUM_BN_FIELDS)
+ == 0) {
+ ret = curve.nid;
+ goto end;
+ }
+ }
+ /* No built-in curve matched the group */
+ ret = NID_undef;
+end:
+ OPENSSL_free(param_bytes);
+ BN_CTX_end(ctx);
+ return ret;
+}
diff --git a/crypto/openssl/crypto/ec/ec_lcl.h b/crypto/openssl/crypto/ec/ec_lcl.h
index 119255f1dc83..fbdb04ea3a04 100644
--- a/crypto/openssl/crypto/ec/ec_lcl.h
+++ b/crypto/openssl/crypto/ec/ec_lcl.h
@@ -154,7 +154,7 @@ struct ec_method_st {
int (*field_div) (const EC_GROUP *, BIGNUM *r, const BIGNUM *a,
const BIGNUM *b, BN_CTX *);
/*-
- * 'field_inv' computes the multipicative inverse of a in the field,
+ * 'field_inv' computes the multiplicative inverse of a in the field,
* storing the result in r.
*
* If 'a' is zero (or equivalent), you'll get an EC_R_CANNOT_INVERT error.
@@ -595,6 +595,8 @@ int ec_key_simple_generate_key(EC_KEY *eckey);
int ec_key_simple_generate_public_key(EC_KEY *eckey);
int ec_key_simple_check_key(const EC_KEY *eckey);
+int ec_curve_nid_from_params(const EC_GROUP *group, BN_CTX *ctx);
+
/* EC_METHOD definitions */
struct ec_key_method_st {
diff --git a/crypto/openssl/crypto/ec/ec_lib.c b/crypto/openssl/crypto/ec/ec_lib.c
index 8cab5a5061cf..1289c8608edd 100644
--- a/crypto/openssl/crypto/ec/ec_lib.c
+++ b/crypto/openssl/crypto/ec/ec_lib.c
@@ -265,6 +265,67 @@ int EC_METHOD_get_field_type(const EC_METHOD *meth)
static int ec_precompute_mont_data(EC_GROUP *);
+/*-
+ * Try computing cofactor from the generator order (n) and field cardinality (q).
+ * This works for all curves of cryptographic interest.
+ *
+ * Hasse thm: q + 1 - 2*sqrt(q) <= n*h <= q + 1 + 2*sqrt(q)
+ * h_min = (q + 1 - 2*sqrt(q))/n
+ * h_max = (q + 1 + 2*sqrt(q))/n
+ * h_max - h_min = 4*sqrt(q)/n
+ * So if n > 4*sqrt(q) holds, there is only one possible value for h:
+ * h = \lfloor (h_min + h_max)/2 \rceil = \lfloor (q + 1)/n \rceil
+ * Otherwise, set the cofactor to zero (i.e. unknown) and return success.
+ * Returns 1 on success, or 0 on failure (group->cofactor may be clobbered).
+ */
+static int ec_guess_cofactor(EC_GROUP *group) {
+ int ret = 0;
+ BN_CTX *ctx = NULL;
+ BIGNUM *q = NULL;
+
+ /*-
+ * If the order is too small (cofactor too large), we cannot guess it.
+ * The RHS below is a strict overestimate of lg(4 * sqrt(q))
+ */
+ if (BN_num_bits(group->order) <= (BN_num_bits(group->field) + 1) / 2 + 3) {
+ /* default to 0 */
+ BN_zero(group->cofactor);
+ /* return success */
+ return 1;
+ }
+
+ if ((ctx = BN_CTX_new()) == NULL)
+ return 0;
+
+ BN_CTX_start(ctx);
+ if ((q = BN_CTX_get(ctx)) == NULL)
+ goto err;
+
+ /* set q = 2**m for binary fields; q = p otherwise */
+ if (group->meth->field_type == NID_X9_62_characteristic_two_field) {
+ BN_zero(q);
+ if (!BN_set_bit(q, BN_num_bits(group->field) - 1))
+ goto err;
+ } else {
+ if (!BN_copy(q, group->field))
+ goto err;
+ }
+
+ /* h = \lfloor (q + 1)/n \rceil = \lfloor (q + 1 + n/2)/n \rfloor, in place */
+ if (!BN_rshift1(group->cofactor, group->order) /* n/2 */
+ || !BN_add(group->cofactor, group->cofactor, q) /* q + n/2 */
+ /* q + 1 + n/2 */
+ || !BN_add(group->cofactor, group->cofactor, BN_value_one())
+ /* (q + 1 + n/2)/n */
+ || !BN_div(group->cofactor, NULL, group->cofactor, group->order, ctx))
+ goto err;
+ ret = 1;
+ err:
+ BN_CTX_end(ctx);
+ BN_CTX_free(ctx);
+ return ret;
+}
+
int EC_GROUP_set_generator(EC_GROUP *group, const EC_POINT *generator,
const BIGNUM *order, const BIGNUM *cofactor)
{
@@ -273,6 +334,34 @@ int EC_GROUP_set_generator(EC_GROUP *group, const EC_POINT *generator,
return 0;
}
+ /* require group->field >= 1 */
+ if (group->field == NULL || BN_is_zero(group->field)
+ || BN_is_negative(group->field)) {
+ ECerr(EC_F_EC_GROUP_SET_GENERATOR, EC_R_INVALID_FIELD);
+ return 0;
+ }
+
+ /*-
+ * - require order >= 1
+ * - enforce upper bound due to Hasse thm: order can be no more than one bit
+ * longer than field cardinality
+ */
+ if (order == NULL || BN_is_zero(order) || BN_is_negative(order)
+ || BN_num_bits(order) > BN_num_bits(group->field) + 1) {
+ ECerr(EC_F_EC_GROUP_SET_GENERATOR, EC_R_INVALID_GROUP_ORDER);
+ return 0;
+ }
+
+ /*-
+ * Unfortunately the cofactor is an optional field in many standards.
+ * Internally, the lib uses 0 cofactor as a marker for "unknown cofactor".
+ * So accept cofactor == NULL or cofactor >= 0.
+ */
+ if (cofactor != NULL && BN_is_negative(cofactor)) {
+ ECerr(EC_F_EC_GROUP_SET_GENERATOR, EC_R_UNKNOWN_COFACTOR);
+ return 0;
+ }
+
if (group->generator == NULL) {
group->generator = EC_POINT_new(group);
if (group->generator == NULL)
@@ -281,17 +370,17 @@ int EC_GROUP_set_generator(EC_GROUP *group, const EC_POINT *generator,
if (!EC_POINT_copy(group->generator, generator))
return 0;
- if (order != NULL) {
- if (!BN_copy(group->order, order))
- return 0;
- } else
- BN_zero(group->order);
+ if (!BN_copy(group->order, order))
+ return 0;
- if (cofactor != NULL) {
+ /* Either take the provided positive cofactor, or try to compute it */
+ if (cofactor != NULL && !BN_is_zero(cofactor)) {
if (!BN_copy(group->cofactor, cofactor))
return 0;
- } else
+ } else if (!ec_guess_cofactor(group)) {
BN_zero(group->cofactor);
+ return 0;
+ }
/*
* Some groups have an order with
diff --git a/crypto/openssl/crypto/ec/ecdh_ossl.c b/crypto/openssl/crypto/ec/ecdh_ossl.c
index 5608c62b2ac9..ab51ee7138ff 100644
--- a/crypto/openssl/crypto/ec/ecdh_ossl.c
+++ b/crypto/openssl/crypto/ec/ecdh_ossl.c
@@ -58,7 +58,7 @@ int ecdh_simple_compute_key(unsigned char **pout, size_t *poutlen,
priv_key = EC_KEY_get0_private_key(ecdh);
if (priv_key == NULL) {
- ECerr(EC_F_ECDH_SIMPLE_COMPUTE_KEY, EC_R_NO_PRIVATE_VALUE);
+ ECerr(EC_F_ECDH_SIMPLE_COMPUTE_KEY, EC_R_MISSING_PRIVATE_KEY);
goto err;
}
diff --git a/crypto/openssl/crypto/ec/ecdsa_ossl.c b/crypto/openssl/crypto/ec/ecdsa_ossl.c
index e35c7600d866..c35ed2dcd0e7 100644
--- a/crypto/openssl/crypto/ec/ecdsa_ossl.c
+++ b/crypto/openssl/crypto/ec/ecdsa_ossl.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2002-2018 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 2002-2019 The OpenSSL Project Authors. All Rights Reserved.
*
* Licensed under the OpenSSL license (the "License"). You may not use
* this file except in compliance with the License. You can obtain a copy
@@ -41,11 +41,16 @@ static int ecdsa_sign_setup(EC_KEY *eckey, BN_CTX *ctx_in,
const EC_GROUP *group;
int ret = 0;
int order_bits;
+ const BIGNUM *priv_key;
if (eckey == NULL || (group = EC_KEY_get0_group(eckey)) == NULL) {
ECerr(EC_F_ECDSA_SIGN_SETUP, ERR_R_PASSED_NULL_PARAMETER);
return 0;
}
+ if ((priv_key = EC_KEY_get0_private_key(eckey)) == NULL) {
+ ECerr(EC_F_ECDSA_SIGN_SETUP, EC_R_MISSING_PRIVATE_KEY);
+ return 0;
+ }
if (!EC_KEY_can_sign(eckey)) {
ECerr(EC_F_ECDSA_SIGN_SETUP, EC_R_CURVE_DOES_NOT_SUPPORT_SIGNING);
@@ -83,8 +88,7 @@ static int ecdsa_sign_setup(EC_KEY *eckey, BN_CTX *ctx_in,
/* get random k */
do {
if (dgst != NULL) {
- if (!BN_generate_dsa_nonce(k, order,
- EC_KEY_get0_private_key(eckey),
+ if (!BN_generate_dsa_nonce(k, order, priv_key,
dgst, dlen, ctx)) {
ECerr(EC_F_ECDSA_SIGN_SETUP,
EC_R_RANDOM_NUMBER_GENERATION_FAILED);
@@ -162,10 +166,14 @@ ECDSA_SIG *ossl_ecdsa_sign_sig(const unsigned char *dgst, int dgst_len,
group = EC_KEY_get0_group(eckey);
priv_key = EC_KEY_get0_private_key(eckey);
- if (group == NULL || priv_key == NULL) {
+ if (group == NULL) {
ECerr(EC_F_OSSL_ECDSA_SIGN_SIG, ERR_R_PASSED_NULL_PARAMETER);
return NULL;
}
+ if (priv_key == NULL) {
+ ECerr(EC_F_OSSL_ECDSA_SIGN_SIG, EC_R_MISSING_PRIVATE_KEY);
+ return NULL;
+ }
if (!EC_KEY_can_sign(eckey)) {
ECerr(EC_F_OSSL_ECDSA_SIGN_SIG, EC_R_CURVE_DOES_NOT_SUPPORT_SIGNING);
diff --git a/crypto/openssl/crypto/ec/ecp_nistp224.c b/crypto/openssl/crypto/ec/ecp_nistp224.c
index 025273a14440..fbbdb9d9386c 100644
--- a/crypto/openssl/crypto/ec/ecp_nistp224.c
+++ b/crypto/openssl/crypto/ec/ecp_nistp224.c
@@ -324,34 +324,21 @@ static void felem_to_bin28(u8 out[28], const felem in)
}
}
-/* To preserve endianness when using BN_bn2bin and BN_bin2bn */
-static void flip_endian(u8 *out, const u8 *in, unsigned len)
-{
- unsigned i;
- for (i = 0; i < len; ++i)
- out[i] = in[len - 1 - i];
-}
-
/* From OpenSSL BIGNUM to internal representation */
static int BN_to_felem(felem out, const BIGNUM *bn)
{
- felem_bytearray b_in;
felem_bytearray b_out;
- unsigned num_bytes;
+ int num_bytes;
- /* BN_bn2bin eats leading zeroes */
- memset(b_out, 0, sizeof(b_out));