author      Jung-uk Kim <jkim@FreeBSD.org>    2012-07-12 19:30:53 +0000
committer   Jung-uk Kim <jkim@FreeBSD.org>    2012-07-12 19:30:53 +0000
commit      1f13597d10e771d5546d31839150812bde8e4a56 (patch)
tree        9739a803477c4610ec6628d914bdcc55fbb3602a /secure
parent      bc52deb7a99deda0fc81a7914fa4bbb7e9be113a (diff)
parent      0758ab5ea778e4ba36d2150af1bba602a48d6467 (diff)
Merge OpenSSL 1.0.1c.
Approved by: benl (maintainer)
Notes: svn path=/head/; revision=238405
Diffstat (limited to 'secure')
-rw-r--r--  secure/lib/libcrypto/Makefile | 368
-rw-r--r--  secure/lib/libcrypto/Makefile.asm | 120
-rw-r--r--  secure/lib/libcrypto/Makefile.inc | 19
-rw-r--r--  secure/lib/libcrypto/Makefile.man | 135
-rw-r--r--  secure/lib/libcrypto/amd64/aes-x86_64.S | 2542
-rw-r--r--  secure/lib/libcrypto/amd64/aesni-sha1-x86_64.S | 1397
-rw-r--r--  secure/lib/libcrypto/amd64/aesni-x86_64.S | 2536
-rw-r--r--  secure/lib/libcrypto/amd64/bsaes-x86_64.S | 2562
-rw-r--r--  secure/lib/libcrypto/amd64/cmll-x86_64.S | 1839
-rw-r--r--  secure/lib/libcrypto/amd64/ghash-x86_64.S | 1027
-rw-r--r--  secure/lib/libcrypto/amd64/md5-x86_64.S | 669
-rw-r--r--  secure/lib/libcrypto/amd64/modexp512-x86_64.S | 1774
-rw-r--r--  secure/lib/libcrypto/amd64/rc4-md5-x86_64.S | 1260
-rw-r--r--  secure/lib/libcrypto/amd64/rc4-x86_64.S | 616
-rw-r--r--  secure/lib/libcrypto/amd64/sha1-x86_64.S | 2487
-rw-r--r--  secure/lib/libcrypto/amd64/sha256-x86_64.S | 1779
-rw-r--r--  secure/lib/libcrypto/amd64/sha512-x86_64.S | 1803
-rw-r--r--  secure/lib/libcrypto/amd64/vpaes-x86_64.S | 829
-rw-r--r--  secure/lib/libcrypto/amd64/wp-x86_64.S | 859
-rw-r--r--  secure/lib/libcrypto/amd64/x86_64-gf2m.S | 292
-rw-r--r--  secure/lib/libcrypto/amd64/x86_64-mont.S | 1375
-rw-r--r--  secure/lib/libcrypto/amd64/x86_64-mont5.S | 785
-rw-r--r--  secure/lib/libcrypto/amd64/x86_64cpuid.S | 235
-rw-r--r--  secure/lib/libcrypto/engines/Makefile | 2
-rw-r--r--  secure/lib/libcrypto/engines/Makefile.inc | 2
-rw-r--r--  secure/lib/libcrypto/engines/libgost/Makefile | 8
-rw-r--r--  secure/lib/libcrypto/i386/aes-586.s | 3237
-rw-r--r--  secure/lib/libcrypto/i386/aesni-x86.s | 2144
-rw-r--r--  secure/lib/libcrypto/i386/bf-586.s | 1266
-rw-r--r--  secure/lib/libcrypto/i386/bf-686.s | 1200
-rw-r--r--  secure/lib/libcrypto/i386/bn-586.s | 2078
-rw-r--r--  secure/lib/libcrypto/i386/cast-586.s | 1690
-rw-r--r--  secure/lib/libcrypto/i386/cmll-x86.s | 2376
-rw-r--r--  secure/lib/libcrypto/i386/co-586.s | 1825
-rw-r--r--  secure/lib/libcrypto/i386/crypt586.s | 1717
-rw-r--r--  secure/lib/libcrypto/i386/des-586.s | 4334
-rw-r--r--  secure/lib/libcrypto/i386/ghash-x86.s | 1270
-rw-r--r--  secure/lib/libcrypto/i386/md5-586.s | 1141
-rw-r--r--  secure/lib/libcrypto/i386/rc4-586.s | 665
-rw-r--r--  secure/lib/libcrypto/i386/rc5-586.s | 958
-rw-r--r--  secure/lib/libcrypto/i386/rmd-586.s | 3717
-rw-r--r--  secure/lib/libcrypto/i386/sha1-586.s | 4080
-rw-r--r--  secure/lib/libcrypto/i386/sha256-586.s | 259
-rw-r--r--  secure/lib/libcrypto/i386/sha512-586.s | 836
-rw-r--r--  secure/lib/libcrypto/i386/vpaes-x86.s | 662
-rw-r--r--  secure/lib/libcrypto/i386/wp-mmx.s | 1106
-rw-r--r--  secure/lib/libcrypto/i386/x86-gf2m.s | 344
-rw-r--r--  secure/lib/libcrypto/i386/x86-mont.s | 457
-rw-r--r--  secure/lib/libcrypto/i386/x86cpuid.s | 333
-rw-r--r--  secure/lib/libcrypto/man/ASN1_OBJECT_new.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/ASN1_STRING_length.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/ASN1_STRING_new.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/ASN1_STRING_print_ex.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/ASN1_generate_nconf.3 | 6
-rw-r--r--  secure/lib/libcrypto/man/BIO_ctrl.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/BIO_f_base64.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/BIO_f_buffer.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/BIO_f_cipher.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/BIO_f_md.3 | 8
-rw-r--r--  secure/lib/libcrypto/man/BIO_f_null.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/BIO_f_ssl.3 | 10
-rw-r--r--  secure/lib/libcrypto/man/BIO_find_type.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/BIO_new.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/BIO_new_CMS.3 | 189
-rw-r--r--  secure/lib/libcrypto/man/BIO_push.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/BIO_read.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/BIO_s_accept.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/BIO_s_bio.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/BIO_s_connect.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/BIO_s_fd.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/BIO_s_file.3 | 6
-rw-r--r--  secure/lib/libcrypto/man/BIO_s_mem.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/BIO_s_null.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/BIO_s_socket.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/BIO_set_callback.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/BIO_should_retry.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/BN_BLINDING_new.3 | 22
-rw-r--r--  secure/lib/libcrypto/man/BN_CTX_new.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/BN_CTX_start.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/BN_add.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/BN_add_word.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/BN_bn2bin.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/BN_cmp.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/BN_copy.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/BN_generate_prime.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/BN_mod_inverse.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/BN_mod_mul_montgomery.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/BN_mod_mul_reciprocal.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/BN_new.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/BN_num_bytes.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/BN_rand.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/BN_set_bit.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/BN_swap.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/BN_zero.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/CMS_add0_cert.3 | 189
-rw-r--r--  secure/lib/libcrypto/man/CMS_add1_recipient_cert.3 | 186
-rw-r--r--  secure/lib/libcrypto/man/CMS_compress.3 | 194
-rw-r--r--  secure/lib/libcrypto/man/CMS_decrypt.3 | 188
-rw-r--r--  secure/lib/libcrypto/man/CMS_encrypt.3 | 219
-rw-r--r--  secure/lib/libcrypto/man/CMS_final.3 | 165
-rw-r--r--  secure/lib/libcrypto/man/CMS_get0_RecipientInfos.3 | 230
-rw-r--r--  secure/lib/libcrypto/man/CMS_get0_SignerInfos.3 | 199
-rw-r--r--  secure/lib/libcrypto/man/CMS_get0_type.3 | 188
-rw-r--r--  secure/lib/libcrypto/man/CMS_get1_ReceiptRequest.3 | 193
-rw-r--r--  secure/lib/libcrypto/man/CMS_sign.3 | 244
-rw-r--r--  secure/lib/libcrypto/man/CMS_sign_add1_signer.3 | 224
-rw-r--r--  secure/lib/libcrypto/man/CMS_sign_receipt.3 | 169
-rw-r--r--  secure/lib/libcrypto/man/CMS_uncompress.3 | 177
-rw-r--r--  secure/lib/libcrypto/man/CMS_verify.3 | 248
-rw-r--r--  secure/lib/libcrypto/man/CMS_verify_receipt.3 | 171
-rw-r--r--  secure/lib/libcrypto/man/CONF_modules_free.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/CONF_modules_load_file.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/CRYPTO_set_ex_data.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/DH_generate_key.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/DH_generate_parameters.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/DH_get_ex_new_index.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/DH_new.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/DH_set_method.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/DH_size.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/DSA_SIG_new.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/DSA_do_sign.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/DSA_dup_DH.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/DSA_generate_key.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/DSA_generate_parameters.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/DSA_get_ex_new_index.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/DSA_new.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/DSA_set_method.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/DSA_sign.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/DSA_size.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/ERR_GET_LIB.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/ERR_clear_error.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/ERR_error_string.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/ERR_get_error.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/ERR_load_crypto_strings.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/ERR_load_strings.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/ERR_print_errors.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/ERR_put_error.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/ERR_remove_state.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/ERR_set_mark.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/EVP_BytesToKey.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/EVP_DigestInit.3 | 87
-rw-r--r--  secure/lib/libcrypto/man/EVP_DigestSignInit.3 | 209
-rw-r--r--  secure/lib/libcrypto/man/EVP_DigestVerifyInit.3 | 204
-rw-r--r--  secure/lib/libcrypto/man/EVP_EncryptInit.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/EVP_OpenInit.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/EVP_PKEY_CTX_ctrl.3 | 251
-rw-r--r--  secure/lib/libcrypto/man/EVP_PKEY_CTX_new.3 | 174
-rw-r--r--  secure/lib/libcrypto/man/EVP_PKEY_cmp.3 | 184
-rw-r--r--  secure/lib/libcrypto/man/EVP_PKEY_decrypt.3 | 216
-rw-r--r--  secure/lib/libcrypto/man/EVP_PKEY_derive.3 | 216
-rw-r--r--  secure/lib/libcrypto/man/EVP_PKEY_encrypt.3 | 216
-rw-r--r--  secure/lib/libcrypto/man/EVP_PKEY_get_default_digest.3 | 163
-rw-r--r--  secure/lib/libcrypto/man/EVP_PKEY_keygen.3 | 288
-rw-r--r--  secure/lib/libcrypto/man/EVP_PKEY_new.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/EVP_PKEY_print_private.3 | 175
-rw-r--r--  secure/lib/libcrypto/man/EVP_PKEY_set1_RSA.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/EVP_PKEY_sign.3 | 218
-rw-r--r--  secure/lib/libcrypto/man/EVP_PKEY_verify.3 | 214
-rw-r--r--  secure/lib/libcrypto/man/EVP_PKEY_verifyrecover.3 | 226
-rw-r--r--  secure/lib/libcrypto/man/EVP_SealInit.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/EVP_SignInit.3 | 11
-rw-r--r--  secure/lib/libcrypto/man/EVP_VerifyInit.3 | 11
-rw-r--r--  secure/lib/libcrypto/man/OBJ_nid2obj.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/OPENSSL_Applink.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/OPENSSL_VERSION_NUMBER.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/OPENSSL_config.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/OPENSSL_ia32cap.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/OPENSSL_load_builtin_modules.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/OpenSSL_add_all_algorithms.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/PEM_write_bio_CMS_stream.3 | 165
-rw-r--r--  secure/lib/libcrypto/man/PEM_write_bio_PKCS7_stream.3 | 163
-rw-r--r--  secure/lib/libcrypto/man/PKCS12_create.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/PKCS12_parse.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/PKCS7_decrypt.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/PKCS7_encrypt.3 | 57
-rw-r--r--  secure/lib/libcrypto/man/PKCS7_sign.3 | 115
-rw-r--r--  secure/lib/libcrypto/man/PKCS7_sign_add_signer.3 | 206
-rw-r--r--  secure/lib/libcrypto/man/PKCS7_verify.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/RAND_add.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/RAND_bytes.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/RAND_cleanup.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/RAND_egd.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/RAND_load_file.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/RAND_set_rand_method.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/RSA_blinding_on.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/RSA_check_key.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/RSA_generate_key.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/RSA_get_ex_new_index.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/RSA_new.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/RSA_padding_add_PKCS1_type_1.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/RSA_print.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/RSA_private_encrypt.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/RSA_public_encrypt.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/RSA_set_method.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/RSA_sign.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/RSA_sign_ASN1_OCTET_STRING.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/RSA_size.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/SMIME_read_CMS.3 | 195
-rw-r--r--  secure/lib/libcrypto/man/SMIME_read_PKCS7.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/SMIME_write_CMS.3 | 187
-rw-r--r--  secure/lib/libcrypto/man/SMIME_write_PKCS7.3 | 22
-rw-r--r--  secure/lib/libcrypto/man/X509_NAME_ENTRY_get_object.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/X509_NAME_add_entry_by_txt.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/X509_NAME_get_index_by_NID.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/X509_NAME_print_ex.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/X509_STORE_CTX_get_error.3 | 385
-rw-r--r--  secure/lib/libcrypto/man/X509_STORE_CTX_get_ex_new_index.3 | 164
-rw-r--r--  secure/lib/libcrypto/man/X509_STORE_CTX_new.3 | 247
-rw-r--r--  secure/lib/libcrypto/man/X509_STORE_CTX_set_verify_cb.3 | 289
-rw-r--r--  secure/lib/libcrypto/man/X509_STORE_set_verify_cb_func.3 | 175
-rw-r--r--  secure/lib/libcrypto/man/X509_VERIFY_PARAM_set_flags.3 | 292
-rw-r--r--  secure/lib/libcrypto/man/X509_new.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/X509_verify_cert.3 | 174
-rw-r--r--  secure/lib/libcrypto/man/bio.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/blowfish.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/bn.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/bn_internal.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/buffer.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/crypto.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/d2i_ASN1_OBJECT.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/d2i_DHparams.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/d2i_DSAPublicKey.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/d2i_PKCS8PrivateKey.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/d2i_RSAPublicKey.3 | 10
-rw-r--r--  secure/lib/libcrypto/man/d2i_X509.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/d2i_X509_ALGOR.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/d2i_X509_CRL.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/d2i_X509_NAME.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/d2i_X509_REQ.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/d2i_X509_SIG.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/des.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/des_modes.3 | 290
-rw-r--r--  secure/lib/libcrypto/man/dh.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/dsa.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/ecdsa.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/engine.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/err.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/evp.3 | 24
-rw-r--r--  secure/lib/libcrypto/man/hmac.3 | 22
-rw-r--r--  secure/lib/libcrypto/man/i2d_CMS_bio_stream.3 | 167
-rw-r--r--  secure/lib/libcrypto/man/i2d_PKCS7_bio_stream.3 | 165
-rw-r--r--  secure/lib/libcrypto/man/lh_stats.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/lhash.3 | 242
-rw-r--r--  secure/lib/libcrypto/man/md5.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/mdc2.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/pem.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/rand.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/rc4.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/ripemd.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/rsa.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/sha.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/threads.3 | 85
-rw-r--r--  secure/lib/libcrypto/man/ui.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/ui_compat.3 | 2
-rw-r--r--  secure/lib/libcrypto/man/x509.3 | 2
-rw-r--r--  secure/lib/libcrypto/opensslconf-arm.h | 53
-rw-r--r--  secure/lib/libcrypto/opensslconf-i386.h | 213
-rw-r--r--  secure/lib/libcrypto/opensslconf-ia64.h | 57
-rw-r--r--  secure/lib/libcrypto/opensslconf-mips.h | 57
-rw-r--r--  secure/lib/libcrypto/opensslconf-powerpc.h | 57
-rw-r--r--  secure/lib/libcrypto/opensslconf-sparc64.h | 57
-rw-r--r--  secure/lib/libcrypto/opensslconf-x86.h (renamed from secure/lib/libcrypto/opensslconf-amd64.h) | 78
-rw-r--r--  secure/lib/libssl/Makefile | 19
-rw-r--r--  secure/lib/libssl/Makefile.man | 8
-rw-r--r--  secure/lib/libssl/man/SSL_CIPHER_get_name.3 | 2
-rw-r--r--  secure/lib/libssl/man/SSL_COMP_add_compression_method.3 | 2
-rw-r--r--  secure/lib/libssl/man/SSL_CTX_add_extra_chain_cert.3 | 2
-rw-r--r--  secure/lib/libssl/man/SSL_CTX_add_session.3 | 2
-rw-r--r--  secure/lib/libssl/man/SSL_CTX_ctrl.3 | 2
-rw-r--r--  secure/lib/libssl/man/SSL_CTX_flush_sessions.3 | 2
-rw-r--r--  secure/lib/libssl/man/SSL_CTX_free.3 | 2
-rw-r--r--  secure/lib/libssl/man/SSL_CTX_get_ex_new_index.3 | 2
-rw-r--r--  secure/lib/libssl/man/SSL_CTX_get_verify_mode.3 | 2
-rw-r--r--  secure/lib/libssl/man/SSL_CTX_load_verify_locations.3 | 2
-rw-r--r--  secure/lib/libssl/man/SSL_CTX_new.3 | 4
-rw-r--r--  secure/lib/libssl/man/SSL_CTX_sess_number.3 | 2
-rw-r--r--  secure/lib/libssl/man/SSL_CTX_sess_set_cache_size.3 | 2
-rw-r--r--  secure/lib/libssl/man/SSL_CTX_sess_set_get_cb.3 | 2
-rw-r--r--  secure/lib/libssl/man/SSL_CTX_sessions.3 | 2
-rw-r--r--  secure/lib/libssl/man/SSL_CTX_set_cert_store.3 | 2
-rw-r--r--  secure/lib/libssl/man/SSL_CTX_set_cert_verify_callback.3 | 2
-rw-r--r--  secure/lib/libssl/man/SSL_CTX_set_cipher_list.3 | 2
-rw-r--r--  secure/lib/libssl/man/SSL_CTX_set_client_CA_list.3 | 2
-rw-r--r--  secure/lib/libssl/man/SSL_CTX_set_client_cert_cb.3 | 2
-rw-r--r--  secure/lib/libssl/man/SSL_CTX_set_default_passwd_cb.3 | 2
-rw-r--r--  secure/lib/libssl/man/SSL_CTX_set_generate_session_id.3 | 2
-rw-r--r--  secure/lib/libssl/man/SSL_CTX_set_info_callback.3 | 2
-rw-r--r--  secure/lib/libssl/man/SSL_CTX_set_max_cert_list.3 | 2
-rw-r--r--  secure/lib/libssl/man/SSL_CTX_set_mode.3 | 11
-rw-r--r--  secure/lib/libssl/man/SSL_CTX_set_msg_callback.3 | 2
-rw-r--r--  secure/lib/libssl/man/SSL_CTX_set_options.3 | 5
-rw-r--r--  secure/lib/libssl/man/SSL_CTX_set_psk_client_callback.3 | 175
-rw-r--r--  secure/lib/libssl/man/SSL_CTX_set_quiet_shutdown.3 | 2
-rw-r--r--  secure/lib/libssl/man/SSL_CTX_set_session_cache_mode.3 | 2
-rw-r--r--  secure/lib/libssl/man/SSL_CTX_set_session_id_context.3 | 2
-rw-r--r--  secure/lib/libssl/man/SSL_CTX_set_ssl_version.3 | 8
-rw-r--r--  secure/lib/libssl/man/SSL_CTX_set_timeout.3 | 2
-rw-r--r--  secure/lib/libssl/man/SSL_CTX_set_tmp_dh_callback.3 | 2
-rw-r--r--  secure/lib/libssl/man/SSL_CTX_set_tmp_rsa_callback.3 | 2
-rw-r--r--  secure/lib/libssl/man/SSL_CTX_set_verify.3 | 2
-rw-r--r--  secure/lib/libssl/man/SSL_CTX_use_certificate.3 | 2
-rw-r--r--  secure/lib/libssl/man/SSL_CTX_use_psk_identity_hint.3 | 198
-rw-r--r--  secure/lib/libssl/man/SSL_SESSION_free.3 | 2
-rw-r--r--  secure/lib/libssl/man/SSL_SESSION_get_ex_new_index.3 | 2
-rw-r--r--  secure/lib/libssl/man/SSL_SESSION_get_time.3 | 2
-rw-r--r--  secure/lib/libssl/man/SSL_accept.3 | 2
-rw-r--r--  secure/lib/libssl/man/SSL_alert_type_string.3 | 7
-rw-r--r--  secure/lib/libssl/man/SSL_clear.3 | 2
-rw-r--r--  secure/lib/libssl/man/SSL_connect.3 | 2
-rw-r--r--  secure/lib/libssl/man/SSL_do_handshake.3 | 2
-rw-r--r--  secure/lib/libssl/man/SSL_free.3 | 2
-rw-r--r--  secure/lib/libssl/man/SSL_get_SSL_CTX.3 | 2
-rw-r--r--  secure/lib/libssl/man/SSL_get_ciphers.3 | 2
-rw-r--r--  secure/lib/libssl/man/SSL_get_client_CA_list.3 | 2
-rw-r--r--  secure/lib/libssl/man/SSL_get_current_cipher.3 | 2
-rw-r--r--  secure/lib/libssl/man/SSL_get_default_timeout.3 | 2
-rw-r--r--  secure/lib/libssl/man/SSL_get_error.3 | 2
-rw-r--r--  secure/lib/libssl/man/SSL_get_ex_data_X509_STORE_CTX_idx.3 | 2
-rw-r--r--  secure/lib/libssl/man/SSL_get_ex_new_index.3 | 2
-rw-r--r--  secure/lib/libssl/man/SSL_get_fd.3 | 2
-rw-r--r--  secure/lib/libssl/man/SSL_get_peer_cert_chain.3 | 2
-rw-r--r--  secure/lib/libssl/man/SSL_get_peer_certificate.3 | 2
-rw-r--r--  secure/lib/libssl/man/SSL_get_psk_identity.3 | 156
-rw-r--r--  secure/lib/libssl/man/SSL_get_rbio.3 | 2
-rw-r--r--  secure/lib/libssl/man/SSL_get_session.3 | 2
-rw-r--r--  secure/lib/libssl/man/SSL_get_verify_result.3 | 2
-rw-r--r--  secure/lib/libssl/man/SSL_get_version.3 | 2
-rw-r--r--  secure/lib/libssl/man/SSL_library_init.3 | 3
-rw-r--r--  secure/lib/libssl/man/SSL_load_client_CA_file.3 | 2
-rw-r--r--  secure/lib/libssl/man/SSL_new.3 | 2
-rw-r--r--  secure/lib/libssl/man/SSL_pending.3 | 2
-rw-r--r--  secure/lib/libssl/man/SSL_read.3 | 2
-rw-r--r--  secure/lib/libssl/man/SSL_rstate_string.3 | 2
-rw-r--r--  secure/lib/libssl/man/SSL_session_reused.3 | 2
-rw-r--r--  secure/lib/libssl/man/SSL_set_bio.3 | 2
-rw-r--r--  secure/lib/libssl/man/SSL_set_connect_state.3 | 2
-rw-r--r--  secure/lib/libssl/man/SSL_set_fd.3 | 2
-rw-r--r--  secure/lib/libssl/man/SSL_set_session.3 | 2
-rw-r--r--  secure/lib/libssl/man/SSL_set_shutdown.3 | 2
-rw-r--r--  secure/lib/libssl/man/SSL_set_verify_result.3 | 2
-rw-r--r--  secure/lib/libssl/man/SSL_shutdown.3 | 2
-rw-r--r--  secure/lib/libssl/man/SSL_state_string.3 | 2
-rw-r--r--  secure/lib/libssl/man/SSL_want.3 | 2
-rw-r--r--  secure/lib/libssl/man/SSL_write.3 | 2
-rw-r--r--  secure/lib/libssl/man/d2i_SSL_SESSION.3 | 2
-rw-r--r--  secure/lib/libssl/man/ssl.3 | 75
-rw-r--r--  secure/usr.bin/openssl/Makefile | 14
-rw-r--r--  secure/usr.bin/openssl/Makefile.man | 7
-rw-r--r--  secure/usr.bin/openssl/man/CA.pl.1 | 2
-rw-r--r--  secure/usr.bin/openssl/man/asn1parse.1 | 15
-rw-r--r--  secure/usr.bin/openssl/man/ca.1 | 14
-rw-r--r--  secure/usr.bin/openssl/man/ciphers.1 | 53
-rw-r--r--  secure/usr.bin/openssl/man/cms.1 | 677
-rw-r--r--  secure/usr.bin/openssl/man/config.1 | 282
-rw-r--r--  secure/usr.bin/openssl/man/crl.1 | 2
-rw-r--r--  secure/usr.bin/openssl/man/crl2pkcs7.1 | 2
-rw-r--r--  secure/usr.bin/openssl/man/dgst.1 | 41
-rw-r--r--  secure/usr.bin/openssl/man/dhparam.1 | 4
-rw-r--r--  secure/usr.bin/openssl/man/dsa.1 | 4
-rw-r--r--  secure/usr.bin/openssl/man/dsaparam.1 | 4
-rw-r--r--  secure/usr.bin/openssl/man/ec.1 | 4
-rw-r--r--  secure/usr.bin/openssl/man/ecparam.1 | 4
-rw-r--r--  secure/usr.bin/openssl/man/enc.1 | 56
-rw-r--r--  secure/usr.bin/openssl/man/errstr.1 | 2
-rw-r--r--  secure/usr.bin/openssl/man/gendsa.1 | 4
-rw-r--r--  secure/usr.bin/openssl/man/genpkey.1 | 306
-rw-r--r--  secure/usr.bin/openssl/man/genrsa.1 | 4
-rw-r--r--  secure/usr.bin/openssl/man/nseq.1 | 2
-rw-r--r--  secure/usr.bin/openssl/man/ocsp.1 | 7
-rw-r--r--  secure/usr.bin/openssl/man/openssl.1 | 80
-rw-r--r--  secure/usr.bin/openssl/man/passwd.1 | 2
-rw-r--r--  secure/usr.bin/openssl/man/pkcs12.1 | 80
-rw-r--r--  secure/usr.bin/openssl/man/pkcs7.1 | 4
-rw-r--r--  secure/usr.bin/openssl/man/pkcs8.1 | 4
-rw-r--r--  secure/usr.bin/openssl/man/pkey.1 | 251
-rw-r--r--  secure/usr.bin/openssl/man/pkeyparam.1 | 182
-rw-r--r--  secure/usr.bin/openssl/man/pkeyutl.1 | 320
-rw-r--r--  secure/usr.bin/openssl/man/rand.1 | 2
-rw-r--r--  secure/usr.bin/openssl/man/req.1 | 87
-rw-r--r--  secure/usr.bin/openssl/man/rsa.1 | 4
-rw-r--r--  secure/usr.bin/openssl/man/rsautl.1 | 2
-rw-r--r--  secure/usr.bin/openssl/man/s_client.1 | 26
-rw-r--r--  secure/usr.bin/openssl/man/s_server.1 | 15
-rw-r--r--  secure/usr.bin/openssl/man/s_time.1 | 2
-rw-r--r--  secure/usr.bin/openssl/man/sess_id.1 | 2
-rw-r--r--  secure/usr.bin/openssl/man/smime.1 | 119
-rw-r--r--  secure/usr.bin/openssl/man/speed.1 | 4
-rw-r--r--  secure/usr.bin/openssl/man/spkac.1 | 4
-rw-r--r--  secure/usr.bin/openssl/man/ts.1 | 649
-rw-r--r--  secure/usr.bin/openssl/man/tsget.1 | 311
-rw-r--r--  secure/usr.bin/openssl/man/verify.1 | 73
-rw-r--r--  secure/usr.bin/openssl/man/version.1 | 2
-rw-r--r--  secure/usr.bin/openssl/man/x509.1 | 29
-rw-r--r--  secure/usr.bin/openssl/man/x509v3_config.1 | 100
394 files changed, 67060 insertions, 13827 deletions
diff --git a/secure/lib/libcrypto/Makefile b/secure/lib/libcrypto/Makefile
index 73f5cb7e6de0..f9bff4704855 100644
--- a/secure/lib/libcrypto/Makefile
+++ b/secure/lib/libcrypto/Makefile
@@ -6,7 +6,7 @@ SUBDIR= engines
.include <bsd.own.mk>
LIB= crypto
-SHLIB_MAJOR= 6
+SHLIB_MAJOR= 7
ALLOW_SHARED_TEXTREL=
NO_LINT=
@@ -21,31 +21,42 @@ MAN+= config.5 des_modes.7
.include "Makefile.inc"
# base sources
-SRCS= cpt_err.c cryptlib.c cversion.c ebcdic.c ex_data.c mem.c mem_clr.c \
- mem_dbg.c o_dir.c o_str.c o_time.c tmdiff.c uid.c dyn_lck.c \
- o_init.c fips_err.c
-INCS= crypto.h ebcdic.h opensslv.h ossl_typ.h symhacks.h tmdiff.h \
- ../e_os.h ../e_os2.h
+SRCS= cpt_err.c cryptlib.c cversion.c ex_data.c mem.c mem_dbg.c o_dir.c \
+ o_fips.c o_init.c o_str.c o_time.c uid.c
+.if ${MACHINE_CPUARCH} == "amd64"
+SRCS+= x86_64cpuid.S
+.elif ${MACHINE_CPUARCH} == "i386"
+SRCS+= x86cpuid.s
+.else
+SRCS+= mem_clr.c
+.endif
+INCS+= crypto.h ebcdic.h opensslv.h ossl_typ.h symhacks.h ../e_os2.h
# aes
-SRCS+= aes_cbc.c aes_cfb.c aes_core.c aes_ctr.c aes_ecb.c aes_ige.c \
- aes_misc.c aes_ofb.c aes_wrap.c
-INCS+= aes.h aes_locl.h
+SRCS+= aes_cfb.c aes_ctr.c aes_ecb.c aes_ige.c aes_misc.c aes_ofb.c aes_wrap.c
+.if ${MACHINE_CPUARCH} == "amd64"
+SRCS+= aes-x86_64.S aesni-sha1-x86_64.S aesni-x86_64.S bsaes-x86_64.S \
+ vpaes-x86_64.S
+.elif ${MACHINE_CPUARCH} == "i386"
+SRCS+= aes-586.s aesni-x86.s vpaes-x86.s
+.else
+SRCS+= aes_cbc.c aes_core.c
+.endif
+INCS+= aes.h
# asn1
-SRCS+= a_bitstr.c a_bool.c a_bytes.c a_d2i_fp.c a_digest.c a_dup.c \
- a_enum.c a_gentm.c a_hdr.c a_i2d_fp.c a_int.c a_mbstr.c \
- a_meth.c a_object.c a_octet.c a_print.c a_set.c a_sign.c \
- a_strex.c a_strnid.c a_time.c a_type.c a_utctm.c a_utf8.c \
- a_verify.c asn1_err.c asn1_gen.c asn1_lib.c asn1_par.c \
- asn_moid.c asn_mime.c asn_pack.c d2i_pr.c d2i_pu.c evp_asn1.c f_enum.c \
- f_int.c f_string.c i2d_pr.c i2d_pu.c n_pkey.c nsseq.c p5_pbe.c \
- p5_pbev2.c p8_pkey.c t_bitst.c t_crl.c t_pkey.c t_req.c \
- t_spki.c t_x509.c t_x509a.c tasn_dec.c tasn_enc.c tasn_fre.c \
- tasn_new.c tasn_typ.c tasn_utl.c x_algor.c x_attrib.c \
- x_bignum.c x_crl.c x_exten.c x_info.c x_long.c x_name.c \
- x_pkey.c x_pubkey.c x_req.c x_sig.c x_spki.c x_val.c x_x509.c \
- x_x509a.c
+SRCS+= a_bitstr.c a_bool.c a_bytes.c a_d2i_fp.c a_digest.c a_dup.c a_enum.c \
+ a_gentm.c a_i2d_fp.c a_int.c a_mbstr.c a_object.c a_octet.c a_print.c \
+ a_set.c a_sign.c a_strex.c a_strnid.c a_time.c a_type.c a_utctm.c \
+ a_utf8.c a_verify.c ameth_lib.c asn1_err.c asn1_gen.c asn1_lib.c \
+ asn1_par.c asn_mime.c asn_moid.c asn_pack.c bio_asn1.c bio_ndef.c \
+ d2i_pr.c d2i_pu.c evp_asn1.c f_enum.c f_int.c f_string.c i2d_pr.c \
+ i2d_pu.c n_pkey.c nsseq.c p5_pbe.c p5_pbev2.c p8_pkey.c t_bitst.c \
+ t_crl.c t_pkey.c t_req.c t_spki.c t_x509.c t_x509a.c tasn_dec.c \
+ tasn_enc.c tasn_fre.c tasn_new.c tasn_prn.c tasn_typ.c tasn_utl.c \
+ x_algor.c x_attrib.c x_bignum.c x_crl.c x_exten.c x_info.c x_long.c \
+ x_name.c x_nx509.c x_pkey.c x_pubkey.c x_req.c x_sig.c x_spki.c \
+ x_val.c x_x509.c x_x509a.c
INCS+= asn1.h asn1_mac.h asn1t.h
# bf
@@ -62,86 +73,94 @@ SRCS+= bf_enc.c
INCS+= blowfish.h
# bio
-SRCS+= b_dump.c b_print.c b_sock.c bf_buff.c bf_lbuf.c bf_nbio.c \
- bf_null.c bio_cb.c bio_err.c bio_lib.c bss_acpt.c bss_bio.c \
- bss_conn.c bss_dgram.c bss_fd.c bss_file.c bss_log.c bss_mem.c \
- bss_null.c bss_sock.c
-INCS+= bio.h bio_lcl.h
+SRCS+= b_dump.c b_print.c b_sock.c bf_buff.c bf_nbio.c bf_null.c bio_cb.c \
+ bio_err.c bio_lib.c bss_acpt.c bss_bio.c bss_conn.c bss_dgram.c \
+ bss_fd.c bss_file.c bss_log.c bss_mem.c bss_null.c bss_sock.c
+INCS+= bio.h
# bn
-SRCS+= bn_add.c bn_blind.c bn_const.c bn_ctx.c bn_depr.c bn_div.c \
- bn_err.c bn_exp.c bn_exp2.c bn_gcd.c bn_gf2m.c bn_kron.c \
- bn_lib.c bn_mod.c bn_mont.c bn_mpi.c bn_mul.c bn_nist.c bn_opt.c \
- bn_prime.c bn_print.c bn_rand.c bn_recp.c bn_shift.c bn_sqr.c \
- bn_sqrt.c bn_word.c bn_x931p.c
-.if ${MACHINE_CPUARCH} == "i386"
-SRCS+= bn-586.s co-586.s
-.elif ${MACHINE_CPUARCH} == "amd64"
-SRCS+= x86_64-gcc.c
+SRCS+= bn_add.c bn_blind.c bn_const.c bn_ctx.c bn_depr.c bn_div.c bn_err.c \
+ bn_exp.c bn_exp2.c bn_gcd.c bn_gf2m.c bn_kron.c bn_lib.c bn_mod.c \
+ bn_mont.c bn_mpi.c bn_mul.c bn_nist.c bn_prime.c bn_print.c bn_rand.c \
+ bn_recp.c bn_shift.c bn_sqr.c bn_sqrt.c bn_word.c bn_x931p.c
+.if ${MACHINE_CPUARCH} == "amd64"
+SRCS+= modexp512-x86_64.S x86_64-gcc.c x86_64-gf2m.S x86_64-mont.S \
+ x86_64-mont5.S
+.elif ${MACHINE_CPUARCH} == "i386"
+SRCS+= bn-586.s co-586.s x86-gf2m.s x86-mont.s
.else
SRCS+= bn_asm.c
.endif
-
INCS+= bn.h
# buffer
SRCS+= buf_err.c buf_str.c buffer.c
INCS+= buffer.h
-# cast
-SRCS+= c_cfb64.c c_ecb.c c_ofb64.c c_skey.c
-.if ${MACHINE_CPUARCH} == "i386"
-SRCS+= cast-586.s
+# camellia
+SRCS+= cmll_cfb.c cmll_ctr.c cmll_ecb.c cmll_ofb.c cmll_utl.c
+.if ${MACHINE_CPUARCH} == "amd64"
+SRCS+= cmll_misc.c cmll-x86_64.S
+.elif ${MACHINE_CPUARCH} == "i386"
+SRCS+= cmll-x86.s
.else
-SRCS+= c_enc.c
+SRCS+= camellia.c cmll_cbc.c cmll_misc.c
.endif
+INCS+= camellia.h
+
+# cast
+SRCS+= c_cfb64.c c_ecb.c c_enc.c c_ofb64.c c_skey.c
INCS+= cast.h
-# camellia
-.if ${MACHINE_CPUARCH} == "i386" || ${MACHINE_CPUARCH} == "amd64"
-SRCS+= camellia.c cmll_cbc.c cmll_cfb.c cmll_ctr.c cmll_ecb.c \
- cmll_misc.c cmll_ofb.c
-INCS+= camellia.h
-.endif
+# cmac
+SRCS+= cm_ameth.c cm_pmeth.c cmac.c
+INCS+= cmac.h
+
+# cms
+SRCS+= cms_asn1.c cms_att.c cms_dd.c cms_enc.c cms_env.c cms_err.c \
+ cms_ess.c cms_io.c cms_lib.c cms_pwri.c cms_sd.c cms_smime.c
+INCS+= cms.h
# comp
SRCS+= c_rle.c c_zlib.c comp_err.c comp_lib.c
INCS+= comp.h
# conf
-SRCS+= conf_api.c conf_def.c conf_err.c conf_lib.c conf_mall.c conf_mod.c conf_sap.c
+SRCS+= conf_api.c conf_def.c conf_err.c conf_lib.c conf_mall.c conf_mod.c \
+ conf_sap.c
INCS+= conf.h conf_api.h
# des
-SRCS+= cbc3_enc.c cbc_cksm.c cbc_enc.c cfb64ede.c cfb64enc.c cfb_enc.c \
- des_lib.c des_old.c des_old2.c ecb3_enc.c ecb_enc.c ede_cbcm_enc.c \
- enc_read.c enc_writ.c fcrypt.c ofb64ede.c ofb64enc.c \
- ofb_enc.c pcbc_enc.c qud_cksm.c rand_key.c read2pwd.c \
- rpc_enc.c set_key.c str2key.c xcbc_enc.c
+SRCS+= cbc_cksm.c cbc_enc.c cfb64ede.c cfb64enc.c cfb_enc.c des_old.c \
+ des_old2.c ecb3_enc.c ecb_enc.c ede_cbcm_enc.c enc_read.c enc_writ.c \
+ fcrypt.c ofb64ede.c ofb64enc.c ofb_enc.c pcbc_enc.c qud_cksm.c \
+ rand_key.c read2pwd.c rpc_enc.c set_key.c str2key.c xcbc_enc.c
.if ${MACHINE_CPUARCH} == "i386"
-SRCS+= des-586.s crypt586.s
+SRCS+= crypt586.s des-586.s
.else
SRCS+= des_enc.c fcrypt_b.c
.endif
INCS+= des.h des_old.h
# dh
-SRCS+= dh_asn1.c dh_check.c dh_err.c dh_depr.c dh_gen.c dh_key.c dh_lib.c
+SRCS+= dh_ameth.c dh_asn1.c dh_check.c dh_depr.c dh_err.c dh_gen.c dh_key.c \
+ dh_lib.c dh_pmeth.c dh_prn.c
INCS+= dh.h
# dsa
-SRCS+= dsa_asn1.c dsa_err.c dsa_depr.c dsa_gen.c dsa_key.c dsa_lib.c \
- dsa_ossl.c dsa_sign.c dsa_vrf.c dsa_utl.c
+SRCS+= dsa_ameth.c dsa_asn1.c dsa_depr.c dsa_err.c dsa_gen.c dsa_key.c \
+ dsa_lib.c dsa_ossl.c dsa_pmeth.c dsa_prn.c dsa_sign.c dsa_vrf.c
INCS+= dsa.h
# dso
-SRCS+= dso_dl.c dso_dlfcn.c dso_err.c dso_lib.c dso_null.c dso_openssl.c
+SRCS+= dso_dlfcn.c dso_err.c dso_lib.c dso_openssl.c
INCS+= dso.h
# ec
-SRCS+= ec_asn1.c ec_check.c ec_curve.c ec_cvt.c ec_err.c ec_key.c \
- ec_lib.c ec_mult.c ec_print.c ecp_mont.c ecp_nist.c \
- ecp_smpl.c ec2_mult.c ec2_smpl.c
+SRCS+= ec2_mult.c ec2_oct.c ec2_smpl.c ec_ameth.c ec_asn1.c ec_check.c \
+ ec_curve.c ec_cvt.c ec_err.c ec_key.c ec_lib.c ec_mult.c ec_oct.c \
+ ec_pmeth.c ec_print.c eck_prn.c ecp_mont.c ecp_nist.c ecp_oct.c \
+ ecp_smpl.c
INCS+= ec.h
# ecdh
@@ -153,36 +172,31 @@ SRCS+= ecs_asn1.c ecs_err.c ecs_lib.c ecs_ossl.c ecs_sign.c ecs_vrf.c
INCS+= ecdsa.h
# engine
-SRCS+= eng_all.c eng_cnf.c eng_cryptodev.c eng_ctrl.c eng_dyn.c \
- eng_err.c eng_fat.c eng_init.c eng_lib.c eng_list.c \
- eng_openssl.c eng_padlock.c eng_pkey.c eng_table.c tb_cipher.c \
- tb_dh.c tb_digest.c tb_dsa.c tb_ecdh.c tb_ecdsa.c tb_rand.c \
+SRCS+= eng_all.c eng_cnf.c eng_cryptodev.c eng_ctrl.c eng_dyn.c eng_err.c \
+ eng_fat.c eng_init.c eng_lib.c eng_list.c eng_openssl.c eng_pkey.c \
+ eng_rdrand.c eng_rsax.c eng_table.c tb_asnmth.c tb_cipher.c tb_dh.c \
+ tb_digest.c tb_dsa.c tb_ecdh.c tb_ecdsa.c tb_pkmeth.c tb_rand.c \
tb_rsa.c tb_store.c
INCS+= engine.h
# err
-SRCS+= err.c err_all.c err_prn.c err_def.c err_str.c err_bio.c
+SRCS+= err.c err_all.c err_prn.c
INCS+= err.h
# evp
SRCS+= bio_b64.c bio_enc.c bio_md.c bio_ok.c c_all.c c_allc.c c_alld.c \
- dig_eng.c digest.c e_aes.c e_bf.c e_cast.c e_des.c e_des3.c e_idea.c \
- e_null.c e_old.c e_rc2.c e_rc4.c e_rc5.c e_xcbc_d.c encode.c \
- evp_acnf.c evp_cnf.c evp_enc.c evp_err.c evp_key.c evp_lib.c evp_pbe.c \
- evp_pkey.c e_seed.c enc_min.c m_dss.c m_dss1.c m_ecdsa.c m_md2.c m_md4.c m_md5.c \
- m_mdc2.c m_null.c m_ripemd.c m_sha.c m_sha1.c names.c \
- openbsd_hw.c p5_crpt.c p5_crpt2.c p_dec.c p_enc.c p_lib.c \
- p_open.c p_seal.c p_sign.c p_verify.c
-.if ${MACHINE_CPUARCH} == "i386" || ${MACHINE_CPUARCH} == "amd64"
-SRCS+= e_camellia.c
-.endif
+ digest.c e_aes.c e_aes_cbc_hmac_sha1.c e_bf.c e_camellia.c e_cast.c \
+ e_des.c e_des3.c e_idea.c e_null.c e_old.c e_rc2.c e_rc4.c \
+ e_rc4_hmac_md5.c e_rc5.c e_seed.c e_xcbc_d.c encode.c evp_acnf.c \
+ evp_enc.c evp_err.c evp_key.c evp_lib.c evp_pbe.c evp_pkey.c m_dss.c \
+ m_dss1.c m_ecdsa.c m_md4.c m_md5.c m_mdc2.c m_null.c m_ripemd.c \
+ m_sha.c m_sha1.c m_sigver.c m_wp.c names.c p5_crpt.c p5_crpt2.c \
+ p_dec.c p_enc.c p_lib.c p_open.c p_seal.c p_sign.c p_verify.c \
+ pmeth_fn.c pmeth_gn.c pmeth_lib.c
INCS+= evp.h
-# fips
-INCS+= fips.h fips_rand.h
-
# hmac
-SRCS+= hmac.c
+SRCS+= hm_ameth.c hm_pmeth.c hmac.c
INCS+= hmac.h
# idea
@@ -192,24 +206,21 @@ INCS+= idea.h
.endif
# krb5
-#SRCS+= krb5_asn.c
INCS+= krb5_asn.h
# lhash
SRCS+= lh_stats.c lhash.c
INCS+= lhash.h
-# md2
-SRCS+= md2_dgst.c md2_one.c
-INCS+= md2.h
-
# md4
SRCS+= md4_dgst.c md4_one.c
INCS+= md4.h
# md5
SRCS+= md5_dgst.c md5_one.c
-.if ${MACHINE_CPUARCH} == "i386"
+.if ${MACHINE_CPUARCH} == "amd64"
+SRCS+= md5-x86_64.S
+.elif ${MACHINE_CPUARCH} == "i386"
SRCS+= md5-586.s
.endif
INCS+= md5.h
@@ -218,36 +229,46 @@ INCS+= md5.h
SRCS+= mdc2_one.c mdc2dgst.c
INCS+= mdc2.h
+# modes
+SRCS+= cbc128.c ccm128.c cfb128.c ctr128.c cts128.c gcm128.c ofb128.c xts128.c
+.if ${MACHINE_CPUARCH} == "amd64"
+SRCS+= ghash-x86_64.S
+.elif ${MACHINE_CPUARCH} == "i386"
+SRCS+= ghash-x86.s
+.endif
+INCS+= modes.h
+
# objects
-SRCS+= o_names.c obj_dat.c obj_err.c obj_lib.c
-INCS+= objects.h obj_mac.h
+SRCS+= o_names.c obj_dat.c obj_err.c obj_lib.c obj_xref.c
+INCS+= obj_mac.h objects.h
# ocsp
-SRCS+= ocsp_asn.c ocsp_cl.c ocsp_err.c ocsp_ext.c ocsp_ht.c \
- ocsp_lib.c ocsp_prn.c ocsp_srv.c ocsp_vfy.c
+SRCS+= ocsp_asn.c ocsp_cl.c ocsp_err.c ocsp_ext.c ocsp_ht.c ocsp_lib.c \
+ ocsp_prn.c ocsp_srv.c ocsp_vfy.c
INCS+= ocsp.h
# pem
SRCS+= pem_all.c pem_err.c pem_info.c pem_lib.c pem_oth.c pem_pk8.c \
- pem_pkey.c pem_seal.c pem_sign.c pem_x509.c pem_xaux.c
+ pem_pkey.c pem_seal.c pem_sign.c pem_x509.c pem_xaux.c pvkfmt.c
INCS+= pem.h pem2.h
# pkcs12
-SRCS+= p12_add.c p12_asn.c p12_attr.c p12_crpt.c p12_crt.c \
- p12_decr.c p12_init.c p12_key.c p12_kiss.c p12_mutl.c \
- p12_npas.c p12_p8d.c p12_p8e.c p12_utl.c pk12err.c
-INCS+= pkcs12.h pkcs7.h
+SRCS+= p12_add.c p12_asn.c p12_attr.c p12_crpt.c p12_crt.c p12_decr.c \
+ p12_init.c p12_key.c p12_kiss.c p12_mutl.c p12_npas.c p12_p8d.c \
+ p12_p8e.c p12_utl.c pk12err.c
+INCS+= pkcs12.h
# pkcs7
-SRCS+= example.c pk7_asn1.c pk7_attr.c pk7_dgst.c pk7_doit.c \
- pk7_lib.c pk7_mime.c pk7_smime.c pkcs7err.c
+SRCS+= bio_pk7.c pk7_asn1.c pk7_attr.c pk7_doit.c pk7_lib.c pk7_mime.c \
+ pk7_smime.c pkcs7err.c
+INCS+= pkcs7.h
# pqueue
SRCS+= pqueue.c
-INCS+= pqueue.h pq_compat.h
+INCS+= pqueue.h
# rand
-SRCS+= md_rand.c rand_egd.c rand_err.c rand_lib.c rand_unix.c randfile.c rand_eng.c
+SRCS+= md_rand.c rand_egd.c rand_err.c rand_lib.c rand_unix.c randfile.c
INCS+= rand.h
# rc2
@@ -255,11 +276,13 @@ SRCS+= rc2_cbc.c rc2_ecb.c rc2_skey.c rc2cfb64.c rc2ofb64.c
INCS+= rc2.h
# rc4
-SRCS+= rc4_skey.c rc4_fblk.c
-.if ${MACHINE_CPUARCH} == "i386"
+SRCS+= rc4_utl.c
+.if ${MACHINE_CPUARCH} == "amd64"
+SRCS+= rc4-md5-x86_64.S rc4-x86_64.S
+.elif ${MACHINE_CPUARCH} == "i386"
SRCS+= rc4-586.s
.else
-SRCS+= rc4_enc.c
+SRCS+= rc4_enc.c rc4_skey.c
.endif
INCS+= rc4.h
@@ -274,32 +297,44 @@ INCS+= rc5.h
# ripemd
SRCS+= rmd_dgst.c rmd_one.c
+.if ${MACHINE_CPUARCH} == "i386"
+SRCS+= rmd-586.s
+.endif
INCS+= ripemd.h
# rsa
-SRCS+= rsa_asn1.c rsa_chk.c rsa_eay.c rsa_err.c rsa_gen.c rsa_lib.c \
- rsa_none.c rsa_null.c rsa_oaep.c rsa_pk1.c rsa_saos.c \
- rsa_sign.c rsa_ssl.c rsa_depr.c rsa_pss.c rsa_x931.c rsa_x931g.c \
- rsa_eng.c
+SRCS+= rsa_ameth.c rsa_asn1.c rsa_chk.c rsa_crpt.c rsa_depr.c rsa_eay.c \
+ rsa_err.c rsa_gen.c rsa_lib.c rsa_none.c rsa_null.c rsa_oaep.c \
+ rsa_pk1.c rsa_pmeth.c rsa_prn.c rsa_pss.c rsa_saos.c rsa_sign.c \
+ rsa_ssl.c rsa_x931.c
INCS+= rsa.h
+# seed
+SRCS+= seed.c seed_cbc.c seed_cfb.c seed_ecb.c seed_ofb.c
+INCS+= seed.h
+
# sha
-SRCS+= sha1_one.c sha1dgst.c sha_dgst.c sha_one.c sha256.c sha512.c
-.if ${MACHINE_CPUARCH} == "i386"
-SRCS+= sha1-586.s
+SRCS+= sha1_one.c sha1dgst.c sha256.c sha512.c sha_dgst.c sha_one.c
+.if ${MACHINE_CPUARCH} == "amd64"
+SRCS+= sha1-x86_64.S sha256-x86_64.S sha512-x86_64.S
+.elif ${MACHINE_CPUARCH} == "i386"
+SRCS+= sha1-586.s sha256-586.s sha512-586.s
.endif
INCS+= sha.h
+# srp
+SRCS+= srp_lib.c srp_vfy.c
+INCS+= srp.h
+
# stack
SRCS+= stack.c
-INCS+= stack.h safestack.h
-
-# store
-SRCS+= str_err.c str_lib.c str_meth.c str_mem.c
-INCS+= store.h
+INCS+= safestack.h stack.h
-# threads
-SRCS+= th-lock.c
+# ts
+SRCS+= ts_asn1.c ts_conf.c ts_err.c ts_lib.c ts_req_print.c ts_req_utils.c \
+ ts_rsp_print.c ts_rsp_sign.c ts_rsp_utils.c ts_rsp_verify.c \
+ ts_verify_ctx.c
+INCS+= ts.h
# txt_db
SRCS+= txt_db.c
@@ -307,50 +342,53 @@ INCS+= txt_db.h
# ui
SRCS+= ui_compat.c ui_err.c ui_lib.c ui_openssl.c ui_util.c
-INCS+= ui.h ui_compat.h ui_locl.h
+INCS+= ui.h ui_compat.h
+
+# whrlpool
+SRCS+= wp_dgst.c
+.if ${MACHINE_CPUARCH} == "amd64"
+SRCS+= wp-x86_64.S
+.elif ${MACHINE_CPUARCH} == "i386"
+SRCS+= wp-mmx.s wp_block.c
+.else
+SRCS+= wp_block.c
+.endif
+INCS+= whrlpool.h
# x509
-SRCS+= by_dir.c by_file.c x509_att.c x509_cmp.c x509_d2.c \
- x509_def.c x509_err.c x509_ext.c x509_lu.c x509_obj.c \
- x509_r2x.c x509_req.c x509_set.c x509_trs.c x509_txt.c \
- x509_v3.c x509_vfy.c x509cset.c x509name.c x509rset.c \
- x509spki.c x509type.c x_all.c x509_vpm.c
+SRCS+= by_dir.c by_file.c x509_att.c x509_cmp.c x509_d2.c x509_def.c \
+ x509_err.c x509_ext.c x509_lu.c x509_obj.c x509_r2x.c x509_req.c \
+ x509_set.c x509_trs.c x509_txt.c x509_v3.c x509_vfy.c x509_vpm.c \
+ x509cset.c x509name.c x509rset.c x509spki.c x509type.c x_all.c
INCS+= x509.h x509_vfy.h
# x509v3
-SRCS+= pcy_cache.c pcy_data.c pcy_lib.c pcy_map.c pcy_node.c \
- pcy_tree.c v3_addr.c v3_akey.c v3_akeya.c v3_alt.c v3_asid.c \
- v3_bcons.c v3_bitst.c \
- v3_conf.c v3_cpols.c v3_crld.c v3_enum.c v3_extku.c v3_genn.c \
- v3_ia5.c v3_info.c v3_int.c v3_lib.c v3_ncons.c v3_ocsp.c \
- v3_pci.c v3_pcia.c v3_pcons.c v3_pku.c v3_pmaps.c v3_prn.c \
- v3_purp.c v3_skey.c v3_sxnet.c v3_utl.c v3err.c
+SRCS+= pcy_cache.c pcy_data.c pcy_lib.c pcy_map.c pcy_node.c pcy_tree.c \
+ v3_addr.c v3_akey.c v3_akeya.c v3_alt.c v3_asid.c v3_bcons.c \
+ v3_bitst.c v3_conf.c v3_cpols.c v3_crld.c v3_enum.c v3_extku.c \
+ v3_genn.c v3_ia5.c v3_info.c v3_int.c v3_lib.c v3_ncons.c v3_ocsp.c \
+ v3_pci.c v3_pcia.c v3_pcons.c v3_pku.c v3_pmaps.c v3_prn.c v3_purp.c \
+ v3_skey.c v3_sxnet.c v3_utl.c v3err.c
INCS+= x509v3.h
-# cms
-#SRCS+= cms_lib.c cms_asn1.c cms_att.c cms_io.c cms_smime.c cms_err.c \
-# cms_sd.c cms_dd.c cms_cd.c cms_env.c cms_enc.c cms_ess.c
-#INCS+= cms.h
-
-# jpake - is marked experimental
-#SRCS+= jpake.c jpake_err.c
-#INCS+= jpake.h
-
-# seed
-#SRCS+= seed.c seed_ecb.c seed_cbc.c seed_cfb.c seed_ofb.c
-#INCS+= seed.h
-
SRCS+= buildinf.h
-INCS+= opensslconf.h evp.h
+INCS+= opensslconf.h
INCSDIR= ${INCLUDEDIR}/openssl
CSTD= gnu89
+CFLAGS+= -I${LCRYPTO_SRC}/crypto/asn1
+CFLAGS+= -I${LCRYPTO_SRC}/crypto/evp
+CFLAGS+= -I${LCRYPTO_SRC}/crypto/modes
+
.if !empty(SRCS:M*.s)
AFLAGS+= --noexecstack
.endif
+.if !empty(SRCS:M*.S)
+ACFLAGS+= -Wa,--noexecstack
+.endif
-CLEANFILES= buildinf.h opensslconf.h evp.h
+CLEANFILES= buildinf.h opensslconf.h
buildinf.h: ${.CURDIR}/Makefile
( echo "#ifndef MK1MF_BUILD"; \
@@ -359,25 +397,19 @@ buildinf.h: ${.CURDIR}/Makefile
echo " #define PLATFORM \"FreeBSD-${MACHINE_ARCH}\""; \
echo "#endif" ) > ${.TARGET}
+.if ${MACHINE_CPUARCH} == "amd64" || ${MACHINE_CPUARCH} == "i386"
+opensslconf.h: opensslconf-x86.h
+.else
opensslconf.h: opensslconf-${MACHINE_CPUARCH}.h
+.endif
cp -f ${.ALLSRC} ${.TARGET}
-evp.h: ${LCRYPTO_SRC}/crypto/evp/evp.h
.if ${MK_IDEA} == "no"
+evp.h: ${LCRYPTO_SRC}/crypto/evp/evp.h
sed '/^#ifndef OPENSSL_NO_IDEA$$/,/^#endif$$/d' ${.ALLSRC} > ${.TARGET}
-.else
- cp -f ${.ALLSRC} ${.TARGET}
+CLEANFILES+= evp.h
.endif
-# No FIPS support for now
-fips.h:
- echo '/* dummy fips.h */' > ${.TARGET}
-
-fips_rand.h:
- echo '/* dummy fips_rand.h */' > ${.TARGET}
-
-CLEANFILES+= fips.h fips_rand.h
-
OLDSYMLINKS+= libdes.a libdes.so libdes.so.3 libdes_p.a
afterinstall:
@${ECHO} "Removing stale symlinks."
@@ -388,7 +420,9 @@ afterinstall:
.include <bsd.lib.mk>
-.if ${MACHINE_CPUARCH} == "i386"
+.if ${MACHINE_CPUARCH} == "amd64"
+.PATH: ${.CURDIR}/amd64
+.elif ${MACHINE_CPUARCH} == "i386"
.PATH: ${.CURDIR}/i386
.endif
@@ -400,17 +434,17 @@ _bn_asmpath= ${LCRYPTO_SRC}/crypto/bn/asm
_ideapath= ${LCRYPTO_SRC}/crypto/idea
.endif
-.PATH: \
- ${LCRYPTO_SRC}/crypto \
+.PATH: ${LCRYPTO_SRC}/crypto \
${LCRYPTO_SRC}/crypto/aes \
${LCRYPTO_SRC}/crypto/asn1 \
${LCRYPTO_SRC}/crypto/bf \
${LCRYPTO_SRC}/crypto/bio \
- ${_bn_asmpath} \
${LCRYPTO_SRC}/crypto/bn \
+ ${_bn_asmpath} \
${LCRYPTO_SRC}/crypto/buffer \
- ${LCRYPTO_SRC}/crypto/cast \
${LCRYPTO_SRC}/crypto/camellia \
+ ${LCRYPTO_SRC}/crypto/cast \
+ ${LCRYPTO_SRC}/crypto/cmac \
${LCRYPTO_SRC}/crypto/cms \
${LCRYPTO_SRC}/crypto/comp \
${LCRYPTO_SRC}/crypto/conf \
@@ -426,13 +460,12 @@ _ideapath= ${LCRYPTO_SRC}/crypto/idea
${LCRYPTO_SRC}/crypto/evp \
${LCRYPTO_SRC}/crypto/hmac \
${_ideapath} \
- ${LCRYPTO_SRC}/crypto/jpake \
${LCRYPTO_SRC}/crypto/krb5 \
${LCRYPTO_SRC}/crypto/lhash \
- ${LCRYPTO_SRC}/crypto/md2 \
${LCRYPTO_SRC}/crypto/md4 \
${LCRYPTO_SRC}/crypto/md5 \
${LCRYPTO_SRC}/crypto/mdc2 \
+ ${LCRYPTO_SRC}/crypto/modes \
${LCRYPTO_SRC}/crypto/objects \
${LCRYPTO_SRC}/crypto/ocsp \
${LCRYPTO_SRC}/crypto/pem \
@@ -447,13 +480,12 @@ _ideapath= ${LCRYPTO_SRC}/crypto/idea
${LCRYPTO_SRC}/crypto/rsa \
${LCRYPTO_SRC}/crypto/seed \
${LCRYPTO_SRC}/crypto/sha \
+ ${LCRYPTO_SRC}/crypto/srp \
${LCRYPTO_SRC}/crypto/stack \
- ${LCRYPTO_SRC}/crypto/store \
- ${LCRYPTO_SRC}/crypto/threads \
+ ${LCRYPTO_SRC}/crypto/ts \
${LCRYPTO_SRC}/crypto/txt_db \
${LCRYPTO_SRC}/crypto/ui \
+ ${LCRYPTO_SRC}/crypto/whrlpool \
${LCRYPTO_SRC}/crypto/x509 \
${LCRYPTO_SRC}/crypto/x509v3 \
- ${LCRYPTO_SRC}/engines \
- ${LCRYPTO_SRC} \
${.CURDIR}/man
diff --git a/secure/lib/libcrypto/Makefile.asm b/secure/lib/libcrypto/Makefile.asm
index 50584ee03ed6..eb62c7cf7786 100644
--- a/secure/lib/libcrypto/Makefile.asm
+++ b/secure/lib/libcrypto/Makefile.asm
@@ -1,36 +1,113 @@
# $FreeBSD$
-# Use this to help generate the asm *.s files after an import. It is not
+# Use this to help generate the asm *.[Ss] files after an import. It is not
# perfect by any means, but does what is needed.
# Do a 'make -f Makefile.asm all' and it will generate *.s. Move them
# to the i386 subdir, and correct any exposed paths and $ FreeBSD $ tags.
-.if ${MACHINE_ARCH} == "i386"
-
.include "Makefile.inc"
-.PATH: ${LCRYPTO_SRC}/crypto/rc4/asm ${LCRYPTO_SRC}/crypto/rc5/asm \
- ${LCRYPTO_SRC}/crypto/des/asm ${LCRYPTO_SRC}/crypto/cast/asm \
- ${LCRYPTO_SRC}/crypto/sha/asm ${LCRYPTO_SRC}/crypto/bn/asm \
- ${LCRYPTO_SRC}/crypto/bf/asm ${LCRYPTO_SRC}/crypto/md5/asm \
- ${LCRYPTO_SRC}/crypto/ripemd/asm
+.if ${MACHINE_CPUARCH} == "amd64"
+
+.PATH: ${LCRYPTO_SRC}/crypto \
+ ${LCRYPTO_SRC}/crypto/aes/asm \
+ ${LCRYPTO_SRC}/crypto/bn/asm \
+ ${LCRYPTO_SRC}/crypto/camellia/asm \
+ ${LCRYPTO_SRC}/crypto/md5/asm \
+ ${LCRYPTO_SRC}/crypto/modes/asm \
+ ${LCRYPTO_SRC}/crypto/rc4/asm \
+ ${LCRYPTO_SRC}/crypto/rc5/asm \
+ ${LCRYPTO_SRC}/crypto/sha/asm \
+ ${LCRYPTO_SRC}/crypto/whrlpool/asm
+
+# aes
+SRCS= aes-x86_64.pl aesni-sha1-x86_64.pl aesni-x86_64.pl bsaes-x86_64.pl \
+ vpaes-x86_64.pl
+
+# bn
+SRCS+= modexp512-x86_64.pl x86_64-gf2m.pl x86_64-mont.pl x86_64-mont5.pl
+
+# camellia
+SRCS+= cmll-x86_64.pl
+
+# md5
+SRCS+= md5-x86_64.pl
+
+# modes
+SRCS+= ghash-x86_64.pl
+
+# rc4
+SRCS+= rc4-md5-x86_64.pl rc4-x86_64.pl
+
+# sha
+SRCS+= sha1-x86_64.pl sha512-x86_64.pl
+
+# whrlpool
+SRCS+= wp-x86_64.pl
+
+ASM= ${SRCS:S/.pl/.S/}
+ASM+= sha256-x86_64.S x86_64cpuid.S
+
+all: ${ASM}
+
+CLEANFILES+= ${SRCS:M*.pl:S/.pl$/.cmt/} ${SRCS:M*.pl:S/.pl$/.S/}
+CLEANFILES+= sha256-x86_64.cmt sha256-x86_64.S x86_64cpuid.cmt x86_64cpuid.S
+.SUFFIXES: .pl .cmt
+
+.pl.cmt:
+ ( cd `dirname ${.IMPSRC}`/.. ; perl ${.IMPSRC} ${.OBJDIR}/${.TARGET} )
+
+.cmt.S:
+ ( echo ' # $$'FreeBSD'$$'; cat ${.IMPSRC} ) > ${.TARGET}
+
+sha256-x86_64.cmt: sha512-x86_64.pl
+ ( cd `dirname ${.ALLSRC}`/.. ; perl ${.ALLSRC} ${.OBJDIR}/${.TARGET} )
+
+x86_64cpuid.cmt: x86_64cpuid.pl
+ ( cd `dirname ${.ALLSRC}` ; perl ${.ALLSRC} ${.OBJDIR}/${.TARGET} )
+
+.elif ${MACHINE_CPUARCH} == "i386"
+
+.PATH: ${LCRYPTO_SRC}/crypto \
+ ${LCRYPTO_SRC}/crypto/aes/asm \
+ ${LCRYPTO_SRC}/crypto/bf/asm \
+ ${LCRYPTO_SRC}/crypto/bn/asm \
+ ${LCRYPTO_SRC}/crypto/camellia/asm \
+ ${LCRYPTO_SRC}/crypto/cast/asm \
+ ${LCRYPTO_SRC}/crypto/des/asm \
+ ${LCRYPTO_SRC}/crypto/md5/asm \
+ ${LCRYPTO_SRC}/crypto/modes/asm \
+ ${LCRYPTO_SRC}/crypto/rc4/asm \
+ ${LCRYPTO_SRC}/crypto/rc5/asm \
+ ${LCRYPTO_SRC}/crypto/ripemd/asm \
+ ${LCRYPTO_SRC}/crypto/sha/asm \
+ ${LCRYPTO_SRC}/crypto/whrlpool/asm
PERLPATH= -I${LCRYPTO_SRC}/crypto/des/asm -I${LCRYPTO_SRC}/crypto/perlasm
+# aes
+SRCS= aes-586.pl aesni-x86.pl vpaes-x86.pl
+
# blowfish
-SRCS= bf-686.pl bf-586.pl
+SRCS+= bf-586.pl bf-686.pl
# bn
-SRCS+= bn-586.pl co-586.pl
+SRCS+= bn-586.pl co-586.pl x86-gf2m.pl x86-mont.pl
+
+# camellia
+SRCS+= cmll-x86.pl
# cast
SRCS+= cast-586.pl
# des
-SRCS+= des-586.pl crypt586.pl
+SRCS+= crypt586.pl des-586.pl
# md5
SRCS+= md5-586.pl
+# modes
+SRCS+= ghash-x86.pl
+
# rc4
SRCS+= rc4-586.pl
@@ -41,21 +118,24 @@ SRCS+= rc5-586.pl
SRCS+= rmd-586.pl
# sha
-SRCS+= sha1-586.pl
+SRCS+= sha1-586.pl sha256-586.pl sha512-586.pl
+
+# whrlpool
+SRCS+= wp-mmx.pl
+
+# cpuid
+SRCS+= x86cpuid.pl
ASM= ${SRCS:S/.pl/.s/}
all: ${ASM}
-CLEANFILES+= ${SRCS:M*.pl:S/.pl$/.cmt/} ${SRCS:M*.pl:S/.pl$/.s/}
-.SUFFIXES: .pl .cmt
+CLEANFILES+= ${SRCS:M*.pl:S/.pl$/.s/}
+.SUFFIXES: .pl
-.pl.cmt:
+.pl.s:
( echo ' # $$'FreeBSD'$$' ;\
- perl ${PERLPATH} ${.IMPSRC} elf ${CPUTYPE:Mi386:S/i//} ) > ${.TARGET}
-
-.cmt.s:
- tr -d "'" < ${.IMPSRC} > ${.TARGET}
+ perl ${PERLPATH} ${.IMPSRC} elf ${CFLAGS} ) > ${.TARGET}
+.endif
.include <bsd.prog.mk>
-.endif
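Usage sketch of the regeneration workflow the Makefile.asm comments describe (the amd64 destination directory is inferred from where this commit places the generated *.S files; the Makefile comment itself only names the i386 subdir):

    cd secure/lib/libcrypto
    make -f Makefile.asm all     # run the OpenSSL perl scripts; emits *.S on amd64, *.s on i386
    mv *.S amd64/                # or: mv *.s i386/ -- then correct any exposed paths and $FreeBSD$ tags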
diff --git a/secure/lib/libcrypto/Makefile.inc b/secure/lib/libcrypto/Makefile.inc
index fe31ea0c8619..b55fb5e06c84 100644
--- a/secure/lib/libcrypto/Makefile.inc
+++ b/secure/lib/libcrypto/Makefile.inc
@@ -3,7 +3,7 @@
.include <bsd.own.mk>
# OpenSSL version used for manual page generation
-OPENSSL_VER= 0.9.8x
+OPENSSL_VER= 1.0.1c
OPENSSL_DATE= 2012-05-10
LCRYPTO_SRC= ${.CURDIR}/../../../crypto/openssl
@@ -17,8 +17,21 @@ CFLAGS+= -DOPENSSL_THREADS -DDSO_DLFCN -DHAVE_DLFCN_H
CFLAGS+= -DOPENSSL_NO_IDEA
.endif
-.if ${MACHINE_ARCH} == "i386" || ${MACHINE_ARCH} == "amd64"
-CFLAGS+= -DL_ENDIAN
+.if ${MACHINE_CPUARCH} == "amd64"
+CFLAGS+=-DL_ENDIAN -DOPENSSL_IA32_SSE2
+CFLAGS+=-DAES_ASM -DBSAES_ASM -DVPAES_ASM
+CFLAGS+=-DOPENSSL_BN_ASM_MONT -DOPENSSL_BN_ASM_MONT5 -DOPENSSL_BN_ASM_GF2m
+CFLAGS+=-DMD5_ASM
+CFLAGS+=-DSHA1_ASM -DSHA256_ASM -DSHA512_ASM
+CFLAGS+=-DWHIRLPOOL_ASM -DGHASH_ASM
+.elif ${MACHINE_CPUARCH} == "i386"
+CFLAGS+=-DL_ENDIAN -DOPENSSL_IA32_SSE2
+CFLAGS+=-DAES_ASM -DVPAES_ASM
+CFLAGS+=-DOPENSSL_BN_ASM_PART_WORDS -DOPENSSL_BN_ASM_MONT -DOPENSSL_BN_ASM_GF2m
+CFLAGS+=-DMD5_ASM
+CFLAGS+=-DRMD160_ASM
+CFLAGS+=-DSHA1_ASM -DSHA256_ASM -DSHA512_ASM
+CFLAGS+=-DWHIRLPOOL_ASM -DGHASH_ASM
.endif
MANDIR= ${SHAREDIR}/openssl/man/man
diff --git a/secure/lib/libcrypto/Makefile.man b/secure/lib/libcrypto/Makefile.man
index 58ebff18b115..cb0d3f7e566f 100644
--- a/secure/lib/libcrypto/Makefile.man
+++ b/secure/lib/libcrypto/Makefile.man
@@ -14,6 +14,7 @@ MAN+= BIO_f_null.3
MAN+= BIO_f_ssl.3
MAN+= BIO_find_type.3
MAN+= BIO_new.3
+MAN+= BIO_new_CMS.3
MAN+= BIO_push.3
MAN+= BIO_read.3
MAN+= BIO_s_accept.3
@@ -44,6 +45,22 @@ MAN+= BN_rand.3
MAN+= BN_set_bit.3
MAN+= BN_swap.3
MAN+= BN_zero.3
+MAN+= CMS_add0_cert.3
+MAN+= CMS_add1_recipient_cert.3
+MAN+= CMS_compress.3
+MAN+= CMS_decrypt.3
+MAN+= CMS_encrypt.3
+MAN+= CMS_final.3
+MAN+= CMS_get0_RecipientInfos.3
+MAN+= CMS_get0_SignerInfos.3
+MAN+= CMS_get0_type.3
+MAN+= CMS_get1_ReceiptRequest.3
+MAN+= CMS_sign.3
+MAN+= CMS_sign_add1_signer.3
+MAN+= CMS_sign_receipt.3
+MAN+= CMS_uncompress.3
+MAN+= CMS_verify.3
+MAN+= CMS_verify_receipt.3
MAN+= CONF_modules_free.3
MAN+= CONF_modules_load_file.3
MAN+= CRYPTO_set_ex_data.3
@@ -75,10 +92,24 @@ MAN+= ERR_remove_state.3
MAN+= ERR_set_mark.3
MAN+= EVP_BytesToKey.3
MAN+= EVP_DigestInit.3
+MAN+= EVP_DigestSignInit.3
+MAN+= EVP_DigestVerifyInit.3
MAN+= EVP_EncryptInit.3
MAN+= EVP_OpenInit.3
+MAN+= EVP_PKEY_CTX_ctrl.3
+MAN+= EVP_PKEY_CTX_new.3
+MAN+= EVP_PKEY_cmp.3
+MAN+= EVP_PKEY_decrypt.3
+MAN+= EVP_PKEY_derive.3
+MAN+= EVP_PKEY_encrypt.3
+MAN+= EVP_PKEY_get_default_digest.3
+MAN+= EVP_PKEY_keygen.3
MAN+= EVP_PKEY_new.3
+MAN+= EVP_PKEY_print_private.3
MAN+= EVP_PKEY_set1_RSA.3
+MAN+= EVP_PKEY_sign.3
+MAN+= EVP_PKEY_verify.3
+MAN+= EVP_PKEY_verifyrecover.3
MAN+= EVP_SealInit.3
MAN+= EVP_SignInit.3
MAN+= EVP_VerifyInit.3
@@ -89,11 +120,14 @@ MAN+= OPENSSL_config.3
MAN+= OPENSSL_ia32cap.3
MAN+= OPENSSL_load_builtin_modules.3
MAN+= OpenSSL_add_all_algorithms.3
+MAN+= PEM_write_bio_CMS_stream.3
+MAN+= PEM_write_bio_PKCS7_stream.3
MAN+= PKCS12_create.3
MAN+= PKCS12_parse.3
MAN+= PKCS7_decrypt.3
MAN+= PKCS7_encrypt.3
MAN+= PKCS7_sign.3
+MAN+= PKCS7_sign_add_signer.3
MAN+= PKCS7_verify.3
MAN+= RAND_add.3
MAN+= RAND_bytes.3
@@ -114,13 +148,22 @@ MAN+= RSA_set_method.3
MAN+= RSA_sign.3
MAN+= RSA_sign_ASN1_OCTET_STRING.3
MAN+= RSA_size.3
+MAN+= SMIME_read_CMS.3
MAN+= SMIME_read_PKCS7.3
+MAN+= SMIME_write_CMS.3
MAN+= SMIME_write_PKCS7.3
MAN+= X509_NAME_ENTRY_get_object.3
MAN+= X509_NAME_add_entry_by_txt.3
MAN+= X509_NAME_get_index_by_NID.3
MAN+= X509_NAME_print_ex.3
+MAN+= X509_STORE_CTX_get_error.3
+MAN+= X509_STORE_CTX_get_ex_new_index.3
+MAN+= X509_STORE_CTX_new.3
+MAN+= X509_STORE_CTX_set_verify_cb.3
+MAN+= X509_STORE_set_verify_cb_func.3
+MAN+= X509_VERIFY_PARAM_set_flags.3
MAN+= X509_new.3
+MAN+= X509_verify_cert.3
MAN+= bio.3
MAN+= blowfish.3
MAN+= bn.3
@@ -146,6 +189,8 @@ MAN+= engine.3
MAN+= err.3
MAN+= evp.3
MAN+= hmac.3
+MAN+= i2d_CMS_bio_stream.3
+MAN+= i2d_PKCS7_bio_stream.3
MAN+= lh_stats.3
MAN+= lhash.3
MAN+= md5.3
@@ -347,6 +392,28 @@ MLINKS+= BN_zero.3 BN_one.3
MLINKS+= BN_zero.3 BN_value_one.3
MLINKS+= BN_zero.3 BN_set_word.3
MLINKS+= BN_zero.3 BN_get_word.3
+MLINKS+= CMS_add0_cert.3 CMS_add1_cert.3
+MLINKS+= CMS_add0_cert.3 CMS_get1_certs.3
+MLINKS+= CMS_add0_cert.3 CMS_add0_crl.3
+MLINKS+= CMS_add0_cert.3 CMS_get1_crls.3
+MLINKS+= CMS_add1_recipient_cert.3 CMS_add0_recipient_key.3
+MLINKS+= CMS_get0_RecipientInfos.3 CMS_RecipientInfo_type.3
+MLINKS+= CMS_get0_RecipientInfos.3 CMS_RecipientInfo_ktri_get0_signer_id.3
+MLINKS+= CMS_get0_RecipientInfos.3 CMS_RecipientInfo_ktri_cert_cmp.3
+MLINKS+= CMS_get0_RecipientInfos.3 CMS_RecipientInfo_set0_pkey.3
+MLINKS+= CMS_get0_RecipientInfos.3 CMS_RecipientInfo_kekri_get0_id.3
+MLINKS+= CMS_get0_RecipientInfos.3 CMS_RecipientInfo_kekri_id_cmp.3
+MLINKS+= CMS_get0_RecipientInfos.3 CMS_RecipientInfo_set0_key.3
+MLINKS+= CMS_get0_RecipientInfos.3 CMS_RecipientInfo_decrypt.3
+MLINKS+= CMS_get0_SignerInfos.3 CMS_SignerInfo_get0_signer_id.3
+MLINKS+= CMS_get0_SignerInfos.3 CMS_SignerInfo_cert_cmp.3
+MLINKS+= CMS_get0_SignerInfos.3 CMS_set1_signer_certs.3
+MLINKS+= CMS_get0_type.3 CMS_set1_eContentType.3
+MLINKS+= CMS_get0_type.3 CMS_get0_eContentType.3
+MLINKS+= CMS_get1_ReceiptRequest.3 CMS_ReceiptRequest_create0.3
+MLINKS+= CMS_get1_ReceiptRequest.3 CMS_add1_ReceiptRequest.3
+MLINKS+= CMS_get1_ReceiptRequest.3 CMS_ReceiptRequest_get0_values.3
+MLINKS+= CMS_sign_add1_signer.3 CMS_SignerInfo_sign.3
MLINKS+= CONF_modules_free.3 CONF_modules_finish.3
MLINKS+= CONF_modules_free.3 CONF_modules_unload.3
MLINKS+= CONF_modules_load_file.3 CONF_modules_load.3
@@ -415,6 +482,10 @@ MLINKS+= EVP_DigestInit.3 EVP_md2.3
MLINKS+= EVP_DigestInit.3 EVP_md5.3
MLINKS+= EVP_DigestInit.3 EVP_sha.3
MLINKS+= EVP_DigestInit.3 EVP_sha1.3
+MLINKS+= EVP_DigestInit.3 EVP_sha224.3
+MLINKS+= EVP_DigestInit.3 EVP_sha256.3
+MLINKS+= EVP_DigestInit.3 EVP_sha384.3
+MLINKS+= EVP_DigestInit.3 EVP_sha512.3
MLINKS+= EVP_DigestInit.3 EVP_dss.3
MLINKS+= EVP_DigestInit.3 EVP_dss1.3
MLINKS+= EVP_DigestInit.3 EVP_mdc2.3
@@ -422,6 +493,10 @@ MLINKS+= EVP_DigestInit.3 EVP_ripemd160.3
MLINKS+= EVP_DigestInit.3 EVP_get_digestbyname.3
MLINKS+= EVP_DigestInit.3 EVP_get_digestbynid.3
MLINKS+= EVP_DigestInit.3 EVP_get_digestbyobj.3
+MLINKS+= EVP_DigestSignInit.3 EVP_DigestSignUpdate.3
+MLINKS+= EVP_DigestSignInit.3 EVP_DigestSignFinal.3
+MLINKS+= EVP_DigestVerifyInit.3 EVP_DigestVerifyUpdate.3
+MLINKS+= EVP_DigestVerifyInit.3 EVP_DigestVerifyFinal.3
MLINKS+= EVP_EncryptInit.3 EVP_CIPHER_CTX_init.3
MLINKS+= EVP_EncryptInit.3 EVP_EncryptInit_ex.3
MLINKS+= EVP_EncryptInit.3 EVP_EncryptUpdate.3
@@ -465,7 +540,30 @@ MLINKS+= EVP_EncryptInit.3 EVP_CIPHER_asn1_to_param.3
MLINKS+= EVP_EncryptInit.3 EVP_CIPHER_CTX_set_padding.3
MLINKS+= EVP_OpenInit.3 EVP_OpenUpdate.3
MLINKS+= EVP_OpenInit.3 EVP_OpenFinal.3
+MLINKS+= EVP_PKEY_CTX_ctrl.3 EVP_PKEY_ctrl.3
+MLINKS+= EVP_PKEY_CTX_ctrl.3 EVP_PKEY_ctrl_str.3
+MLINKS+= EVP_PKEY_CTX_new.3 EVP_PKEY_CTX_new_id.3
+MLINKS+= EVP_PKEY_CTX_new.3 EVP_PKEY_CTX_dup.3
+MLINKS+= EVP_PKEY_CTX_new.3 EVP_PKEY_CTX_free.3
+MLINKS+= EVP_PKEY_cmp.3 EVP_PKEY_copy_parameters.3
+MLINKS+= EVP_PKEY_cmp.3 EVP_PKEY_missing_parameters.3
+MLINKS+= EVP_PKEY_cmp.3 EVP_PKEY_cmp_parameters.3
+MLINKS+= EVP_PKEY_decrypt.3 EVP_PKEY_decrypt_init.3
+MLINKS+= EVP_PKEY_derive.3 EVP_PKEY_derive_init.3
+MLINKS+= EVP_PKEY_derive.3 EVP_PKEY_derive_set_peer.3
+MLINKS+= EVP_PKEY_encrypt.3 EVP_PKEY_encrypt_init.3
+MLINKS+= EVP_PKEY_get_default_digest.3 EVP_PKEY_get_default_digest_nid.3
+MLINKS+= EVP_PKEY_keygen.3 EVP_PKEY_keygen_init.3
+MLINKS+= EVP_PKEY_keygen.3 EVP_PKEY_paramgen_init.3
+MLINKS+= EVP_PKEY_keygen.3 EVP_PKEY_paramgen.3
+MLINKS+= EVP_PKEY_keygen.3 EVP_PKEY_CTX_set_cb.3
+MLINKS+= EVP_PKEY_keygen.3 EVP_PKEY_CTX_get_cb.3
+MLINKS+= EVP_PKEY_keygen.3 EVP_PKEY_CTX_get_keygen_info.3
+MLINKS+= EVP_PKEY_keygen.3 EVP_PKEVP_PKEY_CTX_set_app_data.3
+MLINKS+= EVP_PKEY_keygen.3 EVP_PKEY_CTX_get_app_data.3
MLINKS+= EVP_PKEY_new.3 EVP_PKEY_free.3
+MLINKS+= EVP_PKEY_print_private.3 EVP_PKEY_print_public.3
+MLINKS+= EVP_PKEY_print_private.3 EVP_PKEY_print_params.3
MLINKS+= EVP_PKEY_set1_RSA.3 EVP_PKEY_set1_DSA.3
MLINKS+= EVP_PKEY_set1_RSA.3 EVP_PKEY_set1_DH.3
MLINKS+= EVP_PKEY_set1_RSA.3 EVP_PKEY_set1_EC_KEY.3
@@ -478,6 +576,9 @@ MLINKS+= EVP_PKEY_set1_RSA.3 EVP_PKEY_assign_DSA.3
MLINKS+= EVP_PKEY_set1_RSA.3 EVP_PKEY_assign_DH.3
MLINKS+= EVP_PKEY_set1_RSA.3 EVP_PKEY_assign_EC_KEY.3
MLINKS+= EVP_PKEY_set1_RSA.3 EVP_PKEY_type.3
+MLINKS+= EVP_PKEY_sign.3 EVP_PKEY_sign_init.3
+MLINKS+= EVP_PKEY_verify.3 EVP_PKEY_verify_init.3
+MLINKS+= EVP_PKEY_verifyrecover.3 EVP_PKEY_verifyrecover_init.3
MLINKS+= EVP_SealInit.3 EVP_SealUpdate.3
MLINKS+= EVP_SealInit.3 EVP_SealFinal.3
MLINKS+= EVP_SignInit.3 EVP_SignUpdate.3
@@ -559,6 +660,33 @@ MLINKS+= X509_NAME_get_index_by_NID.3 X509_NAME_get_text_by_OBJ.3
MLINKS+= X509_NAME_print_ex.3 X509_NAME_print_ex_fp.3
MLINKS+= X509_NAME_print_ex.3 X509_NAME_print.3
MLINKS+= X509_NAME_print_ex.3 X509_NAME_oneline.3
+MLINKS+= X509_STORE_CTX_get_error.3 X509_STORE_CTX_set_error.3
+MLINKS+= X509_STORE_CTX_get_error.3 X509_STORE_CTX_get_error_depth.3
+MLINKS+= X509_STORE_CTX_get_error.3 X509_STORE_CTX_get_current_cert.3
+MLINKS+= X509_STORE_CTX_get_error.3 X509_STORE_CTX_get1_chain.3
+MLINKS+= X509_STORE_CTX_get_error.3 X509_verify_cert_error_string.3
+MLINKS+= X509_STORE_CTX_get_ex_new_index.3 X509_STORE_CTX_set_ex_data.3
+MLINKS+= X509_STORE_CTX_get_ex_new_index.3 X509_STORE_CTX_get_ex_data.3
+MLINKS+= X509_STORE_CTX_new.3 X509_STORE_CTX_cleanup.3
+MLINKS+= X509_STORE_CTX_new.3 X509_STORE_CTX_free.3
+MLINKS+= X509_STORE_CTX_new.3 X509_STORE_CTX_init.3
+MLINKS+= X509_STORE_CTX_new.3 X509_STORE_CTX_trusted_stack.3
+MLINKS+= X509_STORE_CTX_new.3 X509_STORE_CTX_set_cert.3
+MLINKS+= X509_STORE_CTX_new.3 X509_STORE_CTX_set_chain.3
+MLINKS+= X509_STORE_CTX_new.3 X509_STORE_CTX_set0_crls.3
+MLINKS+= X509_STORE_CTX_new.3 X509_STORE_CTX_get0_param.3
+MLINKS+= X509_STORE_CTX_new.3 X509_STORE_CTX_set0_param.3
+MLINKS+= X509_STORE_CTX_new.3 X509_STORE_CTX_set_default.3
+MLINKS+= X509_STORE_set_verify_cb_func.3 X509_STORE_set_verify_cb.3
+MLINKS+= X509_VERIFY_PARAM_set_flags.3 X509_VERIFY_PARAM_clear_flags.3
+MLINKS+= X509_VERIFY_PARAM_set_flags.3 X509_VERIFY_PARAM_get_flags.3
+MLINKS+= X509_VERIFY_PARAM_set_flags.3 X509_VERIFY_PARAM_set_purpose.3
+MLINKS+= X509_VERIFY_PARAM_set_flags.3 X509_VERIFY_PARAM_set_trust.3
+MLINKS+= X509_VERIFY_PARAM_set_flags.3 X509_VERIFY_PARAM_set_depth.3
+MLINKS+= X509_VERIFY_PARAM_set_flags.3 X509_VERIFY_PARAM_get_depth.3
+MLINKS+= X509_VERIFY_PARAM_set_flags.3 X509_VERIFY_PARAM_set_time.3
+MLINKS+= X509_VERIFY_PARAM_set_flags.3 X509_VERIFY_PARAM_add0_policy.3
+MLINKS+= X509_VERIFY_PARAM_set_flags.3 X509_VERIFY_PARAM_set1_policies.3
MLINKS+= X509_new.3 X509_free.3
MLINKS+= blowfish.3 BF_set_key.3
MLINKS+= blowfish.3 BF_encrypt.3
@@ -784,8 +912,13 @@ MLINKS+= sha.3 SHA1.3
MLINKS+= sha.3 SHA1_Init.3
MLINKS+= sha.3 SHA1_Update.3
MLINKS+= sha.3 SHA1_Final.3
+MLINKS+= threads.3 CRYPTO_THREADID_set_callback.3
+MLINKS+= threads.3 CRYPTO_THREADID_get_callback.3
+MLINKS+= threads.3 CRYPTO_THREADID_current.3
+MLINKS+= threads.3 CRYPTO_THREADID_cmp.3
+MLINKS+= threads.3 CRYPTO_THREADID_cpy.3
+MLINKS+= threads.3 CRYPTO_THREADID_hash.3
MLINKS+= threads.3 CRYPTO_set_locking_callback.3
-MLINKS+= threads.3 CRYPTO_set_id_callback.3
MLINKS+= threads.3 CRYPTO_num_locks.3
MLINKS+= threads.3 CRYPTO_set_dynlock_create_callback.3
MLINKS+= threads.3 CRYPTO_set_dynlock_lock_callback.3
diff --git a/secure/lib/libcrypto/amd64/aes-x86_64.S b/secure/lib/libcrypto/amd64/aes-x86_64.S
new file mode 100644
index 000000000000..c800d5eb06b9
--- /dev/null
+++ b/secure/lib/libcrypto/amd64/aes-x86_64.S
@@ -0,0 +1,2542 @@
+ # $FreeBSD$
+.text
+.type _x86_64_AES_encrypt,@function
+.align 16
+_x86_64_AES_encrypt:
+ xorl 0(%r15),%eax
+ xorl 4(%r15),%ebx
+ xorl 8(%r15),%ecx
+ xorl 12(%r15),%edx
+
+ movl 240(%r15),%r13d
+ subl $1,%r13d
+ jmp .Lenc_loop
+.align 16
+.Lenc_loop:
+
+ movzbl %al,%esi
+ movzbl %bl,%edi
+ movzbl %cl,%ebp
+ movl 0(%r14,%rsi,8),%r10d
+ movl 0(%r14,%rdi,8),%r11d
+ movl 0(%r14,%rbp,8),%r12d
+
+ movzbl %bh,%esi
+ movzbl %ch,%edi
+ movzbl %dl,%ebp
+ xorl 3(%r14,%rsi,8),%r10d
+ xorl 3(%r14,%rdi,8),%r11d
+ movl 0(%r14,%rbp,8),%r8d
+
+ movzbl %dh,%esi
+ shrl $16,%ecx
+ movzbl %ah,%ebp
+ xorl 3(%r14,%rsi,8),%r12d
+ shrl $16,%edx
+ xorl 3(%r14,%rbp,8),%r8d
+
+ shrl $16,%ebx
+ leaq 16(%r15),%r15
+ shrl $16,%eax
+
+ movzbl %cl,%esi
+ movzbl %dl,%edi
+ movzbl %al,%ebp
+ xorl 2(%r14,%rsi,8),%r10d
+ xorl 2(%r14,%rdi,8),%r11d
+ xorl 2(%r14,%rbp,8),%r12d
+
+ movzbl %dh,%esi
+ movzbl %ah,%edi
+ movzbl %bl,%ebp
+ xorl 1(%r14,%rsi,8),%r10d
+ xorl 1(%r14,%rdi,8),%r11d
+ xorl 2(%r14,%rbp,8),%r8d
+
+ movl 12(%r15),%edx
+ movzbl %bh,%edi
+ movzbl %ch,%ebp
+ movl 0(%r15),%eax
+ xorl 1(%r14,%rdi,8),%r12d
+ xorl 1(%r14,%rbp,8),%r8d
+
+ movl 4(%r15),%ebx
+ movl 8(%r15),%ecx
+ xorl %r10d,%eax
+ xorl %r11d,%ebx
+ xorl %r12d,%ecx
+ xorl %r8d,%edx
+ subl $1,%r13d
+ jnz .Lenc_loop
+ movzbl %al,%esi
+ movzbl %bl,%edi
+ movzbl %cl,%ebp
+ movzbl 2(%r14,%rsi,8),%r10d
+ movzbl 2(%r14,%rdi,8),%r11d
+ movzbl 2(%r14,%rbp,8),%r12d
+
+ movzbl %dl,%esi
+ movzbl %bh,%edi
+ movzbl %ch,%ebp
+ movzbl 2(%r14,%rsi,8),%r8d
+ movl 0(%r14,%rdi,8),%edi
+ movl 0(%r14,%rbp,8),%ebp
+
+ andl $65280,%edi
+ andl $65280,%ebp
+
+ xorl %edi,%r10d
+ xorl %ebp,%r11d
+ shrl $16,%ecx
+
+ movzbl %dh,%esi
+ movzbl %ah,%edi
+ shrl $16,%edx
+ movl 0(%r14,%rsi,8),%esi
+ movl 0(%r14,%rdi,8),%edi
+
+ andl $65280,%esi
+ andl $65280,%edi
+ shrl $16,%ebx
+ xorl %esi,%r12d
+ xorl %edi,%r8d
+ shrl $16,%eax
+
+ movzbl %cl,%esi
+ movzbl %dl,%edi
+ movzbl %al,%ebp
+ movl 0(%r14,%rsi,8),%esi
+ movl 0(%r14,%rdi,8),%edi
+ movl 0(%r14,%rbp,8),%ebp
+
+ andl $16711680,%esi
+ andl $16711680,%edi
+ andl $16711680,%ebp
+
+ xorl %esi,%r10d
+ xorl %edi,%r11d
+ xorl %ebp,%r12d
+
+ movzbl %bl,%esi
+ movzbl %dh,%edi
+ movzbl %ah,%ebp
+ movl 0(%r14,%rsi,8),%esi
+ movl 2(%r14,%rdi,8),%edi
+ movl 2(%r14,%rbp,8),%ebp
+
+ andl $16711680,%esi
+ andl $4278190080,%edi
+ andl $4278190080,%ebp
+
+ xorl %esi,%r8d
+ xorl %edi,%r10d
+ xorl %ebp,%r11d
+
+ movzbl %bh,%esi
+ movzbl %ch,%edi
+ movl 16+12(%r15),%edx
+ movl 2(%r14,%rsi,8),%esi
+ movl 2(%r14,%rdi,8),%edi
+ movl 16+0(%r15),%eax
+
+ andl $4278190080,%esi
+ andl $4278190080,%edi
+
+ xorl %esi,%r12d
+ xorl %edi,%r8d
+
+ movl 16+4(%r15),%ebx
+ movl 16+8(%r15),%ecx
+ xorl %r10d,%eax
+ xorl %r11d,%ebx
+ xorl %r12d,%ecx
+ xorl %r8d,%edx
+.byte 0xf3,0xc3
+.size _x86_64_AES_encrypt,.-_x86_64_AES_encrypt
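+ # _x86_64_AES_encrypt_compact: same register contract as above, but uses only
+ # the 256-byte S-box at (%r14) and computes MixColumns arithmetically,
+ # trading speed for a smaller cache footprint (less cache-timing leakage).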
+.type _x86_64_AES_encrypt_compact,@function
+.align 16
+_x86_64_AES_encrypt_compact:
+ leaq 128(%r14),%r8
+ movl 0-128(%r8),%edi
+ movl 32-128(%r8),%ebp
+ movl 64-128(%r8),%r10d
+ movl 96-128(%r8),%r11d
+ movl 128-128(%r8),%edi
+ movl 160-128(%r8),%ebp
+ movl 192-128(%r8),%r10d
+ movl 224-128(%r8),%r11d
+ jmp .Lenc_loop_compact
+.align 16
+.Lenc_loop_compact:
+ xorl 0(%r15),%eax
+ xorl 4(%r15),%ebx
+ xorl 8(%r15),%ecx
+ xorl 12(%r15),%edx
+ leaq 16(%r15),%r15
+ movzbl %al,%r10d
+ movzbl %bl,%r11d
+ movzbl %cl,%r12d
+ movzbl (%r14,%r10,1),%r10d
+ movzbl (%r14,%r11,1),%r11d
+ movzbl (%r14,%r12,1),%r12d
+
+ movzbl %dl,%r8d
+ movzbl %bh,%esi
+ movzbl %ch,%edi
+ movzbl (%r14,%r8,1),%r8d
+ movzbl (%r14,%rsi,1),%r9d
+ movzbl (%r14,%rdi,1),%r13d
+
+ movzbl %dh,%ebp
+ movzbl %ah,%esi
+ shrl $16,%ecx
+ movzbl (%r14,%rbp,1),%ebp
+ movzbl (%r14,%rsi,1),%esi
+ shrl $16,%edx
+
+ movzbl %cl,%edi
+ shll $8,%r9d
+ shll $8,%r13d
+ movzbl (%r14,%rdi,1),%edi
+ xorl %r9d,%r10d
+ xorl %r13d,%r11d
+
+ movzbl %dl,%r9d
+ shrl $16,%eax
+ shrl $16,%ebx
+ movzbl %al,%r13d
+ shll $8,%ebp
+ shll $8,%esi
+ movzbl (%r14,%r9,1),%r9d
+ movzbl (%r14,%r13,1),%r13d
+ xorl %ebp,%r12d
+ xorl %esi,%r8d
+
+ movzbl %bl,%ebp
+ movzbl %dh,%esi
+ shll $16,%edi
+ movzbl (%r14,%rbp,1),%ebp
+ movzbl (%r14,%rsi,1),%esi
+ xorl %edi,%r10d
+
+ movzbl %ah,%edi
+ shrl $8,%ecx
+ shrl $8,%ebx
+ movzbl (%r14,%rdi,1),%edi
+ movzbl (%r14,%rcx,1),%edx
+ movzbl (%r14,%rbx,1),%ecx
+ shll $16,%r9d
+ shll $16,%r13d
+ shll $16,%ebp
+ xorl %r9d,%r11d
+ xorl %r13d,%r12d
+ xorl %ebp,%r8d
+
+ shll $24,%esi
+ shll $24,%edi
+ shll $24,%edx
+ xorl %esi,%r10d
+ shll $24,%ecx
+ xorl %edi,%r11d
+ movl %r10d,%eax
+ movl %r11d,%ebx
+ xorl %r12d,%ecx
+ xorl %r8d,%edx
+ cmpq 16(%rsp),%r15
+ je .Lenc_compact_done
+ movl %eax,%esi
+ movl %ebx,%edi
+ andl $2155905152,%esi
+ andl $2155905152,%edi
+ movl %esi,%r10d
+ movl %edi,%r11d
+ shrl $7,%r10d
+ leal (%rax,%rax,1),%r8d
+ shrl $7,%r11d
+ leal (%rbx,%rbx,1),%r9d
+ subl %r10d,%esi
+ subl %r11d,%edi
+ andl $4278124286,%r8d
+ andl $4278124286,%r9d
+ andl $454761243,%esi
+ andl $454761243,%edi
+ movl %eax,%r10d
+ movl %ebx,%r11d
+ xorl %esi,%r8d
+ xorl %edi,%r9d
+
+ xorl %r8d,%eax
+ xorl %r9d,%ebx
+ movl %ecx,%esi
+ movl %edx,%edi
+ roll $24,%eax
+ roll $24,%ebx
+ andl $2155905152,%esi
+ andl $2155905152,%edi
+ xorl %r8d,%eax
+ xorl %r9d,%ebx
+ movl %esi,%r12d
+ movl %edi,%ebp
+ rorl $16,%r10d
+ rorl $16,%r11d
+ shrl $7,%r12d
+ leal (%rcx,%rcx,1),%r8d
+ xorl %r10d,%eax
+ xorl %r11d,%ebx
+ shrl $7,%ebp
+ leal (%rdx,%rdx,1),%r9d
+ rorl $8,%r10d
+ rorl $8,%r11d
+ subl %r12d,%esi
+ subl %ebp,%edi
+ xorl %r10d,%eax
+ xorl %r11d,%ebx
+
+ andl $4278124286,%r8d
+ andl $4278124286,%r9d
+ andl $454761243,%esi
+ andl $454761243,%edi
+ movl %ecx,%r12d
+ movl %edx,%ebp
+ xorl %esi,%r8d
+ xorl %edi,%r9d
+
+ xorl %r8d,%ecx
+ xorl %r9d,%edx
+ roll $24,%ecx
+ roll $24,%edx
+ xorl %r8d,%ecx
+ xorl %r9d,%edx
+ movl 0(%r14),%esi
+ rorl $16,%r12d
+ rorl $16,%ebp
+ movl 64(%r14),%edi
+ xorl %r12d,%ecx
+ xorl %ebp,%edx
+ movl 128(%r14),%r8d
+ rorl $8,%r12d
+ rorl $8,%ebp
+ movl 192(%r14),%r9d
+ xorl %r12d,%ecx
+ xorl %ebp,%edx
+ jmp .Lenc_loop_compact
+.align 16
+.Lenc_compact_done:
+ xorl 0(%r15),%eax
+ xorl 4(%r15),%ebx
+ xorl 8(%r15),%ecx
+ xorl 12(%r15),%edx
+.byte 0xf3,0xc3
+.size _x86_64_AES_encrypt_compact,.-_x86_64_AES_encrypt_compact
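+ # AES_encrypt(in=%rdi, out=%rsi, key=%rdx): public one-block entry point.
+ # Saves registers, aligns the stack, picks one of the four S-box copies that
+ # cannot collide with the stack frame in cache, and calls the compact encryptor.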
+.globl AES_encrypt
+.type AES_encrypt,@function
+.align 16
+.globl asm_AES_encrypt
+.hidden asm_AES_encrypt
+asm_AES_encrypt:
+AES_encrypt:
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+
+
+ movq %rsp,%r10
+ leaq -63(%rdx),%rcx
+ andq $-64,%rsp
+ subq %rsp,%rcx
+ negq %rcx
+ andq $960,%rcx
+ subq %rcx,%rsp
+ subq $32,%rsp
+
+ movq %rsi,16(%rsp)
+ movq %r10,24(%rsp)
+.Lenc_prologue:
+
+ movq %rdx,%r15
+ movl 240(%r15),%r13d
+
+ movl 0(%rdi),%eax
+ movl 4(%rdi),%ebx
+ movl 8(%rdi),%ecx
+ movl 12(%rdi),%edx
+
+ shll $4,%r13d
+ leaq (%r15,%r13,1),%rbp
+ movq %r15,(%rsp)
+ movq %rbp,8(%rsp)
+
+
+ leaq .LAES_Te+2048(%rip),%r14
+ leaq 768(%rsp),%rbp
+ subq %r14,%rbp
+ andq $768,%rbp
+ leaq (%r14,%rbp,1),%r14
+
+ call _x86_64_AES_encrypt_compact
+
+ movq 16(%rsp),%r9
+ movq 24(%rsp),%rsi
+ movl %eax,0(%r9)
+ movl %ebx,4(%r9)
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+
+ movq (%rsi),%r15
+ movq 8(%rsi),%r14
+ movq 16(%rsi),%r13
+ movq 24(%rsi),%r12
+ movq 32(%rsi),%rbp
+ movq 40(%rsi),%rbx
+ leaq 48(%rsi),%rsp
+.Lenc_epilogue:
+ .byte 0xf3,0xc3
+.size AES_encrypt,.-AES_encrypt
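+ # _x86_64_AES_decrypt: one-block decryption using the 2KB .LAES_Td tables,
+ # with the inverse S-box at 2048(%r14) for the final round. Register contract
+ # matches _x86_64_AES_encrypt.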
+.type _x86_64_AES_decrypt,@function
+.align 16
+_x86_64_AES_decrypt:
+ xorl 0(%r15),%eax
+ xorl 4(%r15),%ebx
+ xorl 8(%r15),%ecx
+ xorl 12(%r15),%edx
+
+ movl 240(%r15),%r13d
+ subl $1,%r13d
+ jmp .Ldec_loop
+.align 16
+.Ldec_loop:
+
+ movzbl %al,%esi
+ movzbl %bl,%edi
+ movzbl %cl,%ebp
+ movl 0(%r14,%rsi,8),%r10d
+ movl 0(%r14,%rdi,8),%r11d
+ movl 0(%r14,%rbp,8),%r12d
+
+ movzbl %dh,%esi
+ movzbl %ah,%edi
+ movzbl %dl,%ebp
+ xorl 3(%r14,%rsi,8),%r10d
+ xorl 3(%r14,%rdi,8),%r11d
+ movl 0(%r14,%rbp,8),%r8d
+
+ movzbl %bh,%esi
+ shrl $16,%eax
+ movzbl %ch,%ebp
+ xorl 3(%r14,%rsi,8),%r12d
+ shrl $16,%edx
+ xorl 3(%r14,%rbp,8),%r8d
+
+ shrl $16,%ebx
+ leaq 16(%r15),%r15
+ shrl $16,%ecx
+
+ movzbl %cl,%esi
+ movzbl %dl,%edi
+ movzbl %al,%ebp
+ xorl 2(%r14,%rsi,8),%r10d
+ xorl 2(%r14,%rdi,8),%r11d
+ xorl 2(%r14,%rbp,8),%r12d
+
+ movzbl %bh,%esi
+ movzbl %ch,%edi
+ movzbl %bl,%ebp
+ xorl 1(%r14,%rsi,8),%r10d
+ xorl 1(%r14,%rdi,8),%r11d
+ xorl 2(%r14,%rbp,8),%r8d
+
+ movzbl %dh,%esi
+ movl 12(%r15),%edx
+ movzbl %ah,%ebp
+ xorl 1(%r14,%rsi,8),%r12d
+ movl 0(%r15),%eax
+ xorl 1(%r14,%rbp,8),%r8d
+
+ xorl %r10d,%eax
+ movl 4(%r15),%ebx
+ movl 8(%r15),%ecx
+ xorl %r12d,%ecx
+ xorl %r11d,%ebx
+ xorl %r8d,%edx
+ subl $1,%r13d
+ jnz .Ldec_loop
+ leaq 2048(%r14),%r14
+ movzbl %al,%esi
+ movzbl %bl,%edi
+ movzbl %cl,%ebp
+ movzbl (%r14,%rsi,1),%r10d
+ movzbl (%r14,%rdi,1),%r11d
+ movzbl (%r14,%rbp,1),%r12d
+
+ movzbl %dl,%esi
+ movzbl %dh,%edi
+ movzbl %ah,%ebp
+ movzbl (%r14,%rsi,1),%r8d
+ movzbl (%r14,%rdi,1),%edi
+ movzbl (%r14,%rbp,1),%ebp
+
+ shll $8,%edi
+ shll $8,%ebp
+
+ xorl %edi,%r10d
+ xorl %ebp,%r11d
+ shrl $16,%edx
+
+ movzbl %bh,%esi
+ movzbl %ch,%edi
+ shrl $16,%eax
+ movzbl (%r14,%rsi,1),%esi
+ movzbl (%r14,%rdi,1),%edi
+
+ shll $8,%esi
+ shll $8,%edi
+ shrl $16,%ebx
+ xorl %esi,%r12d
+ xorl %edi,%r8d
+ shrl $16,%ecx
+
+ movzbl %cl,%esi
+ movzbl %dl,%edi
+ movzbl %al,%ebp
+ movzbl (%r14,%rsi,1),%esi
+ movzbl (%r14,%rdi,1),%edi
+ movzbl (%r14,%rbp,1),%ebp
+
+ shll $16,%esi
+ shll $16,%edi
+ shll $16,%ebp
+
+ xorl %esi,%r10d
+ xorl %edi,%r11d
+ xorl %ebp,%r12d
+
+ movzbl %bl,%esi
+ movzbl %bh,%edi
+ movzbl %ch,%ebp
+ movzbl (%r14,%rsi,1),%esi
+ movzbl (%r14,%rdi,1),%edi
+ movzbl (%r14,%rbp,1),%ebp
+
+ shll $16,%esi
+ shll $24,%edi
+ shll $24,%ebp
+
+ xorl %esi,%r8d
+ xorl %edi,%r10d
+ xorl %ebp,%r11d
+
+ movzbl %dh,%esi
+ movzbl %ah,%edi
+ movl 16+12(%r15),%edx
+ movzbl (%r14,%rsi,1),%esi
+ movzbl (%r14,%rdi,1),%edi
+ movl 16+0(%r15),%eax
+
+ shll $24,%esi
+ shll $24,%edi
+
+ xorl %esi,%r12d
+ xorl %edi,%r8d
+
+ movl 16+4(%r15),%ebx
+ movl 16+8(%r15),%ecx
+ leaq -2048(%r14),%r14
+ xorl %r10d,%eax
+ xorl %r11d,%ebx
+ xorl %r12d,%ecx
+ xorl %r8d,%edx
+.byte 0xf3,0xc3
+.size _x86_64_AES_decrypt,.-_x86_64_AES_decrypt
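+ # _x86_64_AES_decrypt_compact: compact variant using the 256-byte inverse
+ # S-box at (%r14); InvMixColumns is computed with the GF(2^8) doubling masks
+ # stored after each S-box copy.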
+.type _x86_64_AES_decrypt_compact,@function
+.align 16
+_x86_64_AES_decrypt_compact:
+ leaq 128(%r14),%r8
+ movl 0-128(%r8),%edi
+ movl 32-128(%r8),%ebp
+ movl 64-128(%r8),%r10d
+ movl 96-128(%r8),%r11d
+ movl 128-128(%r8),%edi
+ movl 160-128(%r8),%ebp
+ movl 192-128(%r8),%r10d
+ movl 224-128(%r8),%r11d
+ jmp .Ldec_loop_compact
+
+.align 16
+.Ldec_loop_compact:
+ xorl 0(%r15),%eax
+ xorl 4(%r15),%ebx
+ xorl 8(%r15),%ecx
+ xorl 12(%r15),%edx
+ leaq 16(%r15),%r15
+ movzbl %al,%r10d
+ movzbl %bl,%r11d
+ movzbl %cl,%r12d
+ movzbl (%r14,%r10,1),%r10d
+ movzbl (%r14,%r11,1),%r11d
+ movzbl (%r14,%r12,1),%r12d
+
+ movzbl %dl,%r8d
+ movzbl %dh,%esi
+ movzbl %ah,%edi
+ movzbl (%r14,%r8,1),%r8d
+ movzbl (%r14,%rsi,1),%r9d
+ movzbl (%r14,%rdi,1),%r13d
+
+ movzbl %bh,%ebp
+ movzbl %ch,%esi
+ shrl $16,%ecx
+ movzbl (%r14,%rbp,1),%ebp
+ movzbl (%r14,%rsi,1),%esi
+ shrl $16,%edx
+
+ movzbl %cl,%edi
+ shll $8,%r9d
+ shll $8,%r13d
+ movzbl (%r14,%rdi,1),%edi
+ xorl %r9d,%r10d
+ xorl %r13d,%r11d
+
+ movzbl %dl,%r9d
+ shrl $16,%eax
+ shrl $16,%ebx
+ movzbl %al,%r13d
+ shll $8,%ebp
+ shll $8,%esi
+ movzbl (%r14,%r9,1),%r9d
+ movzbl (%r14,%r13,1),%r13d
+ xorl %ebp,%r12d
+ xorl %esi,%r8d
+
+ movzbl %bl,%ebp
+ movzbl %bh,%esi
+ shll $16,%edi
+ movzbl (%r14,%rbp,1),%ebp
+ movzbl (%r14,%rsi,1),%esi
+ xorl %edi,%r10d
+
+ movzbl %ch,%edi
+ shll $16,%r9d
+ shll $16,%r13d
+ movzbl (%r14,%rdi,1),%ebx
+ xorl %r9d,%r11d
+ xorl %r13d,%r12d
+
+ movzbl %dh,%edi
+ shrl $8,%eax
+ shll $16,%ebp
+ movzbl (%r14,%rdi,1),%ecx
+ movzbl (%r14,%rax,1),%edx
+ xorl %ebp,%r8d
+
+ shll $24,%esi
+ shll $24,%ebx
+ shll $24,%ecx
+ xorl %esi,%r10d
+ shll $24,%edx
+ xorl %r11d,%ebx
+ movl %r10d,%eax
+ xorl %r12d,%ecx
+ xorl %r8d,%edx
+ cmpq 16(%rsp),%r15
+ je .Ldec_compact_done
+
+ movq 256+0(%r14),%rsi
+ shlq $32,%rbx
+ shlq $32,%rdx
+ movq 256+8(%r14),%rdi
+ orq %rbx,%rax
+ orq %rdx,%rcx
+ movq 256+16(%r14),%rbp
+ movq %rax,%rbx
+ movq %rcx,%rdx
+ andq %rsi,%rbx
+ andq %rsi,%rdx
+ movq %rbx,%r9
+ movq %rdx,%r12
+ shrq $7,%r9
+ leaq (%rax,%rax,1),%r8
+ shrq $7,%r12
+ leaq (%rcx,%rcx,1),%r11
+ subq %r9,%rbx
+ subq %r12,%rdx
+ andq %rdi,%r8
+ andq %rdi,%r11
+ andq %rbp,%rbx
+ andq %rbp,%rdx
+ xorq %r8,%rbx
+ xorq %r11,%rdx
+ movq %rbx,%r8
+ movq %rdx,%r11
+
+ andq %rsi,%rbx
+ andq %rsi,%rdx
+ movq %rbx,%r10
+ movq %rdx,%r13
+ shrq $7,%r10
+ leaq (%r8,%r8,1),%r9
+ shrq $7,%r13
+ leaq (%r11,%r11,1),%r12
+ subq %r10,%rbx
+ subq %r13,%rdx
+ andq %rdi,%r9
+ andq %rdi,%r12
+ andq %rbp,%rbx
+ andq %rbp,%rdx
+ xorq %r9,%rbx
+ xorq %r12,%rdx
+ movq %rbx,%r9
+ movq %rdx,%r12
+
+ andq %rsi,%rbx
+ andq %rsi,%rdx
+ movq %rbx,%r10
+ movq %rdx,%r13
+ shrq $7,%r10
+ xorq %rax,%r8
+ shrq $7,%r13
+ xorq %rcx,%r11
+ subq %r10,%rbx
+ subq %r13,%rdx
+ leaq (%r9,%r9,1),%r10
+ leaq (%r12,%r12,1),%r13
+ xorq %rax,%r9
+ xorq %rcx,%r12
+ andq %rdi,%r10
+ andq %rdi,%r13
+ andq %rbp,%rbx
+ andq %rbp,%rdx
+ xorq %rbx,%r10
+ xorq %rdx,%r13
+
+ xorq %r10,%rax
+ xorq %r13,%rcx
+ xorq %r10,%r8
+ xorq %r13,%r11
+ movq %rax,%rbx
+ movq %rcx,%rdx
+ xorq %r10,%r9
+ xorq %r13,%r12
+ shrq $32,%rbx
+ shrq $32,%rdx
+ xorq %r8,%r10
+ xorq %r11,%r13
+ roll $8,%eax
+ roll $8,%ecx
+ xorq %r9,%r10
+ xorq %r12,%r13
+
+ roll $8,%ebx
+ roll $8,%edx
+ xorl %r10d,%eax
+ xorl %r13d,%ecx
+ shrq $32,%r10
+ shrq $32,%r13
+ xorl %r10d,%ebx
+ xorl %r13d,%edx
+
+ movq %r8,%r10
+ movq %r11,%r13
+ shrq $32,%r10
+ shrq $32,%r13
+ roll $24,%r8d
+ roll $24,%r11d
+ roll $24,%r10d
+ roll $24,%r13d
+ xorl %r8d,%eax
+ xorl %r11d,%ecx
+ movq %r9,%r8
+ movq %r12,%r11
+ xorl %r10d,%ebx
+ xorl %r13d,%edx
+
+ movq 0(%r14),%rsi
+ shrq $32,%r8
+ shrq $32,%r11
+ movq 64(%r14),%rdi
+ roll $16,%r9d
+ roll $16,%r12d
+ movq 128(%r14),%rbp
+ roll $16,%r8d
+ roll $16,%r11d
+ movq 192(%r14),%r10
+ xorl %r9d,%eax
+ xorl %r12d,%ecx
+ movq 256(%r14),%r13
+ xorl %r8d,%ebx
+ xorl %r11d,%edx
+ jmp .Ldec_loop_compact
+.align 16
+.Ldec_compact_done:
+ xorl 0(%r15),%eax
+ xorl 4(%r15),%ebx
+ xorl 8(%r15),%ecx
+ xorl 12(%r15),%edx
+.byte 0xf3,0xc3
+.size _x86_64_AES_decrypt_compact,.-_x86_64_AES_decrypt_compact
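+ # AES_decrypt(in=%rdi, out=%rsi, key=%rdx): public one-block entry point,
+ # mirroring AES_encrypt but using .LAES_Td and the compact decryptor.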
+.globl AES_decrypt
+.type AES_decrypt,@function
+.align 16
+.globl asm_AES_decrypt
+.hidden asm_AES_decrypt
+asm_AES_decrypt:
+AES_decrypt:
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+
+
+ movq %rsp,%r10
+ leaq -63(%rdx),%rcx
+ andq $-64,%rsp
+ subq %rsp,%rcx
+ negq %rcx
+ andq $960,%rcx
+ subq %rcx,%rsp
+ subq $32,%rsp
+
+ movq %rsi,16(%rsp)
+ movq %r10,24(%rsp)
+.Ldec_prologue:
+
+ movq %rdx,%r15
+ movl 240(%r15),%r13d
+
+ movl 0(%rdi),%eax
+ movl 4(%rdi),%ebx
+ movl 8(%rdi),%ecx
+ movl 12(%rdi),%edx
+
+ shll $4,%r13d
+ leaq (%r15,%r13,1),%rbp
+ movq %r15,(%rsp)
+ movq %rbp,8(%rsp)
+
+
+ leaq .LAES_Td+2048(%rip),%r14
+ leaq 768(%rsp),%rbp
+ subq %r14,%rbp
+ andq $768,%rbp
+ leaq (%r14,%rbp,1),%r14
+ shrq $3,%rbp
+ addq %rbp,%r14
+
+ call _x86_64_AES_decrypt_compact
+
+ movq 16(%rsp),%r9
+ movq 24(%rsp),%rsi
+ movl %eax,0(%r9)
+ movl %ebx,4(%r9)
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+
+ movq (%rsi),%r15
+ movq 8(%rsi),%r14
+ movq 16(%rsi),%r13
+ movq 24(%rsi),%r12
+ movq 32(%rsi),%rbp
+ movq 40(%rsi),%rbx
+ leaq 48(%rsi),%rsp
+.Ldec_epilogue:
+ .byte 0xf3,0xc3
+.size AES_decrypt,.-AES_decrypt
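+ # private_AES_set_encrypt_key(userKey=%rdi, bits=%esi, key=%rdx): expands a
+ # 128/192/256-bit key into the AES_KEY schedule. Returns 0 on success, -1 for
+ # a NULL pointer and -2 for an unsupported key length.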
+.globl private_AES_set_encrypt_key
+.type private_AES_set_encrypt_key,@function
+.align 16
+private_AES_set_encrypt_key:
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ subq $8,%rsp
+.Lenc_key_prologue:
+
+ call _x86_64_AES_set_encrypt_key
+
+ movq 8(%rsp),%r15
+ movq 16(%rsp),%r14
+ movq 24(%rsp),%r13
+ movq 32(%rsp),%r12
+ movq 40(%rsp),%rbp
+ movq 48(%rsp),%rbx
+ addq $56,%rsp
+.Lenc_key_epilogue:
+ .byte 0xf3,0xc3
+.size private_AES_set_encrypt_key,.-private_AES_set_encrypt_key
+
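+ # _x86_64_AES_set_encrypt_key: key-schedule worker shared with
+ # private_AES_set_decrypt_key; uses the S-box and rcon constants stored
+ # after the .LAES_Te tables.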
+.type _x86_64_AES_set_encrypt_key,@function
+.align 16
+_x86_64_AES_set_encrypt_key:
+ movl %esi,%ecx
+ movq %rdi,%rsi
+ movq %rdx,%rdi
+
+ testq $-1,%rsi
+ jz .Lbadpointer
+ testq $-1,%rdi
+ jz .Lbadpointer
+
+ leaq .LAES_Te(%rip),%rbp
+ leaq 2048+128(%rbp),%rbp
+
+
+ movl 0-128(%rbp),%eax
+ movl 32-128(%rbp),%ebx
+ movl 64-128(%rbp),%r8d
+ movl 96-128(%rbp),%edx
+ movl 128-128(%rbp),%eax
+ movl 160-128(%rbp),%ebx
+ movl 192-128(%rbp),%r8d
+ movl 224-128(%rbp),%edx
+
+ cmpl $128,%ecx
+ je .L10rounds
+ cmpl $192,%ecx
+ je .L12rounds
+ cmpl $256,%ecx
+ je .L14rounds
+ movq $-2,%rax
+ jmp .Lexit
+
+.L10rounds:
+ movq 0(%rsi),%rax
+ movq 8(%rsi),%rdx
+ movq %rax,0(%rdi)
+ movq %rdx,8(%rdi)
+
+ shrq $32,%rdx
+ xorl %ecx,%ecx
+ jmp .L10shortcut
+.align 4
+.L10loop:
+ movl 0(%rdi),%eax
+ movl 12(%rdi),%edx
+.L10shortcut:
+ movzbl %dl,%esi
+ movzbl -128(%rbp,%rsi,1),%ebx
+ movzbl %dh,%esi
+ shll $24,%ebx
+ xorl %ebx,%eax
+
+ movzbl -128(%rbp,%rsi,1),%ebx
+ shrl $16,%edx
+ movzbl %dl,%esi
+ xorl %ebx,%eax
+
+ movzbl -128(%rbp,%rsi,1),%ebx
+ movzbl %dh,%esi
+ shll $8,%ebx
+ xorl %ebx,%eax
+
+ movzbl -128(%rbp,%rsi,1),%ebx
+ shll $16,%ebx
+ xorl %ebx,%eax
+
+ xorl 1024-128(%rbp,%rcx,4),%eax
+ movl %eax,16(%rdi)
+ xorl 4(%rdi),%eax
+ movl %eax,20(%rdi)
+ xorl 8(%rdi),%eax
+ movl %eax,24(%rdi)
+ xorl 12(%rdi),%eax
+ movl %eax,28(%rdi)
+ addl $1,%ecx
+ leaq 16(%rdi),%rdi
+ cmpl $10,%ecx
+ jl .L10loop
+
+ movl $10,80(%rdi)
+ xorq %rax,%rax
+ jmp .Lexit
+
+.L12rounds:
+ movq 0(%rsi),%rax
+ movq 8(%rsi),%rbx
+ movq 16(%rsi),%rdx
+ movq %rax,0(%rdi)
+ movq %rbx,8(%rdi)
+ movq %rdx,16(%rdi)
+
+ shrq $32,%rdx
+ xorl %ecx,%ecx
+ jmp .L12shortcut
+.align 4
+.L12loop:
+ movl 0(%rdi),%eax
+ movl 20(%rdi),%edx
+.L12shortcut:
+ movzbl %dl,%esi
+ movzbl -128(%rbp,%rsi,1),%ebx
+ movzbl %dh,%esi
+ shll $24,%ebx
+ xorl %ebx,%eax
+
+ movzbl -128(%rbp,%rsi,1),%ebx
+ shrl $16,%edx
+ movzbl %dl,%esi
+ xorl %ebx,%eax
+
+ movzbl -128(%rbp,%rsi,1),%ebx
+ movzbl %dh,%esi
+ shll $8,%ebx
+ xorl %ebx,%eax
+
+ movzbl -128(%rbp,%rsi,1),%ebx
+ shll $16,%ebx
+ xorl %ebx,%eax
+
+ xorl 1024-128(%rbp,%rcx,4),%eax
+ movl %eax,24(%rdi)
+ xorl 4(%rdi),%eax
+ movl %eax,28(%rdi)
+ xorl 8(%rdi),%eax
+ movl %eax,32(%rdi)
+ xorl 12(%rdi),%eax
+ movl %eax,36(%rdi)
+
+ cmpl $7,%ecx
+ je .L12break
+ addl $1,%ecx
+
+ xorl 16(%rdi),%eax
+ movl %eax,40(%rdi)
+ xorl 20(%rdi),%eax
+ movl %eax,44(%rdi)
+
+ leaq 24(%rdi),%rdi
+ jmp .L12loop
+.L12break:
+ movl $12,72(%rdi)
+ xorq %rax,%rax
+ jmp .Lexit
+
+.L14rounds:
+ movq 0(%rsi),%rax
+ movq 8(%rsi),%rbx
+ movq 16(%rsi),%rcx
+ movq 24(%rsi),%rdx
+ movq %rax,0(%rdi)
+ movq %rbx,8(%rdi)
+ movq %rcx,16(%rdi)
+ movq %rdx,24(%rdi)
+
+ shrq $32,%rdx
+ xorl %ecx,%ecx
+ jmp .L14shortcut
+.align 4
+.L14loop:
+ movl 0(%rdi),%eax
+ movl 28(%rdi),%edx
+.L14shortcut:
+ movzbl %dl,%esi
+ movzbl -128(%rbp,%rsi,1),%ebx
+ movzbl %dh,%esi
+ shll $24,%ebx
+ xorl %ebx,%eax
+
+ movzbl -128(%rbp,%rsi,1),%ebx
+ shrl $16,%edx
+ movzbl %dl,%esi
+ xorl %ebx,%eax
+
+ movzbl -128(%rbp,%rsi,1),%ebx
+ movzbl %dh,%esi
+ shll $8,%ebx
+ xorl %ebx,%eax
+
+ movzbl -128(%rbp,%rsi,1),%ebx
+ shll $16,%ebx
+ xorl %ebx,%eax
+
+ xorl 1024-128(%rbp,%rcx,4),%eax
+ movl %eax,32(%rdi)
+ xorl 4(%rdi),%eax
+ movl %eax,36(%rdi)
+ xorl 8(%rdi),%eax
+ movl %eax,40(%rdi)
+ xorl 12(%rdi),%eax
+ movl %eax,44(%rdi)
+
+ cmpl $6,%ecx
+ je .L14break
+ addl $1,%ecx
+
+ movl %eax,%edx
+ movl 16(%rdi),%eax
+ movzbl %dl,%esi
+ movzbl -128(%rbp,%rsi,1),%ebx
+ movzbl %dh,%esi
+ xorl %ebx,%eax
+
+ movzbl -128(%rbp,%rsi,1),%ebx
+ shrl $16,%edx
+ shll $8,%ebx
+ movzbl %dl,%esi
+ xorl %ebx,%eax
+
+ movzbl -128(%rbp,%rsi,1),%ebx
+ movzbl %dh,%esi
+ shll $16,%ebx
+ xorl %ebx,%eax
+
+ movzbl -128(%rbp,%rsi,1),%ebx
+ shll $24,%ebx
+ xorl %ebx,%eax
+
+ movl %eax,48(%rdi)
+ xorl 20(%rdi),%eax
+ movl %eax,52(%rdi)
+ xorl 24(%rdi),%eax
+ movl %eax,56(%rdi)
+ xorl 28(%rdi),%eax
+ movl %eax,60(%rdi)
+
+ leaq 32(%rdi),%rdi
+ jmp .L14loop
+.L14break:
+ movl $14,48(%rdi)
+ xorq %rax,%rax
+ jmp .Lexit
+
+.Lbadpointer:
+ movq $-1,%rax
+.Lexit:
+.byte 0xf3,0xc3
+.size _x86_64_AES_set_encrypt_key,.-_x86_64_AES_set_encrypt_key
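+ # private_AES_set_decrypt_key: builds the encryption schedule, reverses the
+ # order of the round keys in place, then applies InvMixColumns to all but the
+ # first and last round keys.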
+.globl private_AES_set_decrypt_key
+.type private_AES_set_decrypt_key,@function
+.align 16
+private_AES_set_decrypt_key:
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ pushq %rdx
+.Ldec_key_prologue:
+
+ call _x86_64_AES_set_encrypt_key
+ movq (%rsp),%r8
+ cmpl $0,%eax
+ jne .Labort
+
+ movl 240(%r8),%r14d
+ xorq %rdi,%rdi
+ leaq (%rdi,%r14,4),%rcx
+ movq %r8,%rsi
+ leaq (%r8,%rcx,4),%rdi
+.align 4
+.Linvert:
+ movq 0(%rsi),%rax
+ movq 8(%rsi),%rbx
+ movq 0(%rdi),%rcx
+ movq 8(%rdi),%rdx
+ movq %rax,0(%rdi)
+ movq %rbx,8(%rdi)
+ movq %rcx,0(%rsi)
+ movq %rdx,8(%rsi)
+ leaq 16(%rsi),%rsi
+ leaq -16(%rdi),%rdi
+ cmpq %rsi,%rdi
+ jne .Linvert
+
+ leaq .LAES_Te+2048+1024(%rip),%rax
+
+ movq 40(%rax),%rsi
+ movq 48(%rax),%rdi
+ movq 56(%rax),%rbp
+
+ movq %r8,%r15
+ subl $1,%r14d
+.align 4
+.Lpermute:
+ leaq 16(%r15),%r15
+ movq 0(%r15),%rax
+ movq 8(%r15),%rcx
+ movq %rax,%rbx
+ movq %rcx,%rdx
+ andq %rsi,%rbx
+ andq %rsi,%rdx
+ movq %rbx,%r9
+ movq %rdx,%r12
+ shrq $7,%r9
+ leaq (%rax,%rax,1),%r8
+ shrq $7,%r12
+ leaq (%rcx,%rcx,1),%r11
+ subq %r9,%rbx
+ subq %r12,%rdx
+ andq %rdi,%r8
+ andq %rdi,%r11
+ andq %rbp,%rbx
+ andq %rbp,%rdx
+ xorq %r8,%rbx
+ xorq %r11,%rdx
+ movq %rbx,%r8
+ movq %rdx,%r11
+
+ andq %rsi,%rbx
+ andq %rsi,%rdx
+ movq %rbx,%r10
+ movq %rdx,%r13
+ shrq $7,%r10
+ leaq (%r8,%r8,1),%r9
+ shrq $7,%r13
+ leaq (%r11,%r11,1),%r12
+ subq %r10,%rbx
+ subq %r13,%rdx
+ andq %rdi,%r9
+ andq %rdi,%r12
+ andq %rbp,%rbx
+ andq %rbp,%rdx
+ xorq %r9,%rbx
+ xorq %r12,%rdx
+ movq %rbx,%r9
+ movq %rdx,%r12
+
+ andq %rsi,%rbx
+ andq %rsi,%rdx
+ movq %rbx,%r10
+ movq %rdx,%r13
+ shrq $7,%r10
+ xorq %rax,%r8
+ shrq $7,%r13
+ xorq %rcx,%r11
+ subq %r10,%rbx
+ subq %r13,%rdx
+ leaq (%r9,%r9,1),%r10
+ leaq (%r12,%r12,1),%r13
+ xorq %rax,%r9
+ xorq %rcx,%r12
+ andq %rdi,%r10
+ andq %rdi,%r13
+ andq %rbp,%rbx
+ andq %rbp,%rdx
+ xorq %rbx,%r10
+ xorq %rdx,%r13
+
+ xorq %r10,%rax
+ xorq %r13,%rcx
+ xorq %r10,%r8
+ xorq %r13,%r11
+ movq %rax,%rbx
+ movq %rcx,%rdx
+ xorq %r10,%r9
+ xorq %r13,%r12
+ shrq $32,%rbx
+ shrq $32,%rdx
+ xorq %r8,%r10
+ xorq %r11,%r13
+ roll $8,%eax
+ roll $8,%ecx
+ xorq %r9,%r10
+ xorq %r12,%r13
+
+ roll $8,%ebx
+ roll $8,%edx
+ xorl %r10d,%eax
+ xorl %r13d,%ecx
+ shrq $32,%r10
+ shrq $32,%r13
+ xorl %r10d,%ebx
+ xorl %r13d,%edx
+
+ movq %r8,%r10
+ movq %r11,%r13
+ shrq $32,%r10
+ shrq $32,%r13
+ roll $24,%r8d
+ roll $24,%r11d
+ roll $24,%r10d
+ roll $24,%r13d
+ xorl %r8d,%eax
+ xorl %r11d,%ecx
+ movq %r9,%r8
+ movq %r12,%r11
+ xorl %r10d,%ebx
+ xorl %r13d,%edx
+
+
+ shrq $32,%r8
+ shrq $32,%r11
+
+ roll $16,%r9d
+ roll $16,%r12d
+
+ roll $16,%r8d
+ roll $16,%r11d
+
+ xorl %r9d,%eax
+ xorl %r12d,%ecx
+
+ xorl %r8d,%ebx
+ xorl %r11d,%edx
+ movl %eax,0(%r15)
+ movl %ebx,4(%r15)
+ movl %ecx,8(%r15)
+ movl %edx,12(%r15)
+ subl $1,%r14d
+ jnz .Lpermute
+
+ xorq %rax,%rax
+.Labort:
+ movq 8(%rsp),%r15
+ movq 16(%rsp),%r14
+ movq 24(%rsp),%r13
+ movq 32(%rsp),%r12
+ movq 40(%rsp),%rbp
+ movq 48(%rsp),%rbx
+ addq $56,%rsp
+.Ldec_key_epilogue:
+ .byte 0xf3,0xc3
+.size private_AES_set_decrypt_key,.-private_AES_set_decrypt_key
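+ # AES_cbc_encrypt(in=%rdi, out=%rsi, len=%rdx, key=%rcx, ivec=%r8, enc=%r9d):
+ # CBC en/decryption. Inputs of at least 512 bytes that are a multiple of 16
+ # take the full-table fast path unless hyper-threading is detected (bit 28 of
+ # OPENSSL_ia32cap_P); everything else falls back to the compact path below.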
+.globl AES_cbc_encrypt
+.type AES_cbc_encrypt,@function
+.align 16
+
+.globl asm_AES_cbc_encrypt
+.hidden asm_AES_cbc_encrypt
+asm_AES_cbc_encrypt:
+AES_cbc_encrypt:
+ cmpq $0,%rdx
+ je .Lcbc_epilogue
+ pushfq
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+.Lcbc_prologue:
+
+ cld
+ movl %r9d,%r9d
+
+ leaq .LAES_Te(%rip),%r14
+ cmpq $0,%r9
+ jne .Lcbc_picked_te
+ leaq .LAES_Td(%rip),%r14
+.Lcbc_picked_te:
+
+ movl OPENSSL_ia32cap_P(%rip),%r10d
+ cmpq $512,%rdx
+ jb .Lcbc_slow_prologue
+ testq $15,%rdx
+ jnz .Lcbc_slow_prologue
+ btl $28,%r10d
+ jc .Lcbc_slow_prologue
+
+
+ leaq -88-248(%rsp),%r15
+ andq $-64,%r15
+
+
+ movq %r14,%r10
+ leaq 2304(%r14),%r11
+ movq %r15,%r12
+ andq $4095,%r10
+ andq $4095,%r11
+ andq $4095,%r12
+
+ cmpq %r11,%r12
+ jb .Lcbc_te_break_out
+ subq %r11,%r12
+ subq %r12,%r15
+ jmp .Lcbc_te_ok
+.Lcbc_te_break_out:
+ subq %r10,%r12
+ andq $4095,%r12
+ addq $320,%r12
+ subq %r12,%r15
+.align 4
+.Lcbc_te_ok:
+
+ xchgq %rsp,%r15
+
+ movq %r15,16(%rsp)
+.Lcbc_fast_body:
+ movq %rdi,24(%rsp)
+ movq %rsi,32(%rsp)
+ movq %rdx,40(%rsp)
+ movq %rcx,48(%rsp)
+ movq %r8,56(%rsp)
+ movl $0,80+240(%rsp)
+ movq %r8,%rbp
+ movq %r9,%rbx
+ movq %rsi,%r9
+ movq %rdi,%r8
+ movq %rcx,%r15
+
+ movl 240(%r15),%eax
+
+ movq %r15,%r10
+ subq %r14,%r10
+ andq $4095,%r10
+ cmpq $2304,%r10
+ jb .Lcbc_do_ecopy
+ cmpq $4096-248,%r10
+ jb .Lcbc_skip_ecopy
+.align 4
+.Lcbc_do_ecopy:
+ movq %r15,%rsi
+ leaq 80(%rsp),%rdi
+ leaq 80(%rsp),%r15
+ movl $30,%ecx
+.long 0x90A548F3
+ movl %eax,(%rdi)
+.Lcbc_skip_ecopy:
+ movq %r15,0(%rsp)
+
+ movl $18,%ecx
+.align 4
+.Lcbc_prefetch_te:
+ movq 0(%r14),%r10
+ movq 32(%r14),%r11
+ movq 64(%r14),%r12
+ movq 96(%r14),%r13
+ leaq 128(%r14),%r14
+ subl $1,%ecx
+ jnz .Lcbc_prefetch_te
+ leaq -2304(%r14),%r14
+
+ cmpq $0,%rbx
+ je .LFAST_DECRYPT
+
+
+ movl 0(%rbp),%eax
+ movl 4(%rbp),%ebx
+ movl 8(%rbp),%ecx
+ movl 12(%rbp),%edx
+
+.align 4
+.Lcbc_fast_enc_loop:
+ xorl 0(%r8),%eax
+ xorl 4(%r8),%ebx
+ xorl 8(%r8),%ecx
+ xorl 12(%r8),%edx
+ movq 0(%rsp),%r15
+ movq %r8,24(%rsp)
+
+ call _x86_64_AES_encrypt
+
+ movq 24(%rsp),%r8
+ movq 40(%rsp),%r10
+ movl %eax,0(%r9)
+ movl %ebx,4(%r9)
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+
+ leaq 16(%r8),%r8
+ leaq 16(%r9),%r9
+ subq $16,%r10
+ testq $-16,%r10
+ movq %r10,40(%rsp)
+ jnz .Lcbc_fast_enc_loop
+ movq 56(%rsp),%rbp
+ movl %eax,0(%rbp)
+ movl %ebx,4(%rbp)
+ movl %ecx,8(%rbp)
+ movl %edx,12(%rbp)
+
+ jmp .Lcbc_fast_cleanup
+
+
+.align 16
+.LFAST_DECRYPT:
+ cmpq %r8,%r9
+ je .Lcbc_fast_dec_in_place
+
+ movq %rbp,64(%rsp)
+.align 4
+.Lcbc_fast_dec_loop:
+ movl 0(%r8),%eax
+ movl 4(%r8),%ebx
+ movl 8(%r8),%ecx
+ movl 12(%r8),%edx
+ movq 0(%rsp),%r15
+ movq %r8,24(%rsp)
+
+ call _x86_64_AES_decrypt
+
+ movq 64(%rsp),%rbp
+ movq 24(%rsp),%r8
+ movq 40(%rsp),%r10
+ xorl 0(%rbp),%eax
+ xorl 4(%rbp),%ebx
+ xorl 8(%rbp),%ecx
+ xorl 12(%rbp),%edx
+ movq %r8,%rbp
+
+ subq $16,%r10
+ movq %r10,40(%rsp)
+ movq %rbp,64(%rsp)
+
+ movl %eax,0(%r9)
+ movl %ebx,4(%r9)
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+
+ leaq 16(%r8),%r8
+ leaq 16(%r9),%r9
+ jnz .Lcbc_fast_dec_loop
+ movq 56(%rsp),%r12
+ movq 0(%rbp),%r10
+ movq 8(%rbp),%r11
+ movq %r10,0(%r12)
+ movq %r11,8(%r12)
+ jmp .Lcbc_fast_cleanup
+
+.align 16
+.Lcbc_fast_dec_in_place:
+ movq 0(%rbp),%r10
+ movq 8(%rbp),%r11
+ movq %r10,0+64(%rsp)
+ movq %r11,8+64(%rsp)
+.align 4
+.Lcbc_fast_dec_in_place_loop:
+ movl 0(%r8),%eax
+ movl 4(%r8),%ebx
+ movl 8(%r8),%ecx
+ movl 12(%r8),%edx
+ movq 0(%rsp),%r15
+ movq %r8,24(%rsp)
+
+ call _x86_64_AES_decrypt
+
+ movq 24(%rsp),%r8
+ movq 40(%rsp),%r10
+ xorl 0+64(%rsp),%eax
+ xorl 4+64(%rsp),%ebx
+ xorl 8+64(%rsp),%ecx
+ xorl 12+64(%rsp),%edx
+
+ movq 0(%r8),%r11
+ movq 8(%r8),%r12
+ subq $16,%r10
+ jz .Lcbc_fast_dec_in_place_done
+
+ movq %r11,0+64(%rsp)
+ movq %r12,8+64(%rsp)
+
+ movl %eax,0(%r9)
+ movl %ebx,4(%r9)
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+
+ leaq 16(%r8),%r8
+ leaq 16(%r9),%r9
+ movq %r10,40(%rsp)
+ jmp .Lcbc_fast_dec_in_place_loop
+.Lcbc_fast_dec_in_place_done:
+ movq 56(%rsp),%rdi
+ movq %r11,0(%rdi)
+ movq %r12,8(%rdi)
+
+ movl %eax,0(%r9)
+ movl %ebx,4(%r9)
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+
+.align 4
+.Lcbc_fast_cleanup:
+ cmpl $0,80+240(%rsp)
+ leaq 80(%rsp),%rdi
+ je .Lcbc_exit
+ movl $30,%ecx
+ xorq %rax,%rax
+.long 0x90AB48F3
+
+ jmp .Lcbc_exit
+
+
+.align 16
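+ # compact-table fallback path (also handles a partial final block on encrypt)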
+.Lcbc_slow_prologue:
+
+ leaq -88(%rsp),%rbp
+ andq $-64,%rbp
+
+ leaq -88-63(%rcx),%r10
+ subq %rbp,%r10
+ negq %r10
+ andq $960,%r10
+ subq %r10,%rbp
+
+ xchgq %rsp,%rbp
+
+ movq %rbp,16(%rsp)
+.Lcbc_slow_body:
+
+
+
+
+ movq %r8,56(%rsp)
+ movq %r8,%rbp
+ movq %r9,%rbx
+ movq %rsi,%r9
+ movq %rdi,%r8
+ movq %rcx,%r15
+ movq %rdx,%r10
+
+ movl 240(%r15),%eax
+ movq %r15,0(%rsp)
+ shll $4,%eax
+ leaq (%r15,%rax,1),%rax
+ movq %rax,8(%rsp)
+
+
+ leaq 2048(%r14),%r14
+ leaq 768-8(%rsp),%rax
+ subq %r14,%rax
+ andq $768,%rax
+ leaq (%r14,%rax,1),%r14
+
+ cmpq $0,%rbx
+ je .LSLOW_DECRYPT
+
+
+ testq $-16,%r10
+ movl 0(%rbp),%eax
+ movl 4(%rbp),%ebx
+ movl 8(%rbp),%ecx
+ movl 12(%rbp),%edx
+ jz .Lcbc_slow_enc_tail
+
+.align 4
+.Lcbc_slow_enc_loop:
+ xorl 0(%r8),%eax
+ xorl 4(%r8),%ebx
+ xorl 8(%r8),%ecx
+ xorl 12(%r8),%edx
+ movq 0(%rsp),%r15
+ movq %r8,24(%rsp)
+ movq %r9,32(%rsp)
+ movq %r10,40(%rsp)
+
+ call _x86_64_AES_encrypt_compact
+
+ movq 24(%rsp),%r8
+ movq 32(%rsp),%r9
+ movq 40(%rsp),%r10
+ movl %eax,0(%r9)
+ movl %ebx,4(%r9)
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+
+ leaq 16(%r8),%r8
+ leaq 16(%r9),%r9
+ subq $16,%r10
+ testq $-16,%r10
+ jnz .Lcbc_slow_enc_loop
+ testq $15,%r10
+ jnz .Lcbc_slow_enc_tail
+ movq 56(%rsp),%rbp
+ movl %eax,0(%rbp)
+ movl %ebx,4(%rbp)
+ movl %ecx,8(%rbp)
+ movl %edx,12(%rbp)
+
+ jmp .Lcbc_exit
+
+.align 4
+.Lcbc_slow_enc_tail:
+ movq %rax,%r11
+ movq %rcx,%r12
+ movq %r10,%rcx
+ movq %r8,%rsi
+ movq %r9,%rdi
+.long 0x9066A4F3
+ movq $16,%rcx
+ subq %r10,%rcx
+ xorq %rax,%rax
+.long 0x9066AAF3
+ movq %r9,%r8
+ movq $16,%r10
+ movq %r11,%rax
+ movq %r12,%rcx
+ jmp .Lcbc_slow_enc_loop
+
+.align 16
+.LSLOW_DECRYPT:
+ shrq $3,%rax
+ addq %rax,%r14
+
+ movq 0(%rbp),%r11
+ movq 8(%rbp),%r12
+ movq %r11,0+64(%rsp)
+ movq %r12,8+64(%rsp)
+
+.align 4
+.Lcbc_slow_dec_loop:
+ movl 0(%r8),%eax
+ movl 4(%r8),%ebx
+ movl 8(%r8),%ecx
+ movl 12(%r8),%edx
+ movq 0(%rsp),%r15
+ movq %r8,24(%rsp)
+ movq %r9,32(%rsp)
+ movq %r10,40(%rsp)
+
+ call _x86_64_AES_decrypt_compact
+
+ movq 24(%rsp),%r8
+ movq 32(%rsp),%r9
+ movq 40(%rsp),%r10
+ xorl 0+64(%rsp),%eax
+ xorl 4+64(%rsp),%ebx
+ xorl 8+64(%rsp),%ecx
+ xorl 12+64(%rsp),%edx
+
+ movq 0(%r8),%r11
+ movq 8(%r8),%r12
+ subq $16,%r10
+ jc .Lcbc_slow_dec_partial
+ jz .Lcbc_slow_dec_done
+
+ movq %r11,0+64(%rsp)
+ movq %r12,8+64(%rsp)
+
+ movl %eax,0(%r9)
+ movl %ebx,4(%r9)
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+
+ leaq 16(%r8),%r8
+ leaq 16(%r9),%r9
+ jmp .Lcbc_slow_dec_loop
+.Lcbc_slow_dec_done:
+ movq 56(%rsp),%rdi
+ movq %r11,0(%rdi)
+ movq %r12,8(%rdi)
+
+ movl %eax,0(%r9)
+ movl %ebx,4(%r9)
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+
+ jmp .Lcbc_exit
+
+.align 4
+.Lcbc_slow_dec_partial:
+ movq 56(%rsp),%rdi
+ movq %r11,0(%rdi)
+ movq %r12,8(%rdi)
+
+ movl %eax,0+64(%rsp)
+ movl %ebx,4+64(%rsp)
+ movl %ecx,8+64(%rsp)
+ movl %edx,12+64(%rsp)
+
+ movq %r9,%rdi
+ leaq 64(%rsp),%rsi
+ leaq 16(%r10),%rcx
+.long 0x9066A4F3
+ jmp .Lcbc_exit
+
+.align 16
+.Lcbc_exit:
+ movq 16(%rsp),%rsi
+ movq (%rsi),%r15
+ movq 8(%rsi),%r14
+ movq 16(%rsi),%r13
+ movq 24(%rsi),%r12
+ movq 32(%rsi),%rbp
+ movq 40(%rsi),%rbx
+ leaq 48(%rsi),%rsp
+.Lcbc_popfq:
+ popfq
+.Lcbc_epilogue:
+ .byte 0xf3,0xc3
+.size AES_cbc_encrypt,.-AES_cbc_encrypt
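+ # .LAES_Te: 256 8-byte encryption table entries, followed by four 256-byte
+ # copies of the S-box, the rcon constants and the GF(2^8) doubling masks.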
+.align 64
+.LAES_Te:
+.long 0xa56363c6,0xa56363c6
+.long 0x847c7cf8,0x847c7cf8
+.long 0x997777ee,0x997777ee
+.long 0x8d7b7bf6,0x8d7b7bf6
+.long 0x0df2f2ff,0x0df2f2ff
+.long 0xbd6b6bd6,0xbd6b6bd6
+.long 0xb16f6fde,0xb16f6fde
+.long 0x54c5c591,0x54c5c591
+.long 0x50303060,0x50303060
+.long 0x03010102,0x03010102
+.long 0xa96767ce,0xa96767ce
+.long 0x7d2b2b56,0x7d2b2b56
+.long 0x19fefee7,0x19fefee7
+.long 0x62d7d7b5,0x62d7d7b5
+.long 0xe6abab4d,0xe6abab4d
+.long 0x9a7676ec,0x9a7676ec
+.long 0x45caca8f,0x45caca8f
+.long 0x9d82821f,0x9d82821f
+.long 0x40c9c989,0x40c9c989
+.long 0x877d7dfa,0x877d7dfa
+.long 0x15fafaef,0x15fafaef
+.long 0xeb5959b2,0xeb5959b2
+.long 0xc947478e,0xc947478e
+.long 0x0bf0f0fb,0x0bf0f0fb
+.long 0xecadad41,0xecadad41
+.long 0x67d4d4b3,0x67d4d4b3
+.long 0xfda2a25f,0xfda2a25f
+.long 0xeaafaf45,0xeaafaf45
+.long 0xbf9c9c23,0xbf9c9c23
+.long 0xf7a4a453,0xf7a4a453
+.long 0x967272e4,0x967272e4
+.long 0x5bc0c09b,0x5bc0c09b
+.long 0xc2b7b775,0xc2b7b775
+.long 0x1cfdfde1,0x1cfdfde1
+.long 0xae93933d,0xae93933d
+.long 0x6a26264c,0x6a26264c
+.long 0x5a36366c,0x5a36366c
+.long 0x413f3f7e,0x413f3f7e
+.long 0x02f7f7f5,0x02f7f7f5
+.long 0x4fcccc83,0x4fcccc83
+.long 0x5c343468,0x5c343468
+.long 0xf4a5a551,0xf4a5a551
+.long 0x34e5e5d1,0x34e5e5d1
+.long 0x08f1f1f9,0x08f1f1f9
+.long 0x937171e2,0x937171e2
+.long 0x73d8d8ab,0x73d8d8ab
+.long 0x53313162,0x53313162
+.long 0x3f15152a,0x3f15152a
+.long 0x0c040408,0x0c040408
+.long 0x52c7c795,0x52c7c795
+.long 0x65232346,0x65232346
+.long 0x5ec3c39d,0x5ec3c39d
+.long 0x28181830,0x28181830
+.long 0xa1969637,0xa1969637
+.long 0x0f05050a,0x0f05050a
+.long 0xb59a9a2f,0xb59a9a2f
+.long 0x0907070e,0x0907070e
+.long 0x36121224,0x36121224
+.long 0x9b80801b,0x9b80801b
+.long 0x3de2e2df,0x3de2e2df
+.long 0x26ebebcd,0x26ebebcd
+.long 0x6927274e,0x6927274e
+.long 0xcdb2b27f,0xcdb2b27f
+.long 0x9f7575ea,0x9f7575ea
+.long 0x1b090912,0x1b090912
+.long 0x9e83831d,0x9e83831d
+.long 0x742c2c58,0x742c2c58
+.long 0x2e1a1a34,0x2e1a1a34
+.long 0x2d1b1b36,0x2d1b1b36
+.long 0xb26e6edc,0xb26e6edc
+.long 0xee5a5ab4,0xee5a5ab4
+.long 0xfba0a05b,0xfba0a05b
+.long 0xf65252a4,0xf65252a4
+.long 0x4d3b3b76,0x4d3b3b76
+.long 0x61d6d6b7,0x61d6d6b7
+.long 0xceb3b37d,0xceb3b37d
+.long 0x7b292952,0x7b292952
+.long 0x3ee3e3dd,0x3ee3e3dd
+.long 0x712f2f5e,0x712f2f5e
+.long 0x97848413,0x97848413
+.long 0xf55353a6,0xf55353a6
+.long 0x68d1d1b9,0x68d1d1b9
+.long 0x00000000,0x00000000
+.long 0x2cededc1,0x2cededc1
+.long 0x60202040,0x60202040
+.long 0x1ffcfce3,0x1ffcfce3
+.long 0xc8b1b179,0xc8b1b179
+.long 0xed5b5bb6,0xed5b5bb6
+.long 0xbe6a6ad4,0xbe6a6ad4
+.long 0x46cbcb8d,0x46cbcb8d
+.long 0xd9bebe67,0xd9bebe67
+.long 0x4b393972,0x4b393972
+.long 0xde4a4a94,0xde4a4a94
+.long 0xd44c4c98,0xd44c4c98
+.long 0xe85858b0,0xe85858b0
+.long 0x4acfcf85,0x4acfcf85
+.long 0x6bd0d0bb,0x6bd0d0bb
+.long 0x2aefefc5,0x2aefefc5
+.long 0xe5aaaa4f,0xe5aaaa4f
+.long 0x16fbfbed,0x16fbfbed
+.long 0xc5434386,0xc5434386
+.long 0xd74d4d9a,0xd74d4d9a
+.long 0x55333366,0x55333366
+.long 0x94858511,0x94858511
+.long 0xcf45458a,0xcf45458a
+.long 0x10f9f9e9,0x10f9f9e9
+.long 0x06020204,0x06020204
+.long 0x817f7ffe,0x817f7ffe
+.long 0xf05050a0,0xf05050a0
+.long 0x443c3c78,0x443c3c78
+.long 0xba9f9f25,0xba9f9f25
+.long 0xe3a8a84b,0xe3a8a84b
+.long 0xf35151a2,0xf35151a2
+.long 0xfea3a35d,0xfea3a35d
+.long 0xc0404080,0xc0404080
+.long 0x8a8f8f05,0x8a8f8f05
+.long 0xad92923f,0xad92923f
+.long 0xbc9d9d21,0xbc9d9d21
+.long 0x48383870,0x48383870
+.long 0x04f5f5f1,0x04f5f5f1
+.long 0xdfbcbc63,0xdfbcbc63
+.long 0xc1b6b677,0xc1b6b677
+.long 0x75dadaaf,0x75dadaaf
+.long 0x63212142,0x63212142
+.long 0x30101020,0x30101020
+.long 0x1affffe5,0x1affffe5
+.long 0x0ef3f3fd,0x0ef3f3fd
+.long 0x6dd2d2bf,0x6dd2d2bf
+.long 0x4ccdcd81,0x4ccdcd81
+.long 0x140c0c18,0x140c0c18
+.long 0x35131326,0x35131326
+.long 0x2fececc3,0x2fececc3
+.long 0xe15f5fbe,0xe15f5fbe
+.long 0xa2979735,0xa2979735
+.long 0xcc444488,0xcc444488
+.long 0x3917172e,0x3917172e
+.long 0x57c4c493,0x57c4c493
+.long 0xf2a7a755,0xf2a7a755
+.long 0x827e7efc,0x827e7efc
+.long 0x473d3d7a,0x473d3d7a
+.long 0xac6464c8,0xac6464c8
+.long 0xe75d5dba,0xe75d5dba
+.long 0x2b191932,0x2b191932
+.long 0x957373e6,0x957373e6
+.long 0xa06060c0,0xa06060c0
+.long 0x98818119,0x98818119
+.long 0xd14f4f9e,0xd14f4f9e
+.long 0x7fdcdca3,0x7fdcdca3
+.long 0x66222244,0x66222244
+.long 0x7e2a2a54,0x7e2a2a54
+.long 0xab90903b,0xab90903b
+.long 0x8388880b,0x8388880b
+.long 0xca46468c,0xca46468c
+.long 0x29eeeec7,0x29eeeec7
+.long 0xd3b8b86b,0xd3b8b86b
+.long 0x3c141428,0x3c141428
+.long 0x79dedea7,0x79dedea7
+.long 0xe25e5ebc,0xe25e5ebc
+.long 0x1d0b0b16,0x1d0b0b16
+.long 0x76dbdbad,0x76dbdbad
+.long 0x3be0e0db,0x3be0e0db
+.long 0x56323264,0x56323264
+.long 0x4e3a3a74,0x4e3a3a74
+.long 0x1e0a0a14,0x1e0a0a14
+.long 0xdb494992,0xdb494992
+.long 0x0a06060c,0x0a06060c
+.long 0x6c242448,0x6c242448
+.long 0xe45c5cb8,0xe45c5cb8
+.long 0x5dc2c29f,0x5dc2c29f
+.long 0x6ed3d3bd,0x6ed3d3bd
+.long 0xefacac43,0xefacac43
+.long 0xa66262c4,0xa66262c4
+.long 0xa8919139,0xa8919139
+.long 0xa4959531,0xa4959531
+.long 0x37e4e4d3,0x37e4e4d3
+.long 0x8b7979f2,0x8b7979f2
+.long 0x32e7e7d5,0x32e7e7d5
+.long 0x43c8c88b,0x43c8c88b
+.long 0x5937376e,0x5937376e
+.long 0xb76d6dda,0xb76d6dda
+.long 0x8c8d8d01,0x8c8d8d01
+.long 0x64d5d5b1,0x64d5d5b1
+.long 0xd24e4e9c,0xd24e4e9c
+.long 0xe0a9a949,0xe0a9a949
+.long 0xb46c6cd8,0xb46c6cd8
+.long 0xfa5656ac,0xfa5656ac
+.long 0x07f4f4f3,0x07f4f4f3
+.long 0x25eaeacf,0x25eaeacf
+.long 0xaf6565ca,0xaf6565ca
+.long 0x8e7a7af4,0x8e7a7af4
+.long 0xe9aeae47,0xe9aeae47
+.long 0x18080810,0x18080810
+.long 0xd5baba6f,0xd5baba6f
+.long 0x887878f0,0x887878f0
+.long 0x6f25254a,0x6f25254a
+.long 0x722e2e5c,0x722e2e5c
+.long 0x241c1c38,0x241c1c38
+.long 0xf1a6a657,0xf1a6a657
+.long 0xc7b4b473,0xc7b4b473
+.long 0x51c6c697,0x51c6c697
+.long 0x23e8e8cb,0x23e8e8cb
+.long 0x7cdddda1,0x7cdddda1
+.long 0x9c7474e8,0x9c7474e8
+.long 0x211f1f3e,0x211f1f3e
+.long 0xdd4b4b96,0xdd4b4b96
+.long 0xdcbdbd61,0xdcbdbd61
+.long 0x868b8b0d,0x868b8b0d
+.long 0x858a8a0f,0x858a8a0f
+.long 0x907070e0,0x907070e0
+.long 0x423e3e7c,0x423e3e7c
+.long 0xc4b5b571,0xc4b5b571
+.long 0xaa6666cc,0xaa6666cc
+.long 0xd8484890,0xd8484890
+.long 0x05030306,0x05030306
+.long 0x01f6f6f7,0x01f6f6f7
+.long 0x120e0e1c,0x120e0e1c
+.long 0xa36161c2,0xa36161c2
+.long 0x5f35356a,0x5f35356a
+.long 0xf95757ae,0xf95757ae
+.long 0xd0b9b969,0xd0b9b969
+.long 0x91868617,0x91868617
+.long 0x58c1c199,0x58c1c199
+.long 0x271d1d3a,0x271d1d3a
+.long 0xb99e9e27,0xb99e9e27
+.long 0x38e1e1d9,0x38e1e1d9
+.long 0x13f8f8eb,0x13f8f8eb
+.long 0xb398982b,0xb398982b
+.long 0x33111122,0x33111122
+.long 0xbb6969d2,0xbb6969d2
+.long 0x70d9d9a9,0x70d9d9a9
+.long 0x898e8e07,0x898e8e07
+.long 0xa7949433,0xa7949433
+.long 0xb69b9b2d,0xb69b9b2d
+.long 0x221e1e3c,0x221e1e3c
+.long 0x92878715,0x92878715
+.long 0x20e9e9c9,0x20e9e9c9
+.long 0x49cece87,0x49cece87
+.long 0xff5555aa,0xff5555aa
+.long 0x78282850,0x78282850
+.long 0x7adfdfa5,0x7adfdfa5
+.long 0x8f8c8c03,0x8f8c8c03
+.long 0xf8a1a159,0xf8a1a159
+.long 0x80898909,0x80898909
+.long 0x170d0d1a,0x170d0d1a
+.long 0xdabfbf65,0xdabfbf65
+.long 0x31e6e6d7,0x31e6e6d7
+.long 0xc6424284,0xc6424284
+.long 0xb86868d0,0xb86868d0
+.long 0xc3414182,0xc3414182
+.long 0xb0999929,0xb0999929
+.long 0x772d2d5a,0x772d2d5a
+.long 0x110f0f1e,0x110f0f1e
+.long 0xcbb0b07b,0xcbb0b07b
+.long 0xfc5454a8,0xfc5454a8
+.long 0xd6bbbb6d,0xd6bbbb6d
+.long 0x3a16162c,0x3a16162c
+.byte 0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5
+.byte 0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76
+.byte 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0
+.byte 0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0
+.byte 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc
+.byte 0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15
+.byte 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a
+.byte 0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75
+.byte 0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0
+.byte 0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84
+.byte 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b
+.byte 0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf
+.byte 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85
+.byte 0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8
+.byte 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5
+.byte 0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2
+.byte 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17
+.byte 0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73
+.byte 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88
+.byte 0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb
+.byte 0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c
+.byte 0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79
+.byte 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9
+.byte 0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08
+.byte 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6
+.byte 0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a
+.byte 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e
+.byte 0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e
+.byte 0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94
+.byte 0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf
+.byte 0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68
+.byte 0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16
+.byte 0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5
+.byte 0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76
+.byte 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0
+.byte 0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0
+.byte 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc
+.byte 0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15
+.byte 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a
+.byte 0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75
+.byte 0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0
+.byte 0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84
+.byte 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b
+.byte 0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf
+.byte 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85
+.byte 0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8
+.byte 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5
+.byte 0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2
+.byte 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17
+.byte 0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73
+.byte 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88
+.byte 0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb
+.byte 0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c
+.byte 0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79
+.byte 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9
+.byte 0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08
+.byte 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6
+.byte 0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a
+.byte 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e
+.byte 0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e
+.byte 0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94
+.byte 0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf
+.byte 0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68
+.byte 0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16
+.byte 0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5
+.byte 0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76
+.byte 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0
+.byte 0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0
+.byte 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc
+.byte 0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15
+.byte 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a
+.byte 0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75
+.byte 0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0
+.byte 0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84
+.byte 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b
+.byte 0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf
+.byte 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85
+.byte 0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8
+.byte 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5
+.byte 0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2
+.byte 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17
+.byte 0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73
+.byte 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88
+.byte 0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb
+.byte 0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c
+.byte 0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79
+.byte 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9
+.byte 0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08
+.byte 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6
+.byte 0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a
+.byte 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e
+.byte 0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e
+.byte 0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94
+.byte 0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf
+.byte 0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68
+.byte 0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16
+.byte 0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5
+.byte 0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76
+.byte 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0
+.byte 0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0
+.byte 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc
+.byte 0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15
+.byte 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a
+.byte 0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75
+.byte 0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0
+.byte 0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84
+.byte 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b
+.byte 0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf
+.byte 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85
+.byte 0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8
+.byte 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5
+.byte 0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2
+.byte 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17
+.byte 0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73
+.byte 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88
+.byte 0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb
+.byte 0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c
+.byte 0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79
+.byte 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9
+.byte 0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08
+.byte 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6
+.byte 0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a
+.byte 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e
+.byte 0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e
+.byte 0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94
+.byte 0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf
+.byte 0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68
+.byte 0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16
+.long 0x00000001, 0x00000002, 0x00000004, 0x00000008
+.long 0x00000010, 0x00000020, 0x00000040, 0x00000080
+.long 0x0000001b, 0x00000036, 0x80808080, 0x80808080
+.long 0xfefefefe, 0xfefefefe, 0x1b1b1b1b, 0x1b1b1b1b
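+ # .LAES_Td: 256 8-byte decryption table entries, followed by four 256-byte
+ # copies of the inverse S-box, each with its own copy of the doubling masks.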
+.align 64
+.LAES_Td:
+.long 0x50a7f451,0x50a7f451
+.long 0x5365417e,0x5365417e
+.long 0xc3a4171a,0xc3a4171a
+.long 0x965e273a,0x965e273a
+.long 0xcb6bab3b,0xcb6bab3b
+.long 0xf1459d1f,0xf1459d1f
+.long 0xab58faac,0xab58faac
+.long 0x9303e34b,0x9303e34b
+.long 0x55fa3020,0x55fa3020
+.long 0xf66d76ad,0xf66d76ad
+.long 0x9176cc88,0x9176cc88
+.long 0x254c02f5,0x254c02f5
+.long 0xfcd7e54f,0xfcd7e54f
+.long 0xd7cb2ac5,0xd7cb2ac5
+.long 0x80443526,0x80443526
+.long 0x8fa362b5,0x8fa362b5
+.long 0x495ab1de,0x495ab1de
+.long 0x671bba25,0x671bba25
+.long 0x980eea45,0x980eea45
+.long 0xe1c0fe5d,0xe1c0fe5d
+.long 0x02752fc3,0x02752fc3
+.long 0x12f04c81,0x12f04c81
+.long 0xa397468d,0xa397468d
+.long 0xc6f9d36b,0xc6f9d36b
+.long 0xe75f8f03,0xe75f8f03
+.long 0x959c9215,0x959c9215
+.long 0xeb7a6dbf,0xeb7a6dbf
+.long 0xda595295,0xda595295
+.long 0x2d83bed4,0x2d83bed4
+.long 0xd3217458,0xd3217458
+.long 0x2969e049,0x2969e049
+.long 0x44c8c98e,0x44c8c98e
+.long 0x6a89c275,0x6a89c275
+.long 0x78798ef4,0x78798ef4
+.long 0x6b3e5899,0x6b3e5899
+.long 0xdd71b927,0xdd71b927
+.long 0xb64fe1be,0xb64fe1be
+.long 0x17ad88f0,0x17ad88f0
+.long 0x66ac20c9,0x66ac20c9
+.long 0xb43ace7d,0xb43ace7d
+.long 0x184adf63,0x184adf63
+.long 0x82311ae5,0x82311ae5
+.long 0x60335197,0x60335197
+.long 0x457f5362,0x457f5362
+.long 0xe07764b1,0xe07764b1
+.long 0x84ae6bbb,0x84ae6bbb
+.long 0x1ca081fe,0x1ca081fe
+.long 0x942b08f9,0x942b08f9
+.long 0x58684870,0x58684870
+.long 0x19fd458f,0x19fd458f
+.long 0x876cde94,0x876cde94
+.long 0xb7f87b52,0xb7f87b52
+.long 0x23d373ab,0x23d373ab
+.long 0xe2024b72,0xe2024b72
+.long 0x578f1fe3,0x578f1fe3
+.long 0x2aab5566,0x2aab5566
+.long 0x0728ebb2,0x0728ebb2
+.long 0x03c2b52f,0x03c2b52f
+.long 0x9a7bc586,0x9a7bc586
+.long 0xa50837d3,0xa50837d3
+.long 0xf2872830,0xf2872830
+.long 0xb2a5bf23,0xb2a5bf23
+.long 0xba6a0302,0xba6a0302
+.long 0x5c8216ed,0x5c8216ed
+.long 0x2b1ccf8a,0x2b1ccf8a
+.long 0x92b479a7,0x92b479a7
+.long 0xf0f207f3,0xf0f207f3
+.long 0xa1e2694e,0xa1e2694e
+.long 0xcdf4da65,0xcdf4da65
+.long 0xd5be0506,0xd5be0506
+.long 0x1f6234d1,0x1f6234d1
+.long 0x8afea6c4,0x8afea6c4
+.long 0x9d532e34,0x9d532e34
+.long 0xa055f3a2,0xa055f3a2
+.long 0x32e18a05,0x32e18a05
+.long 0x75ebf6a4,0x75ebf6a4
+.long 0x39ec830b,0x39ec830b
+.long 0xaaef6040,0xaaef6040
+.long 0x069f715e,0x069f715e
+.long 0x51106ebd,0x51106ebd
+.long 0xf98a213e,0xf98a213e
+.long 0x3d06dd96,0x3d06dd96
+.long 0xae053edd,0xae053edd
+.long 0x46bde64d,0x46bde64d
+.long 0xb58d5491,0xb58d5491
+.long 0x055dc471,0x055dc471
+.long 0x6fd40604,0x6fd40604
+.long 0xff155060,0xff155060
+.long 0x24fb9819,0x24fb9819
+.long 0x97e9bdd6,0x97e9bdd6
+.long 0xcc434089,0xcc434089
+.long 0x779ed967,0x779ed967
+.long 0xbd42e8b0,0xbd42e8b0
+.long 0x888b8907,0x888b8907
+.long 0x385b19e7,0x385b19e7
+.long 0xdbeec879,0xdbeec879
+.long 0x470a7ca1,0x470a7ca1
+.long 0xe90f427c,0xe90f427c
+.long 0xc91e84f8,0xc91e84f8
+.long 0x00000000,0x00000000
+.long 0x83868009,0x83868009
+.long 0x48ed2b32,0x48ed2b32
+.long 0xac70111e,0xac70111e
+.long 0x4e725a6c,0x4e725a6c
+.long 0xfbff0efd,0xfbff0efd
+.long 0x5638850f,0x5638850f
+.long 0x1ed5ae3d,0x1ed5ae3d
+.long 0x27392d36,0x27392d36
+.long 0x64d90f0a,0x64d90f0a
+.long 0x21a65c68,0x21a65c68
+.long 0xd1545b9b,0xd1545b9b
+.long 0x3a2e3624,0x3a2e3624
+.long 0xb1670a0c,0xb1670a0c
+.long 0x0fe75793,0x0fe75793
+.long 0xd296eeb4,0xd296eeb4
+.long 0x9e919b1b,0x9e919b1b
+.long 0x4fc5c080,0x4fc5c080
+.long 0xa220dc61,0xa220dc61
+.long 0x694b775a,0x694b775a
+.long 0x161a121c,0x161a121c
+.long 0x0aba93e2,0x0aba93e2
+.long 0xe52aa0c0,0xe52aa0c0
+.long 0x43e0223c,0x43e0223c
+.long 0x1d171b12,0x1d171b12
+.long 0x0b0d090e,0x0b0d090e
+.long 0xadc78bf2,0xadc78bf2
+.long 0xb9a8b62d,0xb9a8b62d
+.long 0xc8a91e14,0xc8a91e14
+.long 0x8519f157,0x8519f157
+.long 0x4c0775af,0x4c0775af
+.long 0xbbdd99ee,0xbbdd99ee
+.long 0xfd607fa3,0xfd607fa3
+.long 0x9f2601f7,0x9f2601f7
+.long 0xbcf5725c,0xbcf5725c
+.long 0xc53b6644,0xc53b6644
+.long 0x347efb5b,0x347efb5b
+.long 0x7629438b,0x7629438b
+.long 0xdcc623cb,0xdcc623cb
+.long 0x68fcedb6,0x68fcedb6
+.long 0x63f1e4b8,0x63f1e4b8
+.long 0xcadc31d7,0xcadc31d7
+.long 0x10856342,0x10856342
+.long 0x40229713,0x40229713
+.long 0x2011c684,0x2011c684
+.long 0x7d244a85,0x7d244a85
+.long 0xf83dbbd2,0xf83dbbd2
+.long 0x1132f9ae,0x1132f9ae
+.long 0x6da129c7,0x6da129c7
+.long 0x4b2f9e1d,0x4b2f9e1d
+.long 0xf330b2dc,0xf330b2dc
+.long 0xec52860d,0xec52860d
+.long 0xd0e3c177,0xd0e3c177
+.long 0x6c16b32b,0x6c16b32b
+.long 0x99b970a9,0x99b970a9
+.long 0xfa489411,0xfa489411
+.long 0x2264e947,0x2264e947
+.long 0xc48cfca8,0xc48cfca8
+.long 0x1a3ff0a0,0x1a3ff0a0
+.long 0xd82c7d56,0xd82c7d56
+.long 0xef903322,0xef903322
+.long 0xc74e4987,0xc74e4987
+.long 0xc1d138d9,0xc1d138d9
+.long 0xfea2ca8c,0xfea2ca8c
+.long 0x360bd498,0x360bd498
+.long 0xcf81f5a6,0xcf81f5a6
+.long 0x28de7aa5,0x28de7aa5
+.long 0x268eb7da,0x268eb7da
+.long 0xa4bfad3f,0xa4bfad3f
+.long 0xe49d3a2c,0xe49d3a2c
+.long 0x0d927850,0x0d927850
+.long 0x9bcc5f6a,0x9bcc5f6a
+.long 0x62467e54,0x62467e54
+.long 0xc2138df6,0xc2138df6
+.long 0xe8b8d890,0xe8b8d890
+.long 0x5ef7392e,0x5ef7392e
+.long 0xf5afc382,0xf5afc382
+.long 0xbe805d9f,0xbe805d9f
+.long 0x7c93d069,0x7c93d069
+.long 0xa92dd56f,0xa92dd56f
+.long 0xb31225cf,0xb31225cf
+.long 0x3b99acc8,0x3b99acc8
+.long 0xa77d1810,0xa77d1810
+.long 0x6e639ce8,0x6e639ce8
+.long 0x7bbb3bdb,0x7bbb3bdb
+.long 0x097826cd,0x097826cd
+.long 0xf418596e,0xf418596e
+.long 0x01b79aec,0x01b79aec
+.long 0xa89a4f83,0xa89a4f83
+.long 0x656e95e6,0x656e95e6
+.long 0x7ee6ffaa,0x7ee6ffaa
+.long 0x08cfbc21,0x08cfbc21
+.long 0xe6e815ef,0xe6e815ef
+.long 0xd99be7ba,0xd99be7ba
+.long 0xce366f4a,0xce366f4a
+.long 0xd4099fea,0xd4099fea
+.long 0xd67cb029,0xd67cb029
+.long 0xafb2a431,0xafb2a431
+.long 0x31233f2a,0x31233f2a
+.long 0x3094a5c6,0x3094a5c6
+.long 0xc066a235,0xc066a235
+.long 0x37bc4e74,0x37bc4e74
+.long 0xa6ca82fc,0xa6ca82fc
+.long 0xb0d090e0,0xb0d090e0
+.long 0x15d8a733,0x15d8a733
+.long 0x4a9804f1,0x4a9804f1
+.long 0xf7daec41,0xf7daec41
+.long 0x0e50cd7f,0x0e50cd7f
+.long 0x2ff69117,0x2ff69117
+.long 0x8dd64d76,0x8dd64d76
+.long 0x4db0ef43,0x4db0ef43
+.long 0x544daacc,0x544daacc
+.long 0xdf0496e4,0xdf0496e4
+.long 0xe3b5d19e,0xe3b5d19e
+.long 0x1b886a4c,0x1b886a4c
+.long 0xb81f2cc1,0xb81f2cc1
+.long 0x7f516546,0x7f516546
+.long 0x04ea5e9d,0x04ea5e9d
+.long 0x5d358c01,0x5d358c01
+.long 0x737487fa,0x737487fa
+.long 0x2e410bfb,0x2e410bfb
+.long 0x5a1d67b3,0x5a1d67b3
+.long 0x52d2db92,0x52d2db92
+.long 0x335610e9,0x335610e9
+.long 0x1347d66d,0x1347d66d
+.long 0x8c61d79a,0x8c61d79a
+.long 0x7a0ca137,0x7a0ca137
+.long 0x8e14f859,0x8e14f859
+.long 0x893c13eb,0x893c13eb
+.long 0xee27a9ce,0xee27a9ce
+.long 0x35c961b7,0x35c961b7
+.long 0xede51ce1,0xede51ce1
+.long 0x3cb1477a,0x3cb1477a
+.long 0x59dfd29c,0x59dfd29c
+.long 0x3f73f255,0x3f73f255
+.long 0x79ce1418,0x79ce1418
+.long 0xbf37c773,0xbf37c773
+.long 0xeacdf753,0xeacdf753
+.long 0x5baafd5f,0x5baafd5f
+.long 0x146f3ddf,0x146f3ddf
+.long 0x86db4478,0x86db4478
+.long 0x81f3afca,0x81f3afca
+.long 0x3ec468b9,0x3ec468b9
+.long 0x2c342438,0x2c342438
+.long 0x5f40a3c2,0x5f40a3c2
+.long 0x72c31d16,0x72c31d16
+.long 0x0c25e2bc,0x0c25e2bc
+.long 0x8b493c28,0x8b493c28
+.long 0x41950dff,0x41950dff
+.long 0x7101a839,0x7101a839
+.long 0xdeb30c08,0xdeb30c08
+.long 0x9ce4b4d8,0x9ce4b4d8
+.long 0x90c15664,0x90c15664
+.long 0x6184cb7b,0x6184cb7b
+.long 0x70b632d5,0x70b632d5
+.long 0x745c6c48,0x745c6c48
+.long 0x4257b8d0,0x4257b8d0
+.byte 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38
+.byte 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb
+.byte 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87
+.byte 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb
+.byte 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d
+.byte 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e
+.byte 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2
+.byte 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25
+.byte 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16
+.byte 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92
+.byte 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda
+.byte 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84
+.byte 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a
+.byte 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06
+.byte 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02
+.byte 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b
+.byte 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea
+.byte 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73
+.byte 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85
+.byte 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e
+.byte 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89
+.byte 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b
+.byte 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20
+.byte 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4
+.byte 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31
+.byte 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f
+.byte 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d
+.byte 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef
+.byte 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0
+.byte 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61
+.byte 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26
+.byte 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d
+.long 0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe
+.long 0x1b1b1b1b, 0x1b1b1b1b, 0, 0
+.byte 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38
+.byte 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb
+.byte 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87
+.byte 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb
+.byte 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d
+.byte 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e
+.byte 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2
+.byte 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25
+.byte 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16
+.byte 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92
+.byte 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda
+.byte 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84
+.byte 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a
+.byte 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06
+.byte 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02
+.byte 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b
+.byte 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea
+.byte 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73
+.byte 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85
+.byte 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e
+.byte 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89
+.byte 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b
+.byte 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20
+.byte 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4
+.byte 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31
+.byte 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f
+.byte 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d
+.byte 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef
+.byte 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0
+.byte 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61
+.byte 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26
+.byte 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d
+.long 0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe
+.long 0x1b1b1b1b, 0x1b1b1b1b, 0, 0
+.byte 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38
+.byte 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb
+.byte 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87
+.byte 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb
+.byte 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d
+.byte 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e
+.byte 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2
+.byte 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25
+.byte 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16
+.byte 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92
+.byte 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda
+.byte 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84
+.byte 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a
+.byte 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06
+.byte 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02
+.byte 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b
+.byte 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea
+.byte 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73
+.byte 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85
+.byte 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e
+.byte 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89
+.byte 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b
+.byte 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20
+.byte 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4
+.byte 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31
+.byte 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f
+.byte 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d
+.byte 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef
+.byte 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0
+.byte 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61
+.byte 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26
+.byte 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d
+.long 0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe
+.long 0x1b1b1b1b, 0x1b1b1b1b, 0, 0
+.byte 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38
+.byte 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb
+.byte 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87
+.byte 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb
+.byte 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d
+.byte 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e
+.byte 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2
+.byte 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25
+.byte 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16
+.byte 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92
+.byte 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda
+.byte 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84
+.byte 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a
+.byte 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06
+.byte 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02
+.byte 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b
+.byte 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea
+.byte 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73
+.byte 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85
+.byte 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e
+.byte 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89
+.byte 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b
+.byte 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20
+.byte 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4
+.byte 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31
+.byte 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f
+.byte 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d
+.byte 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef
+.byte 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0
+.byte 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61
+.byte 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26
+.byte 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d
+.long 0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe
+.long 0x1b1b1b1b, 0x1b1b1b1b, 0, 0
+.byte 65,69,83,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
+.align 64
diff --git a/secure/lib/libcrypto/amd64/aesni-sha1-x86_64.S b/secure/lib/libcrypto/amd64/aesni-sha1-x86_64.S
new file mode 100644
index 000000000000..e9a2053436fe
--- /dev/null
+++ b/secure/lib/libcrypto/amd64/aesni-sha1-x86_64.S
@@ -0,0 +1,1397 @@
+ # $FreeBSD$
+.text
+
+
+.globl aesni_cbc_sha1_enc
+.type aesni_cbc_sha1_enc,@function
+.align 16
+aesni_cbc_sha1_enc:
+
+ movl OPENSSL_ia32cap_P+0(%rip),%r10d
+ movl OPENSSL_ia32cap_P+4(%rip),%r11d
+ jmp aesni_cbc_sha1_enc_ssse3
+ .byte 0xf3,0xc3
+.size aesni_cbc_sha1_enc,.-aesni_cbc_sha1_enc
+.type aesni_cbc_sha1_enc_ssse3,@function
+.align 16
+aesni_cbc_sha1_enc_ssse3:
+ movq 8(%rsp),%r10
+
+
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ leaq -104(%rsp),%rsp
+
+
+ movq %rdi,%r12
+ movq %rsi,%r13
+ movq %rdx,%r14
+ movq %rcx,%r15
+ movdqu (%r8),%xmm11
+ movq %r8,88(%rsp)
+ shlq $6,%r14
+ subq %r12,%r13
+ movl 240(%r15),%r8d
+ addq %r10,%r14
+
+ leaq K_XX_XX(%rip),%r11
+ movl 0(%r9),%eax
+ movl 4(%r9),%ebx
+ movl 8(%r9),%ecx
+ movl 12(%r9),%edx
+ movl %ebx,%esi
+ movl 16(%r9),%ebp
+
+ movdqa 64(%r11),%xmm6
+ movdqa 0(%r11),%xmm9
+ movdqu 0(%r10),%xmm0
+ movdqu 16(%r10),%xmm1
+ movdqu 32(%r10),%xmm2
+ movdqu 48(%r10),%xmm3
+.byte 102,15,56,0,198
+ addq $64,%r10
+.byte 102,15,56,0,206
+.byte 102,15,56,0,214
+.byte 102,15,56,0,222
+ paddd %xmm9,%xmm0
+ paddd %xmm9,%xmm1
+ paddd %xmm9,%xmm2
+ movdqa %xmm0,0(%rsp)
+ psubd %xmm9,%xmm0
+ movdqa %xmm1,16(%rsp)
+ psubd %xmm9,%xmm1
+ movdqa %xmm2,32(%rsp)
+ psubd %xmm9,%xmm2
+ movups (%r15),%xmm13
+ movups 16(%r15),%xmm14
+ jmp .Loop_ssse3
+.align 16
+.Loop_ssse3:
+ movdqa %xmm1,%xmm4
+ addl 0(%rsp),%ebp
+ movups 0(%r12),%xmm12
+ xorps %xmm13,%xmm12
+ xorps %xmm12,%xmm11
+.byte 102,69,15,56,220,222
+ movups 32(%r15),%xmm15
+ xorl %edx,%ecx
+ movdqa %xmm3,%xmm8
+.byte 102,15,58,15,224,8
+ movl %eax,%edi
+ roll $5,%eax
+ paddd %xmm3,%xmm9
+ andl %ecx,%esi
+ xorl %edx,%ecx
+ psrldq $4,%xmm8
+ xorl %edx,%esi
+ addl %eax,%ebp
+ pxor %xmm0,%xmm4
+ rorl $2,%ebx
+ addl %esi,%ebp
+ pxor %xmm2,%xmm8
+ addl 4(%rsp),%edx
+ xorl %ecx,%ebx
+ movl %ebp,%esi
+ roll $5,%ebp
+ pxor %xmm8,%xmm4
+ andl %ebx,%edi
+ xorl %ecx,%ebx
+ movdqa %xmm9,48(%rsp)
+ xorl %ecx,%edi
+.byte 102,69,15,56,220,223
+ movups 48(%r15),%xmm14
+ addl %ebp,%edx
+ movdqa %xmm4,%xmm10
+ movdqa %xmm4,%xmm8
+ rorl $7,%eax
+ addl %edi,%edx
+ addl 8(%rsp),%ecx
+ xorl %ebx,%eax
+ pslldq $12,%xmm10
+ paddd %xmm4,%xmm4
+ movl %edx,%edi
+ roll $5,%edx
+ andl %eax,%esi
+ xorl %ebx,%eax
+ psrld $31,%xmm8
+ xorl %ebx,%esi
+ addl %edx,%ecx
+ movdqa %xmm10,%xmm9
+ rorl $7,%ebp
+ addl %esi,%ecx
+ psrld $30,%xmm10
+ por %xmm8,%xmm4
+ addl 12(%rsp),%ebx
+ xorl %eax,%ebp
+ movl %ecx,%esi
+ roll $5,%ecx
+.byte 102,69,15,56,220,222
+ movups 64(%r15),%xmm15
+ pslld $2,%xmm9
+ pxor %xmm10,%xmm4
+ andl %ebp,%edi
+ xorl %eax,%ebp
+ movdqa 0(%r11),%xmm10
+ xorl %eax,%edi
+ addl %ecx,%ebx
+ pxor %xmm9,%xmm4
+ rorl $7,%edx
+ addl %edi,%ebx
+ movdqa %xmm2,%xmm5
+ addl 16(%rsp),%eax
+ xorl %ebp,%edx
+ movdqa %xmm4,%xmm9
+.byte 102,15,58,15,233,8
+ movl %ebx,%edi
+ roll $5,%ebx
+ paddd %xmm4,%xmm10
+ andl %edx,%esi
+ xorl %ebp,%edx
+ psrldq $4,%xmm9
+ xorl %ebp,%esi
+ addl %ebx,%eax
+ pxor %xmm1,%xmm5
+ rorl $7,%ecx
+ addl %esi,%eax
+ pxor %xmm3,%xmm9
+ addl 20(%rsp),%ebp
+.byte 102,69,15,56,220,223
+ movups 80(%r15),%xmm14
+ xorl %edx,%ecx
+ movl %eax,%esi
+ roll $5,%eax
+ pxor %xmm9,%xmm5
+ andl %ecx,%edi
+ xorl %edx,%ecx
+ movdqa %xmm10,0(%rsp)
+ xorl %edx,%edi
+ addl %eax,%ebp
+ movdqa %xmm5,%xmm8
+ movdqa %xmm5,%xmm9
+ rorl $7,%ebx
+ addl %edi,%ebp
+ addl 24(%rsp),%edx
+ xorl %ecx,%ebx
+ pslldq $12,%xmm8
+ paddd %xmm5,%xmm5
+ movl %ebp,%edi
+ roll $5,%ebp
+ andl %ebx,%esi
+ xorl %ecx,%ebx
+ psrld $31,%xmm9
+ xorl %ecx,%esi
+.byte 102,69,15,56,220,222
+ movups 96(%r15),%xmm15
+ addl %ebp,%edx
+ movdqa %xmm8,%xmm10
+ rorl $7,%eax
+ addl %esi,%edx
+ psrld $30,%xmm8
+ por %xmm9,%xmm5
+ addl 28(%rsp),%ecx
+ xorl %ebx,%eax
+ movl %edx,%esi
+ roll $5,%edx
+ pslld $2,%xmm10
+ pxor %xmm8,%xmm5
+ andl %eax,%edi
+ xorl %ebx,%eax
+ movdqa 16(%r11),%xmm8
+ xorl %ebx,%edi
+ addl %edx,%ecx
+ pxor %xmm10,%xmm5
+ rorl $7,%ebp
+ addl %edi,%ecx
+ movdqa %xmm3,%xmm6
+ addl 32(%rsp),%ebx
+ xorl %eax,%ebp
+ movdqa %xmm5,%xmm10
+.byte 102,15,58,15,242,8
+ movl %ecx,%edi
+ roll $5,%ecx
+.byte 102,69,15,56,220,223
+ movups 112(%r15),%xmm14
+ paddd %xmm5,%xmm8
+ andl %ebp,%esi
+ xorl %eax,%ebp
+ psrldq $4,%xmm10
+ xorl %eax,%esi
+ addl %ecx,%ebx
+ pxor %xmm2,%xmm6
+ rorl $7,%edx
+ addl %esi,%ebx
+ pxor %xmm4,%xmm10
+ addl 36(%rsp),%eax
+ xorl %ebp,%edx
+ movl %ebx,%esi
+ roll $5,%ebx
+ pxor %xmm10,%xmm6
+ andl %edx,%edi
+ xorl %ebp,%edx
+ movdqa %xmm8,16(%rsp)
+ xorl %ebp,%edi
+ addl %ebx,%eax
+ movdqa %xmm6,%xmm9
+ movdqa %xmm6,%xmm10
+ rorl $7,%ecx
+ addl %edi,%eax
+ addl 40(%rsp),%ebp
+.byte 102,69,15,56,220,222
+ movups 128(%r15),%xmm15
+ xorl %edx,%ecx
+ pslldq $12,%xmm9
+ paddd %xmm6,%xmm6
+ movl %eax,%edi
+ roll $5,%eax
+ andl %ecx,%esi
+ xorl %edx,%ecx
+ psrld $31,%xmm10
+ xorl %edx,%esi
+ addl %eax,%ebp
+ movdqa %xmm9,%xmm8
+ rorl $7,%ebx
+ addl %esi,%ebp
+ psrld $30,%xmm9
+ por %xmm10,%xmm6
+ addl 44(%rsp),%edx
+ xorl %ecx,%ebx
+ movl %ebp,%esi
+ roll $5,%ebp
+ pslld $2,%xmm8
+ pxor %xmm9,%xmm6
+ andl %ebx,%edi
+ xorl %ecx,%ebx
+ movdqa 16(%r11),%xmm9
+ xorl %ecx,%edi
+.byte 102,69,15,56,220,223
+ movups 144(%r15),%xmm14
+ addl %ebp,%edx
+ pxor %xmm8,%xmm6
+ rorl $7,%eax
+ addl %edi,%edx
+ movdqa %xmm4,%xmm7
+ addl 48(%rsp),%ecx
+ xorl %ebx,%eax
+ movdqa %xmm6,%xmm8
+.byte 102,15,58,15,251,8
+ movl %edx,%edi
+ roll $5,%edx
+ paddd %xmm6,%xmm9
+ andl %eax,%esi
+ xorl %ebx,%eax
+ psrldq $4,%xmm8
+ xorl %ebx,%esi
+ addl %edx,%ecx
+ pxor %xmm3,%xmm7
+ rorl $7,%ebp
+ addl %esi,%ecx
+ pxor %xmm5,%xmm8
+ addl 52(%rsp),%ebx
+ xorl %eax,%ebp
+ movl %ecx,%esi
+ roll $5,%ecx
+.byte 102,69,15,56,220,222
+ movups 160(%r15),%xmm15
+ pxor %xmm8,%xmm7
+ andl %ebp,%edi
+ xorl %eax,%ebp
+ movdqa %xmm9,32(%rsp)
+ xorl %eax,%edi
+ addl %ecx,%ebx
+ movdqa %xmm7,%xmm10
+ movdqa %xmm7,%xmm8
+ rorl $7,%edx
+ addl %edi,%ebx
+ addl 56(%rsp),%eax
+ xorl %ebp,%edx
+ pslldq $12,%xmm10
+ paddd %xmm7,%xmm7
+ movl %ebx,%edi
+ roll $5,%ebx
+ andl %edx,%esi
+ xorl %ebp,%edx
+ psrld $31,%xmm8
+ xorl %ebp,%esi
+ addl %ebx,%eax
+ movdqa %xmm10,%xmm9
+ rorl $7,%ecx
+ addl %esi,%eax
+ psrld $30,%xmm10
+ por %xmm8,%xmm7
+ addl 60(%rsp),%ebp
+ cmpl $11,%r8d
+ jb .Laesenclast1
+ movups 176(%r15),%xmm14
+.byte 102,69,15,56,220,223
+ movups 192(%r15),%xmm15
+.byte 102,69,15,56,220,222
+ je .Laesenclast1
+ movups 208(%r15),%xmm14
+.byte 102,69,15,56,220,223
+ movups 224(%r15),%xmm15
+.byte 102,69,15,56,220,222
+.Laesenclast1:
+.byte 102,69,15,56,221,223
+ movups 16(%r15),%xmm14
+ xorl %edx,%ecx
+ movl %eax,%esi
+ roll $5,%eax
+ pslld $2,%xmm9
+ pxor %xmm10,%xmm7
+ andl %ecx,%edi
+ xorl %edx,%ecx
+ movdqa 16(%r11),%xmm10
+ xorl %edx,%edi
+ addl %eax,%ebp
+ pxor %xmm9,%xmm7
+ rorl $7,%ebx
+ addl %edi,%ebp
+ movdqa %xmm7,%xmm9
+ addl 0(%rsp),%edx
+ pxor %xmm4,%xmm0
+.byte 102,68,15,58,15,206,8
+ xorl %ecx,%ebx
+ movl %ebp,%edi
+ roll $5,%ebp
+ pxor %xmm1,%xmm0
+ andl %ebx,%esi
+ xorl %ecx,%ebx
+ movdqa %xmm10,%xmm8
+ paddd %xmm7,%xmm10
+ xorl %ecx,%esi
+ movups 16(%r12),%xmm12
+ xorps %xmm13,%xmm12
+ movups %xmm11,0(%r13,%r12,1)
+ xorps %xmm12,%xmm11
+.byte 102,69,15,56,220,222
+ movups 32(%r15),%xmm15
+ addl %ebp,%edx
+ pxor %xmm9,%xmm0
+ rorl $7,%eax
+ addl %esi,%edx
+ addl 4(%rsp),%ecx
+ xorl %ebx,%eax
+ movdqa %xmm0,%xmm9
+ movdqa %xmm10,48(%rsp)
+ movl %edx,%esi
+ roll $5,%edx
+ andl %eax,%edi
+ xorl %ebx,%eax
+ pslld $2,%xmm0
+ xorl %ebx,%edi
+ addl %edx,%ecx
+ psrld $30,%xmm9
+ rorl $7,%ebp
+ addl %edi,%ecx
+ addl 8(%rsp),%ebx
+ xorl %eax,%ebp
+ movl %ecx,%edi
+ roll $5,%ecx
+.byte 102,69,15,56,220,223
+ movups 48(%r15),%xmm14
+ por %xmm9,%xmm0
+ andl %ebp,%esi
+ xorl %eax,%ebp
+ movdqa %xmm0,%xmm10
+ xorl %eax,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ addl 12(%rsp),%eax
+ xorl %ebp,%edx
+ movl %ebx,%esi
+ roll $5,%ebx
+ andl %edx,%edi
+ xorl %ebp,%edx
+ xorl %ebp,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ addl 16(%rsp),%ebp
+.byte 102,69,15,56,220,222
+ movups 64(%r15),%xmm15
+ pxor %xmm5,%xmm1
+.byte 102,68,15,58,15,215,8
+ xorl %edx,%esi
+ movl %eax,%edi
+ roll $5,%eax
+ pxor %xmm2,%xmm1
+ xorl %ecx,%esi
+ addl %eax,%ebp
+ movdqa %xmm8,%xmm9
+ paddd %xmm0,%xmm8
+ rorl $7,%ebx
+ addl %esi,%ebp
+ pxor %xmm10,%xmm1
+ addl 20(%rsp),%edx
+ xorl %ecx,%edi
+ movl %ebp,%esi
+ roll $5,%ebp
+ movdqa %xmm1,%xmm10
+ movdqa %xmm8,0(%rsp)
+ xorl %ebx,%edi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %edi,%edx
+ pslld $2,%xmm1
+ addl 24(%rsp),%ecx
+ xorl %ebx,%esi
+ psrld $30,%xmm10
+ movl %edx,%edi
+ roll $5,%edx
+ xorl %eax,%esi
+.byte 102,69,15,56,220,223
+ movups 80(%r15),%xmm14
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %esi,%ecx
+ por %xmm10,%xmm1
+ addl 28(%rsp),%ebx
+ xorl %eax,%edi
+ movdqa %xmm1,%xmm8
+ movl %ecx,%esi
+ roll $5,%ecx
+ xorl %ebp,%edi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %edi,%ebx
+ addl 32(%rsp),%eax
+ pxor %xmm6,%xmm2
+.byte 102,68,15,58,15,192,8
+ xorl %ebp,%esi
+ movl %ebx,%edi
+ roll $5,%ebx
+ pxor %xmm3,%xmm2
+ xorl %edx,%esi
+ addl %ebx,%eax
+ movdqa 32(%r11),%xmm10
+ paddd %xmm1,%xmm9
+ rorl $7,%ecx
+ addl %esi,%eax
+ pxor %xmm8,%xmm2
+ addl 36(%rsp),%ebp
+.byte 102,69,15,56,220,222
+ movups 96(%r15),%xmm15
+ xorl %edx,%edi
+ movl %eax,%esi
+ roll $5,%eax
+ movdqa %xmm2,%xmm8
+ movdqa %xmm9,16(%rsp)
+ xorl %ecx,%edi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %edi,%ebp
+ pslld $2,%xmm2
+ addl 40(%rsp),%edx
+ xorl %ecx,%esi
+ psrld $30,%xmm8
+ movl %ebp,%edi
+ roll $5,%ebp
+ xorl %ebx,%esi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %esi,%edx
+ por %xmm8,%xmm2
+ addl 44(%rsp),%ecx
+ xorl %ebx,%edi
+ movdqa %xmm2,%xmm9
+ movl %edx,%esi
+ roll $5,%edx
+ xorl %eax,%edi
+.byte 102,69,15,56,220,223
+ movups 112(%r15),%xmm14
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %edi,%ecx
+ addl 48(%rsp),%ebx
+ pxor %xmm7,%xmm3
+.byte 102,68,15,58,15,201,8
+ xorl %eax,%esi
+ movl %ecx,%edi
+ roll $5,%ecx
+ pxor %xmm4,%xmm3
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ movdqa %xmm10,%xmm8
+ paddd %xmm2,%xmm10
+ rorl $7,%edx
+ addl %esi,%ebx
+ pxor %xmm9,%xmm3
+ addl 52(%rsp),%eax
+ xorl %ebp,%edi
+ movl %ebx,%esi
+ roll $5,%ebx
+ movdqa %xmm3,%xmm9
+ movdqa %xmm10,32(%rsp)
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ pslld $2,%xmm3
+ addl 56(%rsp),%ebp
+.byte 102,69,15,56,220,222
+ movups 128(%r15),%xmm15
+ xorl %edx,%esi
+ psrld $30,%xmm9
+ movl %eax,%edi
+ roll $5,%eax
+ xorl %ecx,%esi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %esi,%ebp
+ por %xmm9,%xmm3
+ addl 60(%rsp),%edx
+ xorl %ecx,%edi
+ movdqa %xmm3,%xmm10
+ movl %ebp,%esi
+ roll $5,%ebp
+ xorl %ebx,%edi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %edi,%edx
+ addl 0(%rsp),%ecx
+ pxor %xmm0,%xmm4
+.byte 102,68,15,58,15,210,8
+ xorl %ebx,%esi
+ movl %edx,%edi
+ roll $5,%edx
+ pxor %xmm5,%xmm4
+ xorl %eax,%esi
+.byte 102,69,15,56,220,223
+ movups 144(%r15),%xmm14
+ addl %edx,%ecx
+ movdqa %xmm8,%xmm9
+ paddd %xmm3,%xmm8
+ rorl $7,%ebp
+ addl %esi,%ecx
+ pxor %xmm10,%xmm4
+ addl 4(%rsp),%ebx
+ xorl %eax,%edi
+ movl %ecx,%esi
+ roll $5,%ecx
+ movdqa %xmm4,%xmm10
+ movdqa %xmm8,48(%rsp)
+ xorl %ebp,%edi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %edi,%ebx
+ pslld $2,%xmm4
+ addl 8(%rsp),%eax
+ xorl %ebp,%esi
+ psrld $30,%xmm10
+ movl %ebx,%edi
+ roll $5,%ebx
+ xorl %edx,%esi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %esi,%eax
+ por %xmm10,%xmm4
+ addl 12(%rsp),%ebp
+.byte 102,69,15,56,220,222
+ movups 160(%r15),%xmm15
+ xorl %edx,%edi
+ movdqa %xmm4,%xmm8
+ movl %eax,%esi
+ roll $5,%eax
+ xorl %ecx,%edi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %edi,%ebp
+ addl 16(%rsp),%edx
+ pxor %xmm1,%xmm5
+.byte 102,68,15,58,15,195,8
+ xorl %ecx,%esi
+ movl %ebp,%edi
+ roll $5,%ebp
+ pxor %xmm6,%xmm5
+ xorl %ebx,%esi
+ addl %ebp,%edx
+ movdqa %xmm9,%xmm10
+ paddd %xmm4,%xmm9
+ rorl $7,%eax
+ addl %esi,%edx
+ pxor %xmm8,%xmm5
+ addl 20(%rsp),%ecx
+ xorl %ebx,%edi
+ movl %edx,%esi
+ roll $5,%edx
+ movdqa %xmm5,%xmm8
+ movdqa %xmm9,0(%rsp)
+ xorl %eax,%edi
+ cmpl $11,%r8d
+ jb .Laesenclast2
+ movups 176(%r15),%xmm14
+.byte 102,69,15,56,220,223
+ movups 192(%r15),%xmm15
+.byte 102,69,15,56,220,222
+ je .Laesenclast2
+ movups 208(%r15),%xmm14
+.byte 102,69,15,56,220,223
+ movups 224(%r15),%xmm15
+.byte 102,69,15,56,220,222
+.Laesenclast2:
+.byte 102,69,15,56,221,223
+ movups 16(%r15),%xmm14
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %edi,%ecx
+ pslld $2,%xmm5
+ addl 24(%rsp),%ebx
+ xorl %eax,%esi
+ psrld $30,%xmm8
+ movl %ecx,%edi
+ roll $5,%ecx
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ por %xmm8,%xmm5
+ addl 28(%rsp),%eax
+ xorl %ebp,%edi
+ movdqa %xmm5,%xmm9
+ movl %ebx,%esi
+ roll $5,%ebx
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ movl %ecx,%edi
+ movups 32(%r12),%xmm12
+ xorps %xmm13,%xmm12
+ movups %xmm11,16(%r13,%r12,1)
+ xorps %xmm12,%xmm11
+.byte 102,69,15,56,220,222
+ movups 32(%r15),%xmm15
+ pxor %xmm2,%xmm6
+.byte 102,68,15,58,15,204,8
+ xorl %edx,%ecx
+ addl 32(%rsp),%ebp
+ andl %edx,%edi
+ pxor %xmm7,%xmm6
+ andl %ecx,%esi
+ rorl $7,%ebx
+ movdqa %xmm10,%xmm8
+ paddd %xmm5,%xmm10
+ addl %edi,%ebp
+ movl %eax,%edi
+ pxor %xmm9,%xmm6
+ roll $5,%eax
+ addl %esi,%ebp
+ xorl %edx,%ecx
+ addl %eax,%ebp
+ movdqa %xmm6,%xmm9
+ movdqa %xmm10,16(%rsp)
+ movl %ebx,%esi
+ xorl %ecx,%ebx
+ addl 36(%rsp),%edx
+ andl %ecx,%esi
+ pslld $2,%xmm6
+ andl %ebx,%edi
+ rorl $7,%eax
+ psrld $30,%xmm9
+ addl %esi,%edx
+ movl %ebp,%esi
+ roll $5,%ebp
+.byte 102,69,15,56,220,223
+ movups 48(%r15),%xmm14
+ addl %edi,%edx
+ xorl %ecx,%ebx
+ addl %ebp,%edx
+ por %xmm9,%xmm6
+ movl %eax,%edi
+ xorl %ebx,%eax
+ movdqa %xmm6,%xmm10
+ addl 40(%rsp),%ecx
+ andl %ebx,%edi
+ andl %eax,%esi
+ rorl $7,%ebp
+ addl %edi,%ecx
+ movl %edx,%edi
+ roll $5,%edx
+ addl %esi,%ecx
+ xorl %ebx,%eax
+ addl %edx,%ecx
+ movl %ebp,%esi
+ xorl %eax,%ebp
+ addl 44(%rsp),%ebx
+ andl %eax,%esi
+ andl %ebp,%edi
+.byte 102,69,15,56,220,222
+ movups 64(%r15),%xmm15
+ rorl $7,%edx
+ addl %esi,%ebx
+ movl %ecx,%esi
+ roll $5,%ecx
+ addl %edi,%ebx
+ xorl %eax,%ebp
+ addl %ecx,%ebx
+ movl %edx,%edi
+ pxor %xmm3,%xmm7
+.byte 102,68,15,58,15,213,8
+ xorl %ebp,%edx
+ addl 48(%rsp),%eax
+ andl %ebp,%edi
+ pxor %xmm0,%xmm7
+ andl %edx,%esi
+ rorl $7,%ecx
+ movdqa 48(%r11),%xmm9
+ paddd %xmm6,%xmm8
+ addl %edi,%eax
+ movl %ebx,%edi
+ pxor %xmm10,%xmm7
+ roll $5,%ebx
+ addl %esi,%eax
+ xorl %ebp,%edx
+ addl %ebx,%eax
+ movdqa %xmm7,%xmm10
+ movdqa %xmm8,32(%rsp)
+ movl %ecx,%esi
+.byte 102,69,15,56,220,223
+ movups 80(%r15),%xmm14
+ xorl %edx,%ecx
+ addl 52(%rsp),%ebp
+ andl %edx,%esi
+ pslld $2,%xmm7
+ andl %ecx,%edi
+ rorl $7,%ebx
+ psrld $30,%xmm10
+ addl %esi,%ebp
+ movl %eax,%esi
+ roll $5,%eax
+ addl %edi,%ebp
+ xorl %edx,%ecx
+ addl %eax,%ebp
+ por %xmm10,%xmm7
+ movl %ebx,%edi
+ xorl %ecx,%ebx
+ movdqa %xmm7,%xmm8
+ addl 56(%rsp),%edx
+ andl %ecx,%edi
+ andl %ebx,%esi
+ rorl $7,%eax
+ addl %edi,%edx
+ movl %ebp,%edi
+ roll $5,%ebp
+.byte 102,69,15,56,220,222
+ movups 96(%r15),%xmm15
+ addl %esi,%edx
+ xorl %ecx,%ebx
+ addl %ebp,%edx
+ movl %eax,%esi
+ xorl %ebx,%eax
+ addl 60(%rsp),%ecx
+ andl %ebx,%esi
+ andl %eax,%edi
+ rorl $7,%ebp
+ addl %esi,%ecx
+ movl %edx,%esi
+ roll $5,%edx
+ addl %edi,%ecx
+ xorl %ebx,%eax
+ addl %edx,%ecx
+ movl %ebp,%edi
+ pxor %xmm4,%xmm0
+.byte 102,68,15,58,15,198,8
+ xorl %eax,%ebp
+ addl 0(%rsp),%ebx
+ andl %eax,%edi
+ pxor %xmm1,%xmm0
+ andl %ebp,%esi
+.byte 102,69,15,56,220,223
+ movups 112(%r15),%xmm14
+ rorl $7,%edx
+ movdqa %xmm9,%xmm10
+ paddd %xmm7,%xmm9
+ addl %edi,%ebx
+ movl %ecx,%edi
+ pxor %xmm8,%xmm0
+ roll $5,%ecx
+ addl %esi,%ebx
+ xorl %eax,%ebp
+ addl %ecx,%ebx
+ movdqa %xmm0,%xmm8
+ movdqa %xmm9,48(%rsp)
+ movl %edx,%esi
+ xorl %ebp,%edx
+ addl 4(%rsp),%eax
+ andl %ebp,%esi
+ pslld $2,%xmm0
+ andl %edx,%edi
+ rorl $7,%ecx
+ psrld $30,%xmm8
+ addl %esi,%eax
+ movl %ebx,%esi
+ roll $5,%ebx
+ addl %edi,%eax
+ xorl %ebp,%edx
+ addl %ebx,%eax
+ por %xmm8,%xmm0
+ movl %ecx,%edi
+.byte 102,69,15,56,220,222
+ movups 128(%r15),%xmm15
+ xorl %edx,%ecx
+ movdqa %xmm0,%xmm9
+ addl 8(%rsp),%ebp
+ andl %edx,%edi
+ andl %ecx,%esi
+ rorl $7,%ebx
+ addl %edi,%ebp
+ movl %eax,%edi
+ roll $5,%eax
+ addl %esi,%ebp
+ xorl %edx,%ecx
+ addl %eax,%ebp
+ movl %ebx,%esi
+ xorl %ecx,%ebx
+ addl 12(%rsp),%edx
+ andl %ecx,%esi
+ andl %ebx,%edi
+ rorl $7,%eax
+ addl %esi,%edx
+ movl %ebp,%esi
+ roll $5,%ebp
+.byte 102,69,15,56,220,223
+ movups 144(%r15),%xmm14
+ addl %edi,%edx
+ xorl %ecx,%ebx
+ addl %ebp,%edx
+ movl %eax,%edi
+ pxor %xmm5,%xmm1
+.byte 102,68,15,58,15,207,8
+ xorl %ebx,%eax
+ addl 16(%rsp),%ecx
+ andl %ebx,%edi
+ pxor %xmm2,%xmm1
+ andl %eax,%esi
+ rorl $7,%ebp
+ movdqa %xmm10,%xmm8
+ paddd %xmm0,%xmm10
+ addl %edi,%ecx
+ movl %edx,%edi
+ pxor %xmm9,%xmm1
+ roll $5,%edx
+ addl %esi,%ecx
+ xorl %ebx,%eax
+ addl %edx,%ecx
+ movdqa %xmm1,%xmm9
+ movdqa %xmm10,0(%rsp)
+ movl %ebp,%esi
+ xorl %eax,%ebp
+ addl 20(%rsp),%ebx
+ andl %eax,%esi
+ pslld $2,%xmm1
+ andl %ebp,%edi
+.byte 102,69,15,56,220,222
+ movups 160(%r15),%xmm15
+ rorl $7,%edx
+ psrld $30,%xmm9
+ addl %esi,%ebx
+ movl %ecx,%esi
+ roll $5,%ecx
+ addl %edi,%ebx
+ xorl %eax,%ebp
+ addl %ecx,%ebx
+ por %xmm9,%xmm1
+ movl %edx,%edi
+ xorl %ebp,%edx
+ movdqa %xmm1,%xmm10
+ addl 24(%rsp),%eax
+ andl %ebp,%edi
+ andl %edx,%esi
+ rorl $7,%ecx
+ addl %edi,%eax
+ movl %ebx,%edi
+ roll $5,%ebx
+ addl %esi,%eax
+ xorl %ebp,%edx
+ addl %ebx,%eax
+ movl %ecx,%esi
+ cmpl $11,%r8d
+ jb .Laesenclast3
+ movups 176(%r15),%xmm14
+.byte 102,69,15,56,220,223
+ movups 192(%r15),%xmm15
+.byte 102,69,15,56,220,222
+ je .Laesenclast3
+ movups 208(%r15),%xmm14
+.byte 102,69,15,56,220,223
+ movups 224(%r15),%xmm15
+.byte 102,69,15,56,220,222
+.Laesenclast3:
+.byte 102,69,15,56,221,223
+ movups 16(%r15),%xmm14
+ xorl %edx,%ecx
+ addl 28(%rsp),%ebp
+ andl %edx,%esi
+ andl %ecx,%edi
+ rorl $7,%ebx
+ addl %esi,%ebp
+ movl %eax,%esi
+ roll $5,%eax
+ addl %edi,%ebp
+ xorl %edx,%ecx
+ addl %eax,%ebp
+ movl %ebx,%edi
+ pxor %xmm6,%xmm2
+.byte 102,68,15,58,15,208,8
+ xorl %ecx,%ebx
+ addl 32(%rsp),%edx
+ andl %ecx,%edi
+ pxor %xmm3,%xmm2
+ andl %ebx,%esi
+ rorl $7,%eax
+ movdqa %xmm8,%xmm9
+ paddd %xmm1,%xmm8
+ addl %edi,%edx
+ movl %ebp,%edi
+ pxor %xmm10,%xmm2
+ roll $5,%ebp
+ movups 48(%r12),%xmm12
+ xorps %xmm13,%xmm12
+ movups %xmm11,32(%r13,%r12,1)
+ xorps %xmm12,%xmm11
+.byte 102,69,15,56,220,222
+ movups 32(%r15),%xmm15
+ addl %esi,%edx
+ xorl %ecx,%ebx
+ addl %ebp,%edx
+ movdqa %xmm2,%xmm10
+ movdqa %xmm8,16(%rsp)
+ movl %eax,%esi
+ xorl %ebx,%eax
+ addl 36(%rsp),%ecx
+ andl %ebx,%esi
+ pslld $2,%xmm2
+ andl %eax,%edi
+ rorl $7,%ebp
+ psrld $30,%xmm10
+ addl %esi,%ecx
+ movl %edx,%esi
+ roll $5,%edx
+ addl %edi,%ecx
+ xorl %ebx,%eax
+ addl %edx,%ecx
+ por %xmm10,%xmm2
+ movl %ebp,%edi
+ xorl %eax,%ebp
+ movdqa %xmm2,%xmm8
+ addl 40(%rsp),%ebx
+ andl %eax,%edi
+ andl %ebp,%esi
+.byte 102,69,15,56,220,223
+ movups 48(%r15),%xmm14
+ rorl $7,%edx
+ addl %edi,%ebx
+ movl %ecx,%edi
+ roll $5,%ecx
+ addl %esi,%ebx
+ xorl %eax,%ebp
+ addl %ecx,%ebx
+ movl %edx,%esi
+ xorl %ebp,%edx
+ addl 44(%rsp),%eax
+ andl %ebp,%esi
+ andl %edx,%edi
+ rorl $7,%ecx
+ addl %esi,%eax
+ movl %ebx,%esi
+ roll $5,%ebx
+ addl %edi,%eax
+ xorl %ebp,%edx
+ addl %ebx,%eax
+ addl 48(%rsp),%ebp
+.byte 102,69,15,56,220,222
+ movups 64(%r15),%xmm15
+ pxor %xmm7,%xmm3
+.byte 102,68,15,58,15,193,8
+ xorl %edx,%esi
+ movl %eax,%edi
+ roll $5,%eax
+ pxor %xmm4,%xmm3
+ xorl %ecx,%esi
+ addl %eax,%ebp
+ movdqa %xmm9,%xmm10
+ paddd %xmm2,%xmm9
+ rorl $7,%ebx
+ addl %esi,%ebp
+ pxor %xmm8,%xmm3
+ addl 52(%rsp),%edx
+ xorl %ecx,%edi
+ movl %ebp,%esi
+ roll $5,%ebp
+ movdqa %xmm3,%xmm8
+ movdqa %xmm9,32(%rsp)
+ xorl %ebx,%edi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %edi,%edx
+ pslld $2,%xmm3
+ addl 56(%rsp),%ecx
+ xorl %ebx,%esi
+ psrld $30,%xmm8
+ movl %edx,%edi
+ roll $5,%edx
+ xorl %eax,%esi
+.byte 102,69,15,56,220,223
+ movups 80(%r15),%xmm14
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %esi,%ecx
+ por %xmm8,%xmm3
+ addl 60(%rsp),%ebx
+ xorl %eax,%edi
+ movl %ecx,%esi
+ roll $5,%ecx
+ xorl %ebp,%edi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %edi,%ebx
+ addl 0(%rsp),%eax
+ paddd %xmm3,%xmm10
+ xorl %ebp,%esi
+ movl %ebx,%edi
+ roll $5,%ebx
+ xorl %edx,%esi
+ movdqa %xmm10,48(%rsp)
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %esi,%eax
+ addl 4(%rsp),%ebp
+.byte 102,69,15,56,220,222
+ movups 96(%r15),%xmm15
+ xorl %edx,%edi
+ movl %eax,%esi
+ roll $5,%eax
+ xorl %ecx,%edi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %edi,%ebp
+ addl 8(%rsp),%edx
+ xorl %ecx,%esi
+ movl %ebp,%edi
+ roll $5,%ebp
+ xorl %ebx,%esi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %esi,%edx
+ addl 12(%rsp),%ecx
+ xorl %ebx,%edi
+ movl %edx,%esi
+ roll $5,%edx
+ xorl %eax,%edi
+.byte 102,69,15,56,220,223
+ movups 112(%r15),%xmm14
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %edi,%ecx
+ cmpq %r14,%r10
+ je .Ldone_ssse3
+ movdqa 64(%r11),%xmm6
+ movdqa 0(%r11),%xmm9
+ movdqu 0(%r10),%xmm0
+ movdqu 16(%r10),%xmm1
+ movdqu 32(%r10),%xmm2
+ movdqu 48(%r10),%xmm3
+.byte 102,15,56,0,198
+ addq $64,%r10
+ addl 16(%rsp),%ebx
+ xorl %eax,%esi
+.byte 102,15,56,0,206
+ movl %ecx,%edi
+ roll $5,%ecx
+ paddd %xmm9,%xmm0
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ movdqa %xmm0,0(%rsp)
+ addl 20(%rsp),%eax
+ xorl %ebp,%edi
+ psubd %xmm9,%xmm0
+ movl %ebx,%esi
+ roll $5,%ebx
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ addl 24(%rsp),%ebp
+.byte 102,69,15,56,220,222
+ movups 128(%r15),%xmm15
+ xorl %edx,%esi
+ movl %eax,%edi
+ roll $5,%eax
+ xorl %ecx,%esi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %esi,%ebp
+ addl 28(%rsp),%edx
+ xorl %ecx,%edi
+ movl %ebp,%esi
+ roll $5,%ebp
+ xorl %ebx,%edi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %edi,%edx
+ addl 32(%rsp),%ecx
+ xorl %ebx,%esi
+.byte 102,15,56,0,214
+ movl %edx,%edi
+ roll $5,%edx
+ paddd %xmm9,%xmm1
+ xorl %eax,%esi
+.byte 102,69,15,56,220,223
+ movups 144(%r15),%xmm14
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %esi,%ecx
+ movdqa %xmm1,16(%rsp)
+ addl 36(%rsp),%ebx
+ xorl %eax,%edi
+ psubd %xmm9,%xmm1
+ movl %ecx,%esi
+ roll $5,%ecx
+ xorl %ebp,%edi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %edi,%ebx
+ addl 40(%rsp),%eax
+ xorl %ebp,%esi
+ movl %ebx,%edi
+ roll $5,%ebx
+ xorl %edx,%esi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %esi,%eax
+ addl 44(%rsp),%ebp
+.byte 102,69,15,56,220,222
+ movups 160(%r15),%xmm15
+ xorl %edx,%edi
+ movl %eax,%esi
+ roll $5,%eax
+ xorl %ecx,%edi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %edi,%ebp
+ addl 48(%rsp),%edx
+ xorl %ecx,%esi
+.byte 102,15,56,0,222
+ movl %ebp,%edi
+ roll $5,%ebp
+ paddd %xmm9,%xmm2
+ xorl %ebx,%esi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %esi,%edx
+ movdqa %xmm2,32(%rsp)
+ addl 52(%rsp),%ecx
+ xorl %ebx,%edi
+ psubd %xmm9,%xmm2
+ movl %edx,%esi
+ roll $5,%edx
+ xorl %eax,%edi
+ cmpl $11,%r8d
+ jb .Laesenclast4
+ movups 176(%r15),%xmm14
+.byte 102,69,15,56,220,223
+ movups 192(%r15),%xmm15
+.byte 102,69,15,56,220,222
+ je .Laesenclast4
+ movups 208(%r15),%xmm14
+.byte 102,69,15,56,220,223
+ movups 224(%r15),%xmm15
+.byte 102,69,15,56,220,222
+.Laesenclast4:
+.byte 102,69,15,56,221,223
+ movups 16(%r15),%xmm14
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %edi,%ecx
+ addl 56(%rsp),%ebx
+ xorl %eax,%esi
+ movl %ecx,%edi
+ roll $5,%ecx
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ addl 60(%rsp),%eax
+ xorl %ebp,%edi
+ movl %ebx,%esi
+ roll $5,%ebx
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ movups %xmm11,48(%r13,%r12,1)
+ leaq 64(%r12),%r12
+
+ addl 0(%r9),%eax
+ addl 4(%r9),%esi
+ addl 8(%r9),%ecx
+ addl 12(%r9),%edx
+ movl %eax,0(%r9)
+ addl 16(%r9),%ebp
+ movl %esi,4(%r9)
+ movl %esi,%ebx
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+ movl %ebp,16(%r9)
+ jmp .Loop_ssse3
+
+.align 16
+.Ldone_ssse3:
+ addl 16(%rsp),%ebx
+ xorl %eax,%esi
+ movl %ecx,%edi
+ roll $5,%ecx
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ addl 20(%rsp),%eax
+ xorl %ebp,%edi
+ movl %ebx,%esi
+ roll $5,%ebx
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ addl 24(%rsp),%ebp
+.byte 102,69,15,56,220,222
+ movups 128(%r15),%xmm15
+ xorl %edx,%esi
+ movl %eax,%edi
+ roll $5,%eax
+ xorl %ecx,%esi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %esi,%ebp
+ addl 28(%rsp),%edx
+ xorl %ecx,%edi
+ movl %ebp,%esi
+ roll $5,%ebp
+ xorl %ebx,%edi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %edi,%edx
+ addl 32(%rsp),%ecx
+ xorl %ebx,%esi
+ movl %edx,%edi
+ roll $5,%edx
+ xorl %eax,%esi
+.byte 102,69,15,56,220,223
+ movups 144(%r15),%xmm14
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %esi,%ecx
+ addl 36(%rsp),%ebx
+ xorl %eax,%edi
+ movl %ecx,%esi
+ roll $5,%ecx
+ xorl %ebp,%edi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %edi,%ebx
+ addl 40(%rsp),%eax
+ xorl %ebp,%esi
+ movl %ebx,%edi
+ roll $5,%ebx
+ xorl %edx,%esi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %esi,%eax
+ addl 44(%rsp),%ebp
+.byte 102,69,15,56,220,222
+ movups 160(%r15),%xmm15
+ xorl %edx,%edi
+ movl %eax,%esi
+ roll $5,%eax
+ xorl %ecx,%edi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %edi,%ebp
+ addl 48(%rsp),%edx
+ xorl %ecx,%esi
+ movl %ebp,%edi
+ roll $5,%ebp
+ xorl %ebx,%esi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %esi,%edx
+ addl 52(%rsp),%ecx
+ xorl %ebx,%edi
+ movl %edx,%esi
+ roll $5,%edx
+ xorl %eax,%edi
+ cmpl $11,%r8d
+ jb .Laesenclast5
+ movups 176(%r15),%xmm14
+.byte 102,69,15,56,220,223
+ movups 192(%r15),%xmm15
+.byte 102,69,15,56,220,222
+ je .Laesenclast5
+ movups 208(%r15),%xmm14
+.byte 102,69,15,56,220,223
+ movups 224(%r15),%xmm15
+.byte 102,69,15,56,220,222
+.Laesenclast5:
+.byte 102,69,15,56,221,223
+ movups 16(%r15),%xmm14
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %edi,%ecx
+ addl 56(%rsp),%ebx
+ xorl %eax,%esi
+ movl %ecx,%edi
+ roll $5,%ecx
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ addl 60(%rsp),%eax
+ xorl %ebp,%edi
+ movl %ebx,%esi
+ roll $5,%ebx
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ movups %xmm11,48(%r13,%r12,1)
+ movq 88(%rsp),%r8
+
+ addl 0(%r9),%eax
+ addl 4(%r9),%esi
+ addl 8(%r9),%ecx
+ movl %eax,0(%r9)
+ addl 12(%r9),%edx
+ movl %esi,4(%r9)
+ addl 16(%r9),%ebp
+ movl %ecx,8(%r9)
+ movl %edx,12(%r9)
+ movl %ebp,16(%r9)
+ movups %xmm11,(%r8)
+ leaq 104(%rsp),%rsi
+ movq 0(%rsi),%r15
+ movq 8(%rsi),%r14
+ movq 16(%rsi),%r13
+ movq 24(%rsi),%r12
+ movq 32(%rsi),%rbp
+ movq 40(%rsi),%rbx
+ leaq 48(%rsi),%rsp
+.Lepilogue_ssse3:
+ .byte 0xf3,0xc3
+.size aesni_cbc_sha1_enc_ssse3,.-aesni_cbc_sha1_enc_ssse3
+.align 64
+K_XX_XX:
+.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999
+.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1
+.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc
+.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6
+.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
+
+.byte 65,69,83,78,73,45,67,66,67,43,83,72,65,49,32,115,116,105,116,99,104,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
+.align 64
diff --git a/secure/lib/libcrypto/amd64/aesni-x86_64.S b/secure/lib/libcrypto/amd64/aesni-x86_64.S
new file mode 100644
index 000000000000..df677a78d794
--- /dev/null
+++ b/secure/lib/libcrypto/amd64/aesni-x86_64.S
@@ -0,0 +1,2536 @@
+ # $FreeBSD$
+.text
+.globl aesni_encrypt
+.type aesni_encrypt,@function
+.align 16
+aesni_encrypt:
+ movups (%rdi),%xmm2
+ movl 240(%rdx),%eax
+ movups (%rdx),%xmm0
+ movups 16(%rdx),%xmm1
+ leaq 32(%rdx),%rdx
+ xorps %xmm0,%xmm2
+.Loop_enc1_1:
+.byte 102,15,56,220,209
+ decl %eax
+ movups (%rdx),%xmm1
+ leaq 16(%rdx),%rdx
+ jnz .Loop_enc1_1
+.byte 102,15,56,221,209
+ movups %xmm2,(%rsi)
+ .byte 0xf3,0xc3
+.size aesni_encrypt,.-aesni_encrypt
+
+.globl aesni_decrypt
+.type aesni_decrypt,@function
+.align 16
+aesni_decrypt:
+ movups (%rdi),%xmm2
+ movl 240(%rdx),%eax
+ movups (%rdx),%xmm0
+ movups 16(%rdx),%xmm1
+ leaq 32(%rdx),%rdx
+ xorps %xmm0,%xmm2
+.Loop_dec1_2:
+.byte 102,15,56,222,209
+ decl %eax
+ movups (%rdx),%xmm1
+ leaq 16(%rdx),%rdx
+ jnz .Loop_dec1_2
+.byte 102,15,56,223,209
+ movups %xmm2,(%rsi)
+ .byte 0xf3,0xc3
+.size aesni_decrypt, .-aesni_decrypt
+.type _aesni_encrypt3,@function
+.align 16
+_aesni_encrypt3:
+ movups (%rcx),%xmm0
+ shrl $1,%eax
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+ xorps %xmm0,%xmm3
+ xorps %xmm0,%xmm4
+ movups (%rcx),%xmm0
+
+.Lenc_loop3:
+.byte 102,15,56,220,209
+.byte 102,15,56,220,217
+ decl %eax
+.byte 102,15,56,220,225
+ movups 16(%rcx),%xmm1
+.byte 102,15,56,220,208
+.byte 102,15,56,220,216
+ leaq 32(%rcx),%rcx
+.byte 102,15,56,220,224
+ movups (%rcx),%xmm0
+ jnz .Lenc_loop3
+
+.byte 102,15,56,220,209
+.byte 102,15,56,220,217
+.byte 102,15,56,220,225
+.byte 102,15,56,221,208
+.byte 102,15,56,221,216
+.byte 102,15,56,221,224
+ .byte 0xf3,0xc3
+.size _aesni_encrypt3,.-_aesni_encrypt3
+.type _aesni_decrypt3,@function
+.align 16
+_aesni_decrypt3:
+ movups (%rcx),%xmm0
+ shrl $1,%eax
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+ xorps %xmm0,%xmm3
+ xorps %xmm0,%xmm4
+ movups (%rcx),%xmm0
+
+.Ldec_loop3:
+.byte 102,15,56,222,209
+.byte 102,15,56,222,217
+ decl %eax
+.byte 102,15,56,222,225
+ movups 16(%rcx),%xmm1
+.byte 102,15,56,222,208
+.byte 102,15,56,222,216
+ leaq 32(%rcx),%rcx
+.byte 102,15,56,222,224
+ movups (%rcx),%xmm0
+ jnz .Ldec_loop3
+
+.byte 102,15,56,222,209
+.byte 102,15,56,222,217
+.byte 102,15,56,222,225
+.byte 102,15,56,223,208
+.byte 102,15,56,223,216
+.byte 102,15,56,223,224
+ .byte 0xf3,0xc3
+.size _aesni_decrypt3,.-_aesni_decrypt3
+.type _aesni_encrypt4,@function
+.align 16
+_aesni_encrypt4:
+ movups (%rcx),%xmm0
+ shrl $1,%eax
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+ xorps %xmm0,%xmm3
+ xorps %xmm0,%xmm4
+ xorps %xmm0,%xmm5
+ movups (%rcx),%xmm0
+
+.Lenc_loop4:
+.byte 102,15,56,220,209
+.byte 102,15,56,220,217
+ decl %eax
+.byte 102,15,56,220,225
+.byte 102,15,56,220,233
+ movups 16(%rcx),%xmm1
+.byte 102,15,56,220,208
+.byte 102,15,56,220,216
+ leaq 32(%rcx),%rcx
+.byte 102,15,56,220,224
+.byte 102,15,56,220,232
+ movups (%rcx),%xmm0
+ jnz .Lenc_loop4
+
+.byte 102,15,56,220,209
+.byte 102,15,56,220,217
+.byte 102,15,56,220,225
+.byte 102,15,56,220,233
+.byte 102,15,56,221,208
+.byte 102,15,56,221,216
+.byte 102,15,56,221,224
+.byte 102,15,56,221,232
+ .byte 0xf3,0xc3
+.size _aesni_encrypt4,.-_aesni_encrypt4
+.type _aesni_decrypt4,@function
+.align 16
+_aesni_decrypt4:
+ movups (%rcx),%xmm0
+ shrl $1,%eax
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+ xorps %xmm0,%xmm3
+ xorps %xmm0,%xmm4
+ xorps %xmm0,%xmm5
+ movups (%rcx),%xmm0
+
+.Ldec_loop4:
+.byte 102,15,56,222,209
+.byte 102,15,56,222,217
+ decl %eax
+.byte 102,15,56,222,225
+.byte 102,15,56,222,233
+ movups 16(%rcx),%xmm1
+.byte 102,15,56,222,208
+.byte 102,15,56,222,216
+ leaq 32(%rcx),%rcx
+.byte 102,15,56,222,224
+.byte 102,15,56,222,232
+ movups (%rcx),%xmm0
+ jnz .Ldec_loop4
+
+.byte 102,15,56,222,209
+.byte 102,15,56,222,217
+.byte 102,15,56,222,225
+.byte 102,15,56,222,233
+.byte 102,15,56,223,208
+.byte 102,15,56,223,216
+.byte 102,15,56,223,224
+.byte 102,15,56,223,232
+ .byte 0xf3,0xc3
+.size _aesni_decrypt4,.-_aesni_decrypt4
+.type _aesni_encrypt6,@function
+.align 16
+_aesni_encrypt6:
+ movups (%rcx),%xmm0
+ shrl $1,%eax
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+ pxor %xmm0,%xmm3
+.byte 102,15,56,220,209
+ pxor %xmm0,%xmm4
+.byte 102,15,56,220,217
+ pxor %xmm0,%xmm5
+.byte 102,15,56,220,225
+ pxor %xmm0,%xmm6
+.byte 102,15,56,220,233
+ pxor %xmm0,%xmm7
+ decl %eax
+.byte 102,15,56,220,241
+ movups (%rcx),%xmm0
+.byte 102,15,56,220,249
+ jmp .Lenc_loop6_enter
+.align 16
+.Lenc_loop6:
+.byte 102,15,56,220,209
+.byte 102,15,56,220,217
+ decl %eax
+.byte 102,15,56,220,225
+.byte 102,15,56,220,233
+.byte 102,15,56,220,241
+.byte 102,15,56,220,249
+.Lenc_loop6_enter:
+ movups 16(%rcx),%xmm1
+.byte 102,15,56,220,208
+.byte 102,15,56,220,216
+ leaq 32(%rcx),%rcx
+.byte 102,15,56,220,224
+.byte 102,15,56,220,232
+.byte 102,15,56,220,240
+.byte 102,15,56,220,248
+ movups (%rcx),%xmm0
+ jnz .Lenc_loop6
+
+.byte 102,15,56,220,209
+.byte 102,15,56,220,217
+.byte 102,15,56,220,225
+.byte 102,15,56,220,233
+.byte 102,15,56,220,241
+.byte 102,15,56,220,249
+.byte 102,15,56,221,208
+.byte 102,15,56,221,216
+.byte 102,15,56,221,224
+.byte 102,15,56,221,232
+.byte 102,15,56,221,240
+.byte 102,15,56,221,248
+ .byte 0xf3,0xc3
+.size _aesni_encrypt6,.-_aesni_encrypt6
+.type _aesni_decrypt6,@function
+.align 16
+_aesni_decrypt6:
+ movups (%rcx),%xmm0
+ shrl $1,%eax
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+ pxor %xmm0,%xmm3
+.byte 102,15,56,222,209
+ pxor %xmm0,%xmm4
+.byte 102,15,56,222,217
+ pxor %xmm0,%xmm5
+.byte 102,15,56,222,225
+ pxor %xmm0,%xmm6
+.byte 102,15,56,222,233
+ pxor %xmm0,%xmm7
+ decl %eax
+.byte 102,15,56,222,241
+ movups (%rcx),%xmm0
+.byte 102,15,56,222,249
+ jmp .Ldec_loop6_enter
+.align 16
+.Ldec_loop6:
+.byte 102,15,56,222,209
+.byte 102,15,56,222,217
+ decl %eax
+.byte 102,15,56,222,225
+.byte 102,15,56,222,233
+.byte 102,15,56,222,241
+.byte 102,15,56,222,249
+.Ldec_loop6_enter:
+ movups 16(%rcx),%xmm1
+.byte 102,15,56,222,208
+.byte 102,15,56,222,216
+ leaq 32(%rcx),%rcx
+.byte 102,15,56,222,224
+.byte 102,15,56,222,232
+.byte 102,15,56,222,240
+.byte 102,15,56,222,248
+ movups (%rcx),%xmm0
+ jnz .Ldec_loop6
+
+.byte 102,15,56,222,209
+.byte 102,15,56,222,217
+.byte 102,15,56,222,225
+.byte 102,15,56,222,233
+.byte 102,15,56,222,241
+.byte 102,15,56,222,249
+.byte 102,15,56,223,208
+.byte 102,15,56,223,216
+.byte 102,15,56,223,224
+.byte 102,15,56,223,232
+.byte 102,15,56,223,240
+.byte 102,15,56,223,248
+ .byte 0xf3,0xc3
+.size _aesni_decrypt6,.-_aesni_decrypt6
+.type _aesni_encrypt8,@function
+.align 16
+_aesni_encrypt8:
+ movups (%rcx),%xmm0
+ shrl $1,%eax
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+ xorps %xmm0,%xmm3
+.byte 102,15,56,220,209
+ pxor %xmm0,%xmm4
+.byte 102,15,56,220,217
+ pxor %xmm0,%xmm5
+.byte 102,15,56,220,225
+ pxor %xmm0,%xmm6
+.byte 102,15,56,220,233
+ pxor %xmm0,%xmm7
+ decl %eax
+.byte 102,15,56,220,241
+ pxor %xmm0,%xmm8
+.byte 102,15,56,220,249
+ pxor %xmm0,%xmm9
+ movups (%rcx),%xmm0
+.byte 102,68,15,56,220,193
+.byte 102,68,15,56,220,201
+ movups 16(%rcx),%xmm1
+ jmp .Lenc_loop8_enter
+.align 16
+.Lenc_loop8:
+.byte 102,15,56,220,209
+.byte 102,15,56,220,217
+ decl %eax
+.byte 102,15,56,220,225
+.byte 102,15,56,220,233
+.byte 102,15,56,220,241
+.byte 102,15,56,220,249
+.byte 102,68,15,56,220,193
+.byte 102,68,15,56,220,201
+ movups 16(%rcx),%xmm1
+.Lenc_loop8_enter:
+.byte 102,15,56,220,208
+.byte 102,15,56,220,216
+ leaq 32(%rcx),%rcx
+.byte 102,15,56,220,224
+.byte 102,15,56,220,232
+.byte 102,15,56,220,240
+.byte 102,15,56,220,248
+.byte 102,68,15,56,220,192
+.byte 102,68,15,56,220,200
+ movups (%rcx),%xmm0
+ jnz .Lenc_loop8
+
+.byte 102,15,56,220,209
+.byte 102,15,56,220,217
+.byte 102,15,56,220,225
+.byte 102,15,56,220,233
+.byte 102,15,56,220,241
+.byte 102,15,56,220,249
+.byte 102,68,15,56,220,193
+.byte 102,68,15,56,220,201
+.byte 102,15,56,221,208
+.byte 102,15,56,221,216
+.byte 102,15,56,221,224
+.byte 102,15,56,221,232
+.byte 102,15,56,221,240
+.byte 102,15,56,221,248
+.byte 102,68,15,56,221,192
+.byte 102,68,15,56,221,200
+ .byte 0xf3,0xc3
+.size _aesni_encrypt8,.-_aesni_encrypt8
+.type _aesni_decrypt8,@function
+.align 16
+_aesni_decrypt8:
+ movups (%rcx),%xmm0
+ shrl $1,%eax
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+ xorps %xmm0,%xmm3
+.byte 102,15,56,222,209
+ pxor %xmm0,%xmm4
+.byte 102,15,56,222,217
+ pxor %xmm0,%xmm5
+.byte 102,15,56,222,225
+ pxor %xmm0,%xmm6
+.byte 102,15,56,222,233
+ pxor %xmm0,%xmm7
+ decl %eax
+.byte 102,15,56,222,241
+ pxor %xmm0,%xmm8
+.byte 102,15,56,222,249
+ pxor %xmm0,%xmm9
+ movups (%rcx),%xmm0
+.byte 102,68,15,56,222,193
+.byte 102,68,15,56,222,201
+ movups 16(%rcx),%xmm1
+ jmp .Ldec_loop8_enter
+.align 16
+.Ldec_loop8:
+.byte 102,15,56,222,209
+.byte 102,15,56,222,217
+ decl %eax
+.byte 102,15,56,222,225
+.byte 102,15,56,222,233
+.byte 102,15,56,222,241
+.byte 102,15,56,222,249
+.byte 102,68,15,56,222,193
+.byte 102,68,15,56,222,201
+ movups 16(%rcx),%xmm1
+.Ldec_loop8_enter:
+.byte 102,15,56,222,208
+.byte 102,15,56,222,216
+ leaq 32(%rcx),%rcx
+.byte 102,15,56,222,224
+.byte 102,15,56,222,232
+.byte 102,15,56,222,240
+.byte 102,15,56,222,248
+.byte 102,68,15,56,222,192
+.byte 102,68,15,56,222,200
+ movups (%rcx),%xmm0
+ jnz .Ldec_loop8
+
+.byte 102,15,56,222,209
+.byte 102,15,56,222,217
+.byte 102,15,56,222,225
+.byte 102,15,56,222,233
+.byte 102,15,56,222,241
+.byte 102,15,56,222,249
+.byte 102,68,15,56,222,193
+.byte 102,68,15,56,222,201
+.byte 102,15,56,223,208
+.byte 102,15,56,223,216
+.byte 102,15,56,223,224
+.byte 102,15,56,223,232
+.byte 102,15,56,223,240
+.byte 102,15,56,223,248
+.byte 102,68,15,56,223,192
+.byte 102,68,15,56,223,200
+ .byte 0xf3,0xc3
+.size _aesni_decrypt8,.-_aesni_decrypt8
+.globl aesni_ecb_encrypt
+.type aesni_ecb_encrypt,@function
+.align 16
+aesni_ecb_encrypt:
+ andq $-16,%rdx
+ jz .Lecb_ret
+
+ movl 240(%rcx),%eax
+ movups (%rcx),%xmm0
+ movq %rcx,%r11
+ movl %eax,%r10d
+ testl %r8d,%r8d
+ jz .Lecb_decrypt
+
+ cmpq $128,%rdx
+ jb .Lecb_enc_tail
+
+ movdqu (%rdi),%xmm2
+ movdqu 16(%rdi),%xmm3
+ movdqu 32(%rdi),%xmm4
+ movdqu 48(%rdi),%xmm5
+ movdqu 64(%rdi),%xmm6
+ movdqu 80(%rdi),%xmm7
+ movdqu 96(%rdi),%xmm8
+ movdqu 112(%rdi),%xmm9
+ leaq 128(%rdi),%rdi
+ subq $128,%rdx
+ jmp .Lecb_enc_loop8_enter
+.align 16
+.Lecb_enc_loop8:
+ movups %xmm2,(%rsi)
+ movq %r11,%rcx
+ movdqu (%rdi),%xmm2
+ movl %r10d,%eax
+ movups %xmm3,16(%rsi)
+ movdqu 16(%rdi),%xmm3
+ movups %xmm4,32(%rsi)
+ movdqu 32(%rdi),%xmm4
+ movups %xmm5,48(%rsi)
+ movdqu 48(%rdi),%xmm5
+ movups %xmm6,64(%rsi)
+ movdqu 64(%rdi),%xmm6
+ movups %xmm7,80(%rsi)
+ movdqu 80(%rdi),%xmm7
+ movups %xmm8,96(%rsi)
+ movdqu 96(%rdi),%xmm8
+ movups %xmm9,112(%rsi)
+ leaq 128(%rsi),%rsi
+ movdqu 112(%rdi),%xmm9
+ leaq 128(%rdi),%rdi
+.Lecb_enc_loop8_enter:
+
+ call _aesni_encrypt8
+
+ subq $128,%rdx
+ jnc .Lecb_enc_loop8
+
+ movups %xmm2,(%rsi)
+ movq %r11,%rcx
+ movups %xmm3,16(%rsi)
+ movl %r10d,%eax
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ movups %xmm7,80(%rsi)
+ movups %xmm8,96(%rsi)
+ movups %xmm9,112(%rsi)
+ leaq 128(%rsi),%rsi
+ addq $128,%rdx
+ jz .Lecb_ret
+
+.Lecb_enc_tail:
+ movups (%rdi),%xmm2
+ cmpq $32,%rdx
+ jb .Lecb_enc_one
+ movups 16(%rdi),%xmm3
+ je .Lecb_enc_two
+ movups 32(%rdi),%xmm4
+ cmpq $64,%rdx
+ jb .Lecb_enc_three
+ movups 48(%rdi),%xmm5
+ je .Lecb_enc_four
+ movups 64(%rdi),%xmm6
+ cmpq $96,%rdx
+ jb .Lecb_enc_five
+ movups 80(%rdi),%xmm7
+ je .Lecb_enc_six
+ movdqu 96(%rdi),%xmm8
+ call _aesni_encrypt8
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ movups %xmm7,80(%rsi)
+ movups %xmm8,96(%rsi)
+ jmp .Lecb_ret
+.align 16
+.Lecb_enc_one:
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+.Loop_enc1_3:
+.byte 102,15,56,220,209
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz .Loop_enc1_3
+.byte 102,15,56,221,209
+ movups %xmm2,(%rsi)
+ jmp .Lecb_ret
+.align 16
+.Lecb_enc_two:
+ xorps %xmm4,%xmm4
+ call _aesni_encrypt3
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ jmp .Lecb_ret
+.align 16
+.Lecb_enc_three:
+ call _aesni_encrypt3
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ jmp .Lecb_ret
+.align 16
+.Lecb_enc_four:
+ call _aesni_encrypt4
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ jmp .Lecb_ret
+.align 16
+.Lecb_enc_five:
+ xorps %xmm7,%xmm7
+ call _aesni_encrypt6
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ jmp .Lecb_ret
+.align 16
+.Lecb_enc_six:
+ call _aesni_encrypt6
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ movups %xmm7,80(%rsi)
+ jmp .Lecb_ret
+
+.align 16
+.Lecb_decrypt:
+ cmpq $128,%rdx
+ jb .Lecb_dec_tail
+
+ movdqu (%rdi),%xmm2
+ movdqu 16(%rdi),%xmm3
+ movdqu 32(%rdi),%xmm4
+ movdqu 48(%rdi),%xmm5
+ movdqu 64(%rdi),%xmm6
+ movdqu 80(%rdi),%xmm7
+ movdqu 96(%rdi),%xmm8
+ movdqu 112(%rdi),%xmm9
+ leaq 128(%rdi),%rdi
+ subq $128,%rdx
+ jmp .Lecb_dec_loop8_enter
+.align 16
+.Lecb_dec_loop8:
+ movups %xmm2,(%rsi)
+ movq %r11,%rcx
+ movdqu (%rdi),%xmm2
+ movl %r10d,%eax
+ movups %xmm3,16(%rsi)
+ movdqu 16(%rdi),%xmm3
+ movups %xmm4,32(%rsi)
+ movdqu 32(%rdi),%xmm4
+ movups %xmm5,48(%rsi)
+ movdqu 48(%rdi),%xmm5
+ movups %xmm6,64(%rsi)
+ movdqu 64(%rdi),%xmm6
+ movups %xmm7,80(%rsi)
+ movdqu 80(%rdi),%xmm7
+ movups %xmm8,96(%rsi)
+ movdqu 96(%rdi),%xmm8
+ movups %xmm9,112(%rsi)
+ leaq 128(%rsi),%rsi
+ movdqu 112(%rdi),%xmm9
+ leaq 128(%rdi),%rdi
+.Lecb_dec_loop8_enter:
+
+ call _aesni_decrypt8
+
+ movups (%r11),%xmm0
+ subq $128,%rdx
+ jnc .Lecb_dec_loop8
+
+ movups %xmm2,(%rsi)
+ movq %r11,%rcx
+ movups %xmm3,16(%rsi)
+ movl %r10d,%eax
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ movups %xmm7,80(%rsi)
+ movups %xmm8,96(%rsi)
+ movups %xmm9,112(%rsi)
+ leaq 128(%rsi),%rsi
+ addq $128,%rdx
+ jz .Lecb_ret
+
+.Lecb_dec_tail:
+ movups (%rdi),%xmm2
+ cmpq $32,%rdx
+ jb .Lecb_dec_one
+ movups 16(%rdi),%xmm3
+ je .Lecb_dec_two
+ movups 32(%rdi),%xmm4
+ cmpq $64,%rdx
+ jb .Lecb_dec_three
+ movups 48(%rdi),%xmm5
+ je .Lecb_dec_four
+ movups 64(%rdi),%xmm6
+ cmpq $96,%rdx
+ jb .Lecb_dec_five
+ movups 80(%rdi),%xmm7
+ je .Lecb_dec_six
+ movups 96(%rdi),%xmm8
+ movups (%rcx),%xmm0
+ call _aesni_decrypt8
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ movups %xmm7,80(%rsi)
+ movups %xmm8,96(%rsi)
+ jmp .Lecb_ret
+.align 16
+.Lecb_dec_one:
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+.Loop_dec1_4:
+.byte 102,15,56,222,209
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz .Loop_dec1_4
+.byte 102,15,56,223,209
+ movups %xmm2,(%rsi)
+ jmp .Lecb_ret
+.align 16
+.Lecb_dec_two:
+ xorps %xmm4,%xmm4
+ call _aesni_decrypt3
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ jmp .Lecb_ret
+.align 16
+.Lecb_dec_three:
+ call _aesni_decrypt3
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ jmp .Lecb_ret
+.align 16
+.Lecb_dec_four:
+ call _aesni_decrypt4
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ jmp .Lecb_ret
+.align 16
+.Lecb_dec_five:
+ xorps %xmm7,%xmm7
+ call _aesni_decrypt6
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ jmp .Lecb_ret
+.align 16
+.Lecb_dec_six:
+ call _aesni_decrypt6
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ movups %xmm7,80(%rsi)
+
+.Lecb_ret:
+ .byte 0xf3,0xc3
+.size aesni_ecb_encrypt,.-aesni_ecb_encrypt
+.globl aesni_ccm64_encrypt_blocks
+.type aesni_ccm64_encrypt_blocks,@function
+.align 16
+aesni_ccm64_encrypt_blocks:
+ movl 240(%rcx),%eax
+ movdqu (%r8),%xmm9
+ movdqa .Lincrement64(%rip),%xmm6
+ movdqa .Lbswap_mask(%rip),%xmm7
+
+ shrl $1,%eax
+ leaq 0(%rcx),%r11
+ movdqu (%r9),%xmm3
+ movdqa %xmm9,%xmm2
+ movl %eax,%r10d
+.byte 102,68,15,56,0,207
+ jmp .Lccm64_enc_outer
+.align 16
+.Lccm64_enc_outer:
+ movups (%r11),%xmm0
+ movl %r10d,%eax
+ movups (%rdi),%xmm8
+
+ xorps %xmm0,%xmm2
+ movups 16(%r11),%xmm1
+ xorps %xmm8,%xmm0
+ leaq 32(%r11),%rcx
+ xorps %xmm0,%xmm3
+ movups (%rcx),%xmm0
+
+.Lccm64_enc2_loop:
+.byte 102,15,56,220,209
+ decl %eax
+.byte 102,15,56,220,217
+ movups 16(%rcx),%xmm1
+.byte 102,15,56,220,208
+ leaq 32(%rcx),%rcx
+.byte 102,15,56,220,216
+ movups 0(%rcx),%xmm0
+ jnz .Lccm64_enc2_loop
+.byte 102,15,56,220,209
+.byte 102,15,56,220,217
+ paddq %xmm6,%xmm9
+.byte 102,15,56,221,208
+.byte 102,15,56,221,216
+
+ decq %rdx
+ leaq 16(%rdi),%rdi
+ xorps %xmm2,%xmm8
+ movdqa %xmm9,%xmm2
+ movups %xmm8,(%rsi)
+ leaq 16(%rsi),%rsi
+.byte 102,15,56,0,215
+ jnz .Lccm64_enc_outer
+
+ movups %xmm3,(%r9)
+ .byte 0xf3,0xc3
+.size aesni_ccm64_encrypt_blocks,.-aesni_ccm64_encrypt_blocks
+.globl aesni_ccm64_decrypt_blocks
+.type aesni_ccm64_decrypt_blocks,@function
+.align 16
+aesni_ccm64_decrypt_blocks:
+ movl 240(%rcx),%eax
+ movups (%r8),%xmm9
+ movdqu (%r9),%xmm3
+ movdqa .Lincrement64(%rip),%xmm6
+ movdqa .Lbswap_mask(%rip),%xmm7
+
+ movaps %xmm9,%xmm2
+ movl %eax,%r10d
+ movq %rcx,%r11
+.byte 102,68,15,56,0,207
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+.Loop_enc1_5:
+.byte 102,15,56,220,209
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz .Loop_enc1_5
+.byte 102,15,56,221,209
+ movups (%rdi),%xmm8
+ paddq %xmm6,%xmm9
+ leaq 16(%rdi),%rdi
+ jmp .Lccm64_dec_outer
+.align 16
+.Lccm64_dec_outer:
+ xorps %xmm2,%xmm8
+ movdqa %xmm9,%xmm2
+ movl %r10d,%eax
+ movups %xmm8,(%rsi)
+ leaq 16(%rsi),%rsi
+.byte 102,15,56,0,215
+
+ subq $1,%rdx
+ jz .Lccm64_dec_break
+
+ movups (%r11),%xmm0
+ shrl $1,%eax
+ movups 16(%r11),%xmm1
+ xorps %xmm0,%xmm8
+ leaq 32(%r11),%rcx
+ xorps %xmm0,%xmm2
+ xorps %xmm8,%xmm3
+ movups (%rcx),%xmm0
+
+.Lccm64_dec2_loop:
+.byte 102,15,56,220,209
+ decl %eax
+.byte 102,15,56,220,217
+ movups 16(%rcx),%xmm1
+.byte 102,15,56,220,208
+ leaq 32(%rcx),%rcx
+.byte 102,15,56,220,216
+ movups 0(%rcx),%xmm0
+ jnz .Lccm64_dec2_loop
+ movups (%rdi),%xmm8
+ paddq %xmm6,%xmm9
+.byte 102,15,56,220,209
+.byte 102,15,56,220,217
+ leaq 16(%rdi),%rdi
+.byte 102,15,56,221,208
+.byte 102,15,56,221,216
+ jmp .Lccm64_dec_outer
+
+.align 16
+.Lccm64_dec_break:
+
+ movups (%r11),%xmm0
+ movups 16(%r11),%xmm1
+ xorps %xmm0,%xmm8
+ leaq 32(%r11),%r11
+ xorps %xmm8,%xmm3
+.Loop_enc1_6:
+.byte 102,15,56,220,217
+ decl %eax
+ movups (%r11),%xmm1
+ leaq 16(%r11),%r11
+ jnz .Loop_enc1_6
+.byte 102,15,56,221,217
+ movups %xmm3,(%r9)
+ .byte 0xf3,0xc3
+.size aesni_ccm64_decrypt_blocks,.-aesni_ccm64_decrypt_blocks
+.globl aesni_ctr32_encrypt_blocks
+.type aesni_ctr32_encrypt_blocks,@function
+.align 16
+aesni_ctr32_encrypt_blocks:
+ cmpq $1,%rdx
+ je .Lctr32_one_shortcut
+
+ movdqu (%r8),%xmm14
+ movdqa .Lbswap_mask(%rip),%xmm15
+ xorl %eax,%eax
+.byte 102,69,15,58,22,242,3
+.byte 102,68,15,58,34,240,3
+
+ movl 240(%rcx),%eax
+ bswapl %r10d
+ pxor %xmm12,%xmm12
+ pxor %xmm13,%xmm13
+.byte 102,69,15,58,34,226,0
+ leaq 3(%r10),%r11
+.byte 102,69,15,58,34,235,0
+ incl %r10d
+.byte 102,69,15,58,34,226,1
+ incq %r11
+.byte 102,69,15,58,34,235,1
+ incl %r10d
+.byte 102,69,15,58,34,226,2
+ incq %r11
+.byte 102,69,15,58,34,235,2
+ movdqa %xmm12,-40(%rsp)
+.byte 102,69,15,56,0,231
+ movdqa %xmm13,-24(%rsp)
+.byte 102,69,15,56,0,239
+
+ pshufd $192,%xmm12,%xmm2
+ pshufd $128,%xmm12,%xmm3
+ pshufd $64,%xmm12,%xmm4
+ cmpq $6,%rdx
+ jb .Lctr32_tail
+ shrl $1,%eax
+ movq %rcx,%r11
+ movl %eax,%r10d
+ subq $6,%rdx
+ jmp .Lctr32_loop6
+
+.align 16
+.Lctr32_loop6:
+ pshufd $192,%xmm13,%xmm5
+ por %xmm14,%xmm2
+ movups (%r11),%xmm0
+ pshufd $128,%xmm13,%xmm6
+ por %xmm14,%xmm3
+ movups 16(%r11),%xmm1
+ pshufd $64,%xmm13,%xmm7
+ por %xmm14,%xmm4
+ por %xmm14,%xmm5
+ xorps %xmm0,%xmm2
+ por %xmm14,%xmm6
+ por %xmm14,%xmm7
+
+
+
+
+ pxor %xmm0,%xmm3
+.byte 102,15,56,220,209
+ leaq 32(%r11),%rcx
+ pxor %xmm0,%xmm4
+.byte 102,15,56,220,217
+ movdqa .Lincrement32(%rip),%xmm13
+ pxor %xmm0,%xmm5
+.byte 102,15,56,220,225
+ movdqa -40(%rsp),%xmm12
+ pxor %xmm0,%xmm6
+.byte 102,15,56,220,233
+ pxor %xmm0,%xmm7
+ movups (%rcx),%xmm0
+ decl %eax
+.byte 102,15,56,220,241
+.byte 102,15,56,220,249
+ jmp .Lctr32_enc_loop6_enter
+.align 16
+.Lctr32_enc_loop6:
+.byte 102,15,56,220,209
+.byte 102,15,56,220,217
+ decl %eax
+.byte 102,15,56,220,225
+.byte 102,15,56,220,233
+.byte 102,15,56,220,241
+.byte 102,15,56,220,249
+.Lctr32_enc_loop6_enter:
+ movups 16(%rcx),%xmm1
+.byte 102,15,56,220,208
+.byte 102,15,56,220,216
+ leaq 32(%rcx),%rcx
+.byte 102,15,56,220,224
+.byte 102,15,56,220,232
+.byte 102,15,56,220,240
+.byte 102,15,56,220,248
+ movups (%rcx),%xmm0
+ jnz .Lctr32_enc_loop6
+
+.byte 102,15,56,220,209
+ paddd %xmm13,%xmm12
+.byte 102,15,56,220,217
+ paddd -24(%rsp),%xmm13
+.byte 102,15,56,220,225
+ movdqa %xmm12,-40(%rsp)
+.byte 102,15,56,220,233
+ movdqa %xmm13,-24(%rsp)
+.byte 102,15,56,220,241
+.byte 102,69,15,56,0,231
+.byte 102,15,56,220,249
+.byte 102,69,15,56,0,239
+
+.byte 102,15,56,221,208
+ movups (%rdi),%xmm8
+.byte 102,15,56,221,216
+ movups 16(%rdi),%xmm9
+.byte 102,15,56,221,224
+ movups 32(%rdi),%xmm10
+.byte 102,15,56,221,232
+ movups 48(%rdi),%xmm11
+.byte 102,15,56,221,240
+ movups 64(%rdi),%xmm1
+.byte 102,15,56,221,248
+ movups 80(%rdi),%xmm0
+ leaq 96(%rdi),%rdi
+
+ xorps %xmm2,%xmm8
+ pshufd $192,%xmm12,%xmm2
+ xorps %xmm3,%xmm9
+ pshufd $128,%xmm12,%xmm3
+ movups %xmm8,(%rsi)
+ xorps %xmm4,%xmm10
+ pshufd $64,%xmm12,%xmm4
+ movups %xmm9,16(%rsi)
+ xorps %xmm5,%xmm11
+ movups %xmm10,32(%rsi)
+ xorps %xmm6,%xmm1
+ movups %xmm11,48(%rsi)
+ xorps %xmm7,%xmm0
+ movups %xmm1,64(%rsi)
+ movups %xmm0,80(%rsi)
+ leaq 96(%rsi),%rsi
+ movl %r10d,%eax
+ subq $6,%rdx
+ jnc .Lctr32_loop6
+
+ addq $6,%rdx
+ jz .Lctr32_done
+ movq %r11,%rcx
+ leal 1(%rax,%rax,1),%eax
+
+.Lctr32_tail:
+ por %xmm14,%xmm2
+ movups (%rdi),%xmm8
+ cmpq $2,%rdx
+ jb .Lctr32_one
+
+ por %xmm14,%xmm3
+ movups 16(%rdi),%xmm9
+ je .Lctr32_two
+
+ pshufd $192,%xmm13,%xmm5
+ por %xmm14,%xmm4
+ movups 32(%rdi),%xmm10
+ cmpq $4,%rdx
+ jb .Lctr32_three
+
+ pshufd $128,%xmm13,%xmm6
+ por %xmm14,%xmm5
+ movups 48(%rdi),%xmm11
+ je .Lctr32_four
+
+ por %xmm14,%xmm6
+ xorps %xmm7,%xmm7
+
+ call _aesni_encrypt6
+
+ movups 64(%rdi),%xmm1
+ xorps %xmm2,%xmm8
+ xorps %xmm3,%xmm9
+ movups %xmm8,(%rsi)
+ xorps %xmm4,%xmm10
+ movups %xmm9,16(%rsi)
+ xorps %xmm5,%xmm11
+ movups %xmm10,32(%rsi)
+ xorps %xmm6,%xmm1
+ movups %xmm11,48(%rsi)
+ movups %xmm1,64(%rsi)
+ jmp .Lctr32_done
+
+.align 16
+.Lctr32_one_shortcut:
+ movups (%r8),%xmm2
+ movups (%rdi),%xmm8
+ movl 240(%rcx),%eax
+.Lctr32_one:
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+.Loop_enc1_7:
+.byte 102,15,56,220,209
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz .Loop_enc1_7
+.byte 102,15,56,221,209
+ xorps %xmm2,%xmm8
+ movups %xmm8,(%rsi)
+ jmp .Lctr32_done
+
+.align 16
+.Lctr32_two:
+ xorps %xmm4,%xmm4
+ call _aesni_encrypt3
+ xorps %xmm2,%xmm8
+ xorps %xmm3,%xmm9
+ movups %xmm8,(%rsi)
+ movups %xmm9,16(%rsi)
+ jmp .Lctr32_done
+
+.align 16
+.Lctr32_three:
+ call _aesni_encrypt3
+ xorps %xmm2,%xmm8
+ xorps %xmm3,%xmm9
+ movups %xmm8,(%rsi)
+ xorps %xmm4,%xmm10
+ movups %xmm9,16(%rsi)
+ movups %xmm10,32(%rsi)
+ jmp .Lctr32_done
+
+.align 16
+.Lctr32_four:
+ call _aesni_encrypt4
+ xorps %xmm2,%xmm8
+ xorps %xmm3,%xmm9
+ movups %xmm8,(%rsi)
+ xorps %xmm4,%xmm10
+ movups %xmm9,16(%rsi)
+ xorps %xmm5,%xmm11
+ movups %xmm10,32(%rsi)
+ movups %xmm11,48(%rsi)
+
+.Lctr32_done:
+ .byte 0xf3,0xc3
+.size aesni_ctr32_encrypt_blocks,.-aesni_ctr32_encrypt_blocks
+.globl aesni_xts_encrypt
+.type aesni_xts_encrypt,@function
+.align 16
+aesni_xts_encrypt:
+ leaq -104(%rsp),%rsp
+ movups (%r9),%xmm15
+ movl 240(%r8),%eax
+ movl 240(%rcx),%r10d
+ movups (%r8),%xmm0
+ movups 16(%r8),%xmm1
+ leaq 32(%r8),%r8
+ xorps %xmm0,%xmm15
+.Loop_enc1_8:
+.byte 102,68,15,56,220,249
+ decl %eax
+ movups (%r8),%xmm1
+ leaq 16(%r8),%r8
+ jnz .Loop_enc1_8
+.byte 102,68,15,56,221,249
+ movq %rcx,%r11
+ movl %r10d,%eax
+ movq %rdx,%r9
+ andq $-16,%rdx
+
+ movdqa .Lxts_magic(%rip),%xmm8
+ pxor %xmm14,%xmm14
+ pcmpgtd %xmm15,%xmm14
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm10
+ paddq %xmm15,%xmm15
+ pand %xmm8,%xmm9
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm11
+ paddq %xmm15,%xmm15
+ pand %xmm8,%xmm9
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm12
+ paddq %xmm15,%xmm15
+ pand %xmm8,%xmm9
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm13
+ paddq %xmm15,%xmm15
+ pand %xmm8,%xmm9
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+ subq $96,%rdx
+ jc .Lxts_enc_short
+
+ shrl $1,%eax
+ subl $1,%eax
+ movl %eax,%r10d
+ jmp .Lxts_enc_grandloop
+
+.align 16
+.Lxts_enc_grandloop:
+ pshufd $19,%xmm14,%xmm9
+ movdqa %xmm15,%xmm14
+ paddq %xmm15,%xmm15
+ movdqu 0(%rdi),%xmm2
+ pand %xmm8,%xmm9
+ movdqu 16(%rdi),%xmm3
+ pxor %xmm9,%xmm15
+
+ movdqu 32(%rdi),%xmm4
+ pxor %xmm10,%xmm2
+ movdqu 48(%rdi),%xmm5
+ pxor %xmm11,%xmm3
+ movdqu 64(%rdi),%xmm6
+ pxor %xmm12,%xmm4
+ movdqu 80(%rdi),%xmm7
+ leaq 96(%rdi),%rdi
+ pxor %xmm13,%xmm5
+ movups (%r11),%xmm0
+ pxor %xmm14,%xmm6
+ pxor %xmm15,%xmm7
+
+
+
+ movups 16(%r11),%xmm1
+ pxor %xmm0,%xmm2
+ pxor %xmm0,%xmm3
+ movdqa %xmm10,0(%rsp)
+.byte 102,15,56,220,209
+ leaq 32(%r11),%rcx
+ pxor %xmm0,%xmm4
+ movdqa %xmm11,16(%rsp)
+.byte 102,15,56,220,217
+ pxor %xmm0,%xmm5
+ movdqa %xmm12,32(%rsp)
+.byte 102,15,56,220,225
+ pxor %xmm0,%xmm6
+ movdqa %xmm13,48(%rsp)
+.byte 102,15,56,220,233
+ pxor %xmm0,%xmm7
+ movups (%rcx),%xmm0
+ decl %eax
+ movdqa %xmm14,64(%rsp)
+.byte 102,15,56,220,241
+ movdqa %xmm15,80(%rsp)
+.byte 102,15,56,220,249
+ pxor %xmm14,%xmm14
+ pcmpgtd %xmm15,%xmm14
+ jmp .Lxts_enc_loop6_enter
+
+.align 16
+.Lxts_enc_loop6:
+.byte 102,15,56,220,209
+.byte 102,15,56,220,217
+ decl %eax
+.byte 102,15,56,220,225
+.byte 102,15,56,220,233
+.byte 102,15,56,220,241
+.byte 102,15,56,220,249
+.Lxts_enc_loop6_enter:
+ movups 16(%rcx),%xmm1
+.byte 102,15,56,220,208
+.byte 102,15,56,220,216
+ leaq 32(%rcx),%rcx
+.byte 102,15,56,220,224
+.byte 102,15,56,220,232
+.byte 102,15,56,220,240
+.byte 102,15,56,220,248
+ movups (%rcx),%xmm0
+ jnz .Lxts_enc_loop6
+
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ paddq %xmm15,%xmm15
+.byte 102,15,56,220,209
+ pand %xmm8,%xmm9
+.byte 102,15,56,220,217
+ pcmpgtd %xmm15,%xmm14
+.byte 102,15,56,220,225
+ pxor %xmm9,%xmm15
+.byte 102,15,56,220,233
+.byte 102,15,56,220,241
+.byte 102,15,56,220,249
+ movups 16(%rcx),%xmm1
+
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm10
+ paddq %xmm15,%xmm15
+.byte 102,15,56,220,208
+ pand %xmm8,%xmm9
+.byte 102,15,56,220,216
+ pcmpgtd %xmm15,%xmm14
+.byte 102,15,56,220,224
+ pxor %xmm9,%xmm15
+.byte 102,15,56,220,232
+.byte 102,15,56,220,240
+.byte 102,15,56,220,248
+ movups 32(%rcx),%xmm0
+
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm11
+ paddq %xmm15,%xmm15
+.byte 102,15,56,220,209
+ pand %xmm8,%xmm9
+.byte 102,15,56,220,217
+ pcmpgtd %xmm15,%xmm14
+.byte 102,15,56,220,225
+ pxor %xmm9,%xmm15
+.byte 102,15,56,220,233
+.byte 102,15,56,220,241
+.byte 102,15,56,220,249
+
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm12
+ paddq %xmm15,%xmm15
+.byte 102,15,56,221,208
+ pand %xmm8,%xmm9
+.byte 102,15,56,221,216
+ pcmpgtd %xmm15,%xmm14
+.byte 102,15,56,221,224
+ pxor %xmm9,%xmm15
+.byte 102,15,56,221,232
+.byte 102,15,56,221,240
+.byte 102,15,56,221,248
+
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm13
+ paddq %xmm15,%xmm15
+ xorps 0(%rsp),%xmm2
+ pand %xmm8,%xmm9
+ xorps 16(%rsp),%xmm3
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+
+ xorps 32(%rsp),%xmm4
+ movups %xmm2,0(%rsi)
+ xorps 48(%rsp),%xmm5
+ movups %xmm3,16(%rsi)
+ xorps 64(%rsp),%xmm6
+ movups %xmm4,32(%rsi)
+ xorps 80(%rsp),%xmm7
+ movups %xmm5,48(%rsi)
+ movl %r10d,%eax
+ movups %xmm6,64(%rsi)
+ movups %xmm7,80(%rsi)
+ leaq 96(%rsi),%rsi
+ subq $96,%rdx
+ jnc .Lxts_enc_grandloop
+
+ leal 3(%rax,%rax,1),%eax
+ movq %r11,%rcx
+ movl %eax,%r10d
+
+.Lxts_enc_short:
+ addq $96,%rdx
+ jz .Lxts_enc_done
+
+ cmpq $32,%rdx
+ jb .Lxts_enc_one
+ je .Lxts_enc_two
+
+ cmpq $64,%rdx
+ jb .Lxts_enc_three
+ je .Lxts_enc_four
+
+ pshufd $19,%xmm14,%xmm9
+ movdqa %xmm15,%xmm14
+ paddq %xmm15,%xmm15
+ movdqu (%rdi),%xmm2
+ pand %xmm8,%xmm9
+ movdqu 16(%rdi),%xmm3
+ pxor %xmm9,%xmm15
+
+ movdqu 32(%rdi),%xmm4
+ pxor %xmm10,%xmm2
+ movdqu 48(%rdi),%xmm5
+ pxor %xmm11,%xmm3
+ movdqu 64(%rdi),%xmm6
+ leaq 80(%rdi),%rdi
+ pxor %xmm12,%xmm4
+ pxor %xmm13,%xmm5
+ pxor %xmm14,%xmm6
+
+ call _aesni_encrypt6
+
+ xorps %xmm10,%xmm2
+ movdqa %xmm15,%xmm10
+ xorps %xmm11,%xmm3
+ xorps %xmm12,%xmm4
+ movdqu %xmm2,(%rsi)
+ xorps %xmm13,%xmm5
+ movdqu %xmm3,16(%rsi)
+ xorps %xmm14,%xmm6
+ movdqu %xmm4,32(%rsi)
+ movdqu %xmm5,48(%rsi)
+ movdqu %xmm6,64(%rsi)
+ leaq 80(%rsi),%rsi
+ jmp .Lxts_enc_done
+
+.align 16
+.Lxts_enc_one:
+ movups (%rdi),%xmm2
+ leaq 16(%rdi),%rdi
+ xorps %xmm10,%xmm2
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+.Loop_enc1_9:
+.byte 102,15,56,220,209
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz .Loop_enc1_9
+.byte 102,15,56,221,209
+ xorps %xmm10,%xmm2
+ movdqa %xmm11,%xmm10
+ movups %xmm2,(%rsi)
+ leaq 16(%rsi),%rsi
+ jmp .Lxts_enc_done
+
+.align 16
+.Lxts_enc_two:
+ movups (%rdi),%xmm2
+ movups 16(%rdi),%xmm3
+ leaq 32(%rdi),%rdi
+ xorps %xmm10,%xmm2
+ xorps %xmm11,%xmm3
+
+ call _aesni_encrypt3
+
+ xorps %xmm10,%xmm2
+ movdqa %xmm12,%xmm10
+ xorps %xmm11,%xmm3
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ leaq 32(%rsi),%rsi
+ jmp .Lxts_enc_done
+
+.align 16
+.Lxts_enc_three:
+ movups (%rdi),%xmm2
+ movups 16(%rdi),%xmm3
+ movups 32(%rdi),%xmm4
+ leaq 48(%rdi),%rdi
+ xorps %xmm10,%xmm2
+ xorps %xmm11,%xmm3
+ xorps %xmm12,%xmm4
+
+ call _aesni_encrypt3
+
+ xorps %xmm10,%xmm2
+ movdqa %xmm13,%xmm10
+ xorps %xmm11,%xmm3
+ xorps %xmm12,%xmm4
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ leaq 48(%rsi),%rsi
+ jmp .Lxts_enc_done
+
+.align 16
+.Lxts_enc_four:
+ movups (%rdi),%xmm2
+ movups 16(%rdi),%xmm3
+ movups 32(%rdi),%xmm4
+ xorps %xmm10,%xmm2
+ movups 48(%rdi),%xmm5
+ leaq 64(%rdi),%rdi
+ xorps %xmm11,%xmm3
+ xorps %xmm12,%xmm4
+ xorps %xmm13,%xmm5
+
+ call _aesni_encrypt4
+
+ xorps %xmm10,%xmm2
+ movdqa %xmm15,%xmm10
+ xorps %xmm11,%xmm3
+ xorps %xmm12,%xmm4
+ movups %xmm2,(%rsi)
+ xorps %xmm13,%xmm5
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ leaq 64(%rsi),%rsi
+ jmp .Lxts_enc_done
+
+.align 16
+.Lxts_enc_done:
+ andq $15,%r9
+ jz .Lxts_enc_ret
+ movq %r9,%rdx
+
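+# Ciphertext stealing for a trailing partial block: the loop below swaps the
+# remaining plaintext bytes with the leading bytes of the last full ciphertext
+# block at -16(%rsi); the mixed block is then re-encrypted with the saved
+# tweak in %xmm10 and written back over that last full block.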
+.Lxts_enc_steal:
+ movzbl (%rdi),%eax
+ movzbl -16(%rsi),%ecx
+ leaq 1(%rdi),%rdi
+ movb %al,-16(%rsi)
+ movb %cl,0(%rsi)
+ leaq 1(%rsi),%rsi
+ subq $1,%rdx
+ jnz .Lxts_enc_steal
+
+ subq %r9,%rsi
+ movq %r11,%rcx
+ movl %r10d,%eax
+
+ movups -16(%rsi),%xmm2
+ xorps %xmm10,%xmm2
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+.Loop_enc1_10:
+.byte 102,15,56,220,209
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz .Loop_enc1_10
+.byte 102,15,56,221,209
+ xorps %xmm10,%xmm2
+ movups %xmm2,-16(%rsi)
+
+.Lxts_enc_ret:
+ leaq 104(%rsp),%rsp
+.Lxts_enc_epilogue:
+ .byte 0xf3,0xc3
+.size aesni_xts_encrypt,.-aesni_xts_encrypt
+.globl aesni_xts_decrypt
+.type aesni_xts_decrypt,@function
+.align 16
+aesni_xts_decrypt:
+ leaq -104(%rsp),%rsp
+ movups (%r9),%xmm15
+ movl 240(%r8),%eax
+ movl 240(%rcx),%r10d
+ movups (%r8),%xmm0
+ movups 16(%r8),%xmm1
+ leaq 32(%r8),%r8
+ xorps %xmm0,%xmm15
+.Loop_enc1_11:
+.byte 102,68,15,56,220,249
+ decl %eax
+ movups (%r8),%xmm1
+ leaq 16(%r8),%r8
+ jnz .Loop_enc1_11
+.byte 102,68,15,56,221,249
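+# The single-block loop above encrypts the initial tweak with the second
+# (tweak) key from %r8; XTS uses AES encryption of the tweak even on the
+# decryption path.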
+ xorl %eax,%eax
+ testq $15,%rdx
+ setnz %al
+ shlq $4,%rax
+ subq %rax,%rdx
+
+ movq %rcx,%r11
+ movl %r10d,%eax
+ movq %rdx,%r9
+ andq $-16,%rdx
+
+ movdqa .Lxts_magic(%rip),%xmm8
+ pxor %xmm14,%xmm14
+ pcmpgtd %xmm15,%xmm14
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm10
+ paddq %xmm15,%xmm15
+ pand %xmm8,%xmm9
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm11
+ paddq %xmm15,%xmm15
+ pand %xmm8,%xmm9
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm12
+ paddq %xmm15,%xmm15
+ pand %xmm8,%xmm9
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm13
+ paddq %xmm15,%xmm15
+ pand %xmm8,%xmm9
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
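+# Each group above multiplies the running tweak in %xmm15 by x in GF(2^128):
+# paddq shifts both 64-bit halves left by one, pcmpgtd/pshufd capture the bits
+# shifted out of bits 63 and 127, and pxor with the masked .Lxts_magic value
+# folds them back in.  Successive tweaks are parked in %xmm10-%xmm13.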
+ subq $96,%rdx
+ jc .Lxts_dec_short
+
+ shrl $1,%eax
+ subl $1,%eax
+ movl %eax,%r10d
+ jmp .Lxts_dec_grandloop
+
+.align 16
+.Lxts_dec_grandloop:
+ pshufd $19,%xmm14,%xmm9
+ movdqa %xmm15,%xmm14
+ paddq %xmm15,%xmm15
+ movdqu 0(%rdi),%xmm2
+ pand %xmm8,%xmm9
+ movdqu 16(%rdi),%xmm3
+ pxor %xmm9,%xmm15
+
+ movdqu 32(%rdi),%xmm4
+ pxor %xmm10,%xmm2
+ movdqu 48(%rdi),%xmm5
+ pxor %xmm11,%xmm3
+ movdqu 64(%rdi),%xmm6
+ pxor %xmm12,%xmm4
+ movdqu 80(%rdi),%xmm7
+ leaq 96(%rdi),%rdi
+ pxor %xmm13,%xmm5
+ movups (%r11),%xmm0
+ pxor %xmm14,%xmm6
+ pxor %xmm15,%xmm7
+
+
+
+ movups 16(%r11),%xmm1
+ pxor %xmm0,%xmm2
+ pxor %xmm0,%xmm3
+ movdqa %xmm10,0(%rsp)
+.byte 102,15,56,222,209
+ leaq 32(%r11),%rcx
+ pxor %xmm0,%xmm4
+ movdqa %xmm11,16(%rsp)
+.byte 102,15,56,222,217
+ pxor %xmm0,%xmm5
+ movdqa %xmm12,32(%rsp)
+.byte 102,15,56,222,225
+ pxor %xmm0,%xmm6
+ movdqa %xmm13,48(%rsp)
+.byte 102,15,56,222,233
+ pxor %xmm0,%xmm7
+ movups (%rcx),%xmm0
+ decl %eax
+ movdqa %xmm14,64(%rsp)
+.byte 102,15,56,222,241
+ movdqa %xmm15,80(%rsp)
+.byte 102,15,56,222,249
+ pxor %xmm14,%xmm14
+ pcmpgtd %xmm15,%xmm14
+ jmp .Lxts_dec_loop6_enter
+
+.align 16
+.Lxts_dec_loop6:
+.byte 102,15,56,222,209
+.byte 102,15,56,222,217
+ decl %eax
+.byte 102,15,56,222,225
+.byte 102,15,56,222,233
+.byte 102,15,56,222,241
+.byte 102,15,56,222,249
+.Lxts_dec_loop6_enter:
+ movups 16(%rcx),%xmm1
+.byte 102,15,56,222,208
+.byte 102,15,56,222,216
+ leaq 32(%rcx),%rcx
+.byte 102,15,56,222,224
+.byte 102,15,56,222,232
+.byte 102,15,56,222,240
+.byte 102,15,56,222,248
+ movups (%rcx),%xmm0
+ jnz .Lxts_dec_loop6
+
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ paddq %xmm15,%xmm15
+.byte 102,15,56,222,209
+ pand %xmm8,%xmm9
+.byte 102,15,56,222,217
+ pcmpgtd %xmm15,%xmm14
+.byte 102,15,56,222,225
+ pxor %xmm9,%xmm15
+.byte 102,15,56,222,233
+.byte 102,15,56,222,241
+.byte 102,15,56,222,249
+ movups 16(%rcx),%xmm1
+
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm10
+ paddq %xmm15,%xmm15
+.byte 102,15,56,222,208
+ pand %xmm8,%xmm9
+.byte 102,15,56,222,216
+ pcmpgtd %xmm15,%xmm14
+.byte 102,15,56,222,224
+ pxor %xmm9,%xmm15
+.byte 102,15,56,222,232
+.byte 102,15,56,222,240
+.byte 102,15,56,222,248
+ movups 32(%rcx),%xmm0
+
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm11
+ paddq %xmm15,%xmm15
+.byte 102,15,56,222,209
+ pand %xmm8,%xmm9
+.byte 102,15,56,222,217
+ pcmpgtd %xmm15,%xmm14
+.byte 102,15,56,222,225
+ pxor %xmm9,%xmm15
+.byte 102,15,56,222,233
+.byte 102,15,56,222,241
+.byte 102,15,56,222,249
+
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm12
+ paddq %xmm15,%xmm15
+.byte 102,15,56,223,208
+ pand %xmm8,%xmm9
+.byte 102,15,56,223,216
+ pcmpgtd %xmm15,%xmm14
+.byte 102,15,56,223,224
+ pxor %xmm9,%xmm15
+.byte 102,15,56,223,232
+.byte 102,15,56,223,240
+.byte 102,15,56,223,248
+
+ pshufd $19,%xmm14,%xmm9
+ pxor %xmm14,%xmm14
+ movdqa %xmm15,%xmm13
+ paddq %xmm15,%xmm15
+ xorps 0(%rsp),%xmm2
+ pand %xmm8,%xmm9
+ xorps 16(%rsp),%xmm3
+ pcmpgtd %xmm15,%xmm14
+ pxor %xmm9,%xmm15
+
+ xorps 32(%rsp),%xmm4
+ movups %xmm2,0(%rsi)
+ xorps 48(%rsp),%xmm5
+ movups %xmm3,16(%rsi)
+ xorps 64(%rsp),%xmm6
+ movups %xmm4,32(%rsi)
+ xorps 80(%rsp),%xmm7
+ movups %xmm5,48(%rsi)
+ movl %r10d,%eax
+ movups %xmm6,64(%rsi)
+ movups %xmm7,80(%rsi)
+ leaq 96(%rsi),%rsi
+ subq $96,%rdx
+ jnc .Lxts_dec_grandloop
+
+ leal 3(%rax,%rax,1),%eax
+ movq %r11,%rcx
+ movl %eax,%r10d
+
+.Lxts_dec_short:
+ addq $96,%rdx
+ jz .Lxts_dec_done
+
+ cmpq $32,%rdx
+ jb .Lxts_dec_one
+ je .Lxts_dec_two
+
+ cmpq $64,%rdx
+ jb .Lxts_dec_three
+ je .Lxts_dec_four
+
+ pshufd $19,%xmm14,%xmm9
+ movdqa %xmm15,%xmm14
+ paddq %xmm15,%xmm15
+ movdqu (%rdi),%xmm2
+ pand %xmm8,%xmm9
+ movdqu 16(%rdi),%xmm3
+ pxor %xmm9,%xmm15
+
+ movdqu 32(%rdi),%xmm4
+ pxor %xmm10,%xmm2
+ movdqu 48(%rdi),%xmm5
+ pxor %xmm11,%xmm3
+ movdqu 64(%rdi),%xmm6
+ leaq 80(%rdi),%rdi
+ pxor %xmm12,%xmm4
+ pxor %xmm13,%xmm5
+ pxor %xmm14,%xmm6
+
+ call _aesni_decrypt6
+
+ xorps %xmm10,%xmm2
+ xorps %xmm11,%xmm3
+ xorps %xmm12,%xmm4
+ movdqu %xmm2,(%rsi)
+ xorps %xmm13,%xmm5
+ movdqu %xmm3,16(%rsi)
+ xorps %xmm14,%xmm6
+ movdqu %xmm4,32(%rsi)
+ pxor %xmm14,%xmm14
+ movdqu %xmm5,48(%rsi)
+ pcmpgtd %xmm15,%xmm14
+ movdqu %xmm6,64(%rsi)
+ leaq 80(%rsi),%rsi
+ pshufd $19,%xmm14,%xmm11
+ andq $15,%r9
+ jz .Lxts_dec_ret
+
+ movdqa %xmm15,%xmm10
+ paddq %xmm15,%xmm15
+ pand %xmm8,%xmm11
+ pxor %xmm15,%xmm11
+ jmp .Lxts_dec_done2
+
+.align 16
+.Lxts_dec_one:
+ movups (%rdi),%xmm2
+ leaq 16(%rdi),%rdi
+ xorps %xmm10,%xmm2
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+.Loop_dec1_12:
+.byte 102,15,56,222,209
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz .Loop_dec1_12
+.byte 102,15,56,223,209
+ xorps %xmm10,%xmm2
+ movdqa %xmm11,%xmm10
+ movups %xmm2,(%rsi)
+ movdqa %xmm12,%xmm11
+ leaq 16(%rsi),%rsi
+ jmp .Lxts_dec_done
+
+.align 16
+.Lxts_dec_two:
+ movups (%rdi),%xmm2
+ movups 16(%rdi),%xmm3
+ leaq 32(%rdi),%rdi
+ xorps %xmm10,%xmm2
+ xorps %xmm11,%xmm3
+
+ call _aesni_decrypt3
+
+ xorps %xmm10,%xmm2
+ movdqa %xmm12,%xmm10
+ xorps %xmm11,%xmm3
+ movdqa %xmm13,%xmm11
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ leaq 32(%rsi),%rsi
+ jmp .Lxts_dec_done
+
+.align 16
+.Lxts_dec_three:
+ movups (%rdi),%xmm2
+ movups 16(%rdi),%xmm3
+ movups 32(%rdi),%xmm4
+ leaq 48(%rdi),%rdi
+ xorps %xmm10,%xmm2
+ xorps %xmm11,%xmm3
+ xorps %xmm12,%xmm4
+
+ call _aesni_decrypt3
+
+ xorps %xmm10,%xmm2
+ movdqa %xmm13,%xmm10
+ xorps %xmm11,%xmm3
+ movdqa %xmm15,%xmm11
+ xorps %xmm12,%xmm4
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ leaq 48(%rsi),%rsi
+ jmp .Lxts_dec_done
+
+.align 16
+.Lxts_dec_four:
+ pshufd $19,%xmm14,%xmm9
+ movdqa %xmm15,%xmm14
+ paddq %xmm15,%xmm15
+ movups (%rdi),%xmm2
+ pand %xmm8,%xmm9
+ movups 16(%rdi),%xmm3
+ pxor %xmm9,%xmm15
+
+ movups 32(%rdi),%xmm4
+ xorps %xmm10,%xmm2
+ movups 48(%rdi),%xmm5
+ leaq 64(%rdi),%rdi
+ xorps %xmm11,%xmm3
+ xorps %xmm12,%xmm4
+ xorps %xmm13,%xmm5
+
+ call _aesni_decrypt4
+
+ xorps %xmm10,%xmm2
+ movdqa %xmm14,%xmm10
+ xorps %xmm11,%xmm3
+ movdqa %xmm15,%xmm11
+ xorps %xmm12,%xmm4
+ movups %xmm2,(%rsi)
+ xorps %xmm13,%xmm5
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ leaq 64(%rsi),%rsi
+ jmp .Lxts_dec_done
+
+.align 16
+.Lxts_dec_done:
+ andq $15,%r9
+ jz .Lxts_dec_ret
+.Lxts_dec_done2:
+ movq %r9,%rdx
+ movq %r11,%rcx
+ movl %r10d,%eax
+
+ movups (%rdi),%xmm2
+ xorps %xmm11,%xmm2
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+.Loop_dec1_13:
+.byte 102,15,56,222,209
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz .Loop_dec1_13
+.byte 102,15,56,223,209
+ xorps %xmm11,%xmm2
+ movups %xmm2,(%rsi)
+
+.Lxts_dec_steal:
+ movzbl 16(%rdi),%eax
+ movzbl (%rsi),%ecx
+ leaq 1(%rdi),%rdi
+ movb %al,(%rsi)
+ movb %cl,16(%rsi)
+ leaq 1(%rsi),%rsi
+ subq $1,%rdx
+ jnz .Lxts_dec_steal
+
+ subq %r9,%rsi
+ movq %r11,%rcx
+ movl %r10d,%eax
+
+ movups (%rsi),%xmm2
+ xorps %xmm10,%xmm2
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+.Loop_dec1_14:
+.byte 102,15,56,222,209
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz .Loop_dec1_14
+.byte 102,15,56,223,209
+ xorps %xmm10,%xmm2
+ movups %xmm2,(%rsi)
+
+.Lxts_dec_ret:
+ leaq 104(%rsp),%rsp
+.Lxts_dec_epilogue:
+ .byte 0xf3,0xc3
+.size aesni_xts_decrypt,.-aesni_xts_decrypt
+.globl aesni_cbc_encrypt
+.type aesni_cbc_encrypt,@function
+.align 16
+aesni_cbc_encrypt:
+ testq %rdx,%rdx
+ jz .Lcbc_ret
+
+ movl 240(%rcx),%r10d
+ movq %rcx,%r11
+ testl %r9d,%r9d
+ jz .Lcbc_decrypt
+
+ movups (%r8),%xmm2
+ movl %r10d,%eax
+ cmpq $16,%rdx
+ jb .Lcbc_enc_tail
+ subq $16,%rdx
+ jmp .Lcbc_enc_loop
+.align 16
+.Lcbc_enc_loop:
+ movups (%rdi),%xmm3
+ leaq 16(%rdi),%rdi
+
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ xorps %xmm0,%xmm3
+ leaq 32(%rcx),%rcx
+ xorps %xmm3,%xmm2
+.Loop_enc1_15:
+.byte 102,15,56,220,209
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz .Loop_enc1_15
+.byte 102,15,56,221,209
+ movl %r10d,%eax
+ movq %r11,%rcx
+ movups %xmm2,0(%rsi)
+ leaq 16(%rsi),%rsi
+ subq $16,%rdx
+ jnc .Lcbc_enc_loop
+ addq $16,%rdx
+ jnz .Lcbc_enc_tail
+ movups %xmm2,(%r8)
+ jmp .Lcbc_ret
+
+.Lcbc_enc_tail:
+ movq %rdx,%rcx
+ xchgq %rdi,%rsi
+.long 0x9066A4F3
+ movl $16,%ecx
+ subq %rdx,%rcx
+ xorl %eax,%eax
+.long 0x9066AAF3
+ leaq -16(%rdi),%rdi
+ movl %r10d,%eax
+ movq %rdi,%rsi
+ movq %r11,%rcx
+ xorq %rdx,%rdx
+ jmp .Lcbc_enc_loop
+
+.align 16
+.Lcbc_decrypt:
+ movups (%r8),%xmm9
+ movl %r10d,%eax
+ cmpq $112,%rdx
+ jbe .Lcbc_dec_tail
+ shrl $1,%r10d
+ subq $112,%rdx
+ movl %r10d,%eax
+ movaps %xmm9,-24(%rsp)
+ jmp .Lcbc_dec_loop8_enter
+.align 16
+.Lcbc_dec_loop8:
+ movaps %xmm0,-24(%rsp)
+ movups %xmm9,(%rsi)
+ leaq 16(%rsi),%rsi
+.Lcbc_dec_loop8_enter:
+ movups (%rcx),%xmm0
+ movups (%rdi),%xmm2
+ movups 16(%rdi),%xmm3
+ movups 16(%rcx),%xmm1
+
+ leaq 32(%rcx),%rcx
+ movdqu 32(%rdi),%xmm4
+ xorps %xmm0,%xmm2
+ movdqu 48(%rdi),%xmm5
+ xorps %xmm0,%xmm3
+ movdqu 64(%rdi),%xmm6
+.byte 102,15,56,222,209
+ pxor %xmm0,%xmm4
+ movdqu 80(%rdi),%xmm7
+.byte 102,15,56,222,217
+ pxor %xmm0,%xmm5
+ movdqu 96(%rdi),%xmm8
+.byte 102,15,56,222,225
+ pxor %xmm0,%xmm6
+ movdqu 112(%rdi),%xmm9
+.byte 102,15,56,222,233
+ pxor %xmm0,%xmm7
+ decl %eax
+.byte 102,15,56,222,241
+ pxor %xmm0,%xmm8
+.byte 102,15,56,222,249
+ pxor %xmm0,%xmm9
+ movups (%rcx),%xmm0
+.byte 102,68,15,56,222,193
+.byte 102,68,15,56,222,201
+ movups 16(%rcx),%xmm1
+
+ call .Ldec_loop8_enter
+
+ movups (%rdi),%xmm1
+ movups 16(%rdi),%xmm0
+ xorps -24(%rsp),%xmm2
+ xorps %xmm1,%xmm3
+ movups 32(%rdi),%xmm1
+ xorps %xmm0,%xmm4
+ movups 48(%rdi),%xmm0
+ xorps %xmm1,%xmm5
+ movups 64(%rdi),%xmm1
+ xorps %xmm0,%xmm6
+ movups 80(%rdi),%xmm0
+ xorps %xmm1,%xmm7
+ movups 96(%rdi),%xmm1
+ xorps %xmm0,%xmm8
+ movups 112(%rdi),%xmm0
+ xorps %xmm1,%xmm9
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movl %r10d,%eax
+ movups %xmm6,64(%rsi)
+ movq %r11,%rcx
+ movups %xmm7,80(%rsi)
+ leaq 128(%rdi),%rdi
+ movups %xmm8,96(%rsi)
+ leaq 112(%rsi),%rsi
+ subq $128,%rdx
+ ja .Lcbc_dec_loop8
+
+ movaps %xmm9,%xmm2
+ movaps %xmm0,%xmm9
+ addq $112,%rdx
+ jle .Lcbc_dec_tail_collected
+ movups %xmm2,(%rsi)
+ leal 1(%r10,%r10,1),%eax
+ leaq 16(%rsi),%rsi
+.Lcbc_dec_tail:
+ movups (%rdi),%xmm2
+ movaps %xmm2,%xmm8
+ cmpq $16,%rdx
+ jbe .Lcbc_dec_one
+
+ movups 16(%rdi),%xmm3
+ movaps %xmm3,%xmm7
+ cmpq $32,%rdx
+ jbe .Lcbc_dec_two
+
+ movups 32(%rdi),%xmm4
+ movaps %xmm4,%xmm6
+ cmpq $48,%rdx
+ jbe .Lcbc_dec_three
+
+ movups 48(%rdi),%xmm5
+ cmpq $64,%rdx
+ jbe .Lcbc_dec_four
+
+ movups 64(%rdi),%xmm6
+ cmpq $80,%rdx
+ jbe .Lcbc_dec_five
+
+ movups 80(%rdi),%xmm7
+ cmpq $96,%rdx
+ jbe .Lcbc_dec_six
+
+ movups 96(%rdi),%xmm8
+ movaps %xmm9,-24(%rsp)
+ call _aesni_decrypt8
+ movups (%rdi),%xmm1
+ movups 16(%rdi),%xmm0
+ xorps -24(%rsp),%xmm2
+ xorps %xmm1,%xmm3
+ movups 32(%rdi),%xmm1
+ xorps %xmm0,%xmm4
+ movups 48(%rdi),%xmm0
+ xorps %xmm1,%xmm5
+ movups 64(%rdi),%xmm1
+ xorps %xmm0,%xmm6
+ movups 80(%rdi),%xmm0
+ xorps %xmm1,%xmm7
+ movups 96(%rdi),%xmm9
+ xorps %xmm0,%xmm8
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ movups %xmm7,80(%rsi)
+ leaq 96(%rsi),%rsi
+ movaps %xmm8,%xmm2
+ subq $112,%rdx
+ jmp .Lcbc_dec_tail_collected
+.align 16
+.Lcbc_dec_one:
+ movups (%rcx),%xmm0
+ movups 16(%rcx),%xmm1
+ leaq 32(%rcx),%rcx
+ xorps %xmm0,%xmm2
+.Loop_dec1_16:
+.byte 102,15,56,222,209
+ decl %eax
+ movups (%rcx),%xmm1
+ leaq 16(%rcx),%rcx
+ jnz .Loop_dec1_16
+.byte 102,15,56,223,209
+ xorps %xmm9,%xmm2
+ movaps %xmm8,%xmm9
+ subq $16,%rdx
+ jmp .Lcbc_dec_tail_collected
+.align 16
+.Lcbc_dec_two:
+ xorps %xmm4,%xmm4
+ call _aesni_decrypt3
+ xorps %xmm9,%xmm2
+ xorps %xmm8,%xmm3
+ movups %xmm2,(%rsi)
+ movaps %xmm7,%xmm9
+ movaps %xmm3,%xmm2
+ leaq 16(%rsi),%rsi
+ subq $32,%rdx
+ jmp .Lcbc_dec_tail_collected
+.align 16
+.Lcbc_dec_three:
+ call _aesni_decrypt3
+ xorps %xmm9,%xmm2
+ xorps %xmm8,%xmm3
+ movups %xmm2,(%rsi)
+ xorps %xmm7,%xmm4
+ movups %xmm3,16(%rsi)
+ movaps %xmm6,%xmm9
+ movaps %xmm4,%xmm2
+ leaq 32(%rsi),%rsi
+ subq $48,%rdx
+ jmp .Lcbc_dec_tail_collected
+.align 16
+.Lcbc_dec_four:
+ call _aesni_decrypt4
+ xorps %xmm9,%xmm2
+ movups 48(%rdi),%xmm9
+ xorps %xmm8,%xmm3
+ movups %xmm2,(%rsi)
+ xorps %xmm7,%xmm4
+ movups %xmm3,16(%rsi)
+ xorps %xmm6,%xmm5
+ movups %xmm4,32(%rsi)
+ movaps %xmm5,%xmm2
+ leaq 48(%rsi),%rsi
+ subq $64,%rdx
+ jmp .Lcbc_dec_tail_collected
+.align 16
+.Lcbc_dec_five:
+ xorps %xmm7,%xmm7
+ call _aesni_decrypt6
+ movups 16(%rdi),%xmm1
+ movups 32(%rdi),%xmm0
+ xorps %xmm9,%xmm2
+ xorps %xmm8,%xmm3
+ xorps %xmm1,%xmm4
+ movups 48(%rdi),%xmm1
+ xorps %xmm0,%xmm5
+ movups 64(%rdi),%xmm9
+ xorps %xmm1,%xmm6
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ leaq 64(%rsi),%rsi
+ movaps %xmm6,%xmm2
+ subq $80,%rdx
+ jmp .Lcbc_dec_tail_collected
+.align 16
+.Lcbc_dec_six:
+ call _aesni_decrypt6
+ movups 16(%rdi),%xmm1
+ movups 32(%rdi),%xmm0
+ xorps %xmm9,%xmm2
+ xorps %xmm8,%xmm3
+ xorps %xmm1,%xmm4
+ movups 48(%rdi),%xmm1
+ xorps %xmm0,%xmm5
+ movups 64(%rdi),%xmm0
+ xorps %xmm1,%xmm6
+ movups 80(%rdi),%xmm9
+ xorps %xmm0,%xmm7
+ movups %xmm2,(%rsi)
+ movups %xmm3,16(%rsi)
+ movups %xmm4,32(%rsi)
+ movups %xmm5,48(%rsi)
+ movups %xmm6,64(%rsi)
+ leaq 80(%rsi),%rsi
+ movaps %xmm7,%xmm2
+ subq $96,%rdx
+ jmp .Lcbc_dec_tail_collected
+.align 16
+.Lcbc_dec_tail_collected:
+ andq $15,%rdx
+ movups %xmm9,(%r8)
+ jnz .Lcbc_dec_tail_partial
+ movups %xmm2,(%rsi)
+ jmp .Lcbc_dec_ret
+.align 16
+.Lcbc_dec_tail_partial:
+ movaps %xmm2,-24(%rsp)
+ movq $16,%rcx
+ movq %rsi,%rdi
+ subq %rdx,%rcx
+ leaq -24(%rsp),%rsi
+.long 0x9066A4F3
+
+.Lcbc_dec_ret:
+.Lcbc_ret:
+ .byte 0xf3,0xc3
+.size aesni_cbc_encrypt,.-aesni_cbc_encrypt
+.globl aesni_set_decrypt_key
+.type aesni_set_decrypt_key,@function
+.align 16
+aesni_set_decrypt_key:
+.byte 0x48,0x83,0xEC,0x08
+ call __aesni_set_encrypt_key
+ shll $4,%esi
+ testl %eax,%eax
+ jnz .Ldec_key_ret
+ leaq 16(%rdx,%rsi,1),%rdi
+
+ movups (%rdx),%xmm0
+ movups (%rdi),%xmm1
+ movups %xmm0,(%rdi)
+ movups %xmm1,(%rdx)
+ leaq 16(%rdx),%rdx
+ leaq -16(%rdi),%rdi
+
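+# The first and last round keys were swapped above unchanged; the loop below
+# walks the schedule from both ends, applying AESIMC (InvMixColumns) to each
+# remaining round key while swapping them, to build the decryption schedule
+# for the equivalent inverse cipher.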
+.Ldec_key_inverse:
+ movups (%rdx),%xmm0
+ movups (%rdi),%xmm1
+.byte 102,15,56,219,192
+.byte 102,15,56,219,201
+ leaq 16(%rdx),%rdx
+ leaq -16(%rdi),%rdi
+ movups %xmm0,16(%rdi)
+ movups %xmm1,-16(%rdx)
+ cmpq %rdx,%rdi
+ ja .Ldec_key_inverse
+
+ movups (%rdx),%xmm0
+.byte 102,15,56,219,192
+ movups %xmm0,(%rdi)
+.Ldec_key_ret:
+ addq $8,%rsp
+ .byte 0xf3,0xc3
+.LSEH_end_set_decrypt_key:
+.size aesni_set_decrypt_key,.-aesni_set_decrypt_key
+.globl aesni_set_encrypt_key
+.type aesni_set_encrypt_key,@function
+.align 16
+aesni_set_encrypt_key:
+__aesni_set_encrypt_key:
+.byte 0x48,0x83,0xEC,0x08
+ movq $-1,%rax
+ testq %rdi,%rdi
+ jz .Lenc_key_ret
+ testq %rdx,%rdx
+ jz .Lenc_key_ret
+
+ movups (%rdi),%xmm0
+ xorps %xmm4,%xmm4
+ leaq 16(%rdx),%rax
+ cmpl $256,%esi
+ je .L14rounds
+ cmpl $192,%esi
+ je .L12rounds
+ cmpl $128,%esi
+ jne .Lbad_keybits
+
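+# AES-128 schedule: each AESKEYGENASSIST below (.byte 102,15,58,223,...) uses
+# the next round constant (1,2,4,...,0x80,0x1b,0x36), and .Lkey_expansion_128
+# folds the result into the previous round key.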
+.L10rounds:
+ movl $9,%esi
+ movups %xmm0,(%rdx)
+.byte 102,15,58,223,200,1
+ call .Lkey_expansion_128_cold
+.byte 102,15,58,223,200,2
+ call .Lkey_expansion_128
+.byte 102,15,58,223,200,4
+ call .Lkey_expansion_128
+.byte 102,15,58,223,200,8
+ call .Lkey_expansion_128
+.byte 102,15,58,223,200,16
+ call .Lkey_expansion_128
+.byte 102,15,58,223,200,32
+ call .Lkey_expansion_128
+.byte 102,15,58,223,200,64
+ call .Lkey_expansion_128
+.byte 102,15,58,223,200,128
+ call .Lkey_expansion_128
+.byte 102,15,58,223,200,27
+ call .Lkey_expansion_128
+.byte 102,15,58,223,200,54
+ call .Lkey_expansion_128
+ movups %xmm0,(%rax)
+ movl %esi,80(%rax)
+ xorl %eax,%eax
+ jmp .Lenc_key_ret
+
+.align 16
+.L12rounds:
+ movq 16(%rdi),%xmm2
+ movl $11,%esi
+ movups %xmm0,(%rdx)
+.byte 102,15,58,223,202,1
+ call .Lkey_expansion_192a_cold
+.byte 102,15,58,223,202,2
+ call .Lkey_expansion_192b
+.byte 102,15,58,223,202,4
+ call .Lkey_expansion_192a
+.byte 102,15,58,223,202,8
+ call .Lkey_expansion_192b
+.byte 102,15,58,223,202,16
+ call .Lkey_expansion_192a
+.byte 102,15,58,223,202,32
+ call .Lkey_expansion_192b
+.byte 102,15,58,223,202,64
+ call .Lkey_expansion_192a
+.byte 102,15,58,223,202,128
+ call .Lkey_expansion_192b
+ movups %xmm0,(%rax)
+ movl %esi,48(%rax)
+ xorq %rax,%rax
+ jmp .Lenc_key_ret
+
+.align 16
+.L14rounds:
+ movups 16(%rdi),%xmm2
+ movl $13,%esi
+ leaq 16(%rax),%rax
+ movups %xmm0,(%rdx)
+ movups %xmm2,16(%rdx)
+.byte 102,15,58,223,202,1
+ call .Lkey_expansion_256a_cold
+.byte 102,15,58,223,200,1
+ call .Lkey_expansion_256b
+.byte 102,15,58,223,202,2
+ call .Lkey_expansion_256a
+.byte 102,15,58,223,200,2
+ call .Lkey_expansion_256b
+.byte 102,15,58,223,202,4
+ call .Lkey_expansion_256a
+.byte 102,15,58,223,200,4
+ call .Lkey_expansion_256b
+.byte 102,15,58,223,202,8
+ call .Lkey_expansion_256a
+.byte 102,15,58,223,200,8
+ call .Lkey_expansion_256b
+.byte 102,15,58,223,202,16
+ call .Lkey_expansion_256a
+.byte 102,15,58,223,200,16
+ call .Lkey_expansion_256b
+.byte 102,15,58,223,202,32
+ call .Lkey_expansion_256a
+.byte 102,15,58,223,200,32
+ call .Lkey_expansion_256b
+.byte 102,15,58,223,202,64
+ call .Lkey_expansion_256a
+ movups %xmm0,(%rax)
+ movl %esi,16(%rax)
+ xorq %rax,%rax
+ jmp .Lenc_key_ret
+
+.align 16
+.Lbad_keybits:
+ movq $-2,%rax
+.Lenc_key_ret:
+ addq $8,%rsp
+ .byte 0xf3,0xc3
+.LSEH_end_set_encrypt_key:
+
+.align 16
+.Lkey_expansion_128:
+ movups %xmm0,(%rax)
+ leaq 16(%rax),%rax
+.Lkey_expansion_128_cold:
+ shufps $16,%xmm0,%xmm4
+ xorps %xmm4,%xmm0
+ shufps $140,%xmm0,%xmm4
+ xorps %xmm4,%xmm0
+ shufps $255,%xmm1,%xmm1
+ xorps %xmm1,%xmm0
+ .byte 0xf3,0xc3
+
+.align 16
+.Lkey_expansion_192a:
+ movups %xmm0,(%rax)
+ leaq 16(%rax),%rax
+.Lkey_expansion_192a_cold:
+ movaps %xmm2,%xmm5
+.Lkey_expansion_192b_warm:
+ shufps $16,%xmm0,%xmm4
+ movdqa %xmm2,%xmm3
+ xorps %xmm4,%xmm0
+ shufps $140,%xmm0,%xmm4
+ pslldq $4,%xmm3
+ xorps %xmm4,%xmm0
+ pshufd $85,%xmm1,%xmm1
+ pxor %xmm3,%xmm2
+ pxor %xmm1,%xmm0
+ pshufd $255,%xmm0,%xmm3
+ pxor %xmm3,%xmm2
+ .byte 0xf3,0xc3
+
+.align 16
+.Lkey_expansion_192b:
+ movaps %xmm0,%xmm3
+ shufps $68,%xmm0,%xmm5
+ movups %xmm5,(%rax)
+ shufps $78,%xmm2,%xmm3
+ movups %xmm3,16(%rax)
+ leaq 32(%rax),%rax
+ jmp .Lkey_expansion_192b_warm
+
+.align 16
+.Lkey_expansion_256a:
+ movups %xmm2,(%rax)
+ leaq 16(%rax),%rax
+.Lkey_expansion_256a_cold:
+ shufps $16,%xmm0,%xmm4
+ xorps %xmm4,%xmm0
+ shufps $140,%xmm0,%xmm4
+ xorps %xmm4,%xmm0
+ shufps $255,%xmm1,%xmm1
+ xorps %xmm1,%xmm0
+ .byte 0xf3,0xc3
+
+.align 16
+.Lkey_expansion_256b:
+ movups %xmm0,(%rax)
+ leaq 16(%rax),%rax
+
+ shufps $16,%xmm2,%xmm4
+ xorps %xmm4,%xmm2
+ shufps $140,%xmm2,%xmm4
+ xorps %xmm4,%xmm2
+ shufps $170,%xmm1,%xmm1
+ xorps %xmm1,%xmm2
+ .byte 0xf3,0xc3
+.size aesni_set_encrypt_key,.-aesni_set_encrypt_key
+.size __aesni_set_encrypt_key,.-__aesni_set_encrypt_key
+.align 64
+.Lbswap_mask:
+.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
+.Lincrement32:
+.long 6,6,6,0
+.Lincrement64:
+.long 1,0,0,0
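+# XTS reduction constant: 0x87 (x^7+x^2+x+1) is XORed into the low byte when
+# bit 127 carries out of the tweak doubling, and the 1 in the third dword
+# propagates the carry out of bit 63 into bit 64.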
+.Lxts_magic:
+.long 0x87,0,1,0
+
+.byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
+.align 64
diff --git a/secure/lib/libcrypto/amd64/bsaes-x86_64.S b/secure/lib/libcrypto/amd64/bsaes-x86_64.S
new file mode 100644
index 000000000000..671925b1e205
--- /dev/null
+++ b/secure/lib/libcrypto/amd64/bsaes-x86_64.S
@@ -0,0 +1,2562 @@
+ # $FreeBSD$
+.text
+
+
+
+
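+# _bsaes_encrypt8 encrypts eight blocks in parallel in bit-sliced form.  The
+# state is spread across %xmm15 and %xmm0-%xmm6; the psrlq/pand/pxor/psllq
+# ladders below swap bit pairs at distances 1, 2 and 4 (using the masks at
+# 0/16/32(%r11), %r11 pointing at .LBS0) to move into and out of bit-sliced
+# order around the S-box and linear layers.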
+.type _bsaes_encrypt8,@function
+.align 64
+_bsaes_encrypt8:
+ leaq .LBS0(%rip),%r11
+
+ movdqa (%rax),%xmm8
+ leaq 16(%rax),%rax
+ movdqa 80(%r11),%xmm7
+ pxor %xmm8,%xmm15
+ pxor %xmm8,%xmm0
+.byte 102,68,15,56,0,255
+ pxor %xmm8,%xmm1
+.byte 102,15,56,0,199
+ pxor %xmm8,%xmm2
+.byte 102,15,56,0,207
+ pxor %xmm8,%xmm3
+.byte 102,15,56,0,215
+ pxor %xmm8,%xmm4
+.byte 102,15,56,0,223
+ pxor %xmm8,%xmm5
+.byte 102,15,56,0,231
+ pxor %xmm8,%xmm6
+.byte 102,15,56,0,239
+.byte 102,15,56,0,247
+_bsaes_encrypt8_bitslice:
+ movdqa 0(%r11),%xmm7
+ movdqa 16(%r11),%xmm8
+ movdqa %xmm5,%xmm9
+ psrlq $1,%xmm5
+ movdqa %xmm3,%xmm10
+ psrlq $1,%xmm3
+ pxor %xmm6,%xmm5
+ pxor %xmm4,%xmm3
+ pand %xmm7,%xmm5
+ pand %xmm7,%xmm3
+ pxor %xmm5,%xmm6
+ psllq $1,%xmm5
+ pxor %xmm3,%xmm4
+ psllq $1,%xmm3
+ pxor %xmm9,%xmm5
+ pxor %xmm10,%xmm3
+ movdqa %xmm1,%xmm9
+ psrlq $1,%xmm1
+ movdqa %xmm15,%xmm10
+ psrlq $1,%xmm15
+ pxor %xmm2,%xmm1
+ pxor %xmm0,%xmm15
+ pand %xmm7,%xmm1
+ pand %xmm7,%xmm15
+ pxor %xmm1,%xmm2
+ psllq $1,%xmm1
+ pxor %xmm15,%xmm0
+ psllq $1,%xmm15
+ pxor %xmm9,%xmm1
+ pxor %xmm10,%xmm15
+ movdqa 32(%r11),%xmm7
+ movdqa %xmm4,%xmm9
+ psrlq $2,%xmm4
+ movdqa %xmm3,%xmm10
+ psrlq $2,%xmm3
+ pxor %xmm6,%xmm4
+ pxor %xmm5,%xmm3
+ pand %xmm8,%xmm4
+ pand %xmm8,%xmm3
+ pxor %xmm4,%xmm6
+ psllq $2,%xmm4
+ pxor %xmm3,%xmm5
+ psllq $2,%xmm3
+ pxor %xmm9,%xmm4
+ pxor %xmm10,%xmm3
+ movdqa %xmm0,%xmm9
+ psrlq $2,%xmm0
+ movdqa %xmm15,%xmm10
+ psrlq $2,%xmm15
+ pxor %xmm2,%xmm0
+ pxor %xmm1,%xmm15
+ pand %xmm8,%xmm0
+ pand %xmm8,%xmm15
+ pxor %xmm0,%xmm2
+ psllq $2,%xmm0
+ pxor %xmm15,%xmm1
+ psllq $2,%xmm15
+ pxor %xmm9,%xmm0
+ pxor %xmm10,%xmm15
+ movdqa %xmm2,%xmm9
+ psrlq $4,%xmm2
+ movdqa %xmm1,%xmm10
+ psrlq $4,%xmm1
+ pxor %xmm6,%xmm2
+ pxor %xmm5,%xmm1
+ pand %xmm7,%xmm2
+ pand %xmm7,%xmm1
+ pxor %xmm2,%xmm6
+ psllq $4,%xmm2
+ pxor %xmm1,%xmm5
+ psllq $4,%xmm1
+ pxor %xmm9,%xmm2
+ pxor %xmm10,%xmm1
+ movdqa %xmm0,%xmm9
+ psrlq $4,%xmm0
+ movdqa %xmm15,%xmm10
+ psrlq $4,%xmm15
+ pxor %xmm4,%xmm0
+ pxor %xmm3,%xmm15
+ pand %xmm7,%xmm0
+ pand %xmm7,%xmm15
+ pxor %xmm0,%xmm4
+ psllq $4,%xmm0
+ pxor %xmm15,%xmm3
+ psllq $4,%xmm15
+ pxor %xmm9,%xmm0
+ pxor %xmm10,%xmm15
+ decl %r10d
+ jmp .Lenc_sbox
+.align 16
+.Lenc_loop:
+ pxor 0(%rax),%xmm15
+ pxor 16(%rax),%xmm0
+.byte 102,68,15,56,0,255
+ pxor 32(%rax),%xmm1
+.byte 102,15,56,0,199
+ pxor 48(%rax),%xmm2
+.byte 102,15,56,0,207
+ pxor 64(%rax),%xmm3
+.byte 102,15,56,0,215
+ pxor 80(%rax),%xmm4
+.byte 102,15,56,0,223
+ pxor 96(%rax),%xmm5
+.byte 102,15,56,0,231
+ pxor 112(%rax),%xmm6
+.byte 102,15,56,0,239
+ leaq 128(%rax),%rax
+.byte 102,15,56,0,247
+.Lenc_sbox:
+ pxor %xmm5,%xmm4
+ pxor %xmm0,%xmm1
+ pxor %xmm15,%xmm2
+ pxor %xmm1,%xmm5
+ pxor %xmm15,%xmm4
+
+ pxor %xmm2,%xmm5
+ pxor %xmm6,%xmm2
+ pxor %xmm4,%xmm6
+ pxor %xmm3,%xmm2
+ pxor %xmm4,%xmm3
+ pxor %xmm0,%xmm2
+
+ pxor %xmm6,%xmm1
+ pxor %xmm4,%xmm0
+ movdqa %xmm6,%xmm10
+ movdqa %xmm0,%xmm9
+ movdqa %xmm4,%xmm8
+ movdqa %xmm1,%xmm12
+ movdqa %xmm5,%xmm11
+
+ pxor %xmm3,%xmm10
+ pxor %xmm1,%xmm9
+ pxor %xmm2,%xmm8
+ movdqa %xmm10,%xmm13
+ pxor %xmm3,%xmm12
+ movdqa %xmm9,%xmm7
+ pxor %xmm15,%xmm11
+ movdqa %xmm10,%xmm14
+
+ por %xmm8,%xmm9
+ por %xmm11,%xmm10
+ pxor %xmm7,%xmm14
+ pand %xmm11,%xmm13
+ pxor %xmm8,%xmm11
+ pand %xmm8,%xmm7
+ pand %xmm11,%xmm14
+ movdqa %xmm2,%xmm11
+ pxor %xmm15,%xmm11
+ pand %xmm11,%xmm12
+ pxor %xmm12,%xmm10
+ pxor %xmm12,%xmm9
+ movdqa %xmm6,%xmm12
+ movdqa %xmm4,%xmm11
+ pxor %xmm0,%xmm12
+ pxor %xmm5,%xmm11
+ movdqa %xmm12,%xmm8
+ pand %xmm11,%xmm12
+ por %xmm11,%xmm8
+ pxor %xmm12,%xmm7
+ pxor %xmm14,%xmm10
+ pxor %xmm13,%xmm9
+ pxor %xmm14,%xmm8
+ movdqa %xmm1,%xmm11
+ pxor %xmm13,%xmm7
+ movdqa %xmm3,%xmm12
+ pxor %xmm13,%xmm8
+ movdqa %xmm0,%xmm13
+ pand %xmm2,%xmm11
+ movdqa %xmm6,%xmm14
+ pand %xmm15,%xmm12
+ pand %xmm4,%xmm13
+ por %xmm5,%xmm14
+ pxor %xmm11,%xmm10
+ pxor %xmm12,%xmm9
+ pxor %xmm13,%xmm8
+ pxor %xmm14,%xmm7
+
+
+
+
+
+ movdqa %xmm10,%xmm11
+ pand %xmm8,%xmm10
+ pxor %xmm9,%xmm11
+
+ movdqa %xmm7,%xmm13
+ movdqa %xmm11,%xmm14
+ pxor %xmm10,%xmm13
+ pand %xmm13,%xmm14
+
+ movdqa %xmm8,%xmm12
+ pxor %xmm9,%xmm14
+ pxor %xmm7,%xmm12
+
+ pxor %xmm9,%xmm10
+
+ pand %xmm10,%xmm12
+
+ movdqa %xmm13,%xmm9
+ pxor %xmm7,%xmm12
+
+ pxor %xmm12,%xmm9
+ pxor %xmm12,%xmm8
+
+ pand %xmm7,%xmm9
+
+ pxor %xmm9,%xmm13
+ pxor %xmm9,%xmm8
+
+ pand %xmm14,%xmm13
+
+ pxor %xmm11,%xmm13
+ movdqa %xmm5,%xmm11
+ movdqa %xmm4,%xmm7
+ movdqa %xmm14,%xmm9
+ pxor %xmm13,%xmm9
+ pand %xmm5,%xmm9
+ pxor %xmm4,%xmm5
+ pand %xmm14,%xmm4
+ pand %xmm13,%xmm5
+ pxor %xmm4,%xmm5
+ pxor %xmm9,%xmm4
+ pxor %xmm15,%xmm11
+ pxor %xmm2,%xmm7
+ pxor %xmm12,%xmm14
+ pxor %xmm8,%xmm13
+ movdqa %xmm14,%xmm10
+ movdqa %xmm12,%xmm9
+ pxor %xmm13,%xmm10
+ pxor %xmm8,%xmm9
+ pand %xmm11,%xmm10
+ pand %xmm15,%xmm9
+ pxor %xmm7,%xmm11
+ pxor %xmm2,%xmm15
+ pand %xmm14,%xmm7
+ pand %xmm12,%xmm2
+ pand %xmm13,%xmm11
+ pand %xmm8,%xmm15
+ pxor %xmm11,%xmm7
+ pxor %xmm2,%xmm15
+ pxor %xmm10,%xmm11
+ pxor %xmm9,%xmm2
+ pxor %xmm11,%xmm5
+ pxor %xmm11,%xmm15
+ pxor %xmm7,%xmm4
+ pxor %xmm7,%xmm2
+
+ movdqa %xmm6,%xmm11
+ movdqa %xmm0,%xmm7
+ pxor %xmm3,%xmm11
+ pxor %xmm1,%xmm7
+ movdqa %xmm14,%xmm10
+ movdqa %xmm12,%xmm9
+ pxor %xmm13,%xmm10
+ pxor %xmm8,%xmm9
+ pand %xmm11,%xmm10
+ pand %xmm3,%xmm9
+ pxor %xmm7,%xmm11
+ pxor %xmm1,%xmm3
+ pand %xmm14,%xmm7
+ pand %xmm12,%xmm1
+ pand %xmm13,%xmm11
+ pand %xmm8,%xmm3
+ pxor %xmm11,%xmm7
+ pxor %xmm1,%xmm3
+ pxor %xmm10,%xmm11
+ pxor %xmm9,%xmm1
+ pxor %xmm12,%xmm14
+ pxor %xmm8,%xmm13
+ movdqa %xmm14,%xmm10
+ pxor %xmm13,%xmm10
+ pand %xmm6,%xmm10
+ pxor %xmm0,%xmm6
+ pand %xmm14,%xmm0
+ pand %xmm13,%xmm6
+ pxor %xmm0,%xmm6
+ pxor %xmm10,%xmm0
+ pxor %xmm11,%xmm6
+ pxor %xmm11,%xmm3
+ pxor %xmm7,%xmm0
+ pxor %xmm7,%xmm1
+ pxor %xmm15,%xmm6
+ pxor %xmm5,%xmm0
+ pxor %xmm6,%xmm3
+ pxor %xmm15,%xmm5
+ pxor %xmm0,%xmm15
+
+ pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm4
+ pxor %xmm2,%xmm1
+ pxor %xmm4,%xmm2
+ pxor %xmm4,%xmm3
+
+ pxor %xmm2,%xmm5
+ decl %r10d
+ jl .Lenc_done
+ pshufd $147,%xmm15,%xmm7
+ pshufd $147,%xmm0,%xmm8
+ pxor %xmm7,%xmm15
+ pshufd $147,%xmm3,%xmm9
+ pxor %xmm8,%xmm0
+ pshufd $147,%xmm5,%xmm10
+ pxor %xmm9,%xmm3
+ pshufd $147,%xmm2,%xmm11
+ pxor %xmm10,%xmm5
+ pshufd $147,%xmm6,%xmm12
+ pxor %xmm11,%xmm2
+ pshufd $147,%xmm1,%xmm13
+ pxor %xmm12,%xmm6
+ pshufd $147,%xmm4,%xmm14
+ pxor %xmm13,%xmm1
+ pxor %xmm14,%xmm4
+
+ pxor %xmm15,%xmm8
+ pxor %xmm4,%xmm7
+ pxor %xmm4,%xmm8
+ pshufd $78,%xmm15,%xmm15
+ pxor %xmm0,%xmm9
+ pshufd $78,%xmm0,%xmm0
+ pxor %xmm2,%xmm12
+ pxor %xmm7,%xmm15
+ pxor %xmm6,%xmm13
+ pxor %xmm8,%xmm0
+ pxor %xmm5,%xmm11
+ pshufd $78,%xmm2,%xmm7
+ pxor %xmm1,%xmm14
+ pshufd $78,%xmm6,%xmm8
+ pxor %xmm3,%xmm10
+ pshufd $78,%xmm5,%xmm2
+ pxor %xmm4,%xmm10
+ pshufd $78,%xmm4,%xmm6
+ pxor %xmm4,%xmm11
+ pshufd $78,%xmm1,%xmm5
+ pxor %xmm11,%xmm7
+ pshufd $78,%xmm3,%xmm1
+ pxor %xmm12,%xmm8
+
+ pxor %xmm10,%xmm2
+ pxor %xmm14,%xmm6
+ pxor %xmm13,%xmm5
+ movdqa %xmm7,%xmm3
+ pxor %xmm9,%xmm1
+ movdqa %xmm8,%xmm4
+ movdqa 48(%r11),%xmm7
+ jnz .Lenc_loop
+ movdqa 64(%r11),%xmm7
+ jmp .Lenc_loop
+.align 16
+.Lenc_done:
+ movdqa 0(%r11),%xmm7
+ movdqa 16(%r11),%xmm8
+ movdqa %xmm1,%xmm9
+ psrlq $1,%xmm1
+ movdqa %xmm2,%xmm10
+ psrlq $1,%xmm2
+ pxor %xmm4,%xmm1
+ pxor %xmm6,%xmm2
+ pand %xmm7,%xmm1
+ pand %xmm7,%xmm2
+ pxor %xmm1,%xmm4
+ psllq $1,%xmm1
+ pxor %xmm2,%xmm6
+ psllq $1,%xmm2
+ pxor %xmm9,%xmm1
+ pxor %xmm10,%xmm2
+ movdqa %xmm3,%xmm9
+ psrlq $1,%xmm3
+ movdqa %xmm15,%xmm10
+ psrlq $1,%xmm15
+ pxor %xmm5,%xmm3
+ pxor %xmm0,%xmm15
+ pand %xmm7,%xmm3
+ pand %xmm7,%xmm15
+ pxor %xmm3,%xmm5
+ psllq $1,%xmm3
+ pxor %xmm15,%xmm0
+ psllq $1,%xmm15
+ pxor %xmm9,%xmm3
+ pxor %xmm10,%xmm15
+ movdqa 32(%r11),%xmm7
+ movdqa %xmm6,%xmm9
+ psrlq $2,%xmm6
+ movdqa %xmm2,%xmm10
+ psrlq $2,%xmm2
+ pxor %xmm4,%xmm6
+ pxor %xmm1,%xmm2
+ pand %xmm8,%xmm6
+ pand %xmm8,%xmm2
+ pxor %xmm6,%xmm4
+ psllq $2,%xmm6
+ pxor %xmm2,%xmm1
+ psllq $2,%xmm2
+ pxor %xmm9,%xmm6
+ pxor %xmm10,%xmm2
+ movdqa %xmm0,%xmm9
+ psrlq $2,%xmm0
+ movdqa %xmm15,%xmm10
+ psrlq $2,%xmm15
+ pxor %xmm5,%xmm0
+ pxor %xmm3,%xmm15
+ pand %xmm8,%xmm0
+ pand %xmm8,%xmm15
+ pxor %xmm0,%xmm5
+ psllq $2,%xmm0
+ pxor %xmm15,%xmm3
+ psllq $2,%xmm15
+ pxor %xmm9,%xmm0
+ pxor %xmm10,%xmm15
+ movdqa %xmm5,%xmm9
+ psrlq $4,%xmm5
+ movdqa %xmm3,%xmm10
+ psrlq $4,%xmm3
+ pxor %xmm4,%xmm5
+ pxor %xmm1,%xmm3
+ pand %xmm7,%xmm5
+ pand %xmm7,%xmm3
+ pxor %xmm5,%xmm4
+ psllq $4,%xmm5
+ pxor %xmm3,%xmm1
+ psllq $4,%xmm3
+ pxor %xmm9,%xmm5
+ pxor %xmm10,%xmm3
+ movdqa %xmm0,%xmm9
+ psrlq $4,%xmm0
+ movdqa %xmm15,%xmm10
+ psrlq $4,%xmm15
+ pxor %xmm6,%xmm0
+ pxor %xmm2,%xmm15
+ pand %xmm7,%xmm0
+ pand %xmm7,%xmm15
+ pxor %xmm0,%xmm6
+ psllq $4,%xmm0
+ pxor %xmm15,%xmm2
+ psllq $4,%xmm15
+ pxor %xmm9,%xmm0
+ pxor %xmm10,%xmm15
+ movdqa (%rax),%xmm7
+ pxor %xmm7,%xmm3
+ pxor %xmm7,%xmm5
+ pxor %xmm7,%xmm2
+ pxor %xmm7,%xmm6
+ pxor %xmm7,%xmm1
+ pxor %xmm7,%xmm4
+ pxor %xmm7,%xmm15
+ pxor %xmm7,%xmm0
+ .byte 0xf3,0xc3
+.size _bsaes_encrypt8,.-_bsaes_encrypt8
+
+.type _bsaes_decrypt8,@function
+.align 64
+_bsaes_decrypt8:
+ leaq .LBS0(%rip),%r11
+
+ movdqa (%rax),%xmm8
+ leaq 16(%rax),%rax
+ movdqa -48(%r11),%xmm7
+ pxor %xmm8,%xmm15
+ pxor %xmm8,%xmm0
+.byte 102,68,15,56,0,255
+ pxor %xmm8,%xmm1
+.byte 102,15,56,0,199
+ pxor %xmm8,%xmm2
+.byte 102,15,56,0,207
+ pxor %xmm8,%xmm3
+.byte 102,15,56,0,215
+ pxor %xmm8,%xmm4
+.byte 102,15,56,0,223
+ pxor %xmm8,%xmm5
+.byte 102,15,56,0,231
+ pxor %xmm8,%xmm6
+.byte 102,15,56,0,239
+.byte 102,15,56,0,247
+ movdqa 0(%r11),%xmm7
+ movdqa 16(%r11),%xmm8
+ movdqa %xmm5,%xmm9
+ psrlq $1,%xmm5
+ movdqa %xmm3,%xmm10
+ psrlq $1,%xmm3
+ pxor %xmm6,%xmm5
+ pxor %xmm4,%xmm3
+ pand %xmm7,%xmm5
+ pand %xmm7,%xmm3
+ pxor %xmm5,%xmm6
+ psllq $1,%xmm5
+ pxor %xmm3,%xmm4
+ psllq $1,%xmm3
+ pxor %xmm9,%xmm5
+ pxor %xmm10,%xmm3
+ movdqa %xmm1,%xmm9
+ psrlq $1,%xmm1
+ movdqa %xmm15,%xmm10
+ psrlq $1,%xmm15
+ pxor %xmm2,%xmm1
+ pxor %xmm0,%xmm15
+ pand %xmm7,%xmm1
+ pand %xmm7,%xmm15
+ pxor %xmm1,%xmm2
+ psllq $1,%xmm1
+ pxor %xmm15,%xmm0
+ psllq $1,%xmm15
+ pxor %xmm9,%xmm1
+ pxor %xmm10,%xmm15
+ movdqa 32(%r11),%xmm7
+ movdqa %xmm4,%xmm9
+ psrlq $2,%xmm4
+ movdqa %xmm3,%xmm10
+ psrlq $2,%xmm3
+ pxor %xmm6,%xmm4
+ pxor %xmm5,%xmm3
+ pand %xmm8,%xmm4
+ pand %xmm8,%xmm3
+ pxor %xmm4,%xmm6
+ psllq $2,%xmm4
+ pxor %xmm3,%xmm5
+ psllq $2,%xmm3
+ pxor %xmm9,%xmm4
+ pxor %xmm10,%xmm3
+ movdqa %xmm0,%xmm9
+ psrlq $2,%xmm0
+ movdqa %xmm15,%xmm10
+ psrlq $2,%xmm15
+ pxor %xmm2,%xmm0
+ pxor %xmm1,%xmm15
+ pand %xmm8,%xmm0
+ pand %xmm8,%xmm15
+ pxor %xmm0,%xmm2
+ psllq $2,%xmm0
+ pxor %xmm15,%xmm1
+ psllq $2,%xmm15
+ pxor %xmm9,%xmm0
+ pxor %xmm10,%xmm15
+ movdqa %xmm2,%xmm9
+ psrlq $4,%xmm2
+ movdqa %xmm1,%xmm10
+ psrlq $4,%xmm1
+ pxor %xmm6,%xmm2
+ pxor %xmm5,%xmm1
+ pand %xmm7,%xmm2
+ pand %xmm7,%xmm1
+ pxor %xmm2,%xmm6
+ psllq $4,%xmm2
+ pxor %xmm1,%xmm5
+ psllq $4,%xmm1
+ pxor %xmm9,%xmm2
+ pxor %xmm10,%xmm1
+ movdqa %xmm0,%xmm9
+ psrlq $4,%xmm0
+ movdqa %xmm15,%xmm10
+ psrlq $4,%xmm15
+ pxor %xmm4,%xmm0
+ pxor %xmm3,%xmm15
+ pand %xmm7,%xmm0
+ pand %xmm7,%xmm15
+ pxor %xmm0,%xmm4
+ psllq $4,%xmm0
+ pxor %xmm15,%xmm3
+ psllq $4,%xmm15
+ pxor %xmm9,%xmm0
+ pxor %xmm10,%xmm15
+ decl %r10d
+ jmp .Ldec_sbox
+.align 16
+.Ldec_loop:
+ pxor 0(%rax),%xmm15
+ pxor 16(%rax),%xmm0
+.byte 102,68,15,56,0,255
+ pxor 32(%rax),%xmm1
+.byte 102,15,56,0,199
+ pxor 48(%rax),%xmm2
+.byte 102,15,56,0,207
+ pxor 64(%rax),%xmm3
+.byte 102,15,56,0,215
+ pxor 80(%rax),%xmm4
+.byte 102,15,56,0,223
+ pxor 96(%rax),%xmm5
+.byte 102,15,56,0,231
+ pxor 112(%rax),%xmm6
+.byte 102,15,56,0,239
+ leaq 128(%rax),%rax
+.byte 102,15,56,0,247
+.Ldec_sbox:
+ pxor %xmm3,%xmm2
+
+ pxor %xmm6,%xmm3
+ pxor %xmm6,%xmm1
+ pxor %xmm3,%xmm5
+ pxor %xmm5,%xmm6
+ pxor %xmm6,%xmm0
+
+ pxor %xmm0,%xmm15
+ pxor %xmm4,%xmm1
+ pxor %xmm15,%xmm2
+ pxor %xmm15,%xmm4
+ pxor %xmm2,%xmm0
+ movdqa %xmm2,%xmm10
+ movdqa %xmm6,%xmm9
+ movdqa %xmm0,%xmm8
+ movdqa %xmm3,%xmm12
+ movdqa %xmm4,%xmm11
+
+ pxor %xmm15,%xmm10
+ pxor %xmm3,%xmm9
+ pxor %xmm5,%xmm8
+ movdqa %xmm10,%xmm13
+ pxor %xmm15,%xmm12
+ movdqa %xmm9,%xmm7
+ pxor %xmm1,%xmm11
+ movdqa %xmm10,%xmm14
+
+ por %xmm8,%xmm9
+ por %xmm11,%xmm10
+ pxor %xmm7,%xmm14
+ pand %xmm11,%xmm13
+ pxor %xmm8,%xmm11
+ pand %xmm8,%xmm7
+ pand %xmm11,%xmm14
+ movdqa %xmm5,%xmm11
+ pxor %xmm1,%xmm11
+ pand %xmm11,%xmm12
+ pxor %xmm12,%xmm10
+ pxor %xmm12,%xmm9
+ movdqa %xmm2,%xmm12
+ movdqa %xmm0,%xmm11
+ pxor %xmm6,%xmm12
+ pxor %xmm4,%xmm11
+ movdqa %xmm12,%xmm8
+ pand %xmm11,%xmm12
+ por %xmm11,%xmm8
+ pxor %xmm12,%xmm7
+ pxor %xmm14,%xmm10
+ pxor %xmm13,%xmm9
+ pxor %xmm14,%xmm8
+ movdqa %xmm3,%xmm11
+ pxor %xmm13,%xmm7
+ movdqa %xmm15,%xmm12
+ pxor %xmm13,%xmm8
+ movdqa %xmm6,%xmm13
+ pand %xmm5,%xmm11
+ movdqa %xmm2,%xmm14
+ pand %xmm1,%xmm12
+ pand %xmm0,%xmm13
+ por %xmm4,%xmm14
+ pxor %xmm11,%xmm10
+ pxor %xmm12,%xmm9
+ pxor %xmm13,%xmm8
+ pxor %xmm14,%xmm7
+
+
+
+
+
+ movdqa %xmm10,%xmm11
+ pand %xmm8,%xmm10
+ pxor %xmm9,%xmm11
+
+ movdqa %xmm7,%xmm13
+ movdqa %xmm11,%xmm14
+ pxor %xmm10,%xmm13
+ pand %xmm13,%xmm14
+
+ movdqa %xmm8,%xmm12
+ pxor %xmm9,%xmm14
+ pxor %xmm7,%xmm12
+
+ pxor %xmm9,%xmm10
+
+ pand %xmm10,%xmm12
+
+ movdqa %xmm13,%xmm9
+ pxor %xmm7,%xmm12
+
+ pxor %xmm12,%xmm9
+ pxor %xmm12,%xmm8
+
+ pand %xmm7,%xmm9
+
+ pxor %xmm9,%xmm13
+ pxor %xmm9,%xmm8
+
+ pand %xmm14,%xmm13
+
+ pxor %xmm11,%xmm13
+ movdqa %xmm4,%xmm11
+ movdqa %xmm0,%xmm7
+ movdqa %xmm14,%xmm9
+ pxor %xmm13,%xmm9
+ pand %xmm4,%xmm9
+ pxor %xmm0,%xmm4
+ pand %xmm14,%xmm0
+ pand %xmm13,%xmm4
+ pxor %xmm0,%xmm4
+ pxor %xmm9,%xmm0
+ pxor %xmm1,%xmm11
+ pxor %xmm5,%xmm7
+ pxor %xmm12,%xmm14
+ pxor %xmm8,%xmm13
+ movdqa %xmm14,%xmm10
+ movdqa %xmm12,%xmm9
+ pxor %xmm13,%xmm10
+ pxor %xmm8,%xmm9
+ pand %xmm11,%xmm10
+ pand %xmm1,%xmm9
+ pxor %xmm7,%xmm11
+ pxor %xmm5,%xmm1
+ pand %xmm14,%xmm7
+ pand %xmm12,%xmm5
+ pand %xmm13,%xmm11
+ pand %xmm8,%xmm1
+ pxor %xmm11,%xmm7
+ pxor %xmm5,%xmm1
+ pxor %xmm10,%xmm11
+ pxor %xmm9,%xmm5
+ pxor %xmm11,%xmm4
+ pxor %xmm11,%xmm1
+ pxor %xmm7,%xmm0
+ pxor %xmm7,%xmm5
+
+ movdqa %xmm2,%xmm11
+ movdqa %xmm6,%xmm7
+ pxor %xmm15,%xmm11
+ pxor %xmm3,%xmm7
+ movdqa %xmm14,%xmm10
+ movdqa %xmm12,%xmm9
+ pxor %xmm13,%xmm10
+ pxor %xmm8,%xmm9
+ pand %xmm11,%xmm10
+ pand %xmm15,%xmm9
+ pxor %xmm7,%xmm11
+ pxor %xmm3,%xmm15
+ pand %xmm14,%xmm7
+ pand %xmm12,%xmm3
+ pand %xmm13,%xmm11
+ pand %xmm8,%xmm15
+ pxor %xmm11,%xmm7
+ pxor %xmm3,%xmm15
+ pxor %xmm10,%xmm11
+ pxor %xmm9,%xmm3
+ pxor %xmm12,%xmm14
+ pxor %xmm8,%xmm13
+ movdqa %xmm14,%xmm10
+ pxor %xmm13,%xmm10
+ pand %xmm2,%xmm10
+ pxor %xmm6,%xmm2
+ pand %xmm14,%xmm6
+ pand %xmm13,%xmm2
+ pxor %xmm6,%xmm2
+ pxor %xmm10,%xmm6
+ pxor %xmm11,%xmm2
+ pxor %xmm11,%xmm15
+ pxor %xmm7,%xmm6
+ pxor %xmm7,%xmm3
+ pxor %xmm6,%xmm0
+ pxor %xmm4,%xmm5
+
+ pxor %xmm0,%xmm3
+ pxor %xmm6,%xmm1
+ pxor %xmm6,%xmm4
+ pxor %xmm1,%xmm3
+ pxor %xmm15,%xmm6
+ pxor %xmm4,%xmm3
+ pxor %xmm5,%xmm2
+ pxor %xmm0,%xmm5
+ pxor %xmm3,%xmm2
+
+ pxor %xmm15,%xmm3
+ pxor %xmm2,%xmm6
+ decl %r10d
+ jl .Ldec_done
+
+ pshufd $147,%xmm4,%xmm14
+ movdqa %xmm5,%xmm9
+ pxor %xmm6,%xmm4
+ pxor %xmm6,%xmm5
+ pshufd $147,%xmm15,%xmm7
+ movdqa %xmm6,%xmm12
+ pxor %xmm15,%xmm6
+ pxor %xmm0,%xmm15
+ pshufd $147,%xmm0,%xmm8
+ pxor %xmm5,%xmm0
+ pxor %xmm2,%xmm15
+ pxor %xmm3,%xmm0
+ pshufd $147,%xmm3,%xmm10
+ pxor %xmm15,%xmm5
+ pxor %xmm4,%xmm3
+ pxor %xmm2,%xmm4
+ pshufd $147,%xmm2,%xmm13
+ movdqa %xmm1,%xmm11
+ pxor %xmm1,%xmm2
+ pxor %xmm3,%xmm1
+ pxor %xmm4,%xmm3
+ pxor %xmm12,%xmm2
+ pxor %xmm9,%xmm3
+ pxor %xmm11,%xmm3
+ pshufd $147,%xmm12,%xmm12
+
+ pxor %xmm4,%xmm6
+ pxor %xmm7,%xmm4
+ pxor %xmm8,%xmm6
+ pshufd $147,%xmm9,%xmm9
+ pxor %xmm12,%xmm4
+ pxor %xmm13,%xmm6
+ pxor %xmm14,%xmm4
+ pshufd $147,%xmm11,%xmm11
+ pxor %xmm13,%xmm14
+ pxor %xmm4,%xmm6
+
+ pxor %xmm7,%xmm5
+ pshufd $147,%xmm7,%xmm7
+ pxor %xmm8,%xmm15
+ pxor %xmm8,%xmm0
+ pxor %xmm9,%xmm15
+ pshufd $147,%xmm8,%xmm8
+ pxor %xmm9,%xmm5
+ pxor %xmm9,%xmm3
+ pxor %xmm14,%xmm15
+ pshufd $147,%xmm9,%xmm9
+ pxor %xmm10,%xmm5
+ pxor %xmm10,%xmm1
+ pxor %xmm10,%xmm0
+ pshufd $147,%xmm10,%xmm10
+ pxor %xmm11,%xmm2
+ pxor %xmm11,%xmm3
+ pxor %xmm14,%xmm2
+ pxor %xmm12,%xmm5
+ pxor %xmm11,%xmm0
+ pxor %xmm12,%xmm14
+
+ pxor %xmm14,%xmm3
+ pshufd $147,%xmm11,%xmm11
+ pxor %xmm14,%xmm1
+ pxor %xmm14,%xmm0
+
+ pxor %xmm12,%xmm14
+ pshufd $147,%xmm12,%xmm12
+ pxor %xmm13,%xmm14
+
+
+ pxor %xmm2,%xmm0
+ pxor %xmm11,%xmm2
+ pshufd $147,%xmm13,%xmm13
+ pxor %xmm7,%xmm15
+ pxor %xmm12,%xmm2
+ pxor %xmm9,%xmm15
+ pshufd $147,%xmm14,%xmm14
+
+ pxor %xmm6,%xmm5
+ pxor %xmm8,%xmm6
+ pxor %xmm7,%xmm4
+ pxor %xmm7,%xmm5
+ pxor %xmm12,%xmm6
+ pxor %xmm12,%xmm4
+ pxor %xmm14,%xmm6
+ pshufd $147,%xmm7,%xmm7
+ pxor %xmm13,%xmm4
+ pxor %xmm6,%xmm5
+ pxor %xmm8,%xmm0
+ pshufd $147,%xmm8,%xmm8
+
+ pxor %xmm14,%xmm2
+ pxor %xmm9,%xmm0
+ pxor %xmm9,%xmm3
+ pshufd $147,%xmm9,%xmm9
+ pxor %xmm13,%xmm15
+ pxor %xmm10,%xmm13
+ pxor %xmm2,%xmm0
+ pxor %xmm13,%xmm5
+
+ pxor %xmm13,%xmm1
+ pxor %xmm12,%xmm3
+ pxor %xmm11,%xmm1
+ pshufd $147,%xmm11,%xmm11
+ pxor %xmm13,%xmm3
+ pxor %xmm14,%xmm1
+ pxor %xmm10,%xmm13
+
+ pshufd $147,%xmm12,%xmm12
+ pshufd $147,%xmm13,%xmm13
+ pshufd $147,%xmm14,%xmm14
+ pshufd $147,%xmm10,%xmm10
+
+
+ pxor %xmm6,%xmm0
+ pxor %xmm6,%xmm8
+ pxor %xmm12,%xmm7
+ pxor %xmm12,%xmm8
+ pxor %xmm7,%xmm5
+ pxor %xmm4,%xmm7
+ pxor %xmm13,%xmm8
+ pxor %xmm14,%xmm13
+ pxor %xmm8,%xmm0
+ pxor %xmm11,%xmm2
+ pxor %xmm0,%xmm11
+ pxor %xmm10,%xmm1
+ pxor %xmm5,%xmm10
+ pxor %xmm9,%xmm3
+ pxor %xmm15,%xmm9
+ pxor %xmm14,%xmm10
+ pxor %xmm3,%xmm12
+ pxor %xmm13,%xmm9
+ pxor %xmm13,%xmm12
+ pxor %xmm1,%xmm13
+ pxor %xmm2,%xmm14
+
+ movdqa %xmm7,%xmm15
+ movdqa %xmm8,%xmm0
+ movdqa %xmm9,%xmm1
+ movdqa %xmm10,%xmm2
+ movdqa %xmm11,%xmm3
+ movdqa %xmm12,%xmm4
+ movdqa %xmm13,%xmm5
+ movdqa %xmm14,%xmm6
+ movdqa -16(%r11),%xmm7
+ jnz .Ldec_loop
+ movdqa -32(%r11),%xmm7
+ jmp .Ldec_loop
+.align 16
+.Ldec_done:
+ movdqa 0(%r11),%xmm7
+ movdqa 16(%r11),%xmm8
+ movdqa %xmm2,%xmm9
+ psrlq $1,%xmm2
+ movdqa %xmm1,%xmm10
+ psrlq $1,%xmm1
+ pxor %xmm4,%xmm2
+ pxor %xmm6,%xmm1
+ pand %xmm7,%xmm2
+ pand %xmm7,%xmm1
+ pxor %xmm2,%xmm4
+ psllq $1,%xmm2
+ pxor %xmm1,%xmm6
+ psllq $1,%xmm1
+ pxor %xmm9,%xmm2
+ pxor %xmm10,%xmm1
+ movdqa %xmm5,%xmm9
+ psrlq $1,%xmm5
+ movdqa %xmm15,%xmm10
+ psrlq $1,%xmm15
+ pxor %xmm3,%xmm5
+ pxor %xmm0,%xmm15
+ pand %xmm7,%xmm5
+ pand %xmm7,%xmm15
+ pxor %xmm5,%xmm3
+ psllq $1,%xmm5
+ pxor %xmm15,%xmm0
+ psllq $1,%xmm15
+ pxor %xmm9,%xmm5
+ pxor %xmm10,%xmm15
+ movdqa 32(%r11),%xmm7
+ movdqa %xmm6,%xmm9
+ psrlq $2,%xmm6
+ movdqa %xmm1,%xmm10
+ psrlq $2,%xmm1
+ pxor %xmm4,%xmm6
+ pxor %xmm2,%xmm1
+ pand %xmm8,%xmm6
+ pand %xmm8,%xmm1
+ pxor %xmm6,%xmm4
+ psllq $2,%xmm6
+ pxor %xmm1,%xmm2
+ psllq $2,%xmm1
+ pxor %xmm9,%xmm6
+ pxor %xmm10,%xmm1
+ movdqa %xmm0,%xmm9
+ psrlq $2,%xmm0
+ movdqa %xmm15,%xmm10
+ psrlq $2,%xmm15
+ pxor %xmm3,%xmm0
+ pxor %xmm5,%xmm15
+ pand %xmm8,%xmm0
+ pand %xmm8,%xmm15
+ pxor %xmm0,%xmm3
+ psllq $2,%xmm0
+ pxor %xmm15,%xmm5
+ psllq $2,%xmm15
+ pxor %xmm9,%xmm0
+ pxor %xmm10,%xmm15
+ movdqa %xmm3,%xmm9
+ psrlq $4,%xmm3
+ movdqa %xmm5,%xmm10
+ psrlq $4,%xmm5
+ pxor %xmm4,%xmm3
+ pxor %xmm2,%xmm5
+ pand %xmm7,%xmm3
+ pand %xmm7,%xmm5
+ pxor %xmm3,%xmm4
+ psllq $4,%xmm3
+ pxor %xmm5,%xmm2
+ psllq $4,%xmm5
+ pxor %xmm9,%xmm3
+ pxor %xmm10,%xmm5
+ movdqa %xmm0,%xmm9
+ psrlq $4,%xmm0
+ movdqa %xmm15,%xmm10
+ psrlq $4,%xmm15
+ pxor %xmm6,%xmm0
+ pxor %xmm1,%xmm15
+ pand %xmm7,%xmm0
+ pand %xmm7,%xmm15
+ pxor %xmm0,%xmm6
+ psllq $4,%xmm0
+ pxor %xmm15,%xmm1
+ psllq $4,%xmm15
+ pxor %xmm9,%xmm0
+ pxor %xmm10,%xmm15
+ movdqa (%rax),%xmm7
+ pxor %xmm7,%xmm5
+ pxor %xmm7,%xmm3
+ pxor %xmm7,%xmm1
+ pxor %xmm7,%xmm6
+ pxor %xmm7,%xmm2
+ pxor %xmm7,%xmm4
+ pxor %xmm7,%xmm15
+ pxor %xmm7,%xmm0
+ .byte 0xf3,0xc3
+.size _bsaes_decrypt8,.-_bsaes_decrypt8
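+# _bsaes_key_convert expands the standard key schedule at (%rcx) into
+# bit-sliced round keys at (%rax): pand/pcmpeqb against the .Lmasks constants
+# turns every key bit into a full 0x00/0xff byte mask that can be XORed
+# straight into the bit-sliced state.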
+.type _bsaes_key_convert,@function
+.align 16
+_bsaes_key_convert:
+ leaq .Lmasks(%rip),%r11
+ movdqu (%rcx),%xmm7
+ leaq 16(%rcx),%rcx
+ movdqa 0(%r11),%xmm0
+ movdqa 16(%r11),%xmm1
+ movdqa 32(%r11),%xmm2
+ movdqa 48(%r11),%xmm3
+ movdqa 64(%r11),%xmm4
+ pcmpeqd %xmm5,%xmm5
+
+ movdqu (%rcx),%xmm6
+ movdqa %xmm7,(%rax)
+ leaq 16(%rax),%rax
+ decl %r10d
+ jmp .Lkey_loop
+.align 16
+.Lkey_loop:
+.byte 102,15,56,0,244
+
+ movdqa %xmm0,%xmm8
+ movdqa %xmm1,%xmm9
+
+ pand %xmm6,%xmm8
+ pand %xmm6,%xmm9
+ movdqa %xmm2,%xmm10
+ pcmpeqb %xmm0,%xmm8
+ psllq $4,%xmm0
+ movdqa %xmm3,%xmm11
+ pcmpeqb %xmm1,%xmm9
+ psllq $4,%xmm1
+
+ pand %xmm6,%xmm10
+ pand %xmm6,%xmm11
+ movdqa %xmm0,%xmm12
+ pcmpeqb %xmm2,%xmm10
+ psllq $4,%xmm2
+ movdqa %xmm1,%xmm13
+ pcmpeqb %xmm3,%xmm11
+ psllq $4,%xmm3
+
+ movdqa %xmm2,%xmm14
+ movdqa %xmm3,%xmm15
+ pxor %xmm5,%xmm8
+ pxor %xmm5,%xmm9
+
+ pand %xmm6,%xmm12
+ pand %xmm6,%xmm13
+ movdqa %xmm8,0(%rax)
+ pcmpeqb %xmm0,%xmm12
+ psrlq $4,%xmm0
+ movdqa %xmm9,16(%rax)
+ pcmpeqb %xmm1,%xmm13
+ psrlq $4,%xmm1
+ leaq 16(%rcx),%rcx
+
+ pand %xmm6,%xmm14
+ pand %xmm6,%xmm15
+ movdqa %xmm10,32(%rax)
+ pcmpeqb %xmm2,%xmm14
+ psrlq $4,%xmm2
+ movdqa %xmm11,48(%rax)
+ pcmpeqb %xmm3,%xmm15
+ psrlq $4,%xmm3
+ movdqu (%rcx),%xmm6
+
+ pxor %xmm5,%xmm13
+ pxor %xmm5,%xmm14
+ movdqa %xmm12,64(%rax)
+ movdqa %xmm13,80(%rax)
+ movdqa %xmm14,96(%rax)
+ movdqa %xmm15,112(%rax)
+ leaq 128(%rax),%rax
+ decl %r10d
+ jnz .Lkey_loop
+
+ movdqa 80(%r11),%xmm7
+
+ .byte 0xf3,0xc3
+.size _bsaes_key_convert,.-_bsaes_key_convert
+
+.globl bsaes_cbc_encrypt
+.type bsaes_cbc_encrypt,@function
+.align 16
+bsaes_cbc_encrypt:
+ cmpl $0,%r9d
+ jne asm_AES_cbc_encrypt
+ cmpq $128,%rdx
+ jb asm_AES_cbc_encrypt
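+# Bit-slicing needs eight independent blocks, so CBC encryption (%r9d != 0)
+# and inputs shorter than 128 bytes are handed off to the table-based
+# asm_AES_cbc_encrypt above.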
+
+ movq %rsp,%rax
+.Lcbc_dec_prologue:
+ pushq %rbp
+ pushq %rbx
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ leaq -72(%rsp),%rsp
+ movq %rsp,%rbp
+ movl 240(%rcx),%eax
+ movq %rdi,%r12
+ movq %rsi,%r13
+ movq %rdx,%r14
+ movq %rcx,%r15
+ movq %r8,%rbx
+ shrq $4,%r14
+
+ movl %eax,%edx
+ shlq $7,%rax
+ subq $96,%rax
+ subq %rax,%rsp
+
+ movq %rsp,%rax
+ movq %r15,%rcx
+ movl %edx,%r10d
+ call _bsaes_key_convert
+ pxor (%rsp),%xmm7
+ movdqa %xmm6,(%rax)
+ movdqa %xmm7,(%rsp)
+
+ movdqu (%rbx),%xmm14
+ subq $8,%r14
+.Lcbc_dec_loop:
+ movdqu 0(%r12),%xmm15
+ movdqu 16(%r12),%xmm0
+ movdqu 32(%r12),%xmm1
+ movdqu 48(%r12),%xmm2
+ movdqu 64(%r12),%xmm3
+ movdqu 80(%r12),%xmm4
+ movq %rsp,%rax
+ movdqu 96(%r12),%xmm5
+ movl %edx,%r10d
+ movdqu 112(%r12),%xmm6
+ movdqa %xmm14,32(%rbp)
+
+ call _bsaes_decrypt8
+
+ pxor 32(%rbp),%xmm15
+ movdqu 0(%r12),%xmm7
+ movdqu 16(%r12),%xmm8
+ pxor %xmm7,%xmm0
+ movdqu 32(%r12),%xmm9
+ pxor %xmm8,%xmm5
+ movdqu 48(%r12),%xmm10
+ pxor %xmm9,%xmm3
+ movdqu 64(%r12),%xmm11
+ pxor %xmm10,%xmm1
+ movdqu 80(%r12),%xmm12
+ pxor %xmm11,%xmm6
+ movdqu 96(%r12),%xmm13
+ pxor %xmm12,%xmm2
+ movdqu 112(%r12),%xmm14
+ pxor %xmm13,%xmm4
+ movdqu %xmm15,0(%r13)
+ leaq 128(%r12),%r12
+ movdqu %xmm0,16(%r13)
+ movdqu %xmm5,32(%r13)
+ movdqu %xmm3,48(%r13)
+ movdqu %xmm1,64(%r13)
+ movdqu %xmm6,80(%r13)
+ movdqu %xmm2,96(%r13)
+ movdqu %xmm4,112(%r13)
+ leaq 128(%r13),%r13
+ subq $8,%r14
+ jnc .Lcbc_dec_loop
+
+ addq $8,%r14
+ jz .Lcbc_dec_done
+
+ movdqu 0(%r12),%xmm15
+ movq %rsp,%rax
+ movl %edx,%r10d
+ cmpq $2,%r14
+ jb .Lcbc_dec_one
+ movdqu 16(%r12),%xmm0
+ je .Lcbc_dec_two
+ movdqu 32(%r12),%xmm1
+ cmpq $4,%r14
+ jb .Lcbc_dec_three
+ movdqu 48(%r12),%xmm2
+ je .Lcbc_dec_four
+ movdqu 64(%r12),%xmm3
+ cmpq $6,%r14
+ jb .Lcbc_dec_five
+ movdqu 80(%r12),%xmm4
+ je .Lcbc_dec_six
+ movdqu 96(%r12),%xmm5
+ movdqa %xmm14,32(%rbp)
+ call _bsaes_decrypt8
+ pxor 32(%rbp),%xmm15
+ movdqu 0(%r12),%xmm7
+ movdqu 16(%r12),%xmm8
+ pxor %xmm7,%xmm0
+ movdqu 32(%r12),%xmm9
+ pxor %xmm8,%xmm5
+ movdqu 48(%r12),%xmm10
+ pxor %xmm9,%xmm3
+ movdqu 64(%r12),%xmm11
+ pxor %xmm10,%xmm1
+ movdqu 80(%r12),%xmm12
+ pxor %xmm11,%xmm6
+ movdqu 96(%r12),%xmm14
+ pxor %xmm12,%xmm2
+ movdqu %xmm15,0(%r13)
+ movdqu %xmm0,16(%r13)
+ movdqu %xmm5,32(%r13)
+ movdqu %xmm3,48(%r13)
+ movdqu %xmm1,64(%r13)
+ movdqu %xmm6,80(%r13)
+ movdqu %xmm2,96(%r13)
+ jmp .Lcbc_dec_done
+.align 16
+.Lcbc_dec_six:
+ movdqa %xmm14,32(%rbp)
+ call _bsaes_decrypt8
+ pxor 32(%rbp),%xmm15
+ movdqu 0(%r12),%xmm7
+ movdqu 16(%r12),%xmm8
+ pxor %xmm7,%xmm0
+ movdqu 32(%r12),%xmm9
+ pxor %xmm8,%xmm5
+ movdqu 48(%r12),%xmm10
+ pxor %xmm9,%xmm3
+ movdqu 64(%r12),%xmm11
+ pxor %xmm10,%xmm1
+ movdqu 80(%r12),%xmm14
+ pxor %xmm11,%xmm6
+ movdqu %xmm15,0(%r13)
+ movdqu %xmm0,16(%r13)
+ movdqu %xmm5,32(%r13)
+ movdqu %xmm3,48(%r13)
+ movdqu %xmm1,64(%r13)
+ movdqu %xmm6,80(%r13)
+ jmp .Lcbc_dec_done
+.align 16
+.Lcbc_dec_five:
+ movdqa %xmm14,32(%rbp)
+ call _bsaes_decrypt8
+ pxor 32(%rbp),%xmm15
+ movdqu 0(%r12),%xmm7
+ movdqu 16(%r12),%xmm8
+ pxor %xmm7,%xmm0
+ movdqu 32(%r12),%xmm9
+ pxor %xmm8,%xmm5
+ movdqu 48(%r12),%xmm10
+ pxor %xmm9,%xmm3
+ movdqu 64(%r12),%xmm14
+ pxor %xmm10,%xmm1
+ movdqu %xmm15,0(%r13)
+ movdqu %xmm0,16(%r13)
+ movdqu %xmm5,32(%r13)
+ movdqu %xmm3,48(%r13)
+ movdqu %xmm1,64(%r13)
+ jmp .Lcbc_dec_done
+.align 16
+.Lcbc_dec_four:
+ movdqa %xmm14,32(%rbp)
+ call _bsaes_decrypt8
+ pxor 32(%rbp),%xmm15
+ movdqu 0(%r12),%xmm7
+ movdqu 16(%r12),%xmm8
+ pxor %xmm7,%xmm0
+ movdqu 32(%r12),%xmm9
+ pxor %xmm8,%xmm5
+ movdqu 48(%r12),%xmm14
+ pxor %xmm9,%xmm3
+ movdqu %xmm15,0(%r13)
+ movdqu %xmm0,16(%r13)
+ movdqu %xmm5,32(%r13)
+ movdqu %xmm3,48(%r13)
+ jmp .Lcbc_dec_done
+.align 16
+.Lcbc_dec_three:
+ movdqa %xmm14,32(%rbp)
+ call _bsaes_decrypt8
+ pxor 32(%rbp),%xmm15
+ movdqu 0(%r12),%xmm7
+ movdqu 16(%r12),%xmm8
+ pxor %xmm7,%xmm0
+ movdqu 32(%r12),%xmm14
+ pxor %xmm8,%xmm5
+ movdqu %xmm15,0(%r13)
+ movdqu %xmm0,16(%r13)
+ movdqu %xmm5,32(%r13)
+ jmp .Lcbc_dec_done
+.align 16
+.Lcbc_dec_two:
+ movdqa %xmm14,32(%rbp)
+ call _bsaes_decrypt8
+ pxor 32(%rbp),%xmm15
+ movdqu 0(%r12),%xmm7
+ movdqu 16(%r12),%xmm14
+ pxor %xmm7,%xmm0
+ movdqu %xmm15,0(%r13)
+ movdqu %xmm0,16(%r13)
+ jmp .Lcbc_dec_done
+.align 16
+.Lcbc_dec_one:
+ leaq (%r12),%rdi
+ leaq 32(%rbp),%rsi
+ leaq (%r15),%rdx
+ call asm_AES_decrypt
+ pxor 32(%rbp),%xmm14
+ movdqu %xmm14,(%r13)
+ movdqa %xmm15,%xmm14
+
+.Lcbc_dec_done:
+ movdqu %xmm14,(%rbx)
+ leaq (%rsp),%rax
+ pxor %xmm0,%xmm0
+.Lcbc_dec_bzero:
+ movdqa %xmm0,0(%rax)
+ movdqa %xmm0,16(%rax)
+ leaq 32(%rax),%rax
+ cmpq %rax,%rbp
+ ja .Lcbc_dec_bzero
+
+ leaq (%rbp),%rsp
+ movq 72(%rsp),%r15
+ movq 80(%rsp),%r14
+ movq 88(%rsp),%r13
+ movq 96(%rsp),%r12
+ movq 104(%rsp),%rbx
+ movq 112(%rsp),%rax
+ leaq 120(%rsp),%rsp
+ movq %rax,%rbp
+.Lcbc_dec_epilogue:
+ .byte 0xf3,0xc3
+.size bsaes_cbc_encrypt,.-bsaes_cbc_encrypt
+
+.globl bsaes_ctr32_encrypt_blocks
+.type bsaes_ctr32_encrypt_blocks,@function
+.align 16
+bsaes_ctr32_encrypt_blocks:
+ movq %rsp,%rax
+.Lctr_enc_prologue:
+ pushq %rbp
+ pushq %rbx
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ leaq -72(%rsp),%rsp
+ movq %rsp,%rbp
+ movdqu (%r8),%xmm0
+ movl 240(%rcx),%eax
+ movq %rdi,%r12
+ movq %rsi,%r13
+ movq %rdx,%r14
+ movq %rcx,%r15
+ movdqa %xmm0,32(%rbp)
+ cmpq $8,%rdx
+ jb .Lctr_enc_short
+
+ movl %eax,%ebx
+ shlq $7,%rax
+ subq $96,%rax
+ subq %rax,%rsp
+
+ movq %rsp,%rax
+ movq %r15,%rcx
+ movl %ebx,%r10d
+ call _bsaes_key_convert
+ pxor %xmm6,%xmm7
+ movdqa %xmm7,(%rax)
+
+ movdqa (%rsp),%xmm8
+ leaq .LADD1(%rip),%r11
+ movdqa 32(%rbp),%xmm15
+ movdqa -32(%r11),%xmm7
+.byte 102,68,15,56,0,199
+.byte 102,68,15,56,0,255
+ movdqa %xmm8,(%rsp)
+ jmp .Lctr_enc_loop
+.align 16
+.Lctr_enc_loop:
+ movdqa %xmm15,32(%rbp)
+ movdqa %xmm15,%xmm0
+ movdqa %xmm15,%xmm1
+ paddd 0(%r11),%xmm0
+ movdqa %xmm15,%xmm2
+ paddd 16(%r11),%xmm1
+ movdqa %xmm15,%xmm3
+ paddd 32(%r11),%xmm2
+ movdqa %xmm15,%xmm4
+ paddd 48(%r11),%xmm3
+ movdqa %xmm15,%xmm5
+ paddd 64(%r11),%xmm4
+ movdqa %xmm15,%xmm6
+ paddd 80(%r11),%xmm5
+ paddd 96(%r11),%xmm6
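+# %xmm15 holds the counter block for this pass; the seven paddd above add the
+# .LADD1 table entries at offsets 0-96 to produce counter+1 ... counter+7 in
+# %xmm0-%xmm6, and the paddd 112(%r11) at the bottom of the loop advances
+# %xmm15 by eight for the next pass.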
+
+
+
+ movdqa (%rsp),%xmm8
+ leaq 16(%rsp),%rax
+ movdqa -16(%r11),%xmm7
+ pxor %xmm8,%xmm15
+ pxor %xmm8,%xmm0
+.byte 102,68,15,56,0,255
+ pxor %xmm8,%xmm1
+.byte 102,15,56,0,199
+ pxor %xmm8,%xmm2
+.byte 102,15,56,0,207
+ pxor %xmm8,%xmm3
+.byte 102,15,56,0,215
+ pxor %xmm8,%xmm4
+.byte 102,15,56,0,223
+ pxor %xmm8,%xmm5
+.byte 102,15,56,0,231
+ pxor %xmm8,%xmm6
+.byte 102,15,56,0,239
+ leaq .LBS0(%rip),%r11
+.byte 102,15,56,0,247
+ movl %ebx,%r10d
+
+ call _bsaes_encrypt8_bitslice
+
+ subq $8,%r14
+ jc .Lctr_enc_loop_done
+
+ movdqu 0(%r12),%xmm7
+ movdqu 16(%r12),%xmm8
+ movdqu 32(%r12),%xmm9
+ movdqu 48(%r12),%xmm10
+ movdqu 64(%r12),%xmm11
+ movdqu 80(%r12),%xmm12
+ movdqu 96(%r12),%xmm13
+ movdqu 112(%r12),%xmm14
+ leaq 128(%r12),%r12
+ pxor %xmm15,%xmm7
+ movdqa 32(%rbp),%xmm15
+ pxor %xmm8,%xmm0
+ movdqu %xmm7,0(%r13)
+ pxor %xmm9,%xmm3
+ movdqu %xmm0,16(%r13)
+ pxor %xmm10,%xmm5
+ movdqu %xmm3,32(%r13)
+ pxor %xmm11,%xmm2
+ movdqu %xmm5,48(%r13)
+ pxor %xmm12,%xmm6
+ movdqu %xmm2,64(%r13)
+ pxor %xmm13,%xmm1
+ movdqu %xmm6,80(%r13)
+ pxor %xmm14,%xmm4
+ movdqu %xmm1,96(%r13)
+ leaq .LADD1(%rip),%r11
+ movdqu %xmm4,112(%r13)
+ leaq 128(%r13),%r13
+ paddd 112(%r11),%xmm15
+ jnz .Lctr_enc_loop
+
+ jmp .Lctr_enc_done
+.align 16
+.Lctr_enc_loop_done:
+ addq $8,%r14
+ movdqu 0(%r12),%xmm7
+ pxor %xmm7,%xmm15
+ movdqu %xmm15,0(%r13)
+ cmpq $2,%r14
+ jb .Lctr_enc_done
+ movdqu 16(%r12),%xmm8
+ pxor %xmm8,%xmm0
+ movdqu %xmm0,16(%r13)
+ je .Lctr_enc_done
+ movdqu 32(%r12),%xmm9
+ pxor %xmm9,%xmm3
+ movdqu %xmm3,32(%r13)
+ cmpq $4,%r14
+ jb .Lctr_enc_done
+ movdqu 48(%r12),%xmm10
+ pxor %xmm10,%xmm5
+ movdqu %xmm5,48(%r13)
+ je .Lctr_enc_done
+ movdqu 64(%r12),%xmm11
+ pxor %xmm11,%xmm2
+ movdqu %xmm2,64(%r13)
+ cmpq $6,%r14
+ jb .Lctr_enc_done
+ movdqu 80(%r12),%xmm12
+ pxor %xmm12,%xmm6
+ movdqu %xmm6,80(%r13)
+ je .Lctr_enc_done
+ movdqu 96(%r12),%xmm13
+ pxor %xmm13,%xmm1
+ movdqu %xmm1,96(%r13)
+ jmp .Lctr_enc_done
+
+.align 16
+.Lctr_enc_short:
+ leaq 32(%rbp),%rdi
+ leaq 48(%rbp),%rsi
+ leaq (%r15),%rdx
+ call asm_AES_encrypt
+ movdqu (%r12),%xmm0
+ leaq 16(%r12),%r12
+ movl 44(%rbp),%eax
+ bswapl %eax
+ pxor 48(%rbp),%xmm0
+ incl %eax
+ movdqu %xmm0,(%r13)
+ bswapl %eax
+ leaq 16(%r13),%r13
+ movl %eax,44(%rsp)
+ decq %r14
+ jnz .Lctr_enc_short
+
+.Lctr_enc_done:
+ leaq (%rsp),%rax
+ pxor %xmm0,%xmm0
+.Lctr_enc_bzero:
+ movdqa %xmm0,0(%rax)
+ movdqa %xmm0,16(%rax)
+ leaq 32(%rax),%rax
+ cmpq %rax,%rbp
+ ja .Lctr_enc_bzero
+
+ leaq (%rbp),%rsp
+ movq 72(%rsp),%r15
+ movq 80(%rsp),%r14
+ movq 88(%rsp),%r13
+ movq 96(%rsp),%r12
+ movq 104(%rsp),%rbx
+ movq 112(%rsp),%rax
+ leaq 120(%rsp),%rsp
+ movq %rax,%rbp
+.Lctr_enc_epilogue:
+ .byte 0xf3,0xc3
+.size bsaes_ctr32_encrypt_blocks,.-bsaes_ctr32_encrypt_blocks
+.globl bsaes_xts_encrypt
+.type bsaes_xts_encrypt,@function
+.align 16
+bsaes_xts_encrypt:
+ movq %rsp,%rax
+.Lxts_enc_prologue:
+ pushq %rbp
+ pushq %rbx
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ leaq -72(%rsp),%rsp
+ movq %rsp,%rbp
+ movq %rdi,%r12
+ movq %rsi,%r13
+ movq %rdx,%r14
+ movq %rcx,%r15
+
+ leaq (%r9),%rdi
+ leaq 32(%rbp),%rsi
+ leaq (%r8),%rdx
+ call asm_AES_encrypt
+
+ movl 240(%r15),%eax
+ movq %r14,%rbx
+
+ movl %eax,%edx
+ shlq $7,%rax
+ subq $96,%rax
+ subq %rax,%rsp
+
+ movq %rsp,%rax
+ movq %r15,%rcx
+ movl %edx,%r10d
+ call _bsaes_key_convert
+ pxor %xmm6,%xmm7
+ movdqa %xmm7,(%rax)
+
+ andq $-16,%r14
+ subq $128,%rsp
+ movdqa 32(%rbp),%xmm6
+
+ pxor %xmm14,%xmm14
+ movdqa .Lxts_magic(%rip),%xmm12
+ pcmpgtd %xmm6,%xmm14
+
+ subq $128,%r14
+ jc .Lxts_enc_short
+ jmp .Lxts_enc_loop
+
+.align 16
+.Lxts_enc_loop:
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm15
+ movdqa %xmm6,0(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm0
+ movdqa %xmm6,16(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 0(%r12),%xmm7
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm1
+ movdqa %xmm6,32(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 16(%r12),%xmm8
+ pxor %xmm7,%xmm15
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm2
+ movdqa %xmm6,48(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 32(%r12),%xmm9
+ pxor %xmm8,%xmm0
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm3
+ movdqa %xmm6,64(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 48(%r12),%xmm10
+ pxor %xmm9,%xmm1
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm4
+ movdqa %xmm6,80(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 64(%r12),%xmm11
+ pxor %xmm10,%xmm2
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm5
+ movdqa %xmm6,96(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 80(%r12),%xmm12
+ pxor %xmm11,%xmm3
+ movdqu 96(%r12),%xmm13
+ pxor %xmm12,%xmm4
+ movdqu 112(%r12),%xmm14
+ leaq 128(%r12),%r12
+ movdqa %xmm6,112(%rsp)
+ pxor %xmm13,%xmm5
+ leaq 128(%rsp),%rax
+ pxor %xmm14,%xmm6
+ movl %edx,%r10d
+
+ call _bsaes_encrypt8
+
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm3
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm5
+ movdqu %xmm3,32(%r13)
+ pxor 64(%rsp),%xmm2
+ movdqu %xmm5,48(%r13)
+ pxor 80(%rsp),%xmm6
+ movdqu %xmm2,64(%r13)
+ pxor 96(%rsp),%xmm1
+ movdqu %xmm6,80(%r13)
+ pxor 112(%rsp),%xmm4
+ movdqu %xmm1,96(%r13)
+ movdqu %xmm4,112(%r13)
+ leaq 128(%r13),%r13
+
+ movdqa 112(%rsp),%xmm6
+ pxor %xmm14,%xmm14
+ movdqa .Lxts_magic(%rip),%xmm12
+ pcmpgtd %xmm6,%xmm14
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+
+ subq $128,%r14
+ jnc .Lxts_enc_loop
+
+.Lxts_enc_short:
+ addq $128,%r14
+ jz .Lxts_enc_done
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm15
+ movdqa %xmm6,0(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm0
+ movdqa %xmm6,16(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 0(%r12),%xmm7
+ cmpq $16,%r14
+ je .Lxts_enc_1
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm1
+ movdqa %xmm6,32(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 16(%r12),%xmm8
+ cmpq $32,%r14
+ je .Lxts_enc_2
+ pxor %xmm7,%xmm15
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm2
+ movdqa %xmm6,48(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 32(%r12),%xmm9
+ cmpq $48,%r14
+ je .Lxts_enc_3
+ pxor %xmm8,%xmm0
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm3
+ movdqa %xmm6,64(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 48(%r12),%xmm10
+ cmpq $64,%r14
+ je .Lxts_enc_4
+ pxor %xmm9,%xmm1
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm4
+ movdqa %xmm6,80(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 64(%r12),%xmm11
+ cmpq $80,%r14
+ je .Lxts_enc_5
+ pxor %xmm10,%xmm2
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm5
+ movdqa %xmm6,96(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 80(%r12),%xmm12
+ cmpq $96,%r14
+ je .Lxts_enc_6
+ pxor %xmm11,%xmm3
+ movdqu 96(%r12),%xmm13
+ pxor %xmm12,%xmm4
+ movdqa %xmm6,112(%rsp)
+ leaq 112(%r12),%r12
+ pxor %xmm13,%xmm5
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+
+ call _bsaes_encrypt8
+
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm3
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm5
+ movdqu %xmm3,32(%r13)
+ pxor 64(%rsp),%xmm2
+ movdqu %xmm5,48(%r13)
+ pxor 80(%rsp),%xmm6
+ movdqu %xmm2,64(%r13)
+ pxor 96(%rsp),%xmm1
+ movdqu %xmm6,80(%r13)
+ movdqu %xmm1,96(%r13)
+ leaq 112(%r13),%r13
+
+ movdqa 112(%rsp),%xmm6
+ jmp .Lxts_enc_done
+.align 16
+.Lxts_enc_6:
+ pxor %xmm11,%xmm3
+ leaq 96(%r12),%r12
+ pxor %xmm12,%xmm4
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+
+ call _bsaes_encrypt8
+
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm3
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm5
+ movdqu %xmm3,32(%r13)
+ pxor 64(%rsp),%xmm2
+ movdqu %xmm5,48(%r13)
+ pxor 80(%rsp),%xmm6
+ movdqu %xmm2,64(%r13)
+ movdqu %xmm6,80(%r13)
+ leaq 96(%r13),%r13
+
+ movdqa 96(%rsp),%xmm6
+ jmp .Lxts_enc_done
+.align 16
+.Lxts_enc_5:
+ pxor %xmm10,%xmm2
+ leaq 80(%r12),%r12
+ pxor %xmm11,%xmm3
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+
+ call _bsaes_encrypt8
+
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm3
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm5
+ movdqu %xmm3,32(%r13)
+ pxor 64(%rsp),%xmm2
+ movdqu %xmm5,48(%r13)
+ movdqu %xmm2,64(%r13)
+ leaq 80(%r13),%r13
+
+ movdqa 80(%rsp),%xmm6
+ jmp .Lxts_enc_done
+.align 16
+.Lxts_enc_4:
+ pxor %xmm9,%xmm1
+ leaq 64(%r12),%r12
+ pxor %xmm10,%xmm2
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+
+ call _bsaes_encrypt8
+
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm3
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm5
+ movdqu %xmm3,32(%r13)
+ movdqu %xmm5,48(%r13)
+ leaq 64(%r13),%r13
+
+ movdqa 64(%rsp),%xmm6
+ jmp .Lxts_enc_done
+.align 16
+.Lxts_enc_3:
+ pxor %xmm8,%xmm0
+ leaq 48(%r12),%r12
+ pxor %xmm9,%xmm1
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+
+ call _bsaes_encrypt8
+
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm3
+ movdqu %xmm0,16(%r13)
+ movdqu %xmm3,32(%r13)
+ leaq 48(%r13),%r13
+
+ movdqa 48(%rsp),%xmm6
+ jmp .Lxts_enc_done
+.align 16
+.Lxts_enc_2:
+ pxor %xmm7,%xmm15
+ leaq 32(%r12),%r12
+ pxor %xmm8,%xmm0
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+
+ call _bsaes_encrypt8
+
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ movdqu %xmm0,16(%r13)
+ leaq 32(%r13),%r13
+
+ movdqa 32(%rsp),%xmm6
+ jmp .Lxts_enc_done
+.align 16
+.Lxts_enc_1:
+ pxor %xmm15,%xmm7
+ leaq 16(%r12),%r12
+ movdqa %xmm7,32(%rbp)
+ leaq 32(%rbp),%rdi
+ leaq 32(%rbp),%rsi
+ leaq (%r15),%rdx
+ call asm_AES_encrypt
+ pxor 32(%rbp),%xmm15
+
+
+
+
+
+ movdqu %xmm15,0(%r13)
+ leaq 16(%r13),%r13
+
+ movdqa 16(%rsp),%xmm6
+
+.Lxts_enc_done:
+ andl $15,%ebx
+ jz .Lxts_enc_ret
+ movq %r13,%rdx
+
+.Lxts_enc_steal:
+ movzbl (%r12),%eax
+ movzbl -16(%rdx),%ecx
+ leaq 1(%r12),%r12
+ movb %al,-16(%rdx)
+ movb %cl,0(%rdx)
+ leaq 1(%rdx),%rdx
+ subl $1,%ebx
+ jnz .Lxts_enc_steal
+
+ movdqu -16(%r13),%xmm15
+ leaq 32(%rbp),%rdi
+ pxor %xmm6,%xmm15
+ leaq 32(%rbp),%rsi
+ movdqa %xmm15,32(%rbp)
+ leaq (%r15),%rdx
+ call asm_AES_encrypt
+ pxor 32(%rbp),%xmm6
+ movdqu %xmm6,-16(%r13)
+
+.Lxts_enc_ret:
+ leaq (%rsp),%rax
+ pxor %xmm0,%xmm0
+.Lxts_enc_bzero:
+ movdqa %xmm0,0(%rax)
+ movdqa %xmm0,16(%rax)
+ leaq 32(%rax),%rax
+ cmpq %rax,%rbp
+ ja .Lxts_enc_bzero
+
+ leaq (%rbp),%rsp
+ movq 72(%rsp),%r15
+ movq 80(%rsp),%r14
+ movq 88(%rsp),%r13
+ movq 96(%rsp),%r12
+ movq 104(%rsp),%rbx
+ movq 112(%rsp),%rax
+ leaq 120(%rsp),%rsp
+ movq %rax,%rbp
+.Lxts_enc_epilogue:
+ .byte 0xf3,0xc3
+.size bsaes_xts_encrypt,.-bsaes_xts_encrypt
+
+.globl bsaes_xts_decrypt
+.type bsaes_xts_decrypt,@function
+.align 16
+bsaes_xts_decrypt:
+ movq %rsp,%rax
+.Lxts_dec_prologue:
+ pushq %rbp
+ pushq %rbx
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ leaq -72(%rsp),%rsp
+ movq %rsp,%rbp
+ movq %rdi,%r12
+ movq %rsi,%r13
+ movq %rdx,%r14
+ movq %rcx,%r15
+
+ leaq (%r9),%rdi
+ leaq 32(%rbp),%rsi
+ leaq (%r8),%rdx
+ call asm_AES_encrypt
+
+ movl 240(%r15),%eax
+ movq %r14,%rbx
+
+ movl %eax,%edx
+ shlq $7,%rax
+ subq $96,%rax
+ subq %rax,%rsp
+
+ movq %rsp,%rax
+ movq %r15,%rcx
+ movl %edx,%r10d
+ call _bsaes_key_convert
+ pxor (%rsp),%xmm7
+ movdqa %xmm6,(%rax)
+ movdqa %xmm7,(%rsp)
+
+ xorl %eax,%eax
+ andq $-16,%r14
+ testl $15,%ebx
+ setnz %al
+ shlq $4,%rax
+ subq %rax,%r14
+
+ subq $128,%rsp
+ movdqa 32(%rbp),%xmm6
+
+ pxor %xmm14,%xmm14
+ movdqa .Lxts_magic(%rip),%xmm12
+ pcmpgtd %xmm6,%xmm14
+
+ subq $128,%r14
+ jc .Lxts_dec_short
+ jmp .Lxts_dec_loop
+
+.align 16
+.Lxts_dec_loop:
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm15
+ movdqa %xmm6,0(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm0
+ movdqa %xmm6,16(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 0(%r12),%xmm7
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm1
+ movdqa %xmm6,32(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 16(%r12),%xmm8
+ pxor %xmm7,%xmm15
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm2
+ movdqa %xmm6,48(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 32(%r12),%xmm9
+ pxor %xmm8,%xmm0
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm3
+ movdqa %xmm6,64(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 48(%r12),%xmm10
+ pxor %xmm9,%xmm1
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm4
+ movdqa %xmm6,80(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 64(%r12),%xmm11
+ pxor %xmm10,%xmm2
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm5
+ movdqa %xmm6,96(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 80(%r12),%xmm12
+ pxor %xmm11,%xmm3
+ movdqu 96(%r12),%xmm13
+ pxor %xmm12,%xmm4
+ movdqu 112(%r12),%xmm14
+ leaq 128(%r12),%r12
+ movdqa %xmm6,112(%rsp)
+ pxor %xmm13,%xmm5
+ leaq 128(%rsp),%rax
+ pxor %xmm14,%xmm6
+ movl %edx,%r10d
+
+ call _bsaes_decrypt8
+
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm5
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm3
+ movdqu %xmm5,32(%r13)
+ pxor 64(%rsp),%xmm1
+ movdqu %xmm3,48(%r13)
+ pxor 80(%rsp),%xmm6
+ movdqu %xmm1,64(%r13)
+ pxor 96(%rsp),%xmm2
+ movdqu %xmm6,80(%r13)
+ pxor 112(%rsp),%xmm4
+ movdqu %xmm2,96(%r13)
+ movdqu %xmm4,112(%r13)
+ leaq 128(%r13),%r13
+
+ movdqa 112(%rsp),%xmm6
+ pxor %xmm14,%xmm14
+ movdqa .Lxts_magic(%rip),%xmm12
+ pcmpgtd %xmm6,%xmm14
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+
+ subq $128,%r14
+ jnc .Lxts_dec_loop
+
+.Lxts_dec_short:
+ addq $128,%r14
+ jz .Lxts_dec_done
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm15
+ movdqa %xmm6,0(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm0
+ movdqa %xmm6,16(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 0(%r12),%xmm7
+ cmpq $16,%r14
+ je .Lxts_dec_1
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm1
+ movdqa %xmm6,32(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 16(%r12),%xmm8
+ cmpq $32,%r14
+ je .Lxts_dec_2
+ pxor %xmm7,%xmm15
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm2
+ movdqa %xmm6,48(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 32(%r12),%xmm9
+ cmpq $48,%r14
+ je .Lxts_dec_3
+ pxor %xmm8,%xmm0
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm3
+ movdqa %xmm6,64(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 48(%r12),%xmm10
+ cmpq $64,%r14
+ je .Lxts_dec_4
+ pxor %xmm9,%xmm1
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm4
+ movdqa %xmm6,80(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 64(%r12),%xmm11
+ cmpq $80,%r14
+ je .Lxts_dec_5
+ pxor %xmm10,%xmm2
+ pshufd $19,%xmm14,%xmm13
+ pxor %xmm14,%xmm14
+ movdqa %xmm6,%xmm5
+ movdqa %xmm6,96(%rsp)
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ pcmpgtd %xmm6,%xmm14
+ pxor %xmm13,%xmm6
+ movdqu 80(%r12),%xmm12
+ cmpq $96,%r14
+ je .Lxts_dec_6
+ pxor %xmm11,%xmm3
+ movdqu 96(%r12),%xmm13
+ pxor %xmm12,%xmm4
+ movdqa %xmm6,112(%rsp)
+ leaq 112(%r12),%r12
+ pxor %xmm13,%xmm5
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+
+ call _bsaes_decrypt8
+
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm5
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm3
+ movdqu %xmm5,32(%r13)
+ pxor 64(%rsp),%xmm1
+ movdqu %xmm3,48(%r13)
+ pxor 80(%rsp),%xmm6
+ movdqu %xmm1,64(%r13)
+ pxor 96(%rsp),%xmm2
+ movdqu %xmm6,80(%r13)
+ movdqu %xmm2,96(%r13)
+ leaq 112(%r13),%r13
+
+ movdqa 112(%rsp),%xmm6
+ jmp .Lxts_dec_done
+.align 16
+.Lxts_dec_6:
+ pxor %xmm11,%xmm3
+ leaq 96(%r12),%r12
+ pxor %xmm12,%xmm4
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+
+ call _bsaes_decrypt8
+
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm5
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm3
+ movdqu %xmm5,32(%r13)
+ pxor 64(%rsp),%xmm1
+ movdqu %xmm3,48(%r13)
+ pxor 80(%rsp),%xmm6
+ movdqu %xmm1,64(%r13)
+ movdqu %xmm6,80(%r13)
+ leaq 96(%r13),%r13
+
+ movdqa 96(%rsp),%xmm6
+ jmp .Lxts_dec_done
+.align 16
+.Lxts_dec_5:
+ pxor %xmm10,%xmm2
+ leaq 80(%r12),%r12
+ pxor %xmm11,%xmm3
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+
+ call _bsaes_decrypt8
+
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm5
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm3
+ movdqu %xmm5,32(%r13)
+ pxor 64(%rsp),%xmm1
+ movdqu %xmm3,48(%r13)
+ movdqu %xmm1,64(%r13)
+ leaq 80(%r13),%r13
+
+ movdqa 80(%rsp),%xmm6
+ jmp .Lxts_dec_done
+.align 16
+.Lxts_dec_4:
+ pxor %xmm9,%xmm1
+ leaq 64(%r12),%r12
+ pxor %xmm10,%xmm2
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+
+ call _bsaes_decrypt8
+
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm5
+ movdqu %xmm0,16(%r13)
+ pxor 48(%rsp),%xmm3
+ movdqu %xmm5,32(%r13)
+ movdqu %xmm3,48(%r13)
+ leaq 64(%r13),%r13
+
+ movdqa 64(%rsp),%xmm6
+ jmp .Lxts_dec_done
+.align 16
+.Lxts_dec_3:
+ pxor %xmm8,%xmm0
+ leaq 48(%r12),%r12
+ pxor %xmm9,%xmm1
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+
+ call _bsaes_decrypt8
+
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ pxor 32(%rsp),%xmm5
+ movdqu %xmm0,16(%r13)
+ movdqu %xmm5,32(%r13)
+ leaq 48(%r13),%r13
+
+ movdqa 48(%rsp),%xmm6
+ jmp .Lxts_dec_done
+.align 16
+.Lxts_dec_2:
+ pxor %xmm7,%xmm15
+ leaq 32(%r12),%r12
+ pxor %xmm8,%xmm0
+ leaq 128(%rsp),%rax
+ movl %edx,%r10d
+
+ call _bsaes_decrypt8
+
+ pxor 0(%rsp),%xmm15
+ pxor 16(%rsp),%xmm0
+ movdqu %xmm15,0(%r13)
+ movdqu %xmm0,16(%r13)
+ leaq 32(%r13),%r13
+
+ movdqa 32(%rsp),%xmm6
+ jmp .Lxts_dec_done
+.align 16
+.Lxts_dec_1:
+ pxor %xmm15,%xmm7
+ leaq 16(%r12),%r12
+ movdqa %xmm7,32(%rbp)
+ leaq 32(%rbp),%rdi
+ leaq 32(%rbp),%rsi
+ leaq (%r15),%rdx
+ call asm_AES_decrypt
+ pxor 32(%rbp),%xmm15
+
+
+
+
+
+ movdqu %xmm15,0(%r13)
+ leaq 16(%r13),%r13
+
+ movdqa 16(%rsp),%xmm6
+
+.Lxts_dec_done:
+ andl $15,%ebx
+ jz .Lxts_dec_ret
+
+ pxor %xmm14,%xmm14
+ movdqa .Lxts_magic(%rip),%xmm12
+ pcmpgtd %xmm6,%xmm14
+ pshufd $19,%xmm14,%xmm13
+ movdqa %xmm6,%xmm5
+ paddq %xmm6,%xmm6
+ pand %xmm12,%xmm13
+ movdqu (%r12),%xmm15
+ pxor %xmm13,%xmm6
+
+ leaq 32(%rbp),%rdi
+ pxor %xmm6,%xmm15
+ leaq 32(%rbp),%rsi
+ movdqa %xmm15,32(%rbp)
+ leaq (%r15),%rdx
+ call asm_AES_decrypt
+ pxor 32(%rbp),%xmm6
+ movq %r13,%rdx
+ movdqu %xmm6,(%r13)
+
+.Lxts_dec_steal:
+ movzbl 16(%r12),%eax
+ movzbl (%rdx),%ecx
+ leaq 1(%r12),%r12
+ movb %al,(%rdx)
+ movb %cl,16(%rdx)
+ leaq 1(%rdx),%rdx
+ subl $1,%ebx
+ jnz .Lxts_dec_steal
+
+ movdqu (%r13),%xmm15
+ leaq 32(%rbp),%rdi
+ pxor %xmm5,%xmm15
+ leaq 32(%rbp),%rsi
+ movdqa %xmm15,32(%rbp)
+ leaq (%r15),%rdx
+ call asm_AES_decrypt
+ pxor 32(%rbp),%xmm5
+ movdqu %xmm5,(%r13)
+
+.Lxts_dec_ret:
+ leaq (%rsp),%rax
+ pxor %xmm0,%xmm0
+.Lxts_dec_bzero:
+ movdqa %xmm0,0(%rax)
+ movdqa %xmm0,16(%rax)
+ leaq 32(%rax),%rax
+ cmpq %rax,%rbp
+ ja .Lxts_dec_bzero
+
+ leaq (%rbp),%rsp
+ movq 72(%rsp),%r15
+ movq 80(%rsp),%r14
+ movq 88(%rsp),%r13
+ movq 96(%rsp),%r12
+ movq 104(%rsp),%rbx
+ movq 112(%rsp),%rax
+ leaq 120(%rsp),%rsp
+ movq %rax,%rbp
+.Lxts_dec_epilogue:
+ .byte 0xf3,0xc3
+.size bsaes_xts_decrypt,.-bsaes_xts_decrypt
+.type _bsaes_const,@object
+.align 64
+_bsaes_const:
+.LM0ISR:
+.quad 0x0a0e0206070b0f03, 0x0004080c0d010509
+.LISRM0:
+.quad 0x01040b0e0205080f, 0x0306090c00070a0d
+.LISR:
+.quad 0x0504070602010003, 0x0f0e0d0c080b0a09
+.LBS0:
+.quad 0x5555555555555555, 0x5555555555555555
+.LBS1:
+.quad 0x3333333333333333, 0x3333333333333333
+.LBS2:
+.quad 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f
+.LSR:
+.quad 0x0504070600030201, 0x0f0e0d0c0a09080b
+.LSRM0:
+.quad 0x0304090e00050a0f, 0x01060b0c0207080d
+.LM0SR:
+.quad 0x0a0e02060f03070b, 0x0004080c05090d01
+.LSWPUP:
+.quad 0x0706050403020100, 0x0c0d0e0f0b0a0908
+.LSWPUPM0SR:
+.quad 0x0a0d02060c03070b, 0x0004080f05090e01
+.LADD1:
+.quad 0x0000000000000000, 0x0000000100000000
+.LADD2:
+.quad 0x0000000000000000, 0x0000000200000000
+.LADD3:
+.quad 0x0000000000000000, 0x0000000300000000
+.LADD4:
+.quad 0x0000000000000000, 0x0000000400000000
+.LADD5:
+.quad 0x0000000000000000, 0x0000000500000000
+.LADD6:
+.quad 0x0000000000000000, 0x0000000600000000
+.LADD7:
+.quad 0x0000000000000000, 0x0000000700000000
+.LADD8:
+.quad 0x0000000000000000, 0x0000000800000000
+.Lxts_magic:
+.long 0x87,0,1,0
+.Lmasks:
+.quad 0x0101010101010101, 0x0101010101010101
+.quad 0x0202020202020202, 0x0202020202020202
+.quad 0x0404040404040404, 0x0404040404040404
+.quad 0x0808080808080808, 0x0808080808080808
+.LM0:
+.quad 0x02060a0e03070b0f, 0x0004080c0105090d
+.L63:
+.quad 0x6363636363636363, 0x6363636363636363
+.byte 66,105,116,45,115,108,105,99,101,100,32,65,69,83,32,102,111,114,32,120,56,54,95,54,52,47,83,83,83,69,51,44,32,69,109,105,108,105,97,32,75,195,164,115,112,101,114,44,32,80,101,116,101,114,32,83,99,104,119,97,98,101,44,32,65,110,100,121,32,80,111,108,121,97,107,111,118,0
+.align 64
+.size _bsaes_const,.-_bsaes_const
diff --git a/secure/lib/libcrypto/amd64/cmll-x86_64.S b/secure/lib/libcrypto/amd64/cmll-x86_64.S
new file mode 100644
index 000000000000..f42203c6ecb4
--- /dev/null
+++ b/secure/lib/libcrypto/amd64/cmll-x86_64.S
@@ -0,0 +1,1839 @@
+ # $FreeBSD$
+.text
+
+
+.globl Camellia_EncryptBlock
+.type Camellia_EncryptBlock,@function
+.align 16
+Camellia_EncryptBlock:
+ movl $128,%eax
+ subl %edi,%eax
+ movl $3,%edi
+ adcl $0,%edi
+ jmp .Lenc_rounds
+.size Camellia_EncryptBlock,.-Camellia_EncryptBlock
+
+.globl Camellia_EncryptBlock_Rounds
+.type Camellia_EncryptBlock_Rounds,@function
+.align 16
+.Lenc_rounds:
+Camellia_EncryptBlock_Rounds:
+ pushq %rbx
+ pushq %rbp
+ pushq %r13
+ pushq %r14
+ pushq %r15
+.Lenc_prologue:
+
+
+ movq %rcx,%r13
+ movq %rdx,%r14
+
+ shll $6,%edi
+ leaq .LCamellia_SBOX(%rip),%rbp
+ leaq (%r14,%rdi,1),%r15
+
+ movl 0(%rsi),%r8d
+ movl 4(%rsi),%r9d
+ movl 8(%rsi),%r10d
+ bswapl %r8d
+ movl 12(%rsi),%r11d
+ bswapl %r9d
+ bswapl %r10d
+ bswapl %r11d
+
+ call _x86_64_Camellia_encrypt
+
+ bswapl %r8d
+ bswapl %r9d
+ bswapl %r10d
+ movl %r8d,0(%r13)
+ bswapl %r11d
+ movl %r9d,4(%r13)
+ movl %r10d,8(%r13)
+ movl %r11d,12(%r13)
+
+ movq 0(%rsp),%r15
+ movq 8(%rsp),%r14
+ movq 16(%rsp),%r13
+ movq 24(%rsp),%rbp
+ movq 32(%rsp),%rbx
+ leaq 40(%rsp),%rsp
+.Lenc_epilogue:
+ .byte 0xf3,0xc3
+.size Camellia_EncryptBlock_Rounds,.-Camellia_EncryptBlock_Rounds
+
+.type _x86_64_Camellia_encrypt,@function
+.align 16
+_x86_64_Camellia_encrypt:
+ xorl 0(%r14),%r9d
+ xorl 4(%r14),%r8d
+ xorl 8(%r14),%r11d
+ xorl 12(%r14),%r10d
+.align 16
+.Leloop:
+ movl 16(%r14),%ebx
+ movl 20(%r14),%eax
+
+ xorl %r8d,%eax
+ xorl %r9d,%ebx
+ movzbl %ah,%esi
+ movzbl %bl,%edi
+ movl 2052(%rbp,%rsi,8),%edx
+ movl 0(%rbp,%rdi,8),%ecx
+ movzbl %al,%esi
+ shrl $16,%eax
+ movzbl %bh,%edi
+ xorl 4(%rbp,%rsi,8),%edx
+ shrl $16,%ebx
+ xorl 4(%rbp,%rdi,8),%ecx
+ movzbl %ah,%esi
+ movzbl %bl,%edi
+ xorl 0(%rbp,%rsi,8),%edx
+ xorl 2052(%rbp,%rdi,8),%ecx
+ movzbl %al,%esi
+ movzbl %bh,%edi
+ xorl 2048(%rbp,%rsi,8),%edx
+ xorl 2048(%rbp,%rdi,8),%ecx
+ movl 24(%r14),%ebx
+ movl 28(%r14),%eax
+ xorl %edx,%ecx
+ rorl $8,%edx
+ xorl %ecx,%r10d
+ xorl %ecx,%r11d
+ xorl %edx,%r11d
+ xorl %r10d,%eax
+ xorl %r11d,%ebx
+ movzbl %ah,%esi
+ movzbl %bl,%edi
+ movl 2052(%rbp,%rsi,8),%edx
+ movl 0(%rbp,%rdi,8),%ecx
+ movzbl %al,%esi
+ shrl $16,%eax
+ movzbl %bh,%edi
+ xorl 4(%rbp,%rsi,8),%edx
+ shrl $16,%ebx
+ xorl 4(%rbp,%rdi,8),%ecx
+ movzbl %ah,%esi
+ movzbl %bl,%edi
+ xorl 0(%rbp,%rsi,8),%edx
+ xorl 2052(%rbp,%rdi,8),%ecx
+ movzbl %al,%esi
+ movzbl %bh,%edi
+ xorl 2048(%rbp,%rsi,8),%edx
+ xorl 2048(%rbp,%rdi,8),%ecx
+ movl 32(%r14),%ebx
+ movl 36(%r14),%eax
+ xorl %edx,%ecx
+ rorl $8,%edx
+ xorl %ecx,%r8d
+ xorl %ecx,%r9d
+ xorl %edx,%r9d
+ xorl %r8d,%eax
+ xorl %r9d,%ebx
+ movzbl %ah,%esi
+ movzbl %bl,%edi
+ movl 2052(%rbp,%rsi,8),%edx
+ movl 0(%rbp,%rdi,8),%ecx
+ movzbl %al,%esi
+ shrl $16,%eax
+ movzbl %bh,%edi
+ xorl 4(%rbp,%rsi,8),%edx
+ shrl $16,%ebx
+ xorl 4(%rbp,%rdi,8),%ecx
+ movzbl %ah,%esi
+ movzbl %bl,%edi
+ xorl 0(%rbp,%rsi,8),%edx
+ xorl 2052(%rbp,%rdi,8),%ecx
+ movzbl %al,%esi
+ movzbl %bh,%edi
+ xorl 2048(%rbp,%rsi,8),%edx
+ xorl 2048(%rbp,%rdi,8),%ecx
+ movl 40(%r14),%ebx
+ movl 44(%r14),%eax
+ xorl %edx,%ecx
+ rorl $8,%edx
+ xorl %ecx,%r10d
+ xorl %ecx,%r11d
+ xorl %edx,%r11d
+ xorl %r10d,%eax
+ xorl %r11d,%ebx
+ movzbl %ah,%esi
+ movzbl %bl,%edi
+ movl 2052(%rbp,%rsi,8),%edx
+ movl 0(%rbp,%rdi,8),%ecx
+ movzbl %al,%esi
+ shrl $16,%eax
+ movzbl %bh,%edi
+ xorl 4(%rbp,%rsi,8),%edx
+ shrl $16,%ebx
+ xorl 4(%rbp,%rdi,8),%ecx
+ movzbl %ah,%esi
+ movzbl %bl,%edi
+ xorl 0(%rbp,%rsi,8),%edx
+ xorl 2052(%rbp,%rdi,8),%ecx
+ movzbl %al,%esi
+ movzbl %bh,%edi
+ xorl 2048(%rbp,%rsi,8),%edx
+ xorl 2048(%rbp,%rdi,8),%ecx
+ movl 48(%r14),%ebx
+ movl 52(%r14),%eax
+ xorl %edx,%ecx
+ rorl $8,%edx
+ xorl %ecx,%r8d
+ xorl %ecx,%r9d
+ xorl %edx,%r9d
+ xorl %r8d,%eax
+ xorl %r9d,%ebx
+ movzbl %ah,%esi
+ movzbl %bl,%edi
+ movl 2052(%rbp,%rsi,8),%edx
+ movl 0(%rbp,%rdi,8),%ecx
+ movzbl %al,%esi
+ shrl $16,%eax
+ movzbl %bh,%edi
+ xorl 4(%rbp,%rsi,8),%edx
+ shrl $16,%ebx
+ xorl 4(%rbp,%rdi,8),%ecx
+ movzbl %ah,%esi
+ movzbl %bl,%edi
+ xorl 0(%rbp,%rsi,8),%edx
+ xorl 2052(%rbp,%rdi,8),%ecx
+ movzbl %al,%esi
+ movzbl %bh,%edi
+ xorl 2048(%rbp,%rsi,8),%edx
+ xorl 2048(%rbp,%rdi,8),%ecx
+ movl 56(%r14),%ebx
+ movl 60(%r14),%eax
+ xorl %edx,%ecx
+ rorl $8,%edx
+ xorl %ecx,%r10d
+ xorl %ecx,%r11d
+ xorl %edx,%r11d
+ xorl %r10d,%eax
+ xorl %r11d,%ebx
+ movzbl %ah,%esi
+ movzbl %bl,%edi
+ movl 2052(%rbp,%rsi,8),%edx
+ movl 0(%rbp,%rdi,8),%ecx
+ movzbl %al,%esi
+ shrl $16,%eax
+ movzbl %bh,%edi
+ xorl 4(%rbp,%rsi,8),%edx
+ shrl $16,%ebx
+ xorl 4(%rbp,%rdi,8),%ecx
+ movzbl %ah,%esi
+ movzbl %bl,%edi
+ xorl 0(%rbp,%rsi,8),%edx
+ xorl 2052(%rbp,%rdi,8),%ecx
+ movzbl %al,%esi
+ movzbl %bh,%edi
+ xorl 2048(%rbp,%rsi,8),%edx
+ xorl 2048(%rbp,%rdi,8),%ecx
+ movl 64(%r14),%ebx
+ movl 68(%r14),%eax
+ xorl %edx,%ecx
+ rorl $8,%edx
+ xorl %ecx,%r8d
+ xorl %ecx,%r9d
+ xorl %edx,%r9d
+ leaq 64(%r14),%r14
+ cmpq %r15,%r14
+ movl 8(%r14),%edx
+ movl 12(%r14),%ecx
+ je .Ledone
+
+ andl %r8d,%eax
+ orl %r11d,%edx
+ roll $1,%eax
+ xorl %edx,%r10d
+ xorl %eax,%r9d
+ andl %r10d,%ecx
+ orl %r9d,%ebx
+ roll $1,%ecx
+ xorl %ebx,%r8d
+ xorl %ecx,%r11d
+ jmp .Leloop
+
+.align 16
+.Ledone:
+ xorl %r10d,%eax
+ xorl %r11d,%ebx
+ xorl %r8d,%ecx
+ xorl %r9d,%edx
+
+ movl %eax,%r8d
+ movl %ebx,%r9d
+ movl %ecx,%r10d
+ movl %edx,%r11d
+
+.byte 0xf3,0xc3
+.size _x86_64_Camellia_encrypt,.-_x86_64_Camellia_encrypt
+
+
+.globl Camellia_DecryptBlock
+.type Camellia_DecryptBlock,@function
+.align 16
+Camellia_DecryptBlock:
+ movl $128,%eax
+ subl %edi,%eax
+ movl $3,%edi
+ adcl $0,%edi
+ jmp .Ldec_rounds
+.size Camellia_DecryptBlock,.-Camellia_DecryptBlock
+
+.globl Camellia_DecryptBlock_Rounds
+.type Camellia_DecryptBlock_Rounds,@function
+.align 16
+.Ldec_rounds:
+Camellia_DecryptBlock_Rounds:
+ pushq %rbx
+ pushq %rbp
+ pushq %r13
+ pushq %r14
+ pushq %r15
+.Ldec_prologue:
+
+
+ movq %rcx,%r13
+ movq %rdx,%r15
+
+ shll $6,%edi
+ leaq .LCamellia_SBOX(%rip),%rbp
+ leaq (%r15,%rdi,1),%r14
+
+ movl 0(%rsi),%r8d
+ movl 4(%rsi),%r9d
+ movl 8(%rsi),%r10d
+ bswapl %r8d
+ movl 12(%rsi),%r11d
+ bswapl %r9d
+ bswapl %r10d
+ bswapl %r11d
+
+ call _x86_64_Camellia_decrypt
+
+ bswapl %r8d
+ bswapl %r9d
+ bswapl %r10d
+ movl %r8d,0(%r13)
+ bswapl %r11d
+ movl %r9d,4(%r13)
+ movl %r10d,8(%r13)
+ movl %r11d,12(%r13)
+
+ movq 0(%rsp),%r15
+ movq 8(%rsp),%r14
+ movq 16(%rsp),%r13
+ movq 24(%rsp),%rbp
+ movq 32(%rsp),%rbx
+ leaq 40(%rsp),%rsp
+.Ldec_epilogue:
+ .byte 0xf3,0xc3
+.size Camellia_DecryptBlock_Rounds,.-Camellia_DecryptBlock_Rounds
+
+.type _x86_64_Camellia_decrypt,@function
+.align 16
+_x86_64_Camellia_decrypt:
+ xorl 0(%r14),%r9d
+ xorl 4(%r14),%r8d
+ xorl 8(%r14),%r11d
+ xorl 12(%r14),%r10d
+.align 16
+.Ldloop:
+ movl -8(%r14),%ebx
+ movl -4(%r14),%eax
+
+ xorl %r8d,%eax
+ xorl %r9d,%ebx
+ movzbl %ah,%esi
+ movzbl %bl,%edi
+ movl 2052(%rbp,%rsi,8),%edx
+ movl 0(%rbp,%rdi,8),%ecx
+ movzbl %al,%esi
+ shrl $16,%eax
+ movzbl %bh,%edi
+ xorl 4(%rbp,%rsi,8),%edx
+ shrl $16,%ebx
+ xorl 4(%rbp,%rdi,8),%ecx
+ movzbl %ah,%esi
+ movzbl %bl,%edi
+ xorl 0(%rbp,%rsi,8),%edx
+ xorl 2052(%rbp,%rdi,8),%ecx
+ movzbl %al,%esi
+ movzbl %bh,%edi
+ xorl 2048(%rbp,%rsi,8),%edx
+ xorl 2048(%rbp,%rdi,8),%ecx
+ movl -16(%r14),%ebx
+ movl -12(%r14),%eax
+ xorl %edx,%ecx
+ rorl $8,%edx
+ xorl %ecx,%r10d
+ xorl %ecx,%r11d
+ xorl %edx,%r11d
+ xorl %r10d,%eax
+ xorl %r11d,%ebx
+ movzbl %ah,%esi
+ movzbl %bl,%edi
+ movl 2052(%rbp,%rsi,8),%edx
+ movl 0(%rbp,%rdi,8),%ecx
+ movzbl %al,%esi
+ shrl $16,%eax
+ movzbl %bh,%edi
+ xorl 4(%rbp,%rsi,8),%edx
+ shrl $16,%ebx
+ xorl 4(%rbp,%rdi,8),%ecx
+ movzbl %ah,%esi
+ movzbl %bl,%edi
+ xorl 0(%rbp,%rsi,8),%edx
+ xorl 2052(%rbp,%rdi,8),%ecx
+ movzbl %al,%esi
+ movzbl %bh,%edi
+ xorl 2048(%rbp,%rsi,8),%edx
+ xorl 2048(%rbp,%rdi,8),%ecx
+ movl -24(%r14),%ebx
+ movl -20(%r14),%eax
+ xorl %edx,%ecx
+ rorl $8,%edx
+ xorl %ecx,%r8d
+ xorl %ecx,%r9d
+ xorl %edx,%r9d
+ xorl %r8d,%eax
+ xorl %r9d,%ebx
+ movzbl %ah,%esi
+ movzbl %bl,%edi
+ movl 2052(%rbp,%rsi,8),%edx
+ movl 0(%rbp,%rdi,8),%ecx
+ movzbl %al,%esi
+ shrl $16,%eax
+ movzbl %bh,%edi
+ xorl 4(%rbp,%rsi,8),%edx
+ shrl $16,%ebx
+ xorl 4(%rbp,%rdi,8),%ecx
+ movzbl %ah,%esi
+ movzbl %bl,%edi
+ xorl 0(%rbp,%rsi,8),%edx
+ xorl 2052(%rbp,%rdi,8),%ecx
+ movzbl %al,%esi
+ movzbl %bh,%edi
+ xorl 2048(%rbp,%rsi,8),%edx
+ xorl 2048(%rbp,%rdi,8),%ecx
+ movl -32(%r14),%ebx
+ movl -28(%r14),%eax
+ xorl %edx,%ecx
+ rorl $8,%edx
+ xorl %ecx,%r10d
+ xorl %ecx,%r11d
+ xorl %edx,%r11d
+ xorl %r10d,%eax
+ xorl %r11d,%ebx
+ movzbl %ah,%esi
+ movzbl %bl,%edi
+ movl 2052(%rbp,%rsi,8),%edx
+ movl 0(%rbp,%rdi,8),%ecx
+ movzbl %al,%esi
+ shrl $16,%eax
+ movzbl %bh,%edi
+ xorl 4(%rbp,%rsi,8),%edx
+ shrl $16,%ebx
+ xorl 4(%rbp,%rdi,8),%ecx
+ movzbl %ah,%esi
+ movzbl %bl,%edi
+ xorl 0(%rbp,%rsi,8),%edx
+ xorl 2052(%rbp,%rdi,8),%ecx
+ movzbl %al,%esi
+ movzbl %bh,%edi
+ xorl 2048(%rbp,%rsi,8),%edx
+ xorl 2048(%rbp,%rdi,8),%ecx
+ movl -40(%r14),%ebx
+ movl -36(%r14),%eax
+ xorl %edx,%ecx
+ rorl $8,%edx
+ xorl %ecx,%r8d
+ xorl %ecx,%r9d
+ xorl %edx,%r9d
+ xorl %r8d,%eax
+ xorl %r9d,%ebx
+ movzbl %ah,%esi
+ movzbl %bl,%edi
+ movl 2052(%rbp,%rsi,8),%edx
+ movl 0(%rbp,%rdi,8),%ecx
+ movzbl %al,%esi
+ shrl $16,%eax
+ movzbl %bh,%edi
+ xorl 4(%rbp,%rsi,8),%edx
+ shrl $16,%ebx
+ xorl 4(%rbp,%rdi,8),%ecx
+ movzbl %ah,%esi
+ movzbl %bl,%edi
+ xorl 0(%rbp,%rsi,8),%edx
+ xorl 2052(%rbp,%rdi,8),%ecx
+ movzbl %al,%esi
+ movzbl %bh,%edi
+ xorl 2048(%rbp,%rsi,8),%edx
+ xorl 2048(%rbp,%rdi,8),%ecx
+ movl -48(%r14),%ebx
+ movl -44(%r14),%eax
+ xorl %edx,%ecx
+ rorl $8,%edx
+ xorl %ecx,%r10d
+ xorl %ecx,%r11d
+ xorl %edx,%r11d
+ xorl %r10d,%eax
+ xorl %r11d,%ebx
+ movzbl %ah,%esi
+ movzbl %bl,%edi
+ movl 2052(%rbp,%rsi,8),%edx
+ movl 0(%rbp,%rdi,8),%ecx
+ movzbl %al,%esi
+ shrl $16,%eax
+ movzbl %bh,%edi
+ xorl 4(%rbp,%rsi,8),%edx
+ shrl $16,%ebx
+ xorl 4(%rbp,%rdi,8),%ecx
+ movzbl %ah,%esi
+ movzbl %bl,%edi
+ xorl 0(%rbp,%rsi,8),%edx
+ xorl 2052(%rbp,%rdi,8),%ecx
+ movzbl %al,%esi
+ movzbl %bh,%edi
+ xorl 2048(%rbp,%rsi,8),%edx
+ xorl 2048(%rbp,%rdi,8),%ecx
+ movl -56(%r14),%ebx
+ movl -52(%r14),%eax
+ xorl %edx,%ecx
+ rorl $8,%edx
+ xorl %ecx,%r8d
+ xorl %ecx,%r9d
+ xorl %edx,%r9d
+ leaq -64(%r14),%r14
+ cmpq %r15,%r14
+ movl 0(%r14),%edx
+ movl 4(%r14),%ecx
+ je .Lddone
+
+ andl %r8d,%eax
+ orl %r11d,%edx
+ roll $1,%eax
+ xorl %edx,%r10d
+ xorl %eax,%r9d
+ andl %r10d,%ecx
+ orl %r9d,%ebx
+ roll $1,%ecx
+ xorl %ebx,%r8d
+ xorl %ecx,%r11d
+
+ jmp .Ldloop
+
+.align 16
+.Lddone:
+ xorl %r10d,%ecx
+ xorl %r11d,%edx
+ xorl %r8d,%eax
+ xorl %r9d,%ebx
+
+ movl %ecx,%r8d
+ movl %edx,%r9d
+ movl %eax,%r10d
+ movl %ebx,%r11d
+
+.byte 0xf3,0xc3
+.size _x86_64_Camellia_decrypt,.-_x86_64_Camellia_decrypt
+.globl Camellia_Ekeygen
+.type Camellia_Ekeygen,@function
+.align 16
+Camellia_Ekeygen:
+ pushq %rbx
+ pushq %rbp
+ pushq %r13
+ pushq %r14
+ pushq %r15
+.Lkey_prologue:
+
+ movq %rdi,%r15
+ movq %rdx,%r13
+
+ movl 0(%rsi),%r8d
+ movl 4(%rsi),%r9d
+ movl 8(%rsi),%r10d
+ movl 12(%rsi),%r11d
+
+ bswapl %r8d
+ bswapl %r9d
+ bswapl %r10d
+ bswapl %r11d
+ movl %r9d,0(%r13)
+ movl %r8d,4(%r13)
+ movl %r11d,8(%r13)
+ movl %r10d,12(%r13)
+ cmpq $128,%r15
+ je .L1st128
+
+ movl 16(%rsi),%r8d
+ movl 20(%rsi),%r9d
+ cmpq $192,%r15
+ je .L1st192
+ movl 24(%rsi),%r10d
+ movl 28(%rsi),%r11d
+ jmp .L1st256
+.L1st192:
+ movl %r8d,%r10d
+ movl %r9d,%r11d
+ notl %r10d
+ notl %r11d
+.L1st256:
+ bswapl %r8d
+ bswapl %r9d
+ bswapl %r10d
+ bswapl %r11d
+ movl %r9d,32(%r13)
+ movl %r8d,36(%r13)
+ movl %r11d,40(%r13)
+ movl %r10d,44(%r13)
+ xorl 0(%r13),%r9d
+ xorl 4(%r13),%r8d
+ xorl 8(%r13),%r11d
+ xorl 12(%r13),%r10d
+
+.L1st128:
+ leaq .LCamellia_SIGMA(%rip),%r14
+ leaq .LCamellia_SBOX(%rip),%rbp
+
+ movl 0(%r14),%ebx
+ movl 4(%r14),%eax
+ xorl %r8d,%eax
+ xorl %r9d,%ebx
+ movzbl %ah,%esi
+ movzbl %bl,%edi
+ movl 2052(%rbp,%rsi,8),%edx
+ movl 0(%rbp,%rdi,8),%ecx
+ movzbl %al,%esi
+ shrl $16,%eax
+ movzbl %bh,%edi
+ xorl 4(%rbp,%rsi,8),%edx
+ shrl $16,%ebx
+ xorl 4(%rbp,%rdi,8),%ecx
+ movzbl %ah,%esi
+ movzbl %bl,%edi
+ xorl 0(%rbp,%rsi,8),%edx
+ xorl 2052(%rbp,%rdi,8),%ecx
+ movzbl %al,%esi
+ movzbl %bh,%edi
+ xorl 2048(%rbp,%rsi,8),%edx
+ xorl 2048(%rbp,%rdi,8),%ecx
+ movl 8(%r14),%ebx
+ movl 12(%r14),%eax
+ xorl %edx,%ecx
+ rorl $8,%edx
+ xorl %ecx,%r10d
+ xorl %ecx,%r11d
+ xorl %edx,%r11d
+ xorl %r10d,%eax
+ xorl %r11d,%ebx
+ movzbl %ah,%esi
+ movzbl %bl,%edi
+ movl 2052(%rbp,%rsi,8),%edx
+ movl 0(%rbp,%rdi,8),%ecx
+ movzbl %al,%esi
+ shrl $16,%eax
+ movzbl %bh,%edi
+ xorl 4(%rbp,%rsi,8),%edx
+ shrl $16,%ebx
+ xorl 4(%rbp,%rdi,8),%ecx
+ movzbl %ah,%esi
+ movzbl %bl,%edi
+ xorl 0(%rbp,%rsi,8),%edx
+ xorl 2052(%rbp,%rdi,8),%ecx
+ movzbl %al,%esi
+ movzbl %bh,%edi
+ xorl 2048(%rbp,%rsi,8),%edx
+ xorl 2048(%rbp,%rdi,8),%ecx
+ movl 16(%r14),%ebx
+ movl 20(%r14),%eax
+ xorl %edx,%ecx
+ rorl $8,%edx
+ xorl %ecx,%r8d
+ xorl %ecx,%r9d
+ xorl %edx,%r9d
+ xorl 0(%r13),%r9d
+ xorl 4(%r13),%r8d
+ xorl 8(%r13),%r11d
+ xorl 12(%r13),%r10d
+ xorl %r8d,%eax
+ xorl %r9d,%ebx
+ movzbl %ah,%esi
+ movzbl %bl,%edi
+ movl 2052(%rbp,%rsi,8),%edx
+ movl 0(%rbp,%rdi,8),%ecx
+ movzbl %al,%esi
+ shrl $16,%eax
+ movzbl %bh,%edi
+ xorl 4(%rbp,%rsi,8),%edx
+ shrl $16,%ebx
+ xorl 4(%rbp,%rdi,8),%ecx
+ movzbl %ah,%esi
+ movzbl %bl,%edi
+ xorl 0(%rbp,%rsi,8),%edx
+ xorl 2052(%rbp,%rdi,8),%ecx
+ movzbl %al,%esi
+ movzbl %bh,%edi
+ xorl 2048(%rbp,%rsi,8),%edx
+ xorl 2048(%rbp,%rdi,8),%ecx
+ movl 24(%r14),%ebx
+ movl 28(%r14),%eax
+ xorl %edx,%ecx
+ rorl $8,%edx
+ xorl %ecx,%r10d
+ xorl %ecx,%r11d
+ xorl %edx,%r11d
+ xorl %r10d,%eax
+ xorl %r11d,%ebx
+ movzbl %ah,%esi
+ movzbl %bl,%edi
+ movl 2052(%rbp,%rsi,8),%edx
+ movl 0(%rbp,%rdi,8),%ecx
+ movzbl %al,%esi
+ shrl $16,%eax
+ movzbl %bh,%edi
+ xorl 4(%rbp,%rsi,8),%edx
+ shrl $16,%ebx
+ xorl 4(%rbp,%rdi,8),%ecx
+ movzbl %ah,%esi
+ movzbl %bl,%edi
+ xorl 0(%rbp,%rsi,8),%edx
+ xorl 2052(%rbp,%rdi,8),%ecx
+ movzbl %al,%esi
+ movzbl %bh,%edi
+ xorl 2048(%rbp,%rsi,8),%edx
+ xorl 2048(%rbp,%rdi,8),%ecx
+ movl 32(%r14),%ebx
+ movl 36(%r14),%eax
+ xorl %edx,%ecx
+ rorl $8,%edx
+ xorl %ecx,%r8d
+ xorl %ecx,%r9d
+ xorl %edx,%r9d
+ cmpq $128,%r15
+ jne .L2nd256
+
+ leaq 128(%r13),%r13
+ shlq $32,%r8
+ shlq $32,%r10
+ orq %r9,%r8
+ orq %r11,%r10
+ movq -128(%r13),%rax
+ movq -120(%r13),%rbx
+ movq %r8,-112(%r13)
+ movq %r10,-104(%r13)
+ movq %rax,%r11
+ shlq $15,%rax
+ movq %rbx,%r9
+ shrq $49,%r9
+ shrq $49,%r11
+ orq %r9,%rax
+ shlq $15,%rbx
+ orq %r11,%rbx
+ movq %rax,-96(%r13)
+ movq %rbx,-88(%r13)
+ movq %r8,%r11
+ shlq $15,%r8
+ movq %r10,%r9
+ shrq $49,%r9
+ shrq $49,%r11
+ orq %r9,%r8
+ shlq $15,%r10
+ orq %r11,%r10
+ movq %r8,-80(%r13)
+ movq %r10,-72(%r13)
+ movq %r8,%r11
+ shlq $15,%r8
+ movq %r10,%r9
+ shrq $49,%r9
+ shrq $49,%r11
+ orq %r9,%r8
+ shlq $15,%r10
+ orq %r11,%r10
+ movq %r8,-64(%r13)
+ movq %r10,-56(%r13)
+ movq %rax,%r11
+ shlq $30,%rax
+ movq %rbx,%r9
+ shrq $34,%r9
+ shrq $34,%r11
+ orq %r9,%rax
+ shlq $30,%rbx
+ orq %r11,%rbx
+ movq %rax,-48(%r13)
+ movq %rbx,-40(%r13)
+ movq %r8,%r11
+ shlq $15,%r8
+ movq %r10,%r9
+ shrq $49,%r9
+ shrq $49,%r11
+ orq %r9,%r8
+ shlq $15,%r10
+ orq %r11,%r10
+ movq %r8,-32(%r13)
+ movq %rax,%r11
+ shlq $15,%rax
+ movq %rbx,%r9
+ shrq $49,%r9
+ shrq $49,%r11
+ orq %r9,%rax
+ shlq $15,%rbx
+ orq %r11,%rbx
+ movq %rbx,-24(%r13)
+ movq %r8,%r11
+ shlq $15,%r8
+ movq %r10,%r9
+ shrq $49,%r9
+ shrq $49,%r11
+ orq %r9,%r8
+ shlq $15,%r10
+ orq %r11,%r10
+ movq %r8,-16(%r13)
+ movq %r10,-8(%r13)
+ movq %rax,%r11
+ shlq $17,%rax
+ movq %rbx,%r9
+ shrq $47,%r9
+ shrq $47,%r11
+ orq %r9,%rax
+ shlq $17,%rbx
+ orq %r11,%rbx
+ movq %rax,0(%r13)
+ movq %rbx,8(%r13)
+ movq %rax,%r11
+ shlq $17,%rax
+ movq %rbx,%r9
+ shrq $47,%r9
+ shrq $47,%r11
+ orq %r9,%rax
+ shlq $17,%rbx
+ orq %r11,%rbx
+ movq %rax,16(%r13)
+ movq %rbx,24(%r13)
+ movq %r8,%r11
+ shlq $34,%r8
+ movq %r10,%r9
+ shrq $30,%r9
+ shrq $30,%r11
+ orq %r9,%r8
+ shlq $34,%r10
+ orq %r11,%r10
+ movq %r8,32(%r13)
+ movq %r10,40(%r13)
+ movq %rax,%r11
+ shlq $17,%rax
+ movq %rbx,%r9
+ shrq $47,%r9
+ shrq $47,%r11
+ orq %r9,%rax
+ shlq $17,%rbx
+ orq %r11,%rbx
+ movq %rax,48(%r13)
+ movq %rbx,56(%r13)
+ movq %r8,%r11
+ shlq $17,%r8
+ movq %r10,%r9
+ shrq $47,%r9
+ shrq $47,%r11
+ orq %r9,%r8
+ shlq $17,%r10
+ orq %r11,%r10
+ movq %r8,64(%r13)
+ movq %r10,72(%r13)
+ movl $3,%eax
+ jmp .Ldone
+.align 16
+.L2nd256:
+ movl %r9d,48(%r13)
+ movl %r8d,52(%r13)
+ movl %r11d,56(%r13)
+ movl %r10d,60(%r13)
+ xorl 32(%r13),%r9d
+ xorl 36(%r13),%r8d
+ xorl 40(%r13),%r11d
+ xorl 44(%r13),%r10d
+ xorl %r8d,%eax
+ xorl %r9d,%ebx
+ movzbl %ah,%esi
+ movzbl %bl,%edi
+ movl 2052(%rbp,%rsi,8),%edx
+ movl 0(%rbp,%rdi,8),%ecx
+ movzbl %al,%esi
+ shrl $16,%eax
+ movzbl %bh,%edi
+ xorl 4(%rbp,%rsi,8),%edx
+ shrl $16,%ebx
+ xorl 4(%rbp,%rdi,8),%ecx
+ movzbl %ah,%esi
+ movzbl %bl,%edi
+ xorl 0(%rbp,%rsi,8),%edx
+ xorl 2052(%rbp,%rdi,8),%ecx
+ movzbl %al,%esi
+ movzbl %bh,%edi
+ xorl 2048(%rbp,%rsi,8),%edx
+ xorl 2048(%rbp,%rdi,8),%ecx
+ movl 40(%r14),%ebx
+ movl 44(%r14),%eax
+ xorl %edx,%ecx
+ rorl $8,%edx
+ xorl %ecx,%r10d
+ xorl %ecx,%r11d
+ xorl %edx,%r11d
+ xorl %r10d,%eax
+ xorl %r11d,%ebx
+ movzbl %ah,%esi
+ movzbl %bl,%edi
+ movl 2052(%rbp,%rsi,8),%edx
+ movl 0(%rbp,%rdi,8),%ecx
+ movzbl %al,%esi
+ shrl $16,%eax
+ movzbl %bh,%edi
+ xorl 4(%rbp,%rsi,8),%edx
+ shrl $16,%ebx
+ xorl 4(%rbp,%rdi,8),%ecx
+ movzbl %ah,%esi
+ movzbl %bl,%edi
+ xorl 0(%rbp,%rsi,8),%edx
+ xorl 2052(%rbp,%rdi,8),%ecx
+ movzbl %al,%esi
+ movzbl %bh,%edi
+ xorl 2048(%rbp,%rsi,8),%edx
+ xorl 2048(%rbp,%rdi,8),%ecx
+ movl 48(%r14),%ebx
+ movl 52(%r14),%eax
+ xorl %edx,%ecx
+ rorl $8,%edx
+ xorl %ecx,%r8d
+ xorl %ecx,%r9d
+ xorl %edx,%r9d
+ movq 0(%r13),%rax
+ movq 8(%r13),%rbx
+ movq 32(%r13),%rcx
+ movq 40(%r13),%rdx
+ movq 48(%r13),%r14
+ movq 56(%r13),%r15
+ leaq 128(%r13),%r13
+ shlq $32,%r8
+ shlq $32,%r10
+ orq %r9,%r8
+ orq %r11,%r10
+ movq %r8,-112(%r13)
+ movq %r10,-104(%r13)
+ movq %rcx,%r11
+ shlq $15,%rcx
+ movq %rdx,%r9
+ shrq $49,%r9
+ shrq $49,%r11
+ orq %r9,%rcx
+ shlq $15,%rdx
+ orq %r11,%rdx
+ movq %rcx,-96(%r13)
+ movq %rdx,-88(%r13)
+ movq %r14,%r11
+ shlq $15,%r14
+ movq %r15,%r9
+ shrq $49,%r9
+ shrq $49,%r11
+ orq %r9,%r14
+ shlq $15,%r15
+ orq %r11,%r15
+ movq %r14,-80(%r13)
+ movq %r15,-72(%r13)
+ movq %rcx,%r11
+ shlq $15,%rcx
+ movq %rdx,%r9
+ shrq $49,%r9
+ shrq $49,%r11
+ orq %r9,%rcx
+ shlq $15,%rdx
+ orq %r11,%rdx
+ movq %rcx,-64(%r13)
+ movq %rdx,-56(%r13)
+ movq %r8,%r11
+ shlq $30,%r8
+ movq %r10,%r9
+ shrq $34,%r9
+ shrq $34,%r11
+ orq %r9,%r8
+ shlq $30,%r10
+ orq %r11,%r10
+ movq %r8,-48(%r13)
+ movq %r10,-40(%r13)
+ movq %rax,%r11
+ shlq $45,%rax
+ movq %rbx,%r9
+ shrq $19,%r9
+ shrq $19,%r11
+ orq %r9,%rax
+ shlq $45,%rbx
+ orq %r11,%rbx
+ movq %rax,-32(%r13)
+ movq %rbx,-24(%r13)
+ movq %r14,%r11
+ shlq $30,%r14
+ movq %r15,%r9
+ shrq $34,%r9
+ shrq $34,%r11
+ orq %r9,%r14
+ shlq $30,%r15
+ orq %r11,%r15
+ movq %r14,-16(%r13)
+ movq %r15,-8(%r13)
+ movq %rax,%r11
+ shlq $15,%rax
+ movq %rbx,%r9
+ shrq $49,%r9
+ shrq $49,%r11
+ orq %r9,%rax
+ shlq $15,%rbx
+ orq %r11,%rbx
+ movq %rax,0(%r13)
+ movq %rbx,8(%r13)
+ movq %rcx,%r11
+ shlq $30,%rcx
+ movq %rdx,%r9
+ shrq $34,%r9
+ shrq $34,%r11
+ orq %r9,%rcx
+ shlq $30,%rdx
+ orq %r11,%rdx
+ movq %rcx,16(%r13)
+ movq %rdx,24(%r13)
+ movq %r8,%r11
+ shlq $30,%r8
+ movq %r10,%r9
+ shrq $34,%r9
+ shrq $34,%r11
+ orq %r9,%r8
+ shlq $30,%r10
+ orq %r11,%r10
+ movq %r8,32(%r13)
+ movq %r10,40(%r13)
+ movq %rax,%r11
+ shlq $17,%rax
+ movq %rbx,%r9
+ shrq $47,%r9
+ shrq $47,%r11
+ orq %r9,%rax
+ shlq $17,%rbx
+ orq %r11,%rbx
+ movq %rax,48(%r13)
+ movq %rbx,56(%r13)
+ movq %r14,%r11
+ shlq $32,%r14
+ movq %r15,%r9
+ shrq $32,%r9
+ shrq $32,%r11
+ orq %r9,%r14
+ shlq $32,%r15
+ orq %r11,%r15
+ movq %r14,64(%r13)
+ movq %r15,72(%r13)
+ movq %rcx,%r11
+ shlq $34,%rcx
+ movq %rdx,%r9
+ shrq $30,%r9
+ shrq $30,%r11
+ orq %r9,%rcx
+ shlq $34,%rdx
+ orq %r11,%rdx
+ movq %rcx,80(%r13)
+ movq %rdx,88(%r13)
+ movq %r14,%r11
+ shlq $17,%r14
+ movq %r15,%r9
+ shrq $47,%r9
+ shrq $47,%r11
+ orq %r9,%r14
+ shlq $17,%r15
+ orq %r11,%r15
+ movq %r14,96(%r13)
+ movq %r15,104(%r13)
+ movq %rax,%r11
+ shlq $34,%rax
+ movq %rbx,%r9
+ shrq $30,%r9
+ shrq $30,%r11
+ orq %r9,%rax
+ shlq $34,%rbx
+ orq %r11,%rbx
+ movq %rax,112(%r13)
+ movq %rbx,120(%r13)
+ movq %r8,%r11
+ shlq $51,%r8
+ movq %r10,%r9
+ shrq $13,%r9
+ shrq $13,%r11
+ orq %r9,%r8
+ shlq $51,%r10
+ orq %r11,%r10
+ movq %r8,128(%r13)
+ movq %r10,136(%r13)
+ movl $4,%eax
+.Ldone:
+ movq 0(%rsp),%r15
+ movq 8(%rsp),%r14
+ movq 16(%rsp),%r13
+ movq 24(%rsp),%rbp
+ movq 32(%rsp),%rbx
+ leaq 40(%rsp),%rsp
+.Lkey_epilogue:
+ .byte 0xf3,0xc3
+.size Camellia_Ekeygen,.-Camellia_Ekeygen
+.align 64
+.LCamellia_SIGMA:
+.long 0x3bcc908b, 0xa09e667f, 0x4caa73b2, 0xb67ae858
+.long 0xe94f82be, 0xc6ef372f, 0xf1d36f1c, 0x54ff53a5
+.long 0xde682d1d, 0x10e527fa, 0xb3e6c1fd, 0xb05688c2
+.long 0, 0, 0, 0
+.LCamellia_SBOX:
+.long 0x70707000,0x70700070
+.long 0x82828200,0x2c2c002c
+.long 0x2c2c2c00,0xb3b300b3
+.long 0xececec00,0xc0c000c0
+.long 0xb3b3b300,0xe4e400e4
+.long 0x27272700,0x57570057
+.long 0xc0c0c000,0xeaea00ea
+.long 0xe5e5e500,0xaeae00ae
+.long 0xe4e4e400,0x23230023
+.long 0x85858500,0x6b6b006b
+.long 0x57575700,0x45450045
+.long 0x35353500,0xa5a500a5
+.long 0xeaeaea00,0xeded00ed
+.long 0x0c0c0c00,0x4f4f004f
+.long 0xaeaeae00,0x1d1d001d
+.long 0x41414100,0x92920092
+.long 0x23232300,0x86860086
+.long 0xefefef00,0xafaf00af
+.long 0x6b6b6b00,0x7c7c007c
+.long 0x93939300,0x1f1f001f
+.long 0x45454500,0x3e3e003e
+.long 0x19191900,0xdcdc00dc
+.long 0xa5a5a500,0x5e5e005e
+.long 0x21212100,0x0b0b000b
+.long 0xededed00,0xa6a600a6
+.long 0x0e0e0e00,0x39390039
+.long 0x4f4f4f00,0xd5d500d5
+.long 0x4e4e4e00,0x5d5d005d
+.long 0x1d1d1d00,0xd9d900d9
+.long 0x65656500,0x5a5a005a
+.long 0x92929200,0x51510051
+.long 0xbdbdbd00,0x6c6c006c
+.long 0x86868600,0x8b8b008b
+.long 0xb8b8b800,0x9a9a009a
+.long 0xafafaf00,0xfbfb00fb
+.long 0x8f8f8f00,0xb0b000b0
+.long 0x7c7c7c00,0x74740074
+.long 0xebebeb00,0x2b2b002b
+.long 0x1f1f1f00,0xf0f000f0
+.long 0xcecece00,0x84840084
+.long 0x3e3e3e00,0xdfdf00df
+.long 0x30303000,0xcbcb00cb
+.long 0xdcdcdc00,0x34340034
+.long 0x5f5f5f00,0x76760076
+.long 0x5e5e5e00,0x6d6d006d
+.long 0xc5c5c500,0xa9a900a9
+.long 0x0b0b0b00,0xd1d100d1
+.long 0x1a1a1a00,0x04040004
+.long 0xa6a6a600,0x14140014
+.long 0xe1e1e100,0x3a3a003a
+.long 0x39393900,0xdede00de
+.long 0xcacaca00,0x11110011
+.long 0xd5d5d500,0x32320032
+.long 0x47474700,0x9c9c009c
+.long 0x5d5d5d00,0x53530053
+.long 0x3d3d3d00,0xf2f200f2
+.long 0xd9d9d900,0xfefe00fe
+.long 0x01010100,0xcfcf00cf
+.long 0x5a5a5a00,0xc3c300c3
+.long 0xd6d6d600,0x7a7a007a
+.long 0x51515100,0x24240024
+.long 0x56565600,0xe8e800e8
+.long 0x6c6c6c00,0x60600060
+.long 0x4d4d4d00,0x69690069
+.long 0x8b8b8b00,0xaaaa00aa
+.long 0x0d0d0d00,0xa0a000a0
+.long 0x9a9a9a00,0xa1a100a1
+.long 0x66666600,0x62620062
+.long 0xfbfbfb00,0x54540054
+.long 0xcccccc00,0x1e1e001e
+.long 0xb0b0b000,0xe0e000e0
+.long 0x2d2d2d00,0x64640064
+.long 0x74747400,0x10100010
+.long 0x12121200,0x00000000
+.long 0x2b2b2b00,0xa3a300a3
+.long 0x20202000,0x75750075
+.long 0xf0f0f000,0x8a8a008a
+.long 0xb1b1b100,0xe6e600e6
+.long 0x84848400,0x09090009
+.long 0x99999900,0xdddd00dd
+.long 0xdfdfdf00,0x87870087
+.long 0x4c4c4c00,0x83830083
+.long 0xcbcbcb00,0xcdcd00cd
+.long 0xc2c2c200,0x90900090
+.long 0x34343400,0x73730073
+.long 0x7e7e7e00,0xf6f600f6
+.long 0x76767600,0x9d9d009d
+.long 0x05050500,0xbfbf00bf
+.long 0x6d6d6d00,0x52520052
+.long 0xb7b7b700,0xd8d800d8
+.long 0xa9a9a900,0xc8c800c8
+.long 0x31313100,0xc6c600c6
+.long 0xd1d1d100,0x81810081
+.long 0x17171700,0x6f6f006f
+.long 0x04040400,0x13130013
+.long 0xd7d7d700,0x63630063
+.long 0x14141400,0xe9e900e9
+.long 0x58585800,0xa7a700a7
+.long 0x3a3a3a00,0x9f9f009f
+.long 0x61616100,0xbcbc00bc
+.long 0xdedede00,0x29290029
+.long 0x1b1b1b00,0xf9f900f9
+.long 0x11111100,0x2f2f002f
+.long 0x1c1c1c00,0xb4b400b4
+.long 0x32323200,0x78780078
+.long 0x0f0f0f00,0x06060006
+.long 0x9c9c9c00,0xe7e700e7
+.long 0x16161600,0x71710071
+.long 0x53535300,0xd4d400d4
+.long 0x18181800,0xabab00ab
+.long 0xf2f2f200,0x88880088
+.long 0x22222200,0x8d8d008d
+.long 0xfefefe00,0x72720072
+.long 0x44444400,0xb9b900b9
+.long 0xcfcfcf00,0xf8f800f8
+.long 0xb2b2b200,0xacac00ac
+.long 0xc3c3c300,0x36360036
+.long 0xb5b5b500,0x2a2a002a
+.long 0x7a7a7a00,0x3c3c003c
+.long 0x91919100,0xf1f100f1
+.long 0x24242400,0x40400040
+.long 0x08080800,0xd3d300d3
+.long 0xe8e8e800,0xbbbb00bb
+.long 0xa8a8a800,0x43430043
+.long 0x60606000,0x15150015
+.long 0xfcfcfc00,0xadad00ad
+.long 0x69696900,0x77770077
+.long 0x50505000,0x80800080
+.long 0xaaaaaa00,0x82820082
+.long 0xd0d0d000,0xecec00ec
+.long 0xa0a0a000,0x27270027
+.long 0x7d7d7d00,0xe5e500e5
+.long 0xa1a1a100,0x85850085
+.long 0x89898900,0x35350035
+.long 0x62626200,0x0c0c000c
+.long 0x97979700,0x41410041
+.long 0x54545400,0xefef00ef
+.long 0x5b5b5b00,0x93930093
+.long 0x1e1e1e00,0x19190019
+.long 0x95959500,0x21210021
+.long 0xe0e0e000,0x0e0e000e
+.long 0xffffff00,0x4e4e004e
+.long 0x64646400,0x65650065
+.long 0xd2d2d200,0xbdbd00bd
+.long 0x10101000,0xb8b800b8
+.long 0xc4c4c400,0x8f8f008f
+.long 0x00000000,0xebeb00eb
+.long 0x48484800,0xcece00ce
+.long 0xa3a3a300,0x30300030
+.long 0xf7f7f700,0x5f5f005f
+.long 0x75757500,0xc5c500c5
+.long 0xdbdbdb00,0x1a1a001a
+.long 0x8a8a8a00,0xe1e100e1
+.long 0x03030300,0xcaca00ca
+.long 0xe6e6e600,0x47470047
+.long 0xdadada00,0x3d3d003d
+.long 0x09090900,0x01010001
+.long 0x3f3f3f00,0xd6d600d6
+.long 0xdddddd00,0x56560056
+.long 0x94949400,0x4d4d004d
+.long 0x87878700,0x0d0d000d
+.long 0x5c5c5c00,0x66660066
+.long 0x83838300,0xcccc00cc
+.long 0x02020200,0x2d2d002d
+.long 0xcdcdcd00,0x12120012
+.long 0x4a4a4a00,0x20200020
+.long 0x90909000,0xb1b100b1
+.long 0x33333300,0x99990099
+.long 0x73737300,0x4c4c004c
+.long 0x67676700,0xc2c200c2
+.long 0xf6f6f600,0x7e7e007e
+.long 0xf3f3f300,0x05050005
+.long 0x9d9d9d00,0xb7b700b7
+.long 0x7f7f7f00,0x31310031
+.long 0xbfbfbf00,0x17170017
+.long 0xe2e2e200,0xd7d700d7
+.long 0x52525200,0x58580058
+.long 0x9b9b9b00,0x61610061
+.long 0xd8d8d800,0x1b1b001b
+.long 0x26262600,0x1c1c001c
+.long 0xc8c8c800,0x0f0f000f
+.long 0x37373700,0x16160016
+.long 0xc6c6c600,0x18180018
+.long 0x3b3b3b00,0x22220022
+.long 0x81818100,0x44440044
+.long 0x96969600,0xb2b200b2
+.long 0x6f6f6f00,0xb5b500b5
+.long 0x4b4b4b00,0x91910091
+.long 0x13131300,0x08080008
+.long 0xbebebe00,0xa8a800a8
+.long 0x63636300,0xfcfc00fc
+.long 0x2e2e2e00,0x50500050
+.long 0xe9e9e900,0xd0d000d0
+.long 0x79797900,0x7d7d007d
+.long 0xa7a7a700,0x89890089
+.long 0x8c8c8c00,0x97970097
+.long 0x9f9f9f00,0x5b5b005b
+.long 0x6e6e6e00,0x95950095
+.long 0xbcbcbc00,0xffff00ff
+.long 0x8e8e8e00,0xd2d200d2
+.long 0x29292900,0xc4c400c4
+.long 0xf5f5f500,0x48480048
+.long 0xf9f9f900,0xf7f700f7
+.long 0xb6b6b600,0xdbdb00db
+.long 0x2f2f2f00,0x03030003
+.long 0xfdfdfd00,0xdada00da
+.long 0xb4b4b400,0x3f3f003f
+.long 0x59595900,0x94940094
+.long 0x78787800,0x5c5c005c
+.long 0x98989800,0x02020002
+.long 0x06060600,0x4a4a004a
+.long 0x6a6a6a00,0x33330033
+.long 0xe7e7e700,0x67670067
+.long 0x46464600,0xf3f300f3
+.long 0x71717100,0x7f7f007f
+.long 0xbababa00,0xe2e200e2
+.long 0xd4d4d400,0x9b9b009b
+.long 0x25252500,0x26260026
+.long 0xababab00,0x37370037
+.long 0x42424200,0x3b3b003b
+.long 0x88888800,0x96960096
+.long 0xa2a2a200,0x4b4b004b
+.long 0x8d8d8d00,0xbebe00be
+.long 0xfafafa00,0x2e2e002e
+.long 0x72727200,0x79790079
+.long 0x07070700,0x8c8c008c
+.long 0xb9b9b900,0x6e6e006e
+.long 0x55555500,0x8e8e008e
+.long 0xf8f8f800,0xf5f500f5
+.long 0xeeeeee00,0xb6b600b6
+.long 0xacacac00,0xfdfd00fd
+.long 0x0a0a0a00,0x59590059
+.long 0x36363600,0x98980098
+.long 0x49494900,0x6a6a006a
+.long 0x2a2a2a00,0x46460046
+.long 0x68686800,0xbaba00ba
+.long 0x3c3c3c00,0x25250025
+.long 0x38383800,0x42420042
+.long 0xf1f1f100,0xa2a200a2
+.long 0xa4a4a400,0xfafa00fa
+.long 0x40404000,0x07070007
+.long 0x28282800,0x55550055
+.long 0xd3d3d300,0xeeee00ee
+.long 0x7b7b7b00,0x0a0a000a
+.long 0xbbbbbb00,0x49490049
+.long 0xc9c9c900,0x68680068
+.long 0x43434300,0x38380038
+.long 0xc1c1c100,0xa4a400a4
+.long 0x15151500,0x28280028
+.long 0xe3e3e300,0x7b7b007b
+.long 0xadadad00,0xc9c900c9
+.long 0xf4f4f400,0xc1c100c1
+.long 0x77777700,0xe3e300e3
+.long 0xc7c7c700,0xf4f400f4
+.long 0x80808000,0xc7c700c7
+.long 0x9e9e9e00,0x9e9e009e
+.long 0x00e0e0e0,0x38003838
+.long 0x00050505,0x41004141
+.long 0x00585858,0x16001616
+.long 0x00d9d9d9,0x76007676
+.long 0x00676767,0xd900d9d9
+.long 0x004e4e4e,0x93009393
+.long 0x00818181,0x60006060
+.long 0x00cbcbcb,0xf200f2f2
+.long 0x00c9c9c9,0x72007272
+.long 0x000b0b0b,0xc200c2c2
+.long 0x00aeaeae,0xab00abab
+.long 0x006a6a6a,0x9a009a9a
+.long 0x00d5d5d5,0x75007575
+.long 0x00181818,0x06000606
+.long 0x005d5d5d,0x57005757
+.long 0x00828282,0xa000a0a0
+.long 0x00464646,0x91009191
+.long 0x00dfdfdf,0xf700f7f7
+.long 0x00d6d6d6,0xb500b5b5
+.long 0x00272727,0xc900c9c9
+.long 0x008a8a8a,0xa200a2a2
+.long 0x00323232,0x8c008c8c
+.long 0x004b4b4b,0xd200d2d2
+.long 0x00424242,0x90009090
+.long 0x00dbdbdb,0xf600f6f6
+.long 0x001c1c1c,0x07000707
+.long 0x009e9e9e,0xa700a7a7
+.long 0x009c9c9c,0x27002727
+.long 0x003a3a3a,0x8e008e8e
+.long 0x00cacaca,0xb200b2b2
+.long 0x00252525,0x49004949
+.long 0x007b7b7b,0xde00dede
+.long 0x000d0d0d,0x43004343
+.long 0x00717171,0x5c005c5c
+.long 0x005f5f5f,0xd700d7d7
+.long 0x001f1f1f,0xc700c7c7
+.long 0x00f8f8f8,0x3e003e3e
+.long 0x00d7d7d7,0xf500f5f5
+.long 0x003e3e3e,0x8f008f8f
+.long 0x009d9d9d,0x67006767
+.long 0x007c7c7c,0x1f001f1f
+.long 0x00606060,0x18001818
+.long 0x00b9b9b9,0x6e006e6e
+.long 0x00bebebe,0xaf00afaf
+.long 0x00bcbcbc,0x2f002f2f
+.long 0x008b8b8b,0xe200e2e2
+.long 0x00161616,0x85008585
+.long 0x00343434,0x0d000d0d
+.long 0x004d4d4d,0x53005353
+.long 0x00c3c3c3,0xf000f0f0
+.long 0x00727272,0x9c009c9c
+.long 0x00959595,0x65006565
+.long 0x00ababab,0xea00eaea
+.long 0x008e8e8e,0xa300a3a3
+.long 0x00bababa,0xae00aeae
+.long 0x007a7a7a,0x9e009e9e
+.long 0x00b3b3b3,0xec00ecec
+.long 0x00020202,0x80008080
+.long 0x00b4b4b4,0x2d002d2d
+.long 0x00adadad,0x6b006b6b
+.long 0x00a2a2a2,0xa800a8a8
+.long 0x00acacac,0x2b002b2b
+.long 0x00d8d8d8,0x36003636
+.long 0x009a9a9a,0xa600a6a6
+.long 0x00171717,0xc500c5c5
+.long 0x001a1a1a,0x86008686
+.long 0x00353535,0x4d004d4d
+.long 0x00cccccc,0x33003333
+.long 0x00f7f7f7,0xfd00fdfd
+.long 0x00999999,0x66006666
+.long 0x00616161,0x58005858
+.long 0x005a5a5a,0x96009696
+.long 0x00e8e8e8,0x3a003a3a
+.long 0x00242424,0x09000909
+.long 0x00565656,0x95009595
+.long 0x00404040,0x10001010
+.long 0x00e1e1e1,0x78007878
+.long 0x00636363,0xd800d8d8
+.long 0x00090909,0x42004242
+.long 0x00333333,0xcc00cccc
+.long 0x00bfbfbf,0xef00efef
+.long 0x00989898,0x26002626
+.long 0x00979797,0xe500e5e5
+.long 0x00858585,0x61006161
+.long 0x00686868,0x1a001a1a
+.long 0x00fcfcfc,0x3f003f3f
+.long 0x00ececec,0x3b003b3b
+.long 0x000a0a0a,0x82008282
+.long 0x00dadada,0xb600b6b6
+.long 0x006f6f6f,0xdb00dbdb
+.long 0x00535353,0xd400d4d4
+.long 0x00626262,0x98009898
+.long 0x00a3a3a3,0xe800e8e8
+.long 0x002e2e2e,0x8b008b8b
+.long 0x00080808,0x02000202
+.long 0x00afafaf,0xeb00ebeb
+.long 0x00282828,0x0a000a0a
+.long 0x00b0b0b0,0x2c002c2c
+.long 0x00747474,0x1d001d1d
+.long 0x00c2c2c2,0xb000b0b0
+.long 0x00bdbdbd,0x6f006f6f
+.long 0x00363636,0x8d008d8d
+.long 0x00222222,0x88008888
+.long 0x00383838,0x0e000e0e
+.long 0x00646464,0x19001919
+.long 0x001e1e1e,0x87008787
+.long 0x00393939,0x4e004e4e
+.long 0x002c2c2c,0x0b000b0b
+.long 0x00a6a6a6,0xa900a9a9
+.long 0x00303030,0x0c000c0c
+.long 0x00e5e5e5,0x79007979
+.long 0x00444444,0x11001111
+.long 0x00fdfdfd,0x7f007f7f
+.long 0x00888888,0x22002222
+.long 0x009f9f9f,0xe700e7e7
+.long 0x00656565,0x59005959
+.long 0x00878787,0xe100e1e1
+.long 0x006b6b6b,0xda00dada
+.long 0x00f4f4f4,0x3d003d3d
+.long 0x00232323,0xc800c8c8
+.long 0x00484848,0x12001212
+.long 0x00101010,0x04000404
+.long 0x00d1d1d1,0x74007474
+.long 0x00515151,0x54005454
+.long 0x00c0c0c0,0x30003030
+.long 0x00f9f9f9,0x7e007e7e
+.long 0x00d2d2d2,0xb400b4b4
+.long 0x00a0a0a0,0x28002828
+.long 0x00555555,0x55005555
+.long 0x00a1a1a1,0x68006868
+.long 0x00414141,0x50005050
+.long 0x00fafafa,0xbe00bebe
+.long 0x00434343,0xd000d0d0
+.long 0x00131313,0xc400c4c4
+.long 0x00c4c4c4,0x31003131
+.long 0x002f2f2f,0xcb00cbcb
+.long 0x00a8a8a8,0x2a002a2a
+.long 0x00b6b6b6,0xad00adad
+.long 0x003c3c3c,0x0f000f0f
+.long 0x002b2b2b,0xca00caca
+.long 0x00c1c1c1,0x70007070
+.long 0x00ffffff,0xff00ffff
+.long 0x00c8c8c8,0x32003232
+.long 0x00a5a5a5,0x69006969
+.long 0x00202020,0x08000808
+.long 0x00898989,0x62006262
+.long 0x00000000,0x00000000
+.long 0x00909090,0x24002424
+.long 0x00474747,0xd100d1d1
+.long 0x00efefef,0xfb00fbfb
+.long 0x00eaeaea,0xba00baba
+.long 0x00b7b7b7,0xed00eded
+.long 0x00151515,0x45004545
+.long 0x00060606,0x81008181
+.long 0x00cdcdcd,0x73007373
+.long 0x00b5b5b5,0x6d006d6d
+.long 0x00121212,0x84008484
+.long 0x007e7e7e,0x9f009f9f
+.long 0x00bbbbbb,0xee00eeee
+.long 0x00292929,0x4a004a4a
+.long 0x000f0f0f,0xc300c3c3
+.long 0x00b8b8b8,0x2e002e2e
+.long 0x00070707,0xc100c1c1
+.long 0x00040404,0x01000101
+.long 0x009b9b9b,0xe600e6e6
+.long 0x00949494,0x25002525
+.long 0x00212121,0x48004848
+.long 0x00666666,0x99009999
+.long 0x00e6e6e6,0xb900b9b9
+.long 0x00cecece,0xb300b3b3
+.long 0x00ededed,0x7b007b7b
+.long 0x00e7e7e7,0xf900f9f9
+.long 0x003b3b3b,0xce00cece
+.long 0x00fefefe,0xbf00bfbf
+.long 0x007f7f7f,0xdf00dfdf
+.long 0x00c5c5c5,0x71007171
+.long 0x00a4a4a4,0x29002929
+.long 0x00373737,0xcd00cdcd
+.long 0x00b1b1b1,0x6c006c6c
+.long 0x004c4c4c,0x13001313
+.long 0x00919191,0x64006464
+.long 0x006e6e6e,0x9b009b9b
+.long 0x008d8d8d,0x63006363
+.long 0x00767676,0x9d009d9d
+.long 0x00030303,0xc000c0c0
+.long 0x002d2d2d,0x4b004b4b
+.long 0x00dedede,0xb700b7b7
+.long 0x00969696,0xa500a5a5
+.long 0x00262626,0x89008989
+.long 0x007d7d7d,0x5f005f5f
+.long 0x00c6c6c6,0xb100b1b1
+.long 0x005c5c5c,0x17001717
+.long 0x00d3d3d3,0xf400f4f4
+.long 0x00f2f2f2,0xbc00bcbc
+.long 0x004f4f4f,0xd300d3d3
+.long 0x00191919,0x46004646
+.long 0x003f3f3f,0xcf00cfcf
+.long 0x00dcdcdc,0x37003737
+.long 0x00797979,0x5e005e5e
+.long 0x001d1d1d,0x47004747
+.long 0x00525252,0x94009494
+.long 0x00ebebeb,0xfa00fafa
+.long 0x00f3f3f3,0xfc00fcfc
+.long 0x006d6d6d,0x5b005b5b
+.long 0x005e5e5e,0x97009797
+.long 0x00fbfbfb,0xfe00fefe
+.long 0x00696969,0x5a005a5a
+.long 0x00b2b2b2,0xac00acac
+.long 0x00f0f0f0,0x3c003c3c
+.long 0x00313131,0x4c004c4c
+.long 0x000c0c0c,0x03000303
+.long 0x00d4d4d4,0x35003535
+.long 0x00cfcfcf,0xf300f3f3
+.long 0x008c8c8c,0x23002323
+.long 0x00e2e2e2,0xb800b8b8
+.long 0x00757575,0x5d005d5d
+.long 0x00a9a9a9,0x6a006a6a
+.long 0x004a4a4a,0x92009292
+.long 0x00575757,0xd500d5d5
+.long 0x00848484,0x21002121
+.long 0x00111111,0x44004444
+.long 0x00454545,0x51005151
+.long 0x001b1b1b,0xc600c6c6
+.long 0x00f5f5f5,0x7d007d7d
+.long 0x00e4e4e4,0x39003939
+.long 0x000e0e0e,0x83008383
+.long 0x00737373,0xdc00dcdc
+.long 0x00aaaaaa,0xaa00aaaa
+.long 0x00f1f1f1,0x7c007c7c
+.long 0x00dddddd,0x77007777
+.long 0x00595959,0x56005656
+.long 0x00141414,0x05000505
+.long 0x006c6c6c,0x1b001b1b
+.long 0x00929292,0xa400a4a4
+.long 0x00545454,0x15001515
+.long 0x00d0d0d0,0x34003434
+.long 0x00787878,0x1e001e1e
+.long 0x00707070,0x1c001c1c
+.long 0x00e3e3e3,0xf800f8f8
+.long 0x00494949,0x52005252
+.long 0x00808080,0x20002020
+.long 0x00505050,0x14001414
+.long 0x00a7a7a7,0xe900e9e9
+.long 0x00f6f6f6,0xbd00bdbd
+.long 0x00777777,0xdd00dddd
+.long 0x00939393,0xe400e4e4
+.long 0x00868686,0xa100a1a1
+.long 0x00838383,0xe000e0e0
+.long 0x002a2a2a,0x8a008a8a
+.long 0x00c7c7c7,0xf100f1f1
+.long 0x005b5b5b,0xd600d6d6
+.long 0x00e9e9e9,0x7a007a7a
+.long 0x00eeeeee,0xbb00bbbb
+.long 0x008f8f8f,0xe300e3e3
+.long 0x00010101,0x40004040
+.long 0x003d3d3d,0x4f004f4f
+.globl Camellia_cbc_encrypt
+.type Camellia_cbc_encrypt,@function
+.align 16
+Camellia_cbc_encrypt:
+ cmpq $0,%rdx
+ je .Lcbc_abort
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+.Lcbc_prologue:
+
+ movq %rsp,%rbp
+ subq $64,%rsp
+ andq $-64,%rsp
+
+
+
+ leaq -64-63(%rcx),%r10
+ subq %rsp,%r10
+ negq %r10
+ andq $960,%r10
+ subq %r10,%rsp
+
+
+ movq %rdi,%r12
+ movq %rsi,%r13
+ movq %r8,%rbx
+ movq %rcx,%r14
+ movl 272(%rcx),%r15d
+
+ movq %r8,40(%rsp)
+ movq %rbp,48(%rsp)
+
+.Lcbc_body:
+ leaq .LCamellia_SBOX(%rip),%rbp
+
+ movl $32,%ecx
+.align 4
+.Lcbc_prefetch_sbox:
+ movq 0(%rbp),%rax
+ movq 32(%rbp),%rsi
+ movq 64(%rbp),%rdi
+ movq 96(%rbp),%r11
+ leaq 128(%rbp),%rbp
+ loop .Lcbc_prefetch_sbox
+ subq $4096,%rbp
+ shlq $6,%r15
+ movq %rdx,%rcx
+ leaq (%r14,%r15,1),%r15
+
+ cmpl $0,%r9d
+ je .LCBC_DECRYPT
+
+ andq $-16,%rdx
+ andq $15,%rcx
+ leaq (%r12,%rdx,1),%rdx
+ movq %r14,0(%rsp)
+ movq %rdx,8(%rsp)
+ movq %rcx,16(%rsp)
+
+ cmpq %r12,%rdx
+ movl 0(%rbx),%r8d
+ movl 4(%rbx),%r9d
+ movl 8(%rbx),%r10d
+ movl 12(%rbx),%r11d
+ je .Lcbc_enc_tail
+ jmp .Lcbc_eloop
+
+.align 16
+.Lcbc_eloop:
+ xorl 0(%r12),%r8d
+ xorl 4(%r12),%r9d
+ xorl 8(%r12),%r10d
+ bswapl %r8d
+ xorl 12(%r12),%r11d
+ bswapl %r9d
+ bswapl %r10d
+ bswapl %r11d
+
+ call _x86_64_Camellia_encrypt
+
+ movq 0(%rsp),%r14
+ bswapl %r8d
+ movq 8(%rsp),%rdx
+ bswapl %r9d
+ movq 16(%rsp),%rcx
+ bswapl %r10d
+ movl %r8d,0(%r13)
+ bswapl %r11d
+ movl %r9d,4(%r13)
+ movl %r10d,8(%r13)
+ leaq 16(%r12),%r12
+ movl %r11d,12(%r13)
+ cmpq %rdx,%r12
+ leaq 16(%r13),%r13
+ jne .Lcbc_eloop
+
+ cmpq $0,%rcx
+ jne .Lcbc_enc_tail
+
+ movq 40(%rsp),%r13
+ movl %r8d,0(%r13)
+ movl %r9d,4(%r13)
+ movl %r10d,8(%r13)
+ movl %r11d,12(%r13)
+ jmp .Lcbc_done
+
+.align 16
+.Lcbc_enc_tail:
+ xorq %rax,%rax
+ movq %rax,0+24(%rsp)
+ movq %rax,8+24(%rsp)
+ movq %rax,16(%rsp)
+
+.Lcbc_enc_pushf:
+ pushfq
+ cld
+ movq %r12,%rsi
+ leaq 8+24(%rsp),%rdi
+.long 0x9066A4F3
+ popfq
+.Lcbc_enc_popf:
+
+ leaq 24(%rsp),%r12
+ leaq 16+24(%rsp),%rax
+ movq %rax,8(%rsp)
+ jmp .Lcbc_eloop
+
+.align 16
+.LCBC_DECRYPT:
+ xchgq %r14,%r15
+ addq $15,%rdx
+ andq $15,%rcx
+ andq $-16,%rdx
+ movq %r14,0(%rsp)
+ leaq (%r12,%rdx,1),%rdx
+ movq %rdx,8(%rsp)
+ movq %rcx,16(%rsp)
+
+ movq (%rbx),%rax
+ movq 8(%rbx),%rbx
+ jmp .Lcbc_dloop
+.align 16
+.Lcbc_dloop:
+ movl 0(%r12),%r8d
+ movl 4(%r12),%r9d
+ movl 8(%r12),%r10d
+ bswapl %r8d
+ movl 12(%r12),%r11d
+ bswapl %r9d
+ movq %rax,0+24(%rsp)
+ bswapl %r10d
+ movq %rbx,8+24(%rsp)
+ bswapl %r11d
+
+ call _x86_64_Camellia_decrypt
+
+ movq 0(%rsp),%r14
+ movq 8(%rsp),%rdx
+ movq 16(%rsp),%rcx
+
+ bswapl %r8d
+ movq (%r12),%rax
+ bswapl %r9d
+ movq 8(%r12),%rbx
+ bswapl %r10d
+ xorl 0+24(%rsp),%r8d
+ bswapl %r11d
+ xorl 4+24(%rsp),%r9d
+ xorl 8+24(%rsp),%r10d
+ leaq 16(%r12),%r12
+ xorl 12+24(%rsp),%r11d
+ cmpq %rdx,%r12
+ je .Lcbc_ddone
+
+ movl %r8d,0(%r13)
+ movl %r9d,4(%r13)
+ movl %r10d,8(%r13)
+ movl %r11d,12(%r13)
+
+ leaq 16(%r13),%r13
+ jmp .Lcbc_dloop
+
+.align 16
+.Lcbc_ddone:
+ movq 40(%rsp),%rdx
+ cmpq $0,%rcx
+ jne .Lcbc_dec_tail
+
+ movl %r8d,0(%r13)
+ movl %r9d,4(%r13)
+ movl %r10d,8(%r13)
+ movl %r11d,12(%r13)
+
+ movq %rax,(%rdx)
+ movq %rbx,8(%rdx)
+ jmp .Lcbc_done
+.align 16
+.Lcbc_dec_tail:
+ movl %r8d,0+24(%rsp)
+ movl %r9d,4+24(%rsp)
+ movl %r10d,8+24(%rsp)
+ movl %r11d,12+24(%rsp)
+
+.Lcbc_dec_pushf:
+ pushfq
+ cld
+ leaq 8+24(%rsp),%rsi
+ leaq (%r13),%rdi
+.long 0x9066A4F3
+ popfq
+.Lcbc_dec_popf:
+
+ movq %rax,(%rdx)
+ movq %rbx,8(%rdx)
+ jmp .Lcbc_done
+
+.align 16
+.Lcbc_done:
+ movq 48(%rsp),%rcx
+ movq 0(%rcx),%r15
+ movq 8(%rcx),%r14
+ movq 16(%rcx),%r13
+ movq 24(%rcx),%r12
+ movq 32(%rcx),%rbp
+ movq 40(%rcx),%rbx
+ leaq 48(%rcx),%rsp
+.Lcbc_abort:
+ .byte 0xf3,0xc3
+.size Camellia_cbc_encrypt,.-Camellia_cbc_encrypt
+
+.byte 67,97,109,101,108,108,105,97,32,102,111,114,32,120,56,54,95,54,52,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
diff --git a/secure/lib/libcrypto/amd64/ghash-x86_64.S b/secure/lib/libcrypto/amd64/ghash-x86_64.S
new file mode 100644
index 000000000000..d7ea764ee992
--- /dev/null
+++ b/secure/lib/libcrypto/amd64/ghash-x86_64.S
@@ -0,0 +1,1027 @@
+ # $FreeBSD$
+.text
+
+.globl gcm_gmult_4bit
+.type gcm_gmult_4bit,@function
+.align 16
+gcm_gmult_4bit:
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+.Lgmult_prologue:
+
+ movzbq 15(%rdi),%r8
+ leaq .Lrem_4bit(%rip),%r11
+ xorq %rax,%rax
+ xorq %rbx,%rbx
+ movb %r8b,%al
+ movb %r8b,%bl
+ shlb $4,%al
+ movq $14,%rcx
+ movq 8(%rsi,%rax,1),%r8
+ movq (%rsi,%rax,1),%r9
+ andb $240,%bl
+ movq %r8,%rdx
+ jmp .Loop1
+
+.align 16
+.Loop1:
+ shrq $4,%r8
+ andq $15,%rdx
+ movq %r9,%r10
+ movb (%rdi,%rcx,1),%al
+ shrq $4,%r9
+ xorq 8(%rsi,%rbx,1),%r8
+ shlq $60,%r10
+ xorq (%rsi,%rbx,1),%r9
+ movb %al,%bl
+ xorq (%r11,%rdx,8),%r9
+ movq %r8,%rdx
+ shlb $4,%al
+ xorq %r10,%r8
+ decq %rcx
+ js .Lbreak1
+
+ shrq $4,%r8
+ andq $15,%rdx
+ movq %r9,%r10
+ shrq $4,%r9
+ xorq 8(%rsi,%rax,1),%r8
+ shlq $60,%r10
+ xorq (%rsi,%rax,1),%r9
+ andb $240,%bl
+ xorq (%r11,%rdx,8),%r9
+ movq %r8,%rdx
+ xorq %r10,%r8
+ jmp .Loop1
+
+.align 16
+.Lbreak1:
+ shrq $4,%r8
+ andq $15,%rdx
+ movq %r9,%r10
+ shrq $4,%r9
+ xorq 8(%rsi,%rax,1),%r8
+ shlq $60,%r10
+ xorq (%rsi,%rax,1),%r9
+ andb $240,%bl
+ xorq (%r11,%rdx,8),%r9
+ movq %r8,%rdx
+ xorq %r10,%r8
+
+ shrq $4,%r8
+ andq $15,%rdx
+ movq %r9,%r10
+ shrq $4,%r9
+ xorq 8(%rsi,%rbx,1),%r8
+ shlq $60,%r10
+ xorq (%rsi,%rbx,1),%r9
+ xorq %r10,%r8
+ xorq (%r11,%rdx,8),%r9
+
+ bswapq %r8
+ bswapq %r9
+ movq %r8,8(%rdi)
+ movq %r9,(%rdi)
+
+ movq 16(%rsp),%rbx
+ leaq 24(%rsp),%rsp
+.Lgmult_epilogue:
+ .byte 0xf3,0xc3
+.size gcm_gmult_4bit,.-gcm_gmult_4bit
+.globl gcm_ghash_4bit
+.type gcm_ghash_4bit,@function
+.align 16
+gcm_ghash_4bit:
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ subq $280,%rsp
+.Lghash_prologue:
+ movq %rdx,%r14
+ movq %rcx,%r15
+ subq $-128,%rsi
+ leaq 16+128(%rsp),%rbp
+ xorl %edx,%edx
+ movq 0+0-128(%rsi),%r8
+ movq 0+8-128(%rsi),%rax
+ movb %al,%dl
+ shrq $4,%rax
+ movq %r8,%r10
+ shrq $4,%r8
+ movq 16+0-128(%rsi),%r9
+ shlb $4,%dl
+ movq 16+8-128(%rsi),%rbx
+ shlq $60,%r10
+ movb %dl,0(%rsp)
+ orq %r10,%rax
+ movb %bl,%dl
+ shrq $4,%rbx
+ movq %r9,%r10
+ shrq $4,%r9
+ movq %r8,0(%rbp)
+ movq 32+0-128(%rsi),%r8
+ shlb $4,%dl
+ movq %rax,0-128(%rbp)
+ movq 32+8-128(%rsi),%rax
+ shlq $60,%r10
+ movb %dl,1(%rsp)
+ orq %r10,%rbx
+ movb %al,%dl
+ shrq $4,%rax
+ movq %r8,%r10
+ shrq $4,%r8
+ movq %r9,8(%rbp)
+ movq 48+0-128(%rsi),%r9
+ shlb $4,%dl
+ movq %rbx,8-128(%rbp)
+ movq 48+8-128(%rsi),%rbx
+ shlq $60,%r10
+ movb %dl,2(%rsp)
+ orq %r10,%rax
+ movb %bl,%dl
+ shrq $4,%rbx
+ movq %r9,%r10
+ shrq $4,%r9
+ movq %r8,16(%rbp)
+ movq 64+0-128(%rsi),%r8
+ shlb $4,%dl
+ movq %rax,16-128(%rbp)
+ movq 64+8-128(%rsi),%rax
+ shlq $60,%r10
+ movb %dl,3(%rsp)
+ orq %r10,%rbx
+ movb %al,%dl
+ shrq $4,%rax
+ movq %r8,%r10
+ shrq $4,%r8
+ movq %r9,24(%rbp)
+ movq 80+0-128(%rsi),%r9
+ shlb $4,%dl
+ movq %rbx,24-128(%rbp)
+ movq 80+8-128(%rsi),%rbx
+ shlq $60,%r10
+ movb %dl,4(%rsp)
+ orq %r10,%rax
+ movb %bl,%dl
+ shrq $4,%rbx
+ movq %r9,%r10
+ shrq $4,%r9
+ movq %r8,32(%rbp)
+ movq 96+0-128(%rsi),%r8
+ shlb $4,%dl
+ movq %rax,32-128(%rbp)
+ movq 96+8-128(%rsi),%rax
+ shlq $60,%r10
+ movb %dl,5(%rsp)
+ orq %r10,%rbx
+ movb %al,%dl
+ shrq $4,%rax
+ movq %r8,%r10
+ shrq $4,%r8
+ movq %r9,40(%rbp)
+ movq 112+0-128(%rsi),%r9
+ shlb $4,%dl
+ movq %rbx,40-128(%rbp)
+ movq 112+8-128(%rsi),%rbx
+ shlq $60,%r10
+ movb %dl,6(%rsp)
+ orq %r10,%rax
+ movb %bl,%dl
+ shrq $4,%rbx
+ movq %r9,%r10
+ shrq $4,%r9
+ movq %r8,48(%rbp)
+ movq 128+0-128(%rsi),%r8
+ shlb $4,%dl
+ movq %rax,48-128(%rbp)
+ movq 128+8-128(%rsi),%rax
+ shlq $60,%r10
+ movb %dl,7(%rsp)
+ orq %r10,%rbx
+ movb %al,%dl
+ shrq $4,%rax
+ movq %r8,%r10
+ shrq $4,%r8
+ movq %r9,56(%rbp)
+ movq 144+0-128(%rsi),%r9
+ shlb $4,%dl
+ movq %rbx,56-128(%rbp)
+ movq 144+8-128(%rsi),%rbx
+ shlq $60,%r10
+ movb %dl,8(%rsp)
+ orq %r10,%rax
+ movb %bl,%dl
+ shrq $4,%rbx
+ movq %r9,%r10
+ shrq $4,%r9
+ movq %r8,64(%rbp)
+ movq 160+0-128(%rsi),%r8
+ shlb $4,%dl
+ movq %rax,64-128(%rbp)
+ movq 160+8-128(%rsi),%rax
+ shlq $60,%r10
+ movb %dl,9(%rsp)
+ orq %r10,%rbx
+ movb %al,%dl
+ shrq $4,%rax
+ movq %r8,%r10
+ shrq $4,%r8
+ movq %r9,72(%rbp)
+ movq 176+0-128(%rsi),%r9
+ shlb $4,%dl
+ movq %rbx,72-128(%rbp)
+ movq 176+8-128(%rsi),%rbx
+ shlq $60,%r10
+ movb %dl,10(%rsp)
+ orq %r10,%rax
+ movb %bl,%dl
+ shrq $4,%rbx
+ movq %r9,%r10
+ shrq $4,%r9
+ movq %r8,80(%rbp)
+ movq 192+0-128(%rsi),%r8
+ shlb $4,%dl
+ movq %rax,80-128(%rbp)
+ movq 192+8-128(%rsi),%rax
+ shlq $60,%r10
+ movb %dl,11(%rsp)
+ orq %r10,%rbx
+ movb %al,%dl
+ shrq $4,%rax
+ movq %r8,%r10
+ shrq $4,%r8
+ movq %r9,88(%rbp)
+ movq 208+0-128(%rsi),%r9
+ shlb $4,%dl
+ movq %rbx,88-128(%rbp)
+ movq 208+8-128(%rsi),%rbx
+ shlq $60,%r10
+ movb %dl,12(%rsp)
+ orq %r10,%rax
+ movb %bl,%dl
+ shrq $4,%rbx
+ movq %r9,%r10
+ shrq $4,%r9
+ movq %r8,96(%rbp)
+ movq 224+0-128(%rsi),%r8
+ shlb $4,%dl
+ movq %rax,96-128(%rbp)
+ movq 224+8-128(%rsi),%rax
+ shlq $60,%r10
+ movb %dl,13(%rsp)
+ orq %r10,%rbx
+ movb %al,%dl
+ shrq $4,%rax
+ movq %r8,%r10
+ shrq $4,%r8
+ movq %r9,104(%rbp)
+ movq 240+0-128(%rsi),%r9
+ shlb $4,%dl
+ movq %rbx,104-128(%rbp)
+ movq 240+8-128(%rsi),%rbx
+ shlq $60,%r10
+ movb %dl,14(%rsp)
+ orq %r10,%rax
+ movb %bl,%dl
+ shrq $4,%rbx
+ movq %r9,%r10
+ shrq $4,%r9
+ movq %r8,112(%rbp)
+ shlb $4,%dl
+ movq %rax,112-128(%rbp)
+ shlq $60,%r10
+ movb %dl,15(%rsp)
+ orq %r10,%rbx
+ movq %r9,120(%rbp)
+ movq %rbx,120-128(%rbp)
+ addq $-128,%rsi
+ movq 8(%rdi),%r8
+ movq 0(%rdi),%r9
+ addq %r14,%r15
+ leaq .Lrem_8bit(%rip),%r11
+ jmp .Louter_loop
+.align 16
+.Louter_loop:
+ xorq (%r14),%r9
+ movq 8(%r14),%rdx
+ leaq 16(%r14),%r14
+ xorq %r8,%rdx
+ movq %r9,(%rdi)
+ movq %rdx,8(%rdi)
+ shrq $32,%rdx
+ xorq %rax,%rax
+ roll $8,%edx
+ movb %dl,%al
+ movzbl %dl,%ebx
+ shlb $4,%al
+ shrl $4,%ebx
+ roll $8,%edx
+ movq 8(%rsi,%rax,1),%r8
+ movq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ movzbl %dl,%ecx
+ shlb $4,%al
+ movzbq (%rsp,%rbx,1),%r12
+ shrl $4,%ecx
+ xorq %r8,%r12
+ movq %r9,%r10
+ shrq $8,%r8
+ movzbq %r12b,%r12
+ shrq $8,%r9
+ xorq -128(%rbp,%rbx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rbx,8),%r9
+ roll $8,%edx
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ xorq %r10,%r8
+ movzwq (%r11,%r12,2),%r12
+ movzbl %dl,%ebx
+ shlb $4,%al
+ movzbq (%rsp,%rcx,1),%r13
+ shrl $4,%ebx
+ shlq $48,%r12
+ xorq %r8,%r13
+ movq %r9,%r10
+ xorq %r12,%r9
+ shrq $8,%r8
+ movzbq %r13b,%r13
+ shrq $8,%r9
+ xorq -128(%rbp,%rcx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rcx,8),%r9
+ roll $8,%edx
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ xorq %r10,%r8
+ movzwq (%r11,%r13,2),%r13
+ movzbl %dl,%ecx
+ shlb $4,%al
+ movzbq (%rsp,%rbx,1),%r12
+ shrl $4,%ecx
+ shlq $48,%r13
+ xorq %r8,%r12
+ movq %r9,%r10
+ xorq %r13,%r9
+ shrq $8,%r8
+ movzbq %r12b,%r12
+ movl 8(%rdi),%edx
+ shrq $8,%r9
+ xorq -128(%rbp,%rbx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rbx,8),%r9
+ roll $8,%edx
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ xorq %r10,%r8
+ movzwq (%r11,%r12,2),%r12
+ movzbl %dl,%ebx
+ shlb $4,%al
+ movzbq (%rsp,%rcx,1),%r13
+ shrl $4,%ebx
+ shlq $48,%r12
+ xorq %r8,%r13
+ movq %r9,%r10
+ xorq %r12,%r9
+ shrq $8,%r8
+ movzbq %r13b,%r13
+ shrq $8,%r9
+ xorq -128(%rbp,%rcx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rcx,8),%r9
+ roll $8,%edx
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ xorq %r10,%r8
+ movzwq (%r11,%r13,2),%r13
+ movzbl %dl,%ecx
+ shlb $4,%al
+ movzbq (%rsp,%rbx,1),%r12
+ shrl $4,%ecx
+ shlq $48,%r13
+ xorq %r8,%r12
+ movq %r9,%r10
+ xorq %r13,%r9
+ shrq $8,%r8
+ movzbq %r12b,%r12
+ shrq $8,%r9
+ xorq -128(%rbp,%rbx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rbx,8),%r9
+ roll $8,%edx
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ xorq %r10,%r8
+ movzwq (%r11,%r12,2),%r12
+ movzbl %dl,%ebx
+ shlb $4,%al
+ movzbq (%rsp,%rcx,1),%r13
+ shrl $4,%ebx
+ shlq $48,%r12
+ xorq %r8,%r13
+ movq %r9,%r10
+ xorq %r12,%r9
+ shrq $8,%r8
+ movzbq %r13b,%r13
+ shrq $8,%r9
+ xorq -128(%rbp,%rcx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rcx,8),%r9
+ roll $8,%edx
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ xorq %r10,%r8
+ movzwq (%r11,%r13,2),%r13
+ movzbl %dl,%ecx
+ shlb $4,%al
+ movzbq (%rsp,%rbx,1),%r12
+ shrl $4,%ecx
+ shlq $48,%r13
+ xorq %r8,%r12
+ movq %r9,%r10
+ xorq %r13,%r9
+ shrq $8,%r8
+ movzbq %r12b,%r12
+ movl 4(%rdi),%edx
+ shrq $8,%r9
+ xorq -128(%rbp,%rbx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rbx,8),%r9
+ roll $8,%edx
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ xorq %r10,%r8
+ movzwq (%r11,%r12,2),%r12
+ movzbl %dl,%ebx
+ shlb $4,%al
+ movzbq (%rsp,%rcx,1),%r13
+ shrl $4,%ebx
+ shlq $48,%r12
+ xorq %r8,%r13
+ movq %r9,%r10
+ xorq %r12,%r9
+ shrq $8,%r8
+ movzbq %r13b,%r13
+ shrq $8,%r9
+ xorq -128(%rbp,%rcx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rcx,8),%r9
+ roll $8,%edx
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ xorq %r10,%r8
+ movzwq (%r11,%r13,2),%r13
+ movzbl %dl,%ecx
+ shlb $4,%al
+ movzbq (%rsp,%rbx,1),%r12
+ shrl $4,%ecx
+ shlq $48,%r13
+ xorq %r8,%r12
+ movq %r9,%r10
+ xorq %r13,%r9
+ shrq $8,%r8
+ movzbq %r12b,%r12
+ shrq $8,%r9
+ xorq -128(%rbp,%rbx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rbx,8),%r9
+ roll $8,%edx
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ xorq %r10,%r8
+ movzwq (%r11,%r12,2),%r12
+ movzbl %dl,%ebx
+ shlb $4,%al
+ movzbq (%rsp,%rcx,1),%r13
+ shrl $4,%ebx
+ shlq $48,%r12
+ xorq %r8,%r13
+ movq %r9,%r10
+ xorq %r12,%r9
+ shrq $8,%r8
+ movzbq %r13b,%r13
+ shrq $8,%r9
+ xorq -128(%rbp,%rcx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rcx,8),%r9
+ roll $8,%edx
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ xorq %r10,%r8
+ movzwq (%r11,%r13,2),%r13
+ movzbl %dl,%ecx
+ shlb $4,%al
+ movzbq (%rsp,%rbx,1),%r12
+ shrl $4,%ecx
+ shlq $48,%r13
+ xorq %r8,%r12
+ movq %r9,%r10
+ xorq %r13,%r9
+ shrq $8,%r8
+ movzbq %r12b,%r12
+ movl 0(%rdi),%edx
+ shrq $8,%r9
+ xorq -128(%rbp,%rbx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rbx,8),%r9
+ roll $8,%edx
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ xorq %r10,%r8
+ movzwq (%r11,%r12,2),%r12
+ movzbl %dl,%ebx
+ shlb $4,%al
+ movzbq (%rsp,%rcx,1),%r13
+ shrl $4,%ebx
+ shlq $48,%r12
+ xorq %r8,%r13
+ movq %r9,%r10
+ xorq %r12,%r9
+ shrq $8,%r8
+ movzbq %r13b,%r13
+ shrq $8,%r9
+ xorq -128(%rbp,%rcx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rcx,8),%r9
+ roll $8,%edx
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ xorq %r10,%r8
+ movzwq (%r11,%r13,2),%r13
+ movzbl %dl,%ecx
+ shlb $4,%al
+ movzbq (%rsp,%rbx,1),%r12
+ shrl $4,%ecx
+ shlq $48,%r13
+ xorq %r8,%r12
+ movq %r9,%r10
+ xorq %r13,%r9
+ shrq $8,%r8
+ movzbq %r12b,%r12
+ shrq $8,%r9
+ xorq -128(%rbp,%rbx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rbx,8),%r9
+ roll $8,%edx
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ xorq %r10,%r8
+ movzwq (%r11,%r12,2),%r12
+ movzbl %dl,%ebx
+ shlb $4,%al
+ movzbq (%rsp,%rcx,1),%r13
+ shrl $4,%ebx
+ shlq $48,%r12
+ xorq %r8,%r13
+ movq %r9,%r10
+ xorq %r12,%r9
+ shrq $8,%r8
+ movzbq %r13b,%r13
+ shrq $8,%r9
+ xorq -128(%rbp,%rcx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rcx,8),%r9
+ roll $8,%edx
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ movb %dl,%al
+ xorq %r10,%r8
+ movzwq (%r11,%r13,2),%r13
+ movzbl %dl,%ecx
+ shlb $4,%al
+ movzbq (%rsp,%rbx,1),%r12
+ andl $240,%ecx
+ shlq $48,%r13
+ xorq %r8,%r12
+ movq %r9,%r10
+ xorq %r13,%r9
+ shrq $8,%r8
+ movzbq %r12b,%r12
+ movl -4(%rdi),%edx
+ shrq $8,%r9
+ xorq -128(%rbp,%rbx,8),%r8
+ shlq $56,%r10
+ xorq (%rbp,%rbx,8),%r9
+ movzwq (%r11,%r12,2),%r12
+ xorq 8(%rsi,%rax,1),%r8
+ xorq (%rsi,%rax,1),%r9
+ shlq $48,%r12
+ xorq %r10,%r8
+ xorq %r12,%r9
+ movzbq %r8b,%r13
+ shrq $4,%r8
+ movq %r9,%r10
+ shlb $4,%r13b
+ shrq $4,%r9
+ xorq 8(%rsi,%rcx,1),%r8
+ movzwq (%r11,%r13,2),%r13
+ shlq $60,%r10
+ xorq (%rsi,%rcx,1),%r9
+ xorq %r10,%r8
+ shlq $48,%r13
+ bswapq %r8
+ xorq %r13,%r9
+ bswapq %r9
+ cmpq %r15,%r14
+ jb .Louter_loop
+ movq %r8,8(%rdi)
+ movq %r9,(%rdi)
+
+ leaq 280(%rsp),%rsi
+ movq 0(%rsi),%r15
+ movq 8(%rsi),%r14
+ movq 16(%rsi),%r13
+ movq 24(%rsi),%r12
+ movq 32(%rsi),%rbp
+ movq 40(%rsi),%rbx
+ leaq 48(%rsi),%rsp
+.Lghash_epilogue:
+ .byte 0xf3,0xc3
+.size gcm_ghash_4bit,.-gcm_ghash_4bit
+.globl gcm_init_clmul
+.type gcm_init_clmul,@function
+.align 16
+gcm_init_clmul:
+ movdqu (%rsi),%xmm2
+ pshufd $78,%xmm2,%xmm2
+
+
+ pshufd $255,%xmm2,%xmm4
+ movdqa %xmm2,%xmm3
+ psllq $1,%xmm2
+ pxor %xmm5,%xmm5
+ psrlq $63,%xmm3
+ pcmpgtd %xmm4,%xmm5
+ pslldq $8,%xmm3
+ por %xmm3,%xmm2
+
+
+ pand .L0x1c2_polynomial(%rip),%xmm5
+ pxor %xmm5,%xmm2
+
+
+ movdqa %xmm2,%xmm0
+ movdqa %xmm0,%xmm1
+ pshufd $78,%xmm0,%xmm3
+ pshufd $78,%xmm2,%xmm4
+ pxor %xmm0,%xmm3
+ pxor %xmm2,%xmm4
+.byte 102,15,58,68,194,0
+.byte 102,15,58,68,202,17
+.byte 102,15,58,68,220,0
+ pxor %xmm0,%xmm3
+ pxor %xmm1,%xmm3
+
+ movdqa %xmm3,%xmm4
+ psrldq $8,%xmm3
+ pslldq $8,%xmm4
+ pxor %xmm3,%xmm1
+ pxor %xmm4,%xmm0
+
+ movdqa %xmm0,%xmm3
+ psllq $1,%xmm0
+ pxor %xmm3,%xmm0
+ psllq $5,%xmm0
+ pxor %xmm3,%xmm0
+ psllq $57,%xmm0
+ movdqa %xmm0,%xmm4
+ pslldq $8,%xmm0
+ psrldq $8,%xmm4
+ pxor %xmm3,%xmm0
+ pxor %xmm4,%xmm1
+
+
+ movdqa %xmm0,%xmm4
+ psrlq $5,%xmm0
+ pxor %xmm4,%xmm0
+ psrlq $1,%xmm0
+ pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm4
+ psrlq $1,%xmm0
+ pxor %xmm4,%xmm0
+ movdqu %xmm2,(%rdi)
+ movdqu %xmm0,16(%rdi)
+ .byte 0xf3,0xc3
+.size gcm_init_clmul,.-gcm_init_clmul
+.globl gcm_gmult_clmul
+.type gcm_gmult_clmul,@function
+.align 16
+gcm_gmult_clmul:
+ movdqu (%rdi),%xmm0
+ movdqa .Lbswap_mask(%rip),%xmm5
+ movdqu (%rsi),%xmm2
+.byte 102,15,56,0,197
+ movdqa %xmm0,%xmm1
+ pshufd $78,%xmm0,%xmm3
+ pshufd $78,%xmm2,%xmm4
+ pxor %xmm0,%xmm3
+ pxor %xmm2,%xmm4
+.byte 102,15,58,68,194,0
+.byte 102,15,58,68,202,17
+.byte 102,15,58,68,220,0
+ pxor %xmm0,%xmm3
+ pxor %xmm1,%xmm3
+
+ movdqa %xmm3,%xmm4
+ psrldq $8,%xmm3
+ pslldq $8,%xmm4
+ pxor %xmm3,%xmm1
+ pxor %xmm4,%xmm0
+
+ movdqa %xmm0,%xmm3
+ psllq $1,%xmm0
+ pxor %xmm3,%xmm0
+ psllq $5,%xmm0
+ pxor %xmm3,%xmm0
+ psllq $57,%xmm0
+ movdqa %xmm0,%xmm4
+ pslldq $8,%xmm0
+ psrldq $8,%xmm4
+ pxor %xmm3,%xmm0
+ pxor %xmm4,%xmm1
+
+
+ movdqa %xmm0,%xmm4
+ psrlq $5,%xmm0
+ pxor %xmm4,%xmm0
+ psrlq $1,%xmm0
+ pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm4
+ psrlq $1,%xmm0
+ pxor %xmm4,%xmm0
+.byte 102,15,56,0,197
+ movdqu %xmm0,(%rdi)
+ .byte 0xf3,0xc3
+.size gcm_gmult_clmul,.-gcm_gmult_clmul
+.globl gcm_ghash_clmul
+.type gcm_ghash_clmul,@function
+.align 16
+gcm_ghash_clmul:
+ movdqa .Lbswap_mask(%rip),%xmm5
+
+ movdqu (%rdi),%xmm0
+ movdqu (%rsi),%xmm2
+.byte 102,15,56,0,197
+
+ subq $16,%rcx
+ jz .Lodd_tail
+
+ movdqu 16(%rsi),%xmm8
+
+
+
+
+
+ movdqu (%rdx),%xmm3
+ movdqu 16(%rdx),%xmm6
+.byte 102,15,56,0,221
+.byte 102,15,56,0,245
+ pxor %xmm3,%xmm0
+ movdqa %xmm6,%xmm7
+ pshufd $78,%xmm6,%xmm3
+ pshufd $78,%xmm2,%xmm4
+ pxor %xmm6,%xmm3
+ pxor %xmm2,%xmm4
+.byte 102,15,58,68,242,0
+.byte 102,15,58,68,250,17
+.byte 102,15,58,68,220,0
+ pxor %xmm6,%xmm3
+ pxor %xmm7,%xmm3
+
+ movdqa %xmm3,%xmm4
+ psrldq $8,%xmm3
+ pslldq $8,%xmm4
+ pxor %xmm3,%xmm7
+ pxor %xmm4,%xmm6
+ movdqa %xmm0,%xmm1
+ pshufd $78,%xmm0,%xmm3
+ pshufd $78,%xmm8,%xmm4
+ pxor %xmm0,%xmm3
+ pxor %xmm8,%xmm4
+
+ leaq 32(%rdx),%rdx
+ subq $32,%rcx
+ jbe .Leven_tail
+
+.Lmod_loop:
+.byte 102,65,15,58,68,192,0
+.byte 102,65,15,58,68,200,17
+.byte 102,15,58,68,220,0
+ pxor %xmm0,%xmm3
+ pxor %xmm1,%xmm3
+
+ movdqa %xmm3,%xmm4
+ psrldq $8,%xmm3
+ pslldq $8,%xmm4
+ pxor %xmm3,%xmm1
+ pxor %xmm4,%xmm0
+ movdqu (%rdx),%xmm3
+ pxor %xmm6,%xmm0
+ pxor %xmm7,%xmm1
+
+ movdqu 16(%rdx),%xmm6
+.byte 102,15,56,0,221
+.byte 102,15,56,0,245
+
+ movdqa %xmm6,%xmm7
+ pshufd $78,%xmm6,%xmm9
+ pshufd $78,%xmm2,%xmm10
+ pxor %xmm6,%xmm9
+ pxor %xmm2,%xmm10
+ pxor %xmm3,%xmm1
+
+ movdqa %xmm0,%xmm3
+ psllq $1,%xmm0
+ pxor %xmm3,%xmm0
+ psllq $5,%xmm0
+ pxor %xmm3,%xmm0
+.byte 102,15,58,68,242,0
+ psllq $57,%xmm0
+ movdqa %xmm0,%xmm4
+ pslldq $8,%xmm0
+ psrldq $8,%xmm4
+ pxor %xmm3,%xmm0
+ pxor %xmm4,%xmm1
+
+.byte 102,15,58,68,250,17
+ movdqa %xmm0,%xmm4
+ psrlq $5,%xmm0
+ pxor %xmm4,%xmm0
+ psrlq $1,%xmm0
+ pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm4
+ psrlq $1,%xmm0
+ pxor %xmm4,%xmm0
+
+.byte 102,69,15,58,68,202,0
+ movdqa %xmm0,%xmm1
+ pshufd $78,%xmm0,%xmm3
+ pshufd $78,%xmm8,%xmm4
+ pxor %xmm0,%xmm3
+ pxor %xmm8,%xmm4
+
+ pxor %xmm6,%xmm9
+ pxor %xmm7,%xmm9
+ movdqa %xmm9,%xmm10
+ psrldq $8,%xmm9
+ pslldq $8,%xmm10
+ pxor %xmm9,%xmm7
+ pxor %xmm10,%xmm6
+
+ leaq 32(%rdx),%rdx
+ subq $32,%rcx
+ ja .Lmod_loop
+
+.Leven_tail:
+.byte 102,65,15,58,68,192,0
+.byte 102,65,15,58,68,200,17
+.byte 102,15,58,68,220,0
+ pxor %xmm0,%xmm3
+ pxor %xmm1,%xmm3
+
+ movdqa %xmm3,%xmm4
+ psrldq $8,%xmm3
+ pslldq $8,%xmm4
+ pxor %xmm3,%xmm1
+ pxor %xmm4,%xmm0
+ pxor %xmm6,%xmm0
+ pxor %xmm7,%xmm1
+
+ movdqa %xmm0,%xmm3
+ psllq $1,%xmm0
+ pxor %xmm3,%xmm0
+ psllq $5,%xmm0
+ pxor %xmm3,%xmm0
+ psllq $57,%xmm0
+ movdqa %xmm0,%xmm4
+ pslldq $8,%xmm0
+ psrldq $8,%xmm4
+ pxor %xmm3,%xmm0
+ pxor %xmm4,%xmm1
+
+
+ movdqa %xmm0,%xmm4
+ psrlq $5,%xmm0
+ pxor %xmm4,%xmm0
+ psrlq $1,%xmm0
+ pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm4
+ psrlq $1,%xmm0
+ pxor %xmm4,%xmm0
+ testq %rcx,%rcx
+ jnz .Ldone
+
+.Lodd_tail:
+ movdqu (%rdx),%xmm3
+.byte 102,15,56,0,221
+ pxor %xmm3,%xmm0
+ movdqa %xmm0,%xmm1
+ pshufd $78,%xmm0,%xmm3
+ pshufd $78,%xmm2,%xmm4
+ pxor %xmm0,%xmm3
+ pxor %xmm2,%xmm4
+.byte 102,15,58,68,194,0
+.byte 102,15,58,68,202,17
+.byte 102,15,58,68,220,0
+ pxor %xmm0,%xmm3
+ pxor %xmm1,%xmm3
+
+ movdqa %xmm3,%xmm4
+ psrldq $8,%xmm3
+ pslldq $8,%xmm4
+ pxor %xmm3,%xmm1
+ pxor %xmm4,%xmm0
+
+ movdqa %xmm0,%xmm3
+ psllq $1,%xmm0
+ pxor %xmm3,%xmm0
+ psllq $5,%xmm0
+ pxor %xmm3,%xmm0
+ psllq $57,%xmm0
+ movdqa %xmm0,%xmm4
+ pslldq $8,%xmm0
+ psrldq $8,%xmm4
+ pxor %xmm3,%xmm0
+ pxor %xmm4,%xmm1
+
+
+ movdqa %xmm0,%xmm4
+ psrlq $5,%xmm0
+ pxor %xmm4,%xmm0
+ psrlq $1,%xmm0
+ pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm4
+ psrlq $1,%xmm0
+ pxor %xmm4,%xmm0
+.Ldone:
+.byte 102,15,56,0,197
+ movdqu %xmm0,(%rdi)
+ .byte 0xf3,0xc3
+.LSEH_end_gcm_ghash_clmul:
+.size gcm_ghash_clmul,.-gcm_ghash_clmul
+.align 64
+.Lbswap_mask:
+.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
+.L0x1c2_polynomial:
+.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
+.align 64
+.type .Lrem_4bit,@object
+.Lrem_4bit:
+.long 0,0,0,471859200,0,943718400,0,610271232
+.long 0,1887436800,0,1822425088,0,1220542464,0,1423966208
+.long 0,3774873600,0,4246732800,0,3644850176,0,3311403008
+.long 0,2441084928,0,2376073216,0,2847932416,0,3051356160
+.type .Lrem_8bit,@object
+.Lrem_8bit:
+.value 0x0000,0x01C2,0x0384,0x0246,0x0708,0x06CA,0x048C,0x054E
+.value 0x0E10,0x0FD2,0x0D94,0x0C56,0x0918,0x08DA,0x0A9C,0x0B5E
+.value 0x1C20,0x1DE2,0x1FA4,0x1E66,0x1B28,0x1AEA,0x18AC,0x196E
+.value 0x1230,0x13F2,0x11B4,0x1076,0x1538,0x14FA,0x16BC,0x177E
+.value 0x3840,0x3982,0x3BC4,0x3A06,0x3F48,0x3E8A,0x3CCC,0x3D0E
+.value 0x3650,0x3792,0x35D4,0x3416,0x3158,0x309A,0x32DC,0x331E
+.value 0x2460,0x25A2,0x27E4,0x2626,0x2368,0x22AA,0x20EC,0x212E
+.value 0x2A70,0x2BB2,0x29F4,0x2836,0x2D78,0x2CBA,0x2EFC,0x2F3E
+.value 0x7080,0x7142,0x7304,0x72C6,0x7788,0x764A,0x740C,0x75CE
+.value 0x7E90,0x7F52,0x7D14,0x7CD6,0x7998,0x785A,0x7A1C,0x7BDE
+.value 0x6CA0,0x6D62,0x6F24,0x6EE6,0x6BA8,0x6A6A,0x682C,0x69EE
+.value 0x62B0,0x6372,0x6134,0x60F6,0x65B8,0x647A,0x663C,0x67FE
+.value 0x48C0,0x4902,0x4B44,0x4A86,0x4FC8,0x4E0A,0x4C4C,0x4D8E
+.value 0x46D0,0x4712,0x4554,0x4496,0x41D8,0x401A,0x425C,0x439E
+.value 0x54E0,0x5522,0x5764,0x56A6,0x53E8,0x522A,0x506C,0x51AE
+.value 0x5AF0,0x5B32,0x5974,0x58B6,0x5DF8,0x5C3A,0x5E7C,0x5FBE
+.value 0xE100,0xE0C2,0xE284,0xE346,0xE608,0xE7CA,0xE58C,0xE44E
+.value 0xEF10,0xEED2,0xEC94,0xED56,0xE818,0xE9DA,0xEB9C,0xEA5E
+.value 0xFD20,0xFCE2,0xFEA4,0xFF66,0xFA28,0xFBEA,0xF9AC,0xF86E
+.value 0xF330,0xF2F2,0xF0B4,0xF176,0xF438,0xF5FA,0xF7BC,0xF67E
+.value 0xD940,0xD882,0xDAC4,0xDB06,0xDE48,0xDF8A,0xDDCC,0xDC0E
+.value 0xD750,0xD692,0xD4D4,0xD516,0xD058,0xD19A,0xD3DC,0xD21E
+.value 0xC560,0xC4A2,0xC6E4,0xC726,0xC268,0xC3AA,0xC1EC,0xC02E
+.value 0xCB70,0xCAB2,0xC8F4,0xC936,0xCC78,0xCDBA,0xCFFC,0xCE3E
+.value 0x9180,0x9042,0x9204,0x93C6,0x9688,0x974A,0x950C,0x94CE
+.value 0x9F90,0x9E52,0x9C14,0x9DD6,0x9898,0x995A,0x9B1C,0x9ADE
+.value 0x8DA0,0x8C62,0x8E24,0x8FE6,0x8AA8,0x8B6A,0x892C,0x88EE
+.value 0x83B0,0x8272,0x8034,0x81F6,0x84B8,0x857A,0x873C,0x86FE
+.value 0xA9C0,0xA802,0xAA44,0xAB86,0xAEC8,0xAF0A,0xAD4C,0xAC8E
+.value 0xA7D0,0xA612,0xA454,0xA596,0xA0D8,0xA11A,0xA35C,0xA29E
+.value 0xB5E0,0xB422,0xB664,0xB7A6,0xB2E8,0xB32A,0xB16C,0xB0AE
+.value 0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE
+
+.byte 71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
+.align 64
diff --git a/secure/lib/libcrypto/amd64/md5-x86_64.S b/secure/lib/libcrypto/amd64/md5-x86_64.S
new file mode 100644
index 000000000000..c592dcce66ec
--- /dev/null
+++ b/secure/lib/libcrypto/amd64/md5-x86_64.S
@@ -0,0 +1,669 @@
+ # $FreeBSD$
+.text
+.align 16
+
+.globl md5_block_asm_data_order
+.type md5_block_asm_data_order,@function
+md5_block_asm_data_order:
+ pushq %rbp
+ pushq %rbx
+ pushq %r12
+ pushq %r14
+ pushq %r15
+.Lprologue:
+
+
+
+
+ movq %rdi,%rbp
+ shlq $6,%rdx
+ leaq (%rsi,%rdx,1),%rdi
+ movl 0(%rbp),%eax
+ movl 4(%rbp),%ebx
+ movl 8(%rbp),%ecx
+ movl 12(%rbp),%edx
+
+
+
+
+
+
+
+ cmpq %rdi,%rsi
+ je .Lend
+
+
+.Lloop:
+ movl %eax,%r8d
+ movl %ebx,%r9d
+ movl %ecx,%r14d
+ movl %edx,%r15d
+ movl 0(%rsi),%r10d
+ movl %edx,%r11d
+ xorl %ecx,%r11d
+ leal -680876936(%rax,%r10,1),%eax
+ andl %ebx,%r11d
+ xorl %edx,%r11d
+ movl 4(%rsi),%r10d
+ addl %r11d,%eax
+ roll $7,%eax
+ movl %ecx,%r11d
+ addl %ebx,%eax
+ xorl %ebx,%r11d
+ leal -389564586(%rdx,%r10,1),%edx
+ andl %eax,%r11d
+ xorl %ecx,%r11d
+ movl 8(%rsi),%r10d
+ addl %r11d,%edx
+ roll $12,%edx
+ movl %ebx,%r11d
+ addl %eax,%edx
+ xorl %eax,%r11d
+ leal 606105819(%rcx,%r10,1),%ecx
+ andl %edx,%r11d
+ xorl %ebx,%r11d
+ movl 12(%rsi),%r10d
+ addl %r11d,%ecx
+ roll $17,%ecx
+ movl %eax,%r11d
+ addl %edx,%ecx
+ xorl %edx,%r11d
+ leal -1044525330(%rbx,%r10,1),%ebx
+ andl %ecx,%r11d
+ xorl %eax,%r11d
+ movl 16(%rsi),%r10d
+ addl %r11d,%ebx
+ roll $22,%ebx
+ movl %edx,%r11d
+ addl %ecx,%ebx
+ xorl %ecx,%r11d
+ leal -176418897(%rax,%r10,1),%eax
+ andl %ebx,%r11d
+ xorl %edx,%r11d
+ movl 20(%rsi),%r10d
+ addl %r11d,%eax
+ roll $7,%eax
+ movl %ecx,%r11d
+ addl %ebx,%eax
+ xorl %ebx,%r11d
+ leal 1200080426(%rdx,%r10,1),%edx
+ andl %eax,%r11d
+ xorl %ecx,%r11d
+ movl 24(%rsi),%r10d
+ addl %r11d,%edx
+ roll $12,%edx
+ movl %ebx,%r11d
+ addl %eax,%edx
+ xorl %eax,%r11d
+ leal -1473231341(%rcx,%r10,1),%ecx
+ andl %edx,%r11d
+ xorl %ebx,%r11d
+ movl 28(%rsi),%r10d
+ addl %r11d,%ecx
+ roll $17,%ecx
+ movl %eax,%r11d
+ addl %edx,%ecx
+ xorl %edx,%r11d
+ leal -45705983(%rbx,%r10,1),%ebx
+ andl %ecx,%r11d
+ xorl %eax,%r11d
+ movl 32(%rsi),%r10d
+ addl %r11d,%ebx
+ roll $22,%ebx
+ movl %edx,%r11d
+ addl %ecx,%ebx
+ xorl %ecx,%r11d
+ leal 1770035416(%rax,%r10,1),%eax
+ andl %ebx,%r11d
+ xorl %edx,%r11d
+ movl 36(%rsi),%r10d
+ addl %r11d,%eax
+ roll $7,%eax
+ movl %ecx,%r11d
+ addl %ebx,%eax
+ xorl %ebx,%r11d
+ leal -1958414417(%rdx,%r10,1),%edx
+ andl %eax,%r11d
+ xorl %ecx,%r11d
+ movl 40(%rsi),%r10d
+ addl %r11d,%edx
+ roll $12,%edx
+ movl %ebx,%r11d
+ addl %eax,%edx
+ xorl %eax,%r11d
+ leal -42063(%rcx,%r10,1),%ecx
+ andl %edx,%r11d
+ xorl %ebx,%r11d
+ movl 44(%rsi),%r10d
+ addl %r11d,%ecx
+ roll $17,%ecx
+ movl %eax,%r11d
+ addl %edx,%ecx
+ xorl %edx,%r11d
+ leal -1990404162(%rbx,%r10,1),%ebx
+ andl %ecx,%r11d
+ xorl %eax,%r11d
+ movl 48(%rsi),%r10d
+ addl %r11d,%ebx
+ roll $22,%ebx
+ movl %edx,%r11d
+ addl %ecx,%ebx
+ xorl %ecx,%r11d
+ leal 1804603682(%rax,%r10,1),%eax
+ andl %ebx,%r11d
+ xorl %edx,%r11d
+ movl 52(%rsi),%r10d
+ addl %r11d,%eax
+ roll $7,%eax
+ movl %ecx,%r11d
+ addl %ebx,%eax
+ xorl %ebx,%r11d
+ leal -40341101(%rdx,%r10,1),%edx
+ andl %eax,%r11d
+ xorl %ecx,%r11d
+ movl 56(%rsi),%r10d
+ addl %r11d,%edx
+ roll $12,%edx
+ movl %ebx,%r11d
+ addl %eax,%edx
+ xorl %eax,%r11d
+ leal -1502002290(%rcx,%r10,1),%ecx
+ andl %edx,%r11d
+ xorl %ebx,%r11d
+ movl 60(%rsi),%r10d
+ addl %r11d,%ecx
+ roll $17,%ecx
+ movl %eax,%r11d
+ addl %edx,%ecx
+ xorl %edx,%r11d
+ leal 1236535329(%rbx,%r10,1),%ebx
+ andl %ecx,%r11d
+ xorl %eax,%r11d
+ movl 0(%rsi),%r10d
+ addl %r11d,%ebx
+ roll $22,%ebx
+ movl %edx,%r11d
+ addl %ecx,%ebx
+ movl 4(%rsi),%r10d
+ movl %edx,%r11d
+ movl %edx,%r12d
+ notl %r11d
+ leal -165796510(%rax,%r10,1),%eax
+ andl %ebx,%r12d
+ andl %ecx,%r11d
+ movl 24(%rsi),%r10d
+ orl %r11d,%r12d
+ movl %ecx,%r11d
+ addl %r12d,%eax
+ movl %ecx,%r12d
+ roll $5,%eax
+ addl %ebx,%eax
+ notl %r11d
+ leal -1069501632(%rdx,%r10,1),%edx
+ andl %eax,%r12d
+ andl %ebx,%r11d
+ movl 44(%rsi),%r10d
+ orl %r11d,%r12d
+ movl %ebx,%r11d
+ addl %r12d,%edx
+ movl %ebx,%r12d
+ roll $9,%edx
+ addl %eax,%edx
+ notl %r11d
+ leal 643717713(%rcx,%r10,1),%ecx
+ andl %edx,%r12d
+ andl %eax,%r11d
+ movl 0(%rsi),%r10d
+ orl %r11d,%r12d
+ movl %eax,%r11d
+ addl %r12d,%ecx
+ movl %eax,%r12d
+ roll $14,%ecx
+ addl %edx,%ecx
+ notl %r11d
+ leal -373897302(%rbx,%r10,1),%ebx
+ andl %ecx,%r12d
+ andl %edx,%r11d
+ movl 20(%rsi),%r10d
+ orl %r11d,%r12d
+ movl %edx,%r11d
+ addl %r12d,%ebx
+ movl %edx,%r12d
+ roll $20,%ebx
+ addl %ecx,%ebx
+ notl %r11d
+ leal -701558691(%rax,%r10,1),%eax
+ andl %ebx,%r12d
+ andl %ecx,%r11d
+ movl 40(%rsi),%r10d
+ orl %r11d,%r12d
+ movl %ecx,%r11d
+ addl %r12d,%eax
+ movl %ecx,%r12d
+ roll $5,%eax
+ addl %ebx,%eax
+ notl %r11d
+ leal 38016083(%rdx,%r10,1),%edx
+ andl %eax,%r12d
+ andl %ebx,%r11d
+ movl 60(%rsi),%r10d
+ orl %r11d,%r12d
+ movl %ebx,%r11d
+ addl %r12d,%edx
+ movl %ebx,%r12d
+ roll $9,%edx
+ addl %eax,%edx
+ notl %r11d
+ leal -660478335(%rcx,%r10,1),%ecx
+ andl %edx,%r12d
+ andl %eax,%r11d
+ movl 16(%rsi),%r10d
+ orl %r11d,%r12d
+ movl %eax,%r11d
+ addl %r12d,%ecx
+ movl %eax,%r12d
+ roll $14,%ecx
+ addl %edx,%ecx
+ notl %r11d
+ leal -405537848(%rbx,%r10,1),%ebx
+ andl %ecx,%r12d
+ andl %edx,%r11d
+ movl 36(%rsi),%r10d
+ orl %r11d,%r12d
+ movl %edx,%r11d
+ addl %r12d,%ebx
+ movl %edx,%r12d
+ roll $20,%ebx
+ addl %ecx,%ebx
+ notl %r11d
+ leal 568446438(%rax,%r10,1),%eax
+ andl %ebx,%r12d
+ andl %ecx,%r11d
+ movl 56(%rsi),%r10d
+ orl %r11d,%r12d
+ movl %ecx,%r11d
+ addl %r12d,%eax
+ movl %ecx,%r12d
+ roll $5,%eax
+ addl %ebx,%eax
+ notl %r11d
+ leal -1019803690(%rdx,%r10,1),%edx
+ andl %eax,%r12d
+ andl %ebx,%r11d
+ movl 12(%rsi),%r10d
+ orl %r11d,%r12d
+ movl %ebx,%r11d
+ addl %r12d,%edx
+ movl %ebx,%r12d
+ roll $9,%edx
+ addl %eax,%edx
+ notl %r11d
+ leal -187363961(%rcx,%r10,1),%ecx
+ andl %edx,%r12d
+ andl %eax,%r11d
+ movl 32(%rsi),%r10d
+ orl %r11d,%r12d
+ movl %eax,%r11d
+ addl %r12d,%ecx
+ movl %eax,%r12d
+ roll $14,%ecx
+ addl %edx,%ecx
+ notl %r11d
+ leal 1163531501(%rbx,%r10,1),%ebx
+ andl %ecx,%r12d
+ andl %edx,%r11d
+ movl 52(%rsi),%r10d
+ orl %r11d,%r12d
+ movl %edx,%r11d
+ addl %r12d,%ebx
+ movl %edx,%r12d
+ roll $20,%ebx
+ addl %ecx,%ebx
+ notl %r11d
+ leal -1444681467(%rax,%r10,1),%eax
+ andl %ebx,%r12d
+ andl %ecx,%r11d
+ movl 8(%rsi),%r10d
+ orl %r11d,%r12d
+ movl %ecx,%r11d
+ addl %r12d,%eax
+ movl %ecx,%r12d
+ roll $5,%eax
+ addl %ebx,%eax
+ notl %r11d
+ leal -51403784(%rdx,%r10,1),%edx
+ andl %eax,%r12d
+ andl %ebx,%r11d
+ movl 28(%rsi),%r10d
+ orl %r11d,%r12d
+ movl %ebx,%r11d
+ addl %r12d,%edx
+ movl %ebx,%r12d
+ roll $9,%edx
+ addl %eax,%edx
+ notl %r11d
+ leal 1735328473(%rcx,%r10,1),%ecx
+ andl %edx,%r12d
+ andl %eax,%r11d
+ movl 48(%rsi),%r10d
+ orl %r11d,%r12d
+ movl %eax,%r11d
+ addl %r12d,%ecx
+ movl %eax,%r12d
+ roll $14,%ecx
+ addl %edx,%ecx
+ notl %r11d
+ leal -1926607734(%rbx,%r10,1),%ebx
+ andl %ecx,%r12d
+ andl %edx,%r11d
+ movl 0(%rsi),%r10d
+ orl %r11d,%r12d
+ movl %edx,%r11d
+ addl %r12d,%ebx
+ movl %edx,%r12d
+ roll $20,%ebx
+ addl %ecx,%ebx
+ movl 20(%rsi),%r10d
+ movl %ecx,%r11d
+ leal -378558(%rax,%r10,1),%eax
+ movl 32(%rsi),%r10d
+ xorl %edx,%r11d
+ xorl %ebx,%r11d
+ addl %r11d,%eax
+ roll $4,%eax
+ movl %ebx,%r11d
+ addl %ebx,%eax
+ leal -2022574463(%rdx,%r10,1),%edx
+ movl 44(%rsi),%r10d
+ xorl %ecx,%r11d
+ xorl %eax,%r11d
+ addl %r11d,%edx
+ roll $11,%edx
+ movl %eax,%r11d
+ addl %eax,%edx
+ leal 1839030562(%rcx,%r10,1),%ecx
+ movl 56(%rsi),%r10d
+ xorl %ebx,%r11d
+ xorl %edx,%r11d
+ addl %r11d,%ecx
+ roll $16,%ecx
+ movl %edx,%r11d
+ addl %edx,%ecx
+ leal -35309556(%rbx,%r10,1),%ebx
+ movl 4(%rsi),%r10d
+ xorl %eax,%r11d
+ xorl %ecx,%r11d
+ addl %r11d,%ebx
+ roll $23,%ebx
+ movl %ecx,%r11d
+ addl %ecx,%ebx
+ leal -1530992060(%rax,%r10,1),%eax
+ movl 16(%rsi),%r10d
+ xorl %edx,%r11d
+ xorl %ebx,%r11d
+ addl %r11d,%eax
+ roll $4,%eax
+ movl %ebx,%r11d
+ addl %ebx,%eax
+ leal 1272893353(%rdx,%r10,1),%edx
+ movl 28(%rsi),%r10d
+ xorl %ecx,%r11d
+ xorl %eax,%r11d
+ addl %r11d,%edx
+ roll $11,%edx
+ movl %eax,%r11d
+ addl %eax,%edx
+ leal -155497632(%rcx,%r10,1),%ecx
+ movl 40(%rsi),%r10d
+ xorl %ebx,%r11d
+ xorl %edx,%r11d
+ addl %r11d,%ecx
+ roll $16,%ecx
+ movl %edx,%r11d
+ addl %edx,%ecx
+ leal -1094730640(%rbx,%r10,1),%ebx
+ movl 52(%rsi),%r10d
+ xorl %eax,%r11d
+ xorl %ecx,%r11d
+ addl %r11d,%ebx
+ roll $23,%ebx
+ movl %ecx,%r11d
+ addl %ecx,%ebx
+ leal 681279174(%rax,%r10,1),%eax
+ movl 0(%rsi),%r10d
+ xorl %edx,%r11d
+ xorl %ebx,%r11d
+ addl %r11d,%eax
+ roll $4,%eax
+ movl %ebx,%r11d
+ addl %ebx,%eax
+ leal -358537222(%rdx,%r10,1),%edx
+ movl 12(%rsi),%r10d
+ xorl %ecx,%r11d
+ xorl %eax,%r11d
+ addl %r11d,%edx
+ roll $11,%edx
+ movl %eax,%r11d
+ addl %eax,%edx
+ leal -722521979(%rcx,%r10,1),%ecx
+ movl 24(%rsi),%r10d
+ xorl %ebx,%r11d
+ xorl %edx,%r11d
+ addl %r11d,%ecx
+ roll $16,%ecx
+ movl %edx,%r11d
+ addl %edx,%ecx
+ leal 76029189(%rbx,%r10,1),%ebx
+ movl 36(%rsi),%r10d
+ xorl %eax,%r11d
+ xorl %ecx,%r11d
+ addl %r11d,%ebx
+ roll $23,%ebx
+ movl %ecx,%r11d
+ addl %ecx,%ebx
+ leal -640364487(%rax,%r10,1),%eax
+ movl 48(%rsi),%r10d
+ xorl %edx,%r11d
+ xorl %ebx,%r11d
+ addl %r11d,%eax
+ roll $4,%eax
+ movl %ebx,%r11d
+ addl %ebx,%eax
+ leal -421815835(%rdx,%r10,1),%edx
+ movl 60(%rsi),%r10d
+ xorl %ecx,%r11d
+ xorl %eax,%r11d
+ addl %r11d,%edx
+ roll $11,%edx
+ movl %eax,%r11d
+ addl %eax,%edx
+ leal 530742520(%rcx,%r10,1),%ecx
+ movl 8(%rsi),%r10d
+ xorl %ebx,%r11d
+ xorl %edx,%r11d
+ addl %r11d,%ecx
+ roll $16,%ecx
+ movl %edx,%r11d
+ addl %edx,%ecx
+ leal -995338651(%rbx,%r10,1),%ebx
+ movl 0(%rsi),%r10d
+ xorl %eax,%r11d
+ xorl %ecx,%r11d
+ addl %r11d,%ebx
+ roll $23,%ebx
+ movl %ecx,%r11d
+ addl %ecx,%ebx
+ movl 0(%rsi),%r10d
+ movl $4294967295,%r11d
+ xorl %edx,%r11d
+ leal -198630844(%rax,%r10,1),%eax
+ orl %ebx,%r11d
+ xorl %ecx,%r11d
+ addl %r11d,%eax
+ movl 28(%rsi),%r10d
+ movl $4294967295,%r11d
+ roll $6,%eax
+ xorl %ecx,%r11d
+ addl %ebx,%eax
+ leal 1126891415(%rdx,%r10,1),%edx
+ orl %eax,%r11d
+ xorl %ebx,%r11d
+ addl %r11d,%edx
+ movl 56(%rsi),%r10d
+ movl $4294967295,%r11d
+ roll $10,%edx
+ xorl %ebx,%r11d
+ addl %eax,%edx
+ leal -1416354905(%rcx,%r10,1),%ecx
+ orl %edx,%r11d
+ xorl %eax,%r11d
+ addl %r11d,%ecx
+ movl 20(%rsi),%r10d
+ movl $4294967295,%r11d
+ roll $15,%ecx
+ xorl %eax,%r11d
+ addl %edx,%ecx
+ leal -57434055(%rbx,%r10,1),%ebx
+ orl %ecx,%r11d
+ xorl %edx,%r11d
+ addl %r11d,%ebx
+ movl 48(%rsi),%r10d
+ movl $4294967295,%r11d
+ roll $21,%ebx
+ xorl %edx,%r11d
+ addl %ecx,%ebx
+ leal 1700485571(%rax,%r10,1),%eax
+ orl %ebx,%r11d
+ xorl %ecx,%r11d
+ addl %r11d,%eax
+ movl 12(%rsi),%r10d
+ movl $4294967295,%r11d
+ roll $6,%eax
+ xorl %ecx,%r11d
+ addl %ebx,%eax
+ leal -1894986606(%rdx,%r10,1),%edx
+ orl %eax,%r11d
+ xorl %ebx,%r11d
+ addl %r11d,%edx
+ movl 40(%rsi),%r10d
+ movl $4294967295,%r11d
+ roll $10,%edx
+ xorl %ebx,%r11d
+ addl %eax,%edx
+ leal -1051523(%rcx,%r10,1),%ecx
+ orl %edx,%r11d
+ xorl %eax,%r11d
+ addl %r11d,%ecx
+ movl 4(%rsi),%r10d
+ movl $4294967295,%r11d
+ roll $15,%ecx
+ xorl %eax,%r11d
+ addl %edx,%ecx
+ leal -2054922799(%rbx,%r10,1),%ebx
+ orl %ecx,%r11d
+ xorl %edx,%r11d
+ addl %r11d,%ebx
+ movl 32(%rsi),%r10d
+ movl $4294967295,%r11d
+ roll $21,%ebx
+ xorl %edx,%r11d
+ addl %ecx,%ebx
+ leal 1873313359(%rax,%r10,1),%eax
+ orl %ebx,%r11d
+ xorl %ecx,%r11d
+ addl %r11d,%eax
+ movl 60(%rsi),%r10d
+ movl $4294967295,%r11d
+ roll $6,%eax
+ xorl %ecx,%r11d
+ addl %ebx,%eax
+ leal -30611744(%rdx,%r10,1),%edx
+ orl %eax,%r11d
+ xorl %ebx,%r11d
+ addl %r11d,%edx
+ movl 24(%rsi),%r10d
+ movl $4294967295,%r11d
+ roll $10,%edx
+ xorl %ebx,%r11d
+ addl %eax,%edx
+ leal -1560198380(%rcx,%r10,1),%ecx
+ orl %edx,%r11d
+ xorl %eax,%r11d
+ addl %r11d,%ecx
+ movl 52(%rsi),%r10d
+ movl $4294967295,%r11d
+ roll $15,%ecx
+ xorl %eax,%r11d
+ addl %edx,%ecx
+ leal 1309151649(%rbx,%r10,1),%ebx
+ orl %ecx,%r11d
+ xorl %edx,%r11d
+ addl %r11d,%ebx
+ movl 16(%rsi),%r10d
+ movl $4294967295,%r11d
+ roll $21,%ebx
+ xorl %edx,%r11d
+ addl %ecx,%ebx
+ leal -145523070(%rax,%r10,1),%eax
+ orl %ebx,%r11d
+ xorl %ecx,%r11d
+ addl %r11d,%eax
+ movl 44(%rsi),%r10d
+ movl $4294967295,%r11d
+ roll $6,%eax
+ xorl %ecx,%r11d
+ addl %ebx,%eax
+ leal -1120210379(%rdx,%r10,1),%edx
+ orl %eax,%r11d
+ xorl %ebx,%r11d
+ addl %r11d,%edx
+ movl 8(%rsi),%r10d
+ movl $4294967295,%r11d
+ roll $10,%edx
+ xorl %ebx,%r11d
+ addl %eax,%edx
+ leal 718787259(%rcx,%r10,1),%ecx
+ orl %edx,%r11d
+ xorl %eax,%r11d
+ addl %r11d,%ecx
+ movl 36(%rsi),%r10d
+ movl $4294967295,%r11d
+ roll $15,%ecx
+ xorl %eax,%r11d
+ addl %edx,%ecx
+ leal -343485551(%rbx,%r10,1),%ebx
+ orl %ecx,%r11d
+ xorl %edx,%r11d
+ addl %r11d,%ebx
+ movl 0(%rsi),%r10d
+ movl $4294967295,%r11d
+ roll $21,%ebx
+ xorl %edx,%r11d
+ addl %ecx,%ebx
+
+ addl %r8d,%eax
+ addl %r9d,%ebx
+ addl %r14d,%ecx
+ addl %r15d,%edx
+
+
+ addq $64,%rsi
+ cmpq %rdi,%rsi
+ jb .Lloop
+
+
+.Lend:
+ movl %eax,0(%rbp)
+ movl %ebx,4(%rbp)
+ movl %ecx,8(%rbp)
+ movl %edx,12(%rbp)
+
+ movq (%rsp),%r15
+ movq 8(%rsp),%r14
+ movq 16(%rsp),%r12
+ movq 24(%rsp),%rbx
+ movq 32(%rsp),%rbp
+ addq $40,%rsp
+.Lepilogue:
+ .byte 0xf3,0xc3
+.size md5_block_asm_data_order,.-md5_block_asm_data_order
diff --git a/secure/lib/libcrypto/amd64/modexp512-x86_64.S b/secure/lib/libcrypto/amd64/modexp512-x86_64.S
new file mode 100644
index 000000000000..71072ad94744
--- /dev/null
+++ b/secure/lib/libcrypto/amd64/modexp512-x86_64.S
@@ -0,0 +1,1774 @@
+ # $FreeBSD$
+.text
+
+.type MULADD_128x512,@function
+.align 16
+MULADD_128x512:
+ movq 0(%rsi),%rax
+ mulq %rbp
+ addq %rax,%r8
+ adcq $0,%rdx
+ movq %r8,0(%rcx)
+ movq %rdx,%rbx
+
+ movq 8(%rsi),%rax
+ mulq %rbp
+ addq %rax,%r9
+ adcq $0,%rdx
+ addq %rbx,%r9
+ adcq $0,%rdx
+ movq %rdx,%rbx
+
+ movq 16(%rsi),%rax
+ mulq %rbp
+ addq %rax,%r10
+ adcq $0,%rdx
+ addq %rbx,%r10
+ adcq $0,%rdx
+ movq %rdx,%rbx
+
+ movq 24(%rsi),%rax
+ mulq %rbp
+ addq %rax,%r11
+ adcq $0,%rdx
+ addq %rbx,%r11
+ adcq $0,%rdx
+ movq %rdx,%rbx
+
+ movq 32(%rsi),%rax
+ mulq %rbp
+ addq %rax,%r12
+ adcq $0,%rdx
+ addq %rbx,%r12
+ adcq $0,%rdx
+ movq %rdx,%rbx
+
+ movq 40(%rsi),%rax
+ mulq %rbp
+ addq %rax,%r13
+ adcq $0,%rdx
+ addq %rbx,%r13
+ adcq $0,%rdx
+ movq %rdx,%rbx
+
+ movq 48(%rsi),%rax
+ mulq %rbp
+ addq %rax,%r14
+ adcq $0,%rdx
+ addq %rbx,%r14
+ adcq $0,%rdx
+ movq %rdx,%rbx
+
+ movq 56(%rsi),%rax
+ mulq %rbp
+ addq %rax,%r15
+ adcq $0,%rdx
+ addq %rbx,%r15
+ adcq $0,%rdx
+ movq %rdx,%r8
+ movq 8(%rdi),%rbp
+ movq 0(%rsi),%rax
+ mulq %rbp
+ addq %rax,%r9
+ adcq $0,%rdx
+ movq %r9,8(%rcx)
+ movq %rdx,%rbx
+
+ movq 8(%rsi),%rax
+ mulq %rbp
+ addq %rax,%r10
+ adcq $0,%rdx
+ addq %rbx,%r10
+ adcq $0,%rdx
+ movq %rdx,%rbx
+
+ movq 16(%rsi),%rax
+ mulq %rbp
+ addq %rax,%r11
+ adcq $0,%rdx
+ addq %rbx,%r11
+ adcq $0,%rdx
+ movq %rdx,%rbx
+
+ movq 24(%rsi),%rax
+ mulq %rbp
+ addq %rax,%r12
+ adcq $0,%rdx
+ addq %rbx,%r12
+ adcq $0,%rdx
+ movq %rdx,%rbx
+
+ movq 32(%rsi),%rax
+ mulq %rbp
+ addq %rax,%r13
+ adcq $0,%rdx
+ addq %rbx,%r13
+ adcq $0,%rdx
+ movq %rdx,%rbx
+
+ movq 40(%rsi),%rax
+ mulq %rbp
+ addq %rax,%r14
+ adcq $0,%rdx
+ addq %rbx,%r14
+ adcq $0,%rdx
+ movq %rdx,%rbx
+
+ movq 48(%rsi),%rax
+ mulq %rbp
+ addq %rax,%r15
+ adcq $0,%rdx
+ addq %rbx,%r15
+ adcq $0,%rdx
+ movq %rdx,%rbx
+
+ movq 56(%rsi),%rax
+ mulq %rbp
+ addq %rax,%r8
+ adcq $0,%rdx
+ addq %rbx,%r8
+ adcq $0,%rdx
+ movq %rdx,%r9
+ .byte 0xf3,0xc3
+.size MULADD_128x512,.-MULADD_128x512
+.type mont_reduce,@function
+.align 16
+mont_reduce:
+ leaq 192(%rsp),%rdi
+ movq 32(%rsp),%rsi
+ addq $576,%rsi
+ leaq 520(%rsp),%rcx
+
+ movq 96(%rcx),%rbp
+ movq 0(%rsi),%rax
+ mulq %rbp
+ movq (%rcx),%r8
+ addq %rax,%r8
+ adcq $0,%rdx
+ movq %r8,0(%rdi)
+ movq %rdx,%rbx
+
+ movq 8(%rsi),%rax
+ mulq %rbp
+ movq 8(%rcx),%r9
+ addq %rax,%r9
+ adcq $0,%rdx
+ addq %rbx,%r9
+ adcq $0,%rdx
+ movq %rdx,%rbx
+
+ movq 16(%rsi),%rax
+ mulq %rbp
+ movq 16(%rcx),%r10
+ addq %rax,%r10
+ adcq $0,%rdx
+ addq %rbx,%r10
+ adcq $0,%rdx
+ movq %rdx,%rbx
+
+ movq 24(%rsi),%rax
+ mulq %rbp
+ movq 24(%rcx),%r11
+ addq %rax,%r11
+ adcq $0,%rdx
+ addq %rbx,%r11
+ adcq $0,%rdx
+ movq %rdx,%rbx
+
+ movq 32(%rsi),%rax
+ mulq %rbp
+ movq 32(%rcx),%r12
+ addq %rax,%r12
+ adcq $0,%rdx
+ addq %rbx,%r12
+ adcq $0,%rdx
+ movq %rdx,%rbx
+
+ movq 40(%rsi),%rax
+ mulq %rbp
+ movq 40(%rcx),%r13
+ addq %rax,%r13
+ adcq $0,%rdx
+ addq %rbx,%r13
+ adcq $0,%rdx
+ movq %rdx,%rbx
+
+ movq 48(%rsi),%rax
+ mulq %rbp
+ movq 48(%rcx),%r14
+ addq %rax,%r14
+ adcq $0,%rdx
+ addq %rbx,%r14
+ adcq $0,%rdx
+ movq %rdx,%rbx
+
+ movq 56(%rsi),%rax
+ mulq %rbp
+ movq 56(%rcx),%r15
+ addq %rax,%r15
+ adcq $0,%rdx
+ addq %rbx,%r15
+ adcq $0,%rdx
+ movq %rdx,%r8
+ movq 104(%rcx),%rbp
+ movq 0(%rsi),%rax
+ mulq %rbp
+ addq %rax,%r9
+ adcq $0,%rdx
+ movq %r9,8(%rdi)
+ movq %rdx,%rbx
+
+ movq 8(%rsi),%rax
+ mulq %rbp
+ addq %rax,%r10
+ adcq $0,%rdx
+ addq %rbx,%r10
+ adcq $0,%rdx
+ movq %rdx,%rbx
+
+ movq 16(%rsi),%rax
+ mulq %rbp
+ addq %rax,%r11
+ adcq $0,%rdx
+ addq %rbx,%r11
+ adcq $0,%rdx
+ movq %rdx,%rbx
+
+ movq 24(%rsi),%rax
+ mulq %rbp
+ addq %rax,%r12
+ adcq $0,%rdx
+ addq %rbx,%r12
+ adcq $0,%rdx
+ movq %rdx,%rbx
+
+ movq 32(%rsi),%rax
+ mulq %rbp
+ addq %rax,%r13
+ adcq $0,%rdx
+ addq %rbx,%r13
+ adcq $0,%rdx
+ movq %rdx,%rbx
+
+ movq 40(%rsi),%rax
+ mulq %rbp
+ addq %rax,%r14
+ adcq $0,%rdx
+ addq %rbx,%r14
+ adcq $0,%rdx
+ movq %rdx,%rbx
+
+ movq 48(%rsi),%rax
+ mulq %rbp
+ addq %rax,%r15
+ adcq $0,%rdx
+ addq %rbx,%r15
+ adcq $0,%rdx
+ movq %rdx,%rbx
+
+ movq 56(%rsi),%rax
+ mulq %rbp
+ addq %rax,%r8
+ adcq $0,%rdx
+ addq %rbx,%r8
+ adcq $0,%rdx
+ movq %rdx,%r9
+ movq 112(%rcx),%rbp
+ movq 0(%rsi),%rax
+ mulq %rbp
+ addq %rax,%r10
+ adcq $0,%rdx
+ movq %r10,16(%rdi)
+ movq %rdx,%rbx
+
+ movq 8(%rsi),%rax
+ mulq %rbp
+ addq %rax,%r11
+ adcq $0,%rdx
+ addq %rbx,%r11
+ adcq $0,%rdx
+ movq %rdx,%rbx
+
+ movq 16(%rsi),%rax
+ mulq %rbp
+ addq %rax,%r12
+ adcq $0,%rdx
+ addq %rbx,%r12
+ adcq $0,%rdx
+ movq %rdx,%rbx
+
+ movq 24(%rsi),%rax
+ mulq %rbp
+ addq %rax,%r13
+ adcq $0,%rdx
+ addq %rbx,%r13
+ adcq $0,%rdx
+ movq %rdx,%rbx
+
+ movq 32(%rsi),%rax
+ mulq %rbp
+ addq %rax,%r14
+ adcq $0,%rdx
+ addq %rbx,%r14
+ adcq $0,%rdx
+ movq %rdx,%rbx
+
+ movq 40(%rsi),%rax
+ mulq %rbp
+ addq %rax,%r15
+ adcq $0,%rdx
+ addq %rbx,%r15
+ adcq $0,%rdx
+ movq %rdx,%rbx
+
+ movq 48(%rsi),%rax
+ mulq %rbp
+ addq %rax,%r8
+ adcq $0,%rdx
+ addq %rbx,%r8
+ adcq $0,%rdx
+ movq %rdx,%rbx
+
+ movq 56(%rsi),%rax
+ mulq %rbp
+ addq %rax,%r9
+ adcq $0,%rdx
+ addq %rbx,%r9
+ adcq $0,%rdx
+ movq %rdx,%r10
+ movq 120(%rcx),%rbp
+ movq 0(%rsi),%rax
+ mulq %rbp
+ addq %rax,%r11
+ adcq $0,%rdx
+ movq %r11,24(%rdi)
+ movq %rdx,%rbx
+
+ movq 8(%rsi),%rax
+ mulq %rbp
+ addq %rax,%r12
+ adcq $0,%rdx
+ addq %rbx,%r12
+ adcq $0,%rdx
+ movq %rdx,%rbx
+
+ movq 16(%rsi),%rax
+ mulq %rbp
+ addq %rax,%r13
+ adcq $0,%rdx
+ addq %rbx,%r13
+ adcq $0,%rdx
+ movq %rdx,%rbx
+
+ movq 24(%rsi),%rax
+ mulq %rbp
+ addq %rax,%r14
+ adcq $0,%rdx
+ addq %rbx,%r14
+ adcq $0,%rdx
+ movq %rdx,%rbx
+
+ movq 32(%rsi),%rax
+ mulq %rbp
+ addq %rax,%r15
+ adcq $0,%rdx
+ addq %rbx,%r15
+ adcq $0,%rdx
+ movq %rdx,%rbx
+
+ movq 40(%rsi),%rax
+ mulq %rbp
+ addq %rax,%r8
+ adcq $0,%rdx
+ addq %rbx,%r8
+ adcq $0,%rdx
+ movq %rdx,%rbx
+
+ movq 48(%rsi),%rax
+ mulq %rbp
+ addq %rax,%r9
+ adcq $0,%rdx
+ addq %rbx,%r9
+ adcq $0,%rdx
+ movq %rdx,%rbx
+
+ movq 56(%rsi),%rax
+ mulq %rbp
+ addq %rax,%r10
+ adcq $0,%rdx
+ addq %rbx,%r10
+ adcq $0,%rdx
+ movq %rdx,%r11
+ xorq %rax,%rax
+
+ addq 64(%rcx),%r8
+ adcq 72(%rcx),%r9
+ adcq 80(%rcx),%r10
+ adcq 88(%rcx),%r11
+ adcq $0,%rax
+
+
+
+
+ movq %r8,64(%rdi)
+ movq %r9,72(%rdi)
+ movq %r10,%rbp
+ movq %r11,88(%rdi)
+
+ movq %rax,384(%rsp)
+
+ movq 0(%rdi),%r8
+ movq 8(%rdi),%r9
+ movq 16(%rdi),%r10
+ movq 24(%rdi),%r11
+
+
+
+
+
+
+
+
+ addq $80,%rdi
+
+ addq $64,%rsi
+ leaq 296(%rsp),%rcx
+
+ call MULADD_128x512
+
+ movq 384(%rsp),%rax
+
+
+ addq -16(%rdi),%r8
+ adcq -8(%rdi),%r9
+ movq %r8,64(%rcx)
+ movq %r9,72(%rcx)
+
+ adcq %rax,%rax
+ movq %rax,384(%rsp)
+
+ leaq 192(%rsp),%rdi
+ addq $64,%rsi
+
+
+
+
+
+ movq (%rsi),%r8
+ movq 8(%rsi),%rbx
+
+ movq (%rcx),%rax
+ mulq %r8
+ movq %rax,%rbp
+ movq %rdx,%r9
+
+ movq 8(%rcx),%rax
+ mulq %r8
+ addq %rax,%r9
+
+ movq (%rcx),%rax
+ mulq %rbx
+ addq %rax,%r9
+
+ movq %r9,8(%rdi)
+
+
+ subq $192,%rsi
+
+ movq (%rcx),%r8
+ movq 8(%rcx),%r9
+
+ call MULADD_128x512
+
+
+
+
+ movq 0(%rsi),%rax
+ movq 8(%rsi),%rbx
+ movq 16(%rsi),%rdi
+ movq 24(%rsi),%rdx
+
+
+ movq 384(%rsp),%rbp
+
+ addq 64(%rcx),%r8
+ adcq 72(%rcx),%r9
+
+
+ adcq %rbp,%rbp
+
+
+
+ shlq $3,%rbp
+ movq 32(%rsp),%rcx
+ addq %rcx,%rbp
+
+
+ xorq %rsi,%rsi
+
+ addq 0(%rbp),%r10
+ adcq 64(%rbp),%r11
+ adcq 128(%rbp),%r12
+ adcq 192(%rbp),%r13
+ adcq 256(%rbp),%r14
+ adcq 320(%rbp),%r15
+ adcq 384(%rbp),%r8
+ adcq 448(%rbp),%r9
+
+
+
+ sbbq $0,%rsi
+
+
+ andq %rsi,%rax
+ andq %rsi,%rbx
+ andq %rsi,%rdi
+ andq %rsi,%rdx
+
+ movq $1,%rbp
+ subq %rax,%r10
+ sbbq %rbx,%r11
+ sbbq %rdi,%r12
+ sbbq %rdx,%r13
+
+
+
+
+ sbbq $0,%rbp
+
+
+
+ addq $512,%rcx
+ movq 32(%rcx),%rax
+ movq 40(%rcx),%rbx
+ movq 48(%rcx),%rdi
+ movq 56(%rcx),%rdx
+
+
+
+ andq %rsi,%rax
+ andq %rsi,%rbx
+ andq %rsi,%rdi
+ andq %rsi,%rdx
+
+
+
+ subq $1,%rbp
+
+ sbbq %rax,%r14
+ sbbq %rbx,%r15
+ sbbq %rdi,%r8
+ sbbq %rdx,%r9
+
+
+
+ movq 144(%rsp),%rsi
+ movq %r10,0(%rsi)
+ movq %r11,8(%rsi)
+ movq %r12,16(%rsi)
+ movq %r13,24(%rsi)
+ movq %r14,32(%rsi)
+ movq %r15,40(%rsi)
+ movq %r8,48(%rsi)
+ movq %r9,56(%rsi)
+
+ .byte 0xf3,0xc3
+.size mont_reduce,.-mont_reduce
+.type mont_mul_a3b,@function
+.align 16
+mont_mul_a3b:
+
+
+
+
+ movq 0(%rdi),%rbp
+
+ movq %r10,%rax
+ mulq %rbp
+ movq %rax,520(%rsp)
+ movq %rdx,%r10
+ movq %r11,%rax
+ mulq %rbp
+ addq %rax,%r10
+ adcq $0,%rdx
+ movq %rdx,%r11
+ movq %r12,%rax
+ mulq %rbp
+ addq %rax,%r11
+ adcq $0,%rdx
+ movq %rdx,%r12
+ movq %r13,%rax
+ mulq %rbp
+ addq %rax,%r12
+ adcq $0,%rdx
+ movq %rdx,%r13
+ movq %r14,%rax
+ mulq %rbp
+ addq %rax,%r13
+ adcq $0,%rdx
+ movq %rdx,%r14
+ movq %r15,%rax
+ mulq %rbp
+ addq %rax,%r14
+ adcq $0,%rdx
+ movq %rdx,%r15
+ movq %r8,%rax
+ mulq %rbp
+ addq %rax,%r15
+ adcq $0,%rdx
+ movq %rdx,%r8
+ movq %r9,%rax
+ mulq %rbp
+ addq %rax,%r8
+ adcq $0,%rdx
+ movq %rdx,%r9
+ movq 8(%rdi),%rbp
+ movq 0(%rsi),%rax
+ mulq %rbp
+ addq %rax,%r10
+ adcq $0,%rdx
+ movq %r10,528(%rsp)
+ movq %rdx,%rbx
+
+ movq 8(%rsi),%rax
+ mulq %rbp
+ addq %rax,%r11
+ adcq $0,%rdx
+ addq %rbx,%r11
+ adcq $0,%rdx
+ movq %rdx,%rbx
+
+ movq 16(%rsi),%rax
+ mulq %rbp
+ addq %rax,%r12
+ adcq $0,%rdx
+ addq %rbx,%r12
+ adcq $0,%rdx
+ movq %rdx,%rbx
+
+ movq 24(%rsi),%rax
+ mulq %rbp
+ addq %rax,%r13
+ adcq $0,%rdx
+ addq %rbx,%r13
+ adcq $0,%rdx
+ movq %rdx,%rbx
+
+ movq 32(%rsi),%rax
+ mulq %rbp
+ addq %rax,%r14
+ adcq $0,%rdx
+ addq %rbx,%r14
+ adcq $0,%rdx
+ movq %rdx,%rbx
+
+ movq 40(%rsi),%rax
+ mulq %rbp
+ addq %rax,%r15
+ adcq $0,%rdx
+ addq %rbx,%r15
+ adcq $0,%rdx
+ movq %rdx,%rbx
+
+ movq 48(%rsi),%rax
+ mulq %rbp
+ addq %rax,%r8
+ adcq $0,%rdx
+ addq %rbx,%r8
+ adcq $0,%rdx
+ movq %rdx,%rbx
+
+ movq 56(%rsi),%rax
+ mulq %rbp
+ addq %rax,%r9
+ adcq $0,%rdx
+ addq %rbx,%r9
+ adcq $0,%rdx
+ movq %rdx,%r10
+ movq 16(%rdi),%rbp
+ movq 0(%rsi),%rax
+ mulq %rbp
+ addq %rax,%r11
+ adcq $0,%rdx
+ movq %r11,536(%rsp)
+ movq %rdx,%rbx
+
+ movq 8(%rsi),%rax
+ mulq %rbp
+ addq %rax,%r12
+ adcq $0,%rdx
+ addq %rbx,%r12
+ adcq $0,%rdx
+ movq %rdx,%rbx
+
+ movq 16(%rsi),%rax
+ mulq %rbp
+ addq %rax,%r13
+ adcq $0,%rdx
+ addq %rbx,%r13
+ adcq $0,%rdx
+ movq %rdx,%rbx
+
+ movq 24(%rsi),%rax
+ mulq %rbp
+ addq %rax,%r14
+ adcq $0,%rdx
+ addq %rbx,%r14
+ adcq $0,%rdx
+ movq %rdx,%rbx
+
+ movq 32(%rsi),%rax
+ mulq %rbp
+ addq %rax,%r15
+ adcq $0,%rdx
+ addq %rbx,%r15
+ adcq $0,%rdx
+ movq %rdx,%rbx
+
+ movq 40(%rsi),%rax
+ mulq %rbp
+ addq %rax,%r8
+ adcq $0,%rdx
+ addq %rbx,%r8
+ adcq $0,%rdx
+ movq %rdx,%rbx
+
+ movq 48(%rsi),%rax
+ mulq %rbp
+ addq %rax,%r9
+ adcq $0,%rdx
+ addq %rbx,%r9
+ adcq $0,%rdx
+ movq %rdx,%rbx
+
+ movq 56(%rsi),%rax
+ mulq %rbp
+ addq %rax,%r10
+ adcq $0,%rdx
+ addq %rbx,%r10
+ adcq $0,%rdx
+ movq %rdx,%r11
+ movq 24(%rdi),%rbp
+ movq 0(%rsi),%rax
+ mulq %rbp
+ addq %rax,%r12
+ adcq $0,%rdx
+ movq %r12,544(%rsp)
+ movq %rdx,%rbx
+
+ movq 8(%rsi),%rax
+ mulq %rbp
+ addq %rax,%r13
+ adcq $0,%rdx
+ addq %rbx,%r13
+ adcq $0,%rdx
+ movq %rdx,%rbx
+
+ movq 16(%rsi),%rax
+ mulq %rbp
+ addq %rax,%r14
+ adcq $0,%rdx
+ addq %rbx,%r14
+ adcq $0,%rdx
+ movq %rdx,%rbx
+
+ movq 24(%rsi),%rax
+ mulq %rbp
+ addq %rax,%r15
+ adcq $0,%rdx
+ addq %rbx,%r15
+ adcq $0,%rdx
+ movq %rdx,%rbx
+
+ movq 32(%rsi),%rax
+ mulq %rbp
+ addq %rax,%r8
+ adcq $0,%rdx
+ addq %rbx,%r8
+ adcq $0,%rdx
+ movq %rdx,%rbx
+
+ movq 40(%rsi),%rax
+ mulq %rbp
+ addq %rax,%r9
+ adcq $0,%rdx
+ addq %rbx,%r9
+ adcq $0,%rdx
+ movq %rdx,%rbx
+
+ movq 48(%rsi),%rax
+ mulq %rbp
+ addq %rax,%r10
+ adcq $0,%rdx
+ addq %rbx,%r10
+ adcq $0,%rdx
+ movq %rdx,%rbx
+
+ movq 56(%rsi),%rax
+ mulq %rbp
+ addq %rax,%r11
+ adcq $0,%rdx
+ addq %rbx,%r11
+ adcq $0,%rdx
+ movq %rdx,%r12
+ movq 32(%rdi),%rbp
+ movq 0(%rsi),%rax
+ mulq %rbp
+ addq %rax,%r13
+ adcq $0,%rdx
+ movq %r13,552(%rsp)
+ movq %rdx,%rbx
+
+ movq 8(%rsi),%rax
+ mulq %rbp
+ addq %rax,%r14
+ adcq $0,%rdx
+ addq %rbx,%r14
+ adcq $0,%rdx
+ movq %rdx,%rbx
+
+ movq 16(%rsi),%rax
+ mulq %rbp
+ addq %rax,%r15
+ adcq $0,%rdx
+ addq %rbx,%r15
+ adcq $0,%rdx
+ movq %rdx,%rbx
+
+ movq 24(%rsi),%rax
+ mulq %rbp
+ addq %rax,%r8
+ adcq $0,%rdx
+ addq %rbx,%r8
+ adcq $0,%rdx
+ movq %rdx,%rbx
+
+ movq 32(%rsi),%rax
+ mulq %rbp
+ addq %rax,%r9
+ adcq $0,%rdx
+ addq %rbx,%r9
+ adcq $0,%rdx
+ movq %rdx,%rbx
+
+ movq 40(%rsi),%rax
+ mulq %rbp
+ addq %rax,%r10
+ adcq $0,%rdx
+ addq %rbx,%r10
+ adcq $0,%rdx
+ movq %rdx,%rbx
+
+ movq 48(%rsi),%rax
+ mulq %rbp
+ addq %rax,%r11
+ adcq $0,%rdx
+ addq %rbx,%r11
+ adcq $0,%rdx
+ movq %rdx,%rbx
+
+ movq 56(%rsi),%rax
+ mulq %rbp
+ addq %rax,%r12
+ adcq $0,%rdx
+ addq %rbx,%r12
+ adcq $0,%rdx
+ movq %rdx,%r13
+ movq 40(%rdi),%rbp
+ movq 0(%rsi),%rax
+ mulq %rbp
+ addq %rax,%r14
+ adcq $0,%rdx
+ movq %r14,560(%rsp)
+ movq %rdx,%rbx
+
+ movq 8(%rsi),%rax
+ mulq %rbp
+ addq %rax,%r15
+ adcq $0,%rdx
+ addq %rbx,%r15
+ adcq $0,%rdx
+ movq %rdx,%rbx
+
+ movq 16(%rsi),%rax
+ mulq %rbp
+ addq %rax,%r8
+ adcq $0,%rdx
+ addq %rbx,%r8
+ adcq $0,%rdx
+ movq %rdx,%rbx
+
+ movq 24(%rsi),%rax
+ mulq %rbp
+ addq %rax,%r9
+ adcq $0,%rdx
+ addq %rbx,%r9
+ adcq $0,%rdx
+ movq %rdx,%rbx
+
+ movq 32(%rsi),%rax
+ mulq %rbp
+ addq %rax,%r10
+ adcq $0,%rdx
+ addq %rbx,%r10
+ adcq $0,%rdx
+ movq %rdx,%rbx
+
+ movq 40(%rsi),%rax
+ mulq %rbp
+ addq %rax,%r11
+ adcq $0,%rdx
+ addq %rbx,%r11
+ adcq $0,%rdx
+ movq %rdx,%rbx
+
+ movq 48(%rsi),%rax
+ mulq %rbp
+ addq %rax,%r12
+ adcq $0,%rdx
+ addq %rbx,%r12
+ adcq $0,%rdx
+ movq %rdx,%rbx
+
+ movq 56(%rsi),%rax
+ mulq %rbp
+ addq %rax,%r13
+ adcq $0,%rdx
+ addq %rbx,%r13
+ adcq $0,%rdx
+ movq %rdx,%r14
+ movq 48(%rdi),%rbp
+ movq 0(%rsi),%rax
+ mulq %rbp
+ addq %rax,%r15
+ adcq $0,%rdx
+ movq %r15,568(%rsp)
+ movq %rdx,%rbx
+
+ movq 8(%rsi),%rax
+ mulq %rbp
+ addq %rax,%r8
+ adcq $0,%rdx
+ addq %rbx,%r8
+ adcq $0,%rdx
+ movq %rdx,%rbx
+
+ movq 16(%rsi),%rax
+ mulq %rbp
+ addq %rax,%r9
+ adcq $0,%rdx
+ addq %rbx,%r9
+ adcq $0,%rdx
+ movq %rdx,%rbx
+
+ movq 24(%rsi),%rax
+ mulq %rbp
+ addq %rax,%r10
+ adcq $0,%rdx
+ addq %rbx,%r10
+ adcq $0,%rdx
+ movq %rdx,%rbx
+
+ movq 32(%rsi),%rax
+ mulq %rbp
+ addq %rax,%r11
+ adcq $0,%rdx
+ addq %rbx,%r11
+ adcq $0,%rdx
+ movq %rdx,%rbx
+
+ movq 40(%rsi),%rax
+ mulq %rbp
+ addq %rax,%r12
+ adcq $0,%rdx
+ addq %rbx,%r12
+ adcq $0,%rdx
+ movq %rdx,%rbx
+
+ movq 48(%rsi),%rax
+ mulq %rbp
+ addq %rax,%r13
+ adcq $0,%rdx
+ addq %rbx,%r13
+ adcq $0,%rdx
+ movq %rdx,%rbx
+
+ movq 56(%rsi),%rax
+ mulq %rbp
+ addq %rax,%r14
+ adcq $0,%rdx
+ addq %rbx,%r14
+ adcq $0,%rdx
+ movq %rdx,%r15
+ movq 56(%rdi),%rbp
+ movq 0(%rsi),%rax
+ mulq %rbp
+ addq %rax,%r8
+ adcq $0,%rdx
+ movq %r8,576(%rsp)
+ movq %rdx,%rbx
+
+ movq 8(%rsi),%rax
+ mulq %rbp
+ addq %rax,%r9
+ adcq $0,%rdx
+ addq %rbx,%r9
+ adcq $0,%rdx
+ movq %rdx,%rbx
+
+ movq 16(%rsi),%rax
+ mulq %rbp
+ addq %rax,%r10
+ adcq $0,%rdx
+ addq %rbx,%r10
+ adcq $0,%rdx
+ movq %rdx,%rbx
+
+ movq 24(%rsi),%rax
+ mulq %rbp
+ addq %rax,%r11
+ adcq $0,%rdx
+ addq %rbx,%r11
+ adcq $0,%rdx
+ movq %rdx,%rbx
+
+ movq 32(%rsi),%rax
+ mulq %rbp
+ addq %rax,%r12
+ adcq $0,%rdx
+ addq %rbx,%r12
+ adcq $0,%rdx
+ movq %rdx,%rbx
+
+ movq 40(%rsi),%rax
+ mulq %rbp
+ addq %rax,%r13
+ adcq $0,%rdx
+ addq %rbx,%r13
+ adcq $0,%rdx
+ movq %rdx,%rbx
+
+ movq 48(%rsi),%rax
+ mulq %rbp
+ addq %rax,%r14
+ adcq $0,%rdx
+ addq %rbx,%r14
+ adcq $0,%rdx
+ movq %rdx,%rbx
+
+ movq 56(%rsi),%rax
+ mulq %rbp
+ addq %rax,%r15
+ adcq $0,%rdx
+ addq %rbx,%r15
+ adcq $0,%rdx
+ movq %rdx,%r8
+ movq %r9,584(%rsp)
+ movq %r10,592(%rsp)
+ movq %r11,600(%rsp)
+ movq %r12,608(%rsp)
+ movq %r13,616(%rsp)
+ movq %r14,624(%rsp)
+ movq %r15,632(%rsp)
+ movq %r8,640(%rsp)
+
+
+
+
+
+ jmp mont_reduce
+
+
+.size mont_mul_a3b,.-mont_mul_a3b
+.type sqr_reduce,@function
+.align 16
+sqr_reduce:
+ movq 16(%rsp),%rcx
+
+
+
+ movq %r10,%rbx
+
+ movq %r11,%rax
+ mulq %rbx
+ movq %rax,528(%rsp)
+ movq %rdx,%r10
+ movq %r12,%rax
+ mulq %rbx
+ addq %rax,%r10
+ adcq $0,%rdx
+ movq %rdx,%r11
+ movq %r13,%rax
+ mulq %rbx
+ addq %rax,%r11
+ adcq $0,%rdx
+ movq %rdx,%r12
+ movq %r14,%rax
+ mulq %rbx
+ addq %rax,%r12
+ adcq $0,%rdx
+ movq %rdx,%r13
+ movq %r15,%rax
+ mulq %rbx
+ addq %rax,%r13
+ adcq $0,%rdx
+ movq %rdx,%r14
+ movq %r8,%rax
+ mulq %rbx
+ addq %rax,%r14
+ adcq $0,%rdx
+ movq %rdx,%r15
+ movq %r9,%rax
+ mulq %rbx
+ addq %rax,%r15
+ adcq $0,%rdx
+ movq %rdx,%rsi
+
+ movq %r10,536(%rsp)
+
+
+
+
+
+ movq 8(%rcx),%rbx
+
+ movq 16(%rcx),%rax
+ mulq %rbx
+ addq %rax,%r11
+ adcq $0,%rdx
+ movq %r11,544(%rsp)
+
+ movq %rdx,%r10
+ movq 24(%rcx),%rax
+ mulq %rbx
+ addq %rax,%r12
+ adcq $0,%rdx
+ addq %r10,%r12
+ adcq $0,%rdx
+ movq %r12,552(%rsp)
+
+ movq %rdx,%r10
+ movq 32(%rcx),%rax
+ mulq %rbx
+ addq %rax,%r13
+ adcq $0,%rdx
+ addq %r10,%r13
+ adcq $0,%rdx
+
+ movq %rdx,%r10
+ movq 40(%rcx),%rax
+ mulq %rbx
+ addq %rax,%r14
+ adcq $0,%rdx
+ addq %r10,%r14
+ adcq $0,%rdx
+
+ movq %rdx,%r10
+ movq %r8,%rax
+ mulq %rbx
+ addq %rax,%r15
+ adcq $0,%rdx
+ addq %r10,%r15
+ adcq $0,%rdx
+
+ movq %rdx,%r10
+ movq %r9,%rax
+ mulq %rbx
+ addq %rax,%rsi
+ adcq $0,%rdx
+ addq %r10,%rsi
+ adcq $0,%rdx
+
+ movq %rdx,%r11
+
+
+
+
+ movq 16(%rcx),%rbx
+
+ movq 24(%rcx),%rax
+ mulq %rbx
+ addq %rax,%r13
+ adcq $0,%rdx
+ movq %r13,560(%rsp)
+
+ movq %rdx,%r10
+ movq 32(%rcx),%rax
+ mulq %rbx
+ addq %rax,%r14
+ adcq $0,%rdx
+ addq %r10,%r14
+ adcq $0,%rdx
+ movq %r14,568(%rsp)
+
+ movq %rdx,%r10
+ movq 40(%rcx),%rax
+ mulq %rbx
+ addq %rax,%r15
+ adcq $0,%rdx
+ addq %r10,%r15
+ adcq $0,%rdx
+
+ movq %rdx,%r10
+ movq %r8,%rax
+ mulq %rbx
+ addq %rax,%rsi
+ adcq $0,%rdx
+ addq %r10,%rsi
+ adcq $0,%rdx
+
+ movq %rdx,%r10
+ movq %r9,%rax
+ mulq %rbx
+ addq %rax,%r11
+ adcq $0,%rdx
+ addq %r10,%r11
+ adcq $0,%rdx
+
+ movq %rdx,%r12
+
+
+
+
+
+ movq 24(%rcx),%rbx
+
+ movq 32(%rcx),%rax
+ mulq %rbx
+ addq %rax,%r15
+ adcq $0,%rdx
+ movq %r15,576(%rsp)
+
+ movq %rdx,%r10
+ movq 40(%rcx),%rax
+ mulq %rbx
+ addq %rax,%rsi
+ adcq $0,%rdx
+ addq %r10,%rsi
+ adcq $0,%rdx
+ movq %rsi,584(%rsp)
+
+ movq %rdx,%r10
+ movq %r8,%rax
+ mulq %rbx
+ addq %rax,%r11
+ adcq $0,%rdx
+ addq %r10,%r11
+ adcq $0,%rdx
+
+ movq %rdx,%r10
+ movq %r9,%rax
+ mulq %rbx
+ addq %rax,%r12
+ adcq $0,%rdx
+ addq %r10,%r12
+ adcq $0,%rdx
+
+ movq %rdx,%r15
+
+
+
+
+ movq 32(%rcx),%rbx
+
+ movq 40(%rcx),%rax
+ mulq %rbx
+ addq %rax,%r11
+ adcq $0,%rdx
+ movq %r11,592(%rsp)
+
+ movq %rdx,%r10
+ movq %r8,%rax
+ mulq %rbx
+ addq %rax,%r12
+ adcq $0,%rdx
+ addq %r10,%r12
+ adcq $0,%rdx
+ movq %r12,600(%rsp)
+
+ movq %rdx,%r10
+ movq %r9,%rax
+ mulq %rbx
+ addq %rax,%r15
+ adcq $0,%rdx
+ addq %r10,%r15
+ adcq $0,%rdx
+
+ movq %rdx,%r11
+
+
+
+
+ movq 40(%rcx),%rbx
+
+ movq %r8,%rax
+ mulq %rbx
+ addq %rax,%r15
+ adcq $0,%rdx
+ movq %r15,608(%rsp)
+
+ movq %rdx,%r10
+ movq %r9,%rax
+ mulq %rbx
+ addq %rax,%r11
+ adcq $0,%rdx
+ addq %r10,%r11
+ adcq $0,%rdx
+ movq %r11,616(%rsp)
+
+ movq %rdx,%r12
+
+
+
+
+ movq %r8,%rbx
+
+ movq %r9,%rax
+ mulq %rbx
+ addq %rax,%r12
+ adcq $0,%rdx
+ movq %r12,624(%rsp)
+
+ movq %rdx,632(%rsp)
+
+
+ movq 528(%rsp),%r10
+ movq 536(%rsp),%r11
+ movq 544(%rsp),%r12
+ movq 552(%rsp),%r13
+ movq 560(%rsp),%r14
+ movq 568(%rsp),%r15
+
+ movq 24(%rcx),%rax
+ mulq %rax
+ movq %rax,%rdi
+ movq %rdx,%r8
+
+ addq %r10,%r10
+ adcq %r11,%r11
+ adcq %r12,%r12
+ adcq %r13,%r13
+ adcq %r14,%r14
+ adcq %r15,%r15
+ adcq $0,%r8
+
+ movq 0(%rcx),%rax
+ mulq %rax
+ movq %rax,520(%rsp)
+ movq %rdx,%rbx
+
+ movq 8(%rcx),%rax
+ mulq %rax
+
+ addq %rbx,%r10
+ adcq %rax,%r11
+ adcq $0,%rdx
+
+ movq %rdx,%rbx
+ movq %r10,528(%rsp)
+ movq %r11,536(%rsp)
+
+ movq 16(%rcx),%rax
+ mulq %rax
+
+ addq %rbx,%r12
+ adcq %rax,%r13
+ adcq $0,%rdx
+
+ movq %rdx,%rbx
+
+ movq %r12,544(%rsp)
+ movq %r13,552(%rsp)
+
+ xorq %rbp,%rbp
+ addq %rbx,%r14
+ adcq %rdi,%r15
+ adcq $0,%rbp
+
+ movq %r14,560(%rsp)
+ movq %r15,568(%rsp)
+
+
+
+
+ movq 576(%rsp),%r10
+ movq 584(%rsp),%r11
+ movq 592(%rsp),%r12
+ movq 600(%rsp),%r13
+ movq 608(%rsp),%r14
+ movq 616(%rsp),%r15
+ movq 624(%rsp),%rdi
+ movq 632(%rsp),%rsi
+
+ movq %r9,%rax
+ mulq %rax
+ movq %rax,%r9
+ movq %rdx,%rbx
+
+ addq %r10,%r10
+ adcq %r11,%r11
+ adcq %r12,%r12
+ adcq %r13,%r13
+ adcq %r14,%r14
+ adcq %r15,%r15
+ adcq %rdi,%rdi
+ adcq %rsi,%rsi
+ adcq $0,%rbx
+
+ addq %rbp,%r10
+
+ movq 32(%rcx),%rax
+ mulq %rax
+
+ addq %r8,%r10
+ adcq %rax,%r11
+ adcq $0,%rdx
+
+ movq %rdx,%rbp
+
+ movq %r10,576(%rsp)
+ movq %r11,584(%rsp)
+
+ movq 40(%rcx),%rax
+ mulq %rax
+
+ addq %rbp,%r12
+ adcq %rax,%r13
+ adcq $0,%rdx
+
+ movq %rdx,%rbp
+
+ movq %r12,592(%rsp)
+ movq %r13,600(%rsp)
+
+ movq 48(%rcx),%rax
+ mulq %rax
+
+ addq %rbp,%r14
+ adcq %rax,%r15
+ adcq $0,%rdx
+
+ movq %r14,608(%rsp)
+ movq %r15,616(%rsp)
+
+ addq %rdx,%rdi
+ adcq %r9,%rsi
+ adcq $0,%rbx
+
+ movq %rdi,624(%rsp)
+ movq %rsi,632(%rsp)
+ movq %rbx,640(%rsp)
+
+ jmp mont_reduce
+
+
+.size sqr_reduce,.-sqr_reduce
+.globl mod_exp_512
+.type mod_exp_512,@function
+mod_exp_512:
+ pushq %rbp
+ pushq %rbx
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+
+
+ movq %rsp,%r8
+ subq $2688,%rsp
+ andq $-64,%rsp
+
+
+ movq %r8,0(%rsp)
+ movq %rdi,8(%rsp)
+ movq %rsi,16(%rsp)
+ movq %rcx,24(%rsp)
+.Lbody:
+
+
+
+ pxor %xmm4,%xmm4
+ movdqu 0(%rsi),%xmm0
+ movdqu 16(%rsi),%xmm1
+ movdqu 32(%rsi),%xmm2
+ movdqu 48(%rsi),%xmm3
+ movdqa %xmm4,512(%rsp)
+ movdqa %xmm4,528(%rsp)
+ movdqa %xmm4,608(%rsp)
+ movdqa %xmm4,624(%rsp)
+ movdqa %xmm0,544(%rsp)
+ movdqa %xmm1,560(%rsp)
+ movdqa %xmm2,576(%rsp)
+ movdqa %xmm3,592(%rsp)
+
+
+ movdqu 0(%rdx),%xmm0
+ movdqu 16(%rdx),%xmm1
+ movdqu 32(%rdx),%xmm2
+ movdqu 48(%rdx),%xmm3
+
+ leaq 384(%rsp),%rbx
+ movq %rbx,136(%rsp)
+ call mont_reduce
+
+
+ leaq 448(%rsp),%rcx
+ xorq %rax,%rax
+ movq %rax,0(%rcx)
+ movq %rax,8(%rcx)
+ movq %rax,24(%rcx)
+ movq %rax,32(%rcx)
+ movq %rax,40(%rcx)
+ movq %rax,48(%rcx)
+ movq %rax,56(%rcx)
+ movq %rax,128(%rsp)
+ movq $1,16(%rcx)
+
+ leaq 640(%rsp),%rbp
+ movq %rcx,%rsi
+ movq %rbp,%rdi
+ movq $8,%rax
+loop_0:
+ movq (%rcx),%rbx
+ movw %bx,(%rdi)
+ shrq $16,%rbx
+ movw %bx,64(%rdi)
+ shrq $16,%rbx
+ movw %bx,128(%rdi)
+ shrq $16,%rbx
+ movw %bx,192(%rdi)
+ leaq 8(%rcx),%rcx
+ leaq 256(%rdi),%rdi
+ decq %rax
+ jnz loop_0
+ movq $31,%rax
+ movq %rax,32(%rsp)
+ movq %rbp,40(%rsp)
+
+ movq %rsi,136(%rsp)
+ movq 0(%rsi),%r10
+ movq 8(%rsi),%r11
+ movq 16(%rsi),%r12
+ movq 24(%rsi),%r13
+ movq 32(%rsi),%r14
+ movq 40(%rsi),%r15
+ movq 48(%rsi),%r8
+ movq 56(%rsi),%r9
+init_loop:
+ leaq 384(%rsp),%rdi
+ call mont_mul_a3b
+ leaq 448(%rsp),%rsi
+ movq 40(%rsp),%rbp
+ addq $2,%rbp
+ movq %rbp,40(%rsp)
+ movq %rsi,%rcx
+ movq $8,%rax
+loop_1:
+ movq (%rcx),%rbx
+ movw %bx,(%rbp)
+ shrq $16,%rbx
+ movw %bx,64(%rbp)
+ shrq $16,%rbx
+ movw %bx,128(%rbp)
+ shrq $16,%rbx
+ movw %bx,192(%rbp)
+ leaq 8(%rcx),%rcx
+ leaq 256(%rbp),%rbp
+ decq %rax
+ jnz loop_1
+ movq 32(%rsp),%rax
+ subq $1,%rax
+ movq %rax,32(%rsp)
+ jne init_loop
+
+
+
+ movdqa %xmm0,64(%rsp)
+ movdqa %xmm1,80(%rsp)
+ movdqa %xmm2,96(%rsp)
+ movdqa %xmm3,112(%rsp)
+
+
+
+
+
+ movl 126(%rsp),%eax
+ movq %rax,%rdx
+ shrq $11,%rax
+ andl $2047,%edx
+ movl %edx,126(%rsp)
+ leaq 640(%rsp,%rax,2),%rsi
+ movq 8(%rsp),%rdx
+ movq $4,%rbp
+loop_2:
+ movzwq 192(%rsi),%rbx
+ movzwq 448(%rsi),%rax
+ shlq $16,%rbx
+ shlq $16,%rax
+ movw 128(%rsi),%bx
+ movw 384(%rsi),%ax
+ shlq $16,%rbx
+ shlq $16,%rax
+ movw 64(%rsi),%bx
+ movw 320(%rsi),%ax
+ shlq $16,%rbx
+ shlq $16,%rax
+ movw 0(%rsi),%bx
+ movw 256(%rsi),%ax
+ movq %rbx,0(%rdx)
+ movq %rax,8(%rdx)
+ leaq 512(%rsi),%rsi
+ leaq 16(%rdx),%rdx
+ subq $1,%rbp
+ jnz loop_2
+ movq $505,48(%rsp)
+
+ movq 8(%rsp),%rcx
+ movq %rcx,136(%rsp)
+ movq 0(%rcx),%r10
+ movq 8(%rcx),%r11
+ movq 16(%rcx),%r12
+ movq 24(%rcx),%r13
+ movq 32(%rcx),%r14
+ movq 40(%rcx),%r15
+ movq 48(%rcx),%r8
+ movq 56(%rcx),%r9
+ jmp sqr_2
+
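+# Main loop: five Montgomery squarings, then one multiplication by the table
+# entry selected by the next five exponent bits (gathered in loop_3).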
+main_loop_a3b:
+ call sqr_reduce
+ call sqr_reduce
+ call sqr_reduce
+sqr_2:
+ call sqr_reduce
+ call sqr_reduce
+
+
+
+ movq 48(%rsp),%rcx
+ movq %rcx,%rax
+ shrq $4,%rax
+ movl 64(%rsp,%rax,2),%edx
+ andq $15,%rcx
+ shrq %cl,%rdx
+ andq $31,%rdx
+
+ leaq 640(%rsp,%rdx,2),%rsi
+ leaq 448(%rsp),%rdx
+ movq %rdx,%rdi
+ movq $4,%rbp
+loop_3:
+ movzwq 192(%rsi),%rbx
+ movzwq 448(%rsi),%rax
+ shlq $16,%rbx
+ shlq $16,%rax
+ movw 128(%rsi),%bx
+ movw 384(%rsi),%ax
+ shlq $16,%rbx
+ shlq $16,%rax
+ movw 64(%rsi),%bx
+ movw 320(%rsi),%ax
+ shlq $16,%rbx
+ shlq $16,%rax
+ movw 0(%rsi),%bx
+ movw 256(%rsi),%ax
+ movq %rbx,0(%rdx)
+ movq %rax,8(%rdx)
+ leaq 512(%rsi),%rsi
+ leaq 16(%rdx),%rdx
+ subq $1,%rbp
+ jnz loop_3
+ movq 8(%rsp),%rsi
+ call mont_mul_a3b
+
+
+
+ movq 48(%rsp),%rcx
+ subq $5,%rcx
+ movq %rcx,48(%rsp)
+ jge main_loop_a3b
+
+
+
+end_main_loop_a3b:
+
+
+ movq 8(%rsp),%rdx
+ pxor %xmm4,%xmm4
+ movdqu 0(%rdx),%xmm0
+ movdqu 16(%rdx),%xmm1
+ movdqu 32(%rdx),%xmm2
+ movdqu 48(%rdx),%xmm3
+ movdqa %xmm4,576(%rsp)
+ movdqa %xmm4,592(%rsp)
+ movdqa %xmm4,608(%rsp)
+ movdqa %xmm4,624(%rsp)
+ movdqa %xmm0,512(%rsp)
+ movdqa %xmm1,528(%rsp)
+ movdqa %xmm2,544(%rsp)
+ movdqa %xmm3,560(%rsp)
+ call mont_reduce
+
+
+
+ movq 8(%rsp),%rax
+ movq 0(%rax),%r8
+ movq 8(%rax),%r9
+ movq 16(%rax),%r10
+ movq 24(%rax),%r11
+ movq 32(%rax),%r12
+ movq 40(%rax),%r13
+ movq 48(%rax),%r14
+ movq 56(%rax),%r15
+
+
+ movq 24(%rsp),%rbx
+ addq $512,%rbx
+
+ subq 0(%rbx),%r8
+ sbbq 8(%rbx),%r9
+ sbbq 16(%rbx),%r10
+ sbbq 24(%rbx),%r11
+ sbbq 32(%rbx),%r12
+ sbbq 40(%rbx),%r13
+ sbbq 48(%rbx),%r14
+ sbbq 56(%rbx),%r15
+
+
+ movq 0(%rax),%rsi
+ movq 8(%rax),%rdi
+ movq 16(%rax),%rcx
+ movq 24(%rax),%rdx
+ cmovncq %r8,%rsi
+ cmovncq %r9,%rdi
+ cmovncq %r10,%rcx
+ cmovncq %r11,%rdx
+ movq %rsi,0(%rax)
+ movq %rdi,8(%rax)
+ movq %rcx,16(%rax)
+ movq %rdx,24(%rax)
+
+ movq 32(%rax),%rsi
+ movq 40(%rax),%rdi
+ movq 48(%rax),%rcx
+ movq 56(%rax),%rdx
+ cmovncq %r12,%rsi
+ cmovncq %r13,%rdi
+ cmovncq %r14,%rcx
+ cmovncq %r15,%rdx
+ movq %rsi,32(%rax)
+ movq %rdi,40(%rax)
+ movq %rcx,48(%rax)
+ movq %rdx,56(%rax)
+
+ movq 0(%rsp),%rsi
+ movq 0(%rsi),%r15
+ movq 8(%rsi),%r14
+ movq 16(%rsi),%r13
+ movq 24(%rsi),%r12
+ movq 32(%rsi),%rbx
+ movq 40(%rsi),%rbp
+ leaq 48(%rsi),%rsp
+.Lepilogue:
+ .byte 0xf3,0xc3
+.size mod_exp_512, . - mod_exp_512
diff --git a/secure/lib/libcrypto/amd64/rc4-md5-x86_64.S b/secure/lib/libcrypto/amd64/rc4-md5-x86_64.S
new file mode 100644
index 000000000000..c94f649e294d
--- /dev/null
+++ b/secure/lib/libcrypto/amd64/rc4-md5-x86_64.S
@@ -0,0 +1,1260 @@
+ # $FreeBSD$
+.text
+.align 16
+
+.globl rc4_md5_enc
+.type rc4_md5_enc,@function
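+# rc4_md5_enc: "stitched" RC4 + MD5. Arguments: %rdi RC4 key, %rsi RC4 input,
+# %rdx RC4 output, %rcx MD5 context, %r8 MD5 input, %r9 number of 64-byte
+# blocks to process.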
+rc4_md5_enc:
+ cmpq $0,%r9
+ je .Labort
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ subq $40,%rsp
+.Lbody:
+ movq %rcx,%r11
+ movq %r9,%r12
+ movq %rsi,%r13
+ movq %rdx,%r14
+ movq %r8,%r15
+ xorq %rbp,%rbp
+ xorq %rcx,%rcx
+
+ leaq 8(%rdi),%rdi
+ movb -8(%rdi),%bpl
+ movb -4(%rdi),%cl
+
+ incb %bpl
+ subq %r13,%r14
+ movl (%rdi,%rbp,4),%eax
+ addb %al,%cl
+ leaq (%rdi,%rbp,4),%rsi
+ shlq $6,%r12
+ addq %r15,%r12
+ movq %r12,16(%rsp)
+
+ movq %r11,24(%rsp)
+ movl 0(%r11),%r8d
+ movl 4(%r11),%r9d
+ movl 8(%r11),%r10d
+ movl 12(%r11),%r11d
+ jmp .Loop
+
+.align 16
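+# One .Loop iteration: 64 MD5 steps (the 32-bit constants are the MD5 T table)
+# interleaved with 64 RC4 S-box updates; the key stream is gathered into
+# %xmm0/%xmm1 and XORed into the 64-byte input block before it is stored.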
+.Loop:
+ movl %r8d,0(%rsp)
+ movl %r9d,4(%rsp)
+ movl %r10d,8(%rsp)
+ movl %r11d,%r12d
+ movl %r11d,12(%rsp)
+ pxor %xmm0,%xmm0
+ movl (%rdi,%rcx,4),%edx
+ xorl %r10d,%r12d
+ movl %eax,(%rdi,%rcx,4)
+ andl %r9d,%r12d
+ addl 0(%r15),%r8d
+ addb %dl,%al
+ movl 4(%rsi),%ebx
+ addl $3614090360,%r8d
+ xorl %r11d,%r12d
+ movzbl %al,%eax
+ movl %edx,0(%rsi)
+ addl %r12d,%r8d
+ addb %bl,%cl
+ roll $7,%r8d
+ movl %r10d,%r12d
+ movd (%rdi,%rax,4),%xmm0
+
+ addl %r9d,%r8d
+ pxor %xmm1,%xmm1
+ movl (%rdi,%rcx,4),%edx
+ xorl %r9d,%r12d
+ movl %ebx,(%rdi,%rcx,4)
+ andl %r8d,%r12d
+ addl 4(%r15),%r11d
+ addb %dl,%bl
+ movl 8(%rsi),%eax
+ addl $3905402710,%r11d
+ xorl %r10d,%r12d
+ movzbl %bl,%ebx
+ movl %edx,4(%rsi)
+ addl %r12d,%r11d
+ addb %al,%cl
+ roll $12,%r11d
+ movl %r9d,%r12d
+ movd (%rdi,%rbx,4),%xmm1
+
+ addl %r8d,%r11d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r8d,%r12d
+ movl %eax,(%rdi,%rcx,4)
+ andl %r11d,%r12d
+ addl 8(%r15),%r10d
+ addb %dl,%al
+ movl 12(%rsi),%ebx
+ addl $606105819,%r10d
+ xorl %r9d,%r12d
+ movzbl %al,%eax
+ movl %edx,8(%rsi)
+ addl %r12d,%r10d
+ addb %bl,%cl
+ roll $17,%r10d
+ movl %r8d,%r12d
+ pinsrw $1,(%rdi,%rax,4),%xmm0
+
+ addl %r11d,%r10d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r11d,%r12d
+ movl %ebx,(%rdi,%rcx,4)
+ andl %r10d,%r12d
+ addl 12(%r15),%r9d
+ addb %dl,%bl
+ movl 16(%rsi),%eax
+ addl $3250441966,%r9d
+ xorl %r8d,%r12d
+ movzbl %bl,%ebx
+ movl %edx,12(%rsi)
+ addl %r12d,%r9d
+ addb %al,%cl
+ roll $22,%r9d
+ movl %r11d,%r12d
+ pinsrw $1,(%rdi,%rbx,4),%xmm1
+
+ addl %r10d,%r9d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r10d,%r12d
+ movl %eax,(%rdi,%rcx,4)
+ andl %r9d,%r12d
+ addl 16(%r15),%r8d
+ addb %dl,%al
+ movl 20(%rsi),%ebx
+ addl $4118548399,%r8d
+ xorl %r11d,%r12d
+ movzbl %al,%eax
+ movl %edx,16(%rsi)
+ addl %r12d,%r8d
+ addb %bl,%cl
+ roll $7,%r8d
+ movl %r10d,%r12d
+ pinsrw $2,(%rdi,%rax,4),%xmm0
+
+ addl %r9d,%r8d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r9d,%r12d
+ movl %ebx,(%rdi,%rcx,4)
+ andl %r8d,%r12d
+ addl 20(%r15),%r11d
+ addb %dl,%bl
+ movl 24(%rsi),%eax
+ addl $1200080426,%r11d
+ xorl %r10d,%r12d
+ movzbl %bl,%ebx
+ movl %edx,20(%rsi)
+ addl %r12d,%r11d
+ addb %al,%cl
+ roll $12,%r11d
+ movl %r9d,%r12d
+ pinsrw $2,(%rdi,%rbx,4),%xmm1
+
+ addl %r8d,%r11d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r8d,%r12d
+ movl %eax,(%rdi,%rcx,4)
+ andl %r11d,%r12d
+ addl 24(%r15),%r10d
+ addb %dl,%al
+ movl 28(%rsi),%ebx
+ addl $2821735955,%r10d
+ xorl %r9d,%r12d
+ movzbl %al,%eax
+ movl %edx,24(%rsi)
+ addl %r12d,%r10d
+ addb %bl,%cl
+ roll $17,%r10d
+ movl %r8d,%r12d
+ pinsrw $3,(%rdi,%rax,4),%xmm0
+
+ addl %r11d,%r10d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r11d,%r12d
+ movl %ebx,(%rdi,%rcx,4)
+ andl %r10d,%r12d
+ addl 28(%r15),%r9d
+ addb %dl,%bl
+ movl 32(%rsi),%eax
+ addl $4249261313,%r9d
+ xorl %r8d,%r12d
+ movzbl %bl,%ebx
+ movl %edx,28(%rsi)
+ addl %r12d,%r9d
+ addb %al,%cl
+ roll $22,%r9d
+ movl %r11d,%r12d
+ pinsrw $3,(%rdi,%rbx,4),%xmm1
+
+ addl %r10d,%r9d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r10d,%r12d
+ movl %eax,(%rdi,%rcx,4)
+ andl %r9d,%r12d
+ addl 32(%r15),%r8d
+ addb %dl,%al
+ movl 36(%rsi),%ebx
+ addl $1770035416,%r8d
+ xorl %r11d,%r12d
+ movzbl %al,%eax
+ movl %edx,32(%rsi)
+ addl %r12d,%r8d
+ addb %bl,%cl
+ roll $7,%r8d
+ movl %r10d,%r12d
+ pinsrw $4,(%rdi,%rax,4),%xmm0
+
+ addl %r9d,%r8d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r9d,%r12d
+ movl %ebx,(%rdi,%rcx,4)
+ andl %r8d,%r12d
+ addl 36(%r15),%r11d
+ addb %dl,%bl
+ movl 40(%rsi),%eax
+ addl $2336552879,%r11d
+ xorl %r10d,%r12d
+ movzbl %bl,%ebx
+ movl %edx,36(%rsi)
+ addl %r12d,%r11d
+ addb %al,%cl
+ roll $12,%r11d
+ movl %r9d,%r12d
+ pinsrw $4,(%rdi,%rbx,4),%xmm1
+
+ addl %r8d,%r11d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r8d,%r12d
+ movl %eax,(%rdi,%rcx,4)
+ andl %r11d,%r12d
+ addl 40(%r15),%r10d
+ addb %dl,%al
+ movl 44(%rsi),%ebx
+ addl $4294925233,%r10d
+ xorl %r9d,%r12d
+ movzbl %al,%eax
+ movl %edx,40(%rsi)
+ addl %r12d,%r10d
+ addb %bl,%cl
+ roll $17,%r10d
+ movl %r8d,%r12d
+ pinsrw $5,(%rdi,%rax,4),%xmm0
+
+ addl %r11d,%r10d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r11d,%r12d
+ movl %ebx,(%rdi,%rcx,4)
+ andl %r10d,%r12d
+ addl 44(%r15),%r9d
+ addb %dl,%bl
+ movl 48(%rsi),%eax
+ addl $2304563134,%r9d
+ xorl %r8d,%r12d
+ movzbl %bl,%ebx
+ movl %edx,44(%rsi)
+ addl %r12d,%r9d
+ addb %al,%cl
+ roll $22,%r9d
+ movl %r11d,%r12d
+ pinsrw $5,(%rdi,%rbx,4),%xmm1
+
+ addl %r10d,%r9d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r10d,%r12d
+ movl %eax,(%rdi,%rcx,4)
+ andl %r9d,%r12d
+ addl 48(%r15),%r8d
+ addb %dl,%al
+ movl 52(%rsi),%ebx
+ addl $1804603682,%r8d
+ xorl %r11d,%r12d
+ movzbl %al,%eax
+ movl %edx,48(%rsi)
+ addl %r12d,%r8d
+ addb %bl,%cl
+ roll $7,%r8d
+ movl %r10d,%r12d
+ pinsrw $6,(%rdi,%rax,4),%xmm0
+
+ addl %r9d,%r8d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r9d,%r12d
+ movl %ebx,(%rdi,%rcx,4)
+ andl %r8d,%r12d
+ addl 52(%r15),%r11d
+ addb %dl,%bl
+ movl 56(%rsi),%eax
+ addl $4254626195,%r11d
+ xorl %r10d,%r12d
+ movzbl %bl,%ebx
+ movl %edx,52(%rsi)
+ addl %r12d,%r11d
+ addb %al,%cl
+ roll $12,%r11d
+ movl %r9d,%r12d
+ pinsrw $6,(%rdi,%rbx,4),%xmm1
+
+ addl %r8d,%r11d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r8d,%r12d
+ movl %eax,(%rdi,%rcx,4)
+ andl %r11d,%r12d
+ addl 56(%r15),%r10d
+ addb %dl,%al
+ movl 60(%rsi),%ebx
+ addl $2792965006,%r10d
+ xorl %r9d,%r12d
+ movzbl %al,%eax
+ movl %edx,56(%rsi)
+ addl %r12d,%r10d
+ addb %bl,%cl
+ roll $17,%r10d
+ movl %r8d,%r12d
+ pinsrw $7,(%rdi,%rax,4),%xmm0
+
+ addl %r11d,%r10d
+ movdqu (%r13),%xmm2
+ movl (%rdi,%rcx,4),%edx
+ xorl %r11d,%r12d
+ movl %ebx,(%rdi,%rcx,4)
+ andl %r10d,%r12d
+ addl 60(%r15),%r9d
+ addb %dl,%bl
+ movl 64(%rsi),%eax
+ addl $1236535329,%r9d
+ xorl %r8d,%r12d
+ movzbl %bl,%ebx
+ movl %edx,60(%rsi)
+ addl %r12d,%r9d
+ addb %al,%cl
+ roll $22,%r9d
+ movl %r10d,%r12d
+ pinsrw $7,(%rdi,%rbx,4),%xmm1
+
+ addl %r10d,%r9d
+ psllq $8,%xmm1
+ pxor %xmm0,%xmm2
+ pxor %xmm1,%xmm2
+ pxor %xmm0,%xmm0
+ movl (%rdi,%rcx,4),%edx
+ xorl %r9d,%r12d
+ movl %eax,(%rdi,%rcx,4)
+ andl %r11d,%r12d
+ addl 4(%r15),%r8d
+ addb %dl,%al
+ movl 68(%rsi),%ebx
+ addl $4129170786,%r8d
+ xorl %r10d,%r12d
+ movzbl %al,%eax
+ movl %edx,64(%rsi)
+ addl %r12d,%r8d
+ addb %bl,%cl
+ roll $5,%r8d
+ movl %r9d,%r12d
+ movd (%rdi,%rax,4),%xmm0
+
+ addl %r9d,%r8d
+ pxor %xmm1,%xmm1
+ movl (%rdi,%rcx,4),%edx
+ xorl %r8d,%r12d
+ movl %ebx,(%rdi,%rcx,4)
+ andl %r10d,%r12d
+ addl 24(%r15),%r11d
+ addb %dl,%bl
+ movl 72(%rsi),%eax
+ addl $3225465664,%r11d
+ xorl %r9d,%r12d
+ movzbl %bl,%ebx
+ movl %edx,68(%rsi)
+ addl %r12d,%r11d
+ addb %al,%cl
+ roll $9,%r11d
+ movl %r8d,%r12d
+ movd (%rdi,%rbx,4),%xmm1
+
+ addl %r8d,%r11d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r11d,%r12d
+ movl %eax,(%rdi,%rcx,4)
+ andl %r9d,%r12d
+ addl 44(%r15),%r10d
+ addb %dl,%al
+ movl 76(%rsi),%ebx
+ addl $643717713,%r10d
+ xorl %r8d,%r12d
+ movzbl %al,%eax
+ movl %edx,72(%rsi)
+ addl %r12d,%r10d
+ addb %bl,%cl
+ roll $14,%r10d
+ movl %r11d,%r12d
+ pinsrw $1,(%rdi,%rax,4),%xmm0
+
+ addl %r11d,%r10d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r10d,%r12d
+ movl %ebx,(%rdi,%rcx,4)
+ andl %r8d,%r12d
+ addl 0(%r15),%r9d
+ addb %dl,%bl
+ movl 80(%rsi),%eax
+ addl $3921069994,%r9d
+ xorl %r11d,%r12d
+ movzbl %bl,%ebx
+ movl %edx,76(%rsi)
+ addl %r12d,%r9d
+ addb %al,%cl
+ roll $20,%r9d
+ movl %r10d,%r12d
+ pinsrw $1,(%rdi,%rbx,4),%xmm1
+
+ addl %r10d,%r9d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r9d,%r12d
+ movl %eax,(%rdi,%rcx,4)
+ andl %r11d,%r12d
+ addl 20(%r15),%r8d
+ addb %dl,%al
+ movl 84(%rsi),%ebx
+ addl $3593408605,%r8d
+ xorl %r10d,%r12d
+ movzbl %al,%eax
+ movl %edx,80(%rsi)
+ addl %r12d,%r8d
+ addb %bl,%cl
+ roll $5,%r8d
+ movl %r9d,%r12d
+ pinsrw $2,(%rdi,%rax,4),%xmm0
+
+ addl %r9d,%r8d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r8d,%r12d
+ movl %ebx,(%rdi,%rcx,4)
+ andl %r10d,%r12d
+ addl 40(%r15),%r11d
+ addb %dl,%bl
+ movl 88(%rsi),%eax
+ addl $38016083,%r11d
+ xorl %r9d,%r12d
+ movzbl %bl,%ebx
+ movl %edx,84(%rsi)
+ addl %r12d,%r11d
+ addb %al,%cl
+ roll $9,%r11d
+ movl %r8d,%r12d
+ pinsrw $2,(%rdi,%rbx,4),%xmm1
+
+ addl %r8d,%r11d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r11d,%r12d
+ movl %eax,(%rdi,%rcx,4)
+ andl %r9d,%r12d
+ addl 60(%r15),%r10d
+ addb %dl,%al
+ movl 92(%rsi),%ebx
+ addl $3634488961,%r10d
+ xorl %r8d,%r12d
+ movzbl %al,%eax
+ movl %edx,88(%rsi)
+ addl %r12d,%r10d
+ addb %bl,%cl
+ roll $14,%r10d
+ movl %r11d,%r12d
+ pinsrw $3,(%rdi,%rax,4),%xmm0
+
+ addl %r11d,%r10d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r10d,%r12d
+ movl %ebx,(%rdi,%rcx,4)
+ andl %r8d,%r12d
+ addl 16(%r15),%r9d
+ addb %dl,%bl
+ movl 96(%rsi),%eax
+ addl $3889429448,%r9d
+ xorl %r11d,%r12d
+ movzbl %bl,%ebx
+ movl %edx,92(%rsi)
+ addl %r12d,%r9d
+ addb %al,%cl
+ roll $20,%r9d
+ movl %r10d,%r12d
+ pinsrw $3,(%rdi,%rbx,4),%xmm1
+
+ addl %r10d,%r9d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r9d,%r12d
+ movl %eax,(%rdi,%rcx,4)
+ andl %r11d,%r12d
+ addl 36(%r15),%r8d
+ addb %dl,%al
+ movl 100(%rsi),%ebx
+ addl $568446438,%r8d
+ xorl %r10d,%r12d
+ movzbl %al,%eax
+ movl %edx,96(%rsi)
+ addl %r12d,%r8d
+ addb %bl,%cl
+ roll $5,%r8d
+ movl %r9d,%r12d
+ pinsrw $4,(%rdi,%rax,4),%xmm0
+
+ addl %r9d,%r8d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r8d,%r12d
+ movl %ebx,(%rdi,%rcx,4)
+ andl %r10d,%r12d
+ addl 56(%r15),%r11d
+ addb %dl,%bl
+ movl 104(%rsi),%eax
+ addl $3275163606,%r11d
+ xorl %r9d,%r12d
+ movzbl %bl,%ebx
+ movl %edx,100(%rsi)
+ addl %r12d,%r11d
+ addb %al,%cl
+ roll $9,%r11d
+ movl %r8d,%r12d
+ pinsrw $4,(%rdi,%rbx,4),%xmm1
+
+ addl %r8d,%r11d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r11d,%r12d
+ movl %eax,(%rdi,%rcx,4)
+ andl %r9d,%r12d
+ addl 12(%r15),%r10d
+ addb %dl,%al
+ movl 108(%rsi),%ebx
+ addl $4107603335,%r10d
+ xorl %r8d,%r12d
+ movzbl %al,%eax
+ movl %edx,104(%rsi)
+ addl %r12d,%r10d
+ addb %bl,%cl
+ roll $14,%r10d
+ movl %r11d,%r12d
+ pinsrw $5,(%rdi,%rax,4),%xmm0
+
+ addl %r11d,%r10d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r10d,%r12d
+ movl %ebx,(%rdi,%rcx,4)
+ andl %r8d,%r12d
+ addl 32(%r15),%r9d
+ addb %dl,%bl
+ movl 112(%rsi),%eax
+ addl $1163531501,%r9d
+ xorl %r11d,%r12d
+ movzbl %bl,%ebx
+ movl %edx,108(%rsi)
+ addl %r12d,%r9d
+ addb %al,%cl
+ roll $20,%r9d
+ movl %r10d,%r12d
+ pinsrw $5,(%rdi,%rbx,4),%xmm1
+
+ addl %r10d,%r9d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r9d,%r12d
+ movl %eax,(%rdi,%rcx,4)
+ andl %r11d,%r12d
+ addl 52(%r15),%r8d
+ addb %dl,%al
+ movl 116(%rsi),%ebx
+ addl $2850285829,%r8d
+ xorl %r10d,%r12d
+ movzbl %al,%eax
+ movl %edx,112(%rsi)
+ addl %r12d,%r8d
+ addb %bl,%cl
+ roll $5,%r8d
+ movl %r9d,%r12d
+ pinsrw $6,(%rdi,%rax,4),%xmm0
+
+ addl %r9d,%r8d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r8d,%r12d
+ movl %ebx,(%rdi,%rcx,4)
+ andl %r10d,%r12d
+ addl 8(%r15),%r11d
+ addb %dl,%bl
+ movl 120(%rsi),%eax
+ addl $4243563512,%r11d
+ xorl %r9d,%r12d
+ movzbl %bl,%ebx
+ movl %edx,116(%rsi)
+ addl %r12d,%r11d
+ addb %al,%cl
+ roll $9,%r11d
+ movl %r8d,%r12d
+ pinsrw $6,(%rdi,%rbx,4),%xmm1
+
+ addl %r8d,%r11d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r11d,%r12d
+ movl %eax,(%rdi,%rcx,4)
+ andl %r9d,%r12d
+ addl 28(%r15),%r10d
+ addb %dl,%al
+ movl 124(%rsi),%ebx
+ addl $1735328473,%r10d
+ xorl %r8d,%r12d
+ movzbl %al,%eax
+ movl %edx,120(%rsi)
+ addl %r12d,%r10d
+ addb %bl,%cl
+ roll $14,%r10d
+ movl %r11d,%r12d
+ pinsrw $7,(%rdi,%rax,4),%xmm0
+
+ addl %r11d,%r10d
+ movdqu 16(%r13),%xmm3
+ addb $32,%bpl
+ movl (%rdi,%rcx,4),%edx
+ xorl %r10d,%r12d
+ movl %ebx,(%rdi,%rcx,4)
+ andl %r8d,%r12d
+ addl 48(%r15),%r9d
+ addb %dl,%bl
+ movl 0(%rdi,%rbp,4),%eax
+ addl $2368359562,%r9d
+ xorl %r11d,%r12d
+ movzbl %bl,%ebx
+ movl %edx,124(%rsi)
+ addl %r12d,%r9d
+ addb %al,%cl
+ roll $20,%r9d
+ movl %r11d,%r12d
+ pinsrw $7,(%rdi,%rbx,4),%xmm1
+
+ addl %r10d,%r9d
+ movq %rcx,%rsi
+ xorq %rcx,%rcx
+ movb %sil,%cl
+ leaq (%rdi,%rbp,4),%rsi
+ psllq $8,%xmm1
+ pxor %xmm0,%xmm3
+ pxor %xmm1,%xmm3
+ pxor %xmm0,%xmm0
+ movl (%rdi,%rcx,4),%edx
+ xorl %r10d,%r12d
+ movl %eax,(%rdi,%rcx,4)
+ xorl %r9d,%r12d
+ addl 20(%r15),%r8d
+ addb %dl,%al
+ movl 4(%rsi),%ebx
+ addl $4294588738,%r8d
+ movzbl %al,%eax
+ addl %r12d,%r8d
+ movl %edx,0(%rsi)
+ addb %bl,%cl
+ roll $4,%r8d
+ movl %r10d,%r12d
+ movd (%rdi,%rax,4),%xmm0
+
+ addl %r9d,%r8d
+ pxor %xmm1,%xmm1
+ movl (%rdi,%rcx,4),%edx
+ xorl %r9d,%r12d
+ movl %ebx,(%rdi,%rcx,4)
+ xorl %r8d,%r12d
+ addl 32(%r15),%r11d
+ addb %dl,%bl
+ movl 8(%rsi),%eax
+ addl $2272392833,%r11d
+ movzbl %bl,%ebx
+ addl %r12d,%r11d
+ movl %edx,4(%rsi)
+ addb %al,%cl
+ roll $11,%r11d
+ movl %r9d,%r12d
+ movd (%rdi,%rbx,4),%xmm1
+
+ addl %r8d,%r11d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r8d,%r12d
+ movl %eax,(%rdi,%rcx,4)
+ xorl %r11d,%r12d
+ addl 44(%r15),%r10d
+ addb %dl,%al
+ movl 12(%rsi),%ebx
+ addl $1839030562,%r10d
+ movzbl %al,%eax
+ addl %r12d,%r10d
+ movl %edx,8(%rsi)
+ addb %bl,%cl
+ roll $16,%r10d
+ movl %r8d,%r12d
+ pinsrw $1,(%rdi,%rax,4),%xmm0
+
+ addl %r11d,%r10d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r11d,%r12d
+ movl %ebx,(%rdi,%rcx,4)
+ xorl %r10d,%r12d
+ addl 56(%r15),%r9d
+ addb %dl,%bl
+ movl 16(%rsi),%eax
+ addl $4259657740,%r9d
+ movzbl %bl,%ebx
+ addl %r12d,%r9d
+ movl %edx,12(%rsi)
+ addb %al,%cl
+ roll $23,%r9d
+ movl %r11d,%r12d
+ pinsrw $1,(%rdi,%rbx,4),%xmm1
+
+ addl %r10d,%r9d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r10d,%r12d
+ movl %eax,(%rdi,%rcx,4)
+ xorl %r9d,%r12d
+ addl 4(%r15),%r8d
+ addb %dl,%al
+ movl 20(%rsi),%ebx
+ addl $2763975236,%r8d
+ movzbl %al,%eax
+ addl %r12d,%r8d
+ movl %edx,16(%rsi)
+ addb %bl,%cl
+ roll $4,%r8d
+ movl %r10d,%r12d
+ pinsrw $2,(%rdi,%rax,4),%xmm0
+
+ addl %r9d,%r8d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r9d,%r12d
+ movl %ebx,(%rdi,%rcx,4)
+ xorl %r8d,%r12d
+ addl 16(%r15),%r11d
+ addb %dl,%bl
+ movl 24(%rsi),%eax
+ addl $1272893353,%r11d
+ movzbl %bl,%ebx
+ addl %r12d,%r11d
+ movl %edx,20(%rsi)
+ addb %al,%cl
+ roll $11,%r11d
+ movl %r9d,%r12d
+ pinsrw $2,(%rdi,%rbx,4),%xmm1
+
+ addl %r8d,%r11d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r8d,%r12d
+ movl %eax,(%rdi,%rcx,4)
+ xorl %r11d,%r12d
+ addl 28(%r15),%r10d
+ addb %dl,%al
+ movl 28(%rsi),%ebx
+ addl $4139469664,%r10d
+ movzbl %al,%eax
+ addl %r12d,%r10d
+ movl %edx,24(%rsi)
+ addb %bl,%cl
+ roll $16,%r10d
+ movl %r8d,%r12d
+ pinsrw $3,(%rdi,%rax,4),%xmm0
+
+ addl %r11d,%r10d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r11d,%r12d
+ movl %ebx,(%rdi,%rcx,4)
+ xorl %r10d,%r12d
+ addl 40(%r15),%r9d
+ addb %dl,%bl
+ movl 32(%rsi),%eax
+ addl $3200236656,%r9d
+ movzbl %bl,%ebx
+ addl %r12d,%r9d
+ movl %edx,28(%rsi)
+ addb %al,%cl
+ roll $23,%r9d
+ movl %r11d,%r12d
+ pinsrw $3,(%rdi,%rbx,4),%xmm1
+
+ addl %r10d,%r9d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r10d,%r12d
+ movl %eax,(%rdi,%rcx,4)
+ xorl %r9d,%r12d
+ addl 52(%r15),%r8d
+ addb %dl,%al
+ movl 36(%rsi),%ebx
+ addl $681279174,%r8d
+ movzbl %al,%eax
+ addl %r12d,%r8d
+ movl %edx,32(%rsi)
+ addb %bl,%cl
+ roll $4,%r8d
+ movl %r10d,%r12d
+ pinsrw $4,(%rdi,%rax,4),%xmm0
+
+ addl %r9d,%r8d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r9d,%r12d
+ movl %ebx,(%rdi,%rcx,4)
+ xorl %r8d,%r12d
+ addl 0(%r15),%r11d
+ addb %dl,%bl
+ movl 40(%rsi),%eax
+ addl $3936430074,%r11d
+ movzbl %bl,%ebx
+ addl %r12d,%r11d
+ movl %edx,36(%rsi)
+ addb %al,%cl
+ roll $11,%r11d
+ movl %r9d,%r12d
+ pinsrw $4,(%rdi,%rbx,4),%xmm1
+
+ addl %r8d,%r11d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r8d,%r12d
+ movl %eax,(%rdi,%rcx,4)
+ xorl %r11d,%r12d
+ addl 12(%r15),%r10d
+ addb %dl,%al
+ movl 44(%rsi),%ebx
+ addl $3572445317,%r10d
+ movzbl %al,%eax
+ addl %r12d,%r10d
+ movl %edx,40(%rsi)
+ addb %bl,%cl
+ roll $16,%r10d
+ movl %r8d,%r12d
+ pinsrw $5,(%rdi,%rax,4),%xmm0
+
+ addl %r11d,%r10d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r11d,%r12d
+ movl %ebx,(%rdi,%rcx,4)
+ xorl %r10d,%r12d
+ addl 24(%r15),%r9d
+ addb %dl,%bl
+ movl 48(%rsi),%eax
+ addl $76029189,%r9d
+ movzbl %bl,%ebx
+ addl %r12d,%r9d
+ movl %edx,44(%rsi)
+ addb %al,%cl
+ roll $23,%r9d
+ movl %r11d,%r12d
+ pinsrw $5,(%rdi,%rbx,4),%xmm1
+
+ addl %r10d,%r9d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r10d,%r12d
+ movl %eax,(%rdi,%rcx,4)
+ xorl %r9d,%r12d
+ addl 36(%r15),%r8d
+ addb %dl,%al
+ movl 52(%rsi),%ebx
+ addl $3654602809,%r8d
+ movzbl %al,%eax
+ addl %r12d,%r8d
+ movl %edx,48(%rsi)
+ addb %bl,%cl
+ roll $4,%r8d
+ movl %r10d,%r12d
+ pinsrw $6,(%rdi,%rax,4),%xmm0
+
+ addl %r9d,%r8d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r9d,%r12d
+ movl %ebx,(%rdi,%rcx,4)
+ xorl %r8d,%r12d
+ addl 48(%r15),%r11d
+ addb %dl,%bl
+ movl 56(%rsi),%eax
+ addl $3873151461,%r11d
+ movzbl %bl,%ebx
+ addl %r12d,%r11d
+ movl %edx,52(%rsi)
+ addb %al,%cl
+ roll $11,%r11d
+ movl %r9d,%r12d
+ pinsrw $6,(%rdi,%rbx,4),%xmm1
+
+ addl %r8d,%r11d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r8d,%r12d
+ movl %eax,(%rdi,%rcx,4)
+ xorl %r11d,%r12d
+ addl 60(%r15),%r10d
+ addb %dl,%al
+ movl 60(%rsi),%ebx
+ addl $530742520,%r10d
+ movzbl %al,%eax
+ addl %r12d,%r10d
+ movl %edx,56(%rsi)
+ addb %bl,%cl
+ roll $16,%r10d
+ movl %r8d,%r12d
+ pinsrw $7,(%rdi,%rax,4),%xmm0
+
+ addl %r11d,%r10d
+ movdqu 32(%r13),%xmm4
+ movl (%rdi,%rcx,4),%edx
+ xorl %r11d,%r12d
+ movl %ebx,(%rdi,%rcx,4)
+ xorl %r10d,%r12d
+ addl 8(%r15),%r9d
+ addb %dl,%bl
+ movl 64(%rsi),%eax
+ addl $3299628645,%r9d
+ movzbl %bl,%ebx
+ addl %r12d,%r9d
+ movl %edx,60(%rsi)
+ addb %al,%cl
+ roll $23,%r9d
+ movl $-1,%r12d
+ pinsrw $7,(%rdi,%rbx,4),%xmm1
+
+ addl %r10d,%r9d
+ psllq $8,%xmm1
+ pxor %xmm0,%xmm4
+ pxor %xmm1,%xmm4
+ pxor %xmm0,%xmm0
+ movl (%rdi,%rcx,4),%edx
+ xorl %r11d,%r12d
+ movl %eax,(%rdi,%rcx,4)
+ orl %r9d,%r12d
+ addl 0(%r15),%r8d
+ addb %dl,%al
+ movl 68(%rsi),%ebx
+ addl $4096336452,%r8d
+ movzbl %al,%eax
+ xorl %r10d,%r12d
+ movl %edx,64(%rsi)
+ addl %r12d,%r8d
+ addb %bl,%cl
+ roll $6,%r8d
+ movl $-1,%r12d
+ movd (%rdi,%rax,4),%xmm0
+
+ addl %r9d,%r8d
+ pxor %xmm1,%xmm1
+ movl (%rdi,%rcx,4),%edx
+ xorl %r10d,%r12d
+ movl %ebx,(%rdi,%rcx,4)
+ orl %r8d,%r12d
+ addl 28(%r15),%r11d
+ addb %dl,%bl
+ movl 72(%rsi),%eax
+ addl $1126891415,%r11d
+ movzbl %bl,%ebx
+ xorl %r9d,%r12d
+ movl %edx,68(%rsi)
+ addl %r12d,%r11d
+ addb %al,%cl
+ roll $10,%r11d
+ movl $-1,%r12d
+ movd (%rdi,%rbx,4),%xmm1
+
+ addl %r8d,%r11d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r9d,%r12d
+ movl %eax,(%rdi,%rcx,4)
+ orl %r11d,%r12d
+ addl 56(%r15),%r10d
+ addb %dl,%al
+ movl 76(%rsi),%ebx
+ addl $2878612391,%r10d
+ movzbl %al,%eax
+ xorl %r8d,%r12d
+ movl %edx,72(%rsi)
+ addl %r12d,%r10d
+ addb %bl,%cl
+ roll $15,%r10d
+ movl $-1,%r12d
+ pinsrw $1,(%rdi,%rax,4),%xmm0
+
+ addl %r11d,%r10d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r8d,%r12d
+ movl %ebx,(%rdi,%rcx,4)
+ orl %r10d,%r12d
+ addl 20(%r15),%r9d
+ addb %dl,%bl
+ movl 80(%rsi),%eax
+ addl $4237533241,%r9d
+ movzbl %bl,%ebx
+ xorl %r11d,%r12d
+ movl %edx,76(%rsi)
+ addl %r12d,%r9d
+ addb %al,%cl
+ roll $21,%r9d
+ movl $-1,%r12d
+ pinsrw $1,(%rdi,%rbx,4),%xmm1
+
+ addl %r10d,%r9d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r11d,%r12d
+ movl %eax,(%rdi,%rcx,4)
+ orl %r9d,%r12d
+ addl 48(%r15),%r8d
+ addb %dl,%al
+ movl 84(%rsi),%ebx
+ addl $1700485571,%r8d
+ movzbl %al,%eax
+ xorl %r10d,%r12d
+ movl %edx,80(%rsi)
+ addl %r12d,%r8d
+ addb %bl,%cl
+ roll $6,%r8d
+ movl $-1,%r12d
+ pinsrw $2,(%rdi,%rax,4),%xmm0
+
+ addl %r9d,%r8d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r10d,%r12d
+ movl %ebx,(%rdi,%rcx,4)
+ orl %r8d,%r12d
+ addl 12(%r15),%r11d
+ addb %dl,%bl
+ movl 88(%rsi),%eax
+ addl $2399980690,%r11d
+ movzbl %bl,%ebx
+ xorl %r9d,%r12d
+ movl %edx,84(%rsi)
+ addl %r12d,%r11d
+ addb %al,%cl
+ roll $10,%r11d
+ movl $-1,%r12d
+ pinsrw $2,(%rdi,%rbx,4),%xmm1
+
+ addl %r8d,%r11d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r9d,%r12d
+ movl %eax,(%rdi,%rcx,4)
+ orl %r11d,%r12d
+ addl 40(%r15),%r10d
+ addb %dl,%al
+ movl 92(%rsi),%ebx
+ addl $4293915773,%r10d
+ movzbl %al,%eax
+ xorl %r8d,%r12d
+ movl %edx,88(%rsi)
+ addl %r12d,%r10d
+ addb %bl,%cl
+ roll $15,%r10d
+ movl $-1,%r12d
+ pinsrw $3,(%rdi,%rax,4),%xmm0
+
+ addl %r11d,%r10d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r8d,%r12d
+ movl %ebx,(%rdi,%rcx,4)
+ orl %r10d,%r12d
+ addl 4(%r15),%r9d
+ addb %dl,%bl
+ movl 96(%rsi),%eax
+ addl $2240044497,%r9d
+ movzbl %bl,%ebx
+ xorl %r11d,%r12d
+ movl %edx,92(%rsi)
+ addl %r12d,%r9d
+ addb %al,%cl
+ roll $21,%r9d
+ movl $-1,%r12d
+ pinsrw $3,(%rdi,%rbx,4),%xmm1
+
+ addl %r10d,%r9d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r11d,%r12d
+ movl %eax,(%rdi,%rcx,4)
+ orl %r9d,%r12d
+ addl 32(%r15),%r8d
+ addb %dl,%al
+ movl 100(%rsi),%ebx
+ addl $1873313359,%r8d
+ movzbl %al,%eax
+ xorl %r10d,%r12d
+ movl %edx,96(%rsi)
+ addl %r12d,%r8d
+ addb %bl,%cl
+ roll $6,%r8d
+ movl $-1,%r12d
+ pinsrw $4,(%rdi,%rax,4),%xmm0
+
+ addl %r9d,%r8d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r10d,%r12d
+ movl %ebx,(%rdi,%rcx,4)
+ orl %r8d,%r12d
+ addl 60(%r15),%r11d
+ addb %dl,%bl
+ movl 104(%rsi),%eax
+ addl $4264355552,%r11d
+ movzbl %bl,%ebx
+ xorl %r9d,%r12d
+ movl %edx,100(%rsi)
+ addl %r12d,%r11d
+ addb %al,%cl
+ roll $10,%r11d
+ movl $-1,%r12d
+ pinsrw $4,(%rdi,%rbx,4),%xmm1
+
+ addl %r8d,%r11d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r9d,%r12d
+ movl %eax,(%rdi,%rcx,4)
+ orl %r11d,%r12d
+ addl 24(%r15),%r10d
+ addb %dl,%al
+ movl 108(%rsi),%ebx
+ addl $2734768916,%r10d
+ movzbl %al,%eax
+ xorl %r8d,%r12d
+ movl %edx,104(%rsi)
+ addl %r12d,%r10d
+ addb %bl,%cl
+ roll $15,%r10d
+ movl $-1,%r12d
+ pinsrw $5,(%rdi,%rax,4),%xmm0
+
+ addl %r11d,%r10d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r8d,%r12d
+ movl %ebx,(%rdi,%rcx,4)
+ orl %r10d,%r12d
+ addl 52(%r15),%r9d
+ addb %dl,%bl
+ movl 112(%rsi),%eax
+ addl $1309151649,%r9d
+ movzbl %bl,%ebx
+ xorl %r11d,%r12d
+ movl %edx,108(%rsi)
+ addl %r12d,%r9d
+ addb %al,%cl
+ roll $21,%r9d
+ movl $-1,%r12d
+ pinsrw $5,(%rdi,%rbx,4),%xmm1
+
+ addl %r10d,%r9d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r11d,%r12d
+ movl %eax,(%rdi,%rcx,4)
+ orl %r9d,%r12d
+ addl 16(%r15),%r8d
+ addb %dl,%al
+ movl 116(%rsi),%ebx
+ addl $4149444226,%r8d
+ movzbl %al,%eax
+ xorl %r10d,%r12d
+ movl %edx,112(%rsi)
+ addl %r12d,%r8d
+ addb %bl,%cl
+ roll $6,%r8d
+ movl $-1,%r12d
+ pinsrw $6,(%rdi,%rax,4),%xmm0
+
+ addl %r9d,%r8d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r10d,%r12d
+ movl %ebx,(%rdi,%rcx,4)
+ orl %r8d,%r12d
+ addl 44(%r15),%r11d
+ addb %dl,%bl
+ movl 120(%rsi),%eax
+ addl $3174756917,%r11d
+ movzbl %bl,%ebx
+ xorl %r9d,%r12d
+ movl %edx,116(%rsi)
+ addl %r12d,%r11d
+ addb %al,%cl
+ roll $10,%r11d
+ movl $-1,%r12d
+ pinsrw $6,(%rdi,%rbx,4),%xmm1
+
+ addl %r8d,%r11d
+ movl (%rdi,%rcx,4),%edx
+ xorl %r9d,%r12d
+ movl %eax,(%rdi,%rcx,4)
+ orl %r11d,%r12d
+ addl 8(%r15),%r10d
+ addb %dl,%al
+ movl 124(%rsi),%ebx
+ addl $718787259,%r10d
+ movzbl %al,%eax
+ xorl %r8d,%r12d
+ movl %edx,120(%rsi)
+ addl %r12d,%r10d
+ addb %bl,%cl
+ roll $15,%r10d
+ movl $-1,%r12d
+ pinsrw $7,(%rdi,%rax,4),%xmm0
+
+ addl %r11d,%r10d
+ movdqu 48(%r13),%xmm5
+ addb $32,%bpl
+ movl (%rdi,%rcx,4),%edx
+ xorl %r8d,%r12d
+ movl %ebx,(%rdi,%rcx,4)
+ orl %r10d,%r12d
+ addl 36(%r15),%r9d
+ addb %dl,%bl
+ movl 0(%rdi,%rbp,4),%eax
+ addl $3951481745,%r9d
+ movzbl %bl,%ebx
+ xorl %r11d,%r12d
+ movl %edx,124(%rsi)
+ addl %r12d,%r9d
+ addb %al,%cl
+ roll $21,%r9d
+ movl $-1,%r12d
+ pinsrw $7,(%rdi,%rbx,4),%xmm1
+
+ addl %r10d,%r9d
+ movq %rbp,%rsi
+ xorq %rbp,%rbp
+ movb %sil,%bpl
+ movq %rcx,%rsi
+ xorq %rcx,%rcx
+ movb %sil,%cl
+ leaq (%rdi,%rbp,4),%rsi
+ psllq $8,%xmm1
+ pxor %xmm0,%xmm5
+ pxor %xmm1,%xmm5
+ addl 0(%rsp),%r8d
+ addl 4(%rsp),%r9d
+ addl 8(%rsp),%r10d
+ addl 12(%rsp),%r11d
+
+ movdqu %xmm2,(%r14,%r13,1)
+ movdqu %xmm3,16(%r14,%r13,1)
+ movdqu %xmm4,32(%r14,%r13,1)
+ movdqu %xmm5,48(%r14,%r13,1)
+ leaq 64(%r15),%r15
+ leaq 64(%r13),%r13
+ cmpq 16(%rsp),%r15
+ jb .Loop
+
+ movq 24(%rsp),%r12
+ subb %al,%cl
+ movl %r8d,0(%r12)
+ movl %r9d,4(%r12)
+ movl %r10d,8(%r12)
+ movl %r11d,12(%r12)
+ subb $1,%bpl
+ movl %ebp,-8(%rdi)
+ movl %ecx,-4(%rdi)
+
+ movq 40(%rsp),%r15
+ movq 48(%rsp),%r14
+ movq 56(%rsp),%r13
+ movq 64(%rsp),%r12
+ movq 72(%rsp),%rbp
+ movq 80(%rsp),%rbx
+ leaq 88(%rsp),%rsp
+.Lepilogue:
+.Labort:
+ .byte 0xf3,0xc3
+.size rc4_md5_enc,.-rc4_md5_enc
diff --git a/secure/lib/libcrypto/amd64/rc4-x86_64.S b/secure/lib/libcrypto/amd64/rc4-x86_64.S
new file mode 100644
index 000000000000..c561af754efe
--- /dev/null
+++ b/secure/lib/libcrypto/amd64/rc4-x86_64.S
@@ -0,0 +1,616 @@
+ # $FreeBSD$
+.text
+
+
+.globl RC4
+.type RC4,@function
+.align 16
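+# RC4(key, len, inp, out): three code paths: .Loop8 (8 bytes per iteration),
+# .Loop16 (16 bytes, key stream gathered through %xmm0/%xmm1, taken when
+# OPENSSL_ia32cap_P bit 30 is set), and the byte-wide .LRC4_CHAR path used
+# when the S-box was laid out as bytes (256(%rdi) == -1).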
+RC4: orq %rsi,%rsi
+ jne .Lentry
+ .byte 0xf3,0xc3
+.Lentry:
+ pushq %rbx
+ pushq %r12
+ pushq %r13
+.Lprologue:
+ movq %rsi,%r11
+ movq %rdx,%r12
+ movq %rcx,%r13
+ xorq %r10,%r10
+ xorq %rcx,%rcx
+
+ leaq 8(%rdi),%rdi
+ movb -8(%rdi),%r10b
+ movb -4(%rdi),%cl
+ cmpl $-1,256(%rdi)
+ je .LRC4_CHAR
+ movl OPENSSL_ia32cap_P(%rip),%r8d
+ xorq %rbx,%rbx
+ incb %r10b
+ subq %r10,%rbx
+ subq %r12,%r13
+ movl (%rdi,%r10,4),%eax
+ testq $-16,%r11
+ jz .Lloop1
+ btl $30,%r8d
+ jc .Lintel
+ andq $7,%rbx
+ leaq 1(%r10),%rsi
+ jz .Loop8
+ subq %rbx,%r11
+.Loop8_warmup:
+ addb %al,%cl
+ movl (%rdi,%rcx,4),%edx
+ movl %eax,(%rdi,%rcx,4)
+ movl %edx,(%rdi,%r10,4)
+ addb %dl,%al
+ incb %r10b
+ movl (%rdi,%rax,4),%edx
+ movl (%rdi,%r10,4),%eax
+ xorb (%r12),%dl
+ movb %dl,(%r13,%r12,1)
+ leaq 1(%r12),%r12
+ decq %rbx
+ jnz .Loop8_warmup
+
+ leaq 1(%r10),%rsi
+ jmp .Loop8
+.align 16
+.Loop8:
+ addb %al,%cl
+ movl (%rdi,%rcx,4),%edx
+ movl %eax,(%rdi,%rcx,4)
+ movl 0(%rdi,%rsi,4),%ebx
+ rorq $8,%r8
+ movl %edx,0(%rdi,%r10,4)
+ addb %al,%dl
+ movb (%rdi,%rdx,4),%r8b
+ addb %bl,%cl
+ movl (%rdi,%rcx,4),%edx
+ movl %ebx,(%rdi,%rcx,4)
+ movl 4(%rdi,%rsi,4),%eax
+ rorq $8,%r8
+ movl %edx,4(%rdi,%r10,4)
+ addb %bl,%dl
+ movb (%rdi,%rdx,4),%r8b
+ addb %al,%cl
+ movl (%rdi,%rcx,4),%edx
+ movl %eax,(%rdi,%rcx,4)
+ movl 8(%rdi,%rsi,4),%ebx
+ rorq $8,%r8
+ movl %edx,8(%rdi,%r10,4)
+ addb %al,%dl
+ movb (%rdi,%rdx,4),%r8b
+ addb %bl,%cl
+ movl (%rdi,%rcx,4),%edx
+ movl %ebx,(%rdi,%rcx,4)
+ movl 12(%rdi,%rsi,4),%eax
+ rorq $8,%r8
+ movl %edx,12(%rdi,%r10,4)
+ addb %bl,%dl
+ movb (%rdi,%rdx,4),%r8b
+ addb %al,%cl
+ movl (%rdi,%rcx,4),%edx
+ movl %eax,(%rdi,%rcx,4)
+ movl 16(%rdi,%rsi,4),%ebx
+ rorq $8,%r8
+ movl %edx,16(%rdi,%r10,4)
+ addb %al,%dl
+ movb (%rdi,%rdx,4),%r8b
+ addb %bl,%cl
+ movl (%rdi,%rcx,4),%edx
+ movl %ebx,(%rdi,%rcx,4)
+ movl 20(%rdi,%rsi,4),%eax
+ rorq $8,%r8
+ movl %edx,20(%rdi,%r10,4)
+ addb %bl,%dl
+ movb (%rdi,%rdx,4),%r8b
+ addb %al,%cl
+ movl (%rdi,%rcx,4),%edx
+ movl %eax,(%rdi,%rcx,4)
+ movl 24(%rdi,%rsi,4),%ebx
+ rorq $8,%r8
+ movl %edx,24(%rdi,%r10,4)
+ addb %al,%dl
+ movb (%rdi,%rdx,4),%r8b
+ addb $8,%sil
+ addb %bl,%cl
+ movl (%rdi,%rcx,4),%edx
+ movl %ebx,(%rdi,%rcx,4)
+ movl -4(%rdi,%rsi,4),%eax
+ rorq $8,%r8
+ movl %edx,28(%rdi,%r10,4)
+ addb %bl,%dl
+ movb (%rdi,%rdx,4),%r8b
+ addb $8,%r10b
+ rorq $8,%r8
+ subq $8,%r11
+
+ xorq (%r12),%r8
+ movq %r8,(%r13,%r12,1)
+ leaq 8(%r12),%r12
+
+ testq $-8,%r11
+ jnz .Loop8
+ cmpq $0,%r11
+ jne .Lloop1
+ jmp .Lexit
+
+.align 16
+.Lintel:
+ testq $-32,%r11
+ jz .Lloop1
+ andq $15,%rbx
+ jz .Loop16_is_hot
+ subq %rbx,%r11
+.Loop16_warmup:
+ addb %al,%cl
+ movl (%rdi,%rcx,4),%edx
+ movl %eax,(%rdi,%rcx,4)
+ movl %edx,(%rdi,%r10,4)
+ addb %dl,%al
+ incb %r10b
+ movl (%rdi,%rax,4),%edx
+ movl (%rdi,%r10,4),%eax
+ xorb (%r12),%dl
+ movb %dl,(%r13,%r12,1)
+ leaq 1(%r12),%r12
+ decq %rbx
+ jnz .Loop16_warmup
+
+ movq %rcx,%rbx
+ xorq %rcx,%rcx
+ movb %bl,%cl
+
+.Loop16_is_hot:
+ leaq (%rdi,%r10,4),%rsi
+ addb %al,%cl
+ movl (%rdi,%rcx,4),%edx
+ pxor %xmm0,%xmm0
+ movl %eax,(%rdi,%rcx,4)
+ addb %dl,%al
+ movl 4(%rsi),%ebx
+ movzbl %al,%eax
+ movl %edx,0(%rsi)
+ addb %bl,%cl
+ pinsrw $0,(%rdi,%rax,4),%xmm0
+ jmp .Loop16_enter
+.align 16
+.Loop16:
+ addb %al,%cl
+ movl (%rdi,%rcx,4),%edx
+ pxor %xmm0,%xmm2
+ psllq $8,%xmm1
+ pxor %xmm0,%xmm0
+ movl %eax,(%rdi,%rcx,4)
+ addb %dl,%al
+ movl 4(%rsi),%ebx
+ movzbl %al,%eax
+ movl %edx,0(%rsi)
+ pxor %xmm1,%xmm2
+ addb %bl,%cl
+ pinsrw $0,(%rdi,%rax,4),%xmm0
+ movdqu %xmm2,(%r13,%r12,1)
+ leaq 16(%r12),%r12
+.Loop16_enter:
+ movl (%rdi,%rcx,4),%edx
+ pxor %xmm1,%xmm1
+ movl %ebx,(%rdi,%rcx,4)
+ addb %dl,%bl
+ movl 8(%rsi),%eax
+ movzbl %bl,%ebx
+ movl %edx,4(%rsi)
+ addb %al,%cl
+ pinsrw $0,(%rdi,%rbx,4),%xmm1
+ movl (%rdi,%rcx,4),%edx
+ movl %eax,(%rdi,%rcx,4)
+ addb %dl,%al
+ movl 12(%rsi),%ebx
+ movzbl %al,%eax
+ movl %edx,8(%rsi)
+ addb %bl,%cl
+ pinsrw $1,(%rdi,%rax,4),%xmm0
+ movl (%rdi,%rcx,4),%edx
+ movl %ebx,(%rdi,%rcx,4)
+ addb %dl,%bl
+ movl 16(%rsi),%eax
+ movzbl %bl,%ebx
+ movl %edx,12(%rsi)
+ addb %al,%cl
+ pinsrw $1,(%rdi,%rbx,4),%xmm1
+ movl (%rdi,%rcx,4),%edx
+ movl %eax,(%rdi,%rcx,4)
+ addb %dl,%al
+ movl 20(%rsi),%ebx
+ movzbl %al,%eax
+ movl %edx,16(%rsi)
+ addb %bl,%cl
+ pinsrw $2,(%rdi,%rax,4),%xmm0
+ movl (%rdi,%rcx,4),%edx
+ movl %ebx,(%rdi,%rcx,4)
+ addb %dl,%bl
+ movl 24(%rsi),%eax
+ movzbl %bl,%ebx
+ movl %edx,20(%rsi)
+ addb %al,%cl
+ pinsrw $2,(%rdi,%rbx,4),%xmm1
+ movl (%rdi,%rcx,4),%edx
+ movl %eax,(%rdi,%rcx,4)
+ addb %dl,%al
+ movl 28(%rsi),%ebx
+ movzbl %al,%eax
+ movl %edx,24(%rsi)
+ addb %bl,%cl
+ pinsrw $3,(%rdi,%rax,4),%xmm0
+ movl (%rdi,%rcx,4),%edx
+ movl %ebx,(%rdi,%rcx,4)
+ addb %dl,%bl
+ movl 32(%rsi),%eax
+ movzbl %bl,%ebx
+ movl %edx,28(%rsi)
+ addb %al,%cl
+ pinsrw $3,(%rdi,%rbx,4),%xmm1
+ movl (%rdi,%rcx,4),%edx
+ movl %eax,(%rdi,%rcx,4)
+ addb %dl,%al
+ movl 36(%rsi),%ebx
+ movzbl %al,%eax
+ movl %edx,32(%rsi)
+ addb %bl,%cl
+ pinsrw $4,(%rdi,%rax,4),%xmm0
+ movl (%rdi,%rcx,4),%edx
+ movl %ebx,(%rdi,%rcx,4)
+ addb %dl,%bl
+ movl 40(%rsi),%eax
+ movzbl %bl,%ebx
+ movl %edx,36(%rsi)
+ addb %al,%cl
+ pinsrw $4,(%rdi,%rbx,4),%xmm1
+ movl (%rdi,%rcx,4),%edx
+ movl %eax,(%rdi,%rcx,4)
+ addb %dl,%al
+ movl 44(%rsi),%ebx
+ movzbl %al,%eax
+ movl %edx,40(%rsi)
+ addb %bl,%cl
+ pinsrw $5,(%rdi,%rax,4),%xmm0
+ movl (%rdi,%rcx,4),%edx
+ movl %ebx,(%rdi,%rcx,4)
+ addb %dl,%bl
+ movl 48(%rsi),%eax
+ movzbl %bl,%ebx
+ movl %edx,44(%rsi)
+ addb %al,%cl
+ pinsrw $5,(%rdi,%rbx,4),%xmm1
+ movl (%rdi,%rcx,4),%edx
+ movl %eax,(%rdi,%rcx,4)
+ addb %dl,%al
+ movl 52(%rsi),%ebx
+ movzbl %al,%eax
+ movl %edx,48(%rsi)
+ addb %bl,%cl
+ pinsrw $6,(%rdi,%rax,4),%xmm0
+ movl (%rdi,%rcx,4),%edx
+ movl %ebx,(%rdi,%rcx,4)
+ addb %dl,%bl
+ movl 56(%rsi),%eax
+ movzbl %bl,%ebx
+ movl %edx,52(%rsi)
+ addb %al,%cl
+ pinsrw $6,(%rdi,%rbx,4),%xmm1
+ movl (%rdi,%rcx,4),%edx
+ movl %eax,(%rdi,%rcx,4)
+ addb %dl,%al
+ movl 60(%rsi),%ebx
+ movzbl %al,%eax
+ movl %edx,56(%rsi)
+ addb %bl,%cl
+ pinsrw $7,(%rdi,%rax,4),%xmm0
+ addb $16,%r10b
+ movdqu (%r12),%xmm2
+ movl (%rdi,%rcx,4),%edx
+ movl %ebx,(%rdi,%rcx,4)
+ addb %dl,%bl
+ movzbl %bl,%ebx
+ movl %edx,60(%rsi)
+ leaq (%rdi,%r10,4),%rsi
+ pinsrw $7,(%rdi,%rbx,4),%xmm1
+ movl (%rsi),%eax
+ movq %rcx,%rbx
+ xorq %rcx,%rcx
+ subq $16,%r11
+ movb %bl,%cl
+ testq $-16,%r11
+ jnz .Loop16
+
+ psllq $8,%xmm1
+ pxor %xmm0,%xmm2
+ pxor %xmm1,%xmm2
+ movdqu %xmm2,(%r13,%r12,1)
+ leaq 16(%r12),%r12
+
+ cmpq $0,%r11
+ jne .Lloop1
+ jmp .Lexit
+
+.align 16
+.Lloop1:
+ addb %al,%cl
+ movl (%rdi,%rcx,4),%edx
+ movl %eax,(%rdi,%rcx,4)
+ movl %edx,(%rdi,%r10,4)
+ addb %dl,%al
+ incb %r10b
+ movl (%rdi,%rax,4),%edx
+ movl (%rdi,%r10,4),%eax
+ xorb (%r12),%dl
+ movb %dl,(%r13,%r12,1)
+ leaq 1(%r12),%r12
+ decq %r11
+ jnz .Lloop1
+ jmp .Lexit
+
+.align 16
+.LRC4_CHAR:
+ addb $1,%r10b
+ movzbl (%rdi,%r10,1),%eax
+ testq $-8,%r11
+ jz .Lcloop1
+ jmp .Lcloop8
+.align 16
+.Lcloop8:
+ movl (%r12),%r8d
+ movl 4(%r12),%r9d
+ addb %al,%cl
+ leaq 1(%r10),%rsi
+ movzbl (%rdi,%rcx,1),%edx
+ movzbl %sil,%esi
+ movzbl (%rdi,%rsi,1),%ebx
+ movb %al,(%rdi,%rcx,1)
+ cmpq %rsi,%rcx
+ movb %dl,(%rdi,%r10,1)
+ jne .Lcmov0
+ movq %rax,%rbx
+.Lcmov0:
+ addb %al,%dl
+ xorb (%rdi,%rdx,1),%r8b
+ rorl $8,%r8d
+ addb %bl,%cl
+ leaq 1(%rsi),%r10
+ movzbl (%rdi,%rcx,1),%edx
+ movzbl %r10b,%r10d
+ movzbl (%rdi,%r10,1),%eax
+ movb %bl,(%rdi,%rcx,1)
+ cmpq %r10,%rcx
+ movb %dl,(%rdi,%rsi,1)
+ jne .Lcmov1
+ movq %rbx,%rax
+.Lcmov1:
+ addb %bl,%dl
+ xorb (%rdi,%rdx,1),%r8b
+ rorl $8,%r8d
+ addb %al,%cl
+ leaq 1(%r10),%rsi
+ movzbl (%rdi,%rcx,1),%edx
+ movzbl %sil,%esi
+ movzbl (%rdi,%rsi,1),%ebx
+ movb %al,(%rdi,%rcx,1)
+ cmpq %rsi,%rcx
+ movb %dl,(%rdi,%r10,1)
+ jne .Lcmov2
+ movq %rax,%rbx
+.Lcmov2:
+ addb %al,%dl
+ xorb (%rdi,%rdx,1),%r8b
+ rorl $8,%r8d
+ addb %bl,%cl
+ leaq 1(%rsi),%r10
+ movzbl (%rdi,%rcx,1),%edx
+ movzbl %r10b,%r10d
+ movzbl (%rdi,%r10,1),%eax
+ movb %bl,(%rdi,%rcx,1)
+ cmpq %r10,%rcx
+ movb %dl,(%rdi,%rsi,1)
+ jne .Lcmov3
+ movq %rbx,%rax
+.Lcmov3:
+ addb %bl,%dl
+ xorb (%rdi,%rdx,1),%r8b
+ rorl $8,%r8d
+ addb %al,%cl
+ leaq 1(%r10),%rsi
+ movzbl (%rdi,%rcx,1),%edx
+ movzbl %sil,%esi
+ movzbl (%rdi,%rsi,1),%ebx
+ movb %al,(%rdi,%rcx,1)
+ cmpq %rsi,%rcx
+ movb %dl,(%rdi,%r10,1)
+ jne .Lcmov4
+ movq %rax,%rbx
+.Lcmov4:
+ addb %al,%dl
+ xorb (%rdi,%rdx,1),%r9b
+ rorl $8,%r9d
+ addb %bl,%cl
+ leaq 1(%rsi),%r10
+ movzbl (%rdi,%rcx,1),%edx
+ movzbl %r10b,%r10d
+ movzbl (%rdi,%r10,1),%eax
+ movb %bl,(%rdi,%rcx,1)
+ cmpq %r10,%rcx
+ movb %dl,(%rdi,%rsi,1)
+ jne .Lcmov5
+ movq %rbx,%rax
+.Lcmov5:
+ addb %bl,%dl
+ xorb (%rdi,%rdx,1),%r9b
+ rorl $8,%r9d
+ addb %al,%cl
+ leaq 1(%r10),%rsi
+ movzbl (%rdi,%rcx,1),%edx
+ movzbl %sil,%esi
+ movzbl (%rdi,%rsi,1),%ebx
+ movb %al,(%rdi,%rcx,1)
+ cmpq %rsi,%rcx
+ movb %dl,(%rdi,%r10,1)
+ jne .Lcmov6
+ movq %rax,%rbx
+.Lcmov6:
+ addb %al,%dl
+ xorb (%rdi,%rdx,1),%r9b
+ rorl $8,%r9d
+ addb %bl,%cl
+ leaq 1(%rsi),%r10
+ movzbl (%rdi,%rcx,1),%edx
+ movzbl %r10b,%r10d
+ movzbl (%rdi,%r10,1),%eax
+ movb %bl,(%rdi,%rcx,1)
+ cmpq %r10,%rcx
+ movb %dl,(%rdi,%rsi,1)
+ jne .Lcmov7
+ movq %rbx,%rax
+.Lcmov7:
+ addb %bl,%dl
+ xorb (%rdi,%rdx,1),%r9b
+ rorl $8,%r9d
+ leaq -8(%r11),%r11
+ movl %r8d,(%r13)
+ leaq 8(%r12),%r12
+ movl %r9d,4(%r13)
+ leaq 8(%r13),%r13
+
+ testq $-8,%r11
+ jnz .Lcloop8
+ cmpq $0,%r11
+ jne .Lcloop1
+ jmp .Lexit
+.align 16
+.Lcloop1:
+ addb %al,%cl
+ movzbl %cl,%ecx
+ movzbl (%rdi,%rcx,1),%edx
+ movb %al,(%rdi,%rcx,1)
+ movb %dl,(%rdi,%r10,1)
+ addb %al,%dl
+ addb $1,%r10b
+ movzbl %dl,%edx
+ movzbl %r10b,%r10d
+ movzbl (%rdi,%rdx,1),%edx
+ movzbl (%rdi,%r10,1),%eax
+ xorb (%r12),%dl
+ leaq 1(%r12),%r12
+ movb %dl,(%r13)
+ leaq 1(%r13),%r13
+ subq $1,%r11
+ jnz .Lcloop1
+ jmp .Lexit
+
+.align 16
+.Lexit:
+ subb $1,%r10b
+ movl %r10d,-8(%rdi)
+ movl %ecx,-4(%rdi)
+
+ movq (%rsp),%r13
+ movq 8(%rsp),%r12
+ movq 16(%rsp),%rbx
+ addq $24,%rsp
+.Lepilogue:
+ .byte 0xf3,0xc3
+.size RC4,.-RC4
+.globl private_RC4_set_key
+.type private_RC4_set_key,@function
+.align 16
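+# private_RC4_set_key(key, len, data): RC4 key schedule. Depending on an
+# OPENSSL_ia32cap_P bit it fills the S-box as 32-bit words (.Lw1stloop/
+# .Lw2ndloop) or as bytes (.Lc1stloop/.Lc2ndloop, which also set the
+# 256(%rdi) marker checked by RC4 above).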
+private_RC4_set_key:
+ leaq 8(%rdi),%rdi
+ leaq (%rdx,%rsi,1),%rdx
+ negq %rsi
+ movq %rsi,%rcx
+ xorl %eax,%eax
+ xorq %r9,%r9
+ xorq %r10,%r10
+ xorq %r11,%r11
+
+ movl OPENSSL_ia32cap_P(%rip),%r8d
+ btl $20,%r8d
+ jc .Lc1stloop
+ jmp .Lw1stloop
+
+.align 16
+.Lw1stloop:
+ movl %eax,(%rdi,%rax,4)
+ addb $1,%al
+ jnc .Lw1stloop
+
+ xorq %r9,%r9
+ xorq %r8,%r8
+.align 16
+.Lw2ndloop:
+ movl (%rdi,%r9,4),%r10d
+ addb (%rdx,%rsi,1),%r8b
+ addb %r10b,%r8b
+ addq $1,%rsi
+ movl (%rdi,%r8,4),%r11d
+ cmovzq %rcx,%rsi
+ movl %r10d,(%rdi,%r8,4)
+ movl %r11d,(%rdi,%r9,4)
+ addb $1,%r9b
+ jnc .Lw2ndloop
+ jmp .Lexit_key
+
+.align 16
+.Lc1stloop:
+ movb %al,(%rdi,%rax,1)
+ addb $1,%al
+ jnc .Lc1stloop
+
+ xorq %r9,%r9
+ xorq %r8,%r8
+.align 16
+.Lc2ndloop:
+ movb (%rdi,%r9,1),%r10b
+ addb (%rdx,%rsi,1),%r8b
+ addb %r10b,%r8b
+ addq $1,%rsi
+ movb (%rdi,%r8,1),%r11b
+ jnz .Lcnowrap
+ movq %rcx,%rsi
+.Lcnowrap:
+ movb %r10b,(%rdi,%r8,1)
+ movb %r11b,(%rdi,%r9,1)
+ addb $1,%r9b
+ jnc .Lc2ndloop
+ movl $-1,256(%rdi)
+
+.align 16
+.Lexit_key:
+ xorl %eax,%eax
+ movl %eax,-8(%rdi)
+ movl %eax,-4(%rdi)
+ .byte 0xf3,0xc3
+.size private_RC4_set_key,.-private_RC4_set_key
+
+.globl RC4_options
+.type RC4_options,@function
+.align 16
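+# RC4_options: returns one of the static strings below ("rc4(8x,int)",
+# "rc4(8x,char)" or "rc4(16x,int)"), matching the CPU checks made by RC4 and
+# private_RC4_set_key.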
+RC4_options:
+ leaq .Lopts(%rip),%rax
+ movl OPENSSL_ia32cap_P(%rip),%edx
+ btl $20,%edx
+ jc .L8xchar
+ btl $30,%edx
+ jnc .Ldone
+ addq $25,%rax
+ .byte 0xf3,0xc3
+.L8xchar:
+ addq $12,%rax
+.Ldone:
+ .byte 0xf3,0xc3
+.align 64
+.Lopts:
+.byte 114,99,52,40,56,120,44,105,110,116,41,0	# "rc4(8x,int)"
+.byte 114,99,52,40,56,120,44,99,104,97,114,41,0	# "rc4(8x,char)"
+.byte 114,99,52,40,49,54,120,44,105,110,116,41,0	# "rc4(16x,int)"
+.byte 82,67,52,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0	# "RC4 for x86_64, CRYPTOGAMS by <appro@openssl.org>"
+.align 64
+.size RC4_options,.-RC4_options
diff --git a/secure/lib/libcrypto/amd64/sha1-x86_64.S b/secure/lib/libcrypto/amd64/sha1-x86_64.S
new file mode 100644
index 000000000000..421423a5ccad
--- /dev/null
+++ b/secure/lib/libcrypto/amd64/sha1-x86_64.S
@@ -0,0 +1,2487 @@
+ # $FreeBSD$
+.text
+
+
+.globl sha1_block_data_order
+.type sha1_block_data_order,@function
+.align 16
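+# sha1_block_data_order(ctx, data, blocks): jumps to the SSSE3 implementation
+# when OPENSSL_ia32cap_P+4 advertises SSSE3 (bit 9), otherwise runs the
+# integer-only .Lialu code below.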
+sha1_block_data_order:
+ movl OPENSSL_ia32cap_P+0(%rip),%r9d
+ movl OPENSSL_ia32cap_P+4(%rip),%r8d
+ testl $512,%r8d
+ jz .Lialu
+ jmp _ssse3_shortcut
+
+.align 16
+.Lialu:
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ movq %rsp,%r11
+ movq %rdi,%r8
+ subq $72,%rsp
+ movq %rsi,%r9
+ andq $-64,%rsp
+ movq %rdx,%r10
+ movq %r11,64(%rsp)
+.Lprologue:
+
+ movl 0(%r8),%esi
+ movl 4(%r8),%edi
+ movl 8(%r8),%r11d
+ movl 12(%r8),%r12d
+ movl 16(%r8),%r13d
+ jmp .Lloop
+
+.align 16
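+# Each .Lloop iteration compresses one 64-byte block: four groups of 20 rounds
+# using the SHA-1 constants 0x5a827999, 0x6ed9eba1, 0x8f1bbcdc and 0xca62c1d6
+# (the latter two appear below as negative decimal immediates).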
+.Lloop:
+ movl 0(%r9),%edx
+ bswapl %edx
+ movl %edx,0(%rsp)
+ movl %r11d,%eax
+ movl 4(%r9),%ebp
+ movl %esi,%ecx
+ xorl %r12d,%eax
+ bswapl %ebp
+ roll $5,%ecx
+ leal 1518500249(%rdx,%r13,1),%r13d
+ andl %edi,%eax
+ movl %ebp,4(%rsp)
+ addl %ecx,%r13d
+ xorl %r12d,%eax
+ roll $30,%edi
+ addl %eax,%r13d
+ movl %edi,%eax
+ movl 8(%r9),%edx
+ movl %r13d,%ecx
+ xorl %r11d,%eax
+ bswapl %edx
+ roll $5,%ecx
+ leal 1518500249(%rbp,%r12,1),%r12d
+ andl %esi,%eax
+ movl %edx,8(%rsp)
+ addl %ecx,%r12d
+ xorl %r11d,%eax
+ roll $30,%esi
+ addl %eax,%r12d
+ movl %esi,%eax
+ movl 12(%r9),%ebp
+ movl %r12d,%ecx
+ xorl %edi,%eax
+ bswapl %ebp
+ roll $5,%ecx
+ leal 1518500249(%rdx,%r11,1),%r11d
+ andl %r13d,%eax
+ movl %ebp,12(%rsp)
+ addl %ecx,%r11d
+ xorl %edi,%eax
+ roll $30,%r13d
+ addl %eax,%r11d
+ movl %r13d,%eax
+ movl 16(%r9),%edx
+ movl %r11d,%ecx
+ xorl %esi,%eax
+ bswapl %edx
+ roll $5,%ecx
+ leal 1518500249(%rbp,%rdi,1),%edi
+ andl %r12d,%eax
+ movl %edx,16(%rsp)
+ addl %ecx,%edi
+ xorl %esi,%eax
+ roll $30,%r12d
+ addl %eax,%edi
+ movl %r12d,%eax
+ movl 20(%r9),%ebp
+ movl %edi,%ecx
+ xorl %r13d,%eax
+ bswapl %ebp
+ roll $5,%ecx
+ leal 1518500249(%rdx,%rsi,1),%esi
+ andl %r11d,%eax
+ movl %ebp,20(%rsp)
+ addl %ecx,%esi
+ xorl %r13d,%eax
+ roll $30,%r11d
+ addl %eax,%esi
+ movl %r11d,%eax
+ movl 24(%r9),%edx
+ movl %esi,%ecx
+ xorl %r12d,%eax
+ bswapl %edx
+ roll $5,%ecx
+ leal 1518500249(%rbp,%r13,1),%r13d
+ andl %edi,%eax
+ movl %edx,24(%rsp)
+ addl %ecx,%r13d
+ xorl %r12d,%eax
+ roll $30,%edi
+ addl %eax,%r13d
+ movl %edi,%eax
+ movl 28(%r9),%ebp
+ movl %r13d,%ecx
+ xorl %r11d,%eax
+ bswapl %ebp
+ roll $5,%ecx
+ leal 1518500249(%rdx,%r12,1),%r12d
+ andl %esi,%eax
+ movl %ebp,28(%rsp)
+ addl %ecx,%r12d
+ xorl %r11d,%eax
+ roll $30,%esi
+ addl %eax,%r12d
+ movl %esi,%eax
+ movl 32(%r9),%edx
+ movl %r12d,%ecx
+ xorl %edi,%eax
+ bswapl %edx
+ roll $5,%ecx
+ leal 1518500249(%rbp,%r11,1),%r11d
+ andl %r13d,%eax
+ movl %edx,32(%rsp)
+ addl %ecx,%r11d
+ xorl %edi,%eax
+ roll $30,%r13d
+ addl %eax,%r11d
+ movl %r13d,%eax
+ movl 36(%r9),%ebp
+ movl %r11d,%ecx
+ xorl %esi,%eax
+ bswapl %ebp
+ roll $5,%ecx
+ leal 1518500249(%rdx,%rdi,1),%edi
+ andl %r12d,%eax
+ movl %ebp,36(%rsp)
+ addl %ecx,%edi
+ xorl %esi,%eax
+ roll $30,%r12d
+ addl %eax,%edi
+ movl %r12d,%eax
+ movl 40(%r9),%edx
+ movl %edi,%ecx
+ xorl %r13d,%eax
+ bswapl %edx
+ roll $5,%ecx
+ leal 1518500249(%rbp,%rsi,1),%esi
+ andl %r11d,%eax
+ movl %edx,40(%rsp)
+ addl %ecx,%esi
+ xorl %r13d,%eax
+ roll $30,%r11d
+ addl %eax,%esi
+ movl %r11d,%eax
+ movl 44(%r9),%ebp
+ movl %esi,%ecx
+ xorl %r12d,%eax
+ bswapl %ebp
+ roll $5,%ecx
+ leal 1518500249(%rdx,%r13,1),%r13d
+ andl %edi,%eax
+ movl %ebp,44(%rsp)
+ addl %ecx,%r13d
+ xorl %r12d,%eax
+ roll $30,%edi
+ addl %eax,%r13d
+ movl %edi,%eax
+ movl 48(%r9),%edx
+ movl %r13d,%ecx
+ xorl %r11d,%eax
+ bswapl %edx
+ roll $5,%ecx
+ leal 1518500249(%rbp,%r12,1),%r12d
+ andl %esi,%eax
+ movl %edx,48(%rsp)
+ addl %ecx,%r12d
+ xorl %r11d,%eax
+ roll $30,%esi
+ addl %eax,%r12d
+ movl %esi,%eax
+ movl 52(%r9),%ebp
+ movl %r12d,%ecx
+ xorl %edi,%eax
+ bswapl %ebp
+ roll $5,%ecx
+ leal 1518500249(%rdx,%r11,1),%r11d
+ andl %r13d,%eax
+ movl %ebp,52(%rsp)
+ addl %ecx,%r11d
+ xorl %edi,%eax
+ roll $30,%r13d
+ addl %eax,%r11d
+ movl %r13d,%eax
+ movl 56(%r9),%edx
+ movl %r11d,%ecx
+ xorl %esi,%eax
+ bswapl %edx
+ roll $5,%ecx
+ leal 1518500249(%rbp,%rdi,1),%edi
+ andl %r12d,%eax
+ movl %edx,56(%rsp)
+ addl %ecx,%edi
+ xorl %esi,%eax
+ roll $30,%r12d
+ addl %eax,%edi
+ movl %r12d,%eax
+ movl 60(%r9),%ebp
+ movl %edi,%ecx
+ xorl %r13d,%eax
+ bswapl %ebp
+ roll $5,%ecx
+ leal 1518500249(%rdx,%rsi,1),%esi
+ andl %r11d,%eax
+ movl %ebp,60(%rsp)
+ addl %ecx,%esi
+ xorl %r13d,%eax
+ roll $30,%r11d
+ addl %eax,%esi
+ movl 0(%rsp),%edx
+ movl %r11d,%eax
+ movl %esi,%ecx
+ xorl 8(%rsp),%edx
+ xorl %r12d,%eax
+ roll $5,%ecx
+ xorl 32(%rsp),%edx
+ andl %edi,%eax
+ leal 1518500249(%rbp,%r13,1),%r13d
+ xorl 52(%rsp),%edx
+ xorl %r12d,%eax
+ roll $1,%edx
+ addl %ecx,%r13d
+ roll $30,%edi
+ movl %edx,0(%rsp)
+ addl %eax,%r13d
+ movl 4(%rsp),%ebp
+ movl %edi,%eax
+ movl %r13d,%ecx
+ xorl 12(%rsp),%ebp
+ xorl %r11d,%eax
+ roll $5,%ecx
+ xorl 36(%rsp),%ebp
+ andl %esi,%eax
+ leal 1518500249(%rdx,%r12,1),%r12d
+ xorl 56(%rsp),%ebp
+ xorl %r11d,%eax
+ roll $1,%ebp
+ addl %ecx,%r12d
+ roll $30,%esi
+ movl %ebp,4(%rsp)
+ addl %eax,%r12d
+ movl 8(%rsp),%edx
+ movl %esi,%eax
+ movl %r12d,%ecx
+ xorl 16(%rsp),%edx
+ xorl %edi,%eax
+ roll $5,%ecx
+ xorl 40(%rsp),%edx
+ andl %r13d,%eax
+ leal 1518500249(%rbp,%r11,1),%r11d
+ xorl 60(%rsp),%edx
+ xorl %edi,%eax
+ roll $1,%edx
+ addl %ecx,%r11d
+ roll $30,%r13d
+ movl %edx,8(%rsp)
+ addl %eax,%r11d
+ movl 12(%rsp),%ebp
+ movl %r13d,%eax
+ movl %r11d,%ecx
+ xorl 20(%rsp),%ebp
+ xorl %esi,%eax
+ roll $5,%ecx
+ xorl 44(%rsp),%ebp
+ andl %r12d,%eax
+ leal 1518500249(%rdx,%rdi,1),%edi
+ xorl 0(%rsp),%ebp
+ xorl %esi,%eax
+ roll $1,%ebp
+ addl %ecx,%edi
+ roll $30,%r12d
+ movl %ebp,12(%rsp)
+ addl %eax,%edi
+ movl 16(%rsp),%edx
+ movl %r12d,%eax
+ movl %edi,%ecx
+ xorl 24(%rsp),%edx
+ xorl %r13d,%eax
+ roll $5,%ecx
+ xorl 48(%rsp),%edx
+ andl %r11d,%eax
+ leal 1518500249(%rbp,%rsi,1),%esi
+ xorl 4(%rsp),%edx
+ xorl %r13d,%eax
+ roll $1,%edx
+ addl %ecx,%esi
+ roll $30,%r11d
+ movl %edx,16(%rsp)
+ addl %eax,%esi
+ movl 20(%rsp),%ebp
+ movl %r11d,%eax
+ movl %esi,%ecx
+ xorl 28(%rsp),%ebp
+ xorl %edi,%eax
+ roll $5,%ecx
+ leal 1859775393(%rdx,%r13,1),%r13d
+ xorl 52(%rsp),%ebp
+ xorl %r12d,%eax
+ addl %ecx,%r13d
+ xorl 8(%rsp),%ebp
+ roll $30,%edi
+ addl %eax,%r13d
+ roll $1,%ebp
+ movl %ebp,20(%rsp)
+ movl 24(%rsp),%edx
+ movl %edi,%eax
+ movl %r13d,%ecx
+ xorl 32(%rsp),%edx
+ xorl %esi,%eax
+ roll $5,%ecx
+ leal 1859775393(%rbp,%r12,1),%r12d
+ xorl 56(%rsp),%edx
+ xorl %r11d,%eax
+ addl %ecx,%r12d
+ xorl 12(%rsp),%edx
+ roll $30,%esi
+ addl %eax,%r12d
+ roll $1,%edx
+ movl %edx,24(%rsp)
+ movl 28(%rsp),%ebp
+ movl %esi,%eax
+ movl %r12d,%ecx
+ xorl 36(%rsp),%ebp
+ xorl %r13d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rdx,%r11,1),%r11d
+ xorl 60(%rsp),%ebp
+ xorl %edi,%eax
+ addl %ecx,%r11d
+ xorl 16(%rsp),%ebp
+ roll $30,%r13d
+ addl %eax,%r11d
+ roll $1,%ebp
+ movl %ebp,28(%rsp)
+ movl 32(%rsp),%edx
+ movl %r13d,%eax
+ movl %r11d,%ecx
+ xorl 40(%rsp),%edx
+ xorl %r12d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rbp,%rdi,1),%edi
+ xorl 0(%rsp),%edx
+ xorl %esi,%eax
+ addl %ecx,%edi
+ xorl 20(%rsp),%edx
+ roll $30,%r12d
+ addl %eax,%edi
+ roll $1,%edx
+ movl %edx,32(%rsp)
+ movl 36(%rsp),%ebp
+ movl %r12d,%eax
+ movl %edi,%ecx
+ xorl 44(%rsp),%ebp
+ xorl %r11d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rdx,%rsi,1),%esi
+ xorl 4(%rsp),%ebp
+ xorl %r13d,%eax
+ addl %ecx,%esi
+ xorl 24(%rsp),%ebp
+ roll $30,%r11d
+ addl %eax,%esi
+ roll $1,%ebp
+ movl %ebp,36(%rsp)
+ movl 40(%rsp),%edx
+ movl %r11d,%eax
+ movl %esi,%ecx
+ xorl 48(%rsp),%edx
+ xorl %edi,%eax
+ roll $5,%ecx
+ leal 1859775393(%rbp,%r13,1),%r13d
+ xorl 8(%rsp),%edx
+ xorl %r12d,%eax
+ addl %ecx,%r13d
+ xorl 28(%rsp),%edx
+ roll $30,%edi
+ addl %eax,%r13d
+ roll $1,%edx
+ movl %edx,40(%rsp)
+ movl 44(%rsp),%ebp
+ movl %edi,%eax
+ movl %r13d,%ecx
+ xorl 52(%rsp),%ebp
+ xorl %esi,%eax
+ roll $5,%ecx
+ leal 1859775393(%rdx,%r12,1),%r12d
+ xorl 12(%rsp),%ebp
+ xorl %r11d,%eax
+ addl %ecx,%r12d
+ xorl 32(%rsp),%ebp
+ roll $30,%esi
+ addl %eax,%r12d
+ roll $1,%ebp
+ movl %ebp,44(%rsp)
+ movl 48(%rsp),%edx
+ movl %esi,%eax
+ movl %r12d,%ecx
+ xorl 56(%rsp),%edx
+ xorl %r13d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rbp,%r11,1),%r11d
+ xorl 16(%rsp),%edx
+ xorl %edi,%eax
+ addl %ecx,%r11d
+ xorl 36(%rsp),%edx
+ roll $30,%r13d
+ addl %eax,%r11d
+ roll $1,%edx
+ movl %edx,48(%rsp)
+ movl 52(%rsp),%ebp
+ movl %r13d,%eax
+ movl %r11d,%ecx
+ xorl 60(%rsp),%ebp
+ xorl %r12d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rdx,%rdi,1),%edi
+ xorl 20(%rsp),%ebp
+ xorl %esi,%eax
+ addl %ecx,%edi
+ xorl 40(%rsp),%ebp
+ roll $30,%r12d
+ addl %eax,%edi
+ roll $1,%ebp
+ movl %ebp,52(%rsp)
+ movl 56(%rsp),%edx
+ movl %r12d,%eax
+ movl %edi,%ecx
+ xorl 0(%rsp),%edx
+ xorl %r11d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rbp,%rsi,1),%esi
+ xorl 24(%rsp),%edx
+ xorl %r13d,%eax
+ addl %ecx,%esi
+ xorl 44(%rsp),%edx
+ roll $30,%r11d
+ addl %eax,%esi
+ roll $1,%edx
+ movl %edx,56(%rsp)
+ movl 60(%rsp),%ebp
+ movl %r11d,%eax
+ movl %esi,%ecx
+ xorl 4(%rsp),%ebp
+ xorl %edi,%eax
+ roll $5,%ecx
+ leal 1859775393(%rdx,%r13,1),%r13d
+ xorl 28(%rsp),%ebp
+ xorl %r12d,%eax
+ addl %ecx,%r13d
+ xorl 48(%rsp),%ebp
+ roll $30,%edi
+ addl %eax,%r13d
+ roll $1,%ebp
+ movl %ebp,60(%rsp)
+ movl 0(%rsp),%edx
+ movl %edi,%eax
+ movl %r13d,%ecx
+ xorl 8(%rsp),%edx
+ xorl %esi,%eax
+ roll $5,%ecx
+ leal 1859775393(%rbp,%r12,1),%r12d
+ xorl 32(%rsp),%edx
+ xorl %r11d,%eax
+ addl %ecx,%r12d
+ xorl 52(%rsp),%edx
+ roll $30,%esi
+ addl %eax,%r12d
+ roll $1,%edx
+ movl %edx,0(%rsp)
+ movl 4(%rsp),%ebp
+ movl %esi,%eax
+ movl %r12d,%ecx
+ xorl 12(%rsp),%ebp
+ xorl %r13d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rdx,%r11,1),%r11d
+ xorl 36(%rsp),%ebp
+ xorl %edi,%eax
+ addl %ecx,%r11d
+ xorl 56(%rsp),%ebp
+ roll $30,%r13d
+ addl %eax,%r11d
+ roll $1,%ebp
+ movl %ebp,4(%rsp)
+ movl 8(%rsp),%edx
+ movl %r13d,%eax
+ movl %r11d,%ecx
+ xorl 16(%rsp),%edx
+ xorl %r12d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rbp,%rdi,1),%edi
+ xorl 40(%rsp),%edx
+ xorl %esi,%eax
+ addl %ecx,%edi
+ xorl 60(%rsp),%edx
+ roll $30,%r12d
+ addl %eax,%edi
+ roll $1,%edx
+ movl %edx,8(%rsp)
+ movl 12(%rsp),%ebp
+ movl %r12d,%eax
+ movl %edi,%ecx
+ xorl 20(%rsp),%ebp
+ xorl %r11d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rdx,%rsi,1),%esi
+ xorl 44(%rsp),%ebp
+ xorl %r13d,%eax
+ addl %ecx,%esi
+ xorl 0(%rsp),%ebp
+ roll $30,%r11d
+ addl %eax,%esi
+ roll $1,%ebp
+ movl %ebp,12(%rsp)
+ movl 16(%rsp),%edx
+ movl %r11d,%eax
+ movl %esi,%ecx
+ xorl 24(%rsp),%edx
+ xorl %edi,%eax
+ roll $5,%ecx
+ leal 1859775393(%rbp,%r13,1),%r13d
+ xorl 48(%rsp),%edx
+ xorl %r12d,%eax
+ addl %ecx,%r13d
+ xorl 4(%rsp),%edx
+ roll $30,%edi
+ addl %eax,%r13d
+ roll $1,%edx
+ movl %edx,16(%rsp)
+ movl 20(%rsp),%ebp
+ movl %edi,%eax
+ movl %r13d,%ecx
+ xorl 28(%rsp),%ebp
+ xorl %esi,%eax
+ roll $5,%ecx
+ leal 1859775393(%rdx,%r12,1),%r12d
+ xorl 52(%rsp),%ebp
+ xorl %r11d,%eax
+ addl %ecx,%r12d
+ xorl 8(%rsp),%ebp
+ roll $30,%esi
+ addl %eax,%r12d
+ roll $1,%ebp
+ movl %ebp,20(%rsp)
+ movl 24(%rsp),%edx
+ movl %esi,%eax
+ movl %r12d,%ecx
+ xorl 32(%rsp),%edx
+ xorl %r13d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rbp,%r11,1),%r11d
+ xorl 56(%rsp),%edx
+ xorl %edi,%eax
+ addl %ecx,%r11d
+ xorl 12(%rsp),%edx
+ roll $30,%r13d
+ addl %eax,%r11d
+ roll $1,%edx
+ movl %edx,24(%rsp)
+ movl 28(%rsp),%ebp
+ movl %r13d,%eax
+ movl %r11d,%ecx
+ xorl 36(%rsp),%ebp
+ xorl %r12d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rdx,%rdi,1),%edi
+ xorl 60(%rsp),%ebp
+ xorl %esi,%eax
+ addl %ecx,%edi
+ xorl 16(%rsp),%ebp
+ roll $30,%r12d
+ addl %eax,%edi
+ roll $1,%ebp
+ movl %ebp,28(%rsp)
+ movl 32(%rsp),%edx
+ movl %r12d,%eax
+ movl %edi,%ecx
+ xorl 40(%rsp),%edx
+ xorl %r11d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rbp,%rsi,1),%esi
+ xorl 0(%rsp),%edx
+ xorl %r13d,%eax
+ addl %ecx,%esi
+ xorl 20(%rsp),%edx
+ roll $30,%r11d
+ addl %eax,%esi
+ roll $1,%edx
+ movl %edx,32(%rsp)
+ movl 36(%rsp),%ebp
+ movl %r11d,%eax
+ movl %r11d,%ebx
+ xorl 44(%rsp),%ebp
+ andl %r12d,%eax
+ movl %esi,%ecx
+ xorl 4(%rsp),%ebp
+ xorl %r12d,%ebx
+ leal -1894007588(%rdx,%r13,1),%r13d
+ roll $5,%ecx
+ xorl 24(%rsp),%ebp
+ addl %eax,%r13d
+ andl %edi,%ebx
+ roll $1,%ebp
+ addl %ebx,%r13d
+ roll $30,%edi
+ movl %ebp,36(%rsp)
+ addl %ecx,%r13d
+ movl 40(%rsp),%edx
+ movl %edi,%eax
+ movl %edi,%ebx
+ xorl 48(%rsp),%edx
+ andl %r11d,%eax
+ movl %r13d,%ecx
+ xorl 8(%rsp),%edx
+ xorl %r11d,%ebx
+ leal -1894007588(%rbp,%r12,1),%r12d
+ roll $5,%ecx
+ xorl 28(%rsp),%edx
+ addl %eax,%r12d
+ andl %esi,%ebx
+ roll $1,%edx
+ addl %ebx,%r12d
+ roll $30,%esi
+ movl %edx,40(%rsp)
+ addl %ecx,%r12d
+ movl 44(%rsp),%ebp
+ movl %esi,%eax
+ movl %esi,%ebx
+ xorl 52(%rsp),%ebp
+ andl %edi,%eax
+ movl %r12d,%ecx
+ xorl 12(%rsp),%ebp
+ xorl %edi,%ebx
+ leal -1894007588(%rdx,%r11,1),%r11d
+ roll $5,%ecx
+ xorl 32(%rsp),%ebp
+ addl %eax,%r11d
+ andl %r13d,%ebx
+ roll $1,%ebp
+ addl %ebx,%r11d
+ roll $30,%r13d
+ movl %ebp,44(%rsp)
+ addl %ecx,%r11d
+ movl 48(%rsp),%edx
+ movl %r13d,%eax
+ movl %r13d,%ebx
+ xorl 56(%rsp),%edx
+ andl %esi,%eax
+ movl %r11d,%ecx
+ xorl 16(%rsp),%edx
+ xorl %esi,%ebx
+ leal -1894007588(%rbp,%rdi,1),%edi
+ roll $5,%ecx
+ xorl 36(%rsp),%edx
+ addl %eax,%edi
+ andl %r12d,%ebx
+ roll $1,%edx
+ addl %ebx,%edi
+ roll $30,%r12d
+ movl %edx,48(%rsp)
+ addl %ecx,%edi
+ movl 52(%rsp),%ebp
+ movl %r12d,%eax
+ movl %r12d,%ebx
+ xorl 60(%rsp),%ebp
+ andl %r13d,%eax
+ movl %edi,%ecx
+ xorl 20(%rsp),%ebp
+ xorl %r13d,%ebx
+ leal -1894007588(%rdx,%rsi,1),%esi
+ roll $5,%ecx
+ xorl 40(%rsp),%ebp
+ addl %eax,%esi
+ andl %r11d,%ebx
+ roll $1,%ebp
+ addl %ebx,%esi
+ roll $30,%r11d
+ movl %ebp,52(%rsp)
+ addl %ecx,%esi
+ movl 56(%rsp),%edx
+ movl %r11d,%eax
+ movl %r11d,%ebx
+ xorl 0(%rsp),%edx
+ andl %r12d,%eax
+ movl %esi,%ecx
+ xorl 24(%rsp),%edx
+ xorl %r12d,%ebx
+ leal -1894007588(%rbp,%r13,1),%r13d
+ roll $5,%ecx
+ xorl 44(%rsp),%edx
+ addl %eax,%r13d
+ andl %edi,%ebx
+ roll $1,%edx
+ addl %ebx,%r13d
+ roll $30,%edi
+ movl %edx,56(%rsp)
+ addl %ecx,%r13d
+ movl 60(%rsp),%ebp
+ movl %edi,%eax
+ movl %edi,%ebx
+ xorl 4(%rsp),%ebp
+ andl %r11d,%eax
+ movl %r13d,%ecx
+ xorl 28(%rsp),%ebp
+ xorl %r11d,%ebx
+ leal -1894007588(%rdx,%r12,1),%r12d
+ roll $5,%ecx
+ xorl 48(%rsp),%ebp
+ addl %eax,%r12d
+ andl %esi,%ebx
+ roll $1,%ebp
+ addl %ebx,%r12d
+ roll $30,%esi
+ movl %ebp,60(%rsp)
+ addl %ecx,%r12d
+ movl 0(%rsp),%edx
+ movl %esi,%eax
+ movl %esi,%ebx
+ xorl 8(%rsp),%edx
+ andl %edi,%eax
+ movl %r12d,%ecx
+ xorl 32(%rsp),%edx
+ xorl %edi,%ebx
+ leal -1894007588(%rbp,%r11,1),%r11d
+ roll $5,%ecx
+ xorl 52(%rsp),%edx
+ addl %eax,%r11d
+ andl %r13d,%ebx
+ roll $1,%edx
+ addl %ebx,%r11d
+ roll $30,%r13d
+ movl %edx,0(%rsp)
+ addl %ecx,%r11d
+ movl 4(%rsp),%ebp
+ movl %r13d,%eax
+ movl %r13d,%ebx
+ xorl 12(%rsp),%ebp
+ andl %esi,%eax
+ movl %r11d,%ecx
+ xorl 36(%rsp),%ebp
+ xorl %esi,%ebx
+ leal -1894007588(%rdx,%rdi,1),%edi
+ roll $5,%ecx
+ xorl 56(%rsp),%ebp
+ addl %eax,%edi
+ andl %r12d,%ebx
+ roll $1,%ebp
+ addl %ebx,%edi
+ roll $30,%r12d
+ movl %ebp,4(%rsp)
+ addl %ecx,%edi
+ movl 8(%rsp),%edx
+ movl %r12d,%eax
+ movl %r12d,%ebx
+ xorl 16(%rsp),%edx
+ andl %r13d,%eax
+ movl %edi,%ecx
+ xorl 40(%rsp),%edx
+ xorl %r13d,%ebx
+ leal -1894007588(%rbp,%rsi,1),%esi
+ roll $5,%ecx
+ xorl 60(%rsp),%edx
+ addl %eax,%esi
+ andl %r11d,%ebx
+ roll $1,%edx
+ addl %ebx,%esi
+ roll $30,%r11d
+ movl %edx,8(%rsp)
+ addl %ecx,%esi
+ movl 12(%rsp),%ebp
+ movl %r11d,%eax
+ movl %r11d,%ebx
+ xorl 20(%rsp),%ebp
+ andl %r12d,%eax
+ movl %esi,%ecx
+ xorl 44(%rsp),%ebp
+ xorl %r12d,%ebx
+ leal -1894007588(%rdx,%r13,1),%r13d
+ roll $5,%ecx
+ xorl 0(%rsp),%ebp
+ addl %eax,%r13d
+ andl %edi,%ebx
+ roll $1,%ebp
+ addl %ebx,%r13d
+ roll $30,%edi
+ movl %ebp,12(%rsp)
+ addl %ecx,%r13d
+ movl 16(%rsp),%edx
+ movl %edi,%eax
+ movl %edi,%ebx
+ xorl 24(%rsp),%edx
+ andl %r11d,%eax
+ movl %r13d,%ecx
+ xorl 48(%rsp),%edx
+ xorl %r11d,%ebx
+ leal -1894007588(%rbp,%r12,1),%r12d
+ roll $5,%ecx
+ xorl 4(%rsp),%edx
+ addl %eax,%r12d
+ andl %esi,%ebx
+ roll $1,%edx
+ addl %ebx,%r12d
+ roll $30,%esi
+ movl %edx,16(%rsp)
+ addl %ecx,%r12d
+ movl 20(%rsp),%ebp
+ movl %esi,%eax
+ movl %esi,%ebx
+ xorl 28(%rsp),%ebp
+ andl %edi,%eax
+ movl %r12d,%ecx
+ xorl 52(%rsp),%ebp
+ xorl %edi,%ebx
+ leal -1894007588(%rdx,%r11,1),%r11d
+ roll $5,%ecx
+ xorl 8(%rsp),%ebp
+ addl %eax,%r11d
+ andl %r13d,%ebx
+ roll $1,%ebp
+ addl %ebx,%r11d
+ roll $30,%r13d
+ movl %ebp,20(%rsp)
+ addl %ecx,%r11d
+ movl 24(%rsp),%edx
+ movl %r13d,%eax
+ movl %r13d,%ebx
+ xorl 32(%rsp),%edx
+ andl %esi,%eax
+ movl %r11d,%ecx
+ xorl 56(%rsp),%edx
+ xorl %esi,%ebx
+ leal -1894007588(%rbp,%rdi,1),%edi
+ roll $5,%ecx
+ xorl 12(%rsp),%edx
+ addl %eax,%edi
+ andl %r12d,%ebx
+ roll $1,%edx
+ addl %ebx,%edi
+ roll $30,%r12d
+ movl %edx,24(%rsp)
+ addl %ecx,%edi
+ movl 28(%rsp),%ebp
+ movl %r12d,%eax
+ movl %r12d,%ebx
+ xorl 36(%rsp),%ebp
+ andl %r13d,%eax
+ movl %edi,%ecx
+ xorl 60(%rsp),%ebp
+ xorl %r13d,%ebx
+ leal -1894007588(%rdx,%rsi,1),%esi
+ roll $5,%ecx
+ xorl 16(%rsp),%ebp
+ addl %eax,%esi
+ andl %r11d,%ebx
+ roll $1,%ebp
+ addl %ebx,%esi
+ roll $30,%r11d
+ movl %ebp,28(%rsp)
+ addl %ecx,%esi
+ movl 32(%rsp),%edx
+ movl %r11d,%eax
+ movl %r11d,%ebx
+ xorl 40(%rsp),%edx
+ andl %r12d,%eax
+ movl %esi,%ecx
+ xorl 0(%rsp),%edx
+ xorl %r12d,%ebx
+ leal -1894007588(%rbp,%r13,1),%r13d
+ roll $5,%ecx
+ xorl 20(%rsp),%edx
+ addl %eax,%r13d
+ andl %edi,%ebx
+ roll $1,%edx
+ addl %ebx,%r13d
+ roll $30,%edi
+ movl %edx,32(%rsp)
+ addl %ecx,%r13d
+ movl 36(%rsp),%ebp
+ movl %edi,%eax
+ movl %edi,%ebx
+ xorl 44(%rsp),%ebp
+ andl %r11d,%eax
+ movl %r13d,%ecx
+ xorl 4(%rsp),%ebp
+ xorl %r11d,%ebx
+ leal -1894007588(%rdx,%r12,1),%r12d
+ roll $5,%ecx
+ xorl 24(%rsp),%ebp
+ addl %eax,%r12d
+ andl %esi,%ebx
+ roll $1,%ebp
+ addl %ebx,%r12d
+ roll $30,%esi
+ movl %ebp,36(%rsp)
+ addl %ecx,%r12d
+ movl 40(%rsp),%edx
+ movl %esi,%eax
+ movl %esi,%ebx
+ xorl 48(%rsp),%edx
+ andl %edi,%eax
+ movl %r12d,%ecx
+ xorl 8(%rsp),%edx
+ xorl %edi,%ebx
+ leal -1894007588(%rbp,%r11,1),%r11d
+ roll $5,%ecx
+ xorl 28(%rsp),%edx
+ addl %eax,%r11d
+ andl %r13d,%ebx
+ roll $1,%edx
+ addl %ebx,%r11d
+ roll $30,%r13d
+ movl %edx,40(%rsp)
+ addl %ecx,%r11d
+ movl 44(%rsp),%ebp
+ movl %r13d,%eax
+ movl %r13d,%ebx
+ xorl 52(%rsp),%ebp
+ andl %esi,%eax
+ movl %r11d,%ecx
+ xorl 12(%rsp),%ebp
+ xorl %esi,%ebx
+ leal -1894007588(%rdx,%rdi,1),%edi
+ roll $5,%ecx
+ xorl 32(%rsp),%ebp
+ addl %eax,%edi
+ andl %r12d,%ebx
+ roll $1,%ebp
+ addl %ebx,%edi
+ roll $30,%r12d
+ movl %ebp,44(%rsp)
+ addl %ecx,%edi
+ movl 48(%rsp),%edx
+ movl %r12d,%eax
+ movl %r12d,%ebx
+ xorl 56(%rsp),%edx
+ andl %r13d,%eax
+ movl %edi,%ecx
+ xorl 16(%rsp),%edx
+ xorl %r13d,%ebx
+ leal -1894007588(%rbp,%rsi,1),%esi
+ roll $5,%ecx
+ xorl 36(%rsp),%edx
+ addl %eax,%esi
+ andl %r11d,%ebx
+ roll $1,%edx
+ addl %ebx,%esi
+ roll $30,%r11d
+ movl %edx,48(%rsp)
+ addl %ecx,%esi
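+# Rounds 60-79: F(b,c,d) = b ^ c ^ d, K = 0xca62c1d6 (written as -899497514 below).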
+ movl 52(%rsp),%ebp
+ movl %r11d,%eax
+ movl %esi,%ecx
+ xorl 60(%rsp),%ebp
+ xorl %edi,%eax
+ roll $5,%ecx
+ leal -899497514(%rdx,%r13,1),%r13d
+ xorl 20(%rsp),%ebp
+ xorl %r12d,%eax
+ addl %ecx,%r13d
+ xorl 40(%rsp),%ebp
+ roll $30,%edi
+ addl %eax,%r13d
+ roll $1,%ebp
+ movl %ebp,52(%rsp)
+ movl 56(%rsp),%edx
+ movl %edi,%eax
+ movl %r13d,%ecx
+ xorl 0(%rsp),%edx
+ xorl %esi,%eax
+ roll $5,%ecx
+ leal -899497514(%rbp,%r12,1),%r12d
+ xorl 24(%rsp),%edx
+ xorl %r11d,%eax
+ addl %ecx,%r12d
+ xorl 44(%rsp),%edx
+ roll $30,%esi
+ addl %eax,%r12d
+ roll $1,%edx
+ movl %edx,56(%rsp)
+ movl 60(%rsp),%ebp
+ movl %esi,%eax
+ movl %r12d,%ecx
+ xorl 4(%rsp),%ebp
+ xorl %r13d,%eax
+ roll $5,%ecx
+ leal -899497514(%rdx,%r11,1),%r11d
+ xorl 28(%rsp),%ebp
+ xorl %edi,%eax
+ addl %ecx,%r11d
+ xorl 48(%rsp),%ebp
+ roll $30,%r13d
+ addl %eax,%r11d
+ roll $1,%ebp
+ movl %ebp,60(%rsp)
+ movl 0(%rsp),%edx
+ movl %r13d,%eax
+ movl %r11d,%ecx
+ xorl 8(%rsp),%edx
+ xorl %r12d,%eax
+ roll $5,%ecx
+ leal -899497514(%rbp,%rdi,1),%edi
+ xorl 32(%rsp),%edx
+ xorl %esi,%eax
+ addl %ecx,%edi
+ xorl 52(%rsp),%edx
+ roll $30,%r12d
+ addl %eax,%edi
+ roll $1,%edx
+ movl %edx,0(%rsp)
+ movl 4(%rsp),%ebp
+ movl %r12d,%eax
+ movl %edi,%ecx
+ xorl 12(%rsp),%ebp
+ xorl %r11d,%eax
+ roll $5,%ecx
+ leal -899497514(%rdx,%rsi,1),%esi
+ xorl 36(%rsp),%ebp
+ xorl %r13d,%eax
+ addl %ecx,%esi
+ xorl 56(%rsp),%ebp
+ roll $30,%r11d
+ addl %eax,%esi
+ roll $1,%ebp
+ movl %ebp,4(%rsp)
+ movl 8(%rsp),%edx
+ movl %r11d,%eax
+ movl %esi,%ecx
+ xorl 16(%rsp),%edx
+ xorl %edi,%eax
+ roll $5,%ecx
+ leal -899497514(%rbp,%r13,1),%r13d
+ xorl 40(%rsp),%edx
+ xorl %r12d,%eax
+ addl %ecx,%r13d
+ xorl 60(%rsp),%edx
+ roll $30,%edi
+ addl %eax,%r13d
+ roll $1,%edx
+ movl %edx,8(%rsp)
+ movl 12(%rsp),%ebp
+ movl %edi,%eax
+ movl %r13d,%ecx
+ xorl 20(%rsp),%ebp
+ xorl %esi,%eax
+ roll $5,%ecx
+ leal -899497514(%rdx,%r12,1),%r12d
+ xorl 44(%rsp),%ebp
+ xorl %r11d,%eax
+ addl %ecx,%r12d
+ xorl 0(%rsp),%ebp
+ roll $30,%esi
+ addl %eax,%r12d
+ roll $1,%ebp
+ movl %ebp,12(%rsp)
+ movl 16(%rsp),%edx
+ movl %esi,%eax
+ movl %r12d,%ecx
+ xorl 24(%rsp),%edx
+ xorl %r13d,%eax
+ roll $5,%ecx
+ leal -899497514(%rbp,%r11,1),%r11d
+ xorl 48(%rsp),%edx
+ xorl %edi,%eax
+ addl %ecx,%r11d
+ xorl 4(%rsp),%edx
+ roll $30,%r13d
+ addl %eax,%r11d
+ roll $1,%edx
+ movl %edx,16(%rsp)
+ movl 20(%rsp),%ebp
+ movl %r13d,%eax
+ movl %r11d,%ecx
+ xorl 28(%rsp),%ebp
+ xorl %r12d,%eax
+ roll $5,%ecx
+ leal -899497514(%rdx,%rdi,1),%edi
+ xorl 52(%rsp),%ebp
+ xorl %esi,%eax
+ addl %ecx,%edi
+ xorl 8(%rsp),%ebp
+ roll $30,%r12d
+ addl %eax,%edi
+ roll $1,%ebp
+ movl %ebp,20(%rsp)
+ movl 24(%rsp),%edx
+ movl %r12d,%eax
+ movl %edi,%ecx
+ xorl 32(%rsp),%edx
+ xorl %r11d,%eax
+ roll $5,%ecx
+ leal -899497514(%rbp,%rsi,1),%esi
+ xorl 56(%rsp),%edx
+ xorl %r13d,%eax
+ addl %ecx,%esi
+ xorl 12(%rsp),%edx
+ roll $30,%r11d
+ addl %eax,%esi
+ roll $1,%edx
+ movl %edx,24(%rsp)
+ movl 28(%rsp),%ebp
+ movl %r11d,%eax
+ movl %esi,%ecx
+ xorl 36(%rsp),%ebp
+ xorl %edi,%eax
+ roll $5,%ecx
+ leal -899497514(%rdx,%r13,1),%r13d
+ xorl 60(%rsp),%ebp
+ xorl %r12d,%eax
+ addl %ecx,%r13d
+ xorl 16(%rsp),%ebp
+ roll $30,%edi
+ addl %eax,%r13d
+ roll $1,%ebp
+ movl %ebp,28(%rsp)
+ movl 32(%rsp),%edx
+ movl %edi,%eax
+ movl %r13d,%ecx
+ xorl 40(%rsp),%edx
+ xorl %esi,%eax
+ roll $5,%ecx
+ leal -899497514(%rbp,%r12,1),%r12d
+ xorl 0(%rsp),%edx
+ xorl %r11d,%eax
+ addl %ecx,%r12d
+ xorl 20(%rsp),%edx
+ roll $30,%esi
+ addl %eax,%r12d
+ roll $1,%edx
+ movl %edx,32(%rsp)
+ movl 36(%rsp),%ebp
+ movl %esi,%eax
+ movl %r12d,%ecx
+ xorl 44(%rsp),%ebp
+ xorl %r13d,%eax
+ roll $5,%ecx
+ leal -899497514(%rdx,%r11,1),%r11d
+ xorl 4(%rsp),%ebp
+ xorl %edi,%eax
+ addl %ecx,%r11d
+ xorl 24(%rsp),%ebp
+ roll $30,%r13d
+ addl %eax,%r11d
+ roll $1,%ebp
+ movl %ebp,36(%rsp)
+ movl 40(%rsp),%edx
+ movl %r13d,%eax
+ movl %r11d,%ecx
+ xorl 48(%rsp),%edx
+ xorl %r12d,%eax
+ roll $5,%ecx
+ leal -899497514(%rbp,%rdi,1),%edi
+ xorl 8(%rsp),%edx
+ xorl %esi,%eax
+ addl %ecx,%edi
+ xorl 28(%rsp),%edx
+ roll $30,%r12d
+ addl %eax,%edi
+ roll $1,%edx
+ movl %edx,40(%rsp)
+ movl 44(%rsp),%ebp
+ movl %r12d,%eax
+ movl %edi,%ecx
+ xorl 52(%rsp),%ebp
+ xorl %r11d,%eax
+ roll $5,%ecx
+ leal -899497514(%rdx,%rsi,1),%esi
+ xorl 12(%rsp),%ebp
+ xorl %r13d,%eax
+ addl %ecx,%esi
+ xorl 32(%rsp),%ebp
+ roll $30,%r11d
+ addl %eax,%esi
+ roll $1,%ebp
+ movl %ebp,44(%rsp)
+ movl 48(%rsp),%edx
+ movl %r11d,%eax
+ movl %esi,%ecx
+ xorl 56(%rsp),%edx
+ xorl %edi,%eax
+ roll $5,%ecx
+ leal -899497514(%rbp,%r13,1),%r13d
+ xorl 16(%rsp),%edx
+ xorl %r12d,%eax
+ addl %ecx,%r13d
+ xorl 36(%rsp),%edx
+ roll $30,%edi
+ addl %eax,%r13d
+ roll $1,%edx
+ movl %edx,48(%rsp)
+ movl 52(%rsp),%ebp
+ movl %edi,%eax
+ movl %r13d,%ecx
+ xorl 60(%rsp),%ebp
+ xorl %esi,%eax
+ roll $5,%ecx
+ leal -899497514(%rdx,%r12,1),%r12d
+ xorl 20(%rsp),%ebp
+ xorl %r11d,%eax
+ addl %ecx,%r12d
+ xorl 40(%rsp),%ebp
+ roll $30,%esi
+ addl %eax,%r12d
+ roll $1,%ebp
+ movl 56(%rsp),%edx
+ movl %esi,%eax
+ movl %r12d,%ecx
+ xorl 0(%rsp),%edx
+ xorl %r13d,%eax
+ roll $5,%ecx
+ leal -899497514(%rbp,%r11,1),%r11d
+ xorl 24(%rsp),%edx
+ xorl %edi,%eax
+ addl %ecx,%r11d
+ xorl 44(%rsp),%edx
+ roll $30,%r13d
+ addl %eax,%r11d
+ roll $1,%edx
+ movl 60(%rsp),%ebp
+ movl %r13d,%eax
+ movl %r11d,%ecx
+ xorl 4(%rsp),%ebp
+ xorl %r12d,%eax
+ roll $5,%ecx
+ leal -899497514(%rdx,%rdi,1),%edi
+ xorl 28(%rsp),%ebp
+ xorl %esi,%eax
+ addl %ecx,%edi
+ xorl 48(%rsp),%ebp
+ roll $30,%r12d
+ addl %eax,%edi
+ roll $1,%ebp
+ movl %r12d,%eax
+ movl %edi,%ecx
+ xorl %r11d,%eax
+ leal -899497514(%rbp,%rsi,1),%esi
+ roll $5,%ecx
+ xorl %r13d,%eax
+ addl %ecx,%esi
+ roll $30,%r11d
+ addl %eax,%esi
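+# End of the 80 rounds: fold the working variables back into the five 32-bit
+# chaining values that %r8 points at.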
+ addl 0(%r8),%esi
+ addl 4(%r8),%edi
+ addl 8(%r8),%r11d
+ addl 12(%r8),%r12d
+ addl 16(%r8),%r13d
+ movl %esi,0(%r8)
+ movl %edi,4(%r8)
+ movl %r11d,8(%r8)
+ movl %r12d,12(%r8)
+ movl %r13d,16(%r8)
+
+ subq $1,%r10
+ leaq 64(%r9),%r9
+ jnz .Lloop
+
+ movq 64(%rsp),%rsi
+ movq (%rsi),%r13
+ movq 8(%rsi),%r12
+ movq 16(%rsi),%rbp
+ movq 24(%rsi),%rbx
+ leaq 32(%rsi),%rsp
+.Lepilogue:
+ .byte 0xf3,0xc3
+.size sha1_block_data_order,.-sha1_block_data_order
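+# SSSE3 variant of the block transform.  The message schedule is computed four
+# words at a time in XMM registers and interleaved with the scalar rounds; the
+# .byte sequences below are hand-encoded pshufb/palignr instructions.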
+.type sha1_block_data_order_ssse3,@function
+.align 16
+sha1_block_data_order_ssse3:
+_ssse3_shortcut:
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ leaq -64(%rsp),%rsp
+ movq %rdi,%r8
+ movq %rsi,%r9
+ movq %rdx,%r10
+
+ shlq $6,%r10
+ addq %r9,%r10
+ leaq K_XX_XX(%rip),%r11
+
+ movl 0(%r8),%eax
+ movl 4(%r8),%ebx
+ movl 8(%r8),%ecx
+ movl 12(%r8),%edx
+ movl %ebx,%esi
+ movl 16(%r8),%ebp
+
+ movdqa 64(%r11),%xmm6
+ movdqa 0(%r11),%xmm9
+ movdqu 0(%r9),%xmm0
+ movdqu 16(%r9),%xmm1
+ movdqu 32(%r9),%xmm2
+ movdqu 48(%r9),%xmm3
+.byte 102,15,56,0,198
+ addq $64,%r9
+.byte 102,15,56,0,206
+.byte 102,15,56,0,214
+.byte 102,15,56,0,222
+ paddd %xmm9,%xmm0
+ paddd %xmm9,%xmm1
+ paddd %xmm9,%xmm2
+ movdqa %xmm0,0(%rsp)
+ psubd %xmm9,%xmm0
+ movdqa %xmm1,16(%rsp)
+ psubd %xmm9,%xmm1
+ movdqa %xmm2,32(%rsp)
+ psubd %xmm9,%xmm2
+ jmp .Loop_ssse3
+.align 16
+.Loop_ssse3:
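+# Each iteration processes one 64-byte block: the slots at 0-63(%rsp) hold the
+# schedule words with K already added, while future schedule words are updated
+# in the %xmm registers alongside the scalar rounds.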
+ movdqa %xmm1,%xmm4
+ addl 0(%rsp),%ebp
+ xorl %edx,%ecx
+ movdqa %xmm3,%xmm8
+.byte 102,15,58,15,224,8
+ movl %eax,%edi
+ roll $5,%eax
+ paddd %xmm3,%xmm9
+ andl %ecx,%esi
+ xorl %edx,%ecx
+ psrldq $4,%xmm8
+ xorl %edx,%esi
+ addl %eax,%ebp
+ pxor %xmm0,%xmm4
+ rorl $2,%ebx
+ addl %esi,%ebp
+ pxor %xmm2,%xmm8
+ addl 4(%rsp),%edx
+ xorl %ecx,%ebx
+ movl %ebp,%esi
+ roll $5,%ebp
+ pxor %xmm8,%xmm4
+ andl %ebx,%edi
+ xorl %ecx,%ebx
+ movdqa %xmm9,48(%rsp)
+ xorl %ecx,%edi
+ addl %ebp,%edx
+ movdqa %xmm4,%xmm10
+ movdqa %xmm4,%xmm8
+ rorl $7,%eax
+ addl %edi,%edx
+ addl 8(%rsp),%ecx
+ xorl %ebx,%eax
+ pslldq $12,%xmm10
+ paddd %xmm4,%xmm4
+ movl %edx,%edi
+ roll $5,%edx
+ andl %eax,%esi
+ xorl %ebx,%eax
+ psrld $31,%xmm8
+ xorl %ebx,%esi
+ addl %edx,%ecx
+ movdqa %xmm10,%xmm9
+ rorl $7,%ebp
+ addl %esi,%ecx
+ psrld $30,%xmm10
+ por %xmm8,%xmm4
+ addl 12(%rsp),%ebx
+ xorl %eax,%ebp
+ movl %ecx,%esi
+ roll $5,%ecx
+ pslld $2,%xmm9
+ pxor %xmm10,%xmm4
+ andl %ebp,%edi
+ xorl %eax,%ebp
+ movdqa 0(%r11),%xmm10
+ xorl %eax,%edi
+ addl %ecx,%ebx
+ pxor %xmm9,%xmm4
+ rorl $7,%edx
+ addl %edi,%ebx
+ movdqa %xmm2,%xmm5
+ addl 16(%rsp),%eax
+ xorl %ebp,%edx
+ movdqa %xmm4,%xmm9
+.byte 102,15,58,15,233,8
+ movl %ebx,%edi
+ roll $5,%ebx
+ paddd %xmm4,%xmm10
+ andl %edx,%esi
+ xorl %ebp,%edx
+ psrldq $4,%xmm9
+ xorl %ebp,%esi
+ addl %ebx,%eax
+ pxor %xmm1,%xmm5
+ rorl $7,%ecx
+ addl %esi,%eax
+ pxor %xmm3,%xmm9
+ addl 20(%rsp),%ebp
+ xorl %edx,%ecx
+ movl %eax,%esi
+ roll $5,%eax
+ pxor %xmm9,%xmm5
+ andl %ecx,%edi
+ xorl %edx,%ecx
+ movdqa %xmm10,0(%rsp)
+ xorl %edx,%edi
+ addl %eax,%ebp
+ movdqa %xmm5,%xmm8
+ movdqa %xmm5,%xmm9
+ rorl $7,%ebx
+ addl %edi,%ebp
+ addl 24(%rsp),%edx
+ xorl %ecx,%ebx
+ pslldq $12,%xmm8
+ paddd %xmm5,%xmm5
+ movl %ebp,%edi
+ roll $5,%ebp
+ andl %ebx,%esi
+ xorl %ecx,%ebx
+ psrld $31,%xmm9
+ xorl %ecx,%esi
+ addl %ebp,%edx
+ movdqa %xmm8,%xmm10
+ rorl $7,%eax
+ addl %esi,%edx
+ psrld $30,%xmm8
+ por %xmm9,%xmm5
+ addl 28(%rsp),%ecx
+ xorl %ebx,%eax
+ movl %edx,%esi
+ roll $5,%edx
+ pslld $2,%xmm10
+ pxor %xmm8,%xmm5
+ andl %eax,%edi
+ xorl %ebx,%eax
+ movdqa 16(%r11),%xmm8
+ xorl %ebx,%edi
+ addl %edx,%ecx
+ pxor %xmm10,%xmm5
+ rorl $7,%ebp
+ addl %edi,%ecx
+ movdqa %xmm3,%xmm6
+ addl 32(%rsp),%ebx
+ xorl %eax,%ebp
+ movdqa %xmm5,%xmm10
+.byte 102,15,58,15,242,8
+ movl %ecx,%edi
+ roll $5,%ecx
+ paddd %xmm5,%xmm8
+ andl %ebp,%esi
+ xorl %eax,%ebp
+ psrldq $4,%xmm10
+ xorl %eax,%esi
+ addl %ecx,%ebx
+ pxor %xmm2,%xmm6
+ rorl $7,%edx
+ addl %esi,%ebx
+ pxor %xmm4,%xmm10
+ addl 36(%rsp),%eax
+ xorl %ebp,%edx
+ movl %ebx,%esi
+ roll $5,%ebx
+ pxor %xmm10,%xmm6
+ andl %edx,%edi
+ xorl %ebp,%edx
+ movdqa %xmm8,16(%rsp)
+ xorl %ebp,%edi
+ addl %ebx,%eax
+ movdqa %xmm6,%xmm9
+ movdqa %xmm6,%xmm10
+ rorl $7,%ecx
+ addl %edi,%eax
+ addl 40(%rsp),%ebp
+ xorl %edx,%ecx
+ pslldq $12,%xmm9
+ paddd %xmm6,%xmm6
+ movl %eax,%edi
+ roll $5,%eax
+ andl %ecx,%esi
+ xorl %edx,%ecx
+ psrld $31,%xmm10
+ xorl %edx,%esi
+ addl %eax,%ebp
+ movdqa %xmm9,%xmm8
+ rorl $7,%ebx
+ addl %esi,%ebp
+ psrld $30,%xmm9
+ por %xmm10,%xmm6
+ addl 44(%rsp),%edx
+ xorl %ecx,%ebx
+ movl %ebp,%esi
+ roll $5,%ebp
+ pslld $2,%xmm8
+ pxor %xmm9,%xmm6
+ andl %ebx,%edi
+ xorl %ecx,%ebx
+ movdqa 16(%r11),%xmm9
+ xorl %ecx,%edi
+ addl %ebp,%edx
+ pxor %xmm8,%xmm6
+ rorl $7,%eax
+ addl %edi,%edx
+ movdqa %xmm4,%xmm7
+ addl 48(%rsp),%ecx
+ xorl %ebx,%eax
+ movdqa %xmm6,%xmm8
+.byte 102,15,58,15,251,8
+ movl %edx,%edi
+ roll $5,%edx
+ paddd %xmm6,%xmm9
+ andl %eax,%esi
+ xorl %ebx,%eax
+ psrldq $4,%xmm8
+ xorl %ebx,%esi
+ addl %edx,%ecx
+ pxor %xmm3,%xmm7
+ rorl $7,%ebp
+ addl %esi,%ecx
+ pxor %xmm5,%xmm8
+ addl 52(%rsp),%ebx
+ xorl %eax,%ebp
+ movl %ecx,%esi
+ roll $5,%ecx
+ pxor %xmm8,%xmm7
+ andl %ebp,%edi
+ xorl %eax,%ebp
+ movdqa %xmm9,32(%rsp)
+ xorl %eax,%edi
+ addl %ecx,%ebx
+ movdqa %xmm7,%xmm10
+ movdqa %xmm7,%xmm8
+ rorl $7,%edx
+ addl %edi,%ebx
+ addl 56(%rsp),%eax
+ xorl %ebp,%edx
+ pslldq $12,%xmm10
+ paddd %xmm7,%xmm7
+ movl %ebx,%edi
+ roll $5,%ebx
+ andl %edx,%esi
+ xorl %ebp,%edx
+ psrld $31,%xmm8
+ xorl %ebp,%esi
+ addl %ebx,%eax
+ movdqa %xmm10,%xmm9
+ rorl $7,%ecx
+ addl %esi,%eax
+ psrld $30,%xmm10
+ por %xmm8,%xmm7
+ addl 60(%rsp),%ebp
+ xorl %edx,%ecx
+ movl %eax,%esi
+ roll $5,%eax
+ pslld $2,%xmm9
+ pxor %xmm10,%xmm7
+ andl %ecx,%edi
+ xorl %edx,%ecx
+ movdqa 16(%r11),%xmm10
+ xorl %edx,%edi
+ addl %eax,%ebp
+ pxor %xmm9,%xmm7
+ rorl $7,%ebx
+ addl %edi,%ebp
+ movdqa %xmm7,%xmm9
+ addl 0(%rsp),%edx
+ pxor %xmm4,%xmm0
+.byte 102,68,15,58,15,206,8
+ xorl %ecx,%ebx
+ movl %ebp,%edi
+ roll $5,%ebp
+ pxor %xmm1,%xmm0
+ andl %ebx,%esi
+ xorl %ecx,%ebx
+ movdqa %xmm10,%xmm8
+ paddd %xmm7,%xmm10
+ xorl %ecx,%esi
+ addl %ebp,%edx
+ pxor %xmm9,%xmm0
+ rorl $7,%eax
+ addl %esi,%edx
+ addl 4(%rsp),%ecx
+ xorl %ebx,%eax
+ movdqa %xmm0,%xmm9
+ movdqa %xmm10,48(%rsp)
+ movl %edx,%esi
+ roll $5,%edx
+ andl %eax,%edi
+ xorl %ebx,%eax
+ pslld $2,%xmm0
+ xorl %ebx,%edi
+ addl %edx,%ecx
+ psrld $30,%xmm9
+ rorl $7,%ebp
+ addl %edi,%ecx
+ addl 8(%rsp),%ebx
+ xorl %eax,%ebp
+ movl %ecx,%edi
+ roll $5,%ecx
+ por %xmm9,%xmm0
+ andl %ebp,%esi
+ xorl %eax,%ebp
+ movdqa %xmm0,%xmm10
+ xorl %eax,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ addl 12(%rsp),%eax
+ xorl %ebp,%edx
+ movl %ebx,%esi
+ roll $5,%ebx
+ andl %edx,%edi
+ xorl %ebp,%edx
+ xorl %ebp,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ addl 16(%rsp),%ebp
+ pxor %xmm5,%xmm1
+.byte 102,68,15,58,15,215,8
+ xorl %edx,%esi
+ movl %eax,%edi
+ roll $5,%eax
+ pxor %xmm2,%xmm1
+ xorl %ecx,%esi
+ addl %eax,%ebp
+ movdqa %xmm8,%xmm9
+ paddd %xmm0,%xmm8
+ rorl $7,%ebx
+ addl %esi,%ebp
+ pxor %xmm10,%xmm1
+ addl 20(%rsp),%edx
+ xorl %ecx,%edi
+ movl %ebp,%esi
+ roll $5,%ebp
+ movdqa %xmm1,%xmm10
+ movdqa %xmm8,0(%rsp)
+ xorl %ebx,%edi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %edi,%edx
+ pslld $2,%xmm1
+ addl 24(%rsp),%ecx
+ xorl %ebx,%esi
+ psrld $30,%xmm10
+ movl %edx,%edi
+ roll $5,%edx
+ xorl %eax,%esi
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %esi,%ecx
+ por %xmm10,%xmm1
+ addl 28(%rsp),%ebx
+ xorl %eax,%edi
+ movdqa %xmm1,%xmm8
+ movl %ecx,%esi
+ roll $5,%ecx
+ xorl %ebp,%edi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %edi,%ebx
+ addl 32(%rsp),%eax
+ pxor %xmm6,%xmm2
+.byte 102,68,15,58,15,192,8
+ xorl %ebp,%esi
+ movl %ebx,%edi
+ roll $5,%ebx
+ pxor %xmm3,%xmm2
+ xorl %edx,%esi
+ addl %ebx,%eax
+ movdqa 32(%r11),%xmm10
+ paddd %xmm1,%xmm9
+ rorl $7,%ecx
+ addl %esi,%eax
+ pxor %xmm8,%xmm2
+ addl 36(%rsp),%ebp
+ xorl %edx,%edi
+ movl %eax,%esi
+ roll $5,%eax
+ movdqa %xmm2,%xmm8
+ movdqa %xmm9,16(%rsp)
+ xorl %ecx,%edi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %edi,%ebp
+ pslld $2,%xmm2
+ addl 40(%rsp),%edx
+ xorl %ecx,%esi
+ psrld $30,%xmm8
+ movl %ebp,%edi
+ roll $5,%ebp
+ xorl %ebx,%esi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %esi,%edx
+ por %xmm8,%xmm2
+ addl 44(%rsp),%ecx
+ xorl %ebx,%edi
+ movdqa %xmm2,%xmm9
+ movl %edx,%esi
+ roll $5,%edx
+ xorl %eax,%edi
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %edi,%ecx
+ addl 48(%rsp),%ebx
+ pxor %xmm7,%xmm3
+.byte 102,68,15,58,15,201,8
+ xorl %eax,%esi
+ movl %ecx,%edi
+ roll $5,%ecx
+ pxor %xmm4,%xmm3
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ movdqa %xmm10,%xmm8
+ paddd %xmm2,%xmm10
+ rorl $7,%edx
+ addl %esi,%ebx
+ pxor %xmm9,%xmm3
+ addl 52(%rsp),%eax
+ xorl %ebp,%edi
+ movl %ebx,%esi
+ roll $5,%ebx
+ movdqa %xmm3,%xmm9
+ movdqa %xmm10,32(%rsp)
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ pslld $2,%xmm3
+ addl 56(%rsp),%ebp
+ xorl %edx,%esi
+ psrld $30,%xmm9
+ movl %eax,%edi
+ roll $5,%eax
+ xorl %ecx,%esi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %esi,%ebp
+ por %xmm9,%xmm3
+ addl 60(%rsp),%edx
+ xorl %ecx,%edi
+ movdqa %xmm3,%xmm10
+ movl %ebp,%esi
+ roll $5,%ebp
+ xorl %ebx,%edi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %edi,%edx
+ addl 0(%rsp),%ecx
+ pxor %xmm0,%xmm4
+.byte 102,68,15,58,15,210,8
+ xorl %ebx,%esi
+ movl %edx,%edi
+ roll $5,%edx
+ pxor %xmm5,%xmm4
+ xorl %eax,%esi
+ addl %edx,%ecx
+ movdqa %xmm8,%xmm9
+ paddd %xmm3,%xmm8
+ rorl $7,%ebp
+ addl %esi,%ecx
+ pxor %xmm10,%xmm4
+ addl 4(%rsp),%ebx
+ xorl %eax,%edi
+ movl %ecx,%esi
+ roll $5,%ecx
+ movdqa %xmm4,%xmm10
+ movdqa %xmm8,48(%rsp)
+ xorl %ebp,%edi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %edi,%ebx
+ pslld $2,%xmm4
+ addl 8(%rsp),%eax
+ xorl %ebp,%esi
+ psrld $30,%xmm10
+ movl %ebx,%edi
+ roll $5,%ebx
+ xorl %edx,%esi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %esi,%eax
+ por %xmm10,%xmm4
+ addl 12(%rsp),%ebp
+ xorl %edx,%edi
+ movdqa %xmm4,%xmm8
+ movl %eax,%esi
+ roll $5,%eax
+ xorl %ecx,%edi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %edi,%ebp
+ addl 16(%rsp),%edx
+ pxor %xmm1,%xmm5
+.byte 102,68,15,58,15,195,8
+ xorl %ecx,%esi
+ movl %ebp,%edi
+ roll $5,%ebp
+ pxor %xmm6,%xmm5
+ xorl %ebx,%esi
+ addl %ebp,%edx
+ movdqa %xmm9,%xmm10
+ paddd %xmm4,%xmm9
+ rorl $7,%eax
+ addl %esi,%edx
+ pxor %xmm8,%xmm5
+ addl 20(%rsp),%ecx
+ xorl %ebx,%edi
+ movl %edx,%esi
+ roll $5,%edx
+ movdqa %xmm5,%xmm8
+ movdqa %xmm9,0(%rsp)
+ xorl %eax,%edi
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %edi,%ecx
+ pslld $2,%xmm5
+ addl 24(%rsp),%ebx
+ xorl %eax,%esi
+ psrld $30,%xmm8
+ movl %ecx,%edi
+ roll $5,%ecx
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ por %xmm8,%xmm5
+ addl 28(%rsp),%eax
+ xorl %ebp,%edi
+ movdqa %xmm5,%xmm9
+ movl %ebx,%esi
+ roll $5,%ebx
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ movl %ecx,%edi
+ pxor %xmm2,%xmm6
+.byte 102,68,15,58,15,204,8
+ xorl %edx,%ecx
+ addl 32(%rsp),%ebp
+ andl %edx,%edi
+ pxor %xmm7,%xmm6
+ andl %ecx,%esi
+ rorl $7,%ebx
+ movdqa %xmm10,%xmm8
+ paddd %xmm5,%xmm10
+ addl %edi,%ebp
+ movl %eax,%edi
+ pxor %xmm9,%xmm6
+ roll $5,%eax
+ addl %esi,%ebp
+ xorl %edx,%ecx
+ addl %eax,%ebp
+ movdqa %xmm6,%xmm9
+ movdqa %xmm10,16(%rsp)
+ movl %ebx,%esi
+ xorl %ecx,%ebx
+ addl 36(%rsp),%edx
+ andl %ecx,%esi
+ pslld $2,%xmm6
+ andl %ebx,%edi
+ rorl $7,%eax
+ psrld $30,%xmm9
+ addl %esi,%edx
+ movl %ebp,%esi
+ roll $5,%ebp
+ addl %edi,%edx
+ xorl %ecx,%ebx
+ addl %ebp,%edx
+ por %xmm9,%xmm6
+ movl %eax,%edi
+ xorl %ebx,%eax
+ movdqa %xmm6,%xmm10
+ addl 40(%rsp),%ecx
+ andl %ebx,%edi
+ andl %eax,%esi
+ rorl $7,%ebp
+ addl %edi,%ecx
+ movl %edx,%edi
+ roll $5,%edx
+ addl %esi,%ecx
+ xorl %ebx,%eax
+ addl %edx,%ecx
+ movl %ebp,%esi
+ xorl %eax,%ebp
+ addl 44(%rsp),%ebx
+ andl %eax,%esi
+ andl %ebp,%edi
+ rorl $7,%edx
+ addl %esi,%ebx
+ movl %ecx,%esi
+ roll $5,%ecx
+ addl %edi,%ebx
+ xorl %eax,%ebp
+ addl %ecx,%ebx
+ movl %edx,%edi
+ pxor %xmm3,%xmm7
+.byte 102,68,15,58,15,213,8
+ xorl %ebp,%edx
+ addl 48(%rsp),%eax
+ andl %ebp,%edi
+ pxor %xmm0,%xmm7
+ andl %edx,%esi
+ rorl $7,%ecx
+ movdqa 48(%r11),%xmm9
+ paddd %xmm6,%xmm8
+ addl %edi,%eax
+ movl %ebx,%edi
+ pxor %xmm10,%xmm7
+ roll $5,%ebx
+ addl %esi,%eax
+ xorl %ebp,%edx
+ addl %ebx,%eax
+ movdqa %xmm7,%xmm10
+ movdqa %xmm8,32(%rsp)
+ movl %ecx,%esi
+ xorl %edx,%ecx
+ addl 52(%rsp),%ebp
+ andl %edx,%esi
+ pslld $2,%xmm7
+ andl %ecx,%edi
+ rorl $7,%ebx
+ psrld $30,%xmm10
+ addl %esi,%ebp
+ movl %eax,%esi
+ roll $5,%eax
+ addl %edi,%ebp
+ xorl %edx,%ecx
+ addl %eax,%ebp
+ por %xmm10,%xmm7
+ movl %ebx,%edi
+ xorl %ecx,%ebx
+ movdqa %xmm7,%xmm8
+ addl 56(%rsp),%edx
+ andl %ecx,%edi
+ andl %ebx,%esi
+ rorl $7,%eax
+ addl %edi,%edx
+ movl %ebp,%edi
+ roll $5,%ebp
+ addl %esi,%edx
+ xorl %ecx,%ebx
+ addl %ebp,%edx
+ movl %eax,%esi
+ xorl %ebx,%eax
+ addl 60(%rsp),%ecx
+ andl %ebx,%esi
+ andl %eax,%edi
+ rorl $7,%ebp
+ addl %esi,%ecx
+ movl %edx,%esi
+ roll $5,%edx
+ addl %edi,%ecx
+ xorl %ebx,%eax
+ addl %edx,%ecx
+ movl %ebp,%edi
+ pxor %xmm4,%xmm0
+.byte 102,68,15,58,15,198,8
+ xorl %eax,%ebp
+ addl 0(%rsp),%ebx
+ andl %eax,%edi
+ pxor %xmm1,%xmm0
+ andl %ebp,%esi
+ rorl $7,%edx
+ movdqa %xmm9,%xmm10
+ paddd %xmm7,%xmm9
+ addl %edi,%ebx
+ movl %ecx,%edi
+ pxor %xmm8,%xmm0
+ roll $5,%ecx
+ addl %esi,%ebx
+ xorl %eax,%ebp
+ addl %ecx,%ebx
+ movdqa %xmm0,%xmm8
+ movdqa %xmm9,48(%rsp)
+ movl %edx,%esi
+ xorl %ebp,%edx
+ addl 4(%rsp),%eax
+ andl %ebp,%esi
+ pslld $2,%xmm0
+ andl %edx,%edi
+ rorl $7,%ecx
+ psrld $30,%xmm8
+ addl %esi,%eax
+ movl %ebx,%esi
+ roll $5,%ebx
+ addl %edi,%eax
+ xorl %ebp,%edx
+ addl %ebx,%eax
+ por %xmm8,%xmm0
+ movl %ecx,%edi
+ xorl %edx,%ecx
+ movdqa %xmm0,%xmm9
+ addl 8(%rsp),%ebp
+ andl %edx,%edi
+ andl %ecx,%esi
+ rorl $7,%ebx
+ addl %edi,%ebp
+ movl %eax,%edi
+ roll $5,%eax
+ addl %esi,%ebp
+ xorl %edx,%ecx
+ addl %eax,%ebp
+ movl %ebx,%esi
+ xorl %ecx,%ebx
+ addl 12(%rsp),%edx
+ andl %ecx,%esi
+ andl %ebx,%edi
+ rorl $7,%eax
+ addl %esi,%edx
+ movl %ebp,%esi
+ roll $5,%ebp
+ addl %edi,%edx
+ xorl %ecx,%ebx
+ addl %ebp,%edx
+ movl %eax,%edi
+ pxor %xmm5,%xmm1
+.byte 102,68,15,58,15,207,8
+ xorl %ebx,%eax
+ addl 16(%rsp),%ecx
+ andl %ebx,%edi
+ pxor %xmm2,%xmm1
+ andl %eax,%esi
+ rorl $7,%ebp
+ movdqa %xmm10,%xmm8
+ paddd %xmm0,%xmm10
+ addl %edi,%ecx
+ movl %edx,%edi
+ pxor %xmm9,%xmm1
+ roll $5,%edx
+ addl %esi,%ecx
+ xorl %ebx,%eax
+ addl %edx,%ecx
+ movdqa %xmm1,%xmm9
+ movdqa %xmm10,0(%rsp)
+ movl %ebp,%esi
+ xorl %eax,%ebp
+ addl 20(%rsp),%ebx
+ andl %eax,%esi
+ pslld $2,%xmm1
+ andl %ebp,%edi
+ rorl $7,%edx
+ psrld $30,%xmm9
+ addl %esi,%ebx
+ movl %ecx,%esi
+ roll $5,%ecx
+ addl %edi,%ebx
+ xorl %eax,%ebp
+ addl %ecx,%ebx
+ por %xmm9,%xmm1
+ movl %edx,%edi
+ xorl %ebp,%edx
+ movdqa %xmm1,%xmm10
+ addl 24(%rsp),%eax
+ andl %ebp,%edi
+ andl %edx,%esi
+ rorl $7,%ecx
+ addl %edi,%eax
+ movl %ebx,%edi
+ roll $5,%ebx
+ addl %esi,%eax
+ xorl %ebp,%edx
+ addl %ebx,%eax
+ movl %ecx,%esi
+ xorl %edx,%ecx
+ addl 28(%rsp),%ebp
+ andl %edx,%esi
+ andl %ecx,%edi
+ rorl $7,%ebx
+ addl %esi,%ebp
+ movl %eax,%esi
+ roll $5,%eax
+ addl %edi,%ebp
+ xorl %edx,%ecx
+ addl %eax,%ebp
+ movl %ebx,%edi
+ pxor %xmm6,%xmm2
+.byte 102,68,15,58,15,208,8
+ xorl %ecx,%ebx
+ addl 32(%rsp),%edx
+ andl %ecx,%edi
+ pxor %xmm3,%xmm2
+ andl %ebx,%esi
+ rorl $7,%eax
+ movdqa %xmm8,%xmm9
+ paddd %xmm1,%xmm8
+ addl %edi,%edx
+ movl %ebp,%edi
+ pxor %xmm10,%xmm2
+ roll $5,%ebp
+ addl %esi,%edx
+ xorl %ecx,%ebx
+ addl %ebp,%edx
+ movdqa %xmm2,%xmm10
+ movdqa %xmm8,16(%rsp)
+ movl %eax,%esi
+ xorl %ebx,%eax
+ addl 36(%rsp),%ecx
+ andl %ebx,%esi
+ pslld $2,%xmm2
+ andl %eax,%edi
+ rorl $7,%ebp
+ psrld $30,%xmm10
+ addl %esi,%ecx
+ movl %edx,%esi
+ roll $5,%edx
+ addl %edi,%ecx
+ xorl %ebx,%eax
+ addl %edx,%ecx
+ por %xmm10,%xmm2
+ movl %ebp,%edi
+ xorl %eax,%ebp
+ movdqa %xmm2,%xmm8
+ addl 40(%rsp),%ebx
+ andl %eax,%edi
+ andl %ebp,%esi
+ rorl $7,%edx
+ addl %edi,%ebx
+ movl %ecx,%edi
+ roll $5,%ecx
+ addl %esi,%ebx
+ xorl %eax,%ebp
+ addl %ecx,%ebx
+ movl %edx,%esi
+ xorl %ebp,%edx
+ addl 44(%rsp),%eax
+ andl %ebp,%esi
+ andl %edx,%edi
+ rorl $7,%ecx
+ addl %esi,%eax
+ movl %ebx,%esi
+ roll $5,%ebx
+ addl %edi,%eax
+ xorl %ebp,%edx
+ addl %ebx,%eax
+ addl 48(%rsp),%ebp
+ pxor %xmm7,%xmm3
+.byte 102,68,15,58,15,193,8
+ xorl %edx,%esi
+ movl %eax,%edi
+ roll $5,%eax
+ pxor %xmm4,%xmm3
+ xorl %ecx,%esi
+ addl %eax,%ebp
+ movdqa %xmm9,%xmm10
+ paddd %xmm2,%xmm9
+ rorl $7,%ebx
+ addl %esi,%ebp
+ pxor %xmm8,%xmm3
+ addl 52(%rsp),%edx
+ xorl %ecx,%edi
+ movl %ebp,%esi
+ roll $5,%ebp
+ movdqa %xmm3,%xmm8
+ movdqa %xmm9,32(%rsp)
+ xorl %ebx,%edi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %edi,%edx
+ pslld $2,%xmm3
+ addl 56(%rsp),%ecx
+ xorl %ebx,%esi
+ psrld $30,%xmm8
+ movl %edx,%edi
+ roll $5,%edx
+ xorl %eax,%esi
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %esi,%ecx
+ por %xmm8,%xmm3
+ addl 60(%rsp),%ebx
+ xorl %eax,%edi
+ movl %ecx,%esi
+ roll $5,%ecx
+ xorl %ebp,%edi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %edi,%ebx
+ addl 0(%rsp),%eax
+ paddd %xmm3,%xmm10
+ xorl %ebp,%esi
+ movl %ebx,%edi
+ roll $5,%ebx
+ xorl %edx,%esi
+ movdqa %xmm10,48(%rsp)
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %esi,%eax
+ addl 4(%rsp),%ebp
+ xorl %edx,%edi
+ movl %eax,%esi
+ roll $5,%eax
+ xorl %ecx,%edi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %edi,%ebp
+ addl 8(%rsp),%edx
+ xorl %ecx,%esi
+ movl %ebp,%edi
+ roll $5,%ebp
+ xorl %ebx,%esi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %esi,%edx
+ addl 12(%rsp),%ecx
+ xorl %ebx,%edi
+ movl %edx,%esi
+ roll $5,%edx
+ xorl %eax,%edi
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %edi,%ecx
+ cmpq %r10,%r9
+ je .Ldone_ssse3
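+# More input remains: load and byte-swap the next 64-byte block while the
+# final 16 rounds of the current one complete.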
+ movdqa 64(%r11),%xmm6
+ movdqa 0(%r11),%xmm9
+ movdqu 0(%r9),%xmm0
+ movdqu 16(%r9),%xmm1
+ movdqu 32(%r9),%xmm2
+ movdqu 48(%r9),%xmm3
+.byte 102,15,56,0,198
+ addq $64,%r9
+ addl 16(%rsp),%ebx
+ xorl %eax,%esi
+.byte 102,15,56,0,206
+ movl %ecx,%edi
+ roll $5,%ecx
+ paddd %xmm9,%xmm0
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ movdqa %xmm0,0(%rsp)
+ addl 20(%rsp),%eax
+ xorl %ebp,%edi
+ psubd %xmm9,%xmm0
+ movl %ebx,%esi
+ roll $5,%ebx
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ addl 24(%rsp),%ebp
+ xorl %edx,%esi
+ movl %eax,%edi
+ roll $5,%eax
+ xorl %ecx,%esi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %esi,%ebp
+ addl 28(%rsp),%edx
+ xorl %ecx,%edi
+ movl %ebp,%esi
+ roll $5,%ebp
+ xorl %ebx,%edi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %edi,%edx
+ addl 32(%rsp),%ecx
+ xorl %ebx,%esi
+.byte 102,15,56,0,214
+ movl %edx,%edi
+ roll $5,%edx
+ paddd %xmm9,%xmm1
+ xorl %eax,%esi
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %esi,%ecx
+ movdqa %xmm1,16(%rsp)
+ addl 36(%rsp),%ebx
+ xorl %eax,%edi
+ psubd %xmm9,%xmm1
+ movl %ecx,%esi
+ roll $5,%ecx
+ xorl %ebp,%edi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %edi,%ebx
+ addl 40(%rsp),%eax
+ xorl %ebp,%esi
+ movl %ebx,%edi
+ roll $5,%ebx
+ xorl %edx,%esi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %esi,%eax
+ addl 44(%rsp),%ebp
+ xorl %edx,%edi
+ movl %eax,%esi
+ roll $5,%eax
+ xorl %ecx,%edi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %edi,%ebp
+ addl 48(%rsp),%edx
+ xorl %ecx,%esi
+.byte 102,15,56,0,222
+ movl %ebp,%edi
+ roll $5,%ebp
+ paddd %xmm9,%xmm2
+ xorl %ebx,%esi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %esi,%edx
+ movdqa %xmm2,32(%rsp)
+ addl 52(%rsp),%ecx
+ xorl %ebx,%edi
+ psubd %xmm9,%xmm2
+ movl %edx,%esi
+ roll $5,%edx
+ xorl %eax,%edi
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %edi,%ecx
+ addl 56(%rsp),%ebx
+ xorl %eax,%esi
+ movl %ecx,%edi
+ roll $5,%ecx
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ addl 60(%rsp),%eax
+ xorl %ebp,%edi
+ movl %ebx,%esi
+ roll $5,%ebx
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ addl 0(%r8),%eax
+ addl 4(%r8),%esi
+ addl 8(%r8),%ecx
+ addl 12(%r8),%edx
+ movl %eax,0(%r8)
+ addl 16(%r8),%ebp
+ movl %esi,4(%r8)
+ movl %esi,%ebx
+ movl %ecx,8(%r8)
+ movl %edx,12(%r8)
+ movl %ebp,16(%r8)
+ jmp .Loop_ssse3
+
+.align 16
+.Ldone_ssse3:
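+# Last block: run the remaining 16 rounds without scheduling another block,
+# then write the updated digest back and restore the saved registers.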
+ addl 16(%rsp),%ebx
+ xorl %eax,%esi
+ movl %ecx,%edi
+ roll $5,%ecx
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ addl 20(%rsp),%eax
+ xorl %ebp,%edi
+ movl %ebx,%esi
+ roll $5,%ebx
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ addl 24(%rsp),%ebp
+ xorl %edx,%esi
+ movl %eax,%edi
+ roll $5,%eax
+ xorl %ecx,%esi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %esi,%ebp
+ addl 28(%rsp),%edx
+ xorl %ecx,%edi
+ movl %ebp,%esi
+ roll $5,%ebp
+ xorl %ebx,%edi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %edi,%edx
+ addl 32(%rsp),%ecx
+ xorl %ebx,%esi
+ movl %edx,%edi
+ roll $5,%edx
+ xorl %eax,%esi
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %esi,%ecx
+ addl 36(%rsp),%ebx
+ xorl %eax,%edi
+ movl %ecx,%esi
+ roll $5,%ecx
+ xorl %ebp,%edi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %edi,%ebx
+ addl 40(%rsp),%eax
+ xorl %ebp,%esi
+ movl %ebx,%edi
+ roll $5,%ebx
+ xorl %edx,%esi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %esi,%eax
+ addl 44(%rsp),%ebp
+ xorl %edx,%edi
+ movl %eax,%esi
+ roll $5,%eax
+ xorl %ecx,%edi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %edi,%ebp
+ addl 48(%rsp),%edx
+ xorl %ecx,%esi
+ movl %ebp,%edi
+ roll $5,%ebp
+ xorl %ebx,%esi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %esi,%edx
+ addl 52(%rsp),%ecx
+ xorl %ebx,%edi
+ movl %edx,%esi
+ roll $5,%edx
+ xorl %eax,%edi
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %edi,%ecx
+ addl 56(%rsp),%ebx
+ xorl %eax,%esi
+ movl %ecx,%edi
+ roll $5,%ecx
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ addl 60(%rsp),%eax
+ xorl %ebp,%edi
+ movl %ebx,%esi
+ roll $5,%ebx
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ addl 0(%r8),%eax
+ addl 4(%r8),%esi
+ addl 8(%r8),%ecx
+ movl %eax,0(%r8)
+ addl 12(%r8),%edx
+ movl %esi,4(%r8)
+ addl 16(%r8),%ebp
+ movl %ecx,8(%r8)
+ movl %edx,12(%r8)
+ movl %ebp,16(%r8)
+ leaq 64(%rsp),%rsi
+ movq 0(%rsi),%r12
+ movq 8(%rsi),%rbp
+ movq 16(%rsi),%rbx
+ leaq 24(%rsi),%rsp
+.Lepilogue_ssse3:
+ .byte 0xf3,0xc3
+.size sha1_block_data_order_ssse3,.-sha1_block_data_order_ssse3
+.align 64
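+# SHA-1 round constants, one per 20-round group, replicated across all four
+# lanes, followed by the byte-order mask used by pshufb and the CRYPTOGAMS
+# attribution string.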
+K_XX_XX:
+.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999
+.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1
+.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc
+.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6
+.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
+.byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
+.align 64
diff --git a/secure/lib/libcrypto/amd64/sha256-x86_64.S b/secure/lib/libcrypto/amd64/sha256-x86_64.S
new file mode 100644
index 000000000000..79e06b468a74
--- /dev/null
+++ b/secure/lib/libcrypto/amd64/sha256-x86_64.S
@@ -0,0 +1,1779 @@
+ # $FreeBSD$
+.text
+
+.globl sha256_block_data_order
+.type sha256_block_data_order,@function
+.align 16
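+# sha256_block_data_order(ctx, in, num): %rdi = SHA256_CTX, %rsi = input,
+# %rdx = number of 64-byte blocks.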
+sha256_block_data_order:
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ movq %rsp,%r11
+ shlq $4,%rdx
+ subq $64+32,%rsp
+ leaq (%rsi,%rdx,4),%rdx
+ andq $-64,%rsp
+ movq %rdi,64+0(%rsp)
+ movq %rsi,64+8(%rsp)
+ movq %rdx,64+16(%rsp)
+ movq %r11,64+24(%rsp)
+.Lprologue:
+
+ leaq K256(%rip),%rbp
+
+ movl 0(%rdi),%eax
+ movl 4(%rdi),%ebx
+ movl 8(%rdi),%ecx
+ movl 12(%rdi),%edx
+ movl 16(%rdi),%r8d
+ movl 20(%rdi),%r9d
+ movl 24(%rdi),%r10d
+ movl 28(%rdi),%r11d
+ jmp .Lloop
+
+.align 16
+.Lloop:
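+# One 64-byte block per iteration; %rdi is cleared here and reused as the
+# round counter, and each message word is byte-swapped as it is loaded.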
+ xorq %rdi,%rdi
+ movl 0(%rsi),%r12d
+ movl %r8d,%r13d
+ movl %eax,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %r9d