author     Jung-uk Kim <jkim@FreeBSD.org>    2020-03-20 21:43:08 +0000
committer  Jung-uk Kim <jkim@FreeBSD.org>    2020-03-20 21:43:08 +0000
commit     737493770cbfb0e6144e7c1eddebbe9160394a0f (patch)
tree       041bf5f9d13b214279a3737b71d9ed65826ac7a6 /secure/lib/libcrypto
parent     29b5aa1b8efcee2c420239594c8840a0e465e8dd (diff)
MFC: r359060, r359061, r359066

Merge OpenSSL 1.1.1e.

Notes: svn path=/stable/12/; revision=359186
Diffstat (limited to 'secure/lib/libcrypto')
-rw-r--r--  secure/lib/libcrypto/Makefile.inc | 4
-rw-r--r--  secure/lib/libcrypto/aarch64/ecp_nistz256-armv8.S | 71
-rw-r--r--  secure/lib/libcrypto/aarch64/sha256-armv8.S | 2
-rw-r--r--  secure/lib/libcrypto/aarch64/sha512-armv8.S | 2
-rw-r--r--  secure/lib/libcrypto/amd64/aesni-gcm-x86_64.S | 784
-rw-r--r--  secure/lib/libcrypto/amd64/aesni-mb-x86_64.S | 965
-rw-r--r--  secure/lib/libcrypto/amd64/aesni-sha1-x86_64.S | 1354
-rw-r--r--  secure/lib/libcrypto/amd64/aesni-sha256-x86_64.S | 4354
-rw-r--r--  secure/lib/libcrypto/amd64/aesni-x86_64.S | 18
-rw-r--r--  secure/lib/libcrypto/amd64/chacha-x86_64.S | 1026
-rw-r--r--  secure/lib/libcrypto/amd64/cmll-x86_64.S | 8
-rw-r--r--  secure/lib/libcrypto/amd64/ecp_nistz256-x86_64.S | 2093
-rw-r--r--  secure/lib/libcrypto/amd64/ghash-x86_64.S | 475
-rw-r--r--  secure/lib/libcrypto/amd64/keccak1600-x86_64.S | 2
-rw-r--r--  secure/lib/libcrypto/amd64/poly1305-x86_64.S | 1787
-rw-r--r--  secure/lib/libcrypto/amd64/rc4-x86_64.S | 9
-rw-r--r--  secure/lib/libcrypto/amd64/rsaz-avx2.S | 1749
-rw-r--r--  secure/lib/libcrypto/amd64/rsaz-x86_64.S | 863
-rw-r--r--  secure/lib/libcrypto/amd64/sha1-mb-x86_64.S | 4315
-rw-r--r--  secure/lib/libcrypto/amd64/sha1-x86_64.S | 2831
-rw-r--r--  secure/lib/libcrypto/amd64/sha256-mb-x86_64.S | 4672
-rw-r--r--  secure/lib/libcrypto/amd64/sha256-x86_64.S | 2347
-rw-r--r--  secure/lib/libcrypto/amd64/sha512-x86_64.S | 3636
-rw-r--r--  secure/lib/libcrypto/amd64/x25519-x86_64.S | 388
-rw-r--r--  secure/lib/libcrypto/amd64/x86_64-mont.S | 380
-rw-r--r--  secure/lib/libcrypto/amd64/x86_64-mont5.S | 1375
-rw-r--r--  secure/lib/libcrypto/arm/aes-armv4.S | 2
-rw-r--r--  secure/lib/libcrypto/arm/bsaes-armv7.S | 2
-rw-r--r--  secure/lib/libcrypto/arm/ecp_nistz256-armv4.S | 183
-rw-r--r--  secure/lib/libcrypto/arm/sha256-armv4.S | 2
-rw-r--r--  secure/lib/libcrypto/arm/sha512-armv4.S | 2
-rw-r--r--  secure/lib/libcrypto/i386/chacha-x86.S | 960
-rw-r--r--  secure/lib/libcrypto/i386/ecp_nistz256-x86.S | 36
-rw-r--r--  secure/lib/libcrypto/i386/poly1305-x86.S | 1110
-rw-r--r--  secure/lib/libcrypto/i386/sha1-586.S | 2350
-rw-r--r--  secure/lib/libcrypto/i386/sha256-586.S | 4496
-rw-r--r--  secure/lib/libcrypto/man/man3/ADMISSIONS.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/ASN1_INTEGER_get_int64.3 | 8
-rw-r--r--  secure/lib/libcrypto/man/man3/ASN1_ITEM_lookup.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/ASN1_OBJECT_new.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/ASN1_STRING_TABLE_add.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/ASN1_STRING_length.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/ASN1_STRING_new.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/ASN1_STRING_print_ex.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/ASN1_TIME_set.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/ASN1_TYPE_get.3 | 8
-rw-r--r--  secure/lib/libcrypto/man/man3/ASN1_generate_nconf.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/ASYNC_WAIT_CTX_new.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/ASYNC_start_job.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/BF_encrypt.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/BIO_ADDR.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/BIO_ADDRINFO.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/BIO_connect.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/BIO_ctrl.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/BIO_f_base64.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/BIO_f_buffer.3 | 24
-rw-r--r--  secure/lib/libcrypto/man/man3/BIO_f_cipher.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/BIO_f_md.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/BIO_f_null.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/BIO_f_ssl.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/BIO_find_type.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/BIO_get_data.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/BIO_get_ex_new_index.3 | 10
-rw-r--r--  secure/lib/libcrypto/man/man3/BIO_meth_new.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/BIO_new.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/BIO_new_CMS.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/BIO_parse_hostserv.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/BIO_printf.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/BIO_push.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/BIO_read.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/BIO_s_accept.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/BIO_s_bio.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/BIO_s_connect.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/BIO_s_fd.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/BIO_s_file.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/BIO_s_mem.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/BIO_s_null.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/BIO_s_socket.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/BIO_set_callback.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/BIO_should_retry.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/BN_BLINDING_new.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/BN_CTX_new.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/BN_CTX_start.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/BN_add.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/BN_add_word.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/BN_bn2bin.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/BN_cmp.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/BN_copy.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/BN_generate_prime.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/BN_mod_inverse.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/BN_mod_mul_montgomery.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/BN_mod_mul_reciprocal.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/BN_new.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/BN_num_bytes.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/BN_rand.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/BN_security_bits.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/BN_set_bit.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/BN_swap.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/BN_zero.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/BUF_MEM_new.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/CMS_add0_cert.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/CMS_add1_recipient_cert.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/CMS_add1_signer.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/CMS_compress.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/CMS_decrypt.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/CMS_encrypt.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/CMS_final.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/CMS_get0_RecipientInfos.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/CMS_get0_SignerInfos.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/CMS_get0_type.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/CMS_get1_ReceiptRequest.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/CMS_sign.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/CMS_sign_receipt.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/CMS_uncompress.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/CMS_verify.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/CMS_verify_receipt.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/CONF_modules_free.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/CONF_modules_load_file.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/CRYPTO_THREAD_run_once.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/CRYPTO_get_ex_new_index.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/CRYPTO_memcmp.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/CTLOG_STORE_get0_log_by_id.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/CTLOG_STORE_new.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/CTLOG_new.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/CT_POLICY_EVAL_CTX_new.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/DEFINE_STACK_OF.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/DES_random_key.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/DH_generate_key.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/DH_generate_parameters.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/DH_get0_pqg.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/DH_get_1024_160.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/DH_meth_new.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/DH_new.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/DH_new_by_nid.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/DH_set_method.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/DH_size.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/DSA_SIG_new.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/DSA_do_sign.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/DSA_dup_DH.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/DSA_generate_key.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/DSA_generate_parameters.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/DSA_get0_pqg.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/DSA_meth_new.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/DSA_new.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/DSA_set_method.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/DSA_sign.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/DSA_size.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/DTLS_get_data_mtu.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/DTLS_set_timer_cb.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/DTLSv1_listen.3 | 16
-rw-r--r--  secure/lib/libcrypto/man/man3/ECDSA_SIG_new.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/ECPKParameters_print.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/EC_GFp_simple_method.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/EC_GROUP_copy.3 | 94
-rw-r--r--  secure/lib/libcrypto/man/man3/EC_GROUP_new.3 | 96
-rw-r--r--  secure/lib/libcrypto/man/man3/EC_KEY_get_enc_flags.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/EC_KEY_new.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/EC_POINT_add.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/EC_POINT_new.3 | 24
-rw-r--r--  secure/lib/libcrypto/man/man3/ENGINE_add.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/ERR_GET_LIB.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/ERR_clear_error.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/ERR_error_string.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/ERR_get_error.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/ERR_load_crypto_strings.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/ERR_load_strings.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/ERR_print_errors.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/ERR_put_error.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/ERR_remove_state.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/ERR_set_mark.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/EVP_BytesToKey.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/EVP_CIPHER_CTX_get_cipher_data.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/EVP_CIPHER_meth_new.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/EVP_DigestInit.3 | 73
-rw-r--r--  secure/lib/libcrypto/man/man3/EVP_DigestSignInit.3 | 14
-rw-r--r--  secure/lib/libcrypto/man/man3/EVP_DigestVerifyInit.3 | 6
-rw-r--r--  secure/lib/libcrypto/man/man3/EVP_EncodeInit.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/EVP_EncryptInit.3 | 13
-rw-r--r--  secure/lib/libcrypto/man/man3/EVP_MD_meth_new.3 | 23
-rw-r--r--  secure/lib/libcrypto/man/man3/EVP_OpenInit.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/EVP_PKEY_ASN1_METHOD.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/EVP_PKEY_CTX_ctrl.3 | 20
-rw-r--r--  secure/lib/libcrypto/man/man3/EVP_PKEY_CTX_new.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/EVP_PKEY_CTX_set1_pbe_pass.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/EVP_PKEY_CTX_set_hkdf_md.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/EVP_PKEY_CTX_set_rsa_pss_keygen_md.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/EVP_PKEY_CTX_set_scrypt_N.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/EVP_PKEY_CTX_set_tls1_prf_md.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/EVP_PKEY_asn1_get_count.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/EVP_PKEY_cmp.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/EVP_PKEY_decrypt.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/EVP_PKEY_derive.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/EVP_PKEY_encrypt.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/EVP_PKEY_get_default_digest_nid.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/EVP_PKEY_keygen.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/EVP_PKEY_meth_get_count.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/EVP_PKEY_meth_new.3 | 30
-rw-r--r--  secure/lib/libcrypto/man/man3/EVP_PKEY_new.3 | 35
-rw-r--r--  secure/lib/libcrypto/man/man3/EVP_PKEY_print_private.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/EVP_PKEY_set1_RSA.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/EVP_PKEY_sign.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/EVP_PKEY_size.3 | 210
-rw-r--r--  secure/lib/libcrypto/man/man3/EVP_PKEY_verify.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/EVP_PKEY_verify_recover.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/EVP_SealInit.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/EVP_SignInit.3 | 44
-rw-r--r--  secure/lib/libcrypto/man/man3/EVP_VerifyInit.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/EVP_aes.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/EVP_aria.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/EVP_bf_cbc.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/EVP_blake2b512.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/EVP_camellia.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/EVP_cast5_cbc.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/EVP_chacha20.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/EVP_des.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/EVP_desx_cbc.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/EVP_idea_cbc.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/EVP_md2.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/EVP_md4.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/EVP_md5.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/EVP_mdc2.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/EVP_rc2_cbc.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/EVP_rc4.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/EVP_rc5_32_12_16_cbc.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/EVP_ripemd160.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/EVP_seed_cbc.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/EVP_sha1.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/EVP_sha224.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/EVP_sha3_224.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/EVP_sm3.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/EVP_sm4_cbc.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/EVP_whirlpool.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/HMAC.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/MD5.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/MDC2_Init.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/Makefile | 46
-rw-r--r--  secure/lib/libcrypto/man/man3/OBJ_nid2obj.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/OCSP_REQUEST_new.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/OCSP_cert_to_id.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/OCSP_request_add1_nonce.3 | 8
-rw-r--r--  secure/lib/libcrypto/man/man3/OCSP_resp_find_status.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/OCSP_response_status.3 | 8
-rw-r--r--  secure/lib/libcrypto/man/man3/OCSP_sendreq_new.3 | 13
-rw-r--r--  secure/lib/libcrypto/man/man3/OPENSSL_Applink.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/OPENSSL_LH_COMPFUNC.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/OPENSSL_LH_stats.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/OPENSSL_VERSION_NUMBER.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/OPENSSL_config.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/OPENSSL_fork_prepare.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/OPENSSL_ia32cap.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/OPENSSL_init_crypto.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/OPENSSL_init_ssl.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/OPENSSL_instrument_bus.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/OPENSSL_load_builtin_modules.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/OPENSSL_malloc.3 | 8
-rw-r--r--  secure/lib/libcrypto/man/man3/OPENSSL_secure_malloc.3 | 9
-rw-r--r--  secure/lib/libcrypto/man/man3/OSSL_STORE_INFO.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/OSSL_STORE_LOADER.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/OSSL_STORE_SEARCH.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/OSSL_STORE_expect.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/OSSL_STORE_open.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/OpenSSL_add_all_algorithms.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/PEM_bytes_read_bio.3 | 8
-rw-r--r--  secure/lib/libcrypto/man/man3/PEM_read.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/PEM_read_CMS.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/PEM_read_bio_PrivateKey.3 | 15
-rw-r--r--  secure/lib/libcrypto/man/man3/PEM_read_bio_ex.3 | 6
-rw-r--r--  secure/lib/libcrypto/man/man3/PEM_write_bio_CMS_stream.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/PEM_write_bio_PKCS7_stream.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/PKCS12_create.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/PKCS12_newpass.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/PKCS12_parse.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/PKCS5_PBKDF2_HMAC.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/PKCS7_decrypt.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/PKCS7_encrypt.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/PKCS7_sign.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/PKCS7_sign_add_signer.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/PKCS7_verify.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/RAND_DRBG_generate.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/RAND_DRBG_get0_master.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/RAND_DRBG_new.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/RAND_DRBG_reseed.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/RAND_DRBG_set_callbacks.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/RAND_DRBG_set_ex_data.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/RAND_add.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/RAND_bytes.3 | 30
-rw-r--r--  secure/lib/libcrypto/man/man3/RAND_cleanup.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/RAND_egd.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/RAND_load_file.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/RAND_set_rand_method.3 | 6
-rw-r--r--  secure/lib/libcrypto/man/man3/RC4_set_key.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/RIPEMD160_Init.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/RSA_blinding_on.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/RSA_check_key.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/RSA_generate_key.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/RSA_get0_key.3 | 11
-rw-r--r--  secure/lib/libcrypto/man/man3/RSA_meth_new.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/RSA_new.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/RSA_padding_add_PKCS1_type_1.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/RSA_print.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/RSA_private_encrypt.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/RSA_public_encrypt.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/RSA_set_method.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/RSA_sign.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/RSA_sign_ASN1_OCTET_STRING.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/RSA_size.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SCT_new.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SCT_print.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SCT_validate.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SHA256_Init.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SMIME_read_CMS.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SMIME_read_PKCS7.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SMIME_write_CMS.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SMIME_write_PKCS7.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_CIPHER_get_name.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_COMP_add_compression_method.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_CONF_CTX_new.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_CONF_CTX_set1_prefix.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_CONF_CTX_set_flags.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_CONF_CTX_set_ssl_ctx.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_CONF_cmd.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_CONF_cmd_argv.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_CTX_add1_chain_cert.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_CTX_add_extra_chain_cert.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_CTX_add_session.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_CTX_config.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_CTX_ctrl.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_CTX_dane_enable.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_CTX_flush_sessions.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_CTX_free.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_CTX_get0_param.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_CTX_get_verify_mode.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_CTX_has_client_custom_ext.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_CTX_load_verify_locations.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_CTX_new.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_CTX_sess_number.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_CTX_sess_set_cache_size.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_CTX_sess_set_get_cb.3 | 61
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_CTX_sessions.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_CTX_set0_CA_list.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_CTX_set1_curves.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_CTX_set1_sigalgs.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_CTX_set1_verify_cert_store.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_CTX_set_alpn_select_cb.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_CTX_set_cert_cb.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_CTX_set_cert_store.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_CTX_set_cert_verify_callback.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_CTX_set_cipher_list.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_CTX_set_client_cert_cb.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_CTX_set_client_hello_cb.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_CTX_set_ct_validation_callback.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_CTX_set_ctlog_list_file.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_CTX_set_default_passwd_cb.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_CTX_set_ex_data.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_CTX_set_generate_session_id.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_CTX_set_info_callback.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_CTX_set_keylog_callback.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_CTX_set_max_cert_list.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_CTX_set_min_proto_version.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_CTX_set_mode.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_CTX_set_msg_callback.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_CTX_set_num_tickets.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_CTX_set_options.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_CTX_set_psk_client_callback.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_CTX_set_quiet_shutdown.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_CTX_set_read_ahead.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_CTX_set_record_padding_callback.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_CTX_set_security_level.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_CTX_set_session_cache_mode.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_CTX_set_session_id_context.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_CTX_set_session_ticket_cb.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_CTX_set_split_send_fragment.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_CTX_set_ssl_version.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_CTX_set_stateless_cookie_generate_cb.3 | 68
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_CTX_set_timeout.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_CTX_set_tlsext_servername_callback.3 | 103
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_CTX_set_tlsext_status_cb.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_CTX_set_tlsext_ticket_key_cb.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_CTX_set_tlsext_use_srtp.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_CTX_set_tmp_dh_callback.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_CTX_set_verify.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_CTX_use_certificate.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_CTX_use_psk_identity_hint.3 | 10
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_CTX_use_serverinfo.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_SESSION_free.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_SESSION_get0_cipher.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_SESSION_get0_hostname.3 | 11
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_SESSION_get0_id_context.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_SESSION_get0_peer.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_SESSION_get_compress_id.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_SESSION_get_ex_data.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_SESSION_get_protocol_version.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_SESSION_get_time.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_SESSION_has_ticket.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_SESSION_is_resumable.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_SESSION_print.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_SESSION_set1_id.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_accept.3 | 8
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_alert_type_string.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_alloc_buffers.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_check_chain.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_clear.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_connect.3 | 8
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_do_handshake.3 | 8
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_export_keying_material.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_extension_supported.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_free.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_get0_peer_scts.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_get_SSL_CTX.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_get_all_async_fds.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_get_ciphers.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_get_client_random.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_get_current_cipher.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_get_default_timeout.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_get_error.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_get_extms_support.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_get_fd.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_get_peer_cert_chain.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_get_peer_certificate.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_get_peer_signature_nid.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_get_peer_tmp_key.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_get_psk_identity.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_get_rbio.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_get_session.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_get_shared_sigalgs.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_get_verify_result.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_get_version.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_in_init.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_key_update.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_library_init.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_load_client_CA_file.3 | 20
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_new.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_pending.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_read.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_read_early_data.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_rstate_string.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_session_reused.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_set1_host.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_set_bio.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_set_connect_state.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_set_fd.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_set_session.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_set_shutdown.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_set_verify_result.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_shutdown.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_state_string.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_want.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/SSL_write.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/UI_STRING.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/UI_UTIL_read_pw.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/UI_create_method.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/UI_new.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/X509V3_get_d2i.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/X509_ALGOR_dup.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/X509_CRL_get0_by_serial.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/X509_EXTENSION_set_object.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/X509_LOOKUP.3 | 310
-rw-r--r--  secure/lib/libcrypto/man/man3/X509_LOOKUP_hash_dir.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/X509_LOOKUP_meth_new.3 | 25
-rw-r--r--  secure/lib/libcrypto/man/man3/X509_NAME_ENTRY_get_object.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/X509_NAME_add_entry_by_txt.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/X509_NAME_get0_der.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/X509_NAME_get_index_by_NID.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/X509_NAME_print_ex.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/X509_PUBKEY_new.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/X509_SIG_get0.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/X509_STORE_CTX_get_error.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/X509_STORE_CTX_new.3 | 6
-rw-r--r--  secure/lib/libcrypto/man/man3/X509_STORE_CTX_set_verify_cb.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/X509_STORE_add_cert.3 | 21
-rw-r--r--  secure/lib/libcrypto/man/man3/X509_STORE_get0_param.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/X509_STORE_new.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/X509_STORE_set_verify_cb_func.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/X509_VERIFY_PARAM_set_flags.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/X509_check_ca.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/X509_check_host.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/X509_check_issued.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/X509_check_private_key.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/X509_cmp.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/X509_cmp_time.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/X509_digest.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/X509_dup.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/X509_get0_notBefore.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/X509_get0_signature.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/X509_get0_uids.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/X509_get_extension_flags.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/X509_get_pubkey.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/X509_get_serialNumber.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/X509_get_subject_name.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/X509_get_version.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/X509_new.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/X509_sign.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/X509_verify_cert.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/X509v3_get_ext_by_NID.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/d2i_DHparams.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/d2i_PKCS8PrivateKey_bio.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/d2i_PrivateKey.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/d2i_SSL_SESSION.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/d2i_X509.3 | 12
-rw-r--r--  secure/lib/libcrypto/man/man3/i2d_CMS_bio_stream.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/i2d_PKCS7_bio_stream.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/i2d_re_X509_tbs.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man3/o2i_SCT_LIST.3 | 4
-rw-r--r--  secure/lib/libcrypto/man/man5/x509v3_config.5 | 2
-rw-r--r--  secure/lib/libcrypto/man/man7/Ed25519.7 | 6
-rw-r--r--  secure/lib/libcrypto/man/man7/Makefile | 1
-rw-r--r--  secure/lib/libcrypto/man/man7/RAND.7 | 2
-rw-r--r--  secure/lib/libcrypto/man/man7/RAND_DRBG.7 | 2
-rw-r--r--  secure/lib/libcrypto/man/man7/RSA-PSS.7 | 2
-rw-r--r--  secure/lib/libcrypto/man/man7/SM2.7 | 2
-rw-r--r--  secure/lib/libcrypto/man/man7/X25519.7 | 6
-rw-r--r--  secure/lib/libcrypto/man/man7/bio.7 | 2
-rw-r--r--  secure/lib/libcrypto/man/man7/ct.7 | 2
-rw-r--r--  secure/lib/libcrypto/man/man7/des_modes.7 | 2
-rw-r--r--  secure/lib/libcrypto/man/man7/evp.7 | 2
-rw-r--r--  secure/lib/libcrypto/man/man7/ossl_store-file.7 | 2
-rw-r--r--  secure/lib/libcrypto/man/man7/ossl_store.7 | 2
-rw-r--r--  secure/lib/libcrypto/man/man7/passphrase-encoding.7 | 6
-rw-r--r--  secure/lib/libcrypto/man/man7/proxy-certificates.7 | 478
-rw-r--r--  secure/lib/libcrypto/man/man7/scrypt.7 | 2
-rw-r--r--  secure/lib/libcrypto/man/man7/ssl.7 | 2
-rw-r--r--  secure/lib/libcrypto/man/man7/x509.7 | 2
-rw-r--r--  secure/lib/libcrypto/opensslconf.h.in | 6
523 files changed, 3009 insertions(+), 45454 deletions(-)
diff --git a/secure/lib/libcrypto/Makefile.inc b/secure/lib/libcrypto/Makefile.inc
index a9d8df50be02..91217dc1e454 100644
--- a/secure/lib/libcrypto/Makefile.inc
+++ b/secure/lib/libcrypto/Makefile.inc
@@ -3,8 +3,8 @@
.include <bsd.own.mk>
# OpenSSL version used for manual page generation
-OPENSSL_VER= 1.1.1d
-OPENSSL_DATE= 2019-09-10
+OPENSSL_VER= 1.1.1e
+OPENSSL_DATE= 2020-03-17
LCRYPTO_SRC= ${SRCTOP}/crypto/openssl
LCRYPTO_DOC= ${LCRYPTO_SRC}/doc
diff --git a/secure/lib/libcrypto/aarch64/ecp_nistz256-armv8.S b/secure/lib/libcrypto/aarch64/ecp_nistz256-armv8.S
index c0b5f8cede17..f7fcce4365fa 100644
--- a/secure/lib/libcrypto/aarch64/ecp_nistz256-armv8.S
+++ b/secure/lib/libcrypto/aarch64/ecp_nistz256-armv8.S
@@ -3017,7 +3017,7 @@ __ecp_nistz256_div_by_2:
.align 5
ecp_nistz256_point_double:
.inst 0xd503233f // paciasp
- stp x29,x30,[sp,#-80]!
+ stp x29,x30,[sp,#-96]!
add x29,sp,#0
stp x19,x20,[sp,#16]
stp x21,x22,[sp,#32]
@@ -3150,7 +3150,7 @@ ecp_nistz256_point_double:
add sp,x29,#0 // destroy frame
ldp x19,x20,[x29,#16]
ldp x21,x22,[x29,#32]
- ldp x29,x30,[sp],#80
+ ldp x29,x30,[sp],#96
.inst 0xd50323bf // autiasp
ret
.size ecp_nistz256_point_double,.-ecp_nistz256_point_double
@@ -3159,12 +3159,13 @@ ecp_nistz256_point_double:
.align 5
ecp_nistz256_point_add:
.inst 0xd503233f // paciasp
- stp x29,x30,[sp,#-80]!
+ stp x29,x30,[sp,#-96]!
add x29,sp,#0
stp x19,x20,[sp,#16]
stp x21,x22,[sp,#32]
stp x23,x24,[sp,#48]
stp x25,x26,[sp,#64]
+ stp x27,x28,[sp,#80]
sub sp,sp,#32*12
ldp x4,x5,[x2,#64] // in2_z
@@ -3178,7 +3179,7 @@ ecp_nistz256_point_add:
orr x10,x6,x7
orr x25,x8,x10
cmp x25,#0
- csetm x25,ne // !in2infty
+ csetm x25,ne // ~in2infty
add x0,sp,#192
bl __ecp_nistz256_sqr_mont // p256_sqr_mont(Z2sqr, in2_z);
@@ -3188,7 +3189,7 @@ ecp_nistz256_point_add:
orr x10,x6,x7
orr x24,x8,x10
cmp x24,#0
- csetm x24,ne // !in1infty
+ csetm x24,ne // ~in1infty
add x0,sp,#128
bl __ecp_nistz256_sqr_mont // p256_sqr_mont(Z1sqr, in1_z);
@@ -3229,7 +3230,7 @@ ecp_nistz256_point_add:
orr x14,x14,x15 // see if result is zero
orr x16,x16,x17
- orr x26,x14,x16
+ orr x26,x14,x16 // ~is_equal(S1,S2)
add x2,sp,#192
add x0,sp,#256
@@ -3250,32 +3251,21 @@ ecp_nistz256_point_add:
orr x14,x14,x15 // see if result is zero
orr x16,x16,x17
- orr x14,x14,x16
- tst x14,x14
- b.ne .Ladd_proceed // is_equal(U1,U2)?
+ orr x14,x14,x16 // ~is_equal(U1,U2)
- tst x24,x25
- b.eq .Ladd_proceed // (in1infty || in2infty)?
+ mvn x27,x24 // -1/0 -> 0/-1
+ mvn x28,x25 // -1/0 -> 0/-1
+ orr x14,x14,x27
+ orr x14,x14,x28
+ orr x14,x14,x26
+ cbnz x14,.Ladd_proceed // if(~is_equal(U1,U2) | in1infty | in2infty | ~is_equal(S1,S2))
- tst x26,x26
- b.eq .Ladd_double // is_equal(S1,S2)?
-
- eor x4,x4,x4
- eor x5,x5,x5
- stp x4,x5,[x21]
- stp x4,x5,[x21,#16]
- stp x4,x5,[x21,#32]
- stp x4,x5,[x21,#48]
- stp x4,x5,[x21,#64]
- stp x4,x5,[x21,#80]
- b .Ladd_done
-
-.align 4
.Ladd_double:
mov x1,x22
mov x0,x21
ldp x23,x24,[x29,#48]
ldp x25,x26,[x29,#64]
+ ldp x27,x28,[x29,#80]
add sp,sp,#32*(12-4) // difference in stack frames
b .Ldouble_shortcut
@@ -3357,14 +3347,14 @@ ecp_nistz256_point_add:
ldp x8,x9,[x23] // in2
ldp x10,x11,[x23,#16]
ldp x14,x15,[x22,#0] // in1
- cmp x24,#0 // !, remember?
+ cmp x24,#0 // ~, remember?
ldp x16,x17,[x22,#0+16]
csel x8,x4,x8,ne
csel x9,x5,x9,ne
ldp x4,x5,[sp,#0+0+32] // res
csel x10,x6,x10,ne
csel x11,x7,x11,ne
- cmp x25,#0 // !, remember?
+ cmp x25,#0 // ~, remember?
ldp x6,x7,[sp,#0+0+48]
csel x14,x8,x14,ne
csel x15,x9,x15,ne
@@ -3375,14 +3365,14 @@ ecp_nistz256_point_add:
stp x14,x15,[x21,#0]
stp x16,x17,[x21,#0+16]
ldp x14,x15,[x22,#32] // in1
- cmp x24,#0 // !, remember?
+ cmp x24,#0 // ~, remember?
ldp x16,x17,[x22,#32+16]
csel x8,x4,x8,ne
csel x9,x5,x9,ne
ldp x4,x5,[sp,#0+32+32] // res
csel x10,x6,x10,ne
csel x11,x7,x11,ne
- cmp x25,#0 // !, remember?
+ cmp x25,#0 // ~, remember?
ldp x6,x7,[sp,#0+32+48]
csel x14,x8,x14,ne
csel x15,x9,x15,ne
@@ -3393,13 +3383,13 @@ ecp_nistz256_point_add:
stp x14,x15,[x21,#32]
stp x16,x17,[x21,#32+16]
ldp x14,x15,[x22,#64] // in1
- cmp x24,#0 // !, remember?
+ cmp x24,#0 // ~, remember?
ldp x16,x17,[x22,#64+16]
csel x8,x4,x8,ne
csel x9,x5,x9,ne
csel x10,x6,x10,ne
csel x11,x7,x11,ne
- cmp x25,#0 // !, remember?
+ cmp x25,#0 // ~, remember?
csel x14,x8,x14,ne
csel x15,x9,x15,ne
csel x16,x10,x16,ne
@@ -3413,7 +3403,8 @@ ecp_nistz256_point_add:
ldp x21,x22,[x29,#32]
ldp x23,x24,[x29,#48]
ldp x25,x26,[x29,#64]
- ldp x29,x30,[sp],#80
+ ldp x27,x28,[x29,#80]
+ ldp x29,x30,[sp],#96
.inst 0xd50323bf // autiasp
ret
.size ecp_nistz256_point_add,.-ecp_nistz256_point_add
@@ -3442,7 +3433,7 @@ ecp_nistz256_point_add_affine:
orr x10,x6,x7
orr x24,x8,x10
cmp x24,#0
- csetm x24,ne // !in1infty
+ csetm x24,ne // ~in1infty
ldp x14,x15,[x2] // in2_x
ldp x16,x17,[x2,#16]
@@ -3456,7 +3447,7 @@ ecp_nistz256_point_add_affine:
orr x8,x8,x10
orr x25,x14,x8
cmp x25,#0
- csetm x25,ne // !in2infty
+ csetm x25,ne // ~in2infty
add x0,sp,#128
bl __ecp_nistz256_sqr_mont // p256_sqr_mont(Z1sqr, in1_z);
@@ -3563,14 +3554,14 @@ ecp_nistz256_point_add_affine:
ldp x8,x9,[x23] // in2
ldp x10,x11,[x23,#16]
ldp x14,x15,[x22,#0] // in1
- cmp x24,#0 // !, remember?
+ cmp x24,#0 // ~, remember?
ldp x16,x17,[x22,#0+16]
csel x8,x4,x8,ne
csel x9,x5,x9,ne
ldp x4,x5,[sp,#0+0+32] // res
csel x10,x6,x10,ne
csel x11,x7,x11,ne
- cmp x25,#0 // !, remember?
+ cmp x25,#0 // ~, remember?
ldp x6,x7,[sp,#0+0+48]
csel x14,x8,x14,ne
csel x15,x9,x15,ne
@@ -3582,14 +3573,14 @@ ecp_nistz256_point_add_affine:
stp x16,x17,[x21,#0+16]
adr x23,.Lone_mont-64
ldp x14,x15,[x22,#32] // in1
- cmp x24,#0 // !, remember?
+ cmp x24,#0 // ~, remember?
ldp x16,x17,[x22,#32+16]
csel x8,x4,x8,ne
csel x9,x5,x9,ne
ldp x4,x5,[sp,#0+32+32] // res
csel x10,x6,x10,ne
csel x11,x7,x11,ne
- cmp x25,#0 // !, remember?
+ cmp x25,#0 // ~, remember?
ldp x6,x7,[sp,#0+32+48]
csel x14,x8,x14,ne
csel x15,x9,x15,ne
@@ -3600,13 +3591,13 @@ ecp_nistz256_point_add_affine:
stp x14,x15,[x21,#32]
stp x16,x17,[x21,#32+16]
ldp x14,x15,[x22,#64] // in1
- cmp x24,#0 // !, remember?
+ cmp x24,#0 // ~, remember?
ldp x16,x17,[x22,#64+16]
csel x8,x4,x8,ne
csel x9,x5,x9,ne
csel x10,x6,x10,ne
csel x11,x7,x11,ne
- cmp x25,#0 // !, remember?
+ cmp x25,#0 // ~, remember?
csel x14,x8,x14,ne
csel x15,x9,x15,ne
csel x16,x10,x16,ne
diff --git a/secure/lib/libcrypto/aarch64/sha256-armv8.S b/secure/lib/libcrypto/aarch64/sha256-armv8.S
index 40d1fb269b35..35bf48ba5178 100644
--- a/secure/lib/libcrypto/aarch64/sha256-armv8.S
+++ b/secure/lib/libcrypto/aarch64/sha256-armv8.S
@@ -1,6 +1,6 @@
/* $FreeBSD$ */
/* Do not modify. This file is auto-generated from sha512-armv8.pl. */
-// Copyright 2014-2019 The OpenSSL Project Authors. All Rights Reserved.
+// Copyright 2014-2020 The OpenSSL Project Authors. All Rights Reserved.
//
// Licensed under the OpenSSL license (the "License"). You may not use
// this file except in compliance with the License. You can obtain a copy
diff --git a/secure/lib/libcrypto/aarch64/sha512-armv8.S b/secure/lib/libcrypto/aarch64/sha512-armv8.S
index a2a2b030ef4c..06cf5a239d89 100644
--- a/secure/lib/libcrypto/aarch64/sha512-armv8.S
+++ b/secure/lib/libcrypto/aarch64/sha512-armv8.S
@@ -1,6 +1,6 @@
/* $FreeBSD$ */
/* Do not modify. This file is auto-generated from sha512-armv8.pl. */
-// Copyright 2014-2019 The OpenSSL Project Authors. All Rights Reserved.
+// Copyright 2014-2020 The OpenSSL Project Authors. All Rights Reserved.
//
// Licensed under the OpenSSL license (the "License"). You may not use
// this file except in compliance with the License. You can obtain a copy
diff --git a/secure/lib/libcrypto/amd64/aesni-gcm-x86_64.S b/secure/lib/libcrypto/amd64/aesni-gcm-x86_64.S
index 723abb458f98..1cdcc86043b2 100644
--- a/secure/lib/libcrypto/amd64/aesni-gcm-x86_64.S
+++ b/secure/lib/libcrypto/amd64/aesni-gcm-x86_64.S
@@ -2,786 +2,20 @@
/* Do not modify. This file is auto-generated from aesni-gcm-x86_64.pl. */
.text
-.type _aesni_ctr32_ghash_6x,@function
-.align 32
-_aesni_ctr32_ghash_6x:
- vmovdqu 32(%r11),%xmm2
- subq $6,%rdx
- vpxor %xmm4,%xmm4,%xmm4
- vmovdqu 0-128(%rcx),%xmm15
- vpaddb %xmm2,%xmm1,%xmm10
- vpaddb %xmm2,%xmm10,%xmm11
- vpaddb %xmm2,%xmm11,%xmm12
- vpaddb %xmm2,%xmm12,%xmm13
- vpaddb %xmm2,%xmm13,%xmm14
- vpxor %xmm15,%xmm1,%xmm9
- vmovdqu %xmm4,16+8(%rsp)
- jmp .Loop6x
-
-.align 32
-.Loop6x:
- addl $100663296,%ebx
- jc .Lhandle_ctr32
- vmovdqu 0-32(%r9),%xmm3
- vpaddb %xmm2,%xmm14,%xmm1
- vpxor %xmm15,%xmm10,%xmm10
- vpxor %xmm15,%xmm11,%xmm11
-
-.Lresume_ctr32:
- vmovdqu %xmm1,(%r8)
- vpclmulqdq $0x10,%xmm3,%xmm7,%xmm5
- vpxor %xmm15,%xmm12,%xmm12
- vmovups 16-128(%rcx),%xmm2
- vpclmulqdq $0x01,%xmm3,%xmm7,%xmm6
- xorq %r12,%r12
- cmpq %r14,%r15
-
- vaesenc %xmm2,%xmm9,%xmm9
- vmovdqu 48+8(%rsp),%xmm0
- vpxor %xmm15,%xmm13,%xmm13
- vpclmulqdq $0x00,%xmm3,%xmm7,%xmm1
- vaesenc %xmm2,%xmm10,%xmm10
- vpxor %xmm15,%xmm14,%xmm14
- setnc %r12b
- vpclmulqdq $0x11,%xmm3,%xmm7,%xmm7
- vaesenc %xmm2,%xmm11,%xmm11
- vmovdqu 16-32(%r9),%xmm3
- negq %r12
- vaesenc %xmm2,%xmm12,%xmm12
- vpxor %xmm5,%xmm6,%xmm6
- vpclmulqdq $0x00,%xmm3,%xmm0,%xmm5
- vpxor %xmm4,%xmm8,%xmm8
- vaesenc %xmm2,%xmm13,%xmm13
- vpxor %xmm5,%xmm1,%xmm4
- andq $0x60,%r12
- vmovups 32-128(%rcx),%xmm15
- vpclmulqdq $0x10,%xmm3,%xmm0,%xmm1
- vaesenc %xmm2,%xmm14,%xmm14
-
- vpclmulqdq $0x01,%xmm3,%xmm0,%xmm2
- leaq (%r14,%r12,1),%r14
- vaesenc %xmm15,%xmm9,%xmm9
- vpxor 16+8(%rsp),%xmm8,%xmm8
- vpclmulqdq $0x11,%xmm3,%xmm0,%xmm3
- vmovdqu 64+8(%rsp),%xmm0
- vaesenc %xmm15,%xmm10,%xmm10
- movbeq 88(%r14),%r13
- vaesenc %xmm15,%xmm11,%xmm11
- movbeq 80(%r14),%r12
- vaesenc %xmm15,%xmm12,%xmm12
- movq %r13,32+8(%rsp)
- vaesenc %xmm15,%xmm13,%xmm13
- movq %r12,40+8(%rsp)
- vmovdqu 48-32(%r9),%xmm5
- vaesenc %xmm15,%xmm14,%xmm14
-
- vmovups 48-128(%rcx),%xmm15
- vpxor %xmm1,%xmm6,%xmm6
- vpclmulqdq $0x00,%xmm5,%xmm0,%xmm1
- vaesenc %xmm15,%xmm9,%xmm9
- vpxor %xmm2,%xmm6,%xmm6
- vpclmulqdq $0x10,%xmm5,%xmm0,%xmm2
- vaesenc %xmm15,%xmm10,%xmm10
- vpxor %xmm3,%xmm7,%xmm7
- vpclmulqdq $0x01,%xmm5,%xmm0,%xmm3
- vaesenc %xmm15,%xmm11,%xmm11
- vpclmulqdq $0x11,%xmm5,%xmm0,%xmm5
- vmovdqu 80+8(%rsp),%xmm0
- vaesenc %xmm15,%xmm12,%xmm12
- vaesenc %xmm15,%xmm13,%xmm13
- vpxor %xmm1,%xmm4,%xmm4
- vmovdqu 64-32(%r9),%xmm1
- vaesenc %xmm15,%xmm14,%xmm14
-
- vmovups 64-128(%rcx),%xmm15
- vpxor %xmm2,%xmm6,%xmm6
- vpclmulqdq $0x00,%xmm1,%xmm0,%xmm2
- vaesenc %xmm15,%xmm9,%xmm9
- vpxor %xmm3,%xmm6,%xmm6
- vpclmulqdq $0x10,%xmm1,%xmm0,%xmm3
- vaesenc %xmm15,%xmm10,%xmm10
- movbeq 72(%r14),%r13
- vpxor %xmm5,%xmm7,%xmm7
- vpclmulqdq $0x01,%xmm1,%xmm0,%xmm5
- vaesenc %xmm15,%xmm11,%xmm11
- movbeq 64(%r14),%r12
- vpclmulqdq $0x11,%xmm1,%xmm0,%xmm1
- vmovdqu 96+8(%rsp),%xmm0
- vaesenc %xmm15,%xmm12,%xmm12
- movq %r13,48+8(%rsp)
- vaesenc %xmm15,%xmm13,%xmm13
- movq %r12,56+8(%rsp)
- vpxor %xmm2,%xmm4,%xmm4
- vmovdqu 96-32(%r9),%xmm2
- vaesenc %xmm15,%xmm14,%xmm14
-
- vmovups 80-128(%rcx),%xmm15
- vpxor %xmm3,%xmm6,%xmm6
- vpclmulqdq $0x00,%xmm2,%xmm0,%xmm3
- vaesenc %xmm15,%xmm9,%xmm9
- vpxor %xmm5,%xmm6,%xmm6
- vpclmulqdq $0x10,%xmm2,%xmm0,%xmm5
- vaesenc %xmm15,%xmm10,%xmm10
- movbeq 56(%r14),%r13
- vpxor %xmm1,%xmm7,%xmm7
- vpclmulqdq $0x01,%xmm2,%xmm0,%xmm1
- vpxor 112+8(%rsp),%xmm8,%xmm8
- vaesenc %xmm15,%xmm11,%xmm11
- movbeq 48(%r14),%r12
- vpclmulqdq $0x11,%xmm2,%xmm0,%xmm2
- vaesenc %xmm15,%xmm12,%xmm12
- movq %r13,64+8(%rsp)
- vaesenc %xmm15,%xmm13,%xmm13
- movq %r12,72+8(%rsp)
- vpxor %xmm3,%xmm4,%xmm4
- vmovdqu 112-32(%r9),%xmm3
- vaesenc %xmm15,%xmm14,%xmm14
-
- vmovups 96-128(%rcx),%xmm15
- vpxor %xmm5,%xmm6,%xmm6
- vpclmulqdq $0x10,%xmm3,%xmm8,%xmm5
- vaesenc %xmm15,%xmm9,%xmm9
- vpxor %xmm1,%xmm6,%xmm6
- vpclmulqdq $0x01,%xmm3,%xmm8,%xmm1
- vaesenc %xmm15,%xmm10,%xmm10
- movbeq 40(%r14),%r13
- vpxor %xmm2,%xmm7,%xmm7
- vpclmulqdq $0x00,%xmm3,%xmm8,%xmm2
- vaesenc %xmm15,%xmm11,%xmm11
- movbeq 32(%r14),%r12
- vpclmulqdq $0x11,%xmm3,%xmm8,%xmm8
- vaesenc %xmm15,%xmm12,%xmm12
- movq %r13,80+8(%rsp)
- vaesenc %xmm15,%xmm13,%xmm13
- movq %r12,88+8(%rsp)
- vpxor %xmm5,%xmm6,%xmm6
- vaesenc %xmm15,%xmm14,%xmm14
- vpxor %xmm1,%xmm6,%xmm6
-
- vmovups 112-128(%rcx),%xmm15
- vpslldq $8,%xmm6,%xmm5
- vpxor %xmm2,%xmm4,%xmm4
- vmovdqu 16(%r11),%xmm3
-
- vaesenc %xmm15,%xmm9,%xmm9
- vpxor %xmm8,%xmm7,%xmm7
- vaesenc %xmm15,%xmm10,%xmm10
- vpxor %xmm5,%xmm4,%xmm4
- movbeq 24(%r14),%r13
- vaesenc %xmm15,%xmm11,%xmm11
- movbeq 16(%r14),%r12
- vpalignr $8,%xmm4,%xmm4,%xmm0
- vpclmulqdq $0x10,%xmm3,%xmm4,%xmm4
- movq %r13,96+8(%rsp)
- vaesenc %xmm15,%xmm12,%xmm12
- movq %r12,104+8(%rsp)
- vaesenc %xmm15,%xmm13,%xmm13
- vmovups 128-128(%rcx),%xmm1
- vaesenc %xmm15,%xmm14,%xmm14
-
- vaesenc %xmm1,%xmm9,%xmm9
- vmovups 144-128(%rcx),%xmm15
- vaesenc %xmm1,%xmm10,%xmm10
- vpsrldq $8,%xmm6,%xmm6
- vaesenc %xmm1,%xmm11,%xmm11
- vpxor %xmm6,%xmm7,%xmm7
- vaesenc %xmm1,%xmm12,%xmm12
- vpxor %xmm0,%xmm4,%xmm4
- movbeq 8(%r14),%r13
- vaesenc %xmm1,%xmm13,%xmm13
- movbeq 0(%r14),%r12
- vaesenc %xmm1,%xmm14,%xmm14
- vmovups 160-128(%rcx),%xmm1
- cmpl $11,%ebp
- jb .Lenc_tail
-
- vaesenc %xmm15,%xmm9,%xmm9
- vaesenc %xmm15,%xmm10,%xmm10
- vaesenc %xmm15,%xmm11,%xmm11
- vaesenc %xmm15,%xmm12,%xmm12
- vaesenc %xmm15,%xmm13,%xmm13
- vaesenc %xmm15,%xmm14,%xmm14
-
- vaesenc %xmm1,%xmm9,%xmm9
- vaesenc %xmm1,%xmm10,%xmm10
- vaesenc %xmm1,%xmm11,%xmm11
- vaesenc %xmm1,%xmm12,%xmm12
- vaesenc %xmm1,%xmm13,%xmm13
- vmovups 176-128(%rcx),%xmm15
- vaesenc %xmm1,%xmm14,%xmm14
- vmovups 192-128(%rcx),%xmm1
- je .Lenc_tail
-
- vaesenc %xmm15,%xmm9,%xmm9
- vaesenc %xmm15,%xmm10,%xmm10
- vaesenc %xmm15,%xmm11,%xmm11
- vaesenc %xmm15,%xmm12,%xmm12
- vaesenc %xmm15,%xmm13,%xmm13
- vaesenc %xmm15,%xmm14,%xmm14
-
- vaesenc %xmm1,%xmm9,%xmm9
- vaesenc %xmm1,%xmm10,%xmm10
- vaesenc %xmm1,%xmm11,%xmm11
- vaesenc %xmm1,%xmm12,%xmm12
- vaesenc %xmm1,%xmm13,%xmm13
- vmovups 208-128(%rcx),%xmm15
- vaesenc %xmm1,%xmm14,%xmm14
- vmovups 224-128(%rcx),%xmm1
- jmp .Lenc_tail
-
-.align 32
-.Lhandle_ctr32:
- vmovdqu (%r11),%xmm0
- vpshufb %xmm0,%xmm1,%xmm6
- vmovdqu 48(%r11),%xmm5
- vpaddd 64(%r11),%xmm6,%xmm10
- vpaddd %xmm5,%xmm6,%xmm11
- vmovdqu 0-32(%r9),%xmm3
- vpaddd %xmm5,%xmm10,%xmm12
- vpshufb %xmm0,%xmm10,%xmm10
- vpaddd %xmm5,%xmm11,%xmm13
- vpshufb %xmm0,%xmm11,%xmm11
- vpxor %xmm15,%xmm10,%xmm10
- vpaddd %xmm5,%xmm12,%xmm14
- vpshufb %xmm0,%xmm12,%xmm12
- vpxor %xmm15,%xmm11,%xmm11
- vpaddd %xmm5,%xmm13,%xmm1
- vpshufb %xmm0,%xmm13,%xmm13
- vpshufb %xmm0,%xmm14,%xmm14
- vpshufb %xmm0,%xmm1,%xmm1
- jmp .Lresume_ctr32
-
-.align 32
-.Lenc_tail:
- vaesenc %xmm15,%xmm9,%xmm9
- vmovdqu %xmm7,16+8(%rsp)
- vpalignr $8,%xmm4,%xmm4,%xmm8
- vaesenc %xmm15,%xmm10,%xmm10
- vpclmulqdq $0x10,%xmm3,%xmm4,%xmm4
- vpxor 0(%rdi),%xmm1,%xmm2
- vaesenc %xmm15,%xmm11,%xmm11
- vpxor 16(%rdi),%xmm1,%xmm0
- vaesenc %xmm15,%xmm12,%xmm12
- vpxor 32(%rdi),%xmm1,%xmm5
- vaesenc %xmm15,%xmm13,%xmm13
- vpxor 48(%rdi),%xmm1,%xmm6
- vaesenc %xmm15,%xmm14,%xmm14
- vpxor 64(%rdi),%xmm1,%xmm7
- vpxor 80(%rdi),%xmm1,%xmm3
- vmovdqu (%r8),%xmm1
-
- vaesenclast %xmm2,%xmm9,%xmm9
- vmovdqu 32(%r11),%xmm2
- vaesenclast %xmm0,%xmm10,%xmm10
- vpaddb %xmm2,%xmm1,%xmm0
- movq %r13,112+8(%rsp)
- leaq 96(%rdi),%rdi
- vaesenclast %xmm5,%xmm11,%xmm11
- vpaddb %xmm2,%xmm0,%xmm5
- movq %r12,120+8(%rsp)
- leaq 96(%rsi),%rsi
- vmovdqu 0-128(%rcx),%xmm15
- vaesenclast %xmm6,%xmm12,%xmm12
- vpaddb %xmm2,%xmm5,%xmm6
- vaesenclast %xmm7,%xmm13,%xmm13
- vpaddb %xmm2,%xmm6,%xmm7
- vaesenclast %xmm3,%xmm14,%xmm14
- vpaddb %xmm2,%xmm7,%xmm3
-
- addq $0x60,%r10
- subq $0x6,%rdx
- jc .L6x_done
-
- vmovups %xmm9,-96(%rsi)
- vpxor %xmm15,%xmm1,%xmm9
- vmovups %xmm10,-80(%rsi)
- vmovdqa %xmm0,%xmm10
- vmovups %xmm11,-64(%rsi)
- vmovdqa %xmm5,%xmm11
- vmovups %xmm12,-48(%rsi)
- vmovdqa %xmm6,%xmm12
- vmovups %xmm13,-32(%rsi)
- vmovdqa %xmm7,%xmm13
- vmovups %xmm14,-16(%rsi)
- vmovdqa %xmm3,%xmm14
- vmovdqu 32+8(%rsp),%xmm7
- jmp .Loop6x
-
-.L6x_done:
- vpxor 16+8(%rsp),%xmm8,%xmm8
- vpxor %xmm4,%xmm8,%xmm8
-
+.globl aesni_gcm_encrypt
+.type aesni_gcm_encrypt,@function
+aesni_gcm_encrypt:
+.cfi_startproc
+ xorl %eax,%eax
.byte 0xf3,0xc3
-.size _aesni_ctr32_ghash_6x,.-_aesni_ctr32_ghash_6x
+.cfi_endproc
+.size aesni_gcm_encrypt,.-aesni_gcm_encrypt
+
.globl aesni_gcm_decrypt
.type aesni_gcm_decrypt,@function
-.align 32
aesni_gcm_decrypt:
.cfi_startproc
- xorq %r10,%r10
- cmpq $0x60,%rdx
- jb .Lgcm_dec_abort
-
- leaq (%rsp),%rax
-.cfi_def_cfa_register %rax
- pushq %rbx
-.cfi_offset %rbx,-16
- pushq %rbp
-.cfi_offset %rbp,-24
- pushq %r12
-.cfi_offset %r12,-32
- pushq %r13
-.cfi_offset %r13,-40
- pushq %r14
-.cfi_offset %r14,-48
- pushq %r15
-.cfi_offset %r15,-56
- vzeroupper
-
- vmovdqu (%r8),%xmm1
- addq $-128,%rsp
- movl 12(%r8),%ebx
- leaq .Lbswap_mask(%rip),%r11
- leaq -128(%rcx),%r14
- movq $0xf80,%r15
- vmovdqu (%r9),%xmm8
- andq $-128,%rsp
- vmovdqu (%r11),%xmm0
- leaq 128(%rcx),%rcx
- leaq 32+32(%r9),%r9
- movl 240-128(%rcx),%ebp
- vpshufb %xmm0,%xmm8,%xmm8
-
- andq %r15,%r14
- andq %rsp,%r15
- subq %r14,%r15
- jc .Ldec_no_key_aliasing
- cmpq $768,%r15
- jnc .Ldec_no_key_aliasing
- subq %r15,%rsp
-.Ldec_no_key_aliasing:
-
- vmovdqu 80(%rdi),%xmm7
- leaq (%rdi),%r14
- vmovdqu 64(%rdi),%xmm4
- leaq -192(%rdi,%rdx,1),%r15
- vmovdqu 48(%rdi),%xmm5
- shrq $4,%rdx
- xorq %r10,%r10
- vmovdqu 32(%rdi),%xmm6
- vpshufb %xmm0,%xmm7,%xmm7
- vmovdqu 16(%rdi),%xmm2
- vpshufb %xmm0,%xmm4,%xmm4
- vmovdqu (%rdi),%xmm3
- vpshufb %xmm0,%xmm5,%xmm5
- vmovdqu %xmm4,48(%rsp)
- vpshufb %xmm0,%xmm6,%xmm6
- vmovdqu %xmm5,64(%rsp)
- vpshufb %xmm0,%xmm2,%xmm2
- vmovdqu %xmm6,80(%rsp)
- vpshufb %xmm0,%xmm3,%xmm3
- vmovdqu %xmm2,96(%rsp)
- vmovdqu %xmm3,112(%rsp)
-
- call _aesni_ctr32_ghash_6x
-
- vmovups %xmm9,-96(%rsi)
- vmovups %xmm10,-80(%rsi)
- vmovups %xmm11,-64(%rsi)
- vmovups %xmm12,-48(%rsi)
- vmovups %xmm13,-32(%rsi)
- vmovups %xmm14,-16(%rsi)
-
- vpshufb (%r11),%xmm8,%xmm8
- vmovdqu %xmm8,-64(%r9)
-
- vzeroupper
- movq -48(%rax),%r15
-.cfi_restore %r15
- movq -40(%rax),%r14
-.cfi_restore %r14
- movq -32(%rax),%r13
-.cfi_restore %r13
- movq -24(%rax),%r12
-.cfi_restore %r12
- movq -16(%rax),%rbp
-.cfi_restore %rbp
- movq -8(%rax),%rbx
-.cfi_restore %rbx
- leaq (%rax),%rsp
-.cfi_def_cfa_register %rsp
-.Lgcm_dec_abort:
- movq %r10,%rax
+ xorl %eax,%eax
.byte 0xf3,0xc3
.cfi_endproc
.size aesni_gcm_decrypt,.-aesni_gcm_decrypt
-.type _aesni_ctr32_6x,@function
-.align 32
-_aesni_ctr32_6x:
- vmovdqu 0-128(%rcx),%xmm4
- vmovdqu 32(%r11),%xmm2
- leaq -1(%rbp),%r13
- vmovups 16-128(%rcx),%xmm15
- leaq 32-128(%rcx),%r12
- vpxor %xmm4,%xmm1,%xmm9
- addl $100663296,%ebx
- jc .Lhandle_ctr32_2
- vpaddb %xmm2,%xmm1,%xmm10
- vpaddb %xmm2,%xmm10,%xmm11
- vpxor %xmm4,%xmm10,%xmm10
- vpaddb %xmm2,%xmm11,%xmm12
- vpxor %xmm4,%xmm11,%xmm11
- vpaddb %xmm2,%xmm12,%xmm13
- vpxor %xmm4,%xmm12,%xmm12
- vpaddb %xmm2,%xmm13,%xmm14
- vpxor %xmm4,%xmm13,%xmm13
- vpaddb %xmm2,%xmm14,%xmm1
- vpxor %xmm4,%xmm14,%xmm14
- jmp .Loop_ctr32
-
-.align 16
-.Loop_ctr32:
- vaesenc %xmm15,%xmm9,%xmm9
- vaesenc %xmm15,%xmm10,%xmm10
- vaesenc %xmm15,%xmm11,%xmm11
- vaesenc %xmm15,%xmm12,%xmm12
- vaesenc %xmm15,%xmm13,%xmm13
- vaesenc %xmm15,%xmm14,%xmm14
- vmovups (%r12),%xmm15
- leaq 16(%r12),%r12
- decl %r13d
- jnz .Loop_ctr32
-
- vmovdqu (%r12),%xmm3
- vaesenc %xmm15,%xmm9,%xmm9
- vpxor 0(%rdi),%xmm3,%xmm4
- vaesenc %xmm15,%xmm10,%xmm10
- vpxor 16(%rdi),%xmm3,%xmm5
- vaesenc %xmm15,%xmm11,%xmm11
- vpxor 32(%rdi),%xmm3,%xmm6
- vaesenc %xmm15,%xmm12,%xmm12
- vpxor 48(%rdi),%xmm3,%xmm8
- vaesenc %xmm15,%xmm13,%xmm13
- vpxor 64(%rdi),%xmm3,%xmm2
- vaesenc %xmm15,%xmm14,%xmm14
- vpxor 80(%rdi),%xmm3,%xmm3
- leaq 96(%rdi),%rdi
-
- vaesenclast %xmm4,%xmm9,%xmm9
- vaesenclast %xmm5,%xmm10,%xmm10
- vaesenclast %xmm6,%xmm11,%xmm11
- vaesenclast %xmm8,%xmm12,%xmm12
- vaesenclast %xmm2,%xmm13,%xmm13
- vaesenclast %xmm3,%xmm14,%xmm14
- vmovups %xmm9,0(%rsi)
- vmovups %xmm10,16(%rsi)
- vmovups %xmm11,32(%rsi)
- vmovups %xmm12,48(%rsi)
- vmovups %xmm13,64(%rsi)
- vmovups %xmm14,80(%rsi)
- leaq 96(%rsi),%rsi
-
- .byte 0xf3,0xc3
-.align 32
-.Lhandle_ctr32_2:
- vpshufb %xmm0,%xmm1,%xmm6
- vmovdqu 48(%r11),%xmm5
- vpaddd 64(%r11),%xmm6,%xmm10
- vpaddd %xmm5,%xmm6,%xmm11
- vpaddd %xmm5,%xmm10,%xmm12
- vpshufb %xmm0,%xmm10,%xmm10
- vpaddd %xmm5,%xmm11,%xmm13
- vpshufb %xmm0,%xmm11,%xmm11
- vpxor %xmm4,%xmm10,%xmm10
- vpaddd %xmm5,%xmm12,%xmm14
- vpshufb %xmm0,%xmm12,%xmm12
- vpxor %xmm4,%xmm11,%xmm11
- vpaddd %xmm5,%xmm13,%xmm1
- vpshufb %xmm0,%xmm13,%xmm13
- vpxor %xmm4,%xmm12,%xmm12
- vpshufb %xmm0,%xmm14,%xmm14
- vpxor %xmm4,%xmm13,%xmm13
- vpshufb %xmm0,%xmm1,%xmm1
- vpxor %xmm4,%xmm14,%xmm14
- jmp .Loop_ctr32
-.size _aesni_ctr32_6x,.-_aesni_ctr32_6x
-
-.globl aesni_gcm_encrypt
-.type aesni_gcm_encrypt,@function
-.align 32
-aesni_gcm_encrypt:
-.cfi_startproc
- xorq %r10,%r10
- cmpq $288,%rdx
- jb .Lgcm_enc_abort
-
- leaq (%rsp),%rax
-.cfi_def_cfa_register %rax
- pushq %rbx
-.cfi_offset %rbx,-16
- pushq %rbp
-.cfi_offset %rbp,-24
- pushq %r12
-.cfi_offset %r12,-32
- pushq %r13
-.cfi_offset %r13,-40
- pushq %r14
-.cfi_offset %r14,-48
- pushq %r15
-.cfi_offset %r15,-56
- vzeroupper
-
- vmovdqu (%r8),%xmm1
- addq $-128,%rsp
- movl 12(%r8),%ebx
- leaq .Lbswap_mask(%rip),%r11
- leaq -128(%rcx),%r14
- movq $0xf80,%r15
- leaq 128(%rcx),%rcx
- vmovdqu (%r11),%xmm0
- andq $-128,%rsp
- movl 240-128(%rcx),%ebp
-
- andq %r15,%r14
- andq %rsp,%r15
- subq %r14,%r15
- jc .Lenc_no_key_aliasing
- cmpq $768,%r15
- jnc .Lenc_no_key_aliasing
- subq %r15,%rsp
-.Lenc_no_key_aliasing:
-
- leaq (%rsi),%r14
- leaq -192(%rsi,%rdx,1),%r15
- shrq $4,%rdx
-
- call _aesni_ctr32_6x
- vpshufb %xmm0,%xmm9,%xmm8
- vpshufb %xmm0,%xmm10,%xmm2
- vmovdqu %xmm8,112(%rsp)
- vpshufb %xmm0,%xmm11,%xmm4
- vmovdqu %xmm2,96(%rsp)
- vpshufb %xmm0,%xmm12,%xmm5
- vmovdqu %xmm4,80(%rsp)
- vpshufb %xmm0,%xmm13,%xmm6
- vmovdqu %xmm5,64(%rsp)
- vpshufb %xmm0,%xmm14,%xmm7
- vmovdqu %xmm6,48(%rsp)
-
- call _aesni_ctr32_6x
-
- vmovdqu (%r9),%xmm8
- leaq 32+32(%r9),%r9
- subq $12,%rdx
- movq $192,%r10
- vpshufb %xmm0,%xmm8,%xmm8
-
- call _aesni_ctr32_ghash_6x
- vmovdqu 32(%rsp),%xmm7
- vmovdqu (%r11),%xmm0
- vmovdqu 0-32(%r9),%xmm3
- vpunpckhqdq %xmm7,%xmm7,%xmm1
- vmovdqu 32-32(%r9),%xmm15
- vmovups %xmm9,-96(%rsi)
- vpshufb %xmm0,%xmm9,%xmm9
- vpxor %xmm7,%xmm1,%xmm1
- vmovups %xmm10,-80(%rsi)
- vpshufb %xmm0,%xmm10,%xmm10
- vmovups %xmm11,-64(%rsi)
- vpshufb %xmm0,%xmm11,%xmm11
- vmovups %xmm12,-48(%rsi)
- vpshufb %xmm0,%xmm12,%xmm12
- vmovups %xmm13,-32(%rsi)
- vpshufb %xmm0,%xmm13,%xmm13
- vmovups %xmm14,-16(%rsi)
- vpshufb %xmm0,%xmm14,%xmm14
- vmovdqu %xmm9,16(%rsp)
- vmovdqu 48(%rsp),%xmm6
- vmovdqu 16-32(%r9),%xmm0
- vpunpckhqdq %xmm6,%xmm6,%xmm2
- vpclmulqdq $0x00,%xmm3,%xmm7,%xmm5
- vpxor %xmm6,%xmm2,%xmm2
- vpclmulqdq $0x11,%xmm3,%xmm7,%xmm7
- vpclmulqdq $0x00,%xmm15,%xmm1,%xmm1
-
- vmovdqu 64(%rsp),%xmm9
- vpclmulqdq $0x00,%xmm0,%xmm6,%xmm4
- vmovdqu 48-32(%r9),%xmm3
- vpxor %xmm5,%xmm4,%xmm4
- vpunpckhqdq %xmm9,%xmm9,%xmm5
- vpclmulqdq $0x11,%xmm0,%xmm6,%xmm6
- vpxor %xmm9,%xmm5,%xmm5
- vpxor %xmm7,%xmm6,%xmm6
- vpclmulqdq $0x10,%xmm15,%xmm2,%xmm2
- vmovdqu 80-32(%r9),%xmm15
- vpxor %xmm1,%xmm2,%xmm2
-
- vmovdqu 80(%rsp),%xmm1
- vpclmulqdq $0x00,%xmm3,%xmm9,%xmm7
- vmovdqu 64-32(%r9),%xmm0
- vpxor %xmm4,%xmm7,%xmm7
- vpunpckhqdq %xmm1,%xmm1,%xmm4
- vpclmulqdq $0x11,%xmm3,%xmm9,%xmm9
- vpxor %xmm1,%xmm4,%xmm4
- vpxor %xmm6,%xmm9,%xmm9
- vpclmulqdq $0x00,%xmm15,%xmm5,%xmm5
- vpxor %xmm2,%xmm5,%xmm5
-
- vmovdqu 96(%rsp),%xmm2
- vpclmulqdq $0x00,%xmm0,%xmm1,%xmm6
- vmovdqu 96-32(%r9),%xmm3
- vpxor %xmm7,%xmm6,%xmm6
- vpunpckhqdq %xmm2,%xmm2,%xmm7
- vpclmulqdq $0x11,%xmm0,%xmm1,%xmm1
- vpxor %xmm2,%xmm7,%xmm7
- vpxor %xmm9,%xmm1,%xmm1
- vpclmulqdq $0x10,%xmm15,%xmm4,%xmm4
- vmovdqu 128-32(%r9),%xmm15
- vpxor %xmm5,%xmm4,%xmm4
-
- vpxor 112(%rsp),%xmm8,%xmm8
- vpclmulqdq $0x00,%xmm3,%xmm2,%xmm5
- vmovdqu 112-32(%r9),%xmm0
- vpunpckhqdq %xmm8,%xmm8,%xmm9
- vpxor %xmm6,%xmm5,%xmm5
- vpclmulqdq $0x11,%xmm3,%xmm2,%xmm2
- vpxor %xmm8,%xmm9,%xmm9
- vpxor %xmm1,%xmm2,%xmm2
- vpclmulqdq $0x00,%xmm15,%xmm7,%xmm7
- vpxor %xmm4,%xmm7,%xmm4
-
- vpclmulqdq $0x00,%xmm0,%xmm8,%xmm6
- vmovdqu 0-32(%r9),%xmm3
- vpunpckhqdq %xmm14,%xmm14,%xmm1
- vpclmulqdq $0x11,%xmm0,%xmm8,%xmm8
- vpxor %xmm14,%xmm1,%xmm1
- vpxor %xmm5,%xmm6,%xmm5
- vpclmulqdq $0x10,%xmm15,%xmm9,%xmm9
- vmovdqu 32-32(%r9),%xmm15
- vpxor %xmm2,%xmm8,%xmm7
- vpxor %xmm4,%xmm9,%xmm6
-
- vmovdqu 16-32(%r9),%xmm0
- vpxor %xmm5,%xmm7,%xmm9
- vpclmulqdq $0x00,%xmm3,%xmm14,%xmm4
- vpxor %xmm9,%xmm6,%xmm6
- vpunpckhqdq %xmm13,%xmm13,%xmm2
- vpclmulqdq $0x11,%xmm3,%xmm14,%xmm14
- vpxor %xmm13,%xmm2,%xmm2
- vpslldq $8,%xmm6,%xmm9
- vpclmulqdq $0x00,%xmm15,%xmm1,%xmm1
- vpxor %xmm9,%xmm5,%xmm8
- vpsrldq $8,%xmm6,%xmm6
- vpxor %xmm6,%xmm7,%xmm7
-
- vpclmulqdq $0x00,%xmm0,%xmm13,%xmm5
- vmovdqu 48-32(%r9),%xmm3
- vpxor %xmm4,%xmm5,%xmm5
- vpunpckhqdq %xmm12,%xmm12,%xmm9
- vpclmulqdq $0x11,%xmm0,%xmm13,%xmm13
- vpxor %xmm12,%xmm9,%xmm9
- vpxor %xmm14,%xmm13,%xmm13
- vpalignr $8,%xmm8,%xmm8,%xmm14
- vpclmulqdq $0x10,%xmm15,%xmm2,%xmm2
- vmovdqu 80-32(%r9),%xmm15
- vpxor %xmm1,%xmm2,%xmm2
-
- vpclmulqdq $0x00,%xmm3,%xmm12,%xmm4
- vmovdqu 64-32(%r9),%xmm0
- vpxor %xmm5,%xmm4,%xmm4
- vpunpckhqdq %xmm11,%xmm11,%xmm1
- vpclmulqdq $0x11,%xmm3,%xmm12,%xmm12
- vpxor %xmm11,%xmm1,%xmm1
- vpxor %xmm13,%xmm12,%xmm12
- vxorps 16(%rsp),%xmm7,%xmm7
- vpclmulqdq $0x00,%xmm15,%xmm9,%xmm9
- vpxor %xmm2,%xmm9,%xmm9
-
- vpclmulqdq $0x10,16(%r11),%xmm8,%xmm8
- vxorps %xmm14,%xmm8,%xmm8
-
- vpclmulqdq $0x00,%xmm0,%xmm11,%xmm5
- vmovdqu 96-32(%r9),%xmm3
- vpxor %xmm4,%xmm5,%xmm5
- vpunpckhqdq %xmm10,%xmm10,%xmm2
- vpclmulqdq $0x11,%xmm0,%xmm11,%xmm11
- vpxor %xmm10,%xmm2,%xmm2
- vpalignr $8,%xmm8,%xmm8,%xmm14
- vpxor %xmm12,%xmm11,%xmm11
- vpclmulqdq $0x10,%xmm15,%xmm1,%xmm1
- vmovdqu 128-32(%r9),%xmm15
- vpxor %xmm9,%xmm1,%xmm1
-
- vxorps %xmm7,%xmm14,%xmm14
- vpclmulqdq $0x10,16(%r11),%xmm8,%xmm8
- vxorps %xmm14,%xmm8,%xmm8
-
- vpclmulqdq $0x00,%xmm3,%xmm10,%xmm4
- vmovdqu 112-32(%r9),%xmm0
- vpxor %xmm5,%xmm4,%xmm4
- vpunpckhqdq %xmm8,%xmm8,%xmm9
- vpclmulqdq $0x11,%xmm3,%xmm10,%xmm10
- vpxor %xmm8,%xmm9,%xmm9
- vpxor %xmm11,%xmm10,%xmm10
- vpclmulqdq $0x00,%xmm15,%xmm2,%xmm2
- vpxor %xmm1,%xmm2,%xmm2
-
- vpclmulqdq $0x00,%xmm0,%xmm8,%xmm5
- vpclmulqdq $0x11,%xmm0,%xmm8,%xmm7
- vpxor %xmm4,%xmm5,%xmm5
- vpclmulqdq $0x10,%xmm15,%xmm9,%xmm6
- vpxor %xmm10,%xmm7,%xmm7
- vpxor %xmm2,%xmm6,%xmm6
-
- vpxor %xmm5,%xmm7,%xmm4
- vpxor %xmm4,%xmm6,%xmm6
- vpslldq $8,%xmm6,%xmm1
- vmovdqu 16(%r11),%xmm3
- vpsrldq $8,%xmm6,%xmm6
- vpxor %xmm1,%xmm5,%xmm8
- vpxor %xmm6,%xmm7,%xmm7
-
- vpalignr $8,%xmm8,%xmm8,%xmm2
- vpclmulqdq $0x10,%xmm3,%xmm8,%xmm8
- vpxor %xmm2,%xmm8,%xmm8
-
- vpalignr $8,%xmm8,%xmm8,%xmm2
- vpclmulqdq $0x10,%xmm3,%xmm8,%xmm8
- vpxor %xmm7,%xmm2,%xmm2
- vpxor %xmm2,%xmm8,%xmm8
- vpshufb (%r11),%xmm8,%xmm8
- vmovdqu %xmm8,-64(%r9)
-
- vzeroupper
- movq -48(%rax),%r15
-.cfi_restore %r15
- movq -40(%rax),%r14
-.cfi_restore %r14
- movq -32(%rax),%r13
-.cfi_restore %r13
- movq -24(%rax),%r12
-.cfi_restore %r12
- movq -16(%rax),%rbp
-.cfi_restore %rbp
- movq -8(%rax),%rbx
-.cfi_restore %rbx
- leaq (%rax),%rsp
-.cfi_def_cfa_register %rsp
-.Lgcm_enc_abort:
- movq %r10,%rax
- .byte 0xf3,0xc3
-.cfi_endproc
-.size aesni_gcm_encrypt,.-aesni_gcm_encrypt
-.align 64
-.Lbswap_mask:
-.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
-.Lpoly:
-.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
-.Lone_msb:
-.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
-.Ltwo_lsb:
-.byte 2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
-.Lone_lsb:
-.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
-.byte 65,69,83,45,78,73,32,71,67,77,32,109,111,100,117,108,101,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
-.align 64
diff --git a/secure/lib/libcrypto/amd64/aesni-mb-x86_64.S b/secure/lib/libcrypto/amd64/aesni-mb-x86_64.S
index 706c5c59d38d..de4bac9488f7 100644
--- a/secure/lib/libcrypto/amd64/aesni-mb-x86_64.S
+++ b/secure/lib/libcrypto/amd64/aesni-mb-x86_64.S
@@ -9,14 +9,6 @@
.align 32
aesni_multi_cbc_encrypt:
.cfi_startproc
- cmpl $2,%edx
- jb .Lenc_non_avx
- movl OPENSSL_ia32cap_P+4(%rip),%ecx
- testl $268435456,%ecx
- jnz _avx_cbc_enc_shortcut
- jmp .Lenc_non_avx
-.align 16
-.Lenc_non_avx:
movq %rsp,%rax
.cfi_def_cfa_register %rax
pushq %rbx
@@ -291,14 +283,6 @@ aesni_multi_cbc_encrypt:
.align 32
aesni_multi_cbc_decrypt:
.cfi_startproc
- cmpl $2,%edx
- jb .Ldec_non_avx
- movl OPENSSL_ia32cap_P+4(%rip),%ecx
- testl $268435456,%ecx
- jnz _avx_cbc_dec_shortcut
- jmp .Ldec_non_avx
-.align 16
-.Ldec_non_avx:
movq %rsp,%rax
.cfi_def_cfa_register %rax
pushq %rbx
@@ -558,952 +542,3 @@ aesni_multi_cbc_decrypt:
.byte 0xf3,0xc3
.cfi_endproc
.size aesni_multi_cbc_decrypt,.-aesni_multi_cbc_decrypt
-.type aesni_multi_cbc_encrypt_avx,@function
-.align 32
-aesni_multi_cbc_encrypt_avx:
-.cfi_startproc
-_avx_cbc_enc_shortcut:
- movq %rsp,%rax
-.cfi_def_cfa_register %rax
- pushq %rbx
-.cfi_offset %rbx,-16
- pushq %rbp
-.cfi_offset %rbp,-24
- pushq %r12
-.cfi_offset %r12,-32
- pushq %r13
-.cfi_offset %r13,-40
- pushq %r14
-.cfi_offset %r14,-48
- pushq %r15
-.cfi_offset %r15,-56
-
-
-
-
-
-
-
-
- subq $192,%rsp
- andq $-128,%rsp
- movq %rax,16(%rsp)
-.cfi_escape 0x0f,0x05,0x77,0x10,0x06,0x23,0x08
-
-.Lenc8x_body:
- vzeroupper
- vmovdqu (%rsi),%xmm15
- leaq 120(%rsi),%rsi
- leaq 160(%rdi),%rdi
- shrl $1,%edx
-
-.Lenc8x_loop_grande:
-
- xorl %edx,%edx
- movl -144(%rdi),%ecx
- movq -160(%rdi),%r8
- cmpl %edx,%ecx
- movq -152(%rdi),%rbx
- cmovgl %ecx,%edx
- testl %ecx,%ecx
- vmovdqu -136(%rdi),%xmm2
- movl %ecx,32(%rsp)
- cmovleq %rsp,%r8
- subq %r8,%rbx
- movq %rbx,64(%rsp)
- movl -104(%rdi),%ecx
- movq -120(%rdi),%r9
- cmpl %edx,%ecx
- movq -112(%rdi),%rbp
- cmovgl %ecx,%edx
- testl %ecx,%ecx
- vmovdqu -96(%rdi),%xmm3
- movl %ecx,36(%rsp)
- cmovleq %rsp,%r9
- subq %r9,%rbp
- movq %rbp,72(%rsp)
- movl -64(%rdi),%ecx
- movq -80(%rdi),%r10
- cmpl %edx,%ecx
- movq -72(%rdi),%rbp
- cmovgl %ecx,%edx
- testl %ecx,%ecx
- vmovdqu -56(%rdi),%xmm4
- movl %ecx,40(%rsp)
- cmovleq %rsp,%r10
- subq %r10,%rbp
- movq %rbp,80(%rsp)
- movl -24(%rdi),%ecx
- movq -40(%rdi),%r11
- cmpl %edx,%ecx
- movq -32(%rdi),%rbp
- cmovgl %ecx,%edx
- testl %ecx,%ecx
- vmovdqu -16(%rdi),%xmm5
- movl %ecx,44(%rsp)
- cmovleq %rsp,%r11
- subq %r11,%rbp
- movq %rbp,88(%rsp)
- movl 16(%rdi),%ecx
- movq 0(%rdi),%r12
- cmpl %edx,%ecx
- movq 8(%rdi),%rbp
- cmovgl %ecx,%edx
- testl %ecx,%ecx
- vmovdqu 24(%rdi),%xmm6
- movl %ecx,48(%rsp)
- cmovleq %rsp,%r12
- subq %r12,%rbp
- movq %rbp,96(%rsp)
- movl 56(%rdi),%ecx
- movq 40(%rdi),%r13
- cmpl %edx,%ecx
- movq 48(%rdi),%rbp
- cmovgl %ecx,%edx
- testl %ecx,%ecx
- vmovdqu 64(%rdi),%xmm7
- movl %ecx,52(%rsp)
- cmovleq %rsp,%r13
- subq %r13,%rbp
- movq %rbp,104(%rsp)
- movl 96(%rdi),%ecx
- movq 80(%rdi),%r14
- cmpl %edx,%ecx
- movq 88(%rdi),%rbp
- cmovgl %ecx,%edx
- testl %ecx,%ecx
- vmovdqu 104(%rdi),%xmm8
- movl %ecx,56(%rsp)
- cmovleq %rsp,%r14
- subq %r14,%rbp
- movq %rbp,112(%rsp)
- movl 136(%rdi),%ecx
- movq 120(%rdi),%r15
- cmpl %edx,%ecx
- movq 128(%rdi),%rbp
- cmovgl %ecx,%edx
- testl %ecx,%ecx
- vmovdqu 144(%rdi),%xmm9
- movl %ecx,60(%rsp)
- cmovleq %rsp,%r15
- subq %r15,%rbp
- movq %rbp,120(%rsp)
- testl %edx,%edx
- jz .Lenc8x_done
-
- vmovups 16-120(%rsi),%xmm1
- vmovups 32-120(%rsi),%xmm0
- movl 240-120(%rsi),%eax
-
- vpxor (%r8),%xmm15,%xmm10
- leaq 128(%rsp),%rbp
- vpxor (%r9),%xmm15,%xmm11
- vpxor (%r10),%xmm15,%xmm12
- vpxor (%r11),%xmm15,%xmm13
- vpxor %xmm10,%xmm2,%xmm2
- vpxor (%r12),%xmm15,%xmm10
- vpxor %xmm11,%xmm3,%xmm3
- vpxor (%r13),%xmm15,%xmm11
- vpxor %xmm12,%xmm4,%xmm4
- vpxor (%r14),%xmm15,%xmm12
- vpxor %xmm13,%xmm5,%xmm5
- vpxor (%r15),%xmm15,%xmm13
- vpxor %xmm10,%xmm6,%xmm6
- movl $1,%ecx
- vpxor %xmm11,%xmm7,%xmm7
- vpxor %xmm12,%xmm8,%xmm8
- vpxor %xmm13,%xmm9,%xmm9
- jmp .Loop_enc8x
-
-.align 32
-.Loop_enc8x:
- vaesenc %xmm1,%xmm2,%xmm2
- cmpl 32+0(%rsp),%ecx
- vaesenc %xmm1,%xmm3,%xmm3
- prefetcht0 31(%r8)
- vaesenc %xmm1,%xmm4,%xmm4
- vaesenc %xmm1,%xmm5,%xmm5
- leaq (%r8,%rbx,1),%rbx
- cmovgeq %rsp,%r8
- vaesenc %xmm1,%xmm6,%xmm6
- cmovgq %rsp,%rbx
- vaesenc %xmm1,%xmm7,%xmm7
- subq %r8,%rbx
- vaesenc %xmm1,%xmm8,%xmm8
- vpxor 16(%r8),%xmm15,%xmm10
- movq %rbx,64+0(%rsp)
- vaesenc %xmm1,%xmm9,%xmm9
- vmovups -72(%rsi),%xmm1
- leaq 16(%r8,%rbx,1),%r8
- vmovdqu %xmm10,0(%rbp)
- vaesenc %xmm0,%xmm2,%xmm2
- cmpl 32+4(%rsp),%ecx
- movq 64+8(%rsp),%rbx
- vaesenc %xmm0,%xmm3,%xmm3
- prefetcht0 31(%r9)
- vaesenc %xmm0,%xmm4,%xmm4
- vaesenc %xmm0,%xmm5,%xmm5
- leaq (%r9,%rbx,1),%rbx
- cmovgeq %rsp,%r9
- vaesenc %xmm0,%xmm6,%xmm6
- cmovgq %rsp,%rbx
- vaesenc %xmm0,%xmm7,%xmm7
- subq %r9,%rbx
- vaesenc %xmm0,%xmm8,%xmm8
- vpxor 16(%r9),%xmm15,%xmm11
- movq %rbx,64+8(%rsp)
- vaesenc %xmm0,%xmm9,%xmm9
- vmovups -56(%rsi),%xmm0
- leaq 16(%r9,%rbx,1),%r9
- vmovdqu %xmm11,16(%rbp)
- vaesenc %xmm1,%xmm2,%xmm2
- cmpl 32+8(%rsp),%ecx
- movq 64+16(%rsp),%rbx
- vaesenc %xmm1,%xmm3,%xmm3
- prefetcht0 31(%r10)
- vaesenc %xmm1,%xmm4,%xmm4
- prefetcht0 15(%r8)
- vaesenc %xmm1,%xmm5,%xmm5
- leaq (%r10,%rbx,1),%rbx
- cmovgeq %rsp,%r10
- vaesenc %xmm1,%xmm6,%xmm6
- cmovgq %rsp,%rbx
- vaesenc %xmm1,%xmm7,%xmm7
- subq %r10,%rbx
- vaesenc %xmm1,%xmm8,%xmm8
- vpxor 16(%r10),%xmm15,%xmm12
- movq %rbx,64+16(%rsp)
- vaesenc %xmm1,%xmm9,%xmm9
- vmovups -40(%rsi),%xmm1
- leaq 16(%r10,%rbx,1),%r10
- vmovdqu %xmm12,32(%rbp)
- vaesenc %xmm0,%xmm2,%xmm2
- cmpl 32+12(%rsp),%ecx
- movq 64+24(%rsp),%rbx
- vaesenc %xmm0,%xmm3,%xmm3
- prefetcht0 31(%r11)
- vaesenc %xmm0,%xmm4,%xmm4
- prefetcht0 15(%r9)
- vaesenc %xmm0,%xmm5,%xmm5
- leaq (%r11,%rbx,1),%rbx
- cmovgeq %rsp,%r11
- vaesenc %xmm0,%xmm6,%xmm6
- cmovgq %rsp,%rbx
- vaesenc %xmm0,%xmm7,%xmm7
- subq %r11,%rbx
- vaesenc %xmm0,%xmm8,%xmm8
- vpxor 16(%r11),%xmm15,%xmm13
- movq %rbx,64+24(%rsp)
- vaesenc %xmm0,%xmm9,%xmm9
- vmovups -24(%rsi),%xmm0
- leaq 16(%r11,%rbx,1),%r11
- vmovdqu %xmm13,48(%rbp)
- vaesenc %xmm1,%xmm2,%xmm2
- cmpl 32+16(%rsp),%ecx
- movq 64+32(%rsp),%rbx
- vaesenc %xmm1,%xmm3,%xmm3
- prefetcht0 31(%r12)
- vaesenc %xmm1,%xmm4,%xmm4
- prefetcht0 15(%r10)
- vaesenc %xmm1,%xmm5,%xmm5
- leaq (%r12,%rbx,1),%rbx
- cmovgeq %rsp,%r12
- vaesenc %xmm1,%xmm6,%xmm6
- cmovgq %rsp,%rbx
- vaesenc %xmm1,%xmm7,%xmm7
- subq %r12,%rbx
- vaesenc %xmm1,%xmm8,%xmm8
- vpxor 16(%r12),%xmm15,%xmm10
- movq %rbx,64+32(%rsp)
- vaesenc %xmm1,%xmm9,%xmm9
- vmovups -8(%rsi),%xmm1
- leaq 16(%r12,%rbx,1),%r12
- vaesenc %xmm0,%xmm2,%xmm2
- cmpl 32+20(%rsp),%ecx
- movq 64+40(%rsp),%rbx
- vaesenc %xmm0,%xmm3,%xmm3
- prefetcht0 31(%r13)
- vaesenc %xmm0,%xmm4,%xmm4
- prefetcht0 15(%r11)
- vaesenc %xmm0,%xmm5,%xmm5
- leaq (%rbx,%r13,1),%rbx
- cmovgeq %rsp,%r13
- vaesenc %xmm0,%xmm6,%xmm6
- cmovgq %rsp,%rbx
- vaesenc %xmm0,%xmm7,%xmm7
- subq %r13,%rbx
- vaesenc %xmm0,%xmm8,%xmm8
- vpxor 16(%r13),%xmm15,%xmm11
- movq %rbx,64+40(%rsp)
- vaesenc %xmm0,%xmm9,%xmm9
- vmovups 8(%rsi),%xmm0
- leaq 16(%r13,%rbx,1),%r13
- vaesenc %xmm1,%xmm2,%xmm2
- cmpl 32+24(%rsp),%ecx
- movq 64+48(%rsp),%rbx
- vaesenc %xmm1,%xmm3,%xmm3
- prefetcht0 31(%r14)
- vaesenc %xmm1,%xmm4,%xmm4
- prefetcht0 15(%r12)
- vaesenc %xmm1,%xmm5,%xmm5
- leaq (%r14,%rbx,1),%rbx
- cmovgeq %rsp,%r14
- vaesenc %xmm1,%xmm6,%xmm6
- cmovgq %rsp,%rbx
- vaesenc %xmm1,%xmm7,%xmm7
- subq %r14,%rbx
- vaesenc %xmm1,%xmm8,%xmm8
- vpxor 16(%r14),%xmm15,%xmm12
- movq %rbx,64+48(%rsp)
- vaesenc %xmm1,%xmm9,%xmm9
- vmovups 24(%rsi),%xmm1
- leaq 16(%r14,%rbx,1),%r14
- vaesenc %xmm0,%xmm2,%xmm2
- cmpl 32+28(%rsp),%ecx
- movq 64+56(%rsp),%rbx
- vaesenc %xmm0,%xmm3,%xmm3
- prefetcht0 31(%r15)
- vaesenc %xmm0,%xmm4,%xmm4
- prefetcht0 15(%r13)
- vaesenc %xmm0,%xmm5,%xmm5
- leaq (%r15,%rbx,1),%rbx
- cmovgeq %rsp,%r15
- vaesenc %xmm0,%xmm6,%xmm6
- cmovgq %rsp,%rbx
- vaesenc %xmm0,%xmm7,%xmm7
- subq %r15,%rbx
- vaesenc %xmm0,%xmm8,%xmm8
- vpxor 16(%r15),%xmm15,%xmm13
- movq %rbx,64+56(%rsp)
- vaesenc %xmm0,%xmm9,%xmm9
- vmovups 40(%rsi),%xmm0
- leaq 16(%r15,%rbx,1),%r15
- vmovdqu 32(%rsp),%xmm14
- prefetcht0 15(%r14)
- prefetcht0 15(%r15)
- cmpl $11,%eax
- jb .Lenc8x_tail
-
- vaesenc %xmm1,%xmm2,%xmm2
- vaesenc %xmm1,%xmm3,%xmm3
- vaesenc %xmm1,%xmm4,%xmm4
- vaesenc %xmm1,%xmm5,%xmm5
- vaesenc %xmm1,%xmm6,%xmm6
- vaesenc %xmm1,%xmm7,%xmm7
- vaesenc %xmm1,%xmm8,%xmm8
- vaesenc %xmm1,%xmm9,%xmm9
- vmovups 176-120(%rsi),%xmm1
-
- vaesenc %xmm0,%xmm2,%xmm2
- vaesenc %xmm0,%xmm3,%xmm3
- vaesenc %xmm0,%xmm4,%xmm4
- vaesenc %xmm0,%xmm5,%xmm5
- vaesenc %xmm0,%xmm6,%xmm6
- vaesenc %xmm0,%xmm7,%xmm7
- vaesenc %xmm0,%xmm8,%xmm8
- vaesenc %xmm0,%xmm9,%xmm9
- vmovups 192-120(%rsi),%xmm0
- je .Lenc8x_tail
-
- vaesenc %xmm1,%xmm2,%xmm2
- vaesenc %xmm1,%xmm3,%xmm3
- vaesenc %xmm1,%xmm4,%xmm4
- vaesenc %xmm1,%xmm5,%xmm5
- vaesenc %xmm1,%xmm6,%xmm6
- vaesenc %xmm1,%xmm7,%xmm7
- vaesenc %xmm1,%xmm8,%xmm8
- vaesenc %xmm1,%xmm9,%xmm9
- vmovups 208-120(%rsi),%xmm1
-
- vaesenc %xmm0,%xmm2,%xmm2
- vaesenc %xmm0,%xmm3,%xmm3
- vaesenc %xmm0,%xmm4,%xmm4
- vaesenc %xmm0,%xmm5,%xmm5
- vaesenc %xmm0,%xmm6,%xmm6
- vaesenc %xmm0,%xmm7,%xmm7
- vaesenc %xmm0,%xmm8,%xmm8
- vaesenc %xmm0,%xmm9,%xmm9
- vmovups 224-120(%rsi),%xmm0
-
-.Lenc8x_tail:
- vaesenc %xmm1,%xmm2,%xmm2
- vpxor %xmm15,%xmm15,%xmm15
- vaesenc %xmm1,%xmm3,%xmm3
- vaesenc %xmm1,%xmm4,%xmm4
- vpcmpgtd %xmm15,%xmm14,%xmm15
- vaesenc %xmm1,%xmm5,%xmm5
- vaesenc %xmm1,%xmm6,%xmm6
- vpaddd %xmm14,%xmm15,%xmm15
- vmovdqu 48(%rsp),%xmm14
- vaesenc %xmm1,%xmm7,%xmm7
- movq 64(%rsp),%rbx
- vaesenc %xmm1,%xmm8,%xmm8
- vaesenc %xmm1,%xmm9,%xmm9
- vmovups 16-120(%rsi),%xmm1
-
- vaesenclast %xmm0,%xmm2,%xmm2
- vmovdqa %xmm15,32(%rsp)
- vpxor %xmm15,%xmm15,%xmm15
- vaesenclast %xmm0,%xmm3,%xmm3
- vaesenclast %xmm0,%xmm4,%xmm4
- vpcmpgtd %xmm15,%xmm14,%xmm15
- vaesenclast %xmm0,%xmm5,%xmm5
- vaesenclast %xmm0,%xmm6,%xmm6
- vpaddd %xmm15,%xmm14,%xmm14
- vmovdqu -120(%rsi),%xmm15
- vaesenclast %xmm0,%xmm7,%xmm7
- vaesenclast %xmm0,%xmm8,%xmm8
- vmovdqa %xmm14,48(%rsp)
- vaesenclast %xmm0,%xmm9,%xmm9
- vmovups 32-120(%rsi),%xmm0
-
- vmovups %xmm2,-16(%r8)
- subq %rbx,%r8
- vpxor 0(%rbp),%xmm2,%xmm2
- vmovups %xmm3,-16(%r9)
- subq 72(%rsp),%r9
- vpxor 16(%rbp),%xmm3,%xmm3
- vmovups %xmm4,-16(%r10)
- subq 80(%rsp),%r10
- vpxor 32(%rbp),%xmm4,%xmm4
- vmovups %xmm5,-16(%r11)
- subq 88(%rsp),%r11
- vpxor 48(%rbp),%xmm5,%xmm5
- vmovups %xmm6,-16(%r12)
- subq 96(%rsp),%r12
- vpxor %xmm10,%xmm6,%xmm6
- vmovups %xmm7,-16(%r13)
- subq 104(%rsp),%r13
- vpxor %xmm11,%xmm7,%xmm7
- vmovups %xmm8,-16(%r14)
- subq 112(%rsp),%r14
- vpxor %xmm12,%xmm8,%xmm8
- vmovups %xmm9,-16(%r15)
- subq 120(%rsp),%r15
- vpxor %xmm13,%xmm9,%xmm9
-
- decl %edx
- jnz .Loop_enc8x
-
- movq 16(%rsp),%rax
-.cfi_def_cfa %rax,8
-
-
-
-
-
-.Lenc8x_done:
- vzeroupper
- movq -48(%rax),%r15
-.cfi_restore %r15
- movq -40(%rax),%r14
-.cfi_restore %r14
- movq -32(%rax),%r13
-.cfi_restore %r13
- movq -24(%rax),%r12
-.cfi_restore %r12
- movq -16(%rax),%rbp
-.cfi_restore %rbp
- movq -8(%rax),%rbx
-.cfi_restore %rbx
- leaq (%rax),%rsp
-.cfi_def_cfa_register %rsp
-.Lenc8x_epilogue:
- .byte 0xf3,0xc3
-.cfi_endproc
-.size aesni_multi_cbc_encrypt_avx,.-aesni_multi_cbc_encrypt_avx
-
-.type aesni_multi_cbc_decrypt_avx,@function
-.align 32
-aesni_multi_cbc_decrypt_avx:
-.cfi_startproc
-_avx_cbc_dec_shortcut:
- movq %rsp,%rax
-.cfi_def_cfa_register %rax
- pushq %rbx
-.cfi_offset %rbx,-16
- pushq %rbp
-.cfi_offset %rbp,-24
- pushq %r12
-.cfi_offset %r12,-32
- pushq %r13
-.cfi_offset %r13,-40
- pushq %r14
-.cfi_offset %r14,-48
- pushq %r15
-.cfi_offset %r15,-56
-
-
-
-
-
-
-
-
-
- subq $256,%rsp
- andq $-256,%rsp
- subq $192,%rsp
- movq %rax,16(%rsp)
-.cfi_escape 0x0f,0x05,0x77,0x10,0x06,0x23,0x08
-
-.Ldec8x_body:
- vzeroupper
- vmovdqu (%rsi),%xmm15
- leaq 120(%rsi),%rsi
- leaq 160(%rdi),%rdi
- shrl $1,%edx
-
-.Ldec8x_loop_grande:
-
- xorl %edx,%edx
- movl -144(%rdi),%ecx
- movq -160(%rdi),%r8
- cmpl %edx,%ecx
- movq -152(%rdi),%rbx
- cmovgl %ecx,%edx
- testl %ecx,%ecx
- vmovdqu -136(%rdi),%xmm2
- movl %ecx,32(%rsp)
- cmovleq %rsp,%r8
- subq %r8,%rbx
- movq %rbx,64(%rsp)
- vmovdqu %xmm2,192(%rsp)
- movl -104(%rdi),%ecx
- movq -120(%rdi),%r9
- cmpl %edx,%ecx
- movq -112(%rdi),%rbp
- cmovgl %ecx,%edx
- testl %ecx,%ecx
- vmovdqu -96(%rdi),%xmm3
- movl %ecx,36(%rsp)
- cmovleq %rsp,%r9
- subq %r9,%rbp
- movq %rbp,72(%rsp)
- vmovdqu %xmm3,208(%rsp)
- movl -64(%rdi),%ecx
- movq -80(%rdi),%r10
- cmpl %edx,%ecx
- movq -72(%rdi),%rbp
- cmovgl %ecx,%edx
- testl %ecx,%ecx
- vmovdqu -56(%rdi),%xmm4
- movl %ecx,40(%rsp)
- cmovleq %rsp,%r10
- subq %r10,%rbp
- movq %rbp,80(%rsp)
- vmovdqu %xmm4,224(%rsp)
- movl -24(%rdi),%ecx
- movq -40(%rdi),%r11
- cmpl %edx,%ecx
- movq -32(%rdi),%rbp
- cmovgl %ecx,%edx
- testl %ecx,%ecx
- vmovdqu -16(%rdi),%xmm5
- movl %ecx,44(%rsp)
- cmovleq %rsp,%r11
- subq %r11,%rbp
- movq %rbp,88(%rsp)
- vmovdqu %xmm5,240(%rsp)
- movl 16(%rdi),%ecx
- movq 0(%rdi),%r12
- cmpl %edx,%ecx
- movq 8(%rdi),%rbp
- cmovgl %ecx,%edx
- testl %ecx,%ecx
- vmovdqu 24(%rdi),%xmm6
- movl %ecx,48(%rsp)
- cmovleq %rsp,%r12
- subq %r12,%rbp
- movq %rbp,96(%rsp)
- vmovdqu %xmm6,256(%rsp)
- movl 56(%rdi),%ecx
- movq 40(%rdi),%r13
- cmpl %edx,%ecx
- movq 48(%rdi),%rbp
- cmovgl %ecx,%edx
- testl %ecx,%ecx
- vmovdqu 64(%rdi),%xmm7
- movl %ecx,52(%rsp)
- cmovleq %rsp,%r13
- subq %r13,%rbp
- movq %rbp,104(%rsp)
- vmovdqu %xmm7,272(%rsp)
- movl 96(%rdi),%ecx
- movq 80(%rdi),%r14
- cmpl %edx,%ecx
- movq 88(%rdi),%rbp
- cmovgl %ecx,%edx
- testl %ecx,%ecx
- vmovdqu 104(%rdi),%xmm8
- movl %ecx,56(%rsp)
- cmovleq %rsp,%r14
- subq %r14,%rbp
- movq %rbp,112(%rsp)
- vmovdqu %xmm8,288(%rsp)
- movl 136(%rdi),%ecx
- movq 120(%rdi),%r15
- cmpl %edx,%ecx
- movq 128(%rdi),%rbp
- cmovgl %ecx,%edx
- testl %ecx,%ecx
- vmovdqu 144(%rdi),%xmm9
- movl %ecx,60(%rsp)
- cmovleq %rsp,%r15
- subq %r15,%rbp
- movq %rbp,120(%rsp)
- vmovdqu %xmm9,304(%rsp)
- testl %edx,%edx
- jz .Ldec8x_done
-
- vmovups 16-120(%rsi),%xmm1
- vmovups 32-120(%rsi),%xmm0
- movl 240-120(%rsi),%eax
- leaq 192+128(%rsp),%rbp
-
- vmovdqu (%r8),%xmm2
- vmovdqu (%r9),%xmm3
- vmovdqu (%r10),%xmm4
- vmovdqu (%r11),%xmm5
- vmovdqu (%r12),%xmm6
- vmovdqu (%r13),%xmm7
- vmovdqu (%r14),%xmm8
- vmovdqu (%r15),%xmm9
- vmovdqu %xmm2,0(%rbp)
- vpxor %xmm15,%xmm2,%xmm2
- vmovdqu %xmm3,16(%rbp)
- vpxor %xmm15,%xmm3,%xmm3
- vmovdqu %xmm4,32(%rbp)
- vpxor %xmm15,%xmm4,%xmm4
- vmovdqu %xmm5,48(%rbp)
- vpxor %xmm15,%xmm5,%xmm5
- vmovdqu %xmm6,64(%rbp)
- vpxor %xmm15,%xmm6,%xmm6
- vmovdqu %xmm7,80(%rbp)
- vpxor %xmm15,%xmm7,%xmm7
- vmovdqu %xmm8,96(%rbp)
- vpxor %xmm15,%xmm8,%xmm8
- vmovdqu %xmm9,112(%rbp)
- vpxor %xmm15,%xmm9,%xmm9
- xorq $0x80,%rbp
- movl $1,%ecx
- jmp .Loop_dec8x
-
-.align 32
-.Loop_dec8x:
- vaesdec %xmm1,%xmm2,%xmm2
- cmpl 32+0(%rsp),%ecx
- vaesdec %xmm1,%xmm3,%xmm3
- prefetcht0 31(%r8)
- vaesdec %xmm1,%xmm4,%xmm4
- vaesdec %xmm1,%xmm5,%xmm5
- leaq (%r8,%rbx,1),%rbx
- cmovgeq %rsp,%r8
- vaesdec %xmm1,%xmm6,%xmm6
- cmovgq %rsp,%rbx
- vaesdec %xmm1,%xmm7,%xmm7
- subq %r8,%rbx
- vaesdec %xmm1,%xmm8,%xmm8
- vmovdqu 16(%r8),%xmm10
- movq %rbx,64+0(%rsp)
- vaesdec %xmm1,%xmm9,%xmm9
- vmovups -72(%rsi),%xmm1
- leaq 16(%r8,%rbx,1),%r8
- vmovdqu %xmm10,128(%rsp)
- vaesdec %xmm0,%xmm2,%xmm2
- cmpl 32+4(%rsp),%ecx
- movq 64+8(%rsp),%rbx
- vaesdec %xmm0,%xmm3,%xmm3
- prefetcht0 31(%r9)
- vaesdec %xmm0,%xmm4,%xmm4
- vaesdec %xmm0,%xmm5,%xmm5
- leaq (%r9,%rbx,1),%rbx
- cmovgeq %rsp,%r9
- vaesdec %xmm0,%xmm6,%xmm6
- cmovgq %rsp,%rbx
- vaesdec %xmm0,%xmm7,%xmm7
- subq %r9,%rbx
- vaesdec %xmm0,%xmm8,%xmm8
- vmovdqu 16(%r9),%xmm11
- movq %rbx,64+8(%rsp)
- vaesdec %xmm0,%xmm9,%xmm9
- vmovups -56(%rsi),%xmm0
- leaq 16(%r9,%rbx,1),%r9
- vmovdqu %xmm11,144(%rsp)
- vaesdec %xmm1,%xmm2,%xmm2
- cmpl 32+8(%rsp),%ecx
- movq 64+16(%rsp),%rbx
- vaesdec %xmm1,%xmm3,%xmm3
- prefetcht0 31(%r10)
- vaesdec %xmm1,%xmm4,%xmm4
- prefetcht0 15(%r8)
- vaesdec %xmm1,%xmm5,%xmm5
- leaq (%r10,%rbx,1),%rbx
- cmovgeq %rsp,%r10
- vaesdec %xmm1,%xmm6,%xmm6
- cmovgq %rsp,%rbx
- vaesdec %xmm1,%xmm7,%xmm7
- subq %r10,%rbx
- vaesdec %xmm1,%xmm8,%xmm8
- vmovdqu 16(%r10),%xmm12
- movq %rbx,64+16(%rsp)
- vaesdec %xmm1,%xmm9,%xmm9
- vmovups -40(%rsi),%xmm1
- leaq 16(%r10,%rbx,1),%r10
- vmovdqu %xmm12,160(%rsp)
- vaesdec %xmm0,%xmm2,%xmm2
- cmpl 32+12(%rsp),%ecx
- movq 64+24(%rsp),%rbx
- vaesdec %xmm0,%xmm3,%xmm3
- prefetcht0 31(%r11)
- vaesdec %xmm0,%xmm4,%xmm4
- prefetcht0 15(%r9)
- vaesdec %xmm0,%xmm5,%xmm5
- leaq (%r11,%rbx,1),%rbx
- cmovgeq %rsp,%r11
- vaesdec %xmm0,%xmm6,%xmm6
- cmovgq %rsp,%rbx
- vaesdec %xmm0,%xmm7,%xmm7
- subq %r11,%rbx
- vaesdec %xmm0,%xmm8,%xmm8
- vmovdqu 16(%r11),%xmm13
- movq %rbx,64+24(%rsp)
- vaesdec %xmm0,%xmm9,%xmm9
- vmovups -24(%rsi),%xmm0
- leaq 16(%r11,%rbx,1),%r11
- vmovdqu %xmm13,176(%rsp)
- vaesdec %xmm1,%xmm2,%xmm2
- cmpl 32+16(%rsp),%ecx
- movq 64+32(%rsp),%rbx
- vaesdec %xmm1,%xmm3,%xmm3
- prefetcht0 31(%r12)
- vaesdec %xmm1,%xmm4,%xmm4
- prefetcht0 15(%r10)
- vaesdec %xmm1,%xmm5,%xmm5
- leaq (%r12,%rbx,1),%rbx
- cmovgeq %rsp,%r12
- vaesdec %xmm1,%xmm6,%xmm6
- cmovgq %rsp,%rbx
- vaesdec %xmm1,%xmm7,%xmm7
- subq %r12,%rbx
- vaesdec %xmm1,%xmm8,%xmm8
- vmovdqu 16(%r12),%xmm10
- movq %rbx,64+32(%rsp)
- vaesdec %xmm1,%xmm9,%xmm9
- vmovups -8(%rsi),%xmm1
- leaq 16(%r12,%rbx,1),%r12
- vaesdec %xmm0,%xmm2,%xmm2
- cmpl 32+20(%rsp),%ecx
- movq 64+40(%rsp),%rbx
- vaesdec %xmm0,%xmm3,%xmm3
- prefetcht0 31(%r13)
- vaesdec %xmm0,%xmm4,%xmm4
- prefetcht0 15(%r11)
- vaesdec %xmm0,%xmm5,%xmm5
- leaq (%rbx,%r13,1),%rbx
- cmovgeq %rsp,%r13
- vaesdec %xmm0,%xmm6,%xmm6
- cmovgq %rsp,%rbx
- vaesdec %xmm0,%xmm7,%xmm7
- subq %r13,%rbx
- vaesdec %xmm0,%xmm8,%xmm8
- vmovdqu 16(%r13),%xmm11
- movq %rbx,64+40(%rsp)
- vaesdec %xmm0,%xmm9,%xmm9
- vmovups 8(%rsi),%xmm0
- leaq 16(%r13,%rbx,1),%r13
- vaesdec %xmm1,%xmm2,%xmm2
- cmpl 32+24(%rsp),%ecx
- movq 64+48(%rsp),%rbx
- vaesdec %xmm1,%xmm3,%xmm3
- prefetcht0 31(%r14)
- vaesdec %xmm1,%xmm4,%xmm4
- prefetcht0 15(%r12)
- vaesdec %xmm1,%xmm5,%xmm5
- leaq (%r14,%rbx,1),%rbx
- cmovgeq %rsp,%r14
- vaesdec %xmm1,%xmm6,%xmm6
- cmovgq %rsp,%rbx
- vaesdec %xmm1,%xmm7,%xmm7
- subq %r14,%rbx
- vaesdec %xmm1,%xmm8,%xmm8
- vmovdqu 16(%r14),%xmm12
- movq %rbx,64+48(%rsp)
- vaesdec %xmm1,%xmm9,%xmm9
- vmovups 24(%rsi),%xmm1
- leaq 16(%r14,%rbx,1),%r14
- vaesdec %xmm0,%xmm2,%xmm2
- cmpl 32+28(%rsp),%ecx
- movq 64+56(%rsp),%rbx
- vaesdec %xmm0,%xmm3,%xmm3
- prefetcht0 31(%r15)
- vaesdec %xmm0,%xmm4,%xmm4
- prefetcht0 15(%r13)
- vaesdec %xmm0,%xmm5,%xmm5
- leaq (%r15,%rbx,1),%rbx
- cmovgeq %rsp,%r15
- vaesdec %xmm0,%xmm6,%xmm6
- cmovgq %rsp,%rbx
- vaesdec %xmm0,%xmm7,%xmm7
- subq %r15,%rbx
- vaesdec %xmm0,%xmm8,%xmm8
- vmovdqu 16(%r15),%xmm13
- movq %rbx,64+56(%rsp)
- vaesdec %xmm0,%xmm9,%xmm9
- vmovups 40(%rsi),%xmm0
- leaq 16(%r15,%rbx,1),%r15
- vmovdqu 32(%rsp),%xmm14
- prefetcht0 15(%r14)
- prefetcht0 15(%r15)
- cmpl $11,%eax
- jb .Ldec8x_tail
-
- vaesdec %xmm1,%xmm2,%xmm2
- vaesdec %xmm1,%xmm3,%xmm3
- vaesdec %xmm1,%xmm4,%xmm4
- vaesdec %xmm1,%xmm5,%xmm5
- vaesdec %xmm1,%xmm6,%xmm6
- vaesdec %xmm1,%xmm7,%xmm7
- vaesdec %xmm1,%xmm8,%xmm8
- vaesdec %xmm1,%xmm9,%xmm9
- vmovups 176-120(%rsi),%xmm1
-
- vaesdec %xmm0,%xmm2,%xmm2
- vaesdec %xmm0,%xmm3,%xmm3
- vaesdec %xmm0,%xmm4,%xmm4
- vaesdec %xmm0,%xmm5,%xmm5
- vaesdec %xmm0,%xmm6,%xmm6
- vaesdec %xmm0,%xmm7,%xmm7
- vaesdec %xmm0,%xmm8,%xmm8
- vaesdec %xmm0,%xmm9,%xmm9
- vmovups 192-120(%rsi),%xmm0
- je .Ldec8x_tail
-
- vaesdec %xmm1,%xmm2,%xmm2
- vaesdec %xmm1,%xmm3,%xmm3
- vaesdec %xmm1,%xmm4,%xmm4
- vaesdec %xmm1,%xmm5,%xmm5
- vaesdec %xmm1,%xmm6,%xmm6
- vaesdec %xmm1,%xmm7,%xmm7
- vaesdec %xmm1,%xmm8,%xmm8
- vaesdec %xmm1,%xmm9,%xmm9
- vmovups 208-120(%rsi),%xmm1
-
- vaesdec %xmm0,%xmm2,%xmm2
- vaesdec %xmm0,%xmm3,%xmm3
- vaesdec %xmm0,%xmm4,%xmm4
- vaesdec %xmm0,%xmm5,%xmm5
- vaesdec %xmm0,%xmm6,%xmm6
- vaesdec %xmm0,%xmm7,%xmm7
- vaesdec %xmm0,%xmm8,%xmm8
- vaesdec %xmm0,%xmm9,%xmm9
- vmovups 224-120(%rsi),%xmm0
-
-.Ldec8x_tail:
- vaesdec %xmm1,%xmm2,%xmm2
- vpxor %xmm15,%xmm15,%xmm15
- vaesdec %xmm1,%xmm3,%xmm3
- vaesdec %xmm1,%xmm4,%xmm4
- vpcmpgtd %xmm15,%xmm14,%xmm15
- vaesdec %xmm1,%xmm5,%xmm5
- vaesdec %xmm1,%xmm6,%xmm6
- vpaddd %xmm14,%xmm15,%xmm15
- vmovdqu 48(%rsp),%xmm14
- vaesdec %xmm1,%xmm7,%xmm7
- movq 64(%rsp),%rbx
- vaesdec %xmm1,%xmm8,%xmm8
- vaesdec %xmm1,%xmm9,%xmm9
- vmovups 16-120(%rsi),%xmm1
-
- vaesdeclast %xmm0,%xmm2,%xmm2
- vmovdqa %xmm15,32(%rsp)
- vpxor %xmm15,%xmm15,%xmm15
- vaesdeclast %xmm0,%xmm3,%xmm3
- vpxor 0(%rbp),%xmm2,%xmm2
- vaesdeclast %xmm0,%xmm4,%xmm4
- vpxor 16(%rbp),%xmm3,%xmm3
- vpcmpgtd %xmm15,%xmm14,%xmm15
- vaesdeclast %xmm0,%xmm5,%xmm5
- vpxor 32(%rbp),%xmm4,%xmm4
- vaesdeclast %xmm0,%xmm6,%xmm6
- vpxor 48(%rbp),%xmm5,%xmm5
- vpaddd %xmm15,%xmm14,%xmm14
- vmovdqu -120(%rsi),%xmm15
- vaesdeclast %xmm0,%xmm7,%xmm7
- vpxor 64(%rbp),%xmm6,%xmm6
- vaesdeclast %xmm0,%xmm8,%xmm8
- vpxor 80(%rbp),%xmm7,%xmm7
- vmovdqa %xmm14,48(%rsp)
- vaesdeclast %xmm0,%xmm9,%xmm9
- vpxor 96(%rbp),%xmm8,%xmm8
- vmovups 32-120(%rsi),%xmm0
-
- vmovups %xmm2,-16(%r8)
- subq %rbx,%r8
- vmovdqu 128+0(%rsp),%xmm2
- vpxor 112(%rbp),%xmm9,%xmm9
- vmovups %xmm3,-16(%r9)
- subq 72(%rsp),%r9
- vmovdqu %xmm2,0(%rbp)
- vpxor %xmm15,%xmm2,%xmm2
- vmovdqu 128+16(%rsp),%xmm3
- vmovups %xmm4,-16(%r10)
- subq 80(%rsp),%r10
- vmovdqu %xmm3,16(%rbp)
- vpxor %xmm15,%xmm3,%xmm3
- vmovdqu 128+32(%rsp),%xmm4
- vmovups %xmm5,-16(%r11)
- subq 88(%rsp),%r11
- vmovdqu %xmm4,32(%rbp)
- vpxor %xmm15,%xmm4,%xmm4
- vmovdqu 128+48(%rsp),%xmm5
- vmovups %xmm6,-16(%r12)
- subq 96(%rsp),%r12
- vmovdqu %xmm5,48(%rbp)
- vpxor %xmm15,%xmm5,%xmm5
- vmovdqu %xmm10,64(%rbp)
- vpxor %xmm10,%xmm15,%xmm6
- vmovups %xmm7,-16(%r13)
- subq 104(%rsp),%r13
- vmovdqu %xmm11,80(%rbp)
- vpxor %xmm11,%xmm15,%xmm7
- vmovups %xmm8,-16(%r14)
- subq 112(%rsp),%r14
- vmovdqu %xmm12,96(%rbp)
- vpxor %xmm12,%xmm15,%xmm8
- vmovups %xmm9,-16(%r15)
- subq 120(%rsp),%r15
- vmovdqu %xmm13,112(%rbp)
- vpxor %xmm13,%xmm15,%xmm9
-
- xorq $128,%rbp
- decl %edx
- jnz .Loop_dec8x
-
- movq 16(%rsp),%rax
-.cfi_def_cfa %rax,8
-
-
-
-
-
-.Ldec8x_done:
- vzeroupper
- movq -48(%rax),%r15
-.cfi_restore %r15
- movq -40(%rax),%r14
-.cfi_restore %r14
- movq -32(%rax),%r13
-.cfi_restore %r13
- movq -24(%rax),%r12
-.cfi_restore %r12
- movq -16(%rax),%rbp
-.cfi_restore %rbp
- movq -8(%rax),%rbx
-.cfi_restore %rbx
- leaq (%rax),%rsp
-.cfi_def_cfa_register %rsp
-.Ldec8x_epilogue:
- .byte 0xf3,0xc3
-.cfi_endproc
-.size aesni_multi_cbc_decrypt_avx,.-aesni_multi_cbc_decrypt_avx
diff --git a/secure/lib/libcrypto/amd64/aesni-sha1-x86_64.S b/secure/lib/libcrypto/amd64/aesni-sha1-x86_64.S
index 92fa5bfd685d..294db310a06a 100644
--- a/secure/lib/libcrypto/amd64/aesni-sha1-x86_64.S
+++ b/secure/lib/libcrypto/amd64/aesni-sha1-x86_64.S
@@ -7,18 +7,15 @@
.type aesni_cbc_sha1_enc,@function
.align 32
aesni_cbc_sha1_enc:
+.cfi_startproc
movl OPENSSL_ia32cap_P+0(%rip),%r10d
movq OPENSSL_ia32cap_P+4(%rip),%r11
btq $61,%r11
jc aesni_cbc_sha1_enc_shaext
- andl $268435456,%r11d
- andl $1073741824,%r10d
- orl %r11d,%r10d
- cmpl $1342177280,%r10d
- je aesni_cbc_sha1_enc_avx
jmp aesni_cbc_sha1_enc_ssse3
.byte 0xf3,0xc3
+.cfi_endproc
.size aesni_cbc_sha1_enc,.-aesni_cbc_sha1_enc
.type aesni_cbc_sha1_enc_ssse3,@function
.align 32
@@ -1397,1327 +1394,6 @@ aesni_cbc_sha1_enc_ssse3:
.byte 0xf3,0xc3
.cfi_endproc
.size aesni_cbc_sha1_enc_ssse3,.-aesni_cbc_sha1_enc_ssse3
-.type aesni_cbc_sha1_enc_avx,@function
-.align 32
-aesni_cbc_sha1_enc_avx:
-.cfi_startproc
- movq 8(%rsp),%r10
-
-
- pushq %rbx
-.cfi_adjust_cfa_offset 8
-.cfi_offset %rbx,-16
- pushq %rbp
-.cfi_adjust_cfa_offset 8
-.cfi_offset %rbp,-24
- pushq %r12
-.cfi_adjust_cfa_offset 8
-.cfi_offset %r12,-32
- pushq %r13
-.cfi_adjust_cfa_offset 8
-.cfi_offset %r13,-40
- pushq %r14
-.cfi_adjust_cfa_offset 8
-.cfi_offset %r14,-48
- pushq %r15
-.cfi_adjust_cfa_offset 8
-.cfi_offset %r15,-56
- leaq -104(%rsp),%rsp
-.cfi_adjust_cfa_offset 104
-
-
- vzeroall
- movq %rdi,%r12
- movq %rsi,%r13
- movq %rdx,%r14
- leaq 112(%rcx),%r15
- vmovdqu (%r8),%xmm12
- movq %r8,88(%rsp)
- shlq $6,%r14
- subq %r12,%r13
- movl 240-112(%r15),%r8d
- addq %r10,%r14
-
- leaq K_XX_XX(%rip),%r11
- movl 0(%r9),%eax
- movl 4(%r9),%ebx
- movl 8(%r9),%ecx
- movl 12(%r9),%edx
- movl %ebx,%esi
- movl 16(%r9),%ebp
- movl %ecx,%edi
- xorl %edx,%edi
- andl %edi,%esi
-
- vmovdqa 64(%r11),%xmm6
- vmovdqa 0(%r11),%xmm10
- vmovdqu 0(%r10),%xmm0
- vmovdqu 16(%r10),%xmm1
- vmovdqu 32(%r10),%xmm2
- vmovdqu 48(%r10),%xmm3
- vpshufb %xmm6,%xmm0,%xmm0
- addq $64,%r10
- vpshufb %xmm6,%xmm1,%xmm1
- vpshufb %xmm6,%xmm2,%xmm2
- vpshufb %xmm6,%xmm3,%xmm3
- vpaddd %xmm10,%xmm0,%xmm4
- vpaddd %xmm10,%xmm1,%xmm5
- vpaddd %xmm10,%xmm2,%xmm6
- vmovdqa %xmm4,0(%rsp)
- vmovdqa %xmm5,16(%rsp)
- vmovdqa %xmm6,32(%rsp)
- vmovups -112(%r15),%xmm15
- vmovups 16-112(%r15),%xmm14
- jmp .Loop_avx
-.align 32
-.Loop_avx:
- shrdl $2,%ebx,%ebx
- vmovdqu 0(%r12),%xmm13
- vpxor %xmm15,%xmm13,%xmm13
- vpxor %xmm13,%xmm12,%xmm12
- vaesenc %xmm14,%xmm12,%xmm12
- vmovups -80(%r15),%xmm15
- xorl %edx,%esi
- vpalignr $8,%xmm0,%xmm1,%xmm4
- movl %eax,%edi
- addl 0(%rsp),%ebp
- vpaddd %xmm3,%xmm10,%xmm9
- xorl %ecx,%ebx
- shldl $5,%eax,%eax
- vpsrldq $4,%xmm3,%xmm8
- addl %esi,%ebp
- andl %ebx,%edi
- vpxor %xmm0,%xmm4,%xmm4
- xorl %ecx,%ebx
- addl %eax,%ebp
- vpxor %xmm2,%xmm8,%xmm8
- shrdl $7,%eax,%eax
- xorl %ecx,%edi
- movl %ebp,%esi
- addl 4(%rsp),%edx
- vpxor %xmm8,%xmm4,%xmm4
- xorl %ebx,%eax
- shldl $5,%ebp,%ebp
- vmovdqa %xmm9,48(%rsp)
- addl %edi,%edx
- vaesenc %xmm15,%xmm12,%xmm12
- vmovups -64(%r15),%xmm14
- andl %eax,%esi
- vpsrld $31,%xmm4,%xmm8
- xorl %ebx,%eax
- addl %ebp,%edx
- shrdl $7,%ebp,%ebp
- xorl %ebx,%esi
- vpslldq $12,%xmm4,%xmm9
- vpaddd %xmm4,%xmm4,%xmm4
- movl %edx,%edi
- addl 8(%rsp),%ecx
- xorl %eax,%ebp
- shldl $5,%edx,%edx
- vpor %xmm8,%xmm4,%xmm4
- vpsrld $30,%xmm9,%xmm8
- addl %esi,%ecx
- andl %ebp,%edi
- xorl %eax,%ebp
- addl %edx,%ecx
- vpslld $2,%xmm9,%xmm9
- vpxor %xmm8,%xmm4,%xmm4
- shrdl $7,%edx,%edx
- xorl %eax,%edi
- movl %ecx,%esi
- addl 12(%rsp),%ebx
- vaesenc %xmm14,%xmm12,%xmm12
- vmovups -48(%r15),%xmm15
- vpxor %xmm9,%xmm4,%xmm4
- xorl %ebp,%edx
- shldl $5,%ecx,%ecx
- addl %edi,%ebx
- andl %edx,%esi
- xorl %ebp,%edx
- addl %ecx,%ebx
- shrdl $7,%ecx,%ecx
- xorl %ebp,%esi
- vpalignr $8,%xmm1,%xmm2,%xmm5
- movl %ebx,%edi
- addl 16(%rsp),%eax
- vpaddd %xmm4,%xmm10,%xmm9
- xorl %edx,%ecx
- shldl $5,%ebx,%ebx
- vpsrldq $4,%xmm4,%xmm8
- addl %esi,%eax
- andl %ecx,%edi
- vpxor %xmm1,%xmm5,%xmm5
- xorl %edx,%ecx
- addl %ebx,%eax
- vpxor %xmm3,%xmm8,%xmm8
- shrdl $7,%ebx,%ebx
- vaesenc %xmm15,%xmm12,%xmm12
- vmovups -32(%r15),%xmm14
- xorl %edx,%edi
- movl %eax,%esi
- addl 20(%rsp),%ebp
- vpxor %xmm8,%xmm5,%xmm5
- xorl %ecx,%ebx
- shldl $5,%eax,%eax
- vmovdqa %xmm9,0(%rsp)
- addl %edi,%ebp
- andl %ebx,%esi
- vpsrld $31,%xmm5,%xmm8
- xorl %ecx,%ebx
- addl %eax,%ebp
- shrdl $7,%eax,%eax
- xorl %ecx,%esi
- vpslldq $12,%xmm5,%xmm9
- vpaddd %xmm5,%xmm5,%xmm5
- movl %ebp,%edi
- addl 24(%rsp),%edx
- xorl %ebx,%eax
- shldl $5,%ebp,%ebp
- vpor %xmm8,%xmm5,%xmm5
- vpsrld $30,%xmm9,%xmm8
- addl %esi,%edx
- vaesenc %xmm14,%xmm12,%xmm12
- vmovups -16(%r15),%xmm15
- andl %eax,%edi
- xorl %ebx,%eax
- addl %ebp,%edx
- vpslld $2,%xmm9,%xmm9
- vpxor %xmm8,%xmm5,%xmm5
- shrdl $7,%ebp,%ebp
- xorl %ebx,%edi
- movl %edx,%esi
- addl 28(%rsp),%ecx
- vpxor %xmm9,%xmm5,%xmm5
- xorl %eax,%ebp
- shldl $5,%edx,%edx
- vmovdqa 16(%r11),%xmm10
- addl %edi,%ecx
- andl %ebp,%esi
- xorl %eax,%ebp
- addl %edx,%ecx
- shrdl $7,%edx,%edx
- xorl %eax,%esi
- vpalignr $8,%xmm2,%xmm3,%xmm6
- movl %ecx,%edi
- addl 32(%rsp),%ebx
- vaesenc %xmm15,%xmm12,%xmm12
- vmovups 0(%r15),%xmm14
- vpaddd %xmm5,%xmm10,%xmm9
- xorl %ebp,%edx
- shldl $5,%ecx,%ecx
- vpsrldq $4,%xmm5,%xmm8
- addl %esi,%ebx
- andl %edx,%edi
- vpxor %xmm2,%xmm6,%xmm6
- xorl %ebp,%edx
- addl %ecx,%ebx
- vpxor %xmm4,%xmm8,%xmm8
- shrdl $7,%ecx,%ecx
- xorl %ebp,%edi
- movl %ebx,%esi
- addl 36(%rsp),%eax
- vpxor %xmm8,%xmm6,%xmm6
- xorl %edx,%ecx
- shldl $5,%ebx,%ebx
- vmovdqa %xmm9,16(%rsp)
- addl %edi,%eax
- andl %ecx,%esi
- vpsrld $31,%xmm6,%xmm8
- xorl %edx,%ecx
- addl %ebx,%eax
- shrdl $7,%ebx,%ebx
- vaesenc %xmm14,%xmm12,%xmm12
- vmovups 16(%r15),%xmm15
- xorl %edx,%esi
- vpslldq $12,%xmm6,%xmm9
- vpaddd %xmm6,%xmm6,%xmm6
- movl %eax,%edi
- addl 40(%rsp),%ebp
- xorl %ecx,%ebx
- shldl $5,%eax,%eax
- vpor %xmm8,%xmm6,%xmm6
- vpsrld $30,%xmm9,%xmm8
- addl %esi,%ebp
- andl %ebx,%edi
- xorl %ecx,%ebx
- addl %eax,%ebp
- vpslld $2,%xmm9,%xmm9
- vpxor %xmm8,%xmm6,%xmm6
- shrdl $7,%eax,%eax
- xorl %ecx,%edi
- movl %ebp,%esi
- addl 44(%rsp),%edx
- vpxor %xmm9,%xmm6,%xmm6
- xorl %ebx,%eax
- shldl $5,%ebp,%ebp
- addl %edi,%edx
- vaesenc %xmm15,%xmm12,%xmm12
- vmovups 32(%r15),%xmm14
- andl %eax,%esi
- xorl %ebx,%eax
- addl %ebp,%edx
- shrdl $7,%ebp,%ebp
- xorl %ebx,%esi
- vpalignr $8,%xmm3,%xmm4,%xmm7
- movl %edx,%edi
- addl 48(%rsp),%ecx
- vpaddd %xmm6,%xmm10,%xmm9
- xorl %eax,%ebp
- shldl $5,%edx,%edx
- vpsrldq $4,%xmm6,%xmm8
- addl %esi,%ecx
- andl %ebp,%edi
- vpxor %xmm3,%xmm7,%xmm7
- xorl %eax,%ebp
- addl %edx,%ecx
- vpxor %xmm5,%xmm8,%xmm8
- shrdl $7,%edx,%edx
- xorl %eax,%edi
- movl %ecx,%esi
- addl 52(%rsp),%ebx
- vaesenc %xmm14,%xmm12,%xmm12
- vmovups 48(%r15),%xmm15
- vpxor %xmm8,%xmm7,%xmm7
- xorl %ebp,%edx
- shldl $5,%ecx,%ecx
- vmovdqa %xmm9,32(%rsp)
- addl %edi,%ebx
- andl %edx,%esi
- vpsrld $31,%xmm7,%xmm8
- xorl %ebp,%edx
- addl %ecx,%ebx
- shrdl $7,%ecx,%ecx
- xorl %ebp,%esi
- vpslldq $12,%xmm7,%xmm9
- vpaddd %xmm7,%xmm7,%xmm7
- movl %ebx,%edi
- addl 56(%rsp),%eax
- xorl %edx,%ecx
- shldl $5,%ebx,%ebx
- vpor %xmm8,%xmm7,%xmm7
- vpsrld $30,%xmm9,%xmm8
- addl %esi,%eax
- andl %ecx,%edi
- xorl %edx,%ecx
- addl %ebx,%eax
- vpslld $2,%xmm9,%xmm9
- vpxor %xmm8,%xmm7,%xmm7
- shrdl $7,%ebx,%ebx
- cmpl $11,%r8d
- jb .Lvaesenclast6
- vaesenc %xmm15,%xmm12,%xmm12
- vmovups 64(%r15),%xmm14
- vaesenc %xmm14,%xmm12,%xmm12
- vmovups 80(%r15),%xmm15
- je .Lvaesenclast6
- vaesenc %xmm15,%xmm12,%xmm12
- vmovups 96(%r15),%xmm14
- vaesenc %xmm14,%xmm12,%xmm12
- vmovups 112(%r15),%xmm15
-.Lvaesenclast6:
- vaesenclast %xmm15,%xmm12,%xmm12
- vmovups -112(%r15),%xmm15
- vmovups 16-112(%r15),%xmm14
- xorl %edx,%edi
- movl %eax,%esi
- addl 60(%rsp),%ebp
- vpxor %xmm9,%xmm7,%xmm7
- xorl %ecx,%ebx
- shldl $5,%eax,%eax
- addl %edi,%ebp
- andl %ebx,%esi
- xorl %ecx,%ebx
- addl %eax,%ebp
- vpalignr $8,%xmm6,%xmm7,%xmm8
- vpxor %xmm4,%xmm0,%xmm0
- shrdl $7,%eax,%eax
- xorl %ecx,%esi
- movl %ebp,%edi
- addl 0(%rsp),%edx
- vpxor %xmm1,%xmm0,%xmm0
- xorl %ebx,%eax
- shldl $5,%ebp,%ebp
- vpaddd %xmm7,%xmm10,%xmm9
- addl %esi,%edx
- vmovdqu 16(%r12),%xmm13
- vpxor %xmm15,%xmm13,%xmm13
- vmovups %xmm12,0(%r12,%r13,1)
- vpxor %xmm13,%xmm12,%xmm12
- vaesenc %xmm14,%xmm12,%xmm12
- vmovups -80(%r15),%xmm15
- andl %eax,%edi
- vpxor %xmm8,%xmm0,%xmm0
- xorl %ebx,%eax
- addl %ebp,%edx
- shrdl $7,%ebp,%ebp
- xorl %ebx,%edi
- vpsrld $30,%xmm0,%xmm8
- vmovdqa %xmm9,48(%rsp)
- movl %edx,%esi
- addl 4(%rsp),%ecx
- xorl %eax,%ebp
- shldl $5,%edx,%edx
- vpslld $2,%xmm0,%xmm0
- addl %edi,%ecx
- andl %ebp,%esi
- xorl %eax,%ebp
- addl %edx,%ecx
- shrdl $7,%edx,%edx
- xorl %eax,%esi
- movl %ecx,%edi
- addl 8(%rsp),%ebx
- vaesenc %xmm15,%xmm12,%xmm12
- vmovups -64(%r15),%xmm14
- vpor %xmm8,%xmm0,%xmm0
- xorl %ebp,%edx
- shldl $5,%ecx,%ecx
- addl %esi,%ebx
- andl %edx,%edi
- xorl %ebp,%edx
- addl %ecx,%ebx
- addl 12(%rsp),%eax
- xorl %ebp,%edi
- movl %ebx,%esi
- shldl $5,%ebx,%ebx
- addl %edi,%eax
- xorl %edx,%esi
- shrdl $7,%ecx,%ecx
- addl %ebx,%eax
- vpalignr $8,%xmm7,%xmm0,%xmm8
- vpxor %xmm5,%xmm1,%xmm1
- addl 16(%rsp),%ebp
- vaesenc %xmm14,%xmm12,%xmm12
- vmovups -48(%r15),%xmm15
- xorl %ecx,%esi
- movl %eax,%edi
- shldl $5,%eax,%eax
- vpxor %xmm2,%xmm1,%xmm1
- addl %esi,%ebp
- xorl %ecx,%edi
- vpaddd %xmm0,%xmm10,%xmm9
- shrdl $7,%ebx,%ebx
- addl %eax,%ebp
- vpxor %xmm8,%xmm1,%xmm1
- addl 20(%rsp),%edx
- xorl %ebx,%edi
- movl %ebp,%esi
- shldl $5,%ebp,%ebp
- vpsrld $30,%xmm1,%xmm8
- vmovdqa %xmm9,0(%rsp)
- addl %edi,%edx
- xorl %ebx,%esi
- shrdl $7,%eax,%eax
- addl %ebp,%edx
- vpslld $2,%xmm1,%xmm1
- addl 24(%rsp),%ecx
- xorl %eax,%esi
- movl %edx,%edi
- shldl $5,%edx,%edx
- addl %esi,%ecx
- vaesenc %xmm15,%xmm12,%xmm12
- vmovups -32(%r15),%xmm14
- xorl %eax,%edi
- shrdl $7,%ebp,%ebp
- addl %edx,%ecx
- vpor %xmm8,%xmm1,%xmm1
- addl 28(%rsp),%ebx
- xorl %ebp,%edi
- movl %ecx,%esi
- shldl $5,%ecx,%ecx
- addl %edi,%ebx
- xorl %ebp,%esi
- shrdl $7,%edx,%edx
- addl %ecx,%ebx
- vpalignr $8,%xmm0,%xmm1,%xmm8
- vpxor %xmm6,%xmm2,%xmm2
- addl 32(%rsp),%eax
- xorl %edx,%esi
- movl %ebx,%edi
- shldl $5,%ebx,%ebx
- vpxor %xmm3,%xmm2,%xmm2
- addl %esi,%eax
- xorl %edx,%edi
- vpaddd %xmm1,%xmm10,%xmm9
- vmovdqa 32(%r11),%xmm10
- shrdl $7,%ecx,%ecx
- addl %ebx,%eax
- vpxor %xmm8,%xmm2,%xmm2
- addl 36(%rsp),%ebp
- vaesenc %xmm14,%xmm12,%xmm12
- vmovups -16(%r15),%xmm15
- xorl %ecx,%edi
- movl %eax,%esi
- shldl $5,%eax,%eax
- vpsrld $30,%xmm2,%xmm8
- vmovdqa %xmm9,16(%rsp)
- addl %edi,%ebp
- xorl %ecx,%esi
- shrdl $7,%ebx,%ebx
- addl %eax,%ebp
- vpslld $2,%xmm2,%xmm2
- addl 40(%rsp),%edx
- xorl %ebx,%esi
- movl %ebp,%edi
- shldl $5,%ebp,%ebp
- addl %esi,%edx
- xorl %ebx,%edi
- shrdl $7,%eax,%eax
- addl %ebp,%edx
- vpor %xmm8,%xmm2,%xmm2
- addl 44(%rsp),%ecx
- xorl %eax,%edi
- movl %edx,%esi
- shldl $5,%edx,%edx
- addl %edi,%ecx
- vaesenc %xmm15,%xmm12,%xmm12
- vmovups 0(%r15),%xmm14
- xorl %eax,%esi
- shrdl $7,%ebp,%ebp
- addl %edx,%ecx
- vpalignr $8,%xmm1,%xmm2,%xmm8
- vpxor %xmm7,%xmm3,%xmm3
- addl 48(%rsp),%ebx
- xorl %ebp,%esi
- movl %ecx,%edi
- shldl $5,%ecx,%ecx
- vpxor %xmm4,%xmm3,%xmm3
- addl %esi,%ebx
- xorl %ebp,%edi
- vpaddd %xmm2,%xmm10,%xmm9
- shrdl $7,%edx,%edx
- addl %ecx,%ebx
- vpxor %xmm8,%xmm3,%xmm3
- addl 52(%rsp),%eax
- xorl %edx,%edi
- movl %ebx,%esi
- shldl $5,%ebx,%ebx
- vpsrld $30,%xmm3,%xmm8
- vmovdqa %xmm9,32(%rsp)
- addl %edi,%eax
- xorl %edx,%esi
- shrdl $7,%ecx,%ecx
- addl %ebx,%eax
- vpslld $2,%xmm3,%xmm3
- addl 56(%rsp),%ebp
- vaesenc %xmm14,%xmm12,%xmm12
- vmovups 16(%r15),%xmm15
- xorl %ecx,%esi
- movl %eax,%edi
- shldl $5,%eax,%eax
- addl %esi,%ebp
- xorl %ecx,%edi
- shrdl $7,%ebx,%ebx
- addl %eax,%ebp
- vpor %xmm8,%xmm3,%xmm3
- addl 60(%rsp),%edx
- xorl %ebx,%edi
- movl %ebp,%esi
- shldl $5,%ebp,%ebp
- addl %edi,%edx
- xorl %ebx,%esi
- shrdl $7,%eax,%eax
- addl %ebp,%edx
- vpalignr $8,%xmm2,%xmm3,%xmm8
- vpxor %xmm0,%xmm4,%xmm4
- addl 0(%rsp),%ecx
- xorl %eax,%esi
- movl %edx,%edi
- shldl $5,%edx,%edx
- vpxor %xmm5,%xmm4,%xmm4
- addl %esi,%ecx
- vaesenc %xmm15,%xmm12,%xmm12
- vmovups 32(%r15),%xmm14
- xorl %eax,%edi
- vpaddd %xmm3,%xmm10,%xmm9
- shrdl $7,%ebp,%ebp
- addl %edx,%ecx
- vpxor %xmm8,%xmm4,%xmm4
- addl 4(%rsp),%ebx
- xorl %ebp,%edi
- movl %ecx,%esi
- shldl $5,%ecx,%ecx
- vpsrld $30,%xmm4,%xmm8
- vmovdqa %xmm9,48(%rsp)
- addl %edi,%ebx
- xorl %ebp,%esi
- shrdl $7,%edx,%edx
- addl %ecx,%ebx
- vpslld $2,%xmm4,%xmm4
- addl 8(%rsp),%eax
- xorl %edx,%esi
- movl %ebx,%edi
- shldl $5,%ebx,%ebx
- addl %esi,%eax
- xorl %edx,%edi
- shrdl $7,%ecx,%ecx
- addl %ebx,%eax
- vpor %xmm8,%xmm4,%xmm4
- addl 12(%rsp),%ebp
- vaesenc %xmm14,%xmm12,%xmm12
- vmovups 48(%r15),%xmm15
- xorl %ecx,%edi
- movl %eax,%esi
- shldl $5,%eax,%eax
- addl %edi,%ebp
- xorl %ecx,%esi
- shrdl $7,%ebx,%ebx
- addl %eax,%ebp
- vpalignr $8,%xmm3,%xmm4,%xmm8
- vpxor %xmm1,%xmm5,%xmm5
- addl 16(%rsp),%edx
- xorl %ebx,%esi
- movl %ebp,%edi
- shldl $5,%ebp,%ebp
- vpxor %xmm6,%xmm5,%xmm5
- addl %esi,%edx
- xorl %ebx,%edi
- vpaddd %xmm4,%xmm10,%xmm9
- shrdl $7,%eax,%eax
- addl %ebp,%edx
- vpxor %xmm8,%xmm5,%xmm5
- addl 20(%rsp),%ecx
- xorl %eax,%edi
- movl %edx,%esi
- shldl $5,%edx,%edx
- vpsrld $30,%xmm5,%xmm8
- vmovdqa %xmm9,0(%rsp)
- addl %edi,%ecx
- cmpl $11,%r8d
- jb .Lvaesenclast7
- vaesenc %xmm15,%xmm12,%xmm12
- vmovups 64(%r15),%xmm14
- vaesenc %xmm14,%xmm12,%xmm12
- vmovups 80(%r15),%xmm15
- je .Lvaesenclast7
- vaesenc %xmm15,%xmm12,%xmm12
- vmovups 96(%r15),%xmm14
- vaesenc %xmm14,%xmm12,%xmm12
- vmovups 112(%r15),%xmm15
-.Lvaesenclast7:
- vaesenclast %xmm15,%xmm12,%xmm12
- vmovups -112(%r15),%xmm15
- vmovups 16-112(%r15),%xmm14
- xorl %eax,%esi
- shrdl $7,%ebp,%ebp
- addl %edx,%ecx
- vpslld $2,%xmm5,%xmm5
- addl 24(%rsp),%ebx
- xorl %ebp,%esi
- movl %ecx,%edi
- shldl $5,%ecx,%ecx
- addl %esi,%ebx
- xorl %ebp,%edi
- shrdl $7,%edx,%edx
- addl %ecx,%ebx
- vpor %xmm8,%xmm5,%xmm5
- addl 28(%rsp),%eax
- shrdl $7,%ecx,%ecx
- movl %ebx,%esi
- xorl %edx,%edi
- shldl $5,%ebx,%ebx
- addl %edi,%eax
- xorl %ecx,%esi
- xorl %edx,%ecx
- addl %ebx,%eax
- vpalignr $8,%xmm4,%xmm5,%xmm8
- vpxor %xmm2,%xmm6,%xmm6
- addl 32(%rsp),%ebp
- vmovdqu 32(%r12),%xmm13
- vpxor %xmm15,%xmm13,%xmm13
- vmovups %xmm12,16(%r13,%r12,1)
- vpxor %xmm13,%xmm12,%xmm12
- vaesenc %xmm14,%xmm12,%xmm12
- vmovups -80(%r15),%xmm15
- andl %ecx,%esi
- xorl %edx,%ecx
- shrdl $7,%ebx,%ebx
- vpxor %xmm7,%xmm6,%xmm6
- movl %eax,%edi
- xorl %ecx,%esi
- vpaddd %xmm5,%xmm10,%xmm9
- shldl $5,%eax,%eax
- addl %esi,%ebp
- vpxor %xmm8,%xmm6,%xmm6
- xorl %ebx,%edi
- xorl %ecx,%ebx
- addl %eax,%ebp
- addl 36(%rsp),%edx
- vpsrld $30,%xmm6,%xmm8
- vmovdqa %xmm9,16(%rsp)
- andl %ebx,%edi
- xorl %ecx,%ebx
- shrdl $7,%eax,%eax
- movl %ebp,%esi
- vpslld $2,%xmm6,%xmm6
- xorl %ebx,%edi
- shldl $5,%ebp,%ebp
- addl %edi,%edx
- vaesenc %xmm15,%xmm12,%xmm12
- vmovups -64(%r15),%xmm14
- xorl %eax,%esi
- xorl %ebx,%eax
- addl %ebp,%edx
- addl 40(%rsp),%ecx
- andl %eax,%esi
- vpor %xmm8,%xmm6,%xmm6
- xorl %ebx,%eax
- shrdl $7,%ebp,%ebp
- movl %edx,%edi
- xorl %eax,%esi
- shldl $5,%edx,%edx
- addl %esi,%ecx
- xorl %ebp,%edi
- xorl %eax,%ebp
- addl %edx,%ecx
- addl 44(%rsp),%ebx
- andl %ebp,%edi
- xorl %eax,%ebp
- shrdl $7,%edx,%edx
- vaesenc %xmm14,%xmm12,%xmm12
- vmovups -48(%r15),%xmm15
- movl %ecx,%esi
- xorl %ebp,%edi
- shldl $5,%ecx,%ecx
- addl %edi,%ebx
- xorl %edx,%esi
- xorl %ebp,%edx
- addl %ecx,%ebx
- vpalignr $8,%xmm5,%xmm6,%xmm8
- vpxor %xmm3,%xmm7,%xmm7
- addl 48(%rsp),%eax
- andl %edx,%esi
- xorl %ebp,%edx
- shrdl $7,%ecx,%ecx
- vpxor %xmm0,%xmm7,%xmm7
- movl %ebx,%edi
- xorl %edx,%esi
- vpaddd %xmm6,%xmm10,%xmm9
- vmovdqa 48(%r11),%xmm10
- shldl $5,%ebx,%ebx
- addl %esi,%eax
- vpxor %xmm8,%xmm7,%xmm7
- xorl %ecx,%edi
- xorl %edx,%ecx
- addl %ebx,%eax
- addl 52(%rsp),%ebp
- vaesenc %xmm15,%xmm12,%xmm12
- vmovups -32(%r15),%xmm14
- vpsrld $30,%xmm7,%xmm8
- vmovdqa %xmm9,32(%rsp)
- andl %ecx,%edi
- xorl %edx,%ecx
- shrdl $7,%ebx,%ebx
- movl %eax,%esi
- vpslld $2,%xmm7,%xmm7
- xorl %ecx,%edi
- shldl $5,%eax,%eax
- addl %edi,%ebp
- xorl %ebx,%esi
- xorl %ecx,%ebx
- addl %eax,%ebp
- addl 56(%rsp),%edx
- andl %ebx,%esi
- vpor %xmm8,%xmm7,%xmm7
- xorl %ecx,%ebx
- shrdl $7,%eax,%eax
- movl %ebp,%edi
- xorl %ebx,%esi
- shldl $5,%ebp,%ebp
- addl %esi,%edx
- vaesenc %xmm14,%xmm12,%xmm12
- vmovups -16(%r15),%xmm15
- xorl %eax,%edi
- xorl %ebx,%eax
- addl %ebp,%edx
- addl 60(%rsp),%ecx
- andl %eax,%edi
- xorl %ebx,%eax
- shrdl $7,%ebp,%ebp
- movl %edx,%esi
- xorl %eax,%edi
- shldl $5,%edx,%edx
- addl %edi,%ecx
- xorl %ebp,%esi
- xorl %eax,%ebp
- addl %edx,%ecx
- vpalignr $8,%xmm6,%xmm7,%xmm8
- vpxor %xmm4,%xmm0,%xmm0
- addl 0(%rsp),%ebx
- andl %ebp,%esi
- xorl %eax,%ebp
- shrdl $7,%edx,%edx
- vaesenc %xmm15,%xmm12,%xmm12
- vmovups 0(%r15),%xmm14
- vpxor %xmm1,%xmm0,%xmm0
- movl %ecx,%edi
- xorl %ebp,%esi
- vpaddd %xmm7,%xmm10,%xmm9
- shldl $5,%ecx,%ecx
- addl %esi,%ebx
- vpxor %xmm8,%xmm0,%xmm0
- xorl %edx,%edi
- xorl %ebp,%edx
- addl %ecx,%ebx
- addl 4(%rsp),%eax
- vpsrld $30,%xmm0,%xmm8
- vmovdqa %xmm9,48(%rsp)
- andl %edx,%edi
- xorl %ebp,%edx
- shrdl $7,%ecx,%ecx
- movl %ebx,%esi
- vpslld $2,%xmm0,%xmm0
- xorl %edx,%edi
- shldl $5,%ebx,%ebx
- addl %edi,%eax
- xorl %ecx,%esi
- xorl %edx,%ecx
- addl %ebx,%eax
- addl 8(%rsp),%ebp
- vaesenc %xmm14,%xmm12,%xmm12
- vmovups 16(%r15),%xmm15
- andl %ecx,%esi
- vpor %xmm8,%xmm0,%xmm0
- xorl %edx,%ecx
- shrdl $7,%ebx,%ebx
- movl %eax,%edi
- xorl %ecx,%esi
- shldl $5,%eax,%eax
- addl %esi,%ebp
- xorl %ebx,%edi
- xorl %ecx,%ebx
- addl %eax,%ebp
- addl 12(%rsp),%edx
- andl %ebx,%edi
- xorl %ecx,%ebx
- shrdl $7,%eax,%eax
- movl %ebp,%esi
- xorl %ebx,%edi
- shldl $5,%ebp,%ebp
- addl %edi,%edx
- vaesenc %xmm15,%xmm12,%xmm12
- vmovups 32(%r15),%xmm14
- xorl %eax,%esi
- xorl %ebx,%eax
- addl %ebp,%edx
- vpalignr $8,%xmm7,%xmm0,%xmm8
- vpxor %xmm5,%xmm1,%xmm1
- addl 16(%rsp),%ecx
- andl %eax,%esi
- xorl %ebx,%eax
- shrdl $7,%ebp,%ebp
- vpxor %xmm2,%xmm1,%xmm1
- movl %edx,%edi
- xorl %eax,%esi
- vpaddd %xmm0,%xmm10,%xmm9
- shldl $5,%edx,%edx
- addl %esi,%ecx
- vpxor %xmm8,%xmm1,%xmm1
- xorl %ebp,%edi
- xorl %eax,%ebp
- addl %edx,%ecx
- addl 20(%rsp),%ebx
- vpsrld $30,%xmm1,%xmm8
- vmovdqa %xmm9,0(%rsp)
- andl %ebp,%edi
- xorl %eax,%ebp
- shrdl $7,%edx,%edx
- vaesenc %xmm14,%xmm12,%xmm12
- vmovups 48(%r15),%xmm15
- movl %ecx,%esi
- vpslld $2,%xmm1,%xmm1
- xorl %ebp,%edi
- shldl $5,%ecx,%ecx
- addl %edi,%ebx
- xorl %edx,%esi
- xorl %ebp,%edx
- addl %ecx,%ebx
- addl 24(%rsp),%eax
- andl %edx,%esi
- vpor %xmm8,%xmm1,%xmm1
- xorl %ebp,%edx
- shrdl $7,%ecx,%ecx
- movl %ebx,%edi
- xorl %edx,%esi
- shldl $5,%ebx,%ebx
- addl %esi,%eax
- xorl %ecx,%edi
- xorl %edx,%ecx
- addl %ebx,%eax
- addl 28(%rsp),%ebp
- cmpl $11,%r8d
- jb .Lvaesenclast8
- vaesenc %xmm15,%xmm12,%xmm12
- vmovups 64(%r15),%xmm14
- vaesenc %xmm14,%xmm12,%xmm12
- vmovups 80(%r15),%xmm15
- je .Lvaesenclast8
- vaesenc %xmm15,%xmm12,%xmm12
- vmovups 96(%r15),%xmm14
- vaesenc %xmm14,%xmm12,%xmm12
- vmovups 112(%r15),%xmm15
-.Lvaesenclast8:
- vaesenclast %xmm15,%xmm12,%xmm12
- vmovups -112(%r15),%xmm15
- vmovups 16-112(%r15),%xmm14
- andl %ecx,%edi
- xorl %edx,%ecx
- shrdl $7,%ebx,%ebx
- movl %eax,%esi
- xorl %ecx,%edi
- shldl $5,%eax,%eax
- addl %edi,%ebp
- xorl %ebx,%esi
- xorl %ecx,%ebx
- addl %eax,%ebp
- vpalignr $8,%xmm0,%xmm1,%xmm8
- vpxor %xmm6,%xmm2,%xmm2
- addl 32(%rsp),%edx
- andl %ebx,%esi
- xorl %ecx,%ebx
- shrdl $7,%eax,%eax
- vpxor %xmm3,%xmm2,%xmm2
- movl %ebp,%edi
- xorl %ebx,%esi
- vpaddd %xmm1,%xmm10,%xmm9
- shldl $5,%ebp,%ebp
- addl %esi,%edx
- vmovdqu 48(%r12),%xmm13
- vpxor %xmm15,%xmm13,%xmm13
- vmovups %xmm12,32(%r13,%r12,1)
- vpxor %xmm13,%xmm12,%xmm12
- vaesenc %xmm14,%xmm12,%xmm12
- vmovups -80(%r15),%xmm15
- vpxor %xmm8,%xmm2,%xmm2
- xorl %eax,%edi
- xorl %ebx,%eax
- addl %ebp,%edx
- addl 36(%rsp),%ecx
- vpsrld $30,%xmm2,%xmm8
- vmovdqa %xmm9,16(%rsp)
- andl %eax,%edi
- xorl %ebx,%eax
- shrdl $7,%ebp,%ebp
- movl %edx,%esi
- vpslld $2,%xmm2,%xmm2
- xorl %eax,%edi
- shldl $5,%edx,%edx
- addl %edi,%ecx
- xorl %ebp,%esi
- xorl %eax,%ebp
- addl %edx,%ecx
- addl 40(%rsp),%ebx
- andl %ebp,%esi
- vpor %xmm8,%xmm2,%xmm2
- xorl %eax,%ebp
- shrdl $7,%edx,%edx
- vaesenc %xmm15,%xmm12,%xmm12
- vmovups -64(%r15),%xmm14
- movl %ecx,%edi
- xorl %ebp,%esi
- shldl $5,%ecx,%ecx
- addl %esi,%ebx
- xorl %edx,%edi
- xorl %ebp,%edx
- addl %ecx,%ebx
- addl 44(%rsp),%eax
- andl %edx,%edi
- xorl %ebp,%edx
- shrdl $7,%ecx,%ecx
- movl %ebx,%esi
- xorl %edx,%edi
- shldl $5,%ebx,%ebx
- addl %edi,%eax
- xorl %edx,%esi
- addl %ebx,%eax
- vpalignr $8,%xmm1,%xmm2,%xmm8
- vpxor %xmm7,%xmm3,%xmm3
- addl 48(%rsp),%ebp
- vaesenc %xmm14,%xmm12,%xmm12
- vmovups -48(%r15),%xmm15
- xorl %ecx,%esi
- movl %eax,%edi
- shldl $5,%eax,%eax
- vpxor %xmm4,%xmm3,%xmm3
- addl %esi,%ebp
- xorl %ecx,%edi
- vpaddd %xmm2,%xmm10,%xmm9
- shrdl $7,%ebx,%ebx
- addl %eax,%ebp
- vpxor %xmm8,%xmm3,%xmm3
- addl 52(%rsp),%edx
- xorl %ebx,%edi
- movl %ebp,%esi
- shldl $5,%ebp,%ebp
- vpsrld $30,%xmm3,%xmm8
- vmovdqa %xmm9,32(%rsp)
- addl %edi,%edx
- xorl %ebx,%esi
- shrdl $7,%eax,%eax
- addl %ebp,%edx
- vpslld $2,%xmm3,%xmm3
- addl 56(%rsp),%ecx
- xorl %eax,%esi
- movl %edx,%edi
- shldl $5,%edx,%edx
- addl %esi,%ecx
- vaesenc %xmm15,%xmm12,%xmm12
- vmovups -32(%r15),%xmm14
- xorl %eax,%edi
- shrdl $7,%ebp,%ebp
- addl %edx,%ecx
- vpor %xmm8,%xmm3,%xmm3
- addl 60(%rsp),%ebx
- xorl %ebp,%edi
- movl %ecx,%esi
- shldl $5,%ecx,%ecx
- addl %edi,%ebx
- xorl %ebp,%esi
- shrdl $7,%edx,%edx
- addl %ecx,%ebx
- addl 0(%rsp),%eax
- vpaddd %xmm3,%xmm10,%xmm9
- xorl %edx,%esi
- movl %ebx,%edi
- shldl $5,%ebx,%ebx
- addl %esi,%eax
- vmovdqa %xmm9,48(%rsp)
- xorl %edx,%edi
- shrdl $7,%ecx,%ecx
- addl %ebx,%eax
- addl 4(%rsp),%ebp
- vaesenc %xmm14,%xmm12,%xmm12
- vmovups -16(%r15),%xmm15
- xorl %ecx,%edi
- movl %eax,%esi
- shldl $5,%eax,%eax
- addl %edi,%ebp
- xorl %ecx,%esi
- shrdl $7,%ebx,%ebx
- addl %eax,%ebp
- addl 8(%rsp),%edx
- xorl %ebx,%esi
- movl %ebp,%edi
- shldl $5,%ebp,%ebp
- addl %esi,%edx
- xorl %ebx,%edi
- shrdl $7,%eax,%eax
- addl %ebp,%edx
- addl 12(%rsp),%ecx
- xorl %eax,%edi
- movl %edx,%esi
- shldl $5,%edx,%edx
- addl %edi,%ecx
- vaesenc %xmm15,%xmm12,%xmm12
- vmovups 0(%r15),%xmm14
- xorl %eax,%esi
- shrdl $7,%ebp,%ebp
- addl %edx,%ecx
- cmpq %r14,%r10
- je .Ldone_avx
- vmovdqa 64(%r11),%xmm9
- vmovdqa 0(%r11),%xmm10
- vmovdqu 0(%r10),%xmm0
- vmovdqu 16(%r10),%xmm1
- vmovdqu 32(%r10),%xmm2
- vmovdqu 48(%r10),%xmm3
- vpshufb %xmm9,%xmm0,%xmm0
- addq $64,%r10
- addl 16(%rsp),%ebx
- xorl %ebp,%esi
- vpshufb %xmm9,%xmm1,%xmm1
- movl %ecx,%edi
- shldl $5,%ecx,%ecx
- vpaddd %xmm10,%xmm0,%xmm8
- addl %esi,%ebx
- xorl %ebp,%edi
- shrdl $7,%edx,%edx
- addl %ecx,%ebx
- vmovdqa %xmm8,0(%rsp)
- addl 20(%rsp),%eax
- xorl %edx,%edi
- movl %ebx,%esi
- shldl $5,%ebx,%ebx
- addl %edi,%eax
- xorl %edx,%esi
- shrdl $7,%ecx,%ecx
- addl %ebx,%eax
- addl 24(%rsp),%ebp
- vaesenc %xmm14,%xmm12,%xmm12
- vmovups 16(%r15),%xmm15
- xorl %ecx,%esi
- movl %eax,%edi
- shldl $5,%eax,%eax
- addl %esi,%ebp
- xorl %ecx,%edi
- shrdl $7,%ebx,%ebx
- addl %eax,%ebp
- addl 28(%rsp),%edx
- xorl %ebx,%edi
- movl %ebp,%esi
- shldl $5,%ebp,%ebp
- addl %edi,%edx
- xorl %ebx,%esi
- shrdl $7,%eax,%eax
- addl %ebp,%edx
- addl 32(%rsp),%ecx
- xorl %eax,%esi
- vpshufb %xmm9,%xmm2,%xmm2
- movl %edx,%edi
- shldl $5,%edx,%edx
- vpaddd %xmm10,%xmm1,%xmm8
- addl %esi,%ecx
- vaesenc %xmm15,%xmm12,%xmm12
- vmovups 32(%r15),%xmm14
- xorl %eax,%edi
- shrdl $7,%ebp,%ebp
- addl %edx,%ecx
- vmovdqa %xmm8,16(%rsp)
- addl 36(%rsp),%ebx
- xorl %ebp,%edi
- movl %ecx,%esi
- shldl $5,%ecx,%ecx
- addl %edi,%ebx
- xorl %ebp,%esi
- shrdl $7,%edx,%edx
- addl %ecx,%ebx
- addl 40(%rsp),%eax
- xorl %edx,%esi
- movl %ebx,%edi
- shldl $5,%ebx,%ebx
- addl %esi,%eax
- xorl %edx,%edi
- shrdl $7,%ecx,%ecx
- addl %ebx,%eax
- addl 44(%rsp),%ebp
- vaesenc %xmm14,%xmm12,%xmm12
- vmovups 48(%r15),%xmm15
- xorl %ecx,%edi
- movl %eax,%esi
- shldl $5,%eax,%eax
- addl %edi,%ebp
- xorl %ecx,%esi
- shrdl $7,%ebx,%ebx
- addl %eax,%ebp
- addl 48(%rsp),%edx
- xorl %ebx,%esi
- vpshufb %xmm9,%xmm3,%xmm3
- movl %ebp,%edi
- shldl $5,%ebp,%ebp
- vpaddd %xmm10,%xmm2,%xmm8
- addl %esi,%edx
- xorl %ebx,%edi
- shrdl $7,%eax,%eax
- addl %ebp,%edx
- vmovdqa %xmm8,32(%rsp)
- addl 52(%rsp),%ecx
- xorl %eax,%edi
- movl %edx,%esi
- shldl $5,%edx,%edx
- addl %edi,%ecx
- cmpl $11,%r8d
- jb .Lvaesenclast9
- vaesenc %xmm15,%xmm12,%xmm12
- vmovups 64(%r15),%xmm14
- vaesenc %xmm14,%xmm12,%xmm12
- vmovups 80(%r15),%xmm15
- je .Lvaesenclast9
- vaesenc %xmm15,%xmm12,%xmm12
- vmovups 96(%r15),%xmm14
- vaesenc %xmm14,%xmm12,%xmm12
- vmovups 112(%r15),%xmm15
-.Lvaesenclast9:
- vaesenclast %xmm15,%xmm12,%xmm12
- vmovups -112(%r15),%xmm15
- vmovups 16-112(%r15),%xmm14
- xorl %eax,%esi
- shrdl $7,%ebp,%ebp
- addl %edx,%ecx
- addl 56(%rsp),%ebx
- xorl %ebp,%esi
- movl %ecx,%edi
- shldl $5,%ecx,%ecx
- addl %esi,%ebx
- xorl %ebp,%edi
- shrdl $7,%edx,%edx
- addl %ecx,%ebx
- addl 60(%rsp),%eax
- xorl %edx,%edi
- movl %ebx,%esi
- shldl $5,%ebx,%ebx
- addl %edi,%eax
- shrdl $7,%ecx,%ecx
- addl %ebx,%eax
- vmovups %xmm12,48(%r13,%r12,1)
- leaq 64(%r12),%r12
-
- addl 0(%r9),%eax
- addl 4(%r9),%esi
- addl 8(%r9),%ecx
- addl 12(%r9),%edx
- movl %eax,0(%r9)
- addl 16(%r9),%ebp
- movl %esi,4(%r9)
- movl %esi,%ebx
- movl %ecx,8(%r9)
- movl %ecx,%edi
- movl %edx,12(%r9)
- xorl %edx,%edi
- movl %ebp,16(%r9)
- andl %edi,%esi
- jmp .Loop_avx
-
-.Ldone_avx:
- addl 16(%rsp),%ebx
- xorl %ebp,%esi
- movl %ecx,%edi
- shldl $5,%ecx,%ecx
- addl %esi,%ebx
- xorl %ebp,%edi
- shrdl $7,%edx,%edx
- addl %ecx,%ebx
- addl 20(%rsp),%eax
- xorl %edx,%edi
- movl %ebx,%esi
- shldl $5,%ebx,%ebx
- addl %edi,%eax
- xorl %edx,%esi
- shrdl $7,%ecx,%ecx
- addl %ebx,%eax
- addl 24(%rsp),%ebp
- vaesenc %xmm14,%xmm12,%xmm12
- vmovups 16(%r15),%xmm15
- xorl %ecx,%esi
- movl %eax,%edi
- shldl $5,%eax,%eax
- addl %esi,%ebp
- xorl %ecx,%edi
- shrdl $7,%ebx,%ebx
- addl %eax,%ebp
- addl 28(%rsp),%edx
- xorl %ebx,%edi
- movl %ebp,%esi
- shldl $5,%ebp,%ebp
- addl %edi,%edx
- xorl %ebx,%esi
- shrdl $7,%eax,%eax
- addl %ebp,%edx
- addl 32(%rsp),%ecx
- xorl %eax,%esi
- movl %edx,%edi
- shldl $5,%edx,%edx
- addl %esi,%ecx
- vaesenc %xmm15,%xmm12,%xmm12
- vmovups 32(%r15),%xmm14
- xorl %eax,%edi
- shrdl $7,%ebp,%ebp
- addl %edx,%ecx
- addl 36(%rsp),%ebx
- xorl %ebp,%edi
- movl %ecx,%esi
- shldl $5,%ecx,%ecx
- addl %edi,%ebx
- xorl %ebp,%esi
- shrdl $7,%edx,%edx
- addl %ecx,%ebx
- addl 40(%rsp),%eax
- xorl %edx,%esi
- movl %ebx,%edi
- shldl $5,%ebx,%ebx
- addl %esi,%eax
- xorl %edx,%edi
- shrdl $7,%ecx,%ecx
- addl %ebx,%eax
- addl 44(%rsp),%ebp
- vaesenc %xmm14,%xmm12,%xmm12
- vmovups 48(%r15),%xmm15
- xorl %ecx,%edi
- movl %eax,%esi
- shldl $5,%eax,%eax
- addl %edi,%ebp
- xorl %ecx,%esi
- shrdl $7,%ebx,%ebx
- addl %eax,%ebp
- addl 48(%rsp),%edx
- xorl %ebx,%esi
- movl %ebp,%edi
- shldl $5,%ebp,%ebp
- addl %esi,%edx
- xorl %ebx,%edi
- shrdl $7,%eax,%eax
- addl %ebp,%edx
- addl 52(%rsp),%ecx
- xorl %eax,%edi
- movl %edx,%esi
- shldl $5,%edx,%edx
- addl %edi,%ecx
- cmpl $11,%r8d
- jb .Lvaesenclast10
- vaesenc %xmm15,%xmm12,%xmm12
- vmovups 64(%r15),%xmm14
- vaesenc %xmm14,%xmm12,%xmm12
- vmovups 80(%r15),%xmm15
- je .Lvaesenclast10
- vaesenc %xmm15,%xmm12,%xmm12
- vmovups 96(%r15),%xmm14
- vaesenc %xmm14,%xmm12,%xmm12
- vmovups 112(%r15),%xmm15
-.Lvaesenclast10:
- vaesenclast %xmm15,%xmm12,%xmm12
- vmovups -112(%r15),%xmm15
- vmovups 16-112(%r15),%xmm14
- xorl %eax,%esi
- shrdl $7,%ebp,%ebp
- addl %edx,%ecx
- addl 56(%rsp),%ebx
- xorl %ebp,%esi
- movl %ecx,%edi
- shldl $5,%ecx,%ecx
- addl %esi,%ebx
- xorl %ebp,%edi
- shrdl $7,%edx,%edx
- addl %ecx,%ebx
- addl 60(%rsp),%eax
- xorl %edx,%edi
- movl %ebx,%esi
- shldl $5,%ebx,%ebx
- addl %edi,%eax
- shrdl $7,%ecx,%ecx
- addl %ebx,%eax
- vmovups %xmm12,48(%r13,%r12,1)
- movq 88(%rsp),%r8
-
- addl 0(%r9),%eax
- addl 4(%r9),%esi
- addl 8(%r9),%ecx
- movl %eax,0(%r9)
- addl 12(%r9),%edx
- movl %esi,4(%r9)
- addl 16(%r9),%ebp
- movl %ecx,8(%r9)
- movl %edx,12(%r9)
- movl %ebp,16(%r9)
- vmovups %xmm12,(%r8)
- vzeroall
- leaq 104(%rsp),%rsi
-.cfi_def_cfa %rsi,56
- movq 0(%rsi),%r15
-.cfi_restore %r15
- movq 8(%rsi),%r14
-.cfi_restore %r14
- movq 16(%rsi),%r13
-.cfi_restore %r13
- movq 24(%rsi),%r12
-.cfi_restore %r12
- movq 32(%rsi),%rbp
-.cfi_restore %rbp
- movq 40(%rsi),%rbx
-.cfi_restore %rbx
- leaq 48(%rsi),%rsp
-.cfi_def_cfa %rsp,8
-.Lepilogue_avx:
- .byte 0xf3,0xc3
-.cfi_endproc
-.size aesni_cbc_sha1_enc_avx,.-aesni_cbc_sha1_enc_avx
.align 64
K_XX_XX:
.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999
@@ -2732,6 +1408,7 @@ K_XX_XX:
.type aesni_cbc_sha1_enc_shaext,@function
.align 32
aesni_cbc_sha1_enc_shaext:
+.cfi_startproc
movq 8(%rsp),%r10
movdqu (%r9),%xmm8
movd 16(%r9),%xmm9
@@ -2808,17 +1485,17 @@ aesni_cbc_sha1_enc_shaext:
pxor %xmm3,%xmm5
.byte 15,56,201,243
cmpl $11,%r11d
- jb .Laesenclast11
+ jb .Laesenclast6
movups 64(%rcx),%xmm0
.byte 102,15,56,220,209
movups 80(%rcx),%xmm1
.byte 102,15,56,220,208
- je .Laesenclast11
+ je .Laesenclast6
movups 96(%rcx),%xmm0
.byte 102,15,56,220,209
movups 112(%rcx),%xmm1
.byte 102,15,56,220,208
-.Laesenclast11:
+.Laesenclast6:
.byte 102,15,56,221,209
movups 16-112(%rcx),%xmm0
movdqa %xmm8,%xmm10
@@ -2874,17 +1551,17 @@ aesni_cbc_sha1_enc_shaext:
pxor %xmm4,%xmm6
.byte 15,56,201,220
cmpl $11,%r11d
- jb .Laesenclast12
+ jb .Laesenclast7
movups 64(%rcx),%xmm0
.byte 102,15,56,220,209
movups 80(%rcx),%xmm1
.byte 102,15,56,220,208
- je .Laesenclast12
+ je .Laesenclast7
movups 96(%rcx),%xmm0
.byte 102,15,56,220,209
movups 112(%rcx),%xmm1
.byte 102,15,56,220,208
-.Laesenclast12:
+.Laesenclast7:
.byte 102,15,56,221,209
movups 16-112(%rcx),%xmm0
movdqa %xmm8,%xmm9
@@ -2940,17 +1617,17 @@ aesni_cbc_sha1_enc_shaext:
pxor %xmm5,%xmm3
.byte 15,56,201,229
cmpl $11,%r11d
- jb .Laesenclast13
+ jb .Laesenclast8
movups 64(%rcx),%xmm0
.byte 102,15,56,220,209
movups 80(%rcx),%xmm1
.byte 102,15,56,220,208
- je .Laesenclast13
+ je .Laesenclast8
movups 96(%rcx),%xmm0
.byte 102,15,56,220,209
movups 112(%rcx),%xmm1
.byte 102,15,56,220,208
-.Laesenclast13:
+.Laesenclast8:
.byte 102,15,56,221,209
movups 16-112(%rcx),%xmm0
movdqa %xmm8,%xmm10
@@ -3004,17 +1681,17 @@ aesni_cbc_sha1_enc_shaext:
movups 48(%rcx),%xmm1
.byte 102,15,56,220,208
cmpl $11,%r11d
- jb .Laesenclast14
+ jb .Laesenclast9
movups 64(%rcx),%xmm0
.byte 102,15,56,220,209
movups 80(%rcx),%xmm1
.byte 102,15,56,220,208
- je .Laesenclast14
+ je .Laesenclast9
movups 96(%rcx),%xmm0
.byte 102,15,56,220,209
movups 112(%rcx),%xmm1
.byte 102,15,56,220,208
-.Laesenclast14:
+.Laesenclast9:
.byte 102,15,56,221,209
movups 16-112(%rcx),%xmm0
decq %rdx
@@ -3030,4 +1707,5 @@ aesni_cbc_sha1_enc_shaext:
movdqu %xmm8,(%r9)
movd %xmm9,16(%r9)
.byte 0xf3,0xc3
+.cfi_endproc
.size aesni_cbc_sha1_enc_shaext,.-aesni_cbc_sha1_enc_shaext
diff --git a/secure/lib/libcrypto/amd64/aesni-sha256-x86_64.S b/secure/lib/libcrypto/amd64/aesni-sha256-x86_64.S
index e013190f8727..e42a02ebe647 100644
--- a/secure/lib/libcrypto/amd64/aesni-sha256-x86_64.S
+++ b/secure/lib/libcrypto/amd64/aesni-sha256-x86_64.S
@@ -7,31 +7,14 @@
.type aesni_cbc_sha256_enc,@function
.align 16
aesni_cbc_sha256_enc:
- leaq OPENSSL_ia32cap_P(%rip),%r11
- movl $1,%eax
- cmpq $0,%rdi
- je .Lprobe
- movl 0(%r11),%eax
- movq 4(%r11),%r10
- btq $61,%r10
- jc aesni_cbc_sha256_enc_shaext
- movq %r10,%r11
- shrq $32,%r11
-
- testl $2048,%r10d
- jnz aesni_cbc_sha256_enc_xop
- andl $296,%r11d
- cmpl $296,%r11d
- je aesni_cbc_sha256_enc_avx2
- andl $268435456,%r10d
- jnz aesni_cbc_sha256_enc_avx
- ud2
+.cfi_startproc
xorl %eax,%eax
cmpq $0,%rdi
je .Lprobe
ud2
.Lprobe:
.byte 0xf3,0xc3
+.cfi_endproc
.size aesni_cbc_sha256_enc,.-aesni_cbc_sha256_enc
.align 64
@@ -76,4336 +59,3 @@ K256:
.long 0,0,0,0, 0,0,0,0
.byte 65,69,83,78,73,45,67,66,67,43,83,72,65,50,53,54,32,115,116,105,116,99,104,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 64
-.type aesni_cbc_sha256_enc_xop,@function
-.align 64
-aesni_cbc_sha256_enc_xop:
-.cfi_startproc
-.Lxop_shortcut:
- movq 8(%rsp),%r10
- movq %rsp,%rax
-.cfi_def_cfa_register %rax
- pushq %rbx
-.cfi_offset %rbx,-16
- pushq %rbp
-.cfi_offset %rbp,-24
- pushq %r12
-.cfi_offset %r12,-32
- pushq %r13
-.cfi_offset %r13,-40
- pushq %r14
-.cfi_offset %r14,-48
- pushq %r15
-.cfi_offset %r15,-56
- subq $128,%rsp
- andq $-64,%rsp
-
- shlq $6,%rdx
- subq %rdi,%rsi
- subq %rdi,%r10
- addq %rdi,%rdx
-
-
- movq %rsi,64+8(%rsp)
- movq %rdx,64+16(%rsp)
-
- movq %r8,64+32(%rsp)
- movq %r9,64+40(%rsp)
- movq %r10,64+48(%rsp)
- movq %rax,120(%rsp)
-.cfi_escape 0x0f,0x06,0x77,0xf8,0x00,0x06,0x23,0x08
-.Lprologue_xop:
- vzeroall
-
- movq %rdi,%r12
- leaq 128(%rcx),%rdi
- leaq K256+544(%rip),%r13
- movl 240-128(%rdi),%r14d
- movq %r9,%r15
- movq %r10,%rsi
- vmovdqu (%r8),%xmm8
- subq $9,%r14
-
- movl 0(%r15),%eax
- movl 4(%r15),%ebx
- movl 8(%r15),%ecx
- movl 12(%r15),%edx
- movl 16(%r15),%r8d
- movl 20(%r15),%r9d
- movl 24(%r15),%r10d
- movl 28(%r15),%r11d
-
- vmovdqa 0(%r13,%r14,8),%xmm14
- vmovdqa 16(%r13,%r14,8),%xmm13
- vmovdqa 32(%r13,%r14,8),%xmm12
- vmovdqu 0-128(%rdi),%xmm10
- jmp .Lloop_xop
-.align 16
-.Lloop_xop:
- vmovdqa K256+512(%rip),%xmm7
- vmovdqu 0(%rsi,%r12,1),%xmm0
- vmovdqu 16(%rsi,%r12,1),%xmm1
- vmovdqu 32(%rsi,%r12,1),%xmm2
- vmovdqu 48(%rsi,%r12,1),%xmm3
- vpshufb %xmm7,%xmm0,%xmm0
- leaq K256(%rip),%rbp
- vpshufb %xmm7,%xmm1,%xmm1
- vpshufb %xmm7,%xmm2,%xmm2
- vpaddd 0(%rbp),%xmm0,%xmm4
- vpshufb %xmm7,%xmm3,%xmm3
- vpaddd 32(%rbp),%xmm1,%xmm5
- vpaddd 64(%rbp),%xmm2,%xmm6
- vpaddd 96(%rbp),%xmm3,%xmm7
- vmovdqa %xmm4,0(%rsp)
- movl %eax,%r14d
- vmovdqa %xmm5,16(%rsp)
- movl %ebx,%esi
- vmovdqa %xmm6,32(%rsp)
- xorl %ecx,%esi
- vmovdqa %xmm7,48(%rsp)
- movl %r8d,%r13d
- jmp .Lxop_00_47
-
-.align 16
-.Lxop_00_47:
- subq $-32*4,%rbp
- vmovdqu (%r12),%xmm9
- movq %r12,64+0(%rsp)
- vpalignr $4,%xmm0,%xmm1,%xmm4
- rorl $14,%r13d
- movl %r14d,%eax
- vpalignr $4,%xmm2,%xmm3,%xmm7
- movl %r9d,%r12d
- xorl %r8d,%r13d
-.byte 143,232,120,194,236,14
- rorl $9,%r14d
- xorl %r10d,%r12d
- vpsrld $3,%xmm4,%xmm4
- rorl $5,%r13d
- xorl %eax,%r14d
- vpaddd %xmm7,%xmm0,%xmm0
- andl %r8d,%r12d
- vpxor %xmm10,%xmm9,%xmm9
- vmovdqu 16-128(%rdi),%xmm10
- xorl %r8d,%r13d
- addl 0(%rsp),%r11d
- movl %eax,%r15d
-.byte 143,232,120,194,245,11
- rorl $11,%r14d
- xorl %r10d,%r12d
- vpxor %xmm5,%xmm4,%xmm4
- xorl %ebx,%r15d
- rorl $6,%r13d
- addl %r12d,%r11d
- andl %r15d,%esi
-.byte 143,232,120,194,251,13
- xorl %eax,%r14d
- addl %r13d,%r11d
- vpxor %xmm6,%xmm4,%xmm4
- xorl %ebx,%esi
- addl %r11d,%edx
- vpsrld $10,%xmm3,%xmm6
- rorl $2,%r14d
- addl %esi,%r11d
- vpaddd %xmm4,%xmm0,%xmm0
- movl %edx,%r13d
- addl %r11d,%r14d
-.byte 143,232,120,194,239,2
- rorl $14,%r13d
- movl %r14d,%r11d
- vpxor %xmm6,%xmm7,%xmm7
- movl %r8d,%r12d
- xorl %edx,%r13d
- rorl $9,%r14d
- xorl %r9d,%r12d
- vpxor %xmm5,%xmm7,%xmm7
- rorl $5,%r13d
- xorl %r11d,%r14d
- andl %edx,%r12d
- vpxor %xmm8,%xmm9,%xmm9
- xorl %edx,%r13d
- vpsrldq $8,%xmm7,%xmm7
- addl 4(%rsp),%r10d
- movl %r11d,%esi
- rorl $11,%r14d
- xorl %r9d,%r12d
- vpaddd %xmm7,%xmm0,%xmm0
- xorl %eax,%esi
- rorl $6,%r13d
- addl %r12d,%r10d
- andl %esi,%r15d
-.byte 143,232,120,194,248,13
- xorl %r11d,%r14d
- addl %r13d,%r10d
- vpsrld $10,%xmm0,%xmm6
- xorl %eax,%r15d
- addl %r10d,%ecx
-.byte 143,232,120,194,239,2
- rorl $2,%r14d
- addl %r15d,%r10d
- vpxor %xmm6,%xmm7,%xmm7
- movl %ecx,%r13d
- addl %r10d,%r14d
- rorl $14,%r13d
- movl %r14d,%r10d
- vpxor %xmm5,%xmm7,%xmm7
- movl %edx,%r12d
- xorl %ecx,%r13d
- rorl $9,%r14d
- xorl %r8d,%r12d
- vpslldq $8,%xmm7,%xmm7
- rorl $5,%r13d
- xorl %r10d,%r14d
- andl %ecx,%r12d
- vaesenc %xmm10,%xmm9,%xmm9
- vmovdqu 32-128(%rdi),%xmm10
- xorl %ecx,%r13d
- vpaddd %xmm7,%xmm0,%xmm0
- addl 8(%rsp),%r9d
- movl %r10d,%r15d
- rorl $11,%r14d
- xorl %r8d,%r12d
- vpaddd 0(%rbp),%xmm0,%xmm6
- xorl %r11d,%r15d
- rorl $6,%r13d
- addl %r12d,%r9d
- andl %r15d,%esi
- xorl %r10d,%r14d
- addl %r13d,%r9d
- xorl %r11d,%esi
- addl %r9d,%ebx
- rorl $2,%r14d
- addl %esi,%r9d
- movl %ebx,%r13d
- addl %r9d,%r14d
- rorl $14,%r13d
- movl %r14d,%r9d
- movl %ecx,%r12d
- xorl %ebx,%r13d
- rorl $9,%r14d
- xorl %edx,%r12d
- rorl $5,%r13d
- xorl %r9d,%r14d
- andl %ebx,%r12d
- vaesenc %xmm10,%xmm9,%xmm9
- vmovdqu 48-128(%rdi),%xmm10
- xorl %ebx,%r13d
- addl 12(%rsp),%r8d
- movl %r9d,%esi
- rorl $11,%r14d
- xorl %edx,%r12d
- xorl %r10d,%esi
- rorl $6,%r13d
- addl %r12d,%r8d
- andl %esi,%r15d
- xorl %r9d,%r14d
- addl %r13d,%r8d
- xorl %r10d,%r15d
- addl %r8d,%eax
- rorl $2,%r14d
- addl %r15d,%r8d
- movl %eax,%r13d
- addl %r8d,%r14d
- vmovdqa %xmm6,0(%rsp)
- vpalignr $4,%xmm1,%xmm2,%xmm4
- rorl $14,%r13d
- movl %r14d,%r8d
- vpalignr $4,%xmm3,%xmm0,%xmm7
- movl %ebx,%r12d
- xorl %eax,%r13d
-.byte 143,232,120,194,236,14
- rorl $9,%r14d
- xorl %ecx,%r12d
- vpsrld $3,%xmm4,%xmm4
- rorl $5,%r13d
- xorl %r8d,%r14d
- vpaddd %xmm7,%xmm1,%xmm1
- andl %eax,%r12d
- vaesenc %xmm10,%xmm9,%xmm9
- vmovdqu 64-128(%rdi),%xmm10
- xorl %eax,%r13d
- addl 16(%rsp),%edx
- movl %r8d,%r15d
-.byte 143,232,120,194,245,11
- rorl $11,%r14d
- xorl %ecx,%r12d
- vpxor %xmm5,%xmm4,%xmm4
- xorl %r9d,%r15d
- rorl $6,%r13d
- addl %r12d,%edx
- andl %r15d,%esi
-.byte 143,232,120,194,248,13
- xorl %r8d,%r14d
- addl %r13d,%edx
- vpxor %xmm6,%xmm4,%xmm4
- xorl %r9d,%esi
- addl %edx,%r11d
- vpsrld $10,%xmm0,%xmm6
- rorl $2,%r14d
- addl %esi,%edx
- vpaddd %xmm4,%xmm1,%xmm1
- movl %r11d,%r13d
- addl %edx,%r14d
-.byte 143,232,120,194,239,2
- rorl $14,%r13d
- movl %r14d,%edx
- vpxor %xmm6,%xmm7,%xmm7
- movl %eax,%r12d
- xorl %r11d,%r13d
- rorl $9,%r14d
- xorl %ebx,%r12d
- vpxor %xmm5,%xmm7,%xmm7
- rorl $5,%r13d
- xorl %edx,%r14d
- andl %r11d,%r12d
- vaesenc %xmm10,%xmm9,%xmm9
- vmovdqu 80-128(%rdi),%xmm10
- xorl %r11d,%r13d
- vpsrldq $8,%xmm7,%xmm7
- addl 20(%rsp),%ecx
- movl %edx,%esi
- rorl $11,%r14d
- xorl %ebx,%r12d
- vpaddd %xmm7,%xmm1,%xmm1
- xorl %r8d,%esi
- rorl $6,%r13d
- addl %r12d,%ecx
- andl %esi,%r15d
-.byte 143,232,120,194,249,13
- xorl %edx,%r14d
- addl %r13d,%ecx
- vpsrld $10,%xmm1,%xmm6
- xorl %r8d,%r15d
- addl %ecx,%r10d
-.byte 143,232,120,194,239,2
- rorl $2,%r14d
- addl %r15d,%ecx
- vpxor %xmm6,%xmm7,%xmm7
- movl %r10d,%r13d
- addl %ecx,%r14d
- rorl $14,%r13d
- movl %r14d,%ecx
- vpxor %xmm5,%xmm7,%xmm7
- movl %r11d,%r12d
- xorl %r10d,%r13d
- rorl $9,%r14d
- xorl %eax,%r12d
- vpslldq $8,%xmm7,%xmm7
- rorl $5,%r13d
- xorl %ecx,%r14d
- andl %r10d,%r12d
- vaesenc %xmm10,%xmm9,%xmm9
- vmovdqu 96-128(%rdi),%xmm10
- xorl %r10d,%r13d
- vpaddd %xmm7,%xmm1,%xmm1
- addl 24(%rsp),%ebx
- movl %ecx,%r15d
- rorl $11,%r14d
- xorl %eax,%r12d
- vpaddd 32(%rbp),%xmm1,%xmm6
- xorl %edx,%r15d
- rorl $6,%r13d
- addl %r12d,%ebx
- andl %r15d,%esi
- xorl %ecx,%r14d
- addl %r13d,%ebx
- xorl %edx,%esi
- addl %ebx,%r9d
- rorl $2,%r14d
- addl %esi,%ebx
- movl %r9d,%r13d
- addl %ebx,%r14d
- rorl $14,%r13d
- movl %r14d,%ebx
- movl %r10d,%r12d
- xorl %r9d,%r13d
- rorl $9,%r14d
- xorl %r11d,%r12d
- rorl $5,%r13d
- xorl %ebx,%r14d
- andl %r9d,%r12d
- vaesenc %xmm10,%xmm9,%xmm9
- vmovdqu 112-128(%rdi),%xmm10
- xorl %r9d,%r13d
- addl 28(%rsp),%eax
- movl %ebx,%esi
- rorl $11,%r14d
- xorl %r11d,%r12d
- xorl %ecx,%esi
- rorl $6,%r13d
- addl %r12d,%eax
- andl %esi,%r15d
- xorl %ebx,%r14d
- addl %r13d,%eax
- xorl %ecx,%r15d
- addl %eax,%r8d
- rorl $2,%r14d
- addl %r15d,%eax
- movl %r8d,%r13d
- addl %eax,%r14d
- vmovdqa %xmm6,16(%rsp)
- vpalignr $4,%xmm2,%xmm3,%xmm4
- rorl $14,%r13d
- movl %r14d,%eax
- vpalignr $4,%xmm0,%xmm1,%xmm7
- movl %r9d,%r12d
- xorl %r8d,%r13d
-.byte 143,232,120,194,236,14
- rorl $9,%r14d
- xorl %r10d,%r12d
- vpsrld $3,%xmm4,%xmm4
- rorl $5,%r13d
- xorl %eax,%r14d
- vpaddd %xmm7,%xmm2,%xmm2
- andl %r8d,%r12d
- vaesenc %xmm10,%xmm9,%xmm9
- vmovdqu 128-128(%rdi),%xmm10
- xorl %r8d,%r13d
- addl 32(%rsp),%r11d
- movl %eax,%r15d
-.byte 143,232,120,194,245,11
- rorl $11,%r14d
- xorl %r10d,%r12d
- vpxor %xmm5,%xmm4,%xmm4
- xorl %ebx,%r15d
- rorl $6,%r13d
- addl %r12d,%r11d
- andl %r15d,%esi
-.byte 143,232,120,194,249,13
- xorl %eax,%r14d
- addl %r13d,%r11d
- vpxor %xmm6,%xmm4,%xmm4
- xorl %ebx,%esi
- addl %r11d,%edx
- vpsrld $10,%xmm1,%xmm6
- rorl $2,%r14d
- addl %esi,%r11d
- vpaddd %xmm4,%xmm2,%xmm2
- movl %edx,%r13d
- addl %r11d,%r14d
-.byte 143,232,120,194,239,2
- rorl $14,%r13d
- movl %r14d,%r11d
- vpxor %xmm6,%xmm7,%xmm7
- movl %r8d,%r12d
- xorl %edx,%r13d
- rorl $9,%r14d
- xorl %r9d,%r12d
- vpxor %xmm5,%xmm7,%xmm7
- rorl $5,%r13d
- xorl %r11d,%r14d
- andl %edx,%r12d
- vaesenc %xmm10,%xmm9,%xmm9
- vmovdqu 144-128(%rdi),%xmm10
- xorl %edx,%r13d
- vpsrldq $8,%xmm7,%xmm7
- addl 36(%rsp),%r10d
- movl %r11d,%esi
- rorl $11,%r14d
- xorl %r9d,%r12d
- vpaddd %xmm7,%xmm2,%xmm2
- xorl %eax,%esi
- rorl $6,%r13d
- addl %r12d,%r10d
- andl %esi,%r15d
-.byte 143,232,120,194,250,13
- xorl %r11d,%r14d
- addl %r13d,%r10d
- vpsrld $10,%xmm2,%xmm6
- xorl %eax,%r15d
- addl %r10d,%ecx
-.byte 143,232,120,194,239,2
- rorl $2,%r14d
- addl %r15d,%r10d
- vpxor %xmm6,%xmm7,%xmm7
- movl %ecx,%r13d
- addl %r10d,%r14d
- rorl $14,%r13d
- movl %r14d,%r10d
- vpxor %xmm5,%xmm7,%xmm7
- movl %edx,%r12d
- xorl %ecx,%r13d
- rorl $9,%r14d
- xorl %r8d,%r12d
- vpslldq $8,%xmm7,%xmm7
- rorl $5,%r13d
- xorl %r10d,%r14d
- andl %ecx,%r12d
- vaesenc %xmm10,%xmm9,%xmm9
- vmovdqu 160-128(%rdi),%xmm10
- xorl %ecx,%r13d
- vpaddd %xmm7,%xmm2,%xmm2
- addl 40(%rsp),%r9d
- movl %r10d,%r15d
- rorl $11,%r14d
- xorl %r8d,%r12d
- vpaddd 64(%rbp),%xmm2,%xmm6
- xorl %r11d,%r15d
- rorl $6,%r13d
- addl %r12d,%r9d
- andl %r15d,%esi
- xorl %r10d,%r14d
- addl %r13d,%r9d
- xorl %r11d,%esi
- addl %r9d,%ebx
- rorl $2,%r14d
- addl %esi,%r9d
- movl %ebx,%r13d
- addl %r9d,%r14d
- rorl $14,%r13d
- movl %r14d,%r9d
- movl %ecx,%r12d
- xorl %ebx,%r13d
- rorl $9,%r14d
- xorl %edx,%r12d
- rorl $5,%r13d
- xorl %r9d,%r14d
- andl %ebx,%r12d
- vaesenclast %xmm10,%xmm9,%xmm11
- vaesenc %xmm10,%xmm9,%xmm9
- vmovdqu 176-128(%rdi),%xmm10
- xorl %ebx,%r13d
- addl 44(%rsp),%r8d
- movl %r9d,%esi
- rorl $11,%r14d
- xorl %edx,%r12d
- xorl %r10d,%esi
- rorl $6,%r13d
- addl %r12d,%r8d
- andl %esi,%r15d
- xorl %r9d,%r14d
- addl %r13d,%r8d
- xorl %r10d,%r15d
- addl %r8d,%eax
- rorl $2,%r14d
- addl %r15d,%r8d
- movl %eax,%r13d
- addl %r8d,%r14d
- vmovdqa %xmm6,32(%rsp)
- vpalignr $4,%xmm3,%xmm0,%xmm4
- rorl $14,%r13d
- movl %r14d,%r8d
- vpalignr $4,%xmm1,%xmm2,%xmm7
- movl %ebx,%r12d
- xorl %eax,%r13d
-.byte 143,232,120,194,236,14
- rorl $9,%r14d
- xorl %ecx,%r12d
- vpsrld $3,%xmm4,%xmm4
- rorl $5,%r13d
- xorl %r8d,%r14d
- vpaddd %xmm7,%xmm3,%xmm3
- andl %eax,%r12d
- vpand %xmm12,%xmm11,%xmm8
- vaesenc %xmm10,%xmm9,%xmm9
- vmovdqu 192-128(%rdi),%xmm10
- xorl %eax,%r13d
- addl 48(%rsp),%edx
- movl %r8d,%r15d
-.byte 143,232,120,194,245,11
- rorl $11,%r14d
- xorl %ecx,%r12d
- vpxor %xmm5,%xmm4,%xmm4
- xorl %r9d,%r15d
- rorl $6,%r13d
- addl %r12d,%edx
- andl %r15d,%esi
-.byte 143,232,120,194,250,13
- xorl %r8d,%r14d
- addl %r13d,%edx
- vpxor %xmm6,%xmm4,%xmm4
- xorl %r9d,%esi
- addl %edx,%r11d
- vpsrld $10,%xmm2,%xmm6
- rorl $2,%r14d
- addl %esi,%edx
- vpaddd %xmm4,%xmm3,%xmm3
- movl %r11d,%r13d
- addl %edx,%r14d
-.byte 143,232,120,194,239,2
- rorl $14,%r13d
- movl %r14d,%edx
- vpxor %xmm6,%xmm7,%xmm7
- movl %eax,%r12d
- xorl %r11d,%r13d
- rorl $9,%r14d
- xorl %ebx,%r12d
- vpxor %xmm5,%xmm7,%xmm7
- rorl $5,%r13d
- xorl %edx,%r14d
- andl %r11d,%r12d
- vaesenclast %xmm10,%xmm9,%xmm11
- vaesenc %xmm10,%xmm9,%xmm9
- vmovdqu 208-128(%rdi),%xmm10
- xorl %r11d,%r13d
- vpsrldq $8,%xmm7,%xmm7
- addl 52(%rsp),%ecx
- movl %edx,%esi
- rorl $11,%r14d
- xorl %ebx,%r12d
- vpaddd %xmm7,%xmm3,%xmm3
- xorl %r8d,%esi
- rorl $6,%r13d
- addl %r12d,%ecx
- andl %esi,%r15d
-.byte 143,232,120,194,251,13
- xorl %edx,%r14d
- addl %r13d,%ecx
- vpsrld $10,%xmm3,%xmm6
- xorl %r8d,%r15d
- addl %ecx,%r10d
-.byte 143,232,120,194,239,2
- rorl $2,%r14d
- addl %r15d,%ecx
- vpxor %xmm6,%xmm7,%xmm7
- movl %r10d,%r13d
- addl %ecx,%r14d
- rorl $14,%r13d
- movl %r14d,%ecx
- vpxor %xmm5,%xmm7,%xmm7
- movl %r11d,%r12d
- xorl %r10d,%r13d
- rorl $9,%r14d
- xorl %eax,%r12d
- vpslldq $8,%xmm7,%xmm7
- rorl $5,%r13d
- xorl %ecx,%r14d
- andl %r10d,%r12d
- vpand %xmm13,%xmm11,%xmm11
- vaesenc %xmm10,%xmm9,%xmm9
- vmovdqu 224-128(%rdi),%xmm10
- xorl %r10d,%r13d
- vpaddd %xmm7,%xmm3,%xmm3
- addl 56(%rsp),%ebx
- movl %ecx,%r15d
- rorl $11,%r14d
- xorl %eax,%r12d
- vpaddd 96(%rbp),%xmm3,%xmm6
- xorl %edx,%r15d
- rorl $6,%r13d
- addl %r12d,%ebx
- andl %r15d,%esi
- xorl %ecx,%r14d
- addl %r13d,%ebx
- xorl %edx,%esi
- addl %ebx,%r9d
- rorl $2,%r14d
- addl %esi,%ebx
- movl %r9d,%r13d
- addl %ebx,%r14d
- rorl $14,%r13d
- movl %r14d,%ebx
- movl %r10d,%r12d
- xorl %r9d,%r13d
- rorl $9,%r14d
- xorl %r11d,%r12d
- rorl $5,%r13d
- xorl %ebx,%r14d
- andl %r9d,%r12d
- vpor %xmm11,%xmm8,%xmm8
- vaesenclast %xmm10,%xmm9,%xmm11
- vmovdqu 0-128(%rdi),%xmm10
- xorl %r9d,%r13d
- addl 60(%rsp),%eax
- movl %ebx,%esi
- rorl $11,%r14d
- xorl %r11d,%r12d
- xorl %ecx,%esi
- rorl $6,%r13d
- addl %r12d,%eax
- andl %esi,%r15d
- xorl %ebx,%r14d
- addl %r13d,%eax
- xorl %ecx,%r15d
- addl %eax,%r8d
- rorl $2,%r14d
- addl %r15d,%eax
- movl %r8d,%r13d
- addl %eax,%r14d
- vmovdqa %xmm6,48(%rsp)
- movq 64+0(%rsp),%r12
- vpand %xmm14,%xmm11,%xmm11
- movq 64+8(%rsp),%r15
- vpor %xmm11,%xmm8,%xmm8
- vmovdqu %xmm8,(%r15,%r12,1)
- leaq 16(%r12),%r12
- cmpb $0,131(%rbp)
- jne .Lxop_00_47
- vmovdqu (%r12),%xmm9
- movq %r12,64+0(%rsp)
- rorl $14,%r13d
- movl %r14d,%eax
- movl %r9d,%r12d
- xorl %r8d,%r13d
- rorl $9,%r14d
- xorl %r10d,%r12d
- rorl $5,%r13d
- xorl %eax,%r14d
- andl %r8d,%r12d
- vpxor %xmm10,%xmm9,%xmm9
- vmovdqu 16-128(%rdi),%xmm10
- xorl %r8d,%r13d
- addl 0(%rsp),%r11d
- movl %eax,%r15d
- rorl $11,%r14d
- xorl %r10d,%r12d
- xorl %ebx,%r15d
- rorl $6,%r13d
- addl %r12d,%r11d
- andl %r15d,%esi
- xorl %eax,%r14d
- addl %r13d,%r11d
- xorl %ebx,%esi
- addl %r11d,%edx
- rorl $2,%r14d
- addl %esi,%r11d
- movl %edx,%r13d
- addl %r11d,%r14d
- rorl $14,%r13d
- movl %r14d,%r11d
- movl %r8d,%r12d
- xorl %edx,%r13d
- rorl $9,%r14d
- xorl %r9d,%r12d
- rorl $5,%r13d
- xorl %r11d,%r14d
- andl %edx,%r12d
- vpxor %xmm8,%xmm9,%xmm9
- xorl %edx,%r13d
- addl 4(%rsp),%r10d
- movl %r11d,%esi
- rorl $11,%r14d
- xorl %r9d,%r12d
- xorl %eax,%esi
- rorl $6,%r13d
- addl %r12d,%r10d
- andl %esi,%r15d
- xorl %r11d,%r14d
- addl %r13d,%r10d
- xorl %eax,%r15d
- addl %r10d,%ecx
- rorl $2,%r14d
- addl %r15d,%r10d
- movl %ecx,%r13d
- addl %r10d,%r14d
- rorl $14,%r13d
- movl %r14d,%r10d
- movl %edx,%r12d
- xorl %ecx,%r13d
- rorl $9,%r14d
- xorl %r8d,%r12d
- rorl $5,%r13d
- xorl %r10d,%r14d
- andl %ecx,%r12d
- vaesenc %xmm10,%xmm9,%xmm9
- vmovdqu 32-128(%rdi),%xmm10
- xorl %ecx,%r13d
- addl 8(%rsp),%r9d
- movl %r10d,%r15d
- rorl $11,%r14d
- xorl %r8d,%r12d
- xorl %r11d,%r15d
- rorl $6,%r13d
- addl %r12d,%r9d
- andl %r15d,%esi
- xorl %r10d,%r14d
- addl %r13d,%r9d
- xorl %r11d,%esi
- addl %r9d,%ebx
- rorl $2,%r14d
- addl %esi,%r9d
- movl %ebx,%r13d
- addl %r9d,%r14d
- rorl $14,%r13d
- movl %r14d,%r9d
- movl %ecx,%r12d
- xorl %ebx,%r13d
- rorl $9,%r14d
- xorl %edx,%r12d
- rorl $5,%r13d
- xorl %r9d,%r14d
- andl %ebx,%r12d
- vaesenc %xmm10,%xmm9,%xmm9
- vmovdqu 48-128(%rdi),%xmm10
- xorl %ebx,%r13d
- addl 12(%rsp),%r8d
- movl %r9d,%esi
- rorl $11,%r14d
- xorl %edx,%r12d
- xorl %r10d,%esi
- rorl $6,%r13d
- addl %r12d,%r8d
- andl %esi,%r15d
- xorl %r9d,%r14d
- addl %r13d,%r8d
- xorl %r10d,%r15d
- addl %r8d,%eax
- rorl $2,%r14d
- addl %r15d,%r8d
- movl %eax,%r13d
- addl %r8d,%r14d
- rorl $14,%r13d
- movl %r14d,%r8d
- movl %ebx,%r12d
- xorl %eax,%r13d
- rorl $9,%r14d
- xorl %ecx,%r12d
- rorl $5,%r13d
- xorl %r8d,%r14d
- andl %eax,%r12d
- vaesenc %xmm10,%xmm9,%xmm9
- vmovdqu 64-128(%rdi),%xmm10
- xorl %eax,%r13d
- addl 16(%rsp),%edx
- movl %r8d,%r15d
- rorl $11,%r14d
- xorl %ecx,%r12d
- xorl %r9d,%r15d
- rorl $6,%r13d
- addl %r12d,%edx
- andl %r15d,%esi
- xorl %r8d,%r14d
- addl %r13d,%edx
- xorl %r9d,%esi
- addl %edx,%r11d
- rorl $2,%r14d
- addl %esi,%edx
- movl %r11d,%r13d
- addl %edx,%r14d
- rorl $14,%r13d
- movl %r14d,%edx
- movl %eax,%r12d
- xorl %r11d,%r13d
- rorl $9,%r14d
- xorl %ebx,%r12d
- rorl $5,%r13d
- xorl %edx,%r14d
- andl %r11d,%r12d
- vaesenc %xmm10,%xmm9,%xmm9
- vmovdqu 80-128(%rdi),%xmm10
- xorl %r11d,%r13d
- addl 20(%rsp),%ecx
- movl %edx,%esi
- rorl $11,%r14d
- xorl %ebx,%r12d
- xorl %r8d,%esi
- rorl $6,%r13d
- addl %r12d,%ecx
- andl %esi,%r15d
- xorl %edx,%r14d
- addl %r13d,%ecx
- xorl %r8d,%r15d
- addl %ecx,%r10d
- rorl $2,%r14d
- addl %r15d,%ecx
- movl %r10d,%r13d
- addl %ecx,%r14d
- rorl $14,%r13d
- movl %r14d,%ecx
- movl %r11d,%r12d
- xorl %r10d,%r13d
- rorl $9,%r14d
- xorl %eax,%r12d
- rorl $5,%r13d
- xorl %ecx,%r14d
- andl %r10d,%r12d
- vaesenc %xmm10,%xmm9,%xmm9
- vmovdqu 96-128(%rdi),%xmm10
- xorl %r10d,%r13d
- addl 24(%rsp),%ebx
- movl %ecx,%r15d
- rorl $11,%r14d
- xorl %eax,%r12d
- xorl %edx,%r15d
- rorl $6,%r13d
- addl %r12d,%ebx
- andl %r15d,%esi
- xorl %ecx,%r14d
- addl %r13d,%ebx
- xorl %edx,%esi
- addl %ebx,%r9d
- rorl $2,%r14d
- addl %esi,%ebx
- movl %r9d,%r13d
- addl %ebx,%r14d
- rorl $14,%r13d
- movl %r14d,%ebx
- movl %r10d,%r12d
- xorl %r9d,%r13d
- rorl $9,%r14d
- xorl %r11d,%r12d
- rorl $5,%r13d
- xorl %ebx,%r14d
- andl %r9d,%r12d
- vaesenc %xmm10,%xmm9,%xmm9
- vmovdqu 112-128(%rdi),%xmm10
- xorl %r9d,%r13d
- addl 28(%rsp),%eax
- movl %ebx,%esi
- rorl $11,%r14d
- xorl %r11d,%r12d
- xorl %ecx,%esi
- rorl $6,%r13d
- addl %r12d,%eax
- andl %esi,%r15d
- xorl %ebx,%r14d
- addl %r13d,%eax
- xorl %ecx,%r15d
- addl %eax,%r8d
- rorl $2,%r14d
- addl %r15d,%eax
- movl %r8d,%r13d
- addl %eax,%r14d
- rorl $14,%r13d
- movl %r14d,%eax
- movl %r9d,%r12d
- xorl %r8d,%r13d
- rorl $9,%r14d
- xorl %r10d,%r12d
- rorl $5,%r13d
- xorl %eax,%r14d
- andl %r8d,%r12d
- vaesenc %xmm10,%xmm9,%xmm9
- vmovdqu 128-128(%rdi),%xmm10
- xorl %r8d,%r13d
- addl 32(%rsp),%r11d
- movl %eax,%r15d
- rorl $11,%r14d
- xorl %r10d,%r12d
- xorl %ebx,%r15d
- rorl $6,%r13d
- addl %r12d,%r11d
- andl %r15d,%esi
- xorl %eax,%r14d
- addl %r13d,%r11d
- xorl %ebx,%esi
- addl %r11d,%edx
- rorl $2,%r14d
- addl %esi,%r11d
- movl %edx,%r13d
- addl %r11d,%r14d
- rorl $14,%r13d
- movl %r14d,%r11d
- movl %r8d,%r12d
- xorl %edx,%r13d
- rorl $9,%r14d
- xorl %r9d,%r12d
- rorl $5,%r13d
- xorl %r11d,%r14d
- andl %edx,%r12d
- vaesenc %xmm10,%xmm9,%xmm9
- vmovdqu 144-128(%rdi),%xmm10
- xorl %edx,%r13d
- addl 36(%rsp),%r10d
- movl %r11d,%esi
- rorl $11,%r14d
- xorl %r9d,%r12d
- xorl %eax,%esi
- rorl $6,%r13d
- addl %r12d,%r10d
- andl %esi,%r15d
- xorl %r11d,%r14d
- addl %r13d,%r10d
- xorl %eax,%r15d
- addl %r10d,%ecx
- rorl $2,%r14d
- addl %r15d,%r10d
- movl %ecx,%r13d
- addl %r10d,%r14d
- rorl $14,%r13d
- movl %r14d,%r10d
- movl %edx,%r12d
- xorl %ecx,%r13d
- rorl $9,%r14d
- xorl %r8d,%r12d
- rorl $5,%r13d
- xorl %r10d,%r14d
- andl %ecx,%r12d
- vaesenc %xmm10,%xmm9,%xmm9
- vmovdqu 160-128(%rdi),%xmm10
- xorl %ecx,%r13d
- addl 40(%rsp),%r9d
- movl %r10d,%r15d
- rorl $11,%r14d
- xorl %r8d,%r12d
- xorl %r11d,%r15d
- rorl $6,%r13d
- addl %r12d,%r9d
- andl %r15d,%esi
- xorl %r10d,%r14d
- addl %r13d,%r9d
- xorl %r11d,%esi
- addl %r9d,%ebx
- rorl $2,%r14d
- addl %esi,%r9d
- movl %ebx,%r13d
- addl %r9d,%r14d
- rorl $14,%r13d
- movl %r14d,%r9d
- movl %ecx,%r12d
- xorl %ebx,%r13d
- rorl $9,%r14d
- xorl %edx,%r12d
- rorl $5,%r13d
- xorl %r9d,%r14d
- andl %ebx,%r12d
- vaesenclast %xmm10,%xmm9,%xmm11
- vaesenc %xmm10,%xmm9,%xmm9
- vmovdqu 176-128(%rdi),%xmm10
- xorl %ebx,%r13d
- addl 44(%rsp),%r8d
- movl %r9d,%esi
- rorl $11,%r14d
- xorl %edx,%r12d
- xorl %r10d,%esi
- rorl $6,%r13d
- addl %r12d,%r8d
- andl %esi,%r15d
- xorl %r9d,%r14d
- addl %r13d,%r8d
- xorl %r10d,%r15d
- addl %r8d,%eax
- rorl $2,%r14d
- addl %r15d,%r8d
- movl %eax,%r13d
- addl %r8d,%r14d
- rorl $14,%r13d
- movl %r14d,%r8d
- movl %ebx,%r12d
- xorl %eax,%r13d
- rorl $9,%r14d
- xorl %ecx,%r12d
- rorl $5,%r13d
- xorl %r8d,%r14d
- andl %eax,%r12d
- vpand %xmm12,%xmm11,%xmm8
- vaesenc %xmm10,%xmm9,%xmm9
- vmovdqu 192-128(%rdi),%xmm10
- xorl %eax,%r13d
- addl 48(%rsp),%edx
- movl %r8d,%r15d
- rorl $11,%r14d
- xorl %ecx,%r12d
- xorl %r9d,%r15d
- rorl $6,%r13d
- addl %r12d,%edx
- andl %r15d,%esi
- xorl %r8d,%r14d
- addl %r13d,%edx
- xorl %r9d,%esi
- addl %edx,%r11d
- rorl $2,%r14d
- addl %esi,%edx
- movl %r11d,%r13d
- addl %edx,%r14d
- rorl $14,%r13d
- movl %r14d,%edx
- movl %eax,%r12d
- xorl %r11d,%r13d
- rorl $9,%r14d
- xorl %ebx,%r12d
- rorl $5,%r13d
- xorl %edx,%r14d
- andl %r11d,%r12d
- vaesenclast %xmm10,%xmm9,%xmm11
- vaesenc %xmm10,%xmm9,%xmm9
- vmovdqu 208-128(%rdi),%xmm10
- xorl %r11d,%r13d
- addl 52(%rsp),%ecx
- movl %edx,%esi
- rorl $11,%r14d
- xorl %ebx,%r12d
- xorl %r8d,%esi
- rorl $6,%r13d
- addl %r12d,%ecx
- andl %esi,%r15d
- xorl %edx,%r14d
- addl %r13d,%ecx
- xorl %r8d,%r15d
- addl %ecx,%r10d
- rorl $2,%r14d
- addl %r15d,%ecx
- movl %r10d,%r13d
- addl %ecx,%r14d
- rorl $14,%r13d
- movl %r14d,%ecx
- movl %r11d,%r12d
- xorl %r10d,%r13d
- rorl $9,%r14d
- xorl %eax,%r12d
- rorl $5,%r13d
- xorl %ecx,%r14d
- andl %r10d,%r12d
- vpand %xmm13,%xmm11,%xmm11
- vaesenc %xmm10,%xmm9,%xmm9
- vmovdqu 224-128(%rdi),%xmm10
- xorl %r10d,%r13d
- addl 56(%rsp),%ebx
- movl %ecx,%r15d
- rorl $11,%r14d
- xorl %eax,%r12d
- xorl %edx,%r15d
- rorl $6,%r13d
- addl %r12d,%ebx
- andl %r15d,%esi
- xorl %ecx,%r14d
- addl %r13d,%ebx
- xorl %edx,%esi
- addl %ebx,%r9d
- rorl $2,%r14d
- addl %esi,%ebx
- movl %r9d,%r13d
- addl %ebx,%r14d
- rorl $14,%r13d
- movl %r14d,%ebx
- movl %r10d,%r12d
- xorl %r9d,%r13d
- rorl $9,%r14d
- xorl %r11d,%r12d
- rorl $5,%r13d
- xorl %ebx,%r14d
- andl %r9d,%r12d
- vpor %xmm11,%xmm8,%xmm8
- vaesenclast %xmm10,%xmm9,%xmm11
- vmovdqu 0-128(%rdi),%xmm10
- xorl %r9d,%r13d
- addl 60(%rsp),%eax
- movl %ebx,%esi
- rorl $11,%r14d
- xorl %r11d,%r12d
- xorl %ecx,%esi
- rorl $6,%r13d
- addl %r12d,%eax
- andl %esi,%r15d
- xorl %ebx,%r14d
- addl %r13d,%eax
- xorl %ecx,%r15d
- addl %eax,%r8d
- rorl $2,%r14d
- addl %r15d,%eax
- movl %r8d,%r13d
- addl %eax,%r14d
- movq 64+0(%rsp),%r12
- movq 64+8(%rsp),%r13
- movq 64+40(%rsp),%r15
- movq 64+48(%rsp),%rsi
-
- vpand %xmm14,%xmm11,%xmm11
- movl %r14d,%eax
- vpor %xmm11,%xmm8,%xmm8
- vmovdqu %xmm8,(%r12,%r13,1)
- leaq 16(%r12),%r12
-
- addl 0(%r15),%eax
- addl 4(%r15),%ebx
- addl 8(%r15),%ecx
- addl 12(%r15),%edx
- addl 16(%r15),%r8d
- addl 20(%r15),%r9d
- addl 24(%r15),%r10d
- addl 28(%r15),%r11d
-
- cmpq 64+16(%rsp),%r12
-
- movl %eax,0(%r15)
- movl %ebx,4(%r15)
- movl %ecx,8(%r15)
- movl %edx,12(%r15)
- movl %r8d,16(%r15)
- movl %r9d,20(%r15)
- movl %r10d,24(%r15)
- movl %r11d,28(%r15)
-
- jb .Lloop_xop
-
- movq 64+32(%rsp),%r8
- movq 120(%rsp),%rsi
-.cfi_def_cfa %rsi,8
- vmovdqu %xmm8,(%r8)
- vzeroall
- movq -48(%rsi),%r15
-.cfi_restore %r15
- movq -40(%rsi),%r14
-.cfi_restore %r14
- movq -32(%rsi),%r13
-.cfi_restore %r13
- movq -24(%rsi),%r12
-.cfi_restore %r12
- movq -16(%rsi),%rbp
-.cfi_restore %rbp
- movq -8(%rsi),%rbx
-.cfi_restore %rbx
- leaq (%rsi),%rsp
-.cfi_def_cfa_register %rsp
-.Lepilogue_xop:
- .byte 0xf3,0xc3
-.cfi_endproc
-.size aesni_cbc_sha256_enc_xop,.-aesni_cbc_sha256_enc_xop
-.type aesni_cbc_sha256_enc_avx,@function
-.align 64
-aesni_cbc_sha256_enc_avx:
-.cfi_startproc
-.Lavx_shortcut:
- movq 8(%rsp),%r10
- movq %rsp,%rax
-.cfi_def_cfa_register %rax
- pushq %rbx
-.cfi_offset %rbx,-16
- pushq %rbp
-.cfi_offset %rbp,-24
- pushq %r12
-.cfi_offset %r12,-32
- pushq %r13
-.cfi_offset %r13,-40
- pushq %r14
-.cfi_offset %r14,-48
- pushq %r15
-.cfi_offset %r15,-56
- subq $128,%rsp
- andq $-64,%rsp
-
- shlq $6,%rdx
- subq %rdi,%rsi
- subq %rdi,%r10
- addq %rdi,%rdx
-
-
- movq %rsi,64+8(%rsp)
- movq %rdx,64+16(%rsp)
-
- movq %r8,64+32(%rsp)
- movq %r9,64+40(%rsp)
- movq %r10,64+48(%rsp)
- movq %rax,120(%rsp)
-.cfi_escape 0x0f,0x06,0x77,0xf8,0x00,0x06,0x23,0x08
-.Lprologue_avx:
- vzeroall
-
- movq %rdi,%r12
- leaq 128(%rcx),%rdi
- leaq K256+544(%rip),%r13
- movl 240-128(%rdi),%r14d
- movq %r9,%r15
- movq %r10,%rsi
- vmovdqu (%r8),%xmm8
- subq $9,%r14
-
- movl 0(%r15),%eax
- movl 4(%r15),%ebx
- movl 8(%r15),%ecx
- movl 12(%r15),%edx
- movl 16(%r15),%r8d
- movl 20(%r15),%r9d
- movl 24(%r15),%r10d
- movl 28(%r15),%r11d
-
- vmovdqa 0(%r13,%r14,8),%xmm14
- vmovdqa 16(%r13,%r14,8),%xmm13
- vmovdqa 32(%r13,%r14,8),%xmm12
- vmovdqu 0-128(%rdi),%xmm10
- jmp .Lloop_avx
-.align 16
-.Lloop_avx:
- vmovdqa K256+512(%rip),%xmm7
- vmovdqu 0(%rsi,%r12,1),%xmm0
- vmovdqu 16(%rsi,%r12,1),%xmm1
- vmovdqu 32(%rsi,%r12,1),%xmm2
- vmovdqu 48(%rsi,%r12,1),%xmm3
- vpshufb %xmm7,%xmm0,%xmm0
- leaq K256(%rip),%rbp
- vpshufb %xmm7,%xmm1,%xmm1
- vpshufb %xmm7,%xmm2,%xmm2
- vpaddd 0(%rbp),%xmm0,%xmm4
- vpshufb %xmm7,%xmm3,%xmm3
- vpaddd 32(%rbp),%xmm1,%xmm5
- vpaddd 64(%rbp),%xmm2,%xmm6
- vpaddd 96(%rbp),%xmm3,%xmm7
- vmovdqa %xmm4,0(%rsp)
- movl %eax,%r14d
- vmovdqa %xmm5,16(%rsp)
- movl %ebx,%esi
- vmovdqa %xmm6,32(%rsp)
- xorl %ecx,%esi
- vmovdqa %xmm7,48(%rsp)
- movl %r8d,%r13d
- jmp .Lavx_00_47
-
-.align 16
-.Lavx_00_47:
- subq $-32*4,%rbp
- vmovdqu (%r12),%xmm9
- movq %r12,64+0(%rsp)
- vpalignr $4,%xmm0,%xmm1,%xmm4
- shrdl $14,%r13d,%r13d
- movl %r14d,%eax
- movl %r9d,%r12d
- vpalignr $4,%xmm2,%xmm3,%xmm7
- xorl %r8d,%r13d
- shrdl $9,%r14d,%r14d
- xorl %r10d,%r12d
- vpsrld $7,%xmm4,%xmm6
- shrdl $5,%r13d,%r13d
- xorl %eax,%r14d
- andl %r8d,%r12d
- vpaddd %xmm7,%xmm0,%xmm0
- vpxor %xmm10,%xmm9,%xmm9
- vmovdqu 16-128(%rdi),%xmm10
- xorl %r8d,%r13d
- addl 0(%rsp),%r11d
- movl %eax,%r15d
- vpsrld $3,%xmm4,%xmm7
- shrdl $11,%r14d,%r14d
- xorl %r10d,%r12d
- xorl %ebx,%r15d
- vpslld $14,%xmm4,%xmm5
- shrdl $6,%r13d,%r13d
- addl %r12d,%r11d
- andl %r15d,%esi
- vpxor %xmm6,%xmm7,%xmm4
- xorl %eax,%r14d
- addl %r13d,%r11d
- xorl %ebx,%esi
- vpshufd $250,%xmm3,%xmm7
- addl %r11d,%edx
- shrdl $2,%r14d,%r14d
- addl %esi,%r11d
- vpsrld $11,%xmm6,%xmm6
- movl %edx,%r13d
- addl %r11d,%r14d
- shrdl $14,%r13d,%r13d
- vpxor %xmm5,%xmm4,%xmm4
- movl %r14d,%r11d
- movl %r8d,%r12d
- xorl %edx,%r13d
- vpslld $11,%xmm5,%xmm5
- shrdl $9,%r14d,%r14d
- xorl %r9d,%r12d
- shrdl $5,%r13d,%r13d
- vpxor %xmm6,%xmm4,%xmm4
- xorl %r11d,%r14d
- andl %edx,%r12d
- vpxor %xmm8,%xmm9,%xmm9
- xorl %edx,%r13d
- vpsrld $10,%xmm7,%xmm6
- addl 4(%rsp),%r10d
- movl %r11d,%esi
- shrdl $11,%r14d,%r14d
- vpxor %xmm5,%xmm4,%xmm4
- xorl %r9d,%r12d
- xorl %eax,%esi
- shrdl $6,%r13d,%r13d
- vpsrlq $17,%xmm7,%xmm7
- addl %r12d,%r10d
- andl %esi,%r15d
- xorl %r11d,%r14d
- vpaddd %xmm4,%xmm0,%xmm0
- addl %r13d,%r10d
- xorl %eax,%r15d
- addl %r10d,%ecx
- vpxor %xmm7,%xmm6,%xmm6
- shrdl $2,%r14d,%r14d
- addl %r15d,%r10d
- movl %ecx,%r13d
- vpsrlq $2,%xmm7,%xmm7
- addl %r10d,%r14d
- shrdl $14,%r13d,%r13d
- movl %r14d,%r10d
- vpxor %xmm7,%xmm6,%xmm6
- movl %edx,%r12d
- xorl %ecx,%r13d
- shrdl $9,%r14d,%r14d
- vpshufd $132,%xmm6,%xmm6
- xorl %r8d,%r12d
- shrdl $5,%r13d,%r13d
- xorl %r10d,%r14d
- vpsrldq $8,%xmm6,%xmm6
- andl %ecx,%r12d
- vaesenc %xmm10,%xmm9,%xmm9
- vmovdqu 32-128(%rdi),%xmm10
- xorl %ecx,%r13d
- addl 8(%rsp),%r9d
- vpaddd %xmm6,%xmm0,%xmm0
- movl %r10d,%r15d
- shrdl $11,%r14d,%r14d
- xorl %r8d,%r12d
- vpshufd $80,%xmm0,%xmm7
- xorl %r11d,%r15d
- shrdl $6,%r13d,%r13d
- addl %r12d,%r9d
- vpsrld $10,%xmm7,%xmm6
- andl %r15d,%esi
- xorl %r10d,%r14d
- addl %r13d,%r9d
- vpsrlq $17,%xmm7,%xmm7
- xorl %r11d,%esi
- addl %r9d,%ebx
- shrdl $2,%r14d,%r14d
- vpxor %xmm7,%xmm6,%xmm6
- addl %esi,%r9d
- movl %ebx,%r13d
- addl %r9d,%r14d
- vpsrlq $2,%xmm7,%xmm7
- shrdl $14,%r13d,%r13d
- movl %r14d,%r9d
- movl %ecx,%r12d
- vpxor %xmm7,%xmm6,%xmm6
- xorl %ebx,%r13d
- shrdl $9,%r14d,%r14d
- xorl %edx,%r12d
- vpshufd $232,%xmm6,%xmm6
- shrdl $5,%r13d,%r13d
- xorl %r9d,%r14d
- andl %ebx,%r12d
- vpslldq $8,%xmm6,%xmm6
- vaesenc %xmm10,%xmm9,%xmm9
- vmovdqu 48-128(%rdi),%xmm10
- xorl %ebx,%r13d
- addl 12(%rsp),%r8d
- movl %r9d,%esi
- vpaddd %xmm6,%xmm0,%xmm0
- shrdl $11,%r14d,%r14d
- xorl %edx,%r12d
- xorl %r10d,%esi
- vpaddd 0(%rbp),%xmm0,%xmm6
- shrdl $6,%r13d,%r13d
- addl %r12d,%r8d
- andl %esi,%r15d
- xorl %r9d,%r14d
- addl %r13d,%r8d
- xorl %r10d,%r15d
- addl %r8d,%eax
- shrdl $2,%r14d,%r14d
- addl %r15d,%r8d
- movl %eax,%r13d
- addl %r8d,%r14d
- vmovdqa %xmm6,0(%rsp)
- vpalignr $4,%xmm1,%xmm2,%xmm4
- shrdl $14,%r13d,%r13d
- movl %r14d,%r8d
- movl %ebx,%r12d
- vpalignr $4,%xmm3,%xmm0,%xmm7
- xorl %eax,%r13d
- shrdl $9,%r14d,%r14d
- xorl %ecx,%r12d
- vpsrld $7,%xmm4,%xmm6
- shrdl $5,%r13d,%r13d
- xorl %r8d,%r14d
- andl %eax,%r12d
- vpaddd %xmm7,%xmm1,%xmm1
- vaesenc %xmm10,%xmm9,%xmm9
- vmovdqu 64-128(%rdi),%xmm10
- xorl %eax,%r13d
- addl 16(%rsp),%edx
- movl %r8d,%r15d
- vpsrld $3,%xmm4,%xmm7
- shrdl $11,%r14d,%r14d
- xorl %ecx,%r12d
- xorl %r9d,%r15d
- vpslld $14,%xmm4,%xmm5
- shrdl $6,%r13d,%r13d
- addl %r12d,%edx
- andl %r15d,%esi
- vpxor %xmm6,%xmm7,%xmm4
- xorl %r8d,%r14d
- addl %r13d,%edx
- xorl %r9d,%esi
- vpshufd $250,%xmm0,%xmm7
- addl %edx,%r11d
- shrdl $2,%r14d,%r14d
- addl %esi,%edx
- vpsrld $11,%xmm6,%xmm6
- movl %r11d,%r13d
- addl %edx,%r14d
- shrdl $14,%r13d,%r13d
- vpxor %xmm5,%xmm4,%xmm4
- movl %r14d,%edx
- movl %eax,%r12d
- xorl %r11d,%r13d
- vpslld $11,%xmm5,%xmm5
- shrdl $9,%r14d,%r14d
- xorl %ebx,%r12d
- shrdl $5,%r13d,%r13d
- vpxor %xmm6,%xmm4,%xmm4
- xorl %edx,%r14d
- andl %r11d,%r12d
- vaesenc %xmm10,%xmm9,%xmm9
- vmovdqu 80-128(%rdi),%xmm10
- xorl %r11d,%r13d
- vpsrld $10,%xmm7,%xmm6
- addl 20(%rsp),%ecx
- movl %edx,%esi
- shrdl $11,%r14d,%r14d
- vpxor %xmm5,%xmm4,%xmm4
- xorl %ebx,%r12d
- xorl %r8d,%esi
- shrdl $6,%r13d,%r13d
- vpsrlq $17,%xmm7,%xmm7
- addl %r12d,%ecx
- andl %esi,%r15d
- xorl %edx,%r14d
- vpaddd %xmm4,%xmm1,%xmm1
- addl %r13d,%ecx
- xorl %r8d,%r15d
- addl %ecx,%r10d
- vpxor %xmm7,%xmm6,%xmm6
- shrdl $2,%r14d,%r14d
- addl %r15d,%ecx
- movl %r10d,%r13d
- vpsrlq $2,%xmm7,%xmm7
- addl %ecx,%r14d
- shrdl $14,%r13d,%r13d
- movl %r14d,%ecx
- vpxor %xmm7,%xmm6,%xmm6
- movl %r11d,%r12d
- xorl %r10d,%r13d
- shrdl $9,%r14d,%r14d
- vpshufd $132,%xmm6,%xmm6
- xorl %eax,%r12d
- shrdl $5,%r13d,%r13d
- xorl %ecx,%r14d
- vpsrldq $8,%xmm6,%xmm6
- andl %r10d,%r12d
- vaesenc %xmm10,%xmm9,%xmm9
- vmovdqu 96-128(%rdi),%xmm10
- xorl %r10d,%r13d
- addl 24(%rsp),%ebx
- vpaddd %xmm6,%xmm1,%xmm1
- movl %ecx,%r15d
- shrdl $11,%r14d,%r14d
- xorl %eax,%r12d
- vpshufd $80,%xmm1,%xmm7
- xorl %edx,%r15d
- shrdl $6,%r13d,%r13d
- addl %r12d,%ebx
- vpsrld $10,%xmm7,%xmm6
- andl %r15d,%esi
- xorl %ecx,%r14d
- addl %r13d,%ebx
- vpsrlq $17,%xmm7,%xmm7
- xorl %edx,%esi
- addl %ebx,%r9d
- shrdl $2,%r14d,%r14d
- vpxor %xmm7,%xmm6,%xmm6
- addl %esi,%ebx
- movl %r9d,%r13d
- addl %ebx,%r14d
- vpsrlq $2,%xmm7,%xmm7
- shrdl $14,%r13d,%r13d
- movl %r14d,%ebx
- movl %r10d,%r12d
- vpxor %xmm7,%xmm6,%xmm6
- xorl %r9d,%r13d
- shrdl $9,%r14d,%r14d
- xorl %r11d,%r12d
- vpshufd $232,%xmm6,%xmm6
- shrdl $5,%r13d,%r13d
- xorl %ebx,%r14d
- andl %r9d,%r12d
- vpslldq $8,%xmm6,%xmm6
- vaesenc %xmm10,%xmm9,%xmm9
- vmovdqu 112-128(%rdi),%xmm10
- xorl %r9d,%r13d
- addl 28(%rsp),%eax
- movl %ebx,%esi
- vpaddd %xmm6,%xmm1,%xmm1
- shrdl $11,%r14d,%r14d
- xorl %r11d,%r12d
- xorl %ecx,%esi
- vpaddd 32(%rbp),%xmm1,%xmm6
- shrdl $6,%r13d,%r13d
- addl %r12d,%eax
- andl %esi,%r15d
- xorl %ebx,%r14d
- addl %r13d,%eax
- xorl %ecx,%r15d
- addl %eax,%r8d
- shrdl $2,%r14d,%r14d
- addl %r15d,%eax
- movl %r8d,%r13d
- addl %eax,%r14d
- vmovdqa %xmm6,16(%rsp)
- vpalignr $4,%xmm2,%xmm3,%xmm4
- shrdl $14,%r13d,%r13d
- movl %r14d,%eax
- movl %r9d,%r12d
- vpalignr $4,%xmm0,%xmm1,%xmm7
- xorl %r8d,%r13d
- shrdl $9,%r14d,%r14d
- xorl %r10d,%r12d
- vpsrld $7,%xmm4,%xmm6
- shrdl $5,%r13d,%r13d
- xorl %eax,%r14d
- andl %r8d,%r12d
- vpaddd %xmm7,%xmm2,%xmm2
- vaesenc %xmm10,%xmm9,%xmm9
- vmovdqu 128-128(%rdi),%xmm10
- xorl %r8d,%r13d
- addl 32(%rsp),%r11d
- movl %eax,%r15d
- vpsrld $3,%xmm4,%xmm7
- shrdl $11,%r14d,%r14d
- xorl %r10d,%r12d
- xorl %ebx,%r15d
- vpslld $14,%xmm4,%xmm5
- shrdl $6,%r13d,%r13d
- addl %r12d,%r11d
- andl %r15d,%esi
- vpxor %xmm6,%xmm7,%xmm4
- xorl %eax,%r14d
- addl %r13d,%r11d
- xorl %ebx,%esi
- vpshufd $250,%xmm1,%xmm7
- addl %r11d,%edx
- shrdl $2,%r14d,%r14d
- addl %esi,%r11d
- vpsrld $11,%xmm6,%xmm6
- movl %edx,%r13d
- addl %r11d,%r14d
- shrdl $14,%r13d,%r13d
- vpxor %xmm5,%xmm4,%xmm4
- movl %r14d,%r11d
- movl %r8d,%r12d
- xorl %edx,%r13d
- vpslld $11,%xmm5,%xmm5
- shrdl $9,%r14d,%r14d
- xorl %r9d,%r12d
- shrdl $5,%r13d,%r13d
- vpxor %xmm6,%xmm4,%xmm4
- xorl %r11d,%r14d
- andl %edx,%r12d
- vaesenc %xmm10,%xmm9,%xmm9
- vmovdqu 144-128(%rdi),%xmm10
- xorl %edx,%r13d
- vpsrld $10,%xmm7,%xmm6
- addl 36(%rsp),%r10d
- movl %r11d,%esi
- shrdl $11,%r14d,%r14d
- vpxor %xmm5,%xmm4,%xmm4
- xorl %r9d,%r12d
- xorl %eax,%esi
- shrdl $6,%r13d,%r13d
- vpsrlq $17,%xmm7,%xmm7
- addl %r12d,%r10d
- andl %esi,%r15d
- xorl %r11d,%r14d
- vpaddd %xmm4,%xmm2,%xmm2
- addl %r13d,%r10d
- xorl %eax,%r15d
- addl %r10d,%ecx
- vpxor %xmm7,%xmm6,%xmm6
- shrdl $2,%r14d,%r14d
- addl %r15d,%r10d
- movl %ecx,%r13d
- vpsrlq $2,%xmm7,%xmm7
- addl %r10d,%r14d
- shrdl $14,%r13d,%r13d
- movl %r14d,%r10d
- vpxor %xmm7,%xmm6,%xmm6
- movl %edx,%r12d
- xorl %ecx,%r13d
- shrdl $9,%r14d,%r14d
- vpshufd $132,%xmm6,%xmm6
- xorl %r8d,%r12d
- shrdl $5,%r13d,%r13d
- xorl %r10d,%r14d
- vpsrldq $8,%xmm6,%xmm6
- andl %ecx,%r12d
- vaesenc %xmm10,%xmm9,%xmm9
- vmovdqu 160-128(%rdi),%xmm10
- xorl %ecx,%r13d
- addl 40(%rsp),%r9d
- vpaddd %xmm6,%xmm2,%xmm2
- movl %r10d,%r15d
- shrdl $11,%r14d,%r14d
- xorl %r8d,%r12d
- vpshufd $80,%xmm2,%xmm7
- xorl %r11d,%r15d
- shrdl $6,%r13d,%r13d
- addl %r12d,%r9d
- vpsrld $10,%xmm7,%xmm6
- andl %r15d,%esi
- xorl %r10d,%r14d
- addl %r13d,%r9d
- vpsrlq $17,%xmm7,%xmm7
- xorl %r11d,%esi
- addl %r9d,%ebx
- shrdl $2,%r14d,%r14d
- vpxor %xmm7,%xmm6,%xmm6
- addl %esi,%r9d
- movl %ebx,%r13d
- addl %r9d,%r14d
- vpsrlq $2,%xmm7,%xmm7
- shrdl $14,%r13d,%r13d
- movl %r14d,%r9d
- movl %ecx,%r12d
- vpxor %xmm7,%xmm6,%xmm6
- xorl %ebx,%r13d
- shrdl $9,%r14d,%r14d
- xorl %edx,%r12d
- vpshufd $232,%xmm6,%xmm6
- shrdl $5,%r13d,%r13d
- xorl %r9d,%r14d
- andl %ebx,%r12d
- vpslldq $8,%xmm6,%xmm6
- vaesenclast %xmm10,%xmm9,%xmm11
- vaesenc %xmm10,%xmm9,%xmm9
- vmovdqu 176-128(%rdi),%xmm10
- xorl %ebx,%r13d
- addl 44(%rsp),%r8d
- movl %r9d,%esi
- vpaddd %xmm6,%xmm2,%xmm2
- shrdl $11,%r14d,%r14d
- xorl %edx,%r12d
- xorl %r10d,%esi
- vpaddd 64(%rbp),%xmm2,%xmm6
- shrdl $6,%r13d,%r13d
- addl %r12d,%r8d
- andl %esi,%r15d
- xorl %r9d,%r14d
- addl %r13d,%r8d
- xorl %r10d,%r15d
- addl %r8d,%eax
- shrdl $2,%r14d,%r14d
- addl %r15d,%r8d
- movl %eax,%r13d
- addl %r8d,%r14d
- vmovdqa %xmm6,32(%rsp)
- vpalignr $4,%xmm3,%xmm0,%xmm4
- shrdl $14,%r13d,%r13d
- movl %r14d,%r8d
- movl %ebx,%r12d
- vpalignr $4,%xmm1,%xmm2,%xmm7
- xorl %eax,%r13d
- shrdl $9,%r14d,%r14d
- xorl %ecx,%r12d
- vpsrld $7,%xmm4,%xmm6
- shrdl $5,%r13d,%r13d
- xorl %r8d,%r14d
- andl %eax,%r12d
- vpaddd %xmm7,%xmm3,%xmm3
- vpand %xmm12,%xmm11,%xmm8
- vaesenc %xmm10,%xmm9,%xmm9
- vmovdqu 192-128(%rdi),%xmm10
- xorl %eax,%r13d
- addl 48(%rsp),%edx
- movl %r8d,%r15d
- vpsrld $3,%xmm4,%xmm7
- shrdl $11,%r14d,%r14d
- xorl %ecx,%r12d
- xorl %r9d,%r15d
- vpslld $14,%xmm4,%xmm5
- shrdl $6,%r13d,%r13d
- addl %r12d,%edx
- andl %r15d,%esi
- vpxor %xmm6,%xmm7,%xmm4
- xorl %r8d,%r14d
- addl %r13d,%edx
- xorl %r9d,%esi
- vpshufd $250,%xmm2,%xmm7
- addl %edx,%r11d
- shrdl $2,%r14d,%r14d
- addl %esi,%edx
- vpsrld $11,%xmm6,%xmm6
- movl %r11d,%r13d
- addl %edx,%r14d
- shrdl $14,%r13d,%r13d
- vpxor %xmm5,%xmm4,%xmm4
- movl %r14d,%edx
- movl %eax,%r12d
- xorl %r11d,%r13d
- vpslld $11,%xmm5,%xmm5
- shrdl $9,%r14d,%r14d
- xorl %ebx,%r12d
- shrdl $5,%r13d,%r13d
- vpxor %xmm6,%xmm4,%xmm4
- xorl %edx,%r14d
- andl %r11d,%r12d
- vaesenclast %xmm10,%xmm9,%xmm11
- vaesenc %xmm10,%xmm9,%xmm9
- vmovdqu 208-128(%rdi),%xmm10
- xorl %r11d,%r13d
- vpsrld $10,%xmm7,%xmm6
- addl 52(%rsp),%ecx
- movl %edx,%esi
- shrdl $11,%r14d,%r14d
- vpxor %xmm5,%xmm4,%xmm4
- xorl %ebx,%r12d
- xorl %r8d,%esi
- shrdl $6,%r13d,%r13d
- vpsrlq $17,%xmm7,%xmm7
- addl %r12d,%ecx
- andl %esi,%r15d
- xorl %edx,%r14d
- vpaddd %xmm4,%xmm3,%xmm3
- addl %r13d,%ecx
- xorl %r8d,%r15d
- addl %ecx,%r10d
- vpxor %xmm7,%xmm6,%xmm6
- shrdl $2,%r14d,%r14d
- addl %r15d,%ecx
- movl %r10d,%r13d
- vpsrlq $2,%xmm7,%xmm7
- addl %ecx,%r14d
- shrdl $14,%r13d,%r13d
- movl %r14d,%ecx
- vpxor %xmm7,%xmm6,%xmm6
- movl %r11d,%r12d
- xorl %r10d,%r13d
- shrdl $9,%r14d,%r14d
- vpshufd $132,%xmm6,%xmm6
- xorl %eax,%r12d
- shrdl $5,%r13d,%r13d
- xorl %ecx,%r14d
- vpsrldq $8,%xmm6,%xmm6
- andl %r10d,%r12d
- vpand %xmm13,%xmm11,%xmm11
- vaesenc %xmm10,%xmm9,%xmm9
- vmovdqu 224-128(%rdi),%xmm10
- xorl %r10d,%r13d
- addl 56(%rsp),%ebx
- vpaddd %xmm6,%xmm3,%xmm3
- movl %ecx,%r15d
- shrdl $11,%r14d,%r14d
- xorl %eax,%r12d
- vpshufd $80,%xmm3,%xmm7
- xorl %edx,%r15d
- shrdl $6,%r13d,%r13d
- addl %r12d,%ebx
- vpsrld $10,%xmm7,%xmm6
- andl %r15d,%esi
- xorl %ecx,%r14d
- addl %r13d,%ebx
- vpsrlq $17,%xmm7,%xmm7
- xorl %edx,%esi
- addl %ebx,%r9d
- shrdl $2,%r14d,%r14d
- vpxor %xmm7,%xmm6,%xmm6
- addl %esi,%ebx
- movl %r9d,%r13d
- addl %ebx,%r14d
- vpsrlq $2,%xmm7,%xmm7
- shrdl $14,%r13d,%r13d
- movl %r14d,%ebx
- movl %r10d,%r12d
- vpxor %xmm7,%xmm6,%xmm6
- xorl %r9d,%r13d
- shrdl $9,%r14d,%r14d
- xorl %r11d,%r12d
- vpshufd $232,%xmm6,%xmm6
- shrdl $5,%r13d,%r13d
- xorl %ebx,%r14d
- andl %r9d,%r12d
- vpslldq $8,%xmm6,%xmm6
- vpor %xmm11,%xmm8,%xmm8
- vaesenclast %xmm10,%xmm9,%xmm11
- vmovdqu 0-128(%rdi),%xmm10
- xorl %r9d,%r13d
- addl 60(%rsp),%eax
- movl %ebx,%esi
- vpaddd %xmm6,%xmm3,%xmm3
- shrdl $11,%r14d,%r14d
- xorl %r11d,%r12d
- xorl %ecx,%esi
- vpaddd 96(%rbp),%xmm3,%xmm6
- shrdl $6,%r13d,%r13d
- addl %r12d,%eax
- andl %esi,%r15d
- xorl %ebx,%r14d
- addl %r13d,%eax
- xorl %ecx,%r15d
- addl %eax,%r8d
- shrdl $2,%r14d,%r14d
- addl %r15d,%eax
- movl %r8d,%r13d
- addl %eax,%r14d
- vmovdqa %xmm6,48(%rsp)
- movq 64+0(%rsp),%r12
- vpand %xmm14,%xmm11,%xmm11
- movq 64+8(%rsp),%r15
- vpor %xmm11,%xmm8,%xmm8
- vmovdqu %xmm8,(%r15,%r12,1)
- leaq 16(%r12),%r12
- cmpb $0,131(%rbp)
- jne .Lavx_00_47
- vmovdqu (%r12),%xmm9
- movq %r12,64+0(%rsp)
- shrdl $14,%r13d,%r13d
- movl %r14d,%eax
- movl %r9d,%r12d
- xorl %r8d,%r13d
- shrdl $9,%r14d,%r14d
- xorl %r10d,%r12d
- shrdl $5,%r13d,%r13d
- xorl %eax,%r14d
- andl %r8d,%r12d
- vpxor %xmm10,%xmm9,%xmm9
- vmovdqu 16-128(%rdi),%xmm10
- xorl %r8d,%r13d
- addl 0(%rsp),%r11d
- movl %eax,%r15d
- shrdl $11,%r14d,%r14d
- xorl %r10d,%r12d
- xorl %ebx,%r15d
- shrdl $6,%r13d,%r13d
- addl %r12d,%r11d
- andl %r15d,%esi
- xorl %eax,%r14d
- addl %r13d,%r11d
- xorl %ebx,%esi
- addl %r11d,%edx
- shrdl $2,%r14d,%r14d
- addl %esi,%r11d
- movl %edx,%r13d
- addl %r11d,%r14d
- shrdl $14,%r13d,%r13d
- movl %r14d,%r11d
- movl %r8d,%r12d
- xorl %edx,%r13d
- shrdl $9,%r14d,%r14d
- xorl %r9d,%r12d
- shrdl $5,%r13d,%r13d
- xorl %r11d,%r14d
- andl %edx,%r12d
- vpxor %xmm8,%xmm9,%xmm9
- xorl %edx,%r13d
- addl 4(%rsp),%r10d
- movl %r11d,%esi
- shrdl $11,%r14d,%r14d
- xorl %r9d,%r12d
- xorl %eax,%esi
- shrdl $6,%r13d,%r13d
- addl %r12d,%r10d
- andl %esi,%r15d
- xorl %r11d,%r14d
- addl %r13d,%r10d
- xorl %eax,%r15d
- addl %r10d,%ecx
- shrdl $2,%r14d,%r14d
- addl %r15d,%r10d
- movl %ecx,%r13d
- addl %r10d,%r14d
- shrdl $14,%r13d,%r13d
- movl %r14d,%r10d
- movl %edx,%r12d
- xorl %ecx,%r13d
- shrdl $9,%r14d,%r14d
- xorl %r8d,%r12d
- shrdl $5,%r13d,%r13d
- xorl %r10d,%r14d
- andl %ecx,%r12d
- vaesenc %xmm10,%xmm9,%xmm9
- vmovdqu 32-128(%rdi),%xmm10
- xorl %ecx,%r13d
- addl 8(%rsp),%r9d
- movl %r10d,%r15d
- shrdl $11,%r14d,%r14d
- xorl %r8d,%r12d
- xorl %r11d,%r15d
- shrdl $6,%r13d,%r13d
- addl %r12d,%r9d
- andl %r15d,%esi
- xorl %r10d,%r14d
- addl %r13d,%r9d
- xorl %r11d,%esi
- addl %r9d,%ebx
- shrdl $2,%r14d,%r14d
- addl %esi,%r9d
- movl %ebx,%r13d
- addl %r9d,%r14d
- shrdl $14,%r13d,%r13d
- movl %r14d,%r9d
- movl %ecx,%r12d
- xorl %ebx,%r13d
- shrdl $9,%r14d,%r14d
- xorl %edx,%r12d
- shrdl $5,%r13d,%r13d
- xorl %r9d,%r14d
- andl %ebx,%r12d
- vaesenc %xmm10,%xmm9,%xmm9
- vmovdqu 48-128(%rdi),%xmm10
- xorl %ebx,%r13d
- addl 12(%rsp),%r8d
- movl %r9d,%esi
- shrdl $11,%r14d,%r14d
- xorl %edx,%r12d
- xorl %r10d,%esi
- shrdl $6,%r13d,%r13d
- addl %r12d,%r8d
- andl %esi,%r15d
- xorl %r9d,%r14d
- addl %r13d,%r8d
- xorl %r10d,%r15d
- addl %r8d,%eax
- shrdl $2,%r14d,%r14d
- addl %r15d,%r8d
- movl %eax,%r13d
- addl %r8d,%r14d
- shrdl $14,%r13d,%r13d
- movl %r14d,%r8d
- movl %ebx,%r12d
- xorl %eax,%r13d
- shrdl $9,%r14d,%r14d
- xorl %ecx,%r12d
- shrdl $5,%r13d,%r13d
- xorl %r8d,%r14d
- andl %eax,%r12d
- vaesenc %xmm10,%xmm9,%xmm9
- vmovdqu 64-128(%rdi),%xmm10
- xorl %eax,%r13d
- addl 16(%rsp),%edx
- movl %r8d,%r15d
- shrdl $11,%r14d,%r14d
- xorl %ecx,%r12d
- xorl %r9d,%r15d
- shrdl $6,%r13d,%r13d
- addl %r12d,%edx
- andl %r15d,%esi
- xorl %r8d,%r14d
- addl %r13d,%edx
- xorl %r9d,%esi
- addl %edx,%r11d
- shrdl $2,%r14d,%r14d
- addl %esi,%edx
- movl %r11d,%r13d
- addl %edx,%r14d
- shrdl $14,%r13d,%r13d
- movl %r14d,%edx
- movl %eax,%r12d
- xorl %r11d,%r13d
- shrdl $9,%r14d,%r14d
- xorl %ebx,%r12d
- shrdl $5,%r13d,%r13d
- xorl %edx,%r14d
- andl %r11d,%r12d
- vaesenc %xmm10,%xmm9,%xmm9
- vmovdqu 80-128(%rdi),%xmm10
- xorl %r11d,%r13d
- addl 20(%rsp),%ecx
- movl %edx,%esi
- shrdl $11,%r14d,%r14d
- xorl %ebx,%r12d
- xorl %r8d,%esi
- shrdl $6,%r13d,%r13d
- addl %r12d,%ecx
- andl %esi,%r15d
- xorl %edx,%r14d
- addl %r13d,%ecx
- xorl %r8d,%r15d
- addl %ecx,%r10d
- shrdl $2,%r14d,%r14d
- addl %r15d,%ecx
- movl %r10d,%r13d
- addl %ecx,%r14d
- shrdl $14,%r13d,%r13d
- movl %r14d,%ecx
- movl %r11d,%r12d
- xorl %r10d,%r13d
- shrdl $9,%r14d,%r14d
- xorl %eax,%r12d
- shrdl $5,%r13d,%r13d
- xorl %ecx,%r14d
- andl %r10d,%r12d
- vaesenc %xmm10,%xmm9,%xmm9
- vmovdqu 96-128(%rdi),%xmm10
- xorl %r10d,%r13d
- addl 24(%rsp),%ebx
- movl %ecx,%r15d
- shrdl $11,%r14d,%r14d
- xorl %eax,%r12d
- xorl %edx,%r15d
- shrdl $6,%r13d,%r13d
- addl %r12d,%ebx
- andl %r15d,%esi
- xorl %ecx,%r14d
- addl %r13d,%ebx
- xorl %edx,%esi
- addl %ebx,%r9d
- shrdl $2,%r14d,%r14d
- addl %esi,%ebx
- movl %r9d,%r13d
- addl %ebx,%r14d
- shrdl $14,%r13d,%r13d
- movl %r14d,%ebx
- movl %r10d,%r12d
- xorl %r9d,%r13d
- shrdl $9,%r14d,%r14d
- xorl %r11d,%r12d
- shrdl $5,%r13d,%r13d
- xorl %ebx,%r14d
- andl %r9d,%r12d
- vaesenc %xmm10,%xmm9,%xmm9
- vmovdqu 112-128(%rdi),%xmm10
- xorl %r9d,%r13d
- addl 28(%rsp),%eax
- movl %ebx,%esi
- shrdl $11,%r14d,%r14d
- xorl %r11d,%r12d
- xorl %ecx,%esi
- shrdl $6,%r13d,%r13d
- addl %r12d,%eax
- andl %esi,%r15d
- xorl %ebx,%r14d
- addl %r13d,%eax
- xorl %ecx,%r15d
- addl %eax,%r8d
- shrdl $2,%r14d,%r14d
- addl %r15d,%eax
- movl %r8d,%r13d
- addl %eax,%r14d
- shrdl $14,%r13d,%r13d
- movl %r14d,%eax
- movl %r9d,%r12d
- xorl %r8d,%r13d
- shrdl $9,%r14d,%r14d
- xorl %r10d,%r12d
- shrdl $5,%r13d,%r13d
- xorl %eax,%r14d
- andl %r8d,%r12d
- vaesenc %xmm10,%xmm9,%xmm9
- vmovdqu 128-128(%rdi),%xmm10
- xorl %r8d,%r13d
- addl 32(%rsp),%r11d
- movl %eax,%r15d
- shrdl $11,%r14d,%r14d
- xorl %r10d,%r12d
- xorl %ebx,%r15d
- shrdl $6,%r13d,%r13d
- addl %r12d,%r11d
- andl %r15d,%esi
- xorl %eax,%r14d
- addl %r13d,%r11d
- xorl %ebx,%esi
- addl %r11d,%edx
- shrdl $2,%r14d,%r14d
- addl %esi,%r11d
- movl %edx,%r13d
- addl %r11d,%r14d
- shrdl $14,%r13d,%r13d
- movl %r14d,%r11d
- movl %r8d,%r12d
- xorl %edx,%r13d
- shrdl $9,%r14d,%r14d
- xorl %r9d,%r12d
- shrdl $5,%r13d,%r13d
- xorl %r11d,%r14d
- andl %edx,%r12d
- vaesenc %xmm10,%xmm9,%xmm9
- vmovdqu 144-128(%rdi),%xmm10
- xorl %edx,%r13d
- addl 36(%rsp),%r10d
- movl %r11d,%esi
- shrdl $11,%r14d,%r14d
- xorl %r9d,%r12d
- xorl %eax,%esi
- shrdl $6,%r13d,%r13d
- addl %r12d,%r10d
- andl %esi,%r15d
- xorl %r11d,%r14d
- addl %r13d,%r10d
- xorl %eax,%r15d
- addl %r10d,%ecx
- shrdl $2,%r14d,%r14d
- addl %r15d,%r10d
- movl %ecx,%r13d
- addl %r10d,%r14d
- shrdl $14,%r13d,%r13d
- movl %r14d,%r10d
- movl %edx,%r12d
- xorl %ecx,%r13d
- shrdl $9,%r14d,%r14d
- xorl %r8d,%r12d
- shrdl $5,%r13d,%r13d
- xorl %r10d,%r14d
- andl %ecx,%r12d
- vaesenc %xmm10,%xmm9,%xmm9
- vmovdqu 160-128(%rdi),%xmm10
- xorl %ecx,%r13d
- addl 40(%rsp),%r9d
- movl %r10d,%r15d
- shrdl $11,%r14d,%r14d
- xorl %r8d,%r12d
- xorl %r11d,%r15d
- shrdl $6,%r13d,%r13d
- addl %r12d,%r9d
- andl %r15d,%esi
- xorl %r10d,%r14d
- addl %r13d,%r9d
- xorl %r11d,%esi
- addl %r9d,%ebx
- shrdl $2,%r14d,%r14d
- addl %esi,%r9d
- movl %ebx,%r13d
- addl %r9d,%r14d
- shrdl $14,%r13d,%r13d
- movl %r14d,%r9d
- movl %ecx,%r12d
- xorl %ebx,%r13d
- shrdl $9,%r14d,%r14d
- xorl %edx,%r12d
- shrdl $5,%r13d,%r13d
- xorl %r9d,%r14d
- andl %ebx,%r12d
- vaesenclast %xmm10,%xmm9,%xmm11
- vaesenc %xmm10,%xmm9,%xmm9
- vmovdqu 176-128(%rdi),%xmm10
- xorl %ebx,%r13d
- addl 44(%rsp),%r8d
- movl %r9d,%esi
- shrdl $11,%r14d,%r14d
- xorl %edx,%r12d
- xorl %r10d,%esi
- shrdl $6,%r13d,%r13d
- addl %r12d,%r8d
- andl %esi,%r15d
- xorl %r9d,%r14d
- addl %r13d,%r8d
- xorl %r10d,%r15d
- addl %r8d,%eax
- shrdl $2,%r14d,%r14d
- addl %r15d,%r8d
- movl %eax,%r13d
- addl %r8d,%r14d
- shrdl $14,%r13d,%r13d
- movl %r14d,%r8d
- movl %ebx,%r12d
- xorl %eax,%r13d
- shrdl $9,%r14d,%r14d
- xorl %ecx,%r12d
- shrdl $5,%r13d,%r13d
- xorl %r8d,%r14d
- andl %eax,%r12d
- vpand %xmm12,%xmm11,%xmm8
- vaesenc %xmm10,%xmm9,%xmm9
- vmovdqu 192-128(%rdi),%xmm10
- xorl %eax,%r13d
- addl 48(%rsp),%edx
- movl %r8d,%r15d
- shrdl $11,%r14d,%r14d
- xorl %ecx,%r12d
- xorl %r9d,%r15d
- shrdl $6,%r13d,%r13d
- addl %r12d,%edx
- andl %r15d,%esi
- xorl %r8d,%r14d
- addl %r13d,%edx
- xorl %r9d,%esi
- addl %edx,%r11d
- shrdl $2,%r14d,%r14d
- addl %esi,%edx
- movl %r11d,%r13d
- addl %edx,%r14d
- shrdl $14,%r13d,%r13d
- movl %r14d,%edx
- movl %eax,%r12d
- xorl %r11d,%r13d
- shrdl $9,%r14d,%r14d
- xorl %ebx,%r12d
- shrdl $5,%r13d,%r13d
- xorl %edx,%r14d
- andl %r11d,%r12d
- vaesenclast %xmm10,%xmm9,%xmm11
- vaesenc %xmm10,%xmm9,%xmm9
- vmovdqu 208-128(%rdi),%xmm10
- xorl %r11d,%r13d
- addl 52(%rsp),%ecx
- movl %edx,%esi
- shrdl $11,%r14d,%r14d
- xorl %ebx,%r12d
- xorl %r8d,%esi
- shrdl $6,%r13d,%r13d
- addl %r12d,%ecx
- andl %esi,%r15d
- xorl %edx,%r14d
- addl %r13d,%ecx
- xorl %r8d,%r15d
- addl %ecx,%r10d
- shrdl $2,%r14d,%r14d
- addl %r15d,%ecx
- movl %r10d,%r13d
- addl %ecx,%r14d
- shrdl $14,%r13d,%r13d
- movl %r14d,%ecx
- movl %r11d,%r12d
- xorl %r10d,%r13d
- shrdl $9,%r14d,%r14d
- xorl %eax,%r12d
- shrdl $5,%r13d,%r13d
- xorl %ecx,%r14d
- andl %r10d,%r12d
- vpand %xmm13,%xmm11,%xmm11
- vaesenc %xmm10,%xmm9,%xmm9
- vmovdqu 224-128(%rdi),%xmm10
- xorl %r10d,%r13d
- addl 56(%rsp),%ebx
- movl %ecx,%r15d
- shrdl $11,%r14d,%r14d
- xorl %eax,%r12d
- xorl %edx,%r15d
- shrdl $6,%r13d,%r13d
- addl %r12d,%ebx
- andl %r15d,%esi
- xorl %ecx,%r14d
- addl %r13d,%ebx
- xorl %edx,%esi
- addl %ebx,%r9d
- shrdl $2,%r14d,%r14d
- addl %esi,%ebx
- movl %r9d,%r13d
- addl %ebx,%r14d
- shrdl $14,%r13d,%r13d
- movl %r14d,%ebx
- movl %r10d,%r12d
- xorl %r9d,%r13d
- shrdl $9,%r14d,%r14d
- xorl %r11d,%r12d
- shrdl $5,%r13d,%r13d
- xorl %ebx,%r14d
- andl %r9d,%r12d
- vpor %xmm11,%xmm8,%xmm8
- vaesenclast %xmm10,%xmm9,%xmm11
- vmovdqu 0-128(%rdi),%xmm10
- xorl %r9d,%r13d
- addl 60(%rsp),%eax
- movl %ebx,%esi
- shrdl $11,%r14d,%r14d
- xorl %r11d,%r12d
- xorl %ecx,%esi
- shrdl $6,%r13d,%r13d
- addl %r12d,%eax
- andl %esi,%r15d
- xorl %ebx,%r14d
- addl %r13d,%eax
- xorl %ecx,%r15d
- addl %eax,%r8d
- shrdl $2,%r14d,%r14d
- addl %r15d,%eax
- movl %r8d,%r13d
- addl %eax,%r14d
- movq 64+0(%rsp),%r12
- movq 64+8(%rsp),%r13
- movq 64+40(%rsp),%r15
- movq 64+48(%rsp),%rsi
-
- vpand %xmm14,%xmm11,%xmm11
- movl %r14d,%eax
- vpor %xmm11,%xmm8,%xmm8
- vmovdqu %xmm8,(%r12,%r13,1)
- leaq 16(%r12),%r12
-
- addl 0(%r15),%eax
- addl 4(%r15),%ebx
- addl 8(%r15),%ecx
- addl 12(%r15),%edx
- addl 16(%r15),%r8d
- addl 20(%r15),%r9d
- addl 24(%r15),%r10d
- addl 28(%r15),%r11d
-
- cmpq 64+16(%rsp),%r12
-
- movl %eax,0(%r15)
- movl %ebx,4(%r15)
- movl %ecx,8(%r15)
- movl %edx,12(%r15)
- movl %r8d,16(%r15)
- movl %r9d,20(%r15)
- movl %r10d,24(%r15)
- movl %r11d,28(%r15)
- jb .Lloop_avx
-
- movq 64+32(%rsp),%r8
- movq 120(%rsp),%rsi
-.cfi_def_cfa %rsi,8
- vmovdqu %xmm8,(%r8)
- vzeroall
- movq -48(%rsi),%r15
-.cfi_restore %r15
- movq -40(%rsi),%r14
-.cfi_restore %r14
- movq -32(%rsi),%r13
-.cfi_restore %r13
- movq -24(%rsi),%r12
-.cfi_restore %r12
- movq -16(%rsi),%rbp
-.cfi_restore %rbp
- movq -8(%rsi),%rbx
-.cfi_restore %rbx
- leaq (%rsi),%rsp
-.cfi_def_cfa_register %rsp
-.Lepilogue_avx:
- .byte 0xf3,0xc3
-.cfi_endproc
-.size aesni_cbc_sha256_enc_avx,.-aesni_cbc_sha256_enc_avx
-.type aesni_cbc_sha256_enc_avx2,@function
-.align 64
-aesni_cbc_sha256_enc_avx2:
-.cfi_startproc
-.Lavx2_shortcut:
- movq 8(%rsp),%r10
- movq %rsp,%rax
-.cfi_def_cfa_register %rax
- pushq %rbx
-.cfi_offset %rbx,-16
- pushq %rbp
-.cfi_offset %rbp,-24
- pushq %r12
-.cfi_offset %r12,-32
- pushq %r13
-.cfi_offset %r13,-40
- pushq %r14
-.cfi_offset %r14,-48
- pushq %r15
-.cfi_offset %r15,-56
- subq $576,%rsp
- andq $-1024,%rsp
- addq $448,%rsp
-
- shlq $6,%rdx
- subq %rdi,%rsi
- subq %rdi,%r10
- addq %rdi,%rdx
-
-
-
- movq %rdx,64+16(%rsp)
-
- movq %r8,64+32(%rsp)
- movq %r9,64+40(%rsp)
- movq %r10,64+48(%rsp)
- movq %rax,120(%rsp)
-.cfi_escape 0x0f,0x06,0x77,0xf8,0x00,0x06,0x23,0x08
-.Lprologue_avx2:
- vzeroall
-
- movq %rdi,%r13
- vpinsrq $1,%rsi,%xmm15,%xmm15
- leaq 128(%rcx),%rdi
- leaq K256+544(%rip),%r12
- movl 240-128(%rdi),%r14d
- movq %r9,%r15
- movq %r10,%rsi
- vmovdqu (%r8),%xmm8
- leaq -9(%r14),%r14
-
- vmovdqa 0(%r12,%r14,8),%xmm14
- vmovdqa 16(%r12,%r14,8),%xmm13
- vmovdqa 32(%r12,%r14,8),%xmm12
-
- subq $-64,%r13
- movl 0(%r15),%eax
- leaq (%rsi,%r13,1),%r12
- movl 4(%r15),%ebx
- cmpq %rdx,%r13
- movl 8(%r15),%ecx
- cmoveq %rsp,%r12
- movl 12(%r15),%edx
- movl 16(%r15),%r8d
- movl 20(%r15),%r9d
- movl 24(%r15),%r10d
- movl 28(%r15),%r11d
- vmovdqu 0-128(%rdi),%xmm10
- jmp .Loop_avx2
-.align 16
-.Loop_avx2:
- vmovdqa K256+512(%rip),%ymm7
- vmovdqu -64+0(%rsi,%r13,1),%xmm0
- vmovdqu -64+16(%rsi,%r13,1),%xmm1
- vmovdqu -64+32(%rsi,%r13,1),%xmm2
- vmovdqu -64+48(%rsi,%r13,1),%xmm3
-
- vinserti128 $1,(%r12),%ymm0,%ymm0
- vinserti128 $1,16(%r12),%ymm1,%ymm1
- vpshufb %ymm7,%ymm0,%ymm0
- vinserti128 $1,32(%r12),%ymm2,%ymm2
- vpshufb %ymm7,%ymm1,%ymm1
- vinserti128 $1,48(%r12),%ymm3,%ymm3
-
- leaq K256(%rip),%rbp
- vpshufb %ymm7,%ymm2,%ymm2
- leaq -64(%r13),%r13
- vpaddd 0(%rbp),%ymm0,%ymm4
- vpshufb %ymm7,%ymm3,%ymm3
- vpaddd 32(%rbp),%ymm1,%ymm5
- vpaddd 64(%rbp),%ymm2,%ymm6
- vpaddd 96(%rbp),%ymm3,%ymm7
- vmovdqa %ymm4,0(%rsp)
- xorl %r14d,%r14d
- vmovdqa %ymm5,32(%rsp)
- leaq -64(%rsp),%rsp
- movl %ebx,%esi
- vmovdqa %ymm6,0(%rsp)
- xorl %ecx,%esi
- vmovdqa %ymm7,32(%rsp)
- movl %r9d,%r12d
- subq $-32*4,%rbp
- jmp .Lavx2_00_47
-
-.align 16
-.Lavx2_00_47:
- vmovdqu (%r13),%xmm9
- vpinsrq $0,%r13,%xmm15,%xmm15
- leaq -64(%rsp),%rsp
- vpalignr $4,%ymm0,%ymm1,%ymm4
- addl 0+128(%rsp),%r11d
- andl %r8d,%r12d
- rorxl $25,%r8d,%r13d
- vpalignr $4,%ymm2,%ymm3,%ymm7
- rorxl $11,%r8d,%r15d
- leal (%rax,%r14,1),%eax
- leal (%r11,%r12,1),%r11d
- vpsrld $7,%ymm4,%ymm6
- andnl %r10d,%r8d,%r12d
- xorl %r15d,%r13d
- rorxl $6,%r8d,%r14d
- vpaddd %ymm7,%ymm0,%ymm0
- leal (%r11,%r12,1),%r11d
- xorl %r14d,%r13d
- movl %eax,%r15d
- vpsrld $3,%ymm4,%ymm7
- rorxl $22,%eax,%r12d
- leal (%r11,%r13,1),%r11d
- xorl %ebx,%r15d
- vpslld $14,%ymm4,%ymm5
- rorxl $13,%eax,%r14d
- rorxl $2,%eax,%r13d
- leal (%rdx,%r11,1),%edx
- vpxor %ymm6,%ymm7,%ymm4
- andl %r15d,%esi
- vpxor %xmm10,%xmm9,%xmm9
- vmovdqu 16-128(%rdi),%xmm10
- xorl %r12d,%r14d
- xorl %ebx,%esi
- vpshufd $250,%ymm3,%ymm7
- xorl %r13d,%r14d
- leal (%r11,%rsi,1),%r11d
- movl %r8d,%r12d
- vpsrld $11,%ymm6,%ymm6
- addl 4+128(%rsp),%r10d
- andl %edx,%r12d
- rorxl $25,%edx,%r13d
- vpxor %ymm5,%ymm4,%ymm4
- rorxl $11,%edx,%esi
- leal (%r11,%r14,1),%r11d
- leal (%r10,%r12,1),%r10d
- vpslld $11,%ymm5,%ymm5
- andnl %r9d,%edx,%r12d
- xorl %esi,%r13d
- rorxl $6,%edx,%r14d
- vpxor %ymm6,%ymm4,%ymm4
- leal (%r10,%r12,1),%r10d
- xorl %r14d,%r13d
- movl %r11d,%esi
- vpsrld $10,%ymm7,%ymm6
- rorxl $22,%r11d,%r12d
- leal (%r10,%r13,1),%r10d
- xorl %eax,%esi
- vpxor %ymm5,%ymm4,%ymm4
- rorxl $13,%r11d,%r14d
- rorxl $2,%r11d,%r13d
- leal (%rcx,%r10,1),%ecx
- vpsrlq $17,%ymm7,%ymm7
- andl %esi,%r15d
- vpxor %xmm8,%xmm9,%xmm9
- xorl %r12d,%r14d
- xorl %eax,%r15d
- vpaddd %ymm4,%ymm0,%ymm0
- xorl %r13d,%r14d
- leal (%r10,%r15,1),%r10d
- movl %edx,%r12d
- vpxor %ymm7,%ymm6,%ymm6
- addl 8+128(%rsp),%r9d
- andl %ecx,%r12d
- rorxl $25,%ecx,%r13d
- vpsrlq $2,%ymm7,%ymm7
- rorxl $11,%ecx,%r15d
- leal (%r10,%r14,1),%r10d
- leal (%r9,%r12,1),%r9d
- vpxor %ymm7,%ymm6,%ymm6
- andnl %r8d,%ecx,%r12d
- xorl %r15d,%r13d
- rorxl $6,%ecx,%r14d
- vpshufd $132,%ymm6,%ymm6
- leal (%r9,%r12,1),%r9d
- xorl %r14d,%r13d
- movl %r10d,%r15d
- vpsrldq $8,%ymm6,%ymm6
- rorxl $22,%r10d,%r12d
- leal (%r9,%r13,1),%r9d
- xorl %r11d,%r15d
- vpaddd %ymm6,%ymm0,%ymm0
- rorxl $13,%r10d,%r14d
- rorxl $2,%r10d,%r13d
- leal (%rbx,%r9,1),%ebx
- vpshufd $80,%ymm0,%ymm7
- andl %r15d,%esi
- vaesenc %xmm10,%xmm9,%xmm9
- vmovdqu 32-128(%rdi),%xmm10
- xorl %r12d,%r14d
- xorl %r11d,%esi
- vpsrld $10,%ymm7,%ymm6
- xorl %r13d,%r14d
- leal (%r9,%rsi,1),%r9d
- movl %ecx,%r12d
- vpsrlq $17,%ymm7,%ymm7
- addl 12+128(%rsp),%r8d
- andl %ebx,%r12d
- rorxl $25,%ebx,%r13d
- vpxor %ymm7,%ymm6,%ymm6
- rorxl $11,%ebx,%esi
- leal (%r9,%r14,1),%r9d
- leal (%r8,%r12,1),%r8d
- vpsrlq $2,%ymm7,%ymm7
- andnl %edx,%ebx,%r12d
- xorl %esi,%r13d
- rorxl $6,%ebx,%r14d
- vpxor %ymm7,%ymm6,%ymm6
- leal (%r8,%r12,1),%r8d
- xorl %r14d,%r13d
- movl %r9d,%esi
- vpshufd $232,%ymm6,%ymm6
- rorxl $22,%r9d,%r12d
- leal (%r8,%r13,1),%r8d
- xorl %r10d,%esi
- vpslldq $8,%ymm6,%ymm6
- rorxl $13,%r9d,%r14d
- rorxl $2,%r9d,%r13d
- leal (%rax,%r8,1),%eax
- vpaddd %ymm6,%ymm0,%ymm0
- andl %esi,%r15d
- vaesenc %xmm10,%xmm9,%xmm9
- vmovdqu 48-128(%rdi),%xmm10
- xorl %r12d,%r14d
- xorl %r10d,%r15d
- vpaddd 0(%rbp),%ymm0,%ymm6
- xorl %r13d,%r14d
- leal (%r8,%r15,1),%r8d
- movl %ebx,%r12d
- vmovdqa %ymm6,0(%rsp)
- vpalignr $4,%ymm1,%ymm2,%ymm4
- addl 32+128(%rsp),%edx
- andl %eax,%r12d
- rorxl $25,%eax,%r13d
- vpalignr $4,%ymm3,%ymm0,%ymm7
- rorxl $11,%eax,%r15d
- leal (%r8,%r14,1),%r8d
- leal (%rdx,%r12,1),%edx
- vpsrld $7,%ymm4,%ymm6
- andnl %ecx,%eax,%r12d
- xorl %r15d,%r13d
- rorxl $6,%eax,%r14d
- vpaddd %ymm7,%ymm1,%ymm1
- leal (%rdx,%r12,1),%edx
- xorl %r14d,%r13d
- movl %r8d,%r15d
- vpsrld $3,%ymm4,%ymm7
- rorxl $22,%r8d,%r12d
- leal (%rdx,%r13,1),%edx
- xorl %r9d,%r15d
- vpslld $14,%ymm4,%ymm5
- rorxl $13,%r8d,%r14d
- rorxl $2,%r8d,%r13d
- leal (%r11,%rdx,1),%r11d
- vpxor %ymm6,%ymm7,%ymm4
- andl %r15d,%esi
- vaesenc %xmm10,%xmm9,%xmm9
- vmovdqu 64-128(%rdi),%xmm10
- xorl %r12d,%r14d
- xorl %r9d,%esi
- vpshufd $250,%ymm0,%ymm7
- xorl %r13d,%r14d
- leal (%rdx,%rsi,1),%edx
- movl %eax,%r12d
- vpsrld $11,%ymm6,%ymm6
- addl 36+128(%rsp),%ecx
- andl %r11d,%r12d
- rorxl $25,%r11d,%r13d
- vpxor %ymm5,%ymm4,%ymm4
- rorxl $11,%r11d,%esi
- leal (%rdx,%r14,1),%edx
- leal (%rcx,%r12,1),%ecx
- vpslld $11,%ymm5,%ymm5
- andnl %ebx,%r11d,%r12d
- xorl %esi,%r13d
- rorxl $6,%r11d,%r14d
- vpxor %ymm6,%ymm4,%ymm4
- leal (%rcx,%r12,1),%ecx
- xorl %r14d,%r13d
- movl %edx,%esi
- vpsrld $10,%ymm7,%ymm6
- rorxl $22,%edx,%r12d
- leal (%rcx,%r13,1),%ecx
- xorl %r8d,%esi
- vpxor %ymm5,%ymm4,%ymm4
- rorxl $13,%edx,%r14d
- rorxl $2,%edx,%r13d
- leal (%r10,%rcx,1),%r10d
- vpsrlq $17,%ymm7,%ymm7
- andl %esi,%r15d
- vaesenc %xmm10,%xmm9,%xmm9
- vmovdqu 80-128(%rdi),%xmm10
- xorl %r12d,%r14d
- xorl %r8d,%r15d
- vpaddd %ymm4,%ymm1,%ymm1
- xorl %r13d,%r14d
- leal (%rcx,%r15,1),%ecx
- movl %r11d,%r12d
- vpxor %ymm7,%ymm6,%ymm6
- addl 40+128(%rsp),%ebx
- andl %r10d,%r12d
- rorxl $25,%r10d,%r13d
- vpsrlq $2,%ymm7,%ymm7
- rorxl $11,%r10d,%r15d
- leal (%rcx,%r14,1),%ecx
- leal (%rbx,%r12,1),%ebx
- vpxor %ymm7,%ymm6,%ymm6
- andnl %eax,%r10d,%r12d
- xorl %r15d,%r13d
- rorxl $6,%r10d,%r14d
- vpshufd $132,%ymm6,%ymm6
- leal (%rbx,%r12,1),%ebx
- xorl %r14d,%r13d
- movl %ecx,%r15d
- vpsrldq $8,%ymm6,%ymm6
- rorxl $22,%ecx,%r12d
- leal (%rbx,%r13,1),%ebx
- xorl %edx,%r15d
- vpaddd %ymm6,%ymm1,%ymm1
- rorxl $13,%ecx,%r14d
- rorxl $2,%ecx,%r13d
- leal (%r9,%rbx,1),%r9d
- vpshufd $80,%ymm1,%ymm7
- andl %r15d,%esi
- vaesenc %xmm10,%xmm9,%xmm9
- vmovdqu 96-128(%rdi),%xmm10
- xorl %r12d,%r14d
- xorl %edx,%esi
- vpsrld $10,%ymm7,%ymm6
- xorl %r13d,%r14d
- leal (%rbx,%rsi,1),%ebx
- movl %r10d,%r12d
- vpsrlq $17,%ymm7,%ymm7
- addl 44+128(%rsp),%eax
- andl %r9d,%r12d
- rorxl $25,%r9d,%r13d
- vpxor %ymm7,%ymm6,%ymm6
- rorxl $11,%r9d,%esi
- leal (%rbx,%r14,1),%ebx
- leal (%rax,%r12,1),%eax
- vpsrlq $2,%ymm7,%ymm7
- andnl %r11d,%r9d,%r12d
- xorl %esi,%r13d
- rorxl $6,%r9d,%r14d
- vpxor %ymm7,%ymm6,%ymm6
- leal (%rax,%r12,1),%eax
- xorl %r14d,%r13d
- movl %ebx,%esi
- vpshufd $232,%ymm6,%ymm6
- rorxl $22,%ebx,%r12d
- leal (%rax,%r13,1),%eax
- xorl %ecx,%esi
- vpslldq $8,%ymm6,%ymm6
- rorxl $13,%ebx,%r14d
- rorxl $2,%ebx,%r13d
- leal (%r8,%rax,1),%r8d
- vpaddd %ymm6,%ymm1,%ymm1
- andl %esi,%r15d
- vaesenc %xmm10,%xmm9,%xmm9
- vmovdqu 112-128(%rdi),%xmm10
- xorl %r12d,%r14d
- xorl %ecx,%r15d
- vpaddd 32(%rbp),%ymm1,%ymm6
- xorl %r13d,%r14d
- leal (%rax,%r15,1),%eax
- movl %r9d,%r12d
- vmovdqa %ymm6,32(%rsp)
- leaq -64(%rsp),%rsp
- vpalignr $4,%ymm2,%ymm3,%ymm4
- addl 0+128(%rsp),%r11d
- andl %r8d,%r12d
- rorxl $25,%r8d,%r13d
- vpalignr $4,%ymm0,%ymm1,%ymm7
- rorxl $11,%r8d,%r15d
- leal (%rax,%r14,1),%eax
- leal (%r11,%r12,1),%r11d
- vpsrld $7,%ymm4,%ymm6
- andnl %r10d,%r8d,%r12d
- xorl %r15d,%r13d
- rorxl $6,%r8d,%r14d
- vpaddd %ymm7,%ymm2,%ymm2
- leal (%r11,%r12,1),%r11d
- xorl %r14d,%r13d
- movl %eax,%r15d
- vpsrld $3,%ymm4,%ymm7
- rorxl $22,%eax,%r12d
- leal (%r11,%r13,1),%r11d
- xorl %ebx,%r15d
- vpslld $14,%ymm4,%ymm5
- rorxl $13,%eax,%r14d
- rorxl $2,%eax,%r13d
- leal (%rdx,%r11,1),%edx
- vpxor %ymm6,%ymm7,%ymm4
- andl %r15d,%esi
- vaesenc %xmm10,%xmm9,%xmm9
- vmovdqu 128-128(%rdi),%xmm10
- xorl %r12d,%r14d
- xorl %ebx,%esi
- vpshufd $250,%ymm1,%ymm7
- xorl %r13d,%r14d
- leal (%r11,%rsi,1),%r11d
- movl %r8d,%r12d
- vpsrld $11,%ymm6,%ymm6
- addl 4+128(%rsp),%r10d
- andl %edx,%r12d
- rorxl $25,%edx,%r13d
- vpxor %ymm5,%ymm4,%ymm4
- rorxl $11,%edx,%esi
- leal (%r11,%r14,1),%r11d
- leal (%r10,%r12,1),%r10d
- vpslld $11,%ymm5,%ymm5
- andnl %r9d,%edx,%r12d
- xorl %esi,%r13d
- rorxl $6,%edx,%r14d
- vpxor %ymm6,%ymm4,%ymm4
- leal (%r10,%r12,1),%r10d
- xorl %r14d,%r13d
- movl %r11d,%esi
- vpsrld $10,%ymm7,%ymm6
- rorxl $22,%r11d,%r12d
- leal (%r10,%r13,1),%r10d
- xorl %eax,%esi
- vpxor %ymm5,%ymm4,%ymm4
- rorxl $13,%r11d,%r14d
- rorxl $2,%r11d,%r13d
- leal (%rcx,%r10,1),%ecx
- vpsrlq $17,%ymm7,%ymm7
- andl %esi,%r15d
- vaesenc %xmm10,%xmm9,%xmm9
- vmovdqu 144-128(%rdi),%xmm10
- xorl %r12d,%r14d
- xorl %eax,%r15d
- vpaddd %ymm4,%ymm2,%ymm2
- xorl %r13d,%r14d
- leal (%r10,%r15,1),%r10d
- movl %edx,%r12d
- vpxor %ymm7,%ymm6,%ymm6
- addl 8+128(%rsp),%r9d
- andl %ecx,%r12d
- rorxl $25,%ecx,%r13d
- vpsrlq $2,%ymm7,%ymm7
- rorxl $11,%ecx,%r15d
- leal (%r10,%r14,1),%r10d
- leal (%r9,%r12,1),%r9d
- vpxor %ymm7,%ymm6,%ymm6
- andnl %r8d,%ecx,%r12d
- xorl %r15d,%r13d
- rorxl $6,%ecx,%r14d
- vpshufd $132,%ymm6,%ymm6
- leal (%r9,%r12,1),%r9d
- xorl %r14d,%r13d
- movl %r10d,%r15d
- vpsrldq $8,%ymm6,%ymm6
- rorxl $22,%r10d,%r12d
- leal (%r9,%r13,1),%r9d
- xorl %r11d,%r15d
- vpaddd %ymm6,%ymm2,%ymm2
- rorxl $13,%r10d,%r14d
- rorxl $2,%r10d,%r13d
- leal (%rbx,%r9,1),%ebx
- vpshufd $80,%ymm2,%ymm7
- andl %r15d,%esi
- vaesenc %xmm10,%xmm9,%xmm9
- vmovdqu 160-128(%rdi),%xmm10
- xorl %r12d,%r14d
- xorl %r11d,%esi
- vpsrld $10,%ymm7,%ymm6
- xorl %r13d,%r14d
- leal (%r9,%rsi,1),%r9d
- movl %ecx,%r12d
- vpsrlq $17,%ymm7,%ymm7
- addl 12+128(%rsp),%r8d
- andl %ebx,%r12d
- rorxl $25,%ebx,%r13d
- vpxor %ymm7,%ymm6,%ymm6
- rorxl $11,%ebx,%esi
- leal (%r9,%r14,1),%r9d
- leal (%r8,%r12,1),%r8d
- vpsrlq $2,%ymm7,%ymm7
- andnl %edx,%ebx,%r12d
- xorl %esi,%r13d
- rorxl $6,%ebx,%r14d
- vpxor %ymm7,%ymm6,%ymm6
- leal (%r8,%r12,1),%r8d
- xorl %r14d,%r13d
- movl %r9d,%esi
- vpshufd $232,%ymm6,%ymm6
- rorxl $22,%r9d,%r12d
- leal (%r8,%r13,1),%r8d
- xorl %r10d,%esi
- vpslldq $8,%ymm6,%ymm6
- rorxl $13,%r9d,%r14d
- rorxl $2,%r9d,%r13d
- leal (%rax,%r8,1),%eax
- vpaddd %ymm6,%ymm2,%ymm2
- andl %esi,%r15d
- vaesenclast %xmm10,%xmm9,%xmm11
- vaesenc %xmm10,%xmm9,%xmm9
- vmovdqu 176-128(%rdi),%xmm10
- xorl %r12d,%r14d
- xorl %r10d,%r15d
- vpaddd 64(%rbp),%ymm2,%ymm6
- xorl %r13d,%r14d
- leal (%r8,%r15,1),%r8d
- movl %ebx,%r12d
- vmovdqa %ymm6,0(%rsp)
- vpalignr $4,%ymm3,%ymm0,%ymm4
- addl 32+128(%rsp),%edx
- andl %eax,%r12d
- rorxl $25,%eax,%r13d
- vpalignr $4,%ymm1,%ymm2,%ymm7
- rorxl $11,%eax,%r15d
- leal (%r8,%r14,1),%r8d
- leal (%rdx,%r12,1),%edx
- vpsrld $7,%ymm4,%ymm6
- andnl %ecx,%eax,%r12d
- xorl %r15d,%r13d
- rorxl $6,%eax,%r14d
- vpaddd %ymm7,%ymm3,%ymm3
- leal (%rdx,%r12,1),%edx
- xorl %r14d,%r13d
- movl %r8d,%r15d
- vpsrld $3,%ymm4,%ymm7
- rorxl $22,%r8d,%r12d
- leal (%rdx,%r13,1),%edx
- xorl %r9d,%r15d
- vpslld $14,%ymm4,%ymm5
- rorxl $13,%r8d,%r14d
- rorxl $2,%r8d,%r13d
- leal (%r11,%rdx,1),%r11d
- vpxor %ymm6,%ymm7,%ymm4
- andl %r15d,%esi
- vpand %xmm12,%xmm11,%xmm8
- vaesenc %xmm10,%xmm9,%xmm9
- vmovdqu 192-128(%rdi),%xmm10
- xorl %r12d,%r14d
- xorl %r9d,%esi
- vpshufd $250,%ymm2,%ymm7
- xorl %r13d,%r14d
- leal (%rdx,%rsi,1),%edx
- movl %eax,%r12d
- vpsrld $11,%ymm6,%ymm6
- addl 36+128(%rsp),%ecx
- andl %r11d,%r12d
- rorxl $25,%r11d,%r13d
- vpxor %ymm5,%ymm4,%ymm4
- rorxl $11,%r11d,%esi
- leal (%rdx,%r14,1),%edx
- leal (%rcx,%r12,1),%ecx
- vpslld $11,%ymm5,%ymm5
- andnl %ebx,%r11d,%r12d
- xorl %esi,%r13d
- rorxl $6,%r11d,%r14d
- vpxor %ymm6,%ymm4,%ymm4
- leal (%rcx,%r12,1),%ecx
- xorl %r14d,%r13d
- movl %edx,%esi
- vpsrld $10,%ymm7,%ymm6
- rorxl $22,%edx,%r12d
- leal (%rcx,%r13,1),%ecx
- xorl %r8d,%esi
- vpxor %ymm5,%ymm4,%ymm4
- rorxl $13,%edx,%r14d
- rorxl $2,%edx,%r13d
- leal (%r10,%rcx,1),%r10d
- vpsrlq $17,%ymm7,%ymm7
- andl %esi,%r15d
- vaesenclast %xmm10,%xmm9,%xmm11
- vaesenc %xmm10,%xmm9,%xmm9
- vmovdqu 208-128(%rdi),%xmm10
- xorl %r12d,%r14d
- xorl %r8d,%r15d
- vpaddd %ymm4,%ymm3,%ymm3
- xorl %r13d,%r14d
- leal (%rcx,%r15,1),%ecx
- movl %r11d,%r12d
- vpxor %ymm7,%ymm6,%ymm6
- addl 40+128(%rsp),%ebx
- andl %r10d,%r12d
- rorxl $25,%r10d,%r13d
- vpsrlq $2,%ymm7,%ymm7
- rorxl $11,%r10d,%r15d
- leal (%rcx,%r14,1),%ecx
- leal (%rbx,%r12,1),%ebx
- vpxor %ymm7,%ymm6,%ymm6
- andnl %eax,%r10d,%r12d
- xorl %r15d,%r13d
- rorxl $6,%r10d,%r14d
- vpshufd $132,%ymm6,%ymm6
- leal (%rbx,%r12,1),%ebx
- xorl %r14d,%r13d
- movl %ecx,%r15d
- vpsrldq $8,%ymm6,%ymm6
- rorxl $22,%ecx,%r12d
- leal (%rbx,%r13,1),%ebx
- xorl %edx,%r15d
- vpaddd %ymm6,%ymm3,%ymm3
- rorxl $13,%ecx,%r14d
- rorxl $2,%ecx,%r13d
- leal (%r9,%rbx,1),%r9d
- vpshufd $80,%ymm3,%ymm7
- andl %r15d,%esi
- vpand %xmm13,%xmm11,%xmm11
- vaesenc %xmm10,%xmm9,%xmm9
- vmovdqu 224-128(%rdi),%xmm10
- xorl %r12d,%r14d
- xorl %edx,%esi
- vpsrld $10,%ymm7,%ymm6
- xorl %r13d,%r14d
- leal (%rbx,%rsi,1),%ebx
- movl %r10d,%r12d
- vpsrlq $17,%ymm7,%ymm7
- addl 44+128(%rsp),%eax
- andl %r9d,%r12d
- rorxl $25,%r9d,%r13d
- vpxor %ymm7,%ymm6,%ymm6
- rorxl $11,%r9d,%esi
- leal (%rbx,%r14,1),%ebx
- leal (%rax,%r12,1),%eax
- vpsrlq $2,%ymm7,%ymm7
- andnl %r11d,%r9d,%r12d
- xorl %esi,%r13d
- rorxl $6,%r9d,%r14d
- vpxor %ymm7,%ymm6,%ymm6
- leal (%rax,%r12,1),%eax
- xorl %r14d,%r13d
- movl %ebx,%esi
- vpshufd $232,%ymm6,%ymm6
- rorxl $22,%ebx,%r12d
- leal (%rax,%r13,1),%eax
- xorl %ecx,%esi
- vpslldq $8,%ymm6,%ymm6
- rorxl $13,%ebx,%r14d
- rorxl $2,%ebx,%r13d
- leal (%r8,%rax,1),%r8d
- vpaddd %ymm6,%ymm3,%ymm3
- andl %esi,%r15d
- vpor %xmm11,%xmm8,%xmm8
- vaesenclast %xmm10,%xmm9,%xmm11
- vmovdqu 0-128(%rdi),%xmm10
- xorl %r12d,%r14d
- xorl %ecx,%r15d
- vpaddd 96(%rbp),%ymm3,%ymm6
- xorl %r13d,%r14d
- leal (%rax,%r15,1),%eax
- movl %r9d,%r12d
- vmovdqa %ymm6,32(%rsp)
- vmovq %xmm15,%r13
- vpextrq $1,%xmm15,%r15
- vpand %xmm14,%xmm11,%xmm11
- vpor %xmm11,%xmm8,%xmm8
- vmovdqu %xmm8,(%r15,%r13,1)
- leaq 16(%r13),%r13
- leaq 128(%rbp),%rbp
- cmpb $0,3(%rbp)
- jne .Lavx2_00_47
- vmovdqu (%r13),%xmm9
- vpinsrq $0,%r13,%xmm15,%xmm15
- addl 0+64(%rsp),%r11d
- andl %r8d,%r12d
- rorxl $25,%r8d,%r13d
- rorxl $11,%r8d,%r15d
- leal (%rax,%r14,1),%eax
- leal (%r11,%r12,1),%r11d
- andnl %r10d,%r8d,%r12d
- xorl %r15d,%r13d
- rorxl $6,%r8d,%r14d
- leal (%r11,%r12,1),%r11d
- xorl %r14d,%r13d
- movl %eax,%r15d
- rorxl $22,%eax,%r12d
- leal (%r11,%r13,1),%r11d
- xorl %ebx,%r15d
- rorxl $13,%eax,%r14d
- rorxl $2,%eax,%r13d
- leal (%rdx,%r11,1),%edx
- andl %r15d,%esi
- vpxor %xmm10,%xmm9,%xmm9
- vmovdqu 16-128(%rdi),%xmm10
- xorl %r12d,%r14d
- xorl %ebx,%esi
- xorl %r13d,%r14d
- leal (%r11,%rsi,1),%r11d
- movl %r8d,%r12d
- addl 4+64(%rsp),%r10d
- andl %edx,%r12d
- rorxl $25,%edx,%r13d
- rorxl $11,%edx,%esi
- leal (%r11,%r14,1),%r11d
- leal (%r10,%r12,1),%r10d
- andnl %r9d,%edx,%r12d
- xorl %esi,%r13d
- rorxl $6,%edx,%r14d
- leal (%r10,%r12,1),%r10d
- xorl %r14d,%r13d
- movl %r11d,%esi
- rorxl $22,%r11d,%r12d
- leal (%r10,%r13,1),%r10d
- xorl %eax,%esi
- rorxl $13,%r11d,%r14d
- rorxl $2,%r11d,%r13d
- leal (%rcx,%r10,1),%ecx
- andl %esi,%r15d
- vpxor %xmm8,%xmm9,%xmm9
- xorl %r12d,%r14d
- xorl %eax,%r15d
- xorl %r13d,%r14d
- leal (%r10,%r15,1),%r10d
- movl %edx,%r12d
- addl 8+64(%rsp),%r9d
- andl %ecx,%r12d
- rorxl $25,%ecx,%r13d
- rorxl $11,%ecx,%r15d
- leal (%r10,%r14,1),%r10d
- leal (%r9,%r12,1),%r9d
- andnl %r8d,%ecx,%r12d
- xorl %r15d,%r13d
- rorxl $6,%ecx,%r14d
- leal (%r9,%r12,1),%r9d
- xorl %r14d,%r13d
- movl %r10d,%r15d
- rorxl $22,%r10d,%r12d
- leal (%r9,%r13,1),%r9d
- xorl %r11d,%r15d
- rorxl $13,%r10d,%r14d
- rorxl $2,%r10d,%r13d
- leal (%rbx,%r9,1),%ebx
- andl %r15d,%esi
- vaesenc %xmm10,%xmm9,%xmm9
- vmovdqu 32-128(%rdi),%xmm10
- xorl %r12d,%r14d
- xorl %r11d,%esi
- xorl %r13d,%r14d
- leal (%r9,%rsi,1),%r9d
- movl %ecx,%r12d
- addl 12+64(%rsp),%r8d
- andl %ebx,%r12d
- rorxl $25,%ebx,%r13d
- rorxl $11,%ebx,%esi
- leal (%r9,%r14,1),%r9d
- leal (%r8,%r12,1),%r8d
- andnl %edx,%ebx,%r12d
- xorl %esi,%r13d
- rorxl $6,%ebx,%r14d
- leal (%r8,%r12,1),%r8d
- xorl %r14d,%r13d
- movl %r9d,%esi
- rorxl $22,%r9d,%r12d
- leal (%r8,%r13,1),%r8d
- xorl %r10d,%esi
- rorxl $13,%r9d,%r14d
- rorxl $2,%r9d,%r13d
- leal (%rax,%r8,1),%eax
- andl %esi,%r15d
- vaesenc %xmm10,%xmm9,%xmm9
- vmovdqu 48-128(%rdi),%xmm10
- xorl %r12d,%r14d
- xorl %r10d,%r15d
- xorl %r13d,%r14d
- leal (%r8,%r15,1),%r8d
- movl %ebx,%r12d
- addl 32+64(%rsp),%edx
- andl %eax,%r12d
- rorxl $25,%eax,%r13d
- rorxl $11,%eax,%r15d
- leal (%r8,%r14,1),%r8d
- leal (%rdx,%r12,1),%edx
- andnl %ecx,%eax,%r12d
- xorl %r15d,%r13d
- rorxl $6,%eax,%r14d
- leal (%rdx,%r12,1),%edx
- xorl %r14d,%r13d
- movl %r8d,%r15d
- rorxl $22,%r8d,%r12d
- leal (%rdx,%r13,1),%edx
- xorl %r9d,%r15d
- rorxl $13,%r8d,%r14d
- rorxl $2,%r8d,%r13d
- leal (%r11,%rdx,1),%r11d
- andl %r15d,%esi
- vaesenc %xmm10,%xmm9,%xmm9
- vmovdqu 64-128(%rdi),%xmm10
- xorl %r12d,%r14d
- xorl %r9d,%esi
- xorl %r13d,%r14d
- leal (%rdx,%rsi,1),%edx
- movl %eax,%r12d
- addl 36+64(%rsp),%ecx
- andl %r11d,%r12d
- rorxl $25,%r11d,%r13d
- rorxl $11,%r11d,%esi
- leal (%rdx,%r14,1),%edx
- leal (%rcx,%r12,1),%ecx
- andnl %ebx,%r11d,%r12d
- xorl %esi,%r13d
- rorxl $6,%r11d,%r14d
- leal (%rcx,%r12,1),%ecx
- xorl %r14d,%r13d
- movl %edx,%esi
- rorxl $22,%edx,%r12d
- leal (%rcx,%r13,1),%ecx
- xorl %r8d,%esi
- rorxl $13,%edx,%r14d
- rorxl $2,%edx,%r13d
- leal (%r10,%rcx,1),%r10d
- andl %esi,%r15d
- vaesenc %xmm10,%xmm9,%xmm9
- vmovdqu 80-128(%rdi),%xmm10
- xorl %r12d,%r14d
- xorl %r8d,%r15d
- xorl %r13d,%r14d
- leal (%rcx,%r15,1),%ecx
- movl %r11d,%r12d
- addl 40+64(%rsp),%ebx
- andl %r10d,%r12d
- rorxl $25,%r10d,%r13d
- rorxl $11,%r10d,%r15d
- leal (%rcx,%r14,1),%ecx
- leal (%rbx,%r12,1),%ebx
- andnl %eax,%r10d,%r12d
- xorl %r15d,%r13d
- rorxl $6,%r10d,%r14d
- leal (%rbx,%r12,1),%ebx
- xorl %r14d,%r13d
- movl %ecx,%r15d
- rorxl $22,%ecx,%r12d
- leal (%rbx,%r13,1),%ebx
- xorl %edx,%r15d
- rorxl $13,%ecx,%r14d
- rorxl $2,%ecx,%r13d
- leal (%r9,%rbx,1),%r9d
- andl %r15d,%esi
- vaesenc %xmm10,%xmm9,%xmm9
- vmovdqu 96-128(%rdi),%xmm10
- xorl %r12d,%r14d
- xorl %edx,%esi
- xorl %r13d,%r14d
- leal (%rbx,%rsi,1),%ebx
- movl %r10d,%r12d
- addl 44+64(%rsp),%eax
- andl %r9d,%r12d
- rorxl $25,%r9d,%r13d
- rorxl $11,%r9d,%esi
- leal (%rbx,%r14,1),%ebx
- leal (%rax,%r12,1),%eax
- andnl %r11d,%r9d,%r12d
- xorl %esi,%r13d
- rorxl $6,%r9d,%r14d
- leal (%rax,%r12,1),%eax
- xorl %r14d,%r13d
- movl %ebx,%esi
- rorxl $22,%ebx,%r12d
- leal (%rax,%r13,1),%eax
- xorl %ecx,%esi
- rorxl $13,%ebx,%r14d
- rorxl $2,%ebx,%r13d
- leal (%r8,%rax,1),%r8d
- andl %esi,%r15d
- vaesenc %xmm10,%xmm9,%xmm9
- vmovdqu 112-128(%rdi),%xmm10
- xorl %r12d,%r14d
- xorl %ecx,%r15d
- xorl %r13d,%r14d
- leal (%rax,%r15,1),%eax
- movl %r9d,%r12d
- addl 0(%rsp),%r11d
- andl %r8d,%r12d
- rorxl $25,%r8d,%r13d
- rorxl $11,%r8d,%r15d
- leal (%rax,%r14,1),%eax
- leal (%r11,%r12,1),%r11d
- andnl %r10d,%r8d,%r12d
- xorl %r15d,%r13d
- rorxl $6,%r8d,%r14d
- leal (%r11,%r12,1),%r11d
- xorl %r14d,%r13d
- movl %eax,%r15d
- rorxl $22,%eax,%r12d
- leal (%r11,%r13,1),%r11d
- xorl %ebx,%r15d
- rorxl $13,%eax,%r14d
- rorxl $2,%eax,%r13d
- leal (%rdx,%r11,1),%edx
- andl %r15d,%esi
- vaesenc %xmm10,%xmm9,%xmm9
- vmovdqu 128-128(%rdi),%xmm10
- xorl %r12d,%r14d
- xorl %ebx,%esi
- xorl %r13d,%r14d
- leal (%r11,%rsi,1),%r11d
- movl %r8d,%r12d
- addl 4(%rsp),%r10d
- andl %edx,%r12d
- rorxl $25,%edx,%r13d
- rorxl $11,%edx,%esi
- leal (%r11,%r14,1),%r11d
- leal (%r10,%r12,1),%r10d
- andnl %r9d,%edx,%r12d
- xorl %esi,%r13d
- rorxl $6,%edx,%r14d
- leal (%r10,%r12,1),%r10d
- xorl %r14d,%r13d
- movl %r11d,%esi
- rorxl $22,%r11d,%r12d
- leal (%r10,%r13,1),%r10d
- xorl %eax,%esi
- rorxl $13,%r11d,%r14d
- rorxl $2,%r11d,%r13d
- leal (%rcx,%r10,1),%ecx
- andl %esi,%r15d
- vaesenc %xmm10,%xmm9,%xmm9
- vmovdqu 144-128(%rdi),%xmm10
- xorl %r12d,%r14d
- xorl %eax,%r15d
- xorl %r13d,%r14d
- leal (%r10,%r15,1),%r10d
- movl %edx,%r12d
- addl 8(%rsp),%r9d
- andl %ecx,%r12d
- rorxl $25,%ecx,%r13d
- rorxl $11,%ecx,%r15d
- leal (%r10,%r14,1),%r10d
- leal (%r9,%r12,1),%r9d
- andnl %r8d,%ecx,%r12d
- xorl %r15d,%r13d
- rorxl $6,%ecx,%r14d
- leal (%r9,%r12,1),%r9d
- xorl %r14d,%r13d
- movl %r10d,%r15d
- rorxl $22,%r10d,%r12d
- leal (%r9,%r13,1),%r9d
- xorl %r11d,%r15d
- rorxl $13,%r10d,%r14d
- rorxl $2,%r10d,%r13d
- leal (%rbx,%r9,1),%ebx
- andl %r15d,%esi
- vaesenc %xmm10,%xmm9,%xmm9
- vmovdqu 160-128(%rdi),%xmm10
- xorl %r12d,%r14d
- xorl %r11d,%esi
- xorl %r13d,%r14d
- leal (%r9,%rsi,1),%r9d
- movl %ecx,%r12d
- addl 12(%rsp),%r8d
- andl %ebx,%r12d
- rorxl $25,%ebx,%r13d
- rorxl $11,%ebx,%esi
- leal (%r9,%r14,1),%r9d
- leal (%r8,%r12,1),%r8d
- andnl %edx,%ebx,%r12d
- xorl %esi,%r13d
- rorxl $6,%ebx,%r14d
- leal (%r8,%r12,1),%r8d
- xorl %r14d,%r13d
- movl %r9d,%esi
- rorxl $22,%r9d,%r12d
- leal (%r8,%r13,1),%r8d
- xorl %r10d,%esi
- rorxl $13,%r9d,%r14d
- rorxl $2,%r9d,%r13d
- leal (%rax,%r8,1),%eax
- andl %esi,%r15d
- vaesenclast %xmm10,%xmm9,%xmm11
- vaesenc %xmm10,%xmm9,%xmm9
- vmovdqu 176-128(%rdi),%xmm10
- xorl %r12d,%r14d
- xorl %r10d,%r15d
- xorl %r13d,%r14d
- leal (%r8,%r15,1),%r8d
- movl %ebx,%r12d
- addl 32(%rsp),%edx
- andl %eax,%r12d
- rorxl $25,%eax,%r13d
- rorxl $11,%eax,%r15d
- leal (%r8,%r14,1),%r8d
- leal (%rdx,%r12,1),%edx
- andnl %ecx,%eax,%r12d
- xorl %r15d,%r13d
- rorxl $6,%eax,%r14d
- leal (%rdx,%r12,1),%edx
- xorl %r14d,%r13d
- movl %r8d,%r15d
- rorxl $22,%r8d,%r12d
- leal (%rdx,%r13,1),%edx
- xorl %r9d,%r15d
- rorxl $13,%r8d,%r14d
- rorxl $2,%r8d,%r13d
- leal (%r11,%rdx,1),%r11d
- andl %r15d,%esi
- vpand %xmm12,%xmm11,%xmm8
- vaesenc %xmm10,%xmm9,%xmm9
- vmovdqu 192-128(%rdi),%xmm10
- xorl %r12d,%r14d
- xorl %r9d,%esi
- xorl %r13d,%r14d
- leal (%rdx,%rsi,1),%edx
- movl %eax,%r12d
- addl 36(%rsp),%ecx
- andl %r11d,%r12d
- rorxl $25,%r11d,%r13d
- rorxl $11,%r11d,%esi
- leal (%rdx,%r14,1),%edx
- leal (%rcx,%r12,1),%ecx
- andnl %ebx,%r11d,%r12d
- xorl %esi,%r13d
- rorxl $6,%r11d,%r14d
- leal (%rcx,%r12,1),%ecx
- xorl %r14d,%r13d
- movl %edx,%esi
- rorxl $22,%edx,%r12d
- leal (%rcx,%r13,1),%ecx
- xorl %r8d,%esi
- rorxl $13,%edx,%r14d
- rorxl $2,%edx,%r13d
- leal (%r10,%rcx,1),%r10d
- andl %esi,%r15d
- vaesenclast %xmm10,%xmm9,%xmm11
- vaesenc %xmm10,%xmm9,%xmm9
- vmovdqu 208-128(%rdi),%xmm10
- xorl %r12d,%r14d
- xorl %r8d,%r15d
- xorl %r13d,%r14d
- leal (%rcx,%r15,1),%ecx
- movl %r11d,%r12d
- addl 40(%rsp),%ebx
- andl %r10d,%r12d
- rorxl $25,%r10d,%r13d
- rorxl $11,%r10d,%r15d
- leal (%rcx,%r14,1),%ecx
- leal (%rbx,%r12,1),%ebx
- andnl %eax,%r10d,%r12d
- xorl %r15d,%r13d
- rorxl $6,%r10d,%r14d
- leal (%rbx,%r12,1),%ebx
- xorl %r14d,%r13d
- movl %ecx,%r15d
- rorxl $22,%ecx,%r12d
- leal (%rbx,%r13,1),%ebx
- xorl %edx,%r15d
- rorxl $13,%ecx,%r14d
- rorxl $2,%ecx,%r13d
- leal (%r9,%rbx,1),%r9d
- andl %r15d,%esi
- vpand %xmm13,%xmm11,%xmm11
- vaesenc %xmm10,%xmm9,%xmm9
- vmovdqu 224-128(%rdi),%xmm10
- xorl %r12d,%r14d
- xorl %edx,%esi
- xorl %r13d,%r14d
- leal (%rbx,%rsi,1),%ebx
- movl %r10d,%r12d
- addl 44(%rsp),%eax
- andl %r9d,%r12d
- rorxl $25,%r9d,%r13d
- rorxl $11,%r9d,%esi
- leal (%rbx,%r14,1),%ebx
- leal (%rax,%r12,1),%eax
- andnl %r11d,%r9d,%r12d
- xorl %esi,%r13d
- rorxl $6,%r9d,%r14d
- leal (%rax,%r12,1),%eax
- xorl %r14d,%r13d
- movl %ebx,%esi
- rorxl $22,%ebx,%r12d
- leal (%rax,%r13,1),%eax
- xorl %ecx,%esi
- rorxl $13,%ebx,%r14d
- rorxl $2,%ebx,%r13d
- leal (%r8,%rax,1),%r8d
- andl %esi,%r15d
- vpor %xmm11,%xmm8,%xmm8
- vaesenclast %xmm10,%xmm9,%xmm11
- vmovdqu 0-128(%rdi),%xmm10
- xorl %r12d,%r14d
- xorl %ecx,%r15d
- xorl %r13d,%r14d
- leal (%rax,%r15,1),%eax
- movl %r9d,%r12d
- vpextrq $1,%xmm15,%r12
- vmovq %xmm15,%r13
- movq 552(%rsp),%r15
- addl %r14d,%eax
- leaq 448(%rsp),%rbp
-
- vpand %xmm14,%xmm11,%xmm11
- vpor %xmm11,%xmm8,%xmm8
- vmovdqu %xmm8,(%r12,%r13,1)
- leaq 16(%r13),%r13
-
- addl 0(%r15),%eax
- addl 4(%r15),%ebx
- addl 8(%r15),%ecx
- addl 12(%r15),%edx
- addl 16(%r15),%r8d
- addl 20(%r15),%r9d
- addl 24(%r15),%r10d
- addl 28(%r15),%r11d
-
- movl %eax,0(%r15)
- movl %ebx,4(%r15)
- movl %ecx,8(%r15)
- movl %edx,12(%r15)
- movl %r8d,16(%r15)
- movl %r9d,20(%r15)
- movl %r10d,24(%r15)
- movl %r11d,28(%r15)
-
- cmpq 80(%rbp),%r13
- je .Ldone_avx2
-
- xorl %r14d,%r14d
- movl %ebx,%esi
- movl %r9d,%r12d
- xorl %ecx,%esi
- jmp .Lower_avx2
-.align 16
-.Lower_avx2:
- vmovdqu (%r13),%xmm9
- vpinsrq $0,%r13,%xmm15,%xmm15
- addl 0+16(%rbp),%r11d
- andl %r8d,%r12d
- rorxl $25,%r8d,%r13d
- rorxl $11,%r8d,%r15d
- leal (%rax,%r14,1),%eax
- leal (%r11,%r12,1),%r11d
- andnl %r10d,%r8d,%r12d
- xorl %r15d,%r13d
- rorxl $6,%r8d,%r14d
- leal (%r11,%r12,1),%r11d
- xorl %r14d,%r13d
- movl %eax,%r15d
- rorxl $22,%eax,%r12d
- leal (%r11,%r13,1),%r11d
- xorl %ebx,%r15d
- rorxl $13,%eax,%r14d
- rorxl $2,%eax,%r13d
- leal (%rdx,%r11,1),%edx
- andl %r15d,%esi
- vpxor %xmm10,%xmm9,%xmm9
- vmovdqu 16-128(%rdi),%xmm10
- xorl %r12d,%r14d
- xorl %ebx,%esi
- xorl %r13d,%r14d
- leal (%r11,%rsi,1),%r11d
- movl %r8d,%r12d
- addl 4+16(%rbp),%r10d
- andl %edx,%r12d
- rorxl $25,%edx,%r13d
- rorxl $11,%edx,%esi
- leal (%r11,%r14,1),%r11d
- leal (%r10,%r12,1),%r10d
- andnl %r9d,%edx,%r12d
- xorl %esi,%r13d
- rorxl $6,%edx,%r14d
- leal (%r10,%r12,1),%r10d
- xorl %r14d,%r13d
- movl %r11d,%esi
- rorxl $22,%r11d,%r12d
- leal (%r10,%r13,1),%r10d
- xorl %eax,%esi
- rorxl $13,%r11d,%r14d
- rorxl $2,%r11d,%r13d
- leal (%rcx,%r10,1),%ecx
- andl %esi,%r15d
- vpxor %xmm8,%xmm9,%xmm9
- xorl %r12d,%r14d
- xorl %eax,%r15d
- xorl %r13d,%r14d
- leal (%r10,%r15,1),%r10d
- movl %edx,%r12d
- addl 8+16(%rbp),%r9d
- andl %ecx,%r12d
- rorxl $25,%ecx,%r13d
- rorxl $11,%ecx,%r15d
- leal (%r10,%r14,1),%r10d
- leal (%r9,%r12,1),%r9d
- andnl %r8d,%ecx,%r12d
- xorl %r15d,%r13d
- rorxl $6,%ecx,%r14d
- leal (%r9,%r12,1),%r9d
- xorl %r14d,%r13d
- movl %r10d,%r15d
- rorxl $22,%r10d,%r12d
- leal (%r9,%r13,1),%r9d
- xorl %r11d,%r15d
- rorxl $13,%r10d,%r14d
- rorxl $2,%r10d,%r13d
- leal (%rbx,%r9,1),%ebx
- andl %r15d,%esi
- vaesenc %xmm10,%xmm9,%xmm9
- vmovdqu 32-128(%rdi),%xmm10
- xorl %r12d,%r14d
- xorl %r11d,%esi
- xorl %r13d,%r14d
- leal (%r9,%rsi,1),%r9d
- movl %ecx,%r12d
- addl 12+16(%rbp),%r8d
- andl %ebx,%r12d
- rorxl $25,%ebx,%r13d
- rorxl $11,%ebx,%esi
- leal (%r9,%r14,1),%r9d
- leal (%r8,%r12,1),%r8d
- andnl %edx,%ebx,%r12d
- xorl %esi,%r13d
- rorxl $6,%ebx,%r14d
- leal (%r8,%r12,1),%r8d
- xorl %r14d,%r13d
- movl %r9d,%esi
- rorxl $22,%r9d,%r12d
- leal (%r8,%r13,1),%r8d
- xorl %r10d,%esi
- rorxl $13,%r9d,%r14d
- rorxl $2,%r9d,%r13d
- leal (%rax,%r8,1),%eax
- andl %esi,%r15d
- vaesenc %xmm10,%xmm9,%xmm9
- vmovdqu 48-128(%rdi),%xmm10
- xorl %r12d,%r14d
- xorl %r10d,%r15d
- xorl %r13d,%r14d
- leal (%r8,%r15,1),%r8d
- movl %ebx,%r12d
- addl 32+16(%rbp),%edx
- andl %eax,%r12d
- rorxl $25,%eax,%r13d
- rorxl $11,%eax,%r15d
- leal (%r8,%r14,1),%r8d
- leal (%rdx,%r12,1),%edx
- andnl %ecx,%eax,%r12d
- xorl %r15d,%r13d
- rorxl $6,%eax,%r14d
- leal (%rdx,%r12,1),%edx
- xorl %r14d,%r13d
- movl %r8d,%r15d
- rorxl $22,%r8d,%r12d
- leal (%rdx,%r13,1),%edx
- xorl %r9d,%r15d
- rorxl $13,%r8d,%r14d
- rorxl $2,%r8d,%r13d
- leal (%r11,%rdx,1),%r11d
- andl %r15d,%esi
- vaesenc %xmm10,%xmm9,%xmm9
- vmovdqu 64-128(%rdi),%xmm10
- xorl %r12d,%r14d
- xorl %r9d,%esi
- xorl %r13d,%r14d
- leal (%rdx,%rsi,1),%edx
- movl %eax,%r12d
- addl 36+16(%rbp),%ecx
- andl %r11d,%r12d
- rorxl $25,%r11d,%r13d
- rorxl $11,%r11d,%esi
- leal (%rdx,%r14,1),%edx
- leal (%rcx,%r12,1),%ecx
- andnl %ebx,%r11d,%r12d
- xorl %esi,%r13d
- rorxl $6,%r11d,%r14d
- leal (%rcx,%r12,1),%ecx
- xorl %r14d,%r13d
- movl %edx,%esi
- rorxl $22,%edx,%r12d
- leal (%rcx,%r13,1),%ecx
- xorl %r8d,%esi
- rorxl $13,%edx,%r14d
- rorxl $2,%edx,%r13d
- leal (%r10,%rcx,1),%r10d
- andl %esi,%r15d
- vaesenc %xmm10,%xmm9,%xmm9
- vmovdqu 80-128(%rdi),%xmm10
- xorl %r12d,%r14d
- xorl %r8d,%r15d
- xorl %r13d,%r14d
- leal (%rcx,%r15,1),%ecx
- movl %r11d,%r12d
- addl 40+16(%rbp),%ebx
- andl %r10d,%r12d
- rorxl $25,%r10d,%r13d
- rorxl $11,%r10d,%r15d
- leal (%rcx,%r14,1),%ecx
- leal (%rbx,%r12,1),%ebx
- andnl %eax,%r10d,%r12d
- xorl %r15d,%r13d
- rorxl $6,%r10d,%r14d
- leal (%rbx,%r12,1),%ebx
- xorl %r14d,%r13d
- movl %ecx,%r15d
- rorxl $22,%ecx,%r12d
- leal (%rbx,%r13,1),%ebx
- xorl %edx,%r15d
- rorxl $13,%ecx,%r14d
- rorxl $2,%ecx,%r13d
- leal (%r9,%rbx,1),%r9d
- andl %r15d,%esi
- vaesenc %xmm10,%xmm9,%xmm9
- vmovdqu 96-128(%rdi),%xmm10
- xorl %r12d,%r14d
- xorl %edx,%esi
- xorl %r13d,%r14d
- leal (%rbx,%rsi,1),%ebx
- movl %r10d,%r12d
- addl 44+16(%rbp),%eax
- andl %r9d,%r12d
- rorxl $25,%r9d,%r13d
- rorxl $11,%r9d,%esi
- leal (%rbx,%r14,1),%ebx
- leal (%rax,%r12,1),%eax
- andnl %r11d,%r9d,%r12d
- xorl %esi,%r13d
- rorxl $6,%r9d,%r14d
- leal (%rax,%r12,1),%eax
- xorl %r14d,%r13d
- movl %ebx,%esi
- rorxl $22,%ebx,%r12d
- leal (%rax,%r13,1),%eax
- xorl %ecx,%esi
- rorxl $13,%ebx,%r14d
- rorxl $2,%ebx,%r13d
- leal (%r8,%rax,1),%r8d
- andl %esi,%r15d
- vaesenc %xmm10,%xmm9,%xmm9
- vmovdqu 112-128(%rdi),%xmm10
- xorl %r12d,%r14d
- xorl %ecx,%r15d
- xorl %r13d,%r14d
- leal (%rax,%r15,1),%eax
- movl %r9d,%r12d
- leaq -64(%rbp),%rbp
- addl 0+16(%rbp),%r11d
- andl %r8d,%r12d
- rorxl $25,%r8d,%r13d
- rorxl $11,%r8d,%r15d
- leal (%rax,%r14,1),%eax
- leal (%r11,%r12,1),%r11d
- andnl %r10d,%r8d,%r12d
- xorl %r15d,%r13d
- rorxl $6,%r8d,%r14d
- leal (%r11,%r12,1),%r11d
- xorl %r14d,%r13d
- movl %eax,%r15d
- rorxl $22,%eax,%r12d
- leal (%r11,%r13,1),%r11d
- xorl %ebx,%r15d
- rorxl $13,%eax,%r14d
- rorxl $2,%eax,%r13d
- leal (%rdx,%r11,1),%edx
- andl %r15d,%esi
- vaesenc %xmm10,%xmm9,%xmm9
- vmovdqu 128-128(%rdi),%xmm10
- xorl %r12d,%r14d
- xorl %ebx,%esi
- xorl %r13d,%r14d
- leal (%r11,%rsi,1),%r11d
- movl %r8d,%r12d
- addl 4+16(%rbp),%r10d
- andl %edx,%r12d
- rorxl $25,%edx,%r13d
- rorxl $11,%edx,%esi
- leal (%r11,%r14,1),%r11d
- leal (%r10,%r12,1),%r10d
- andnl %r9d,%edx,%r12d
- xorl %esi,%r13d
- rorxl $6,%edx,%r14d
- leal (%r10,%r12,1),%r10d
- xorl %r14d,%r13d
- movl %r11d,%esi
- rorxl $22,%r11d,%r12d
- leal (%r10,%r13,1),%r10d
- xorl %eax,%esi
- rorxl $13,%r11d,%r14d
- rorxl $2,%r11d,%r13d
- leal (%rcx,%r10,1),%ecx
- andl %esi,%r15d
- vaesenc %xmm10,%xmm9,%xmm9
- vmovdqu 144-128(%rdi),%xmm10
- xorl %r12d,%r14d
- xorl %eax,%r15d
- xorl %r13d,%r14d
- leal (%r10,%r15,1),%r10d
- movl %edx,%r12d
- addl 8+16(%rbp),%r9d
- andl %ecx,%r12d
- rorxl $25,%ecx,%r13d
- rorxl $11,%ecx,%r15d
- leal (%r10,%r14,1),%r10d
- leal (%r9,%r12,1),%r9d
- andnl %r8d,%ecx,%r12d
- xorl %r15d,%r13d
- rorxl $6,%ecx,%r14d
- leal (%r9,%r12,1),%r9d
- xorl %r14d,%r13d
- movl %r10d,%r15d
- rorxl $22,%r10d,%r12d
- leal (%r9,%r13,1),%r9d
- xorl %r11d,%r15d
- rorxl $13,%r10d,%r14d
- rorxl $2,%r10d,%r13d
- leal (%rbx,%r9,1),%ebx
- andl %r15d,%esi
- vaesenc %xmm10,%xmm9,%xmm9
- vmovdqu 160-128(%rdi),%xmm10
- xorl %r12d,%r14d
- xorl %r11d,%esi
- xorl %r13d,%r14d
- leal (%r9,%rsi,1),%r9d
- movl %ecx,%r12d
- addl 12+16(%rbp),%r8d
- andl %ebx,%r12d
- rorxl $25,%ebx,%r13d
- rorxl $11,%ebx,%esi
- leal (%r9,%r14,1),%r9d
- leal (%r8,%r12,1),%r8d
- andnl %edx,%ebx,%r12d
- xorl %esi,%r13d
- rorxl $6,%ebx,%r14d
- leal (%r8,%r12,1),%r8d
- xorl %r14d,%r13d
- movl %r9d,%esi
- rorxl $22,%r9d,%r12d
- leal (%r8,%r13,1),%r8d
- xorl %r10d,%esi
- rorxl $13,%r9d,%r14d
- rorxl $2,%r9d,%r13d
- leal (%rax,%r8,1),%eax
- andl %esi,%r15d
- vaesenclast %xmm10,%xmm9,%xmm11
- vaesenc %xmm10,%xmm9,%xmm9
- vmovdqu 176-128(%rdi),%xmm10
- xorl %r12d,%r14d
- xorl %r10d,%r15d
- xorl %r13d,%r14d
- leal (%r8,%r15,1),%r8d
- movl %ebx,%r12d
- addl 32+16(%rbp),%edx
- andl %eax,%r12d
- rorxl $25,%eax,%r13d
- rorxl $11,%eax,%r15d
- leal (%r8,%r14,1),%r8d
- leal (%rdx,%r12,1),%edx
- andnl %ecx,%eax,%r12d
- xorl %r15d,%r13d
- rorxl $6,%eax,%r14d
- leal (%rdx,%r12,1),%edx
- xorl %r14d,%r13d
- movl %r8d,%r15d
- rorxl $22,%r8d,%r12d
- leal (%rdx,%r13,1),%edx
- xorl %r9d,%r15d
- rorxl $13,%r8d,%r14d
- rorxl $2,%r8d,%r13d
- leal (%r11,%rdx,1),%r11d
- andl %r15d,%esi
- vpand %xmm12,%xmm11,%xmm8
- vaesenc %xmm10,%xmm9,%xmm9
- vmovdqu 192-128(%rdi),%xmm10
- xorl %r12d,%r14d
- xorl %r9d,%esi
- xorl %r13d,%r14d
- leal (%rdx,%rsi,1),%edx
- movl %eax,%r12d
- addl 36+16(%rbp),%ecx
- andl %r11d,%r12d
- rorxl $25,%r11d,%r13d
- rorxl $11,%r11d,%esi
- leal (%rdx,%r14,1),%edx
- leal (%rcx,%r12,1),%ecx
- andnl %ebx,%r11d,%r12d
- xorl %esi,%r13d
- rorxl $6,%r11d,%r14d
- leal (%rcx,%r12,1),%ecx
- xorl %r14d,%r13d
- movl %edx,%esi
- rorxl $22,%edx,%r12d
- leal (%rcx,%r13,1),%ecx
- xorl %r8d,%esi
- rorxl $13,%edx,%r14d
- rorxl $2,%edx,%r13d
- leal (%r10,%rcx,1),%r10d
- andl %esi,%r15d
- vaesenclast %xmm10,%xmm9,%xmm11
- vaesenc %xmm10,%xmm9,%xmm9
- vmovdqu 208-128(%rdi),%xmm10
- xorl %r12d,%r14d
- xorl %r8d,%r15d
- xorl %r13d,%r14d
- leal (%rcx,%r15,1),%ecx
- movl %r11d,%r12d
- addl 40+16(%rbp),%ebx
- andl %r10d,%r12d
- rorxl $25,%r10d,%r13d
- rorxl $11,%r10d,%r15d
- leal (%rcx,%r14,1),%ecx
- leal (%rbx,%r12,1),%ebx
- andnl %eax,%r10d,%r12d
- xorl %r15d,%r13d
- rorxl $6,%r10d,%r14d
- leal (%rbx,%r12,1),%ebx
- xorl %r14d,%r13d
- movl %ecx,%r15d
- rorxl $22,%ecx,%r12d
- leal (%rbx,%r13,1),%ebx
- xorl %edx,%r15d
- rorxl $13,%ecx,%r14d
- rorxl $2,%ecx,%r13d
- leal (%r9,%rbx,1),%r9d
- andl %r15d,%esi
- vpand %xmm13,%xmm11,%xmm11
- vaesenc %xmm10,%xmm9,%xmm9
- vmovdqu 224-128(%rdi),%xmm10
- xorl %r12d,%r14d
- xorl %edx,%esi
- xorl %r13d,%r14d
- leal (%rbx,%rsi,1),%ebx
- movl %r10d,%r12d
- addl 44+16(%rbp),%eax
- andl %r9d,%r12d
- rorxl $25,%r9d,%r13d
- rorxl $11,%r9d,%esi
- leal (%rbx,%r14,1),%ebx
- leal (%rax,%r12,1),%eax
- andnl %r11d,%r9d,%r12d
- xorl %esi,%r13d
- rorxl $6,%r9d,%r14d
- leal (%rax,%r12,1),%eax
- xorl %r14d,%r13d
- movl %ebx,%esi
- rorxl $22,%ebx,%r12d
- leal (%rax,%r13,1),%eax
- xorl %ecx,%esi
- rorxl $13,%ebx,%r14d
- rorxl $2,%ebx,%r13d
- leal (%r8,%rax,1),%r8d
- andl %esi,%r15d
- vpor %xmm11,%xmm8,%xmm8
- vaesenclast %xmm10,%xmm9,%xmm11
- vmovdqu 0-128(%rdi),%xmm10
- xorl %r12d,%r14d
- xorl %ecx,%r15d
- xorl %r13d,%r14d
- leal (%rax,%r15,1),%eax
- movl %r9d,%r12d
- vmovq %xmm15,%r13
- vpextrq $1,%xmm15,%r15
- vpand %xmm14,%xmm11,%xmm11
- vpor %xmm11,%xmm8,%xmm8
- leaq -64(%rbp),%rbp
- vmovdqu %xmm8,(%r15,%r13,1)
- leaq 16(%r13),%r13
- cmpq %rsp,%rbp
- jae .Lower_avx2
-
- movq 552(%rsp),%r15
- leaq 64(%r13),%r13
- movq 560(%rsp),%rsi
- addl %r14d,%eax
- leaq 448(%rsp),%rsp
-
- addl 0(%r15),%eax
- addl 4(%r15),%ebx
- addl 8(%r15),%ecx
- addl 12(%r15),%edx
- addl 16(%r15),%r8d
- addl 20(%r15),%r9d
- addl 24(%r15),%r10d
- leaq (%rsi,%r13,1),%r12
- addl 28(%r15),%r11d
-
- cmpq 64+16(%rsp),%r13
-
- movl %eax,0(%r15)
- cmoveq %rsp,%r12
- movl %ebx,4(%r15)
- movl %ecx,8(%r15)
- movl %edx,12(%r15)
- movl %r8d,16(%r15)
- movl %r9d,20(%r15)
- movl %r10d,24(%r15)
- movl %r11d,28(%r15)
-
- jbe .Loop_avx2
- leaq (%rsp),%rbp
-
-.Ldone_avx2:
- leaq (%rbp),%rsp
- movq 64+32(%rsp),%r8
- movq 120(%rsp),%rsi
-.cfi_def_cfa %rsi,8
- vmovdqu %xmm8,(%r8)
- vzeroall
- movq -48(%rsi),%r15
-.cfi_restore %r15
- movq -40(%rsi),%r14
-.cfi_restore %r14
- movq -32(%rsi),%r13
-.cfi_restore %r13
- movq -24(%rsi),%r12
-.cfi_restore %r12
- movq -16(%rsi),%rbp
-.cfi_restore %rbp
- movq -8(%rsi),%rbx
-.cfi_restore %rbx
- leaq (%rsi),%rsp
-.cfi_def_cfa_register %rsp
-.Lepilogue_avx2:
- .byte 0xf3,0xc3
-.cfi_endproc
-.size aesni_cbc_sha256_enc_avx2,.-aesni_cbc_sha256_enc_avx2
-.type aesni_cbc_sha256_enc_shaext,@function
-.align 32
-aesni_cbc_sha256_enc_shaext:
- movq 8(%rsp),%r10
- leaq K256+128(%rip),%rax
- movdqu (%r9),%xmm1
- movdqu 16(%r9),%xmm2
- movdqa 512-128(%rax),%xmm3
-
- movl 240(%rcx),%r11d
- subq %rdi,%rsi
- movups (%rcx),%xmm15
- movups (%r8),%xmm6
- movups 16(%rcx),%xmm4
- leaq 112(%rcx),%rcx
-
- pshufd $0x1b,%xmm1,%xmm0
- pshufd $0xb1,%xmm1,%xmm1
- pshufd $0x1b,%xmm2,%xmm2
- movdqa %xmm3,%xmm7
-.byte 102,15,58,15,202,8
- punpcklqdq %xmm0,%xmm2
-
- jmp .Loop_shaext
-
-.align 16
-.Loop_shaext:
- movdqu (%r10),%xmm10
- movdqu 16(%r10),%xmm11
- movdqu 32(%r10),%xmm12
-.byte 102,68,15,56,0,211
- movdqu 48(%r10),%xmm13
-
- movdqa 0-128(%rax),%xmm0
- paddd %xmm10,%xmm0
-.byte 102,68,15,56,0,219
- movdqa %xmm2,%xmm9
- movdqa %xmm1,%xmm8
- movups 0(%rdi),%xmm14
- xorps %xmm15,%xmm14
- xorps %xmm14,%xmm6
- movups -80(%rcx),%xmm5
- aesenc %xmm4,%xmm6
-.byte 15,56,203,209
- pshufd $0x0e,%xmm0,%xmm0
- movups -64(%rcx),%xmm4
- aesenc %xmm5,%xmm6
-.byte 15,56,203,202
-
- movdqa 32-128(%rax),%xmm0
- paddd %xmm11,%xmm0
-.byte 102,68,15,56,0,227
- leaq 64(%r10),%r10
- movups -48(%rcx),%xmm5
- aesenc %xmm4,%xmm6
-.byte 15,56,203,209
- pshufd $0x0e,%xmm0,%xmm0
- movups -32(%rcx),%xmm4
- aesenc %xmm5,%xmm6
-.byte 15,56,203,202
-
- movdqa 64-128(%rax),%xmm0
- paddd %xmm12,%xmm0
-.byte 102,68,15,56,0,235
-.byte 69,15,56,204,211
- movups -16(%rcx),%xmm5
- aesenc %xmm4,%xmm6
-.byte 15,56,203,209
- pshufd $0x0e,%xmm0,%xmm0
- movdqa %xmm13,%xmm3
-.byte 102,65,15,58,15,220,4
- paddd %xmm3,%xmm10
- movups 0(%rcx),%xmm4
- aesenc %xmm5,%xmm6
-.byte 15,56,203,202
-
- movdqa 96-128(%rax),%xmm0
- paddd %xmm13,%xmm0
-.byte 69,15,56,205,213
-.byte 69,15,56,204,220
- movups 16(%rcx),%xmm5
- aesenc %xmm4,%xmm6
-.byte 15,56,203,209
- pshufd $0x0e,%xmm0,%xmm0
- movups 32(%rcx),%xmm4
- aesenc %xmm5,%xmm6
- movdqa %xmm10,%xmm3
-.byte 102,65,15,58,15,221,4
- paddd %xmm3,%xmm11
-.byte 15,56,203,202
- movdqa 128-128(%rax),%xmm0
- paddd %xmm10,%xmm0
-.byte 69,15,56,205,218
-.byte 69,15,56,204,229
- movups 48(%rcx),%xmm5
- aesenc %xmm4,%xmm6
-.byte 15,56,203,209
- pshufd $0x0e,%xmm0,%xmm0
- movdqa %xmm11,%xmm3
-.byte 102,65,15,58,15,218,4
- paddd %xmm3,%xmm12
- cmpl $11,%r11d
- jb .Laesenclast1
- movups 64(%rcx),%xmm4
- aesenc %xmm5,%xmm6
- movups 80(%rcx),%xmm5
- aesenc %xmm4,%xmm6
- je .Laesenclast1
- movups 96(%rcx),%xmm4
- aesenc %xmm5,%xmm6
- movups 112(%rcx),%xmm5
- aesenc %xmm4,%xmm6
-.Laesenclast1:
- aesenclast %xmm5,%xmm6
- movups 16-112(%rcx),%xmm4
- nop
-.byte 15,56,203,202
- movups 16(%rdi),%xmm14
- xorps %xmm15,%xmm14
- movups %xmm6,0(%rsi,%rdi,1)
- xorps %xmm14,%xmm6
- movups -80(%rcx),%xmm5
- aesenc %xmm4,%xmm6
- movdqa 160-128(%rax),%xmm0
- paddd %xmm11,%xmm0
-.byte 69,15,56,205,227
-.byte 69,15,56,204,234
- movups -64(%rcx),%xmm4
- aesenc %xmm5,%xmm6
-.byte 15,56,203,209
- pshufd $0x0e,%xmm0,%xmm0
- movdqa %xmm12,%xmm3
-.byte 102,65,15,58,15,219,4
- paddd %xmm3,%xmm13
- movups -48(%rcx),%xmm5
- aesenc %xmm4,%xmm6
-.byte 15,56,203,202
- movdqa 192-128(%rax),%xmm0
- paddd %xmm12,%xmm0
-.byte 69,15,56,205,236
-.byte 69,15,56,204,211
- movups -32(%rcx),%xmm4
- aesenc %xmm5,%xmm6
-.byte 15,56,203,209
- pshufd $0x0e,%xmm0,%xmm0
- movdqa %xmm13,%xmm3
-.byte 102,65,15,58,15,220,4
- paddd %xmm3,%xmm10
- movups -16(%rcx),%xmm5
- aesenc %xmm4,%xmm6
-.byte 15,56,203,202
- movdqa 224-128(%rax),%xmm0
- paddd %xmm13,%xmm0
-.byte 69,15,56,205,213
-.byte 69,15,56,204,220
- movups 0(%rcx),%xmm4
- aesenc %xmm5,%xmm6
-.byte 15,56,203,209
- pshufd $0x0e,%xmm0,%xmm0
- movdqa %xmm10,%xmm3
-.byte 102,65,15,58,15,221,4
- paddd %xmm3,%xmm11
- movups 16(%rcx),%xmm5
- aesenc %xmm4,%xmm6
-.byte 15,56,203,202
- movdqa 256-128(%rax),%xmm0
- paddd %xmm10,%xmm0
-.byte 69,15,56,205,218
-.byte 69,15,56,204,229
- movups 32(%rcx),%xmm4
- aesenc %xmm5,%xmm6
-.byte 15,56,203,209
- pshufd $0x0e,%xmm0,%xmm0
- movdqa %xmm11,%xmm3
-.byte 102,65,15,58,15,218,4
- paddd %xmm3,%xmm12
- movups 48(%rcx),%xmm5
- aesenc %xmm4,%xmm6
- cmpl $11,%r11d
- jb .Laesenclast2
- movups 64(%rcx),%xmm4
- aesenc %xmm5,%xmm6
- movups 80(%rcx),%xmm5
- aesenc %xmm4,%xmm6
- je .Laesenclast2
- movups 96(%rcx),%xmm4
- aesenc %xmm5,%xmm6
- movups 112(%rcx),%xmm5
- aesenc %xmm4,%xmm6
-.Laesenclast2:
- aesenclast %xmm5,%xmm6
- movups 16-112(%rcx),%xmm4
- nop
-.byte 15,56,203,202
- movups 32(%rdi),%xmm14
- xorps %xmm15,%xmm14
- movups %xmm6,16(%rsi,%rdi,1)
- xorps %xmm14,%xmm6
- movups -80(%rcx),%xmm5
- aesenc %xmm4,%xmm6
- movdqa 288-128(%rax),%xmm0
- paddd %xmm11,%xmm0
-.byte 69,15,56,205,227
-.byte 69,15,56,204,234
- movups -64(%rcx),%xmm4
- aesenc %xmm5,%xmm6
-.byte 15,56,203,209
- pshufd $0x0e,%xmm0,%xmm0
- movdqa %xmm12,%xmm3
-.byte 102,65,15,58,15,219,4
- paddd %xmm3,%xmm13
- movups -48(%rcx),%xmm5
- aesenc %xmm4,%xmm6
-.byte 15,56,203,202
- movdqa 320-128(%rax),%xmm0
- paddd %xmm12,%xmm0
-.byte 69,15,56,205,236
-.byte 69,15,56,204,211
- movups -32(%rcx),%xmm4
- aesenc %xmm5,%xmm6
-.byte 15,56,203,209
- pshufd $0x0e,%xmm0,%xmm0
- movdqa %xmm13,%xmm3
-.byte 102,65,15,58,15,220,4
- paddd %xmm3,%xmm10
- movups -16(%rcx),%xmm5
- aesenc %xmm4,%xmm6
-.byte 15,56,203,202
- movdqa 352-128(%rax),%xmm0
- paddd %xmm13,%xmm0
-.byte 69,15,56,205,213
-.byte 69,15,56,204,220
- movups 0(%rcx),%xmm4
- aesenc %xmm5,%xmm6
-.byte 15,56,203,209
- pshufd $0x0e,%xmm0,%xmm0
- movdqa %xmm10,%xmm3
-.byte 102,65,15,58,15,221,4
- paddd %xmm3,%xmm11
- movups 16(%rcx),%xmm5
- aesenc %xmm4,%xmm6
-.byte 15,56,203,202
- movdqa 384-128(%rax),%xmm0
- paddd %xmm10,%xmm0
-.byte 69,15,56,205,218
-.byte 69,15,56,204,229
- movups 32(%rcx),%xmm4
- aesenc %xmm5,%xmm6
-.byte 15,56,203,209
- pshufd $0x0e,%xmm0,%xmm0
- movdqa %xmm11,%xmm3
-.byte 102,65,15,58,15,218,4
- paddd %xmm3,%xmm12
- movups 48(%rcx),%xmm5
- aesenc %xmm4,%xmm6
-.byte 15,56,203,202
- movdqa 416-128(%rax),%xmm0
- paddd %xmm11,%xmm0
-.byte 69,15,56,205,227
-.byte 69,15,56,204,234
- cmpl $11,%r11d
- jb .Laesenclast3
- movups 64(%rcx),%xmm4
- aesenc %xmm5,%xmm6
- movups 80(%rcx),%xmm5
- aesenc %xmm4,%xmm6
- je .Laesenclast3
- movups 96(%rcx),%xmm4
- aesenc %xmm5,%xmm6
- movups 112(%rcx),%xmm5
- aesenc %xmm4,%xmm6
-.Laesenclast3:
- aesenclast %xmm5,%xmm6
- movups 16-112(%rcx),%xmm4
- nop
-.byte 15,56,203,209
- pshufd $0x0e,%xmm0,%xmm0
- movdqa %xmm12,%xmm3
-.byte 102,65,15,58,15,219,4
- paddd %xmm3,%xmm13
- movups 48(%rdi),%xmm14
- xorps %xmm15,%xmm14
- movups %xmm6,32(%rsi,%rdi,1)
- xorps %xmm14,%xmm6
- movups -80(%rcx),%xmm5
- aesenc %xmm4,%xmm6
- movups -64(%rcx),%xmm4
- aesenc %xmm5,%xmm6
-.byte 15,56,203,202
-
- movdqa 448-128(%rax),%xmm0
- paddd %xmm12,%xmm0
-.byte 69,15,56,205,236
- movdqa %xmm7,%xmm3
- movups -48(%rcx),%xmm5
- aesenc %xmm4,%xmm6
-.byte 15,56,203,209
- pshufd $0x0e,%xmm0,%xmm0
- movups -32(%rcx),%xmm4
- aesenc %xmm5,%xmm6
-.byte 15,56,203,202
-
- movdqa 480-128(%rax),%xmm0
- paddd %xmm13,%xmm0
- movups -16(%rcx),%xmm5
- aesenc %xmm4,%xmm6
- movups 0(%rcx),%xmm4
- aesenc %xmm5,%xmm6
-.byte 15,56,203,209
- pshufd $0x0e,%xmm0,%xmm0
- movups 16(%rcx),%xmm5
- aesenc %xmm4,%xmm6
-.byte 15,56,203,202
-
- movups 32(%rcx),%xmm4
- aesenc %xmm5,%xmm6
- movups 48(%rcx),%xmm5
- aesenc %xmm4,%xmm6
- cmpl $11,%r11d
- jb .Laesenclast4
- movups 64(%rcx),%xmm4
- aesenc %xmm5,%xmm6
- movups 80(%rcx),%xmm5
- aesenc %xmm4,%xmm6
- je .Laesenclast4
- movups 96(%rcx),%xmm4
- aesenc %xmm5,%xmm6
- movups 112(%rcx),%xmm5
- aesenc %xmm4,%xmm6
-.Laesenclast4:
- aesenclast %xmm5,%xmm6
- movups 16-112(%rcx),%xmm4
- nop
-
- paddd %xmm9,%xmm2
- paddd %xmm8,%xmm1
-
- decq %rdx
- movups %xmm6,48(%rsi,%rdi,1)
- leaq 64(%rdi),%rdi
- jnz .Loop_shaext
-
- pshufd $0xb1,%xmm2,%xmm2
- pshufd $0x1b,%xmm1,%xmm3
- pshufd $0xb1,%xmm1,%xmm1
- punpckhqdq %xmm2,%xmm1
-.byte 102,15,58,15,211,8
-
- movups %xmm6,(%r8)
- movdqu %xmm1,(%r9)
- movdqu %xmm2,16(%r9)
- .byte 0xf3,0xc3
-.size aesni_cbc_sha256_enc_shaext,.-aesni_cbc_sha256_enc_shaext
diff --git a/secure/lib/libcrypto/amd64/aesni-x86_64.S b/secure/lib/libcrypto/amd64/aesni-x86_64.S
index e2ef2d6666cb..ce3ba1266de1 100644
--- a/secure/lib/libcrypto/amd64/aesni-x86_64.S
+++ b/secure/lib/libcrypto/amd64/aesni-x86_64.S
@@ -863,6 +863,7 @@ aesni_ecb_encrypt:
.type aesni_ccm64_encrypt_blocks,@function
.align 16
aesni_ccm64_encrypt_blocks:
+.cfi_startproc
movl 240(%rcx),%eax
movdqu (%r8),%xmm6
movdqa .Lincrement64(%rip),%xmm9
@@ -921,11 +922,13 @@ aesni_ccm64_encrypt_blocks:
pxor %xmm8,%xmm8
pxor %xmm6,%xmm6
.byte 0xf3,0xc3
+.cfi_endproc
.size aesni_ccm64_encrypt_blocks,.-aesni_ccm64_encrypt_blocks
.globl aesni_ccm64_decrypt_blocks
.type aesni_ccm64_decrypt_blocks,@function
.align 16
aesni_ccm64_decrypt_blocks:
+.cfi_startproc
movl 240(%rcx),%eax
movups (%r8),%xmm6
movdqu (%r9),%xmm3
@@ -1018,6 +1021,7 @@ aesni_ccm64_decrypt_blocks:
pxor %xmm8,%xmm8
pxor %xmm6,%xmm6
.byte 0xf3,0xc3
+.cfi_endproc
.size aesni_ccm64_decrypt_blocks,.-aesni_ccm64_decrypt_blocks
.globl aesni_ctr32_encrypt_blocks
.type aesni_ctr32_encrypt_blocks,@function
@@ -2792,6 +2796,7 @@ aesni_ocb_encrypt:
.type __ocb_encrypt6,@function
.align 32
__ocb_encrypt6:
+.cfi_startproc
pxor %xmm9,%xmm15
movdqu (%rbx,%r12,1),%xmm11
movdqa %xmm10,%xmm12
@@ -2889,11 +2894,13 @@ __ocb_encrypt6:
.byte 102,65,15,56,221,246
.byte 102,65,15,56,221,255
.byte 0xf3,0xc3
+.cfi_endproc
.size __ocb_encrypt6,.-__ocb_encrypt6
.type __ocb_encrypt4,@function
.align 32
__ocb_encrypt4:
+.cfi_startproc
pxor %xmm9,%xmm15
movdqu (%rbx,%r12,1),%xmm11
movdqa %xmm10,%xmm12
@@ -2958,11 +2965,13 @@ __ocb_encrypt4:
.byte 102,65,15,56,221,228
.byte 102,65,15,56,221,237
.byte 0xf3,0xc3
+.cfi_endproc
.size __ocb_encrypt4,.-__ocb_encrypt4
.type __ocb_encrypt1,@function
.align 32
__ocb_encrypt1:
+.cfi_startproc
pxor %xmm15,%xmm7
pxor %xmm9,%xmm7
pxor %xmm2,%xmm8
@@ -2993,6 +3002,7 @@ __ocb_encrypt1:
.byte 102,15,56,221,215
.byte 0xf3,0xc3
+.cfi_endproc
.size __ocb_encrypt1,.-__ocb_encrypt1
.globl aesni_ocb_decrypt
@@ -3235,6 +3245,7 @@ aesni_ocb_decrypt:
.type __ocb_decrypt6,@function
.align 32
__ocb_decrypt6:
+.cfi_startproc
pxor %xmm9,%xmm15
movdqu (%rbx,%r12,1),%xmm11
movdqa %xmm10,%xmm12
@@ -3326,11 +3337,13 @@ __ocb_decrypt6:
.byte 102,65,15,56,223,246
.byte 102,65,15,56,223,255
.byte 0xf3,0xc3
+.cfi_endproc
.size __ocb_decrypt6,.-__ocb_decrypt6
.type __ocb_decrypt4,@function
.align 32
__ocb_decrypt4:
+.cfi_startproc
pxor %xmm9,%xmm15
movdqu (%rbx,%r12,1),%xmm11
movdqa %xmm10,%xmm12
@@ -3391,11 +3404,13 @@ __ocb_decrypt4:
.byte 102,65,15,56,223,228
.byte 102,65,15,56,223,237
.byte 0xf3,0xc3
+.cfi_endproc
.size __ocb_decrypt4,.-__ocb_decrypt4
.type __ocb_decrypt1,@function
.align 32
__ocb_decrypt1:
+.cfi_startproc
pxor %xmm15,%xmm7
pxor %xmm9,%xmm7
pxor %xmm7,%xmm2
@@ -3425,6 +3440,7 @@ __ocb_decrypt1:
.byte 102,15,56,223,215
.byte 0xf3,0xc3
+.cfi_endproc
.size __ocb_decrypt1,.-__ocb_decrypt1
.globl aesni_cbc_encrypt
.type aesni_cbc_encrypt,@function
@@ -4363,7 +4379,6 @@ __aesni_set_encrypt_key:
addq $8,%rsp
.cfi_adjust_cfa_offset -8
.byte 0xf3,0xc3
-.cfi_endproc
.LSEH_end_set_encrypt_key:
.align 16
@@ -4434,6 +4449,7 @@ __aesni_set_encrypt_key:
shufps $170,%xmm1,%xmm1
xorps %xmm1,%xmm2
.byte 0xf3,0xc3
+.cfi_endproc
.size aesni_set_encrypt_key,.-aesni_set_encrypt_key
.size __aesni_set_encrypt_key,.-__aesni_set_encrypt_key
.align 64
diff --git a/secure/lib/libcrypto/amd64/chacha-x86_64.S b/secure/lib/libcrypto/amd64/chacha-x86_64.S
index b01c1b87d47b..0b3d5b8b6db4 100644
--- a/secure/lib/libcrypto/amd64/chacha-x86_64.S
+++ b/secure/lib/libcrypto/amd64/chacha-x86_64.S
@@ -331,8 +331,6 @@ ChaCha20_ssse3:
.LChaCha20_ssse3:
movq %rsp,%r9
.cfi_def_cfa_register %r9
- testl $2048,%r10d
- jnz .LChaCha20_4xop
cmpq $128,%rdx
je .LChaCha20_128
ja .LChaCha20_4x
@@ -628,9 +626,6 @@ ChaCha20_4x:
movq %rsp,%r9
.cfi_def_cfa_register %r9
movq %r10,%r11
- shrq $32,%r10
- testq $32,%r10
- jnz .LChaCha20_8x
cmpq $192,%rdx
ja .Lproceed4x
@@ -1172,1024 +1167,3 @@ ChaCha20_4x:
.byte 0xf3,0xc3
.cfi_endproc
.size ChaCha20_4x,.-ChaCha20_4x
-.type ChaCha20_4xop,@function
-.align 32
-ChaCha20_4xop:
-.cfi_startproc
-.LChaCha20_4xop:
- movq %rsp,%r9
-.cfi_def_cfa_register %r9
- subq $0x140+8,%rsp
- vzeroupper
-
- vmovdqa .Lsigma(%rip),%xmm11
- vmovdqu (%rcx),%xmm3
- vmovdqu 16(%rcx),%xmm15
- vmovdqu (%r8),%xmm7
- leaq 256(%rsp),%rcx
-
- vpshufd $0x00,%xmm11,%xmm8
- vpshufd $0x55,%xmm11,%xmm9
- vmovdqa %xmm8,64(%rsp)
- vpshufd $0xaa,%xmm11,%xmm10
- vmovdqa %xmm9,80(%rsp)
- vpshufd $0xff,%xmm11,%xmm11
- vmovdqa %xmm10,96(%rsp)
- vmovdqa %xmm11,112(%rsp)
-
- vpshufd $0x00,%xmm3,%xmm0
- vpshufd $0x55,%xmm3,%xmm1
- vmovdqa %xmm0,128-256(%rcx)
- vpshufd $0xaa,%xmm3,%xmm2
- vmovdqa %xmm1,144-256(%rcx)
- vpshufd $0xff,%xmm3,%xmm3
- vmovdqa %xmm2,160-256(%rcx)
- vmovdqa %xmm3,176-256(%rcx)
-
- vpshufd $0x00,%xmm15,%xmm12
- vpshufd $0x55,%xmm15,%xmm13
- vmovdqa %xmm12,192-256(%rcx)
- vpshufd $0xaa,%xmm15,%xmm14
- vmovdqa %xmm13,208-256(%rcx)
- vpshufd $0xff,%xmm15,%xmm15
- vmovdqa %xmm14,224-256(%rcx)
- vmovdqa %xmm15,240-256(%rcx)
-
- vpshufd $0x00,%xmm7,%xmm4
- vpshufd $0x55,%xmm7,%xmm5
- vpaddd .Linc(%rip),%xmm4,%xmm4
- vpshufd $0xaa,%xmm7,%xmm6
- vmovdqa %xmm5,272-256(%rcx)
- vpshufd $0xff,%xmm7,%xmm7
- vmovdqa %xmm6,288-256(%rcx)
- vmovdqa %xmm7,304-256(%rcx)
-
- jmp .Loop_enter4xop
-
-.align 32
-.Loop_outer4xop:
- vmovdqa 64(%rsp),%xmm8
- vmovdqa 80(%rsp),%xmm9
- vmovdqa 96(%rsp),%xmm10
- vmovdqa 112(%rsp),%xmm11
- vmovdqa 128-256(%rcx),%xmm0
- vmovdqa 144-256(%rcx),%xmm1
- vmovdqa 160-256(%rcx),%xmm2
- vmovdqa 176-256(%rcx),%xmm3
- vmovdqa 192-256(%rcx),%xmm12
- vmovdqa 208-256(%rcx),%xmm13
- vmovdqa 224-256(%rcx),%xmm14
- vmovdqa 240-256(%rcx),%xmm15
- vmovdqa 256-256(%rcx),%xmm4
- vmovdqa 272-256(%rcx),%xmm5
- vmovdqa 288-256(%rcx),%xmm6
- vmovdqa 304-256(%rcx),%xmm7
- vpaddd .Lfour(%rip),%xmm4,%xmm4
-
-.Loop_enter4xop:
- movl $10,%eax
- vmovdqa %xmm4,256-256(%rcx)
- jmp .Loop4xop
-
-.align 32
-.Loop4xop:
- vpaddd %xmm0,%xmm8,%xmm8
- vpaddd %xmm1,%xmm9,%xmm9
- vpaddd %xmm2,%xmm10,%xmm10
- vpaddd %xmm3,%xmm11,%xmm11
- vpxor %xmm4,%xmm8,%xmm4
- vpxor %xmm5,%xmm9,%xmm5
- vpxor %xmm6,%xmm10,%xmm6
- vpxor %xmm7,%xmm11,%xmm7
-.byte 143,232,120,194,228,16
-.byte 143,232,120,194,237,16
-.byte 143,232,120,194,246,16
-.byte 143,232,120,194,255,16
- vpaddd %xmm4,%xmm12,%xmm12
- vpaddd %xmm5,%xmm13,%xmm13
- vpaddd %xmm6,%xmm14,%xmm14
- vpaddd %xmm7,%xmm15,%xmm15
- vpxor %xmm0,%xmm12,%xmm0
- vpxor %xmm1,%xmm13,%xmm1
- vpxor %xmm14,%xmm2,%xmm2
- vpxor %xmm15,%xmm3,%xmm3
-.byte 143,232,120,194,192,12
-.byte 143,232,120,194,201,12
-.byte 143,232,120,194,210,12
-.byte 143,232,120,194,219,12
- vpaddd %xmm8,%xmm0,%xmm8
- vpaddd %xmm9,%xmm1,%xmm9
- vpaddd %xmm2,%xmm10,%xmm10
- vpaddd %xmm3,%xmm11,%xmm11
- vpxor %xmm4,%xmm8,%xmm4
- vpxor %xmm5,%xmm9,%xmm5
- vpxor %xmm6,%xmm10,%xmm6
- vpxor %xmm7,%xmm11,%xmm7
-.byte 143,232,120,194,228,8
-.byte 143,232,120,194,237,8
-.byte 143,232,120,194,246,8
-.byte 143,232,120,194,255,8
- vpaddd %xmm4,%xmm12,%xmm12
- vpaddd %xmm5,%xmm13,%xmm13
- vpaddd %xmm6,%xmm14,%xmm14
- vpaddd %xmm7,%xmm15,%xmm15
- vpxor %xmm0,%xmm12,%xmm0
- vpxor %xmm1,%xmm13,%xmm1
- vpxor %xmm14,%xmm2,%xmm2
- vpxor %xmm15,%xmm3,%xmm3
-.byte 143,232,120,194,192,7
-.byte 143,232,120,194,201,7
-.byte 143,232,120,194,210,7
-.byte 143,232,120,194,219,7
- vpaddd %xmm1,%xmm8,%xmm8
- vpaddd %xmm2,%xmm9,%xmm9
- vpaddd %xmm3,%xmm10,%xmm10
- vpaddd %xmm0,%xmm11,%xmm11
- vpxor %xmm7,%xmm8,%xmm7
- vpxor %xmm4,%xmm9,%xmm4
- vpxor %xmm5,%xmm10,%xmm5
- vpxor %xmm6,%xmm11,%xmm6
-.byte 143,232,120,194,255,16
-.byte 143,232,120,194,228,16
-.byte 143,232,120,194,237,16
-.byte 143,232,120,194,246,16
- vpaddd %xmm7,%xmm14,%xmm14
- vpaddd %xmm4,%xmm15,%xmm15
- vpaddd %xmm5,%xmm12,%xmm12
- vpaddd %xmm6,%xmm13,%xmm13
- vpxor %xmm1,%xmm14,%xmm1
- vpxor %xmm2,%xmm15,%xmm2
- vpxor %xmm12,%xmm3,%xmm3
- vpxor %xmm13,%xmm0,%xmm0
-.byte 143,232,120,194,201,12
-.byte 143,232,120,194,210,12
-.byte 143,232,120,194,219,12
-.byte 143,232,120,194,192,12
- vpaddd %xmm8,%xmm1,%xmm8
- vpaddd %xmm9,%xmm2,%xmm9
- vpaddd %xmm3,%xmm10,%xmm10
- vpaddd %xmm0,%xmm11,%xmm11
- vpxor %xmm7,%xmm8,%xmm7
- vpxor %xmm4,%xmm9,%xmm4
- vpxor %xmm5,%xmm10,%xmm5
- vpxor %xmm6,%xmm11,%xmm6
-.byte 143,232,120,194,255,8
-.byte 143,232,120,194,228,8
-.byte 143,232,120,194,237,8
-.byte 143,232,120,194,246,8
- vpaddd %xmm7,%xmm14,%xmm14
- vpaddd %xmm4,%xmm15,%xmm15
- vpaddd %xmm5,%xmm12,%xmm12
- vpaddd %xmm6,%xmm13,%xmm13
- vpxor %xmm1,%xmm14,%xmm1
- vpxor %xmm2,%xmm15,%xmm2
- vpxor %xmm12,%xmm3,%xmm3
- vpxor %xmm13,%xmm0,%xmm0
-.byte 143,232,120,194,201,7
-.byte 143,232,120,194,210,7
-.byte 143,232,120,194,219,7
-.byte 143,232,120,194,192,7
- decl %eax
- jnz .Loop4xop
-
- vpaddd 64(%rsp),%xmm8,%xmm8
- vpaddd 80(%rsp),%xmm9,%xmm9
- vpaddd 96(%rsp),%xmm10,%xmm10
- vpaddd 112(%rsp),%xmm11,%xmm11
-
- vmovdqa %xmm14,32(%rsp)
- vmovdqa %xmm15,48(%rsp)
-
- vpunpckldq %xmm9,%xmm8,%xmm14
- vpunpckldq %xmm11,%xmm10,%xmm15
- vpunpckhdq %xmm9,%xmm8,%xmm8
- vpunpckhdq %xmm11,%xmm10,%xmm10
- vpunpcklqdq %xmm15,%xmm14,%xmm9
- vpunpckhqdq %xmm15,%xmm14,%xmm14
- vpunpcklqdq %xmm10,%xmm8,%xmm11
- vpunpckhqdq %xmm10,%xmm8,%xmm8
- vpaddd 128-256(%rcx),%xmm0,%xmm0
- vpaddd 144-256(%rcx),%xmm1,%xmm1
- vpaddd 160-256(%rcx),%xmm2,%xmm2
- vpaddd 176-256(%rcx),%xmm3,%xmm3
-
- vmovdqa %xmm9,0(%rsp)
- vmovdqa %xmm14,16(%rsp)
- vmovdqa 32(%rsp),%xmm9
- vmovdqa 48(%rsp),%xmm14
-
- vpunpckldq %xmm1,%xmm0,%xmm10
- vpunpckldq %xmm3,%xmm2,%xmm15
- vpunpckhdq %xmm1,%xmm0,%xmm0
- vpunpckhdq %xmm3,%xmm2,%xmm2
- vpunpcklqdq %xmm15,%xmm10,%xmm1
- vpunpckhqdq %xmm15,%xmm10,%xmm10
- vpunpcklqdq %xmm2,%xmm0,%xmm3
- vpunpckhqdq %xmm2,%xmm0,%xmm0
- vpaddd 192-256(%rcx),%xmm12,%xmm12
- vpaddd 208-256(%rcx),%xmm13,%xmm13
- vpaddd 224-256(%rcx),%xmm9,%xmm9
- vpaddd 240-256(%rcx),%xmm14,%xmm14
-
- vpunpckldq %xmm13,%xmm12,%xmm2
- vpunpckldq %xmm14,%xmm9,%xmm15
- vpunpckhdq %xmm13,%xmm12,%xmm12
- vpunpckhdq %xmm14,%xmm9,%xmm9
- vpunpcklqdq %xmm15,%xmm2,%xmm13
- vpunpckhqdq %xmm15,%xmm2,%xmm2
- vpunpcklqdq %xmm9,%xmm12,%xmm14
- vpunpckhqdq %xmm9,%xmm12,%xmm12
- vpaddd 256-256(%rcx),%xmm4,%xmm4
- vpaddd 272-256(%rcx),%xmm5,%xmm5
- vpaddd 288-256(%rcx),%xmm6,%xmm6
- vpaddd 304-256(%rcx),%xmm7,%xmm7
-
- vpunpckldq %xmm5,%xmm4,%xmm9
- vpunpckldq %xmm7,%xmm6,%xmm15
- vpunpckhdq %xmm5,%xmm4,%xmm4
- vpunpckhdq %xmm7,%xmm6,%xmm6
- vpunpcklqdq %xmm15,%xmm9,%xmm5
- vpunpckhqdq %xmm15,%xmm9,%xmm9
- vpunpcklqdq %xmm6,%xmm4,%xmm7
- vpunpckhqdq %xmm6,%xmm4,%xmm4
- vmovdqa 0(%rsp),%xmm6
- vmovdqa 16(%rsp),%xmm15
-
- cmpq $256,%rdx
- jb .Ltail4xop
-
- vpxor 0(%rsi),%xmm6,%xmm6
- vpxor 16(%rsi),%xmm1,%xmm1
- vpxor 32(%rsi),%xmm13,%xmm13
- vpxor 48(%rsi),%xmm5,%xmm5
- vpxor 64(%rsi),%xmm15,%xmm15
- vpxor 80(%rsi),%xmm10,%xmm10
- vpxor 96(%rsi),%xmm2,%xmm2
- vpxor 112(%rsi),%xmm9,%xmm9
- leaq 128(%rsi),%rsi
- vpxor 0(%rsi),%xmm11,%xmm11
- vpxor 16(%rsi),%xmm3,%xmm3
- vpxor 32(%rsi),%xmm14,%xmm14
- vpxor 48(%rsi),%xmm7,%xmm7
- vpxor 64(%rsi),%xmm8,%xmm8
- vpxor 80(%rsi),%xmm0,%xmm0
- vpxor 96(%rsi),%xmm12,%xmm12
- vpxor 112(%rsi),%xmm4,%xmm4
- leaq 128(%rsi),%rsi
-
- vmovdqu %xmm6,0(%rdi)
- vmovdqu %xmm1,16(%rdi)
- vmovdqu %xmm13,32(%rdi)
- vmovdqu %xmm5,48(%rdi)
- vmovdqu %xmm15,64(%rdi)
- vmovdqu %xmm10,80(%rdi)
- vmovdqu %xmm2,96(%rdi)
- vmovdqu %xmm9,112(%rdi)
- leaq 128(%rdi),%rdi
- vmovdqu %xmm11,0(%rdi)
- vmovdqu %xmm3,16(%rdi)
- vmovdqu %xmm14,32(%rdi)
- vmovdqu %xmm7,48(%rdi)
- vmovdqu %xmm8,64(%rdi)
- vmovdqu %xmm0,80(%rdi)
- vmovdqu %xmm12,96(%rdi)
- vmovdqu %xmm4,112(%rdi)
- leaq 128(%rdi),%rdi
-
- subq $256,%rdx
- jnz .Loop_outer4xop
-
- jmp .Ldone4xop
-
-.align 32
-.Ltail4xop:
- cmpq $192,%rdx
- jae .L192_or_more4xop
- cmpq $128,%rdx
- jae .L128_or_more4xop
- cmpq $64,%rdx
- jae .L64_or_more4xop
-
- xorq %r10,%r10
- vmovdqa %xmm6,0(%rsp)
- vmovdqa %xmm1,16(%rsp)
- vmovdqa %xmm13,32(%rsp)
- vmovdqa %xmm5,48(%rsp)
- jmp .Loop_tail4xop
-
-.align 32
-.L64_or_more4xop:
- vpxor 0(%rsi),%xmm6,%xmm6
- vpxor 16(%rsi),%xmm1,%xmm1
- vpxor 32(%rsi),%xmm13,%xmm13
- vpxor 48(%rsi),%xmm5,%xmm5
- vmovdqu %xmm6,0(%rdi)
- vmovdqu %xmm1,16(%rdi)
- vmovdqu %xmm13,32(%rdi)
- vmovdqu %xmm5,48(%rdi)
- je .Ldone4xop
-
- leaq 64(%rsi),%rsi
- vmovdqa %xmm15,0(%rsp)
- xorq %r10,%r10
- vmovdqa %xmm10,16(%rsp)
- leaq 64(%rdi),%rdi
- vmovdqa %xmm2,32(%rsp)
- subq $64,%rdx
- vmovdqa %xmm9,48(%rsp)
- jmp .Loop_tail4xop
-
-.align 32
-.L128_or_more4xop:
- vpxor 0(%rsi),%xmm6,%xmm6
- vpxor 16(%rsi),%xmm1,%xmm1
- vpxor 32(%rsi),%xmm13,%xmm13
- vpxor 48(%rsi),%xmm5,%xmm5
- vpxor 64(%rsi),%xmm15,%xmm15
- vpxor 80(%rsi),%xmm10,%xmm10
- vpxor 96(%rsi),%xmm2,%xmm2
- vpxor 112(%rsi),%xmm9,%xmm9
-
- vmovdqu %xmm6,0(%rdi)
- vmovdqu %xmm1,16(%rdi)
- vmovdqu %xmm13,32(%rdi)
- vmovdqu %xmm5,48(%rdi)
- vmovdqu %xmm15,64(%rdi)
- vmovdqu %xmm10,80(%rdi)
- vmovdqu %xmm2,96(%rdi)
- vmovdqu %xmm9,112(%rdi)
- je .Ldone4xop
-
- leaq 128(%rsi),%rsi
- vmovdqa %xmm11,0(%rsp)
- xorq %r10,%r10
- vmovdqa %xmm3,16(%rsp)
- leaq 128(%rdi),%rdi
- vmovdqa %xmm14,32(%rsp)
- subq $128,%rdx
- vmovdqa %xmm7,48(%rsp)
- jmp .Loop_tail4xop
-
-.align 32
-.L192_or_more4xop:
- vpxor 0(%rsi),%xmm6,%xmm6
- vpxor 16(%rsi),%xmm1,%xmm1
- vpxor 32(%rsi),%xmm13,%xmm13
- vpxor 48(%rsi),%xmm5,%xmm5
- vpxor 64(%rsi),%xmm15,%xmm15
- vpxor 80(%rsi),%xmm10,%xmm10
- vpxor 96(%rsi),%xmm2,%xmm2
- vpxor 112(%rsi),%xmm9,%xmm9
- leaq 128(%rsi),%rsi
- vpxor 0(%rsi),%xmm11,%xmm11
- vpxor 16(%rsi),%xmm3,%xmm3
- vpxor 32(%rsi),%xmm14,%xmm14
- vpxor 48(%rsi),%xmm7,%xmm7
-
- vmovdqu %xmm6,0(%rdi)
- vmovdqu %xmm1,16(%rdi)
- vmovdqu %xmm13,32(%rdi)
- vmovdqu %xmm5,48(%rdi)
- vmovdqu %xmm15,64(%rdi)
- vmovdqu %xmm10,80(%rdi)
- vmovdqu %xmm2,96(%rdi)
- vmovdqu %xmm9,112(%rdi)
- leaq 128(%rdi),%rdi
- vmovdqu %xmm11,0(%rdi)
- vmovdqu %xmm3,16(%rdi)
- vmovdqu %xmm14,32(%rdi)
- vmovdqu %xmm7,48(%rdi)
- je .Ldone4xop
-
- leaq 64(%rsi),%rsi
- vmovdqa %xmm8,0(%rsp)
- xorq %r10,%r10
- vmovdqa %xmm0,16(%rsp)
- leaq 64(%rdi),%rdi
- vmovdqa %xmm12,32(%rsp)
- subq $192,%rdx
- vmovdqa %xmm4,48(%rsp)
-
-.Loop_tail4xop:
- movzbl (%rsi,%r10,1),%eax
- movzbl (%rsp,%r10,1),%ecx
- leaq 1(%r10),%r10
- xorl %ecx,%eax
- movb %al,-1(%rdi,%r10,1)
- decq %rdx
- jnz .Loop_tail4xop
-
-.Ldone4xop:
- vzeroupper
- leaq (%r9),%rsp
-.cfi_def_cfa_register %rsp
-.L4xop_epilogue:
- .byte 0xf3,0xc3
-.cfi_endproc
-.size ChaCha20_4xop,.-ChaCha20_4xop
-.type ChaCha20_8x,@function
-.align 32
-ChaCha20_8x:
-.cfi_startproc
-.LChaCha20_8x:
- movq %rsp,%r9
-.cfi_def_cfa_register %r9
- subq $0x280+8,%rsp
- andq $-32,%rsp
- vzeroupper
-
-
-
-
-
-
-
-
-
-
- vbroadcasti128 .Lsigma(%rip),%ymm11
- vbroadcasti128 (%rcx),%ymm3
- vbroadcasti128 16(%rcx),%ymm15
- vbroadcasti128 (%r8),%ymm7
- leaq 256(%rsp),%rcx
- leaq 512(%rsp),%rax
- leaq .Lrot16(%rip),%r10
- leaq .Lrot24(%rip),%r11
-
- vpshufd $0x00,%ymm11,%ymm8
- vpshufd $0x55,%ymm11,%ymm9
- vmovdqa %ymm8,128-256(%rcx)
- vpshufd $0xaa,%ymm11,%ymm10
- vmovdqa %ymm9,160-256(%rcx)
- vpshufd $0xff,%ymm11,%ymm11
- vmovdqa %ymm10,192-256(%rcx)
- vmovdqa %ymm11,224-256(%rcx)
-
- vpshufd $0x00,%ymm3,%ymm0
- vpshufd $0x55,%ymm3,%ymm1
- vmovdqa %ymm0,256-256(%rcx)
- vpshufd $0xaa,%ymm3,%ymm2
- vmovdqa %ymm1,288-256(%rcx)
- vpshufd $0xff,%ymm3,%ymm3
- vmovdqa %ymm2,320-256(%rcx)
- vmovdqa %ymm3,352-256(%rcx)
-
- vpshufd $0x00,%ymm15,%ymm12
- vpshufd $0x55,%ymm15,%ymm13
- vmovdqa %ymm12,384-512(%rax)
- vpshufd $0xaa,%ymm15,%ymm14
- vmovdqa %ymm13,416-512(%rax)
- vpshufd $0xff,%ymm15,%ymm15
- vmovdqa %ymm14,448-512(%rax)
- vmovdqa %ymm15,480-512(%rax)
-
- vpshufd $0x00,%ymm7,%ymm4
- vpshufd $0x55,%ymm7,%ymm5
- vpaddd .Lincy(%rip),%ymm4,%ymm4
- vpshufd $0xaa,%ymm7,%ymm6
- vmovdqa %ymm5,544-512(%rax)
- vpshufd $0xff,%ymm7,%ymm7
- vmovdqa %ymm6,576-512(%rax)
- vmovdqa %ymm7,608-512(%rax)
-
- jmp .Loop_enter8x
-
-.align 32
-.Loop_outer8x:
- vmovdqa 128-256(%rcx),%ymm8
- vmovdqa 160-256(%rcx),%ymm9
- vmovdqa 192-256(%rcx),%ymm10
- vmovdqa 224-256(%rcx),%ymm11
- vmovdqa 256-256(%rcx),%ymm0
- vmovdqa 288-256(%rcx),%ymm1
- vmovdqa 320-256(%rcx),%ymm2
- vmovdqa 352-256(%rcx),%ymm3
- vmovdqa 384-512(%rax),%ymm12
- vmovdqa 416-512(%rax),%ymm13
- vmovdqa 448-512(%rax),%ymm14
- vmovdqa 480-512(%rax),%ymm15
- vmovdqa 512-512(%rax),%ymm4
- vmovdqa 544-512(%rax),%ymm5
- vmovdqa 576-512(%rax),%ymm6
- vmovdqa 608-512(%rax),%ymm7
- vpaddd .Leight(%rip),%ymm4,%ymm4
-
-.Loop_enter8x:
- vmovdqa %ymm14,64(%rsp)
- vmovdqa %ymm15,96(%rsp)
- vbroadcasti128 (%r10),%ymm15
- vmovdqa %ymm4,512-512(%rax)
- movl $10,%eax
- jmp .Loop8x
-
-.align 32
-.Loop8x:
- vpaddd %ymm0,%ymm8,%ymm8
- vpxor %ymm4,%ymm8,%ymm4
- vpshufb %ymm15,%ymm4,%ymm4
- vpaddd %ymm1,%ymm9,%ymm9
- vpxor %ymm5,%ymm9,%ymm5
- vpshufb %ymm15,%ymm5,%ymm5
- vpaddd %ymm4,%ymm12,%ymm12
- vpxor %ymm0,%ymm12,%ymm0
- vpslld $12,%ymm0,%ymm14
- vpsrld $20,%ymm0,%ymm0
- vpor %ymm0,%ymm14,%ymm0
- vbroadcasti128 (%r11),%ymm14
- vpaddd %ymm5,%ymm13,%ymm13
- vpxor %ymm1,%ymm13,%ymm1
- vpslld $12,%ymm1,%ymm15
- vpsrld $20,%ymm1,%ymm1
- vpor %ymm1,%ymm15,%ymm1
- vpaddd %ymm0,%ymm8,%ymm8
- vpxor %ymm4,%ymm8,%ymm4
- vpshufb %ymm14,%ymm4,%ymm4
- vpaddd %ymm1,%ymm9,%ymm9
- vpxor %ymm5,%ymm9,%ymm5
- vpshufb %ymm14,%ymm5,%ymm5
- vpaddd %ymm4,%ymm12,%ymm12
- vpxor %ymm0,%ymm12,%ymm0
- vpslld $7,%ymm0,%ymm15
- vpsrld $25,%ymm0,%ymm0
- vpor %ymm0,%ymm15,%ymm0
- vbroadcasti128 (%r10),%ymm15
- vpaddd %ymm5,%ymm13,%ymm13
- vpxor %ymm1,%ymm13,%ymm1
- vpslld $7,%ymm1,%ymm14
- vpsrld $25,%ymm1,%ymm1
- vpor %ymm1,%ymm14,%ymm1
- vmovdqa %ymm12,0(%rsp)
- vmovdqa %ymm13,32(%rsp)
- vmovdqa 64(%rsp),%ymm12
- vmovdqa 96(%rsp),%ymm13
- vpaddd %ymm2,%ymm10,%ymm10
- vpxor %ymm6,%ymm10,%ymm6
- vpshufb %ymm15,%ymm6,%ymm6
- vpaddd %ymm3,%ymm11,%ymm11
- vpxor %ymm7,%ymm11,%ymm7
- vpshufb %ymm15,%ymm7,%ymm7
- vpaddd %ymm6,%ymm12,%ymm12
- vpxor %ymm2,%ymm12,%ymm2
- vpslld $12,%ymm2,%ymm14
- vpsrld $20,%ymm2,%ymm2
- vpor %ymm2,%ymm14,%ymm2
- vbroadcasti128 (%r11),%ymm14
- vpaddd %ymm7,%ymm13,%ymm13
- vpxor %ymm3,%ymm13,%ymm3
- vpslld $12,%ymm3,%ymm15
- vpsrld $20,%ymm3,%ymm3
- vpor %ymm3,%ymm15,%ymm3
- vpaddd %ymm2,%ymm10,%ymm10
- vpxor %ymm6,%ymm10,%ymm6
- vpshufb %ymm14,%ymm6,%ymm6
- vpaddd %ymm3,%ymm11,%ymm11
- vpxor %ymm7,%ymm11,%ymm7
- vpshufb %ymm14,%ymm7,%ymm7
- vpaddd %ymm6,%ymm12,%ymm12
- vpxor %ymm2,%ymm12,%ymm2
- vpslld $7,%ymm2,%ymm15
- vpsrld $25,%ymm2,%ymm2
- vpor %ymm2,%ymm15,%ymm2
- vbroadcasti128 (%r10),%ymm15
- vpaddd %ymm7,%ymm13,%ymm13
- vpxor %ymm3,%ymm13,%ymm3
- vpslld $7,%ymm3,%ymm14
- vpsrld $25,%ymm3,%ymm3
- vpor %ymm3,%ymm14,%ymm3
- vpaddd %ymm1,%ymm8,%ymm8
- vpxor %ymm7,%ymm8,%ymm7
- vpshufb %ymm15,%ymm7,%ymm7
- vpaddd %ymm2,%ymm9,%ymm9
- vpxor %ymm4,%ymm9,%ymm4
- vpshufb %ymm15,%ymm4,%ymm4
- vpaddd %ymm7,%ymm12,%ymm12
- vpxor %ymm1,%ymm12,%ymm1
- vpslld $12,%ymm1,%ymm14
- vpsrld $20,%ymm1,%ymm1
- vpor %ymm1,%ymm14,%ymm1
- vbroadcasti128 (%r11),%ymm14
- vpaddd %ymm4,%ymm13,%ymm13
- vpxor %ymm2,%ymm13,%ymm2
- vpslld $12,%ymm2,%ymm15
- vpsrld $20,%ymm2,%ymm2
- vpor %ymm2,%ymm15,%ymm2
- vpaddd %ymm1,%ymm8,%ymm8
- vpxor %ymm7,%ymm8,%ymm7
- vpshufb %ymm14,%ymm7,%ymm7
- vpaddd %ymm2,%ymm9,%ymm9
- vpxor %ymm4,%ymm9,%ymm4
- vpshufb %ymm14,%ymm4,%ymm4
- vpaddd %ymm7,%ymm12,%ymm12
- vpxor %ymm1,%ymm12,%ymm1
- vpslld $7,%ymm1,%ymm15
- vpsrld $25,%ymm1,%ymm1
- vpor %ymm1,%ymm15,%ymm1
- vbroadcasti128 (%r10),%ymm15
- vpaddd %ymm4,%ymm13,%ymm13
- vpxor %ymm2,%ymm13,%ymm2
- vpslld $7,%ymm2,%ymm14
- vpsrld $25,%ymm2,%ymm2
- vpor %ymm2,%ymm14,%ymm2
- vmovdqa %ymm12,64(%rsp)
- vmovdqa %ymm13,96(%rsp)
- vmovdqa 0(%rsp),%ymm12
- vmovdqa 32(%rsp),%ymm13
- vpaddd %ymm3,%ymm10,%ymm10
- vpxor %ymm5,%ymm10,%ymm5
- vpshufb %ymm15,%ymm5,%ymm5
- vpaddd %ymm0,%ymm11,%ymm11
- vpxor %ymm6,%ymm11,%ymm6
- vpshufb %ymm15,%ymm6,%ymm6
- vpaddd %ymm5,%ymm12,%ymm12
- vpxor %ymm3,%ymm12,%ymm3
- vpslld $12,%ymm3,%ymm14
- vpsrld $20,%ymm3,%ymm3
- vpor %ymm3,%ymm14,%ymm3
- vbroadcasti128 (%r11),%ymm14
- vpaddd %ymm6,%ymm13,%ymm13
- vpxor %ymm0,%ymm13,%ymm0
- vpslld $12,%ymm0,%ymm15
- vpsrld $20,%ymm0,%ymm0
- vpor %ymm0,%ymm15,%ymm0
- vpaddd %ymm3,%ymm10,%ymm10
- vpxor %ymm5,%ymm10,%ymm5
- vpshufb %ymm14,%ymm5,%ymm5
- vpaddd %ymm0,%ymm11,%ymm11
- vpxor %ymm6,%ymm11,%ymm6
- vpshufb %ymm14,%ymm6,%ymm6
- vpaddd %ymm5,%ymm12,%ymm12
- vpxor %ymm3,%ymm12,%ymm3
- vpslld $7,%ymm3,%ymm15
- vpsrld $25,%ymm3,%ymm3
- vpor %ymm3,%ymm15,%ymm3
- vbroadcasti128 (%r10),%ymm15
- vpaddd %ymm6,%ymm13,%ymm13
- vpxor %ymm0,%ymm13,%ymm0
- vpslld $7,%ymm0,%ymm14
- vpsrld $25,%ymm0,%ymm0
- vpor %ymm0,%ymm14,%ymm0
- decl %eax
- jnz .Loop8x
-
- leaq 512(%rsp),%rax
- vpaddd 128-256(%rcx),%ymm8,%ymm8
- vpaddd 160-256(%rcx),%ymm9,%ymm9
- vpaddd 192-256(%rcx),%ymm10,%ymm10
- vpaddd 224-256(%rcx),%ymm11,%ymm11
-
- vpunpckldq %ymm9,%ymm8,%ymm14
- vpunpckldq %ymm11,%ymm10,%ymm15
- vpunpckhdq %ymm9,%ymm8,%ymm8
- vpunpckhdq %ymm11,%ymm10,%ymm10
- vpunpcklqdq %ymm15,%ymm14,%ymm9
- vpunpckhqdq %ymm15,%ymm14,%ymm14
- vpunpcklqdq %ymm10,%ymm8,%ymm11
- vpunpckhqdq %ymm10,%ymm8,%ymm8
- vpaddd 256-256(%rcx),%ymm0,%ymm0
- vpaddd 288-256(%rcx),%ymm1,%ymm1
- vpaddd 320-256(%rcx),%ymm2,%ymm2
- vpaddd 352-256(%rcx),%ymm3,%ymm3
-
- vpunpckldq %ymm1,%ymm0,%ymm10
- vpunpckldq %ymm3,%ymm2,%ymm15
- vpunpckhdq %ymm1,%ymm0,%ymm0
- vpunpckhdq %ymm3,%ymm2,%ymm2
- vpunpcklqdq %ymm15,%ymm10,%ymm1
- vpunpckhqdq %ymm15,%ymm10,%ymm10
- vpunpcklqdq %ymm2,%ymm0,%ymm3
- vpunpckhqdq %ymm2,%ymm0,%ymm0
- vperm2i128 $0x20,%ymm1,%ymm9,%ymm15
- vperm2i128 $0x31,%ymm1,%ymm9,%ymm1
- vperm2i128 $0x20,%ymm10,%ymm14,%ymm9
- vperm2i128 $0x31,%ymm10,%ymm14,%ymm10
- vperm2i128 $0x20,%ymm3,%ymm11,%ymm14
- vperm2i128 $0x31,%ymm3,%ymm11,%ymm3
- vperm2i128 $0x20,%ymm0,%ymm8,%ymm11
- vperm2i128 $0x31,%ymm0,%ymm8,%ymm0
- vmovdqa %ymm15,0(%rsp)
- vmovdqa %ymm9,32(%rsp)
- vmovdqa 64(%rsp),%ymm15
- vmovdqa 96(%rsp),%ymm9
-
- vpaddd 384-512(%rax),%ymm12,%ymm12
- vpaddd 416-512(%rax),%ymm13,%ymm13
- vpaddd 448-512(%rax),%ymm15,%ymm15
- vpaddd 480-512(%rax),%ymm9,%ymm9
-
- vpunpckldq %ymm13,%ymm12,%ymm2
- vpunpckldq %ymm9,%ymm15,%ymm8
- vpunpckhdq %ymm13,%ymm12,%ymm12
- vpunpckhdq %ymm9,%ymm15,%ymm15
- vpunpcklqdq %ymm8,%ymm2,%ymm13
- vpunpckhqdq %ymm8,%ymm2,%ymm2
- vpunpcklqdq %ymm15,%ymm12,%ymm9
- vpunpckhqdq %ymm15,%ymm12,%ymm12
- vpaddd 512-512(%rax),%ymm4,%ymm4
- vpaddd 544-512(%rax),%ymm5,%ymm5
- vpaddd 576-512(%rax),%ymm6,%ymm6
- vpaddd 608-512(%rax),%ymm7,%ymm7
-
- vpunpckldq %ymm5,%ymm4,%ymm15
- vpunpckldq %ymm7,%ymm6,%ymm8
- vpunpckhdq %ymm5,%ymm4,%ymm4
- vpunpckhdq %ymm7,%ymm6,%ymm6
- vpunpcklqdq %ymm8,%ymm15,%ymm5
- vpunpckhqdq %ymm8,%ymm15,%ymm15
- vpunpcklqdq %ymm6,%ymm4,%ymm7
- vpunpckhqdq %ymm6,%ymm4,%ymm4
- vperm2i128 $0x20,%ymm5,%ymm13,%ymm8
- vperm2i128 $0x31,%ymm5,%ymm13,%ymm5
- vperm2i128 $0x20,%ymm15,%ymm2,%ymm13
- vperm2i128 $0x31,%ymm15,%ymm2,%ymm15
- vperm2i128 $0x20,%ymm7,%ymm9,%ymm2
- vperm2i128 $0x31,%ymm7,%ymm9,%ymm7
- vperm2i128 $0x20,%ymm4,%ymm12,%ymm9
- vperm2i128 $0x31,%ymm4,%ymm12,%ymm4
- vmovdqa 0(%rsp),%ymm6
- vmovdqa 32(%rsp),%ymm12
-
- cmpq $512,%rdx
- jb .Ltail8x
-
- vpxor 0(%rsi),%ymm6,%ymm6
- vpxor 32(%rsi),%ymm8,%ymm8
- vpxor 64(%rsi),%ymm1,%ymm1
- vpxor 96(%rsi),%ymm5,%ymm5
- leaq 128(%rsi),%rsi
- vmovdqu %ymm6,0(%rdi)
- vmovdqu %ymm8,32(%rdi)
- vmovdqu %ymm1,64(%rdi)
- vmovdqu %ymm5,96(%rdi)
- leaq 128(%rdi),%rdi
-
- vpxor 0(%rsi),%ymm12,%ymm12
- vpxor 32(%rsi),%ymm13,%ymm13
- vpxor 64(%rsi),%ymm10,%ymm10
- vpxor 96(%rsi),%ymm15,%ymm15
- leaq 128(%rsi),%rsi
- vmovdqu %ymm12,0(%rdi)
- vmovdqu %ymm13,32(%rdi)
- vmovdqu %ymm10,64(%rdi)
- vmovdqu %ymm15,96(%rdi)
- leaq 128(%rdi),%rdi
-
- vpxor 0(%rsi),%ymm14,%ymm14
- vpxor 32(%rsi),%ymm2,%ymm2
- vpxor 64(%rsi),%ymm3,%ymm3
- vpxor 96(%rsi),%ymm7,%ymm7
- leaq 128(%rsi),%rsi
- vmovdqu %ymm14,0(%rdi)
- vmovdqu %ymm2,32(%rdi)
- vmovdqu %ymm3,64(%rdi)
- vmovdqu %ymm7,96(%rdi)
- leaq 128(%rdi),%rdi
-
- vpxor 0(%rsi),%ymm11,%ymm11
- vpxor 32(%rsi),%ymm9,%ymm9
- vpxor 64(%rsi),%ymm0,%ymm0
- vpxor 96(%rsi),%ymm4,%ymm4
- leaq 128(%rsi),%rsi
- vmovdqu %ymm11,0(%rdi)
- vmovdqu %ymm9,32(%rdi)
- vmovdqu %ymm0,64(%rdi)
- vmovdqu %ymm4,96(%rdi)
- leaq 128(%rdi),%rdi
-
- subq $512,%rdx
- jnz .Loop_outer8x
-
- jmp .Ldone8x
-
-.Ltail8x:
- cmpq $448,%rdx
- jae .L448_or_more8x
- cmpq $384,%rdx
- jae .L384_or_more8x
- cmpq $320,%rdx
- jae .L320_or_more8x
- cmpq $256,%rdx
- jae .L256_or_more8x
- cmpq $192,%rdx
- jae .L192_or_more8x
- cmpq $128,%rdx
- jae .L128_or_more8x
- cmpq $64,%rdx
- jae .L64_or_more8x
-
- xorq %r10,%r10
- vmovdqa %ymm6,0(%rsp)
- vmovdqa %ymm8,32(%rsp)
- jmp .Loop_tail8x
-
-.align 32
-.L64_or_more8x:
- vpxor 0(%rsi),%ymm6,%ymm6
- vpxor 32(%rsi),%ymm8,%ymm8
- vmovdqu %ymm6,0(%rdi)
- vmovdqu %ymm8,32(%rdi)
- je .Ldone8x
-
- leaq 64(%rsi),%rsi
- xorq %r10,%r10
- vmovdqa %ymm1,0(%rsp)
- leaq 64(%rdi),%rdi
- subq $64,%rdx
- vmovdqa %ymm5,32(%rsp)
- jmp .Loop_tail8x
-
-.align 32
-.L128_or_more8x:
- vpxor 0(%rsi),%ymm6,%ymm6
- vpxor 32(%rsi),%ymm8,%ymm8
- vpxor 64(%rsi),%ymm1,%ymm1
- vpxor 96(%rsi),%ymm5,%ymm5
- vmovdqu %ymm6,0(%rdi)
- vmovdqu %ymm8,32(%rdi)
- vmovdqu %ymm1,64(%rdi)
- vmovdqu %ymm5,96(%rdi)
- je .Ldone8x
-
- leaq 128(%rsi),%rsi
- xorq %r10,%r10
- vmovdqa %ymm12,0(%rsp)
- leaq 128(%rdi),%rdi
- subq $128,%rdx
- vmovdqa %ymm13,32(%rsp)
- jmp .Loop_tail8x
-
-.align 32
-.L192_or_more8x:
- vpxor 0(%rsi),%ymm6,%ymm6
- vpxor 32(%rsi),%ymm8,%ymm8
- vpxor 64(%rsi),%ymm1,%ymm1
- vpxor 96(%rsi),%ymm5,%ymm5
- vpxor 128(%rsi),%ymm12,%ymm12
- vpxor 160(%rsi),%ymm13,%ymm13
- vmovdqu %ymm6,0(%rdi)
- vmovdqu %ymm8,32(%rdi)
- vmovdqu %ymm1,64(%rdi)
- vmovdqu %ymm5,96(%rdi)
- vmovdqu %ymm12,128(%rdi)
- vmovdqu %ymm13,160(%rdi)
- je .Ldone8x
-
- leaq 192(%rsi),%rsi
- xorq %r10,%r10
- vmovdqa %ymm10,0(%rsp)
- leaq 192(%rdi),%rdi
- subq $192,%rdx
- vmovdqa %ymm15,32(%rsp)
- jmp .Loop_tail8x
-
-.align 32
-.L256_or_more8x:
- vpxor 0(%rsi),%ymm6,%ymm6
- vpxor 32(%rsi),%ymm8,%ymm8
- vpxor 64(%rsi),%ymm1,%ymm1
- vpxor 96(%rsi),%ymm5,%ymm5
- vpxor 128(%rsi),%ymm12,%ymm12
- vpxor 160(%rsi),%ymm13,%ymm13
- vpxor 192(%rsi),%ymm10,%ymm10
- vpxor 224(%rsi),%ymm15,%ymm15
- vmovdqu %ymm6,0(%rdi)
- vmovdqu %ymm8,32(%rdi)
- vmovdqu %ymm1,64(%rdi)
- vmovdqu %ymm5,96(%rdi)
- vmovdqu %ymm12,128(%rdi)
- vmovdqu %ymm13,160(%rdi)
- vmovdqu %ymm10,192(%rdi)
- vmovdqu %ymm15,224(%rdi)
- je .Ldone8x
-
- leaq 256(%rsi),%rsi
- xorq %r10,%r10
- vmovdqa %ymm14,0(%rsp)
- leaq 256(%rdi),%rdi
- subq $256,%rdx
- vmovdqa %ymm2,32(%rsp)
- jmp .Loop_tail8x
-
-.align 32
-.L320_or_more8x:
- vpxor 0(%rsi),%ymm6,%ymm6
- vpxor 32(%rsi),%ymm8,%ymm8
- vpxor 64(%rsi),%ymm1,%ymm1
- vpxor 96(%rsi),%ymm5,%ymm5
- vpxor 128(%rsi),%ymm12,%ymm12
- vpxor 160(%rsi),%ymm13,%ymm13
- vpxor 192(%rsi),%ymm10,%ymm10
- vpxor 224(%rsi),%ymm15,%ymm15
- vpxor 256(%rsi),%ymm14,%ymm14
- vpxor 288(%rsi),%ymm2,%ymm2
- vmovdqu %ymm6,0(%rdi)
- vmovdqu %ymm8,32(%rdi)
- vmovdqu %ymm1,64(%rdi)
- vmovdqu %ymm5,96(%rdi)
- vmovdqu %ymm12,128(%rdi)
- vmovdqu %ymm13,160(%rdi)
- vmovdqu %ymm10,192(%rdi)
- vmovdqu %ymm15,224(%rdi)
- vmovdqu %ymm14,256(%rdi)
- vmovdqu %ymm2,288(%rdi)
- je .Ldone8x
-
- leaq 320(%rsi),%rsi
- xorq %r10,%r10
- vmovdqa %ymm3,0(%rsp)
- leaq 320(%rdi),%rdi
- subq $320,%rdx
- vmovdqa %ymm7,32(%rsp)
- jmp .Loop_tail8x
-
-.align 32
-.L384_or_more8x:
- vpxor 0(%rsi),%ymm6,%ymm6
- vpxor 32(%rsi),%ymm8,%ymm8
- vpxor 64(%rsi),%ymm1,%ymm1
- vpxor 96(%rsi),%ymm5,%ymm5
- vpxor 128(%rsi),%ymm12,%ymm12
- vpxor 160(%rsi),%ymm13,%ymm13
- vpxor 192(%rsi),%ymm10,%ymm10
- vpxor 224(%rsi),%ymm15,%ymm15
- vpxor 256(%rsi),%ymm14,%ymm14
- vpxor 288(%rsi),%ymm2,%ymm2
- vpxor 320(%rsi),%ymm3,%ymm3
- vpxor 352(%rsi),%ymm7,%ymm7
- vmovdqu %ymm6,0(%rdi)
- vmovdqu %ymm8,32(%rdi)
- vmovdqu %ymm1,64(%rdi)
- vmovdqu %ymm5,96(%rdi)
- vmovdqu %ymm12,128(%rdi)
- vmovdqu %ymm13,160(%rdi)
- vmovdqu %ymm10,192(%rdi)
- vmovdqu %ymm15,224(%rdi)
- vmovdqu %ymm14,256(%rdi)
- vmovdqu %ymm2,288(%rdi)
- vmovdqu %ymm3,320(%rdi)
- vmovdqu %ymm7,352(%rdi)
- je .Ldone8x
-
- leaq 384(%rsi),%rsi
- xorq %r10,%r10
- vmovdqa %ymm11,0(%rsp)
- leaq 384(%rdi),%rdi
- subq $384,%rdx
- vmovdqa %ymm9,32(%rsp)
- jmp .Loop_tail8x
-
-.align 32
-.L448_or_more8x:
- vpxor 0(%rsi),%ymm6,%ymm6
- vpxor 32(%rsi),%ymm8,%ymm8
- vpxor 64(%rsi),%ymm1,%ymm1
- vpxor 96(%rsi),%ymm5,%ymm5
- vpxor 128(%rsi),%ymm12,%ymm12
- vpxor 160(%rsi),%ymm13,%ymm13
- vpxor 192(%rsi),%ymm10,%ymm10
- vpxor 224(%rsi),%ymm15,%ymm15
- vpxor 256(%rsi),%ymm14,%ymm14
- vpxor 288(%rsi),%ymm2,%ymm2
- vpxor 320(%rsi),%ymm3,%ymm3
- vpxor 352(%rsi),%ymm7,%ymm7
- vpxor 384(%rsi),%ymm11,%ymm11
- vpxor 416(%rsi),%ymm9,%ymm9
- vmovdqu %ymm6,0(%rdi)
- vmovdqu %ymm8,32(%rdi)
- vmovdqu %ymm1,64(%rdi)
- vmovdqu %ymm5,96(%rdi)
- vmovdqu %ymm12,128(%rdi)
- vmovdqu %ymm13,160(%rdi)
- vmovdqu %ymm10,192(%rdi)
- vmovdqu %ymm15,224(%rdi)
- vmovdqu %ymm14,256(%rdi)
- vmovdqu %ymm2,288(%rdi)
- vmovdqu %ymm3,320(%rdi)
- vmovdqu %ymm7,352(%rdi)
- vmovdqu %ymm11,384(%rdi)
- vmovdqu %ymm9,416(%rdi)
- je .Ldone8x
-
- leaq 448(%rsi),%rsi
- xorq %r10,%r10
- vmovdqa %ymm0,0(%rsp)
- leaq 448(%rdi),%rdi
- subq $448,%rdx
- vmovdqa %ymm4,32(%rsp)
-
-.Loop_tail8x:
- movzbl (%rsi,%r10,1),%eax
- movzbl (%rsp,%r10,1),%ecx
- leaq 1(%r10),%r10
- xorl %ecx,%eax
- movb %al,-1(%rdi,%r10,1)
- decq %rdx
- jnz .Loop_tail8x
-
-.Ldone8x:
- vzeroall
- leaq (%r9),%rsp
-.cfi_def_cfa_register %rsp
-.L8x_epilogue:
- .byte 0xf3,0xc3
-.cfi_endproc
-.size ChaCha20_8x,.-ChaCha20_8x
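The routine deleted above is the AVX2 path that runs eight ChaCha20 blocks in parallel: each vpaddd/vpxor group is one step of the standard quarter-round, with the 16- and 8-bit rotates done through vpshufb byte-shuffle tables and the 12- and 7-bit ones through vpslld/vpsrld/vpor triples, repeated until the round counter in %eax expires. For reference, a minimal scalar sketch of one quarter-round (a hypothetical helper, not part of the generated file):

.text
.globl	chacha_qr_ref
.type	chacha_qr_ref,@function
.align	16
# Sketch: one ChaCha20 quarter-round over the four dwords at (%rdi).
# The removed ChaCha20_8x loop performs the same add/xor/rotate steps
# on eight blocks at once in %ymm registers.
chacha_qr_ref:
.cfi_startproc
	movl	0(%rdi),%eax		# a
	movl	4(%rdi),%ecx		# b
	movl	8(%rdi),%edx		# c
	movl	12(%rdi),%r8d		# d
	addl	%ecx,%eax		# a += b
	xorl	%eax,%r8d		# d ^= a
	roll	$16,%r8d		# d <<<= 16  (vpshufb table above)
	addl	%r8d,%edx		# c += d
	xorl	%edx,%ecx		# b ^= c
	roll	$12,%ecx		# b <<<= 12  (vpslld $12/vpsrld $20)
	addl	%ecx,%eax		# a += b
	xorl	%eax,%r8d		# d ^= a
	roll	$8,%r8d			# d <<<= 8   (vpshufb table above)
	addl	%r8d,%edx		# c += d
	xorl	%edx,%ecx		# b ^= c
	roll	$7,%ecx			# b <<<= 7   (vpslld $7/vpsrld $25)
	movl	%eax,0(%rdi)
	movl	%ecx,4(%rdi)
	movl	%edx,8(%rdi)
	movl	%r8d,12(%rdi)
	.byte	0xf3,0xc3
.cfi_endproc
.size	chacha_qr_ref,.-chacha_qr_ref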
diff --git a/secure/lib/libcrypto/amd64/cmll-x86_64.S b/secure/lib/libcrypto/amd64/cmll-x86_64.S
index 7feb198a7c1c..d1d284b5a32b 100644
--- a/secure/lib/libcrypto/amd64/cmll-x86_64.S
+++ b/secure/lib/libcrypto/amd64/cmll-x86_64.S
@@ -7,11 +7,13 @@
.type Camellia_EncryptBlock,@function
.align 16
Camellia_EncryptBlock:
+.cfi_startproc
movl $128,%eax
subl %edi,%eax
movl $3,%edi
adcl $0,%edi
jmp .Lenc_rounds
+.cfi_endproc
.size Camellia_EncryptBlock,.-Camellia_EncryptBlock
.globl Camellia_EncryptBlock_Rounds
@@ -85,6 +87,7 @@ Camellia_EncryptBlock_Rounds:
.type _x86_64_Camellia_encrypt,@function
.align 16
_x86_64_Camellia_encrypt:
+.cfi_startproc
xorl 0(%r14),%r9d
xorl 4(%r14),%r8d
xorl 8(%r14),%r11d
@@ -287,6 +290,7 @@ _x86_64_Camellia_encrypt:
movl %edx,%r11d
.byte 0xf3,0xc3
+.cfi_endproc
.size _x86_64_Camellia_encrypt,.-_x86_64_Camellia_encrypt
@@ -294,11 +298,13 @@ _x86_64_Camellia_encrypt:
.type Camellia_DecryptBlock,@function
.align 16
Camellia_DecryptBlock:
+.cfi_startproc
movl $128,%eax
subl %edi,%eax
movl $3,%edi
adcl $0,%edi
jmp .Ldec_rounds
+.cfi_endproc
.size Camellia_DecryptBlock,.-Camellia_DecryptBlock
.globl Camellia_DecryptBlock_Rounds
@@ -372,6 +378,7 @@ Camellia_DecryptBlock_Rounds:
.type _x86_64_Camellia_decrypt,@function
.align 16
_x86_64_Camellia_decrypt:
+.cfi_startproc
xorl 0(%r14),%r9d
xorl 4(%r14),%r8d
xorl 8(%r14),%r11d
@@ -575,6 +582,7 @@ _x86_64_Camellia_decrypt:
movl %ebx,%r11d
.byte 0xf3,0xc3
+.cfi_endproc
.size _x86_64_Camellia_decrypt,.-_x86_64_Camellia_decrypt
.globl Camellia_Ekeygen
.type Camellia_Ekeygen,@function
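The Camellia hunks change no instructions; they only bracket routines that previously lacked unwind information with DWARF call-frame directives. A minimal sketch of the pattern being added (hypothetical function, for illustration only):

.text
.globl	cfi_leaf_example
.type	cfi_leaf_example,@function
.align	16
# Sketch: minimal CFI bracketing for a leaf routine.  Because the
# function never moves %rsp, no directives beyond the start/end pair
# are needed; the unwinder finds the return address at the default
# CFA of %rsp+8.
cfi_leaf_example:
.cfi_startproc
	movl	%edi,%eax
	.byte	0xf3,0xc3
.cfi_endproc
.size	cfi_leaf_example,.-cfi_leaf_example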
diff --git a/secure/lib/libcrypto/amd64/ecp_nistz256-x86_64.S b/secure/lib/libcrypto/amd64/ecp_nistz256-x86_64.S
index 1176feea40c2..c69b4d978f39 100644
--- a/secure/lib/libcrypto/amd64/ecp_nistz256-x86_64.S
+++ b/secure/lib/libcrypto/amd64/ecp_nistz256-x86_64.S
@@ -2790,10 +2790,6 @@ ecp_nistz256_neg:
.align 32
ecp_nistz256_ord_mul_mont:
.cfi_startproc
- movl $0x80100,%ecx
- andl OPENSSL_ia32cap_P+8(%rip),%ecx
- cmpl $0x80100,%ecx
- je .Lecp_nistz256_ord_mul_montx
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-16
@@ -3122,10 +3118,6 @@ ecp_nistz256_ord_mul_mont:
.align 32
ecp_nistz256_ord_sqr_mont:
.cfi_startproc
- movl $0x80100,%ecx
- andl OPENSSL_ia32cap_P+8(%rip),%ecx
- cmpl $0x80100,%ecx
- je .Lecp_nistz256_ord_sqr_montx
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-16
@@ -3413,462 +3405,6 @@ ecp_nistz256_ord_sqr_mont:
.cfi_endproc
.size ecp_nistz256_ord_sqr_mont,.-ecp_nistz256_ord_sqr_mont
-.type ecp_nistz256_ord_mul_montx,@function
-.align 32
-ecp_nistz256_ord_mul_montx:
-.cfi_startproc
-.Lecp_nistz256_ord_mul_montx:
- pushq %rbp
-.cfi_adjust_cfa_offset 8
-.cfi_offset %rbp,-16
- pushq %rbx
-.cfi_adjust_cfa_offset 8
-.cfi_offset %rbx,-24
- pushq %r12
-.cfi_adjust_cfa_offset 8
-.cfi_offset %r12,-32
- pushq %r13
-.cfi_adjust_cfa_offset 8
-.cfi_offset %r13,-40
- pushq %r14
-.cfi_adjust_cfa_offset 8
-.cfi_offset %r14,-48
- pushq %r15
-.cfi_adjust_cfa_offset 8
-.cfi_offset %r15,-56
-.Lord_mulx_body:
-
- movq %rdx,%rbx
- movq 0(%rdx),%rdx
- movq 0(%rsi),%r9
- movq 8(%rsi),%r10
- movq 16(%rsi),%r11
- movq 24(%rsi),%r12
- leaq -128(%rsi),%rsi
- leaq .Lord-128(%rip),%r14
- movq .LordK(%rip),%r15
-
-
- mulxq %r9,%r8,%r9
- mulxq %r10,%rcx,%r10
- mulxq %r11,%rbp,%r11
- addq %rcx,%r9
- mulxq %r12,%rcx,%r12
- movq %r8,%rdx
- mulxq %r15,%rdx,%rax
- adcq %rbp,%r10
- adcq %rcx,%r11
- adcq $0,%r12
-
-
- xorq %r13,%r13
- mulxq 0+128(%r14),%rcx,%rbp
- adcxq %rcx,%r8
- adoxq %rbp,%r9
-
- mulxq 8+128(%r14),%rcx,%rbp
- adcxq %rcx,%r9
- adoxq %rbp,%r10
-
- mulxq 16+128(%r14),%rcx,%rbp
- adcxq %rcx,%r10
- adoxq %rbp,%r11
-
- mulxq 24+128(%r14),%rcx,%rbp
- movq 8(%rbx),%rdx
- adcxq %rcx,%r11
- adoxq %rbp,%r12
- adcxq %r8,%r12
- adoxq %r8,%r13
- adcq $0,%r13
-
-
- mulxq 0+128(%rsi),%rcx,%rbp
- adcxq %rcx,%r9
- adoxq %rbp,%r10
-
- mulxq 8+128(%rsi),%rcx,%rbp
- adcxq %rcx,%r10
- adoxq %rbp,%r11
-
- mulxq 16+128(%rsi),%rcx,%rbp
- adcxq %rcx,%r11
- adoxq %rbp,%r12
-
- mulxq 24+128(%rsi),%rcx,%rbp
- movq %r9,%rdx
- mulxq %r15,%rdx,%rax
- adcxq %rcx,%r12
- adoxq %rbp,%r13
-
- adcxq %r8,%r13
- adoxq %r8,%r8
- adcq $0,%r8
-
-
- mulxq 0+128(%r14),%rcx,%rbp
- adcxq %rcx,%r9
- adoxq %rbp,%r10
-
- mulxq 8+128(%r14),%rcx,%rbp
- adcxq %rcx,%r10
- adoxq %rbp,%r11
-
- mulxq 16+128(%r14),%rcx,%rbp
- adcxq %rcx,%r11
- adoxq %rbp,%r12
-
- mulxq 24+128(%r14),%rcx,%rbp
- movq 16(%rbx),%rdx
- adcxq %rcx,%r12
- adoxq %rbp,%r13
- adcxq %r9,%r13
- adoxq %r9,%r8
- adcq $0,%r8
-
-
- mulxq 0+128(%rsi),%rcx,%rbp
- adcxq %rcx,%r10
- adoxq %rbp,%r11
-
- mulxq 8+128(%rsi),%rcx,%rbp
- adcxq %rcx,%r11
- adoxq %rbp,%r12
-
- mulxq 16+128(%rsi),%rcx,%rbp
- adcxq %rcx,%r12
- adoxq %rbp,%r13
-
- mulxq 24+128(%rsi),%rcx,%rbp
- movq %r10,%rdx
- mulxq %r15,%rdx,%rax
- adcxq %rcx,%r13
- adoxq %rbp,%r8
-
- adcxq %r9,%r8
- adoxq %r9,%r9
- adcq $0,%r9
-
-
- mulxq 0+128(%r14),%rcx,%rbp
- adcxq %rcx,%r10
- adoxq %rbp,%r11
-
- mulxq 8+128(%r14),%rcx,%rbp
- adcxq %rcx,%r11
- adoxq %rbp,%r12
-
- mulxq 16+128(%r14),%rcx,%rbp
- adcxq %rcx,%r12
- adoxq %rbp,%r13
-
- mulxq 24+128(%r14),%rcx,%rbp
- movq 24(%rbx),%rdx
- adcxq %rcx,%r13
- adoxq %rbp,%r8
- adcxq %r10,%r8
- adoxq %r10,%r9
- adcq $0,%r9
-
-
- mulxq 0+128(%rsi),%rcx,%rbp
- adcxq %rcx,%r11
- adoxq %rbp,%r12
-
- mulxq 8+128(%rsi),%rcx,%rbp
- adcxq %rcx,%r12
- adoxq %rbp,%r13
-
- mulxq 16+128(%rsi),%rcx,%rbp
- adcxq %rcx,%r13
- adoxq %rbp,%r8
-
- mulxq 24+128(%rsi),%rcx,%rbp
- movq %r11,%rdx
- mulxq %r15,%rdx,%rax
- adcxq %rcx,%r8
- adoxq %rbp,%r9
-
- adcxq %r10,%r9
- adoxq %r10,%r10
- adcq $0,%r10
-
-
- mulxq 0+128(%r14),%rcx,%rbp
- adcxq %rcx,%r11
- adoxq %rbp,%r12
-
- mulxq 8+128(%r14),%rcx,%rbp
- adcxq %rcx,%r12
- adoxq %rbp,%r13
-
- mulxq 16+128(%r14),%rcx,%rbp
- adcxq %rcx,%r13
- adoxq %rbp,%r8
-
- mulxq 24+128(%r14),%rcx,%rbp
- leaq 128(%r14),%r14
- movq %r12,%rbx
- adcxq %rcx,%r8
- adoxq %rbp,%r9
- movq %r13,%rdx
- adcxq %r11,%r9
- adoxq %r11,%r10
- adcq $0,%r10
-
-
-
- movq %r8,%rcx
- subq 0(%r14),%r12
- sbbq 8(%r14),%r13
- sbbq 16(%r14),%r8
- movq %r9,%rbp
- sbbq 24(%r14),%r9
- sbbq $0,%r10
-
- cmovcq %rbx,%r12
- cmovcq %rdx,%r13
- cmovcq %rcx,%r8
- cmovcq %rbp,%r9
-
- movq %r12,0(%rdi)
- movq %r13,8(%rdi)
- movq %r8,16(%rdi)
- movq %r9,24(%rdi)
-
- movq 0(%rsp),%r15
-.cfi_restore %r15
- movq 8(%rsp),%r14
-.cfi_restore %r14
- movq 16(%rsp),%r13
-.cfi_restore %r13
- movq 24(%rsp),%r12
-.cfi_restore %r12
- movq 32(%rsp),%rbx
-.cfi_restore %rbx
- movq 40(%rsp),%rbp
-.cfi_restore %rbp
- leaq 48(%rsp),%rsp
-.cfi_adjust_cfa_offset -48
-.Lord_mulx_epilogue:
- .byte 0xf3,0xc3
-.cfi_endproc
-.size ecp_nistz256_ord_mul_montx,.-ecp_nistz256_ord_mul_montx
-
-.type ecp_nistz256_ord_sqr_montx,@function
-.align 32
-ecp_nistz256_ord_sqr_montx:
-.cfi_startproc
-.Lecp_nistz256_ord_sqr_montx:
- pushq %rbp
-.cfi_adjust_cfa_offset 8
-.cfi_offset %rbp,-16
- pushq %rbx
-.cfi_adjust_cfa_offset 8
-.cfi_offset %rbx,-24
- pushq %r12
-.cfi_adjust_cfa_offset 8
-.cfi_offset %r12,-32
- pushq %r13
-.cfi_adjust_cfa_offset 8
-.cfi_offset %r13,-40
- pushq %r14
-.cfi_adjust_cfa_offset 8
-.cfi_offset %r14,-48
- pushq %r15
-.cfi_adjust_cfa_offset 8
-.cfi_offset %r15,-56
-.Lord_sqrx_body:
-
- movq %rdx,%rbx
- movq 0(%rsi),%rdx
- movq 8(%rsi),%r14
- movq 16(%rsi),%r15
- movq 24(%rsi),%r8
- leaq .Lord(%rip),%rsi
- jmp .Loop_ord_sqrx
-
-.align 32
-.Loop_ord_sqrx:
- mulxq %r14,%r9,%r10
- mulxq %r15,%rcx,%r11
- movq %rdx,%rax
-.byte 102,73,15,110,206
- mulxq %r8,%rbp,%r12
- movq %r14,%rdx
- addq %rcx,%r10
-.byte 102,73,15,110,215
- adcq %rbp,%r11
- adcq $0,%r12
- xorq %r13,%r13
-
- mulxq %r15,%rcx,%rbp
- adcxq %rcx,%r11
- adoxq %rbp,%r12
-
- mulxq %r8,%rcx,%rbp
- movq %r15,%rdx
- adcxq %rcx,%r12
- adoxq %rbp,%r13
- adcq $0,%r13
-
- mulxq %r8,%rcx,%r14
- movq %rax,%rdx
-.byte 102,73,15,110,216
- xorq %r15,%r15
- adcxq %r9,%r9
- adoxq %rcx,%r13
- adcxq %r10,%r10
- adoxq %r15,%r14
-
-
- mulxq %rdx,%r8,%rbp
-.byte 102,72,15,126,202
- adcxq %r11,%r11
- adoxq %rbp,%r9
- adcxq %r12,%r12
- mulxq %rdx,%rcx,%rax
-.byte 102,72,15,126,210
- adcxq %r13,%r13
- adoxq %rcx,%r10
- adcxq %r14,%r14
- mulxq %rdx,%rcx,%rbp
-.byte 0x67
-.byte 102,72,15,126,218
- adoxq %rax,%r11
- adcxq %r15,%r15
- adoxq %rcx,%r12
- adoxq %rbp,%r13
- mulxq %rdx,%rcx,%rax
- adoxq %rcx,%r14
- adoxq %rax,%r15
-
-
- movq %r8,%rdx
- mulxq 32(%rsi),%rdx,%rcx
-
- xorq %rax,%rax
- mulxq 0(%rsi),%rcx,%rbp
- adcxq %rcx,%r8
- adoxq %rbp,%r9
- mulxq 8(%rsi),%rcx,%rbp
- adcxq %rcx,%r9
- adoxq %rbp,%r10
- mulxq 16(%rsi),%rcx,%rbp
- adcxq %rcx,%r10
- adoxq %rbp,%r11
- mulxq 24(%rsi),%rcx,%rbp
- adcxq %rcx,%r11
- adoxq %rbp,%r8
- adcxq %rax,%r8
-
-
- movq %r9,%rdx
- mulxq 32(%rsi),%rdx,%rcx
-
- mulxq 0(%rsi),%rcx,%rbp
- adoxq %rcx,%r9
- adcxq %rbp,%r10
- mulxq 8(%rsi),%rcx,%rbp
- adoxq %rcx,%r10
- adcxq %rbp,%r11
- mulxq 16(%rsi),%rcx,%rbp
- adoxq %rcx,%r11
- adcxq %rbp,%r8
- mulxq 24(%rsi),%rcx,%rbp
- adoxq %rcx,%r8
- adcxq %rbp,%r9
- adoxq %rax,%r9
-
-
- movq %r10,%rdx
- mulxq 32(%rsi),%rdx,%rcx
-
- mulxq 0(%rsi),%rcx,%rbp
- adcxq %rcx,%r10
- adoxq %rbp,%r11
- mulxq 8(%rsi),%rcx,%rbp
- adcxq %rcx,%r11
- adoxq %rbp,%r8
- mulxq 16(%rsi),%rcx,%rbp
- adcxq %rcx,%r8
- adoxq %rbp,%r9
- mulxq 24(%rsi),%rcx,%rbp
- adcxq %rcx,%r9
- adoxq %rbp,%r10
- adcxq %rax,%r10
-
-
- movq %r11,%rdx
- mulxq 32(%rsi),%rdx,%rcx
-
- mulxq 0(%rsi),%rcx,%rbp
- adoxq %rcx,%r11
- adcxq %rbp,%r8
- mulxq 8(%rsi),%rcx,%rbp
- adoxq %rcx,%r8
- adcxq %rbp,%r9
- mulxq 16(%rsi),%rcx,%rbp
- adoxq %rcx,%r9
- adcxq %rbp,%r10
- mulxq 24(%rsi),%rcx,%rbp
- adoxq %rcx,%r10
- adcxq %rbp,%r11
- adoxq %rax,%r11
-
-
- addq %r8,%r12
- adcq %r13,%r9
- movq %r12,%rdx
- adcq %r14,%r10
- adcq %r15,%r11
- movq %r9,%r14
- adcq $0,%rax
-
-
- subq 0(%rsi),%r12
- movq %r10,%r15
- sbbq 8(%rsi),%r9
- sbbq 16(%rsi),%r10
- movq %r11,%r8
- sbbq 24(%rsi),%r11
- sbbq $0,%rax
-
- cmovncq %r12,%rdx
- cmovncq %r9,%r14
- cmovncq %r10,%r15
- cmovncq %r11,%r8
-
- decq %rbx
- jnz .Loop_ord_sqrx
-
- movq %rdx,0(%rdi)
- movq %r14,8(%rdi)
- pxor %xmm1,%xmm1
- movq %r15,16(%rdi)
- pxor %xmm2,%xmm2
- movq %r8,24(%rdi)
- pxor %xmm3,%xmm3
-
- movq 0(%rsp),%r15
-.cfi_restore %r15
- movq 8(%rsp),%r14
-.cfi_restore %r14
- movq 16(%rsp),%r13
-.cfi_restore %r13
- movq 24(%rsp),%r12
-.cfi_restore %r12
- movq 32(%rsp),%rbx
-.cfi_restore %rbx
- movq 40(%rsp),%rbp
-.cfi_restore %rbp
- leaq 48(%rsp),%rsp
-.cfi_adjust_cfa_offset -48
-.Lord_sqrx_epilogue:
- .byte 0xf3,0xc3
-.cfi_endproc
-.size ecp_nistz256_ord_sqr_montx,.-ecp_nistz256_ord_sqr_montx
-
@@ -3876,10 +3412,10 @@ ecp_nistz256_ord_sqr_montx:
.type ecp_nistz256_to_mont,@function
.align 32
ecp_nistz256_to_mont:
- movl $0x80100,%ecx
- andl OPENSSL_ia32cap_P+8(%rip),%ecx
+.cfi_startproc
leaq .LRR(%rip),%rdx
jmp .Lmul_mont
+.cfi_endproc
.size ecp_nistz256_to_mont,.-ecp_nistz256_to_mont
@@ -3893,8 +3429,6 @@ ecp_nistz256_to_mont:
.align 32
ecp_nistz256_mul_mont:
.cfi_startproc
- movl $0x80100,%ecx
- andl OPENSSL_ia32cap_P+8(%rip),%ecx
.Lmul_mont:
pushq %rbp
.cfi_adjust_cfa_offset 8
@@ -3915,8 +3449,6 @@ ecp_nistz256_mul_mont:
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
.Lmul_body:
- cmpl $0x80100,%ecx
- je .Lmul_montx
movq %rdx,%rbx
movq 0(%rdx),%rax
movq 0(%rsi),%r9
@@ -3925,19 +3457,6 @@ ecp_nistz256_mul_mont:
movq 24(%rsi),%r12
call __ecp_nistz256_mul_montq
- jmp .Lmul_mont_done
-
-.align 32
-.Lmul_montx:
- movq %rdx,%rbx
- movq 0(%rdx),%rdx
- movq 0(%rsi),%r9
- movq 8(%rsi),%r10
- movq 16(%rsi),%r11
- movq 24(%rsi),%r12
- leaq -128(%rsi),%rsi
-
- call __ecp_nistz256_mul_montx
.Lmul_mont_done:
movq 0(%rsp),%r15
.cfi_restore %r15
@@ -4188,8 +3707,6 @@ __ecp_nistz256_mul_montq:
.align 32
ecp_nistz256_sqr_mont:
.cfi_startproc
- movl $0x80100,%ecx
- andl OPENSSL_ia32cap_P+8(%rip),%ecx
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-16
@@ -4209,25 +3726,12 @@ ecp_nistz256_sqr_mont:
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
.Lsqr_body:
- cmpl $0x80100,%ecx
- je .Lsqr_montx
movq 0(%rsi),%rax
movq 8(%rsi),%r14
movq 16(%rsi),%r15
movq 24(%rsi),%r8
call __ecp_nistz256_sqr_montq
- jmp .Lsqr_mont_done
-
-.align 32
-.Lsqr_montx:
- movq 0(%rsi),%rdx
- movq 8(%rsi),%r14
- movq 16(%rsi),%r15
- movq 24(%rsi),%r8
- leaq -128(%rsi),%rsi
-
- call __ecp_nistz256_sqr_montx
.Lsqr_mont_done:
movq 0(%rsp),%r15
.cfi_restore %r15
@@ -4411,304 +3915,6 @@ __ecp_nistz256_sqr_montq:
.byte 0xf3,0xc3
.cfi_endproc
.size __ecp_nistz256_sqr_montq,.-__ecp_nistz256_sqr_montq
-.type __ecp_nistz256_mul_montx,@function
-.align 32
-__ecp_nistz256_mul_montx:
-.cfi_startproc
-
-
- mulxq %r9,%r8,%r9
- mulxq %r10,%rcx,%r10
- movq $32,%r14
- xorq %r13,%r13
- mulxq %r11,%rbp,%r11
- movq .Lpoly+24(%rip),%r15
- adcq %rcx,%r9
- mulxq %r12,%rcx,%r12
- movq %r8,%rdx
- adcq %rbp,%r10
- shlxq %r14,%r8,%rbp
- adcq %rcx,%r11
- shrxq %r14,%r8,%rcx
- adcq $0,%r12
-
-
-
- addq %rbp,%r9
- adcq %rcx,%r10
-
- mulxq %r15,%rcx,%rbp
- movq 8(%rbx),%rdx
- adcq %rcx,%r11
- adcq %rbp,%r12
- adcq $0,%r13
- xorq %r8,%r8
-
-
-
- mulxq 0+128(%rsi),%rcx,%rbp
- adcxq %rcx,%r9
- adoxq %rbp,%r10
-
- mulxq 8+128(%rsi),%rcx,%rbp
- adcxq %rcx,%r10
- adoxq %rbp,%r11
-
- mulxq 16+128(%rsi),%rcx,%rbp
- adcxq %rcx,%r11
- adoxq %rbp,%r12
-
- mulxq 24+128(%rsi),%rcx,%rbp
- movq %r9,%rdx
- adcxq %rcx,%r12
- shlxq %r14,%r9,%rcx
- adoxq %rbp,%r13
- shrxq %r14,%r9,%rbp
-
- adcxq %r8,%r13
- adoxq %r8,%r8
- adcq $0,%r8
-
-
-
- addq %rcx,%r10
- adcq %rbp,%r11
-
- mulxq %r15,%rcx,%rbp
- movq 16(%rbx),%rdx
- adcq %rcx,%r12
- adcq %rbp,%r13
- adcq $0,%r8
- xorq %r9,%r9
-
-
-
- mulxq 0+128(%rsi),%rcx,%rbp
- adcxq %rcx,%r10
- adoxq %rbp,%r11
-
- mulxq 8+128(%rsi),%rcx,%rbp
- adcxq %rcx,%r11
- adoxq %rbp,%r12
-
- mulxq 16+128(%rsi),%rcx,%rbp
- adcxq %rcx,%r12
- adoxq %rbp,%r13
-
- mulxq 24+128(%rsi),%rcx,%rbp
- movq %r10,%rdx
- adcxq %rcx,%r13
- shlxq %r14,%r10,%rcx
- adoxq %rbp,%r8
- shrxq %r14,%r10,%rbp
-
- adcxq %r9,%r8
- adoxq %r9,%r9
- adcq $0,%r9
-
-
-
- addq %rcx,%r11
- adcq %rbp,%r12
-
- mulxq %r15,%rcx,%rbp
- movq 24(%rbx),%rdx
- adcq %rcx,%r13
- adcq %rbp,%r8
- adcq $0,%r9
- xorq %r10,%r10
-
-
-
- mulxq 0+128(%rsi),%rcx,%rbp
- adcxq %rcx,%r11
- adoxq %rbp,%r12
-
- mulxq 8+128(%rsi),%rcx,%rbp
- adcxq %rcx,%r12
- adoxq %rbp,%r13
-
- mulxq 16+128(%rsi),%rcx,%rbp
- adcxq %rcx,%r13
- adoxq %rbp,%r8
-
- mulxq 24+128(%rsi),%rcx,%rbp
- movq %r11,%rdx
- adcxq %rcx,%r8
- shlxq %r14,%r11,%rcx
- adoxq %rbp,%r9
- shrxq %r14,%r11,%rbp
-
- adcxq %r10,%r9
- adoxq %r10,%r10
- adcq $0,%r10
-
-
-
- addq %rcx,%r12
- adcq %rbp,%r13
-
- mulxq %r15,%rcx,%rbp
- movq %r12,%rbx
- movq .Lpoly+8(%rip),%r14
- adcq %rcx,%r8
- movq %r13,%rdx
- adcq %rbp,%r9
- adcq $0,%r10
-
-
-
- xorl %eax,%eax
- movq %r8,%rcx
- sbbq $-1,%r12
- sbbq %r14,%r13
- sbbq $0,%r8
- movq %r9,%rbp
- sbbq %r15,%r9
- sbbq $0,%r10
-
- cmovcq %rbx,%r12
- cmovcq %rdx,%r13
- movq %r12,0(%rdi)
- cmovcq %rcx,%r8
- movq %r13,8(%rdi)
- cmovcq %rbp,%r9
- movq %r8,16(%rdi)
- movq %r9,24(%rdi)
-
- .byte 0xf3,0xc3
-.cfi_endproc
-.size __ecp_nistz256_mul_montx,.-__ecp_nistz256_mul_montx
-
-.type __ecp_nistz256_sqr_montx,@function
-.align 32
-__ecp_nistz256_sqr_montx:
-.cfi_startproc
- mulxq %r14,%r9,%r10
- mulxq %r15,%rcx,%r11
- xorl %eax,%eax
- adcq %rcx,%r10
- mulxq %r8,%rbp,%r12
- movq %r14,%rdx
- adcq %rbp,%r11
- adcq $0,%r12
- xorq %r13,%r13
-
-
- mulxq %r15,%rcx,%rbp
- adcxq %rcx,%r11
- adoxq %rbp,%r12
-
- mulxq %r8,%rcx,%rbp
- movq %r15,%rdx
- adcxq %rcx,%r12
- adoxq %rbp,%r13
- adcq $0,%r13
-
-
- mulxq %r8,%rcx,%r14
- movq 0+128(%rsi),%rdx
- xorq %r15,%r15
- adcxq %r9,%r9
- adoxq %rcx,%r13
- adcxq %r10,%r10
- adoxq %r15,%r14
-
- mulxq %rdx,%r8,%rbp
- movq 8+128(%rsi),%rdx
- adcxq %r11,%r11
- adoxq %rbp,%r9
- adcxq %r12,%r12
- mulxq %rdx,%rcx,%rax
- movq 16+128(%rsi),%rdx
- adcxq %r13,%r13
- adoxq %rcx,%r10
- adcxq %r14,%r14
-.byte 0x67
- mulxq %rdx,%rcx,%rbp
- movq 24+128(%rsi),%rdx
- adoxq %rax,%r11
- adcxq %r15,%r15
- adoxq %rcx,%r12
- movq $32,%rsi
- adoxq %rbp,%r13
-.byte 0x67,0x67
- mulxq %rdx,%rcx,%rax
- movq .Lpoly+24(%rip),%rdx
- adoxq %rcx,%r14
- shlxq %rsi,%r8,%rcx
- adoxq %rax,%r15
- shrxq %rsi,%r8,%rax
- movq %rdx,%rbp
-
-
- addq %rcx,%r9
- adcq %rax,%r10
-
- mulxq %r8,%rcx,%r8
- adcq %rcx,%r11
- shlxq %rsi,%r9,%rcx
- adcq $0,%r8
- shrxq %rsi,%r9,%rax
-
-
- addq %rcx,%r10
- adcq %rax,%r11
-
- mulxq %r9,%rcx,%r9
- adcq %rcx,%r8
- shlxq %rsi,%r10,%rcx
- adcq $0,%r9
- shrxq %rsi,%r10,%rax
-
-
- addq %rcx,%r11
- adcq %rax,%r8
-
- mulxq %r10,%rcx,%r10
- adcq %rcx,%r9
- shlxq %rsi,%r11,%rcx
- adcq $0,%r10
- shrxq %rsi,%r11,%rax
-
-
- addq %rcx,%r8
- adcq %rax,%r9
-
- mulxq %r11,%rcx,%r11
- adcq %rcx,%r10
- adcq $0,%r11
-
- xorq %rdx,%rdx
- addq %r8,%r12
- movq .Lpoly+8(%rip),%rsi
- adcq %r9,%r13
- movq %r12,%r8
- adcq %r10,%r14
- adcq %r11,%r15
- movq %r13,%r9
- adcq $0,%rdx
-
- subq $-1,%r12
- movq %r14,%r10
- sbbq %rsi,%r13
- sbbq $0,%r14
- movq %r15,%r11
- sbbq %rbp,%r15
- sbbq $0,%rdx
-
- cmovcq %r8,%r12
- cmovcq %r9,%r13
- movq %r12,0(%rdi)
- cmovcq %r10,%r14
- movq %r13,8(%rdi)
- cmovcq %r11,%r15
- movq %r14,16(%rdi)
- movq %r15,24(%rdi)
-
- .byte 0xf3,0xc3
-.cfi_endproc
-.size __ecp_nistz256_sqr_montx,.-__ecp_nistz256_sqr_montx
@@ -4823,6 +4029,7 @@ ecp_nistz256_from_mont:
.type ecp_nistz256_scatter_w5,@function
.align 32
ecp_nistz256_scatter_w5:
+.cfi_startproc
leal -3(%rdx,%rdx,2),%edx
movdqa 0(%rsi),%xmm0
shll $5,%edx
@@ -4839,6 +4046,7 @@ ecp_nistz256_scatter_w5:
movdqa %xmm5,80(%rdi,%rdx,1)
.byte 0xf3,0xc3
+.cfi_endproc
.size ecp_nistz256_scatter_w5,.-ecp_nistz256_scatter_w5
@@ -4848,9 +4056,6 @@ ecp_nistz256_scatter_w5:
.align 32
ecp_nistz256_gather_w5:
.cfi_startproc
- movl OPENSSL_ia32cap_P+8(%rip),%eax
- testl $32,%eax
- jnz .Lavx2_gather_w5
movdqa .LOne(%rip),%xmm0
movd %edx,%xmm1
@@ -4912,6 +4117,7 @@ ecp_nistz256_gather_w5:
.type ecp_nistz256_scatter_w7,@function
.align 32
ecp_nistz256_scatter_w7:
+.cfi_startproc
movdqu 0(%rsi),%xmm0
shll $6,%edx
movdqu 16(%rsi),%xmm1
@@ -4923,6 +4129,7 @@ ecp_nistz256_scatter_w7:
movdqa %xmm3,48(%rdi,%rdx,1)
.byte 0xf3,0xc3
+.cfi_endproc
.size ecp_nistz256_scatter_w7,.-ecp_nistz256_scatter_w7
@@ -4932,9 +4139,6 @@ ecp_nistz256_scatter_w7:
.align 32
ecp_nistz256_gather_w7:
.cfi_startproc
- movl OPENSSL_ia32cap_P+8(%rip),%eax
- testl $32,%eax
- jnz .Lavx2_gather_w7
movdqa .LOne(%rip),%xmm8
movd %edx,%xmm1
@@ -4978,148 +4182,14 @@ ecp_nistz256_gather_w7:
.cfi_endproc
.LSEH_end_ecp_nistz256_gather_w7:
.size ecp_nistz256_gather_w7,.-ecp_nistz256_gather_w7
-
-
-.type ecp_nistz256_avx2_gather_w5,@function
-.align 32
-ecp_nistz256_avx2_gather_w5:
-.cfi_startproc
-.Lavx2_gather_w5:
- vzeroupper
- vmovdqa .LTwo(%rip),%ymm0
-
- vpxor %ymm2,%ymm2,%ymm2
- vpxor %ymm3,%ymm3,%ymm3
- vpxor %ymm4,%ymm4,%ymm4
-
- vmovdqa .LOne(%rip),%ymm5
- vmovdqa .LTwo(%rip),%ymm10
-
- vmovd %edx,%xmm1
- vpermd %ymm1,%ymm2,%ymm1
-
- movq $8,%rax
-.Lselect_loop_avx2_w5:
-
- vmovdqa 0(%rsi),%ymm6
- vmovdqa 32(%rsi),%ymm7
- vmovdqa 64(%rsi),%ymm8
-
- vmovdqa 96(%rsi),%ymm11
- vmovdqa 128(%rsi),%ymm12
- vmovdqa 160(%rsi),%ymm13
-
- vpcmpeqd %ymm1,%ymm5,%ymm9
- vpcmpeqd %ymm1,%ymm10,%ymm14
-
- vpaddd %ymm0,%ymm5,%ymm5
- vpaddd %ymm0,%ymm10,%ymm10
- leaq 192(%rsi),%rsi
-
- vpand %ymm9,%ymm6,%ymm6
- vpand %ymm9,%ymm7,%ymm7
- vpand %ymm9,%ymm8,%ymm8
- vpand %ymm14,%ymm11,%ymm11
- vpand %ymm14,%ymm12,%ymm12
- vpand %ymm14,%ymm13,%ymm13
-
- vpxor %ymm6,%ymm2,%ymm2
- vpxor %ymm7,%ymm3,%ymm3
- vpxor %ymm8,%ymm4,%ymm4
- vpxor %ymm11,%ymm2,%ymm2
- vpxor %ymm12,%ymm3,%ymm3
- vpxor %ymm13,%ymm4,%ymm4
-
- decq %rax
- jnz .Lselect_loop_avx2_w5
-
- vmovdqu %ymm2,0(%rdi)
- vmovdqu %ymm3,32(%rdi)
- vmovdqu %ymm4,64(%rdi)
- vzeroupper
- .byte 0xf3,0xc3
-.cfi_endproc
-.LSEH_end_ecp_nistz256_avx2_gather_w5:
-.size ecp_nistz256_avx2_gather_w5,.-ecp_nistz256_avx2_gather_w5
-
-
-
.globl ecp_nistz256_avx2_gather_w7
.type ecp_nistz256_avx2_gather_w7,@function
.align 32
ecp_nistz256_avx2_gather_w7:
.cfi_startproc
-.Lavx2_gather_w7:
- vzeroupper
- vmovdqa .LThree(%rip),%ymm0
-
- vpxor %ymm2,%ymm2,%ymm2
- vpxor %ymm3,%ymm3,%ymm3
-
- vmovdqa .LOne(%rip),%ymm4
- vmovdqa .LTwo(%rip),%ymm8
- vmovdqa .LThree(%rip),%ymm12
-
- vmovd %edx,%xmm1
- vpermd %ymm1,%ymm2,%ymm1
-
-
- movq $21,%rax
-.Lselect_loop_avx2_w7:
-
- vmovdqa 0(%rsi),%ymm5
- vmovdqa 32(%rsi),%ymm6
-
- vmovdqa 64(%rsi),%ymm9
- vmovdqa 96(%rsi),%ymm10
-
- vmovdqa 128(%rsi),%ymm13
- vmovdqa 160(%rsi),%ymm14
-
- vpcmpeqd %ymm1,%ymm4,%ymm7
- vpcmpeqd %ymm1,%ymm8,%ymm11
- vpcmpeqd %ymm1,%ymm12,%ymm15
-
- vpaddd %ymm0,%ymm4,%ymm4
- vpaddd %ymm0,%ymm8,%ymm8
- vpaddd %ymm0,%ymm12,%ymm12
- leaq 192(%rsi),%rsi
-
- vpand %ymm7,%ymm5,%ymm5
- vpand %ymm7,%ymm6,%ymm6
- vpand %ymm11,%ymm9,%ymm9
- vpand %ymm11,%ymm10,%ymm10
- vpand %ymm15,%ymm13,%ymm13
- vpand %ymm15,%ymm14,%ymm14
-
- vpxor %ymm5,%ymm2,%ymm2
- vpxor %ymm6,%ymm3,%ymm3
- vpxor %ymm9,%ymm2,%ymm2
- vpxor %ymm10,%ymm3,%ymm3
- vpxor %ymm13,%ymm2,%ymm2
- vpxor %ymm14,%ymm3,%ymm3
-
- decq %rax
- jnz .Lselect_loop_avx2_w7
-
-
- vmovdqa 0(%rsi),%ymm5
- vmovdqa 32(%rsi),%ymm6
-
- vpcmpeqd %ymm1,%ymm4,%ymm7
-
- vpand %ymm7,%ymm5,%ymm5
- vpand %ymm7,%ymm6,%ymm6
-
- vpxor %ymm5,%ymm2,%ymm2
- vpxor %ymm6,%ymm3,%ymm3
-
- vmovdqu %ymm2,0(%rdi)
- vmovdqu %ymm3,32(%rdi)
- vzeroupper
+.byte 0x0f,0x0b
.byte 0xf3,0xc3
.cfi_endproc
-.LSEH_end_ecp_nistz256_avx2_gather_w7:
.size ecp_nistz256_avx2_gather_w7,.-ecp_nistz256_avx2_gather_w7
.type __ecp_nistz256_add_toq,@function
.align 32
@@ -5255,10 +4325,6 @@ __ecp_nistz256_mul_by_2q:
.align 32
ecp_nistz256_point_double:
.cfi_startproc
- movl $0x80100,%ecx
- andl OPENSSL_ia32cap_P+8(%rip),%ecx
- cmpl $0x80100,%ecx
- je .Lpoint_doublex
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-16
@@ -5487,10 +4553,6 @@ ecp_nistz256_point_double:
.align 32
ecp_nistz256_point_add:
.cfi_startproc
- movl $0x80100,%ecx
- andl OPENSSL_ia32cap_P+8(%rip),%ecx
- cmpl $0x80100,%ecx
- je .Lpoint_addx
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-16
@@ -5657,26 +4719,16 @@ ecp_nistz256_point_add:
orq %r8,%r12
orq %r9,%r12
-.byte 0x3e
- jnz .Ladd_proceedq
.byte 102,73,15,126,208
.byte 102,73,15,126,217
- testq %r8,%r8
- jnz .Ladd_proceedq
- testq %r9,%r9
- jz .Ladd_doubleq
-.byte 102,72,15,126,199
- pxor %xmm0,%xmm0
- movdqu %xmm0,0(%rdi)
- movdqu %xmm0,16(%rdi)
- movdqu %xmm0,32(%rdi)
- movdqu %xmm0,48(%rdi)
- movdqu %xmm0,64(%rdi)
- movdqu %xmm0,80(%rdi)
- jmp .Ladd_doneq
+ orq %r8,%r12
+ orq %r9,%r12
+
+
+.byte 0x3e
+ jnz .Ladd_proceedq
-.align 32
.Ladd_doubleq:
.byte 102,72,15,126,206
.byte 102,72,15,126,199
@@ -5915,10 +4967,6 @@ ecp_nistz256_point_add:
.align 32
ecp_nistz256_point_add_affine:
.cfi_startproc
- movl $0x80100,%ecx
- andl OPENSSL_ia32cap_P+8(%rip),%ecx
- cmpl $0x80100,%ecx
- je .Lpoint_add_affinex
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-16
@@ -6242,1118 +5290,3 @@ ecp_nistz256_point_add_affine:
.byte 0xf3,0xc3
.cfi_endproc
.size ecp_nistz256_point_add_affine,.-ecp_nistz256_point_add_affine
-.type __ecp_nistz256_add_tox,@function
-.align 32
-__ecp_nistz256_add_tox:
-.cfi_startproc
- xorq %r11,%r11
- adcq 0(%rbx),%r12
- adcq 8(%rbx),%r13
- movq %r12,%rax
- adcq 16(%rbx),%r8
- adcq 24(%rbx),%r9
- movq %r13,%rbp
- adcq $0,%r11
-
- xorq %r10,%r10
- sbbq $-1,%r12
- movq %r8,%rcx
- sbbq %r14,%r13
- sbbq $0,%r8
- movq %r9,%r10
- sbbq %r15,%r9
- sbbq $0,%r11
-
- cmovcq %rax,%r12
- cmovcq %rbp,%r13
- movq %r12,0(%rdi)
- cmovcq %rcx,%r8
- movq %r13,8(%rdi)
- cmovcq %r10,%r9
- movq %r8,16(%rdi)
- movq %r9,24(%rdi)
-
- .byte 0xf3,0xc3
-.cfi_endproc
-.size __ecp_nistz256_add_tox,.-__ecp_nistz256_add_tox
-
-.type __ecp_nistz256_sub_fromx,@function
-.align 32
-__ecp_nistz256_sub_fromx:
-.cfi_startproc
- xorq %r11,%r11
- sbbq 0(%rbx),%r12
- sbbq 8(%rbx),%r13
- movq %r12,%rax
- sbbq 16(%rbx),%r8
- sbbq 24(%rbx),%r9
- movq %r13,%rbp
- sbbq $0,%r11
-
- xorq %r10,%r10
- adcq $-1,%r12
- movq %r8,%rcx
- adcq %r14,%r13
- adcq $0,%r8
- movq %r9,%r10
- adcq %r15,%r9
-
- btq $0,%r11
- cmovncq %rax,%r12
- cmovncq %rbp,%r13
- movq %r12,0(%rdi)
- cmovncq %rcx,%r8
- movq %r13,8(%rdi)
- cmovncq %r10,%r9
- movq %r8,16(%rdi)
- movq %r9,24(%rdi)
-
- .byte 0xf3,0xc3
-.cfi_endproc
-.size __ecp_nistz256_sub_fromx,.-__ecp_nistz256_sub_fromx
-
-.type __ecp_nistz256_subx,@function
-.align 32
-__ecp_nistz256_subx:
-.cfi_startproc
- xorq %r11,%r11
- sbbq %r12,%rax
- sbbq %r13,%rbp
- movq %rax,%r12
- sbbq %r8,%rcx
- sbbq %r9,%r10
- movq %rbp,%r13
- sbbq $0,%r11
-
- xorq %r9,%r9
- adcq $-1,%rax
- movq %rcx,%r8
- adcq %r14,%rbp
- adcq $0,%rcx
- movq %r10,%r9
- adcq %r15,%r10
-
- btq $0,%r11
- cmovcq %rax,%r12
- cmovcq %rbp,%r13
- cmovcq %rcx,%r8
- cmovcq %r10,%r9
-
- .byte 0xf3,0xc3
-.cfi_endproc
-.size __ecp_nistz256_subx,.-__ecp_nistz256_subx
-
-.type __ecp_nistz256_mul_by_2x,@function
-.align 32
-__ecp_nistz256_mul_by_2x:
-.cfi_startproc
- xorq %r11,%r11
- adcq %r12,%r12
- adcq %r13,%r13
- movq %r12,%rax
- adcq %r8,%r8
- adcq %r9,%r9
- movq %r13,%rbp
- adcq $0,%r11
-
- xorq %r10,%r10
- sbbq $-1,%r12
- movq %r8,%rcx
- sbbq %r14,%r13
- sbbq $0,%r8
- movq %r9,%r10
- sbbq %r15,%r9
- sbbq $0,%r11
-
- cmovcq %rax,%r12
- cmovcq %rbp,%r13
- movq %r12,0(%rdi)
- cmovcq %rcx,%r8
- movq %r13,8(%rdi)
- cmovcq %r10,%r9
- movq %r8,16(%rdi)
- movq %r9,24(%rdi)
-
- .byte 0xf3,0xc3
-.cfi_endproc
-.size __ecp_nistz256_mul_by_2x,.-__ecp_nistz256_mul_by_2x
-.type ecp_nistz256_point_doublex,@function
-.align 32
-ecp_nistz256_point_doublex:
-.cfi_startproc
-.Lpoint_doublex:
- pushq %rbp
-.cfi_adjust_cfa_offset 8
-.cfi_offset %rbp,-16
- pushq %rbx
-.cfi_adjust_cfa_offset 8
-.cfi_offset %rbx,-24
- pushq %r12
-.cfi_adjust_cfa_offset 8
-.cfi_offset %r12,-32
- pushq %r13
-.cfi_adjust_cfa_offset 8
-.cfi_offset %r13,-40
- pushq %r14
-.cfi_adjust_cfa_offset 8
-.cfi_offset %r14,-48
- pushq %r15
-.cfi_adjust_cfa_offset 8
-.cfi_offset %r15,-56
- subq $160+8,%rsp
-.cfi_adjust_cfa_offset 32*5+8
-.Lpoint_doublex_body:
-
-.Lpoint_double_shortcutx:
- movdqu 0(%rsi),%xmm0
- movq %rsi,%rbx
- movdqu 16(%rsi),%xmm1
- movq 32+0(%rsi),%r12
- movq 32+8(%rsi),%r13
- movq 32+16(%rsi),%r8
- movq 32+24(%rsi),%r9
- movq .Lpoly+8(%rip),%r14
- movq .Lpoly+24(%rip),%r15
- movdqa %xmm0,96(%rsp)
- movdqa %xmm1,96+16(%rsp)
- leaq 32(%rdi),%r10
- leaq 64(%rdi),%r11
-.byte 102,72,15,110,199
-.byte 102,73,15,110,202
-.byte 102,73,15,110,211
-
- leaq 0(%rsp),%rdi
- call __ecp_nistz256_mul_by_2x
-
- movq 64+0(%rsi),%rdx
- movq 64+8(%rsi),%r14
- movq 64+16(%rsi),%r15
- movq 64+24(%rsi),%r8
- leaq 64-128(%rsi),%rsi
- leaq 64(%rsp),%rdi
- call __ecp_nistz256_sqr_montx
-
- movq 0+0(%rsp),%rdx
- movq 8+0(%rsp),%r14
- leaq -128+0(%rsp),%rsi
- movq 16+0(%rsp),%r15
- movq 24+0(%rsp),%r8
- leaq 0(%rsp),%rdi
- call __ecp_nistz256_sqr_montx
-
- movq 32(%rbx),%rdx
- movq 64+0(%rbx),%r9
- movq 64+8(%rbx),%r10
- movq 64+16(%rbx),%r11
- movq 64+24(%rbx),%r12
- leaq 64-128(%rbx),%rsi
- leaq 32(%rbx),%rbx
-.byte 102,72,15,126,215
- call __ecp_nistz256_mul_montx
- call __ecp_nistz256_mul_by_2x
-
- movq 96+0(%rsp),%r12
- movq 96+8(%rsp),%r13
- leaq 64(%rsp),%rbx
- movq 96+16(%rsp),%r8
- movq 96+24(%rsp),%r9
- leaq 32(%rsp),%rdi
- call __ecp_nistz256_add_tox
-
- movq 96+0(%rsp),%r12
- movq 96+8(%rsp),%r13
- leaq 64(%rsp),%rbx
- movq 96+16(%rsp),%r8
- movq 96+24(%rsp),%r9
- leaq 64(%rsp),%rdi
- call __ecp_nistz256_sub_fromx
-
- movq 0+0(%rsp),%rdx
- movq 8+0(%rsp),%r14
- leaq -128+0(%rsp),%rsi
- movq 16+0(%rsp),%r15
- movq 24+0(%rsp),%r8
-.byte 102,72,15,126,207
- call __ecp_nistz256_sqr_montx
- xorq %r9,%r9
- movq %r12,%rax
- addq $-1,%r12
- movq %r13,%r10
- adcq %rsi,%r13
- movq %r14,%rcx
- adcq $0,%r14
- movq %r15,%r8
- adcq %rbp,%r15
- adcq $0,%r9
- xorq %rsi,%rsi
- testq $1,%rax
-
- cmovzq %rax,%r12
- cmovzq %r10,%r13
- cmovzq %rcx,%r14
- cmovzq %r8,%r15
- cmovzq %rsi,%r9
-
- movq %r13,%rax
- shrq $1,%r12
- shlq $63,%rax
- movq %r14,%r10
- shrq $1,%r13
- orq %rax,%r12
- shlq $63,%r10
- movq %r15,%rcx
- shrq $1,%r14
- orq %r10,%r13
- shlq $63,%rcx
- movq %r12,0(%rdi)
- shrq $1,%r15
- movq %r13,8(%rdi)
- shlq $63,%r9
- orq %rcx,%r14
- orq %r9,%r15
- movq %r14,16(%rdi)
- movq %r15,24(%rdi)
- movq 64(%rsp),%rdx
- leaq 64(%rsp),%rbx
- movq 0+32(%rsp),%r9
- movq 8+32(%rsp),%r10
- leaq -128+32(%rsp),%rsi
- movq 16+32(%rsp),%r11
- movq 24+32(%rsp),%r12
- leaq 32(%rsp),%rdi
- call __ecp_nistz256_mul_montx
-
- leaq 128(%rsp),%rdi
- call __ecp_nistz256_mul_by_2x
-
- leaq 32(%rsp),%rbx
- leaq 32(%rsp),%rdi
- call __ecp_nistz256_add_tox
-
- movq 96(%rsp),%rdx
- leaq 96(%rsp),%rbx
- movq 0+0(%rsp),%r9
- movq 8+0(%rsp),%r10
- leaq -128+0(%rsp),%rsi
- movq 16+0(%rsp),%r11
- movq 24+0(%rsp),%r12
- leaq 0(%rsp),%rdi
- call __ecp_nistz256_mul_montx
-
- leaq 128(%rsp),%rdi
- call __ecp_nistz256_mul_by_2x
-
- movq 0+32(%rsp),%rdx
- movq 8+32(%rsp),%r14
- leaq -128+32(%rsp),%rsi
- movq 16+32(%rsp),%r15
- movq 24+32(%rsp),%r8
-.byte 102,72,15,126,199
- call __ecp_nistz256_sqr_montx
-
- leaq 128(%rsp),%rbx
- movq %r14,%r8
- movq %r15,%r9
- movq %rsi,%r14
- movq %rbp,%r15
- call __ecp_nistz256_sub_fromx
-
- movq 0+0(%rsp),%rax
- movq 0+8(%rsp),%rbp
- movq 0+16(%rsp),%rcx
- movq 0+24(%rsp),%r10
- leaq 0(%rsp),%rdi
- call __ecp_nistz256_subx
-
- movq 32(%rsp),%rdx
- leaq 32(%rsp),%rbx
- movq %r12,%r14
- xorl %ecx,%ecx
- movq %r12,0+0(%rsp)
- movq %r13,%r10
- movq %r13,0+8(%rsp)
- cmovzq %r8,%r11
- movq %r8,0+16(%rsp)
- leaq 0-128(%rsp),%rsi
- cmovzq %r9,%r12
- movq %r9,0+24(%rsp)
- movq %r14,%r9
- leaq 0(%rsp),%rdi
- call __ecp_nistz256_mul_montx
-
-.byte 102,72,15,126,203
-.byte 102,72,15,126,207
- call __ecp_nistz256_sub_fromx
-
- leaq 160+56(%rsp),%rsi
-.cfi_def_cfa %rsi,8
- movq -48(%rsi),%r15
-.cfi_restore %r15
- movq -40(%rsi),%r14
-.cfi_restore %r14
- movq -32(%rsi),%r13
-.cfi_restore %r13
- movq -24(%rsi),%r12
-.cfi_restore %r12
- movq -16(%rsi),%rbx
-.cfi_restore %rbx
- movq -8(%rsi),%rbp
-.cfi_restore %rbp
- leaq (%rsi),%rsp
-.cfi_def_cfa_register %rsp
-.Lpoint_doublex_epilogue:
- .byte 0xf3,0xc3
-.cfi_endproc
-.size ecp_nistz256_point_doublex,.-ecp_nistz256_point_doublex
-.type ecp_nistz256_point_addx,@function
-.align 32
-ecp_nistz256_point_addx:
-.cfi_startproc
-.Lpoint_addx:
- pushq %rbp
-.cfi_adjust_cfa_offset 8
-.cfi_offset %rbp,-16
- pushq %rbx
-.cfi_adjust_cfa_offset 8
-.cfi_offset %rbx,-24
- pushq %r12
-.cfi_adjust_cfa_offset 8
-.cfi_offset %r12,-32
- pushq %r13
-.cfi_adjust_cfa_offset 8
-.cfi_offset %r13,-40
- pushq %r14
-.cfi_adjust_cfa_offset 8
-.cfi_offset %r14,-48
- pushq %r15
-.cfi_adjust_cfa_offset 8
-.cfi_offset %r15,-56
- subq $576+8,%rsp
-.cfi_adjust_cfa_offset 32*18+8
-.Lpoint_addx_body:
-
- movdqu 0(%rsi),%xmm0
- movdqu 16(%rsi),%xmm1
- movdqu 32(%rsi),%xmm2
- movdqu 48(%rsi),%xmm3
- movdqu 64(%rsi),%xmm4
- movdqu 80(%rsi),%xmm5
- movq %rsi,%rbx
- movq %rdx,%rsi
- movdqa %xmm0,384(%rsp)
- movdqa %xmm1,384+16(%rsp)
- movdqa %xmm2,416(%rsp)
- movdqa %xmm3,416+16(%rsp)
- movdqa %xmm4,448(%rsp)
- movdqa %xmm5,448+16(%rsp)
- por %xmm4,%xmm5
-
- movdqu 0(%rsi),%xmm0
- pshufd $0xb1,%xmm5,%xmm3
- movdqu 16(%rsi),%xmm1
- movdqu 32(%rsi),%xmm2
- por %xmm3,%xmm5
- movdqu 48(%rsi),%xmm3
- movq 64+0(%rsi),%rdx
- movq 64+8(%rsi),%r14
- movq 64+16(%rsi),%r15
- movq 64+24(%rsi),%r8
- movdqa %xmm0,480(%rsp)
- pshufd $0x1e,%xmm5,%xmm4
- movdqa %xmm1,480+16(%rsp)
- movdqu 64(%rsi),%xmm0
- movdqu 80(%rsi),%xmm1
- movdqa %xmm2,512(%rsp)
- movdqa %xmm3,512+16(%rsp)
- por %xmm4,%xmm5
- pxor %xmm4,%xmm4
- por %xmm0,%xmm1
-.byte 102,72,15,110,199
-
- leaq 64-128(%rsi),%rsi
- movq %rdx,544+0(%rsp)
- movq %r14,544+8(%rsp)
- movq %r15,544+16(%rsp)
- movq %r8,544+24(%rsp)
- leaq 96(%rsp),%rdi
- call __ecp_nistz256_sqr_montx
-
- pcmpeqd %xmm4,%xmm5
- pshufd $0xb1,%xmm1,%xmm4
- por %xmm1,%xmm4
- pshufd $0,%xmm5,%xmm5
- pshufd $0x1e,%xmm4,%xmm3
- por %xmm3,%xmm4
- pxor %xmm3,%xmm3
- pcmpeqd %xmm3,%xmm4
- pshufd $0,%xmm4,%xmm4
- movq 64+0(%rbx),%rdx
- movq 64+8(%rbx),%r14
- movq 64+16(%rbx),%r15
- movq 64+24(%rbx),%r8
-.byte 102,72,15,110,203
-
- leaq 64-128(%rbx),%rsi
- leaq 32(%rsp),%rdi
- call __ecp_nistz256_sqr_montx
-
- movq 544(%rsp),%rdx
- leaq 544(%rsp),%rbx
- movq 0+96(%rsp),%r9
- movq 8+96(%rsp),%r10
- leaq -128+96(%rsp),%rsi
- movq 16+96(%rsp),%r11
- movq 24+96(%rsp),%r12
- leaq 224(%rsp),%rdi
- call __ecp_nistz256_mul_montx
-
- movq 448(%rsp),%rdx
- leaq 448(%rsp),%rbx
- movq 0+32(%rsp),%r9
- movq 8+32(%rsp),%r10
- leaq -128+32(%rsp),%rsi
- movq 16+32(%rsp),%r11
- movq 24+32(%rsp),%r12
- leaq 256(%rsp),%rdi
- call __ecp_nistz256_mul_montx
-
- movq 416(%rsp),%rdx
- leaq 416(%rsp),%rbx
- movq 0+224(%rsp),%r9
- movq 8+224(%rsp),%r10
- leaq -128+224(%rsp),%rsi
- movq 16+224(%rsp),%r11
- movq 24+224(%rsp),%r12
- leaq 224(%rsp),%rdi
- call __ecp_nistz256_mul_montx
-
- movq 512(%rsp),%rdx
- leaq 512(%rsp),%rbx
- movq 0+256(%rsp),%r9
- movq 8+256(%rsp),%r10
- leaq -128+256(%rsp),%rsi
- movq 16+256(%rsp),%r11
- movq 24+256(%rsp),%r12
- leaq 256(%rsp),%rdi
- call __ecp_nistz256_mul_montx
-
- leaq 224(%rsp),%rbx
- leaq 64(%rsp),%rdi
- call __ecp_nistz256_sub_fromx
-
- orq %r13,%r12
- movdqa %xmm4,%xmm2
- orq %r8,%r12
- orq %r9,%r12
- por %xmm5,%xmm2
-.byte 102,73,15,110,220
-
- movq 384(%rsp),%rdx
- leaq 384(%rsp),%rbx
- movq 0+96(%rsp),%r9
- movq 8+96(%rsp),%r10
- leaq -128+96(%rsp),%rsi
- movq 16+96(%rsp),%r11
- movq 24+96(%rsp),%r12
- leaq 160(%rsp),%rdi
- call __ecp_nistz256_mul_montx
-
- movq 480(%rsp),%rdx
- leaq 480(%rsp),%rbx
- movq 0+32(%rsp),%r9
- movq 8+32(%rsp),%r10
- leaq -128+32(%rsp),%rsi
- movq 16+32(%rsp),%r11
- movq 24+32(%rsp),%r12
- leaq 192(%rsp),%rdi
- call __ecp_nistz256_mul_montx
-
- leaq 160(%rsp),%rbx
- leaq 0(%rsp),%rdi
- call __ecp_nistz256_sub_fromx
-
- orq %r13,%r12
- orq %r8,%r12
- orq %r9,%r12
-
-.byte 0x3e
- jnz .Ladd_proceedx
-.byte 102,73,15,126,208
-.byte 102,73,15,126,217
- testq %r8,%r8
- jnz .Ladd_proceedx
- testq %r9,%r9
- jz .Ladd_doublex
-
-.byte 102,72,15,126,199
- pxor %xmm0,%xmm0
- movdqu %xmm0,0(%rdi)
- movdqu %xmm0,16(%rdi)
- movdqu %xmm0,32(%rdi)
- movdqu %xmm0,48(%rdi)
- movdqu %xmm0,64(%rdi)
- movdqu %xmm0,80(%rdi)
- jmp .Ladd_donex
-
-.align 32
-.Ladd_doublex:
-.byte 102,72,15,126,206
-.byte 102,72,15,126,199
- addq $416,%rsp
-.cfi_adjust_cfa_offset -416
- jmp .Lpoint_double_shortcutx
-.cfi_adjust_cfa_offset 416
-
-.align 32
-.Ladd_proceedx:
- movq 0+64(%rsp),%rdx
- movq 8+64(%rsp),%r14
- leaq -128+64(%rsp),%rsi
- movq 16+64(%rsp),%r15
- movq 24+64(%rsp),%r8
- leaq 96(%rsp),%rdi
- call __ecp_nistz256_sqr_montx
-
- movq 448(%rsp),%rdx
- leaq 448(%rsp),%rbx
- movq 0+0(%rsp),%r9
- movq 8+0(%rsp),%r10
- leaq -128+0(%rsp),%rsi
- movq 16+0(%rsp),%r11
- movq 24+0(%rsp),%r12
- leaq 352(%rsp),%rdi
- call __ecp_nistz256_mul_montx
-
- movq 0+0(%rsp),%rdx
- movq 8+0(%rsp),%r14
- leaq -128+0(%rsp),%rsi
- movq 16+0(%rsp),%r15
- movq 24+0(%rsp),%r8
- leaq 32(%rsp),%rdi
- call __ecp_nistz256_sqr_montx
-
- movq 544(%rsp),%rdx
- leaq 544(%rsp),%rbx
- movq 0+352(%rsp),%r9
- movq 8+352(%rsp),%r10
- leaq -128+352(%rsp),%rsi
- movq 16+352(%rsp),%r11
- movq 24+352(%rsp),%r12
- leaq 352(%rsp),%rdi
- call __ecp_nistz256_mul_montx
-
- movq 0(%rsp),%rdx
- leaq 0(%rsp),%rbx
- movq 0+32(%rsp),%r9
- movq 8+32(%rsp),%r10
- leaq -128+32(%rsp),%rsi
- movq 16+32(%rsp),%r11
- movq 24+32(%rsp),%r12
- leaq 128(%rsp),%rdi
- call __ecp_nistz256_mul_montx
-
- movq 160(%rsp),%rdx
- leaq 160(%rsp),%rbx
- movq 0+32(%rsp),%r9
- movq 8+32(%rsp),%r10
- leaq -128+32(%rsp),%rsi
- movq 16+32(%rsp),%r11
- movq 24+32(%rsp),%r12
- leaq 192(%rsp),%rdi
- call __ecp_nistz256_mul_montx
-
-
-
-
- xorq %r11,%r11
- addq %r12,%r12
- leaq 96(%rsp),%rsi
- adcq %r13,%r13
- movq %r12,%rax
- adcq %r8,%r8
- adcq %r9,%r9
- movq %r13,%rbp
- adcq $0,%r11
-
- subq $-1,%r12
- movq %r8,%rcx
- sbbq %r14,%r13
- sbbq $0,%r8
- movq %r9,%r10
- sbbq %r15,%r9
- sbbq $0,%r11
-
- cmovcq %rax,%r12
- movq 0(%rsi),%rax
- cmovcq %rbp,%r13
- movq 8(%rsi),%rbp
- cmovcq %rcx,%r8
- movq 16(%rsi),%rcx
- cmovcq %r10,%r9
- movq 24(%rsi),%r10
-
- call __ecp_nistz256_subx
-
- leaq 128(%rsp),%rbx
- leaq 288(%rsp),%rdi
- call __ecp_nistz256_sub_fromx
-
- movq 192+0(%rsp),%rax
- movq 192+8(%rsp),%rbp
- movq 192+16(%rsp),%rcx
- movq 192+24(%rsp),%r10
- leaq 320(%rsp),%rdi
-
- call __ecp_nistz256_subx
-
- movq %r12,0(%rdi)
- movq %r13,8(%rdi)
- movq %r8,16(%rdi)
- movq %r9,24(%rdi)
- movq 128(%rsp),%rdx
- leaq 128(%rsp),%rbx
- movq 0+224(%rsp),%r9
- movq 8+224(%rsp),%r10
- leaq -128+224(%rsp),%rsi
- movq 16+224(%rsp),%r11
- movq 24+224(%rsp),%r12
- leaq 256(%rsp),%rdi
- call __ecp_nistz256_mul_montx
-
- movq 320(%rsp),%rdx
- leaq 320(%rsp),%rbx
- movq 0+64(%rsp),%r9
- movq 8+64(%rsp),%r10
- leaq -128+64(%rsp),%rsi
- movq 16+64(%rsp),%r11
- movq 24+64(%rsp),%r12
- leaq 320(%rsp),%rdi
- call __ecp_nistz256_mul_montx
-
- leaq 256(%rsp),%rbx
- leaq 320(%rsp),%rdi
- call __ecp_nistz256_sub_fromx
-
-.byte 102,72,15,126,199
-
- movdqa %xmm5,%xmm0
- movdqa %xmm5,%xmm1
- pandn 352(%rsp),%xmm0
- movdqa %xmm5,%xmm2
- pandn 352+16(%rsp),%xmm1
- movdqa %xmm5,%xmm3
- pand 544(%rsp),%xmm2
- pand 544+16(%rsp),%xmm3
- por %xmm0,%xmm2
- por %xmm1,%xmm3
-
- movdqa %xmm4,%xmm0
- movdqa %xmm4,%xmm1
- pandn %xmm2,%xmm0
- movdqa %xmm4,%xmm2
- pandn %xmm3,%xmm1
- movdqa %xmm4,%xmm3
- pand 448(%rsp),%xmm2
- pand 448+16(%rsp),%xmm3
- por %xmm0,%xmm2
- por %xmm1,%xmm3
- movdqu %xmm2,64(%rdi)
- movdqu %xmm3,80(%rdi)
-
- movdqa %xmm5,%xmm0
- movdqa %xmm5,%xmm1
- pandn 288(%rsp),%xmm0
- movdqa %xmm5,%xmm2
- pandn 288+16(%rsp),%xmm1
- movdqa %xmm5,%xmm3
- pand 480(%rsp),%xmm2
- pand 480+16(%rsp),%xmm3
- por %xmm0,%xmm2
- por %xmm1,%xmm3
-
- movdqa %xmm4,%xmm0
- movdqa %xmm4,%xmm1
- pandn %xmm2,%xmm0
- movdqa %xmm4,%xmm2
- pandn %xmm3,%xmm1
- movdqa %xmm4,%xmm3
- pand 384(%rsp),%xmm2
- pand 384+16(%rsp),%xmm3
- por %xmm0,%xmm2
- por %xmm1,%xmm3
- movdqu %xmm2,0(%rdi)
- movdqu %xmm3,16(%rdi)
-
- movdqa %xmm5,%xmm0
- movdqa %xmm5,%xmm1
- pandn 320(%rsp),%xmm0
- movdqa %xmm5,%xmm2
- pandn 320+16(%rsp),%xmm1
- movdqa %xmm5,%xmm3
- pand 512(%rsp),%xmm2
- pand 512+16(%rsp),%xmm3
- por %xmm0,%xmm2
- por %xmm1,%xmm3
-
- movdqa %xmm4,%xmm0
- movdqa %xmm4,%xmm1
- pandn %xmm2,%xmm0
- movdqa %xmm4,%xmm2
- pandn %xmm3,%xmm1
- movdqa %xmm4,%xmm3
- pand 416(%rsp),%xmm2
- pand 416+16(%rsp),%xmm3
- por %xmm0,%xmm2
- por %xmm1,%xmm3
- movdqu %xmm2,32(%rdi)
- movdqu %xmm3,48(%rdi)
-
-.Ladd_donex:
- leaq 576+56(%rsp),%rsi
-.cfi_def_cfa %rsi,8
- movq -48(%rsi),%r15
-.cfi_restore %r15
- movq -40(%rsi),%r14
-.cfi_restore %r14
- movq -32(%rsi),%r13
-.cfi_restore %r13
- movq -24(%rsi),%r12
-.cfi_restore %r12
- movq -16(%rsi),%rbx
-.cfi_restore %rbx
- movq -8(%rsi),%rbp
-.cfi_restore %rbp
- leaq (%rsi),%rsp
-.cfi_def_cfa_register %rsp
-.Lpoint_addx_epilogue:
- .byte 0xf3,0xc3
-.cfi_endproc
-.size ecp_nistz256_point_addx,.-ecp_nistz256_point_addx
-.type ecp_nistz256_point_add_affinex,@function
-.align 32
-ecp_nistz256_point_add_affinex:
-.cfi_startproc
-.Lpoint_add_affinex:
- pushq %rbp
-.cfi_adjust_cfa_offset 8
-.cfi_offset %rbp,-16
- pushq %rbx
-.cfi_adjust_cfa_offset 8
-.cfi_offset %rbx,-24
- pushq %r12
-.cfi_adjust_cfa_offset 8
-.cfi_offset %r12,-32
- pushq %r13
-.cfi_adjust_cfa_offset 8
-.cfi_offset %r13,-40
- pushq %r14
-.cfi_adjust_cfa_offset 8
-.cfi_offset %r14,-48
- pushq %r15
-.cfi_adjust_cfa_offset 8
-.cfi_offset %r15,-56
- subq $480+8,%rsp
-.cfi_adjust_cfa_offset 32*15+8
-.Ladd_affinex_body:
-
- movdqu 0(%rsi),%xmm0
- movq %rdx,%rbx
- movdqu 16(%rsi),%xmm1
- movdqu 32(%rsi),%xmm2
- movdqu 48(%rsi),%xmm3
- movdqu 64(%rsi),%xmm4
- movdqu 80(%rsi),%xmm5
- movq 64+0(%rsi),%rdx
- movq 64+8(%rsi),%r14
- movq 64+16(%rsi),%r15
- movq 64+24(%rsi),%r8
- movdqa %xmm0,320(%rsp)
- movdqa %xmm1,320+16(%rsp)
- movdqa %xmm2,352(%rsp)
- movdqa %xmm3,352+16(%rsp)
- movdqa %xmm4,384(%rsp)
- movdqa %xmm5,384+16(%rsp)
- por %xmm4,%xmm5
-
- movdqu 0(%rbx),%xmm0
- pshufd $0xb1,%xmm5,%xmm3
- movdqu 16(%rbx),%xmm1
- movdqu 32(%rbx),%xmm2
- por %xmm3,%xmm5
- movdqu 48(%rbx),%xmm3
- movdqa %xmm0,416(%rsp)
- pshufd $0x1e,%xmm5,%xmm4
- movdqa %xmm1,416+16(%rsp)
- por %xmm0,%xmm1
-.byte 102,72,15,110,199
- movdqa %xmm2,448(%rsp)
- movdqa %xmm3,448+16(%rsp)
- por %xmm2,%xmm3
- por %xmm4,%xmm5
- pxor %xmm4,%xmm4
- por %xmm1,%xmm3
-
- leaq 64-128(%rsi),%rsi
- leaq 32(%rsp),%rdi
- call __ecp_nistz256_sqr_montx
-
- pcmpeqd %xmm4,%xmm5
- pshufd $0xb1,%xmm3,%xmm4
- movq 0(%rbx),%rdx
-
- movq %r12,%r9
- por %xmm3,%xmm4
- pshufd $0,%xmm5,%xmm5
- pshufd $0x1e,%xmm4,%xmm3
- movq %r13,%r10
- por %xmm3,%xmm4
- pxor %xmm3,%xmm3
- movq %r14,%r11
- pcmpeqd %xmm3,%xmm4
- pshufd $0,%xmm4,%xmm4
-
- leaq 32-128(%rsp),%rsi
- movq %r15,%r12
- leaq 0(%rsp),%rdi
- call __ecp_nistz256_mul_montx
-
- leaq 320(%rsp),%rbx
- leaq 64(%rsp),%rdi
- call __ecp_nistz256_sub_fromx
-
- movq 384(%rsp),%rdx
- leaq 384(%rsp),%rbx
- movq 0+32(%rsp),%r9
- movq 8+32(%rsp),%r10
- leaq -128+32(%rsp),%rsi
- movq 16+32(%rsp),%r11
- movq 24+32(%rsp),%r12
- leaq 32(%rsp),%rdi
- call __ecp_nistz256_mul_montx
-
- movq 384(%rsp),%rdx
- leaq 384(%rsp),%rbx
- movq 0+64(%rsp),%r9
- movq 8+64(%rsp),%r10
- leaq -128+64(%rsp),%rsi
- movq 16+64(%rsp),%r11
- movq 24+64(%rsp),%r12
- leaq 288(%rsp),%rdi
- call __ecp_nistz256_mul_montx
-
- movq 448(%rsp),%rdx
- leaq 448(%rsp),%rbx
- movq 0+32(%rsp),%r9
- movq 8+32(%rsp),%r10
- leaq -128+32(%rsp),%rsi
- movq 16+32(%rsp),%r11
- movq 24+32(%rsp),%r12
- leaq 32(%rsp),%rdi
- call __ecp_nistz256_mul_montx
-
- leaq 352(%rsp),%rbx
- leaq 96(%rsp),%rdi
- call __ecp_nistz256_sub_fromx
-
- movq 0+64(%rsp),%rdx
- movq 8+64(%rsp),%r14
- leaq -128+64(%rsp),%rsi
- movq 16+64(%rsp),%r15
- movq 24+64(%rsp),%r8
- leaq 128(%rsp),%rdi
- call __ecp_nistz256_sqr_montx
-
- movq 0+96(%rsp),%rdx
- movq 8+96(%rsp),%r14
- leaq -128+96(%rsp),%rsi
- movq 16+96(%rsp),%r15
- movq 24+96(%rsp),%r8
- leaq 192(%rsp),%rdi
- call __ecp_nistz256_sqr_montx
-
- movq 128(%rsp),%rdx
- leaq 128(%rsp),%rbx
- movq 0+64(%rsp),%r9
- movq 8+64(%rsp),%r10
- leaq -128+64(%rsp),%rsi
- movq 16+64(%rsp),%r11
- movq 24+64(%rsp),%r12
- leaq 160(%rsp),%rdi
- call __ecp_nistz256_mul_montx
-
- movq 320(%rsp),%rdx
- leaq 320(%rsp),%rbx
- movq 0+128(%rsp),%r9
- movq 8+128(%rsp),%r10
- leaq -128+128(%rsp),%rsi
- movq 16+128(%rsp),%r11
- movq 24+128(%rsp),%r12
- leaq 0(%rsp),%rdi
- call __ecp_nistz256_mul_montx
-
-
-
-
- xorq %r11,%r11
- addq %r12,%r12
- leaq 192(%rsp),%rsi
- adcq %r13,%r13
- movq %r12,%rax
- adcq %r8,%r8
- adcq %r9,%r9
- movq %r13,%rbp
- adcq $0,%r11
-
- subq $-1,%r12
- movq %r8,%rcx
- sbbq %r14,%r13
- sbbq $0,%r8
- movq %r9,%r10
- sbbq %r15,%r9
- sbbq $0,%r11
-
- cmovcq %rax,%r12
- movq 0(%rsi),%rax
- cmovcq %rbp,%r13
- movq 8(%rsi),%rbp
- cmovcq %rcx,%r8
- movq 16(%rsi),%rcx
- cmovcq %r10,%r9
- movq 24(%rsi),%r10
-
- call __ecp_nistz256_subx
-
- leaq 160(%rsp),%rbx
- leaq 224(%rsp),%rdi
- call __ecp_nistz256_sub_fromx
-
- movq 0+0(%rsp),%rax
- movq 0+8(%rsp),%rbp
- movq 0+16(%rsp),%rcx
- movq 0+24(%rsp),%r10
- leaq 64(%rsp),%rdi
-
- call __ecp_nistz256_subx
-
- movq %r12,0(%rdi)
- movq %r13,8(%rdi)
- movq %r8,16(%rdi)
- movq %r9,24(%rdi)
- movq 352(%rsp),%rdx
- leaq 352(%rsp),%rbx
- movq 0+160(%rsp),%r9
- movq 8+160(%rsp),%r10
- leaq -128+160(%rsp),%rsi
- movq 16+160(%rsp),%r11
- movq 24+160(%rsp),%r12
- leaq 32(%rsp),%rdi
- call __ecp_nistz256_mul_montx
-
- movq 96(%rsp),%rdx
- leaq 96(%rsp),%rbx
- movq 0+64(%rsp),%r9
- movq 8+64(%rsp),%r10
- leaq -128+64(%rsp),%rsi
- movq 16+64(%rsp),%r11
- movq 24+64(%rsp),%r12
- leaq 64(%rsp),%rdi
- call __ecp_nistz256_mul_montx
-
- leaq 32(%rsp),%rbx
- leaq 256(%rsp),%rdi
- call __ecp_nistz256_sub_fromx
-
-.byte 102,72,15,126,199
-
- movdqa %xmm5,%xmm0
- movdqa %xmm5,%xmm1
- pandn 288(%rsp),%xmm0
- movdqa %xmm5,%xmm2
- pandn 288+16(%rsp),%xmm1
- movdqa %xmm5,%xmm3
- pand .LONE_mont(%rip),%xmm2
- pand .LONE_mont+16(%rip),%xmm3
- por %xmm0,%xmm2
- por %xmm1,%xmm3
-
- movdqa %xmm4,%xmm0
- movdqa %xmm4,%xmm1
- pandn %xmm2,%xmm0
- movdqa %xmm4,%xmm2
- pandn %xmm3,%xmm1
- movdqa %xmm4,%xmm3
- pand 384(%rsp),%xmm2
- pand 384+16(%rsp),%xmm3
- por %xmm0,%xmm2
- por %xmm1,%xmm3
- movdqu %xmm2,64(%rdi)
- movdqu %xmm3,80(%rdi)
-
- movdqa %xmm5,%xmm0
- movdqa %xmm5,%xmm1
- pandn 224(%rsp),%xmm0
- movdqa %xmm5,%xmm2
- pandn 224+16(%rsp),%xmm1
- movdqa %xmm5,%xmm3
- pand 416(%rsp),%xmm2
- pand 416+16(%rsp),%xmm3
- por %xmm0,%xmm2
- por %xmm1,%xmm3
-
- movdqa %xmm4,%xmm0
- movdqa %xmm4,%xmm1
- pandn %xmm2,%xmm0
- movdqa %xmm4,%xmm2
- pandn %xmm3,%xmm1
- movdqa %xmm4,%xmm3
- pand 320(%rsp),%xmm2
- pand 320+16(%rsp),%xmm3
- por %xmm0,%xmm2
- por %xmm1,%xmm3
- movdqu %xmm2,0(%rdi)
- movdqu %xmm3,16(%rdi)
-
- movdqa %xmm5,%xmm0
- movdqa %xmm5,%xmm1
- pandn 256(%rsp),%xmm0
- movdqa %xmm5,%xmm2
- pandn 256+16(%rsp),%xmm1
- movdqa %xmm5,%xmm3
- pand 448(%rsp),%xmm2
- pand 448+16(%rsp),%xmm3
- por %xmm0,%xmm2
- por %xmm1,%xmm3
-
- movdqa %xmm4,%xmm0
- movdqa %xmm4,%xmm1
- pandn %xmm2,%xmm0
- movdqa %xmm4,%xmm2
- pandn %xmm3,%xmm1
- movdqa %xmm4,%xmm3
- pand 352(%rsp),%xmm2
- pand 352+16(%rsp),%xmm3
- por %xmm0,%xmm2
- por %xmm1,%xmm3
- movdqu %xmm2,32(%rdi)
- movdqu %xmm3,48(%rdi)
-
- leaq 480+56(%rsp),%rsi
-.cfi_def_cfa %rsi,8
- movq -48(%rsi),%r15
-.cfi_restore %r15
- movq -40(%rsi),%r14
-.cfi_restore %r14
- movq -32(%rsi),%r13
-.cfi_restore %r13
- movq -24(%rsi),%r12
-.cfi_restore %r12
- movq -16(%rsi),%rbx
-.cfi_restore %rbx
- movq -8(%rsi),%rbp
-.cfi_restore %rbp
- leaq (%rsi),%rsp
-.cfi_def_cfa_register %rsp
-.Ladd_affinex_epilogue:
- .byte 0xf3,0xc3
-.cfi_endproc
-.size ecp_nistz256_point_add_affinex,.-ecp_nistz256_point_add_affinex
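Everything stripped from ecp_nistz256-x86_64.S above is the BMI2/ADX and AVX2 code paths: with the OPENSSL_ia32cap_P dispatch checks deleted, the generic *montq routines are taken unconditionally. The removed *montx variants are built from mulx feeding two independent carry chains, adcx through CF and adox through OF. A minimal sketch of that building block (hypothetical helper, assumes BMI2 and ADX, not part of the generated file):

.text
.globl	mac4x1_adx
.type	mac4x1_adx,@function
.align	16
# Sketch: r[0..3] += a[0..3] * b, carry out returned in %rax, i.e.
# void mac4x1_adx(uint64_t r[4], const uint64_t a[4], uint64_t b).
# mulx takes its second multiplicand from %rdx (the third SysV
# argument here) and leaves the flags alone, so the CF (adcx) and
# OF (adox) chains can run interleaved, as in the deleted
# __ecp_nistz256_mul_montx.
mac4x1_adx:
.cfi_startproc
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-16
	movq	0(%rdi),%r8
	movq	8(%rdi),%r9
	movq	16(%rdi),%r10
	movq	24(%rdi),%r11
	xorl	%eax,%eax		# clear CF and OF, zero the carry word

	mulxq	0(%rsi),%rcx,%rbp	# a[0]*b
	adcxq	%rcx,%r8		# CF chain: low halves into r[i]
	adoxq	%rbp,%r9		# OF chain: high halves into r[i+1]
	mulxq	8(%rsi),%rcx,%rbp
	adcxq	%rcx,%r9
	adoxq	%rbp,%r10
	mulxq	16(%rsi),%rcx,%rbp
	adcxq	%rcx,%r10
	adoxq	%rbp,%r11
	mulxq	24(%rsi),%rcx,%rbp
	adcxq	%rcx,%r11
	adoxq	%rax,%rbp		# fold the OF carry into the top word
	adcxq	%rax,%rbp		# fold the CF carry as well

	movq	%r8,0(%rdi)
	movq	%r9,8(%rdi)
	movq	%r10,16(%rdi)
	movq	%r11,24(%rdi)
	movq	%rbp,%rax		# carry out

	popq	%rbp
.cfi_adjust_cfa_offset	-8
.cfi_restore	%rbp
	.byte	0xf3,0xc3
.cfi_endproc
.size	mac4x1_adx,.-mac4x1_adx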
diff --git a/secure/lib/libcrypto/amd64/ghash-x86_64.S b/secure/lib/libcrypto/amd64/ghash-x86_64.S
index 078353528d5f..55ad7db1f240 100644
--- a/secure/lib/libcrypto/amd64/ghash-x86_64.S
+++ b/secure/lib/libcrypto/amd64/ghash-x86_64.S
@@ -1304,108 +1304,7 @@ gcm_ghash_clmul:
.align 32
gcm_init_avx:
.cfi_startproc
- vzeroupper
-
- vmovdqu (%rsi),%xmm2
- vpshufd $78,%xmm2,%xmm2
-
-
- vpshufd $255,%xmm2,%xmm4
- vpsrlq $63,%xmm2,%xmm3
- vpsllq $1,%xmm2,%xmm2
- vpxor %xmm5,%xmm5,%xmm5
- vpcmpgtd %xmm4,%xmm5,%xmm5
- vpslldq $8,%xmm3,%xmm3
- vpor %xmm3,%xmm2,%xmm2
-
-
- vpand .L0x1c2_polynomial(%rip),%xmm5,%xmm5
- vpxor %xmm5,%xmm2,%xmm2
-
- vpunpckhqdq %xmm2,%xmm2,%xmm6
- vmovdqa %xmm2,%xmm0
- vpxor %xmm2,%xmm6,%xmm6
- movq $4,%r10
- jmp .Linit_start_avx
-.align 32
-.Linit_loop_avx:
- vpalignr $8,%xmm3,%xmm4,%xmm5
- vmovdqu %xmm5,-16(%rdi)
- vpunpckhqdq %xmm0,%xmm0,%xmm3
- vpxor %xmm0,%xmm3,%xmm3
- vpclmulqdq $0x11,%xmm2,%xmm0,%xmm1
- vpclmulqdq $0x00,%xmm2,%xmm0,%xmm0
- vpclmulqdq $0x00,%xmm6,%xmm3,%xmm3
- vpxor %xmm0,%xmm1,%xmm4
- vpxor %xmm4,%xmm3,%xmm3
-
- vpslldq $8,%xmm3,%xmm4
- vpsrldq $8,%xmm3,%xmm3
- vpxor %xmm4,%xmm0,%xmm0
- vpxor %xmm3,%xmm1,%xmm1
- vpsllq $57,%xmm0,%xmm3
- vpsllq $62,%xmm0,%xmm4
- vpxor %xmm3,%xmm4,%xmm4
- vpsllq $63,%xmm0,%xmm3
- vpxor %xmm3,%xmm4,%xmm4
- vpslldq $8,%xmm4,%xmm3
- vpsrldq $8,%xmm4,%xmm4
- vpxor %xmm3,%xmm0,%xmm0
- vpxor %xmm4,%xmm1,%xmm1
-
- vpsrlq $1,%xmm0,%xmm4
- vpxor %xmm0,%xmm1,%xmm1
- vpxor %xmm4,%xmm0,%xmm0
- vpsrlq $5,%xmm4,%xmm4
- vpxor %xmm4,%xmm0,%xmm0
- vpsrlq $1,%xmm0,%xmm0
- vpxor %xmm1,%xmm0,%xmm0
-.Linit_start_avx:
- vmovdqa %xmm0,%xmm5
- vpunpckhqdq %xmm0,%xmm0,%xmm3
- vpxor %xmm0,%xmm3,%xmm3
- vpclmulqdq $0x11,%xmm2,%xmm0,%xmm1
- vpclmulqdq $0x00,%xmm2,%xmm0,%xmm0
- vpclmulqdq $0x00,%xmm6,%xmm3,%xmm3
- vpxor %xmm0,%xmm1,%xmm4
- vpxor %xmm4,%xmm3,%xmm3
-
- vpslldq $8,%xmm3,%xmm4
- vpsrldq $8,%xmm3,%xmm3
- vpxor %xmm4,%xmm0,%xmm0
- vpxor %xmm3,%xmm1,%xmm1
- vpsllq $57,%xmm0,%xmm3
- vpsllq $62,%xmm0,%xmm4
- vpxor %xmm3,%xmm4,%xmm4
- vpsllq $63,%xmm0,%xmm3
- vpxor %xmm3,%xmm4,%xmm4
- vpslldq $8,%xmm4,%xmm3
- vpsrldq $8,%xmm4,%xmm4
- vpxor %xmm3,%xmm0,%xmm0
- vpxor %xmm4,%xmm1,%xmm1
-
- vpsrlq $1,%xmm0,%xmm4
- vpxor %xmm0,%xmm1,%xmm1
- vpxor %xmm4,%xmm0,%xmm0
- vpsrlq $5,%xmm4,%xmm4
- vpxor %xmm4,%xmm0,%xmm0
- vpsrlq $1,%xmm0,%xmm0
- vpxor %xmm1,%xmm0,%xmm0
- vpshufd $78,%xmm5,%xmm3
- vpshufd $78,%xmm0,%xmm4
- vpxor %xmm5,%xmm3,%xmm3
- vmovdqu %xmm5,0(%rdi)
- vpxor %xmm0,%xmm4,%xmm4
- vmovdqu %xmm0,16(%rdi)
- leaq 48(%rdi),%rdi
- subq $1,%r10
- jnz .Linit_loop_avx
-
- vpalignr $8,%xmm4,%xmm3,%xmm5
- vmovdqu %xmm5,-16(%rdi)
-
- vzeroupper
- .byte 0xf3,0xc3
+ jmp .L_init_clmul
.cfi_endproc
.size gcm_init_avx,.-gcm_init_avx
.globl gcm_gmult_avx
@@ -1421,377 +1320,7 @@ gcm_gmult_avx:
.align 32
gcm_ghash_avx:
.cfi_startproc
- vzeroupper
-
- vmovdqu (%rdi),%xmm10
- leaq .L0x1c2_polynomial(%rip),%r10
- leaq 64(%rsi),%rsi
- vmovdqu .Lbswap_mask(%rip),%xmm13
- vpshufb %xmm13,%xmm10,%xmm10
- cmpq $0x80,%rcx
- jb .Lshort_avx
- subq $0x80,%rcx
-
- vmovdqu 112(%rdx),%xmm14
- vmovdqu 0-64(%rsi),%xmm6
- vpshufb %xmm13,%xmm14,%xmm14
- vmovdqu 32-64(%rsi),%xmm7
-
- vpunpckhqdq %xmm14,%xmm14,%xmm9
- vmovdqu 96(%rdx),%xmm15
- vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
- vpxor %xmm14,%xmm9,%xmm9
- vpshufb %xmm13,%xmm15,%xmm15
- vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
- vmovdqu 16-64(%rsi),%xmm6
- vpunpckhqdq %xmm15,%xmm15,%xmm8
- vmovdqu 80(%rdx),%xmm14
- vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2
- vpxor %xmm15,%xmm8,%xmm8
-
- vpshufb %xmm13,%xmm14,%xmm14
- vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3
- vpunpckhqdq %xmm14,%xmm14,%xmm9
- vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4
- vmovdqu 48-64(%rsi),%xmm6
- vpxor %xmm14,%xmm9,%xmm9
- vmovdqu 64(%rdx),%xmm15
- vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5
- vmovdqu 80-64(%rsi),%xmm7
-
- vpshufb %xmm13,%xmm15,%xmm15
- vpxor %xmm0,%xmm3,%xmm3
- vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
- vpxor %xmm1,%xmm4,%xmm4
- vpunpckhqdq %xmm15,%xmm15,%xmm8
- vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
- vmovdqu 64-64(%rsi),%xmm6
- vpxor %xmm2,%xmm5,%xmm5
- vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2
- vpxor %xmm15,%xmm8,%xmm8
-
- vmovdqu 48(%rdx),%xmm14
- vpxor %xmm3,%xmm0,%xmm0
- vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3
- vpxor %xmm4,%xmm1,%xmm1
- vpshufb %xmm13,%xmm14,%xmm14
- vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4
- vmovdqu 96-64(%rsi),%xmm6
- vpxor %xmm5,%xmm2,%xmm2
- vpunpckhqdq %xmm14,%xmm14,%xmm9
- vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5
- vmovdqu 128-64(%rsi),%xmm7
- vpxor %xmm14,%xmm9,%xmm9
-
- vmovdqu 32(%rdx),%xmm15
- vpxor %xmm0,%xmm3,%xmm3
- vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
- vpxor %xmm1,%xmm4,%xmm4
- vpshufb %xmm13,%xmm15,%xmm15
- vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
- vmovdqu 112-64(%rsi),%xmm6
- vpxor %xmm2,%xmm5,%xmm5
- vpunpckhqdq %xmm15,%xmm15,%xmm8
- vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2
- vpxor %xmm15,%xmm8,%xmm8
-
- vmovdqu 16(%rdx),%xmm14
- vpxor %xmm3,%xmm0,%xmm0
- vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3
- vpxor %xmm4,%xmm1,%xmm1
- vpshufb %xmm13,%xmm14,%xmm14
- vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4
- vmovdqu 144-64(%rsi),%xmm6
- vpxor %xmm5,%xmm2,%xmm2
- vpunpckhqdq %xmm14,%xmm14,%xmm9
- vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5
- vmovdqu 176-64(%rsi),%xmm7
- vpxor %xmm14,%xmm9,%xmm9
-
- vmovdqu (%rdx),%xmm15
- vpxor %xmm0,%xmm3,%xmm3
- vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
- vpxor %xmm1,%xmm4,%xmm4
- vpshufb %xmm13,%xmm15,%xmm15
- vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
- vmovdqu 160-64(%rsi),%xmm6
- vpxor %xmm2,%xmm5,%xmm5
- vpclmulqdq $0x10,%xmm7,%xmm9,%xmm2
-
- leaq 128(%rdx),%rdx
- cmpq $0x80,%rcx
- jb .Ltail_avx
-
- vpxor %xmm10,%xmm15,%xmm15
- subq $0x80,%rcx
- jmp .Loop8x_avx
-
-.align 32
-.Loop8x_avx:
- vpunpckhqdq %xmm15,%xmm15,%xmm8
- vmovdqu 112(%rdx),%xmm14
- vpxor %xmm0,%xmm3,%xmm3
- vpxor %xmm15,%xmm8,%xmm8
- vpclmulqdq $0x00,%xmm6,%xmm15,%xmm10
- vpshufb %xmm13,%xmm14,%xmm14
- vpxor %xmm1,%xmm4,%xmm4
- vpclmulqdq $0x11,%xmm6,%xmm15,%xmm11
- vmovdqu 0-64(%rsi),%xmm6
- vpunpckhqdq %xmm14,%xmm14,%xmm9
- vpxor %xmm2,%xmm5,%xmm5
- vpclmulqdq $0x00,%xmm7,%xmm8,%xmm12
- vmovdqu 32-64(%rsi),%xmm7
- vpxor %xmm14,%xmm9,%xmm9
-
- vmovdqu 96(%rdx),%xmm15
- vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
- vpxor %xmm3,%xmm10,%xmm10
- vpshufb %xmm13,%xmm15,%xmm15
- vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
- vxorps %xmm4,%xmm11,%xmm11
- vmovdqu 16-64(%rsi),%xmm6
- vpunpckhqdq %xmm15,%xmm15,%xmm8
- vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2
- vpxor %xmm5,%xmm12,%xmm12
- vxorps %xmm15,%xmm8,%xmm8
-
- vmovdqu 80(%rdx),%xmm14
- vpxor %xmm10,%xmm12,%xmm12
- vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3
- vpxor %xmm11,%xmm12,%xmm12
- vpslldq $8,%xmm12,%xmm9
- vpxor %xmm0,%xmm3,%xmm3
- vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4
- vpsrldq $8,%xmm12,%xmm12
- vpxor %xmm9,%xmm10,%xmm10
- vmovdqu 48-64(%rsi),%xmm6
- vpshufb %xmm13,%xmm14,%xmm14
- vxorps %xmm12,%xmm11,%xmm11
- vpxor %xmm1,%xmm4,%xmm4
- vpunpckhqdq %xmm14,%xmm14,%xmm9
- vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5
- vmovdqu 80-64(%rsi),%xmm7
- vpxor %xmm14,%xmm9,%xmm9
- vpxor %xmm2,%xmm5,%xmm5
-
- vmovdqu 64(%rdx),%xmm15
- vpalignr $8,%xmm10,%xmm10,%xmm12
- vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
- vpshufb %xmm13,%xmm15,%xmm15
- vpxor %xmm3,%xmm0,%xmm0
- vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
- vmovdqu 64-64(%rsi),%xmm6
- vpunpckhqdq %xmm15,%xmm15,%xmm8
- vpxor %xmm4,%xmm1,%xmm1
- vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2
- vxorps %xmm15,%xmm8,%xmm8
- vpxor %xmm5,%xmm2,%xmm2
-
- vmovdqu 48(%rdx),%xmm14
- vpclmulqdq $0x10,(%r10),%xmm10,%xmm10
- vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3
- vpshufb %xmm13,%xmm14,%xmm14
- vpxor %xmm0,%xmm3,%xmm3
- vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4
- vmovdqu 96-64(%rsi),%xmm6
- vpunpckhqdq %xmm14,%xmm14,%xmm9
- vpxor %xmm1,%xmm4,%xmm4
- vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5
- vmovdqu 128-64(%rsi),%xmm7
- vpxor %xmm14,%xmm9,%xmm9
- vpxor %xmm2,%xmm5,%xmm5
-
- vmovdqu 32(%rdx),%xmm15
- vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
- vpshufb %xmm13,%xmm15,%xmm15
- vpxor %xmm3,%xmm0,%xmm0
- vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
- vmovdqu 112-64(%rsi),%xmm6
- vpunpckhqdq %xmm15,%xmm15,%xmm8
- vpxor %xmm4,%xmm1,%xmm1
- vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2
- vpxor %xmm15,%xmm8,%xmm8
- vpxor %xmm5,%xmm2,%xmm2
- vxorps %xmm12,%xmm10,%xmm10
-
- vmovdqu 16(%rdx),%xmm14
- vpalignr $8,%xmm10,%xmm10,%xmm12
- vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3
- vpshufb %xmm13,%xmm14,%xmm14
- vpxor %xmm0,%xmm3,%xmm3
- vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4
- vmovdqu 144-64(%rsi),%xmm6
- vpclmulqdq $0x10,(%r10),%xmm10,%xmm10
- vxorps %xmm11,%xmm12,%xmm12
- vpunpckhqdq %xmm14,%xmm14,%xmm9
- vpxor %xmm1,%xmm4,%xmm4
- vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5
- vmovdqu 176-64(%rsi),%xmm7
- vpxor %xmm14,%xmm9,%xmm9
- vpxor %xmm2,%xmm5,%xmm5
-
- vmovdqu (%rdx),%xmm15
- vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
- vpshufb %xmm13,%xmm15,%xmm15
- vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
- vmovdqu 160-64(%rsi),%xmm6
- vpxor %xmm12,%xmm15,%xmm15
- vpclmulqdq $0x10,%xmm7,%xmm9,%xmm2
- vpxor %xmm10,%xmm15,%xmm15
-
- leaq 128(%rdx),%rdx
- subq $0x80,%rcx
- jnc .Loop8x_avx
-
- addq $0x80,%rcx
- jmp .Ltail_no_xor_avx
-
-.align 32
-.Lshort_avx:
- vmovdqu -16(%rdx,%rcx,1),%xmm14
- leaq (%rdx,%rcx,1),%rdx
- vmovdqu 0-64(%rsi),%xmm6
- vmovdqu 32-64(%rsi),%xmm7
- vpshufb %xmm13,%xmm14,%xmm15
-
- vmovdqa %xmm0,%xmm3
- vmovdqa %xmm1,%xmm4
- vmovdqa %xmm2,%xmm5
- subq $0x10,%rcx
- jz .Ltail_avx
-
- vpunpckhqdq %xmm15,%xmm15,%xmm8
- vpxor %xmm0,%xmm3,%xmm3
- vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
- vpxor %xmm15,%xmm8,%xmm8
- vmovdqu -32(%rdx),%xmm14
- vpxor %xmm1,%xmm4,%xmm4
- vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
- vmovdqu 16-64(%rsi),%xmm6
- vpshufb %xmm13,%xmm14,%xmm15
- vpxor %xmm2,%xmm5,%xmm5
- vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
- vpsrldq $8,%xmm7,%xmm7
- subq $0x10,%rcx
- jz .Ltail_avx
-
- vpunpckhqdq %xmm15,%xmm15,%xmm8
- vpxor %xmm0,%xmm3,%xmm3
- vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
- vpxor %xmm15,%xmm8,%xmm8
- vmovdqu -48(%rdx),%xmm14
- vpxor %xmm1,%xmm4,%xmm4
- vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
- vmovdqu 48-64(%rsi),%xmm6
- vpshufb %xmm13,%xmm14,%xmm15
- vpxor %xmm2,%xmm5,%xmm5
- vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
- vmovdqu 80-64(%rsi),%xmm7
- subq $0x10,%rcx
- jz .Ltail_avx
-
- vpunpckhqdq %xmm15,%xmm15,%xmm8
- vpxor %xmm0,%xmm3,%xmm3
- vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
- vpxor %xmm15,%xmm8,%xmm8
- vmovdqu -64(%rdx),%xmm14
- vpxor %xmm1,%xmm4,%xmm4
- vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
- vmovdqu 64-64(%rsi),%xmm6
- vpshufb %xmm13,%xmm14,%xmm15
- vpxor %xmm2,%xmm5,%xmm5
- vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
- vpsrldq $8,%xmm7,%xmm7
- subq $0x10,%rcx
- jz .Ltail_avx
-
- vpunpckhqdq %xmm15,%xmm15,%xmm8
- vpxor %xmm0,%xmm3,%xmm3
- vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
- vpxor %xmm15,%xmm8,%xmm8
- vmovdqu -80(%rdx),%xmm14
- vpxor %xmm1,%xmm4,%xmm4
- vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
- vmovdqu 96-64(%rsi),%xmm6
- vpshufb %xmm13,%xmm14,%xmm15
- vpxor %xmm2,%xmm5,%xmm5
- vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
- vmovdqu 128-64(%rsi),%xmm7
- subq $0x10,%rcx
- jz .Ltail_avx
-
- vpunpckhqdq %xmm15,%xmm15,%xmm8
- vpxor %xmm0,%xmm3,%xmm3
- vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
- vpxor %xmm15,%xmm8,%xmm8
- vmovdqu -96(%rdx),%xmm14
- vpxor %xmm1,%xmm4,%xmm4
- vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
- vmovdqu 112-64(%rsi),%xmm6
- vpshufb %xmm13,%xmm14,%xmm15
- vpxor %xmm2,%xmm5,%xmm5
- vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
- vpsrldq $8,%xmm7,%xmm7
- subq $0x10,%rcx
- jz .Ltail_avx
-
- vpunpckhqdq %xmm15,%xmm15,%xmm8
- vpxor %xmm0,%xmm3,%xmm3
- vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
- vpxor %xmm15,%xmm8,%xmm8
- vmovdqu -112(%rdx),%xmm14
- vpxor %xmm1,%xmm4,%xmm4
- vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
- vmovdqu 144-64(%rsi),%xmm6
- vpshufb %xmm13,%xmm14,%xmm15
- vpxor %xmm2,%xmm5,%xmm5
- vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
- vmovq 184-64(%rsi),%xmm7
- subq $0x10,%rcx
- jmp .Ltail_avx
-
-.align 32
-.Ltail_avx:
- vpxor %xmm10,%xmm15,%xmm15
-.Ltail_no_xor_avx:
- vpunpckhqdq %xmm15,%xmm15,%xmm8
- vpxor %xmm0,%xmm3,%xmm3
- vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
- vpxor %xmm15,%xmm8,%xmm8
- vpxor %xmm1,%xmm4,%xmm4
- vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
- vpxor %xmm2,%xmm5,%xmm5
- vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
-
- vmovdqu (%r10),%xmm12
-
- vpxor %xmm0,%xmm3,%xmm10
- vpxor %xmm1,%xmm4,%xmm11
- vpxor %xmm2,%xmm5,%xmm5
-
- vpxor %xmm10,%xmm5,%xmm5
- vpxor %xmm11,%xmm5,%xmm5
- vpslldq $8,%xmm5,%xmm9
- vpsrldq $8,%xmm5,%xmm5
- vpxor %xmm9,%xmm10,%xmm10
- vpxor %xmm5,%xmm11,%xmm11
-
- vpclmulqdq $0x10,%xmm12,%xmm10,%xmm9
- vpalignr $8,%xmm10,%xmm10,%xmm10
- vpxor %xmm9,%xmm10,%xmm10
-
- vpclmulqdq $0x10,%xmm12,%xmm10,%xmm9
- vpalignr $8,%xmm10,%xmm10,%xmm10
- vpxor %xmm11,%xmm10,%xmm10
- vpxor %xmm9,%xmm10,%xmm10
-
- cmpq $0,%rcx
- jne .Lshort_avx
-
- vpshufb %xmm13,%xmm10,%xmm10
- vmovdqu %xmm10,(%rdi)
- vzeroupper
- .byte 0xf3,0xc3
+ jmp .L_ghash_clmul
.cfi_endproc
.size gcm_ghash_avx,.-gcm_ghash_avx
.align 64
diff --git a/secure/lib/libcrypto/amd64/keccak1600-x86_64.S b/secure/lib/libcrypto/amd64/keccak1600-x86_64.S
index 582740bd2802..d36758807990 100644
--- a/secure/lib/libcrypto/amd64/keccak1600-x86_64.S
+++ b/secure/lib/libcrypto/amd64/keccak1600-x86_64.S
@@ -5,6 +5,7 @@
.type __KeccakF1600,@function
.align 32
__KeccakF1600:
+.cfi_startproc
movq 60(%rdi),%rax
movq 68(%rdi),%rbx
movq 76(%rdi),%rcx
@@ -257,6 +258,7 @@ __KeccakF1600:
leaq -192(%r15),%r15
.byte 0xf3,0xc3
+.cfi_endproc
.size __KeccakF1600,.-__KeccakF1600
.type KeccakF1600,@function
diff --git a/secure/lib/libcrypto/amd64/poly1305-x86_64.S b/secure/lib/libcrypto/amd64/poly1305-x86_64.S
index 6973743427f3..d74ee9b45052 100644
--- a/secure/lib/libcrypto/amd64/poly1305-x86_64.S
+++ b/secure/lib/libcrypto/amd64/poly1305-x86_64.S
@@ -14,6 +14,7 @@
.type poly1305_init,@function
.align 32
poly1305_init:
+.cfi_startproc
xorq %rax,%rax
movq %rax,0(%rdi)
movq %rax,8(%rdi)
@@ -24,15 +25,6 @@ poly1305_init:
leaq poly1305_blocks(%rip),%r10
leaq poly1305_emit(%rip),%r11
- movq OPENSSL_ia32cap_P+4(%rip),%r9
- leaq poly1305_blocks_avx(%rip),%rax
- leaq poly1305_emit_avx(%rip),%rcx
- btq $28,%r9
- cmovcq %rax,%r10
- cmovcq %rcx,%r11
- leaq poly1305_blocks_avx2(%rip),%rax
- btq $37,%r9
- cmovcq %rax,%r10
movq $0x0ffffffc0fffffff,%rax
movq $0x0ffffffc0ffffffc,%rcx
andq 0(%rsi),%rax
@@ -44,6 +36,7 @@ poly1305_init:
movl $1,%eax
.Lno_key:
.byte 0xf3,0xc3
+.cfi_endproc
.size poly1305_init,.-poly1305_init
.type poly1305_blocks,@function
@@ -164,6 +157,7 @@ poly1305_blocks:
.type poly1305_emit,@function
.align 32
poly1305_emit:
+.cfi_startproc
.Lemit:
movq 0(%rdi),%r8
movq 8(%rdi),%r9
@@ -184,1783 +178,15 @@ poly1305_emit:
movq %rcx,8(%rsi)
.byte 0xf3,0xc3
-.size poly1305_emit,.-poly1305_emit
-.type __poly1305_block,@function
-.align 32
-__poly1305_block:
- mulq %r14
- movq %rax,%r9
- movq %r11,%rax
- movq %rdx,%r10
-
- mulq %r14
- movq %rax,%r14
- movq %r11,%rax
- movq %rdx,%r8
-
- mulq %rbx
- addq %rax,%r9
- movq %r13,%rax
- adcq %rdx,%r10
-
- mulq %rbx
- movq %rbp,%rbx
- addq %rax,%r14
- adcq %rdx,%r8
-
- imulq %r13,%rbx
- addq %rbx,%r9
- movq %r8,%rbx
- adcq $0,%r10
-
- imulq %r11,%rbp
- addq %r9,%rbx
- movq $-4,%rax
- adcq %rbp,%r10
-
- andq %r10,%rax
- movq %r10,%rbp
- shrq $2,%r10
- andq $3,%rbp
- addq %r10,%rax
- addq %rax,%r14
- adcq $0,%rbx
- adcq $0,%rbp
- .byte 0xf3,0xc3
-.size __poly1305_block,.-__poly1305_block
-
-.type __poly1305_init_avx,@function
-.align 32
-__poly1305_init_avx:
- movq %r11,%r14
- movq %r12,%rbx
- xorq %rbp,%rbp
-
- leaq 48+64(%rdi),%rdi
-
- movq %r12,%rax
- call __poly1305_block
-
- movl $0x3ffffff,%eax
- movl $0x3ffffff,%edx
- movq %r14,%r8
- andl %r14d,%eax
- movq %r11,%r9
- andl %r11d,%edx
- movl %eax,-64(%rdi)
- shrq $26,%r8
- movl %edx,-60(%rdi)
- shrq $26,%r9
-
- movl $0x3ffffff,%eax
- movl $0x3ffffff,%edx
- andl %r8d,%eax
- andl %r9d,%edx
- movl %eax,-48(%rdi)
- leal (%rax,%rax,4),%eax
- movl %edx,-44(%rdi)
- leal (%rdx,%rdx,4),%edx
- movl %eax,-32(%rdi)
- shrq $26,%r8
- movl %edx,-28(%rdi)
- shrq $26,%r9
-
- movq %rbx,%rax
- movq %r12,%rdx
- shlq $12,%rax
- shlq $12,%rdx
- orq %r8,%rax
- orq %r9,%rdx
- andl $0x3ffffff,%eax
- andl $0x3ffffff,%edx
- movl %eax,-16(%rdi)
- leal (%rax,%rax,4),%eax
- movl %edx,-12(%rdi)
- leal (%rdx,%rdx,4),%edx
- movl %eax,0(%rdi)
- movq %rbx,%r8
- movl %edx,4(%rdi)
- movq %r12,%r9
-
- movl $0x3ffffff,%eax
- movl $0x3ffffff,%edx
- shrq $14,%r8
- shrq $14,%r9
- andl %r8d,%eax
- andl %r9d,%edx
- movl %eax,16(%rdi)
- leal (%rax,%rax,4),%eax
- movl %edx,20(%rdi)
- leal (%rdx,%rdx,4),%edx
- movl %eax,32(%rdi)
- shrq $26,%r8
- movl %edx,36(%rdi)
- shrq $26,%r9
-
- movq %rbp,%rax
- shlq $24,%rax
- orq %rax,%r8
- movl %r8d,48(%rdi)
- leaq (%r8,%r8,4),%r8
- movl %r9d,52(%rdi)
- leaq (%r9,%r9,4),%r9
- movl %r8d,64(%rdi)
- movl %r9d,68(%rdi)
-
- movq %r12,%rax
- call __poly1305_block
-
- movl $0x3ffffff,%eax
- movq %r14,%r8
- andl %r14d,%eax
- shrq $26,%r8
- movl %eax,-52(%rdi)
-
- movl $0x3ffffff,%edx
- andl %r8d,%edx
- movl %edx,-36(%rdi)
- leal (%rdx,%rdx,4),%edx
- shrq $26,%r8
- movl %edx,-20(%rdi)
-
- movq %rbx,%rax
- shlq $12,%rax
- orq %r8,%rax
- andl $0x3ffffff,%eax
- movl %eax,-4(%rdi)
- leal (%rax,%rax,4),%eax
- movq %rbx,%r8
- movl %eax,12(%rdi)
-
- movl $0x3ffffff,%edx
- shrq $14,%r8
- andl %r8d,%edx
- movl %edx,28(%rdi)
- leal (%rdx,%rdx,4),%edx
- shrq $26,%r8
- movl %edx,44(%rdi)
-
- movq %rbp,%rax
- shlq $24,%rax
- orq %rax,%r8
- movl %r8d,60(%rdi)
- leaq (%r8,%r8,4),%r8
- movl %r8d,76(%rdi)
-
- movq %r12,%rax
- call __poly1305_block
-
- movl $0x3ffffff,%eax
- movq %r14,%r8
- andl %r14d,%eax
- shrq $26,%r8
- movl %eax,-56(%rdi)
-
- movl $0x3ffffff,%edx
- andl %r8d,%edx
- movl %edx,-40(%rdi)
- leal (%rdx,%rdx,4),%edx
- shrq $26,%r8
- movl %edx,-24(%rdi)
-
- movq %rbx,%rax
- shlq $12,%rax
- orq %r8,%rax
- andl $0x3ffffff,%eax
- movl %eax,-8(%rdi)
- leal (%rax,%rax,4),%eax
- movq %rbx,%r8
- movl %eax,8(%rdi)
-
- movl $0x3ffffff,%edx
- shrq $14,%r8
- andl %r8d,%edx
- movl %edx,24(%rdi)
- leal (%rdx,%rdx,4),%edx
- shrq $26,%r8
- movl %edx,40(%rdi)
-
- movq %rbp,%rax
- shlq $24,%rax
- orq %rax,%r8
- movl %r8d,56(%rdi)
- leaq (%r8,%r8,4),%r8
- movl %r8d,72(%rdi)
-
- leaq -48-64(%rdi),%rdi
- .byte 0xf3,0xc3
-.size __poly1305_init_avx,.-__poly1305_init_avx
-
-.type poly1305_blocks_avx,@function
-.align 32
-poly1305_blocks_avx:
-.cfi_startproc
- movl 20(%rdi),%r8d
- cmpq $128,%rdx
- jae .Lblocks_avx
- testl %r8d,%r8d
- jz .Lblocks
-
-.Lblocks_avx:
- andq $-16,%rdx
- jz .Lno_data_avx
-
- vzeroupper
-
- testl %r8d,%r8d
- jz .Lbase2_64_avx
-
- testq $31,%rdx
- jz .Leven_avx
-
- pushq %rbx
-.cfi_adjust_cfa_offset 8
-.cfi_offset %rbx,-16
- pushq %rbp
-.cfi_adjust_cfa_offset 8
-.cfi_offset %rbp,-24
- pushq %r12
-.cfi_adjust_cfa_offset 8
-.cfi_offset %r12,-32
- pushq %r13
-.cfi_adjust_cfa_offset 8
-.cfi_offset %r13,-40
- pushq %r14
-.cfi_adjust_cfa_offset 8
-.cfi_offset %r14,-48
- pushq %r15
-.cfi_adjust_cfa_offset 8
-.cfi_offset %r15,-56
-.Lblocks_avx_body:
-
- movq %rdx,%r15
-
- movq 0(%rdi),%r8
- movq 8(%rdi),%r9
- movl 16(%rdi),%ebp
-
- movq 24(%rdi),%r11
- movq 32(%rdi),%r13
-
-
- movl %r8d,%r14d
- andq $-2147483648,%r8
- movq %r9,%r12
- movl %r9d,%ebx
- andq $-2147483648,%r9
-
- shrq $6,%r8
- shlq $52,%r12
- addq %r8,%r14
- shrq $12,%rbx
- shrq $18,%r9
- addq %r12,%r14
- adcq %r9,%rbx
-
- movq %rbp,%r8
- shlq $40,%r8
- shrq $24,%rbp
- addq %r8,%rbx
- adcq $0,%rbp
-
- movq $-4,%r9
- movq %rbp,%r8
- andq %rbp,%r9
- shrq $2,%r8
- andq $3,%rbp
- addq %r9,%r8
- addq %r8,%r14
- adcq $0,%rbx
- adcq $0,%rbp
-
- movq %r13,%r12
- movq %r13,%rax
- shrq $2,%r13
- addq %r12,%r13
-
- addq 0(%rsi),%r14
- adcq 8(%rsi),%rbx
- leaq 16(%rsi),%rsi
- adcq %rcx,%rbp
-
- call __poly1305_block
-
- testq %rcx,%rcx
- jz .Lstore_base2_64_avx
-
-
- movq %r14,%rax
- movq %r14,%rdx
- shrq $52,%r14
- movq %rbx,%r11
- movq %rbx,%r12
- shrq $26,%rdx
- andq $0x3ffffff,%rax
- shlq $12,%r11
- andq $0x3ffffff,%rdx
- shrq $14,%rbx
- orq %r11,%r14
- shlq $24,%rbp
- andq $0x3ffffff,%r14
- shrq $40,%r12
- andq $0x3ffffff,%rbx
- orq %r12,%rbp
-
- subq $16,%r15
- jz .Lstore_base2_26_avx
-
- vmovd %eax,%xmm0
- vmovd %edx,%xmm1
- vmovd %r14d,%xmm2
- vmovd %ebx,%xmm3
- vmovd %ebp,%xmm4
- jmp .Lproceed_avx
-
-.align 32
-.Lstore_base2_64_avx:
- movq %r14,0(%rdi)
- movq %rbx,8(%rdi)
- movq %rbp,16(%rdi)
- jmp .Ldone_avx
-
-.align 16
-.Lstore_base2_26_avx:
- movl %eax,0(%rdi)
- movl %edx,4(%rdi)
- movl %r14d,8(%rdi)
- movl %ebx,12(%rdi)
- movl %ebp,16(%rdi)
-.align 16
-.Ldone_avx:
- movq 0(%rsp),%r15
-.cfi_restore %r15
- movq 8(%rsp),%r14
-.cfi_restore %r14
- movq 16(%rsp),%r13
-.cfi_restore %r13
- movq 24(%rsp),%r12
-.cfi_restore %r12
- movq 32(%rsp),%rbp
-.cfi_restore %rbp
- movq 40(%rsp),%rbx
-.cfi_restore %rbx
- leaq 48(%rsp),%rsp
-.cfi_adjust_cfa_offset -48
-.Lno_data_avx:
-.Lblocks_avx_epilogue:
- .byte 0xf3,0xc3
-.cfi_endproc
-
-.align 32
-.Lbase2_64_avx:
-.cfi_startproc
- pushq %rbx
-.cfi_adjust_cfa_offset 8
-.cfi_offset %rbx,-16
- pushq %rbp
-.cfi_adjust_cfa_offset 8
-.cfi_offset %rbp,-24
- pushq %r12
-.cfi_adjust_cfa_offset 8
-.cfi_offset %r12,-32
- pushq %r13
-.cfi_adjust_cfa_offset 8
-.cfi_offset %r13,-40
- pushq %r14
-.cfi_adjust_cfa_offset 8
-.cfi_offset %r14,-48
- pushq %r15
-.cfi_adjust_cfa_offset 8
-.cfi_offset %r15,-56
-.Lbase2_64_avx_body:
-
- movq %rdx,%r15
-
- movq 24(%rdi),%r11
- movq 32(%rdi),%r13
-
- movq 0(%rdi),%r14
- movq 8(%rdi),%rbx
- movl 16(%rdi),%ebp
-
- movq %r13,%r12
- movq %r13,%rax
- shrq $2,%r13
- addq %r12,%r13
-
- testq $31,%rdx
- jz .Linit_avx
-
- addq 0(%rsi),%r14
- adcq 8(%rsi),%rbx
- leaq 16(%rsi),%rsi
- adcq %rcx,%rbp
- subq $16,%r15
-
- call __poly1305_block
-
-.Linit_avx:
-
- movq %r14,%rax
- movq %r14,%rdx
- shrq $52,%r14
- movq %rbx,%r8
- movq %rbx,%r9
- shrq $26,%rdx
- andq $0x3ffffff,%rax
- shlq $12,%r8
- andq $0x3ffffff,%rdx
- shrq $14,%rbx
- orq %r8,%r14
- shlq $24,%rbp
- andq $0x3ffffff,%r14
- shrq $40,%r9
- andq $0x3ffffff,%rbx
- orq %r9,%rbp
-
- vmovd %eax,%xmm0
- vmovd %edx,%xmm1
- vmovd %r14d,%xmm2
- vmovd %ebx,%xmm3
- vmovd %ebp,%xmm4
- movl $1,20(%rdi)
-
- call __poly1305_init_avx
-
-.Lproceed_avx:
- movq %r15,%rdx
-
- movq 0(%rsp),%r15
-.cfi_restore %r15
- movq 8(%rsp),%r14
-.cfi_restore %r14
- movq 16(%rsp),%r13
-.cfi_restore %r13
- movq 24(%rsp),%r12
-.cfi_restore %r12
- movq 32(%rsp),%rbp
-.cfi_restore %rbp
- movq 40(%rsp),%rbx
-.cfi_restore %rbx
- leaq 48(%rsp),%rax
- leaq 48(%rsp),%rsp
-.cfi_adjust_cfa_offset -48
-.Lbase2_64_avx_epilogue:
- jmp .Ldo_avx
-.cfi_endproc
-
-.align 32
-.Leven_avx:
-.cfi_startproc
- vmovd 0(%rdi),%xmm0
- vmovd 4(%rdi),%xmm1
- vmovd 8(%rdi),%xmm2
- vmovd 12(%rdi),%xmm3
- vmovd 16(%rdi),%xmm4
-
-.Ldo_avx:
- leaq -88(%rsp),%r11
-.cfi_def_cfa %r11,0x60
- subq $0x178,%rsp
- subq $64,%rdx
- leaq -32(%rsi),%rax
- cmovcq %rax,%rsi
-
- vmovdqu 48(%rdi),%xmm14
- leaq 112(%rdi),%rdi
- leaq .Lconst(%rip),%rcx
-
-
-
- vmovdqu 32(%rsi),%xmm5
- vmovdqu 48(%rsi),%xmm6
- vmovdqa 64(%rcx),%xmm15
-
- vpsrldq $6,%xmm5,%xmm7
- vpsrldq $6,%xmm6,%xmm8
- vpunpckhqdq %xmm6,%xmm5,%xmm9
- vpunpcklqdq %xmm6,%xmm5,%xmm5
- vpunpcklqdq %xmm8,%xmm7,%xmm8
-
- vpsrlq $40,%xmm9,%xmm9
- vpsrlq $26,%xmm5,%xmm6
- vpand %xmm15,%xmm5,%xmm5
- vpsrlq $4,%xmm8,%xmm7
- vpand %xmm15,%xmm6,%xmm6
- vpsrlq $30,%xmm8,%xmm8
- vpand %xmm15,%xmm7,%xmm7
- vpand %xmm15,%xmm8,%xmm8
- vpor 32(%rcx),%xmm9,%xmm9
-
- jbe .Lskip_loop_avx
-
-
- vmovdqu -48(%rdi),%xmm11
- vmovdqu -32(%rdi),%xmm12
- vpshufd $0xEE,%xmm14,%xmm13
- vpshufd $0x44,%xmm14,%xmm10
- vmovdqa %xmm13,-144(%r11)
- vmovdqa %xmm10,0(%rsp)
- vpshufd $0xEE,%xmm11,%xmm14
- vmovdqu -16(%rdi),%xmm10
- vpshufd $0x44,%xmm11,%xmm11
- vmovdqa %xmm14,-128(%r11)
- vmovdqa %xmm11,16(%rsp)
- vpshufd $0xEE,%xmm12,%xmm13
- vmovdqu 0(%rdi),%xmm11
- vpshufd $0x44,%xmm12,%xmm12
- vmovdqa %xmm13,-112(%r11)
- vmovdqa %xmm12,32(%rsp)
- vpshufd $0xEE,%xmm10,%xmm14
- vmovdqu 16(%rdi),%xmm12
- vpshufd $0x44,%xmm10,%xmm10
- vmovdqa %xmm14,-96(%r11)
- vmovdqa %xmm10,48(%rsp)
- vpshufd $0xEE,%xmm11,%xmm13
- vmovdqu 32(%rdi),%xmm10
- vpshufd $0x44,%xmm11,%xmm11
- vmovdqa %xmm13,-80(%r11)
- vmovdqa %xmm11,64(%rsp)
- vpshufd $0xEE,%xmm12,%xmm14
- vmovdqu 48(%rdi),%xmm11
- vpshufd $0x44,%xmm12,%xmm12
- vmovdqa %xmm14,-64(%r11)
- vmovdqa %xmm12,80(%rsp)
- vpshufd $0xEE,%xmm10,%xmm13
- vmovdqu 64(%rdi),%xmm12
- vpshufd $0x44,%xmm10,%xmm10
- vmovdqa %xmm13,-48(%r11)
- vmovdqa %xmm10,96(%rsp)
- vpshufd $0xEE,%xmm11,%xmm14
- vpshufd $0x44,%xmm11,%xmm11
- vmovdqa %xmm14,-32(%r11)
- vmovdqa %xmm11,112(%rsp)
- vpshufd $0xEE,%xmm12,%xmm13
- vmovdqa 0(%rsp),%xmm14
- vpshufd $0x44,%xmm12,%xmm12
- vmovdqa %xmm13,-16(%r11)
- vmovdqa %xmm12,128(%rsp)
-
- jmp .Loop_avx
-
-.align 32
-.Loop_avx:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- vpmuludq %xmm5,%xmm14,%xmm10
- vpmuludq %xmm6,%xmm14,%xmm11
- vmovdqa %xmm2,32(%r11)
- vpmuludq %xmm7,%xmm14,%xmm12
- vmovdqa 16(%rsp),%xmm2
- vpmuludq %xmm8,%xmm14,%xmm13
- vpmuludq %xmm9,%xmm14,%xmm14
-
- vmovdqa %xmm0,0(%r11)
- vpmuludq 32(%rsp),%xmm9,%xmm0
- vmovdqa %xmm1,16(%r11)
- vpmuludq %xmm8,%xmm2,%xmm1
- vpaddq %xmm0,%xmm10,%xmm10
- vpaddq %xmm1,%xmm14,%xmm14
- vmovdqa %xmm3,48(%r11)
- vpmuludq %xmm7,%xmm2,%xmm0
- vpmuludq %xmm6,%xmm2,%xmm1
- vpaddq %xmm0,%xmm13,%xmm13
- vmovdqa 48(%rsp),%xmm3
- vpaddq %xmm1,%xmm12,%xmm12
- vmovdqa %xmm4,64(%r11)
- vpmuludq %xmm5,%xmm2,%xmm2
- vpmuludq %xmm7,%xmm3,%xmm0
- vpaddq %xmm2,%xmm11,%xmm11
-
- vmovdqa 64(%rsp),%xmm4
- vpaddq %xmm0,%xmm14,%xmm14
- vpmuludq %xmm6,%xmm3,%xmm1
- vpmuludq %xmm5,%xmm3,%xmm3
- vpaddq %xmm1,%xmm13,%xmm13
- vmovdqa 80(%rsp),%xmm2
- vpaddq %xmm3,%xmm12,%xmm12
- vpmuludq %xmm9,%xmm4,%xmm0
- vpmuludq %xmm8,%xmm4,%xmm4
- vpaddq %xmm0,%xmm11,%xmm11
- vmovdqa 96(%rsp),%xmm3
- vpaddq %xmm4,%xmm10,%xmm10
-
- vmovdqa 128(%rsp),%xmm4
- vpmuludq %xmm6,%xmm2,%xmm1
- vpmuludq %xmm5,%xmm2,%xmm2
- vpaddq %xmm1,%xmm14,%xmm14
- vpaddq %xmm2,%xmm13,%xmm13
- vpmuludq %xmm9,%xmm3,%xmm0
- vpmuludq %xmm8,%xmm3,%xmm1
- vpaddq %xmm0,%xmm12,%xmm12
- vmovdqu 0(%rsi),%xmm0
- vpaddq %xmm1,%xmm11,%xmm11
- vpmuludq %xmm7,%xmm3,%xmm3
- vpmuludq %xmm7,%xmm4,%xmm7
- vpaddq %xmm3,%xmm10,%xmm10
-
- vmovdqu 16(%rsi),%xmm1
- vpaddq %xmm7,%xmm11,%xmm11
- vpmuludq %xmm8,%xmm4,%xmm8
- vpmuludq %xmm9,%xmm4,%xmm9
- vpsrldq $6,%xmm0,%xmm2
- vpaddq %xmm8,%xmm12,%xmm12
- vpaddq %xmm9,%xmm13,%xmm13
- vpsrldq $6,%xmm1,%xmm3
- vpmuludq 112(%rsp),%xmm5,%xmm9
- vpmuludq %xmm6,%xmm4,%xmm5
- vpunpckhqdq %xmm1,%xmm0,%xmm4
- vpaddq %xmm9,%xmm14,%xmm14
- vmovdqa -144(%r11),%xmm9
- vpaddq %xmm5,%xmm10,%xmm10
-
- vpunpcklqdq %xmm1,%xmm0,%xmm0
- vpunpcklqdq %xmm3,%xmm2,%xmm3
-
-
- vpsrldq $5,%xmm4,%xmm4
- vpsrlq $26,%xmm0,%xmm1
- vpand %xmm15,%xmm0,%xmm0
- vpsrlq $4,%xmm3,%xmm2
- vpand %xmm15,%xmm1,%xmm1
- vpand 0(%rcx),%xmm4,%xmm4
- vpsrlq $30,%xmm3,%xmm3
- vpand %xmm15,%xmm2,%xmm2
- vpand %xmm15,%xmm3,%xmm3
- vpor 32(%rcx),%xmm4,%xmm4
-
- vpaddq 0(%r11),%xmm0,%xmm0
- vpaddq 16(%r11),%xmm1,%xmm1
- vpaddq 32(%r11),%xmm2,%xmm2
- vpaddq 48(%r11),%xmm3,%xmm3
- vpaddq 64(%r11),%xmm4,%xmm4
-
- leaq 32(%rsi),%rax
- leaq 64(%rsi),%rsi
- subq $64,%rdx
- cmovcq %rax,%rsi
-
-
-
-
-
-
-
-
-
-
- vpmuludq %xmm0,%xmm9,%xmm5
- vpmuludq %xmm1,%xmm9,%xmm6
- vpaddq %xmm5,%xmm10,%xmm10
- vpaddq %xmm6,%xmm11,%xmm11
- vmovdqa -128(%r11),%xmm7
- vpmuludq %xmm2,%xmm9,%xmm5
- vpmuludq %xmm3,%xmm9,%xmm6
- vpaddq %xmm5,%xmm12,%xmm12
- vpaddq %xmm6,%xmm13,%xmm13
- vpmuludq %xmm4,%xmm9,%xmm9
- vpmuludq -112(%r11),%xmm4,%xmm5
- vpaddq %xmm9,%xmm14,%xmm14
-
- vpaddq %xmm5,%xmm10,%xmm10
- vpmuludq %xmm2,%xmm7,%xmm6
- vpmuludq %xmm3,%xmm7,%xmm5
- vpaddq %xmm6,%xmm13,%xmm13
- vmovdqa -96(%r11),%xmm8
- vpaddq %xmm5,%xmm14,%xmm14
- vpmuludq %xmm1,%xmm7,%xmm6
- vpmuludq %xmm0,%xmm7,%xmm7
- vpaddq %xmm6,%xmm12,%xmm12
- vpaddq %xmm7,%xmm11,%xmm11
-
- vmovdqa -80(%r11),%xmm9
- vpmuludq %xmm2,%xmm8,%xmm5
- vpmuludq %xmm1,%xmm8,%xmm6
- vpaddq %xmm5,%xmm14,%xmm14
- vpaddq %xmm6,%xmm13,%xmm13
- vmovdqa -64(%r11),%xmm7
- vpmuludq %xmm0,%xmm8,%xmm8
- vpmuludq %xmm4,%xmm9,%xmm5
- vpaddq %xmm8,%xmm12,%xmm12
- vpaddq %xmm5,%xmm11,%xmm11
- vmovdqa -48(%r11),%xmm8
- vpmuludq %xmm3,%xmm9,%xmm9
- vpmuludq %xmm1,%xmm7,%xmm6
- vpaddq %xmm9,%xmm10,%xmm10
-
- vmovdqa -16(%r11),%xmm9
- vpaddq %xmm6,%xmm14,%xmm14
- vpmuludq %xmm0,%xmm7,%xmm7
- vpmuludq %xmm4,%xmm8,%xmm5
- vpaddq %xmm7,%xmm13,%xmm13
- vpaddq %xmm5,%xmm12,%xmm12
- vmovdqu 32(%rsi),%xmm5
- vpmuludq %xmm3,%xmm8,%xmm7
- vpmuludq %xmm2,%xmm8,%xmm8
- vpaddq %xmm7,%xmm11,%xmm11
- vmovdqu 48(%rsi),%xmm6
- vpaddq %xmm8,%xmm10,%xmm10
-
- vpmuludq %xmm2,%xmm9,%xmm2
- vpmuludq %xmm3,%xmm9,%xmm3
- vpsrldq $6,%xmm5,%xmm7
- vpaddq %xmm2,%xmm11,%xmm11
- vpmuludq %xmm4,%xmm9,%xmm4
- vpsrldq $6,%xmm6,%xmm8
- vpaddq %xmm3,%xmm12,%xmm2
- vpaddq %xmm4,%xmm13,%xmm3
- vpmuludq -32(%r11),%xmm0,%xmm4
- vpmuludq %xmm1,%xmm9,%xmm0
- vpunpckhqdq %xmm6,%xmm5,%xmm9
- vpaddq %xmm4,%xmm14,%xmm4
- vpaddq %xmm0,%xmm10,%xmm0
-
- vpunpcklqdq %xmm6,%xmm5,%xmm5
- vpunpcklqdq %xmm8,%xmm7,%xmm8
-
-
- vpsrldq $5,%xmm9,%xmm9
- vpsrlq $26,%xmm5,%xmm6
- vmovdqa 0(%rsp),%xmm14
- vpand %xmm15,%xmm5,%xmm5
- vpsrlq $4,%xmm8,%xmm7
- vpand %xmm15,%xmm6,%xmm6
- vpand 0(%rcx),%xmm9,%xmm9
- vpsrlq $30,%xmm8,%xmm8
- vpand %xmm15,%xmm7,%xmm7
- vpand %xmm15,%xmm8,%xmm8
- vpor 32(%rcx),%xmm9,%xmm9
-
-
-
-
-
- vpsrlq $26,%xmm3,%xmm13
- vpand %xmm15,%xmm3,%xmm3
- vpaddq %xmm13,%xmm4,%xmm4
-
- vpsrlq $26,%xmm0,%xmm10
- vpand %xmm15,%xmm0,%xmm0
- vpaddq %xmm10,%xmm11,%xmm1
-
- vpsrlq $26,%xmm4,%xmm10
- vpand %xmm15,%xmm4,%xmm4
-
- vpsrlq $26,%xmm1,%xmm11
- vpand %xmm15,%xmm1,%xmm1
- vpaddq %xmm11,%xmm2,%xmm2
-
- vpaddq %xmm10,%xmm0,%xmm0
- vpsllq $2,%xmm10,%xmm10
- vpaddq %xmm10,%xmm0,%xmm0
-
- vpsrlq $26,%xmm2,%xmm12
- vpand %xmm15,%xmm2,%xmm2
- vpaddq %xmm12,%xmm3,%xmm3
-
- vpsrlq $26,%xmm0,%xmm10
- vpand %xmm15,%xmm0,%xmm0
- vpaddq %xmm10,%xmm1,%xmm1
-
- vpsrlq $26,%xmm3,%xmm13
- vpand %xmm15,%xmm3,%xmm3
- vpaddq %xmm13,%xmm4,%xmm4
-
- ja .Loop_avx
-
-.Lskip_loop_avx:
-
-
-
- vpshufd $0x10,%xmm14,%xmm14
- addq $32,%rdx
- jnz .Long_tail_avx
-
- vpaddq %xmm2,%xmm7,%xmm7
- vpaddq %xmm0,%xmm5,%xmm5
- vpaddq %xmm1,%xmm6,%xmm6
- vpaddq %xmm3,%xmm8,%xmm8
- vpaddq %xmm4,%xmm9,%xmm9
-
-.Long_tail_avx:
- vmovdqa %xmm2,32(%r11)
- vmovdqa %xmm0,0(%r11)
- vmovdqa %xmm1,16(%r11)
- vmovdqa %xmm3,48(%r11)
- vmovdqa %xmm4,64(%r11)
-
-
-
-
-
-
-
- vpmuludq %xmm7,%xmm14,%xmm12
- vpmuludq %xmm5,%xmm14,%xmm10
- vpshufd $0x10,-48(%rdi),%xmm2
- vpmuludq %xmm6,%xmm14,%xmm11
- vpmuludq %xmm8,%xmm14,%xmm13
- vpmuludq %xmm9,%xmm14,%xmm14
-
- vpmuludq %xmm8,%xmm2,%xmm0
- vpaddq %xmm0,%xmm14,%xmm14
- vpshufd $0x10,-32(%rdi),%xmm3
- vpmuludq %xmm7,%xmm2,%xmm1
- vpaddq %xmm1,%xmm13,%xmm13
- vpshufd $0x10,-16(%rdi),%xmm4
- vpmuludq %xmm6,%xmm2,%xmm0
- vpaddq %xmm0,%xmm12,%xmm12
- vpmuludq %xmm5,%xmm2,%xmm2
- vpaddq %xmm2,%xmm11,%xmm11
- vpmuludq %xmm9,%xmm3,%xmm3
- vpaddq %xmm3,%xmm10,%xmm10
-
- vpshufd $0x10,0(%rdi),%xmm2
- vpmuludq %xmm7,%xmm4,%xmm1
- vpaddq %xmm1,%xmm14,%xmm14
- vpmuludq %xmm6,%xmm4,%xmm0
- vpaddq %xmm0,%xmm13,%xmm13
- vpshufd $0x10,16(%rdi),%xmm3
- vpmuludq %xmm5,%xmm4,%xmm4
- vpaddq %xmm4,%xmm12,%xmm12
- vpmuludq %xmm9,%xmm2,%xmm1
- vpaddq %xmm1,%xmm11,%xmm11
- vpshufd $0x10,32(%rdi),%xmm4
- vpmuludq %xmm8,%xmm2,%xmm2
- vpaddq %xmm2,%xmm10,%xmm10
-
- vpmuludq %xmm6,%xmm3,%xmm0
- vpaddq %xmm0,%xmm14,%xmm14
- vpmuludq %xmm5,%xmm3,%xmm3
- vpaddq %xmm3,%xmm13,%xmm13
- vpshufd $0x10,48(%rdi),%xmm2
- vpmuludq %xmm9,%xmm4,%xmm1
- vpaddq %xmm1,%xmm12,%xmm12
- vpshufd $0x10,64(%rdi),%xmm3
- vpmuludq %xmm8,%xmm4,%xmm0
- vpaddq %xmm0,%xmm11,%xmm11
- vpmuludq %xmm7,%xmm4,%xmm4
- vpaddq %xmm4,%xmm10,%xmm10
-
- vpmuludq %xmm5,%xmm2,%xmm2
- vpaddq %xmm2,%xmm14,%xmm14
- vpmuludq %xmm9,%xmm3,%xmm1
- vpaddq %xmm1,%xmm13,%xmm13
- vpmuludq %xmm8,%xmm3,%xmm0
- vpaddq %xmm0,%xmm12,%xmm12
- vpmuludq %xmm7,%xmm3,%xmm1
- vpaddq %xmm1,%xmm11,%xmm11
- vpmuludq %xmm6,%xmm3,%xmm3
- vpaddq %xmm3,%xmm10,%xmm10
-
- jz .Lshort_tail_avx
-
- vmovdqu 0(%rsi),%xmm0
- vmovdqu 16(%rsi),%xmm1
-
- vpsrldq $6,%xmm0,%xmm2
- vpsrldq $6,%xmm1,%xmm3
- vpunpckhqdq %xmm1,%xmm0,%xmm4
- vpunpcklqdq %xmm1,%xmm0,%xmm0
- vpunpcklqdq %xmm3,%xmm2,%xmm3
-
- vpsrlq $40,%xmm4,%xmm4
- vpsrlq $26,%xmm0,%xmm1
- vpand %xmm15,%xmm0,%xmm0
- vpsrlq $4,%xmm3,%xmm2
- vpand %xmm15,%xmm1,%xmm1
- vpsrlq $30,%xmm3,%xmm3
- vpand %xmm15,%xmm2,%xmm2
- vpand %xmm15,%xmm3,%xmm3
- vpor 32(%rcx),%xmm4,%xmm4
-
- vpshufd $0x32,-64(%rdi),%xmm9
- vpaddq 0(%r11),%xmm0,%xmm0
- vpaddq 16(%r11),%xmm1,%xmm1
- vpaddq 32(%r11),%xmm2,%xmm2
- vpaddq 48(%r11),%xmm3,%xmm3
- vpaddq 64(%r11),%xmm4,%xmm4
-
-
-
-
- vpmuludq %xmm0,%xmm9,%xmm5
- vpaddq %xmm5,%xmm10,%xmm10
- vpmuludq %xmm1,%xmm9,%xmm6
- vpaddq %xmm6,%xmm11,%xmm11
- vpmuludq %xmm2,%xmm9,%xmm5
- vpaddq %xmm5,%xmm12,%xmm12
- vpshufd $0x32,-48(%rdi),%xmm7
- vpmuludq %xmm3,%xmm9,%xmm6
- vpaddq %xmm6,%xmm13,%xmm13
- vpmuludq %xmm4,%xmm9,%xmm9
- vpaddq %xmm9,%xmm14,%xmm14
-
- vpmuludq %xmm3,%xmm7,%xmm5
- vpaddq %xmm5,%xmm14,%xmm14
- vpshufd $0x32,-32(%rdi),%xmm8
- vpmuludq %xmm2,%xmm7,%xmm6
- vpaddq %xmm6,%xmm13,%xmm13
- vpshufd $0x32,-16(%rdi),%xmm9
- vpmuludq %xmm1,%xmm7,%xmm5
- vpaddq %xmm5,%xmm12,%xmm12
- vpmuludq %xmm0,%xmm7,%xmm7
- vpaddq %xmm7,%xmm11,%xmm11
- vpmuludq %xmm4,%xmm8,%xmm8
- vpaddq %xmm8,%xmm10,%xmm10
-
- vpshufd $0x32,0(%rdi),%xmm7
- vpmuludq %xmm2,%xmm9,%xmm6
- vpaddq %xmm6,%xmm14,%xmm14
- vpmuludq %xmm1,%xmm9,%xmm5
- vpaddq %xmm5,%xmm13,%xmm13
- vpshufd $0x32,16(%rdi),%xmm8
- vpmuludq %xmm0,%xmm9,%xmm9
- vpaddq %xmm9,%xmm12,%xmm12
- vpmuludq %xmm4,%xmm7,%xmm6
- vpaddq %xmm6,%xmm11,%xmm11
- vpshufd $0x32,32(%rdi),%xmm9
- vpmuludq %xmm3,%xmm7,%xmm7
- vpaddq %xmm7,%xmm10,%xmm10
-
- vpmuludq %xmm1,%xmm8,%xmm5
- vpaddq %xmm5,%xmm14,%xmm14
- vpmuludq %xmm0,%xmm8,%xmm8
- vpaddq %xmm8,%xmm13,%xmm13
- vpshufd $0x32,48(%rdi),%xmm7
- vpmuludq %xmm4,%xmm9,%xmm6
- vpaddq %xmm6,%xmm12,%xmm12
- vpshufd $0x32,64(%rdi),%xmm8
- vpmuludq %xmm3,%xmm9,%xmm5
- vpaddq %xmm5,%xmm11,%xmm11
- vpmuludq %xmm2,%xmm9,%xmm9
- vpaddq %xmm9,%xmm10,%xmm10
-
- vpmuludq %xmm0,%xmm7,%xmm7
- vpaddq %xmm7,%xmm14,%xmm14
- vpmuludq %xmm4,%xmm8,%xmm6
- vpaddq %xmm6,%xmm13,%xmm13
- vpmuludq %xmm3,%xmm8,%xmm5
- vpaddq %xmm5,%xmm12,%xmm12
- vpmuludq %xmm2,%xmm8,%xmm6
- vpaddq %xmm6,%xmm11,%xmm11
- vpmuludq %xmm1,%xmm8,%xmm8
- vpaddq %xmm8,%xmm10,%xmm10
-
-.Lshort_tail_avx:
-
-
-
- vpsrldq $8,%xmm14,%xmm9
- vpsrldq $8,%xmm13,%xmm8
- vpsrldq $8,%xmm11,%xmm6
- vpsrldq $8,%xmm10,%xmm5
- vpsrldq $8,%xmm12,%xmm7
- vpaddq %xmm8,%xmm13,%xmm13
- vpaddq %xmm9,%xmm14,%xmm14
- vpaddq %xmm5,%xmm10,%xmm10
- vpaddq %xmm6,%xmm11,%xmm11
- vpaddq %xmm7,%xmm12,%xmm12
-
-
-
-
- vpsrlq $26,%xmm13,%xmm3
- vpand %xmm15,%xmm13,%xmm13
- vpaddq %xmm3,%xmm14,%xmm14
-
- vpsrlq $26,%xmm10,%xmm0
- vpand %xmm15,%xmm10,%xmm10
- vpaddq %xmm0,%xmm11,%xmm11
-
- vpsrlq $26,%xmm14,%xmm4
- vpand %xmm15,%xmm14,%xmm14
-
- vpsrlq $26,%xmm11,%xmm1
- vpand %xmm15,%xmm11,%xmm11
- vpaddq %xmm1,%xmm12,%xmm12
-
- vpaddq %xmm4,%xmm10,%xmm10
- vpsllq $2,%xmm4,%xmm4
- vpaddq %xmm4,%xmm10,%xmm10
-
- vpsrlq $26,%xmm12,%xmm2
- vpand %xmm15,%xmm12,%xmm12
- vpaddq %xmm2,%xmm13,%xmm13
-
- vpsrlq $26,%xmm10,%xmm0
- vpand %xmm15,%xmm10,%xmm10
- vpaddq %xmm0,%xmm11,%xmm11
-
- vpsrlq $26,%xmm13,%xmm3
- vpand %xmm15,%xmm13,%xmm13
- vpaddq %xmm3,%xmm14,%xmm14
-
- vmovd %xmm10,-112(%rdi)
- vmovd %xmm11,-108(%rdi)
- vmovd %xmm12,-104(%rdi)
- vmovd %xmm13,-100(%rdi)
- vmovd %xmm14,-96(%rdi)
- leaq 88(%r11),%rsp
-.cfi_def_cfa %rsp,8
- vzeroupper
- .byte 0xf3,0xc3
-.cfi_endproc
-.size poly1305_blocks_avx,.-poly1305_blocks_avx
-
-.type poly1305_emit_avx,@function
-.align 32
-poly1305_emit_avx:
- cmpl $0,20(%rdi)
- je .Lemit
-
- movl 0(%rdi),%eax
- movl 4(%rdi),%ecx
- movl 8(%rdi),%r8d
- movl 12(%rdi),%r11d
- movl 16(%rdi),%r10d
-
- shlq $26,%rcx
- movq %r8,%r9
- shlq $52,%r8
- addq %rcx,%rax
- shrq $12,%r9
- addq %rax,%r8
- adcq $0,%r9
-
- shlq $14,%r11
- movq %r10,%rax
- shrq $24,%r10
- addq %r11,%r9
- shlq $40,%rax
- addq %rax,%r9
- adcq $0,%r10
-
- movq %r10,%rax
- movq %r10,%rcx
- andq $3,%r10
- shrq $2,%rax
- andq $-4,%rcx
- addq %rcx,%rax
- addq %rax,%r8
- adcq $0,%r9
- adcq $0,%r10
-
- movq %r8,%rax
- addq $5,%r8
- movq %r9,%rcx
- adcq $0,%r9
- adcq $0,%r10
- shrq $2,%r10
- cmovnzq %r8,%rax
- cmovnzq %r9,%rcx
-
- addq 0(%rdx),%rax
- adcq 8(%rdx),%rcx
- movq %rax,0(%rsi)
- movq %rcx,8(%rsi)
-
- .byte 0xf3,0xc3
-.size poly1305_emit_avx,.-poly1305_emit_avx
-.type poly1305_blocks_avx2,@function
-.align 32
-poly1305_blocks_avx2:
-.cfi_startproc
- movl 20(%rdi),%r8d
- cmpq $128,%rdx
- jae .Lblocks_avx2
- testl %r8d,%r8d
- jz .Lblocks
-
-.Lblocks_avx2:
- andq $-16,%rdx
- jz .Lno_data_avx2
-
- vzeroupper
-
- testl %r8d,%r8d
- jz .Lbase2_64_avx2
-
- testq $63,%rdx
- jz .Leven_avx2
-
- pushq %rbx
-.cfi_adjust_cfa_offset 8
-.cfi_offset %rbx,-16
- pushq %rbp
-.cfi_adjust_cfa_offset 8
-.cfi_offset %rbp,-24
- pushq %r12
-.cfi_adjust_cfa_offset 8
-.cfi_offset %r12,-32
- pushq %r13
-.cfi_adjust_cfa_offset 8
-.cfi_offset %r13,-40
- pushq %r14
-.cfi_adjust_cfa_offset 8
-.cfi_offset %r14,-48
- pushq %r15
-.cfi_adjust_cfa_offset 8
-.cfi_offset %r15,-56
-.Lblocks_avx2_body:
-
- movq %rdx,%r15
-
- movq 0(%rdi),%r8
- movq 8(%rdi),%r9
- movl 16(%rdi),%ebp
-
- movq 24(%rdi),%r11
- movq 32(%rdi),%r13
-
-
- movl %r8d,%r14d
- andq $-2147483648,%r8
- movq %r9,%r12
- movl %r9d,%ebx
- andq $-2147483648,%r9
-
- shrq $6,%r8
- shlq $52,%r12
- addq %r8,%r14
- shrq $12,%rbx
- shrq $18,%r9
- addq %r12,%r14
- adcq %r9,%rbx
-
- movq %rbp,%r8
- shlq $40,%r8
- shrq $24,%rbp
- addq %r8,%rbx
- adcq $0,%rbp
-
- movq $-4,%r9
- movq %rbp,%r8
- andq %rbp,%r9
- shrq $2,%r8
- andq $3,%rbp
- addq %r9,%r8
- addq %r8,%r14
- adcq $0,%rbx
- adcq $0,%rbp
-
- movq %r13,%r12
- movq %r13,%rax
- shrq $2,%r13
- addq %r12,%r13
-
-.Lbase2_26_pre_avx2:
- addq 0(%rsi),%r14
- adcq 8(%rsi),%rbx
- leaq 16(%rsi),%rsi
- adcq %rcx,%rbp
- subq $16,%r15
-
- call __poly1305_block
- movq %r12,%rax
-
- testq $63,%r15
- jnz .Lbase2_26_pre_avx2
-
- testq %rcx,%rcx
- jz .Lstore_base2_64_avx2
-
-
- movq %r14,%rax
- movq %r14,%rdx
- shrq $52,%r14
- movq %rbx,%r11
- movq %rbx,%r12
- shrq $26,%rdx
- andq $0x3ffffff,%rax
- shlq $12,%r11
- andq $0x3ffffff,%rdx
- shrq $14,%rbx
- orq %r11,%r14
- shlq $24,%rbp
- andq $0x3ffffff,%r14
- shrq $40,%r12
- andq $0x3ffffff,%rbx
- orq %r12,%rbp
-
- testq %r15,%r15
- jz .Lstore_base2_26_avx2
-
- vmovd %eax,%xmm0
- vmovd %edx,%xmm1
- vmovd %r14d,%xmm2
- vmovd %ebx,%xmm3
- vmovd %ebp,%xmm4
- jmp .Lproceed_avx2
-
-.align 32
-.Lstore_base2_64_avx2:
- movq %r14,0(%rdi)
- movq %rbx,8(%rdi)
- movq %rbp,16(%rdi)
- jmp .Ldone_avx2
-
-.align 16
-.Lstore_base2_26_avx2:
- movl %eax,0(%rdi)
- movl %edx,4(%rdi)
- movl %r14d,8(%rdi)
- movl %ebx,12(%rdi)
- movl %ebp,16(%rdi)
-.align 16
-.Ldone_avx2:
- movq 0(%rsp),%r15
-.cfi_restore %r15
- movq 8(%rsp),%r14
-.cfi_restore %r14
- movq 16(%rsp),%r13
-.cfi_restore %r13
- movq 24(%rsp),%r12
-.cfi_restore %r12
- movq 32(%rsp),%rbp
-.cfi_restore %rbp
- movq 40(%rsp),%rbx
-.cfi_restore %rbx
- leaq 48(%rsp),%rsp
-.cfi_adjust_cfa_offset -48
-.Lno_data_avx2:
-.Lblocks_avx2_epilogue:
- .byte 0xf3,0xc3
-.cfi_endproc
-
-.align 32
-.Lbase2_64_avx2:
-.cfi_startproc
- pushq %rbx
-.cfi_adjust_cfa_offset 8
-.cfi_offset %rbx,-16
- pushq %rbp
-.cfi_adjust_cfa_offset 8
-.cfi_offset %rbp,-24
- pushq %r12
-.cfi_adjust_cfa_offset 8
-.cfi_offset %r12,-32
- pushq %r13
-.cfi_adjust_cfa_offset 8
-.cfi_offset %r13,-40
- pushq %r14
-.cfi_adjust_cfa_offset 8
-.cfi_offset %r14,-48
- pushq %r15
-.cfi_adjust_cfa_offset 8
-.cfi_offset %r15,-56
-.Lbase2_64_avx2_body:
-
- movq %rdx,%r15
-
- movq 24(%rdi),%r11
- movq 32(%rdi),%r13
-
- movq 0(%rdi),%r14
- movq 8(%rdi),%rbx
- movl 16(%rdi),%ebp
-
- movq %r13,%r12
- movq %r13,%rax
- shrq $2,%r13
- addq %r12,%r13
-
- testq $63,%rdx
- jz .Linit_avx2
-
-.Lbase2_64_pre_avx2:
- addq 0(%rsi),%r14
- adcq 8(%rsi),%rbx
- leaq 16(%rsi),%rsi
- adcq %rcx,%rbp
- subq $16,%r15
-
- call __poly1305_block
- movq %r12,%rax
-
- testq $63,%r15
- jnz .Lbase2_64_pre_avx2
-
-.Linit_avx2:
-
- movq %r14,%rax
- movq %r14,%rdx
- shrq $52,%r14
- movq %rbx,%r8
- movq %rbx,%r9
- shrq $26,%rdx
- andq $0x3ffffff,%rax
- shlq $12,%r8
- andq $0x3ffffff,%rdx
- shrq $14,%rbx
- orq %r8,%r14
- shlq $24,%rbp
- andq $0x3ffffff,%r14
- shrq $40,%r9
- andq $0x3ffffff,%rbx
- orq %r9,%rbp
-
- vmovd %eax,%xmm0
- vmovd %edx,%xmm1
- vmovd %r14d,%xmm2
- vmovd %ebx,%xmm3
- vmovd %ebp,%xmm4
- movl $1,20(%rdi)
-
- call __poly1305_init_avx
-
-.Lproceed_avx2:
- movq %r15,%rdx
- movl OPENSSL_ia32cap_P+8(%rip),%r10d
- movl $3221291008,%r11d
-
- movq 0(%rsp),%r15
-.cfi_restore %r15
- movq 8(%rsp),%r14
-.cfi_restore %r14
- movq 16(%rsp),%r13
-.cfi_restore %r13
- movq 24(%rsp),%r12
-.cfi_restore %r12
- movq 32(%rsp),%rbp
-.cfi_restore %rbp
- movq 40(%rsp),%rbx
-.cfi_restore %rbx
- leaq 48(%rsp),%rax
- leaq 48(%rsp),%rsp
-.cfi_adjust_cfa_offset -48
-.Lbase2_64_avx2_epilogue:
- jmp .Ldo_avx2
-.cfi_endproc
-
-.align 32
-.Leven_avx2:
-.cfi_startproc
- movl OPENSSL_ia32cap_P+8(%rip),%r10d
- vmovd 0(%rdi),%xmm0
- vmovd 4(%rdi),%xmm1
- vmovd 8(%rdi),%xmm2
- vmovd 12(%rdi),%xmm3
- vmovd 16(%rdi),%xmm4
-
-.Ldo_avx2:
- leaq -8(%rsp),%r11
-.cfi_def_cfa %r11,16
- subq $0x128,%rsp
- leaq .Lconst(%rip),%rcx
- leaq 48+64(%rdi),%rdi
- vmovdqa 96(%rcx),%ymm7
-
-
- vmovdqu -64(%rdi),%xmm9
- andq $-512,%rsp
- vmovdqu -48(%rdi),%xmm10
- vmovdqu -32(%rdi),%xmm6
- vmovdqu -16(%rdi),%xmm11
- vmovdqu 0(%rdi),%xmm12
- vmovdqu 16(%rdi),%xmm13
- leaq 144(%rsp),%rax
- vmovdqu 32(%rdi),%xmm14
- vpermd %ymm9,%ymm7,%ymm9
- vmovdqu 48(%rdi),%xmm15
- vpermd %ymm10,%ymm7,%ymm10
- vmovdqu 64(%rdi),%xmm5
- vpermd %ymm6,%ymm7,%ymm6
- vmovdqa %ymm9,0(%rsp)
- vpermd %ymm11,%ymm7,%ymm11
- vmovdqa %ymm10,32-144(%rax)
- vpermd %ymm12,%ymm7,%ymm12
- vmovdqa %ymm6,64-144(%rax)
- vpermd %ymm13,%ymm7,%ymm13
- vmovdqa %ymm11,96-144(%rax)
- vpermd %ymm14,%ymm7,%ymm14
- vmovdqa %ymm12,128-144(%rax)
- vpermd %ymm15,%ymm7,%ymm15
- vmovdqa %ymm13,160-144(%rax)
- vpermd %ymm5,%ymm7,%ymm5
- vmovdqa %ymm14,192-144(%rax)
- vmovdqa %ymm15,224-144(%rax)
- vmovdqa %ymm5,256-144(%rax)
- vmovdqa 64(%rcx),%ymm5
-
-
-
- vmovdqu 0(%rsi),%xmm7
- vmovdqu 16(%rsi),%xmm8
- vinserti128 $1,32(%rsi),%ymm7,%ymm7
- vinserti128 $1,48(%rsi),%ymm8,%ymm8
- leaq 64(%rsi),%rsi
-
- vpsrldq $6,%ymm7,%ymm9
- vpsrldq $6,%ymm8,%ymm10
- vpunpckhqdq %ymm8,%ymm7,%ymm6
- vpunpcklqdq %ymm10,%ymm9,%ymm9
- vpunpcklqdq %ymm8,%ymm7,%ymm7
-
- vpsrlq $30,%ymm9,%ymm10
- vpsrlq $4,%ymm9,%ymm9
- vpsrlq $26,%ymm7,%ymm8
- vpsrlq $40,%ymm6,%ymm6
- vpand %ymm5,%ymm9,%ymm9
- vpand %ymm5,%ymm7,%ymm7
- vpand %ymm5,%ymm8,%ymm8
- vpand %ymm5,%ymm10,%ymm10
- vpor 32(%rcx),%ymm6,%ymm6
-
- vpaddq %ymm2,%ymm9,%ymm2
- subq $64,%rdx
- jz .Ltail_avx2
- jmp .Loop_avx2
-
-.align 32
-.Loop_avx2:
-
-
-
-
-
-
-
-
- vpaddq %ymm0,%ymm7,%ymm0
- vmovdqa 0(%rsp),%ymm7
- vpaddq %ymm1,%ymm8,%ymm1
- vmovdqa 32(%rsp),%ymm8
- vpaddq %ymm3,%ymm10,%ymm3
- vmovdqa 96(%rsp),%ymm9
- vpaddq %ymm4,%ymm6,%ymm4
- vmovdqa 48(%rax),%ymm10
- vmovdqa 112(%rax),%ymm5
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- vpmuludq %ymm2,%ymm7,%ymm13
- vpmuludq %ymm2,%ymm8,%ymm14
- vpmuludq %ymm2,%ymm9,%ymm15
- vpmuludq %ymm2,%ymm10,%ymm11
- vpmuludq %ymm2,%ymm5,%ymm12
-
- vpmuludq %ymm0,%ymm8,%ymm6
- vpmuludq %ymm1,%ymm8,%ymm2
- vpaddq %ymm6,%ymm12,%ymm12
- vpaddq %ymm2,%ymm13,%ymm13
- vpmuludq %ymm3,%ymm8,%ymm6
- vpmuludq 64(%rsp),%ymm4,%ymm2
- vpaddq %ymm6,%ymm15,%ymm15
- vpaddq %ymm2,%ymm11,%ymm11
- vmovdqa -16(%rax),%ymm8
-
- vpmuludq %ymm0,%ymm7,%ymm6
- vpmuludq %ymm1,%ymm7,%ymm2
- vpaddq %ymm6,%ymm11,%ymm11
- vpaddq %ymm2,%ymm12,%ymm12
- vpmuludq %ymm3,%ymm7,%ymm6
- vpmuludq %ymm4,%ymm7,%ymm2
- vmovdqu 0(%rsi),%xmm7
- vpaddq %ymm6,%ymm14,%ymm14
- vpaddq %ymm2,%ymm15,%ymm15
- vinserti128 $1,32(%rsi),%ymm7,%ymm7
-
- vpmuludq %ymm3,%ymm8,%ymm6
- vpmuludq %ymm4,%ymm8,%ymm2
- vmovdqu 16(%rsi),%xmm8
- vpaddq %ymm6,%ymm11,%ymm11
- vpaddq %ymm2,%ymm12,%ymm12
- vmovdqa 16(%rax),%ymm2
- vpmuludq %ymm1,%ymm9,%ymm6
- vpmuludq %ymm0,%ymm9,%ymm9
- vpaddq %ymm6,%ymm14,%ymm14
- vpaddq %ymm9,%ymm13,%ymm13
- vinserti128 $1,48(%rsi),%ymm8,%ymm8
- leaq 64(%rsi),%rsi
-
- vpmuludq %ymm1,%ymm2,%ymm6
- vpmuludq %ymm0,%ymm2,%ymm2
- vpsrldq $6,%ymm7,%ymm9
- vpaddq %ymm6,%ymm15,%ymm15
- vpaddq %ymm2,%ymm14,%ymm14
- vpmuludq %ymm3,%ymm10,%ymm6
- vpmuludq %ymm4,%ymm10,%ymm2
- vpsrldq $6,%ymm8,%ymm10
- vpaddq %ymm6,%ymm12,%ymm12
- vpaddq %ymm2,%ymm13,%ymm13
- vpunpckhqdq %ymm8,%ymm7,%ymm6
-
- vpmuludq %ymm3,%ymm5,%ymm3
- vpmuludq %ymm4,%ymm5,%ymm4
- vpunpcklqdq %ymm8,%ymm7,%ymm7
- vpaddq %ymm3,%ymm13,%ymm2
- vpaddq %ymm4,%ymm14,%ymm3
- vpunpcklqdq %ymm10,%ymm9,%ymm10
- vpmuludq 80(%rax),%ymm0,%ymm4
- vpmuludq %ymm1,%ymm5,%ymm0
- vmovdqa 64(%rcx),%ymm5
- vpaddq %ymm4,%ymm15,%ymm4
- vpaddq %ymm0,%ymm11,%ymm0
-
-
-
-
- vpsrlq $26,%ymm3,%ymm14
- vpand %ymm5,%ymm3,%ymm3
- vpaddq %ymm14,%ymm4,%ymm4
-
- vpsrlq $26,%ymm0,%ymm11
- vpand %ymm5,%ymm0,%ymm0
- vpaddq %ymm11,%ymm12,%ymm1
-
- vpsrlq $26,%ymm4,%ymm15
- vpand %ymm5,%ymm4,%ymm4
-
- vpsrlq $4,%ymm10,%ymm9
-
- vpsrlq $26,%ymm1,%ymm12
- vpand %ymm5,%ymm1,%ymm1
- vpaddq %ymm12,%ymm2,%ymm2
-
- vpaddq %ymm15,%ymm0,%ymm0
- vpsllq $2,%ymm15,%ymm15
- vpaddq %ymm15,%ymm0,%ymm0
-
- vpand %ymm5,%ymm9,%ymm9
- vpsrlq $26,%ymm7,%ymm8
-
- vpsrlq $26,%ymm2,%ymm13
- vpand %ymm5,%ymm2,%ymm2
- vpaddq %ymm13,%ymm3,%ymm3
-
- vpaddq %ymm9,%ymm2,%ymm2
- vpsrlq $30,%ymm10,%ymm10
-
- vpsrlq $26,%ymm0,%ymm11
- vpand %ymm5,%ymm0,%ymm0
- vpaddq %ymm11,%ymm1,%ymm1
-
- vpsrlq $40,%ymm6,%ymm6
-
- vpsrlq $26,%ymm3,%ymm14
- vpand %ymm5,%ymm3,%ymm3
- vpaddq %ymm14,%ymm4,%ymm4
-
- vpand %ymm5,%ymm7,%ymm7
- vpand %ymm5,%ymm8,%ymm8
- vpand %ymm5,%ymm10,%ymm10
- vpor 32(%rcx),%ymm6,%ymm6
-
- subq $64,%rdx
- jnz .Loop_avx2
-
-.byte 0x66,0x90
-.Ltail_avx2:
-
-
-
-
-
-
-
- vpaddq %ymm0,%ymm7,%ymm0
- vmovdqu 4(%rsp),%ymm7
- vpaddq %ymm1,%ymm8,%ymm1
- vmovdqu 36(%rsp),%ymm8
- vpaddq %ymm3,%ymm10,%ymm3
- vmovdqu 100(%rsp),%ymm9
- vpaddq %ymm4,%ymm6,%ymm4
- vmovdqu 52(%rax),%ymm10
- vmovdqu 116(%rax),%ymm5
-
- vpmuludq %ymm2,%ymm7,%ymm13
- vpmuludq %ymm2,%ymm8,%ymm14
- vpmuludq %ymm2,%ymm9,%ymm15
- vpmuludq %ymm2,%ymm10,%ymm11
- vpmuludq %ymm2,%ymm5,%ymm12
-
- vpmuludq %ymm0,%ymm8,%ymm6
- vpmuludq %ymm1,%ymm8,%ymm2
- vpaddq %ymm6,%ymm12,%ymm12
- vpaddq %ymm2,%ymm13,%ymm13
- vpmuludq %ymm3,%ymm8,%ymm6
- vpmuludq 68(%rsp),%ymm4,%ymm2
- vpaddq %ymm6,%ymm15,%ymm15
- vpaddq %ymm2,%ymm11,%ymm11
-
- vpmuludq %ymm0,%ymm7,%ymm6
- vpmuludq %ymm1,%ymm7,%ymm2
- vpaddq %ymm6,%ymm11,%ymm11
- vmovdqu -12(%rax),%ymm8
- vpaddq %ymm2,%ymm12,%ymm12
- vpmuludq %ymm3,%ymm7,%ymm6
- vpmuludq %ymm4,%ymm7,%ymm2
- vpaddq %ymm6,%ymm14,%ymm14
- vpaddq %ymm2,%ymm15,%ymm15
-
- vpmuludq %ymm3,%ymm8,%ymm6
- vpmuludq %ymm4,%ymm8,%ymm2
- vpaddq %ymm6,%ymm11,%ymm11
- vpaddq %ymm2,%ymm12,%ymm12
- vmovdqu 20(%rax),%ymm2
- vpmuludq %ymm1,%ymm9,%ymm6
- vpmuludq %ymm0,%ymm9,%ymm9
- vpaddq %ymm6,%ymm14,%ymm14
- vpaddq %ymm9,%ymm13,%ymm13
-
- vpmuludq %ymm1,%ymm2,%ymm6
- vpmuludq %ymm0,%ymm2,%ymm2
- vpaddq %ymm6,%ymm15,%ymm15
- vpaddq %ymm2,%ymm14,%ymm14
- vpmuludq %ymm3,%ymm10,%ymm6
- vpmuludq %ymm4,%ymm10,%ymm2
- vpaddq %ymm6,%ymm12,%ymm12
- vpaddq %ymm2,%ymm13,%ymm13
-
- vpmuludq %ymm3,%ymm5,%ymm3
- vpmuludq %ymm4,%ymm5,%ymm4
- vpaddq %ymm3,%ymm13,%ymm2
- vpaddq %ymm4,%ymm14,%ymm3
- vpmuludq 84(%rax),%ymm0,%ymm4
- vpmuludq %ymm1,%ymm5,%ymm0
- vmovdqa 64(%rcx),%ymm5
- vpaddq %ymm4,%ymm15,%ymm4
- vpaddq %ymm0,%ymm11,%ymm0
-
-
-
-
- vpsrldq $8,%ymm12,%ymm8
- vpsrldq $8,%ymm2,%ymm9
- vpsrldq $8,%ymm3,%ymm10
- vpsrldq $8,%ymm4,%ymm6
- vpsrldq $8,%ymm0,%ymm7
- vpaddq %ymm8,%ymm12,%ymm12
- vpaddq %ymm9,%ymm2,%ymm2
- vpaddq %ymm10,%ymm3,%ymm3
- vpaddq %ymm6,%ymm4,%ymm4
- vpaddq %ymm7,%ymm0,%ymm0
-
- vpermq $0x2,%ymm3,%ymm10
- vpermq $0x2,%ymm4,%ymm6
- vpermq $0x2,%ymm0,%ymm7
- vpermq $0x2,%ymm12,%ymm8
- vpermq $0x2,%ymm2,%ymm9
- vpaddq %ymm10,%ymm3,%ymm3
- vpaddq %ymm6,%ymm4,%ymm4
- vpaddq %ymm7,%ymm0,%ymm0
- vpaddq %ymm8,%ymm12,%ymm12
- vpaddq %ymm9,%ymm2,%ymm2
-
-
-
-
- vpsrlq $26,%ymm3,%ymm14
- vpand %ymm5,%ymm3,%ymm3
- vpaddq %ymm14,%ymm4,%ymm4
-
- vpsrlq $26,%ymm0,%ymm11
- vpand %ymm5,%ymm0,%ymm0
- vpaddq %ymm11,%ymm12,%ymm1
-
- vpsrlq $26,%ymm4,%ymm15
- vpand %ymm5,%ymm4,%ymm4
-
- vpsrlq $26,%ymm1,%ymm12
- vpand %ymm5,%ymm1,%ymm1
- vpaddq %ymm12,%ymm2,%ymm2
-
- vpaddq %ymm15,%ymm0,%ymm0
- vpsllq $2,%ymm15,%ymm15
- vpaddq %ymm15,%ymm0,%ymm0
-
- vpsrlq $26,%ymm2,%ymm13
- vpand %ymm5,%ymm2,%ymm2
- vpaddq %ymm13,%ymm3,%ymm3
-
- vpsrlq $26,%ymm0,%ymm11
- vpand %ymm5,%ymm0,%ymm0
- vpaddq %ymm11,%ymm1,%ymm1
-
- vpsrlq $26,%ymm3,%ymm14
- vpand %ymm5,%ymm3,%ymm3
- vpaddq %ymm14,%ymm4,%ymm4
-
- vmovd %xmm0,-112(%rdi)
- vmovd %xmm1,-108(%rdi)
- vmovd %xmm2,-104(%rdi)
- vmovd %xmm3,-100(%rdi)
- vmovd %xmm4,-96(%rdi)
- leaq 8(%r11),%rsp
-.cfi_def_cfa %rsp,8
- vzeroupper
- .byte 0xf3,0xc3
.cfi_endproc
-.size poly1305_blocks_avx2,.-poly1305_blocks_avx2
-.align 64
-.Lconst:
-.Lmask24:
-.long 0x0ffffff,0,0x0ffffff,0,0x0ffffff,0,0x0ffffff,0
-.L129:
-.long 16777216,0,16777216,0,16777216,0,16777216,0
-.Lmask26:
-.long 0x3ffffff,0,0x3ffffff,0,0x3ffffff,0,0x3ffffff,0
-.Lpermd_avx2:
-.long 2,2,2,3,2,0,2,1
-.Lpermd_avx512:
-.long 0,0,0,1, 0,2,0,3, 0,4,0,5, 0,6,0,7
-
-.L2_44_inp_permd:
-.long 0,1,1,2,2,3,7,7
-.L2_44_inp_shift:
-.quad 0,12,24,64
-.L2_44_mask:
-.quad 0xfffffffffff,0xfffffffffff,0x3ffffffffff,0xffffffffffffffff
-.L2_44_shift_rgt:
-.quad 44,44,42,64
-.L2_44_shift_lft:
-.quad 8,8,10,64
-
-.align 64
-.Lx_mask44:
-.quad 0xfffffffffff,0xfffffffffff,0xfffffffffff,0xfffffffffff
-.quad 0xfffffffffff,0xfffffffffff,0xfffffffffff,0xfffffffffff
-.Lx_mask42:
-.quad 0x3ffffffffff,0x3ffffffffff,0x3ffffffffff,0x3ffffffffff
-.quad 0x3ffffffffff,0x3ffffffffff,0x3ffffffffff,0x3ffffffffff
+.size poly1305_emit,.-poly1305_emit
.byte 80,111,108,121,49,51,48,53,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 16
.globl xor128_encrypt_n_pad
.type xor128_encrypt_n_pad,@function
.align 16
xor128_encrypt_n_pad:
+.cfi_startproc
subq %rdx,%rsi
subq %rdx,%rdi
movq %rcx,%r10
@@ -2002,12 +228,14 @@ xor128_encrypt_n_pad:
.Ldone_enc:
movq %rdx,%rax
.byte 0xf3,0xc3
+.cfi_endproc
.size xor128_encrypt_n_pad,.-xor128_encrypt_n_pad
.globl xor128_decrypt_n_pad
.type xor128_decrypt_n_pad,@function
.align 16
xor128_decrypt_n_pad:
+.cfi_startproc
subq %rdx,%rsi
subq %rdx,%rdi
movq %rcx,%r10
@@ -2053,4 +281,5 @@ xor128_decrypt_n_pad:
.Ldone_dec:
movq %rdx,%rax
.byte 0xf3,0xc3
+.cfi_endproc
.size xor128_decrypt_n_pad,.-xor128_decrypt_n_pad
diff --git a/secure/lib/libcrypto/amd64/rc4-x86_64.S b/secure/lib/libcrypto/amd64/rc4-x86_64.S
index b77714c170f3..a084e9b9c993 100644
--- a/secure/lib/libcrypto/amd64/rc4-x86_64.S
+++ b/secure/lib/libcrypto/amd64/rc4-x86_64.S
@@ -6,11 +6,12 @@
.globl RC4
.type RC4,@function
.align 16
-RC4: orq %rsi,%rsi
+RC4:
+.cfi_startproc
+ orq %rsi,%rsi
jne .Lentry
.byte 0xf3,0xc3
.Lentry:
-.cfi_startproc
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-16
@@ -535,6 +536,7 @@ RC4: orq %rsi,%rsi
.type RC4_set_key,@function
.align 16
RC4_set_key:
+.cfi_startproc
leaq 8(%rdi),%rdi
leaq (%rdx,%rsi,1),%rdx
negq %rsi
@@ -601,12 +603,14 @@ RC4_set_key:
movl %eax,-8(%rdi)
movl %eax,-4(%rdi)
.byte 0xf3,0xc3
+.cfi_endproc
.size RC4_set_key,.-RC4_set_key
.globl RC4_options
.type RC4_options,@function
.align 16
RC4_options:
+.cfi_startproc
leaq .Lopts(%rip),%rax
movl OPENSSL_ia32cap_P(%rip),%edx
btl $20,%edx
@@ -619,6 +623,7 @@ RC4_options:
addq $12,%rax
.Ldone:
.byte 0xf3,0xc3
+.cfi_endproc
.align 64
.Lopts:
.byte 114,99,52,40,56,120,44,105,110,116,41,0
diff --git a/secure/lib/libcrypto/amd64/rsaz-avx2.S b/secure/lib/libcrypto/amd64/rsaz-avx2.S
index 3075a52d2eec..e957915a7d81 100644
--- a/secure/lib/libcrypto/amd64/rsaz-avx2.S
+++ b/secure/lib/libcrypto/amd64/rsaz-avx2.S
@@ -2,1745 +2,26 @@
/* Do not modify. This file is auto-generated from rsaz-avx2.pl. */
.text
+.globl rsaz_avx2_eligible
+.type rsaz_avx2_eligible,@function
+rsaz_avx2_eligible:
+ xorl %eax,%eax
+ .byte 0xf3,0xc3
+.size rsaz_avx2_eligible,.-rsaz_avx2_eligible
+
.globl rsaz_1024_sqr_avx2
+.globl rsaz_1024_mul_avx2
+.globl rsaz_1024_norm2red_avx2
+.globl rsaz_1024_red2norm_avx2
+.globl rsaz_1024_scatter5_avx2
+.globl rsaz_1024_gather5_avx2
.type rsaz_1024_sqr_avx2,@function
-.align 64
rsaz_1024_sqr_avx2:
-.cfi_startproc
- leaq (%rsp),%rax
-.cfi_def_cfa_register %rax
- pushq %rbx
-.cfi_offset %rbx,-16
- pushq %rbp
-.cfi_offset %rbp,-24
- pushq %r12
-.cfi_offset %r12,-32
- pushq %r13
-.cfi_offset %r13,-40
- pushq %r14
-.cfi_offset %r14,-48
- pushq %r15
-.cfi_offset %r15,-56
- vzeroupper
- movq %rax,%rbp
-.cfi_def_cfa_register %rbp
- movq %rdx,%r13
- subq $832,%rsp
- movq %r13,%r15
- subq $-128,%rdi
- subq $-128,%rsi
- subq $-128,%r13
-
- andq $4095,%r15
- addq $320,%r15
- shrq $12,%r15
- vpxor %ymm9,%ymm9,%ymm9
- jz .Lsqr_1024_no_n_copy
-
-
-
-
-
- subq $320,%rsp
- vmovdqu 0-128(%r13),%ymm0
- andq $-2048,%rsp
- vmovdqu 32-128(%r13),%ymm1
- vmovdqu 64-128(%r13),%ymm2
- vmovdqu 96-128(%r13),%ymm3
- vmovdqu 128-128(%r13),%ymm4
- vmovdqu 160-128(%r13),%ymm5
- vmovdqu 192-128(%r13),%ymm6
- vmovdqu 224-128(%r13),%ymm7
- vmovdqu 256-128(%r13),%ymm8
- leaq 832+128(%rsp),%r13
- vmovdqu %ymm0,0-128(%r13)
- vmovdqu %ymm1,32-128(%r13)
- vmovdqu %ymm2,64-128(%r13)
- vmovdqu %ymm3,96-128(%r13)
- vmovdqu %ymm4,128-128(%r13)
- vmovdqu %ymm5,160-128(%r13)
- vmovdqu %ymm6,192-128(%r13)
- vmovdqu %ymm7,224-128(%r13)
- vmovdqu %ymm8,256-128(%r13)
- vmovdqu %ymm9,288-128(%r13)
-
-.Lsqr_1024_no_n_copy:
- andq $-1024,%rsp
-
- vmovdqu 32-128(%rsi),%ymm1
- vmovdqu 64-128(%rsi),%ymm2
- vmovdqu 96-128(%rsi),%ymm3
- vmovdqu 128-128(%rsi),%ymm4
- vmovdqu 160-128(%rsi),%ymm5
- vmovdqu 192-128(%rsi),%ymm6
- vmovdqu 224-128(%rsi),%ymm7
- vmovdqu 256-128(%rsi),%ymm8
-
- leaq 192(%rsp),%rbx
- vmovdqu .Land_mask(%rip),%ymm15
- jmp .LOOP_GRANDE_SQR_1024
-
-.align 32
-.LOOP_GRANDE_SQR_1024:
- leaq 576+128(%rsp),%r9
- leaq 448(%rsp),%r12
-
-
-
-
- vpaddq %ymm1,%ymm1,%ymm1
- vpbroadcastq 0-128(%rsi),%ymm10
- vpaddq %ymm2,%ymm2,%ymm2
- vmovdqa %ymm1,0-128(%r9)
- vpaddq %ymm3,%ymm3,%ymm3
- vmovdqa %ymm2,32-128(%r9)
- vpaddq %ymm4,%ymm4,%ymm4
- vmovdqa %ymm3,64-128(%r9)
- vpaddq %ymm5,%ymm5,%ymm5
- vmovdqa %ymm4,96-128(%r9)
- vpaddq %ymm6,%ymm6,%ymm6
- vmovdqa %ymm5,128-128(%r9)
- vpaddq %ymm7,%ymm7,%ymm7
- vmovdqa %ymm6,160-128(%r9)
- vpaddq %ymm8,%ymm8,%ymm8
- vmovdqa %ymm7,192-128(%r9)
- vpxor %ymm9,%ymm9,%ymm9
- vmovdqa %ymm8,224-128(%r9)
-
- vpmuludq 0-128(%rsi),%ymm10,%ymm0
- vpbroadcastq 32-128(%rsi),%ymm11
- vmovdqu %ymm9,288-192(%rbx)
- vpmuludq %ymm10,%ymm1,%ymm1
- vmovdqu %ymm9,320-448(%r12)
- vpmuludq %ymm10,%ymm2,%ymm2
- vmovdqu %ymm9,352-448(%r12)
- vpmuludq %ymm10,%ymm3,%ymm3
- vmovdqu %ymm9,384-448(%r12)
- vpmuludq %ymm10,%ymm4,%ymm4
- vmovdqu %ymm9,416-448(%r12)
- vpmuludq %ymm10,%ymm5,%ymm5
- vmovdqu %ymm9,448-448(%r12)
- vpmuludq %ymm10,%ymm6,%ymm6
- vmovdqu %ymm9,480-448(%r12)
- vpmuludq %ymm10,%ymm7,%ymm7
- vmovdqu %ymm9,512-448(%r12)
- vpmuludq %ymm10,%ymm8,%ymm8
- vpbroadcastq 64-128(%rsi),%ymm10
- vmovdqu %ymm9,544-448(%r12)
-
- movq %rsi,%r15
- movl $4,%r14d
- jmp .Lsqr_entry_1024
-.align 32
-.LOOP_SQR_1024:
- vpbroadcastq 32-128(%r15),%ymm11
- vpmuludq 0-128(%rsi),%ymm10,%ymm0
- vpaddq 0-192(%rbx),%ymm0,%ymm0
- vpmuludq 0-128(%r9),%ymm10,%ymm1
- vpaddq 32-192(%rbx),%ymm1,%ymm1
- vpmuludq 32-128(%r9),%ymm10,%ymm2
- vpaddq 64-192(%rbx),%ymm2,%ymm2
- vpmuludq 64-128(%r9),%ymm10,%ymm3
- vpaddq 96-192(%rbx),%ymm3,%ymm3
- vpmuludq 96-128(%r9),%ymm10,%ymm4
- vpaddq 128-192(%rbx),%ymm4,%ymm4
- vpmuludq 128-128(%r9),%ymm10,%ymm5
- vpaddq 160-192(%rbx),%ymm5,%ymm5
- vpmuludq 160-128(%r9),%ymm10,%ymm6
- vpaddq 192-192(%rbx),%ymm6,%ymm6
- vpmuludq 192-128(%r9),%ymm10,%ymm7
- vpaddq 224-192(%rbx),%ymm7,%ymm7
- vpmuludq 224-128(%r9),%ymm10,%ymm8
- vpbroadcastq 64-128(%r15),%ymm10
- vpaddq 256-192(%rbx),%ymm8,%ymm8
-.Lsqr_entry_1024:
- vmovdqu %ymm0,0-192(%rbx)
- vmovdqu %ymm1,32-192(%rbx)
-
- vpmuludq 32-128(%rsi),%ymm11,%ymm12
- vpaddq %ymm12,%ymm2,%ymm2
- vpmuludq 32-128(%r9),%ymm11,%ymm14
- vpaddq %ymm14,%ymm3,%ymm3
- vpmuludq 64-128(%r9),%ymm11,%ymm13
- vpaddq %ymm13,%ymm4,%ymm4
- vpmuludq 96-128(%r9),%ymm11,%ymm12
- vpaddq %ymm12,%ymm5,%ymm5
- vpmuludq 128-128(%r9),%ymm11,%ymm14
- vpaddq %ymm14,%ymm6,%ymm6
- vpmuludq 160-128(%r9),%ymm11,%ymm13
- vpaddq %ymm13,%ymm7,%ymm7
- vpmuludq 192-128(%r9),%ymm11,%ymm12
- vpaddq %ymm12,%ymm8,%ymm8
- vpmuludq 224-128(%r9),%ymm11,%ymm0
- vpbroadcastq 96-128(%r15),%ymm11
- vpaddq 288-192(%rbx),%ymm0,%ymm0
-
- vmovdqu %ymm2,64-192(%rbx)
- vmovdqu %ymm3,96-192(%rbx)
-
- vpmuludq 64-128(%rsi),%ymm10,%ymm13
- vpaddq %ymm13,%ymm4,%ymm4
- vpmuludq 64-128(%r9),%ymm10,%ymm12
- vpaddq %ymm12,%ymm5,%ymm5
- vpmuludq 96-128(%r9),%ymm10,%ymm14
- vpaddq %ymm14,%ymm6,%ymm6
- vpmuludq 128-128(%r9),%ymm10,%ymm13
- vpaddq %ymm13,%ymm7,%ymm7
- vpmuludq 160-128(%r9),%ymm10,%ymm12
- vpaddq %ymm12,%ymm8,%ymm8
- vpmuludq 192-128(%r9),%ymm10,%ymm14
- vpaddq %ymm14,%ymm0,%ymm0
- vpmuludq 224-128(%r9),%ymm10,%ymm1
- vpbroadcastq 128-128(%r15),%ymm10
- vpaddq 320-448(%r12),%ymm1,%ymm1
-
- vmovdqu %ymm4,128-192(%rbx)
- vmovdqu %ymm5,160-192(%rbx)
-
- vpmuludq 96-128(%rsi),%ymm11,%ymm12
- vpaddq %ymm12,%ymm6,%ymm6
- vpmuludq 96-128(%r9),%ymm11,%ymm14
- vpaddq %ymm14,%ymm7,%ymm7
- vpmuludq 128-128(%r9),%ymm11,%ymm13
- vpaddq %ymm13,%ymm8,%ymm8
- vpmuludq 160-128(%r9),%ymm11,%ymm12
- vpaddq %ymm12,%ymm0,%ymm0
- vpmuludq 192-128(%r9),%ymm11,%ymm14
- vpaddq %ymm14,%ymm1,%ymm1
- vpmuludq 224-128(%r9),%ymm11,%ymm2
- vpbroadcastq 160-128(%r15),%ymm11
- vpaddq 352-448(%r12),%ymm2,%ymm2
-
- vmovdqu %ymm6,192-192(%rbx)
- vmovdqu %ymm7,224-192(%rbx)
-
- vpmuludq 128-128(%rsi),%ymm10,%ymm12
- vpaddq %ymm12,%ymm8,%ymm8
- vpmuludq 128-128(%r9),%ymm10,%ymm14
- vpaddq %ymm14,%ymm0,%ymm0
- vpmuludq 160-128(%r9),%ymm10,%ymm13
- vpaddq %ymm13,%ymm1,%ymm1
- vpmuludq 192-128(%r9),%ymm10,%ymm12
- vpaddq %ymm12,%ymm2,%ymm2
- vpmuludq 224-128(%r9),%ymm10,%ymm3
- vpbroadcastq 192-128(%r15),%ymm10
- vpaddq 384-448(%r12),%ymm3,%ymm3
-
- vmovdqu %ymm8,256-192(%rbx)
- vmovdqu %ymm0,288-192(%rbx)
- leaq 8(%rbx),%rbx
-
- vpmuludq 160-128(%rsi),%ymm11,%ymm13
- vpaddq %ymm13,%ymm1,%ymm1
- vpmuludq 160-128(%r9),%ymm11,%ymm12
- vpaddq %ymm12,%ymm2,%ymm2
- vpmuludq 192-128(%r9),%ymm11,%ymm14
- vpaddq %ymm14,%ymm3,%ymm3
- vpmuludq 224-128(%r9),%ymm11,%ymm4
- vpbroadcastq 224-128(%r15),%ymm11
- vpaddq 416-448(%r12),%ymm4,%ymm4
-
- vmovdqu %ymm1,320-448(%r12)
- vmovdqu %ymm2,352-448(%r12)
-
- vpmuludq 192-128(%rsi),%ymm10,%ymm12
- vpaddq %ymm12,%ymm3,%ymm3
- vpmuludq 192-128(%r9),%ymm10,%ymm14
- vpbroadcastq 256-128(%r15),%ymm0
- vpaddq %ymm14,%ymm4,%ymm4
- vpmuludq 224-128(%r9),%ymm10,%ymm5
- vpbroadcastq 0+8-128(%r15),%ymm10
- vpaddq 448-448(%r12),%ymm5,%ymm5
-
- vmovdqu %ymm3,384-448(%r12)
- vmovdqu %ymm4,416-448(%r12)
- leaq 8(%r15),%r15
-
- vpmuludq 224-128(%rsi),%ymm11,%ymm12
- vpaddq %ymm12,%ymm5,%ymm5
- vpmuludq 224-128(%r9),%ymm11,%ymm6
- vpaddq 480-448(%r12),%ymm6,%ymm6
-
- vpmuludq 256-128(%rsi),%ymm0,%ymm7
- vmovdqu %ymm5,448-448(%r12)
- vpaddq 512-448(%r12),%ymm7,%ymm7
- vmovdqu %ymm6,480-448(%r12)
- vmovdqu %ymm7,512-448(%r12)
- leaq 8(%r12),%r12
-
- decl %r14d
- jnz .LOOP_SQR_1024
-
- vmovdqu 256(%rsp),%ymm8
- vmovdqu 288(%rsp),%ymm1
- vmovdqu 320(%rsp),%ymm2
- leaq 192(%rsp),%rbx
-
- vpsrlq $29,%ymm8,%ymm14
- vpand %ymm15,%ymm8,%ymm8
- vpsrlq $29,%ymm1,%ymm11
- vpand %ymm15,%ymm1,%ymm1
-
- vpermq $0x93,%ymm14,%ymm14
- vpxor %ymm9,%ymm9,%ymm9
- vpermq $0x93,%ymm11,%ymm11
-
- vpblendd $3,%ymm9,%ymm14,%ymm10
- vpblendd $3,%ymm14,%ymm11,%ymm14
- vpaddq %ymm10,%ymm8,%ymm8
- vpblendd $3,%ymm11,%ymm9,%ymm11
- vpaddq %ymm14,%ymm1,%ymm1
- vpaddq %ymm11,%ymm2,%ymm2
- vmovdqu %ymm1,288-192(%rbx)
- vmovdqu %ymm2,320-192(%rbx)
-
- movq (%rsp),%rax
- movq 8(%rsp),%r10
- movq 16(%rsp),%r11
- movq 24(%rsp),%r12
- vmovdqu 32(%rsp),%ymm1
- vmovdqu 64-192(%rbx),%ymm2
- vmovdqu 96-192(%rbx),%ymm3
- vmovdqu 128-192(%rbx),%ymm4
- vmovdqu 160-192(%rbx),%ymm5
- vmovdqu 192-192(%rbx),%ymm6
- vmovdqu 224-192(%rbx),%ymm7
-
- movq %rax,%r9
- imull %ecx,%eax
- andl $0x1fffffff,%eax
- vmovd %eax,%xmm12
-
- movq %rax,%rdx
- imulq -128(%r13),%rax
- vpbroadcastq %xmm12,%ymm12
- addq %rax,%r9
- movq %rdx,%rax
- imulq 8-128(%r13),%rax
- shrq $29,%r9
- addq %rax,%r10
- movq %rdx,%rax
- imulq 16-128(%r13),%rax
- addq %r9,%r10
- addq %rax,%r11
- imulq 24-128(%r13),%rdx
- addq %rdx,%r12
-
- movq %r10,%rax
- imull %ecx,%eax
- andl $0x1fffffff,%eax
-
- movl $9,%r14d
- jmp .LOOP_REDUCE_1024
-
-.align 32
-.LOOP_REDUCE_1024:
- vmovd %eax,%xmm13
- vpbroadcastq %xmm13,%ymm13
-
- vpmuludq 32-128(%r13),%ymm12,%ymm10
- movq %rax,%rdx
- imulq -128(%r13),%rax
- vpaddq %ymm10,%ymm1,%ymm1
- addq %rax,%r10
- vpmuludq 64-128(%r13),%ymm12,%ymm14
- movq %rdx,%rax
- imulq 8-128(%r13),%rax
- vpaddq %ymm14,%ymm2,%ymm2
- vpmuludq 96-128(%r13),%ymm12,%ymm11
-.byte 0x67
- addq %rax,%r11
-.byte 0x67
- movq %rdx,%rax
- imulq 16-128(%r13),%rax
- shrq $29,%r10
- vpaddq %ymm11,%ymm3,%ymm3
- vpmuludq 128-128(%r13),%ymm12,%ymm10
- addq %rax,%r12
- addq %r10,%r11
- vpaddq %ymm10,%ymm4,%ymm4
- vpmuludq 160-128(%r13),%ymm12,%ymm14
- movq %r11,%rax
- imull %ecx,%eax
- vpaddq %ymm14,%ymm5,%ymm5
- vpmuludq 192-128(%r13),%ymm12,%ymm11
- andl $0x1fffffff,%eax
- vpaddq %ymm11,%ymm6,%ymm6
- vpmuludq 224-128(%r13),%ymm12,%ymm10
- vpaddq %ymm10,%ymm7,%ymm7
- vpmuludq 256-128(%r13),%ymm12,%ymm14
- vmovd %eax,%xmm12
-
- vpaddq %ymm14,%ymm8,%ymm8
-
- vpbroadcastq %xmm12,%ymm12
-
- vpmuludq 32-8-128(%r13),%ymm13,%ymm11
- vmovdqu 96-8-128(%r13),%ymm14
- movq %rax,%rdx
- imulq -128(%r13),%rax
- vpaddq %ymm11,%ymm1,%ymm1
- vpmuludq 64-8-128(%r13),%ymm13,%ymm10
- vmovdqu 128-8-128(%r13),%ymm11
- addq %rax,%r11
- movq %rdx,%rax
- imulq 8-128(%r13),%rax
- vpaddq %ymm10,%ymm2,%ymm2
- addq %r12,%rax
- shrq $29,%r11
- vpmuludq %ymm13,%ymm14,%ymm14
- vmovdqu 160-8-128(%r13),%ymm10
- addq %r11,%rax
- vpaddq %ymm14,%ymm3,%ymm3
- vpmuludq %ymm13,%ymm11,%ymm11
- vmovdqu 192-8-128(%r13),%ymm14
-.byte 0x67
- movq %rax,%r12
- imull %ecx,%eax
- vpaddq %ymm11,%ymm4,%ymm4
- vpmuludq %ymm13,%ymm10,%ymm10
-.byte 0xc4,0x41,0x7e,0x6f,0x9d,0x58,0x00,0x00,0x00
- andl $0x1fffffff,%eax
- vpaddq %ymm10,%ymm5,%ymm5
- vpmuludq %ymm13,%ymm14,%ymm14
- vmovdqu 256-8-128(%r13),%ymm10
- vpaddq %ymm14,%ymm6,%ymm6
- vpmuludq %ymm13,%ymm11,%ymm11
- vmovdqu 288-8-128(%r13),%ymm9
- vmovd %eax,%xmm0
- imulq -128(%r13),%rax
- vpaddq %ymm11,%ymm7,%ymm7
- vpmuludq %ymm13,%ymm10,%ymm10
- vmovdqu 32-16-128(%r13),%ymm14
- vpbroadcastq %xmm0,%ymm0
- vpaddq %ymm10,%ymm8,%ymm8
- vpmuludq %ymm13,%ymm9,%ymm9
- vmovdqu 64-16-128(%r13),%ymm11
- addq %rax,%r12
-
- vmovdqu 32-24-128(%r13),%ymm13
- vpmuludq %ymm12,%ymm14,%ymm14
- vmovdqu 96-16-128(%r13),%ymm10
- vpaddq %ymm14,%ymm1,%ymm1
- vpmuludq %ymm0,%ymm13,%ymm13
- vpmuludq %ymm12,%ymm11,%ymm11
-.byte 0xc4,0x41,0x7e,0x6f,0xb5,0xf0,0xff,0xff,0xff
- vpaddq %ymm1,%ymm13,%ymm13
- vpaddq %ymm11,%ymm2,%ymm2
- vpmuludq %ymm12,%ymm10,%ymm10
- vmovdqu 160-16-128(%r13),%ymm11
-.byte 0x67
- vmovq %xmm13,%rax
- vmovdqu %ymm13,(%rsp)
- vpaddq %ymm10,%ymm3,%ymm3
- vpmuludq %ymm12,%ymm14,%ymm14
- vmovdqu 192-16-128(%r13),%ymm10
- vpaddq %ymm14,%ymm4,%ymm4
- vpmuludq %ymm12,%ymm11,%ymm11
- vmovdqu 224-16-128(%r13),%ymm14
- vpaddq %ymm11,%ymm5,%ymm5
- vpmuludq %ymm12,%ymm10,%ymm10
- vmovdqu 256-16-128(%r13),%ymm11
- vpaddq %ymm10,%ymm6,%ymm6
- vpmuludq %ymm12,%ymm14,%ymm14
- shrq $29,%r12
- vmovdqu 288-16-128(%r13),%ymm10
- addq %r12,%rax
- vpaddq %ymm14,%ymm7,%ymm7
- vpmuludq %ymm12,%ymm11,%ymm11
-
- movq %rax,%r9
- imull %ecx,%eax
- vpaddq %ymm11,%ymm8,%ymm8
- vpmuludq %ymm12,%ymm10,%ymm10
- andl $0x1fffffff,%eax
- vmovd %eax,%xmm12
- vmovdqu 96-24-128(%r13),%ymm11
-.byte 0x67
- vpaddq %ymm10,%ymm9,%ymm9
- vpbroadcastq %xmm12,%ymm12
-
- vpmuludq 64-24-128(%r13),%ymm0,%ymm14
- vmovdqu 128-24-128(%r13),%ymm10
- movq %rax,%rdx
- imulq -128(%r13),%rax
- movq 8(%rsp),%r10
- vpaddq %ymm14,%ymm2,%ymm1
- vpmuludq %ymm0,%ymm11,%ymm11
- vmovdqu 160-24-128(%r13),%ymm14
- addq %rax,%r9
- movq %rdx,%rax
- imulq 8-128(%r13),%rax
-.byte 0x67
- shrq $29,%r9
- movq 16(%rsp),%r11
- vpaddq %ymm11,%ymm3,%ymm2
- vpmuludq %ymm0,%ymm10,%ymm10
- vmovdqu 192-24-128(%r13),%ymm11
- addq %rax,%r10
- movq %rdx,%rax
- imulq 16-128(%r13),%rax
- vpaddq %ymm10,%ymm4,%ymm3
- vpmuludq %ymm0,%ymm14,%ymm14
- vmovdqu 224-24-128(%r13),%ymm10
- imulq 24-128(%r13),%rdx
- addq %rax,%r11
- leaq (%r9,%r10,1),%rax
- vpaddq %ymm14,%ymm5,%ymm4
- vpmuludq %ymm0,%ymm11,%ymm11
- vmovdqu 256-24-128(%r13),%ymm14
- movq %rax,%r10
- imull %ecx,%eax
- vpmuludq %ymm0,%ymm10,%ymm10
- vpaddq %ymm11,%ymm6,%ymm5
- vmovdqu 288-24-128(%r13),%ymm11
- andl $0x1fffffff,%eax
- vpaddq %ymm10,%ymm7,%ymm6
- vpmuludq %ymm0,%ymm14,%ymm14
- addq 24(%rsp),%rdx
- vpaddq %ymm14,%ymm8,%ymm7
- vpmuludq %ymm0,%ymm11,%ymm11
- vpaddq %ymm11,%ymm9,%ymm8
- vmovq %r12,%xmm9
- movq %rdx,%r12
-
- decl %r14d
- jnz .LOOP_REDUCE_1024
- leaq 448(%rsp),%r12
- vpaddq %ymm9,%ymm13,%ymm0
- vpxor %ymm9,%ymm9,%ymm9
-
- vpaddq 288-192(%rbx),%ymm0,%ymm0
- vpaddq 320-448(%r12),%ymm1,%ymm1
- vpaddq 352-448(%r12),%ymm2,%ymm2
- vpaddq 384-448(%r12),%ymm3,%ymm3
- vpaddq 416-448(%r12),%ymm4,%ymm4
- vpaddq 448-448(%r12),%ymm5,%ymm5
- vpaddq 480-448(%r12),%ymm6,%ymm6
- vpaddq 512-448(%r12),%ymm7,%ymm7
- vpaddq 544-448(%r12),%ymm8,%ymm8
-
- vpsrlq $29,%ymm0,%ymm14
- vpand %ymm15,%ymm0,%ymm0
- vpsrlq $29,%ymm1,%ymm11
- vpand %ymm15,%ymm1,%ymm1
- vpsrlq $29,%ymm2,%ymm12
- vpermq $0x93,%ymm14,%ymm14
- vpand %ymm15,%ymm2,%ymm2
- vpsrlq $29,%ymm3,%ymm13
- vpermq $0x93,%ymm11,%ymm11
- vpand %ymm15,%ymm3,%ymm3
- vpermq $0x93,%ymm12,%ymm12
-
- vpblendd $3,%ymm9,%ymm14,%ymm10
- vpermq $0x93,%ymm13,%ymm13
- vpblendd $3,%ymm14,%ymm11,%ymm14
- vpaddq %ymm10,%ymm0,%ymm0
- vpblendd $3,%ymm11,%ymm12,%ymm11
- vpaddq %ymm14,%ymm1,%ymm1
- vpblendd $3,%ymm12,%ymm13,%ymm12
- vpaddq %ymm11,%ymm2,%ymm2
- vpblendd $3,%ymm13,%ymm9,%ymm13
- vpaddq %ymm12,%ymm3,%ymm3
- vpaddq %ymm13,%ymm4,%ymm4
-
- vpsrlq $29,%ymm0,%ymm14
- vpand %ymm15,%ymm0,%ymm0
- vpsrlq $29,%ymm1,%ymm11
- vpand %ymm15,%ymm1,%ymm1
- vpsrlq $29,%ymm2,%ymm12
- vpermq $0x93,%ymm14,%ymm14
- vpand %ymm15,%ymm2,%ymm2
- vpsrlq $29,%ymm3,%ymm13
- vpermq $0x93,%ymm11,%ymm11
- vpand %ymm15,%ymm3,%ymm3
- vpermq $0x93,%ymm12,%ymm12
-
- vpblendd $3,%ymm9,%ymm14,%ymm10
- vpermq $0x93,%ymm13,%ymm13
- vpblendd $3,%ymm14,%ymm11,%ymm14
- vpaddq %ymm10,%ymm0,%ymm0
- vpblendd $3,%ymm11,%ymm12,%ymm11
- vpaddq %ymm14,%ymm1,%ymm1
- vmovdqu %ymm0,0-128(%rdi)
- vpblendd $3,%ymm12,%ymm13,%ymm12
- vpaddq %ymm11,%ymm2,%ymm2
- vmovdqu %ymm1,32-128(%rdi)
- vpblendd $3,%ymm13,%ymm9,%ymm13
- vpaddq %ymm12,%ymm3,%ymm3
- vmovdqu %ymm2,64-128(%rdi)
- vpaddq %ymm13,%ymm4,%ymm4
- vmovdqu %ymm3,96-128(%rdi)
- vpsrlq $29,%ymm4,%ymm14
- vpand %ymm15,%ymm4,%ymm4
- vpsrlq $29,%ymm5,%ymm11
- vpand %ymm15,%ymm5,%ymm5
- vpsrlq $29,%ymm6,%ymm12
- vpermq $0x93,%ymm14,%ymm14
- vpand %ymm15,%ymm6,%ymm6
- vpsrlq $29,%ymm7,%ymm13
- vpermq $0x93,%ymm11,%ymm11
- vpand %ymm15,%ymm7,%ymm7
- vpsrlq $29,%ymm8,%ymm0
- vpermq $0x93,%ymm12,%ymm12
- vpand %ymm15,%ymm8,%ymm8
- vpermq $0x93,%ymm13,%ymm13
-
- vpblendd $3,%ymm9,%ymm14,%ymm10
- vpermq $0x93,%ymm0,%ymm0
- vpblendd $3,%ymm14,%ymm11,%ymm14
- vpaddq %ymm10,%ymm4,%ymm4
- vpblendd $3,%ymm11,%ymm12,%ymm11
- vpaddq %ymm14,%ymm5,%ymm5
- vpblendd $3,%ymm12,%ymm13,%ymm12
- vpaddq %ymm11,%ymm6,%ymm6
- vpblendd $3,%ymm13,%ymm0,%ymm13
- vpaddq %ymm12,%ymm7,%ymm7
- vpaddq %ymm13,%ymm8,%ymm8
-
- vpsrlq $29,%ymm4,%ymm14
- vpand %ymm15,%ymm4,%ymm4
- vpsrlq $29,%ymm5,%ymm11
- vpand %ymm15,%ymm5,%ymm5
- vpsrlq $29,%ymm6,%ymm12
- vpermq $0x93,%ymm14,%ymm14
- vpand %ymm15,%ymm6,%ymm6
- vpsrlq $29,%ymm7,%ymm13
- vpermq $0x93,%ymm11,%ymm11
- vpand %ymm15,%ymm7,%ymm7
- vpsrlq $29,%ymm8,%ymm0
- vpermq $0x93,%ymm12,%ymm12
- vpand %ymm15,%ymm8,%ymm8
- vpermq $0x93,%ymm13,%ymm13
-
- vpblendd $3,%ymm9,%ymm14,%ymm10
- vpermq $0x93,%ymm0,%ymm0
- vpblendd $3,%ymm14,%ymm11,%ymm14
- vpaddq %ymm10,%ymm4,%ymm4
- vpblendd $3,%ymm11,%ymm12,%ymm11
- vpaddq %ymm14,%ymm5,%ymm5
- vmovdqu %ymm4,128-128(%rdi)
- vpblendd $3,%ymm12,%ymm13,%ymm12
- vpaddq %ymm11,%ymm6,%ymm6
- vmovdqu %ymm5,160-128(%rdi)
- vpblendd $3,%ymm13,%ymm0,%ymm13
- vpaddq %ymm12,%ymm7,%ymm7
- vmovdqu %ymm6,192-128(%rdi)
- vpaddq %ymm13,%ymm8,%ymm8
- vmovdqu %ymm7,224-128(%rdi)
- vmovdqu %ymm8,256-128(%rdi)
-
- movq %rdi,%rsi
- decl %r8d
- jne .LOOP_GRANDE_SQR_1024
-
- vzeroall
- movq %rbp,%rax
-.cfi_def_cfa_register %rax
- movq -48(%rax),%r15
-.cfi_restore %r15
- movq -40(%rax),%r14
-.cfi_restore %r14
- movq -32(%rax),%r13
-.cfi_restore %r13
- movq -24(%rax),%r12
-.cfi_restore %r12
- movq -16(%rax),%rbp
-.cfi_restore %rbp
- movq -8(%rax),%rbx
-.cfi_restore %rbx
- leaq (%rax),%rsp
-.cfi_def_cfa_register %rsp
-.Lsqr_1024_epilogue:
- .byte 0xf3,0xc3
-.cfi_endproc
-.size rsaz_1024_sqr_avx2,.-rsaz_1024_sqr_avx2
-.globl rsaz_1024_mul_avx2
-.type rsaz_1024_mul_avx2,@function
-.align 64
rsaz_1024_mul_avx2:
-.cfi_startproc
- leaq (%rsp),%rax
-.cfi_def_cfa_register %rax
- pushq %rbx
-.cfi_offset %rbx,-16
- pushq %rbp
-.cfi_offset %rbp,-24
- pushq %r12
-.cfi_offset %r12,-32
- pushq %r13
-.cfi_offset %r13,-40
- pushq %r14
-.cfi_offset %r14,-48
- pushq %r15
-.cfi_offset %r15,-56
- movq %rax,%rbp
-.cfi_def_cfa_register %rbp
- vzeroall
- movq %rdx,%r13
- subq $64,%rsp
-
-
-
-
-
-
-.byte 0x67,0x67
- movq %rsi,%r15
- andq $4095,%r15
- addq $320,%r15
- shrq $12,%r15
- movq %rsi,%r15
- cmovnzq %r13,%rsi
- cmovnzq %r15,%r13
-
- movq %rcx,%r15
- subq $-128,%rsi
- subq $-128,%rcx
- subq $-128,%rdi
-
- andq $4095,%r15
- addq $320,%r15
-.byte 0x67,0x67
- shrq $12,%r15
- jz .Lmul_1024_no_n_copy
-
-
-
-
-
- subq $320,%rsp
- vmovdqu 0-128(%rcx),%ymm0
- andq $-512,%rsp
- vmovdqu 32-128(%rcx),%ymm1
- vmovdqu 64-128(%rcx),%ymm2
- vmovdqu 96-128(%rcx),%ymm3
- vmovdqu 128-128(%rcx),%ymm4
- vmovdqu 160-128(%rcx),%ymm5
- vmovdqu 192-128(%rcx),%ymm6
- vmovdqu 224-128(%rcx),%ymm7
- vmovdqu 256-128(%rcx),%ymm8
- leaq 64+128(%rsp),%rcx
- vmovdqu %ymm0,0-128(%rcx)
- vpxor %ymm0,%ymm0,%ymm0
- vmovdqu %ymm1,32-128(%rcx)
- vpxor %ymm1,%ymm1,%ymm1
- vmovdqu %ymm2,64-128(%rcx)
- vpxor %ymm2,%ymm2,%ymm2
- vmovdqu %ymm3,96-128(%rcx)
- vpxor %ymm3,%ymm3,%ymm3
- vmovdqu %ymm4,128-128(%rcx)
- vpxor %ymm4,%ymm4,%ymm4
- vmovdqu %ymm5,160-128(%rcx)
- vpxor %ymm5,%ymm5,%ymm5
- vmovdqu %ymm6,192-128(%rcx)
- vpxor %ymm6,%ymm6,%ymm6
- vmovdqu %ymm7,224-128(%rcx)
- vpxor %ymm7,%ymm7,%ymm7
- vmovdqu %ymm8,256-128(%rcx)
- vmovdqa %ymm0,%ymm8
- vmovdqu %ymm9,288-128(%rcx)
-.Lmul_1024_no_n_copy:
- andq $-64,%rsp
-
- movq (%r13),%rbx
- vpbroadcastq (%r13),%ymm10
- vmovdqu %ymm0,(%rsp)
- xorq %r9,%r9
-.byte 0x67
- xorq %r10,%r10
- xorq %r11,%r11
- xorq %r12,%r12
-
- vmovdqu .Land_mask(%rip),%ymm15
- movl $9,%r14d
- vmovdqu %ymm9,288-128(%rdi)
- jmp .Loop_mul_1024
-
-.align 32
-.Loop_mul_1024:
- vpsrlq $29,%ymm3,%ymm9
- movq %rbx,%rax
- imulq -128(%rsi),%rax
- addq %r9,%rax
- movq %rbx,%r10
- imulq 8-128(%rsi),%r10
- addq 8(%rsp),%r10
-
- movq %rax,%r9
- imull %r8d,%eax
- andl $0x1fffffff,%eax
-
- movq %rbx,%r11
- imulq 16-128(%rsi),%r11
- addq 16(%rsp),%r11
-
- movq %rbx,%r12
- imulq 24-128(%rsi),%r12
- addq 24(%rsp),%r12
- vpmuludq 32-128(%rsi),%ymm10,%ymm0
- vmovd %eax,%xmm11
- vpaddq %ymm0,%ymm1,%ymm1
- vpmuludq 64-128(%rsi),%ymm10,%ymm12
- vpbroadcastq %xmm11,%ymm11
- vpaddq %ymm12,%ymm2,%ymm2
- vpmuludq 96-128(%rsi),%ymm10,%ymm13
- vpand %ymm15,%ymm3,%ymm3
- vpaddq %ymm13,%ymm3,%ymm3
- vpmuludq 128-128(%rsi),%ymm10,%ymm0
- vpaddq %ymm0,%ymm4,%ymm4
- vpmuludq 160-128(%rsi),%ymm10,%ymm12
- vpaddq %ymm12,%ymm5,%ymm5
- vpmuludq 192-128(%rsi),%ymm10,%ymm13
- vpaddq %ymm13,%ymm6,%ymm6
- vpmuludq 224-128(%rsi),%ymm10,%ymm0
- vpermq $0x93,%ymm9,%ymm9
- vpaddq %ymm0,%ymm7,%ymm7
- vpmuludq 256-128(%rsi),%ymm10,%ymm12
- vpbroadcastq 8(%r13),%ymm10
- vpaddq %ymm12,%ymm8,%ymm8
-
- movq %rax,%rdx
- imulq -128(%rcx),%rax
- addq %rax,%r9
- movq %rdx,%rax
- imulq 8-128(%rcx),%rax
- addq %rax,%r10
- movq %rdx,%rax
- imulq 16-128(%rcx),%rax
- addq %rax,%r11
- shrq $29,%r9
- imulq 24-128(%rcx),%rdx
- addq %rdx,%r12
- addq %r9,%r10
-
- vpmuludq 32-128(%rcx),%ymm11,%ymm13
- vmovq %xmm10,%rbx
- vpaddq %ymm13,%ymm1,%ymm1
- vpmuludq 64-128(%rcx),%ymm11,%ymm0
- vpaddq %ymm0,%ymm2,%ymm2
- vpmuludq 96-128(%rcx),%ymm11,%ymm12
- vpaddq %ymm12,%ymm3,%ymm3
- vpmuludq 128-128(%rcx),%ymm11,%ymm13
- vpaddq %ymm13,%ymm4,%ymm4
- vpmuludq 160-128(%rcx),%ymm11,%ymm0
- vpaddq %ymm0,%ymm5,%ymm5
- vpmuludq 192-128(%rcx),%ymm11,%ymm12
- vpaddq %ymm12,%ymm6,%ymm6
- vpmuludq 224-128(%rcx),%ymm11,%ymm13
- vpblendd $3,%ymm14,%ymm9,%ymm12
- vpaddq %ymm13,%ymm7,%ymm7
- vpmuludq 256-128(%rcx),%ymm11,%ymm0
- vpaddq %ymm12,%ymm3,%ymm3
- vpaddq %ymm0,%ymm8,%ymm8
-
- movq %rbx,%rax
- imulq -128(%rsi),%rax
- addq %rax,%r10
- vmovdqu -8+32-128(%rsi),%ymm12
- movq %rbx,%rax
- imulq 8-128(%rsi),%rax
- addq %rax,%r11
- vmovdqu -8+64-128(%rsi),%ymm13
-
- movq %r10,%rax
- vpblendd $0xfc,%ymm14,%ymm9,%ymm9
- imull %r8d,%eax
- vpaddq %ymm9,%ymm4,%ymm4
- andl $0x1fffffff,%eax
-
- imulq 16-128(%rsi),%rbx
- addq %rbx,%r12
- vpmuludq %ymm10,%ymm12,%ymm12
- vmovd %eax,%xmm11
- vmovdqu -8+96-128(%rsi),%ymm0
- vpaddq %ymm12,%ymm1,%ymm1
- vpmuludq %ymm10,%ymm13,%ymm13
- vpbroadcastq %xmm11,%ymm11
- vmovdqu -8+128-128(%rsi),%ymm12
- vpaddq %ymm13,%ymm2,%ymm2
- vpmuludq %ymm10,%ymm0,%ymm0
- vmovdqu -8+160-128(%rsi),%ymm13
- vpaddq %ymm0,%ymm3,%ymm3
- vpmuludq %ymm10,%ymm12,%ymm12
- vmovdqu -8+192-128(%rsi),%ymm0
- vpaddq %ymm12,%ymm4,%ymm4
- vpmuludq %ymm10,%ymm13,%ymm13
- vmovdqu -8+224-128(%rsi),%ymm12
- vpaddq %ymm13,%ymm5,%ymm5
- vpmuludq %ymm10,%ymm0,%ymm0
- vmovdqu -8+256-128(%rsi),%ymm13
- vpaddq %ymm0,%ymm6,%ymm6
- vpmuludq %ymm10,%ymm12,%ymm12
- vmovdqu -8+288-128(%rsi),%ymm9
- vpaddq %ymm12,%ymm7,%ymm7
- vpmuludq %ymm10,%ymm13,%ymm13
- vpaddq %ymm13,%ymm8,%ymm8
- vpmuludq %ymm10,%ymm9,%ymm9
- vpbroadcastq 16(%r13),%ymm10
-
- movq %rax,%rdx
- imulq -128(%rcx),%rax
- addq %rax,%r10
- vmovdqu -8+32-128(%rcx),%ymm0
- movq %rdx,%rax
- imulq 8-128(%rcx),%rax
- addq %rax,%r11
- vmovdqu -8+64-128(%rcx),%ymm12
- shrq $29,%r10
- imulq 16-128(%rcx),%rdx
- addq %rdx,%r12
- addq %r10,%r11
-
- vpmuludq %ymm11,%ymm0,%ymm0
- vmovq %xmm10,%rbx
- vmovdqu -8+96-128(%rcx),%ymm13
- vpaddq %ymm0,%ymm1,%ymm1
- vpmuludq %ymm11,%ymm12,%ymm12
- vmovdqu -8+128-128(%rcx),%ymm0
- vpaddq %ymm12,%ymm2,%ymm2
- vpmuludq %ymm11,%ymm13,%ymm13
- vmovdqu -8+160-128(%rcx),%ymm12
- vpaddq %ymm13,%ymm3,%ymm3
- vpmuludq %ymm11,%ymm0,%ymm0
- vmovdqu -8+192-128(%rcx),%ymm13
- vpaddq %ymm0,%ymm4,%ymm4
- vpmuludq %ymm11,%ymm12,%ymm12
- vmovdqu -8+224-128(%rcx),%ymm0
- vpaddq %ymm12,%ymm5,%ymm5
- vpmuludq %ymm11,%ymm13,%ymm13
- vmovdqu -8+256-128(%rcx),%ymm12
- vpaddq %ymm13,%ymm6,%ymm6
- vpmuludq %ymm11,%ymm0,%ymm0
- vmovdqu -8+288-128(%rcx),%ymm13
- vpaddq %ymm0,%ymm7,%ymm7
- vpmuludq %ymm11,%ymm12,%ymm12
- vpaddq %ymm12,%ymm8,%ymm8
- vpmuludq %ymm11,%ymm13,%ymm13
- vpaddq %ymm13,%ymm9,%ymm9
-
- vmovdqu -16+32-128(%rsi),%ymm0
- movq %rbx,%rax
- imulq -128(%rsi),%rax
- addq %r11,%rax
-
- vmovdqu -16+64-128(%rsi),%ymm12
- movq %rax,%r11
- imull %r8d,%eax
- andl $0x1fffffff,%eax
-
- imulq 8-128(%rsi),%rbx
- addq %rbx,%r12
- vpmuludq %ymm10,%ymm0,%ymm0
- vmovd %eax,%xmm11
- vmovdqu -16+96-128(%rsi),%ymm13
- vpaddq %ymm0,%ymm1,%ymm1
- vpmuludq %ymm10,%ymm12,%ymm12
- vpbroadcastq %xmm11,%ymm11
- vmovdqu -16+128-128(%rsi),%ymm0
- vpaddq %ymm12,%ymm2,%ymm2
- vpmuludq %ymm10,%ymm13,%ymm13
- vmovdqu -16+160-128(%rsi),%ymm12
- vpaddq %ymm13,%ymm3,%ymm3
- vpmuludq %ymm10,%ymm0,%ymm0
- vmovdqu -16+192-128(%rsi),%ymm13
- vpaddq %ymm0,%ymm4,%ymm4
- vpmuludq %ymm10,%ymm12,%ymm12
- vmovdqu -16+224-128(%rsi),%ymm0
- vpaddq %ymm12,%ymm5,%ymm5
- vpmuludq %ymm10,%ymm13,%ymm13
- vmovdqu -16+256-128(%rsi),%ymm12
- vpaddq %ymm13,%ymm6,%ymm6
- vpmuludq %ymm10,%ymm0,%ymm0
- vmovdqu -16+288-128(%rsi),%ymm13
- vpaddq %ymm0,%ymm7,%ymm7
- vpmuludq %ymm10,%ymm12,%ymm12
- vpaddq %ymm12,%ymm8,%ymm8
- vpmuludq %ymm10,%ymm13,%ymm13
- vpbroadcastq 24(%r13),%ymm10
- vpaddq %ymm13,%ymm9,%ymm9
-
- vmovdqu -16+32-128(%rcx),%ymm0
- movq %rax,%rdx
- imulq -128(%rcx),%rax
- addq %rax,%r11
- vmovdqu -16+64-128(%rcx),%ymm12
- imulq 8-128(%rcx),%rdx
- addq %rdx,%r12
- shrq $29,%r11
-
- vpmuludq %ymm11,%ymm0,%ymm0
- vmovq %xmm10,%rbx
- vmovdqu -16+96-128(%rcx),%ymm13
- vpaddq %ymm0,%ymm1,%ymm1
- vpmuludq %ymm11,%ymm12,%ymm12
- vmovdqu -16+128-128(%rcx),%ymm0
- vpaddq %ymm12,%ymm2,%ymm2
- vpmuludq %ymm11,%ymm13,%ymm13
- vmovdqu -16+160-128(%rcx),%ymm12
- vpaddq %ymm13,%ymm3,%ymm3
- vpmuludq %ymm11,%ymm0,%ymm0
- vmovdqu -16+192-128(%rcx),%ymm13
- vpaddq %ymm0,%ymm4,%ymm4
- vpmuludq %ymm11,%ymm12,%ymm12
- vmovdqu -16+224-128(%rcx),%ymm0
- vpaddq %ymm12,%ymm5,%ymm5
- vpmuludq %ymm11,%ymm13,%ymm13
- vmovdqu -16+256-128(%rcx),%ymm12
- vpaddq %ymm13,%ymm6,%ymm6
- vpmuludq %ymm11,%ymm0,%ymm0
- vmovdqu -16+288-128(%rcx),%ymm13
- vpaddq %ymm0,%ymm7,%ymm7
- vpmuludq %ymm11,%ymm12,%ymm12
- vmovdqu -24+32-128(%rsi),%ymm0
- vpaddq %ymm12,%ymm8,%ymm8
- vpmuludq %ymm11,%ymm13,%ymm13
- vmovdqu -24+64-128(%rsi),%ymm12
- vpaddq %ymm13,%ymm9,%ymm9
-
- addq %r11,%r12
- imulq -128(%rsi),%rbx
- addq %rbx,%r12
-
- movq %r12,%rax
- imull %r8d,%eax
- andl $0x1fffffff,%eax
-
- vpmuludq %ymm10,%ymm0,%ymm0
- vmovd %eax,%xmm11
- vmovdqu -24+96-128(%rsi),%ymm13
- vpaddq %ymm0,%ymm1,%ymm1
- vpmuludq %ymm10,%ymm12,%ymm12
- vpbroadcastq %xmm11,%ymm11
- vmovdqu -24+128-128(%rsi),%ymm0
- vpaddq %ymm12,%ymm2,%ymm2
- vpmuludq %ymm10,%ymm13,%ymm13
- vmovdqu -24+160-128(%rsi),%ymm12
- vpaddq %ymm13,%ymm3,%ymm3
- vpmuludq %ymm10,%ymm0,%ymm0
- vmovdqu -24+192-128(%rsi),%ymm13
- vpaddq %ymm0,%ymm4,%ymm4
- vpmuludq %ymm10,%ymm12,%ymm12
- vmovdqu -24+224-128(%rsi),%ymm0
- vpaddq %ymm12,%ymm5,%ymm5
- vpmuludq %ymm10,%ymm13,%ymm13
- vmovdqu -24+256-128(%rsi),%ymm12
- vpaddq %ymm13,%ymm6,%ymm6
- vpmuludq %ymm10,%ymm0,%ymm0
- vmovdqu -24+288-128(%rsi),%ymm13
- vpaddq %ymm0,%ymm7,%ymm7
- vpmuludq %ymm10,%ymm12,%ymm12
- vpaddq %ymm12,%ymm8,%ymm8
- vpmuludq %ymm10,%ymm13,%ymm13
- vpbroadcastq 32(%r13),%ymm10
- vpaddq %ymm13,%ymm9,%ymm9
- addq $32,%r13
-
- vmovdqu -24+32-128(%rcx),%ymm0
- imulq -128(%rcx),%rax
- addq %rax,%r12
- shrq $29,%r12
-
- vmovdqu -24+64-128(%rcx),%ymm12
- vpmuludq %ymm11,%ymm0,%ymm0
- vmovq %xmm10,%rbx
- vmovdqu -24+96-128(%rcx),%ymm13
- vpaddq %ymm0,%ymm1,%ymm0
- vpmuludq %ymm11,%ymm12,%ymm12
- vmovdqu %ymm0,(%rsp)
- vpaddq %ymm12,%ymm2,%ymm1
- vmovdqu -24+128-128(%rcx),%ymm0
- vpmuludq %ymm11,%ymm13,%ymm13
- vmovdqu -24+160-128(%rcx),%ymm12
- vpaddq %ymm13,%ymm3,%ymm2
- vpmuludq %ymm11,%ymm0,%ymm0
- vmovdqu -24+192-128(%rcx),%ymm13
- vpaddq %ymm0,%ymm4,%ymm3
- vpmuludq %ymm11,%ymm12,%ymm12
- vmovdqu -24+224-128(%rcx),%ymm0
- vpaddq %ymm12,%ymm5,%ymm4
- vpmuludq %ymm11,%ymm13,%ymm13
- vmovdqu -24+256-128(%rcx),%ymm12
- vpaddq %ymm13,%ymm6,%ymm5
- vpmuludq %ymm11,%ymm0,%ymm0
- vmovdqu -24+288-128(%rcx),%ymm13
- movq %r12,%r9
- vpaddq %ymm0,%ymm7,%ymm6
- vpmuludq %ymm11,%ymm12,%ymm12
- addq (%rsp),%r9
- vpaddq %ymm12,%ymm8,%ymm7
- vpmuludq %ymm11,%ymm13,%ymm13
- vmovq %r12,%xmm12
- vpaddq %ymm13,%ymm9,%ymm8
-
- decl %r14d
- jnz .Loop_mul_1024
- vpaddq (%rsp),%ymm12,%ymm0
-
- vpsrlq $29,%ymm0,%ymm12
- vpand %ymm15,%ymm0,%ymm0
- vpsrlq $29,%ymm1,%ymm13
- vpand %ymm15,%ymm1,%ymm1
- vpsrlq $29,%ymm2,%ymm10
- vpermq $0x93,%ymm12,%ymm12
- vpand %ymm15,%ymm2,%ymm2
- vpsrlq $29,%ymm3,%ymm11
- vpermq $0x93,%ymm13,%ymm13
- vpand %ymm15,%ymm3,%ymm3
-
- vpblendd $3,%ymm14,%ymm12,%ymm9
- vpermq $0x93,%ymm10,%ymm10
- vpblendd $3,%ymm12,%ymm13,%ymm12
- vpermq $0x93,%ymm11,%ymm11
- vpaddq %ymm9,%ymm0,%ymm0
- vpblendd $3,%ymm13,%ymm10,%ymm13
- vpaddq %ymm12,%ymm1,%ymm1
- vpblendd $3,%ymm10,%ymm11,%ymm10
- vpaddq %ymm13,%ymm2,%ymm2
- vpblendd $3,%ymm11,%ymm14,%ymm11
- vpaddq %ymm10,%ymm3,%ymm3
- vpaddq %ymm11,%ymm4,%ymm4
-
- vpsrlq $29,%ymm0,%ymm12
- vpand %ymm15,%ymm0,%ymm0
- vpsrlq $29,%ymm1,%ymm13
- vpand %ymm15,%ymm1,%ymm1
- vpsrlq $29,%ymm2,%ymm10
- vpermq $0x93,%ymm12,%ymm12
- vpand %ymm15,%ymm2,%ymm2
- vpsrlq $29,%ymm3,%ymm11
- vpermq $0x93,%ymm13,%ymm13
- vpand %ymm15,%ymm3,%ymm3
- vpermq $0x93,%ymm10,%ymm10
-
- vpblendd $3,%ymm14,%ymm12,%ymm9
- vpermq $0x93,%ymm11,%ymm11
- vpblendd $3,%ymm12,%ymm13,%ymm12
- vpaddq %ymm9,%ymm0,%ymm0
- vpblendd $3,%ymm13,%ymm10,%ymm13
- vpaddq %ymm12,%ymm1,%ymm1
- vpblendd $3,%ymm10,%ymm11,%ymm10
- vpaddq %ymm13,%ymm2,%ymm2
- vpblendd $3,%ymm11,%ymm14,%ymm11
- vpaddq %ymm10,%ymm3,%ymm3
- vpaddq %ymm11,%ymm4,%ymm4
-
- vmovdqu %ymm0,0-128(%rdi)
- vmovdqu %ymm1,32-128(%rdi)
- vmovdqu %ymm2,64-128(%rdi)
- vmovdqu %ymm3,96-128(%rdi)
- vpsrlq $29,%ymm4,%ymm12
- vpand %ymm15,%ymm4,%ymm4
- vpsrlq $29,%ymm5,%ymm13
- vpand %ymm15,%ymm5,%ymm5
- vpsrlq $29,%ymm6,%ymm10
- vpermq $0x93,%ymm12,%ymm12
- vpand %ymm15,%ymm6,%ymm6
- vpsrlq $29,%ymm7,%ymm11
- vpermq $0x93,%ymm13,%ymm13
- vpand %ymm15,%ymm7,%ymm7
- vpsrlq $29,%ymm8,%ymm0
- vpermq $0x93,%ymm10,%ymm10
- vpand %ymm15,%ymm8,%ymm8
- vpermq $0x93,%ymm11,%ymm11
-
- vpblendd $3,%ymm14,%ymm12,%ymm9
- vpermq $0x93,%ymm0,%ymm0
- vpblendd $3,%ymm12,%ymm13,%ymm12
- vpaddq %ymm9,%ymm4,%ymm4
- vpblendd $3,%ymm13,%ymm10,%ymm13
- vpaddq %ymm12,%ymm5,%ymm5
- vpblendd $3,%ymm10,%ymm11,%ymm10
- vpaddq %ymm13,%ymm6,%ymm6
- vpblendd $3,%ymm11,%ymm0,%ymm11
- vpaddq %ymm10,%ymm7,%ymm7
- vpaddq %ymm11,%ymm8,%ymm8
-
- vpsrlq $29,%ymm4,%ymm12
- vpand %ymm15,%ymm4,%ymm4
- vpsrlq $29,%ymm5,%ymm13
- vpand %ymm15,%ymm5,%ymm5
- vpsrlq $29,%ymm6,%ymm10
- vpermq $0x93,%ymm12,%ymm12
- vpand %ymm15,%ymm6,%ymm6
- vpsrlq $29,%ymm7,%ymm11
- vpermq $0x93,%ymm13,%ymm13
- vpand %ymm15,%ymm7,%ymm7
- vpsrlq $29,%ymm8,%ymm0
- vpermq $0x93,%ymm10,%ymm10
- vpand %ymm15,%ymm8,%ymm8
- vpermq $0x93,%ymm11,%ymm11
-
- vpblendd $3,%ymm14,%ymm12,%ymm9
- vpermq $0x93,%ymm0,%ymm0
- vpblendd $3,%ymm12,%ymm13,%ymm12
- vpaddq %ymm9,%ymm4,%ymm4
- vpblendd $3,%ymm13,%ymm10,%ymm13
- vpaddq %ymm12,%ymm5,%ymm5
- vpblendd $3,%ymm10,%ymm11,%ymm10
- vpaddq %ymm13,%ymm6,%ymm6
- vpblendd $3,%ymm11,%ymm0,%ymm11
- vpaddq %ymm10,%ymm7,%ymm7
- vpaddq %ymm11,%ymm8,%ymm8
-
- vmovdqu %ymm4,128-128(%rdi)
- vmovdqu %ymm5,160-128(%rdi)
- vmovdqu %ymm6,192-128(%rdi)
- vmovdqu %ymm7,224-128(%rdi)
- vmovdqu %ymm8,256-128(%rdi)
- vzeroupper
-
- movq %rbp,%rax
-.cfi_def_cfa_register %rax
- movq -48(%rax),%r15
-.cfi_restore %r15
- movq -40(%rax),%r14
-.cfi_restore %r14
- movq -32(%rax),%r13
-.cfi_restore %r13
- movq -24(%rax),%r12
-.cfi_restore %r12
- movq -16(%rax),%rbp
-.cfi_restore %rbp
- movq -8(%rax),%rbx
-.cfi_restore %rbx
- leaq (%rax),%rsp
-.cfi_def_cfa_register %rsp
-.Lmul_1024_epilogue:
- .byte 0xf3,0xc3
-.cfi_endproc
-.size rsaz_1024_mul_avx2,.-rsaz_1024_mul_avx2
-.globl rsaz_1024_red2norm_avx2
-.type rsaz_1024_red2norm_avx2,@function
-.align 32
-rsaz_1024_red2norm_avx2:
-.cfi_startproc
- subq $-128,%rsi
- xorq %rax,%rax
- movq -128(%rsi),%r8
- movq -120(%rsi),%r9
- movq -112(%rsi),%r10
- shlq $0,%r8
- shlq $29,%r9
- movq %r10,%r11
- shlq $58,%r10
- shrq $6,%r11
- addq %r8,%rax
- addq %r9,%rax
- addq %r10,%rax
- adcq $0,%r11
- movq %rax,0(%rdi)
- movq %r11,%rax
- movq -104(%rsi),%r8
- movq -96(%rsi),%r9
- shlq $23,%r8
- movq %r9,%r10
- shlq $52,%r9
- shrq $12,%r10
- addq %r8,%rax
- addq %r9,%rax
- adcq $0,%r10
- movq %rax,8(%rdi)
- movq %r10,%rax
- movq -88(%rsi),%r11
- movq -80(%rsi),%r8
- shlq $17,%r11
- movq %r8,%r9
- shlq $46,%r8
- shrq $18,%r9
- addq %r11,%rax
- addq %r8,%rax
- adcq $0,%r9
- movq %rax,16(%rdi)
- movq %r9,%rax
- movq -72(%rsi),%r10
- movq -64(%rsi),%r11
- shlq $11,%r10
- movq %r11,%r8
- shlq $40,%r11
- shrq $24,%r8
- addq %r10,%rax
- addq %r11,%rax
- adcq $0,%r8
- movq %rax,24(%rdi)
- movq %r8,%rax
- movq -56(%rsi),%r9
- movq -48(%rsi),%r10
- movq -40(%rsi),%r11
- shlq $5,%r9
- shlq $34,%r10
- movq %r11,%r8
- shlq $63,%r11
- shrq $1,%r8
- addq %r9,%rax
- addq %r10,%rax
- addq %r11,%rax
- adcq $0,%r8
- movq %rax,32(%rdi)
- movq %r8,%rax
- movq -32(%rsi),%r9
- movq -24(%rsi),%r10
- shlq $28,%r9
- movq %r10,%r11
- shlq $57,%r10
- shrq $7,%r11
- addq %r9,%rax
- addq %r10,%rax
- adcq $0,%r11
- movq %rax,40(%rdi)
- movq %r11,%rax
- movq -16(%rsi),%r8
- movq -8(%rsi),%r9
- shlq $22,%r8
- movq %r9,%r10
- shlq $51,%r9
- shrq $13,%r10
- addq %r8,%rax
- addq %r9,%rax
- adcq $0,%r10
- movq %rax,48(%rdi)
- movq %r10,%rax
- movq 0(%rsi),%r11
- movq 8(%rsi),%r8
- shlq $16,%r11
- movq %r8,%r9
- shlq $45,%r8
- shrq $19,%r9
- addq %r11,%rax
- addq %r8,%rax
- adcq $0,%r9
- movq %rax,56(%rdi)
- movq %r9,%rax
- movq 16(%rsi),%r10
- movq 24(%rsi),%r11
- shlq $10,%r10
- movq %r11,%r8
- shlq $39,%r11
- shrq $25,%r8
- addq %r10,%rax
- addq %r11,%rax
- adcq $0,%r8
- movq %rax,64(%rdi)
- movq %r8,%rax
- movq 32(%rsi),%r9
- movq 40(%rsi),%r10
- movq 48(%rsi),%r11
- shlq $4,%r9
- shlq $33,%r10
- movq %r11,%r8
- shlq $62,%r11
- shrq $2,%r8
- addq %r9,%rax
- addq %r10,%rax
- addq %r11,%rax
- adcq $0,%r8
- movq %rax,72(%rdi)
- movq %r8,%rax
- movq 56(%rsi),%r9
- movq 64(%rsi),%r10
- shlq $27,%r9
- movq %r10,%r11
- shlq $56,%r10
- shrq $8,%r11
- addq %r9,%rax
- addq %r10,%rax
- adcq $0,%r11
- movq %rax,80(%rdi)
- movq %r11,%rax
- movq 72(%rsi),%r8
- movq 80(%rsi),%r9
- shlq $21,%r8
- movq %r9,%r10
- shlq $50,%r9
- shrq $14,%r10
- addq %r8,%rax
- addq %r9,%rax
- adcq $0,%r10
- movq %rax,88(%rdi)
- movq %r10,%rax
- movq 88(%rsi),%r11
- movq 96(%rsi),%r8
- shlq $15,%r11
- movq %r8,%r9
- shlq $44,%r8
- shrq $20,%r9
- addq %r11,%rax
- addq %r8,%rax
- adcq $0,%r9
- movq %rax,96(%rdi)
- movq %r9,%rax
- movq 104(%rsi),%r10
- movq 112(%rsi),%r11
- shlq $9,%r10
- movq %r11,%r8
- shlq $38,%r11
- shrq $26,%r8
- addq %r10,%rax
- addq %r11,%rax
- adcq $0,%r8
- movq %rax,104(%rdi)
- movq %r8,%rax
- movq 120(%rsi),%r9
- movq 128(%rsi),%r10
- movq 136(%rsi),%r11
- shlq $3,%r9
- shlq $32,%r10
- movq %r11,%r8
- shlq $61,%r11
- shrq $3,%r8
- addq %r9,%rax
- addq %r10,%rax
- addq %r11,%rax
- adcq $0,%r8
- movq %rax,112(%rdi)
- movq %r8,%rax
- movq 144(%rsi),%r9
- movq 152(%rsi),%r10
- shlq $26,%r9
- movq %r10,%r11
- shlq $55,%r10
- shrq $9,%r11
- addq %r9,%rax
- addq %r10,%rax
- adcq $0,%r11
- movq %rax,120(%rdi)
- movq %r11,%rax
- .byte 0xf3,0xc3
-.cfi_endproc
-.size rsaz_1024_red2norm_avx2,.-rsaz_1024_red2norm_avx2
-
-.globl rsaz_1024_norm2red_avx2
-.type rsaz_1024_norm2red_avx2,@function
-.align 32
rsaz_1024_norm2red_avx2:
-.cfi_startproc
- subq $-128,%rdi
- movq (%rsi),%r8
- movl $0x1fffffff,%eax
- movq 8(%rsi),%r9
- movq %r8,%r11
- shrq $0,%r11
- andq %rax,%r11
- movq %r11,-128(%rdi)
- movq %r8,%r10
- shrq $29,%r10
- andq %rax,%r10
- movq %r10,-120(%rdi)
- shrdq $58,%r9,%r8
- andq %rax,%r8
- movq %r8,-112(%rdi)
- movq 16(%rsi),%r10
- movq %r9,%r8
- shrq $23,%r8
- andq %rax,%r8
- movq %r8,-104(%rdi)
- shrdq $52,%r10,%r9
- andq %rax,%r9
- movq %r9,-96(%rdi)
- movq 24(%rsi),%r11
- movq %r10,%r9
- shrq $17,%r9
- andq %rax,%r9
- movq %r9,-88(%rdi)
- shrdq $46,%r11,%r10
- andq %rax,%r10
- movq %r10,-80(%rdi)
- movq 32(%rsi),%r8
- movq %r11,%r10
- shrq $11,%r10
- andq %rax,%r10
- movq %r10,-72(%rdi)
- shrdq $40,%r8,%r11
- andq %rax,%r11
- movq %r11,-64(%rdi)
- movq 40(%rsi),%r9
- movq %r8,%r11
- shrq $5,%r11
- andq %rax,%r11
- movq %r11,-56(%rdi)
- movq %r8,%r10
- shrq $34,%r10
- andq %rax,%r10
- movq %r10,-48(%rdi)
- shrdq $63,%r9,%r8
- andq %rax,%r8
- movq %r8,-40(%rdi)
- movq 48(%rsi),%r10
- movq %r9,%r8
- shrq $28,%r8
- andq %rax,%r8
- movq %r8,-32(%rdi)
- shrdq $57,%r10,%r9
- andq %rax,%r9
- movq %r9,-24(%rdi)
- movq 56(%rsi),%r11
- movq %r10,%r9
- shrq $22,%r9
- andq %rax,%r9
- movq %r9,-16(%rdi)
- shrdq $51,%r11,%r10
- andq %rax,%r10
- movq %r10,-8(%rdi)
- movq 64(%rsi),%r8
- movq %r11,%r10
- shrq $16,%r10
- andq %rax,%r10
- movq %r10,0(%rdi)
- shrdq $45,%r8,%r11
- andq %rax,%r11
- movq %r11,8(%rdi)
- movq 72(%rsi),%r9
- movq %r8,%r11
- shrq $10,%r11
- andq %rax,%r11
- movq %r11,16(%rdi)
- shrdq $39,%r9,%r8
- andq %rax,%r8
- movq %r8,24(%rdi)
- movq 80(%rsi),%r10
- movq %r9,%r8
- shrq $4,%r8
- andq %rax,%r8
- movq %r8,32(%rdi)
- movq %r9,%r11
- shrq $33,%r11
- andq %rax,%r11
- movq %r11,40(%rdi)
- shrdq $62,%r10,%r9
- andq %rax,%r9
- movq %r9,48(%rdi)
- movq 88(%rsi),%r11
- movq %r10,%r9
- shrq $27,%r9
- andq %rax,%r9
- movq %r9,56(%rdi)
- shrdq $56,%r11,%r10
- andq %rax,%r10
- movq %r10,64(%rdi)
- movq 96(%rsi),%r8
- movq %r11,%r10
- shrq $21,%r10
- andq %rax,%r10
- movq %r10,72(%rdi)
- shrdq $50,%r8,%r11
- andq %rax,%r11
- movq %r11,80(%rdi)
- movq 104(%rsi),%r9
- movq %r8,%r11
- shrq $15,%r11
- andq %rax,%r11
- movq %r11,88(%rdi)
- shrdq $44,%r9,%r8
- andq %rax,%r8
- movq %r8,96(%rdi)
- movq 112(%rsi),%r10
- movq %r9,%r8
- shrq $9,%r8
- andq %rax,%r8
- movq %r8,104(%rdi)
- shrdq $38,%r10,%r9
- andq %rax,%r9
- movq %r9,112(%rdi)
- movq 120(%rsi),%r11
- movq %r10,%r9
- shrq $3,%r9
- andq %rax,%r9
- movq %r9,120(%rdi)
- movq %r10,%r8
- shrq $32,%r8
- andq %rax,%r8
- movq %r8,128(%rdi)
- shrdq $61,%r11,%r10
- andq %rax,%r10
- movq %r10,136(%rdi)
- xorq %r8,%r8
- movq %r11,%r10
- shrq $26,%r10
- andq %rax,%r10
- movq %r10,144(%rdi)
- shrdq $55,%r8,%r11
- andq %rax,%r11
- movq %r11,152(%rdi)
- movq %r8,160(%rdi)
- movq %r8,168(%rdi)
- movq %r8,176(%rdi)
- movq %r8,184(%rdi)
- .byte 0xf3,0xc3
-.cfi_endproc
-.size rsaz_1024_norm2red_avx2,.-rsaz_1024_norm2red_avx2
-.globl rsaz_1024_scatter5_avx2
-.type rsaz_1024_scatter5_avx2,@function
-.align 32
+rsaz_1024_red2norm_avx2:
rsaz_1024_scatter5_avx2:
-.cfi_startproc
- vzeroupper
- vmovdqu .Lscatter_permd(%rip),%ymm5
- shll $4,%edx
- leaq (%rdi,%rdx,1),%rdi
- movl $9,%eax
- jmp .Loop_scatter_1024
-
-.align 32
-.Loop_scatter_1024:
- vmovdqu (%rsi),%ymm0
- leaq 32(%rsi),%rsi
- vpermd %ymm0,%ymm5,%ymm0
- vmovdqu %xmm0,(%rdi)
- leaq 512(%rdi),%rdi
- decl %eax
- jnz .Loop_scatter_1024
-
- vzeroupper
- .byte 0xf3,0xc3
-.cfi_endproc
-.size rsaz_1024_scatter5_avx2,.-rsaz_1024_scatter5_avx2
-
-.globl rsaz_1024_gather5_avx2
-.type rsaz_1024_gather5_avx2,@function
-.align 32
rsaz_1024_gather5_avx2:
-.cfi_startproc
- vzeroupper
- movq %rsp,%r11
-.cfi_def_cfa_register %r11
- leaq -256(%rsp),%rsp
- andq $-32,%rsp
- leaq .Linc(%rip),%r10
- leaq -128(%rsp),%rax
-
- vmovd %edx,%xmm4
- vmovdqa (%r10),%ymm0
- vmovdqa 32(%r10),%ymm1
- vmovdqa 64(%r10),%ymm5
- vpbroadcastd %xmm4,%ymm4
-
- vpaddd %ymm5,%ymm0,%ymm2
- vpcmpeqd %ymm4,%ymm0,%ymm0
- vpaddd %ymm5,%ymm1,%ymm3
- vpcmpeqd %ymm4,%ymm1,%ymm1
- vmovdqa %ymm0,0+128(%rax)
- vpaddd %ymm5,%ymm2,%ymm0
- vpcmpeqd %ymm4,%ymm2,%ymm2
- vmovdqa %ymm1,32+128(%rax)
- vpaddd %ymm5,%ymm3,%ymm1
- vpcmpeqd %ymm4,%ymm3,%ymm3
- vmovdqa %ymm2,64+128(%rax)
- vpaddd %ymm5,%ymm0,%ymm2
- vpcmpeqd %ymm4,%ymm0,%ymm0
- vmovdqa %ymm3,96+128(%rax)
- vpaddd %ymm5,%ymm1,%ymm3
- vpcmpeqd %ymm4,%ymm1,%ymm1
- vmovdqa %ymm0,128+128(%rax)
- vpaddd %ymm5,%ymm2,%ymm8
- vpcmpeqd %ymm4,%ymm2,%ymm2
- vmovdqa %ymm1,160+128(%rax)
- vpaddd %ymm5,%ymm3,%ymm9
- vpcmpeqd %ymm4,%ymm3,%ymm3
- vmovdqa %ymm2,192+128(%rax)
- vpaddd %ymm5,%ymm8,%ymm10
- vpcmpeqd %ymm4,%ymm8,%ymm8
- vmovdqa %ymm3,224+128(%rax)
- vpaddd %ymm5,%ymm9,%ymm11
- vpcmpeqd %ymm4,%ymm9,%ymm9
- vpaddd %ymm5,%ymm10,%ymm12
- vpcmpeqd %ymm4,%ymm10,%ymm10
- vpaddd %ymm5,%ymm11,%ymm13
- vpcmpeqd %ymm4,%ymm11,%ymm11
- vpaddd %ymm5,%ymm12,%ymm14
- vpcmpeqd %ymm4,%ymm12,%ymm12
- vpaddd %ymm5,%ymm13,%ymm15
- vpcmpeqd %ymm4,%ymm13,%ymm13
- vpcmpeqd %ymm4,%ymm14,%ymm14
- vpcmpeqd %ymm4,%ymm15,%ymm15
-
- vmovdqa -32(%r10),%ymm7
- leaq 128(%rsi),%rsi
- movl $9,%edx
-
-.Loop_gather_1024:
- vmovdqa 0-128(%rsi),%ymm0
- vmovdqa 32-128(%rsi),%ymm1
- vmovdqa 64-128(%rsi),%ymm2
- vmovdqa 96-128(%rsi),%ymm3
- vpand 0+128(%rax),%ymm0,%ymm0
- vpand 32+128(%rax),%ymm1,%ymm1
- vpand 64+128(%rax),%ymm2,%ymm2
- vpor %ymm0,%ymm1,%ymm4
- vpand 96+128(%rax),%ymm3,%ymm3
- vmovdqa 128-128(%rsi),%ymm0
- vmovdqa 160-128(%rsi),%ymm1
- vpor %ymm2,%ymm3,%ymm5
- vmovdqa 192-128(%rsi),%ymm2
- vmovdqa 224-128(%rsi),%ymm3
- vpand 128+128(%rax),%ymm0,%ymm0
- vpand 160+128(%rax),%ymm1,%ymm1
- vpand 192+128(%rax),%ymm2,%ymm2
- vpor %ymm0,%ymm4,%ymm4
- vpand 224+128(%rax),%ymm3,%ymm3
- vpand 256-128(%rsi),%ymm8,%ymm0
- vpor %ymm1,%ymm5,%ymm5
- vpand 288-128(%rsi),%ymm9,%ymm1
- vpor %ymm2,%ymm4,%ymm4
- vpand 320-128(%rsi),%ymm10,%ymm2
- vpor %ymm3,%ymm5,%ymm5
- vpand 352-128(%rsi),%ymm11,%ymm3
- vpor %ymm0,%ymm4,%ymm4
- vpand 384-128(%rsi),%ymm12,%ymm0
- vpor %ymm1,%ymm5,%ymm5
- vpand 416-128(%rsi),%ymm13,%ymm1
- vpor %ymm2,%ymm4,%ymm4
- vpand 448-128(%rsi),%ymm14,%ymm2
- vpor %ymm3,%ymm5,%ymm5
- vpand 480-128(%rsi),%ymm15,%ymm3
- leaq 512(%rsi),%rsi
- vpor %ymm0,%ymm4,%ymm4
- vpor %ymm1,%ymm5,%ymm5
- vpor %ymm2,%ymm4,%ymm4
- vpor %ymm3,%ymm5,%ymm5
-
- vpor %ymm5,%ymm4,%ymm4
- vextracti128 $1,%ymm4,%xmm5
- vpor %xmm4,%xmm5,%xmm5
- vpermd %ymm5,%ymm7,%ymm5
- vmovdqu %ymm5,(%rdi)
- leaq 32(%rdi),%rdi
- decl %edx
- jnz .Loop_gather_1024
-
- vpxor %ymm0,%ymm0,%ymm0
- vmovdqu %ymm0,(%rdi)
- vzeroupper
- leaq (%r11),%rsp
-.cfi_def_cfa_register %rsp
+.byte 0x0f,0x0b
.byte 0xf3,0xc3
-.cfi_endproc
-.LSEH_end_rsaz_1024_gather5:
-.size rsaz_1024_gather5_avx2,.-rsaz_1024_gather5_avx2
-
-.globl rsaz_avx2_eligible
-.type rsaz_avx2_eligible,@function
-.align 32
-rsaz_avx2_eligible:
- movl OPENSSL_ia32cap_P+8(%rip),%eax
- movl $524544,%ecx
- movl $0,%edx
- andl %eax,%ecx
- cmpl $524544,%ecx
- cmovel %edx,%eax
- andl $32,%eax
- shrl $5,%eax
- .byte 0xf3,0xc3
-.size rsaz_avx2_eligible,.-rsaz_avx2_eligible
-
-.align 64
-.Land_mask:
-.quad 0x1fffffff,0x1fffffff,0x1fffffff,0x1fffffff
-.Lscatter_permd:
-.long 0,2,4,6,7,7,7,7
-.Lgather_permd:
-.long 0,7,1,7,2,7,3,7
-.Linc:
-.long 0,0,0,0, 1,1,1,1
-.long 2,2,2,2, 3,3,3,3
-.long 4,4,4,4, 4,4,4,4
-.align 64
+.size rsaz_1024_sqr_avx2,.-rsaz_1024_sqr_avx2
diff --git a/secure/lib/libcrypto/amd64/rsaz-x86_64.S b/secure/lib/libcrypto/amd64/rsaz-x86_64.S
index e4e7b0469a53..ae64f7a73987 100644
--- a/secure/lib/libcrypto/amd64/rsaz-x86_64.S
+++ b/secure/lib/libcrypto/amd64/rsaz-x86_64.S
@@ -31,14 +31,10 @@ rsaz_512_sqr:
subq $128+24,%rsp
.cfi_adjust_cfa_offset 128+24
.Lsqr_body:
- movq %rdx,%rbp
+.byte 102,72,15,110,202
movq (%rsi),%rdx
movq 8(%rsi),%rax
movq %rcx,128(%rsp)
- movl $0x80100,%r11d
- andl OPENSSL_ia32cap_P+8(%rip),%r11d
- cmpl $0x80100,%r11d
- je .Loop_sqrx
jmp .Loop_sqr
.align 32
@@ -46,6 +42,7 @@ rsaz_512_sqr:
movl %r8d,128+8(%rsp)
movq %rdx,%rbx
+ movq %rax,%rbp
mulq %rdx
movq %rax,%r8
movq 16(%rsi),%rax
@@ -84,31 +81,29 @@ rsaz_512_sqr:
mulq %rbx
addq %rax,%r14
movq %rbx,%rax
- movq %rdx,%r15
- adcq $0,%r15
+ adcq $0,%rdx
+ xorq %rcx,%rcx
addq %r8,%r8
- movq %r9,%rcx
- adcq %r9,%r9
+ movq %rdx,%r15
+ adcq $0,%rcx
mulq %rax
- movq %rax,(%rsp)
- addq %rdx,%r8
- adcq $0,%r9
+ addq %r8,%rdx
+ adcq $0,%rcx
- movq %r8,8(%rsp)
- shrq $63,%rcx
+ movq %rax,(%rsp)
+ movq %rdx,8(%rsp)
- movq 8(%rsi),%r8
movq 16(%rsi),%rax
- mulq %r8
+ mulq %rbp
addq %rax,%r10
movq 24(%rsi),%rax
movq %rdx,%rbx
adcq $0,%rbx
- mulq %r8
+ mulq %rbp
addq %rax,%r11
movq 32(%rsi),%rax
adcq $0,%rdx
@@ -116,7 +111,7 @@ rsaz_512_sqr:
movq %rdx,%rbx
adcq $0,%rbx
- mulq %r8
+ mulq %rbp
addq %rax,%r12
movq 40(%rsi),%rax
adcq $0,%rdx
@@ -124,7 +119,7 @@ rsaz_512_sqr:
movq %rdx,%rbx
adcq $0,%rbx
- mulq %r8
+ mulq %rbp
addq %rax,%r13
movq 48(%rsi),%rax
adcq $0,%rdx
@@ -132,7 +127,7 @@ rsaz_512_sqr:
movq %rdx,%rbx
adcq $0,%rbx
- mulq %r8
+ mulq %rbp
addq %rax,%r14
movq 56(%rsi),%rax
adcq $0,%rdx
@@ -140,39 +135,39 @@ rsaz_512_sqr:
movq %rdx,%rbx
adcq $0,%rbx
- mulq %r8
+ mulq %rbp
addq %rax,%r15
- movq %r8,%rax
+ movq %rbp,%rax
adcq $0,%rdx
addq %rbx,%r15
- movq %rdx,%r8
- movq %r10,%rdx
- adcq $0,%r8
+ adcq $0,%rdx
- addq %rdx,%rdx
- leaq (%rcx,%r10,2),%r10
- movq %r11,%rbx
- adcq %r11,%r11
+ xorq %rbx,%rbx
+ addq %r9,%r9
+ movq %rdx,%r8
+ adcq %r10,%r10
+ adcq $0,%rbx
mulq %rax
+
+ addq %rcx,%rax
+ movq 16(%rsi),%rbp
addq %rax,%r9
+ movq 24(%rsi),%rax
adcq %rdx,%r10
- adcq $0,%r11
+ adcq $0,%rbx
movq %r9,16(%rsp)
movq %r10,24(%rsp)
- shrq $63,%rbx
- movq 16(%rsi),%r9
- movq 24(%rsi),%rax
- mulq %r9
+ mulq %rbp
addq %rax,%r12
movq 32(%rsi),%rax
movq %rdx,%rcx
adcq $0,%rcx
- mulq %r9
+ mulq %rbp
addq %rax,%r13
movq 40(%rsi),%rax
adcq $0,%rdx
@@ -180,7 +175,7 @@ rsaz_512_sqr:
movq %rdx,%rcx
adcq $0,%rcx
- mulq %r9
+ mulq %rbp
addq %rax,%r14
movq 48(%rsi),%rax
adcq $0,%rdx
@@ -188,9 +183,7 @@ rsaz_512_sqr:
movq %rdx,%rcx
adcq $0,%rcx
- mulq %r9
- movq %r12,%r10
- leaq (%rbx,%r12,2),%r12
+ mulq %rbp
addq %rax,%r15
movq 56(%rsi),%rax
adcq $0,%rdx
@@ -198,36 +191,40 @@ rsaz_512_sqr:
movq %rdx,%rcx
adcq $0,%rcx
- mulq %r9
- shrq $63,%r10
+ mulq %rbp
addq %rax,%r8
- movq %r9,%rax
+ movq %rbp,%rax
adcq $0,%rdx
addq %rcx,%r8
- movq %rdx,%r9
- adcq $0,%r9
+ adcq $0,%rdx
- movq %r13,%rcx
- leaq (%r10,%r13,2),%r13
+ xorq %rcx,%rcx
+ addq %r11,%r11
+ movq %rdx,%r9
+ adcq %r12,%r12
+ adcq $0,%rcx
mulq %rax
+
+ addq %rbx,%rax
+ movq 24(%rsi),%r10
addq %rax,%r11
+ movq 32(%rsi),%rax
adcq %rdx,%r12
- adcq $0,%r13
+ adcq $0,%rcx
movq %r11,32(%rsp)
movq %r12,40(%rsp)
- shrq $63,%rcx
- movq 24(%rsi),%r10
- movq 32(%rsi),%rax
+ movq %rax,%r11
mulq %r10
addq %rax,%r14
movq 40(%rsi),%rax
movq %rdx,%rbx
adcq $0,%rbx
+ movq %rax,%r12
mulq %r10
addq %rax,%r15
movq 48(%rsi),%rax
@@ -236,9 +233,8 @@ rsaz_512_sqr:
movq %rdx,%rbx
adcq $0,%rbx
+ movq %rax,%rbp
mulq %r10
- movq %r14,%r12
- leaq (%rcx,%r14,2),%r14
addq %rax,%r8
movq 56(%rsi),%rax
adcq $0,%rdx
@@ -247,32 +243,33 @@ rsaz_512_sqr:
adcq $0,%rbx
mulq %r10
- shrq $63,%r12
addq %rax,%r9
movq %r10,%rax
adcq $0,%rdx
addq %rbx,%r9
- movq %rdx,%r10
- adcq $0,%r10
+ adcq $0,%rdx
- movq %r15,%rbx
- leaq (%r12,%r15,2),%r15
+ xorq %rbx,%rbx
+ addq %r13,%r13
+ movq %rdx,%r10
+ adcq %r14,%r14
+ adcq $0,%rbx
mulq %rax
+
+ addq %rcx,%rax
addq %rax,%r13
+ movq %r12,%rax
adcq %rdx,%r14
- adcq $0,%r15
+ adcq $0,%rbx
movq %r13,48(%rsp)
movq %r14,56(%rsp)
- shrq $63,%rbx
- movq 32(%rsi),%r11
- movq 40(%rsi),%rax
mulq %r11
addq %rax,%r8
- movq 48(%rsi),%rax
+ movq %rbp,%rax
movq %rdx,%rcx
adcq $0,%rcx
@@ -280,97 +277,99 @@ rsaz_512_sqr:
addq %rax,%r9
movq 56(%rsi),%rax
adcq $0,%rdx
- movq %r8,%r12
- leaq (%rbx,%r8,2),%r8
addq %rcx,%r9
movq %rdx,%rcx
adcq $0,%rcx
+ movq %rax,%r14
mulq %r11
- shrq $63,%r12
addq %rax,%r10
movq %r11,%rax
adcq $0,%rdx
addq %rcx,%r10
- movq %rdx,%r11
- adcq $0,%r11
+ adcq $0,%rdx
- movq %r9,%rcx
- leaq (%r12,%r9,2),%r9
+ xorq %rcx,%rcx
+ addq %r15,%r15
+ movq %rdx,%r11
+ adcq %r8,%r8
+ adcq $0,%rcx
mulq %rax
+
+ addq %rbx,%rax
addq %rax,%r15
+ movq %rbp,%rax
adcq %rdx,%r8
- adcq $0,%r9
+ adcq $0,%rcx
movq %r15,64(%rsp)
movq %r8,72(%rsp)
- shrq $63,%rcx
- movq 40(%rsi),%r12
- movq 48(%rsi),%rax
mulq %r12
addq %rax,%r10
- movq 56(%rsi),%rax
+ movq %r14,%rax
movq %rdx,%rbx
adcq $0,%rbx
mulq %r12
addq %rax,%r11
movq %r12,%rax
- movq %r10,%r15
- leaq (%rcx,%r10,2),%r10
adcq $0,%rdx
- shrq $63,%r15
addq %rbx,%r11
- movq %rdx,%r12
- adcq $0,%r12
+ adcq $0,%rdx
- movq %r11,%rbx
- leaq (%r15,%r11,2),%r11
+ xorq %rbx,%rbx
+ addq %r9,%r9
+ movq %rdx,%r12
+ adcq %r10,%r10
+ adcq $0,%rbx
mulq %rax
+
+ addq %rcx,%rax
addq %rax,%r9
+ movq %r14,%rax
adcq %rdx,%r10
- adcq $0,%r11
+ adcq $0,%rbx
movq %r9,80(%rsp)
movq %r10,88(%rsp)
- movq 48(%rsi),%r13
- movq 56(%rsi),%rax
- mulq %r13
+ mulq %rbp
addq %rax,%r12
- movq %r13,%rax
- movq %rdx,%r13
- adcq $0,%r13
+ movq %rbp,%rax
+ adcq $0,%rdx
- xorq %r14,%r14
- shlq $1,%rbx
+ xorq %rcx,%rcx
+ addq %r11,%r11
+ movq %rdx,%r13
adcq %r12,%r12
- adcq %r13,%r13
- adcq %r14,%r14
+ adcq $0,%rcx
mulq %rax
+
+ addq %rbx,%rax
addq %rax,%r11
+ movq %r14,%rax
adcq %rdx,%r12
- adcq $0,%r13
+ adcq $0,%rcx
movq %r11,96(%rsp)
movq %r12,104(%rsp)
- movq 56(%rsi),%rax
- mulq %rax
- addq %rax,%r13
- adcq $0,%rdx
+ xorq %rbx,%rbx
+ addq %r13,%r13
+ adcq $0,%rbx
- addq %rdx,%r14
+ mulq %rax
- movq %r13,112(%rsp)
- movq %r14,120(%rsp)
+ addq %rcx,%rax
+ addq %r13,%rax
+ adcq %rbx,%rdx
movq (%rsp),%r8
movq 8(%rsp),%r9
@@ -380,276 +379,12 @@ rsaz_512_sqr:
movq 40(%rsp),%r13
movq 48(%rsp),%r14
movq 56(%rsp),%r15
-
- call __rsaz_512_reduce
-
- addq 64(%rsp),%r8
- adcq 72(%rsp),%r9
- adcq 80(%rsp),%r10
- adcq 88(%rsp),%r11
- adcq 96(%rsp),%r12
- adcq 104(%rsp),%r13
- adcq 112(%rsp),%r14
- adcq 120(%rsp),%r15
- sbbq %rcx,%rcx
-
- call __rsaz_512_subtract
-
- movq %r8,%rdx
- movq %r9,%rax
- movl 128+8(%rsp),%r8d
- movq %rdi,%rsi
-
- decl %r8d
- jnz .Loop_sqr
- jmp .Lsqr_tail
-
-.align 32
-.Loop_sqrx:
- movl %r8d,128+8(%rsp)
-.byte 102,72,15,110,199
-.byte 102,72,15,110,205
-
- mulxq %rax,%r8,%r9
-
- mulxq 16(%rsi),%rcx,%r10
- xorq %rbp,%rbp
-
- mulxq 24(%rsi),%rax,%r11
- adcxq %rcx,%r9
-
- mulxq 32(%rsi),%rcx,%r12
- adcxq %rax,%r10
-
- mulxq 40(%rsi),%rax,%r13
- adcxq %rcx,%r11
-
-.byte 0xc4,0x62,0xf3,0xf6,0xb6,0x30,0x00,0x00,0x00
- adcxq %rax,%r12
- adcxq %rcx,%r13
-
-.byte 0xc4,0x62,0xfb,0xf6,0xbe,0x38,0x00,0x00,0x00
- adcxq %rax,%r14
- adcxq %rbp,%r15
-
- movq %r9,%rcx
- shldq $1,%r8,%r9
- shlq $1,%r8
-
- xorl %ebp,%ebp
- mulxq %rdx,%rax,%rdx
- adcxq %rdx,%r8
- movq 8(%rsi),%rdx
- adcxq %rbp,%r9
-
- movq %rax,(%rsp)
- movq %r8,8(%rsp)
-
-
- mulxq 16(%rsi),%rax,%rbx
- adoxq %rax,%r10
- adcxq %rbx,%r11
-
-.byte 0xc4,0x62,0xc3,0xf6,0x86,0x18,0x00,0x00,0x00
- adoxq %rdi,%r11
- adcxq %r8,%r12
-
- mulxq 32(%rsi),%rax,%rbx
- adoxq %rax,%r12
- adcxq %rbx,%r13
-
- mulxq 40(%rsi),%rdi,%r8
- adoxq %rdi,%r13
- adcxq %r8,%r14
-
-.byte 0xc4,0xe2,0xfb,0xf6,0x9e,0x30,0x00,0x00,0x00
- adoxq %rax,%r14
- adcxq %rbx,%r15
-
-.byte 0xc4,0x62,0xc3,0xf6,0x86,0x38,0x00,0x00,0x00
- adoxq %rdi,%r15
- adcxq %rbp,%r8
- adoxq %rbp,%r8
-
- movq %r11,%rbx
- shldq $1,%r10,%r11
- shldq $1,%rcx,%r10
-
- xorl %ebp,%ebp
- mulxq %rdx,%rax,%rcx
- movq 16(%rsi),%rdx
- adcxq %rax,%r9
- adcxq %rcx,%r10
- adcxq %rbp,%r11
-
- movq %r9,16(%rsp)
-.byte 0x4c,0x89,0x94,0x24,0x18,0x00,0x00,0x00
-
-
-.byte 0xc4,0x62,0xc3,0xf6,0x8e,0x18,0x00,0x00,0x00
- adoxq %rdi,%r12
- adcxq %r9,%r13
-
- mulxq 32(%rsi),%rax,%rcx
- adoxq %rax,%r13
- adcxq %rcx,%r14
-
- mulxq 40(%rsi),%rdi,%r9
- adoxq %rdi,%r14
- adcxq %r9,%r15
-
-.byte 0xc4,0xe2,0xfb,0xf6,0x8e,0x30,0x00,0x00,0x00
- adoxq %rax,%r15
- adcxq %rcx,%r8
-
-.byte 0xc4,0x62,0xc3,0xf6,0x8e,0x38,0x00,0x00,0x00
- adoxq %rdi,%r8
- adcxq %rbp,%r9
- adoxq %rbp,%r9
-
- movq %r13,%rcx
- shldq $1,%r12,%r13
- shldq $1,%rbx,%r12
-
- xorl %ebp,%ebp
- mulxq %rdx,%rax,%rdx
- adcxq %rax,%r11
- adcxq %rdx,%r12
- movq 24(%rsi),%rdx
- adcxq %rbp,%r13
-
- movq %r11,32(%rsp)
-.byte 0x4c,0x89,0xa4,0x24,0x28,0x00,0x00,0x00
-
-
-.byte 0xc4,0xe2,0xfb,0xf6,0x9e,0x20,0x00,0x00,0x00
- adoxq %rax,%r14
- adcxq %rbx,%r15
-
- mulxq 40(%rsi),%rdi,%r10
- adoxq %rdi,%r15
- adcxq %r10,%r8
-
- mulxq 48(%rsi),%rax,%rbx
- adoxq %rax,%r8
- adcxq %rbx,%r9
-
- mulxq 56(%rsi),%rdi,%r10
- adoxq %rdi,%r9
- adcxq %rbp,%r10
- adoxq %rbp,%r10
-
-.byte 0x66
- movq %r15,%rbx
- shldq $1,%r14,%r15
- shldq $1,%rcx,%r14
-
- xorl %ebp,%ebp
- mulxq %rdx,%rax,%rdx
- adcxq %rax,%r13
- adcxq %rdx,%r14
- movq 32(%rsi),%rdx
- adcxq %rbp,%r15
-
- movq %r13,48(%rsp)
- movq %r14,56(%rsp)
-
-
-.byte 0xc4,0x62,0xc3,0xf6,0x9e,0x28,0x00,0x00,0x00
- adoxq %rdi,%r8
- adcxq %r11,%r9
-
- mulxq 48(%rsi),%rax,%rcx
- adoxq %rax,%r9
- adcxq %rcx,%r10
-
- mulxq 56(%rsi),%rdi,%r11
- adoxq %rdi,%r10
- adcxq %rbp,%r11
- adoxq %rbp,%r11
-
- movq %r9,%rcx
- shldq $1,%r8,%r9
- shldq $1,%rbx,%r8
-
- xorl %ebp,%ebp
- mulxq %rdx,%rax,%rdx
- adcxq %rax,%r15
- adcxq %rdx,%r8
- movq 40(%rsi),%rdx
- adcxq %rbp,%r9
-
- movq %r15,64(%rsp)
- movq %r8,72(%rsp)
-
-
-.byte 0xc4,0xe2,0xfb,0xf6,0x9e,0x30,0x00,0x00,0x00
- adoxq %rax,%r10
- adcxq %rbx,%r11
-
-.byte 0xc4,0x62,0xc3,0xf6,0xa6,0x38,0x00,0x00,0x00
- adoxq %rdi,%r11
- adcxq %rbp,%r12
- adoxq %rbp,%r12
-
- movq %r11,%rbx
- shldq $1,%r10,%r11
- shldq $1,%rcx,%r10
-
- xorl %ebp,%ebp
- mulxq %rdx,%rax,%rdx
- adcxq %rax,%r9
- adcxq %rdx,%r10
- movq 48(%rsi),%rdx
- adcxq %rbp,%r11
-
- movq %r9,80(%rsp)
- movq %r10,88(%rsp)
-
-
-.byte 0xc4,0x62,0xfb,0xf6,0xae,0x38,0x00,0x00,0x00
- adoxq %rax,%r12
- adoxq %rbp,%r13
-
- xorq %r14,%r14
- shldq $1,%r13,%r14
- shldq $1,%r12,%r13
- shldq $1,%rbx,%r12
-
- xorl %ebp,%ebp
- mulxq %rdx,%rax,%rdx
- adcxq %rax,%r11
- adcxq %rdx,%r12
- movq 56(%rsi),%rdx
- adcxq %rbp,%r13
-
-.byte 0x4c,0x89,0x9c,0x24,0x60,0x00,0x00,0x00
-.byte 0x4c,0x89,0xa4,0x24,0x68,0x00,0x00,0x00
-
-
- mulxq %rdx,%rax,%rdx
- adoxq %rax,%r13
- adoxq %rbp,%rdx
-
-.byte 0x66
- addq %rdx,%r14
-
- movq %r13,112(%rsp)
- movq %r14,120(%rsp)
-.byte 102,72,15,126,199
.byte 102,72,15,126,205
- movq 128(%rsp),%rdx
- movq (%rsp),%r8
- movq 8(%rsp),%r9
- movq 16(%rsp),%r10
- movq 24(%rsp),%r11
- movq 32(%rsp),%r12
- movq 40(%rsp),%r13
- movq 48(%rsp),%r14
- movq 56(%rsp),%r15
+ movq %rax,112(%rsp)
+ movq %rdx,120(%rsp)
- call __rsaz_512_reducex
+ call __rsaz_512_reduce
addq 64(%rsp),%r8
adcq 72(%rsp),%r9
@@ -669,9 +404,7 @@ rsaz_512_sqr:
movq %rdi,%rsi
decl %r8d
- jnz .Loop_sqrx
-
-.Lsqr_tail:
+ jnz .Loop_sqr
leaq 128+24+48(%rsp),%rax
.cfi_def_cfa %rax,8
@@ -723,10 +456,6 @@ rsaz_512_mul:
.byte 102,72,15,110,199
.byte 102,72,15,110,201
movq %r8,128(%rsp)
- movl $0x80100,%r11d
- andl OPENSSL_ia32cap_P+8(%rip),%r11d
- cmpl $0x80100,%r11d
- je .Lmulx
movq (%rdx),%rbx
movq %rdx,%rbp
call __rsaz_512_mul
@@ -744,29 +473,6 @@ rsaz_512_mul:
movq 56(%rsp),%r15
call __rsaz_512_reduce
- jmp .Lmul_tail
-
-.align 32
-.Lmulx:
- movq %rdx,%rbp
- movq (%rdx),%rdx
- call __rsaz_512_mulx
-
-.byte 102,72,15,126,199
-.byte 102,72,15,126,205
-
- movq 128(%rsp),%rdx
- movq (%rsp),%r8
- movq 8(%rsp),%r9
- movq 16(%rsp),%r10
- movq 24(%rsp),%r11
- movq 32(%rsp),%r12
- movq 40(%rsp),%r13
- movq 48(%rsp),%r14
- movq 56(%rsp),%r15
-
- call __rsaz_512_reducex
-.Lmul_tail:
addq 64(%rsp),%r8
adcq 72(%rsp),%r9
adcq 80(%rsp),%r10
@@ -880,10 +586,6 @@ rsaz_512_mul_gather4:
por %xmm9,%xmm8
pshufd $0x4e,%xmm8,%xmm9
por %xmm9,%xmm8
- movl $0x80100,%r11d
- andl OPENSSL_ia32cap_P+8(%rip),%r11d
- cmpl $0x80100,%r11d
- je .Lmulx_gather
.byte 102,76,15,126,195
movq %r8,128(%rsp)
@@ -1064,142 +766,6 @@ rsaz_512_mul_gather4:
movq 56(%rsp),%r15
call __rsaz_512_reduce
- jmp .Lmul_gather_tail
-
-.align 32
-.Lmulx_gather:
-.byte 102,76,15,126,194
-
- movq %r8,128(%rsp)
- movq %rdi,128+8(%rsp)
- movq %rcx,128+16(%rsp)
-
- mulxq (%rsi),%rbx,%r8
- movq %rbx,(%rsp)
- xorl %edi,%edi
-
- mulxq 8(%rsi),%rax,%r9
-
- mulxq 16(%rsi),%rbx,%r10
- adcxq %rax,%r8
-
- mulxq 24(%rsi),%rax,%r11
- adcxq %rbx,%r9
-
- mulxq 32(%rsi),%rbx,%r12
- adcxq %rax,%r10
-
- mulxq 40(%rsi),%rax,%r13
- adcxq %rbx,%r11
-
- mulxq 48(%rsi),%rbx,%r14
- adcxq %rax,%r12
-
- mulxq 56(%rsi),%rax,%r15
- adcxq %rbx,%r13
- adcxq %rax,%r14
-.byte 0x67
- movq %r8,%rbx
- adcxq %rdi,%r15
-
- movq $-7,%rcx
- jmp .Loop_mulx_gather
-
-.align 32
-.Loop_mulx_gather:
- movdqa 0(%rbp),%xmm8
- movdqa 16(%rbp),%xmm9
- movdqa 32(%rbp),%xmm10
- movdqa 48(%rbp),%xmm11
- pand %xmm0,%xmm8
- movdqa 64(%rbp),%xmm12
- pand %xmm1,%xmm9
- movdqa 80(%rbp),%xmm13
- pand %xmm2,%xmm10
- movdqa 96(%rbp),%xmm14
- pand %xmm3,%xmm11
- movdqa 112(%rbp),%xmm15
- leaq 128(%rbp),%rbp
- pand %xmm4,%xmm12
- pand %xmm5,%xmm13
- pand %xmm6,%xmm14
- pand %xmm7,%xmm15
- por %xmm10,%xmm8
- por %xmm11,%xmm9
- por %xmm12,%xmm8
- por %xmm13,%xmm9
- por %xmm14,%xmm8
- por %xmm15,%xmm9
-
- por %xmm9,%xmm8
- pshufd $0x4e,%xmm8,%xmm9
- por %xmm9,%xmm8
-.byte 102,76,15,126,194
-
-.byte 0xc4,0x62,0xfb,0xf6,0x86,0x00,0x00,0x00,0x00
- adcxq %rax,%rbx
- adoxq %r9,%r8
-
- mulxq 8(%rsi),%rax,%r9
- adcxq %rax,%r8
- adoxq %r10,%r9
-
- mulxq 16(%rsi),%rax,%r10
- adcxq %rax,%r9
- adoxq %r11,%r10
-
-.byte 0xc4,0x62,0xfb,0xf6,0x9e,0x18,0x00,0x00,0x00
- adcxq %rax,%r10
- adoxq %r12,%r11
-
- mulxq 32(%rsi),%rax,%r12
- adcxq %rax,%r11
- adoxq %r13,%r12
-
- mulxq 40(%rsi),%rax,%r13
- adcxq %rax,%r12
- adoxq %r14,%r13
-
-.byte 0xc4,0x62,0xfb,0xf6,0xb6,0x30,0x00,0x00,0x00
- adcxq %rax,%r13
-.byte 0x67
- adoxq %r15,%r14
-
- mulxq 56(%rsi),%rax,%r15
- movq %rbx,64(%rsp,%rcx,8)
- adcxq %rax,%r14
- adoxq %rdi,%r15
- movq %r8,%rbx
- adcxq %rdi,%r15
-
- incq %rcx
- jnz .Loop_mulx_gather
-
- movq %r8,64(%rsp)
- movq %r9,64+8(%rsp)
- movq %r10,64+16(%rsp)
- movq %r11,64+24(%rsp)
- movq %r12,64+32(%rsp)
- movq %r13,64+40(%rsp)
- movq %r14,64+48(%rsp)
- movq %r15,64+56(%rsp)
-
- movq 128(%rsp),%rdx
- movq 128+8(%rsp),%rdi
- movq 128+16(%rsp),%rbp
-
- movq (%rsp),%r8
- movq 8(%rsp),%r9
- movq 16(%rsp),%r10
- movq 24(%rsp),%r11
- movq 32(%rsp),%r12
- movq 40(%rsp),%r13
- movq 48(%rsp),%r14
- movq 56(%rsp),%r15
-
- call __rsaz_512_reducex
-
-.Lmul_gather_tail:
addq 64(%rsp),%r8
adcq 72(%rsp),%r9
adcq 80(%rsp),%r10
@@ -1267,10 +833,6 @@ rsaz_512_mul_scatter4:
movq %rcx,128(%rsp)
movq %rdi,%rbp
- movl $0x80100,%r11d
- andl OPENSSL_ia32cap_P+8(%rip),%r11d
- cmpl $0x80100,%r11d
- je .Lmulx_scatter
movq (%rdi),%rbx
call __rsaz_512_mul
@@ -1287,29 +849,6 @@ rsaz_512_mul_scatter4:
movq 56(%rsp),%r15
call __rsaz_512_reduce
- jmp .Lmul_scatter_tail
-
-.align 32
-.Lmulx_scatter:
- movq (%rdi),%rdx
- call __rsaz_512_mulx
-
-.byte 102,72,15,126,199
-.byte 102,72,15,126,205
-
- movq 128(%rsp),%rdx
- movq (%rsp),%r8
- movq 8(%rsp),%r9
- movq 16(%rsp),%r10
- movq 24(%rsp),%r11
- movq 32(%rsp),%r12
- movq 40(%rsp),%r13
- movq 48(%rsp),%r14
- movq 56(%rsp),%r15
-
- call __rsaz_512_reducex
-
-.Lmul_scatter_tail:
addq 64(%rsp),%r8
adcq 72(%rsp),%r9
adcq 80(%rsp),%r10
@@ -1379,7 +918,6 @@ rsaz_512_mul_by_one:
subq $128+24,%rsp
.cfi_adjust_cfa_offset 128+24
.Lmul_by_one_body:
- movl OPENSSL_ia32cap_P+8(%rip),%eax
movq %rdx,%rbp
movq %rcx,128(%rsp)
@@ -1400,16 +938,7 @@ rsaz_512_mul_by_one:
movdqa %xmm0,64(%rsp)
movdqa %xmm0,80(%rsp)
movdqa %xmm0,96(%rsp)
- andl $0x80100,%eax
- cmpl $0x80100,%eax
- je .Lby_one_callx
call __rsaz_512_reduce
- jmp .Lby_one_tail
-.align 32
-.Lby_one_callx:
- movq 128(%rsp),%rdx
- call __rsaz_512_reducex
-.Lby_one_tail:
movq %r8,(%rdi)
movq %r9,8(%rdi)
movq %r10,16(%rdi)
@@ -1442,6 +971,7 @@ rsaz_512_mul_by_one:
.type __rsaz_512_reduce,@function
.align 32
__rsaz_512_reduce:
+.cfi_startproc
movq %r8,%rbx
imulq 128+8(%rsp),%rbx
movq 0(%rbp),%rax
@@ -1521,66 +1051,12 @@ __rsaz_512_reduce:
jne .Lreduction_loop
.byte 0xf3,0xc3
+.cfi_endproc
.size __rsaz_512_reduce,.-__rsaz_512_reduce
-.type __rsaz_512_reducex,@function
-.align 32
-__rsaz_512_reducex:
-
- imulq %r8,%rdx
- xorq %rsi,%rsi
- movl $8,%ecx
- jmp .Lreduction_loopx
-
-.align 32
-.Lreduction_loopx:
- movq %r8,%rbx
- mulxq 0(%rbp),%rax,%r8
- adcxq %rbx,%rax
- adoxq %r9,%r8
-
- mulxq 8(%rbp),%rax,%r9
- adcxq %rax,%r8
- adoxq %r10,%r9
-
- mulxq 16(%rbp),%rbx,%r10
- adcxq %rbx,%r9
- adoxq %r11,%r10
-
- mulxq 24(%rbp),%rbx,%r11
- adcxq %rbx,%r10
- adoxq %r12,%r11
-
-.byte 0xc4,0x62,0xe3,0xf6,0xa5,0x20,0x00,0x00,0x00
- movq %rdx,%rax
- movq %r8,%rdx
- adcxq %rbx,%r11
- adoxq %r13,%r12
-
- mulxq 128+8(%rsp),%rbx,%rdx
- movq %rax,%rdx
-
- mulxq 40(%rbp),%rax,%r13
- adcxq %rax,%r12
- adoxq %r14,%r13
-
-.byte 0xc4,0x62,0xfb,0xf6,0xb5,0x30,0x00,0x00,0x00
- adcxq %rax,%r13
- adoxq %r15,%r14
-
- mulxq 56(%rbp),%rax,%r15
- movq %rbx,%rdx
- adcxq %rax,%r14
- adoxq %rsi,%r15
- adcxq %rsi,%r15
-
- decl %ecx
- jne .Lreduction_loopx
-
- .byte 0xf3,0xc3
-.size __rsaz_512_reducex,.-__rsaz_512_reducex
.type __rsaz_512_subtract,@function
.align 32
__rsaz_512_subtract:
+.cfi_startproc
movq %r8,(%rdi)
movq %r9,8(%rdi)
movq %r10,16(%rdi)
@@ -1634,10 +1110,12 @@ __rsaz_512_subtract:
movq %r15,56(%rdi)
.byte 0xf3,0xc3
+.cfi_endproc
.size __rsaz_512_subtract,.-__rsaz_512_subtract
.type __rsaz_512_mul,@function
.align 32
__rsaz_512_mul:
+.cfi_startproc
leaq 8(%rsp),%rdi
movq (%rsi),%rax
@@ -1776,131 +1254,13 @@ __rsaz_512_mul:
movq %r15,56(%rdi)
.byte 0xf3,0xc3
+.cfi_endproc
.size __rsaz_512_mul,.-__rsaz_512_mul
-.type __rsaz_512_mulx,@function
-.align 32
-__rsaz_512_mulx:
- mulxq (%rsi),%rbx,%r8
- movq $-6,%rcx
-
- mulxq 8(%rsi),%rax,%r9
- movq %rbx,8(%rsp)
-
- mulxq 16(%rsi),%rbx,%r10
- adcq %rax,%r8
-
- mulxq 24(%rsi),%rax,%r11
- adcq %rbx,%r9
-
- mulxq 32(%rsi),%rbx,%r12
- adcq %rax,%r10
-
- mulxq 40(%rsi),%rax,%r13
- adcq %rbx,%r11
-
- mulxq 48(%rsi),%rbx,%r14
- adcq %rax,%r12
-
- mulxq 56(%rsi),%rax,%r15
- movq 8(%rbp),%rdx
- adcq %rbx,%r13
- adcq %rax,%r14
- adcq $0,%r15
-
- xorq %rdi,%rdi
- jmp .Loop_mulx
-
-.align 32
-.Loop_mulx:
- movq %r8,%rbx
- mulxq (%rsi),%rax,%r8
- adcxq %rax,%rbx
- adoxq %r9,%r8
-
- mulxq 8(%rsi),%rax,%r9
- adcxq %rax,%r8
- adoxq %r10,%r9
-
- mulxq 16(%rsi),%rax,%r10
- adcxq %rax,%r9
- adoxq %r11,%r10
-
- mulxq 24(%rsi),%rax,%r11
- adcxq %rax,%r10
- adoxq %r12,%r11
-
-.byte 0x3e,0xc4,0x62,0xfb,0xf6,0xa6,0x20,0x00,0x00,0x00
- adcxq %rax,%r11
- adoxq %r13,%r12
-
- mulxq 40(%rsi),%rax,%r13
- adcxq %rax,%r12
- adoxq %r14,%r13
-
- mulxq 48(%rsi),%rax,%r14
- adcxq %rax,%r13
- adoxq %r15,%r14
-
- mulxq 56(%rsi),%rax,%r15
- movq 64(%rbp,%rcx,8),%rdx
- movq %rbx,8+64-8(%rsp,%rcx,8)
- adcxq %rax,%r14
- adoxq %rdi,%r15
- adcxq %rdi,%r15
-
- incq %rcx
- jnz .Loop_mulx
-
- movq %r8,%rbx
- mulxq (%rsi),%rax,%r8
- adcxq %rax,%rbx
- adoxq %r9,%r8
-
-.byte 0xc4,0x62,0xfb,0xf6,0x8e,0x08,0x00,0x00,0x00
- adcxq %rax,%r8
- adoxq %r10,%r9
-
-.byte 0xc4,0x62,0xfb,0xf6,0x96,0x10,0x00,0x00,0x00
- adcxq %rax,%r9
- adoxq %r11,%r10
-
- mulxq 24(%rsi),%rax,%r11
- adcxq %rax,%r10
- adoxq %r12,%r11
-
- mulxq 32(%rsi),%rax,%r12
- adcxq %rax,%r11
- adoxq %r13,%r12
-
- mulxq 40(%rsi),%rax,%r13
- adcxq %rax,%r12
- adoxq %r14,%r13
-
-.byte 0xc4,0x62,0xfb,0xf6,0xb6,0x30,0x00,0x00,0x00
- adcxq %rax,%r13
- adoxq %r15,%r14
-
-.byte 0xc4,0x62,0xfb,0xf6,0xbe,0x38,0x00,0x00,0x00
- adcxq %rax,%r14
- adoxq %rdi,%r15
- adcxq %rdi,%r15
-
- movq %rbx,8+64-8(%rsp)
- movq %r8,8+64(%rsp)
- movq %r9,8+64+8(%rsp)
- movq %r10,8+64+16(%rsp)
- movq %r11,8+64+24(%rsp)
- movq %r12,8+64+32(%rsp)
- movq %r13,8+64+40(%rsp)
- movq %r14,8+64+48(%rsp)
- movq %r15,8+64+56(%rsp)
-
- .byte 0xf3,0xc3
-.size __rsaz_512_mulx,.-__rsaz_512_mulx
.globl rsaz_512_scatter4
.type rsaz_512_scatter4,@function
.align 16
rsaz_512_scatter4:
+.cfi_startproc
leaq (%rdi,%rdx,8),%rdi
movl $8,%r9d
jmp .Loop_scatter
@@ -1913,12 +1273,14 @@ rsaz_512_scatter4:
decl %r9d
jnz .Loop_scatter
.byte 0xf3,0xc3
+.cfi_endproc
.size rsaz_512_scatter4,.-rsaz_512_scatter4
.globl rsaz_512_gather4
.type rsaz_512_gather4,@function
.align 16
rsaz_512_gather4:
+.cfi_startproc
movd %edx,%xmm8
movdqa .Linc+16(%rip),%xmm1
movdqa .Linc(%rip),%xmm0
@@ -1982,6 +1344,7 @@ rsaz_512_gather4:
jnz .Loop_gather
.byte 0xf3,0xc3
.LSEH_end_rsaz_512_gather4:
+.cfi_endproc
.size rsaz_512_gather4,.-rsaz_512_gather4
.align 64
diff --git a/secure/lib/libcrypto/amd64/sha1-mb-x86_64.S b/secure/lib/libcrypto/amd64/sha1-mb-x86_64.S
index 0090e020c573..488e554c247e 100644
--- a/secure/lib/libcrypto/amd64/sha1-mb-x86_64.S
+++ b/secure/lib/libcrypto/amd64/sha1-mb-x86_64.S
@@ -12,8 +12,6 @@ sha1_multi_block:
movq OPENSSL_ia32cap_P+4(%rip),%rcx
btq $61,%rcx
jc _shaext_shortcut
- testl $268435456,%ecx
- jnz _avx_shortcut
movq %rsp,%rax
.cfi_def_cfa_register %rax
pushq %rbx
@@ -2939,4319 +2937,6 @@ _shaext_shortcut:
.byte 0xf3,0xc3
.cfi_endproc
.size sha1_multi_block_shaext,.-sha1_multi_block_shaext
-.type sha1_multi_block_avx,@function
-.align 32
-sha1_multi_block_avx:
-.cfi_startproc
-_avx_shortcut:
- shrq $32,%rcx
- cmpl $2,%edx
- jb .Lavx
- testl $32,%ecx
- jnz _avx2_shortcut
- jmp .Lavx
-.align 32
-.Lavx:
- movq %rsp,%rax
-.cfi_def_cfa_register %rax
- pushq %rbx
-.cfi_offset %rbx,-16
- pushq %rbp
-.cfi_offset %rbp,-24
- subq $288,%rsp
- andq $-256,%rsp
- movq %rax,272(%rsp)
-.cfi_escape 0x0f,0x06,0x77,0x90,0x02,0x06,0x23,0x08
-.Lbody_avx:
- leaq K_XX_XX(%rip),%rbp
- leaq 256(%rsp),%rbx
-
- vzeroupper
-.Loop_grande_avx:
- movl %edx,280(%rsp)
- xorl %edx,%edx
- movq 0(%rsi),%r8
- movl 8(%rsi),%ecx
- cmpl %edx,%ecx
- cmovgl %ecx,%edx
- testl %ecx,%ecx
- movl %ecx,0(%rbx)
- cmovleq %rbp,%r8
- movq 16(%rsi),%r9
- movl 24(%rsi),%ecx
- cmpl %edx,%ecx
- cmovgl %ecx,%edx
- testl %ecx,%ecx
- movl %ecx,4(%rbx)
- cmovleq %rbp,%r9
- movq 32(%rsi),%r10
- movl 40(%rsi),%ecx
- cmpl %edx,%ecx
- cmovgl %ecx,%edx
- testl %ecx,%ecx
- movl %ecx,8(%rbx)
- cmovleq %rbp,%r10
- movq 48(%rsi),%r11
- movl 56(%rsi),%ecx
- cmpl %edx,%ecx
- cmovgl %ecx,%edx
- testl %ecx,%ecx
- movl %ecx,12(%rbx)
- cmovleq %rbp,%r11
- testl %edx,%edx
- jz .Ldone_avx
-
- vmovdqu 0(%rdi),%xmm10
- leaq 128(%rsp),%rax
- vmovdqu 32(%rdi),%xmm11
- vmovdqu 64(%rdi),%xmm12
- vmovdqu 96(%rdi),%xmm13
- vmovdqu 128(%rdi),%xmm14
- vmovdqu 96(%rbp),%xmm5
- jmp .Loop_avx
-
-.align 32
-.Loop_avx:
- vmovdqa -32(%rbp),%xmm15
- vmovd (%r8),%xmm0
- leaq 64(%r8),%r8
- vmovd (%r9),%xmm2
- leaq 64(%r9),%r9
- vpinsrd $1,(%r10),%xmm0,%xmm0
- leaq 64(%r10),%r10
- vpinsrd $1,(%r11),%xmm2,%xmm2
- leaq 64(%r11),%r11
- vmovd -60(%r8),%xmm1
- vpunpckldq %xmm2,%xmm0,%xmm0
- vmovd -60(%r9),%xmm9
- vpshufb %xmm5,%xmm0,%xmm0
- vpinsrd $1,-60(%r10),%xmm1,%xmm1
- vpinsrd $1,-60(%r11),%xmm9,%xmm9
- vpaddd %xmm15,%xmm14,%xmm14
- vpslld $5,%xmm10,%xmm8
- vpandn %xmm13,%xmm11,%xmm7
- vpand %xmm12,%xmm11,%xmm6
-
- vmovdqa %xmm0,0-128(%rax)
- vpaddd %xmm0,%xmm14,%xmm14
- vpunpckldq %xmm9,%xmm1,%xmm1
- vpsrld $27,%xmm10,%xmm9
- vpxor %xmm7,%xmm6,%xmm6
- vmovd -56(%r8),%xmm2
-
- vpslld $30,%xmm11,%xmm7
- vpor %xmm9,%xmm8,%xmm8
- vmovd -56(%r9),%xmm9
- vpaddd %xmm6,%xmm14,%xmm14
-
- vpsrld $2,%xmm11,%xmm11
- vpaddd %xmm8,%xmm14,%xmm14
- vpshufb %xmm5,%xmm1,%xmm1
- vpor %xmm7,%xmm11,%xmm11
- vpinsrd $1,-56(%r10),%xmm2,%xmm2
- vpinsrd $1,-56(%r11),%xmm9,%xmm9
- vpaddd %xmm15,%xmm13,%xmm13
- vpslld $5,%xmm14,%xmm8
- vpandn %xmm12,%xmm10,%xmm7
- vpand %xmm11,%xmm10,%xmm6
-
- vmovdqa %xmm1,16-128(%rax)
- vpaddd %xmm1,%xmm13,%xmm13
- vpunpckldq %xmm9,%xmm2,%xmm2
- vpsrld $27,%xmm14,%xmm9
- vpxor %xmm7,%xmm6,%xmm6
- vmovd -52(%r8),%xmm3
-
- vpslld $30,%xmm10,%xmm7
- vpor %xmm9,%xmm8,%xmm8
- vmovd -52(%r9),%xmm9
- vpaddd %xmm6,%xmm13,%xmm13
-
- vpsrld $2,%xmm10,%xmm10
- vpaddd %xmm8,%xmm13,%xmm13
- vpshufb %xmm5,%xmm2,%xmm2
- vpor %xmm7,%xmm10,%xmm10
- vpinsrd $1,-52(%r10),%xmm3,%xmm3
- vpinsrd $1,-52(%r11),%xmm9,%xmm9
- vpaddd %xmm15,%xmm12,%xmm12
- vpslld $5,%xmm13,%xmm8
- vpandn %xmm11,%xmm14,%xmm7
- vpand %xmm10,%xmm14,%xmm6
-
- vmovdqa %xmm2,32-128(%rax)
- vpaddd %xmm2,%xmm12,%xmm12
- vpunpckldq %xmm9,%xmm3,%xmm3
- vpsrld $27,%xmm13,%xmm9
- vpxor %xmm7,%xmm6,%xmm6
- vmovd -48(%r8),%xmm4
-
- vpslld $30,%xmm14,%xmm7
- vpor %xmm9,%xmm8,%xmm8
- vmovd -48(%r9),%xmm9
- vpaddd %xmm6,%xmm12,%xmm12
-
- vpsrld $2,%xmm14,%xmm14
- vpaddd %xmm8,%xmm12,%xmm12
- vpshufb %xmm5,%xmm3,%xmm3
- vpor %xmm7,%xmm14,%xmm14
- vpinsrd $1,-48(%r10),%xmm4,%xmm4
- vpinsrd $1,-48(%r11),%xmm9,%xmm9
- vpaddd %xmm15,%xmm11,%xmm11
- vpslld $5,%xmm12,%xmm8
- vpandn %xmm10,%xmm13,%xmm7
- vpand %xmm14,%xmm13,%xmm6
-
- vmovdqa %xmm3,48-128(%rax)
- vpaddd %xmm3,%xmm11,%xmm11
- vpunpckldq %xmm9,%xmm4,%xmm4
- vpsrld $27,%xmm12,%xmm9
- vpxor %xmm7,%xmm6,%xmm6
- vmovd -44(%r8),%xmm0
-
- vpslld $30,%xmm13,%xmm7
- vpor %xmm9,%xmm8,%xmm8
- vmovd -44(%r9),%xmm9
- vpaddd %xmm6,%xmm11,%xmm11
-
- vpsrld $2,%xmm13,%xmm13
- vpaddd %xmm8,%xmm11,%xmm11
- vpshufb %xmm5,%xmm4,%xmm4
- vpor %xmm7,%xmm13,%xmm13
- vpinsrd $1,-44(%r10),%xmm0,%xmm0
- vpinsrd $1,-44(%r11),%xmm9,%xmm9
- vpaddd %xmm15,%xmm10,%xmm10
- vpslld $5,%xmm11,%xmm8
- vpandn %xmm14,%xmm12,%xmm7
- vpand %xmm13,%xmm12,%xmm6
-
- vmovdqa %xmm4,64-128(%rax)
- vpaddd %xmm4,%xmm10,%xmm10
- vpunpckldq %xmm9,%xmm0,%xmm0
- vpsrld $27,%xmm11,%xmm9
- vpxor %xmm7,%xmm6,%xmm6
- vmovd -40(%r8),%xmm1
-
- vpslld $30,%xmm12,%xmm7
- vpor %xmm9,%xmm8,%xmm8
- vmovd -40(%r9),%xmm9
- vpaddd %xmm6,%xmm10,%xmm10
-
- vpsrld $2,%xmm12,%xmm12
- vpaddd %xmm8,%xmm10,%xmm10
- vpshufb %xmm5,%xmm0,%xmm0
- vpor %xmm7,%xmm12,%xmm12
- vpinsrd $1,-40(%r10),%xmm1,%xmm1
- vpinsrd $1,-40(%r11),%xmm9,%xmm9
- vpaddd %xmm15,%xmm14,%xmm14
- vpslld $5,%xmm10,%xmm8
- vpandn %xmm13,%xmm11,%xmm7
- vpand %xmm12,%xmm11,%xmm6
-
- vmovdqa %xmm0,80-128(%rax)
- vpaddd %xmm0,%xmm14,%xmm14
- vpunpckldq %xmm9,%xmm1,%xmm1
- vpsrld $27,%xmm10,%xmm9
- vpxor %xmm7,%xmm6,%xmm6
- vmovd -36(%r8),%xmm2
-
- vpslld $30,%xmm11,%xmm7
- vpor %xmm9,%xmm8,%xmm8
- vmovd -36(%r9),%xmm9
- vpaddd %xmm6,%xmm14,%xmm14
-
- vpsrld $2,%xmm11,%xmm11
- vpaddd %xmm8,%xmm14,%xmm14
- vpshufb %xmm5,%xmm1,%xmm1
- vpor %xmm7,%xmm11,%xmm11
- vpinsrd $1,-36(%r10),%xmm2,%xmm2
- vpinsrd $1,-36(%r11),%xmm9,%xmm9
- vpaddd %xmm15,%xmm13,%xmm13
- vpslld $5,%xmm14,%xmm8
- vpandn %xmm12,%xmm10,%xmm7
- vpand %xmm11,%xmm10,%xmm6
-
- vmovdqa %xmm1,96-128(%rax)
- vpaddd %xmm1,%xmm13,%xmm13
- vpunpckldq %xmm9,%xmm2,%xmm2
- vpsrld $27,%xmm14,%xmm9
- vpxor %xmm7,%xmm6,%xmm6
- vmovd -32(%r8),%xmm3
-
- vpslld $30,%xmm10,%xmm7
- vpor %xmm9,%xmm8,%xmm8
- vmovd -32(%r9),%xmm9
- vpaddd %xmm6,%xmm13,%xmm13
-
- vpsrld $2,%xmm10,%xmm10
- vpaddd %xmm8,%xmm13,%xmm13
- vpshufb %xmm5,%xmm2,%xmm2
- vpor %xmm7,%xmm10,%xmm10
- vpinsrd $1,-32(%r10),%xmm3,%xmm3
- vpinsrd $1,-32(%r11),%xmm9,%xmm9
- vpaddd %xmm15,%xmm12,%xmm12
- vpslld $5,%xmm13,%xmm8
- vpandn %xmm11,%xmm14,%xmm7
- vpand %xmm10,%xmm14,%xmm6
-
- vmovdqa %xmm2,112-128(%rax)
- vpaddd %xmm2,%xmm12,%xmm12
- vpunpckldq %xmm9,%xmm3,%xmm3
- vpsrld $27,%xmm13,%xmm9
- vpxor %xmm7,%xmm6,%xmm6
- vmovd -28(%r8),%xmm4
-
- vpslld $30,%xmm14,%xmm7
- vpor %xmm9,%xmm8,%xmm8
- vmovd -28(%r9),%xmm9
- vpaddd %xmm6,%xmm12,%xmm12
-
- vpsrld $2,%xmm14,%xmm14
- vpaddd %xmm8,%xmm12,%xmm12
- vpshufb %xmm5,%xmm3,%xmm3
- vpor %xmm7,%xmm14,%xmm14
- vpinsrd $1,-28(%r10),%xmm4,%xmm4
- vpinsrd $1,-28(%r11),%xmm9,%xmm9
- vpaddd %xmm15,%xmm11,%xmm11
- vpslld $5,%xmm12,%xmm8
- vpandn %xmm10,%xmm13,%xmm7
- vpand %xmm14,%xmm13,%xmm6
-
- vmovdqa %xmm3,128-128(%rax)
- vpaddd %xmm3,%xmm11,%xmm11
- vpunpckldq %xmm9,%xmm4,%xmm4
- vpsrld $27,%xmm12,%xmm9
- vpxor %xmm7,%xmm6,%xmm6
- vmovd -24(%r8),%xmm0
-
- vpslld $30,%xmm13,%xmm7
- vpor %xmm9,%xmm8,%xmm8
- vmovd -24(%r9),%xmm9
- vpaddd %xmm6,%xmm11,%xmm11
-
- vpsrld $2,%xmm13,%xmm13
- vpaddd %xmm8,%xmm11,%xmm11
- vpshufb %xmm5,%xmm4,%xmm4
- vpor %xmm7,%xmm13,%xmm13
- vpinsrd $1,-24(%r10),%xmm0,%xmm0
- vpinsrd $1,-24(%r11),%xmm9,%xmm9
- vpaddd %xmm15,%xmm10,%xmm10
- vpslld $5,%xmm11,%xmm8
- vpandn %xmm14,%xmm12,%xmm7
- vpand %xmm13,%xmm12,%xmm6
-
- vmovdqa %xmm4,144-128(%rax)
- vpaddd %xmm4,%xmm10,%xmm10
- vpunpckldq %xmm9,%xmm0,%xmm0
- vpsrld $27,%xmm11,%xmm9
- vpxor %xmm7,%xmm6,%xmm6
- vmovd -20(%r8),%xmm1
-
- vpslld $30,%xmm12,%xmm7
- vpor %xmm9,%xmm8,%xmm8
- vmovd -20(%r9),%xmm9
- vpaddd %xmm6,%xmm10,%xmm10
-
- vpsrld $2,%xmm12,%xmm12
- vpaddd %xmm8,%xmm10,%xmm10
- vpshufb %xmm5,%xmm0,%xmm0
- vpor %xmm7,%xmm12,%xmm12
- vpinsrd $1,-20(%r10),%xmm1,%xmm1
- vpinsrd $1,-20(%r11),%xmm9,%xmm9
- vpaddd %xmm15,%xmm14,%xmm14
- vpslld $5,%xmm10,%xmm8
- vpandn %xmm13,%xmm11,%xmm7
- vpand %xmm12,%xmm11,%xmm6
-
- vmovdqa %xmm0,160-128(%rax)
- vpaddd %xmm0,%xmm14,%xmm14
- vpunpckldq %xmm9,%xmm1,%xmm1
- vpsrld $27,%xmm10,%xmm9
- vpxor %xmm7,%xmm6,%xmm6
- vmovd -16(%r8),%xmm2
-
- vpslld $30,%xmm11,%xmm7
- vpor %xmm9,%xmm8,%xmm8
- vmovd -16(%r9),%xmm9
- vpaddd %xmm6,%xmm14,%xmm14
-
- vpsrld $2,%xmm11,%xmm11
- vpaddd %xmm8,%xmm14,%xmm14
- vpshufb %xmm5,%xmm1,%xmm1
- vpor %xmm7,%xmm11,%xmm11
- vpinsrd $1,-16(%r10),%xmm2,%xmm2
- vpinsrd $1,-16(%r11),%xmm9,%xmm9
- vpaddd %xmm15,%xmm13,%xmm13
- vpslld $5,%xmm14,%xmm8
- vpandn %xmm12,%xmm10,%xmm7
- vpand %xmm11,%xmm10,%xmm6
-
- vmovdqa %xmm1,176-128(%rax)
- vpaddd %xmm1,%xmm13,%xmm13
- vpunpckldq %xmm9,%xmm2,%xmm2
- vpsrld $27,%xmm14,%xmm9
- vpxor %xmm7,%xmm6,%xmm6
- vmovd -12(%r8),%xmm3
-
- vpslld $30,%xmm10,%xmm7
- vpor %xmm9,%xmm8,%xmm8
- vmovd -12(%r9),%xmm9
- vpaddd %xmm6,%xmm13,%xmm13
-
- vpsrld $2,%xmm10,%xmm10
- vpaddd %xmm8,%xmm13,%xmm13
- vpshufb %xmm5,%xmm2,%xmm2
- vpor %xmm7,%xmm10,%xmm10
- vpinsrd $1,-12(%r10),%xmm3,%xmm3
- vpinsrd $1,-12(%r11),%xmm9,%xmm9
- vpaddd %xmm15,%xmm12,%xmm12
- vpslld $5,%xmm13,%xmm8
- vpandn %xmm11,%xmm14,%xmm7
- vpand %xmm10,%xmm14,%xmm6
-
- vmovdqa %xmm2,192-128(%rax)
- vpaddd %xmm2,%xmm12,%xmm12
- vpunpckldq %xmm9,%xmm3,%xmm3
- vpsrld $27,%xmm13,%xmm9
- vpxor %xmm7,%xmm6,%xmm6
- vmovd -8(%r8),%xmm4
-
- vpslld $30,%xmm14,%xmm7
- vpor %xmm9,%xmm8,%xmm8
- vmovd -8(%r9),%xmm9
- vpaddd %xmm6,%xmm12,%xmm12
-
- vpsrld $2,%xmm14,%xmm14
- vpaddd %xmm8,%xmm12,%xmm12
- vpshufb %xmm5,%xmm3,%xmm3
- vpor %xmm7,%xmm14,%xmm14
- vpinsrd $1,-8(%r10),%xmm4,%xmm4
- vpinsrd $1,-8(%r11),%xmm9,%xmm9
- vpaddd %xmm15,%xmm11,%xmm11
- vpslld $5,%xmm12,%xmm8
- vpandn %xmm10,%xmm13,%xmm7
- vpand %xmm14,%xmm13,%xmm6
-
- vmovdqa %xmm3,208-128(%rax)
- vpaddd %xmm3,%xmm11,%xmm11
- vpunpckldq %xmm9,%xmm4,%xmm4
- vpsrld $27,%xmm12,%xmm9
- vpxor %xmm7,%xmm6,%xmm6
- vmovd -4(%r8),%xmm0
-
- vpslld $30,%xmm13,%xmm7
- vpor %xmm9,%xmm8,%xmm8
- vmovd -4(%r9),%xmm9
- vpaddd %xmm6,%xmm11,%xmm11
-
- vpsrld $2,%xmm13,%xmm13
- vpaddd %xmm8,%xmm11,%xmm11
- vpshufb %xmm5,%xmm4,%xmm4
- vpor %xmm7,%xmm13,%xmm13
- vmovdqa 0-128(%rax),%xmm1
- vpinsrd $1,-4(%r10),%xmm0,%xmm0
- vpinsrd $1,-4(%r11),%xmm9,%xmm9
- vpaddd %xmm15,%xmm10,%xmm10
- prefetcht0 63(%r8)
- vpslld $5,%xmm11,%xmm8
- vpandn %xmm14,%xmm12,%xmm7
- vpand %xmm13,%xmm12,%xmm6
-
- vmovdqa %xmm4,224-128(%rax)
- vpaddd %xmm4,%xmm10,%xmm10
- vpunpckldq %xmm9,%xmm0,%xmm0
- vpsrld $27,%xmm11,%xmm9
- prefetcht0 63(%r9)
- vpxor %xmm7,%xmm6,%xmm6
-
- vpslld $30,%xmm12,%xmm7
- vpor %xmm9,%xmm8,%xmm8
- prefetcht0 63(%r10)
- vpaddd %xmm6,%xmm10,%xmm10
-
- vpsrld $2,%xmm12,%xmm12
- vpaddd %xmm8,%xmm10,%xmm10
- prefetcht0 63(%r11)
- vpshufb %xmm5,%xmm0,%xmm0
- vpor %xmm7,%xmm12,%xmm12
- vmovdqa 16-128(%rax),%xmm2
- vpxor %xmm3,%xmm1,%xmm1
- vmovdqa 32-128(%rax),%xmm3
-
- vpaddd %xmm15,%xmm14,%xmm14
- vpslld $5,%xmm10,%xmm8
- vpandn %xmm13,%xmm11,%xmm7
-
- vpand %xmm12,%xmm11,%xmm6
-
- vmovdqa %xmm0,240-128(%rax)
- vpaddd %xmm0,%xmm14,%xmm14
- vpxor 128-128(%rax),%xmm1,%xmm1
- vpsrld $27,%xmm10,%xmm9
- vpxor %xmm7,%xmm6,%xmm6
- vpxor %xmm3,%xmm1,%xmm1
-
-
- vpslld $30,%xmm11,%xmm7
- vpor %xmm9,%xmm8,%xmm8
- vpaddd %xmm6,%xmm14,%xmm14
-
- vpsrld $31,%xmm1,%xmm5
- vpaddd %xmm1,%xmm1,%xmm1
-
- vpsrld $2,%xmm11,%xmm11
-
- vpaddd %xmm8,%xmm14,%xmm14
- vpor %xmm5,%xmm1,%xmm1
- vpor %xmm7,%xmm11,%xmm11
- vpxor %xmm4,%xmm2,%xmm2
- vmovdqa 48-128(%rax),%xmm4
-
- vpaddd %xmm15,%xmm13,%xmm13
- vpslld $5,%xmm14,%xmm8
- vpandn %xmm12,%xmm10,%xmm7
-
- vpand %xmm11,%xmm10,%xmm6
-
- vmovdqa %xmm1,0-128(%rax)
- vpaddd %xmm1,%xmm13,%xmm13
- vpxor 144-128(%rax),%xmm2,%xmm2
- vpsrld $27,%xmm14,%xmm9
- vpxor %xmm7,%xmm6,%xmm6
- vpxor %xmm4,%xmm2,%xmm2
-
-
- vpslld $30,%xmm10,%xmm7
- vpor %xmm9,%xmm8,%xmm8
- vpaddd %xmm6,%xmm13,%xmm13
-
- vpsrld $31,%xmm2,%xmm5
- vpaddd %xmm2,%xmm2,%xmm2
-
- vpsrld $2,%xmm10,%xmm10
-
- vpaddd %xmm8,%xmm13,%xmm13
- vpor %xmm5,%xmm2,%xmm2
- vpor %xmm7,%xmm10,%xmm10
- vpxor %xmm0,%xmm3,%xmm3
- vmovdqa 64-128(%rax),%xmm0
-
- vpaddd %xmm15,%xmm12,%xmm12
- vpslld $5,%xmm13,%xmm8
- vpandn %xmm11,%xmm14,%xmm7
-
- vpand %xmm10,%xmm14,%xmm6
-
- vmovdqa %xmm2,16-128(%rax)
- vpaddd %xmm2,%xmm12,%xmm12
- vpxor 160-128(%rax),%xmm3,%xmm3
- vpsrld $27,%xmm13,%xmm9
- vpxor %xmm7,%xmm6,%xmm6
- vpxor %xmm0,%xmm3,%xmm3
-
-
- vpslld $30,%xmm14,%xmm7
- vpor %xmm9,%xmm8,%xmm8
- vpaddd %xmm6,%xmm12,%xmm12
-
- vpsrld $31,%xmm3,%xmm5
- vpaddd %xmm3,%xmm3,%xmm3
-
- vpsrld $2,%xmm14,%xmm14
-
- vpaddd %xmm8,%xmm12,%xmm12
- vpor %xmm5,%xmm3,%xmm3
- vpor %xmm7,%xmm14,%xmm14
- vpxor %xmm1,%xmm4,%xmm4
- vmovdqa 80-128(%rax),%xmm1
-
- vpaddd %xmm15,%xmm11,%xmm11
- vpslld $5,%xmm12,%xmm8
- vpandn %xmm10,%xmm13,%xmm7
-
- vpand %xmm14,%xmm13,%xmm6
-
- vmovdqa %xmm3,32-128(%rax)
- vpaddd %xmm3,%xmm11,%xmm11
- vpxor 176-128(%rax),%xmm4,%xmm4
- vpsrld $27,%xmm12,%xmm9
- vpxor %xmm7,%xmm6,%xmm6
- vpxor %xmm1,%xmm4,%xmm4
-
-
- vpslld $30,%xmm13,%xmm7
- vpor %xmm9,%xmm8,%xmm8
- vpaddd %xmm6,%xmm11,%xmm11
-
- vpsrld $31,%xmm4,%xmm5
- vpaddd %xmm4,%xmm4,%xmm4
-
- vpsrld $2,%xmm13,%xmm13
-
- vpaddd %xmm8,%xmm11,%xmm11
- vpor %xmm5,%xmm4,%xmm4
- vpor %xmm7,%xmm13,%xmm13
- vpxor %xmm2,%xmm0,%xmm0
- vmovdqa 96-128(%rax),%xmm2
-
- vpaddd %xmm15,%xmm10,%xmm10
- vpslld $5,%xmm11,%xmm8
- vpandn %xmm14,%xmm12,%xmm7
-
- vpand %xmm13,%xmm12,%xmm6
-
- vmovdqa %xmm4,48-128(%rax)
- vpaddd %xmm4,%xmm10,%xmm10
- vpxor 192-128(%rax),%xmm0,%xmm0
- vpsrld $27,%xmm11,%xmm9
- vpxor %xmm7,%xmm6,%xmm6
- vpxor %xmm2,%xmm0,%xmm0
-
-
- vpslld $30,%xmm12,%xmm7
- vpor %xmm9,%xmm8,%xmm8
- vpaddd %xmm6,%xmm10,%xmm10
-
- vpsrld $31,%xmm0,%xmm5
- vpaddd %xmm0,%xmm0,%xmm0
-
- vpsrld $2,%xmm12,%xmm12
-
- vpaddd %xmm8,%xmm10,%xmm10
- vpor %xmm5,%xmm0,%xmm0
- vpor %xmm7,%xmm12,%xmm12
- vmovdqa 0(%rbp),%xmm15
- vpxor %xmm3,%xmm1,%xmm1
- vmovdqa 112-128(%rax),%xmm3
-
- vpslld $5,%xmm10,%xmm8
- vpaddd %xmm15,%xmm14,%xmm14
- vpxor %xmm11,%xmm13,%xmm6
- vmovdqa %xmm0,64-128(%rax)
- vpaddd %xmm0,%xmm14,%xmm14
- vpxor 208-128(%rax),%xmm1,%xmm1
- vpsrld $27,%xmm10,%xmm9
- vpxor %xmm12,%xmm6,%xmm6
- vpxor %xmm3,%xmm1,%xmm1
-
- vpslld $30,%xmm11,%xmm7
- vpor %xmm9,%xmm8,%xmm8
- vpaddd %xmm6,%xmm14,%xmm14
- vpsrld $31,%xmm1,%xmm5
- vpaddd %xmm1,%xmm1,%xmm1
-
- vpsrld $2,%xmm11,%xmm11
- vpaddd %xmm8,%xmm14,%xmm14
- vpor %xmm5,%xmm1,%xmm1
- vpor %xmm7,%xmm11,%xmm11
- vpxor %xmm4,%xmm2,%xmm2
- vmovdqa 128-128(%rax),%xmm4
-
- vpslld $5,%xmm14,%xmm8
- vpaddd %xmm15,%xmm13,%xmm13
- vpxor %xmm10,%xmm12,%xmm6
- vmovdqa %xmm1,80-128(%rax)
- vpaddd %xmm1,%xmm13,%xmm13
- vpxor 224-128(%rax),%xmm2,%xmm2
- vpsrld $27,%xmm14,%xmm9
- vpxor %xmm11,%xmm6,%xmm6
- vpxor %xmm4,%xmm2,%xmm2
-
- vpslld $30,%xmm10,%xmm7
- vpor %xmm9,%xmm8,%xmm8
- vpaddd %xmm6,%xmm13,%xmm13
- vpsrld $31,%xmm2,%xmm5
- vpaddd %xmm2,%xmm2,%xmm2
-
- vpsrld $2,%xmm10,%xmm10
- vpaddd %xmm8,%xmm13,%xmm13
- vpor %xmm5,%xmm2,%xmm2
- vpor %xmm7,%xmm10,%xmm10
- vpxor %xmm0,%xmm3,%xmm3
- vmovdqa 144-128(%rax),%xmm0
-
- vpslld $5,%xmm13,%xmm8
- vpaddd %xmm15,%xmm12,%xmm12
- vpxor %xmm14,%xmm11,%xmm6
- vmovdqa %xmm2,96-128(%rax)
- vpaddd %xmm2,%xmm12,%xmm12
- vpxor 240-128(%rax),%xmm3,%xmm3
- vpsrld $27,%xmm13,%xmm9
- vpxor %xmm10,%xmm6,%xmm6
- vpxor %xmm0,%xmm3,%xmm3
-
- vpslld $30,%xmm14,%xmm7
- vpor %xmm9,%xmm8,%xmm8
- vpaddd %xmm6,%xmm12,%xmm12
- vpsrld $31,%xmm3,%xmm5
- vpaddd %xmm3,%xmm3,%xmm3
-
- vpsrld $2,%xmm14,%xmm14
- vpaddd %xmm8,%xmm12,%xmm12
- vpor %xmm5,%xmm3,%xmm3
- vpor %xmm7,%xmm14,%xmm14
- vpxor %xmm1,%xmm4,%xmm4
- vmovdqa 160-128(%rax),%xmm1
-
- vpslld $5,%xmm12,%xmm8
- vpaddd %xmm15,%xmm11,%xmm11
- vpxor %xmm13,%xmm10,%xmm6
- vmovdqa %xmm3,112-128(%rax)
- vpaddd %xmm3,%xmm11,%xmm11
- vpxor 0-128(%rax),%xmm4,%xmm4
- vpsrld $27,%xmm12,%xmm9
- vpxor %xmm14,%xmm6,%xmm6
- vpxor %xmm1,%xmm4,%xmm4
-
- vpslld $30,%xmm13,%xmm7
- vpor %xmm9,%xmm8,%xmm8
- vpaddd %xmm6,%xmm11,%xmm11
- vpsrld $31,%xmm4,%xmm5
- vpaddd %xmm4,%xmm4,%xmm4
-
- vpsrld $2,%xmm13,%xmm13
- vpaddd %xmm8,%xmm11,%xmm11
- vpor %xmm5,%xmm4,%xmm4
- vpor %xmm7,%xmm13,%xmm13
- vpxor %xmm2,%xmm0,%xmm0
- vmovdqa 176-128(%rax),%xmm2
-
- vpslld $5,%xmm11,%xmm8
- vpaddd %xmm15,%xmm10,%xmm10
- vpxor %xmm12,%xmm14,%xmm6
- vmovdqa %xmm4,128-128(%rax)
- vpaddd %xmm4,%xmm10,%xmm10
- vpxor 16-128(%rax),%xmm0,%xmm0
- vpsrld $27,%xmm11,%xmm9
- vpxor %xmm13,%xmm6,%xmm6
- vpxor %xmm2,%xmm0,%xmm0
-
- vpslld $30,%xmm12,%xmm7
- vpor %xmm9,%xmm8,%xmm8
- vpaddd %xmm6,%xmm10,%xmm10
- vpsrld $31,%xmm0,%xmm5
- vpaddd %xmm0,%xmm0,%xmm0
-
- vpsrld $2,%xmm12,%xmm12
- vpaddd %xmm8,%xmm10,%xmm10
- vpor %xmm5,%xmm0,%xmm0
- vpor %xmm7,%xmm12,%xmm12
- vpxor %xmm3,%xmm1,%xmm1
- vmovdqa 192-128(%rax),%xmm3
-
- vpslld $5,%xmm10,%xmm8
- vpaddd %xmm15,%xmm14,%xmm14
- vpxor %xmm11,%xmm13,%xmm6
- vmovdqa %xmm0,144-128(%rax)
- vpaddd %xmm0,%xmm14,%xmm14
- vpxor 32-128(%rax),%xmm1,%xmm1
- vpsrld $27,%xmm10,%xmm9
- vpxor %xmm12,%xmm6,%xmm6
- vpxor %xmm3,%xmm1,%xmm1
-
- vpslld $30,%xmm11,%xmm7
- vpor %xmm9,%xmm8,%xmm8
- vpaddd %xmm6,%xmm14,%xmm14
- vpsrld $31,%xmm1,%xmm5
- vpaddd %xmm1,%xmm1,%xmm1
-
- vpsrld $2,%xmm11,%xmm11
- vpaddd %xmm8,%xmm14,%xmm14
- vpor %xmm5,%xmm1,%xmm1
- vpor %xmm7,%xmm11,%xmm11
- vpxor %xmm4,%xmm2,%xmm2
- vmovdqa 208-128(%rax),%xmm4
-
- vpslld $5,%xmm14,%xmm8
- vpaddd %xmm15,%xmm13,%xmm13
- vpxor %xmm10,%xmm12,%xmm6
- vmovdqa %xmm1,160-128(%rax)
- vpaddd %xmm1,%xmm13,%xmm13
- vpxor 48-128(%rax),%xmm2,%xmm2
- vpsrld $27,%xmm14,%xmm9
- vpxor %xmm11,%xmm6,%xmm6
- vpxor %xmm4,%xmm2,%xmm2
-
- vpslld $30,%xmm10,%xmm7
- vpor %xmm9,%xmm8,%xmm8
- vpaddd %xmm6,%xmm13,%xmm13
- vpsrld $31,%xmm2,%xmm5
- vpaddd %xmm2,%xmm2,%xmm2
-
- vpsrld $2,%xmm10,%xmm10
- vpaddd %xmm8,%xmm13,%xmm13
- vpor %xmm5,%xmm2,%xmm2
- vpor %xmm7,%xmm10,%xmm10
- vpxor %xmm0,%xmm3,%xmm3
- vmovdqa 224-128(%rax),%xmm0
-
- vpslld $5,%xmm13,%xmm8
- vpaddd %xmm15,%xmm12,%xmm12
- vpxor %xmm14,%xmm11,%xmm6
- vmovdqa %xmm2,176-128(%rax)
- vpaddd %xmm2,%xmm12,%xmm12
- vpxor 64-128(%rax),%xmm3,%xmm3
- vpsrld $27,%xmm13,%xmm9
- vpxor %xmm10,%xmm6,%xmm6
- vpxor %xmm0,%xmm3,%xmm3
-
- vpslld $30,%xmm14,%xmm7
- vpor %xmm9,%xmm8,%xmm8
- vpaddd %xmm6,%xmm12,%xmm12
- vpsrld $31,%xmm3,%xmm5
- vpaddd %xmm3,%xmm3,%xmm3
-
- vpsrld $2,%xmm14,%xmm14
- vpaddd %xmm8,%xmm12,%xmm12
- vpor %xmm5,%xmm3,%xmm3
- vpor %xmm7,%xmm14,%xmm14
- vpxor %xmm1,%xmm4,%xmm4
- vmovdqa 240-128(%rax),%xmm1
-
- vpslld $5,%xmm12,%xmm8
- vpaddd %xmm15,%xmm11,%xmm11
- vpxor %xmm13,%xmm10,%xmm6
- vmovdqa %xmm3,192-128(%rax)
- vpaddd %xmm3,%xmm11,%xmm11
- vpxor 80-128(%rax),%xmm4,%xmm4
- vpsrld $27,%xmm12,%xmm9
- vpxor %xmm14,%xmm6,%xmm6
- vpxor %xmm1,%xmm4,%xmm4
-
- vpslld $30,%xmm13,%xmm7
- vpor %xmm9,%xmm8,%xmm8
- vpaddd %xmm6,%xmm11,%xmm11
- vpsrld $31,%xmm4,%xmm5
- vpaddd %xmm4,%xmm4,%xmm4
-
- vpsrld $2,%xmm13,%xmm13
- vpaddd %xmm8,%xmm11,%xmm11
- vpor %xmm5,%xmm4,%xmm4
- vpor %xmm7,%xmm13,%xmm13
- vpxor %xmm2,%xmm0,%xmm0
- vmovdqa 0-128(%rax),%xmm2
-
- vpslld $5,%xmm11,%xmm8
- vpaddd %xmm15,%xmm10,%xmm10
- vpxor %xmm12,%xmm14,%xmm6
- vmovdqa %xmm4,208-128(%rax)
- vpaddd %xmm4,%xmm10,%xmm10
- vpxor 96-128(%rax),%xmm0,%xmm0
- vpsrld $27,%xmm11,%xmm9
- vpxor %xmm13,%xmm6,%xmm6
- vpxor %xmm2,%xmm0,%xmm0
-
- vpslld $30,%xmm12,%xmm7
- vpor %xmm9,%xmm8,%xmm8
- vpaddd %xmm6,%xmm10,%xmm10
- vpsrld $31,%xmm0,%xmm5
- vpaddd %xmm0,%xmm0,%xmm0
-
- vpsrld $2,%xmm12,%xmm12
- vpaddd %xmm8,%xmm10,%xmm10
- vpor %xmm5,%xmm0,%xmm0
- vpor %xmm7,%xmm12,%xmm12
- vpxor %xmm3,%xmm1,%xmm1
- vmovdqa 16-128(%rax),%xmm3
-
- vpslld $5,%xmm10,%xmm8
- vpaddd %xmm15,%xmm14,%xmm14
- vpxor %xmm11,%xmm13,%xmm6
- vmovdqa %xmm0,224-128(%rax)
- vpaddd %xmm0,%xmm14,%xmm14
- vpxor 112-128(%rax),%xmm1,%xmm1
- vpsrld $27,%xmm10,%xmm9
- vpxor %xmm12,%xmm6,%xmm6
- vpxor %xmm3,%xmm1,%xmm1
-
- vpslld $30,%xmm11,%xmm7
- vpor %xmm9,%xmm8,%xmm8
- vpaddd %xmm6,%xmm14,%xmm14
- vpsrld $31,%xmm1,%xmm5
- vpaddd %xmm1,%xmm1,%xmm1
-
- vpsrld $2,%xmm11,%xmm11
- vpaddd %xmm8,%xmm14,%xmm14
- vpor %xmm5,%xmm1,%xmm1
- vpor %xmm7,%xmm11,%xmm11
- vpxor %xmm4,%xmm2,%xmm2
- vmovdqa 32-128(%rax),%xmm4
-
- vpslld $5,%xmm14,%xmm8
- vpaddd %xmm15,%xmm13,%xmm13
- vpxor %xmm10,%xmm12,%xmm6
- vmovdqa %xmm1,240-128(%rax)
- vpaddd %xmm1,%xmm13,%xmm13
- vpxor 128-128(%rax),%xmm2,%xmm2
- vpsrld $27,%xmm14,%xmm9
- vpxor %xmm11,%xmm6,%xmm6
- vpxor %xmm4,%xmm2,%xmm2
-
- vpslld $30,%xmm10,%xmm7
- vpor %xmm9,%xmm8,%xmm8
- vpaddd %xmm6,%xmm13,%xmm13
- vpsrld $31,%xmm2,%xmm5
- vpaddd %xmm2,%xmm2,%xmm2
-
- vpsrld $2,%xmm10,%xmm10
- vpaddd %xmm8,%xmm13,%xmm13
- vpor %xmm5,%xmm2,%xmm2
- vpor %xmm7,%xmm10,%xmm10
- vpxor %xmm0,%xmm3,%xmm3
- vmovdqa 48-128(%rax),%xmm0
-
- vpslld $5,%xmm13,%xmm8
- vpaddd %xmm15,%xmm12,%xmm12
- vpxor %xmm14,%xmm11,%xmm6
- vmovdqa %xmm2,0-128(%rax)
- vpaddd %xmm2,%xmm12,%xmm12
- vpxor 144-128(%rax),%xmm3,%xmm3
- vpsrld $27,%xmm13,%xmm9
- vpxor %xmm10,%xmm6,%xmm6
- vpxor %xmm0,%xmm3,%xmm3
-
- vpslld $30,%xmm14,%xmm7
- vpor %xmm9,%xmm8,%xmm8
- vpaddd %xmm6,%xmm12,%xmm12
- vpsrld $31,%xmm3,%xmm5
- vpaddd %xmm3,%xmm3,%xmm3
-
- vpsrld $2,%xmm14,%xmm14
- vpaddd %xmm8,%xmm12,%xmm12
- vpor %xmm5,%xmm3,%xmm3
- vpor %xmm7,%xmm14,%xmm14
- vpxor %xmm1,%xmm4,%xmm4
- vmovdqa 64-128(%rax),%xmm1
-
- vpslld $5,%xmm12,%xmm8
- vpaddd %xmm15,%xmm11,%xmm11
- vpxor %xmm13,%xmm10,%xmm6
- vmovdqa %xmm3,16-128(%rax)
- vpaddd %xmm3,%xmm11,%xmm11
- vpxor 160-128(%rax),%xmm4,%xmm4
- vpsrld $27,%xmm12,%xmm9
- vpxor %xmm14,%xmm6,%xmm6
- vpxor %xmm1,%xmm4,%xmm4
-
- vpslld $30,%xmm13,%xmm7
- vpor %xmm9,%xmm8,%xmm8
- vpaddd %xmm6,%xmm11,%xmm11
- vpsrld $31,%xmm4,%xmm5
- vpaddd %xmm4,%xmm4,%xmm4
-
- vpsrld $2,%xmm13,%xmm13
- vpaddd %xmm8,%xmm11,%xmm11
- vpor %xmm5,%xmm4,%xmm4
- vpor %xmm7,%xmm13,%xmm13
- vpxor %xmm2,%xmm0,%xmm0
- vmovdqa 80-128(%rax),%xmm2
-
- vpslld $5,%xmm11,%xmm8
- vpaddd %xmm15,%xmm10,%xmm10
- vpxor %xmm12,%xmm14,%xmm6
- vmovdqa %xmm4,32-128(%rax)
- vpaddd %xmm4,%xmm10,%xmm10
- vpxor 176-128(%rax),%xmm0,%xmm0
- vpsrld $27,%xmm11,%xmm9
- vpxor %xmm13,%xmm6,%xmm6
- vpxor %xmm2,%xmm0,%xmm0
-
- vpslld $30,%xmm12,%xmm7
- vpor %xmm9,%xmm8,%xmm8
- vpaddd %xmm6,%xmm10,%xmm10
- vpsrld $31,%xmm0,%xmm5
- vpaddd %xmm0,%xmm0,%xmm0
-
- vpsrld $2,%xmm12,%xmm12
- vpaddd %xmm8,%xmm10,%xmm10
- vpor %xmm5,%xmm0,%xmm0
- vpor %xmm7,%xmm12,%xmm12
- vpxor %xmm3,%xmm1,%xmm1
- vmovdqa 96-128(%rax),%xmm3
-
- vpslld $5,%xmm10,%xmm8
- vpaddd %xmm15,%xmm14,%xmm14
- vpxor %xmm11,%xmm13,%xmm6
- vmovdqa %xmm0,48-128(%rax)
- vpaddd %xmm0,%xmm14,%xmm14
- vpxor 192-128(%rax),%xmm1,%xmm1
- vpsrld $27,%xmm10,%xmm9
- vpxor %xmm12,%xmm6,%xmm6
- vpxor %xmm3,%xmm1,%xmm1
-
- vpslld $30,%xmm11,%xmm7