author	Marcin Wojtas <mw@FreeBSD.org>	2020-05-26 22:41:12 +0000
committer	Marcin Wojtas <mw@FreeBSD.org>	2020-05-26 22:41:12 +0000
commit	48d7e753a7b42e782f7578914f383cd9bf2df28f (patch)
tree	2a6d70a930696c685245fdfce6e1b9e4bfb4edc7
parent	1e328b98ce09e59e8f9e132388c865c439dc5197 (diff)
MF11: r361467-361468,361534
This patch upgrades the ENA driver to version 2.2.0.

Approved by:	re (gjb)
Sponsored by:	Amazon, Inc.
Notes:
	svn path=/releng/11.4/; revision=361539
-rw-r--r--	share/man/man4/ena.4	96
-rw-r--r--	sys/contrib/ena-com/ena_com.c	824
-rw-r--r--	sys/contrib/ena-com/ena_com.h	289
-rw-r--r--	sys/contrib/ena-com/ena_defs/ena_admin_defs.h	651
-rw-r--r--	sys/contrib/ena-com/ena_defs/ena_common_defs.h	8
-rw-r--r--	sys/contrib/ena-com/ena_defs/ena_eth_io_defs.h	251
-rw-r--r--	sys/contrib/ena-com/ena_defs/ena_gen_info.h	6
-rw-r--r--	sys/contrib/ena-com/ena_defs/ena_regs_defs.h	209
-rw-r--r--	sys/contrib/ena-com/ena_eth_com.c	272
-rw-r--r--	sys/contrib/ena-com/ena_eth_com.h	134
-rw-r--r--	sys/contrib/ena-com/ena_plat.h	81
-rw-r--r--	sys/dev/ena/ena.c	2670
-rw-r--r--	sys/dev/ena/ena.h	207
-rw-r--r--	sys/dev/ena/ena_datapath.c	1110
-rw-r--r--	sys/dev/ena/ena_datapath.h	42
-rw-r--r--	sys/dev/ena/ena_netmap.c	1092
-rw-r--r--	sys/dev/ena/ena_netmap.h	60
-rw-r--r--	sys/dev/ena/ena_sysctl.c	225
-rw-r--r--	sys/dev/ena/ena_sysctl.h	7
-rw-r--r--	sys/modules/ena/Makefile	5
20 files changed, 5581 insertions, 2658 deletions
diff --git a/share/man/man4/ena.4 b/share/man/man4/ena.4
index 5008ab2aa28a..358c2b786bbb 100644
--- a/share/man/man4/ena.4
+++ b/share/man/man4/ena.4
@@ -27,7 +27,7 @@
.\"
.\" $FreeBSD$
.\"
-.Dd May 04, 2017
+.Dd August 16, 2017
.Dt ENA 4
.Os
.Sh NAME
@@ -35,7 +35,7 @@
.Nd "FreeBSD kernel driver for Elastic Network Adapter (ENA) family"
.Sh SYNOPSIS
To compile this driver into the kernel,
-place the following line in your
+place the following line in the
kernel configuration file:
.Bd -ragged -offset indent
.Cd "device ena"
@@ -59,8 +59,9 @@ The driver supports a range of ENA devices, is link-speed independent
(i.e., the same driver is used for 10GbE, 25GbE, 40GbE, etc.), and has
a negotiated and extendable feature set.
.Pp
-Some ENA devices support SR-IOV. This driver is used for both the
-SR-IOV Physical Function (PF) and Virtual Function (VF) devices.
+Some ENA devices support SR-IOV.
+This driver is used for both the SR-IOV Physical Function (PF) and Virtual
+Function (VF) devices.
.Pp
The ENA devices enable high speed and low overhead network traffic
processing by providing multiple Tx/Rx queue pairs (the maximum number
@@ -82,8 +83,8 @@ to recover in a manner transparent to the application, as well as
debug logs.
.Pp
Some of the ENA devices support a working mode called Low-latency
-Queue (LLQ), which saves several more microseconds. This feature will
-be implemented for driver in future releases.
+Queue (LLQ), which saves several more microseconds.
+This feature will be implemented for the driver in future releases.
.Sh HARDWARE
Supported PCI vendor ID/device IDs:
.Pp
@@ -105,19 +106,23 @@ Supported PCI vendor ID/device IDs:
Error occurred during initialization of the mmio register read request.
.It ena%d: Can not reset device
.Pp
-Device could not be reset; device may not be responding or is already
-during reset.
+Device could not be reset.
+.br
+Device may not be responding or is already being reset.
.It ena%d: device version is too low
.Pp
-Version of the controller is too low and it is not supported by the driver.
+Version of the controller is too old and it is not supported by the driver.
.It ena%d: Invalid dma width value %d
.Pp
-The controller is able to request dma transcation width. Device stopped
-responding or it demanded invalid value.
+The controller is able to request dma transaction width.
+.br
+Device stopped responding or it demanded an invalid value.
.It ena%d: Can not initialize ena admin queue with device
.Pp
-Initialization of the Admin Queue failed; device may not be responding or there
-was a problem with initialization of the resources.
+Initialization of the Admin Queue failed.
+.br
+Device may not be responding or there was a problem with initialization of
+the resources.
.It ena%d: Cannot get attribute for ena device rc: %d
.Pp
Failed to get attributes of the device from the controller.
@@ -141,11 +146,14 @@ Errors occurred when trying to configure AENQ groups.
.It ena%d: could not allocate irq vector: %d
.It ena%d: Unable to allocate bus resource: registers
.Pp
-Resource allocation failed when initializing the device; driver will not
-be attached.
+Resource allocation failed when initializing the device.
+.br
+Driver will not be attached.
.It ena%d: ENA device init failed (err: %d)
.Pp
-Device initialization failed; driver will not be attached.
+Device initialization failed.
+.br
+Driver will not be attached.
.It ena%d: could not activate irq vector: %d
.Pp
Error occurred when trying to activate interrupt vectors for Admin Queue.
@@ -157,13 +165,16 @@ Error occurred when trying to register Admin Queue interrupt handler.
Error occurred during configuration of the Admin Queue interrupts.
.It ena%d: Enable MSI-X failed
.Pp
-Configuration of the MSI-X for Admin Queue failed; there could be lack
-of resources or interrupts could not have been configured; driver will
-not be attached.
+Configuration of the MSI-X for Admin Queue failed.
+.br
+There could be a lack of resources or the interrupts could not be configured.
+.br
+Driver will not be attached.
.It ena%d: VLAN is in use, detach first
.Pp
-VLANs are being used when trying to detach the driver; VLANs should be detached
-first and then detach routine should be called again.
+VLANs are being used when trying to detach the driver.
+.br
+VLANs must be detached first and then the detach routine must be called again.
.It ena%d: Unmapped RX DMA tag associations
.It ena%d: Unmapped TX DMA tag associations
.Pp
@@ -175,8 +186,9 @@ Error occurred when trying to destroy RX/TX DMA tag.
.It ena%d: Cannot fill hash control
.It ena%d: WARNING: RSS was not properly initialized, it will affect bandwidth
.Pp
-Error occurred during initialization of one of RSS resources; device is still
-going to work but it will affect performance because all RX packets will be
+Error occurred during initialization of one of RSS resources.
+.br
+The device will work with reduced performance because all RX packets will be
passed to queue 0 and there will be no hash information.
.It ena%d: failed to tear down irq: %d
.It ena%d: dev has no parent while releasing res for irq: %d
@@ -196,16 +208,20 @@ Requested MTU value is not supported and will not be set.
Device stopped responding and will be reset.
.It ena%d: Found a Tx that wasn't completed on time, qid %d, index %d.
.Pp
-Packet was pushed to the NIC but not sent within given time limit; it may
-be caused by hang of the IO queue.
+Packet was pushed to the NIC but not sent within the given time limit.
+.br
+It may be caused by a hang of the IO queue.
.It ena%d: The number of lost tx completion is aboce the threshold (%d > %d). Reset the device
.Pp
-If too many Tx wasn't completed on time the device is going to be reset; it may
-be caused by hanged queue or device.
+If too many Tx packets were not completed on time, the device will be reset.
+.br
+It may be caused by a hung queue or device.
.It ena%d: trigger reset is on
.Pp
-Device will be reset; reset is triggered either by watchdog or if too many TX
-packets were not completed on time.
+Device will be reset.
+.br
+Reset is triggered either by watchdog or if too many TX packets were not
+completed on time.
.It ena%d: invalid value recvd
.Pp
Link status received from the device in the AENQ handler is invalid.
@@ -220,7 +236,9 @@ Link status received from the device in the AENQ handler is invalid.
.It ena%d: could not allocate irq vector: %d
.It ena%d: failed to register interrupt handler for irq %ju: %d
.Pp
-IO resources initialization failed. Interface will not be brought up.
+IO resources initialization failed.
+.br
+Interface will not be brought up.
.It ena%d: LRO[%d] Initialization failed!
.Pp
Initialization of the LRO for the RX ring failed.
@@ -228,20 +246,26 @@ Initialization of the LRO for the RX ring failed.
.It ena%d: failed to add buffer for rx queue %d
.It ena%d: refilled rx queue %d with %d pages only
.Pp
-Allocation of resources used on RX path failed; if happened during
-initialization of the IO queue, the interface will not be brought up.
+Allocation of resources used on RX path failed.
+.br
+If it happened during initialization of the IO queue, the interface will not
+be brought up.
.It ena%d: ioctl promisc/allmulti
.Pp
-IOCTL request for the device to work in promiscuous/allmulti mode; see
+IOCTL request for the device to work in promiscuous/allmulti mode.
+.br
+See
.Xr ifconfig 8
for more details.
.It ena%d: too many fragments. Last fragment: %d!
.Pp
Packet with unsupported number of segments was queued for sending to the
-device; packet will be dropped.
+device.
+.br
+Packet will be dropped.
.Sh SUPPORT
-If an issue is identified with the released source code with a supported adapter
-email the specific information related to the issue to
+If an issue is identified with the released source code with a supported
+adapter, please email the specific information related to the issue to
.Aq Mt mk@semihalf.com
and
.Aq Mt mw@semihalf.com .
diff --git a/sys/contrib/ena-com/ena_com.c b/sys/contrib/ena-com/ena_com.c
index 4314b31cbb59..dde0c3357f63 100644
--- a/sys/contrib/ena-com/ena_com.c
+++ b/sys/contrib/ena-com/ena_com.c
@@ -1,7 +1,7 @@
/*-
* BSD LICENSE
*
- * Copyright (c) 2015-2017 Amazon.com, Inc. or its affiliates.
+ * Copyright (c) 2015-2020 Amazon.com, Inc. or its affiliates.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -32,9 +32,6 @@
*/
#include "ena_com.h"
-#ifdef ENA_INTERNAL
-#include "ena_gen_info.h"
-#endif
/*****************************************************************************/
/*****************************************************************************/
@@ -52,9 +49,6 @@
#define ENA_EXTENDED_STAT_GET_QUEUE(_funct_queue) (_funct_queue >> 16)
#endif /* ENA_EXTENDED_STATS */
-#define MIN_ENA_VER (((ENA_COMMON_SPEC_VERSION_MAJOR) << \
- ENA_REGS_VERSION_MAJOR_VERSION_SHIFT) \
- | (ENA_COMMON_SPEC_VERSION_MINOR))
#define ENA_CTRL_MAJOR 0
#define ENA_CTRL_MINOR 0
@@ -76,6 +70,10 @@
#define ENA_REGS_ADMIN_INTR_MASK 1
+#define ENA_MIN_POLL_US 100
+
+#define ENA_MAX_POLL_US 5000
+
/*****************************************************************************/
/*****************************************************************************/
/*****************************************************************************/
@@ -103,7 +101,7 @@ struct ena_com_stats_ctx {
struct ena_admin_acq_get_stats_resp get_resp;
};
-static inline int ena_com_mem_addr_set(struct ena_com_dev *ena_dev,
+static int ena_com_mem_addr_set(struct ena_com_dev *ena_dev,
struct ena_common_mem_addr *ena_addr,
dma_addr_t addr)
{
@@ -112,8 +110,8 @@ static inline int ena_com_mem_addr_set(struct ena_com_dev *ena_dev,
return ENA_COM_INVAL;
}
- ena_addr->mem_addr_low = (u32)addr;
- ena_addr->mem_addr_high = (u16)((u64)addr >> 32);
+ ena_addr->mem_addr_low = lower_32_bits(addr);
+ ena_addr->mem_addr_high = (u16)upper_32_bits(addr);
return 0;
}
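For reference, the lower_32_bits()/upper_32_bits() helpers used above simply split a 64-bit DMA address into its 32-bit halves; a minimal standalone sketch (the demo_ names are illustrative, not part of the driver):

#include <stdint.h>
#include <stdio.h>

/* Illustrative equivalents of lower_32_bits()/upper_32_bits(). */
static uint32_t demo_lower_32_bits(uint64_t addr) { return (uint32_t)addr; }
static uint32_t demo_upper_32_bits(uint64_t addr) { return (uint32_t)(addr >> 32); }

int main(void)
{
	uint64_t dma_addr = 0x0000001234abcd00ULL;

	/* The driver additionally casts the high half to u16, since
	 * mem_addr_high is a 16-bit field (note the (u16) cast above). */
	printf("low: 0x%08x high: 0x%08x\n",
	    (unsigned)demo_lower_32_bits(dma_addr),
	    (unsigned)demo_upper_32_bits(dma_addr));
	return 0;
}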
@@ -127,7 +125,7 @@ static int ena_com_admin_init_sq(struct ena_com_admin_queue *queue)
sq->mem_handle);
if (!sq->entries) {
- ena_trc_err("memory allocation failed");
+ ena_trc_err("memory allocation failed\n");
return ENA_COM_NO_MEM;
}
@@ -149,7 +147,7 @@ static int ena_com_admin_init_cq(struct ena_com_admin_queue *queue)
cq->mem_handle);
if (!cq->entries) {
- ena_trc_err("memory allocation failed");
+ ena_trc_err("memory allocation failed\n");
return ENA_COM_NO_MEM;
}
@@ -174,7 +172,7 @@ static int ena_com_admin_init_aenq(struct ena_com_dev *dev,
aenq->mem_handle);
if (!aenq->entries) {
- ena_trc_err("memory allocation failed");
+ ena_trc_err("memory allocation failed\n");
return ENA_COM_NO_MEM;
}
@@ -204,7 +202,7 @@ static int ena_com_admin_init_aenq(struct ena_com_dev *dev,
return 0;
}
-static inline void comp_ctxt_release(struct ena_com_admin_queue *queue,
+static void comp_ctxt_release(struct ena_com_admin_queue *queue,
struct ena_comp_ctx *comp_ctx)
{
comp_ctx->occupied = false;
@@ -220,6 +218,11 @@ static struct ena_comp_ctx *get_comp_ctxt(struct ena_com_admin_queue *queue,
return NULL;
}
+ if (unlikely(!queue->comp_ctx)) {
+ ena_trc_err("Completion context is NULL\n");
+ return NULL;
+ }
+
if (unlikely(queue->comp_ctx[command_id].occupied && capture)) {
ena_trc_err("Completion context is occupied\n");
return NULL;
@@ -249,7 +252,7 @@ static struct ena_comp_ctx *__ena_com_submit_admin_cmd(struct ena_com_admin_queu
tail_masked = admin_queue->sq.tail & queue_size_mask;
/* In case of queue FULL */
- cnt = ATOMIC32_READ(&admin_queue->outstanding_cmds);
+ cnt = (u16)ATOMIC32_READ(&admin_queue->outstanding_cmds);
if (cnt >= admin_queue->q_depth) {
ena_trc_dbg("admin queue is full.\n");
admin_queue->stats.out_of_space++;
@@ -293,7 +296,7 @@ static struct ena_comp_ctx *__ena_com_submit_admin_cmd(struct ena_com_admin_queu
return comp_ctx;
}
-static inline int ena_com_init_comp_ctxt(struct ena_com_admin_queue *queue)
+static int ena_com_init_comp_ctxt(struct ena_com_admin_queue *queue)
{
size_t size = queue->q_depth * sizeof(struct ena_comp_ctx);
struct ena_comp_ctx *comp_ctx;
@@ -301,7 +304,7 @@ static inline int ena_com_init_comp_ctxt(struct ena_com_admin_queue *queue)
queue->comp_ctx = ENA_MEM_ALLOC(queue->q_dmadev, size);
if (unlikely(!queue->comp_ctx)) {
- ena_trc_err("memory allocation failed");
+ ena_trc_err("memory allocation failed\n");
return ENA_COM_NO_MEM;
}
@@ -320,7 +323,7 @@ static struct ena_comp_ctx *ena_com_submit_admin_cmd(struct ena_com_admin_queue
struct ena_admin_acq_entry *comp,
size_t comp_size_in_bytes)
{
- unsigned long flags;
+ unsigned long flags = 0;
struct ena_comp_ctx *comp_ctx;
ENA_SPINLOCK_LOCK(admin_queue->q_lock, flags);
@@ -332,7 +335,7 @@ static struct ena_comp_ctx *ena_com_submit_admin_cmd(struct ena_com_admin_queue
cmd_size_in_bytes,
comp,
comp_size_in_bytes);
- if (unlikely(IS_ERR(comp_ctx)))
+ if (IS_ERR(comp_ctx))
admin_queue->running_state = false;
ENA_SPINLOCK_UNLOCK(admin_queue->q_lock, flags);
@@ -348,6 +351,7 @@ static int ena_com_init_io_sq(struct ena_com_dev *ena_dev,
memset(&io_sq->desc_addr, 0x0, sizeof(io_sq->desc_addr));
+ io_sq->dma_addr_bits = (u8)ena_dev->dma_addr_bits;
io_sq->desc_entry_size =
(io_sq->direction == ENA_COM_IO_QUEUE_DIRECTION_TX) ?
sizeof(struct ena_eth_io_tx_desc) :
@@ -373,18 +377,21 @@ static int ena_com_init_io_sq(struct ena_com_dev *ena_dev,
}
if (!io_sq->desc_addr.virt_addr) {
- ena_trc_err("memory allocation failed");
+ ena_trc_err("memory allocation failed\n");
return ENA_COM_NO_MEM;
}
}
if (io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) {
/* Allocate bounce buffers */
- io_sq->bounce_buf_ctrl.buffer_size = ena_dev->llq_info.desc_list_entry_size;
- io_sq->bounce_buf_ctrl.buffers_num = ENA_COM_BOUNCE_BUFFER_CNTRL_CNT;
+ io_sq->bounce_buf_ctrl.buffer_size =
+ ena_dev->llq_info.desc_list_entry_size;
+ io_sq->bounce_buf_ctrl.buffers_num =
+ ENA_COM_BOUNCE_BUFFER_CNTRL_CNT;
io_sq->bounce_buf_ctrl.next_to_use = 0;
- size = io_sq->bounce_buf_ctrl.buffer_size * io_sq->bounce_buf_ctrl.buffers_num;
+ size = io_sq->bounce_buf_ctrl.buffer_size *
+ io_sq->bounce_buf_ctrl.buffers_num;
ENA_MEM_ALLOC_NODE(ena_dev->dmadev,
size,
@@ -395,11 +402,12 @@ static int ena_com_init_io_sq(struct ena_com_dev *ena_dev,
io_sq->bounce_buf_ctrl.base_buffer = ENA_MEM_ALLOC(ena_dev->dmadev, size);
if (!io_sq->bounce_buf_ctrl.base_buffer) {
- ena_trc_err("bounce buffer memory allocation failed");
+ ena_trc_err("bounce buffer memory allocation failed\n");
return ENA_COM_NO_MEM;
}
- memcpy(&io_sq->llq_info, &ena_dev->llq_info, sizeof(io_sq->llq_info));
+ memcpy(&io_sq->llq_info, &ena_dev->llq_info,
+ sizeof(io_sq->llq_info));
/* Initiate the first bounce buffer */
io_sq->llq_buf_ctrl.curr_bounce_buf =
@@ -408,6 +416,12 @@ static int ena_com_init_io_sq(struct ena_com_dev *ena_dev,
0x0, io_sq->llq_info.desc_list_entry_size);
io_sq->llq_buf_ctrl.descs_left_in_line =
io_sq->llq_info.descs_num_before_header;
+ io_sq->disable_meta_caching =
+ io_sq->llq_info.disable_meta_caching;
+
+ if (io_sq->llq_info.max_entries_in_tx_burst > 0)
+ io_sq->entries_in_tx_burst_left =
+ io_sq->llq_info.max_entries_in_tx_burst;
}
io_sq->tail = 0;
@@ -451,7 +465,7 @@ static int ena_com_init_io_cq(struct ena_com_dev *ena_dev,
}
if (!io_cq->cdesc_addr.virt_addr) {
- ena_trc_err("memory allocation failed");
+ ena_trc_err("memory allocation failed\n");
return ENA_COM_NO_MEM;
}
@@ -500,12 +514,12 @@ static void ena_com_handle_admin_completion(struct ena_com_admin_queue *admin_qu
cqe = &admin_queue->cq.entries[head_masked];
/* Go over all the completions */
- while ((cqe->acq_common_descriptor.flags &
+ while ((READ_ONCE8(cqe->acq_common_descriptor.flags) &
ENA_ADMIN_ACQ_COMMON_DESC_PHASE_MASK) == phase) {
/* Do not read the rest of the completion entry before the
* phase bit was validated
*/
- rmb();
+ dma_rmb();
ena_com_handle_single_admin_completion(admin_queue, cqe);
head_masked++;
@@ -529,12 +543,9 @@ static int ena_com_comp_status_to_errno(u8 comp_status)
if (unlikely(comp_status != 0))
ena_trc_err("admin command failed[%u]\n", comp_status);
- if (unlikely(comp_status > ENA_ADMIN_UNKNOWN_ERROR))
- return ENA_COM_INVAL;
-
switch (comp_status) {
case ENA_ADMIN_SUCCESS:
- return 0;
+ return ENA_COM_OK;
case ENA_ADMIN_RESOURCE_ALLOCATION_FAILURE:
return ENA_COM_NO_MEM;
case ENA_ADMIN_UNSUPPORTED_OPCODE:
@@ -546,23 +557,32 @@ static int ena_com_comp_status_to_errno(u8 comp_status)
return ENA_COM_INVAL;
}
- return 0;
+ return ENA_COM_INVAL;
+}
+
+static inline void ena_delay_exponential_backoff_us(u32 exp, u32 delay_us)
+{
+ delay_us = ENA_MAX32(ENA_MIN_POLL_US, delay_us);
+ delay_us = ENA_MIN32(delay_us * (1 << exp), ENA_MAX_POLL_US);
+ ENA_USLEEP(delay_us);
}
static int ena_com_wait_and_process_admin_cq_polling(struct ena_comp_ctx *comp_ctx,
struct ena_com_admin_queue *admin_queue)
{
- unsigned long flags, timeout;
+ unsigned long flags = 0;
+ ena_time_t timeout;
int ret;
+ u32 exp = 0;
timeout = ENA_GET_SYSTEM_TIMEOUT(admin_queue->completion_timeout);
while (1) {
- ENA_SPINLOCK_LOCK(admin_queue->q_lock, flags);
- ena_com_handle_admin_completion(admin_queue);
- ENA_SPINLOCK_UNLOCK(admin_queue->q_lock, flags);
+ ENA_SPINLOCK_LOCK(admin_queue->q_lock, flags);
+ ena_com_handle_admin_completion(admin_queue);
+ ENA_SPINLOCK_UNLOCK(admin_queue->q_lock, flags);
- if (comp_ctx->status != ENA_CMD_SUBMITTED)
+ if (comp_ctx->status != ENA_CMD_SUBMITTED)
break;
if (ENA_TIME_EXPIRE(timeout)) {
@@ -577,7 +597,7 @@ static int ena_com_wait_and_process_admin_cq_polling(struct ena_comp_ctx *comp_c
goto err;
}
- ENA_MSLEEP(100);
+ ena_delay_exponential_backoff_us(exp++, admin_queue->ena_dev->ena_min_poll_delay_us);
}
if (unlikely(comp_ctx->status == ENA_CMD_ABORTED)) {
@@ -598,42 +618,121 @@ err:
return ret;
}
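The fixed ENA_MSLEEP(100) in the polling loop above is replaced by a capped exponential backoff (ena_delay_exponential_backoff_us(), added earlier in this patch). A minimal standalone sketch of that pattern, reusing the 100 us / 5000 us bounds the patch defines, with usleep(3) standing in for ENA_USLEEP (the demo_ names are illustrative):

#include <stdint.h>
#include <unistd.h>

#define DEMO_MIN_POLL_US	100	/* mirrors ENA_MIN_POLL_US */
#define DEMO_MAX_POLL_US	5000	/* mirrors ENA_MAX_POLL_US */

static uint32_t demo_min32(uint32_t a, uint32_t b) { return a < b ? a : b; }
static uint32_t demo_max32(uint32_t a, uint32_t b) { return a > b ? a : b; }

/* Sleep for delay_us scaled by 2^exp, clamped to the polling bounds. */
static void demo_exponential_backoff_us(uint32_t exp, uint32_t delay_us)
{
	/* Guard the shift; 100 us << 6 already exceeds the 5000 us cap. */
	if (exp > 6)
		exp = 6;
	delay_us = demo_max32(DEMO_MIN_POLL_US, delay_us);
	delay_us = demo_min32(delay_us * (1U << exp), DEMO_MAX_POLL_US);
	usleep(delay_us);
}

Each polling retry calls the helper with an incrementing exp, so the wait doubles on every retry (100, 200, 400, ... us) until it saturates at 5 ms.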
+/**
+ * Set the LLQ configurations of the firmware
+ *
+ * The driver provides only the enabled feature values to the device,
+ * which in turn, checks if they are supported.
+ */
+static int ena_com_set_llq(struct ena_com_dev *ena_dev)
+{
+ struct ena_com_admin_queue *admin_queue;
+ struct ena_admin_set_feat_cmd cmd;
+ struct ena_admin_set_feat_resp resp;
+ struct ena_com_llq_info *llq_info = &ena_dev->llq_info;
+ int ret;
+
+ memset(&cmd, 0x0, sizeof(cmd));
+ admin_queue = &ena_dev->admin_queue;
+
+ cmd.aq_common_descriptor.opcode = ENA_ADMIN_SET_FEATURE;
+ cmd.feat_common.feature_id = ENA_ADMIN_LLQ;
+
+ cmd.u.llq.header_location_ctrl_enabled = llq_info->header_location_ctrl;
+ cmd.u.llq.entry_size_ctrl_enabled = llq_info->desc_list_entry_size_ctrl;
+ cmd.u.llq.desc_num_before_header_enabled = llq_info->descs_num_before_header;
+ cmd.u.llq.descriptors_stride_ctrl_enabled = llq_info->desc_stride_ctrl;
+
+ if (llq_info->disable_meta_caching)
+ cmd.u.llq.accel_mode.u.set.enabled_flags |=
+ BIT(ENA_ADMIN_DISABLE_META_CACHING);
+
+ if (llq_info->max_entries_in_tx_burst)
+ cmd.u.llq.accel_mode.u.set.enabled_flags |=
+ BIT(ENA_ADMIN_LIMIT_TX_BURST);
+
+ ret = ena_com_execute_admin_command(admin_queue,
+ (struct ena_admin_aq_entry *)&cmd,
+ sizeof(cmd),
+ (struct ena_admin_acq_entry *)&resp,
+ sizeof(resp));
+
+ if (unlikely(ret))
+ ena_trc_err("Failed to set LLQ configurations: %d\n", ret);
+
+ return ret;
+}
+
static int ena_com_config_llq_info(struct ena_com_dev *ena_dev,
- struct ena_admin_feature_llq_desc *llq_desc)
+ struct ena_admin_feature_llq_desc *llq_features,
+ struct ena_llq_configurations *llq_default_cfg)
{
struct ena_com_llq_info *llq_info = &ena_dev->llq_info;
+ u16 supported_feat;
+ int rc;
memset(llq_info, 0, sizeof(*llq_info));
- switch (llq_desc->header_location_ctrl) {
- case ENA_ADMIN_INLINE_HEADER:
- llq_info->inline_header = true;
- break;
- case ENA_ADMIN_HEADER_RING:
- llq_info->inline_header = false;
- break;
- default:
- ena_trc_err("Invalid header location control\n");
+ supported_feat = llq_features->header_location_ctrl_supported;
+
+ if (likely(supported_feat & llq_default_cfg->llq_header_location)) {
+ llq_info->header_location_ctrl =
+ llq_default_cfg->llq_header_location;
+ } else {
+ ena_trc_err("Invalid header location control, supported: 0x%x\n",
+ supported_feat);
return -EINVAL;
}
- switch (llq_desc->entry_size_ctrl) {
- case ENA_ADMIN_LIST_ENTRY_SIZE_128B:
- llq_info->desc_list_entry_size = 128;
- break;
- case ENA_ADMIN_LIST_ENTRY_SIZE_192B:
- llq_info->desc_list_entry_size = 192;
- break;
- case ENA_ADMIN_LIST_ENTRY_SIZE_256B:
- llq_info->desc_list_entry_size = 256;
- break;
- default:
- ena_trc_err("Invalid entry_size_ctrl %d\n",
- llq_desc->entry_size_ctrl);
- return -EINVAL;
+ if (likely(llq_info->header_location_ctrl == ENA_ADMIN_INLINE_HEADER)) {
+ supported_feat = llq_features->descriptors_stride_ctrl_supported;
+ if (likely(supported_feat & llq_default_cfg->llq_stride_ctrl)) {
+ llq_info->desc_stride_ctrl = llq_default_cfg->llq_stride_ctrl;
+ } else {
+ if (supported_feat & ENA_ADMIN_MULTIPLE_DESCS_PER_ENTRY) {
+ llq_info->desc_stride_ctrl = ENA_ADMIN_MULTIPLE_DESCS_PER_ENTRY;
+ } else if (supported_feat & ENA_ADMIN_SINGLE_DESC_PER_ENTRY) {
+ llq_info->desc_stride_ctrl = ENA_ADMIN_SINGLE_DESC_PER_ENTRY;
+ } else {
+ ena_trc_err("Invalid desc_stride_ctrl, supported: 0x%x\n",
+ supported_feat);
+ return -EINVAL;
+ }
+
+ ena_trc_err("Default llq stride ctrl is not supported, performing fallback, default: 0x%x, supported: 0x%x, used: 0x%x\n",
+ llq_default_cfg->llq_stride_ctrl,
+ supported_feat,
+ llq_info->desc_stride_ctrl);
+ }
+ } else {
+ llq_info->desc_stride_ctrl = 0;
}
- if ((llq_info->desc_list_entry_size & 0x7)) {
+ supported_feat = llq_features->entry_size_ctrl_supported;
+ if (likely(supported_feat & llq_default_cfg->llq_ring_entry_size)) {
+ llq_info->desc_list_entry_size_ctrl = llq_default_cfg->llq_ring_entry_size;
+ llq_info->desc_list_entry_size = llq_default_cfg->llq_ring_entry_size_value;
+ } else {
+ if (supported_feat & ENA_ADMIN_LIST_ENTRY_SIZE_128B) {
+ llq_info->desc_list_entry_size_ctrl = ENA_ADMIN_LIST_ENTRY_SIZE_128B;
+ llq_info->desc_list_entry_size = 128;
+ } else if (supported_feat & ENA_ADMIN_LIST_ENTRY_SIZE_192B) {
+ llq_info->desc_list_entry_size_ctrl = ENA_ADMIN_LIST_ENTRY_SIZE_192B;
+ llq_info->desc_list_entry_size = 192;
+ } else if (supported_feat & ENA_ADMIN_LIST_ENTRY_SIZE_256B) {
+ llq_info->desc_list_entry_size_ctrl = ENA_ADMIN_LIST_ENTRY_SIZE_256B;
+ llq_info->desc_list_entry_size = 256;
+ } else {
+ ena_trc_err("Invalid entry_size_ctrl, supported: 0x%x\n", supported_feat);
+ return -EINVAL;
+ }
+
+ ena_trc_err("Default llq ring entry size is not supported, performing fallback, default: 0x%x, supported: 0x%x, used: 0x%x\n",
+ llq_default_cfg->llq_ring_entry_size,
+ supported_feat,
+ llq_info->desc_list_entry_size);
+ }
+ if (unlikely(llq_info->desc_list_entry_size & 0x7)) {
/* The desc list entry size should be whole multiply of 8
* This requirement comes from __iowrite64_copy()
*/
@@ -642,35 +741,56 @@ static int ena_com_config_llq_info(struct ena_com_dev *ena_dev,
return -EINVAL;
}
- if (llq_info->inline_header) {
- llq_info->desc_stride_ctrl = llq_desc->descriptors_stride_ctrl;
- if ((llq_info->desc_stride_ctrl != ENA_ADMIN_SINGLE_DESC_PER_ENTRY) &&
- (llq_info->desc_stride_ctrl != ENA_ADMIN_MULTIPLE_DESCS_PER_ENTRY)) {
- ena_trc_err("Invalid desc_stride_ctrl %d\n",
- llq_info->desc_stride_ctrl);
- return -EINVAL;
- }
- } else {
- llq_info->desc_stride_ctrl = ENA_ADMIN_SINGLE_DESC_PER_ENTRY;
- }
-
- if (llq_info->desc_stride_ctrl == ENA_ADMIN_SINGLE_DESC_PER_ENTRY)
+ if (llq_info->desc_stride_ctrl == ENA_ADMIN_MULTIPLE_DESCS_PER_ENTRY)
llq_info->descs_per_entry = llq_info->desc_list_entry_size /
sizeof(struct ena_eth_io_tx_desc);
else
llq_info->descs_per_entry = 1;
- llq_info->descs_num_before_header = llq_desc->desc_num_before_header_ctrl;
+ supported_feat = llq_features->desc_num_before_header_supported;
+ if (likely(supported_feat & llq_default_cfg->llq_num_decs_before_header)) {
+ llq_info->descs_num_before_header = llq_default_cfg->llq_num_decs_before_header;
+ } else {
+ if (supported_feat & ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_2) {
+ llq_info->descs_num_before_header = ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_2;
+ } else if (supported_feat & ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_1) {
+ llq_info->descs_num_before_header = ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_1;
+ } else if (supported_feat & ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_4) {
+ llq_info->descs_num_before_header = ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_4;
+ } else if (supported_feat & ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_8) {
+ llq_info->descs_num_before_header = ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_8;
+ } else {
+ ena_trc_err("Invalid descs_num_before_header, supported: 0x%x\n",
+ supported_feat);
+ return -EINVAL;
+ }
- return 0;
-}
+ ena_trc_err("Default llq num descs before header is not supported, performing fallback, default: 0x%x, supported: 0x%x, used: 0x%x\n",
+ llq_default_cfg->llq_num_decs_before_header,
+ supported_feat,
+ llq_info->descs_num_before_header);
+ }
+ /* Check for accelerated queue supported */
+ llq_info->disable_meta_caching =
+ llq_features->accel_mode.u.get.supported_flags &
+ BIT(ENA_ADMIN_DISABLE_META_CACHING);
+ if (llq_features->accel_mode.u.get.supported_flags & BIT(ENA_ADMIN_LIMIT_TX_BURST))
+ llq_info->max_entries_in_tx_burst =
+ llq_features->accel_mode.u.get.max_tx_burst_size /
+ llq_default_cfg->llq_ring_entry_size_value;
+ rc = ena_com_set_llq(ena_dev);
+ if (rc)
+ ena_trc_err("Cannot set LLQ configuration: %d\n", rc);
+
+ return rc;
+}
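Each field in ena_com_config_llq_info() above follows the same negotiation pattern: take the driver's default if the device's supported-features bitmask allows it, otherwise fall back to the best supported option and log the substitution, failing only when nothing matches. A minimal sketch of that pattern (the names are illustrative, not the driver's):

#include <stdint.h>
#include <stdio.h>

/* Return the preferred flag if supported, otherwise the first supported
 * fallback from a priority-ordered list; 0 means nothing matched. */
static uint16_t demo_negotiate_feature(uint16_t supported, uint16_t preferred,
    const uint16_t *fallbacks, int nfallbacks)
{
	int i;

	if (supported & preferred)
		return preferred;

	for (i = 0; i < nfallbacks; i++) {
		if (supported & fallbacks[i]) {
			printf("default 0x%x not supported, falling back to 0x%x\n",
			    preferred, fallbacks[i]);
			return fallbacks[i];
		}
	}

	return 0;	/* caller treats this as -EINVAL */
}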
static int ena_com_wait_and_process_admin_cq_interrupts(struct ena_comp_ctx *comp_ctx,
struct ena_com_admin_queue *admin_queue)
{
- unsigned long flags;
+ unsigned long flags = 0;
int ret;
ENA_WAIT_EVENT_WAIT(comp_ctx->wait_event,
@@ -687,16 +807,25 @@ static int ena_com_wait_and_process_admin_cq_interrupts(struct ena_comp_ctx *com
admin_queue->stats.no_completion++;
ENA_SPINLOCK_UNLOCK(admin_queue->q_lock, flags);
- if (comp_ctx->status == ENA_CMD_COMPLETED)
- ena_trc_err("The ena device have completion but the driver didn't receive any MSI-X interrupt (cmd %d)\n",
- comp_ctx->cmd_opcode);
- else
- ena_trc_err("The ena device doesn't send any completion for the admin cmd %d status %d\n",
+ if (comp_ctx->status == ENA_CMD_COMPLETED) {
+ ena_trc_err("The ena device sent a completion but the driver didn't receive a MSI-X interrupt (cmd %d), autopolling mode is %s\n",
+ comp_ctx->cmd_opcode, admin_queue->auto_polling ? "ON" : "OFF");
+ /* Check if fallback to polling is enabled */
+ if (admin_queue->auto_polling)
+ admin_queue->polling = true;
+ } else {
+ ena_trc_err("The ena device didn't send a completion for the admin cmd %d status %d\n",
comp_ctx->cmd_opcode, comp_ctx->status);
-
- admin_queue->running_state = false;
- ret = ENA_COM_TIMER_EXPIRED;
- goto err;
+ }
+ /* Check if shifted to polling mode.
+ * This will happen if there is a completion without an interrupt
+ * and autopolling mode is enabled. Continuing normal execution in such case
+ */
+ if (!admin_queue->polling) {
+ admin_queue->running_state = false;
+ ret = ENA_COM_TIMER_EXPIRED;
+ goto err;
+ }
}
ret = ena_com_comp_status_to_errno(comp_ctx->comp_status);
@@ -715,7 +844,7 @@ static u32 ena_com_reg_bar_read32(struct ena_com_dev *ena_dev, u16 offset)
volatile struct ena_admin_ena_mmio_req_read_less_resp *read_resp =
mmio_read->read_resp;
u32 mmio_read_reg, ret, i;
- unsigned long flags;
+ unsigned long flags = 0;
u32 timeout = mmio_read->reg_read_to;
ENA_MIGHT_SLEEP();
@@ -736,15 +865,11 @@ static u32 ena_com_reg_bar_read32(struct ena_com_dev *ena_dev, u16 offset)
mmio_read_reg |= mmio_read->seq_num &
ENA_REGS_MMIO_REG_READ_REQ_ID_MASK;
- /* make sure read_resp->req_id get updated before the hw can write
- * there
- */
- wmb();
-
- ENA_REG_WRITE32(ena_dev->bus, mmio_read_reg, ena_dev->reg_bar + ENA_REGS_MMIO_REG_READ_OFF);
+ ENA_REG_WRITE32(ena_dev->bus, mmio_read_reg,
+ ena_dev->reg_bar + ENA_REGS_MMIO_REG_READ_OFF);
for (i = 0; i < timeout; i++) {
- if (read_resp->req_id == mmio_read->seq_num)
+ if (READ_ONCE16(read_resp->req_id) == mmio_read->seq_num)
break;
ENA_UDELAY(1);
@@ -761,7 +886,7 @@ static u32 ena_com_reg_bar_read32(struct ena_com_dev *ena_dev, u16 offset)
}
if (read_resp->reg_off != offset) {
- ena_trc_err("Read failure: wrong offset provided");
+ ena_trc_err("Read failure: wrong offset provided\n");
ret = ENA_MMIO_READ_TIMEOUT;
} else {
ret = read_resp->reg_val;
@@ -856,8 +981,9 @@ static void ena_com_io_queue_free(struct ena_com_dev *ena_dev,
}
if (io_sq->bounce_buf_ctrl.base_buffer) {
- size = io_sq->llq_info.desc_list_entry_size * ENA_COM_BOUNCE_BUFFER_CNTRL_CNT;
- ENA_MEM_FREE(ena_dev->dmadev, io_sq->bounce_buf_ctrl.base_buffer);
+ ENA_MEM_FREE(ena_dev->dmadev,
+ io_sq->bounce_buf_ctrl.base_buffer,
+ (io_sq->llq_info.desc_list_entry_size * ENA_COM_BOUNCE_BUFFER_CNTRL_CNT));
io_sq->bounce_buf_ctrl.base_buffer = NULL;
}
}
@@ -865,9 +991,13 @@ static void ena_com_io_queue_free(struct ena_com_dev *ena_dev,
static int wait_for_reset_state(struct ena_com_dev *ena_dev, u32 timeout,
u16 exp_state)
{
- u32 val, i;
+ u32 val, exp = 0;
+ ena_time_t timeout_stamp;
- for (i = 0; i < timeout; i++) {
+ /* Convert timeout from resolution of 100ms to us resolution. */
+ timeout_stamp = ENA_GET_SYSTEM_TIMEOUT(100 * 1000 * timeout);
+
+ while (1) {
val = ena_com_reg_bar_read32(ena_dev, ENA_REGS_DEV_STS_OFF);
if (unlikely(val == ENA_MMIO_READ_TIMEOUT)) {
@@ -879,11 +1009,11 @@ static int wait_for_reset_state(struct ena_com_dev *ena_dev, u32 timeout,
exp_state)
return 0;
- /* The resolution of the timeout is 100ms */
- ENA_MSLEEP(100);
- }
+ if (ENA_TIME_EXPIRE(timeout_stamp))
+ return ENA_COM_TIMER_EXPIRED;
- return ENA_COM_TIMER_EXPIRED;
+ ena_delay_exponential_backoff_us(exp++, ena_dev->ena_min_poll_delay_us);
+ }
}
static bool ena_com_check_supported_feature_id(struct ena_com_dev *ena_dev,
@@ -903,7 +1033,8 @@ static int ena_com_get_feature_ex(struct ena_com_dev *ena_dev,
struct ena_admin_get_feat_resp *get_resp,
enum ena_admin_aq_feature_id feature_id,
dma_addr_t control_buf_dma_addr,
- u32 control_buff_size)
+ u32 control_buff_size,
+ u8 feature_ver)
{
struct ena_com_admin_queue *admin_queue;
struct ena_admin_get_feat_cmd get_cmd;
@@ -934,7 +1065,7 @@ static int ena_com_get_feature_ex(struct ena_com_dev *ena_dev,
}
get_cmd.control_buffer.length = control_buff_size;
-
+ get_cmd.feat_common.feature_version = feature_ver;
get_cmd.feat_common.feature_id = feature_id;
ret = ena_com_execute_admin_command(admin_queue,
@@ -954,19 +1085,45 @@ static int ena_com_get_feature_ex(struct ena_com_dev *ena_dev,
static int ena_com_get_feature(struct ena_com_dev *ena_dev,
struct ena_admin_get_feat_resp *get_resp,
- enum ena_admin_aq_feature_id feature_id)
+ enum ena_admin_aq_feature_id feature_id,
+ u8 feature_ver)
{
return ena_com_get_feature_ex(ena_dev,
get_resp,
feature_id,
0,
- 0);
+ 0,
+ feature_ver);
+}
+
+int ena_com_get_current_hash_function(struct ena_com_dev *ena_dev)
+{
+ return ena_dev->rss.hash_func;
+}
+
+static void ena_com_hash_key_fill_default_key(struct ena_com_dev *ena_dev)
+{
+ struct ena_admin_feature_rss_flow_hash_control *hash_key =
+ (ena_dev->rss).hash_key;
+
+ ENA_RSS_FILL_KEY(&hash_key->key, sizeof(hash_key->key));
+ /* The key buffer is stored in the device in an array of
+ * uint32 elements. Therefore the number of elements can be derived
+ * by dividing the buffer length by the size of each array element.
+ * In current implementation each element is sized at uint32_t
+ * so it's actually a division by 4 but if the element size changes,
+ * there is no need to rewrite this code.
+ */
+ hash_key->keys_num = sizeof(hash_key->key) / sizeof(hash_key->key[0]);
}
static int ena_com_hash_key_allocate(struct ena_com_dev *ena_dev)
{
struct ena_rss *rss = &ena_dev->rss;
+ if (!ena_com_check_supported_feature_id(ena_dev, ENA_ADMIN_RSS_HASH_FUNCTION))
+ return ENA_COM_UNSUPPORTED;
+
ENA_MEM_ALLOC_COHERENT(ena_dev->dmadev,
sizeof(*rss->hash_key),
rss->hash_key,
@@ -1030,7 +1187,7 @@ static int ena_com_indirect_table_allocate(struct ena_com_dev *ena_dev,
int ret;
ret = ena_com_get_feature(ena_dev, &get_resp,
- ENA_ADMIN_RSS_REDIRECTION_TABLE_CONFIG);
+ ENA_ADMIN_RSS_REDIRECTION_TABLE_CONFIG, 0);
if (unlikely(ret))
return ret;
@@ -1094,7 +1251,9 @@ static void ena_com_indirect_table_destroy(struct ena_com_dev *ena_dev)
rss->rss_ind_tbl = NULL;
if (rss->host_rss_ind_tbl)
- ENA_MEM_FREE(ena_dev->dmadev, rss->host_rss_ind_tbl);
+ ENA_MEM_FREE(ena_dev->dmadev,
+ rss->host_rss_ind_tbl,
+ ((1ULL << rss->tbl_log_size) * sizeof(u16)));
rss->host_rss_ind_tbl = NULL;
}
@@ -1195,63 +1354,29 @@ static int ena_com_ind_tbl_convert_to_device(struct ena_com_dev *ena_dev)
return 0;
}
-static int ena_com_ind_tbl_convert_from_device(struct ena_com_dev *ena_dev)
-{
- u16 dev_idx_to_host_tbl[ENA_TOTAL_NUM_QUEUES] = { (u16)-1 };
- struct ena_rss *rss = &ena_dev->rss;
- u8 idx;
- u16 i;
-
- for (i = 0; i < ENA_TOTAL_NUM_QUEUES; i++)
- dev_idx_to_host_tbl[ena_dev->io_sq_queues[i].idx] = i;
-
- for (i = 0; i < 1 << rss->tbl_log_size; i++) {
- if (rss->rss_ind_tbl[i].cq_idx > ENA_TOTAL_NUM_QUEUES)
- return ENA_COM_INVAL;
- idx = (u8)rss->rss_ind_tbl[i].cq_idx;
-
- if (dev_idx_to_host_tbl[idx] > ENA_TOTAL_NUM_QUEUES)
- return ENA_COM_INVAL;
-
- rss->host_rss_ind_tbl[i] = dev_idx_to_host_tbl[idx];
- }
-
- return 0;
-}
-
-static int ena_com_init_interrupt_moderation_table(struct ena_com_dev *ena_dev)
-{
- size_t size;
-
- size = sizeof(struct ena_intr_moder_entry) * ENA_INTR_MAX_NUM_OF_LEVELS;
-
- ena_dev->intr_moder_tbl = ENA_MEM_ALLOC(ena_dev->dmadev, size);
- if (!ena_dev->intr_moder_tbl)
- return ENA_COM_NO_MEM;
-
- ena_com_config_default_interrupt_moderation_table(ena_dev);
-
- return 0;
-}
-
static void ena_com_update_intr_delay_resolution(struct ena_com_dev *ena_dev,
u16 intr_delay_resolution)
{
- struct ena_intr_moder_entry *intr_moder_tbl = ena_dev->intr_moder_tbl;
- unsigned int i;
+ u16 prev_intr_delay_resolution = ena_dev->intr_delay_resolution;
- if (!intr_delay_resolution) {
+ if (unlikely(!intr_delay_resolution)) {
ena_trc_err("Illegal intr_delay_resolution provided. Going to use default 1 usec resolution\n");
- intr_delay_resolution = 1;
+ intr_delay_resolution = ENA_DEFAULT_INTR_DELAY_RESOLUTION;
}
- ena_dev->intr_delay_resolution = intr_delay_resolution;
/* update Rx */
- for (i = 0; i < ENA_INTR_MAX_NUM_OF_LEVELS; i++)
- intr_moder_tbl[i].intr_moder_interval /= intr_delay_resolution;
+ ena_dev->intr_moder_rx_interval =
+ ena_dev->intr_moder_rx_interval *
+ prev_intr_delay_resolution /
+ intr_delay_resolution;
/* update Tx */
- ena_dev->intr_moder_tx_interval /= intr_delay_resolution;
+ ena_dev->intr_moder_tx_interval =
+ ena_dev->intr_moder_tx_interval *
+ prev_intr_delay_resolution /
+ intr_delay_resolution;
+
+ ena_dev->intr_delay_resolution = intr_delay_resolution;
}
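The rescaling above keeps the moderation intervals constant in microseconds when the device reports a new delay resolution: the stored value is multiplied by the previous resolution and divided by the new one. A short worked example with hypothetical values:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Hypothetical: an interval of 16 units at a 4 us resolution (64 us). */
	uint32_t interval = 16, prev_res = 4, new_res = 1;

	/* The same 64 us expressed at the new 1 us resolution is 64 units. */
	interval = interval * prev_res / new_res;
	printf("rescaled interval: %u units\n", (unsigned)interval);
	return 0;
}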
/*****************************************************************************/
@@ -1269,7 +1394,7 @@ int ena_com_execute_admin_command(struct ena_com_admin_queue *admin_queue,
comp_ctx = ena_com_submit_admin_cmd(admin_queue, cmd, cmd_size,
comp, comp_size);
- if (unlikely(IS_ERR(comp_ctx))) {
+ if (IS_ERR(comp_ctx)) {
if (comp_ctx == ERR_PTR(ENA_COM_NO_DEVICE))
ena_trc_dbg("Failed to submit command [%ld]\n",
PTR_ERR(comp_ctx));
@@ -1389,12 +1514,13 @@ void ena_com_abort_admin_commands(struct ena_com_dev *ena_dev)
void ena_com_wait_for_abort_completion(struct ena_com_dev *ena_dev)
{
struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue;
- unsigned long flags;
+ unsigned long flags = 0;
+ u32 exp = 0;
ENA_SPINLOCK_LOCK(admin_queue->q_lock, flags);
while (ATOMIC32_READ(&admin_queue->outstanding_cmds) != 0) {
ENA_SPINLOCK_UNLOCK(admin_queue->q_lock, flags);
- ENA_MSLEEP(20);
+ ena_delay_exponential_backoff_us(exp++, ena_dev->ena_min_poll_delay_us);
ENA_SPINLOCK_LOCK(admin_queue->q_lock, flags);
}
ENA_SPINLOCK_UNLOCK(admin_queue->q_lock, flags);
@@ -1433,7 +1559,7 @@ bool ena_com_get_admin_running_state(struct ena_com_dev *ena_dev)
void ena_com_set_admin_running_state(struct ena_com_dev *ena_dev, bool state)
{
struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue;
- unsigned long flags;
+ unsigned long flags = 0;
ENA_SPINLOCK_LOCK(admin_queue->q_lock, flags);
ena_dev->admin_queue.running_state = state;
@@ -1460,14 +1586,14 @@ int ena_com_set_aenq_config(struct ena_com_dev *ena_dev, u32 groups_flag)
struct ena_admin_get_feat_resp get_resp;
int ret;
- ret = ena_com_get_feature(ena_dev, &get_resp, ENA_ADMIN_AENQ_CONFIG);
+ ret = ena_com_get_feature(ena_dev, &get_resp, ENA_ADMIN_AENQ_CONFIG, 0);
if (ret) {
ena_trc_info("Can't get aenq configuration\n");
return ret;
}
if ((get_resp.u.aenq.supported_groups & groups_flag) != groups_flag) {
- ena_trc_warn("Trying to set unsupported aenq events. supported flag: %x asked flag: %x\n",
+ ena_trc_warn("Trying to set unsupported aenq events. supported flag: 0x%x asked flag: 0x%x\n",
get_resp.u.aenq.supported_groups,
groups_flag);
return ENA_COM_UNSUPPORTED;
@@ -1542,11 +1668,6 @@ int ena_com_validate_version(struct ena_com_dev *ena_dev)
ENA_REGS_VERSION_MAJOR_VERSION_SHIFT,
ver & ENA_REGS_VERSION_MINOR_VERSION_MASK);
- if (ver < MIN_ENA_VER) {
- ena_trc_err("ENA version is lower than the minimal version the driver supports\n");
- return -1;
- }
-
ena_trc_info("ena controller version: %d.%d.%d implementation version %d\n",
(ctrl_ver & ENA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_MASK)
>> ENA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_SHIFT,
@@ -1579,11 +1700,10 @@ void ena_com_admin_destroy(struct ena_com_dev *ena_dev)
u16 size;
ENA_WAIT_EVENT_DESTROY(admin_queue->comp_ctx->wait_event);
-
- ENA_SPINLOCK_DESTROY(admin_queue->q_lock);
-
if (admin_queue->comp_ctx)
- ENA_MEM_FREE(ena_dev->dmadev, admin_queue->comp_ctx);
+ ENA_MEM_FREE(ena_dev->dmadev,
+ admin_queue->comp_ctx,
+ (admin_queue->q_depth * sizeof(struct ena_comp_ctx)));
admin_queue->comp_ctx = NULL;
size = ADMIN_SQ_SIZE(admin_queue->q_depth);
if (sq->entries)
@@ -1602,6 +1722,7 @@ void ena_com_admin_destroy(struct ena_com_dev *ena_dev)
ENA_MEM_FREE_COHERENT(ena_dev->dmadev, size, aenq->entries,
aenq->dma_addr, aenq->mem_handle);
aenq->entries = NULL;
+ ENA_SPINLOCK_DESTROY(admin_queue->q_lock);
}
void ena_com_set_admin_polling_mode(struct ena_com_dev *ena_dev, bool polling)
@@ -1611,10 +1732,22 @@ void ena_com_set_admin_polling_mode(struct ena_com_dev *ena_dev, bool polling)
if (polling)
mask_value = ENA_REGS_ADMIN_INTR_MASK;
- ENA_REG_WRITE32(ena_dev->bus, mask_value, ena_dev->reg_bar + ENA_REGS_INTR_MASK_OFF);
+ ENA_REG_WRITE32(ena_dev->bus, mask_value,
+ ena_dev->reg_bar + ENA_REGS_INTR_MASK_OFF);
ena_dev->admin_queue.polling = polling;
}
+bool ena_com_get_admin_polling_mode(struct ena_com_dev *ena_dev)
+{
+ return ena_dev->admin_queue.polling;
+}
+
+void ena_com_set_admin_auto_polling_mode(struct ena_com_dev *ena_dev,
+ bool polling)
+{
+ ena_dev->admin_queue.auto_polling = polling;
+}
+
int ena_com_mmio_reg_read_request_init(struct ena_com_dev *ena_dev)
{
struct ena_com_mmio_read *mmio_read = &ena_dev->mmio_read;
@@ -1626,7 +1759,7 @@ int ena_com_mmio_reg_read_request_init(struct ena_com_dev *ena_dev)
mmio_read->read_resp_dma_addr,
mmio_read->read_resp_mem_handle);
if (unlikely(!mmio_read->read_resp))
- return ENA_COM_NO_MEM;
+ goto err;
ena_com_mmio_reg_read_request_write_dev_addr(ena_dev);
@@ -1635,6 +1768,10 @@ int ena_com_mmio_reg_read_request_init(struct ena_com_dev *ena_dev)
mmio_read->readless_supported = true;
return 0;
+
+err:
+ ENA_SPINLOCK_DESTROY(mmio_read->lock);
+ return ENA_COM_NO_MEM;
}
void ena_com_set_mmio_read_mode(struct ena_com_dev *ena_dev, bool readless_supported)
@@ -1658,7 +1795,6 @@ void ena_com_mmio_reg_read_request_destroy(struct ena_com_dev *ena_dev)
mmio_read->read_resp_mem_handle);
mmio_read->read_resp = NULL;
-
ENA_SPINLOCK_DESTROY(mmio_read->lock);
}
@@ -1675,17 +1811,12 @@ void ena_com_mmio_reg_read_request_write_dev_addr(struct ena_com_dev *ena_dev)
}
int ena_com_admin_init(struct ena_com_dev *ena_dev,
- struct ena_aenq_handlers *aenq_handlers,
- bool init_spinlock)
+ struct ena_aenq_handlers *aenq_handlers)
{
struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue;
u32 aq_caps, acq_caps, dev_sts, addr_low, addr_high;
int ret;
-#ifdef ENA_INTERNAL
- ena_trc_info("ena_defs : Version:[%s] Build date [%s]",
- ENA_GEN_COMMIT, ENA_GEN_DATE);
-#endif
dev_sts = ena_com_reg_bar_read32(ena_dev, ENA_REGS_DEV_STS_OFF);
if (unlikely(dev_sts == ENA_MMIO_READ_TIMEOUT)) {
@@ -1707,8 +1838,7 @@ int ena_com_admin_init(struct ena_com_dev *ena_dev,
ATOMIC32_SET(&admin_queue->outstanding_cmds, 0);
- if (init_spinlock)
- ENA_SPINLOCK_INIT(admin_queue->q_lock);
+ ENA_SPINLOCK_INIT(admin_queue->q_lock);
ret = ena_com_init_comp_ctxt(admin_queue);
if (ret)
@@ -1755,6 +1885,7 @@ int ena_com_admin_init(struct ena_com_dev *ena_dev,
if (ret)
goto error;
+ admin_queue->ena_dev = ena_dev;
admin_queue->running_state = true;
return 0;
@@ -1848,7 +1979,7 @@ void ena_com_destroy_io_queue(struct ena_com_dev *ena_dev, u16 qid)
int ena_com_get_link_params(struct ena_com_dev *ena_dev,
struct ena_admin_get_feat_resp *resp)
{
- return ena_com_get_feature(ena_dev, resp, ENA_ADMIN_LINK_CONFIG);
+ return ena_com_get_feature(ena_dev, resp, ENA_ADMIN_LINK_CONFIG, 0);
}
int ena_com_get_dev_attr_feat(struct ena_com_dev *ena_dev,
@@ -1858,7 +1989,7 @@ int ena_com_get_dev_attr_feat(struct ena_com_dev *ena_dev,
int rc;
rc = ena_com_get_feature(ena_dev, &get_resp,
- ENA_ADMIN_DEVICE_ATTRIBUTES);
+ ENA_ADMIN_DEVICE_ATTRIBUTES, 0);
if (rc)
return rc;
@@ -1866,17 +1997,34 @@ int ena_com_get_dev_attr_feat(struct ena_com_dev *ena_dev,
sizeof(get_resp.u.dev_attr));
ena_dev->supported_features = get_resp.u.dev_attr.supported_features;
- rc = ena_com_get_feature(ena_dev, &get_resp,
- ENA_ADMIN_MAX_QUEUES_NUM);
- if (rc)
- return rc;
+ if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) {
+ rc = ena_com_get_feature(ena_dev, &get_resp,
+ ENA_ADMIN_MAX_QUEUES_EXT,
+ ENA_FEATURE_MAX_QUEUE_EXT_VER);
+ if (rc)
+ return rc;
- memcpy(&get_feat_ctx->max_queues, &get_resp.u.max_queue,
- sizeof(get_resp.u.max_queue));
- ena_dev->tx_max_header_size = get_resp.u.max_queue.max_header_size;
+ if (get_resp.u.max_queue_ext.version != ENA_FEATURE_MAX_QUEUE_EXT_VER)
+ return -EINVAL;
+
+ memcpy(&get_feat_ctx->max_queue_ext, &get_resp.u.max_queue_ext,
+ sizeof(get_resp.u.max_queue_ext));
+ ena_dev->tx_max_header_size =
+ get_resp.u.max_queue_ext.max_queue_ext.max_tx_header_size;
+ } else {
+ rc = ena_com_get_feature(ena_dev, &get_resp,
+ ENA_ADMIN_MAX_QUEUES_NUM, 0);
+ memcpy(&get_feat_ctx->max_queues, &get_resp.u.max_queue,
+ sizeof(get_resp.u.max_queue));
+ ena_dev->tx_max_header_size =
+ get_resp.u.max_queue.max_header_size;
+
+ if (rc)
+ return rc;
+ }
rc = ena_com_get_feature(ena_dev, &get_resp,
- ENA_ADMIN_AENQ_CONFIG);
+ ENA_ADMIN_AENQ_CONFIG, 0);
if (rc)
return rc;
@@ -1884,7 +2032,7 @@ int ena_com_get_dev_attr_feat(struct ena_com_dev *ena_dev,
sizeof(get_resp.u.aenq));
rc = ena_com_get_feature(ena_dev, &get_resp,
- ENA_ADMIN_STATELESS_OFFLOAD_CONFIG);
+ ENA_ADMIN_STATELESS_OFFLOAD_CONFIG, 0);
if (rc)
return rc;
@@ -1894,7 +2042,7 @@ int ena_com_get_dev_attr_feat(struct ena_com_dev *ena_dev,
/* Driver hints isn't mandatory admin command. So in case the
* command isn't supported set driver hints to 0
*/
- rc = ena_com_get_feature(ena_dev, &get_resp, ENA_ADMIN_HW_HINTS);
+ rc = ena_com_get_feature(ena_dev, &get_resp, ENA_ADMIN_HW_HINTS, 0);
if (!rc)
memcpy(&get_feat_ctx->hw_hints, &get_resp.u.hw_hints,
@@ -1904,7 +2052,7 @@ int ena_com_get_dev_attr_feat(struct ena_com_dev *ena_dev,
else
return rc;
- rc = ena_com_get_feature(ena_dev, &get_resp, ENA_ADMIN_LLQ);
+ rc = ena_com_get_feature(ena_dev, &get_resp, ENA_ADMIN_LLQ, 0);
if (!rc)
memcpy(&get_feat_ctx->llq, &get_resp.u.llq,
sizeof(get_resp.u.llq));
@@ -1913,6 +2061,17 @@ int ena_com_get_dev_attr_feat(struct ena_com_dev *ena_dev,
else
return rc;
+ rc = ena_com_get_feature(ena_dev, &get_resp,
+ ENA_ADMIN_RSS_REDIRECTION_TABLE_CONFIG, 0);
+ if (!rc)
+ memcpy(&get_feat_ctx->ind_table, &get_resp.u.ind_table,
+ sizeof(get_resp.u.ind_table));
+ else if (rc == ENA_COM_UNSUPPORTED)
+ memset(&get_feat_ctx->ind_table, 0x0,
+ sizeof(get_feat_ctx->ind_table));
+ else
+ return rc;
+
return 0;
}
@@ -1944,8 +2103,8 @@ void ena_com_aenq_intr_handler(struct ena_com_dev *dev, void *data)
struct ena_admin_aenq_entry *aenq_e;
struct ena_admin_aenq_common_desc *aenq_common;
struct ena_com_aenq *aenq = &dev->aenq;
+ u64 timestamp;
ena_aenq_handler handler_cb;
- unsigned long long timestamp;
u16 masked_head, processed = 0;
u8 phase;
@@ -1955,11 +2114,16 @@ void ena_com_aenq_intr_handler(struct ena_com_dev *dev, void *data)
aenq_common = &aenq_e->aenq_common_desc;
/* Go over all the events */
- while ((aenq_common->flags & ENA_ADMIN_AENQ_COMMON_DESC_PHASE_MASK) ==
- phase) {
- timestamp = (unsigned long long)aenq_common->timestamp_low |
- ((unsigned long long)aenq_common->timestamp_high << 32);
- ena_trc_dbg("AENQ! Group[%x] Syndrom[%x] timestamp: [%llus]\n",
+ while ((READ_ONCE8(aenq_common->flags) &
+ ENA_ADMIN_AENQ_COMMON_DESC_PHASE_MASK) == phase) {
+ /* Make sure the phase bit (ownership) is as expected before
+ * reading the rest of the descriptor.
+ */
+ dma_rmb();
+
+ timestamp = (u64)aenq_common->timestamp_low |
+ ((u64)aenq_common->timestamp_high << 32);
+ ena_trc_dbg("AENQ! Group[%x] Syndrom[%x] timestamp: [%" ENA_PRIu64 "s]\n",
aenq_common->group,
aenq_common->syndrom,
timestamp);
@@ -1990,7 +2154,9 @@ void ena_com_aenq_intr_handler(struct ena_com_dev *dev, void *data)
/* write the aenq doorbell after all AENQ descriptors were read */
mb();
- ENA_REG_WRITE32(dev->bus, (u32)aenq->head, dev->reg_bar + ENA_REGS_AENQ_HEAD_DB_OFF);
+ ENA_REG_WRITE32_RELAXED(dev->bus, (u32)aenq->head,
+ dev->reg_bar + ENA_REGS_AENQ_HEAD_DB_OFF);
+ mmiowb();
}
#ifdef ENA_EXTENDED_STATS
/*
@@ -2141,7 +2307,7 @@ int ena_com_get_dev_extended_stats(struct ena_com_dev *ena_dev, char *buff,
phys_addr);
if (unlikely(ret)) {
ena_trc_err("memory address set failed\n");
- return ret;
+ goto free_ext_stats_mem;
}
get_cmd->u.control_buffer.length = len;
@@ -2202,7 +2368,7 @@ int ena_com_get_offload_settings(struct ena_com_dev *ena_dev,
struct ena_admin_get_feat_resp resp;
ret = ena_com_get_feature(ena_dev, &resp,
- ENA_ADMIN_STATELESS_OFFLOAD_CONFIG);
+ ENA_ADMIN_STATELESS_OFFLOAD_CONFIG, 0);
if (unlikely(ret)) {
ena_trc_err("Failed to get offload capabilities %d\n", ret);
return ret;
@@ -2231,11 +2397,11 @@ int ena_com_set_hash_function(struct ena_com_dev *ena_dev)
/* Validate hash function is supported */
ret = ena_com_get_feature(ena_dev, &get_resp,
- ENA_ADMIN_RSS_HASH_FUNCTION);
+ ENA_ADMIN_RSS_HASH_FUNCTION, 0);
if (unlikely(ret))
return ret;
- if (get_resp.u.flow_hash_func.supported_func & (1 << rss->hash_func)) {
+ if (!(get_resp.u.flow_hash_func.supported_func & BIT(rss->hash_func))) {
ena_trc_err("Func hash %d isn't supported by device, abort\n",
rss->hash_func);
return ENA_COM_UNSUPPORTED;
@@ -2278,12 +2444,14 @@ int ena_com_fill_hash_function(struct ena_com_dev *ena_dev,
enum ena_admin_hash_functions func,
const u8 *key, u16 key_len, u32 init_val)
{
- struct ena_rss *rss = &ena_dev->rss;
+ struct ena_admin_feature_rss_flow_hash_control *hash_key;
struct ena_admin_get_feat_resp get_resp;
- struct ena_admin_feature_rss_flow_hash_control *hash_key =
- rss->hash_key;
+ enum ena_admin_hash_functions old_func;
+ struct ena_rss *rss = &ena_dev->rss;
int rc;
+ hash_key = rss->hash_key;
+
/* Make sure size is a mult of DWs */
if (unlikely(key_len & 0x3))
return ENA_COM_INVAL;
@@ -2291,26 +2459,27 @@ int ena_com_fill_hash_function(struct ena_com_dev *ena_dev,
rc = ena_com_get_feature_ex(ena_dev, &get_resp,
ENA_ADMIN_RSS_HASH_FUNCTION,
rss->hash_key_dma_addr,
- sizeof(*rss->hash_key));
+ sizeof(*rss->hash_key), 0);
if (unlikely(rc))
return rc;
- if (!((1 << func) & get_resp.u.flow_hash_func.supported_func)) {
+ if (!(BIT(func) & get_resp.u.flow_hash_func.supported_func)) {
ena_trc_err("Flow hash function %d isn't supported\n", func);
return ENA_COM_UNSUPPORTED;
}
switch (func) {
case ENA_ADMIN_TOEPLITZ:
- if (key_len > sizeof(hash_key->key)) {
- ena_trc_err("key len (%hu) is bigger than the max supported (%zu)\n",
- key_len, sizeof(hash_key->key));
- return ENA_COM_INVAL;
+ if (key) {
+ if (key_len != sizeof(hash_key->key)) {
+ ena_trc_err("key len (%hu) doesn't equal the supported size (%zu)\n",
+ key_len, sizeof(hash_key->key));
+ return ENA_COM_INVAL;
+ }
+ memcpy(hash_key->key, key, key_len);
+ rss->hash_init_val = init_val;
+ hash_key->keys_num = key_len / sizeof(hash_key->key[0]);
}
-
- memcpy(hash_key->key, key, key_len);
- rss->hash_init_val = init_val;
- hash_key->keys_num = key_len >> 2;
break;
case ENA_ADMIN_CRC32:
rss->hash_init_val = init_val;
@@ -2320,35 +2489,48 @@ int ena_com_fill_hash_function(struct ena_com_dev *ena_dev,
return ENA_COM_INVAL;
}
+ old_func = rss->hash_func;
+ rss->hash_func = func;
rc = ena_com_set_hash_function(ena_dev);
/* Restore the old function */
if (unlikely(rc))
- ena_com_get_hash_function(ena_dev, NULL, NULL);
+ rss->hash_func = old_func;
return rc;
}
int ena_com_get_hash_function(struct ena_com_dev *ena_dev,
- enum ena_admin_hash_functions *func,
- u8 *key)
+ enum ena_admin_hash_functions *func)
{
struct ena_rss *rss = &ena_dev->rss;
struct ena_admin_get_feat_resp get_resp;
- struct ena_admin_feature_rss_flow_hash_control *hash_key =
- rss->hash_key;
int rc;
+ if (unlikely(!func))
+ return ENA_COM_INVAL;
+
rc = ena_com_get_feature_ex(ena_dev, &get_resp,
ENA_ADMIN_RSS_HASH_FUNCTION,
rss->hash_key_dma_addr,
- sizeof(*rss->hash_key));
+ sizeof(*rss->hash_key), 0);
if (unlikely(rc))
return rc;
- rss->hash_func = get_resp.u.flow_hash_func.selected_func;
- if (func)
- *func = rss->hash_func;
+ /* ENA_FFS() returns 1 in case the lsb is set */
+ rss->hash_func = ENA_FFS(get_resp.u.flow_hash_func.selected_func);
+ if (rss->hash_func)
+ rss->hash_func--;
+
+ *func = rss->hash_func;
+
+ return 0;
+}
+
+int ena_com_get_hash_key(struct ena_com_dev *ena_dev, u8 *key)
+{
+ struct ena_admin_feature_rss_flow_hash_control *hash_key =
+ ena_dev->rss.hash_key;
if (key)
memcpy(key, hash_key->key, (size_t)(hash_key->keys_num) << 2);
@@ -2367,7 +2549,7 @@ int ena_com_get_hash_ctrl(struct ena_com_dev *ena_dev,
rc = ena_com_get_feature_ex(ena_dev, &get_resp,
ENA_ADMIN_RSS_HASH_INPUT,
rss->hash_ctrl_dma_addr,
- sizeof(*rss->hash_ctrl));
+ sizeof(*rss->hash_ctrl), 0);
if (unlikely(rc))
return rc;
@@ -2603,17 +2785,13 @@ int ena_com_indirect_table_get(struct ena_com_dev *ena_dev, u32 *ind_tbl)
rc = ena_com_get_feature_ex(ena_dev, &get_resp,
ENA_ADMIN_RSS_REDIRECTION_TABLE_CONFIG,
rss->rss_ind_tbl_dma_addr,
- tbl_size);
+ tbl_size, 0);
if (unlikely(rc))
return rc;
if (!ind_tbl)
return 0;
- rc = ena_com_ind_tbl_convert_from_device(ena_dev);
- if (unlikely(rc))
- return rc;
-
for (i = 0; i < (1 << rss->tbl_log_size); i++)
ind_tbl[i] = rss->host_rss_ind_tbl[i];
@@ -2630,8 +2808,14 @@ int ena_com_rss_init(struct ena_com_dev *ena_dev, u16 indr_tbl_log_size)
if (unlikely(rc))
goto err_indr_tbl;
+ /* The following function might return unsupported in case the
+ * device doesn't support setting the key / hash function. We can safely
+ * ignore this error and have indirection table support only.
+ */
rc = ena_com_hash_key_allocate(ena_dev);
- if (unlikely(rc))
+ if (likely(!rc))
+ ena_com_hash_key_fill_default_key(ena_dev);
+ else if (rc != ENA_COM_UNSUPPORTED)
goto err_hash_key;
rc = ena_com_hash_ctrl_init(ena_dev);
@@ -2670,6 +2854,10 @@ int ena_com_allocate_host_info(struct ena_com_dev *ena_dev)
if (unlikely(!host_attr->host_info))
return ENA_COM_NO_MEM;
+ host_attr->host_info->ena_spec_version = ((ENA_COMMON_SPEC_VERSION_MAJOR <<
+ ENA_REGS_VERSION_MAJOR_VERSION_SHIFT) |
+ (ENA_COMMON_SPEC_VERSION_MINOR));
+
return 0;
}
@@ -2777,42 +2965,35 @@ bool ena_com_interrupt_moderation_supported(struct ena_com_dev *ena_dev)
ENA_ADMIN_INTERRUPT_MODERATION);
}
-int ena_com_update_nonadaptive_moderation_interval_tx(struct ena_com_dev *ena_dev,
- u32 tx_coalesce_usecs)
+static int ena_com_update_nonadaptive_moderation_interval(u32 coalesce_usecs,
+ u32 intr_delay_resolution,
+ u32 *intr_moder_interval)
{
- if (!ena_dev->intr_delay_resolution) {
+ if (!intr_delay_resolution) {
ena_trc_err("Illegal interrupt delay granularity value\n");
return ENA_COM_FAULT;
}
- ena_dev->intr_moder_tx_interval = tx_coalesce_usecs /
- ena_dev->intr_delay_resolution;
+ *intr_moder_interval = coalesce_usecs / intr_delay_resolution;
return 0;
}
-int ena_com_update_nonadaptive_moderation_interval_rx(struct ena_com_dev *ena_dev,
- u32 rx_coalesce_usecs)
-{
- if (!ena_dev->intr_delay_resolution) {
- ena_trc_err("Illegal interrupt delay granularity value\n");
- return ENA_COM_FAULT;
- }
-
- /* We use LOWEST entry of moderation table for storing
- * nonadaptive interrupt coalescing values
- */
- ena_dev->intr_moder_tbl[ENA_INTR_MODER_LOWEST].intr_moder_interval =
- rx_coalesce_usecs / ena_dev->intr_delay_resolution;
- return 0;
+int ena_com_update_nonadaptive_moderation_interval_tx(struct ena_com_dev *ena_dev,
+ u32 tx_coalesce_usecs)
+{
+ return ena_com_update_nonadaptive_moderation_interval(tx_coalesce_usecs,
+ ena_dev->intr_delay_resolution,
+ &ena_dev->intr_moder_tx_interval);
}
-void ena_com_destroy_interrupt_moderation(struct ena_com_dev *ena_dev)
+int ena_com_update_nonadaptive_moderation_interval_rx(struct ena_com_dev *ena_dev,
+ u32 rx_coalesce_usecs)
{
- if (ena_dev->intr_moder_tbl)
- ENA_MEM_FREE(ena_dev->dmadev, ena_dev->intr_moder_tbl);
- ena_dev->intr_moder_tbl = NULL;
+ return ena_com_update_nonadaptive_moderation_interval(rx_coalesce_usecs,
+ ena_dev->intr_delay_resolution,
+ &ena_dev->intr_moder_rx_interval);
}
int ena_com_init_interrupt_moderation(struct ena_com_dev *ena_dev)
@@ -2822,7 +3003,7 @@ int ena_com_init_interrupt_moderation(struct ena_com_dev *ena_dev)
int rc;
rc = ena_com_get_feature(ena_dev, &get_resp,
- ENA_ADMIN_INTERRUPT_MODERATION);
+ ENA_ADMIN_INTERRUPT_MODERATION, 0);
if (rc) {
if (rc == ENA_COM_UNSUPPORTED) {
@@ -2839,62 +3020,14 @@ int ena_com_init_interrupt_moderation(struct ena_com_dev *ena_dev)
return rc;
}
- rc = ena_com_init_interrupt_moderation_table(ena_dev);
- if (rc)
- goto err;
-
/* if moderation is supported by device we set adaptive moderation */
delay_resolution = get_resp.u.intr_moderation.intr_delay_resolution;
ena_com_update_intr_delay_resolution(ena_dev, delay_resolution);
- ena_com_enable_adaptive_moderation(ena_dev);
-
- return 0;
-err:
- ena_com_destroy_interrupt_moderation(ena_dev);
- return rc;
-}
-void ena_com_config_default_interrupt_moderation_table(struct ena_com_dev *ena_dev)
-{
- struct ena_intr_moder_entry *intr_moder_tbl = ena_dev->intr_moder_tbl;
-
- if (!intr_moder_tbl)
- return;
+ /* Disable adaptive moderation by default - can be enabled later */
+ ena_com_disable_adaptive_moderation(ena_dev);
- intr_moder_tbl[ENA_INTR_MODER_LOWEST].intr_moder_interval =
- ENA_INTR_LOWEST_USECS;
- intr_moder_tbl[ENA_INTR_MODER_LOWEST].pkts_per_interval =
- ENA_INTR_LOWEST_PKTS;
- intr_moder_tbl[ENA_INTR_MODER_LOWEST].bytes_per_interval =
- ENA_INTR_LOWEST_BYTES;
-
- intr_moder_tbl[ENA_INTR_MODER_LOW].intr_moder_interval =
- ENA_INTR_LOW_USECS;
- intr_moder_tbl[ENA_INTR_MODER_LOW].pkts_per_interval =
- ENA_INTR_LOW_PKTS;
- intr_moder_tbl[ENA_INTR_MODER_LOW].bytes_per_interval =
- ENA_INTR_LOW_BYTES;
-
- intr_moder_tbl[ENA_INTR_MODER_MID].intr_moder_interval =
- ENA_INTR_MID_USECS;
- intr_moder_tbl[ENA_INTR_MODER_MID].pkts_per_interval =
- ENA_INTR_MID_PKTS;
- intr_moder_tbl[ENA_INTR_MODER_MID].bytes_per_interval =
- ENA_INTR_MID_BYTES;
-
- intr_moder_tbl[ENA_INTR_MODER_HIGH].intr_moder_interval =
- ENA_INTR_HIGH_USECS;
- intr_moder_tbl[ENA_INTR_MODER_HIGH].pkts_per_interval =
- ENA_INTR_HIGH_PKTS;
- intr_moder_tbl[ENA_INTR_MODER_HIGH].bytes_per_interval =
- ENA_INTR_HIGH_BYTES;
-
- intr_moder_tbl[ENA_INTR_MODER_HIGHEST].intr_moder_interval =
- ENA_INTR_HIGHEST_USECS;
- intr_moder_tbl[ENA_INTR_MODER_HIGHEST].pkts_per_interval =
- ENA_INTR_HIGHEST_PKTS;
- intr_moder_tbl[ENA_INTR_MODER_HIGHEST].bytes_per_interval =
- ENA_INTR_HIGHEST_BYTES;
+ return 0;
}
unsigned int ena_com_get_nonadaptive_moderation_interval_tx(struct ena_com_dev *ena_dev)
@@ -2904,74 +3037,31 @@ unsigned int ena_com_get_nonadaptive_moderation_interval_tx(struct ena_com_dev *
unsigned int ena_com_get_nonadaptive_moderation_interval_rx(struct ena_com_dev *ena_dev)
{
- struct ena_intr_moder_entry *intr_moder_tbl = ena_dev->intr_moder_tbl;
-
- if (intr_moder_tbl)
- return intr_moder_tbl[ENA_INTR_MODER_LOWEST].intr_moder_interval;
-
- return 0;
-}
-
-void ena_com_init_intr_moderation_entry(struct ena_com_dev *ena_dev,
- enum ena_intr_moder_level level,
- struct ena_intr_moder_entry *entry)
-{
- struct ena_intr_moder_entry *intr_moder_tbl = ena_dev->intr_moder_tbl;
-
- if (level >= ENA_INTR_MAX_NUM_OF_LEVELS)
- return;
-
- intr_moder_tbl[level].intr_moder_interval = entry->intr_moder_interval;
- if (ena_dev->intr_delay_resolution)
- intr_moder_tbl[level].intr_moder_interval /=
- ena_dev->intr_delay_resolution;
- intr_moder_tbl[level].pkts_per_interval = entry->pkts_per_interval;
-
- /* use hardcoded value until ethtool supports bytecount parameter */
- if (entry->bytes_per_interval != ENA_INTR_BYTE_COUNT_NOT_SUPPORTED)
- intr_moder_tbl[level].bytes_per_interval = entry->bytes_per_interval;
-}
-
-void ena_com_get_intr_moderation_entry(struct ena_com_dev *ena_dev,
- enum ena_intr_moder_level level,
- struct ena_intr_moder_entry *entry)
-{
- struct ena_intr_moder_entry *intr_moder_tbl = ena_dev->intr_moder_tbl;
-
- if (level >= ENA_INTR_MAX_NUM_OF_LEVELS)
- return;
-
- entry->intr_moder_interval = intr_moder_tbl[level].intr_moder_interval;
- if (ena_dev->intr_delay_resolution)
- entry->intr_moder_interval *= ena_dev->intr_delay_resolution;
- entry->pkts_per_interval =
- intr_moder_tbl[level].pkts_per_interval;
- entry->bytes_per_interval = intr_moder_tbl[level].bytes_per_interval;
+ return ena_dev->intr_moder_rx_interval;
}
int ena_com_config_dev_mode(struct ena_com_dev *ena_dev,
- struct ena_admin_feature_llq_desc *llq)
+ struct ena_admin_feature_llq_desc *llq_features,
+ struct ena_llq_configurations *llq_default_cfg)
{
+ struct ena_com_llq_info *llq_info = &ena_dev->llq_info;
int rc;
- int size;
- if (llq->max_llq_num == 0) {
+ if (!llq_features->max_llq_num) {
ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
return 0;
}
- rc = ena_com_config_llq_info(ena_dev, llq);
+ rc = ena_com_config_llq_info(ena_dev, llq_features, llq_default_cfg);
if (rc)
return rc;
- /* Validate the descriptor is not too big */
- size = ena_dev->tx_max_header_size;
- size += ena_dev->llq_info.descs_num_before_header *
- sizeof(struct ena_eth_io_tx_desc);
+ ena_dev->tx_max_header_size = llq_info->desc_list_entry_size -
+ (llq_info->descs_num_before_header * sizeof(struct ena_eth_io_tx_desc));
- if (unlikely(ena_dev->llq_info.desc_list_entry_size < size)) {
+ if (unlikely(ena_dev->tx_max_header_size == 0)) {
ena_trc_err("the size of the LLQ entry is smaller than needed\n");
- return ENA_COM_INVAL;
+ return -EINVAL;
}
ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_DEV;
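As a worked example of the new derivation, assuming the common default LLQ configuration of a 128-byte descriptor list entry with 2 descriptors placed before the header (both values are assumptions, not taken from this hunk) and a 16-byte struct ena_eth_io_tx_desc: tx_max_header_size = 128 - 2 * 16 = 96 bytes of header that can be pushed inline. A configuration that leaves no room for the header is exactly what the -EINVAL branch above rejects.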
diff --git a/sys/contrib/ena-com/ena_com.h b/sys/contrib/ena-com/ena_com.h
index 9425205f5811..c1b9540edd0b 100644
--- a/sys/contrib/ena-com/ena_com.h
+++ b/sys/contrib/ena-com/ena_com.h
@@ -1,7 +1,7 @@
/*-
* BSD LICENSE
*
- * Copyright (c) 2015-2017 Amazon.com, Inc. or its affiliates.
+ * Copyright (c) 2015-2020 Amazon.com, Inc. or its affiliates.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -34,16 +34,11 @@
#ifndef ENA_COM
#define ENA_COM
-#ifndef ENA_INTERNAL
#include "ena_plat.h"
-#else
-#include "ena_plat.h"
-#include "ena_includes.h"
-#endif
-#define ENA_MAX_NUM_IO_QUEUES 128U
+#define ENA_MAX_NUM_IO_QUEUES 128U
/* We need two queues for each IO (one for Tx and one for Rx) */
-#define ENA_TOTAL_NUM_QUEUES (2 * (ENA_MAX_NUM_IO_QUEUES))
+#define ENA_TOTAL_NUM_QUEUES (2 * (ENA_MAX_NUM_IO_QUEUES))
#define ENA_MAX_HANDLERS 256
@@ -60,48 +55,22 @@
/*****************************************************************************/
/* ENA adaptive interrupt moderation settings */
-#define ENA_INTR_LOWEST_USECS (0)
-#define ENA_INTR_LOWEST_PKTS (3)
-#define ENA_INTR_LOWEST_BYTES (2 * 1524)
-
-#define ENA_INTR_LOW_USECS (32)
-#define ENA_INTR_LOW_PKTS (12)
-#define ENA_INTR_LOW_BYTES (16 * 1024)
-
-#define ENA_INTR_MID_USECS (80)
-#define ENA_INTR_MID_PKTS (48)
-#define ENA_INTR_MID_BYTES (64 * 1024)
-
-#define ENA_INTR_HIGH_USECS (128)
-#define ENA_INTR_HIGH_PKTS (96)
-#define ENA_INTR_HIGH_BYTES (128 * 1024)
-
-#define ENA_INTR_HIGHEST_USECS (192)
-#define ENA_INTR_HIGHEST_PKTS (128)
-#define ENA_INTR_HIGHEST_BYTES (192 * 1024)
-
-#define ENA_INTR_INITIAL_TX_INTERVAL_USECS 196
-#define ENA_INTR_INITIAL_RX_INTERVAL_USECS 4
-#define ENA_INTR_DELAY_OLD_VALUE_WEIGHT 6
-#define ENA_INTR_DELAY_NEW_VALUE_WEIGHT 4
-#define ENA_INTR_MODER_LEVEL_STRIDE 1
-#define ENA_INTR_BYTE_COUNT_NOT_SUPPORTED 0xFFFFFF
-
-#define ENA_HW_HINTS_NO_TIMEOUT 0xFFFF
-
-enum ena_intr_moder_level {
- ENA_INTR_MODER_LOWEST = 0,
- ENA_INTR_MODER_LOW,
- ENA_INTR_MODER_MID,
- ENA_INTR_MODER_HIGH,
- ENA_INTR_MODER_HIGHEST,
- ENA_INTR_MAX_NUM_OF_LEVELS,
-};
+#define ENA_INTR_INITIAL_TX_INTERVAL_USECS ENA_INTR_INITIAL_TX_INTERVAL_USECS_PLAT
+#define ENA_INTR_INITIAL_RX_INTERVAL_USECS 0
+#define ENA_DEFAULT_INTR_DELAY_RESOLUTION 1
+
+#define ENA_HASH_KEY_SIZE 40
-struct ena_intr_moder_entry {
- unsigned int intr_moder_interval;
- unsigned int pkts_per_interval;
- unsigned int bytes_per_interval;
+#define ENA_HW_HINTS_NO_TIMEOUT 0xFFFF
+
+#define ENA_FEATURE_MAX_QUEUE_EXT_VER 1
+
+struct ena_llq_configurations {
+ enum ena_admin_llq_header_location llq_header_location;
+ enum ena_admin_llq_ring_entry_size llq_ring_entry_size;
+ enum ena_admin_llq_stride_ctrl llq_stride_ctrl;
+ enum ena_admin_llq_num_descs_before_header llq_num_decs_before_header;
+ u16 llq_ring_entry_size_value;
};
enum queue_direction {
@@ -134,12 +103,14 @@ struct ena_com_tx_meta {
};
struct ena_com_llq_info {
- bool inline_header;
+ u16 header_location_ctrl;
u16 desc_stride_ctrl;
-
+ u16 desc_list_entry_size_ctrl;
u16 desc_list_entry_size;
u16 descs_num_before_header;
u16 descs_per_entry;
+ u16 max_entries_in_tx_burst;
+ bool disable_meta_caching;
};
struct ena_com_io_cq {
@@ -204,6 +175,8 @@ struct ena_com_io_sq {
enum queue_direction direction;
enum ena_admin_placement_policy_type mem_queue_type;
+ bool disable_meta_caching;
+
u32 msix_vector;
struct ena_com_tx_meta cached_tx_meta;
struct ena_com_llq_info llq_info;
@@ -221,6 +194,7 @@ struct ena_com_io_sq {
u8 phase;
u8 desc_entry_size;
u8 dma_addr_bits;
+ u16 entries_in_tx_burst_left;
} ____cacheline_aligned;
struct ena_com_admin_cq {
@@ -246,16 +220,17 @@ struct ena_com_admin_sq {
};
struct ena_com_stats_admin {
- u32 aborted_cmd;
- u32 submitted_cmd;
- u32 completed_cmd;
- u32 out_of_space;
- u32 no_completion;
+ u64 aborted_cmd;
+ u64 submitted_cmd;
+ u64 completed_cmd;
+ u64 out_of_space;
+ u64 no_completion;
};
struct ena_com_admin_queue {
void *q_dmadev;
void *bus;
+ struct ena_com_dev *ena_dev;
ena_spinlock_t q_lock; /* spinlock for the admin queue */
struct ena_comp_ctx *comp_ctx;
@@ -267,6 +242,9 @@ struct ena_com_admin_queue {
/* Indicate if the admin queue should poll for completion */
bool polling;
+ /* Define if fallback to polling mode should occur */
+ bool auto_polling;
+
u16 curr_cmd_id;
/* Indicate that the ena was initialized and can
@@ -363,19 +341,29 @@ struct ena_com_dev {
struct ena_host_attribute host_attr;
bool adaptive_coalescing;
u16 intr_delay_resolution;
+
+ /* interrupt moderation intervals are in usec divided by
+ * intr_delay_resolution, which is supplied by the device.
+ */
u32 intr_moder_tx_interval;
+ u32 intr_moder_rx_interval;
+
struct ena_intr_moder_entry *intr_moder_tbl;
struct ena_com_llq_info llq_info;
+
+ u32 ena_min_poll_delay_us;
};
struct ena_com_dev_get_features_ctx {
struct ena_admin_queue_feature_desc max_queues;
+ struct ena_admin_queue_ext_feature_desc max_queue_ext;
struct ena_admin_device_attr_feature_desc dev_attr;
struct ena_admin_feature_aenq_desc aenq;
struct ena_admin_feature_offload_desc offload;
struct ena_admin_ena_hw_hints hw_hints;
struct ena_admin_feature_llq_desc llq;
+ struct ena_admin_feature_rss_ind_table ind_table;
};
struct ena_com_create_io_ctx {
@@ -413,7 +401,7 @@ extern "C" {
*/
int ena_com_mmio_reg_read_request_init(struct ena_com_dev *ena_dev);
-/* ena_com_set_mmio_read_mode - Enable/disable the mmio reg read mechanism
+/* ena_com_set_mmio_read_mode - Enable/disable the indirect mmio reg read mechanism
* @ena_dev: ENA communication layer struct
* @readless_supported: readless mode (enable/disable)
*/
@@ -434,8 +422,6 @@ void ena_com_mmio_reg_read_request_destroy(struct ena_com_dev *ena_dev);
/* ena_com_admin_init - Init the admin and the async queues
* @ena_dev: ENA communication layer struct
* @aenq_handlers: Those handlers to be called upon event.
- * @init_spinlock: Indicate if this method should init the admin spinlock or
- * the spinlock was init before (for example, in a case of FLR).
*
* Initialize the admin submission and completion queues.
* Initialize the asynchronous events notification queues.
@@ -443,8 +429,7 @@ void ena_com_mmio_reg_read_request_destroy(struct ena_com_dev *ena_dev);
* @return - 0 on success, negative value on failure.
*/
int ena_com_admin_init(struct ena_com_dev *ena_dev,
- struct ena_aenq_handlers *aenq_handlers,
- bool init_spinlock);
+ struct ena_aenq_handlers *aenq_handlers);
/* ena_com_admin_destroy - Destroy the admin and the async events queues.
* @ena_dev: ENA communication layer struct
@@ -524,7 +509,7 @@ bool ena_com_get_admin_running_state(struct ena_com_dev *ena_dev);
*/
void ena_com_set_admin_polling_mode(struct ena_com_dev *ena_dev, bool polling);
-/* ena_com_set_admin_polling_mode - Get the admin completion queue polling mode
+/* ena_com_get_admin_polling_mode - Get the admin completion queue polling mode
* @ena_dev: ENA communication layer struct
*
* Get the admin completion mode.
@@ -534,12 +519,23 @@ void ena_com_set_admin_polling_mode(struct ena_com_dev *ena_dev, bool polling);
*
* @return state
*/
-bool ena_com_get_ena_admin_polling_mode(struct ena_com_dev *ena_dev);
+bool ena_com_get_admin_polling_mode(struct ena_com_dev *ena_dev);
+
+/* ena_com_set_admin_auto_polling_mode - Enable autoswitch to polling mode
+ * @ena_dev: ENA communication layer struct
+ * @polling: Enable/Disable polling mode
+ *
+ * Set the autopolling mode.
+ * If autopolling is on:
+ * In case of a missing interrupt when data is available, switch to polling.
+ */
+void ena_com_set_admin_auto_polling_mode(struct ena_com_dev *ena_dev,
+ bool polling);
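A one-line usage sketch, assuming ena_dev is the device's struct ena_com_dev and the call is made from a watchdog path that suspects a missed admin completion interrupt:

	/* Allow the admin queue to fall back to polling on its own if an
	 * admin completion interrupt goes missing. */
	ena_com_set_admin_auto_polling_mode(ena_dev, true);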
/* ena_com_admin_q_comp_intr_handler - admin queue interrupt handler
* @ena_dev: ENA communication layer struct
*
- * This method go over the admin completion queue and wake up all the pending
+ * This method goes over the admin completion queue and wakes up all the pending
* threads that wait on the commands wait event.
*
* @note: Should be called after MSI-X interrupt.
@@ -549,7 +545,7 @@ void ena_com_admin_q_comp_intr_handler(struct ena_com_dev *ena_dev);
/* ena_com_aenq_intr_handler - AENQ interrupt handler
* @ena_dev: ENA communication layer struct
*
- * This method go over the async event notification queue and call the proper
+ * This method goes over the async event notification queue and calls the proper
* aenq handler.
*/
void ena_com_aenq_intr_handler(struct ena_com_dev *dev, void *data);
@@ -566,14 +562,14 @@ void ena_com_abort_admin_commands(struct ena_com_dev *ena_dev);
/* ena_com_wait_for_abort_completion - Wait for admin commands abort.
* @ena_dev: ENA communication layer struct
*
- * This method wait until all the outstanding admin commands will be completed.
+ * This method waits until all the outstanding admin commands are completed.
*/
void ena_com_wait_for_abort_completion(struct ena_com_dev *ena_dev);
/* ena_com_validate_version - Validate the device parameters
* @ena_dev: ENA communication layer struct
*
- * This method validate the device parameters are the same as the saved
+ * This method verifies the device parameters are the same as the saved
* parameters in ena_dev.
* This method is useful after device reset, to validate the device mac address
* and the device offloads are the same as before the reset.
@@ -668,6 +664,14 @@ int ena_com_rss_init(struct ena_com_dev *ena_dev, u16 log_size);
*/
void ena_com_rss_destroy(struct ena_com_dev *ena_dev);
+/* ena_com_get_current_hash_function - Get RSS hash function
+ * @ena_dev: ENA communication layer struct
+ *
+ * Return the current hash function.
+ * @return: 0 or one of the ena_admin_hash_functions values.
+ */
+int ena_com_get_current_hash_function(struct ena_com_dev *ena_dev);
+
/* ena_com_fill_hash_function - Fill RSS hash function
* @ena_dev: ENA communication layer struct
* @func: The hash function (Toeplitz or crc)
@@ -699,23 +703,32 @@ int ena_com_fill_hash_function(struct ena_com_dev *ena_dev,
*/
int ena_com_set_hash_function(struct ena_com_dev *ena_dev);
-/* ena_com_get_hash_function - Retrieve the hash function and the hash key
- * from the device.
+/* ena_com_get_hash_function - Retrieve the hash function from the device.
* @ena_dev: ENA communication layer struct
* @func: hash function
- * @key: hash key
*
- * Retrieve the hash function and the hash key from the device.
+ * Retrieve the hash function from the device.
*
- * @note: If the caller called ena_com_fill_hash_function but didn't flash
+ * @note: If the caller called ena_com_fill_hash_function but didn't flush
* it to the device, the new configuration will be lost.
*
* @return: 0 on Success and negative value otherwise.
*/
int ena_com_get_hash_function(struct ena_com_dev *ena_dev,
- enum ena_admin_hash_functions *func,
- u8 *key);
+ enum ena_admin_hash_functions *func);
+/* ena_com_get_hash_key - Retrieve the hash key
+ * @ena_dev: ENA communication layer struct
+ * @key: hash key
+ *
+ * Retrieve the hash key.
+ *
+ * @note: If the caller called ena_com_fill_hash_key but didn't flush
+ * it to the device, the new configuration will be lost.
+ *
+ * @return: 0 on Success and negative value otherwise.
+ */
+int ena_com_get_hash_key(struct ena_com_dev *ena_dev, u8 *key);
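Retrieving the RSS configuration is now two calls instead of one. A short sketch, assuming RSS was set up earlier with ena_com_rss_init(); the wrapper name is hypothetical and the key buffer size comes from ENA_HASH_KEY_SIZE defined above:

static int
example_read_rss_config(struct ena_com_dev *ena_dev,
    enum ena_admin_hash_functions *func, u8 key[ENA_HASH_KEY_SIZE])
{
	int rc;

	rc = ena_com_get_hash_function(ena_dev, func);
	if (rc != 0)
		return (rc);

	return (ena_com_get_hash_key(ena_dev, key));
}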
/* ena_com_fill_hash_ctrl - Fill RSS hash control
* @ena_dev: ENA communication layer struct.
* @proto: The protocol to configure.
@@ -750,7 +763,7 @@ int ena_com_set_hash_ctrl(struct ena_com_dev *ena_dev);
*
* Retrieve the hash control from the device.
*
- * @note, If the caller called ena_com_fill_hash_ctrl but didn't flash
+ * @note: If the caller called ena_com_fill_hash_ctrl but didn't flush
* it to the device, the new configuration will be lost.
*
* @return: 0 on Success and negative value otherwise.
@@ -802,7 +815,7 @@ int ena_com_indirect_table_set(struct ena_com_dev *ena_dev);
*
* Retrieve the RSS indirection table from the device.
*
- * @note: If the caller called ena_com_indirect_table_fill_entry but didn't flash
+ * @note: If the caller called ena_com_indirect_table_fill_entry but didn't flush
* it to the device, the new configuration will be lost.
*
* @return: 0 on Success and negative value otherwise.
@@ -828,14 +841,14 @@ int ena_com_allocate_debug_area(struct ena_com_dev *ena_dev,
/* ena_com_delete_debug_area - Free the debug area resources.
* @ena_dev: ENA communication layer struct
*
- * Free the allocate debug area.
+ * Free the allocated debug area.
*/
void ena_com_delete_debug_area(struct ena_com_dev *ena_dev);
/* ena_com_delete_host_info - Free the host info resources.
* @ena_dev: ENA communication layer struct
*
- * Free the allocate host info.
+ * Free the allocated host info.
*/
void ena_com_delete_host_info(struct ena_com_dev *ena_dev);
@@ -876,9 +889,9 @@ int ena_com_destroy_io_cq(struct ena_com_dev *ena_dev,
* @cmd_completion: command completion return value.
* @cmd_comp_size: command completion size.
- * Submit an admin command and then wait until the device will return a
+ * Submit an admin command and then wait until the device returns a
* completion.
- * The completion will be copyed into cmd_comp.
+ * The completion will be copied into cmd_comp.
*
* @return - 0 on success, negative value on failure.
*/
@@ -895,11 +908,6 @@ int ena_com_execute_admin_command(struct ena_com_admin_queue *admin_queue,
*/
int ena_com_init_interrupt_moderation(struct ena_com_dev *ena_dev);
-/* ena_com_destroy_interrupt_moderation - Destroy interrupt moderation resources
- * @ena_dev: ENA communication layer struct
- */
-void ena_com_destroy_interrupt_moderation(struct ena_com_dev *ena_dev);
-
/* ena_com_interrupt_moderation_supported - Return if interrupt moderation
* capability is supported by the device.
*
@@ -907,12 +915,6 @@ void ena_com_destroy_interrupt_moderation(struct ena_com_dev *ena_dev);
*/
bool ena_com_interrupt_moderation_supported(struct ena_com_dev *ena_dev);
-/* ena_com_config_default_interrupt_moderation_table - Restore the interrupt
- * moderation table back to the default parameters.
- * @ena_dev: ENA communication layer struct
- */
-void ena_com_config_default_interrupt_moderation_table(struct ena_com_dev *ena_dev);
-
/* ena_com_update_nonadaptive_moderation_interval_tx - Update the
* non-adaptive interval in Tx direction.
* @ena_dev: ENA communication layer struct
@@ -949,37 +951,15 @@ unsigned int ena_com_get_nonadaptive_moderation_interval_tx(struct ena_com_dev *
*/
unsigned int ena_com_get_nonadaptive_moderation_interval_rx(struct ena_com_dev *ena_dev);
-/* ena_com_init_intr_moderation_entry - Update a single entry in the interrupt
- * moderation table.
- * @ena_dev: ENA communication layer struct
- * @level: Interrupt moderation table level
- * @entry: Entry value
- *
- * Update a single entry in the interrupt moderation table.
- */
-void ena_com_init_intr_moderation_entry(struct ena_com_dev *ena_dev,
- enum ena_intr_moder_level level,
- struct ena_intr_moder_entry *entry);
-
-/* ena_com_get_intr_moderation_entry - Init ena_intr_moder_entry.
- * @ena_dev: ENA communication layer struct
- * @level: Interrupt moderation table level
- * @entry: Entry to fill.
- *
- * Initialize the entry according to the adaptive interrupt moderation table.
- */
-void ena_com_get_intr_moderation_entry(struct ena_com_dev *ena_dev,
- enum ena_intr_moder_level level,
- struct ena_intr_moder_entry *entry);
-
-
/* ena_com_config_dev_mode - Configure the placement policy of the device.
* @ena_dev: ENA communication layer struct
- * @llq: LLQ feature descriptor, retrieve via ena_com_get_dev_attr_feat.
- *
+ * @llq_features: LLQ feature descriptor, retrieved via
+ *		  ena_com_get_dev_attr_feat.
+ * @llq_default_config: The default driver LLQ parameter configuration
*/
int ena_com_config_dev_mode(struct ena_com_dev *ena_dev,
- struct ena_admin_feature_llq_desc *llq);
+ struct ena_admin_feature_llq_desc *llq_features,
+ struct ena_llq_configurations *llq_default_config);
static inline bool ena_com_get_adaptive_moderation_enabled(struct ena_com_dev *ena_dev)
{
@@ -996,80 +976,11 @@ static inline void ena_com_disable_adaptive_moderation(struct ena_com_dev *ena_d
ena_dev->adaptive_coalescing = false;
}
-/* ena_com_calculate_interrupt_delay - Calculate new interrupt delay
- * @ena_dev: ENA communication layer struct
- * @pkts: Number of packets since the last update
- * @bytes: Number of bytes received since the last update.
- * @smoothed_interval: Returned interval
- * @moder_tbl_idx: Current table level as input update new level as return
- * value.
- */
-static inline void ena_com_calculate_interrupt_delay(struct ena_com_dev *ena_dev,
- unsigned int pkts,
- unsigned int bytes,
- unsigned int *smoothed_interval,
- unsigned int *moder_tbl_idx)
-{
- enum ena_intr_moder_level curr_moder_idx, new_moder_idx;
- struct ena_intr_moder_entry *curr_moder_entry;
- struct ena_intr_moder_entry *pred_moder_entry;
- struct ena_intr_moder_entry *new_moder_entry;
- struct ena_intr_moder_entry *intr_moder_tbl = ena_dev->intr_moder_tbl;
- unsigned int interval;
-
- /* We apply adaptive moderation on Rx path only.
- * Tx uses static interrupt moderation.
- */
- if (!pkts || !bytes)
- /* Tx interrupt, or spurious interrupt,
- * in both cases we just use same delay values
- */
- return;
-
- curr_moder_idx = (enum ena_intr_moder_level)(*moder_tbl_idx);
- if (unlikely(curr_moder_idx >= ENA_INTR_MAX_NUM_OF_LEVELS)) {
- ena_trc_err("Wrong moderation index %u\n", curr_moder_idx);
- return;
- }
-
- curr_moder_entry = &intr_moder_tbl[curr_moder_idx];
- new_moder_idx = curr_moder_idx;
-
- if (curr_moder_idx == ENA_INTR_MODER_LOWEST) {
- if ((pkts > curr_moder_entry->pkts_per_interval) ||
- (bytes > curr_moder_entry->bytes_per_interval))
- new_moder_idx =
- (enum ena_intr_moder_level)(curr_moder_idx + ENA_INTR_MODER_LEVEL_STRIDE);
- } else {
- pred_moder_entry = &intr_moder_tbl[curr_moder_idx - ENA_INTR_MODER_LEVEL_STRIDE];
-
- if ((pkts <= pred_moder_entry->pkts_per_interval) ||
- (bytes <= pred_moder_entry->bytes_per_interval))
- new_moder_idx =
- (enum ena_intr_moder_level)(curr_moder_idx - ENA_INTR_MODER_LEVEL_STRIDE);
- else if ((pkts > curr_moder_entry->pkts_per_interval) ||
- (bytes > curr_moder_entry->bytes_per_interval)) {
- if (curr_moder_idx != ENA_INTR_MODER_HIGHEST)
- new_moder_idx =
- (enum ena_intr_moder_level)(curr_moder_idx + ENA_INTR_MODER_LEVEL_STRIDE);
- }
- }
- new_moder_entry = &intr_moder_tbl[new_moder_idx];
-
- interval = new_moder_entry->intr_moder_interval;
- *smoothed_interval = (
- (interval * ENA_INTR_DELAY_NEW_VALUE_WEIGHT +
- ENA_INTR_DELAY_OLD_VALUE_WEIGHT * (*smoothed_interval)) + 5) /
- 10;
-
- *moder_tbl_idx = new_moder_idx;
-}
-
/* ena_com_update_intr_reg - Prepare interrupt register
* @intr_reg: interrupt register to update.
* @rx_delay_interval: Rx interval in usecs
* @tx_delay_interval: Tx interval in usecs
- * @unmask: unask enable/disable
+ * @unmask: unmask enable/disable
*
* Prepare interrupt update register with the supplied parameters.
*/
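A typical per-queue interrupt handler sketch; the wrapper name is hypothetical, and ena_com_unmask_intr() taking an io_cq plus a struct ena_eth_io_intr_reg is assumed from ena_eth_com.h rather than shown in this hunk:

static void
example_rearm_interrupt(struct ena_com_io_cq *io_cq, u32 rx_usecs, u32 tx_usecs)
{
	struct ena_eth_io_intr_reg intr_reg;

	/* Re-arm with the current per-queue intervals and unmask the
	 * interrupt in the same register write. */
	ena_com_update_intr_reg(&intr_reg, rx_usecs, tx_usecs, true);
	ena_com_unmask_intr(io_cq, &intr_reg);
}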
@@ -1101,7 +1012,7 @@ static inline u8 *ena_com_get_next_bounce_buffer(struct ena_com_io_bounce_buffer
buf = bounce_buf_ctrl->base_buffer +
(bounce_buf_ctrl->next_to_use++ & (buffers_num - 1)) * size;
- prefetch(bounce_buf_ctrl->base_buffer +
+ prefetchw(bounce_buf_ctrl->base_buffer +
(bounce_buf_ctrl->next_to_use & (buffers_num - 1)) * size);
return buf;
diff --git a/sys/contrib/ena-com/ena_defs/ena_admin_defs.h b/sys/contrib/ena-com/ena_defs/ena_admin_defs.h
index f32bfccb672d..52cdb9e5e394 100644
--- a/sys/contrib/ena-com/ena_defs/ena_admin_defs.h
+++ b/sys/contrib/ena-com/ena_defs/ena_admin_defs.h
@@ -1,7 +1,7 @@
/*-
* BSD LICENSE
*
- * Copyright (c) 2015-2017 Amazon.com, Inc. or its affiliates.
+ * Copyright (c) 2015-2020 Amazon.com, Inc. or its affiliates.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -33,117 +33,86 @@
#ifndef _ENA_ADMIN_H_
#define _ENA_ADMIN_H_
-enum ena_admin_aq_opcode {
- ENA_ADMIN_CREATE_SQ = 1,
-
- ENA_ADMIN_DESTROY_SQ = 2,
-
- ENA_ADMIN_CREATE_CQ = 3,
-
- ENA_ADMIN_DESTROY_CQ = 4,
+#define ENA_ADMIN_EXTRA_PROPERTIES_STRING_LEN 32
+#define ENA_ADMIN_EXTRA_PROPERTIES_COUNT 32
- ENA_ADMIN_GET_FEATURE = 8,
-
- ENA_ADMIN_SET_FEATURE = 9,
-
- ENA_ADMIN_GET_STATS = 11,
+enum ena_admin_aq_opcode {
+ ENA_ADMIN_CREATE_SQ = 1,
+ ENA_ADMIN_DESTROY_SQ = 2,
+ ENA_ADMIN_CREATE_CQ = 3,
+ ENA_ADMIN_DESTROY_CQ = 4,
+ ENA_ADMIN_GET_FEATURE = 8,
+ ENA_ADMIN_SET_FEATURE = 9,
+ ENA_ADMIN_GET_STATS = 11,
};
enum ena_admin_aq_completion_status {
- ENA_ADMIN_SUCCESS = 0,
-
- ENA_ADMIN_RESOURCE_ALLOCATION_FAILURE = 1,
-
- ENA_ADMIN_BAD_OPCODE = 2,
-
- ENA_ADMIN_UNSUPPORTED_OPCODE = 3,
-
- ENA_ADMIN_MALFORMED_REQUEST = 4,
-
+ ENA_ADMIN_SUCCESS = 0,
+ ENA_ADMIN_RESOURCE_ALLOCATION_FAILURE = 1,
+ ENA_ADMIN_BAD_OPCODE = 2,
+ ENA_ADMIN_UNSUPPORTED_OPCODE = 3,
+ ENA_ADMIN_MALFORMED_REQUEST = 4,
/* Additional status is provided in ACQ entry extended_status */
- ENA_ADMIN_ILLEGAL_PARAMETER = 5,
-
- ENA_ADMIN_UNKNOWN_ERROR = 6,
+ ENA_ADMIN_ILLEGAL_PARAMETER = 5,
+ ENA_ADMIN_UNKNOWN_ERROR = 6,
+ ENA_ADMIN_RESOURCE_BUSY = 7,
};
enum ena_admin_aq_feature_id {
- ENA_ADMIN_DEVICE_ATTRIBUTES = 1,
-
- ENA_ADMIN_MAX_QUEUES_NUM = 2,
-
- ENA_ADMIN_HW_HINTS = 3,
-
- ENA_ADMIN_LLQ = 4,
-
- ENA_ADMIN_RSS_HASH_FUNCTION = 10,
-
- ENA_ADMIN_STATELESS_OFFLOAD_CONFIG = 11,
-
- ENA_ADMIN_RSS_REDIRECTION_TABLE_CONFIG = 12,
-
- ENA_ADMIN_MTU = 14,
-
- ENA_ADMIN_RSS_HASH_INPUT = 18,
-
- ENA_ADMIN_INTERRUPT_MODERATION = 20,
-
- ENA_ADMIN_AENQ_CONFIG = 26,
-
- ENA_ADMIN_LINK_CONFIG = 27,
-
- ENA_ADMIN_HOST_ATTR_CONFIG = 28,
-
- ENA_ADMIN_FEATURES_OPCODE_NUM = 32,
+ ENA_ADMIN_DEVICE_ATTRIBUTES = 1,
+ ENA_ADMIN_MAX_QUEUES_NUM = 2,
+ ENA_ADMIN_HW_HINTS = 3,
+ ENA_ADMIN_LLQ = 4,
+ ENA_ADMIN_EXTRA_PROPERTIES_STRINGS = 5,
+ ENA_ADMIN_EXTRA_PROPERTIES_FLAGS = 6,
+ ENA_ADMIN_MAX_QUEUES_EXT = 7,
+ ENA_ADMIN_RSS_HASH_FUNCTION = 10,
+ ENA_ADMIN_STATELESS_OFFLOAD_CONFIG = 11,
+ ENA_ADMIN_RSS_REDIRECTION_TABLE_CONFIG = 12,
+ ENA_ADMIN_MTU = 14,
+ ENA_ADMIN_RSS_HASH_INPUT = 18,
+ ENA_ADMIN_INTERRUPT_MODERATION = 20,
+ ENA_ADMIN_AENQ_CONFIG = 26,
+ ENA_ADMIN_LINK_CONFIG = 27,
+ ENA_ADMIN_HOST_ATTR_CONFIG = 28,
+ ENA_ADMIN_FEATURES_OPCODE_NUM = 32,
};
enum ena_admin_placement_policy_type {
/* descriptors and headers are in host memory */
- ENA_ADMIN_PLACEMENT_POLICY_HOST = 1,
-
+ ENA_ADMIN_PLACEMENT_POLICY_HOST = 1,
/* descriptors and headers are in device memory (a.k.a Low Latency
* Queue)
*/
- ENA_ADMIN_PLACEMENT_POLICY_DEV = 3,
+ ENA_ADMIN_PLACEMENT_POLICY_DEV = 3,
};
enum ena_admin_link_types {
- ENA_ADMIN_LINK_SPEED_1G = 0x1,
-
- ENA_ADMIN_LINK_SPEED_2_HALF_G = 0x2,
-
- ENA_ADMIN_LINK_SPEED_5G = 0x4,
-
- ENA_ADMIN_LINK_SPEED_10G = 0x8,
-
- ENA_ADMIN_LINK_SPEED_25G = 0x10,
-
- ENA_ADMIN_LINK_SPEED_40G = 0x20,
-
- ENA_ADMIN_LINK_SPEED_50G = 0x40,
-
- ENA_ADMIN_LINK_SPEED_100G = 0x80,
-
- ENA_ADMIN_LINK_SPEED_200G = 0x100,
-
- ENA_ADMIN_LINK_SPEED_400G = 0x200,
+ ENA_ADMIN_LINK_SPEED_1G = 0x1,
+ ENA_ADMIN_LINK_SPEED_2_HALF_G = 0x2,
+ ENA_ADMIN_LINK_SPEED_5G = 0x4,
+ ENA_ADMIN_LINK_SPEED_10G = 0x8,
+ ENA_ADMIN_LINK_SPEED_25G = 0x10,
+ ENA_ADMIN_LINK_SPEED_40G = 0x20,
+ ENA_ADMIN_LINK_SPEED_50G = 0x40,
+ ENA_ADMIN_LINK_SPEED_100G = 0x80,
+ ENA_ADMIN_LINK_SPEED_200G = 0x100,
+ ENA_ADMIN_LINK_SPEED_400G = 0x200,
};
enum ena_admin_completion_policy_type {
/* completion queue entry for each sq descriptor */
- ENA_ADMIN_COMPLETION_POLICY_DESC = 0,
-
+ ENA_ADMIN_COMPLETION_POLICY_DESC = 0,
/* completion queue entry upon request in sq descriptor */
- ENA_ADMIN_COMPLETION_POLICY_DESC_ON_DEMAND = 1,
-
+ ENA_ADMIN_COMPLETION_POLICY_DESC_ON_DEMAND = 1,
/* current queue head pointer is updated in OS memory upon sq
* descriptor request
*/
- ENA_ADMIN_COMPLETION_POLICY_HEAD_ON_DEMAND = 2,
-
+ ENA_ADMIN_COMPLETION_POLICY_HEAD_ON_DEMAND = 2,
/* current queue head pointer is updated in OS memory for each sq
* descriptor
*/
- ENA_ADMIN_COMPLETION_POLICY_HEAD = 3,
+ ENA_ADMIN_COMPLETION_POLICY_HEAD = 3,
};
/* basic stats return ena_admin_basic_stats while extended stats return a
@@ -151,15 +120,13 @@ enum ena_admin_completion_policy_type {
* device id
*/
enum ena_admin_get_stats_type {
- ENA_ADMIN_GET_STATS_TYPE_BASIC = 0,
-
- ENA_ADMIN_GET_STATS_TYPE_EXTENDED = 1,
+ ENA_ADMIN_GET_STATS_TYPE_BASIC = 0,
+ ENA_ADMIN_GET_STATS_TYPE_EXTENDED = 1,
};
enum ena_admin_get_stats_scope {
- ENA_ADMIN_SPECIFIC_QUEUE = 0,
-
- ENA_ADMIN_ETH_TRAFFIC = 1,
+ ENA_ADMIN_SPECIFIC_QUEUE = 0,
+ ENA_ADMIN_ETH_TRAFFIC = 1,
};
struct ena_admin_aq_common_desc {
@@ -230,7 +197,9 @@ struct ena_admin_acq_common_desc {
uint16_t extended_status;
- /* serves as a hint what AQ entries can be revoked */
+ /* indicates to the driver which AQ entry has been consumed by the
+ * device and could be reused
+ */
uint16_t sq_head_indx;
};
@@ -299,9 +268,8 @@ struct ena_admin_aq_create_sq_cmd {
};
enum ena_admin_sq_direction {
- ENA_ADMIN_SQ_DIRECTION_TX = 1,
-
- ENA_ADMIN_SQ_DIRECTION_RX = 2,
+ ENA_ADMIN_SQ_DIRECTION_TX = 1,
+ ENA_ADMIN_SQ_DIRECTION_RX = 2,
};
struct ena_admin_acq_create_sq_resp_desc {
@@ -441,6 +409,10 @@ struct ena_admin_basic_stats {
uint32_t rx_drops_low;
uint32_t rx_drops_high;
+
+ uint32_t tx_drops_low;
+
+ uint32_t tx_drops_high;
};
struct ena_admin_acq_get_stats_resp {
@@ -459,7 +431,13 @@ struct ena_admin_get_set_feature_common_desc {
/* as appears in ena_admin_aq_feature_id */
uint8_t feature_id;
- uint16_t reserved16;
+ /* The driver specifies the max feature version it supports and the
+ * device responds with the currently supported feature version. The
+ * field is zero based
+ */
+ uint8_t feature_version;
+
+ uint8_t reserved8;
};
struct ena_admin_device_attr_feature_desc {
@@ -488,30 +466,23 @@ struct ena_admin_device_attr_feature_desc {
enum ena_admin_llq_header_location {
/* header is in descriptor list */
- ENA_ADMIN_INLINE_HEADER = 1,
-
+ ENA_ADMIN_INLINE_HEADER = 1,
/* header in a separate ring, implies 16B descriptor list entry */
- ENA_ADMIN_HEADER_RING = 2,
+ ENA_ADMIN_HEADER_RING = 2,
};
enum ena_admin_llq_ring_entry_size {
- ENA_ADMIN_LIST_ENTRY_SIZE_128B = 1,
-
- ENA_ADMIN_LIST_ENTRY_SIZE_192B = 2,
-
- ENA_ADMIN_LIST_ENTRY_SIZE_256B = 4,
+ ENA_ADMIN_LIST_ENTRY_SIZE_128B = 1,
+ ENA_ADMIN_LIST_ENTRY_SIZE_192B = 2,
+ ENA_ADMIN_LIST_ENTRY_SIZE_256B = 4,
};
enum ena_admin_llq_num_descs_before_header {
- ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_0 = 0,
-
- ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_1 = 1,
-
- ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_2 = 2,
-
- ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_4 = 4,
-
- ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_8 = 8,
+ ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_0 = 0,
+ ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_1 = 1,
+ ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_2 = 2,
+ ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_4 = 4,
+ ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_8 = 8,
};
/* packet descriptor list entry always starts with one or more descriptors,
@@ -521,9 +492,38 @@ enum ena_admin_llq_num_descs_before_header {
* mode
*/
enum ena_admin_llq_stride_ctrl {
- ENA_ADMIN_SINGLE_DESC_PER_ENTRY = 1,
+ ENA_ADMIN_SINGLE_DESC_PER_ENTRY = 1,
+ ENA_ADMIN_MULTIPLE_DESCS_PER_ENTRY = 2,
+};
+
+enum ena_admin_accel_mode_feat {
+ ENA_ADMIN_DISABLE_META_CACHING = 0,
+ ENA_ADMIN_LIMIT_TX_BURST = 1,
+};
+
+struct ena_admin_accel_mode_get {
+ /* bit field of enum ena_admin_accel_mode_feat */
+ uint16_t supported_flags;
+
+ /* maximum burst size between two doorbells. The size is in bytes */
+ uint16_t max_tx_burst_size;
+};
+
+struct ena_admin_accel_mode_set {
+ /* bit field of enum ena_admin_accel_mode_feat */
+ uint16_t enabled_flags;
+
+ uint16_t reserved;
+};
+
+struct ena_admin_accel_mode_req {
+ union {
+ uint32_t raw[2];
+
+ struct ena_admin_accel_mode_get get;
- ENA_ADMIN_MULTIPLE_DESCS_PER_ENTRY = 2,
+ struct ena_admin_accel_mode_set set;
+ } u;
};
struct ena_admin_feature_llq_desc {
@@ -531,32 +531,84 @@ struct ena_admin_feature_llq_desc {
uint32_t max_llq_depth;
- /* use enum ena_admin_llq_header_location */
- uint16_t header_location_ctrl;
+	/* specifies the header locations the device supports. bitfield of
+ * enum ena_admin_llq_header_location.
+ */
+ uint16_t header_location_ctrl_supported;
+
+ /* the header location the driver selected to use. */
+ uint16_t header_location_ctrl_enabled;
/* if inline header is specified - this is the size of descriptor
* list entry. If header in a separate ring is specified - this is
- * the size of header ring entry. use enum
- * ena_admin_llq_ring_entry_size
+ * the size of header ring entry. bitfield of enum
+	 * ena_admin_llq_ring_entry_size, specifying the entry sizes the device
+ * supports
*/
- uint16_t entry_size_ctrl;
+ uint16_t entry_size_ctrl_supported;
+
+ /* the entry size the driver selected to use. */
+ uint16_t entry_size_ctrl_enabled;
/* valid only if inline header is specified. First entry associated
* with the packet includes descriptors and header. Rest of the
* entries occupied by descriptors. This parameter defines the max
	 * number of descriptors preceding the header in the first entry.
- * Values: use enum llq_num_descs_before_header
+	 * The field is a bitfield of enum
+	 * ena_admin_llq_num_descs_before_header and specifies the values the
+ * device supports
*/
- uint16_t desc_num_before_header_ctrl;
+ uint16_t desc_num_before_header_supported;
+
+	/* the desired value the driver selected to use */
+ uint16_t desc_num_before_header_enabled;
- /* valid, only if inline header is specified. Note, use enum
- * ena_admin_llq_stide_ctrl
+ /* valid only if inline was chosen. bitfield of enum
+ * ena_admin_llq_stride_ctrl
*/
- uint16_t descriptors_stride_ctrl;
+ uint16_t descriptors_stride_ctrl_supported;
+
+ /* the stride control the driver selected to use */
+ uint16_t descriptors_stride_ctrl_enabled;
+
+ /* reserved */
+ uint32_t reserved1;
+
+	/* accelerated low latency queues requirement. driver needs to
+	 * support those requirements in order to use accelerated llq
+ */
+ struct ena_admin_accel_mode_req accel_mode;
+};
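A hedged sketch of how a driver could consult the new accel_mode word; the helper name and output variables are hypothetical, and llq_features is the struct ena_admin_feature_llq_desc returned for the LLQ feature:

static void
example_parse_accel_mode(const struct ena_admin_feature_llq_desc *llq_features,
    uint16_t *max_tx_burst, bool *disable_meta_caching)
{
	uint16_t supported = llq_features->accel_mode.u.get.supported_flags;

	*max_tx_burst = 0;
	*disable_meta_caching = false;

	if (supported & BIT(ENA_ADMIN_LIMIT_TX_BURST))
		*max_tx_burst = llq_features->accel_mode.u.get.max_tx_burst_size;
	if (supported & BIT(ENA_ADMIN_DISABLE_META_CACHING))
		*disable_meta_caching = true;
}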
+
+struct ena_admin_queue_ext_feature_fields {
+ uint32_t max_tx_sq_num;
+
+ uint32_t max_tx_cq_num;
+
+ uint32_t max_rx_sq_num;
+
+ uint32_t max_rx_cq_num;
+
+ uint32_t max_tx_sq_depth;
+
+ uint32_t max_tx_cq_depth;
+
+ uint32_t max_rx_sq_depth;
+
+ uint32_t max_rx_cq_depth;
+
+ uint32_t max_tx_header_size;
+
+	/* Maximum number of descriptors, including the meta descriptor, allowed for
+ * a single Tx packet
+ */
+ uint16_t max_per_packet_tx_descs;
+
+	/* Maximum number of descriptors allowed for a single Rx packet */
+ uint16_t max_per_packet_rx_descs;
};
struct ena_admin_queue_feature_desc {
- /* including LLQs */
uint32_t max_sq_num;
uint32_t max_sq_depth;
@@ -585,6 +637,14 @@ struct ena_admin_set_feature_mtu_desc {
uint32_t mtu;
};
+struct ena_admin_get_extra_properties_strings_desc {
+ uint32_t count;
+};
+
+struct ena_admin_get_extra_properties_flags_desc {
+ uint32_t flags;
+};
+
struct ena_admin_set_feature_host_attr_desc {
/* host OS info base address in OS memory. host info is 4KB of
* physically contiguous
@@ -655,9 +715,8 @@ struct ena_admin_feature_offload_desc {
};
enum ena_admin_hash_functions {
- ENA_ADMIN_TOEPLITZ = 1,
-
- ENA_ADMIN_CRC32 = 2,
+ ENA_ADMIN_TOEPLITZ = 1,
+ ENA_ADMIN_CRC32 = 2,
};
struct ena_admin_feature_rss_flow_hash_control {
@@ -683,50 +742,35 @@ struct ena_admin_feature_rss_flow_hash_function {
/* RSS flow hash protocols */
enum ena_admin_flow_hash_proto {
- ENA_ADMIN_RSS_TCP4 = 0,
-
- ENA_ADMIN_RSS_UDP4 = 1,
-
- ENA_ADMIN_RSS_TCP6 = 2,
-
- ENA_ADMIN_RSS_UDP6 = 3,
-
- ENA_ADMIN_RSS_IP4 = 4,
-
- ENA_ADMIN_RSS_IP6 = 5,
-
- ENA_ADMIN_RSS_IP4_FRAG = 6,
-
- ENA_ADMIN_RSS_NOT_IP = 7,
-
+ ENA_ADMIN_RSS_TCP4 = 0,
+ ENA_ADMIN_RSS_UDP4 = 1,
+ ENA_ADMIN_RSS_TCP6 = 2,
+ ENA_ADMIN_RSS_UDP6 = 3,
+ ENA_ADMIN_RSS_IP4 = 4,
+ ENA_ADMIN_RSS_IP6 = 5,
+ ENA_ADMIN_RSS_IP4_FRAG = 6,
+ ENA_ADMIN_RSS_NOT_IP = 7,
/* TCPv6 with extension header */
- ENA_ADMIN_RSS_TCP6_EX = 8,
-
+ ENA_ADMIN_RSS_TCP6_EX = 8,
/* IPv6 with extension header */
- ENA_ADMIN_RSS_IP6_EX = 9,
-
- ENA_ADMIN_RSS_PROTO_NUM = 16,
+ ENA_ADMIN_RSS_IP6_EX = 9,
+ ENA_ADMIN_RSS_PROTO_NUM = 16,
};
/* RSS flow hash fields */
enum ena_admin_flow_hash_fields {
/* Ethernet Dest Addr */
- ENA_ADMIN_RSS_L2_DA = BIT(0),
-
+ ENA_ADMIN_RSS_L2_DA = BIT(0),
/* Ethernet Src Addr */
- ENA_ADMIN_RSS_L2_SA = BIT(1),
-
+ ENA_ADMIN_RSS_L2_SA = BIT(1),
/* ipv4/6 Dest Addr */
- ENA_ADMIN_RSS_L3_DA = BIT(2),
-
+ ENA_ADMIN_RSS_L3_DA = BIT(2),
/* ipv4/6 Src Addr */
- ENA_ADMIN_RSS_L3_SA = BIT(3),
-
+ ENA_ADMIN_RSS_L3_SA = BIT(3),
/* tcp/udp Dest Port */
- ENA_ADMIN_RSS_L4_DP = BIT(4),
-
+ ENA_ADMIN_RSS_L4_DP = BIT(4),
/* tcp/udp Src Port */
- ENA_ADMIN_RSS_L4_SP = BIT(5),
+ ENA_ADMIN_RSS_L4_SP = BIT(5),
};
struct ena_admin_proto_input {
@@ -765,15 +809,13 @@ struct ena_admin_feature_rss_flow_hash_input {
};
enum ena_admin_os_type {
- ENA_ADMIN_OS_LINUX = 1,
-
- ENA_ADMIN_OS_WIN = 2,
-
- ENA_ADMIN_OS_DPDK = 3,
-
- ENA_ADMIN_OS_FREEBSD = 4,
-
- ENA_ADMIN_OS_IPXE = 5,
+ ENA_ADMIN_OS_LINUX = 1,
+ ENA_ADMIN_OS_WIN = 2,
+ ENA_ADMIN_OS_DPDK = 3,
+ ENA_ADMIN_OS_FREEBSD = 4,
+ ENA_ADMIN_OS_IPXE = 5,
+ ENA_ADMIN_OS_ESXI = 6,
+ ENA_ADMIN_OS_GROUPS_NUM = 6,
};
struct ena_admin_host_info {
@@ -795,11 +837,35 @@ struct ena_admin_host_info {
/* 7:0 : major
* 15:8 : minor
* 23:16 : sub_minor
+ * 31:24 : module_type
*/
uint32_t driver_version;
/* features bitmap */
- uint32_t supported_network_features[4];
+ uint32_t supported_network_features[2];
+
+ /* ENA spec version of driver */
+ uint16_t ena_spec_version;
+
+ /* ENA device's Bus, Device and Function
+ * 2:0 : function
+ * 7:3 : device
+ * 15:8 : bus
+ */
+ uint16_t bdf;
+
+ /* Number of CPUs */
+ uint16_t num_cpus;
+
+ uint16_t reserved;
+
+ /* 0 : mutable_rss_table_size
+ * 1 : rx_offset
+ * 2 : interrupt_moderation
+ * 3 : map_rx_buf_bidirectional
+ * 31:4 : reserved
+ */
+ uint32_t driver_supported_features;
};
struct ena_admin_rss_ind_table_entry {
@@ -818,7 +884,12 @@ struct ena_admin_feature_rss_ind_table {
/* table size (2^size) */
uint16_t size;
- uint16_t reserved;
+ /* 0 : one_entry_update - The ENA device supports
+ * setting a single RSS table entry
+ */
+ uint8_t flags;
+
+ uint8_t reserved;
/* index of the inline entry. 0xFFFFFFFF means invalid */
uint32_t inline_index;
@@ -864,6 +935,19 @@ struct ena_admin_get_feat_cmd {
uint32_t raw[11];
};
+struct ena_admin_queue_ext_feature_desc {
+ /* version */
+ uint8_t version;
+
+ uint8_t reserved1[3];
+
+ union {
+ struct ena_admin_queue_ext_feature_fields max_queue_ext;
+
+ uint32_t raw[10];
+ } ;
+};
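A sketch of consuming the versioned queue feature, assuming feat is a struct ena_com_dev_get_features_ctx filled by ena_com_get_dev_attr_feat(); the helper name, the version-based selection rule, and the legacy fallback fields are assumptions for illustration:

static void
example_pick_queue_depths(const struct ena_com_dev_get_features_ctx *feat,
    uint32_t *tx_depth, uint32_t *rx_depth)
{
	if (feat->max_queue_ext.version >= ENA_FEATURE_MAX_QUEUE_EXT_VER) {
		*tx_depth = feat->max_queue_ext.max_queue_ext.max_tx_sq_depth;
		*rx_depth = feat->max_queue_ext.max_queue_ext.max_rx_cq_depth;
	} else {
		/* legacy descriptor: one depth limit shared by Tx and Rx */
		*tx_depth = feat->max_queues.max_sq_depth;
		*rx_depth = feat->max_queues.max_sq_depth;
	}
}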
+
struct ena_admin_get_feat_resp {
struct ena_admin_acq_common_desc acq_common_desc;
@@ -876,6 +960,8 @@ struct ena_admin_get_feat_resp {
struct ena_admin_queue_feature_desc max_queue;
+ struct ena_admin_queue_ext_feature_desc max_queue_ext;
+
struct ena_admin_feature_aenq_desc aenq;
struct ena_admin_get_feature_link_desc link;
@@ -891,6 +977,10 @@ struct ena_admin_get_feat_resp {
struct ena_admin_feature_intr_moder_desc intr_moderation;
struct ena_admin_ena_hw_hints hw_hints;
+
+ struct ena_admin_get_extra_properties_strings_desc extra_properties_strings;
+
+ struct ena_admin_get_extra_properties_flags_desc extra_properties_flags;
} u;
};
@@ -921,6 +1011,9 @@ struct ena_admin_set_feat_cmd {
/* rss indirection table */
struct ena_admin_feature_rss_ind_table ind_table;
+
+ /* LLQ configuration */
+ struct ena_admin_feature_llq_desc llq;
} u;
};
@@ -937,7 +1030,9 @@ struct ena_admin_aenq_common_desc {
uint16_t syndrom;
- /* 0 : phase */
+ /* 0 : phase
+ * 7:1 : reserved - MBZ
+ */
uint8_t flags;
uint8_t reserved1[3];
@@ -949,25 +1044,18 @@ struct ena_admin_aenq_common_desc {
/* asynchronous event notification groups */
enum ena_admin_aenq_group {
- ENA_ADMIN_LINK_CHANGE = 0,
-
- ENA_ADMIN_FATAL_ERROR = 1,
-
- ENA_ADMIN_WARNING = 2,
-
- ENA_ADMIN_NOTIFICATION = 3,
-
- ENA_ADMIN_KEEP_ALIVE = 4,
-
- ENA_ADMIN_AENQ_GROUPS_NUM = 5,
+ ENA_ADMIN_LINK_CHANGE = 0,
+ ENA_ADMIN_FATAL_ERROR = 1,
+ ENA_ADMIN_WARNING = 2,
+ ENA_ADMIN_NOTIFICATION = 3,
+ ENA_ADMIN_KEEP_ALIVE = 4,
+ ENA_ADMIN_AENQ_GROUPS_NUM = 5,
};
enum ena_admin_aenq_notification_syndrom {
- ENA_ADMIN_SUSPEND = 0,
-
- ENA_ADMIN_RESUME = 1,
-
- ENA_ADMIN_UPDATE_HINTS = 2,
+ ENA_ADMIN_SUSPEND = 0,
+ ENA_ADMIN_RESUME = 1,
+ ENA_ADMIN_UPDATE_HINTS = 2,
};
struct ena_admin_aenq_entry {
@@ -990,6 +1078,10 @@ struct ena_admin_aenq_keep_alive_desc {
uint32_t rx_drops_low;
uint32_t rx_drops_high;
+
+ uint32_t tx_drops_low;
+
+ uint32_t tx_drops_high;
};
struct ena_admin_ena_mmio_req_read_less_resp {
@@ -1002,27 +1094,27 @@ struct ena_admin_ena_mmio_req_read_less_resp {
};
/* aq_common_desc */
-#define ENA_ADMIN_AQ_COMMON_DESC_COMMAND_ID_MASK GENMASK(11, 0)
-#define ENA_ADMIN_AQ_COMMON_DESC_PHASE_MASK BIT(0)
-#define ENA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_SHIFT 1
-#define ENA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_MASK BIT(1)
-#define ENA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_SHIFT 2
-#define ENA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_MASK BIT(2)
+#define ENA_ADMIN_AQ_COMMON_DESC_COMMAND_ID_MASK GENMASK(11, 0)
+#define ENA_ADMIN_AQ_COMMON_DESC_PHASE_MASK BIT(0)
+#define ENA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_SHIFT 1
+#define ENA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_MASK BIT(1)
+#define ENA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_SHIFT 2
+#define ENA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_MASK BIT(2)
/* sq */
-#define ENA_ADMIN_SQ_SQ_DIRECTION_SHIFT 5
-#define ENA_ADMIN_SQ_SQ_DIRECTION_MASK GENMASK(7, 5)
+#define ENA_ADMIN_SQ_SQ_DIRECTION_SHIFT 5
+#define ENA_ADMIN_SQ_SQ_DIRECTION_MASK GENMASK(7, 5)
/* acq_common_desc */
-#define ENA_ADMIN_ACQ_COMMON_DESC_COMMAND_ID_MASK GENMASK(11, 0)
-#define ENA_ADMIN_ACQ_COMMON_DESC_PHASE_MASK BIT(0)
+#define ENA_ADMIN_ACQ_COMMON_DESC_COMMAND_ID_MASK GENMASK(11, 0)
+#define ENA_ADMIN_ACQ_COMMON_DESC_PHASE_MASK BIT(0)
/* aq_create_sq_cmd */
-#define ENA_ADMIN_AQ_CREATE_SQ_CMD_SQ_DIRECTION_SHIFT 5
-#define ENA_ADMIN_AQ_CREATE_SQ_CMD_SQ_DIRECTION_MASK GENMASK(7, 5)
-#define ENA_ADMIN_AQ_CREATE_SQ_CMD_PLACEMENT_POLICY_MASK GENMASK(3, 0)
-#define ENA_ADMIN_AQ_CREATE_SQ_CMD_COMPLETION_POLICY_SHIFT 4
-#define ENA_ADMIN_AQ_CREATE_SQ_CMD_COMPLETION_POLICY_MASK GENMASK(6, 4)
+#define ENA_ADMIN_AQ_CREATE_SQ_CMD_SQ_DIRECTION_SHIFT 5
+#define ENA_ADMIN_AQ_CREATE_SQ_CMD_SQ_DIRECTION_MASK GENMASK(7, 5)
+#define ENA_ADMIN_AQ_CREATE_SQ_CMD_PLACEMENT_POLICY_MASK GENMASK(3, 0)
+#define ENA_ADMIN_AQ_CREATE_SQ_CMD_COMPLETION_POLICY_SHIFT 4
+#define ENA_ADMIN_AQ_CREATE_SQ_CMD_COMPLETION_POLICY_MASK GENMASK(6, 4)
#define ENA_ADMIN_AQ_CREATE_SQ_CMD_IS_PHYSICALLY_CONTIGUOUS_MASK BIT(0)
/* aq_create_cq_cmd */
@@ -1031,12 +1123,12 @@ struct ena_admin_ena_mmio_req_read_less_resp {
#define ENA_ADMIN_AQ_CREATE_CQ_CMD_CQ_ENTRY_SIZE_WORDS_MASK GENMASK(4, 0)
/* get_set_feature_common_desc */
-#define ENA_ADMIN_GET_SET_FEATURE_COMMON_DESC_SELECT_MASK GENMASK(1, 0)
+#define ENA_ADMIN_GET_SET_FEATURE_COMMON_DESC_SELECT_MASK GENMASK(1, 0)
/* get_feature_link_desc */
-#define ENA_ADMIN_GET_FEATURE_LINK_DESC_AUTONEG_MASK BIT(0)
-#define ENA_ADMIN_GET_FEATURE_LINK_DESC_DUPLEX_SHIFT 1
-#define ENA_ADMIN_GET_FEATURE_LINK_DESC_DUPLEX_MASK BIT(1)
+#define ENA_ADMIN_GET_FEATURE_LINK_DESC_AUTONEG_MASK BIT(0)
+#define ENA_ADMIN_GET_FEATURE_LINK_DESC_DUPLEX_SHIFT 1
+#define ENA_ADMIN_GET_FEATURE_LINK_DESC_DUPLEX_MASK BIT(1)
/* feature_offload_desc */
#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L3_CSUM_IPV4_MASK BIT(0)
@@ -1048,19 +1140,19 @@ struct ena_admin_ena_mmio_req_read_less_resp {
#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_PART_MASK BIT(3)
#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_FULL_SHIFT 4
#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_FULL_MASK BIT(4)
-#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV4_SHIFT 5
-#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV4_MASK BIT(5)
-#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV6_SHIFT 6
-#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV6_MASK BIT(6)
-#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_ECN_SHIFT 7
-#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_ECN_MASK BIT(7)
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV4_SHIFT 5
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV4_MASK BIT(5)
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV6_SHIFT 6
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV6_MASK BIT(6)
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_ECN_SHIFT 7
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_ECN_MASK BIT(7)
#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L3_CSUM_IPV4_MASK BIT(0)
#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV4_CSUM_SHIFT 1
#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV4_CSUM_MASK BIT(1)
#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV6_CSUM_SHIFT 2
#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV6_CSUM_MASK BIT(2)
-#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_HASH_SHIFT 3
-#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_HASH_MASK BIT(3)
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_HASH_SHIFT 3
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_HASH_MASK BIT(3)
/* feature_rss_flow_hash_function */
#define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_FUNCTION_FUNCS_MASK GENMASK(7, 0)
@@ -1068,28 +1160,45 @@ struct ena_admin_ena_mmio_req_read_less_resp {
/* feature_rss_flow_hash_input */
#define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_L3_SORT_SHIFT 1
-#define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_L3_SORT_MASK BIT(1)
+#define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_L3_SORT_MASK BIT(1)
#define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_L4_SORT_SHIFT 2
-#define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_L4_SORT_MASK BIT(2)
+#define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_L4_SORT_MASK BIT(2)
#define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_ENABLE_L3_SORT_SHIFT 1
#define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_ENABLE_L3_SORT_MASK BIT(1)
#define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_ENABLE_L4_SORT_SHIFT 2
#define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_ENABLE_L4_SORT_MASK BIT(2)
/* host_info */
-#define ENA_ADMIN_HOST_INFO_MAJOR_MASK GENMASK(7, 0)
-#define ENA_ADMIN_HOST_INFO_MINOR_SHIFT 8
-#define ENA_ADMIN_HOST_INFO_MINOR_MASK GENMASK(15, 8)
-#define ENA_ADMIN_HOST_INFO_SUB_MINOR_SHIFT 16
-#define ENA_ADMIN_HOST_INFO_SUB_MINOR_MASK GENMASK(23, 16)
+#define ENA_ADMIN_HOST_INFO_MAJOR_MASK GENMASK(7, 0)
+#define ENA_ADMIN_HOST_INFO_MINOR_SHIFT 8
+#define ENA_ADMIN_HOST_INFO_MINOR_MASK GENMASK(15, 8)
+#define ENA_ADMIN_HOST_INFO_SUB_MINOR_SHIFT 16
+#define ENA_ADMIN_HOST_INFO_SUB_MINOR_MASK GENMASK(23, 16)
+#define ENA_ADMIN_HOST_INFO_MODULE_TYPE_SHIFT 24
+#define ENA_ADMIN_HOST_INFO_MODULE_TYPE_MASK GENMASK(31, 24)
+#define ENA_ADMIN_HOST_INFO_FUNCTION_MASK GENMASK(2, 0)
+#define ENA_ADMIN_HOST_INFO_DEVICE_SHIFT 3
+#define ENA_ADMIN_HOST_INFO_DEVICE_MASK GENMASK(7, 3)
+#define ENA_ADMIN_HOST_INFO_BUS_SHIFT 8
+#define ENA_ADMIN_HOST_INFO_BUS_MASK GENMASK(15, 8)
+#define ENA_ADMIN_HOST_INFO_MUTABLE_RSS_TABLE_SIZE_MASK BIT(0)
+#define ENA_ADMIN_HOST_INFO_RX_OFFSET_SHIFT 1
+#define ENA_ADMIN_HOST_INFO_RX_OFFSET_MASK BIT(1)
+#define ENA_ADMIN_HOST_INFO_INTERRUPT_MODERATION_SHIFT 2
+#define ENA_ADMIN_HOST_INFO_INTERRUPT_MODERATION_MASK BIT(2)
+#define ENA_ADMIN_HOST_INFO_MAP_RX_BUF_BIDIRECTIONAL_SHIFT 3
+#define ENA_ADMIN_HOST_INFO_MAP_RX_BUF_BIDIRECTIONAL_MASK BIT(3)
+
+/* feature_rss_ind_table */
+#define ENA_ADMIN_FEATURE_RSS_IND_TABLE_ONE_ENTRY_UPDATE_MASK BIT(0)
/* aenq_common_desc */
-#define ENA_ADMIN_AENQ_COMMON_DESC_PHASE_MASK BIT(0)
+#define ENA_ADMIN_AENQ_COMMON_DESC_PHASE_MASK BIT(0)
/* aenq_link_change_desc */
-#define ENA_ADMIN_AENQ_LINK_CHANGE_DESC_LINK_STATUS_MASK BIT(0)
+#define ENA_ADMIN_AENQ_LINK_CHANGE_DESC_LINK_STATUS_MASK BIT(0)
-#if !defined(ENA_DEFS_LINUX_MAINLINE)
+#if !defined(DEFS_LINUX_MAINLINE)
static inline uint16_t get_ena_admin_aq_common_desc_command_id(const struct ena_admin_aq_common_desc *p)
{
return p->command_id & ENA_ADMIN_AQ_COMMON_DESC_COMMAND_ID_MASK;
@@ -1460,6 +1569,96 @@ static inline void set_ena_admin_host_info_sub_minor(struct ena_admin_host_info
p->driver_version |= (val << ENA_ADMIN_HOST_INFO_SUB_MINOR_SHIFT) & ENA_ADMIN_HOST_INFO_SUB_MINOR_MASK;
}
+static inline uint32_t get_ena_admin_host_info_module_type(const struct ena_admin_host_info *p)
+{
+ return (p->driver_version & ENA_ADMIN_HOST_INFO_MODULE_TYPE_MASK) >> ENA_ADMIN_HOST_INFO_MODULE_TYPE_SHIFT;
+}
+
+static inline void set_ena_admin_host_info_module_type(struct ena_admin_host_info *p, uint32_t val)
+{
+ p->driver_version |= (val << ENA_ADMIN_HOST_INFO_MODULE_TYPE_SHIFT) & ENA_ADMIN_HOST_INFO_MODULE_TYPE_MASK;
+}
+
+static inline uint16_t get_ena_admin_host_info_function(const struct ena_admin_host_info *p)
+{
+ return p->bdf & ENA_ADMIN_HOST_INFO_FUNCTION_MASK;
+}
+
+static inline void set_ena_admin_host_info_function(struct ena_admin_host_info *p, uint16_t val)
+{
+ p->bdf |= val & ENA_ADMIN_HOST_INFO_FUNCTION_MASK;
+}
+
+static inline uint16_t get_ena_admin_host_info_device(const struct ena_admin_host_info *p)
+{
+ return (p->bdf & ENA_ADMIN_HOST_INFO_DEVICE_MASK) >> ENA_ADMIN_HOST_INFO_DEVICE_SHIFT;
+}
+
+static inline void set_ena_admin_host_info_device(struct ena_admin_host_info *p, uint16_t val)
+{
+ p->bdf |= (val << ENA_ADMIN_HOST_INFO_DEVICE_SHIFT) & ENA_ADMIN_HOST_INFO_DEVICE_MASK;
+}
+
+static inline uint16_t get_ena_admin_host_info_bus(const struct ena_admin_host_info *p)
+{
+ return (p->bdf & ENA_ADMIN_HOST_INFO_BUS_MASK) >> ENA_ADMIN_HOST_INFO_BUS_SHIFT;
+}
+
+static inline void set_ena_admin_host_info_bus(struct ena_admin_host_info *p, uint16_t val)
+{
+ p->bdf |= (val << ENA_ADMIN_HOST_INFO_BUS_SHIFT) & ENA_ADMIN_HOST_INFO_BUS_MASK;
+}
+
+static inline uint32_t get_ena_admin_host_info_mutable_rss_table_size(const struct ena_admin_host_info *p)
+{
+ return p->driver_supported_features & ENA_ADMIN_HOST_INFO_MUTABLE_RSS_TABLE_SIZE_MASK;
+}
+
+static inline void set_ena_admin_host_info_mutable_rss_table_size(struct ena_admin_host_info *p, uint32_t val)
+{
+ p->driver_supported_features |= val & ENA_ADMIN_HOST_INFO_MUTABLE_RSS_TABLE_SIZE_MASK;
+}
+
+static inline uint32_t get_ena_admin_host_info_rx_offset(const struct ena_admin_host_info *p)
+{
+ return (p->driver_supported_features & ENA_ADMIN_HOST_INFO_RX_OFFSET_MASK) >> ENA_ADMIN_HOST_INFO_RX_OFFSET_SHIFT;
+}
+
+static inline void set_ena_admin_host_info_rx_offset(struct ena_admin_host_info *p, uint32_t val)
+{
+ p->driver_supported_features |= (val << ENA_ADMIN_HOST_INFO_RX_OFFSET_SHIFT) & ENA_ADMIN_HOST_INFO_RX_OFFSET_MASK;
+}
+
+static inline uint32_t get_ena_admin_host_info_interrupt_moderation(const struct ena_admin_host_info *p)
+{
+ return (p->driver_supported_features & ENA_ADMIN_HOST_INFO_INTERRUPT_MODERATION_MASK) >> ENA_ADMIN_HOST_INFO_INTERRUPT_MODERATION_SHIFT;
+}
+
+static inline void set_ena_admin_host_info_interrupt_moderation(struct ena_admin_host_info *p, uint32_t val)
+{
+ p->driver_supported_features |= (val << ENA_ADMIN_HOST_INFO_INTERRUPT_MODERATION_SHIFT) & ENA_ADMIN_HOST_INFO_INTERRUPT_MODERATION_MASK;
+}
+
+static inline uint32_t get_ena_admin_host_info_map_rx_buf_bidirectional(const struct ena_admin_host_info *p)
+{
+ return (p->driver_supported_features & ENA_ADMIN_HOST_INFO_MAP_RX_BUF_BIDIRECTIONAL_MASK) >> ENA_ADMIN_HOST_INFO_MAP_RX_BUF_BIDIRECTIONAL_SHIFT;
+}
+
+static inline void set_ena_admin_host_info_map_rx_buf_bidirectional(struct ena_admin_host_info *p, uint32_t val)
+{
+ p->driver_supported_features |= (val << ENA_ADMIN_HOST_INFO_MAP_RX_BUF_BIDIRECTIONAL_SHIFT) & ENA_ADMIN_HOST_INFO_MAP_RX_BUF_BIDIRECTIONAL_MASK;
+}
+
+static inline uint8_t get_ena_admin_feature_rss_ind_table_one_entry_update(const struct ena_admin_feature_rss_ind_table *p)
+{
+ return p->flags & ENA_ADMIN_FEATURE_RSS_IND_TABLE_ONE_ENTRY_UPDATE_MASK;
+}
+
+static inline void set_ena_admin_feature_rss_ind_table_one_entry_update(struct ena_admin_feature_rss_ind_table *p, uint8_t val)
+{
+ p->flags |= val & ENA_ADMIN_FEATURE_RSS_IND_TABLE_ONE_ENTRY_UPDATE_MASK;
+}
+
static inline uint8_t get_ena_admin_aenq_common_desc_phase(const struct ena_admin_aenq_common_desc *p)
{
return p->flags & ENA_ADMIN_AENQ_COMMON_DESC_PHASE_MASK;
@@ -1480,5 +1679,5 @@ static inline void set_ena_admin_aenq_link_change_desc_link_status(struct ena_ad
p->flags |= val & ENA_ADMIN_AENQ_LINK_CHANGE_DESC_LINK_STATUS_MASK;
}
-#endif /* !defined(ENA_DEFS_LINUX_MAINLINE) */
-#endif /*_ENA_ADMIN_H_ */
+#endif /* !defined(DEFS_LINUX_MAINLINE) */
+#endif /* _ENA_ADMIN_H_ */
diff --git a/sys/contrib/ena-com/ena_defs/ena_common_defs.h b/sys/contrib/ena-com/ena_defs/ena_common_defs.h
index 66b381ba4077..88b90d44a79a 100644
--- a/sys/contrib/ena-com/ena_defs/ena_common_defs.h
+++ b/sys/contrib/ena-com/ena_defs/ena_common_defs.h
@@ -1,7 +1,7 @@
/*-
* BSD LICENSE
*
- * Copyright (c) 2015-2017 Amazon.com, Inc. or its affiliates.
+ * Copyright (c) 2015-2020 Amazon.com, Inc. or its affiliates.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -33,8 +33,8 @@
#ifndef _ENA_COMMON_H_
#define _ENA_COMMON_H_
-#define ENA_COMMON_SPEC_VERSION_MAJOR 0 /* */
-#define ENA_COMMON_SPEC_VERSION_MINOR 10 /* */
+#define ENA_COMMON_SPEC_VERSION_MAJOR 2
+#define ENA_COMMON_SPEC_VERSION_MINOR 0
/* ENA operates with 48-bit memory addresses. ena_mem_addr_t */
struct ena_common_mem_addr {
@@ -46,4 +46,4 @@ struct ena_common_mem_addr {
uint16_t reserved16;
};
-#endif /*_ENA_COMMON_H_ */
+#endif /* _ENA_COMMON_H_ */
diff --git a/sys/contrib/ena-com/ena_defs/ena_eth_io_defs.h b/sys/contrib/ena-com/ena_defs/ena_eth_io_defs.h
index f2cc0f0dea45..14f44d0d9a86 100644
--- a/sys/contrib/ena-com/ena_defs/ena_eth_io_defs.h
+++ b/sys/contrib/ena-com/ena_defs/ena_eth_io_defs.h
@@ -1,7 +1,7 @@
/*-
* BSD LICENSE
*
- * Copyright (c) 2015-2017 Amazon.com, Inc. or its affiliates.
+ * Copyright (c) 2015-2020 Amazon.com, Inc. or its affiliates.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -34,25 +34,18 @@
#define _ENA_ETH_IO_H_
enum ena_eth_io_l3_proto_index {
- ENA_ETH_IO_L3_PROTO_UNKNOWN = 0,
-
- ENA_ETH_IO_L3_PROTO_IPV4 = 8,
-
- ENA_ETH_IO_L3_PROTO_IPV6 = 11,
-
- ENA_ETH_IO_L3_PROTO_FCOE = 21,
-
- ENA_ETH_IO_L3_PROTO_ROCE = 22,
+ ENA_ETH_IO_L3_PROTO_UNKNOWN = 0,
+ ENA_ETH_IO_L3_PROTO_IPV4 = 8,
+ ENA_ETH_IO_L3_PROTO_IPV6 = 11,
+ ENA_ETH_IO_L3_PROTO_FCOE = 21,
+ ENA_ETH_IO_L3_PROTO_ROCE = 22,
};
enum ena_eth_io_l4_proto_index {
- ENA_ETH_IO_L4_PROTO_UNKNOWN = 0,
-
- ENA_ETH_IO_L4_PROTO_TCP = 12,
-
- ENA_ETH_IO_L4_PROTO_UDP = 13,
-
- ENA_ETH_IO_L4_PROTO_ROUTEABLE_ROCE = 23,
+ ENA_ETH_IO_L4_PROTO_UNKNOWN = 0,
+ ENA_ETH_IO_L4_PROTO_TCP = 12,
+ ENA_ETH_IO_L4_PROTO_UDP = 13,
+ ENA_ETH_IO_L4_PROTO_ROUTEABLE_ROCE = 23,
};
struct ena_eth_io_tx_desc {
@@ -243,9 +236,13 @@ struct ena_eth_io_rx_cdesc_base {
* checksum error detected, or, the controller didn't
* validate the checksum. This bit is valid only when
* l4_proto_idx indicates TCP/UDP packet, and,
- * ipv4_frag is not set
+ * ipv4_frag is not set. This bit is valid only when
+ * l4_csum_checked below is set.
* 15 : ipv4_frag - Indicates IPv4 fragmented packet
- * 23:16 : reserved16
+ * 16 : l4_csum_checked - L4 checksum was verified
+ * (could be OK or error), when cleared the status of
+ * checksum is unknown
+ * 23:17 : reserved17 - MBZ
* 24 : phase
* 25 : l3_csum2 - second checksum engine result
* 26 : first - Indicates first descriptor in
@@ -268,7 +265,9 @@ struct ena_eth_io_rx_cdesc_base {
uint16_t sub_qid;
- uint16_t reserved;
+ uint8_t offset;
+
+ uint8_t reserved;
};
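A sketch of the Rx-path consumer of these bits, assuming the parsed completion is exposed through struct ena_com_rx_ctx fields named l4_csum_checked, l4_csum_err and frag (field names are assumptions mirroring the descriptor bits above) together with standard FreeBSD mbuf checksum flags:

static void
example_set_rx_csum_flags(struct mbuf *m, const struct ena_com_rx_ctx *rx_ctx)
{
	/* l4_csum_err is meaningful only when the device reports that it
	 * actually ran the L4 check and the packet is not a fragment. */
	if (rx_ctx->l4_csum_checked && !rx_ctx->l4_csum_err && !rx_ctx->frag) {
		m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
		m->m_pkthdr.csum_data = 0xffff;
	}
}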
/* 8-word format */
@@ -304,117 +303,119 @@ struct ena_eth_io_numa_node_cfg_reg {
};
/* tx_desc */
-#define ENA_ETH_IO_TX_DESC_LENGTH_MASK GENMASK(15, 0)
-#define ENA_ETH_IO_TX_DESC_REQ_ID_HI_SHIFT 16
-#define ENA_ETH_IO_TX_DESC_REQ_ID_HI_MASK GENMASK(21, 16)
-#define ENA_ETH_IO_TX_DESC_META_DESC_SHIFT 23
-#define ENA_ETH_IO_TX_DESC_META_DESC_MASK BIT(23)
-#define ENA_ETH_IO_TX_DESC_PHASE_SHIFT 24
-#define ENA_ETH_IO_TX_DESC_PHASE_MASK BIT(24)
-#define ENA_ETH_IO_TX_DESC_FIRST_SHIFT 26
-#define ENA_ETH_IO_TX_DESC_FIRST_MASK BIT(26)
-#define ENA_ETH_IO_TX_DESC_LAST_SHIFT 27
-#define ENA_ETH_IO_TX_DESC_LAST_MASK BIT(27)
-#define ENA_ETH_IO_TX_DESC_COMP_REQ_SHIFT 28
-#define ENA_ETH_IO_TX_DESC_COMP_REQ_MASK BIT(28)
-#define ENA_ETH_IO_TX_DESC_L3_PROTO_IDX_MASK GENMASK(3, 0)
-#define ENA_ETH_IO_TX_DESC_DF_SHIFT 4
-#define ENA_ETH_IO_TX_DESC_DF_MASK BIT(4)
-#define ENA_ETH_IO_TX_DESC_TSO_EN_SHIFT 7
-#define ENA_ETH_IO_TX_DESC_TSO_EN_MASK BIT(7)
-#define ENA_ETH_IO_TX_DESC_L4_PROTO_IDX_SHIFT 8
-#define ENA_ETH_IO_TX_DESC_L4_PROTO_IDX_MASK GENMASK(12, 8)
-#define ENA_ETH_IO_TX_DESC_L3_CSUM_EN_SHIFT 13
-#define ENA_ETH_IO_TX_DESC_L3_CSUM_EN_MASK BIT(13)
-#define ENA_ETH_IO_TX_DESC_L4_CSUM_EN_SHIFT 14
-#define ENA_ETH_IO_TX_DESC_L4_CSUM_EN_MASK BIT(14)
-#define ENA_ETH_IO_TX_DESC_ETHERNET_FCS_DIS_SHIFT 15
-#define ENA_ETH_IO_TX_DESC_ETHERNET_FCS_DIS_MASK BIT(15)
-#define ENA_ETH_IO_TX_DESC_L4_CSUM_PARTIAL_SHIFT 17
-#define ENA_ETH_IO_TX_DESC_L4_CSUM_PARTIAL_MASK BIT(17)
-#define ENA_ETH_IO_TX_DESC_REQ_ID_LO_SHIFT 22
-#define ENA_ETH_IO_TX_DESC_REQ_ID_LO_MASK GENMASK(31, 22)
-#define ENA_ETH_IO_TX_DESC_ADDR_HI_MASK GENMASK(15, 0)
-#define ENA_ETH_IO_TX_DESC_HEADER_LENGTH_SHIFT 24
-#define ENA_ETH_IO_TX_DESC_HEADER_LENGTH_MASK GENMASK(31, 24)
+#define ENA_ETH_IO_TX_DESC_LENGTH_MASK GENMASK(15, 0)
+#define ENA_ETH_IO_TX_DESC_REQ_ID_HI_SHIFT 16
+#define ENA_ETH_IO_TX_DESC_REQ_ID_HI_MASK GENMASK(21, 16)
+#define ENA_ETH_IO_TX_DESC_META_DESC_SHIFT 23
+#define ENA_ETH_IO_TX_DESC_META_DESC_MASK BIT(23)
+#define ENA_ETH_IO_TX_DESC_PHASE_SHIFT 24
+#define ENA_ETH_IO_TX_DESC_PHASE_MASK BIT(24)
+#define ENA_ETH_IO_TX_DESC_FIRST_SHIFT 26
+#define ENA_ETH_IO_TX_DESC_FIRST_MASK BIT(26)
+#define ENA_ETH_IO_TX_DESC_LAST_SHIFT 27
+#define ENA_ETH_IO_TX_DESC_LAST_MASK BIT(27)
+#define ENA_ETH_IO_TX_DESC_COMP_REQ_SHIFT 28
+#define ENA_ETH_IO_TX_DESC_COMP_REQ_MASK BIT(28)
+#define ENA_ETH_IO_TX_DESC_L3_PROTO_IDX_MASK GENMASK(3, 0)
+#define ENA_ETH_IO_TX_DESC_DF_SHIFT 4
+#define ENA_ETH_IO_TX_DESC_DF_MASK BIT(4)
+#define ENA_ETH_IO_TX_DESC_TSO_EN_SHIFT 7
+#define ENA_ETH_IO_TX_DESC_TSO_EN_MASK BIT(7)
+#define ENA_ETH_IO_TX_DESC_L4_PROTO_IDX_SHIFT 8
+#define ENA_ETH_IO_TX_DESC_L4_PROTO_IDX_MASK GENMASK(12, 8)
+#define ENA_ETH_IO_TX_DESC_L3_CSUM_EN_SHIFT 13
+#define ENA_ETH_IO_TX_DESC_L3_CSUM_EN_MASK BIT(13)
+#define ENA_ETH_IO_TX_DESC_L4_CSUM_EN_SHIFT 14
+#define ENA_ETH_IO_TX_DESC_L4_CSUM_EN_MASK BIT(14)
+#define ENA_ETH_IO_TX_DESC_ETHERNET_FCS_DIS_SHIFT 15
+#define ENA_ETH_IO_TX_DESC_ETHERNET_FCS_DIS_MASK BIT(15)
+#define ENA_ETH_IO_TX_DESC_L4_CSUM_PARTIAL_SHIFT 17
+#define ENA_ETH_IO_TX_DESC_L4_CSUM_PARTIAL_MASK BIT(17)
+#define ENA_ETH_IO_TX_DESC_REQ_ID_LO_SHIFT 22
+#define ENA_ETH_IO_TX_DESC_REQ_ID_LO_MASK GENMASK(31, 22)
+#define ENA_ETH_IO_TX_DESC_ADDR_HI_MASK GENMASK(15, 0)
+#define ENA_ETH_IO_TX_DESC_HEADER_LENGTH_SHIFT 24
+#define ENA_ETH_IO_TX_DESC_HEADER_LENGTH_MASK GENMASK(31, 24)
/* tx_meta_desc */
-#define ENA_ETH_IO_TX_META_DESC_REQ_ID_LO_MASK GENMASK(9, 0)
-#define ENA_ETH_IO_TX_META_DESC_EXT_VALID_SHIFT 14
-#define ENA_ETH_IO_TX_META_DESC_EXT_VALID_MASK BIT(14)
-#define ENA_ETH_IO_TX_META_DESC_MSS_HI_SHIFT 16
-#define ENA_ETH_IO_TX_META_DESC_MSS_HI_MASK GENMASK(19, 16)
-#define ENA_ETH_IO_TX_META_DESC_ETH_META_TYPE_SHIFT 20
-#define ENA_ETH_IO_TX_META_DESC_ETH_META_TYPE_MASK BIT(20)
-#define ENA_ETH_IO_TX_META_DESC_META_STORE_SHIFT 21
-#define ENA_ETH_IO_TX_META_DESC_META_STORE_MASK BIT(21)
-#define ENA_ETH_IO_TX_META_DESC_META_DESC_SHIFT 23
-#define ENA_ETH_IO_TX_META_DESC_META_DESC_MASK BIT(23)
-#define ENA_ETH_IO_TX_META_DESC_PHASE_SHIFT 24
-#define ENA_ETH_IO_TX_META_DESC_PHASE_MASK BIT(24)
-#define ENA_ETH_IO_TX_META_DESC_FIRST_SHIFT 26
-#define ENA_ETH_IO_TX_META_DESC_FIRST_MASK BIT(26)
-#define ENA_ETH_IO_TX_META_DESC_LAST_SHIFT 27
-#define ENA_ETH_IO_TX_META_DESC_LAST_MASK BIT(27)
-#define ENA_ETH_IO_TX_META_DESC_COMP_REQ_SHIFT 28
-#define ENA_ETH_IO_TX_META_DESC_COMP_REQ_MASK BIT(28)
-#define ENA_ETH_IO_TX_META_DESC_REQ_ID_HI_MASK GENMASK(5, 0)
-#define ENA_ETH_IO_TX_META_DESC_L3_HDR_LEN_MASK GENMASK(7, 0)
-#define ENA_ETH_IO_TX_META_DESC_L3_HDR_OFF_SHIFT 8
-#define ENA_ETH_IO_TX_META_DESC_L3_HDR_OFF_MASK GENMASK(15, 8)
-#define ENA_ETH_IO_TX_META_DESC_L4_HDR_LEN_IN_WORDS_SHIFT 16
-#define ENA_ETH_IO_TX_META_DESC_L4_HDR_LEN_IN_WORDS_MASK GENMASK(21, 16)
-#define ENA_ETH_IO_TX_META_DESC_MSS_LO_SHIFT 22
-#define ENA_ETH_IO_TX_META_DESC_MSS_LO_MASK GENMASK(31, 22)
+#define ENA_ETH_IO_TX_META_DESC_REQ_ID_LO_MASK GENMASK(9, 0)
+#define ENA_ETH_IO_TX_META_DESC_EXT_VALID_SHIFT 14
+#define ENA_ETH_IO_TX_META_DESC_EXT_VALID_MASK BIT(14)
+#define ENA_ETH_IO_TX_META_DESC_MSS_HI_SHIFT 16
+#define ENA_ETH_IO_TX_META_DESC_MSS_HI_MASK GENMASK(19, 16)
+#define ENA_ETH_IO_TX_META_DESC_ETH_META_TYPE_SHIFT 20
+#define ENA_ETH_IO_TX_META_DESC_ETH_META_TYPE_MASK BIT(20)
+#define ENA_ETH_IO_TX_META_DESC_META_STORE_SHIFT 21
+#define ENA_ETH_IO_TX_META_DESC_META_STORE_MASK BIT(21)
+#define ENA_ETH_IO_TX_META_DESC_META_DESC_SHIFT 23
+#define ENA_ETH_IO_TX_META_DESC_META_DESC_MASK BIT(23)
+#define ENA_ETH_IO_TX_META_DESC_PHASE_SHIFT 24
+#define ENA_ETH_IO_TX_META_DESC_PHASE_MASK BIT(24)
+#define ENA_ETH_IO_TX_META_DESC_FIRST_SHIFT 26
+#define ENA_ETH_IO_TX_META_DESC_FIRST_MASK BIT(26)
+#define ENA_ETH_IO_TX_META_DESC_LAST_SHIFT 27
+#define ENA_ETH_IO_TX_META_DESC_LAST_MASK BIT(27)
+#define ENA_ETH_IO_TX_META_DESC_COMP_REQ_SHIFT 28
+#define ENA_ETH_IO_TX_META_DESC_COMP_REQ_MASK BIT(28)
+#define ENA_ETH_IO_TX_META_DESC_REQ_ID_HI_MASK GENMASK(5, 0)
+#define ENA_ETH_IO_TX_META_DESC_L3_HDR_LEN_MASK GENMASK(7, 0)
+#define ENA_ETH_IO_TX_META_DESC_L3_HDR_OFF_SHIFT 8
+#define ENA_ETH_IO_TX_META_DESC_L3_HDR_OFF_MASK GENMASK(15, 8)
+#define ENA_ETH_IO_TX_META_DESC_L4_HDR_LEN_IN_WORDS_SHIFT 16
+#define ENA_ETH_IO_TX_META_DESC_L4_HDR_LEN_IN_WORDS_MASK GENMASK(21, 16)
+#define ENA_ETH_IO_TX_META_DESC_MSS_LO_SHIFT 22
+#define ENA_ETH_IO_TX_META_DESC_MSS_LO_MASK GENMASK(31, 22)
/* tx_cdesc */
-#define ENA_ETH_IO_TX_CDESC_PHASE_MASK BIT(0)
+#define ENA_ETH_IO_TX_CDESC_PHASE_MASK BIT(0)
/* rx_desc */
-#define ENA_ETH_IO_RX_DESC_PHASE_MASK BIT(0)
-#define ENA_ETH_IO_RX_DESC_FIRST_SHIFT 2
-#define ENA_ETH_IO_RX_DESC_FIRST_MASK BIT(2)
-#define ENA_ETH_IO_RX_DESC_LAST_SHIFT 3
-#define ENA_ETH_IO_RX_DESC_LAST_MASK BIT(3)
-#define ENA_ETH_IO_RX_DESC_COMP_REQ_SHIFT 4
-#define ENA_ETH_IO_RX_DESC_COMP_REQ_MASK BIT(4)
+#define ENA_ETH_IO_RX_DESC_PHASE_MASK BIT(0)
+#define ENA_ETH_IO_RX_DESC_FIRST_SHIFT 2
+#define ENA_ETH_IO_RX_DESC_FIRST_MASK BIT(2)
+#define ENA_ETH_IO_RX_DESC_LAST_SHIFT 3
+#define ENA_ETH_IO_RX_DESC_LAST_MASK BIT(3)
+#define ENA_ETH_IO_RX_DESC_COMP_REQ_SHIFT 4
+#define ENA_ETH_IO_RX_DESC_COMP_REQ_MASK BIT(4)
/* rx_cdesc_base */
-#define ENA_ETH_IO_RX_CDESC_BASE_L3_PROTO_IDX_MASK GENMASK(4, 0)
-#define ENA_ETH_IO_RX_CDESC_BASE_SRC_VLAN_CNT_SHIFT 5
-#define ENA_ETH_IO_RX_CDESC_BASE_SRC_VLAN_CNT_MASK GENMASK(6, 5)
-#define ENA_ETH_IO_RX_CDESC_BASE_L4_PROTO_IDX_SHIFT 8
-#define ENA_ETH_IO_RX_CDESC_BASE_L4_PROTO_IDX_MASK GENMASK(12, 8)
-#define ENA_ETH_IO_RX_CDESC_BASE_L3_CSUM_ERR_SHIFT 13
-#define ENA_ETH_IO_RX_CDESC_BASE_L3_CSUM_ERR_MASK BIT(13)
-#define ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_ERR_SHIFT 14
-#define ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_ERR_MASK BIT(14)
-#define ENA_ETH_IO_RX_CDESC_BASE_IPV4_FRAG_SHIFT 15
-#define ENA_ETH_IO_RX_CDESC_BASE_IPV4_FRAG_MASK BIT(15)
-#define ENA_ETH_IO_RX_CDESC_BASE_PHASE_SHIFT 24
-#define ENA_ETH_IO_RX_CDESC_BASE_PHASE_MASK BIT(24)
-#define ENA_ETH_IO_RX_CDESC_BASE_L3_CSUM2_SHIFT 25
-#define ENA_ETH_IO_RX_CDESC_BASE_L3_CSUM2_MASK BIT(25)
-#define ENA_ETH_IO_RX_CDESC_BASE_FIRST_SHIFT 26
-#define ENA_ETH_IO_RX_CDESC_BASE_FIRST_MASK BIT(26)
-#define ENA_ETH_IO_RX_CDESC_BASE_LAST_SHIFT 27
-#define ENA_ETH_IO_RX_CDESC_BASE_LAST_MASK BIT(27)
-#define ENA_ETH_IO_RX_CDESC_BASE_BUFFER_SHIFT 30
-#define ENA_ETH_IO_RX_CDESC_BASE_BUFFER_MASK BIT(30)
+#define ENA_ETH_IO_RX_CDESC_BASE_L3_PROTO_IDX_MASK GENMASK(4, 0)
+#define ENA_ETH_IO_RX_CDESC_BASE_SRC_VLAN_CNT_SHIFT 5
+#define ENA_ETH_IO_RX_CDESC_BASE_SRC_VLAN_CNT_MASK GENMASK(6, 5)
+#define ENA_ETH_IO_RX_CDESC_BASE_L4_PROTO_IDX_SHIFT 8
+#define ENA_ETH_IO_RX_CDESC_BASE_L4_PROTO_IDX_MASK GENMASK(12, 8)
+#define ENA_ETH_IO_RX_CDESC_BASE_L3_CSUM_ERR_SHIFT 13
+#define ENA_ETH_IO_RX_CDESC_BASE_L3_CSUM_ERR_MASK BIT(13)
+#define ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_ERR_SHIFT 14
+#define ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_ERR_MASK BIT(14)
+#define ENA_ETH_IO_RX_CDESC_BASE_IPV4_FRAG_SHIFT 15
+#define ENA_ETH_IO_RX_CDESC_BASE_IPV4_FRAG_MASK BIT(15)
+#define ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_CHECKED_SHIFT 16
+#define ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_CHECKED_MASK BIT(16)
+#define ENA_ETH_IO_RX_CDESC_BASE_PHASE_SHIFT 24
+#define ENA_ETH_IO_RX_CDESC_BASE_PHASE_MASK BIT(24)
+#define ENA_ETH_IO_RX_CDESC_BASE_L3_CSUM2_SHIFT 25
+#define ENA_ETH_IO_RX_CDESC_BASE_L3_CSUM2_MASK BIT(25)
+#define ENA_ETH_IO_RX_CDESC_BASE_FIRST_SHIFT 26
+#define ENA_ETH_IO_RX_CDESC_BASE_FIRST_MASK BIT(26)
+#define ENA_ETH_IO_RX_CDESC_BASE_LAST_SHIFT 27
+#define ENA_ETH_IO_RX_CDESC_BASE_LAST_MASK BIT(27)
+#define ENA_ETH_IO_RX_CDESC_BASE_BUFFER_SHIFT 30
+#define ENA_ETH_IO_RX_CDESC_BASE_BUFFER_MASK BIT(30)
/* intr_reg */
-#define ENA_ETH_IO_INTR_REG_RX_INTR_DELAY_MASK GENMASK(14, 0)
-#define ENA_ETH_IO_INTR_REG_TX_INTR_DELAY_SHIFT 15
-#define ENA_ETH_IO_INTR_REG_TX_INTR_DELAY_MASK GENMASK(29, 15)
-#define ENA_ETH_IO_INTR_REG_INTR_UNMASK_SHIFT 30
-#define ENA_ETH_IO_INTR_REG_INTR_UNMASK_MASK BIT(30)
+#define ENA_ETH_IO_INTR_REG_RX_INTR_DELAY_MASK GENMASK(14, 0)
+#define ENA_ETH_IO_INTR_REG_TX_INTR_DELAY_SHIFT 15
+#define ENA_ETH_IO_INTR_REG_TX_INTR_DELAY_MASK GENMASK(29, 15)
+#define ENA_ETH_IO_INTR_REG_INTR_UNMASK_SHIFT 30
+#define ENA_ETH_IO_INTR_REG_INTR_UNMASK_MASK BIT(30)
/* numa_node_cfg_reg */
-#define ENA_ETH_IO_NUMA_NODE_CFG_REG_NUMA_MASK GENMASK(7, 0)
-#define ENA_ETH_IO_NUMA_NODE_CFG_REG_ENABLED_SHIFT 31
-#define ENA_ETH_IO_NUMA_NODE_CFG_REG_ENABLED_MASK BIT(31)
+#define ENA_ETH_IO_NUMA_NODE_CFG_REG_NUMA_MASK GENMASK(7, 0)
+#define ENA_ETH_IO_NUMA_NODE_CFG_REG_ENABLED_SHIFT 31
+#define ENA_ETH_IO_NUMA_NODE_CFG_REG_ENABLED_MASK BIT(31)
-#if !defined(ENA_DEFS_LINUX_MAINLINE)
+#if !defined(DEFS_LINUX_MAINLINE)
static inline uint32_t get_ena_eth_io_tx_desc_length(const struct ena_eth_io_tx_desc *p)
{
return p->len_ctrl & ENA_ETH_IO_TX_DESC_LENGTH_MASK;
@@ -855,6 +856,16 @@ static inline void set_ena_eth_io_rx_cdesc_base_ipv4_frag(struct ena_eth_io_rx_c
p->status |= (val << ENA_ETH_IO_RX_CDESC_BASE_IPV4_FRAG_SHIFT) & ENA_ETH_IO_RX_CDESC_BASE_IPV4_FRAG_MASK;
}
+static inline uint32_t get_ena_eth_io_rx_cdesc_base_l4_csum_checked(const struct ena_eth_io_rx_cdesc_base *p)
+{
+ return (p->status & ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_CHECKED_MASK) >> ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_CHECKED_SHIFT;
+}
+
+static inline void set_ena_eth_io_rx_cdesc_base_l4_csum_checked(struct ena_eth_io_rx_cdesc_base *p, uint32_t val)
+{
+ p->status |= (val << ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_CHECKED_SHIFT) & ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_CHECKED_MASK;
+}
+
static inline uint32_t get_ena_eth_io_rx_cdesc_base_phase(const struct ena_eth_io_rx_cdesc_base *p)
{
return (p->status & ENA_ETH_IO_RX_CDESC_BASE_PHASE_MASK) >> ENA_ETH_IO_RX_CDESC_BASE_PHASE_SHIFT;
@@ -955,5 +966,5 @@ static inline void set_ena_eth_io_numa_node_cfg_reg_enabled(struct ena_eth_io_nu
p->numa_cfg |= (val << ENA_ETH_IO_NUMA_NODE_CFG_REG_ENABLED_SHIFT) & ENA_ETH_IO_NUMA_NODE_CFG_REG_ENABLED_MASK;
}
-#endif /* !defined(ENA_DEFS_LINUX_MAINLINE) */
-#endif /*_ENA_ETH_IO_H_ */
+#endif /* !defined(DEFS_LINUX_MAINLINE) */
+#endif /* _ENA_ETH_IO_H_ */
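The rx_cdesc_base comment above tightens the meaning of l4_csum_err: it is only valid when the new l4_csum_checked bit (bit 16) is set, and only for unfragmented TCP/UDP packets. A minimal sketch of that interpretation, using only the masks defined in this header (the helper itself is an illustrative assumption, not part of the commit):

    /*
     * Illustrative only: decide whether the device verified the L4
     * checksum for a completed RX descriptor.  Uses the
     * ENA_ETH_IO_RX_CDESC_BASE_* masks defined above.
     */
    static bool
    rx_l4_csum_good(const struct ena_eth_io_rx_cdesc_base *cdesc)
    {
            uint32_t status = cdesc->status;

            /* Fragmented IPv4 packets carry no L4 checksum status. */
            if ((status & ENA_ETH_IO_RX_CDESC_BASE_IPV4_FRAG_MASK) != 0)
                    return (false);
            /* l4_csum_err is undefined unless l4_csum_checked is set. */
            if ((status & ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_CHECKED_MASK) == 0)
                    return (false);
            return ((status & ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_ERR_MASK) == 0);
    }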
diff --git a/sys/contrib/ena-com/ena_defs/ena_gen_info.h b/sys/contrib/ena-com/ena_defs/ena_gen_info.h
index 0ff47871ea22..83ed024ae4cc 100644
--- a/sys/contrib/ena-com/ena_defs/ena_gen_info.h
+++ b/sys/contrib/ena-com/ena_defs/ena_gen_info.h
@@ -1,7 +1,7 @@
/*-
* BSD LICENSE
*
- * Copyright (c) 2015-2017 Amazon.com, Inc. or its affiliates.
+ * Copyright (c) 2015-2020 Amazon.com, Inc. or its affiliates.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -30,5 +30,5 @@
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-#define ENA_GEN_DATE "Sun Nov 20 11:22:05 IST 2016"
-#define ENA_GEN_COMMIT "44da4e8"
+#define ENA_GEN_DATE "Mon Apr 20 15:41:59 DST 2020"
+#define ENA_GEN_COMMIT "daa45ac"
diff --git a/sys/contrib/ena-com/ena_defs/ena_regs_defs.h b/sys/contrib/ena-com/ena_defs/ena_regs_defs.h
index 5a540d8f47f0..53ac662b6189 100644
--- a/sys/contrib/ena-com/ena_defs/ena_regs_defs.h
+++ b/sys/contrib/ena-com/ena_defs/ena_regs_defs.h
@@ -1,7 +1,7 @@
/*-
* BSD LICENSE
*
- * Copyright (c) 2015-2017 Amazon.com, Inc. or its affiliates.
+ * Copyright (c) 2015-2020 Amazon.com, Inc. or its affiliates.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -34,135 +34,126 @@
#define _ENA_REGS_H_
enum ena_regs_reset_reason_types {
- ENA_REGS_RESET_NORMAL = 0,
-
- ENA_REGS_RESET_KEEP_ALIVE_TO = 1,
-
- ENA_REGS_RESET_ADMIN_TO = 2,
-
- ENA_REGS_RESET_MISS_TX_CMPL = 3,
-
- ENA_REGS_RESET_INV_RX_REQ_ID = 4,
-
- ENA_REGS_RESET_INV_TX_REQ_ID = 5,
-
- ENA_REGS_RESET_TOO_MANY_RX_DESCS = 6,
-
- ENA_REGS_RESET_INIT_ERR = 7,
-
- ENA_REGS_RESET_DRIVER_INVALID_STATE = 8,
-
- ENA_REGS_RESET_OS_TRIGGER = 9,
-
- ENA_REGS_RESET_OS_NETDEV_WD = 10,
-
- ENA_REGS_RESET_SHUTDOWN = 11,
-
- ENA_REGS_RESET_USER_TRIGGER = 12,
-
- ENA_REGS_RESET_GENERIC = 13,
+ ENA_REGS_RESET_NORMAL = 0,
+ ENA_REGS_RESET_KEEP_ALIVE_TO = 1,
+ ENA_REGS_RESET_ADMIN_TO = 2,
+ ENA_REGS_RESET_MISS_TX_CMPL = 3,
+ ENA_REGS_RESET_INV_RX_REQ_ID = 4,
+ ENA_REGS_RESET_INV_TX_REQ_ID = 5,
+ ENA_REGS_RESET_TOO_MANY_RX_DESCS = 6,
+ ENA_REGS_RESET_INIT_ERR = 7,
+ ENA_REGS_RESET_DRIVER_INVALID_STATE = 8,
+ ENA_REGS_RESET_OS_TRIGGER = 9,
+ ENA_REGS_RESET_OS_NETDEV_WD = 10,
+ ENA_REGS_RESET_SHUTDOWN = 11,
+ ENA_REGS_RESET_USER_TRIGGER = 12,
+ ENA_REGS_RESET_GENERIC = 13,
+ ENA_REGS_RESET_MISS_INTERRUPT = 14,
+ ENA_REGS_RESET_LAST,
};
/* ena_registers offsets */
-#define ENA_REGS_VERSION_OFF 0x0
-#define ENA_REGS_CONTROLLER_VERSION_OFF 0x4
-#define ENA_REGS_CAPS_OFF 0x8
-#define ENA_REGS_CAPS_EXT_OFF 0xc
-#define ENA_REGS_AQ_BASE_LO_OFF 0x10
-#define ENA_REGS_AQ_BASE_HI_OFF 0x14
-#define ENA_REGS_AQ_CAPS_OFF 0x18
-#define ENA_REGS_ACQ_BASE_LO_OFF 0x20
-#define ENA_REGS_ACQ_BASE_HI_OFF 0x24
-#define ENA_REGS_ACQ_CAPS_OFF 0x28
-#define ENA_REGS_AQ_DB_OFF 0x2c
-#define ENA_REGS_ACQ_TAIL_OFF 0x30
-#define ENA_REGS_AENQ_CAPS_OFF 0x34
-#define ENA_REGS_AENQ_BASE_LO_OFF 0x38
-#define ENA_REGS_AENQ_BASE_HI_OFF 0x3c
-#define ENA_REGS_AENQ_HEAD_DB_OFF 0x40
-#define ENA_REGS_AENQ_TAIL_OFF 0x44
-#define ENA_REGS_INTR_MASK_OFF 0x4c
-#define ENA_REGS_DEV_CTL_OFF 0x54
-#define ENA_REGS_DEV_STS_OFF 0x58
-#define ENA_REGS_MMIO_REG_READ_OFF 0x5c
-#define ENA_REGS_MMIO_RESP_LO_OFF 0x60
-#define ENA_REGS_MMIO_RESP_HI_OFF 0x64
-#define ENA_REGS_RSS_IND_ENTRY_UPDATE_OFF 0x68
+
+/* 0 base */
+#define ENA_REGS_VERSION_OFF 0x0
+#define ENA_REGS_CONTROLLER_VERSION_OFF 0x4
+#define ENA_REGS_CAPS_OFF 0x8
+#define ENA_REGS_CAPS_EXT_OFF 0xc
+#define ENA_REGS_AQ_BASE_LO_OFF 0x10
+#define ENA_REGS_AQ_BASE_HI_OFF 0x14
+#define ENA_REGS_AQ_CAPS_OFF 0x18
+#define ENA_REGS_ACQ_BASE_LO_OFF 0x20
+#define ENA_REGS_ACQ_BASE_HI_OFF 0x24
+#define ENA_REGS_ACQ_CAPS_OFF 0x28
+#define ENA_REGS_AQ_DB_OFF 0x2c
+#define ENA_REGS_ACQ_TAIL_OFF 0x30
+#define ENA_REGS_AENQ_CAPS_OFF 0x34
+#define ENA_REGS_AENQ_BASE_LO_OFF 0x38
+#define ENA_REGS_AENQ_BASE_HI_OFF 0x3c
+#define ENA_REGS_AENQ_HEAD_DB_OFF 0x40
+#define ENA_REGS_AENQ_TAIL_OFF 0x44
+#define ENA_REGS_INTR_MASK_OFF 0x4c
+#define ENA_REGS_DEV_CTL_OFF 0x54
+#define ENA_REGS_DEV_STS_OFF 0x58
+#define ENA_REGS_MMIO_REG_READ_OFF 0x5c
+#define ENA_REGS_MMIO_RESP_LO_OFF 0x60
+#define ENA_REGS_MMIO_RESP_HI_OFF 0x64
+#define ENA_REGS_RSS_IND_ENTRY_UPDATE_OFF 0x68
/* version register */
-#define ENA_REGS_VERSION_MINOR_VERSION_MASK 0xff
-#define ENA_REGS_VERSION_MAJOR_VERSION_SHIFT 8
-#define ENA_REGS_VERSION_MAJOR_VERSION_MASK 0xff00
+#define ENA_REGS_VERSION_MINOR_VERSION_MASK 0xff
+#define ENA_REGS_VERSION_MAJOR_VERSION_SHIFT 8
+#define ENA_REGS_VERSION_MAJOR_VERSION_MASK 0xff00
/* controller_version register */
-#define ENA_REGS_CONTROLLER_VERSION_SUBMINOR_VERSION_MASK 0xff
-#define ENA_REGS_CONTROLLER_VERSION_MINOR_VERSION_SHIFT 8
-#define ENA_REGS_CONTROLLER_VERSION_MINOR_VERSION_MASK 0xff00
-#define ENA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_SHIFT 16
-#define ENA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_MASK 0xff0000
-#define ENA_REGS_CONTROLLER_VERSION_IMPL_ID_SHIFT 24
-#define ENA_REGS_CONTROLLER_VERSION_IMPL_ID_MASK 0xff000000
+#define ENA_REGS_CONTROLLER_VERSION_SUBMINOR_VERSION_MASK 0xff
+#define ENA_REGS_CONTROLLER_VERSION_MINOR_VERSION_SHIFT 8
+#define ENA_REGS_CONTROLLER_VERSION_MINOR_VERSION_MASK 0xff00
+#define ENA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_SHIFT 16
+#define ENA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_MASK 0xff0000
+#define ENA_REGS_CONTROLLER_VERSION_IMPL_ID_SHIFT 24
+#define ENA_REGS_CONTROLLER_VERSION_IMPL_ID_MASK 0xff000000
/* caps register */
-#define ENA_REGS_CAPS_CONTIGUOUS_QUEUE_REQUIRED_MASK 0x1
-#define ENA_REGS_CAPS_RESET_TIMEOUT_SHIFT 1
-#define ENA_REGS_CAPS_RESET_TIMEOUT_MASK 0x3e
-#define ENA_REGS_CAPS_DMA_ADDR_WIDTH_SHIFT 8
-#define ENA_REGS_CAPS_DMA_ADDR_WIDTH_MASK 0xff00
-#define ENA_REGS_CAPS_ADMIN_CMD_TO_SHIFT 16
-#define ENA_REGS_CAPS_ADMIN_CMD_TO_MASK 0xf0000
+#define ENA_REGS_CAPS_CONTIGUOUS_QUEUE_REQUIRED_MASK 0x1
+#define ENA_REGS_CAPS_RESET_TIMEOUT_SHIFT 1
+#define ENA_REGS_CAPS_RESET_TIMEOUT_MASK 0x3e
+#define ENA_REGS_CAPS_DMA_ADDR_WIDTH_SHIFT 8
+#define ENA_REGS_CAPS_DMA_ADDR_WIDTH_MASK 0xff00
+#define ENA_REGS_CAPS_ADMIN_CMD_TO_SHIFT 16
+#define ENA_REGS_CAPS_ADMIN_CMD_TO_MASK 0xf0000
/* aq_caps register */
-#define ENA_REGS_AQ_CAPS_AQ_DEPTH_MASK 0xffff
-#define ENA_REGS_AQ_CAPS_AQ_ENTRY_SIZE_SHIFT 16
-#define ENA_REGS_AQ_CAPS_AQ_ENTRY_SIZE_MASK 0xffff0000
+#define ENA_REGS_AQ_CAPS_AQ_DEPTH_MASK 0xffff
+#define ENA_REGS_AQ_CAPS_AQ_ENTRY_SIZE_SHIFT 16
+#define ENA_REGS_AQ_CAPS_AQ_ENTRY_SIZE_MASK 0xffff0000
/* acq_caps register */
-#define ENA_REGS_ACQ_CAPS_ACQ_DEPTH_MASK 0xffff
-#define ENA_REGS_ACQ_CAPS_ACQ_ENTRY_SIZE_SHIFT 16
-#define ENA_REGS_ACQ_CAPS_ACQ_ENTRY_SIZE_MASK 0xffff0000
+#define ENA_REGS_ACQ_CAPS_ACQ_DEPTH_MASK 0xffff
+#define ENA_REGS_ACQ_CAPS_ACQ_ENTRY_SIZE_SHIFT 16
+#define ENA_REGS_ACQ_CAPS_ACQ_ENTRY_SIZE_MASK 0xffff0000
/* aenq_caps register */
-#define ENA_REGS_AENQ_CAPS_AENQ_DEPTH_MASK 0xffff
-#define ENA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_SHIFT 16
-#define ENA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_MASK 0xffff0000
+#define ENA_REGS_AENQ_CAPS_AENQ_DEPTH_MASK 0xffff
+#define ENA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_SHIFT 16
+#define ENA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_MASK 0xffff0000
/* dev_ctl register */
-#define ENA_REGS_DEV_CTL_DEV_RESET_MASK 0x1
-#define ENA_REGS_DEV_CTL_AQ_RESTART_SHIFT 1
-#define ENA_REGS_DEV_CTL_AQ_RESTART_MASK 0x2
-#define ENA_REGS_DEV_CTL_QUIESCENT_SHIFT 2
-#define ENA_REGS_DEV_CTL_QUIESCENT_MASK 0x4
-#define ENA_REGS_DEV_CTL_IO_RESUME_SHIFT 3
-#define ENA_REGS_DEV_CTL_IO_RESUME_MASK 0x8
-#define ENA_REGS_DEV_CTL_RESET_REASON_SHIFT 28
-#define ENA_REGS_DEV_CTL_RESET_REASON_MASK 0xf0000000
+#define ENA_REGS_DEV_CTL_DEV_RESET_MASK 0x1
+#define ENA_REGS_DEV_CTL_AQ_RESTART_SHIFT 1
+#define ENA_REGS_DEV_CTL_AQ_RESTART_MASK 0x2
+#define ENA_REGS_DEV_CTL_QUIESCENT_SHIFT 2
+#define ENA_REGS_DEV_CTL_QUIESCENT_MASK 0x4
+#define ENA_REGS_DEV_CTL_IO_RESUME_SHIFT 3
+#define ENA_REGS_DEV_CTL_IO_RESUME_MASK 0x8
+#define ENA_REGS_DEV_CTL_RESET_REASON_SHIFT 28
+#define ENA_REGS_DEV_CTL_RESET_REASON_MASK 0xf0000000
/* dev_sts register */
-#define ENA_REGS_DEV_STS_READY_MASK 0x1
-#define ENA_REGS_DEV_STS_AQ_RESTART_IN_PROGRESS_SHIFT 1
-#define ENA_REGS_DEV_STS_AQ_RESTART_IN_PROGRESS_MASK 0x2
-#define ENA_REGS_DEV_STS_AQ_RESTART_FINISHED_SHIFT 2
-#define ENA_REGS_DEV_STS_AQ_RESTART_FINISHED_MASK 0x4
-#define ENA_REGS_DEV_STS_RESET_IN_PROGRESS_SHIFT 3
-#define ENA_REGS_DEV_STS_RESET_IN_PROGRESS_MASK 0x8
-#define ENA_REGS_DEV_STS_RESET_FINISHED_SHIFT 4
-#define ENA_REGS_DEV_STS_RESET_FINISHED_MASK 0x10
-#define ENA_REGS_DEV_STS_FATAL_ERROR_SHIFT 5
-#define ENA_REGS_DEV_STS_FATAL_ERROR_MASK 0x20
-#define ENA_REGS_DEV_STS_QUIESCENT_STATE_IN_PROGRESS_SHIFT 6
-#define ENA_REGS_DEV_STS_QUIESCENT_STATE_IN_PROGRESS_MASK 0x40
-#define ENA_REGS_DEV_STS_QUIESCENT_STATE_ACHIEVED_SHIFT 7
-#define ENA_REGS_DEV_STS_QUIESCENT_STATE_ACHIEVED_MASK 0x80
+#define ENA_REGS_DEV_STS_READY_MASK 0x1
+#define ENA_REGS_DEV_STS_AQ_RESTART_IN_PROGRESS_SHIFT 1
+#define ENA_REGS_DEV_STS_AQ_RESTART_IN_PROGRESS_MASK 0x2
+#define ENA_REGS_DEV_STS_AQ_RESTART_FINISHED_SHIFT 2
+#define ENA_REGS_DEV_STS_AQ_RESTART_FINISHED_MASK 0x4
+#define ENA_REGS_DEV_STS_RESET_IN_PROGRESS_SHIFT 3
+#define ENA_REGS_DEV_STS_RESET_IN_PROGRESS_MASK 0x8
+#define ENA_REGS_DEV_STS_RESET_FINISHED_SHIFT 4
+#define ENA_REGS_DEV_STS_RESET_FINISHED_MASK 0x10
+#define ENA_REGS_DEV_STS_FATAL_ERROR_SHIFT 5
+#define ENA_REGS_DEV_STS_FATAL_ERROR_MASK 0x20
+#define ENA_REGS_DEV_STS_QUIESCENT_STATE_IN_PROGRESS_SHIFT 6
+#define ENA_REGS_DEV_STS_QUIESCENT_STATE_IN_PROGRESS_MASK 0x40
+#define ENA_REGS_DEV_STS_QUIESCENT_STATE_ACHIEVED_SHIFT 7
+#define ENA_REGS_DEV_STS_QUIESCENT_STATE_ACHIEVED_MASK 0x80
/* mmio_reg_read register */
-#define ENA_REGS_MMIO_REG_READ_REQ_ID_MASK 0xffff
-#define ENA_REGS_MMIO_REG_READ_REG_OFF_SHIFT 16
-#define ENA_REGS_MMIO_REG_READ_REG_OFF_MASK 0xffff0000
+#define ENA_REGS_MMIO_REG_READ_REQ_ID_MASK 0xffff
+#define ENA_REGS_MMIO_REG_READ_REG_OFF_SHIFT 16
+#define ENA_REGS_MMIO_REG_READ_REG_OFF_MASK 0xffff0000
/* rss_ind_entry_update register */
-#define ENA_REGS_RSS_IND_ENTRY_UPDATE_INDEX_MASK 0xffff
-#define ENA_REGS_RSS_IND_ENTRY_UPDATE_CQ_IDX_SHIFT 16
-#define ENA_REGS_RSS_IND_ENTRY_UPDATE_CQ_IDX_MASK 0xffff0000
+#define ENA_REGS_RSS_IND_ENTRY_UPDATE_INDEX_MASK 0xffff
+#define ENA_REGS_RSS_IND_ENTRY_UPDATE_CQ_IDX_SHIFT 16
+#define ENA_REGS_RSS_IND_ENTRY_UPDATE_CQ_IDX_MASK 0xffff0000
-#endif /*_ENA_REGS_H_ */
+#endif /* _ENA_REGS_H_ */
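The reset reason enumeration above gains ENA_REGS_RESET_MISS_INTERRUPT and a terminating ENA_REGS_RESET_LAST entry. The chosen reason is reported to the device through the reset_reason field of the dev_ctl register; a small illustration of that encoding (the helper is a sketch, not code from this commit):

    /*
     * Illustrative only: build a dev_ctl value that requests a device
     * reset and records the reason, e.g. ENA_REGS_RESET_MISS_INTERRUPT
     * (14) yields 0xe0000001.
     */
    static uint32_t
    dev_ctl_reset_value(enum ena_regs_reset_reason_types reason)
    {
            uint32_t ctl = ENA_REGS_DEV_CTL_DEV_RESET_MASK;

            ctl |= ((uint32_t)reason << ENA_REGS_DEV_CTL_RESET_REASON_SHIFT) &
                ENA_REGS_DEV_CTL_RESET_REASON_MASK;
            return (ctl);
    }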
diff --git a/sys/contrib/ena-com/ena_eth_com.c b/sys/contrib/ena-com/ena_eth_com.c
index ce42fd1fecd3..58ddb82246fd 100644
--- a/sys/contrib/ena-com/ena_eth_com.c
+++ b/sys/contrib/ena-com/ena_eth_com.c
@@ -1,7 +1,7 @@
/*-
* BSD LICENSE
*
- * Copyright (c) 2015-2017 Amazon.com, Inc. or its affiliates.
+ * Copyright (c) 2015-2020 Amazon.com, Inc. or its affiliates.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -33,7 +33,7 @@
#include "ena_eth_com.h"
-static inline struct ena_eth_io_rx_cdesc_base *ena_com_get_next_rx_cdesc(
+static struct ena_eth_io_rx_cdesc_base *ena_com_get_next_rx_cdesc(
struct ena_com_io_cq *io_cq)
{
struct ena_eth_io_rx_cdesc_base *cdesc;
@@ -46,25 +46,21 @@ static inline struct ena_eth_io_rx_cdesc_base *ena_com_get_next_rx_cdesc(
cdesc = (struct ena_eth_io_rx_cdesc_base *)(io_cq->cdesc_addr.virt_addr
+ (head_masked * io_cq->cdesc_entry_size_in_bytes));
- desc_phase = (READ_ONCE(cdesc->status) & ENA_ETH_IO_RX_CDESC_BASE_PHASE_MASK) >>
+ desc_phase = (READ_ONCE32(cdesc->status) & ENA_ETH_IO_RX_CDESC_BASE_PHASE_MASK) >>
ENA_ETH_IO_RX_CDESC_BASE_PHASE_SHIFT;
if (desc_phase != expected_phase)
return NULL;
- return cdesc;
-}
-
-static inline void ena_com_cq_inc_head(struct ena_com_io_cq *io_cq)
-{
- io_cq->head++;
+ /* Make sure we read the rest of the descriptor after the phase bit
+ * has been read
+ */
+ dma_rmb();
- /* Switch phase bit in case of wrap around */
- if (unlikely((io_cq->head & (io_cq->q_depth - 1)) == 0))
- io_cq->phase ^= 1;
+ return cdesc;
}
-static inline void *get_sq_desc_regular_queue(struct ena_com_io_sq *io_sq)
+static void *get_sq_desc_regular_queue(struct ena_com_io_sq *io_sq)
{
u16 tail_masked;
u32 offset;
@@ -76,8 +72,8 @@ static inline void *get_sq_desc_regular_queue(struct ena_com_io_sq *io_sq)
return (void *)((uintptr_t)io_sq->desc_addr.virt_addr + offset);
}
-static inline void ena_com_write_bounce_buffer_to_dev(struct ena_com_io_sq *io_sq,
- u8 *bounce_buffer)
+static int ena_com_write_bounce_buffer_to_dev(struct ena_com_io_sq *io_sq,
+ u8 *bounce_buffer)
{
struct ena_com_llq_info *llq_info = &io_sq->llq_info;
@@ -87,6 +83,17 @@ static inline void ena_com_write_bounce_buffer_to_dev(struct ena_com_io_sq *io_s
dst_tail_mask = io_sq->tail & (io_sq->q_depth - 1);
dst_offset = dst_tail_mask * llq_info->desc_list_entry_size;
+ if (is_llq_max_tx_burst_exists(io_sq)) {
+ if (unlikely(!io_sq->entries_in_tx_burst_left)) {
+ ena_trc_err("Error: trying to send more packets than tx burst allows\n");
+ return ENA_COM_NO_SPACE;
+ }
+
+ io_sq->entries_in_tx_burst_left--;
+ ena_trc_dbg("decreasing entries_in_tx_burst_left of queue %d to %d\n",
+ io_sq->qid, io_sq->entries_in_tx_burst_left);
+ }
+
/* Make sure everything was written into the bounce buffer before
* writing the bounce buffer to the device
*/
@@ -102,9 +109,11 @@ static inline void ena_com_write_bounce_buffer_to_dev(struct ena_com_io_sq *io_s
/* Switch phase bit in case of wrap around */
if (unlikely((io_sq->tail & (io_sq->q_depth - 1)) == 0))
io_sq->phase ^= 1;
+
+ return ENA_COM_OK;
}
-static inline int ena_com_write_header_to_bounce(struct ena_com_io_sq *io_sq,
+static int ena_com_write_header_to_bounce(struct ena_com_io_sq *io_sq,
u8 *header_src,
u16 header_len)
{
@@ -113,7 +122,7 @@ static inline int ena_com_write_header_to_bounce(struct ena_com_io_sq *io_sq,
u8 *bounce_buffer = pkt_ctrl->curr_bounce_buf;
u16 header_offset;
- if (io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST)
+ if (unlikely(io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST))
return 0;
header_offset =
@@ -134,7 +143,7 @@ static inline int ena_com_write_header_to_bounce(struct ena_com_io_sq *io_sq,
return 0;
}
-static inline void *get_sq_desc_llq(struct ena_com_io_sq *io_sq)
+static void *get_sq_desc_llq(struct ena_com_io_sq *io_sq)
{
struct ena_com_llq_pkt_ctrl *pkt_ctrl = &io_sq->llq_buf_ctrl;
u8 *bounce_buffer;
@@ -154,29 +163,36 @@ static inline void *get_sq_desc_llq(struct ena_com_io_sq *io_sq)
return sq_desc;
}
-static inline void ena_com_close_bounce_buffer(struct ena_com_io_sq *io_sq)
+static int ena_com_close_bounce_buffer(struct ena_com_io_sq *io_sq)
{
struct ena_com_llq_pkt_ctrl *pkt_ctrl = &io_sq->llq_buf_ctrl;
struct ena_com_llq_info *llq_info = &io_sq->llq_info;
+ int rc;
- if (io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST)
- return;
+ if (unlikely(io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST))
+ return ENA_COM_OK;
/* bounce buffer was used, so write it and get a new one */
if (pkt_ctrl->idx) {
- ena_com_write_bounce_buffer_to_dev(io_sq,
- pkt_ctrl->curr_bounce_buf);
+ rc = ena_com_write_bounce_buffer_to_dev(io_sq,
+ pkt_ctrl->curr_bounce_buf);
+ if (unlikely(rc)) {
+ ena_trc_err("failed to write bounce buffer to device\n");
+ return rc;
+ }
+
pkt_ctrl->curr_bounce_buf =
ena_com_get_next_bounce_buffer(&io_sq->bounce_buf_ctrl);
- memset(io_sq->llq_buf_ctrl.curr_bounce_buf,
- 0x0, llq_info->desc_list_entry_size);
+ memset(io_sq->llq_buf_ctrl.curr_bounce_buf,
+ 0x0, llq_info->desc_list_entry_size);
}
pkt_ctrl->idx = 0;
pkt_ctrl->descs_left_in_line = llq_info->descs_num_before_header;
+ return ENA_COM_OK;
}
-static inline void *get_sq_desc(struct ena_com_io_sq *io_sq)
+static void *get_sq_desc(struct ena_com_io_sq *io_sq)
{
if (io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV)
return get_sq_desc_llq(io_sq);
@@ -184,45 +200,51 @@ static inline void *get_sq_desc(struct ena_com_io_sq *io_sq)
return get_sq_desc_regular_queue(io_sq);
}
-static inline void ena_com_sq_update_llq_tail(struct ena_com_io_sq *io_sq)
+static int ena_com_sq_update_llq_tail(struct ena_com_io_sq *io_sq)
{
struct ena_com_llq_pkt_ctrl *pkt_ctrl = &io_sq->llq_buf_ctrl;
struct ena_com_llq_info *llq_info = &io_sq->llq_info;
+ int rc;
if (!pkt_ctrl->descs_left_in_line) {
- ena_com_write_bounce_buffer_to_dev(io_sq,
- pkt_ctrl->curr_bounce_buf);
+ rc = ena_com_write_bounce_buffer_to_dev(io_sq,
+ pkt_ctrl->curr_bounce_buf);
+ if (unlikely(rc)) {
+ ena_trc_err("failed to write bounce buffer to device\n");
+ return rc;
+ }
pkt_ctrl->curr_bounce_buf =
ena_com_get_next_bounce_buffer(&io_sq->bounce_buf_ctrl);
- memset(io_sq->llq_buf_ctrl.curr_bounce_buf,
- 0x0, llq_info->desc_list_entry_size);
+ memset(io_sq->llq_buf_ctrl.curr_bounce_buf,
+ 0x0, llq_info->desc_list_entry_size);
pkt_ctrl->idx = 0;
- if (llq_info->desc_stride_ctrl == ENA_ADMIN_SINGLE_DESC_PER_ENTRY)
+ if (unlikely(llq_info->desc_stride_ctrl == ENA_ADMIN_SINGLE_DESC_PER_ENTRY))
pkt_ctrl->descs_left_in_line = 1;
else
pkt_ctrl->descs_left_in_line =
llq_info->desc_list_entry_size / io_sq->desc_entry_size;
}
+
+ return ENA_COM_OK;
}
-static inline void ena_com_sq_update_tail(struct ena_com_io_sq *io_sq)
+static int ena_com_sq_update_tail(struct ena_com_io_sq *io_sq)
{
-
- if (io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) {
- ena_com_sq_update_llq_tail(io_sq);
- return;
- }
+ if (io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV)
+ return ena_com_sq_update_llq_tail(io_sq);
io_sq->tail++;
/* Switch phase bit in case of wrap around */
if (unlikely((io_sq->tail & (io_sq->q_depth - 1)) == 0))
io_sq->phase ^= 1;
+
+ return ENA_COM_OK;
}
-static inline struct ena_eth_io_rx_cdesc_base *
+static struct ena_eth_io_rx_cdesc_base *
ena_com_rx_cdesc_idx_to_ptr(struct ena_com_io_cq *io_cq, u16 idx)
{
idx &= (io_cq->q_depth - 1);
@@ -231,7 +253,7 @@ static inline struct ena_eth_io_rx_cdesc_base *
idx * io_cq->cdesc_entry_size_in_bytes);
}
-static inline u16 ena_com_cdesc_rx_pkt_get(struct ena_com_io_cq *io_cq,
+static u16 ena_com_cdesc_rx_pkt_get(struct ena_com_io_cq *io_cq,
u16 *first_cdesc_idx)
{
struct ena_eth_io_rx_cdesc_base *cdesc;
@@ -245,7 +267,7 @@ static inline u16 ena_com_cdesc_rx_pkt_get(struct ena_com_io_cq *io_cq,
ena_com_cq_inc_head(io_cq);
count++;
- last = (READ_ONCE(cdesc->status) & ENA_ETH_IO_RX_CDESC_BASE_LAST_MASK) >>
+ last = (READ_ONCE32(cdesc->status) & ENA_ETH_IO_RX_CDESC_BASE_LAST_MASK) >>
ENA_ETH_IO_RX_CDESC_BASE_LAST_SHIFT;
} while (!last);
@@ -268,28 +290,10 @@ static inline u16 ena_com_cdesc_rx_pkt_get(struct ena_com_io_cq *io_cq,
return count;
}
-static inline bool ena_com_meta_desc_changed(struct ena_com_io_sq *io_sq,
- struct ena_com_tx_ctx *ena_tx_ctx)
-{
- int rc;
-
- if (ena_tx_ctx->meta_valid) {
- rc = memcmp(&io_sq->cached_tx_meta,
- &ena_tx_ctx->ena_meta,
- sizeof(struct ena_com_tx_meta));
-
- if (unlikely(rc != 0))
- return true;
- }
-
- return false;
-}
-
-static inline void ena_com_create_and_store_tx_meta_desc(struct ena_com_io_sq *io_sq,
- struct ena_com_tx_ctx *ena_tx_ctx)
+static int ena_com_create_meta(struct ena_com_io_sq *io_sq,
+ struct ena_com_tx_meta *ena_meta)
{
struct ena_eth_io_tx_meta_desc *meta_desc = NULL;
- struct ena_com_tx_meta *ena_meta = &ena_tx_ctx->ena_meta;
meta_desc = get_sq_desc(io_sq);
memset(meta_desc, 0x0, sizeof(struct ena_eth_io_tx_meta_desc));
@@ -309,12 +313,13 @@ static inline void ena_com_create_and_store_tx_meta_desc(struct ena_com_io_sq *i
/* Extended meta desc */
meta_desc->len_ctrl |= ENA_ETH_IO_TX_META_DESC_ETH_META_TYPE_MASK;
- meta_desc->len_ctrl |= ENA_ETH_IO_TX_META_DESC_META_STORE_MASK;
meta_desc->len_ctrl |= (io_sq->phase <<
ENA_ETH_IO_TX_META_DESC_PHASE_SHIFT) &
ENA_ETH_IO_TX_META_DESC_PHASE_MASK;
meta_desc->len_ctrl |= ENA_ETH_IO_TX_META_DESC_FIRST_MASK;
+ meta_desc->len_ctrl |= ENA_ETH_IO_TX_META_DESC_META_STORE_MASK;
+
meta_desc->word2 |= ena_meta->l3_hdr_len &
ENA_ETH_IO_TX_META_DESC_L3_HDR_LEN_MASK;
meta_desc->word2 |= (ena_meta->l3_hdr_offset <<
@@ -325,16 +330,37 @@ static inline void ena_com_create_and_store_tx_meta_desc(struct ena_com_io_sq *i
ENA_ETH_IO_TX_META_DESC_L4_HDR_LEN_IN_WORDS_SHIFT) &
ENA_ETH_IO_TX_META_DESC_L4_HDR_LEN_IN_WORDS_MASK;
- meta_desc->len_ctrl |= ENA_ETH_IO_TX_META_DESC_META_STORE_MASK;
+ return ena_com_sq_update_tail(io_sq);
+}
- /* Cached the meta desc */
- memcpy(&io_sq->cached_tx_meta, ena_meta,
- sizeof(struct ena_com_tx_meta));
+static int ena_com_create_and_store_tx_meta_desc(struct ena_com_io_sq *io_sq,
+ struct ena_com_tx_ctx *ena_tx_ctx,
+ bool *have_meta)
+{
+ struct ena_com_tx_meta *ena_meta = &ena_tx_ctx->ena_meta;
- ena_com_sq_update_tail(io_sq);
+ /* When disable meta caching is set, don't bother to save the meta and
+ * compare it to the stored version, just create the meta
+ */
+ if (io_sq->disable_meta_caching) {
+ if (unlikely(!ena_tx_ctx->meta_valid))
+ return ENA_COM_INVAL;
+
+ *have_meta = true;
+ return ena_com_create_meta(io_sq, ena_meta);
+ } else if (ena_com_meta_desc_changed(io_sq, ena_tx_ctx)) {
+ *have_meta = true;
+ /* Cache the meta desc */
+ memcpy(&io_sq->cached_tx_meta, ena_meta,
+ sizeof(struct ena_com_tx_meta));
+ return ena_com_create_meta(io_sq, ena_meta);
+ } else {
+ *have_meta = false;
+ return ENA_COM_OK;
+ }
}
-static inline void ena_com_rx_set_flags(struct ena_com_rx_ctx *ena_rx_ctx,
+static void ena_com_rx_set_flags(struct ena_com_rx_ctx *ena_rx_ctx,
struct ena_eth_io_rx_cdesc_base *cdesc)
{
ena_rx_ctx->l3_proto = cdesc->status &
@@ -343,11 +369,14 @@ static inline void ena_com_rx_set_flags(struct ena_com_rx_ctx *ena_rx_ctx,
(cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_L4_PROTO_IDX_MASK) >>
ENA_ETH_IO_RX_CDESC_BASE_L4_PROTO_IDX_SHIFT;
ena_rx_ctx->l3_csum_err =
- (cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_L3_CSUM_ERR_MASK) >>
- ENA_ETH_IO_RX_CDESC_BASE_L3_CSUM_ERR_SHIFT;
+ !!((cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_L3_CSUM_ERR_MASK) >>
+ ENA_ETH_IO_RX_CDESC_BASE_L3_CSUM_ERR_SHIFT);
ena_rx_ctx->l4_csum_err =
- (cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_ERR_MASK) >>
- ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_ERR_SHIFT;
+ !!((cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_ERR_MASK) >>
+ ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_ERR_SHIFT);
+ ena_rx_ctx->l4_csum_checked =
+ !!((cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_CHECKED_MASK) >>
+ ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_CHECKED_SHIFT);
ena_rx_ctx->hash = cdesc->hash;
ena_rx_ctx->frag =
(cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_IPV4_FRAG_MASK) >>
@@ -385,7 +414,7 @@ int ena_com_prepare_tx(struct ena_com_io_sq *io_sq,
"wrong Q type");
/* num_bufs +1 for potential meta desc */
- if (!ena_com_sq_have_enough_space(io_sq, num_bufs + 1)) {
+ if (unlikely(!ena_com_sq_have_enough_space(io_sq, num_bufs + 1))) {
ena_trc_dbg("Not enough space in the tx queue\n");
return ENA_COM_NO_MEM;
}
@@ -396,23 +425,29 @@ int ena_com_prepare_tx(struct ena_com_io_sq *io_sq,
return ENA_COM_INVAL;
}
- if (unlikely((io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) && !buffer_to_push))
+ if (unlikely(io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV
+ && !buffer_to_push)) {
+ ena_trc_err("push header wasn't provided on LLQ mode\n");
return ENA_COM_INVAL;
+ }
rc = ena_com_write_header_to_bounce(io_sq, buffer_to_push, header_len);
if (unlikely(rc))
return rc;
- have_meta = ena_tx_ctx->meta_valid && ena_com_meta_desc_changed(io_sq,
- ena_tx_ctx);
- if (have_meta)
- ena_com_create_and_store_tx_meta_desc(io_sq, ena_tx_ctx);
+ rc = ena_com_create_and_store_tx_meta_desc(io_sq, ena_tx_ctx, &have_meta);
+ if (unlikely(rc)) {
+ ena_trc_err("failed to create and store tx meta desc\n");
+ return rc;
+ }
- /* If the caller doesn't want send packets */
+ /* If the caller doesn't want to send packets */
if (unlikely(!num_bufs && !header_len)) {
- ena_com_close_bounce_buffer(io_sq);
+ rc = ena_com_close_bounce_buffer(io_sq);
+ if (rc)
+ ena_trc_err("failed to write buffers to LLQ\n");
*nb_hw_desc = io_sq->tail - start_tail;
- return 0;
+ return rc;
}
desc = get_sq_desc(io_sq);
@@ -469,7 +504,11 @@ int ena_com_prepare_tx(struct ena_com_io_sq *io_sq,
for (i = 0; i < num_bufs; i++) {
/* The first desc share the same desc as the header */
if (likely(i != 0)) {
- ena_com_sq_update_tail(io_sq);
+ rc = ena_com_sq_update_tail(io_sq);
+ if (unlikely(rc)) {
+ ena_trc_err("failed to update sq tail\n");
+ return rc;
+ }
desc = get_sq_desc(io_sq);
if (unlikely(!desc))
@@ -497,12 +536,18 @@ int ena_com_prepare_tx(struct ena_com_io_sq *io_sq,
/* set the last desc indicator */
desc->len_ctrl |= ENA_ETH_IO_TX_DESC_LAST_MASK;
- ena_com_sq_update_tail(io_sq);
+ rc = ena_com_sq_update_tail(io_sq);
+ if (unlikely(rc)) {
+ ena_trc_err("failed to update sq tail of the last descriptor\n");
+ return rc;
+ }
- ena_com_close_bounce_buffer(io_sq);
+ rc = ena_com_close_bounce_buffer(io_sq);
+ if (rc)
+ ena_trc_err("failed when closing bounce buffer\n");
*nb_hw_desc = io_sq->tail - start_tail;
- return 0;
+ return rc;
}
int ena_com_rx_pkt(struct ena_com_io_cq *io_cq,
@@ -513,7 +558,7 @@ int ena_com_rx_pkt(struct ena_com_io_cq *io_cq,
struct ena_eth_io_rx_cdesc_base *cdesc = NULL;
u16 cdesc_idx = 0;
u16 nb_hw_desc;
- u16 i;
+ u16 i = 0;
ENA_WARN(io_cq->direction != ENA_COM_IO_QUEUE_DIRECTION_RX,
"wrong Q type");
@@ -533,13 +578,14 @@ int ena_com_rx_pkt(struct ena_com_io_cq *io_cq,
return ENA_COM_NO_SPACE;
}
- for (i = 0; i < nb_hw_desc; i++) {
- cdesc = ena_com_rx_cdesc_idx_to_ptr(io_cq, cdesc_idx + i);
+ cdesc = ena_com_rx_cdesc_idx_to_ptr(io_cq, cdesc_idx);
+ ena_rx_ctx->pkt_offset = cdesc->offset;
+ do {
ena_buf->len = cdesc->length;
ena_buf->req_id = cdesc->req_id;
ena_buf++;
- }
+ } while ((++i < nb_hw_desc) && (cdesc = ena_com_rx_cdesc_idx_to_ptr(io_cq, cdesc_idx + i)));
/* Update SQ head ptr */
io_sq->next_to_comp += nb_hw_desc;
@@ -574,10 +620,10 @@ int ena_com_add_single_rx_desc(struct ena_com_io_sq *io_sq,
desc->length = ena_buf->len;
- desc->ctrl |= ENA_ETH_IO_RX_DESC_FIRST_MASK;
- desc->ctrl |= ENA_ETH_IO_RX_DESC_LAST_MASK;
- desc->ctrl |= io_sq->phase & ENA_ETH_IO_RX_DESC_PHASE_MASK;
- desc->ctrl |= ENA_ETH_IO_RX_DESC_COMP_REQ_MASK;
+ desc->ctrl = ENA_ETH_IO_RX_DESC_FIRST_MASK |
+ ENA_ETH_IO_RX_DESC_LAST_MASK |
+ (io_sq->phase & ENA_ETH_IO_RX_DESC_PHASE_MASK) |
+ ENA_ETH_IO_RX_DESC_COMP_REQ_MASK;
desc->req_id = req_id;
@@ -585,40 +631,16 @@ int ena_com_add_single_rx_desc(struct ena_com_io_sq *io_sq,
desc->buff_addr_hi =
((ena_buf->paddr & GENMASK_ULL(io_sq->dma_addr_bits - 1, 32)) >> 32);
- ena_com_sq_update_tail(io_sq);
-
- return 0;
+ return ena_com_sq_update_tail(io_sq);
}
-int ena_com_tx_comp_req_id_get(struct ena_com_io_cq *io_cq, u16 *req_id)
+bool ena_com_cq_empty(struct ena_com_io_cq *io_cq)
{
- u8 expected_phase, cdesc_phase;
- struct ena_eth_io_tx_cdesc *cdesc;
- u16 masked_head;
-
- masked_head = io_cq->head & (io_cq->q_depth - 1);
- expected_phase = io_cq->phase;
-
- cdesc = (struct ena_eth_io_tx_cdesc *)
- ((uintptr_t)io_cq->cdesc_addr.virt_addr +
- (masked_head * io_cq->cdesc_entry_size_in_bytes));
-
- /* When the current completion descriptor phase isn't the same as the
- * expected, it mean that the device still didn't update
- * this completion.
- */
- cdesc_phase = READ_ONCE(cdesc->flags) & ENA_ETH_IO_TX_CDESC_PHASE_MASK;
- if (cdesc_phase != expected_phase)
- return ENA_COM_TRY_AGAIN;
-
- if (unlikely(cdesc->req_id >= io_cq->q_depth)) {
- ena_trc_err("Invalid req id %d\n", cdesc->req_id);
- return ENA_COM_INVAL;
- }
-
- ena_com_cq_inc_head(io_cq);
-
- *req_id = READ_ONCE(cdesc->req_id);
+ struct ena_eth_io_rx_cdesc_base *cdesc;
- return 0;
+ cdesc = ena_com_get_next_rx_cdesc(io_cq);
+ if (cdesc)
+ return false;
+ else
+ return true;
}
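ena_com_rx_pkt() now also exports the per-packet offset (cdesc->offset) through ena_rx_ctx->pkt_offset, so a caller is expected to skip that many bytes of device-written headroom before handing the buffer up. A hedged sketch of such a caller (the mbuf handling is an illustrative assumption; only the ena_com call and the ena_com_rx_ctx fields come from this file):

    /* Illustrative only: receive one packet and honor pkt_offset. */
    static int
    rx_one_packet(struct ena_com_io_cq *io_cq, struct ena_com_io_sq *io_sq,
        struct ena_com_rx_ctx *rx_ctx, struct mbuf *mbuf)
    {
            int rc;

            /* Caller is assumed to have filled the rx_ctx buffer array
             * and max_bufs before this point. */
            rc = ena_com_rx_pkt(io_cq, io_sq, rx_ctx);
            if (unlikely(rc != 0))
                    return (rc);
            if (rx_ctx->descs == 0)
                    return (0);     /* nothing completed yet */

            /* Skip the headroom the device wrote before the payload. */
            m_adj(mbuf, rx_ctx->pkt_offset);
            return (0);
    }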
diff --git a/sys/contrib/ena-com/ena_eth_com.h b/sys/contrib/ena-com/ena_eth_com.h
index d0c8b9080589..4b91221ea093 100644
--- a/sys/contrib/ena-com/ena_eth_com.h
+++ b/sys/contrib/ena-com/ena_eth_com.h
@@ -1,7 +1,7 @@
/*-
* BSD LICENSE
*
- * Copyright (c) 2015-2017 Amazon.com, Inc. or its affiliates.
+ * Copyright (c) 2015-2020 Amazon.com, Inc. or its affiliates.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -71,11 +71,13 @@ struct ena_com_rx_ctx {
enum ena_eth_io_l4_proto_index l4_proto;
bool l3_csum_err;
bool l4_csum_err;
+ u8 l4_csum_checked;
/* fragmented packet */
bool frag;
u32 hash;
u16 descs;
int max_bufs;
+ u8 pkt_offset;
};
int ena_com_prepare_tx(struct ena_com_io_sq *io_sq,
@@ -90,7 +92,7 @@ int ena_com_add_single_rx_desc(struct ena_com_io_sq *io_sq,
struct ena_com_buf *ena_buf,
u16 req_id);
-int ena_com_tx_comp_req_id_get(struct ena_com_io_cq *io_cq, u16 *req_id);
+bool ena_com_cq_empty(struct ena_com_io_cq *io_cq);
static inline void ena_com_unmask_intr(struct ena_com_io_cq *io_cq,
struct ena_eth_io_intr_reg *intr_reg)
@@ -98,7 +100,7 @@ static inline void ena_com_unmask_intr(struct ena_com_io_cq *io_cq,
ENA_REG_WRITE32(io_cq->bus, intr_reg->intr_control, io_cq->unmask_reg);
}
-static inline int ena_com_free_desc(struct ena_com_io_sq *io_sq)
+static inline int ena_com_free_q_entries(struct ena_com_io_sq *io_sq)
{
u16 tail, next_to_comp, cnt;
@@ -116,7 +118,7 @@ static inline bool ena_com_sq_have_enough_space(struct ena_com_io_sq *io_sq,
int temp;
if (io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST)
- return ena_com_free_desc(io_sq) >= required_buffers;
+ return ena_com_free_q_entries(io_sq) >= required_buffers;
/* This calculation doesn't need to be 100% accurate. So to reduce
* the calculation overhead just Subtract 2 lines from the free descs
@@ -125,20 +127,72 @@ static inline bool ena_com_sq_have_enough_space(struct ena_com_io_sq *io_sq,
*/
temp = required_buffers / io_sq->llq_info.descs_per_entry + 2;
- return ena_com_free_desc(io_sq) > temp;
+ return ena_com_free_q_entries(io_sq) > temp;
}
-static inline int ena_com_write_sq_doorbell(struct ena_com_io_sq *io_sq)
+static inline bool ena_com_meta_desc_changed(struct ena_com_io_sq *io_sq,
+ struct ena_com_tx_ctx *ena_tx_ctx)
{
- u16 tail;
+ if (!ena_tx_ctx->meta_valid)
+ return false;
- tail = io_sq->tail;
+ return !!memcmp(&io_sq->cached_tx_meta,
+ &ena_tx_ctx->ena_meta,
+ sizeof(struct ena_com_tx_meta));
+}
+
+static inline bool is_llq_max_tx_burst_exists(struct ena_com_io_sq *io_sq)
+{
+ return (io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) &&
+ io_sq->llq_info.max_entries_in_tx_burst > 0;
+}
+
+static inline bool ena_com_is_doorbell_needed(struct ena_com_io_sq *io_sq,
+ struct ena_com_tx_ctx *ena_tx_ctx)
+{
+ struct ena_com_llq_info *llq_info;
+ int descs_after_first_entry;
+ int num_entries_needed = 1;
+ u16 num_descs;
+
+ if (!is_llq_max_tx_burst_exists(io_sq))
+ return false;
+
+ llq_info = &io_sq->llq_info;
+ num_descs = ena_tx_ctx->num_bufs;
+
+ if (llq_info->disable_meta_caching ||
+ unlikely(ena_com_meta_desc_changed(io_sq, ena_tx_ctx)))
+ ++num_descs;
+
+ if (num_descs > llq_info->descs_num_before_header) {
+ descs_after_first_entry = num_descs - llq_info->descs_num_before_header;
+ num_entries_needed += DIV_ROUND_UP(descs_after_first_entry,
+ llq_info->descs_per_entry);
+ }
+
+ ena_trc_dbg("queue: %d num_descs: %d num_entries_needed: %d\n",
+ io_sq->qid, num_descs, num_entries_needed);
+
+ return num_entries_needed > io_sq->entries_in_tx_burst_left;
+}
+
+static inline int ena_com_write_sq_doorbell(struct ena_com_io_sq *io_sq)
+{
+ u16 max_entries_in_tx_burst = io_sq->llq_info.max_entries_in_tx_burst;
+ u16 tail = io_sq->tail;
ena_trc_dbg("write submission queue doorbell for queue: %d tail: %d\n",
io_sq->qid, tail);
ENA_REG_WRITE32(io_sq->bus, tail, io_sq->db_addr);
+ if (is_llq_max_tx_burst_exists(io_sq)) {
+ ena_trc_dbg("reset available entries in tx burst for queue %d to %d\n",
+ io_sq->qid, max_entries_in_tx_burst);
+ io_sq->entries_in_tx_burst_left = max_entries_in_tx_burst;
+ }
+
return 0;
}
@@ -147,15 +201,17 @@ static inline int ena_com_update_dev_comp_head(struct ena_com_io_cq *io_cq)
u16 unreported_comp, head;
bool need_update;
- head = io_cq->head;
- unreported_comp = head - io_cq->last_head_update;
- need_update = unreported_comp > (io_cq->q_depth / ENA_COMP_HEAD_THRESH);
-
- if (io_cq->cq_head_db_reg && need_update) {
- ena_trc_dbg("Write completion queue doorbell for queue %d: head: %d\n",
- io_cq->qid, head);
- ENA_REG_WRITE32(io_cq->bus, head, io_cq->cq_head_db_reg);
- io_cq->last_head_update = head;
+ if (unlikely(io_cq->cq_head_db_reg)) {
+ head = io_cq->head;
+ unreported_comp = head - io_cq->last_head_update;
+ need_update = unreported_comp > (io_cq->q_depth / ENA_COMP_HEAD_THRESH);
+
+ if (unlikely(need_update)) {
+ ena_trc_dbg("Write completion queue doorbell for queue %d: head: %d\n",
+ io_cq->qid, head);
+ ENA_REG_WRITE32(io_cq->bus, head, io_cq->cq_head_db_reg);
+ io_cq->last_head_update = head;
+ }
}
return 0;
@@ -180,6 +236,50 @@ static inline void ena_com_comp_ack(struct ena_com_io_sq *io_sq, u16 elem)
io_sq->next_to_comp += elem;
}
+static inline void ena_com_cq_inc_head(struct ena_com_io_cq *io_cq)
+{
+ io_cq->head++;
+
+ /* Switch phase bit in case of wrap around */
+ if (unlikely((io_cq->head & (io_cq->q_depth - 1)) == 0))
+ io_cq->phase ^= 1;
+}
+
+static inline int ena_com_tx_comp_req_id_get(struct ena_com_io_cq *io_cq,
+ u16 *req_id)
+{
+ u8 expected_phase, cdesc_phase;
+ struct ena_eth_io_tx_cdesc *cdesc;
+ u16 masked_head;
+
+ masked_head = io_cq->head & (io_cq->q_depth - 1);
+ expected_phase = io_cq->phase;
+
+ cdesc = (struct ena_eth_io_tx_cdesc *)
+ ((uintptr_t)io_cq->cdesc_addr.virt_addr +
+ (masked_head * io_cq->cdesc_entry_size_in_bytes));
+
+ /* When the current completion descriptor phase isn't the same as the
+	 * expected, it means that the device hasn't updated
+ * this completion.
+ */
+ cdesc_phase = READ_ONCE16(cdesc->flags) & ENA_ETH_IO_TX_CDESC_PHASE_MASK;
+ if (cdesc_phase != expected_phase)
+ return ENA_COM_TRY_AGAIN;
+
+ dma_rmb();
+
+ *req_id = READ_ONCE16(cdesc->req_id);
+ if (unlikely(*req_id >= io_cq->q_depth)) {
+ ena_trc_err("Invalid req id %d\n", cdesc->req_id);
+ return ENA_COM_INVAL;
+ }
+
+ ena_com_cq_inc_head(io_cq);
+
+ return 0;
+}
+
#if defined(__cplusplus)
}
#endif
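With the LLQ TX-burst accounting added above, a transmit path is expected to ask ena_com_is_doorbell_needed() before preparing a packet and to ring the doorbell early when the remaining burst budget would be exceeded; ena_com_write_sq_doorbell() then refills entries_in_tx_burst_left. A minimal sketch of that ordering (the surrounding function is an illustrative assumption, not part of the commit):

    /* Illustrative only: flush the LLQ burst before it overflows. */
    static void
    tx_prepare_one(struct ena_com_io_sq *io_sq, struct ena_com_tx_ctx *tx_ctx)
    {
            if (ena_com_is_doorbell_needed(io_sq, tx_ctx))
                    ena_com_write_sq_doorbell(io_sq);

            /* ... ena_com_prepare_tx(io_sq, tx_ctx, &nb_hw_desc) follows ... */
    }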
diff --git a/sys/contrib/ena-com/ena_plat.h b/sys/contrib/ena-com/ena_plat.h
index 6312ac252aed..e3536cdf3573 100644
--- a/sys/contrib/ena-com/ena_plat.h
+++ b/sys/contrib/ena-com/ena_plat.h
@@ -1,7 +1,7 @@
/*-
* BSD LICENSE
*
- * Copyright (c) 2015-2017 Amazon.com, Inc. or its affiliates.
+ * Copyright (c) 2015-2020 Amazon.com, Inc. or its affiliates.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -65,6 +65,7 @@ __FBSDID("$FreeBSD$");
#include <machine/in_cksum.h>
#include <machine/pcpu.h>
#include <machine/resource.h>
+#include <machine/_inttypes.h>
#include <net/bpf.h>
#include <net/ethernet.h>
@@ -103,6 +104,7 @@ extern struct ena_bus_space ebs;
#define ENA_RSC (1 << 6) /* Goes with TXPTH or RXPTH, free/alloc res. */
#define ENA_IOQ (1 << 7) /* Detailed info about IO queues. */
#define ENA_ADMQ (1 << 8) /* Detailed info about admin queue. */
+#define ENA_NETMAP (1 << 9) /* Detailed info about netmap. */
extern int ena_log_level;
@@ -115,7 +117,7 @@ extern int ena_log_level;
#define ena_trace(level, fmt, args...) \
ena_trace_raw(level, "%s() [TID:%d]: " \
- fmt " \n", __func__, curthread->td_tid, ##args)
+ fmt, __func__, curthread->td_tid, ##args)
#define ena_trc_dbg(format, arg...) ena_trace(ENA_DBG, format, ##arg)
@@ -123,8 +125,8 @@ extern int ena_log_level;
#define ena_trc_warn(format, arg...) ena_trace(ENA_WARNING, format, ##arg)
#define ena_trc_err(format, arg...) ena_trace(ENA_ALERT, format, ##arg)
-#define unlikely(x) __predict_false(x)
-#define likely(x) __predict_true(x)
+#define unlikely(x) __predict_false(!!(x))
+#define likely(x) __predict_true(!!(x))
#define __iomem
#define ____cacheline_aligned __aligned(CACHE_LINE_SIZE)
@@ -163,7 +165,7 @@ static inline long PTR_ERR(const void *ptr)
return (long) ptr;
}
-#define GENMASK(h, l) (((1U << ((h) - (l) + 1)) - 1) << (l))
+#define GENMASK(h, l) (((~0U) - (1U << (l)) + 1) & (~0U >> (32 - 1 - (h))))
#define GENMASK_ULL(h, l) (((~0ULL) << (l)) & (~0ULL >> (64 - 1 - (h))))
#define BIT(x) (1UL << (x))
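The GENMASK() redefinition avoids the undefined shift the old form hit on full-width fields: for GENMASK(31, 0) the old expression shifts 1U by 32, while the new expression, mirroring GENMASK_ULL(), stays defined. A worked expansion (editorial illustration only):

    /*
     * GENMASK(12, 8) -> (~0U - (1U << 8) + 1) & (~0U >> (32 - 1 - 12))
     *                -> 0xffffff00 & 0x00001fff
     *                -> 0x00001f00            (bits 12..8 set)
     * GENMASK(31, 0) -> 0xffffffff             (old form: 1U << 32, undefined)
     */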
@@ -185,10 +187,11 @@ static inline long PTR_ERR(const void *ptr)
#define ENA_COM_TIMER_EXPIRED ETIMEDOUT
#define ENA_MSLEEP(x) pause_sbt("ena", SBT_1MS * (x), SBT_1MS, 0)
+#define ENA_USLEEP(x) pause_sbt("ena", SBT_1US * (x), SBT_1US, 0)
#define ENA_UDELAY(x) DELAY(x)
#define ENA_GET_SYSTEM_TIMEOUT(timeout_us) \
((long)cputick2usec(cpu_ticks()) + (timeout_us))
-#define ENA_TIME_EXPIRE(timeout) ((timeout) < (long)cputick2usec(cpu_ticks()))
+#define ENA_TIME_EXPIRE(timeout) ((timeout) < cputick2usec(cpu_ticks()))
#define ENA_MIGHT_SLEEP()
#define min_t(type, _x, _y) ((type)(_x) < (type)(_y) ? (type)(_x) : (type)(_y))
@@ -275,11 +278,23 @@ struct ena_bus {
typedef uint32_t ena_atomic32_t;
+#define ENA_PRIu64 PRIu64
+
+typedef uint64_t ena_time_t;
+
void ena_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nseg,
int error);
int ena_dma_alloc(device_t dmadev, bus_size_t size, ena_mem_handle_t *dma,
int mapflags);
+static inline uint32_t
+ena_reg_read32(struct ena_bus *bus, bus_size_t offset)
+{
+ uint32_t v = bus_space_read_4(bus->reg_bar_t, bus->reg_bar_h, offset);
+ rmb();
+ return v;
+}
+
#define ENA_MEMCPY_TO_DEVICE_64(dst, src, size) \
do { \
int count, i; \
@@ -293,7 +308,11 @@ int ena_dma_alloc(device_t dmadev, bus_size_t size, ena_mem_handle_t *dma,
#define ENA_MEM_ALLOC(dmadev, size) malloc(size, M_DEVBUF, M_NOWAIT | M_ZERO)
#define ENA_MEM_ALLOC_NODE(dmadev, size, virt, node, dev_node) (virt = NULL)
-#define ENA_MEM_FREE(dmadev, ptr) free(ptr, M_DEVBUF)
+#define ENA_MEM_FREE(dmadev, ptr, size) \
+ do { \
+ (void)(size); \
+ free(ptr, M_DEVBUF); \
+ } while (0)
#define ENA_MEM_ALLOC_COHERENT_NODE(dmadev, size, virt, phys, handle, node, \
dev_node) \
do { \
@@ -320,34 +339,35 @@ int ena_dma_alloc(device_t dmadev, bus_size_t size, ena_mem_handle_t *dma,
/* Register R/W methods */
#define ENA_REG_WRITE32(bus, value, offset) \
+ do { \
+ wmb(); \
+ ENA_REG_WRITE32_RELAXED(bus, value, offset); \
+ } while (0)
+
+#define ENA_REG_WRITE32_RELAXED(bus, value, offset) \
bus_space_write_4( \
((struct ena_bus*)bus)->reg_bar_t, \
((struct ena_bus*)bus)->reg_bar_h, \
(bus_size_t)(offset), (value))
#define ENA_REG_READ32(bus, offset) \
- bus_space_read_4( \
- ((struct ena_bus*)bus)->reg_bar_t, \
- ((struct ena_bus*)bus)->reg_bar_h, \
- (bus_size_t)(offset))
+ ena_reg_read32((struct ena_bus*)(bus), (bus_size_t)(offset))
-#define ENA_DB_SYNC(mem_handle) bus_dmamap_sync((mem_handle)->tag, \
- (mem_handle)->map, BUS_DMASYNC_PREREAD)
+#define ENA_DB_SYNC_WRITE(mem_handle) bus_dmamap_sync( \
+ (mem_handle)->tag, (mem_handle)->map, BUS_DMASYNC_PREWRITE)
+#define ENA_DB_SYNC_PREREAD(mem_handle) bus_dmamap_sync( \
+ (mem_handle)->tag, (mem_handle)->map, BUS_DMASYNC_PREREAD)
+#define ENA_DB_SYNC_POSTREAD(mem_handle) bus_dmamap_sync( \
+ (mem_handle)->tag, (mem_handle)->map, BUS_DMASYNC_POSTREAD)
+#define ENA_DB_SYNC(mem_handle) ENA_DB_SYNC_WRITE(mem_handle)
#define time_after(a,b) ((long)((unsigned long)(b) - (unsigned long)(a)) < 0)
#define VLAN_HLEN sizeof(struct ether_vlan_header)
#define CSUM_OFFLOAD (CSUM_IP|CSUM_TCP|CSUM_UDP)
-#if defined(__i386__) || defined(__amd64__)
-static __inline
-void prefetch(void *x)
-{
- __asm volatile("prefetcht0 %0" :: "m" (*(unsigned long *)x));
-}
-#else
-#define prefetch(x)
-#endif
+#define prefetch(x) (void)(x)
+#define prefetchw(x) (void)(x)
/* DMA buffers access */
#define dma_unmap_addr(p, name) ((p)->dma->name)
@@ -363,6 +383,9 @@ void prefetch(void *x)
#define ATOMIC32_SET(I32_PTR, VAL) atomic_store_rel_int(I32_PTR, VAL)
#define barrier() __asm__ __volatile__("": : :"memory")
+#define dma_rmb() barrier()
+#define mmiowb() barrier()
+
#define ACCESS_ONCE(x) (*(volatile __typeof(x) *)&(x))
#define READ_ONCE(x) ({ \
__typeof(x) __var; \
@@ -371,6 +394,20 @@ void prefetch(void *x)
barrier(); \
__var; \
})
+#define READ_ONCE8(x) READ_ONCE(x)
+#define READ_ONCE16(x) READ_ONCE(x)
+#define READ_ONCE32(x) READ_ONCE(x)
+
+#define upper_32_bits(n) ((uint32_t)(((n) >> 16) >> 16))
+#define lower_32_bits(n) ((uint32_t)(n))
+
+#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
+
+#define ENA_FFS(x) ffs(x)
+
+void ena_rss_key_fill(void *key, size_t size);
+
+#define ENA_RSS_FILL_KEY(key, size) ena_rss_key_fill(key, size)
#include "ena_defs/ena_includes.h"
diff --git a/sys/dev/ena/ena.c b/sys/dev/ena/ena.c
index 1900d176371e..1bd25c231967 100644
--- a/sys/dev/ena/ena.c
+++ b/sys/dev/ena/ena.c
@@ -1,7 +1,7 @@
/*-
* BSD LICENSE
*
- * Copyright (c) 2015-2017 Amazon.com, Inc. or its affiliates.
+ * Copyright (c) 2015-2020 Amazon.com, Inc. or its affiliates.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -59,11 +59,9 @@ __FBSDID("$FreeBSD$");
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>
-#include <net/rss_config.h>
#include <net/if_types.h>
#include <net/if_vlan_var.h>
-#include <netinet/in_rss.h>
#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
@@ -75,15 +73,22 @@ __FBSDID("$FreeBSD$");
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>
+#include <vm/vm.h>
+#include <vm/pmap.h>
+
+#include "ena_datapath.h"
#include "ena.h"
#include "ena_sysctl.h"
+#ifdef DEV_NETMAP
+#include "ena_netmap.h"
+#endif /* DEV_NETMAP */
+
/*********************************************************
* Function prototypes
*********************************************************/
static int ena_probe(device_t);
static void ena_intr_msix_mgmnt(void *);
-static int ena_allocate_pci_resources(struct ena_adapter*);
static void ena_free_pci_resources(struct ena_adapter *);
static int ena_change_mtu(if_t, int);
static inline void ena_alloc_counters(counter_u64_t *, int);
@@ -91,6 +96,8 @@ static inline void ena_free_counters(counter_u64_t *, int);
static inline void ena_reset_counters(counter_u64_t *, int);
static void ena_init_io_rings_common(struct ena_adapter *,
struct ena_ring *, uint16_t);
+static void ena_init_io_rings_basic(struct ena_adapter *);
+static void ena_init_io_rings_advanced(struct ena_adapter *);
static void ena_init_io_rings(struct ena_adapter *);
static void ena_free_io_ring_resources(struct ena_adapter *, unsigned int);
static void ena_free_all_io_rings_resources(struct ena_adapter *);
@@ -98,11 +105,11 @@ static int ena_setup_tx_dma_tag(struct ena_adapter *);
static int ena_free_tx_dma_tag(struct ena_adapter *);
static int ena_setup_rx_dma_tag(struct ena_adapter *);
static int ena_free_rx_dma_tag(struct ena_adapter *);
+static void ena_release_all_tx_dmamap(struct ena_ring *);
static int ena_setup_tx_resources(struct ena_adapter *, int);
static void ena_free_tx_resources(struct ena_adapter *, int);
static int ena_setup_all_tx_resources(struct ena_adapter *);
static void ena_free_all_tx_resources(struct ena_adapter *);
-static inline int validate_rx_req_id(struct ena_ring *, uint16_t);
static int ena_setup_rx_resources(struct ena_adapter *, unsigned int);
static void ena_free_rx_resources(struct ena_adapter *, unsigned int);
static int ena_setup_all_rx_resources(struct ena_adapter *);
@@ -111,7 +118,6 @@ static inline int ena_alloc_rx_mbuf(struct ena_adapter *, struct ena_ring *,
struct ena_rx_buffer *);
static void ena_free_rx_mbuf(struct ena_adapter *, struct ena_ring *,
struct ena_rx_buffer *);
-static int ena_refill_rx_bufs(struct ena_ring *, uint32_t);
static void ena_free_rx_bufs(struct ena_adapter *, unsigned int);
static void ena_refill_all_rx_bufs(struct ena_adapter *);
static void ena_free_all_rx_bufs(struct ena_adapter *);
@@ -121,20 +127,10 @@ static void ena_destroy_all_tx_queues(struct ena_adapter *);
static void ena_destroy_all_rx_queues(struct ena_adapter *);
static void ena_destroy_all_io_queues(struct ena_adapter *);
static int ena_create_io_queues(struct ena_adapter *);
-static int ena_tx_cleanup(struct ena_ring *);
-static void ena_deferred_rx_cleanup(void *, int);
-static int ena_rx_cleanup(struct ena_ring *);
-static inline int validate_tx_req_id(struct ena_ring *, uint16_t);
-static void ena_rx_hash_mbuf(struct ena_ring *, struct ena_com_rx_ctx *,
- struct mbuf *);
-static struct mbuf* ena_rx_mbuf(struct ena_ring *, struct ena_com_rx_buf_info *,
- struct ena_com_rx_ctx *, uint16_t *);
-static inline void ena_rx_checksum(struct ena_ring *, struct ena_com_rx_ctx *,
- struct mbuf *);
-static void ena_handle_msix(void *);
+static int ena_handle_msix(void *);
static int ena_enable_msix(struct ena_adapter *);
static void ena_setup_mgmnt_intr(struct ena_adapter *);
-static void ena_setup_io_intr(struct ena_adapter *);
+static int ena_setup_io_intr(struct ena_adapter *);
static int ena_request_mgmnt_irq(struct ena_adapter *);
static int ena_request_io_irq(struct ena_adapter *);
static void ena_free_mgmnt_irq(struct ena_adapter *);
@@ -144,8 +140,6 @@ static void ena_disable_msix(struct ena_adapter *);
static void ena_unmask_all_io_irqs(struct ena_adapter *);
static int ena_rss_configure(struct ena_adapter *);
static int ena_up_complete(struct ena_adapter *);
-static int ena_up(struct ena_adapter *);
-static void ena_down(struct ena_adapter *);
static uint64_t ena_get_counter(if_t, ift_counter);
static int ena_media_change(if_t);
static void ena_media_status(if_t, struct ifmediareq *);
@@ -156,27 +150,20 @@ static void ena_update_host_info(struct ena_admin_host_info *, if_t);
static void ena_update_hwassist(struct ena_adapter *);
static int ena_setup_ifnet(device_t, struct ena_adapter *,
struct ena_com_dev_get_features_ctx *);
-static void ena_tx_csum(struct ena_com_tx_ctx *, struct mbuf *);
-static int ena_check_and_collapse_mbuf(struct ena_ring *tx_ring,
- struct mbuf **mbuf);
-static int ena_xmit_mbuf(struct ena_ring *, struct mbuf **);
-static void ena_start_xmit(struct ena_ring *);
-static int ena_mq_start(if_t, struct mbuf *);
-static void ena_deferred_mq_start(void *, int);
-static void ena_qflush(if_t);
-static int ena_calc_io_queue_num(struct ena_adapter *,
+static int ena_enable_wc(struct resource *);
+static int ena_set_queues_placement_policy(device_t, struct ena_com_dev *,
+ struct ena_admin_feature_llq_desc *, struct ena_llq_configurations *);
+static uint32_t ena_calc_max_io_queue_num(device_t, struct ena_com_dev *,
struct ena_com_dev_get_features_ctx *);
-static int ena_calc_queue_size(struct ena_adapter *, uint16_t *,
- uint16_t *, struct ena_com_dev_get_features_ctx *);
+static int ena_calc_io_queue_size(struct ena_calc_queue_size_ctx *);
static int ena_rss_init_default(struct ena_adapter *);
static void ena_rss_init_default_deferred(void *);
-static void ena_config_host_info(struct ena_com_dev *);
+static void ena_config_host_info(struct ena_com_dev *, device_t);
static int ena_attach(device_t);
static int ena_detach(device_t);
static int ena_device_init(struct ena_adapter *, device_t,
struct ena_com_dev_get_features_ctx *, int *);
-static int ena_enable_msix_and_set_admin_interrupts(struct ena_adapter *,
- int);
+static int ena_enable_msix_and_set_admin_interrupts(struct ena_adapter *);
static void ena_update_on_link_change(void *, struct ena_admin_aenq_entry *);
static void unimplemented_aenq_handler(void *,
struct ena_admin_aenq_entry *);
@@ -184,22 +171,6 @@ static void ena_timer_service(void *);
static char ena_version[] = DEVICE_NAME DRV_MODULE_NAME " v" DRV_MODULE_VERSION;
-static SYSCTL_NODE(_hw, OID_AUTO, ena, CTLFLAG_RD, 0, "ENA driver parameters");
-
-/*
- * Tuneable number of buffers in the buf-ring (drbr)
- */
-static int ena_buf_ring_size = 4096;
-SYSCTL_INT(_hw_ena, OID_AUTO, buf_ring_size, CTLFLAG_RWTUN,
- &ena_buf_ring_size, 0, "Size of the bufring");
-
-/*
- * Logging level for changing verbosity of the output
- */
-int ena_log_level = ENA_ALERT | ENA_WARNING;
-SYSCTL_INT(_hw_ena, OID_AUTO, log_level, CTLFLAG_RWTUN,
- &ena_log_level, 0, "Logging level indicating verbosity of the logs");
-
static ena_vendor_info_t ena_vendor_info_array[] = {
{ PCI_VENDOR_ID_AMAZON, PCI_DEV_ID_ENA_PF, 0},
{ PCI_VENDOR_ID_AMAZON, PCI_DEV_ID_ENA_LLQ_PF, 0},
@@ -270,6 +241,9 @@ ena_dma_alloc(device_t dmadev, bus_size_t size,
goto fail_map_load;
}
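+ /* Make the mapped memory coherent before the device accesses it. */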
+ bus_dmamap_sync(dma->tag, dma->map,
+ BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
+
return (0);
fail_map_load:
@@ -278,27 +252,31 @@ fail_map_create:
bus_dma_tag_destroy(dma->tag);
fail_tag:
dma->tag = NULL;
+ dma->vaddr = NULL;
+ dma->paddr = 0;
return (error);
}
-static int
-ena_allocate_pci_resources(struct ena_adapter* adapter)
+/*
+ * This function should generate a unique key for the whole driver.
+ * If the key was already generated in a previous call (for example
+ * for another adapter), then it should be returned instead.
+ */
+void
+ena_rss_key_fill(void *key, size_t size)
{
- device_t pdev = adapter->pdev;
- int rid;
+ static bool key_generated;
+ static uint8_t default_key[ENA_HASH_KEY_SIZE];
- rid = PCIR_BAR(ENA_REG_BAR);
- adapter->memory = NULL;
- adapter->registers = bus_alloc_resource_any(pdev, SYS_RES_MEMORY,
- &rid, RF_ACTIVE);
- if (unlikely(adapter->registers == NULL)) {
- device_printf(pdev, "Unable to allocate bus resource: "
- "registers\n");
- return (ENXIO);
+ KASSERT(size <= ENA_HASH_KEY_SIZE,
+     ("Requested more bytes than ENA RSS key can hold"));
+
+ if (!key_generated) {
+ arc4rand(default_key, ENA_HASH_KEY_SIZE, 0);
+ key_generated = true;
}
- return (0);
+ memcpy(key, default_key, size);
}
static void
@@ -332,7 +310,7 @@ ena_probe(device_t dev)
while (ent->vendor_id != 0) {
if ((pci_vendor_id == ent->vendor_id) &&
(pci_device_id == ent->device_id)) {
- ena_trace(ENA_DBG, "vendor=%x device=%x ",
+ ena_trace(ENA_DBG, "vendor=%x device=%x\n",
pci_vendor_id, pci_device_id);
sprintf(adapter_name, DEVICE_DESC);
@@ -407,10 +385,12 @@ ena_init_io_rings_common(struct ena_adapter *adapter, struct ena_ring *ring,
ring->qid = qid;
ring->adapter = adapter;
ring->ena_dev = adapter->ena_dev;
+ ring->first_interrupt = false;
+ ring->no_interrupt_event_cnt = 0;
}
static void
-ena_init_io_rings(struct ena_adapter *adapter)
+ena_init_io_rings_basic(struct ena_adapter *adapter)
{
struct ena_com_dev *ena_dev;
struct ena_ring *txr, *rxr;
@@ -419,7 +399,7 @@ ena_init_io_rings(struct ena_adapter *adapter)
ena_dev = adapter->ena_dev;
- for (i = 0; i < adapter->num_queues; i++) {
+ for (i = 0; i < adapter->num_io_queues; i++) {
txr = &adapter->tx_ring[i];
rxr = &adapter->rx_ring[i];
@@ -428,26 +408,43 @@ ena_init_io_rings(struct ena_adapter *adapter)
ena_init_io_rings_common(adapter, rxr, i);
/* TX specific ring state */
- txr->ring_size = adapter->tx_ring_size;
txr->tx_max_header_size = ena_dev->tx_max_header_size;
txr->tx_mem_queue_type = ena_dev->tx_mem_queue_type;
- txr->smoothed_interval =
- ena_com_get_nonadaptive_moderation_interval_tx(ena_dev);
+
+ que = &adapter->que[i];
+ que->adapter = adapter;
+ que->id = i;
+ que->tx_ring = txr;
+ que->rx_ring = rxr;
+
+ txr->que = que;
+ rxr->que = que;
+
+ rxr->empty_rx_queue = 0;
+ rxr->rx_mbuf_sz = ena_mbuf_sz;
+ }
+}
+
+static void
+ena_init_io_rings_advanced(struct ena_adapter *adapter)
+{
+ struct ena_ring *txr, *rxr;
+ int i;
+
+ for (i = 0; i < adapter->num_io_queues; i++) {
+ txr = &adapter->tx_ring[i];
+ rxr = &adapter->rx_ring[i];
/* Allocate a buf ring */
- txr->br = buf_ring_alloc(ena_buf_ring_size, M_DEVBUF,
+ txr->buf_ring_size = adapter->buf_ring_size;
+ txr->br = buf_ring_alloc(txr->buf_ring_size, M_DEVBUF,
M_WAITOK, &txr->ring_mtx);
- /* Alloc TX statistics. */
+ /* Allocate Tx statistics. */
ena_alloc_counters((counter_u64_t *)&txr->tx_stats,
sizeof(txr->tx_stats));
- /* RX specific ring state */
- rxr->ring_size = adapter->rx_ring_size;
- rxr->smoothed_interval =
- ena_com_get_nonadaptive_moderation_interval_rx(ena_dev);
-
- /* Alloc RX statistics. */
+ /* Allocate Rx statistics. */
ena_alloc_counters((counter_u64_t *)&rxr->rx_stats,
sizeof(rxr->rx_stats));
@@ -458,22 +455,23 @@ ena_init_io_rings(struct ena_adapter *adapter)
device_get_nameunit(adapter->pdev), i);
mtx_init(&txr->ring_mtx, txr->mtx_name, NULL, MTX_DEF);
- mtx_init(&rxr->ring_mtx, rxr->mtx_name, NULL, MTX_DEF);
-
- que = &adapter->que[i];
- que->adapter = adapter;
- que->id = i;
- que->tx_ring = txr;
- que->rx_ring = rxr;
-
- txr->que = que;
- rxr->que = que;
-
- rxr->empty_rx_queue = 0;
}
}
static void
+ena_init_io_rings(struct ena_adapter *adapter)
+{
+ /*
+ * IO rings initialization can be divided into 2 steps:
+ * 1. Initialize variables and fields with initial values and copy
+ * them from adapter/ena_dev (basic)
+ * 2. Allocate mutex, counters and buf_ring (advanced)
+ */
+ ena_init_io_rings_basic(adapter);
+ ena_init_io_rings_advanced(adapter);
+}
+
+static void
ena_free_io_ring_resources(struct ena_adapter *adapter, unsigned int qid)
{
struct ena_ring *txr = &adapter->tx_ring[qid];
@@ -489,7 +487,6 @@ ena_free_io_ring_resources(struct ena_adapter *adapter, unsigned int qid)
ENA_RING_MTX_UNLOCK(txr);
mtx_destroy(&txr->ring_mtx);
- mtx_destroy(&rxr->ring_mtx);
}
static void
@@ -497,7 +494,7 @@ ena_free_all_io_rings_resources(struct ena_adapter *adapter)
{
int i;
- for (i = 0; i < adapter->num_queues; i++)
+ for (i = 0; i < adapter->num_io_queues; i++)
ena_free_io_ring_resources(adapter, i);
}
@@ -548,9 +545,9 @@ ena_setup_rx_dma_tag(struct ena_adapter *adapter)
ENA_DMA_BIT_MASK(adapter->dma_width), /* lowaddr of excl window */
BUS_SPACE_MAXADDR, /* highaddr of excl window */
NULL, NULL, /* filter, filterarg */
- MJUM16BYTES, /* maxsize */
+ ena_mbuf_sz, /* maxsize */
adapter->max_rx_sgl_size, /* nsegments */
- MJUM16BYTES, /* maxsegsize */
+ ena_mbuf_sz, /* maxsegsize */
0, /* flags */
NULL, /* lockfunc */
NULL, /* lockarg */
@@ -572,6 +569,39 @@ ena_free_rx_dma_tag(struct ena_adapter *adapter)
return (ret);
}
+static void
+ena_release_all_tx_dmamap(struct ena_ring *tx_ring)
+{
+ struct ena_adapter *adapter = tx_ring->adapter;
+ struct ena_tx_buffer *tx_info;
+ bus_dma_tag_t tx_tag = adapter->tx_buf_tag;
+ int i;
+#ifdef DEV_NETMAP
+ struct ena_netmap_tx_info *nm_info;
+ int j;
+#endif /* DEV_NETMAP */
+
+ for (i = 0; i < tx_ring->ring_size; ++i) {
+ tx_info = &tx_ring->tx_buffer_info[i];
+#ifdef DEV_NETMAP
+ if (adapter->ifp->if_capenable & IFCAP_NETMAP) {
+ nm_info = &tx_info->nm_info;
+ for (j = 0; j < ENA_PKT_MAX_BUFS; ++j) {
+ if (nm_info->map_seg[j] != NULL) {
+ bus_dmamap_destroy(tx_tag,
+ nm_info->map_seg[j]);
+ nm_info->map_seg[j] = NULL;
+ }
+ }
+ }
+#endif /* DEV_NETMAP */
+ if (tx_info->dmamap != NULL) {
+ bus_dmamap_destroy(tx_tag, tx_info->dmamap);
+ tx_info->dmamap = NULL;
+ }
+ }
+}
+
/**
* ena_setup_tx_resources - allocate Tx resources (Descriptors)
* @adapter: network interface device structure
@@ -585,9 +615,12 @@ ena_setup_tx_resources(struct ena_adapter *adapter, int qid)
struct ena_que *que = &adapter->que[qid];
struct ena_ring *tx_ring = que->tx_ring;
int size, i, err;
-#ifdef RSS
- cpuset_t cpu_mask;
-#endif
+#ifdef DEV_NETMAP
+ bus_dmamap_t *map;
+ int j;
+
+ ena_netmap_reset_tx_ring(adapter, qid);
+#endif /* DEV_NETMAP */
size = sizeof(struct ena_tx_buffer) * tx_ring->ring_size;
@@ -600,6 +633,12 @@ ena_setup_tx_resources(struct ena_adapter *adapter, int qid)
if (unlikely(tx_ring->free_tx_ids == NULL))
goto err_buf_info_free;
+ size = tx_ring->tx_max_header_size;
+ tx_ring->push_buf_intermediate_buf = malloc(size, M_DEVBUF,
+ M_NOWAIT | M_ZERO);
+ if (unlikely(tx_ring->push_buf_intermediate_buf == NULL))
+ goto err_tx_ids_free;
+
/* Req id stack for TX OOO completions */
for (i = 0; i < tx_ring->ring_size; i++)
tx_ring->free_tx_ids[i] = i;
@@ -610,6 +649,7 @@ ena_setup_tx_resources(struct ena_adapter *adapter, int qid)
tx_ring->next_to_use = 0;
tx_ring->next_to_clean = 0;
+ tx_ring->acum_pkts = 0;
/* Make sure that drbr is empty */
ENA_RING_MTX_LOCK(tx_ring);
@@ -619,12 +659,28 @@ ena_setup_tx_resources(struct ena_adapter *adapter, int qid)
/* ... and create the buffer DMA maps */
for (i = 0; i < tx_ring->ring_size; i++) {
err = bus_dmamap_create(adapter->tx_buf_tag, 0,
- &tx_ring->tx_buffer_info[i].map);
+ &tx_ring->tx_buffer_info[i].dmamap);
if (unlikely(err != 0)) {
ena_trace(ENA_ALERT,
- "Unable to create Tx DMA map for buffer %d\n", i);
- goto err_buf_info_unmap;
+ "Unable to create Tx DMA map for buffer %d\n",
+ i);
+ goto err_map_release;
}
+
+#ifdef DEV_NETMAP
+ if (adapter->ifp->if_capenable & IFCAP_NETMAP) {
+ map = tx_ring->tx_buffer_info[i].nm_info.map_seg;
+ for (j = 0; j < ENA_PKT_MAX_BUFS; j++) {
+ err = bus_dmamap_create(adapter->tx_buf_tag, 0,
+ &map[j]);
+ if (unlikely(err != 0)) {
+ ena_trace(ENA_ALERT, "Unable to create "
+ "Tx DMA for buffer %d %d\n", i, j);
+ goto err_map_release;
+ }
+ }
+ }
+#endif /* DEV_NETMAP */
}
/* Allocate taskqueues */
@@ -635,27 +691,19 @@ ena_setup_tx_resources(struct ena_adapter *adapter, int qid)
ena_trace(ENA_ALERT,
"Unable to create taskqueue for enqueue task\n");
i = tx_ring->ring_size;
- goto err_buf_info_unmap;
+ goto err_map_release;
}
- /* RSS set cpu for thread */
-#ifdef RSS
- CPU_SETOF(que->cpu, &cpu_mask);
- taskqueue_start_threads_cpuset(&tx_ring->enqueue_tq, 1, PI_NET,
- &cpu_mask, "%s tx_ring enq (bucket %d)",
- device_get_nameunit(adapter->pdev), que->cpu);
-#else /* RSS */
+ tx_ring->running = true;
+
taskqueue_start_threads(&tx_ring->enqueue_tq, 1, PI_NET,
"%s txeq %d", device_get_nameunit(adapter->pdev), que->cpu);
-#endif /* RSS */
return (0);
-err_buf_info_unmap:
- while (i--) {
- bus_dmamap_destroy(adapter->tx_buf_tag,
- tx_ring->tx_buffer_info[i].map);
- }
+err_map_release:
+ ena_release_all_tx_dmamap(tx_ring);
+err_tx_ids_free:
free(tx_ring->free_tx_ids, M_DEVBUF);
tx_ring->free_tx_ids = NULL;
err_buf_info_free:
@@ -676,6 +724,10 @@ static void
ena_free_tx_resources(struct ena_adapter *adapter, int qid)
{
struct ena_ring *tx_ring = &adapter->tx_ring[qid];
+#ifdef DEV_NETMAP
+ struct ena_netmap_tx_info *nm_info;
+ int j;
+#endif /* DEV_NETMAP */
while (taskqueue_cancel(tx_ring->enqueue_tq, &tx_ring->enqueue_task,
NULL))
@@ -689,12 +741,33 @@ ena_free_tx_resources(struct ena_adapter *adapter, int qid)
/* Free buffer DMA maps, */
for (int i = 0; i < tx_ring->ring_size; i++) {
- m_freem(tx_ring->tx_buffer_info[i].mbuf);
- tx_ring->tx_buffer_info[i].mbuf = NULL;
+ bus_dmamap_sync(adapter->tx_buf_tag,
+ tx_ring->tx_buffer_info[i].dmamap, BUS_DMASYNC_POSTWRITE);
bus_dmamap_unload(adapter->tx_buf_tag,
- tx_ring->tx_buffer_info[i].map);
+ tx_ring->tx_buffer_info[i].dmamap);
bus_dmamap_destroy(adapter->tx_buf_tag,
- tx_ring->tx_buffer_info[i].map);
+ tx_ring->tx_buffer_info[i].dmamap);
+
+#ifdef DEV_NETMAP
+ if (adapter->ifp->if_capenable & IFCAP_NETMAP) {
+ nm_info = &tx_ring->tx_buffer_info[i].nm_info;
+ for (j = 0; j < ENA_PKT_MAX_BUFS; j++) {
+ if (nm_info->socket_buf_idx[j] != 0) {
+ bus_dmamap_sync(adapter->tx_buf_tag,
+ nm_info->map_seg[j],
+ BUS_DMASYNC_POSTWRITE);
+ ena_netmap_unload(adapter,
+ nm_info->map_seg[j]);
+ }
+ bus_dmamap_destroy(adapter->tx_buf_tag,
+ nm_info->map_seg[j]);
+ nm_info->socket_buf_idx[j] = 0;
+ }
+ }
+#endif /* DEV_NETMAP */
+
+ m_freem(tx_ring->tx_buffer_info[i].mbuf);
+ tx_ring->tx_buffer_info[i].mbuf = NULL;
}
ENA_RING_MTX_UNLOCK(tx_ring);
@@ -704,6 +777,9 @@ ena_free_tx_resources(struct ena_adapter *adapter, int qid)
free(tx_ring->free_tx_ids, M_DEVBUF);
tx_ring->free_tx_ids = NULL;
+
+ free(tx_ring->push_buf_intermediate_buf, M_DEVBUF);
+ tx_ring->push_buf_intermediate_buf = NULL;
}
/**
@@ -717,7 +793,7 @@ ena_setup_all_tx_resources(struct ena_adapter *adapter)
{
int i, rc;
- for (i = 0; i < adapter->num_queues; i++) {
+ for (i = 0; i < adapter->num_io_queues; i++) {
rc = ena_setup_tx_resources(adapter, i);
if (rc != 0) {
device_printf(adapter->pdev,
@@ -746,27 +822,10 @@ ena_free_all_tx_resources(struct ena_adapter *adapter)
{
int i;
- for (i = 0; i < adapter->num_queues; i++)
+ for (i = 0; i < adapter->num_io_queues; i++)
ena_free_tx_resources(adapter, i);
}
-static inline int
-validate_rx_req_id(struct ena_ring *rx_ring, uint16_t req_id)
-{
- if (likely(req_id < rx_ring->ring_size))
- return (0);
-
- device_printf(rx_ring->adapter->pdev, "Invalid rx req_id: %hu\n",
- req_id);
- counter_u64_add(rx_ring->rx_stats.bad_req_id, 1);
-
- /* Trigger device reset */
- rx_ring->adapter->reset_reason = ENA_REGS_RESET_INV_RX_REQ_ID;
- rx_ring->adapter->trigger_reset = true;
-
- return (EFAULT);
-}
-
/**
* ena_setup_rx_resources - allocate Rx resources (Descriptors)
* @adapter: network interface device structure
@@ -780,12 +839,14 @@ ena_setup_rx_resources(struct ena_adapter *adapter, unsigned int qid)
struct ena_que *que = &adapter->que[qid];
struct ena_ring *rx_ring = que->rx_ring;
int size, err, i;
-#ifdef RSS
- cpuset_t cpu_mask;
-#endif
size = sizeof(struct ena_rx_buffer) * rx_ring->ring_size;
+#ifdef DEV_NETMAP
+ ena_netmap_reset_rx_ring(adapter, qid);
+ rx_ring->initialized = false;
+#endif /* DEV_NETMAP */
+
/*
* Alloc extra element so in rx path
* we can always prefetch rx_info + 1
@@ -831,22 +892,6 @@ ena_setup_rx_resources(struct ena_adapter *adapter, unsigned int qid)
}
}
- /* Allocate taskqueues */
- TASK_INIT(&rx_ring->cmpl_task, 0, ena_deferred_rx_cleanup, rx_ring);
- rx_ring->cmpl_tq = taskqueue_create_fast("ena RX completion", M_WAITOK,
- taskqueue_thread_enqueue, &rx_ring->cmpl_tq);
-
- /* RSS set cpu for thread */
-#ifdef RSS
- CPU_SETOF(que->cpu, &cpu_mask);
- taskqueue_start_threads_cpuset(&rx_ring->cmpl_tq, 1, PI_NET, &cpu_mask,
- "%s rx_ring cmpl (bucket %d)",
- device_get_nameunit(adapter->pdev), que->cpu);
-#else
- taskqueue_start_threads(&rx_ring->cmpl_tq, 1, PI_NET,
- "%s rx_ring cmpl %d", device_get_nameunit(adapter->pdev), que->cpu);
-#endif
-
return (0);
err_buf_info_unmap:
@@ -874,13 +919,10 @@ ena_free_rx_resources(struct ena_adapter *adapter, unsigned int qid)
{
struct ena_ring *rx_ring = &adapter->rx_ring[qid];
- while (taskqueue_cancel(rx_ring->cmpl_tq, &rx_ring->cmpl_task, NULL) != 0)
- taskqueue_drain(rx_ring->cmpl_tq, &rx_ring->cmpl_task);
-
- taskqueue_free(rx_ring->cmpl_tq);
-
/* Free buffer DMA maps, */
for (int i = 0; i < rx_ring->ring_size; i++) {
+ bus_dmamap_sync(adapter->rx_buf_tag,
+ rx_ring->rx_buffer_info[i].map, BUS_DMASYNC_POSTREAD);
m_freem(rx_ring->rx_buffer_info[i].mbuf);
rx_ring->rx_buffer_info[i].mbuf = NULL;
bus_dmamap_unload(adapter->rx_buf_tag,
@@ -911,7 +953,7 @@ ena_setup_all_rx_resources(struct ena_adapter *adapter)
{
int i, rc = 0;
- for (i = 0; i < adapter->num_queues; i++) {
+ for (i = 0; i < adapter->num_io_queues; i++) {
rc = ena_setup_rx_resources(adapter, i);
if (rc != 0) {
device_printf(adapter->pdev,
@@ -939,7 +981,7 @@ ena_free_all_rx_resources(struct ena_adapter *adapter)
{
int i;
- for (i = 0; i < adapter->num_queues; i++)
+ for (i = 0; i < adapter->num_io_queues; i++)
ena_free_rx_resources(adapter, i);
}
@@ -957,7 +999,8 @@ ena_alloc_rx_mbuf(struct ena_adapter *adapter,
return (0);
/* Get mbuf using UMA allocator */
- rx_info->mbuf = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, MJUM16BYTES);
+ rx_info->mbuf = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR,
+ rx_ring->rx_mbuf_sz);
if (unlikely(rx_info->mbuf == NULL)) {
counter_u64_add(rx_ring->rx_stats.mjum_alloc_fail, 1);
@@ -968,14 +1011,14 @@ ena_alloc_rx_mbuf(struct ena_adapter *adapter,
}
mlen = MCLBYTES;
} else {
- mlen = MJUM16BYTES;
+ mlen = rx_ring->rx_mbuf_sz;
}
/* Set mbuf length*/
rx_info->mbuf->m_pkthdr.len = rx_info->mbuf->m_len = mlen;
/* Map packets for DMA */
ena_trace(ENA_DBG | ENA_RSC | ENA_RXPTH,
- "Using tag %p for buffers' DMA mapping, mbuf %p len: %d",
+ "Using tag %p for buffers' DMA mapping, mbuf %p len: %d\n",
adapter->rx_buf_tag,rx_info->mbuf, rx_info->mbuf->m_len);
error = bus_dmamap_load_mbuf_sg(adapter->rx_buf_tag, rx_info->map,
rx_info->mbuf, segs, &nsegs, BUS_DMA_NOWAIT);
@@ -1015,6 +1058,8 @@ ena_free_rx_mbuf(struct ena_adapter *adapter, struct ena_ring *rx_ring,
return;
}
+ bus_dmamap_sync(adapter->rx_buf_tag, rx_info->map,
+ BUS_DMASYNC_POSTREAD);
bus_dmamap_unload(adapter->rx_buf_tag, rx_info->map);
m_freem(rx_info->mbuf);
rx_info->mbuf = NULL;
@@ -1026,7 +1071,7 @@ ena_free_rx_mbuf(struct ena_adapter *adapter, struct ena_ring *rx_ring,
* @num: number of descriptors to refill
* Refills the ring with newly allocated DMA-mapped mbufs for receiving
**/
-static int
+int
ena_refill_rx_bufs(struct ena_ring *rx_ring, uint32_t num)
{
struct ena_adapter *adapter = rx_ring->adapter;
@@ -1034,7 +1079,7 @@ ena_refill_rx_bufs(struct ena_ring *rx_ring, uint32_t num)
uint32_t i;
int rc;
- ena_trace(ENA_DBG | ENA_RXPTH | ENA_RSC, "refill qid: %d",
+ ena_trace(ENA_DBG | ENA_RXPTH | ENA_RSC, "refill qid: %d\n",
rx_ring->qid);
next_to_use = rx_ring->next_to_use;
@@ -1043,12 +1088,16 @@ ena_refill_rx_bufs(struct ena_ring *rx_ring, uint32_t num)
struct ena_rx_buffer *rx_info;
ena_trace(ENA_DBG | ENA_RXPTH | ENA_RSC,
- "RX buffer - next to use: %d", next_to_use);
+ "RX buffer - next to use: %d\n", next_to_use);
req_id = rx_ring->free_rx_ids[next_to_use];
rx_info = &rx_ring->rx_buffer_info[req_id];
-
- rc = ena_alloc_rx_mbuf(adapter, rx_ring, rx_info);
+#ifdef DEV_NETMAP
+ if (ena_rx_ring_in_netmap(adapter, rx_ring->qid))
+ rc = ena_netmap_alloc_rx_slot(adapter, rx_ring, rx_info);
+ else
+#endif /* DEV_NETMAP */
+ rc = ena_alloc_rx_mbuf(adapter, rx_ring, rx_info);
if (unlikely(rc != 0)) {
ena_trace(ENA_WARNING,
"failed to alloc buffer for rx queue %d\n",
@@ -1074,14 +1123,172 @@ ena_refill_rx_bufs(struct ena_ring *rx_ring, uint32_t num)
rx_ring->qid, i, num);
}
- if (likely(i != 0)) {
- wmb();
+ if (likely(i != 0))
ena_com_write_sq_doorbell(rx_ring->ena_com_io_sq);
- }
+
rx_ring->next_to_use = next_to_use;
return (i);
}
+int
+ena_update_buf_ring_size(struct ena_adapter *adapter,
+ uint32_t new_buf_ring_size)
+{
+ uint32_t old_buf_ring_size;
+ int rc = 0;
+ bool dev_was_up;
+
+ ENA_LOCK_LOCK(adapter);
+
+ old_buf_ring_size = adapter->buf_ring_size;
+ adapter->buf_ring_size = new_buf_ring_size;
+
+ dev_was_up = ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter);
+ ena_down(adapter);
+
+ /* Reconfigure buf ring for all Tx rings. */
+ ena_free_all_io_rings_resources(adapter);
+ ena_init_io_rings_advanced(adapter);
+ if (dev_was_up) {
+ /*
+ * If ena_up() fails, it is not because of the recent buf_ring size
+ * change. In that case, just revert to the old drbr value and
+ * trigger the reset, as something else must have gone wrong.
+ */
+ rc = ena_up(adapter);
+ if (unlikely(rc != 0)) {
+ device_printf(adapter->pdev,
+ "Failed to configure device after setting new drbr size: %u. Reverting old value: %u and triggering the reset\n",
+ new_buf_ring_size, old_buf_ring_size);
+
+ /* Revert old size and trigger the reset */
+ adapter->buf_ring_size = old_buf_ring_size;
+ ena_free_all_io_rings_resources(adapter);
+ ena_init_io_rings_advanced(adapter);
+
+ ENA_FLAG_SET_ATOMIC(ENA_FLAG_DEV_UP_BEFORE_RESET,
+ adapter);
+ ena_trigger_reset(adapter, ENA_REGS_RESET_OS_TRIGGER);
+
+ }
+ }
+
+ ENA_LOCK_UNLOCK(adapter);
+
+ return (rc);
+}
+
+int
+ena_update_queue_size(struct ena_adapter *adapter, uint32_t new_tx_size,
+ uint32_t new_rx_size)
+{
+ uint32_t old_tx_size, old_rx_size;
+ int rc = 0;
+ bool dev_was_up;
+
+ ENA_LOCK_LOCK(adapter);
+
+ old_tx_size = adapter->requested_tx_ring_size;
+ old_rx_size = adapter->requested_rx_ring_size;
+ adapter->requested_tx_ring_size = new_tx_size;
+ adapter->requested_rx_ring_size = new_rx_size;
+
+ dev_was_up = ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter);
+ ena_down(adapter);
+
+ /* Configure queues with new size. */
+ ena_init_io_rings_basic(adapter);
+ if (dev_was_up) {
+ rc = ena_up(adapter);
+ if (unlikely(rc != 0)) {
+ device_printf(adapter->pdev,
+ "Failed to configure device with the new sizes - Tx: %u Rx: %u. Reverting old values - Tx: %u Rx: %u\n",
+ new_tx_size, new_rx_size, old_tx_size, old_rx_size);
+
+ /* Revert old size. */
+ adapter->requested_tx_ring_size = old_tx_size;
+ adapter->requested_rx_ring_size = old_rx_size;
+ ena_init_io_rings_basic(adapter);
+
+ /* And try again. */
+ rc = ena_up(adapter);
+ if (unlikely(rc != 0)) {
+ device_printf(adapter->pdev,
+ "Failed to revert old queue sizes. Triggering device reset.\n");
+ /*
+ * If we have failed again, something else must have
+ * gone wrong. After the reset, the device should try
+ * to go up.
+ */
+ ENA_FLAG_SET_ATOMIC(
+ ENA_FLAG_DEV_UP_BEFORE_RESET, adapter);
+ ena_trigger_reset(adapter,
+ ENA_REGS_RESET_OS_TRIGGER);
+ }
+ }
+ }
+
+ ENA_LOCK_UNLOCK(adapter);
+
+ return (rc);
+}
+
+static void
+ena_update_io_rings(struct ena_adapter *adapter, uint32_t num)
+{
+ ena_free_all_io_rings_resources(adapter);
+ /* Force indirection table to be reinitialized */
+ ena_com_rss_destroy(adapter->ena_dev);
+
+ adapter->num_io_queues = num;
+ ena_init_io_rings(adapter);
+}
+
+/* Caller should sanitize new_num */
+int
+ena_update_io_queue_nb(struct ena_adapter *adapter, uint32_t new_num)
+{
+ uint32_t old_num;
+ int rc = 0;
+ bool dev_was_up;
+
+ ENA_LOCK_LOCK(adapter);
+
+ dev_was_up = ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter);
+ old_num = adapter->num_io_queues;
+ ena_down(adapter);
+
+ ena_update_io_rings(adapter, new_num);
+
+ if (dev_was_up) {
+ rc = ena_up(adapter);
+ if (unlikely(rc != 0)) {
+ device_printf(adapter->pdev,
+ "Failed to configure device with %u IO queues. "
+ "Reverting to previous value: %u\n",
+ new_num, old_num);
+
+ ena_update_io_rings(adapter, old_num);
+
+ rc = ena_up(adapter);
+ if (unlikely(rc != 0)) {
+ device_printf(adapter->pdev,
+ "Failed to revert to previous setup IO "
+ "queues. Triggering device reset.\n");
+ ENA_FLAG_SET_ATOMIC(
+ ENA_FLAG_DEV_UP_BEFORE_RESET, adapter);
+ ena_trigger_reset(adapter,
+ ENA_REGS_RESET_OS_TRIGGER);
+ }
+ }
+ }
+
+ ENA_LOCK_UNLOCK(adapter);
+
+ return (rc);
+}
+
static void
ena_free_rx_bufs(struct ena_adapter *adapter, unsigned int qid)
{
@@ -1093,6 +1300,14 @@ ena_free_rx_bufs(struct ena_adapter *adapter, unsigned int qid)
if (rx_info->mbuf != NULL)
ena_free_rx_mbuf(adapter, rx_ring, rx_info);
+#ifdef DEV_NETMAP
+ if (((if_getflags(adapter->ifp) & IFF_DYING) == 0) &&
+ (adapter->ifp->if_capenable & IFCAP_NETMAP)) {
+ if (rx_info->netmap_buf_idx != 0)
+ ena_netmap_free_rx_slot(adapter, rx_ring,
+ rx_info);
+ }
+#endif /* DEV_NETMAP */
}
}
@@ -1107,14 +1322,16 @@ ena_refill_all_rx_bufs(struct ena_adapter *adapter)
struct ena_ring *rx_ring;
int i, rc, bufs_num;
- for (i = 0; i < adapter->num_queues; i++) {
+ for (i = 0; i < adapter->num_io_queues; i++) {
rx_ring = &adapter->rx_ring[i];
bufs_num = rx_ring->ring_size - 1;
rc = ena_refill_rx_bufs(rx_ring, bufs_num);
-
if (unlikely(rc != bufs_num))
ena_trace(ENA_WARNING, "refilling Queue %d failed. "
"Allocated %d buffers from: %d\n", i, rc, bufs_num);
+#ifdef DEV_NETMAP
+ rx_ring->initialized = true;
+#endif /* DEV_NETMAP */
}
}
@@ -1123,7 +1340,7 @@ ena_free_all_rx_bufs(struct ena_adapter *adapter)
{
int i;
- for (i = 0; i < adapter->num_queues; i++)
+ for (i = 0; i < adapter->num_io_queues; i++)
ena_free_rx_bufs(adapter, i);
}
@@ -1147,16 +1364,19 @@ ena_free_tx_bufs(struct ena_adapter *adapter, unsigned int qid)
if (print_once) {
device_printf(adapter->pdev,
- "free uncompleted tx mbuf qid %d idx 0x%x",
+ "free uncompleted tx mbuf qid %d idx 0x%x\n",
qid, i);
print_once = false;
} else {
ena_trace(ENA_DBG,
- "free uncompleted tx mbuf qid %d idx 0x%x",
+ "free uncompleted tx mbuf qid %d idx 0x%x\n",
qid, i);
}
- bus_dmamap_unload(adapter->tx_buf_tag, tx_info->map);
+ bus_dmamap_sync(adapter->tx_buf_tag, tx_info->dmamap,
+ BUS_DMASYNC_POSTWRITE);
+ bus_dmamap_unload(adapter->tx_buf_tag, tx_info->dmamap);
+
m_free(tx_info->mbuf);
tx_info->mbuf = NULL;
}
@@ -1167,7 +1387,7 @@ static void
ena_free_all_tx_bufs(struct ena_adapter *adapter)
{
- for (int i = 0; i < adapter->num_queues; i++)
+ for (int i = 0; i < adapter->num_io_queues; i++)
ena_free_tx_bufs(adapter, i);
}
@@ -1177,7 +1397,7 @@ ena_destroy_all_tx_queues(struct ena_adapter *adapter)
uint16_t ena_qid;
int i;
- for (i = 0; i < adapter->num_queues; i++) {
+ for (i = 0; i < adapter->num_io_queues; i++) {
ena_qid = ENA_IO_TXQ_IDX(i);
ena_com_destroy_io_queue(adapter->ena_dev, ena_qid);
}
@@ -1189,7 +1409,7 @@ ena_destroy_all_rx_queues(struct ena_adapter *adapter)
uint16_t ena_qid;
int i;
- for (i = 0; i < adapter->num_queues; i++) {
+ for (i = 0; i < adapter->num_io_queues; i++) {
ena_qid = ENA_IO_RXQ_IDX(i);
ena_com_destroy_io_queue(adapter->ena_dev, ena_qid);
}
@@ -1198,31 +1418,20 @@ ena_destroy_all_rx_queues(struct ena_adapter *adapter)
static void
ena_destroy_all_io_queues(struct ena_adapter *adapter)
{
- ena_destroy_all_tx_queues(adapter);
- ena_destroy_all_rx_queues(adapter);
-}
-
-static inline int
-validate_tx_req_id(struct ena_ring *tx_ring, uint16_t req_id)
-{
- struct ena_adapter *adapter = tx_ring->adapter;
- struct ena_tx_buffer *tx_info = NULL;
+ struct ena_que *queue;
+ int i;
- if (likely(req_id < tx_ring->ring_size)) {
- tx_info = &tx_ring->tx_buffer_info[req_id];
- if (tx_info->mbuf != NULL)
- return (0);
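+ /* Stop and free the per-queue cleanup taskqueues first. */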
+ for (i = 0; i < adapter->num_io_queues; i++) {
+ queue = &adapter->que[i];
+ while (taskqueue_cancel(queue->cleanup_tq,
+ &queue->cleanup_task, NULL))
+ taskqueue_drain(queue->cleanup_tq,
+ &queue->cleanup_task);
+ taskqueue_free(queue->cleanup_tq);
}
- if (tx_info->mbuf == NULL)
- device_printf(adapter->pdev,
- "tx_info doesn't have valid mbuf\n");
- else
- device_printf(adapter->pdev, "Invalid req_id: %hu\n", req_id);
-
- counter_u64_add(tx_ring->tx_stats.bad_req_id, 1);
-
- return (EFAULT);
+ ena_destroy_all_tx_queues(adapter);
+ ena_destroy_all_rx_queues(adapter);
}
static int
@@ -1231,17 +1440,18 @@ ena_create_io_queues(struct ena_adapter *adapter)
struct ena_com_dev *ena_dev = adapter->ena_dev;
struct ena_com_create_io_ctx ctx;
struct ena_ring *ring;
+ struct ena_que *queue;
uint16_t ena_qid;
uint32_t msix_vector;
int rc, i;
/* Create TX queues */
- for (i = 0; i < adapter->num_queues; i++) {
+ for (i = 0; i < adapter->num_io_queues; i++) {
msix_vector = ENA_IO_IRQ_IDX(i);
ena_qid = ENA_IO_TXQ_IDX(i);
ctx.mem_queue_type = ena_dev->tx_mem_queue_type;
ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_TX;
- ctx.queue_size = adapter->tx_ring_size;
+ ctx.queue_size = adapter->requested_tx_ring_size;
ctx.msix_vector = msix_vector;
ctx.qid = ena_qid;
rc = ena_com_create_io_queue(ena_dev, &ctx);
@@ -1264,12 +1474,12 @@ ena_create_io_queues(struct ena_adapter *adapter)
}
/* Create RX queues */
- for (i = 0; i < adapter->num_queues; i++) {
+ for (i = 0; i < adapter->num_io_queues; i++) {
msix_vector = ENA_IO_IRQ_IDX(i);
ena_qid = ENA_IO_RXQ_IDX(i);
ctx.mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_RX;
- ctx.queue_size = adapter->rx_ring_size;
+ ctx.queue_size = adapter->requested_rx_ring_size;
ctx.msix_vector = msix_vector;
ctx.qid = ena_qid;
rc = ena_com_create_io_queue(ena_dev, &ctx);
@@ -1292,12 +1502,24 @@ ena_create_io_queues(struct ena_adapter *adapter)
}
}
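+ /* Create a cleanup taskqueue and a thread for each IO queue pair. */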
+ for (i = 0; i < adapter->num_io_queues; i++) {
+ queue = &adapter->que[i];
+
+ TASK_INIT(&queue->cleanup_task, 0, ena_cleanup, queue);
+ queue->cleanup_tq = taskqueue_create_fast("ena cleanup",
+ M_WAITOK, taskqueue_thread_enqueue, &queue->cleanup_tq);
+
+ taskqueue_start_threads(&queue->cleanup_tq, 1, PI_NET,
+ "%s queue %d cleanup",
+ device_get_nameunit(adapter->pdev), i);
+ }
+
return (0);
err_rx:
while (i--)
ena_com_destroy_io_queue(ena_dev, ENA_IO_RXQ_IDX(i));
- i = adapter->num_queues;
+ i = adapter->num_io_queues;
err_tx:
while (i--)
ena_com_destroy_io_queue(ena_dev, ENA_IO_TXQ_IDX(i));
@@ -1305,445 +1527,6 @@ err_tx:
return (ENXIO);
}
-/**
- * ena_tx_cleanup - clear sent packets and corresponding descriptors
- * @tx_ring: ring for which we want to clean packets
- *
- * Once packets are sent, we ask the device in a loop for no longer used
- * descriptors. We find the related mbuf chain in a map (index in an array)
- * and free it, then update ring state.
- * This is performed in "endless" loop, updating ring pointers every
- * TX_COMMIT. The first check of free descriptor is performed before the actual
- * loop, then repeated at the loop end.
- **/
-static int
-ena_tx_cleanup(struct ena_ring *tx_ring)
-{
- struct ena_adapter *adapter;
- struct ena_com_io_cq* io_cq;
- uint16_t next_to_clean;
- uint16_t req_id;
- uint16_t ena_qid;
- unsigned int total_done = 0;
- int rc;
- int commit = TX_COMMIT;
- int budget = TX_BUDGET;
- int work_done;
-
- adapter = tx_ring->que->adapter;
- ena_qid = ENA_IO_TXQ_IDX(tx_ring->que->id);
- io_cq = &adapter->ena_dev->io_cq_queues[ena_qid];
- next_to_clean = tx_ring->next_to_clean;
-
- do {
- struct ena_tx_buffer *tx_info;
- struct mbuf *mbuf;
-
- rc = ena_com_tx_comp_req_id_get(io_cq, &req_id);
- if (unlikely(rc != 0))
- break;
-
- rc = validate_tx_req_id(tx_ring, req_id);
- if (unlikely(rc != 0))
- break;
-
- tx_info = &tx_ring->tx_buffer_info[req_id];
-
- mbuf = tx_info->mbuf;
-
- tx_info->mbuf = NULL;
- bintime_clear(&tx_info->timestamp);
-
- if (likely(tx_info->num_of_bufs != 0)) {
- /* Map is no longer required */
- bus_dmamap_unload(adapter->tx_buf_tag, tx_info->map);
- }
-
- ena_trace(ENA_DBG | ENA_TXPTH, "tx: q %d mbuf %p completed",
- tx_ring->qid, mbuf);
-
- m_freem(mbuf);
-
- total_done += tx_info->tx_descs;
-
- tx_ring->free_tx_ids[next_to_clean] = req_id;
- next_to_clean = ENA_TX_RING_IDX_NEXT(next_to_clean,
- tx_ring->ring_size);
-
- if (unlikely(--commit == 0)) {
- commit = TX_COMMIT;
- /* update ring state every TX_COMMIT descriptor */
- tx_ring->next_to_clean = next_to_clean;
- ena_com_comp_ack(
- &adapter->ena_dev->io_sq_queues[ena_qid],
- total_done);
- ena_com_update_dev_comp_head(io_cq);
- total_done = 0;
- }
- } while (likely(--budget));
-
- work_done = TX_BUDGET - budget;
-
- ena_trace(ENA_DBG | ENA_TXPTH, "tx: q %d done. total pkts: %d",
- tx_ring->qid, work_done);
-
- /* If there is still something to commit update ring state */
- if (likely(commit != TX_COMMIT)) {
- tx_ring->next_to_clean = next_to_clean;
- ena_com_comp_ack(&adapter->ena_dev->io_sq_queues[ena_qid],
- total_done);
- ena_com_update_dev_comp_head(io_cq);
- }
-
- taskqueue_enqueue(tx_ring->enqueue_tq, &tx_ring->enqueue_task);
-
- return (work_done);
-}
-
-static void
-ena_rx_hash_mbuf(struct ena_ring *rx_ring, struct ena_com_rx_ctx *ena_rx_ctx,
- struct mbuf *mbuf)
-{
- struct ena_adapter *adapter = rx_ring->adapter;
-
- if (likely(adapter->rss_support)) {
- mbuf->m_pkthdr.flowid = ena_rx_ctx->hash;
-
- if (ena_rx_ctx->frag &&
- (ena_rx_ctx->l3_proto != ENA_ETH_IO_L3_PROTO_UNKNOWN)) {
- M_HASHTYPE_SET(mbuf, M_HASHTYPE_OPAQUE_HASH);
- return;
- }
-
- switch (ena_rx_ctx->l3_proto) {
- case ENA_ETH_IO_L3_PROTO_IPV4:
- switch (ena_rx_ctx->l4_proto) {
- case ENA_ETH_IO_L4_PROTO_TCP:
- M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_TCP_IPV4);
- break;
- case ENA_ETH_IO_L4_PROTO_UDP:
- M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_UDP_IPV4);
- break;
- default:
- M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_IPV4);
- }
- break;
- case ENA_ETH_IO_L3_PROTO_IPV6:
- switch (ena_rx_ctx->l4_proto) {
- case ENA_ETH_IO_L4_PROTO_TCP:
- M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_TCP_IPV6);
- break;
- case ENA_ETH_IO_L4_PROTO_UDP:
- M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_UDP_IPV6);
- break;
- default:
- M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_IPV6);
- }
- break;
- case ENA_ETH_IO_L3_PROTO_UNKNOWN:
- M_HASHTYPE_SET(mbuf, M_HASHTYPE_NONE);
- break;
- default:
- M_HASHTYPE_SET(mbuf, M_HASHTYPE_OPAQUE_HASH);
- }
- } else {
- mbuf->m_pkthdr.flowid = rx_ring->qid;
- M_HASHTYPE_SET(mbuf, M_HASHTYPE_NONE);
- }
-}
-
-/**
- * ena_rx_mbuf - assemble mbuf from descriptors
- * @rx_ring: ring for which we want to clean packets
- * @ena_bufs: buffer info
- * @ena_rx_ctx: metadata for this packet(s)
- * @next_to_clean: ring pointer, will be updated only upon success
- *
- **/
-static struct mbuf*
-ena_rx_mbuf(struct ena_ring *rx_ring, struct ena_com_rx_buf_info *ena_bufs,
- struct ena_com_rx_ctx *ena_rx_ctx, uint16_t *next_to_clean)
-{
- struct mbuf *mbuf;
- struct ena_rx_buffer *rx_info;
- struct ena_adapter *adapter;
- unsigned int descs = ena_rx_ctx->descs;
- int rc;
- uint16_t ntc, len, req_id, buf = 0;
-
- ntc = *next_to_clean;
- adapter = rx_ring->adapter;
-
- len = ena_bufs[buf].len;
- req_id = ena_bufs[buf].req_id;
- rc = validate_rx_req_id(rx_ring, req_id);
- if (unlikely(rc != 0))
- return (NULL);
-
- rx_info = &rx_ring->rx_buffer_info[req_id];
- if (unlikely(rx_info->mbuf == NULL)) {
- device_printf(adapter->pdev, "NULL mbuf in rx_info");
- return (NULL);
- }
-
- ena_trace(ENA_DBG | ENA_RXPTH, "rx_info %p, mbuf %p, paddr %jx",
- rx_info, rx_info->mbuf, (uintmax_t)rx_info->ena_buf.paddr);
-
- mbuf = rx_info->mbuf;
- mbuf->m_flags |= M_PKTHDR;
- mbuf->m_pkthdr.len = len;
- mbuf->m_len = len;
- mbuf->m_pkthdr.rcvif = rx_ring->que->adapter->ifp;
-
- /* Fill mbuf with hash key and it's interpretation for optimization */
- ena_rx_hash_mbuf(rx_ring, ena_rx_ctx, mbuf);
-
- ena_trace(ENA_DBG | ENA_RXPTH, "rx mbuf 0x%p, flags=0x%x, len: %d",
- mbuf, mbuf->m_flags, mbuf->m_pkthdr.len);
-
- /* DMA address is not needed anymore, unmap it */
- bus_dmamap_unload(rx_ring->adapter->rx_buf_tag, rx_info->map);
-
- rx_info->mbuf = NULL;
- rx_ring->free_rx_ids[ntc] = req_id;
- ntc = ENA_RX_RING_IDX_NEXT(ntc, rx_ring->ring_size);
-
- /*
- * While we have more than 1 descriptors for one rcvd packet, append
- * other mbufs to the main one
- */
- while (--descs) {
- ++buf;
- len = ena_bufs[buf].len;
- req_id = ena_bufs[buf].req_id;
- rc = validate_rx_req_id(rx_ring, req_id);
- if (unlikely(rc != 0)) {
- /*
- * If the req_id is invalid, then the device will be
- * reset. In that case we must free all mbufs that
- * were already gathered.
- */
- m_freem(mbuf);
- return (NULL);
- }
- rx_info = &rx_ring->rx_buffer_info[req_id];
-
- if (unlikely(rx_info->mbuf == NULL)) {
- device_printf(adapter->pdev, "NULL mbuf in rx_info");
- /*
- * If one of the required mbufs was not allocated yet,
- * we can break there.
- * All earlier used descriptors will be reallocated
- * later and not used mbufs can be reused.
- * The next_to_clean pointer will not be updated in case
- * of an error, so caller should advance it manually
- * in error handling routine to keep it up to date
- * with hw ring.
- */
- m_freem(mbuf);
- return (NULL);
- }
-
- if (unlikely(m_append(mbuf, len, rx_info->mbuf->m_data) == 0)) {
- counter_u64_add(rx_ring->rx_stats.mbuf_alloc_fail, 1);
- ena_trace(ENA_WARNING, "Failed to append Rx mbuf %p",
- mbuf);
- }
-
- ena_trace(ENA_DBG | ENA_RXPTH,
- "rx mbuf updated. len %d", mbuf->m_pkthdr.len);
-
- /* Free already appended mbuf, it won't be useful anymore */
- bus_dmamap_unload(rx_ring->adapter->rx_buf_tag, rx_info->map);
- m_freem(rx_info->mbuf);
- rx_info->mbuf = NULL;
-
- rx_ring->free_rx_ids[ntc] = req_id;
- ntc = ENA_RX_RING_IDX_NEXT(ntc, rx_ring->ring_size);
- }
-
- *next_to_clean = ntc;
-
- return (mbuf);
-}
-
-/**
- * ena_rx_checksum - indicate in mbuf if hw indicated a good cksum
- **/
-static inline void
-ena_rx_checksum(struct ena_ring *rx_ring, struct ena_com_rx_ctx *ena_rx_ctx,
- struct mbuf *mbuf)
-{
-
- /* if IP and error */
- if (unlikely((ena_rx_ctx->l3_proto == ENA_ETH_IO_L3_PROTO_IPV4) &&
- ena_rx_ctx->l3_csum_err)) {
- /* ipv4 checksum error */
- mbuf->m_pkthdr.csum_flags = 0;
- counter_u64_add(rx_ring->rx_stats.bad_csum, 1);
- ena_trace(ENA_DBG, "RX IPv4 header checksum error");
- return;
- }
-
- /* if TCP/UDP */
- if ((ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_TCP) ||
- (ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_UDP)) {
- if (ena_rx_ctx->l4_csum_err) {
- /* TCP/UDP checksum error */
- mbuf->m_pkthdr.csum_flags = 0;
- counter_u64_add(rx_ring->rx_stats.bad_csum, 1);
- ena_trace(ENA_DBG, "RX L4 checksum error");
- } else {
- mbuf->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
- mbuf->m_pkthdr.csum_flags |= CSUM_IP_VALID;
- }
- }
-}
-
-static void
-ena_deferred_rx_cleanup(void *arg, int pending)
-{
- struct ena_ring *rx_ring = arg;
- int budget = CLEAN_BUDGET;
-
- ENA_RING_MTX_LOCK(rx_ring);
- /*
- * If deferred task was executed, perform cleanup of all awaiting
- * descs (or until given budget is depleted to avoid infinite loop).
- */
- while (likely(budget--)) {
- if (ena_rx_cleanup(rx_ring) == 0)
- break;
- }
- ENA_RING_MTX_UNLOCK(rx_ring);
-}
-
-/**
- * ena_rx_cleanup - handle rx irq
- * @arg: ring for which irq is being handled
- **/
-static int
-ena_rx_cleanup(struct ena_ring *rx_ring)
-{
- struct ena_adapter *adapter;
- struct mbuf *mbuf;
- struct ena_com_rx_ctx ena_rx_ctx;
- struct ena_com_io_cq* io_cq;
- struct ena_com_io_sq* io_sq;
- if_t ifp;
- uint16_t ena_qid;
- uint16_t next_to_clean;
- uint32_t refill_required;
- uint32_t refill_threshold;
- uint32_t do_if_input = 0;
- unsigned int qid;
- int rc, i;
- int budget = RX_BUDGET;
-
- adapter = rx_ring->que->adapter;
- ifp = adapter->ifp;
- qid = rx_ring->que->id;
- ena_qid = ENA_IO_RXQ_IDX(qid);
- io_cq = &adapter->ena_dev->io_cq_queues[ena_qid];
- io_sq = &adapter->ena_dev->io_sq_queues[ena_qid];
- next_to_clean = rx_ring->next_to_clean;
-
- ena_trace(ENA_DBG, "rx: qid %d", qid);
-
- do {
- ena_rx_ctx.ena_bufs = rx_ring->ena_bufs;
- ena_rx_ctx.max_bufs = adapter->max_rx_sgl_size;
- ena_rx_ctx.descs = 0;
- rc = ena_com_rx_pkt(io_cq, io_sq, &ena_rx_ctx);
-
- if (unlikely(rc != 0))
- goto error;
-
- if (unlikely(ena_rx_ctx.descs == 0))
- break;
-
- ena_trace(ENA_DBG | ENA_RXPTH, "rx: q %d got packet from ena. "
- "descs #: %d l3 proto %d l4 proto %d hash: %x",
- rx_ring->qid, ena_rx_ctx.descs, ena_rx_ctx.l3_proto,
- ena_rx_ctx.l4_proto, ena_rx_ctx.hash);
-
- /* Receive mbuf from the ring */
- mbuf = ena_rx_mbuf(rx_ring, rx_ring->ena_bufs,
- &ena_rx_ctx, &next_to_clean);
-
- /* Exit if we failed to retrieve a buffer */
- if (unlikely(mbuf == NULL)) {
- for (i = 0; i < ena_rx_ctx.descs; ++i) {
- rx_ring->free_rx_ids[next_to_clean] =
- rx_ring->ena_bufs[i].req_id;
- next_to_clean =
- ENA_RX_RING_IDX_NEXT(next_to_clean,
- rx_ring->ring_size);
-
- }
- break;
- }
-
- if (((ifp->if_capenable & IFCAP_RXCSUM) != 0) ||
- ((ifp->if_capenable & IFCAP_RXCSUM_IPV6) != 0)) {
- ena_rx_checksum(rx_ring, &ena_rx_ctx, mbuf);
- }
-
- counter_enter();
- counter_u64_add_protected(rx_ring->rx_stats.bytes,
- mbuf->m_pkthdr.len);
- counter_u64_add_protected(adapter->hw_stats.rx_bytes,
- mbuf->m_pkthdr.len);
- counter_exit();
- /*
- * LRO is only for IP/TCP packets and TCP checksum of the packet
- * should be computed by hardware.
- */
- do_if_input = 1;
- if (((ifp->if_capenable & IFCAP_LRO) != 0) &&
- ((mbuf->m_pkthdr.csum_flags & CSUM_IP_VALID) != 0) &&
- (ena_rx_ctx.l4_proto == ENA_ETH_IO_L4_PROTO_TCP)) {
- /*
- * Send to the stack if:
- * - LRO not enabled, or
- * - no LRO resources, or
- * - lro enqueue fails
- */
- if ((rx_ring->lro.lro_cnt != 0) &&
- (tcp_lro_rx(&rx_ring->lro, mbuf, 0) == 0))
- do_if_input = 0;
- }
- if (do_if_input != 0) {
- ena_trace(ENA_DBG | ENA_RXPTH,
- "calling if_input() with mbuf %p", mbuf);
- (*ifp->if_input)(ifp, mbuf);
- }
-
- counter_enter();
- counter_u64_add_protected(rx_ring->rx_stats.cnt, 1);
- counter_u64_add_protected(adapter->hw_stats.rx_packets, 1);
- counter_exit();
- } while (--budget);
-
- rx_ring->next_to_clean = next_to_clean;
-
- refill_required = ena_com_free_desc(io_sq);
- refill_threshold = rx_ring->ring_size / ENA_RX_REFILL_THRESH_DIVIDER;
-
- if (refill_required > refill_threshold) {
- ena_com_update_dev_comp_head(rx_ring->ena_com_io_cq);
- ena_refill_rx_bufs(rx_ring, refill_required);
- }
-
- tcp_lro_flush_all(&rx_ring->lro);
-
- return (RX_BUDGET - budget);
-
-error:
- counter_u64_add(rx_ring->rx_stats.bad_desc_num, 1);
- return (RX_BUDGET - budget);
-}
-
/*********************************************************************
*
* MSIX & Interrupt Service routine
@@ -1760,69 +1543,27 @@ ena_intr_msix_mgmnt(void *arg)
struct ena_adapter *adapter = (struct ena_adapter *)arg;
ena_com_admin_q_comp_intr_handler(adapter->ena_dev);
- if (likely(adapter->running))
+ if (likely(ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter)))
ena_com_aenq_intr_handler(adapter->ena_dev, arg);
}
/**
* ena_handle_msix - MSIX Interrupt Handler for Tx/Rx
- * @arg: interrupt number
+ * @arg: queue
**/
-static void
+static int
ena_handle_msix(void *arg)
{
- struct ena_que *que = arg;
- struct ena_adapter *adapter = que->adapter;
+ struct ena_que *queue = arg;
+ struct ena_adapter *adapter = queue->adapter;
if_t ifp = adapter->ifp;
- struct ena_ring *tx_ring;
- struct ena_ring *rx_ring;
- struct ena_com_io_cq* io_cq;
- struct ena_eth_io_intr_reg intr_reg;
- int qid, ena_qid;
- int txc, rxc, i;
if (unlikely((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0))
- return;
-
- ena_trace(ENA_DBG, "MSI-X TX/RX routine");
-
- tx_ring = que->tx_ring;
- rx_ring = que->rx_ring;
- qid = que->id;
- ena_qid = ENA_IO_TXQ_IDX(qid);
- io_cq = &adapter->ena_dev->io_cq_queues[ena_qid];
-
- for (i = 0; i < CLEAN_BUDGET; ++i) {
- /*
- * If lock cannot be acquired, then deferred cleanup task was
- * being executed and rx ring is being cleaned up in
- * another thread.
- */
- if (likely(ENA_RING_MTX_TRYLOCK(rx_ring) != 0)) {
- rxc = ena_rx_cleanup(rx_ring);
- ENA_RING_MTX_UNLOCK(rx_ring);
- } else {
- rxc = 0;
- }
+ return (FILTER_STRAY);
- /* Protection from calling ena_tx_cleanup from ena_start_xmit */
- ENA_RING_MTX_LOCK(tx_ring);
- txc = ena_tx_cleanup(tx_ring);
- ENA_RING_MTX_UNLOCK(tx_ring);
-
- if (unlikely((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0))
- return;
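+ /* Defer all Tx/Rx cleanup to the per-queue cleanup taskqueue. */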
+ taskqueue_enqueue(queue->cleanup_tq, &queue->cleanup_task);
- if ((txc != TX_BUDGET) && (rxc != RX_BUDGET))
- break;
- }
-
- /* Signal that work is done and unmask interrupt */
- ena_com_update_intr_reg(&intr_reg,
- RX_IRQ_INTERVAL,
- TX_IRQ_INTERVAL,
- true);
- ena_com_unmask_intr(io_cq, &intr_reg);
+ return (FILTER_HANDLED);
}
static int
@@ -1832,13 +1573,18 @@ ena_enable_msix(struct ena_adapter *adapter)
int msix_vecs, msix_req;
int i, rc = 0;
+ if (ENA_FLAG_ISSET(ENA_FLAG_MSIX_ENABLED, adapter)) {
+ device_printf(dev, "Error, MSI-X is already enabled\n");
+ return (EINVAL);
+ }
+
/* Reserved the max msix vectors we might need */
- msix_vecs = ENA_MAX_MSIX_VEC(adapter->num_queues);
+ msix_vecs = ENA_MAX_MSIX_VEC(adapter->max_num_io_queues);
adapter->msix_entries = malloc(msix_vecs * sizeof(struct msix_entry),
M_DEVBUF, M_WAITOK | M_ZERO);
- ena_trace(ENA_DBG, "trying to enable MSI-X, vectors: %d", msix_vecs);
+ ena_trace(ENA_DBG, "trying to enable MSI-X, vectors: %d\n", msix_vecs);
for (i = 0; i < msix_vecs; i++) {
adapter->msix_entries[i].entry = i;
@@ -1857,13 +1603,20 @@ ena_enable_msix(struct ena_adapter *adapter)
}
if (msix_vecs != msix_req) {
+ if (msix_vecs == ENA_ADMIN_MSIX_VEC) {
+ device_printf(dev,
+ "Not enough number of MSI-x allocated: %d\n",
+ msix_vecs);
+ pci_release_msi(dev);
+ rc = ENOSPC;
+ goto err_msix_free;
+ }
device_printf(dev, "Enable only %d MSI-x (out of %d), reduce "
"the number of queues\n", msix_vecs, msix_req);
- adapter->num_queues = msix_vecs - ENA_ADMIN_MSIX_VEC;
}
adapter->msix_vecs = msix_vecs;
- adapter->msix_enabled = true;
+ ENA_FLAG_SET_ATOMIC(ENA_FLAG_MSIX_ENABLED, adapter);
return (0);
@@ -1891,13 +1644,16 @@ ena_setup_mgmnt_intr(struct ena_adapter *adapter)
adapter->msix_entries[ENA_MGMNT_IRQ_IDX].vector;
}
-static void
+static int
ena_setup_io_intr(struct ena_adapter *adapter)
{
static int last_bind_cpu = -1;
int irq_idx;
- for (int i = 0; i < adapter->num_queues; i++) {
+ if (adapter->msix_entries == NULL)
+ return (EINVAL);
+
+ for (int i = 0; i < adapter->num_io_queues; i++) {
irq_idx = ENA_IO_IRQ_IDX(i);
snprintf(adapter->irq_tbl[irq_idx].name, ENA_IRQNAME_SIZE,
@@ -1908,12 +1664,9 @@ ena_setup_io_intr(struct ena_adapter *adapter)
adapter->msix_entries[irq_idx].vector;
ena_trace(ENA_INFO | ENA_IOQ, "ena_setup_io_intr vector: %d\n",
adapter->msix_entries[irq_idx].vector);
-#ifdef RSS
- adapter->que[i].cpu = adapter->irq_tbl[irq_idx].cpu =
- rss_getcpu(i % rss_getnumbuckets());
-#else
+
/*
- * We still want to bind rings to the corresponding cpu
+ * We want to bind rings to the corresponding cpu
* using something similar to the RSS round-robin technique.
*/
if (unlikely(last_bind_cpu < 0))
@@ -1921,8 +1674,9 @@ ena_setup_io_intr(struct ena_adapter *adapter)
adapter->que[i].cpu = adapter->irq_tbl[irq_idx].cpu =
last_bind_cpu;
last_bind_cpu = CPU_NEXT(last_bind_cpu);
-#endif
}
+
+ return (0);
}
static int
@@ -1977,7 +1731,7 @@ ena_request_io_irq(struct ena_adapter *adapter)
unsigned long flags = 0;
int rc = 0, i, rcc;
- if (unlikely(adapter->msix_enabled == 0)) {
+ if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_MSIX_ENABLED, adapter))) {
device_printf(adapter->pdev,
"failed to request I/O IRQ: MSI-X is not enabled\n");
return (EINVAL);
@@ -1994,14 +1748,15 @@ ena_request_io_irq(struct ena_adapter *adapter)
irq->res = bus_alloc_resource_any(adapter->pdev, SYS_RES_IRQ,
&irq->vector, flags);
if (unlikely(irq->res == NULL)) {
+ rc = ENOMEM;
device_printf(adapter->pdev, "could not allocate "
"irq vector: %d\n", irq->vector);
goto err;
}
rc = bus_setup_intr(adapter->pdev, irq->res,
- INTR_TYPE_NET | INTR_MPSAFE, NULL,
- irq->handler, irq->data, &irq->cookie);
+ INTR_TYPE_NET | INTR_MPSAFE, irq->handler, NULL,
+ irq->data, &irq->cookie);
if (unlikely(rc != 0)) {
device_printf(adapter->pdev, "failed to register "
"interrupt handler for irq %ju: %d\n",
@@ -2010,13 +1765,8 @@ ena_request_io_irq(struct ena_adapter *adapter)
}
irq->requested = true;
-#ifdef RSS
- ena_trace(ENA_INFO, "queue %d - RSS bucket %d\n",
- i - ENA_IO_IRQ_FIRST_IDX, irq->cpu);
-#else
ena_trace(ENA_INFO, "queue %d - cpu %d\n",
i - ENA_IO_IRQ_FIRST_IDX, irq->cpu);
-#endif
}
return (rc);
@@ -2131,10 +1881,14 @@ static void
ena_disable_msix(struct ena_adapter *adapter)
{
- pci_release_msi(adapter->pdev);
+ if (ENA_FLAG_ISSET(ENA_FLAG_MSIX_ENABLED, adapter)) {
+ ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_MSIX_ENABLED, adapter);
+ pci_release_msi(adapter->pdev);
+ }
adapter->msix_vecs = 0;
- free(adapter->msix_entries, M_DEVBUF);
+ if (adapter->msix_entries != NULL)
+ free(adapter->msix_entries, M_DEVBUF);
adapter->msix_entries = NULL;
}
@@ -2147,7 +1901,7 @@ ena_unmask_all_io_irqs(struct ena_adapter *adapter)
int i;
/* Unmask interrupts for all queues */
- for (i = 0; i < adapter->num_queues; i++) {
+ for (i = 0; i < adapter->num_io_queues; i++) {
ena_qid = ENA_IO_TXQ_IDX(i);
io_cq = &adapter->ena_dev->io_cq_queues[ena_qid];
ena_com_update_intr_reg(&intr_reg, 0, 0, true);
@@ -2162,6 +1916,18 @@ ena_rss_configure(struct ena_adapter *adapter)
struct ena_com_dev *ena_dev = adapter->ena_dev;
int rc;
+ /* In case the RSS table was destroyed */
+ if (!ena_dev->rss.tbl_log_size) {
+ rc = ena_rss_init_default(adapter);
+ if (unlikely((rc != 0) && (rc != EOPNOTSUPP))) {
+ device_printf(adapter->pdev,
+ "WARNING: RSS was not properly re-initialized,"
+ " it will affect bandwidth\n");
+ ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_RSS_ACTIVE, adapter);
+ return (rc);
+ }
+ }
+
/* Set indirect table */
rc = ena_com_indirect_table_set(ena_dev);
if (unlikely((rc != 0) && (rc != EOPNOTSUPP)))
@@ -2185,10 +1951,13 @@ ena_up_complete(struct ena_adapter *adapter)
{
int rc;
- if (likely(adapter->rss_support)) {
+ if (likely(ENA_FLAG_ISSET(ENA_FLAG_RSS_ACTIVE, adapter))) {
rc = ena_rss_configure(adapter);
- if (rc != 0)
+ if (rc != 0) {
+ device_printf(adapter->pdev,
+ "Failed to configure RSS\n");
return (rc);
+ }
}
rc = ena_change_mtu(adapter->ifp, adapter->ifp->if_mtu);
@@ -2202,87 +1971,183 @@ ena_up_complete(struct ena_adapter *adapter)
return (0);
}
-static int
-ena_up(struct ena_adapter *adapter)
+static void
+set_io_rings_size(struct ena_adapter *adapter, int new_tx_size,
+ int new_rx_size)
{
- int rc = 0;
-
- if (unlikely(device_is_attached(adapter->pdev) == 0)) {
- device_printf(adapter->pdev, "device is not attached!\n");
- return (ENXIO);
- }
+ int i;
- if (unlikely(!adapter->running)) {
- device_printf(adapter->pdev, "device is not running!\n");
- return (ENXIO);
+ for (i = 0; i < adapter->num_io_queues; i++) {
+ adapter->tx_ring[i].ring_size = new_tx_size;
+ adapter->rx_ring[i].ring_size = new_rx_size;
}
+}
- if (!adapter->up) {
- device_printf(adapter->pdev, "device is going UP\n");
+static int
+create_queues_with_size_backoff(struct ena_adapter *adapter)
+{
+ int rc;
+ uint32_t cur_rx_ring_size, cur_tx_ring_size;
+ uint32_t new_rx_ring_size, new_tx_ring_size;
- /* setup interrupts for IO queues */
- ena_setup_io_intr(adapter);
- rc = ena_request_io_irq(adapter);
- if (unlikely(rc != 0)) {
- ena_trace(ENA_ALERT, "err_req_irq");
- goto err_req_irq;
- }
+ /*
+ * Current queue sizes might be smaller than the requested ones
+ * due to past queue allocation failures.
+ */
+ set_io_rings_size(adapter, adapter->requested_tx_ring_size,
+ adapter->requested_rx_ring_size);
- /* allocate transmit descriptors */
+ while (1) {
+ /* Allocate transmit descriptors */
rc = ena_setup_all_tx_resources(adapter);
if (unlikely(rc != 0)) {
- ena_trace(ENA_ALERT, "err_setup_tx");
+ ena_trace(ENA_ALERT, "err_setup_tx\n");
goto err_setup_tx;
}
- /* allocate receive descriptors */
+ /* Allocate receive descriptors */
rc = ena_setup_all_rx_resources(adapter);
if (unlikely(rc != 0)) {
- ena_trace(ENA_ALERT, "err_setup_rx");
+ ena_trace(ENA_ALERT, "err_setup_rx\n");
goto err_setup_rx;
}
- /* create IO queues for Rx & Tx */
+ /* Create IO queues for Rx & Tx */
rc = ena_create_io_queues(adapter);
if (unlikely(rc != 0)) {
ena_trace(ENA_ALERT,
- "create IO queues failed");
+ "create IO queues failed\n");
goto err_io_que;
}
- if (unlikely(adapter->link_status))
- if_link_state_change(adapter->ifp, LINK_STATE_UP);
+ return (0);
- rc = ena_up_complete(adapter);
- if (unlikely(rc != 0))
- goto err_up_complete;
+err_io_que:
+ ena_free_all_rx_resources(adapter);
+err_setup_rx:
+ ena_free_all_tx_resources(adapter);
+err_setup_tx:
+ /*
+ * Lower the ring size if ENOMEM. Otherwise, return the
+ * error straightaway.
+ */
+ if (unlikely(rc != ENOMEM)) {
+ ena_trace(ENA_ALERT,
+ "Queue creation failed with error code: %d\n", rc);
+ return (rc);
+ }
- counter_u64_add(adapter->dev_stats.interface_up, 1);
+ cur_tx_ring_size = adapter->tx_ring[0].ring_size;
+ cur_rx_ring_size = adapter->rx_ring[0].ring_size;
- ena_update_hwassist(adapter);
+ device_printf(adapter->pdev,
+ "Not enough memory to create queues with sizes TX=%d, RX=%d\n",
+ cur_tx_ring_size, cur_rx_ring_size);
- if_setdrvflagbits(adapter->ifp, IFF_DRV_RUNNING,
- IFF_DRV_OACTIVE);
+ new_tx_ring_size = cur_tx_ring_size;
+ new_rx_ring_size = cur_rx_ring_size;
- callout_reset_sbt(&adapter->timer_service, SBT_1S, SBT_1S,
- ena_timer_service, (void *)adapter, 0);
+ /*
+ * Decrease the size of a larger queue, or decrease both if they are
+ * the same size.
+ */
+ if (cur_rx_ring_size <= cur_tx_ring_size)
+ new_tx_ring_size = cur_tx_ring_size / 2;
+ if (cur_rx_ring_size >= cur_tx_ring_size)
+ new_rx_ring_size = cur_rx_ring_size / 2;
+
+ if (new_tx_ring_size < ENA_MIN_RING_SIZE ||
+ new_rx_ring_size < ENA_MIN_RING_SIZE) {
+ device_printf(adapter->pdev,
+ "Queue creation failed with the smallest possible queue size"
+ "of %d for both queues. Not retrying with smaller queues\n",
+ ENA_MIN_RING_SIZE);
+ return (rc);
+ }
+
+ set_io_rings_size(adapter, new_tx_ring_size, new_rx_ring_size);
+ }
+}
+
+int
+ena_up(struct ena_adapter *adapter)
+{
+ int rc = 0;
+
+ if (unlikely(device_is_attached(adapter->pdev) == 0)) {
+ device_printf(adapter->pdev, "device is not attached!\n");
+ return (ENXIO);
+ }
+
+ if (ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter))
+ return (0);
+
+ device_printf(adapter->pdev, "device is going UP\n");
+
+ /* setup interrupts for IO queues */
+ rc = ena_setup_io_intr(adapter);
+ if (unlikely(rc != 0)) {
+ ena_trace(ENA_ALERT, "error setting up IO interrupt\n");
+ goto error;
+ }
+ rc = ena_request_io_irq(adapter);
+ if (unlikely(rc != 0)) {
+ ena_trace(ENA_ALERT, "err_req_irq\n");
+ goto error;
+ }
- adapter->up = true;
+ device_printf(adapter->pdev,
+ "Creating %u IO queues. Rx queue size: %d, Tx queue size: %d, "
+ "LLQ is %s\n",
+ adapter->num_io_queues,
+ adapter->requested_rx_ring_size,
+ adapter->requested_tx_ring_size,
+ (adapter->ena_dev->tx_mem_queue_type ==
+ ENA_ADMIN_PLACEMENT_POLICY_DEV) ? "ENABLED" : "DISABLED");
- ena_unmask_all_io_irqs(adapter);
+ rc = create_queues_with_size_backoff(adapter);
+ if (unlikely(rc != 0)) {
+ ena_trace(ENA_ALERT,
+ "error creating queues with size backoff\n");
+ goto err_create_queues_with_backoff;
}
+ if (ENA_FLAG_ISSET(ENA_FLAG_LINK_UP, adapter))
+ if_link_state_change(adapter->ifp, LINK_STATE_UP);
+
+ rc = ena_up_complete(adapter);
+ if (unlikely(rc != 0))
+ goto err_up_complete;
+
+ counter_u64_add(adapter->dev_stats.interface_up, 1);
+
+ ena_update_hwassist(adapter);
+
+ if_setdrvflagbits(adapter->ifp, IFF_DRV_RUNNING,
+ IFF_DRV_OACTIVE);
+
+ /*
+ * Activate the timer service only if the device is running.
+ * If this flag is not set, the driver is being reset and the
+ * timer service will be activated afterwards.
+ */
+ if (ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter)) {
+ callout_reset_sbt(&adapter->timer_service, SBT_1S,
+ SBT_1S, ena_timer_service, (void *)adapter, 0);
+ }
+
+ ENA_FLAG_SET_ATOMIC(ENA_FLAG_DEV_UP, adapter);
+
+ ena_unmask_all_io_irqs(adapter);
+
return (0);
err_up_complete:
ena_destroy_all_io_queues(adapter);
-err_io_que:
ena_free_all_rx_resources(adapter);
-err_setup_rx:
ena_free_all_tx_resources(adapter);
-err_setup_tx:
+err_create_queues_with_backoff:
ena_free_io_irq(adapter);
-err_req_irq:
+error:
return (rc);
}
@@ -2306,6 +2171,8 @@ ena_get_counter(if_t ifp, ift_counter cnt)
return (counter_u64_fetch(stats->tx_bytes));
case IFCOUNTER_IQDROPS:
return (counter_u64_fetch(stats->rx_drops));
+ case IFCOUNTER_OQDROPS:
+ return (counter_u64_fetch(stats->tx_drops));
default:
return (if_get_counter_default(ifp, cnt));
}
@@ -2322,23 +2189,23 @@ static void
ena_media_status(if_t ifp, struct ifmediareq *ifmr)
{
struct ena_adapter *adapter = if_getsoftc(ifp);
- ena_trace(ENA_DBG, "enter");
+ ena_trace(ENA_DBG, "enter\n");
- mtx_lock(&adapter->global_mtx);
+ ENA_LOCK_LOCK(adapter);
ifmr->ifm_status = IFM_AVALID;
ifmr->ifm_active = IFM_ETHER;
- if (!adapter->link_status) {
- mtx_unlock(&adapter->global_mtx);
- ena_trace(ENA_INFO, "link_status = false");
+ if (!ENA_FLAG_ISSET(ENA_FLAG_LINK_UP, adapter)) {
+ ENA_LOCK_UNLOCK(adapter);
+ ena_trace(ENA_INFO, "Link is down\n");
return;
}
ifmr->ifm_status |= IFM_ACTIVE;
- ifmr->ifm_active |= IFM_10G_T | IFM_FDX;
+ ifmr->ifm_active |= IFM_UNKNOWN | IFM_FDX;
- mtx_unlock(&adapter->global_mtx);
+ ENA_LOCK_UNLOCK(adapter);
}
static void
@@ -2346,10 +2213,10 @@ ena_init(void *arg)
{
struct ena_adapter *adapter = (struct ena_adapter *)arg;
- if (!adapter->up) {
- sx_xlock(&adapter->ioctl_sx);
+ if (!ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter)) {
+ ENA_LOCK_LOCK(adapter);
ena_up(adapter);
- sx_unlock(&adapter->ioctl_sx);
+ ENA_LOCK_UNLOCK(adapter);
}
}
@@ -2371,13 +2238,13 @@ ena_ioctl(if_t ifp, u_long command, caddr_t data)
case SIOCSIFMTU:
if (ifp->if_mtu == ifr->ifr_mtu)
break;
- sx_xlock(&adapter->ioctl_sx);
+ ENA_LOCK_LOCK(adapter);
ena_down(adapter);
ena_change_mtu(ifp, ifr->ifr_mtu);
rc = ena_up(adapter);
- sx_unlock(&adapter->ioctl_sx);
+ ENA_LOCK_UNLOCK(adapter);
break;
case SIOCSIFFLAGS:
@@ -2389,15 +2256,15 @@ ena_ioctl(if_t ifp, u_long command, caddr_t data)
"ioctl promisc/allmulti\n");
}
} else {
- sx_xlock(&adapter->ioctl_sx);
+ ENA_LOCK_LOCK(adapter);
rc = ena_up(adapter);
- sx_unlock(&adapter->ioctl_sx);
+ ENA_LOCK_UNLOCK(adapter);
}
} else {
if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) != 0) {
- sx_xlock(&adapter->ioctl_sx);
+ ENA_LOCK_LOCK(adapter);
ena_down(adapter);
- sx_unlock(&adapter->ioctl_sx);
+ ENA_LOCK_UNLOCK(adapter);
}
}
break;
@@ -2422,10 +2289,10 @@ ena_ioctl(if_t ifp, u_long command, caddr_t data)
if ((reinit != 0) &&
((if_getdrvflags(ifp) & IFF_DRV_RUNNING) != 0)) {
- sx_xlock(&adapter->ioctl_sx);
+ ENA_LOCK_LOCK(adapter);
ena_down(adapter);
rc = ena_up(adapter);
- sx_unlock(&adapter->ioctl_sx);
+ ENA_LOCK_UNLOCK(adapter);
}
}
@@ -2446,7 +2313,7 @@ ena_get_dev_offloads(struct ena_com_dev_get_features_ctx *feat)
if ((feat->offload.tx &
(ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_FULL_MASK |
ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_PART_MASK |
- ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L3_CSUM_IPV4_MASK)) != 0)
+ ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L3_CSUM_IPV4_MASK)) != 0)
caps |= IFCAP_TXCSUM;
if ((feat->offload.tx &
@@ -2539,7 +2406,7 @@ ena_setup_ifnet(device_t pdev, struct ena_adapter *adapter,
if_setioctlfn(ifp, ena_ioctl);
if_setgetcounterfn(ifp, ena_get_counter);
- if_setsendqlen(ifp, adapter->tx_ring_size);
+ if_setsendqlen(ifp, adapter->requested_tx_ring_size);
if_setsendqready(ifp);
if_setmtu(ifp, ETHERMTU);
if_setbaudrate(ifp, 0);
@@ -2574,493 +2441,239 @@ ena_setup_ifnet(device_t pdev, struct ena_adapter *adapter,
return (0);
}
-static void
+void
ena_down(struct ena_adapter *adapter)
{
int rc;
- if (adapter->up) {
- device_printf(adapter->pdev, "device is going DOWN\n");
-
- callout_drain(&adapter->timer_service);
-
- adapter->up = false;
- if_setdrvflagbits(adapter->ifp, IFF_DRV_OACTIVE,
- IFF_DRV_RUNNING);
+ if (!ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter))
+ return;
- ena_free_io_irq(adapter);
+ device_printf(adapter->pdev, "device is going DOWN\n");
- if (adapter->trigger_reset) {
- rc = ena_com_dev_reset(adapter->ena_dev,
- adapter->reset_reason);
- if (unlikely(rc != 0))
- device_printf(adapter->pdev,
- "Device reset failed\n");
- }
+ callout_drain(&adapter->timer_service);
- ena_destroy_all_io_queues(adapter);
+ ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_DEV_UP, adapter);
+ if_setdrvflagbits(adapter->ifp, IFF_DRV_OACTIVE,
+ IFF_DRV_RUNNING);
- ena_free_all_tx_bufs(adapter);
- ena_free_all_rx_bufs(adapter);
- ena_free_all_tx_resources(adapter);
- ena_free_all_rx_resources(adapter);
+ ena_free_io_irq(adapter);
- counter_u64_add(adapter->dev_stats.interface_down, 1);
+ if (ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter)) {
+ rc = ena_com_dev_reset(adapter->ena_dev,
+ adapter->reset_reason);
+ if (unlikely(rc != 0))
+ device_printf(adapter->pdev,
+ "Device reset failed\n");
}
-}
-
-static void
-ena_tx_csum(struct ena_com_tx_ctx *ena_tx_ctx, struct mbuf *mbuf)
-{
- struct ena_com_tx_meta *ena_meta;
- struct ether_vlan_header *eh;
- u32 mss;
- bool offload;
- uint16_t etype;
- int ehdrlen;
- struct ip *ip;
- int iphlen;
- struct tcphdr *th;
- offload = false;
- ena_meta = &ena_tx_ctx->ena_meta;
- mss = mbuf->m_pkthdr.tso_segsz;
+ ena_destroy_all_io_queues(adapter);
- if (mss != 0)
- offload = true;
+ ena_free_all_tx_bufs(adapter);
+ ena_free_all_rx_bufs(adapter);
+ ena_free_all_tx_resources(adapter);
+ ena_free_all_rx_resources(adapter);
- if ((mbuf->m_pkthdr.csum_flags & CSUM_TSO) != 0)
- offload = true;
+ counter_u64_add(adapter->dev_stats.interface_down, 1);
+}
- if ((mbuf->m_pkthdr.csum_flags & CSUM_OFFLOAD) != 0)
- offload = true;
+static uint32_t
+ena_calc_max_io_queue_num(device_t pdev, struct ena_com_dev *ena_dev,
+ struct ena_com_dev_get_features_ctx *get_feat_ctx)
+{
+ uint32_t io_tx_sq_num, io_tx_cq_num, io_rx_num, max_num_io_queues;
- if (!offload) {
- ena_tx_ctx->meta_valid = 0;
- return;
- }
+ /* Regular queues capabilities */
+ if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) {
+ struct ena_admin_queue_ext_feature_fields *max_queue_ext =
+ &get_feat_ctx->max_queue_ext.max_queue_ext;
+ io_rx_num = min_t(int, max_queue_ext->max_rx_sq_num,
+ max_queue_ext->max_rx_cq_num);
- /* Determine where frame payload starts. */
- eh = mtod(mbuf, struct ether_vlan_header *);
- if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
- etype = ntohs(eh->evl_proto);
- ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
+ io_tx_sq_num = max_queue_ext->max_tx_sq_num;
+ io_tx_cq_num = max_queue_ext->max_tx_cq_num;
} else {
- etype = ntohs(eh->evl_encap_proto);
- ehdrlen = ETHER_HDR_LEN;
+ struct ena_admin_queue_feature_desc *max_queues =
+ &get_feat_ctx->max_queues;
+ io_tx_sq_num = max_queues->max_sq_num;
+ io_tx_cq_num = max_queues->max_cq_num;
+ io_rx_num = min_t(int, io_tx_sq_num, io_tx_cq_num);
}
- ip = (struct ip *)(mbuf->m_data + ehdrlen);
- iphlen = ip->ip_hl << 2;
- th = (struct tcphdr *)((caddr_t)ip + iphlen);
+ /* In case of LLQ use the llq fields for the tx SQ/CQ */
+ if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV)
+ io_tx_sq_num = get_feat_ctx->llq.max_llq_num;
- if ((mbuf->m_pkthdr.csum_flags & CSUM_IP) != 0) {
- ena_tx_ctx->l3_csum_enable = 1;
- }
- if ((mbuf->m_pkthdr.csum_flags & CSUM_TSO) != 0) {
- ena_tx_ctx->tso_enable = 1;
- ena_meta->l4_hdr_len = (th->th_off);
- }
-
- switch (etype) {
- case ETHERTYPE_IP:
- ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV4;
- if ((ip->ip_off & htons(IP_DF)) != 0)
- ena_tx_ctx->df = 1;
- break;
- case ETHERTYPE_IPV6:
- ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV6;
-
- default:
- break;
- }
-
- if (ip->ip_p == IPPROTO_TCP) {
- ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_TCP;
- if ((mbuf->m_pkthdr.csum_flags &
- (CSUM_IP_TCP | CSUM_IP6_TCP)) != 0)
- ena_tx_ctx->l4_csum_enable = 1;
- else
- ena_tx_ctx->l4_csum_enable = 0;
- } else if (ip->ip_p == IPPROTO_UDP) {
- ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_UDP;
- if ((mbuf->m_pkthdr.csum_flags &
- (CSUM_IP_UDP | CSUM_IP6_UDP)) != 0)
- ena_tx_ctx->l4_csum_enable = 1;
- else
- ena_tx_ctx->l4_csum_enable = 0;
- } else {
- ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_UNKNOWN;
- ena_tx_ctx->l4_csum_enable = 0;
- }
+ max_num_io_queues = min_t(uint32_t, mp_ncpus, ENA_MAX_NUM_IO_QUEUES);
+ max_num_io_queues = min_t(uint32_t, max_num_io_queues, io_rx_num);
+ max_num_io_queues = min_t(uint32_t, max_num_io_queues, io_tx_sq_num);
+ max_num_io_queues = min_t(uint32_t, max_num_io_queues, io_tx_cq_num);
+	/* 1 IRQ for mgmnt and 1 IRQ for each TX/RX pair */
+ max_num_io_queues = min_t(uint32_t, max_num_io_queues,
+ pci_msix_count(pdev) - 1);
- ena_meta->mss = mss;
- ena_meta->l3_hdr_len = iphlen;
- ena_meta->l3_hdr_offset = ehdrlen;
- ena_tx_ctx->meta_valid = 1;
+ return (max_num_io_queues);
}
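For reference, a minimal userland sketch of the min_t() chain above, using made-up limits (illustrative numbers, not taken from any real device): with 16 CPUs, 32 Rx/Tx queues reported by the device and 9 MSI-X vectors, the result is 8 IO queues, since one vector is always kept for the management interrupt.

#include <stdint.h>
#include <stdio.h>

static uint32_t
min_u32(uint32_t a, uint32_t b)
{
	return (a < b ? a : b);
}

int
main(void)
{
	/* made-up values standing in for the driver/device limits */
	uint32_t ncpus = 16, ena_max = 32;	/* mp_ncpus, ENA_MAX_NUM_IO_QUEUES */
	uint32_t io_rx = 32, io_tx_sq = 32, io_tx_cq = 32;
	uint32_t msix = 9;			/* pci_msix_count(pdev) */
	uint32_t n;

	n = min_u32(ncpus, ena_max);
	n = min_u32(n, io_rx);
	n = min_u32(n, io_tx_sq);
	n = min_u32(n, io_tx_cq);
	n = min_u32(n, msix - 1);	/* one vector stays with the admin queue */

	printf("max_num_io_queues = %u\n", n);	/* prints 8 */
	return (0);
}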
static int
-ena_check_and_collapse_mbuf(struct ena_ring *tx_ring, struct mbuf **mbuf)
+ena_enable_wc(struct resource *res)
{
- struct ena_adapter *adapter;
- struct mbuf *collapsed_mbuf;
- int num_frags;
-
- adapter = tx_ring->adapter;
- num_frags = ena_mbuf_count(*mbuf);
-
- /* One segment must be reserved for configuration descriptor. */
- if (num_frags < adapter->max_tx_sgl_size)
- return (0);
- counter_u64_add(tx_ring->tx_stats.collapse, 1);
+#if defined(__i386) || defined(__amd64)
+ vm_offset_t va;
+ vm_size_t len;
+ int rc;
- collapsed_mbuf = m_collapse(*mbuf, M_NOWAIT,
- adapter->max_tx_sgl_size - 1);
- if (unlikely(collapsed_mbuf == NULL)) {
- counter_u64_add(tx_ring->tx_stats.collapse_err, 1);
- return (ENOMEM);
+ va = (vm_offset_t)rman_get_virtual(res);
+ len = rman_get_size(res);
+ /* Enable write combining */
+ rc = pmap_change_attr(va, len, PAT_WRITE_COMBINING);
+ if (unlikely(rc != 0)) {
+ ena_trace(ENA_ALERT, "pmap_change_attr failed, %d\n", rc);
+ return (rc);
}
- /* If mbuf was collapsed succesfully, original mbuf is released. */
- *mbuf = collapsed_mbuf;
-
return (0);
+#endif
+ return (EOPNOTSUPP);
}
static int
-ena_xmit_mbuf(struct ena_ring *tx_ring, struct mbuf **mbuf)
+ena_set_queues_placement_policy(device_t pdev, struct ena_com_dev *ena_dev,
+ struct ena_admin_feature_llq_desc *llq,
+ struct ena_llq_configurations *llq_default_configurations)
{
- struct ena_adapter *adapter;
- struct ena_tx_buffer *tx_info;
- struct ena_com_tx_ctx ena_tx_ctx;
- struct ena_com_dev *ena_dev;
- struct ena_com_buf *ena_buf;
- struct ena_com_io_sq* io_sq;
- bus_dma_segment_t segs[ENA_BUS_DMA_SEGS];
- void *push_hdr;
- uint16_t next_to_use;
- uint16_t req_id;
- uint16_t push_len;
- uint16_t ena_qid;
- uint32_t nsegs, header_len;
- int i, rc;
- int nb_hw_desc;
-
- ena_qid = ENA_IO_TXQ_IDX(tx_ring->que->id);
- adapter = tx_ring->que->adapter;
- ena_dev = adapter->ena_dev;
- io_sq = &ena_dev->io_sq_queues[ena_qid];
-
- rc = ena_check_and_collapse_mbuf(tx_ring, mbuf);
- if (unlikely(rc != 0)) {
- ena_trace(ENA_WARNING,
- "Failed to collapse mbuf! err: %d", rc);
- return (rc);
- }
-
- next_to_use = tx_ring->next_to_use;
- req_id = tx_ring->free_tx_ids[next_to_use];
- tx_info = &tx_ring->tx_buffer_info[req_id];
-
- tx_info->mbuf = *mbuf;
- tx_info->num_of_bufs = 0;
-
- ena_buf = tx_info->bufs;
-
- ena_trace(ENA_DBG | ENA_TXPTH, "Tx: %d bytes", (*mbuf)->m_pkthdr.len);
-
- push_len = 0;
- /*
- * header_len is just a hint for the device. Because FreeBSD is not
- * giving us information about packet header length and it is not
- * guaranteed that all packet headers will be in the 1st mbuf, setting
- * header_len to 0 is making the device ignore this value and resolve
- * header on it's own.
- */
- header_len = 0;
- push_hdr = NULL;
-
- rc = bus_dmamap_load_mbuf_sg(adapter->tx_buf_tag, tx_info->map,
- *mbuf, segs, &nsegs, BUS_DMA_NOWAIT);
-
- if (unlikely((rc != 0) || (nsegs == 0))) {
- ena_trace(ENA_WARNING,
- "dmamap load failed! err: %d nsegs: %d", rc, nsegs);
- counter_u64_add(tx_ring->tx_stats.dma_mapping_err, 1);
- tx_info->mbuf = NULL;
- if (rc == ENOMEM)
- return (ENA_COM_NO_MEM);
- else
- return (ENA_COM_INVAL);
- }
+ struct ena_adapter *adapter = device_get_softc(pdev);
+ int rc, rid;
+ uint32_t llq_feature_mask;
- for (i = 0; i < nsegs; i++) {
- ena_buf->len = segs[i].ds_len;
- ena_buf->paddr = segs[i].ds_addr;
- ena_buf++;
+ llq_feature_mask = 1 << ENA_ADMIN_LLQ;
+ if (!(ena_dev->supported_features & llq_feature_mask)) {
+ device_printf(pdev,
+ "LLQ is not supported. Fallback to host mode policy.\n");
+ ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
+ return (0);
}
- tx_info->num_of_bufs = nsegs;
-
- memset(&ena_tx_ctx, 0x0, sizeof(struct ena_com_tx_ctx));
- ena_tx_ctx.ena_bufs = tx_info->bufs;
- ena_tx_ctx.push_header = push_hdr;
- ena_tx_ctx.num_bufs = tx_info->num_of_bufs;
- ena_tx_ctx.req_id = req_id;
- ena_tx_ctx.header_len = header_len;
- /* Set flags and meta data */
- ena_tx_csum(&ena_tx_ctx, *mbuf);
- /* Prepare the packet's descriptors and send them to device */
- rc = ena_com_prepare_tx(io_sq, &ena_tx_ctx, &nb_hw_desc);
+ rc = ena_com_config_dev_mode(ena_dev, llq, llq_default_configurations);
if (unlikely(rc != 0)) {
- ena_trace(ENA_DBG | ENA_TXPTH, "failed to prepare tx bufs\n");
- counter_u64_add(tx_ring->tx_stats.prepare_ctx_err, 1);
- goto dma_error;
+ device_printf(pdev, "Failed to configure the device mode. "
+ "Fallback to host mode policy.\n");
+ ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
+ return (0);
}
- counter_enter();
- counter_u64_add_protected(tx_ring->tx_stats.cnt, 1);
- counter_u64_add_protected(tx_ring->tx_stats.bytes,
- (*mbuf)->m_pkthdr.len);
-
- counter_u64_add_protected(adapter->hw_stats.tx_packets, 1);
- counter_u64_add_protected(adapter->hw_stats.tx_bytes,
- (*mbuf)->m_pkthdr.len);
- counter_exit();
-
- tx_info->tx_descs = nb_hw_desc;
- getbinuptime(&tx_info->timestamp);
- tx_info->print_once = true;
-
- tx_ring->next_to_use = ENA_TX_RING_IDX_NEXT(next_to_use,
- tx_ring->ring_size);
-
- bus_dmamap_sync(adapter->tx_buf_tag, tx_info->map,
- BUS_DMASYNC_PREWRITE);
-
- return (0);
-
-dma_error:
- tx_info->mbuf = NULL;
- bus_dmamap_unload(adapter->tx_buf_tag, tx_info->map);
-
- return (rc);
-}
-
-static void
-ena_start_xmit(struct ena_ring *tx_ring)
-{
- struct mbuf *mbuf;
- struct ena_adapter *adapter = tx_ring->adapter;
- struct ena_com_io_sq* io_sq;
- int ena_qid;
- int acum_pkts = 0;
- int ret = 0;
-
- if (unlikely((if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING) == 0))
- return;
-
- if (unlikely(!adapter->link_status))
- return;
-
- ena_qid = ENA_IO_TXQ_IDX(tx_ring->que->id);
- io_sq = &adapter->ena_dev->io_sq_queues[ena_qid];
-
- while ((mbuf = drbr_peek(adapter->ifp, tx_ring->br)) != NULL) {
- ena_trace(ENA_DBG | ENA_TXPTH, "\ndequeued mbuf %p with flags %#x and"
- " header csum flags %#jx",
- mbuf, mbuf->m_flags, (uint64_t)mbuf->m_pkthdr.csum_flags);
-
- if (unlikely(!ena_com_sq_have_enough_space(io_sq,
- ENA_TX_CLEANUP_THRESHOLD)))
- ena_tx_cleanup(tx_ring);
-
- if (unlikely((ret = ena_xmit_mbuf(tx_ring, &mbuf)) != 0)) {
- if (ret == ENA_COM_NO_MEM) {
- drbr_putback(adapter->ifp, tx_ring->br, mbuf);
- } else if (ret == ENA_COM_NO_SPACE) {
- drbr_putback(adapter->ifp, tx_ring->br, mbuf);
- } else {
- m_freem(mbuf);
- drbr_advance(adapter->ifp, tx_ring->br);
- }
-
- break;
- }
-
- drbr_advance(adapter->ifp, tx_ring->br);
-
- if (unlikely((if_getdrvflags(adapter->ifp) &
- IFF_DRV_RUNNING) == 0))
- return;
-
- acum_pkts++;
-
- BPF_MTAP(adapter->ifp, mbuf);
-
- if (unlikely(acum_pkts == DB_THRESHOLD)) {
- acum_pkts = 0;
- wmb();
- /* Trigger the dma engine */
- ena_com_write_sq_doorbell(io_sq);
- counter_u64_add(tx_ring->tx_stats.doorbells, 1);
- }
-
- }
+ /* Nothing to config, exit */
+ if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST)
+ return (0);
- if (likely(acum_pkts != 0)) {
- wmb();
- /* Trigger the dma engine */
- ena_com_write_sq_doorbell(io_sq);
- counter_u64_add(tx_ring->tx_stats.doorbells, 1);
+ /* Try to allocate resources for LLQ bar */
+ rid = PCIR_BAR(ENA_MEM_BAR);
+ adapter->memory = bus_alloc_resource_any(pdev, SYS_RES_MEMORY,
+ &rid, RF_ACTIVE);
+ if (unlikely(adapter->memory == NULL)) {
+ device_printf(pdev, "unable to allocate LLQ bar resource. "
+ "Fallback to host mode policy.\n");
+ ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
+ return (0);
}
- if (!ena_com_sq_have_enough_space(io_sq, ENA_TX_CLEANUP_THRESHOLD))
- ena_tx_cleanup(tx_ring);
-}
-
-static void
-ena_deferred_mq_start(void *arg, int pending)
-{
- struct ena_ring *tx_ring = (struct ena_ring *)arg;
- struct ifnet *ifp = tx_ring->adapter->ifp;
-
- while (!drbr_empty(ifp, tx_ring->br) &&
- (if_getdrvflags(ifp) & IFF_DRV_RUNNING) != 0) {
- ENA_RING_MTX_LOCK(tx_ring);
- ena_start_xmit(tx_ring);
- ENA_RING_MTX_UNLOCK(tx_ring);
+ /* Enable write combining for better LLQ performance */
+ rc = ena_enable_wc(adapter->memory);
+ if (unlikely(rc != 0)) {
+ device_printf(pdev, "failed to enable write combining.\n");
+ return (rc);
}
-}
-static int
-ena_mq_start(if_t ifp, struct mbuf *m)
-{
- struct ena_adapter *adapter = ifp->if_softc;
- struct ena_ring *tx_ring;
- int ret, is_drbr_empty;
- uint32_t i;
-
- if (unlikely((if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING) == 0))
- return (ENODEV);
-
- /* Which queue to use */
/*
- * If everything is setup correctly, it should be the
- * same bucket that the current CPU we're on is.
- * It should improve performance.
+ * Save virtual address of the device's memory region
+ * for the ena_com layer.
*/
- if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
-#ifdef RSS
- if (rss_hash2bucket(m->m_pkthdr.flowid,
- M_HASHTYPE_GET(m), &i) == 0) {
- i = i % adapter->num_queues;
-
- } else
-#endif
- {
- i = m->m_pkthdr.flowid % adapter->num_queues;
- }
- } else {
- i = curcpu % adapter->num_queues;
- }
- tx_ring = &adapter->tx_ring[i];
-
- /* Check if drbr is empty before putting packet */
- is_drbr_empty = drbr_empty(ifp, tx_ring->br);
- ret = drbr_enqueue(ifp, tx_ring->br, m);
- if (unlikely(ret != 0)) {
- taskqueue_enqueue(tx_ring->enqueue_tq, &tx_ring->enqueue_task);
- return (ret);
- }
-
- if ((is_drbr_empty != 0) && (ENA_RING_MTX_TRYLOCK(tx_ring) != 0)) {
- ena_start_xmit(tx_ring);
- ENA_RING_MTX_UNLOCK(tx_ring);
- } else {
- taskqueue_enqueue(tx_ring->enqueue_tq, &tx_ring->enqueue_task);
- }
+ ena_dev->mem_bar = rman_get_virtual(adapter->memory);
return (0);
}
-static void
-ena_qflush(if_t ifp)
+static inline
+void set_default_llq_configurations(struct ena_llq_configurations *llq_config)
{
- struct ena_adapter *adapter = ifp->if_softc;
- struct ena_ring *tx_ring = adapter->tx_ring;
- int i;
-
- for(i = 0; i < adapter->num_queues; ++i, ++tx_ring)
- if (!drbr_empty(ifp, tx_ring->br)) {
- ENA_RING_MTX_LOCK(tx_ring);
- drbr_flush(ifp, tx_ring->br);
- ENA_RING_MTX_UNLOCK(tx_ring);
- }
-
- if_qflush(ifp);
+ llq_config->llq_header_location = ENA_ADMIN_INLINE_HEADER;
+ llq_config->llq_ring_entry_size = ENA_ADMIN_LIST_ENTRY_SIZE_128B;
+ llq_config->llq_stride_ctrl = ENA_ADMIN_MULTIPLE_DESCS_PER_ENTRY;
+ llq_config->llq_num_decs_before_header =
+ ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_2;
+ llq_config->llq_ring_entry_size_value = 128;
}
static int
-ena_calc_io_queue_num(struct ena_adapter *adapter,
- struct ena_com_dev_get_features_ctx *get_feat_ctx)
-{
- int io_sq_num, io_cq_num, io_queue_num;
-
- io_sq_num = get_feat_ctx->max_queues.max_sq_num;
- io_cq_num = get_feat_ctx->max_queues.max_cq_num;
-
- io_queue_num = min_t(int, mp_ncpus, ENA_MAX_NUM_IO_QUEUES);
- io_queue_num = min_t(int, io_queue_num, io_sq_num);
- io_queue_num = min_t(int, io_queue_num, io_cq_num);
- /* 1 IRQ for for mgmnt and 1 IRQ for each TX/RX pair */
- io_queue_num = min_t(int, io_queue_num,
- pci_msix_count(adapter->pdev) - 1);
-#ifdef RSS
- io_queue_num = min_t(int, io_queue_num, rss_getnumbuckets());
-#endif
-
- return (io_queue_num);
-}
+ena_calc_io_queue_size(struct ena_calc_queue_size_ctx *ctx)
+{
+ struct ena_admin_feature_llq_desc *llq = &ctx->get_feat_ctx->llq;
+ struct ena_com_dev *ena_dev = ctx->ena_dev;
+ uint32_t tx_queue_size = ENA_DEFAULT_RING_SIZE;
+ uint32_t rx_queue_size = ENA_DEFAULT_RING_SIZE;
+ uint32_t max_tx_queue_size;
+ uint32_t max_rx_queue_size;
+
+ if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) {
+ struct ena_admin_queue_ext_feature_fields *max_queue_ext =
+ &ctx->get_feat_ctx->max_queue_ext.max_queue_ext;
+ max_rx_queue_size = min_t(uint32_t,
+ max_queue_ext->max_rx_cq_depth,
+ max_queue_ext->max_rx_sq_depth);
+ max_tx_queue_size = max_queue_ext->max_tx_cq_depth;
+
+ if (ena_dev->tx_mem_queue_type ==
+ ENA_ADMIN_PLACEMENT_POLICY_DEV)
+ max_tx_queue_size = min_t(uint32_t, max_tx_queue_size,
+ llq->max_llq_depth);
+ else
+ max_tx_queue_size = min_t(uint32_t, max_tx_queue_size,
+ max_queue_ext->max_tx_sq_depth);
-static int
-ena_calc_queue_size(struct ena_adapter *adapter, uint16_t *max_tx_sgl_size,
- uint16_t *max_rx_sgl_size, struct ena_com_dev_get_features_ctx *feat)
-{
- uint32_t queue_size = ENA_DEFAULT_RING_SIZE;
- uint32_t v;
- uint32_t q;
+ ctx->max_tx_sgl_size = min_t(uint16_t, ENA_PKT_MAX_BUFS,
+ max_queue_ext->max_per_packet_tx_descs);
+ ctx->max_rx_sgl_size = min_t(uint16_t, ENA_PKT_MAX_BUFS,
+ max_queue_ext->max_per_packet_rx_descs);
+ } else {
+ struct ena_admin_queue_feature_desc *max_queues =
+ &ctx->get_feat_ctx->max_queues;
+ max_rx_queue_size = min_t(uint32_t,
+ max_queues->max_cq_depth,
+ max_queues->max_sq_depth);
+ max_tx_queue_size = max_queues->max_cq_depth;
+
+ if (ena_dev->tx_mem_queue_type ==
+ ENA_ADMIN_PLACEMENT_POLICY_DEV)
+ max_tx_queue_size = min_t(uint32_t, max_tx_queue_size,
+ llq->max_llq_depth);
+ else
+ max_tx_queue_size = min_t(uint32_t, max_tx_queue_size,
+ max_queues->max_sq_depth);
- queue_size = min_t(uint32_t, queue_size,
- feat->max_queues.max_cq_depth);
- queue_size = min_t(uint32_t, queue_size,
- feat->max_queues.max_sq_depth);
+ ctx->max_tx_sgl_size = min_t(uint16_t, ENA_PKT_MAX_BUFS,
+ max_queues->max_packet_tx_descs);
+ ctx->max_rx_sgl_size = min_t(uint16_t, ENA_PKT_MAX_BUFS,
+ max_queues->max_packet_rx_descs);
+ }
/* round down to the nearest power of 2 */
- v = queue_size;
- while (v != 0) {
- if (powerof2(queue_size) != 0)
- break;
- v /= 2;
- q = rounddown2(queue_size, v);
- if (q != 0) {
- queue_size = q;
- break;
- }
- }
+ max_tx_queue_size = 1 << (flsl(max_tx_queue_size) - 1);
+ max_rx_queue_size = 1 << (flsl(max_rx_queue_size) - 1);
- if (unlikely(queue_size == 0)) {
- device_printf(adapter->pdev, "Invalid queue size\n");
- return (ENA_COM_FAULT);
- }
+ tx_queue_size = clamp_val(tx_queue_size, ENA_MIN_RING_SIZE,
+ max_tx_queue_size);
+ rx_queue_size = clamp_val(rx_queue_size, ENA_MIN_RING_SIZE,
+ max_rx_queue_size);
+
+ tx_queue_size = 1 << (flsl(tx_queue_size) - 1);
+ rx_queue_size = 1 << (flsl(rx_queue_size) - 1);
- *max_tx_sgl_size = min_t(uint16_t, ENA_PKT_MAX_BUFS,
- feat->max_queues.max_packet_tx_descs);
- *max_rx_sgl_size = min_t(uint16_t, ENA_PKT_MAX_BUFS,
- feat->max_queues.max_packet_rx_descs);
+ ctx->max_tx_queue_size = max_tx_queue_size;
+ ctx->max_rx_queue_size = max_rx_queue_size;
+ ctx->tx_queue_size = tx_queue_size;
+ ctx->rx_queue_size = rx_queue_size;
- return (queue_size);
+ return (0);
}
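The sizing logic above can be followed with a small standalone sketch; the device limits are made up, flsl() is FreeBSD's find-last-set helper from <strings.h>, and the clamp step mirrors the clamp_val() macro added to ena.h:

#include <stdint.h>
#include <stdio.h>
#include <strings.h>	/* FreeBSD's flsl(3) */

int
main(void)
{
	uint32_t max_tx = 700;		/* made-up device limit (e.g. max_tx_cq_depth) */
	uint32_t tx = 1024;		/* ENA_DEFAULT_RING_SIZE */
	uint32_t min_ring = 256;	/* ENA_MIN_RING_SIZE */

	/* round the device limit down to the nearest power of 2: 700 -> 512 */
	max_tx = 1U << (flsl(max_tx) - 1);

	/* clamp the default size into [min_ring, max_tx]: 1024 -> 512 */
	if (tx < min_ring)
		tx = min_ring;
	if (tx > max_tx)
		tx = max_tx;

	/* and round the result down to a power of 2 as well (512 stays 512) */
	tx = 1U << (flsl(tx) - 1);

	printf("tx_queue_size = %u\n", tx);
	return (0);
}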
static int
@@ -3077,12 +2690,7 @@ ena_rss_init_default(struct ena_adapter *adapter)
}
for (i = 0; i < ENA_RX_RSS_TABLE_SIZE; i++) {
-#ifdef RSS
- qid = rss_get_indirection_to_bucket(i);
- qid = qid % adapter->num_queues;
-#else
- qid = i % adapter->num_queues;
-#endif
+ qid = i % adapter->num_io_queues;
rc = ena_com_indirect_table_fill_entry(ena_dev, i,
ENA_IO_RXQ_IDX(qid));
if (unlikely((rc != 0) && (rc != EOPNOTSUPP))) {
@@ -3130,12 +2738,12 @@ ena_rss_init_default_deferred(void *arg)
adapter = devclass_get_softc(dc, max);
if (adapter != NULL) {
rc = ena_rss_init_default(adapter);
- adapter->rss_support = true;
+ ENA_FLAG_SET_ATOMIC(ENA_FLAG_RSS_ACTIVE, adapter);
if (unlikely(rc != 0)) {
device_printf(adapter->pdev,
"WARNING: RSS was not properly initialized,"
" it will affect bandwidth\n");
- adapter->rss_support = false;
+ ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_RSS_ACTIVE, adapter);
}
}
}
@@ -3143,9 +2751,10 @@ ena_rss_init_default_deferred(void *arg)
SYSINIT(ena_rss_init, SI_SUB_KICK_SCHEDULER, SI_ORDER_SECOND, ena_rss_init_default_deferred, NULL);
static void
-ena_config_host_info(struct ena_com_dev *ena_dev)
+ena_config_host_info(struct ena_com_dev *ena_dev, device_t dev)
{
struct ena_admin_host_info *host_info;
+ uintptr_t rid;
int rc;
/* Allocate only the host info */
@@ -3157,6 +2766,8 @@ ena_config_host_info(struct ena_com_dev *ena_dev)
host_info = ena_dev->host_attr.host_info;
+ if (pci_get_id(dev, PCI_ID_RID, &rid) == 0)
+ host_info->bdf = rid;
host_info->os_type = ENA_ADMIN_OS_FREEBSD;
host_info->kernel_ver = osreldate;
@@ -3169,6 +2780,7 @@ ena_config_host_info(struct ena_com_dev *ena_dev)
(DRV_MODULE_VER_MAJOR) |
(DRV_MODULE_VER_MINOR << ENA_ADMIN_HOST_INFO_MINOR_SHIFT) |
(DRV_MODULE_VER_SUBMINOR << ENA_ADMIN_HOST_INFO_SUB_MINOR_SHIFT);
+ host_info->num_cpus = mp_ncpus;
rc = ena_com_set_host_attributes(ena_dev);
if (unlikely(rc != 0)) {
@@ -3230,7 +2842,7 @@ ena_device_init(struct ena_adapter *adapter, device_t pdev,
adapter->dma_width = dma_width;
/* ENA admin level init */
- rc = ena_com_admin_init(ena_dev, &aenq_handlers, true);
+ rc = ena_com_admin_init(ena_dev, &aenq_handlers);
if (unlikely(rc != 0)) {
device_printf(pdev,
"Can not initialize ena admin queue with device\n");
@@ -3244,7 +2856,7 @@ ena_device_init(struct ena_adapter *adapter, device_t pdev,
*/
ena_com_set_admin_polling_mode(ena_dev, true);
- ena_config_host_info(ena_dev);
+ ena_config_host_info(ena_dev, pdev);
/* Get Device Attributes */
rc = ena_com_get_dev_attr_feat(ena_dev, get_feat_ctx);
@@ -3254,7 +2866,11 @@ ena_device_init(struct ena_adapter *adapter, device_t pdev,
goto err_admin_init;
}
- aenq_groups = BIT(ENA_ADMIN_LINK_CHANGE) | BIT(ENA_ADMIN_KEEP_ALIVE);
+ aenq_groups = BIT(ENA_ADMIN_LINK_CHANGE) |
+ BIT(ENA_ADMIN_FATAL_ERROR) |
+ BIT(ENA_ADMIN_WARNING) |
+ BIT(ENA_ADMIN_NOTIFICATION) |
+ BIT(ENA_ADMIN_KEEP_ALIVE);
aenq_groups &= get_feat_ctx->aenq.supported_groups;
rc = ena_com_set_aenq_config(ena_dev, aenq_groups);
@@ -3276,8 +2892,7 @@ err_mmio_read_less:
return (rc);
}
-static int ena_enable_msix_and_set_admin_interrupts(struct ena_adapter *adapter,
- int io_vectors)
+static int ena_enable_msix_and_set_admin_interrupts(struct ena_adapter *adapter)
{
struct ena_com_dev *ena_dev = adapter->ena_dev;
int rc;
@@ -3316,12 +2931,16 @@ static void ena_keep_alive_wd(void *adapter_data,
struct ena_admin_aenq_keep_alive_desc *desc;
sbintime_t stime;
uint64_t rx_drops;
+ uint64_t tx_drops;
desc = (struct ena_admin_aenq_keep_alive_desc *)aenq_e;
rx_drops = ((uint64_t)desc->rx_drops_high << 32) | desc->rx_drops_low;
+ tx_drops = ((uint64_t)desc->tx_drops_high << 32) | desc->tx_drops_low;
counter_u64_zero(adapter->hw_stats.rx_drops);
counter_u64_add(adapter->hw_stats.rx_drops, rx_drops);
+ counter_u64_zero(adapter->hw_stats.tx_drops);
+ counter_u64_add(adapter->hw_stats.tx_drops, tx_drops);
stime = getsbinuptime();
atomic_store_rel_64(&adapter->keep_alive_timestamp, stime);
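For clarity, the high/low split used by the AENQ keep-alive descriptor is simply a 64-bit counter carried in two 32-bit words; a tiny illustration with made-up numbers:

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint32_t rx_drops_high = 0x1;		/* upper 32 bits from the descriptor */
	uint32_t rx_drops_low = 0x00000002;	/* lower 32 bits from the descriptor */
	uint64_t rx_drops;

	rx_drops = ((uint64_t)rx_drops_high << 32) | rx_drops_low;
	printf("rx_drops = %ju\n", (uintmax_t)rx_drops);	/* 4294967298 */
	return (0);
}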
@@ -3335,7 +2954,7 @@ static void check_for_missing_keep_alive(struct ena_adapter *adapter)
if (adapter->wd_active == 0)
return;
- if (likely(adapter->keep_alive_timeout == 0))
+ if (adapter->keep_alive_timeout == ENA_HW_HINTS_NO_TIMEOUT)
return;
timestamp = atomic_load_acq_64(&adapter->keep_alive_timestamp);
@@ -3344,8 +2963,7 @@ static void check_for_missing_keep_alive(struct ena_adapter *adapter)
device_printf(adapter->pdev,
"Keep alive watchdog timeout.\n");
counter_u64_add(adapter->dev_stats.wd_expired, 1);
- adapter->reset_reason = ENA_REGS_RESET_KEEP_ALIVE_TO;
- adapter->trigger_reset = true;
+ ena_trigger_reset(adapter, ENA_REGS_RESET_KEEP_ALIVE_TO);
}
}
@@ -3357,19 +2975,41 @@ static void check_for_admin_com_state(struct ena_adapter *adapter)
device_printf(adapter->pdev,
"ENA admin queue is not in running state!\n");
counter_u64_add(adapter->dev_stats.admin_q_pause, 1);
- adapter->reset_reason = ENA_REGS_RESET_ADMIN_TO;
- adapter->trigger_reset = true;
+ ena_trigger_reset(adapter, ENA_REGS_RESET_ADMIN_TO);
+ }
+}
+
+static int
+check_for_rx_interrupt_queue(struct ena_adapter *adapter,
+ struct ena_ring *rx_ring)
+{
+ if (likely(rx_ring->first_interrupt))
+ return (0);
+
+ if (ena_com_cq_empty(rx_ring->ena_com_io_cq))
+ return (0);
+
+ rx_ring->no_interrupt_event_cnt++;
+
+ if (rx_ring->no_interrupt_event_cnt == ENA_MAX_NO_INTERRUPT_ITERATIONS) {
+ device_printf(adapter->pdev, "Potential MSIX issue on Rx side "
+ "Queue = %d. Reset the device\n", rx_ring->qid);
+ ena_trigger_reset(adapter, ENA_REGS_RESET_MISS_INTERRUPT);
+ return (EIO);
}
+
+ return (0);
}
static int
-check_missing_comp_in_queue(struct ena_adapter *adapter,
+check_missing_comp_in_tx_queue(struct ena_adapter *adapter,
struct ena_ring *tx_ring)
{
struct bintime curtime, time;
struct ena_tx_buffer *tx_buf;
+ sbintime_t time_offset;
uint32_t missed_tx = 0;
- int i;
+ int i, rc = 0;
getbinuptime(&curtime);
@@ -3381,9 +3021,24 @@ check_missing_comp_in_queue(struct ena_adapter *adapter,
time = curtime;
bintime_sub(&time, &tx_buf->timestamp);
+ time_offset = bttosbt(time);
+
+ if (unlikely(!tx_ring->first_interrupt &&
+ time_offset > 2 * adapter->missing_tx_timeout)) {
+ /*
+ * If after graceful period interrupt is still not
+ * received, we schedule a reset.
+ */
+ device_printf(adapter->pdev,
+ "Potential MSIX issue on Tx side Queue = %d. "
+ "Reset the device\n", tx_ring->qid);
+ ena_trigger_reset(adapter,
+ ENA_REGS_RESET_MISS_INTERRUPT);
+ return (EIO);
+ }
/* Check again if packet is still waiting */
- if (unlikely(bttosbt(time) > adapter->missing_tx_timeout)) {
+ if (unlikely(time_offset > adapter->missing_tx_timeout)) {
if (!tx_buf->print_once)
ena_trace(ENA_WARNING, "Found a Tx that wasn't "
@@ -3392,24 +3047,21 @@ check_missing_comp_in_queue(struct ena_adapter *adapter,
tx_buf->print_once = true;
missed_tx++;
- counter_u64_add(tx_ring->tx_stats.missing_tx_comp, 1);
-
- if (unlikely(missed_tx >
- adapter->missing_tx_threshold)) {
- device_printf(adapter->pdev,
- "The number of lost tx completion "
- "is above the threshold (%d > %d). "
- "Reset the device\n",
- missed_tx, adapter->missing_tx_threshold);
- adapter->reset_reason =
- ENA_REGS_RESET_MISS_TX_CMPL;
- adapter->trigger_reset = true;
- return (EIO);
- }
}
}
- return (0);
+ if (unlikely(missed_tx > adapter->missing_tx_threshold)) {
+ device_printf(adapter->pdev,
+ "The number of lost tx completion is above the threshold "
+ "(%d > %d). Reset the device\n",
+ missed_tx, adapter->missing_tx_threshold);
+ ena_trigger_reset(adapter, ENA_REGS_RESET_MISS_TX_CMPL);
+ rc = EIO;
+ }
+
+ counter_u64_add(tx_ring->tx_stats.missing_tx_comp, missed_tx);
+
+ return (rc);
}
/*
@@ -3419,29 +3071,35 @@ check_missing_comp_in_queue(struct ena_adapter *adapter,
* transactions exceeds "missing_tx_threshold".
*/
static void
-check_for_missing_tx_completions(struct ena_adapter *adapter)
+check_for_missing_completions(struct ena_adapter *adapter)
{
struct ena_ring *tx_ring;
+ struct ena_ring *rx_ring;
int i, budget, rc;
/* Make sure the driver doesn't turn the device in other process */
rmb();
- if (!adapter->up)
+ if (!ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter))
return;
- if (adapter->trigger_reset)
+ if (ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter))
return;
- if (adapter->missing_tx_timeout == 0)
+ if (adapter->missing_tx_timeout == ENA_HW_HINTS_NO_TIMEOUT)
return;
budget = adapter->missing_tx_max_queues;
- for (i = adapter->next_monitored_tx_qid; i < adapter->num_queues; i++) {
+ for (i = adapter->next_monitored_tx_qid; i < adapter->num_io_queues; i++) {
tx_ring = &adapter->tx_ring[i];
+ rx_ring = &adapter->rx_ring[i];
- rc = check_missing_comp_in_queue(adapter, tx_ring);
+ rc = check_missing_comp_in_tx_queue(adapter, tx_ring);
+ if (unlikely(rc != 0))
+ return;
+
+ rc = check_for_rx_interrupt_queue(adapter, rx_ring);
if (unlikely(rc != 0))
return;
@@ -3452,10 +3110,10 @@ check_for_missing_tx_completions(struct ena_adapter *adapter)
}
}
- adapter->next_monitored_tx_qid = i % adapter->num_queues;
+ adapter->next_monitored_tx_qid = i % adapter->num_io_queues;
}
-/* trigger deferred rx cleanup after 2 consecutive detections */
+/* trigger rx cleanup after 2 consecutive detections */
#define EMPTY_RX_REFILL 2
/* For the rare case where the device runs out of Rx descriptors and the
* msix handler failed to refill new Rx descriptors (due to a lack of memory
@@ -3473,16 +3131,16 @@ check_for_empty_rx_ring(struct ena_adapter *adapter)
struct ena_ring *rx_ring;
int i, refill_required;
- if (!adapter->up)
+ if (!ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter))
return;
- if (adapter->trigger_reset)
+ if (ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter))
return;
- for (i = 0; i < adapter->num_queues; i++) {
+ for (i = 0; i < adapter->num_io_queues; i++) {
rx_ring = &adapter->rx_ring[i];
- refill_required = ena_com_free_desc(rx_ring->ena_com_io_sq);
+ refill_required = ena_com_free_q_entries(rx_ring->ena_com_io_sq);
if (unlikely(refill_required == (rx_ring->ring_size - 1))) {
rx_ring->empty_rx_queue++;
@@ -3493,8 +3151,8 @@ check_for_empty_rx_ring(struct ena_adapter *adapter)
device_printf(adapter->pdev,
"trigger refill for ring %d\n", i);
- taskqueue_enqueue(rx_ring->cmpl_tq,
- &rx_ring->cmpl_task);
+ taskqueue_enqueue(rx_ring->que->cleanup_tq,
+ &rx_ring->que->cleanup_task);
rx_ring->empty_rx_queue = 0;
}
} else {
@@ -3503,6 +3161,42 @@ check_for_empty_rx_ring(struct ena_adapter *adapter)
}
}
+static void ena_update_hints(struct ena_adapter *adapter,
+ struct ena_admin_ena_hw_hints *hints)
+{
+ struct ena_com_dev *ena_dev = adapter->ena_dev;
+
+ if (hints->admin_completion_tx_timeout)
+ ena_dev->admin_queue.completion_timeout =
+ hints->admin_completion_tx_timeout * 1000;
+
+ if (hints->mmio_read_timeout)
+ /* convert to usec */
+ ena_dev->mmio_read.reg_read_to =
+ hints->mmio_read_timeout * 1000;
+
+ if (hints->missed_tx_completion_count_threshold_to_reset)
+ adapter->missing_tx_threshold =
+ hints->missed_tx_completion_count_threshold_to_reset;
+
+ if (hints->missing_tx_completion_timeout) {
+ if (hints->missing_tx_completion_timeout ==
+ ENA_HW_HINTS_NO_TIMEOUT)
+ adapter->missing_tx_timeout = ENA_HW_HINTS_NO_TIMEOUT;
+ else
+ adapter->missing_tx_timeout =
+ SBT_1MS * hints->missing_tx_completion_timeout;
+ }
+
+ if (hints->driver_watchdog_timeout) {
+ if (hints->driver_watchdog_timeout == ENA_HW_HINTS_NO_TIMEOUT)
+ adapter->keep_alive_timeout = ENA_HW_HINTS_NO_TIMEOUT;
+ else
+ adapter->keep_alive_timeout =
+ SBT_1MS * hints->driver_watchdog_timeout;
+ }
+}
+
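A small sketch of the hint conversion above, assuming FreeBSD's sbintime_t is the usual 64-bit 32.32 fixed-point type (SBT_1S and SBT_1MS are redefined locally so this builds in userland; the hint value is made up):

#include <stdint.h>
#include <stdio.h>

typedef int64_t sbintime_t;
#define SBT_1S	((sbintime_t)1 << 32)
#define SBT_1MS	(SBT_1S / 1000)

int
main(void)
{
	uint16_t hint_ms = 4000;	/* hypothetical device hint: 4000 ms */
	sbintime_t timeout = SBT_1MS * hint_ms;

	printf("timeout = %jd sbt units (~%jd s)\n",
	    (intmax_t)timeout, (intmax_t)(timeout / SBT_1S));
	return (0);
}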
static void
ena_timer_service(void *data)
{
@@ -3514,14 +3208,14 @@ ena_timer_service(void *data)
check_for_admin_com_state(adapter);
- check_for_missing_tx_completions(adapter);
+ check_for_missing_completions(adapter);
check_for_empty_rx_ring(adapter);
if (host_info != NULL)
ena_update_host_info(host_info, adapter->ifp);
- if (unlikely(adapter->trigger_reset)) {
+ if (unlikely(ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter))) {
device_printf(adapter->pdev, "Trigger reset is on\n");
taskqueue_enqueue(adapter->reset_tq, &adapter->reset_task);
return;
@@ -3533,81 +3227,200 @@ ena_timer_service(void *data)
callout_schedule_sbt(&adapter->timer_service, SBT_1S, SBT_1S, 0);
}
-static void
-ena_reset_task(void *arg, int pending)
+void
+ena_destroy_device(struct ena_adapter *adapter, bool graceful)
{
- struct ena_com_dev_get_features_ctx get_feat_ctx;
- struct ena_adapter *adapter = (struct ena_adapter *)arg;
+ if_t ifp = adapter->ifp;
struct ena_com_dev *ena_dev = adapter->ena_dev;
bool dev_up;
- int rc;
- if (unlikely(!adapter->trigger_reset)) {
- device_printf(adapter->pdev,
- "device reset scheduled but trigger_reset is off\n");
+ if (!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))
return;
- }
- sx_xlock(&adapter->ioctl_sx);
+ if_link_state_change(ifp, LINK_STATE_DOWN);
callout_drain(&adapter->timer_service);
- dev_up = adapter->up;
+ dev_up = ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter);
+ if (dev_up)
+ ENA_FLAG_SET_ATOMIC(ENA_FLAG_DEV_UP_BEFORE_RESET, adapter);
+
+ if (!graceful)
+ ena_com_set_admin_running_state(ena_dev, false);
+
+ if (ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, adapter))
+ ena_down(adapter);
+
+ /*
+ * Stop the device from sending AENQ events (if the device was up, and
+ * the trigger reset was on, ena_down already performs device reset)
+ */
+ if (!(ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter) && dev_up))
+ ena_com_dev_reset(adapter->ena_dev, adapter->reset_reason);
- ena_com_set_admin_running_state(ena_dev, false);
- ena_down(adapter);
ena_free_mgmnt_irq(adapter);
+
ena_disable_msix(adapter);
+
+ /*
+ * IO rings resources should be freed because `ena_restore_device()`
+	 * indirectly calls `ena_enable_msix()`, which re-allocates MSIX
+ * vectors. The amount of MSIX vectors after destroy-restore may be
+ * different than before. Therefore, IO rings resources should be
+ * established from scratch each time.
+ */
+ ena_free_all_io_rings_resources(adapter);
+
ena_com_abort_admin_commands(ena_dev);
+
ena_com_wait_for_abort_completion(ena_dev);
+
ena_com_admin_destroy(ena_dev);
+
ena_com_mmio_reg_read_request_destroy(ena_dev);
adapter->reset_reason = ENA_REGS_RESET_NORMAL;
- adapter->trigger_reset = false;
- /* Finished destroy part. Restart the device */
- rc = ena_device_init(adapter, adapter->pdev, &get_feat_ctx,
- &adapter->wd_active);
- if (unlikely(rc != 0)) {
+ ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_TRIGGER_RESET, adapter);
+ ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_DEVICE_RUNNING, adapter);
+}
+
+static int
+ena_device_validate_params(struct ena_adapter *adapter,
+ struct ena_com_dev_get_features_ctx *get_feat_ctx)
+{
+
+ if (memcmp(get_feat_ctx->dev_attr.mac_addr, adapter->mac_addr,
+ ETHER_ADDR_LEN) != 0) {
device_printf(adapter->pdev,
- "ENA device init failed! (err: %d)\n", rc);
- goto err_dev_free;
+ "Error, mac address are different\n");
+ return (EINVAL);
}
- rc = ena_enable_msix_and_set_admin_interrupts(adapter,
- adapter->num_queues);
- if (unlikely(rc != 0)) {
- device_printf(adapter->pdev, "Enable MSI-X failed\n");
- goto err_com_free;
+ if (get_feat_ctx->dev_attr.max_mtu < if_getmtu(adapter->ifp)) {
+ device_printf(adapter->pdev,
+ "Error, device max mtu is smaller than ifp MTU\n");
+ return (EINVAL);
}
+ return 0;
+}
+
+int
+ena_restore_device(struct ena_adapter *adapter)
+{
+ struct ena_com_dev_get_features_ctx get_feat_ctx;
+ struct ena_com_dev *ena_dev = adapter->ena_dev;
+ if_t ifp = adapter->ifp;
+ device_t dev = adapter->pdev;
+ int wd_active;
+ int rc;
+
+ ENA_FLAG_SET_ATOMIC(ENA_FLAG_ONGOING_RESET, adapter);
+
+ rc = ena_device_init(adapter, dev, &get_feat_ctx, &wd_active);
+ if (rc != 0) {
+ device_printf(dev, "Cannot initialize device\n");
+ goto err;
+ }
+ /*
+ * Only enable WD if it was enabled before reset, so it won't override
+	 * the value set by the user via the sysctl interface.
+ */
+ if (adapter->wd_active != 0)
+ adapter->wd_active = wd_active;
+
+ rc = ena_device_validate_params(adapter, &get_feat_ctx);
+ if (rc != 0) {
+ device_printf(dev, "Validation of device parameters failed\n");
+ goto err_device_destroy;
+ }
+
+ ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_ONGOING_RESET, adapter);
+ /* Make sure we don't have a race with AENQ Links state handler */
+ if (ENA_FLAG_ISSET(ENA_FLAG_LINK_UP, adapter))
+ if_link_state_change(ifp, LINK_STATE_UP);
+
+ rc = ena_enable_msix_and_set_admin_interrupts(adapter);
+ if (rc != 0) {
+ device_printf(dev, "Enable MSI-X failed\n");
+ goto err_device_destroy;
+ }
+
+ /*
+ * Effective value of used MSIX vectors should be the same as before
+ * `ena_destroy_device()`, if possible, or closest to it if less vectors
+ * are available.
+ */
+ if ((adapter->msix_vecs - ENA_ADMIN_MSIX_VEC) < adapter->num_io_queues)
+ adapter->num_io_queues =
+ adapter->msix_vecs - ENA_ADMIN_MSIX_VEC;
+
+ /* Re-initialize rings basic information */
+ ena_init_io_rings(adapter);
+
/* If the interface was up before the reset bring it up */
- if (dev_up) {
+ if (ENA_FLAG_ISSET(ENA_FLAG_DEV_UP_BEFORE_RESET, adapter)) {
rc = ena_up(adapter);
- if (unlikely(rc != 0)) {
- device_printf(adapter->pdev,
- "Failed to create I/O queues\n");
- goto err_msix_free;
+ if (rc != 0) {
+ device_printf(dev, "Failed to create I/O queues\n");
+ goto err_disable_msix;
}
}
- callout_reset_sbt(&adapter->timer_service, SBT_1S, SBT_1S,
- ena_timer_service, (void *)adapter, 0);
+ /* Indicate that device is running again and ready to work */
+ ENA_FLAG_SET_ATOMIC(ENA_FLAG_DEVICE_RUNNING, adapter);
+
+ if (ENA_FLAG_ISSET(ENA_FLAG_DEV_UP_BEFORE_RESET, adapter)) {
+ /*
+ * As the AENQ handlers weren't executed during reset because
+ * the flag ENA_FLAG_DEVICE_RUNNING was turned off, the
+		 * timestamp must be updated again. That will prevent the next
+		 * reset caused by a missing keep alive.
+ */
+ adapter->keep_alive_timestamp = getsbinuptime();
+ callout_reset_sbt(&adapter->timer_service, SBT_1S, SBT_1S,
+ ena_timer_service, (void *)adapter, 0);
+ }
+ ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_DEV_UP_BEFORE_RESET, adapter);
- sx_unlock(&adapter->ioctl_sx);
+ device_printf(dev,
+ "Device reset completed successfully, Driver info: %s\n", ena_version);
- return;
+ return (rc);
-err_msix_free:
+err_disable_msix:
ena_free_mgmnt_irq(adapter);
ena_disable_msix(adapter);
-err_com_free:
+err_device_destroy:
+ ena_com_abort_admin_commands(ena_dev);
+ ena_com_wait_for_abort_completion(ena_dev);
ena_com_admin_destroy(ena_dev);
-err_dev_free:
- device_printf(adapter->pdev, "ENA reset failed!\n");
- adapter->running = false;
- sx_unlock(&adapter->ioctl_sx);
+ ena_com_dev_reset(ena_dev, ENA_REGS_RESET_DRIVER_INVALID_STATE);
+ ena_com_mmio_reg_read_request_destroy(ena_dev);
+err:
+ ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_DEVICE_RUNNING, adapter);
+ ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_ONGOING_RESET, adapter);
+ device_printf(dev, "Reset attempt failed. Can not reset the device\n");
+
+ return (rc);
+}
+
+static void
+ena_reset_task(void *arg, int pending)
+{
+ struct ena_adapter *adapter = (struct ena_adapter *)arg;
+
+ if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter))) {
+ device_printf(adapter->pdev,
+ "device reset scheduled but trigger_reset is off\n");
+ return;
+ }
+
+ ENA_LOCK_LOCK(adapter);
+ ena_destroy_device(adapter, false);
+ ena_restore_device(adapter);
+ ENA_LOCK_UNLOCK(adapter);
}
/**
@@ -3624,22 +3437,24 @@ static int
ena_attach(device_t pdev)
{
struct ena_com_dev_get_features_ctx get_feat_ctx;
+ struct ena_llq_configurations llq_config;
+ struct ena_calc_queue_size_ctx calc_queue_ctx = { 0 };
static int version_printed;
struct ena_adapter *adapter;
struct ena_com_dev *ena_dev = NULL;
- uint16_t tx_sgl_size = 0;
- uint16_t rx_sgl_size = 0;
- int io_queue_num;
- int queue_size;
- int rc;
+ uint32_t max_num_io_queues;
+ int rid, rc;
+
adapter = device_get_softc(pdev);
adapter->pdev = pdev;
- mtx_init(&adapter->global_mtx, "ENA global mtx", NULL, MTX_DEF);
- sx_init(&adapter->ioctl_sx, "ENA ioctl sx");
+ ENA_LOCK_INIT(adapter);
- /* Set up the timer service */
- callout_init_mtx(&adapter->timer_service, &adapter->global_mtx, 0);
+ /*
+ * Set up the timer service - driver is responsible for avoiding
+ * concurrency, as the callout won't be using any locking inside.
+ */
+ callout_init(&adapter->timer_service, true);
adapter->keep_alive_timeout = DEFAULT_KEEP_ALIVE_TO;
adapter->missing_tx_timeout = DEFAULT_TX_CMP_TO;
adapter->missing_tx_max_queues = DEFAULT_TX_MONITORED_QUEUES;
@@ -3648,19 +3463,24 @@ ena_attach(device_t pdev)
if (version_printed++ == 0)
device_printf(pdev, "%s\n", ena_version);
- rc = ena_allocate_pci_resources(adapter);
- if (unlikely(rc != 0)) {
- device_printf(pdev, "PCI resource allocation failed!\n");
- ena_free_pci_resources(adapter);
- return (rc);
- }
-
/* Allocate memory for ena_dev structure */
ena_dev = malloc(sizeof(struct ena_com_dev), M_DEVBUF,
M_WAITOK | M_ZERO);
adapter->ena_dev = ena_dev;
ena_dev->dmadev = pdev;
+
+ rid = PCIR_BAR(ENA_REG_BAR);
+ adapter->memory = NULL;
+ adapter->registers = bus_alloc_resource_any(pdev, SYS_RES_MEMORY,
+ &rid, RF_ACTIVE);
+ if (unlikely(adapter->registers == NULL)) {
+ device_printf(pdev,
+ "unable to allocate bus resource: registers!\n");
+ rc = ENOMEM;
+ goto err_dev_free;
+ }
+
ena_dev->bus = malloc(sizeof(struct ena_bus), M_DEVBUF,
M_WAITOK | M_ZERO);
@@ -3678,6 +3498,9 @@ ena_attach(device_t pdev)
ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
+ /* Initially clear all the flags */
+ ENA_FLAG_ZERO(adapter);
+
/* Device initialization */
rc = ena_device_init(adapter, pdev, &get_feat_ctx, &adapter->wd_active);
if (unlikely(rc != 0)) {
@@ -3686,40 +3509,62 @@ ena_attach(device_t pdev)
goto err_bus_free;
}
+ set_default_llq_configurations(&llq_config);
+
+#if defined(__arm__) || defined(__aarch64__)
+ /*
+ * Force LLQ disable, as the driver is not supporting WC enablement
+ * on the ARM architecture. Using LLQ without WC would affect
+ * performance in a negative way.
+ */
+ ena_dev->supported_features &= ~(1 << ENA_ADMIN_LLQ);
+#endif
+ rc = ena_set_queues_placement_policy(pdev, ena_dev, &get_feat_ctx.llq,
+ &llq_config);
+ if (unlikely(rc != 0)) {
+ device_printf(pdev, "failed to set placement policy\n");
+ goto err_com_free;
+ }
+
+ if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV)
+ adapter->disable_meta_caching =
+ !!(get_feat_ctx.llq.accel_mode.u.get.supported_flags &
+ BIT(ENA_ADMIN_DISABLE_META_CACHING));
+
adapter->keep_alive_timestamp = getsbinuptime();
adapter->tx_offload_cap = get_feat_ctx.offload.tx;
- /* Set for sure that interface is not up */
- adapter->up = false;
-
memcpy(adapter->mac_addr, get_feat_ctx.dev_attr.mac_addr,
ETHER_ADDR_LEN);
- /* calculate IO queue number to create */
- io_queue_num = ena_calc_io_queue_num(adapter, &get_feat_ctx);
-
- ENA_ASSERT(io_queue_num > 0, "Invalid queue number: %d\n",
- io_queue_num);
- adapter->num_queues = io_queue_num;
+ calc_queue_ctx.pdev = pdev;
+ calc_queue_ctx.ena_dev = ena_dev;
+ calc_queue_ctx.get_feat_ctx = &get_feat_ctx;
- adapter->max_mtu = get_feat_ctx.dev_attr.max_mtu;
-
- /* calculatre ring sizes */
- queue_size = ena_calc_queue_size(adapter,&tx_sgl_size,
- &rx_sgl_size, &get_feat_ctx);
- if (unlikely((queue_size <= 0) || (io_queue_num <= 0))) {
- rc = ENA_COM_FAULT;
+ /* Calculate initial and maximum IO queue number and size */
+ max_num_io_queues = ena_calc_max_io_queue_num(pdev, ena_dev,
+ &get_feat_ctx);
+ rc = ena_calc_io_queue_size(&calc_queue_ctx);
+ if (unlikely((rc != 0) || (max_num_io_queues <= 0))) {
+ rc = EFAULT;
goto err_com_free;
}
- adapter->reset_reason = ENA_REGS_RESET_NORMAL;
+ adapter->requested_tx_ring_size = calc_queue_ctx.tx_queue_size;
+ adapter->requested_rx_ring_size = calc_queue_ctx.rx_queue_size;
+ adapter->max_tx_ring_size = calc_queue_ctx.max_tx_queue_size;
+ adapter->max_rx_ring_size = calc_queue_ctx.max_rx_queue_size;
+ adapter->max_tx_sgl_size = calc_queue_ctx.max_tx_sgl_size;
+ adapter->max_rx_sgl_size = calc_queue_ctx.max_rx_sgl_size;
- adapter->tx_ring_size = queue_size;
- adapter->rx_ring_size = queue_size;
+ adapter->max_num_io_queues = max_num_io_queues;
- adapter->max_tx_sgl_size = tx_sgl_size;
- adapter->max_rx_sgl_size = rx_sgl_size;
+ adapter->buf_ring_size = ENA_DEFAULT_BUF_RING_SIZE;
+
+ adapter->max_mtu = get_feat_ctx.dev_attr.max_mtu;
+
+ adapter->reset_reason = ENA_REGS_RESET_NORMAL;
/* set up dma tags for rx and tx buffers */
rc = ena_setup_tx_dma_tag(adapter);
@@ -3734,22 +3579,32 @@ ena_attach(device_t pdev)
goto err_tx_tag_free;
}
+ /*
+ * The amount of requested MSIX vectors is equal to
+ * adapter::max_num_io_queues (see `ena_enable_msix()`), plus a constant
+ * number of admin queue interrupts. The former is initially determined
+	 * by HW capabilities (see `ena_calc_max_io_queue_num()`) but may not be
+	 * achieved if there are not enough system resources. By default, the
+	 * number of effectively used IO queues is the same, but later on it can
+	 * be limited by the user via the sysctl interface.
+ */
+ rc = ena_enable_msix_and_set_admin_interrupts(adapter);
+ if (unlikely(rc != 0)) {
+ device_printf(pdev,
+ "Failed to enable and set the admin interrupts\n");
+ goto err_io_free;
+ }
+ /* By default all of allocated MSIX vectors are actively used */
+ adapter->num_io_queues = adapter->msix_vecs - ENA_ADMIN_MSIX_VEC;
+
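A worked example of the vector accounting described in the comment above (ENA_ADMIN_MSIX_VEC is assumed to be 1 here, and the granted vector count is made up):

#include <stdio.h>

int
main(void)
{
	int msix_vecs = 9;	/* vectors granted by the OS for this device */
	int admin_vecs = 1;	/* ENA_ADMIN_MSIX_VEC (assumed) */
	int num_io_queues = msix_vecs - admin_vecs;

	printf("num_io_queues = %d\n", num_io_queues);	/* prints 8 */
	return (0);
}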
/* initialize rings basic information */
- device_printf(pdev, "initalize %d io queues\n", io_queue_num);
ena_init_io_rings(adapter);
/* setup network interface */
rc = ena_setup_ifnet(pdev, adapter, &get_feat_ctx);
if (unlikely(rc != 0)) {
device_printf(pdev, "Error with network interface setup\n");
- goto err_io_free;
- }
-
- rc = ena_enable_msix_and_set_admin_interrupts(adapter, io_queue_num);
- if (unlikely(rc != 0)) {
- device_printf(pdev,
- "Failed to enable and set the admin interrupts\n");
- goto err_ifp_free;
+ goto err_msix_free;
}
/* Initialize reset task queue */
@@ -3766,15 +3621,28 @@ ena_attach(device_t pdev)
sizeof(struct ena_hw_stats));
ena_sysctl_add_nodes(adapter);
+#ifdef DEV_NETMAP
+ rc = ena_netmap_attach(adapter);
+ if (rc != 0) {
+ device_printf(pdev, "netmap attach failed: %d\n", rc);
+ goto err_detach;
+ }
+#endif /* DEV_NETMAP */
+
/* Tell the stack that the interface is not active */
if_setdrvflagbits(adapter->ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);
+ ENA_FLAG_SET_ATOMIC(ENA_FLAG_DEVICE_RUNNING, adapter);
- adapter->running = true;
return (0);
-err_ifp_free:
- if_detach(adapter->ifp);
- if_free(adapter->ifp);
+#ifdef DEV_NETMAP
+err_detach:
+ ether_ifdetach(adapter->ifp);
+#endif /* DEV_NETMAP */
+err_msix_free:
+ ena_com_dev_reset(adapter->ena_dev, ENA_REGS_RESET_INIT_ERR);
+ ena_free_mgmnt_irq(adapter);
+ ena_disable_msix(adapter);
err_io_free:
ena_free_all_io_rings_resources(adapter);
ena_free_rx_dma_tag(adapter);
@@ -3786,8 +3654,9 @@ err_com_free:
ena_com_mmio_reg_read_request_destroy(ena_dev);
err_bus_free:
free(ena_dev->bus, M_DEVBUF);
- free(ena_dev, M_DEVBUF);
ena_free_pci_resources(adapter);
+err_dev_free:
+ free(ena_dev, M_DEVBUF);
return (rc);
}
@@ -3812,31 +3681,32 @@ ena_detach(device_t pdev)
return (EBUSY);
}
- /* Free reset task and callout */
+ ether_ifdetach(adapter->ifp);
+
+ /* Stop timer service */
+ ENA_LOCK_LOCK(adapter);
callout_drain(&adapter->timer_service);
+ ENA_LOCK_UNLOCK(adapter);
+
+ /* Release reset task */
while (taskqueue_cancel(adapter->reset_tq, &adapter->reset_task, NULL))
taskqueue_drain(adapter->reset_tq, &adapter->reset_task);
taskqueue_free(adapter->reset_tq);
- sx_xlock(&adapter->ioctl_sx);
+ ENA_LOCK_LOCK(adapter);
ena_down(adapter);
- sx_unlock(&adapter->ioctl_sx);
-
- if (adapter->ifp != NULL) {
- ether_ifdetach(adapter->ifp);
- if_free(adapter->ifp);
- }
+ ena_destroy_device(adapter, true);
+ ENA_LOCK_UNLOCK(adapter);
- ena_free_all_io_rings_resources(adapter);
+#ifdef DEV_NETMAP
+ netmap_detach(adapter->ifp);
+#endif /* DEV_NETMAP */
ena_free_counters((counter_u64_t *)&adapter->hw_stats,
sizeof(struct ena_hw_stats));
ena_free_counters((counter_u64_t *)&adapter->dev_stats,
sizeof(struct ena_stats_dev));
- if (likely(adapter->rss_support))
- ena_com_rss_destroy(ena_dev);
-
rc = ena_free_rx_dma_tag(adapter);
if (unlikely(rc != 0))
device_printf(adapter->pdev,
@@ -3847,26 +3717,18 @@ ena_detach(device_t pdev)
device_printf(adapter->pdev,
"Unmapped TX DMA tag associations\n");
- /* Reset the device only if the device is running. */
- if (adapter->running)
- ena_com_dev_reset(ena_dev, adapter->reset_reason);
-
- ena_com_delete_host_info(ena_dev);
-
ena_free_irqs(adapter);
- ena_com_abort_admin_commands(ena_dev);
-
- ena_com_wait_for_abort_completion(ena_dev);
+ ena_free_pci_resources(adapter);
- ena_com_admin_destroy(ena_dev);
+ if (likely(ENA_FLAG_ISSET(ENA_FLAG_RSS_ACTIVE, adapter)))
+ ena_com_rss_destroy(ena_dev);
- ena_com_mmio_reg_read_request_destroy(ena_dev);
+ ena_com_delete_host_info(ena_dev);
- ena_free_pci_resources(adapter);
+ ENA_LOCK_DESTROY(adapter);
- mtx_destroy(&adapter->global_mtx);
- sx_destroy(&adapter->ioctl_sx);
+ if_free(adapter->ifp);
if (ena_dev->bus != NULL)
free(ena_dev->bus, M_DEVBUF);
@@ -3900,31 +3762,56 @@ ena_update_on_link_change(void *adapter_data,
if (status != 0) {
device_printf(adapter->pdev, "link is UP\n");
- if_link_state_change(ifp, LINK_STATE_UP);
- } else if (status == 0) {
+ ENA_FLAG_SET_ATOMIC(ENA_FLAG_LINK_UP, adapter);
+ if (!ENA_FLAG_ISSET(ENA_FLAG_ONGOING_RESET, adapter))
+ if_link_state_change(ifp, LINK_STATE_UP);
+ } else {
device_printf(adapter->pdev, "link is DOWN\n");
if_link_state_change(ifp, LINK_STATE_DOWN);
- } else {
- device_printf(adapter->pdev, "invalid value recvd\n");
- BUG();
+ ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_LINK_UP, adapter);
}
+}
+
+static void ena_notification(void *adapter_data,
+ struct ena_admin_aenq_entry *aenq_e)
+{
+ struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
+ struct ena_admin_ena_hw_hints *hints;
+
+ ENA_WARN(aenq_e->aenq_common_desc.group != ENA_ADMIN_NOTIFICATION,
+ "Invalid group(%x) expected %x\n", aenq_e->aenq_common_desc.group,
+ ENA_ADMIN_NOTIFICATION);
- adapter->link_status = status;
+ switch (aenq_e->aenq_common_desc.syndrom) {
+ case ENA_ADMIN_UPDATE_HINTS:
+ hints =
+ (struct ena_admin_ena_hw_hints *)(&aenq_e->inline_data_w4);
+ ena_update_hints(adapter, hints);
+ break;
+ default:
+ device_printf(adapter->pdev,
+ "Invalid aenq notification link state %d\n",
+ aenq_e->aenq_common_desc.syndrom);
+ }
}
/**
* This handler will called for unknown event group or unimplemented handlers
**/
static void
-unimplemented_aenq_handler(void *data,
+unimplemented_aenq_handler(void *adapter_data,
struct ena_admin_aenq_entry *aenq_e)
{
- return;
+ struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
+
+ device_printf(adapter->pdev,
+ "Unknown event was received or event with unimplemented handler\n");
}
static struct ena_aenq_handlers aenq_handlers = {
.handlers = {
[ENA_ADMIN_LINK_CHANGE] = ena_update_on_link_change,
+ [ENA_ADMIN_NOTIFICATION] = ena_notification,
[ENA_ADMIN_KEEP_ALIVE] = ena_keep_alive_wd,
},
.unimplemented_handler = unimplemented_aenq_handler
@@ -3952,5 +3839,8 @@ MODULE_PNP_INFO("U16:vendor;U16:device", pci, ena, ena_vendor_info_array,
sizeof(ena_vendor_info_array[0]), nitems(ena_vendor_info_array) - 1);
MODULE_DEPEND(ena, pci, 1, 1, 1);
MODULE_DEPEND(ena, ether, 1, 1, 1);
+#ifdef DEV_NETMAP
+MODULE_DEPEND(ena, netmap, 1, 1, 1);
+#endif /* DEV_NETMAP */
/*********************************************************************/
diff --git a/sys/dev/ena/ena.h b/sys/dev/ena/ena.h
index 1d9194e24616..ee57f53f365b 100644
--- a/sys/dev/ena/ena.h
+++ b/sys/dev/ena/ena.h
@@ -1,7 +1,7 @@
/*-
* BSD LICENSE
*
- * Copyright (c) 2015-2017 Amazon.com, Inc. or its affiliates.
+ * Copyright (c) 2015-2020 Amazon.com, Inc. or its affiliates.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -39,9 +39,9 @@
#include "ena-com/ena_com.h"
#include "ena-com/ena_eth_com.h"
-#define DRV_MODULE_VER_MAJOR 0
-#define DRV_MODULE_VER_MINOR 8
-#define DRV_MODULE_VER_SUBMINOR 4
+#define DRV_MODULE_VER_MAJOR 2
+#define DRV_MODULE_VER_MINOR 2
+#define DRV_MODULE_VER_SUBMINOR 0
#define DRV_MODULE_NAME "ena"
@@ -66,9 +66,17 @@
#define ENA_BUS_DMA_SEGS 32
+#define ENA_DEFAULT_BUF_RING_SIZE 4096
+
#define ENA_DEFAULT_RING_SIZE 1024
+#define ENA_MIN_RING_SIZE 256
+/*
+ * Refill Rx queue when number of required descriptors is above
+ * QUEUE_SIZE / ENA_RX_REFILL_THRESH_DIVIDER or ENA_RX_REFILL_THRESH_PACKET
+ */
#define ENA_RX_REFILL_THRESH_DIVIDER 8
+#define ENA_RX_REFILL_THRESH_PACKET 256
#define ENA_IRQNAME_SIZE 40
@@ -82,7 +90,7 @@
#define ENA_MAX_FRAME_LEN 10000
#define ENA_MIN_FRAME_LEN 60
-#define ENA_TX_CLEANUP_THRESHOLD 128
+#define ENA_TX_RESUME_THRESH (ENA_PKT_MAX_BUFS + 2)
#define DB_THRESHOLD 64
@@ -120,6 +128,8 @@
#define ENA_IO_IRQ_FIRST_IDX 1
#define ENA_IO_IRQ_IDX(q) (ENA_IO_IRQ_FIRST_IDX + (q))
+#define ENA_MAX_NO_INTERRUPT_ITERATIONS 3
+
/*
* ENA device should send keep alive msg every 1 sec.
* We wait for 6 sec just to be on the safe side.
@@ -145,6 +155,33 @@
#define PCI_DEV_ID_ENA_VF 0xec20
#define PCI_DEV_ID_ENA_LLQ_VF 0xec21
+/*
+ * Flags indicating current ENA driver state
+ */
+enum ena_flags_t {
+ ENA_FLAG_DEVICE_RUNNING,
+ ENA_FLAG_DEV_UP,
+ ENA_FLAG_LINK_UP,
+ ENA_FLAG_MSIX_ENABLED,
+ ENA_FLAG_TRIGGER_RESET,
+ ENA_FLAG_ONGOING_RESET,
+ ENA_FLAG_DEV_UP_BEFORE_RESET,
+ ENA_FLAG_RSS_ACTIVE,
+ ENA_FLAGS_NUMBER = ENA_FLAG_RSS_ACTIVE
+};
+
+BITSET_DEFINE(_ena_state, ENA_FLAGS_NUMBER);
+typedef struct _ena_state ena_state_t;
+
+#define ENA_FLAG_ZERO(adapter) \
+ BIT_ZERO(ENA_FLAGS_NUMBER, &(adapter)->flags)
+#define ENA_FLAG_ISSET(bit, adapter) \
+ BIT_ISSET(ENA_FLAGS_NUMBER, (bit), &(adapter)->flags)
+#define ENA_FLAG_SET_ATOMIC(bit, adapter) \
+ BIT_SET_ATOMIC(ENA_FLAGS_NUMBER, (bit), &(adapter)->flags)
+#define ENA_FLAG_CLEAR_ATOMIC(bit, adapter) \
+ BIT_CLR_ATOMIC(ENA_FLAGS_NUMBER, (bit), &(adapter)->flags)
+
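A userland model of the flag scheme above, using C11 atomics instead of the kernel BITSET macros (an illustration of the idea only, not the BITSET(9) API itself): each driver state is one bit, set and cleared atomically, and tested without holding a lock.

#include <stdatomic.h>
#include <stdio.h>

enum { FLAG_DEVICE_RUNNING, FLAG_DEV_UP, FLAG_LINK_UP, FLAG_TRIGGER_RESET };

static atomic_ulong flags;

#define FLAG_SET(bit)	atomic_fetch_or(&flags, 1UL << (bit))
#define FLAG_CLEAR(bit)	atomic_fetch_and(&flags, ~(1UL << (bit)))
#define FLAG_ISSET(bit)	((atomic_load(&flags) & (1UL << (bit))) != 0)

int
main(void)
{
	FLAG_SET(FLAG_DEVICE_RUNNING);
	FLAG_SET(FLAG_DEV_UP);
	FLAG_CLEAR(FLAG_DEV_UP);

	printf("running=%d up=%d\n",
	    FLAG_ISSET(FLAG_DEVICE_RUNNING), FLAG_ISSET(FLAG_DEV_UP));
	return (0);
}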
struct msix_entry {
int entry;
int vector;
@@ -159,7 +196,7 @@ typedef struct _ena_vendor_info_t {
struct ena_irq {
/* Interrupt resources */
struct resource *res;
- driver_intr_t *handler;
+ driver_filter_t *handler;
void *data;
void *cookie;
unsigned int vector;
@@ -172,10 +209,34 @@ struct ena_que {
struct ena_adapter *adapter;
struct ena_ring *tx_ring;
struct ena_ring *rx_ring;
+
+ struct task cleanup_task;
+ struct taskqueue *cleanup_tq;
+
uint32_t id;
int cpu;
};
+struct ena_calc_queue_size_ctx {
+ struct ena_com_dev_get_features_ctx *get_feat_ctx;
+ struct ena_com_dev *ena_dev;
+ device_t pdev;
+ uint32_t tx_queue_size;
+ uint32_t rx_queue_size;
+ uint32_t max_tx_queue_size;
+ uint32_t max_rx_queue_size;
+ uint16_t max_tx_sgl_size;
+ uint16_t max_rx_sgl_size;
+};
+
+#ifdef DEV_NETMAP
+struct ena_netmap_tx_info {
+ uint32_t socket_buf_idx[ENA_PKT_MAX_BUFS];
+ bus_dmamap_t map_seg[ENA_PKT_MAX_BUFS];
+ unsigned int sockets_used;
+};
+#endif
+
struct ena_tx_buffer {
struct mbuf *mbuf;
/* # of ena desc for this specific mbuf
@@ -183,12 +244,17 @@ struct ena_tx_buffer {
unsigned int tx_descs;
/* # of buffers used by this mbuf */
unsigned int num_of_bufs;
- bus_dmamap_t map;
+
+ bus_dmamap_t dmamap;
/* Used to detect missing tx packets */
struct bintime timestamp;
bool print_once;
+#ifdef DEV_NETMAP
+ struct ena_netmap_tx_info nm_info;
+#endif /* DEV_NETMAP */
+
struct ena_com_buf bufs[ENA_PKT_MAX_BUFS];
} __aligned(CACHE_LINE_SIZE);
@@ -196,6 +262,9 @@ struct ena_rx_buffer {
struct mbuf *mbuf;
bus_dmamap_t map;
struct ena_com_buf ena_buf;
+#ifdef DEV_NETMAP
+ uint32_t netmap_buf_idx;
+#endif /* DEV_NETMAP */
} __aligned(CACHE_LINE_SIZE);
struct ena_stats_tx {
@@ -208,6 +277,9 @@ struct ena_stats_tx {
counter_u64_t bad_req_id;
counter_u64_t collapse;
counter_u64_t collapse_err;
+ counter_u64_t queue_wakeup;
+ counter_u64_t queue_stop;
+ counter_u64_t llq_buffer_copy;
};
struct ena_stats_rx {
@@ -238,17 +310,18 @@ struct ena_ring {
/* Determines if device will use LLQ or normal mode for TX */
enum ena_admin_placement_policy_type tx_mem_queue_type;
- /* The maximum length the driver can push to the device (For LLQ) */
- uint8_t tx_max_header_size;
+ union {
+ /* The maximum length the driver can push to the device (For LLQ) */
+ uint8_t tx_max_header_size;
+ /* The maximum (and default) mbuf size for the Rx descriptor. */
+ uint16_t rx_mbuf_sz;
- struct ena_com_rx_buf_info ena_bufs[ENA_PKT_MAX_BUFS];
+ };
- /*
- * Fields used for Adaptive Interrupt Modulation - to be implemented in
- * the future releases
- */
- uint32_t smoothed_interval;
- enum ena_intr_moder_level moder_tbl_idx;
+ bool first_interrupt;
+ uint16_t no_interrupt_event_cnt;
+
+ struct ena_com_rx_buf_info ena_bufs[ENA_PKT_MAX_BUFS];
struct ena_que *que;
struct lro_ctrl lro;
@@ -263,19 +336,14 @@ struct ena_ring {
int ring_size; /* number of tx/rx_buffer_info's entries */
struct buf_ring *br; /* only for TX */
+ uint32_t buf_ring_size;
struct mtx ring_mtx;
char mtx_name[16];
- union {
- struct {
- struct task enqueue_task;
- struct taskqueue *enqueue_tq;
- };
- struct {
- struct task cmpl_task;
- struct taskqueue *cmpl_tq;
- };
+ struct {
+ struct task enqueue_task;
+ struct taskqueue *enqueue_tq;
};
union {
@@ -283,7 +351,21 @@ struct ena_ring {
struct ena_stats_rx rx_stats;
};
- int empty_rx_queue;
+ union {
+ int empty_rx_queue;
+ /* For Tx ring to indicate if it's running or not */
+ bool running;
+ };
+
+ /* How many packets are sent in one Tx loop, used for doorbells */
+ uint32_t acum_pkts;
+
+ /* Used for LLQ */
+ uint8_t *push_buf_intermediate_buf;
+
+#ifdef DEV_NETMAP
+ bool initialized;
+#endif /* DEV_NETMAP */
} __aligned(CACHE_LINE_SIZE);
struct ena_stats_dev {
@@ -301,6 +383,7 @@ struct ena_hw_stats {
counter_u64_t tx_bytes;
counter_u64_t rx_drops;
+ counter_u64_t tx_drops;
};
/* Board specific private data structure */
@@ -316,11 +399,9 @@ struct ena_adapter {
struct resource *memory;
struct resource *registers;
- struct mtx global_mtx;
- struct sx ioctl_sx;
+ struct sx global_lock;
/* MSI-X */
- uint32_t msix_enabled;
struct msix_entry *msix_entries;
int msix_vecs;
@@ -331,28 +412,29 @@ struct ena_adapter {
uint32_t max_mtu;
+ uint32_t num_io_queues;
+ uint32_t max_num_io_queues;
+
+ uint32_t requested_tx_ring_size;
+ uint32_t requested_rx_ring_size;
+
+ uint32_t max_tx_ring_size;
+ uint32_t max_rx_ring_size;
+
uint16_t max_tx_sgl_size;
uint16_t max_rx_sgl_size;
uint32_t tx_offload_cap;
- /* Tx fast path data */
- int num_queues;
-
- unsigned int tx_ring_size;
- unsigned int rx_ring_size;
+ uint32_t buf_ring_size;
/* RSS*/
uint8_t rss_ind_tbl[ENA_RX_RSS_TABLE_SIZE];
- bool rss_support;
uint8_t mac_addr[ETHER_ADDR_LEN];
/* mdio and phy*/
- bool link_status;
- bool trigger_reset;
- bool up;
- bool running;
+ ena_state_t flags;
/* Queue will represent one TX and one RX ring */
struct ena_que que[ENA_MAX_NUM_IO_QUEUES]
@@ -379,6 +461,7 @@ struct ena_adapter {
sbintime_t missing_tx_timeout;
uint32_t missing_tx_max_queues;
uint32_t missing_tx_threshold;
+ bool disable_meta_caching;
/* Statistics */
struct ena_stats_dev dev_stats;
@@ -391,6 +474,15 @@ struct ena_adapter {
#define ENA_RING_MTX_TRYLOCK(_ring) mtx_trylock(&(_ring)->ring_mtx)
#define ENA_RING_MTX_UNLOCK(_ring) mtx_unlock(&(_ring)->ring_mtx)
+#define ENA_LOCK_INIT(adapter) \
+ sx_init(&(adapter)->global_lock, "ENA global lock")
+#define ENA_LOCK_DESTROY(adapter) sx_destroy(&(adapter)->global_lock)
+#define ENA_LOCK_LOCK(adapter) sx_xlock(&(adapter)->global_lock)
+#define ENA_LOCK_UNLOCK(adapter) sx_unlock(&(adapter)->global_lock)
+
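+/* Clamp a value into the [low, high] range, evaluating in the given type. */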
+#define clamp_t(type, _x, min, max) min_t(type, max_t(type, _x, min), max)
+#define clamp_val(val, lo, hi) clamp_t(__typeof(val), val, lo, hi)
+
static inline int ena_mbuf_count(struct mbuf *mbuf)
{
int count = 1;
@@ -401,4 +493,41 @@ static inline int ena_mbuf_count(struct mbuf *mbuf)
return count;
}
+int ena_up(struct ena_adapter *adapter);
+void ena_down(struct ena_adapter *adapter);
+int ena_restore_device(struct ena_adapter *adapter);
+void ena_destroy_device(struct ena_adapter *adapter, bool graceful);
+int ena_refill_rx_bufs(struct ena_ring *rx_ring, uint32_t num);
+int ena_update_buf_ring_size(struct ena_adapter *adapter,
+ uint32_t new_buf_ring_size);
+int ena_update_queue_size(struct ena_adapter *adapter, uint32_t new_tx_size,
+ uint32_t new_rx_size);
+int ena_update_io_queue_nb(struct ena_adapter *adapter, uint32_t new_num);
+
+static inline void
+ena_trigger_reset(struct ena_adapter *adapter,
+ enum ena_regs_reset_reason_types reset_reason)
+{
+ if (likely(!ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter))) {
+ adapter->reset_reason = reset_reason;
+ ENA_FLAG_SET_ATOMIC(ENA_FLAG_TRIGGER_RESET, adapter);
+ }
+}
+
+static inline int
+validate_rx_req_id(struct ena_ring *rx_ring, uint16_t req_id)
+{
+ if (likely(req_id < rx_ring->ring_size))
+ return (0);
+
+ device_printf(rx_ring->adapter->pdev, "Invalid rx req_id: %hu\n",
+ req_id);
+ counter_u64_add(rx_ring->rx_stats.bad_req_id, 1);
+
+ /* Trigger device reset */
+ ena_trigger_reset(rx_ring->adapter, ENA_REGS_RESET_INV_RX_REQ_ID);
+
+ return (EFAULT);
+}
+
#endif /* !(ENA_H) */
diff --git a/sys/dev/ena/ena_datapath.c b/sys/dev/ena/ena_datapath.c
new file mode 100644
index 000000000000..51beabc283f7
--- /dev/null
+++ b/sys/dev/ena/ena_datapath.c
@@ -0,0 +1,1110 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) 2015-2020 Amazon.com, Inc. or its affiliates.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "ena.h"
+#include "ena_datapath.h"
+#ifdef DEV_NETMAP
+#include "ena_netmap.h"
+#endif /* DEV_NETMAP */
+
+/*********************************************************************
+ * Static functions prototypes
+ *********************************************************************/
+
+static int ena_tx_cleanup(struct ena_ring *);
+static int ena_rx_cleanup(struct ena_ring *);
+static inline int validate_tx_req_id(struct ena_ring *, uint16_t);
+static void ena_rx_hash_mbuf(struct ena_ring *, struct ena_com_rx_ctx *,
+ struct mbuf *);
+static struct mbuf* ena_rx_mbuf(struct ena_ring *, struct ena_com_rx_buf_info *,
+ struct ena_com_rx_ctx *, uint16_t *);
+static inline void ena_rx_checksum(struct ena_ring *, struct ena_com_rx_ctx *,
+ struct mbuf *);
+static void ena_tx_csum(struct ena_com_tx_ctx *, struct mbuf *, bool);
+static int ena_check_and_collapse_mbuf(struct ena_ring *tx_ring,
+ struct mbuf **mbuf);
+static int ena_xmit_mbuf(struct ena_ring *, struct mbuf **);
+static void ena_start_xmit(struct ena_ring *);
+
+/*********************************************************************
+ * Global functions
+ *********************************************************************/
+
+void
+ena_cleanup(void *arg, int pending)
+{
+ struct ena_que *que = arg;
+ struct ena_adapter *adapter = que->adapter;
+ if_t ifp = adapter->ifp;
+ struct ena_ring *tx_ring;
+ struct ena_ring *rx_ring;
+ struct ena_com_io_cq* io_cq;
+ struct ena_eth_io_intr_reg intr_reg;
+ int qid, ena_qid;
+ int txc, rxc, i;
+
+ if (unlikely((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0))
+ return;
+
+ ena_trace(ENA_DBG, "MSI-X TX/RX routine\n");
+
+ tx_ring = que->tx_ring;
+ rx_ring = que->rx_ring;
+ qid = que->id;
+ ena_qid = ENA_IO_TXQ_IDX(qid);
+ io_cq = &adapter->ena_dev->io_cq_queues[ena_qid];
+
+ tx_ring->first_interrupt = true;
+ rx_ring->first_interrupt = true;
+
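+	/*
+	 * Alternate Rx and Tx cleanup until both rings return less than a
+	 * full budget or CLEAN_BUDGET iterations have passed.
+	 */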
+ for (i = 0; i < CLEAN_BUDGET; ++i) {
+ rxc = ena_rx_cleanup(rx_ring);
+ txc = ena_tx_cleanup(tx_ring);
+
+ if (unlikely((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0))
+ return;
+
+ if ((txc != TX_BUDGET) && (rxc != RX_BUDGET))
+ break;
+ }
+
+ /* Signal that work is done and unmask interrupt */
+ ena_com_update_intr_reg(&intr_reg,
+ RX_IRQ_INTERVAL,
+ TX_IRQ_INTERVAL,
+ true);
+ ena_com_unmask_intr(io_cq, &intr_reg);
+}
+
+void
+ena_deferred_mq_start(void *arg, int pending)
+{
+ struct ena_ring *tx_ring = (struct ena_ring *)arg;
+ struct ifnet *ifp = tx_ring->adapter->ifp;
+
+ while (!drbr_empty(ifp, tx_ring->br) &&
+ tx_ring->running &&
+ (if_getdrvflags(ifp) & IFF_DRV_RUNNING) != 0) {
+ ENA_RING_MTX_LOCK(tx_ring);
+ ena_start_xmit(tx_ring);
+ ENA_RING_MTX_UNLOCK(tx_ring);
+ }
+}
+
+int
+ena_mq_start(if_t ifp, struct mbuf *m)
+{
+ struct ena_adapter *adapter = ifp->if_softc;
+ struct ena_ring *tx_ring;
+ int ret, is_drbr_empty;
+ uint32_t i;
+
+ if (unlikely((if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING) == 0))
+ return (ENODEV);
+
+ /* Which queue to use */
+ /*
+	 * If everything is set up correctly, it should map to the same bucket
+	 * as the CPU we are currently running on, which should improve
+	 * performance.
+ */
+ if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
+ i = m->m_pkthdr.flowid % adapter->num_io_queues;
+ } else {
+ i = curcpu % adapter->num_io_queues;
+ }
+ tx_ring = &adapter->tx_ring[i];
+
+ /* Check if drbr is empty before putting packet */
+ is_drbr_empty = drbr_empty(ifp, tx_ring->br);
+ ret = drbr_enqueue(ifp, tx_ring->br, m);
+ if (unlikely(ret != 0)) {
+ taskqueue_enqueue(tx_ring->enqueue_tq, &tx_ring->enqueue_task);
+ return (ret);
+ }
+
+ if (is_drbr_empty && (ENA_RING_MTX_TRYLOCK(tx_ring) != 0)) {
+ ena_start_xmit(tx_ring);
+ ENA_RING_MTX_UNLOCK(tx_ring);
+ } else {
+ taskqueue_enqueue(tx_ring->enqueue_tq, &tx_ring->enqueue_task);
+ }
+
+ return (0);
+}
+
+void
+ena_qflush(if_t ifp)
+{
+ struct ena_adapter *adapter = ifp->if_softc;
+ struct ena_ring *tx_ring = adapter->tx_ring;
+ int i;
+
+ for(i = 0; i < adapter->num_io_queues; ++i, ++tx_ring)
+ if (!drbr_empty(ifp, tx_ring->br)) {
+ ENA_RING_MTX_LOCK(tx_ring);
+ drbr_flush(ifp, tx_ring->br);
+ ENA_RING_MTX_UNLOCK(tx_ring);
+ }
+
+ if_qflush(ifp);
+}
+
+/*********************************************************************
+ * Static functions
+ *********************************************************************/
+
+static inline int
+validate_tx_req_id(struct ena_ring *tx_ring, uint16_t req_id)
+{
+ struct ena_adapter *adapter = tx_ring->adapter;
+ struct ena_tx_buffer *tx_info = NULL;
+
+ if (likely(req_id < tx_ring->ring_size)) {
+ tx_info = &tx_ring->tx_buffer_info[req_id];
+ if (tx_info->mbuf != NULL)
+ return (0);
+ device_printf(adapter->pdev,
+ "tx_info doesn't have valid mbuf\n");
+ }
+
+ device_printf(adapter->pdev, "Invalid req_id: %hu\n", req_id);
+ counter_u64_add(tx_ring->tx_stats.bad_req_id, 1);
+
+ /* Trigger device reset */
+ ena_trigger_reset(adapter, ENA_REGS_RESET_INV_TX_REQ_ID);
+
+ return (EFAULT);
+}
+
+/**
+ * ena_tx_cleanup - clear sent packets and corresponding descriptors
+ * @tx_ring: ring for which we want to clean packets
+ *
+ * Once packets are sent, we ask the device in a loop for descriptors that
+ * are no longer used. We find the related mbuf chain in a map (index in an
+ * array), free it, and then update the ring state.
+ * This is performed in an "endless" loop, updating ring pointers every
+ * TX_COMMIT descriptors. The first check for free descriptors is performed
+ * before the actual loop and is then repeated at the loop end.
+ **/
+static int
+ena_tx_cleanup(struct ena_ring *tx_ring)
+{
+ struct ena_adapter *adapter;
+ struct ena_com_io_cq* io_cq;
+ uint16_t next_to_clean;
+ uint16_t req_id;
+ uint16_t ena_qid;
+ unsigned int total_done = 0;
+ int rc;
+ int commit = TX_COMMIT;
+ int budget = TX_BUDGET;
+ int work_done;
+ bool above_thresh;
+
+ adapter = tx_ring->que->adapter;
+ ena_qid = ENA_IO_TXQ_IDX(tx_ring->que->id);
+ io_cq = &adapter->ena_dev->io_cq_queues[ena_qid];
+ next_to_clean = tx_ring->next_to_clean;
+
+#ifdef DEV_NETMAP
+ if (netmap_tx_irq(adapter->ifp, tx_ring->qid) != NM_IRQ_PASS)
+ return (0);
+#endif /* DEV_NETMAP */
+
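+	/*
+	 * Reclaim completed Tx requests, acknowledging them to the device
+	 * every TX_COMMIT completions.
+	 */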
+ do {
+ struct ena_tx_buffer *tx_info;
+ struct mbuf *mbuf;
+
+ rc = ena_com_tx_comp_req_id_get(io_cq, &req_id);
+ if (unlikely(rc != 0))
+ break;
+
+ rc = validate_tx_req_id(tx_ring, req_id);
+ if (unlikely(rc != 0))
+ break;
+
+ tx_info = &tx_ring->tx_buffer_info[req_id];
+
+ mbuf = tx_info->mbuf;
+
+ tx_info->mbuf = NULL;
+ bintime_clear(&tx_info->timestamp);
+
+ bus_dmamap_sync(adapter->tx_buf_tag, tx_info->dmamap,
+ BUS_DMASYNC_POSTWRITE);
+ bus_dmamap_unload(adapter->tx_buf_tag,
+ tx_info->dmamap);
+
+ ena_trace(ENA_DBG | ENA_TXPTH, "tx: q %d mbuf %p completed\n",
+ tx_ring->qid, mbuf);
+
+ m_freem(mbuf);
+
+ total_done += tx_info->tx_descs;
+
+ tx_ring->free_tx_ids[next_to_clean] = req_id;
+ next_to_clean = ENA_TX_RING_IDX_NEXT(next_to_clean,
+ tx_ring->ring_size);
+
+ if (unlikely(--commit == 0)) {
+ commit = TX_COMMIT;
+ /* update ring state every TX_COMMIT descriptor */
+ tx_ring->next_to_clean = next_to_clean;
+ ena_com_comp_ack(
+ &adapter->ena_dev->io_sq_queues[ena_qid],
+ total_done);
+ ena_com_update_dev_comp_head(io_cq);
+ total_done = 0;
+ }
+ } while (likely(--budget));
+
+ work_done = TX_BUDGET - budget;
+
+ ena_trace(ENA_DBG | ENA_TXPTH, "tx: q %d done. total pkts: %d\n",
+ tx_ring->qid, work_done);
+
+ /* If there is still something to commit update ring state */
+ if (likely(commit != TX_COMMIT)) {
+ tx_ring->next_to_clean = next_to_clean;
+ ena_com_comp_ack(&adapter->ena_dev->io_sq_queues[ena_qid],
+ total_done);
+ ena_com_update_dev_comp_head(io_cq);
+ }
+
+ /*
+ * Need to make the rings circular update visible to
+ * ena_xmit_mbuf() before checking for tx_ring->running.
+ */
+ mb();
+
+ above_thresh = ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq,
+ ENA_TX_RESUME_THRESH);
+ if (unlikely(!tx_ring->running && above_thresh)) {
+ ENA_RING_MTX_LOCK(tx_ring);
+ above_thresh =
+ ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq,
+ ENA_TX_RESUME_THRESH);
+ if (!tx_ring->running && above_thresh) {
+ tx_ring->running = true;
+ counter_u64_add(tx_ring->tx_stats.queue_wakeup, 1);
+ taskqueue_enqueue(tx_ring->enqueue_tq,
+ &tx_ring->enqueue_task);
+ }
+ ENA_RING_MTX_UNLOCK(tx_ring);
+ }
+
+ return (work_done);
+}
+
+static void
+ena_rx_hash_mbuf(struct ena_ring *rx_ring, struct ena_com_rx_ctx *ena_rx_ctx,
+ struct mbuf *mbuf)
+{
+ struct ena_adapter *adapter = rx_ring->adapter;
+
+ if (likely(ENA_FLAG_ISSET(ENA_FLAG_RSS_ACTIVE, adapter))) {
+ mbuf->m_pkthdr.flowid = ena_rx_ctx->hash;
+
+ if (ena_rx_ctx->frag &&
+ (ena_rx_ctx->l3_proto != ENA_ETH_IO_L3_PROTO_UNKNOWN)) {
+ M_HASHTYPE_SET(mbuf, M_HASHTYPE_OPAQUE_HASH);
+ return;
+ }
+
+ switch (ena_rx_ctx->l3_proto) {
+ case ENA_ETH_IO_L3_PROTO_IPV4:
+ switch (ena_rx_ctx->l4_proto) {
+ case ENA_ETH_IO_L4_PROTO_TCP:
+ M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_TCP_IPV4);
+ break;
+ case ENA_ETH_IO_L4_PROTO_UDP:
+ M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_UDP_IPV4);
+ break;
+ default:
+ M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_IPV4);
+ }
+ break;
+ case ENA_ETH_IO_L3_PROTO_IPV6:
+ switch (ena_rx_ctx->l4_proto) {
+ case ENA_ETH_IO_L4_PROTO_TCP:
+ M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_TCP_IPV6);
+ break;
+ case ENA_ETH_IO_L4_PROTO_UDP:
+ M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_UDP_IPV6);
+ break;
+ default:
+ M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_IPV6);
+ }
+ break;
+ case ENA_ETH_IO_L3_PROTO_UNKNOWN:
+ M_HASHTYPE_SET(mbuf, M_HASHTYPE_NONE);
+ break;
+ default:
+ M_HASHTYPE_SET(mbuf, M_HASHTYPE_OPAQUE_HASH);
+ }
+ } else {
+ mbuf->m_pkthdr.flowid = rx_ring->qid;
+ M_HASHTYPE_SET(mbuf, M_HASHTYPE_NONE);
+ }
+}
+
+/**
+ * ena_rx_mbuf - assemble mbuf from descriptors
+ * @rx_ring: ring for which we want to clean packets
+ * @ena_bufs: buffer info
+ * @ena_rx_ctx: metadata for this packet(s)
+ * @next_to_clean: ring pointer, will be updated only upon success
+ *
+ **/
+static struct mbuf*
+ena_rx_mbuf(struct ena_ring *rx_ring, struct ena_com_rx_buf_info *ena_bufs,
+ struct ena_com_rx_ctx *ena_rx_ctx, uint16_t *next_to_clean)
+{
+ struct mbuf *mbuf;
+ struct ena_rx_buffer *rx_info;
+ struct ena_adapter *adapter;
+ unsigned int descs = ena_rx_ctx->descs;
+ int rc;
+ uint16_t ntc, len, req_id, buf = 0;
+
+ ntc = *next_to_clean;
+ adapter = rx_ring->adapter;
+
+ len = ena_bufs[buf].len;
+ req_id = ena_bufs[buf].req_id;
+ rc = validate_rx_req_id(rx_ring, req_id);
+ if (unlikely(rc != 0))
+ return (NULL);
+
+ rx_info = &rx_ring->rx_buffer_info[req_id];
+ if (unlikely(rx_info->mbuf == NULL)) {
+ device_printf(adapter->pdev, "NULL mbuf in rx_info");
+ return (NULL);
+ }
+
+ ena_trace(ENA_DBG | ENA_RXPTH, "rx_info %p, mbuf %p, paddr %jx\n",
+ rx_info, rx_info->mbuf, (uintmax_t)rx_info->ena_buf.paddr);
+
+ bus_dmamap_sync(adapter->rx_buf_tag, rx_info->map,
+ BUS_DMASYNC_POSTREAD);
+ mbuf = rx_info->mbuf;
+ mbuf->m_flags |= M_PKTHDR;
+ mbuf->m_pkthdr.len = len;
+ mbuf->m_len = len;
+ mbuf->m_pkthdr.rcvif = rx_ring->que->adapter->ifp;
+
+	/* Fill the mbuf with the hash key and its interpretation for optimization */
+ ena_rx_hash_mbuf(rx_ring, ena_rx_ctx, mbuf);
+
+ ena_trace(ENA_DBG | ENA_RXPTH, "rx mbuf 0x%p, flags=0x%x, len: %d\n",
+ mbuf, mbuf->m_flags, mbuf->m_pkthdr.len);
+
+ /* DMA address is not needed anymore, unmap it */
+ bus_dmamap_unload(rx_ring->adapter->rx_buf_tag, rx_info->map);
+
+ rx_info->mbuf = NULL;
+ rx_ring->free_rx_ids[ntc] = req_id;
+ ntc = ENA_RX_RING_IDX_NEXT(ntc, rx_ring->ring_size);
+
+ /*
+	 * While there is more than 1 descriptor for one received packet,
+	 * append the other mbufs to the main one
+ */
+ while (--descs) {
+ ++buf;
+ len = ena_bufs[buf].len;
+ req_id = ena_bufs[buf].req_id;
+ rc = validate_rx_req_id(rx_ring, req_id);
+ if (unlikely(rc != 0)) {
+ /*
+ * If the req_id is invalid, then the device will be
+ * reset. In that case we must free all mbufs that
+ * were already gathered.
+ */
+ m_freem(mbuf);
+ return (NULL);
+ }
+ rx_info = &rx_ring->rx_buffer_info[req_id];
+
+ if (unlikely(rx_info->mbuf == NULL)) {
+ device_printf(adapter->pdev, "NULL mbuf in rx_info");
+ /*
+ * If one of the required mbufs was not allocated yet,
+			 * we can break here.
+			 * All previously used descriptors will be reallocated
+			 * later and unused mbufs can be reused.
+			 * The next_to_clean pointer will not be updated in case
+			 * of an error, so the caller should advance it manually
+			 * in the error handling routine to keep it in sync
+			 * with the hw ring.
+ */
+ m_freem(mbuf);
+ return (NULL);
+ }
+
+ bus_dmamap_sync(adapter->rx_buf_tag, rx_info->map,
+ BUS_DMASYNC_POSTREAD);
+ if (unlikely(m_append(mbuf, len, rx_info->mbuf->m_data) == 0)) {
+ counter_u64_add(rx_ring->rx_stats.mbuf_alloc_fail, 1);
+ ena_trace(ENA_WARNING, "Failed to append Rx mbuf %p\n",
+ mbuf);
+ }
+
+ ena_trace(ENA_DBG | ENA_RXPTH,
+ "rx mbuf updated. len %d\n", mbuf->m_pkthdr.len);
+
+ /* Free already appended mbuf, it won't be useful anymore */
+ bus_dmamap_unload(rx_ring->adapter->rx_buf_tag, rx_info->map);
+ m_freem(rx_info->mbuf);
+ rx_info->mbuf = NULL;
+
+ rx_ring->free_rx_ids[ntc] = req_id;
+ ntc = ENA_RX_RING_IDX_NEXT(ntc, rx_ring->ring_size);
+ }
+
+ *next_to_clean = ntc;
+
+ return (mbuf);
+}
+
+/**
+ * ena_rx_checksum - indicate in mbuf if hw indicated a good cksum
+ **/
+static inline void
+ena_rx_checksum(struct ena_ring *rx_ring, struct ena_com_rx_ctx *ena_rx_ctx,
+ struct mbuf *mbuf)
+{
+
+ /* if IP and error */
+ if (unlikely((ena_rx_ctx->l3_proto == ENA_ETH_IO_L3_PROTO_IPV4) &&
+ ena_rx_ctx->l3_csum_err)) {
+ /* ipv4 checksum error */
+ mbuf->m_pkthdr.csum_flags = 0;
+ counter_u64_add(rx_ring->rx_stats.bad_csum, 1);
+ ena_trace(ENA_DBG, "RX IPv4 header checksum error\n");
+ return;
+ }
+
+ /* if TCP/UDP */
+ if ((ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_TCP) ||
+ (ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_UDP)) {
+ if (ena_rx_ctx->l4_csum_err) {
+ /* TCP/UDP checksum error */
+ mbuf->m_pkthdr.csum_flags = 0;
+ counter_u64_add(rx_ring->rx_stats.bad_csum, 1);
+ ena_trace(ENA_DBG, "RX L4 checksum error\n");
+ } else {
+ mbuf->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
+ mbuf->m_pkthdr.csum_flags |= CSUM_IP_VALID;
+ }
+ }
+}
+
+/**
+ * ena_rx_cleanup - handle rx irq
+ * @rx_ring: ring for which the irq is being handled
+ **/
+static int
+ena_rx_cleanup(struct ena_ring *rx_ring)
+{
+ struct ena_adapter *adapter;
+ struct mbuf *mbuf;
+ struct ena_com_rx_ctx ena_rx_ctx;
+ struct ena_com_io_cq* io_cq;
+ struct ena_com_io_sq* io_sq;
+ if_t ifp;
+ uint16_t ena_qid;
+ uint16_t next_to_clean;
+ uint32_t refill_required;
+ uint32_t refill_threshold;
+ uint32_t do_if_input = 0;
+ unsigned int qid;
+ int rc, i;
+ int budget = RX_BUDGET;
+#ifdef DEV_NETMAP
+ int done;
+#endif /* DEV_NETMAP */
+
+ adapter = rx_ring->que->adapter;
+ ifp = adapter->ifp;
+ qid = rx_ring->que->id;
+ ena_qid = ENA_IO_RXQ_IDX(qid);
+ io_cq = &adapter->ena_dev->io_cq_queues[ena_qid];
+ io_sq = &adapter->ena_dev->io_sq_queues[ena_qid];
+ next_to_clean = rx_ring->next_to_clean;
+
+#ifdef DEV_NETMAP
+ if (netmap_rx_irq(adapter->ifp, rx_ring->qid, &done) != NM_IRQ_PASS)
+ return (0);
+#endif /* DEV_NETMAP */
+
+ ena_trace(ENA_DBG, "rx: qid %d\n", qid);
+
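+	/*
+	 * Fetch and process received packets until the Rx budget is exhausted
+	 * or there are no more completed descriptors.
+	 */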
+ do {
+ ena_rx_ctx.ena_bufs = rx_ring->ena_bufs;
+ ena_rx_ctx.max_bufs = adapter->max_rx_sgl_size;
+ ena_rx_ctx.descs = 0;
+ bus_dmamap_sync(io_cq->cdesc_addr.mem_handle.tag,
+ io_cq->cdesc_addr.mem_handle.map, BUS_DMASYNC_POSTREAD);
+ rc = ena_com_rx_pkt(io_cq, io_sq, &ena_rx_ctx);
+
+ if (unlikely(rc != 0))
+ goto error;
+
+ if (unlikely(ena_rx_ctx.descs == 0))
+ break;
+
+ ena_trace(ENA_DBG | ENA_RXPTH, "rx: q %d got packet from ena. "
+ "descs #: %d l3 proto %d l4 proto %d hash: %x\n",
+ rx_ring->qid, ena_rx_ctx.descs, ena_rx_ctx.l3_proto,
+ ena_rx_ctx.l4_proto, ena_rx_ctx.hash);
+
+ /* Receive mbuf from the ring */
+ mbuf = ena_rx_mbuf(rx_ring, rx_ring->ena_bufs,
+ &ena_rx_ctx, &next_to_clean);
+ bus_dmamap_sync(io_cq->cdesc_addr.mem_handle.tag,
+ io_cq->cdesc_addr.mem_handle.map, BUS_DMASYNC_PREREAD);
+ /* Exit if we failed to retrieve a buffer */
+ if (unlikely(mbuf == NULL)) {
+ for (i = 0; i < ena_rx_ctx.descs; ++i) {
+ rx_ring->free_rx_ids[next_to_clean] =
+ rx_ring->ena_bufs[i].req_id;
+ next_to_clean =
+ ENA_RX_RING_IDX_NEXT(next_to_clean,
+ rx_ring->ring_size);
+
+ }
+ break;
+ }
+
+ if (((ifp->if_capenable & IFCAP_RXCSUM) != 0) ||
+ ((ifp->if_capenable & IFCAP_RXCSUM_IPV6) != 0)) {
+ ena_rx_checksum(rx_ring, &ena_rx_ctx, mbuf);
+ }
+
+ counter_enter();
+ counter_u64_add_protected(rx_ring->rx_stats.bytes,
+ mbuf->m_pkthdr.len);
+ counter_u64_add_protected(adapter->hw_stats.rx_bytes,
+ mbuf->m_pkthdr.len);
+ counter_exit();
+ /*
+		 * LRO is only for IP/TCP packets, and the TCP checksum of the
+		 * packet should be computed by the hardware.
+ */
+ do_if_input = 1;
+ if (((ifp->if_capenable & IFCAP_LRO) != 0) &&
+ ((mbuf->m_pkthdr.csum_flags & CSUM_IP_VALID) != 0) &&
+ (ena_rx_ctx.l4_proto == ENA_ETH_IO_L4_PROTO_TCP)) {
+ /*
+ * Send to the stack if:
+ * - LRO not enabled, or
+ * - no LRO resources, or
+ * - lro enqueue fails
+ */
+ if ((rx_ring->lro.lro_cnt != 0) &&
+ (tcp_lro_rx(&rx_ring->lro, mbuf, 0) == 0))
+ do_if_input = 0;
+ }
+ if (do_if_input != 0) {
+ ena_trace(ENA_DBG | ENA_RXPTH,
+ "calling if_input() with mbuf %p\n", mbuf);
+ (*ifp->if_input)(ifp, mbuf);
+ }
+
+ counter_enter();
+ counter_u64_add_protected(rx_ring->rx_stats.cnt, 1);
+ counter_u64_add_protected(adapter->hw_stats.rx_packets, 1);
+ counter_exit();
+ } while (--budget);
+
+ rx_ring->next_to_clean = next_to_clean;
+
+ refill_required = ena_com_free_q_entries(io_sq);
+ refill_threshold = min_t(int,
+ rx_ring->ring_size / ENA_RX_REFILL_THRESH_DIVIDER,
+ ENA_RX_REFILL_THRESH_PACKET);
+
+ if (refill_required > refill_threshold) {
+ ena_com_update_dev_comp_head(rx_ring->ena_com_io_cq);
+ ena_refill_rx_bufs(rx_ring, refill_required);
+ }
+
+ tcp_lro_flush_all(&rx_ring->lro);
+
+ return (RX_BUDGET - budget);
+
+error:
+ counter_u64_add(rx_ring->rx_stats.bad_desc_num, 1);
+
+ /* Too many desc from the device. Trigger reset */
+ ena_trigger_reset(adapter, ENA_REGS_RESET_TOO_MANY_RX_DESCS);
+
+ return (0);
+}
+
+static void
+ena_tx_csum(struct ena_com_tx_ctx *ena_tx_ctx, struct mbuf *mbuf,
+ bool disable_meta_caching)
+{
+ struct ena_com_tx_meta *ena_meta;
+ struct ether_vlan_header *eh;
+ struct mbuf *mbuf_next;
+ u32 mss;
+ bool offload;
+ uint16_t etype;
+ int ehdrlen;
+ struct ip *ip;
+ int iphlen;
+ struct tcphdr *th;
+ int offset;
+
+ offload = false;
+ ena_meta = &ena_tx_ctx->ena_meta;
+ mss = mbuf->m_pkthdr.tso_segsz;
+
+ if (mss != 0)
+ offload = true;
+
+ if ((mbuf->m_pkthdr.csum_flags & CSUM_TSO) != 0)
+ offload = true;
+
+ if ((mbuf->m_pkthdr.csum_flags & CSUM_OFFLOAD) != 0)
+ offload = true;
+
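+	/*
+	 * No offload was requested. When meta caching is disabled, the device
+	 * still expects a (zeroed) meta descriptor with every packet.
+	 */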
+ if (!offload) {
+ if (disable_meta_caching) {
+ memset(ena_meta, 0, sizeof(*ena_meta));
+ ena_tx_ctx->meta_valid = 1;
+ } else {
+ ena_tx_ctx->meta_valid = 0;
+ }
+ return;
+ }
+
+ /* Determine where frame payload starts. */
+ eh = mtod(mbuf, struct ether_vlan_header *);
+ if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
+ etype = ntohs(eh->evl_proto);
+ ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
+ } else {
+ etype = ntohs(eh->evl_encap_proto);
+ ehdrlen = ETHER_HDR_LEN;
+ }
+
+ mbuf_next = m_getptr(mbuf, ehdrlen, &offset);
+ ip = (struct ip *)(mtodo(mbuf_next, offset));
+ iphlen = ip->ip_hl << 2;
+
+ mbuf_next = m_getptr(mbuf, iphlen + ehdrlen, &offset);
+ th = (struct tcphdr *)(mtodo(mbuf_next, offset));
+
+ if ((mbuf->m_pkthdr.csum_flags & CSUM_IP) != 0) {
+ ena_tx_ctx->l3_csum_enable = 1;
+ }
+ if ((mbuf->m_pkthdr.csum_flags & CSUM_TSO) != 0) {
+ ena_tx_ctx->tso_enable = 1;
+ ena_meta->l4_hdr_len = (th->th_off);
+ }
+
+ switch (etype) {
+ case ETHERTYPE_IP:
+ ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV4;
+ if ((ip->ip_off & htons(IP_DF)) != 0)
+ ena_tx_ctx->df = 1;
+ break;
+ case ETHERTYPE_IPV6:
+ ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV6;
+
+ default:
+ break;
+ }
+
+ if (ip->ip_p == IPPROTO_TCP) {
+ ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_TCP;
+ if ((mbuf->m_pkthdr.csum_flags &
+ (CSUM_IP_TCP | CSUM_IP6_TCP)) != 0)
+ ena_tx_ctx->l4_csum_enable = 1;
+ else
+ ena_tx_ctx->l4_csum_enable = 0;
+ } else if (ip->ip_p == IPPROTO_UDP) {
+ ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_UDP;
+ if ((mbuf->m_pkthdr.csum_flags &
+ (CSUM_IP_UDP | CSUM_IP6_UDP)) != 0)
+ ena_tx_ctx->l4_csum_enable = 1;
+ else
+ ena_tx_ctx->l4_csum_enable = 0;
+ } else {
+ ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_UNKNOWN;
+ ena_tx_ctx->l4_csum_enable = 0;
+ }
+
+ ena_meta->mss = mss;
+ ena_meta->l3_hdr_len = iphlen;
+ ena_meta->l3_hdr_offset = ehdrlen;
+ ena_tx_ctx->meta_valid = 1;
+}
+
+static int
+ena_check_and_collapse_mbuf(struct ena_ring *tx_ring, struct mbuf **mbuf)
+{
+ struct ena_adapter *adapter;
+ struct mbuf *collapsed_mbuf;
+ int num_frags;
+
+ adapter = tx_ring->adapter;
+ num_frags = ena_mbuf_count(*mbuf);
+
+ /* One segment must be reserved for configuration descriptor. */
+ if (num_frags < adapter->max_tx_sgl_size)
+ return (0);
+ counter_u64_add(tx_ring->tx_stats.collapse, 1);
+
+ collapsed_mbuf = m_collapse(*mbuf, M_NOWAIT,
+ adapter->max_tx_sgl_size - 1);
+ if (unlikely(collapsed_mbuf == NULL)) {
+ counter_u64_add(tx_ring->tx_stats.collapse_err, 1);
+ return (ENOMEM);
+ }
+
+	/* If the mbuf was collapsed successfully, the original mbuf is released. */
+ *mbuf = collapsed_mbuf;
+
+ return (0);
+}
+
+static int
+ena_tx_map_mbuf(struct ena_ring *tx_ring, struct ena_tx_buffer *tx_info,
+ struct mbuf *mbuf, void **push_hdr, u16 *header_len)
+{
+ struct ena_adapter *adapter = tx_ring->adapter;
+ struct ena_com_buf *ena_buf;
+ bus_dma_segment_t segs[ENA_BUS_DMA_SEGS];
+ size_t iseg = 0;
+ uint32_t mbuf_head_len;
+ uint16_t offset;
+ int rc, nsegs;
+
+ mbuf_head_len = mbuf->m_len;
+ tx_info->mbuf = mbuf;
+ ena_buf = tx_info->bufs;
+
+ /*
+ * For easier maintaining of the DMA map, map the whole mbuf even if
+ * the LLQ is used. The descriptors will be filled using the segments.
+ */
+ rc = bus_dmamap_load_mbuf_sg(adapter->tx_buf_tag, tx_info->dmamap, mbuf,
+ segs, &nsegs, BUS_DMA_NOWAIT);
+ if (unlikely((rc != 0) || (nsegs == 0))) {
+ ena_trace(ENA_WARNING,
+ "dmamap load failed! err: %d nsegs: %d\n", rc, nsegs);
+ goto dma_error;
+ }
+
+ if (tx_ring->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) {
+ /*
+		 * When the device is in LLQ mode, the driver will copy
+		 * the header into the device memory space.
+		 * The ena_com layer assumes the header is in a linear
+		 * memory space.
+		 * This assumption might be wrong since part of the header
+		 * can be in the fragmented buffers.
+		 * First check if the header fits in the first mbuf. If not,
+		 * copy it to a separate buffer that will hold the linearized data.
+ */
+ *header_len = min_t(uint32_t, mbuf->m_pkthdr.len, tx_ring->tx_max_header_size);
+
+ /* If header is in linear space, just point into mbuf's data. */
+ if (likely(*header_len <= mbuf_head_len)) {
+ *push_hdr = mbuf->m_data;
+ /*
+		 * Otherwise, copy the whole header from multiple mbufs
+		 * to an intermediate buffer.
+ */
+ } else {
+ m_copydata(mbuf, 0, *header_len, tx_ring->push_buf_intermediate_buf);
+ *push_hdr = tx_ring->push_buf_intermediate_buf;
+
+ counter_u64_add(tx_ring->tx_stats.llq_buffer_copy, 1);
+ }
+
+ ena_trace(ENA_DBG | ENA_TXPTH,
+ "mbuf: %p header_buf->vaddr: %p push_len: %d\n",
+ mbuf, *push_hdr, *header_len);
+
+		/* If the packet fits in the LLQ header, no DMA segments are needed. */
+ if (mbuf->m_pkthdr.len <= tx_ring->tx_max_header_size) {
+ return (0);
+ } else {
+ offset = tx_ring->tx_max_header_size;
+ /*
+			 * As the header part is mapped to the LLQ header, we can
+			 * skip it and just map the rest of the mbuf to DMA segments.
+ */
+ while (offset > 0) {
+ if (offset >= segs[iseg].ds_len) {
+ offset -= segs[iseg].ds_len;
+ } else {
+ ena_buf->paddr = segs[iseg].ds_addr + offset;
+ ena_buf->len = segs[iseg].ds_len - offset;
+ ena_buf++;
+ tx_info->num_of_bufs++;
+ offset = 0;
+ }
+ iseg++;
+ }
+ }
+ } else {
+ *push_hdr = NULL;
+ /*
+		 * header_len is just a hint for the device. Because FreeBSD does
+		 * not give us information about the packet header length and it is
+		 * not guaranteed that all packet headers will be in the 1st mbuf,
+		 * setting header_len to 0 makes the device ignore this value and
+		 * resolve the header on its own.
+ */
+ *header_len = 0;
+ }
+
+ /* Map rest of the mbuf */
+ while (iseg < nsegs) {
+ ena_buf->paddr = segs[iseg].ds_addr;
+ ena_buf->len = segs[iseg].ds_len;
+ ena_buf++;
+ iseg++;
+ tx_info->num_of_bufs++;
+ }
+
+ return (0);
+
+dma_error:
+ counter_u64_add(tx_ring->tx_stats.dma_mapping_err, 1);
+ tx_info->mbuf = NULL;
+ return (rc);
+}
+
+static int
+ena_xmit_mbuf(struct ena_ring *tx_ring, struct mbuf **mbuf)
+{
+ struct ena_adapter *adapter;
+ struct ena_tx_buffer *tx_info;
+ struct ena_com_tx_ctx ena_tx_ctx;
+ struct ena_com_dev *ena_dev;
+ struct ena_com_io_sq* io_sq;
+ void *push_hdr;
+ uint16_t next_to_use;
+ uint16_t req_id;
+ uint16_t ena_qid;
+ uint16_t header_len;
+ int rc;
+ int nb_hw_desc;
+
+ ena_qid = ENA_IO_TXQ_IDX(tx_ring->que->id);
+ adapter = tx_ring->que->adapter;
+ ena_dev = adapter->ena_dev;
+ io_sq = &ena_dev->io_sq_queues[ena_qid];
+
+ rc = ena_check_and_collapse_mbuf(tx_ring, mbuf);
+ if (unlikely(rc != 0)) {
+ ena_trace(ENA_WARNING,
+ "Failed to collapse mbuf! err: %d\n", rc);
+ return (rc);
+ }
+
+ ena_trace(ENA_DBG | ENA_TXPTH, "Tx: %d bytes\n", (*mbuf)->m_pkthdr.len);
+
+ next_to_use = tx_ring->next_to_use;
+ req_id = tx_ring->free_tx_ids[next_to_use];
+ tx_info = &tx_ring->tx_buffer_info[req_id];
+ tx_info->num_of_bufs = 0;
+
+ rc = ena_tx_map_mbuf(tx_ring, tx_info, *mbuf, &push_hdr, &header_len);
+ if (unlikely(rc != 0)) {
+ ena_trace(ENA_WARNING, "Failed to map TX mbuf\n");
+ return (rc);
+ }
+ memset(&ena_tx_ctx, 0x0, sizeof(struct ena_com_tx_ctx));
+ ena_tx_ctx.ena_bufs = tx_info->bufs;
+ ena_tx_ctx.push_header = push_hdr;
+ ena_tx_ctx.num_bufs = tx_info->num_of_bufs;
+ ena_tx_ctx.req_id = req_id;
+ ena_tx_ctx.header_len = header_len;
+
+ /* Set flags and meta data */
+ ena_tx_csum(&ena_tx_ctx, *mbuf, adapter->disable_meta_caching);
+
+ if (tx_ring->acum_pkts == DB_THRESHOLD ||
+ ena_com_is_doorbell_needed(tx_ring->ena_com_io_sq, &ena_tx_ctx)) {
+ ena_trace(ENA_DBG | ENA_TXPTH,
+ "llq tx max burst size of queue %d achieved, writing doorbell to send burst\n",
+ tx_ring->que->id);
+ ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq);
+ counter_u64_add(tx_ring->tx_stats.doorbells, 1);
+ tx_ring->acum_pkts = 0;
+ }
+
+ /* Prepare the packet's descriptors and send them to device */
+ rc = ena_com_prepare_tx(io_sq, &ena_tx_ctx, &nb_hw_desc);
+ if (unlikely(rc != 0)) {
+ if (likely(rc == ENA_COM_NO_MEM)) {
+ ena_trace(ENA_DBG | ENA_TXPTH,
+ "tx ring[%d] if out of space\n", tx_ring->que->id);
+ } else {
+ device_printf(adapter->pdev,
+ "failed to prepare tx bufs\n");
+ }
+ counter_u64_add(tx_ring->tx_stats.prepare_ctx_err, 1);
+ goto dma_error;
+ }
+
+ counter_enter();
+ counter_u64_add_protected(tx_ring->tx_stats.cnt, 1);
+ counter_u64_add_protected(tx_ring->tx_stats.bytes,
+ (*mbuf)->m_pkthdr.len);
+
+ counter_u64_add_protected(adapter->hw_stats.tx_packets, 1);
+ counter_u64_add_protected(adapter->hw_stats.tx_bytes,
+ (*mbuf)->m_pkthdr.len);
+ counter_exit();
+
+ tx_info->tx_descs = nb_hw_desc;
+ getbinuptime(&tx_info->timestamp);
+ tx_info->print_once = true;
+
+ tx_ring->next_to_use = ENA_TX_RING_IDX_NEXT(next_to_use,
+ tx_ring->ring_size);
+
+	/* Stop the queue when there is no more space available; the packet can
+	 * use up to sgl_size + 2 descriptors: one for the meta descriptor and
+	 * one for the header (if the header is larger than tx_max_header_size).
+ */
+ if (unlikely(!ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq,
+ adapter->max_tx_sgl_size + 2))) {
+ ena_trace(ENA_DBG | ENA_TXPTH, "Stop queue %d\n",
+ tx_ring->que->id);
+
+ tx_ring->running = false;
+ counter_u64_add(tx_ring->tx_stats.queue_stop, 1);
+
+ /* There is a rare condition where this function decides to
+ * stop the queue but meanwhile tx_cleanup() updates
+ * next_to_completion and terminates.
+ * The queue will remain stopped forever.
+		 * To solve this issue, this function performs an mb(), checks
+		 * the wakeup condition, and wakes up the queue if needed.
+ */
+ mb();
+
+ if (ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq,
+ ENA_TX_RESUME_THRESH)) {
+ tx_ring->running = true;
+ counter_u64_add(tx_ring->tx_stats.queue_wakeup, 1);
+ }
+ }
+
+ bus_dmamap_sync(adapter->tx_buf_tag, tx_info->dmamap,
+ BUS_DMASYNC_PREWRITE);
+
+ return (0);
+
+dma_error:
+ tx_info->mbuf = NULL;
+ bus_dmamap_unload(adapter->tx_buf_tag, tx_info->dmamap);
+
+ return (rc);
+}
+
+static void
+ena_start_xmit(struct ena_ring *tx_ring)
+{
+ struct mbuf *mbuf;
+ struct ena_adapter *adapter = tx_ring->adapter;
+ struct ena_com_io_sq* io_sq;
+ int ena_qid;
+ int ret = 0;
+
+ if (unlikely((if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING) == 0))
+ return;
+
+ if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_LINK_UP, adapter)))
+ return;
+
+ ena_qid = ENA_IO_TXQ_IDX(tx_ring->que->id);
+ io_sq = &adapter->ena_dev->io_sq_queues[ena_qid];
+
+ while ((mbuf = drbr_peek(adapter->ifp, tx_ring->br)) != NULL) {
+ ena_trace(ENA_DBG | ENA_TXPTH, "\ndequeued mbuf %p with flags %#x and"
+ " header csum flags %#jx\n",
+ mbuf, mbuf->m_flags, (uint64_t)mbuf->m_pkthdr.csum_flags);
+
+ if (unlikely(!tx_ring->running)) {
+ drbr_putback(adapter->ifp, tx_ring->br, mbuf);
+ break;
+ }
+
+ if (unlikely((ret = ena_xmit_mbuf(tx_ring, &mbuf)) != 0)) {
+ if (ret == ENA_COM_NO_MEM) {
+ drbr_putback(adapter->ifp, tx_ring->br, mbuf);
+ } else if (ret == ENA_COM_NO_SPACE) {
+ drbr_putback(adapter->ifp, tx_ring->br, mbuf);
+ } else {
+ m_freem(mbuf);
+ drbr_advance(adapter->ifp, tx_ring->br);
+ }
+
+ break;
+ }
+
+ drbr_advance(adapter->ifp, tx_ring->br);
+
+ if (unlikely((if_getdrvflags(adapter->ifp) &
+ IFF_DRV_RUNNING) == 0))
+ return;
+
+ tx_ring->acum_pkts++;
+
+ BPF_MTAP(adapter->ifp, mbuf);
+ }
+
+ if (likely(tx_ring->acum_pkts != 0)) {
+ /* Trigger the dma engine */
+ ena_com_write_sq_doorbell(io_sq);
+ counter_u64_add(tx_ring->tx_stats.doorbells, 1);
+ tx_ring->acum_pkts = 0;
+ }
+
+ if (unlikely(!tx_ring->running))
+ taskqueue_enqueue(tx_ring->que->cleanup_tq,
+ &tx_ring->que->cleanup_task);
+}
diff --git a/sys/dev/ena/ena_datapath.h b/sys/dev/ena/ena_datapath.h
new file mode 100644
index 000000000000..86d875cbe015
--- /dev/null
+++ b/sys/dev/ena/ena_datapath.h
@@ -0,0 +1,42 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) 2015-2020 Amazon.com, Inc. or its affiliates.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ *
+ */
+
+#ifndef ENA_TXRX_H
+#define ENA_TXRX_H
+
+void ena_cleanup(void *arg, int pending);
+void ena_qflush(if_t ifp);
+int ena_mq_start(if_t ifp, struct mbuf *m);
+void ena_deferred_mq_start(void *arg, int pending);
+
+#endif /* ENA_TXRX_H */
diff --git a/sys/dev/ena/ena_netmap.c b/sys/dev/ena/ena_netmap.c
new file mode 100644
index 000000000000..20a341173c8c
--- /dev/null
+++ b/sys/dev/ena/ena_netmap.c
@@ -0,0 +1,1092 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) 2015-2020 Amazon.com, Inc. or its affiliates.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#ifdef DEV_NETMAP
+
+#include "ena.h"
+#include "ena_netmap.h"
+
+#define ENA_NETMAP_MORE_FRAMES 1
+#define ENA_NETMAP_NO_MORE_FRAMES 0
+#define ENA_MAX_FRAMES 16384
+
+struct ena_netmap_ctx {
+ struct netmap_kring *kring;
+ struct ena_adapter *adapter;
+ struct netmap_adapter *na;
+ struct netmap_slot *slots;
+ struct ena_ring *ring;
+ struct ena_com_io_cq *io_cq;
+ struct ena_com_io_sq *io_sq;
+ u_int nm_i;
+ uint16_t nt;
+ uint16_t lim;
+};
+
+/* Netmap callbacks */
+static int ena_netmap_reg(struct netmap_adapter *, int);
+static int ena_netmap_txsync(struct netmap_kring *, int);
+static int ena_netmap_rxsync(struct netmap_kring *, int);
+
+/* Helper functions */
+static int ena_netmap_tx_frames(struct ena_netmap_ctx *);
+static int ena_netmap_tx_frame(struct ena_netmap_ctx *);
+static inline uint16_t ena_netmap_count_slots(struct ena_netmap_ctx *);
+static inline uint16_t ena_netmap_packet_len(struct netmap_slot *, u_int,
+ uint16_t);
+static int ena_netmap_copy_data(struct netmap_adapter *,
+ struct netmap_slot *, u_int, uint16_t, uint16_t, void *);
+static int ena_netmap_map_single_slot(struct netmap_adapter *,
+ struct netmap_slot *, bus_dma_tag_t, bus_dmamap_t, void **, uint64_t *);
+static int ena_netmap_tx_map_slots(struct ena_netmap_ctx *,
+ struct ena_tx_buffer *, void **, uint16_t *, uint16_t *);
+static void ena_netmap_unmap_last_socket_chain(struct ena_netmap_ctx *,
+ struct ena_tx_buffer *);
+static void ena_netmap_tx_cleanup(struct ena_netmap_ctx *);
+static uint16_t ena_netmap_tx_clean_one(struct ena_netmap_ctx *,
+ uint16_t);
+static inline int validate_tx_req_id(struct ena_ring *, uint16_t);
+static int ena_netmap_rx_frames(struct ena_netmap_ctx *);
+static int ena_netmap_rx_frame(struct ena_netmap_ctx *);
+static int ena_netmap_rx_load_desc(struct ena_netmap_ctx *, uint16_t,
+ int *);
+static void ena_netmap_rx_cleanup(struct ena_netmap_ctx *);
+static void ena_netmap_fill_ctx(struct netmap_kring *,
+ struct ena_netmap_ctx *, uint16_t);
+
+int
+ena_netmap_attach(struct ena_adapter *adapter)
+{
+ struct netmap_adapter na;
+
+ ena_trace(ENA_NETMAP, "netmap attach\n");
+
+ bzero(&na, sizeof(na));
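+	/* NAF_MOREFRAG advertises support for frames spanning multiple slots. */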
+ na.na_flags = NAF_MOREFRAG;
+ na.ifp = adapter->ifp;
+ na.num_tx_desc = adapter->requested_tx_ring_size;
+ na.num_rx_desc = adapter->requested_rx_ring_size;
+ na.num_tx_rings = adapter->num_io_queues;
+ na.num_rx_rings = adapter->num_io_queues;
+ na.rx_buf_maxsize = adapter->buf_ring_size;
+ na.nm_txsync = ena_netmap_txsync;
+ na.nm_rxsync = ena_netmap_rxsync;
+ na.nm_register = ena_netmap_reg;
+
+ return (netmap_attach(&na));
+}
+
+int
+ena_netmap_alloc_rx_slot(struct ena_adapter *adapter,
+ struct ena_ring *rx_ring, struct ena_rx_buffer *rx_info)
+{
+ struct netmap_adapter *na = NA(adapter->ifp);
+ struct netmap_kring *kring;
+ struct netmap_ring *ring;
+ struct netmap_slot *slot;
+ void *addr;
+ uint64_t paddr;
+ int nm_i, qid, head, lim, rc;
+
+	/* If the previously allocated frag has not been used yet, reuse it */
+ if (unlikely(rx_info->netmap_buf_idx != 0))
+ return (0);
+
+ qid = rx_ring->qid;
+ kring = na->rx_rings[qid];
+ nm_i = kring->nr_hwcur;
+ head = kring->rhead;
+
+ ena_trace(ENA_NETMAP | ENA_DBG, "nr_hwcur: %d, nr_hwtail: %d, "
+ "rhead: %d, rcur: %d, rtail: %d\n", kring->nr_hwcur,
+ kring->nr_hwtail, kring->rhead, kring->rcur, kring->rtail);
+
+ if ((nm_i == head) && rx_ring->initialized) {
+ ena_trace(ENA_NETMAP, "No free slots in netmap ring\n");
+ return (ENOMEM);
+ }
+
+ ring = kring->ring;
+ if (ring == NULL) {
+ device_printf(adapter->pdev, "Rx ring %d is NULL\n", qid);
+ return (EFAULT);
+ }
+ slot = &ring->slot[nm_i];
+
+ addr = PNMB(na, slot, &paddr);
+ if (addr == NETMAP_BUF_BASE(na)) {
+ device_printf(adapter->pdev, "Bad buff in slot\n");
+ return (EFAULT);
+ }
+
+ rc = netmap_load_map(na, adapter->rx_buf_tag, rx_info->map, addr);
+ if (rc != 0) {
+ ena_trace(ENA_WARNING, "DMA mapping error\n");
+ return (rc);
+ }
+ bus_dmamap_sync(adapter->rx_buf_tag, rx_info->map, BUS_DMASYNC_PREREAD);
+
+ rx_info->ena_buf.paddr = paddr;
+ rx_info->ena_buf.len = ring->nr_buf_size;
+ rx_info->mbuf = NULL;
+ rx_info->netmap_buf_idx = slot->buf_idx;
+
+ slot->buf_idx = 0;
+
+ lim = kring->nkr_num_slots - 1;
+ kring->nr_hwcur = nm_next(nm_i, lim);
+
+ return (0);
+}
+
+void
+ena_netmap_free_rx_slot(struct ena_adapter *adapter,
+ struct ena_ring *rx_ring, struct ena_rx_buffer *rx_info)
+{
+ struct netmap_adapter *na;
+ struct netmap_kring *kring;
+ struct netmap_slot *slot;
+ int nm_i, qid, lim;
+
+ na = NA(adapter->ifp);
+ if (na == NULL) {
+ device_printf(adapter->pdev, "netmap adapter is NULL\n");
+ return;
+ }
+
+ if (na->rx_rings == NULL) {
+ device_printf(adapter->pdev, "netmap rings are NULL\n");
+ return;
+ }
+
+ qid = rx_ring->qid;
+ kring = na->rx_rings[qid];
+ if (kring == NULL) {
+ device_printf(adapter->pdev,
+ "netmap kernel ring %d is NULL\n", qid);
+ return;
+ }
+
+ lim = kring->nkr_num_slots - 1;
+ nm_i = nm_prev(kring->nr_hwcur, lim);
+
+ if (kring->nr_mode != NKR_NETMAP_ON)
+ return;
+
+ bus_dmamap_sync(adapter->rx_buf_tag, rx_info->map,
+ BUS_DMASYNC_POSTREAD);
+ netmap_unload_map(na, adapter->rx_buf_tag, rx_info->map);
+
+	KASSERT(kring->ring != NULL, ("Netmap Rx ring is NULL\n"));
+
+ slot = &kring->ring->slot[nm_i];
+
+ ENA_ASSERT(slot->buf_idx == 0, "Overwrite slot buf\n");
+ slot->buf_idx = rx_info->netmap_buf_idx;
+ slot->flags = NS_BUF_CHANGED;
+
+ rx_info->netmap_buf_idx = 0;
+ kring->nr_hwcur = nm_i;
+}
+
+static bool
+ena_ring_in_netmap(struct ena_adapter *adapter, int qid, enum txrx x)
+{
+ struct netmap_adapter *na;
+ struct netmap_kring *kring;
+
+ if (adapter->ifp->if_capenable & IFCAP_NETMAP) {
+ na = NA(adapter->ifp);
+ kring = (x == NR_RX) ? na->rx_rings[qid] : na->tx_rings[qid];
+ if (kring->nr_mode == NKR_NETMAP_ON)
+ return true;
+ }
+ return false;
+}
+
+bool
+ena_tx_ring_in_netmap(struct ena_adapter *adapter, int qid)
+{
+ return ena_ring_in_netmap(adapter, qid, NR_TX);
+}
+
+bool
+ena_rx_ring_in_netmap(struct ena_adapter *adapter, int qid)
+{
+ return ena_ring_in_netmap(adapter, qid, NR_RX);
+}
+
+static void
+ena_netmap_reset_ring(struct ena_adapter *adapter, int qid, enum txrx x)
+{
+ if (!ena_ring_in_netmap(adapter, qid, x))
+ return;
+
+ netmap_reset(NA(adapter->ifp), x, qid, 0);
+ ena_trace(ENA_NETMAP, "%s ring %d is in netmap mode\n",
+ (x == NR_TX) ? "Tx" : "Rx", qid);
+}
+
+void
+ena_netmap_reset_rx_ring(struct ena_adapter *adapter, int qid)
+{
+ ena_netmap_reset_ring(adapter, qid, NR_RX);
+}
+
+void
+ena_netmap_reset_tx_ring(struct ena_adapter *adapter, int qid)
+{
+ ena_netmap_reset_ring(adapter, qid, NR_TX);
+}
+
+static int
+ena_netmap_reg(struct netmap_adapter *na, int onoff)
+{
+ struct ifnet *ifp = na->ifp;
+ struct ena_adapter* adapter = ifp->if_softc;
+ struct netmap_kring *kring;
+ enum txrx t;
+ int rc, i;
+
+ ENA_LOCK_LOCK(adapter);
+ ENA_FLAG_CLEAR_ATOMIC(ENA_FLAG_TRIGGER_RESET, adapter);
+ ena_down(adapter);
+
+ if (onoff) {
+ ena_trace(ENA_NETMAP, "netmap on\n");
+ for_rx_tx(t) {
+ for (i = 0; i <= nma_get_nrings(na, t); i++) {
+ kring = NMR(na, t)[i];
+ if (nm_kring_pending_on(kring)) {
+ kring->nr_mode = NKR_NETMAP_ON;
+ }
+ }
+ }
+ nm_set_native_flags(na);
+ } else {
+ ena_trace(ENA_NETMAP, "netmap off\n");
+ nm_clear_native_flags(na);
+ for_rx_tx(t) {
+ for (i = 0; i <= nma_get_nrings(na, t); i++) {
+ kring = NMR(na, t)[i];
+ if (nm_kring_pending_off(kring)) {
+ kring->nr_mode = NKR_NETMAP_OFF;
+ }
+ }
+ }
+ }
+
+ rc = ena_up(adapter);
+ if (rc != 0) {
+ ena_trace(ENA_WARNING, "ena_up failed with rc=%d\n", rc);
+ adapter->reset_reason = ENA_REGS_RESET_DRIVER_INVALID_STATE;
+ nm_clear_native_flags(na);
+ ena_destroy_device(adapter, false);
+ ENA_FLAG_SET_ATOMIC(ENA_FLAG_DEV_UP_BEFORE_RESET, adapter);
+ rc = ena_restore_device(adapter);
+ }
+ ENA_LOCK_UNLOCK(adapter);
+
+ return (rc);
+}
+
+static int
+ena_netmap_txsync(struct netmap_kring *kring, int flags)
+{
+ struct ena_netmap_ctx ctx;
+ int rc = 0;
+
+ ena_netmap_fill_ctx(kring, &ctx, ENA_IO_TXQ_IDX(kring->ring_id));
+ ctx.ring = &ctx.adapter->tx_ring[kring->ring_id];
+
+ ENA_RING_MTX_LOCK(ctx.ring);
+ if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEV_UP, ctx.adapter)))
+ goto txsync_end;
+
+ if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_LINK_UP, ctx.adapter)))
+ goto txsync_end;
+
+ rc = ena_netmap_tx_frames(&ctx);
+ ena_netmap_tx_cleanup(&ctx);
+
+txsync_end:
+ ENA_RING_MTX_UNLOCK(ctx.ring);
+ return (rc);
+}
+
+static int
+ena_netmap_tx_frames(struct ena_netmap_ctx *ctx)
+{
+ struct ena_ring *tx_ring = ctx->ring;
+ int rc = 0;
+
+ ctx->nm_i = ctx->kring->nr_hwcur;
+ ctx->nt = ctx->ring->next_to_use;
+
+ __builtin_prefetch(&ctx->slots[ctx->nm_i]);
+
+ while (ctx->nm_i != ctx->kring->rhead) {
+ if ((rc = ena_netmap_tx_frame(ctx)) != 0) {
+ /*
+			 * When there is no free space in the Tx ring, an error
+			 * is still returned. It should not be passed to netmap,
+			 * as the application knows the current ring state from
+			 * the netmap ring pointers. Returning an error there
+			 * could cause the application to exit, while a full Tx
+			 * ring is a common situation.
+ */
+ if (rc == ENA_COM_NO_MEM)
+ rc = 0;
+ break;
+ }
+ tx_ring->acum_pkts++;
+ }
+
+ /* If any packet was sent... */
+ if (likely(ctx->nm_i != ctx->kring->nr_hwcur)) {
+ /* ...send the doorbell to the device. */
+ ena_com_write_sq_doorbell(ctx->io_sq);
+ counter_u64_add(ctx->ring->tx_stats.doorbells, 1);
+ tx_ring->acum_pkts = 0;
+
+ ctx->ring->next_to_use = ctx->nt;
+ ctx->kring->nr_hwcur = ctx->nm_i;
+ }
+
+ return (rc);
+}
+
+static int
+ena_netmap_tx_frame(struct ena_netmap_ctx *ctx)
+{
+ struct ena_com_tx_ctx ena_tx_ctx;
+ struct ena_adapter *adapter;
+ struct ena_ring *tx_ring;
+ struct ena_tx_buffer *tx_info;
+ uint16_t req_id;
+ uint16_t header_len;
+ uint16_t packet_len;
+ int nb_hw_desc;
+ int rc;
+ void *push_hdr;
+
+ adapter = ctx->adapter;
+ if (ena_netmap_count_slots(ctx) > adapter->max_tx_sgl_size) {
+ ena_trace(ENA_WARNING, "Too many slots per packet\n");
+ return (EINVAL);
+ }
+
+ tx_ring = ctx->ring;
+
+ req_id = tx_ring->free_tx_ids[ctx->nt];
+ tx_info = &tx_ring->tx_buffer_info[req_id];
+ tx_info->num_of_bufs = 0;
+ tx_info->nm_info.sockets_used = 0;
+
+ rc = ena_netmap_tx_map_slots(ctx, tx_info, &push_hdr, &header_len,
+ &packet_len);
+ if (unlikely(rc != 0)) {
+ device_printf(adapter->pdev, "Failed to map Tx slot\n");
+ return (rc);
+ }
+
+ bzero(&ena_tx_ctx, sizeof(struct ena_com_tx_ctx));
+ ena_tx_ctx.ena_bufs = tx_info->bufs;
+ ena_tx_ctx.push_header = push_hdr;
+ ena_tx_ctx.num_bufs = tx_info->num_of_bufs;
+ ena_tx_ctx.req_id = req_id;
+ ena_tx_ctx.header_len = header_len;
+
+	/* There are no offloads, as netmap does not support them */
+
+ if (tx_ring->acum_pkts == DB_THRESHOLD ||
+ ena_com_is_doorbell_needed(ctx->io_sq, &ena_tx_ctx)) {
+ ena_com_write_sq_doorbell(ctx->io_sq);
+ counter_u64_add(tx_ring->tx_stats.doorbells, 1);
+ tx_ring->acum_pkts = 0;
+ }
+
+ rc = ena_com_prepare_tx(ctx->io_sq, &ena_tx_ctx, &nb_hw_desc);
+ if (unlikely(rc != 0)) {
+ if (likely(rc == ENA_COM_NO_MEM)) {
+ ena_trace(ENA_NETMAP | ENA_DBG | ENA_TXPTH,
+ "Tx ring[%d] is out of space\n", tx_ring->que->id);
+ } else {
+ device_printf(adapter->pdev,
+ "Failed to prepare Tx bufs\n");
+ }
+ counter_u64_add(tx_ring->tx_stats.prepare_ctx_err, 1);
+
+ ena_netmap_unmap_last_socket_chain(ctx, tx_info);
+ return (rc);
+ }
+
+ counter_enter();
+ counter_u64_add_protected(tx_ring->tx_stats.cnt, 1);
+ counter_u64_add_protected(tx_ring->tx_stats.bytes, packet_len);
+ counter_u64_add_protected(adapter->hw_stats.tx_packets, 1);
+ counter_u64_add_protected(adapter->hw_stats.tx_bytes, packet_len);
+ counter_exit();
+
+ tx_info->tx_descs = nb_hw_desc;
+
+ ctx->nt = ENA_TX_RING_IDX_NEXT(ctx->nt, ctx->ring->ring_size);
+
+ for (unsigned int i = 0; i < tx_info->num_of_bufs; i++)
+ bus_dmamap_sync(adapter->tx_buf_tag,
+ tx_info->nm_info.map_seg[i], BUS_DMASYNC_PREWRITE);
+
+ return (0);
+}
+
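+/*
+ * Count the netmap slots that make up the frame starting at the current slot,
+ * following the NS_MOREFRAG flag.
+ */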
+static inline uint16_t
+ena_netmap_count_slots(struct ena_netmap_ctx *ctx)
+{
+ uint16_t slots = 1;
+ uint16_t nm = ctx->nm_i;
+
+ while ((ctx->slots[nm].flags & NS_MOREFRAG) != 0) {
+ slots++;
+ nm = nm_next(nm, ctx->lim);
+ }
+
+ return slots;
+}
+
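+/*
+ * Sum the lengths of all slots belonging to a single frame, following the
+ * NS_MOREFRAG flag.
+ */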
+static inline uint16_t
+ena_netmap_packet_len(struct netmap_slot *slots, u_int slot_index,
+ uint16_t limit)
+{
+ struct netmap_slot *nm_slot;
+ uint16_t packet_size = 0;
+
+ do {
+ nm_slot = &slots[slot_index];
+ packet_size += nm_slot->len;
+ slot_index = nm_next(slot_index, limit);
+ } while ((nm_slot->flags & NS_MOREFRAG) != 0);
+
+ return packet_size;
+}
+
+static int
+ena_netmap_copy_data(struct netmap_adapter *na, struct netmap_slot *slots,
+ u_int slot_index, uint16_t limit, uint16_t bytes_to_copy, void *destination)
+{
+ struct netmap_slot *nm_slot;
+ void *slot_vaddr;
+ uint16_t packet_size;
+ uint16_t data_amount;
+
+ packet_size = 0;
+ do {
+ nm_slot = &slots[slot_index];
+ slot_vaddr = NMB(na, nm_slot);
+ if (unlikely(slot_vaddr == NULL))
+ return (EINVAL);
+
+ data_amount = min_t(uint16_t, bytes_to_copy, nm_slot->len);
+ memcpy(destination, slot_vaddr, data_amount);
+ bytes_to_copy -= data_amount;
+
+ slot_index = nm_next(slot_index, limit);
+ } while ((nm_slot->flags & NS_MOREFRAG) != 0 && bytes_to_copy > 0);
+
+ return (0);
+}
+
+static int
+ena_netmap_map_single_slot(struct netmap_adapter *na, struct netmap_slot *slot,
+ bus_dma_tag_t dmatag, bus_dmamap_t dmamap, void **vaddr, uint64_t *paddr)
+{
+ int rc;
+
+ *vaddr = PNMB(na, slot, paddr);
+	if (unlikely(*vaddr == NULL)) {
+ ena_trace(ENA_ALERT, "Slot address is NULL\n");
+ return (EINVAL);
+ }
+
+ rc = netmap_load_map(na, dmatag, dmamap, *vaddr);
+ if (unlikely(rc != 0)) {
+ ena_trace(ENA_ALERT, "Failed to map slot %d for DMA\n",
+ slot->buf_idx);
+ return (EINVAL);
+ }
+
+ return (0);
+}
+
+static int
+ena_netmap_tx_map_slots(struct ena_netmap_ctx *ctx,
+ struct ena_tx_buffer *tx_info, void **push_hdr, uint16_t *header_len,
+ uint16_t *packet_len)
+{
+ struct netmap_slot *slot;
+ struct ena_com_buf *ena_buf;
+ struct ena_adapter *adapter;
+ struct ena_ring *tx_ring;
+ struct ena_netmap_tx_info *nm_info;
+ bus_dmamap_t *nm_maps;
+ void *vaddr;
+ uint64_t paddr;
+ uint32_t *nm_buf_idx;
+ uint32_t slot_head_len;
+ uint32_t frag_len;
+ uint32_t remaining_len;
+ uint16_t push_len;
+ uint16_t delta;
+ int rc;
+
+ adapter = ctx->adapter;
+ tx_ring = ctx->ring;
+ ena_buf = tx_info->bufs;
+ nm_info = &tx_info->nm_info;
+ nm_maps = nm_info->map_seg;
+ nm_buf_idx = nm_info->socket_buf_idx;
+ slot = &ctx->slots[ctx->nm_i];
+
+ slot_head_len = slot->len;
+ *packet_len = ena_netmap_packet_len(ctx->slots, ctx->nm_i, ctx->lim);
+ remaining_len = *packet_len;
+ delta = 0;
+
+ __builtin_prefetch(&ctx->slots[ctx->nm_i + 1]);
+ if (tx_ring->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) {
+ /*
+ * When the device is in LLQ mode, the driver will copy
+ * the header into the device memory space.
+ * The ena_com layer assumes that the header is in a linear
+ * memory space.
+ * This assumption might be wrong since part of the header
+ * can be in the fragmented buffers.
+		 * First, check if the header fits in the first slot. If not,
+		 * copy it to a separate buffer that will hold the linearized data.
+ */
+ push_len = min_t(uint32_t, *packet_len,
+ tx_ring->tx_max_header_size);
+ *header_len = push_len;
+ /* If header is in linear space, just point to socket's data. */
+ if (likely(push_len <= slot_head_len)) {
+ *push_hdr = NMB(ctx->na, slot);
+			if (unlikely(*push_hdr == NULL)) {
+ device_printf(adapter->pdev,
+ "Slot vaddress is NULL\n");
+ return (EINVAL);
+ }
+ /*
+		 * Otherwise, copy the whole header from multiple slots
+		 * to the intermediate buffer.
+ */
+ } else {
+ rc = ena_netmap_copy_data(ctx->na,
+ ctx->slots,
+ ctx->nm_i,
+ ctx->lim,
+ push_len,
+ tx_ring->push_buf_intermediate_buf);
+ if (unlikely(rc)) {
+ device_printf(adapter->pdev,
+ "Failed to copy data from slots to push_buf\n");
+ return (EINVAL);
+ }
+
+ *push_hdr = tx_ring->push_buf_intermediate_buf;
+ counter_u64_add(tx_ring->tx_stats.llq_buffer_copy, 1);
+
+ delta = push_len - slot_head_len;
+ }
+
+ ena_trace(ENA_NETMAP | ENA_DBG | ENA_TXPTH,
+ "slot: %d header_buf->vaddr: %p push_len: %d\n",
+ slot->buf_idx, *push_hdr, push_len);
+
+ /*
+		 * If the header was in linear memory space, map the rest of
+		 * the data in the first slot for DMA.
+ */
+ if (slot_head_len > push_len) {
+ rc = ena_netmap_map_single_slot(ctx->na,
+ slot,
+ adapter->tx_buf_tag,
+ *nm_maps,
+ &vaddr,
+ &paddr);
+ if (unlikely(rc != 0)) {
+ device_printf(adapter->pdev,
+ "DMA mapping error\n");
+ return (rc);
+ }
+ nm_maps++;
+
+ ena_buf->paddr = paddr + push_len;
+ ena_buf->len = slot->len - push_len;
+ ena_buf++;
+
+ tx_info->num_of_bufs++;
+ }
+
+ remaining_len -= slot->len;
+
+ /* Save buf idx before advancing */
+ *nm_buf_idx = slot->buf_idx;
+ nm_buf_idx++;
+ slot->buf_idx = 0;
+
+ /* Advance to the next socket */
+ ctx->nm_i = nm_next(ctx->nm_i, ctx->lim);
+ slot = &ctx->slots[ctx->nm_i];
+ nm_info->sockets_used++;
+
+ /*
+		 * If the header is in non-linear space (delta > 0), then skip
+		 * the slots containing only header data and map the last one
+		 * containing both header and packet data.
+		 * The first segment is already accounted for.
+ */
+ while (delta > 0) {
+ __builtin_prefetch(&ctx->slots[ctx->nm_i + 1]);
+ frag_len = slot->len;
+
+ /*
+			 * If the whole segment contains only header data, just
+			 * move to the next one and reduce delta.
+ */
+ if (unlikely(delta >= frag_len)) {
+ delta -= frag_len;
+ } else {
+ /*
+				 * Map the data and then apply the remaining
+				 * header offset to it
+ */
+ rc = ena_netmap_map_single_slot(ctx->na,
+ slot,
+ adapter->tx_buf_tag,
+ *nm_maps,
+ &vaddr,
+ &paddr);
+ if (unlikely(rc != 0)) {
+ device_printf(adapter->pdev,
+ "DMA mapping error\n");
+ goto error_map;
+ }
+ nm_maps++;
+
+ ena_buf->paddr = paddr + delta;
+ ena_buf->len = slot->len - delta;
+ ena_buf++;
+
+ tx_info->num_of_bufs++;
+ delta = 0;
+ }
+
+ remaining_len -= slot->len;
+
+ /* Save buf idx before advancing */
+ *nm_buf_idx = slot->buf_idx;
+ nm_buf_idx++;
+ slot->buf_idx = 0;
+
+ /* Advance to the next socket */
+ ctx->nm_i = nm_next(ctx->nm_i, ctx->lim);
+ slot = &ctx->slots[ctx->nm_i];
+ nm_info->sockets_used++;
+ }
+ } else {
+ *push_hdr = NULL;
+ /*
+ * header_len is just a hint for the device. Since netmap does not
+ * provide any information about the packet header length and it is
+ * not guaranteed that the whole header will be in the 1st slot,
+ * setting header_len to 0 makes the device ignore this value and
+ * resolve the header on its own.
+ */
+ *header_len = 0;
+ }
+
+ /* Map all remaining data (regular routine for non-LLQ mode) */
+ while (remaining_len > 0) {
+ __builtin_prefetch(&ctx->slots[ctx->nm_i + 1]);
+
+ rc = ena_netmap_map_single_slot(ctx->na,
+ slot,
+ adapter->tx_buf_tag,
+ *nm_maps,
+ &vaddr,
+ &paddr);
+ if (unlikely(rc != 0)) {
+ device_printf(adapter->pdev,
+ "DMA mapping error\n");
+ goto error_map;
+ }
+ nm_maps++;
+
+ ena_buf->paddr = paddr;
+ ena_buf->len = slot->len;
+ ena_buf++;
+
+ tx_info->num_of_bufs++;
+
+ remaining_len -= slot->len;
+
+ /* Save buf idx before advancing */
+ *nm_buf_idx = slot->buf_idx;
+ nm_buf_idx++;
+ slot->buf_idx = 0;
+
+ /* Advance to the next socket */
+ ctx->nm_i = nm_next(ctx->nm_i, ctx->lim);
+ slot = &ctx->slots[ctx->nm_i];
+ nm_info->sockets_used++;
+ }
+
+ return (0);
+
+error_map:
+ ena_netmap_unmap_last_socket_chain(ctx, tx_info);
+
+ return (rc);
+}
+
+static void
+ena_netmap_unmap_last_socket_chain(struct ena_netmap_ctx *ctx,
+ struct ena_tx_buffer *tx_info)
+{
+ struct ena_netmap_tx_info *nm_info;
+ int n;
+
+ nm_info = &tx_info->nm_info;
+
+ /**
+ * The number of used sockets may differ from the number of DMA buffers
+ * (e.g. in the LLQ mode), so they must be handled separately.
+ * First, unmap the DMA maps.
+ */
+ n = tx_info->num_of_bufs;
+ while (n--) {
+ netmap_unload_map(ctx->na, ctx->adapter->tx_buf_tag,
+ nm_info->map_seg[n]);
+ }
+ tx_info->num_of_bufs = 0;
+
+ /* Next, return the sockets back to userspace */
+ n = nm_info->sockets_used;
+ while (n--) {
+ ctx->slots[ctx->nm_i].buf_idx = nm_info->socket_buf_idx[n];
+ ctx->slots[ctx->nm_i].flags = NS_BUF_CHANGED;
+ nm_info->socket_buf_idx[n] = 0;
+ ctx->nm_i = nm_prev(ctx->nm_i, ctx->lim);
+ }
+ nm_info->sockets_used = 0;
+}
+
+static void
+ena_netmap_tx_cleanup(struct ena_netmap_ctx *ctx)
+{
+ uint16_t req_id;
+ uint16_t total_tx_descs = 0;
+
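+ /*
+ * nm_i tracks the netmap hwtail (slots returned to userspace), while
+ * nt tracks the driver's next_to_clean index in the Tx ring.
+ */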
+ ctx->nm_i = ctx->kring->nr_hwtail;
+ ctx->nt = ctx->ring->next_to_clean;
+
+ /* Reclaim buffers for completed transmissions */
+ while (ena_com_tx_comp_req_id_get(ctx->io_cq, &req_id) >= 0) {
+ if (validate_tx_req_id(ctx->ring, req_id) != 0)
+ break;
+ total_tx_descs += ena_netmap_tx_clean_one(ctx, req_id);
+ }
+
+ ctx->kring->nr_hwtail = ctx->nm_i;
+
+ if (total_tx_descs > 0) {
+ /* acknowledge completion of sent packets */
+ ctx->ring->next_to_clean = ctx->nt;
+ ena_com_comp_ack(ctx->ring->ena_com_io_sq, total_tx_descs);
+ ena_com_update_dev_comp_head(ctx->ring->ena_com_io_cq);
+ }
+}
+
+static uint16_t
+ena_netmap_tx_clean_one(struct ena_netmap_ctx *ctx, uint16_t req_id)
+{
+ struct ena_tx_buffer *tx_info;
+ struct ena_netmap_tx_info *nm_info;
+ int n;
+
+ tx_info = &ctx->ring->tx_buffer_info[req_id];
+ nm_info = &tx_info->nm_info;
+
+ /**
+ * The number of used sockets may differ from the number of DMA buffers
+ * (e.g. in the LLQ mode), so they must be handled separately.
+ * First, unmap the DMA maps.
+ */
+ for (n = 0; n < tx_info->num_of_bufs; n++) {
+ netmap_unload_map(ctx->na, ctx->adapter->tx_buf_tag,
+ nm_info->map_seg[n]);
+ }
+ tx_info->num_of_bufs = 0;
+
+ /* Next, return the sockets back to userspace */
+ for (n = 0; n < nm_info->sockets_used; n++) {
+ ctx->nm_i = nm_next(ctx->nm_i, ctx->lim);
+ ENA_ASSERT(ctx->slots[ctx->nm_i].buf_idx == 0,
+ "Tx idx is not 0.\n");
+ ctx->slots[ctx->nm_i].buf_idx = nm_info->socket_buf_idx[n];
+ ctx->slots[ctx->nm_i].flags = NS_BUF_CHANGED;
+ nm_info->socket_buf_idx[n] = 0;
+ }
+ nm_info->sockets_used = 0;
+
+ ctx->ring->free_tx_ids[ctx->nt] = req_id;
+ ctx->nt = ENA_TX_RING_IDX_NEXT(ctx->nt, ctx->lim);
+
+ return (tx_info->tx_descs);
+}
+
+static inline int
+validate_tx_req_id(struct ena_ring *tx_ring, uint16_t req_id)
+{
+ struct ena_adapter *adapter = tx_ring->adapter;
+
+ if (likely(req_id < tx_ring->ring_size))
+ return (0);
+
+ ena_trace(ENA_WARNING, "Invalid req_id: %hu\n", req_id);
+ counter_u64_add(tx_ring->tx_stats.bad_req_id, 1);
+
+ ena_trigger_reset(adapter, ENA_REGS_RESET_INV_TX_REQ_ID);
+
+ return (EFAULT);
+}
+
+static int
+ena_netmap_rxsync(struct netmap_kring *kring, int flags)
+{
+ struct ena_netmap_ctx ctx;
+ int rc;
+
+ ena_netmap_fill_ctx(kring, &ctx, ENA_IO_RXQ_IDX(kring->ring_id));
+ ctx.ring = &ctx.adapter->rx_ring[kring->ring_id];
+
+ if (ctx.kring->rhead > ctx.lim) {
+ /* Releasing slots from the Rx ring is probably not needed here. */
+ return (netmap_ring_reinit(ctx.kring));
+ }
+
+ if (unlikely((if_getdrvflags(ctx.na->ifp) & IFF_DRV_RUNNING) == 0))
+ return (0);
+
+ if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_LINK_UP, ctx.adapter)))
+ return (0);
+
+ if ((rc = ena_netmap_rx_frames(&ctx)) != 0)
+ return (rc);
+
+ ena_netmap_rx_cleanup(&ctx);
+
+ return (0);
+}
+
+static inline int
+ena_netmap_rx_frames(struct ena_netmap_ctx *ctx)
+{
+ int rc = 0;
+ int frames_counter = 0;
+
+ ctx->nt = ctx->ring->next_to_clean;
+ ctx->nm_i = ctx->kring->nr_hwtail;
+
+ while ((rc = ena_netmap_rx_frame(ctx)) == ENA_NETMAP_MORE_FRAMES) {
+ frames_counter++;
+ /* In case of multiple frames, it is not an error. */
+ rc = 0;
+ if (frames_counter > ENA_MAX_FRAMES) {
+ device_printf(ctx->adapter->pdev,
+ "Driver is stuck in the Rx loop\n");
+ break;
+ }
+ }
+
+ ctx->kring->nr_hwtail = ctx->nm_i;
+ ctx->kring->nr_kflags &= ~NKR_PENDINTR;
+ ctx->ring->next_to_clean = ctx->nt;
+
+ return (rc);
+}
+
+static inline int
+ena_netmap_rx_frame(struct ena_netmap_ctx *ctx)
+{
+ struct ena_com_rx_ctx ena_rx_ctx;
+ int rc, len = 0;
+ uint16_t buf, nm;
+
+ ena_rx_ctx.ena_bufs = ctx->ring->ena_bufs;
+ ena_rx_ctx.max_bufs = ctx->adapter->max_rx_sgl_size;
+ bus_dmamap_sync(ctx->io_cq->cdesc_addr.mem_handle.tag,
+ ctx->io_cq->cdesc_addr.mem_handle.map, BUS_DMASYNC_POSTREAD);
+
+ rc = ena_com_rx_pkt(ctx->io_cq, ctx->io_sq, &ena_rx_ctx);
+ if (unlikely(rc != 0)) {
+ ena_trace(ENA_ALERT, "Too many desc from the device.\n");
+ counter_u64_add(ctx->ring->rx_stats.bad_desc_num, 1);
+ ena_trigger_reset(ctx->adapter,
+ ENA_REGS_RESET_TOO_MANY_RX_DESCS);
+ return (rc);
+ }
+ if (unlikely(ena_rx_ctx.descs == 0))
+ return (ENA_NETMAP_NO_MORE_FRAMES);
+
+ ena_trace(ENA_NETMAP | ENA_DBG, "Rx: q %d got packet from ena. descs #:"
+ " %d l3 proto %d l4 proto %d hash: %x\n", ctx->ring->qid,
+ ena_rx_ctx.descs, ena_rx_ctx.l3_proto, ena_rx_ctx.l4_proto,
+ ena_rx_ctx.hash);
+
+ for (buf = 0; buf < ena_rx_ctx.descs; buf++)
+ if ((rc = ena_netmap_rx_load_desc(ctx, buf, &len)) != 0)
+ break;
+ /*
+ * ena_netmap_rx_load_desc() doesn't know the total number of
+ * descriptors. It sets the NS_MOREFRAG flag on every slot, so the
+ * flag on the last slot of the frame is cleared here.
+ */
+ ctx->slots[nm_prev(ctx->nm_i, ctx->lim)].flags = NS_BUF_CHANGED;
+
+ if (rc != 0) {
+ goto rx_clear_desc;
+ }
+
+ bus_dmamap_sync(ctx->io_cq->cdesc_addr.mem_handle.tag,
+ ctx->io_cq->cdesc_addr.mem_handle.map, BUS_DMASYNC_PREREAD);
+
+ counter_enter();
+ counter_u64_add_protected(ctx->ring->rx_stats.bytes, len);
+ counter_u64_add_protected(ctx->adapter->hw_stats.rx_bytes, len);
+ counter_u64_add_protected(ctx->ring->rx_stats.cnt, 1);
+ counter_u64_add_protected(ctx->adapter->hw_stats.rx_packets, 1);
+ counter_exit();
+
+ return (ENA_NETMAP_MORE_FRAMES);
+
+rx_clear_desc:
+ nm = ctx->nm_i;
+
+ /* Remove failed packet from ring */
+ while (buf--) {
+ ctx->slots[nm].flags = 0;
+ ctx->slots[nm].len = 0;
+ nm = nm_prev(nm, ctx->lim);
+ }
+
+ return (rc);
+}
+
+static inline int
+ena_netmap_rx_load_desc(struct ena_netmap_ctx *ctx, uint16_t buf, int *len)
+{
+ struct ena_rx_buffer *rx_info;
+ uint16_t req_id;
+ int rc;
+
+ req_id = ctx->ring->ena_bufs[buf].req_id;
+ rc = validate_rx_req_id(ctx->ring, req_id);
+ if (unlikely(rc != 0))
+ return (rc);
+
+ rx_info = &ctx->ring->rx_buffer_info[req_id];
+ bus_dmamap_sync(ctx->adapter->rx_buf_tag, rx_info->map,
+ BUS_DMASYNC_POSTREAD);
+ netmap_unload_map(ctx->na, ctx->adapter->rx_buf_tag, rx_info->map);
+
+ ENA_ASSERT(ctx->slots[ctx->nm_i].buf_idx == 0, "Rx idx is not 0.\n");
+
+ ctx->slots[ctx->nm_i].buf_idx = rx_info->netmap_buf_idx;
+ rx_info->netmap_buf_idx = 0;
+ /*
+ * Set NS_MOREFRAG on all slots.
+ * Then ena_netmap_rx_frame() clears it on the last one.
+ */
+ ctx->slots[ctx->nm_i].flags |= NS_MOREFRAG | NS_BUF_CHANGED;
+ ctx->slots[ctx->nm_i].len = ctx->ring->ena_bufs[buf].len;
+ *len += ctx->slots[ctx->nm_i].len;
+ ctx->ring->free_rx_ids[ctx->nt] = req_id;
+ ena_trace(ENA_DBG, "rx_info %p, buf_idx %d, paddr %jx, nm: %d\n",
+ rx_info, ctx->slots[ctx->nm_i].buf_idx,
+ (uintmax_t)rx_info->ena_buf.paddr, ctx->nm_i);
+
+ ctx->nm_i = nm_next(ctx->nm_i, ctx->lim);
+ ctx->nt = ENA_RX_RING_IDX_NEXT(ctx->nt, ctx->ring->ring_size);
+
+ return (0);
+}
+
+static inline void
+ena_netmap_rx_cleanup(struct ena_netmap_ctx *ctx)
+{
+ int refill_required;
+
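+ /*
+ * Slots up to rhead have been released by userspace; refill them with
+ * fresh Rx buffers, keeping one slot unused whenever the ring still
+ * holds unread frames (nr_hwcur != nr_hwtail).
+ */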
+ refill_required = ctx->kring->rhead - ctx->kring->nr_hwcur;
+ if (ctx->kring->nr_hwcur != ctx->kring->nr_hwtail)
+ refill_required -= 1;
+
+ if (refill_required == 0)
+ return;
+ else if (refill_required < 0)
+ refill_required += ctx->kring->nkr_num_slots;
+
+ ena_refill_rx_bufs(ctx->ring, refill_required);
+}
+
+static inline void
+ena_netmap_fill_ctx(struct netmap_kring *kring, struct ena_netmap_ctx *ctx,
+ uint16_t ena_qid)
+{
+ ctx->kring = kring;
+ ctx->na = kring->na;
+ ctx->adapter = ctx->na->ifp->if_softc;
+ ctx->lim = kring->nkr_num_slots - 1;
+ ctx->io_cq = &ctx->adapter->ena_dev->io_cq_queues[ena_qid];
+ ctx->io_sq = &ctx->adapter->ena_dev->io_sq_queues[ena_qid];
+ ctx->slots = kring->ring->slot;
+}
+
+void
+ena_netmap_unload(struct ena_adapter *adapter, bus_dmamap_t map)
+{
+ struct netmap_adapter *na = NA(adapter->ifp);
+
+ netmap_unload_map(na, adapter->tx_buf_tag, map);
+}
+
+#endif /* DEV_NETMAP */
diff --git a/sys/dev/ena/ena_netmap.h b/sys/dev/ena/ena_netmap.h
new file mode 100644
index 000000000000..7f13da21e2b4
--- /dev/null
+++ b/sys/dev/ena/ena_netmap.h
@@ -0,0 +1,60 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) 2015-2020 Amazon.com, Inc. or its affiliates.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ *
+ */
+
+#ifndef _ENA_NETMAP_H_
+#define _ENA_NETMAP_H_
+
+/* Undef (un)likely as they are defined in netmap_kern.h */
+#ifdef likely
+#undef likely
+#endif /* likely */
+#ifdef unlikely
+#undef unlikely
+#endif /* unlikely */
+
+#include <net/netmap.h>
+#include <sys/selinfo.h>
+#include <dev/netmap/netmap_kern.h>
+
+int ena_netmap_attach(struct ena_adapter *adapter);
+int ena_netmap_alloc_rx_slot(struct ena_adapter *adapter,
+ struct ena_ring *rx_ring, struct ena_rx_buffer *rx_info);
+void ena_netmap_free_rx_slot(struct ena_adapter *adapter,
+ struct ena_ring *rx_ring, struct ena_rx_buffer *rx_info);
+bool ena_rx_ring_in_netmap(struct ena_adapter *adapter, int qid);
+bool ena_tx_ring_in_netmap(struct ena_adapter *adapter, int qid);
+void ena_netmap_reset_rx_ring(struct ena_adapter *adapter, int qid);
+void ena_netmap_reset_tx_ring(struct ena_adapter *adapter, int qid);
+void ena_netmap_unload(struct ena_adapter *adapter, bus_dmamap_t map);
+
+#endif /* _ENA_NETMAP_H_ */
diff --git a/sys/dev/ena/ena_sysctl.c b/sys/dev/ena/ena_sysctl.c
index 81331a79ecf5..55d9c96da4c8 100644
--- a/sys/dev/ena/ena_sysctl.c
+++ b/sys/dev/ena/ena_sysctl.c
@@ -1,7 +1,7 @@
/*-
* BSD LICENSE
*
- * Copyright (c) 2015-2017 Amazon.com, Inc. or its affiliates.
+ * Copyright (c) 2015-2020 Amazon.com, Inc. or its affiliates.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -34,12 +34,41 @@ __FBSDID("$FreeBSD$");
static void ena_sysctl_add_wd(struct ena_adapter *);
static void ena_sysctl_add_stats(struct ena_adapter *);
+static void ena_sysctl_add_tuneables(struct ena_adapter *);
+static int ena_sysctl_buf_ring_size(SYSCTL_HANDLER_ARGS);
+static int ena_sysctl_rx_queue_size(SYSCTL_HANDLER_ARGS);
+static int ena_sysctl_io_queues_nb(SYSCTL_HANDLER_ARGS);
+
+static SYSCTL_NODE(_hw, OID_AUTO, ena, CTLFLAG_RD, 0, "ENA driver parameters");
+
+/*
+ * Logging level for changing verbosity of the output
+ */
+int ena_log_level = ENA_ALERT | ENA_WARNING;
+SYSCTL_INT(_hw_ena, OID_AUTO, log_level, CTLFLAG_RWTUN,
+ &ena_log_level, 0, "Logging level indicating verbosity of the logs");
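+/*
+ * As a CTLFLAG_RWTUN sysctl, the log level can be set both as a loader
+ * tunable and at runtime via sysctl(8) as hw.ena.log_level.
+ */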
+
+SYSCTL_CONST_STRING(_hw_ena, OID_AUTO, driver_version, CTLFLAG_RD,
+ DRV_MODULE_VERSION, "ENA driver version");
+
+/*
+ * Use 9k mbufs for the Rx buffers. Defaults to 0 (use page size mbufs instead).
+ * Using 9k mbufs in low memory conditions might cause the allocation to take a
+ * lot of time and lead to OS instability, as the kernel has to search for
+ * contiguous pages.
+ * However, page size mbufs give slightly lower throughput than 9k mbufs, so if
+ * network performance is the priority, 9k mbufs can be enabled.
+ */
+int ena_enable_9k_mbufs = 0;
+SYSCTL_INT(_hw_ena, OID_AUTO, enable_9k_mbufs, CTLFLAG_RDTUN,
+ &ena_enable_9k_mbufs, 0, "Use 9 kB mbufs for Rx descriptors");
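+/*
+ * As a CTLFLAG_RDTUN sysctl, this can only be set at boot time, e.g. with
+ * hw.ena.enable_9k_mbufs="1" in /boot/loader.conf.
+ */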
void
ena_sysctl_add_nodes(struct ena_adapter *adapter)
{
ena_sysctl_add_wd(adapter);
ena_sysctl_add_stats(adapter);
+ ena_sysctl_add_tuneables(adapter);
}
static void
@@ -132,7 +161,7 @@ ena_sysctl_add_stats(struct ena_adapter *adapter)
CTLFLAG_RD, &dev_stats->admin_q_pause,
"Admin queue pauses");
- for (i = 0; i < adapter->num_queues; ++i, ++tx_ring, ++rx_ring) {
+ for (i = 0; i < adapter->num_io_queues; ++i, ++tx_ring, ++rx_ring) {
snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO,
@@ -176,6 +205,16 @@ ena_sysctl_add_stats(struct ena_adapter *adapter)
"mbuf_collapse_err", CTLFLAG_RD,
&tx_stats->collapse_err,
"Mbuf collapse failures");
+ SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
+ "queue_wakeups", CTLFLAG_RD,
+ &tx_stats->queue_wakeup, "Queue wakeups");
+ SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
+ "queue_stops", CTLFLAG_RD,
+ &tx_stats->queue_stop, "Queue stops");
+ SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
+ "llq_buffer_copy", CTLFLAG_RD,
+ &tx_stats->llq_buffer_copy,
+ "Header copies for llq transaction");
/* RX specific stats */
rx_node = SYSCTL_ADD_NODE(ctx, queue_list, OID_AUTO,
@@ -231,21 +270,193 @@ ena_sysctl_add_stats(struct ena_adapter *adapter)
&hw_stats->tx_bytes, "Bytes transmitted");
SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "rx_drops", CTLFLAG_RD,
&hw_stats->rx_drops, "Receive packet drops");
+ SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "tx_drops", CTLFLAG_RD,
+ &hw_stats->tx_drops, "Transmit packet drops");
/* ENA Admin queue stats */
admin_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "admin_stats",
CTLFLAG_RD, NULL, "ENA Admin Queue statistics");
admin_list = SYSCTL_CHILDREN(admin_node);
- SYSCTL_ADD_U32(ctx, admin_list, OID_AUTO, "aborted_cmd", CTLFLAG_RD,
+ SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "aborted_cmd", CTLFLAG_RD,
&admin_stats->aborted_cmd, 0, "Aborted commands");
- SYSCTL_ADD_U32(ctx, admin_list, OID_AUTO, "sumbitted_cmd", CTLFLAG_RD,
+ SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "sumbitted_cmd", CTLFLAG_RD,
&admin_stats->submitted_cmd, 0, "Submitted commands");
- SYSCTL_ADD_U32(ctx, admin_list, OID_AUTO, "completed_cmd", CTLFLAG_RD,
+ SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "completed_cmd", CTLFLAG_RD,
&admin_stats->completed_cmd, 0, "Completed commands");
- SYSCTL_ADD_U32(ctx, admin_list, OID_AUTO, "out_of_space", CTLFLAG_RD,
+ SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "out_of_space", CTLFLAG_RD,
&admin_stats->out_of_space, 0, "Queue out of space");
- SYSCTL_ADD_U32(ctx, admin_list, OID_AUTO, "no_completion", CTLFLAG_RD,
+ SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "no_completion", CTLFLAG_RD,
&admin_stats->no_completion, 0, "Commands not completed");
}
+static void
+ena_sysctl_add_tuneables(struct ena_adapter *adapter)
+{
+ device_t dev;
+
+ struct sysctl_ctx_list *ctx;
+ struct sysctl_oid *tree;
+ struct sysctl_oid_list *child;
+
+ dev = adapter->pdev;
+
+ ctx = device_get_sysctl_ctx(dev);
+ tree = device_get_sysctl_tree(dev);
+ child = SYSCTL_CHILDREN(tree);
+
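+ /*
+ * The tuneables below are attached to the per-device sysctl tree, so
+ * they are exposed as e.g. dev.ena.0.buf_ring_size for the first
+ * adapter.
+ */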
+ /* Tuneable number of buffers in the buf-ring (drbr) */
+ SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "buf_ring_size",
+ CTLTYPE_U32 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0,
+ ena_sysctl_buf_ring_size, "I",
+ "Size of the Tx buffer ring (drbr).");
+
+ /* Tuneable Rx ring size */
+ SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_queue_size",
+ CTLTYPE_U32 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0,
+ ena_sysctl_rx_queue_size, "I",
+ "Size of the Rx ring. The size should be a power of 2.");
+
+ /* Tuneable number of IO queues */
+ SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "io_queues_nb",
+ CTLTYPE_U32 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0,
+ ena_sysctl_io_queues_nb, "I", "Number of IO queues.");
+}
+
+static int
+ena_sysctl_buf_ring_size(SYSCTL_HANDLER_ARGS)
+{
+ struct ena_adapter *adapter = arg1;
+ uint32_t val;
+ int error;
+
+ val = 0;
+ error = sysctl_wire_old_buffer(req, sizeof(val));
+ if (error == 0) {
+ val = adapter->buf_ring_size;
+ error = sysctl_handle_int(oidp, &val, 0, req);
+ }
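+ /* A NULL newptr means this was only a read request. */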
+ if (error != 0 || req->newptr == NULL)
+ return (error);
+
+ if (!powerof2(val) || val == 0) {
+ device_printf(adapter->pdev,
+ "Requested new Tx buffer ring size (%u) is not a power of 2\n",
+ val);
+ return (EINVAL);
+ }
+
+ if (val != adapter->buf_ring_size) {
+ device_printf(adapter->pdev,
+ "Requested new Tx buffer ring size: %d. Old size: %d\n",
+ val, adapter->buf_ring_size);
+
+ error = ena_update_buf_ring_size(adapter, val);
+ } else {
+ device_printf(adapter->pdev,
+ "New Tx buffer ring size is the same as already used: %u\n",
+ adapter->buf_ring_size);
+ }
+
+ return (error);
+}
+
+static int
+ena_sysctl_rx_queue_size(SYSCTL_HANDLER_ARGS)
+{
+ struct ena_adapter *adapter = arg1;
+ uint32_t val;
+ int error;
+
+ val = 0;
+ error = sysctl_wire_old_buffer(req, sizeof(val));
+ if (error == 0) {
+ val = adapter->requested_rx_ring_size;
+ error = sysctl_handle_32(oidp, &val, 0, req);
+ }
+ if (error != 0 || req->newptr == NULL)
+ return (error);
+
+ if (val < ENA_MIN_RING_SIZE || val > adapter->max_rx_ring_size) {
+ device_printf(adapter->pdev,
+ "Requested new Rx queue size (%u) is out of range: [%u, %u]\n",
+ val, ENA_MIN_RING_SIZE, adapter->max_rx_ring_size);
+ return (EINVAL);
+ }
+
+ /* Check if the parameter is power of 2 */
+ if (!powerof2(val)) {
+ device_printf(adapter->pdev,
+ "Requested new Rx queue size (%u) is not a power of 2\n",
+ val);
+ return (EINVAL);
+ }
+
+ if (val != adapter->requested_rx_ring_size) {
+ device_printf(adapter->pdev,
+ "Requested new Rx queue size: %u. Old size: %u\n",
+ val, adapter->requested_rx_ring_size);
+
+ error = ena_update_queue_size(adapter,
+ adapter->requested_tx_ring_size, val);
+ } else {
+ device_printf(adapter->pdev,
+ "New Rx queue size is the same as already used: %u\n",
+ adapter->requested_rx_ring_size);
+ }
+
+ return (error);
+}
+
+/*
+ * Change the number of effectively used IO queues (adapter->num_io_queues).
+ */
+static int
+ena_sysctl_io_queues_nb(SYSCTL_HANDLER_ARGS)
+{
+ struct ena_adapter *adapter = arg1;
+ uint32_t tmp = 0;
+ int error;
+
+ error = sysctl_wire_old_buffer(req, sizeof(tmp));
+ if (error == 0) {
+ tmp = adapter->num_io_queues;
+ error = sysctl_handle_int(oidp, &tmp, 0, req);
+ }
+ if (error != 0 || req->newptr == NULL)
+ return (error);
+
+ if (tmp == 0) {
+ device_printf(adapter->pdev,
+ "Requested number of IO queues is zero\n");
+ return (EINVAL);
+ }
+
+ /*
+ * adapter::max_num_io_queues is the HW capability; the available system
+ * resources may impose a tighter limit. Therefore the relation
+ * `adapter::max_num_io_queues >= adapter::msix_vecs` always holds, while
+ * `adapter::msix_vecs` may vary across device resets
+ * (`ena_destroy_device()` + `ena_restore_device()`).
+ */
+ if (tmp > (adapter->msix_vecs - ENA_ADMIN_MSIX_VEC)) {
+ device_printf(adapter->pdev,
+ "Requested number of IO queues is higher than maximum "
+ "allowed (%u)\n", adapter->msix_vecs - ENA_ADMIN_MSIX_VEC);
+ return (EINVAL);
+ }
+ if (tmp == adapter->num_io_queues) {
+ device_printf(adapter->pdev,
+ "Requested number of IO queues is equal to current value "
+ "(%u)\n", adapter->num_io_queues);
+ } else {
+ device_printf(adapter->pdev,
+ "Requested new number of IO queues: %u, current value: "
+ "%u\n", tmp, adapter->num_io_queues);
+
+ error = ena_update_io_queue_nb(adapter, tmp);
+ }
+
+ return (error);
+}
diff --git a/sys/dev/ena/ena_sysctl.h b/sys/dev/ena/ena_sysctl.h
index b2c5ccca38c0..6041969e57e0 100644
--- a/sys/dev/ena/ena_sysctl.h
+++ b/sys/dev/ena/ena_sysctl.h
@@ -1,7 +1,7 @@
/*-
* BSD LICENSE
*
- * Copyright (c) 2015-2017 Amazon.com, Inc. or its affiliates.
+ * Copyright (c) 2015-2020 Amazon.com, Inc. or its affiliates.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -39,6 +39,9 @@
#include "ena.h"
-void ena_sysctl_add_nodes(struct ena_adapter *);
+void ena_sysctl_add_nodes(struct ena_adapter *adapter);
+
+extern int ena_enable_9k_mbufs;
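+/* Rx mbuf cluster size, selected by the enable_9k_mbufs tunable. */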
+#define ena_mbuf_sz (ena_enable_9k_mbufs ? MJUM9BYTES : MJUMPAGESIZE)
#endif /* !(ENA_SYSCTL_H) */
diff --git a/sys/modules/ena/Makefile b/sys/modules/ena/Makefile
index fc70205ae956..72d0028367e8 100644
--- a/sys/modules/ena/Makefile
+++ b/sys/modules/ena/Makefile
@@ -1,7 +1,7 @@
#
# BSD LICENSE
#
-# Copyright (c) 2015-2017 Amazon.com, Inc. or its affiliates.
+# Copyright (c) 2015-2019 Amazon.com, Inc. or its affiliates.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
@@ -34,7 +34,8 @@
${SRCTOP}/sys/contrib/ena-com
KMOD = if_ena
-SRCS = ena.c ena_com.c ena_eth_com.c ena_sysctl.c
+SRCS = ena_com.c ena_eth_com.c
+SRCS += ena.c ena_sysctl.c ena_datapath.c ena_netmap.c
SRCS += device_if.h bus_if.h pci_if.h
CFLAGS += -I${SRCTOP}/sys/contrib