aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--share/man/man4/cxgbe.478
-rw-r--r--sys/dev/cxgbe/adapter.h231
-rw-r--r--sys/dev/cxgbe/common/common.h35
-rw-r--r--sys/dev/cxgbe/common/t4_hw.c365
-rw-r--r--sys/dev/cxgbe/common/t4_hw.h78
-rw-r--r--sys/dev/cxgbe/firmware/t4fw_cfg.txt132
-rw-r--r--sys/dev/cxgbe/firmware/t4fw_cfg_uwire.txt503
-rw-r--r--sys/dev/cxgbe/firmware/t4fw_interface.h (renamed from sys/dev/cxgbe/common/t4fw_interface.h)640
-rw-r--r--sys/dev/cxgbe/offload.h71
-rw-r--r--sys/dev/cxgbe/osdep.h1
-rw-r--r--sys/dev/cxgbe/t4_ioctl.h27
-rw-r--r--sys/dev/cxgbe/t4_l2t.c675
-rw-r--r--sys/dev/cxgbe/t4_l2t.h12
-rw-r--r--sys/dev/cxgbe/t4_main.c3023
-rw-r--r--sys/dev/cxgbe/t4_sge.c2191
-rw-r--r--sys/modules/cxgbe/Makefile1
-rw-r--r--sys/modules/cxgbe/firmware/Makefile27
-rw-r--r--tools/tools/cxgbetool/cxgbetool.c64
18 files changed, 6282 insertions, 1872 deletions
diff --git a/share/man/man4/cxgbe.4 b/share/man/man4/cxgbe.4
index ab2979a18b2b..4bf5ac3ec462 100644
--- a/share/man/man4/cxgbe.4
+++ b/share/man/man4/cxgbe.4
@@ -99,18 +99,29 @@ Tunables can be set at the
prompt before booting the kernel or stored in
.Xr loader.conf 5 .
.Bl -tag -width indent
-.It Va hw.cxgbe.max_ntxq_10G_port
-The maximum number of tx queues to use for a 10Gb port.
-The default value is 8.
-.It Va hw.cxgbe.max_nrxq_10G_port
-The maximum number of rx queues to use for a 10Gb port.
-The default value is 8.
-.It Va hw.cxgbe.max_ntxq_1G_port
-The maximum number of tx queues to use for a 1Gb port.
-The default value is 2.
-.It Va hw.cxgbe.max_nrxq_1G_port
-The maximum number of rx queues to use for a 1Gb port.
-The default value is 2.
+.It Va hw.cxgbe.ntxq10g
+The number of tx queues to use for a 10Gb port. The default is 16 or the number
+of CPU cores in the system, whichever is less.
+.It Va hw.cxgbe.nrxq10g
+The number of rx queues to use for a 10Gb port. The default is 8 or the number
+of CPU cores in the system, whichever is less.
+.It Va hw.cxgbe.ntxq1g
+The number of tx queues to use for a 1Gb port. The default is 4 or the number
+of CPU cores in the system, whichever is less.
+.It Va hw.cxgbe.nrxq1g
+The number of rx queues to use for a 1Gb port. The default is 2 or the number
+of CPU cores in the system, whichever is less.
+.It Va hw.cxgbe.nofldtxq10g
+The number of TOE tx queues to use for a 10Gb port. The default is 8 or the
+number of CPU cores in the system, whichever is less.
+.It Va hw.cxgbe.nofldrxq10g
+The number of TOE rx queues to use for a 10Gb port. The default is 2 or the
+number of CPU cores in the system, whichever is less.
+.It Va hw.cxgbe.nofldtxq1g
+The number of TOE tx queues to use for a 1Gb port. The default is 2 or the
+number of CPU cores in the system, whichever is less.
+.It Va hw.cxgbe.nofldrxq1g
+The number of TOE rx queues to use for a 1Gb port. The default is 1.
.It Va hw.cxgbe.holdoff_timer_idx_10G
.It Va hw.cxgbe.holdoff_timer_idx_1G
The timer index value to use to delay interrupts.
@@ -119,6 +130,8 @@ by default (all values are in microseconds) and the index selects a
value from this list.
The default value is 1 for both 10Gb and 1Gb ports, which means the
timer value is 5us.
+Different cxgbe interfaces can be assigned different values at any time via the
+dev.cxgbe.X.holdoff_tmr_idx sysctl.
.It Va hw.cxgbe.holdoff_pktc_idx_10G
.It Va hw.cxgbe.holdoff_pktc_idx_1G
The packet-count index value to use to delay interrupts.
@@ -127,6 +140,11 @@ and the index selects a value from this list.
The default value is 2 for both 10Gb and 1Gb ports, which means 16
packets (or the holdoff timer going off) before an interrupt is
generated.
+-1 disables packet counting.
+Different cxgbe interfaces can be assigned different values via the
+dev.cxgbe.X.holdoff_pktc_idx sysctl.
+This sysctl works only when the interface has never been marked up (as done by
+ifconfig up).
.It Va hw.cxgbe.qsize_txq
The size, in number of entries, of the descriptor ring used for a tx
queue.
@@ -134,10 +152,46 @@ A buf_ring of the same size is also allocated for additional
software queuing. See
.Xr ifnet 9 .
The default value is 1024.
+Different cxgbe interfaces can be assigned different values via the
+dev.cxgbe.X.qsize_txq sysctl.
+This sysctl works only when the interface has never been marked up (as done by
+ifconfig up).
.It Va hw.cxgbe.qsize_rxq
The size, in number of entries, of the descriptor ring used for an
rx queue.
The default value is 1024.
+Different cxgbe interfaces can be assigned different values via the
+dev.cxgbe.X.qsize_rxq sysctl.
+This sysctl works only when the interface has never been marked up (as done by
+ifconfig up).
+.It Va hw.cxgbe.interrupt_types
+The interrupt types that the driver is allowed to use.
+Bit 0 represents INTx (line interrupts), bit 1 MSI, bit 2 MSI-X.
+The default is 7 (all allowed).
+The driver will select the best possible type out of the allowed types by
+itself.
+.It Va hw.cxgbe.config_file
+Select a pre-packaged device configuration file.
+A configuration file contains a recipe for partitioning and configuring the
+hardware resources on the card.
+This tunable is for specialized applications only and should not be used in
+normal operation.
+The configuration profile currently in use is available in the dev.t4nex.X.cf
+and dev.t4nex.X.cfcsum sysctls.
+.It Va hw.cxgbe.linkcaps_allowed
+.It Va hw.cxgbe.niccaps_allowed
+.It Va hw.cxgbe.toecaps_allowed
+.It Va hw.cxgbe.rdmacaps_allowed
+.It Va hw.cxgbe.iscsicaps_allowed
+.It Va hw.cxgbe.fcoecaps_allowed
+Disallowing capabilities provides a hint to the driver and firmware to not
+reserve hardware resources for that feature.
+Each of these is a bit field with a bit for each sub-capability within the
+capability.
+These tunables are for specialized applications only and should not be used in
+normal operation.
+The capabilities for which hardware resources have been reserved are listed in
+dev.t4nex.X.*caps sysctls.
.El
.Sh SUPPORT
For general information and support,
diff --git a/sys/dev/cxgbe/adapter.h b/sys/dev/cxgbe/adapter.h
index 8624fc19dfa0..7e56932c2a72 100644
--- a/sys/dev/cxgbe/adapter.h
+++ b/sys/dev/cxgbe/adapter.h
@@ -31,6 +31,7 @@
#ifndef __T4_ADAPTER_H__
#define __T4_ADAPTER_H__
+#include <sys/kernel.h>
#include <sys/bus.h>
#include <sys/rman.h>
#include <sys/types.h>
@@ -46,8 +47,9 @@
#include <netinet/tcp_lro.h>
#include "offload.h"
-#include "common/t4fw_interface.h"
+#include "firmware/t4fw_interface.h"
+#define T4_CFGNAME "t4fw_cfg"
#define T4_FWNAME "t4fw"
MALLOC_DECLARE(M_CXGBE);
@@ -110,25 +112,21 @@ enum {
FW_IQ_QSIZE = 256,
FW_IQ_ESIZE = 64, /* At least 64 mandated by the firmware spec */
- INTR_IQ_QSIZE = 64,
- INTR_IQ_ESIZE = 64, /* Handles some CPLs too, do not reduce */
-
- CTRL_EQ_QSIZE = 128,
- CTRL_EQ_ESIZE = 64,
-
RX_IQ_QSIZE = 1024,
RX_IQ_ESIZE = 64, /* At least 64 so CPL_RX_PKT will fit */
- RX_FL_ESIZE = 64, /* 8 64bit addresses */
+ EQ_ESIZE = 64, /* All egress queues use this entry size */
+ RX_FL_ESIZE = EQ_ESIZE, /* 8 64bit addresses */
#if MJUMPAGESIZE != MCLBYTES
FL_BUF_SIZES = 4, /* cluster, jumbop, jumbo9k, jumbo16k */
#else
FL_BUF_SIZES = 3, /* cluster, jumbo9k, jumbo16k */
#endif
+ CTRL_EQ_QSIZE = 128,
+
TX_EQ_QSIZE = 1024,
- TX_EQ_ESIZE = 64,
TX_SGL_SEGS = 36,
TX_WR_FLITS = SGE_MAX_WR_LEN / 8
};
@@ -144,13 +142,16 @@ enum {
/* adapter flags */
FULL_INIT_DONE = (1 << 0),
FW_OK = (1 << 1),
- INTR_SHARED = (1 << 2), /* one set of intrq's for all ports */
+ INTR_DIRECT = (1 << 2), /* direct interrupts for everything */
+ MASTER_PF = (1 << 3),
+ ADAP_SYSCTL_CTX = (1 << 4),
CXGBE_BUSY = (1 << 9),
/* port flags */
DOOMED = (1 << 0),
- VI_ENABLED = (1 << 1),
+ PORT_INIT_DONE = (1 << 1),
+ PORT_SYSCTL_CTX = (1 << 2),
};
#define IS_DOOMED(pi) (pi->flags & DOOMED)
@@ -186,6 +187,12 @@ struct port_info {
int first_txq; /* index of first tx queue */
int nrxq; /* # of rx queues */
int first_rxq; /* index of first rx queue */
+#ifndef TCP_OFFLOAD_DISABLE
+ int nofldtxq; /* # of offload tx queues */
+ int first_ofld_txq; /* index of first offload tx queue */
+ int nofldrxq; /* # of offload rx queues */
+ int first_ofld_rxq; /* index of first offload rx queue */
+#endif
int tmr_idx;
int pktc_idx;
int qsize_rxq;
@@ -194,11 +201,8 @@ struct port_info {
struct link_config link_cfg;
struct port_stats stats;
- struct taskqueue *tq;
struct callout tick;
- struct sysctl_ctx_list ctx; /* lives from ifconfig up to down */
- struct sysctl_oid *oid_rxq;
- struct sysctl_oid *oid_txq;
+ struct sysctl_ctx_list ctx; /* from ifconfig up to driver detach */
uint8_t hw_addr[ETHER_ADDR_LEN]; /* factory MAC address, won't change */
};
@@ -222,17 +226,26 @@ struct tx_map {
bus_dmamap_t map;
};
+/* DMA maps used for tx */
+struct tx_maps {
+ struct tx_map *maps;
+ uint32_t map_total; /* # of DMA maps */
+ uint32_t map_pidx; /* next map to be used */
+ uint32_t map_cidx; /* reclaimed up to this index */
+ uint32_t map_avail; /* # of available maps */
+};
+
struct tx_sdesc {
uint8_t desc_used; /* # of hardware descriptors used by the WR */
uint8_t credits; /* NIC txq: # of frames sent out in the WR */
};
-typedef void (iq_intr_handler_t)(void *);
-
enum {
/* iq flags */
- IQ_ALLOCATED = (1 << 1), /* firmware resources allocated */
- IQ_STARTED = (1 << 2), /* started */
+ IQ_ALLOCATED = (1 << 0), /* firmware resources allocated */
+ IQ_HAS_FL = (1 << 1), /* iq associated with a freelist */
+ IQ_INTR = (1 << 2), /* iq takes direct interrupt */
+ IQ_LRO_ENABLED = (1 << 3), /* iq is an eth rxq with LRO enabled */
/* iq state */
IQS_DISABLED = 0,
@@ -252,26 +265,35 @@ struct sge_iq {
uint16_t abs_id; /* absolute SGE id for the iq */
int8_t intr_pktc_idx; /* packet count threshold index */
int8_t pad0;
- iq_intr_handler_t *handler;
__be64 *desc; /* KVA of descriptor ring */
- volatile uint32_t state;
+ volatile int state;
struct adapter *adapter;
const __be64 *cdesc; /* current descriptor */
uint8_t gen; /* generation bit */
uint8_t intr_params; /* interrupt holdoff parameters */
- uint8_t intr_next; /* holdoff for next interrupt */
+ uint8_t intr_next; /* XXX: holdoff for next interrupt */
uint8_t esize; /* size (bytes) of each entry in the queue */
uint16_t qsize; /* size (# of entries) of the queue */
uint16_t cidx; /* consumer index */
- uint16_t cntxt_id; /* SGE context id for the iq */
+ uint16_t cntxt_id; /* SGE context id for the iq */
+
+ STAILQ_ENTRY(sge_iq) link;
};
enum {
+ EQ_CTRL = 1,
+ EQ_ETH = 2,
+#ifndef TCP_OFFLOAD_DISABLE
+ EQ_OFLD = 3,
+#endif
+
/* eq flags */
- EQ_ALLOCATED = (1 << 1), /* firmware resources allocated */
- EQ_STARTED = (1 << 2), /* started */
- EQ_CRFLUSHED = (1 << 3), /* expecting an update from SGE */
+ EQ_TYPEMASK = 7, /* 3 lsbits hold the type */
+ EQ_ALLOCATED = (1 << 3), /* firmware resources allocated */
+ EQ_DOOMED = (1 << 4), /* about to be destroyed */
+ EQ_CRFLUSHED = (1 << 5), /* expecting an update from SGE */
+ EQ_STALLED = (1 << 6), /* out of hw descriptors or dmamaps */
};
/*
@@ -281,10 +303,11 @@ enum {
* consumes them) but it's special enough to have its own struct (see sge_fl).
*/
struct sge_eq {
+ unsigned int flags; /* MUST be first */
+ unsigned int cntxt_id; /* SGE context id for the eq */
bus_dma_tag_t desc_tag;
bus_dmamap_t desc_map;
char lockname[16];
- unsigned int flags;
struct mtx eq_lock;
struct tx_desc *desc; /* KVA of descriptor ring */
@@ -297,9 +320,24 @@ struct sge_eq {
uint16_t pidx; /* producer idx (desc idx) */
uint16_t pending; /* # of descriptors used since last doorbell */
uint16_t iqid; /* iq that gets egr_update for the eq */
- unsigned int cntxt_id; /* SGE context id for the eq */
+ uint8_t tx_chan; /* tx channel used by the eq */
+ struct task tx_task;
+ struct callout tx_callout;
+
+ /* stats */
+
+ uint32_t egr_update; /* # of SGE_EGR_UPDATE notifications for eq */
+ uint32_t unstalled; /* recovered from stall */
+};
+
+enum {
+ FL_STARVING = (1 << 0), /* on the adapter's list of starving fl's */
+ FL_DOOMED = (1 << 1), /* about to be destroyed */
};
+#define FL_RUNNING_LOW(fl) (fl->cap - fl->needed <= fl->lowat)
+#define FL_NOT_RUNNING_LOW(fl) (fl->cap - fl->needed >= 2 * fl->lowat)
+
struct sge_fl {
bus_dma_tag_t desc_tag;
bus_dmamap_t desc_map;
@@ -307,6 +345,7 @@ struct sge_fl {
uint8_t tag_idx;
struct mtx fl_lock;
char lockname[16];
+ int flags;
__be64 *desc; /* KVA of descriptor ring, ptr to addresses */
bus_addr_t ba; /* bus address of descriptor ring */
@@ -317,8 +356,10 @@ struct sge_fl {
uint32_t cidx; /* consumer idx (buffer idx, NOT hw desc idx) */
uint32_t pidx; /* producer idx (buffer idx, NOT hw desc idx) */
uint32_t needed; /* # of buffers needed to fill up fl. */
+ uint32_t lowat; /* # of buffers <= this means fl needs help */
uint32_t pending; /* # of bufs allocated since last doorbell */
unsigned int dmamap_failed;
+ TAILQ_ENTRY(sge_fl) link; /* All starving freelists */
};
/* txq: SGE egress queue + what's needed for Ethernet NIC */
@@ -330,14 +371,8 @@ struct sge_txq {
struct buf_ring *br; /* tx buffer ring */
struct tx_sdesc *sdesc; /* KVA of software descriptor ring */
struct mbuf *m; /* held up due to temporary resource shortage */
- struct task resume_tx;
- /* DMA maps used for tx */
- struct tx_map *maps;
- uint32_t map_total; /* # of DMA maps */
- uint32_t map_pidx; /* next map to be used */
- uint32_t map_cidx; /* reclaimed up to this index */
- uint32_t map_avail; /* # of available maps */
+ struct tx_maps txmaps;
/* stats for common events first */
@@ -354,20 +389,14 @@ struct sge_txq {
uint32_t no_dmamap; /* no DMA map to load the mbuf */
uint32_t no_desc; /* out of hardware descriptors */
- uint32_t egr_update; /* # of SGE_EGR_UPDATE notifications for txq */
} __aligned(CACHE_LINE_SIZE);
-enum {
- RXQ_LRO_ENABLED = (1 << 0)
-};
-
/* rxq: SGE ingress queue + SGE free list + miscellaneous items */
struct sge_rxq {
struct sge_iq iq; /* MUST be first */
- struct sge_fl fl;
+ struct sge_fl fl; /* MUST follow iq */
struct ifnet *ifp; /* the interface this rxq belongs to */
- unsigned int flags;
#ifdef INET
struct lro_ctrl lro; /* LRO state */
#endif
@@ -381,12 +410,28 @@ struct sge_rxq {
} __aligned(CACHE_LINE_SIZE);
-/* ctrlq: SGE egress queue + stats for control queue */
-struct sge_ctrlq {
+#ifndef TCP_OFFLOAD_DISABLE
+/* ofld_rxq: SGE ingress queue + SGE free list + miscellaneous items */
+struct sge_ofld_rxq {
+ struct sge_iq iq; /* MUST be first */
+ struct sge_fl fl; /* MUST follow iq */
+} __aligned(CACHE_LINE_SIZE);
+#endif
+
+/*
+ * wrq: SGE egress queue that is given prebuilt work requests. Both the control
+ * and offload tx queues are of this type.
+ */
+struct sge_wrq {
struct sge_eq eq; /* MUST be first */
+ struct adapter *adapter;
+ struct mbuf *head; /* held up due to lack of descriptors */
+ struct mbuf *tail; /* valid only if head is valid */
+
/* stats for common events first */
+ uint64_t tx_wrs; /* # of tx work requests */
/* stats for not-that-common events */
@@ -394,20 +439,28 @@ struct sge_ctrlq {
} __aligned(CACHE_LINE_SIZE);
struct sge {
- uint16_t timer_val[SGE_NTIMERS];
- uint8_t counter_val[SGE_NCOUNTERS];
+ int timer_val[SGE_NTIMERS];
+ int counter_val[SGE_NCOUNTERS];
int fl_starve_threshold;
- int nrxq; /* total rx queues (all ports and the rest) */
- int ntxq; /* total tx queues (all ports and the rest) */
- int niq; /* total ingress queues */
- int neq; /* total egress queues */
+ int nrxq; /* total # of Ethernet rx queues */
+ int ntxq; /* total # of Ethernet tx queues */
+#ifndef TCP_OFFLOAD_DISABLE
+ int nofldrxq; /* total # of TOE rx queues */
+ int nofldtxq; /* total # of TOE tx queues */
+#endif
+ int niq; /* total # of ingress queues */
+ int neq; /* total # of egress queues */
struct sge_iq fwq; /* Firmware event queue */
- struct sge_ctrlq *ctrlq;/* Control queues */
- struct sge_iq *intrq; /* Interrupt queues */
+ struct sge_wrq mgmtq; /* Management queue (control queue) */
+ struct sge_wrq *ctrlq; /* Control queues */
struct sge_txq *txq; /* NIC tx queues */
struct sge_rxq *rxq; /* NIC rx queues */
+#ifndef TCP_OFFLOAD_DISABLE
+ struct sge_wrq *ofld_txq; /* TOE tx queues */
+ struct sge_ofld_rxq *ofld_rxq; /* TOE rx queues */
+#endif
uint16_t iq_start;
int eq_start;
@@ -415,7 +468,12 @@ struct sge {
struct sge_eq **eqmap; /* eq->cntxt_id to eq mapping */
};
+struct rss_header;
+typedef int (*cpl_handler_t)(struct sge_iq *, const struct rss_header *,
+ struct mbuf *);
+
struct adapter {
+ SLIST_ENTRY(adapter) link;
device_t dev;
struct cdev *cdev;
@@ -444,27 +502,47 @@ struct adapter {
struct sge sge;
+ struct taskqueue *tq[NCHAN]; /* taskqueues that flush data out */
struct port_info *port[MAX_NPORTS];
uint8_t chan_map[NCHAN];
+ uint32_t filter_mode;
+#ifndef TCP_OFFLOAD_DISABLE
+ struct uld_softc tom;
+ struct tom_tunables tt;
+#endif
struct l2t_data *l2t; /* L2 table */
struct tid_info tids;
- int registered_device_map;
int open_device_map;
+#ifndef TCP_OFFLOAD_DISABLE
+ int offload_map;
+#endif
int flags;
char fw_version[32];
+ unsigned int cfcsum;
struct adapter_params params;
struct t4_virt_res vres;
- struct sysctl_ctx_list ctx; /* from first_port_up to last_port_down */
- struct sysctl_oid *oid_fwq;
- struct sysctl_oid *oid_ctrlq;
- struct sysctl_oid *oid_intrq;
+ uint16_t linkcaps;
+ uint16_t niccaps;
+ uint16_t toecaps;
+ uint16_t rdmacaps;
+ uint16_t iscsicaps;
+ uint16_t fcoecaps;
+
+ struct sysctl_ctx_list ctx; /* from adapter_full_init to full_uninit */
struct mtx sc_lock;
char lockname[16];
+
+ /* Starving free lists */
+ struct mtx sfl_lock; /* same cache-line as sc_lock? but that's ok */
+ TAILQ_HEAD(, sge_fl) sfl;
+ struct callout sfl_callout;
+
+ cpl_handler_t cpl_handler[256] __aligned(CACHE_LINE_SIZE);
};
#define ADAPTER_LOCK(sc) mtx_lock(&(sc)->sc_lock)
@@ -506,11 +584,15 @@ struct adapter {
#define for_each_rxq(pi, iter, rxq) \
rxq = &pi->adapter->sge.rxq[pi->first_rxq]; \
for (iter = 0; iter < pi->nrxq; ++iter, ++rxq)
+#define for_each_ofld_txq(pi, iter, ofld_txq) \
+ ofld_txq = &pi->adapter->sge.ofld_txq[pi->first_ofld_txq]; \
+ for (iter = 0; iter < pi->nofldtxq; ++iter, ++ofld_txq)
+#define for_each_ofld_rxq(pi, iter, ofld_rxq) \
+ ofld_rxq = &pi->adapter->sge.ofld_rxq[pi->first_ofld_rxq]; \
+ for (iter = 0; iter < pi->nofldrxq; ++iter, ++ofld_rxq)
/* One for errors, one for firmware events */
#define T4_EXTRA_INTR 2
-#define NINTRQ(sc) ((sc)->intr_count > T4_EXTRA_INTR ? \
- (sc)->intr_count - T4_EXTRA_INTR : 1)
static inline uint32_t
t4_read_reg(struct adapter *sc, uint32_t reg)
@@ -589,29 +671,52 @@ static inline bool is_10G_port(const struct port_info *pi)
return ((pi->link_cfg.supported & FW_PORT_CAP_SPEED_10G) != 0);
}
+static inline int tx_resume_threshold(struct sge_eq *eq)
+{
+ return (eq->qsize / 4);
+}
+
/* t4_main.c */
-void cxgbe_txq_start(void *, int);
+void t4_tx_task(void *, int);
+void t4_tx_callout(void *);
int t4_os_find_pci_capability(struct adapter *, int);
int t4_os_pci_save_state(struct adapter *);
int t4_os_pci_restore_state(struct adapter *);
void t4_os_portmod_changed(const struct adapter *, int);
void t4_os_link_changed(struct adapter *, int, int);
+void t4_iterate(void (*)(struct adapter *, void *), void *);
+int t4_register_cpl_handler(struct adapter *, int, cpl_handler_t);
/* t4_sge.c */
void t4_sge_modload(void);
-void t4_sge_init(struct adapter *);
+int t4_sge_init(struct adapter *);
int t4_create_dma_tag(struct adapter *);
int t4_destroy_dma_tag(struct adapter *);
int t4_setup_adapter_queues(struct adapter *);
int t4_teardown_adapter_queues(struct adapter *);
-int t4_setup_eth_queues(struct port_info *);
-int t4_teardown_eth_queues(struct port_info *);
+int t4_setup_port_queues(struct port_info *);
+int t4_teardown_port_queues(struct port_info *);
+int t4_alloc_tx_maps(struct tx_maps *, bus_dma_tag_t, int, int);
+void t4_free_tx_maps(struct tx_maps *, bus_dma_tag_t);
void t4_intr_all(void *);
void t4_intr(void *);
void t4_intr_err(void *);
void t4_intr_evt(void *);
int t4_mgmt_tx(struct adapter *, struct mbuf *);
+int t4_wrq_tx_locked(struct adapter *, struct sge_wrq *, struct mbuf *);
int t4_eth_tx(struct ifnet *, struct sge_txq *, struct mbuf *);
void t4_update_fl_bufsize(struct ifnet *);
+int can_resume_tx(struct sge_eq *);
+
+static inline int t4_wrq_tx(struct adapter *sc, struct sge_wrq *wrq, struct mbuf *m)
+{
+ int rc;
+
+ TXQ_LOCK(wrq);
+ rc = t4_wrq_tx_locked(sc, wrq, m);
+ TXQ_UNLOCK(wrq);
+ return (rc);
+}
+
#endif
diff --git a/sys/dev/cxgbe/common/common.h b/sys/dev/cxgbe/common/common.h
index 913be9bd6e27..81866df633ed 100644
--- a/sys/dev/cxgbe/common/common.h
+++ b/sys/dev/cxgbe/common/common.h
@@ -42,6 +42,15 @@ enum {
enum { MEM_EDC0, MEM_EDC1, MEM_MC };
+enum {
+ MEMWIN0_APERTURE = 2048,
+ MEMWIN0_BASE = 0x1b800,
+ MEMWIN1_APERTURE = 32768,
+ MEMWIN1_BASE = 0x28000,
+ MEMWIN2_APERTURE = 65536,
+ MEMWIN2_BASE = 0x30000,
+};
+
enum dev_master { MASTER_CANT, MASTER_MAY, MASTER_MUST };
enum dev_state { DEV_STATE_UNINIT, DEV_STATE_INIT, DEV_STATE_ERR };
@@ -53,8 +62,8 @@ enum {
};
#define FW_VERSION_MAJOR 1
-#define FW_VERSION_MINOR 3
-#define FW_VERSION_MICRO 10
+#define FW_VERSION_MINOR 4
+#define FW_VERSION_MICRO 16
struct port_stats {
u64 tx_octets; /* total # of octets in good frames */
@@ -190,7 +199,6 @@ struct tp_proxy_stats {
struct tp_cpl_stats {
u32 req[4];
u32 rsp[4];
- u32 tx_err[4];
};
struct tp_rdma_stats {
@@ -214,9 +222,9 @@ struct vpd_params {
};
struct pci_params {
- unsigned int vpd_cap_addr;
- unsigned char speed;
- unsigned char width;
+ unsigned int vpd_cap_addr;
+ unsigned short speed;
+ unsigned short width;
};
/*
@@ -239,20 +247,20 @@ struct adapter_params {
unsigned int fw_vers;
unsigned int tp_vers;
- u8 api_vers[7];
unsigned short mtus[NMTUS];
unsigned short a_wnd[NCCTRL_WIN];
unsigned short b_wnd[NCCTRL_WIN];
- unsigned int mc_size; /* MC memory size */
- unsigned int nfilters; /* size of filter region */
+ unsigned int mc_size; /* MC memory size */
+ unsigned int nfilters; /* size of filter region */
unsigned int cim_la_size;
- unsigned int nports; /* # of ethernet ports */
+ /* Used as int in sysctls, do not reduce size */
+ unsigned int nports; /* # of ethernet ports */
unsigned int portvec;
- unsigned int rev; /* chip revision */
+ unsigned int rev; /* chip revision */
unsigned int offload;
unsigned int ofldq_wr_cred;
@@ -366,6 +374,9 @@ int t4_seeprom_wp(struct adapter *adapter, int enable);
int t4_read_flash(struct adapter *adapter, unsigned int addr, unsigned int nwords,
u32 *data, int byte_oriented);
int t4_load_fw(struct adapter *adapter, const u8 *fw_data, unsigned int size);
+int t4_load_boot(struct adapter *adap, const u8 *boot_data,
+ unsigned int boot_addr, unsigned int size);
+unsigned int t4_flash_cfg_addr(struct adapter *adapter);
int t4_load_cfg(struct adapter *adapter, const u8 *cfg_data, unsigned int size);
int t4_get_fw_version(struct adapter *adapter, u32 *vers);
int t4_get_tp_version(struct adapter *adapter, u32 *vers);
@@ -460,8 +471,8 @@ int t4_wol_pat_enable(struct adapter *adap, unsigned int port, unsigned int map,
int t4_fw_hello(struct adapter *adap, unsigned int mbox, unsigned int evt_mbox,
enum dev_master master, enum dev_state *state);
int t4_fw_bye(struct adapter *adap, unsigned int mbox);
-int t4_early_init(struct adapter *adap, unsigned int mbox);
int t4_fw_reset(struct adapter *adap, unsigned int mbox, int reset);
+int t4_fw_initialize(struct adapter *adap, unsigned int mbox);
int t4_query_params(struct adapter *adap, unsigned int mbox, unsigned int pf,
unsigned int vf, unsigned int nparams, const u32 *params,
u32 *val);
diff --git a/sys/dev/cxgbe/common/t4_hw.c b/sys/dev/cxgbe/common/t4_hw.c
index dd8d0fc2705f..48462aa68664 100644
--- a/sys/dev/cxgbe/common/t4_hw.c
+++ b/sys/dev/cxgbe/common/t4_hw.c
@@ -30,10 +30,10 @@ __FBSDID("$FreeBSD$");
#include "common.h"
#include "t4_regs.h"
#include "t4_regs_values.h"
-#include "t4fw_interface.h"
+#include "firmware/t4fw_interface.h"
#undef msleep
-#define msleep(x) DELAY((x) * 1000)
+#define msleep(x) pause("t4hw", (x) * hz / 1000)
/**
* t4_wait_op_done_val - wait until an operation is completed
@@ -187,7 +187,7 @@ int t4_wr_mbox_meat(struct adapter *adap, int mbox, const void *cmd, int size,
* off to larger delays to a maximum retry delay.
*/
static const int delay[] = {
- 1, 1, 3, 5, 10, 10, 20, 50, 100, 200
+ 1, 1, 3, 5, 10, 10, 20, 50, 100
};
u32 v;
@@ -625,17 +625,6 @@ enum {
SF_RD_DATA_FAST = 0xb, /* read flash */
SF_RD_ID = 0x9f, /* read ID */
SF_ERASE_SECTOR = 0xd8, /* erase sector */
-
- FW_START_SEC = 8, /* first flash sector for FW */
- FW_END_SEC = 15, /* last flash sector for FW */
- FW_IMG_START = FW_START_SEC * SF_SEC_SIZE,
- FW_MAX_SIZE = (FW_END_SEC - FW_START_SEC + 1) * SF_SEC_SIZE,
-
- FLASH_CFG_MAX_SIZE = 0x10000 , /* max size of the flash config file */
- FLASH_CFG_OFFSET = 0x1f0000,
- FLASH_CFG_START_SEC = FLASH_CFG_OFFSET / SF_SEC_SIZE,
- FPGA_FLASH_CFG_OFFSET = 0xf0000 , /* if FPGA mode, then cfg file is at 1MB - 64KB */
- FPGA_FLASH_CFG_START_SEC = FPGA_FLASH_CFG_OFFSET / SF_SEC_SIZE,
};
/**
@@ -763,12 +752,15 @@ int t4_read_flash(struct adapter *adapter, unsigned int addr,
* @addr: the start address to write
* @n: length of data to write in bytes
* @data: the data to write
+ * @byte_oriented: whether to store data as bytes or as words
*
* Writes up to a page of data (256 bytes) to the serial flash starting
* at the given address. All the data must be written to the same page.
+ * If @byte_oriented is set the write data is stored as a byte stream
+ * (i.e. matches what is on disk), otherwise in big-endian.
*/
static int t4_write_flash(struct adapter *adapter, unsigned int addr,
- unsigned int n, const u8 *data)
+ unsigned int n, const u8 *data, int byte_oriented)
{
int ret;
u32 buf[SF_PAGE_SIZE / 4];
@@ -788,6 +780,9 @@ static int t4_write_flash(struct adapter *adapter, unsigned int addr,
for (val = 0, i = 0; i < c; ++i)
val = (val << 8) + *data++;
+ if (!byte_oriented)
+ val = htonl(val);
+
ret = sf1_write(adapter, c, c != left, 1, val);
if (ret)
goto unlock;
@@ -799,7 +794,8 @@ static int t4_write_flash(struct adapter *adapter, unsigned int addr,
t4_write_reg(adapter, A_SF_OP, 0); /* unlock SF */
/* Read the page to verify the write succeeded */
- ret = t4_read_flash(adapter, addr & ~0xff, ARRAY_SIZE(buf), buf, 1);
+ ret = t4_read_flash(adapter, addr & ~0xff, ARRAY_SIZE(buf), buf,
+ byte_oriented);
if (ret)
return ret;
@@ -825,7 +821,7 @@ unlock:
int t4_get_fw_version(struct adapter *adapter, u32 *vers)
{
return t4_read_flash(adapter,
- FW_IMG_START + offsetof(struct fw_hdr, fw_ver), 1,
+ FLASH_FW_START + offsetof(struct fw_hdr, fw_ver), 1,
vers, 0);
}
@@ -838,7 +834,7 @@ int t4_get_fw_version(struct adapter *adapter, u32 *vers)
*/
int t4_get_tp_version(struct adapter *adapter, u32 *vers)
{
- return t4_read_flash(adapter, FW_IMG_START + offsetof(struct fw_hdr,
+ return t4_read_flash(adapter, FLASH_FW_START + offsetof(struct fw_hdr,
tp_microcode_ver),
1, vers, 0);
}
@@ -854,24 +850,17 @@ int t4_get_tp_version(struct adapter *adapter, u32 *vers)
*/
int t4_check_fw_version(struct adapter *adapter)
{
- u32 api_vers[2];
int ret, major, minor, micro;
ret = t4_get_fw_version(adapter, &adapter->params.fw_vers);
if (!ret)
ret = t4_get_tp_version(adapter, &adapter->params.tp_vers);
- if (!ret)
- ret = t4_read_flash(adapter,
- FW_IMG_START + offsetof(struct fw_hdr, intfver_nic),
- 2, api_vers, 1);
if (ret)
return ret;
major = G_FW_HDR_FW_VER_MAJOR(adapter->params.fw_vers);
minor = G_FW_HDR_FW_VER_MINOR(adapter->params.fw_vers);
micro = G_FW_HDR_FW_VER_MICRO(adapter->params.fw_vers);
- memcpy(adapter->params.api_vers, api_vers,
- sizeof(adapter->params.api_vers));
if (major != FW_VERSION_MAJOR) { /* major mismatch - fail */
CH_ERR(adapter, "card FW has major version %u, driver wants "
@@ -914,6 +903,21 @@ static int t4_flash_erase_sectors(struct adapter *adapter, int start, int end)
}
/**
+ * t4_flash_cfg_addr - return the address of the flash configuration file
+ * @adapter: the adapter
+ *
+ * Return the address within the flash where the Firmware Configuration
+ * File is stored.
+ */
+unsigned int t4_flash_cfg_addr(struct adapter *adapter)
+{
+ if (adapter->params.sf_size == 0x100000)
+ return FLASH_FPGA_CFG_START;
+ else
+ return FLASH_CFG_START;
+}
+
+/**
* t4_load_cfg - download config file
* @adap: the adapter
* @cfg_data: the cfg text file to write
@@ -928,17 +932,8 @@ int t4_load_cfg(struct adapter *adap, const u8 *cfg_data, unsigned int size)
unsigned int flash_cfg_start_sec;
unsigned int sf_sec_size = adap->params.sf_size / adap->params.sf_nsec;
- if (adap->params.sf_size == 0x100000) {
- addr = FPGA_FLASH_CFG_OFFSET;
- flash_cfg_start_sec = FPGA_FLASH_CFG_START_SEC;
- } else {
- addr = FLASH_CFG_OFFSET;
- flash_cfg_start_sec = FLASH_CFG_START_SEC;
- }
- if (!size) {
- CH_ERR(adap, "cfg file has no data\n");
- return -EINVAL;
- }
+ addr = t4_flash_cfg_addr(adap);
+ flash_cfg_start_sec = addr / SF_SEC_SIZE;
if (size > FLASH_CFG_MAX_SIZE) {
CH_ERR(adap, "cfg file too large, max is %u bytes\n",
@@ -950,7 +945,11 @@ int t4_load_cfg(struct adapter *adap, const u8 *cfg_data, unsigned int size)
sf_sec_size);
ret = t4_flash_erase_sectors(adap, flash_cfg_start_sec,
flash_cfg_start_sec + i - 1);
- if (ret)
+ /*
+ * If size == 0 then we're simply erasing the FLASH sectors associated
+ * with the on-adapter Firmware Configuration File.
+ */
+ if (ret || size == 0)
goto out;
/* this will write to the flash up to SF_PAGE_SIZE at a time */
@@ -959,7 +958,7 @@ int t4_load_cfg(struct adapter *adap, const u8 *cfg_data, unsigned int size)
n = size - i;
else
n = SF_PAGE_SIZE;
- ret = t4_write_flash(adap, addr, n, cfg_data);
+ ret = t4_write_flash(adap, addr, n, cfg_data, 1);
if (ret)
goto out;
@@ -969,7 +968,8 @@ int t4_load_cfg(struct adapter *adap, const u8 *cfg_data, unsigned int size)
out:
if (ret)
- CH_ERR(adap, "config file download failed %d\n", ret);
+ CH_ERR(adap, "config file %s failed %d\n",
+ (size == 0 ? "clear" : "download"), ret);
return ret;
}
@@ -1004,9 +1004,9 @@ int t4_load_fw(struct adapter *adap, const u8 *fw_data, unsigned int size)
CH_ERR(adap, "FW image size differs from size in FW header\n");
return -EINVAL;
}
- if (size > FW_MAX_SIZE) {
+ if (size > FLASH_FW_MAX_SIZE) {
CH_ERR(adap, "FW image too large, max is %u bytes\n",
- FW_MAX_SIZE);
+ FLASH_FW_MAX_SIZE);
return -EFBIG;
}
@@ -1020,7 +1020,8 @@ int t4_load_fw(struct adapter *adap, const u8 *fw_data, unsigned int size)
}
i = DIV_ROUND_UP(size, sf_sec_size); /* # of sectors spanned */
- ret = t4_flash_erase_sectors(adap, FW_START_SEC, FW_START_SEC + i - 1);
+ ret = t4_flash_erase_sectors(adap, FLASH_FW_START_SEC,
+ FLASH_FW_START_SEC + i - 1);
if (ret)
goto out;
@@ -1031,28 +1032,110 @@ int t4_load_fw(struct adapter *adap, const u8 *fw_data, unsigned int size)
*/
memcpy(first_page, fw_data, SF_PAGE_SIZE);
((struct fw_hdr *)first_page)->fw_ver = htonl(0xffffffff);
- ret = t4_write_flash(adap, FW_IMG_START, SF_PAGE_SIZE, first_page);
+ ret = t4_write_flash(adap, FLASH_FW_START, SF_PAGE_SIZE, first_page, 1);
if (ret)
goto out;
- addr = FW_IMG_START;
+ addr = FLASH_FW_START;
for (size -= SF_PAGE_SIZE; size; size -= SF_PAGE_SIZE) {
addr += SF_PAGE_SIZE;
fw_data += SF_PAGE_SIZE;
- ret = t4_write_flash(adap, addr, SF_PAGE_SIZE, fw_data);
+ ret = t4_write_flash(adap, addr, SF_PAGE_SIZE, fw_data, 1);
if (ret)
goto out;
}
ret = t4_write_flash(adap,
- FW_IMG_START + offsetof(struct fw_hdr, fw_ver),
- sizeof(hdr->fw_ver), (const u8 *)&hdr->fw_ver);
+ FLASH_FW_START + offsetof(struct fw_hdr, fw_ver),
+ sizeof(hdr->fw_ver), (const u8 *)&hdr->fw_ver, 1);
out:
if (ret)
CH_ERR(adap, "firmware download failed, error %d\n", ret);
return ret;
}
+/* BIOS boot header */
+typedef struct boot_header_s {
+ u8 signature[2]; /* signature */
+ u8 length; /* image length (include header) */
+ u8 offset[4]; /* initialization vector */
+ u8 reserved[19]; /* reserved */
+ u8 exheader[2]; /* offset to expansion header */
+} boot_header_t;
+
+enum {
+ BOOT_FLASH_BOOT_ADDR = 0x0,/* start address of boot image in flash */
+ BOOT_SIGNATURE = 0xaa55, /* signature of BIOS boot ROM */
+ BOOT_SIZE_INC = 512, /* image size measured in 512B chunks */
+ BOOT_MIN_SIZE = sizeof(boot_header_t), /* at least basic header */
+ BOOT_MAX_SIZE = 1024*BOOT_SIZE_INC /* 1 byte * length increment */
+};
+
+/*
+ * t4_load_boot - download boot flash
+ * @adap: the adapter
+ * @boot_data: the boot image to write
+ * @boot_addr: flash offset to write the image at, in units of 1024 bytes
+ * @size: image size
+ *
+ * Write the supplied boot image to the card's serial flash. The boot image
+ * has the following sections: a 28-byte header and the boot image.
+ */
+int t4_load_boot(struct adapter *adap, const u8 *boot_data,
+ unsigned int boot_addr, unsigned int size)
+{
+ int ret, addr;
+ unsigned int i;
+ unsigned int boot_sector = boot_addr * 1024;
+ unsigned int sf_sec_size = adap->params.sf_size / adap->params.sf_nsec;
+
+ /*
+ * Perform some primitive sanity testing to avoid accidentally
+ * writing garbage over the boot sectors. We ought to check for
+ * more but it's not worth it for now ...
+ */
+ if (size < BOOT_MIN_SIZE || size > BOOT_MAX_SIZE) {
+ CH_ERR(adap, "boot image too small/large\n");
+ return -EFBIG;
+ }
+
+ /*
+ * Make sure the boot image does not encroach on the firmware region
+ */
+ if ((boot_sector + size) >> 16 > FLASH_FW_START_SEC) {
+ CH_ERR(adap, "boot image encroaching on firmware region\n");
+ return -EFBIG;
+ }
+
+ i = DIV_ROUND_UP(size, sf_sec_size); /* # of sectors spanned */
+ ret = t4_flash_erase_sectors(adap, boot_sector >> 16,
+ (boot_sector >> 16) + i - 1);
+ if (ret)
+ goto out;
+
+ /*
+ * Write the pages after the first SF_PAGE_SIZE bytes first, so that the
+ * BIOS boot header is only written if the rest of the image made it.
+ * NOTE(review): the loop advances boot_data, so the final write appears
+ * to send the last page instead of the header -- confirm.
+ */
+ addr = boot_sector;
+ for (size -= SF_PAGE_SIZE; size; size -= SF_PAGE_SIZE) {
+ addr += SF_PAGE_SIZE;
+ boot_data += SF_PAGE_SIZE;
+ ret = t4_write_flash(adap, addr, SF_PAGE_SIZE, boot_data, 0);
+ if (ret)
+ goto out;
+ }
+
+ ret = t4_write_flash(adap, boot_sector, SF_PAGE_SIZE, boot_data, 0);
+
+out:
+ if (ret)
+ CH_ERR(adap, "boot image download failed, error %d\n", ret);
+ return ret;
+}
+
/**
* t4_read_cimq_cfg - read CIM queue configuration
* @adap: the adapter
@@ -1668,7 +1751,10 @@ static void sge_intr_handler(struct adapter *adapter)
err = t4_read_reg(adapter, A_SGE_ERROR_STATS);
if (err & F_ERROR_QID_VALID) {
CH_ERR(adapter, "SGE error for queue %u\n", G_ERROR_QID(err));
- t4_write_reg(adapter, A_SGE_ERROR_STATS, F_ERROR_QID_VALID);
+ if (err & F_UNCAPTURED_ERROR)
+ CH_ERR(adapter, "SGE UNCAPTURED_ERROR set (clearing)\n");
+ t4_write_reg(adapter, A_SGE_ERROR_STATS, F_ERROR_QID_VALID |
+ F_UNCAPTURED_ERROR);
}
if (v != 0)
@@ -2261,6 +2347,7 @@ int t4_config_rss_range(struct adapter *adapter, int mbox, unsigned int viid,
*/
while (n > 0) {
int nq = min(n, 32);
+ int nq_packed = 0;
__be32 *qp = &cmd.iq0_to_iq2;
/*
@@ -2282,25 +2369,28 @@ int t4_config_rss_range(struct adapter *adapter, int mbox, unsigned int viid,
* Ingress Queue ID array and insert them into the command.
*/
while (nq > 0) {
- unsigned int v;
/*
* Grab up to the next 3 Ingress Queue IDs (wrapping
* around the Ingress Queue ID array if necessary) and
* insert them into the firmware RSS command at the
* current 3-tuple position within the commad.
*/
- v = V_FW_RSS_IND_TBL_CMD_IQ0(*rsp);
- if (++rsp >= rsp_end)
- rsp = rspq;
- v |= V_FW_RSS_IND_TBL_CMD_IQ1(*rsp);
- if (++rsp >= rsp_end)
- rsp = rspq;
- v |= V_FW_RSS_IND_TBL_CMD_IQ2(*rsp);
- if (++rsp >= rsp_end)
- rsp = rspq;
-
- *qp++ = htonl(v);
- nq -= 3;
+ u16 qbuf[3];
+ u16 *qbp = qbuf;
+ int nqbuf = min(3, nq);
+
+ nq -= nqbuf;
+ qbuf[0] = qbuf[1] = qbuf[2] = 0;
+ while (nqbuf && nq_packed < 32) {
+ nqbuf--;
+ nq_packed++;
+ *qbp++ = *rsp++;
+ if (rsp >= rsp_end)
+ rsp = rspq;
+ }
+ *qp++ = cpu_to_be32(V_FW_RSS_IND_TBL_CMD_IQ0(qbuf[0]) |
+ V_FW_RSS_IND_TBL_CMD_IQ1(qbuf[1]) |
+ V_FW_RSS_IND_TBL_CMD_IQ2(qbuf[2]));
}
/*
@@ -2694,8 +2784,6 @@ void t4_tp_get_cpl_stats(struct adapter *adap, struct tp_cpl_stats *st)
{
t4_read_indirect(adap, A_TP_MIB_INDEX, A_TP_MIB_DATA, st->req,
8, A_TP_MIB_CPL_IN_REQ_0);
- t4_read_indirect(adap, A_TP_MIB_INDEX, A_TP_MIB_DATA, st->tx_err,
- 4, A_TP_MIB_CPL_OUT_ERR_0);
}
/**
@@ -3298,6 +3386,7 @@ void t4_get_port_stats(struct adapter *adap, int idx, struct port_stats *p)
t4_read_reg64(adap, PORT_REG(idx, A_MPS_PORT_STAT_##name##_L))
#define GET_STAT_COM(name) t4_read_reg64(adap, A_MPS_STAT_##name##_L)
+ p->tx_pause = GET_STAT(TX_PORT_PAUSE);
p->tx_octets = GET_STAT(TX_PORT_BYTES);
p->tx_frames = GET_STAT(TX_PORT_FRAMES);
p->tx_bcast_frames = GET_STAT(TX_PORT_BCAST);
@@ -3312,7 +3401,6 @@ void t4_get_port_stats(struct adapter *adap, int idx, struct port_stats *p)
p->tx_frames_1024_1518 = GET_STAT(TX_PORT_1024B_1518B);
p->tx_frames_1519_max = GET_STAT(TX_PORT_1519B_MAX);
p->tx_drop = GET_STAT(TX_PORT_DROP);
- p->tx_pause = GET_STAT(TX_PORT_PAUSE);
p->tx_ppp0 = GET_STAT(TX_PORT_PPP0);
p->tx_ppp1 = GET_STAT(TX_PORT_PPP1);
p->tx_ppp2 = GET_STAT(TX_PORT_PPP2);
@@ -3322,6 +3410,7 @@ void t4_get_port_stats(struct adapter *adap, int idx, struct port_stats *p)
p->tx_ppp6 = GET_STAT(TX_PORT_PPP6);
p->tx_ppp7 = GET_STAT(TX_PORT_PPP7);
+ p->rx_pause = GET_STAT(RX_PORT_PAUSE);
p->rx_octets = GET_STAT(RX_PORT_BYTES);
p->rx_frames = GET_STAT(RX_PORT_FRAMES);
p->rx_bcast_frames = GET_STAT(RX_PORT_BCAST);
@@ -3340,7 +3429,6 @@ void t4_get_port_stats(struct adapter *adap, int idx, struct port_stats *p)
p->rx_frames_512_1023 = GET_STAT(RX_PORT_512B_1023B);
p->rx_frames_1024_1518 = GET_STAT(RX_PORT_1024B_1518B);
p->rx_frames_1519_max = GET_STAT(RX_PORT_1519B_MAX);
- p->rx_pause = GET_STAT(RX_PORT_PAUSE);
p->rx_ppp0 = GET_STAT(RX_PORT_PPP0);
p->rx_ppp1 = GET_STAT(RX_PORT_PPP1);
p->rx_ppp2 = GET_STAT(RX_PORT_PPP2);
@@ -3683,28 +3771,114 @@ int t4_fw_hello(struct adapter *adap, unsigned int mbox, unsigned int evt_mbox,
{
int ret;
struct fw_hello_cmd c;
+ u32 v;
+ unsigned int master_mbox;
+ int retries = FW_CMD_HELLO_RETRIES;
+retry:
memset(&c, 0, sizeof(c));
INIT_CMD(c, HELLO, WRITE);
- c.err_to_mbasyncnot = htonl(
+ c.err_to_clearinit = htonl(
V_FW_HELLO_CMD_MASTERDIS(master == MASTER_CANT) |
V_FW_HELLO_CMD_MASTERFORCE(master == MASTER_MUST) |
V_FW_HELLO_CMD_MBMASTER(master == MASTER_MUST ? mbox :
M_FW_HELLO_CMD_MBMASTER) |
- V_FW_HELLO_CMD_MBASYNCNOT(evt_mbox));
+ V_FW_HELLO_CMD_MBASYNCNOT(evt_mbox) |
+ V_FW_HELLO_CMD_STAGE(FW_HELLO_CMD_STAGE_OS) |
+ F_FW_HELLO_CMD_CLEARINIT);
+ /*
+ * Issue the HELLO command to the firmware. If it's not successful
+ * but indicates that we got a "busy" or "timeout" condition, retry
+ * the HELLO until we exhaust our retry limit.
+ */
ret = t4_wr_mbox(adap, mbox, &c, sizeof(c), &c);
- if (ret == 0 && state) {
- u32 v = ntohl(c.err_to_mbasyncnot);
- if (v & F_FW_HELLO_CMD_INIT)
- *state = DEV_STATE_INIT;
- else if (v & F_FW_HELLO_CMD_ERR)
+ if (ret != FW_SUCCESS) {
+ if ((ret == -EBUSY || ret == -ETIMEDOUT) && retries-- > 0)
+ goto retry;
+ return ret;
+ }
+
+ v = ntohl(c.err_to_clearinit);
+ master_mbox = G_FW_HELLO_CMD_MBMASTER(v);
+ if (state) {
+ if (v & F_FW_HELLO_CMD_ERR)
*state = DEV_STATE_ERR;
+ else if (v & F_FW_HELLO_CMD_INIT)
+ *state = DEV_STATE_INIT;
else
*state = DEV_STATE_UNINIT;
- return G_FW_HELLO_CMD_MBMASTER(v);
}
- return ret;
+
+ /*
+ * If we're not the Master PF then we need to wait around for the
+ * Master PF Driver to finish setting up the adapter.
+ *
+ * Note that we also do this wait if we're a non-Master-capable PF and
+ * there is no current Master PF; a Master PF may show up momentarily
+ * and we wouldn't want to fail pointlessly. (This can happen when an
+ * OS loads lots of different drivers rapidly at the same time). In
+ * this case, the Master PF returned by the firmware will be
+ * M_PCIE_FW_MASTER so the test below will work ...
+ */
+ if ((v & (F_FW_HELLO_CMD_ERR|F_FW_HELLO_CMD_INIT)) == 0 &&
+ master_mbox != mbox) {
+ int waiting = FW_CMD_HELLO_TIMEOUT;
+
+ /*
+ * Wait for the firmware to either indicate an error or
+ * initialized state. If we see either of these we bail out
+ * and report the issue to the caller. If we exhaust the
+ * "hello timeout" and we haven't exhausted our retries, try
+ * again. Otherwise bail with a timeout error.
+ */
+ for (;;) {
+ u32 pcie_fw;
+
+ msleep(50);
+ waiting -= 50;
+
+ /*
+ * If neither Error nor Initialized are indicated
+ * by the firmware keep waiting till we exhaust our
+ * timeout ... and then retry if we haven't exhausted
+ * our retries ...
+ */
+ pcie_fw = t4_read_reg(adap, A_PCIE_FW);
+ if (!(pcie_fw & (F_PCIE_FW_ERR|F_PCIE_FW_INIT))) {
+ if (waiting <= 0) {
+ if (retries-- > 0)
+ goto retry;
+
+ return -ETIMEDOUT;
+ }
+ continue;
+ }
+
+ /*
+ * We either have an Error or Initialized condition;
+ * report errors preferentially.
+ */
+ if (state) {
+ if (pcie_fw & F_PCIE_FW_ERR)
+ *state = DEV_STATE_ERR;
+ else if (pcie_fw & F_PCIE_FW_INIT)
+ *state = DEV_STATE_INIT;
+ }
+
+ /*
+ * If we arrived before a Master PF was selected and
+ * there's now a valid Master PF, grab its identity
+ * for our caller.
+ */
+ if (master_mbox == M_PCIE_FW_MASTER &&
+ (pcie_fw & F_PCIE_FW_MASTER_VLD))
+ master_mbox = G_PCIE_FW_MASTER(pcie_fw);
+ break;
+ }
+ }
+
+ return master_mbox;
}
/**
@@ -3724,37 +3898,37 @@ int t4_fw_bye(struct adapter *adap, unsigned int mbox)
}
/**
- * t4_init_cmd - ask FW to initialize the device
+ * t4_fw_reset - issue a reset to FW
* @adap: the adapter
* @mbox: mailbox to use for the FW command
+ * @reset: specifies the type of reset to perform
*
- * Issues a command to FW to partially initialize the device. This
- * performs initialization that generally doesn't depend on user input.
+ * Issues a reset command of the specified type to FW.
*/
-int t4_early_init(struct adapter *adap, unsigned int mbox)
+int t4_fw_reset(struct adapter *adap, unsigned int mbox, int reset)
{
- struct fw_initialize_cmd c;
+ struct fw_reset_cmd c;
memset(&c, 0, sizeof(c));
- INIT_CMD(c, INITIALIZE, WRITE);
+ INIT_CMD(c, RESET, WRITE);
+ c.val = htonl(reset);
return t4_wr_mbox(adap, mbox, &c, sizeof(c), NULL);
}
/**
- * t4_fw_reset - issue a reset to FW
+ * t4_fw_initialize - ask FW to initialize the device
* @adap: the adapter
* @mbox: mailbox to use for the FW command
- * @reset: specifies the type of reset to perform
*
- * Issues a reset command of the specified type to FW.
+ * Issues a command to FW to partially initialize the device. This
+ * performs initialization that generally doesn't depend on user input.
*/
-int t4_fw_reset(struct adapter *adap, unsigned int mbox, int reset)
+int t4_fw_initialize(struct adapter *adap, unsigned int mbox)
{
- struct fw_reset_cmd c;
+ struct fw_initialize_cmd c;
memset(&c, 0, sizeof(c));
- INIT_CMD(c, RESET, WRITE);
- c.val = htonl(reset);
+ INIT_CMD(c, INITIALIZE, WRITE);
return t4_wr_mbox(adap, mbox, &c, sizeof(c), NULL);
}
@@ -4495,6 +4669,21 @@ static int __devinit get_flash_params(struct adapter *adapter)
return 0;
}
+static void __devinit set_pcie_completion_timeout(struct adapter *adapter,
+ u8 range)
+{
+ u16 val;
+ u32 pcie_cap;
+
+ pcie_cap = t4_os_find_pci_capability(adapter, PCI_CAP_ID_EXP);
+ if (pcie_cap) {
+ t4_os_pci_read_cfg2(adapter, pcie_cap + PCI_EXP_DEVCTL2, &val);
+ val &= 0xfff0;
+ val |= range ;
+ t4_os_pci_write_cfg2(adapter, pcie_cap + PCI_EXP_DEVCTL2, val);
+ }
+}
+
/**
* t4_prep_adapter - prepare SW and HW for operation
* @adapter: the adapter
@@ -4541,6 +4730,8 @@ int __devinit t4_prep_adapter(struct adapter *adapter)
adapter->params.portvec = 1;
adapter->params.vpd.cclk = 50000;
+ /* Set pci completion timeout value to 4 seconds. */
+ set_pcie_completion_timeout(adapter, 0xd);
return 0;
}
diff --git a/sys/dev/cxgbe/common/t4_hw.h b/sys/dev/cxgbe/common/t4_hw.h
index fd48aabaa249..b93734e8de39 100644
--- a/sys/dev/cxgbe/common/t4_hw.h
+++ b/sys/dev/cxgbe/common/t4_hw.h
@@ -182,4 +182,82 @@ struct pagepod {
#define M_PPOD_OFST 0xFFFFFFFF
#define V_PPOD_OFST(x) ((x) << S_PPOD_OFST)
+/*
+ * Flash layout.
+ */
+#define FLASH_START(start) ((start) * SF_SEC_SIZE)
+#define FLASH_MAX_SIZE(nsecs) ((nsecs) * SF_SEC_SIZE)
+
+enum {
+ /*
+ * Various Expansion-ROM boot images, etc.
+ */
+ FLASH_EXP_ROM_START_SEC = 0,
+ FLASH_EXP_ROM_NSECS = 6,
+ FLASH_EXP_ROM_START = FLASH_START(FLASH_EXP_ROM_START_SEC),
+ FLASH_EXP_ROM_MAX_SIZE = FLASH_MAX_SIZE(FLASH_EXP_ROM_NSECS),
+
+ /*
+ * iSCSI Boot Firmware Table (iBFT) and other driver-related
+ * parameters ...
+ */
+ FLASH_IBFT_START_SEC = 6,
+ FLASH_IBFT_NSECS = 1,
+ FLASH_IBFT_START = FLASH_START(FLASH_IBFT_START_SEC),
+ FLASH_IBFT_MAX_SIZE = FLASH_MAX_SIZE(FLASH_IBFT_NSECS),
+
+ /*
+ * Boot configuration data.
+ */
+ FLASH_BOOTCFG_START_SEC = 7,
+ FLASH_BOOTCFG_NSECS = 1,
+ FLASH_BOOTCFG_START = FLASH_START(FLASH_BOOTCFG_START_SEC),
+ FLASH_BOOTCFG_MAX_SIZE = FLASH_MAX_SIZE(FLASH_BOOTCFG_NSECS),
+
+ /*
+ * Location of firmware image in FLASH.
+ */
+ FLASH_FW_START_SEC = 8,
+ FLASH_FW_NSECS = 8,
+ FLASH_FW_START = FLASH_START(FLASH_FW_START_SEC),
+ FLASH_FW_MAX_SIZE = FLASH_MAX_SIZE(FLASH_FW_NSECS),
+
+ /*
+ * iSCSI persistent/crash information.
+ */
+ FLASH_ISCSI_CRASH_START_SEC = 29,
+ FLASH_ISCSI_CRASH_NSECS = 1,
+ FLASH_ISCSI_CRASH_START = FLASH_START(FLASH_ISCSI_CRASH_START_SEC),
+ FLASH_ISCSI_CRASH_MAX_SIZE = FLASH_MAX_SIZE(FLASH_ISCSI_CRASH_NSECS),
+
+ /*
+ * FCoE persistent/crash information.
+ */
+ FLASH_FCOE_CRASH_START_SEC = 30,
+ FLASH_FCOE_CRASH_NSECS = 1,
+ FLASH_FCOE_CRASH_START = FLASH_START(FLASH_FCOE_CRASH_START_SEC),
+ FLASH_FCOE_CRASH_MAX_SIZE = FLASH_MAX_SIZE(FLASH_FCOE_CRASH_NSECS),
+
+ /*
+ * Location of Firmware Configuration File in FLASH. Since the FPGA
+ * "FLASH" is smaller we need to store the Configuration File in a
+ * different location -- which will overlap the end of the firmware
+ * image if firmware ever gets that large ...
+ */
+ FLASH_CFG_START_SEC = 31,
+ FLASH_CFG_NSECS = 1,
+ FLASH_CFG_START = FLASH_START(FLASH_CFG_START_SEC),
+ FLASH_CFG_MAX_SIZE = FLASH_MAX_SIZE(FLASH_CFG_NSECS),
+
+ FLASH_FPGA_CFG_START_SEC = 15,
+ FLASH_FPGA_CFG_START = FLASH_START(FLASH_FPGA_CFG_START_SEC),
+
+ /*
+ * Sectors 32-63 are reserved for FLASH failover.
+ */
+};
+
+#undef FLASH_START
+#undef FLASH_MAX_SIZE
+
#endif /* __T4_HW_H */
diff --git a/sys/dev/cxgbe/firmware/t4fw_cfg.txt b/sys/dev/cxgbe/firmware/t4fw_cfg.txt
new file mode 100644
index 000000000000..65d6f0bf011a
--- /dev/null
+++ b/sys/dev/cxgbe/firmware/t4fw_cfg.txt
@@ -0,0 +1,132 @@
+# Firmware configuration file.
+#
+# Global limits (some are hardware limits, others are due to the firmware).
+# Also note that the firmware reserves some of these resources for its own use
+# so it's not always possible for the drivers to grab everything listed here.
+# nvi = 128 virtual interfaces
+# niqflint = 1023 ingress queues with freelists and/or interrupts
+# nethctrl = 64K Ethernet or ctrl egress queues
+# neq = 64K egress queues of all kinds, including freelists
+# nexactf = 336 MPS TCAM entries, can oversubscribe.
+#
+
+[global]
+ rss_glb_config_mode = basicvirtual
+ rss_glb_config_options = tnlmapen, hashtoeplitz, tnlalllkp
+
+ sge_timer_value = 1, 5, 10, 50, 100, 200 # usecs
+
+ # TP_SHIFT_CNT
+ reg[0x7dc0] = 0x64f8849
+
+ filterMode = fragmentation, mpshittype, protocol, vlan, port, fcoe
+
+ # TP rx and tx payload memory (% of the total EDRAM + DDR3).
+ tp_pmrx = 40
+ tp_pmtx = 60
+ tp_pmrx_pagesize = 64K
+ tp_pmtx_pagesize = 64K
+
+# PFs 0-3. These get 8 MSI/8 MSI-X vectors each. VFs are supported by
+# these 4 PFs only. Not used here at all.
+[function "0"]
+ nvf = 16
+ nvi = 1
+[function "0/*"]
+ nvi = 1
+
+[function "1"]
+ nvf = 16
+ nvi = 1
+[function "1/*"]
+ nvi = 1
+
+[function "2"]
+ nvf = 16
+ nvi = 1
+[function "2/*"]
+ nvi = 1
+
+[function "3"]
+ nvf = 16
+ nvi = 1
+[function "3/*"]
+ nvi = 1
+
+# PF4 is the resource-rich PF that the bus/nexus driver attaches to.
+# It gets 32 MSI/128 MSI-X vectors.
+[function "4"]
+ wx_caps = all
+ r_caps = all
+ nvi = 48
+ niqflint = 256
+ nethctrl = 128
+ neq = 256
+ nexactf = 300
+ cmask = all
+ pmask = all
+
+ # driver will mask off features it won't use
+ protocol = ofld
+
+ tp_l2t = 100
+
+ # TCAM has 8K cells; each region must start at a multiple of 128 cell.
+ # Each entry in these categories takes 4 cells each. nhash will use the
+ # TCAM iff there is room left (that is, the rest don't add up to 2048).
+ nroute = 32
+ nclip = 0 # needed only for IPv6 offload
+ nfilter = 1504
+ nserver = 512
+ nhash = 16384
+
+# PF5 is the SCSI Controller PF. It gets 32 MSI/40 MSI-X vectors.
+# Not used right now.
+[function "5"]
+ nvi = 1
+
+# PF6 is the FCoE Controller PF. It gets 32 MSI/40 MSI-X vectors.
+# Not used right now.
+[function "6"]
+ nvi = 1
+
+# MPS has 192K buffer space for ingress packets from the wire as well as
+# loopback path of the L2 switch.
+[port "0"]
+ dcb = none
+ bg_mem = 25
+ lpbk_mem = 25
+ hwm = 30
+ lwm = 15
+ dwm = 30
+
+[port "1"]
+ dcb = none
+ bg_mem = 25
+ lpbk_mem = 25
+ hwm = 30
+ lwm = 15
+ dwm = 30
+
+[port "2"]
+ dcb = none
+ bg_mem = 25
+ lpbk_mem = 25
+ hwm = 30
+ lwm = 15
+ dwm = 30
+
+[port "3"]
+ dcb = none
+ bg_mem = 25
+ lpbk_mem = 25
+ hwm = 30
+ lwm = 15
+ dwm = 30
+
+[fini]
+ version = 0x1
+ checksum = 0xb31cdfac
+#
+# $FreeBSD$
+#
diff --git a/sys/dev/cxgbe/firmware/t4fw_cfg_uwire.txt b/sys/dev/cxgbe/firmware/t4fw_cfg_uwire.txt
new file mode 100644
index 000000000000..134d60c4d27d
--- /dev/null
+++ b/sys/dev/cxgbe/firmware/t4fw_cfg_uwire.txt
@@ -0,0 +1,503 @@
+# Chelsio T4 Factory Default configuration file.
+#
+# Copyright (C) 2010 Chelsio Communications. All rights reserved.
+#
+
+# This file provides the default, power-on configuration for 4-port T4-based
+# adapters shipped from the factory. These defaults are designed to address
+# the needs of the vast majority of T4 customers. The basic idea is to have
+# a default configuration which allows a customer to plug a T4 adapter in and
+# have it work regardless of OS, driver or application except in the most
+# unusual and/or demanding customer applications.
+#
+# Many of the T4 resources which are described by this configuration are
+# finite. This requires balancing the configuration/operation needs of
+# device drivers across OSes and a large number of customer application.
+#
+# Some of the more important resources to allocate and their constaints are:
+# 1. Virtual Interfaces: 128.
+# 2. Ingress Queues with Free Lists: 1024. PCI-E SR-IOV Virtual Functions
+# must use a power of 2 Ingress Queues.
+# 3. Egress Queues: 128K. PCI-E SR-IOV Virtual Functions must use a
+# power of 2 Egress Queues.
+# 4. MSI-X Vectors: 1088. A complication here is that the PCI-E SR-IOV
+# Virtual Functions based off of a Physical Function all get the
+# same umber of MSI-X Vectors as the base Physical Function.
+# Additionally, regardless of whether Virtual Functions are enabled or
+# not, their MSI-X "needs" are counted by the PCI-E implementation.
+# And finally, all Physical Funcations capable of supporting Virtual
+# Functions (PF0-3) must have the same number of configured TotalVFs in
+# their SR-IOV Capabilities.
+# 5. Multi-Port Support (MPS) TCAM: 336 entries to support MAC destination
+# address matching on Ingress Packets.
+#
+# Some of the important OS/Driver resource needs are:
+# 6. Some OS Drivers will manage all resources through a single Physical
+# Function (currently PF0 but it could be any Physical Function). Thus,
+# this "Unified PF" will need to have enough resources allocated to it
+# to allow for this. And because of the MSI-X resource allocation
+# constraints mentioned above, this probably means we'll either have to
+# severely limit the TotalVFs if we continue to use PF0 as the Unified PF
+# or we'll need to move the Unified PF into the PF4-7 range since those
+# Physical Functions don't have any Virtual Functions associated with
+# them.
+# 7. Some OS Drivers will manage different ports and functions (NIC,
+# storage, etc.) on different Physical Functions. For example, NIC
+# functions for ports 0-3 on PF0-3, FCoE on PF4, iSCSI on PF5, etc.
+#
+# Some of the customer application needs which need to be accommodated:
+# 8. Some customers will want to support large CPU count systems with
+# good scaling. Thus, we'll need to accommodate a number of
+# Ingress Queues and MSI-X Vectors to allow up to some number of CPUs
+# to be involved per port and per application function. For example,
+# in the case where all ports and application functions will be
+# managed via a single Unified PF and we want to accommodate scaling up
+# to 8 CPUs, we would want:
+#
+# 4 ports *
+# 3 application functions (NIC, FCoE, iSCSI) per port *
+# 8 Ingress Queue/MSI-X Vectors per application function
+#
+# for a total of 96 Ingress Queues and MSI-X Vectors on the Unified PF.
+# (Plus a few for Firmware Event Queues, etc.)
+#
+# 9. Some customers will want to use T4's PCI-E SR-IOV Capability to allow
+# Virtual Machines to directly access T4 functionality via SR-IOV
+# Virtual Functions and "PCI Device Passthrough" -- this is especially
+# true for the NIC application functionality. (Note that there is
+# currently no ability to use the TOE, FCoE, iSCSI, etc. via Virtual
+# Functions so this is in fact solely limited to NIC.)
+#
+
+
+# Global configuration settings.
+#
+[global]
+ rss_glb_config_mode = basicvirtual
+ rss_glb_config_options = tnlmapen,hashtoeplitz,tnlalllkp
+
+ # The following Scatter Gather Engine (SGE) settings assume a 4KB Host
+ # Page Size and a 64B L1 Cache Line Size. It programs the
+ # EgrStatusPageSize and IngPadBoundary to 64B and the PktShift to 2.
+ # If a Master PF Driver finds itself on a machine with different
+ # parameters, then the Master PF Driver is responsible for initializing
+ # these parameters to appropriate values.
+ #
+ # Notes:
+ # 1. The Free List Buffer Sizes below are raw and the firmware will
+ # round them up to the Ingress Padding Boundary.
+ # 2. The SGE Timer Values below are expressed below in microseconds.
+ # The firmware will convert these values to Core Clock Ticks when
+ # it processes the configuration parameters.
+ #
+ reg[0x1008] = 0x40810/0x21c70 # SGE_CONTROL
+ reg[0x100c] = 0x22222222 # SGE_HOST_PAGE_SIZE
+ reg[0x10a0] = 0x01040810 # SGE_INGRESS_RX_THRESHOLD
+ reg[0x1044] = 4096 # SGE_FL_BUFFER_SIZE0
+ reg[0x1048] = 65536 # SGE_FL_BUFFER_SIZE1
+ reg[0x104c] = 1536 # SGE_FL_BUFFER_SIZE2
+ reg[0x1050] = 9024 # SGE_FL_BUFFER_SIZE3
+ reg[0x1054] = 9216 # SGE_FL_BUFFER_SIZE4
+ reg[0x1058] = 2048 # SGE_FL_BUFFER_SIZE5
+ reg[0x105c] = 128 # SGE_FL_BUFFER_SIZE6
+ reg[0x1060] = 8192 # SGE_FL_BUFFER_SIZE7
+ reg[0x1064] = 16384 # SGE_FL_BUFFER_SIZE8
+ reg[0x10a4] = 0xa000a000/0xf000f000 # SGE_DBFIFO_STATUS
+ reg[0x10a8] = 0x2000/0x2000 # SGE_DOORBELL_CONTROL
+ sge_timer_value = 5, 10, 20, 50, 100, 200 # SGE_TIMER_VALUE* in usecs
+
+ reg[0x7dc0] = 0x64f8849 # TP_SHIFT_CNT
+
+ # Selection of tuples for LE filter lookup, fields (and widths which
+ # must sum to <= 36): { IP Fragment (1), MPS Match Type (3),
+ # IP Protocol (8), [Inner] VLAN (17), Port (3), FCoE (1) }
+ #
+ filterMode = fragmentation, mpshittype, protocol, vnic_id, port, fcoe
+
+ # Percentage of dynamic memory (in either the EDRAM or external MEM)
+ # to use for TP RX payload
+ tp_pmrx = 30
+
+ # TP RX payload page size
+ tp_pmrx_pagesize = 64K
+
+ # Percentage of dynamic memory (in either the EDRAM or external MEM)
+ # to use for TP TX payload
+ tp_pmtx = 50
+
+ # TP TX payload page size
+ tp_pmtx_pagesize = 64K
+
+# Some "definitions" to make the rest of this a bit more readable. We support
+# 4 ports, 3 functions (NIC, FCoE and iSCSI), scaling up to 8 "CPU Queue Sets"
+# per function per port ...
+#
+# NMSIX = 1088 # available MSI-X Vectors
+# NVI = 128 # available Virtual Interfaces
+# NMPSTCAM = 336 # MPS TCAM entries
+#
+# NPORTS = 4 # ports
+# NCPUS = 8 # CPUs we want to support scalably
+# NFUNCS = 3 # functions per port (NIC, FCoE, iSCSI)
+
+# Breakdown of Virtual Interface/Queue/Interrupt resources for the "Unified
+# PF" which many OS Drivers will use to manage most or all functions.
+#
+# Each Ingress Queue can use one MSI-X interrupt but some Ingress Queues can
+# use Forwarded Interrupt Ingress Queues. For these latter, an Ingress Queue
+# would be created and the Queue ID of a Forwarded Interrupt Ingress Queue
+# will be specified as the "Ingress Queue Asynchronous Destination Index."
+# Thus, the number of MSI-X Vectors assigned to the Unified PF will be less
+# than or equal to the number of Ingress Queues ...
+#
+# NVI_NIC = 4 # NIC access to NPORTS
+# NFLIQ_NIC = 32 # NIC Ingress Queues with Free Lists
+# NETHCTRL_NIC = 32 # NIC Ethernet Control/TX Queues
+# NEQ_NIC = 64 # NIC Egress Queues (FL, ETHCTRL/TX)
+# NMPSTCAM_NIC = 16 # NIC MPS TCAM Entries (NPORTS*4)
+# NMSIX_NIC = 32 # NIC MSI-X Interrupt Vectors (FLIQ)
+#
+# NVI_OFLD = 0 # Offload uses NIC function to access ports
+# NFLIQ_OFLD = 16 # Offload Ingress Queues with Free Lists
+# NETHCTRL_OFLD = 0 # Offload Ethernet Control/TX Queues
+# NEQ_OFLD = 16 # Offload Egress Queues (FL)
+# NMPSTCAM_OFLD = 0 # Offload MPS TCAM Entries (uses NIC's)
+# NMSIX_OFLD = 16 # Offload MSI-X Interrupt Vectors (FLIQ)
+#
+# NVI_RDMA = 0 # RDMA uses NIC function to access ports
+# NFLIQ_RDMA = 4 # RDMA Ingress Queues with Free Lists
+# NETHCTRL_RDMA = 0 # RDMA Ethernet Control/TX Queues
+# NEQ_RDMA = 4 # RDMA Egress Queues (FL)
+# NMPSTCAM_RDMA = 0 # RDMA MPS TCAM Entries (uses NIC's)
+# NMSIX_RDMA = 4 # RDMA MSI-X Interrupt Vectors (FLIQ)
+#
+# NEQ_WD = 128 # Wire Direct TX Queues and FLs
+# NETHCTRL_WD = 64 # Wire Direct TX Queues
+# NFLIQ_WD = 64 ` # Wire Direct Ingress Queues with Free Lists
+#
+# NVI_ISCSI = 4 # ISCSI access to NPORTS
+# NFLIQ_ISCSI = 4 # ISCSI Ingress Queues with Free Lists
+# NETHCTRL_ISCSI = 0 # ISCSI Ethernet Control/TX Queues
+# NEQ_ISCSI = 4 # ISCSI Egress Queues (FL)
+# NMPSTCAM_ISCSI = 4 # ISCSI MPS TCAM Entries (NPORTS)
+# NMSIX_ISCSI = 4 # ISCSI MSI-X Interrupt Vectors (FLIQ)
+#
+# NVI_FCOE = 4 # FCOE access to NPORTS
+# NFLIQ_FCOE = 34 # FCOE Ingress Queues with Free Lists
+# NETHCTRL_FCOE = 32 # FCOE Ethernet Control/TX Queues
+# NEQ_FCOE = 66 # FCOE Egress Queues (FL)
+# NMPSTCAM_FCOE = 32 # FCOE MPS TCAM Entries (NPORTS)
+# NMSIX_FCOE = 34 # FCOE MSI-X Interrupt Vectors (FLIQ)
+
+# Two extra Ingress Queues per function for Firmware Events and Forwarded
+# Interrupts, and two extra interrupts per function for Firmware Events (or a
+# Forwarded Interrupt Queue) and General Interrupts per function.
+#
+# NFLIQ_EXTRA = 6 # "extra" Ingress Queues 2*NFUNCS (Firmware and
+# # Forwarded Interrupts
+# NMSIX_EXTRA = 6 # extra interrupts 2*NFUNCS (Firmware and
+# # General Interrupts
+
+# Microsoft HyperV resources. The HyperV Virtual Ingress Queues will have
+# their interrupts forwarded to another set of Forwarded Interrupt Queues.
+#
+# NVI_HYPERV = 16 # VMs we want to support
+# NVIIQ_HYPERV = 2 # Virtual Ingress Queues with Free Lists per VM
+# NFLIQ_HYPERV = 40 # VIQs + NCPUS Forwarded Interrupt Queues
+# NEQ_HYPERV = 32 # VIQs Free Lists
+# NMPSTCAM_HYPERV = 16 # MPS TCAM Entries (NVI_HYPERV)
+# NMSIX_HYPERV = 8 # NCPUS Forwarded Interrupt Queues
+
+# Adding all of the above Unified PF resource needs together: (NIC + OFLD +
+# RDMA + ISCSI + FCOE + EXTRA + HYPERV)
+#
+# NVI_UNIFIED = 28
+# NFLIQ_UNIFIED = 106
+# NETHCTRL_UNIFIED = 32
+# NEQ_UNIFIED = 124
+# NMPSTCAM_UNIFIED = 40
+#
+# The sum of all the MSI-X resources above is 74 MSI-X Vectors but we'll round
+# that up to 128 to make sure the Unified PF doesn't run out of resources.
+#
+# NMSIX_UNIFIED = 128
+#
+# The Storage PFs could need up to NPORTS*NCPUS + NMSIX_EXTRA MSI-X Vectors
+# which is 34 but they're probably safe with 32.
+#
+# NMSIX_STORAGE = 32
+
+# Note: The UnifiedPF is PF4 which doesn't have any Virtual Functions
+# associated with it. Thus, the MSI-X Vector allocations we give to the
+# UnifiedPF aren't inherited by any Virtual Functions. As a result we can
+# provision many more Virtual Functions than we can if the UnifiedPF were
+# one of PF0-3.
+#
+
+# All of the below PCI-E parameters are actually stored in various *_init.txt
+# files. We include them below essentially as comments.
+#
+# For PF0-3 we assign 8 vectors each for NIC Ingress Queues of the associated
+# ports 0-3.
+#
+# For PF4, the Unified PF, we give it an MSI-X Table Size as outlined above.
+#
+# For PF5-6 we assign enough MSI-X Vectors to support FCoE and iSCSI
+# storage applications across all four possible ports.
+#
+# Additionally, since the UnifiedPF isn't one of the per-port Physical
+# Functions, we give the UnifiedPF and the PF0-3 Physical Functions
+# different PCI Device IDs which will allow Unified and Per-Port Drivers
+# to directly select the type of Physical Function to which they wish to be
+# attached.
+#
+# Note that the actual values used for the PCI-E Intelectual Property will be
+# 1 less than those below since that's the way it "counts" things. For
+# readability, we use the number we actually mean ...
+#
+# PF0_INT = 8 # NCPUS
+# PF1_INT = 8 # NCPUS
+# PF2_INT = 8 # NCPUS
+# PF3_INT = 8 # NCPUS
+# PF0_3_INT = 32 # PF0_INT + PF1_INT + PF2_INT + PF3_INT
+#
+# PF4_INT = 128 # NMSIX_UNIFIED
+# PF5_INT = 32 # NMSIX_STORAGE
+# PF6_INT = 32 # NMSIX_STORAGE
+# PF7_INT = 0 # Nothing Assigned
+# PF4_7_INT = 192 # PF4_INT + PF5_INT + PF6_INT + PF7_INT
+#
+# PF0_7_INT = 224 # PF0_3_INT + PF4_7_INT
+#
+# With the above we can get 17 VFs/PF0-3 (limited by 336 MPS TCAM entries)
+# but we'll lower that to 16 to make our total 64 and a nice power of 2 ...
+#
+# NVF = 16
+
+# For those OSes which manage different ports on different PFs, we need
+# only enough resources to support a single port's NIC application functions
+# on PF0-3. The below assumes that we're only doing NIC with NCPUS "Queue
+# Sets" for ports 0-3. The FCoE and iSCSI functions for such OSes will be
+# managed on the "storage PFs" (see below).
+#
+[function "0"]
+ nvf = 16 # NVF on this function
+ wx_caps = all # write/execute permissions for all commands
+ r_caps = all # read permissions for all commands
+ nvi = 1 # 1 port
+ niqflint = 8 # NCPUS "Queue Sets"
+ nethctrl = 8 # NCPUS "Queue Sets"
+ neq = 16 # niqflint + nethctrl Egress Queues
+ nexactf = 8 # number of exact MPSTCAM MAC filters
+ cmask = all # access to all channels
+ pmask = 0x1 # access to only one port
+
+[function "1"]
+ nvf = 16 # NVF on this function
+ wx_caps = all # write/execute permissions for all commands
+ r_caps = all # read permissions for all commands
+ nvi = 1 # 1 port
+ niqflint = 8 # NCPUS "Queue Sets"
+ nethctrl = 8 # NCPUS "Queue Sets"
+ neq = 16 # niqflint + nethctrl Egress Queues
+ nexactf = 8 # number of exact MPSTCAM MAC filters
+ cmask = all # access to all channels
+ pmask = 0x2 # access to only one port
+
+[function "2"]
+ nvf = 16 # NVF on this function
+ wx_caps = all # write/execute permissions for all commands
+ r_caps = all # read permissions for all commands
+ nvi = 1 # 1 port
+ niqflint = 8 # NCPUS "Queue Sets"
+ nethctrl = 8 # NCPUS "Queue Sets"
+ neq = 16 # niqflint + nethctrl Egress Queues
+ nexactf = 8 # number of exact MPSTCAM MAC filters
+ cmask = all # access to all channels
+ pmask = 0x4 # access to only one port
+
+[function "3"]
+ nvf = 16 # NVF on this function
+ wx_caps = all # write/execute permissions for all commands
+ r_caps = all # read permissions for all commands
+ nvi = 1 # 1 port
+ niqflint = 8 # NCPUS "Queue Sets"
+ nethctrl = 8 # NCPUS "Queue Sets"
+ neq = 16 # niqflint + nethctrl Egress Queues
+ nexactf = 8 # number of exact MPSTCAM MAC filters
+ cmask = all # access to all channels
+ pmask = 0x8 # access to only one port
+
+# Some OS Drivers manage all application functions for all ports via PF4.
+# Thus we need to provide a large number of resources here. For Egress
+# Queues we need to account for both TX Queues as well as Free List Queues
+# (because the host is responsible for producing Free List Buffers for the
+# hardware to consume).
+#
+[function "4"]
+ wx_caps = all # write/execute permissions for all commands
+ r_caps = all # read permissions for all commands
+ nvi = 28 # NVI_UNIFIED
+ niqflint = 170 # NFLIQ_UNIFIED + NLFIQ_WD
+ nethctrl = 96 # NETHCTRL_UNIFIED + NETHCTRL_WD
+ neq = 252 # NEQ_UNIFIED + NEQ_WD
+ nexactf = 40 # NMPSTCAM_UNIFIED
+ cmask = all # access to all channels
+ pmask = all # access to all four ports ...
+ nroute = 32 # number of routing region entries
+ nclip = 32 # number of clip region entries
+ nfilter = 768 # number of filter region entries
+ nserver = 256 # number of server region entries
+ nhash = 0 # number of hash region entries
+ protocol = nic_vm, ofld, rddp, rdmac, iscsi_initiator_pdu, iscsi_target_pdu
+ tp_l2t = 100
+ tp_ddp = 2
+ tp_ddp_iscsi = 2
+ tp_stag = 2
+ tp_pbl = 5
+ tp_rq = 7
+
+# We have FCoE and iSCSI storage functions on PF5 and PF6 each of which may
+# need to have Virtual Interfaces on each of the four ports with up to NCPUS
+# "Queue Sets" each.
+#
+[function "5"]
+ wx_caps = all # write/execute permissions for all commands
+ r_caps = all # read permissions for all commands
+ nvi = 4 # NPORTS
+ niqflint = 34 # NPORTS*NCPUS + NMSIX_EXTRA
+ nethctrl = 32 # NPORTS*NCPUS
+ neq = 64 # NPORTS*NCPUS * 2 (FL, ETHCTRL/TX)
+ nexactf = 4 # NPORTS
+ cmask = all # access to all channels
+ pmask = all # access to all four ports ...
+
+[function "6"]
+ wx_caps = all # write/execute permissions for all commands
+ r_caps = all # read permissions for all commands
+ nvi = 4 # NPORTS
+ niqflint = 34 # NPORTS*NCPUS + NMSIX_EXTRA
+ nethctrl = 32 # NPORTS*NCPUS
+ neq = 66 # NPORTS*NCPUS * 2 (FL, ETHCTRL/TX) + 2 (EXTRA)
+ nexactf = 32 # NPORTS + adding 28 exact entries for FCoE
+ # which is OK since < MIN(SUM PF0..3, PF4)
+ # and we never load PF0..3 and PF4 concurrently
+ cmask = all # access to all channels
+ pmask = all # access to all four ports ...
+ nhash = 0
+ protocol = fcoe_initiator
+ tp_ddp = 2
+ fcoe_nfcf = 16
+ fcoe_nvnp = 32
+ fcoe_nssn = 1024
+
+# For Virtual functions, we only allow NIC functionality and we only allow
+# access to one port (1 << PF). Note that because of limitations in the
+# Scatter Gather Engine (SGE) hardware which checks writes to VF KDOORBELL
+# and GTS registers, the number of Ingress and Egress Queues must be a power
+# of 2.
+#
+[function "0/*"] # NVF
+ wx_caps = 0x82 # DMAQ | VF
+ r_caps = 0x86 # DMAQ | VF | PORT
+ nvi = 1 # 1 port
+ niqflint = 4 # 2 "Queue Sets" + NXIQ
+ nethctrl = 2 # 2 "Queue Sets"
+ neq = 4 # 2 "Queue Sets" * 2
+ nexactf = 4
+ cmask = all # access to all channels
+ pmask = 0x1 # access to only one port ...
+
+[function "1/*"] # NVF
+ wx_caps = 0x82 # DMAQ | VF
+ r_caps = 0x86 # DMAQ | VF | PORT
+ nvi = 1 # 1 port
+ niqflint = 4 # 2 "Queue Sets" + NXIQ
+ nethctrl = 2 # 2 "Queue Sets"
+ neq = 4 # 2 "Queue Sets" * 2
+ nexactf = 4
+ cmask = all # access to all channels
+ pmask = 0x2 # access to only one port ...
+
+[function "2/*"] # NVF
+ wx_caps = 0x82 # DMAQ | VF
+ r_caps = 0x86 # DMAQ | VF | PORT
+ nvi = 1 # 1 port
+ niqflint = 4 # 2 "Queue Sets" + NXIQ
+ nethctrl = 2 # 2 "Queue Sets"
+ neq = 4 # 2 "Queue Sets" * 2
+ nexactf = 4
+ cmask = all # access to all channels
+ pmask = 0x4 # access to only one port ...
+
+[function "3/*"] # NVF
+ wx_caps = 0x82 # DMAQ | VF
+ r_caps = 0x86 # DMAQ | VF | PORT
+ nvi = 1 # 1 port
+ niqflint = 4 # 2 "Queue Sets" + NXIQ
+ nethctrl = 2 # 2 "Queue Sets"
+ neq = 4 # 2 "Queue Sets" * 2
+ nexactf = 4
+ cmask = all # access to all channels
+ pmask = 0x8 # access to only one port ...
+
+# MPS features a 196608 bytes ingress buffer that is used for ingress buffering
+# for packets from the wire as well as the loopback path of the L2 switch. The
+# folling params control how the buffer memory is distributed and the L2 flow
+# control settings:
+#
+# bg_mem: %-age of mem to use for port/buffer group
+# lpbk_mem: %-age of port/bg mem to use for loopback
+# hwm: high watermark; bytes available when starting to send pause
+# frames (in units of 0.1 MTU)
+# lwm: low watermark; bytes remaining when sending 'unpause' frame
+# (in inuits of 0.1 MTU)
+# dwm: minimum delta between high and low watermark (in units of 100
+# Bytes)
+#
+[port "0"]
+ dcb = ppp, dcbx # configure for DCB PPP and enable DCBX offload
+ bg_mem = 25
+ lpbk_mem = 25
+ hwm = 30
+ lwm = 15
+ dwm = 30
+
+[port "1"]
+ dcb = ppp, dcbx
+ bg_mem = 25
+ lpbk_mem = 25
+ hwm = 30
+ lwm = 15
+ dwm = 30
+
+[port "2"]
+ dcb = ppp, dcbx
+ bg_mem = 25
+ lpbk_mem = 25
+ hwm = 30
+ lwm = 15
+ dwm = 30
+
+[port "3"]
+ dcb = ppp, dcbx
+ bg_mem = 25
+ lpbk_mem = 25
+ hwm = 30
+ lwm = 15
+ dwm = 30
+
+[fini]
+ version = 0x14250007
+ checksum = 0xfcbadefb
+
+# Total resources used by above allocations:
+# Virtual Interfaces: 104
+# Ingress Queues/w Free Lists and Interrupts: 526
+# Egress Queues: 702
+# MPS TCAM Entries: 336
+# MSI-X Vectors: 736
+# Virtual Functions: 64
+#
+# $FreeBSD$
+#
diff --git a/sys/dev/cxgbe/common/t4fw_interface.h b/sys/dev/cxgbe/firmware/t4fw_interface.h
index 3eb961500ead..6d259a5d260e 100644
--- a/sys/dev/cxgbe/common/t4fw_interface.h
+++ b/sys/dev/cxgbe/firmware/t4fw_interface.h
@@ -37,16 +37,23 @@
enum fw_retval {
FW_SUCCESS = 0, /* completed sucessfully */
FW_EPERM = 1, /* operation not permitted */
+ FW_ENOENT = 2, /* no such file or directory */
FW_EIO = 5, /* input/output error; hw bad */
- FW_ENOEXEC = 8, /* Exec format error; inv microcode */
+ FW_ENOEXEC = 8, /* exec format error; inv microcode */
FW_EAGAIN = 11, /* try again */
FW_ENOMEM = 12, /* out of memory */
FW_EFAULT = 14, /* bad address; fw bad */
FW_EBUSY = 16, /* resource busy */
- FW_EEXIST = 17, /* File exists */
+ FW_EEXIST = 17, /* file exists */
FW_EINVAL = 22, /* invalid argument */
+ FW_ENOSPC = 28, /* no space left on device */
FW_ENOSYS = 38, /* functionality not implemented */
FW_EPROTO = 71, /* protocol error */
+ FW_EADDRINUSE = 98, /* address already in use */
+ FW_EADDRNOTAVAIL = 99, /* cannot assign requested address */
+ FW_ENETDOWN = 100, /* network is down */
+ FW_ENETUNREACH = 101, /* network is unreachable */
+ FW_ENOBUFS = 105, /* no buffer space available */
FW_ETIMEDOUT = 110, /* timeout */
FW_EINPROGRESS = 115, /* fw internal */
FW_SCSI_ABORT_REQUESTED = 128, /* */
@@ -62,6 +69,8 @@ enum fw_retval {
FW_ERR_RDEV_IMPL_LOGO = 138, /* */
FW_SCSI_UNDER_FLOW_ERR = 139, /* */
FW_SCSI_OVER_FLOW_ERR = 140, /* */
+ FW_SCSI_DDP_ERR = 141, /* DDP error */
+ FW_SCSI_TASK_ERR = 142, /* No SCSI tasks available */
};
/******************************************************************************
@@ -89,7 +98,7 @@ enum fw_wr_opcodes {
FW_RI_INV_LSTAG_WR = 0x1a,
FW_RI_WR = 0x0d,
FW_ISCSI_NODE_WR = 0x4a,
- FW_LASTC2E_WR = 0x4b
+ FW_LASTC2E_WR = 0x50
};
/*
@@ -512,8 +521,14 @@ struct fw_eth_tx_pkt_wr {
__be64 r3;
};
+#define S_FW_ETH_TX_PKT_WR_IMMDLEN 0
+#define M_FW_ETH_TX_PKT_WR_IMMDLEN 0x1ff
+#define V_FW_ETH_TX_PKT_WR_IMMDLEN(x) ((x) << S_FW_ETH_TX_PKT_WR_IMMDLEN)
+#define G_FW_ETH_TX_PKT_WR_IMMDLEN(x) \
+ (((x) >> S_FW_ETH_TX_PKT_WR_IMMDLEN) & M_FW_ETH_TX_PKT_WR_IMMDLEN)
+
struct fw_eth_tx_pkts_wr {
- __be32 op_immdlen;
+ __be32 op_pkd;
__be32 equiq_to_len16;
__be32 r3;
__be16 plen;
@@ -537,7 +552,7 @@ enum fw_flowc_mnem {
FW_FLOWC_MNEM_RCVNXT,
FW_FLOWC_MNEM_SNDBUF,
FW_FLOWC_MNEM_MSS,
- FW_FLOWC_MEM_TXDATAPLEN_MAX,
+ FW_FLOWC_MNEM_TXDATAPLEN_MAX,
};
struct fw_flowc_mnemval {
@@ -1469,22 +1484,129 @@ struct fw_ri_wr {
#define G_FW_RI_WR_P2PTYPE(x) \
(((x) >> S_FW_RI_WR_P2PTYPE) & M_FW_RI_WR_P2PTYPE)
-#ifdef FOISCSI
+/******************************************************************************
+ * S C S I W O R K R E Q U E S T s
+ **********************************************/
+
+
+/******************************************************************************
+ * F O i S C S I W O R K R E Q U E S T s
+ **********************************************/
+
+#define ISCSI_NAME_MAX_LEN 224
+#define ISCSI_ALIAS_MAX_LEN 224
+
+enum session_type {
+ ISCSI_SESSION_DISCOVERY = 0,
+ ISCSI_SESSION_NORMAL,
+};
+
+enum digest_val {
+ DIGEST_NONE = 0,
+ DIGEST_CRC32,
+ DIGEST_BOTH,
+};
+
+enum fw_iscsi_subops {
+ NODE_ONLINE = 1,
+ SESS_ONLINE,
+ CONN_ONLINE,
+ NODE_OFFLINE,
+ SESS_OFFLINE,
+ CONN_OFFLINE,
+ NODE_STATS,
+ SESS_STATS,
+ CONN_STATS,
+ UPDATE_IOHANDLE,
+};
+
+struct fw_iscsi_node_attr {
+ __u8 name_len;
+ __u8 node_name[ISCSI_NAME_MAX_LEN];
+ __u8 alias_len;
+ __u8 node_alias[ISCSI_ALIAS_MAX_LEN];
+};
+
+struct fw_iscsi_sess_attr {
+ __u8 sess_type;
+ __u8 seq_inorder;
+ __u8 pdu_inorder;
+ __u8 immd_data_en;
+ __u8 init_r2t_en;
+ __u8 erl;
+ __be16 max_conn;
+ __be16 max_r2t;
+ __be16 time2wait;
+ __be16 time2retain;
+ __be32 max_burst;
+ __be32 first_burst;
+};
+
+struct fw_iscsi_conn_attr {
+ __u8 hdr_digest;
+ __u8 data_digest;
+ __be32 max_rcv_dsl;
+ __be16 dst_port;
+ __be32 dst_addr;
+ __be16 src_port;
+ __be32 src_addr;
+ __be32 ping_tmo;
+};
+
+struct fw_iscsi_node_stats {
+ __be16 sess_count;
+ __be16 chap_fail_count;
+ __be16 login_count;
+ __be16 r1;
+};
+
+struct fw_iscsi_sess_stats {
+ __be32 rxbytes;
+ __be32 txbytes;
+ __be32 scmd_count;
+ __be32 read_cmds;
+ __be32 write_cmds;
+ __be32 read_bytes;
+ __be32 write_bytes;
+ __be32 scsi_err_count;
+ __be32 scsi_rst_count;
+ __be32 iscsi_tmf_count;
+ __be32 conn_count;
+};
+
+struct fw_iscsi_conn_stats {
+ __be32 txbytes;
+ __be32 rxbytes;
+ __be32 dataout;
+ __be32 datain;
+};
+
struct fw_iscsi_node_wr {
__u8 opcode;
__u8 subop;
- __u8 node_attr_to_compl;
- __u8 len16;
- __u8 status;
- __u8 r2;
__be16 immd_len;
+ __be32 flowid_len16;
__be64 cookie;
+ __u8 node_attr_to_compl;
+ __u8 status;
+ __be16 r1;
__be32 node_id;
__be32 ctrl_handle;
__be32 io_handle;
- __be32 r3;
};
+#define S_FW_ISCSI_NODE_WR_FLOWID 8
+#define M_FW_ISCSI_NODE_WR_FLOWID 0xfffff
+#define V_FW_ISCSI_NODE_WR_FLOWID(x) ((x) << S_FW_ISCSI_NODE_WR_FLOWID)
+#define G_FW_ISCSI_NODE_WR_FLOWID(x) \
+ (((x) >> S_FW_ISCSI_NODE_WR_FLOWID) & M_FW_ISCSI_NODE_WR_FLOWID)
+
+#define S_FW_ISCSI_NODE_WR_LEN16 0
+#define M_FW_ISCSI_NODE_WR_LEN16 0xff
+#define V_FW_ISCSI_NODE_WR_LEN16(x) ((x) << S_FW_ISCSI_NODE_WR_LEN16)
+#define G_FW_ISCSI_NODE_WR_LEN16(x) \
+ (((x) >> S_FW_ISCSI_NODE_WR_LEN16) & M_FW_ISCSI_NODE_WR_LEN16)
+
#define S_FW_ISCSI_NODE_WR_NODE_ATTR 7
#define M_FW_ISCSI_NODE_WR_NODE_ATTR 0x1
#define V_FW_ISCSI_NODE_WR_NODE_ATTR(x) ((x) << S_FW_ISCSI_NODE_WR_NODE_ATTR)
@@ -1527,7 +1649,109 @@ struct fw_iscsi_node_wr {
(((x) >> S_FW_ISCSI_NODE_WR_COMPL) & M_FW_ISCSI_NODE_WR_COMPL)
#define F_FW_ISCSI_NODE_WR_COMPL V_FW_ISCSI_NODE_WR_COMPL(1U)
-#endif
+#define FW_ISCSI_NODE_INVALID_ID 0xffffffff
+
+struct fw_scsi_iscsi_data {
+ __u8 r0;
+ __u8 fbit_to_tattr;
+ __be16 r2;
+ __be32 r3;
+ __u8 lun[8];
+ __be32 r4;
+ __be32 dlen;
+ __be32 r5;
+ __be32 r6;
+ __u8 cdb[16];
+};
+
+#define S_FW_SCSI_ISCSI_DATA_FBIT 7
+#define M_FW_SCSI_ISCSI_DATA_FBIT 0x1
+#define V_FW_SCSI_ISCSI_DATA_FBIT(x) ((x) << S_FW_SCSI_ISCSI_DATA_FBIT)
+#define G_FW_SCSI_ISCSI_DATA_FBIT(x) \
+ (((x) >> S_FW_SCSI_ISCSI_DATA_FBIT) & M_FW_SCSI_ISCSI_DATA_FBIT)
+#define F_FW_SCSI_ISCSI_DATA_FBIT V_FW_SCSI_ISCSI_DATA_FBIT(1U)
+
+#define S_FW_SCSI_ISCSI_DATA_RBIT 6
+#define M_FW_SCSI_ISCSI_DATA_RBIT 0x1
+#define V_FW_SCSI_ISCSI_DATA_RBIT(x) ((x) << S_FW_SCSI_ISCSI_DATA_RBIT)
+#define G_FW_SCSI_ISCSI_DATA_RBIT(x) \
+ (((x) >> S_FW_SCSI_ISCSI_DATA_RBIT) & M_FW_SCSI_ISCSI_DATA_RBIT)
+#define F_FW_SCSI_ISCSI_DATA_RBIT V_FW_SCSI_ISCSI_DATA_RBIT(1U)
+
+#define S_FW_SCSI_ISCSI_DATA_WBIT 5
+#define M_FW_SCSI_ISCSI_DATA_WBIT 0x1
+#define V_FW_SCSI_ISCSI_DATA_WBIT(x) ((x) << S_FW_SCSI_ISCSI_DATA_WBIT)
+#define G_FW_SCSI_ISCSI_DATA_WBIT(x) \
+ (((x) >> S_FW_SCSI_ISCSI_DATA_WBIT) & M_FW_SCSI_ISCSI_DATA_WBIT)
+#define F_FW_SCSI_ISCSI_DATA_WBIT V_FW_SCSI_ISCSI_DATA_WBIT(1U)
+
+#define S_FW_SCSI_ISCSI_DATA_TATTR 0
+#define M_FW_SCSI_ISCSI_DATA_TATTR 0x7
+#define V_FW_SCSI_ISCSI_DATA_TATTR(x) ((x) << S_FW_SCSI_ISCSI_DATA_TATTR)
+#define G_FW_SCSI_ISCSI_DATA_TATTR(x) \
+ (((x) >> S_FW_SCSI_ISCSI_DATA_TATTR) & M_FW_SCSI_ISCSI_DATA_TATTR)
+
+#define FW_SCSI_ISCSI_DATA_TATTR_UNTAGGED 0
+#define FW_SCSI_ISCSI_DATA_TATTR_SIMPLE 1
+#define FW_SCSI_ISCSI_DATA_TATTR_ORDERED 2
+#define FW_SCSI_ISCSI_DATA_TATTR_HEADOQ 3
+#define FW_SCSI_ISCSI_DATA_TATTR_ACA 4
+
+#define FW_SCSI_ISCSI_TMF_OP 0x02
+#define FW_SCSI_ISCSI_ABORT_FUNC 0x01
+#define FW_SCSI_ISCSI_LUN_RESET_FUNC 0x05
+#define FW_SCSI_ISCSI_RESERVED_TAG 0xffffffff
+
+struct fw_scsi_iscsi_rsp {
+ __u8 r0;
+ __u8 sbit_to_uflow;
+ __u8 response;
+ __u8 status;
+ __be32 r4;
+ __u8 r5[32];
+ __be32 bidir_res_cnt;
+ __be32 res_cnt;
+ __u8 sense_data[128];
+};
+
+#define S_FW_SCSI_ISCSI_RSP_SBIT 7
+#define M_FW_SCSI_ISCSI_RSP_SBIT 0x1
+#define V_FW_SCSI_ISCSI_RSP_SBIT(x) ((x) << S_FW_SCSI_ISCSI_RSP_SBIT)
+#define G_FW_SCSI_ISCSI_RSP_SBIT(x) \
+ (((x) >> S_FW_SCSI_ISCSI_RSP_SBIT) & M_FW_SCSI_ISCSI_RSP_SBIT)
+#define F_FW_SCSI_ISCSI_RSP_SBIT V_FW_SCSI_ISCSI_RSP_SBIT(1U)
+
+#define S_FW_SCSI_ISCSI_RSP_BIDIR_OFLOW 4
+#define M_FW_SCSI_ISCSI_RSP_BIDIR_OFLOW 0x1
+#define V_FW_SCSI_ISCSI_RSP_BIDIR_OFLOW(x) \
+ ((x) << S_FW_SCSI_ISCSI_RSP_BIDIR_OFLOW)
+#define G_FW_SCSI_ISCSI_RSP_BIDIR_OFLOW(x) \
+ (((x) >> S_FW_SCSI_ISCSI_RSP_BIDIR_OFLOW) & \
+ M_FW_SCSI_ISCSI_RSP_BIDIR_OFLOW)
+#define F_FW_SCSI_ISCSI_RSP_BIDIR_OFLOW V_FW_SCSI_ISCSI_RSP_BIDIR_OFLOW(1U)
+
+#define S_FW_SCSI_ISCSI_RSP_BIDIR_UFLOW 3
+#define M_FW_SCSI_ISCSI_RSP_BIDIR_UFLOW 0x1
+#define V_FW_SCSI_ISCSI_RSP_BIDIR_UFLOW(x) \
+ ((x) << S_FW_SCSI_ISCSI_RSP_BIDIR_UFLOW)
+#define G_FW_SCSI_ISCSI_RSP_BIDIR_UFLOW(x) \
+ (((x) >> S_FW_SCSI_ISCSI_RSP_BIDIR_UFLOW) & \
+ M_FW_SCSI_ISCSI_RSP_BIDIR_UFLOW)
+#define F_FW_SCSI_ISCSI_RSP_BIDIR_UFLOW V_FW_SCSI_ISCSI_RSP_BIDIR_UFLOW(1U)
+
+#define S_FW_SCSI_ISCSI_RSP_OFLOW 2
+#define M_FW_SCSI_ISCSI_RSP_OFLOW 0x1
+#define V_FW_SCSI_ISCSI_RSP_OFLOW(x) ((x) << S_FW_SCSI_ISCSI_RSP_OFLOW)
+#define G_FW_SCSI_ISCSI_RSP_OFLOW(x) \
+ (((x) >> S_FW_SCSI_ISCSI_RSP_OFLOW) & M_FW_SCSI_ISCSI_RSP_OFLOW)
+#define F_FW_SCSI_ISCSI_RSP_OFLOW V_FW_SCSI_ISCSI_RSP_OFLOW(1U)
+
+#define S_FW_SCSI_ISCSI_RSP_UFLOW 1
+#define M_FW_SCSI_ISCSI_RSP_UFLOW 0x1
+#define V_FW_SCSI_ISCSI_RSP_UFLOW(x) ((x) << S_FW_SCSI_ISCSI_RSP_UFLOW)
+#define G_FW_SCSI_ISCSI_RSP_UFLOW(x) \
+ (((x) >> S_FW_SCSI_ISCSI_RSP_UFLOW) & M_FW_SCSI_ISCSI_RSP_UFLOW)
+#define F_FW_SCSI_ISCSI_RSP_UFLOW V_FW_SCSI_ISCSI_RSP_UFLOW(1U)
/******************************************************************************
* C O M M A N D s
@@ -1543,6 +1767,16 @@ struct fw_iscsi_node_wr {
*/
#define FW_CMD_MAX_TIMEOUT 10000
+/*
+ * If a host driver does a HELLO and discovers that there's already a MASTER
+ * selected, we may have to wait for that MASTER to finish issuing RESET,
+ * configuration and INITIALIZE commands. Also, there's a possibility that
+ * our own HELLO may get lost if it happens right as the MASTER is issuing a
+ * RESET command, so we need to be willing to make a few retries of our HELLO.
+ */
+#define FW_CMD_HELLO_TIMEOUT (3 * FW_CMD_MAX_TIMEOUT)
+#define FW_CMD_HELLO_RETRIES 3
+
enum fw_cmd_opcodes {
FW_LDST_CMD = 0x01,
FW_RESET_CMD = 0x03,
@@ -1575,10 +1809,11 @@ enum fw_cmd_opcodes {
FW_SCHED_CMD = 0x24,
FW_DEVLOG_CMD = 0x25,
FW_NETIF_CMD = 0x26,
+ FW_WATCHDOG_CMD = 0x27,
+ FW_CLIP_CMD = 0x28,
FW_LASTC2E_CMD = 0x40,
FW_ERROR_CMD = 0x80,
FW_DEBUG_CMD = 0x81,
-
};
enum fw_cmd_cap {
@@ -1696,7 +1931,7 @@ struct fw_ldst_cmd {
} addrval;
struct fw_ldst_idctxt {
__be32 physid;
- __be32 msg_pkd;
+ __be32 msg_ctxtflush;
__be32 ctxt_data7;
__be32 ctxt_data6;
__be32 ctxt_data5;
@@ -1769,6 +2004,13 @@ struct fw_ldst_cmd {
(((x) >> S_FW_LDST_CMD_MSG) & M_FW_LDST_CMD_MSG)
#define F_FW_LDST_CMD_MSG V_FW_LDST_CMD_MSG(1U)
+#define S_FW_LDST_CMD_CTXTFLUSH 30
+#define M_FW_LDST_CMD_CTXTFLUSH 0x1
+#define V_FW_LDST_CMD_CTXTFLUSH(x) ((x) << S_FW_LDST_CMD_CTXTFLUSH)
+#define G_FW_LDST_CMD_CTXTFLUSH(x) \
+ (((x) >> S_FW_LDST_CMD_CTXTFLUSH) & M_FW_LDST_CMD_CTXTFLUSH)
+#define F_FW_LDST_CMD_CTXTFLUSH V_FW_LDST_CMD_CTXTFLUSH(1U)
+
#define S_FW_LDST_CMD_PADDR 8
#define M_FW_LDST_CMD_PADDR 0x1f
#define V_FW_LDST_CMD_PADDR(x) ((x) << S_FW_LDST_CMD_PADDR)
@@ -1852,13 +2094,27 @@ struct fw_reset_cmd {
__be32 op_to_write;
__be32 retval_len16;
__be32 val;
- __be32 r3;
+ __be32 halt_pkd;
+};
+
+#define S_FW_RESET_CMD_HALT 31
+#define M_FW_RESET_CMD_HALT 0x1
+#define V_FW_RESET_CMD_HALT(x) ((x) << S_FW_RESET_CMD_HALT)
+#define G_FW_RESET_CMD_HALT(x) \
+ (((x) >> S_FW_RESET_CMD_HALT) & M_FW_RESET_CMD_HALT)
+#define F_FW_RESET_CMD_HALT V_FW_RESET_CMD_HALT(1U)
+
+enum {
+ FW_HELLO_CMD_STAGE_OS = 0,
+ FW_HELLO_CMD_STAGE_PREOS0 = 1,
+ FW_HELLO_CMD_STAGE_PREOS1 = 2,
+ FW_HELLO_CMD_STAGE_POSTOS = 3,
};
struct fw_hello_cmd {
__be32 op_to_write;
__be32 retval_len16;
- __be32 err_to_mbasyncnot;
+ __be32 err_to_clearinit;
__be32 fwrev;
};
@@ -1909,6 +2165,19 @@ struct fw_hello_cmd {
#define G_FW_HELLO_CMD_MBASYNCNOT(x) \
(((x) >> S_FW_HELLO_CMD_MBASYNCNOT) & M_FW_HELLO_CMD_MBASYNCNOT)
+#define S_FW_HELLO_CMD_STAGE 17
+#define M_FW_HELLO_CMD_STAGE 0x7
+#define V_FW_HELLO_CMD_STAGE(x) ((x) << S_FW_HELLO_CMD_STAGE)
+#define G_FW_HELLO_CMD_STAGE(x) \
+ (((x) >> S_FW_HELLO_CMD_STAGE) & M_FW_HELLO_CMD_STAGE)
+
+#define S_FW_HELLO_CMD_CLEARINIT 16
+#define M_FW_HELLO_CMD_CLEARINIT 0x1
+#define V_FW_HELLO_CMD_CLEARINIT(x) ((x) << S_FW_HELLO_CMD_CLEARINIT)
+#define G_FW_HELLO_CMD_CLEARINIT(x) \
+ (((x) >> S_FW_HELLO_CMD_CLEARINIT) & M_FW_HELLO_CMD_CLEARINIT)
+#define F_FW_HELLO_CMD_CLEARINIT V_FW_HELLO_CMD_CLEARINIT(1U)
+
struct fw_bye_cmd {
__be32 op_to_write;
__be32 retval_len16;
@@ -1989,6 +2258,8 @@ enum fw_caps_config_nic {
FW_CAPS_CONFIG_NIC = 0x00000001,
FW_CAPS_CONFIG_NIC_VM = 0x00000002,
FW_CAPS_CONFIG_NIC_IDS = 0x00000004,
+ FW_CAPS_CONFIG_NIC_UM = 0x00000008,
+ FW_CAPS_CONFIG_NIC_UM_ISGL = 0x00000010,
};
enum fw_caps_config_toe {
@@ -2015,9 +2286,16 @@ enum fw_caps_config_fcoe {
FW_CAPS_CONFIG_FCOE_CTRL_OFLD = 0x00000004,
};
+enum fw_memtype_cf {
+ FW_MEMTYPE_CF_EDC0 = 0x0,
+ FW_MEMTYPE_CF_EDC1 = 0x1,
+ FW_MEMTYPE_CF_EXTMEM = 0x2,
+ FW_MEMTYPE_CF_FLASH = 0x4,
+};
+
struct fw_caps_config_cmd {
__be32 op_to_write;
- __be32 retval_len16;
+ __be32 cfvalid_to_len16;
__be32 r2;
__be32 hwmbitmap;
__be16 nbmcaps;
@@ -2030,9 +2308,33 @@ struct fw_caps_config_cmd {
__be16 r4;
__be16 iscsicaps;
__be16 fcoecaps;
- __be32 r5;
- __be64 r6;
-};
+ __be32 cfcsum;
+ __be32 finiver;
+ __be32 finicsum;
+};
+
+#define S_FW_CAPS_CONFIG_CMD_CFVALID 27
+#define M_FW_CAPS_CONFIG_CMD_CFVALID 0x1
+#define V_FW_CAPS_CONFIG_CMD_CFVALID(x) ((x) << S_FW_CAPS_CONFIG_CMD_CFVALID)
+#define G_FW_CAPS_CONFIG_CMD_CFVALID(x) \
+ (((x) >> S_FW_CAPS_CONFIG_CMD_CFVALID) & M_FW_CAPS_CONFIG_CMD_CFVALID)
+#define F_FW_CAPS_CONFIG_CMD_CFVALID V_FW_CAPS_CONFIG_CMD_CFVALID(1U)
+
+#define S_FW_CAPS_CONFIG_CMD_MEMTYPE_CF 24
+#define M_FW_CAPS_CONFIG_CMD_MEMTYPE_CF 0x7
+#define V_FW_CAPS_CONFIG_CMD_MEMTYPE_CF(x) \
+ ((x) << S_FW_CAPS_CONFIG_CMD_MEMTYPE_CF)
+#define G_FW_CAPS_CONFIG_CMD_MEMTYPE_CF(x) \
+ (((x) >> S_FW_CAPS_CONFIG_CMD_MEMTYPE_CF) & \
+ M_FW_CAPS_CONFIG_CMD_MEMTYPE_CF)
+
+#define S_FW_CAPS_CONFIG_CMD_MEMADDR64K_CF 16
+#define M_FW_CAPS_CONFIG_CMD_MEMADDR64K_CF 0xff
+#define V_FW_CAPS_CONFIG_CMD_MEMADDR64K_CF(x) \
+ ((x) << S_FW_CAPS_CONFIG_CMD_MEMADDR64K_CF)
+#define G_FW_CAPS_CONFIG_CMD_MEMADDR64K_CF(x) \
+ (((x) >> S_FW_CAPS_CONFIG_CMD_MEMADDR64K_CF) & \
+ M_FW_CAPS_CONFIG_CMD_MEMADDR64K_CF)
/*
* params command mnemonics
@@ -2056,15 +2358,17 @@ enum fw_params_param_dev {
* Lookup Engine
*/
FW_PARAMS_PARAM_DEV_FLOWC_BUFFIFO_SZ = 0x03,
- FW_PARAMS_PARAM_DEV_INTVER_NIC = 0x04,
- FW_PARAMS_PARAM_DEV_INTVER_VNIC = 0x05,
- FW_PARAMS_PARAM_DEV_INTVER_OFLD = 0x06,
- FW_PARAMS_PARAM_DEV_INTVER_RI = 0x07,
- FW_PARAMS_PARAM_DEV_INTVER_ISCSIPDU = 0x08,
- FW_PARAMS_PARAM_DEV_INTVER_ISCSI = 0x09,
- FW_PARAMS_PARAM_DEV_INTVER_FCOE = 0x0A,
+ FW_PARAMS_PARAM_DEV_INTFVER_NIC = 0x04,
+ FW_PARAMS_PARAM_DEV_INTFVER_VNIC = 0x05,
+ FW_PARAMS_PARAM_DEV_INTFVER_OFLD = 0x06,
+ FW_PARAMS_PARAM_DEV_INTFVER_RI = 0x07,
+ FW_PARAMS_PARAM_DEV_INTFVER_ISCSIPDU = 0x08,
+ FW_PARAMS_PARAM_DEV_INTFVER_ISCSI = 0x09,
+ FW_PARAMS_PARAM_DEV_INTFVER_FCOE = 0x0A,
FW_PARAMS_PARAM_DEV_FWREV = 0x0B,
FW_PARAMS_PARAM_DEV_TPREV = 0x0C,
+ FW_PARAMS_PARAM_DEV_CF = 0x0D,
+ FW_PARAMS_PARAM_DEV_BYPASS = 0x0E,
};
/*
@@ -2119,6 +2423,23 @@ enum fw_params_param_dmaq {
FW_PARAMS_PARAM_DMAQ_EQ_SCHEDCLASS_ETH = 0x12,
};
+/*
+ * dev bypass parameters; actions and modes
+ */
+enum fw_params_param_dev_bypass {
+
+ /* actions
+ */
+ FW_PARAMS_PARAM_DEV_BYPASS_PFAIL = 0x00,
+ FW_PARAMS_PARAM_DEV_BYPASS_CURRENT = 0x01,
+
+ /* modes
+ */
+ FW_PARAMS_PARAM_DEV_BYPASS_NORMAL = 0x00,
+ FW_PARAMS_PARAM_DEV_BYPASS_DROP = 0x1,
+ FW_PARAMS_PARAM_DEV_BYPASS_BYPASS = 0x2,
+};
+
#define S_FW_PARAMS_MNEM 24
#define M_FW_PARAMS_MNEM 0xff
#define V_FW_PARAMS_MNEM(x) ((x) << S_FW_PARAMS_MNEM)
@@ -2271,6 +2592,7 @@ struct fw_pfvf_cmd {
#define V_FW_PFVF_CMD_NETHCTRL(x) ((x) << S_FW_PFVF_CMD_NETHCTRL)
#define G_FW_PFVF_CMD_NETHCTRL(x) \
(((x) >> S_FW_PFVF_CMD_NETHCTRL) & M_FW_PFVF_CMD_NETHCTRL)
+
/*
* ingress queue type; the first 1K ingress queues can have associated 0,
* 1 or 2 free lists and an interrupt, all other ingress queues lack these
@@ -3518,6 +3840,7 @@ struct fw_eq_ofld_cmd {
#define V_FW_EQ_OFLD_CMD_EQSIZE(x) ((x) << S_FW_EQ_OFLD_CMD_EQSIZE)
#define G_FW_EQ_OFLD_CMD_EQSIZE(x) \
(((x) >> S_FW_EQ_OFLD_CMD_EQSIZE) & M_FW_EQ_OFLD_CMD_EQSIZE)
+
/* Macros for VIID parsing:
VIID - [10:8] PFN, [7] VI Valid, [6:0] VI number */
#define S_FW_VIID_PFN 8
@@ -4081,8 +4404,10 @@ enum fw_port_action {
FW_PORT_ACTION_L2_WOL_MODE_EN = 0x0012,
FW_PORT_ACTION_LPBK_TO_NORMAL = 0x0020,
FW_PORT_ACTION_L1_SS_LPBK_ASIC = 0x0021,
+ FW_PORT_ACTION_MAC_LPBK = 0x0022,
FW_PORT_ACTION_L1_WS_LPBK_ASIC = 0x0023,
FW_PORT_ACTION_L1_EXT_LPBK = 0x0026,
+ FW_PORT_ACTION_PCS_LPBK = 0x0028,
FW_PORT_ACTION_PHY_RESET = 0x0040,
FW_PORT_ACTION_PMA_RESET = 0x0041,
FW_PORT_ACTION_PCS_RESET = 0x0042,
@@ -4164,7 +4489,8 @@ struct fw_port_cmd {
struct fw_port_dcb_pgrate {
__u8 type;
__u8 apply_pkd;
- __u8 r10_lo[6];
+ __u8 r10_lo[5];
+ __u8 num_tcs_supported;
__u8 pgrate[8];
} pgrate;
struct fw_port_dcb_priorate {
@@ -4181,11 +4507,12 @@ struct fw_port_cmd {
} pfc;
struct fw_port_app_priority {
__u8 type;
- __u8 r10_lo[3];
- __u8 prio;
- __u8 sel;
+ __u8 r10[2];
+ __u8 idx;
+ __u8 user_prio_map;
+ __u8 sel_field;
__be16 protocolid;
- __u8 r12[8];
+ __be64 r12;
} app_priority;
} dcb;
} u;
@@ -4337,20 +4664,6 @@ struct fw_port_cmd {
(((x) >> S_FW_PORT_CMD_APPLY) & M_FW_PORT_CMD_APPLY)
#define F_FW_PORT_CMD_APPLY V_FW_PORT_CMD_APPLY(1U)
-#define S_FW_PORT_CMD_APPLY 7
-#define M_FW_PORT_CMD_APPLY 0x1
-#define V_FW_PORT_CMD_APPLY(x) ((x) << S_FW_PORT_CMD_APPLY)
-#define G_FW_PORT_CMD_APPLY(x) \
- (((x) >> S_FW_PORT_CMD_APPLY) & M_FW_PORT_CMD_APPLY)
-#define F_FW_PORT_CMD_APPLY V_FW_PORT_CMD_APPLY(1U)
-
-#define S_FW_PORT_CMD_APPLY 7
-#define M_FW_PORT_CMD_APPLY 0x1
-#define V_FW_PORT_CMD_APPLY(x) ((x) << S_FW_PORT_CMD_APPLY)
-#define G_FW_PORT_CMD_APPLY(x) \
- (((x) >> S_FW_PORT_CMD_APPLY) & M_FW_PORT_CMD_APPLY)
-#define F_FW_PORT_CMD_APPLY V_FW_PORT_CMD_APPLY(1U)
-
/*
* These are configured into the VPD and hence tools that generate
* VPD may use this enumeration.
@@ -4383,6 +4696,7 @@ enum fw_port_module_type {
FW_PORT_MOD_TYPE_TWINAX_PASSIVE = 0x4,
FW_PORT_MOD_TYPE_TWINAX_ACTIVE = 0x5,
FW_PORT_MOD_TYPE_LRM = 0x6,
+ FW_PORT_MOD_TYPE_ERROR = M_FW_PORT_CMD_MODTYPE - 3,
FW_PORT_MOD_TYPE_UNKNOWN = M_FW_PORT_CMD_MODTYPE - 2,
FW_PORT_MOD_TYPE_NOTSUPPORTED = M_FW_PORT_CMD_MODTYPE - 1,
FW_PORT_MOD_TYPE_NONE = M_FW_PORT_CMD_MODTYPE
@@ -5189,15 +5503,12 @@ struct fw_rss_vi_config_cmd {
#define F_FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN \
V_FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN(1U)
-#define S_FW_RSS_VI_CONFIG_CMD_UDPEN 0
-#define M_FW_RSS_VI_CONFIG_CMD_UDPEN 0x1
-#define V_FW_RSS_VI_CONFIG_CMD_UDPEN(x) \
- ((x) << S_FW_RSS_VI_CONFIG_CMD_UDPEN)
-#define G_FW_RSS_VI_CONFIG_CMD_UDPEN(x) \
- (((x) >> S_FW_RSS_VI_CONFIG_CMD_UDPEN) & \
- M_FW_RSS_VI_CONFIG_CMD_UDPEN)
-#define F_FW_RSS_VI_CONFIG_CMD_UDPEN \
- V_FW_RSS_VI_CONFIG_CMD_UDPEN(1U)
+#define S_FW_RSS_VI_CONFIG_CMD_UDPEN 0
+#define M_FW_RSS_VI_CONFIG_CMD_UDPEN 0x1
+#define V_FW_RSS_VI_CONFIG_CMD_UDPEN(x) ((x) << S_FW_RSS_VI_CONFIG_CMD_UDPEN)
+#define G_FW_RSS_VI_CONFIG_CMD_UDPEN(x) \
+ (((x) >> S_FW_RSS_VI_CONFIG_CMD_UDPEN) & M_FW_RSS_VI_CONFIG_CMD_UDPEN)
+#define F_FW_RSS_VI_CONFIG_CMD_UDPEN V_FW_RSS_VI_CONFIG_CMD_UDPEN(1U)
enum fw_sched_sc {
FW_SCHED_SC_CONFIG = 0,
@@ -5352,103 +5663,97 @@ struct fw_devlog_cmd {
M_FW_DEVLOG_CMD_MEMADDR16_DEVLOG)
struct fw_netif_cmd {
- __be32 op_portid;
- __be32 retval_to_len16;
- __be32 add_to_ipv4gw;
- __be32 vlanid_mtuval;
+ __be32 op_to_ipv4gw;
+ __be32 retval_len16;
+ __be32 netifi_ifadridx;
+ __be32 portid_to_mtuval;
__be32 gwaddr;
__be32 addr;
__be32 nmask;
__be32 bcaddr;
};
-#define S_FW_NETIF_CMD_PORTID 0
-#define M_FW_NETIF_CMD_PORTID 0xf
-#define V_FW_NETIF_CMD_PORTID(x) ((x) << S_FW_NETIF_CMD_PORTID)
-#define G_FW_NETIF_CMD_PORTID(x) \
- (((x) >> S_FW_NETIF_CMD_PORTID) & M_FW_NETIF_CMD_PORTID)
-
-#define S_FW_NETIF_CMD_RETVAL 24
-#define M_FW_NETIF_CMD_RETVAL 0xff
-#define V_FW_NETIF_CMD_RETVAL(x) ((x) << S_FW_NETIF_CMD_RETVAL)
-#define G_FW_NETIF_CMD_RETVAL(x) \
- (((x) >> S_FW_NETIF_CMD_RETVAL) & M_FW_NETIF_CMD_RETVAL)
-
-#define S_FW_NETIF_CMD_IFIDX 16
-#define M_FW_NETIF_CMD_IFIDX 0xff
-#define V_FW_NETIF_CMD_IFIDX(x) ((x) << S_FW_NETIF_CMD_IFIDX)
-#define G_FW_NETIF_CMD_IFIDX(x) \
- (((x) >> S_FW_NETIF_CMD_IFIDX) & M_FW_NETIF_CMD_IFIDX)
-
-#define S_FW_NETIF_CMD_LEN16 0
-#define M_FW_NETIF_CMD_LEN16 0xff
-#define V_FW_NETIF_CMD_LEN16(x) ((x) << S_FW_NETIF_CMD_LEN16)
-#define G_FW_NETIF_CMD_LEN16(x) \
- (((x) >> S_FW_NETIF_CMD_LEN16) & M_FW_NETIF_CMD_LEN16)
-
-#define S_FW_NETIF_CMD_ADD 31
+#define S_FW_NETIF_CMD_ADD 20
#define M_FW_NETIF_CMD_ADD 0x1
#define V_FW_NETIF_CMD_ADD(x) ((x) << S_FW_NETIF_CMD_ADD)
#define G_FW_NETIF_CMD_ADD(x) \
(((x) >> S_FW_NETIF_CMD_ADD) & M_FW_NETIF_CMD_ADD)
#define F_FW_NETIF_CMD_ADD V_FW_NETIF_CMD_ADD(1U)
-#define S_FW_NETIF_CMD_LINK 30
+#define S_FW_NETIF_CMD_LINK 19
#define M_FW_NETIF_CMD_LINK 0x1
#define V_FW_NETIF_CMD_LINK(x) ((x) << S_FW_NETIF_CMD_LINK)
#define G_FW_NETIF_CMD_LINK(x) \
(((x) >> S_FW_NETIF_CMD_LINK) & M_FW_NETIF_CMD_LINK)
#define F_FW_NETIF_CMD_LINK V_FW_NETIF_CMD_LINK(1U)
-#define S_FW_NETIF_CMD_VLAN 29
+#define S_FW_NETIF_CMD_VLAN 18
#define M_FW_NETIF_CMD_VLAN 0x1
#define V_FW_NETIF_CMD_VLAN(x) ((x) << S_FW_NETIF_CMD_VLAN)
#define G_FW_NETIF_CMD_VLAN(x) \
(((x) >> S_FW_NETIF_CMD_VLAN) & M_FW_NETIF_CMD_VLAN)
#define F_FW_NETIF_CMD_VLAN V_FW_NETIF_CMD_VLAN(1U)
-#define S_FW_NETIF_CMD_MTU 28
+#define S_FW_NETIF_CMD_MTU 17
#define M_FW_NETIF_CMD_MTU 0x1
#define V_FW_NETIF_CMD_MTU(x) ((x) << S_FW_NETIF_CMD_MTU)
#define G_FW_NETIF_CMD_MTU(x) \
(((x) >> S_FW_NETIF_CMD_MTU) & M_FW_NETIF_CMD_MTU)
#define F_FW_NETIF_CMD_MTU V_FW_NETIF_CMD_MTU(1U)
-#define S_FW_NETIF_CMD_DHCP 27
+#define S_FW_NETIF_CMD_DHCP 16
#define M_FW_NETIF_CMD_DHCP 0x1
#define V_FW_NETIF_CMD_DHCP(x) ((x) << S_FW_NETIF_CMD_DHCP)
#define G_FW_NETIF_CMD_DHCP(x) \
(((x) >> S_FW_NETIF_CMD_DHCP) & M_FW_NETIF_CMD_DHCP)
#define F_FW_NETIF_CMD_DHCP V_FW_NETIF_CMD_DHCP(1U)
-#define S_FW_NETIF_CMD_IPV4BCADDR 3
+#define S_FW_NETIF_CMD_IPV4BCADDR 15
#define M_FW_NETIF_CMD_IPV4BCADDR 0x1
#define V_FW_NETIF_CMD_IPV4BCADDR(x) ((x) << S_FW_NETIF_CMD_IPV4BCADDR)
#define G_FW_NETIF_CMD_IPV4BCADDR(x) \
(((x) >> S_FW_NETIF_CMD_IPV4BCADDR) & M_FW_NETIF_CMD_IPV4BCADDR)
#define F_FW_NETIF_CMD_IPV4BCADDR V_FW_NETIF_CMD_IPV4BCADDR(1U)
-#define S_FW_NETIF_CMD_IPV4NMASK 2
+#define S_FW_NETIF_CMD_IPV4NMASK 14
#define M_FW_NETIF_CMD_IPV4NMASK 0x1
#define V_FW_NETIF_CMD_IPV4NMASK(x) ((x) << S_FW_NETIF_CMD_IPV4NMASK)
#define G_FW_NETIF_CMD_IPV4NMASK(x) \
(((x) >> S_FW_NETIF_CMD_IPV4NMASK) & M_FW_NETIF_CMD_IPV4NMASK)
#define F_FW_NETIF_CMD_IPV4NMASK V_FW_NETIF_CMD_IPV4NMASK(1U)
-#define S_FW_NETIF_CMD_IPV4ADDR 1
+#define S_FW_NETIF_CMD_IPV4ADDR 13
#define M_FW_NETIF_CMD_IPV4ADDR 0x1
#define V_FW_NETIF_CMD_IPV4ADDR(x) ((x) << S_FW_NETIF_CMD_IPV4ADDR)
#define G_FW_NETIF_CMD_IPV4ADDR(x) \
(((x) >> S_FW_NETIF_CMD_IPV4ADDR) & M_FW_NETIF_CMD_IPV4ADDR)
#define F_FW_NETIF_CMD_IPV4ADDR V_FW_NETIF_CMD_IPV4ADDR(1U)
-#define S_FW_NETIF_CMD_IPV4GW 0
+#define S_FW_NETIF_CMD_IPV4GW 12
#define M_FW_NETIF_CMD_IPV4GW 0x1
#define V_FW_NETIF_CMD_IPV4GW(x) ((x) << S_FW_NETIF_CMD_IPV4GW)
#define G_FW_NETIF_CMD_IPV4GW(x) \
(((x) >> S_FW_NETIF_CMD_IPV4GW) & M_FW_NETIF_CMD_IPV4GW)
#define F_FW_NETIF_CMD_IPV4GW V_FW_NETIF_CMD_IPV4GW(1U)
+#define S_FW_NETIF_CMD_NETIFI 8
+#define M_FW_NETIF_CMD_NETIFI 0xffffff
+#define V_FW_NETIF_CMD_NETIFI(x) ((x) << S_FW_NETIF_CMD_NETIFI)
+#define G_FW_NETIF_CMD_NETIFI(x) \
+ (((x) >> S_FW_NETIF_CMD_NETIFI) & M_FW_NETIF_CMD_NETIFI)
+
+#define S_FW_NETIF_CMD_IFADRIDX 0
+#define M_FW_NETIF_CMD_IFADRIDX 0xff
+#define V_FW_NETIF_CMD_IFADRIDX(x) ((x) << S_FW_NETIF_CMD_IFADRIDX)
+#define G_FW_NETIF_CMD_IFADRIDX(x) \
+ (((x) >> S_FW_NETIF_CMD_IFADRIDX) & M_FW_NETIF_CMD_IFADRIDX)
+
+#define S_FW_NETIF_CMD_PORTID 28
+#define M_FW_NETIF_CMD_PORTID 0xf
+#define V_FW_NETIF_CMD_PORTID(x) ((x) << S_FW_NETIF_CMD_PORTID)
+#define G_FW_NETIF_CMD_PORTID(x) \
+ (((x) >> S_FW_NETIF_CMD_PORTID) & M_FW_NETIF_CMD_PORTID)
+
#define S_FW_NETIF_CMD_VLANID 16
#define M_FW_NETIF_CMD_VLANID 0xfff
#define V_FW_NETIF_CMD_VLANID(x) ((x) << S_FW_NETIF_CMD_VLANID)
@@ -5461,6 +5766,42 @@ struct fw_netif_cmd {
#define G_FW_NETIF_CMD_MTUVAL(x) \
(((x) >> S_FW_NETIF_CMD_MTUVAL) & M_FW_NETIF_CMD_MTUVAL)
+enum fw_watchdog_actions {
+ FW_WATCHDOG_ACTION_FLR = 0x1,
+ FW_WATCHDOG_ACTION_BYPASS = 0x2,
+};
+
+#define FW_WATCHDOG_MAX_TIMEOUT_SECS 60
+
+struct fw_watchdog_cmd {
+ __be32 op_to_write;
+ __be32 retval_len16;
+ __be32 timeout;
+ __be32 actions;
+};
+
+struct fw_clip_cmd {
+ __be32 op_to_write;
+ __be32 alloc_to_len16;
+ __be64 ip_hi;
+ __be64 ip_lo;
+ __be32 r4[2];
+};
+
+#define S_FW_CLIP_CMD_ALLOC 31
+#define M_FW_CLIP_CMD_ALLOC 0x1
+#define V_FW_CLIP_CMD_ALLOC(x) ((x) << S_FW_CLIP_CMD_ALLOC)
+#define G_FW_CLIP_CMD_ALLOC(x) \
+ (((x) >> S_FW_CLIP_CMD_ALLOC) & M_FW_CLIP_CMD_ALLOC)
+#define F_FW_CLIP_CMD_ALLOC V_FW_CLIP_CMD_ALLOC(1U)
+
+#define S_FW_CLIP_CMD_FREE 30
+#define M_FW_CLIP_CMD_FREE 0x1
+#define V_FW_CLIP_CMD_FREE(x) ((x) << S_FW_CLIP_CMD_FREE)
+#define G_FW_CLIP_CMD_FREE(x) \
+ (((x) >> S_FW_CLIP_CMD_FREE) & M_FW_CLIP_CMD_FREE)
+#define F_FW_CLIP_CMD_FREE V_FW_CLIP_CMD_FREE(1U)
+
enum fw_error_type {
FW_ERROR_TYPE_EXCEPTION = 0x0,
FW_ERROR_TYPE_HWMODULE = 0x1,
@@ -5570,6 +5911,94 @@ struct fw_debug_cmd {
#define G_FW_DEBUG_CMD_TYPE(x) \
(((x) >> S_FW_DEBUG_CMD_TYPE) & M_FW_DEBUG_CMD_TYPE)
+
+/******************************************************************************
+ * P C I E F W R E G I S T E R
+ **************************************/
+
+/**
+ * Register definitions for the PCIE_FW register which the firmware uses
+ * to retain status across RESETs. This register should be considered
+ * as a READ-ONLY register for Host Software and only to be used to
+ * track firmware initialization/error state, etc.
+ */
+#define S_PCIE_FW_ERR 31
+#define M_PCIE_FW_ERR 0x1
+#define V_PCIE_FW_ERR(x) ((x) << S_PCIE_FW_ERR)
+#define G_PCIE_FW_ERR(x) (((x) >> S_PCIE_FW_ERR) & M_PCIE_FW_ERR)
+#define F_PCIE_FW_ERR V_PCIE_FW_ERR(1U)
+
+#define S_PCIE_FW_INIT 30
+#define M_PCIE_FW_INIT 0x1
+#define V_PCIE_FW_INIT(x) ((x) << S_PCIE_FW_INIT)
+#define G_PCIE_FW_INIT(x) (((x) >> S_PCIE_FW_INIT) & M_PCIE_FW_INIT)
+#define F_PCIE_FW_INIT V_PCIE_FW_INIT(1U)
+
+#define S_PCIE_FW_HALT 29
+#define M_PCIE_FW_HALT 0x1
+#define V_PCIE_FW_HALT(x) ((x) << S_PCIE_FW_HALT)
+#define G_PCIE_FW_HALT(x) (((x) >> S_PCIE_FW_HALT) & M_PCIE_FW_HALT)
+#define F_PCIE_FW_HALT V_PCIE_FW_HALT(1U)
+
+#define S_PCIE_FW_STAGE 21
+#define M_PCIE_FW_STAGE 0x7
+#define V_PCIE_FW_STAGE(x) ((x) << S_PCIE_FW_STAGE)
+#define G_PCIE_FW_STAGE(x) (((x) >> S_PCIE_FW_STAGE) & M_PCIE_FW_STAGE)
+
+#define S_PCIE_FW_ASYNCNOT_VLD 20
+#define M_PCIE_FW_ASYNCNOT_VLD 0x1
+#define V_PCIE_FW_ASYNCNOT_VLD(x) \
+ ((x) << S_PCIE_FW_ASYNCNOT_VLD)
+#define G_PCIE_FW_ASYNCNOT_VLD(x) \
+ (((x) >> S_PCIE_FW_ASYNCNOT_VLD) & M_PCIE_FW_ASYNCNOT_VLD)
+#define F_PCIE_FW_ASYNCNOT_VLD V_PCIE_FW_ASYNCNOT_VLD(1U)
+
+#define S_PCIE_FW_ASYNCNOTINT 19
+#define M_PCIE_FW_ASYNCNOTINT 0x1
+#define V_PCIE_FW_ASYNCNOTINT(x) \
+ ((x) << S_PCIE_FW_ASYNCNOTINT)
+#define G_PCIE_FW_ASYNCNOTINT(x) \
+ (((x) >> S_PCIE_FW_ASYNCNOTINT) & M_PCIE_FW_ASYNCNOTINT)
+#define F_PCIE_FW_ASYNCNOTINT V_PCIE_FW_ASYNCNOTINT(1U)
+
+#define S_PCIE_FW_ASYNCNOT 16
+#define M_PCIE_FW_ASYNCNOT 0x7
+#define V_PCIE_FW_ASYNCNOT(x) ((x) << S_PCIE_FW_ASYNCNOT)
+#define G_PCIE_FW_ASYNCNOT(x) \
+ (((x) >> S_PCIE_FW_ASYNCNOT) & M_PCIE_FW_ASYNCNOT)
+
+#define S_PCIE_FW_MASTER_VLD 15
+#define M_PCIE_FW_MASTER_VLD 0x1
+#define V_PCIE_FW_MASTER_VLD(x) ((x) << S_PCIE_FW_MASTER_VLD)
+#define G_PCIE_FW_MASTER_VLD(x) \
+ (((x) >> S_PCIE_FW_MASTER_VLD) & M_PCIE_FW_MASTER_VLD)
+#define F_PCIE_FW_MASTER_VLD V_PCIE_FW_MASTER_VLD(1U)
+
+#define S_PCIE_FW_MASTER 12
+#define M_PCIE_FW_MASTER 0x7
+#define V_PCIE_FW_MASTER(x) ((x) << S_PCIE_FW_MASTER)
+#define G_PCIE_FW_MASTER(x) (((x) >> S_PCIE_FW_MASTER) & M_PCIE_FW_MASTER)
+
+#define S_PCIE_FW_RESET_VLD 11
+#define M_PCIE_FW_RESET_VLD 0x1
+#define V_PCIE_FW_RESET_VLD(x) ((x) << S_PCIE_FW_RESET_VLD)
+#define G_PCIE_FW_RESET_VLD(x) \
+ (((x) >> S_PCIE_FW_RESET_VLD) & M_PCIE_FW_RESET_VLD)
+#define F_PCIE_FW_RESET_VLD V_PCIE_FW_RESET_VLD(1U)
+
+#define S_PCIE_FW_RESET 8
+#define M_PCIE_FW_RESET 0x7
+#define V_PCIE_FW_RESET(x) ((x) << S_PCIE_FW_RESET)
+#define G_PCIE_FW_RESET(x) \
+ (((x) >> S_PCIE_FW_RESET) & M_PCIE_FW_RESET)
+
+#define S_PCIE_FW_REGISTERED 0
+#define M_PCIE_FW_REGISTERED 0xff
+#define V_PCIE_FW_REGISTERED(x) ((x) << S_PCIE_FW_REGISTERED)
+#define G_PCIE_FW_REGISTERED(x) \
+ (((x) >> S_PCIE_FW_REGISTERED) & M_PCIE_FW_REGISTERED)
+
+
/******************************************************************************
* B I N A R Y H E A D E R F O R M A T
**********************************************/
@@ -5579,7 +6008,7 @@ struct fw_debug_cmd {
*/
struct fw_hdr {
__u8 ver;
- __u8 reserved1;
+ __u8 chip; /* terminator chip family */
__be16 len512; /* bin length in units of 512-bytes */
__be32 fw_ver; /* firmware version */
__be32 tp_microcode_ver; /* tcp processor microcode version */
@@ -5591,7 +6020,16 @@ struct fw_hdr {
__u8 intfver_iscsi;
__u8 intfver_fcoe;
__u8 reserved2;
- __be32 reserved3[27];
+ __u32 reserved3;
+ __u32 reserved4;
+ __u32 reserved5;
+ __be32 flags;
+ __be32 reserved6[23];
+};
+
+enum fw_hdr_chip {
+ FW_HDR_CHIP_T4,
+ FW_HDR_CHIP_T5
};
#define S_FW_HDR_FW_VER_MAJOR 24
@@ -5622,4 +6060,18 @@ struct fw_hdr {
#define G_FW_HDR_FW_VER_BUILD(x) \
(((x) >> S_FW_HDR_FW_VER_BUILD) & M_FW_HDR_FW_VER_BUILD)
+enum fw_hdr_intfver {
+ FW_HDR_INTFVER_NIC = 0x00,
+ FW_HDR_INTFVER_VNIC = 0x00,
+ FW_HDR_INTFVER_OFLD = 0x00,
+ FW_HDR_INTFVER_RI = 0x00,
+ FW_HDR_INTFVER_ISCSIPDU = 0x00,
+ FW_HDR_INTFVER_ISCSI = 0x00,
+ FW_HDR_INTFVER_FCOE = 0x00,
+};
+
+enum fw_hdr_flags {
+ FW_HDR_FLAGS_RESET_HALT = 0x00000001,
+};
+
#endif /* _T4FW_INTERFACE_H_ */
diff --git a/sys/dev/cxgbe/offload.h b/sys/dev/cxgbe/offload.h
index f31b840b6b08..f6ada9d09345 100644
--- a/sys/dev/cxgbe/offload.h
+++ b/sys/dev/cxgbe/offload.h
@@ -31,15 +31,18 @@
#ifndef __T4_OFFLOAD_H__
#define __T4_OFFLOAD_H__
-/* CPL message priority levels */
-enum {
- CPL_PRIORITY_DATA = 0, /* data messages */
- CPL_PRIORITY_SETUP = 1, /* connection setup messages */
- CPL_PRIORITY_TEARDOWN = 0, /* connection teardown messages */
- CPL_PRIORITY_LISTEN = 1, /* listen start/stop messages */
- CPL_PRIORITY_ACK = 1, /* RX ACK messages */
- CPL_PRIORITY_CONTROL = 1 /* control messages */
-};
+/* XXX: flagrant misuse of mbuf fields (during tx by TOM) */
+#define MBUF_EQ(m) (*((void **)(&(m)->m_pkthdr.rcvif)))
+/* These have to work for !M_PKTHDR so we use a field from m_hdr. */
+#define MBUF_TX_CREDITS(m) ((m)->m_hdr.pad[0])
+#define MBUF_DMA_MAPPED(m) ((m)->m_hdr.pad[1])
+
+#define INIT_ULPTX_WR(w, wrlen, atomic, tid) do { \
+ (w)->wr.wr_hi = htonl(V_FW_WR_OP(FW_ULPTX_WR) | V_FW_WR_ATOMIC(atomic)); \
+ (w)->wr.wr_mid = htonl(V_FW_WR_LEN16(DIV_ROUND_UP(wrlen, 16)) | \
+ V_FW_WR_FLOWID(tid)); \
+ (w)->wr.wr_lo = cpu_to_be64(0); \
+} while (0)
#define INIT_TP_WR(w, tid) do { \
(w)->wr.wr_hi = htonl(V_FW_WR_OP(FW_TP_WR) | \
@@ -49,13 +52,19 @@ enum {
(w)->wr.wr_lo = cpu_to_be64(0); \
} while (0)
+#define INIT_TP_WR_MIT_CPL(w, cpl, tid) do { \
+ INIT_TP_WR(w, tid); \
+ OPCODE_TID(w) = htonl(MK_OPCODE_TID(cpl, tid)); \
+} while (0)
+
/*
* Max # of ATIDs. The absolute HW max is 16K but we keep it lower.
*/
#define MAX_ATIDS 8192U
-struct serv_entry {
+union serv_entry {
void *data;
+ union serv_entry *next;
};
union aopen_entry {
@@ -71,8 +80,7 @@ struct tid_info {
void **tid_tab;
unsigned int ntids;
- struct serv_entry *stid_tab;
- unsigned long *stid_bmap;
+ union serv_entry *stid_tab;
unsigned int nstids;
unsigned int stid_base;
@@ -84,10 +92,15 @@ struct tid_info {
unsigned int ftid_base;
unsigned int ftids_in_use;
+ struct mtx atid_lock;
union aopen_entry *afree;
unsigned int atids_in_use;
+ struct mtx stid_lock;
+ union serv_entry *sfree;
unsigned int stids_in_use;
+
+ unsigned int tids_in_use;
};
struct t4_range {
@@ -101,6 +114,40 @@ struct t4_virt_res { /* virtualized HW resources */
struct t4_range stag;
struct t4_range rq;
struct t4_range pbl;
+ struct t4_range qp;
+ struct t4_range cq;
+ struct t4_range ocq;
};
+#ifndef TCP_OFFLOAD_DISABLE
+enum {
+ ULD_TOM = 1,
+};
+
+struct adapter;
+struct port_info;
+struct uld_info {
+ SLIST_ENTRY(uld_info) link;
+ int refcount;
+ int uld_id;
+ int (*attach)(struct adapter *, void **);
+ int (*detach)(void *);
+};
+
+struct uld_softc {
+ struct uld_info *uld;
+ void *softc;
+};
+
+struct tom_tunables {
+ int sndbuf;
+ int ddp;
+ int indsz;
+ int ddp_thres;
+};
+
+int t4_register_uld(struct uld_info *);
+int t4_unregister_uld(struct uld_info *);
+#endif
+
#endif
diff --git a/sys/dev/cxgbe/osdep.h b/sys/dev/cxgbe/osdep.h
index 85a8206ff8af..40ed61b3d911 100644
--- a/sys/dev/cxgbe/osdep.h
+++ b/sys/dev/cxgbe/osdep.h
@@ -124,6 +124,7 @@ typedef boolean_t bool;
#define PCI_EXP_LNKSTA PCIR_EXPRESS_LINK_STA
#define PCI_EXP_LNKSTA_CLS PCIM_LINK_STA_SPEED
#define PCI_EXP_LNKSTA_NLW PCIM_LINK_STA_WIDTH
+#define PCI_EXP_DEVCTL2 0x28
static inline int
ilog2(long x)
diff --git a/sys/dev/cxgbe/t4_ioctl.h b/sys/dev/cxgbe/t4_ioctl.h
index ecc2c3d6a4c6..2a3fa3998ef1 100644
--- a/sys/dev/cxgbe/t4_ioctl.h
+++ b/sys/dev/cxgbe/t4_ioctl.h
@@ -47,6 +47,8 @@ enum {
T4_SET_FILTER, /* program a filter */
T4_DEL_FILTER, /* delete a filter */
T4_GET_SGE_CONTEXT, /* get SGE context for a queue */
+ T4_LOAD_FW, /* flash firmware */
+ T4_GET_MEM, /* read memory */
};
struct t4_reg {
@@ -62,6 +64,11 @@ struct t4_regdump {
uint32_t *data;
};
+struct t4_data {
+ uint32_t len;
+ uint8_t *data;
+};
+
/*
* A hardware filter is some valid combination of these.
*/
@@ -73,8 +80,8 @@ struct t4_regdump {
#define T4_FILTER_IP_DPORT 0x20 /* Destination IP port */
#define T4_FILTER_FCoE 0x40 /* Fibre Channel over Ethernet packet */
#define T4_FILTER_PORT 0x80 /* Physical ingress port */
-#define T4_FILTER_OVLAN 0x100 /* Outer VLAN ID */
-#define T4_FILTER_IVLAN 0x200 /* Inner VLAN ID */
+#define T4_FILTER_VNIC 0x100 /* VNIC id or outer VLAN */
+#define T4_FILTER_VLAN 0x200 /* VLAN ID */
#define T4_FILTER_IP_TOS 0x400 /* IPv4 TOS/IPv6 Traffic Class */
#define T4_FILTER_IP_PROTO 0x800 /* IP protocol */
#define T4_FILTER_ETH_TYPE 0x1000 /* Ethernet Type */
@@ -131,8 +138,8 @@ struct t4_filter_tuple {
* is used to select the global mode and all filters are limited to the
* set of fields allowed by the global mode.
*/
- uint16_t ovlan; /* outer VLAN */
- uint16_t ivlan; /* inner VLAN */
+ uint16_t vnic; /* VNIC id or outer VLAN tag */
+ uint16_t vlan; /* VLAN tag */
uint16_t ethtype; /* Ethernet type */
uint8_t tos; /* TOS/Traffic Type */
uint8_t proto; /* protocol type */
@@ -141,8 +148,8 @@ struct t4_filter_tuple {
uint32_t matchtype:3; /* MPS match type */
uint32_t frag:1; /* fragmentation extension header */
uint32_t macidx:9; /* exact match MAC index */
- uint32_t ivlan_vld:1; /* inner VLAN valid */
- uint32_t ovlan_vld:1; /* outer VLAN valid */
+ uint32_t vlan_vld:1; /* VLAN valid */
+ uint32_t vnic_vld:1; /* VNIC id/outer VLAN tag valid */
};
struct t4_filter_specification {
@@ -199,6 +206,12 @@ struct t4_sge_context {
uint32_t data[T4_SGE_CONTEXT_SIZE / 4];
};
+struct t4_mem_range {
+ uint32_t addr;
+ uint32_t len;
+ uint32_t *data;
+};
+
#define CHELSIO_T4_GETREG _IOWR('f', T4_GETREG, struct t4_reg)
#define CHELSIO_T4_SETREG _IOW('f', T4_SETREG, struct t4_reg)
#define CHELSIO_T4_REGDUMP _IOWR('f', T4_REGDUMP, struct t4_regdump)
@@ -209,4 +222,6 @@ struct t4_sge_context {
#define CHELSIO_T4_DEL_FILTER _IOW('f', T4_DEL_FILTER, struct t4_filter)
#define CHELSIO_T4_GET_SGE_CONTEXT _IOWR('f', T4_GET_SGE_CONTEXT, \
struct t4_sge_context)
+#define CHELSIO_T4_LOAD_FW _IOW('f', T4_LOAD_FW, struct t4_data)
+#define CHELSIO_T4_GET_MEM _IOW('f', T4_GET_MEM, struct t4_mem_range)
#endif
diff --git a/sys/dev/cxgbe/t4_l2t.c b/sys/dev/cxgbe/t4_l2t.c
index 31197b8654f7..be206c1fe892 100644
--- a/sys/dev/cxgbe/t4_l2t.c
+++ b/sys/dev/cxgbe/t4_l2t.c
@@ -37,7 +37,9 @@ __FBSDID("$FreeBSD$");
#include <sys/mutex.h>
#include <sys/rwlock.h>
#include <sys/socket.h>
+#include <sys/sbuf.h>
#include <net/if.h>
+#include <net/if_types.h>
#include <net/ethernet.h>
#include <net/if_vlan_var.h>
#include <net/if_dl.h>
@@ -50,9 +52,26 @@ __FBSDID("$FreeBSD$");
#include "common/common.h"
#include "common/jhash.h"
#include "common/t4_msg.h"
-#include "offload.h"
#include "t4_l2t.h"
+/*
+ * Module locking notes: There is a RW lock protecting the L2 table as a
+ * whole plus a spinlock per L2T entry. Entry lookups and allocations happen
+ * under the protection of the table lock, individual entry changes happen
+ * while holding that entry's spinlock. The table lock nests outside the
+ * entry locks. Allocations of new entries take the table lock as writers so
+ * no other lookups can happen while allocating new entries. Entry updates
+ * take the table lock as readers so multiple entries can be updated in
+ * parallel. An L2T entry can be dropped by decrementing its reference count
+ * and therefore can happen in parallel with entry allocation but no entry
+ * can change state or increment its ref count during allocation as both of
+ * these perform lookups.
+ *
+ * Note: We do not take references to ifnets in this module because both
+ * the TOE and the sockets already hold references to the interfaces and the
+ * lifetime of an L2T entry is fully contained in the lifetime of the TOE.
+ */
+
/* identifies sync vs async L2T_WRITE_REQs */
#define S_SYNC_WR 12
#define V_SYNC_WR(x) ((x) << S_SYNC_WR)
@@ -76,34 +95,251 @@ struct l2t_data {
struct l2t_entry l2tab[L2T_SIZE];
};
+static int do_l2t_write_rpl(struct sge_iq *, const struct rss_header *,
+ struct mbuf *);
+
+#define VLAN_NONE 0xfff
+#define SA(x) ((struct sockaddr *)(x))
+#define SIN(x) ((struct sockaddr_in *)(x))
+#define SINADDR(x) (SIN(x)->sin_addr.s_addr)
+
/*
- * Module locking notes: There is a RW lock protecting the L2 table as a
- * whole plus a spinlock per L2T entry. Entry lookups and allocations happen
- * under the protection of the table lock, individual entry changes happen
- * while holding that entry's spinlock. The table lock nests outside the
- * entry locks. Allocations of new entries take the table lock as writers so
- * no other lookups can happen while allocating new entries. Entry updates
- * take the table lock as readers so multiple entries can be updated in
- * parallel. An L2T entry can be dropped by decrementing its reference count
- * and therefore can happen in parallel with entry allocation but no entry
- * can change state or increment its ref count during allocation as both of
- * these perform lookups.
- *
- * Note: We do not take refereces to ifnets in this module because both
- * the TOE and the sockets already hold references to the interfaces and the
- * lifetime of an L2T entry is fully contained in the lifetime of the TOE.
+ * Allocate a free L2T entry. Must be called with l2t_data.lock held.
*/
+static struct l2t_entry *
+alloc_l2e(struct l2t_data *d)
+{
+ struct l2t_entry *end, *e, **p;
+
+ rw_assert(&d->lock, RA_WLOCKED); /* caller must hold the table lock as a writer */
+
+ if (!atomic_load_acq_int(&d->nfree))
+ return (NULL); /* no free entries left */
+
+ /* there's definitely a free entry */
+ for (e = d->rover, end = &d->l2tab[L2T_SIZE]; e != end; ++e)
+ if (atomic_load_acq_int(&e->refcnt) == 0)
+ goto found;
+
+ for (e = d->l2tab; atomic_load_acq_int(&e->refcnt); ++e) ; /* wrap around: rescan from the start of the table */
+found:
+ d->rover = e + 1; /* the next search resumes just past this entry */
+ atomic_subtract_int(&d->nfree, 1);
+
+ /*
+ * The entry we found may be an inactive entry that is
+ * presently in the hash table. We need to remove it.
+ */
+ if (e->state < L2T_STATE_SWITCHING) {
+ for (p = &d->l2tab[e->hash].first; *p; p = &(*p)->next) {
+ if (*p == e) {
+ *p = e->next; /* unlink e from its hash chain */
+ e->next = NULL;
+ break;
+ }
+ }
+ }
+
+ e->state = L2T_STATE_UNUSED; /* the caller sets the real state and refcnt */
+ return (e);
+}
+
+/*
+ * Write an L2T entry. Must be called with the entry locked.
+ * The write may be synchronous or asynchronous.
+ */
+static int
+write_l2e(struct adapter *sc, struct l2t_entry *e, int sync)
+{
+ struct mbuf *m;
+ struct cpl_l2t_write_req *req;
+
+ mtx_assert(&e->lock, MA_OWNED);
+
+ if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
+ return (ENOMEM); /* nothing was sent; entry state is unchanged */
+
+ req = mtod(m, struct cpl_l2t_write_req *);
+ m->m_pkthdr.len = m->m_len = sizeof(*req);
+
+ INIT_TP_WR(req, 0);
+ OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, e->idx |
+ V_SYNC_WR(sync) | V_TID_QID(sc->sge.fwq.abs_id)));
+ req->params = htons(V_L2T_W_PORT(e->lport) | V_L2T_W_NOREPLY(!sync)); /* a reply is requested only for sync writes */
+ req->l2t_idx = htons(e->idx);
+ req->vlan = htons(e->vlan);
+ memcpy(req->dst_mac, e->dmac, sizeof(req->dst_mac));
+
+ t4_mgmt_tx(sc, m); /* NOTE(review): any result from t4_mgmt_tx is ignored - confirm it cannot fail */
+
+ if (sync && e->state != L2T_STATE_SWITCHING)
+ e->state = L2T_STATE_SYNC_WRITE; /* do_l2t_write_rpl moves the entry to VALID on reply */
+
+ return (0);
+}
+
+/*
+ * Allocate an L2T entry for use by a switching rule. Such need to be
+ * explicitly freed and while busy they are not on any hash chain, so normal
+ * address resolution updates do not see them.
+ */
+struct l2t_entry *
+t4_l2t_alloc_switching(struct l2t_data *d)
+{
+ struct l2t_entry *e;
+
+ /*
+ * alloc_l2e() asserts RA_WLOCKED and modifies the rover and the hash
+ * chains, so the table lock must be taken as a writer here.
+ */
+ rw_wlock(&d->lock);
+ e = alloc_l2e(d);
+ if (e) {
+ mtx_lock(&e->lock); /* avoid race with t4_l2t_free */
+ e->state = L2T_STATE_SWITCHING;
+ atomic_store_rel_int(&e->refcnt, 1);
+ mtx_unlock(&e->lock);
+ }
+ rw_wunlock(&d->lock);
+
+ /* NULL when alloc_l2e() found no free entry. */
+ return (e);
+}
+
+/*
+ * Sets/updates the contents of a switching L2T entry that has been allocated
+ * with an earlier call to @t4_l2t_alloc_switching.
+ */
+int
+t4_l2t_set_switching(struct adapter *sc, struct l2t_entry *e, uint16_t vlan,
+ uint8_t port, uint8_t *eth_addr)
+{
+ int rc;
+
+ /*
+ * Update the entry under its lock: sysctl_l2t reads vlan, lport and
+ * dmac while holding e->lock, and write_l2e requires the lock too.
+ */
+ mtx_lock(&e->lock);
+ e->vlan = vlan;
+ e->lport = port;
+ memcpy(e->dmac, eth_addr, ETHER_ADDR_LEN);
+ rc = write_l2e(sc, e, 0); /* async write: no reply is requested */
+ mtx_unlock(&e->lock);
+
+ /* 0 on success, or ENOMEM if write_l2e could not get an mbuf. */
+ return (rc);
+}
+
+int
+t4_init_l2t(struct adapter *sc, int flags)
+{
+ int i;
+ struct l2t_data *d;
+
+ d = malloc(sizeof(*d), M_CXGBE, M_ZERO | flags); /* caller's flags are OR'ed with M_ZERO */
+ if (!d)
+ return (ENOMEM);
+
+ d->rover = d->l2tab; /* allocation scans start at entry 0 */
+ atomic_store_rel_int(&d->nfree, L2T_SIZE); /* every entry starts out free */
+ rw_init(&d->lock, "L2T");
+
+ for (i = 0; i < L2T_SIZE; i++) {
+ d->l2tab[i].idx = i;
+ d->l2tab[i].state = L2T_STATE_UNUSED;
+ mtx_init(&d->l2tab[i].lock, "L2T_E", NULL, MTX_DEF);
+ atomic_store_rel_int(&d->l2tab[i].refcnt, 0);
+ }
+
+ sc->l2t = d; /* publish the table on the adapter */
+ t4_register_cpl_handler(sc, CPL_L2T_WRITE_RPL, do_l2t_write_rpl); /* L2T write replies go to do_l2t_write_rpl */
+
+ return (0);
+}
+
+int
+t4_free_l2t(struct l2t_data *d)
+{
+ int i;
+
+ for (i = 0; i < L2T_SIZE; i++)
+ mtx_destroy(&d->l2tab[i].lock); /* undo the per-entry mtx_init done in t4_init_l2t */
+ rw_destroy(&d->lock);
+ free(d, M_CXGBE); /* NOTE(review): the adapter's l2t pointer is not cleared here - presumably the caller does that */
+
+ return (0); /* always succeeds */
+}
+
static inline unsigned int
vlan_prio(const struct l2t_entry *e)
{
return e->vlan >> 13;
}
+static char
+l2e_state(const struct l2t_entry *e)
+{
+ switch (e->state) {
+ case L2T_STATE_VALID: return 'V'; /* valid, fast-path entry */
+ case L2T_STATE_STALE: return 'S'; /* needs revalidation, but usable */
+ case L2T_STATE_SYNC_WRITE: return 'W'; /* state set by write_l2e for a sync write */
+ case L2T_STATE_RESOLVING: return e->arpq_head ? 'A' : 'R'; /* 'A' when mbufs are queued on the entry, else 'R' */
+ case L2T_STATE_SWITCHING: return 'X'; /* entry allocated for a switching rule */
+ default: return 'U'; /* any other state */
+ }
+}
+
+int
+sysctl_l2t(SYSCTL_HANDLER_ARGS)
+{
+ struct adapter *sc = arg1;
+ struct l2t_data *l2t = sc->l2t;
+ struct l2t_entry *e;
+ struct sbuf *sb;
+ int rc, i, header = 0; /* header: set once the column titles have been emitted */
+ char ip[60];
+
+ if (l2t == NULL)
+ return (ENXIO); /* adapter has no L2 table */
+
+ rc = sysctl_wire_old_buffer(req, 0);
+ if (rc != 0)
+ return (rc);
+
+ sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
+ if (sb == NULL)
+ return (ENOMEM);
+
+ e = &l2t->l2tab[0];
+ for (i = 0; i < L2T_SIZE; i++, e++) {
+ mtx_lock(&e->lock); /* each entry is formatted under its own lock */
+ if (e->state == L2T_STATE_UNUSED)
+ goto skip;
+
+ if (header == 0) {
+ sbuf_printf(sb, " Idx IP address "
+ "Ethernet address VLAN/P LP State Users Port");
+ header = 1;
+ }
+ if (e->state == L2T_STATE_SWITCHING || e->v6)
+ ip[0] = 0; /* no address is printed for switching or IPv6 entries */
+ else
+ snprintf(ip, sizeof(ip), "%s",
+ inet_ntoa(*(struct in_addr *)&e->addr[0])); /* NOTE(review): inet_ntoa presumably returns a shared static buffer - confirm concurrent readers are safe */
+
+ /* XXX: accessing lle probably not safe? */
+ sbuf_printf(sb, "\n%4u %-15s %02x:%02x:%02x:%02x:%02x:%02x %4d"
+ " %u %2u %c %5u %s",
+ e->idx, ip, e->dmac[0], e->dmac[1], e->dmac[2],
+ e->dmac[3], e->dmac[4], e->dmac[5],
+ e->vlan & 0xfff, vlan_prio(e), e->lport,
+ l2e_state(e), atomic_load_acq_int(&e->refcnt),
+ e->lle ? e->lle->lle_tbl->llt_ifp->if_xname : "");
+skip:
+ mtx_unlock(&e->lock);
+ }
+
+ rc = sbuf_finish(sb);
+ sbuf_delete(sb);
+
+ return (rc); /* result of sbuf_finish */
+}
+
+#ifndef TCP_OFFLOAD_DISABLE
static inline void
l2t_hold(struct l2t_data *d, struct l2t_entry *e)
{
if (atomic_fetchadd_int(&e->refcnt, 1) == 0) /* 0 -> 1 transition */
- atomic_add_int(&d->nfree, -1);
+ atomic_subtract_int(&d->nfree, 1);
}
/*
@@ -154,38 +390,6 @@ addreq(const struct l2t_entry *e, const uint32_t *addr)
}
/*
- * Write an L2T entry. Must be called with the entry locked (XXX: really?).
- * The write may be synchronous or asynchronous.
- */
-static int
-write_l2e(struct adapter *sc, struct l2t_entry *e, int sync)
-{
- struct mbuf *m;
- struct cpl_l2t_write_req *req;
-
- if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
- return (ENOMEM);
-
- req = mtod(m, struct cpl_l2t_write_req *);
- m->m_pkthdr.len = m->m_len = sizeof(*req);
-
- INIT_TP_WR(req, 0);
- OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, e->idx |
- V_SYNC_WR(sync) | V_TID_QID(sc->sge.fwq.abs_id)));
- req->params = htons(V_L2T_W_PORT(e->lport) | V_L2T_W_NOREPLY(!sync));
- req->l2t_idx = htons(e->idx);
- req->vlan = htons(e->vlan);
- memcpy(req->dst_mac, e->dmac, sizeof(req->dst_mac));
-
- t4_mgmt_tx(sc, m);
-
- if (sync && e->state != L2T_STATE_SWITCHING)
- e->state = L2T_STATE_SYNC_WRITE;
-
- return (0);
-}
-
-/*
* Add a packet to an L2T entry's queue of packets awaiting resolution.
* Must be called with the entry's lock held.
*/
@@ -194,53 +398,133 @@ arpq_enqueue(struct l2t_entry *e, struct mbuf *m)
{
mtx_assert(&e->lock, MA_OWNED);
- m->m_next = NULL;
+ KASSERT(m->m_nextpkt == NULL, ("%s: m_nextpkt not NULL", __func__));
if (e->arpq_head)
- e->arpq_tail->m_next = m;
+ e->arpq_tail->m_nextpkt = m;
else
e->arpq_head = m;
e->arpq_tail = m;
}
-/*
- * Allocate a free L2T entry. Must be called with l2t_data.lock held.
- */
-static struct l2t_entry *
-alloc_l2e(struct l2t_data *d)
+static inline void
+send_pending(struct adapter *sc, struct l2t_entry *e)
{
- struct l2t_entry *end, *e, **p;
+ struct mbuf *m, *next;
- rw_assert(&d->lock, RA_WLOCKED);
+ mtx_assert(&e->lock, MA_OWNED);
- if (!atomic_load_acq_int(&d->nfree))
- return (NULL);
+ for (m = e->arpq_head; m; m = next) {
+ next = m->m_nextpkt;
+ m->m_nextpkt = NULL;
+ t4_wrq_tx(sc, MBUF_EQ(m), m);
+ }
+ e->arpq_head = e->arpq_tail = NULL;
+}
- /* there's definitely a free entry */
- for (e = d->rover, end = &d->l2tab[L2T_SIZE]; e != end; ++e)
- if (atomic_load_acq_int(&e->refcnt) == 0)
- goto found;
+#ifdef INET
+/*
+ * Looks up and fills up an l2t_entry's lle. We grab all the locks that we need
+ * ourself, and update e->state at the end if e->lle was successfully filled.
+ *
+ * The lle passed in comes from arpresolve and is ignored as it does not appear
+ * to be of much use.
+ */
+static int
+l2t_fill_lle(struct adapter *sc, struct l2t_entry *e, struct llentry *unused)
+{
+ int rc = 0;
+ struct sockaddr_in sin;
+ struct ifnet *ifp = e->ifp;
+ struct llentry *lle;
- for (e = d->l2tab; atomic_load_acq_int(&e->refcnt); ++e) ;
-found:
- d->rover = e + 1;
- atomic_add_int(&d->nfree, -1);
+ bzero(&sin, sizeof(struct sockaddr_in));
+ if (e->v6)
+ panic("%s: IPv6 L2 resolution not supported yet.", __func__);
+
+ sin.sin_family = AF_INET;
+ sin.sin_len = sizeof(struct sockaddr_in);
+ /*
+ * Copy only the IPv4 address. Copying sizeof(struct sockaddr_in) bytes
+ * into sin_addr would write past the end of sin on the stack.
+ */
+ memcpy(&sin.sin_addr, e->addr, sizeof(sin.sin_addr));
+
+ mtx_assert(&e->lock, MA_NOTOWNED);
+ KASSERT(e->addr && ifp, ("%s: bad prep before call", __func__));
+
+ IF_AFDATA_LOCK(ifp);
+ lle = lla_lookup(LLTABLE(ifp), LLE_EXCLUSIVE, SA(&sin));
+ IF_AFDATA_UNLOCK(ifp);
+ if (!LLE_IS_VALID(lle))
+ return (ENOMEM);
+ if (!(lle->la_flags & LLE_VALID)) {
+ rc = EINVAL;
+ goto done;
+ }
+
+ LLE_ADDREF(lle);
+
+ mtx_lock(&e->lock);
+ if (e->state == L2T_STATE_RESOLVING) {
+ KASSERT(e->lle == NULL, ("%s: lle already valid", __func__));
+ e->lle = lle;
+ memcpy(e->dmac, &lle->ll_addr, ETHER_ADDR_LEN);
+ write_l2e(sc, e, 1);
+ } else {
+ /* Entry already carries this lle; drop the extra reference. */
+ KASSERT(e->lle == lle, ("%s: lle changed", __func__));
+ LLE_REMREF(lle);
+ }
+ mtx_unlock(&e->lock);
+done:
+ LLE_WUNLOCK(lle);
+ return (rc);
+}
+#endif
- /*
- * The entry we found may be an inactive entry that is
- * presently in the hash table. We need to remove it.
- */
- if (e->state < L2T_STATE_SWITCHING) {
- for (p = &d->l2tab[e->hash].first; *p; p = &(*p)->next) {
- if (*p == e) {
- *p = e->next;
- e->next = NULL;
- break;
- }
+int
+t4_l2t_send(struct adapter *sc, struct mbuf *m, struct l2t_entry *e)
+{
+#ifndef INET
+ return (EINVAL);
+#else
+ struct llentry *lle = NULL;
+ struct sockaddr_in sin;
+ struct ifnet *ifp = e->ifp;
+
+ if (e->v6)
+ panic("%s: IPv6 L2 resolution not supported yet.", __func__);
+
+ bzero(&sin, sizeof(struct sockaddr_in));
+ sin.sin_family = AF_INET;
+ sin.sin_len = sizeof(struct sockaddr_in);
+ /*
+ * Copy only the IPv4 address. Copying sizeof(struct sockaddr_in) bytes
+ * into sin_addr would write past the end of sin on the stack.
+ */
+ memcpy(&sin.sin_addr, e->addr, sizeof(sin.sin_addr));
+
+again:
+ switch (e->state) {
+ case L2T_STATE_STALE: /* entry is stale, kick off revalidation */
+ if (arpresolve(ifp, NULL, NULL, SA(&sin), e->dmac, &lle) == 0)
+ l2t_fill_lle(sc, e, lle);
+
+ /* Fall through */
+
+ case L2T_STATE_VALID: /* fast-path, send the packet on */
+ return t4_wrq_tx(sc, MBUF_EQ(m), m);
+
+ case L2T_STATE_RESOLVING:
+ case L2T_STATE_SYNC_WRITE:
+ mtx_lock(&e->lock);
+ if (e->state != L2T_STATE_SYNC_WRITE &&
+ e->state != L2T_STATE_RESOLVING) {
+ /* state changed by the time we got here */
+ mtx_unlock(&e->lock);
+ goto again;
}
+ arpq_enqueue(e, m);
+ mtx_unlock(&e->lock);
+
+ if (e->state == L2T_STATE_RESOLVING &&
+ arpresolve(ifp, NULL, NULL, SA(&sin), e->dmac, &lle) == 0)
+ l2t_fill_lle(sc, e, lle);
}
- e->state = L2T_STATE_UNUSED;
- return e;
+ return (0);
+#endif
}
/*
@@ -287,75 +571,214 @@ t4_l2t_release(struct l2t_entry *e)
t4_l2e_free(e);
}
+static int
+do_l2t_write_rpl(struct sge_iq *iq, const struct rss_header *rss,
+ struct mbuf *m)
+{
+ struct adapter *sc = iq->adapter;
+ const struct cpl_l2t_write_rpl *rpl = (const void *)(rss + 1); /* the reply is read from just past the RSS header */
+ unsigned int tid = GET_TID(rpl);
+ unsigned int idx = tid & (L2T_SIZE - 1); /* low bits of the tid give the L2T index (write_l2e puts e->idx there) */
+
+ if (__predict_false(rpl->status != CPL_ERR_NONE)) {
+ log(LOG_ERR,
+ "Unexpected L2T_WRITE_RPL status %u for entry %u\n",
+ rpl->status, idx);
+ return (EINVAL);
+ }
+
+ if (tid & F_SYNC_WR) { /* presumably the sync flag that write_l2e encoded with V_SYNC_WR - confirm */
+ struct l2t_entry *e = &sc->l2t->l2tab[idx];
+
+ mtx_lock(&e->lock);
+ if (e->state != L2T_STATE_SWITCHING) {
+ send_pending(sc, e); /* transmit the mbufs queued on the entry */
+ e->state = L2T_STATE_VALID;
+ }
+ mtx_unlock(&e->lock);
+ }
+
+ return (0); /* NOTE(review): m is never touched in this handler - confirm who owns it */
+}
+
/*
- * Allocate an L2T entry for use by a switching rule. Such need to be
- * explicitly freed and while busy they are not on any hash chain, so normal
- * address resolution updates do not see them.
+ * Reuse an L2T entry that was previously used for the same next hop.
+ */
+static void
+reuse_entry(struct l2t_entry *e)
+{
+ struct llentry *lle;
+
+ mtx_lock(&e->lock); /* avoid race with t4_l2t_free */
+ lle = e->lle;
+ if (lle) {
+ KASSERT(lle->la_flags & LLE_VALID,
+ ("%s: invalid lle stored in l2t_entry", __func__));
+
+ if (lle->la_expire >= time_uptime) /* NOTE(review): if la_expire is the expiry time, this marks a not-yet-expired lle STALE - confirm the test is not inverted */
+ e->state = L2T_STATE_STALE;
+ else
+ e->state = L2T_STATE_VALID;
+ } else
+ e->state = L2T_STATE_RESOLVING; /* no lle cached: the entry must be resolved again */
+ mtx_unlock(&e->lock);
+}
+
+/*
+ * The TOE wants an L2 table entry that it can use to reach the next hop over
+ * the specified port. Produce such an entry - create one if needed.
+ *
+ * Note that the ifnet could be a pseudo-device like if_vlan, if_lagg, etc. on
+ * top of the real cxgbe interface.
*/
struct l2t_entry *
-t4_l2t_alloc_switching(struct l2t_data *d)
+t4_l2t_get(struct port_info *pi, struct ifnet *ifp, struct sockaddr *sa)
{
struct l2t_entry *e;
+ struct l2t_data *d = pi->adapter->l2t;
+ int addr_len;
+ uint32_t *addr;
+ int hash;
+ struct sockaddr_in6 *sin6;
+ unsigned int smt_idx = pi->port_id;
+
+ if (sa->sa_family == AF_INET) {
+ addr = (uint32_t *)&SINADDR(sa);
+ addr_len = sizeof(SINADDR(sa));
+ } else if (sa->sa_family == AF_INET6) {
+ sin6 = (struct sockaddr_in6 *)sa;
+ addr = (uint32_t *)&sin6->sin6_addr.s6_addr;
+ addr_len = sizeof(sin6->sin6_addr.s6_addr);
+ } else
+ return (NULL);
- rw_rlock(&d->lock);
+ hash = addr_hash(addr, addr_len, ifp->if_index);
+
+ rw_wlock(&d->lock);
+ for (e = d->l2tab[hash].first; e; e = e->next) {
+ if (!addreq(e, addr) && e->ifp == ifp && e->smt_idx == smt_idx){
+ l2t_hold(d, e);
+ if (atomic_load_acq_int(&e->refcnt) == 1)
+ reuse_entry(e);
+ goto done;
+ }
+ }
+
+ /* Need to allocate a new entry */
e = alloc_l2e(d);
if (e) {
mtx_lock(&e->lock); /* avoid race with t4_l2t_free */
- e->state = L2T_STATE_SWITCHING;
+ e->state = L2T_STATE_RESOLVING;
+ memcpy(e->addr, addr, addr_len);
+ e->ifindex = ifp->if_index;
+ e->smt_idx = smt_idx;
+ e->ifp = ifp;
+ e->hash = hash;
+ e->lport = pi->lport;
+ e->v6 = (addr_len == 16);
+ e->lle = NULL;
atomic_store_rel_int(&e->refcnt, 1);
+ if (ifp->if_type == IFT_L2VLAN)
+ VLAN_TAG(ifp, &e->vlan);
+ else
+ e->vlan = VLAN_NONE;
+ e->next = d->l2tab[hash].first;
+ d->l2tab[hash].first = e;
mtx_unlock(&e->lock);
}
- rw_runlock(&d->lock);
+done:
+ rw_wunlock(&d->lock);
return e;
}
/*
- * Sets/updates the contents of a switching L2T entry that has been allocated
- * with an earlier call to @t4_l2t_alloc_switching.
+ * Called when the host's neighbor layer makes a change to some entry that is
+ * loaded into the HW L2 table.
*/
-int
-t4_l2t_set_switching(struct adapter *sc, struct l2t_entry *e, uint16_t vlan,
- uint8_t port, uint8_t *eth_addr)
+void
+t4_l2t_update(struct adapter *sc, struct llentry *lle)
{
- e->vlan = vlan;
- e->lport = port;
- memcpy(e->dmac, eth_addr, ETHER_ADDR_LEN);
- return write_l2e(sc, e, 0);
-}
+ struct l2t_entry *e;
+ struct l2t_data *d = sc->l2t;
+ struct sockaddr *sa = L3_ADDR(lle);
+ struct llentry *old_lle = NULL;
+ uint32_t *addr = (uint32_t *)&SINADDR(sa);
+ struct ifnet *ifp = lle->lle_tbl->llt_ifp;
+ int hash = addr_hash(addr, sizeof(*addr), ifp->if_index);
+
+ KASSERT(d != NULL, ("%s: no L2 table", __func__));
+ LLE_WLOCK_ASSERT(lle);
+ KASSERT(lle->la_flags & LLE_VALID || lle->la_flags & LLE_DELETED,
+ ("%s: entry neither valid nor deleted.", __func__));
-struct l2t_data *
-t4_init_l2t(int flags)
-{
- int i;
- struct l2t_data *d;
+ rw_rlock(&d->lock);
+ for (e = d->l2tab[hash].first; e; e = e->next) {
+ if (!addreq(e, addr) && e->ifp == ifp) {
+ mtx_lock(&e->lock);
+ if (atomic_load_acq_int(&e->refcnt))
+ goto found;
+ e->state = L2T_STATE_STALE;
+ mtx_unlock(&e->lock);
+ break;
+ }
+ }
+ rw_runlock(&d->lock);
- d = malloc(sizeof(*d), M_CXGBE, M_ZERO | flags);
- if (!d)
- return (NULL);
+ /* The TOE has no interest in this LLE */
+ return;
- d->rover = d->l2tab;
- atomic_store_rel_int(&d->nfree, L2T_SIZE);
- rw_init(&d->lock, "L2T");
+ found:
+ rw_runlock(&d->lock);
- for (i = 0; i < L2T_SIZE; i++) {
- d->l2tab[i].idx = i;
- d->l2tab[i].state = L2T_STATE_UNUSED;
- mtx_init(&d->l2tab[i].lock, "L2T_E", NULL, MTX_DEF);
- atomic_store_rel_int(&d->l2tab[i].refcnt, 0);
- }
+ if (atomic_load_acq_int(&e->refcnt)) {
- return (d);
-}
+ /* Entry is referenced by at least 1 offloaded connection. */
-int
-t4_free_l2t(struct l2t_data *d)
-{
- int i;
+ /* Handle deletes first */
+ if (lle->la_flags & LLE_DELETED) {
+ if (lle == e->lle) {
+ e->lle = NULL;
+ e->state = L2T_STATE_RESOLVING;
+ LLE_REMREF(lle);
+ }
+ goto done;
+ }
- for (i = 0; i < L2T_SIZE; i++)
- mtx_destroy(&d->l2tab[i].lock);
- rw_destroy(&d->lock);
- free(d, M_CXGBE);
+ if (lle != e->lle) {
+ old_lle = e->lle;
+ LLE_ADDREF(lle);
+ e->lle = lle;
+ }
- return (0);
+ if (e->state == L2T_STATE_RESOLVING ||
+ memcmp(e->dmac, &lle->ll_addr, ETHER_ADDR_LEN)) {
+
+ /* unresolved -> resolved; or dmac changed */
+
+ memcpy(e->dmac, &lle->ll_addr, ETHER_ADDR_LEN);
+ write_l2e(sc, e, 1);
+ } else {
+
+ /* +ve reinforcement of a valid or stale entry */
+
+ }
+
+ e->state = L2T_STATE_VALID;
+
+ } else {
+ /*
+ * Entry was used previously but is unreferenced right now.
+ * e->lle has been released and NULL'd out by t4_l2t_free, or
+ * l2t_release is about to call t4_l2t_free and do that.
+ *
+ * Either way this is of no interest to us.
+ */
+ }
+
+done:
+ mtx_unlock(&e->lock);
+ if (old_lle)
+ LLE_FREE(old_lle);
}
+
+#endif
diff --git a/sys/dev/cxgbe/t4_l2t.h b/sys/dev/cxgbe/t4_l2t.h
index c5520c612146..8004c9ec3b39 100644
--- a/sys/dev/cxgbe/t4_l2t.h
+++ b/sys/dev/cxgbe/t4_l2t.h
@@ -54,18 +54,26 @@ struct l2t_entry {
struct mbuf *arpq_head; /* list of mbufs awaiting resolution */
struct mbuf *arpq_tail;
struct mtx lock;
- volatile uint32_t refcnt; /* entry reference count */
+ volatile int refcnt; /* entry reference count */
uint16_t hash; /* hash bucket the entry is on */
uint8_t v6; /* whether entry is for IPv6 */
uint8_t lport; /* associated offload logical port */
uint8_t dmac[ETHER_ADDR_LEN]; /* next hop's MAC address */
};
-struct l2t_data *t4_init_l2t(int);
+int t4_init_l2t(struct adapter *, int);
int t4_free_l2t(struct l2t_data *);
struct l2t_entry *t4_l2t_alloc_switching(struct l2t_data *);
int t4_l2t_set_switching(struct adapter *, struct l2t_entry *, uint16_t,
uint8_t, uint8_t *);
void t4_l2t_release(struct l2t_entry *);
+int sysctl_l2t(SYSCTL_HANDLER_ARGS);
+
+#ifndef TCP_OFFLOAD_DISABLE
+struct l2t_entry *t4_l2t_get(struct port_info *, struct ifnet *,
+ struct sockaddr *);
+int t4_l2t_send(struct adapter *, struct mbuf *, struct l2t_entry *);
+void t4_l2t_update(struct adapter *, struct llentry *);
+#endif
#endif /* __T4_L2T_H */
diff --git a/sys/dev/cxgbe/t4_main.c b/sys/dev/cxgbe/t4_main.c
index adca421ea0a3..37a4a7c4e2bd 100644
--- a/sys/dev/cxgbe/t4_main.c
+++ b/sys/dev/cxgbe/t4_main.c
@@ -55,12 +55,10 @@ __FBSDID("$FreeBSD$");
#include <net/if_dl.h>
#include <net/if_vlan_var.h>
-#include "common/t4_hw.h"
#include "common/common.h"
#include "common/t4_msg.h"
#include "common/t4_regs.h"
#include "common/t4_regs_values.h"
-#include "common/t4fw_interface.h"
#include "t4_ioctl.h"
#include "t4_l2t.h"
@@ -122,115 +120,130 @@ static void cxgbe_media_status(struct ifnet *, struct ifmediareq *);
MALLOC_DEFINE(M_CXGBE, "cxgbe", "Chelsio T4 Ethernet driver and services");
-/*
- * Tunables.
- */
-static SYSCTL_NODE(_hw, OID_AUTO, cxgbe, CTLFLAG_RD, 0,
- "cxgbe driver parameters");
-
-static int force_firmware_install = 0;
-TUNABLE_INT("hw.cxgbe.force_firmware_install", &force_firmware_install);
-SYSCTL_UINT(_hw_cxgbe, OID_AUTO, force_firmware_install, CTLFLAG_RDTUN,
- &force_firmware_install, 0, "install firmware on every attach.");
+static struct mtx t4_list_lock;
+static SLIST_HEAD(, adapter) t4_list;
+#ifndef TCP_OFFLOAD_DISABLE
+static struct mtx t4_uld_list_lock;
+static SLIST_HEAD(, uld_info) t4_uld_list;
+#endif
/*
- * Holdoff timer and packet counter values.
+ * Tunables. See tweak_tunables() too.
*/
-static unsigned int intr_timer[SGE_NTIMERS] = {1, 5, 10, 50, 100, 200};
-static unsigned int intr_pktcount[SGE_NCOUNTERS] = {1, 8, 16, 32}; /* 63 max */
/*
- * Max # of tx and rx queues to use for each 10G and 1G port.
+ * Number of queues for tx and rx, 10G and 1G, NIC and offload.
*/
-static unsigned int max_ntxq_10g = 8;
-TUNABLE_INT("hw.cxgbe.max_ntxq_10G_port", &max_ntxq_10g);
-SYSCTL_UINT(_hw_cxgbe, OID_AUTO, max_ntxq_10G_port, CTLFLAG_RDTUN,
- &max_ntxq_10g, 0, "maximum number of tx queues per 10G port.");
-
-static unsigned int max_nrxq_10g = 8;
-TUNABLE_INT("hw.cxgbe.max_nrxq_10G_port", &max_nrxq_10g);
-SYSCTL_UINT(_hw_cxgbe, OID_AUTO, max_nrxq_10G_port, CTLFLAG_RDTUN,
- &max_nrxq_10g, 0, "maximum number of rxq's (per 10G port).");
-
-static unsigned int max_ntxq_1g = 2;
-TUNABLE_INT("hw.cxgbe.max_ntxq_1G_port", &max_ntxq_1g);
-SYSCTL_UINT(_hw_cxgbe, OID_AUTO, max_ntxq_1G_port, CTLFLAG_RDTUN,
- &max_ntxq_1g, 0, "maximum number of tx queues per 1G port.");
-
-static unsigned int max_nrxq_1g = 2;
-TUNABLE_INT("hw.cxgbe.max_nrxq_1G_port", &max_nrxq_1g);
-SYSCTL_UINT(_hw_cxgbe, OID_AUTO, max_nrxq_1G_port, CTLFLAG_RDTUN,
- &max_nrxq_1g, 0, "maximum number of rxq's (per 1G port).");
+#define NTXQ_10G 16
+static int t4_ntxq10g = -1;
+TUNABLE_INT("hw.cxgbe.ntxq10g", &t4_ntxq10g);
+
+#define NRXQ_10G 8
+static int t4_nrxq10g = -1;
+TUNABLE_INT("hw.cxgbe.nrxq10g", &t4_nrxq10g);
+
+#define NTXQ_1G 4
+static int t4_ntxq1g = -1;
+TUNABLE_INT("hw.cxgbe.ntxq1g", &t4_ntxq1g);
+
+#define NRXQ_1G 2
+static int t4_nrxq1g = -1;
+TUNABLE_INT("hw.cxgbe.nrxq1g", &t4_nrxq1g);
+
+#ifndef TCP_OFFLOAD_DISABLE
+#define NOFLDTXQ_10G 8
+static int t4_nofldtxq10g = -1;
+TUNABLE_INT("hw.cxgbe.nofldtxq10g", &t4_nofldtxq10g);
+
+#define NOFLDRXQ_10G 2
+static int t4_nofldrxq10g = -1;
+TUNABLE_INT("hw.cxgbe.nofldrxq10g", &t4_nofldrxq10g);
+
+#define NOFLDTXQ_1G 2
+static int t4_nofldtxq1g = -1;
+TUNABLE_INT("hw.cxgbe.nofldtxq1g", &t4_nofldtxq1g);
+
+#define NOFLDRXQ_1G 1
+static int t4_nofldrxq1g = -1;
+TUNABLE_INT("hw.cxgbe.nofldrxq1g", &t4_nofldrxq1g);
+#endif
/*
* Holdoff parameters for 10G and 1G ports.
*/
-static unsigned int tmr_idx_10g = 1;
-TUNABLE_INT("hw.cxgbe.holdoff_timer_idx_10G", &tmr_idx_10g);
-SYSCTL_UINT(_hw_cxgbe, OID_AUTO, holdoff_timer_idx_10G, CTLFLAG_RDTUN,
- &tmr_idx_10g, 0,
- "default timer index for interrupt holdoff (10G ports).");
-
-static int pktc_idx_10g = 2;
-TUNABLE_INT("hw.cxgbe.holdoff_pktc_idx_10G", &pktc_idx_10g);
-SYSCTL_UINT(_hw_cxgbe, OID_AUTO, holdoff_pktc_idx_10G, CTLFLAG_RDTUN,
- &pktc_idx_10g, 0,
- "default pkt counter index for interrupt holdoff (10G ports).");
-
-static unsigned int tmr_idx_1g = 1;
-TUNABLE_INT("hw.cxgbe.holdoff_timer_idx_1G", &tmr_idx_1g);
-SYSCTL_UINT(_hw_cxgbe, OID_AUTO, holdoff_timer_idx_1G, CTLFLAG_RDTUN,
- &tmr_idx_1g, 0,
- "default timer index for interrupt holdoff (1G ports).");
-
-static int pktc_idx_1g = 2;
-TUNABLE_INT("hw.cxgbe.holdoff_pktc_idx_1G", &pktc_idx_1g);
-SYSCTL_UINT(_hw_cxgbe, OID_AUTO, holdoff_pktc_idx_1G, CTLFLAG_RDTUN,
- &pktc_idx_1g, 0,
- "default pkt counter index for interrupt holdoff (1G ports).");
+#define TMR_IDX_10G 1
+static int t4_tmr_idx_10g = TMR_IDX_10G;
+TUNABLE_INT("hw.cxgbe.holdoff_timer_idx_10G", &t4_tmr_idx_10g);
+
+#define PKTC_IDX_10G 2
+static int t4_pktc_idx_10g = PKTC_IDX_10G;
+TUNABLE_INT("hw.cxgbe.holdoff_pktc_idx_10G", &t4_pktc_idx_10g);
+
+#define TMR_IDX_1G 1
+static int t4_tmr_idx_1g = TMR_IDX_1G;
+TUNABLE_INT("hw.cxgbe.holdoff_timer_idx_1G", &t4_tmr_idx_1g);
+
+#define PKTC_IDX_1G 2
+static int t4_pktc_idx_1g = PKTC_IDX_1G;
+TUNABLE_INT("hw.cxgbe.holdoff_pktc_idx_1G", &t4_pktc_idx_1g);
/*
* Size (# of entries) of each tx and rx queue.
*/
-static unsigned int qsize_txq = TX_EQ_QSIZE;
-TUNABLE_INT("hw.cxgbe.qsize_txq", &qsize_txq);
-SYSCTL_UINT(_hw_cxgbe, OID_AUTO, qsize_txq, CTLFLAG_RDTUN,
- &qsize_txq, 0, "default queue size of NIC tx queues.");
+static unsigned int t4_qsize_txq = TX_EQ_QSIZE;
+TUNABLE_INT("hw.cxgbe.qsize_txq", &t4_qsize_txq);
+
+static unsigned int t4_qsize_rxq = RX_IQ_QSIZE;
+TUNABLE_INT("hw.cxgbe.qsize_rxq", &t4_qsize_rxq);
-static unsigned int qsize_rxq = RX_IQ_QSIZE;
-TUNABLE_INT("hw.cxgbe.qsize_rxq", &qsize_rxq);
-SYSCTL_UINT(_hw_cxgbe, OID_AUTO, qsize_rxq, CTLFLAG_RDTUN,
- &qsize_rxq, 0, "default queue size of NIC rx queues.");
+/*
+ * Interrupt types allowed (bits 0, 1, 2 = INTx, MSI, MSI-X respectively).
+ */
+static int t4_intr_types = INTR_MSIX | INTR_MSI | INTR_INTX;
+TUNABLE_INT("hw.cxgbe.interrupt_types", &t4_intr_types);
/*
- * Interrupt types allowed.
+ * Configuration file.
*/
-static int intr_types = INTR_MSIX | INTR_MSI | INTR_INTX;
-TUNABLE_INT("hw.cxgbe.interrupt_types", &intr_types);
-SYSCTL_UINT(_hw_cxgbe, OID_AUTO, interrupt_types, CTLFLAG_RDTUN, &intr_types, 0,
- "interrupt types allowed (bits 0, 1, 2 = INTx, MSI, MSI-X respectively)");
+static char t4_cfg_file[32] = "default";
+TUNABLE_STR("hw.cxgbe.config_file", t4_cfg_file, sizeof(t4_cfg_file));
/*
- * Force the driver to use the same set of interrupts for all ports.
+ * ASIC features that will be used. Disable the ones you don't want so that the
+ * chip resources aren't wasted on features that will not be used.
*/
-static int intr_shared = 0;
-TUNABLE_INT("hw.cxgbe.interrupts_shared", &intr_shared);
-SYSCTL_UINT(_hw_cxgbe, OID_AUTO, interrupts_shared, CTLFLAG_RDTUN,
- &intr_shared, 0, "interrupts shared between all ports");
+static int t4_linkcaps_allowed = 0; /* No DCBX, PPP, etc. by default */
+TUNABLE_INT("hw.cxgbe.linkcaps_allowed", &t4_linkcaps_allowed);
+
+static int t4_niccaps_allowed = FW_CAPS_CONFIG_NIC;
+TUNABLE_INT("hw.cxgbe.niccaps_allowed", &t4_niccaps_allowed);
+
+static int t4_toecaps_allowed = FW_CAPS_CONFIG_TOE;
+TUNABLE_INT("hw.cxgbe.toecaps_allowed", &t4_toecaps_allowed);
+
+static int t4_rdmacaps_allowed = 0;
+TUNABLE_INT("hw.cxgbe.rdmacaps_allowed", &t4_rdmacaps_allowed);
-static unsigned int filter_mode = HW_TPL_FR_MT_PR_IV_P_FC;
-TUNABLE_INT("hw.cxgbe.filter_mode", &filter_mode);
-SYSCTL_UINT(_hw_cxgbe, OID_AUTO, filter_mode, CTLFLAG_RDTUN,
- &filter_mode, 0, "default global filter mode.");
+static int t4_iscsicaps_allowed = 0;
+TUNABLE_INT("hw.cxgbe.iscsicaps_allowed", &t4_iscsicaps_allowed);
+
+static int t4_fcoecaps_allowed = 0;
+TUNABLE_INT("hw.cxgbe.fcoecaps_allowed", &t4_fcoecaps_allowed);
struct intrs_and_queues {
int intr_type; /* INTx, MSI, or MSI-X */
int nirq; /* Number of vectors */
- int intr_shared; /* Interrupts shared between all ports */
+ int intr_flags;
int ntxq10g; /* # of NIC txq's for each 10G port */
int nrxq10g; /* # of NIC rxq's for each 10G port */
int ntxq1g; /* # of NIC txq's for each 1G port */
int nrxq1g; /* # of NIC rxq's for each 1G port */
+#ifndef TCP_OFFLOAD_DISABLE
+ int nofldtxq10g; /* # of TOE txq's for each 10G port */
+ int nofldrxq10g; /* # of TOE rxq's for each 10G port */
+ int nofldtxq1g; /* # of TOE txq's for each 1G port */
+ int nofldrxq1g; /* # of TOE rxq's for each 1G port */
+#endif
};
struct filter_entry {
@@ -244,15 +257,6 @@ struct filter_entry {
};
enum {
- MEMWIN0_APERTURE = 2048,
- MEMWIN0_BASE = 0x1b800,
- MEMWIN1_APERTURE = 32768,
- MEMWIN1_BASE = 0x28000,
- MEMWIN2_APERTURE = 65536,
- MEMWIN2_BASE = 0x30000,
-};
-
-enum {
XGMAC_MTU = (1 << 0),
XGMAC_PROMISC = (1 << 1),
XGMAC_ALLMULTI = (1 << 2),
@@ -268,9 +272,11 @@ static void setup_memwin(struct adapter *);
static int cfg_itype_and_nqueues(struct adapter *, int, int,
struct intrs_and_queues *);
static int prep_firmware(struct adapter *);
-static int get_devlog_params(struct adapter *, struct devlog_params *);
-static int get_capabilities(struct adapter *, struct fw_caps_config_cmd *);
-static int get_params(struct adapter *, struct fw_caps_config_cmd *);
+static int upload_config_file(struct adapter *, const struct firmware *,
+ uint32_t *, uint32_t *);
+static int partition_resources(struct adapter *, const struct firmware *);
+static int get_params__pre_init(struct adapter *);
+static int get_params__post_init(struct adapter *);
static void t4_set_desc(struct adapter *);
static void build_medialist(struct port_info *);
static int update_mac_settings(struct port_info *, int);
@@ -278,24 +284,46 @@ static int cxgbe_init_locked(struct port_info *);
static int cxgbe_init_synchronized(struct port_info *);
static int cxgbe_uninit_locked(struct port_info *);
static int cxgbe_uninit_synchronized(struct port_info *);
-static int first_port_up(struct adapter *);
-static int last_port_down(struct adapter *);
+static int adapter_full_init(struct adapter *);
+static int adapter_full_uninit(struct adapter *);
+static int port_full_init(struct port_info *);
+static int port_full_uninit(struct port_info *);
+static void quiesce_eq(struct adapter *, struct sge_eq *);
+static void quiesce_iq(struct adapter *, struct sge_iq *);
+static void quiesce_fl(struct adapter *, struct sge_fl *);
static int t4_alloc_irq(struct adapter *, struct irq *, int rid,
- iq_intr_handler_t *, void *, char *);
+ driver_intr_t *, void *, char *);
static int t4_free_irq(struct adapter *, struct irq *);
static void reg_block_dump(struct adapter *, uint8_t *, unsigned int,
unsigned int);
static void t4_get_regs(struct adapter *, struct t4_regdump *, uint8_t *);
static void cxgbe_tick(void *);
+static int cpl_not_handled(struct sge_iq *, const struct rss_header *,
+ struct mbuf *);
static int t4_sysctls(struct adapter *);
static int cxgbe_sysctls(struct port_info *);
static int sysctl_int_array(SYSCTL_HANDLER_ARGS);
+static int sysctl_bitfield(SYSCTL_HANDLER_ARGS);
static int sysctl_holdoff_tmr_idx(SYSCTL_HANDLER_ARGS);
static int sysctl_holdoff_pktc_idx(SYSCTL_HANDLER_ARGS);
static int sysctl_qsize_rxq(SYSCTL_HANDLER_ARGS);
static int sysctl_qsize_txq(SYSCTL_HANDLER_ARGS);
static int sysctl_handle_t4_reg64(SYSCTL_HANDLER_ARGS);
+static int sysctl_cctrl(SYSCTL_HANDLER_ARGS);
+static int sysctl_cpl_stats(SYSCTL_HANDLER_ARGS);
+static int sysctl_ddp_stats(SYSCTL_HANDLER_ARGS);
static int sysctl_devlog(SYSCTL_HANDLER_ARGS);
+static int sysctl_fcoe_stats(SYSCTL_HANDLER_ARGS);
+static int sysctl_hw_sched(SYSCTL_HANDLER_ARGS);
+static int sysctl_lb_stats(SYSCTL_HANDLER_ARGS);
+static int sysctl_meminfo(SYSCTL_HANDLER_ARGS);
+static int sysctl_path_mtus(SYSCTL_HANDLER_ARGS);
+static int sysctl_pm_stats(SYSCTL_HANDLER_ARGS);
+static int sysctl_rdma_stats(SYSCTL_HANDLER_ARGS);
+static int sysctl_tcp_stats(SYSCTL_HANDLER_ARGS);
+static int sysctl_tids(SYSCTL_HANDLER_ARGS);
+static int sysctl_tp_err_stats(SYSCTL_HANDLER_ARGS);
+static int sysctl_tx_rate(SYSCTL_HANDLER_ARGS);
static inline void txq_start(struct ifnet *, struct sge_txq *);
static uint32_t fconf_to_mode(uint32_t);
static uint32_t mode_to_fconf(uint32_t);
@@ -309,8 +337,15 @@ static int del_filter(struct adapter *, struct t4_filter *);
static void clear_filter(struct filter_entry *);
static int set_filter_wr(struct adapter *, int);
static int del_filter_wr(struct adapter *, int);
-void filter_rpl(struct adapter *, const struct cpl_set_tcb_rpl *);
+static int filter_rpl(struct sge_iq *, const struct rss_header *,
+ struct mbuf *);
static int get_sge_context(struct adapter *, struct t4_sge_context *);
+static int read_card_mem(struct adapter *, struct t4_mem_range *);
+#ifndef TCP_OFFLOAD_DISABLE
+static int toe_capability(struct port_info *, int);
+static int activate_uld(struct adapter *, int, struct uld_softc *);
+static int deactivate_uld(struct uld_softc *);
+#endif
static int t4_mod_event(module_t, int, void *);
struct t4_pciids {
@@ -332,6 +367,11 @@ struct t4_pciids {
{0x440a, 4, "Chelsio T404-BT"},
};
+#ifndef TCP_OFFLOAD_DISABLE
+/* This is used in service_iq() to get to the fl associated with an iq. */
+CTASSERT(offsetof(struct sge_ofld_rxq, fl) == offsetof(struct sge_rxq, fl));
+#endif
+
static int
t4_probe(device_t dev)
{
@@ -358,10 +398,11 @@ t4_attach(device_t dev)
{
struct adapter *sc;
int rc = 0, i, n10g, n1g, rqidx, tqidx;
- struct fw_caps_config_cmd caps;
- uint32_t p, v;
struct intrs_and_queues iaq;
struct sge *s;
+#ifndef TCP_OFFLOAD_DISABLE
+ int ofld_rqidx, ofld_tqidx;
+#endif
sc = device_get_softc(dev);
sc->dev = dev;
@@ -370,6 +411,8 @@ t4_attach(device_t dev)
pci_enable_busmaster(dev);
if (pci_find_cap(dev, PCIY_EXPRESS, &i) == 0) {
+ uint32_t v;
+
pci_set_max_read_req(dev, 4096);
v = pci_read_config(dev, i + PCIR_EXPRESS_DEVICE_CTL, 2);
v |= PCIM_EXP_CTL_RELAXED_ORD_ENABLE;
@@ -379,12 +422,22 @@ t4_attach(device_t dev)
snprintf(sc->lockname, sizeof(sc->lockname), "%s",
device_get_nameunit(dev));
mtx_init(&sc->sc_lock, sc->lockname, 0, MTX_DEF);
+ mtx_lock(&t4_list_lock);
+ SLIST_INSERT_HEAD(&t4_list, sc, link);
+ mtx_unlock(&t4_list_lock);
+
+ mtx_init(&sc->sfl_lock, "starving freelists", 0, MTX_DEF);
+ TAILQ_INIT(&sc->sfl);
+ callout_init(&sc->sfl_callout, CALLOUT_MPSAFE);
rc = map_bars(sc);
if (rc != 0)
goto done; /* error message displayed already */
memset(sc->chan_map, 0xff, sizeof(sc->chan_map));
+ for (i = 0; i < ARRAY_SIZE(sc->cpl_handler); i++)
+ sc->cpl_handler[i] = cpl_not_handled;
+ t4_register_cpl_handler(sc, CPL_SET_TCB_RPL, filter_rpl);
/* Prepare the adapter for operation */
rc = -t4_prep_adapter(sc);
@@ -393,107 +446,75 @@ t4_attach(device_t dev)
goto done;
}
- /* Do this really early */
+ /*
+ * Do this really early, with the memory windows set up even before the
+ * character device. The userland tool's register i/o and mem read
+ * will work even in "recovery mode".
+ */
+ setup_memwin(sc);
sc->cdev = make_dev(&t4_cdevsw, device_get_unit(dev), UID_ROOT,
GID_WHEEL, 0600, "%s", device_get_nameunit(dev));
sc->cdev->si_drv1 = sc;
+ /* Go no further if recovery mode has been requested. */
+ if (TUNABLE_INT_FETCH("hw.cxgbe.sos", &i) && i != 0) {
+ device_printf(dev, "recovery mode.\n");
+ goto done;
+ }
+
/* Prepare the firmware for operation */
rc = prep_firmware(sc);
if (rc != 0)
goto done; /* error message displayed already */
- /* Read firmware devlog parameters */
- (void) get_devlog_params(sc, &sc->params.devlog);
-
- /* Get device capabilities and select which ones we'll use */
- rc = get_capabilities(sc, &caps);
- if (rc != 0) {
- device_printf(dev,
- "failed to initialize adapter capabilities: %d.\n", rc);
- goto done;
- }
+ rc = get_params__pre_init(sc);
+ if (rc != 0)
+ goto done; /* error message displayed already */
- /* Choose the global RSS mode. */
- rc = -t4_config_glbl_rss(sc, sc->mbox,
- FW_RSS_GLB_CONFIG_CMD_MODE_BASICVIRTUAL,
- F_FW_RSS_GLB_CONFIG_CMD_TNLMAPEN |
- F_FW_RSS_GLB_CONFIG_CMD_HASHTOEPLITZ |
- F_FW_RSS_GLB_CONFIG_CMD_TNLALLLKP);
- if (rc != 0) {
- device_printf(dev,
- "failed to select global RSS mode: %d.\n", rc);
- goto done;
- }
+ rc = t4_sge_init(sc);
+ if (rc != 0)
+ goto done; /* error message displayed already */
- /* These are total (sum of all ports) limits for a bus driver */
- rc = -t4_cfg_pfvf(sc, sc->mbox, sc->pf, 0,
- 128, /* max # of egress queues */
- 64, /* max # of egress Ethernet or control queues */
- 64, /* max # of ingress queues with fl/interrupt */
- 0, /* max # of ingress queues without interrupt */
- 0, /* PCIe traffic class */
- 4, /* max # of virtual interfaces */
- M_FW_PFVF_CMD_CMASK, M_FW_PFVF_CMD_PMASK, 16,
- FW_CMD_CAP_PF, FW_CMD_CAP_PF);
- if (rc != 0) {
- device_printf(dev,
- "failed to configure pf/vf resources: %d.\n", rc);
- goto done;
+ if (sc->flags & MASTER_PF) {
+ /* get basic stuff going */
+ rc = -t4_fw_initialize(sc, sc->mbox);
+ if (rc != 0) {
+ device_printf(dev, "early init failed: %d.\n", rc);
+ goto done;
+ }
}
- /* Need this before sge_init */
- for (i = 0; i < SGE_NTIMERS; i++)
- sc->sge.timer_val[i] = min(intr_timer[i], 200U);
- for (i = 0; i < SGE_NCOUNTERS; i++)
- sc->sge.counter_val[i] = min(intr_pktcount[i], M_THRESHOLD_0);
-
- /* Also need the cooked value of cclk before sge_init */
- p = (V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) |
- V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_CCLK));
- rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, &p, &v);
- if (rc != 0) {
- device_printf(sc->dev,
- "failed to obtain core clock value: %d.\n", rc);
- goto done;
- }
- sc->params.vpd.cclk = v;
+ rc = get_params__post_init(sc);
+ if (rc != 0)
+ goto done; /* error message displayed already */
- t4_sge_init(sc);
+ if (sc->flags & MASTER_PF) {
- t4_set_filter_mode(sc, filter_mode);
- t4_set_reg_field(sc, A_TP_GLOBAL_CONFIG,
- V_FIVETUPLELOOKUP(M_FIVETUPLELOOKUP),
- V_FIVETUPLELOOKUP(M_FIVETUPLELOOKUP));
- t4_tp_wr_bits_indirect(sc, A_TP_INGRESS_CONFIG, F_CSUM_HAS_PSEUDO_HDR,
- F_LOOKUPEVERYPKT);
+ /* final tweaks to some settings */
- /* get basic stuff going */
- rc = -t4_early_init(sc, sc->mbox);
- if (rc != 0) {
- device_printf(dev, "early init failed: %d.\n", rc);
- goto done;
+ t4_load_mtus(sc, sc->params.mtus, sc->params.a_wnd,
+ sc->params.b_wnd);
+ t4_write_reg(sc, A_ULP_RX_TDDP_PSZ, V_HPZ0(PAGE_SHIFT - 12));
+ t4_set_reg_field(sc, A_TP_PARA_REG3, F_TUNNELCNGDROP0 |
+ F_TUNNELCNGDROP1 | F_TUNNELCNGDROP2 | F_TUNNELCNGDROP3, 0);
+ t4_set_reg_field(sc, A_TP_PARA_REG5,
+ V_INDICATESIZE(M_INDICATESIZE) |
+ F_REARMDDPOFFSET | F_RESETDDPOFFSET,
+ V_INDICATESIZE(M_INDICATESIZE) |
+ F_REARMDDPOFFSET | F_RESETDDPOFFSET);
+ } else {
+ /*
+ * XXX: Verify that we can live with whatever the master driver
+ * has done so far, and hope that it doesn't change any global
+ * setting from underneath us in the future.
+ */
}
- rc = get_params(sc, &caps);
- if (rc != 0)
- goto done; /* error message displayed already */
-
- /* These are finalized by FW initialization, load their values now */
- v = t4_read_reg(sc, A_TP_TIMER_RESOLUTION);
- sc->params.tp.tre = G_TIMERRESOLUTION(v);
- sc->params.tp.dack_re = G_DELAYEDACKRESOLUTION(v);
- t4_read_mtu_tbl(sc, sc->params.mtus, NULL);
-
- /* tweak some settings */
- t4_write_reg(sc, A_TP_SHIFT_CNT, V_SYNSHIFTMAX(6) | V_RXTSHIFTMAXR1(4) |
- V_RXTSHIFTMAXR2(15) | V_PERSHIFTBACKOFFMAX(8) | V_PERSHIFTMAX(8) |
- V_KEEPALIVEMAXR1(4) | V_KEEPALIVEMAXR2(9));
- t4_write_reg(sc, A_ULP_RX_TDDP_PSZ, V_HPZ0(PAGE_SHIFT - 12));
- t4_set_reg_field(sc, A_TP_PARA_REG3, F_TUNNELCNGDROP0 |
- F_TUNNELCNGDROP1 | F_TUNNELCNGDROP2 | F_TUNNELCNGDROP3, 0);
+ t4_read_indirect(sc, A_TP_PIO_ADDR, A_TP_PIO_DATA, &sc->filter_mode, 1,
+ A_TP_VLAN_PRI_MAP);
- setup_memwin(sc);
+ for (i = 0; i < NCHAN; i++)
+ sc->params.tp.tx_modq[i] = i;
rc = t4_create_dma_tag(sc);
if (rc != 0)
@@ -532,31 +553,18 @@ t4_attach(device_t dev)
if (is_10G_port(pi)) {
n10g++;
- pi->tmr_idx = tmr_idx_10g;
- pi->pktc_idx = pktc_idx_10g;
+ pi->tmr_idx = t4_tmr_idx_10g;
+ pi->pktc_idx = t4_pktc_idx_10g;
} else {
n1g++;
- pi->tmr_idx = tmr_idx_1g;
- pi->pktc_idx = pktc_idx_1g;
+ pi->tmr_idx = t4_tmr_idx_1g;
+ pi->pktc_idx = t4_pktc_idx_1g;
}
pi->xact_addr_filt = -1;
- pi->qsize_rxq = max(qsize_rxq, 128);
- while (pi->qsize_rxq & 7)
- pi->qsize_rxq++;
- pi->qsize_txq = max(qsize_txq, 128);
-
- if (pi->qsize_rxq != qsize_rxq) {
- device_printf(dev,
- "using %d instead of %d as the rx queue size.\n",
- pi->qsize_rxq, qsize_rxq);
- }
- if (pi->qsize_txq != qsize_txq) {
- device_printf(dev,
- "using %d instead of %d as the tx queue size.\n",
- pi->qsize_txq, qsize_txq);
- }
+ pi->qsize_rxq = t4_qsize_rxq;
+ pi->qsize_txq = t4_qsize_txq;
pi->dev = device_add_child(dev, "cxgbe", -1);
if (pi->dev == NULL) {
@@ -566,14 +574,6 @@ t4_attach(device_t dev)
goto done;
}
device_set_softc(pi->dev, pi);
-
- setbit(&sc->registered_device_map, i);
- }
-
- if (sc->registered_device_map == 0) {
- device_printf(dev, "no usable ports\n");
- rc = ENXIO;
- goto done;
}
/*
@@ -585,20 +585,31 @@ t4_attach(device_t dev)
sc->intr_type = iaq.intr_type;
sc->intr_count = iaq.nirq;
+ sc->flags |= iaq.intr_flags;
s = &sc->sge;
s->nrxq = n10g * iaq.nrxq10g + n1g * iaq.nrxq1g;
s->ntxq = n10g * iaq.ntxq10g + n1g * iaq.ntxq1g;
s->neq = s->ntxq + s->nrxq; /* the free list in an rxq is an eq */
- s->neq += sc->params.nports; /* control queues, 1 per port */
+ s->neq += sc->params.nports + 1;/* ctrl queues: 1 per port + 1 mgmt */
s->niq = s->nrxq + 1; /* 1 extra for firmware event queue */
- if (iaq.intr_shared)
- sc->flags |= INTR_SHARED;
- s->niq += NINTRQ(sc); /* interrupt queues */
- s->intrq = malloc(NINTRQ(sc) * sizeof(struct sge_iq), M_CXGBE,
- M_ZERO | M_WAITOK);
- s->ctrlq = malloc(sc->params.nports * sizeof(struct sge_ctrlq), M_CXGBE,
+#ifndef TCP_OFFLOAD_DISABLE
+ if (is_offload(sc)) {
+
+ s->nofldrxq = n10g * iaq.nofldrxq10g + n1g * iaq.nofldrxq1g;
+ s->nofldtxq = n10g * iaq.nofldtxq10g + n1g * iaq.nofldtxq1g;
+ s->neq += s->nofldtxq + s->nofldrxq;
+ s->niq += s->nofldrxq;
+
+ s->ofld_rxq = malloc(s->nofldrxq * sizeof(struct sge_ofld_rxq),
+ M_CXGBE, M_ZERO | M_WAITOK);
+ s->ofld_txq = malloc(s->nofldtxq * sizeof(struct sge_wrq),
+ M_CXGBE, M_ZERO | M_WAITOK);
+ }
+#endif
+
+ s->ctrlq = malloc(sc->params.nports * sizeof(struct sge_wrq), M_CXGBE,
M_ZERO | M_WAITOK);
s->rxq = malloc(s->nrxq * sizeof(struct sge_rxq), M_CXGBE,
M_ZERO | M_WAITOK);
@@ -612,15 +623,16 @@ t4_attach(device_t dev)
sc->irq = malloc(sc->intr_count * sizeof(struct irq), M_CXGBE,
M_ZERO | M_WAITOK);
- sc->l2t = t4_init_l2t(M_WAITOK);
-
- t4_sysctls(sc);
+ t4_init_l2t(sc, M_WAITOK);
/*
* Second pass over the ports. This time we know the number of rx and
* tx queues that each port should get.
*/
rqidx = tqidx = 0;
+#ifndef TCP_OFFLOAD_DISABLE
+ ofld_rqidx = ofld_tqidx = 0;
+#endif
for_each_port(sc, i) {
struct port_info *pi = sc->port[i];
@@ -628,13 +640,33 @@ t4_attach(device_t dev)
continue;
pi->first_rxq = rqidx;
- pi->nrxq = is_10G_port(pi) ? iaq.nrxq10g : iaq.nrxq1g;
-
pi->first_txq = tqidx;
- pi->ntxq = is_10G_port(pi) ? iaq.ntxq10g : iaq.ntxq1g;
+ if (is_10G_port(pi)) {
+ pi->nrxq = iaq.nrxq10g;
+ pi->ntxq = iaq.ntxq10g;
+ } else {
+ pi->nrxq = iaq.nrxq1g;
+ pi->ntxq = iaq.ntxq1g;
+ }
rqidx += pi->nrxq;
tqidx += pi->ntxq;
+
+#ifndef TCP_OFFLOAD_DISABLE
+ if (is_offload(sc)) {
+ pi->first_ofld_rxq = ofld_rqidx;
+ pi->first_ofld_txq = ofld_tqidx;
+ if (is_10G_port(pi)) {
+ pi->nofldrxq = iaq.nofldrxq10g;
+ pi->nofldtxq = iaq.nofldtxq10g;
+ } else {
+ pi->nofldrxq = iaq.nofldrxq1g;
+ pi->nofldtxq = iaq.nofldtxq1g;
+ }
+ ofld_rqidx += pi->nofldrxq;
+ ofld_tqidx += pi->nofldtxq;
+ }
+#endif
}
rc = bus_generic_attach(dev);
@@ -644,17 +676,27 @@ t4_attach(device_t dev)
goto done;
}
-#ifdef INVARIANTS
device_printf(dev,
- "%p, %d ports (0x%x), %d intr_type, %d intr_count\n",
- sc, sc->params.nports, sc->params.portvec,
- sc->intr_type, sc->intr_count);
-#endif
+ "PCIe x%d, %d ports, %d %s interrupt%s, %d eq, %d iq\n",
+ sc->params.pci.width, sc->params.nports, sc->intr_count,
+ sc->intr_type == INTR_MSIX ? "MSI-X" :
+ (sc->intr_type == INTR_MSI ? "MSI" : "INTx"),
+ sc->intr_count > 1 ? "s" : "", sc->sge.neq, sc->sge.niq);
+
t4_set_desc(sc);
done:
+ if (rc != 0 && sc->cdev) {
+ /* cdev was created and so cxgbetool works; recover that way. */
+ device_printf(dev,
+ "error during attach, adapter is now in recovery mode.\n");
+ rc = 0;
+ }
+
if (rc != 0)
t4_detach(dev);
+ else
+ t4_sysctls(sc);
return (rc);
}
@@ -667,14 +709,25 @@ t4_detach(device_t dev)
{
struct adapter *sc;
struct port_info *pi;
- int i;
+ int i, rc;
sc = device_get_softc(dev);
- if (sc->cdev)
+ if (sc->flags & FULL_INIT_DONE)
+ t4_intr_disable(sc);
+
+ if (sc->cdev) {
destroy_dev(sc->cdev);
+ sc->cdev = NULL;
+ }
+
+ rc = bus_generic_detach(dev);
+ if (rc) {
+ device_printf(dev,
+ "failed to detach child devices: %d\n", rc);
+ return (rc);
+ }
- bus_generic_detach(dev);
for (i = 0; i < MAX_NPORTS; i++) {
pi = sc->port[i];
if (pi) {
@@ -687,6 +740,9 @@ t4_detach(device_t dev)
}
}
+ if (sc->flags & FULL_INIT_DONE)
+ adapter_full_uninit(sc);
+
if (sc->flags & FW_OK)
t4_fw_bye(sc, sc->mbox);
@@ -704,16 +760,27 @@ t4_detach(device_t dev)
if (sc->l2t)
t4_free_l2t(sc->l2t);
+#ifndef TCP_OFFLOAD_DISABLE
+ free(sc->sge.ofld_rxq, M_CXGBE);
+ free(sc->sge.ofld_txq, M_CXGBE);
+#endif
free(sc->irq, M_CXGBE);
free(sc->sge.rxq, M_CXGBE);
free(sc->sge.txq, M_CXGBE);
free(sc->sge.ctrlq, M_CXGBE);
- free(sc->sge.intrq, M_CXGBE);
free(sc->sge.iqmap, M_CXGBE);
free(sc->sge.eqmap, M_CXGBE);
free(sc->tids.ftid_tab, M_CXGBE);
t4_destroy_dma_tag(sc);
- mtx_destroy(&sc->sc_lock);
+ if (mtx_initialized(&sc->sc_lock)) {
+ mtx_lock(&t4_list_lock);
+ SLIST_REMOVE(&t4_list, sc, adapter, link);
+ mtx_unlock(&t4_list_lock);
+ mtx_destroy(&sc->sc_lock);
+ }
+
+ if (mtx_initialized(&sc->sfl_lock))
+ mtx_destroy(&sc->sfl_lock);
bzero(sc, sizeof(*sc));
@@ -727,7 +794,7 @@ cxgbe_probe(device_t dev)
char buf[128];
struct port_info *pi = device_get_softc(dev);
- snprintf(buf, sizeof(buf), "Port %d", pi->port_id);
+ snprintf(buf, sizeof(buf), "port %d", pi->port_id);
device_set_desc_copy(dev, buf);
return (BUS_PROBE_DEFAULT);
@@ -754,15 +821,6 @@ cxgbe_attach(device_t dev)
ifp->if_softc = pi;
callout_init(&pi->tick, CALLOUT_MPSAFE);
- pi->tq = taskqueue_create("cxgbe_taskq", M_NOWAIT,
- taskqueue_thread_enqueue, &pi->tq);
- if (pi->tq == NULL) {
- device_printf(dev, "failed to allocate port task queue\n");
- if_free(pi->ifp);
- return (ENOMEM);
- }
- taskqueue_start_threads(&pi->tq, 1, PI_NET, "%s taskq",
- device_get_nameunit(dev));
if_initname(ifp, device_get_name(dev), device_get_unit(dev));
ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
@@ -778,6 +836,10 @@ cxgbe_attach(device_t dev)
IFQ_SET_READY(&ifp->if_snd);
ifp->if_capabilities = T4_CAP;
+#ifndef TCP_OFFLOAD_DISABLE
+ if (is_offload(pi->adapter))
+ ifp->if_capabilities |= IFCAP_TOE4;
+#endif
ifp->if_capenable = T4_CAP_ENABLE;
ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO;
@@ -788,9 +850,14 @@ cxgbe_attach(device_t dev)
ether_ifattach(ifp, pi->hw_addr);
-#ifdef INVARIANTS
- device_printf(dev, "%p, %d txq, %d rxq\n", pi, pi->ntxq, pi->nrxq);
+#ifndef TCP_OFFLOAD_DISABLE
+ if (is_offload(pi->adapter)) {
+ device_printf(dev,
+ "%d txq, %d rxq (NIC); %d txq, %d rxq (TOE)\n",
+ pi->ntxq, pi->nrxq, pi->nofldtxq, pi->nofldrxq);
+ } else
#endif
+ device_printf(dev, "%d txq, %d rxq\n", pi->ntxq, pi->nrxq);
cxgbe_sysctls(pi);
@@ -802,7 +869,7 @@ cxgbe_detach(device_t dev)
{
struct port_info *pi = device_get_softc(dev);
struct adapter *sc = pi->adapter;
- int rc;
+ struct ifnet *ifp = pi->ifp;
/* Tell if_ioctl and if_init that the port is going away */
ADAPTER_LOCK(sc);
@@ -813,11 +880,15 @@ cxgbe_detach(device_t dev)
SET_BUSY(sc);
ADAPTER_UNLOCK(sc);
- rc = cxgbe_uninit_synchronized(pi);
- if (rc != 0)
- device_printf(dev, "port uninit failed: %d.\n", rc);
+ PORT_LOCK(pi);
+ ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
+ callout_stop(&pi->tick);
+ PORT_UNLOCK(pi);
+ callout_drain(&pi->tick);
- taskqueue_free(pi->tq);
+ /* Let detach proceed even if these fail. */
+ cxgbe_uninit_synchronized(pi);
+ port_full_uninit(pi);
ifmedia_removeall(&pi->media);
ether_ifdetach(pi->ifp);
@@ -956,6 +1027,7 @@ fail:
if_printf(ifp,
"enable txcsum first.\n");
rc = EAGAIN;
+ goto fail;
}
} else
ifp->if_hwassist &= ~CSUM_TSO;
@@ -968,15 +1040,21 @@ fail:
ifp->if_capenable ^= IFCAP_LRO;
for_each_rxq(pi, i, rxq) {
if (ifp->if_capenable & IFCAP_LRO)
- rxq->flags |= RXQ_LRO_ENABLED;
+ rxq->iq.flags |= IQ_LRO_ENABLED;
else
- rxq->flags &= ~RXQ_LRO_ENABLED;
+ rxq->iq.flags &= ~IQ_LRO_ENABLED;
}
#endif
}
#ifndef TCP_OFFLOAD_DISABLE
- if (mask & IFCAP_TOE4) {
- rc = EOPNOTSUPP;
+ if (mask & IFCAP_TOE) {
+ int enable = (ifp->if_capenable ^ mask) & IFCAP_TOE;
+
+ rc = toe_capability(pi, enable);
+ if (rc != 0)
+ goto fail;
+
+ ifp->if_capenable ^= mask;
}
#endif
if (mask & IFCAP_VLAN_HWTAGGING) {
@@ -1041,9 +1119,9 @@ cxgbe_transmit(struct ifnet *ifp, struct mbuf *m)
M_ASSERTPKTHDR(m);
- if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
+ if (__predict_false(pi->link_cfg.link_ok == 0)) {
m_freem(m);
- return (0);
+ return (ENETDOWN);
}
if (m->m_flags & M_FLOWID)
@@ -1051,13 +1129,20 @@ cxgbe_transmit(struct ifnet *ifp, struct mbuf *m)
br = txq->br;
if (TXQ_TRYLOCK(txq) == 0) {
+ struct sge_eq *eq = &txq->eq;
+
/*
- * XXX: make sure that this packet really is sent out. There is
- * a small race where t4_eth_tx may stop draining the drbr and
- * goes away, just before we enqueued this mbuf.
+ * It is possible that t4_eth_tx finishes up and releases the
+ * lock between the TRYLOCK above and the drbr_enqueue here. We
+ * need to make sure that this mbuf doesn't just sit there in
+ * the drbr.
*/
- return (drbr_enqueue(ifp, br, m));
+ rc = drbr_enqueue(ifp, br, m);
+ if (rc == 0 && callout_pending(&eq->tx_callout) == 0 &&
+ !(eq->flags & EQ_DOOMED))
+ callout_reset(&eq->tx_callout, 1, t4_tx_callout, eq);
+ return (rc);
}
/*
@@ -1098,11 +1183,12 @@ cxgbe_qflush(struct ifnet *ifp)
int i;
struct mbuf *m;
- /* queues do not exist if !IFF_DRV_RUNNING. */
- if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
+ /* queues do not exist if !PORT_INIT_DONE. */
+ if (pi->flags & PORT_INIT_DONE) {
for_each_txq(pi, i, txq) {
TXQ_LOCK(txq);
m_freem(txq->m);
+ txq->m = NULL;
while ((m = buf_ring_dequeue_sc(txq->br)) != NULL)
m_freem(m);
TXQ_UNLOCK(txq);
@@ -1216,14 +1302,25 @@ static int
cfg_itype_and_nqueues(struct adapter *sc, int n10g, int n1g,
struct intrs_and_queues *iaq)
{
- int rc, itype, navail, nc, nrxq10g, nrxq1g;
+ int rc, itype, navail, nrxq10g, nrxq1g, n;
+ int nofldrxq10g = 0, nofldrxq1g = 0;
bzero(iaq, sizeof(*iaq));
- nc = mp_ncpus; /* our snapshot of the number of CPUs */
+
+ iaq->ntxq10g = t4_ntxq10g;
+ iaq->ntxq1g = t4_ntxq1g;
+ iaq->nrxq10g = nrxq10g = t4_nrxq10g;
+ iaq->nrxq1g = nrxq1g = t4_nrxq1g;
+#ifndef TCP_OFFLOAD_DISABLE
+ iaq->nofldtxq10g = t4_nofldtxq10g;
+ iaq->nofldtxq1g = t4_nofldtxq1g;
+ iaq->nofldrxq10g = nofldrxq10g = t4_nofldrxq10g;
+ iaq->nofldrxq1g = nofldrxq1g = t4_nofldrxq1g;
+#endif
for (itype = INTR_MSIX; itype; itype >>= 1) {
- if ((itype & intr_types) == 0)
+ if ((itype & t4_intr_types) == 0)
continue; /* not allowed */
if (itype == INTR_MSIX)
@@ -1232,60 +1329,93 @@ cfg_itype_and_nqueues(struct adapter *sc, int n10g, int n1g,
navail = pci_msi_count(sc->dev);
else
navail = 1;
-
+restart:
if (navail == 0)
continue;
iaq->intr_type = itype;
+ iaq->intr_flags = 0;
- iaq->ntxq10g = min(nc, max_ntxq_10g);
- iaq->ntxq1g = min(nc, max_ntxq_1g);
-
- nrxq10g = min(nc, max_nrxq_10g);
- nrxq1g = min(nc, max_nrxq_1g);
-
- iaq->nirq = n10g * nrxq10g + n1g * nrxq1g + T4_EXTRA_INTR;
- if (iaq->nirq <= navail && intr_shared == 0) {
-
- if (itype == INTR_MSI && !powerof2(iaq->nirq))
- goto share;
-
- /* One for err, one for fwq, and one for each rxq */
-
- iaq->intr_shared = 0;
- iaq->nrxq10g = nrxq10g;
- iaq->nrxq1g = nrxq1g;
+ /*
+ * Best option: an interrupt vector for errors, one for the
+ * firmware event queue, and one for each rxq (NIC as well
+ * as offload).
+ */
+ iaq->nirq = T4_EXTRA_INTR;
+ iaq->nirq += n10g * (nrxq10g + nofldrxq10g);
+ iaq->nirq += n1g * (nrxq1g + nofldrxq1g);
+ if (iaq->nirq <= navail &&
+ (itype != INTR_MSI || powerof2(iaq->nirq))) {
+ iaq->intr_flags |= INTR_DIRECT;
+ goto allocate;
+ }
- } else {
-share:
- iaq->intr_shared = 1;
+ /*
+ * Second best option: an interrupt vector for errors, one for
+ * the firmware event queue, and one each for either NIC or
+ * offload rxq's.
+ */
+ iaq->nirq = T4_EXTRA_INTR;
+ iaq->nirq += n10g * max(nrxq10g, nofldrxq10g);
+ iaq->nirq += n1g * max(nrxq1g, nofldrxq1g);
+ if (iaq->nirq <= navail &&
+ (itype != INTR_MSI || powerof2(iaq->nirq)))
+ goto allocate;
- if (navail >= nc + T4_EXTRA_INTR) {
- if (itype == INTR_MSIX)
- navail = nc + T4_EXTRA_INTR;
+ /*
+ * Next best option: an interrupt vector for errors, one for the
+ * firmware event queue, and at least one per port. At this
+ * point we know we'll have to downsize nrxq or nofldrxq to fit
+ * what's available to us.
+ */
+ iaq->nirq = T4_EXTRA_INTR;
+ iaq->nirq += n10g + n1g;
+ if (iaq->nirq <= navail) {
+ int leftover = navail - iaq->nirq;
+
+ if (n10g > 0) {
+ int target = max(nrxq10g, nofldrxq10g);
+
+ n = 1;
+ while (n < target && leftover >= n10g) {
+ leftover -= n10g;
+ iaq->nirq += n10g;
+ n++;
+ }
+ iaq->nrxq10g = min(n, nrxq10g);
+#ifndef TCP_OFFLOAD_DISABLE
+ iaq->nofldrxq10g = min(n, nofldrxq10g);
+#endif
+ }
- /* navail is and must remain a pow2 for MSI */
- if (itype == INTR_MSI) {
- KASSERT(powerof2(navail),
- ("%d not power of 2", navail));
+ if (n1g > 0) {
+ int target = max(nrxq1g, nofldrxq1g);
- while (navail / 2 >= nc + T4_EXTRA_INTR)
- navail /= 2;
+ n = 1;
+ while (n < target && leftover >= n1g) {
+ leftover -= n1g;
+ iaq->nirq += n1g;
+ n++;
}
+ iaq->nrxq1g = min(n, nrxq1g);
+#ifndef TCP_OFFLOAD_DISABLE
+ iaq->nofldrxq1g = min(n, nofldrxq1g);
+#endif
}
- iaq->nirq = navail; /* total # of interrupts */
- /*
- * If we have multiple vectors available reserve one
- * exclusively for errors. The rest will be shared by
- * the fwq and data.
- */
- if (navail > 1)
- navail--;
- iaq->nrxq10g = min(nrxq10g, navail);
- iaq->nrxq1g = min(nrxq1g, navail);
+ if (itype != INTR_MSI || powerof2(iaq->nirq))
+ goto allocate;
}
+ /*
+ * Least desirable option: one interrupt vector for everything.
+ */
+ iaq->nirq = iaq->nrxq10g = iaq->nrxq1g = 1;
+#ifndef TCP_OFFLOAD_DISABLE
+ iaq->nofldrxq10g = iaq->nofldrxq1g = 1;
+#endif
+
+allocate:
navail = iaq->nirq;
rc = 0;
if (itype == INTR_MSIX)
@@ -1301,8 +1431,11 @@ share:
* Didn't get the number requested. Use whatever number
* the kernel is willing to allocate (it's in navail).
*/
+ device_printf(sc->dev, "fewer vectors than requested, "
+ "type=%d, req=%d, rcvd=%d; will downshift req.\n",
+ itype, iaq->nirq, navail);
pci_release_msi(sc->dev);
- goto share;
+ goto restart;
}
device_printf(sc->dev,
@@ -1312,26 +1445,30 @@ share:
device_printf(sc->dev,
"failed to find a usable interrupt type. "
- "allowed=%d, msi-x=%d, msi=%d, intx=1", intr_types,
+ "allowed=%d, msi-x=%d, msi=%d, intx=1", t4_intr_types,
pci_msix_count(sc->dev), pci_msi_count(sc->dev));
return (ENXIO);
}
/*
- * Install a compatible firmware (if required), establish contact with it,
- * become the master, and reset the device.
+ * Install a compatible firmware (if required), establish contact with it (by
+ * saying hello), and reset the device. If we end up as the master driver,
+ * partition adapter resources by providing a configuration file to the
+ * firmware.
*/
static int
prep_firmware(struct adapter *sc)
{
- const struct firmware *fw;
+ const struct firmware *fw = NULL, *cfg = NULL, *default_cfg;
int rc;
enum dev_state state;
+ default_cfg = firmware_get(T4_CFGNAME);
+
/* Check firmware version and install a different one if necessary */
rc = t4_check_fw_version(sc);
- if (rc != 0 || force_firmware_install) {
+ if (rc != 0) {
uint32_t v = 0;
fw = firmware_get(T4_FWNAME);
@@ -1343,7 +1480,7 @@ prep_firmware(struct adapter *sc)
/*
* The firmware module will not be used if it isn't the
* same major version as what the driver was compiled
- * with. This check trumps force_firmware_install.
+ * with.
*/
if (G_FW_HDR_FW_VER_MAJOR(v) != FW_VERSION_MAJOR) {
device_printf(sc->dev,
@@ -1356,17 +1493,16 @@ prep_firmware(struct adapter *sc)
}
}
- if (fw == NULL && (rc < 0 || force_firmware_install)) {
+ if (fw == NULL && rc < 0) {
device_printf(sc->dev, "No usable firmware. "
- "card has %d.%d.%d, driver compiled with %d.%d.%d, "
- "force_firmware_install%s set",
+ "card has %d.%d.%d, driver compiled with %d.%d.%d",
G_FW_HDR_FW_VER_MAJOR(sc->params.fw_vers),
G_FW_HDR_FW_VER_MINOR(sc->params.fw_vers),
G_FW_HDR_FW_VER_MICRO(sc->params.fw_vers),
FW_VERSION_MAJOR, FW_VERSION_MINOR,
- FW_VERSION_MICRO,
- force_firmware_install ? "" : " not");
- return (EAGAIN);
+ FW_VERSION_MICRO);
+ rc = EAGAIN;
+ goto done;
}
/*
@@ -1374,8 +1510,7 @@ prep_firmware(struct adapter *sc)
* Downgrade only for a major version mismatch or if
* force_firmware_install was specified.
*/
- if (fw != NULL && (rc < 0 || force_firmware_install ||
- v > sc->params.fw_vers)) {
+ if (fw != NULL && (rc < 0 || v > sc->params.fw_vers)) {
device_printf(sc->dev,
"installing firmware %d.%d.%d.%d on card.\n",
G_FW_HDR_FW_VER_MAJOR(v), G_FW_HDR_FW_VER_MINOR(v),
@@ -1385,26 +1520,24 @@ prep_firmware(struct adapter *sc)
if (rc != 0) {
device_printf(sc->dev,
"failed to install firmware: %d\n", rc);
- firmware_put(fw, FIRMWARE_UNLOAD);
- return (rc);
+ goto done;
} else {
/* refresh */
(void) t4_check_fw_version(sc);
}
}
-
- if (fw != NULL)
- firmware_put(fw, FIRMWARE_UNLOAD);
}
- /* Contact firmware, request master */
- rc = t4_fw_hello(sc, sc->mbox, sc->mbox, MASTER_MUST, &state);
+ /* Contact firmware. */
+ rc = t4_fw_hello(sc, sc->mbox, sc->mbox, MASTER_MAY, &state);
if (rc < 0) {
rc = -rc;
device_printf(sc->dev,
"failed to connect to the firmware: %d.\n", rc);
- return (rc);
+ goto done;
}
+ if (rc == sc->mbox)
+ sc->flags |= MASTER_PF;
/* Reset device */
rc = -t4_fw_reset(sc, sc->mbox, F_PIORSTMODE | F_PIORST);
@@ -1412,7 +1545,26 @@ prep_firmware(struct adapter *sc)
device_printf(sc->dev, "firmware reset failed: %d.\n", rc);
if (rc != ETIMEDOUT && rc != EIO)
t4_fw_bye(sc, sc->mbox);
- return (rc);
+ goto done;
+ }
+
+ /* Partition adapter resources as specified in the config file. */
+ if (sc->flags & MASTER_PF) {
+ if (strncmp(t4_cfg_file, "default", sizeof(t4_cfg_file))) {
+ char s[32];
+
+ snprintf(s, sizeof(s), "t4fw_cfg_%s", t4_cfg_file);
+ cfg = firmware_get(s);
+ if (cfg == NULL) {
+ device_printf(sc->dev,
+ "unable to locate %s module, "
+ "will use default config file.\n", s);
+ }
+ }
+
+ rc = partition_resources(sc, cfg ? cfg : default_cfg);
+ if (rc != 0)
+ goto done; /* error message displayed already */
}
snprintf(sc->fw_version, sizeof(sc->fw_version), "%u.%u.%u.%u",
@@ -1422,84 +1574,207 @@ prep_firmware(struct adapter *sc)
G_FW_HDR_FW_VER_BUILD(sc->params.fw_vers));
sc->flags |= FW_OK;
- return (0);
+done:
+ if (fw != NULL)
+ firmware_put(fw, FIRMWARE_UNLOAD);
+ if (cfg != NULL)
+ firmware_put(cfg, FIRMWARE_UNLOAD);
+ if (default_cfg != NULL)
+ firmware_put(default_cfg, FIRMWARE_UNLOAD);
+
+ return (rc);
}
+#define FW_PARAM_DEV(param) \
+ (V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) | \
+ V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_##param))
+#define FW_PARAM_PFVF(param) \
+ (V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_PFVF) | \
+ V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_PFVF_##param))
+
+/*
+ * Upload configuration file to card's memory.
+ */
static int
-get_devlog_params(struct adapter *sc, struct devlog_params *dlog)
+upload_config_file(struct adapter *sc, const struct firmware *fw, uint32_t *mt,
+ uint32_t *ma)
{
- struct fw_devlog_cmd devlog_cmd;
- uint32_t meminfo;
- int rc;
+ int rc, i;
+ uint32_t param, val, mtype, maddr, bar, off, win, remaining;
+ const uint32_t *b;
- bzero(&devlog_cmd, sizeof(devlog_cmd));
- devlog_cmd.op_to_write = htobe32(V_FW_CMD_OP(FW_DEVLOG_CMD) |
- F_FW_CMD_REQUEST | F_FW_CMD_READ);
- devlog_cmd.retval_len16 = htobe32(FW_LEN16(devlog_cmd));
- rc = -t4_wr_mbox(sc, sc->mbox, &devlog_cmd, sizeof(devlog_cmd),
- &devlog_cmd);
+ /* Figure out where the firmware wants us to upload it. */
+ param = FW_PARAM_DEV(CF);
+ rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
if (rc != 0) {
+ /* Firmwares without config file support will fail this way */
device_printf(sc->dev,
- "failed to get devlog parameters: %d.\n", rc);
- bzero(dlog, sizeof (*dlog));
+ "failed to query config file location: %d.\n", rc);
return (rc);
}
+ *mt = mtype = G_FW_PARAMS_PARAM_Y(val);
+ *ma = maddr = G_FW_PARAMS_PARAM_Z(val) << 16;
+
+ if (maddr & 3) {
+ device_printf(sc->dev,
+ "cannot upload config file (type %u, addr %x).\n",
+ mtype, maddr);
+ return (EFAULT);
+ }
- meminfo = be32toh(devlog_cmd.memtype_devlog_memaddr16_devlog);
- dlog->memtype = G_FW_DEVLOG_CMD_MEMTYPE_DEVLOG(meminfo);
- dlog->start = G_FW_DEVLOG_CMD_MEMADDR16_DEVLOG(meminfo) << 4;
- dlog->size = be32toh(devlog_cmd.memsize_devlog);
+ /* Translate mtype/maddr to an address suitable for the PCIe window */
+ val = t4_read_reg(sc, A_MA_TARGET_MEM_ENABLE);
+ val &= F_EDRAM0_ENABLE | F_EDRAM1_ENABLE | F_EXT_MEM_ENABLE;
+ switch (mtype) {
+ case FW_MEMTYPE_CF_EDC0:
+ if (!(val & F_EDRAM0_ENABLE))
+ goto err;
+ bar = t4_read_reg(sc, A_MA_EDRAM0_BAR);
+ maddr += G_EDRAM0_BASE(bar) << 20;
+ break;
- return (0);
+ case FW_MEMTYPE_CF_EDC1:
+ if (!(val & F_EDRAM1_ENABLE))
+ goto err;
+ bar = t4_read_reg(sc, A_MA_EDRAM1_BAR);
+ maddr += G_EDRAM1_BASE(bar) << 20;
+ break;
+
+ case FW_MEMTYPE_CF_EXTMEM:
+ if (!(val & F_EXT_MEM_ENABLE))
+ goto err;
+ bar = t4_read_reg(sc, A_MA_EXT_MEMORY_BAR);
+ maddr += G_EXT_MEM_BASE(bar) << 20;
+ break;
+
+ default:
+err:
+ device_printf(sc->dev,
+ "cannot upload config file (type %u, enabled %u).\n",
+ mtype, val);
+ return (EFAULT);
+ }
+
+ /*
+ * Position the PCIe window (we use memwin2) to the 16B aligned area
+ * just at/before the upload location.
+ */
+ win = maddr & ~0xf;
+ off = maddr - win; /* offset from the start of the window. */
+ t4_write_reg(sc, PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_OFFSET, 2), win);
+ t4_read_reg(sc, PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_OFFSET, 2));
+
+ remaining = fw->datasize;
+ if (remaining > FLASH_CFG_MAX_SIZE ||
+ remaining > MEMWIN2_APERTURE - off) {
+ device_printf(sc->dev, "cannot upload config file all at once "
+ "(size %u, max %u, room %u).\n",
+ remaining, FLASH_CFG_MAX_SIZE, MEMWIN2_APERTURE - off);
+ return (EFBIG);
+ }
+
+ /*
+ * XXX: sheer laziness. We deliberately added 4 bytes of useless
+ * stuffing/comments at the end of the config file so it's ok to simply
+ * throw away the last remaining bytes when the config file is not an
+ * exact multiple of 4.
+ */
+ b = fw->data;
+ for (i = 0; remaining >= 4; i += 4, remaining -= 4)
+ t4_write_reg(sc, MEMWIN2_BASE + off + i, *b++);
+
+ return (rc);
}
+/*
+ * Partition chip resources for use between various PFs, VFs, etc. This is done
+ * by uploading the firmware configuration file to the adapter and instructing
+ * the firmware to process it.
+ */
static int
-get_capabilities(struct adapter *sc, struct fw_caps_config_cmd *caps)
+partition_resources(struct adapter *sc, const struct firmware *cfg)
{
int rc;
+ struct fw_caps_config_cmd caps;
+ uint32_t mtype, maddr, finicsum, cfcsum;
- bzero(caps, sizeof(*caps));
- caps->op_to_write = htobe32(V_FW_CMD_OP(FW_CAPS_CONFIG_CMD) |
- F_FW_CMD_REQUEST | F_FW_CMD_READ);
- caps->retval_len16 = htobe32(FW_LEN16(*caps));
+ rc = cfg ? upload_config_file(sc, cfg, &mtype, &maddr) : ENOENT;
+ if (rc != 0) {
+ mtype = FW_MEMTYPE_CF_FLASH;
+ maddr = t4_flash_cfg_addr(sc);
+ }
- rc = -t4_wr_mbox(sc, sc->mbox, caps, sizeof(*caps), caps);
- if (rc != 0)
+ bzero(&caps, sizeof(caps));
+ caps.op_to_write = htobe32(V_FW_CMD_OP(FW_CAPS_CONFIG_CMD) |
+ F_FW_CMD_REQUEST | F_FW_CMD_READ);
+ caps.cfvalid_to_len16 = htobe32(F_FW_CAPS_CONFIG_CMD_CFVALID |
+ V_FW_CAPS_CONFIG_CMD_MEMTYPE_CF(mtype) |
+ V_FW_CAPS_CONFIG_CMD_MEMADDR64K_CF(maddr >> 16) | FW_LEN16(caps));
+ rc = -t4_wr_mbox(sc, sc->mbox, &caps, sizeof(caps), &caps);
+ if (rc != 0) {
+ device_printf(sc->dev,
+ "failed to pre-process config file: %d.\n", rc);
return (rc);
+ }
- if (caps->niccaps & htobe16(FW_CAPS_CONFIG_NIC_VM))
- caps->niccaps ^= htobe16(FW_CAPS_CONFIG_NIC_VM);
+ finicsum = be32toh(caps.finicsum);
+ cfcsum = be32toh(caps.cfcsum);
+ if (finicsum != cfcsum) {
+ device_printf(sc->dev,
+ "WARNING: config file checksum mismatch: %08x %08x\n",
+ finicsum, cfcsum);
+ }
+ sc->cfcsum = cfcsum;
+
+#define LIMIT_CAPS(x) do { \
+ caps.x &= htobe16(t4_##x##_allowed); \
+ sc->x = htobe16(caps.x); \
+} while (0)
- caps->op_to_write = htobe32(V_FW_CMD_OP(FW_CAPS_CONFIG_CMD) |
+ /*
+ * Let the firmware know what features will (not) be used so it can tune
+ * things accordingly.
+ */
+ LIMIT_CAPS(linkcaps);
+ LIMIT_CAPS(niccaps);
+ LIMIT_CAPS(toecaps);
+ LIMIT_CAPS(rdmacaps);
+ LIMIT_CAPS(iscsicaps);
+ LIMIT_CAPS(fcoecaps);
+#undef LIMIT_CAPS
+
+ caps.op_to_write = htobe32(V_FW_CMD_OP(FW_CAPS_CONFIG_CMD) |
F_FW_CMD_REQUEST | F_FW_CMD_WRITE);
- rc = -t4_wr_mbox(sc, sc->mbox, caps, sizeof(*caps), NULL);
+ caps.cfvalid_to_len16 = htobe32(FW_LEN16(caps));
+ rc = -t4_wr_mbox(sc, sc->mbox, &caps, sizeof(caps), NULL);
+ if (rc != 0) {
+ device_printf(sc->dev,
+ "failed to process config file: %d.\n", rc);
+ return (rc);
+ }
- return (rc);
+ return (0);
}
+/*
+ * Retrieve parameters that are needed (or nice to have) prior to calling
+ * t4_sge_init and t4_fw_initialize.
+ */
static int
-get_params(struct adapter *sc, struct fw_caps_config_cmd *caps)
+get_params__pre_init(struct adapter *sc)
{
int rc;
- uint32_t params[7], val[7];
+ uint32_t param[2], val[2];
+ struct fw_devlog_cmd cmd;
+ struct devlog_params *dlog = &sc->params.devlog;
-#define FW_PARAM_DEV(param) \
- (V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) | \
- V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_##param))
-#define FW_PARAM_PFVF(param) \
- (V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_PFVF) | \
- V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_PFVF_##param))
-
- params[0] = FW_PARAM_DEV(PORTVEC);
- params[1] = FW_PARAM_PFVF(IQFLINT_START);
- params[2] = FW_PARAM_PFVF(EQ_START);
- params[3] = FW_PARAM_PFVF(FILTER_START);
- params[4] = FW_PARAM_PFVF(FILTER_END);
- rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 5, params, val);
+ param[0] = FW_PARAM_DEV(PORTVEC);
+ param[1] = FW_PARAM_DEV(CCLK);
+ rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 2, param, val);
if (rc != 0) {
device_printf(sc->dev,
- "failed to query parameters: %d.\n", rc);
- goto done;
+ "failed to query parameters (pre_init): %d.\n", rc);
+ return (rc);
}
sc->params.portvec = val[0];
@@ -1509,24 +1784,81 @@ get_params(struct adapter *sc, struct fw_caps_config_cmd *caps)
val[0] &= val[0] - 1;
}
- sc->sge.iq_start = val[1];
- sc->sge.eq_start = val[2];
- sc->tids.ftid_base = val[3];
- sc->tids.nftids = val[4] - val[3] + 1;
+ sc->params.vpd.cclk = val[1];
+
+ /* Read device log parameters. */
+ bzero(&cmd, sizeof(cmd));
+ cmd.op_to_write = htobe32(V_FW_CMD_OP(FW_DEVLOG_CMD) |
+ F_FW_CMD_REQUEST | F_FW_CMD_READ);
+ cmd.retval_len16 = htobe32(FW_LEN16(cmd));
+ rc = -t4_wr_mbox(sc, sc->mbox, &cmd, sizeof(cmd), &cmd);
+ if (rc != 0) {
+ device_printf(sc->dev,
+ "failed to get devlog parameters: %d.\n", rc);
+ bzero(dlog, sizeof (*dlog));
+ rc = 0; /* devlog isn't critical for device operation */
+ } else {
+ val[0] = be32toh(cmd.memtype_devlog_memaddr16_devlog);
+ dlog->memtype = G_FW_DEVLOG_CMD_MEMTYPE_DEVLOG(val[0]);
+ dlog->start = G_FW_DEVLOG_CMD_MEMADDR16_DEVLOG(val[0]) << 4;
+ dlog->size = be32toh(cmd.memsize_devlog);
+ }
- if (caps->toecaps) {
+ return (rc);
+}
+
+/*
+ * Retrieve various parameters that are of interest to the driver. The device
+ * has been initialized by the firmware at this point.
+ */
+static int
+get_params__post_init(struct adapter *sc)
+{
+ int rc;
+ uint32_t param[7], val[7];
+ struct fw_caps_config_cmd caps;
+
+ param[0] = FW_PARAM_PFVF(IQFLINT_START);
+ param[1] = FW_PARAM_PFVF(EQ_START);
+ param[2] = FW_PARAM_PFVF(FILTER_START);
+ param[3] = FW_PARAM_PFVF(FILTER_END);
+ rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 4, param, val);
+ if (rc != 0) {
+ device_printf(sc->dev,
+ "failed to query parameters (post_init): %d.\n", rc);
+ return (rc);
+ }
+
+ sc->sge.iq_start = val[0];
+ sc->sge.eq_start = val[1];
+ sc->tids.ftid_base = val[2];
+ sc->tids.nftids = val[3] - val[2] + 1;
+
+ /* get capabilities */
+ bzero(&caps, sizeof(caps));
+ caps.op_to_write = htobe32(V_FW_CMD_OP(FW_CAPS_CONFIG_CMD) |
+ F_FW_CMD_REQUEST | F_FW_CMD_READ);
+ caps.cfvalid_to_len16 = htobe32(FW_LEN16(caps));
+ rc = -t4_wr_mbox(sc, sc->mbox, &caps, sizeof(caps), &caps);
+ if (rc != 0) {
+ device_printf(sc->dev,
+ "failed to get card capabilities: %d.\n", rc);
+ return (rc);
+ }
+
+ if (caps.toecaps) {
/* query offload-related parameters */
- params[0] = FW_PARAM_DEV(NTID);
- params[1] = FW_PARAM_PFVF(SERVER_START);
- params[2] = FW_PARAM_PFVF(SERVER_END);
- params[3] = FW_PARAM_PFVF(TDDP_START);
- params[4] = FW_PARAM_PFVF(TDDP_END);
- params[5] = FW_PARAM_DEV(FLOWC_BUFFIFO_SZ);
- rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 6, params, val);
+ param[0] = FW_PARAM_DEV(NTID);
+ param[1] = FW_PARAM_PFVF(SERVER_START);
+ param[2] = FW_PARAM_PFVF(SERVER_END);
+ param[3] = FW_PARAM_PFVF(TDDP_START);
+ param[4] = FW_PARAM_PFVF(TDDP_END);
+ param[5] = FW_PARAM_DEV(FLOWC_BUFFIFO_SZ);
+ rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 6, param, val);
if (rc != 0) {
device_printf(sc->dev,
"failed to query TOE parameters: %d.\n", rc);
- goto done;
+ return (rc);
}
sc->tids.ntids = val[0];
sc->tids.natids = min(sc->tids.ntids / 2, MAX_ATIDS);
@@ -1537,18 +1869,18 @@ get_params(struct adapter *sc, struct fw_caps_config_cmd *caps)
sc->params.ofldq_wr_cred = val[5];
sc->params.offload = 1;
}
- if (caps->rdmacaps) {
- params[0] = FW_PARAM_PFVF(STAG_START);
- params[1] = FW_PARAM_PFVF(STAG_END);
- params[2] = FW_PARAM_PFVF(RQ_START);
- params[3] = FW_PARAM_PFVF(RQ_END);
- params[4] = FW_PARAM_PFVF(PBL_START);
- params[5] = FW_PARAM_PFVF(PBL_END);
- rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 6, params, val);
+ if (caps.rdmacaps) {
+ param[0] = FW_PARAM_PFVF(STAG_START);
+ param[1] = FW_PARAM_PFVF(STAG_END);
+ param[2] = FW_PARAM_PFVF(RQ_START);
+ param[3] = FW_PARAM_PFVF(RQ_END);
+ param[4] = FW_PARAM_PFVF(PBL_START);
+ param[5] = FW_PARAM_PFVF(PBL_END);
+ rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 6, param, val);
if (rc != 0) {
device_printf(sc->dev,
- "failed to query RDMA parameters: %d.\n", rc);
- goto done;
+ "failed to query RDMA parameters(1): %d.\n", rc);
+ return (rc);
}
sc->vres.stag.start = val[0];
sc->vres.stag.size = val[1] - val[0] + 1;
@@ -1556,37 +1888,59 @@ get_params(struct adapter *sc, struct fw_caps_config_cmd *caps)
sc->vres.rq.size = val[3] - val[2] + 1;
sc->vres.pbl.start = val[4];
sc->vres.pbl.size = val[5] - val[4] + 1;
+
+ param[0] = FW_PARAM_PFVF(SQRQ_START);
+ param[1] = FW_PARAM_PFVF(SQRQ_END);
+ param[2] = FW_PARAM_PFVF(CQ_START);
+ param[3] = FW_PARAM_PFVF(CQ_END);
+ param[4] = FW_PARAM_PFVF(OCQ_START);
+ param[5] = FW_PARAM_PFVF(OCQ_END);
+ rc = -t4_query_params(sc, 0, 0, 0, 6, param, val);
+ if (rc != 0) {
+ device_printf(sc->dev,
+ "failed to query RDMA parameters(2): %d.\n", rc);
+ return (rc);
+ }
+ sc->vres.qp.start = val[0];
+ sc->vres.qp.size = val[1] - val[0] + 1;
+ sc->vres.cq.start = val[2];
+ sc->vres.cq.size = val[3] - val[2] + 1;
+ sc->vres.ocq.start = val[4];
+ sc->vres.ocq.size = val[5] - val[4] + 1;
}
- if (caps->iscsicaps) {
- params[0] = FW_PARAM_PFVF(ISCSI_START);
- params[1] = FW_PARAM_PFVF(ISCSI_END);
- rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 2, params, val);
+ if (caps.iscsicaps) {
+ param[0] = FW_PARAM_PFVF(ISCSI_START);
+ param[1] = FW_PARAM_PFVF(ISCSI_END);
+ rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 2, param, val);
if (rc != 0) {
device_printf(sc->dev,
"failed to query iSCSI parameters: %d.\n", rc);
- goto done;
+ return (rc);
}
sc->vres.iscsi.start = val[0];
sc->vres.iscsi.size = val[1] - val[0] + 1;
}
-#undef FW_PARAM_PFVF
-#undef FW_PARAM_DEV
-done:
+ /* These are finalized by FW initialization, load their values now */
+ val[0] = t4_read_reg(sc, A_TP_TIMER_RESOLUTION);
+ sc->params.tp.tre = G_TIMERRESOLUTION(val[0]);
+ sc->params.tp.dack_re = G_DELAYEDACKRESOLUTION(val[0]);
+ t4_read_mtu_tbl(sc, sc->params.mtus, NULL);
+
return (rc);
}
+#undef FW_PARAM_PFVF
+#undef FW_PARAM_DEV
+
static void
t4_set_desc(struct adapter *sc)
{
char buf[128];
struct adapter_params *p = &sc->params;
- snprintf(buf, sizeof(buf),
- "Chelsio %s (rev %d) %d port %sNIC PCIe-x%d %d %s, S/N:%s, E/C:%s",
- p->vpd.id, p->rev, p->nports, is_offload(sc) ? "R" : "",
- p->pci.width, sc->intr_count, sc->intr_type == INTR_MSIX ? "MSI-X" :
- (sc->intr_type == INTR_MSI ? "MSI" : "INTx"), p->vpd.sn, p->vpd.ec);
+ snprintf(buf, sizeof(buf), "Chelsio %s %sNIC (rev %d), S/N:%s, E/C:%s",
+ p->vpd.id, is_offload(sc) ? "R" : "", p->rev, p->vpd.sn, p->vpd.ec);
device_set_desc_copy(sc->dev, buf);
}
@@ -1803,9 +2157,7 @@ cxgbe_init_synchronized(struct port_info *pi)
{
struct adapter *sc = pi->adapter;
struct ifnet *ifp = pi->ifp;
- int rc = 0, i;
- uint16_t *rss;
- struct sge_rxq *rxq;
+ int rc = 0;
ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
@@ -1815,30 +2167,13 @@ cxgbe_init_synchronized(struct port_info *pi)
return (0); /* already running */
}
- if (sc->open_device_map == 0 && ((rc = first_port_up(sc)) != 0))
+ if (!(sc->flags & FULL_INIT_DONE) &&
+ ((rc = adapter_full_init(sc)) != 0))
return (rc); /* error message displayed already */
- /*
- * Allocate tx/rx/fl queues for this port.
- */
- rc = t4_setup_eth_queues(pi);
- if (rc != 0)
- goto done; /* error message displayed already */
-
- /*
- * Setup RSS for this port.
- */
- rss = malloc(pi->nrxq * sizeof (*rss), M_CXGBE, M_ZERO | M_WAITOK);
- for_each_rxq(pi, i, rxq) {
- rss[i] = rxq->iq.abs_id;
- }
- rc = -t4_config_rss_range(sc, sc->mbox, pi->viid, 0, pi->rss_size, rss,
- pi->nrxq);
- free(rss, M_CXGBE);
- if (rc != 0) {
- if_printf(ifp, "rss_config failed: %d\n", rc);
- goto done;
- }
+ if (!(pi->flags & PORT_INIT_DONE) &&
+ ((rc = port_full_init(pi)) != 0))
+ return (rc); /* error message displayed already */
PORT_LOCK(pi);
rc = update_mac_settings(pi, XGMAC_ALL);
@@ -1857,12 +2192,10 @@ cxgbe_init_synchronized(struct port_info *pi)
if_printf(ifp, "enable_vi failed: %d\n", rc);
goto done;
}
- pi->flags |= VI_ENABLED;
/* all ok */
setbit(&sc->open_device_map, pi->port_id);
ifp->if_drv_flags |= IFF_DRV_RUNNING;
- ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
callout_reset(&pi->tick, hz, cxgbe_tick, pi);
done:
@@ -1915,51 +2248,28 @@ cxgbe_uninit_synchronized(struct port_info *pi)
struct ifnet *ifp = pi->ifp;
int rc;
- /*
- * taskqueue_drain may cause a deadlock if the adapter lock is held.
- */
ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
/*
- * Clear this port's bit from the open device map, and then drain
- * tasks and callouts.
+ * Disable the VI so that all its data in either direction is discarded
+ * by the MPS. Leave everything else (the queues, interrupts, and 1Hz
+ * tick) intact as the TP can deliver negative advice or data that it's
+ * holding in its RAM (for an offloaded connection) even after the VI is
+ * disabled.
*/
- clrbit(&sc->open_device_map, pi->port_id);
-
- PORT_LOCK(pi);
- ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
- callout_stop(&pi->tick);
- PORT_UNLOCK(pi);
- callout_drain(&pi->tick);
-
- /*
- * Stop and then free the queues' resources, including the queues
- * themselves.
- *
- * XXX: we could just stop the queues here (on ifconfig down) and free
- * them later (on port detach), but having up/down go through the entire
- * allocate/activate/deactivate/free sequence is a good way to find
- * leaks and bugs.
- */
- rc = t4_teardown_eth_queues(pi);
- if (rc != 0)
- if_printf(ifp, "teardown failed: %d\n", rc);
-
- if (pi->flags & VI_ENABLED) {
- rc = -t4_enable_vi(sc, sc->mbox, pi->viid, false, false);
- if (rc)
- if_printf(ifp, "disable_vi failed: %d\n", rc);
- else
- pi->flags &= ~VI_ENABLED;
+ rc = -t4_enable_vi(sc, sc->mbox, pi->viid, false, false);
+ if (rc) {
+ if_printf(ifp, "disable_vi failed: %d\n", rc);
+ return (rc);
}
+ clrbit(&sc->open_device_map, pi->port_id);
+ ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
+
pi->link_cfg.link_ok = 0;
pi->link_cfg.speed = 0;
t4_os_link_changed(sc, pi->port_id, 0);
- if (sc->open_device_map == 0)
- last_port_down(sc);
-
return (0);
}
@@ -1968,15 +2278,22 @@ cxgbe_uninit_synchronized(struct port_info *pi)
if (rc != 0) \
goto done; \
} while (0)
+
static int
-first_port_up(struct adapter *sc)
+adapter_full_init(struct adapter *sc)
{
int rc, i, rid, p, q;
char s[8];
struct irq *irq;
- struct sge_iq *intrq;
+ struct port_info *pi;
+ struct sge_rxq *rxq;
+#ifndef TCP_OFFLOAD_DISABLE
+ struct sge_ofld_rxq *ofld_rxq;
+#endif
ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
+ KASSERT((sc->flags & FULL_INIT_DONE) == 0,
+ ("%s: FULL_INIT_DONE already", __func__));
/*
* queues that belong to the adapter (not any particular port).
@@ -1985,95 +2302,265 @@ first_port_up(struct adapter *sc)
if (rc != 0)
goto done;
+ for (i = 0; i < ARRAY_SIZE(sc->tq); i++) {
+ sc->tq[i] = taskqueue_create("t4 taskq", M_NOWAIT,
+ taskqueue_thread_enqueue, &sc->tq[i]);
+ if (sc->tq[i] == NULL) {
+ device_printf(sc->dev,
+ "failed to allocate task queue %d\n", i);
+ rc = ENOMEM;
+ goto done;
+ }
+ taskqueue_start_threads(&sc->tq[i], 1, PI_NET, "%s tq%d",
+ device_get_nameunit(sc->dev), i);
+ }
+
/*
* Setup interrupts.
*/
irq = &sc->irq[0];
rid = sc->intr_type == INTR_INTX ? 0 : 1;
if (sc->intr_count == 1) {
- KASSERT(sc->flags & INTR_SHARED,
- ("%s: single interrupt but not shared?", __func__));
+ KASSERT(!(sc->flags & INTR_DIRECT),
+ ("%s: single interrupt && INTR_DIRECT?", __func__));
T4_ALLOC_IRQ(sc, irq, rid, t4_intr_all, sc, "all");
} else {
- /* Multiple interrupts. The first one is always error intr */
+ /* Multiple interrupts. */
+ KASSERT(sc->intr_count >= T4_EXTRA_INTR + sc->params.nports,
+ ("%s: too few intr.", __func__));
+
+ /* The first one is always error intr */
T4_ALLOC_IRQ(sc, irq, rid, t4_intr_err, sc, "err");
irq++;
rid++;
- /* Firmware event queue normally has an interrupt of its own */
- if (sc->intr_count > T4_EXTRA_INTR) {
- T4_ALLOC_IRQ(sc, irq, rid, t4_intr_evt, &sc->sge.fwq,
- "evt");
- irq++;
- rid++;
- }
-
- intrq = &sc->sge.intrq[0];
- if (sc->flags & INTR_SHARED) {
+ /* The second one is always the firmware event queue */
+ T4_ALLOC_IRQ(sc, irq, rid, t4_intr_evt, &sc->sge.fwq, "evt");
+ irq++;
+ rid++;
- /* All ports share these interrupt queues */
+ /*
+ * Note that if INTR_DIRECT is not set then either the NIC rx
+ * queues or (exclusive or) the TOE rx queues will be taking
+ * direct interrupts.
+ *
+ * There is no need to check for is_offload(sc) as nofldrxq
+ * will be 0 if offload is disabled.
+ */
+ for_each_port(sc, p) {
+ pi = sc->port[p];
- for (i = 0; i < NINTRQ(sc); i++) {
- snprintf(s, sizeof(s), "*.%d", i);
- T4_ALLOC_IRQ(sc, irq, rid, t4_intr, intrq, s);
+#ifndef TCP_OFFLOAD_DISABLE
+ /*
+ * Skip over the NIC queues if they aren't taking direct
+ * interrupts.
+ */
+ if (!(sc->flags & INTR_DIRECT) &&
+ pi->nofldrxq > pi->nrxq)
+ goto ofld_queues;
+#endif
+ rxq = &sc->sge.rxq[pi->first_rxq];
+ for (q = 0; q < pi->nrxq; q++, rxq++) {
+ snprintf(s, sizeof(s), "%d.%d", p, q);
+ T4_ALLOC_IRQ(sc, irq, rid, t4_intr, rxq, s);
irq++;
rid++;
- intrq++;
}
- } else {
-
- /* Each port has its own set of interrupt queues */
- for (p = 0; p < sc->params.nports; p++) {
- for (q = 0; q < sc->port[p]->nrxq; q++) {
- snprintf(s, sizeof(s), "%d.%d", p, q);
- T4_ALLOC_IRQ(sc, irq, rid, t4_intr,
- intrq, s);
- irq++;
- rid++;
- intrq++;
- }
+#ifndef TCP_OFFLOAD_DISABLE
+ /*
+ * Skip over the offload queues if they aren't taking
+ * direct interrupts.
+ */
+ if (!(sc->flags & INTR_DIRECT))
+ continue;
+ofld_queues:
+ ofld_rxq = &sc->sge.ofld_rxq[pi->first_ofld_rxq];
+ for (q = 0; q < pi->nofldrxq; q++, ofld_rxq++) {
+ snprintf(s, sizeof(s), "%d,%d", p, q);
+ T4_ALLOC_IRQ(sc, irq, rid, t4_intr, ofld_rxq, s);
+ irq++;
+ rid++;
}
+#endif
}
}
t4_intr_enable(sc);
sc->flags |= FULL_INIT_DONE;
-
done:
if (rc != 0)
- last_port_down(sc);
+ adapter_full_uninit(sc);
return (rc);
}
#undef T4_ALLOC_IRQ
-/*
- * Idempotent.
- */
static int
-last_port_down(struct adapter *sc)
+adapter_full_uninit(struct adapter *sc)
{
int i;
ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
- t4_intr_disable(sc);
-
t4_teardown_adapter_queues(sc);
for (i = 0; i < sc->intr_count; i++)
t4_free_irq(sc, &sc->irq[i]);
+ for (i = 0; i < ARRAY_SIZE(sc->tq) && sc->tq[i]; i++) {
+ taskqueue_free(sc->tq[i]);
+ sc->tq[i] = NULL;
+ }
+
sc->flags &= ~FULL_INIT_DONE;
return (0);
}
static int
+port_full_init(struct port_info *pi)
+{
+ struct adapter *sc = pi->adapter;
+ struct ifnet *ifp = pi->ifp;
+ uint16_t *rss;
+ struct sge_rxq *rxq;
+ int rc, i;
+
+ ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
+ KASSERT((pi->flags & PORT_INIT_DONE) == 0,
+ ("%s: PORT_INIT_DONE already", __func__));
+
+ sysctl_ctx_init(&pi->ctx);
+ pi->flags |= PORT_SYSCTL_CTX;
+
+ /*
+ * Allocate tx/rx/fl queues for this port.
+ */
+ rc = t4_setup_port_queues(pi);
+ if (rc != 0)
+ goto done; /* error message displayed already */
+
+ /*
+ * Setup RSS for this port.
+ */
+ rss = malloc(pi->nrxq * sizeof (*rss), M_CXGBE,
+ M_ZERO | M_WAITOK);
+ for_each_rxq(pi, i, rxq) {
+ rss[i] = rxq->iq.abs_id;
+ }
+ rc = -t4_config_rss_range(sc, sc->mbox, pi->viid, 0,
+ pi->rss_size, rss, pi->nrxq);
+ free(rss, M_CXGBE);
+ if (rc != 0) {
+ if_printf(ifp, "rss_config failed: %d\n", rc);
+ goto done;
+ }
+
+ pi->flags |= PORT_INIT_DONE;
+done:
+ if (rc != 0)
+ port_full_uninit(pi);
+
+ return (rc);
+}
+
+/*
+ * Idempotent.
+ */
+static int
+port_full_uninit(struct port_info *pi)
+{
+ struct adapter *sc = pi->adapter;
+ int i;
+ struct sge_rxq *rxq;
+ struct sge_txq *txq;
+#ifndef TCP_OFFLOAD_DISABLE
+ struct sge_ofld_rxq *ofld_rxq;
+ struct sge_wrq *ofld_txq;
+#endif
+
+ if (pi->flags & PORT_INIT_DONE) {
+
+ /* Need to quiesce queues. XXX: ctrl queues? */
+
+ for_each_txq(pi, i, txq) {
+ quiesce_eq(sc, &txq->eq);
+ }
+
+#ifndef TCP_OFFLOAD_DISABLE
+ for_each_ofld_txq(pi, i, ofld_txq) {
+ quiesce_eq(sc, &ofld_txq->eq);
+ }
+#endif
+
+ for_each_rxq(pi, i, rxq) {
+ quiesce_iq(sc, &rxq->iq);
+ quiesce_fl(sc, &rxq->fl);
+ }
+
+#ifndef TCP_OFFLOAD_DISABLE
+ for_each_ofld_rxq(pi, i, ofld_rxq) {
+ quiesce_iq(sc, &ofld_rxq->iq);
+ quiesce_fl(sc, &ofld_rxq->fl);
+ }
+#endif
+ }
+
+ t4_teardown_port_queues(pi);
+ pi->flags &= ~PORT_INIT_DONE;
+
+ return (0);
+}
+
+static void
+quiesce_eq(struct adapter *sc, struct sge_eq *eq)
+{
+ EQ_LOCK(eq);
+ eq->flags |= EQ_DOOMED;
+
+ /*
+ * Wait for the response to a credit flush if one's
+ * pending.
+ */
+ while (eq->flags & EQ_CRFLUSHED)
+ mtx_sleep(eq, &eq->eq_lock, 0, "crflush", 0);
+ EQ_UNLOCK(eq);
+
+ callout_drain(&eq->tx_callout); /* XXX: iffy */
+ pause("callout", 10); /* Still iffy */
+
+ taskqueue_drain(sc->tq[eq->tx_chan], &eq->tx_task);
+}
+
+static void
+quiesce_iq(struct adapter *sc, struct sge_iq *iq)
+{
+ (void) sc; /* unused */
+
+ /* Synchronize with the interrupt handler */
+ while (!atomic_cmpset_int(&iq->state, IQS_IDLE, IQS_DISABLED))
+ pause("iqfree", 1);
+}
+
+static void
+quiesce_fl(struct adapter *sc, struct sge_fl *fl)
+{
+ mtx_lock(&sc->sfl_lock);
+ FL_LOCK(fl);
+ fl->flags |= FL_DOOMED;
+ FL_UNLOCK(fl);
+ mtx_unlock(&sc->sfl_lock);
+
+ callout_drain(&sc->sfl_callout);
+ KASSERT((fl->flags & FL_STARVING) == 0,
+ ("%s: still starving", __func__));
+}
+
+static int
t4_alloc_irq(struct adapter *sc, struct irq *irq, int rid,
- iq_intr_handler_t *handler, void *arg, char *name)
+ driver_intr_t *handler, void *arg, char *name)
{
int rc;
@@ -2365,12 +2852,12 @@ cxgbe_tick(void *arg)
t4_get_port_stats(pi->adapter, pi->tx_chan, s);
- ifp->if_opackets = s->tx_frames;
- ifp->if_ipackets = s->rx_frames;
- ifp->if_obytes = s->tx_octets;
- ifp->if_ibytes = s->rx_octets;
- ifp->if_omcasts = s->tx_mcast_frames;
- ifp->if_imcasts = s->rx_mcast_frames;
+ ifp->if_opackets = s->tx_frames - s->tx_pause;
+ ifp->if_ipackets = s->rx_frames - s->rx_pause;
+ ifp->if_obytes = s->tx_octets - s->tx_pause * 64;
+ ifp->if_ibytes = s->rx_octets - s->rx_pause * 64;
+ ifp->if_omcasts = s->tx_mcast_frames - s->tx_pause;
+ ifp->if_imcasts = s->rx_mcast_frames - s->rx_pause;
ifp->if_iqdrops = s->rx_ovflow0 + s->rx_ovflow1 + s->rx_ovflow2 +
s->rx_ovflow3;
@@ -2388,15 +2875,58 @@ cxgbe_tick(void *arg)
}
static int
+cpl_not_handled(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) /* default CPL handler, installed by t4_register_cpl_handler when given a NULL handler */
+{
+#ifdef INVARIANTS
+ panic("%s: opcode %02x on iq %p with payload %p", /* with INVARIANTS an unhandled opcode is fatal */
+ __func__, rss->opcode, iq, m);
+#else
+ log(LOG_ERR, "%s: opcode %02x on iq %p with payload %p\n", /* newline-terminated so the next log entry starts on its own line */
+ __func__, rss->opcode, iq, m);
+ m_freem(m); /* the payload is released here; nothing else will consume it */
+#endif
+ return (EDOOFUS);
+}
+
+int
+t4_register_cpl_handler(struct adapter *sc, int opcode, cpl_handler_t h)
+{
+	uintptr_t *slot, handler;
+
+	/* Reject opcodes that do not index into the adapter's handler table. */
+	if (opcode >= ARRAY_SIZE(sc->cpl_handler))
+		return (EINVAL);
+
+	/* A NULL handler means "unregister": fall back to cpl_not_handled. */
+	if (h)
+		handler = (uintptr_t)h;
+	else
+		handler = (uintptr_t)cpl_not_handled;
+
+	/* Publish the new handler with release semantics. */
+	slot = (uintptr_t *) &sc->cpl_handler[opcode];
+	atomic_store_rel_ptr(slot, handler);
+
+	return (0);
+}
+
+static int
t4_sysctls(struct adapter *sc)
{
struct sysctl_ctx_list *ctx;
struct sysctl_oid *oid;
- struct sysctl_oid_list *children;
+ struct sysctl_oid_list *children, *c0;
+ static char *caps[] = {
+ "\20\1PPP\2QFC\3DCBX", /* caps[0] linkcaps */
+ "\20\1NIC\2VM\3IDS\4UM\5UM_ISGL", /* caps[1] niccaps */
+ "\20\1TOE", /* caps[2] toecaps */
+ "\20\1RDDP\2RDMAC", /* caps[3] rdmacaps */
+ "\20\1INITIATOR_PDU\2TARGET_PDU" /* caps[4] iscsicaps */
+ "\3INITIATOR_CNXOFLD\4TARGET_CNXOFLD"
+ "\5INITIATOR_SSNOFLD\6TARGET_SSNOFLD",
+ "\20\1INITIATOR\2TARGET\3CTRL_OFLD" /* caps[5] fcoecaps */
+ };
ctx = device_get_sysctl_ctx(sc->dev);
+
+ /*
+ * dev.t4nex.X.
+ */
oid = device_get_sysctl_tree(sc->dev);
- children = SYSCTL_CHILDREN(oid);
+ c0 = children = SYSCTL_CHILDREN(oid);
SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nports", CTLFLAG_RD,
&sc->params.nports, 0, "# of ports");
@@ -2407,23 +2937,146 @@ t4_sysctls(struct adapter *sc)
SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "firmware_version",
CTLFLAG_RD, &sc->fw_version, 0, "firmware version");
- SYSCTL_ADD_INT(ctx, children, OID_AUTO, "TOE", CTLFLAG_RD,
- &sc->params.offload, 0, "hardware is capable of TCP offload");
+ SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "cf",
+ CTLFLAG_RD, &t4_cfg_file, 0, "configuration file");
+
+ SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "cfcsum", CTLFLAG_RD,
+ &sc->cfcsum, 0, "config file checksum");
+
+ SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "linkcaps",
+ CTLTYPE_STRING | CTLFLAG_RD, caps[0], sc->linkcaps,
+ sysctl_bitfield, "A", "available link capabilities");
+
+ SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "niccaps",
+ CTLTYPE_STRING | CTLFLAG_RD, caps[1], sc->niccaps,
+ sysctl_bitfield, "A", "available NIC capabilities");
+
+ SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "toecaps",
+ CTLTYPE_STRING | CTLFLAG_RD, caps[2], sc->toecaps,
+ sysctl_bitfield, "A", "available TCP offload capabilities");
+
+ SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rdmacaps",
+ CTLTYPE_STRING | CTLFLAG_RD, caps[3], sc->rdmacaps,
+ sysctl_bitfield, "A", "available RDMA capabilities");
+
+ SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "iscsicaps",
+ CTLTYPE_STRING | CTLFLAG_RD, caps[4], sc->iscsicaps,
+ sysctl_bitfield, "A", "available iSCSI capabilities");
+
+ SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "fcoecaps",
+ CTLTYPE_STRING | CTLFLAG_RD, caps[5], sc->fcoecaps,
+ sysctl_bitfield, "A", "available FCoE capabilities");
SYSCTL_ADD_INT(ctx, children, OID_AUTO, "core_clock", CTLFLAG_RD,
&sc->params.vpd.cclk, 0, "core clock frequency (in KHz)");
SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_timers",
- CTLTYPE_STRING | CTLFLAG_RD, &intr_timer, sizeof(intr_timer),
- sysctl_int_array, "A", "interrupt holdoff timer values (us)");
+ CTLTYPE_STRING | CTLFLAG_RD, sc->sge.timer_val,
+ sizeof(sc->sge.timer_val), sysctl_int_array, "A",
+ "interrupt holdoff timer values (us)");
SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_pkt_counts",
- CTLTYPE_STRING | CTLFLAG_RD, &intr_pktcount, sizeof(intr_pktcount),
- sysctl_int_array, "A", "interrupt holdoff packet counter values");
+ CTLTYPE_STRING | CTLFLAG_RD, sc->sge.counter_val,
+ sizeof(sc->sge.counter_val), sysctl_int_array, "A",
+ "interrupt holdoff packet counter values");
+
+ /*
+ * dev.t4nex.X.misc. Marked CTLFLAG_SKIP to avoid information overload.
+ */
+ oid = SYSCTL_ADD_NODE(ctx, c0, OID_AUTO, "misc",
+ CTLFLAG_RD | CTLFLAG_SKIP, NULL,
+ "logs and miscellaneous information");
+ children = SYSCTL_CHILDREN(oid);
+
+ SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cctrl",
+ CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
+ sysctl_cctrl, "A", "congestion control");
+
+ SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cpl_stats",
+ CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
+ sysctl_cpl_stats, "A", "CPL statistics");
+
+ SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "ddp_stats",
+ CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
+ sysctl_ddp_stats, "A", "DDP statistics");
SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "devlog",
CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
- sysctl_devlog, "A", "device log");
+ sysctl_devlog, "A", "firmware's device log");
+
+ SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "fcoe_stats",
+ CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
+ sysctl_fcoe_stats, "A", "FCoE statistics");
+
+ SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "hw_sched",
+ CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
+ sysctl_hw_sched, "A", "hardware scheduler ");
+
+ SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "l2t",
+ CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
+ sysctl_l2t, "A", "hardware L2 table");
+
+ SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "lb_stats",
+ CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
+ sysctl_lb_stats, "A", "loopback statistics");
+
+ SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "meminfo",
+ CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
+ sysctl_meminfo, "A", "memory regions");
+
+ SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "path_mtus",
+ CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
+ sysctl_path_mtus, "A", "path MTUs");
+
+ SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "pm_stats",
+ CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
+ sysctl_pm_stats, "A", "PM statistics");
+
+ SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rdma_stats",
+ CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
+ sysctl_rdma_stats, "A", "RDMA statistics");
+
+ SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tcp_stats",
+ CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
+ sysctl_tcp_stats, "A", "TCP statistics");
+
+ SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tids",
+ CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
+ sysctl_tids, "A", "TID information");
+
+ SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tp_err_stats",
+ CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
+ sysctl_tp_err_stats, "A", "TP error statistics");
+
+ SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tx_rate",
+ CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
+ sysctl_tx_rate, "A", "Tx rate");
+
+#ifndef TCP_OFFLOAD_DISABLE
+ if (is_offload(sc)) {
+ /*
+ * dev.t4nex.X.toe.
+ */
+ oid = SYSCTL_ADD_NODE(ctx, c0, OID_AUTO, "toe", CTLFLAG_RD,
+ NULL, "TOE parameters");
+ children = SYSCTL_CHILDREN(oid);
+
+ sc->tt.sndbuf = 256 * 1024;
+ SYSCTL_ADD_INT(ctx, children, OID_AUTO, "sndbuf", CTLFLAG_RW,
+ &sc->tt.sndbuf, 0, "max hardware send buffer size");
+
+ sc->tt.ddp = 0;
+ SYSCTL_ADD_INT(ctx, children, OID_AUTO, "ddp", CTLFLAG_RW,
+ &sc->tt.ddp, 0, "DDP allowed");
+ sc->tt.indsz = M_INDICATESIZE;
+ SYSCTL_ADD_INT(ctx, children, OID_AUTO, "indsz", CTLFLAG_RW,
+ &sc->tt.indsz, 0, "DDP max indicate size allowed");
+ sc->tt.ddp_thres = 3*4096;
+ SYSCTL_ADD_INT(ctx, children, OID_AUTO, "ddp_thres", CTLFLAG_RW,
+ &sc->tt.ddp_thres, 0, "DDP threshold");
+ }
+#endif
+
return (0);
}
@@ -2452,6 +3105,23 @@ cxgbe_sysctls(struct port_info *pi)
SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_txq", CTLFLAG_RD,
&pi->first_txq, 0, "index of first tx queue");
+#ifndef TCP_OFFLOAD_DISABLE
+ if (is_offload(pi->adapter)) {
+ SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nofldrxq", CTLFLAG_RD,
+ &pi->nofldrxq, 0,
+ "# of rx queues for offloaded TCP connections");
+ SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nofldtxq", CTLFLAG_RD,
+ &pi->nofldtxq, 0,
+ "# of tx queues for offloaded TCP connections");
+ SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_ofld_rxq",
+ CTLFLAG_RD, &pi->first_ofld_rxq, 0,
+ "index of first TOE rx queue");
+ SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_ofld_txq",
+ CTLFLAG_RD, &pi->first_ofld_txq, 0,
+ "index of first TOE tx queue");
+ }
+#endif
+
SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_tmr_idx",
CTLTYPE_INT | CTLFLAG_RW, pi, 0, sysctl_holdoff_tmr_idx, "I",
"holdoff timer index");
@@ -2642,11 +3312,31 @@ sysctl_int_array(SYSCTL_HANDLER_ARGS)
}
static int
+sysctl_bitfield(SYSCTL_HANDLER_ARGS)
+{
+	struct sbuf *sb;
+	int rc;
+
+	if ((rc = sysctl_wire_old_buffer(req, 0)) != 0)
+		return(rc);
+
+	if ((sb = sbuf_new_for_sysctl(NULL, NULL, 128, req)) == NULL)
+		return (ENOMEM);
+
+	/*
+	 * arg1 is the %b bit-description string and arg2 is the value whose
+	 * bits are decoded with it.
+	 */
+	sbuf_printf(sb, "%b", (int)arg2, (char *)arg1);
+	rc = sbuf_finish(sb);
+	sbuf_delete(sb);
+
+	return (rc);
+}
+
+static int
sysctl_holdoff_tmr_idx(SYSCTL_HANDLER_ARGS)
{
struct port_info *pi = arg1;
struct adapter *sc = pi->adapter;
- struct sge_rxq *rxq;
int idx, rc, i;
idx = pi->tmr_idx;
@@ -2661,9 +3351,16 @@ sysctl_holdoff_tmr_idx(SYSCTL_HANDLER_ARGS)
ADAPTER_LOCK(sc);
rc = IS_DOOMED(pi) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
if (rc == 0) {
+ struct sge_rxq *rxq;
+ uint8_t v;
+
+ v = V_QINTR_TIMER_IDX(idx) | V_QINTR_CNT_EN(pi->pktc_idx != -1);
for_each_rxq(pi, i, rxq) {
- rxq->iq.intr_params = V_QINTR_TIMER_IDX(idx) |
- V_QINTR_CNT_EN(pi->pktc_idx != -1);
+#ifdef atomic_store_rel_8
+ atomic_store_rel_8(&rxq->iq.intr_params, v);
+#else
+ rxq->iq.intr_params = v;
+#endif
}
pi->tmr_idx = idx;
}
@@ -2690,8 +3387,8 @@ sysctl_holdoff_pktc_idx(SYSCTL_HANDLER_ARGS)
ADAPTER_LOCK(sc);
rc = IS_DOOMED(pi) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
- if (rc == 0 && pi->ifp->if_drv_flags & IFF_DRV_RUNNING)
- rc = EBUSY; /* can be changed only when port is down */
+ if (rc == 0 && pi->flags & PORT_INIT_DONE)
+ rc = EBUSY; /* cannot be changed once the queues are created */
if (rc == 0)
pi->pktc_idx = idx;
@@ -2718,8 +3415,8 @@ sysctl_qsize_rxq(SYSCTL_HANDLER_ARGS)
ADAPTER_LOCK(sc);
rc = IS_DOOMED(pi) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
- if (rc == 0 && pi->ifp->if_drv_flags & IFF_DRV_RUNNING)
- rc = EBUSY; /* can be changed only when port is down */
+ if (rc == 0 && pi->flags & PORT_INIT_DONE)
+ rc = EBUSY; /* cannot be changed once the queues are created */
if (rc == 0)
pi->qsize_rxq = qsize;
@@ -2746,8 +3443,8 @@ sysctl_qsize_txq(SYSCTL_HANDLER_ARGS)
ADAPTER_LOCK(sc);
rc = IS_DOOMED(pi) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
- if (rc == 0 && pi->ifp->if_drv_flags & IFF_DRV_RUNNING)
- rc = EBUSY; /* can be changed only when port is down */
+ if (rc == 0 && pi->flags & PORT_INIT_DONE)
+ rc = EBUSY; /* cannot be changed once the queues are created */
if (rc == 0)
pi->qsize_txq = qsize;
@@ -2768,6 +3465,103 @@ sysctl_handle_t4_reg64(SYSCTL_HANDLER_ARGS)
return (sysctl_handle_64(oidp, &val, 0, req));
}
+static int
+sysctl_cctrl(SYSCTL_HANDLER_ARGS) /* sysctl handler: dumps the congestion control table read by t4_read_cong_tbl; arg1 is the adapter */
+{
+ struct adapter *sc = arg1;
+ struct sbuf *sb;
+ int rc, i;
+ uint16_t incr[NMTUS][NCCTRL_WIN];
+ static const char *dec_fac[] = { /* strings indexed by sc->params.b_wnd[i]; NOTE(review): assumes b_wnd values stay within 0..7 - confirm */
+ "0.5", "0.5625", "0.625", "0.6875", "0.75", "0.8125", "0.875",
+ "0.9375"
+ };
+
+ rc = sysctl_wire_old_buffer(req, 0);
+ if (rc != 0)
+ return (rc);
+
+ sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
+ if (sb == NULL)
+ return (ENOMEM);
+
+ t4_read_cong_tbl(sc, incr);
+
+ for (i = 0; i < NCCTRL_WIN; ++i) { /* two output lines per window: incr[0..7][i], then incr[8..15][i] plus a_wnd and the dec_fac string */
+ sbuf_printf(sb, "%2d: %4u %4u %4u %4u %4u %4u %4u %4u\n", i,
+ incr[0][i], incr[1][i], incr[2][i], incr[3][i], incr[4][i],
+ incr[5][i], incr[6][i], incr[7][i]);
+ sbuf_printf(sb, "%8u %4u %4u %4u %4u %4u %4u %4u %5u %s\n",
+ incr[8][i], incr[9][i], incr[10][i], incr[11][i],
+ incr[12][i], incr[13][i], incr[14][i], incr[15][i], /* hard-coded indices: assumes NMTUS >= 16 - TODO confirm */
+ sc->params.a_wnd[i], dec_fac[sc->params.b_wnd[i]]);
+ }
+
+ rc = sbuf_finish(sb);
+ sbuf_delete(sb);
+
+ return (rc);
+}
+
+static int
+sysctl_cpl_stats(SYSCTL_HANDLER_ARGS) /* sysctl handler: prints the CPL request and response counters for channels 0-3; arg1 is the adapter */
+{
+ struct adapter *sc = arg1;
+ struct sbuf *sb;
+ int rc;
+ struct tp_cpl_stats stats;
+
+ rc = sysctl_wire_old_buffer(req, 0);
+ if (rc != 0)
+ return (rc);
+
+ sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
+ if (sb == NULL)
+ return (ENOMEM);
+
+ t4_tp_get_cpl_stats(sc, &stats); /* fills stats.req[] and stats.rsp[] */
+
+ sbuf_printf(sb, " channel 0 channel 1 channel 2 "
+ "channel 3\n");
+ sbuf_printf(sb, "CPL requests: %10u %10u %10u %10u\n",
+ stats.req[0], stats.req[1], stats.req[2], stats.req[3]);
+ sbuf_printf(sb, "CPL responses: %10u %10u %10u %10u", /* last line deliberately has no trailing newline */
+ stats.rsp[0], stats.rsp[1], stats.rsp[2], stats.rsp[3]);
+
+ rc = sbuf_finish(sb);
+ sbuf_delete(sb);
+
+ return (rc);
+}
+
+static int
+sysctl_ddp_stats(SYSCTL_HANDLER_ARGS) /* sysctl handler: prints the frames/octets/drops counters returned by t4_get_usm_stats; arg1 is the adapter */
+{
+ struct adapter *sc = arg1;
+ struct sbuf *sb;
+ int rc;
+ struct tp_usm_stats stats;
+
+ rc = sysctl_wire_old_buffer(req, 0);
+ if (rc != 0)
+ return(rc);
+
+ sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
+ if (sb == NULL)
+ return (ENOMEM);
+
+ t4_get_usm_stats(sc, &stats);
+
+ sbuf_printf(sb, "Frames: %u\n", stats.frames);
+ sbuf_printf(sb, "Octets: %ju\n", stats.octets); /* NOTE(review): %ju expects uintmax_t; confirm the type of stats.octets matches */
+ sbuf_printf(sb, "Drops: %u", stats.drops);
+
+ rc = sbuf_finish(sb);
+ sbuf_delete(sb);
+
+ return (rc);
+}
+
const char *devlog_level_strings[] = {
[FW_DEVLOG_LEVEL_EMERG] = "EMERG",
[FW_DEVLOG_LEVEL_CRIT] = "CRIT",
@@ -2852,7 +3646,11 @@ sysctl_devlog(SYSCTL_HANDLER_ARGS)
goto done;
sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
- sbuf_printf(sb, "\n%10s %15s %8s %8s %s\n",
+ if (sb == NULL) {
+ rc = ENOMEM;
+ goto done;
+ }
+ sbuf_printf(sb, "%10s %15s %8s %8s %s\n",
"Seq#", "Tstamp", "Level", "Facility", "Message");
i = first;
@@ -2882,6 +3680,624 @@ done:
return (rc);
}
+static int
+sysctl_fcoe_stats(SYSCTL_HANDLER_ARGS) /* sysctl handler: prints the FCoE DDP octet/frame and dropped-frame counters for channels 0-3; arg1 is the adapter */
+{
+ struct adapter *sc = arg1;
+ struct sbuf *sb;
+ int rc;
+ struct tp_fcoe_stats stats[4];
+
+ rc = sysctl_wire_old_buffer(req, 0);
+ if (rc != 0)
+ return (rc);
+
+ sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
+ if (sb == NULL)
+ return (ENOMEM);
+
+ t4_get_fcoe_stats(sc, 0, &stats[0]); /* one query per channel index, 0 through 3 */
+ t4_get_fcoe_stats(sc, 1, &stats[1]);
+ t4_get_fcoe_stats(sc, 2, &stats[2]);
+ t4_get_fcoe_stats(sc, 3, &stats[3]);
+
+ sbuf_printf(sb, " channel 0 channel 1 "
+ "channel 2 channel 3\n");
+ sbuf_printf(sb, "octetsDDP: %16ju %16ju %16ju %16ju\n",
+ stats[0].octetsDDP, stats[1].octetsDDP, stats[2].octetsDDP,
+ stats[3].octetsDDP);
+ sbuf_printf(sb, "framesDDP: %16u %16u %16u %16u\n", stats[0].framesDDP,
+ stats[1].framesDDP, stats[2].framesDDP, stats[3].framesDDP);
+ sbuf_printf(sb, "framesDrop: %16u %16u %16u %16u", /* last line has no trailing newline */
+ stats[0].framesDrop, stats[1].framesDrop, stats[2].framesDrop,
+ stats[3].framesDrop);
+
+ rc = sbuf_finish(sb);
+ sbuf_delete(sb);
+
+ return (rc);
+}
+
+static int
+sysctl_hw_sched(SYSCTL_HANDLER_ARGS) /* sysctl handler: prints one row per tx scheduler (mode, channel, rate, class IPG, flow IPG); arg1 is the adapter */
+{
+ struct adapter *sc = arg1;
+ struct sbuf *sb;
+ int rc, i;
+ unsigned int map, kbps, ipg, mode;
+ unsigned int pace_tab[NTX_SCHED];
+
+ rc = sysctl_wire_old_buffer(req, 0);
+ if (rc != 0)
+ return (rc);
+
+ sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
+ if (sb == NULL)
+ return (ENOMEM);
+
+ map = t4_read_reg(sc, A_TP_TX_MOD_QUEUE_REQ_MAP); /* consumed two bits per scheduler in the loop below */
+ mode = G_TIMERMODE(t4_read_reg(sc, A_TP_MOD_CONFIG)); /* bit i set => scheduler i is shown as "flow", otherwise "class" */
+ t4_read_pace_tbl(sc, pace_tab);
+
+ sbuf_printf(sb, "Scheduler Mode Channel Rate (Kbps) "
+ "Class IPG (0.1 ns) Flow IPG (us)");
+
+ for (i = 0; i < NTX_SCHED; ++i, map >>= 2) { /* each row starts with "\n", so the output has no trailing newline */
+ t4_get_tx_sched(sc, i, &kbps, &ipg);
+ sbuf_printf(sb, "\n %u %-5s %u ", i,
+ (mode & (1 << i)) ? "flow" : "class", map & 3);
+ if (kbps) /* a zero value in any of the three columns is printed as "disabled" */
+ sbuf_printf(sb, "%9u ", kbps);
+ else
+ sbuf_printf(sb, " disabled ");
+
+ if (ipg)
+ sbuf_printf(sb, "%13u ", ipg);
+ else
+ sbuf_printf(sb, " disabled ");
+
+ if (pace_tab[i])
+ sbuf_printf(sb, "%10u", pace_tab[i]);
+ else
+ sbuf_printf(sb, " disabled");
+ }
+
+ rc = sbuf_finish(sb);
+ sbuf_delete(sb);
+
+ return (rc);
+}
+
+static int
+sysctl_lb_stats(SYSCTL_HANDLER_ARGS) /* sysctl handler: prints loopback statistics two ports at a time, side by side (0/1, then 2/3); arg1 is the adapter */
+{
+ struct adapter *sc = arg1;
+ struct sbuf *sb;
+ int rc, i, j;
+ uint64_t *p0, *p1;
+ struct lb_port_stats s[2];
+ static const char *stat_name[] = { /* row labels, printed in the order the counters are walked below */
+ "OctetsOK:", "FramesOK:", "BcastFrames:", "McastFrames:",
+ "UcastFrames:", "ErrorFrames:", "Frames64:", "Frames65To127:",
+ "Frames128To255:", "Frames256To511:", "Frames512To1023:",
+ "Frames1024To1518:", "Frames1519ToMax:", "FramesDropped:",
+ "BG0FramesDropped:", "BG1FramesDropped:", "BG2FramesDropped:",
+ "BG3FramesDropped:", "BG0FramesTrunc:", "BG1FramesTrunc:",
+ "BG2FramesTrunc:", "BG3FramesTrunc:"
+ };
+
+ rc = sysctl_wire_old_buffer(req, 0);
+ if (rc != 0)
+ return (rc);
+
+ sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
+ if (sb == NULL)
+ return (ENOMEM);
+
+ memset(s, 0, sizeof(s));
+
+ for (i = 0; i < 4; i += 2) {
+ t4_get_lb_stats(sc, i, &s[0]);
+ t4_get_lb_stats(sc, i + 1, &s[1]);
+
+ p0 = &s[0].octets; /* the structs are walked as arrays of uint64_t starting at .octets */
+ p1 = &s[1].octets;
+ sbuf_printf(sb, "%s Loopback %u"
+ " Loopback %u", i == 0 ? "" : "\n", i, i + 1);
+
+ for (j = 0; j < ARRAY_SIZE(stat_name); j++) /* NOTE(review): assumes struct lb_port_stats holds ARRAY_SIZE(stat_name) consecutive uint64_t fields in this order - confirm */
+ sbuf_printf(sb, "\n%-17s %20ju %20ju", stat_name[j],
+ *p0++, *p1++);
+ }
+
+ rc = sbuf_finish(sb);
+ sbuf_delete(sb);
+
+ return (rc);
+}
+
+struct mem_desc { /* one address range, as used by sysctl_meminfo and sorted with mem_desc_cmp */
+ unsigned int base; /* start address; the sort key */
+ unsigned int limit; /* end of the range; sysctl_meminfo treats 0 in the region table as "not known" and derives it from the next entry's base */
+ unsigned int idx; /* index into a name table; in the region table a value >= ARRAY_SIZE(region) marks an entry that is not displayed */
+};
+
+static int
+mem_desc_cmp(const void *a, const void *b)
+{
+	const unsigned int lhs = ((const struct mem_desc *)a)->base;
+	const unsigned int rhs = ((const struct mem_desc *)b)->base;
+
+	/*
+	 * qsort comparator ordering mem_desc entries by ascending base.
+	 * The bases are unsigned, so returning their difference as an int
+	 * reports the wrong sign once they are more than INT_MAX apart;
+	 * compare explicitly and return -1, 0 or 1 instead.
+	 */
+	return ((lhs > rhs) - (lhs < rhs));
+}
+
+static void
+mem_region_show(struct sbuf *sb, const char *name, unsigned int from,
+    unsigned int to)
+{
+	/*
+	 * Inclusive length of [from, to].  The unsigned arithmetic wraps to
+	 * zero when 'to' is exactly one below 'from' (or when the range spans
+	 * the whole address space); such a region is not printed.
+	 */
+	const unsigned int size = to - from + 1;
+
+	/* XXX: need humanize_number(3) in libkern for a more readable 'size' */
+	if (size != 0)
+		sbuf_printf(sb, "%-15s %#x-%#x [%u]\n", name, from, to, size);
+}
+
+static int
+sysctl_meminfo(SYSCTL_HANDLER_ARGS) /* sysctl handler: prints the enabled memories, the regions carved out of them, and page usage; arg1 is the adapter */
+{
+ struct adapter *sc = arg1;
+ struct sbuf *sb;
+ int rc, i, n;
+ uint32_t lo, hi;
+ static const char *memory[] = { "EDC0:", "EDC1:", "MC:" }; /* indexed by avail[].idx */
+ static const char *region[] = { /* indexed by mem[].idx; order must match the order mem[] is filled in below */
+ "DBQ contexts:", "IMSG contexts:", "FLM cache:", "TCBs:",
+ "Pstructs:", "Timers:", "Rx FL:", "Tx FL:", "Pstruct FL:",
+ "Tx payload:", "Rx payload:", "LE hash:", "iSCSI region:",
+ "TDDP region:", "TPT region:", "STAG region:", "RQ region:",
+ "RQUDP region:", "PBL region:", "TXPBL region:", "ULPRX state:",
+ "ULPTX state:", "On-chip queues:"
+ };
+ struct mem_desc avail[3];
+ struct mem_desc mem[ARRAY_SIZE(region) + 3]; /* up to 3 holes */
+ struct mem_desc *md = mem;
+
+ rc = sysctl_wire_old_buffer(req, 0);
+ if (rc != 0)
+ return (rc);
+
+ sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
+ if (sb == NULL)
+ return (ENOMEM);
+
+ for (i = 0; i < ARRAY_SIZE(mem); i++) { /* limit 0 means "derive from the next entry"; idx defaults to the slot number */
+ mem[i].limit = 0;
+ mem[i].idx = i;
+ }
+
+ /* Find and sort the populated memory ranges */
+ i = 0;
+ lo = t4_read_reg(sc, A_MA_TARGET_MEM_ENABLE);
+ if (lo & F_EDRAM0_ENABLE) {
+ hi = t4_read_reg(sc, A_MA_EDRAM0_BAR);
+ avail[i].base = G_EDRAM0_BASE(hi) << 20;
+ avail[i].limit = avail[i].base + (G_EDRAM0_SIZE(hi) << 20);
+ avail[i].idx = 0;
+ i++;
+ }
+ if (lo & F_EDRAM1_ENABLE) {
+ hi = t4_read_reg(sc, A_MA_EDRAM1_BAR);
+ avail[i].base = G_EDRAM1_BASE(hi) << 20;
+ avail[i].limit = avail[i].base + (G_EDRAM1_SIZE(hi) << 20);
+ avail[i].idx = 1;
+ i++;
+ }
+ if (lo & F_EXT_MEM_ENABLE) {
+ hi = t4_read_reg(sc, A_MA_EXT_MEMORY_BAR);
+ avail[i].base = G_EXT_MEM_BASE(hi) << 20;
+ avail[i].limit = avail[i].base + (G_EXT_MEM_SIZE(hi) << 20);
+ avail[i].idx = 2;
+ i++;
+ }
+ if (!i) { /* no memory available */
+ sbuf_delete(sb); /* release the sbuf on this early exit too; it was leaked before */
+ return 0;
+ }
+ qsort(avail, i, sizeof(struct mem_desc), mem_desc_cmp);
+
+ (md++)->base = t4_read_reg(sc, A_SGE_DBQ_CTXT_BADDR); /* these entries have no explicit limit; it is derived after sorting */
+ (md++)->base = t4_read_reg(sc, A_SGE_IMSG_CTXT_BADDR);
+ (md++)->base = t4_read_reg(sc, A_SGE_FLM_CACHE_BADDR);
+ (md++)->base = t4_read_reg(sc, A_TP_CMM_TCB_BASE);
+ (md++)->base = t4_read_reg(sc, A_TP_CMM_MM_BASE);
+ (md++)->base = t4_read_reg(sc, A_TP_CMM_TIMER_BASE);
+ (md++)->base = t4_read_reg(sc, A_TP_CMM_MM_RX_FLST_BASE);
+ (md++)->base = t4_read_reg(sc, A_TP_CMM_MM_TX_FLST_BASE);
+ (md++)->base = t4_read_reg(sc, A_TP_CMM_MM_PS_FLST_BASE);
+
+ /* the next few have explicit upper bounds */
+ md->base = t4_read_reg(sc, A_TP_PMM_TX_BASE);
+ md->limit = md->base - 1 +
+ t4_read_reg(sc, A_TP_PMM_TX_PAGE_SIZE) *
+ G_PMTXMAXPAGE(t4_read_reg(sc, A_TP_PMM_TX_MAX_PAGE));
+ md++;
+
+ md->base = t4_read_reg(sc, A_TP_PMM_RX_BASE);
+ md->limit = md->base - 1 +
+ t4_read_reg(sc, A_TP_PMM_RX_PAGE_SIZE) *
+ G_PMRXMAXPAGE(t4_read_reg(sc, A_TP_PMM_RX_MAX_PAGE));
+ md++;
+
+ if (t4_read_reg(sc, A_LE_DB_CONFIG) & F_HASHEN) {
+ hi = t4_read_reg(sc, A_LE_DB_TID_HASHBASE) / 4;
+ md->base = t4_read_reg(sc, A_LE_DB_HASH_TID_BASE);
+ md->limit = (sc->tids.ntids - hi) * 16 + md->base - 1;
+ } else {
+ md->base = 0;
+ md->idx = ARRAY_SIZE(region); /* hide it */
+ }
+ md++;
+
+#define ulp_region(reg) \
+ md->base = t4_read_reg(sc, A_ULP_ ## reg ## _LLIMIT);\
+ (md++)->limit = t4_read_reg(sc, A_ULP_ ## reg ## _ULIMIT)
+
+ ulp_region(RX_ISCSI);
+ ulp_region(RX_TDDP);
+ ulp_region(TX_TPT);
+ ulp_region(RX_STAG);
+ ulp_region(RX_RQ);
+ ulp_region(RX_RQUDP);
+ ulp_region(RX_PBL);
+ ulp_region(TX_PBL);
+#undef ulp_region
+
+ md->base = t4_read_reg(sc, A_ULP_RX_CTX_BASE);
+ md->limit = md->base + sc->tids.ntids - 1;
+ md++;
+ md->base = t4_read_reg(sc, A_ULP_TX_ERR_TABLE_BASE);
+ md->limit = md->base + sc->tids.ntids - 1;
+ md++;
+
+ md->base = sc->vres.ocq.start;
+ if (sc->vres.ocq.size)
+ md->limit = md->base + sc->vres.ocq.size - 1;
+ else
+ md->idx = ARRAY_SIZE(region); /* hide it */
+ md++;
+
+ /* add any address-space holes, there can be up to 3 */
+ for (n = 0; n < i - 1; n++)
+ if (avail[n].limit < avail[n + 1].base)
+ (md++)->base = avail[n].limit;
+ if (avail[n].limit) /* n == i - 1 here, i.e. the highest populated range */
+ (md++)->base = avail[n].limit;
+
+ n = md - mem; /* number of mem[] entries in use, holes included */
+ qsort(mem, n, sizeof(struct mem_desc), mem_desc_cmp);
+
+ for (lo = 0; lo < i; lo++)
+ mem_region_show(sb, memory[avail[lo].idx], avail[lo].base,
+ avail[lo].limit - 1);
+
+ sbuf_printf(sb, "\n");
+ for (i = 0; i < n; i++) {
+ if (mem[i].idx >= ARRAY_SIZE(region))
+ continue; /* skip holes */
+ if (!mem[i].limit) /* no explicit bound: the region runs up to the next entry's base, or to ~0 for the last one */
+ mem[i].limit = i < n - 1 ? mem[i + 1].base - 1 : ~0;
+ mem_region_show(sb, region[mem[i].idx], mem[i].base,
+ mem[i].limit);
+ }
+
+ sbuf_printf(sb, "\n");
+ lo = t4_read_reg(sc, A_CIM_SDRAM_BASE_ADDR);
+ hi = t4_read_reg(sc, A_CIM_SDRAM_ADDR_SIZE) + lo - 1;
+ mem_region_show(sb, "uP RAM:", lo, hi);
+
+ lo = t4_read_reg(sc, A_CIM_EXTMEM2_BASE_ADDR);
+ hi = t4_read_reg(sc, A_CIM_EXTMEM2_ADDR_SIZE) + lo - 1;
+ mem_region_show(sb, "uP Extmem2:", lo, hi);
+
+ lo = t4_read_reg(sc, A_TP_PMM_RX_MAX_PAGE);
+ sbuf_printf(sb, "\n%u Rx pages of size %uKiB for %u channels\n",
+ G_PMRXMAXPAGE(lo),
+ t4_read_reg(sc, A_TP_PMM_RX_PAGE_SIZE) >> 10,
+ (lo & F_PMRXNUMCHN) ? 2 : 1);
+
+ lo = t4_read_reg(sc, A_TP_PMM_TX_MAX_PAGE);
+ hi = t4_read_reg(sc, A_TP_PMM_TX_PAGE_SIZE);
+ sbuf_printf(sb, "%u Tx pages of size %u%ciB for %u channels\n",
+ G_PMTXMAXPAGE(lo),
+ hi >= (1 << 20) ? (hi >> 20) : (hi >> 10), /* page size shown in MiB when >= 1MiB, otherwise in KiB */
+ hi >= (1 << 20) ? 'M' : 'K', 1 << G_PMTXNUMCHN(lo));
+ sbuf_printf(sb, "%u p-structs\n",
+ t4_read_reg(sc, A_TP_CMM_MM_MAX_PSTRUCT));
+
+ for (i = 0; i < 4; i++) {
+ lo = t4_read_reg(sc, A_MPS_RX_PG_RSV0 + i * 4);
+ sbuf_printf(sb, "\nPort %d using %u pages out of %u allocated",
+ i, G_USED(lo), G_ALLOC(lo));
+ }
+ for (i = 0; i < 4; i++) {
+ lo = t4_read_reg(sc, A_MPS_RX_PG_RSV4 + i * 4);
+ sbuf_printf(sb,
+ "\nLoopback %d using %u pages out of %u allocated",
+ i, G_USED(lo), G_ALLOC(lo));
+ }
+
+ rc = sbuf_finish(sb);
+ sbuf_delete(sb);
+
+ return (rc);
+}
+
+static int
+sysctl_path_mtus(SYSCTL_HANDLER_ARGS) /* sysctl handler: prints the MTU table read by t4_read_mtu_tbl as one space-separated line; arg1 is the adapter */
+{
+ struct adapter *sc = arg1;
+ struct sbuf *sb;
+ int rc;
+ uint16_t mtus[NMTUS];
+
+ rc = sysctl_wire_old_buffer(req, 0);
+ if (rc != 0)
+ return (rc);
+
+ sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
+ if (sb == NULL)
+ return (ENOMEM);
+
+ t4_read_mtu_tbl(sc, mtus, NULL); /* the third argument is passed as NULL here; only mtus[] is used */
+
+ sbuf_printf(sb, "%u %u %u %u %u %u %u %u %u %u %u %u %u %u %u %u", /* exactly 16 entries are printed: assumes NMTUS >= 16 - TODO confirm */
+ mtus[0], mtus[1], mtus[2], mtus[3], mtus[4], mtus[5], mtus[6],
+ mtus[7], mtus[8], mtus[9], mtus[10], mtus[11], mtus[12], mtus[13],
+ mtus[14], mtus[15]);
+
+ rc = sbuf_finish(sb);
+ sbuf_delete(sb);
+
+ return (rc);
+}
+
+static int
+sysctl_pm_stats(SYSCTL_HANDLER_ARGS) /* sysctl handler: prints the PM tx and rx count/cycle counters, one row per statistic; arg1 is the adapter */
+{
+ struct adapter *sc = arg1;
+ struct sbuf *sb;
+ int rc, i;
+ uint32_t tx_cnt[PM_NSTATS], rx_cnt[PM_NSTATS];
+ uint64_t tx_cyc[PM_NSTATS], rx_cyc[PM_NSTATS];
+ static const char *pm_stats[] = { /* row labels; NOTE(review): only 5 names are listed, so this assumes PM_NSTATS <= 5 - confirm */
+ "Read:", "Write bypass:", "Write mem:", "Flush:", "FIFO wait:"
+ };
+
+ rc = sysctl_wire_old_buffer(req, 0);
+ if (rc != 0)
+ return (rc);
+
+ sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
+ if (sb == NULL)
+ return (ENOMEM);
+
+ t4_pmtx_get_stats(sc, tx_cnt, tx_cyc);
+ t4_pmrx_get_stats(sc, rx_cnt, rx_cyc);
+
+ sbuf_printf(sb, " Tx count Tx cycles "
+ "Rx count Rx cycles");
+ for (i = 0; i < PM_NSTATS; i++) /* each row starts with "\n", so the output has no trailing newline */
+ sbuf_printf(sb, "\n%-13s %10u %20ju %10u %20ju",
+ pm_stats[i], tx_cnt[i], tx_cyc[i], rx_cnt[i], rx_cyc[i]);
+
+ rc = sbuf_finish(sb);
+ sbuf_delete(sb);
+
+ return (rc);
+}
+
+static int
+sysctl_rdma_stats(SYSCTL_HANDLER_ARGS) /* sysctl handler: prints the two RQE deferral counters returned by t4_tp_get_rdma_stats; arg1 is the adapter */
+{
+ struct adapter *sc = arg1;
+ struct sbuf *sb;
+ int rc;
+ struct tp_rdma_stats stats;
+
+ rc = sysctl_wire_old_buffer(req, 0);
+ if (rc != 0)
+ return (rc);
+
+ sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
+ if (sb == NULL)
+ return (ENOMEM);
+
+ t4_tp_get_rdma_stats(sc, &stats);
+ sbuf_printf(sb, "NoRQEModDefferals: %u\n", stats.rqe_dfr_mod); /* label spelling is part of the emitted output; left as is */
+ sbuf_printf(sb, "NoRQEPktDefferals: %u", stats.rqe_dfr_pkt);
+
+ rc = sbuf_finish(sb);
+ sbuf_delete(sb);
+
+ return (rc);
+}
+
+static int
+sysctl_tcp_stats(SYSCTL_HANDLER_ARGS) /* sysctl handler: prints the TCP counters for IP and IPv6 side by side; arg1 is the adapter */
+{
+ struct adapter *sc = arg1;
+ struct sbuf *sb;
+ int rc;
+ struct tp_tcp_stats v4, v6;
+
+ rc = sysctl_wire_old_buffer(req, 0);
+ if (rc != 0)
+ return (rc);
+
+ sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
+ if (sb == NULL)
+ return (ENOMEM);
+
+ t4_tp_get_tcp_stats(sc, &v4, &v6); /* v4 feeds the "IP" column, v6 the "IPv6" column */
+ sbuf_printf(sb,
+ " IP IPv6\n");
+ sbuf_printf(sb, "OutRsts: %20u %20u\n",
+ v4.tcpOutRsts, v6.tcpOutRsts);
+ sbuf_printf(sb, "InSegs: %20ju %20ju\n",
+ v4.tcpInSegs, v6.tcpInSegs);
+ sbuf_printf(sb, "OutSegs: %20ju %20ju\n",
+ v4.tcpOutSegs, v6.tcpOutSegs);
+ sbuf_printf(sb, "RetransSegs: %20ju %20ju", /* last line has no trailing newline */
+ v4.tcpRetransSegs, v6.tcpRetransSegs);
+
+ rc = sbuf_finish(sb);
+ sbuf_delete(sb);
+
+ return (rc);
+}
+
+static int
+sysctl_tids(SYSCTL_HANDLER_ARGS) /* sysctl handler: prints the ATID/TID/STID/FTID ranges and usage from sc->tids plus the hardware's active counts; arg1 is the adapter */
+{
+ struct adapter *sc = arg1;
+ struct sbuf *sb;
+ int rc;
+ struct tid_info *t = &sc->tids;
+
+ rc = sysctl_wire_old_buffer(req, 0);
+ if (rc != 0)
+ return (rc);
+
+ sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
+ if (sb == NULL)
+ return (ENOMEM);
+
+ if (t->natids) { /* each range is printed only when its count is non-zero */
+ sbuf_printf(sb, "ATID range: 0-%u, in use: %u\n", t->natids - 1,
+ t->atids_in_use);
+ }
+
+ if (t->ntids) {
+ if (t4_read_reg(sc, A_LE_DB_CONFIG) & F_HASHEN) { /* with hashing enabled the TID space is shown as up to two sub-ranges */
+ uint32_t b = t4_read_reg(sc, A_LE_DB_SERVER_INDEX) / 4;
+
+ if (b) {
+ sbuf_printf(sb, "TID range: 0-%u, %u-%u", b - 1,
+ t4_read_reg(sc, A_LE_DB_TID_HASHBASE) / 4,
+ t->ntids - 1);
+ } else {
+ sbuf_printf(sb, "TID range: %u-%u",
+ t4_read_reg(sc, A_LE_DB_TID_HASHBASE) / 4,
+ t->ntids - 1);
+ }
+ } else
+ sbuf_printf(sb, "TID range: 0-%u", t->ntids - 1);
+ sbuf_printf(sb, ", in use: %u\n", /* completes whichever "TID range" line was started above */
+ atomic_load_acq_int(&t->tids_in_use));
+ }
+
+ if (t->nstids) {
+ sbuf_printf(sb, "STID range: %u-%u, in use: %u\n", t->stid_base,
+ t->stid_base + t->nstids - 1, t->stids_in_use);
+ }
+
+ if (t->nftids) {
+ sbuf_printf(sb, "FTID range: %u-%u\n", t->ftid_base,
+ t->ftid_base + t->nftids - 1);
+ }
+
+ sbuf_printf(sb, "HW TID usage: %u IP users, %u IPv6 users", /* always printed; read straight from hardware registers */
+ t4_read_reg(sc, A_LE_DB_ACT_CNT_IPV4),
+ t4_read_reg(sc, A_LE_DB_ACT_CNT_IPV6));
+
+ rc = sbuf_finish(sb);
+ sbuf_delete(sb);
+
+ return (rc);
+}
+
+static int
+sysctl_tp_err_stats(SYSCTL_HANDLER_ARGS) /* sysctl handler: prints the TP error counters per channel (0-3) followed by two adapter-wide counters; arg1 is the adapter */
+{
+ struct adapter *sc = arg1;
+ struct sbuf *sb;
+ int rc;
+ struct tp_err_stats stats;
+
+ rc = sysctl_wire_old_buffer(req, 0);
+ if (rc != 0)
+ return (rc);
+
+ sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
+ if (sb == NULL)
+ return (ENOMEM);
+
+ t4_tp_get_err_stats(sc, &stats);
+
+ sbuf_printf(sb, " channel 0 channel 1 channel 2 "
+ "channel 3\n");
+ sbuf_printf(sb, "macInErrs: %10u %10u %10u %10u\n",
+ stats.macInErrs[0], stats.macInErrs[1], stats.macInErrs[2],
+ stats.macInErrs[3]);
+ sbuf_printf(sb, "hdrInErrs: %10u %10u %10u %10u\n",
+ stats.hdrInErrs[0], stats.hdrInErrs[1], stats.hdrInErrs[2],
+ stats.hdrInErrs[3]);
+ sbuf_printf(sb, "tcpInErrs: %10u %10u %10u %10u\n",
+ stats.tcpInErrs[0], stats.tcpInErrs[1], stats.tcpInErrs[2],
+ stats.tcpInErrs[3]);
+ sbuf_printf(sb, "tcp6InErrs: %10u %10u %10u %10u\n",
+ stats.tcp6InErrs[0], stats.tcp6InErrs[1], stats.tcp6InErrs[2],
+ stats.tcp6InErrs[3]);
+ sbuf_printf(sb, "tnlCongDrops: %10u %10u %10u %10u\n",
+ stats.tnlCongDrops[0], stats.tnlCongDrops[1], stats.tnlCongDrops[2],
+ stats.tnlCongDrops[3]);
+ sbuf_printf(sb, "tnlTxDrops: %10u %10u %10u %10u\n",
+ stats.tnlTxDrops[0], stats.tnlTxDrops[1], stats.tnlTxDrops[2],
+ stats.tnlTxDrops[3]);
+ sbuf_printf(sb, "ofldVlanDrops: %10u %10u %10u %10u\n",
+ stats.ofldVlanDrops[0], stats.ofldVlanDrops[1],
+ stats.ofldVlanDrops[2], stats.ofldVlanDrops[3]);
+ sbuf_printf(sb, "ofldChanDrops: %10u %10u %10u %10u\n\n", /* the blank line separates the per-channel table from the scalar counters */
+ stats.ofldChanDrops[0], stats.ofldChanDrops[1],
+ stats.ofldChanDrops[2], stats.ofldChanDrops[3]);
+ sbuf_printf(sb, "ofldNoNeigh: %u\nofldCongDefer: %u", /* these two are single values, not per-channel arrays */
+ stats.ofldNoNeigh, stats.ofldCongDefer);
+
+ rc = sbuf_finish(sb);
+ sbuf_delete(sb);
+
+ return (rc);
+}
+
+static int
+sysctl_tx_rate(SYSCTL_HANDLER_ARGS) /* sysctl handler: prints the NIC and offload tx rates for channels 0-3 as returned by t4_get_chan_txrate; arg1 is the adapter */
+{
+ struct adapter *sc = arg1;
+ struct sbuf *sb;
+ int rc;
+ u64 nrate[NCHAN], orate[NCHAN];
+
+ rc = sysctl_wire_old_buffer(req, 0);
+ if (rc != 0)
+ return (rc);
+
+ sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
+ if (sb == NULL)
+ return (ENOMEM);
+
+ t4_get_chan_txrate(sc, nrate, orate); /* nrate feeds the "NIC B/s" row, orate the "Offload B/s" row */
+ sbuf_printf(sb, " channel 0 channel 1 channel 2 "
+ "channel 3\n");
+ sbuf_printf(sb, "NIC B/s: %10ju %10ju %10ju %10ju\n",
+ nrate[0], nrate[1], nrate[2], nrate[3]); /* indices 0..3 are hard-coded: assumes NCHAN >= 4 - TODO confirm */
+ sbuf_printf(sb, "Offload B/s: %10ju %10ju %10ju %10ju",
+ orate[0], orate[1], orate[2], orate[3]);
+
+ rc = sbuf_finish(sb);
+ sbuf_delete(sb);
+
+ return (rc);
+}
+
static inline void
txq_start(struct ifnet *ifp, struct sge_txq *txq)
{
@@ -2897,17 +4313,57 @@ txq_start(struct ifnet *ifp, struct sge_txq *txq)
}
void
-cxgbe_txq_start(void *arg, int count)
+t4_tx_callout(void *arg) /* callout handler for an eq: either re-arms itself for the next tick or hands the eq's tx_task to the channel's taskqueue */
{
- struct sge_txq *txq = arg;
+ struct sge_eq *eq = arg;
+ struct adapter *sc;
+
+ if (EQ_TRYLOCK(eq) == 0) /* never block in the callout; try again on the next tick instead */
+ goto reschedule;
+
+ if (eq->flags & EQ_STALLED && !can_resume_tx(eq)) {
+ EQ_UNLOCK(eq);
+reschedule:
+ if (__predict_true(!(eq->flags & EQ_DOOMED))) /* bitwise test of the DOOMED bit; a logical && here was true for any non-zero flags and suppressed the reschedule */
+ callout_schedule(&eq->tx_callout, 1);
+ return;
+ }
+
+ EQ_LOCK_ASSERT_OWNED(eq);
+
+ if (__predict_true((eq->flags & EQ_DOOMED) == 0)) { /* a doomed eq gets no further tx work queued */
+
+ if ((eq->flags & EQ_TYPEMASK) == EQ_ETH) { /* arg is reinterpreted as the txq or wrq owning this eq, only to locate the adapter */
+ struct sge_txq *txq = arg;
+ struct port_info *pi = txq->ifp->if_softc;
+
+ sc = pi->adapter;
+ } else {
+ struct sge_wrq *wrq = arg;
+
+ sc = wrq->adapter;
+ }
+
+ taskqueue_enqueue(sc->tq[eq->tx_chan], &eq->tx_task);
+ }
- TXQ_LOCK(txq);
- if (txq->eq.flags & EQ_CRFLUSHED) {
- txq->eq.flags &= ~EQ_CRFLUSHED;
+ EQ_UNLOCK(eq);
+}
+
+void
+t4_tx_task(void *arg, int count) /* taskqueue handler: resumes transmission on the eq passed as arg, with the eq lock held; count is not used in the body */
+{
+ struct sge_eq *eq = arg;
+
+ EQ_LOCK(eq);
+ if ((eq->flags & EQ_TYPEMASK) == EQ_ETH) { /* the eq type selects how arg is reinterpreted */
+ struct sge_txq *txq = arg; /* NOTE(review): presumably the eq is the first member of struct sge_txq and struct sge_wrq - confirm */
txq_start(txq->ifp, txq);
- } else
- wakeup_one(txq); /* txq is going away, wakeup free_txq */
- TXQ_UNLOCK(txq);
+ } else {
+ struct sge_wrq *wrq = arg;
+ t4_wrq_tx_locked(wrq->adapter, wrq, NULL); /* called with a NULL third argument */
+ }
+ EQ_UNLOCK(eq);
}
static uint32_t
@@ -2937,10 +4393,10 @@ fconf_to_mode(uint32_t fconf)
mode |= T4_FILTER_IP_TOS;
if (fconf & F_VLAN)
- mode |= T4_FILTER_IVLAN;
+ mode |= T4_FILTER_VLAN;
if (fconf & F_VNIC_ID)
- mode |= T4_FILTER_OVLAN;
+ mode |= T4_FILTER_VNIC;
if (fconf & F_PORT)
mode |= T4_FILTER_PORT;
@@ -2974,10 +4430,10 @@ mode_to_fconf(uint32_t mode)
if (mode & T4_FILTER_IP_TOS)
fconf |= F_TOS;
- if (mode & T4_FILTER_IVLAN)
+ if (mode & T4_FILTER_VLAN)
fconf |= F_VLAN;
- if (mode & T4_FILTER_OVLAN)
+ if (mode & T4_FILTER_VNIC)
fconf |= F_VNIC_ID;
if (mode & T4_FILTER_PORT)
@@ -3012,10 +4468,10 @@ fspec_to_fconf(struct t4_filter_specification *fs)
if (fs->val.tos || fs->mask.tos)
fconf |= F_TOS;
- if (fs->val.ivlan_vld || fs->mask.ivlan_vld)
+ if (fs->val.vlan_vld || fs->mask.vlan_vld)
fconf |= F_VLAN;
- if (fs->val.ovlan_vld || fs->mask.ovlan_vld)
+ if (fs->val.vnic_vld || fs->mask.vnic_vld)
fconf |= F_VNIC_ID;
if (fs->val.iport || fs->mask.iport)
@@ -3035,7 +4491,13 @@ get_filter_mode(struct adapter *sc, uint32_t *mode)
t4_read_indirect(sc, A_TP_PIO_ADDR, A_TP_PIO_DATA, &fconf, 1,
A_TP_VLAN_PRI_MAP);
- *mode = fconf_to_mode(fconf);
+ if (sc->filter_mode != fconf) {
+ log(LOG_WARNING, "%s: cached filter mode out of sync %x %x.\n",
+ device_get_nameunit(sc->dev), sc->filter_mode, fconf);
+ sc->filter_mode = fconf;
+ }
+
+ *mode = fconf_to_mode(sc->filter_mode);
return (0);
}
@@ -3059,7 +4521,21 @@ set_filter_mode(struct adapter *sc, uint32_t mode)
goto done;
}
+#ifndef TCP_OFFLOAD_DISABLE
+ if (sc->offload_map) {
+ rc = EBUSY;
+ goto done;
+ }
+#endif
+
+#ifdef notyet
rc = -t4_set_filter_mode(sc, fconf);
+ if (rc == 0)
+ sc->filter_mode = fconf;
+#else
+ rc = ENOTSUP;
+#endif
+
done:
ADAPTER_UNLOCK(sc);
return (rc);
@@ -3119,7 +4595,6 @@ get_filter(struct adapter *sc, struct t4_filter *t)
static int
set_filter(struct adapter *sc, struct t4_filter *t)
{
- uint32_t fconf;
unsigned int nfilters, nports;
struct filter_entry *f;
int i;
@@ -3139,9 +4614,7 @@ set_filter(struct adapter *sc, struct t4_filter *t)
return (EINVAL);
/* Validate against the global filter mode */
- t4_read_indirect(sc, A_TP_PIO_ADDR, A_TP_PIO_DATA, &fconf, 1,
- A_TP_VLAN_PRI_MAP);
- if ((fconf | fspec_to_fconf(&t->fs)) != fconf)
+ if ((sc->filter_mode | fspec_to_fconf(&t->fs)) != sc->filter_mode)
return (E2BIG);
if (t->fs.action == FILTER_SWITCH && t->fs.eport >= nports)
@@ -3238,7 +4711,6 @@ clear_filter(struct filter_entry *f)
static int
set_filter_wr(struct adapter *sc, int fidx)
{
- int rc;
struct filter_entry *f = &sc->tids.ftid_tab[fidx];
struct mbuf *m;
struct fw_filter_wr *fwr;
@@ -3298,13 +4770,13 @@ set_filter_wr(struct adapter *sc, int fidx)
fwr->frag_to_ovlan_vldm =
(V_FW_FILTER_WR_FRAG(f->fs.val.frag) |
V_FW_FILTER_WR_FRAGM(f->fs.mask.frag) |
- V_FW_FILTER_WR_IVLAN_VLD(f->fs.val.ivlan_vld) |
- V_FW_FILTER_WR_OVLAN_VLD(f->fs.val.ovlan_vld) |
- V_FW_FILTER_WR_IVLAN_VLDM(f->fs.mask.ivlan_vld) |
- V_FW_FILTER_WR_OVLAN_VLDM(f->fs.mask.ovlan_vld));
+ V_FW_FILTER_WR_IVLAN_VLD(f->fs.val.vlan_vld) |
+ V_FW_FILTER_WR_OVLAN_VLD(f->fs.val.vnic_vld) |
+ V_FW_FILTER_WR_IVLAN_VLDM(f->fs.mask.vlan_vld) |
+ V_FW_FILTER_WR_OVLAN_VLDM(f->fs.mask.vnic_vld));
fwr->smac_sel = 0;
fwr->rx_chan_rx_rpl_iq = htobe16(V_FW_FILTER_WR_RX_CHAN(0) |
- V_FW_FILTER_WR_RX_RPL_IQ(sc->sge.intrq[0].abs_id));
+ V_FW_FILTER_WR_RX_RPL_IQ(sc->sge.fwq.abs_id));
fwr->maci_to_matchtypem =
htobe32(V_FW_FILTER_WR_MACI(f->fs.val.macidx) |
V_FW_FILTER_WR_MACIM(f->fs.mask.macidx) |
@@ -3318,10 +4790,10 @@ set_filter_wr(struct adapter *sc, int fidx)
fwr->ptclm = f->fs.mask.proto;
fwr->ttyp = f->fs.val.tos;
fwr->ttypm = f->fs.mask.tos;
- fwr->ivlan = htobe16(f->fs.val.ivlan);
- fwr->ivlanm = htobe16(f->fs.mask.ivlan);
- fwr->ovlan = htobe16(f->fs.val.ovlan);
- fwr->ovlanm = htobe16(f->fs.mask.ovlan);
+ fwr->ivlan = htobe16(f->fs.val.vlan);
+ fwr->ivlanm = htobe16(f->fs.mask.vlan);
+ fwr->ovlan = htobe16(f->fs.val.vnic);
+ fwr->ovlanm = htobe16(f->fs.mask.vnic);
bcopy(f->fs.val.dip, fwr->lip, sizeof (fwr->lip));
bcopy(f->fs.mask.dip, fwr->lipm, sizeof (fwr->lipm));
bcopy(f->fs.val.sip, fwr->fip, sizeof (fwr->fip));
@@ -3335,13 +4807,9 @@ set_filter_wr(struct adapter *sc, int fidx)
f->pending = 1;
sc->tids.ftids_in_use++;
- rc = t4_mgmt_tx(sc, m);
- if (rc != 0) {
- sc->tids.ftids_in_use--;
- m_freem(m);
- clear_filter(f);
- }
- return (rc);
+
+ t4_mgmt_tx(sc, m);
+ return (0);
}
static int
@@ -3350,7 +4818,7 @@ del_filter_wr(struct adapter *sc, int fidx)
struct filter_entry *f = &sc->tids.ftid_tab[fidx];
struct mbuf *m;
struct fw_filter_wr *fwr;
- unsigned int rc, ftid;
+ unsigned int ftid;
ADAPTER_LOCK_ASSERT_OWNED(sc);
@@ -3364,55 +4832,48 @@ del_filter_wr(struct adapter *sc, int fidx)
m->m_len = m->m_pkthdr.len = sizeof(*fwr);
bzero(fwr, sizeof (*fwr));
- t4_mk_filtdelwr(ftid, fwr, sc->sge.intrq[0].abs_id);
+ t4_mk_filtdelwr(ftid, fwr, sc->sge.fwq.abs_id);
f->pending = 1;
- rc = t4_mgmt_tx(sc, m);
- if (rc != 0) {
- f->pending = 0;
- m_freem(m);
- }
- return (rc);
+ t4_mgmt_tx(sc, m);
+ return (0);
}
-/* XXX move intr handlers to main.c and make this static */
-void
-filter_rpl(struct adapter *sc, const struct cpl_set_tcb_rpl *rpl)
+static int
+filter_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
{
+ struct adapter *sc = iq->adapter;
+ const struct cpl_set_tcb_rpl *rpl = (const void *)(rss + 1); /* the CPL reply immediately follows the RSS header */
unsigned int idx = GET_TID(rpl);
+ KASSERT(m == NULL, ("%s: payload with opcode %02x", __func__,
+ rss->opcode));
+
if (idx >= sc->tids.ftid_base &&
(idx -= sc->tids.ftid_base) < sc->tids.nftids) {
unsigned int rc = G_COOKIE(rpl->cookie);
struct filter_entry *f = &sc->tids.ftid_tab[idx];
- if (rc == FW_FILTER_WR_FLT_DELETED) {
- /*
- * Clear the filter when we get confirmation from the
- * hardware that the filter has been deleted.
- */
- clear_filter(f);
- sc->tids.ftids_in_use--;
- } else if (rc == FW_FILTER_WR_SMT_TBL_FULL) {
- device_printf(sc->dev,
- "filter %u setup failed due to full SMT\n", idx);
- clear_filter(f);
- sc->tids.ftids_in_use--;
- } else if (rc == FW_FILTER_WR_FLT_ADDED) {
+ if (rc == FW_FILTER_WR_FLT_ADDED) {
f->smtidx = (be64toh(rpl->oldval) >> 24) & 0xff;
f->pending = 0; /* asynchronous setup completed */
f->valid = 1;
- } else {
- /*
- * Something went wrong. Issue a warning about the
- * problem and clear everything out.
- */
+ return (0); /* filter is now live; nothing to clean up */
+ }
+
+ if (rc != FW_FILTER_WR_FLT_DELETED) {
+ /* Add or delete failed, need to display an error */
device_printf(sc->dev,
"filter %u setup failed with error %u\n", idx, rc);
- clear_filter(f);
- sc->tids.ftids_in_use--;
}
+
+ clear_filter(f); /* reached for a confirmed delete and for any failure; NOTE(review): runs before the adapter lock is taken - confirm that is intended */
+ ADAPTER_LOCK(sc);
+ sc->tids.ftids_in_use--; /* the in-use count is adjusted under the adapter lock */
+ ADAPTER_UNLOCK(sc);
}
+
+ return (0); /* always 0, including for tids outside the filter range */
}
static int
@@ -3444,6 +4905,84 @@ get_sge_context(struct adapter *sc, struct t4_sge_context *cntxt)
return (rc);
}
+/*
+ * Copy a range of adapter memory out to the user buffer mr->data.
+ *
+ * mr->addr and mr->len must both be multiples of 4 and mr->len must be
+ * non-zero, otherwise EINVAL is returned.  The range must lie entirely inside
+ * one of the enabled memories (EDRAM0, EDRAM1, external memory), otherwise
+ * ENXIO is returned.  On success the return value is that of copyout().
+ */
+static int
+read_card_mem(struct adapter *sc, struct t4_mem_range *mr)
+{
+	uint32_t base, size, lo, hi, win, off, remaining, i, n;
+	uint32_t *buf, *b;
+	int rc;
+
+	/* reads are in multiples of 32 bits */
+	if (mr->addr & 3 || mr->len & 3 || mr->len == 0)
+		return (EINVAL);
+
+	/*
+	 * We don't want to deal with potential holes so we mandate that the
+	 * requested region must lie entirely within one of the 3 memories.
+	 *
+	 * The containment test is "len <= size - (addr - base)" and not
+	 * "addr + len <= base + size": both sums can wrap around for a
+	 * user-supplied addr/len (or for a memory that ends at the top of the
+	 * address space) and a wrapped sum would let an out-of-range request
+	 * through.  The subtractions cannot wrap because addr >= base and
+	 * addr - base < size are checked first; the latter also rejects a
+	 * zero-sized memory.
+	 */
+	lo = t4_read_reg(sc, A_MA_TARGET_MEM_ENABLE);
+	if (lo & F_EDRAM0_ENABLE) {
+		hi = t4_read_reg(sc, A_MA_EDRAM0_BAR);
+		base = G_EDRAM0_BASE(hi) << 20;
+		size = G_EDRAM0_SIZE(hi) << 20;
+		if (mr->addr >= base && mr->addr - base < size &&
+		    mr->len <= size - (mr->addr - base))
+			goto proceed;
+	}
+	if (lo & F_EDRAM1_ENABLE) {
+		hi = t4_read_reg(sc, A_MA_EDRAM1_BAR);
+		base = G_EDRAM1_BASE(hi) << 20;
+		size = G_EDRAM1_SIZE(hi) << 20;
+		if (mr->addr >= base && mr->addr - base < size &&
+		    mr->len <= size - (mr->addr - base))
+			goto proceed;
+	}
+	if (lo & F_EXT_MEM_ENABLE) {
+		hi = t4_read_reg(sc, A_MA_EXT_MEMORY_BAR);
+		base = G_EXT_MEM_BASE(hi) << 20;
+		size = G_EXT_MEM_SIZE(hi) << 20;
+		if (mr->addr >= base && mr->addr - base < size &&
+		    mr->len <= size - (mr->addr - base))
+			goto proceed;
+	}
+	return (ENXIO);
+
+proceed:
+	/*
+	 * NOTE(review): the bounce buffer is as large as the request, which is
+	 * only bounded by the size of the memory being read - consider
+	 * copying out in smaller pieces.
+	 */
+	buf = b = malloc(mr->len, M_CXGBE, M_WAITOK);
+
+	/*
+	 * Position the PCIe window (we use memwin2) to the 16B aligned area
+	 * just at/before the requested region.
+	 */
+	win = mr->addr & ~0xf;
+	off = mr->addr - win;	/* offset of the requested region in the win */
+	remaining = mr->len;
+
+	while (remaining) {
+		/* Move the window, then read the register back. */
+		t4_write_reg(sc,
+		    PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_OFFSET, 2), win);
+		t4_read_reg(sc,
+		    PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_OFFSET, 2));
+
+		/* number of bytes that we'll copy in the inner loop */
+		n = min(remaining, MEMWIN2_APERTURE - off);
+
+		for (i = 0; i < n; i += 4, remaining -= 4)
+			*b++ = t4_read_reg(sc, MEMWIN2_BASE + off + i);
+
+		/* Only the first window can start at a non-zero offset. */
+		win += MEMWIN2_APERTURE;
+		off = 0;
+	}
+
+	rc = copyout(buf, mr->data, mr->len);
+	free(buf, M_CXGBE);
+
+	return (rc);
+}
+
int
t4_os_find_pci_capability(struct adapter *sc, int cap)
{
@@ -3514,6 +5053,22 @@ t4_os_link_changed(struct adapter *sc, int idx, int link_stat)
if_link_state_change(ifp, LINK_STATE_DOWN);
}
+/*
+ * Call func(sc, arg) once for every adapter on t4_list.  t4_list_lock is held
+ * for the whole walk, including across each call to func.
+ */
+void
+t4_iterate(void (*func)(struct adapter *, void *), void *arg)
+{
+	struct adapter *adap;
+
+	mtx_lock(&t4_list_lock);
+	for (adap = SLIST_FIRST(&t4_list); adap != NULL;
+	    adap = SLIST_NEXT(adap, link)) {
+		/*
+		 * The callback must not assume anything about the state of the
+		 * adapter it is given; all that is promised is that the
+		 * adapter's sc_lock is a valid lock.
+		 */
+		(*func)(adap, arg);
+	}
+	mtx_unlock(&t4_list_lock);
+}
+
static int
t4_open(struct cdev *dev, int flags, int type, struct thread *td)
{
@@ -3610,6 +5165,27 @@ t4_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data, int fflag,
case CHELSIO_T4_GET_SGE_CONTEXT:
rc = get_sge_context(sc, (struct t4_sge_context *)data);
break;
+ case CHELSIO_T4_LOAD_FW: {
+ struct t4_data *fw = (struct t4_data *)data;
+ uint8_t *fw_data;
+
+ if (sc->flags & FULL_INIT_DONE)
+ return (EBUSY);
+
+ fw_data = malloc(fw->len, M_CXGBE, M_NOWAIT);
+ if (fw_data == NULL)
+ return (ENOMEM);
+
+ rc = copyin(fw->data, fw_data, fw->len);
+ if (rc == 0)
+ rc = -t4_load_fw(sc, fw_data, fw->len);
+
+ free(fw_data, M_CXGBE);
+ break;
+ }
+ case CHELSIO_T4_GET_MEM:
+ rc = read_card_mem(sc, (struct t4_mem_range *)data);
+ break;
default:
rc = EINVAL;
}
@@ -3617,14 +5193,247 @@ t4_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data, int fflag,
return (rc);
}
+#ifndef TCP_OFFLOAD_DISABLE
+/*
+ * Turn TCP offload on or off for one port.  The adapter lock must be held.
+ *
+ * sc->offload_map has one bit per port.  The TOM ULD is activated when the
+ * first port is enabled and deactivated when the last one is disabled.
+ * Returns ENODEV if the adapter is not offload capable, 0 if the port is
+ * already in the requested state or the change succeeded, otherwise the error
+ * from activate_uld()/deactivate_uld().
+ */
+static int
+toe_capability(struct port_info *pi, int enable)
+{
+	struct adapter *sc = pi->adapter;
+	int error;
+
+	ADAPTER_LOCK_ASSERT_OWNED(sc);
+
+	if (!is_offload(sc))
+		return (ENODEV);
+
+	if (!enable) {
+		if (!isset(&sc->offload_map, pi->port_id))
+			return (0);	/* already off */
+
+		clrbit(&sc->offload_map, pi->port_id);
+		if (sc->offload_map != 0)
+			return (0);	/* other ports still use the ULD */
+
+		/* That was the last port; put its bit back if detach fails. */
+		error = deactivate_uld(&sc->tom);
+		if (error != 0)
+			setbit(&sc->offload_map, pi->port_id);
+		return (error);
+	}
+
+	if (isset(&sc->offload_map, pi->port_id))
+		return (0);		/* already on */
+
+	/* First port to be enabled brings the ULD up. */
+	if (sc->offload_map == 0) {
+		error = activate_uld(sc, ULD_TOM, &sc->tom);
+		if (error != 0)
+			return (error);
+	}
+	setbit(&sc->offload_map, pi->port_id);
+
+	return (0);
+}
+
+/*
+ * Register an upper layer driver by putting it on the global ULD list with a
+ * reference count of zero.  Returns EEXIST (and leaves the list alone) if a
+ * ULD with the same uld_id is already registered, 0 otherwise.
+ */
+int
+t4_register_uld(struct uld_info *ui)
+{
+	struct uld_info *cur;
+	int error = 0;
+
+	mtx_lock(&t4_uld_list_lock);
+
+	SLIST_FOREACH(cur, &t4_uld_list, link) {
+		if (cur->uld_id == ui->uld_id) {
+			error = EEXIST;
+			break;
+		}
+	}
+
+	if (error == 0) {
+		SLIST_INSERT_HEAD(&t4_uld_list, ui, link);
+		ui->refcount = 0;
+	}
+
+	mtx_unlock(&t4_uld_list_lock);
+	return (error);
+}
+
+/*
+ * Take an upper layer driver off the global ULD list.  Returns EINVAL if ui
+ * is not on the list, EBUSY if it is still referenced, and 0 once it has been
+ * removed.
+ */
+int
+t4_unregister_uld(struct uld_info *ui)
+{
+	struct uld_info *cur;
+	int error = EINVAL;
+
+	mtx_lock(&t4_uld_list_lock);
+
+	SLIST_FOREACH(cur, &t4_uld_list, link) {
+		if (cur != ui)
+			continue;
+
+		if (ui->refcount > 0) {
+			error = EBUSY;
+		} else {
+			SLIST_REMOVE(&t4_uld_list, ui, uld_info, link);
+			error = 0;
+		}
+		break;	/* found it; the walk ends here either way */
+	}
+
+	mtx_unlock(&t4_uld_list_lock);
+	return (error);
+}
+
+/*
+ * Attach the registered ULD with the given id to the adapter.  On success the
+ * ULD's reference count is bumped and usc->uld is pointed at it; the attach
+ * routine is handed &usc->softc.  Returns EAGAIN if no ULD with that id is
+ * registered, otherwise whatever the ULD's attach routine returned.  The
+ * attach routine is called with t4_uld_list_lock held.
+ */
+static int
+activate_uld(struct adapter *sc, int id, struct uld_softc *usc)
+{
+	struct uld_info *cur;
+	int error = EAGAIN;
+
+	mtx_lock(&t4_uld_list_lock);
+
+	SLIST_FOREACH(cur, &t4_uld_list, link) {
+		if (cur->uld_id != id)
+			continue;
+
+		error = cur->attach(sc, &usc->softc);
+		if (error == 0) {
+			KASSERT(usc->softc != NULL,
+			    ("%s: ULD %d has no state", __func__, id));
+			cur->refcount++;
+			usc->uld = cur;
+		}
+		break;	/* only the first match is tried */
+	}
+
+	mtx_unlock(&t4_uld_list_lock);
+
+	return (error);
+}
+
+/*
+ * Detach a previously activated ULD.  Returns EINVAL if usc does not describe
+ * an active ULD, otherwise whatever the ULD's detach routine returned.  On
+ * success the ULD's reference count is dropped and usc is reset.  The detach
+ * routine is called with t4_uld_list_lock held.
+ */
+static int
+deactivate_uld(struct uld_softc *usc)
+{
+	int error = EINVAL;
+
+	mtx_lock(&t4_uld_list_lock);
+
+	if (usc->uld != NULL && usc->softc != NULL) {
+		error = usc->uld->detach(usc->softc);
+		if (error == 0) {
+			KASSERT(usc->uld->refcount > 0,
+			    ("%s: ULD has bad refcount", __func__));
+			usc->uld->refcount--;
+			usc->uld = NULL;
+			usc->softc = NULL;
+		}
+	}
+
+	mtx_unlock(&t4_uld_list_lock);
+
+	return (error);
+}
+#endif
+
+/*
+ * Sanitize the module tunables: pick defaults for the ones the user did not
+ * set (or set to something unusable) and leave usable settings alone.
+ */
+static void
+tweak_tunables(void)
+{
+	const int ncpus = mp_ncpus;	/* CPU count, sampled once */
+
+	/* NIC queue counts: default is capped at the number of CPUs. */
+	if (t4_ntxq10g < 1)
+		t4_ntxq10g = min(ncpus, NTXQ_10G);
+	if (t4_ntxq1g < 1)
+		t4_ntxq1g = min(ncpus, NTXQ_1G);
+	if (t4_nrxq10g < 1)
+		t4_nrxq10g = min(ncpus, NRXQ_10G);
+	if (t4_nrxq1g < 1)
+		t4_nrxq1g = min(ncpus, NRXQ_1G);
+
+#ifndef TCP_OFFLOAD_DISABLE
+	/* Offload queue counts follow the same rule. */
+	if (t4_nofldtxq10g < 1)
+		t4_nofldtxq10g = min(ncpus, NOFLDTXQ_10G);
+	if (t4_nofldtxq1g < 1)
+		t4_nofldtxq1g = min(ncpus, NOFLDTXQ_1G);
+	if (t4_nofldrxq10g < 1)
+		t4_nofldrxq10g = min(ncpus, NOFLDRXQ_10G);
+	if (t4_nofldrxq1g < 1)
+		t4_nofldrxq1g = min(ncpus, NOFLDRXQ_1G);
+#endif
+
+	/*
+	 * Holdoff timer and packet-count indices fall back to the default when
+	 * out of range.  -1 is accepted as a packet-count index.
+	 */
+	if (!(t4_tmr_idx_10g >= 0 && t4_tmr_idx_10g < SGE_NTIMERS))
+		t4_tmr_idx_10g = TMR_IDX_10G;
+	if (!(t4_pktc_idx_10g >= -1 && t4_pktc_idx_10g < SGE_NCOUNTERS))
+		t4_pktc_idx_10g = PKTC_IDX_10G;
+	if (!(t4_tmr_idx_1g >= 0 && t4_tmr_idx_1g < SGE_NTIMERS))
+		t4_tmr_idx_1g = TMR_IDX_1G;
+	if (!(t4_pktc_idx_1g >= -1 && t4_pktc_idx_1g < SGE_NCOUNTERS))
+		t4_pktc_idx_1g = PKTC_IDX_1G;
+
+	/* Queue sizes are at least 128. */
+	if (t4_qsize_txq < 128)
+		t4_qsize_txq = 128;
+	if (t4_qsize_rxq < 128)
+		t4_qsize_rxq = 128;
+
+	/* The rx queue size is also bumped up to the next multiple of 8. */
+	while ((t4_qsize_rxq & 7) != 0)
+		t4_qsize_rxq++;
+
+	/* Drop any interrupt type bits other than the three known ones. */
+	t4_intr_types = t4_intr_types & (INTR_MSIX | INTR_MSI | INTR_INTX);
+}
+
static int
t4_mod_event(module_t mod, int cmd, void *arg)
{
+ int rc = 0;
- if (cmd == MOD_LOAD)
+ switch (cmd) {
+ case MOD_LOAD:
t4_sge_modload();
+ mtx_init(&t4_list_lock, "T4 adapters", 0, MTX_DEF);
+ SLIST_INIT(&t4_list);
+#ifndef TCP_OFFLOAD_DISABLE
+ mtx_init(&t4_uld_list_lock, "T4 ULDs", 0, MTX_DEF);
+ SLIST_INIT(&t4_uld_list);
+#endif
+ tweak_tunables();
+ break;
- return (0);
+ case MOD_UNLOAD:
+#ifndef TCP_OFFLOAD_DISABLE
+ mtx_lock(&t4_uld_list_lock);
+ if (!SLIST_EMPTY(&t4_uld_list)) {
+ rc = EBUSY;
+ mtx_unlock(&t4_uld_list_lock);
+ break;
+ }
+ mtx_unlock(&t4_uld_list_lock);
+ mtx_destroy(&t4_uld_list_lock);
+#endif
+ mtx_lock(&t4_list_lock);
+ if (!SLIST_EMPTY(&t4_list)) {
+ rc = EBUSY;
+ mtx_unlock(&t4_list_lock);
+ break;
+ }
+ mtx_unlock(&t4_list_lock);
+ mtx_destroy(&t4_list_lock);
+ break;
+ }
+
+ return (rc);
}
static devclass_t t4_devclass;
diff --git a/sys/dev/cxgbe/t4_sge.c b/sys/dev/cxgbe/t4_sge.c
index 09e3c19cb61a..41630e4f9cfa 100644
--- a/sys/dev/cxgbe/t4_sge.c
+++ b/sys/dev/cxgbe/t4_sge.c
@@ -38,6 +38,7 @@ __FBSDID("$FreeBSD$");
#include <sys/queue.h>
#include <sys/taskqueue.h>
#include <sys/sysctl.h>
+#include <sys/smp.h>
#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
@@ -50,7 +51,7 @@ __FBSDID("$FreeBSD$");
#include "common/t4_regs.h"
#include "common/t4_regs_values.h"
#include "common/t4_msg.h"
-#include "common/t4fw_interface.h"
+#include "t4_l2t.h"
struct fl_buf_info {
int size;
@@ -91,12 +92,15 @@ struct sgl {
bus_dma_segment_t seg[TX_SGL_SEGS];
};
-static void t4_evt_rx(void *);
-static void t4_eth_rx(void *);
+static int service_iq(struct sge_iq *, int);
+static struct mbuf *get_fl_payload(struct adapter *, struct sge_fl *, uint32_t,
+ int *);
+static int t4_eth_rx(struct sge_iq *, const struct rss_header *, struct mbuf *);
static inline void init_iq(struct sge_iq *, struct adapter *, int, int, int,
- int, iq_intr_handler_t *, char *);
-static inline void init_fl(struct sge_fl *, int, char *);
-static inline void init_eq(struct sge_eq *, int, char *);
+ int, char *);
+static inline void init_fl(struct sge_fl *, int, int, char *);
+static inline void init_eq(struct sge_eq *, int, int, uint8_t, uint16_t,
+ char *);
static int alloc_ring(struct adapter *, size_t, bus_dma_tag_t *, bus_dmamap_t *,
bus_addr_t *, void **);
static int free_ring(struct adapter *, bus_dma_tag_t, bus_dmamap_t, bus_addr_t,
@@ -104,26 +108,41 @@ static int free_ring(struct adapter *, bus_dma_tag_t, bus_dmamap_t, bus_addr_t,
static int alloc_iq_fl(struct port_info *, struct sge_iq *, struct sge_fl *,
int, int);
static int free_iq_fl(struct port_info *, struct sge_iq *, struct sge_fl *);
-static int alloc_intrq(struct adapter *, int, int, int);
-static int free_intrq(struct sge_iq *);
-static int alloc_fwq(struct adapter *, int);
-static int free_fwq(struct sge_iq *);
-static int alloc_rxq(struct port_info *, struct sge_rxq *, int, int);
+static int alloc_fwq(struct adapter *);
+static int free_fwq(struct adapter *);
+static int alloc_mgmtq(struct adapter *);
+static int free_mgmtq(struct adapter *);
+static int alloc_rxq(struct port_info *, struct sge_rxq *, int, int,
+ struct sysctl_oid *);
static int free_rxq(struct port_info *, struct sge_rxq *);
-static int alloc_ctrlq(struct adapter *, struct sge_ctrlq *, int);
-static int free_ctrlq(struct adapter *, struct sge_ctrlq *);
-static int alloc_txq(struct port_info *, struct sge_txq *, int);
+#ifndef TCP_OFFLOAD_DISABLE
+static int alloc_ofld_rxq(struct port_info *, struct sge_ofld_rxq *, int, int,
+ struct sysctl_oid *);
+static int free_ofld_rxq(struct port_info *, struct sge_ofld_rxq *);
+#endif
+static int ctrl_eq_alloc(struct adapter *, struct sge_eq *);
+static int eth_eq_alloc(struct adapter *, struct port_info *, struct sge_eq *);
+#ifndef TCP_OFFLOAD_DISABLE
+static int ofld_eq_alloc(struct adapter *, struct port_info *, struct sge_eq *);
+#endif
+static int alloc_eq(struct adapter *, struct port_info *, struct sge_eq *);
+static int free_eq(struct adapter *, struct sge_eq *);
+static int alloc_wrq(struct adapter *, struct port_info *, struct sge_wrq *,
+ struct sysctl_oid *);
+static int free_wrq(struct adapter *, struct sge_wrq *);
+static int alloc_txq(struct port_info *, struct sge_txq *, int,
+ struct sysctl_oid *);
static int free_txq(struct port_info *, struct sge_txq *);
static void oneseg_dma_callback(void *, bus_dma_segment_t *, int, int);
static inline bool is_new_response(const struct sge_iq *, struct rsp_ctrl **);
static inline void iq_next(struct sge_iq *);
static inline void ring_fl_db(struct adapter *, struct sge_fl *);
-static void refill_fl(struct adapter *, struct sge_fl *, int, int);
+static int refill_fl(struct adapter *, struct sge_fl *, int);
+static void refill_sfl(void *);
static int alloc_fl_sdesc(struct sge_fl *);
static void free_fl_sdesc(struct sge_fl *);
-static int alloc_tx_maps(struct sge_txq *);
-static void free_tx_maps(struct sge_txq *);
static void set_fl_tag_idx(struct sge_fl *, int);
+static void add_fl_to_sfl(struct adapter *, struct sge_fl *);
static int get_pkt_sgl(struct sge_txq *, struct mbuf **, struct sgl *, int);
static int free_pkt_sgl(struct sge_txq *, struct sgl *);
@@ -141,15 +160,13 @@ static inline int reclaimable(struct sge_eq *);
static int reclaim_tx_descs(struct sge_txq *, int, int);
static void write_eqflush_wr(struct sge_eq *);
static __be64 get_flit(bus_dma_segment_t *, int, int);
-static int handle_sge_egr_update(struct adapter *,
- const struct cpl_sge_egr_update *);
-static void handle_cpl(struct adapter *, struct sge_iq *);
+static int handle_sge_egr_update(struct sge_iq *, const struct rss_header *,
+ struct mbuf *);
+static int handle_fw_rpl(struct sge_iq *, const struct rss_header *,
+ struct mbuf *);
-static int ctrl_tx(struct adapter *, struct sge_ctrlq *, struct mbuf *);
static int sysctl_uint16(SYSCTL_HANDLER_ARGS);
-extern void filter_rpl(struct adapter *, const struct cpl_set_tcb_rpl *);
-
/*
* Called on MOD_LOAD and fills up fl_buf_info[].
*/
@@ -181,46 +198,103 @@ t4_sge_modload(void)
* We do not initialize any of the queues here, instead the driver
* top-level must request them individually.
*/
-void
+int
t4_sge_init(struct adapter *sc)
{
struct sge *s = &sc->sge;
- int i;
+ int i, rc = 0; /* rc is set to EINVAL if any of the register checks below fails */
+ uint32_t ctrl_mask, ctrl_val, hpsize, v;
+
+ ctrl_mask = V_PKTSHIFT(M_PKTSHIFT) | F_RXPKTCPLMODE |
+ V_INGPADBOUNDARY(M_INGPADBOUNDARY) |
+ F_EGRSTATUSPAGESIZE;
+ ctrl_val = V_PKTSHIFT(FL_PKTSHIFT) | F_RXPKTCPLMODE |
+ V_INGPADBOUNDARY(ilog2(FL_ALIGN) - 5) |
+ V_EGRSTATUSPAGESIZE(SPG_LEN == 128);
+
+ hpsize = V_HOSTPAGESIZEPF0(PAGE_SHIFT - 10) |
+ V_HOSTPAGESIZEPF1(PAGE_SHIFT - 10) |
+ V_HOSTPAGESIZEPF2(PAGE_SHIFT - 10) |
+ V_HOSTPAGESIZEPF3(PAGE_SHIFT - 10) |
+ V_HOSTPAGESIZEPF4(PAGE_SHIFT - 10) |
+ V_HOSTPAGESIZEPF5(PAGE_SHIFT - 10) |
+ V_HOSTPAGESIZEPF6(PAGE_SHIFT - 10) |
+ V_HOSTPAGESIZEPF7(PAGE_SHIFT - 10);
+
+ if (sc->flags & MASTER_PF) { /* only the master PF writes the SGE registers; every caller verifies them below */
+ int intr_timer[SGE_NTIMERS] = {1, 5, 10, 50, 100, 200};
+ int intr_pktcount[SGE_NCOUNTERS] = {1, 8, 16, 32}; /* 63 max */
+
+ t4_set_reg_field(sc, A_SGE_CONTROL, ctrl_mask, ctrl_val);
+ t4_write_reg(sc, A_SGE_HOST_PAGE_SIZE, hpsize);
+ for (i = 0; i < FL_BUF_SIZES; i++) {
+ t4_write_reg(sc, A_SGE_FL_BUFFER_SIZE0 + (4 * i),
+ FL_BUF_SIZE(i));
+ }
+
+ t4_write_reg(sc, A_SGE_INGRESS_RX_THRESHOLD,
+ V_THRESHOLD_0(intr_pktcount[0]) |
+ V_THRESHOLD_1(intr_pktcount[1]) |
+ V_THRESHOLD_2(intr_pktcount[2]) |
+ V_THRESHOLD_3(intr_pktcount[3]));
+
+ t4_write_reg(sc, A_SGE_TIMER_VALUE_0_AND_1,
+ V_TIMERVALUE0(us_to_core_ticks(sc, intr_timer[0])) |
+ V_TIMERVALUE1(us_to_core_ticks(sc, intr_timer[1])));
+ t4_write_reg(sc, A_SGE_TIMER_VALUE_2_AND_3,
+ V_TIMERVALUE2(us_to_core_ticks(sc, intr_timer[2])) |
+ V_TIMERVALUE3(us_to_core_ticks(sc, intr_timer[3])));
+ t4_write_reg(sc, A_SGE_TIMER_VALUE_4_AND_5,
+ V_TIMERVALUE4(us_to_core_ticks(sc, intr_timer[4])) |
+ V_TIMERVALUE5(us_to_core_ticks(sc, intr_timer[5])));
+ }
+
+ v = t4_read_reg(sc, A_SGE_CONTROL); /* read back and compare with the settings this driver expects */
+ if ((v & ctrl_mask) != ctrl_val) {
+ device_printf(sc->dev, "invalid SGE_CONTROL(0x%x)\n", v);
+ rc = EINVAL;
+ }
- t4_set_reg_field(sc, A_SGE_CONTROL, V_PKTSHIFT(M_PKTSHIFT) |
- V_INGPADBOUNDARY(M_INGPADBOUNDARY) |
- F_EGRSTATUSPAGESIZE,
- V_INGPADBOUNDARY(ilog2(FL_ALIGN) - 5) |
- V_PKTSHIFT(FL_PKTSHIFT) |
- F_RXPKTCPLMODE |
- V_EGRSTATUSPAGESIZE(SPG_LEN == 128));
- t4_set_reg_field(sc, A_SGE_HOST_PAGE_SIZE,
- V_HOSTPAGESIZEPF0(M_HOSTPAGESIZEPF0),
- V_HOSTPAGESIZEPF0(PAGE_SHIFT - 10));
+ v = t4_read_reg(sc, A_SGE_HOST_PAGE_SIZE);
+ if (v != hpsize) {
+ device_printf(sc->dev, "invalid SGE_HOST_PAGE_SIZE(0x%x)\n", v);
+ rc = EINVAL;
+ }
for (i = 0; i < FL_BUF_SIZES; i++) {
- t4_write_reg(sc, A_SGE_FL_BUFFER_SIZE0 + (4 * i),
- FL_BUF_SIZE(i));
+ v = t4_read_reg(sc, A_SGE_FL_BUFFER_SIZE0 + (4 * i));
+ if (v != FL_BUF_SIZE(i)) {
+ device_printf(sc->dev,
+ "invalid SGE_FL_BUFFER_SIZE[%d](0x%x)\n", i, v);
+ rc = EINVAL;
+ }
}
- i = t4_read_reg(sc, A_SGE_CONM_CTRL);
- s->fl_starve_threshold = G_EGRTHRESHOLD(i) * 2 + 1;
+ v = t4_read_reg(sc, A_SGE_CONM_CTRL);
+ s->fl_starve_threshold = G_EGRTHRESHOLD(v) * 2 + 1;
+
+ v = t4_read_reg(sc, A_SGE_INGRESS_RX_THRESHOLD); /* cache the packet-count thresholds as read from the hardware */
+ sc->sge.counter_val[0] = G_THRESHOLD_0(v);
+ sc->sge.counter_val[1] = G_THRESHOLD_1(v);
+ sc->sge.counter_val[2] = G_THRESHOLD_2(v);
+ sc->sge.counter_val[3] = G_THRESHOLD_3(v);
+
+ v = t4_read_reg(sc, A_SGE_TIMER_VALUE_0_AND_1); /* cache the timer values, scaled down by core_ticks_per_usec() */
+ sc->sge.timer_val[0] = G_TIMERVALUE0(v) / core_ticks_per_usec(sc);
+ sc->sge.timer_val[1] = G_TIMERVALUE1(v) / core_ticks_per_usec(sc);
+ v = t4_read_reg(sc, A_SGE_TIMER_VALUE_2_AND_3);
+ sc->sge.timer_val[2] = G_TIMERVALUE2(v) / core_ticks_per_usec(sc);
+ sc->sge.timer_val[3] = G_TIMERVALUE3(v) / core_ticks_per_usec(sc);
+ v = t4_read_reg(sc, A_SGE_TIMER_VALUE_4_AND_5);
+ sc->sge.timer_val[4] = G_TIMERVALUE4(v) / core_ticks_per_usec(sc);
+ sc->sge.timer_val[5] = G_TIMERVALUE5(v) / core_ticks_per_usec(sc);
- t4_write_reg(sc, A_SGE_INGRESS_RX_THRESHOLD,
- V_THRESHOLD_0(s->counter_val[0]) |
- V_THRESHOLD_1(s->counter_val[1]) |
- V_THRESHOLD_2(s->counter_val[2]) |
- V_THRESHOLD_3(s->counter_val[3]));
+ t4_register_cpl_handler(sc, CPL_FW4_MSG, handle_fw_rpl);
+ t4_register_cpl_handler(sc, CPL_FW6_MSG, handle_fw_rpl);
+ t4_register_cpl_handler(sc, CPL_SGE_EGR_UPDATE, handle_sge_egr_update);
+ t4_register_cpl_handler(sc, CPL_RX_PKT, t4_eth_rx);
- t4_write_reg(sc, A_SGE_TIMER_VALUE_0_AND_1,
- V_TIMERVALUE0(us_to_core_ticks(sc, s->timer_val[0])) |
- V_TIMERVALUE1(us_to_core_ticks(sc, s->timer_val[1])));
- t4_write_reg(sc, A_SGE_TIMER_VALUE_2_AND_3,
- V_TIMERVALUE2(us_to_core_ticks(sc, s->timer_val[2])) |
- V_TIMERVALUE3(us_to_core_ticks(sc, s->timer_val[3])));
- t4_write_reg(sc, A_SGE_TIMER_VALUE_4_AND_5,
- V_TIMERVALUE4(us_to_core_ticks(sc, s->timer_val[4])) |
- V_TIMERVALUE5(us_to_core_ticks(sc, s->timer_val[5])));
+ return (rc); /* 0, or EINVAL if a check failed; NOTE(review): the CPL handlers above are registered even when rc != 0 */
}
int
@@ -250,8 +324,7 @@ t4_destroy_dma_tag(struct adapter *sc)
}
/*
- * Allocate and initialize the firmware event queue, control queues, and the
- * interrupt queues. The adapter owns all of these queues.
+ * Allocate and initialize the firmware event queue and the management queue.
*
* Returns errno on failure. Resources allocated up to that point may still be
* allocated. Caller is responsible for cleanup in case this function fails.
@@ -259,110 +332,32 @@ t4_destroy_dma_tag(struct adapter *sc)
int
t4_setup_adapter_queues(struct adapter *sc)
{
- int i, j, rc, intr_idx, qsize;
- struct sge_iq *iq;
- struct sge_ctrlq *ctrlq;
- iq_intr_handler_t *handler;
- char name[16];
+ int rc;
ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
- if (sysctl_ctx_init(&sc->ctx) == 0) {
- struct sysctl_oid *oid = device_get_sysctl_tree(sc->dev);
- struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid);
-
- sc->oid_fwq = SYSCTL_ADD_NODE(&sc->ctx, children, OID_AUTO,
- "fwq", CTLFLAG_RD, NULL, "firmware event queue");
- sc->oid_ctrlq = SYSCTL_ADD_NODE(&sc->ctx, children, OID_AUTO,
- "ctrlq", CTLFLAG_RD, NULL, "ctrl queues");
- sc->oid_intrq = SYSCTL_ADD_NODE(&sc->ctx, children, OID_AUTO,
- "intrq", CTLFLAG_RD, NULL, "interrupt queues");
- }
-
- /*
- * Interrupt queues
- */
- intr_idx = sc->intr_count - NINTRQ(sc);
- if (sc->flags & INTR_SHARED) {
- qsize = max((sc->sge.nrxq + 1) * 2, INTR_IQ_QSIZE);
- for (i = 0; i < NINTRQ(sc); i++, intr_idx++) {
- snprintf(name, sizeof(name), "%s intrq%d",
- device_get_nameunit(sc->dev), i);
-
- iq = &sc->sge.intrq[i];
- init_iq(iq, sc, 0, 0, qsize, INTR_IQ_ESIZE, NULL, name);
- rc = alloc_intrq(sc, i % sc->params.nports, i,
- intr_idx);
-
- if (rc != 0) {
- device_printf(sc->dev,
- "failed to create %s: %d\n", name, rc);
- return (rc);
- }
- }
- } else {
- int qidx = 0;
- struct port_info *pi;
-
- for (i = 0; i < sc->params.nports; i++) {
- pi = sc->port[i];
- qsize = max((pi->nrxq + 1) * 2, INTR_IQ_QSIZE);
- for (j = 0; j < pi->nrxq; j++, qidx++, intr_idx++) {
- snprintf(name, sizeof(name), "%s intrq%d",
- device_get_nameunit(pi->dev), j);
-
- iq = &sc->sge.intrq[qidx];
- init_iq(iq, sc, 0, 0, qsize, INTR_IQ_ESIZE,
- NULL, name);
- rc = alloc_intrq(sc, i, qidx, intr_idx);
-
- if (rc != 0) {
- device_printf(sc->dev,
- "failed to create %s: %d\n",
- name, rc);
- return (rc);
- }
- }
- }
- }
+ sysctl_ctx_init(&sc->ctx);
+ sc->flags |= ADAP_SYSCTL_CTX;
/*
* Firmware event queue
*/
- snprintf(name, sizeof(name), "%s fwq", device_get_nameunit(sc->dev));
- if (sc->intr_count > T4_EXTRA_INTR) {
- handler = NULL;
- intr_idx = 1;
- } else {
- handler = t4_evt_rx;
- intr_idx = 0;
- }
-
- iq = &sc->sge.fwq;
- init_iq(iq, sc, 0, 0, FW_IQ_QSIZE, FW_IQ_ESIZE, handler, name);
- rc = alloc_fwq(sc, intr_idx);
+ rc = alloc_fwq(sc);
if (rc != 0) {
device_printf(sc->dev,
"failed to create firmware event queue: %d\n", rc);
-
return (rc);
}
/*
- * Control queues - one per port.
+ * Management queue. This is just a control queue that uses the fwq as
+ * its associated iq.
*/
- ctrlq = &sc->sge.ctrlq[0];
- for (i = 0; i < sc->params.nports; i++, ctrlq++) {
- snprintf(name, sizeof(name), "%s ctrlq%d",
- device_get_nameunit(sc->dev), i);
- init_eq(&ctrlq->eq, CTRL_EQ_QSIZE, name);
-
- rc = alloc_ctrlq(sc, ctrlq, i);
- if (rc != 0) {
- device_printf(sc->dev,
- "failed to create control queue %d: %d\n", i, rc);
- return (rc);
- }
+ rc = alloc_mgmtq(sc);
+ if (rc != 0) {
+ device_printf(sc->dev,
+ "failed to create management queue: %d\n", rc);
+ return (rc);
}
return (rc);
@@ -374,86 +369,278 @@ t4_setup_adapter_queues(struct adapter *sc)
int
t4_teardown_adapter_queues(struct adapter *sc)
{
- int i;
- struct sge_iq *iq;
ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
- /* Do this before freeing the queues */
- if (sc->oid_fwq || sc->oid_ctrlq || sc->oid_intrq) {
+ /* Do this before freeing the queues (management queue and firmware event queue) */
+ if (sc->flags & ADAP_SYSCTL_CTX) {
sysctl_ctx_free(&sc->ctx);
- sc->oid_fwq = NULL;
- sc->oid_ctrlq = NULL;
- sc->oid_intrq = NULL;
+ sc->flags &= ~ADAP_SYSCTL_CTX; /* the flag records that sc->ctx is initialized, so the free happens only once */
}
- for (i = 0; i < sc->params.nports; i++)
- free_ctrlq(sc, &sc->sge.ctrlq[i]);
+ free_mgmtq(sc); /* return values of both free routines are ignored */
+ free_fwq(sc);
+
+ return (0); /* always reports success */
+}
+
+/*
+ * Returns the index of the first interrupt vector that belongs to this port's
+ * rx queues: 0 when the adapter has a single vector, otherwise T4_EXTRA_INTR
+ * plus the number of vectors taken by all the ports that precede this one.
+ */
+static inline int
+first_vector(struct port_info *pi)
+{
+ struct adapter *sc = pi->adapter;
+ int rc = T4_EXTRA_INTR, i;
+
+ if (sc->intr_count == 1)
+ return (0);
- iq = &sc->sge.fwq;
- free_fwq(iq);
+ for_each_port(sc, i) {
+ /*
+ * The vectors being skipped belong to port i, so it is port i's
+ * queue counts that matter here, not those of the port that was
+ * passed in.  Ports do not necessarily have the same number of
+ * queues.
+ */
+ struct port_info *p = sc->port[i];
+
+ if (i == pi->port_id)
+ break;
+
+#ifndef TCP_OFFLOAD_DISABLE
+ if (sc->flags & INTR_DIRECT)
+ rc += p->nrxq + p->nofldrxq;
+ else
+ rc += max(p->nrxq, p->nofldrxq);
+#else
+ /*
+ * Not compiled with offload support and intr_count > 1. Only
+ * NIC queues exist and they'd better be taking direct
+ * interrupts.
+ */
+ KASSERT(sc->flags & INTR_DIRECT,
+ ("%s: intr_count %d, !INTR_DIRECT", __func__,
+ sc->intr_count));
- for (i = 0; i < NINTRQ(sc); i++) {
- iq = &sc->sge.intrq[i];
- free_intrq(iq);
+ rc += p->nrxq;
+#endif
}
- return (0);
+ return (rc);
+}
+
+/*
+ * Given an arbitrary "index," come up with an iq that can be used by other
+ * queues (of this port) for interrupt forwarding, SGE egress updates, etc.
+ * The iq returned is guaranteed to be something that takes direct interrupts.
+ *
+ * With a single interrupt vector the answer is always the firmware event
+ * queue.  Otherwise idx is reduced modulo the number of candidate queues of
+ * this port and used to pick one of the port's NIC or offload rx queues.
+ */
+static struct sge_iq *
+port_intr_iq(struct port_info *pi, int idx)
+{
+ struct adapter *sc = pi->adapter;
+ struct sge *s = &sc->sge;
+ struct sge_iq *iq = NULL;
+
+ if (sc->intr_count == 1)
+ return (&sc->sge.fwq);
+
+#ifndef TCP_OFFLOAD_DISABLE
+ if (sc->flags & INTR_DIRECT) {
+ idx %= pi->nrxq + pi->nofldrxq; /* NIC and offload rx queues are both candidates */
+
+ if (idx >= pi->nrxq) {
+ idx -= pi->nrxq; /* indices past the NIC queues select an offload rx queue */
+ iq = &s->ofld_rxq[pi->first_ofld_rxq + idx].iq;
+ } else
+ iq = &s->rxq[pi->first_rxq + idx].iq;
+
+ } else {
+ idx %= max(pi->nrxq, pi->nofldrxq);
+
+ if (pi->nrxq >= pi->nofldrxq) /* pick from whichever kind of rx queue this port has more of (NIC queues on a tie) */
+ iq = &s->rxq[pi->first_rxq + idx].iq;
+ else
+ iq = &s->ofld_rxq[pi->first_ofld_rxq + idx].iq;
+ }
+#else
+ /*
+ * Not compiled with offload support and intr_count > 1. Only NIC
+ * queues exist and they'd better be taking direct interrupts.
+ */
+ KASSERT(sc->flags & INTR_DIRECT,
+ ("%s: intr_count %d, !INTR_DIRECT", __func__, sc->intr_count));
+
+ idx %= pi->nrxq;
+ iq = &s->rxq[pi->first_rxq + idx].iq;
+#endif
+
+ KASSERT(iq->flags & IQ_INTR, ("%s: EDOOFUS", __func__)); /* the chosen iq must have been set up to take interrupts */
+ return (iq);
}
int
-t4_setup_eth_queues(struct port_info *pi)
+t4_setup_port_queues(struct port_info *pi)
{
- int rc = 0, i, intr_idx;
+ int rc = 0, i, j, intr_idx, iqid;
struct sge_rxq *rxq;
struct sge_txq *txq;
+ struct sge_wrq *ctrlq;
+#ifndef TCP_OFFLOAD_DISABLE
+ struct sge_ofld_rxq *ofld_rxq;
+ struct sge_wrq *ofld_txq;
+#endif
char name[16];
struct adapter *sc = pi->adapter;
+ struct sysctl_oid *oid = device_get_sysctl_tree(pi->dev), *oid2 = NULL;
+ struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid);
- if (sysctl_ctx_init(&pi->ctx) == 0) {
- struct sysctl_oid *oid = device_get_sysctl_tree(pi->dev);
- struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid);
+ oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "rxq", CTLFLAG_RD,
+ NULL, "rx queues");
- pi->oid_rxq = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO,
- "rxq", CTLFLAG_RD, NULL, "rx queues");
- pi->oid_txq = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO,
- "txq", CTLFLAG_RD, NULL, "tx queues");
+#ifndef TCP_OFFLOAD_DISABLE
+ if (is_offload(sc)) {
+ oid2 = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "ofld_rxq",
+ CTLFLAG_RD, NULL,
+ "rx queues for offloaded TCP connections");
}
+#endif
+
+ /* Interrupt vector to start from (when using multiple vectors) */
+ intr_idx = first_vector(pi);
+ /*
+ * First pass over all rx queues (NIC and TOE):
+ * a) initialize iq and fl
+ * b) allocate queue iff it will take direct interrupts.
+ */
for_each_rxq(pi, i, rxq) {
snprintf(name, sizeof(name), "%s rxq%d-iq",
device_get_nameunit(pi->dev), i);
- init_iq(&rxq->iq, sc, pi->tmr_idx, pi->pktc_idx,
- pi->qsize_rxq, RX_IQ_ESIZE, t4_eth_rx, name);
+ init_iq(&rxq->iq, sc, pi->tmr_idx, pi->pktc_idx, pi->qsize_rxq,
+ RX_IQ_ESIZE, name);
snprintf(name, sizeof(name), "%s rxq%d-fl",
device_get_nameunit(pi->dev), i);
- init_fl(&rxq->fl, pi->qsize_rxq / 8, name);
+ init_fl(&rxq->fl, pi->qsize_rxq / 8, pi->ifp->if_mtu, name);
+
+ if (sc->flags & INTR_DIRECT
+#ifndef TCP_OFFLOAD_DISABLE
+ || (sc->intr_count > 1 && pi->nrxq >= pi->nofldrxq)
+#endif
+ ) {
+ rxq->iq.flags |= IQ_INTR;
+ rc = alloc_rxq(pi, rxq, intr_idx, i, oid);
+ if (rc != 0)
+ goto done;
+ intr_idx++;
+ }
+ }
- intr_idx = pi->first_rxq + i;
- if (sc->flags & INTR_SHARED)
- intr_idx %= NINTRQ(sc);
+#ifndef TCP_OFFLOAD_DISABLE
+ for_each_ofld_rxq(pi, i, ofld_rxq) {
- rc = alloc_rxq(pi, rxq, intr_idx, i);
+ snprintf(name, sizeof(name), "%s ofld_rxq%d-iq",
+ device_get_nameunit(pi->dev), i);
+ init_iq(&ofld_rxq->iq, sc, pi->tmr_idx, pi->pktc_idx,
+ pi->qsize_rxq, RX_IQ_ESIZE, name);
+
+ snprintf(name, sizeof(name), "%s ofld_rxq%d-fl",
+ device_get_nameunit(pi->dev), i);
+ init_fl(&ofld_rxq->fl, pi->qsize_rxq / 8, MJUM16BYTES, name);
+
+ if (sc->flags & INTR_DIRECT ||
+ (sc->intr_count > 1 && pi->nofldrxq > pi->nrxq)) {
+ ofld_rxq->iq.flags |= IQ_INTR;
+ rc = alloc_ofld_rxq(pi, ofld_rxq, intr_idx, i, oid2);
+ if (rc != 0)
+ goto done;
+ intr_idx++;
+ }
+ }
+#endif
+
+ /*
+ * Second pass over all rx queues (NIC and TOE). The queues forwarding
+ * their interrupts are allocated now.
+ */
+ j = 0;
+ for_each_rxq(pi, i, rxq) {
+ if (rxq->iq.flags & IQ_INTR)
+ continue;
+
+ intr_idx = port_intr_iq(pi, j)->abs_id;
+
+ rc = alloc_rxq(pi, rxq, intr_idx, i, oid);
+ if (rc != 0)
+ goto done;
+ j++;
+ }
+
+#ifndef TCP_OFFLOAD_DISABLE
+ for_each_ofld_rxq(pi, i, ofld_rxq) {
+ if (ofld_rxq->iq.flags & IQ_INTR)
+ continue;
+
+ intr_idx = port_intr_iq(pi, j)->abs_id;
+
+ rc = alloc_ofld_rxq(pi, ofld_rxq, intr_idx, i, oid2);
if (rc != 0)
goto done;
+ j++;
}
+#endif
+ /*
+ * Now the tx queues. Only one pass needed.
+ */
+ oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "txq", CTLFLAG_RD,
+ NULL, "tx queues");
+ j = 0;
for_each_txq(pi, i, txq) {
+ uint16_t iqid;
+
+ iqid = port_intr_iq(pi, j)->cntxt_id;
snprintf(name, sizeof(name), "%s txq%d",
device_get_nameunit(pi->dev), i);
- init_eq(&txq->eq, pi->qsize_txq, name);
+ init_eq(&txq->eq, EQ_ETH, pi->qsize_txq, pi->tx_chan, iqid,
+ name);
- rc = alloc_txq(pi, txq, i);
+ rc = alloc_txq(pi, txq, i, oid);
if (rc != 0)
goto done;
+ j++;
}
+#ifndef TCP_OFFLOAD_DISABLE
+ oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "ofld_txq",
+ CTLFLAG_RD, NULL, "tx queues for offloaded TCP connections");
+ for_each_ofld_txq(pi, i, ofld_txq) {
+ uint16_t iqid;
+
+ iqid = port_intr_iq(pi, j)->cntxt_id;
+
+ snprintf(name, sizeof(name), "%s ofld_txq%d",
+ device_get_nameunit(pi->dev), i);
+ init_eq(&ofld_txq->eq, EQ_OFLD, pi->qsize_txq, pi->tx_chan,
+ iqid, name);
+
+ snprintf(name, sizeof(name), "%d", i);
+ oid2 = SYSCTL_ADD_NODE(&pi->ctx, SYSCTL_CHILDREN(oid), OID_AUTO,
+ name, CTLFLAG_RD, NULL, "offload tx queue");
+
+ rc = alloc_wrq(sc, pi, ofld_txq, oid2);
+ if (rc != 0)
+ goto done;
+ j++;
+ }
+#endif
+
+ /*
+ * Finally, the control queue.
+ */
+ oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "ctrlq", CTLFLAG_RD,
+ NULL, "ctrl queue");
+ ctrlq = &sc->sge.ctrlq[pi->port_id];
+ iqid = port_intr_iq(pi, 0)->cntxt_id;
+ snprintf(name, sizeof(name), "%s ctrlq", device_get_nameunit(pi->dev));
+ init_eq(&ctrlq->eq, EQ_CTRL, CTRL_EQ_QSIZE, pi->tx_chan, iqid, name);
+ rc = alloc_wrq(sc, pi, ctrlq, oid);
+
done:
if (rc)
- t4_teardown_eth_queues(pi);
+ t4_teardown_port_queues(pi);
return (rc);
}
@@ -462,90 +649,91 @@ done:
* Idempotent
*/
int
-t4_teardown_eth_queues(struct port_info *pi)
+t4_teardown_port_queues(struct port_info *pi)
{
int i;
+ struct adapter *sc = pi->adapter;
struct sge_rxq *rxq;
struct sge_txq *txq;
+#ifndef TCP_OFFLOAD_DISABLE
+ struct sge_ofld_rxq *ofld_rxq;
+ struct sge_wrq *ofld_txq;
+#endif
/* Do this before freeing the queues */
- if (pi->oid_txq || pi->oid_rxq) {
+ if (pi->flags & PORT_SYSCTL_CTX) {
sysctl_ctx_free(&pi->ctx);
- pi->oid_txq = pi->oid_rxq = NULL;
+ pi->flags &= ~PORT_SYSCTL_CTX;
}
+ /*
+ * Take down all the tx queues first, as they reference the rx queues
+ * (for egress updates, etc.).
+ */
+
+ free_wrq(sc, &sc->sge.ctrlq[pi->port_id]);
+
for_each_txq(pi, i, txq) {
free_txq(pi, txq);
}
+#ifndef TCP_OFFLOAD_DISABLE
+ for_each_ofld_txq(pi, i, ofld_txq) {
+ free_wrq(sc, ofld_txq);
+ }
+#endif
+
+ /*
+ * Then take down the rx queues that forward their interrupts, as they
+ * reference other rx queues.
+ */
+
+ for_each_rxq(pi, i, rxq) {
+ if ((rxq->iq.flags & IQ_INTR) == 0)
+ free_rxq(pi, rxq);
+ }
+
+#ifndef TCP_OFFLOAD_DISABLE
+ for_each_ofld_rxq(pi, i, ofld_rxq) {
+ if ((ofld_rxq->iq.flags & IQ_INTR) == 0)
+ free_ofld_rxq(pi, ofld_rxq);
+ }
+#endif
+
+ /*
+ * Then take down the rx queues that take direct interrupts.
+ */
+
for_each_rxq(pi, i, rxq) {
- free_rxq(pi, rxq);
+ if (rxq->iq.flags & IQ_INTR)
+ free_rxq(pi, rxq);
}
+#ifndef TCP_OFFLOAD_DISABLE
+ for_each_ofld_rxq(pi, i, ofld_rxq) {
+ if (ofld_rxq->iq.flags & IQ_INTR)
+ free_ofld_rxq(pi, ofld_rxq);
+ }
+#endif
+
return (0);
}
-/* Deals with errors and the first (and only) interrupt queue */
+/*
+ * Deals with errors and the firmware event queue. All data rx queues forward
+ * their interrupt to the firmware event queue.
+ */
void
t4_intr_all(void *arg)
{
struct adapter *sc = arg;
+ struct sge_iq *fwq = &sc->sge.fwq;
t4_intr_err(arg);
- t4_intr(&sc->sge.intrq[0]);
-}
-
-/* Deals with interrupts, and a few CPLs, on the given interrupt queue */
-void
-t4_intr(void *arg)
-{
- struct sge_iq *iq = arg, *q;
- struct adapter *sc = iq->adapter;
- struct rsp_ctrl *ctrl;
- const struct rss_header *rss;
- int ndesc_pending = 0, ndesc_total = 0;
- int qid, rsp_type;
-
- if (!atomic_cmpset_32(&iq->state, IQS_IDLE, IQS_BUSY))
- return;
-
- while (is_new_response(iq, &ctrl)) {
-
- rmb();
-
- rss = (const void *)iq->cdesc;
- rsp_type = G_RSPD_TYPE(ctrl->u.type_gen);
-
- if (__predict_false(rsp_type == X_RSPD_TYPE_CPL)) {
- handle_cpl(sc, iq);
- goto nextdesc;
- }
-
- qid = ntohl(ctrl->pldbuflen_qid) - sc->sge.iq_start;
- q = sc->sge.iqmap[qid];
-
- if (atomic_cmpset_32(&q->state, IQS_IDLE, IQS_BUSY)) {
- q->handler(q);
- atomic_cmpset_32(&q->state, IQS_BUSY, IQS_IDLE);
- }
-
-nextdesc: ndesc_total++;
- if (++ndesc_pending >= iq->qsize / 4) {
- t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS),
- V_CIDXINC(ndesc_pending) |
- V_INGRESSQID(iq->cntxt_id) |
- V_SEINTARM(
- V_QINTR_TIMER_IDX(X_TIMERREG_UPDATE_CIDX)));
- ndesc_pending = 0;
- }
-
- iq_next(iq);
+ if (atomic_cmpset_int(&fwq->state, IQS_IDLE, IQS_BUSY)) {
+ service_iq(fwq, 0);
+ atomic_cmpset_int(&fwq->state, IQS_BUSY, IQS_IDLE);
}
-
- t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS), V_CIDXINC(ndesc_pending) |
- V_INGRESSQID((u32)iq->cntxt_id) | V_SEINTARM(iq->intr_params));
-
- atomic_cmpset_32(&iq->state, IQS_BUSY, IQS_IDLE);
}
/* Deals with error interrupts */
@@ -558,259 +746,452 @@ t4_intr_err(void *arg)
t4_slow_intr_handler(sc);
}
-/* Deals with the firmware event queue */
void
t4_intr_evt(void *arg)
{
struct sge_iq *iq = arg;
- if (atomic_cmpset_32(&iq->state, IQS_IDLE, IQS_BUSY)) {
- t4_evt_rx(arg);
- atomic_cmpset_32(&iq->state, IQS_BUSY, IQS_IDLE);
+ if (atomic_cmpset_int(&iq->state, IQS_IDLE, IQS_BUSY)) {
+ service_iq(iq, 0);
+ atomic_cmpset_int(&iq->state, IQS_BUSY, IQS_IDLE);
}
}
-static void
-t4_evt_rx(void *arg)
+void
+t4_intr(void *arg)
{
struct sge_iq *iq = arg;
+
+ if (atomic_cmpset_int(&iq->state, IQS_IDLE, IQS_BUSY)) {
+ service_iq(iq, 0);
+ atomic_cmpset_int(&iq->state, IQS_BUSY, IQS_IDLE);
+ }
+}
+
+/*
+ * Deals with anything and everything on the given ingress queue.
+ */
+static int
+service_iq(struct sge_iq *iq, int budget)
+{
+ struct sge_iq *q;
+ struct sge_rxq *rxq = (void *)iq; /* Use iff iq is part of rxq */
+ struct sge_fl *fl = &rxq->fl; /* Use iff IQ_HAS_FL */
struct adapter *sc = iq->adapter;
struct rsp_ctrl *ctrl;
- int ndesc_pending = 0, ndesc_total = 0;
+ const struct rss_header *rss;
+ int ndescs = 0, limit, fl_bufs_used = 0;
+ int rsp_type;
+ uint32_t lq;
+ struct mbuf *m0;
+ STAILQ_HEAD(, sge_iq) iql = STAILQ_HEAD_INITIALIZER(iql);
+
+ limit = budget ? budget : iq->qsize / 8;
+
+ KASSERT(iq->state == IQS_BUSY, ("%s: iq %p not BUSY", __func__, iq));
+
+ /*
+ * We always come back and check the descriptor ring for new indirect
+ * interrupts and other responses after running a single handler.
+ */
+ for (;;) {
+ while (is_new_response(iq, &ctrl)) {
+
+ rmb();
+
+ m0 = NULL;
+ rsp_type = G_RSPD_TYPE(ctrl->u.type_gen);
+ lq = be32toh(ctrl->pldbuflen_qid);
+ rss = (const void *)iq->cdesc;
+
+ switch (rsp_type) {
+ case X_RSPD_TYPE_FLBUF:
+
+ KASSERT(iq->flags & IQ_HAS_FL,
+ ("%s: data for an iq (%p) with no freelist",
+ __func__, iq));
+
+ m0 = get_fl_payload(sc, fl, lq, &fl_bufs_used);
+#ifdef T4_PKT_TIMESTAMP
+ /*
+ * 60 bit timestamp for the payload is
+ * *(uint64_t *)m0->m_pktdat. Note that it is
+ * in the leading free-space in the mbuf. The
+ * kernel can clobber it during a pullup,
+ * m_copymdata, etc. You need to make sure that
+ * the mbuf reaches you unmolested if you care
+ * about the timestamp.
+ */
+ *(uint64_t *)m0->m_pktdat =
+ be64toh(ctrl->u.last_flit) &
+ 0xfffffffffffffff;
+#endif
+
+ /* fall through */
+
+ case X_RSPD_TYPE_CPL:
+ KASSERT(rss->opcode < NUM_CPL_CMDS,
+ ("%s: bad opcode %02x.", __func__,
+ rss->opcode));
+ sc->cpl_handler[rss->opcode](iq, rss, m0);
+ break;
+
+ case X_RSPD_TYPE_INTR:
+
+ /*
+ * Interrupts should be forwarded only to queues
+ * that are not forwarding their interrupts.
+ * This means service_iq can recurse but only 1
+ * level deep.
+ */
+ KASSERT(budget == 0,
+ ("%s: budget %u, rsp_type %u", __func__,
+ budget, rsp_type));
+
+ q = sc->sge.iqmap[lq - sc->sge.iq_start];
+ if (atomic_cmpset_int(&q->state, IQS_IDLE,
+ IQS_BUSY)) {
+ if (service_iq(q, q->qsize / 8) == 0) {
+ atomic_cmpset_int(&q->state,
+ IQS_BUSY, IQS_IDLE);
+ } else {
+ STAILQ_INSERT_TAIL(&iql, q,
+ link);
+ }
+ }
+ break;
- KASSERT(iq == &sc->sge.fwq, ("%s: unexpected ingress queue", __func__));
+ default:
+ panic("%s: rsp_type %u", __func__, rsp_type);
+ }
- while (is_new_response(iq, &ctrl)) {
- int rsp_type;
+ iq_next(iq);
+ if (++ndescs == limit) {
+ t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS),
+ V_CIDXINC(ndescs) |
+ V_INGRESSQID(iq->cntxt_id) |
+ V_SEINTARM(V_QINTR_TIMER_IDX(X_TIMERREG_UPDATE_CIDX)));
+ ndescs = 0;
+
+ if (fl_bufs_used > 0) {
+ FL_LOCK(fl);
+ fl->needed += fl_bufs_used;
+ refill_fl(sc, fl, fl->cap / 8);
+ FL_UNLOCK(fl);
+ fl_bufs_used = 0;
+ }
- rmb();
+ if (budget)
+ return (EINPROGRESS);
+ }
+ }
- rsp_type = G_RSPD_TYPE(ctrl->u.type_gen);
- if (__predict_false(rsp_type != X_RSPD_TYPE_CPL))
- panic("%s: unexpected rsp_type %d", __func__, rsp_type);
+ if (STAILQ_EMPTY(&iql))
+ break;
- handle_cpl(sc, iq);
+ /*
+ * Process the head only, and send it to the back of the list if
+ * it's still not done.
+ */
+ q = STAILQ_FIRST(&iql);
+ STAILQ_REMOVE_HEAD(&iql, link);
+ if (service_iq(q, q->qsize / 8) == 0)
+ atomic_cmpset_int(&q->state, IQS_BUSY, IQS_IDLE);
+ else
+ STAILQ_INSERT_TAIL(&iql, q, link);
+ }
- ndesc_total++;
- if (++ndesc_pending >= iq->qsize / 4) {
- t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS),
- V_CIDXINC(ndesc_pending) |
- V_INGRESSQID(iq->cntxt_id) |
- V_SEINTARM(
- V_QINTR_TIMER_IDX(X_TIMERREG_UPDATE_CIDX)));
- ndesc_pending = 0;
+#ifdef INET
+ if (iq->flags & IQ_LRO_ENABLED) {
+ struct lro_ctrl *lro = &rxq->lro;
+ struct lro_entry *l;
+
+ while (!SLIST_EMPTY(&lro->lro_active)) {
+ l = SLIST_FIRST(&lro->lro_active);
+ SLIST_REMOVE_HEAD(&lro->lro_active, next);
+ tcp_lro_flush(lro, l);
}
+ }
+#endif
+
+ t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS), V_CIDXINC(ndescs) |
+ V_INGRESSQID((u32)iq->cntxt_id) | V_SEINTARM(iq->intr_params));
+
+ if (iq->flags & IQ_HAS_FL) {
+ int starved;
- iq_next(iq);
+ FL_LOCK(fl);
+ fl->needed += fl_bufs_used;
+ starved = refill_fl(sc, fl, fl->cap / 4);
+ FL_UNLOCK(fl);
+ if (__predict_false(starved != 0))
+ add_fl_to_sfl(sc, fl);
}
- t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS), V_CIDXINC(ndesc_pending) |
- V_INGRESSQID(iq->cntxt_id) | V_SEINTARM(iq->intr_params));
+ return (0);
}
+
#ifdef T4_PKT_TIMESTAMP
#define RX_COPY_THRESHOLD (MINCLSIZE - 8)
#else
#define RX_COPY_THRESHOLD MINCLSIZE
#endif
-static void
-t4_eth_rx(void *arg)
+static struct mbuf *
+get_fl_payload(struct adapter *sc, struct sge_fl *fl, uint32_t len_newbuf,
+ int *fl_bufs_used)
{
- struct sge_rxq *rxq = arg;
- struct sge_iq *iq = arg;
- struct adapter *sc = iq->adapter;
- struct rsp_ctrl *ctrl;
- struct ifnet *ifp = rxq->ifp;
- struct sge_fl *fl = &rxq->fl;
- struct fl_sdesc *sd = &fl->sdesc[fl->cidx], *sd_next;
- const struct rss_header *rss;
- const struct cpl_rx_pkt *cpl;
- uint32_t len;
- int ndescs = 0, i;
struct mbuf *m0, *m;
-#ifdef INET
- struct lro_ctrl *lro = &rxq->lro;
- struct lro_entry *l;
-#endif
+ struct fl_sdesc *sd = &fl->sdesc[fl->cidx];
+ unsigned int nbuf, len;
- prefetch(sd->m);
- prefetch(sd->cl);
+ /*
+ * No assertion for the fl lock because we don't need it. This routine
+ * is called only from the rx interrupt handler and it only updates
+ * fl->cidx. (Contrast that with fl->pidx/fl->needed which could be
+ * updated in the rx interrupt handler or the starvation helper routine.
+ * That's why code that manipulates fl->pidx/fl->needed needs the fl
+ * lock but this routine does not).
+ */
- iq->intr_next = iq->intr_params;
- while (is_new_response(iq, &ctrl)) {
+ if (__predict_false((len_newbuf & F_RSPD_NEWBUF) == 0))
+ panic("%s: cannot handle packed frames", __func__);
+ len = G_RSPD_LEN(len_newbuf);
- rmb();
+ m0 = sd->m;
+ sd->m = NULL; /* consumed */
- rss = (const void *)iq->cdesc;
- i = G_RSPD_TYPE(ctrl->u.type_gen);
+ bus_dmamap_sync(fl->tag[sd->tag_idx], sd->map, BUS_DMASYNC_POSTREAD);
+ m_init(m0, NULL, 0, M_NOWAIT, MT_DATA, M_PKTHDR);
+#ifdef T4_PKT_TIMESTAMP
+ /* Leave room for a timestamp */
+ m0->m_data += 8;
+#endif
- KASSERT(i == X_RSPD_TYPE_FLBUF && rss->opcode == CPL_RX_PKT,
- ("%s: unexpected type %d CPL opcode 0x%x",
- __func__, i, rss->opcode));
+ if (len < RX_COPY_THRESHOLD) {
+ /* copy data to mbuf, buffer will be recycled */
+ bcopy(sd->cl, mtod(m0, caddr_t), len);
+ m0->m_len = len;
+ } else {
+ bus_dmamap_unload(fl->tag[sd->tag_idx], sd->map);
+ m_cljset(m0, sd->cl, FL_BUF_TYPE(sd->tag_idx));
+ sd->cl = NULL; /* consumed */
+ m0->m_len = min(len, FL_BUF_SIZE(sd->tag_idx));
+ }
+ m0->m_pkthdr.len = len;
- sd_next = sd + 1;
- if (__predict_false(fl->cidx + 1 == fl->cap))
- sd_next = fl->sdesc;
- prefetch(sd_next->m);
- prefetch(sd_next->cl);
+ sd++;
+ if (__predict_false(++fl->cidx == fl->cap)) {
+ sd = fl->sdesc;
+ fl->cidx = 0;
+ }
- cpl = (const void *)(rss + 1);
+ m = m0;
+ len -= m->m_len;
+ nbuf = 1; /* # of fl buffers used */
- m0 = sd->m;
+ while (len > 0) {
+ m->m_next = sd->m;
sd->m = NULL; /* consumed */
-
- len = be32toh(ctrl->pldbuflen_qid);
- if (__predict_false((len & F_RSPD_NEWBUF) == 0))
- panic("%s: cannot handle packed frames", __func__);
- len = G_RSPD_LEN(len);
+ m = m->m_next;
bus_dmamap_sync(fl->tag[sd->tag_idx], sd->map,
BUS_DMASYNC_POSTREAD);
- m_init(m0, NULL, 0, M_NOWAIT, MT_DATA, M_PKTHDR);
-
-#ifdef T4_PKT_TIMESTAMP
- *mtod(m0, uint64_t *) =
- be64toh(ctrl->u.last_flit & 0xfffffffffffffff);
- m0->m_data += 8;
-
- /*
- * 60 bit timestamp value is *(uint64_t *)m0->m_pktdat. Note
- * that it is in the leading free-space (see M_LEADINGSPACE) in
- * the mbuf. The kernel can clobber it during a pullup,
- * m_copymdata, etc. You need to make sure that the mbuf
- * reaches you unmolested if you care about the timestamp.
- */
-#endif
-
- if (len < RX_COPY_THRESHOLD) {
- /* copy data to mbuf, buffer will be recycled */
- bcopy(sd->cl, mtod(m0, caddr_t), len);
- m0->m_len = len;
+ m_init(m, NULL, 0, M_NOWAIT, MT_DATA, 0);
+ if (len <= MLEN) {
+ bcopy(sd->cl, mtod(m, caddr_t), len);
+ m->m_len = len;
} else {
- bus_dmamap_unload(fl->tag[sd->tag_idx], sd->map);
- m_cljset(m0, sd->cl, FL_BUF_TYPE(sd->tag_idx));
+ bus_dmamap_unload(fl->tag[sd->tag_idx],
+ sd->map);
+ m_cljset(m, sd->cl, FL_BUF_TYPE(sd->tag_idx));
sd->cl = NULL; /* consumed */
- m0->m_len = min(len, FL_BUF_SIZE(sd->tag_idx));
+ m->m_len = min(len, FL_BUF_SIZE(sd->tag_idx));
}
- len -= FL_PKTSHIFT;
- m0->m_len -= FL_PKTSHIFT;
- m0->m_data += FL_PKTSHIFT;
-
- m0->m_pkthdr.len = len;
- m0->m_pkthdr.rcvif = ifp;
- m0->m_flags |= M_FLOWID;
- m0->m_pkthdr.flowid = rss->hash_val;
-
- if (cpl->csum_calc && !cpl->err_vec &&
- ifp->if_capenable & IFCAP_RXCSUM) {
- m0->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED |
- CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
- if (cpl->ip_frag)
- m0->m_pkthdr.csum_data = be16toh(cpl->csum);
- else
- m0->m_pkthdr.csum_data = 0xffff;
- rxq->rxcsum++;
- }
-
- if (cpl->vlan_ex) {
- m0->m_pkthdr.ether_vtag = be16toh(cpl->vlan);
- m0->m_flags |= M_VLANTAG;
- rxq->vlan_extraction++;
- }
-
- i = 1; /* # of fl sdesc used */
- sd = sd_next;
- if (__predict_false(++fl->cidx == fl->cap))
+ sd++;
+ if (__predict_false(++fl->cidx == fl->cap)) {
+ sd = fl->sdesc;
fl->cidx = 0;
+ }
- len -= m0->m_len;
- m = m0;
- while (len) {
- i++;
-
- sd_next = sd + 1;
- if (__predict_false(fl->cidx + 1 == fl->cap))
- sd_next = fl->sdesc;
- prefetch(sd_next->m);
- prefetch(sd_next->cl);
-
- m->m_next = sd->m;
- sd->m = NULL; /* consumed */
- m = m->m_next;
-
- bus_dmamap_sync(fl->tag[sd->tag_idx], sd->map,
- BUS_DMASYNC_POSTREAD);
-
- m_init(m, NULL, 0, M_NOWAIT, MT_DATA, 0);
- if (len <= MLEN) {
- bcopy(sd->cl, mtod(m, caddr_t), len);
- m->m_len = len;
- } else {
- bus_dmamap_unload(fl->tag[sd->tag_idx],
- sd->map);
- m_cljset(m, sd->cl, FL_BUF_TYPE(sd->tag_idx));
- sd->cl = NULL; /* consumed */
- m->m_len = min(len, FL_BUF_SIZE(sd->tag_idx));
- }
+ len -= m->m_len;
+ nbuf++;
+ }
- i++;
- sd = sd_next;
- if (__predict_false(++fl->cidx == fl->cap))
- fl->cidx = 0;
+ (*fl_bufs_used) += nbuf;
- len -= m->m_len;
- }
+ return (m0);
+}
+static int
+t4_eth_rx(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m0)
+{
+ struct sge_rxq *rxq = (void *)iq;
+ struct ifnet *ifp = rxq->ifp;
+ const struct cpl_rx_pkt *cpl = (const void *)(rss + 1);
#ifdef INET
- if (cpl->l2info & htobe32(F_RXF_LRO) &&
- rxq->flags & RXQ_LRO_ENABLED &&
- tcp_lro_rx(lro, m0, 0) == 0) {
- /* queued for LRO */
- } else
+ struct lro_ctrl *lro = &rxq->lro;
#endif
- ifp->if_input(ifp, m0);
- FL_LOCK(fl);
- fl->needed += i;
- if (fl->needed >= 32)
- refill_fl(sc, fl, 64, 32);
- FL_UNLOCK(fl);
+ KASSERT(m0 != NULL, ("%s: no payload with opcode %02x", __func__,
+ rss->opcode));
- if (++ndescs > 32) {
- t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS),
- V_CIDXINC(ndescs) |
- V_INGRESSQID((u32)iq->cntxt_id) |
- V_SEINTARM(V_QINTR_TIMER_IDX(X_TIMERREG_UPDATE_CIDX)));
- ndescs = 0;
- }
+ m0->m_pkthdr.len -= FL_PKTSHIFT;
+ m0->m_len -= FL_PKTSHIFT;
+ m0->m_data += FL_PKTSHIFT;
+
+ m0->m_pkthdr.rcvif = ifp;
+ m0->m_flags |= M_FLOWID;
+ m0->m_pkthdr.flowid = rss->hash_val;
- iq_next(iq);
+ if (cpl->csum_calc && !cpl->err_vec &&
+ ifp->if_capenable & IFCAP_RXCSUM) {
+ m0->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED |
+ CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
+ if (cpl->ip_frag)
+ m0->m_pkthdr.csum_data = be16toh(cpl->csum);
+ else
+ m0->m_pkthdr.csum_data = 0xffff;
+ rxq->rxcsum++;
}
-#ifdef INET
- while (!SLIST_EMPTY(&lro->lro_active)) {
- l = SLIST_FIRST(&lro->lro_active);
- SLIST_REMOVE_HEAD(&lro->lro_active, next);
- tcp_lro_flush(lro, l);
+ if (cpl->vlan_ex) {
+ m0->m_pkthdr.ether_vtag = be16toh(cpl->vlan);
+ m0->m_flags |= M_VLANTAG;
+ rxq->vlan_extraction++;
}
-#endif
- t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS), V_CIDXINC(ndescs) |
- V_INGRESSQID((u32)iq->cntxt_id) | V_SEINTARM(iq->intr_next));
+#ifdef INET
+ if (cpl->l2info & htobe32(F_RXF_LRO) &&
+ iq->flags & IQ_LRO_ENABLED &&
+ tcp_lro_rx(lro, m0, 0) == 0) {
+ /* queued for LRO */
+ } else
+#endif
+ ifp->if_input(ifp, m0);
- FL_LOCK(fl);
- if (fl->needed >= 32)
- refill_fl(sc, fl, 128, 8);
- FL_UNLOCK(fl);
+ return (0);
}
int
t4_mgmt_tx(struct adapter *sc, struct mbuf *m)
{
- return ctrl_tx(sc, &sc->sge.ctrlq[0], m);
+ return t4_wrq_tx(sc, &sc->sge.mgmtq, m);
+}
+
+/*
+ * Doesn't fail. Holds on to work requests it can't send right away.
+ */
+int
+t4_wrq_tx_locked(struct adapter *sc, struct sge_wrq *wrq, struct mbuf *m0)
+{
+ struct sge_eq *eq = &wrq->eq;
+ int can_reclaim;
+ caddr_t dst;
+ struct mbuf *wr, *next;
+
+ TXQ_LOCK_ASSERT_OWNED(wrq);
+ KASSERT((eq->flags & EQ_TYPEMASK) == EQ_OFLD ||
+ (eq->flags & EQ_TYPEMASK) == EQ_CTRL,
+ ("%s: eq type %d", __func__, eq->flags & EQ_TYPEMASK));
+
+ if (__predict_true(m0 != NULL)) {
+ if (wrq->head)
+ wrq->tail->m_nextpkt = m0;
+ else
+ wrq->head = m0;
+ while (m0->m_nextpkt)
+ m0 = m0->m_nextpkt;
+ wrq->tail = m0;
+ }
+
+ can_reclaim = reclaimable(eq);
+ if (__predict_false(eq->flags & EQ_STALLED)) {
+ if (can_reclaim < tx_resume_threshold(eq))
+ return (0);
+ eq->flags &= ~EQ_STALLED;
+ eq->unstalled++;
+ }
+ eq->cidx += can_reclaim;
+ eq->avail += can_reclaim;
+ if (__predict_false(eq->cidx >= eq->cap))
+ eq->cidx -= eq->cap;
+
+ for (wr = wrq->head; wr; wr = next) {
+ int ndesc;
+ struct mbuf *m;
+
+ next = wr->m_nextpkt;
+ wr->m_nextpkt = NULL;
+
+ M_ASSERTPKTHDR(wr);
+ KASSERT(wr->m_pkthdr.len > 0 && (wr->m_pkthdr.len & 0x7) == 0,
+ ("%s: work request len %d.", __func__, wr->m_pkthdr.len));
+
+ if (wr->m_pkthdr.len > SGE_MAX_WR_LEN) {
+#ifdef INVARIANTS
+ panic("%s: oversized work request", __func__);
+#else
+ log(LOG_ERR, "%s: %s work request too long (%d)",
+ device_get_nameunit(sc->dev), __func__,
+ wr->m_pkthdr.len);
+ m_freem(wr);
+ continue;
+#endif
+ }
+
+ ndesc = howmany(wr->m_pkthdr.len, EQ_ESIZE);
+ if (eq->avail < ndesc) {
+ wr->m_nextpkt = next;
+ wrq->no_desc++;
+ break;
+ }
+
+ dst = (void *)&eq->desc[eq->pidx];
+ for (m = wr; m; m = m->m_next)
+ copy_to_txd(eq, mtod(m, caddr_t), &dst, m->m_len);
+
+ eq->pidx += ndesc;
+ eq->avail -= ndesc;
+ if (__predict_false(eq->pidx >= eq->cap))
+ eq->pidx -= eq->cap;
+
+ eq->pending += ndesc;
+ if (eq->pending > 16)
+ ring_eq_db(sc, eq);
+
+ wrq->tx_wrs++;
+ m_freem(wr);
+
+ if (eq->avail < 8) {
+ can_reclaim = reclaimable(eq);
+ eq->cidx += can_reclaim;
+ eq->avail += can_reclaim;
+ if (__predict_false(eq->cidx >= eq->cap))
+ eq->cidx -= eq->cap;
+ }
+ }
+
+ if (eq->pending)
+ ring_eq_db(sc, eq);
+
+ if (wr == NULL)
+ wrq->head = wrq->tail = NULL;
+ else {
+ wrq->head = wr;
+
+ KASSERT(wrq->tail->m_nextpkt == NULL,
+ ("%s: wrq->tail grew a tail of its own", __func__));
+
+ eq->flags |= EQ_STALLED;
+ if (callout_pending(&eq->tx_callout) == 0)
+ callout_reset(&eq->tx_callout, 1, t4_tx_callout, eq);
+ }
+
+ return (0);
}
/* Per-packet header in a coalesced tx WR, before the SGL starts (in flits) */
@@ -852,6 +1233,8 @@ t4_eth_tx(struct ifnet *ifp, struct sge_txq *txq, struct mbuf *m)
TXQ_LOCK_ASSERT_OWNED(txq);
KASSERT(m, ("%s: called with nothing to do.", __func__));
+ KASSERT((eq->flags & EQ_TYPEMASK) == EQ_ETH,
+ ("%s: eq type %d", __func__, eq->flags & EQ_TYPEMASK));
prefetch(&eq->desc[eq->pidx]);
prefetch(&txq->sdesc[eq->pidx]);
@@ -859,8 +1242,25 @@ t4_eth_tx(struct ifnet *ifp, struct sge_txq *txq, struct mbuf *m)
txpkts.npkt = 0;/* indicates there's nothing in txpkts */
coalescing = 0;
- if (eq->avail < 8)
- reclaim_tx_descs(txq, 0, 8);
+ can_reclaim = reclaimable(eq);
+ if (__predict_false(eq->flags & EQ_STALLED)) {
+ if (can_reclaim < tx_resume_threshold(eq)) {
+ txq->m = m;
+ return (0);
+ }
+ eq->flags &= ~EQ_STALLED;
+ eq->unstalled++;
+ }
+
+ if (__predict_false(eq->flags & EQ_DOOMED)) {
+ m_freem(m);
+ while ((m = buf_ring_dequeue_sc(txq->br)) != NULL)
+ m_freem(m);
+ return (ENETDOWN);
+ }
+
+ if (eq->avail < 8 && can_reclaim)
+ reclaim_tx_descs(txq, can_reclaim, 32);
for (; m; m = next ? next : drbr_dequeue(ifp, br)) {
@@ -938,15 +1338,13 @@ t4_eth_tx(struct ifnet *ifp, struct sge_txq *txq, struct mbuf *m)
ETHER_BPF_MTAP(ifp, m);
if (sgl.nsegs == 0)
m_freem(m);
-
doorbell:
- /* Fewer and fewer doorbells as the queue fills up */
- if (eq->pending >= (1 << (fls(eq->qsize - eq->avail) / 2)))
+ if (eq->pending >= 64)
ring_eq_db(sc, eq);
can_reclaim = reclaimable(eq);
if (can_reclaim >= 32)
- reclaim_tx_descs(txq, can_reclaim, 32);
+ reclaim_tx_descs(txq, can_reclaim, 64);
}
if (txpkts.npkt > 0)
@@ -957,14 +1355,17 @@ doorbell:
* This can happen when we're short of tx descriptors (no_desc) or maybe
* even DMA maps (no_dmamap). Either way, a credit flush and reclaim
* will get things going again.
- *
- * If eq->avail is already 0 we know a credit flush was requested in the
- * WR that reduced it to 0 so we don't need another flush (we don't have
- * any descriptor for a flush WR anyway, duh).
*/
- if (m && eq->avail > 0 && !(eq->flags & EQ_CRFLUSHED)) {
+ if (m && !(eq->flags & EQ_CRFLUSHED)) {
struct tx_sdesc *txsd = &txq->sdesc[eq->pidx];
+ /*
+ * If EQ_CRFLUSHED is not set then we know we have at least one
+ * available descriptor because any WR that reduces eq->avail to
+ * 0 also sets EQ_CRFLUSHED.
+ */
+ KASSERT(eq->avail > 0, ("%s: no space for eqflush.", __func__));
+
txsd->desc_used = 1;
txsd->credits = 0;
write_eqflush_wr(eq);
@@ -974,9 +1375,10 @@ doorbell:
if (eq->pending)
ring_eq_db(sc, eq);
- can_reclaim = reclaimable(eq);
- if (can_reclaim >= 32)
- reclaim_tx_descs(txq, can_reclaim, 128);
+ reclaim_tx_descs(txq, 0, 128);
+
+ if (eq->flags & EQ_STALLED && callout_pending(&eq->tx_callout) == 0)
+ callout_reset(&eq->tx_callout, 1, t4_tx_callout, eq);
return (0);
}
@@ -987,24 +1389,29 @@ t4_update_fl_bufsize(struct ifnet *ifp)
struct port_info *pi = ifp->if_softc;
struct sge_rxq *rxq;
struct sge_fl *fl;
- int i;
+ int i, bufsize;
+ /* large enough for a frame even when VLAN extraction is disabled */
+ bufsize = FL_PKTSHIFT + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN +
+ ifp->if_mtu;
for_each_rxq(pi, i, rxq) {
fl = &rxq->fl;
FL_LOCK(fl);
- set_fl_tag_idx(fl, ifp->if_mtu);
+ set_fl_tag_idx(fl, bufsize);
FL_UNLOCK(fl);
}
}
-/*
- * A non-NULL handler indicates this iq will not receive direct interrupts, the
- * handler will be invoked by an interrupt queue.
- */
+int
+can_resume_tx(struct sge_eq *eq)
+{
+ return (reclaimable(eq) >= tx_resume_threshold(eq));
+}
+
static inline void
init_iq(struct sge_iq *iq, struct adapter *sc, int tmr_idx, int pktc_idx,
- int qsize, int esize, iq_intr_handler_t *handler, char *name)
+ int qsize, int esize, char *name)
{
KASSERT(tmr_idx >= 0 && tmr_idx < SGE_NTIMERS,
("%s: bad tmr_idx %d", __func__, tmr_idx));
@@ -1018,22 +1425,32 @@ init_iq(struct sge_iq *iq, struct adapter *sc, int tmr_idx, int pktc_idx,
iq->intr_pktc_idx = pktc_idx;
iq->qsize = roundup(qsize, 16); /* See FW_IQ_CMD/iqsize */
iq->esize = max(esize, 16); /* See FW_IQ_CMD/iqesize */
- iq->handler = handler;
strlcpy(iq->lockname, name, sizeof(iq->lockname));
}
static inline void
-init_fl(struct sge_fl *fl, int qsize, char *name)
+init_fl(struct sge_fl *fl, int qsize, int bufsize, char *name)
{
fl->qsize = qsize;
strlcpy(fl->lockname, name, sizeof(fl->lockname));
+ set_fl_tag_idx(fl, bufsize);
}
static inline void
-init_eq(struct sge_eq *eq, int qsize, char *name)
+init_eq(struct sge_eq *eq, int eqtype, int qsize, uint8_t tx_chan,
+ uint16_t iqid, char *name)
{
+ KASSERT(tx_chan < NCHAN, ("%s: bad tx channel %d", __func__, tx_chan));
+ KASSERT(eqtype <= EQ_TYPEMASK, ("%s: bad qtype %d", __func__, eqtype));
+
+ eq->flags = eqtype & EQ_TYPEMASK;
+ eq->tx_chan = tx_chan;
+ eq->iqid = iqid;
eq->qsize = qsize;
strlcpy(eq->lockname, name, sizeof(eq->lockname));
+
+ TASK_INIT(&eq->tx_task, 0, t4_tx_task, eq);
+ callout_init(&eq->tx_callout, CALLOUT_MPSAFE);
}
static int
@@ -1090,9 +1507,9 @@ free_ring(struct adapter *sc, bus_dma_tag_t tag, bus_dmamap_t map,
* Returns errno on failure. Resources allocated up to that point may still be
* allocated. Caller is responsible for cleanup in case this function fails.
*
- * If the ingress queue will take interrupts directly (iq->handler == NULL) then
+ * If the ingress queue will take interrupts directly (iq->flags & IQ_INTR) then
* the intr_idx specifies the vector, starting from 0. Otherwise it specifies
- * the index of the interrupt queue to which its interrupts will be forwarded.
+ * the abs_id of the ingress queue to which its interrupts should be forwarded.
*/
static int
alloc_iq_fl(struct port_info *pi, struct sge_iq *iq, struct sge_fl *fl,
@@ -1122,16 +1539,12 @@ alloc_iq_fl(struct port_info *pi, struct sge_iq *iq, struct sge_fl *fl,
if (iq == &sc->sge.fwq)
v |= F_FW_IQ_CMD_IQASYNCH;
- if (iq->handler) {
- KASSERT(intr_idx < NINTRQ(sc),
- ("%s: invalid indirect intr_idx %d", __func__, intr_idx));
- v |= F_FW_IQ_CMD_IQANDST;
- v |= V_FW_IQ_CMD_IQANDSTINDEX(sc->sge.intrq[intr_idx].abs_id);
- } else {
+ if (iq->flags & IQ_INTR) {
KASSERT(intr_idx < sc->intr_count,
("%s: invalid direct intr_idx %d", __func__, intr_idx));
- v |= V_FW_IQ_CMD_IQANDSTINDEX(intr_idx);
- }
+ } else
+ v |= F_FW_IQ_CMD_IQANDST;
+ v |= V_FW_IQ_CMD_IQANDSTINDEX(intr_idx);
c.type_to_iqandstindex = htobe32(v |
V_FW_IQ_CMD_TYPE(FW_IQ_TYPE_FL_INT_CAP) |
@@ -1176,7 +1589,6 @@ alloc_iq_fl(struct port_info *pi, struct sge_iq *iq, struct sge_fl *fl,
/* Allocate space for one software descriptor per buffer. */
fl->cap = (fl->qsize - SPG_LEN / RX_FL_ESIZE) * 8;
FL_LOCK(fl);
- set_fl_tag_idx(fl, pi->ifp->if_mtu);
rc = alloc_fl_sdesc(fl);
FL_UNLOCK(fl);
if (rc != 0) {
@@ -1186,6 +1598,7 @@ alloc_iq_fl(struct port_info *pi, struct sge_iq *iq, struct sge_fl *fl,
return (rc);
}
fl->needed = fl->cap;
+ fl->lowat = roundup(sc->sge.fl_starve_threshold, 8);
c.iqns_to_fl0congen |=
htobe32(V_FW_IQ_CMD_FL0HOSTFCMODE(X_HOSTFCMODE_NONE) |
@@ -1217,12 +1630,13 @@ alloc_iq_fl(struct port_info *pi, struct sge_iq *iq, struct sge_fl *fl,
iq->intr_next = iq->intr_params;
iq->cntxt_id = be16toh(c.iqid);
iq->abs_id = be16toh(c.physiqid);
- iq->flags |= (IQ_ALLOCATED | IQ_STARTED);
+ iq->flags |= IQ_ALLOCATED;
cntxt_id = iq->cntxt_id - sc->sge.iq_start;
- KASSERT(cntxt_id < sc->sge.niq,
- ("%s: iq->cntxt_id (%d) more than the max (%d)", __func__,
- cntxt_id, sc->sge.niq - 1));
+ if (cntxt_id >= sc->sge.niq) {
+ panic ("%s: iq->cntxt_id (%d) more than the max (%d)", __func__,
+ cntxt_id, sc->sge.niq - 1);
+ }
sc->sge.iqmap[cntxt_id] = iq;
if (fl) {
@@ -1230,29 +1644,28 @@ alloc_iq_fl(struct port_info *pi, struct sge_iq *iq, struct sge_fl *fl,
fl->pidx = fl->cidx = 0;
cntxt_id = fl->cntxt_id - sc->sge.eq_start;
- KASSERT(cntxt_id < sc->sge.neq,
- ("%s: fl->cntxt_id (%d) more than the max (%d)", __func__,
- cntxt_id, sc->sge.neq - 1));
+ if (cntxt_id >= sc->sge.neq) {
+ panic("%s: fl->cntxt_id (%d) more than the max (%d)",
+ __func__, cntxt_id, sc->sge.neq - 1);
+ }
sc->sge.eqmap[cntxt_id] = (void *)fl;
FL_LOCK(fl);
- /* Just enough to make sure it doesn't starve right away. */
- refill_fl(sc, fl, roundup(sc->sge.fl_starve_threshold, 8), 8);
+ /* Enough to make sure the SGE doesn't think it's starved */
+ refill_fl(sc, fl, fl->lowat);
FL_UNLOCK(fl);
+
+ iq->flags |= IQ_HAS_FL;
}
/* Enable IQ interrupts */
- atomic_store_rel_32(&iq->state, IQS_IDLE);
+ atomic_store_rel_int(&iq->state, IQS_IDLE);
t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS), V_SEINTARM(iq->intr_params) |
V_INGRESSQID(iq->cntxt_id));
return (0);
}
-/*
- * This can be called with the iq/fl in any state - fully allocated and
- * functional, partially allocated, even all-zeroed out.
- */
static int
free_iq_fl(struct port_info *pi, struct sge_iq *iq, struct sge_fl *fl)
{
@@ -1265,23 +1678,7 @@ free_iq_fl(struct port_info *pi, struct sge_iq *iq, struct sge_fl *fl)
dev = pi ? pi->dev : sc->dev;
- if (iq->flags & IQ_STARTED) {
- rc = -t4_iq_start_stop(sc, sc->mbox, 0, sc->pf, 0,
- iq->cntxt_id, fl ? fl->cntxt_id : 0xffff, 0xffff);
- if (rc != 0) {
- device_printf(dev,
- "failed to stop queue %p: %d\n", iq, rc);
- return (rc);
- }
- iq->flags &= ~IQ_STARTED;
-
- /* Synchronize with the interrupt handler */
- while (!atomic_cmpset_32(&iq->state, IQS_IDLE, IQS_DISABLED))
- pause("iqfree", hz / 1000);
- }
-
if (iq->flags & IQ_ALLOCATED) {
-
rc = -t4_iq_free(sc, sc->mbox, sc->pf, 0,
FW_IQ_TYPE_FL_INT_CAP, iq->cntxt_id,
fl ? fl->cntxt_id : 0xffff, 0xffff);
@@ -1322,76 +1719,84 @@ free_iq_fl(struct port_info *pi, struct sge_iq *iq, struct sge_fl *fl)
}
static int
-alloc_intrq(struct adapter *sc, int port_idx, int intrq_idx, int intr_idx)
+alloc_fwq(struct adapter *sc)
{
- int rc;
- struct sysctl_oid *oid;
- struct sysctl_oid_list *children;
+ int rc, intr_idx;
+ struct sge_iq *fwq = &sc->sge.fwq;
char name[16];
- struct sge_iq *intrq = &sc->sge.intrq[intrq_idx];
+ struct sysctl_oid *oid = device_get_sysctl_tree(sc->dev);
+ struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid);
- rc = alloc_iq_fl(sc->port[port_idx], intrq, NULL, intr_idx, -1);
- if (rc != 0)
+ snprintf(name, sizeof(name), "%s fwq", device_get_nameunit(sc->dev));
+ init_iq(fwq, sc, 0, 0, FW_IQ_QSIZE, FW_IQ_ESIZE, name);
+ fwq->flags |= IQ_INTR; /* always */
+ intr_idx = sc->intr_count > 1 ? 1 : 0;
+ rc = alloc_iq_fl(sc->port[0], fwq, NULL, intr_idx, -1);
+ if (rc != 0) {
+ device_printf(sc->dev,
+ "failed to create firmware event queue: %d\n", rc);
return (rc);
+ }
- children = SYSCTL_CHILDREN(sc->oid_intrq);
-
- snprintf(name, sizeof(name), "%d", intrq_idx);
- oid = SYSCTL_ADD_NODE(&sc->ctx, children, OID_AUTO, name, CTLFLAG_RD,
- NULL, "interrupt queue");
+ oid = SYSCTL_ADD_NODE(&sc->ctx, children, OID_AUTO, "fwq", CTLFLAG_RD,
+ NULL, "firmware event queue");
children = SYSCTL_CHILDREN(oid);
+ SYSCTL_ADD_PROC(&sc->ctx, children, OID_AUTO, "abs_id",
+ CTLTYPE_INT | CTLFLAG_RD, &fwq->abs_id, 0, sysctl_uint16, "I",
+ "absolute id of the queue");
+ SYSCTL_ADD_PROC(&sc->ctx, children, OID_AUTO, "cntxt_id",
+ CTLTYPE_INT | CTLFLAG_RD, &fwq->cntxt_id, 0, sysctl_uint16, "I",
+ "SGE context id of the queue");
SYSCTL_ADD_PROC(&sc->ctx, children, OID_AUTO, "cidx",
- CTLTYPE_INT | CTLFLAG_RD, &intrq->cidx, 0, sysctl_uint16, "I",
+ CTLTYPE_INT | CTLFLAG_RD, &fwq->cidx, 0, sysctl_uint16, "I",
"consumer index");
- return (rc);
+ return (0);
}
static int
-free_intrq(struct sge_iq *iq)
+free_fwq(struct adapter *sc)
{
- return free_iq_fl(NULL, iq, NULL);
-
+ return free_iq_fl(NULL, &sc->sge.fwq, NULL);
}
static int
-alloc_fwq(struct adapter *sc, int intr_idx)
+alloc_mgmtq(struct adapter *sc)
{
int rc;
- struct sysctl_oid_list *children;
- struct sge_iq *fwq = &sc->sge.fwq;
-
- rc = alloc_iq_fl(sc->port[0], fwq, NULL, intr_idx, -1);
- if (rc != 0)
- return (rc);
+ struct sge_wrq *mgmtq = &sc->sge.mgmtq;
+ char name[16];
+ struct sysctl_oid *oid = device_get_sysctl_tree(sc->dev);
+ struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid);
- children = SYSCTL_CHILDREN(sc->oid_fwq);
+ oid = SYSCTL_ADD_NODE(&sc->ctx, children, OID_AUTO, "mgmtq", CTLFLAG_RD,
+ NULL, "management queue");
- SYSCTL_ADD_PROC(&sc->ctx, children, OID_AUTO, "abs_id",
- CTLTYPE_INT | CTLFLAG_RD, &fwq->abs_id, 0, sysctl_uint16, "I",
- "absolute id of the queue");
- SYSCTL_ADD_PROC(&sc->ctx, children, OID_AUTO, "cntxt_id",
- CTLTYPE_INT | CTLFLAG_RD, &fwq->cntxt_id, 0, sysctl_uint16, "I",
- "SGE context id of the queue");
- SYSCTL_ADD_PROC(&sc->ctx, children, OID_AUTO, "cidx",
- CTLTYPE_INT | CTLFLAG_RD, &fwq->cidx, 0, sysctl_uint16, "I",
- "consumer index");
+ snprintf(name, sizeof(name), "%s mgmtq", device_get_nameunit(sc->dev));
+ init_eq(&mgmtq->eq, EQ_CTRL, CTRL_EQ_QSIZE, sc->port[0]->tx_chan,
+ sc->sge.fwq.cntxt_id, name);
+ rc = alloc_wrq(sc, NULL, mgmtq, oid);
+ if (rc != 0) {
+ device_printf(sc->dev,
+ "failed to create management queue: %d\n", rc);
+ return (rc);
+ }
- return (rc);
+ return (0);
}
static int
-free_fwq(struct sge_iq *iq)
+free_mgmtq(struct adapter *sc)
{
- return free_iq_fl(NULL, iq, NULL);
+ return free_wrq(sc, &sc->sge.mgmtq);
}
static int
-alloc_rxq(struct port_info *pi, struct sge_rxq *rxq, int intr_idx, int idx)
+alloc_rxq(struct port_info *pi, struct sge_rxq *rxq, int intr_idx, int idx,
+ struct sysctl_oid *oid)
{
int rc;
- struct sysctl_oid *oid;
struct sysctl_oid_list *children;
char name[16];
@@ -1400,7 +1805,7 @@ alloc_rxq(struct port_info *pi, struct sge_rxq *rxq, int intr_idx, int idx)
return (rc);
FL_LOCK(&rxq->fl);
- refill_fl(pi->adapter, &rxq->fl, rxq->fl.needed / 8, 8);
+ refill_fl(pi->adapter, &rxq->fl, rxq->fl.needed / 8);
FL_UNLOCK(&rxq->fl);
#ifdef INET
@@ -1410,11 +1815,11 @@ alloc_rxq(struct port_info *pi, struct sge_rxq *rxq, int intr_idx, int idx)
rxq->lro.ifp = pi->ifp; /* also indicates LRO init'ed */
if (pi->ifp->if_capenable & IFCAP_LRO)
- rxq->flags |= RXQ_LRO_ENABLED;
+ rxq->iq.flags |= IQ_LRO_ENABLED;
#endif
rxq->ifp = pi->ifp;
- children = SYSCTL_CHILDREN(pi->oid_rxq);
+ children = SYSCTL_CHILDREN(oid);
snprintf(name, sizeof(name), "%d", idx);
oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, name, CTLFLAG_RD,
@@ -1477,32 +1882,71 @@ free_rxq(struct port_info *pi, struct sge_rxq *rxq)
return (rc);
}
+#ifndef TCP_OFFLOAD_DISABLE
static int
-alloc_ctrlq(struct adapter *sc, struct sge_ctrlq *ctrlq, int idx)
+alloc_ofld_rxq(struct port_info *pi, struct sge_ofld_rxq *ofld_rxq,
+ int intr_idx, int idx, struct sysctl_oid *oid)
{
- int rc, cntxt_id;
- size_t len;
- struct fw_eq_ctrl_cmd c;
- struct sge_eq *eq = &ctrlq->eq;
- char name[16];
- struct sysctl_oid *oid;
+ int rc;
struct sysctl_oid_list *children;
+ char name[16];
- mtx_init(&eq->eq_lock, eq->lockname, NULL, MTX_DEF);
-
- len = eq->qsize * CTRL_EQ_ESIZE;
- rc = alloc_ring(sc, len, &eq->desc_tag, &eq->desc_map,
- &eq->ba, (void **)&eq->desc);
- if (rc)
+ rc = alloc_iq_fl(pi, &ofld_rxq->iq, &ofld_rxq->fl, intr_idx,
+ 1 << pi->tx_chan);
+ if (rc != 0)
return (rc);
- eq->cap = eq->qsize - SPG_LEN / CTRL_EQ_ESIZE;
- eq->spg = (void *)&eq->desc[eq->cap];
- eq->avail = eq->cap - 1; /* one less to avoid cidx = pidx */
- if (sc->flags & INTR_SHARED)
- eq->iqid = sc->sge.intrq[idx % NINTRQ(sc)].cntxt_id;
- else
- eq->iqid = sc->sge.intrq[sc->port[idx]->first_rxq].cntxt_id;
+ children = SYSCTL_CHILDREN(oid);
+
+ snprintf(name, sizeof(name), "%d", idx);
+ oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, name, CTLFLAG_RD,
+ NULL, "rx queue");
+ children = SYSCTL_CHILDREN(oid);
+
+ SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "abs_id",
+ CTLTYPE_INT | CTLFLAG_RD, &ofld_rxq->iq.abs_id, 0, sysctl_uint16,
+ "I", "absolute id of the queue");
+ SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "cntxt_id",
+ CTLTYPE_INT | CTLFLAG_RD, &ofld_rxq->iq.cntxt_id, 0, sysctl_uint16,
+ "I", "SGE context id of the queue");
+ SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "cidx",
+ CTLTYPE_INT | CTLFLAG_RD, &ofld_rxq->iq.cidx, 0, sysctl_uint16, "I",
+ "consumer index");
+
+ children = SYSCTL_CHILDREN(oid);
+ oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "fl", CTLFLAG_RD,
+ NULL, "freelist");
+ children = SYSCTL_CHILDREN(oid);
+
+ SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "cntxt_id",
+ CTLTYPE_INT | CTLFLAG_RD, &ofld_rxq->fl.cntxt_id, 0, sysctl_uint16,
+ "I", "SGE context id of the queue");
+ SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "cidx", CTLFLAG_RD,
+ &ofld_rxq->fl.cidx, 0, "consumer index");
+ SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "pidx", CTLFLAG_RD,
+ &ofld_rxq->fl.pidx, 0, "producer index");
+
+ return (rc);
+}
+
+/*
+ * Releases the ingress queue and freelist that back an offload rx queue.  The
+ * queue structure is wiped only after the release succeeded; on failure it is
+ * left untouched and the error is passed back to the caller.
+ */
+static int
+free_ofld_rxq(struct port_info *pi, struct sge_ofld_rxq *ofld_rxq)
+{
+	int error;
+
+	error = free_iq_fl(pi, &ofld_rxq->iq, &ofld_rxq->fl);
+	if (error != 0)
+		return (error);
+
+	bzero(ofld_rxq, sizeof(*ofld_rxq));
+	return (0);
+}
+#endif
+
+static int
+ctrl_eq_alloc(struct adapter *sc, struct sge_eq *eq)
+{
+ int rc, cntxt_id;
+ struct fw_eq_ctrl_cmd c;
bzero(&c, sizeof(c));
@@ -1515,7 +1959,7 @@ alloc_ctrlq(struct adapter *sc, struct sge_ctrlq *ctrlq, int idx)
c.physeqid_pkd = htobe32(0);
c.fetchszm_to_iqid =
htobe32(V_FW_EQ_CTRL_CMD_HOSTFCMODE(X_HOSTFCMODE_STATUS_PAGE) |
- V_FW_EQ_CTRL_CMD_PCIECHN(sc->port[idx]->tx_chan) |
+ V_FW_EQ_CTRL_CMD_PCIECHN(eq->tx_chan) |
F_FW_EQ_CTRL_CMD_FETCHRO | V_FW_EQ_CTRL_CMD_IQID(eq->iqid));
c.dcaen_to_eqsize =
htobe32(V_FW_EQ_CTRL_CMD_FBMIN(X_FETCHBURSTMIN_64B) |
@@ -1527,51 +1971,191 @@ alloc_ctrlq(struct adapter *sc, struct sge_ctrlq *ctrlq, int idx)
rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c);
if (rc != 0) {
device_printf(sc->dev,
- "failed to create control queue %d: %d\n", idx, rc);
+ "failed to create control queue %d: %d\n", eq->tx_chan, rc);
return (rc);
}
+ eq->flags |= EQ_ALLOCATED;
- eq->pidx = eq->cidx = 0;
eq->cntxt_id = G_FW_EQ_CTRL_CMD_EQID(be32toh(c.cmpliqid_eqid));
- eq->flags |= (EQ_ALLOCATED | EQ_STARTED);
+ cntxt_id = eq->cntxt_id - sc->sge.eq_start;
+ if (cntxt_id >= sc->sge.neq)
+ panic("%s: eq->cntxt_id (%d) more than the max (%d)", __func__,
+ cntxt_id, sc->sge.neq - 1);
+ sc->sge.eqmap[cntxt_id] = eq;
+
+ return (rc);
+}
+
+static int
+eth_eq_alloc(struct adapter *sc, struct port_info *pi, struct sge_eq *eq)
+{
+ int rc, cntxt_id;
+ struct fw_eq_eth_cmd c;
+
+ bzero(&c, sizeof(c));
+ c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_EQ_ETH_CMD) | F_FW_CMD_REQUEST |
+ F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_ETH_CMD_PFN(sc->pf) |
+ V_FW_EQ_ETH_CMD_VFN(0));
+ c.alloc_to_len16 = htobe32(F_FW_EQ_ETH_CMD_ALLOC |
+ F_FW_EQ_ETH_CMD_EQSTART | FW_LEN16(c));
+ c.viid_pkd = htobe32(V_FW_EQ_ETH_CMD_VIID(pi->viid));
+ c.fetchszm_to_iqid =
+ htobe32(V_FW_EQ_ETH_CMD_HOSTFCMODE(X_HOSTFCMODE_STATUS_PAGE) |
+ V_FW_EQ_ETH_CMD_PCIECHN(eq->tx_chan) | F_FW_EQ_ETH_CMD_FETCHRO |
+ V_FW_EQ_ETH_CMD_IQID(eq->iqid));
+ c.dcaen_to_eqsize = htobe32(V_FW_EQ_ETH_CMD_FBMIN(X_FETCHBURSTMIN_64B) |
+ V_FW_EQ_ETH_CMD_FBMAX(X_FETCHBURSTMAX_512B) |
+ V_FW_EQ_ETH_CMD_CIDXFTHRESH(X_CIDXFLUSHTHRESH_32) |
+ V_FW_EQ_ETH_CMD_EQSIZE(eq->qsize));
+ c.eqaddr = htobe64(eq->ba);
+
+ rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c);
+ if (rc != 0) {
+ device_printf(pi->dev,
+ "failed to create Ethernet egress queue: %d\n", rc);
+ return (rc);
+ }
+ eq->flags |= EQ_ALLOCATED;
+
+ eq->cntxt_id = G_FW_EQ_ETH_CMD_EQID(be32toh(c.eqid_pkd));
cntxt_id = eq->cntxt_id - sc->sge.eq_start;
- KASSERT(cntxt_id < sc->sge.neq,
- ("%s: eq->cntxt_id (%d) more than the max (%d)", __func__,
- cntxt_id, sc->sge.neq - 1));
+ if (cntxt_id >= sc->sge.neq)
+ panic("%s: eq->cntxt_id (%d) more than the max (%d)", __func__,
+ cntxt_id, sc->sge.neq - 1);
sc->sge.eqmap[cntxt_id] = eq;
- children = SYSCTL_CHILDREN(sc->oid_ctrlq);
+ return (rc);
+}
- snprintf(name, sizeof(name), "%d", idx);
- oid = SYSCTL_ADD_NODE(&sc->ctx, children, OID_AUTO, name, CTLFLAG_RD,
- NULL, "ctrl queue");
- children = SYSCTL_CHILDREN(oid);
+#ifndef TCP_OFFLOAD_DISABLE
+static int
+ofld_eq_alloc(struct adapter *sc, struct port_info *pi, struct sge_eq *eq)
+{
+ int rc, cntxt_id;
+ struct fw_eq_ofld_cmd c;
- SYSCTL_ADD_PROC(&sc->ctx, children, OID_AUTO, "pidx",
- CTLTYPE_INT | CTLFLAG_RD, &ctrlq->eq.pidx, 0, sysctl_uint16, "I",
- "producer index");
- SYSCTL_ADD_UINT(&sc->ctx, children, OID_AUTO, "no_desc", CTLFLAG_RD,
- &ctrlq->no_desc, 0,
- "# of times ctrlq ran out of hardware descriptors");
+ bzero(&c, sizeof(c));
+
+ c.op_to_vfn = htonl(V_FW_CMD_OP(FW_EQ_OFLD_CMD) | F_FW_CMD_REQUEST |
+ F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_OFLD_CMD_PFN(sc->pf) |
+ V_FW_EQ_OFLD_CMD_VFN(0));
+ c.alloc_to_len16 = htonl(F_FW_EQ_OFLD_CMD_ALLOC |
+ F_FW_EQ_OFLD_CMD_EQSTART | FW_LEN16(c));
+ c.fetchszm_to_iqid =
+ htonl(V_FW_EQ_OFLD_CMD_HOSTFCMODE(X_HOSTFCMODE_STATUS_PAGE) |
+ V_FW_EQ_OFLD_CMD_PCIECHN(eq->tx_chan) |
+ F_FW_EQ_OFLD_CMD_FETCHRO | V_FW_EQ_OFLD_CMD_IQID(eq->iqid));
+ c.dcaen_to_eqsize =
+ htobe32(V_FW_EQ_OFLD_CMD_FBMIN(X_FETCHBURSTMIN_64B) |
+ V_FW_EQ_OFLD_CMD_FBMAX(X_FETCHBURSTMAX_512B) |
+ V_FW_EQ_OFLD_CMD_CIDXFTHRESH(X_CIDXFLUSHTHRESH_32) |
+ V_FW_EQ_OFLD_CMD_EQSIZE(eq->qsize));
+ c.eqaddr = htobe64(eq->ba);
+
+ rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c);
+ if (rc != 0) {
+ device_printf(pi->dev,
+ "failed to create egress queue for TCP offload: %d\n", rc);
+ return (rc);
+ }
+ eq->flags |= EQ_ALLOCATED;
+
+ eq->cntxt_id = G_FW_EQ_OFLD_CMD_EQID(be32toh(c.eqid_pkd));
+ cntxt_id = eq->cntxt_id - sc->sge.eq_start;
+ if (cntxt_id >= sc->sge.neq)
+ panic("%s: eq->cntxt_id (%d) more than the max (%d)", __func__,
+ cntxt_id, sc->sge.neq - 1);
+ sc->sge.eqmap[cntxt_id] = eq;
+
+ return (rc);
+}
+#endif
+
+/*
+ * Sets up an egress queue: initializes its lock, allocates the descriptor
+ * ring, and then has the queue created by the allocator that matches the
+ * queue type recorded in eq->flags (EQ_CTRL, EQ_ETH, or EQ_OFLD).
+ *
+ * pi is passed through to the EQ_ETH and EQ_OFLD allocators; the EQ_CTRL
+ * path does not use it.
+ *
+ * Returns 0 on success, otherwise the error from alloc_ring() or from the
+ * type-specific allocator.  The lock and ring set up here are not released
+ * by this function when a later step fails.
+ */
+static int
+alloc_eq(struct adapter *sc, struct port_info *pi, struct sge_eq *eq)
+{
+	int rc;
+	size_t len;
+
+	mtx_init(&eq->eq_lock, eq->lockname, NULL, MTX_DEF);
+
+	len = eq->qsize * EQ_ESIZE;
+	rc = alloc_ring(sc, len, &eq->desc_tag, &eq->desc_map,
+	    &eq->ba, (void **)&eq->desc);
+	if (rc)
+		return (rc);
+
+	/*
+	 * The last SPG_LEN bytes of the ring are not usable as descriptors;
+	 * eq->spg points at them.
+	 */
+	eq->cap = eq->qsize - SPG_LEN / EQ_ESIZE;
+	eq->spg = (void *)&eq->desc[eq->cap];
+	eq->avail = eq->cap - 1;	/* one less to avoid cidx = pidx */
+	eq->pidx = eq->cidx = 0;
+
+	switch (eq->flags & EQ_TYPEMASK) {
+	case EQ_CTRL:
+		rc = ctrl_eq_alloc(sc, eq);
+		break;
+
+	case EQ_ETH:
+		rc = eth_eq_alloc(sc, pi, eq);
+		break;
+
+#ifndef TCP_OFFLOAD_DISABLE
+	case EQ_OFLD:
+		rc = ofld_eq_alloc(sc, pi, eq);
+		break;
+#endif
+
+	default:
+		panic("%s: invalid eq type %d.", __func__,
+		    eq->flags & EQ_TYPEMASK);
+	}
+	if (rc != 0) {
+		/* Terminate the message so it doesn't run into the next one. */
+		device_printf(sc->dev,
+		    "failed to allocate egress queue(%d): %d\n",
+		    eq->flags & EQ_TYPEMASK, rc);
+	}
+
+	eq->tx_callout.c_cpu = eq->cntxt_id % mp_ncpus;
 	return (rc);
 }
static int
-free_ctrlq(struct adapter *sc, struct sge_ctrlq *ctrlq)
+free_eq(struct adapter *sc, struct sge_eq *eq)
{
int rc;
- struct sge_eq *eq = &ctrlq->eq;
- if (eq->flags & (EQ_ALLOCATED | EQ_STARTED)) {
- rc = -t4_ctrl_eq_free(sc, sc->mbox, sc->pf, 0, eq->cntxt_id);
+ if (eq->flags & EQ_ALLOCATED) {
+ switch (eq->flags & EQ_TYPEMASK) {
+ case EQ_CTRL:
+ rc = -t4_ctrl_eq_free(sc, sc->mbox, sc->pf, 0,
+ eq->cntxt_id);
+ break;
+
+ case EQ_ETH:
+ rc = -t4_eth_eq_free(sc, sc->mbox, sc->pf, 0,
+ eq->cntxt_id);
+ break;
+
+#ifndef TCP_OFFLOAD_DISABLE
+ case EQ_OFLD:
+ rc = -t4_ofld_eq_free(sc, sc->mbox, sc->pf, 0,
+ eq->cntxt_id);
+ break;
+#endif
+
+ default:
+ panic("%s: invalid eq type %d.", __func__,
+ eq->flags & EQ_TYPEMASK);
+ }
if (rc != 0) {
device_printf(sc->dev,
- "failed to free ctrl queue %p: %d\n", eq, rc);
+ "failed to free egress queue (%d): %d\n",
+ eq->flags & EQ_TYPEMASK, rc);
return (rc);
}
- eq->flags &= ~(EQ_ALLOCATED | EQ_STARTED);
+ eq->flags &= ~EQ_ALLOCATED;
}
free_ring(sc, eq->desc_tag, eq->desc_map, eq->ba, eq->desc);
@@ -1579,47 +2163,77 @@ free_ctrlq(struct adapter *sc, struct sge_ctrlq *ctrlq)
if (mtx_initialized(&eq->eq_lock))
mtx_destroy(&eq->eq_lock);
- bzero(ctrlq, sizeof(*ctrlq));
+ bzero(eq, sizeof(*eq));
return (0);
}
static int
-alloc_txq(struct port_info *pi, struct sge_txq *txq, int idx)
+alloc_wrq(struct adapter *sc, struct port_info *pi, struct sge_wrq *wrq,
+ struct sysctl_oid *oid)
{
- int rc, cntxt_id;
- size_t len;
+ int rc;
+ struct sysctl_ctx_list *ctx = pi ? &pi->ctx : &sc->ctx;
+ struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid);
+
+ rc = alloc_eq(sc, pi, &wrq->eq);
+ if (rc)
+ return (rc);
+
+ wrq->adapter = sc;
+
+ SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "cntxt_id", CTLFLAG_RD,
+ &wrq->eq.cntxt_id, 0, "SGE context id of the queue");
+ SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cidx",
+ CTLTYPE_INT | CTLFLAG_RD, &wrq->eq.cidx, 0, sysctl_uint16, "I",
+ "consumer index");
+ SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "pidx",
+ CTLTYPE_INT | CTLFLAG_RD, &wrq->eq.pidx, 0, sysctl_uint16, "I",
+ "producer index");
+ SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "tx_wrs", CTLFLAG_RD,
+ &wrq->tx_wrs, "# of work requests");
+ SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "no_desc", CTLFLAG_RD,
+ &wrq->no_desc, 0,
+ "# of times queue ran out of hardware descriptors");
+ SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "unstalled", CTLFLAG_RD,
+ &wrq->eq.unstalled, 0, "# of times queue recovered after stall");
+
+
+ return (rc);
+}
+
+/*
+ * Frees the egress queue that backs a work request queue.  The wrq is zeroed
+ * only if the egress queue was freed successfully; otherwise the error from
+ * free_eq() is returned and the wrq is left as it was.
+ */
+static int
+free_wrq(struct adapter *sc, struct sge_wrq *wrq)
+{
+	int error = free_eq(sc, &wrq->eq);
+
+	if (error == 0)
+		bzero(wrq, sizeof(*wrq));
+
+	return (error);
+}
+
+static int
+alloc_txq(struct port_info *pi, struct sge_txq *txq, int idx,
+ struct sysctl_oid *oid)
+{
+ int rc;
struct adapter *sc = pi->adapter;
- struct fw_eq_eth_cmd c;
struct sge_eq *eq = &txq->eq;
char name[16];
- struct sysctl_oid *oid;
- struct sysctl_oid_list *children;
- struct sge_iq *intrq;
-
- txq->ifp = pi->ifp;
- TASK_INIT(&txq->resume_tx, 0, cxgbe_txq_start, txq);
-
- mtx_init(&eq->eq_lock, eq->lockname, NULL, MTX_DEF);
+ struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid);
- len = eq->qsize * TX_EQ_ESIZE;
- rc = alloc_ring(sc, len, &eq->desc_tag, &eq->desc_map,
- &eq->ba, (void **)&eq->desc);
+ rc = alloc_eq(sc, pi, eq);
if (rc)
return (rc);
- eq->cap = eq->qsize - SPG_LEN / TX_EQ_ESIZE;
- eq->spg = (void *)&eq->desc[eq->cap];
- eq->avail = eq->cap - 1; /* one less to avoid cidx = pidx */
+ txq->ifp = pi->ifp;
+
txq->sdesc = malloc(eq->cap * sizeof(struct tx_sdesc), M_CXGBE,
M_ZERO | M_WAITOK);
txq->br = buf_ring_alloc(eq->qsize, M_CXGBE, M_WAITOK, &eq->eq_lock);
- intrq = &sc->sge.intrq[0];
- if (sc->flags & INTR_SHARED)
- eq->iqid = intrq[(pi->first_txq + idx) % NINTRQ(sc)].cntxt_id;
- else
- eq->iqid = intrq[pi->first_rxq + (idx % pi->nrxq)].cntxt_id;
-
rc = bus_dma_tag_create(sc->dmat, 1, 0, BUS_SPACE_MAXADDR,
BUS_SPACE_MAXADDR, NULL, NULL, 64 * 1024, TX_SGL_SEGS,
BUS_SPACE_MAXSIZE, BUS_DMA_ALLOCNOW, NULL, NULL, &txq->tx_tag);
@@ -1629,49 +2243,18 @@ alloc_txq(struct port_info *pi, struct sge_txq *txq, int idx)
return (rc);
}
- rc = alloc_tx_maps(txq);
+ /*
+ * We can stuff ~10 frames in an 8-descriptor txpkts WR (8 is the SGE
+ * limit for any WR). txq->no_dmamap events shouldn't occur if maps is
+ * sized for the worst case.
+ */
+ rc = t4_alloc_tx_maps(&txq->txmaps, txq->tx_tag, eq->qsize * 10 / 8,
+ M_WAITOK);
if (rc != 0) {
device_printf(sc->dev, "failed to setup tx DMA maps: %d\n", rc);
return (rc);
}
- bzero(&c, sizeof(c));
-
- c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_EQ_ETH_CMD) | F_FW_CMD_REQUEST |
- F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_ETH_CMD_PFN(sc->pf) |
- V_FW_EQ_ETH_CMD_VFN(0));
- c.alloc_to_len16 = htobe32(F_FW_EQ_ETH_CMD_ALLOC |
- F_FW_EQ_ETH_CMD_EQSTART | FW_LEN16(c));
- c.viid_pkd = htobe32(V_FW_EQ_ETH_CMD_VIID(pi->viid));
- c.fetchszm_to_iqid =
- htobe32(V_FW_EQ_ETH_CMD_HOSTFCMODE(X_HOSTFCMODE_STATUS_PAGE) |
- V_FW_EQ_ETH_CMD_PCIECHN(pi->tx_chan) | F_FW_EQ_ETH_CMD_FETCHRO |
- V_FW_EQ_ETH_CMD_IQID(eq->iqid));
- c.dcaen_to_eqsize = htobe32(V_FW_EQ_ETH_CMD_FBMIN(X_FETCHBURSTMIN_64B) |
- V_FW_EQ_ETH_CMD_FBMAX(X_FETCHBURSTMAX_512B) |
- V_FW_EQ_ETH_CMD_CIDXFTHRESH(X_CIDXFLUSHTHRESH_32) |
- V_FW_EQ_ETH_CMD_EQSIZE(eq->qsize));
- c.eqaddr = htobe64(eq->ba);
-
- rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c);
- if (rc != 0) {
- device_printf(pi->dev,
- "failed to create egress queue: %d\n", rc);
- return (rc);
- }
-
- eq->pidx = eq->cidx = 0;
- eq->cntxt_id = G_FW_EQ_ETH_CMD_EQID(be32toh(c.eqid_pkd));
- eq->flags |= (EQ_ALLOCATED | EQ_STARTED);
-
- cntxt_id = eq->cntxt_id - sc->sge.eq_start;
- KASSERT(cntxt_id < sc->sge.neq,
- ("%s: eq->cntxt_id (%d) more than the max (%d)", __func__,
- cntxt_id, sc->sge.neq - 1));
- sc->sge.eqmap[cntxt_id] = eq;
-
- children = SYSCTL_CHILDREN(pi->oid_txq);
-
snprintf(name, sizeof(name), "%d", idx);
oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, name, CTLFLAG_RD,
NULL, "tx queue");
@@ -1709,7 +2292,9 @@ alloc_txq(struct port_info *pi, struct sge_txq *txq, int idx)
SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "no_desc", CTLFLAG_RD,
&txq->no_desc, 0, "# of times txq ran out of hardware descriptors");
SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "egr_update", CTLFLAG_RD,
- &txq->egr_update, 0, "egress update notifications from the SGE");
+ &eq->egr_update, 0, "egress update notifications from the SGE");
+ SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "unstalled", CTLFLAG_RD,
+ &eq->unstalled, 0, "# of times txq recovered after stall");
return (rc);
}
@@ -1721,52 +2306,20 @@ free_txq(struct port_info *pi, struct sge_txq *txq)
struct adapter *sc = pi->adapter;
struct sge_eq *eq = &txq->eq;
- if (eq->flags & (EQ_ALLOCATED | EQ_STARTED)) {
-
- /*
- * Wait for the response to a credit flush if there's one
- * pending. Clearing the flag tells handle_sge_egr_update or
- * cxgbe_txq_start (depending on how far the response has made
- * it) that they should ignore the response and wake up free_txq
- * instead.
- *
- * The interface has been marked down by the time we get here
- * (both IFF_UP and IFF_DRV_RUNNING cleared). qflush has
- * emptied the tx buf_rings and we know nothing new is being
- * queued for tx so we don't have to worry about a new credit
- * flush request.
- */
- TXQ_LOCK(txq);
- if (eq->flags & EQ_CRFLUSHED) {
- eq->flags &= ~EQ_CRFLUSHED;
- msleep(txq, &eq->eq_lock, 0, "crflush", 0);
- }
- TXQ_UNLOCK(txq);
-
- rc = -t4_eth_eq_free(sc, sc->mbox, sc->pf, 0, eq->cntxt_id);
- if (rc != 0) {
- device_printf(pi->dev,
- "failed to free egress queue %p: %d\n", eq, rc);
- return (rc);
- }
- eq->flags &= ~(EQ_ALLOCATED | EQ_STARTED);
- }
-
- free_ring(sc, eq->desc_tag, eq->desc_map, eq->ba, eq->desc);
+ rc = free_eq(sc, eq);
+ if (rc)
+ return (rc);
free(txq->sdesc, M_CXGBE);
- if (txq->maps)
- free_tx_maps(txq);
+ if (txq->txmaps.maps)
+ t4_free_tx_maps(&txq->txmaps, txq->tx_tag);
buf_ring_free(txq->br, M_CXGBE);
if (txq->tx_tag)
bus_dma_tag_destroy(txq->tx_tag);
- if (mtx_initialized(&eq->eq_lock))
- mtx_destroy(&eq->eq_lock);
-
bzero(txq, sizeof(*txq));
return (0);
}
@@ -1822,11 +2375,13 @@ ring_fl_db(struct adapter *sc, struct sge_fl *fl)
}
/*
- * Fill up the freelist by upto nbufs and ring its doorbell if the number of
- * buffers ready to be handed to the hardware >= dbthresh.
+ * Fill up the freelist by up to nbufs and maybe ring its doorbell.
+ *
+ * Returns non-zero to indicate that it should be added to the list of starving
+ * freelists.
*/
-static void
-refill_fl(struct adapter *sc, struct sge_fl *fl, int nbufs, int dbthresh)
+static int
+refill_fl(struct adapter *sc, struct sge_fl *fl, int nbufs)
{
__be64 *d = &fl->desc[fl->pidx];
struct fl_sdesc *sd = &fl->sdesc[fl->pidx];
@@ -1837,7 +2392,7 @@ refill_fl(struct adapter *sc, struct sge_fl *fl, int nbufs, int dbthresh)
FL_LOCK_ASSERT_OWNED(fl);
- if (nbufs < 0 || nbufs > fl->needed)
+ if (nbufs > fl->needed)
nbufs = fl->needed;
while (nbufs--) {
@@ -1918,8 +2473,35 @@ recycled:
}
}
- if (fl->pending >= dbthresh)
+ if (fl->pending >= 8)
ring_fl_db(sc, fl);
+
+ return (FL_RUNNING_LOW(fl) && !(fl->flags & FL_STARVING));
+}
+
+/*
+ * Walks the adapter's list of starving freelists (sc->sfl) and tries to refill
+ * each one, asking for 64 buffers per freelist per pass.  A freelist is taken
+ * off the list, and its FL_STARVING flag cleared, once it is no longer running
+ * low or has been marked FL_DOOMED.  If anything is still on the list when the
+ * walk is done, the callout is scheduled again hz / 5 ticks out.
+ *
+ * arg is the adapter.  The list is protected by sc->sfl_lock, which is held
+ * for the whole pass; each freelist's own lock is held while it is refilled.
+ */
+static void
+refill_sfl(void *arg)
+{
+	struct adapter *sc = arg;
+	struct sge_fl *cur, *nxt;
+	int unlist;
+
+	mtx_lock(&sc->sfl_lock);
+
+	TAILQ_FOREACH_SAFE(cur, &sc->sfl, link, nxt) {
+		FL_LOCK(cur);
+		refill_fl(sc, cur, 64);
+		unlist = FL_NOT_RUNNING_LOW(cur) ||
+		    (cur->flags & FL_DOOMED) != 0;
+		if (unlist) {
+			TAILQ_REMOVE(&sc->sfl, cur, link);
+			cur->flags &= ~FL_STARVING;
+		}
+		FL_UNLOCK(cur);
+	}
+
+	if (!TAILQ_EMPTY(&sc->sfl))
+		callout_schedule(&sc->sfl_callout, hz / 5);
+
+	mtx_unlock(&sc->sfl_lock);
 }
static int
@@ -1993,27 +2575,22 @@ free_fl_sdesc(struct sge_fl *fl)
fl->sdesc = NULL;
}
-static int
-alloc_tx_maps(struct sge_txq *txq)
+int
+t4_alloc_tx_maps(struct tx_maps *txmaps, bus_dma_tag_t tx_tag, int count,
+ int flags)
{
struct tx_map *txm;
- int i, rc, count;
+ int i, rc;
- /*
- * We can stuff ~10 frames in an 8-descriptor txpkts WR (8 is the SGE
- * limit for any WR). txq->no_dmamap events shouldn't occur if maps is
- * sized for the worst case.
- */
- count = txq->eq.qsize * 10 / 8;
- txq->map_total = txq->map_avail = count;
- txq->map_cidx = txq->map_pidx = 0;
+ txmaps->map_total = txmaps->map_avail = count;
+ txmaps->map_cidx = txmaps->map_pidx = 0;
- txq->maps = malloc(count * sizeof(struct tx_map), M_CXGBE,
- M_ZERO | M_WAITOK);
+ txmaps->maps = malloc(count * sizeof(struct tx_map), M_CXGBE,
+ M_ZERO | flags);
- txm = txq->maps;
+ txm = txmaps->maps;
for (i = 0; i < count; i++, txm++) {
- rc = bus_dmamap_create(txq->tx_tag, 0, &txm->map);
+ rc = bus_dmamap_create(tx_tag, 0, &txm->map);
if (rc != 0)
goto failed;
}
@@ -2022,36 +2599,36 @@ alloc_tx_maps(struct sge_txq *txq)
failed:
while (--i >= 0) {
txm--;
- bus_dmamap_destroy(txq->tx_tag, txm->map);
+ bus_dmamap_destroy(tx_tag, txm->map);
}
- KASSERT(txm == txq->maps, ("%s: EDOOFUS", __func__));
+ KASSERT(txm == txmaps->maps, ("%s: EDOOFUS", __func__));
- free(txq->maps, M_CXGBE);
- txq->maps = NULL;
+ free(txmaps->maps, M_CXGBE);
+ txmaps->maps = NULL;
return (rc);
}
-static void
-free_tx_maps(struct sge_txq *txq)
+void
+t4_free_tx_maps(struct tx_maps *txmaps, bus_dma_tag_t tx_tag)
{
struct tx_map *txm;
int i;
- txm = txq->maps;
- for (i = 0; i < txq->map_total; i++, txm++) {
+ txm = txmaps->maps;
+ for (i = 0; i < txmaps->map_total; i++, txm++) {
if (txm->m) {
- bus_dmamap_unload(txq->tx_tag, txm->map);
+ bus_dmamap_unload(tx_tag, txm->map);
m_freem(txm->m);
txm->m = NULL;
}
- bus_dmamap_destroy(txq->tx_tag, txm->map);
+ bus_dmamap_destroy(tx_tag, txm->map);
}
- free(txq->maps, M_CXGBE);
- txq->maps = NULL;
+ free(txmaps->maps, M_CXGBE);
+ txmaps->maps = NULL;
}
/*
@@ -2060,7 +2637,7 @@ free_tx_maps(struct sge_txq *txq)
* of immediate data.
*/
#define IMM_LEN ( \
- 2 * TX_EQ_ESIZE \
+ 2 * EQ_ESIZE \
- sizeof(struct fw_eth_tx_pkt_wr) \
- sizeof(struct cpl_tx_pkt_core))
@@ -2078,6 +2655,7 @@ get_pkt_sgl(struct sge_txq *txq, struct mbuf **fp, struct sgl *sgl,
int sgl_only)
{
struct mbuf *m = *fp;
+ struct tx_maps *txmaps;
struct tx_map *txm;
int rc, defragged = 0, n;
@@ -2091,11 +2669,12 @@ start: sgl->nsegs = 0;
if (m->m_pkthdr.len <= IMM_LEN && !sgl_only)
return (0); /* nsegs = 0 tells caller to use imm. tx */
- if (txq->map_avail == 0) {
+ txmaps = &txq->txmaps;
+ if (txmaps->map_avail == 0) {
txq->no_dmamap++;
return (ENOMEM);
}
- txm = &txq->maps[txq->map_pidx];
+ txm = &txmaps->maps[txmaps->map_pidx];
if (m->m_pkthdr.tso_segsz && m->m_len < 50) {
*fp = m_pullup(m, 50);
@@ -2119,9 +2698,9 @@ start: sgl->nsegs = 0;
return (rc);
txm->m = m;
- txq->map_avail--;
- if (++txq->map_pidx == txq->map_total)
- txq->map_pidx = 0;
+ txmaps->map_avail--;
+ if (++txmaps->map_pidx == txmaps->map_total)
+ txmaps->map_pidx = 0;
KASSERT(sgl->nsegs > 0 && sgl->nsegs <= TX_SGL_SEGS,
("%s: bad DMA mapping (%d segments)", __func__, sgl->nsegs));
@@ -2145,6 +2724,7 @@ start: sgl->nsegs = 0;
static int
free_pkt_sgl(struct sge_txq *txq, struct sgl *sgl)
{
+ struct tx_maps *txmaps;
struct tx_map *txm;
TXQ_LOCK_ASSERT_OWNED(txq);
@@ -2152,15 +2732,17 @@ free_pkt_sgl(struct sge_txq *txq, struct sgl *sgl)
if (sgl->nsegs == 0)
return (0); /* didn't use any map */
+ txmaps = &txq->txmaps;
+
/* 1 pkt uses exactly 1 map, back it out */
- txq->map_avail++;
- if (txq->map_pidx > 0)
- txq->map_pidx--;
+ txmaps->map_avail++;
+ if (txmaps->map_pidx > 0)
+ txmaps->map_pidx--;
else
- txq->map_pidx = txq->map_total - 1;
+ txmaps->map_pidx = txmaps->map_total - 1;
- txm = &txq->maps[txq->map_pidx];
+ txm = &txmaps->maps[txmaps->map_pidx];
bus_dmamap_unload(txq->tx_tag, txm->map);
txm->m = NULL;
@@ -2206,11 +2788,14 @@ write_txpkt_wr(struct port_info *pi, struct sge_txq *txq, struct mbuf *m,
/* Firmware work request header */
wr = (void *)&eq->desc[eq->pidx];
wr->op_immdlen = htobe32(V_FW_WR_OP(FW_ETH_TX_PKT_WR) |
- V_FW_WR_IMMDLEN(ctrl));
+ V_FW_ETH_TX_PKT_WR_IMMDLEN(ctrl));
ctrl = V_FW_WR_LEN16(howmany(nflits, 2));
- if (eq->avail == ndesc && !(eq->flags & EQ_CRFLUSHED)) {
- ctrl |= F_FW_WR_EQUEQ | F_FW_WR_EQUIQ;
- eq->flags |= EQ_CRFLUSHED;
+ if (eq->avail == ndesc) {
+ if (!(eq->flags & EQ_CRFLUSHED)) {
+ ctrl |= F_FW_WR_EQUEQ | F_FW_WR_EQUIQ;
+ eq->flags |= EQ_CRFLUSHED;
+ }
+ eq->flags |= EQ_STALLED;
}
wr->equiq_to_len16 = htobe32(ctrl);
@@ -2325,6 +2910,8 @@ add_to_txpkts(struct port_info *pi, struct sge_txq *txq, struct txpkts *txpkts,
TXQ_LOCK_ASSERT_OWNED(txq);
+ KASSERT(sgl->nsegs, ("%s: can't coalesce imm data", __func__));
+
if (txpkts->npkt > 0) {
flits = TXPKTS_PKT_HDR + sgl->nflits;
can_coalesce = m->m_pkthdr.tso_segsz == 0 &&
@@ -2397,12 +2984,14 @@ write_txpkts_wr(struct sge_txq *txq, struct txpkts *txpkts)
ndesc = howmany(txpkts->nflits, 8);
wr = (void *)&eq->desc[eq->pidx];
- wr->op_immdlen = htobe32(V_FW_WR_OP(FW_ETH_TX_PKTS_WR) |
- V_FW_WR_IMMDLEN(0)); /* immdlen does not matter in this WR */
+ wr->op_pkd = htobe32(V_FW_WR_OP(FW_ETH_TX_PKTS_WR));
ctrl = V_FW_WR_LEN16(howmany(txpkts->nflits, 2));
- if (eq->avail == ndesc && !(eq->flags & EQ_CRFLUSHED)) {
- ctrl |= F_FW_WR_EQUEQ | F_FW_WR_EQUIQ;
- eq->flags |= EQ_CRFLUSHED;
+ if (eq->avail == ndesc) {
+ if (!(eq->flags & EQ_CRFLUSHED)) {
+ ctrl |= F_FW_WR_EQUEQ | F_FW_WR_EQUIQ;
+ eq->flags |= EQ_CRFLUSHED;
+ }
+ eq->flags |= EQ_STALLED;
}
wr->equiq_to_len16 = htobe32(ctrl);
wr->plen = htobe16(txpkts->plen);
@@ -2616,7 +3205,7 @@ reclaimable(struct sge_eq *eq)
unsigned int cidx;
cidx = eq->spg->cidx; /* stable snapshot */
- cidx = be16_to_cpu(cidx);
+ cidx = be16toh(cidx);
if (cidx >= eq->cidx)
return (cidx - eq->cidx);
@@ -2634,11 +3223,12 @@ static int
reclaim_tx_descs(struct sge_txq *txq, int can_reclaim, int n)
{
struct tx_sdesc *txsd;
+ struct tx_maps *txmaps;
struct tx_map *txm;
unsigned int reclaimed, maps;
struct sge_eq *eq = &txq->eq;
- EQ_LOCK_ASSERT_OWNED(eq);
+ TXQ_LOCK_ASSERT_OWNED(txq);
if (can_reclaim == 0)
can_reclaim = reclaimable(eq);
@@ -2665,7 +3255,8 @@ reclaim_tx_descs(struct sge_txq *txq, int can_reclaim, int n)
eq->cidx -= eq->cap;
}
- txm = &txq->maps[txq->map_cidx];
+ txmaps = &txq->txmaps;
+ txm = &txmaps->maps[txmaps->map_cidx];
if (maps)
prefetch(txm->m);
@@ -2673,16 +3264,16 @@ reclaim_tx_descs(struct sge_txq *txq, int can_reclaim, int n)
KASSERT(eq->avail < eq->cap, /* avail tops out at (cap - 1) */
("%s: too many descriptors available", __func__));
- txq->map_avail += maps;
- KASSERT(txq->map_avail <= txq->map_total,
+ txmaps->map_avail += maps;
+ KASSERT(txmaps->map_avail <= txmaps->map_total,
("%s: too many maps available", __func__));
while (maps--) {
struct tx_map *next;
next = txm + 1;
- if (__predict_false(txq->map_cidx + 1 == txq->map_total))
- next = txq->maps;
+ if (__predict_false(txmaps->map_cidx + 1 == txmaps->map_total))
+ next = txmaps->maps;
prefetch(next->m);
bus_dmamap_unload(txq->tx_tag, txm->map);
@@ -2690,8 +3281,8 @@ reclaim_tx_descs(struct sge_txq *txq, int can_reclaim, int n)
txm->m = NULL;
txm = next;
- if (__predict_false(++txq->map_cidx == txq->map_total))
- txq->map_cidx = 0;
+ if (__predict_false(++txmaps->map_cidx == txmaps->map_total))
+ txmaps->map_cidx = 0;
}
return (reclaimed);
@@ -2704,6 +3295,7 @@ write_eqflush_wr(struct sge_eq *eq)
EQ_LOCK_ASSERT_OWNED(eq);
KASSERT(eq->avail > 0, ("%s: no descriptors left.", __func__));
+ KASSERT(!(eq->flags & EQ_CRFLUSHED), ("%s: flushed already", __func__));
wr = (void *)&eq->desc[eq->pidx];
bzero(wr, sizeof(*wr));
@@ -2711,7 +3303,7 @@ write_eqflush_wr(struct sge_eq *eq)
wr->equiq_to_len16 = htobe32(V_FW_WR_LEN16(sizeof(*wr) / 16) |
F_FW_WR_EQUEQ | F_FW_WR_EQUIQ);
- eq->flags |= EQ_CRFLUSHED;
+ eq->flags |= (EQ_CRFLUSHED | EQ_STALLED);
eq->pending++;
eq->avail--;
if (++eq->pidx == eq->cap)
@@ -2743,118 +3335,73 @@ get_flit(bus_dma_segment_t *sgl, int nsegs, int idx)
}
static void
-set_fl_tag_idx(struct sge_fl *fl, int mtu)
+set_fl_tag_idx(struct sge_fl *fl, int bufsize) /* records in fl->tag_idx the index of the buffer size chosen for bufsize */
{
int i;
- FL_LOCK_ASSERT_OWNED(fl);
-
for (i = 0; i < FL_BUF_SIZES - 1; i++) {
- if (FL_BUF_SIZE(i) >= (mtu + FL_PKTSHIFT))
+ if (FL_BUF_SIZE(i) >= bufsize) /* first size that fits; if none does, i ends at FL_BUF_SIZES - 1 */
break;
}
fl->tag_idx = i;
}
-static int
-handle_sge_egr_update(struct adapter *sc, const struct cpl_sge_egr_update *cpl)
-{
- unsigned int qid = G_EGR_QID(ntohl(cpl->opcode_qid));
- struct sge *s = &sc->sge;
- struct sge_txq *txq;
- struct port_info *pi;
-
- txq = (void *)s->eqmap[qid - s->eq_start];
- TXQ_LOCK(txq);
- if (txq->eq.flags & EQ_CRFLUSHED) {
- pi = txq->ifp->if_softc;
- taskqueue_enqueue(pi->tq, &txq->resume_tx);
- txq->egr_update++;
- } else
- wakeup_one(txq); /* txq is going away, wakeup free_txq */
- TXQ_UNLOCK(txq);
-
- return (0);
-}
-
static void
-handle_cpl(struct adapter *sc, struct sge_iq *iq)
+add_fl_to_sfl(struct adapter *sc, struct sge_fl *fl) /* appends fl to the adapter's sc->sfl list and (re)arms sc->sfl_callout, unless fl is FL_DOOMED */
{
- const struct rss_header *rss = (const void *)iq->cdesc;
- const struct cpl_fw6_msg *cpl = (const void *)(rss + 1);
-
- switch (rss->opcode) {
- case CPL_FW4_MSG:
- case CPL_FW6_MSG:
- if (cpl->type == FW6_TYPE_CMD_RPL)
- t4_handle_fw_rpl(sc, cpl->data);
- break;
-
- case CPL_SGE_EGR_UPDATE:
- handle_sge_egr_update(sc, (const void *)cpl);
- break;
-
- case CPL_SET_TCB_RPL:
- filter_rpl(sc, (const void *)cpl);
- break;
-
- default:
- panic("%s: unexpected CPL opcode 0x%x", __func__, rss->opcode);
+ mtx_lock(&sc->sfl_lock); /* sfl_lock is acquired before the freelist lock */
+ FL_LOCK(fl);
+ if ((fl->flags & FL_DOOMED) == 0) { /* a doomed freelist is left off the list (presumably it is being torn down -- confirm) */
+ fl->flags |= FL_STARVING; /* flag is set under both locks, together with the list insertion */
+ TAILQ_INSERT_TAIL(&sc->sfl, fl, link);
+ callout_reset(&sc->sfl_callout, hz / 5, refill_sfl, sc); /* schedule refill_sfl(sc) hz / 5 ticks from now */
}
+ FL_UNLOCK(fl); /* released in the reverse order of acquisition */
+ mtx_unlock(&sc->sfl_lock);
}
-/*
- * m0 is freed on successful transmission.
- */
static int
-ctrl_tx(struct adapter *sc, struct sge_ctrlq *ctrlq, struct mbuf *m0)
+handle_sge_egr_update(struct sge_iq *iq, const struct rss_header *rss,
+ struct mbuf *m) /* handles an egress update for the eq named in the message; m must be NULL; always returns 0 */
{
- struct sge_eq *eq = &ctrlq->eq;
- int rc = 0, ndesc;
- int can_reclaim;
- caddr_t dst;
- struct mbuf *m;
-
- M_ASSERTPKTHDR(m0);
+ const struct cpl_sge_egr_update *cpl = (const void *)(rss + 1); /* the CPL message starts right after the RSS header */
+ unsigned int qid = G_EGR_QID(ntohl(cpl->opcode_qid)); /* opcode_qid is converted from network byte order before the qid is extracted */
+ struct adapter *sc = iq->adapter;
+ struct sge *s = &sc->sge;
+ struct sge_eq *eq;
- if (m0->m_pkthdr.len > SGE_MAX_WR_LEN) {
- log(LOG_ERR, "%s: %s work request too long (%d)",
- device_get_nameunit(sc->dev), __func__, m0->m_pkthdr.len);
- return (EMSGSIZE);
- }
- ndesc = howmany(m0->m_pkthdr.len, CTRL_EQ_ESIZE);
+ KASSERT(m == NULL, ("%s: payload with opcode %02x", __func__,
+ rss->opcode));
+ eq = s->eqmap[qid - s->eq_start]; /* eqmap is indexed by qid relative to eq_start */
EQ_LOCK(eq);
+ KASSERT(eq->flags & EQ_CRFLUSHED,
+ ("%s: unsolicited egress update", __func__));
+ eq->flags &= ~EQ_CRFLUSHED; /* presumably the outstanding flush request has now been answered -- confirm */
+ eq->egr_update++; /* counts egress updates received for this eq */
+
+ if (__predict_false(eq->flags & EQ_DOOMED))
+ wakeup_one(eq); /* doomed eq: wake one thread sleeping on eq instead of resuming tx */
+ else if (eq->flags & EQ_STALLED && can_resume_tx(eq))
+ taskqueue_enqueue(sc->tq[eq->tx_chan], &eq->tx_task); /* stalled eq that can resume: queue its tx_task on the taskqueue of its tx channel */
+ EQ_UNLOCK(eq);
- can_reclaim = reclaimable(eq);
- eq->cidx += can_reclaim;
- eq->avail += can_reclaim;
- if (__predict_false(eq->cidx >= eq->cap))
- eq->cidx -= eq->cap;
-
- if (eq->avail < ndesc) {
- rc = EAGAIN;
- ctrlq->no_desc++;
- goto failed;
- }
+ return (0);
+}
- dst = (void *)&eq->desc[eq->pidx];
- for (m = m0; m; m = m->m_next)
- copy_to_txd(eq, mtod(m, caddr_t), &dst, m->m_len);
+static int
+handle_fw_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) /* passes FW6_TYPE_CMD_RPL message data to t4_handle_fw_rpl(); m must be NULL; always returns 0 */
+{
+ const struct cpl_fw6_msg *cpl = (const void *)(rss + 1); /* the CPL message starts right after the RSS header */
- eq->pidx += ndesc;
- if (__predict_false(eq->pidx >= eq->cap))
- eq->pidx -= eq->cap;
+ KASSERT(m == NULL, ("%s: payload with opcode %02x", __func__,
+ rss->opcode));
- eq->pending += ndesc;
- ring_eq_db(sc, eq);
-failed:
- EQ_UNLOCK(eq);
- if (rc == 0)
- m_freem(m0);
+ if (cpl->type == FW6_TYPE_CMD_RPL) /* messages of any other type are ignored by this handler */
+ t4_handle_fw_rpl(iq->adapter, cpl->data);
- return (rc);
+ return (0); /* returned whether or not the message was forwarded */
}
static int
diff --git a/sys/modules/cxgbe/Makefile b/sys/modules/cxgbe/Makefile
index 2e834461fd1c..1d69f761422e 100644
--- a/sys/modules/cxgbe/Makefile
+++ b/sys/modules/cxgbe/Makefile
@@ -3,5 +3,6 @@
#
SUBDIR = if_cxgbe
+SUBDIR+= firmware
.include <bsd.subdir.mk>
diff --git a/sys/modules/cxgbe/firmware/Makefile b/sys/modules/cxgbe/firmware/Makefile
new file mode 100644
index 000000000000..035de02c0376
--- /dev/null
+++ b/sys/modules/cxgbe/firmware/Makefile
@@ -0,0 +1,27 @@
+#
+# $FreeBSD$
+#
+
+T4FW = ${.CURDIR}/../../../dev/cxgbe/firmware
+.PATH: ${T4FW}
+
+KMOD = t4fw_cfg
+FIRMWS = ${KMOD}.txt:${KMOD}:1.0.0.0
+
+# Each extra ${T4FW}/t4fw_cfg_<name>.txt is added under the image name
+# t4fw_cfg_<name>; an unmatched glob echoes itself and fails exists().
+CFG_FILES != cd ${T4FW} && echo ${KMOD}_*.txt
+.for F in ${CFG_FILES}
+.if exists(${F})
+FIRMWS += ${F}:${F:R}:1.0.0.0
+.endif
+.endfor
+
+# The firmware binary is optional; its version is taken from the file name
+# t4fw-<a>.<b>.<c>.<d>.bin by dropping the t4fw- prefix and .bin suffix.
+FW_BIN != cd ${T4FW} && echo t4fw-*.bin
+.if exists(${FW_BIN})
+FIRMWS += ${FW_BIN}:t4fw:${FW_BIN:R:S/^t4fw-//}
+.endif
+
+.include <bsd.kmod.mk>
diff --git a/tools/tools/cxgbetool/cxgbetool.c b/tools/tools/cxgbetool/cxgbetool.c
index da6bfba5d04f..06fa50bffc0b 100644
--- a/tools/tools/cxgbetool/cxgbetool.c
+++ b/tools/tools/cxgbetool/cxgbetool.c
@@ -396,12 +396,12 @@ do_show_info_header(uint32_t mode)
printf (" Port");
break;
- case T4_FILTER_OVLAN:
- printf (" vld:oVLAN");
+ case T4_FILTER_VNIC:
+ printf (" vld:VNIC");
break;
- case T4_FILTER_IVLAN:
- printf (" vld:iVLAN");
+ case T4_FILTER_VLAN:
+ printf (" vld:VLAN");
break;
case T4_FILTER_IP_TOS:
@@ -653,18 +653,18 @@ do_show_one_filter_info(struct t4_filter *t, uint32_t mode)
printf(" %1d/%1d", t->fs.val.iport, t->fs.mask.iport);
break;
- case T4_FILTER_OVLAN:
+ case T4_FILTER_VNIC:
printf(" %1d:%1x:%02x/%1d:%1x:%02x",
- t->fs.val.ovlan_vld, (t->fs.val.ovlan >> 7) & 0x7,
- t->fs.val.ovlan & 0x7f, t->fs.mask.ovlan_vld,
- (t->fs.mask.ovlan >> 7) & 0x7,
- t->fs.mask.ovlan & 0x7f);
+ t->fs.val.vnic_vld, (t->fs.val.vnic >> 7) & 0x7,
+ t->fs.val.vnic & 0x7f, t->fs.mask.vnic_vld,
+ (t->fs.mask.vnic >> 7) & 0x7,
+ t->fs.mask.vnic & 0x7f);
break;
- case T4_FILTER_IVLAN:
+ case T4_FILTER_VLAN:
printf(" %1d:%04x/%1d:%04x",
- t->fs.val.ivlan_vld, t->fs.val.ivlan,
- t->fs.mask.ivlan_vld, t->fs.mask.ivlan);
+ t->fs.val.vlan_vld, t->fs.val.vlan,
+ t->fs.mask.vlan_vld, t->fs.mask.vlan);
break;
case T4_FILTER_IP_TOS:
@@ -830,11 +830,11 @@ get_filter_mode(void)
if (mode & T4_FILTER_IP_TOS)
printf("tos ");
- if (mode & T4_FILTER_IVLAN)
- printf("ivlan ");
+ if (mode & T4_FILTER_VLAN)
+ printf("vlan ");
- if (mode & T4_FILTER_OVLAN)
- printf("ovlan ");
+ if (mode & T4_FILTER_VNIC)
+ printf("vnic ");
if (mode & T4_FILTER_PORT)
printf("iport ");
@@ -868,11 +868,12 @@ set_filter_mode(int argc, const char *argv[])
if (!strcmp(argv[0], "tos"))
mode |= T4_FILTER_IP_TOS;
- if (!strcmp(argv[0], "ivlan"))
- mode |= T4_FILTER_IVLAN;
+ if (!strcmp(argv[0], "vlan"))
+ mode |= T4_FILTER_VLAN;
- if (!strcmp(argv[0], "ovlan"))
- mode |= T4_FILTER_OVLAN;
+ if (!strcmp(argv[0], "ovlan") ||
+ !strcmp(argv[0], "vnic"))
+ mode |= T4_FILTER_VNIC;
if (!strcmp(argv[0], "iport"))
mode |= T4_FILTER_PORT;
@@ -936,15 +937,20 @@ set_filter(uint32_t idx, int argc, const char *argv[])
t.fs.val.iport = val;
t.fs.mask.iport = mask;
} else if (!parse_val_mask("ovlan", args, &val, &mask)) {
- t.fs.val.ovlan = val;
- t.fs.mask.ovlan = mask;
- t.fs.val.ovlan_vld = 1;
- t.fs.mask.ovlan_vld = 1;
- } else if (!parse_val_mask("ivlan", args, &val, &mask)) {
- t.fs.val.ivlan = val;
- t.fs.mask.ivlan = mask;
- t.fs.val.ivlan_vld = 1;
- t.fs.mask.ivlan_vld = 1;
+ t.fs.val.vnic = val;
+ t.fs.mask.vnic = mask;
+ t.fs.val.vnic_vld = 1;
+ t.fs.mask.vnic_vld = 1;
+ } else if (!parse_val_mask("vnic", args, &val, &mask)) {
+ t.fs.val.vnic = val;
+ t.fs.mask.vnic = mask;
+ t.fs.val.vnic_vld = 1;
+ t.fs.mask.vnic_vld = 1;
+ } else if (!parse_val_mask("vlan", args, &val, &mask)) {
+ t.fs.val.vlan = val;
+ t.fs.mask.vlan = mask;
+ t.fs.val.vlan_vld = 1;
+ t.fs.mask.vlan_vld = 1;
} else if (!parse_val_mask("tos", args, &val, &mask)) {
t.fs.val.tos = val;
t.fs.mask.tos = mask;