author     | Navdeep Parhar <np@FreeBSD.org> | 2011-12-16 02:09:51 +0000
committer  | Navdeep Parhar <np@FreeBSD.org> | 2011-12-16 02:09:51 +0000
commit     | 733b92779e406798c19bf5122279f5c3ecbbd64d (patch)
tree       | 3f54f68c887ea6a17b5427746efe66dfa931b56d
parent     | 22ea9f58f0c0b19ed8fdf8042f08aa3a243408e0 (diff)
download   | src-733b92779e406798c19bf5122279f5c3ecbbd64d.tar.gz
           | src-733b92779e406798c19bf5122279f5c3ecbbd64d.zip
Many updates to cxgbe(4)
- Device configuration via a plain text config file. The driver can also
operate when it is not attached to the chip as the master driver.
- Generic "work request" queue that serves as the base for both ctrl and
ofld tx queues.
- Generic interrupt handler routine that can process any event on any
kind of ingress queue (via a dispatch table).
- A couple of new driver ioctls. cxgbetool can now install firmware on the
card ("loadfw" command) and read the card's memory ("memdump" and "tcb"
commands).
- Lots of assorted information is available under dev.t4nex.X.misc.*. This is
primarily for debugging and won't show up in sysctl -a.
- Code to manage the L2 tables on the chip.
- Updates to the cxgbe(4) man page to go with the tunables that have changed.
- Updates to the shared code in common/
- Updates to the driver-firmware interface (now at fw 1.4.16.0)
MFC after: 1 month
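
For illustration only: the man page hunk in this commit documents the renamed
per-port queue tunables and the new interrupt_types knob. A minimal
loader.conf(5) sketch using those tunables might look like the lines below;
the values are illustrative, not recommendations.

    # Illustrative loader.conf(5) entries for the tunables documented below.
    hw.cxgbe.ntxq10g="8"                # tx queues per 10Gb port (default: min(16, ncpus))
    hw.cxgbe.nrxq10g="8"                # rx queues per 10Gb port (default: min(8, ncpus))
    hw.cxgbe.holdoff_timer_idx_10G="1"  # index into the SGE timer list (1 => 5us)
    hw.cxgbe.holdoff_pktc_idx_10G="-1"  # -1 disables packet counting
    hw.cxgbe.interrupt_types="7"        # bit 0 INTx, bit 1 MSI, bit 2 MSI-X; 7 = all allowed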
Notes:
svn path=/head/; revision=228561
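
A side note on the generic ingress-queue dispatch mentioned in the commit
message: the adapter.h hunk in this diff adds cpl_handler_t and
t4_register_cpl_handler(). The sketch below shows roughly how a handler would
plug into that per-opcode table; the handler body and the opcode name
CPL_SOME_OPCODE are hypothetical, only the typedef and the registration call
come from the header.

    /*
     * Hypothetical consumer of the per-opcode dispatch table (sketch only).
     * The handler signature matches cpl_handler_t from adapter.h.
     */
    static int
    my_cpl_handler(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
    {
            /* Decode the CPL that follows the rss_header, then consume/free m. */
            return (0);
    }

    /*
     * During attach (sc is the struct adapter softc): any message with this
     * opcode, arriving on any kind of ingress queue, is dispatched to the
     * handler above.  CPL_SOME_OPCODE is a placeholder, not a real opcode.
     */
    t4_register_cpl_handler(sc, CPL_SOME_OPCODE, my_cpl_handler);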
-rw-r--r-- | share/man/man4/cxgbe.4 | 78
-rw-r--r-- | sys/dev/cxgbe/adapter.h | 231
-rw-r--r-- | sys/dev/cxgbe/common/common.h | 35
-rw-r--r-- | sys/dev/cxgbe/common/t4_hw.c | 365
-rw-r--r-- | sys/dev/cxgbe/common/t4_hw.h | 78
-rw-r--r-- | sys/dev/cxgbe/firmware/t4fw_cfg.txt | 132
-rw-r--r-- | sys/dev/cxgbe/firmware/t4fw_cfg_uwire.txt | 503
-rw-r--r-- | sys/dev/cxgbe/firmware/t4fw_interface.h (renamed from sys/dev/cxgbe/common/t4fw_interface.h) | 640
-rw-r--r-- | sys/dev/cxgbe/offload.h | 71
-rw-r--r-- | sys/dev/cxgbe/osdep.h | 1
-rw-r--r-- | sys/dev/cxgbe/t4_ioctl.h | 27
-rw-r--r-- | sys/dev/cxgbe/t4_l2t.c | 675
-rw-r--r-- | sys/dev/cxgbe/t4_l2t.h | 12
-rw-r--r-- | sys/dev/cxgbe/t4_main.c | 3023
-rw-r--r-- | sys/dev/cxgbe/t4_sge.c | 2191
-rw-r--r-- | sys/modules/cxgbe/Makefile | 1
-rw-r--r-- | sys/modules/cxgbe/firmware/Makefile | 27
-rw-r--r-- | tools/tools/cxgbetool/cxgbetool.c | 64
18 files changed, 6282 insertions, 1872 deletions
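
Before the diff itself, a hedged usage sketch of the new ioctls and sysctls
mentioned above. The cxgbetool invocation form (nexus device first) and the
firmware file name are assumptions; the sysctl names come from the man page
and commit message in this change.

    # Install a firmware image on the card (file name is a placeholder).
    cxgbetool t4nex0 loadfw t4fw.bin

    # Per-interface holdoff timer index; may be changed at any time.
    sysctl dev.cxgbe.0.holdoff_tmr_idx=1

    # Packet-count index and queue sizes; these sysctls work only while the
    # interface has never been marked up.
    sysctl dev.cxgbe.0.holdoff_pktc_idx=-1
    sysctl dev.cxgbe.0.qsize_rxq=2048

    # Configuration profile in use; additional debugging information lives
    # under dev.t4nex.0.misc.* and is not listed by "sysctl -a".
    sysctl dev.t4nex.0.cf dev.t4nex.0.cfcsum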
diff --git a/share/man/man4/cxgbe.4 b/share/man/man4/cxgbe.4 index ab2979a18b2b..4bf5ac3ec462 100644 --- a/share/man/man4/cxgbe.4 +++ b/share/man/man4/cxgbe.4 @@ -99,18 +99,29 @@ Tunables can be set at the prompt before booting the kernel or stored in .Xr loader.conf 5 . .Bl -tag -width indent -.It Va hw.cxgbe.max_ntxq_10G_port -The maximum number of tx queues to use for a 10Gb port. -The default value is 8. -.It Va hw.cxgbe.max_nrxq_10G_port -The maximum number of rx queues to use for a 10Gb port. -The default value is 8. -.It Va hw.cxgbe.max_ntxq_1G_port -The maximum number of tx queues to use for a 1Gb port. -The default value is 2. -.It Va hw.cxgbe.max_nrxq_1G_port -The maximum number of rx queues to use for a 1Gb port. -The default value is 2. +.It Va hw.cxgbe.ntxq10g +The number of tx queues to use for a 10Gb port. The default is 16 or the number +of CPU cores in the system, whichever is less. +.It Va hw.cxgbe.nrxq10g +The number of rx queues to use for a 10Gb port. The default is 8 or the number +of CPU cores in the system, whichever is less. +.It Va hw.cxgbe.ntxq1g +The number of tx queues to use for a 1Gb port. The default is 4 or the number +of CPU cores in the system, whichever is less. +.It Va hw.cxgbe.nrxq1g +The number of rx queues to use for a 1Gb port. The default is 2 or the number +of CPU cores in the system, whichever is less. +.It Va hw.cxgbe.nofldtxq10g +The number of TOE tx queues to use for a 10Gb port. The default is 8 or the +number of CPU cores in the system, whichever is less. +.It Va hw.cxgbe.nofldrxq10g +The number of TOE rx queues to use for a 10Gb port. The default is 2 or the +number of CPU cores in the system, whichever is less. +.It Va hw.cxgbe.nofldtxq1g +The number of TOE tx queues to use for a 1Gb port. The default is 2 or the +number of CPU cores in the system, whichever is less. +.It Va hw.cxgbe.nofldrxq1g +The number of TOE rx queues to use for a 1Gb port. The default is 1. .It Va hw.cxgbe.holdoff_timer_idx_10G .It Va hw.cxgbe.holdoff_timer_idx_1G The timer index value to use to delay interrupts. @@ -119,6 +130,8 @@ by default (all values are in microseconds) and the index selects a value from this list. The default value is 1 for both 10Gb and 1Gb ports, which means the timer value is 5us. +Different cxgbe interfaces can be assigned different values at any time via the +dev.cxgbe.X.holdoff_tmr_idx sysctl. .It Va hw.cxgbe.holdoff_pktc_idx_10G .It Va hw.cxgbe.holdoff_pktc_idx_1G The packet-count index value to use to delay interrupts. @@ -127,6 +140,11 @@ and the index selects a value from this list. The default value is 2 for both 10Gb and 1Gb ports, which means 16 packets (or the holdoff timer going off) before an interrupt is generated. +-1 disables packet counting. +Different cxgbe interfaces can be assigned different values via the +dev.cxgbe.X.holdoff_pktc_idx sysctl. +This sysctl works only when the interface has never been marked up (as done by +ifconfig up). .It Va hw.cxgbe.qsize_txq The size, in number of entries, of the descriptor ring used for a tx queue. @@ -134,10 +152,46 @@ A buf_ring of the same size is also allocated for additional software queuing. See .Xr ifnet 9 . The default value is 1024. +Different cxgbe interfaces can be assigned different values via the +dev.cxgbe.X.qsize_txq sysctl. +This sysctl works only when the interface has never been marked up (as done by +ifconfig up). .It Va hw.cxgbe.qsize_rxq The size, in number of entries, of the descriptor ring used for an rx queue. The default value is 1024. 
+Different cxgbe interfaces can be assigned different values via the +dev.cxgbe.X.qsize_rxq sysctl. +This sysctl works only when the interface has never been marked up (as done by +ifconfig up). +.It Va hw.cxgbe.interrupt_types +The interrupt types that the driver is allowed to use. +Bit 0 represents INTx (line interrupts), bit 1 MSI, bit 2 MSI-X. +The default is 7 (all allowed). +The driver will select the best possible type out of the allowed types by +itself. +.It Va hw.cxgbe.config_file +Select a pre-packaged device configuration file. +A configuration file contains a recipe for partitioning and configuring the +hardware resources on the card. +This tunable is for specialized applications only and should not be used in +normal operation. +The configuration profile currently in use is available in the dev.t4nex.X.cf +and dev.t4nex.X.cfcsum sysctls. +.It Va hw.cxgbe.linkcaps_allowed +.It Va hw.cxgbe.niccaps_allowed +.It Va hw.cxgbe.toecaps_allowed +.It Va hw.cxgbe.rdmacaps_allowed +.It Va hw.cxgbe.iscsicaps_allowed +.It Va hw.cxgbe.fcoecaps_allowed +Disallowing capabilities provides a hint to the driver and firmware to not +reserve hardware resources for that feature. +Each of these is a bit field with a bit for each sub-capability within the +capability. +This tunable is for specialized applications only and should not be used in +normal operation. +The capabilities for which hardware resources have been reserved are listed in +dev.t4nex.X.*caps sysctls. .El .Sh SUPPORT For general information and support, diff --git a/sys/dev/cxgbe/adapter.h b/sys/dev/cxgbe/adapter.h index 8624fc19dfa0..7e56932c2a72 100644 --- a/sys/dev/cxgbe/adapter.h +++ b/sys/dev/cxgbe/adapter.h @@ -31,6 +31,7 @@ #ifndef __T4_ADAPTER_H__ #define __T4_ADAPTER_H__ +#include <sys/kernel.h> #include <sys/bus.h> #include <sys/rman.h> #include <sys/types.h> @@ -46,8 +47,9 @@ #include <netinet/tcp_lro.h> #include "offload.h" -#include "common/t4fw_interface.h" +#include "firmware/t4fw_interface.h" +#define T4_CFGNAME "t4fw_cfg" #define T4_FWNAME "t4fw" MALLOC_DECLARE(M_CXGBE); @@ -110,25 +112,21 @@ enum { FW_IQ_QSIZE = 256, FW_IQ_ESIZE = 64, /* At least 64 mandated by the firmware spec */ - INTR_IQ_QSIZE = 64, - INTR_IQ_ESIZE = 64, /* Handles some CPLs too, do not reduce */ - - CTRL_EQ_QSIZE = 128, - CTRL_EQ_ESIZE = 64, - RX_IQ_QSIZE = 1024, RX_IQ_ESIZE = 64, /* At least 64 so CPL_RX_PKT will fit */ - RX_FL_ESIZE = 64, /* 8 64bit addresses */ + EQ_ESIZE = 64, /* All egress queues use this entry size */ + RX_FL_ESIZE = EQ_ESIZE, /* 8 64bit addresses */ #if MJUMPAGESIZE != MCLBYTES FL_BUF_SIZES = 4, /* cluster, jumbop, jumbo9k, jumbo16k */ #else FL_BUF_SIZES = 3, /* cluster, jumbo9k, jumbo16k */ #endif + CTRL_EQ_QSIZE = 128, + TX_EQ_QSIZE = 1024, - TX_EQ_ESIZE = 64, TX_SGL_SEGS = 36, TX_WR_FLITS = SGE_MAX_WR_LEN / 8 }; @@ -144,13 +142,16 @@ enum { /* adapter flags */ FULL_INIT_DONE = (1 << 0), FW_OK = (1 << 1), - INTR_SHARED = (1 << 2), /* one set of intrq's for all ports */ + INTR_DIRECT = (1 << 2), /* direct interrupts for everything */ + MASTER_PF = (1 << 3), + ADAP_SYSCTL_CTX = (1 << 4), CXGBE_BUSY = (1 << 9), /* port flags */ DOOMED = (1 << 0), - VI_ENABLED = (1 << 1), + PORT_INIT_DONE = (1 << 1), + PORT_SYSCTL_CTX = (1 << 2), }; #define IS_DOOMED(pi) (pi->flags & DOOMED) @@ -186,6 +187,12 @@ struct port_info { int first_txq; /* index of first tx queue */ int nrxq; /* # of rx queues */ int first_rxq; /* index of first rx queue */ +#ifndef TCP_OFFLOAD_DISABLE + int nofldtxq; /* # of offload tx queues */ + int 
first_ofld_txq; /* index of first offload tx queue */ + int nofldrxq; /* # of offload rx queues */ + int first_ofld_rxq; /* index of first offload rx queue */ +#endif int tmr_idx; int pktc_idx; int qsize_rxq; @@ -194,11 +201,8 @@ struct port_info { struct link_config link_cfg; struct port_stats stats; - struct taskqueue *tq; struct callout tick; - struct sysctl_ctx_list ctx; /* lives from ifconfig up to down */ - struct sysctl_oid *oid_rxq; - struct sysctl_oid *oid_txq; + struct sysctl_ctx_list ctx; /* from ifconfig up to driver detach */ uint8_t hw_addr[ETHER_ADDR_LEN]; /* factory MAC address, won't change */ }; @@ -222,17 +226,26 @@ struct tx_map { bus_dmamap_t map; }; +/* DMA maps used for tx */ +struct tx_maps { + struct tx_map *maps; + uint32_t map_total; /* # of DMA maps */ + uint32_t map_pidx; /* next map to be used */ + uint32_t map_cidx; /* reclaimed up to this index */ + uint32_t map_avail; /* # of available maps */ +}; + struct tx_sdesc { uint8_t desc_used; /* # of hardware descriptors used by the WR */ uint8_t credits; /* NIC txq: # of frames sent out in the WR */ }; -typedef void (iq_intr_handler_t)(void *); - enum { /* iq flags */ - IQ_ALLOCATED = (1 << 1), /* firmware resources allocated */ - IQ_STARTED = (1 << 2), /* started */ + IQ_ALLOCATED = (1 << 0), /* firmware resources allocated */ + IQ_HAS_FL = (1 << 1), /* iq associated with a freelist */ + IQ_INTR = (1 << 2), /* iq takes direct interrupt */ + IQ_LRO_ENABLED = (1 << 3), /* iq is an eth rxq with LRO enabled */ /* iq state */ IQS_DISABLED = 0, @@ -252,26 +265,35 @@ struct sge_iq { uint16_t abs_id; /* absolute SGE id for the iq */ int8_t intr_pktc_idx; /* packet count threshold index */ int8_t pad0; - iq_intr_handler_t *handler; __be64 *desc; /* KVA of descriptor ring */ - volatile uint32_t state; + volatile int state; struct adapter *adapter; const __be64 *cdesc; /* current descriptor */ uint8_t gen; /* generation bit */ uint8_t intr_params; /* interrupt holdoff parameters */ - uint8_t intr_next; /* holdoff for next interrupt */ + uint8_t intr_next; /* XXX: holdoff for next interrupt */ uint8_t esize; /* size (bytes) of each entry in the queue */ uint16_t qsize; /* size (# of entries) of the queue */ uint16_t cidx; /* consumer index */ - uint16_t cntxt_id; /* SGE context id for the iq */ + uint16_t cntxt_id; /* SGE context id for the iq */ + + STAILQ_ENTRY(sge_iq) link; }; enum { + EQ_CTRL = 1, + EQ_ETH = 2, +#ifndef TCP_OFFLOAD_DISABLE + EQ_OFLD = 3, +#endif + /* eq flags */ - EQ_ALLOCATED = (1 << 1), /* firmware resources allocated */ - EQ_STARTED = (1 << 2), /* started */ - EQ_CRFLUSHED = (1 << 3), /* expecting an update from SGE */ + EQ_TYPEMASK = 7, /* 3 lsbits hold the type */ + EQ_ALLOCATED = (1 << 3), /* firmware resources allocated */ + EQ_DOOMED = (1 << 4), /* about to be destroyed */ + EQ_CRFLUSHED = (1 << 5), /* expecting an update from SGE */ + EQ_STALLED = (1 << 6), /* out of hw descriptors or dmamaps */ }; /* @@ -281,10 +303,11 @@ enum { * consumes them) but it's special enough to have its own struct (see sge_fl). 
*/ struct sge_eq { + unsigned int flags; /* MUST be first */ + unsigned int cntxt_id; /* SGE context id for the eq */ bus_dma_tag_t desc_tag; bus_dmamap_t desc_map; char lockname[16]; - unsigned int flags; struct mtx eq_lock; struct tx_desc *desc; /* KVA of descriptor ring */ @@ -297,9 +320,24 @@ struct sge_eq { uint16_t pidx; /* producer idx (desc idx) */ uint16_t pending; /* # of descriptors used since last doorbell */ uint16_t iqid; /* iq that gets egr_update for the eq */ - unsigned int cntxt_id; /* SGE context id for the eq */ + uint8_t tx_chan; /* tx channel used by the eq */ + struct task tx_task; + struct callout tx_callout; + + /* stats */ + + uint32_t egr_update; /* # of SGE_EGR_UPDATE notifications for eq */ + uint32_t unstalled; /* recovered from stall */ +}; + +enum { + FL_STARVING = (1 << 0), /* on the adapter's list of starving fl's */ + FL_DOOMED = (1 << 1), /* about to be destroyed */ }; +#define FL_RUNNING_LOW(fl) (fl->cap - fl->needed <= fl->lowat) +#define FL_NOT_RUNNING_LOW(fl) (fl->cap - fl->needed >= 2 * fl->lowat) + struct sge_fl { bus_dma_tag_t desc_tag; bus_dmamap_t desc_map; @@ -307,6 +345,7 @@ struct sge_fl { uint8_t tag_idx; struct mtx fl_lock; char lockname[16]; + int flags; __be64 *desc; /* KVA of descriptor ring, ptr to addresses */ bus_addr_t ba; /* bus address of descriptor ring */ @@ -317,8 +356,10 @@ struct sge_fl { uint32_t cidx; /* consumer idx (buffer idx, NOT hw desc idx) */ uint32_t pidx; /* producer idx (buffer idx, NOT hw desc idx) */ uint32_t needed; /* # of buffers needed to fill up fl. */ + uint32_t lowat; /* # of buffers <= this means fl needs help */ uint32_t pending; /* # of bufs allocated since last doorbell */ unsigned int dmamap_failed; + TAILQ_ENTRY(sge_fl) link; /* All starving freelists */ }; /* txq: SGE egress queue + what's needed for Ethernet NIC */ @@ -330,14 +371,8 @@ struct sge_txq { struct buf_ring *br; /* tx buffer ring */ struct tx_sdesc *sdesc; /* KVA of software descriptor ring */ struct mbuf *m; /* held up due to temporary resource shortage */ - struct task resume_tx; - /* DMA maps used for tx */ - struct tx_map *maps; - uint32_t map_total; /* # of DMA maps */ - uint32_t map_pidx; /* next map to be used */ - uint32_t map_cidx; /* reclaimed up to this index */ - uint32_t map_avail; /* # of available maps */ + struct tx_maps txmaps; /* stats for common events first */ @@ -354,20 +389,14 @@ struct sge_txq { uint32_t no_dmamap; /* no DMA map to load the mbuf */ uint32_t no_desc; /* out of hardware descriptors */ - uint32_t egr_update; /* # of SGE_EGR_UPDATE notifications for txq */ } __aligned(CACHE_LINE_SIZE); -enum { - RXQ_LRO_ENABLED = (1 << 0) -}; - /* rxq: SGE ingress queue + SGE free list + miscellaneous items */ struct sge_rxq { struct sge_iq iq; /* MUST be first */ - struct sge_fl fl; + struct sge_fl fl; /* MUST follow iq */ struct ifnet *ifp; /* the interface this rxq belongs to */ - unsigned int flags; #ifdef INET struct lro_ctrl lro; /* LRO state */ #endif @@ -381,12 +410,28 @@ struct sge_rxq { } __aligned(CACHE_LINE_SIZE); -/* ctrlq: SGE egress queue + stats for control queue */ -struct sge_ctrlq { +#ifndef TCP_OFFLOAD_DISABLE +/* ofld_rxq: SGE ingress queue + SGE free list + miscellaneous items */ +struct sge_ofld_rxq { + struct sge_iq iq; /* MUST be first */ + struct sge_fl fl; /* MUST follow iq */ +} __aligned(CACHE_LINE_SIZE); +#endif + +/* + * wrq: SGE egress queue that is given prebuilt work requests. Both the control + * and offload tx queues are of this type. 
+ */ +struct sge_wrq { struct sge_eq eq; /* MUST be first */ + struct adapter *adapter; + struct mbuf *head; /* held up due to lack of descriptors */ + struct mbuf *tail; /* valid only if head is valid */ + /* stats for common events first */ + uint64_t tx_wrs; /* # of tx work requests */ /* stats for not-that-common events */ @@ -394,20 +439,28 @@ struct sge_ctrlq { } __aligned(CACHE_LINE_SIZE); struct sge { - uint16_t timer_val[SGE_NTIMERS]; - uint8_t counter_val[SGE_NCOUNTERS]; + int timer_val[SGE_NTIMERS]; + int counter_val[SGE_NCOUNTERS]; int fl_starve_threshold; - int nrxq; /* total rx queues (all ports and the rest) */ - int ntxq; /* total tx queues (all ports and the rest) */ - int niq; /* total ingress queues */ - int neq; /* total egress queues */ + int nrxq; /* total # of Ethernet rx queues */ + int ntxq; /* total # of Ethernet tx tx queues */ +#ifndef TCP_OFFLOAD_DISABLE + int nofldrxq; /* total # of TOE rx queues */ + int nofldtxq; /* total # of TOE tx queues */ +#endif + int niq; /* total # of ingress queues */ + int neq; /* total # of egress queues */ struct sge_iq fwq; /* Firmware event queue */ - struct sge_ctrlq *ctrlq;/* Control queues */ - struct sge_iq *intrq; /* Interrupt queues */ + struct sge_wrq mgmtq; /* Management queue (control queue) */ + struct sge_wrq *ctrlq; /* Control queues */ struct sge_txq *txq; /* NIC tx queues */ struct sge_rxq *rxq; /* NIC rx queues */ +#ifndef TCP_OFFLOAD_DISABLE + struct sge_wrq *ofld_txq; /* TOE tx queues */ + struct sge_ofld_rxq *ofld_rxq; /* TOE rx queues */ +#endif uint16_t iq_start; int eq_start; @@ -415,7 +468,12 @@ struct sge { struct sge_eq **eqmap; /* eq->cntxt_id to eq mapping */ }; +struct rss_header; +typedef int (*cpl_handler_t)(struct sge_iq *, const struct rss_header *, + struct mbuf *); + struct adapter { + SLIST_ENTRY(adapter) link; device_t dev; struct cdev *cdev; @@ -444,27 +502,47 @@ struct adapter { struct sge sge; + struct taskqueue *tq[NCHAN]; /* taskqueues that flush data out */ struct port_info *port[MAX_NPORTS]; uint8_t chan_map[NCHAN]; + uint32_t filter_mode; +#ifndef TCP_OFFLOAD_DISABLE + struct uld_softc tom; + struct tom_tunables tt; +#endif struct l2t_data *l2t; /* L2 table */ struct tid_info tids; - int registered_device_map; int open_device_map; +#ifndef TCP_OFFLOAD_DISABLE + int offload_map; +#endif int flags; char fw_version[32]; + unsigned int cfcsum; struct adapter_params params; struct t4_virt_res vres; - struct sysctl_ctx_list ctx; /* from first_port_up to last_port_down */ - struct sysctl_oid *oid_fwq; - struct sysctl_oid *oid_ctrlq; - struct sysctl_oid *oid_intrq; + uint16_t linkcaps; + uint16_t niccaps; + uint16_t toecaps; + uint16_t rdmacaps; + uint16_t iscsicaps; + uint16_t fcoecaps; + + struct sysctl_ctx_list ctx; /* from adapter_full_init to full_uninit */ struct mtx sc_lock; char lockname[16]; + + /* Starving free lists */ + struct mtx sfl_lock; /* same cache-line as sc_lock? 
but that's ok */ + TAILQ_HEAD(, sge_fl) sfl; + struct callout sfl_callout; + + cpl_handler_t cpl_handler[256] __aligned(CACHE_LINE_SIZE); }; #define ADAPTER_LOCK(sc) mtx_lock(&(sc)->sc_lock) @@ -506,11 +584,15 @@ struct adapter { #define for_each_rxq(pi, iter, rxq) \ rxq = &pi->adapter->sge.rxq[pi->first_rxq]; \ for (iter = 0; iter < pi->nrxq; ++iter, ++rxq) +#define for_each_ofld_txq(pi, iter, ofld_txq) \ + ofld_txq = &pi->adapter->sge.ofld_txq[pi->first_ofld_txq]; \ + for (iter = 0; iter < pi->nofldtxq; ++iter, ++ofld_txq) +#define for_each_ofld_rxq(pi, iter, ofld_rxq) \ + ofld_rxq = &pi->adapter->sge.ofld_rxq[pi->first_ofld_rxq]; \ + for (iter = 0; iter < pi->nofldrxq; ++iter, ++ofld_rxq) /* One for errors, one for firmware events */ #define T4_EXTRA_INTR 2 -#define NINTRQ(sc) ((sc)->intr_count > T4_EXTRA_INTR ? \ - (sc)->intr_count - T4_EXTRA_INTR : 1) static inline uint32_t t4_read_reg(struct adapter *sc, uint32_t reg) @@ -589,29 +671,52 @@ static inline bool is_10G_port(const struct port_info *pi) return ((pi->link_cfg.supported & FW_PORT_CAP_SPEED_10G) != 0); } +static inline int tx_resume_threshold(struct sge_eq *eq) +{ + return (eq->qsize / 4); +} + /* t4_main.c */ -void cxgbe_txq_start(void *, int); +void t4_tx_task(void *, int); +void t4_tx_callout(void *); int t4_os_find_pci_capability(struct adapter *, int); int t4_os_pci_save_state(struct adapter *); int t4_os_pci_restore_state(struct adapter *); void t4_os_portmod_changed(const struct adapter *, int); void t4_os_link_changed(struct adapter *, int, int); +void t4_iterate(void (*)(struct adapter *, void *), void *); +int t4_register_cpl_handler(struct adapter *, int, cpl_handler_t); /* t4_sge.c */ void t4_sge_modload(void); -void t4_sge_init(struct adapter *); +int t4_sge_init(struct adapter *); int t4_create_dma_tag(struct adapter *); int t4_destroy_dma_tag(struct adapter *); int t4_setup_adapter_queues(struct adapter *); int t4_teardown_adapter_queues(struct adapter *); -int t4_setup_eth_queues(struct port_info *); -int t4_teardown_eth_queues(struct port_info *); +int t4_setup_port_queues(struct port_info *); +int t4_teardown_port_queues(struct port_info *); +int t4_alloc_tx_maps(struct tx_maps *, bus_dma_tag_t, int, int); +void t4_free_tx_maps(struct tx_maps *, bus_dma_tag_t); void t4_intr_all(void *); void t4_intr(void *); void t4_intr_err(void *); void t4_intr_evt(void *); int t4_mgmt_tx(struct adapter *, struct mbuf *); +int t4_wrq_tx_locked(struct adapter *, struct sge_wrq *, struct mbuf *); int t4_eth_tx(struct ifnet *, struct sge_txq *, struct mbuf *); void t4_update_fl_bufsize(struct ifnet *); +int can_resume_tx(struct sge_eq *); + +static inline int t4_wrq_tx(struct adapter *sc, struct sge_wrq *wrq, struct mbuf *m) +{ + int rc; + + TXQ_LOCK(wrq); + rc = t4_wrq_tx_locked(sc, wrq, m); + TXQ_UNLOCK(wrq); + return (rc); +} + #endif diff --git a/sys/dev/cxgbe/common/common.h b/sys/dev/cxgbe/common/common.h index 913be9bd6e27..81866df633ed 100644 --- a/sys/dev/cxgbe/common/common.h +++ b/sys/dev/cxgbe/common/common.h @@ -42,6 +42,15 @@ enum { enum { MEM_EDC0, MEM_EDC1, MEM_MC }; +enum { + MEMWIN0_APERTURE = 2048, + MEMWIN0_BASE = 0x1b800, + MEMWIN1_APERTURE = 32768, + MEMWIN1_BASE = 0x28000, + MEMWIN2_APERTURE = 65536, + MEMWIN2_BASE = 0x30000, +}; + enum dev_master { MASTER_CANT, MASTER_MAY, MASTER_MUST }; enum dev_state { DEV_STATE_UNINIT, DEV_STATE_INIT, DEV_STATE_ERR }; @@ -53,8 +62,8 @@ enum { }; #define FW_VERSION_MAJOR 1 -#define FW_VERSION_MINOR 3 -#define FW_VERSION_MICRO 10 +#define FW_VERSION_MINOR 4 +#define 
FW_VERSION_MICRO 16 struct port_stats { u64 tx_octets; /* total # of octets in good frames */ @@ -190,7 +199,6 @@ struct tp_proxy_stats { struct tp_cpl_stats { u32 req[4]; u32 rsp[4]; - u32 tx_err[4]; }; struct tp_rdma_stats { @@ -214,9 +222,9 @@ struct vpd_params { }; struct pci_params { - unsigned int vpd_cap_addr; - unsigned char speed; - unsigned char width; + unsigned int vpd_cap_addr; + unsigned short speed; + unsigned short width; }; /* @@ -239,20 +247,20 @@ struct adapter_params { unsigned int fw_vers; unsigned int tp_vers; - u8 api_vers[7]; unsigned short mtus[NMTUS]; unsigned short a_wnd[NCCTRL_WIN]; unsigned short b_wnd[NCCTRL_WIN]; - unsigned int mc_size; /* MC memory size */ - unsigned int nfilters; /* size of filter region */ + unsigned int mc_size; /* MC memory size */ + unsigned int nfilters; /* size of filter region */ unsigned int cim_la_size; - unsigned int nports; /* # of ethernet ports */ + /* Used as int in sysctls, do not reduce size */ + unsigned int nports; /* # of ethernet ports */ unsigned int portvec; - unsigned int rev; /* chip revision */ + unsigned int rev; /* chip revision */ unsigned int offload; unsigned int ofldq_wr_cred; @@ -366,6 +374,9 @@ int t4_seeprom_wp(struct adapter *adapter, int enable); int t4_read_flash(struct adapter *adapter, unsigned int addr, unsigned int nwords, u32 *data, int byte_oriented); int t4_load_fw(struct adapter *adapter, const u8 *fw_data, unsigned int size); +int t4_load_boot(struct adapter *adap, const u8 *boot_data, + unsigned int boot_addr, unsigned int size); +unsigned int t4_flash_cfg_addr(struct adapter *adapter); int t4_load_cfg(struct adapter *adapter, const u8 *cfg_data, unsigned int size); int t4_get_fw_version(struct adapter *adapter, u32 *vers); int t4_get_tp_version(struct adapter *adapter, u32 *vers); @@ -460,8 +471,8 @@ int t4_wol_pat_enable(struct adapter *adap, unsigned int port, unsigned int map, int t4_fw_hello(struct adapter *adap, unsigned int mbox, unsigned int evt_mbox, enum dev_master master, enum dev_state *state); int t4_fw_bye(struct adapter *adap, unsigned int mbox); -int t4_early_init(struct adapter *adap, unsigned int mbox); int t4_fw_reset(struct adapter *adap, unsigned int mbox, int reset); +int t4_fw_initialize(struct adapter *adap, unsigned int mbox); int t4_query_params(struct adapter *adap, unsigned int mbox, unsigned int pf, unsigned int vf, unsigned int nparams, const u32 *params, u32 *val); diff --git a/sys/dev/cxgbe/common/t4_hw.c b/sys/dev/cxgbe/common/t4_hw.c index dd8d0fc2705f..48462aa68664 100644 --- a/sys/dev/cxgbe/common/t4_hw.c +++ b/sys/dev/cxgbe/common/t4_hw.c @@ -30,10 +30,10 @@ __FBSDID("$FreeBSD$"); #include "common.h" #include "t4_regs.h" #include "t4_regs_values.h" -#include "t4fw_interface.h" +#include "firmware/t4fw_interface.h" #undef msleep -#define msleep(x) DELAY((x) * 1000) +#define msleep(x) pause("t4hw", (x) * hz / 1000) /** * t4_wait_op_done_val - wait until an operation is completed @@ -187,7 +187,7 @@ int t4_wr_mbox_meat(struct adapter *adap, int mbox, const void *cmd, int size, * off to larger delays to a maximum retry delay. 
*/ static const int delay[] = { - 1, 1, 3, 5, 10, 10, 20, 50, 100, 200 + 1, 1, 3, 5, 10, 10, 20, 50, 100 }; u32 v; @@ -625,17 +625,6 @@ enum { SF_RD_DATA_FAST = 0xb, /* read flash */ SF_RD_ID = 0x9f, /* read ID */ SF_ERASE_SECTOR = 0xd8, /* erase sector */ - - FW_START_SEC = 8, /* first flash sector for FW */ - FW_END_SEC = 15, /* last flash sector for FW */ - FW_IMG_START = FW_START_SEC * SF_SEC_SIZE, - FW_MAX_SIZE = (FW_END_SEC - FW_START_SEC + 1) * SF_SEC_SIZE, - - FLASH_CFG_MAX_SIZE = 0x10000 , /* max size of the flash config file */ - FLASH_CFG_OFFSET = 0x1f0000, - FLASH_CFG_START_SEC = FLASH_CFG_OFFSET / SF_SEC_SIZE, - FPGA_FLASH_CFG_OFFSET = 0xf0000 , /* if FPGA mode, then cfg file is at 1MB - 64KB */ - FPGA_FLASH_CFG_START_SEC = FPGA_FLASH_CFG_OFFSET / SF_SEC_SIZE, }; /** @@ -763,12 +752,15 @@ int t4_read_flash(struct adapter *adapter, unsigned int addr, * @addr: the start address to write * @n: length of data to write in bytes * @data: the data to write + * @byte_oriented: whether to store data as bytes or as words * * Writes up to a page of data (256 bytes) to the serial flash starting * at the given address. All the data must be written to the same page. + * If @byte_oriented is set the write data is stored as byte stream + * (i.e. matches what on disk), otherwise in big-endian. */ static int t4_write_flash(struct adapter *adapter, unsigned int addr, - unsigned int n, const u8 *data) + unsigned int n, const u8 *data, int byte_oriented) { int ret; u32 buf[SF_PAGE_SIZE / 4]; @@ -788,6 +780,9 @@ static int t4_write_flash(struct adapter *adapter, unsigned int addr, for (val = 0, i = 0; i < c; ++i) val = (val << 8) + *data++; + if (!byte_oriented) + val = htonl(val); + ret = sf1_write(adapter, c, c != left, 1, val); if (ret) goto unlock; @@ -799,7 +794,8 @@ static int t4_write_flash(struct adapter *adapter, unsigned int addr, t4_write_reg(adapter, A_SF_OP, 0); /* unlock SF */ /* Read the page to verify the write succeeded */ - ret = t4_read_flash(adapter, addr & ~0xff, ARRAY_SIZE(buf), buf, 1); + ret = t4_read_flash(adapter, addr & ~0xff, ARRAY_SIZE(buf), buf, + byte_oriented); if (ret) return ret; @@ -825,7 +821,7 @@ unlock: int t4_get_fw_version(struct adapter *adapter, u32 *vers) { return t4_read_flash(adapter, - FW_IMG_START + offsetof(struct fw_hdr, fw_ver), 1, + FLASH_FW_START + offsetof(struct fw_hdr, fw_ver), 1, vers, 0); } @@ -838,7 +834,7 @@ int t4_get_fw_version(struct adapter *adapter, u32 *vers) */ int t4_get_tp_version(struct adapter *adapter, u32 *vers) { - return t4_read_flash(adapter, FW_IMG_START + offsetof(struct fw_hdr, + return t4_read_flash(adapter, FLASH_FW_START + offsetof(struct fw_hdr, tp_microcode_ver), 1, vers, 0); } @@ -854,24 +850,17 @@ int t4_get_tp_version(struct adapter *adapter, u32 *vers) */ int t4_check_fw_version(struct adapter *adapter) { - u32 api_vers[2]; int ret, major, minor, micro; ret = t4_get_fw_version(adapter, &adapter->params.fw_vers); if (!ret) ret = t4_get_tp_version(adapter, &adapter->params.tp_vers); - if (!ret) - ret = t4_read_flash(adapter, - FW_IMG_START + offsetof(struct fw_hdr, intfver_nic), - 2, api_vers, 1); if (ret) return ret; major = G_FW_HDR_FW_VER_MAJOR(adapter->params.fw_vers); minor = G_FW_HDR_FW_VER_MINOR(adapter->params.fw_vers); micro = G_FW_HDR_FW_VER_MICRO(adapter->params.fw_vers); - memcpy(adapter->params.api_vers, api_vers, - sizeof(adapter->params.api_vers)); if (major != FW_VERSION_MAJOR) { /* major mismatch - fail */ CH_ERR(adapter, "card FW has major version %u, driver wants " @@ -914,6 +903,21 @@ static int 
t4_flash_erase_sectors(struct adapter *adapter, int start, int end) } /** + * t4_flash_cfg_addr - return the address of the flash configuration file + * @adapter: the adapter + * + * Return the address within the flash where the Firmware Configuration + * File is stored. + */ +unsigned int t4_flash_cfg_addr(struct adapter *adapter) +{ + if (adapter->params.sf_size == 0x100000) + return FLASH_FPGA_CFG_START; + else + return FLASH_CFG_START; +} + +/** * t4_load_cfg - download config file * @adap: the adapter * @cfg_data: the cfg text file to write @@ -928,17 +932,8 @@ int t4_load_cfg(struct adapter *adap, const u8 *cfg_data, unsigned int size) unsigned int flash_cfg_start_sec; unsigned int sf_sec_size = adap->params.sf_size / adap->params.sf_nsec; - if (adap->params.sf_size == 0x100000) { - addr = FPGA_FLASH_CFG_OFFSET; - flash_cfg_start_sec = FPGA_FLASH_CFG_START_SEC; - } else { - addr = FLASH_CFG_OFFSET; - flash_cfg_start_sec = FLASH_CFG_START_SEC; - } - if (!size) { - CH_ERR(adap, "cfg file has no data\n"); - return -EINVAL; - } + addr = t4_flash_cfg_addr(adap); + flash_cfg_start_sec = addr / SF_SEC_SIZE; if (size > FLASH_CFG_MAX_SIZE) { CH_ERR(adap, "cfg file too large, max is %u bytes\n", @@ -950,7 +945,11 @@ int t4_load_cfg(struct adapter *adap, const u8 *cfg_data, unsigned int size) sf_sec_size); ret = t4_flash_erase_sectors(adap, flash_cfg_start_sec, flash_cfg_start_sec + i - 1); - if (ret) + /* + * If size == 0 then we're simply erasing the FLASH sectors associated + * with the on-adapter Firmware Configuration File. + */ + if (ret || size == 0) goto out; /* this will write to the flash up to SF_PAGE_SIZE at a time */ @@ -959,7 +958,7 @@ int t4_load_cfg(struct adapter *adap, const u8 *cfg_data, unsigned int size) n = size - i; else n = SF_PAGE_SIZE; - ret = t4_write_flash(adap, addr, n, cfg_data); + ret = t4_write_flash(adap, addr, n, cfg_data, 1); if (ret) goto out; @@ -969,7 +968,8 @@ int t4_load_cfg(struct adapter *adap, const u8 *cfg_data, unsigned int size) out: if (ret) - CH_ERR(adap, "config file download failed %d\n", ret); + CH_ERR(adap, "config file %s failed %d\n", + (size == 0 ? 
"clear" : "download"), ret); return ret; } @@ -1004,9 +1004,9 @@ int t4_load_fw(struct adapter *adap, const u8 *fw_data, unsigned int size) CH_ERR(adap, "FW image size differs from size in FW header\n"); return -EINVAL; } - if (size > FW_MAX_SIZE) { + if (size > FLASH_FW_MAX_SIZE) { CH_ERR(adap, "FW image too large, max is %u bytes\n", - FW_MAX_SIZE); + FLASH_FW_MAX_SIZE); return -EFBIG; } @@ -1020,7 +1020,8 @@ int t4_load_fw(struct adapter *adap, const u8 *fw_data, unsigned int size) } i = DIV_ROUND_UP(size, sf_sec_size); /* # of sectors spanned */ - ret = t4_flash_erase_sectors(adap, FW_START_SEC, FW_START_SEC + i - 1); + ret = t4_flash_erase_sectors(adap, FLASH_FW_START_SEC, + FLASH_FW_START_SEC + i - 1); if (ret) goto out; @@ -1031,28 +1032,110 @@ int t4_load_fw(struct adapter *adap, const u8 *fw_data, unsigned int size) */ memcpy(first_page, fw_data, SF_PAGE_SIZE); ((struct fw_hdr *)first_page)->fw_ver = htonl(0xffffffff); - ret = t4_write_flash(adap, FW_IMG_START, SF_PAGE_SIZE, first_page); + ret = t4_write_flash(adap, FLASH_FW_START, SF_PAGE_SIZE, first_page, 1); if (ret) goto out; - addr = FW_IMG_START; + addr = FLASH_FW_START; for (size -= SF_PAGE_SIZE; size; size -= SF_PAGE_SIZE) { addr += SF_PAGE_SIZE; fw_data += SF_PAGE_SIZE; - ret = t4_write_flash(adap, addr, SF_PAGE_SIZE, fw_data); + ret = t4_write_flash(adap, addr, SF_PAGE_SIZE, fw_data, 1); if (ret) goto out; } ret = t4_write_flash(adap, - FW_IMG_START + offsetof(struct fw_hdr, fw_ver), - sizeof(hdr->fw_ver), (const u8 *)&hdr->fw_ver); + FLASH_FW_START + offsetof(struct fw_hdr, fw_ver), + sizeof(hdr->fw_ver), (const u8 *)&hdr->fw_ver, 1); out: if (ret) CH_ERR(adap, "firmware download failed, error %d\n", ret); return ret; } +/* BIOS boot header */ +typedef struct boot_header_s { + u8 signature[2]; /* signature */ + u8 length; /* image length (include header) */ + u8 offset[4]; /* initialization vector */ + u8 reserved[19]; /* reserved */ + u8 exheader[2]; /* offset to expansion header */ +} boot_header_t; + +enum { + BOOT_FLASH_BOOT_ADDR = 0x0,/* start address of boot image in flash */ + BOOT_SIGNATURE = 0xaa55, /* signature of BIOS boot ROM */ + BOOT_SIZE_INC = 512, /* image size measured in 512B chunks */ + BOOT_MIN_SIZE = sizeof(boot_header_t), /* at least basic header */ + BOOT_MAX_SIZE = 1024*BOOT_SIZE_INC /* 1 byte * length increment */ +}; + +/* + * t4_load_boot - download boot flash + * @adapter: the adapter + * @boot_data: the boot image to write + * @size: image size + * + * Write the supplied boot image to the card's serial flash. + * The boot image has the following sections: a 28-byte header and the + * boot image. + */ +int t4_load_boot(struct adapter *adap, const u8 *boot_data, + unsigned int boot_addr, unsigned int size) +{ + int ret, addr; + unsigned int i; + unsigned int boot_sector = boot_addr * 1024; + unsigned int sf_sec_size = adap->params.sf_size / adap->params.sf_nsec; + + /* + * Perform some primitive sanity testing to avoid accidentally + * writing garbage over the boot sectors. We ought to check for + * more but it's not worth it for now ... 
+ */ + if (size < BOOT_MIN_SIZE || size > BOOT_MAX_SIZE) { + CH_ERR(adap, "boot image too small/large\n"); + return -EFBIG; + } + + /* + * Make sure the boot image does not encroach on the firmware region + */ + if ((boot_sector + size) >> 16 > FLASH_FW_START_SEC) { + CH_ERR(adap, "boot image encroaching on firmware region\n"); + return -EFBIG; + } + + i = DIV_ROUND_UP(size, sf_sec_size); /* # of sectors spanned */ + ret = t4_flash_erase_sectors(adap, boot_sector >> 16, + (boot_sector >> 16) + i - 1); + if (ret) + goto out; + + /* + * Skip over the first SF_PAGE_SIZE worth of data and write it after + * we finish copying the rest of the boot image. This will ensure + * that the BIOS boot header will only be written if the boot image + * was written in full. + */ + addr = boot_sector; + for (size -= SF_PAGE_SIZE; size; size -= SF_PAGE_SIZE) { + addr += SF_PAGE_SIZE; + boot_data += SF_PAGE_SIZE; + ret = t4_write_flash(adap, addr, SF_PAGE_SIZE, boot_data, 0); + if (ret) + goto out; + } + + ret = t4_write_flash(adap, boot_sector, SF_PAGE_SIZE, boot_data, 0); + +out: + if (ret) + CH_ERR(adap, "boot image download failed, error %d\n", ret); + return ret; +} + /** * t4_read_cimq_cfg - read CIM queue configuration * @adap: the adapter @@ -1668,7 +1751,10 @@ static void sge_intr_handler(struct adapter *adapter) err = t4_read_reg(adapter, A_SGE_ERROR_STATS); if (err & F_ERROR_QID_VALID) { CH_ERR(adapter, "SGE error for queue %u\n", G_ERROR_QID(err)); - t4_write_reg(adapter, A_SGE_ERROR_STATS, F_ERROR_QID_VALID); + if (err & F_UNCAPTURED_ERROR) + CH_ERR(adapter, "SGE UNCAPTURED_ERROR set (clearing)\n"); + t4_write_reg(adapter, A_SGE_ERROR_STATS, F_ERROR_QID_VALID | + F_UNCAPTURED_ERROR); } if (v != 0) @@ -2261,6 +2347,7 @@ int t4_config_rss_range(struct adapter *adapter, int mbox, unsigned int viid, */ while (n > 0) { int nq = min(n, 32); + int nq_packed = 0; __be32 *qp = &cmd.iq0_to_iq2; /* @@ -2282,25 +2369,28 @@ int t4_config_rss_range(struct adapter *adapter, int mbox, unsigned int viid, * Ingress Queue ID array and insert them into the command. */ while (nq > 0) { - unsigned int v; /* * Grab up to the next 3 Ingress Queue IDs (wrapping * around the Ingress Queue ID array if necessary) and * insert them into the firmware RSS command at the * current 3-tuple position within the commad. 
*/ - v = V_FW_RSS_IND_TBL_CMD_IQ0(*rsp); - if (++rsp >= rsp_end) - rsp = rspq; - v |= V_FW_RSS_IND_TBL_CMD_IQ1(*rsp); - if (++rsp >= rsp_end) - rsp = rspq; - v |= V_FW_RSS_IND_TBL_CMD_IQ2(*rsp); - if (++rsp >= rsp_end) - rsp = rspq; - - *qp++ = htonl(v); - nq -= 3; + u16 qbuf[3]; + u16 *qbp = qbuf; + int nqbuf = min(3, nq); + + nq -= nqbuf; + qbuf[0] = qbuf[1] = qbuf[2] = 0; + while (nqbuf && nq_packed < 32) { + nqbuf--; + nq_packed++; + *qbp++ = *rsp++; + if (rsp >= rsp_end) + rsp = rspq; + } + *qp++ = cpu_to_be32(V_FW_RSS_IND_TBL_CMD_IQ0(qbuf[0]) | + V_FW_RSS_IND_TBL_CMD_IQ1(qbuf[1]) | + V_FW_RSS_IND_TBL_CMD_IQ2(qbuf[2])); } /* @@ -2694,8 +2784,6 @@ void t4_tp_get_cpl_stats(struct adapter *adap, struct tp_cpl_stats *st) { t4_read_indirect(adap, A_TP_MIB_INDEX, A_TP_MIB_DATA, st->req, 8, A_TP_MIB_CPL_IN_REQ_0); - t4_read_indirect(adap, A_TP_MIB_INDEX, A_TP_MIB_DATA, st->tx_err, - 4, A_TP_MIB_CPL_OUT_ERR_0); } /** @@ -3298,6 +3386,7 @@ void t4_get_port_stats(struct adapter *adap, int idx, struct port_stats *p) t4_read_reg64(adap, PORT_REG(idx, A_MPS_PORT_STAT_##name##_L)) #define GET_STAT_COM(name) t4_read_reg64(adap, A_MPS_STAT_##name##_L) + p->tx_pause = GET_STAT(TX_PORT_PAUSE); p->tx_octets = GET_STAT(TX_PORT_BYTES); p->tx_frames = GET_STAT(TX_PORT_FRAMES); p->tx_bcast_frames = GET_STAT(TX_PORT_BCAST); @@ -3312,7 +3401,6 @@ void t4_get_port_stats(struct adapter *adap, int idx, struct port_stats *p) p->tx_frames_1024_1518 = GET_STAT(TX_PORT_1024B_1518B); p->tx_frames_1519_max = GET_STAT(TX_PORT_1519B_MAX); p->tx_drop = GET_STAT(TX_PORT_DROP); - p->tx_pause = GET_STAT(TX_PORT_PAUSE); p->tx_ppp0 = GET_STAT(TX_PORT_PPP0); p->tx_ppp1 = GET_STAT(TX_PORT_PPP1); p->tx_ppp2 = GET_STAT(TX_PORT_PPP2); @@ -3322,6 +3410,7 @@ void t4_get_port_stats(struct adapter *adap, int idx, struct port_stats *p) p->tx_ppp6 = GET_STAT(TX_PORT_PPP6); p->tx_ppp7 = GET_STAT(TX_PORT_PPP7); + p->rx_pause = GET_STAT(RX_PORT_PAUSE); p->rx_octets = GET_STAT(RX_PORT_BYTES); p->rx_frames = GET_STAT(RX_PORT_FRAMES); p->rx_bcast_frames = GET_STAT(RX_PORT_BCAST); @@ -3340,7 +3429,6 @@ void t4_get_port_stats(struct adapter *adap, int idx, struct port_stats *p) p->rx_frames_512_1023 = GET_STAT(RX_PORT_512B_1023B); p->rx_frames_1024_1518 = GET_STAT(RX_PORT_1024B_1518B); p->rx_frames_1519_max = GET_STAT(RX_PORT_1519B_MAX); - p->rx_pause = GET_STAT(RX_PORT_PAUSE); p->rx_ppp0 = GET_STAT(RX_PORT_PPP0); p->rx_ppp1 = GET_STAT(RX_PORT_PPP1); p->rx_ppp2 = GET_STAT(RX_PORT_PPP2); @@ -3683,28 +3771,114 @@ int t4_fw_hello(struct adapter *adap, unsigned int mbox, unsigned int evt_mbox, { int ret; struct fw_hello_cmd c; + u32 v; + unsigned int master_mbox; + int retries = FW_CMD_HELLO_RETRIES; +retry: memset(&c, 0, sizeof(c)); INIT_CMD(c, HELLO, WRITE); - c.err_to_mbasyncnot = htonl( + c.err_to_clearinit = htonl( V_FW_HELLO_CMD_MASTERDIS(master == MASTER_CANT) | V_FW_HELLO_CMD_MASTERFORCE(master == MASTER_MUST) | V_FW_HELLO_CMD_MBMASTER(master == MASTER_MUST ? mbox : M_FW_HELLO_CMD_MBMASTER) | - V_FW_HELLO_CMD_MBASYNCNOT(evt_mbox)); + V_FW_HELLO_CMD_MBASYNCNOT(evt_mbox) | + V_FW_HELLO_CMD_STAGE(FW_HELLO_CMD_STAGE_OS) | + F_FW_HELLO_CMD_CLEARINIT); + /* + * Issue the HELLO command to the firmware. If it's not successful + * but indicates that we got a "busy" or "timeout" condition, retry + * the HELLO until we exhaust our retry limit. 
+ */ ret = t4_wr_mbox(adap, mbox, &c, sizeof(c), &c); - if (ret == 0 && state) { - u32 v = ntohl(c.err_to_mbasyncnot); - if (v & F_FW_HELLO_CMD_INIT) - *state = DEV_STATE_INIT; - else if (v & F_FW_HELLO_CMD_ERR) + if (ret != FW_SUCCESS) { + if ((ret == -EBUSY || ret == -ETIMEDOUT) && retries-- > 0) + goto retry; + return ret; + } + + v = ntohl(c.err_to_clearinit); + master_mbox = G_FW_HELLO_CMD_MBMASTER(v); + if (state) { + if (v & F_FW_HELLO_CMD_ERR) *state = DEV_STATE_ERR; + else if (v & F_FW_HELLO_CMD_INIT) + *state = DEV_STATE_INIT; else *state = DEV_STATE_UNINIT; - return G_FW_HELLO_CMD_MBMASTER(v); } - return ret; + + /* + * If we're not the Master PF then we need to wait around for the + * Master PF Driver to finish setting up the adapter. + * + * Note that we also do this wait if we're a non-Master-capable PF and + * there is no current Master PF; a Master PF may show up momentarily + * and we wouldn't want to fail pointlessly. (This can happen when an + * OS loads lots of different drivers rapidly at the same time). In + * this case, the Master PF returned by the firmware will be + * M_PCIE_FW_MASTER so the test below will work ... + */ + if ((v & (F_FW_HELLO_CMD_ERR|F_FW_HELLO_CMD_INIT)) == 0 && + master_mbox != mbox) { + int waiting = FW_CMD_HELLO_TIMEOUT; + + /* + * Wait for the firmware to either indicate an error or + * initialized state. If we see either of these we bail out + * and report the issue to the caller. If we exhaust the + * "hello timeout" and we haven't exhausted our retries, try + * again. Otherwise bail with a timeout error. + */ + for (;;) { + u32 pcie_fw; + + msleep(50); + waiting -= 50; + + /* + * If neither Error nor Initialialized are indicated + * by the firmware keep waiting till we exhaust our + * timeout ... and then retry if we haven't exhausted + * our retries ... + */ + pcie_fw = t4_read_reg(adap, A_PCIE_FW); + if (!(pcie_fw & (F_PCIE_FW_ERR|F_PCIE_FW_INIT))) { + if (waiting <= 0) { + if (retries-- > 0) + goto retry; + + return -ETIMEDOUT; + } + continue; + } + + /* + * We either have an Error or Initialized condition + * report errors preferentially. + */ + if (state) { + if (pcie_fw & F_PCIE_FW_ERR) + *state = DEV_STATE_ERR; + else if (pcie_fw & F_PCIE_FW_INIT) + *state = DEV_STATE_INIT; + } + + /* + * If we arrived before a Master PF was selected and + * there's not a valid Master PF, grab its identity + * for our caller. + */ + if (master_mbox == M_PCIE_FW_MASTER && + (pcie_fw & F_PCIE_FW_MASTER_VLD)) + master_mbox = G_PCIE_FW_MASTER(pcie_fw); + break; + } + } + + return master_mbox; } /** @@ -3724,37 +3898,37 @@ int t4_fw_bye(struct adapter *adap, unsigned int mbox) } /** - * t4_init_cmd - ask FW to initialize the device + * t4_fw_reset - issue a reset to FW * @adap: the adapter * @mbox: mailbox to use for the FW command + * @reset: specifies the type of reset to perform * - * Issues a command to FW to partially initialize the device. This - * performs initialization that generally doesn't depend on user input. + * Issues a reset command of the specified type to FW. 
*/ -int t4_early_init(struct adapter *adap, unsigned int mbox) +int t4_fw_reset(struct adapter *adap, unsigned int mbox, int reset) { - struct fw_initialize_cmd c; + struct fw_reset_cmd c; memset(&c, 0, sizeof(c)); - INIT_CMD(c, INITIALIZE, WRITE); + INIT_CMD(c, RESET, WRITE); + c.val = htonl(reset); return t4_wr_mbox(adap, mbox, &c, sizeof(c), NULL); } /** - * t4_fw_reset - issue a reset to FW + * t4_fw_initialize - ask FW to initialize the device * @adap: the adapter * @mbox: mailbox to use for the FW command - * @reset: specifies the type of reset to perform * - * Issues a reset command of the specified type to FW. + * Issues a command to FW to partially initialize the device. This + * performs initialization that generally doesn't depend on user input. */ -int t4_fw_reset(struct adapter *adap, unsigned int mbox, int reset) +int t4_fw_initialize(struct adapter *adap, unsigned int mbox) { - struct fw_reset_cmd c; + struct fw_initialize_cmd c; memset(&c, 0, sizeof(c)); - INIT_CMD(c, RESET, WRITE); - c.val = htonl(reset); + INIT_CMD(c, INITIALIZE, WRITE); return t4_wr_mbox(adap, mbox, &c, sizeof(c), NULL); } @@ -4495,6 +4669,21 @@ static int __devinit get_flash_params(struct adapter *adapter) return 0; } +static void __devinit set_pcie_completion_timeout(struct adapter *adapter, + u8 range) +{ + u16 val; + u32 pcie_cap; + + pcie_cap = t4_os_find_pci_capability(adapter, PCI_CAP_ID_EXP); + if (pcie_cap) { + t4_os_pci_read_cfg2(adapter, pcie_cap + PCI_EXP_DEVCTL2, &val); + val &= 0xfff0; + val |= range ; + t4_os_pci_write_cfg2(adapter, pcie_cap + PCI_EXP_DEVCTL2, val); + } +} + /** * t4_prep_adapter - prepare SW and HW for operation * @adapter: the adapter @@ -4541,6 +4730,8 @@ int __devinit t4_prep_adapter(struct adapter *adapter) adapter->params.portvec = 1; adapter->params.vpd.cclk = 50000; + /* Set pci completion timeout value to 4 seconds. */ + set_pcie_completion_timeout(adapter, 0xd); return 0; } diff --git a/sys/dev/cxgbe/common/t4_hw.h b/sys/dev/cxgbe/common/t4_hw.h index fd48aabaa249..b93734e8de39 100644 --- a/sys/dev/cxgbe/common/t4_hw.h +++ b/sys/dev/cxgbe/common/t4_hw.h @@ -182,4 +182,82 @@ struct pagepod { #define M_PPOD_OFST 0xFFFFFFFF #define V_PPOD_OFST(x) ((x) << S_PPOD_OFST) +/* + * Flash layout. + */ +#define FLASH_START(start) ((start) * SF_SEC_SIZE) +#define FLASH_MAX_SIZE(nsecs) ((nsecs) * SF_SEC_SIZE) + +enum { + /* + * Various Expansion-ROM boot images, etc. + */ + FLASH_EXP_ROM_START_SEC = 0, + FLASH_EXP_ROM_NSECS = 6, + FLASH_EXP_ROM_START = FLASH_START(FLASH_EXP_ROM_START_SEC), + FLASH_EXP_ROM_MAX_SIZE = FLASH_MAX_SIZE(FLASH_EXP_ROM_NSECS), + + /* + * iSCSI Boot Firmware Table (iBFT) and other driver-related + * parameters ... + */ + FLASH_IBFT_START_SEC = 6, + FLASH_IBFT_NSECS = 1, + FLASH_IBFT_START = FLASH_START(FLASH_IBFT_START_SEC), + FLASH_IBFT_MAX_SIZE = FLASH_MAX_SIZE(FLASH_IBFT_NSECS), + + /* + * Boot configuration data. + */ + FLASH_BOOTCFG_START_SEC = 7, + FLASH_BOOTCFG_NSECS = 1, + FLASH_BOOTCFG_START = FLASH_START(FLASH_BOOTCFG_START_SEC), + FLASH_BOOTCFG_MAX_SIZE = FLASH_MAX_SIZE(FLASH_BOOTCFG_NSECS), + + /* + * Location of firmware image in FLASH. + */ + FLASH_FW_START_SEC = 8, + FLASH_FW_NSECS = 8, + FLASH_FW_START = FLASH_START(FLASH_FW_START_SEC), + FLASH_FW_MAX_SIZE = FLASH_MAX_SIZE(FLASH_FW_NSECS), + + /* + * iSCSI persistent/crash information. 
+ */ + FLASH_ISCSI_CRASH_START_SEC = 29, + FLASH_ISCSI_CRASH_NSECS = 1, + FLASH_ISCSI_CRASH_START = FLASH_START(FLASH_ISCSI_CRASH_START_SEC), + FLASH_ISCSI_CRASH_MAX_SIZE = FLASH_MAX_SIZE(FLASH_ISCSI_CRASH_NSECS), + + /* + * FCoE persistent/crash information. + */ + FLASH_FCOE_CRASH_START_SEC = 30, + FLASH_FCOE_CRASH_NSECS = 1, + FLASH_FCOE_CRASH_START = FLASH_START(FLASH_FCOE_CRASH_START_SEC), + FLASH_FCOE_CRASH_MAX_SIZE = FLASH_MAX_SIZE(FLASH_FCOE_CRASH_NSECS), + + /* + * Location of Firmware Configuration File in FLASH. Since the FPGA + * "FLASH" is smaller we need to store the Configuration File in a + * different location -- which will overlap the end of the firmware + * image if firmware ever gets that large ... + */ + FLASH_CFG_START_SEC = 31, + FLASH_CFG_NSECS = 1, + FLASH_CFG_START = FLASH_START(FLASH_CFG_START_SEC), + FLASH_CFG_MAX_SIZE = FLASH_MAX_SIZE(FLASH_CFG_NSECS), + + FLASH_FPGA_CFG_START_SEC = 15, + FLASH_FPGA_CFG_START = FLASH_START(FLASH_FPGA_CFG_START_SEC), + + /* + * Sectors 32-63 are reserved for FLASH failover. + */ +}; + +#undef FLASH_START +#undef FLASH_MAX_SIZE + #endif /* __T4_HW_H */ diff --git a/sys/dev/cxgbe/firmware/t4fw_cfg.txt b/sys/dev/cxgbe/firmware/t4fw_cfg.txt new file mode 100644 index 000000000000..65d6f0bf011a --- /dev/null +++ b/sys/dev/cxgbe/firmware/t4fw_cfg.txt @@ -0,0 +1,132 @@ +# Firmware configuration file. +# +# Global limits (some are hardware limits, others are due to the firmware). +# Also note that the firmware reserves some of these resources for its own use +# so it's not always possible for the drivers to grab everything listed here. +# nvi = 128 virtual interfaces +# niqflint = 1023 ingress queues with freelists and/or interrupts +# nethctrl = 64K Ethernet or ctrl egress queues +# neq = 64K egress queues of all kinds, including freelists +# nexactf = 336 MPS TCAM entries, can oversubscribe. +# + +[global] + rss_glb_config_mode = basicvirtual + rss_glb_config_options = tnlmapen, hashtoeplitz, tnlalllkp + + sge_timer_value = 1, 5, 10, 50, 100, 200 # usecs + + # TP_SHIFT_CNT + reg[0x7dc0] = 0x64f8849 + + filterMode = fragmentation, mpshittype, protocol, vlan, port, fcoe + + # TP rx and tx payload memory (% of the total EDRAM + DDR3). + tp_pmrx = 40 + tp_pmtx = 60 + tp_pmrx_pagesize = 64K + tp_pmtx_pagesize = 64K + +# PFs 0-3. These get 8 MSI/8 MSI-X vectors each. VFs are supported by +# these 4 PFs only. Not used here at all. +[function "0"] + nvf = 16 + nvi = 1 +[function "0/*"] + nvi = 1 + +[function "1"] + nvf = 16 + nvi = 1 +[function "1/*"] + nvi = 1 + +[function "2"] + nvf = 16 + nvi = 1 +[function "2/*"] + nvi = 1 + +[function "3"] + nvf = 16 + nvi = 1 +[function "3/*"] + nvi = 1 + +# PF4 is the resource-rich PF that the bus/nexus driver attaches to. +# It gets 32 MSI/128 MSI-X vectors. +[function "4"] + wx_caps = all + r_caps = all + nvi = 48 + niqflint = 256 + nethctrl = 128 + neq = 256 + nexactf = 300 + cmask = all + pmask = all + + # driver will mask off features it won't use + protocol = ofld + + tp_l2t = 100 + + # TCAM has 8K cells; each region must start at a multiple of 128 cell. + # Each entry in these categories takes 4 cells each. nhash will use the + # TCAM iff there is room left (that is, the rest don't add up to 2048). + nroute = 32 + nclip = 0 # needed only for IPv6 offload + nfilter = 1504 + nserver = 512 + nhash = 16384 + +# PF5 is the SCSI Controller PF. It gets 32 MSI/40 MSI-X vectors. +# Not used right now. +[function "5"] + nvi = 1 + +# PF6 is the FCoE Controller PF. It gets 32 MSI/40 MSI-X vectors. 
+# Not used right now. +[function "6"] + nvi = 1 + +# MPS has 192K buffer space for ingress packets from the wire as well as +# loopback path of the L2 switch. +[port "0"] + dcb = none + bg_mem = 25 + lpbk_mem = 25 + hwm = 30 + lwm = 15 + dwm = 30 + +[port "1"] + dcb = none + bg_mem = 25 + lpbk_mem = 25 + hwm = 30 + lwm = 15 + dwm = 30 + +[port "2"] + dcb = none + bg_mem = 25 + lpbk_mem = 25 + hwm = 30 + lwm = 15 + dwm = 30 + +[port "3"] + dcb = none + bg_mem = 25 + lpbk_mem = 25 + hwm = 30 + lwm = 15 + dwm = 30 + +[fini] + version = 0x1 + checksum = 0xb31cdfac +# +# $FreeBSD$ +# diff --git a/sys/dev/cxgbe/firmware/t4fw_cfg_uwire.txt b/sys/dev/cxgbe/firmware/t4fw_cfg_uwire.txt new file mode 100644 index 000000000000..134d60c4d27d --- /dev/null +++ b/sys/dev/cxgbe/firmware/t4fw_cfg_uwire.txt @@ -0,0 +1,503 @@ +# Chelsio T4 Factory Default configuration file. +# +# Copyright (C) 2010 Chelsio Communications. All rights reserved. +# + +# This file provides the default, power-on configuration for 4-port T4-based +# adapters shipped from the factory. These defaults are designed to address +# the needs of the vast majority of T4 customers. The basic idea is to have +# a default configuration which allows a customer to plug a T4 adapter in and +# have it work regardless of OS, driver or application except in the most +# unusual and/or demanding customer applications. +# +# Many of the T4 resources which are described by this configuration are +# finite. This requires balancing the configuration/operation needs of +# device drivers across OSes and a large number of customer application. +# +# Some of the more important resources to allocate and their constaints are: +# 1. Virtual Interfaces: 128. +# 2. Ingress Queues with Free Lists: 1024. PCI-E SR-IOV Virtual Functions +# must use a power of 2 Ingress Queues. +# 3. Egress Queues: 128K. PCI-E SR-IOV Virtual Functions must use a +# power of 2 Egress Queues. +# 4. MSI-X Vectors: 1088. A complication here is that the PCI-E SR-IOV +# Virtual Functions based off of a Physical Function all get the +# same umber of MSI-X Vectors as the base Physical Function. +# Additionally, regardless of whether Virtual Functions are enabled or +# not, their MSI-X "needs" are counted by the PCI-E implementation. +# And finally, all Physical Funcations capable of supporting Virtual +# Functions (PF0-3) must have the same number of configured TotalVFs in +# their SR-IOV Capabilities. +# 5. Multi-Port Support (MPS) TCAM: 336 entries to support MAC destination +# address matching on Ingress Packets. +# +# Some of the important OS/Driver resource needs are: +# 6. Some OS Drivers will manage all resources through a single Physical +# Function (currently PF0 but it could be any Physical Function). Thus, +# this "Unified PF" will need to have enough resources allocated to it +# to allow for this. And because of the MSI-X resource allocation +# constraints mentioned above, this probably means we'll either have to +# severely limit the TotalVFs if we continue to use PF0 as the Unified PF +# or we'll need to move the Unified PF into the PF4-7 range since those +# Physical Functions don't have any Virtual Functions associated with +# them. +# 7. Some OS Drivers will manage different ports and functions (NIC, +# storage, etc.) on different Physical Functions. For example, NIC +# functions for ports 0-3 on PF0-3, FCoE on PF4, iSCSI on PF5, etc. +# +# Some of the customer application needs which need to be accommodated: +# 8. 
Some customers will want to support large CPU count systems with +# good scaling. Thus, we'll need to accommodate a number of +# Ingress Queues and MSI-X Vectors to allow up to some number of CPUs +# to be involved per port and per application function. For example, +# in the case where all ports and application functions will be +# managed via a single Unified PF and we want to accommodate scaling up +# to 8 CPUs, we would want: +# +# 4 ports * +# 3 application functions (NIC, FCoE, iSCSI) per port * +# 8 Ingress Queue/MSI-X Vectors per application function +# +# for a total of 96 Ingress Queues and MSI-X Vectors on the Unified PF. +# (Plus a few for Firmware Event Queues, etc.) +# +# 9. Some customers will want to use T4's PCI-E SR-IOV Capability to allow +# Virtual Machines to directly access T4 functionality via SR-IOV +# Virtual Functions and "PCI Device Passthrough" -- this is especially +# true for the NIC application functionality. (Note that there is +# currently no ability to use the TOE, FCoE, iSCSI, etc. via Virtual +# Functions so this is in fact solely limited to NIC.) +# + + +# Global configuration settings. +# +[global] + rss_glb_config_mode = basicvirtual + rss_glb_config_options = tnlmapen,hashtoeplitz,tnlalllkp + + # The following Scatter Gather Engine (SGE) settings assume a 4KB Host + # Page Size and a 64B L1 Cache Line Size. It programs the + # EgrStatusPageSize and IngPadBoundary to 64B and the PktShift to 2. + # If a Master PF Driver finds itself on a machine with different + # parameters, then the Master PF Driver is responsible for initializing + # these parameters to appropriate values. + # + # Notes: + # 1. The Free List Buffer Sizes below are raw and the firmware will + # round them up to the Ingress Padding Boundary. + # 2. The SGE Timer Values below are expressed below in microseconds. + # The firmware will convert these values to Core Clock Ticks when + # it processes the configuration parameters. + # + reg[0x1008] = 0x40810/0x21c70 # SGE_CONTROL + reg[0x100c] = 0x22222222 # SGE_HOST_PAGE_SIZE + reg[0x10a0] = 0x01040810 # SGE_INGRESS_RX_THRESHOLD + reg[0x1044] = 4096 # SGE_FL_BUFFER_SIZE0 + reg[0x1048] = 65536 # SGE_FL_BUFFER_SIZE1 + reg[0x104c] = 1536 # SGE_FL_BUFFER_SIZE2 + reg[0x1050] = 9024 # SGE_FL_BUFFER_SIZE3 + reg[0x1054] = 9216 # SGE_FL_BUFFER_SIZE4 + reg[0x1058] = 2048 # SGE_FL_BUFFER_SIZE5 + reg[0x105c] = 128 # SGE_FL_BUFFER_SIZE6 + reg[0x1060] = 8192 # SGE_FL_BUFFER_SIZE7 + reg[0x1064] = 16384 # SGE_FL_BUFFER_SIZE8 + reg[0x10a4] = 0xa000a000/0xf000f000 # SGE_DBFIFO_STATUS + reg[0x10a8] = 0x2000/0x2000 # SGE_DOORBELL_CONTROL + sge_timer_value = 5, 10, 20, 50, 100, 200 # SGE_TIMER_VALUE* in usecs + + reg[0x7dc0] = 0x64f8849 # TP_SHIFT_CNT + + # Selection of tuples for LE filter lookup, fields (and widths which + # must sum to <= 36): { IP Fragment (1), MPS Match Type (3), + # IP Protocol (8), [Inner] VLAN (17), Port (3), FCoE (1) } + # + filterMode = fragmentation, mpshittype, protocol, vnic_id, port, fcoe + + # Percentage of dynamic memory (in either the EDRAM or external MEM) + # to use for TP RX payload + tp_pmrx = 30 + + # TP RX payload page size + tp_pmrx_pagesize = 64K + + # Percentage of dynamic memory (in either the EDRAM or external MEM) + # to use for TP TX payload + tp_pmtx = 50 + + # TP TX payload page size + tp_pmtx_pagesize = 64K + +# Some "definitions" to make the rest of this a bit more readable. We support +# 4 ports, 3 functions (NIC, FCoE and iSCSI), scaling up to 8 "CPU Queue Sets" +# per function per port ... 
+#
+# NMSIX = 1088 # available MSI-X Vectors
+# NVI = 128 # available Virtual Interfaces
+# NMPSTCAM = 336 # MPS TCAM entries
+#
+# NPORTS = 4 # ports
+# NCPUS = 8 # CPUs we want to support scalably
+# NFUNCS = 3 # functions per port (NIC, FCoE, iSCSI)
+
+# Breakdown of Virtual Interface/Queue/Interrupt resources for the "Unified
+# PF" which many OS Drivers will use to manage most or all functions.
+#
+# Each Ingress Queue can use one MSI-X interrupt but some Ingress Queues can
+# use Forwarded Interrupt Ingress Queues. For these latter, an Ingress Queue
+# would be created and the Queue ID of a Forwarded Interrupt Ingress Queue
+# will be specified as the "Ingress Queue Asynchronous Destination Index."
+# Thus, the number of MSI-X Vectors assigned to the Unified PF will be less
+# than or equal to the number of Ingress Queues ...
+#
+# NVI_NIC = 4 # NIC access to NPORTS
+# NFLIQ_NIC = 32 # NIC Ingress Queues with Free Lists
+# NETHCTRL_NIC = 32 # NIC Ethernet Control/TX Queues
+# NEQ_NIC = 64 # NIC Egress Queues (FL, ETHCTRL/TX)
+# NMPSTCAM_NIC = 16 # NIC MPS TCAM Entries (NPORTS*4)
+# NMSIX_NIC = 32 # NIC MSI-X Interrupt Vectors (FLIQ)
+#
+# NVI_OFLD = 0 # Offload uses NIC function to access ports
+# NFLIQ_OFLD = 16 # Offload Ingress Queues with Free Lists
+# NETHCTRL_OFLD = 0 # Offload Ethernet Control/TX Queues
+# NEQ_OFLD = 16 # Offload Egress Queues (FL)
+# NMPSTCAM_OFLD = 0 # Offload MPS TCAM Entries (uses NIC's)
+# NMSIX_OFLD = 16 # Offload MSI-X Interrupt Vectors (FLIQ)
+#
+# NVI_RDMA = 0 # RDMA uses NIC function to access ports
+# NFLIQ_RDMA = 4 # RDMA Ingress Queues with Free Lists
+# NETHCTRL_RDMA = 0 # RDMA Ethernet Control/TX Queues
+# NEQ_RDMA = 4 # RDMA Egress Queues (FL)
+# NMPSTCAM_RDMA = 0 # RDMA MPS TCAM Entries (uses NIC's)
+# NMSIX_RDMA = 4 # RDMA MSI-X Interrupt Vectors (FLIQ)
+#
+# NEQ_WD = 128 # Wire Direct TX Queues and FLs
+# NETHCTRL_WD = 64 # Wire Direct TX Queues
+# NFLIQ_WD = 64 # Wire Direct Ingress Queues with Free Lists
+#
+# NVI_ISCSI = 4 # ISCSI access to NPORTS
+# NFLIQ_ISCSI = 4 # ISCSI Ingress Queues with Free Lists
+# NETHCTRL_ISCSI = 0 # ISCSI Ethernet Control/TX Queues
+# NEQ_ISCSI = 4 # ISCSI Egress Queues (FL)
+# NMPSTCAM_ISCSI = 4 # ISCSI MPS TCAM Entries (NPORTS)
+# NMSIX_ISCSI = 4 # ISCSI MSI-X Interrupt Vectors (FLIQ)
+#
+# NVI_FCOE = 4 # FCOE access to NPORTS
+# NFLIQ_FCOE = 34 # FCOE Ingress Queues with Free Lists
+# NETHCTRL_FCOE = 32 # FCOE Ethernet Control/TX Queues
+# NEQ_FCOE = 66 # FCOE Egress Queues (FL)
+# NMPSTCAM_FCOE = 32 # FCOE MPS TCAM Entries (NPORTS)
+# NMSIX_FCOE = 34 # FCOE MSI-X Interrupt Vectors (FLIQ)
+
+# Two extra Ingress Queues per function for Firmware Events and Forwarded
+# Interrupts, and two extra interrupts per function for Firmware Events (or a
+# Forwarded Interrupt Queue) and General Interrupts per function.
+#
+# NFLIQ_EXTRA = 6 # "extra" Ingress Queues 2*NFUNCS (Firmware and
+# # Forwarded Interrupts)
+# NMSIX_EXTRA = 6 # extra interrupts 2*NFUNCS (Firmware and
+# # General Interrupts)
+
+# Microsoft HyperV resources. The HyperV Virtual Ingress Queues will have
+# their interrupts forwarded to another set of Forwarded Interrupt Queues.
+# +# NVI_HYPERV = 16 # VMs we want to support +# NVIIQ_HYPERV = 2 # Virtual Ingress Queues with Free Lists per VM +# NFLIQ_HYPERV = 40 # VIQs + NCPUS Forwarded Interrupt Queues +# NEQ_HYPERV = 32 # VIQs Free Lists +# NMPSTCAM_HYPERV = 16 # MPS TCAM Entries (NVI_HYPERV) +# NMSIX_HYPERV = 8 # NCPUS Forwarded Interrupt Queues + +# Adding all of the above Unified PF resource needs together: (NIC + OFLD + +# RDMA + ISCSI + FCOE + EXTRA + HYPERV) +# +# NVI_UNIFIED = 28 +# NFLIQ_UNIFIED = 106 +# NETHCTRL_UNIFIED = 32 +# NEQ_UNIFIED = 124 +# NMPSTCAM_UNIFIED = 40 +# +# The sum of all the MSI-X resources above is 74 MSI-X Vectors but we'll round +# that up to 128 to make sure the Unified PF doesn't run out of resources. +# +# NMSIX_UNIFIED = 128 +# +# The Storage PFs could need up to NPORTS*NCPUS + NMSIX_EXTRA MSI-X Vectors +# which is 34 but they're probably safe with 32. +# +# NMSIX_STORAGE = 32 + +# Note: The UnifiedPF is PF4 which doesn't have any Virtual Functions +# associated with it. Thus, the MSI-X Vector allocations we give to the +# UnifiedPF aren't inherited by any Virtual Functions. As a result we can +# provision many more Virtual Functions than we can if the UnifiedPF were +# one of PF0-3. +# + +# All of the below PCI-E parameters are actually stored in various *_init.txt +# files. We include them below essentially as comments. +# +# For PF0-3 we assign 8 vectors each for NIC Ingress Queues of the associated +# ports 0-3. +# +# For PF4, the Unified PF, we give it an MSI-X Table Size as outlined above. +# +# For PF5-6 we assign enough MSI-X Vectors to support FCoE and iSCSI +# storage applications across all four possible ports. +# +# Additionally, since the UnifiedPF isn't one of the per-port Physical +# Functions, we give the UnifiedPF and the PF0-3 Physical Functions +# different PCI Device IDs which will allow Unified and Per-Port Drivers +# to directly select the type of Physical Function to which they wish to be +# attached. +# +# Note that the actual values used for the PCI-E Intelectual Property will be +# 1 less than those below since that's the way it "counts" things. For +# readability, we use the number we actually mean ... +# +# PF0_INT = 8 # NCPUS +# PF1_INT = 8 # NCPUS +# PF2_INT = 8 # NCPUS +# PF3_INT = 8 # NCPUS +# PF0_3_INT = 32 # PF0_INT + PF1_INT + PF2_INT + PF3_INT +# +# PF4_INT = 128 # NMSIX_UNIFIED +# PF5_INT = 32 # NMSIX_STORAGE +# PF6_INT = 32 # NMSIX_STORAGE +# PF7_INT = 0 # Nothing Assigned +# PF4_7_INT = 192 # PF4_INT + PF5_INT + PF6_INT + PF7_INT +# +# PF0_7_INT = 224 # PF0_3_INT + PF4_7_INT +# +# With the above we can get 17 VFs/PF0-3 (limited by 336 MPS TCAM entries) +# but we'll lower that to 16 to make our total 64 and a nice power of 2 ... +# +# NVF = 16 + +# For those OSes which manage different ports on different PFs, we need +# only enough resources to support a single port's NIC application functions +# on PF0-3. The below assumes that we're only doing NIC with NCPUS "Queue +# Sets" for ports 0-3. The FCoE and iSCSI functions for such OSes will be +# managed on the "storage PFs" (see below). 
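For reference, the per-PF MSI-X budget quoted in the comments above can be re-checked with a small standalone C program. This is an editorial sketch only; the constant names simply mirror the comments and are not real driver or firmware symbols.

/*
 * Standalone sketch, not part of the driver or firmware: it only re-checks
 * the per-PF MSI-X budget quoted in the comments above.
 */
#include <assert.h>

enum {
	NMSIX		= 1088,			/* MSI-X vectors available on the chip */
	PF0_3_INT	= 4 * 8,		/* PF0-3: NCPUS vectors each */
	PF4_INT		= 128,			/* NMSIX_UNIFIED */
	PF5_INT		= 32,			/* NMSIX_STORAGE */
	PF6_INT		= 32,			/* NMSIX_STORAGE */
	PF7_INT		= 0,			/* nothing assigned */
	PF4_7_INT	= PF4_INT + PF5_INT + PF6_INT + PF7_INT,
	PF0_7_INT	= PF0_3_INT + PF4_7_INT,
};

int
main(void)
{
	assert(PF4_7_INT == 192);	/* matches the PF4_7_INT comment */
	assert(PF0_7_INT == 224);	/* matches the PF0_7_INT comment */
	assert(PF0_7_INT <= NMSIX);	/* the physical functions fit in the budget */
	return (0);
}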
+# +[function "0"] + nvf = 16 # NVF on this function + wx_caps = all # write/execute permissions for all commands + r_caps = all # read permissions for all commands + nvi = 1 # 1 port + niqflint = 8 # NCPUS "Queue Sets" + nethctrl = 8 # NCPUS "Queue Sets" + neq = 16 # niqflint + nethctrl Egress Queues + nexactf = 8 # number of exact MPSTCAM MAC filters + cmask = all # access to all channels + pmask = 0x1 # access to only one port + +[function "1"] + nvf = 16 # NVF on this function + wx_caps = all # write/execute permissions for all commands + r_caps = all # read permissions for all commands + nvi = 1 # 1 port + niqflint = 8 # NCPUS "Queue Sets" + nethctrl = 8 # NCPUS "Queue Sets" + neq = 16 # niqflint + nethctrl Egress Queues + nexactf = 8 # number of exact MPSTCAM MAC filters + cmask = all # access to all channels + pmask = 0x2 # access to only one port + +[function "2"] + nvf = 16 # NVF on this function + wx_caps = all # write/execute permissions for all commands + r_caps = all # read permissions for all commands + nvi = 1 # 1 port + niqflint = 8 # NCPUS "Queue Sets" + nethctrl = 8 # NCPUS "Queue Sets" + neq = 16 # niqflint + nethctrl Egress Queues + nexactf = 8 # number of exact MPSTCAM MAC filters + cmask = all # access to all channels + pmask = 0x4 # access to only one port + +[function "3"] + nvf = 16 # NVF on this function + wx_caps = all # write/execute permissions for all commands + r_caps = all # read permissions for all commands + nvi = 1 # 1 port + niqflint = 8 # NCPUS "Queue Sets" + nethctrl = 8 # NCPUS "Queue Sets" + neq = 16 # niqflint + nethctrl Egress Queues + nexactf = 8 # number of exact MPSTCAM MAC filters + cmask = all # access to all channels + pmask = 0x8 # access to only one port + +# Some OS Drivers manage all application functions for all ports via PF4. +# Thus we need to provide a large number of resources here. For Egress +# Queues we need to account for both TX Queues as well as Free List Queues +# (because the host is responsible for producing Free List Buffers for the +# hardware to consume). +# +[function "4"] + wx_caps = all # write/execute permissions for all commands + r_caps = all # read permissions for all commands + nvi = 28 # NVI_UNIFIED + niqflint = 170 # NFLIQ_UNIFIED + NLFIQ_WD + nethctrl = 96 # NETHCTRL_UNIFIED + NETHCTRL_WD + neq = 252 # NEQ_UNIFIED + NEQ_WD + nexactf = 40 # NMPSTCAM_UNIFIED + cmask = all # access to all channels + pmask = all # access to all four ports ... + nroute = 32 # number of routing region entries + nclip = 32 # number of clip region entries + nfilter = 768 # number of filter region entries + nserver = 256 # number of server region entries + nhash = 0 # number of hash region entries + protocol = nic_vm, ofld, rddp, rdmac, iscsi_initiator_pdu, iscsi_target_pdu + tp_l2t = 100 + tp_ddp = 2 + tp_ddp_iscsi = 2 + tp_stag = 2 + tp_pbl = 5 + tp_rq = 7 + +# We have FCoE and iSCSI storage functions on PF5 and PF6 each of which may +# need to have Virtual Interfaces on each of the four ports with up to NCPUS +# "Queue Sets" each. +# +[function "5"] + wx_caps = all # write/execute permissions for all commands + r_caps = all # read permissions for all commands + nvi = 4 # NPORTS + niqflint = 34 # NPORTS*NCPUS + NMSIX_EXTRA + nethctrl = 32 # NPORTS*NCPUS + neq = 64 # NPORTS*NCPUS * 2 (FL, ETHCTRL/TX) + nexactf = 4 # NPORTS + cmask = all # access to all channels + pmask = all # access to all four ports ... 
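The per-port [function "0"] through [function "3"] stanzas above follow a simple pattern: niqflint and nethctrl are both NCPUS, neq is their sum, and pmask exposes exactly one port per function. Below is a minimal standalone C sketch of that arithmetic, assuming the NCPUS = 8 "Queue Sets" model from the comments; it is illustrative only and not part of the firmware's config parser.

/*
 * Standalone sketch of the arithmetic behind the per-port PF stanzas above.
 */
#include <stdio.h>

#define NCPUS	8	/* "Queue Sets" per port, per the comments above */

int
main(void)
{
	int port;

	for (port = 0; port < 4; port++) {
		int niqflint = NCPUS;		/* ingress queues with free lists */
		int nethctrl = NCPUS;		/* Ethernet control/TX queues */
		int neq = niqflint + nethctrl;	/* egress queues: FLs plus TX */
		int pmask = 1 << port;		/* each PF sees only its own port */

		printf("[function \"%d\"] niqflint=%d nethctrl=%d neq=%d pmask=0x%x\n",
		    port, niqflint, nethctrl, neq, pmask);
	}
	return (0);
}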
+ +[function "6"] + wx_caps = all # write/execute permissions for all commands + r_caps = all # read permissions for all commands + nvi = 4 # NPORTS + niqflint = 34 # NPORTS*NCPUS + NMSIX_EXTRA + nethctrl = 32 # NPORTS*NCPUS + neq = 66 # NPORTS*NCPUS * 2 (FL, ETHCTRL/TX) + 2 (EXTRA) + nexactf = 32 # NPORTS + adding 28 exact entries for FCoE + # which is OK since < MIN(SUM PF0..3, PF4) + # and we never load PF0..3 and PF4 concurrently + cmask = all # access to all channels + pmask = all # access to all four ports ... + nhash = 0 + protocol = fcoe_initiator + tp_ddp = 2 + fcoe_nfcf = 16 + fcoe_nvnp = 32 + fcoe_nssn = 1024 + +# For Virtual functions, we only allow NIC functionality and we only allow +# access to one port (1 << PF). Note that because of limitations in the +# Scatter Gather Engine (SGE) hardware which checks writes to VF KDOORBELL +# and GTS registers, the number of Ingress and Egress Queues must be a power +# of 2. +# +[function "0/*"] # NVF + wx_caps = 0x82 # DMAQ | VF + r_caps = 0x86 # DMAQ | VF | PORT + nvi = 1 # 1 port + niqflint = 4 # 2 "Queue Sets" + NXIQ + nethctrl = 2 # 2 "Queue Sets" + neq = 4 # 2 "Queue Sets" * 2 + nexactf = 4 + cmask = all # access to all channels + pmask = 0x1 # access to only one port ... + +[function "1/*"] # NVF + wx_caps = 0x82 # DMAQ | VF + r_caps = 0x86 # DMAQ | VF | PORT + nvi = 1 # 1 port + niqflint = 4 # 2 "Queue Sets" + NXIQ + nethctrl = 2 # 2 "Queue Sets" + neq = 4 # 2 "Queue Sets" * 2 + nexactf = 4 + cmask = all # access to all channels + pmask = 0x2 # access to only one port ... + +[function "2/*"] # NVF + wx_caps = 0x82 # DMAQ | VF + r_caps = 0x86 # DMAQ | VF | PORT + nvi = 1 # 1 port + niqflint = 4 # 2 "Queue Sets" + NXIQ + nethctrl = 2 # 2 "Queue Sets" + neq = 4 # 2 "Queue Sets" * 2 + nexactf = 4 + cmask = all # access to all channels + pmask = 0x4 # access to only one port ... + +[function "3/*"] # NVF + wx_caps = 0x82 # DMAQ | VF + r_caps = 0x86 # DMAQ | VF | PORT + nvi = 1 # 1 port + niqflint = 4 # 2 "Queue Sets" + NXIQ + nethctrl = 2 # 2 "Queue Sets" + neq = 4 # 2 "Queue Sets" * 2 + nexactf = 4 + cmask = all # access to all channels + pmask = 0x8 # access to only one port ... + +# MPS features a 196608 bytes ingress buffer that is used for ingress buffering +# for packets from the wire as well as the loopback path of the L2 switch. 
The
+# following params control how the buffer memory is distributed and the L2 flow
+# control settings:
+#
+# bg_mem: %-age of mem to use for port/buffer group
+# lpbk_mem: %-age of port/bg mem to use for loopback
+# hwm: high watermark; bytes available when starting to send pause
+# frames (in units of 0.1 MTU)
+# lwm: low watermark; bytes remaining when sending 'unpause' frame
+# (in units of 0.1 MTU)
+# dwm: minimum delta between high and low watermark (in units of 100
+# Bytes)
+#
+[port "0"]
+ dcb = ppp, dcbx # configure for DCB PPP and enable DCBX offload
+ bg_mem = 25
+ lpbk_mem = 25
+ hwm = 30
+ lwm = 15
+ dwm = 30
+
+[port "1"]
+ dcb = ppp, dcbx
+ bg_mem = 25
+ lpbk_mem = 25
+ hwm = 30
+ lwm = 15
+ dwm = 30
+
+[port "2"]
+ dcb = ppp, dcbx
+ bg_mem = 25
+ lpbk_mem = 25
+ hwm = 30
+ lwm = 15
+ dwm = 30
+
+[port "3"]
+ dcb = ppp, dcbx
+ bg_mem = 25
+ lpbk_mem = 25
+ hwm = 30
+ lwm = 15
+ dwm = 30
+
+[fini]
+ version = 0x14250007
+ checksum = 0xfcbadefb
+
+# Total resources used by above allocations:
+# Virtual Interfaces: 104
+# Ingress Queues/w Free Lists and Interrupts: 526
+# Egress Queues: 702
+# MPS TCAM Entries: 336
+# MSI-X Vectors: 736
+# Virtual Functions: 64
+#
+# $FreeBSD$
+#
diff --git a/sys/dev/cxgbe/common/t4fw_interface.h b/sys/dev/cxgbe/firmware/t4fw_interface.h
index 3eb961500ead..6d259a5d260e 100644
--- a/sys/dev/cxgbe/common/t4fw_interface.h
+++ b/sys/dev/cxgbe/firmware/t4fw_interface.h
@@ -37,16 +37,23 @@ enum fw_retval {
FW_SUCCESS = 0, /* completed sucessfully */
FW_EPERM = 1, /* operation not permitted */
+ FW_ENOENT = 2, /* no such file or directory */
FW_EIO = 5, /* input/output error; hw bad */
- FW_ENOEXEC = 8, /* Exec format error; inv microcode */
+ FW_ENOEXEC = 8, /* exec format error; inv microcode */
FW_EAGAIN = 11, /* try again */
FW_ENOMEM = 12, /* out of memory */
FW_EFAULT = 14, /* bad address; fw bad */
FW_EBUSY = 16, /* resource busy */
- FW_EEXIST = 17, /* File exists */
+ FW_EEXIST = 17, /* file exists */
FW_EINVAL = 22, /* invalid argument */
+ FW_ENOSPC = 28, /* no space left on device */
FW_ENOSYS = 38, /* functionality not implemented */
FW_EPROTO = 71, /* protocol error */
+ FW_EADDRINUSE = 98, /* address already in use */
+ FW_EADDRNOTAVAIL = 99, /* cannot assign requested address */
+ FW_ENETDOWN = 100, /* network is down */
+ FW_ENETUNREACH = 101, /* network is unreachable */
+ FW_ENOBUFS = 105, /* no buffer space available */
FW_ETIMEDOUT = 110, /* timeout */
FW_EINPROGRESS = 115, /* fw internal */
FW_SCSI_ABORT_REQUESTED = 128, /* */
@@ -62,6 +69,8 @@ enum fw_retval {
FW_ERR_RDEV_IMPL_LOGO = 138, /* */
FW_SCSI_UNDER_FLOW_ERR = 139, /* */
FW_SCSI_OVER_FLOW_ERR = 140, /* */
+ FW_SCSI_DDP_ERR = 141, /* DDP error*/
+ FW_SCSI_TASK_ERR = 142, /* No SCSI tasks available */
};
/******************************************************************************
@@ -89,7 +98,7 @@ enum fw_wr_opcodes {
FW_RI_INV_LSTAG_WR = 0x1a,
FW_RI_WR = 0x0d,
FW_ISCSI_NODE_WR = 0x4a,
- FW_LASTC2E_WR = 0x4b
+ FW_LASTC2E_WR = 0x50
};
/*
@@ -512,8 +521,14 @@ struct fw_eth_tx_pkt_wr {
__be64 r3;
};
+#define S_FW_ETH_TX_PKT_WR_IMMDLEN 0
+#define M_FW_ETH_TX_PKT_WR_IMMDLEN 0x1ff
+#define V_FW_ETH_TX_PKT_WR_IMMDLEN(x) ((x) << S_FW_ETH_TX_PKT_WR_IMMDLEN)
+#define G_FW_ETH_TX_PKT_WR_IMMDLEN(x) \
+ (((x) >> S_FW_ETH_TX_PKT_WR_IMMDLEN) & M_FW_ETH_TX_PKT_WR_IMMDLEN)
+
struct fw_eth_tx_pkts_wr {
- __be32 op_immdlen;
+ __be32 op_pkd;
__be32 equiq_to_len16;
__be32 r3;
__be16 plen;
@@ -537,7 +552,7 @@ enum fw_flowc_mnem {
FW_FLOWC_MNEM_RCVNXT,
FW_FLOWC_MNEM_SNDBUF,
FW_FLOWC_MNEM_MSS, - FW_FLOWC_MEM_TXDATAPLEN_MAX, + FW_FLOWC_MNEM_TXDATAPLEN_MAX, }; struct fw_flowc_mnemval { @@ -1469,22 +1484,129 @@ struct fw_ri_wr { #define G_FW_RI_WR_P2PTYPE(x) \ (((x) >> S_FW_RI_WR_P2PTYPE) & M_FW_RI_WR_P2PTYPE) -#ifdef FOISCSI +/****************************************************************************** + * S C S I W O R K R E Q U E S T s + **********************************************/ + + +/****************************************************************************** + * F O i S C S I W O R K R E Q U E S T s + **********************************************/ + +#define ISCSI_NAME_MAX_LEN 224 +#define ISCSI_ALIAS_MAX_LEN 224 + +enum session_type { + ISCSI_SESSION_DISCOVERY = 0, + ISCSI_SESSION_NORMAL, +}; + +enum digest_val { + DIGEST_NONE = 0, + DIGEST_CRC32, + DIGEST_BOTH, +}; + +enum fw_iscsi_subops { + NODE_ONLINE = 1, + SESS_ONLINE, + CONN_ONLINE, + NODE_OFFLINE, + SESS_OFFLINE, + CONN_OFFLINE, + NODE_STATS, + SESS_STATS, + CONN_STATS, + UPDATE_IOHANDLE, +}; + +struct fw_iscsi_node_attr { + __u8 name_len; + __u8 node_name[ISCSI_NAME_MAX_LEN]; + __u8 alias_len; + __u8 node_alias[ISCSI_ALIAS_MAX_LEN]; +}; + +struct fw_iscsi_sess_attr { + __u8 sess_type; + __u8 seq_inorder; + __u8 pdu_inorder; + __u8 immd_data_en; + __u8 init_r2t_en; + __u8 erl; + __be16 max_conn; + __be16 max_r2t; + __be16 time2wait; + __be16 time2retain; + __be32 max_burst; + __be32 first_burst; +}; + +struct fw_iscsi_conn_attr { + __u8 hdr_digest; + __u8 data_digest; + __be32 max_rcv_dsl; + __be16 dst_port; + __be32 dst_addr; + __be16 src_port; + __be32 src_addr; + __be32 ping_tmo; +}; + +struct fw_iscsi_node_stats { + __be16 sess_count; + __be16 chap_fail_count; + __be16 login_count; + __be16 r1; +}; + +struct fw_iscsi_sess_stats { + __be32 rxbytes; + __be32 txbytes; + __be32 scmd_count; + __be32 read_cmds; + __be32 write_cmds; + __be32 read_bytes; + __be32 write_bytes; + __be32 scsi_err_count; + __be32 scsi_rst_count; + __be32 iscsi_tmf_count; + __be32 conn_count; +}; + +struct fw_iscsi_conn_stats { + __be32 txbytes; + __be32 rxbytes; + __be32 dataout; + __be32 datain; +}; + struct fw_iscsi_node_wr { __u8 opcode; __u8 subop; - __u8 node_attr_to_compl; - __u8 len16; - __u8 status; - __u8 r2; __be16 immd_len; + __be32 flowid_len16; __be64 cookie; + __u8 node_attr_to_compl; + __u8 status; + __be16 r1; __be32 node_id; __be32 ctrl_handle; __be32 io_handle; - __be32 r3; }; +#define S_FW_ISCSI_NODE_WR_FLOWID 8 +#define M_FW_ISCSI_NODE_WR_FLOWID 0xfffff +#define V_FW_ISCSI_NODE_WR_FLOWID(x) ((x) << S_FW_ISCSI_NODE_WR_FLOWID) +#define G_FW_ISCSI_NODE_WR_FLOWID(x) \ + (((x) >> S_FW_ISCSI_NODE_WR_FLOWID) & M_FW_ISCSI_NODE_WR_FLOWID) + +#define S_FW_ISCSI_NODE_WR_LEN16 0 +#define M_FW_ISCSI_NODE_WR_LEN16 0xff +#define V_FW_ISCSI_NODE_WR_LEN16(x) ((x) << S_FW_ISCSI_NODE_WR_LEN16) +#define G_FW_ISCSI_NODE_WR_LEN16(x) \ + (((x) >> S_FW_ISCSI_NODE_WR_LEN16) & M_FW_ISCSI_NODE_WR_LEN16) + #define S_FW_ISCSI_NODE_WR_NODE_ATTR 7 #define M_FW_ISCSI_NODE_WR_NODE_ATTR 0x1 #define V_FW_ISCSI_NODE_WR_NODE_ATTR(x) ((x) << S_FW_ISCSI_NODE_WR_NODE_ATTR) @@ -1527,7 +1649,109 @@ struct fw_iscsi_node_wr { (((x) >> S_FW_ISCSI_NODE_WR_COMPL) & M_FW_ISCSI_NODE_WR_COMPL) #define F_FW_ISCSI_NODE_WR_COMPL V_FW_ISCSI_NODE_WR_COMPL(1U) -#endif +#define FW_ISCSI_NODE_INVALID_ID 0xffffffff + +struct fw_scsi_iscsi_data { + __u8 r0; + __u8 fbit_to_tattr; + __be16 r2; + __be32 r3; + __u8 lun[8]; + __be32 r4; + __be32 dlen; + __be32 r5; + __be32 r6; + __u8 cdb[16]; +}; + +#define S_FW_SCSI_ISCSI_DATA_FBIT 7 +#define 
M_FW_SCSI_ISCSI_DATA_FBIT 0x1 +#define V_FW_SCSI_ISCSI_DATA_FBIT(x) ((x) << S_FW_SCSI_ISCSI_DATA_FBIT) +#define G_FW_SCSI_ISCSI_DATA_FBIT(x) \ + (((x) >> S_FW_SCSI_ISCSI_DATA_FBIT) & M_FW_SCSI_ISCSI_DATA_FBIT) +#define F_FW_SCSI_ISCSI_DATA_FBIT V_FW_SCSI_ISCSI_DATA_FBIT(1U) + +#define S_FW_SCSI_ISCSI_DATA_RBIT 6 +#define M_FW_SCSI_ISCSI_DATA_RBIT 0x1 +#define V_FW_SCSI_ISCSI_DATA_RBIT(x) ((x) << S_FW_SCSI_ISCSI_DATA_RBIT) +#define G_FW_SCSI_ISCSI_DATA_RBIT(x) \ + (((x) >> S_FW_SCSI_ISCSI_DATA_RBIT) & M_FW_SCSI_ISCSI_DATA_RBIT) +#define F_FW_SCSI_ISCSI_DATA_RBIT V_FW_SCSI_ISCSI_DATA_RBIT(1U) + +#define S_FW_SCSI_ISCSI_DATA_WBIT 5 +#define M_FW_SCSI_ISCSI_DATA_WBIT 0x1 +#define V_FW_SCSI_ISCSI_DATA_WBIT(x) ((x) << S_FW_SCSI_ISCSI_DATA_WBIT) +#define G_FW_SCSI_ISCSI_DATA_WBIT(x) \ + (((x) >> S_FW_SCSI_ISCSI_DATA_WBIT) & M_FW_SCSI_ISCSI_DATA_WBIT) +#define F_FW_SCSI_ISCSI_DATA_WBIT V_FW_SCSI_ISCSI_DATA_WBIT(1U) + +#define S_FW_SCSI_ISCSI_DATA_TATTR 0 +#define M_FW_SCSI_ISCSI_DATA_TATTR 0x7 +#define V_FW_SCSI_ISCSI_DATA_TATTR(x) ((x) << S_FW_SCSI_ISCSI_DATA_TATTR) +#define G_FW_SCSI_ISCSI_DATA_TATTR(x) \ + (((x) >> S_FW_SCSI_ISCSI_DATA_TATTR) & M_FW_SCSI_ISCSI_DATA_TATTR) + +#define FW_SCSI_ISCSI_DATA_TATTR_UNTAGGED 0 +#define FW_SCSI_ISCSI_DATA_TATTR_SIMPLE 1 +#define FW_SCSI_ISCSI_DATA_TATTR_ORDERED 2 +#define FW_SCSI_ISCSI_DATA_TATTR_HEADOQ 3 +#define FW_SCSI_ISCSI_DATA_TATTR_ACA 4 + +#define FW_SCSI_ISCSI_TMF_OP 0x02 +#define FW_SCSI_ISCSI_ABORT_FUNC 0x01 +#define FW_SCSI_ISCSI_LUN_RESET_FUNC 0x05 +#define FW_SCSI_ISCSI_RESERVED_TAG 0xffffffff + +struct fw_scsi_iscsi_rsp { + __u8 r0; + __u8 sbit_to_uflow; + __u8 response; + __u8 status; + __be32 r4; + __u8 r5[32]; + __be32 bidir_res_cnt; + __be32 res_cnt; + __u8 sense_data[128]; +}; + +#define S_FW_SCSI_ISCSI_RSP_SBIT 7 +#define M_FW_SCSI_ISCSI_RSP_SBIT 0x1 +#define V_FW_SCSI_ISCSI_RSP_SBIT(x) ((x) << S_FW_SCSI_ISCSI_RSP_SBIT) +#define G_FW_SCSI_ISCSI_RSP_SBIT(x) \ + (((x) >> S_FW_SCSI_ISCSI_RSP_SBIT) & M_FW_SCSI_ISCSI_RSP_SBIT) +#define F_FW_SCSI_ISCSI_RSP_SBIT V_FW_SCSI_ISCSI_RSP_SBIT(1U) + +#define S_FW_SCSI_ISCSI_RSP_BIDIR_OFLOW 4 +#define M_FW_SCSI_ISCSI_RSP_BIDIR_OFLOW 0x1 +#define V_FW_SCSI_ISCSI_RSP_BIDIR_OFLOW(x) \ + ((x) << S_FW_SCSI_ISCSI_RSP_BIDIR_OFLOW) +#define G_FW_SCSI_ISCSI_RSP_BIDIR_OFLOW(x) \ + (((x) >> S_FW_SCSI_ISCSI_RSP_BIDIR_OFLOW) & \ + M_FW_SCSI_ISCSI_RSP_BIDIR_OFLOW) +#define F_FW_SCSI_ISCSI_RSP_BIDIR_OFLOW V_FW_SCSI_ISCSI_RSP_BIDIR_OFLOW(1U) + +#define S_FW_SCSI_ISCSI_RSP_BIDIR_UFLOW 3 +#define M_FW_SCSI_ISCSI_RSP_BIDIR_UFLOW 0x1 +#define V_FW_SCSI_ISCSI_RSP_BIDIR_UFLOW(x) \ + ((x) << S_FW_SCSI_ISCSI_RSP_BIDIR_UFLOW) +#define G_FW_SCSI_ISCSI_RSP_BIDIR_UFLOW(x) \ + (((x) >> S_FW_SCSI_ISCSI_RSP_BIDIR_UFLOW) & \ + M_FW_SCSI_ISCSI_RSP_BIDIR_UFLOW) +#define F_FW_SCSI_ISCSI_RSP_BIDIR_UFLOW V_FW_SCSI_ISCSI_RSP_BIDIR_UFLOW(1U) + +#define S_FW_SCSI_ISCSI_RSP_OFLOW 2 +#define M_FW_SCSI_ISCSI_RSP_OFLOW 0x1 +#define V_FW_SCSI_ISCSI_RSP_OFLOW(x) ((x) << S_FW_SCSI_ISCSI_RSP_OFLOW) +#define G_FW_SCSI_ISCSI_RSP_OFLOW(x) \ + (((x) >> S_FW_SCSI_ISCSI_RSP_OFLOW) & M_FW_SCSI_ISCSI_RSP_OFLOW) +#define F_FW_SCSI_ISCSI_RSP_OFLOW V_FW_SCSI_ISCSI_RSP_OFLOW(1U) + +#define S_FW_SCSI_ISCSI_RSP_UFLOW 1 +#define M_FW_SCSI_ISCSI_RSP_UFLOW 0x1 +#define V_FW_SCSI_ISCSI_RSP_UFLOW(x) ((x) << S_FW_SCSI_ISCSI_RSP_UFLOW) +#define G_FW_SCSI_ISCSI_RSP_UFLOW(x) \ + (((x) >> S_FW_SCSI_ISCSI_RSP_UFLOW) & M_FW_SCSI_ISCSI_RSP_UFLOW) +#define F_FW_SCSI_ISCSI_RSP_UFLOW V_FW_SCSI_ISCSI_RSP_UFLOW(1U) 
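The S_/M_/V_/G_/F_ macros above follow the shift/mask convention used throughout this header: V_* places a field at its bit position, G_* extracts it, and F_* is the single-bit form. The two helpers below are a hedged usage sketch; they are hypothetical, not part of the header, and assume t4fw_interface.h has been included.

/*
 * Hypothetical helpers (not real driver code) showing the packing and
 * unpacking convention for the bit-field macros defined above.
 */
static inline void
set_simple_write_attrs(struct fw_scsi_iscsi_data *d)
{
	/* F bit and W bit set, task attribute SIMPLE */
	d->fbit_to_tattr = F_FW_SCSI_ISCSI_DATA_FBIT |
	    F_FW_SCSI_ISCSI_DATA_WBIT |
	    V_FW_SCSI_ISCSI_DATA_TATTR(FW_SCSI_ISCSI_DATA_TATTR_SIMPLE);
}

static inline int
rsp_has_underflow(const struct fw_scsi_iscsi_rsp *rsp)
{
	/* recover the residual-underflow bit from the packed byte */
	return (G_FW_SCSI_ISCSI_RSP_UFLOW(rsp->sbit_to_uflow));
}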
/****************************************************************************** * C O M M A N D s @@ -1543,6 +1767,16 @@ struct fw_iscsi_node_wr { */ #define FW_CMD_MAX_TIMEOUT 10000 +/* + * If a host driver does a HELLO and discovers that there's already a MASTER + * selected, we may have to wait for that MASTER to finish issuing RESET, + * configuration and INITIALIZE commands. Also, there's a possibility that + * our own HELLO may get lost if it happens right as the MASTER is issuign a + * RESET command, so we need to be willing to make a few retries of our HELLO. + */ +#define FW_CMD_HELLO_TIMEOUT (3 * FW_CMD_MAX_TIMEOUT) +#define FW_CMD_HELLO_RETRIES 3 + enum fw_cmd_opcodes { FW_LDST_CMD = 0x01, FW_RESET_CMD = 0x03, @@ -1575,10 +1809,11 @@ enum fw_cmd_opcodes { FW_SCHED_CMD = 0x24, FW_DEVLOG_CMD = 0x25, FW_NETIF_CMD = 0x26, + FW_WATCHDOG_CMD = 0x27, + FW_CLIP_CMD = 0x28, FW_LASTC2E_CMD = 0x40, FW_ERROR_CMD = 0x80, FW_DEBUG_CMD = 0x81, - }; enum fw_cmd_cap { @@ -1696,7 +1931,7 @@ struct fw_ldst_cmd { } addrval; struct fw_ldst_idctxt { __be32 physid; - __be32 msg_pkd; + __be32 msg_ctxtflush; __be32 ctxt_data7; __be32 ctxt_data6; __be32 ctxt_data5; @@ -1769,6 +2004,13 @@ struct fw_ldst_cmd { (((x) >> S_FW_LDST_CMD_MSG) & M_FW_LDST_CMD_MSG) #define F_FW_LDST_CMD_MSG V_FW_LDST_CMD_MSG(1U) +#define S_FW_LDST_CMD_CTXTFLUSH 30 +#define M_FW_LDST_CMD_CTXTFLUSH 0x1 +#define V_FW_LDST_CMD_CTXTFLUSH(x) ((x) << S_FW_LDST_CMD_CTXTFLUSH) +#define G_FW_LDST_CMD_CTXTFLUSH(x) \ + (((x) >> S_FW_LDST_CMD_CTXTFLUSH) & M_FW_LDST_CMD_CTXTFLUSH) +#define F_FW_LDST_CMD_CTXTFLUSH V_FW_LDST_CMD_CTXTFLUSH(1U) + #define S_FW_LDST_CMD_PADDR 8 #define M_FW_LDST_CMD_PADDR 0x1f #define V_FW_LDST_CMD_PADDR(x) ((x) << S_FW_LDST_CMD_PADDR) @@ -1852,13 +2094,27 @@ struct fw_reset_cmd { __be32 op_to_write; __be32 retval_len16; __be32 val; - __be32 r3; + __be32 halt_pkd; +}; + +#define S_FW_RESET_CMD_HALT 31 +#define M_FW_RESET_CMD_HALT 0x1 +#define V_FW_RESET_CMD_HALT(x) ((x) << S_FW_RESET_CMD_HALT) +#define G_FW_RESET_CMD_HALT(x) \ + (((x) >> S_FW_RESET_CMD_HALT) & M_FW_RESET_CMD_HALT) +#define F_FW_RESET_CMD_HALT V_FW_RESET_CMD_HALT(1U) + +enum { + FW_HELLO_CMD_STAGE_OS = 0, + FW_HELLO_CMD_STAGE_PREOS0 = 1, + FW_HELLO_CMD_STAGE_PREOS1 = 2, + FW_HELLO_CMD_STAGE_POSTOS = 3, }; struct fw_hello_cmd { __be32 op_to_write; __be32 retval_len16; - __be32 err_to_mbasyncnot; + __be32 err_to_clearinit; __be32 fwrev; }; @@ -1909,6 +2165,19 @@ struct fw_hello_cmd { #define G_FW_HELLO_CMD_MBASYNCNOT(x) \ (((x) >> S_FW_HELLO_CMD_MBASYNCNOT) & M_FW_HELLO_CMD_MBASYNCNOT) +#define S_FW_HELLO_CMD_STAGE 17 +#define M_FW_HELLO_CMD_STAGE 0x7 +#define V_FW_HELLO_CMD_STAGE(x) ((x) << S_FW_HELLO_CMD_STAGE) +#define G_FW_HELLO_CMD_STAGE(x) \ + (((x) >> S_FW_HELLO_CMD_STAGE) & M_FW_HELLO_CMD_STAGE) + +#define S_FW_HELLO_CMD_CLEARINIT 16 +#define M_FW_HELLO_CMD_CLEARINIT 0x1 +#define V_FW_HELLO_CMD_CLEARINIT(x) ((x) << S_FW_HELLO_CMD_CLEARINIT) +#define G_FW_HELLO_CMD_CLEARINIT(x) \ + (((x) >> S_FW_HELLO_CMD_CLEARINIT) & M_FW_HELLO_CMD_CLEARINIT) +#define F_FW_HELLO_CMD_CLEARINIT V_FW_HELLO_CMD_CLEARINIT(1U) + struct fw_bye_cmd { __be32 op_to_write; __be32 retval_len16; @@ -1989,6 +2258,8 @@ enum fw_caps_config_nic { FW_CAPS_CONFIG_NIC = 0x00000001, FW_CAPS_CONFIG_NIC_VM = 0x00000002, FW_CAPS_CONFIG_NIC_IDS = 0x00000004, + FW_CAPS_CONFIG_NIC_UM = 0x00000008, + FW_CAPS_CONFIG_NIC_UM_ISGL = 0x00000010, }; enum fw_caps_config_toe { @@ -2015,9 +2286,16 @@ enum fw_caps_config_fcoe { FW_CAPS_CONFIG_FCOE_CTRL_OFLD = 0x00000004, }; +enum fw_memtype_cf { + 
FW_MEMTYPE_CF_EDC0 = 0x0, + FW_MEMTYPE_CF_EDC1 = 0x1, + FW_MEMTYPE_CF_EXTMEM = 0x2, + FW_MEMTYPE_CF_FLASH = 0x4, +}; + struct fw_caps_config_cmd { __be32 op_to_write; - __be32 retval_len16; + __be32 cfvalid_to_len16; __be32 r2; __be32 hwmbitmap; __be16 nbmcaps; @@ -2030,9 +2308,33 @@ struct fw_caps_config_cmd { __be16 r4; __be16 iscsicaps; __be16 fcoecaps; - __be32 r5; - __be64 r6; -}; + __be32 cfcsum; + __be32 finiver; + __be32 finicsum; +}; + +#define S_FW_CAPS_CONFIG_CMD_CFVALID 27 +#define M_FW_CAPS_CONFIG_CMD_CFVALID 0x1 +#define V_FW_CAPS_CONFIG_CMD_CFVALID(x) ((x) << S_FW_CAPS_CONFIG_CMD_CFVALID) +#define G_FW_CAPS_CONFIG_CMD_CFVALID(x) \ + (((x) >> S_FW_CAPS_CONFIG_CMD_CFVALID) & M_FW_CAPS_CONFIG_CMD_CFVALID) +#define F_FW_CAPS_CONFIG_CMD_CFVALID V_FW_CAPS_CONFIG_CMD_CFVALID(1U) + +#define S_FW_CAPS_CONFIG_CMD_MEMTYPE_CF 24 +#define M_FW_CAPS_CONFIG_CMD_MEMTYPE_CF 0x7 +#define V_FW_CAPS_CONFIG_CMD_MEMTYPE_CF(x) \ + ((x) << S_FW_CAPS_CONFIG_CMD_MEMTYPE_CF) +#define G_FW_CAPS_CONFIG_CMD_MEMTYPE_CF(x) \ + (((x) >> S_FW_CAPS_CONFIG_CMD_MEMTYPE_CF) & \ + M_FW_CAPS_CONFIG_CMD_MEMTYPE_CF) + +#define S_FW_CAPS_CONFIG_CMD_MEMADDR64K_CF 16 +#define M_FW_CAPS_CONFIG_CMD_MEMADDR64K_CF 0xff +#define V_FW_CAPS_CONFIG_CMD_MEMADDR64K_CF(x) \ + ((x) << S_FW_CAPS_CONFIG_CMD_MEMADDR64K_CF) +#define G_FW_CAPS_CONFIG_CMD_MEMADDR64K_CF(x) \ + (((x) >> S_FW_CAPS_CONFIG_CMD_MEMADDR64K_CF) & \ + M_FW_CAPS_CONFIG_CMD_MEMADDR64K_CF) /* * params command mnemonics @@ -2056,15 +2358,17 @@ enum fw_params_param_dev { * Lookup Engine */ FW_PARAMS_PARAM_DEV_FLOWC_BUFFIFO_SZ = 0x03, - FW_PARAMS_PARAM_DEV_INTVER_NIC = 0x04, - FW_PARAMS_PARAM_DEV_INTVER_VNIC = 0x05, - FW_PARAMS_PARAM_DEV_INTVER_OFLD = 0x06, - FW_PARAMS_PARAM_DEV_INTVER_RI = 0x07, - FW_PARAMS_PARAM_DEV_INTVER_ISCSIPDU = 0x08, - FW_PARAMS_PARAM_DEV_INTVER_ISCSI = 0x09, - FW_PARAMS_PARAM_DEV_INTVER_FCOE = 0x0A, + FW_PARAMS_PARAM_DEV_INTFVER_NIC = 0x04, + FW_PARAMS_PARAM_DEV_INTFVER_VNIC = 0x05, + FW_PARAMS_PARAM_DEV_INTFVER_OFLD = 0x06, + FW_PARAMS_PARAM_DEV_INTFVER_RI = 0x07, + FW_PARAMS_PARAM_DEV_INTFVER_ISCSIPDU = 0x08, + FW_PARAMS_PARAM_DEV_INTFVER_ISCSI = 0x09, + FW_PARAMS_PARAM_DEV_INTFVER_FCOE = 0x0A, FW_PARAMS_PARAM_DEV_FWREV = 0x0B, FW_PARAMS_PARAM_DEV_TPREV = 0x0C, + FW_PARAMS_PARAM_DEV_CF = 0x0D, + FW_PARAMS_PARAM_DEV_BYPASS = 0x0E, }; /* @@ -2119,6 +2423,23 @@ enum fw_params_param_dmaq { FW_PARAMS_PARAM_DMAQ_EQ_SCHEDCLASS_ETH = 0x12, }; +/* + * dev bypass parameters; actions and modes + */ +enum fw_params_param_dev_bypass { + + /* actions + */ + FW_PARAMS_PARAM_DEV_BYPASS_PFAIL = 0x00, + FW_PARAMS_PARAM_DEV_BYPASS_CURRENT = 0x01, + + /* modes + */ + FW_PARAMS_PARAM_DEV_BYPASS_NORMAL = 0x00, + FW_PARAMS_PARAM_DEV_BYPASS_DROP = 0x1, + FW_PARAMS_PARAM_DEV_BYPASS_BYPASS = 0x2, +}; + #define S_FW_PARAMS_MNEM 24 #define M_FW_PARAMS_MNEM 0xff #define V_FW_PARAMS_MNEM(x) ((x) << S_FW_PARAMS_MNEM) @@ -2271,6 +2592,7 @@ struct fw_pfvf_cmd { #define V_FW_PFVF_CMD_NETHCTRL(x) ((x) << S_FW_PFVF_CMD_NETHCTRL) #define G_FW_PFVF_CMD_NETHCTRL(x) \ (((x) >> S_FW_PFVF_CMD_NETHCTRL) & M_FW_PFVF_CMD_NETHCTRL) + /* * ingress queue type; the first 1K ingress queues can have associated 0, * 1 or 2 free lists and an interrupt, all other ingress queues lack these @@ -3518,6 +3840,7 @@ struct fw_eq_ofld_cmd { #define V_FW_EQ_OFLD_CMD_EQSIZE(x) ((x) << S_FW_EQ_OFLD_CMD_EQSIZE) #define G_FW_EQ_OFLD_CMD_EQSIZE(x) \ (((x) >> S_FW_EQ_OFLD_CMD_EQSIZE) & M_FW_EQ_OFLD_CMD_EQSIZE) + /* Macros for VIID parsing: VIID - [10:8] PFN, [7] VI Valid, [6:0] VI number */ #define 
S_FW_VIID_PFN 8 @@ -4081,8 +4404,10 @@ enum fw_port_action { FW_PORT_ACTION_L2_WOL_MODE_EN = 0x0012, FW_PORT_ACTION_LPBK_TO_NORMAL = 0x0020, FW_PORT_ACTION_L1_SS_LPBK_ASIC = 0x0021, + FW_PORT_ACTION_MAC_LPBK = 0x0022, FW_PORT_ACTION_L1_WS_LPBK_ASIC = 0x0023, FW_PORT_ACTION_L1_EXT_LPBK = 0x0026, + FW_PORT_ACTION_PCS_LPBK = 0x0028, FW_PORT_ACTION_PHY_RESET = 0x0040, FW_PORT_ACTION_PMA_RESET = 0x0041, FW_PORT_ACTION_PCS_RESET = 0x0042, @@ -4164,7 +4489,8 @@ struct fw_port_cmd { struct fw_port_dcb_pgrate { __u8 type; __u8 apply_pkd; - __u8 r10_lo[6]; + __u8 r10_lo[5]; + __u8 num_tcs_supported; __u8 pgrate[8]; } pgrate; struct fw_port_dcb_priorate { @@ -4181,11 +4507,12 @@ struct fw_port_cmd { } pfc; struct fw_port_app_priority { __u8 type; - __u8 r10_lo[3]; - __u8 prio; - __u8 sel; + __u8 r10[2]; + __u8 idx; + __u8 user_prio_map; + __u8 sel_field; __be16 protocolid; - __u8 r12[8]; + __be64 r12; } app_priority; } dcb; } u; @@ -4337,20 +4664,6 @@ struct fw_port_cmd { (((x) >> S_FW_PORT_CMD_APPLY) & M_FW_PORT_CMD_APPLY) #define F_FW_PORT_CMD_APPLY V_FW_PORT_CMD_APPLY(1U) -#define S_FW_PORT_CMD_APPLY 7 -#define M_FW_PORT_CMD_APPLY 0x1 -#define V_FW_PORT_CMD_APPLY(x) ((x) << S_FW_PORT_CMD_APPLY) -#define G_FW_PORT_CMD_APPLY(x) \ - (((x) >> S_FW_PORT_CMD_APPLY) & M_FW_PORT_CMD_APPLY) -#define F_FW_PORT_CMD_APPLY V_FW_PORT_CMD_APPLY(1U) - -#define S_FW_PORT_CMD_APPLY 7 -#define M_FW_PORT_CMD_APPLY 0x1 -#define V_FW_PORT_CMD_APPLY(x) ((x) << S_FW_PORT_CMD_APPLY) -#define G_FW_PORT_CMD_APPLY(x) \ - (((x) >> S_FW_PORT_CMD_APPLY) & M_FW_PORT_CMD_APPLY) -#define F_FW_PORT_CMD_APPLY V_FW_PORT_CMD_APPLY(1U) - /* * These are configured into the VPD and hence tools that generate * VPD may use this enumeration. @@ -4383,6 +4696,7 @@ enum fw_port_module_type { FW_PORT_MOD_TYPE_TWINAX_PASSIVE = 0x4, FW_PORT_MOD_TYPE_TWINAX_ACTIVE = 0x5, FW_PORT_MOD_TYPE_LRM = 0x6, + FW_PORT_MOD_TYPE_ERROR = M_FW_PORT_CMD_MODTYPE - 3, FW_PORT_MOD_TYPE_UNKNOWN = M_FW_PORT_CMD_MODTYPE - 2, FW_PORT_MOD_TYPE_NOTSUPPORTED = M_FW_PORT_CMD_MODTYPE - 1, FW_PORT_MOD_TYPE_NONE = M_FW_PORT_CMD_MODTYPE @@ -5189,15 +5503,12 @@ struct fw_rss_vi_config_cmd { #define F_FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN \ V_FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN(1U) -#define S_FW_RSS_VI_CONFIG_CMD_UDPEN 0 -#define M_FW_RSS_VI_CONFIG_CMD_UDPEN 0x1 -#define V_FW_RSS_VI_CONFIG_CMD_UDPEN(x) \ - ((x) << S_FW_RSS_VI_CONFIG_CMD_UDPEN) -#define G_FW_RSS_VI_CONFIG_CMD_UDPEN(x) \ - (((x) >> S_FW_RSS_VI_CONFIG_CMD_UDPEN) & \ - M_FW_RSS_VI_CONFIG_CMD_UDPEN) -#define F_FW_RSS_VI_CONFIG_CMD_UDPEN \ - V_FW_RSS_VI_CONFIG_CMD_UDPEN(1U) +#define S_FW_RSS_VI_CONFIG_CMD_UDPEN 0 +#define M_FW_RSS_VI_CONFIG_CMD_UDPEN 0x1 +#define V_FW_RSS_VI_CONFIG_CMD_UDPEN(x) ((x) << S_FW_RSS_VI_CONFIG_CMD_UDPEN) +#define G_FW_RSS_VI_CONFIG_CMD_UDPEN(x) \ + (((x) >> S_FW_RSS_VI_CONFIG_CMD_UDPEN) & M_FW_RSS_VI_CONFIG_CMD_UDPEN) +#define F_FW_RSS_VI_CONFIG_CMD_UDPEN V_FW_RSS_VI_CONFIG_CMD_UDPEN(1U) enum fw_sched_sc { FW_SCHED_SC_CONFIG = 0, @@ -5352,103 +5663,97 @@ struct fw_devlog_cmd { M_FW_DEVLOG_CMD_MEMADDR16_DEVLOG) struct fw_netif_cmd { - __be32 op_portid; - __be32 retval_to_len16; - __be32 add_to_ipv4gw; - __be32 vlanid_mtuval; + __be32 op_to_ipv4gw; + __be32 retval_len16; + __be32 netifi_ifadridx; + __be32 portid_to_mtuval; __be32 gwaddr; __be32 addr; __be32 nmask; __be32 bcaddr; }; -#define S_FW_NETIF_CMD_PORTID 0 -#define M_FW_NETIF_CMD_PORTID 0xf -#define V_FW_NETIF_CMD_PORTID(x) ((x) << S_FW_NETIF_CMD_PORTID) -#define G_FW_NETIF_CMD_PORTID(x) \ - (((x) >> S_FW_NETIF_CMD_PORTID) & 
M_FW_NETIF_CMD_PORTID) - -#define S_FW_NETIF_CMD_RETVAL 24 -#define M_FW_NETIF_CMD_RETVAL 0xff -#define V_FW_NETIF_CMD_RETVAL(x) ((x) << S_FW_NETIF_CMD_RETVAL) -#define G_FW_NETIF_CMD_RETVAL(x) \ - (((x) >> S_FW_NETIF_CMD_RETVAL) & M_FW_NETIF_CMD_RETVAL) - -#define S_FW_NETIF_CMD_IFIDX 16 -#define M_FW_NETIF_CMD_IFIDX 0xff -#define V_FW_NETIF_CMD_IFIDX(x) ((x) << S_FW_NETIF_CMD_IFIDX) -#define G_FW_NETIF_CMD_IFIDX(x) \ - (((x) >> S_FW_NETIF_CMD_IFIDX) & M_FW_NETIF_CMD_IFIDX) - -#define S_FW_NETIF_CMD_LEN16 0 -#define M_FW_NETIF_CMD_LEN16 0xff -#define V_FW_NETIF_CMD_LEN16(x) ((x) << S_FW_NETIF_CMD_LEN16) -#define G_FW_NETIF_CMD_LEN16(x) \ - (((x) >> S_FW_NETIF_CMD_LEN16) & M_FW_NETIF_CMD_LEN16) - -#define S_FW_NETIF_CMD_ADD 31 +#define S_FW_NETIF_CMD_ADD 20 #define M_FW_NETIF_CMD_ADD 0x1 #define V_FW_NETIF_CMD_ADD(x) ((x) << S_FW_NETIF_CMD_ADD) #define G_FW_NETIF_CMD_ADD(x) \ (((x) >> S_FW_NETIF_CMD_ADD) & M_FW_NETIF_CMD_ADD) #define F_FW_NETIF_CMD_ADD V_FW_NETIF_CMD_ADD(1U) -#define S_FW_NETIF_CMD_LINK 30 +#define S_FW_NETIF_CMD_LINK 19 #define M_FW_NETIF_CMD_LINK 0x1 #define V_FW_NETIF_CMD_LINK(x) ((x) << S_FW_NETIF_CMD_LINK) #define G_FW_NETIF_CMD_LINK(x) \ (((x) >> S_FW_NETIF_CMD_LINK) & M_FW_NETIF_CMD_LINK) #define F_FW_NETIF_CMD_LINK V_FW_NETIF_CMD_LINK(1U) -#define S_FW_NETIF_CMD_VLAN 29 +#define S_FW_NETIF_CMD_VLAN 18 #define M_FW_NETIF_CMD_VLAN 0x1 #define V_FW_NETIF_CMD_VLAN(x) ((x) << S_FW_NETIF_CMD_VLAN) #define G_FW_NETIF_CMD_VLAN(x) \ (((x) >> S_FW_NETIF_CMD_VLAN) & M_FW_NETIF_CMD_VLAN) #define F_FW_NETIF_CMD_VLAN V_FW_NETIF_CMD_VLAN(1U) -#define S_FW_NETIF_CMD_MTU 28 +#define S_FW_NETIF_CMD_MTU 17 #define M_FW_NETIF_CMD_MTU 0x1 #define V_FW_NETIF_CMD_MTU(x) ((x) << S_FW_NETIF_CMD_MTU) #define G_FW_NETIF_CMD_MTU(x) \ (((x) >> S_FW_NETIF_CMD_MTU) & M_FW_NETIF_CMD_MTU) #define F_FW_NETIF_CMD_MTU V_FW_NETIF_CMD_MTU(1U) -#define S_FW_NETIF_CMD_DHCP 27 +#define S_FW_NETIF_CMD_DHCP 16 #define M_FW_NETIF_CMD_DHCP 0x1 #define V_FW_NETIF_CMD_DHCP(x) ((x) << S_FW_NETIF_CMD_DHCP) #define G_FW_NETIF_CMD_DHCP(x) \ (((x) >> S_FW_NETIF_CMD_DHCP) & M_FW_NETIF_CMD_DHCP) #define F_FW_NETIF_CMD_DHCP V_FW_NETIF_CMD_DHCP(1U) -#define S_FW_NETIF_CMD_IPV4BCADDR 3 +#define S_FW_NETIF_CMD_IPV4BCADDR 15 #define M_FW_NETIF_CMD_IPV4BCADDR 0x1 #define V_FW_NETIF_CMD_IPV4BCADDR(x) ((x) << S_FW_NETIF_CMD_IPV4BCADDR) #define G_FW_NETIF_CMD_IPV4BCADDR(x) \ (((x) >> S_FW_NETIF_CMD_IPV4BCADDR) & M_FW_NETIF_CMD_IPV4BCADDR) #define F_FW_NETIF_CMD_IPV4BCADDR V_FW_NETIF_CMD_IPV4BCADDR(1U) -#define S_FW_NETIF_CMD_IPV4NMASK 2 +#define S_FW_NETIF_CMD_IPV4NMASK 14 #define M_FW_NETIF_CMD_IPV4NMASK 0x1 #define V_FW_NETIF_CMD_IPV4NMASK(x) ((x) << S_FW_NETIF_CMD_IPV4NMASK) #define G_FW_NETIF_CMD_IPV4NMASK(x) \ (((x) >> S_FW_NETIF_CMD_IPV4NMASK) & M_FW_NETIF_CMD_IPV4NMASK) #define F_FW_NETIF_CMD_IPV4NMASK V_FW_NETIF_CMD_IPV4NMASK(1U) -#define S_FW_NETIF_CMD_IPV4ADDR 1 +#define S_FW_NETIF_CMD_IPV4ADDR 13 #define M_FW_NETIF_CMD_IPV4ADDR 0x1 #define V_FW_NETIF_CMD_IPV4ADDR(x) ((x) << S_FW_NETIF_CMD_IPV4ADDR) #define G_FW_NETIF_CMD_IPV4ADDR(x) \ (((x) >> S_FW_NETIF_CMD_IPV4ADDR) & M_FW_NETIF_CMD_IPV4ADDR) #define F_FW_NETIF_CMD_IPV4ADDR V_FW_NETIF_CMD_IPV4ADDR(1U) -#define S_FW_NETIF_CMD_IPV4GW 0 +#define S_FW_NETIF_CMD_IPV4GW 12 #define M_FW_NETIF_CMD_IPV4GW 0x1 #define V_FW_NETIF_CMD_IPV4GW(x) ((x) << S_FW_NETIF_CMD_IPV4GW) #define G_FW_NETIF_CMD_IPV4GW(x) \ (((x) >> S_FW_NETIF_CMD_IPV4GW) & M_FW_NETIF_CMD_IPV4GW) #define F_FW_NETIF_CMD_IPV4GW V_FW_NETIF_CMD_IPV4GW(1U) +#define S_FW_NETIF_CMD_NETIFI 8 +#define 
M_FW_NETIF_CMD_NETIFI 0xffffff +#define V_FW_NETIF_CMD_NETIFI(x) ((x) << S_FW_NETIF_CMD_NETIFI) +#define G_FW_NETIF_CMD_NETIFI(x) \ + (((x) >> S_FW_NETIF_CMD_NETIFI) & M_FW_NETIF_CMD_NETIFI) + +#define S_FW_NETIF_CMD_IFADRIDX 0 +#define M_FW_NETIF_CMD_IFADRIDX 0xff +#define V_FW_NETIF_CMD_IFADRIDX(x) ((x) << S_FW_NETIF_CMD_IFADRIDX) +#define G_FW_NETIF_CMD_IFADRIDX(x) \ + (((x) >> S_FW_NETIF_CMD_IFADRIDX) & M_FW_NETIF_CMD_IFADRIDX) + +#define S_FW_NETIF_CMD_PORTID 28 +#define M_FW_NETIF_CMD_PORTID 0xf +#define V_FW_NETIF_CMD_PORTID(x) ((x) << S_FW_NETIF_CMD_PORTID) +#define G_FW_NETIF_CMD_PORTID(x) \ + (((x) >> S_FW_NETIF_CMD_PORTID) & M_FW_NETIF_CMD_PORTID) + #define S_FW_NETIF_CMD_VLANID 16 #define M_FW_NETIF_CMD_VLANID 0xfff #define V_FW_NETIF_CMD_VLANID(x) ((x) << S_FW_NETIF_CMD_VLANID) @@ -5461,6 +5766,42 @@ struct fw_netif_cmd { #define G_FW_NETIF_CMD_MTUVAL(x) \ (((x) >> S_FW_NETIF_CMD_MTUVAL) & M_FW_NETIF_CMD_MTUVAL) +enum fw_watchdog_actions { + FW_WATCHDOG_ACTION_FLR = 0x1, + FW_WATCHDOG_ACTION_BYPASS = 0x2, +}; + +#define FW_WATCHDOG_MAX_TIMEOUT_SECS 60 + +struct fw_watchdog_cmd { + __be32 op_to_write; + __be32 retval_len16; + __be32 timeout; + __be32 actions; +}; + +struct fw_clip_cmd { + __be32 op_to_write; + __be32 alloc_to_len16; + __be64 ip_hi; + __be64 ip_lo; + __be32 r4[2]; +}; + +#define S_FW_CLIP_CMD_ALLOC 31 +#define M_FW_CLIP_CMD_ALLOC 0x1 +#define V_FW_CLIP_CMD_ALLOC(x) ((x) << S_FW_CLIP_CMD_ALLOC) +#define G_FW_CLIP_CMD_ALLOC(x) \ + (((x) >> S_FW_CLIP_CMD_ALLOC) & M_FW_CLIP_CMD_ALLOC) +#define F_FW_CLIP_CMD_ALLOC V_FW_CLIP_CMD_ALLOC(1U) + +#define S_FW_CLIP_CMD_FREE 30 +#define M_FW_CLIP_CMD_FREE 0x1 +#define V_FW_CLIP_CMD_FREE(x) ((x) << S_FW_CLIP_CMD_FREE) +#define G_FW_CLIP_CMD_FREE(x) \ + (((x) >> S_FW_CLIP_CMD_FREE) & M_FW_CLIP_CMD_FREE) +#define F_FW_CLIP_CMD_FREE V_FW_CLIP_CMD_FREE(1U) + enum fw_error_type { FW_ERROR_TYPE_EXCEPTION = 0x0, FW_ERROR_TYPE_HWMODULE = 0x1, @@ -5570,6 +5911,94 @@ struct fw_debug_cmd { #define G_FW_DEBUG_CMD_TYPE(x) \ (((x) >> S_FW_DEBUG_CMD_TYPE) & M_FW_DEBUG_CMD_TYPE) + +/****************************************************************************** + * P C I E F W R E G I S T E R + **************************************/ + +/** + * Register definitions for the PCIE_FW register which the firmware uses + * to retain status across RESETs. This register should be considered + * as a READ-ONLY register for Host Software and only to be used to + * track firmware initialization/error state, etc. 
+ */ +#define S_PCIE_FW_ERR 31 +#define M_PCIE_FW_ERR 0x1 +#define V_PCIE_FW_ERR(x) ((x) << S_PCIE_FW_ERR) +#define G_PCIE_FW_ERR(x) (((x) >> S_PCIE_FW_ERR) & M_PCIE_FW_ERR) +#define F_PCIE_FW_ERR V_PCIE_FW_ERR(1U) + +#define S_PCIE_FW_INIT 30 +#define M_PCIE_FW_INIT 0x1 +#define V_PCIE_FW_INIT(x) ((x) << S_PCIE_FW_INIT) +#define G_PCIE_FW_INIT(x) (((x) >> S_PCIE_FW_INIT) & M_PCIE_FW_INIT) +#define F_PCIE_FW_INIT V_PCIE_FW_INIT(1U) + +#define S_PCIE_FW_HALT 29 +#define M_PCIE_FW_HALT 0x1 +#define V_PCIE_FW_HALT(x) ((x) << S_PCIE_FW_HALT) +#define G_PCIE_FW_HALT(x) (((x) >> S_PCIE_FW_HALT) & M_PCIE_FW_HALT) +#define F_PCIE_FW_HALT V_PCIE_FW_HALT(1U) + +#define S_PCIE_FW_STAGE 21 +#define M_PCIE_FW_STAGE 0x7 +#define V_PCIE_FW_STAGE(x) ((x) << S_PCIE_FW_STAGE) +#define G_PCIE_FW_STAGE(x) (((x) >> S_PCIE_FW_STAGE) & M_PCIE_FW_STAGE) + +#define S_PCIE_FW_ASYNCNOT_VLD 20 +#define M_PCIE_FW_ASYNCNOT_VLD 0x1 +#define V_PCIE_FW_ASYNCNOT_VLD(x) \ + ((x) << S_PCIE_FW_ASYNCNOT_VLD) +#define G_PCIE_FW_ASYNCNOT_VLD(x) \ + (((x) >> S_PCIE_FW_ASYNCNOT_VLD) & M_PCIE_FW_ASYNCNOT_VLD) +#define F_PCIE_FW_ASYNCNOT_VLD V_PCIE_FW_ASYNCNOT_VLD(1U) + +#define S_PCIE_FW_ASYNCNOTINT 19 +#define M_PCIE_FW_ASYNCNOTINT 0x1 +#define V_PCIE_FW_ASYNCNOTINT(x) \ + ((x) << S_PCIE_FW_ASYNCNOTINT) +#define G_PCIE_FW_ASYNCNOTINT(x) \ + (((x) >> S_PCIE_FW_ASYNCNOTINT) & M_PCIE_FW_ASYNCNOTINT) +#define F_PCIE_FW_ASYNCNOTINT V_PCIE_FW_ASYNCNOTINT(1U) + +#define S_PCIE_FW_ASYNCNOT 16 +#define M_PCIE_FW_ASYNCNOT 0x7 +#define V_PCIE_FW_ASYNCNOT(x) ((x) << S_PCIE_FW_ASYNCNOT) +#define G_PCIE_FW_ASYNCNOT(x) \ + (((x) >> S_PCIE_FW_ASYNCNOT) & M_PCIE_FW_ASYNCNOT) + +#define S_PCIE_FW_MASTER_VLD 15 +#define M_PCIE_FW_MASTER_VLD 0x1 +#define V_PCIE_FW_MASTER_VLD(x) ((x) << S_PCIE_FW_MASTER_VLD) +#define G_PCIE_FW_MASTER_VLD(x) \ + (((x) >> S_PCIE_FW_MASTER_VLD) & M_PCIE_FW_MASTER_VLD) +#define F_PCIE_FW_MASTER_VLD V_PCIE_FW_MASTER_VLD(1U) + +#define S_PCIE_FW_MASTER 12 +#define M_PCIE_FW_MASTER 0x7 +#define V_PCIE_FW_MASTER(x) ((x) << S_PCIE_FW_MASTER) +#define G_PCIE_FW_MASTER(x) (((x) >> S_PCIE_FW_MASTER) & M_PCIE_FW_MASTER) + +#define S_PCIE_FW_RESET_VLD 11 +#define M_PCIE_FW_RESET_VLD 0x1 +#define V_PCIE_FW_RESET_VLD(x) ((x) << S_PCIE_FW_RESET_VLD) +#define G_PCIE_FW_RESET_VLD(x) \ + (((x) >> S_PCIE_FW_RESET_VLD) & M_PCIE_FW_RESET_VLD) +#define F_PCIE_FW_RESET_VLD V_PCIE_FW_RESET_VLD(1U) + +#define S_PCIE_FW_RESET 8 +#define M_PCIE_FW_RESET 0x7 +#define V_PCIE_FW_RESET(x) ((x) << S_PCIE_FW_RESET) +#define G_PCIE_FW_RESET(x) \ + (((x) >> S_PCIE_FW_RESET) & M_PCIE_FW_RESET) + +#define S_PCIE_FW_REGISTERED 0 +#define M_PCIE_FW_REGISTERED 0xff +#define V_PCIE_FW_REGISTERED(x) ((x) << S_PCIE_FW_REGISTERED) +#define G_PCIE_FW_REGISTERED(x) \ + (((x) >> S_PCIE_FW_REGISTERED) & M_PCIE_FW_REGISTERED) + + /****************************************************************************** * B I N A R Y H E A D E R F O R M A T **********************************************/ @@ -5579,7 +6008,7 @@ struct fw_debug_cmd { */ struct fw_hdr { __u8 ver; - __u8 reserved1; + __u8 chip; /* terminator chip family */ __be16 len512; /* bin length in units of 512-bytes */ __be32 fw_ver; /* firmware version */ __be32 tp_microcode_ver; /* tcp processor microcode version */ @@ -5591,7 +6020,16 @@ struct fw_hdr { __u8 intfver_iscsi; __u8 intfver_fcoe; __u8 reserved2; - __be32 reserved3[27]; + __u32 reserved3; + __u32 reserved4; + __u32 reserved5; + __be32 flags; + __be32 reserved6[23]; +}; + +enum fw_hdr_chip { + FW_HDR_CHIP_T4, + FW_HDR_CHIP_T5 }; #define 
S_FW_HDR_FW_VER_MAJOR 24 @@ -5622,4 +6060,18 @@ struct fw_hdr { #define G_FW_HDR_FW_VER_BUILD(x) \ (((x) >> S_FW_HDR_FW_VER_BUILD) & M_FW_HDR_FW_VER_BUILD) +enum fw_hdr_intfver { + FW_HDR_INTFVER_NIC = 0x00, + FW_HDR_INTFVER_VNIC = 0x00, + FW_HDR_INTFVER_OFLD = 0x00, + FW_HDR_INTFVER_RI = 0x00, + FW_HDR_INTFVER_ISCSIPDU = 0x00, + FW_HDR_INTFVER_ISCSI = 0x00, + FW_HDR_INTFVER_FCOE = 0x00, +}; + +enum fw_hdr_flags { + FW_HDR_FLAGS_RESET_HALT = 0x00000001, +}; + #endif /* _T4FW_INTERFACE_H_ */ diff --git a/sys/dev/cxgbe/offload.h b/sys/dev/cxgbe/offload.h index f31b840b6b08..f6ada9d09345 100644 --- a/sys/dev/cxgbe/offload.h +++ b/sys/dev/cxgbe/offload.h @@ -31,15 +31,18 @@ #ifndef __T4_OFFLOAD_H__ #define __T4_OFFLOAD_H__ -/* CPL message priority levels */ -enum { - CPL_PRIORITY_DATA = 0, /* data messages */ - CPL_PRIORITY_SETUP = 1, /* connection setup messages */ - CPL_PRIORITY_TEARDOWN = 0, /* connection teardown messages */ - CPL_PRIORITY_LISTEN = 1, /* listen start/stop messages */ - CPL_PRIORITY_ACK = 1, /* RX ACK messages */ - CPL_PRIORITY_CONTROL = 1 /* control messages */ -}; +/* XXX: flagrant misuse of mbuf fields (during tx by TOM) */ +#define MBUF_EQ(m) (*((void **)(&(m)->m_pkthdr.rcvif))) +/* These have to work for !M_PKTHDR so we use a field from m_hdr. */ +#define MBUF_TX_CREDITS(m) ((m)->m_hdr.pad[0]) +#define MBUF_DMA_MAPPED(m) ((m)->m_hdr.pad[1]) + +#define INIT_ULPTX_WR(w, wrlen, atomic, tid) do { \ + (w)->wr.wr_hi = htonl(V_FW_WR_OP(FW_ULPTX_WR) | V_FW_WR_ATOMIC(atomic)); \ + (w)->wr.wr_mid = htonl(V_FW_WR_LEN16(DIV_ROUND_UP(wrlen, 16)) | \ + V_FW_WR_FLOWID(tid)); \ + (w)->wr.wr_lo = cpu_to_be64(0); \ +} while (0) #define INIT_TP_WR(w, tid) do { \ (w)->wr.wr_hi = htonl(V_FW_WR_OP(FW_TP_WR) | \ @@ -49,13 +52,19 @@ enum { (w)->wr.wr_lo = cpu_to_be64(0); \ } while (0) +#define INIT_TP_WR_MIT_CPL(w, cpl, tid) do { \ + INIT_TP_WR(w, tid); \ + OPCODE_TID(w) = htonl(MK_OPCODE_TID(cpl, tid)); \ +} while (0) + /* * Max # of ATIDs. The absolute HW max is 16K but we keep it lower. 
*/ #define MAX_ATIDS 8192U -struct serv_entry { +union serv_entry { void *data; + union serv_entry *next; }; union aopen_entry { @@ -71,8 +80,7 @@ struct tid_info { void **tid_tab; unsigned int ntids; - struct serv_entry *stid_tab; - unsigned long *stid_bmap; + union serv_entry *stid_tab; unsigned int nstids; unsigned int stid_base; @@ -84,10 +92,15 @@ struct tid_info { unsigned int ftid_base; unsigned int ftids_in_use; + struct mtx atid_lock; union aopen_entry *afree; unsigned int atids_in_use; + struct mtx stid_lock; + union serv_entry *sfree; unsigned int stids_in_use; + + unsigned int tids_in_use; }; struct t4_range { @@ -101,6 +114,40 @@ struct t4_virt_res { /* virtualized HW resources */ struct t4_range stag; struct t4_range rq; struct t4_range pbl; + struct t4_range qp; + struct t4_range cq; + struct t4_range ocq; }; +#ifndef TCP_OFFLOAD_DISABLE +enum { + ULD_TOM = 1, +}; + +struct adapter; +struct port_info; +struct uld_info { + SLIST_ENTRY(uld_info) link; + int refcount; + int uld_id; + int (*attach)(struct adapter *, void **); + int (*detach)(void *); +}; + +struct uld_softc { + struct uld_info *uld; + void *softc; +}; + +struct tom_tunables { + int sndbuf; + int ddp; + int indsz; + int ddp_thres; +}; + +int t4_register_uld(struct uld_info *); +int t4_unregister_uld(struct uld_info *); +#endif + #endif diff --git a/sys/dev/cxgbe/osdep.h b/sys/dev/cxgbe/osdep.h index 85a8206ff8af..40ed61b3d911 100644 --- a/sys/dev/cxgbe/osdep.h +++ b/sys/dev/cxgbe/osdep.h @@ -124,6 +124,7 @@ typedef boolean_t bool; #define PCI_EXP_LNKSTA PCIR_EXPRESS_LINK_STA #define PCI_EXP_LNKSTA_CLS PCIM_LINK_STA_SPEED #define PCI_EXP_LNKSTA_NLW PCIM_LINK_STA_WIDTH +#define PCI_EXP_DEVCTL2 0x28 static inline int ilog2(long x) diff --git a/sys/dev/cxgbe/t4_ioctl.h b/sys/dev/cxgbe/t4_ioctl.h index ecc2c3d6a4c6..2a3fa3998ef1 100644 --- a/sys/dev/cxgbe/t4_ioctl.h +++ b/sys/dev/cxgbe/t4_ioctl.h @@ -47,6 +47,8 @@ enum { T4_SET_FILTER, /* program a filter */ T4_DEL_FILTER, /* delete a filter */ T4_GET_SGE_CONTEXT, /* get SGE context for a queue */ + T4_LOAD_FW, /* flash firmware */ + T4_GET_MEM, /* read memory */ }; struct t4_reg { @@ -62,6 +64,11 @@ struct t4_regdump { uint32_t *data; }; +struct t4_data { + uint32_t len; + uint8_t *data; +}; + /* * A hardware filter is some valid combination of these. */ @@ -73,8 +80,8 @@ struct t4_regdump { #define T4_FILTER_IP_DPORT 0x20 /* Destination IP port */ #define T4_FILTER_FCoE 0x40 /* Fibre Channel over Ethernet packet */ #define T4_FILTER_PORT 0x80 /* Physical ingress port */ -#define T4_FILTER_OVLAN 0x100 /* Outer VLAN ID */ -#define T4_FILTER_IVLAN 0x200 /* Inner VLAN ID */ +#define T4_FILTER_VNIC 0x100 /* VNIC id or outer VLAN */ +#define T4_FILTER_VLAN 0x200 /* VLAN ID */ #define T4_FILTER_IP_TOS 0x400 /* IPv4 TOS/IPv6 Traffic Class */ #define T4_FILTER_IP_PROTO 0x800 /* IP protocol */ #define T4_FILTER_ETH_TYPE 0x1000 /* Ethernet Type */ @@ -131,8 +138,8 @@ struct t4_filter_tuple { * is used to select the global mode and all filters are limited to the * set of fields allowed by the global mode. 
*/ - uint16_t ovlan; /* outer VLAN */ - uint16_t ivlan; /* inner VLAN */ + uint16_t vnic; /* VNIC id or outer VLAN tag */ + uint16_t vlan; /* VLAN tag */ uint16_t ethtype; /* Ethernet type */ uint8_t tos; /* TOS/Traffic Type */ uint8_t proto; /* protocol type */ @@ -141,8 +148,8 @@ struct t4_filter_tuple { uint32_t matchtype:3; /* MPS match type */ uint32_t frag:1; /* fragmentation extension header */ uint32_t macidx:9; /* exact match MAC index */ - uint32_t ivlan_vld:1; /* inner VLAN valid */ - uint32_t ovlan_vld:1; /* outer VLAN valid */ + uint32_t vlan_vld:1; /* VLAN valid */ + uint32_t vnic_vld:1; /* VNIC id/outer VLAN tag valid */ }; struct t4_filter_specification { @@ -199,6 +206,12 @@ struct t4_sge_context { uint32_t data[T4_SGE_CONTEXT_SIZE / 4]; }; +struct t4_mem_range { + uint32_t addr; + uint32_t len; + uint32_t *data; +}; + #define CHELSIO_T4_GETREG _IOWR('f', T4_GETREG, struct t4_reg) #define CHELSIO_T4_SETREG _IOW('f', T4_SETREG, struct t4_reg) #define CHELSIO_T4_REGDUMP _IOWR('f', T4_REGDUMP, struct t4_regdump) @@ -209,4 +222,6 @@ struct t4_sge_context { #define CHELSIO_T4_DEL_FILTER _IOW('f', T4_DEL_FILTER, struct t4_filter) #define CHELSIO_T4_GET_SGE_CONTEXT _IOWR('f', T4_GET_SGE_CONTEXT, \ struct t4_sge_context) +#define CHELSIO_T4_LOAD_FW _IOW('f', T4_LOAD_FW, struct t4_data) +#define CHELSIO_T4_GET_MEM _IOW('f', T4_GET_MEM, struct t4_mem_range) #endif diff --git a/sys/dev/cxgbe/t4_l2t.c b/sys/dev/cxgbe/t4_l2t.c index 31197b8654f7..be206c1fe892 100644 --- a/sys/dev/cxgbe/t4_l2t.c +++ b/sys/dev/cxgbe/t4_l2t.c @@ -37,7 +37,9 @@ __FBSDID("$FreeBSD$"); #include <sys/mutex.h> #include <sys/rwlock.h> #include <sys/socket.h> +#include <sys/sbuf.h> #include <net/if.h> +#include <net/if_types.h> #include <net/ethernet.h> #include <net/if_vlan_var.h> #include <net/if_dl.h> @@ -50,9 +52,26 @@ __FBSDID("$FreeBSD$"); #include "common/common.h" #include "common/jhash.h" #include "common/t4_msg.h" -#include "offload.h" #include "t4_l2t.h" +/* + * Module locking notes: There is a RW lock protecting the L2 table as a + * whole plus a spinlock per L2T entry. Entry lookups and allocations happen + * under the protection of the table lock, individual entry changes happen + * while holding that entry's spinlock. The table lock nests outside the + * entry locks. Allocations of new entries take the table lock as writers so + * no other lookups can happen while allocating new entries. Entry updates + * take the table lock as readers so multiple entries can be updated in + * parallel. An L2T entry can be dropped by decrementing its reference count + * and therefore can happen in parallel with entry allocation but no entry + * can change state or increment its ref count during allocation as both of + * these perform lookups. + * + * Note: We do not take refereces to ifnets in this module because both + * the TOE and the sockets already hold references to the interfaces and the + * lifetime of an L2T entry is fully contained in the lifetime of the TOE. 
+ */ + /* identifies sync vs async L2T_WRITE_REQs */ #define S_SYNC_WR 12 #define V_SYNC_WR(x) ((x) << S_SYNC_WR) @@ -76,34 +95,251 @@ struct l2t_data { struct l2t_entry l2tab[L2T_SIZE]; }; +static int do_l2t_write_rpl(struct sge_iq *, const struct rss_header *, + struct mbuf *); + +#define VLAN_NONE 0xfff +#define SA(x) ((struct sockaddr *)(x)) +#define SIN(x) ((struct sockaddr_in *)(x)) +#define SINADDR(x) (SIN(x)->sin_addr.s_addr) + /* - * Module locking notes: There is a RW lock protecting the L2 table as a - * whole plus a spinlock per L2T entry. Entry lookups and allocations happen - * under the protection of the table lock, individual entry changes happen - * while holding that entry's spinlock. The table lock nests outside the - * entry locks. Allocations of new entries take the table lock as writers so - * no other lookups can happen while allocating new entries. Entry updates - * take the table lock as readers so multiple entries can be updated in - * parallel. An L2T entry can be dropped by decrementing its reference count - * and therefore can happen in parallel with entry allocation but no entry - * can change state or increment its ref count during allocation as both of - * these perform lookups. - * - * Note: We do not take refereces to ifnets in this module because both - * the TOE and the sockets already hold references to the interfaces and the - * lifetime of an L2T entry is fully contained in the lifetime of the TOE. + * Allocate a free L2T entry. Must be called with l2t_data.lock held. */ +static struct l2t_entry * +alloc_l2e(struct l2t_data *d) +{ + struct l2t_entry *end, *e, **p; + + rw_assert(&d->lock, RA_WLOCKED); + + if (!atomic_load_acq_int(&d->nfree)) + return (NULL); + + /* there's definitely a free entry */ + for (e = d->rover, end = &d->l2tab[L2T_SIZE]; e != end; ++e) + if (atomic_load_acq_int(&e->refcnt) == 0) + goto found; + + for (e = d->l2tab; atomic_load_acq_int(&e->refcnt); ++e) ; +found: + d->rover = e + 1; + atomic_subtract_int(&d->nfree, 1); + + /* + * The entry we found may be an inactive entry that is + * presently in the hash table. We need to remove it. + */ + if (e->state < L2T_STATE_SWITCHING) { + for (p = &d->l2tab[e->hash].first; *p; p = &(*p)->next) { + if (*p == e) { + *p = e->next; + e->next = NULL; + break; + } + } + } + + e->state = L2T_STATE_UNUSED; + return (e); +} + +/* + * Write an L2T entry. Must be called with the entry locked. + * The write may be synchronous or asynchronous. + */ +static int +write_l2e(struct adapter *sc, struct l2t_entry *e, int sync) +{ + struct mbuf *m; + struct cpl_l2t_write_req *req; + + mtx_assert(&e->lock, MA_OWNED); + + if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL) + return (ENOMEM); + + req = mtod(m, struct cpl_l2t_write_req *); + m->m_pkthdr.len = m->m_len = sizeof(*req); + + INIT_TP_WR(req, 0); + OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, e->idx | + V_SYNC_WR(sync) | V_TID_QID(sc->sge.fwq.abs_id))); + req->params = htons(V_L2T_W_PORT(e->lport) | V_L2T_W_NOREPLY(!sync)); + req->l2t_idx = htons(e->idx); + req->vlan = htons(e->vlan); + memcpy(req->dst_mac, e->dmac, sizeof(req->dst_mac)); + + t4_mgmt_tx(sc, m); + + if (sync && e->state != L2T_STATE_SWITCHING) + e->state = L2T_STATE_SYNC_WRITE; + + return (0); +} + +/* + * Allocate an L2T entry for use by a switching rule. Such need to be + * explicitly freed and while busy they are not on any hash chain, so normal + * address resolution updates do not see them. 
+ */ +struct l2t_entry * +t4_l2t_alloc_switching(struct l2t_data *d) +{ + struct l2t_entry *e; + + rw_rlock(&d->lock); + e = alloc_l2e(d); + if (e) { + mtx_lock(&e->lock); /* avoid race with t4_l2t_free */ + e->state = L2T_STATE_SWITCHING; + atomic_store_rel_int(&e->refcnt, 1); + mtx_unlock(&e->lock); + } + rw_runlock(&d->lock); + return e; +} + +/* + * Sets/updates the contents of a switching L2T entry that has been allocated + * with an earlier call to @t4_l2t_alloc_switching. + */ +int +t4_l2t_set_switching(struct adapter *sc, struct l2t_entry *e, uint16_t vlan, + uint8_t port, uint8_t *eth_addr) +{ + int rc; + + e->vlan = vlan; + e->lport = port; + memcpy(e->dmac, eth_addr, ETHER_ADDR_LEN); + mtx_lock(&e->lock); + rc = write_l2e(sc, e, 0); + mtx_unlock(&e->lock); + return (rc); +} + +int +t4_init_l2t(struct adapter *sc, int flags) +{ + int i; + struct l2t_data *d; + + d = malloc(sizeof(*d), M_CXGBE, M_ZERO | flags); + if (!d) + return (ENOMEM); + + d->rover = d->l2tab; + atomic_store_rel_int(&d->nfree, L2T_SIZE); + rw_init(&d->lock, "L2T"); + + for (i = 0; i < L2T_SIZE; i++) { + d->l2tab[i].idx = i; + d->l2tab[i].state = L2T_STATE_UNUSED; + mtx_init(&d->l2tab[i].lock, "L2T_E", NULL, MTX_DEF); + atomic_store_rel_int(&d->l2tab[i].refcnt, 0); + } + + sc->l2t = d; + t4_register_cpl_handler(sc, CPL_L2T_WRITE_RPL, do_l2t_write_rpl); + + return (0); +} + +int +t4_free_l2t(struct l2t_data *d) +{ + int i; + + for (i = 0; i < L2T_SIZE; i++) + mtx_destroy(&d->l2tab[i].lock); + rw_destroy(&d->lock); + free(d, M_CXGBE); + + return (0); +} + static inline unsigned int vlan_prio(const struct l2t_entry *e) { return e->vlan >> 13; } +static char +l2e_state(const struct l2t_entry *e) +{ + switch (e->state) { + case L2T_STATE_VALID: return 'V'; /* valid, fast-path entry */ + case L2T_STATE_STALE: return 'S'; /* needs revalidation, but usable */ + case L2T_STATE_SYNC_WRITE: return 'W'; + case L2T_STATE_RESOLVING: return e->arpq_head ? 'A' : 'R'; + case L2T_STATE_SWITCHING: return 'X'; + default: return 'U'; + } +} + +int +sysctl_l2t(SYSCTL_HANDLER_ARGS) +{ + struct adapter *sc = arg1; + struct l2t_data *l2t = sc->l2t; + struct l2t_entry *e; + struct sbuf *sb; + int rc, i, header = 0; + char ip[60]; + + if (l2t == NULL) + return (ENXIO); + + rc = sysctl_wire_old_buffer(req, 0); + if (rc != 0) + return (rc); + + sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); + if (sb == NULL) + return (ENOMEM); + + e = &l2t->l2tab[0]; + for (i = 0; i < L2T_SIZE; i++, e++) { + mtx_lock(&e->lock); + if (e->state == L2T_STATE_UNUSED) + goto skip; + + if (header == 0) { + sbuf_printf(sb, " Idx IP address " + "Ethernet address VLAN/P LP State Users Port"); + header = 1; + } + if (e->state == L2T_STATE_SWITCHING || e->v6) + ip[0] = 0; + else + snprintf(ip, sizeof(ip), "%s", + inet_ntoa(*(struct in_addr *)&e->addr[0])); + + /* XXX: accessing lle probably not safe? */ + sbuf_printf(sb, "\n%4u %-15s %02x:%02x:%02x:%02x:%02x:%02x %4d" + " %u %2u %c %5u %s", + e->idx, ip, e->dmac[0], e->dmac[1], e->dmac[2], + e->dmac[3], e->dmac[4], e->dmac[5], + e->vlan & 0xfff, vlan_prio(e), e->lport, + l2e_state(e), atomic_load_acq_int(&e->refcnt), + e->lle ? 
e->lle->lle_tbl->llt_ifp->if_xname : ""); +skip: + mtx_unlock(&e->lock); + } + + rc = sbuf_finish(sb); + sbuf_delete(sb); + + return (rc); +} + +#ifndef TCP_OFFLOAD_DISABLE static inline void l2t_hold(struct l2t_data *d, struct l2t_entry *e) { if (atomic_fetchadd_int(&e->refcnt, 1) == 0) /* 0 -> 1 transition */ - atomic_add_int(&d->nfree, -1); + atomic_subtract_int(&d->nfree, 1); } /* @@ -154,38 +390,6 @@ addreq(const struct l2t_entry *e, const uint32_t *addr) } /* - * Write an L2T entry. Must be called with the entry locked (XXX: really?). - * The write may be synchronous or asynchronous. - */ -static int -write_l2e(struct adapter *sc, struct l2t_entry *e, int sync) -{ - struct mbuf *m; - struct cpl_l2t_write_req *req; - - if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL) - return (ENOMEM); - - req = mtod(m, struct cpl_l2t_write_req *); - m->m_pkthdr.len = m->m_len = sizeof(*req); - - INIT_TP_WR(req, 0); - OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, e->idx | - V_SYNC_WR(sync) | V_TID_QID(sc->sge.fwq.abs_id))); - req->params = htons(V_L2T_W_PORT(e->lport) | V_L2T_W_NOREPLY(!sync)); - req->l2t_idx = htons(e->idx); - req->vlan = htons(e->vlan); - memcpy(req->dst_mac, e->dmac, sizeof(req->dst_mac)); - - t4_mgmt_tx(sc, m); - - if (sync && e->state != L2T_STATE_SWITCHING) - e->state = L2T_STATE_SYNC_WRITE; - - return (0); -} - -/* * Add a packet to an L2T entry's queue of packets awaiting resolution. * Must be called with the entry's lock held. */ @@ -194,53 +398,133 @@ arpq_enqueue(struct l2t_entry *e, struct mbuf *m) { mtx_assert(&e->lock, MA_OWNED); - m->m_next = NULL; + KASSERT(m->m_nextpkt == NULL, ("%s: m_nextpkt not NULL", __func__)); if (e->arpq_head) - e->arpq_tail->m_next = m; + e->arpq_tail->m_nextpkt = m; else e->arpq_head = m; e->arpq_tail = m; } -/* - * Allocate a free L2T entry. Must be called with l2t_data.lock held. - */ -static struct l2t_entry * -alloc_l2e(struct l2t_data *d) +static inline void +send_pending(struct adapter *sc, struct l2t_entry *e) { - struct l2t_entry *end, *e, **p; + struct mbuf *m, *next; - rw_assert(&d->lock, RA_WLOCKED); + mtx_assert(&e->lock, MA_OWNED); - if (!atomic_load_acq_int(&d->nfree)) - return (NULL); + for (m = e->arpq_head; m; m = next) { + next = m->m_nextpkt; + m->m_nextpkt = NULL; + t4_wrq_tx(sc, MBUF_EQ(m), m); + } + e->arpq_head = e->arpq_tail = NULL; +} - /* there's definitely a free entry */ - for (e = d->rover, end = &d->l2tab[L2T_SIZE]; e != end; ++e) - if (atomic_load_acq_int(&e->refcnt) == 0) - goto found; +#ifdef INET +/* + * Looks up and fills up an l2t_entry's lle. We grab all the locks that we need + * ourself, and update e->state at the end if e->lle was successfully filled. + * + * The lle passed in comes from arpresolve and is ignored as it does not appear + * to be of much use. 
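For readers following the arpq_enqueue()/send_pending() pair above: packets awaiting L2 resolution sit on a plain singly linked list threaded through m_nextpkt, with a tail pointer so appends stay O(1). Below is a self-contained sketch of that pattern in plain C; struct pkt, the ids, and the printf stand-in for t4_wrq_tx() are all invented for the illustration.

/*
 * Minimal stand-alone model of the arpq: a singly linked list threaded
 * through a "next packet" pointer, with head and tail kept in the entry.
 * No mbufs, locks, or hardware here; everything below is illustrative.
 */
#include <stdio.h>
#include <stdlib.h>

struct pkt {
	int id;
	struct pkt *nextpkt;		/* plays the role of m_nextpkt */
};

struct entry {
	struct pkt *arpq_head;		/* oldest packet awaiting resolution */
	struct pkt *arpq_tail;		/* newest packet awaiting resolution */
};

static void
arpq_enqueue(struct entry *e, struct pkt *p)
{
	p->nextpkt = NULL;
	if (e->arpq_head != NULL)
		e->arpq_tail->nextpkt = p;	/* append after current tail */
	else
		e->arpq_head = p;		/* queue was empty */
	e->arpq_tail = p;
}

static void
send_pending(struct entry *e)
{
	struct pkt *p, *next;

	for (p = e->arpq_head; p != NULL; p = next) {
		next = p->nextpkt;
		p->nextpkt = NULL;
		printf("tx packet %d\n", p->id);	/* stand-in for t4_wrq_tx() */
		free(p);
	}
	e->arpq_head = e->arpq_tail = NULL;
}

int
main(void)
{
	struct entry e = { NULL, NULL };
	struct pkt *p;
	int i;

	for (i = 0; i < 3; i++) {
		if ((p = calloc(1, sizeof(*p))) == NULL)
			break;
		p->id = i;
		arpq_enqueue(&e, p);
	}
	send_pending(&e);	/* transmits 0, 1, 2 in arrival order */
	return (0);
}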
+ */ +static int +l2t_fill_lle(struct adapter *sc, struct l2t_entry *e, struct llentry *unused) +{ + int rc = 0; + struct sockaddr_in sin; + struct ifnet *ifp = e->ifp; + struct llentry *lle; - for (e = d->l2tab; atomic_load_acq_int(&e->refcnt); ++e) ; -found: - d->rover = e + 1; - atomic_add_int(&d->nfree, -1); + bzero(&sin, sizeof(struct sockaddr_in)); + if (e->v6) + panic("%s: IPv6 L2 resolution not supported yet.", __func__); + + sin.sin_family = AF_INET; + sin.sin_len = sizeof(struct sockaddr_in); + memcpy(&sin.sin_addr, e->addr, sizeof(struct sockaddr_in)); + + mtx_assert(&e->lock, MA_NOTOWNED); + KASSERT(e->addr && ifp, ("%s: bad prep before call", __func__)); + + IF_AFDATA_LOCK(ifp); + lle = lla_lookup(LLTABLE(ifp), LLE_EXCLUSIVE, SA(&sin)); + IF_AFDATA_UNLOCK(ifp); + if (!LLE_IS_VALID(lle)) + return (ENOMEM); + if (!(lle->la_flags & LLE_VALID)) { + rc = EINVAL; + goto done; + } + + LLE_ADDREF(lle); + + mtx_lock(&e->lock); + if (e->state == L2T_STATE_RESOLVING) { + KASSERT(e->lle == NULL, ("%s: lle already valid", __func__)); + e->lle = lle; + memcpy(e->dmac, &lle->ll_addr, ETHER_ADDR_LEN); + write_l2e(sc, e, 1); + } else { + KASSERT(e->lle == lle, ("%s: lle changed", __func__)); + LLE_REMREF(lle); + } + mtx_unlock(&e->lock); +done: + LLE_WUNLOCK(lle); + return (rc); +} +#endif - /* - * The entry we found may be an inactive entry that is - * presently in the hash table. We need to remove it. - */ - if (e->state < L2T_STATE_SWITCHING) { - for (p = &d->l2tab[e->hash].first; *p; p = &(*p)->next) { - if (*p == e) { - *p = e->next; - e->next = NULL; - break; - } +int +t4_l2t_send(struct adapter *sc, struct mbuf *m, struct l2t_entry *e) +{ +#ifndef INET + return (EINVAL); +#else + struct llentry *lle = NULL; + struct sockaddr_in sin; + struct ifnet *ifp = e->ifp; + + if (e->v6) + panic("%s: IPv6 L2 resolution not supported yet.", __func__); + + bzero(&sin, sizeof(struct sockaddr_in)); + sin.sin_family = AF_INET; + sin.sin_len = sizeof(struct sockaddr_in); + memcpy(&sin.sin_addr, e->addr, sizeof(struct sockaddr_in)); + +again: + switch (e->state) { + case L2T_STATE_STALE: /* entry is stale, kick off revalidation */ + if (arpresolve(ifp, NULL, NULL, SA(&sin), e->dmac, &lle) == 0) + l2t_fill_lle(sc, e, lle); + + /* Fall through */ + + case L2T_STATE_VALID: /* fast-path, send the packet on */ + return t4_wrq_tx(sc, MBUF_EQ(m), m); + + case L2T_STATE_RESOLVING: + case L2T_STATE_SYNC_WRITE: + mtx_lock(&e->lock); + if (e->state != L2T_STATE_SYNC_WRITE && + e->state != L2T_STATE_RESOLVING) { + /* state changed by the time we got here */ + mtx_unlock(&e->lock); + goto again; } + arpq_enqueue(e, m); + mtx_unlock(&e->lock); + + if (e->state == L2T_STATE_RESOLVING && + arpresolve(ifp, NULL, NULL, SA(&sin), e->dmac, &lle) == 0) + l2t_fill_lle(sc, e, lle); } - e->state = L2T_STATE_UNUSED; - return e; + return (0); +#endif } /* @@ -287,75 +571,214 @@ t4_l2t_release(struct l2t_entry *e) t4_l2e_free(e); } +static int +do_l2t_write_rpl(struct sge_iq *iq, const struct rss_header *rss, + struct mbuf *m) +{ + struct adapter *sc = iq->adapter; + const struct cpl_l2t_write_rpl *rpl = (const void *)(rss + 1); + unsigned int tid = GET_TID(rpl); + unsigned int idx = tid & (L2T_SIZE - 1); + + if (__predict_false(rpl->status != CPL_ERR_NONE)) { + log(LOG_ERR, + "Unexpected L2T_WRITE_RPL status %u for entry %u\n", + rpl->status, idx); + return (EINVAL); + } + + if (tid & F_SYNC_WR) { + struct l2t_entry *e = &sc->l2t->l2tab[idx]; + + mtx_lock(&e->lock); + if (e->state != L2T_STATE_SWITCHING) { + send_pending(sc, e); 
+ e->state = L2T_STATE_VALID; + } + mtx_unlock(&e->lock); + } + + return (0); +} + /* - * Allocate an L2T entry for use by a switching rule. Such need to be - * explicitly freed and while busy they are not on any hash chain, so normal - * address resolution updates do not see them. + * Reuse an L2T entry that was previously used for the same next hop. + */ +static void +reuse_entry(struct l2t_entry *e) +{ + struct llentry *lle; + + mtx_lock(&e->lock); /* avoid race with t4_l2t_free */ + lle = e->lle; + if (lle) { + KASSERT(lle->la_flags & LLE_VALID, + ("%s: invalid lle stored in l2t_entry", __func__)); + + if (lle->la_expire >= time_uptime) + e->state = L2T_STATE_STALE; + else + e->state = L2T_STATE_VALID; + } else + e->state = L2T_STATE_RESOLVING; + mtx_unlock(&e->lock); +} + +/* + * The TOE wants an L2 table entry that it can use to reach the next hop over + * the specified port. Produce such an entry - create one if needed. + * + * Note that the ifnet could be a pseudo-device like if_vlan, if_lagg, etc. on + * top of the real cxgbe interface. */ struct l2t_entry * -t4_l2t_alloc_switching(struct l2t_data *d) +t4_l2t_get(struct port_info *pi, struct ifnet *ifp, struct sockaddr *sa) { struct l2t_entry *e; + struct l2t_data *d = pi->adapter->l2t; + int addr_len; + uint32_t *addr; + int hash; + struct sockaddr_in6 *sin6; + unsigned int smt_idx = pi->port_id; + + if (sa->sa_family == AF_INET) { + addr = (uint32_t *)&SINADDR(sa); + addr_len = sizeof(SINADDR(sa)); + } else if (sa->sa_family == AF_INET6) { + sin6 = (struct sockaddr_in6 *)sa; + addr = (uint32_t *)&sin6->sin6_addr.s6_addr; + addr_len = sizeof(sin6->sin6_addr.s6_addr); + } else + return (NULL); - rw_rlock(&d->lock); + hash = addr_hash(addr, addr_len, ifp->if_index); + + rw_wlock(&d->lock); + for (e = d->l2tab[hash].first; e; e = e->next) { + if (!addreq(e, addr) && e->ifp == ifp && e->smt_idx == smt_idx){ + l2t_hold(d, e); + if (atomic_load_acq_int(&e->refcnt) == 1) + reuse_entry(e); + goto done; + } + } + + /* Need to allocate a new entry */ e = alloc_l2e(d); if (e) { mtx_lock(&e->lock); /* avoid race with t4_l2t_free */ - e->state = L2T_STATE_SWITCHING; + e->state = L2T_STATE_RESOLVING; + memcpy(e->addr, addr, addr_len); + e->ifindex = ifp->if_index; + e->smt_idx = smt_idx; + e->ifp = ifp; + e->hash = hash; + e->lport = pi->lport; + e->v6 = (addr_len == 16); + e->lle = NULL; atomic_store_rel_int(&e->refcnt, 1); + if (ifp->if_type == IFT_L2VLAN) + VLAN_TAG(ifp, &e->vlan); + else + e->vlan = VLAN_NONE; + e->next = d->l2tab[hash].first; + d->l2tab[hash].first = e; mtx_unlock(&e->lock); } - rw_runlock(&d->lock); +done: + rw_wunlock(&d->lock); return e; } /* - * Sets/updates the contents of a switching L2T entry that has been allocated - * with an earlier call to @t4_l2t_alloc_switching. + * Called when the host's neighbor layer makes a change to some entry that is + * loaded into the HW L2 table. 
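t4_l2t_get() above hashes the next-hop address, scans the bucket's singly linked chain for a matching entry, and otherwise takes a fresh entry and links it at the head of the bucket; alloc_l2e() earlier unlinks a recycled entry with the classic pointer-to-pointer walk. A simplified, self-contained sketch of both idioms (no locks, refcounts, or adapter state; all names and the hash function are made up for the example):

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define NBUCKETS 8

struct ent {
	uint32_t key;			/* stands in for the next-hop address */
	struct ent *next;		/* bucket chain, like l2t_entry->next */
};

static struct ent *bucket[NBUCKETS];

static unsigned int
hash(uint32_t key)
{
	return ((key * 2654435761u) % NBUCKETS);
}

/* Lookup-or-allocate, as in t4_l2t_get(). */
static struct ent *
get_entry(uint32_t key)
{
	unsigned int h = hash(key);
	struct ent *e;

	for (e = bucket[h]; e != NULL; e = e->next)
		if (e->key == key)
			return (e);		/* hit: reuse this entry */

	if ((e = calloc(1, sizeof(*e))) == NULL)
		return (NULL);
	e->key = key;
	e->next = bucket[h];			/* miss: link at the bucket head */
	bucket[h] = e;
	return (e);
}

/* Unlink with the pointer-to-pointer walk used by alloc_l2e(). */
static void
unlink_entry(struct ent *e)
{
	struct ent **p;

	for (p = &bucket[hash(e->key)]; *p != NULL; p = &(*p)->next) {
		if (*p == e) {
			*p = e->next;
			e->next = NULL;
			break;
		}
	}
}

int
main(void)
{
	struct ent *a = get_entry(0x0a000001);
	struct ent *b = get_entry(0x0a000001);

	printf("second lookup reused the entry: %s\n", a == b ? "yes" : "no");
	unlink_entry(a);	/* entry may now be recycled, as alloc_l2e() does */
	free(a);
	return (0);
}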
*/ -int -t4_l2t_set_switching(struct adapter *sc, struct l2t_entry *e, uint16_t vlan, - uint8_t port, uint8_t *eth_addr) +void +t4_l2t_update(struct adapter *sc, struct llentry *lle) { - e->vlan = vlan; - e->lport = port; - memcpy(e->dmac, eth_addr, ETHER_ADDR_LEN); - return write_l2e(sc, e, 0); -} + struct l2t_entry *e; + struct l2t_data *d = sc->l2t; + struct sockaddr *sa = L3_ADDR(lle); + struct llentry *old_lle = NULL; + uint32_t *addr = (uint32_t *)&SINADDR(sa); + struct ifnet *ifp = lle->lle_tbl->llt_ifp; + int hash = addr_hash(addr, sizeof(*addr), ifp->if_index); + + KASSERT(d != NULL, ("%s: no L2 table", __func__)); + LLE_WLOCK_ASSERT(lle); + KASSERT(lle->la_flags & LLE_VALID || lle->la_flags & LLE_DELETED, + ("%s: entry neither valid nor deleted.", __func__)); -struct l2t_data * -t4_init_l2t(int flags) -{ - int i; - struct l2t_data *d; + rw_rlock(&d->lock); + for (e = d->l2tab[hash].first; e; e = e->next) { + if (!addreq(e, addr) && e->ifp == ifp) { + mtx_lock(&e->lock); + if (atomic_load_acq_int(&e->refcnt)) + goto found; + e->state = L2T_STATE_STALE; + mtx_unlock(&e->lock); + break; + } + } + rw_runlock(&d->lock); - d = malloc(sizeof(*d), M_CXGBE, M_ZERO | flags); - if (!d) - return (NULL); + /* The TOE has no interest in this LLE */ + return; - d->rover = d->l2tab; - atomic_store_rel_int(&d->nfree, L2T_SIZE); - rw_init(&d->lock, "L2T"); + found: + rw_runlock(&d->lock); - for (i = 0; i < L2T_SIZE; i++) { - d->l2tab[i].idx = i; - d->l2tab[i].state = L2T_STATE_UNUSED; - mtx_init(&d->l2tab[i].lock, "L2T_E", NULL, MTX_DEF); - atomic_store_rel_int(&d->l2tab[i].refcnt, 0); - } + if (atomic_load_acq_int(&e->refcnt)) { - return (d); -} + /* Entry is referenced by at least 1 offloaded connection. */ -int -t4_free_l2t(struct l2t_data *d) -{ - int i; + /* Handle deletes first */ + if (lle->la_flags & LLE_DELETED) { + if (lle == e->lle) { + e->lle = NULL; + e->state = L2T_STATE_RESOLVING; + LLE_REMREF(lle); + } + goto done; + } - for (i = 0; i < L2T_SIZE; i++) - mtx_destroy(&d->l2tab[i].lock); - rw_destroy(&d->lock); - free(d, M_CXGBE); + if (lle != e->lle) { + old_lle = e->lle; + LLE_ADDREF(lle); + e->lle = lle; + } - return (0); + if (e->state == L2T_STATE_RESOLVING || + memcmp(e->dmac, &lle->ll_addr, ETHER_ADDR_LEN)) { + + /* unresolved -> resolved; or dmac changed */ + + memcpy(e->dmac, &lle->ll_addr, ETHER_ADDR_LEN); + write_l2e(sc, e, 1); + } else { + + /* +ve reinforcement of a valid or stale entry */ + + } + + e->state = L2T_STATE_VALID; + + } else { + /* + * Entry was used previously but is unreferenced right now. + * e->lle has been released and NULL'd out by t4_l2t_free, or + * l2t_release is about to call t4_l2t_free and do that. + * + * Either way this is of no interest to us. 
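The CPL_L2T_WRITE_REQ/RPL exchange earlier in this file packs the entry index into the low bits of the tid and a "synchronous write" flag at bit S_SYNC_WR, which is how do_l2t_write_rpl() recovers both. A tiny standalone sketch of that packing; the 4096-entry table size is assumed here purely for illustration (the real value comes from t4_l2t.h), and the reply-queue id that the driver also encodes is left out.

#include <assert.h>
#include <stdint.h>

#define L2T_SIZE	4096		/* assumed table size, sketch only */
#define S_SYNC_WR	12
#define V_SYNC_WR(x)	((x) << S_SYNC_WR)
#define F_SYNC_WR	V_SYNC_WR(1U)

static uint32_t
pack_tid(unsigned int idx, int sync)
{
	return (idx | V_SYNC_WR(sync != 0));
}

int
main(void)
{
	uint32_t tid = pack_tid(37, 1);

	assert((tid & (L2T_SIZE - 1)) == 37);	/* index, as in do_l2t_write_rpl() */
	assert((tid & F_SYNC_WR) != 0);		/* sync bit: flush the arpq */
	return (0);
}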
+ */ + } + +done: + mtx_unlock(&e->lock); + if (old_lle) + LLE_FREE(old_lle); } + +#endif diff --git a/sys/dev/cxgbe/t4_l2t.h b/sys/dev/cxgbe/t4_l2t.h index c5520c612146..8004c9ec3b39 100644 --- a/sys/dev/cxgbe/t4_l2t.h +++ b/sys/dev/cxgbe/t4_l2t.h @@ -54,18 +54,26 @@ struct l2t_entry { struct mbuf *arpq_head; /* list of mbufs awaiting resolution */ struct mbuf *arpq_tail; struct mtx lock; - volatile uint32_t refcnt; /* entry reference count */ + volatile int refcnt; /* entry reference count */ uint16_t hash; /* hash bucket the entry is on */ uint8_t v6; /* whether entry is for IPv6 */ uint8_t lport; /* associated offload logical port */ uint8_t dmac[ETHER_ADDR_LEN]; /* next hop's MAC address */ }; -struct l2t_data *t4_init_l2t(int); +int t4_init_l2t(struct adapter *, int); int t4_free_l2t(struct l2t_data *); struct l2t_entry *t4_l2t_alloc_switching(struct l2t_data *); int t4_l2t_set_switching(struct adapter *, struct l2t_entry *, uint16_t, uint8_t, uint8_t *); void t4_l2t_release(struct l2t_entry *); +int sysctl_l2t(SYSCTL_HANDLER_ARGS); + +#ifndef TCP_OFFLOAD_DISABLE +struct l2t_entry *t4_l2t_get(struct port_info *, struct ifnet *, + struct sockaddr *); +int t4_l2t_send(struct adapter *, struct mbuf *, struct l2t_entry *); +void t4_l2t_update(struct adapter *, struct llentry *); +#endif #endif /* __T4_L2T_H */ diff --git a/sys/dev/cxgbe/t4_main.c b/sys/dev/cxgbe/t4_main.c index adca421ea0a3..37a4a7c4e2bd 100644 --- a/sys/dev/cxgbe/t4_main.c +++ b/sys/dev/cxgbe/t4_main.c @@ -55,12 +55,10 @@ __FBSDID("$FreeBSD$"); #include <net/if_dl.h> #include <net/if_vlan_var.h> -#include "common/t4_hw.h" #include "common/common.h" #include "common/t4_msg.h" #include "common/t4_regs.h" #include "common/t4_regs_values.h" -#include "common/t4fw_interface.h" #include "t4_ioctl.h" #include "t4_l2t.h" @@ -122,115 +120,130 @@ static void cxgbe_media_status(struct ifnet *, struct ifmediareq *); MALLOC_DEFINE(M_CXGBE, "cxgbe", "Chelsio T4 Ethernet driver and services"); -/* - * Tunables. - */ -static SYSCTL_NODE(_hw, OID_AUTO, cxgbe, CTLFLAG_RD, 0, - "cxgbe driver parameters"); - -static int force_firmware_install = 0; -TUNABLE_INT("hw.cxgbe.force_firmware_install", &force_firmware_install); -SYSCTL_UINT(_hw_cxgbe, OID_AUTO, force_firmware_install, CTLFLAG_RDTUN, - &force_firmware_install, 0, "install firmware on every attach."); +static struct mtx t4_list_lock; +static SLIST_HEAD(, adapter) t4_list; +#ifndef TCP_OFFLOAD_DISABLE +static struct mtx t4_uld_list_lock; +static SLIST_HEAD(, uld_info) t4_uld_list; +#endif /* - * Holdoff timer and packet counter values. + * Tunables. See tweak_tunables() too. */ -static unsigned int intr_timer[SGE_NTIMERS] = {1, 5, 10, 50, 100, 200}; -static unsigned int intr_pktcount[SGE_NCOUNTERS] = {1, 8, 16, 32}; /* 63 max */ /* - * Max # of tx and rx queues to use for each 10G and 1G port. + * Number of queues for tx and rx, 10G and 1G, NIC and offload. 
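The hw.cxgbe.* queue-count tunables that follow all default to -1, with a hard default (NTXQ_10G and friends) defined next to each one. A plausible standalone sketch of how such "auto" values are resolved at attach time is below; the clamp-to-CPU-count policy is an assumption made for the example, the actual policy lives in tweak_tunables().

#include <stdio.h>

#define NTXQ_10G 16			/* hard default, as in the patch */

static int t4_ntxq10g = -1;		/* -1: administrator did not set it */

/* Assumed resolve policy: an explicit setting wins, else clamp to ncpus. */
static int
pick_default(int tunable, int def, int ncpus)
{
	if (tunable > 0)
		return (tunable);
	return (def < ncpus ? def : ncpus);
}

int
main(void)
{
	printf("ntxq10g on a 4-core box: %d\n",
	    pick_default(t4_ntxq10g, NTXQ_10G, 4));	/* 4 */
	t4_ntxq10g = 2;			/* as if set from loader.conf */
	printf("ntxq10g when forced to 2: %d\n",
	    pick_default(t4_ntxq10g, NTXQ_10G, 4));	/* 2 */
	return (0);
}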
*/ -static unsigned int max_ntxq_10g = 8; -TUNABLE_INT("hw.cxgbe.max_ntxq_10G_port", &max_ntxq_10g); -SYSCTL_UINT(_hw_cxgbe, OID_AUTO, max_ntxq_10G_port, CTLFLAG_RDTUN, - &max_ntxq_10g, 0, "maximum number of tx queues per 10G port."); - -static unsigned int max_nrxq_10g = 8; -TUNABLE_INT("hw.cxgbe.max_nrxq_10G_port", &max_nrxq_10g); -SYSCTL_UINT(_hw_cxgbe, OID_AUTO, max_nrxq_10G_port, CTLFLAG_RDTUN, - &max_nrxq_10g, 0, "maximum number of rxq's (per 10G port)."); - -static unsigned int max_ntxq_1g = 2; -TUNABLE_INT("hw.cxgbe.max_ntxq_1G_port", &max_ntxq_1g); -SYSCTL_UINT(_hw_cxgbe, OID_AUTO, max_ntxq_1G_port, CTLFLAG_RDTUN, - &max_ntxq_1g, 0, "maximum number of tx queues per 1G port."); - -static unsigned int max_nrxq_1g = 2; -TUNABLE_INT("hw.cxgbe.max_nrxq_1G_port", &max_nrxq_1g); -SYSCTL_UINT(_hw_cxgbe, OID_AUTO, max_nrxq_1G_port, CTLFLAG_RDTUN, - &max_nrxq_1g, 0, "maximum number of rxq's (per 1G port)."); +#define NTXQ_10G 16 +static int t4_ntxq10g = -1; +TUNABLE_INT("hw.cxgbe.ntxq10g", &t4_ntxq10g); + +#define NRXQ_10G 8 +static int t4_nrxq10g = -1; +TUNABLE_INT("hw.cxgbe.nrxq10g", &t4_nrxq10g); + +#define NTXQ_1G 4 +static int t4_ntxq1g = -1; +TUNABLE_INT("hw.cxgbe.ntxq1g", &t4_ntxq1g); + +#define NRXQ_1G 2 +static int t4_nrxq1g = -1; +TUNABLE_INT("hw.cxgbe.nrxq1g", &t4_nrxq1g); + +#ifndef TCP_OFFLOAD_DISABLE +#define NOFLDTXQ_10G 8 +static int t4_nofldtxq10g = -1; +TUNABLE_INT("hw.cxgbe.nofldtxq10g", &t4_nofldtxq10g); + +#define NOFLDRXQ_10G 2 +static int t4_nofldrxq10g = -1; +TUNABLE_INT("hw.cxgbe.nofldrxq10g", &t4_nofldrxq10g); + +#define NOFLDTXQ_1G 2 +static int t4_nofldtxq1g = -1; +TUNABLE_INT("hw.cxgbe.nofldtxq1g", &t4_nofldtxq1g); + +#define NOFLDRXQ_1G 1 +static int t4_nofldrxq1g = -1; +TUNABLE_INT("hw.cxgbe.nofldrxq1g", &t4_nofldrxq1g); +#endif /* * Holdoff parameters for 10G and 1G ports. */ -static unsigned int tmr_idx_10g = 1; -TUNABLE_INT("hw.cxgbe.holdoff_timer_idx_10G", &tmr_idx_10g); -SYSCTL_UINT(_hw_cxgbe, OID_AUTO, holdoff_timer_idx_10G, CTLFLAG_RDTUN, - &tmr_idx_10g, 0, - "default timer index for interrupt holdoff (10G ports)."); - -static int pktc_idx_10g = 2; -TUNABLE_INT("hw.cxgbe.holdoff_pktc_idx_10G", &pktc_idx_10g); -SYSCTL_UINT(_hw_cxgbe, OID_AUTO, holdoff_pktc_idx_10G, CTLFLAG_RDTUN, - &pktc_idx_10g, 0, - "default pkt counter index for interrupt holdoff (10G ports)."); - -static unsigned int tmr_idx_1g = 1; -TUNABLE_INT("hw.cxgbe.holdoff_timer_idx_1G", &tmr_idx_1g); -SYSCTL_UINT(_hw_cxgbe, OID_AUTO, holdoff_timer_idx_1G, CTLFLAG_RDTUN, - &tmr_idx_1g, 0, - "default timer index for interrupt holdoff (1G ports)."); - -static int pktc_idx_1g = 2; -TUNABLE_INT("hw.cxgbe.holdoff_pktc_idx_1G", &pktc_idx_1g); -SYSCTL_UINT(_hw_cxgbe, OID_AUTO, holdoff_pktc_idx_1G, CTLFLAG_RDTUN, - &pktc_idx_1g, 0, - "default pkt counter index for interrupt holdoff (1G ports)."); +#define TMR_IDX_10G 1 +static int t4_tmr_idx_10g = TMR_IDX_10G; +TUNABLE_INT("hw.cxgbe.holdoff_timer_idx_10G", &t4_tmr_idx_10g); + +#define PKTC_IDX_10G 2 +static int t4_pktc_idx_10g = PKTC_IDX_10G; +TUNABLE_INT("hw.cxgbe.holdoff_pktc_idx_10G", &t4_pktc_idx_10g); + +#define TMR_IDX_1G 1 +static int t4_tmr_idx_1g = TMR_IDX_1G; +TUNABLE_INT("hw.cxgbe.holdoff_timer_idx_1G", &t4_tmr_idx_1g); + +#define PKTC_IDX_1G 2 +static int t4_pktc_idx_1g = PKTC_IDX_1G; +TUNABLE_INT("hw.cxgbe.holdoff_pktc_idx_1G", &t4_pktc_idx_1g); /* * Size (# of entries) of each tx and rx queue. 
*/ -static unsigned int qsize_txq = TX_EQ_QSIZE; -TUNABLE_INT("hw.cxgbe.qsize_txq", &qsize_txq); -SYSCTL_UINT(_hw_cxgbe, OID_AUTO, qsize_txq, CTLFLAG_RDTUN, - &qsize_txq, 0, "default queue size of NIC tx queues."); +static unsigned int t4_qsize_txq = TX_EQ_QSIZE; +TUNABLE_INT("hw.cxgbe.qsize_txq", &t4_qsize_txq); + +static unsigned int t4_qsize_rxq = RX_IQ_QSIZE; +TUNABLE_INT("hw.cxgbe.qsize_rxq", &t4_qsize_rxq); -static unsigned int qsize_rxq = RX_IQ_QSIZE; -TUNABLE_INT("hw.cxgbe.qsize_rxq", &qsize_rxq); -SYSCTL_UINT(_hw_cxgbe, OID_AUTO, qsize_rxq, CTLFLAG_RDTUN, - &qsize_rxq, 0, "default queue size of NIC rx queues."); +/* + * Interrupt types allowed (bits 0, 1, 2 = INTx, MSI, MSI-X respectively). + */ +static int t4_intr_types = INTR_MSIX | INTR_MSI | INTR_INTX; +TUNABLE_INT("hw.cxgbe.interrupt_types", &t4_intr_types); /* - * Interrupt types allowed. + * Configuration file. */ -static int intr_types = INTR_MSIX | INTR_MSI | INTR_INTX; -TUNABLE_INT("hw.cxgbe.interrupt_types", &intr_types); -SYSCTL_UINT(_hw_cxgbe, OID_AUTO, interrupt_types, CTLFLAG_RDTUN, &intr_types, 0, - "interrupt types allowed (bits 0, 1, 2 = INTx, MSI, MSI-X respectively)"); +static char t4_cfg_file[32] = "default"; +TUNABLE_STR("hw.cxgbe.config_file", t4_cfg_file, sizeof(t4_cfg_file)); /* - * Force the driver to use the same set of interrupts for all ports. + * ASIC features that will be used. Disable the ones you don't want so that the + * chip resources aren't wasted on features that will not be used. */ -static int intr_shared = 0; -TUNABLE_INT("hw.cxgbe.interrupts_shared", &intr_shared); -SYSCTL_UINT(_hw_cxgbe, OID_AUTO, interrupts_shared, CTLFLAG_RDTUN, - &intr_shared, 0, "interrupts shared between all ports"); +static int t4_linkcaps_allowed = 0; /* No DCBX, PPP, etc. by default */ +TUNABLE_INT("hw.cxgbe.linkcaps_allowed", &t4_linkcaps_allowed); + +static int t4_niccaps_allowed = FW_CAPS_CONFIG_NIC; +TUNABLE_INT("hw.cxgbe.niccaps_allowed", &t4_niccaps_allowed); + +static int t4_toecaps_allowed = FW_CAPS_CONFIG_TOE; +TUNABLE_INT("hw.cxgbe.toecaps_allowed", &t4_toecaps_allowed); + +static int t4_rdmacaps_allowed = 0; +TUNABLE_INT("hw.cxgbe.rdmacaps_allowed", &t4_rdmacaps_allowed); -static unsigned int filter_mode = HW_TPL_FR_MT_PR_IV_P_FC; -TUNABLE_INT("hw.cxgbe.filter_mode", &filter_mode); -SYSCTL_UINT(_hw_cxgbe, OID_AUTO, filter_mode, CTLFLAG_RDTUN, - &filter_mode, 0, "default global filter mode."); +static int t4_iscsicaps_allowed = 0; +TUNABLE_INT("hw.cxgbe.iscsicaps_allowed", &t4_iscsicaps_allowed); + +static int t4_fcoecaps_allowed = 0; +TUNABLE_INT("hw.cxgbe.fcoecaps_allowed", &t4_fcoecaps_allowed); struct intrs_and_queues { int intr_type; /* INTx, MSI, or MSI-X */ int nirq; /* Number of vectors */ - int intr_shared; /* Interrupts shared between all ports */ + int intr_flags; int ntxq10g; /* # of NIC txq's for each 10G port */ int nrxq10g; /* # of NIC rxq's for each 10G port */ int ntxq1g; /* # of NIC txq's for each 1G port */ int nrxq1g; /* # of NIC rxq's for each 1G port */ +#ifndef TCP_OFFLOAD_DISABLE + int nofldtxq10g; /* # of TOE txq's for each 10G port */ + int nofldrxq10g; /* # of TOE rxq's for each 10G port */ + int nofldtxq1g; /* # of TOE txq's for each 1G port */ + int nofldrxq1g; /* # of TOE rxq's for each 1G port */ +#endif }; struct filter_entry { @@ -244,15 +257,6 @@ struct filter_entry { }; enum { - MEMWIN0_APERTURE = 2048, - MEMWIN0_BASE = 0x1b800, - MEMWIN1_APERTURE = 32768, - MEMWIN1_BASE = 0x28000, - MEMWIN2_APERTURE = 65536, - MEMWIN2_BASE = 0x30000, -}; - -enum { XGMAC_MTU = (1 << 0), 
XGMAC_PROMISC = (1 << 1), XGMAC_ALLMULTI = (1 << 2), @@ -268,9 +272,11 @@ static void setup_memwin(struct adapter *); static int cfg_itype_and_nqueues(struct adapter *, int, int, struct intrs_and_queues *); static int prep_firmware(struct adapter *); -static int get_devlog_params(struct adapter *, struct devlog_params *); -static int get_capabilities(struct adapter *, struct fw_caps_config_cmd *); -static int get_params(struct adapter *, struct fw_caps_config_cmd *); +static int upload_config_file(struct adapter *, const struct firmware *, + uint32_t *, uint32_t *); +static int partition_resources(struct adapter *, const struct firmware *); +static int get_params__pre_init(struct adapter *); +static int get_params__post_init(struct adapter *); static void t4_set_desc(struct adapter *); static void build_medialist(struct port_info *); static int update_mac_settings(struct port_info *, int); @@ -278,24 +284,46 @@ static int cxgbe_init_locked(struct port_info *); static int cxgbe_init_synchronized(struct port_info *); static int cxgbe_uninit_locked(struct port_info *); static int cxgbe_uninit_synchronized(struct port_info *); -static int first_port_up(struct adapter *); -static int last_port_down(struct adapter *); +static int adapter_full_init(struct adapter *); +static int adapter_full_uninit(struct adapter *); +static int port_full_init(struct port_info *); +static int port_full_uninit(struct port_info *); +static void quiesce_eq(struct adapter *, struct sge_eq *); +static void quiesce_iq(struct adapter *, struct sge_iq *); +static void quiesce_fl(struct adapter *, struct sge_fl *); static int t4_alloc_irq(struct adapter *, struct irq *, int rid, - iq_intr_handler_t *, void *, char *); + driver_intr_t *, void *, char *); static int t4_free_irq(struct adapter *, struct irq *); static void reg_block_dump(struct adapter *, uint8_t *, unsigned int, unsigned int); static void t4_get_regs(struct adapter *, struct t4_regdump *, uint8_t *); static void cxgbe_tick(void *); +static int cpl_not_handled(struct sge_iq *, const struct rss_header *, + struct mbuf *); static int t4_sysctls(struct adapter *); static int cxgbe_sysctls(struct port_info *); static int sysctl_int_array(SYSCTL_HANDLER_ARGS); +static int sysctl_bitfield(SYSCTL_HANDLER_ARGS); static int sysctl_holdoff_tmr_idx(SYSCTL_HANDLER_ARGS); static int sysctl_holdoff_pktc_idx(SYSCTL_HANDLER_ARGS); static int sysctl_qsize_rxq(SYSCTL_HANDLER_ARGS); static int sysctl_qsize_txq(SYSCTL_HANDLER_ARGS); static int sysctl_handle_t4_reg64(SYSCTL_HANDLER_ARGS); +static int sysctl_cctrl(SYSCTL_HANDLER_ARGS); +static int sysctl_cpl_stats(SYSCTL_HANDLER_ARGS); +static int sysctl_ddp_stats(SYSCTL_HANDLER_ARGS); static int sysctl_devlog(SYSCTL_HANDLER_ARGS); +static int sysctl_fcoe_stats(SYSCTL_HANDLER_ARGS); +static int sysctl_hw_sched(SYSCTL_HANDLER_ARGS); +static int sysctl_lb_stats(SYSCTL_HANDLER_ARGS); +static int sysctl_meminfo(SYSCTL_HANDLER_ARGS); +static int sysctl_path_mtus(SYSCTL_HANDLER_ARGS); +static int sysctl_pm_stats(SYSCTL_HANDLER_ARGS); +static int sysctl_rdma_stats(SYSCTL_HANDLER_ARGS); +static int sysctl_tcp_stats(SYSCTL_HANDLER_ARGS); +static int sysctl_tids(SYSCTL_HANDLER_ARGS); +static int sysctl_tp_err_stats(SYSCTL_HANDLER_ARGS); +static int sysctl_tx_rate(SYSCTL_HANDLER_ARGS); static inline void txq_start(struct ifnet *, struct sge_txq *); static uint32_t fconf_to_mode(uint32_t); static uint32_t mode_to_fconf(uint32_t); @@ -309,8 +337,15 @@ static int del_filter(struct adapter *, struct t4_filter *); static void clear_filter(struct 
filter_entry *); static int set_filter_wr(struct adapter *, int); static int del_filter_wr(struct adapter *, int); -void filter_rpl(struct adapter *, const struct cpl_set_tcb_rpl *); +static int filter_rpl(struct sge_iq *, const struct rss_header *, + struct mbuf *); static int get_sge_context(struct adapter *, struct t4_sge_context *); +static int read_card_mem(struct adapter *, struct t4_mem_range *); +#ifndef TCP_OFFLOAD_DISABLE +static int toe_capability(struct port_info *, int); +static int activate_uld(struct adapter *, int, struct uld_softc *); +static int deactivate_uld(struct uld_softc *); +#endif static int t4_mod_event(module_t, int, void *); struct t4_pciids { @@ -332,6 +367,11 @@ struct t4_pciids { {0x440a, 4, "Chelsio T404-BT"}, }; +#ifndef TCP_OFFLOAD_DISABLE +/* This is used in service_iq() to get to the fl associated with an iq. */ +CTASSERT(offsetof(struct sge_ofld_rxq, fl) == offsetof(struct sge_rxq, fl)); +#endif + static int t4_probe(device_t dev) { @@ -358,10 +398,11 @@ t4_attach(device_t dev) { struct adapter *sc; int rc = 0, i, n10g, n1g, rqidx, tqidx; - struct fw_caps_config_cmd caps; - uint32_t p, v; struct intrs_and_queues iaq; struct sge *s; +#ifndef TCP_OFFLOAD_DISABLE + int ofld_rqidx, ofld_tqidx; +#endif sc = device_get_softc(dev); sc->dev = dev; @@ -370,6 +411,8 @@ t4_attach(device_t dev) pci_enable_busmaster(dev); if (pci_find_cap(dev, PCIY_EXPRESS, &i) == 0) { + uint32_t v; + pci_set_max_read_req(dev, 4096); v = pci_read_config(dev, i + PCIR_EXPRESS_DEVICE_CTL, 2); v |= PCIM_EXP_CTL_RELAXED_ORD_ENABLE; @@ -379,12 +422,22 @@ t4_attach(device_t dev) snprintf(sc->lockname, sizeof(sc->lockname), "%s", device_get_nameunit(dev)); mtx_init(&sc->sc_lock, sc->lockname, 0, MTX_DEF); + mtx_lock(&t4_list_lock); + SLIST_INSERT_HEAD(&t4_list, sc, link); + mtx_unlock(&t4_list_lock); + + mtx_init(&sc->sfl_lock, "starving freelists", 0, MTX_DEF); + TAILQ_INIT(&sc->sfl); + callout_init(&sc->sfl_callout, CALLOUT_MPSAFE); rc = map_bars(sc); if (rc != 0) goto done; /* error message displayed already */ memset(sc->chan_map, 0xff, sizeof(sc->chan_map)); + for (i = 0; i < ARRAY_SIZE(sc->cpl_handler); i++) + sc->cpl_handler[i] = cpl_not_handled; + t4_register_cpl_handler(sc, CPL_SET_TCB_RPL, filter_rpl); /* Prepare the adapter for operation */ rc = -t4_prep_adapter(sc); @@ -393,107 +446,75 @@ t4_attach(device_t dev) goto done; } - /* Do this really early */ + /* + * Do this really early, with the memory windows set up even before the + * character device. The userland tool's register i/o and mem read + * will work even in "recovery mode". + */ + setup_memwin(sc); sc->cdev = make_dev(&t4_cdevsw, device_get_unit(dev), UID_ROOT, GID_WHEEL, 0600, "%s", device_get_nameunit(dev)); sc->cdev->si_drv1 = sc; + /* Go no further if recovery mode has been requested. */ + if (TUNABLE_INT_FETCH("hw.cxgbe.sos", &i) && i != 0) { + device_printf(dev, "recovery mode.\n"); + goto done; + } + /* Prepare the firmware for operation */ rc = prep_firmware(sc); if (rc != 0) goto done; /* error message displayed already */ - /* Read firmware devlog parameters */ - (void) get_devlog_params(sc, &sc->params.devlog); - - /* Get device capabilities and select which ones we'll use */ - rc = get_capabilities(sc, &caps); - if (rc != 0) { - device_printf(dev, - "failed to initialize adapter capabilities: %d.\n", rc); - goto done; - } + rc = get_params__pre_init(sc); + if (rc != 0) + goto done; /* error message displayed already */ - /* Choose the global RSS mode. 
*/ - rc = -t4_config_glbl_rss(sc, sc->mbox, - FW_RSS_GLB_CONFIG_CMD_MODE_BASICVIRTUAL, - F_FW_RSS_GLB_CONFIG_CMD_TNLMAPEN | - F_FW_RSS_GLB_CONFIG_CMD_HASHTOEPLITZ | - F_FW_RSS_GLB_CONFIG_CMD_TNLALLLKP); - if (rc != 0) { - device_printf(dev, - "failed to select global RSS mode: %d.\n", rc); - goto done; - } + rc = t4_sge_init(sc); + if (rc != 0) + goto done; /* error message displayed already */ - /* These are total (sum of all ports) limits for a bus driver */ - rc = -t4_cfg_pfvf(sc, sc->mbox, sc->pf, 0, - 128, /* max # of egress queues */ - 64, /* max # of egress Ethernet or control queues */ - 64, /* max # of ingress queues with fl/interrupt */ - 0, /* max # of ingress queues without interrupt */ - 0, /* PCIe traffic class */ - 4, /* max # of virtual interfaces */ - M_FW_PFVF_CMD_CMASK, M_FW_PFVF_CMD_PMASK, 16, - FW_CMD_CAP_PF, FW_CMD_CAP_PF); - if (rc != 0) { - device_printf(dev, - "failed to configure pf/vf resources: %d.\n", rc); - goto done; + if (sc->flags & MASTER_PF) { + /* get basic stuff going */ + rc = -t4_fw_initialize(sc, sc->mbox); + if (rc != 0) { + device_printf(dev, "early init failed: %d.\n", rc); + goto done; + } } - /* Need this before sge_init */ - for (i = 0; i < SGE_NTIMERS; i++) - sc->sge.timer_val[i] = min(intr_timer[i], 200U); - for (i = 0; i < SGE_NCOUNTERS; i++) - sc->sge.counter_val[i] = min(intr_pktcount[i], M_THRESHOLD_0); - - /* Also need the cooked value of cclk before sge_init */ - p = (V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) | - V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_CCLK)); - rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, &p, &v); - if (rc != 0) { - device_printf(sc->dev, - "failed to obtain core clock value: %d.\n", rc); - goto done; - } - sc->params.vpd.cclk = v; + rc = get_params__post_init(sc); + if (rc != 0) + goto done; /* error message displayed already */ - t4_sge_init(sc); + if (sc->flags & MASTER_PF) { - t4_set_filter_mode(sc, filter_mode); - t4_set_reg_field(sc, A_TP_GLOBAL_CONFIG, - V_FIVETUPLELOOKUP(M_FIVETUPLELOOKUP), - V_FIVETUPLELOOKUP(M_FIVETUPLELOOKUP)); - t4_tp_wr_bits_indirect(sc, A_TP_INGRESS_CONFIG, F_CSUM_HAS_PSEUDO_HDR, - F_LOOKUPEVERYPKT); + /* final tweaks to some settings */ - /* get basic stuff going */ - rc = -t4_early_init(sc, sc->mbox); - if (rc != 0) { - device_printf(dev, "early init failed: %d.\n", rc); - goto done; + t4_load_mtus(sc, sc->params.mtus, sc->params.a_wnd, + sc->params.b_wnd); + t4_write_reg(sc, A_ULP_RX_TDDP_PSZ, V_HPZ0(PAGE_SHIFT - 12)); + t4_set_reg_field(sc, A_TP_PARA_REG3, F_TUNNELCNGDROP0 | + F_TUNNELCNGDROP1 | F_TUNNELCNGDROP2 | F_TUNNELCNGDROP3, 0); + t4_set_reg_field(sc, A_TP_PARA_REG5, + V_INDICATESIZE(M_INDICATESIZE) | + F_REARMDDPOFFSET | F_RESETDDPOFFSET, + V_INDICATESIZE(M_INDICATESIZE) | + F_REARMDDPOFFSET | F_RESETDDPOFFSET); + } else { + /* + * XXX: Verify that we can live with whatever the master driver + * has done so far, and hope that it doesn't change any global + * setting from underneath us in the future. 
+ */ } - rc = get_params(sc, &caps); - if (rc != 0) - goto done; /* error message displayed already */ - - /* These are finalized by FW initialization, load their values now */ - v = t4_read_reg(sc, A_TP_TIMER_RESOLUTION); - sc->params.tp.tre = G_TIMERRESOLUTION(v); - sc->params.tp.dack_re = G_DELAYEDACKRESOLUTION(v); - t4_read_mtu_tbl(sc, sc->params.mtus, NULL); - - /* tweak some settings */ - t4_write_reg(sc, A_TP_SHIFT_CNT, V_SYNSHIFTMAX(6) | V_RXTSHIFTMAXR1(4) | - V_RXTSHIFTMAXR2(15) | V_PERSHIFTBACKOFFMAX(8) | V_PERSHIFTMAX(8) | - V_KEEPALIVEMAXR1(4) | V_KEEPALIVEMAXR2(9)); - t4_write_reg(sc, A_ULP_RX_TDDP_PSZ, V_HPZ0(PAGE_SHIFT - 12)); - t4_set_reg_field(sc, A_TP_PARA_REG3, F_TUNNELCNGDROP0 | - F_TUNNELCNGDROP1 | F_TUNNELCNGDROP2 | F_TUNNELCNGDROP3, 0); + t4_read_indirect(sc, A_TP_PIO_ADDR, A_TP_PIO_DATA, &sc->filter_mode, 1, + A_TP_VLAN_PRI_MAP); - setup_memwin(sc); + for (i = 0; i < NCHAN; i++) + sc->params.tp.tx_modq[i] = i; rc = t4_create_dma_tag(sc); if (rc != 0) @@ -532,31 +553,18 @@ t4_attach(device_t dev) if (is_10G_port(pi)) { n10g++; - pi->tmr_idx = tmr_idx_10g; - pi->pktc_idx = pktc_idx_10g; + pi->tmr_idx = t4_tmr_idx_10g; + pi->pktc_idx = t4_pktc_idx_10g; } else { n1g++; - pi->tmr_idx = tmr_idx_1g; - pi->pktc_idx = pktc_idx_1g; + pi->tmr_idx = t4_tmr_idx_1g; + pi->pktc_idx = t4_pktc_idx_1g; } pi->xact_addr_filt = -1; - pi->qsize_rxq = max(qsize_rxq, 128); - while (pi->qsize_rxq & 7) - pi->qsize_rxq++; - pi->qsize_txq = max(qsize_txq, 128); - - if (pi->qsize_rxq != qsize_rxq) { - device_printf(dev, - "using %d instead of %d as the rx queue size.\n", - pi->qsize_rxq, qsize_rxq); - } - if (pi->qsize_txq != qsize_txq) { - device_printf(dev, - "using %d instead of %d as the tx queue size.\n", - pi->qsize_txq, qsize_txq); - } + pi->qsize_rxq = t4_qsize_rxq; + pi->qsize_txq = t4_qsize_txq; pi->dev = device_add_child(dev, "cxgbe", -1); if (pi->dev == NULL) { @@ -566,14 +574,6 @@ t4_attach(device_t dev) goto done; } device_set_softc(pi->dev, pi); - - setbit(&sc->registered_device_map, i); - } - - if (sc->registered_device_map == 0) { - device_printf(dev, "no usable ports\n"); - rc = ENXIO; - goto done; } /* @@ -585,20 +585,31 @@ t4_attach(device_t dev) sc->intr_type = iaq.intr_type; sc->intr_count = iaq.nirq; + sc->flags |= iaq.intr_flags; s = &sc->sge; s->nrxq = n10g * iaq.nrxq10g + n1g * iaq.nrxq1g; s->ntxq = n10g * iaq.ntxq10g + n1g * iaq.ntxq1g; s->neq = s->ntxq + s->nrxq; /* the free list in an rxq is an eq */ - s->neq += sc->params.nports; /* control queues, 1 per port */ + s->neq += sc->params.nports + 1;/* ctrl queues: 1 per port + 1 mgmt */ s->niq = s->nrxq + 1; /* 1 extra for firmware event queue */ - if (iaq.intr_shared) - sc->flags |= INTR_SHARED; - s->niq += NINTRQ(sc); /* interrupt queues */ - s->intrq = malloc(NINTRQ(sc) * sizeof(struct sge_iq), M_CXGBE, - M_ZERO | M_WAITOK); - s->ctrlq = malloc(sc->params.nports * sizeof(struct sge_ctrlq), M_CXGBE, +#ifndef TCP_OFFLOAD_DISABLE + if (is_offload(sc)) { + + s->nofldrxq = n10g * iaq.nofldrxq10g + n1g * iaq.nofldrxq1g; + s->nofldtxq = n10g * iaq.nofldtxq10g + n1g * iaq.nofldtxq1g; + s->neq += s->nofldtxq + s->nofldrxq; + s->niq += s->nofldrxq; + + s->ofld_rxq = malloc(s->nofldrxq * sizeof(struct sge_ofld_rxq), + M_CXGBE, M_ZERO | M_WAITOK); + s->ofld_txq = malloc(s->nofldtxq * sizeof(struct sge_wrq), + M_CXGBE, M_ZERO | M_WAITOK); + } +#endif + + s->ctrlq = malloc(sc->params.nports * sizeof(struct sge_wrq), M_CXGBE, M_ZERO | M_WAITOK); s->rxq = malloc(s->nrxq * sizeof(struct sge_rxq), M_CXGBE, M_ZERO | M_WAITOK); @@ 
-612,15 +623,16 @@ t4_attach(device_t dev) sc->irq = malloc(sc->intr_count * sizeof(struct irq), M_CXGBE, M_ZERO | M_WAITOK); - sc->l2t = t4_init_l2t(M_WAITOK); - - t4_sysctls(sc); + t4_init_l2t(sc, M_WAITOK); /* * Second pass over the ports. This time we know the number of rx and * tx queues that each port should get. */ rqidx = tqidx = 0; +#ifndef TCP_OFFLOAD_DISABLE + ofld_rqidx = ofld_tqidx = 0; +#endif for_each_port(sc, i) { struct port_info *pi = sc->port[i]; @@ -628,13 +640,33 @@ t4_attach(device_t dev) continue; pi->first_rxq = rqidx; - pi->nrxq = is_10G_port(pi) ? iaq.nrxq10g : iaq.nrxq1g; - pi->first_txq = tqidx; - pi->ntxq = is_10G_port(pi) ? iaq.ntxq10g : iaq.ntxq1g; + if (is_10G_port(pi)) { + pi->nrxq = iaq.nrxq10g; + pi->ntxq = iaq.ntxq10g; + } else { + pi->nrxq = iaq.nrxq1g; + pi->ntxq = iaq.ntxq1g; + } rqidx += pi->nrxq; tqidx += pi->ntxq; + +#ifndef TCP_OFFLOAD_DISABLE + if (is_offload(sc)) { + pi->first_ofld_rxq = ofld_rqidx; + pi->first_ofld_txq = ofld_tqidx; + if (is_10G_port(pi)) { + pi->nofldrxq = iaq.nofldrxq10g; + pi->nofldtxq = iaq.nofldtxq10g; + } else { + pi->nofldrxq = iaq.nofldrxq1g; + pi->nofldtxq = iaq.nofldtxq1g; + } + ofld_rqidx += pi->nofldrxq; + ofld_tqidx += pi->nofldtxq; + } +#endif } rc = bus_generic_attach(dev); @@ -644,17 +676,27 @@ t4_attach(device_t dev) goto done; } -#ifdef INVARIANTS device_printf(dev, - "%p, %d ports (0x%x), %d intr_type, %d intr_count\n", - sc, sc->params.nports, sc->params.portvec, - sc->intr_type, sc->intr_count); -#endif + "PCIe x%d, %d ports, %d %s interrupt%s, %d eq, %d iq\n", + sc->params.pci.width, sc->params.nports, sc->intr_count, + sc->intr_type == INTR_MSIX ? "MSI-X" : + (sc->intr_type == INTR_MSI ? "MSI" : "INTx"), + sc->intr_count > 1 ? "s" : "", sc->sge.neq, sc->sge.niq); + t4_set_desc(sc); done: + if (rc != 0 && sc->cdev) { + /* cdev was created and so cxgbetool works; recover that way. 
*/ + device_printf(dev, + "error during attach, adapter is now in recovery mode.\n"); + rc = 0; + } + if (rc != 0) t4_detach(dev); + else + t4_sysctls(sc); return (rc); } @@ -667,14 +709,25 @@ t4_detach(device_t dev) { struct adapter *sc; struct port_info *pi; - int i; + int i, rc; sc = device_get_softc(dev); - if (sc->cdev) + if (sc->flags & FULL_INIT_DONE) + t4_intr_disable(sc); + + if (sc->cdev) { destroy_dev(sc->cdev); + sc->cdev = NULL; + } + + rc = bus_generic_detach(dev); + if (rc) { + device_printf(dev, + "failed to detach child devices: %d\n", rc); + return (rc); + } - bus_generic_detach(dev); for (i = 0; i < MAX_NPORTS; i++) { pi = sc->port[i]; if (pi) { @@ -687,6 +740,9 @@ t4_detach(device_t dev) } } + if (sc->flags & FULL_INIT_DONE) + adapter_full_uninit(sc); + if (sc->flags & FW_OK) t4_fw_bye(sc, sc->mbox); @@ -704,16 +760,27 @@ t4_detach(device_t dev) if (sc->l2t) t4_free_l2t(sc->l2t); +#ifndef TCP_OFFLOAD_DISABLE + free(sc->sge.ofld_rxq, M_CXGBE); + free(sc->sge.ofld_txq, M_CXGBE); +#endif free(sc->irq, M_CXGBE); free(sc->sge.rxq, M_CXGBE); free(sc->sge.txq, M_CXGBE); free(sc->sge.ctrlq, M_CXGBE); - free(sc->sge.intrq, M_CXGBE); free(sc->sge.iqmap, M_CXGBE); free(sc->sge.eqmap, M_CXGBE); free(sc->tids.ftid_tab, M_CXGBE); t4_destroy_dma_tag(sc); - mtx_destroy(&sc->sc_lock); + if (mtx_initialized(&sc->sc_lock)) { + mtx_lock(&t4_list_lock); + SLIST_REMOVE(&t4_list, sc, adapter, link); + mtx_unlock(&t4_list_lock); + mtx_destroy(&sc->sc_lock); + } + + if (mtx_initialized(&sc->sfl_lock)) + mtx_destroy(&sc->sfl_lock); bzero(sc, sizeof(*sc)); @@ -727,7 +794,7 @@ cxgbe_probe(device_t dev) char buf[128]; struct port_info *pi = device_get_softc(dev); - snprintf(buf, sizeof(buf), "Port %d", pi->port_id); + snprintf(buf, sizeof(buf), "port %d", pi->port_id); device_set_desc_copy(dev, buf); return (BUS_PROBE_DEFAULT); @@ -754,15 +821,6 @@ cxgbe_attach(device_t dev) ifp->if_softc = pi; callout_init(&pi->tick, CALLOUT_MPSAFE); - pi->tq = taskqueue_create("cxgbe_taskq", M_NOWAIT, - taskqueue_thread_enqueue, &pi->tq); - if (pi->tq == NULL) { - device_printf(dev, "failed to allocate port task queue\n"); - if_free(pi->ifp); - return (ENOMEM); - } - taskqueue_start_threads(&pi->tq, 1, PI_NET, "%s taskq", - device_get_nameunit(dev)); if_initname(ifp, device_get_name(dev), device_get_unit(dev)); ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; @@ -778,6 +836,10 @@ cxgbe_attach(device_t dev) IFQ_SET_READY(&ifp->if_snd); ifp->if_capabilities = T4_CAP; +#ifndef TCP_OFFLOAD_DISABLE + if (is_offload(pi->adapter)) + ifp->if_capabilities |= IFCAP_TOE4; +#endif ifp->if_capenable = T4_CAP_ENABLE; ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO; @@ -788,9 +850,14 @@ cxgbe_attach(device_t dev) ether_ifattach(ifp, pi->hw_addr); -#ifdef INVARIANTS - device_printf(dev, "%p, %d txq, %d rxq\n", pi, pi->ntxq, pi->nrxq); +#ifndef TCP_OFFLOAD_DISABLE + if (is_offload(pi->adapter)) { + device_printf(dev, + "%d txq, %d rxq (NIC); %d txq, %d rxq (TOE)\n", + pi->ntxq, pi->nrxq, pi->nofldtxq, pi->nofldrxq); + } else #endif + device_printf(dev, "%d txq, %d rxq\n", pi->ntxq, pi->nrxq); cxgbe_sysctls(pi); @@ -802,7 +869,7 @@ cxgbe_detach(device_t dev) { struct port_info *pi = device_get_softc(dev); struct adapter *sc = pi->adapter; - int rc; + struct ifnet *ifp = pi->ifp; /* Tell if_ioctl and if_init that the port is going away */ ADAPTER_LOCK(sc); @@ -813,11 +880,15 @@ cxgbe_detach(device_t dev) SET_BUSY(sc); ADAPTER_UNLOCK(sc); - rc = cxgbe_uninit_synchronized(pi); - if (rc != 0) - 
device_printf(dev, "port uninit failed: %d.\n", rc); + PORT_LOCK(pi); + ifp->if_drv_flags &= ~IFF_DRV_RUNNING; + callout_stop(&pi->tick); + PORT_UNLOCK(pi); + callout_drain(&pi->tick); - taskqueue_free(pi->tq); + /* Let detach proceed even if these fail. */ + cxgbe_uninit_synchronized(pi); + port_full_uninit(pi); ifmedia_removeall(&pi->media); ether_ifdetach(pi->ifp); @@ -956,6 +1027,7 @@ fail: if_printf(ifp, "enable txcsum first.\n"); rc = EAGAIN; + goto fail; } } else ifp->if_hwassist &= ~CSUM_TSO; @@ -968,15 +1040,21 @@ fail: ifp->if_capenable ^= IFCAP_LRO; for_each_rxq(pi, i, rxq) { if (ifp->if_capenable & IFCAP_LRO) - rxq->flags |= RXQ_LRO_ENABLED; + rxq->iq.flags |= IQ_LRO_ENABLED; else - rxq->flags &= ~RXQ_LRO_ENABLED; + rxq->iq.flags &= ~IQ_LRO_ENABLED; } #endif } #ifndef TCP_OFFLOAD_DISABLE - if (mask & IFCAP_TOE4) { - rc = EOPNOTSUPP; + if (mask & IFCAP_TOE) { + int enable = (ifp->if_capenable ^ mask) & IFCAP_TOE; + + rc = toe_capability(pi, enable); + if (rc != 0) + goto fail; + + ifp->if_capenable ^= mask; } #endif if (mask & IFCAP_VLAN_HWTAGGING) { @@ -1041,9 +1119,9 @@ cxgbe_transmit(struct ifnet *ifp, struct mbuf *m) M_ASSERTPKTHDR(m); - if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { + if (__predict_false(pi->link_cfg.link_ok == 0)) { m_freem(m); - return (0); + return (ENETDOWN); } if (m->m_flags & M_FLOWID) @@ -1051,13 +1129,20 @@ cxgbe_transmit(struct ifnet *ifp, struct mbuf *m) br = txq->br; if (TXQ_TRYLOCK(txq) == 0) { + struct sge_eq *eq = &txq->eq; + /* - * XXX: make sure that this packet really is sent out. There is - * a small race where t4_eth_tx may stop draining the drbr and - * goes away, just before we enqueued this mbuf. + * It is possible that t4_eth_tx finishes up and releases the + * lock between the TRYLOCK above and the drbr_enqueue here. We + * need to make sure that this mbuf doesn't just sit there in + * the drbr. */ - return (drbr_enqueue(ifp, br, m)); + rc = drbr_enqueue(ifp, br, m); + if (rc == 0 && callout_pending(&eq->tx_callout) == 0 && + !(eq->flags & EQ_DOOMED)) + callout_reset(&eq->tx_callout, 1, t4_tx_callout, eq); + return (rc); } /* @@ -1098,11 +1183,12 @@ cxgbe_qflush(struct ifnet *ifp) int i; struct mbuf *m; - /* queues do not exist if !IFF_DRV_RUNNING. */ - if (ifp->if_drv_flags & IFF_DRV_RUNNING) { + /* queues do not exist if !PORT_INIT_DONE. 
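In the SIOCSIFCAP handling above, "mask" holds the capability bits that are changing; XOR-ing it into if_capenable flips exactly those bits, and (if_capenable ^ mask) & IFCAP_TOE yields the state TOE is about to be in. A minimal demonstration of that arithmetic with made-up bit values:

#include <assert.h>

#define CAP_TXCSUM	0x1
#define CAP_LRO		0x2
#define CAP_TOE		0x4

int
main(void)
{
	int enabled = CAP_TXCSUM;		/* current if_capenable */
	int requested = CAP_TXCSUM | CAP_TOE;	/* what the ioctl asks for */
	int mask = enabled ^ requested;		/* bits that change: CAP_TOE */
	int toe_after;

	toe_after = (enabled ^ mask) & CAP_TOE;
	assert(toe_after == CAP_TOE);		/* TOE is being switched on */

	enabled ^= mask;			/* commit the toggle */
	assert(enabled == (CAP_TXCSUM | CAP_TOE));
	return (0);
}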
*/ + if (pi->flags & PORT_INIT_DONE) { for_each_txq(pi, i, txq) { TXQ_LOCK(txq); m_freem(txq->m); + txq->m = NULL; while ((m = buf_ring_dequeue_sc(txq->br)) != NULL) m_freem(m); TXQ_UNLOCK(txq); @@ -1216,14 +1302,25 @@ static int cfg_itype_and_nqueues(struct adapter *sc, int n10g, int n1g, struct intrs_and_queues *iaq) { - int rc, itype, navail, nc, nrxq10g, nrxq1g; + int rc, itype, navail, nrxq10g, nrxq1g, n; + int nofldrxq10g = 0, nofldrxq1g = 0; bzero(iaq, sizeof(*iaq)); - nc = mp_ncpus; /* our snapshot of the number of CPUs */ + + iaq->ntxq10g = t4_ntxq10g; + iaq->ntxq1g = t4_ntxq1g; + iaq->nrxq10g = nrxq10g = t4_nrxq10g; + iaq->nrxq1g = nrxq1g = t4_nrxq1g; +#ifndef TCP_OFFLOAD_DISABLE + iaq->nofldtxq10g = t4_nofldtxq10g; + iaq->nofldtxq1g = t4_nofldtxq1g; + iaq->nofldrxq10g = nofldrxq10g = t4_nofldrxq10g; + iaq->nofldrxq1g = nofldrxq1g = t4_nofldrxq1g; +#endif for (itype = INTR_MSIX; itype; itype >>= 1) { - if ((itype & intr_types) == 0) + if ((itype & t4_intr_types) == 0) continue; /* not allowed */ if (itype == INTR_MSIX) @@ -1232,60 +1329,93 @@ cfg_itype_and_nqueues(struct adapter *sc, int n10g, int n1g, navail = pci_msi_count(sc->dev); else navail = 1; - +restart: if (navail == 0) continue; iaq->intr_type = itype; + iaq->intr_flags = 0; - iaq->ntxq10g = min(nc, max_ntxq_10g); - iaq->ntxq1g = min(nc, max_ntxq_1g); - - nrxq10g = min(nc, max_nrxq_10g); - nrxq1g = min(nc, max_nrxq_1g); - - iaq->nirq = n10g * nrxq10g + n1g * nrxq1g + T4_EXTRA_INTR; - if (iaq->nirq <= navail && intr_shared == 0) { - - if (itype == INTR_MSI && !powerof2(iaq->nirq)) - goto share; - - /* One for err, one for fwq, and one for each rxq */ - - iaq->intr_shared = 0; - iaq->nrxq10g = nrxq10g; - iaq->nrxq1g = nrxq1g; + /* + * Best option: an interrupt vector for errors, one for the + * firmware event queue, and one each for each rxq (NIC as well + * as offload). + */ + iaq->nirq = T4_EXTRA_INTR; + iaq->nirq += n10g * (nrxq10g + nofldrxq10g); + iaq->nirq += n1g * (nrxq1g + nofldrxq1g); + if (iaq->nirq <= navail && + (itype != INTR_MSI || powerof2(iaq->nirq))) { + iaq->intr_flags |= INTR_DIRECT; + goto allocate; + } - } else { -share: - iaq->intr_shared = 1; + /* + * Second best option: an interrupt vector for errors, one for + * the firmware event queue, and one each for either NIC or + * offload rxq's. + */ + iaq->nirq = T4_EXTRA_INTR; + iaq->nirq += n10g * max(nrxq10g, nofldrxq10g); + iaq->nirq += n1g * max(nrxq1g, nofldrxq1g); + if (iaq->nirq <= navail && + (itype != INTR_MSI || powerof2(iaq->nirq))) + goto allocate; - if (navail >= nc + T4_EXTRA_INTR) { - if (itype == INTR_MSIX) - navail = nc + T4_EXTRA_INTR; + /* + * Next best option: an interrupt vector for errors, one for the + * firmware event queue, and at least one per port. At this + * point we know we'll have to downsize nrxq or nofldrxq to fit + * what's available to us. 
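cfg_itype_and_nqueues() above walks a ladder of interrupt layouts: one vector per rxq if the budget allows, then progressively fewer rxq's per port, handing out leftover vectors one round at a time, and finally a single shared vector. A compressed, standalone sketch of that budgeting (T4_EXTRA_INTR is taken to be two, matching the "one for errors, one for the firmware event queue" comment; offload queues are ignored here):

#include <stdio.h>

#define T4_EXTRA_INTR 2			/* one for errors, one for the fwq */

static int
rxq_per_port(int navail, int nports, int wanted)
{
	int nirq = T4_EXTRA_INTR + nports;	/* minimum: one vector per port */
	int leftover, n;

	if (navail >= T4_EXTRA_INTR + nports * wanted)
		return (wanted);		/* best case: one vector per rxq */

	if (navail < nirq)
		return (1);			/* least desirable: share everything */

	leftover = navail - nirq;
	n = 1;
	while (n < wanted && leftover >= nports) {
		leftover -= nports;		/* one more vector for every port */
		n++;
	}
	return (n);
}

int
main(void)
{
	/* 2 ports wanting 8 rxq's each, but only 11 vectors available. */
	printf("rxq's per port: %d\n", rxq_per_port(11, 2, 8));	/* 4 */
	return (0);
}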
+ */ + iaq->nirq = T4_EXTRA_INTR; + iaq->nirq += n10g + n1g; + if (iaq->nirq <= navail) { + int leftover = navail - iaq->nirq; + + if (n10g > 0) { + int target = max(nrxq10g, nofldrxq10g); + + n = 1; + while (n < target && leftover >= n10g) { + leftover -= n10g; + iaq->nirq += n10g; + n++; + } + iaq->nrxq10g = min(n, nrxq10g); +#ifndef TCP_OFFLOAD_DISABLE + iaq->nofldrxq10g = min(n, nofldrxq10g); +#endif + } - /* navail is and must remain a pow2 for MSI */ - if (itype == INTR_MSI) { - KASSERT(powerof2(navail), - ("%d not power of 2", navail)); + if (n1g > 0) { + int target = max(nrxq1g, nofldrxq1g); - while (navail / 2 >= nc + T4_EXTRA_INTR) - navail /= 2; + n = 1; + while (n < target && leftover >= n1g) { + leftover -= n1g; + iaq->nirq += n1g; + n++; } + iaq->nrxq1g = min(n, nrxq1g); +#ifndef TCP_OFFLOAD_DISABLE + iaq->nofldrxq1g = min(n, nofldrxq1g); +#endif } - iaq->nirq = navail; /* total # of interrupts */ - /* - * If we have multiple vectors available reserve one - * exclusively for errors. The rest will be shared by - * the fwq and data. - */ - if (navail > 1) - navail--; - iaq->nrxq10g = min(nrxq10g, navail); - iaq->nrxq1g = min(nrxq1g, navail); + if (itype != INTR_MSI || powerof2(iaq->nirq)) + goto allocate; } + /* + * Least desirable option: one interrupt vector for everything. + */ + iaq->nirq = iaq->nrxq10g = iaq->nrxq1g = 1; +#ifndef TCP_OFFLOAD_DISABLE + iaq->nofldrxq10g = iaq->nofldrxq1g = 1; +#endif + +allocate: navail = iaq->nirq; rc = 0; if (itype == INTR_MSIX) @@ -1301,8 +1431,11 @@ share: * Didn't get the number requested. Use whatever number * the kernel is willing to allocate (it's in navail). */ + device_printf(sc->dev, "fewer vectors than requested, " + "type=%d, req=%d, rcvd=%d; will downshift req.\n", + itype, iaq->nirq, navail); pci_release_msi(sc->dev); - goto share; + goto restart; } device_printf(sc->dev, @@ -1312,26 +1445,30 @@ share: device_printf(sc->dev, "failed to find a usable interrupt type. " - "allowed=%d, msi-x=%d, msi=%d, intx=1", intr_types, + "allowed=%d, msi-x=%d, msi=%d, intx=1", t4_intr_types, pci_msix_count(sc->dev), pci_msi_count(sc->dev)); return (ENXIO); } /* - * Install a compatible firmware (if required), establish contact with it, - * become the master, and reset the device. + * Install a compatible firmware (if required), establish contact with it (by + * saying hello), and reset the device. If we end up as the master driver, + * partition adapter resources by providing a configuration file to the + * firmware. */ static int prep_firmware(struct adapter *sc) { - const struct firmware *fw; + const struct firmware *fw = NULL, *cfg = NULL, *default_cfg; int rc; enum dev_state state; + default_cfg = firmware_get(T4_CFGNAME); + /* Check firmware version and install a different one if necessary */ rc = t4_check_fw_version(sc); - if (rc != 0 || force_firmware_install) { + if (rc != 0) { uint32_t v = 0; fw = firmware_get(T4_FWNAME); @@ -1343,7 +1480,7 @@ prep_firmware(struct adapter *sc) /* * The firmware module will not be used if it isn't the * same major version as what the driver was compiled - * with. This check trumps force_firmware_install. + * with. */ if (G_FW_HDR_FW_VER_MAJOR(v) != FW_VERSION_MAJOR) { device_printf(sc->dev, @@ -1356,17 +1493,16 @@ prep_firmware(struct adapter *sc) } } - if (fw == NULL && (rc < 0 || force_firmware_install)) { + if (fw == NULL && rc < 0) { device_printf(sc->dev, "No usable firmware. 
" - "card has %d.%d.%d, driver compiled with %d.%d.%d, " - "force_firmware_install%s set", + "card has %d.%d.%d, driver compiled with %d.%d.%d", G_FW_HDR_FW_VER_MAJOR(sc->params.fw_vers), G_FW_HDR_FW_VER_MINOR(sc->params.fw_vers), G_FW_HDR_FW_VER_MICRO(sc->params.fw_vers), FW_VERSION_MAJOR, FW_VERSION_MINOR, - FW_VERSION_MICRO, - force_firmware_install ? "" : " not"); - return (EAGAIN); + FW_VERSION_MICRO); + rc = EAGAIN; + goto done; } /* @@ -1374,8 +1510,7 @@ prep_firmware(struct adapter *sc) * Downgrade only for a major version mismatch or if * force_firmware_install was specified. */ - if (fw != NULL && (rc < 0 || force_firmware_install || - v > sc->params.fw_vers)) { + if (fw != NULL && (rc < 0 || v > sc->params.fw_vers)) { device_printf(sc->dev, "installing firmware %d.%d.%d.%d on card.\n", G_FW_HDR_FW_VER_MAJOR(v), G_FW_HDR_FW_VER_MINOR(v), @@ -1385,26 +1520,24 @@ prep_firmware(struct adapter *sc) if (rc != 0) { device_printf(sc->dev, "failed to install firmware: %d\n", rc); - firmware_put(fw, FIRMWARE_UNLOAD); - return (rc); + goto done; } else { /* refresh */ (void) t4_check_fw_version(sc); } } - - if (fw != NULL) - firmware_put(fw, FIRMWARE_UNLOAD); } - /* Contact firmware, request master */ - rc = t4_fw_hello(sc, sc->mbox, sc->mbox, MASTER_MUST, &state); + /* Contact firmware. */ + rc = t4_fw_hello(sc, sc->mbox, sc->mbox, MASTER_MAY, &state); if (rc < 0) { rc = -rc; device_printf(sc->dev, "failed to connect to the firmware: %d.\n", rc); - return (rc); + goto done; } + if (rc == sc->mbox) + sc->flags |= MASTER_PF; /* Reset device */ rc = -t4_fw_reset(sc, sc->mbox, F_PIORSTMODE | F_PIORST); @@ -1412,7 +1545,26 @@ prep_firmware(struct adapter *sc) device_printf(sc->dev, "firmware reset failed: %d.\n", rc); if (rc != ETIMEDOUT && rc != EIO) t4_fw_bye(sc, sc->mbox); - return (rc); + goto done; + } + + /* Partition adapter resources as specified in the config file. */ + if (sc->flags & MASTER_PF) { + if (strncmp(t4_cfg_file, "default", sizeof(t4_cfg_file))) { + char s[32]; + + snprintf(s, sizeof(s), "t4fw_cfg_%s", t4_cfg_file); + cfg = firmware_get(s); + if (cfg == NULL) { + device_printf(sc->dev, + "unable to locate %s module, " + "will use default config file.\n", s); + } + } + + rc = partition_resources(sc, cfg ? cfg : default_cfg); + if (rc != 0) + goto done; /* error message displayed already */ } snprintf(sc->fw_version, sizeof(sc->fw_version), "%u.%u.%u.%u", @@ -1422,84 +1574,207 @@ prep_firmware(struct adapter *sc) G_FW_HDR_FW_VER_BUILD(sc->params.fw_vers)); sc->flags |= FW_OK; - return (0); +done: + if (fw != NULL) + firmware_put(fw, FIRMWARE_UNLOAD); + if (cfg != NULL) + firmware_put(cfg, FIRMWARE_UNLOAD); + if (default_cfg != NULL) + firmware_put(default_cfg, FIRMWARE_UNLOAD); + + return (rc); } +#define FW_PARAM_DEV(param) \ + (V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) | \ + V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_##param)) +#define FW_PARAM_PFVF(param) \ + (V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_PFVF) | \ + V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_PFVF_##param)) + +/* + * Upload configuration file to card's memory. 
+ */ static int -get_devlog_params(struct adapter *sc, struct devlog_params *dlog) +upload_config_file(struct adapter *sc, const struct firmware *fw, uint32_t *mt, + uint32_t *ma) { - struct fw_devlog_cmd devlog_cmd; - uint32_t meminfo; - int rc; + int rc, i; + uint32_t param, val, mtype, maddr, bar, off, win, remaining; + const uint32_t *b; - bzero(&devlog_cmd, sizeof(devlog_cmd)); - devlog_cmd.op_to_write = htobe32(V_FW_CMD_OP(FW_DEVLOG_CMD) | - F_FW_CMD_REQUEST | F_FW_CMD_READ); - devlog_cmd.retval_len16 = htobe32(FW_LEN16(devlog_cmd)); - rc = -t4_wr_mbox(sc, sc->mbox, &devlog_cmd, sizeof(devlog_cmd), - &devlog_cmd); + /* Figure out where the firmware wants us to upload it. */ + param = FW_PARAM_DEV(CF); + rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, ¶m, &val); if (rc != 0) { + /* Firmwares without config file support will fail this way */ device_printf(sc->dev, - "failed to get devlog parameters: %d.\n", rc); - bzero(dlog, sizeof (*dlog)); + "failed to query config file location: %d.\n", rc); return (rc); } + *mt = mtype = G_FW_PARAMS_PARAM_Y(val); + *ma = maddr = G_FW_PARAMS_PARAM_Z(val) << 16; + + if (maddr & 3) { + device_printf(sc->dev, + "cannot upload config file (type %u, addr %x).\n", + mtype, maddr); + return (EFAULT); + } - meminfo = be32toh(devlog_cmd.memtype_devlog_memaddr16_devlog); - dlog->memtype = G_FW_DEVLOG_CMD_MEMTYPE_DEVLOG(meminfo); - dlog->start = G_FW_DEVLOG_CMD_MEMADDR16_DEVLOG(meminfo) << 4; - dlog->size = be32toh(devlog_cmd.memsize_devlog); + /* Translate mtype/maddr to an address suitable for the PCIe window */ + val = t4_read_reg(sc, A_MA_TARGET_MEM_ENABLE); + val &= F_EDRAM0_ENABLE | F_EDRAM1_ENABLE | F_EXT_MEM_ENABLE; + switch (mtype) { + case FW_MEMTYPE_CF_EDC0: + if (!(val & F_EDRAM0_ENABLE)) + goto err; + bar = t4_read_reg(sc, A_MA_EDRAM0_BAR); + maddr += G_EDRAM0_BASE(bar) << 20; + break; - return (0); + case FW_MEMTYPE_CF_EDC1: + if (!(val & F_EDRAM1_ENABLE)) + goto err; + bar = t4_read_reg(sc, A_MA_EDRAM1_BAR); + maddr += G_EDRAM1_BASE(bar) << 20; + break; + + case FW_MEMTYPE_CF_EXTMEM: + if (!(val & F_EXT_MEM_ENABLE)) + goto err; + bar = t4_read_reg(sc, A_MA_EXT_MEMORY_BAR); + maddr += G_EXT_MEM_BASE(bar) << 20; + break; + + default: +err: + device_printf(sc->dev, + "cannot upload config file (type %u, enabled %u).\n", + mtype, val); + return (EFAULT); + } + + /* + * Position the PCIe window (we use memwin2) to the 16B aligned area + * just at/before the upload location. + */ + win = maddr & ~0xf; + off = maddr - win; /* offset from the start of the window. */ + t4_write_reg(sc, PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_OFFSET, 2), win); + t4_read_reg(sc, PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_OFFSET, 2)); + + remaining = fw->datasize; + if (remaining > FLASH_CFG_MAX_SIZE || + remaining > MEMWIN2_APERTURE - off) { + device_printf(sc->dev, "cannot upload config file all at once " + "(size %u, max %u, room %u).\n", + remaining, FLASH_CFG_MAX_SIZE, MEMWIN2_APERTURE - off); + return (EFBIG); + } + + /* + * XXX: sheer laziness. We deliberately added 4 bytes of useless + * stuffing/comments at the end of the config file so it's ok to simply + * throw away the last remaining bytes when the config file is not an + * exact multiple of 4. + */ + b = fw->data; + for (i = 0; remaining >= 4; i += 4, remaining -= 4) + t4_write_reg(sc, MEMWIN2_BASE + off + i, *b++); + + return (rc); } +/* + * Partition chip resources for use between various PFs, VFs, etc. 
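upload_config_file() above positions a PCIe memory window at the 16-byte aligned address at or below the upload target and then streams the file through it as 32-bit words, deliberately dropping any sub-word tail. The same address math in a self-contained form (the aperture array merely stands in for the real window, and its size is an arbitrary pick for the sketch):

#include <assert.h>
#include <stdint.h>
#include <string.h>

#define APERTURE_SIZE 65536		/* arbitrary window size for the sketch */

static uint8_t aperture[APERTURE_SIZE];	/* pretend this is adapter memory */

static void
upload(uint32_t maddr, const uint32_t *data, uint32_t len)
{
	uint32_t win = maddr & ~0xfu;	/* window base: 16B aligned */
	uint32_t off = maddr - win;	/* offset of the target in the window */
	uint32_t i;

	assert(len <= APERTURE_SIZE - off);	/* must fit in one window */
	for (i = 0; len >= 4; i += 4, len -= 4) {
		memcpy(&aperture[off + i], data, 4);	/* t4_write_reg() stand-in */
		data++;
	}
	/* Any 1-3 byte tail is dropped, exactly as the driver does. */
}

int
main(void)
{
	uint32_t words[2] = { 0x11111111, 0x22222222 };

	upload(0x1234, words, sizeof(words));	/* win 0x1230, off 4 */
	assert(aperture[4] == 0x11);		/* first payload byte at the offset */
	return (0);
}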
This is done + * by uploading the firmware configuration file to the adapter and instructing + * the firmware to process it. + */ static int -get_capabilities(struct adapter *sc, struct fw_caps_config_cmd *caps) +partition_resources(struct adapter *sc, const struct firmware *cfg) { int rc; + struct fw_caps_config_cmd caps; + uint32_t mtype, maddr, finicsum, cfcsum; - bzero(caps, sizeof(*caps)); - caps->op_to_write = htobe32(V_FW_CMD_OP(FW_CAPS_CONFIG_CMD) | - F_FW_CMD_REQUEST | F_FW_CMD_READ); - caps->retval_len16 = htobe32(FW_LEN16(*caps)); + rc = cfg ? upload_config_file(sc, cfg, &mtype, &maddr) : ENOENT; + if (rc != 0) { + mtype = FW_MEMTYPE_CF_FLASH; + maddr = t4_flash_cfg_addr(sc); + } - rc = -t4_wr_mbox(sc, sc->mbox, caps, sizeof(*caps), caps); - if (rc != 0) + bzero(&caps, sizeof(caps)); + caps.op_to_write = htobe32(V_FW_CMD_OP(FW_CAPS_CONFIG_CMD) | + F_FW_CMD_REQUEST | F_FW_CMD_READ); + caps.cfvalid_to_len16 = htobe32(F_FW_CAPS_CONFIG_CMD_CFVALID | + V_FW_CAPS_CONFIG_CMD_MEMTYPE_CF(mtype) | + V_FW_CAPS_CONFIG_CMD_MEMADDR64K_CF(maddr >> 16) | FW_LEN16(caps)); + rc = -t4_wr_mbox(sc, sc->mbox, &caps, sizeof(caps), &caps); + if (rc != 0) { + device_printf(sc->dev, + "failed to pre-process config file: %d.\n", rc); return (rc); + } - if (caps->niccaps & htobe16(FW_CAPS_CONFIG_NIC_VM)) - caps->niccaps ^= htobe16(FW_CAPS_CONFIG_NIC_VM); + finicsum = be32toh(caps.finicsum); + cfcsum = be32toh(caps.cfcsum); + if (finicsum != cfcsum) { + device_printf(sc->dev, + "WARNING: config file checksum mismatch: %08x %08x\n", + finicsum, cfcsum); + } + sc->cfcsum = cfcsum; + +#define LIMIT_CAPS(x) do { \ + caps.x &= htobe16(t4_##x##_allowed); \ + sc->x = htobe16(caps.x); \ +} while (0) - caps->op_to_write = htobe32(V_FW_CMD_OP(FW_CAPS_CONFIG_CMD) | + /* + * Let the firmware know what features will (not) be used so it can tune + * things accordingly. + */ + LIMIT_CAPS(linkcaps); + LIMIT_CAPS(niccaps); + LIMIT_CAPS(toecaps); + LIMIT_CAPS(rdmacaps); + LIMIT_CAPS(iscsicaps); + LIMIT_CAPS(fcoecaps); +#undef LIMIT_CAPS + + caps.op_to_write = htobe32(V_FW_CMD_OP(FW_CAPS_CONFIG_CMD) | F_FW_CMD_REQUEST | F_FW_CMD_WRITE); - rc = -t4_wr_mbox(sc, sc->mbox, caps, sizeof(*caps), NULL); + caps.cfvalid_to_len16 = htobe32(FW_LEN16(caps)); + rc = -t4_wr_mbox(sc, sc->mbox, &caps, sizeof(caps), NULL); + if (rc != 0) { + device_printf(sc->dev, + "failed to process config file: %d.\n", rc); + return (rc); + } - return (rc); + return (0); } +/* + * Retrieve parameters that are needed (or nice to have) prior to calling + * t4_sge_init and t4_fw_initialize. 
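The LIMIT_CAPS() macro above simply ANDs each capability word reported by the firmware with the corresponding hw.cxgbe.*caps_allowed tunable, so disabled features are never written back and the chip does not reserve resources for them. The idea, stripped of the big-endian handling (bit values below are made up for the sketch):

#include <assert.h>
#include <stdint.h>

#define CAPS_TOE	0x1		/* made-up bit values, sketch only */
#define CAPS_RDMA	0x2

int
main(void)
{
	uint16_t toecaps = CAPS_TOE;		/* offered by the firmware */
	uint16_t rdmacaps = CAPS_RDMA;
	uint16_t toecaps_allowed = CAPS_TOE;	/* tunable: keep TOE */
	uint16_t rdmacaps_allowed = 0;		/* tunable: no RDMA */

	toecaps &= toecaps_allowed;
	rdmacaps &= rdmacaps_allowed;

	assert(toecaps == CAPS_TOE);
	assert(rdmacaps == 0);		/* RDMA resources won't be reserved */
	return (0);
}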
+ */ static int -get_params(struct adapter *sc, struct fw_caps_config_cmd *caps) +get_params__pre_init(struct adapter *sc) { int rc; - uint32_t params[7], val[7]; + uint32_t param[2], val[2]; + struct fw_devlog_cmd cmd; + struct devlog_params *dlog = &sc->params.devlog; -#define FW_PARAM_DEV(param) \ - (V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) | \ - V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_##param)) -#define FW_PARAM_PFVF(param) \ - (V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_PFVF) | \ - V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_PFVF_##param)) - - params[0] = FW_PARAM_DEV(PORTVEC); - params[1] = FW_PARAM_PFVF(IQFLINT_START); - params[2] = FW_PARAM_PFVF(EQ_START); - params[3] = FW_PARAM_PFVF(FILTER_START); - params[4] = FW_PARAM_PFVF(FILTER_END); - rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 5, params, val); + param[0] = FW_PARAM_DEV(PORTVEC); + param[1] = FW_PARAM_DEV(CCLK); + rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 2, param, val); if (rc != 0) { device_printf(sc->dev, - "failed to query parameters: %d.\n", rc); - goto done; + "failed to query parameters (pre_init): %d.\n", rc); + return (rc); } sc->params.portvec = val[0]; @@ -1509,24 +1784,81 @@ get_params(struct adapter *sc, struct fw_caps_config_cmd *caps) val[0] &= val[0] - 1; } - sc->sge.iq_start = val[1]; - sc->sge.eq_start = val[2]; - sc->tids.ftid_base = val[3]; - sc->tids.nftids = val[4] - val[3] + 1; + sc->params.vpd.cclk = val[1]; + + /* Read device log parameters. */ + bzero(&cmd, sizeof(cmd)); + cmd.op_to_write = htobe32(V_FW_CMD_OP(FW_DEVLOG_CMD) | + F_FW_CMD_REQUEST | F_FW_CMD_READ); + cmd.retval_len16 = htobe32(FW_LEN16(cmd)); + rc = -t4_wr_mbox(sc, sc->mbox, &cmd, sizeof(cmd), &cmd); + if (rc != 0) { + device_printf(sc->dev, + "failed to get devlog parameters: %d.\n", rc); + bzero(dlog, sizeof (*dlog)); + rc = 0; /* devlog isn't critical for device operation */ + } else { + val[0] = be32toh(cmd.memtype_devlog_memaddr16_devlog); + dlog->memtype = G_FW_DEVLOG_CMD_MEMTYPE_DEVLOG(val[0]); + dlog->start = G_FW_DEVLOG_CMD_MEMADDR16_DEVLOG(val[0]) << 4; + dlog->size = be32toh(cmd.memsize_devlog); + } - if (caps->toecaps) { + return (rc); +} + +/* + * Retrieve various parameters that are of interest to the driver. The device + * has been initialized by the firmware at this point. 
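The port vector fetched above (FW_PARAM_DEV(PORTVEC)) is a bitmap with one bit per physical port, and the "val[0] &= val[0] - 1" line is the familiar clear-lowest-set-bit idiom; presumably the surrounding loop counts the ports. A standalone sketch of the same count:

#include <stdint.h>
#include <stdio.h>

static int
count_ports(uint32_t portvec)
{
        int n = 0;

        while (portvec != 0) {
                n++;
                portvec &= portvec - 1;         /* clear the lowest set bit */
        }
        return (n);
}

int
main(void)
{

        printf("portvec 0x5 -> %d ports\n", count_ports(0x5));  /* 2 */
        printf("portvec 0xf -> %d ports\n", count_ports(0xf));  /* 4 */
        return (0);
}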
+ */ +static int +get_params__post_init(struct adapter *sc) +{ + int rc; + uint32_t param[7], val[7]; + struct fw_caps_config_cmd caps; + + param[0] = FW_PARAM_PFVF(IQFLINT_START); + param[1] = FW_PARAM_PFVF(EQ_START); + param[2] = FW_PARAM_PFVF(FILTER_START); + param[3] = FW_PARAM_PFVF(FILTER_END); + rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 4, param, val); + if (rc != 0) { + device_printf(sc->dev, + "failed to query parameters (post_init): %d.\n", rc); + return (rc); + } + + sc->sge.iq_start = val[0]; + sc->sge.eq_start = val[1]; + sc->tids.ftid_base = val[2]; + sc->tids.nftids = val[3] - val[2] + 1; + + /* get capabilites */ + bzero(&caps, sizeof(caps)); + caps.op_to_write = htobe32(V_FW_CMD_OP(FW_CAPS_CONFIG_CMD) | + F_FW_CMD_REQUEST | F_FW_CMD_READ); + caps.cfvalid_to_len16 = htobe32(FW_LEN16(caps)); + rc = -t4_wr_mbox(sc, sc->mbox, &caps, sizeof(caps), &caps); + if (rc != 0) { + device_printf(sc->dev, + "failed to get card capabilities: %d.\n", rc); + return (rc); + } + + if (caps.toecaps) { /* query offload-related parameters */ - params[0] = FW_PARAM_DEV(NTID); - params[1] = FW_PARAM_PFVF(SERVER_START); - params[2] = FW_PARAM_PFVF(SERVER_END); - params[3] = FW_PARAM_PFVF(TDDP_START); - params[4] = FW_PARAM_PFVF(TDDP_END); - params[5] = FW_PARAM_DEV(FLOWC_BUFFIFO_SZ); - rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 6, params, val); + param[0] = FW_PARAM_DEV(NTID); + param[1] = FW_PARAM_PFVF(SERVER_START); + param[2] = FW_PARAM_PFVF(SERVER_END); + param[3] = FW_PARAM_PFVF(TDDP_START); + param[4] = FW_PARAM_PFVF(TDDP_END); + param[5] = FW_PARAM_DEV(FLOWC_BUFFIFO_SZ); + rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 6, param, val); if (rc != 0) { device_printf(sc->dev, "failed to query TOE parameters: %d.\n", rc); - goto done; + return (rc); } sc->tids.ntids = val[0]; sc->tids.natids = min(sc->tids.ntids / 2, MAX_ATIDS); @@ -1537,18 +1869,18 @@ get_params(struct adapter *sc, struct fw_caps_config_cmd *caps) sc->params.ofldq_wr_cred = val[5]; sc->params.offload = 1; } - if (caps->rdmacaps) { - params[0] = FW_PARAM_PFVF(STAG_START); - params[1] = FW_PARAM_PFVF(STAG_END); - params[2] = FW_PARAM_PFVF(RQ_START); - params[3] = FW_PARAM_PFVF(RQ_END); - params[4] = FW_PARAM_PFVF(PBL_START); - params[5] = FW_PARAM_PFVF(PBL_END); - rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 6, params, val); + if (caps.rdmacaps) { + param[0] = FW_PARAM_PFVF(STAG_START); + param[1] = FW_PARAM_PFVF(STAG_END); + param[2] = FW_PARAM_PFVF(RQ_START); + param[3] = FW_PARAM_PFVF(RQ_END); + param[4] = FW_PARAM_PFVF(PBL_START); + param[5] = FW_PARAM_PFVF(PBL_END); + rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 6, param, val); if (rc != 0) { device_printf(sc->dev, - "failed to query RDMA parameters: %d.\n", rc); - goto done; + "failed to query RDMA parameters(1): %d.\n", rc); + return (rc); } sc->vres.stag.start = val[0]; sc->vres.stag.size = val[1] - val[0] + 1; @@ -1556,37 +1888,59 @@ get_params(struct adapter *sc, struct fw_caps_config_cmd *caps) sc->vres.rq.size = val[3] - val[2] + 1; sc->vres.pbl.start = val[4]; sc->vres.pbl.size = val[5] - val[4] + 1; + + param[0] = FW_PARAM_PFVF(SQRQ_START); + param[1] = FW_PARAM_PFVF(SQRQ_END); + param[2] = FW_PARAM_PFVF(CQ_START); + param[3] = FW_PARAM_PFVF(CQ_END); + param[4] = FW_PARAM_PFVF(OCQ_START); + param[5] = FW_PARAM_PFVF(OCQ_END); + rc = -t4_query_params(sc, 0, 0, 0, 6, param, val); + if (rc != 0) { + device_printf(sc->dev, + "failed to query RDMA parameters(2): %d.\n", rc); + return (rc); + } + sc->vres.qp.start = val[0]; + sc->vres.qp.size = val[1] - val[0] 
+ 1; + sc->vres.cq.start = val[2]; + sc->vres.cq.size = val[3] - val[2] + 1; + sc->vres.ocq.start = val[4]; + sc->vres.ocq.size = val[5] - val[4] + 1; } - if (caps->iscsicaps) { - params[0] = FW_PARAM_PFVF(ISCSI_START); - params[1] = FW_PARAM_PFVF(ISCSI_END); - rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 2, params, val); + if (caps.iscsicaps) { + param[0] = FW_PARAM_PFVF(ISCSI_START); + param[1] = FW_PARAM_PFVF(ISCSI_END); + rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 2, param, val); if (rc != 0) { device_printf(sc->dev, "failed to query iSCSI parameters: %d.\n", rc); - goto done; + return (rc); } sc->vres.iscsi.start = val[0]; sc->vres.iscsi.size = val[1] - val[0] + 1; } -#undef FW_PARAM_PFVF -#undef FW_PARAM_DEV -done: + /* These are finalized by FW initialization, load their values now */ + val[0] = t4_read_reg(sc, A_TP_TIMER_RESOLUTION); + sc->params.tp.tre = G_TIMERRESOLUTION(val[0]); + sc->params.tp.dack_re = G_DELAYEDACKRESOLUTION(val[0]); + t4_read_mtu_tbl(sc, sc->params.mtus, NULL); + return (rc); } +#undef FW_PARAM_PFVF +#undef FW_PARAM_DEV + static void t4_set_desc(struct adapter *sc) { char buf[128]; struct adapter_params *p = &sc->params; - snprintf(buf, sizeof(buf), - "Chelsio %s (rev %d) %d port %sNIC PCIe-x%d %d %s, S/N:%s, E/C:%s", - p->vpd.id, p->rev, p->nports, is_offload(sc) ? "R" : "", - p->pci.width, sc->intr_count, sc->intr_type == INTR_MSIX ? "MSI-X" : - (sc->intr_type == INTR_MSI ? "MSI" : "INTx"), p->vpd.sn, p->vpd.ec); + snprintf(buf, sizeof(buf), "Chelsio %s %sNIC (rev %d), S/N:%s, E/C:%s", + p->vpd.id, is_offload(sc) ? "R" : "", p->rev, p->vpd.sn, p->vpd.ec); device_set_desc_copy(sc->dev, buf); } @@ -1803,9 +2157,7 @@ cxgbe_init_synchronized(struct port_info *pi) { struct adapter *sc = pi->adapter; struct ifnet *ifp = pi->ifp; - int rc = 0, i; - uint16_t *rss; - struct sge_rxq *rxq; + int rc = 0; ADAPTER_LOCK_ASSERT_NOTOWNED(sc); @@ -1815,30 +2167,13 @@ cxgbe_init_synchronized(struct port_info *pi) return (0); /* already running */ } - if (sc->open_device_map == 0 && ((rc = first_port_up(sc)) != 0)) + if (!(sc->flags & FULL_INIT_DONE) && + ((rc = adapter_full_init(sc)) != 0)) return (rc); /* error message displayed already */ - /* - * Allocate tx/rx/fl queues for this port. - */ - rc = t4_setup_eth_queues(pi); - if (rc != 0) - goto done; /* error message displayed already */ - - /* - * Setup RSS for this port. - */ - rss = malloc(pi->nrxq * sizeof (*rss), M_CXGBE, M_ZERO | M_WAITOK); - for_each_rxq(pi, i, rxq) { - rss[i] = rxq->iq.abs_id; - } - rc = -t4_config_rss_range(sc, sc->mbox, pi->viid, 0, pi->rss_size, rss, - pi->nrxq); - free(rss, M_CXGBE); - if (rc != 0) { - if_printf(ifp, "rss_config failed: %d\n", rc); - goto done; - } + if (!(pi->flags & PORT_INIT_DONE) && + ((rc = port_full_init(pi)) != 0)) + return (rc); /* error message displayed already */ PORT_LOCK(pi); rc = update_mac_settings(pi, XGMAC_ALL); @@ -1857,12 +2192,10 @@ cxgbe_init_synchronized(struct port_info *pi) if_printf(ifp, "enable_vi failed: %d\n", rc); goto done; } - pi->flags |= VI_ENABLED; /* all ok */ setbit(&sc->open_device_map, pi->port_id); ifp->if_drv_flags |= IFF_DRV_RUNNING; - ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; callout_reset(&pi->tick, hz, cxgbe_tick, pi); done: @@ -1915,51 +2248,28 @@ cxgbe_uninit_synchronized(struct port_info *pi) struct ifnet *ifp = pi->ifp; int rc; - /* - * taskqueue_drain may cause a deadlock if the adapter lock is held. 
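cxgbe_init_synchronized() above now defers the heavy allocations to two one-time steps: adapter_full_init() the first time any port is brought up and port_full_init() the first time each individual port is brought up, each guarded by a flag. A compact userland sketch of that two-level guard; the flag names are reused only for readability and the init bodies are stand-ins:

#include <stdio.h>

#define FULL_INIT_DONE  0x01    /* adapter-wide resources allocated */
#define PORT_INIT_DONE  0x02    /* this port's queues allocated */

struct adapter { int flags; };
struct port { struct adapter *sc; int flags; };

static int
adapter_full_init(struct adapter *sc)
{

        printf("allocating adapter-wide resources\n");
        sc->flags |= FULL_INIT_DONE;
        return (0);
}

static int
port_full_init(struct port *pi)
{

        printf("allocating this port's queues\n");
        pi->flags |= PORT_INIT_DONE;
        return (0);
}

static int
port_up(struct port *pi)
{
        int rc;

        if (!(pi->sc->flags & FULL_INIT_DONE) &&
            (rc = adapter_full_init(pi->sc)) != 0)
                return (rc);
        if (!(pi->flags & PORT_INIT_DONE) &&
            (rc = port_full_init(pi)) != 0)
                return (rc);
        printf("port is up\n");
        return (0);
}

int
main(void)
{
        struct adapter sc = { 0 };
        struct port p0 = { &sc, 0 }, p1 = { &sc, 0 };

        port_up(&p0);   /* does both init steps */
        port_up(&p1);   /* only the per-port step */
        port_up(&p0);   /* nothing left to do */
        return (0);
}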
- */ ADAPTER_LOCK_ASSERT_NOTOWNED(sc); /* - * Clear this port's bit from the open device map, and then drain - * tasks and callouts. + * Disable the VI so that all its data in either direction is discarded + * by the MPS. Leave everything else (the queues, interrupts, and 1Hz + * tick) intact as the TP can deliver negative advice or data that it's + * holding in its RAM (for an offloaded connection) even after the VI is + * disabled. */ - clrbit(&sc->open_device_map, pi->port_id); - - PORT_LOCK(pi); - ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); - callout_stop(&pi->tick); - PORT_UNLOCK(pi); - callout_drain(&pi->tick); - - /* - * Stop and then free the queues' resources, including the queues - * themselves. - * - * XXX: we could just stop the queues here (on ifconfig down) and free - * them later (on port detach), but having up/down go through the entire - * allocate/activate/deactivate/free sequence is a good way to find - * leaks and bugs. - */ - rc = t4_teardown_eth_queues(pi); - if (rc != 0) - if_printf(ifp, "teardown failed: %d\n", rc); - - if (pi->flags & VI_ENABLED) { - rc = -t4_enable_vi(sc, sc->mbox, pi->viid, false, false); - if (rc) - if_printf(ifp, "disable_vi failed: %d\n", rc); - else - pi->flags &= ~VI_ENABLED; + rc = -t4_enable_vi(sc, sc->mbox, pi->viid, false, false); + if (rc) { + if_printf(ifp, "disable_vi failed: %d\n", rc); + return (rc); } + clrbit(&sc->open_device_map, pi->port_id); + ifp->if_drv_flags &= ~IFF_DRV_RUNNING; + pi->link_cfg.link_ok = 0; pi->link_cfg.speed = 0; t4_os_link_changed(sc, pi->port_id, 0); - if (sc->open_device_map == 0) - last_port_down(sc); - return (0); } @@ -1968,15 +2278,22 @@ cxgbe_uninit_synchronized(struct port_info *pi) if (rc != 0) \ goto done; \ } while (0) + static int -first_port_up(struct adapter *sc) +adapter_full_init(struct adapter *sc) { int rc, i, rid, p, q; char s[8]; struct irq *irq; - struct sge_iq *intrq; + struct port_info *pi; + struct sge_rxq *rxq; +#ifndef TCP_OFFLOAD_DISABLE + struct sge_ofld_rxq *ofld_rxq; +#endif ADAPTER_LOCK_ASSERT_NOTOWNED(sc); + KASSERT((sc->flags & FULL_INIT_DONE) == 0, + ("%s: FULL_INIT_DONE already", __func__)); /* * queues that belong to the adapter (not any particular port). @@ -1985,95 +2302,265 @@ first_port_up(struct adapter *sc) if (rc != 0) goto done; + for (i = 0; i < ARRAY_SIZE(sc->tq); i++) { + sc->tq[i] = taskqueue_create("t4 taskq", M_NOWAIT, + taskqueue_thread_enqueue, &sc->tq[i]); + if (sc->tq[i] == NULL) { + device_printf(sc->dev, + "failed to allocate task queue %d\n", i); + rc = ENOMEM; + goto done; + } + taskqueue_start_threads(&sc->tq[i], 1, PI_NET, "%s tq%d", + device_get_nameunit(sc->dev), i); + } + /* * Setup interrupts. */ irq = &sc->irq[0]; rid = sc->intr_type == INTR_INTX ? 0 : 1; if (sc->intr_count == 1) { - KASSERT(sc->flags & INTR_SHARED, - ("%s: single interrupt but not shared?", __func__)); + KASSERT(!(sc->flags & INTR_DIRECT), + ("%s: single interrupt && INTR_DIRECT?", __func__)); T4_ALLOC_IRQ(sc, irq, rid, t4_intr_all, sc, "all"); } else { - /* Multiple interrupts. The first one is always error intr */ + /* Multiple interrupts. 
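The allocation loops that follow give one vector to the error interrupt, one to the firmware event queue, and then one per rx queue that takes direct interrupts; whether those are the NIC rx queues, the TOE rx queues, or both depends on INTR_DIRECT. A back-of-the-envelope sketch of the resulting vector demand, with illustrative queue counts and a made-up helper:

#include <stdio.h>

#define T4_EXTRA_INTR   2       /* error interrupt + firmware event queue */

struct port_cfg {
        int nrxq;               /* NIC rx queues */
        int nofldrxq;           /* TOE rx queues */
};

static int
vectors_needed(const struct port_cfg *p, int nports, int intr_direct)
{
        int i, n = T4_EXTRA_INTR;

        for (i = 0; i < nports; i++) {
                if (intr_direct)
                        n += p[i].nrxq + p[i].nofldrxq;
                else
                        n += p[i].nrxq > p[i].nofldrxq ?
                            p[i].nrxq : p[i].nofldrxq;
        }
        return (n);
}

int
main(void)
{
        struct port_cfg ports[2] = { { 8, 2 }, { 8, 2 } };

        printf("direct interrupts:   %d vectors\n",
            vectors_needed(ports, 2, 1));
        printf("indirect interrupts: %d vectors\n",
            vectors_needed(ports, 2, 0));
        return (0);
}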
*/ + KASSERT(sc->intr_count >= T4_EXTRA_INTR + sc->params.nports, + ("%s: too few intr.", __func__)); + + /* The first one is always error intr */ T4_ALLOC_IRQ(sc, irq, rid, t4_intr_err, sc, "err"); irq++; rid++; - /* Firmware event queue normally has an interrupt of its own */ - if (sc->intr_count > T4_EXTRA_INTR) { - T4_ALLOC_IRQ(sc, irq, rid, t4_intr_evt, &sc->sge.fwq, - "evt"); - irq++; - rid++; - } - - intrq = &sc->sge.intrq[0]; - if (sc->flags & INTR_SHARED) { + /* The second one is always the firmware event queue */ + T4_ALLOC_IRQ(sc, irq, rid, t4_intr_evt, &sc->sge.fwq, "evt"); + irq++; + rid++; - /* All ports share these interrupt queues */ + /* + * Note that if INTR_DIRECT is not set then either the NIC rx + * queues or (exclusive or) the TOE rx queueus will be taking + * direct interrupts. + * + * There is no need to check for is_offload(sc) as nofldrxq + * will be 0 if offload is disabled. + */ + for_each_port(sc, p) { + pi = sc->port[p]; - for (i = 0; i < NINTRQ(sc); i++) { - snprintf(s, sizeof(s), "*.%d", i); - T4_ALLOC_IRQ(sc, irq, rid, t4_intr, intrq, s); +#ifndef TCP_OFFLOAD_DISABLE + /* + * Skip over the NIC queues if they aren't taking direct + * interrupts. + */ + if (!(sc->flags & INTR_DIRECT) && + pi->nofldrxq > pi->nrxq) + goto ofld_queues; +#endif + rxq = &sc->sge.rxq[pi->first_rxq]; + for (q = 0; q < pi->nrxq; q++, rxq++) { + snprintf(s, sizeof(s), "%d.%d", p, q); + T4_ALLOC_IRQ(sc, irq, rid, t4_intr, rxq, s); irq++; rid++; - intrq++; } - } else { - - /* Each port has its own set of interrupt queues */ - for (p = 0; p < sc->params.nports; p++) { - for (q = 0; q < sc->port[p]->nrxq; q++) { - snprintf(s, sizeof(s), "%d.%d", p, q); - T4_ALLOC_IRQ(sc, irq, rid, t4_intr, - intrq, s); - irq++; - rid++; - intrq++; - } +#ifndef TCP_OFFLOAD_DISABLE + /* + * Skip over the offload queues if they aren't taking + * direct interrupts. + */ + if (!(sc->flags & INTR_DIRECT)) + continue; +ofld_queues: + ofld_rxq = &sc->sge.ofld_rxq[pi->first_ofld_rxq]; + for (q = 0; q < pi->nofldrxq; q++, ofld_rxq++) { + snprintf(s, sizeof(s), "%d,%d", p, q); + T4_ALLOC_IRQ(sc, irq, rid, t4_intr, ofld_rxq, s); + irq++; + rid++; } +#endif } } t4_intr_enable(sc); sc->flags |= FULL_INIT_DONE; - done: if (rc != 0) - last_port_down(sc); + adapter_full_uninit(sc); return (rc); } #undef T4_ALLOC_IRQ -/* - * Idempotent. - */ static int -last_port_down(struct adapter *sc) +adapter_full_uninit(struct adapter *sc) { int i; ADAPTER_LOCK_ASSERT_NOTOWNED(sc); - t4_intr_disable(sc); - t4_teardown_adapter_queues(sc); for (i = 0; i < sc->intr_count; i++) t4_free_irq(sc, &sc->irq[i]); + for (i = 0; i < ARRAY_SIZE(sc->tq) && sc->tq[i]; i++) { + taskqueue_free(sc->tq[i]); + sc->tq[i] = NULL; + } + sc->flags &= ~FULL_INIT_DONE; return (0); } static int +port_full_init(struct port_info *pi) +{ + struct adapter *sc = pi->adapter; + struct ifnet *ifp = pi->ifp; + uint16_t *rss; + struct sge_rxq *rxq; + int rc, i; + + ADAPTER_LOCK_ASSERT_NOTOWNED(sc); + KASSERT((pi->flags & PORT_INIT_DONE) == 0, + ("%s: PORT_INIT_DONE already", __func__)); + + sysctl_ctx_init(&pi->ctx); + pi->flags |= PORT_SYSCTL_CTX; + + /* + * Allocate tx/rx/fl queues for this port. + */ + rc = t4_setup_port_queues(pi); + if (rc != 0) + goto done; /* error message displayed already */ + + /* + * Setup RSS for this port. 
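Setting up RSS for a port, as the code just below does, amounts to programming an indirection table of absolute rx queue ids: the hardware hashes each flow, the hash selects a table slot, and the slot names the queue that receives the traffic. A userland sketch of that lookup, with a made-up table size and queue ids:

#include <stdint.h>
#include <stdio.h>

#define RSS_SIZE 128                    /* indirection table entries */

int
main(void)
{
        uint16_t rss[RSS_SIZE];
        uint16_t abs_id[4] = { 17, 18, 19, 20 };  /* one port's rx queues */
        uint32_t hash, i;

        /* Spread the queues across the table round-robin. */
        for (i = 0; i < RSS_SIZE; i++)
                rss[i] = abs_id[i % 4];

        /* A flow whose hash is 0x9c0ffee5 lands on this queue. */
        hash = 0x9c0ffee5;
        printf("hash %#x -> rx queue %u\n", hash, rss[hash % RSS_SIZE]);
        return (0);
}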
+ */ + rss = malloc(pi->nrxq * sizeof (*rss), M_CXGBE, + M_ZERO | M_WAITOK); + for_each_rxq(pi, i, rxq) { + rss[i] = rxq->iq.abs_id; + } + rc = -t4_config_rss_range(sc, sc->mbox, pi->viid, 0, + pi->rss_size, rss, pi->nrxq); + free(rss, M_CXGBE); + if (rc != 0) { + if_printf(ifp, "rss_config failed: %d\n", rc); + goto done; + } + + pi->flags |= PORT_INIT_DONE; +done: + if (rc != 0) + port_full_uninit(pi); + + return (rc); +} + +/* + * Idempotent. + */ +static int +port_full_uninit(struct port_info *pi) +{ + struct adapter *sc = pi->adapter; + int i; + struct sge_rxq *rxq; + struct sge_txq *txq; +#ifndef TCP_OFFLOAD_DISABLE + struct sge_ofld_rxq *ofld_rxq; + struct sge_wrq *ofld_txq; +#endif + + if (pi->flags & PORT_INIT_DONE) { + + /* Need to quiesce queues. XXX: ctrl queues? */ + + for_each_txq(pi, i, txq) { + quiesce_eq(sc, &txq->eq); + } + +#ifndef TCP_OFFLOAD_DISABLE + for_each_ofld_txq(pi, i, ofld_txq) { + quiesce_eq(sc, &ofld_txq->eq); + } +#endif + + for_each_rxq(pi, i, rxq) { + quiesce_iq(sc, &rxq->iq); + quiesce_fl(sc, &rxq->fl); + } + +#ifndef TCP_OFFLOAD_DISABLE + for_each_ofld_rxq(pi, i, ofld_rxq) { + quiesce_iq(sc, &ofld_rxq->iq); + quiesce_fl(sc, &ofld_rxq->fl); + } +#endif + } + + t4_teardown_port_queues(pi); + pi->flags &= ~PORT_INIT_DONE; + + return (0); +} + +static void +quiesce_eq(struct adapter *sc, struct sge_eq *eq) +{ + EQ_LOCK(eq); + eq->flags |= EQ_DOOMED; + + /* + * Wait for the response to a credit flush if one's + * pending. + */ + while (eq->flags & EQ_CRFLUSHED) + mtx_sleep(eq, &eq->eq_lock, 0, "crflush", 0); + EQ_UNLOCK(eq); + + callout_drain(&eq->tx_callout); /* XXX: iffy */ + pause("callout", 10); /* Still iffy */ + + taskqueue_drain(sc->tq[eq->tx_chan], &eq->tx_task); +} + +static void +quiesce_iq(struct adapter *sc, struct sge_iq *iq) +{ + (void) sc; /* unused */ + + /* Synchronize with the interrupt handler */ + while (!atomic_cmpset_int(&iq->state, IQS_IDLE, IQS_DISABLED)) + pause("iqfree", 1); +} + +static void +quiesce_fl(struct adapter *sc, struct sge_fl *fl) +{ + mtx_lock(&sc->sfl_lock); + FL_LOCK(fl); + fl->flags |= FL_DOOMED; + FL_UNLOCK(fl); + mtx_unlock(&sc->sfl_lock); + + callout_drain(&sc->sfl_callout); + KASSERT((fl->flags & FL_STARVING) == 0, + ("%s: still starving", __func__)); +} + +static int t4_alloc_irq(struct adapter *sc, struct irq *irq, int rid, - iq_intr_handler_t *handler, void *arg, char *name) + driver_intr_t *handler, void *arg, char *name) { int rc; @@ -2365,12 +2852,12 @@ cxgbe_tick(void *arg) t4_get_port_stats(pi->adapter, pi->tx_chan, s); - ifp->if_opackets = s->tx_frames; - ifp->if_ipackets = s->rx_frames; - ifp->if_obytes = s->tx_octets; - ifp->if_ibytes = s->rx_octets; - ifp->if_omcasts = s->tx_mcast_frames; - ifp->if_imcasts = s->rx_mcast_frames; + ifp->if_opackets = s->tx_frames - s->tx_pause; + ifp->if_ipackets = s->rx_frames - s->rx_pause; + ifp->if_obytes = s->tx_octets - s->tx_pause * 64; + ifp->if_ibytes = s->rx_octets - s->rx_pause * 64; + ifp->if_omcasts = s->tx_mcast_frames - s->tx_pause; + ifp->if_imcasts = s->rx_mcast_frames - s->rx_pause; ifp->if_iqdrops = s->rx_ovflow0 + s->rx_ovflow1 + s->rx_ovflow2 + s->rx_ovflow3; @@ -2388,15 +2875,58 @@ cxgbe_tick(void *arg) } static int +cpl_not_handled(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) +{ +#ifdef INVARIANTS + panic("%s: opcode %02x on iq %p with payload %p", + __func__, rss->opcode, iq, m); +#else + log(LOG_ERR, "%s: opcode %02x on iq %p with payload %p", + __func__, rss->opcode, iq, m); + m_freem(m); +#endif + return (EDOOFUS); +} + +int 
+t4_register_cpl_handler(struct adapter *sc, int opcode, cpl_handler_t h) +{ + uintptr_t *loc, new; + + if (opcode >= ARRAY_SIZE(sc->cpl_handler)) + return (EINVAL); + + new = h ? (uintptr_t)h : (uintptr_t)cpl_not_handled; + loc = (uintptr_t *) &sc->cpl_handler[opcode]; + atomic_store_rel_ptr(loc, new); + + return (0); +} + +static int t4_sysctls(struct adapter *sc) { struct sysctl_ctx_list *ctx; struct sysctl_oid *oid; - struct sysctl_oid_list *children; + struct sysctl_oid_list *children, *c0; + static char *caps[] = { + "\20\1PPP\2QFC\3DCBX", /* caps[0] linkcaps */ + "\20\1NIC\2VM\3IDS\4UM\5UM_ISGL", /* caps[1] niccaps */ + "\20\1TOE", /* caps[2] toecaps */ + "\20\1RDDP\2RDMAC", /* caps[3] rdmacaps */ + "\20\1INITIATOR_PDU\2TARGET_PDU" /* caps[4] iscsicaps */ + "\3INITIATOR_CNXOFLD\4TARGET_CNXOFLD" + "\5INITIATOR_SSNOFLD\6TARGET_SSNOFLD", + "\20\1INITIATOR\2TARGET\3CTRL_OFLD" /* caps[5] fcoecaps */ + }; ctx = device_get_sysctl_ctx(sc->dev); + + /* + * dev.t4nex.X. + */ oid = device_get_sysctl_tree(sc->dev); - children = SYSCTL_CHILDREN(oid); + c0 = children = SYSCTL_CHILDREN(oid); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nports", CTLFLAG_RD, &sc->params.nports, 0, "# of ports"); @@ -2407,23 +2937,146 @@ t4_sysctls(struct adapter *sc) SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "firmware_version", CTLFLAG_RD, &sc->fw_version, 0, "firmware version"); - SYSCTL_ADD_INT(ctx, children, OID_AUTO, "TOE", CTLFLAG_RD, - &sc->params.offload, 0, "hardware is capable of TCP offload"); + SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "cf", + CTLFLAG_RD, &t4_cfg_file, 0, "configuration file"); + + SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "cfcsum", CTLFLAG_RD, + &sc->cfcsum, 0, "config file checksum"); + + SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "linkcaps", + CTLTYPE_STRING | CTLFLAG_RD, caps[0], sc->linkcaps, + sysctl_bitfield, "A", "available link capabilities"); + + SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "niccaps", + CTLTYPE_STRING | CTLFLAG_RD, caps[1], sc->niccaps, + sysctl_bitfield, "A", "available NIC capabilities"); + + SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "toecaps", + CTLTYPE_STRING | CTLFLAG_RD, caps[2], sc->toecaps, + sysctl_bitfield, "A", "available TCP offload capabilities"); + + SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rdmacaps", + CTLTYPE_STRING | CTLFLAG_RD, caps[3], sc->rdmacaps, + sysctl_bitfield, "A", "available RDMA capabilities"); + + SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "iscsicaps", + CTLTYPE_STRING | CTLFLAG_RD, caps[4], sc->iscsicaps, + sysctl_bitfield, "A", "available iSCSI capabilities"); + + SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "fcoecaps", + CTLTYPE_STRING | CTLFLAG_RD, caps[5], sc->fcoecaps, + sysctl_bitfield, "A", "available FCoE capabilities"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "core_clock", CTLFLAG_RD, &sc->params.vpd.cclk, 0, "core clock frequency (in KHz)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_timers", - CTLTYPE_STRING | CTLFLAG_RD, &intr_timer, sizeof(intr_timer), - sysctl_int_array, "A", "interrupt holdoff timer values (us)"); + CTLTYPE_STRING | CTLFLAG_RD, sc->sge.timer_val, + sizeof(sc->sge.timer_val), sysctl_int_array, "A", + "interrupt holdoff timer values (us)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_pkt_counts", - CTLTYPE_STRING | CTLFLAG_RD, &intr_pktcount, sizeof(intr_pktcount), - sysctl_int_array, "A", "interrupt holdoff packet counter values"); + CTLTYPE_STRING | CTLFLAG_RD, sc->sge.counter_val, + sizeof(sc->sge.counter_val), sysctl_int_array, "A", + "interrupt holdoff packet counter values"); + + /* + * 
dev.t4nex.X.misc. Marked CTLFLAG_SKIP to avoid information overload. + */ + oid = SYSCTL_ADD_NODE(ctx, c0, OID_AUTO, "misc", + CTLFLAG_RD | CTLFLAG_SKIP, NULL, + "logs and miscellaneous information"); + children = SYSCTL_CHILDREN(oid); + + SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cctrl", + CTLTYPE_STRING | CTLFLAG_RD, sc, 0, + sysctl_cctrl, "A", "congestion control"); + + SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cpl_stats", + CTLTYPE_STRING | CTLFLAG_RD, sc, 0, + sysctl_cpl_stats, "A", "CPL statistics"); + + SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "ddp_stats", + CTLTYPE_STRING | CTLFLAG_RD, sc, 0, + sysctl_ddp_stats, "A", "DDP statistics"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "devlog", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, - sysctl_devlog, "A", "device log"); + sysctl_devlog, "A", "firmware's device log"); + + SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "fcoe_stats", + CTLTYPE_STRING | CTLFLAG_RD, sc, 0, + sysctl_fcoe_stats, "A", "FCoE statistics"); + + SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "hw_sched", + CTLTYPE_STRING | CTLFLAG_RD, sc, 0, + sysctl_hw_sched, "A", "hardware scheduler "); + + SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "l2t", + CTLTYPE_STRING | CTLFLAG_RD, sc, 0, + sysctl_l2t, "A", "hardware L2 table"); + + SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "lb_stats", + CTLTYPE_STRING | CTLFLAG_RD, sc, 0, + sysctl_lb_stats, "A", "loopback statistics"); + + SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "meminfo", + CTLTYPE_STRING | CTLFLAG_RD, sc, 0, + sysctl_meminfo, "A", "memory regions"); + + SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "path_mtus", + CTLTYPE_STRING | CTLFLAG_RD, sc, 0, + sysctl_path_mtus, "A", "path MTUs"); + + SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "pm_stats", + CTLTYPE_STRING | CTLFLAG_RD, sc, 0, + sysctl_pm_stats, "A", "PM statistics"); + + SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rdma_stats", + CTLTYPE_STRING | CTLFLAG_RD, sc, 0, + sysctl_rdma_stats, "A", "RDMA statistics"); + + SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tcp_stats", + CTLTYPE_STRING | CTLFLAG_RD, sc, 0, + sysctl_tcp_stats, "A", "TCP statistics"); + + SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tids", + CTLTYPE_STRING | CTLFLAG_RD, sc, 0, + sysctl_tids, "A", "TID information"); + + SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tp_err_stats", + CTLTYPE_STRING | CTLFLAG_RD, sc, 0, + sysctl_tp_err_stats, "A", "TP error statistics"); + + SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tx_rate", + CTLTYPE_STRING | CTLFLAG_RD, sc, 0, + sysctl_tx_rate, "A", "Tx rate"); + +#ifndef TCP_OFFLOAD_DISABLE + if (is_offload(sc)) { + /* + * dev.t4nex.X.toe. 
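The capability strings registered a little earlier ("\20\1PPP\2QFC\3DCBX" and so on) are in the classic BSD "%b" bit-name format: the first byte is the output base (\20, i.e. 16, means hex) and each following group is a 1-based bit number followed by that bit's name. A userland re-implementation of the decoding, for illustration only:

#include <stdio.h>

static void
print_bitfield(unsigned int val, const char *fmt)
{
        int bit, any = 0;

        printf(*fmt == 8 ? "%o" : *fmt == 10 ? "%u" : "%x", val);
        for (fmt++; (bit = (unsigned char)*fmt++) != 0;) {
                if (val & (1u << (bit - 1))) {
                        putchar(any++ ? ',' : '<');
                        while ((unsigned char)*fmt > 32)
                                putchar(*fmt++);
                } else {
                        while ((unsigned char)*fmt > 32)
                                fmt++;
                }
        }
        if (any)
                putchar('>');
        putchar('\n');
}

int
main(void)
{

        /* 0x3 should decode to PPP and QFC with the linkcaps string. */
        print_bitfield(0x3, "\20\1PPP\2QFC\3DCBX");
        return (0);
}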
+ */ + oid = SYSCTL_ADD_NODE(ctx, c0, OID_AUTO, "toe", CTLFLAG_RD, + NULL, "TOE parameters"); + children = SYSCTL_CHILDREN(oid); + + sc->tt.sndbuf = 256 * 1024; + SYSCTL_ADD_INT(ctx, children, OID_AUTO, "sndbuf", CTLFLAG_RW, + &sc->tt.sndbuf, 0, "max hardware send buffer size"); + + sc->tt.ddp = 0; + SYSCTL_ADD_INT(ctx, children, OID_AUTO, "ddp", CTLFLAG_RW, + &sc->tt.ddp, 0, "DDP allowed"); + sc->tt.indsz = M_INDICATESIZE; + SYSCTL_ADD_INT(ctx, children, OID_AUTO, "indsz", CTLFLAG_RW, + &sc->tt.indsz, 0, "DDP max indicate size allowed"); + sc->tt.ddp_thres = 3*4096; + SYSCTL_ADD_INT(ctx, children, OID_AUTO, "ddp_thres", CTLFLAG_RW, + &sc->tt.ddp_thres, 0, "DDP threshold"); + } +#endif + return (0); } @@ -2452,6 +3105,23 @@ cxgbe_sysctls(struct port_info *pi) SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_txq", CTLFLAG_RD, &pi->first_txq, 0, "index of first tx queue"); +#ifndef TCP_OFFLOAD_DISABLE + if (is_offload(pi->adapter)) { + SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nofldrxq", CTLFLAG_RD, + &pi->nofldrxq, 0, + "# of rx queues for offloaded TCP connections"); + SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nofldtxq", CTLFLAG_RD, + &pi->nofldtxq, 0, + "# of tx queues for offloaded TCP connections"); + SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_ofld_rxq", + CTLFLAG_RD, &pi->first_ofld_rxq, 0, + "index of first TOE rx queue"); + SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_ofld_txq", + CTLFLAG_RD, &pi->first_ofld_txq, 0, + "index of first TOE tx queue"); + } +#endif + SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_tmr_idx", CTLTYPE_INT | CTLFLAG_RW, pi, 0, sysctl_holdoff_tmr_idx, "I", "holdoff timer index"); @@ -2642,11 +3312,31 @@ sysctl_int_array(SYSCTL_HANDLER_ARGS) } static int +sysctl_bitfield(SYSCTL_HANDLER_ARGS) +{ + int rc; + struct sbuf *sb; + + rc = sysctl_wire_old_buffer(req, 0); + if (rc != 0) + return(rc); + + sb = sbuf_new_for_sysctl(NULL, NULL, 128, req); + if (sb == NULL) + return (ENOMEM); + + sbuf_printf(sb, "%b", (int)arg2, (char *)arg1); + rc = sbuf_finish(sb); + sbuf_delete(sb); + + return (rc); +} + +static int sysctl_holdoff_tmr_idx(SYSCTL_HANDLER_ARGS) { struct port_info *pi = arg1; struct adapter *sc = pi->adapter; - struct sge_rxq *rxq; int idx, rc, i; idx = pi->tmr_idx; @@ -2661,9 +3351,16 @@ sysctl_holdoff_tmr_idx(SYSCTL_HANDLER_ARGS) ADAPTER_LOCK(sc); rc = IS_DOOMED(pi) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0); if (rc == 0) { + struct sge_rxq *rxq; + uint8_t v; + + v = V_QINTR_TIMER_IDX(idx) | V_QINTR_CNT_EN(pi->pktc_idx != -1); for_each_rxq(pi, i, rxq) { - rxq->iq.intr_params = V_QINTR_TIMER_IDX(idx) | - V_QINTR_CNT_EN(pi->pktc_idx != -1); +#ifdef atomic_store_rel_8 + atomic_store_rel_8(&rxq->iq.intr_params, v); +#else + rxq->iq.intr_params = v; +#endif } pi->tmr_idx = idx; } @@ -2690,8 +3387,8 @@ sysctl_holdoff_pktc_idx(SYSCTL_HANDLER_ARGS) ADAPTER_LOCK(sc); rc = IS_DOOMED(pi) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0); - if (rc == 0 && pi->ifp->if_drv_flags & IFF_DRV_RUNNING) - rc = EBUSY; /* can be changed only when port is down */ + if (rc == 0 && pi->flags & PORT_INIT_DONE) + rc = EBUSY; /* cannot be changed once the queues are created */ if (rc == 0) pi->pktc_idx = idx; @@ -2718,8 +3415,8 @@ sysctl_qsize_rxq(SYSCTL_HANDLER_ARGS) ADAPTER_LOCK(sc); rc = IS_DOOMED(pi) ? ENXIO : (IS_BUSY(sc) ? 
EBUSY : 0); - if (rc == 0 && pi->ifp->if_drv_flags & IFF_DRV_RUNNING) - rc = EBUSY; /* can be changed only when port is down */ + if (rc == 0 && pi->flags & PORT_INIT_DONE) + rc = EBUSY; /* cannot be changed once the queues are created */ if (rc == 0) pi->qsize_rxq = qsize; @@ -2746,8 +3443,8 @@ sysctl_qsize_txq(SYSCTL_HANDLER_ARGS) ADAPTER_LOCK(sc); rc = IS_DOOMED(pi) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0); - if (rc == 0 && pi->ifp->if_drv_flags & IFF_DRV_RUNNING) - rc = EBUSY; /* can be changed only when port is down */ + if (rc == 0 && pi->flags & PORT_INIT_DONE) + rc = EBUSY; /* cannot be changed once the queues are created */ if (rc == 0) pi->qsize_txq = qsize; @@ -2768,6 +3465,103 @@ sysctl_handle_t4_reg64(SYSCTL_HANDLER_ARGS) return (sysctl_handle_64(oidp, &val, 0, req)); } +static int +sysctl_cctrl(SYSCTL_HANDLER_ARGS) +{ + struct adapter *sc = arg1; + struct sbuf *sb; + int rc, i; + uint16_t incr[NMTUS][NCCTRL_WIN]; + static const char *dec_fac[] = { + "0.5", "0.5625", "0.625", "0.6875", "0.75", "0.8125", "0.875", + "0.9375" + }; + + rc = sysctl_wire_old_buffer(req, 0); + if (rc != 0) + return (rc); + + sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); + if (sb == NULL) + return (ENOMEM); + + t4_read_cong_tbl(sc, incr); + + for (i = 0; i < NCCTRL_WIN; ++i) { + sbuf_printf(sb, "%2d: %4u %4u %4u %4u %4u %4u %4u %4u\n", i, + incr[0][i], incr[1][i], incr[2][i], incr[3][i], incr[4][i], + incr[5][i], incr[6][i], incr[7][i]); + sbuf_printf(sb, "%8u %4u %4u %4u %4u %4u %4u %4u %5u %s\n", + incr[8][i], incr[9][i], incr[10][i], incr[11][i], + incr[12][i], incr[13][i], incr[14][i], incr[15][i], + sc->params.a_wnd[i], dec_fac[sc->params.b_wnd[i]]); + } + + rc = sbuf_finish(sb); + sbuf_delete(sb); + + return (rc); +} + +static int +sysctl_cpl_stats(SYSCTL_HANDLER_ARGS) +{ + struct adapter *sc = arg1; + struct sbuf *sb; + int rc; + struct tp_cpl_stats stats; + + rc = sysctl_wire_old_buffer(req, 0); + if (rc != 0) + return (rc); + + sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); + if (sb == NULL) + return (ENOMEM); + + t4_tp_get_cpl_stats(sc, &stats); + + sbuf_printf(sb, " channel 0 channel 1 channel 2 " + "channel 3\n"); + sbuf_printf(sb, "CPL requests: %10u %10u %10u %10u\n", + stats.req[0], stats.req[1], stats.req[2], stats.req[3]); + sbuf_printf(sb, "CPL responses: %10u %10u %10u %10u", + stats.rsp[0], stats.rsp[1], stats.rsp[2], stats.rsp[3]); + + rc = sbuf_finish(sb); + sbuf_delete(sb); + + return (rc); +} + +static int +sysctl_ddp_stats(SYSCTL_HANDLER_ARGS) +{ + struct adapter *sc = arg1; + struct sbuf *sb; + int rc; + struct tp_usm_stats stats; + + rc = sysctl_wire_old_buffer(req, 0); + if (rc != 0) + return(rc); + + sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); + if (sb == NULL) + return (ENOMEM); + + t4_get_usm_stats(sc, &stats); + + sbuf_printf(sb, "Frames: %u\n", stats.frames); + sbuf_printf(sb, "Octets: %ju\n", stats.octets); + sbuf_printf(sb, "Drops: %u", stats.drops); + + rc = sbuf_finish(sb); + sbuf_delete(sb); + + return (rc); +} + const char *devlog_level_strings[] = { [FW_DEVLOG_LEVEL_EMERG] = "EMERG", [FW_DEVLOG_LEVEL_CRIT] = "CRIT", @@ -2852,7 +3646,11 @@ sysctl_devlog(SYSCTL_HANDLER_ARGS) goto done; sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); - sbuf_printf(sb, "\n%10s %15s %8s %8s %s\n", + if (sb == NULL) { + rc = ENOMEM; + goto done; + } + sbuf_printf(sb, "%10s %15s %8s %8s %s\n", "Seq#", "Tstamp", "Level", "Facility", "Message"); i = first; @@ -2882,6 +3680,624 @@ done: return (rc); } +static int +sysctl_fcoe_stats(SYSCTL_HANDLER_ARGS) +{ + struct adapter *sc = arg1; 
+ struct sbuf *sb; + int rc; + struct tp_fcoe_stats stats[4]; + + rc = sysctl_wire_old_buffer(req, 0); + if (rc != 0) + return (rc); + + sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); + if (sb == NULL) + return (ENOMEM); + + t4_get_fcoe_stats(sc, 0, &stats[0]); + t4_get_fcoe_stats(sc, 1, &stats[1]); + t4_get_fcoe_stats(sc, 2, &stats[2]); + t4_get_fcoe_stats(sc, 3, &stats[3]); + + sbuf_printf(sb, " channel 0 channel 1 " + "channel 2 channel 3\n"); + sbuf_printf(sb, "octetsDDP: %16ju %16ju %16ju %16ju\n", + stats[0].octetsDDP, stats[1].octetsDDP, stats[2].octetsDDP, + stats[3].octetsDDP); + sbuf_printf(sb, "framesDDP: %16u %16u %16u %16u\n", stats[0].framesDDP, + stats[1].framesDDP, stats[2].framesDDP, stats[3].framesDDP); + sbuf_printf(sb, "framesDrop: %16u %16u %16u %16u", + stats[0].framesDrop, stats[1].framesDrop, stats[2].framesDrop, + stats[3].framesDrop); + + rc = sbuf_finish(sb); + sbuf_delete(sb); + + return (rc); +} + +static int +sysctl_hw_sched(SYSCTL_HANDLER_ARGS) +{ + struct adapter *sc = arg1; + struct sbuf *sb; + int rc, i; + unsigned int map, kbps, ipg, mode; + unsigned int pace_tab[NTX_SCHED]; + + rc = sysctl_wire_old_buffer(req, 0); + if (rc != 0) + return (rc); + + sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); + if (sb == NULL) + return (ENOMEM); + + map = t4_read_reg(sc, A_TP_TX_MOD_QUEUE_REQ_MAP); + mode = G_TIMERMODE(t4_read_reg(sc, A_TP_MOD_CONFIG)); + t4_read_pace_tbl(sc, pace_tab); + + sbuf_printf(sb, "Scheduler Mode Channel Rate (Kbps) " + "Class IPG (0.1 ns) Flow IPG (us)"); + + for (i = 0; i < NTX_SCHED; ++i, map >>= 2) { + t4_get_tx_sched(sc, i, &kbps, &ipg); + sbuf_printf(sb, "\n %u %-5s %u ", i, + (mode & (1 << i)) ? "flow" : "class", map & 3); + if (kbps) + sbuf_printf(sb, "%9u ", kbps); + else + sbuf_printf(sb, " disabled "); + + if (ipg) + sbuf_printf(sb, "%13u ", ipg); + else + sbuf_printf(sb, " disabled "); + + if (pace_tab[i]) + sbuf_printf(sb, "%10u", pace_tab[i]); + else + sbuf_printf(sb, " disabled"); + } + + rc = sbuf_finish(sb); + sbuf_delete(sb); + + return (rc); +} + +static int +sysctl_lb_stats(SYSCTL_HANDLER_ARGS) +{ + struct adapter *sc = arg1; + struct sbuf *sb; + int rc, i, j; + uint64_t *p0, *p1; + struct lb_port_stats s[2]; + static const char *stat_name[] = { + "OctetsOK:", "FramesOK:", "BcastFrames:", "McastFrames:", + "UcastFrames:", "ErrorFrames:", "Frames64:", "Frames65To127:", + "Frames128To255:", "Frames256To511:", "Frames512To1023:", + "Frames1024To1518:", "Frames1519ToMax:", "FramesDropped:", + "BG0FramesDropped:", "BG1FramesDropped:", "BG2FramesDropped:", + "BG3FramesDropped:", "BG0FramesTrunc:", "BG1FramesTrunc:", + "BG2FramesTrunc:", "BG3FramesTrunc:" + }; + + rc = sysctl_wire_old_buffer(req, 0); + if (rc != 0) + return (rc); + + sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); + if (sb == NULL) + return (ENOMEM); + + memset(s, 0, sizeof(s)); + + for (i = 0; i < 4; i += 2) { + t4_get_lb_stats(sc, i, &s[0]); + t4_get_lb_stats(sc, i + 1, &s[1]); + + p0 = &s[0].octets; + p1 = &s[1].octets; + sbuf_printf(sb, "%s Loopback %u" + " Loopback %u", i == 0 ? 
"" : "\n", i, i + 1); + + for (j = 0; j < ARRAY_SIZE(stat_name); j++) + sbuf_printf(sb, "\n%-17s %20ju %20ju", stat_name[j], + *p0++, *p1++); + } + + rc = sbuf_finish(sb); + sbuf_delete(sb); + + return (rc); +} + +struct mem_desc { + unsigned int base; + unsigned int limit; + unsigned int idx; +}; + +static int +mem_desc_cmp(const void *a, const void *b) +{ + return ((const struct mem_desc *)a)->base - + ((const struct mem_desc *)b)->base; +} + +static void +mem_region_show(struct sbuf *sb, const char *name, unsigned int from, + unsigned int to) +{ + unsigned int size; + + size = to - from + 1; + if (size == 0) + return; + + /* XXX: need humanize_number(3) in libkern for a more readable 'size' */ + sbuf_printf(sb, "%-15s %#x-%#x [%u]\n", name, from, to, size); +} + +static int +sysctl_meminfo(SYSCTL_HANDLER_ARGS) +{ + struct adapter *sc = arg1; + struct sbuf *sb; + int rc, i, n; + uint32_t lo, hi; + static const char *memory[] = { "EDC0:", "EDC1:", "MC:" }; + static const char *region[] = { + "DBQ contexts:", "IMSG contexts:", "FLM cache:", "TCBs:", + "Pstructs:", "Timers:", "Rx FL:", "Tx FL:", "Pstruct FL:", + "Tx payload:", "Rx payload:", "LE hash:", "iSCSI region:", + "TDDP region:", "TPT region:", "STAG region:", "RQ region:", + "RQUDP region:", "PBL region:", "TXPBL region:", "ULPRX state:", + "ULPTX state:", "On-chip queues:" + }; + struct mem_desc avail[3]; + struct mem_desc mem[ARRAY_SIZE(region) + 3]; /* up to 3 holes */ + struct mem_desc *md = mem; + + rc = sysctl_wire_old_buffer(req, 0); + if (rc != 0) + return (rc); + + sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); + if (sb == NULL) + return (ENOMEM); + + for (i = 0; i < ARRAY_SIZE(mem); i++) { + mem[i].limit = 0; + mem[i].idx = i; + } + + /* Find and sort the populated memory ranges */ + i = 0; + lo = t4_read_reg(sc, A_MA_TARGET_MEM_ENABLE); + if (lo & F_EDRAM0_ENABLE) { + hi = t4_read_reg(sc, A_MA_EDRAM0_BAR); + avail[i].base = G_EDRAM0_BASE(hi) << 20; + avail[i].limit = avail[i].base + (G_EDRAM0_SIZE(hi) << 20); + avail[i].idx = 0; + i++; + } + if (lo & F_EDRAM1_ENABLE) { + hi = t4_read_reg(sc, A_MA_EDRAM1_BAR); + avail[i].base = G_EDRAM1_BASE(hi) << 20; + avail[i].limit = avail[i].base + (G_EDRAM1_SIZE(hi) << 20); + avail[i].idx = 1; + i++; + } + if (lo & F_EXT_MEM_ENABLE) { + hi = t4_read_reg(sc, A_MA_EXT_MEMORY_BAR); + avail[i].base = G_EXT_MEM_BASE(hi) << 20; + avail[i].limit = avail[i].base + (G_EXT_MEM_SIZE(hi) << 20); + avail[i].idx = 2; + i++; + } + if (!i) /* no memory available */ + return 0; + qsort(avail, i, sizeof(struct mem_desc), mem_desc_cmp); + + (md++)->base = t4_read_reg(sc, A_SGE_DBQ_CTXT_BADDR); + (md++)->base = t4_read_reg(sc, A_SGE_IMSG_CTXT_BADDR); + (md++)->base = t4_read_reg(sc, A_SGE_FLM_CACHE_BADDR); + (md++)->base = t4_read_reg(sc, A_TP_CMM_TCB_BASE); + (md++)->base = t4_read_reg(sc, A_TP_CMM_MM_BASE); + (md++)->base = t4_read_reg(sc, A_TP_CMM_TIMER_BASE); + (md++)->base = t4_read_reg(sc, A_TP_CMM_MM_RX_FLST_BASE); + (md++)->base = t4_read_reg(sc, A_TP_CMM_MM_TX_FLST_BASE); + (md++)->base = t4_read_reg(sc, A_TP_CMM_MM_PS_FLST_BASE); + + /* the next few have explicit upper bounds */ + md->base = t4_read_reg(sc, A_TP_PMM_TX_BASE); + md->limit = md->base - 1 + + t4_read_reg(sc, A_TP_PMM_TX_PAGE_SIZE) * + G_PMTXMAXPAGE(t4_read_reg(sc, A_TP_PMM_TX_MAX_PAGE)); + md++; + + md->base = t4_read_reg(sc, A_TP_PMM_RX_BASE); + md->limit = md->base - 1 + + t4_read_reg(sc, A_TP_PMM_RX_PAGE_SIZE) * + G_PMRXMAXPAGE(t4_read_reg(sc, A_TP_PMM_RX_MAX_PAGE)); + md++; + + if (t4_read_reg(sc, A_LE_DB_CONFIG) & 
F_HASHEN) { + hi = t4_read_reg(sc, A_LE_DB_TID_HASHBASE) / 4; + md->base = t4_read_reg(sc, A_LE_DB_HASH_TID_BASE); + md->limit = (sc->tids.ntids - hi) * 16 + md->base - 1; + } else { + md->base = 0; + md->idx = ARRAY_SIZE(region); /* hide it */ + } + md++; + +#define ulp_region(reg) \ + md->base = t4_read_reg(sc, A_ULP_ ## reg ## _LLIMIT);\ + (md++)->limit = t4_read_reg(sc, A_ULP_ ## reg ## _ULIMIT) + + ulp_region(RX_ISCSI); + ulp_region(RX_TDDP); + ulp_region(TX_TPT); + ulp_region(RX_STAG); + ulp_region(RX_RQ); + ulp_region(RX_RQUDP); + ulp_region(RX_PBL); + ulp_region(TX_PBL); +#undef ulp_region + + md->base = t4_read_reg(sc, A_ULP_RX_CTX_BASE); + md->limit = md->base + sc->tids.ntids - 1; + md++; + md->base = t4_read_reg(sc, A_ULP_TX_ERR_TABLE_BASE); + md->limit = md->base + sc->tids.ntids - 1; + md++; + + md->base = sc->vres.ocq.start; + if (sc->vres.ocq.size) + md->limit = md->base + sc->vres.ocq.size - 1; + else + md->idx = ARRAY_SIZE(region); /* hide it */ + md++; + + /* add any address-space holes, there can be up to 3 */ + for (n = 0; n < i - 1; n++) + if (avail[n].limit < avail[n + 1].base) + (md++)->base = avail[n].limit; + if (avail[n].limit) + (md++)->base = avail[n].limit; + + n = md - mem; + qsort(mem, n, sizeof(struct mem_desc), mem_desc_cmp); + + for (lo = 0; lo < i; lo++) + mem_region_show(sb, memory[avail[lo].idx], avail[lo].base, + avail[lo].limit - 1); + + sbuf_printf(sb, "\n"); + for (i = 0; i < n; i++) { + if (mem[i].idx >= ARRAY_SIZE(region)) + continue; /* skip holes */ + if (!mem[i].limit) + mem[i].limit = i < n - 1 ? mem[i + 1].base - 1 : ~0; + mem_region_show(sb, region[mem[i].idx], mem[i].base, + mem[i].limit); + } + + sbuf_printf(sb, "\n"); + lo = t4_read_reg(sc, A_CIM_SDRAM_BASE_ADDR); + hi = t4_read_reg(sc, A_CIM_SDRAM_ADDR_SIZE) + lo - 1; + mem_region_show(sb, "uP RAM:", lo, hi); + + lo = t4_read_reg(sc, A_CIM_EXTMEM2_BASE_ADDR); + hi = t4_read_reg(sc, A_CIM_EXTMEM2_ADDR_SIZE) + lo - 1; + mem_region_show(sb, "uP Extmem2:", lo, hi); + + lo = t4_read_reg(sc, A_TP_PMM_RX_MAX_PAGE); + sbuf_printf(sb, "\n%u Rx pages of size %uKiB for %u channels\n", + G_PMRXMAXPAGE(lo), + t4_read_reg(sc, A_TP_PMM_RX_PAGE_SIZE) >> 10, + (lo & F_PMRXNUMCHN) ? 2 : 1); + + lo = t4_read_reg(sc, A_TP_PMM_TX_MAX_PAGE); + hi = t4_read_reg(sc, A_TP_PMM_TX_PAGE_SIZE); + sbuf_printf(sb, "%u Tx pages of size %u%ciB for %u channels\n", + G_PMTXMAXPAGE(lo), + hi >= (1 << 20) ? (hi >> 20) : (hi >> 10), + hi >= (1 << 20) ? 
'M' : 'K', 1 << G_PMTXNUMCHN(lo)); + sbuf_printf(sb, "%u p-structs\n", + t4_read_reg(sc, A_TP_CMM_MM_MAX_PSTRUCT)); + + for (i = 0; i < 4; i++) { + lo = t4_read_reg(sc, A_MPS_RX_PG_RSV0 + i * 4); + sbuf_printf(sb, "\nPort %d using %u pages out of %u allocated", + i, G_USED(lo), G_ALLOC(lo)); + } + for (i = 0; i < 4; i++) { + lo = t4_read_reg(sc, A_MPS_RX_PG_RSV4 + i * 4); + sbuf_printf(sb, + "\nLoopback %d using %u pages out of %u allocated", + i, G_USED(lo), G_ALLOC(lo)); + } + + rc = sbuf_finish(sb); + sbuf_delete(sb); + + return (rc); +} + +static int +sysctl_path_mtus(SYSCTL_HANDLER_ARGS) +{ + struct adapter *sc = arg1; + struct sbuf *sb; + int rc; + uint16_t mtus[NMTUS]; + + rc = sysctl_wire_old_buffer(req, 0); + if (rc != 0) + return (rc); + + sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); + if (sb == NULL) + return (ENOMEM); + + t4_read_mtu_tbl(sc, mtus, NULL); + + sbuf_printf(sb, "%u %u %u %u %u %u %u %u %u %u %u %u %u %u %u %u", + mtus[0], mtus[1], mtus[2], mtus[3], mtus[4], mtus[5], mtus[6], + mtus[7], mtus[8], mtus[9], mtus[10], mtus[11], mtus[12], mtus[13], + mtus[14], mtus[15]); + + rc = sbuf_finish(sb); + sbuf_delete(sb); + + return (rc); +} + +static int +sysctl_pm_stats(SYSCTL_HANDLER_ARGS) +{ + struct adapter *sc = arg1; + struct sbuf *sb; + int rc, i; + uint32_t tx_cnt[PM_NSTATS], rx_cnt[PM_NSTATS]; + uint64_t tx_cyc[PM_NSTATS], rx_cyc[PM_NSTATS]; + static const char *pm_stats[] = { + "Read:", "Write bypass:", "Write mem:", "Flush:", "FIFO wait:" + }; + + rc = sysctl_wire_old_buffer(req, 0); + if (rc != 0) + return (rc); + + sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); + if (sb == NULL) + return (ENOMEM); + + t4_pmtx_get_stats(sc, tx_cnt, tx_cyc); + t4_pmrx_get_stats(sc, rx_cnt, rx_cyc); + + sbuf_printf(sb, " Tx count Tx cycles " + "Rx count Rx cycles"); + for (i = 0; i < PM_NSTATS; i++) + sbuf_printf(sb, "\n%-13s %10u %20ju %10u %20ju", + pm_stats[i], tx_cnt[i], tx_cyc[i], rx_cnt[i], rx_cyc[i]); + + rc = sbuf_finish(sb); + sbuf_delete(sb); + + return (rc); +} + +static int +sysctl_rdma_stats(SYSCTL_HANDLER_ARGS) +{ + struct adapter *sc = arg1; + struct sbuf *sb; + int rc; + struct tp_rdma_stats stats; + + rc = sysctl_wire_old_buffer(req, 0); + if (rc != 0) + return (rc); + + sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); + if (sb == NULL) + return (ENOMEM); + + t4_tp_get_rdma_stats(sc, &stats); + sbuf_printf(sb, "NoRQEModDefferals: %u\n", stats.rqe_dfr_mod); + sbuf_printf(sb, "NoRQEPktDefferals: %u", stats.rqe_dfr_pkt); + + rc = sbuf_finish(sb); + sbuf_delete(sb); + + return (rc); +} + +static int +sysctl_tcp_stats(SYSCTL_HANDLER_ARGS) +{ + struct adapter *sc = arg1; + struct sbuf *sb; + int rc; + struct tp_tcp_stats v4, v6; + + rc = sysctl_wire_old_buffer(req, 0); + if (rc != 0) + return (rc); + + sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); + if (sb == NULL) + return (ENOMEM); + + t4_tp_get_tcp_stats(sc, &v4, &v6); + sbuf_printf(sb, + " IP IPv6\n"); + sbuf_printf(sb, "OutRsts: %20u %20u\n", + v4.tcpOutRsts, v6.tcpOutRsts); + sbuf_printf(sb, "InSegs: %20ju %20ju\n", + v4.tcpInSegs, v6.tcpInSegs); + sbuf_printf(sb, "OutSegs: %20ju %20ju\n", + v4.tcpOutSegs, v6.tcpOutSegs); + sbuf_printf(sb, "RetransSegs: %20ju %20ju", + v4.tcpRetransSegs, v6.tcpRetransSegs); + + rc = sbuf_finish(sb); + sbuf_delete(sb); + + return (rc); +} + +static int +sysctl_tids(SYSCTL_HANDLER_ARGS) +{ + struct adapter *sc = arg1; + struct sbuf *sb; + int rc; + struct tid_info *t = &sc->tids; + + rc = sysctl_wire_old_buffer(req, 0); + if (rc != 0) + return (rc); + + sb = 
sbuf_new_for_sysctl(NULL, NULL, 256, req); + if (sb == NULL) + return (ENOMEM); + + if (t->natids) { + sbuf_printf(sb, "ATID range: 0-%u, in use: %u\n", t->natids - 1, + t->atids_in_use); + } + + if (t->ntids) { + if (t4_read_reg(sc, A_LE_DB_CONFIG) & F_HASHEN) { + uint32_t b = t4_read_reg(sc, A_LE_DB_SERVER_INDEX) / 4; + + if (b) { + sbuf_printf(sb, "TID range: 0-%u, %u-%u", b - 1, + t4_read_reg(sc, A_LE_DB_TID_HASHBASE) / 4, + t->ntids - 1); + } else { + sbuf_printf(sb, "TID range: %u-%u", + t4_read_reg(sc, A_LE_DB_TID_HASHBASE) / 4, + t->ntids - 1); + } + } else + sbuf_printf(sb, "TID range: 0-%u", t->ntids - 1); + sbuf_printf(sb, ", in use: %u\n", + atomic_load_acq_int(&t->tids_in_use)); + } + + if (t->nstids) { + sbuf_printf(sb, "STID range: %u-%u, in use: %u\n", t->stid_base, + t->stid_base + t->nstids - 1, t->stids_in_use); + } + + if (t->nftids) { + sbuf_printf(sb, "FTID range: %u-%u\n", t->ftid_base, + t->ftid_base + t->nftids - 1); + } + + sbuf_printf(sb, "HW TID usage: %u IP users, %u IPv6 users", + t4_read_reg(sc, A_LE_DB_ACT_CNT_IPV4), + t4_read_reg(sc, A_LE_DB_ACT_CNT_IPV6)); + + rc = sbuf_finish(sb); + sbuf_delete(sb); + + return (rc); +} + +static int +sysctl_tp_err_stats(SYSCTL_HANDLER_ARGS) +{ + struct adapter *sc = arg1; + struct sbuf *sb; + int rc; + struct tp_err_stats stats; + + rc = sysctl_wire_old_buffer(req, 0); + if (rc != 0) + return (rc); + + sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); + if (sb == NULL) + return (ENOMEM); + + t4_tp_get_err_stats(sc, &stats); + + sbuf_printf(sb, " channel 0 channel 1 channel 2 " + "channel 3\n"); + sbuf_printf(sb, "macInErrs: %10u %10u %10u %10u\n", + stats.macInErrs[0], stats.macInErrs[1], stats.macInErrs[2], + stats.macInErrs[3]); + sbuf_printf(sb, "hdrInErrs: %10u %10u %10u %10u\n", + stats.hdrInErrs[0], stats.hdrInErrs[1], stats.hdrInErrs[2], + stats.hdrInErrs[3]); + sbuf_printf(sb, "tcpInErrs: %10u %10u %10u %10u\n", + stats.tcpInErrs[0], stats.tcpInErrs[1], stats.tcpInErrs[2], + stats.tcpInErrs[3]); + sbuf_printf(sb, "tcp6InErrs: %10u %10u %10u %10u\n", + stats.tcp6InErrs[0], stats.tcp6InErrs[1], stats.tcp6InErrs[2], + stats.tcp6InErrs[3]); + sbuf_printf(sb, "tnlCongDrops: %10u %10u %10u %10u\n", + stats.tnlCongDrops[0], stats.tnlCongDrops[1], stats.tnlCongDrops[2], + stats.tnlCongDrops[3]); + sbuf_printf(sb, "tnlTxDrops: %10u %10u %10u %10u\n", + stats.tnlTxDrops[0], stats.tnlTxDrops[1], stats.tnlTxDrops[2], + stats.tnlTxDrops[3]); + sbuf_printf(sb, "ofldVlanDrops: %10u %10u %10u %10u\n", + stats.ofldVlanDrops[0], stats.ofldVlanDrops[1], + stats.ofldVlanDrops[2], stats.ofldVlanDrops[3]); + sbuf_printf(sb, "ofldChanDrops: %10u %10u %10u %10u\n\n", + stats.ofldChanDrops[0], stats.ofldChanDrops[1], + stats.ofldChanDrops[2], stats.ofldChanDrops[3]); + sbuf_printf(sb, "ofldNoNeigh: %u\nofldCongDefer: %u", + stats.ofldNoNeigh, stats.ofldCongDefer); + + rc = sbuf_finish(sb); + sbuf_delete(sb); + + return (rc); +} + +static int +sysctl_tx_rate(SYSCTL_HANDLER_ARGS) +{ + struct adapter *sc = arg1; + struct sbuf *sb; + int rc; + u64 nrate[NCHAN], orate[NCHAN]; + + rc = sysctl_wire_old_buffer(req, 0); + if (rc != 0) + return (rc); + + sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); + if (sb == NULL) + return (ENOMEM); + + t4_get_chan_txrate(sc, nrate, orate); + sbuf_printf(sb, " channel 0 channel 1 channel 2 " + "channel 3\n"); + sbuf_printf(sb, "NIC B/s: %10ju %10ju %10ju %10ju\n", + nrate[0], nrate[1], nrate[2], nrate[3]); + sbuf_printf(sb, "Offload B/s: %10ju %10ju %10ju %10ju", + orate[0], orate[1], orate[2], orate[3]); + + rc 
= sbuf_finish(sb); + sbuf_delete(sb); + + return (rc); +} + static inline void txq_start(struct ifnet *ifp, struct sge_txq *txq) { @@ -2897,17 +4313,57 @@ txq_start(struct ifnet *ifp, struct sge_txq *txq) } void -cxgbe_txq_start(void *arg, int count) +t4_tx_callout(void *arg) { - struct sge_txq *txq = arg; + struct sge_eq *eq = arg; + struct adapter *sc; + + if (EQ_TRYLOCK(eq) == 0) + goto reschedule; + + if (eq->flags & EQ_STALLED && !can_resume_tx(eq)) { + EQ_UNLOCK(eq); +reschedule: + if (__predict_true(!(eq->flags && EQ_DOOMED))) + callout_schedule(&eq->tx_callout, 1); + return; + } + + EQ_LOCK_ASSERT_OWNED(eq); + + if (__predict_true((eq->flags & EQ_DOOMED) == 0)) { + + if ((eq->flags & EQ_TYPEMASK) == EQ_ETH) { + struct sge_txq *txq = arg; + struct port_info *pi = txq->ifp->if_softc; + + sc = pi->adapter; + } else { + struct sge_wrq *wrq = arg; + + sc = wrq->adapter; + } + + taskqueue_enqueue(sc->tq[eq->tx_chan], &eq->tx_task); + } - TXQ_LOCK(txq); - if (txq->eq.flags & EQ_CRFLUSHED) { - txq->eq.flags &= ~EQ_CRFLUSHED; + EQ_UNLOCK(eq); +} + +void +t4_tx_task(void *arg, int count) +{ + struct sge_eq *eq = arg; + + EQ_LOCK(eq); + if ((eq->flags & EQ_TYPEMASK) == EQ_ETH) { + struct sge_txq *txq = arg; txq_start(txq->ifp, txq); - } else - wakeup_one(txq); /* txq is going away, wakeup free_txq */ - TXQ_UNLOCK(txq); + } else { + struct sge_wrq *wrq = arg; + t4_wrq_tx_locked(wrq->adapter, wrq, NULL); + } + EQ_UNLOCK(eq); } static uint32_t @@ -2937,10 +4393,10 @@ fconf_to_mode(uint32_t fconf) mode |= T4_FILTER_IP_TOS; if (fconf & F_VLAN) - mode |= T4_FILTER_IVLAN; + mode |= T4_FILTER_VLAN; if (fconf & F_VNIC_ID) - mode |= T4_FILTER_OVLAN; + mode |= T4_FILTER_VNIC; if (fconf & F_PORT) mode |= T4_FILTER_PORT; @@ -2974,10 +4430,10 @@ mode_to_fconf(uint32_t mode) if (mode & T4_FILTER_IP_TOS) fconf |= F_TOS; - if (mode & T4_FILTER_IVLAN) + if (mode & T4_FILTER_VLAN) fconf |= F_VLAN; - if (mode & T4_FILTER_OVLAN) + if (mode & T4_FILTER_VNIC) fconf |= F_VNIC_ID; if (mode & T4_FILTER_PORT) @@ -3012,10 +4468,10 @@ fspec_to_fconf(struct t4_filter_specification *fs) if (fs->val.tos || fs->mask.tos) fconf |= F_TOS; - if (fs->val.ivlan_vld || fs->mask.ivlan_vld) + if (fs->val.vlan_vld || fs->mask.vlan_vld) fconf |= F_VLAN; - if (fs->val.ovlan_vld || fs->mask.ovlan_vld) + if (fs->val.vnic_vld || fs->mask.vnic_vld) fconf |= F_VNIC_ID; if (fs->val.iport || fs->mask.iport) @@ -3035,7 +4491,13 @@ get_filter_mode(struct adapter *sc, uint32_t *mode) t4_read_indirect(sc, A_TP_PIO_ADDR, A_TP_PIO_DATA, &fconf, 1, A_TP_VLAN_PRI_MAP); - *mode = fconf_to_mode(fconf); + if (sc->filter_mode != fconf) { + log(LOG_WARNING, "%s: cached filter mode out of sync %x %x.\n", + device_get_nameunit(sc->dev), sc->filter_mode, fconf); + sc->filter_mode = fconf; + } + + *mode = fconf_to_mode(sc->filter_mode); return (0); } @@ -3059,7 +4521,21 @@ set_filter_mode(struct adapter *sc, uint32_t mode) goto done; } +#ifndef TCP_OFFLOAD_DISABLE + if (sc->offload_map) { + rc = EBUSY; + goto done; + } +#endif + +#ifdef notyet rc = -t4_set_filter_mode(sc, fconf); + if (rc == 0) + sc->filter_mode = fconf; +#else + rc = ENOTSUP; +#endif + done: ADAPTER_UNLOCK(sc); return (rc); @@ -3119,7 +4595,6 @@ get_filter(struct adapter *sc, struct t4_filter *t) static int set_filter(struct adapter *sc, struct t4_filter *t) { - uint32_t fconf; unsigned int nfilters, nports; struct filter_entry *f; int i; @@ -3139,9 +4614,7 @@ set_filter(struct adapter *sc, struct t4_filter *t) return (EINVAL); /* Validate against the global filter mode */ - 
t4_read_indirect(sc, A_TP_PIO_ADDR, A_TP_PIO_DATA, &fconf, 1, - A_TP_VLAN_PRI_MAP); - if ((fconf | fspec_to_fconf(&t->fs)) != fconf) + if ((sc->filter_mode | fspec_to_fconf(&t->fs)) != sc->filter_mode) return (E2BIG); if (t->fs.action == FILTER_SWITCH && t->fs.eport >= nports) @@ -3238,7 +4711,6 @@ clear_filter(struct filter_entry *f) static int set_filter_wr(struct adapter *sc, int fidx) { - int rc; struct filter_entry *f = &sc->tids.ftid_tab[fidx]; struct mbuf *m; struct fw_filter_wr *fwr; @@ -3298,13 +4770,13 @@ set_filter_wr(struct adapter *sc, int fidx) fwr->frag_to_ovlan_vldm = (V_FW_FILTER_WR_FRAG(f->fs.val.frag) | V_FW_FILTER_WR_FRAGM(f->fs.mask.frag) | - V_FW_FILTER_WR_IVLAN_VLD(f->fs.val.ivlan_vld) | - V_FW_FILTER_WR_OVLAN_VLD(f->fs.val.ovlan_vld) | - V_FW_FILTER_WR_IVLAN_VLDM(f->fs.mask.ivlan_vld) | - V_FW_FILTER_WR_OVLAN_VLDM(f->fs.mask.ovlan_vld)); + V_FW_FILTER_WR_IVLAN_VLD(f->fs.val.vlan_vld) | + V_FW_FILTER_WR_OVLAN_VLD(f->fs.val.vnic_vld) | + V_FW_FILTER_WR_IVLAN_VLDM(f->fs.mask.vlan_vld) | + V_FW_FILTER_WR_OVLAN_VLDM(f->fs.mask.vnic_vld)); fwr->smac_sel = 0; fwr->rx_chan_rx_rpl_iq = htobe16(V_FW_FILTER_WR_RX_CHAN(0) | - V_FW_FILTER_WR_RX_RPL_IQ(sc->sge.intrq[0].abs_id)); + V_FW_FILTER_WR_RX_RPL_IQ(sc->sge.fwq.abs_id)); fwr->maci_to_matchtypem = htobe32(V_FW_FILTER_WR_MACI(f->fs.val.macidx) | V_FW_FILTER_WR_MACIM(f->fs.mask.macidx) | @@ -3318,10 +4790,10 @@ set_filter_wr(struct adapter *sc, int fidx) fwr->ptclm = f->fs.mask.proto; fwr->ttyp = f->fs.val.tos; fwr->ttypm = f->fs.mask.tos; - fwr->ivlan = htobe16(f->fs.val.ivlan); - fwr->ivlanm = htobe16(f->fs.mask.ivlan); - fwr->ovlan = htobe16(f->fs.val.ovlan); - fwr->ovlanm = htobe16(f->fs.mask.ovlan); + fwr->ivlan = htobe16(f->fs.val.vlan); + fwr->ivlanm = htobe16(f->fs.mask.vlan); + fwr->ovlan = htobe16(f->fs.val.vnic); + fwr->ovlanm = htobe16(f->fs.mask.vnic); bcopy(f->fs.val.dip, fwr->lip, sizeof (fwr->lip)); bcopy(f->fs.mask.dip, fwr->lipm, sizeof (fwr->lipm)); bcopy(f->fs.val.sip, fwr->fip, sizeof (fwr->fip)); @@ -3335,13 +4807,9 @@ set_filter_wr(struct adapter *sc, int fidx) f->pending = 1; sc->tids.ftids_in_use++; - rc = t4_mgmt_tx(sc, m); - if (rc != 0) { - sc->tids.ftids_in_use--; - m_freem(m); - clear_filter(f); - } - return (rc); + + t4_mgmt_tx(sc, m); + return (0); } static int @@ -3350,7 +4818,7 @@ del_filter_wr(struct adapter *sc, int fidx) struct filter_entry *f = &sc->tids.ftid_tab[fidx]; struct mbuf *m; struct fw_filter_wr *fwr; - unsigned int rc, ftid; + unsigned int ftid; ADAPTER_LOCK_ASSERT_OWNED(sc); @@ -3364,55 +4832,48 @@ del_filter_wr(struct adapter *sc, int fidx) m->m_len = m->m_pkthdr.len = sizeof(*fwr); bzero(fwr, sizeof (*fwr)); - t4_mk_filtdelwr(ftid, fwr, sc->sge.intrq[0].abs_id); + t4_mk_filtdelwr(ftid, fwr, sc->sge.fwq.abs_id); f->pending = 1; - rc = t4_mgmt_tx(sc, m); - if (rc != 0) { - f->pending = 0; - m_freem(m); - } - return (rc); + t4_mgmt_tx(sc, m); + return (0); } -/* XXX move intr handlers to main.c and make this static */ -void -filter_rpl(struct adapter *sc, const struct cpl_set_tcb_rpl *rpl) +static int +filter_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { + struct adapter *sc = iq->adapter; + const struct cpl_set_tcb_rpl *rpl = (const void *)(rss + 1); unsigned int idx = GET_TID(rpl); + KASSERT(m == NULL, ("%s: payload with opcode %02x", __func__, + rss->opcode)); + if (idx >= sc->tids.ftid_base && (idx -= sc->tids.ftid_base) < sc->tids.nftids) { unsigned int rc = G_COOKIE(rpl->cookie); struct filter_entry *f = &sc->tids.ftid_tab[idx]; - if (rc == 
FW_FILTER_WR_FLT_DELETED) { - /* - * Clear the filter when we get confirmation from the - * hardware that the filter has been deleted. - */ - clear_filter(f); - sc->tids.ftids_in_use--; - } else if (rc == FW_FILTER_WR_SMT_TBL_FULL) { - device_printf(sc->dev, - "filter %u setup failed due to full SMT\n", idx); - clear_filter(f); - sc->tids.ftids_in_use--; - } else if (rc == FW_FILTER_WR_FLT_ADDED) { + if (rc == FW_FILTER_WR_FLT_ADDED) { f->smtidx = (be64toh(rpl->oldval) >> 24) & 0xff; f->pending = 0; /* asynchronous setup completed */ f->valid = 1; - } else { - /* - * Something went wrong. Issue a warning about the - * problem and clear everything out. - */ + return (0); + } + + if (rc != FW_FILTER_WR_FLT_DELETED) { + /* Add or delete failed, need to display an error */ device_printf(sc->dev, "filter %u setup failed with error %u\n", idx, rc); - clear_filter(f); - sc->tids.ftids_in_use--; } + + clear_filter(f); + ADAPTER_LOCK(sc); + sc->tids.ftids_in_use--; + ADAPTER_UNLOCK(sc); } + + return (0); } static int @@ -3444,6 +4905,84 @@ get_sge_context(struct adapter *sc, struct t4_sge_context *cntxt) return (rc); } +static int +read_card_mem(struct adapter *sc, struct t4_mem_range *mr) +{ + uint32_t base, size, lo, hi, win, off, remaining, i, n; + uint32_t *buf, *b; + int rc; + + /* reads are in multiples of 32 bits */ + if (mr->addr & 3 || mr->len & 3 || mr->len == 0) + return (EINVAL); + + /* + * We don't want to deal with potential holes so we mandate that the + * requested region must lie entirely within one of the 3 memories. + */ + lo = t4_read_reg(sc, A_MA_TARGET_MEM_ENABLE); + if (lo & F_EDRAM0_ENABLE) { + hi = t4_read_reg(sc, A_MA_EDRAM0_BAR); + base = G_EDRAM0_BASE(hi) << 20; + size = G_EDRAM0_SIZE(hi) << 20; + if (size > 0 && + mr->addr >= base && mr->addr < base + size && + mr->addr + mr->len <= base + size) + goto proceed; + } + if (lo & F_EDRAM1_ENABLE) { + hi = t4_read_reg(sc, A_MA_EDRAM1_BAR); + base = G_EDRAM1_BASE(hi) << 20; + size = G_EDRAM1_SIZE(hi) << 20; + if (size > 0 && + mr->addr >= base && mr->addr < base + size && + mr->addr + mr->len <= base + size) + goto proceed; + } + if (lo & F_EXT_MEM_ENABLE) { + hi = t4_read_reg(sc, A_MA_EXT_MEMORY_BAR); + base = G_EXT_MEM_BASE(hi) << 20; + size = G_EXT_MEM_SIZE(hi) << 20; + if (size > 0 && + mr->addr >= base && mr->addr < base + size && + mr->addr + mr->len <= base + size) + goto proceed; + } + return (ENXIO); + +proceed: + buf = b = malloc(mr->len, M_CXGBE, M_WAITOK); + + /* + * Position the PCIe window (we use memwin2) to the 16B aligned area + * just at/before the requested region. 
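The loop that follows slides a fixed-size PCIe memory window across the requested range and copies it out 32 bits at a time, starting at a 16-byte aligned window position. A userland sketch of the same aperture arithmetic, with the card memory simulated by a flat array; APERTURE stands in for MEMWIN2_APERTURE and the helpers are made up:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define APERTURE        32              /* bytes visible through the window */

static uint8_t card_mem[256];           /* pretend adapter memory */
static uint32_t win_base;               /* current window position */

static uint32_t
window_read32(uint32_t off)
{
        uint32_t v;

        memcpy(&v, &card_mem[win_base + off], 4);
        return (v);
}

static void
read_range(uint32_t addr, uint32_t len, uint32_t *buf)
{
        uint32_t win, off, n, i, remaining = len;

        /* addr and len must be multiples of 4, as in read_card_mem(). */
        win = addr & ~0xfU;             /* 16B aligned window position */
        off = addr - win;               /* offset of the request in the window */

        while (remaining > 0) {
                win_base = win;         /* "move" the window */
                n = remaining < APERTURE - off ? remaining : APERTURE - off;
                for (i = 0; i < n; i += 4, remaining -= 4)
                        *buf++ = window_read32(off + i);
                win += APERTURE;
                off = 0;
        }
}

int
main(void)
{
        uint32_t buf[16], i;

        for (i = 0; i < sizeof(card_mem); i++)
                card_mem[i] = (uint8_t)i;

        read_range(40, 64, buf);        /* starts 8 bytes into a window */
        for (i = 0; i < 16; i++)
                printf("%08x%c", buf[i], (i & 3) == 3 ? '\n' : ' ');
        return (0);
}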
+ */ + win = mr->addr & ~0xf; + off = mr->addr - win; /* offset of the requested region in the win */ + remaining = mr->len; + + while (remaining) { + t4_write_reg(sc, + PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_OFFSET, 2), win); + t4_read_reg(sc, + PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_OFFSET, 2)); + + /* number of bytes that we'll copy in the inner loop */ + n = min(remaining, MEMWIN2_APERTURE - off); + + for (i = 0; i < n; i += 4, remaining -= 4) + *b++ = t4_read_reg(sc, MEMWIN2_BASE + off + i); + + win += MEMWIN2_APERTURE; + off = 0; + } + + rc = copyout(buf, mr->data, mr->len); + free(buf, M_CXGBE); + + return (rc); +} + int t4_os_find_pci_capability(struct adapter *sc, int cap) { @@ -3514,6 +5053,22 @@ t4_os_link_changed(struct adapter *sc, int idx, int link_stat) if_link_state_change(ifp, LINK_STATE_DOWN); } +void +t4_iterate(void (*func)(struct adapter *, void *), void *arg) +{ + struct adapter *sc; + + mtx_lock(&t4_list_lock); + SLIST_FOREACH(sc, &t4_list, link) { + /* + * func should not make any assumptions about what state sc is + * in - the only guarantee is that sc->sc_lock is a valid lock. + */ + func(sc, arg); + } + mtx_unlock(&t4_list_lock); +} + static int t4_open(struct cdev *dev, int flags, int type, struct thread *td) { @@ -3610,6 +5165,27 @@ t4_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data, int fflag, case CHELSIO_T4_GET_SGE_CONTEXT: rc = get_sge_context(sc, (struct t4_sge_context *)data); break; + case CHELSIO_T4_LOAD_FW: { + struct t4_data *fw = (struct t4_data *)data; + uint8_t *fw_data; + + if (sc->flags & FULL_INIT_DONE) + return (EBUSY); + + fw_data = malloc(fw->len, M_CXGBE, M_NOWAIT); + if (fw_data == NULL) + return (ENOMEM); + + rc = copyin(fw->data, fw_data, fw->len); + if (rc == 0) + rc = -t4_load_fw(sc, fw_data, fw->len); + + free(fw_data, M_CXGBE); + break; + } + case CHELSIO_T4_GET_MEM: + rc = read_card_mem(sc, (struct t4_mem_range *)data); + break; default: rc = EINVAL; } @@ -3617,14 +5193,247 @@ t4_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data, int fflag, return (rc); } +#ifndef TCP_OFFLOAD_DISABLE +static int +toe_capability(struct port_info *pi, int enable) +{ + int rc; + struct adapter *sc = pi->adapter; + + ADAPTER_LOCK_ASSERT_OWNED(sc); + + if (!is_offload(sc)) + return (ENODEV); + + if (enable) { + if (isset(&sc->offload_map, pi->port_id)) + return (0); + + if (sc->offload_map == 0) { + rc = activate_uld(sc, ULD_TOM, &sc->tom); + if (rc != 0) + return (rc); + } + + setbit(&sc->offload_map, pi->port_id); + } else { + if (!isset(&sc->offload_map, pi->port_id)) + return (0); + + clrbit(&sc->offload_map, pi->port_id); + + if (sc->offload_map == 0) { + rc = deactivate_uld(&sc->tom); + if (rc != 0) { + setbit(&sc->offload_map, pi->port_id); + return (rc); + } + } + } + + return (0); +} + +/* + * Add an upper layer driver to the global list. 
+ */ +int +t4_register_uld(struct uld_info *ui) +{ + int rc = 0; + struct uld_info *u; + + mtx_lock(&t4_uld_list_lock); + SLIST_FOREACH(u, &t4_uld_list, link) { + if (u->uld_id == ui->uld_id) { + rc = EEXIST; + goto done; + } + } + + SLIST_INSERT_HEAD(&t4_uld_list, ui, link); + ui->refcount = 0; +done: + mtx_unlock(&t4_uld_list_lock); + return (rc); +} + +int +t4_unregister_uld(struct uld_info *ui) +{ + int rc = EINVAL; + struct uld_info *u; + + mtx_lock(&t4_uld_list_lock); + + SLIST_FOREACH(u, &t4_uld_list, link) { + if (u == ui) { + if (ui->refcount > 0) { + rc = EBUSY; + goto done; + } + + SLIST_REMOVE(&t4_uld_list, ui, uld_info, link); + rc = 0; + goto done; + } + } +done: + mtx_unlock(&t4_uld_list_lock); + return (rc); +} + +static int +activate_uld(struct adapter *sc, int id, struct uld_softc *usc) +{ + int rc = EAGAIN; + struct uld_info *ui; + + mtx_lock(&t4_uld_list_lock); + + SLIST_FOREACH(ui, &t4_uld_list, link) { + if (ui->uld_id == id) { + rc = ui->attach(sc, &usc->softc); + if (rc == 0) { + KASSERT(usc->softc != NULL, + ("%s: ULD %d has no state", __func__, id)); + ui->refcount++; + usc->uld = ui; + } + goto done; + } + } +done: + mtx_unlock(&t4_uld_list_lock); + + return (rc); +} + +static int +deactivate_uld(struct uld_softc *usc) +{ + int rc; + + mtx_lock(&t4_uld_list_lock); + + if (usc->uld == NULL || usc->softc == NULL) { + rc = EINVAL; + goto done; + } + + rc = usc->uld->detach(usc->softc); + if (rc == 0) { + KASSERT(usc->uld->refcount > 0, + ("%s: ULD has bad refcount", __func__)); + usc->uld->refcount--; + usc->uld = NULL; + usc->softc = NULL; + } +done: + mtx_unlock(&t4_uld_list_lock); + + return (rc); +} +#endif + +/* + * Come up with reasonable defaults for some of the tunables, provided they're + * not set by the user (in which case we'll use the values as is). 
+ */ +static void +tweak_tunables(void) +{ + int nc = mp_ncpus; /* our snapshot of the number of CPUs */ + + if (t4_ntxq10g < 1) + t4_ntxq10g = min(nc, NTXQ_10G); + + if (t4_ntxq1g < 1) + t4_ntxq1g = min(nc, NTXQ_1G); + + if (t4_nrxq10g < 1) + t4_nrxq10g = min(nc, NRXQ_10G); + + if (t4_nrxq1g < 1) + t4_nrxq1g = min(nc, NRXQ_1G); + +#ifndef TCP_OFFLOAD_DISABLE + if (t4_nofldtxq10g < 1) + t4_nofldtxq10g = min(nc, NOFLDTXQ_10G); + + if (t4_nofldtxq1g < 1) + t4_nofldtxq1g = min(nc, NOFLDTXQ_1G); + + if (t4_nofldrxq10g < 1) + t4_nofldrxq10g = min(nc, NOFLDRXQ_10G); + + if (t4_nofldrxq1g < 1) + t4_nofldrxq1g = min(nc, NOFLDRXQ_1G); +#endif + + if (t4_tmr_idx_10g < 0 || t4_tmr_idx_10g >= SGE_NTIMERS) + t4_tmr_idx_10g = TMR_IDX_10G; + + if (t4_pktc_idx_10g < -1 || t4_pktc_idx_10g >= SGE_NCOUNTERS) + t4_pktc_idx_10g = PKTC_IDX_10G; + + if (t4_tmr_idx_1g < 0 || t4_tmr_idx_1g >= SGE_NTIMERS) + t4_tmr_idx_1g = TMR_IDX_1G; + + if (t4_pktc_idx_1g < -1 || t4_pktc_idx_1g >= SGE_NCOUNTERS) + t4_pktc_idx_1g = PKTC_IDX_1G; + + if (t4_qsize_txq < 128) + t4_qsize_txq = 128; + + if (t4_qsize_rxq < 128) + t4_qsize_rxq = 128; + while (t4_qsize_rxq & 7) + t4_qsize_rxq++; + + t4_intr_types &= INTR_MSIX | INTR_MSI | INTR_INTX; +} + static int t4_mod_event(module_t mod, int cmd, void *arg) { + int rc = 0; - if (cmd == MOD_LOAD) + switch (cmd) { + case MOD_LOAD: t4_sge_modload(); + mtx_init(&t4_list_lock, "T4 adapters", 0, MTX_DEF); + SLIST_INIT(&t4_list); +#ifndef TCP_OFFLOAD_DISABLE + mtx_init(&t4_uld_list_lock, "T4 ULDs", 0, MTX_DEF); + SLIST_INIT(&t4_uld_list); +#endif + tweak_tunables(); + break; - return (0); + case MOD_UNLOAD: +#ifndef TCP_OFFLOAD_DISABLE + mtx_lock(&t4_uld_list_lock); + if (!SLIST_EMPTY(&t4_uld_list)) { + rc = EBUSY; + mtx_unlock(&t4_uld_list_lock); + break; + } + mtx_unlock(&t4_uld_list_lock); + mtx_destroy(&t4_uld_list_lock); +#endif + mtx_lock(&t4_list_lock); + if (!SLIST_EMPTY(&t4_list)) { + rc = EBUSY; + mtx_unlock(&t4_list_lock); + break; + } + mtx_unlock(&t4_list_lock); + mtx_destroy(&t4_list_lock); + break; + } + + return (rc); } static devclass_t t4_devclass; diff --git a/sys/dev/cxgbe/t4_sge.c b/sys/dev/cxgbe/t4_sge.c index 09e3c19cb61a..41630e4f9cfa 100644 --- a/sys/dev/cxgbe/t4_sge.c +++ b/sys/dev/cxgbe/t4_sge.c @@ -38,6 +38,7 @@ __FBSDID("$FreeBSD$"); #include <sys/queue.h> #include <sys/taskqueue.h> #include <sys/sysctl.h> +#include <sys/smp.h> #include <net/bpf.h> #include <net/ethernet.h> #include <net/if.h> @@ -50,7 +51,7 @@ __FBSDID("$FreeBSD$"); #include "common/t4_regs.h" #include "common/t4_regs_values.h" #include "common/t4_msg.h" -#include "common/t4fw_interface.h" +#include "t4_l2t.h" struct fl_buf_info { int size; @@ -91,12 +92,15 @@ struct sgl { bus_dma_segment_t seg[TX_SGL_SEGS]; }; -static void t4_evt_rx(void *); -static void t4_eth_rx(void *); +static int service_iq(struct sge_iq *, int); +static struct mbuf *get_fl_payload(struct adapter *, struct sge_fl *, uint32_t, + int *); +static int t4_eth_rx(struct sge_iq *, const struct rss_header *, struct mbuf *); static inline void init_iq(struct sge_iq *, struct adapter *, int, int, int, - int, iq_intr_handler_t *, char *); -static inline void init_fl(struct sge_fl *, int, char *); -static inline void init_eq(struct sge_eq *, int, char *); + int, char *); +static inline void init_fl(struct sge_fl *, int, int, char *); +static inline void init_eq(struct sge_eq *, int, int, uint8_t, uint16_t, + char *); static int alloc_ring(struct adapter *, size_t, bus_dma_tag_t *, bus_dmamap_t *, bus_addr_t *, void **); static int 
free_ring(struct adapter *, bus_dma_tag_t, bus_dmamap_t, bus_addr_t, @@ -104,26 +108,41 @@ static int free_ring(struct adapter *, bus_dma_tag_t, bus_dmamap_t, bus_addr_t, static int alloc_iq_fl(struct port_info *, struct sge_iq *, struct sge_fl *, int, int); static int free_iq_fl(struct port_info *, struct sge_iq *, struct sge_fl *); -static int alloc_intrq(struct adapter *, int, int, int); -static int free_intrq(struct sge_iq *); -static int alloc_fwq(struct adapter *, int); -static int free_fwq(struct sge_iq *); -static int alloc_rxq(struct port_info *, struct sge_rxq *, int, int); +static int alloc_fwq(struct adapter *); +static int free_fwq(struct adapter *); +static int alloc_mgmtq(struct adapter *); +static int free_mgmtq(struct adapter *); +static int alloc_rxq(struct port_info *, struct sge_rxq *, int, int, + struct sysctl_oid *); static int free_rxq(struct port_info *, struct sge_rxq *); -static int alloc_ctrlq(struct adapter *, struct sge_ctrlq *, int); -static int free_ctrlq(struct adapter *, struct sge_ctrlq *); -static int alloc_txq(struct port_info *, struct sge_txq *, int); +#ifndef TCP_OFFLOAD_DISABLE +static int alloc_ofld_rxq(struct port_info *, struct sge_ofld_rxq *, int, int, + struct sysctl_oid *); +static int free_ofld_rxq(struct port_info *, struct sge_ofld_rxq *); +#endif +static int ctrl_eq_alloc(struct adapter *, struct sge_eq *); +static int eth_eq_alloc(struct adapter *, struct port_info *, struct sge_eq *); +#ifndef TCP_OFFLOAD_DISABLE +static int ofld_eq_alloc(struct adapter *, struct port_info *, struct sge_eq *); +#endif +static int alloc_eq(struct adapter *, struct port_info *, struct sge_eq *); +static int free_eq(struct adapter *, struct sge_eq *); +static int alloc_wrq(struct adapter *, struct port_info *, struct sge_wrq *, + struct sysctl_oid *); +static int free_wrq(struct adapter *, struct sge_wrq *); +static int alloc_txq(struct port_info *, struct sge_txq *, int, + struct sysctl_oid *); static int free_txq(struct port_info *, struct sge_txq *); static void oneseg_dma_callback(void *, bus_dma_segment_t *, int, int); static inline bool is_new_response(const struct sge_iq *, struct rsp_ctrl **); static inline void iq_next(struct sge_iq *); static inline void ring_fl_db(struct adapter *, struct sge_fl *); -static void refill_fl(struct adapter *, struct sge_fl *, int, int); +static int refill_fl(struct adapter *, struct sge_fl *, int); +static void refill_sfl(void *); static int alloc_fl_sdesc(struct sge_fl *); static void free_fl_sdesc(struct sge_fl *); -static int alloc_tx_maps(struct sge_txq *); -static void free_tx_maps(struct sge_txq *); static void set_fl_tag_idx(struct sge_fl *, int); +static void add_fl_to_sfl(struct adapter *, struct sge_fl *); static int get_pkt_sgl(struct sge_txq *, struct mbuf **, struct sgl *, int); static int free_pkt_sgl(struct sge_txq *, struct sgl *); @@ -141,15 +160,13 @@ static inline int reclaimable(struct sge_eq *); static int reclaim_tx_descs(struct sge_txq *, int, int); static void write_eqflush_wr(struct sge_eq *); static __be64 get_flit(bus_dma_segment_t *, int, int); -static int handle_sge_egr_update(struct adapter *, - const struct cpl_sge_egr_update *); -static void handle_cpl(struct adapter *, struct sge_iq *); +static int handle_sge_egr_update(struct sge_iq *, const struct rss_header *, + struct mbuf *); +static int handle_fw_rpl(struct sge_iq *, const struct rss_header *, + struct mbuf *); -static int ctrl_tx(struct adapter *, struct sge_ctrlq *, struct mbuf *); static int 
sysctl_uint16(SYSCTL_HANDLER_ARGS); -extern void filter_rpl(struct adapter *, const struct cpl_set_tcb_rpl *); - /* * Called on MOD_LOAD and fills up fl_buf_info[]. */ @@ -181,46 +198,103 @@ t4_sge_modload(void) * We do not initialize any of the queues here, instead the driver * top-level must request them individually. */ -void +int t4_sge_init(struct adapter *sc) { struct sge *s = &sc->sge; - int i; + int i, rc = 0; + uint32_t ctrl_mask, ctrl_val, hpsize, v; + + ctrl_mask = V_PKTSHIFT(M_PKTSHIFT) | F_RXPKTCPLMODE | + V_INGPADBOUNDARY(M_INGPADBOUNDARY) | + F_EGRSTATUSPAGESIZE; + ctrl_val = V_PKTSHIFT(FL_PKTSHIFT) | F_RXPKTCPLMODE | + V_INGPADBOUNDARY(ilog2(FL_ALIGN) - 5) | + V_EGRSTATUSPAGESIZE(SPG_LEN == 128); + + hpsize = V_HOSTPAGESIZEPF0(PAGE_SHIFT - 10) | + V_HOSTPAGESIZEPF1(PAGE_SHIFT - 10) | + V_HOSTPAGESIZEPF2(PAGE_SHIFT - 10) | + V_HOSTPAGESIZEPF3(PAGE_SHIFT - 10) | + V_HOSTPAGESIZEPF4(PAGE_SHIFT - 10) | + V_HOSTPAGESIZEPF5(PAGE_SHIFT - 10) | + V_HOSTPAGESIZEPF6(PAGE_SHIFT - 10) | + V_HOSTPAGESIZEPF7(PAGE_SHIFT - 10); + + if (sc->flags & MASTER_PF) { + int intr_timer[SGE_NTIMERS] = {1, 5, 10, 50, 100, 200}; + int intr_pktcount[SGE_NCOUNTERS] = {1, 8, 16, 32}; /* 63 max */ + + t4_set_reg_field(sc, A_SGE_CONTROL, ctrl_mask, ctrl_val); + t4_write_reg(sc, A_SGE_HOST_PAGE_SIZE, hpsize); + for (i = 0; i < FL_BUF_SIZES; i++) { + t4_write_reg(sc, A_SGE_FL_BUFFER_SIZE0 + (4 * i), + FL_BUF_SIZE(i)); + } + + t4_write_reg(sc, A_SGE_INGRESS_RX_THRESHOLD, + V_THRESHOLD_0(intr_pktcount[0]) | + V_THRESHOLD_1(intr_pktcount[1]) | + V_THRESHOLD_2(intr_pktcount[2]) | + V_THRESHOLD_3(intr_pktcount[3])); + + t4_write_reg(sc, A_SGE_TIMER_VALUE_0_AND_1, + V_TIMERVALUE0(us_to_core_ticks(sc, intr_timer[0])) | + V_TIMERVALUE1(us_to_core_ticks(sc, intr_timer[1]))); + t4_write_reg(sc, A_SGE_TIMER_VALUE_2_AND_3, + V_TIMERVALUE2(us_to_core_ticks(sc, intr_timer[2])) | + V_TIMERVALUE3(us_to_core_ticks(sc, intr_timer[3]))); + t4_write_reg(sc, A_SGE_TIMER_VALUE_4_AND_5, + V_TIMERVALUE4(us_to_core_ticks(sc, intr_timer[4])) | + V_TIMERVALUE5(us_to_core_ticks(sc, intr_timer[5]))); + } + + v = t4_read_reg(sc, A_SGE_CONTROL); + if ((v & ctrl_mask) != ctrl_val) { + device_printf(sc->dev, "invalid SGE_CONTROL(0x%x)\n", v); + rc = EINVAL; + } - t4_set_reg_field(sc, A_SGE_CONTROL, V_PKTSHIFT(M_PKTSHIFT) | - V_INGPADBOUNDARY(M_INGPADBOUNDARY) | - F_EGRSTATUSPAGESIZE, - V_INGPADBOUNDARY(ilog2(FL_ALIGN) - 5) | - V_PKTSHIFT(FL_PKTSHIFT) | - F_RXPKTCPLMODE | - V_EGRSTATUSPAGESIZE(SPG_LEN == 128)); - t4_set_reg_field(sc, A_SGE_HOST_PAGE_SIZE, - V_HOSTPAGESIZEPF0(M_HOSTPAGESIZEPF0), - V_HOSTPAGESIZEPF0(PAGE_SHIFT - 10)); + v = t4_read_reg(sc, A_SGE_HOST_PAGE_SIZE); + if (v != hpsize) { + device_printf(sc->dev, "invalid SGE_HOST_PAGE_SIZE(0x%x)\n", v); + rc = EINVAL; + } for (i = 0; i < FL_BUF_SIZES; i++) { - t4_write_reg(sc, A_SGE_FL_BUFFER_SIZE0 + (4 * i), - FL_BUF_SIZE(i)); + v = t4_read_reg(sc, A_SGE_FL_BUFFER_SIZE0 + (4 * i)); + if (v != FL_BUF_SIZE(i)) { + device_printf(sc->dev, + "invalid SGE_FL_BUFFER_SIZE[%d](0x%x)\n", i, v); + rc = EINVAL; + } } - i = t4_read_reg(sc, A_SGE_CONM_CTRL); - s->fl_starve_threshold = G_EGRTHRESHOLD(i) * 2 + 1; + v = t4_read_reg(sc, A_SGE_CONM_CTRL); + s->fl_starve_threshold = G_EGRTHRESHOLD(v) * 2 + 1; + + v = t4_read_reg(sc, A_SGE_INGRESS_RX_THRESHOLD); + sc->sge.counter_val[0] = G_THRESHOLD_0(v); + sc->sge.counter_val[1] = G_THRESHOLD_1(v); + sc->sge.counter_val[2] = G_THRESHOLD_2(v); + sc->sge.counter_val[3] = G_THRESHOLD_3(v); + + v = t4_read_reg(sc, A_SGE_TIMER_VALUE_0_AND_1); + 
sc->sge.timer_val[0] = G_TIMERVALUE0(v) / core_ticks_per_usec(sc); + sc->sge.timer_val[1] = G_TIMERVALUE1(v) / core_ticks_per_usec(sc); + v = t4_read_reg(sc, A_SGE_TIMER_VALUE_2_AND_3); + sc->sge.timer_val[2] = G_TIMERVALUE2(v) / core_ticks_per_usec(sc); + sc->sge.timer_val[3] = G_TIMERVALUE3(v) / core_ticks_per_usec(sc); + v = t4_read_reg(sc, A_SGE_TIMER_VALUE_4_AND_5); + sc->sge.timer_val[4] = G_TIMERVALUE4(v) / core_ticks_per_usec(sc); + sc->sge.timer_val[5] = G_TIMERVALUE5(v) / core_ticks_per_usec(sc); - t4_write_reg(sc, A_SGE_INGRESS_RX_THRESHOLD, - V_THRESHOLD_0(s->counter_val[0]) | - V_THRESHOLD_1(s->counter_val[1]) | - V_THRESHOLD_2(s->counter_val[2]) | - V_THRESHOLD_3(s->counter_val[3])); + t4_register_cpl_handler(sc, CPL_FW4_MSG, handle_fw_rpl); + t4_register_cpl_handler(sc, CPL_FW6_MSG, handle_fw_rpl); + t4_register_cpl_handler(sc, CPL_SGE_EGR_UPDATE, handle_sge_egr_update); + t4_register_cpl_handler(sc, CPL_RX_PKT, t4_eth_rx); - t4_write_reg(sc, A_SGE_TIMER_VALUE_0_AND_1, - V_TIMERVALUE0(us_to_core_ticks(sc, s->timer_val[0])) | - V_TIMERVALUE1(us_to_core_ticks(sc, s->timer_val[1]))); - t4_write_reg(sc, A_SGE_TIMER_VALUE_2_AND_3, - V_TIMERVALUE2(us_to_core_ticks(sc, s->timer_val[2])) | - V_TIMERVALUE3(us_to_core_ticks(sc, s->timer_val[3]))); - t4_write_reg(sc, A_SGE_TIMER_VALUE_4_AND_5, - V_TIMERVALUE4(us_to_core_ticks(sc, s->timer_val[4])) | - V_TIMERVALUE5(us_to_core_ticks(sc, s->timer_val[5]))); + return (rc); } int @@ -250,8 +324,7 @@ t4_destroy_dma_tag(struct adapter *sc) } /* - * Allocate and initialize the firmware event queue, control queues, and the - * interrupt queues. The adapter owns all of these queues. + * Allocate and initialize the firmware event queue and the management queue. * * Returns errno on failure. Resources allocated up to that point may still be * allocated. Caller is responsible for cleanup in case this function fails. 
@@ -259,110 +332,32 @@ t4_destroy_dma_tag(struct adapter *sc) int t4_setup_adapter_queues(struct adapter *sc) { - int i, j, rc, intr_idx, qsize; - struct sge_iq *iq; - struct sge_ctrlq *ctrlq; - iq_intr_handler_t *handler; - char name[16]; + int rc; ADAPTER_LOCK_ASSERT_NOTOWNED(sc); - if (sysctl_ctx_init(&sc->ctx) == 0) { - struct sysctl_oid *oid = device_get_sysctl_tree(sc->dev); - struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid); - - sc->oid_fwq = SYSCTL_ADD_NODE(&sc->ctx, children, OID_AUTO, - "fwq", CTLFLAG_RD, NULL, "firmware event queue"); - sc->oid_ctrlq = SYSCTL_ADD_NODE(&sc->ctx, children, OID_AUTO, - "ctrlq", CTLFLAG_RD, NULL, "ctrl queues"); - sc->oid_intrq = SYSCTL_ADD_NODE(&sc->ctx, children, OID_AUTO, - "intrq", CTLFLAG_RD, NULL, "interrupt queues"); - } - - /* - * Interrupt queues - */ - intr_idx = sc->intr_count - NINTRQ(sc); - if (sc->flags & INTR_SHARED) { - qsize = max((sc->sge.nrxq + 1) * 2, INTR_IQ_QSIZE); - for (i = 0; i < NINTRQ(sc); i++, intr_idx++) { - snprintf(name, sizeof(name), "%s intrq%d", - device_get_nameunit(sc->dev), i); - - iq = &sc->sge.intrq[i]; - init_iq(iq, sc, 0, 0, qsize, INTR_IQ_ESIZE, NULL, name); - rc = alloc_intrq(sc, i % sc->params.nports, i, - intr_idx); - - if (rc != 0) { - device_printf(sc->dev, - "failed to create %s: %d\n", name, rc); - return (rc); - } - } - } else { - int qidx = 0; - struct port_info *pi; - - for (i = 0; i < sc->params.nports; i++) { - pi = sc->port[i]; - qsize = max((pi->nrxq + 1) * 2, INTR_IQ_QSIZE); - for (j = 0; j < pi->nrxq; j++, qidx++, intr_idx++) { - snprintf(name, sizeof(name), "%s intrq%d", - device_get_nameunit(pi->dev), j); - - iq = &sc->sge.intrq[qidx]; - init_iq(iq, sc, 0, 0, qsize, INTR_IQ_ESIZE, - NULL, name); - rc = alloc_intrq(sc, i, qidx, intr_idx); - - if (rc != 0) { - device_printf(sc->dev, - "failed to create %s: %d\n", - name, rc); - return (rc); - } - } - } - } + sysctl_ctx_init(&sc->ctx); + sc->flags |= ADAP_SYSCTL_CTX; /* * Firmware event queue */ - snprintf(name, sizeof(name), "%s fwq", device_get_nameunit(sc->dev)); - if (sc->intr_count > T4_EXTRA_INTR) { - handler = NULL; - intr_idx = 1; - } else { - handler = t4_evt_rx; - intr_idx = 0; - } - - iq = &sc->sge.fwq; - init_iq(iq, sc, 0, 0, FW_IQ_QSIZE, FW_IQ_ESIZE, handler, name); - rc = alloc_fwq(sc, intr_idx); + rc = alloc_fwq(sc); if (rc != 0) { device_printf(sc->dev, "failed to create firmware event queue: %d\n", rc); - return (rc); } /* - * Control queues - one per port. + * Management queue. This is just a control queue that uses the fwq as + * its associated iq. 
*/ - ctrlq = &sc->sge.ctrlq[0]; - for (i = 0; i < sc->params.nports; i++, ctrlq++) { - snprintf(name, sizeof(name), "%s ctrlq%d", - device_get_nameunit(sc->dev), i); - init_eq(&ctrlq->eq, CTRL_EQ_QSIZE, name); - - rc = alloc_ctrlq(sc, ctrlq, i); - if (rc != 0) { - device_printf(sc->dev, - "failed to create control queue %d: %d\n", i, rc); - return (rc); - } + rc = alloc_mgmtq(sc); + if (rc != 0) { + device_printf(sc->dev, + "failed to create management queue: %d\n", rc); + return (rc); } return (rc); @@ -374,86 +369,278 @@ t4_setup_adapter_queues(struct adapter *sc) int t4_teardown_adapter_queues(struct adapter *sc) { - int i; - struct sge_iq *iq; ADAPTER_LOCK_ASSERT_NOTOWNED(sc); - /* Do this before freeing the queues */ - if (sc->oid_fwq || sc->oid_ctrlq || sc->oid_intrq) { + /* Do this before freeing the queue */ + if (sc->flags & ADAP_SYSCTL_CTX) { sysctl_ctx_free(&sc->ctx); - sc->oid_fwq = NULL; - sc->oid_ctrlq = NULL; - sc->oid_intrq = NULL; + sc->flags &= ~ADAP_SYSCTL_CTX; } - for (i = 0; i < sc->params.nports; i++) - free_ctrlq(sc, &sc->sge.ctrlq[i]); + free_mgmtq(sc); + free_fwq(sc); + + return (0); +} + +static inline int +first_vector(struct port_info *pi) +{ + struct adapter *sc = pi->adapter; + int rc = T4_EXTRA_INTR, i; + + if (sc->intr_count == 1) + return (0); - iq = &sc->sge.fwq; - free_fwq(iq); + for_each_port(sc, i) { + if (i == pi->port_id) + break; + +#ifndef TCP_OFFLOAD_DISABLE + if (sc->flags & INTR_DIRECT) + rc += pi->nrxq + pi->nofldrxq; + else + rc += max(pi->nrxq, pi->nofldrxq); +#else + /* + * Not compiled with offload support and intr_count > 1. Only + * NIC queues exist and they'd better be taking direct + * interrupts. + */ + KASSERT(sc->flags & INTR_DIRECT, + ("%s: intr_count %d, !INTR_DIRECT", __func__, + sc->intr_count)); - for (i = 0; i < NINTRQ(sc); i++) { - iq = &sc->sge.intrq[i]; - free_intrq(iq); + rc += pi->nrxq; +#endif } - return (0); + return (rc); +} + +/* + * Given an arbitrary "index," come up with an iq that can be used by other + * queues (of this port) for interrupt forwarding, SGE egress updates, etc. + * The iq returned is guaranteed to be something that takes direct interrupts. + */ +static struct sge_iq * +port_intr_iq(struct port_info *pi, int idx) +{ + struct adapter *sc = pi->adapter; + struct sge *s = &sc->sge; + struct sge_iq *iq = NULL; + + if (sc->intr_count == 1) + return (&sc->sge.fwq); + +#ifndef TCP_OFFLOAD_DISABLE + if (sc->flags & INTR_DIRECT) { + idx %= pi->nrxq + pi->nofldrxq; + + if (idx >= pi->nrxq) { + idx -= pi->nrxq; + iq = &s->ofld_rxq[pi->first_ofld_rxq + idx].iq; + } else + iq = &s->rxq[pi->first_rxq + idx].iq; + + } else { + idx %= max(pi->nrxq, pi->nofldrxq); + + if (pi->nrxq >= pi->nofldrxq) + iq = &s->rxq[pi->first_rxq + idx].iq; + else + iq = &s->ofld_rxq[pi->first_ofld_rxq + idx].iq; + } +#else + /* + * Not compiled with offload support and intr_count > 1. Only NIC + * queues exist and they'd better be taking direct interrupts. 
+ */ + KASSERT(sc->flags & INTR_DIRECT, + ("%s: intr_count %d, !INTR_DIRECT", __func__, sc->intr_count)); + + idx %= pi->nrxq; + iq = &s->rxq[pi->first_rxq + idx].iq; +#endif + + KASSERT(iq->flags & IQ_INTR, ("%s: EDOOFUS", __func__)); + return (iq); } int -t4_setup_eth_queues(struct port_info *pi) +t4_setup_port_queues(struct port_info *pi) { - int rc = 0, i, intr_idx; + int rc = 0, i, j, intr_idx, iqid; struct sge_rxq *rxq; struct sge_txq *txq; + struct sge_wrq *ctrlq; +#ifndef TCP_OFFLOAD_DISABLE + struct sge_ofld_rxq *ofld_rxq; + struct sge_wrq *ofld_txq; +#endif char name[16]; struct adapter *sc = pi->adapter; + struct sysctl_oid *oid = device_get_sysctl_tree(pi->dev), *oid2 = NULL; + struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid); - if (sysctl_ctx_init(&pi->ctx) == 0) { - struct sysctl_oid *oid = device_get_sysctl_tree(pi->dev); - struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid); + oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "rxq", CTLFLAG_RD, + NULL, "rx queues"); - pi->oid_rxq = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, - "rxq", CTLFLAG_RD, NULL, "rx queues"); - pi->oid_txq = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, - "txq", CTLFLAG_RD, NULL, "tx queues"); +#ifndef TCP_OFFLOAD_DISABLE + if (is_offload(sc)) { + oid2 = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "ofld_rxq", + CTLFLAG_RD, NULL, + "rx queues for offloaded TCP connections"); } +#endif + + /* Interrupt vector to start from (when using multiple vectors) */ + intr_idx = first_vector(pi); + /* + * First pass over all rx queues (NIC and TOE): + * a) initialize iq and fl + * b) allocate queue iff it will take direct interrupts. + */ for_each_rxq(pi, i, rxq) { snprintf(name, sizeof(name), "%s rxq%d-iq", device_get_nameunit(pi->dev), i); - init_iq(&rxq->iq, sc, pi->tmr_idx, pi->pktc_idx, - pi->qsize_rxq, RX_IQ_ESIZE, t4_eth_rx, name); + init_iq(&rxq->iq, sc, pi->tmr_idx, pi->pktc_idx, pi->qsize_rxq, + RX_IQ_ESIZE, name); snprintf(name, sizeof(name), "%s rxq%d-fl", device_get_nameunit(pi->dev), i); - init_fl(&rxq->fl, pi->qsize_rxq / 8, name); + init_fl(&rxq->fl, pi->qsize_rxq / 8, pi->ifp->if_mtu, name); + + if (sc->flags & INTR_DIRECT +#ifndef TCP_OFFLOAD_DISABLE + || (sc->intr_count > 1 && pi->nrxq >= pi->nofldrxq) +#endif + ) { + rxq->iq.flags |= IQ_INTR; + rc = alloc_rxq(pi, rxq, intr_idx, i, oid); + if (rc != 0) + goto done; + intr_idx++; + } + } - intr_idx = pi->first_rxq + i; - if (sc->flags & INTR_SHARED) - intr_idx %= NINTRQ(sc); +#ifndef TCP_OFFLOAD_DISABLE + for_each_ofld_rxq(pi, i, ofld_rxq) { - rc = alloc_rxq(pi, rxq, intr_idx, i); + snprintf(name, sizeof(name), "%s ofld_rxq%d-iq", + device_get_nameunit(pi->dev), i); + init_iq(&ofld_rxq->iq, sc, pi->tmr_idx, pi->pktc_idx, + pi->qsize_rxq, RX_IQ_ESIZE, name); + + snprintf(name, sizeof(name), "%s ofld_rxq%d-fl", + device_get_nameunit(pi->dev), i); + init_fl(&ofld_rxq->fl, pi->qsize_rxq / 8, MJUM16BYTES, name); + + if (sc->flags & INTR_DIRECT || + (sc->intr_count > 1 && pi->nofldrxq > pi->nrxq)) { + ofld_rxq->iq.flags |= IQ_INTR; + rc = alloc_ofld_rxq(pi, ofld_rxq, intr_idx, i, oid2); + if (rc != 0) + goto done; + intr_idx++; + } + } +#endif + + /* + * Second pass over all rx queues (NIC and TOE). The queues forwarding + * their interrupts are allocated now. 
+ */ + j = 0; + for_each_rxq(pi, i, rxq) { + if (rxq->iq.flags & IQ_INTR) + continue; + + intr_idx = port_intr_iq(pi, j)->abs_id; + + rc = alloc_rxq(pi, rxq, intr_idx, i, oid); + if (rc != 0) + goto done; + j++; + } + +#ifndef TCP_OFFLOAD_DISABLE + for_each_ofld_rxq(pi, i, ofld_rxq) { + if (ofld_rxq->iq.flags & IQ_INTR) + continue; + + intr_idx = port_intr_iq(pi, j)->abs_id; + + rc = alloc_ofld_rxq(pi, ofld_rxq, intr_idx, i, oid2); if (rc != 0) goto done; + j++; } +#endif + /* + * Now the tx queues. Only one pass needed. + */ + oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "txq", CTLFLAG_RD, + NULL, "tx queues"); + j = 0; for_each_txq(pi, i, txq) { + uint16_t iqid; + + iqid = port_intr_iq(pi, j)->cntxt_id; snprintf(name, sizeof(name), "%s txq%d", device_get_nameunit(pi->dev), i); - init_eq(&txq->eq, pi->qsize_txq, name); + init_eq(&txq->eq, EQ_ETH, pi->qsize_txq, pi->tx_chan, iqid, + name); - rc = alloc_txq(pi, txq, i); + rc = alloc_txq(pi, txq, i, oid); if (rc != 0) goto done; + j++; } +#ifndef TCP_OFFLOAD_DISABLE + oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "ofld_txq", + CTLFLAG_RD, NULL, "tx queues for offloaded TCP connections"); + for_each_ofld_txq(pi, i, ofld_txq) { + uint16_t iqid; + + iqid = port_intr_iq(pi, j)->cntxt_id; + + snprintf(name, sizeof(name), "%s ofld_txq%d", + device_get_nameunit(pi->dev), i); + init_eq(&ofld_txq->eq, EQ_OFLD, pi->qsize_txq, pi->tx_chan, + iqid, name); + + snprintf(name, sizeof(name), "%d", i); + oid2 = SYSCTL_ADD_NODE(&pi->ctx, SYSCTL_CHILDREN(oid), OID_AUTO, + name, CTLFLAG_RD, NULL, "offload tx queue"); + + rc = alloc_wrq(sc, pi, ofld_txq, oid2); + if (rc != 0) + goto done; + j++; + } +#endif + + /* + * Finally, the control queue. + */ + oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "ctrlq", CTLFLAG_RD, + NULL, "ctrl queue"); + ctrlq = &sc->sge.ctrlq[pi->port_id]; + iqid = port_intr_iq(pi, 0)->cntxt_id; + snprintf(name, sizeof(name), "%s ctrlq", device_get_nameunit(pi->dev)); + init_eq(&ctrlq->eq, EQ_CTRL, CTRL_EQ_QSIZE, pi->tx_chan, iqid, name); + rc = alloc_wrq(sc, pi, ctrlq, oid); + done: if (rc) - t4_teardown_eth_queues(pi); + t4_teardown_port_queues(pi); return (rc); } @@ -462,90 +649,91 @@ done: * Idempotent */ int -t4_teardown_eth_queues(struct port_info *pi) +t4_teardown_port_queues(struct port_info *pi) { int i; + struct adapter *sc = pi->adapter; struct sge_rxq *rxq; struct sge_txq *txq; +#ifndef TCP_OFFLOAD_DISABLE + struct sge_ofld_rxq *ofld_rxq; + struct sge_wrq *ofld_txq; +#endif /* Do this before freeing the queues */ - if (pi->oid_txq || pi->oid_rxq) { + if (pi->flags & PORT_SYSCTL_CTX) { sysctl_ctx_free(&pi->ctx); - pi->oid_txq = pi->oid_rxq = NULL; + pi->flags &= ~PORT_SYSCTL_CTX; } + /* + * Take down all the tx queues first, as they reference the rx queues + * (for egress updates, etc.). + */ + + free_wrq(sc, &sc->sge.ctrlq[pi->port_id]); + for_each_txq(pi, i, txq) { free_txq(pi, txq); } +#ifndef TCP_OFFLOAD_DISABLE + for_each_ofld_txq(pi, i, ofld_txq) { + free_wrq(sc, ofld_txq); + } +#endif + + /* + * Then take down the rx queues that forward their interrupts, as they + * reference other rx queues. + */ + + for_each_rxq(pi, i, rxq) { + if ((rxq->iq.flags & IQ_INTR) == 0) + free_rxq(pi, rxq); + } + +#ifndef TCP_OFFLOAD_DISABLE + for_each_ofld_rxq(pi, i, ofld_rxq) { + if ((ofld_rxq->iq.flags & IQ_INTR) == 0) + free_ofld_rxq(pi, ofld_rxq); + } +#endif + + /* + * Then take down the rx queues that take direct interrupts. 
+ */ + for_each_rxq(pi, i, rxq) { - free_rxq(pi, rxq); + if (rxq->iq.flags & IQ_INTR) + free_rxq(pi, rxq); } +#ifndef TCP_OFFLOAD_DISABLE + for_each_ofld_rxq(pi, i, ofld_rxq) { + if (ofld_rxq->iq.flags & IQ_INTR) + free_ofld_rxq(pi, ofld_rxq); + } +#endif + return (0); } -/* Deals with errors and the first (and only) interrupt queue */ +/* + * Deals with errors and the firmware event queue. All data rx queues forward + * their interrupt to the firmware event queue. + */ void t4_intr_all(void *arg) { struct adapter *sc = arg; + struct sge_iq *fwq = &sc->sge.fwq; t4_intr_err(arg); - t4_intr(&sc->sge.intrq[0]); -} - -/* Deals with interrupts, and a few CPLs, on the given interrupt queue */ -void -t4_intr(void *arg) -{ - struct sge_iq *iq = arg, *q; - struct adapter *sc = iq->adapter; - struct rsp_ctrl *ctrl; - const struct rss_header *rss; - int ndesc_pending = 0, ndesc_total = 0; - int qid, rsp_type; - - if (!atomic_cmpset_32(&iq->state, IQS_IDLE, IQS_BUSY)) - return; - - while (is_new_response(iq, &ctrl)) { - - rmb(); - - rss = (const void *)iq->cdesc; - rsp_type = G_RSPD_TYPE(ctrl->u.type_gen); - - if (__predict_false(rsp_type == X_RSPD_TYPE_CPL)) { - handle_cpl(sc, iq); - goto nextdesc; - } - - qid = ntohl(ctrl->pldbuflen_qid) - sc->sge.iq_start; - q = sc->sge.iqmap[qid]; - - if (atomic_cmpset_32(&q->state, IQS_IDLE, IQS_BUSY)) { - q->handler(q); - atomic_cmpset_32(&q->state, IQS_BUSY, IQS_IDLE); - } - -nextdesc: ndesc_total++; - if (++ndesc_pending >= iq->qsize / 4) { - t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS), - V_CIDXINC(ndesc_pending) | - V_INGRESSQID(iq->cntxt_id) | - V_SEINTARM( - V_QINTR_TIMER_IDX(X_TIMERREG_UPDATE_CIDX))); - ndesc_pending = 0; - } - - iq_next(iq); + if (atomic_cmpset_int(&fwq->state, IQS_IDLE, IQS_BUSY)) { + service_iq(fwq, 0); + atomic_cmpset_int(&fwq->state, IQS_BUSY, IQS_IDLE); } - - t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS), V_CIDXINC(ndesc_pending) | - V_INGRESSQID((u32)iq->cntxt_id) | V_SEINTARM(iq->intr_params)); - - atomic_cmpset_32(&iq->state, IQS_BUSY, IQS_IDLE); } /* Deals with error interrupts */ @@ -558,259 +746,452 @@ t4_intr_err(void *arg) t4_slow_intr_handler(sc); } -/* Deals with the firmware event queue */ void t4_intr_evt(void *arg) { struct sge_iq *iq = arg; - if (atomic_cmpset_32(&iq->state, IQS_IDLE, IQS_BUSY)) { - t4_evt_rx(arg); - atomic_cmpset_32(&iq->state, IQS_BUSY, IQS_IDLE); + if (atomic_cmpset_int(&iq->state, IQS_IDLE, IQS_BUSY)) { + service_iq(iq, 0); + atomic_cmpset_int(&iq->state, IQS_BUSY, IQS_IDLE); } } -static void -t4_evt_rx(void *arg) +void +t4_intr(void *arg) { struct sge_iq *iq = arg; + + if (atomic_cmpset_int(&iq->state, IQS_IDLE, IQS_BUSY)) { + service_iq(iq, 0); + atomic_cmpset_int(&iq->state, IQS_BUSY, IQS_IDLE); + } +} + +/* + * Deals with anything and everything on the given ingress queue. + */ +static int +service_iq(struct sge_iq *iq, int budget) +{ + struct sge_iq *q; + struct sge_rxq *rxq = (void *)iq; /* Use iff iq is part of rxq */ + struct sge_fl *fl = &rxq->fl; /* Use iff IQ_HAS_FL */ struct adapter *sc = iq->adapter; struct rsp_ctrl *ctrl; - int ndesc_pending = 0, ndesc_total = 0; + const struct rss_header *rss; + int ndescs = 0, limit, fl_bufs_used = 0; + int rsp_type; + uint32_t lq; + struct mbuf *m0; + STAILQ_HEAD(, sge_iq) iql = STAILQ_HEAD_INITIALIZER(iql); + + limit = budget ? 
budget : iq->qsize / 8; + + KASSERT(iq->state == IQS_BUSY, ("%s: iq %p not BUSY", __func__, iq)); + + /* + * We always come back and check the descriptor ring for new indirect + * interrupts and other responses after running a single handler. + */ + for (;;) { + while (is_new_response(iq, &ctrl)) { + + rmb(); + + m0 = NULL; + rsp_type = G_RSPD_TYPE(ctrl->u.type_gen); + lq = be32toh(ctrl->pldbuflen_qid); + rss = (const void *)iq->cdesc; + + switch (rsp_type) { + case X_RSPD_TYPE_FLBUF: + + KASSERT(iq->flags & IQ_HAS_FL, + ("%s: data for an iq (%p) with no freelist", + __func__, iq)); + + m0 = get_fl_payload(sc, fl, lq, &fl_bufs_used); +#ifdef T4_PKT_TIMESTAMP + /* + * 60 bit timestamp for the payload is + * *(uint64_t *)m0->m_pktdat. Note that it is + * in the leading free-space in the mbuf. The + * kernel can clobber it during a pullup, + * m_copymdata, etc. You need to make sure that + * the mbuf reaches you unmolested if you care + * about the timestamp. + */ + *(uint64_t *)m0->m_pktdat = + be64toh(ctrl->u.last_flit) & + 0xfffffffffffffff; +#endif + + /* fall through */ + + case X_RSPD_TYPE_CPL: + KASSERT(rss->opcode < NUM_CPL_CMDS, + ("%s: bad opcode %02x.", __func__, + rss->opcode)); + sc->cpl_handler[rss->opcode](iq, rss, m0); + break; + + case X_RSPD_TYPE_INTR: + + /* + * Interrupts should be forwarded only to queues + * that are not forwarding their interrupts. + * This means service_iq can recurse but only 1 + * level deep. + */ + KASSERT(budget == 0, + ("%s: budget %u, rsp_type %u", __func__, + budget, rsp_type)); + + q = sc->sge.iqmap[lq - sc->sge.iq_start]; + if (atomic_cmpset_int(&q->state, IQS_IDLE, + IQS_BUSY)) { + if (service_iq(q, q->qsize / 8) == 0) { + atomic_cmpset_int(&q->state, + IQS_BUSY, IQS_IDLE); + } else { + STAILQ_INSERT_TAIL(&iql, q, + link); + } + } + break; - KASSERT(iq == &sc->sge.fwq, ("%s: unexpected ingress queue", __func__)); + default: + panic("%s: rsp_type %u", __func__, rsp_type); + } - while (is_new_response(iq, &ctrl)) { - int rsp_type; + iq_next(iq); + if (++ndescs == limit) { + t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS), + V_CIDXINC(ndescs) | + V_INGRESSQID(iq->cntxt_id) | + V_SEINTARM(V_QINTR_TIMER_IDX(X_TIMERREG_UPDATE_CIDX))); + ndescs = 0; + + if (fl_bufs_used > 0) { + FL_LOCK(fl); + fl->needed += fl_bufs_used; + refill_fl(sc, fl, fl->cap / 8); + FL_UNLOCK(fl); + fl_bufs_used = 0; + } - rmb(); + if (budget) + return (EINPROGRESS); + } + } - rsp_type = G_RSPD_TYPE(ctrl->u.type_gen); - if (__predict_false(rsp_type != X_RSPD_TYPE_CPL)) - panic("%s: unexpected rsp_type %d", __func__, rsp_type); + if (STAILQ_EMPTY(&iql)) + break; - handle_cpl(sc, iq); + /* + * Process the head only, and send it to the back of the list if + * it's still not done. 
+ */ + q = STAILQ_FIRST(&iql); + STAILQ_REMOVE_HEAD(&iql, link); + if (service_iq(q, q->qsize / 8) == 0) + atomic_cmpset_int(&q->state, IQS_BUSY, IQS_IDLE); + else + STAILQ_INSERT_TAIL(&iql, q, link); + } - ndesc_total++; - if (++ndesc_pending >= iq->qsize / 4) { - t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS), - V_CIDXINC(ndesc_pending) | - V_INGRESSQID(iq->cntxt_id) | - V_SEINTARM( - V_QINTR_TIMER_IDX(X_TIMERREG_UPDATE_CIDX))); - ndesc_pending = 0; +#ifdef INET + if (iq->flags & IQ_LRO_ENABLED) { + struct lro_ctrl *lro = &rxq->lro; + struct lro_entry *l; + + while (!SLIST_EMPTY(&lro->lro_active)) { + l = SLIST_FIRST(&lro->lro_active); + SLIST_REMOVE_HEAD(&lro->lro_active, next); + tcp_lro_flush(lro, l); } + } +#endif + + t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS), V_CIDXINC(ndescs) | + V_INGRESSQID((u32)iq->cntxt_id) | V_SEINTARM(iq->intr_params)); + + if (iq->flags & IQ_HAS_FL) { + int starved; - iq_next(iq); + FL_LOCK(fl); + fl->needed += fl_bufs_used; + starved = refill_fl(sc, fl, fl->cap / 4); + FL_UNLOCK(fl); + if (__predict_false(starved != 0)) + add_fl_to_sfl(sc, fl); } - t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS), V_CIDXINC(ndesc_pending) | - V_INGRESSQID(iq->cntxt_id) | V_SEINTARM(iq->intr_params)); + return (0); } + #ifdef T4_PKT_TIMESTAMP #define RX_COPY_THRESHOLD (MINCLSIZE - 8) #else #define RX_COPY_THRESHOLD MINCLSIZE #endif -static void -t4_eth_rx(void *arg) +static struct mbuf * +get_fl_payload(struct adapter *sc, struct sge_fl *fl, uint32_t len_newbuf, + int *fl_bufs_used) { - struct sge_rxq *rxq = arg; - struct sge_iq *iq = arg; - struct adapter *sc = iq->adapter; - struct rsp_ctrl *ctrl; - struct ifnet *ifp = rxq->ifp; - struct sge_fl *fl = &rxq->fl; - struct fl_sdesc *sd = &fl->sdesc[fl->cidx], *sd_next; - const struct rss_header *rss; - const struct cpl_rx_pkt *cpl; - uint32_t len; - int ndescs = 0, i; struct mbuf *m0, *m; -#ifdef INET - struct lro_ctrl *lro = &rxq->lro; - struct lro_entry *l; -#endif + struct fl_sdesc *sd = &fl->sdesc[fl->cidx]; + unsigned int nbuf, len; - prefetch(sd->m); - prefetch(sd->cl); + /* + * No assertion for the fl lock because we don't need it. This routine + * is called only from the rx interrupt handler and it only updates + * fl->cidx. (Contrast that with fl->pidx/fl->needed which could be + * updated in the rx interrupt handler or the starvation helper routine. + * That's why code that manipulates fl->pidx/fl->needed needs the fl + * lock but this routine does not). 
+ */ - iq->intr_next = iq->intr_params; - while (is_new_response(iq, &ctrl)) { + if (__predict_false((len_newbuf & F_RSPD_NEWBUF) == 0)) + panic("%s: cannot handle packed frames", __func__); + len = G_RSPD_LEN(len_newbuf); - rmb(); + m0 = sd->m; + sd->m = NULL; /* consumed */ - rss = (const void *)iq->cdesc; - i = G_RSPD_TYPE(ctrl->u.type_gen); + bus_dmamap_sync(fl->tag[sd->tag_idx], sd->map, BUS_DMASYNC_POSTREAD); + m_init(m0, NULL, 0, M_NOWAIT, MT_DATA, M_PKTHDR); +#ifdef T4_PKT_TIMESTAMP + /* Leave room for a timestamp */ + m0->m_data += 8; +#endif - KASSERT(i == X_RSPD_TYPE_FLBUF && rss->opcode == CPL_RX_PKT, - ("%s: unexpected type %d CPL opcode 0x%x", - __func__, i, rss->opcode)); + if (len < RX_COPY_THRESHOLD) { + /* copy data to mbuf, buffer will be recycled */ + bcopy(sd->cl, mtod(m0, caddr_t), len); + m0->m_len = len; + } else { + bus_dmamap_unload(fl->tag[sd->tag_idx], sd->map); + m_cljset(m0, sd->cl, FL_BUF_TYPE(sd->tag_idx)); + sd->cl = NULL; /* consumed */ + m0->m_len = min(len, FL_BUF_SIZE(sd->tag_idx)); + } + m0->m_pkthdr.len = len; - sd_next = sd + 1; - if (__predict_false(fl->cidx + 1 == fl->cap)) - sd_next = fl->sdesc; - prefetch(sd_next->m); - prefetch(sd_next->cl); + sd++; + if (__predict_false(++fl->cidx == fl->cap)) { + sd = fl->sdesc; + fl->cidx = 0; + } - cpl = (const void *)(rss + 1); + m = m0; + len -= m->m_len; + nbuf = 1; /* # of fl buffers used */ - m0 = sd->m; + while (len > 0) { + m->m_next = sd->m; sd->m = NULL; /* consumed */ - - len = be32toh(ctrl->pldbuflen_qid); - if (__predict_false((len & F_RSPD_NEWBUF) == 0)) - panic("%s: cannot handle packed frames", __func__); - len = G_RSPD_LEN(len); + m = m->m_next; bus_dmamap_sync(fl->tag[sd->tag_idx], sd->map, BUS_DMASYNC_POSTREAD); - m_init(m0, NULL, 0, M_NOWAIT, MT_DATA, M_PKTHDR); - -#ifdef T4_PKT_TIMESTAMP - *mtod(m0, uint64_t *) = - be64toh(ctrl->u.last_flit & 0xfffffffffffffff); - m0->m_data += 8; - - /* - * 60 bit timestamp value is *(uint64_t *)m0->m_pktdat. Note - * that it is in the leading free-space (see M_LEADINGSPACE) in - * the mbuf. The kernel can clobber it during a pullup, - * m_copymdata, etc. You need to make sure that the mbuf - * reaches you unmolested if you care about the timestamp. 
- */ -#endif - - if (len < RX_COPY_THRESHOLD) { - /* copy data to mbuf, buffer will be recycled */ - bcopy(sd->cl, mtod(m0, caddr_t), len); - m0->m_len = len; + m_init(m, NULL, 0, M_NOWAIT, MT_DATA, 0); + if (len <= MLEN) { + bcopy(sd->cl, mtod(m, caddr_t), len); + m->m_len = len; } else { - bus_dmamap_unload(fl->tag[sd->tag_idx], sd->map); - m_cljset(m0, sd->cl, FL_BUF_TYPE(sd->tag_idx)); + bus_dmamap_unload(fl->tag[sd->tag_idx], + sd->map); + m_cljset(m, sd->cl, FL_BUF_TYPE(sd->tag_idx)); sd->cl = NULL; /* consumed */ - m0->m_len = min(len, FL_BUF_SIZE(sd->tag_idx)); + m->m_len = min(len, FL_BUF_SIZE(sd->tag_idx)); } - len -= FL_PKTSHIFT; - m0->m_len -= FL_PKTSHIFT; - m0->m_data += FL_PKTSHIFT; - - m0->m_pkthdr.len = len; - m0->m_pkthdr.rcvif = ifp; - m0->m_flags |= M_FLOWID; - m0->m_pkthdr.flowid = rss->hash_val; - - if (cpl->csum_calc && !cpl->err_vec && - ifp->if_capenable & IFCAP_RXCSUM) { - m0->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | - CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR); - if (cpl->ip_frag) - m0->m_pkthdr.csum_data = be16toh(cpl->csum); - else - m0->m_pkthdr.csum_data = 0xffff; - rxq->rxcsum++; - } - - if (cpl->vlan_ex) { - m0->m_pkthdr.ether_vtag = be16toh(cpl->vlan); - m0->m_flags |= M_VLANTAG; - rxq->vlan_extraction++; - } - - i = 1; /* # of fl sdesc used */ - sd = sd_next; - if (__predict_false(++fl->cidx == fl->cap)) + sd++; + if (__predict_false(++fl->cidx == fl->cap)) { + sd = fl->sdesc; fl->cidx = 0; + } - len -= m0->m_len; - m = m0; - while (len) { - i++; - - sd_next = sd + 1; - if (__predict_false(fl->cidx + 1 == fl->cap)) - sd_next = fl->sdesc; - prefetch(sd_next->m); - prefetch(sd_next->cl); - - m->m_next = sd->m; - sd->m = NULL; /* consumed */ - m = m->m_next; - - bus_dmamap_sync(fl->tag[sd->tag_idx], sd->map, - BUS_DMASYNC_POSTREAD); - - m_init(m, NULL, 0, M_NOWAIT, MT_DATA, 0); - if (len <= MLEN) { - bcopy(sd->cl, mtod(m, caddr_t), len); - m->m_len = len; - } else { - bus_dmamap_unload(fl->tag[sd->tag_idx], - sd->map); - m_cljset(m, sd->cl, FL_BUF_TYPE(sd->tag_idx)); - sd->cl = NULL; /* consumed */ - m->m_len = min(len, FL_BUF_SIZE(sd->tag_idx)); - } + len -= m->m_len; + nbuf++; + } - i++; - sd = sd_next; - if (__predict_false(++fl->cidx == fl->cap)) - fl->cidx = 0; + (*fl_bufs_used) += nbuf; - len -= m->m_len; - } + return (m0); +} +static int +t4_eth_rx(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m0) +{ + struct sge_rxq *rxq = (void *)iq; + struct ifnet *ifp = rxq->ifp; + const struct cpl_rx_pkt *cpl = (const void *)(rss + 1); #ifdef INET - if (cpl->l2info & htobe32(F_RXF_LRO) && - rxq->flags & RXQ_LRO_ENABLED && - tcp_lro_rx(lro, m0, 0) == 0) { - /* queued for LRO */ - } else + struct lro_ctrl *lro = &rxq->lro; #endif - ifp->if_input(ifp, m0); - FL_LOCK(fl); - fl->needed += i; - if (fl->needed >= 32) - refill_fl(sc, fl, 64, 32); - FL_UNLOCK(fl); + KASSERT(m0 != NULL, ("%s: no payload with opcode %02x", __func__, + rss->opcode)); - if (++ndescs > 32) { - t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS), - V_CIDXINC(ndescs) | - V_INGRESSQID((u32)iq->cntxt_id) | - V_SEINTARM(V_QINTR_TIMER_IDX(X_TIMERREG_UPDATE_CIDX))); - ndescs = 0; - } + m0->m_pkthdr.len -= FL_PKTSHIFT; + m0->m_len -= FL_PKTSHIFT; + m0->m_data += FL_PKTSHIFT; + + m0->m_pkthdr.rcvif = ifp; + m0->m_flags |= M_FLOWID; + m0->m_pkthdr.flowid = rss->hash_val; - iq_next(iq); + if (cpl->csum_calc && !cpl->err_vec && + ifp->if_capenable & IFCAP_RXCSUM) { + m0->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | + CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR); + if (cpl->ip_frag) + 
m0->m_pkthdr.csum_data = be16toh(cpl->csum); + else + m0->m_pkthdr.csum_data = 0xffff; + rxq->rxcsum++; } -#ifdef INET - while (!SLIST_EMPTY(&lro->lro_active)) { - l = SLIST_FIRST(&lro->lro_active); - SLIST_REMOVE_HEAD(&lro->lro_active, next); - tcp_lro_flush(lro, l); + if (cpl->vlan_ex) { + m0->m_pkthdr.ether_vtag = be16toh(cpl->vlan); + m0->m_flags |= M_VLANTAG; + rxq->vlan_extraction++; } -#endif - t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS), V_CIDXINC(ndescs) | - V_INGRESSQID((u32)iq->cntxt_id) | V_SEINTARM(iq->intr_next)); +#ifdef INET + if (cpl->l2info & htobe32(F_RXF_LRO) && + iq->flags & IQ_LRO_ENABLED && + tcp_lro_rx(lro, m0, 0) == 0) { + /* queued for LRO */ + } else +#endif + ifp->if_input(ifp, m0); - FL_LOCK(fl); - if (fl->needed >= 32) - refill_fl(sc, fl, 128, 8); - FL_UNLOCK(fl); + return (0); } int t4_mgmt_tx(struct adapter *sc, struct mbuf *m) { - return ctrl_tx(sc, &sc->sge.ctrlq[0], m); + return t4_wrq_tx(sc, &sc->sge.mgmtq, m); +} + +/* + * Doesn't fail. Holds on to work requests it can't send right away. + */ +int +t4_wrq_tx_locked(struct adapter *sc, struct sge_wrq *wrq, struct mbuf *m0) +{ + struct sge_eq *eq = &wrq->eq; + int can_reclaim; + caddr_t dst; + struct mbuf *wr, *next; + + TXQ_LOCK_ASSERT_OWNED(wrq); + KASSERT((eq->flags & EQ_TYPEMASK) == EQ_OFLD || + (eq->flags & EQ_TYPEMASK) == EQ_CTRL, + ("%s: eq type %d", __func__, eq->flags & EQ_TYPEMASK)); + + if (__predict_true(m0 != NULL)) { + if (wrq->head) + wrq->tail->m_nextpkt = m0; + else + wrq->head = m0; + while (m0->m_nextpkt) + m0 = m0->m_nextpkt; + wrq->tail = m0; + } + + can_reclaim = reclaimable(eq); + if (__predict_false(eq->flags & EQ_STALLED)) { + if (can_reclaim < tx_resume_threshold(eq)) + return (0); + eq->flags &= ~EQ_STALLED; + eq->unstalled++; + } + eq->cidx += can_reclaim; + eq->avail += can_reclaim; + if (__predict_false(eq->cidx >= eq->cap)) + eq->cidx -= eq->cap; + + for (wr = wrq->head; wr; wr = next) { + int ndesc; + struct mbuf *m; + + next = wr->m_nextpkt; + wr->m_nextpkt = NULL; + + M_ASSERTPKTHDR(wr); + KASSERT(wr->m_pkthdr.len > 0 && (wr->m_pkthdr.len & 0x7) == 0, + ("%s: work request len %d.", __func__, wr->m_pkthdr.len)); + + if (wr->m_pkthdr.len > SGE_MAX_WR_LEN) { +#ifdef INVARIANTS + panic("%s: oversized work request", __func__); +#else + log(LOG_ERR, "%s: %s work request too long (%d)", + device_get_nameunit(sc->dev), __func__, + wr->m_pkthdr.len); + m_freem(wr); + continue; +#endif + } + + ndesc = howmany(wr->m_pkthdr.len, EQ_ESIZE); + if (eq->avail < ndesc) { + wr->m_nextpkt = next; + wrq->no_desc++; + break; + } + + dst = (void *)&eq->desc[eq->pidx]; + for (m = wr; m; m = m->m_next) + copy_to_txd(eq, mtod(m, caddr_t), &dst, m->m_len); + + eq->pidx += ndesc; + eq->avail -= ndesc; + if (__predict_false(eq->pidx >= eq->cap)) + eq->pidx -= eq->cap; + + eq->pending += ndesc; + if (eq->pending > 16) + ring_eq_db(sc, eq); + + wrq->tx_wrs++; + m_freem(wr); + + if (eq->avail < 8) { + can_reclaim = reclaimable(eq); + eq->cidx += can_reclaim; + eq->avail += can_reclaim; + if (__predict_false(eq->cidx >= eq->cap)) + eq->cidx -= eq->cap; + } + } + + if (eq->pending) + ring_eq_db(sc, eq); + + if (wr == NULL) + wrq->head = wrq->tail = NULL; + else { + wrq->head = wr; + + KASSERT(wrq->tail->m_nextpkt == NULL, + ("%s: wrq->tail grew a tail of its own", __func__)); + + eq->flags |= EQ_STALLED; + if (callout_pending(&eq->tx_callout) == 0) + callout_reset(&eq->tx_callout, 1, t4_tx_callout, eq); + } + + return (0); } /* Per-packet header in a coalesced tx WR, before the SGL starts (in flits) */ @@ 
-852,6 +1233,8 @@ t4_eth_tx(struct ifnet *ifp, struct sge_txq *txq, struct mbuf *m) TXQ_LOCK_ASSERT_OWNED(txq); KASSERT(m, ("%s: called with nothing to do.", __func__)); + KASSERT((eq->flags & EQ_TYPEMASK) == EQ_ETH, + ("%s: eq type %d", __func__, eq->flags & EQ_TYPEMASK)); prefetch(&eq->desc[eq->pidx]); prefetch(&txq->sdesc[eq->pidx]); @@ -859,8 +1242,25 @@ t4_eth_tx(struct ifnet *ifp, struct sge_txq *txq, struct mbuf *m) txpkts.npkt = 0;/* indicates there's nothing in txpkts */ coalescing = 0; - if (eq->avail < 8) - reclaim_tx_descs(txq, 0, 8); + can_reclaim = reclaimable(eq); + if (__predict_false(eq->flags & EQ_STALLED)) { + if (can_reclaim < tx_resume_threshold(eq)) { + txq->m = m; + return (0); + } + eq->flags &= ~EQ_STALLED; + eq->unstalled++; + } + + if (__predict_false(eq->flags & EQ_DOOMED)) { + m_freem(m); + while ((m = buf_ring_dequeue_sc(txq->br)) != NULL) + m_freem(m); + return (ENETDOWN); + } + + if (eq->avail < 8 && can_reclaim) + reclaim_tx_descs(txq, can_reclaim, 32); for (; m; m = next ? next : drbr_dequeue(ifp, br)) { @@ -938,15 +1338,13 @@ t4_eth_tx(struct ifnet *ifp, struct sge_txq *txq, struct mbuf *m) ETHER_BPF_MTAP(ifp, m); if (sgl.nsegs == 0) m_freem(m); - doorbell: - /* Fewer and fewer doorbells as the queue fills up */ - if (eq->pending >= (1 << (fls(eq->qsize - eq->avail) / 2))) + if (eq->pending >= 64) ring_eq_db(sc, eq); can_reclaim = reclaimable(eq); if (can_reclaim >= 32) - reclaim_tx_descs(txq, can_reclaim, 32); + reclaim_tx_descs(txq, can_reclaim, 64); } if (txpkts.npkt > 0) @@ -957,14 +1355,17 @@ doorbell: * This can happen when we're short of tx descriptors (no_desc) or maybe * even DMA maps (no_dmamap). Either way, a credit flush and reclaim * will get things going again. - * - * If eq->avail is already 0 we know a credit flush was requested in the - * WR that reduced it to 0 so we don't need another flush (we don't have - * any descriptor for a flush WR anyway, duh). */ - if (m && eq->avail > 0 && !(eq->flags & EQ_CRFLUSHED)) { + if (m && !(eq->flags & EQ_CRFLUSHED)) { struct tx_sdesc *txsd = &txq->sdesc[eq->pidx]; + /* + * If EQ_CRFLUSHED is not set then we know we have at least one + * available descriptor because any WR that reduces eq->avail to + * 0 also sets EQ_CRFLUSHED. + */ + KASSERT(eq->avail > 0, ("%s: no space for eqflush.", __func__)); + txsd->desc_used = 1; txsd->credits = 0; write_eqflush_wr(eq); @@ -974,9 +1375,10 @@ doorbell: if (eq->pending) ring_eq_db(sc, eq); - can_reclaim = reclaimable(eq); - if (can_reclaim >= 32) - reclaim_tx_descs(txq, can_reclaim, 128); + reclaim_tx_descs(txq, 0, 128); + + if (eq->flags & EQ_STALLED && callout_pending(&eq->tx_callout) == 0) + callout_reset(&eq->tx_callout, 1, t4_tx_callout, eq); return (0); } @@ -987,24 +1389,29 @@ t4_update_fl_bufsize(struct ifnet *ifp) struct port_info *pi = ifp->if_softc; struct sge_rxq *rxq; struct sge_fl *fl; - int i; + int i, bufsize; + /* large enough for a frame even when VLAN extraction is disabled */ + bufsize = FL_PKTSHIFT + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + + ifp->if_mtu; for_each_rxq(pi, i, rxq) { fl = &rxq->fl; FL_LOCK(fl); - set_fl_tag_idx(fl, ifp->if_mtu); + set_fl_tag_idx(fl, bufsize); FL_UNLOCK(fl); } } -/* - * A non-NULL handler indicates this iq will not receive direct interrupts, the - * handler will be invoked by an interrupt queue. 
- */ +int +can_resume_tx(struct sge_eq *eq) +{ + return (reclaimable(eq) >= tx_resume_threshold(eq)); +} + static inline void init_iq(struct sge_iq *iq, struct adapter *sc, int tmr_idx, int pktc_idx, - int qsize, int esize, iq_intr_handler_t *handler, char *name) + int qsize, int esize, char *name) { KASSERT(tmr_idx >= 0 && tmr_idx < SGE_NTIMERS, ("%s: bad tmr_idx %d", __func__, tmr_idx)); @@ -1018,22 +1425,32 @@ init_iq(struct sge_iq *iq, struct adapter *sc, int tmr_idx, int pktc_idx, iq->intr_pktc_idx = pktc_idx; iq->qsize = roundup(qsize, 16); /* See FW_IQ_CMD/iqsize */ iq->esize = max(esize, 16); /* See FW_IQ_CMD/iqesize */ - iq->handler = handler; strlcpy(iq->lockname, name, sizeof(iq->lockname)); } static inline void -init_fl(struct sge_fl *fl, int qsize, char *name) +init_fl(struct sge_fl *fl, int qsize, int bufsize, char *name) { fl->qsize = qsize; strlcpy(fl->lockname, name, sizeof(fl->lockname)); + set_fl_tag_idx(fl, bufsize); } static inline void -init_eq(struct sge_eq *eq, int qsize, char *name) +init_eq(struct sge_eq *eq, int eqtype, int qsize, uint8_t tx_chan, + uint16_t iqid, char *name) { + KASSERT(tx_chan < NCHAN, ("%s: bad tx channel %d", __func__, tx_chan)); + KASSERT(eqtype <= EQ_TYPEMASK, ("%s: bad qtype %d", __func__, eqtype)); + + eq->flags = eqtype & EQ_TYPEMASK; + eq->tx_chan = tx_chan; + eq->iqid = iqid; eq->qsize = qsize; strlcpy(eq->lockname, name, sizeof(eq->lockname)); + + TASK_INIT(&eq->tx_task, 0, t4_tx_task, eq); + callout_init(&eq->tx_callout, CALLOUT_MPSAFE); } static int @@ -1090,9 +1507,9 @@ free_ring(struct adapter *sc, bus_dma_tag_t tag, bus_dmamap_t map, * Returns errno on failure. Resources allocated up to that point may still be * allocated. Caller is responsible for cleanup in case this function fails. * - * If the ingress queue will take interrupts directly (iq->handler == NULL) then + * If the ingress queue will take interrupts directly (iq->flags & IQ_INTR) then * the intr_idx specifies the vector, starting from 0. Otherwise it specifies - * the index of the interrupt queue to which its interrupts will be forwarded. + * the abs_id of the ingress queue to which its interrupts should be forwarded. */ static int alloc_iq_fl(struct port_info *pi, struct sge_iq *iq, struct sge_fl *fl, @@ -1122,16 +1539,12 @@ alloc_iq_fl(struct port_info *pi, struct sge_iq *iq, struct sge_fl *fl, if (iq == &sc->sge.fwq) v |= F_FW_IQ_CMD_IQASYNCH; - if (iq->handler) { - KASSERT(intr_idx < NINTRQ(sc), - ("%s: invalid indirect intr_idx %d", __func__, intr_idx)); - v |= F_FW_IQ_CMD_IQANDST; - v |= V_FW_IQ_CMD_IQANDSTINDEX(sc->sge.intrq[intr_idx].abs_id); - } else { + if (iq->flags & IQ_INTR) { KASSERT(intr_idx < sc->intr_count, ("%s: invalid direct intr_idx %d", __func__, intr_idx)); - v |= V_FW_IQ_CMD_IQANDSTINDEX(intr_idx); - } + } else + v |= F_FW_IQ_CMD_IQANDST; + v |= V_FW_IQ_CMD_IQANDSTINDEX(intr_idx); c.type_to_iqandstindex = htobe32(v | V_FW_IQ_CMD_TYPE(FW_IQ_TYPE_FL_INT_CAP) | @@ -1176,7 +1589,6 @@ alloc_iq_fl(struct port_info *pi, struct sge_iq *iq, struct sge_fl *fl, /* Allocate space for one software descriptor per buffer. 
*/ fl->cap = (fl->qsize - SPG_LEN / RX_FL_ESIZE) * 8; FL_LOCK(fl); - set_fl_tag_idx(fl, pi->ifp->if_mtu); rc = alloc_fl_sdesc(fl); FL_UNLOCK(fl); if (rc != 0) { @@ -1186,6 +1598,7 @@ alloc_iq_fl(struct port_info *pi, struct sge_iq *iq, struct sge_fl *fl, return (rc); } fl->needed = fl->cap; + fl->lowat = roundup(sc->sge.fl_starve_threshold, 8); c.iqns_to_fl0congen |= htobe32(V_FW_IQ_CMD_FL0HOSTFCMODE(X_HOSTFCMODE_NONE) | @@ -1217,12 +1630,13 @@ alloc_iq_fl(struct port_info *pi, struct sge_iq *iq, struct sge_fl *fl, iq->intr_next = iq->intr_params; iq->cntxt_id = be16toh(c.iqid); iq->abs_id = be16toh(c.physiqid); - iq->flags |= (IQ_ALLOCATED | IQ_STARTED); + iq->flags |= IQ_ALLOCATED; cntxt_id = iq->cntxt_id - sc->sge.iq_start; - KASSERT(cntxt_id < sc->sge.niq, - ("%s: iq->cntxt_id (%d) more than the max (%d)", __func__, - cntxt_id, sc->sge.niq - 1)); + if (cntxt_id >= sc->sge.niq) { + panic ("%s: iq->cntxt_id (%d) more than the max (%d)", __func__, + cntxt_id, sc->sge.niq - 1); + } sc->sge.iqmap[cntxt_id] = iq; if (fl) { @@ -1230,29 +1644,28 @@ alloc_iq_fl(struct port_info *pi, struct sge_iq *iq, struct sge_fl *fl, fl->pidx = fl->cidx = 0; cntxt_id = fl->cntxt_id - sc->sge.eq_start; - KASSERT(cntxt_id < sc->sge.neq, - ("%s: fl->cntxt_id (%d) more than the max (%d)", __func__, - cntxt_id, sc->sge.neq - 1)); + if (cntxt_id >= sc->sge.neq) { + panic("%s: fl->cntxt_id (%d) more than the max (%d)", + __func__, cntxt_id, sc->sge.neq - 1); + } sc->sge.eqmap[cntxt_id] = (void *)fl; FL_LOCK(fl); - /* Just enough to make sure it doesn't starve right away. */ - refill_fl(sc, fl, roundup(sc->sge.fl_starve_threshold, 8), 8); + /* Enough to make sure the SGE doesn't think it's starved */ + refill_fl(sc, fl, fl->lowat); FL_UNLOCK(fl); + + iq->flags |= IQ_HAS_FL; } /* Enable IQ interrupts */ - atomic_store_rel_32(&iq->state, IQS_IDLE); + atomic_store_rel_int(&iq->state, IQS_IDLE); t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS), V_SEINTARM(iq->intr_params) | V_INGRESSQID(iq->cntxt_id)); return (0); } -/* - * This can be called with the iq/fl in any state - fully allocated and - * functional, partially allocated, even all-zeroed out. - */ static int free_iq_fl(struct port_info *pi, struct sge_iq *iq, struct sge_fl *fl) { @@ -1265,23 +1678,7 @@ free_iq_fl(struct port_info *pi, struct sge_iq *iq, struct sge_fl *fl) dev = pi ? pi->dev : sc->dev; - if (iq->flags & IQ_STARTED) { - rc = -t4_iq_start_stop(sc, sc->mbox, 0, sc->pf, 0, - iq->cntxt_id, fl ? fl->cntxt_id : 0xffff, 0xffff); - if (rc != 0) { - device_printf(dev, - "failed to stop queue %p: %d\n", iq, rc); - return (rc); - } - iq->flags &= ~IQ_STARTED; - - /* Synchronize with the interrupt handler */ - while (!atomic_cmpset_32(&iq->state, IQS_IDLE, IQS_DISABLED)) - pause("iqfree", hz / 1000); - } - if (iq->flags & IQ_ALLOCATED) { - rc = -t4_iq_free(sc, sc->mbox, sc->pf, 0, FW_IQ_TYPE_FL_INT_CAP, iq->cntxt_id, fl ? 
fl->cntxt_id : 0xffff, 0xffff); @@ -1322,76 +1719,84 @@ free_iq_fl(struct port_info *pi, struct sge_iq *iq, struct sge_fl *fl) } static int -alloc_intrq(struct adapter *sc, int port_idx, int intrq_idx, int intr_idx) +alloc_fwq(struct adapter *sc) { - int rc; - struct sysctl_oid *oid; - struct sysctl_oid_list *children; + int rc, intr_idx; + struct sge_iq *fwq = &sc->sge.fwq; char name[16]; - struct sge_iq *intrq = &sc->sge.intrq[intrq_idx]; + struct sysctl_oid *oid = device_get_sysctl_tree(sc->dev); + struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid); - rc = alloc_iq_fl(sc->port[port_idx], intrq, NULL, intr_idx, -1); - if (rc != 0) + snprintf(name, sizeof(name), "%s fwq", device_get_nameunit(sc->dev)); + init_iq(fwq, sc, 0, 0, FW_IQ_QSIZE, FW_IQ_ESIZE, name); + fwq->flags |= IQ_INTR; /* always */ + intr_idx = sc->intr_count > 1 ? 1 : 0; + rc = alloc_iq_fl(sc->port[0], fwq, NULL, intr_idx, -1); + if (rc != 0) { + device_printf(sc->dev, + "failed to create firmware event queue: %d\n", rc); return (rc); + } - children = SYSCTL_CHILDREN(sc->oid_intrq); - - snprintf(name, sizeof(name), "%d", intrq_idx); - oid = SYSCTL_ADD_NODE(&sc->ctx, children, OID_AUTO, name, CTLFLAG_RD, - NULL, "interrupt queue"); + oid = SYSCTL_ADD_NODE(&sc->ctx, children, OID_AUTO, "fwq", CTLFLAG_RD, + NULL, "firmware event queue"); children = SYSCTL_CHILDREN(oid); + SYSCTL_ADD_PROC(&sc->ctx, children, OID_AUTO, "abs_id", + CTLTYPE_INT | CTLFLAG_RD, &fwq->abs_id, 0, sysctl_uint16, "I", + "absolute id of the queue"); + SYSCTL_ADD_PROC(&sc->ctx, children, OID_AUTO, "cntxt_id", + CTLTYPE_INT | CTLFLAG_RD, &fwq->cntxt_id, 0, sysctl_uint16, "I", + "SGE context id of the queue"); SYSCTL_ADD_PROC(&sc->ctx, children, OID_AUTO, "cidx", - CTLTYPE_INT | CTLFLAG_RD, &intrq->cidx, 0, sysctl_uint16, "I", + CTLTYPE_INT | CTLFLAG_RD, &fwq->cidx, 0, sysctl_uint16, "I", "consumer index"); - return (rc); + return (0); } static int -free_intrq(struct sge_iq *iq) +free_fwq(struct adapter *sc) { - return free_iq_fl(NULL, iq, NULL); - + return free_iq_fl(NULL, &sc->sge.fwq, NULL); } static int -alloc_fwq(struct adapter *sc, int intr_idx) +alloc_mgmtq(struct adapter *sc) { int rc; - struct sysctl_oid_list *children; - struct sge_iq *fwq = &sc->sge.fwq; - - rc = alloc_iq_fl(sc->port[0], fwq, NULL, intr_idx, -1); - if (rc != 0) - return (rc); + struct sge_wrq *mgmtq = &sc->sge.mgmtq; + char name[16]; + struct sysctl_oid *oid = device_get_sysctl_tree(sc->dev); + struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid); - children = SYSCTL_CHILDREN(sc->oid_fwq); + oid = SYSCTL_ADD_NODE(&sc->ctx, children, OID_AUTO, "mgmtq", CTLFLAG_RD, + NULL, "management queue"); - SYSCTL_ADD_PROC(&sc->ctx, children, OID_AUTO, "abs_id", - CTLTYPE_INT | CTLFLAG_RD, &fwq->abs_id, 0, sysctl_uint16, "I", - "absolute id of the queue"); - SYSCTL_ADD_PROC(&sc->ctx, children, OID_AUTO, "cntxt_id", - CTLTYPE_INT | CTLFLAG_RD, &fwq->cntxt_id, 0, sysctl_uint16, "I", - "SGE context id of the queue"); - SYSCTL_ADD_PROC(&sc->ctx, children, OID_AUTO, "cidx", - CTLTYPE_INT | CTLFLAG_RD, &fwq->cidx, 0, sysctl_uint16, "I", - "consumer index"); + snprintf(name, sizeof(name), "%s mgmtq", device_get_nameunit(sc->dev)); + init_eq(&mgmtq->eq, EQ_CTRL, CTRL_EQ_QSIZE, sc->port[0]->tx_chan, + sc->sge.fwq.cntxt_id, name); + rc = alloc_wrq(sc, NULL, mgmtq, oid); + if (rc != 0) { + device_printf(sc->dev, + "failed to create management queue: %d\n", rc); + return (rc); + } - return (rc); + return (0); } static int -free_fwq(struct sge_iq *iq) +free_mgmtq(struct adapter *sc) { - return 
free_iq_fl(NULL, iq, NULL); + return free_wrq(sc, &sc->sge.mgmtq); } static int -alloc_rxq(struct port_info *pi, struct sge_rxq *rxq, int intr_idx, int idx) +alloc_rxq(struct port_info *pi, struct sge_rxq *rxq, int intr_idx, int idx, + struct sysctl_oid *oid) { int rc; - struct sysctl_oid *oid; struct sysctl_oid_list *children; char name[16]; @@ -1400,7 +1805,7 @@ alloc_rxq(struct port_info *pi, struct sge_rxq *rxq, int intr_idx, int idx) return (rc); FL_LOCK(&rxq->fl); - refill_fl(pi->adapter, &rxq->fl, rxq->fl.needed / 8, 8); + refill_fl(pi->adapter, &rxq->fl, rxq->fl.needed / 8); FL_UNLOCK(&rxq->fl); #ifdef INET @@ -1410,11 +1815,11 @@ alloc_rxq(struct port_info *pi, struct sge_rxq *rxq, int intr_idx, int idx) rxq->lro.ifp = pi->ifp; /* also indicates LRO init'ed */ if (pi->ifp->if_capenable & IFCAP_LRO) - rxq->flags |= RXQ_LRO_ENABLED; + rxq->iq.flags |= IQ_LRO_ENABLED; #endif rxq->ifp = pi->ifp; - children = SYSCTL_CHILDREN(pi->oid_rxq); + children = SYSCTL_CHILDREN(oid); snprintf(name, sizeof(name), "%d", idx); oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, name, CTLFLAG_RD, @@ -1477,32 +1882,71 @@ free_rxq(struct port_info *pi, struct sge_rxq *rxq) return (rc); } +#ifndef TCP_OFFLOAD_DISABLE static int -alloc_ctrlq(struct adapter *sc, struct sge_ctrlq *ctrlq, int idx) +alloc_ofld_rxq(struct port_info *pi, struct sge_ofld_rxq *ofld_rxq, + int intr_idx, int idx, struct sysctl_oid *oid) { - int rc, cntxt_id; - size_t len; - struct fw_eq_ctrl_cmd c; - struct sge_eq *eq = &ctrlq->eq; - char name[16]; - struct sysctl_oid *oid; + int rc; struct sysctl_oid_list *children; + char name[16]; - mtx_init(&eq->eq_lock, eq->lockname, NULL, MTX_DEF); - - len = eq->qsize * CTRL_EQ_ESIZE; - rc = alloc_ring(sc, len, &eq->desc_tag, &eq->desc_map, - &eq->ba, (void **)&eq->desc); - if (rc) + rc = alloc_iq_fl(pi, &ofld_rxq->iq, &ofld_rxq->fl, intr_idx, + 1 << pi->tx_chan); + if (rc != 0) return (rc); - eq->cap = eq->qsize - SPG_LEN / CTRL_EQ_ESIZE; - eq->spg = (void *)&eq->desc[eq->cap]; - eq->avail = eq->cap - 1; /* one less to avoid cidx = pidx */ - if (sc->flags & INTR_SHARED) - eq->iqid = sc->sge.intrq[idx % NINTRQ(sc)].cntxt_id; - else - eq->iqid = sc->sge.intrq[sc->port[idx]->first_rxq].cntxt_id; + children = SYSCTL_CHILDREN(oid); + + snprintf(name, sizeof(name), "%d", idx); + oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, name, CTLFLAG_RD, + NULL, "rx queue"); + children = SYSCTL_CHILDREN(oid); + + SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "abs_id", + CTLTYPE_INT | CTLFLAG_RD, &ofld_rxq->iq.abs_id, 0, sysctl_uint16, + "I", "absolute id of the queue"); + SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "cntxt_id", + CTLTYPE_INT | CTLFLAG_RD, &ofld_rxq->iq.cntxt_id, 0, sysctl_uint16, + "I", "SGE context id of the queue"); + SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "cidx", + CTLTYPE_INT | CTLFLAG_RD, &ofld_rxq->iq.cidx, 0, sysctl_uint16, "I", + "consumer index"); + + children = SYSCTL_CHILDREN(oid); + oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "fl", CTLFLAG_RD, + NULL, "freelist"); + children = SYSCTL_CHILDREN(oid); + + SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "cntxt_id", + CTLTYPE_INT | CTLFLAG_RD, &ofld_rxq->fl.cntxt_id, 0, sysctl_uint16, + "I", "SGE context id of the queue"); + SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "cidx", CTLFLAG_RD, + &ofld_rxq->fl.cidx, 0, "consumer index"); + SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "pidx", CTLFLAG_RD, + &ofld_rxq->fl.pidx, 0, "producer index"); + + return (rc); +} + +static int +free_ofld_rxq(struct port_info *pi, 
struct sge_ofld_rxq *ofld_rxq) +{ + int rc; + + rc = free_iq_fl(pi, &ofld_rxq->iq, &ofld_rxq->fl); + if (rc == 0) + bzero(ofld_rxq, sizeof(*ofld_rxq)); + + return (rc); +} +#endif + +static int +ctrl_eq_alloc(struct adapter *sc, struct sge_eq *eq) +{ + int rc, cntxt_id; + struct fw_eq_ctrl_cmd c; bzero(&c, sizeof(c)); @@ -1515,7 +1959,7 @@ alloc_ctrlq(struct adapter *sc, struct sge_ctrlq *ctrlq, int idx) c.physeqid_pkd = htobe32(0); c.fetchszm_to_iqid = htobe32(V_FW_EQ_CTRL_CMD_HOSTFCMODE(X_HOSTFCMODE_STATUS_PAGE) | - V_FW_EQ_CTRL_CMD_PCIECHN(sc->port[idx]->tx_chan) | + V_FW_EQ_CTRL_CMD_PCIECHN(eq->tx_chan) | F_FW_EQ_CTRL_CMD_FETCHRO | V_FW_EQ_CTRL_CMD_IQID(eq->iqid)); c.dcaen_to_eqsize = htobe32(V_FW_EQ_CTRL_CMD_FBMIN(X_FETCHBURSTMIN_64B) | @@ -1527,51 +1971,191 @@ alloc_ctrlq(struct adapter *sc, struct sge_ctrlq *ctrlq, int idx) rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c); if (rc != 0) { device_printf(sc->dev, - "failed to create control queue %d: %d\n", idx, rc); + "failed to create control queue %d: %d\n", eq->tx_chan, rc); return (rc); } + eq->flags |= EQ_ALLOCATED; - eq->pidx = eq->cidx = 0; eq->cntxt_id = G_FW_EQ_CTRL_CMD_EQID(be32toh(c.cmpliqid_eqid)); - eq->flags |= (EQ_ALLOCATED | EQ_STARTED); + cntxt_id = eq->cntxt_id - sc->sge.eq_start; + if (cntxt_id >= sc->sge.neq) + panic("%s: eq->cntxt_id (%d) more than the max (%d)", __func__, + cntxt_id, sc->sge.neq - 1); + sc->sge.eqmap[cntxt_id] = eq; + + return (rc); +} + +static int +eth_eq_alloc(struct adapter *sc, struct port_info *pi, struct sge_eq *eq) +{ + int rc, cntxt_id; + struct fw_eq_eth_cmd c; + + bzero(&c, sizeof(c)); + c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_EQ_ETH_CMD) | F_FW_CMD_REQUEST | + F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_ETH_CMD_PFN(sc->pf) | + V_FW_EQ_ETH_CMD_VFN(0)); + c.alloc_to_len16 = htobe32(F_FW_EQ_ETH_CMD_ALLOC | + F_FW_EQ_ETH_CMD_EQSTART | FW_LEN16(c)); + c.viid_pkd = htobe32(V_FW_EQ_ETH_CMD_VIID(pi->viid)); + c.fetchszm_to_iqid = + htobe32(V_FW_EQ_ETH_CMD_HOSTFCMODE(X_HOSTFCMODE_STATUS_PAGE) | + V_FW_EQ_ETH_CMD_PCIECHN(eq->tx_chan) | F_FW_EQ_ETH_CMD_FETCHRO | + V_FW_EQ_ETH_CMD_IQID(eq->iqid)); + c.dcaen_to_eqsize = htobe32(V_FW_EQ_ETH_CMD_FBMIN(X_FETCHBURSTMIN_64B) | + V_FW_EQ_ETH_CMD_FBMAX(X_FETCHBURSTMAX_512B) | + V_FW_EQ_ETH_CMD_CIDXFTHRESH(X_CIDXFLUSHTHRESH_32) | + V_FW_EQ_ETH_CMD_EQSIZE(eq->qsize)); + c.eqaddr = htobe64(eq->ba); + + rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c); + if (rc != 0) { + device_printf(pi->dev, + "failed to create Ethernet egress queue: %d\n", rc); + return (rc); + } + eq->flags |= EQ_ALLOCATED; + + eq->cntxt_id = G_FW_EQ_ETH_CMD_EQID(be32toh(c.eqid_pkd)); cntxt_id = eq->cntxt_id - sc->sge.eq_start; - KASSERT(cntxt_id < sc->sge.neq, - ("%s: eq->cntxt_id (%d) more than the max (%d)", __func__, - cntxt_id, sc->sge.neq - 1)); + if (cntxt_id >= sc->sge.neq) + panic("%s: eq->cntxt_id (%d) more than the max (%d)", __func__, + cntxt_id, sc->sge.neq - 1); sc->sge.eqmap[cntxt_id] = eq; - children = SYSCTL_CHILDREN(sc->oid_ctrlq); + return (rc); +} - snprintf(name, sizeof(name), "%d", idx); - oid = SYSCTL_ADD_NODE(&sc->ctx, children, OID_AUTO, name, CTLFLAG_RD, - NULL, "ctrl queue"); - children = SYSCTL_CHILDREN(oid); +#ifndef TCP_OFFLOAD_DISABLE +static int +ofld_eq_alloc(struct adapter *sc, struct port_info *pi, struct sge_eq *eq) +{ + int rc, cntxt_id; + struct fw_eq_ofld_cmd c; - SYSCTL_ADD_PROC(&sc->ctx, children, OID_AUTO, "pidx", - CTLTYPE_INT | CTLFLAG_RD, &ctrlq->eq.pidx, 0, sysctl_uint16, "I", - "producer index"); - SYSCTL_ADD_UINT(&sc->ctx, children, OID_AUTO, 
"no_desc", CTLFLAG_RD, - &ctrlq->no_desc, 0, - "# of times ctrlq ran out of hardware descriptors"); + bzero(&c, sizeof(c)); + + c.op_to_vfn = htonl(V_FW_CMD_OP(FW_EQ_OFLD_CMD) | F_FW_CMD_REQUEST | + F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_OFLD_CMD_PFN(sc->pf) | + V_FW_EQ_OFLD_CMD_VFN(0)); + c.alloc_to_len16 = htonl(F_FW_EQ_OFLD_CMD_ALLOC | + F_FW_EQ_OFLD_CMD_EQSTART | FW_LEN16(c)); + c.fetchszm_to_iqid = + htonl(V_FW_EQ_OFLD_CMD_HOSTFCMODE(X_HOSTFCMODE_STATUS_PAGE) | + V_FW_EQ_OFLD_CMD_PCIECHN(eq->tx_chan) | + F_FW_EQ_OFLD_CMD_FETCHRO | V_FW_EQ_OFLD_CMD_IQID(eq->iqid)); + c.dcaen_to_eqsize = + htobe32(V_FW_EQ_OFLD_CMD_FBMIN(X_FETCHBURSTMIN_64B) | + V_FW_EQ_OFLD_CMD_FBMAX(X_FETCHBURSTMAX_512B) | + V_FW_EQ_OFLD_CMD_CIDXFTHRESH(X_CIDXFLUSHTHRESH_32) | + V_FW_EQ_OFLD_CMD_EQSIZE(eq->qsize)); + c.eqaddr = htobe64(eq->ba); + + rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c); + if (rc != 0) { + device_printf(pi->dev, + "failed to create egress queue for TCP offload: %d\n", rc); + return (rc); + } + eq->flags |= EQ_ALLOCATED; + + eq->cntxt_id = G_FW_EQ_OFLD_CMD_EQID(be32toh(c.eqid_pkd)); + cntxt_id = eq->cntxt_id - sc->sge.eq_start; + if (cntxt_id >= sc->sge.neq) + panic("%s: eq->cntxt_id (%d) more than the max (%d)", __func__, + cntxt_id, sc->sge.neq - 1); + sc->sge.eqmap[cntxt_id] = eq; + + return (rc); +} +#endif + +static int +alloc_eq(struct adapter *sc, struct port_info *pi, struct sge_eq *eq) +{ + int rc; + size_t len; + + mtx_init(&eq->eq_lock, eq->lockname, NULL, MTX_DEF); + + len = eq->qsize * EQ_ESIZE; + rc = alloc_ring(sc, len, &eq->desc_tag, &eq->desc_map, + &eq->ba, (void **)&eq->desc); + if (rc) + return (rc); + + eq->cap = eq->qsize - SPG_LEN / EQ_ESIZE; + eq->spg = (void *)&eq->desc[eq->cap]; + eq->avail = eq->cap - 1; /* one less to avoid cidx = pidx */ + eq->pidx = eq->cidx = 0; + + switch (eq->flags & EQ_TYPEMASK) { + case EQ_CTRL: + rc = ctrl_eq_alloc(sc, eq); + break; + + case EQ_ETH: + rc = eth_eq_alloc(sc, pi, eq); + break; + +#ifndef TCP_OFFLOAD_DISABLE + case EQ_OFLD: + rc = ofld_eq_alloc(sc, pi, eq); + break; +#endif + + default: + panic("%s: invalid eq type %d.", __func__, + eq->flags & EQ_TYPEMASK); + } + if (rc != 0) { + device_printf(sc->dev, + "failed to allocate egress queue(%d): %d", + eq->flags & EQ_TYPEMASK, rc); + } + + eq->tx_callout.c_cpu = eq->cntxt_id % mp_ncpus; return (rc); } static int -free_ctrlq(struct adapter *sc, struct sge_ctrlq *ctrlq) +free_eq(struct adapter *sc, struct sge_eq *eq) { int rc; - struct sge_eq *eq = &ctrlq->eq; - if (eq->flags & (EQ_ALLOCATED | EQ_STARTED)) { - rc = -t4_ctrl_eq_free(sc, sc->mbox, sc->pf, 0, eq->cntxt_id); + if (eq->flags & EQ_ALLOCATED) { + switch (eq->flags & EQ_TYPEMASK) { + case EQ_CTRL: + rc = -t4_ctrl_eq_free(sc, sc->mbox, sc->pf, 0, + eq->cntxt_id); + break; + + case EQ_ETH: + rc = -t4_eth_eq_free(sc, sc->mbox, sc->pf, 0, + eq->cntxt_id); + break; + +#ifndef TCP_OFFLOAD_DISABLE + case EQ_OFLD: + rc = -t4_ofld_eq_free(sc, sc->mbox, sc->pf, 0, + eq->cntxt_id); + break; +#endif + + default: + panic("%s: invalid eq type %d.", __func__, + eq->flags & EQ_TYPEMASK); + } if (rc != 0) { device_printf(sc->dev, - "failed to free ctrl queue %p: %d\n", eq, rc); + "failed to free egress queue (%d): %d\n", + eq->flags & EQ_TYPEMASK, rc); return (rc); } - eq->flags &= ~(EQ_ALLOCATED | EQ_STARTED); + eq->flags &= ~EQ_ALLOCATED; } free_ring(sc, eq->desc_tag, eq->desc_map, eq->ba, eq->desc); @@ -1579,47 +2163,77 @@ free_ctrlq(struct adapter *sc, struct sge_ctrlq *ctrlq) if (mtx_initialized(&eq->eq_lock)) 
mtx_destroy(&eq->eq_lock); - bzero(ctrlq, sizeof(*ctrlq)); + bzero(eq, sizeof(*eq)); return (0); } static int -alloc_txq(struct port_info *pi, struct sge_txq *txq, int idx) +alloc_wrq(struct adapter *sc, struct port_info *pi, struct sge_wrq *wrq, + struct sysctl_oid *oid) { - int rc, cntxt_id; - size_t len; + int rc; + struct sysctl_ctx_list *ctx = pi ? &pi->ctx : &sc->ctx; + struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid); + + rc = alloc_eq(sc, pi, &wrq->eq); + if (rc) + return (rc); + + wrq->adapter = sc; + + SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "cntxt_id", CTLFLAG_RD, + &wrq->eq.cntxt_id, 0, "SGE context id of the queue"); + SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cidx", + CTLTYPE_INT | CTLFLAG_RD, &wrq->eq.cidx, 0, sysctl_uint16, "I", + "consumer index"); + SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "pidx", + CTLTYPE_INT | CTLFLAG_RD, &wrq->eq.pidx, 0, sysctl_uint16, "I", + "producer index"); + SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "tx_wrs", CTLFLAG_RD, + &wrq->tx_wrs, "# of work requests"); + SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "no_desc", CTLFLAG_RD, + &wrq->no_desc, 0, + "# of times queue ran out of hardware descriptors"); + SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "unstalled", CTLFLAG_RD, + &wrq->eq.unstalled, 0, "# of times queue recovered after stall"); + + + return (rc); +} + +static int +free_wrq(struct adapter *sc, struct sge_wrq *wrq) +{ + int rc; + + rc = free_eq(sc, &wrq->eq); + if (rc) + return (rc); + + bzero(wrq, sizeof(*wrq)); + return (0); +} + +static int +alloc_txq(struct port_info *pi, struct sge_txq *txq, int idx, + struct sysctl_oid *oid) +{ + int rc; struct adapter *sc = pi->adapter; - struct fw_eq_eth_cmd c; struct sge_eq *eq = &txq->eq; char name[16]; - struct sysctl_oid *oid; - struct sysctl_oid_list *children; - struct sge_iq *intrq; - - txq->ifp = pi->ifp; - TASK_INIT(&txq->resume_tx, 0, cxgbe_txq_start, txq); - - mtx_init(&eq->eq_lock, eq->lockname, NULL, MTX_DEF); + struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid); - len = eq->qsize * TX_EQ_ESIZE; - rc = alloc_ring(sc, len, &eq->desc_tag, &eq->desc_map, - &eq->ba, (void **)&eq->desc); + rc = alloc_eq(sc, pi, eq); if (rc) return (rc); - eq->cap = eq->qsize - SPG_LEN / TX_EQ_ESIZE; - eq->spg = (void *)&eq->desc[eq->cap]; - eq->avail = eq->cap - 1; /* one less to avoid cidx = pidx */ + txq->ifp = pi->ifp; + txq->sdesc = malloc(eq->cap * sizeof(struct tx_sdesc), M_CXGBE, M_ZERO | M_WAITOK); txq->br = buf_ring_alloc(eq->qsize, M_CXGBE, M_WAITOK, &eq->eq_lock); - intrq = &sc->sge.intrq[0]; - if (sc->flags & INTR_SHARED) - eq->iqid = intrq[(pi->first_txq + idx) % NINTRQ(sc)].cntxt_id; - else - eq->iqid = intrq[pi->first_rxq + (idx % pi->nrxq)].cntxt_id; - rc = bus_dma_tag_create(sc->dmat, 1, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, 64 * 1024, TX_SGL_SEGS, BUS_SPACE_MAXSIZE, BUS_DMA_ALLOCNOW, NULL, NULL, &txq->tx_tag); @@ -1629,49 +2243,18 @@ alloc_txq(struct port_info *pi, struct sge_txq *txq, int idx) return (rc); } - rc = alloc_tx_maps(txq); + /* + * We can stuff ~10 frames in an 8-descriptor txpkts WR (8 is the SGE + * limit for any WR). txq->no_dmamap events shouldn't occur if maps is + * sized for the worst case. 
+ */ + rc = t4_alloc_tx_maps(&txq->txmaps, txq->tx_tag, eq->qsize * 10 / 8, + M_WAITOK); if (rc != 0) { device_printf(sc->dev, "failed to setup tx DMA maps: %d\n", rc); return (rc); } - bzero(&c, sizeof(c)); - - c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_EQ_ETH_CMD) | F_FW_CMD_REQUEST | - F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_ETH_CMD_PFN(sc->pf) | - V_FW_EQ_ETH_CMD_VFN(0)); - c.alloc_to_len16 = htobe32(F_FW_EQ_ETH_CMD_ALLOC | - F_FW_EQ_ETH_CMD_EQSTART | FW_LEN16(c)); - c.viid_pkd = htobe32(V_FW_EQ_ETH_CMD_VIID(pi->viid)); - c.fetchszm_to_iqid = - htobe32(V_FW_EQ_ETH_CMD_HOSTFCMODE(X_HOSTFCMODE_STATUS_PAGE) | - V_FW_EQ_ETH_CMD_PCIECHN(pi->tx_chan) | F_FW_EQ_ETH_CMD_FETCHRO | - V_FW_EQ_ETH_CMD_IQID(eq->iqid)); - c.dcaen_to_eqsize = htobe32(V_FW_EQ_ETH_CMD_FBMIN(X_FETCHBURSTMIN_64B) | - V_FW_EQ_ETH_CMD_FBMAX(X_FETCHBURSTMAX_512B) | - V_FW_EQ_ETH_CMD_CIDXFTHRESH(X_CIDXFLUSHTHRESH_32) | - V_FW_EQ_ETH_CMD_EQSIZE(eq->qsize)); - c.eqaddr = htobe64(eq->ba); - - rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c); - if (rc != 0) { - device_printf(pi->dev, - "failed to create egress queue: %d\n", rc); - return (rc); - } - - eq->pidx = eq->cidx = 0; - eq->cntxt_id = G_FW_EQ_ETH_CMD_EQID(be32toh(c.eqid_pkd)); - eq->flags |= (EQ_ALLOCATED | EQ_STARTED); - - cntxt_id = eq->cntxt_id - sc->sge.eq_start; - KASSERT(cntxt_id < sc->sge.neq, - ("%s: eq->cntxt_id (%d) more than the max (%d)", __func__, - cntxt_id, sc->sge.neq - 1)); - sc->sge.eqmap[cntxt_id] = eq; - - children = SYSCTL_CHILDREN(pi->oid_txq); - snprintf(name, sizeof(name), "%d", idx); oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, name, CTLFLAG_RD, NULL, "tx queue"); @@ -1709,7 +2292,9 @@ alloc_txq(struct port_info *pi, struct sge_txq *txq, int idx) SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "no_desc", CTLFLAG_RD, &txq->no_desc, 0, "# of times txq ran out of hardware descriptors"); SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "egr_update", CTLFLAG_RD, - &txq->egr_update, 0, "egress update notifications from the SGE"); + &eq->egr_update, 0, "egress update notifications from the SGE"); + SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "unstalled", CTLFLAG_RD, + &eq->unstalled, 0, "# of times txq recovered after stall"); return (rc); } @@ -1721,52 +2306,20 @@ free_txq(struct port_info *pi, struct sge_txq *txq) struct adapter *sc = pi->adapter; struct sge_eq *eq = &txq->eq; - if (eq->flags & (EQ_ALLOCATED | EQ_STARTED)) { - - /* - * Wait for the response to a credit flush if there's one - * pending. Clearing the flag tells handle_sge_egr_update or - * cxgbe_txq_start (depending on how far the response has made - * it) that they should ignore the response and wake up free_txq - * instead. - * - * The interface has been marked down by the time we get here - * (both IFF_UP and IFF_DRV_RUNNING cleared). qflush has - * emptied the tx buf_rings and we know nothing new is being - * queued for tx so we don't have to worry about a new credit - * flush request. 
- */ - TXQ_LOCK(txq); - if (eq->flags & EQ_CRFLUSHED) { - eq->flags &= ~EQ_CRFLUSHED; - msleep(txq, &eq->eq_lock, 0, "crflush", 0); - } - TXQ_UNLOCK(txq); - - rc = -t4_eth_eq_free(sc, sc->mbox, sc->pf, 0, eq->cntxt_id); - if (rc != 0) { - device_printf(pi->dev, - "failed to free egress queue %p: %d\n", eq, rc); - return (rc); - } - eq->flags &= ~(EQ_ALLOCATED | EQ_STARTED); - } - - free_ring(sc, eq->desc_tag, eq->desc_map, eq->ba, eq->desc); + rc = free_eq(sc, eq); + if (rc) + return (rc); free(txq->sdesc, M_CXGBE); - if (txq->maps) - free_tx_maps(txq); + if (txq->txmaps.maps) + t4_free_tx_maps(&txq->txmaps, txq->tx_tag); buf_ring_free(txq->br, M_CXGBE); if (txq->tx_tag) bus_dma_tag_destroy(txq->tx_tag); - if (mtx_initialized(&eq->eq_lock)) - mtx_destroy(&eq->eq_lock); - bzero(txq, sizeof(*txq)); return (0); } @@ -1822,11 +2375,13 @@ ring_fl_db(struct adapter *sc, struct sge_fl *fl) } /* - * Fill up the freelist by upto nbufs and ring its doorbell if the number of - * buffers ready to be handed to the hardware >= dbthresh. + * Fill up the freelist by upto nbufs and maybe ring its doorbell. + * + * Returns non-zero to indicate that it should be added to the list of starving + * freelists. */ -static void -refill_fl(struct adapter *sc, struct sge_fl *fl, int nbufs, int dbthresh) +static int +refill_fl(struct adapter *sc, struct sge_fl *fl, int nbufs) { __be64 *d = &fl->desc[fl->pidx]; struct fl_sdesc *sd = &fl->sdesc[fl->pidx]; @@ -1837,7 +2392,7 @@ refill_fl(struct adapter *sc, struct sge_fl *fl, int nbufs, int dbthresh) FL_LOCK_ASSERT_OWNED(fl); - if (nbufs < 0 || nbufs > fl->needed) + if (nbufs > fl->needed) nbufs = fl->needed; while (nbufs--) { @@ -1918,8 +2473,35 @@ recycled: } } - if (fl->pending >= dbthresh) + if (fl->pending >= 8) ring_fl_db(sc, fl); + + return (FL_RUNNING_LOW(fl) && !(fl->flags & FL_STARVING)); +} + +/* + * Attempt to refill all starving freelists. + */ +static void +refill_sfl(void *arg) +{ + struct adapter *sc = arg; + struct sge_fl *fl, *fl_temp; + + mtx_lock(&sc->sfl_lock); + TAILQ_FOREACH_SAFE(fl, &sc->sfl, link, fl_temp) { + FL_LOCK(fl); + refill_fl(sc, fl, 64); + if (FL_NOT_RUNNING_LOW(fl) || fl->flags & FL_DOOMED) { + TAILQ_REMOVE(&sc->sfl, fl, link); + fl->flags &= ~FL_STARVING; + } + FL_UNLOCK(fl); + } + + if (!TAILQ_EMPTY(&sc->sfl)) + callout_schedule(&sc->sfl_callout, hz / 5); + mtx_unlock(&sc->sfl_lock); } static int @@ -1993,27 +2575,22 @@ free_fl_sdesc(struct sge_fl *fl) fl->sdesc = NULL; } -static int -alloc_tx_maps(struct sge_txq *txq) +int +t4_alloc_tx_maps(struct tx_maps *txmaps, bus_dma_tag_t tx_tag, int count, + int flags) { struct tx_map *txm; - int i, rc, count; + int i, rc; - /* - * We can stuff ~10 frames in an 8-descriptor txpkts WR (8 is the SGE - * limit for any WR). txq->no_dmamap events shouldn't occur if maps is - * sized for the worst case. 
- */ - count = txq->eq.qsize * 10 / 8; - txq->map_total = txq->map_avail = count; - txq->map_cidx = txq->map_pidx = 0; + txmaps->map_total = txmaps->map_avail = count; + txmaps->map_cidx = txmaps->map_pidx = 0; - txq->maps = malloc(count * sizeof(struct tx_map), M_CXGBE, - M_ZERO | M_WAITOK); + txmaps->maps = malloc(count * sizeof(struct tx_map), M_CXGBE, + M_ZERO | flags); - txm = txq->maps; + txm = txmaps->maps; for (i = 0; i < count; i++, txm++) { - rc = bus_dmamap_create(txq->tx_tag, 0, &txm->map); + rc = bus_dmamap_create(tx_tag, 0, &txm->map); if (rc != 0) goto failed; } @@ -2022,36 +2599,36 @@ alloc_tx_maps(struct sge_txq *txq) failed: while (--i >= 0) { txm--; - bus_dmamap_destroy(txq->tx_tag, txm->map); + bus_dmamap_destroy(tx_tag, txm->map); } - KASSERT(txm == txq->maps, ("%s: EDOOFUS", __func__)); + KASSERT(txm == txmaps->maps, ("%s: EDOOFUS", __func__)); - free(txq->maps, M_CXGBE); - txq->maps = NULL; + free(txmaps->maps, M_CXGBE); + txmaps->maps = NULL; return (rc); } -static void -free_tx_maps(struct sge_txq *txq) +void +t4_free_tx_maps(struct tx_maps *txmaps, bus_dma_tag_t tx_tag) { struct tx_map *txm; int i; - txm = txq->maps; - for (i = 0; i < txq->map_total; i++, txm++) { + txm = txmaps->maps; + for (i = 0; i < txmaps->map_total; i++, txm++) { if (txm->m) { - bus_dmamap_unload(txq->tx_tag, txm->map); + bus_dmamap_unload(tx_tag, txm->map); m_freem(txm->m); txm->m = NULL; } - bus_dmamap_destroy(txq->tx_tag, txm->map); + bus_dmamap_destroy(tx_tag, txm->map); } - free(txq->maps, M_CXGBE); - txq->maps = NULL; + free(txmaps->maps, M_CXGBE); + txmaps->maps = NULL; } /* @@ -2060,7 +2637,7 @@ free_tx_maps(struct sge_txq *txq) * of immediate data. */ #define IMM_LEN ( \ - 2 * TX_EQ_ESIZE \ + 2 * EQ_ESIZE \ - sizeof(struct fw_eth_tx_pkt_wr) \ - sizeof(struct cpl_tx_pkt_core)) @@ -2078,6 +2655,7 @@ get_pkt_sgl(struct sge_txq *txq, struct mbuf **fp, struct sgl *sgl, int sgl_only) { struct mbuf *m = *fp; + struct tx_maps *txmaps; struct tx_map *txm; int rc, defragged = 0, n; @@ -2091,11 +2669,12 @@ start: sgl->nsegs = 0; if (m->m_pkthdr.len <= IMM_LEN && !sgl_only) return (0); /* nsegs = 0 tells caller to use imm. 
tx */ - if (txq->map_avail == 0) { + txmaps = &txq->txmaps; + if (txmaps->map_avail == 0) { txq->no_dmamap++; return (ENOMEM); } - txm = &txq->maps[txq->map_pidx]; + txm = &txmaps->maps[txmaps->map_pidx]; if (m->m_pkthdr.tso_segsz && m->m_len < 50) { *fp = m_pullup(m, 50); @@ -2119,9 +2698,9 @@ start: sgl->nsegs = 0; return (rc); txm->m = m; - txq->map_avail--; - if (++txq->map_pidx == txq->map_total) - txq->map_pidx = 0; + txmaps->map_avail--; + if (++txmaps->map_pidx == txmaps->map_total) + txmaps->map_pidx = 0; KASSERT(sgl->nsegs > 0 && sgl->nsegs <= TX_SGL_SEGS, ("%s: bad DMA mapping (%d segments)", __func__, sgl->nsegs)); @@ -2145,6 +2724,7 @@ start: sgl->nsegs = 0; static int free_pkt_sgl(struct sge_txq *txq, struct sgl *sgl) { + struct tx_maps *txmaps; struct tx_map *txm; TXQ_LOCK_ASSERT_OWNED(txq); @@ -2152,15 +2732,17 @@ free_pkt_sgl(struct sge_txq *txq, struct sgl *sgl) if (sgl->nsegs == 0) return (0); /* didn't use any map */ + txmaps = &txq->txmaps; + /* 1 pkt uses exactly 1 map, back it out */ - txq->map_avail++; - if (txq->map_pidx > 0) - txq->map_pidx--; + txmaps->map_avail++; + if (txmaps->map_pidx > 0) + txmaps->map_pidx--; else - txq->map_pidx = txq->map_total - 1; + txmaps->map_pidx = txmaps->map_total - 1; - txm = &txq->maps[txq->map_pidx]; + txm = &txmaps->maps[txmaps->map_pidx]; bus_dmamap_unload(txq->tx_tag, txm->map); txm->m = NULL; @@ -2206,11 +2788,14 @@ write_txpkt_wr(struct port_info *pi, struct sge_txq *txq, struct mbuf *m, /* Firmware work request header */ wr = (void *)&eq->desc[eq->pidx]; wr->op_immdlen = htobe32(V_FW_WR_OP(FW_ETH_TX_PKT_WR) | - V_FW_WR_IMMDLEN(ctrl)); + V_FW_ETH_TX_PKT_WR_IMMDLEN(ctrl)); ctrl = V_FW_WR_LEN16(howmany(nflits, 2)); - if (eq->avail == ndesc && !(eq->flags & EQ_CRFLUSHED)) { - ctrl |= F_FW_WR_EQUEQ | F_FW_WR_EQUIQ; - eq->flags |= EQ_CRFLUSHED; + if (eq->avail == ndesc) { + if (!(eq->flags & EQ_CRFLUSHED)) { + ctrl |= F_FW_WR_EQUEQ | F_FW_WR_EQUIQ; + eq->flags |= EQ_CRFLUSHED; + } + eq->flags |= EQ_STALLED; } wr->equiq_to_len16 = htobe32(ctrl); @@ -2325,6 +2910,8 @@ add_to_txpkts(struct port_info *pi, struct sge_txq *txq, struct txpkts *txpkts, TXQ_LOCK_ASSERT_OWNED(txq); + KASSERT(sgl->nsegs, ("%s: can't coalesce imm data", __func__)); + if (txpkts->npkt > 0) { flits = TXPKTS_PKT_HDR + sgl->nflits; can_coalesce = m->m_pkthdr.tso_segsz == 0 && @@ -2397,12 +2984,14 @@ write_txpkts_wr(struct sge_txq *txq, struct txpkts *txpkts) ndesc = howmany(txpkts->nflits, 8); wr = (void *)&eq->desc[eq->pidx]; - wr->op_immdlen = htobe32(V_FW_WR_OP(FW_ETH_TX_PKTS_WR) | - V_FW_WR_IMMDLEN(0)); /* immdlen does not matter in this WR */ + wr->op_pkd = htobe32(V_FW_WR_OP(FW_ETH_TX_PKTS_WR)); ctrl = V_FW_WR_LEN16(howmany(txpkts->nflits, 2)); - if (eq->avail == ndesc && !(eq->flags & EQ_CRFLUSHED)) { - ctrl |= F_FW_WR_EQUEQ | F_FW_WR_EQUIQ; - eq->flags |= EQ_CRFLUSHED; + if (eq->avail == ndesc) { + if (!(eq->flags & EQ_CRFLUSHED)) { + ctrl |= F_FW_WR_EQUEQ | F_FW_WR_EQUIQ; + eq->flags |= EQ_CRFLUSHED; + } + eq->flags |= EQ_STALLED; } wr->equiq_to_len16 = htobe32(ctrl); wr->plen = htobe16(txpkts->plen); @@ -2616,7 +3205,7 @@ reclaimable(struct sge_eq *eq) unsigned int cidx; cidx = eq->spg->cidx; /* stable snapshot */ - cidx = be16_to_cpu(cidx); + cidx = be16toh(cidx); if (cidx >= eq->cidx) return (cidx - eq->cidx); @@ -2634,11 +3223,12 @@ static int reclaim_tx_descs(struct sge_txq *txq, int can_reclaim, int n) { struct tx_sdesc *txsd; + struct tx_maps *txmaps; struct tx_map *txm; unsigned int reclaimed, maps; struct sge_eq *eq = &txq->eq; - 
EQ_LOCK_ASSERT_OWNED(eq); + TXQ_LOCK_ASSERT_OWNED(txq); if (can_reclaim == 0) can_reclaim = reclaimable(eq); @@ -2665,7 +3255,8 @@ reclaim_tx_descs(struct sge_txq *txq, int can_reclaim, int n) eq->cidx -= eq->cap; } - txm = &txq->maps[txq->map_cidx]; + txmaps = &txq->txmaps; + txm = &txmaps->maps[txmaps->map_cidx]; if (maps) prefetch(txm->m); @@ -2673,16 +3264,16 @@ reclaim_tx_descs(struct sge_txq *txq, int can_reclaim, int n) KASSERT(eq->avail < eq->cap, /* avail tops out at (cap - 1) */ ("%s: too many descriptors available", __func__)); - txq->map_avail += maps; - KASSERT(txq->map_avail <= txq->map_total, + txmaps->map_avail += maps; + KASSERT(txmaps->map_avail <= txmaps->map_total, ("%s: too many maps available", __func__)); while (maps--) { struct tx_map *next; next = txm + 1; - if (__predict_false(txq->map_cidx + 1 == txq->map_total)) - next = txq->maps; + if (__predict_false(txmaps->map_cidx + 1 == txmaps->map_total)) + next = txmaps->maps; prefetch(next->m); bus_dmamap_unload(txq->tx_tag, txm->map); @@ -2690,8 +3281,8 @@ reclaim_tx_descs(struct sge_txq *txq, int can_reclaim, int n) txm->m = NULL; txm = next; - if (__predict_false(++txq->map_cidx == txq->map_total)) - txq->map_cidx = 0; + if (__predict_false(++txmaps->map_cidx == txmaps->map_total)) + txmaps->map_cidx = 0; } return (reclaimed); @@ -2704,6 +3295,7 @@ write_eqflush_wr(struct sge_eq *eq) EQ_LOCK_ASSERT_OWNED(eq); KASSERT(eq->avail > 0, ("%s: no descriptors left.", __func__)); + KASSERT(!(eq->flags & EQ_CRFLUSHED), ("%s: flushed already", __func__)); wr = (void *)&eq->desc[eq->pidx]; bzero(wr, sizeof(*wr)); @@ -2711,7 +3303,7 @@ write_eqflush_wr(struct sge_eq *eq) wr->equiq_to_len16 = htobe32(V_FW_WR_LEN16(sizeof(*wr) / 16) | F_FW_WR_EQUEQ | F_FW_WR_EQUIQ); - eq->flags |= EQ_CRFLUSHED; + eq->flags |= (EQ_CRFLUSHED | EQ_STALLED); eq->pending++; eq->avail--; if (++eq->pidx == eq->cap) @@ -2743,118 +3335,73 @@ get_flit(bus_dma_segment_t *sgl, int nsegs, int idx) } static void -set_fl_tag_idx(struct sge_fl *fl, int mtu) +set_fl_tag_idx(struct sge_fl *fl, int bufsize) { int i; - FL_LOCK_ASSERT_OWNED(fl); - for (i = 0; i < FL_BUF_SIZES - 1; i++) { - if (FL_BUF_SIZE(i) >= (mtu + FL_PKTSHIFT)) + if (FL_BUF_SIZE(i) >= bufsize) break; } fl->tag_idx = i; } -static int -handle_sge_egr_update(struct adapter *sc, const struct cpl_sge_egr_update *cpl) -{ - unsigned int qid = G_EGR_QID(ntohl(cpl->opcode_qid)); - struct sge *s = &sc->sge; - struct sge_txq *txq; - struct port_info *pi; - - txq = (void *)s->eqmap[qid - s->eq_start]; - TXQ_LOCK(txq); - if (txq->eq.flags & EQ_CRFLUSHED) { - pi = txq->ifp->if_softc; - taskqueue_enqueue(pi->tq, &txq->resume_tx); - txq->egr_update++; - } else - wakeup_one(txq); /* txq is going away, wakeup free_txq */ - TXQ_UNLOCK(txq); - - return (0); -} - static void -handle_cpl(struct adapter *sc, struct sge_iq *iq) +add_fl_to_sfl(struct adapter *sc, struct sge_fl *fl) { - const struct rss_header *rss = (const void *)iq->cdesc; - const struct cpl_fw6_msg *cpl = (const void *)(rss + 1); - - switch (rss->opcode) { - case CPL_FW4_MSG: - case CPL_FW6_MSG: - if (cpl->type == FW6_TYPE_CMD_RPL) - t4_handle_fw_rpl(sc, cpl->data); - break; - - case CPL_SGE_EGR_UPDATE: - handle_sge_egr_update(sc, (const void *)cpl); - break; - - case CPL_SET_TCB_RPL: - filter_rpl(sc, (const void *)cpl); - break; - - default: - panic("%s: unexpected CPL opcode 0x%x", __func__, rss->opcode); + mtx_lock(&sc->sfl_lock); + FL_LOCK(fl); + if ((fl->flags & FL_DOOMED) == 0) { + fl->flags |= FL_STARVING; + TAILQ_INSERT_TAIL(&sc->sfl, fl, 
	    link);
+		callout_reset(&sc->sfl_callout, hz / 5, refill_sfl, sc);
	}
+	FL_UNLOCK(fl);
+	mtx_unlock(&sc->sfl_lock);
}
-/*
- * m0 is freed on successful transmission.
- */
static int
-ctrl_tx(struct adapter *sc, struct sge_ctrlq *ctrlq, struct mbuf *m0)
+handle_sge_egr_update(struct sge_iq *iq, const struct rss_header *rss,
+    struct mbuf *m)
{
-	struct sge_eq *eq = &ctrlq->eq;
-	int rc = 0, ndesc;
-	int can_reclaim;
-	caddr_t dst;
-	struct mbuf *m;
-
-	M_ASSERTPKTHDR(m0);
+	const struct cpl_sge_egr_update *cpl = (const void *)(rss + 1);
+	unsigned int qid = G_EGR_QID(ntohl(cpl->opcode_qid));
+	struct adapter *sc = iq->adapter;
+	struct sge *s = &sc->sge;
+	struct sge_eq *eq;

-	if (m0->m_pkthdr.len > SGE_MAX_WR_LEN) {
-		log(LOG_ERR, "%s: %s work request too long (%d)",
-		    device_get_nameunit(sc->dev), __func__, m0->m_pkthdr.len);
-		return (EMSGSIZE);
-	}
-	ndesc = howmany(m0->m_pkthdr.len, CTRL_EQ_ESIZE);
+	KASSERT(m == NULL, ("%s: payload with opcode %02x", __func__,
+	    rss->opcode));
+	eq = s->eqmap[qid - s->eq_start];

	EQ_LOCK(eq);
+	KASSERT(eq->flags & EQ_CRFLUSHED,
+	    ("%s: unsolicited egress update", __func__));
+	eq->flags &= ~EQ_CRFLUSHED;
+	eq->egr_update++;
+
+	if (__predict_false(eq->flags & EQ_DOOMED))
+		wakeup_one(eq);
+	else if (eq->flags & EQ_STALLED && can_resume_tx(eq))
+		taskqueue_enqueue(sc->tq[eq->tx_chan], &eq->tx_task);
+	EQ_UNLOCK(eq);

-	can_reclaim = reclaimable(eq);
-	eq->cidx += can_reclaim;
-	eq->avail += can_reclaim;
-	if (__predict_false(eq->cidx >= eq->cap))
-		eq->cidx -= eq->cap;
-
-	if (eq->avail < ndesc) {
-		rc = EAGAIN;
-		ctrlq->no_desc++;
-		goto failed;
-	}
+	return (0);
+}

-	dst = (void *)&eq->desc[eq->pidx];
-	for (m = m0; m; m = m->m_next)
-		copy_to_txd(eq, mtod(m, caddr_t), &dst, m->m_len);
+static int
+handle_fw_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
+{
+	const struct cpl_fw6_msg *cpl = (const void *)(rss + 1);

-	eq->pidx += ndesc;
-	if (__predict_false(eq->pidx >= eq->cap))
-		eq->pidx -= eq->cap;
+	KASSERT(m == NULL, ("%s: payload with opcode %02x", __func__,
+	    rss->opcode));

-	eq->pending += ndesc;
-	ring_eq_db(sc, eq);
-failed:
-	EQ_UNLOCK(eq);
-	if (rc == 0)
-		m_freem(m0);
+	if (cpl->type == FW6_TYPE_CMD_RPL)
+		t4_handle_fw_rpl(iq->adapter, cpl->data);

-	return (rc);
+	return (0);
}

static int
diff --git a/sys/modules/cxgbe/Makefile b/sys/modules/cxgbe/Makefile
index 2e834461fd1c..1d69f761422e 100644
--- a/sys/modules/cxgbe/Makefile
+++ b/sys/modules/cxgbe/Makefile
@@ -3,5 +3,6 @@
#

SUBDIR = if_cxgbe
+SUBDIR+= firmware

.include <bsd.subdir.mk>
diff --git a/sys/modules/cxgbe/firmware/Makefile b/sys/modules/cxgbe/firmware/Makefile
new file mode 100644
index 000000000000..035de02c0376
--- /dev/null
+++ b/sys/modules/cxgbe/firmware/Makefile
@@ -0,0 +1,27 @@
+#
+# $FreeBSD$
+#
+
+T4FW = ${.CURDIR}/../../../dev/cxgbe/firmware
+.PATH: ${T4FW}
+
+KMOD = t4fw_cfg
+FIRMWS = ${KMOD}.txt:${KMOD}:1.0.0.0
+
+# You can have additional configuration files in the ${T4FW} directory.
+# t4fw_cfg_<name>.txt
+CFG_FILES != cd ${T4FW} && echo ${KMOD}_*.txt
+.for F in ${CFG_FILES}
+.if exists(${F})
+FIRMWS += ${F}:${F:C/.txt//}:1.0.0.0
+.endif
+.endfor
+
+# The firmware binary is optional.
+# t4fw-<a>.<b>.<c>.<d>.bin
+FW_BIN != cd ${T4FW} && echo t4fw-*.bin
+.if exists(${FW_BIN})
+FIRMWS += ${FW_BIN}:t4fw:${FW_BIN:C/t4fw-//:C/.bin//}
+.endif
+
+.include <bsd.kmod.mk>
diff --git a/tools/tools/cxgbetool/cxgbetool.c b/tools/tools/cxgbetool/cxgbetool.c
index da6bfba5d04f..06fa50bffc0b 100644
--- a/tools/tools/cxgbetool/cxgbetool.c
+++ b/tools/tools/cxgbetool/cxgbetool.c
@@ -396,12 +396,12 @@ do_show_info_header(uint32_t mode)
		printf (" Port");
		break;

-	case T4_FILTER_OVLAN:
-		printf (" vld:oVLAN");
+	case T4_FILTER_VNIC:
+		printf (" vld:VNIC");
		break;

-	case T4_FILTER_IVLAN:
-		printf (" vld:iVLAN");
+	case T4_FILTER_VLAN:
+		printf (" vld:VLAN");
		break;

	case T4_FILTER_IP_TOS:
@@ -653,18 +653,18 @@ do_show_one_filter_info(struct t4_filter *t, uint32_t mode)
		printf(" %1d/%1d", t->fs.val.iport, t->fs.mask.iport);
		break;

-	case T4_FILTER_OVLAN:
+	case T4_FILTER_VNIC:
		printf(" %1d:%1x:%02x/%1d:%1x:%02x",
-		    t->fs.val.ovlan_vld, (t->fs.val.ovlan >> 7) & 0x7,
-		    t->fs.val.ovlan & 0x7f, t->fs.mask.ovlan_vld,
-		    (t->fs.mask.ovlan >> 7) & 0x7,
-		    t->fs.mask.ovlan & 0x7f);
+		    t->fs.val.vnic_vld, (t->fs.val.vnic >> 7) & 0x7,
+		    t->fs.val.vnic & 0x7f, t->fs.mask.vnic_vld,
+		    (t->fs.mask.vnic >> 7) & 0x7,
+		    t->fs.mask.vnic & 0x7f);
		break;

-	case T4_FILTER_IVLAN:
+	case T4_FILTER_VLAN:
		printf(" %1d:%04x/%1d:%04x",
-		    t->fs.val.ivlan_vld, t->fs.val.ivlan,
-		    t->fs.mask.ivlan_vld, t->fs.mask.ivlan);
+		    t->fs.val.vlan_vld, t->fs.val.vlan,
+		    t->fs.mask.vlan_vld, t->fs.mask.vlan);
		break;

	case T4_FILTER_IP_TOS:
@@ -830,11 +830,11 @@ get_filter_mode(void)
	if (mode & T4_FILTER_IP_TOS)
		printf("tos ");

-	if (mode & T4_FILTER_IVLAN)
-		printf("ivlan ");
+	if (mode & T4_FILTER_VLAN)
+		printf("vlan ");

-	if (mode & T4_FILTER_OVLAN)
-		printf("ovlan ");
+	if (mode & T4_FILTER_VNIC)
+		printf("vnic ");

	if (mode & T4_FILTER_PORT)
		printf("iport ");
@@ -868,11 +868,12 @@ set_filter_mode(int argc, const char *argv[])
		if (!strcmp(argv[0], "tos"))
			mode |= T4_FILTER_IP_TOS;

-		if (!strcmp(argv[0], "ivlan"))
-			mode |= T4_FILTER_IVLAN;
+		if (!strcmp(argv[0], "vlan"))
+			mode |= T4_FILTER_VLAN;

-		if (!strcmp(argv[0], "ovlan"))
-			mode |= T4_FILTER_OVLAN;
+		if (!strcmp(argv[0], "ovlan") ||
+		    !strcmp(argv[0], "vnic"))
+			mode |= T4_FILTER_VNIC;

		if (!strcmp(argv[0], "iport"))
			mode |= T4_FILTER_PORT;
@@ -936,15 +937,20 @@ set_filter(uint32_t idx, int argc, const char *argv[])
			t.fs.val.iport = val;
			t.fs.mask.iport = mask;
		} else if (!parse_val_mask("ovlan", args, &val, &mask)) {
-			t.fs.val.ovlan = val;
-			t.fs.mask.ovlan = mask;
-			t.fs.val.ovlan_vld = 1;
-			t.fs.mask.ovlan_vld = 1;
+			t.fs.val.vnic = val;
+			t.fs.mask.vnic = mask;
+			t.fs.val.vnic_vld = 1;
+			t.fs.mask.vnic_vld = 1;
+		} else if (!parse_val_mask("vnic", args, &val, &mask)) {
+			t.fs.val.vnic = val;
+			t.fs.mask.vnic = mask;
+			t.fs.val.vnic_vld = 1;
+			t.fs.mask.vnic_vld = 1;
+		} else if (!parse_val_mask("vlan", args, &val, &mask)) {
+			t.fs.val.vlan = val;
+			t.fs.mask.vlan = mask;
+			t.fs.val.vlan_vld = 1;
+			t.fs.mask.vlan_vld = 1;
		} else if (!parse_val_mask("tos", args, &val, &mask)) {
			t.fs.val.tos = val;
			t.fs.mask.tos = mask;