aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPeter Grehan <grehan@FreeBSD.org>2013-05-03 02:03:36 +0000
committerPeter Grehan <grehan@FreeBSD.org>2013-05-03 02:03:36 +0000
commite5548b5cde2f430b71abb29e251ba8bb2210bf73 (patch)
tree90796125d475b8430c123a84eebe43265505ec0f
downloadsrc-e5548b5cde2f430b71abb29e251ba8bb2210bf73.tar.gz
src-e5548b5cde2f430b71abb29e251ba8bb2210bf73.zip
Initial import of the Microsoft HyperV 'enlightened' drivers.vendor/hyperv/20130502
From https://github.com/FreeBSDonHyper-V/VendorBranchForFreeBSDonHyper-V rev: 99eaa0ddb0485c9d76046664100f6beb1a0a0c58
Notes
Notes: svn path=/vendor/hyperv/dist/; revision=250199 svn path=/vendor/hyperv/20130502/; revision=250200; tag=vendor/hyperv/20130502
-rw-r--r--README.md4
-rw-r--r--sys/dev/hyperv/README34
-rw-r--r--sys/dev/hyperv/include/hyperv.h796
-rw-r--r--sys/dev/hyperv/netvsc/hv_net_vsc.c1141
-rw-r--r--sys/dev/hyperv/netvsc/hv_net_vsc.h995
-rw-r--r--sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c948
-rw-r--r--sys/dev/hyperv/netvsc/hv_rndis.h911
-rw-r--r--sys/dev/hyperv/netvsc/hv_rndis_filter.c929
-rw-r--r--sys/dev/hyperv/netvsc/hv_rndis_filter.h116
-rw-r--r--sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c1470
-rw-r--r--sys/dev/hyperv/storvsc/hv_vstorage.h231
-rw-r--r--sys/dev/hyperv/utilities/hv_util.c492
-rw-r--r--sys/dev/hyperv/vmbus/hv_channel.c842
-rw-r--r--sys/dev/hyperv/vmbus/hv_channel_mgmt.c680
-rw-r--r--sys/dev/hyperv/vmbus/hv_connection.c431
-rw-r--r--sys/dev/hyperv/vmbus/hv_hv.c515
-rw-r--r--sys/dev/hyperv/vmbus/hv_ring_buffer.c440
-rw-r--r--sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c583
-rw-r--r--sys/dev/hyperv/vmbus/hv_vmbus_priv.h770
-rw-r--r--sys/modules/Makefile845
-rw-r--r--sys/modules/hyperv/Makefile5
-rw-r--r--sys/modules/hyperv/netvsc/Makefile14
-rw-r--r--sys/modules/hyperv/storvsc/Makefile14
-rw-r--r--sys/modules/hyperv/utilities/Makefile12
-rw-r--r--sys/modules/hyperv/vmbus/Makefile20
25 files changed, 13238 insertions, 0 deletions
diff --git a/README.md b/README.md
new file mode 100644
index 000000000000..461d035b2b13
--- /dev/null
+++ b/README.md
@@ -0,0 +1,4 @@
+HypervOnFreeBSDVendorBranch
+===========================
+
+Contains only the Hyper-V driver files and other files that have been touched. \ No newline at end of file
diff --git a/sys/dev/hyperv/README b/sys/dev/hyperv/README
new file mode 100644
index 000000000000..1c4488d5c870
--- /dev/null
+++ b/sys/dev/hyperv/README
@@ -0,0 +1,34 @@
+***** Release rc2.3.0 4/27/2012 **************************************************
+
+New features/limitations-
+
+-Added Fast IDE
+-Massive code restructuring to meet FreeBSD style guidelines
+
+***** Release rc2.2.0 1/4/2012 ***************************************************
+
+New features/limitations-
+
+-Port of LIS 2.1 with FreeBSD support code from Citrix, drivers are linked with
+ Kernel (future drivers will be loadable), port has not been refactored to meet
+ BSD coding standards
+
+-SCSI device driver is functional, but support for scatter-gather lists is
+ not implemented; Fast IDE support has not been added, so emulated IDE
+ support is still used
+
+-Network storage device support has been added
+
+-While the storage and networking devices support multiple controllers, we're
+ waiting on a resolution from Microsoft to enable persistent and consistent
+ numbering between boots
+
+-Hyper-V bus has been ported with support code from Citrix to handle clock
+ synchronization between guest and host. Clock synchronization and heartbeat
+ logic have been moved to two, separate drivers-this separation is part
+ of the initial steps for refactoring and restructuring the Hyper-V bus driver from the
+ LIS 2.1 codebase
+
+Bug fixes-
+
+******************************************************************************* \ No newline at end of file
diff --git a/sys/dev/hyperv/include/hyperv.h b/sys/dev/hyperv/include/hyperv.h
new file mode 100644
index 000000000000..36512698fa7d
--- /dev/null
+++ b/sys/dev/hyperv/include/hyperv.h
@@ -0,0 +1,796 @@
+/*-
+ * Copyright (c) 2009-2012 Microsoft Corp.
+ * Copyright (c) 2012 NetApp Inc.
+ * Copyright (c) 2012 Citrix Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice unmodified, this list of conditions, and the following
+ * disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * HyperV definitions for messages that are sent between instances of the
+ * Channel Management Library in separate partitions, or in some cases,
+ * back to itself.
+ */
+
+#ifndef __HYPERV_H__
+#define __HYPERV_H__
+
+#include <sys/param.h>
+#include <sys/mbuf.h>
+#include <sys/queue.h>
+#include <sys/malloc.h>
+#include <sys/kthread.h>
+#include <sys/taskqueue.h>
+#include <sys/systm.h>
+#include <sys/lock.h>
+#include <sys/sema.h>
+#include <sys/mutex.h>
+#include <sys/bus.h>
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/pmap.h>
+
+#include <amd64/include/xen/synch_bitops.h>
+#include <amd64/include/atomic.h>
+
+typedef uint8_t hv_bool_uint8_t;
+
+#define HV_S_OK 0x00000000
+#define HV_E_FAIL 0x80004005
+#define HV_ERROR_NOT_SUPPORTED 0x80070032
+#define HV_ERROR_MACHINE_LOCKED 0x800704F7
+
+/*
+ * A revision number of vmbus that is used for ensuring both ends on a
+ * partition are using compatible versions.
+ */
+
+#define HV_VMBUS_REVISION_NUMBER 13
+
+/*
+ * Maximum size of a pipe data payload: 16K bytes.
+ * Expressed with uint8_t because this header never defines a BYTE type.
+ */
+
+#define HV_MAX_PIPE_DATA_PAYLOAD (sizeof(uint8_t) * 16384)
+
+/*
+ * Define pipe_mode values
+ */
+
+#define HV_VMBUS_PIPE_TYPE_BYTE 0x00000000
+#define HV_VMBUS_PIPE_TYPE_MESSAGE 0x00000004
+
+/*
+ * The size of the user defined data buffer for non-pipe offers
+ */
+
+#define HV_MAX_USER_DEFINED_BYTES 120
+
+/*
+ * The size of the user defined data buffer for pipe offers
+ */
+
+#define HV_MAX_PIPE_USER_DEFINED_BYTES 116
+
+
+#define HV_MAX_PAGE_BUFFER_COUNT 16
+#define HV_MAX_MULTIPAGE_BUFFER_COUNT 32
+
+/*
+ * Alignment helpers. 'align' must be a power of two for the mask
+ * arithmetic to be correct. Every argument may be evaluated more than
+ * once, so only pass expressions without side effects.
+ */
+#define HV_ALIGN_UP(value, align) \
+	(((value) & ((align) - 1)) ? \
+	    (((value) + ((align) - 1)) & ~((align) - 1)) : (value))
+
+#define HV_ALIGN_DOWN(value, align) ((value) & ~((align) - 1))
+
+/* Number of pages touched by the byte range [addr, addr + len). */
+#define HV_NUM_PAGES_SPANNED(addr, len) \
+	((HV_ALIGN_UP((addr) + (len), PAGE_SIZE) - \
+	    HV_ALIGN_DOWN((addr), PAGE_SIZE)) >> PAGE_SHIFT)
+
+typedef struct hv_guid {
+ unsigned char data[16];
+} __packed hv_guid;
+
+/*
+ * At the center of the Channel Management library is
+ * the Channel Offer. This struct contains the
+ * fundamental information about an offer.
+ *
+ * The structure is packed. The trailing union carries the user-defined
+ * area in one of two layouts (standard or pipe); both arms have the same
+ * size, see the comments on each arm.
+ */
+
+typedef struct hv_vmbus_channel_offer {
+ hv_guid interface_type;
+ hv_guid interface_instance;
+ uint64_t interrupt_latency_in_100ns_units;
+ uint32_t interface_revision;
+ uint32_t server_context_area_size; /* in bytes */
+ uint16_t channel_flags;
+ uint16_t mmio_megabytes; /* in megabytes (units of 1024 * 1024 bytes) */
+ union
+ {
+ /*
+ * Non-pipes: The user has HV_MAX_USER_DEFINED_BYTES bytes.
+ */
+ struct {
+ uint8_t user_defined[HV_MAX_USER_DEFINED_BYTES];
+ } __packed standard;
+
+ /*
+ * Pipes: The following structure is an integrated pipe protocol, which
+ * is implemented on top of standard user-defined data. Pipe
+ * clients have HV_MAX_PIPE_USER_DEFINED_BYTES left for their
+ * own use.
+ *
+ * pipe_mode holds one of the HV_VMBUS_PIPE_TYPE_* values. Its 4 bytes
+ * plus HV_MAX_PIPE_USER_DEFINED_BYTES (116) equal
+ * HV_MAX_USER_DEFINED_BYTES (120), so this arm is exactly as large as
+ * the standard one.
+ */
+ struct {
+ uint32_t pipe_mode;
+ uint8_t user_defined[HV_MAX_PIPE_USER_DEFINED_BYTES];
+ } __packed pipe;
+ } u;
+
+ uint32_t padding;
+
+} __packed hv_vmbus_channel_offer;
+
+typedef uint32_t hv_gpadl_handle;
+
+typedef struct {
+ uint16_t type;
+ uint16_t data_offset8;
+ uint16_t length8;
+ uint16_t flags;
+ uint64_t transaction_id;
+} __packed hv_vm_packet_descriptor;
+
+typedef uint32_t hv_previous_packet_offset;
+
+typedef struct {
+ hv_previous_packet_offset previous_packet_start_offset;
+ hv_vm_packet_descriptor descriptor;
+} __packed hv_vm_packet_header;
+
+typedef struct {
+ uint32_t byte_count;
+ uint32_t byte_offset;
+} __packed hv_vm_transfer_page;
+
+typedef struct {
+ hv_vm_packet_descriptor d;
+ uint16_t transfer_page_set_id;
+ hv_bool_uint8_t sender_owns_set;
+ uint8_t reserved;
+ uint32_t range_count;
+ hv_vm_transfer_page ranges[1];
+} __packed hv_vm_transfer_page_packet_header;
+
+typedef struct {
+ hv_vm_packet_descriptor d;
+ uint32_t gpadl;
+ uint32_t reserved;
+} __packed hv_vm_gpadl_packet_header;
+
+typedef struct {
+ hv_vm_packet_descriptor d;
+ uint32_t gpadl;
+ uint16_t transfer_page_set_id;
+ uint16_t reserved;
+} __packed hv_vm_add_remove_transfer_page_set;
+
+/*
+ * This structure defines a range in guest
+ * physical space that can be made
+ * to look virtually contiguous.
+ */
+
+typedef struct {
+ uint32_t byte_count;
+ uint32_t byte_offset;
+ uint64_t pfn_array[0];
+} __packed hv_gpa_range;
+
+/*
+ * This is the format for an Establish Gpadl packet, which contains a handle
+ * by which this GPADL will be known and a set of GPA ranges associated with
+ * it. This can be converted to a MDL by the guest OS. If there are multiple
+ * GPA ranges, then the resulting MDL will be "chained," representing multiple
+ * VA ranges.
+ */
+
+typedef struct {
+ hv_vm_packet_descriptor d;
+ uint32_t gpadl;
+ uint32_t range_count;
+ hv_gpa_range range[1];
+} __packed hv_vm_establish_gpadl;
+
+/*
+ * This is the format for a Teardown Gpadl packet, which indicates that the
+ * GPADL handle in the Establish Gpadl packet will never be referenced again.
+ */
+
+typedef struct {
+ hv_vm_packet_descriptor d;
+ uint32_t gpadl;
+ /* for alignment to a 8-byte boundary */
+ uint32_t reserved;
+} __packed hv_vm_teardown_gpadl;
+
+/*
+ * This is the format for a GPA-Direct packet, which contains a set of GPA
+ * ranges, in addition to commands and/or data.
+ */
+
+typedef struct {
+ hv_vm_packet_descriptor d;
+ uint32_t reserved;
+ uint32_t range_count;
+ hv_gpa_range range[1];
+} __packed hv_vm_data_gpa_direct;
+
+/*
+ * This is the format for a Additional data Packet.
+ */
+typedef struct {
+ hv_vm_packet_descriptor d;
+ uint64_t total_bytes;
+ uint32_t byte_offset;
+ uint32_t byte_count;
+ uint8_t data[1];
+} __packed hv_vm_additional_data;
+
+typedef union {
+ hv_vm_packet_descriptor simple_header;
+ hv_vm_transfer_page_packet_header transfer_page_header;
+ hv_vm_gpadl_packet_header gpadl_header;
+ hv_vm_add_remove_transfer_page_set add_remove_transfer_page_header;
+ hv_vm_establish_gpadl establish_gpadl_header;
+ hv_vm_teardown_gpadl teardown_gpadl_header;
+ hv_vm_data_gpa_direct data_gpa_direct_header;
+} __packed hv_vm_packet_largest_possible_header;
+
+typedef enum {
+ HV_VMBUS_PACKET_TYPE_INVALID = 0x0,
+ HV_VMBUS_PACKET_TYPES_SYNCH = 0x1,
+ HV_VMBUS_PACKET_TYPE_ADD_TRANSFER_PAGE_SET = 0x2,
+ HV_VMBUS_PACKET_TYPE_REMOVE_TRANSFER_PAGE_SET = 0x3,
+ HV_VMBUS_PACKET_TYPE_ESTABLISH_GPADL = 0x4,
+ HV_VMBUS_PACKET_TYPE_TEAR_DOWN_GPADL = 0x5,
+ HV_VMBUS_PACKET_TYPE_DATA_IN_BAND = 0x6,
+ HV_VMBUS_PACKET_TYPE_DATA_USING_TRANSFER_PAGES = 0x7,
+ HV_VMBUS_PACKET_TYPE_DATA_USING_GPADL = 0x8,
+ HV_VMBUS_PACKET_TYPE_DATA_USING_GPA_DIRECT = 0x9,
+ HV_VMBUS_PACKET_TYPE_CANCEL_REQUEST = 0xa,
+ HV_VMBUS_PACKET_TYPE_COMPLETION = 0xb,
+ HV_VMBUS_PACKET_TYPE_DATA_USING_ADDITIONAL_PACKETS = 0xc,
+ HV_VMBUS_PACKET_TYPE_ADDITIONAL_DATA = 0xd
+} hv_vmbus_packet_type;
+
+#define HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED 1
+
+/*
+ * Version 1 messages
+ */
+typedef enum {
+ HV_CHANNEL_MESSAGE_INVALID = 0,
+ HV_CHANNEL_MESSAGE_OFFER_CHANNEL = 1,
+ HV_CHANNEL_MESSAGE_RESCIND_CHANNEL_OFFER = 2,
+ HV_CHANNEL_MESSAGE_REQUEST_OFFERS = 3,
+ HV_CHANNEL_MESSAGE_ALL_OFFERS_DELIVERED = 4,
+ HV_CHANNEL_MESSAGE_OPEN_CHANNEL = 5,
+ HV_CHANNEL_MESSAGE_OPEN_CHANNEL_RESULT = 6,
+ HV_CHANNEL_MESSAGE_CLOSE_CHANNEL = 7,
+ HV_CHANNEL_MESSAGEL_GPADL_HEADER = 8,
+ HV_CHANNEL_MESSAGE_GPADL_BODY = 9,
+ HV_CHANNEL_MESSAGE_GPADL_CREATED = 10,
+ HV_CHANNEL_MESSAGE_GPADL_TEARDOWN = 11,
+ HV_CHANNEL_MESSAGE_GPADL_TORNDOWN = 12,
+ HV_CHANNEL_MESSAGE_REL_ID_RELEASED = 13,
+ HV_CHANNEL_MESSAGE_INITIATED_CONTACT = 14,
+ HV_CHANNEL_MESSAGE_VERSION_RESPONSE = 15,
+ HV_CHANNEL_MESSAGE_UNLOAD = 16,
+
+#ifdef HV_VMBUS_FEATURE_PARENT_OR_PEER_MEMORY_MAPPED_INTO_A_CHILD
+ HV_CHANNEL_MESSAGE_VIEW_RANGE_ADD = 17,
+ HV_CHANNEL_MESSAGE_VIEW_RANGE_REMOVE = 18,
+#endif
+ HV_CHANNEL_MESSAGE_COUNT
+} hv_vmbus_channel_msg_type;
+
+typedef struct {
+ hv_vmbus_channel_msg_type message_type;
+ uint32_t padding;
+} __packed hv_vmbus_channel_msg_header;
+
+/*
+ * Query VMBus Version parameters
+ */
+typedef struct {
+ hv_vmbus_channel_msg_header header;
+ uint32_t version;
+} __packed hv_vmbus_channel_query_vmbus_version;
+
+/*
+ * VMBus Version Supported parameters
+ */
+typedef struct {
+ hv_vmbus_channel_msg_header header;
+ hv_bool_uint8_t version_supported;
+} __packed hv_vmbus_channel_version_supported;
+
+/*
+ * Channel Offer parameters
+ */
+typedef struct {
+ hv_vmbus_channel_msg_header header;
+ hv_vmbus_channel_offer offer;
+ uint32_t child_rel_id;
+ uint8_t monitor_id;
+ hv_bool_uint8_t monitor_allocated;
+} __packed hv_vmbus_channel_offer_channel;
+
+/*
+ * Rescind Offer parameters
+ */
+typedef struct
+{
+ hv_vmbus_channel_msg_header header;
+ uint32_t child_rel_id;
+} __packed hv_vmbus_channel_rescind_offer;
+
+
+/*
+ * Request Offer -- no parameters, SynIC message contains the partition ID
+ *
+ * Set Snoop -- no parameters, SynIC message contains the partition ID
+ *
+ * Clear Snoop -- no parameters, SynIC message contains the partition ID
+ *
+ * All Offers Delivered -- no parameters, SynIC message contains the
+ * partition ID
+ *
+ * Flush Client -- no parameters, SynIC message contains the partition ID
+ */
+
+
+/*
+ * Open Channel parameters
+ */
+typedef struct
+{
+ hv_vmbus_channel_msg_header header;
+
+ /*
+ * Identifies the specific VMBus channel that is being opened.
+ */
+ uint32_t child_rel_id;
+
+ /*
+ * ID making a particular open request at a channel offer unique.
+ */
+ uint32_t open_id;
+
+ /*
+ * GPADL for the channel's ring buffer.
+ */
+ hv_gpadl_handle ring_buffer_gpadl_handle;
+
+ /*
+ * GPADL for the channel's server context save area.
+ */
+ hv_gpadl_handle server_context_area_gpadl_handle;
+
+ /*
+ * The upstream ring buffer begins at offset zero in the memory described
+ * by ring_buffer_gpadl_handle. The downstream ring buffer follows it at
+ * this offset (in pages).
+ */
+ uint32_t downstream_ring_buffer_page_offset;
+
+ /*
+ * User-specific data to be passed along to the server endpoint.
+ */
+ uint8_t user_data[HV_MAX_USER_DEFINED_BYTES];
+
+} __packed hv_vmbus_channel_open_channel;
+
+typedef uint32_t hv_nt_status;
+
+/*
+ * Open Channel Result parameters
+ */
+typedef struct
+{
+ hv_vmbus_channel_msg_header header;
+ uint32_t child_rel_id;
+ uint32_t open_id;
+ hv_nt_status status;
+} __packed hv_vmbus_channel_open_result;
+
+/*
+ * Close channel parameters
+ */
+typedef struct
+{
+ hv_vmbus_channel_msg_header header;
+ uint32_t child_rel_id;
+} __packed hv_vmbus_channel_close_channel;
+
+/*
+ * Channel Message GPADL
+ */
+#define HV_GPADL_TYPE_RING_BUFFER 1
+#define HV_GPADL_TYPE_SERVER_SAVE_AREA 2
+#define HV_GPADL_TYPE_TRANSACTION 8
+
+/*
+ * The number of PFNs in a GPADL message is defined by the number of pages
+ * that would be spanned by byte_count and byte_offset. If the implied number
+ * of PFNs won't fit in this packet, there will be a follow-up packet that
+ * contains more
+ */
+
+typedef struct {
+ hv_vmbus_channel_msg_header header;
+ uint32_t child_rel_id;
+ uint32_t gpadl;
+ uint16_t range_buf_len;
+ uint16_t range_count;
+ hv_gpa_range range[0];
+} __packed hv_vmbus_channel_gpadl_header;
+
+/*
+ * This is the follow-up packet that contains more PFNs
+ */
+typedef struct {
+ hv_vmbus_channel_msg_header header;
+ uint32_t message_number;
+ uint32_t gpadl;
+ uint64_t pfn[0];
+} __packed hv_vmbus_channel_gpadl_body;
+
+typedef struct {
+ hv_vmbus_channel_msg_header header;
+ uint32_t child_rel_id;
+ uint32_t gpadl;
+ uint32_t creation_status;
+} __packed hv_vmbus_channel_gpadl_created;
+
+typedef struct {
+ hv_vmbus_channel_msg_header header;
+ uint32_t child_rel_id;
+ uint32_t gpadl;
+} __packed hv_vmbus_channel_gpadl_teardown;
+
+typedef struct {
+ hv_vmbus_channel_msg_header header;
+ uint32_t gpadl;
+} __packed hv_vmbus_channel_gpadl_torndown;
+
+typedef struct {
+ hv_vmbus_channel_msg_header header;
+ uint32_t child_rel_id;
+} __packed hv_vmbus_channel_relid_released;
+
+typedef struct {
+ hv_vmbus_channel_msg_header header;
+ uint32_t vmbus_version_requested;
+ uint32_t padding2;
+ uint64_t interrupt_page;
+ uint64_t monitor_page_1;
+ uint64_t monitor_page_2;
+} __packed hv_vmbus_channel_initiate_contact;
+
+typedef struct {
+ hv_vmbus_channel_msg_header header;
+ hv_bool_uint8_t version_supported;
+} __packed hv_vmbus_channel_version_response;
+
+typedef hv_vmbus_channel_msg_header hv_vmbus_channel_unload;
+
+#define HW_MACADDR_LEN 6
+
+/*
+ * Fixme: Added to quiet "typeof" errors involving hv_vmbus.h when
+ * the including C file was compiled with "-std=c99".
+ */
+#ifndef typeof
+#define typeof __typeof
+#endif
+
+/*
+ * Fallback definition only. The expansion is fully parenthesized so it
+ * remains a single operand wherever the macro is used.
+ */
+#ifndef NULL
+#define NULL ((void *)0)
+#endif
+
+typedef void *hv_vmbus_handle;
+
+#ifndef CONTAINING_RECORD
+#define CONTAINING_RECORD(address, type, field) ((type *)( \
+ (uint8_t *)(address) - \
+ (uint8_t *)(&((type *)0)->field)))
+#endif /* CONTAINING_RECORD */
+
+/*
+ * container_of(ptr, type, member): given 'ptr', a pointer to the field
+ * 'member' embedded in an object of type 'type', yield a pointer to that
+ * enclosing object.
+ *
+ * The temporary __mptr is declared with the member's own type, so passing
+ * a pointer of an incompatible type is diagnosed by the compiler. Relies
+ * on the GNU statement-expression and __typeof__ extensions.
+ */
+#define container_of(ptr, type, member) ({ \
+ __typeof__( ((type *)0)->member ) *__mptr = (ptr); \
+ (type *)( (char *)__mptr - offsetof(type,member) );})
+
+enum {
+ HV_VMBUS_IVAR_TYPE,
+ HV_VMBUS_IVAR_INSTANCE,
+ HV_VMBUS_IVAR_NODE,
+ HV_VMBUS_IVAR_DEVCTX
+};
+
+#define HV_VMBUS_ACCESSOR(var, ivar, type) \
+ __BUS_ACCESSOR(vmbus, var, HV_VMBUS, ivar, type)
+
+HV_VMBUS_ACCESSOR(type, TYPE, const char *)
+HV_VMBUS_ACCESSOR(devctx, DEVCTX, struct hv_device *)
+
+
+/*
+ * Common defines for Hyper-V ICs
+ */
+#define HV_ICMSGTYPE_NEGOTIATE 0
+#define HV_ICMSGTYPE_HEARTBEAT 1
+#define HV_ICMSGTYPE_KVPEXCHANGE 2
+#define HV_ICMSGTYPE_SHUTDOWN 3
+#define HV_ICMSGTYPE_TIMESYNC 4
+#define HV_ICMSGTYPE_VSS 5
+
+#define HV_ICMSGHDRFLAG_TRANSACTION 1
+#define HV_ICMSGHDRFLAG_REQUEST 2
+#define HV_ICMSGHDRFLAG_RESPONSE 4
+
+typedef struct hv_vmbus_pipe_hdr {
+ uint32_t flags;
+ uint32_t msgsize;
+} __packed hv_vmbus_pipe_hdr;
+
+typedef struct hv_vmbus_ic_version {
+ uint16_t major;
+ uint16_t minor;
+} __packed hv_vmbus_ic_version;
+
+typedef struct hv_vmbus_icmsg_hdr {
+ hv_vmbus_ic_version icverframe;
+ uint16_t icmsgtype;
+ hv_vmbus_ic_version icvermsg;
+ uint16_t icmsgsize;
+ uint32_t status;
+ uint8_t ictransaction_id;
+ uint8_t icflags;
+ uint8_t reserved[2];
+} __packed hv_vmbus_icmsg_hdr;
+
+typedef struct hv_vmbus_icmsg_negotiate {
+ uint16_t icframe_vercnt;
+ uint16_t icmsg_vercnt;
+ uint32_t reserved;
+ hv_vmbus_ic_version icversion_data[1]; /* any size array */
+} __packed hv_vmbus_icmsg_negotiate;
+
+typedef struct hv_vmbus_shutdown_msg_data {
+ uint32_t reason_code;
+ uint32_t timeout_seconds;
+ uint32_t flags;
+ uint8_t display_message[2048];
+} __packed hv_vmbus_shutdown_msg_data;
+
+typedef struct hv_vmbus_heartbeat_msg_data {
+ uint64_t seq_num;
+ uint32_t reserved[8];
+} __packed hv_vmbus_heartbeat_msg_data;
+
+typedef struct {
+ /*
+ * Write index: offset in bytes from the start of the ring data
+ * (buffer[] below)
+ */
+ volatile uint32_t write_index;
+ /*
+ * Read index: offset in bytes from the start of the ring data
+ * (buffer[] below)
+ */
+ volatile uint32_t read_index;
+ /*
+ * NOTE: The interrupt_mask field is used only for channels, but
+ * vmbus connection also uses this data structure
+ */
+ volatile uint32_t interrupt_mask;
+ /*
+ * Pad the header to PAGE_SIZE so that the ring data starts on a page.
+ * The three uint32_t fields above occupy 12 bytes and 12 + 4084 = 4096,
+ * so the header is exactly one page only when PAGE_SIZE is 4096.
+ * NOTE(review): the literal is not derived from PAGE_SIZE.
+ */
+ uint8_t reserved[4084];
+
+ /*
+ * WARNING: Ring data starts here + ring_data_start_offset
+ * !!! DO NOT place any fields below this !!!
+ */
+ uint8_t buffer[0]; /* doubles as interrupt mask */
+} __packed hv_vmbus_ring_buffer;
+
+typedef struct {
+ int length;
+ int offset;
+ uint64_t pfn;
+} __packed hv_vmbus_page_buffer;
+
+typedef struct {
+ int length;
+ int offset;
+ uint64_t pfn_array[HV_MAX_MULTIPAGE_BUFFER_COUNT];
+} __packed hv_vmbus_multipage_buffer;
+
+typedef struct {
+ hv_vmbus_ring_buffer* ring_buffer;
+ uint32_t ring_size; /* Include the shared header */
+ struct mtx ring_lock;
+ uint32_t ring_data_size; /* ring_size */
+ uint32_t ring_data_start_offset;
+} hv_vmbus_ring_buffer_info;
+
+typedef void (*hv_vmbus_pfn_channel_callback)(void *context);
+
+typedef enum {
+ HV_CHANNEL_OFFER_STATE,
+ HV_CHANNEL_OPENING_STATE,
+ HV_CHANNEL_OPEN_STATE,
+ HV_CHANNEL_CLOSING_NONDESTRUCTIVE_STATE,
+} hv_vmbus_channel_state;
+
+typedef struct hv_vmbus_channel {
+ TAILQ_ENTRY(hv_vmbus_channel) list_entry;
+ struct hv_device* device;
+ hv_vmbus_channel_state state;
+ hv_vmbus_channel_offer_channel offer_msg;
+ /*
+ * These are based on the offer_msg.monitor_id.
+ * Save it here for easy access.
+ */
+ uint8_t monitor_group;
+ uint8_t monitor_bit;
+
+ uint32_t ring_buffer_gpadl_handle;
+ /*
+ * Allocated memory for ring buffer
+ */
+ void* ring_buffer_pages;
+ uint32_t ring_buffer_page_count;
+ /*
+ * send to parent
+ */
+ hv_vmbus_ring_buffer_info outbound;
+ /*
+ * receive from parent
+ */
+ hv_vmbus_ring_buffer_info inbound;
+
+ struct mtx inbound_lock;
+ hv_vmbus_handle control_work_queue;
+
+ hv_vmbus_pfn_channel_callback on_channel_callback;
+ void* channel_callback_context;
+
+} hv_vmbus_channel;
+
+typedef struct hv_device {
+ hv_guid class_id;
+ hv_guid device_id;
+ device_t device;
+ hv_vmbus_channel* channel;
+} hv_device;
+
+
+
+int hv_vmbus_channel_recv_packet(
+ hv_vmbus_channel* channel,
+ void* buffer,
+ uint32_t buffer_len,
+ uint32_t* buffer_actual_len,
+ uint64_t* request_id);
+
+int hv_vmbus_channel_recv_packet_raw(
+ hv_vmbus_channel* channel,
+ void* buffer,
+ uint32_t buffer_len,
+ uint32_t* buffer_actual_len,
+ uint64_t* request_id);
+
+int hv_vmbus_channel_open(
+ hv_vmbus_channel* channel,
+ uint32_t send_ring_buffer_size,
+ uint32_t recv_ring_buffer_size,
+ void* user_data,
+ uint32_t user_data_len,
+ hv_vmbus_pfn_channel_callback
+ pfn_on_channel_callback,
+ void* context);
+
+void hv_vmbus_channel_close(hv_vmbus_channel *channel);
+
+int hv_vmbus_channel_send_packet(
+ hv_vmbus_channel* channel,
+ void* buffer,
+ uint32_t buffer_len,
+ uint64_t request_id,
+ hv_vmbus_packet_type type,
+ uint32_t flags);
+
+int hv_vmbus_channel_send_packet_pagebuffer(
+ hv_vmbus_channel* channel,
+ hv_vmbus_page_buffer page_buffers[],
+ uint32_t page_count,
+ void* buffer,
+ uint32_t buffer_len,
+ uint64_t request_id);
+
+int hv_vmbus_channel_send_packet_multipagebuffer(
+ hv_vmbus_channel* channel,
+ hv_vmbus_multipage_buffer* multi_page_buffer,
+ void* buffer,
+ uint32_t buffer_len,
+ uint64_t request_id);
+
+int hv_vmbus_channel_establish_gpadl(
+ hv_vmbus_channel* channel,
+ /* must be phys and virt contiguous */
+ void* contig_buffer,
+ /* page-size multiple */
+ uint32_t size,
+ uint32_t* gpadl_handle);
+
+int hv_vmbus_channel_teardown_gpdal(
+ hv_vmbus_channel* channel,
+ uint32_t gpadl_handle);
+
+/*
+ * Work abstraction defines
+ */
+typedef struct hv_work_queue {
+ struct taskqueue* queue;
+ struct proc* proc;
+ struct sema* work_sema;
+} hv_work_queue;
+
+typedef struct hv_work_item {
+ struct task work;
+ void (*callback)(void *);
+ void* context;
+ hv_work_queue* wq;
+} hv_work_item;
+
+struct hv_work_queue* hv_work_queue_create(char* name);
+
+void hv_work_queue_close(struct hv_work_queue* wq);
+
+int hv_queue_work_item(
+ hv_work_queue* wq,
+ void (*callback)(void *),
+ void* context);
+/**
+ * @brief Translate a virtual address to a physical address
+ *
+ * Combines the result of vtophys() with the in-page offset of @p virt
+ * (the bits selected by PAGE_MASK).
+ *
+ * @param virt virtual address to translate
+ * @return the resulting physical address, as an unsigned long
+ */
+static inline unsigned long
+hv_get_phys_addr(void *virt)
+{
+	vm_offset_t page_off = (vm_offset_t)virt & PAGE_MASK;
+
+	return (vtophys(virt) | page_off);
+}
+
+#endif /* __HYPERV_H__ */
+
diff --git a/sys/dev/hyperv/netvsc/hv_net_vsc.c b/sys/dev/hyperv/netvsc/hv_net_vsc.c
new file mode 100644
index 000000000000..aeee94d83459
--- /dev/null
+++ b/sys/dev/hyperv/netvsc/hv_net_vsc.c
@@ -0,0 +1,1141 @@
+/*-
+ * Copyright (c) 2009-2012 Microsoft Corp.
+ * Copyright (c) 2010-2012 Citrix Inc.
+ * Copyright (c) 2012 NetApp Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice unmodified, this list of conditions, and the following
+ * disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * HyperV vmbus network VSC (virtual services client) module
+ *
+ */
+
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/socket.h>
+#include <sys/lock.h>
+#include <net/if.h>
+#include <net/if_arp.h>
+#include <machine/bus.h>
+#include <machine/atomic.h>
+
+#include <dev/hyperv/include/hyperv.h>
+#include "hv_net_vsc.h"
+#include "hv_rndis.h"
+#include "hv_rndis_filter.h"
+
+
+/*
+ * Forward declarations
+ */
+static void hv_nv_on_channel_callback(void *context);
+static int hv_nv_init_send_buffer_with_net_vsp(struct hv_device *device);
+static int hv_nv_init_rx_buffer_with_net_vsp(struct hv_device *device);
+static int hv_nv_destroy_send_buffer(netvsc_dev *net_dev);
+static int hv_nv_destroy_rx_buffer(netvsc_dev *net_dev);
+static int hv_nv_connect_to_vsp(struct hv_device *device);
+static void hv_nv_on_send_completion(struct hv_device *device,
+ hv_vm_packet_descriptor *pkt);
+static void hv_nv_on_receive(struct hv_device *device,
+ hv_vm_packet_descriptor *pkt);
+static void hv_nv_send_receive_completion(struct hv_device *device,
+ uint64_t tid);
+
+
+/*
+ * Allocate a zeroed netvsc_dev for the given vmbus device and attach it
+ * to the device's softc.
+ *
+ * Returns the new net device, or NULL if the non-sleeping (M_NOWAIT)
+ * allocation fails; in that case the softc is left untouched.
+ */
+static inline netvsc_dev *
+hv_nv_alloc_net_device(struct hv_device *device)
+{
+	hn_softc_t *softc = device_get_softc(device->device);
+	netvsc_dev *ndev;
+
+	ndev = malloc(sizeof(*ndev), M_DEVBUF, M_NOWAIT | M_ZERO);
+	if (ndev != NULL) {
+		ndev->dev = device;
+		ndev->destroy = FALSE;
+		softc->net_dev = ndev;
+	}
+
+	return (ndev);
+}
+
+/*
+ * Look up the net device to use for an outbound operation.
+ *
+ * Returns the netvsc_dev stored in the device's softc, or NULL when no
+ * net device is attached or the attached one is marked for destruction.
+ */
+static inline netvsc_dev *
+hv_nv_get_outbound_net_device(struct hv_device *device)
+{
+	hn_softc_t *softc = device_get_softc(device->device);
+	netvsc_dev *ndev = softc->net_dev;
+
+	/* Nothing attached, or attached and still alive: hand it back as is. */
+	if (ndev == NULL || !ndev->destroy) {
+		return (ndev);
+	}
+
+	return (NULL);
+}
+
+/*
+ * Look up the net device to use for an inbound operation.
+ *
+ * Returns the netvsc_dev stored in the device's softc, or NULL when no
+ * net device is attached. While the device is being destroyed, inbound
+ * traffic is still accepted as long as sends are outstanding; once
+ * num_outstanding_sends reaches zero NULL is returned.
+ */
+static inline netvsc_dev *
+hv_nv_get_inbound_net_device(struct hv_device *device)
+{
+	hn_softc_t *softc = device_get_softc(device->device);
+	netvsc_dev *ndev = softc->net_dev;
+
+	if (ndev != NULL && ndev->destroy &&
+	    ndev->num_outstanding_sends == 0) {
+		return (NULL);
+	}
+
+	return (ndev);
+}
+
+/*
+ * Net VSC initialize receive buffer with net VSP
+ *
+ * Net VSP: Network virtual services provider, also known as the
+ * Hyper-V extensible switch and the synthetic data path.
+ *
+ * Allocates the contiguous receive buffer, establishes a GPADL for it on
+ * the device's channel, announces the GPADL to the NetVSP and records the
+ * section layout reported in the completion message.
+ *
+ * Returns 0 on success; ENODEV if no usable net device exists, ENOMEM if
+ * an allocation fails, EINVAL if the NetVSP reports a failure status or a
+ * section layout other than one section at offset 0, or the error
+ * returned by the vmbus channel calls. On every failure after the net
+ * device lookup, hv_nv_destroy_rx_buffer() is called to undo the work.
+ */
+static int
+hv_nv_init_rx_buffer_with_net_vsp(struct hv_device *device)
+{
+	netvsc_dev *net_dev;
+	nvsp_msg *init_pkt;
+	int ret = 0;
+
+	net_dev = hv_nv_get_outbound_net_device(device);
+	if (!net_dev) {
+		return (ENODEV);
+	}
+
+	net_dev->rx_buf = contigmalloc(net_dev->rx_buf_size, M_DEVBUF,
+	    M_ZERO, 0UL, BUS_SPACE_MAXADDR, PAGE_SIZE, 0);
+	if (net_dev->rx_buf == NULL) {
+		ret = ENOMEM;
+		goto cleanup;
+	}
+
+	/*
+	 * Establish the GPADL handle for this buffer on this channel.
+	 * Note: This call uses the vmbus connection rather than the
+	 * channel to establish the gpadl handle.
+	 * GPADL: Guest physical address descriptor list.
+	 */
+	ret = hv_vmbus_channel_establish_gpadl(
+	    device->channel, net_dev->rx_buf,
+	    net_dev->rx_buf_size, &net_dev->rx_buf_gpadl_handle);
+	if (ret != 0) {
+		goto cleanup;
+	}
+
+	/* sema_wait(&ext->channel_init_sema); KYS CHECK */
+
+	/* Notify the NetVsp of the gpadl handle */
+	init_pkt = &net_dev->channel_init_packet;
+
+	memset(init_pkt, 0, sizeof(nvsp_msg));
+
+	init_pkt->hdr.msg_type = nvsp_msg_1_type_send_rx_buf;
+	init_pkt->msgs.vers_1_msgs.send_rx_buf.gpadl_handle =
+	    net_dev->rx_buf_gpadl_handle;
+	init_pkt->msgs.vers_1_msgs.send_rx_buf.id =
+	    NETVSC_RECEIVE_BUFFER_ID;
+
+	/* Send the gpadl notification request */
+
+	ret = hv_vmbus_channel_send_packet(device->channel, init_pkt,
+	    sizeof(nvsp_msg), (uint64_t)init_pkt,
+	    HV_VMBUS_PACKET_TYPE_DATA_IN_BAND,
+	    HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
+	if (ret != 0) {
+		goto cleanup;
+	}
+
+	/*
+	 * Wait for the completion. The response is read back from the same
+	 * init_pkt storage below (presumably filled in by the channel
+	 * callback before it posts the semaphore -- confirm against the
+	 * completion handler).
+	 */
+	sema_wait(&net_dev->channel_init_sema);
+
+	/* Check the response */
+	if (init_pkt->msgs.vers_1_msgs.send_rx_buf_complete.status
+	    != nvsp_status_success) {
+		ret = EINVAL;
+		goto cleanup;
+	}
+
+	/*
+	 * Record the section count first: hv_nv_destroy_rx_buffer() keys
+	 * the revoke message off a non-zero rx_section_count.
+	 */
+	net_dev->rx_section_count =
+	    init_pkt->msgs.vers_1_msgs.send_rx_buf_complete.num_sections;
+
+	/*
+	 * For first release, there should only be 1 section that represents
+	 * the entire receive buffer. Reject any other count before the
+	 * value is used to size the allocation and the copy below.
+	 */
+	if (net_dev->rx_section_count != 1) {
+		ret = EINVAL;
+		goto cleanup;
+	}
+
+	net_dev->rx_sections = malloc(net_dev->rx_section_count *
+	    sizeof(nvsp_1_rx_buf_section), M_DEVBUF, M_NOWAIT);
+	if (net_dev->rx_sections == NULL) {
+		/* Out of memory, not a bad argument. */
+		ret = ENOMEM;
+		goto cleanup;
+	}
+	memcpy(net_dev->rx_sections,
+	    init_pkt->msgs.vers_1_msgs.send_rx_buf_complete.sections,
+	    net_dev->rx_section_count * sizeof(nvsp_1_rx_buf_section));
+
+	/* The single section must cover the buffer from its start. */
+	if (net_dev->rx_sections->offset != 0) {
+		ret = EINVAL;
+		goto cleanup;
+	}
+
+	goto exit;
+
+cleanup:
+	hv_nv_destroy_rx_buffer(net_dev);
+
+exit:
+	return (ret);
+}
+
+/*
+ * Net VSC initialize send buffer with net VSP
+ *
+ * Allocates the contiguous send buffer, establishes a GPADL for it on the
+ * device's channel, announces the GPADL to the NetVSP and stores the
+ * section size reported in the completion message.
+ *
+ * Returns 0 on success; ENODEV if no usable net device exists, ENOMEM if
+ * the buffer cannot be allocated, EINVAL if the NetVSP reports a failure
+ * status, or the error returned by the vmbus channel calls. On every
+ * failure after the net device lookup, hv_nv_destroy_send_buffer() is
+ * called to undo the work.
+ */
+static int
+hv_nv_init_send_buffer_with_net_vsp(struct hv_device *device)
+{
+ netvsc_dev *net_dev;
+ nvsp_msg *init_pkt;
+ int ret = 0;
+
+ net_dev = hv_nv_get_outbound_net_device(device);
+ if (!net_dev) {
+ return (ENODEV);
+ }
+
+ net_dev->send_buf = contigmalloc(net_dev->send_buf_size, M_DEVBUF,
+ M_ZERO, 0UL, BUS_SPACE_MAXADDR, PAGE_SIZE, 0);
+ if (net_dev->send_buf == NULL) {
+ ret = ENOMEM;
+ goto cleanup;
+ }
+
+ /*
+ * Establish the gpadl handle for this buffer on this channel.
+ * Note: This call uses the vmbus connection rather than the
+ * channel to establish the gpadl handle.
+ */
+ ret = hv_vmbus_channel_establish_gpadl(device->channel,
+ net_dev->send_buf, net_dev->send_buf_size,
+ &net_dev->send_buf_gpadl_handle);
+ if (ret != 0) {
+ goto cleanup;
+ }
+
+ /*
+ * Notify the NetVsp of the gpadl handle
+ *
+ * NOTE(review): the message type is nvsp_msg_1_type_send_send_buf, yet
+ * the fields are written through the send_rx_buf member of the message
+ * union. This presumably works because the send-buffer request has the
+ * same layout -- confirm against hv_net_vsc.h and switch to the
+ * send-buffer member if one exists.
+ */
+
+ init_pkt = &net_dev->channel_init_packet;
+
+ memset(init_pkt, 0, sizeof(nvsp_msg));
+
+ init_pkt->hdr.msg_type = nvsp_msg_1_type_send_send_buf;
+ init_pkt->msgs.vers_1_msgs.send_rx_buf.gpadl_handle =
+ net_dev->send_buf_gpadl_handle;
+ init_pkt->msgs.vers_1_msgs.send_rx_buf.id =
+ NETVSC_SEND_BUFFER_ID;
+
+ /* Send the gpadl notification request */
+
+ ret = hv_vmbus_channel_send_packet(device->channel, init_pkt,
+ sizeof(nvsp_msg), (uint64_t)init_pkt,
+ HV_VMBUS_PACKET_TYPE_DATA_IN_BAND,
+ HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
+ if (ret != 0) {
+ goto cleanup;
+ }
+
+ sema_wait(&net_dev->channel_init_sema);
+
+ /*
+ * Check the response, which is read back from the same init_pkt
+ * storage that carried the request (presumably overwritten by the
+ * completion path before the semaphore is posted -- confirm).
+ */
+ if (init_pkt->msgs.vers_1_msgs.send_send_buf_complete.status
+ != nvsp_status_success) {
+ ret = EINVAL;
+ goto cleanup;
+ }
+
+ net_dev->send_section_size =
+ init_pkt->msgs.vers_1_msgs.send_send_buf_complete.section_size;
+
+ goto exit;
+
+cleanup:
+ hv_nv_destroy_send_buffer(net_dev);
+
+exit:
+ return (ret);
+}
+
+/*
+ * Net VSC destroy receive buffer
+ *
+ * Releases the receive-buffer resources in order: revoke the buffer at
+ * the VSP (only when a section count was recorded), tear down the GPADL,
+ * free the contiguous buffer and free the section table.  A step is
+ * skipped when the field that tracks it is zero/NULL.
+ *
+ * Returns 0 on success.  When the revoke message cannot be sent or the
+ * GPADL teardown fails, that error is returned at once and everything not
+ * yet released is intentionally left allocated.
+ */
+static int
+hv_nv_destroy_rx_buffer(netvsc_dev *net_dev)
+{
+	nvsp_msg *msg;
+	int error = 0;
+
+	/*
+	 * A non-zero section count means a send_rx_buf_complete message
+	 * was received (i.e. nvsp_msg_1_type_send_rx_buf was sent), so the
+	 * buffer has to be revoked first.
+	 */
+	if (net_dev->rx_section_count != 0) {
+		msg = &net_dev->revoke_packet;
+		memset(msg, 0, sizeof(nvsp_msg));
+		msg->hdr.msg_type = nvsp_msg_1_type_revoke_rx_buf;
+		msg->msgs.vers_1_msgs.revoke_rx_buf.id =
+		    NETVSC_RECEIVE_BUFFER_ID;
+
+		error = hv_vmbus_channel_send_packet(net_dev->dev->channel,
+		    msg, sizeof(nvsp_msg), (uint64_t)msg,
+		    HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, 0);
+
+		/* Prefer leaking over continuing into a bugcheck */
+		if (error != 0)
+			return (error);
+	}
+
+	/* Drop the gpadl on the vsp side */
+	if (net_dev->rx_buf_gpadl_handle != 0) {
+		error = hv_vmbus_channel_teardown_gpdal(
+		    net_dev->dev->channel, net_dev->rx_buf_gpadl_handle);
+
+		/* Prefer leaking over continuing into a bugcheck */
+		if (error != 0)
+			return (error);
+
+		net_dev->rx_buf_gpadl_handle = 0;
+	}
+
+	/* Give the receive buffer itself back */
+	if (net_dev->rx_buf != NULL) {
+		contigfree(net_dev->rx_buf, net_dev->rx_buf_size, M_DEVBUF);
+		net_dev->rx_buf = NULL;
+	}
+
+	/* The section count is only cleared together with the table */
+	if (net_dev->rx_sections != NULL) {
+		free(net_dev->rx_sections, M_DEVBUF);
+		net_dev->rx_sections = NULL;
+		net_dev->rx_section_count = 0;
+	}
+
+	return (error);
+}
+
+/*
+ * Net VSC destroy send buffer
+ *
+ * Releases the send-buffer resources in order: revoke the buffer at the
+ * VSP (only when a section size was recorded), tear down the GPADL and
+ * free the contiguous buffer.  A step is skipped when the field that
+ * tracks it is zero/NULL.
+ *
+ * Returns 0 on success.  When the revoke message cannot be sent or the
+ * GPADL teardown fails, that error is returned at once and everything not
+ * yet released is intentionally left allocated.
+ */
+static int
+hv_nv_destroy_send_buffer(netvsc_dev *net_dev)
+{
+	nvsp_msg *revoke_pkt;
+	int ret = 0;
+
+	/*
+	 * If we got a section size, it means we received a
+	 * send_send_buf_complete msg
+	 * (ie sent nvsp_msg_1_type_send_send_buf msg) therefore,
+	 * we need to send a revoke msg here
+	 */
+	if (net_dev->send_section_size) {
+		/* Send the revoke send buffer */
+		revoke_pkt = &net_dev->revoke_packet;
+		memset(revoke_pkt, 0, sizeof(nvsp_msg));
+
+		revoke_pkt->hdr.msg_type =
+		    nvsp_msg_1_type_revoke_send_buf;
+		revoke_pkt->msgs.vers_1_msgs.revoke_send_buf.id =
+		    NETVSC_SEND_BUFFER_ID;
+
+		ret = hv_vmbus_channel_send_packet(net_dev->dev->channel,
+		    revoke_pkt, sizeof(nvsp_msg),
+		    (uint64_t)revoke_pkt,
+		    HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, 0);
+		/*
+		 * If we failed here, we might as well return and have a leak
+		 * rather than continue and a bugchk
+		 */
+		if (ret != 0) {
+			return (ret);
+		}
+	}
+
+	/* Tear down the gpadl on the vsp end */
+	if (net_dev->send_buf_gpadl_handle) {
+		ret = hv_vmbus_channel_teardown_gpdal(net_dev->dev->channel,
+		    net_dev->send_buf_gpadl_handle);
+
+		/*
+		 * If we failed here, we might as well return and have a leak
+		 * rather than continue and a bugchk
+		 */
+		if (ret != 0) {
+			return (ret);
+		}
+		net_dev->send_buf_gpadl_handle = 0;
+	}
+
+	if (net_dev->send_buf) {
+		/* Free up the send buffer */
+		contigfree(net_dev->send_buf, net_dev->send_buf_size, M_DEVBUF);
+		net_dev->send_buf = NULL;
+	}
+
+	/*
+	 * Forget the negotiated section size so that a repeated call does
+	 * not send a second revoke for a buffer that is already gone; the
+	 * receive path resets rx_section_count in the same way.
+	 */
+	net_dev->send_section_size = 0;
+
+	return (ret);
+}
+
+
+/*
+ * Attempt to negotiate the caller-specified NVSP version
+ *
+ * For NVSP v2, Server 2008 R2 does not set
+ * init_pkt->msgs.init_msgs.init_compl.negotiated_prot_vers
+ * to the negotiated version, so we cannot rely on that.
+ *
+ * Sends an nvsp_msg_type_init message that offers nvsp_ver as the only
+ * acceptable version and waits on channel_init_sema for the completion.
+ *
+ * Returns 0 when the VSP accepted the version, EINVAL when it answered
+ * with a non-success status, or the vmbus error when the request could
+ * not be sent.
+ */
+static int
+hv_nv_negotiate_nvsp_protocol(struct hv_device *device, netvsc_dev *net_dev,
+    uint32_t nvsp_ver)
+{
+	nvsp_msg *init_pkt;
+	int ret;
+
+	init_pkt = &net_dev->channel_init_packet;
+	memset(init_pkt, 0, sizeof(nvsp_msg));
+	init_pkt->hdr.msg_type = nvsp_msg_type_init;
+
+	/*
+	 * Specify parameter as the only acceptable protocol version
+	 */
+	init_pkt->msgs.init_msgs.init.p1.protocol_version = nvsp_ver;
+	init_pkt->msgs.init_msgs.init.protocol_version_2 = nvsp_ver;
+
+	/* Send the init request */
+	ret = hv_vmbus_channel_send_packet(device->channel, init_pkt,
+	    sizeof(nvsp_msg), (uint64_t)init_pkt,
+	    HV_VMBUS_PACKET_TYPE_DATA_IN_BAND,
+	    HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
+	if (ret != 0) {
+		/* Hand the real error to the caller instead of a bare -1 */
+		return (ret);
+	}
+
+	/* The completion handler copies the reply into init_pkt */
+	sema_wait(&net_dev->channel_init_sema);
+
+	if (init_pkt->msgs.init_msgs.init_compl.status != nvsp_status_success)
+		return (EINVAL);
+
+	return (0);
+}
+
+/*
+ * Send NDIS version 2 config packet containing MTU.
+ *
+ * Not valid for NDIS version 1.
+ *
+ * The packet carries the MTU and sets the ieee8021q capability bit.  No
+ * completion is requested, so the function does not wait for an answer.
+ *
+ * Returns 0 on success, ENODEV when no outbound net device is available,
+ * or the error reported by hv_vmbus_channel_send_packet().  Error values
+ * are positive, matching the rest of this file.
+ */
+static int
+hv_nv_send_ndis_config(struct hv_device *device, uint32_t mtu)
+{
+	netvsc_dev *net_dev;
+	nvsp_msg *init_pkt;
+
+	net_dev = hv_nv_get_outbound_net_device(device);
+	if (net_dev == NULL)
+		return (ENODEV);
+
+	/*
+	 * Set up configuration packet, write MTU
+	 * Indicate we are capable of handling VLAN tags
+	 */
+	init_pkt = &net_dev->channel_init_packet;
+	memset(init_pkt, 0, sizeof(nvsp_msg));
+	init_pkt->hdr.msg_type = nvsp_msg_2_type_send_ndis_config;
+	init_pkt->msgs.vers_2_msgs.send_ndis_config.mtu = mtu;
+	init_pkt->
+	    msgs.vers_2_msgs.send_ndis_config.capabilities.u1.u2.ieee8021q
+	    = 1;
+
+	/* Send the configuration packet and report its status unchanged */
+	return (hv_vmbus_channel_send_packet(device->channel, init_pkt,
+	    sizeof(nvsp_msg), (uint64_t)init_pkt,
+	    HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, 0));
+}
+
+/*
+ * Net VSC connect to VSP
+ *
+ * Runs the start-up handshake with the NetVSP:
+ *   1. negotiate the NVSP version (v2 first, then v1),
+ *   2. for v2 and later, send the NDIS config (MTU, VLAN capability),
+ *   3. send the NDIS version,
+ *   4. post the receive buffer and then the send buffer.
+ *
+ * Returns 0 on success, ENODEV when no outbound net device is available,
+ * or the error of the step that failed.  If the send buffer cannot be
+ * set up, the receive buffer that was already posted is destroyed again
+ * so that nothing stays allocated on failure.
+ */
+static int
+hv_nv_connect_to_vsp(struct hv_device *device)
+{
+	netvsc_dev *net_dev;
+	nvsp_msg *init_pkt;
+	uint32_t nvsp_vers;
+	uint32_t ndis_version;
+	int ret = 0;
+	device_t dev = device->device;
+	hn_softc_t *sc = device_get_softc(dev);
+	struct ifnet *ifp = sc->arpcom.ac_ifp;
+
+	net_dev = hv_nv_get_outbound_net_device(device);
+	if (!net_dev) {
+		return (ENODEV);
+	}
+
+	/*
+	 * Negotiate the NVSP version.  Try NVSP v2 first.
+	 */
+	nvsp_vers = NVSP_PROTOCOL_VERSION_2;
+	ret = hv_nv_negotiate_nvsp_protocol(device, net_dev, nvsp_vers);
+	if (ret != 0) {
+		/* NVSP v2 failed, try NVSP v1 */
+		nvsp_vers = NVSP_PROTOCOL_VERSION_1;
+		ret = hv_nv_negotiate_nvsp_protocol(device, net_dev, nvsp_vers);
+		if (ret != 0) {
+			/* NVSP v1 failed, return bad status */
+			return (ret);
+		}
+	}
+	net_dev->nvsp_version = nvsp_vers;
+
+	/*
+	 * Set the MTU if supported by this NVSP protocol version
+	 * This needs to be right after the NVSP init message per Haiyang
+	 *
+	 * The result has always been discarded here (it was overwritten
+	 * by the next send); the call stays best-effort.
+	 */
+	if (nvsp_vers >= NVSP_PROTOCOL_VERSION_2)
+		(void)hv_nv_send_ndis_config(device, ifp->if_mtu);
+
+	/*
+	 * Send the NDIS version
+	 */
+	init_pkt = &net_dev->channel_init_packet;
+
+	memset(init_pkt, 0, sizeof(nvsp_msg));
+
+	/*
+	 * Updated to version 5.1, minimum, for VLAN per Haiyang
+	 */
+	ndis_version = NDIS_VERSION;
+
+	/* Upper 16 bits carry the major, lower 16 bits the minor version */
+	init_pkt->hdr.msg_type = nvsp_msg_1_type_send_ndis_vers;
+	init_pkt->msgs.vers_1_msgs.send_ndis_vers.ndis_major_vers =
+	    (ndis_version & 0xFFFF0000) >> 16;
+	init_pkt->msgs.vers_1_msgs.send_ndis_vers.ndis_minor_vers =
+	    ndis_version & 0xFFFF;
+
+	/* Send the init request */
+
+	ret = hv_vmbus_channel_send_packet(device->channel, init_pkt,
+	    sizeof(nvsp_msg), (uint64_t)init_pkt,
+	    HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, 0);
+	if (ret != 0) {
+		return (ret);
+	}
+	/*
+	 * TODO: BUGBUG - We have to wait for the above msg since the netvsp
+	 * uses KMCL which acknowledges packet (completion packet)
+	 * since our Vmbus always set the
+	 * HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED flag
+	 */
+	/* sema_wait(&NetVscChannel->channel_init_sema); */
+
+	/* Post the big receive buffer to NetVSP */
+	ret = hv_nv_init_rx_buffer_with_net_vsp(device);
+	if (ret != 0) {
+		/* The rx init path has already cleaned up after itself */
+		return (ret);
+	}
+
+	ret = hv_nv_init_send_buffer_with_net_vsp(device);
+	if (ret != 0) {
+		/*
+		 * The send buffer init released its own resources, but
+		 * the receive buffer is still posted.  The caller only
+		 * closes the channel and frees net_dev, so revoke and
+		 * free the receive buffer here to avoid leaking it.
+		 */
+		(void)hv_nv_destroy_rx_buffer(net_dev);
+	}
+
+	return (ret);
+}
+
+/*
+ * Net VSC disconnect from VSP
+ *
+ * Releases the receive buffer first and the send buffer second.  The
+ * status of either teardown is not reported to the caller.
+ */
+static void
+hv_nv_disconnect_from_vsp(netvsc_dev *net_dev)
+{
+	(void)hv_nv_destroy_rx_buffer(net_dev);
+	(void)hv_nv_destroy_send_buffer(net_dev);
+}
+
+/*
+ * Net VSC on device add
+ *
+ * Callback when the device belonging to this driver is added
+ *
+ * Allocates the netvsc device, pre-allocates the pool of receive packet
+ * descriptors, opens the vmbus channel and connects to the NetVSP.
+ *
+ * Returns the new netvsc_dev on success and NULL on any failure.  On
+ * failure the channel is closed again (if it had been opened) and the
+ * packet pool, lock, semaphore and device structure are released.
+ * additional_info is not used.
+ */
+netvsc_dev *
+hv_nv_on_device_add(struct hv_device *device, void *additional_info)
+{
+	netvsc_dev *nvdev;
+	netvsc_packet *pkt;
+	int n;
+
+	nvdev = hv_nv_alloc_net_device(device);
+	if (nvdev == NULL)
+		return (NULL);
+
+	/* Initialize the NetVSC channel extension */
+	nvdev->rx_buf_size = NETVSC_RECEIVE_BUFFER_SIZE;
+	nvdev->send_buf_size = NETVSC_SEND_BUFFER_SIZE;
+	mtx_init(&nvdev->rx_pkt_list_lock, "HV-RPL", NULL,
+	    MTX_SPIN | MTX_RECURSE);
+
+	/* Same effect as STAILQ_HEAD_INITIALIZER() static initializer */
+	STAILQ_INIT(&nvdev->myrx_packet_list);
+
+	/*
+	 * Fill the free list with packet descriptors, each followed by
+	 * room for NETVSC_RECEIVE_SG_COUNT page buffers.  An allocation
+	 * failure merely ends the loop with a shorter list.
+	 */
+	for (n = 0; n < NETVSC_RECEIVE_PACKETLIST_COUNT; n++) {
+		pkt = malloc(sizeof(netvsc_packet) +
+		    (NETVSC_RECEIVE_SG_COUNT * sizeof(hv_vmbus_page_buffer)),
+		    M_DEVBUF, M_NOWAIT | M_ZERO);
+		if (pkt == NULL)
+			break;
+		STAILQ_INSERT_TAIL(&nvdev->myrx_packet_list, pkt,
+		    mylist_entry);
+	}
+
+	sema_init(&nvdev->channel_init_sema, 0, "netdev_sema");
+
+	/* Open the channel */
+	if (hv_vmbus_channel_open(device->channel,
+	    NETVSC_DEVICE_RING_BUFFER_SIZE, NETVSC_DEVICE_RING_BUFFER_SIZE,
+	    NULL, 0, hv_nv_on_channel_callback, device) != 0)
+		goto release;
+
+	/* Connect with the NetVsp */
+	if (hv_nv_connect_to_vsp(device) == 0)
+		return (nvdev);
+
+	/* Connecting failed: now we can close the channel safely */
+	hv_vmbus_channel_close(device->channel);
+
+release:
+	/*
+	 * Free the packet buffers on the netvsc device packet queue.
+	 * Release other resources.
+	 */
+	sema_destroy(&nvdev->channel_init_sema);
+
+	/* Popping every entry leaves the list in its initial state */
+	while ((pkt = STAILQ_FIRST(&nvdev->myrx_packet_list)) != NULL) {
+		STAILQ_REMOVE_HEAD(&nvdev->myrx_packet_list, mylist_entry);
+		free(pkt, M_DEVBUF);
+	}
+
+	mtx_destroy(&nvdev->rx_pkt_list_lock);
+	free(nvdev, M_DEVBUF);
+
+	return (NULL);
+}
+
+/*
+ * Net VSC on device remove
+ *
+ * Marks the device as being destroyed, waits for all outstanding sends
+ * to complete, releases the buffers shared with the VSP, closes the
+ * channel and frees the netvsc device with its packet pool.  When
+ * destroy_channel is false the channel state is first set to
+ * HV_CHANNEL_CLOSING_NONDESTRUCTIVE_STATE.
+ *
+ * Always returns 0.
+ */
+int
+hv_nv_on_device_remove(struct hv_device *device, boolean_t destroy_channel)
+{
+	hn_softc_t *sc = device_get_softc(device->device);
+	netvsc_dev *net_dev = sc->net_dev;
+	netvsc_packet *pkt;
+
+	/* Stop outbound traffic ie sends and receives completions */
+	mtx_lock(&device->channel->inbound_lock);
+	net_dev->destroy = TRUE;
+	mtx_unlock(&device->channel->inbound_lock);
+
+	/* Poll until every send completion has arrived */
+	while (net_dev->num_outstanding_sends != 0)
+		DELAY(100);
+
+	hv_nv_disconnect_from_vsp(net_dev);
+
+	/* At this point, no one should be accessing net_dev except in here */
+
+	/* Now, we can close the channel safely */
+	if (!destroy_channel)
+		device->channel->state =
+		    HV_CHANNEL_CLOSING_NONDESTRUCTIVE_STATE;
+
+	hv_vmbus_channel_close(device->channel);
+
+	/*
+	 * Release all resources.  Popping every entry leaves the list in
+	 * its initial state.
+	 */
+	while ((pkt = STAILQ_FIRST(&net_dev->myrx_packet_list)) != NULL) {
+		STAILQ_REMOVE_HEAD(&net_dev->myrx_packet_list, mylist_entry);
+		free(pkt, M_DEVBUF);
+	}
+
+	mtx_destroy(&net_dev->rx_pkt_list_lock);
+	sema_destroy(&net_dev->channel_init_sema);
+	free(net_dev, M_DEVBUF);
+
+	return (0);
+}
+
+/*
+ * Net VSC on send completion
+ *
+ * Handles a completion packet from the host.  Replies to the
+ * initialisation requests (init, send_rx_buf, send_send_buf) are copied
+ * into channel_init_packet and the waiter on channel_init_sema is
+ * released.  A completion for an RNDIS packet invokes the send
+ * completion callback stored in the originating netvsc_packet and
+ * decrements num_outstanding_sends.  Other message types are ignored.
+ */
+static void
+hv_nv_on_send_completion(struct hv_device *device, hv_vm_packet_descriptor *pkt)
+{
+	netvsc_dev *nvdev;
+	nvsp_msg *reply;
+	netvsc_packet *sent;
+
+	nvdev = hv_nv_get_inbound_net_device(device);
+	if (nvdev == NULL)
+		return;
+
+	/* data_offset8 is expressed in units of 8 bytes */
+	reply = (nvsp_msg *)((unsigned long)pkt + (pkt->data_offset8 << 3));
+
+	switch (reply->hdr.msg_type) {
+	case nvsp_msg_type_init_complete:
+	case nvsp_msg_1_type_send_rx_buf_complete:
+	case nvsp_msg_1_type_send_send_buf_complete:
+		/* Copy the response back and wake the initialising thread */
+		memcpy(&nvdev->channel_init_packet, reply, sizeof(nvsp_msg));
+		sema_post(&nvdev->channel_init_sema);
+		break;
+
+	case nvsp_msg_1_type_send_rndis_pkt_complete:
+		/* The transaction id is the pointer passed at send time */
+		sent = (netvsc_packet *)(unsigned long)pkt->transaction_id;
+
+		/* Notify the layer above us */
+		sent->compl.send.on_send_completion(
+		    sent->compl.send.send_completion_context);
+
+		atomic_subtract_int(&nvdev->num_outstanding_sends, 1);
+		break;
+
+	default:
+		break;
+	}
+}
+
+/*
+ * Net VSC on send
+ * Sends a packet on the specified Hyper-V device.
+ * Returns 0 on success, non-zero on failure.
+ *
+ * The packet is described to the host by an
+ * nvsp_msg_1_type_send_rndis_pkt message; the send buffer is not used.
+ * Packets that carry page buffers go out through
+ * hv_vmbus_channel_send_packet_pagebuffer(), all others in-band with a
+ * completion requested.  The pkt pointer is used as the transaction id.
+ * ENODEV is returned when no outbound net device is available.
+ */
+int
+hv_nv_on_send(struct hv_device *device, netvsc_packet *pkt)
+{
+	netvsc_dev *net_dev;
+	nvsp_msg send_msg;
+	int ret;
+
+	net_dev = hv_nv_get_outbound_net_device(device);
+	if (!net_dev)
+		return (ENODEV);
+
+	/*
+	 * The whole nvsp_msg is handed to the host, so clear it first;
+	 * otherwise the bytes that are not assigned below would carry
+	 * stale kernel stack contents.
+	 */
+	memset(&send_msg, 0, sizeof(send_msg));
+
+	send_msg.hdr.msg_type = nvsp_msg_1_type_send_rndis_pkt;
+	if (pkt->is_data_pkt) {
+		/* 0 is RMC_DATA */
+		send_msg.msgs.vers_1_msgs.send_rndis_pkt.chan_type = 0;
+	} else {
+		/* 1 is RMC_CONTROL */
+		send_msg.msgs.vers_1_msgs.send_rndis_pkt.chan_type = 1;
+	}
+
+	/* Not using send buffer section */
+	send_msg.msgs.vers_1_msgs.send_rndis_pkt.send_buf_section_idx =
+	    0xFFFFFFFF;
+	send_msg.msgs.vers_1_msgs.send_rndis_pkt.send_buf_section_size = 0;
+
+	if (pkt->page_buf_count) {
+		ret = hv_vmbus_channel_send_packet_pagebuffer(device->channel,
+		    pkt->page_buffers, pkt->page_buf_count,
+		    &send_msg, sizeof(nvsp_msg), (uint64_t)pkt);
+	} else {
+		ret = hv_vmbus_channel_send_packet(device->channel,
+		    &send_msg, sizeof(nvsp_msg), (uint64_t)pkt,
+		    HV_VMBUS_PACKET_TYPE_DATA_IN_BAND,
+		    HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
+	}
+
+	/*
+	 * Record outstanding send only if send_packet() succeeded
+	 * NOTE(review): the counter is raised after the packet has been
+	 * queued, so a completion may be accounted first - confirm the
+	 * counter tolerates that ordering.
+	 */
+	if (ret == 0)
+		atomic_add_int(&net_dev->num_outstanding_sends, 1);
+
+	return (ret);
+}
+
+/*
+ * Net VSC on receive
+ *
+ * In the FreeBSD Hyper-V virtual world, this function deals exclusively
+ * with virtual addresses.
+ *
+ * Handles one inbound transfer-page packet.  The packet is dropped
+ * silently unless it is of type DATA_USING_TRANSFER_PAGES, carries an
+ * nvsp_msg_1_type_send_rndis_pkt message and refers to
+ * NETVSC_RECEIVE_BUFFER_ID.
+ *
+ * Up to range_count + 1 descriptors are taken from the free list: the
+ * first one is reused as the xfer_page_packet bookkeeping object, every
+ * further one describes a single range.  Each range descriptor is passed
+ * to hv_rf_on_receive() and then to hv_nv_on_receive_completion().  When
+ * fewer descriptors than ranges are available only the leading ranges are
+ * processed.  When fewer than two descriptors are available they are put
+ * back and a receive completion is sent for the transaction right away.
+ */
+static void
+hv_nv_on_receive(struct hv_device *device, hv_vm_packet_descriptor *pkt)
+{
+ netvsc_dev *net_dev;
+ hv_vm_transfer_page_packet_header *vm_xfer_page_pkt;
+ nvsp_msg *nvsp_msg_pkt;
+ netvsc_packet *net_vsc_pkt = NULL;
+ unsigned long start;
+ xfer_page_packet *xfer_page_pkt = NULL;
+ STAILQ_HEAD(PKT_LIST, netvsc_packet_) mylist_head =
+ STAILQ_HEAD_INITIALIZER(mylist_head);
+ int count = 0;
+ int i = 0;
+
+ net_dev = hv_nv_get_inbound_net_device(device);
+ if (!net_dev)
+ return;
+
+ /*
+ * All inbound packets other than send completion should be
+ * xfer page packet.
+ */
+ if (pkt->type != HV_VMBUS_PACKET_TYPE_DATA_USING_TRANSFER_PAGES)
+ return;
+
+ /* The payload starts data_offset8 * 8 bytes into the descriptor */
+ nvsp_msg_pkt = (nvsp_msg *)((unsigned long)pkt
+ + (pkt->data_offset8 << 3));
+
+ /* Make sure this is a valid nvsp packet */
+ if (nvsp_msg_pkt->hdr.msg_type != nvsp_msg_1_type_send_rndis_pkt)
+ return;
+
+ vm_xfer_page_pkt = (hv_vm_transfer_page_packet_header *)pkt;
+
+ if (vm_xfer_page_pkt->transfer_page_set_id
+ != NETVSC_RECEIVE_BUFFER_ID) {
+ return;
+ }
+
+ /* The local list was already set up by its initializer above */
+ STAILQ_INIT(&mylist_head);
+
+ /*
+ * Grab free packets (range count + 1) to represent this xfer page
+ * packet. +1 to represent the xfer page packet itself. We grab it
+ * here so that we know exactly how many we can fulfill.
+ */
+ mtx_lock_spin(&net_dev->rx_pkt_list_lock);
+ while (!STAILQ_EMPTY(&net_dev->myrx_packet_list)) {
+ net_vsc_pkt = STAILQ_FIRST(&net_dev->myrx_packet_list);
+ STAILQ_REMOVE_HEAD(&net_dev->myrx_packet_list, mylist_entry);
+
+ STAILQ_INSERT_TAIL(&mylist_head, net_vsc_pkt, mylist_entry);
+
+ if (++count == vm_xfer_page_pkt->range_count + 1)
+ break;
+ }
+
+ mtx_unlock_spin(&net_dev->rx_pkt_list_lock);
+
+ /*
+ * We need at least 2 netvsc pkts (1 to represent the xfer page
+ * and at least 1 for the range) i.e. we can handle some of the
+ * xfer page packet ranges...
+ */
+ if (count < 2) {
+ /* Return netvsc packet to the freelist */
+ mtx_lock_spin(&net_dev->rx_pkt_list_lock);
+ for (i=count; i != 0; i--) {
+ net_vsc_pkt = STAILQ_FIRST(&mylist_head);
+ STAILQ_REMOVE_HEAD(&mylist_head, mylist_entry);
+
+ STAILQ_INSERT_TAIL(&net_dev->myrx_packet_list,
+ net_vsc_pkt, mylist_entry);
+ }
+ mtx_unlock_spin(&net_dev->rx_pkt_list_lock);
+
+ /* Nothing was delivered; complete the transaction anyway */
+ hv_nv_send_receive_completion(device,
+ vm_xfer_page_pkt->d.transaction_id);
+
+ return;
+ }
+
+ /* Take the first packet in the list; it only does the bookkeeping */
+ xfer_page_pkt = (xfer_page_packet *)STAILQ_FIRST(&mylist_head);
+ STAILQ_REMOVE_HEAD(&mylist_head, mylist_entry);
+
+ /* This is how many data packets we can supply */
+ xfer_page_pkt->count = count - 1;
+
+ /* Each range represents 1 RNDIS pkt that contains 1 Ethernet frame */
+ for (i=0; i < (count - 1); i++) {
+ net_vsc_pkt = STAILQ_FIRST(&mylist_head);
+ STAILQ_REMOVE_HEAD(&mylist_head, mylist_entry);
+
+ /*
+ * Initialize the netvsc packet
+ */
+ net_vsc_pkt->xfer_page_pkt = xfer_page_pkt;
+ net_vsc_pkt->compl.rx.rx_completion_context = net_vsc_pkt;
+ net_vsc_pkt->device = device;
+ /* Save this so that we can send it back */
+ net_vsc_pkt->compl.rx.rx_completion_tid =
+ vm_xfer_page_pkt->d.transaction_id;
+
+ net_vsc_pkt->tot_data_buf_len =
+ vm_xfer_page_pkt->ranges[i].byte_count;
+ net_vsc_pkt->page_buf_count = 1;
+
+ net_vsc_pkt->page_buffers[0].length =
+ vm_xfer_page_pkt->ranges[i].byte_count;
+
+ /*
+ * The virtual address of the packet in the receive buffer,
+ * rounded down to the start of its page
+ */
+ start = ((unsigned long)net_dev->rx_buf +
+ vm_xfer_page_pkt->ranges[i].byte_offset);
+ start = ((unsigned long)start) & ~(PAGE_SIZE - 1);
+
+ /* Page number of the virtual page containing packet start */
+ net_vsc_pkt->page_buffers[0].pfn = start >> PAGE_SHIFT;
+
+ /* Calculate the page relative offset */
+ net_vsc_pkt->page_buffers[0].offset =
+ vm_xfer_page_pkt->ranges[i].byte_offset & (PAGE_SIZE - 1);
+
+ /*
+ * In this implementation, we are dealing with virtual
+ * addresses exclusively. Since we aren't using physical
+ * addresses at all, we don't care if a packet crosses a
+ * page boundary. For this reason, the original code to
+ * check for and handle page crossings has been removed.
+ */
+
+ /*
+ * Pass it to the upper layer. The receive completion call
+ * has been moved into this function.
+ */
+ hv_rf_on_receive(device, net_vsc_pkt);
+
+ /*
+ * Moved completion call back here so that all received
+ * messages (not just data messages) will trigger a response
+ * message back to the host.
+ */
+ hv_nv_on_receive_completion(net_vsc_pkt);
+ }
+}
+
+/*
+ * Net VSC send receive completion
+ *
+ * Sends an nvsp_msg_1_type_send_rndis_pkt_complete message with status
+ * success for transaction tid.  When the channel reports EAGAIN the send
+ * is retried up to three more times with a 100 microsecond pause in
+ * between; any other outcome ends the attempt.  Failures are not
+ * reported to the caller.
+ */
+static void
+hv_nv_send_receive_completion(struct hv_device *device, uint64_t tid)
+{
+	nvsp_msg rx_comp_msg;
+	int retries = 0;
+	int ret = 0;
+
+	/*
+	 * The whole nvsp_msg is handed to the host, so clear it first;
+	 * otherwise the bytes that are not assigned below would carry
+	 * stale kernel stack contents.
+	 */
+	memset(&rx_comp_msg, 0, sizeof(rx_comp_msg));
+
+	rx_comp_msg.hdr.msg_type = nvsp_msg_1_type_send_rndis_pkt_complete;
+
+	/* Pass in the status */
+	rx_comp_msg.msgs.vers_1_msgs.send_rndis_pkt_complete.status =
+	    nvsp_status_success;
+
+	for (;;) {
+		/* Send the completion */
+		ret = hv_vmbus_channel_send_packet(device->channel,
+		    &rx_comp_msg, sizeof(nvsp_msg), tid,
+		    HV_VMBUS_PACKET_TYPE_COMPLETION, 0);
+
+		/* Success and hard errors both end the loop */
+		if (ret != EAGAIN)
+			break;
+
+		/* no more room... wait a bit and attempt to retry 3 times */
+		if (++retries >= 4)
+			break;
+		DELAY(100);
+	}
+}
+
+/*
+ * Net VSC on receive completion
+ *
+ * Send a receive completion packet to RNDIS device (ie NetVsp)
+ *
+ * context is the netvsc_packet that has been consumed.  The packet is
+ * put back on the free list and the reference count of its transfer
+ * page packet is dropped.  When that count reaches zero the transfer
+ * page packet is put back as well and one receive completion is sent to
+ * the host for the saved transaction id.
+ */
+void
+hv_nv_on_receive_completion(void *context)
+{
+	netvsc_packet *pkt = (netvsc_packet *)context;
+	struct hv_device *device = (struct hv_device *)pkt->device;
+	netvsc_dev *nvdev;
+	uint64_t tid = 0;
+	boolean_t last_range = FALSE;
+
+	/*
+	 * The inbound lookup is used on purpose: outbound traffic may
+	 * already have been disabled while completions still have to be
+	 * delivered.
+	 */
+	nvdev = hv_nv_get_inbound_net_device(device);
+	if (nvdev == NULL)
+		return;
+
+	/* Overloading use of the lock. */
+	mtx_lock_spin(&nvdev->rx_pkt_list_lock);
+
+	pkt->xfer_page_pkt->count--;
+	if (pkt->xfer_page_pkt->count == 0) {
+		/*
+		 * Last one in the line that represent 1 xfer page packet.
+		 * Return the xfer page packet itself to the free list.
+		 */
+		last_range = TRUE;
+		tid = pkt->compl.rx.rx_completion_tid;
+		STAILQ_INSERT_TAIL(&nvdev->myrx_packet_list,
+		    (netvsc_packet *)(pkt->xfer_page_pkt), mylist_entry);
+	}
+
+	/* Put the packet back on the free list */
+	STAILQ_INSERT_TAIL(&nvdev->myrx_packet_list, pkt, mylist_entry);
+
+	mtx_unlock_spin(&nvdev->rx_pkt_list_lock);
+
+	/* Send a receive completion for the xfer page packet */
+	if (last_range)
+		hv_nv_send_receive_completion(device, tid);
+}
+
+/*
+ * Net VSC on channel callback
+ *
+ * Drains the inbound ring of the channel.  Completion packets go to
+ * hv_nv_on_send_completion(), transfer-page packets go to
+ * hv_nv_on_receive(), every other packet type is ignored.  The loop ends
+ * when a read returns no data, when a larger buffer cannot be allocated,
+ * or when the read fails with an unexpected error.
+ *
+ * context is the struct hv_device registered when the channel was
+ * opened.
+ */
+static void
+hv_nv_on_channel_callback(void *context)
+{
+	/* Fixme: Magic number */
+	const int net_pkt_size = 2048;
+	struct hv_device *device = (struct hv_device *)context;
+	netvsc_dev *net_dev;
+	uint32_t bytes_rxed;
+	uint64_t request_id;
+	hv_vm_packet_descriptor *desc;
+	uint8_t *buffer;
+	int bufferlen = net_pkt_size;
+	int ret = 0;
+
+	buffer = malloc(net_pkt_size * sizeof(uint8_t), M_DEVBUF, M_NOWAIT);
+	if (buffer == NULL)
+		return;
+
+	net_dev = hv_nv_get_inbound_net_device(device);
+	if (net_dev == NULL)
+		goto out;
+
+	for (;;) {
+		ret = hv_vmbus_channel_recv_packet_raw(device->channel,
+		    buffer, bufferlen, &bytes_rxed, &request_id);
+
+		if (ret == ENOBUFS) {
+			/*
+			 * Handle large packet: bytes_rxed holds the size
+			 * that is needed, so retry with a buffer of that
+			 * size.
+			 */
+			free(buffer, M_DEVBUF);
+			buffer = malloc(bytes_rxed, M_DEVBUF, M_NOWAIT);
+			if (buffer == NULL)
+				return;
+			bufferlen = bytes_rxed;
+			continue;
+		}
+
+		/*
+		 * Stop on an empty ring.  Also stop on any other error
+		 * code, which previously made this loop spin forever.
+		 */
+		if (ret != 0 || bytes_rxed == 0)
+			break;
+
+		desc = (hv_vm_packet_descriptor *)buffer;
+		switch (desc->type) {
+		case HV_VMBUS_PACKET_TYPE_COMPLETION:
+			hv_nv_on_send_completion(device, desc);
+			break;
+		case HV_VMBUS_PACKET_TYPE_DATA_USING_TRANSFER_PAGES:
+			hv_nv_on_receive(device, desc);
+			break;
+		default:
+			break;
+		}
+	}
+
+out:
+	free(buffer, M_DEVBUF);
+}
+
diff --git a/sys/dev/hyperv/netvsc/hv_net_vsc.h b/sys/dev/hyperv/netvsc/hv_net_vsc.h
new file mode 100644
index 000000000000..f7e7d00a903f
--- /dev/null
+++ b/sys/dev/hyperv/netvsc/hv_net_vsc.h
@@ -0,0 +1,995 @@
+/*-
+ * Copyright (c) 2009-2012 Microsoft Corp.
+ * Copyright (c) 2010-2012 Citrix Inc.
+ * Copyright (c) 2012 NetApp Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice unmodified, this list of conditions, and the following
+ * disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * HyperV vmbus (virtual machine bus) network VSC (virtual services client)
+ * header file
+ *
+ * (Updated from unencumbered NvspProtocol.h)
+ */
+
+#ifndef __HV_NET_VSC_H__
+#define __HV_NET_VSC_H__
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/lock.h>
+#include <sys/sx.h>
+
+#include <dev/hyperv/include/hyperv.h>
+
+
+#define NVSP_INVALID_PROTOCOL_VERSION (0xFFFFFFFF)
+
+#define NVSP_PROTOCOL_VERSION_1 2 /* offered as fallback when v2 is refused */
+#define NVSP_PROTOCOL_VERSION_2 0x30002 /* offered first during negotiation */
+#define NVSP_MIN_PROTOCOL_VERSION (NVSP_PROTOCOL_VERSION_1)
+#define NVSP_MAX_PROTOCOL_VERSION (NVSP_PROTOCOL_VERSION_2)
+
+#define NVSP_PROTOCOL_VERSION_CURRENT NVSP_PROTOCOL_VERSION_2
+
+#define NVSP_OPERATIONAL_STATUS_OK (0x00000000)
+#define NVSP_OPERATIONAL_STATUS_DEGRADED (0x00000001)
+#define NVSP_OPERATIONAL_STATUS_NONRECOVERABLE (0x00000002)
+#define NVSP_OPERATIONAL_STATUS_NO_CONTACT (0x00000003)
+#define NVSP_OPERATIONAL_STATUS_LOST_COMMUNICATION (0x00000004)
+
+/*
+ * Maximum number of transfer pages (packets) the VSP will use on a receive
+ */
+#define NVSP_MAX_PACKETS_PER_RECEIVE 375
+
+
+typedef enum nvsp_msg_type_ { /* value stored in nvsp_msg_hdr.msg_type */
+ nvsp_msg_type_none = 0,
+
+ /*
+ * Init Messages
+ */
+ nvsp_msg_type_init = 1,
+ nvsp_msg_type_init_complete = 2,
+
+ /* Version specific messages are numbered from here on */
+ nvsp_version_msg_start = 100,
+
+ /*
+ * Version 1 Messages
+ *
+ * The enumerators below carry no explicit value and therefore
+ * continue sequentially: send_ndis_vers is 100, send_rx_buf 101,
+ * ... send_rndis_pkt_complete 108.
+ */
+ nvsp_msg_1_type_send_ndis_vers = nvsp_version_msg_start,
+
+ nvsp_msg_1_type_send_rx_buf,
+ nvsp_msg_1_type_send_rx_buf_complete,
+ nvsp_msg_1_type_revoke_rx_buf,
+
+ nvsp_msg_1_type_send_send_buf,
+ nvsp_msg_1_type_send_send_buf_complete,
+ nvsp_msg_1_type_revoke_send_buf,
+
+ nvsp_msg_1_type_send_rndis_pkt,
+ nvsp_msg_1_type_send_rndis_pkt_complete,
+
+ /*
+ * Version 2 Messages
+ *
+ * Numbering continues without a gap after the version 1 block
+ * (the first enumerator below is 109).
+ */
+ nvsp_msg_2_type_send_chimney_delegated_buf,
+ nvsp_msg_2_type_send_chimney_delegated_buf_complete,
+ nvsp_msg_2_type_revoke_chimney_delegated_buf,
+
+ nvsp_msg_2_type_resume_chimney_rx_indication,
+
+ nvsp_msg_2_type_terminate_chimney,
+ nvsp_msg_2_type_terminate_chimney_complete,
+
+ nvsp_msg_2_type_indicate_chimney_event,
+
+ nvsp_msg_2_type_send_chimney_packet,
+ nvsp_msg_2_type_send_chimney_packet_complete,
+
+ nvsp_msg_2_type_post_chimney_rx_request,
+ nvsp_msg_2_type_post_chimney_rx_request_complete,
+
+ nvsp_msg_2_type_alloc_rx_buf,
+ nvsp_msg_2_type_alloc_rx_buf_complete,
+
+ nvsp_msg_2_type_free_rx_buf,
+
+ nvsp_msg_2_send_vmq_rndis_pkt,
+ nvsp_msg_2_send_vmq_rndis_pkt_complete,
+
+ nvsp_msg_2_type_send_ndis_config,
+
+ nvsp_msg_2_type_alloc_chimney_handle,
+ nvsp_msg_2_type_alloc_chimney_handle_complete,
+} nvsp_msg_type;
+
+typedef enum nvsp_status_ { /* status codes found in NVSP completion messages */
+ nvsp_status_none = 0,
+ nvsp_status_success, /* the only value the init paths accept */
+ nvsp_status_failure,
+ /* Deprecated */
+ nvsp_status_prot_vers_range_too_new,
+ /* Deprecated */
+ nvsp_status_prot_vers_range_too_old,
+ nvsp_status_invalid_rndis_pkt,
+ nvsp_status_busy,
+ nvsp_status_max, /* last enumerator; presumably a bound, not a status */
+} nvsp_status;
+
+typedef struct nvsp_msg_hdr_ { /* header that selects the NVSP message body */
+ uint32_t msg_type; /* one of the nvsp_msg_type values */
+} __packed nvsp_msg_hdr;
+
+/*
+ * Init Messages
+ */
+
+/*
+ * This message is used by the VSC to initialize the channel
+ * after the channel has been opened. This message should
+ * never include anything other than versioning (i.e. this
+ * message will be the same forever).
+ *
+ * Forever is a long time. The values have been redefined
+ * in Win7 to indicate major and minor protocol version
+ * number.
+ *
+ * The driver writes the same version into both protocol_version and
+ * protocol_version_2, offering exactly one version per attempt.
+ */
+typedef struct nvsp_msg_init_ {
+ union {
+ struct {
+ uint16_t minor_protocol_version;
+ uint16_t major_protocol_version;
+ } s;
+ /* Formerly min_protocol_version */
+ uint32_t protocol_version;
+ } p1;
+ /* Formerly max_protocol_version */
+ uint32_t protocol_version_2;
+} __packed nvsp_msg_init;
+
+/*
+ * This message is used by the VSP to complete the initialization
+ * of the channel. This message should never include anything other
+ * than versioning (i.e. this message will be the same forever).
+ */
+typedef struct nvsp_msg_init_complete_ {
+ /* Deprecated */
+ uint32_t negotiated_prot_vers;
+ uint32_t max_mdl_chain_len;
+ uint32_t status; /* compared against nvsp_status_success */
+} __packed nvsp_msg_init_complete;
+
+typedef union nvsp_msg_init_uber_ { /* request and reply share the storage */
+ nvsp_msg_init init; /* request written by the VSC */
+ nvsp_msg_init_complete init_compl; /* reply read after the completion */
+} __packed nvsp_msg_init_uber;
+
+/*
+ * Version 1 Messages
+ */
+
+/*
+ * This message is used by the VSC to send the NDIS version
+ * to the VSP. The VSP can use this information when handling
+ * OIDs sent by the VSC.
+ *
+ * The driver fills the two fields from the upper and the lower 16 bits
+ * of NDIS_VERSION respectively.
+ */
+typedef struct nvsp_1_msg_send_ndis_version_ {
+ uint32_t ndis_major_vers;
+ /* Deprecated */
+ uint32_t ndis_minor_vers;
+} __packed nvsp_1_msg_send_ndis_version;
+
+/*
+ * This message is used by the VSC to send a receive buffer
+ * to the VSP. The VSP can then use the receive buffer to
+ * send data to the VSC.
+ */
+typedef struct nvsp_1_msg_send_rx_buf_ {
+ uint32_t gpadl_handle; /* GPADL handle established for the buffer */
+ uint16_t id; /* buffer id chosen by the VSC */
+} __packed nvsp_1_msg_send_rx_buf;
+
+typedef struct nvsp_1_rx_buf_section_ { /* one entry of the section table */
+ uint32_t offset; /* the driver requires 0 for its single section */
+ uint32_t sub_allocation_size;
+ uint32_t num_sub_allocations;
+ uint32_t end_offset;
+} __packed nvsp_1_rx_buf_section;
+
+/*
+ * This message is used by the VSP to acknowledge a receive
+ * buffer sent by the VSC. This message must be sent by the
+ * VSP before the VSP uses the receive buffer.
+ */
+typedef struct nvsp_1_msg_send_rx_buf_complete_ {
+ uint32_t status;
+ uint32_t num_sections; /* number of entries the driver copies from sections */
+
+ /*
+ * The receive buffer is split into two parts, a large
+ * suballocation section and a small suballocation
+ * section. These sections are then suballocated by a
+ * certain size.
+ *
+ * For example, the following break up of the receive
+ * buffer has 6 large suballocations and 10 small
+ * suballocations.
+ *
+ * |            Large Section          |  |   Small Section   |
+ * ------------------------------------------------------------
+ * |     |     |     |     |     |     |  | | | | | | | | | | |
+ * |                                      |
+ * LargeOffset                            SmallOffset
+ *
+ * Declared with one element; the driver reads num_sections
+ * entries starting here.
+ */
+ nvsp_1_rx_buf_section sections[1];
+
+} __packed nvsp_1_msg_send_rx_buf_complete;
+
+/*
+ * This message is sent by the VSC to revoke the receive buffer.
+ * After the VSP completes this transaction, the VSP should never
+ * use the receive buffer again.
+ */
+typedef struct nvsp_1_msg_revoke_rx_buf_ {
+ uint16_t id; /* id of the buffer to revoke */
+} __packed nvsp_1_msg_revoke_rx_buf;
+
+/*
+ * This message is used by the VSC to send a send buffer
+ * to the VSP. The VSC can then use the send buffer to
+ * send data to the VSP.
+ *
+ * The layout matches nvsp_1_msg_send_rx_buf field for field.
+ */
+typedef struct nvsp_1_msg_send_send_buf_ {
+ uint32_t gpadl_handle; /* GPADL handle established for the buffer */
+ uint16_t id; /* buffer id chosen by the VSC */
+} __packed nvsp_1_msg_send_send_buf;
+
+/*
+ * This message is used by the VSP to acknowledge a send
+ * buffer sent by the VSC. This message must be sent by the
+ * VSP before the VSP uses the send buffer.
+ */
+typedef struct nvsp_1_msg_send_send_buf_complete_ {
+ uint32_t status;
+
+ /*
+ * The VSC gets to choose the size of the send buffer and
+ * the VSP gets to choose the section size of the buffer.
+ * This was done to enable dynamic reconfigurations when
+ * the cost of GPA-direct buffers decreases.
+ */
+ uint32_t section_size;
+} __packed nvsp_1_msg_send_send_buf_complete;
+
+/*
+ * This message is sent by the VSC to revoke the send buffer.
+ * After the VSP completes this transaction, the VSP should never
+ * use the send buffer again.
+ */
+typedef struct nvsp_1_msg_revoke_send_buf_ {
+ uint16_t id; /* id of the buffer to revoke */
+} __packed nvsp_1_msg_revoke_send_buf;
+
+/*
+ * This message is used by both the VSP and the VSC to send
+ * an RNDIS message to the opposite channel endpoint.
+ */
+typedef struct nvsp_1_msg_send_rndis_pkt_ {
+ /*
+ * This field is specified by RNDIS. They assume there are
+ * two different channels of communication. However,
+ * the Network VSP only has one. Therefore, the channel
+ * travels with the RNDIS packet.
+ *
+ * The driver writes 0 for data packets and 1 for control
+ * packets.
+ */
+ uint32_t chan_type;
+
+ /*
+ * This field is used to send part or all of the data
+ * through a send buffer. This value specifies an
+ * index into the send buffer. If the index is
+ * 0xFFFFFFFF, then the send buffer is not being used
+ * and all of the data was sent through other VMBus
+ * mechanisms.
+ */
+ uint32_t send_buf_section_idx;
+ uint32_t send_buf_section_size;
+} __packed nvsp_1_msg_send_rndis_pkt;
+
+/*
+ * This message is used by both the VSP and the VSC to complete
+ * an RNDIS message to the opposite channel endpoint. At this
+ * point, the initiator of this message cannot use any resources
+ * associated with the original RNDIS packet.
+ */
+typedef struct nvsp_1_msg_send_rndis_pkt_complete_ {
+ /* Completion result; the status encoding is defined outside this section */
+ uint32_t status;
+} __packed nvsp_1_msg_send_rndis_pkt_complete;
+
+
+/*
+ * Version 2 Messages
+ */
+
+/*
+ * This message is used by the VSC to send the NDIS version
+ * to the VSP. The VSP can use this information when handling
+ * OIDs sent by the VSC.
+ *
+ * NOTE(review): the description above does not match the type that
+ * follows, which is a capability bit set embedded in
+ * nvsp_2_msg_send_ndis_config -- confirm which message it was
+ * written for.
+ */
+typedef struct nvsp_2_netvsc_capabilities_ {
+ union {
+ /* All capability bits viewed as a single 64-bit value */
+ uint64_t as_uint64;
+ /*
+ * One single-bit flag per capability. Only six bits are named;
+ * the placement of bit-fields inside the 64-bit unit is decided
+ * by the compiler/ABI.
+ */
+ struct {
+ uint64_t vmq : 1;
+ uint64_t chimney : 1;
+ uint64_t sriov : 1;
+ uint64_t ieee8021q : 1;
+ uint64_t correlationid : 1;
+ uint64_t teaming : 1;
+ } u2;
+ } u1;
+} __packed nvsp_2_netvsc_capabilities;
+
+/*
+ * Version 2 NDIS configuration message: an MTU value, a reserved
+ * word and the capability bit set defined above.
+ */
+typedef struct nvsp_2_msg_send_ndis_config_ {
+ /* NOTE(review): presumably the interface MTU in bytes -- confirm against the sender */
+ uint32_t mtu;
+ uint32_t reserved;
+ nvsp_2_netvsc_capabilities capabilities;
+} __packed nvsp_2_msg_send_ndis_config;
+
+/*
+ * NvspMessage2TypeSendChimneyDelegatedBuffer
+ */
+typedef struct nvsp_2_msg_send_chimney_buf_
+{
+ /*
+ * On WIN7 beta, delegated_obj_max_size is defined as a uint32_t
+ * Since WIN7 RC, it was split into two uint16_t. To have the same
+ * struct layout, delegated_obj_max_size shall be the first field.
+ */
+ uint16_t delegated_obj_max_size;
+
+ /*
+ * The revision # of chimney protocol used between NVSC and NVSP
+ * (see the NVSP_CHIMNEY_REVISION_* values).
+ *
+ * This revision is NOT related to the chimney revision between
+ * NDIS protocol and miniport drivers.
+ */
+ uint16_t revision;
+
+ /* NOTE(review): presumably the GPADL handle of the delegated buffer -- confirm */
+ uint32_t gpadl_handle;
+} __packed nvsp_2_msg_send_chimney_buf;
+
+
+/* Unsupported chimney revision 0 (only present in WIN7 beta) */
+#define NVSP_CHIMNEY_REVISION_0 0
+
+/* WIN7 Beta Chimney QFE */
+#define NVSP_CHIMNEY_REVISION_1 1
+
+/* The chimney revision since WIN7 RC */
+#define NVSP_CHIMNEY_REVISION_2 2
+
+
+/*
+ * NvspMessage2TypeSendChimneyDelegatedBufferComplete
+ */
+typedef struct nvsp_2_msg_send_chimney_buf_complete_ {
+ /* Completion result; the status encoding is defined outside this section */
+ uint32_t status;
+
+ /*
+ * Maximum number of outstanding sends (send_quota) and pre-posted
+ * receives (rx_quota).
+ *
+ * NVSC should not post more than SendQuota/ReceiveQuota packets.
+ * Otherwise, it can block the non-chimney path for an indefinite
+ * amount of time.
+ * (since chimney sends/receives are affected by the remote peer).
+ *
+ * Note: NVSP enforces the quota restrictions on a per-VMBCHANNEL
+ * basis. It doesn't enforce the restriction separately for chimney
+ * send/receive. If NVSC doesn't voluntarily enforce "SendQuota",
+ * it may kill its own network connectivity.
+ */
+ uint32_t send_quota;
+ uint32_t rx_quota;
+} __packed nvsp_2_msg_send_chimney_buf_complete;
+
+/*
+ * NvspMessage2TypeRevokeChimneyDelegatedBuffer
+ *
+ * Carries a single 32-bit GPADL handle.
+ */
+typedef struct nvsp_2_msg_revoke_chimney_buf_ {
+ uint32_t gpadl_handle;
+} __packed nvsp_2_msg_revoke_chimney_buf;
+
+
+#define NVSP_CHIMNEY_OBJECT_TYPE_NEIGHBOR 0
+#define NVSP_CHIMNEY_OBJECT_TYPE_PATH4 1
+#define NVSP_CHIMNEY_OBJECT_TYPE_PATH6 2
+#define NVSP_CHIMNEY_OBJECT_TYPE_TCP 3
+
+/*
+ * NvspMessage2TypeAllocateChimneyHandle
+ */
+typedef struct nvsp_2_msg_alloc_chimney_handle_ {
+ /* NOTE(review): presumably an opaque VSC-side cookie for the object -- confirm */
+ uint64_t vsc_context;
+ /* NOTE(review): presumably one of the NVSP_CHIMNEY_OBJECT_TYPE_* values -- confirm */
+ uint32_t object_type;
+} __packed nvsp_2_msg_alloc_chimney_handle;
+
+/*
+ * NvspMessage2TypeAllocateChimneyHandleComplete
+ */
+typedef struct nvsp_2_msg_alloc_chimney_handle_complete_ {
+ /* NOTE(review): presumably the handle later carried in the vsp_handle fields of other chimney messages -- confirm */
+ uint32_t vsp_handle;
+} __packed nvsp_2_msg_alloc_chimney_handle_complete;
+
+
+/*
+ * NvspMessage2TypeResumeChimneyRXIndication
+ *
+ * Note: unlike the other message structs in this header, the struct
+ * tag carries no trailing underscore, so tag and typedef share a name.
+ */
+typedef struct nvsp_2_msg_resume_chimney_rx_indication {
+ /*
+ * Handle identifying the offloaded connection
+ */
+ uint32_t vsp_tcp_handle;
+} __packed nvsp_2_msg_resume_chimney_rx_indication;
+
+
+/* Bit masks for nvsp_2_msg_terminate_chimney.flags: bit 0, and all remaining (reserved) bits */
+#define NVSP_2_MSG_TERMINATE_CHIMNEY_FLAGS_FIRST_STAGE (0x01u)
+#define NVSP_2_MSG_TERMINATE_CHIMNEY_FLAGS_RESERVED (~(0x01u))
+
+/*
+ * NvspMessage2TypeTerminateChimney
+ */
+typedef struct nvsp_2_msg_terminate_chimney_ {
+ /*
+ * Handle identifying the offloaded object
+ */
+ uint32_t vsp_handle;
+
+ /*
+ * Terminate Offload Flags
+ * Bit 0:
+ * When set to 0, terminate the offload at the destination NIC
+ * Bit 1-31: Reserved, shall be zero
+ */
+ uint32_t flags;
+
+ union {
+ /*
+ * This field is valid only when bit 0 of flags is clear.
+ * It specifies the index into the premapped delegated
+ * object buffer. The buffer was sent through the
+ * NvspMessage2TypeSendChimneyDelegatedBuffer
+ * message at initialization time.
+ *
+ * NVSP will write the delegated state into the delegated
+ * buffer upon upload completion.
+ */
+ uint32_t index;
+
+ /*
+ * This field is valid only when bit 0 of flags is set.
+ *
+ * The sequence number of the most recently accepted RX
+ * indication when VSC sets its TCP context into
+ * "terminating" state.
+ *
+ * This allows NVSP to determine if there are any in-flight
+ * RX indications for which the acceptance state is still
+ * undefined.
+ */
+ uint64_t last_accepted_rx_seq_no;
+ } f0;
+} __packed nvsp_2_msg_terminate_chimney;
+
+
+/* NOTE(review): presumably a bit for nvsp_2_msg_terminate_chimney_complete.flags -- confirm */
+#define NVSP_TERMINATE_CHIMNEY_COMPLETE_FLAG_DATA_CORRUPTED 0x0000001u
+
+/*
+ * NvspMessage2TypeTerminateChimneyComplete
+ */
+typedef struct nvsp_2_msg_terminate_chimney_complete_ {
+ /* NOTE(review): presumably the vsc_context supplied when the handle was allocated -- confirm */
+ uint64_t vsc_context;
+ uint32_t flags;
+} __packed nvsp_2_msg_terminate_chimney_complete;
+
+/*
+ * NvspMessage2TypeIndicateChimneyEvent
+ */
+typedef struct nvsp_2_msg_indicate_chimney_event_ {
+ /*
+ * When VscTcpContext is 0, event_type is an NDIS_STATUS event code
+ * Otherwise, EventType is a TCP connection event (defined in
+ * NdisTcpOffloadEventHandler chimney DDK document).
+ */
+ uint32_t event_type;
+
+ /*
+ * When VscTcpContext is 0, EventType is an NDIS_STATUS event code
+ * Otherwise, EventType is a TCP connection event specific information
+ * (defined in NdisTcpOffloadEventHandler chimney DDK document).
+ *
+ * NOTE(review): this text repeats the wording used for event_type;
+ * it presumably means to describe event_specific_info -- confirm
+ * against the protocol documentation.
+ */
+ uint32_t event_specific_info;
+
+ /*
+ * If not 0, the event is per-TCP connection event. This field
+ * contains the VSC's TCP context.
+ * If 0, the event indication is global.
+ */
+ uint64_t vsc_tcp_context;
+} __packed nvsp_2_msg_indicate_chimney_event;
+
+
+/* 16-bit "not used" markers for oob_data_index and send_buf_section_index below */
+#define NVSP_1_CHIMNEY_SEND_INVALID_OOB_INDEX 0xffffu
+#define NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX 0xffffu
+
+/*
+ * NvspMessage2TypeSendChimneyPacket
+ */
+typedef struct nvsp_2_msg_send_chimney_pkt_ {
+ /*
+ * Identify the TCP connection for which this chimney send is
+ */
+ uint32_t vsp_tcp_handle;
+
+ /*
+ * This field is used to send part or all of the data
+ * through a send buffer. This value specifies an
+ * index into the send buffer. If the index is
+ * 0xFFFF, then the send buffer is not being used
+ * and all of the data was sent through other VMBus
+ * mechanisms.
+ */
+ uint16_t send_buf_section_index;
+ uint16_t send_buf_section_size;
+
+ /*
+ * OOB Data Index
+ * This is an index to the OOB data buffer. If the index is 0xFFFF
+ * (the field is 16 bits wide), then there is no OOB data.
+ *
+ * This field shall be always 0xFFFF for now. It is reserved for
+ * the future.
+ */
+ uint16_t oob_data_index;
+
+ /*
+ * DisconnectFlags = 0
+ * Normal chimney send. See MiniportTcpOffloadSend for details.
+ *
+ * DisconnectFlags = TCP_DISCONNECT_GRACEFUL_CLOSE (0x01)
+ * Graceful disconnect. See MiniportTcpOffloadDisconnect for details.
+ *
+ * DisconnectFlags = TCP_DISCONNECT_ABORTIVE_CLOSE (0x02)
+ * Abortive disconnect. See MiniportTcpOffloadDisconnect for details.
+ */
+ uint16_t disconnect_flags;
+
+ uint32_t seq_no;
+} __packed nvsp_2_msg_send_chimney_pkt;
+
+/*
+ * NvspMessage2TypeSendChimneyPacketComplete
+ *
+ * Two 32-bit fields: the send status and the acknowledged byte count.
+ */
+typedef struct nvsp_2_msg_send_chimney_pkt_complete_ {
+ /*
+ * The NDIS_STATUS for the chimney send
+ */
+ uint32_t status;
+
+ /*
+ * Number of bytes that have been sent to the peer (and ACKed by the peer).
+ */
+ uint32_t bytes_transferred;
+} __packed nvsp_2_msg_send_chimney_pkt_complete;
+
+
+/* Bit 0 of nvsp_2_msg_post_chimney_rx_request.flags */
+#define NVSP_1_CHIMNEY_RECV_FLAG_NO_PUSH 0x0001u
+/*
+ * NOTE(review): this marker is 16 bits wide, while oob_data_index below
+ * is a uint32_t documented with 0xFFFFFFFF -- confirm which value the
+ * protocol expects.
+ */
+#define NVSP_1_CHIMNEY_RECV_INVALID_OOB_INDEX 0xffffu
+
+/*
+ * NvspMessage2TypePostChimneyRecvRequest
+ */
+typedef struct nvsp_2_msg_post_chimney_rx_request_ {
+ /*
+ * Identify the TCP connection which this chimney receive request
+ * is for.
+ */
+ uint32_t vsp_tcp_handle;
+
+ /*
+ * OOB Data Index
+ * This is an index to the OOB data buffer. If the index is 0xFFFFFFFF,
+ * then there is no OOB data.
+ *
+ * This field shall be always 0xFFFFFFFF for now. It is reserved for
+ * the future.
+ */
+ uint32_t oob_data_index;
+
+ /*
+ * Bit 0
+ * When it is set, this is a "no-push" receive.
+ * When it is clear, this is a "push" receive.
+ *
+ * Bit 1-15: Reserved and shall be zero
+ */
+ uint16_t flags;
+
+ /*
+ * For debugging and diagnoses purpose.
+ * The SeqNo is per TCP connection and starts from 0.
+ */
+ uint32_t seq_no;
+} __packed nvsp_2_msg_post_chimney_rx_request;
+
+/*
+ * NvspMessage2TypePostChimneyRecvRequestComplete
+ *
+ * NOTE(review): the field comments below speak of a "send" and of bytes
+ * "sent to the peer", identical to the send-completion message; for a
+ * receive completion they presumably mean the receive status and the
+ * number of bytes received -- confirm against the protocol documentation.
+ */
+typedef struct nvsp_2_msg_post_chimney_rx_request_complete_ {
+ /*
+ * The NDIS_STATUS for the chimney send
+ */
+ uint32_t status;
+
+ /*
+ * Number of bytes that have been sent to the peer (and ACKed by
+ * the peer).
+ */
+ uint32_t bytes_xferred;
+} __packed nvsp_2_msg_post_chimney_rx_request_complete;
+
+/*
+ * NvspMessage2TypeAllocateReceiveBuffer
+ *
+ * Two 32-bit fields; the allocation_id is echoed back in
+ * nvsp_2_msg_alloc_rx_buf_complete.
+ */
+typedef struct nvsp_2_msg_alloc_rx_buf_ {
+ /*
+ * Allocation ID to match the allocation request and response
+ */
+ uint32_t allocation_id;
+
+ /*
+ * Length of the VM shared memory receive buffer that needs to
+ * be allocated
+ */
+ uint32_t length;
+} __packed nvsp_2_msg_alloc_rx_buf;
+
+/*
+ * NvspMessage2TypeAllocateReceiveBufferComplete
+ *
+ * Three 32-bit fields followed by a 64-bit buffer ID; the struct is
+ * declared __packed, so rx_buf_id is not naturally aligned.
+ */
+typedef struct nvsp_2_msg_alloc_rx_buf_complete_ {
+ /*
+ * The NDIS_STATUS code for buffer allocation
+ */
+ uint32_t status;
+
+ /*
+ * Allocation ID from NVSP_2_MESSAGE_ALLOCATE_RECEIVE_BUFFER
+ */
+ uint32_t allocation_id;
+
+ /*
+ * GPADL handle for the allocated receive buffer
+ */
+ uint32_t gpadl_handle;
+
+ /*
+ * Receive buffer ID that is further used in
+ * NvspMessage2SendVmqRndisPacket
+ */
+ uint64_t rx_buf_id;
+} __packed nvsp_2_msg_alloc_rx_buf_complete;
+
+/*
+ * NvspMessage2TypeFreeReceiveBuffer
+ */
+typedef struct nvsp_2_msg_free_rx_buf_ {
+ /*
+ * Receive buffer ID previously returned in
+ * NvspMessage2TypeAllocateReceiveBufferComplete message
+ */
+ uint64_t rx_buf_id;
+} __packed nvsp_2_msg_free_rx_buf;
+
+/*
+ * This structure is used in defining the buffers in
+ * NVSP_2_MESSAGE_SEND_VMQ_RNDIS_PACKET structure
+ * (embedded as the "range" member of nvsp_2_msg_send_vmq_rndis_pkt).
+ */
+typedef struct nvsp_xfer_page_range_ {
+ /*
+ * Specifies the ID of the receive buffer that has the buffer. This
+ * ID can be the general receive buffer ID specified in
+ * NvspMessage1TypeSendReceiveBuffer or it can be the shared memory
+ * receive buffer ID allocated by the VSC and specified in
+ * NvspMessage2TypeAllocateReceiveBufferComplete message
+ */
+ uint64_t xfer_page_set_id;
+
+ /*
+ * Number of bytes
+ */
+ uint32_t byte_count;
+
+ /*
+ * Offset in bytes from the beginning of the buffer
+ */
+ uint32_t byte_offset;
+} __packed nvsp_xfer_page_range;
+
+/*
+ * NvspMessage2SendVmqRndisPacket
+ */
+typedef struct nvsp_2_msg_send_vmq_rndis_pkt_ {
+ /*
+ * This field is specified by RNDIS. RNDIS assumes there are
+ * two different channels of communication. However,
+ * the Network VSP only has one. Therefore, the channel
+ * travels with the RNDIS packet. It must be RMC_DATA
+ */
+ uint32_t channel_type;
+
+ /*
+ * Only the Range element corresponding to the RNDIS header of
+ * the first RNDIS message in the multiple RNDIS messages sent
+ * in one NVSP message. Information about the data portions as well
+ * as the subsequent RNDIS messages in the same NVSP message are
+ * embedded in the RNDIS header itself
+ */
+ nvsp_xfer_page_range range;
+} __packed nvsp_2_msg_send_vmq_rndis_pkt;
+
+/*
+ * This message is used by the VSC to complete
+ * an RNDIS VMQ message to the VSP. At this point,
+ * the initiator of this message can use any resources
+ * associated with the original RNDIS VMQ packet.
+ *
+ * NOTE(review): the version 1 completion message says the initiator
+ * "cannot" use those resources -- confirm that "can" is intended here.
+ */
+typedef struct nvsp_2_msg_send_vmq_rndis_pkt_complete_
+{
+ uint32_t status;
+} __packed nvsp_2_msg_send_vmq_rndis_pkt_complete;
+
+
+/*
+ * Union of every version 1 message body. All members overlay the same
+ * storage; which one is valid is decided by information outside this
+ * union.
+ */
+typedef union nvsp_1_msg_uber_ {
+ nvsp_1_msg_send_ndis_version send_ndis_vers;
+
+ nvsp_1_msg_send_rx_buf send_rx_buf;
+ nvsp_1_msg_send_rx_buf_complete send_rx_buf_complete;
+ nvsp_1_msg_revoke_rx_buf revoke_rx_buf;
+
+ nvsp_1_msg_send_send_buf send_send_buf;
+ nvsp_1_msg_send_send_buf_complete send_send_buf_complete;
+ nvsp_1_msg_revoke_send_buf revoke_send_buf;
+
+ nvsp_1_msg_send_rndis_pkt send_rndis_pkt;
+ nvsp_1_msg_send_rndis_pkt_complete send_rndis_pkt_complete;
+} __packed nvsp_1_msg_uber;
+
+
+/*
+ * Union of every version 2 message body. All members overlay the same
+ * storage; which one is valid is decided by information outside this
+ * union.
+ */
+typedef union nvsp_2_msg_uber_ {
+ nvsp_2_msg_send_ndis_config send_ndis_config;
+
+ nvsp_2_msg_send_chimney_buf send_chimney_buf;
+ nvsp_2_msg_send_chimney_buf_complete send_chimney_buf_complete;
+ nvsp_2_msg_revoke_chimney_buf revoke_chimney_buf;
+
+ nvsp_2_msg_resume_chimney_rx_indication resume_chimney_rx_indication;
+ nvsp_2_msg_terminate_chimney terminate_chimney;
+ nvsp_2_msg_terminate_chimney_complete terminate_chimney_complete;
+ nvsp_2_msg_indicate_chimney_event indicate_chimney_event;
+
+ nvsp_2_msg_send_chimney_pkt send_chimney_packet;
+ nvsp_2_msg_send_chimney_pkt_complete send_chimney_packet_complete;
+ nvsp_2_msg_post_chimney_rx_request post_chimney_rx_request;
+ nvsp_2_msg_post_chimney_rx_request_complete
+ post_chimney_rx_request_complete;
+
+ nvsp_2_msg_alloc_rx_buf alloc_rx_buffer;
+ nvsp_2_msg_alloc_rx_buf_complete alloc_rx_buffer_complete;
+ nvsp_2_msg_free_rx_buf free_rx_buffer;
+
+ nvsp_2_msg_send_vmq_rndis_pkt send_vmq_rndis_pkt;
+ nvsp_2_msg_send_vmq_rndis_pkt_complete send_vmq_rndis_pkt_complete;
+ nvsp_2_msg_alloc_chimney_handle alloc_chimney_handle;
+ nvsp_2_msg_alloc_chimney_handle_complete alloc_chimney_handle_complete;
+} __packed nvsp_2_msg_uber;
+
+
+/*
+ * Union of the init, version 1 and version 2 message bodies; this is
+ * the payload part of nvsp_msg.
+ */
+typedef union nvsp_all_msgs_ {
+ nvsp_msg_init_uber init_msgs;
+ nvsp_1_msg_uber vers_1_msgs;
+ nvsp_2_msg_uber vers_2_msgs;
+} __packed nvsp_all_msgs;
+
+/*
+ * ALL Messages
+ *
+ * A complete NVSP message: the common header followed by the union of
+ * all message bodies.
+ */
+typedef struct nvsp_msg_ {
+ nvsp_msg_hdr hdr;
+ nvsp_all_msgs msgs;
+} __packed nvsp_msg;
+
+
+/*
+ * The following arguably belongs in a separate header file
+ */
+
+/*
+ * Defines
+ */
+
+#define NETVSC_SEND_BUFFER_SIZE (64*1024) /* 64K */
+#define NETVSC_SEND_BUFFER_ID 0xface
+
+
+#define NETVSC_RECEIVE_BUFFER_SIZE (1024*1024) /* 1MB */
+
+#define NETVSC_RECEIVE_BUFFER_ID 0xcafe
+
+#define NETVSC_RECEIVE_SG_COUNT 1
+
+/* Preallocated receive packets */
+#define NETVSC_RECEIVE_PACKETLIST_COUNT 256
+
+/*
+ * Maximum MTU we permit to be configured for a netvsc interface.
+ * When the code was developed, a max MTU of 12232 was tested and
+ * proven to work. 9K is a reasonable maximum for an Ethernet.
+ */
+#define NETVSC_MAX_CONFIGURABLE_MTU (9 * 1024)
+
+/*
+ * Data types
+ */
+
+/*
+ * Per netvsc channel-specific
+ */
+typedef struct netvsc_dev_ {
+ struct hv_device *dev;
+ int num_outstanding_sends;
+
+ /* List of free preallocated NETVSC_PACKET to represent RX packet */
+ STAILQ_HEAD(PQ, netvsc_packet_) myrx_packet_list;
+ /* NOTE(review): presumably protects myrx_packet_list -- confirm in hv_net_vsc.c */
+ struct mtx rx_pkt_list_lock;
+
+ /* Send buffer allocated by us but managed by NetVSP */
+ void *send_buf;
+ uint32_t send_buf_size;
+ uint32_t send_buf_gpadl_handle;
+ uint32_t send_section_size;
+
+ /* Receive buffer allocated by us but managed by NetVSP */
+ void *rx_buf;
+ uint32_t rx_buf_size;
+ uint32_t rx_buf_gpadl_handle;
+ uint32_t rx_section_count;
+ nvsp_1_rx_buf_section *rx_sections;
+
+ /* Used for NetVSP initialization protocol */
+ struct sema channel_init_sema;
+ nvsp_msg channel_init_packet;
+
+ nvsp_msg revoke_packet;
+ /*uint8_t hw_mac_addr[HW_MACADDR_LEN];*/
+
+ /* Holds rndis device info */
+ void *extension;
+
+ hv_bool_uint8_t destroy;
+ /* Negotiated NVSP version */
+ uint32_t nvsp_version;
+} netvsc_dev;
+
+
+typedef void (*pfn_on_send_rx_completion)(void *);
+
+#define NETVSC_DEVICE_RING_BUFFER_SIZE (64 * PAGE_SIZE)
+#define NETVSC_PACKET_MAXPAGE 16
+
+
+typedef struct xfer_page_packet_ {
+ /*
+ * This needs to be here because the network RX code casts
+ * an instantiation of this structure to a netvsc_packet.
+ * It must therefore stay the first member, exactly as
+ * mylist_entry is the first member of netvsc_packet.
+ */
+ STAILQ_ENTRY(netvsc_packet_) mylist_entry;
+
+ uint32_t count;
+} xfer_page_packet;
+
+/*
+ * One network packet handed between the FreeBSD driver glue and the
+ * netvsc/RNDIS code, for either direction.
+ */
+typedef struct netvsc_packet_ {
+ /*
+ * List used when enqueued on &net_dev->myrx_packet_list,
+ * and when enqueued within the netvsc code
+ */
+ STAILQ_ENTRY(netvsc_packet_) mylist_entry;
+ struct hv_device *device;
+ hv_bool_uint8_t is_data_pkt; /* One byte */
+ /* VLAN tag control information; the TX path fills it from the mbuf's ether_vtag */
+ uint16_t vlan_tci;
+ xfer_page_packet *xfer_page_pkt;
+
+ /* Completion: rx and send share storage, only one is meaningful per packet */
+ union {
+ struct {
+ uint64_t rx_completion_tid;
+ void *rx_completion_context;
+ /* This is no longer used */
+ pfn_on_send_rx_completion on_rx_completion;
+ } rx;
+ struct {
+ /* The TX path stores the mbuf chain pointer here */
+ uint64_t send_completion_tid;
+ void *send_completion_context;
+ /* Still used in netvsc and filter code */
+ pfn_on_send_rx_completion on_send_completion;
+ } send;
+ } compl;
+
+ void *extension;
+ /* Total payload length in bytes, and number of page_buffers[] entries in use */
+ uint32_t tot_data_buf_len;
+ uint32_t page_buf_count;
+ hv_vmbus_page_buffer page_buffers[NETVSC_PACKET_MAXPAGE];
+} netvsc_packet;
+
+/*
+ * Information handed back to the attach routine by
+ * hv_rf_on_device_add().
+ */
+typedef struct {
+ uint8_t mac_addr[6]; /* Ethernet MAC address, passed to ether_ifattach() */
+ /* netvsc_attach() treats 0 as "link up" (it then sets hn_carrier) */
+ hv_bool_uint8_t link_state;
+} netvsc_device_info;
+
+/*
+ * Device-specific softc structure
+ */
+typedef struct hn_softc {
+ struct ifnet *hn_ifp; /* Interface allocated in netvsc_attach() */
+ struct arpcom arpcom;
+ device_t hn_dev; /* Newbus device this softc belongs to */
+ uint8_t hn_unit; /* Unit number; note it is stored in 8 bits */
+ int hn_carrier; /* 1 when the link is reported up, else 0 */
+ int hn_if_flags;
+ struct mtx hn_lock; /* Accessed through the NV_LOCK*() macros */
+ int hn_initdone;
+ struct hv_device *hn_dev_obj; /* VMBus device context */
+ netvsc_dev *net_dev;
+} hn_softc_t;
+
+
+/*
+ * Externs
+ */
+extern int hv_promisc_mode;
+
+extern void netvsc_linkstatus_callback(struct hv_device *device_obj,
+ uint32_t status);
+extern int netvsc_recv(struct hv_device *device_obj, netvsc_packet *packet);
+extern void netvsc_xmit_completion(void *context);
+
+extern void hv_nv_on_receive_completion(void *context);
+extern netvsc_dev *hv_nv_on_device_add(struct hv_device *device, void *additional_info);
+extern int hv_nv_on_device_remove(struct hv_device *device,
+ boolean_t destroy_channel);
+extern int hv_nv_on_send(struct hv_device *device, netvsc_packet *pkt);
+
+#endif /* __HV_NET_VSC_H__ */
+
diff --git a/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c b/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c
new file mode 100644
index 000000000000..47f48dd6abde
--- /dev/null
+++ b/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c
@@ -0,0 +1,948 @@
+/*-
+ * Copyright (c) 2010-2012 Citrix Inc.
+ * Copyright (c) 2009-2012 Microsoft Corp.
+ * Copyright (c) 2012 NetApp Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice unmodified, this list of conditions, and the following
+ * disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*-
+ * Copyright (c) 2004-2006 Kip Macy
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/sockio.h>
+#include <sys/mbuf.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/kernel.h>
+#include <sys/socket.h>
+#include <sys/queue.h>
+#include <sys/lock.h>
+#include <sys/sx.h>
+
+#include <net/if.h>
+#include <net/if_arp.h>
+#include <net/ethernet.h>
+#include <net/if_dl.h>
+#include <net/if_media.h>
+
+#include <net/bpf.h>
+
+#include <net/if_types.h>
+#include <net/if_vlan_var.h>
+#include <net/if.h>
+
+#include <netinet/in_systm.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/if_ether.h>
+
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/vm_kern.h>
+#include <vm/pmap.h>
+
+#include <machine/bus.h>
+#include <machine/resource.h>
+#include <machine/frame.h>
+#include <machine/vmparam.h>
+
+#include <sys/bus.h>
+#include <sys/rman.h>
+#include <sys/mutex.h>
+#include <sys/errno.h>
+#include <sys/types.h>
+#include <machine/atomic.h>
+
+#include <machine/intr_machdep.h>
+
+#include <dev/hyperv/include/hyperv.h>
+#include "hv_net_vsc.h"
+#include "hv_rndis.h"
+#include "hv_rndis_filter.h"
+
+
+/* Short for Hyper-V network interface */
+#define NETVSC_DEVNAME "hn"
+
+/*
+ * It looks like offset 0 of buf is reserved to hold the softc pointer.
+ * The sc pointer is evidently not needed, and is not presently populated.
+ * The packet offset is where the netvsc_packet starts in the buffer.
+ */
+#define HV_NV_SC_PTR_OFFSET_IN_BUF 0
+#define HV_NV_PACKET_OFFSET_IN_BUF 16
+
+
+/*
+ * Data types
+ */
+
+/* Driver-wide state; a single static instance (g_netvsc_drv) exists. */
+struct hv_netvsc_driver_context {
+ uint32_t drv_inited; /* Set to 1 by netvsc_init() once initialization has run */
+};
+
+/*
+ * Be aware that this default (MTX_DEF) mutex will exhibit WITNESS errors
+ * when certain TCP and ARP code paths are taken. This appears to be a
+ * well-known condition, as all other drivers checked use the same kind
+ * of mutex to protect their transmit paths.
+ * Also be aware that mutexes do not play well with semaphores, and there
+ * is a conflicting semaphore in a certain channel code path.
+ */
+#define NV_LOCK_INIT(_sc, _name) \
+ mtx_init(&(_sc)->hn_lock, _name, MTX_NETWORK_LOCK, MTX_DEF)
+#define NV_LOCK(_sc) mtx_lock(&(_sc)->hn_lock)
+#define NV_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->hn_lock, MA_OWNED)
+#define NV_UNLOCK(_sc) mtx_unlock(&(_sc)->hn_lock)
+#define NV_LOCK_DESTROY(_sc) mtx_destroy(&(_sc)->hn_lock)
+
+
+/*
+ * Globals
+ */
+
+int hv_promisc_mode = 0; /* normal mode by default */
+
+/* The one and only one */
+static struct hv_netvsc_driver_context g_netvsc_drv;
+
+
+/*
+ * Forward declarations
+ */
+static void hn_stop(hn_softc_t *sc);
+static void hn_ifinit_locked(hn_softc_t *sc);
+static void hn_ifinit(void *xsc);
+static int hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data);
+static int hn_start_locked(struct ifnet *ifp);
+static void hn_start(struct ifnet *ifp);
+
+
+/*
+ * NetVsc driver initialization
+ * Note: Filter init is no longer required
+ *
+ * Currently does nothing; it is kept as the hook called once from
+ * netvsc_init() and always returns 0.
+ */
+static int
+netvsc_drv_init(void)
+{
+ return (0);
+}
+
+/*
+ * NetVsc global initialization entry point
+ *
+ * Performs the one-time driver initialization the first time it is
+ * called with the kernel's "cold" flag clear.  In every other case
+ * (still cold, or already initialized) it only reports
+ * "Already initialized!".
+ */
+static void
+netvsc_init(void)
+{
+	printf("Netvsc initializing... ");
+
+	/*
+	 * XXXKYS: cleanup initialization
+	 */
+	if (cold || g_netvsc_drv.drv_inited) {
+		printf("Already initialized!\n");
+		return;
+	}
+
+	g_netvsc_drv.drv_inited = 1;
+	netvsc_drv_init();
+}
+
+/*
+ * Device type GUID matched by netvsc_probe():
+ * {F8615163-DF3E-46c5-913F-F2D2F965ED0E}
+ *
+ * The first three GUID groups are stored byte-reversed
+ * (F8615163 -> 63 51 61 F8, DF3E -> 3E DF, 46c5 -> c5 46); the
+ * remaining eight bytes are stored in the order they are written.
+ */
+static const hv_guid g_net_vsc_device_type = {
+ .data = {0x63, 0x51, 0x61, 0xF8, 0x3E, 0xDF, 0xc5, 0x46,
+ 0x91, 0x3F, 0xF2, 0xD2, 0xF9, 0x65, 0xED, 0x0E}
+};
+
+/*
+ * Standard probe entry point.
+ *
+ * Compares the type GUID reported by the VMBus for this device with
+ * the synthetic network device GUID.  Returns 0 and sets the device
+ * description on a match, ENXIO otherwise.
+ */
+static int
+netvsc_probe(device_t dev)
+{
+	const char *type_guid = vmbus_get_type(dev);
+
+	if (memcmp(type_guid, &g_net_vsc_device_type.data,
+	    sizeof(hv_guid)) != 0) {
+		return (ENXIO);
+	}
+
+	device_set_desc(dev, "Synthetic Network Interface");
+	printf("Netvsc probe... DONE \n");
+
+	return (0);
+}
+
+/*
+ * Standard attach entry point.
+ *
+ * Called when the driver is loaded. It allocates needed resources,
+ * and initializes the "hardware" and software.
+ *
+ * Returns 0 on success, ENOMEM if the softc or the ifnet cannot be
+ * obtained, or the error returned by hv_rf_on_device_add().  On every
+ * failure after the softc lock has been initialized, the lock is
+ * destroyed again and the ifnet (if any) is released.
+ */
+static int
+netvsc_attach(device_t dev)
+{
+	struct hv_device *device_ctx = vmbus_get_devctx(dev);
+	netvsc_device_info device_info;
+	hn_softc_t *sc;
+	int unit = device_get_unit(dev);
+	struct ifnet *ifp;
+	int ret;
+
+	netvsc_init();
+
+	sc = device_get_softc(dev);
+	if (sc == NULL) {
+		return (ENOMEM);
+	}
+
+	bzero(sc, sizeof(hn_softc_t));
+	/* NOTE(review): hn_unit is a uint8_t, so units above 255 truncate */
+	sc->hn_unit = unit;
+	sc->hn_dev = dev;
+
+	NV_LOCK_INIT(sc, "NetVSCLock");
+
+	sc->hn_dev_obj = device_ctx;
+
+	/* if_alloc() can fail; do not dereference a NULL ifnet */
+	ifp = if_alloc(IFT_ETHER);
+	if (ifp == NULL) {
+		NV_LOCK_DESTROY(sc);
+
+		return (ENOMEM);
+	}
+	sc->hn_ifp = sc->arpcom.ac_ifp = ifp;
+	ifp->if_softc = sc;
+
+	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
+	ifp->if_dunit = unit;
+	ifp->if_dname = NETVSC_DEVNAME;
+
+	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
+	ifp->if_ioctl = hn_ioctl;
+	ifp->if_start = hn_start;
+	ifp->if_init = hn_ifinit;
+	/* needed by hv_rf_on_device_add() code */
+	ifp->if_mtu = ETHERMTU;
+	IFQ_SET_MAXLEN(&ifp->if_snd, 512);
+	ifp->if_snd.ifq_drv_maxlen = 511;
+	IFQ_SET_READY(&ifp->if_snd);
+
+	/*
+	 * Tell upper layers that we support full VLAN capability.
+	 */
+	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
+	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
+	ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
+
+	ret = hv_rf_on_device_add(device_ctx, &device_info);
+	if (ret != 0) {
+		/* Undo everything set up above so nothing is left dangling */
+		if_free(ifp);
+		sc->hn_ifp = sc->arpcom.ac_ifp = NULL;
+		NV_LOCK_DESTROY(sc);
+
+		return (ret);
+	}
+	/* A link_state of 0 is treated as "link up" */
+	if (device_info.link_state == 0) {
+		sc->hn_carrier = 1;
+	}
+
+	ether_ifattach(ifp, device_info.mac_addr);
+
+	return (0);
+}
+
+/*
+ * Standard detach entry point
+ *
+ * Calls hv_rf_on_device_remove() for this device's VMBus context with
+ * HV_RF_NV_DESTROY_CHANNEL.  Always returns 0.
+ */
+static int
+netvsc_detach(device_t dev)
+{
+	struct hv_device *dev_ctx = vmbus_get_devctx(dev);
+
+	printf("netvsc_detach\n");
+
+	/*
+	 * XXXKYS: Still to do, since this is the driver unloading:
+	 *  - clean up all of our driver state;
+	 *  - stop outgoing traffic and unregister the netdevice.
+	 */
+	hv_rf_on_device_remove(dev_ctx, HV_RF_NV_DESTROY_CHANNEL);
+
+	return (0);
+}
+
+/*
+ * Standard shutdown entry point
+ *
+ * Nothing is done at shutdown; always returns 0.
+ */
+static int
+netvsc_shutdown(device_t dev)
+{
+ return (0);
+}
+
+/*
+ * Send completion processing
+ *
+ * context is the netvsc_packet set up by hn_start_locked().  The
+ * packet lives HV_NV_PACKET_OFFSET_IN_BUF bytes into a malloc'ed
+ * buffer; that buffer is freed here, together with the mbuf chain
+ * whose pointer was stored in send_completion_tid (if it is non-zero).
+ *
+ * Note: It looks like offset 0 of buf is reserved to hold the softc
+ * pointer. The sc pointer is not currently needed in this function, and
+ * it is not presently populated by the TX function.
+ */
+void
+netvsc_xmit_completion(void *context)
+{
+	netvsc_packet *packet = (netvsc_packet *)context;
+	struct mbuf *mb;
+	uint8_t *buf;
+
+	/*
+	 * Go through uintptr_t: converting a 64-bit integer straight to
+	 * a pointer is not clean where pointers are narrower than 64 bits.
+	 */
+	mb = (struct mbuf *)(uintptr_t)packet->compl.send.send_completion_tid;
+	buf = ((uint8_t *)packet) - HV_NV_PACKET_OFFSET_IN_BUF;
+
+	free(buf, M_DEVBUF);
+
+	if (mb != NULL) {
+		m_freem(mb);
+	}
+}
+
+/*
+ * Start a transmit of one or more packets
+ *
+ * Dequeues mbuf chains from the interface send queue and hands each one
+ * to hv_rf_on_send() wrapped in a freshly allocated netvsc_packet.
+ *
+ * Returns 0 if the last packet processed was sent (or the queue was
+ * empty), EINVAL if a chain has too many fragments, ENOMEM if the
+ * packet buffer cannot be allocated, or the last hv_rf_on_send() error.
+ * In the EINVAL and ENOMEM cases the offending mbuf chain is freed.
+ */
+static int
+hn_start_locked(struct ifnet *ifp)
+{
+	hn_softc_t *sc = ifp->if_softc;
+	struct hv_device *device_ctx = vmbus_get_devctx(sc->hn_dev);
+	uint8_t *buf;
+	netvsc_packet *packet;
+	struct mbuf *m_head, *m;
+	struct mbuf *mc_head = NULL;
+	int i;
+	int num_frags;
+	int len;
+	int rppi_size;
+	int retries;
+	int ret = 0;
+
+	while (!IFQ_DRV_IS_EMPTY(&sc->hn_ifp->if_snd)) {
+		IFQ_DRV_DEQUEUE(&sc->hn_ifp->if_snd, m_head);
+		if (m_head == NULL) {
+			break;
+		}
+
+		len = 0;
+		num_frags = 0;
+		/* Every packet gets its own retry budget */
+		retries = 0;
+
+		/* Walk the mbuf list computing total length and num frags */
+		for (m = m_head; m != NULL; m = m->m_next) {
+			if (m->m_len != 0) {
+				num_frags++;
+				len += m->m_len;
+			}
+		}
+
+		/*
+		 * Reserve the number of pages requested.  Currently,
+		 * one page is reserved for the message in the RNDIS
+		 * filter packet
+		 */
+		num_frags += HV_RF_NUM_TX_RESERVED_PAGE_BUFS;
+
+		/* If exceeds # page_buffers in netvsc_packet */
+		if (num_frags > NETVSC_PACKET_MAXPAGE) {
+			/*
+			 * Free the whole dequeued chain.  The walk above
+			 * leaves m == NULL, so freeing m would leak it.
+			 */
+			m_freem(m_head);
+
+			return (EINVAL);
+		}
+
+		rppi_size = 0;
+		if (m_head->m_flags & M_VLANTAG) {
+			rppi_size = sizeof(rndis_per_packet_info) +
+			    sizeof(ndis_8021q_info);
+		}
+
+		/*
+		 * Allocate a buffer with space for a netvsc packet plus a
+		 * number of reserved areas.  First comes a (currently 16
+		 * bytes, currently unused) reserved data area.  Second is
+		 * the netvsc_packet, which includes NETVSC_PACKET_MAXPAGE
+		 * page buffers.  Third (optional) is a rndis_per_packet_info
+		 * struct, but only if a VLAN tag should be inserted into the
+		 * Ethernet frame by the Hyper-V infrastructure.  Fourth is
+		 * an area reserved for an rndis_filter_packet struct.
+		 * Changed malloc to M_NOWAIT to avoid sleep under spin lock.
+		 * No longer reserving extra space for page buffers, as they
+		 * are already part of the netvsc_packet.
+		 */
+		buf = malloc(HV_NV_PACKET_OFFSET_IN_BUF +
+		    sizeof(netvsc_packet) + rppi_size +
+		    sizeof(rndis_filter_packet),
+		    M_DEVBUF, M_ZERO | M_NOWAIT);
+		if (buf == NULL) {
+			/* As above: m is NULL here, free the chain head */
+			m_freem(m_head);
+
+			return (ENOMEM);
+		}
+
+		packet = (netvsc_packet *)(buf + HV_NV_PACKET_OFFSET_IN_BUF);
+		*(vm_offset_t *)buf = HV_NV_SC_PTR_OFFSET_IN_BUF;
+
+		/*
+		 * extension points to the area reserved for the
+		 * rndis_filter_packet, which is placed just after
+		 * the netvsc_packet (and rppi struct, if present;
+		 * length is updated later).
+		 */
+		packet->extension = packet + 1;
+
+		/* Set up the rndis header */
+		packet->page_buf_count = num_frags;
+
+		/* Initialize it from the mbuf */
+		packet->tot_data_buf_len = len;
+
+		/*
+		 * If the Hyper-V infrastructure needs to embed a VLAN tag,
+		 * initialize netvsc_packet and rppi struct values as needed.
+		 */
+		if (rppi_size) {
+			/* Lower layers need the VLAN TCI */
+			packet->vlan_tci = m_head->m_pkthdr.ether_vtag;
+		}
+
+		/*
+		 * Fill the page buffers with mbuf info starting at index
+		 * HV_RF_NUM_TX_RESERVED_PAGE_BUFS.
+		 */
+		i = HV_RF_NUM_TX_RESERVED_PAGE_BUFS;
+		for (m = m_head; m != NULL; m = m->m_next) {
+			if (m->m_len) {
+				vm_offset_t paddr =
+				    vtophys(mtod(m, vm_offset_t));
+				packet->page_buffers[i].pfn =
+				    paddr >> PAGE_SHIFT;
+				packet->page_buffers[i].offset =
+				    paddr & (PAGE_SIZE - 1);
+				packet->page_buffers[i].length = m->m_len;
+				i++;
+			}
+		}
+
+		/*
+		 * If bpf, copy the mbuf chain.  This is less expensive than
+		 * it appears; the mbuf clusters are not copied, only their
+		 * reference counts are incremented.
+		 * Needed to avoid a race condition where the completion
+		 * callback is invoked, freeing the mbuf chain, before the
+		 * bpf_mtap code has a chance to run.
+		 */
+		if (ifp->if_bpf) {
+			mc_head = m_copypacket(m_head, M_DONTWAIT);
+		}
+retry_send:
+		/* Set the completion routine */
+		packet->compl.send.on_send_completion = netvsc_xmit_completion;
+		packet->compl.send.send_completion_context = packet;
+		packet->compl.send.send_completion_tid =
+		    (uint64_t)(uintptr_t)m_head;
+
+		/* Removed critical_enter(), does not appear necessary */
+		ret = hv_rf_on_send(device_ctx, packet);
+
+		if (ret == 0) {
+			ifp->if_opackets++;
+			/* if bpf && mc_head, call bpf_mtap code */
+			if (mc_head) {
+				ETHER_BPF_MTAP(ifp, mc_head);
+			}
+		} else {
+			retries++;
+			if (retries < 4) {
+				goto retry_send;
+			}
+
+			/*
+			 * NOTE(review): the chain is pushed back and the
+			 * loop then continues, so it is dequeued again
+			 * right away; nothing visible here clears
+			 * IFF_DRV_OACTIVE either.  Confirm this is the
+			 * intended back-pressure behavior.
+			 */
+			IF_PREPEND(&ifp->if_snd, m_head);
+			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
+
+			/*
+			 * Null the mbuf pointer so the completion function
+			 * does not free the mbuf chain.  We just pushed the
+			 * mbuf chain back on the if_snd queue.
+			 */
+			packet->compl.send.send_completion_tid = 0;
+
+			/*
+			 * Release the resources since we will not get any
+			 * send completion
+			 */
+			netvsc_xmit_completion(packet);
+		}
+
+		/*
+		 * if bpf && mc_head, free the mbuf chain copy.  Clear the
+		 * pointer so a later iteration that makes no copy cannot
+		 * tap or free this chain a second time.
+		 */
+		if (mc_head) {
+			m_freem(mc_head);
+			mc_head = NULL;
+		}
+	}
+
+	return (ret);
+}
+
+/*
+ * Link up/down notification.
+ *
+ * A status of exactly 1 records the carrier as present (hn_carrier = 1);
+ * any other value records it as absent (hn_carrier = 0).  Nothing is
+ * done when the device has no softc.
+ */
+void
+netvsc_linkstatus_callback(struct hv_device *device_obj, uint32_t status)
+{
+	hn_softc_t *softc;
+
+	softc = device_get_softc(device_obj->device);
+	if (softc != NULL) {
+		softc->hn_carrier = (status == 1) ? 1 : 0;
+	}
+}
+
+/*
+ * Append len bytes starting at cp to the mbuf chain headed by m0.
+ * Free room at the end of the last mbuf is used first; when that is
+ * not enough, the chain is extended with additional mbufs.
+ *
+ * This is a minor rewrite of m_append() from sys/kern/uipc_mbuf.c.
+ * There should be an equivalent in the kernel mbuf code,
+ * but there does not appear to be one yet.
+ *
+ * Differs from m_append() in that additional mbufs are
+ * allocated with cluster size MJUMPAGESIZE, and filled
+ * accordingly.
+ *
+ * When m0 carries a packet header, its m_pkthdr.len grows by the
+ * number of bytes actually copied, even if the copy was cut short.
+ *
+ * Return 1 if able to complete the job; otherwise 0.
+ */
+static int
+hv_m_append(struct mbuf *m0, int len, c_caddr_t cp)
+{
+	struct mbuf *tail, *fresh;
+	int left, chunk;
+
+	/* Find the last mbuf of the chain. */
+	tail = m0;
+	while (tail->m_next != NULL)
+		tail = tail->m_next;
+
+	left = len;
+
+	/* Fill whatever room is left in the last mbuf first. */
+	chunk = M_TRAILINGSPACE(tail);
+	if (chunk > 0) {
+		if (chunk > left)
+			chunk = left;
+		bcopy(cp, mtod(tail, caddr_t) + tail->m_len, chunk);
+		tail->m_len += chunk;
+		cp += chunk;
+		left -= chunk;
+	}
+
+	/*
+	 * Put the rest into newly allocated MJUMPAGESIZE clusters, linking
+	 * each one to the end of the chain.  An allocation failure ends
+	 * the copy early.
+	 */
+	for (; left > 0; tail = fresh) {
+		fresh = m_getjcl(M_DONTWAIT, tail->m_type, 0, MJUMPAGESIZE);
+		if (fresh == NULL)
+			break;
+		fresh->m_len = min(MJUMPAGESIZE, left);
+		bcopy(cp, mtod(fresh, caddr_t), fresh->m_len);
+		cp += fresh->m_len;
+		left -= fresh->m_len;
+		tail->m_next = fresh;
+	}
+
+	/* Account only for the bytes that were really appended. */
+	if (m0->m_flags & M_PKTHDR)
+		m0->m_pkthdr.len += len - left;
+
+	return (left == 0);
+}
+
+
+/*
+ * Called when we receive a data packet from the "wire" on the
+ * specified device.
+ *
+ * The data described by the packet's page buffers is copied into a
+ * newly allocated mbuf chain, which is then passed to the interface's
+ * if_input routine.  The packet is dropped (nothing is passed up) when:
+ *   - the device has no softc,
+ *   - the interface is not marked IFF_DRV_RUNNING,
+ *   - tot_data_buf_len exceeds if_mtu + ETHER_HDR_LEN,
+ *   - the initial mbuf cannot be allocated, or
+ *   - the data cannot be copied completely (counted in if_ierrors).
+ *
+ * Always returns 0.
+ *
+ * Note: This is no longer used as a callback
+ */
+int
+netvsc_recv(struct hv_device *device_ctx, netvsc_packet *packet)
+{
+	hn_softc_t *sc = (hn_softc_t *)device_get_softc(device_ctx->device);
+	struct mbuf *m_new;
+	struct ifnet *ifp;
+	int size;
+	int i;
+
+	/*
+	 * The softc must be checked before it is dereferenced; the ifnet
+	 * pointer is therefore fetched only after this test.
+	 */
+	if (sc == NULL) {
+		return (0); /* TODO: KYS how can this be! */
+	}
+
+	ifp = sc->arpcom.ac_ifp;
+
+	if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
+		return (0);
+	}
+
+	/*
+	 * Bail out if packet contains more data than configured MTU.
+	 */
+	if (packet->tot_data_buf_len > (ifp->if_mtu + ETHER_HDR_LEN)) {
+		return (0);
+	}
+
+	/*
+	 * Get an mbuf with a cluster.  For packets 2K or less,
+	 * get a standard 2K cluster.  For anything larger, get a
+	 * 4K cluster.  Any buffers larger than 4K can cause problems
+	 * if looped around to the Hyper-V TX channel, so avoid them.
+	 */
+	size = MCLBYTES;
+
+	if (packet->tot_data_buf_len > MCLBYTES) {
+		/* 4096 */
+		size = MJUMPAGESIZE;
+	}
+
+	m_new = m_getjcl(M_DONTWAIT, MT_DATA, M_PKTHDR, size);
+
+	if (m_new == NULL)
+		return (0);
+
+	/*
+	 * Remove trailing junk from RX data buffer.
+	 * Fixme:  This will not work for multiple Hyper-V RX buffers.
+	 * Fortunately, the channel gathers all RX data into one buffer.
+	 *
+	 * L2 frame length, with L2 header, not including CRC
+	 */
+	packet->page_buffers[0].length = packet->tot_data_buf_len;
+
+	/*
+	 * Copy the received packet to one or more mbufs.
+	 * The copy is required since the memory pointed to by netvsc_packet
+	 * cannot be deallocated
+	 */
+	for (i = 0; i < packet->page_buf_count; i++) {
+		/* Shift virtual page number to form virtual page address */
+		uint8_t *vaddr = (uint8_t *)
+		    (packet->page_buffers[i].pfn << PAGE_SHIFT);
+
+		if (!hv_m_append(m_new, packet->page_buffers[i].length,
+		    vaddr + packet->page_buffers[i].offset)) {
+			/*
+			 * The chain could not be extended, so only part of
+			 * the frame was copied.  Drop it rather than hand a
+			 * truncated frame to the stack.
+			 */
+			m_freem(m_new);
+			ifp->if_ierrors++;
+			return (0);
+		}
+	}
+
+	m_new->m_pkthdr.rcvif = ifp;
+
+	if ((packet->vlan_tci != 0) &&
+	    (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0) {
+		m_new->m_pkthdr.ether_vtag = packet->vlan_tci;
+		m_new->m_flags |= M_VLANTAG;
+	}
+
+	/*
+	 * Note:  Moved RX completion back to hv_nv_on_receive() so all
+	 * messages (not just data messages) will trigger a response.
+	 */
+
+	ifp->if_ipackets++;
+
+	/* We're not holding the lock here, so don't release it */
+	(*ifp->if_input)(ifp, m_new);
+
+	return (0);
+}
+
+/*
+ * Standard ioctl entry point.  Called when the user wants to configure
+ * the interface.
+ *
+ * Requests handled here:
+ *   SIOCSIFMTU    reject values above NETVSC_MAX_CONFIGURABLE_MTU,
+ *                 otherwise record the MTU and cycle the device through
+ *                 hv_rf_on_device_remove()/hv_rf_on_device_add().
+ *   SIOCSIFFLAGS  bring the interface up or down to follow IFF_UP.
+ *   SIOCSIFCAP    flip IFCAP_HWCSUM when the request differs from the
+ *                 current setting.
+ *   SIOCADDMULTI, SIOCDELMULTI, SIOCSIFMEDIA, SIOCGIFMEDIA
+ *                 always answered with EINVAL.
+ * Every other request, SIOCSIFADDR and SIOCGIFADDR included, is passed
+ * to ether_ioctl().
+ *
+ * Returns 0 on success, otherwise the error produced by the failing step.
+ */
+static int
+hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
+{
+	hn_softc_t *sc = ifp->if_softc;
+	struct ifreq *ifr = (struct ifreq *)data;
+	netvsc_device_info device_info;
+	struct hv_device *hn_dev;
+	int mask, error = 0;
+
+	switch (cmd) {
+	case SIOCSIFMTU:
+		hn_dev = vmbus_get_devctx(sc->hn_dev);
+
+		NV_LOCK(sc);
+		if (ifr->ifr_mtu > NETVSC_MAX_CONFIGURABLE_MTU) {
+			error = EINVAL;
+		} else {
+			/* Obtain and record requested MTU */
+			ifp->if_mtu = ifr->ifr_mtu;
+
+			/*
+			 * We must remove and add back the device to cause
+			 * the new MTU to take effect.  This includes tearing
+			 * down, but not deleting the channel, then bringing
+			 * it back up.  Each step runs only if the previous
+			 * one succeeded.
+			 */
+			error = hv_rf_on_device_remove(hn_dev,
+			    HV_RF_NV_RETAIN_CHANNEL);
+			if (error == 0) {
+				error = hv_rf_on_device_add(hn_dev,
+				    &device_info);
+			}
+			if (error == 0) {
+				hn_ifinit_locked(sc);
+			}
+		}
+		NV_UNLOCK(sc);
+		break;
+
+	case SIOCSIFFLAGS:
+		NV_LOCK(sc);
+		if (ifp->if_flags & IFF_UP) {
+			/*
+			 * If only the state of the PROMISC flag changed,
+			 * then just use the 'set promisc mode' command
+			 * instead of reinitializing the entire NIC. Doing
+			 * a full re-init means reloading the firmware and
+			 * waiting for it to start up, which may take a
+			 * second or two.
+			 */
+#ifdef notyet
+			/* Fixme:  Promiscuous mode? */
+			/* No promiscuous mode with Xen */
+			if (ifp->if_drv_flags & IFF_DRV_RUNNING &&
+			    ifp->if_flags & IFF_PROMISC &&
+			    !(sc->hn_if_flags & IFF_PROMISC)) {
+				/* do something here for Hyper-V */
+				;
+/*				XN_SETBIT(sc, XN_RX_MODE, */
+/*				    XN_RXMODE_RX_PROMISC); */
+			} else if (ifp->if_drv_flags & IFF_DRV_RUNNING &&
+			    !(ifp->if_flags & IFF_PROMISC) &&
+			    sc->hn_if_flags & IFF_PROMISC) {
+				/* do something here for Hyper-V */
+				;
+/*				XN_CLRBIT(sc, XN_RX_MODE, */
+/*				    XN_RXMODE_RX_PROMISC); */
+			} else
+#endif
+				hn_ifinit_locked(sc);
+		} else if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
+			hn_stop(sc);
+		}
+		/* Remember the flags this request was evaluated against. */
+		sc->hn_if_flags = ifp->if_flags;
+		NV_UNLOCK(sc);
+		break;
+
+	case SIOCSIFCAP:
+		/* Bits that differ between the request and the current state */
+		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
+		if ((mask & IFCAP_HWCSUM) != 0) {
+			if ((ifp->if_capenable & IFCAP_HWCSUM) != 0) {
+				ifp->if_capenable &= ~IFCAP_HWCSUM;
+			} else {
+				ifp->if_capenable |= IFCAP_HWCSUM;
+			}
+		}
+		break;
+
+	case SIOCADDMULTI:
+	case SIOCDELMULTI:
+#ifdef notyet
+		/* Fixme:  Multicast mode? */
+		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
+			NV_LOCK(sc);
+			netvsc_setmulti(sc);
+			NV_UNLOCK(sc);
+			error = 0;
+		}
+#endif
+		/* FALLTHROUGH */
+	case SIOCSIFMEDIA:
+	case SIOCGIFMEDIA:
+		error = EINVAL;
+		break;
+
+	case SIOCSIFADDR:
+	case SIOCGIFADDR:
+	default:
+		error = ether_ioctl(ifp, cmd, data);
+		break;
+	}
+
+	return (error);
+}
+
+/*
+ * Stop the interface: clear IFF_DRV_RUNNING and IFF_DRV_OACTIVE, mark
+ * the softc as not initialized (hn_initdone = 0) and close the device
+ * through hv_rf_on_close().
+ *
+ * Lock ownership is asserted with NV_LOCK_ASSERT().
+ */
+static void
+hn_stop(hn_softc_t *sc)
+{
+	struct ifnet *ifp;
+	struct hv_device *device_ctx = vmbus_get_devctx(sc->hn_dev);
+
+	NV_LOCK_ASSERT(sc);
+	ifp = sc->hn_ifp;
+
+	printf(" Closing Device ...\n");
+
+	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
+	sc->hn_initdone = 0;
+
+	/*
+	 * This function returns void and has nowhere to report a status,
+	 * so the result is discarded explicitly instead of being stored
+	 * in a local that is never read.
+	 */
+	(void)hv_rf_on_close(device_ctx);
+}
+
+/*
+ * FreeBSD transmit entry point.
+ *
+ * Takes the softc lock around a call to hn_start_locked().
+ */
+static void
+hn_start(struct ifnet *ifp)
+{
+	hn_softc_t *softc = ifp->if_softc;
+
+	NV_LOCK(softc);
+	hn_start_locked(ifp);
+	NV_UNLOCK(softc);
+}
+
+/*
+ * Bring the interface up; lock ownership is asserted with
+ * NV_LOCK_ASSERT().
+ *
+ * Does nothing when the interface is already marked IFF_DRV_RUNNING.
+ * Otherwise hv_promisc_mode is set to 1 and the device is opened with
+ * hv_rf_on_open(); only when that returns 0 is hn_initdone set,
+ * IFF_DRV_RUNNING set and IFF_DRV_OACTIVE cleared.
+ */
+static void
+hn_ifinit_locked(hn_softc_t *sc)
+{
+	struct hv_device *device_ctx = vmbus_get_devctx(sc->hn_dev);
+	struct ifnet *ifp;
+
+	NV_LOCK_ASSERT(sc);
+
+	ifp = sc->hn_ifp;
+	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0)
+		return;
+
+	hv_promisc_mode = 1;
+
+	/* A non-zero result leaves the interface state untouched. */
+	if (hv_rf_on_open(device_ctx) != 0)
+		return;
+
+	sc->hn_initdone = 1;
+	ifp->if_drv_flags |= IFF_DRV_RUNNING;
+	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
+}
+
+/*
+ * Locking wrapper around hn_ifinit_locked().  xsc is the softc,
+ * passed as an untyped pointer.
+ */
+static void
+hn_ifinit(void *xsc)
+{
+	hn_softc_t *softc;
+
+	softc = xsc;
+	NV_LOCK(softc);
+	hn_ifinit_locked(softc);
+	NV_UNLOCK(softc);
+}
+
+#ifdef LATER
+/*
+ * Watchdog handler (compiled only when LATER is defined): log the
+ * timeout, run hn_ifinit() on the softc and count an output error.
+ */
+static void
+hn_watchdog(struct ifnet *ifp)
+{
+	hn_softc_t *softc = ifp->if_softc;
+
+	printf("hn%d: watchdog timeout -- resetting\n", softc->hn_unit);
+	hn_ifinit(softc);	/*???*/
+	ifp->if_oerrors++;
+}
+#endif
+
+/*
+ * Device method table: binds the probe, attach, detach and shutdown
+ * device methods to this driver's handlers.
+ */
+static device_method_t netvsc_methods[] = {
+ /* Device interface */
+ DEVMETHOD(device_probe, netvsc_probe),
+ DEVMETHOD(device_attach, netvsc_attach),
+ DEVMETHOD(device_detach, netvsc_detach),
+ DEVMETHOD(device_shutdown, netvsc_shutdown),
+
+ { 0, 0 } /* end-of-table marker */
+};
+
+/* Driver description: device name, method table and softc size. */
+static driver_t netvsc_driver = {
+ NETVSC_DEVNAME,
+ netvsc_methods,
+ sizeof(hn_softc_t)
+};
+
+static devclass_t netvsc_devclass;
+
+/*
+ * Register the driver as module "hn" on the vmbus bus, declare module
+ * version 1, and declare a dependency on vmbus (minimum, preferred and
+ * maximum version all 1).
+ */
+DRIVER_MODULE(hn, vmbus, netvsc_driver, netvsc_devclass, 0, 0);
+MODULE_VERSION(hn, 1);
+MODULE_DEPEND(hn, vmbus, 1, 1, 1);
+/* Have netvsc_init() invoked with a NULL argument during system startup. */
+SYSINIT(netvsc_initx, SI_SUB_RUN_SCHEDULER, SI_ORDER_MIDDLE + 1, netvsc_init,
+ NULL);
+
diff --git a/sys/dev/hyperv/netvsc/hv_rndis.h b/sys/dev/hyperv/netvsc/hv_rndis.h
new file mode 100644
index 000000000000..819cab5a7290
--- /dev/null
+++ b/sys/dev/hyperv/netvsc/hv_rndis.h
@@ -0,0 +1,911 @@
+/*-
+ * Copyright (c) 2009-2012 Microsoft Corp.
+ * Copyright (c) 2010-2012 Citrix Inc.
+ * Copyright (c) 2012 NetApp Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice unmodified, this list of conditions, and the following
+ * disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __HV_RNDIS_H__
+#define __HV_RNDIS_H__
+
+
+/*
+ * NDIS protocol version numbers
+ */
+#define NDIS_VERSION_5_0 0x00050000
+#define NDIS_VERSION_5_1 0x00050001
+#define NDIS_VERSION_6_0 0x00060000
+#define NDIS_VERSION (NDIS_VERSION_5_1)
+
+/*
+ * Status codes
+ */
+
+#define STATUS_SUCCESS (0x00000000L)
+#define STATUS_UNSUCCESSFUL (0xC0000001L)
+#define STATUS_PENDING (0x00000103L)
+#define STATUS_INSUFFICIENT_RESOURCES (0xC000009AL)
+#define STATUS_BUFFER_OVERFLOW (0x80000005L)
+#define STATUS_NOT_SUPPORTED (0xC00000BBL)
+
+#define RNDIS_STATUS_SUCCESS (STATUS_SUCCESS)
+#define RNDIS_STATUS_PENDING (STATUS_PENDING)
+#define RNDIS_STATUS_NOT_RECOGNIZED (0x00010001L)
+#define RNDIS_STATUS_NOT_COPIED (0x00010002L)
+#define RNDIS_STATUS_NOT_ACCEPTED (0x00010003L)
+#define RNDIS_STATUS_CALL_ACTIVE (0x00010007L)
+
+#define RNDIS_STATUS_ONLINE (0x40010003L)
+#define RNDIS_STATUS_RESET_START (0x40010004L)
+#define RNDIS_STATUS_RESET_END (0x40010005L)
+#define RNDIS_STATUS_RING_STATUS (0x40010006L)
+#define RNDIS_STATUS_CLOSED (0x40010007L)
+#define RNDIS_STATUS_WAN_LINE_UP (0x40010008L)
+#define RNDIS_STATUS_WAN_LINE_DOWN (0x40010009L)
+#define RNDIS_STATUS_WAN_FRAGMENT (0x4001000AL)
+#define RNDIS_STATUS_MEDIA_CONNECT (0x4001000BL)
+#define RNDIS_STATUS_MEDIA_DISCONNECT (0x4001000CL)
+#define RNDIS_STATUS_HARDWARE_LINE_UP (0x4001000DL)
+#define RNDIS_STATUS_HARDWARE_LINE_DOWN (0x4001000EL)
+#define RNDIS_STATUS_INTERFACE_UP (0x4001000FL)
+#define RNDIS_STATUS_INTERFACE_DOWN (0x40010010L)
+#define RNDIS_STATUS_MEDIA_BUSY (0x40010011L)
+#define RNDIS_STATUS_MEDIA_SPECIFIC_INDICATION (0x40010012L)
+#define RNDIS_STATUS_WW_INDICATION RNDIS_STATUS_MEDIA_SPECIFIC_INDICATION
+#define RNDIS_STATUS_LINK_SPEED_CHANGE (0x40010013L)
+
+#define RNDIS_STATUS_NOT_RESETTABLE (0x80010001L)
+#define RNDIS_STATUS_SOFT_ERRORS (0x80010003L)
+#define RNDIS_STATUS_HARD_ERRORS (0x80010004L)
+#define RNDIS_STATUS_BUFFER_OVERFLOW (STATUS_BUFFER_OVERFLOW)
+
+#define RNDIS_STATUS_FAILURE (STATUS_UNSUCCESSFUL)
+#define RNDIS_STATUS_RESOURCES (STATUS_INSUFFICIENT_RESOURCES)
+#define RNDIS_STATUS_CLOSING (0xC0010002L)
+#define RNDIS_STATUS_BAD_VERSION (0xC0010004L)
+#define RNDIS_STATUS_BAD_CHARACTERISTICS (0xC0010005L)
+#define RNDIS_STATUS_ADAPTER_NOT_FOUND (0xC0010006L)
+#define RNDIS_STATUS_OPEN_FAILED (0xC0010007L)
+#define RNDIS_STATUS_DEVICE_FAILED (0xC0010008L)
+#define RNDIS_STATUS_MULTICAST_FULL (0xC0010009L)
+#define RNDIS_STATUS_MULTICAST_EXISTS (0xC001000AL)
+#define RNDIS_STATUS_MULTICAST_NOT_FOUND (0xC001000BL)
+#define RNDIS_STATUS_REQUEST_ABORTED (0xC001000CL)
+#define RNDIS_STATUS_RESET_IN_PROGRESS (0xC001000DL)
+#define RNDIS_STATUS_CLOSING_INDICATING (0xC001000EL)
+#define RNDIS_STATUS_NOT_SUPPORTED (STATUS_NOT_SUPPORTED)
+#define RNDIS_STATUS_INVALID_PACKET (0xC001000FL)
+#define RNDIS_STATUS_OPEN_LIST_FULL (0xC0010010L)
+#define RNDIS_STATUS_ADAPTER_NOT_READY (0xC0010011L)
+#define RNDIS_STATUS_ADAPTER_NOT_OPEN (0xC0010012L)
+#define RNDIS_STATUS_NOT_INDICATING (0xC0010013L)
+#define RNDIS_STATUS_INVALID_LENGTH (0xC0010014L)
+#define RNDIS_STATUS_INVALID_DATA (0xC0010015L)
+#define RNDIS_STATUS_BUFFER_TOO_SHORT (0xC0010016L)
+#define RNDIS_STATUS_INVALID_OID (0xC0010017L)
+#define RNDIS_STATUS_ADAPTER_REMOVED (0xC0010018L)
+#define RNDIS_STATUS_UNSUPPORTED_MEDIA (0xC0010019L)
+#define RNDIS_STATUS_GROUP_ADDRESS_IN_USE (0xC001001AL)
+#define RNDIS_STATUS_FILE_NOT_FOUND (0xC001001BL)
+#define RNDIS_STATUS_ERROR_READING_FILE (0xC001001CL)
+#define RNDIS_STATUS_ALREADY_MAPPED (0xC001001DL)
+#define RNDIS_STATUS_RESOURCE_CONFLICT (0xC001001EL)
+#define RNDIS_STATUS_NO_CABLE (0xC001001FL)
+
+#define RNDIS_STATUS_INVALID_SAP (0xC0010020L)
+#define RNDIS_STATUS_SAP_IN_USE (0xC0010021L)
+#define RNDIS_STATUS_INVALID_ADDRESS (0xC0010022L)
+#define RNDIS_STATUS_VC_NOT_ACTIVATED (0xC0010023L)
+#define RNDIS_STATUS_DEST_OUT_OF_ORDER (0xC0010024L)
+#define RNDIS_STATUS_VC_NOT_AVAILABLE (0xC0010025L)
+#define RNDIS_STATUS_CELLRATE_NOT_AVAILABLE (0xC0010026L)
+#define RNDIS_STATUS_INCOMPATABLE_QOS (0xC0010027L)
+#define RNDIS_STATUS_AAL_PARAMS_UNSUPPORTED (0xC0010028L)
+#define RNDIS_STATUS_NO_ROUTE_TO_DESTINATION (0xC0010029L)
+
+#define RNDIS_STATUS_TOKEN_RING_OPEN_ERROR (0xC0011000L)
+
+
+/*
+ * Object Identifiers used by NdisRequest Query/Set Information
+ */
+
+/*
+ * General Objects
+ */
+
+#define RNDIS_OID_GEN_SUPPORTED_LIST 0x00010101
+#define RNDIS_OID_GEN_HARDWARE_STATUS 0x00010102
+#define RNDIS_OID_GEN_MEDIA_SUPPORTED 0x00010103
+#define RNDIS_OID_GEN_MEDIA_IN_USE 0x00010104
+#define RNDIS_OID_GEN_MAXIMUM_LOOKAHEAD 0x00010105
+#define RNDIS_OID_GEN_MAXIMUM_FRAME_SIZE 0x00010106
+#define RNDIS_OID_GEN_LINK_SPEED 0x00010107
+#define RNDIS_OID_GEN_TRANSMIT_BUFFER_SPACE 0x00010108
+#define RNDIS_OID_GEN_RECEIVE_BUFFER_SPACE 0x00010109
+#define RNDIS_OID_GEN_TRANSMIT_BLOCK_SIZE 0x0001010A
+#define RNDIS_OID_GEN_RECEIVE_BLOCK_SIZE 0x0001010B
+#define RNDIS_OID_GEN_VENDOR_ID 0x0001010C
+#define RNDIS_OID_GEN_VENDOR_DESCRIPTION 0x0001010D
+#define RNDIS_OID_GEN_CURRENT_PACKET_FILTER 0x0001010E
+#define RNDIS_OID_GEN_CURRENT_LOOKAHEAD 0x0001010F
+#define RNDIS_OID_GEN_DRIVER_VERSION 0x00010110
+#define RNDIS_OID_GEN_MAXIMUM_TOTAL_SIZE 0x00010111
+#define RNDIS_OID_GEN_PROTOCOL_OPTIONS 0x00010112
+#define RNDIS_OID_GEN_MAC_OPTIONS 0x00010113
+#define RNDIS_OID_GEN_MEDIA_CONNECT_STATUS 0x00010114
+#define RNDIS_OID_GEN_MAXIMUM_SEND_PACKETS 0x00010115
+#define RNDIS_OID_GEN_VENDOR_DRIVER_VERSION 0x00010116
+#define RNDIS_OID_GEN_NETWORK_LAYER_ADDRESSES 0x00010118
+#define RNDIS_OID_GEN_TRANSPORT_HEADER_OFFSET 0x00010119
+#define RNDIS_OID_GEN_MACHINE_NAME 0x0001021A
+#define RNDIS_OID_GEN_RNDIS_CONFIG_PARAMETER 0x0001021B
+
+#define RNDIS_OID_GEN_XMIT_OK 0x00020101
+#define RNDIS_OID_GEN_RCV_OK 0x00020102
+#define RNDIS_OID_GEN_XMIT_ERROR 0x00020103
+#define RNDIS_OID_GEN_RCV_ERROR 0x00020104
+#define RNDIS_OID_GEN_RCV_NO_BUFFER 0x00020105
+
+#define RNDIS_OID_GEN_DIRECTED_BYTES_XMIT 0x00020201
+#define RNDIS_OID_GEN_DIRECTED_FRAMES_XMIT 0x00020202
+#define RNDIS_OID_GEN_MULTICAST_BYTES_XMIT 0x00020203
+#define RNDIS_OID_GEN_MULTICAST_FRAMES_XMIT 0x00020204
+#define RNDIS_OID_GEN_BROADCAST_BYTES_XMIT 0x00020205
+#define RNDIS_OID_GEN_BROADCAST_FRAMES_XMIT 0x00020206
+#define RNDIS_OID_GEN_DIRECTED_BYTES_RCV 0x00020207
+#define RNDIS_OID_GEN_DIRECTED_FRAMES_RCV 0x00020208
+#define RNDIS_OID_GEN_MULTICAST_BYTES_RCV 0x00020209
+#define RNDIS_OID_GEN_MULTICAST_FRAMES_RCV 0x0002020A
+#define RNDIS_OID_GEN_BROADCAST_BYTES_RCV 0x0002020B
+#define RNDIS_OID_GEN_BROADCAST_FRAMES_RCV 0x0002020C
+
+#define RNDIS_OID_GEN_RCV_CRC_ERROR 0x0002020D
+#define RNDIS_OID_GEN_TRANSMIT_QUEUE_LENGTH 0x0002020E
+
+#define RNDIS_OID_GEN_GET_TIME_CAPS 0x0002020F
+#define RNDIS_OID_GEN_GET_NETCARD_TIME 0x00020210
+
+/*
+ * These are connection-oriented general OIDs.
+ * These replace the above OIDs for connection-oriented media.
+ */
+#define RNDIS_OID_GEN_CO_SUPPORTED_LIST 0x00010101
+#define RNDIS_OID_GEN_CO_HARDWARE_STATUS 0x00010102
+#define RNDIS_OID_GEN_CO_MEDIA_SUPPORTED 0x00010103
+#define RNDIS_OID_GEN_CO_MEDIA_IN_USE 0x00010104
+#define RNDIS_OID_GEN_CO_LINK_SPEED 0x00010105
+#define RNDIS_OID_GEN_CO_VENDOR_ID 0x00010106
+#define RNDIS_OID_GEN_CO_VENDOR_DESCRIPTION 0x00010107
+#define RNDIS_OID_GEN_CO_DRIVER_VERSION 0x00010108
+#define RNDIS_OID_GEN_CO_PROTOCOL_OPTIONS 0x00010109
+#define RNDIS_OID_GEN_CO_MAC_OPTIONS 0x0001010A
+#define RNDIS_OID_GEN_CO_MEDIA_CONNECT_STATUS 0x0001010B
+#define RNDIS_OID_GEN_CO_VENDOR_DRIVER_VERSION 0x0001010C
+#define RNDIS_OID_GEN_CO_MINIMUM_LINK_SPEED 0x0001010D
+
+#define RNDIS_OID_GEN_CO_GET_TIME_CAPS 0x00010201
+#define RNDIS_OID_GEN_CO_GET_NETCARD_TIME 0x00010202
+
+/*
+ * These are connection-oriented statistics OIDs.
+ */
+#define RNDIS_OID_GEN_CO_XMIT_PDUS_OK 0x00020101
+#define RNDIS_OID_GEN_CO_RCV_PDUS_OK 0x00020102
+#define RNDIS_OID_GEN_CO_XMIT_PDUS_ERROR 0x00020103
+#define RNDIS_OID_GEN_CO_RCV_PDUS_ERROR 0x00020104
+#define RNDIS_OID_GEN_CO_RCV_PDUS_NO_BUFFER 0x00020105
+
+
+#define RNDIS_OID_GEN_CO_RCV_CRC_ERROR 0x00020201
+#define RNDIS_OID_GEN_CO_TRANSMIT_QUEUE_LENGTH 0x00020202
+#define RNDIS_OID_GEN_CO_BYTES_XMIT 0x00020203
+#define RNDIS_OID_GEN_CO_BYTES_RCV 0x00020204
+#define RNDIS_OID_GEN_CO_BYTES_XMIT_OUTSTANDING 0x00020205
+#define RNDIS_OID_GEN_CO_NETCARD_LOAD 0x00020206
+
+/*
+ * These are objects for Connection-oriented media call-managers.
+ */
+#define RNDIS_OID_CO_ADD_PVC 0xFF000001
+#define RNDIS_OID_CO_DELETE_PVC 0xFF000002
+#define RNDIS_OID_CO_GET_CALL_INFORMATION 0xFF000003
+#define RNDIS_OID_CO_ADD_ADDRESS 0xFF000004
+#define RNDIS_OID_CO_DELETE_ADDRESS 0xFF000005
+#define RNDIS_OID_CO_GET_ADDRESSES 0xFF000006
+#define RNDIS_OID_CO_ADDRESS_CHANGE 0xFF000007
+#define RNDIS_OID_CO_SIGNALING_ENABLED 0xFF000008
+#define RNDIS_OID_CO_SIGNALING_DISABLED 0xFF000009
+
+
+/*
+ * 802.3 Objects (Ethernet)
+ */
+
+#define RNDIS_OID_802_3_PERMANENT_ADDRESS 0x01010101
+#define RNDIS_OID_802_3_CURRENT_ADDRESS 0x01010102
+#define RNDIS_OID_802_3_MULTICAST_LIST 0x01010103
+#define RNDIS_OID_802_3_MAXIMUM_LIST_SIZE 0x01010104
+#define RNDIS_OID_802_3_MAC_OPTIONS 0x01010105
+
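+/*
+ * 802.3 MAC option flag (presumably a bit in the value of
+ * RNDIS_OID_802_3_MAC_OPTIONS -- confirm against the NDIS documentation)
+ */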
+#define NDIS_802_3_MAC_OPTION_PRIORITY 0x00000001
+
+#define RNDIS_OID_802_3_RCV_ERROR_ALIGNMENT 0x01020101
+#define RNDIS_OID_802_3_XMIT_ONE_COLLISION 0x01020102
+#define RNDIS_OID_802_3_XMIT_MORE_COLLISIONS 0x01020103
+
+#define RNDIS_OID_802_3_XMIT_DEFERRED 0x01020201
+#define RNDIS_OID_802_3_XMIT_MAX_COLLISIONS 0x01020202
+#define RNDIS_OID_802_3_RCV_OVERRUN 0x01020203
+#define RNDIS_OID_802_3_XMIT_UNDERRUN 0x01020204
+#define RNDIS_OID_802_3_XMIT_HEARTBEAT_FAILURE 0x01020205
+#define RNDIS_OID_802_3_XMIT_TIMES_CRS_LOST 0x01020206
+#define RNDIS_OID_802_3_XMIT_LATE_COLLISIONS 0x01020207
+
+
+/*
+ * RNDIS MP custom OID for test
+ */
+#define OID_RNDISMP_GET_RECEIVE_BUFFERS 0xFFA0C90D /* Query only */
+
+
+/*
+ * Remote NDIS message types
+ */
+#define REMOTE_NDIS_PACKET_MSG 0x00000001
+#define REMOTE_NDIS_INITIALIZE_MSG 0x00000002
+#define REMOTE_NDIS_HALT_MSG 0x00000003
+#define REMOTE_NDIS_QUERY_MSG 0x00000004
+#define REMOTE_NDIS_SET_MSG 0x00000005
+#define REMOTE_NDIS_RESET_MSG 0x00000006
+#define REMOTE_NDIS_INDICATE_STATUS_MSG 0x00000007
+#define REMOTE_NDIS_KEEPALIVE_MSG 0x00000008
+
+#define REMOTE_CONDIS_MP_CREATE_VC_MSG 0x00008001
+#define REMOTE_CONDIS_MP_DELETE_VC_MSG 0x00008002
+#define REMOTE_CONDIS_MP_ACTIVATE_VC_MSG 0x00008005
+#define REMOTE_CONDIS_MP_DEACTIVATE_VC_MSG 0x00008006
+#define REMOTE_CONDIS_INDICATE_STATUS_MSG 0x00008007
+
+/*
+ * Remote NDIS message completion types
+ */
+#define REMOTE_NDIS_INITIALIZE_CMPLT 0x80000002
+#define REMOTE_NDIS_QUERY_CMPLT 0x80000004
+#define REMOTE_NDIS_SET_CMPLT 0x80000005
+#define REMOTE_NDIS_RESET_CMPLT 0x80000006
+#define REMOTE_NDIS_KEEPALIVE_CMPLT 0x80000008
+
+#define REMOTE_CONDIS_MP_CREATE_VC_CMPLT 0x80008001
+#define REMOTE_CONDIS_MP_DELETE_VC_CMPLT 0x80008002
+#define REMOTE_CONDIS_MP_ACTIVATE_VC_CMPLT 0x80008005
+#define REMOTE_CONDIS_MP_DEACTIVATE_VC_CMPLT 0x80008006
+
+/*
+ * Reserved message type for private communication between lower-layer
+ * host driver and remote device, if necessary.
+ */
+#define REMOTE_NDIS_BUS_MSG 0xff000001
+
+/*
+ * Defines for device_flags in rndis_initialize_complete
+ */
+#define RNDIS_DF_CONNECTIONLESS 0x00000001
+#define RNDIS_DF_CONNECTION_ORIENTED 0x00000002
+#define RNDIS_DF_RAW_DATA 0x00000004
+
+/*
+ * Remote NDIS medium types.
+ */
+#define RNDIS_MEDIUM_802_3 0x00000000
+#define RNDIS_MEDIUM_802_5 0x00000001
+#define RNDIS_MEDIUM_FDDI 0x00000002
+#define RNDIS_MEDIUM_WAN 0x00000003
+#define RNDIS_MEDIUM_LOCAL_TALK 0x00000004
+#define RNDIS_MEDIUM_ARCNET_RAW 0x00000006
+#define RNDIS_MEDIUM_ARCNET_878_2 0x00000007
+#define RNDIS_MEDIUM_ATM 0x00000008
+#define RNDIS_MEDIUM_WIRELESS_WAN 0x00000009
+#define RNDIS_MEDIUM_IRDA 0x0000000a
+#define RNDIS_MEDIUM_CO_WAN 0x0000000b
+/* Not a real medium, defined as an upper bound */
+#define RNDIS_MEDIUM_MAX 0x0000000d
+
+/*
+ * Remote NDIS medium connection states.
+ */
+#define RNDIS_MEDIA_STATE_CONNECTED 0x00000000
+#define RNDIS_MEDIA_STATE_DISCONNECTED 0x00000001
+
+/*
+ * Remote NDIS version numbers
+ */
+#define RNDIS_MAJOR_VERSION 0x00000001
+#define RNDIS_MINOR_VERSION 0x00000000
+
+/*
+ * NdisInitialize message
+ */
+typedef struct rndis_initialize_request_ {
+ /* RNDIS request ID */
+ uint32_t request_id;
+ uint32_t major_version;
+ uint32_t minor_version;
+ uint32_t max_xfer_size;
+} rndis_initialize_request;
+
+/*
+ * Response to NdisInitialize
+ */
+typedef struct rndis_initialize_complete_ {
+ /* RNDIS request ID */
+ uint32_t request_id;
+ /* RNDIS status */
+ uint32_t status;
+ uint32_t major_version;
+ uint32_t minor_version;
+ uint32_t device_flags;
+ /* RNDIS medium */
+ uint32_t medium;
+ uint32_t max_pkts_per_msg;
+ uint32_t max_xfer_size;
+ uint32_t pkt_align_factor;
+ uint32_t af_list_offset;
+ uint32_t af_list_size;
+} rndis_initialize_complete;
+
+/*
+ * Call manager devices only: Information about an address family
+ * supported by the device is appended to the response to NdisInitialize.
+ */
+typedef struct rndis_co_address_family_ {
+ /* RNDIS AF */
+ uint32_t address_family;
+ uint32_t major_version;
+ uint32_t minor_version;
+} rndis_co_address_family;
+
+/*
+ * NdisHalt message
+ */
+typedef struct rndis_halt_request_ {
+ /* RNDIS request ID */
+ uint32_t request_id;
+} rndis_halt_request;
+
+/*
+ * NdisQueryRequest message
+ */
+typedef struct rndis_query_request_ {
+ /* RNDIS request ID */
+ uint32_t request_id;
+ /* RNDIS OID */
+ uint32_t oid;
+ uint32_t info_buffer_length;
+ uint32_t info_buffer_offset;
+ /* RNDIS handle */
+ uint32_t device_vc_handle;
+} rndis_query_request;
+
+/*
+ * Response to NdisQueryRequest
+ */
+typedef struct rndis_query_complete_ {
+ /* RNDIS request ID */
+ uint32_t request_id;
+ /* RNDIS status */
+ uint32_t status;
+ uint32_t info_buffer_length;
+ uint32_t info_buffer_offset;
+} rndis_query_complete;
+
+/*
+ * NdisSetRequest message
+ */
+typedef struct rndis_set_request_ {
+ /* RNDIS request ID */
+ uint32_t request_id;
+ /* RNDIS OID */
+ uint32_t oid;
+ uint32_t info_buffer_length;
+ uint32_t info_buffer_offset;
+ /* RNDIS handle */
+ uint32_t device_vc_handle;
+} rndis_set_request;
+
+/*
+ * Response to NdisSetRequest
+ */
+typedef struct rndis_set_complete_ {
+ /* RNDIS request ID */
+ uint32_t request_id;
+ /* RNDIS status */
+ uint32_t status;
+} rndis_set_complete;
+
+/*
+ * NdisReset message
+ */
+typedef struct rndis_reset_request_ {
+ uint32_t reserved;
+} rndis_reset_request;
+
+/*
+ * Response to NdisReset
+ */
+typedef struct rndis_reset_complete_ {
+ /* RNDIS status */
+ uint32_t status;
+ uint32_t addressing_reset;
+} rndis_reset_complete;
+
+/*
+ * NdisMIndicateStatus message
+ */
+typedef struct rndis_indicate_status_ {
+ /* RNDIS status */
+ uint32_t status;
+ uint32_t status_buf_length;
+ uint32_t status_buf_offset;
+} rndis_indicate_status;
+
+/*
+ * Diagnostic information passed as the status buffer in
+ * rndis_indicate_status messages signifying error conditions.
+ */
+typedef struct rndis_diagnostic_info_ {
+ /* RNDIS status */
+ uint32_t diag_status;
+ uint32_t error_offset;
+} rndis_diagnostic_info;
+
+/*
+ * NdisKeepAlive message
+ */
+typedef struct rndis_keepalive_request_ {
+ /* RNDIS request ID */
+ uint32_t request_id;
+} rndis_keepalive_request;
+
+/*
+ * Response to NdisKeepAlive
+ */
+typedef struct rndis_keepalive_complete_ {
+ /* RNDIS request ID */
+ uint32_t request_id;
+ /* RNDIS status */
+ uint32_t status;
+} rndis_keepalive_complete;
+
+/*
+ * Data message. All offset fields contain byte offsets from the beginning
+ * of the rndis_packet structure. All length fields are in bytes.
+ * vc_handle is set to 0 for connectionless data; otherwise it
+ * contains the VC handle.
+ */
+typedef struct rndis_packet_ {
+ uint32_t data_offset;
+ uint32_t data_length;
+ uint32_t oob_data_offset;
+ uint32_t oob_data_length;
+ uint32_t num_oob_data_elements;
+ uint32_t per_pkt_info_offset;
+ uint32_t per_pkt_info_length;
+ /* RNDIS handle */
+ uint32_t vc_handle;
+ uint32_t reserved;
+} rndis_packet;
+
+/*
+ * Extended data message.  The first nine fields (data_offset through
+ * reserved) repeat the layout of rndis_packet; they are followed by a
+ * data buffer id/offset and a next-header buffer id, byte offset and
+ * byte count.
+ *
+ * NOTE(review): no packing attribute is applied here.  Each 64-bit
+ * member follows an odd number of 32-bit fields, so on ABIs that align
+ * uint64_t to 8 bytes the compiler will insert padding in front of it --
+ * confirm this matches the layout the peer expects.
+ */
+typedef struct rndis_packet_ex_ {
+ uint32_t data_offset;
+ uint32_t data_length;
+ uint32_t oob_data_offset;
+ uint32_t oob_data_length;
+ uint32_t num_oob_data_elements;
+ uint32_t per_pkt_info_offset;
+ uint32_t per_pkt_info_length;
+ /* RNDIS handle */
+ uint32_t vc_handle;
+ uint32_t reserved;
+ uint64_t data_buf_id;
+ uint32_t data_buf_offset;
+ uint64_t next_header_buf_id;
+ uint32_t next_header_byte_offset;
+ uint32_t next_header_byte_count;
+} rndis_packet_ex;
+
+/*
+ * Optional Out of Band data associated with a Data message.
+ */
+typedef struct rndis_oobd_ {
+ uint32_t size;
+ /* RNDIS class ID */
+ uint32_t type;
+ uint32_t class_info_offset;
+} rndis_oobd;
+
+/*
+ * Packet extension field contents associated with a Data message.
+ */
+typedef struct rndis_per_packet_info_ {
+ uint32_t size;
+ uint32_t type;
+ uint32_t per_packet_info_offset;
+} rndis_per_packet_info;
+
+/*
+ * Per-packet information types.  No explicit values are given, so the
+ * enumerators are numbered consecutively from 0; the resulting values
+ * are noted next to each entry.  Presumably these are the values carried
+ * in rndis_per_packet_info.type -- confirm against the users of that
+ * field.
+ */
+typedef enum ndis_per_pkt_infotype_ {
+ tcpip_chksum_info, /* 0 */
+ ipsec_info, /* 1 */
+ tcp_large_send_info, /* 2 */
+ classification_handle_info, /* 3 */
+ ndis_reserved, /* 4 */
+ sgl_info, /* 5 */
+ ieee_8021q_info, /* 6 */
+ original_pkt_info, /* 7 */
+ pkt_cancel_id, /* 8 */
+ original_netbuf_list, /* 9 */
+ cached_netbuf_list, /* 10 */
+ short_pkt_padding_info, /* 11 */
+ max_perpkt_info /* 12: number of types above, not a type itself */
+} ndis_per_pkt_infotype;
+
+/*
+ * 802.1Q tag information.  The union lets the same 32 bits be accessed
+ * either as individual bit-fields (u1.s1) or as one word (u1.value).
+ * The bit-fields add up to 32 bits (3 + 1 + 12 + 16); how they are
+ * placed within the word is decided by the compiler/ABI.
+ */
+typedef struct ndis_8021q_info_ {
+ union {
+ struct {
+ uint32_t user_pri : 3; /* User Priority */
+ uint32_t cfi : 1; /* Canonical Format ID */
+ uint32_t vlan_id : 12;
+ uint32_t reserved : 16;
+ } s1;
+ uint32_t value; /* all of the above as a single word */
+ } u1;
+} ndis_8021q_info;
+
+/*
+ * Format of the information buffer passed in a SetRequest for the OID
+ * RNDIS_OID_GEN_RNDIS_CONFIG_PARAMETER.
+ */
+typedef struct rndis_config_parameter_info_ {
+ uint32_t parameter_name_offset;
+ uint32_t parameter_name_length;
+ uint32_t parameter_type;
+ uint32_t parameter_value_offset;
+ uint32_t parameter_value_length;
+} rndis_config_parameter_info;
+
+/*
+ * Values for parameter_type in rndis_config_parameter_info
+ */
+#define RNDIS_CONFIG_PARAM_TYPE_INTEGER 0
+#define RNDIS_CONFIG_PARAM_TYPE_STRING 2
+
+
+/*
+ * CONDIS Miniport messages for connection oriented devices
+ * that do not implement a call manager.
+ */
+
+/*
+ * CoNdisMiniportCreateVc message
+ */
+typedef struct rcondis_mp_create_vc_ {
+ /* RNDIS request ID */
+ uint32_t request_id;
+ /* RNDIS handle */
+ uint32_t ndis_vc_handle;
+} rcondis_mp_create_vc;
+
+/*
+ * Response to CoNdisMiniportCreateVc
+ */
+typedef struct rcondis_mp_create_vc_complete_ {
+ /* RNDIS request ID */
+ uint32_t request_id;
+ /* RNDIS handle */
+ uint32_t device_vc_handle;
+ /* RNDIS status */
+ uint32_t status;
+} rcondis_mp_create_vc_complete;
+
+/*
+ * CoNdisMiniportDeleteVc message
+ */
+typedef struct rcondis_mp_delete_vc_ {
+ /* RNDIS request ID */
+ uint32_t request_id;
+ /* RNDIS handle */
+ uint32_t device_vc_handle;
+} rcondis_mp_delete_vc;
+
+/*
+ * Response to CoNdisMiniportDeleteVc
+ */
+typedef struct rcondis_mp_delete_vc_complete_ {
+ /* RNDIS request ID */
+ uint32_t request_id;
+ /* RNDIS status */
+ uint32_t status;
+} rcondis_mp_delete_vc_complete;
+
+/*
+ * CoNdisMiniportQueryRequest message
+ */
+typedef struct rcondis_mp_query_request_ {
+ /* RNDIS request ID */
+ uint32_t request_id;
+ /* RNDIS request type */
+ uint32_t request_type;
+ /* RNDIS OID */
+ uint32_t oid;
+ /* RNDIS handle */
+ uint32_t device_vc_handle;
+ uint32_t info_buf_length;
+ uint32_t info_buf_offset;
+} rcondis_mp_query_request;
+
+/*
+ * CoNdisMiniportSetRequest message
+ */
+typedef struct rcondis_mp_set_request_ {
+ /* RNDIS request ID */
+ uint32_t request_id;
+ /* RNDIS request type */
+ uint32_t request_type;
+ /* RNDIS OID */
+ uint32_t oid;
+ /* RNDIS handle */
+ uint32_t device_vc_handle;
+ uint32_t info_buf_length;
+ uint32_t info_buf_offset;
+} rcondis_mp_set_request;
+
+/*
+ * CoNdisIndicateStatus message
+ */
+typedef struct rcondis_indicate_status_ {
+ /* RNDIS handle */
+ uint32_t ndis_vc_handle;
+ /* RNDIS status */
+ uint32_t status;
+ uint32_t status_buf_length;
+ uint32_t status_buf_offset;
+} rcondis_indicate_status;
+
+/*
+ * CONDIS Call/VC parameters
+ */
+
+/*
+ * Descriptor (type, length, offset) for a block of specific parameters.
+ * Embedded in rcondis_media_parameters (media_specific) and in
+ * rcondis_call_manager_parameters (call_mgr_specific).
+ */
+typedef struct rcondis_specific_parameters_ {
+ uint32_t parameter_type;
+ uint32_t parameter_length;
+ uint32_t parameter_offset;
+} rcondis_specific_parameters;
+
+/*
+ * Media parameters: a flags word, two reserved words and a descriptor
+ * for the media-specific parameters.
+ */
+typedef struct rcondis_media_parameters_ {
+ uint32_t flags;
+ uint32_t reserved1;
+ uint32_t reserved2;
+ rcondis_specific_parameters media_specific;
+} rcondis_media_parameters;
+
+/*
+ * Flow specification: eight 32-bit traffic parameters.  Used for both the
+ * transmit and the receive member of rcondis_call_manager_parameters.
+ * Units of the individual fields are not stated in this header.
+ */
+typedef struct rndis_flowspec_ {
+ uint32_t token_rate;
+ uint32_t token_bucket_size;
+ uint32_t peak_bandwidth;
+ uint32_t latency;
+ uint32_t delay_variation;
+ uint32_t service_type;
+ uint32_t max_sdu_size;
+ uint32_t minimum_policed_size;
+} rndis_flowspec;
+
+/*
+ * Call manager parameters: one flow specification per direction followed
+ * by a descriptor for the call-manager-specific part.
+ */
+typedef struct rcondis_call_manager_parameters_ {
+ rndis_flowspec transmit;
+ rndis_flowspec receive;
+ rcondis_specific_parameters call_mgr_specific;
+} rcondis_call_manager_parameters;
+
+/*
+ * CoNdisMiniportActivateVc message
+ *
+ * rndis_msg_container overlays this type as its co_miniport_activate_vc
+ * member.
+ */
+typedef struct rcondis_mp_activate_vc_request_ {
+ /* RNDIS request ID */
+ uint32_t request_id;
+ uint32_t flags;
+ /* RNDIS handle */
+ uint32_t device_vc_handle;
+ /*
+ * Offset/length pairs, presumably locating an rcondis_media_parameters
+ * and an rcondis_call_manager_parameters block -- TODO confirm; the
+ * offset base is not established in this header.
+ */
+ uint32_t media_params_offset;
+ uint32_t media_params_length;
+ uint32_t call_mgr_params_offset;
+ uint32_t call_mgr_params_length;
+} rcondis_mp_activate_vc_request;
+
+/*
+ * Response to CoNdisMiniportActivateVc
+ *
+ * rndis_msg_container overlays this type as its
+ * co_miniport_activate_vc_complete member.
+ */
+typedef struct rcondis_mp_activate_vc_complete_ {
+ /* RNDIS request ID */
+ uint32_t request_id;
+ /* RNDIS status */
+ uint32_t status;
+} rcondis_mp_activate_vc_complete;
+
+/*
+ * CoNdisMiniportDeactivateVc message
+ *
+ * rndis_msg_container overlays this type as its co_miniport_deactivate_vc
+ * member.
+ */
+typedef struct rcondis_mp_deactivate_vc_request_ {
+ /* RNDIS request ID */
+ uint32_t request_id;
+ uint32_t flags;
+ /* RNDIS handle */
+ uint32_t device_vc_handle;
+} rcondis_mp_deactivate_vc_request;
+
+/*
+ * Response to CoNdisMiniportDeactivateVc
+ *
+ * rndis_msg_container overlays this type as its
+ * co_miniport_deactivate_vc_complete member.
+ */
+typedef struct rcondis_mp_deactivate_vc_complete_ {
+ /* RNDIS request ID */
+ uint32_t request_id;
+ /* RNDIS status */
+ uint32_t status;
+} rcondis_mp_deactivate_vc_complete;
+
+/*
+ * union with all of the RNDIS messages
+ *
+ * Every member starts at offset zero, so a message body is read through
+ * whichever member matches the enclosing rndis_msg's ndis_msg_type.  The
+ * size of this union is what RNDIS_MESSAGE_SIZE() and RNDIS_HEADER_SIZE
+ * subtract from sizeof(rndis_msg) to obtain the header length.
+ */
+typedef union rndis_msg_container_ {
+ /* Data and control messages */
+ rndis_packet packet;
+ rndis_initialize_request init_request;
+ rndis_halt_request halt_request;
+ rndis_query_request query_request;
+ rndis_set_request set_request;
+ rndis_reset_request reset_request;
+ rndis_keepalive_request keepalive_request;
+ rndis_indicate_status indicate_status;
+ /* Completions for the control messages above */
+ rndis_initialize_complete init_complete;
+ rndis_query_complete query_complete;
+ rndis_set_complete set_complete;
+ rndis_reset_complete reset_complete;
+ rndis_keepalive_complete keepalive_complete;
+ /* Connection-oriented (CoNDIS) messages and their completions */
+ rcondis_mp_create_vc co_miniport_create_vc;
+ rcondis_mp_delete_vc co_miniport_delete_vc;
+ rcondis_indicate_status co_miniport_status;
+ rcondis_mp_activate_vc_request co_miniport_activate_vc;
+ rcondis_mp_deactivate_vc_request co_miniport_deactivate_vc;
+ rcondis_mp_create_vc_complete co_miniport_create_vc_complete;
+ rcondis_mp_delete_vc_complete co_miniport_delete_vc_complete;
+ rcondis_mp_activate_vc_complete co_miniport_activate_vc_complete;
+ rcondis_mp_deactivate_vc_complete co_miniport_deactivate_vc_complete;
+ rndis_packet_ex packet_ex;
+} rndis_msg_container;
+
+/*
+ * Remote NDIS message format
+ *
+ * Two 32-bit header words followed by the message body union.
+ */
+typedef struct rndis_msg_ {
+ /* Message type (REMOTE_NDIS_* value); selects the valid member of msg */
+ uint32_t ndis_msg_type;
+
+ /*
+ * Total length of this message, from the beginning
+ * of the rndis_msg struct, in bytes.
+ */
+ uint32_t msg_len;
+
+ /* Actual message */
+ rndis_msg_container msg;
+} rndis_msg;
+
+
+/*
+ * Handy macros
+ */
+
+/*
+ * Size in bytes of a complete RNDIS message whose body is the given type:
+ * the part of rndis_msg that lies outside the body union, plus
+ * sizeof(message).  The argument is a type name, for example
+ * rndis_set_request or rndis_packet.
+ */
+#define RNDIS_MESSAGE_SIZE(message) \
+	((sizeof(rndis_msg) - sizeof(rndis_msg_container)) + sizeof(message))
+
+/*
+ * Buffer-locating helpers.
+ *
+ * Each macro takes a pointer to a message body and returns a byte pointer
+ * (uint8_t *) computed as "body address + offset member".  The argument is
+ * evaluated twice, so it must be free of side effects.
+ */
+
+/*
+ * get pointer to info buffer with message pointer
+ * (for bodies that carry info_buffer_offset, e.g. rndis_query_request
+ * and rndis_set_request)
+ */
+#define MESSAGE_TO_INFO_BUFFER(message) \
+	(((uint8_t *)(message)) + (message)->info_buffer_offset)
+
+/*
+ * get pointer to status buffer with message pointer
+ *
+ * NOTE(review): StatusBufferOffset is the original Windows member name; the
+ * matching member of the status-indication struct is not visible from this
+ * part of the header and must be confirmed before this macro is used.
+ */
+#define MESSAGE_TO_STATUS_BUFFER(message) \
+	(((uint8_t *)(message)) + (message)->StatusBufferOffset)
+
+/*
+ * get pointer to OOBD buffer with message pointer
+ *
+ * NOTE(review): OOBDataOffset is the original Windows member name; confirm
+ * the corresponding rndis_packet member before this macro is used.
+ */
+#define MESSAGE_TO_OOBD_BUFFER(message) \
+	(((uint8_t *)(message)) + (message)->OOBDataOffset)
+
+/*
+ * get pointer to data buffer with message pointer
+ *
+ * NOTE(review): as inherited, this adds the per-packet-info offset rather
+ * than rndis_packet.data_offset; that mapping is preserved here.
+ */
+#define MESSAGE_TO_DATA_BUFFER(message) \
+	(((uint8_t *)(message)) + (message)->per_pkt_info_offset)
+
+/*
+ * get pointer to contained message from NDIS_MESSAGE pointer
+ * (address of the msg union inside an rndis_msg)
+ */
+#define RNDIS_MESSAGE_PTR_TO_MESSAGE_PTR(rndis_message) \
+	((void *) &(rndis_message)->msg)
+
+/*
+ * get pointer to contained message from NDIS_MESSAGE pointer
+ * (raw variant: the pointer is returned unchanged, cast to void *)
+ */
+#define RNDIS_MESSAGE_RAW_PTR_TO_MESSAGE_PTR(rndis_message) \
+	((void *) (rndis_message))
+
+
+
+/*
+ * Structures used in OID_RNDISMP_GET_RECEIVE_BUFFERS
+ */
+
+/* Bit for rndismp_rx_buf_elem.flags */
+#define RNDISMP_RECEIVE_BUFFER_ELEM_FLAG_VMQ_RECEIVE_BUFFER 0x00000001
+
+/*
+ * Description of one receive buffer.
+ * NOTE(review): rx_buf is a native pointer, so the size and layout of this
+ * struct differ between 32-bit and 64-bit builds.
+ */
+typedef struct rndismp_rx_buf_elem_ {
+ uint32_t flags;
+ uint32_t length;
+ uint64_t rx_buf_id;
+ uint32_t gpadl_handle;
+ void *rx_buf;
+} rndismp_rx_buf_elem;
+
+/*
+ * Count followed by the buffer descriptions.  The array is declared with a
+ * single element; presumably num_rx_bufs entries actually follow -- TODO
+ * confirm against the producer of this structure.
+ */
+typedef struct rndismp_rx_bufs_info_ {
+ uint32_t num_rx_bufs;
+ rndismp_rx_buf_elem rx_buf_elems[1];
+} rndismp_rx_bufs_info;
+
+
+
+/*
+ * Bytes of rndis_msg that lie outside the body union, i.e. the length of
+ * the fixed message header.  Same expression that RNDIS_MESSAGE_SIZE() adds
+ * to the body size.
+ */
+#define RNDIS_HEADER_SIZE (sizeof(rndis_msg) - sizeof(rndis_msg_container))
+
+/*
+ * Packet filter bits.  hv_rndis_filter.c ORs these together and sends the
+ * result as the 32-bit value of RNDIS_OID_GEN_CURRENT_PACKET_FILTER.
+ */
+#define NDIS_PACKET_TYPE_DIRECTED 0x00000001
+#define NDIS_PACKET_TYPE_MULTICAST 0x00000002
+#define NDIS_PACKET_TYPE_ALL_MULTICAST 0x00000004
+#define NDIS_PACKET_TYPE_BROADCAST 0x00000008
+#define NDIS_PACKET_TYPE_SOURCE_ROUTING 0x00000010
+#define NDIS_PACKET_TYPE_PROMISCUOUS 0x00000020
+#define NDIS_PACKET_TYPE_SMT 0x00000040
+#define NDIS_PACKET_TYPE_ALL_LOCAL 0x00000080
+#define NDIS_PACKET_TYPE_GROUP 0x00000100
+#define NDIS_PACKET_TYPE_ALL_FUNCTIONAL 0x00000200
+#define NDIS_PACKET_TYPE_FUNCTIONAL 0x00000400
+#define NDIS_PACKET_TYPE_MAC_FRAME 0x00000800
+
+
+#endif /* __HV_RNDIS_H__ */
+
diff --git a/sys/dev/hyperv/netvsc/hv_rndis_filter.c b/sys/dev/hyperv/netvsc/hv_rndis_filter.c
new file mode 100644
index 000000000000..691cf7ed51b3
--- /dev/null
+++ b/sys/dev/hyperv/netvsc/hv_rndis_filter.c
@@ -0,0 +1,929 @@
+/*-
+ * Copyright (c) 2009-2012 Microsoft Corp.
+ * Copyright (c) 2010-2012 Citrix Inc.
+ * Copyright (c) 2012 NetApp Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice unmodified, this list of conditions, and the following
+ * disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/types.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/mbuf.h>
+#include <sys/mutex.h>
+#include <sys/sema.h>
+#include <sys/socket.h>
+#include <net/if_arp.h>
+#include <net/ethernet.h>
+#include <machine/atomic.h>
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/pmap.h>
+
+#include <dev/hyperv/include/hyperv.h>
+#include "hv_net_vsc.h"
+#include "hv_rndis.h"
+#include "hv_rndis_filter.h"
+
+
+/*
+ * Forward declarations
+ */
+static int hv_rf_send_request(rndis_device *device, rndis_request *request,
+ uint32_t message_type);
+static void hv_rf_receive_response(rndis_device *device, rndis_msg *response);
+static void hv_rf_receive_indicate_status(rndis_device *device,
+ rndis_msg *response);
+static void hv_rf_receive_data(rndis_device *device, rndis_msg *message,
+ netvsc_packet *pkt);
+static int hv_rf_query_device(rndis_device *device, uint32_t oid,
+ void *result, uint32_t *result_size);
+static inline int hv_rf_query_device_mac(rndis_device *device);
+static inline int hv_rf_query_device_link_status(rndis_device *device);
+static int hv_rf_set_packet_filter(rndis_device *device, uint32_t new_filter);
+static int hv_rf_init_device(rndis_device *device);
+static int hv_rf_open_device(rndis_device *device);
+static int hv_rf_close_device(rndis_device *device);
+static void hv_rf_on_send_completion(void *context);
+static void hv_rf_on_send_request_completion(void *context);
+static void hv_rf_on_send_request_halt_completion(void *context);
+
+
+/*
+ * Allocate and initialise a fresh rndis_device.
+ *
+ * The structure is zero-filled, its request lock and (empty) list of
+ * outstanding requests are set up, and its state is
+ * RNDIS_DEV_UNINITIALIZED.  Returns NULL when the M_NOWAIT allocation
+ * fails.
+ */
+static inline rndis_device *
+hv_get_rndis_device(void)
+{
+	rndis_device *rndis_dev;
+
+	rndis_dev = malloc(sizeof(rndis_device), M_DEVBUF, M_NOWAIT | M_ZERO);
+	if (rndis_dev != NULL) {
+		mtx_init(&rndis_dev->req_lock, "HV-FRL", NULL,
+		    MTX_SPIN | MTX_RECURSE);
+
+		/* Run-time counterpart of STAILQ_HEAD_INITIALIZER() */
+		STAILQ_INIT(&rndis_dev->myrequest_list);
+
+		rndis_dev->state = RNDIS_DEV_UNINITIALIZED;
+	}
+
+	return (rndis_dev);
+}
+
+/*
+ * Release a device obtained from hv_get_rndis_device(): destroy its
+ * request lock, then free the structure.  Requests still queued on the
+ * device are not touched.
+ */
+static inline void
+hv_put_rndis_device(rndis_device *device)
+{
+	mtx_destroy(&device->req_lock);
+	free(device, M_DEVBUF);
+}
+
+/*
+ * Allocate a request for the given RNDIS message type and total message
+ * length, give it a fresh request id, and append it to the device's list
+ * of outstanding requests.  Returns NULL when the M_NOWAIT allocation
+ * fails.  Release the request with hv_put_rndis_request().
+ */
+static inline rndis_request *
+hv_rndis_request(rndis_device *device, uint32_t message_type,
+    uint32_t message_length)
+{
+	rndis_request *req;
+	rndis_msg *hdr;
+
+	req = malloc(sizeof(rndis_request), M_DEVBUF, M_NOWAIT | M_ZERO);
+	if (req == NULL)
+		return (NULL);
+
+	sema_init(&req->wait_sema, 0, "rndis sema");
+
+	hdr = &req->request_msg;
+	hdr->ndis_msg_type = message_type;
+	hdr->msg_len = message_length;
+
+	/*
+	 * The request id is the first body field of every request/response
+	 * message, so the set_request view serves as a template for all of
+	 * them.  atomic_fetchadd_int() returns the value before the
+	 * increment; adding one yields the id that was just claimed.
+	 */
+	hdr->msg.set_request.request_id =
+	    atomic_fetchadd_int(&device->new_request_id, 1) + 1;
+
+	/* Make the request visible to hv_rf_receive_response() */
+	mtx_lock_spin(&device->req_lock);
+	STAILQ_INSERT_TAIL(&device->myrequest_list, req, mylist_entry);
+	mtx_unlock_spin(&device->req_lock);
+
+	return (req);
+}
+
+/*
+ * Unlink a request from the device's list of outstanding requests, then
+ * destroy its semaphore and free it.
+ */
+static inline void
+hv_put_rndis_request(rndis_device *device, rndis_request *request)
+{
+	/*
+	 * Fixme: removal from a singly-linked tail queue is O(n).
+	 * XXXKYS: Use Doubly linked lists.
+	 */
+	mtx_lock_spin(&device->req_lock);
+	STAILQ_REMOVE(&device->myrequest_list, request, rndis_request_,
+	    mylist_entry);
+	mtx_unlock_spin(&device->req_lock);
+
+	sema_destroy(&request->wait_sema);
+	free(request, M_DEVBUF);
+}
+
+/*
+ * Describe the request's RNDIS message as a single page buffer, attach the
+ * send-completion callback that matches the message type, and hand the
+ * packet to hv_nv_on_send().  Returns the value hv_nv_on_send() returns.
+ */
+static int
+hv_rf_send_request(rndis_device *device, rndis_request *request,
+    uint32_t message_type)
+{
+	netvsc_packet *pkt = &request->pkt;
+	rndis_msg *msg = &request->request_msg;
+
+	/* Control message, carried in exactly one page buffer */
+	pkt->is_data_pkt = FALSE;
+	pkt->tot_data_buf_len = msg->msg_len;
+	pkt->page_buf_count = 1;
+
+	pkt->page_buffers[0].pfn = hv_get_phys_addr(msg) >> PAGE_SHIFT;
+	pkt->page_buffers[0].length = msg->msg_len;
+	pkt->page_buffers[0].offset = (unsigned long)msg & (PAGE_SIZE - 1);
+
+	/* The request itself is the completion context */
+	pkt->compl.send.send_completion_context = request;
+	/* Halt requests get the callback that raises halt_complete_flag */
+	pkt->compl.send.on_send_completion =
+	    (message_type == REMOTE_NDIS_HALT_MSG) ?
+	    hv_rf_on_send_request_halt_completion :
+	    hv_rf_on_send_request_completion;
+	pkt->compl.send.send_completion_tid = (unsigned long)device;
+
+	return (hv_nv_on_send(device->net_dev->dev, pkt));
+}
+
+/*
+ * RNDIS filter receive response
+ *
+ * Find the outstanding request whose request id matches the response,
+ * store the response in it (or record STATUS_BUFFER_OVERFLOW when the
+ * response is longer than an rndis_msg), and wake the waiter.  A response
+ * that matches no outstanding request is dropped silently.
+ *
+ * The list lock is released before the matched request is written to.
+ */
+static void
+hv_rf_receive_response(rndis_device *device, rndis_msg *response)
+{
+	rndis_request *req;
+
+	mtx_lock_spin(&device->req_lock);
+	for (req = STAILQ_FIRST(&device->myrequest_list); req != NULL;
+	    req = STAILQ_NEXT(req, mylist_entry)) {
+		/*
+		 * All request/response messages carry request_id as
+		 * their first field.
+		 */
+		if (req->request_msg.msg.init_request.request_id ==
+		    response->msg.init_complete.request_id)
+			break;
+	}
+	mtx_unlock_spin(&device->req_lock);
+
+	if (req == NULL)
+		return;
+
+	if (response->msg_len > sizeof(rndis_msg)) {
+		/* Too large to store: report overflow through the status */
+		if (response->ndis_msg_type == REMOTE_NDIS_RESET_CMPLT) {
+			/* Does not have a request id field */
+			req->response_msg.msg.reset_complete.status =
+			    STATUS_BUFFER_OVERFLOW;
+		} else {
+			req->response_msg.msg.init_complete.status =
+			    STATUS_BUFFER_OVERFLOW;
+		}
+	} else {
+		memcpy(&req->response_msg, response, response->msg_len);
+	}
+
+	sema_post(&req->wait_sema);
+}
+
+/*
+ * RNDIS filter receive indicate status
+ *
+ * Forward media connect/disconnect indications to
+ * netvsc_linkstatus_callback() as 1/0.  Every other status is ignored.
+ */
+static void
+hv_rf_receive_indicate_status(rndis_device *device, rndis_msg *response)
+{
+	rndis_indicate_status *ind = &response->msg.indicate_status;
+	int link_up;
+
+	if (ind->status == RNDIS_STATUS_MEDIA_CONNECT) {
+		link_up = 1;
+	} else if (ind->status == RNDIS_STATUS_MEDIA_DISCONNECT) {
+		link_up = 0;
+	} else {
+		/* TODO: other status indications are not handled */
+		return;
+	}
+
+	netvsc_linkstatus_callback(device->net_dev->dev, link_up);
+}
+
+/*
+ * RNDIS filter receive data
+ *
+ * Strip the RNDIS header from a received data message by advancing the
+ * first page buffer, pick up the VLAN id when the message carries an
+ * 802.1Q per-packet-info record, and pass the packet to netvsc_recv().
+ *
+ * The offsets found in the message are used as supplied; no bounds
+ * checking is done here.
+ */
+static void
+hv_rf_receive_data(rndis_device *device, rndis_msg *message, netvsc_packet *pkt)
+{
+	rndis_packet *rpkt = &message->msg.packet;
+	rndis_per_packet_info *ppi;
+	ndis_8021q_info *vlan;
+	uint32_t strip;
+
+	/*
+	 * Fixme: Handle multiple rndis pkt msgs that may be enclosed in this
+	 * netvsc packet (ie tot_data_buf_len != message_length)
+	 */
+
+	/* Bytes preceding the L2 frame: RNDIS header plus data offset */
+	strip = RNDIS_HEADER_SIZE + rpkt->data_offset;
+
+	/* L2 frame length, with L2 header, not including CRC */
+	pkt->tot_data_buf_len = rpkt->data_length;
+	pkt->page_buffers[0].offset += strip;
+	/* What remains is the L2 frame plus trailing junk */
+	pkt->page_buffers[0].length -= strip;
+	pkt->is_data_pkt = TRUE;
+	pkt->vlan_tci = 0;
+
+	/*
+	 * Report the VLAN id if the Hyper-V infrastructure supplied one and
+	 * let higher-level driver code decide whether to use it.  CFI and
+	 * priority are ignored for now as FreeBSD does not support these.
+	 */
+	if (rpkt->per_pkt_info_offset != 0) {
+		/* Per-packet info is located relative to the rndis_packet */
+		ppi = (rndis_per_packet_info *)((uint8_t *)rpkt +
+		    rpkt->per_pkt_info_offset);
+		if (ppi->type == ieee_8021q_info) {
+			vlan = (ndis_8021q_info *)((uint8_t *)ppi +
+			    ppi->per_packet_info_offset);
+			pkt->vlan_tci = vlan->u1.s1.vlan_id;
+		}
+	}
+
+	netvsc_recv(device->net_dev->dev, pkt);
+}
+
+/*
+ * RNDIS filter on receive
+ *
+ * Entry point for a packet received on the netvsc channel.  Locates the
+ * RNDIS message in the packet's first page buffer and dispatches on its
+ * message type.
+ *
+ * Returns ENODEV when no rndis device is attached to the netvsc device,
+ * EINVAL when the rndis device is still uninitialized, and 0 otherwise
+ * (including for unknown message types, which are only logged).
+ */
+int
+hv_rf_on_receive(struct hv_device *device, netvsc_packet *pkt)
+{
+	hn_softc_t *sc = device_get_softc(device->device);
+	netvsc_dev *net_dev = sc->net_dev;
+	rndis_device *rndis_dev;
+	rndis_msg rndis_mesg;
+	rndis_msg *rndis_hdr;
+	uint32_t copy_len;
+
+	/* Make sure the rndis device state is initialized */
+	if (net_dev->extension == NULL)
+		return (ENODEV);
+
+	rndis_dev = (rndis_device *)net_dev->extension;
+	if (rndis_dev->state == RNDIS_DEV_UNINITIALIZED)
+		return (EINVAL);
+
+	/* Shift virtual page number to form virtual page address */
+	rndis_hdr = (rndis_msg *)(pkt->page_buffers[0].pfn << PAGE_SHIFT);
+
+	rndis_hdr = (void *)((unsigned long)rndis_hdr
+	    + pkt->page_buffers[0].offset);
+
+	/*
+	 * Fixme: msg_len is not validated against pkt->tot_data_buf_len.
+	 * There seems to be a bug in set completion msg where its msg_len
+	 * is 16 bytes but the byte_count field in the xfer page range
+	 * shows 52 bytes, so a strict equality check would drop valid
+	 * messages.
+	 */
+
+	/*
+	 * Take a bounded private copy for the control paths.  Zero it
+	 * first so that a message shorter than rndis_msg cannot leave
+	 * stale stack contents in the fields examined below.
+	 */
+	memset(&rndis_mesg, 0, sizeof(rndis_mesg));
+	copy_len = rndis_hdr->msg_len;
+	if (copy_len > sizeof(rndis_msg))
+		copy_len = sizeof(rndis_msg);
+	memcpy(&rndis_mesg, rndis_hdr, copy_len);
+
+	switch (rndis_mesg.ndis_msg_type) {
+
+	/* data message */
+	case REMOTE_NDIS_PACKET_MSG:
+		/*
+		 * Hand over the message in the receive buffer, not the
+		 * truncated copy: hv_rf_receive_data() follows the
+		 * per-packet-info offsets, which point past the end of
+		 * an rndis_msg.
+		 */
+		hv_rf_receive_data(rndis_dev, rndis_hdr, pkt);
+		break;
+	/* completion messages */
+	case REMOTE_NDIS_INITIALIZE_CMPLT:
+	case REMOTE_NDIS_QUERY_CMPLT:
+	case REMOTE_NDIS_SET_CMPLT:
+	case REMOTE_NDIS_RESET_CMPLT:
+	case REMOTE_NDIS_KEEPALIVE_CMPLT:
+		hv_rf_receive_response(rndis_dev, &rndis_mesg);
+		break;
+	/* notification message */
+	case REMOTE_NDIS_INDICATE_STATUS_MSG:
+		hv_rf_receive_indicate_status(rndis_dev, &rndis_mesg);
+		break;
+	default:
+		printf("hv_rf_on_receive(): Unknown msg_type 0x%x\n",
+		    rndis_mesg.ndis_msg_type);
+		break;
+	}
+
+	return (0);
+}
+
+/*
+ * RNDIS filter query device
+ *
+ * Send a REMOTE_NDIS_QUERY_MSG for 'oid', block until the response has
+ * been delivered, and copy the returned information buffer to 'result'.
+ *
+ * On entry *result_size is the capacity of 'result'; on return it is the
+ * number of bytes copied (0 on any failure).
+ *
+ * Returns 0 on success, -1 if the request could not be allocated, the
+ * error from hv_rf_send_request() if the send failed, or EINVAL if the
+ * response does not fit in 'result' or describes an information buffer
+ * that lies outside the stored response.
+ */
+static int
+hv_rf_query_device(rndis_device *device, uint32_t oid, void *result,
+    uint32_t *result_size)
+{
+	rndis_request *request;
+	uint32_t in_result_size = *result_size;
+	rndis_query_request *query;
+	rndis_query_complete *query_complete;
+	uint32_t info_off, info_len;
+	size_t avail;
+	int ret = 0;
+
+	*result_size = 0;
+	request = hv_rndis_request(device, REMOTE_NDIS_QUERY_MSG,
+	    RNDIS_MESSAGE_SIZE(rndis_query_request));
+	if (request == NULL)
+		return (-1);
+
+	/* Set up the rndis query; no input buffer accompanies it */
+	query = &request->request_msg.msg.query_request;
+	query->oid = oid;
+	query->info_buffer_offset = sizeof(rndis_query_request);
+	query->info_buffer_length = 0;
+	query->device_vc_handle = 0;
+
+	ret = hv_rf_send_request(device, request, REMOTE_NDIS_QUERY_MSG);
+	if (ret != 0) {
+		/* Fixme: printf added */
+		printf("RNDISFILTER request failed to Send!\n");
+		goto cleanup;
+	}
+
+	/* hv_rf_receive_response() posts this once the reply is stored */
+	sema_wait(&request->wait_sema);
+
+	/* Copy the response back */
+	query_complete = &request->response_msg.msg.query_complete;
+	info_off = query_complete->info_buffer_offset;
+	info_len = query_complete->info_buffer_length;
+
+	if (info_len > in_result_size) {
+		ret = EINVAL;
+		goto cleanup;
+	}
+
+	/*
+	 * Offset and length come from the host.  The offset is relative
+	 * to the start of the message body, so the described range must
+	 * lie entirely within the body of the stored response; anything
+	 * else would read memory that was never part of the reply.
+	 */
+	avail = sizeof(request->response_msg.msg);
+	if (info_off > avail || info_len > avail - info_off) {
+		ret = EINVAL;
+		goto cleanup;
+	}
+
+	memcpy(result, (uint8_t *)query_complete + info_off, info_len);
+
+	*result_size = info_len;
+
+cleanup:
+	hv_put_rndis_request(device, request);
+
+	return (ret);
+}
+
+/*
+ * RNDIS filter query device MAC address
+ *
+ * Query RNDIS_OID_802_3_PERMANENT_ADDRESS into device->hw_mac_addr
+ * (capacity HW_MACADDR_LEN) and return hv_rf_query_device()'s status.
+ */
+static inline int
+hv_rf_query_device_mac(rndis_device *device)
+{
+	uint32_t len = HW_MACADDR_LEN;
+	int ret;
+
+	ret = hv_rf_query_device(device, RNDIS_OID_802_3_PERMANENT_ADDRESS,
+	    device->hw_mac_addr, &len);
+
+	return (ret);
+}
+
+/*
+ * RNDIS filter query device link status
+ *
+ * Query RNDIS_OID_GEN_MEDIA_CONNECT_STATUS into device->link_status
+ * (one 32-bit word) and return hv_rf_query_device()'s status.
+ */
+static inline int
+hv_rf_query_device_link_status(rndis_device *device)
+{
+	uint32_t len = sizeof(uint32_t);
+	int ret;
+
+	ret = hv_rf_query_device(device, RNDIS_OID_GEN_MEDIA_CONNECT_STATUS,
+	    &device->link_status, &len);
+
+	return (ret);
+}
+
+/*
+ * RNDIS filter set packet filter
+ *
+ * Sends a REMOTE_NDIS_SET_MSG carrying 'new_filter' as the value of
+ * RNDIS_OID_GEN_CURRENT_PACKET_FILTER, then waits up to 5 seconds for the
+ * response from the host.
+ *
+ * Returns zero on success, non-zero on failure:
+ *   -1  the request could not be allocated
+ *   -2  the host answered with a status other than RNDIS_STATUS_SUCCESS
+ *   otherwise the error from hv_rf_send_request() or sema_timedwait()
+ *
+ * On timeout the request is intentionally left allocated and queued, since
+ * a send completion for it may still arrive.
+ */
+static int
+hv_rf_set_packet_filter(rndis_device *device, uint32_t new_filter)
+{
+	rndis_request *request;
+	rndis_set_request *set;
+	rndis_set_complete *set_complete;
+	uint32_t status;
+	int ret;
+
+	request = hv_rndis_request(device, REMOTE_NDIS_SET_MSG,
+	    RNDIS_MESSAGE_SIZE(rndis_set_request) + sizeof(uint32_t));
+	if (request == NULL)
+		return (-1);
+
+	/* Set up the rndis set */
+	set = &request->request_msg.msg.set_request;
+	set->oid = RNDIS_OID_GEN_CURRENT_PACKET_FILTER;
+	set->info_buffer_length = sizeof(uint32_t);
+	set->info_buffer_offset = sizeof(rndis_set_request);
+
+	/* The 32-bit filter value directly follows the set request body */
+	memcpy((void *)((unsigned long)set + sizeof(rndis_set_request)),
+	    &new_filter, sizeof(uint32_t));
+
+	ret = hv_rf_send_request(device, request, REMOTE_NDIS_SET_MSG);
+	if (ret != 0)
+		goto cleanup;
+
+	/*
+	 * Wait for the response from the host.  Another thread will signal
+	 * us when the response has arrived.  The timeout is given in clock
+	 * ticks, so scale by hz to obtain 5 seconds regardless of the
+	 * configured tick rate.  In the failure case, sema_timedwait()
+	 * returns a non-zero status.
+	 */
+	ret = sema_timedwait(&request->wait_sema, 5 * hz);
+	if (ret != 0) {
+		/*
+		 * We cannot deallocate the request since we may still
+		 * receive a send completion for it.
+		 */
+		return (ret);
+	}
+
+	/* Response received, check status */
+	set_complete = &request->response_msg.msg.set_complete;
+	status = set_complete->status;
+	if (status != RNDIS_STATUS_SUCCESS) {
+		/* Bad response status, return error */
+		ret = -2;
+	}
+
+cleanup:
+	hv_put_rndis_request(device, request);
+
+	return (ret);
+}
+
+/*
+ * RNDIS filter init device
+ *
+ * Send REMOTE_NDIS_INITIALIZE_MSG and block until the host answers.  The
+ * device state moves to RNDIS_DEV_INITIALIZING while the request is in
+ * flight, then to RNDIS_DEV_INITIALIZED on RNDIS_STATUS_SUCCESS or back to
+ * RNDIS_DEV_UNINITIALIZED on any failure.
+ *
+ * Returns 0 on success, -1 if the request could not be allocated or the
+ * host reported failure, or the error from hv_rf_send_request().
+ */
+static int
+hv_rf_init_device(rndis_device *device)
+{
+	rndis_request *req;
+	rndis_initialize_request *init;
+	uint32_t status;
+	int ret;
+
+	req = hv_rndis_request(device, REMOTE_NDIS_INITIALIZE_MSG,
+	    RNDIS_MESSAGE_SIZE(rndis_initialize_request));
+	if (req == NULL)
+		return (-1);
+
+	/* Fill in the initialize request */
+	init = &req->request_msg.msg.init_request;
+	init->major_version = RNDIS_MAJOR_VERSION;
+	init->minor_version = RNDIS_MINOR_VERSION;
+	/*
+	 * Per the RNDIS document, this should be set to the max MTU
+	 * plus the header size.  However, 2048 works fine, so leaving
+	 * it as is.
+	 */
+	init->max_xfer_size = 2048;
+
+	device->state = RNDIS_DEV_INITIALIZING;
+
+	ret = hv_rf_send_request(device, req, REMOTE_NDIS_INITIALIZE_MSG);
+	if (ret != 0) {
+		device->state = RNDIS_DEV_UNINITIALIZED;
+	} else {
+		/* Woken by hv_rf_receive_response() */
+		sema_wait(&req->wait_sema);
+
+		status = req->response_msg.msg.init_complete.status;
+		if (status == RNDIS_STATUS_SUCCESS) {
+			device->state = RNDIS_DEV_INITIALIZED;
+			ret = 0;
+		} else {
+			device->state = RNDIS_DEV_UNINITIALIZED;
+			ret = -1;
+		}
+	}
+
+	hv_put_rndis_request(device, req);
+
+	return (ret);
+}
+
+/* Number of DELAY(400) polls to wait for the halt send completion */
+#define HALT_COMPLETION_WAIT_COUNT	25
+
+/*
+ * RNDIS filter halt device
+ *
+ * Send REMOTE_NDIS_HALT_MSG and poll for its send completion, which is
+ * signalled by hv_rf_on_send_request_halt_completion() setting
+ * halt_complete_flag.  On success the device state becomes
+ * RNDIS_DEV_UNINITIALIZED and the request is freed.
+ *
+ * Returns 0 on success and -1 if the request could not be allocated, the
+ * send failed, or the completion did not arrive within the polling
+ * window.  In the timeout case the request is deliberately left allocated
+ * because the completion callback may still write to it.
+ */
+static int
+hv_rf_halt_device(rndis_device *device)
+{
+	rndis_request *request;
+	rndis_halt_request *halt;
+	int i, ret;
+
+	/* Attempt to do a rndis device halt */
+	request = hv_rndis_request(device, REMOTE_NDIS_HALT_MSG,
+	    RNDIS_MESSAGE_SIZE(rndis_halt_request));
+	if (request == NULL) {
+		return (-1);
+	}
+
+	/* initialize "poor man's semaphore" */
+	request->halt_complete_flag = 0;
+
+	/*
+	 * Set up the rndis halt.  hv_rndis_request() has already stored a
+	 * request id; a further id is claimed here, as before.
+	 */
+	halt = &request->request_msg.msg.halt_request;
+	halt->request_id = atomic_fetchadd_int(&device->new_request_id, 1);
+	/* Increment to get the new value (call above returns old value) */
+	halt->request_id += 1;
+
+	ret = hv_rf_send_request(device, request, REMOTE_NDIS_HALT_MSG);
+	if (ret != 0) {
+		/*
+		 * The send failed, so the request is released here just
+		 * as the other request paths in this file do on a failed
+		 * send, instead of leaving it allocated and queued.
+		 */
+		hv_put_rndis_request(device, request);
+		return (-1);
+	}
+
+	/*
+	 * Wait for halt response from halt callback.  We must wait for
+	 * the transaction response before freeing the request and other
+	 * resources.
+	 */
+	for (i = HALT_COMPLETION_WAIT_COUNT; i > 0; i--) {
+		if (request->halt_complete_flag != 0) {
+			break;
+		}
+		DELAY(400);
+	}
+	if (i == 0) {
+		/* Completion still outstanding: the request must stay */
+		return (-1);
+	}
+
+	device->state = RNDIS_DEV_UNINITIALIZED;
+
+	hv_put_rndis_request(device, request);
+
+	return (0);
+}
+
+/*
+ * RNDIS filter open device
+ *
+ * Install the receive packet filter: promiscuous when hv_promisc_mode is
+ * 1, otherwise broadcast + all-multicast + directed.  On success the
+ * device state becomes RNDIS_DEV_DATAINITIALIZED.  Does nothing and
+ * returns 0 unless the device is in state RNDIS_DEV_INITIALIZED.
+ */
+static int
+hv_rf_open_device(rndis_device *device)
+{
+	uint32_t filter;
+	int ret;
+
+	if (device->state != RNDIS_DEV_INITIALIZED)
+		return (0);
+
+	if (hv_promisc_mode == 1) {
+		filter = NDIS_PACKET_TYPE_PROMISCUOUS;
+	} else {
+		filter = NDIS_PACKET_TYPE_BROADCAST |
+		    NDIS_PACKET_TYPE_ALL_MULTICAST |
+		    NDIS_PACKET_TYPE_DIRECTED;
+	}
+
+	ret = hv_rf_set_packet_filter(device, filter);
+	if (ret != 0)
+		return (ret);
+
+	device->state = RNDIS_DEV_DATAINITIALIZED;
+
+	return (0);
+}
+
+/*
+ * RNDIS filter close device
+ *
+ * Clear the receive packet filter (set it to 0).  On success the device
+ * state drops back to RNDIS_DEV_INITIALIZED.  Does nothing and returns 0
+ * unless the device is in state RNDIS_DEV_DATAINITIALIZED.
+ */
+static int
+hv_rf_close_device(rndis_device *device)
+{
+	int ret;
+
+	if (device->state != RNDIS_DEV_DATAINITIALIZED)
+		return (0);
+
+	ret = hv_rf_set_packet_filter(device, 0);
+	if (ret != 0)
+		return (ret);
+
+	device->state = RNDIS_DEV_INITIALIZED;
+
+	return (0);
+}
+
+/*
+ * RNDIS filter on device add
+ *
+ * Create the rndis device, let the netvsc layer create its device and
+ * channel, link the two together, initialise RNDIS, and report the MAC
+ * address and link state through 'additl_info' (a netvsc_device_info).
+ *
+ * Returns ENOMEM if either device could not be created; otherwise the
+ * status of the MAC address query.  The results of the RNDIS initialize
+ * and link-status requests are not reflected in the return value, and no
+ * teardown is performed when a later step fails (see the TODOs below).
+ */
+int
+hv_rf_on_device_add(struct hv_device *device, void *additl_info)
+{
+	netvsc_device_info *info = (netvsc_device_info *)additl_info;
+	rndis_device *rndis_dev;
+	netvsc_dev *net_dev;
+	int ret;
+
+	rndis_dev = hv_get_rndis_device();
+	if (rndis_dev == NULL)
+		return (ENOMEM);
+
+	/*
+	 * Let the inner driver handle this first to create the netvsc channel
+	 * NOTE! Once the channel is created, we may get a receive callback
+	 * (hv_rf_on_receive()) before this call is completed.
+	 * Note: Earlier code used a function pointer here.
+	 */
+	net_dev = hv_nv_on_device_add(device, additl_info);
+	if (net_dev == NULL) {
+		hv_put_rndis_device(rndis_dev);
+		return (ENOMEM);
+	}
+
+	/* Cross-link the netvsc device and the rndis device */
+	net_dev->extension = rndis_dev;
+	rndis_dev->net_dev = net_dev;
+
+	/*
+	 * Send the rndis initialization message.
+	 * TODO: If rndis init failed, we will need to shut down
+	 * the channel
+	 */
+	(void)hv_rf_init_device(rndis_dev);
+
+	/*
+	 * Get the mac address.
+	 * TODO: on failure, shut down rndis device and the channel
+	 */
+	ret = hv_rf_query_device_mac(rndis_dev);
+
+	memcpy(info->mac_addr, rndis_dev->hw_mac_addr, HW_MACADDR_LEN);
+
+	hv_rf_query_device_link_status(rndis_dev);
+
+	info->link_state = rndis_dev->link_status;
+
+	return (ret);
+}
+
+/*
+ * RNDIS filter on device remove
+ *
+ * Halt and free the rndis device, detach it from the netvsc device, then
+ * let the netvsc layer remove its device.  The return value is the
+ * bitwise OR of the halt and netvsc-remove results, so it is zero only
+ * when both succeeded.
+ */
+int
+hv_rf_on_device_remove(struct hv_device *device, boolean_t destroy_channel)
+{
+	hn_softc_t *sc = device_get_softc(device->device);
+	netvsc_dev *net_dev = sc->net_dev;
+	rndis_device *rndis_dev = (rndis_device *)net_dev->extension;
+	int halt_ret, remove_ret;
+
+	/* Halt and release the rndis device */
+	halt_ret = hv_rf_halt_device(rndis_dev);
+
+	hv_put_rndis_device(rndis_dev);
+	net_dev->extension = NULL;
+
+	/* Pass control to inner driver to remove the device */
+	remove_ret = hv_nv_on_device_remove(device, destroy_channel);
+
+	return (halt_ret | remove_ret);
+}
+
+/*
+ * RNDIS filter on open
+ *
+ * Resolve the rndis device attached to this hv_device's netvsc device and
+ * open it; returns hv_rf_open_device()'s status.
+ */
+int
+hv_rf_on_open(struct hv_device *device)
+{
+	hn_softc_t *sc = device_get_softc(device->device);
+	rndis_device *rndis_dev = (rndis_device *)sc->net_dev->extension;
+
+	return (hv_rf_open_device(rndis_dev));
+}
+
+/*
+ * RNDIS filter on close
+ *
+ * Resolve the rndis device attached to this hv_device's netvsc device and
+ * close it; returns hv_rf_close_device()'s status.
+ */
+int
+hv_rf_on_close(struct hv_device *device)
+{
+	hn_softc_t *sc = device_get_softc(device->device);
+	rndis_device *rndis_dev = (rndis_device *)sc->net_dev->extension;
+
+	return (hv_rf_close_device(rndis_dev));
+}
+
+/*
+ * RNDIS filter on send
+ *
+ * Build the RNDIS packet header for an outgoing frame in the packet's
+ * extension area, describe it in page buffer 0, interpose this layer's
+ * send-completion callback, and pass the packet to hv_nv_on_send().
+ * When pkt->vlan_tci is non-zero an 802.1Q per-packet-info record is
+ * placed between the rndis_packet and the frame data.
+ *
+ * Returns the value hv_nv_on_send() returns.
+ */
+int
+hv_rf_on_send(struct hv_device *device, netvsc_packet *pkt)
+{
+	rndis_filter_packet *fpkt;
+	rndis_msg *msg;
+	rndis_packet *rpkt;
+	rndis_per_packet_info *ppi;
+	ndis_8021q_info *vlan;
+	uint32_t hdr_len;
+
+	/* The RNDIS header lives in the packet's extension area */
+	fpkt = (rndis_filter_packet *)pkt->extension;
+	memset(fpkt, 0, sizeof(rndis_filter_packet));
+
+	msg = &fpkt->message;
+	rpkt = &msg->msg.packet;
+
+	/* Header length, including the VLAN record when one is needed */
+	hdr_len = RNDIS_MESSAGE_SIZE(rndis_packet);
+	if (pkt->vlan_tci != 0) {
+		hdr_len += sizeof(rndis_per_packet_info) +
+		    sizeof(ndis_8021q_info);
+	}
+
+	msg->ndis_msg_type = REMOTE_NDIS_PACKET_MSG;
+	msg->msg_len = pkt->tot_data_buf_len + hdr_len;
+
+	rpkt->data_offset = sizeof(rndis_packet);
+	rpkt->data_length = pkt->tot_data_buf_len;
+
+	/* Page buffer 0 describes the RNDIS header */
+	pkt->is_data_pkt = TRUE;
+	pkt->page_buffers[0].pfn = hv_get_phys_addr(msg) >> PAGE_SHIFT;
+	pkt->page_buffers[0].offset = (unsigned long)msg & (PAGE_SIZE - 1);
+	pkt->page_buffers[0].length = hdr_len;
+
+	/* Remember the caller's completion context, then install ours */
+	fpkt->completion_context = pkt->compl.send.send_completion_context;
+	pkt->compl.send.on_send_completion = hv_rf_on_send_completion;
+	pkt->compl.send.send_completion_context = fpkt;
+
+	/*
+	 * If there is a VLAN tag, we need to set up some additional
+	 * fields so the Hyper-V infrastructure will stuff the VLAN tag
+	 * into the frame.
+	 */
+	if (pkt->vlan_tci != 0) {
+		/* Frame data now starts after the rppi + VLAN structs */
+		rpkt->data_offset += sizeof(rndis_per_packet_info) +
+		    sizeof(ndis_8021q_info);
+
+		/* must be set when we have rppi, VLAN info */
+		rpkt->per_pkt_info_offset = sizeof(rndis_packet);
+		rpkt->per_pkt_info_length = sizeof(rndis_per_packet_info) +
+		    sizeof(ndis_8021q_info);
+
+		/* rppi immediately follows the rndis_packet */
+		ppi = (rndis_per_packet_info *)(rpkt + 1);
+		ppi->size = sizeof(rndis_per_packet_info) +
+		    sizeof(ndis_8021q_info);
+		ppi->type = ieee_8021q_info;
+		ppi->per_packet_info_offset = sizeof(rndis_per_packet_info);
+
+		/* VLAN info immediately follows the rppi struct */
+		vlan = (ndis_8021q_info *)(ppi + 1);
+		/* FreeBSD does not support CFI or priority */
+		vlan->u1.s1.vlan_id = pkt->vlan_tci & 0xfff;
+	}
+
+	/*
+	 * Invoke netvsc send.  If return status is bad, the caller now
+	 * resets the context pointers before retrying.
+	 */
+	return (hv_nv_on_send(device, pkt));
+}
+
+/*
+ * RNDIS filter on send completion callback
+ *
+ * 'context' is the rndis_filter_packet installed by hv_rf_on_send(); the
+ * completion is forwarded to netvsc_xmit_completion() with the context
+ * that was saved there.
+ */
+static void
+hv_rf_on_send_completion(void *context)
+{
+	rndis_filter_packet *fpkt = context;
+
+	/* Pass it back to the original handler */
+	netvsc_xmit_completion(fpkt->completion_context);
+}
+
+/*
+ * RNDIS filter on send request completion callback
+ *
+ * Installed by hv_rf_send_request() for every request other than halt.
+ * Intentionally empty: nothing is done when the send completes.
+ */
+static void
+hv_rf_on_send_request_completion(void *context)
+{
+	/* Nothing to do */
+}
+
+/*
+ * RNDIS filter on send request (halt only) completion callback
+ *
+ * 'context' is the rndis_request that hv_rf_send_request() registered.
+ */
+static void
+hv_rf_on_send_request_halt_completion(void *context)
+{
+	/*
+	 * Tell hv_rf_halt_device(), which polls this flag, that the halt
+	 * has completed.  The halt code must wait for completion before
+	 * freeing the transaction resources.
+	 */
+	((rndis_request *)context)->halt_complete_flag = 1;
+}
+
diff --git a/sys/dev/hyperv/netvsc/hv_rndis_filter.h b/sys/dev/hyperv/netvsc/hv_rndis_filter.h
new file mode 100644
index 000000000000..edbb3476b32f
--- /dev/null
+++ b/sys/dev/hyperv/netvsc/hv_rndis_filter.h
@@ -0,0 +1,116 @@
+/*-
+ * Copyright (c) 2009-2012 Microsoft Corp.
+ * Copyright (c) 2010-2012 Citrix Inc.
+ * Copyright (c) 2012 NetApp Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice unmodified, this list of conditions, and the following
+ * disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __HV_RNDIS_FILTER_H__
+#define __HV_RNDIS_FILTER_H__
+
+
+/*
+ * Defines
+ */
+
+/* Destroy or preserve channel on filter/netvsc teardown */
+#define HV_RF_NV_DESTROY_CHANNEL TRUE
+#define HV_RF_NV_RETAIN_CHANNEL FALSE
+
+/*
+ * Number of page buffers to reserve for the RNDIS filter packet in the
+ * transmitted message.
+ */
+#define HV_RF_NUM_TX_RESERVED_PAGE_BUFS 1
+
+
+/*
+ * Data types
+ */
+
+/*
+ * State of an RNDIS device, stored in rndis_device.state.
+ * Enumerators are numbered consecutively starting at 0.
+ */
+typedef enum {
+ RNDIS_DEV_UNINITIALIZED = 0,
+ RNDIS_DEV_INITIALIZING,
+ RNDIS_DEV_INITIALIZED,
+ RNDIS_DEV_DATAINITIALIZED,
+} rndis_device_state;
+
+/*
+ * One RNDIS control request together with the storage for its response
+ * and the netvsc packet / page buffer embedded for it.
+ */
+typedef struct rndis_request_ {
+ /* Linkage for rndis_device.myrequest_list (an STAILQ of these) */
+ STAILQ_ENTRY(rndis_request_) mylist_entry;
+ /* Semaphore associated with this request */
+ struct sema wait_sema;
+
+ /*
+ * Fixme: We assumed a fixed size response here. If we do ever
+ * need to handle a bigger response, we can either define a max
+ * response message or add a response buffer variable above this field
+ */
+ rndis_msg response_msg;
+
+ /* Simplify allocation by having a netvsc packet inline */
+ netvsc_packet pkt;
+ hv_vmbus_page_buffer buffer;
+ /* Fixme: We assumed a fixed size request here. */
+ rndis_msg request_msg;
+ /*
+ * Fixme: Poor man's semaphore.
+ * Set to 1 by hv_rf_on_send_request_halt_completion().
+ */
+ uint32_t halt_complete_flag;
+} rndis_request;
+
+/*
+ * Per-device RNDIS filter state.
+ */
+typedef struct rndis_device_ {
+ /* Associated netvsc device */
+ netvsc_dev *net_dev;
+
+ /* Current rndis_device_state value */
+ rndis_device_state state;
+ uint32_t link_status;
+ /* NOTE(review): presumably the source of ids for new requests -- confirm */
+ uint32_t new_request_id;
+
+ /* NOTE(review): presumably guards myrequest_list -- confirm in the .c file */
+ struct mtx req_lock;
+
+ /* List of rndis_request entries, linked through mylist_entry */
+ STAILQ_HEAD(RQ, rndis_request_) myrequest_list;
+
+ /* MAC address storage, HW_MACADDR_LEN bytes */
+ uint8_t hw_mac_addr[HW_MACADDR_LEN];
+} rndis_device;
+
+/*
+ * Filter-private data carried with a transmitted packet.
+ */
+typedef struct rndis_filter_packet_ {
+ /* Passed to netvsc_xmit_completion() by hv_rf_on_send_completion() */
+ void *completion_context;
+ /* No longer used */
+ pfn_on_send_rx_completion on_completion;
+
+ /* RNDIS message storage for this packet */
+ rndis_msg message;
+} rndis_filter_packet;
+
+
+/*
+ * Externs
+ *
+ * RNDIS filter entry points declared for use by other files.
+ * All of them return an int status.
+ */
+
+extern int hv_rf_on_receive(struct hv_device *device, netvsc_packet *pkt);
+extern int hv_rf_on_device_add(struct hv_device *device, void *additl_info);
+/* destroy_channel: HV_RF_NV_DESTROY_CHANNEL or HV_RF_NV_RETAIN_CHANNEL */
+extern int hv_rf_on_device_remove(struct hv_device *device,
+ boolean_t destroy_channel);
+extern int hv_rf_on_open(struct hv_device *device);
+extern int hv_rf_on_close(struct hv_device *device);
+extern int hv_rf_on_send(struct hv_device *device, netvsc_packet *pkt);
+
+
+#endif /* __HV_RNDIS_FILTER_H__ */
+
diff --git a/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c b/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c
new file mode 100644
index 000000000000..657cedfd7b67
--- /dev/null
+++ b/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c
@@ -0,0 +1,1470 @@
+/*-
+ * Copyright (c) 2009-2012 Microsoft Corp.
+ * Copyright (c) 2012 NetApp Inc.
+ * Copyright (c) 2012 Citrix Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice unmodified, this list of conditions, and the following
+ * disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * StorVSC driver for Hyper-V. This driver presents a SCSI HBA interface
+ * to the Common Access Method (CAM) layer. CAM control blocks (CCBs) are
+ * converted into VSCSI protocol messages which are delivered to the parent
+ * partition StorVSP driver over the Hyper-V VMBUS.
+ */
+
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <sys/condvar.h>
+#include <sys/systm.h>
+#include <sys/sockio.h>
+#include <sys/mbuf.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/kernel.h>
+#include <sys/queue.h>
+#include <sys/lock.h>
+#include <sys/sx.h>
+#include <sys/taskqueue.h>
+#include <sys/bus.h>
+#include <sys/mutex.h>
+#include <sys/callout.h>
+#include <vm/vm.h>
+#include <vm/pmap.h>
+#include <sys/lock.h>
+#include <sys/sema.h>
+
+#include <cam/cam.h>
+#include <cam/cam_ccb.h>
+#include <cam/cam_periph.h>
+#include <cam/cam_sim.h>
+#include <cam/cam_xpt_sim.h>
+#include <cam/cam_xpt_internal.h>
+#include <cam/cam_debug.h>
+#include <cam/scsi/scsi_all.h>
+#include <cam/scsi/scsi_message.h>
+
+
+#include <dev/hyperv/include/hyperv.h>
+#include "hv_vstorage.h"
+
+/* Ring buffer size, used for both drivers in g_drv_props_table */
+#define STORVSC_RINGBUFFER_SIZE (20*PAGE_SIZE)
+#define STORVSC_MAX_LUNS_PER_TARGET (64)
+/* Two requests per LUN: 128 */
+#define STORVSC_MAX_IO_REQUESTS (STORVSC_MAX_LUNS_PER_TARGET * 2)
+#define BLKVSC_MAX_IDE_DISKS_PER_TARGET (1)
+/* blkvsc uses the same request limit as storvsc */
+#define BLKVSC_MAX_IO_REQUESTS STORVSC_MAX_IO_REQUESTS
+#define STORVSC_MAX_TARGETS (1)
+
+struct storvsc_softc;
+
+/* Direction classification of a storage request. */
+enum storvsc_request_type {
+ WRITE_TYPE,
+ READ_TYPE,
+ UNKNOWN_TYPE
+};
+
+/*
+ * One request sent to the host. The address of this structure is used
+ * as the VMBUS request id, so the channel callback can recover it from
+ * the id of the reply.
+ */
+struct hv_storvsc_request {
+ /* Linkage for storvsc_softc.hs_free_list */
+ LIST_ENTRY(hv_storvsc_request) link;
+ /* Packet sent to the host; for init/reset requests the reply is copied here */
+ struct vstor_packet vstor_packet;
+ /* Data pages sent with the packet when data_buf.length is non-zero */
+ hv_vmbus_multipage_buffer data_buf;
+ /* Destination for autosense data copied on I/O completion */
+ void *sense_data;
+ /* Set on I/O completion to the number of sense bytes copied */
+ uint8_t sense_info_len;
+ /* Number of timeouts already seen; see storvsc_timeout() */
+ uint8_t retries;
+ union ccb *ccb;
+ struct storvsc_softc *softc;
+ /* Timeout callout, re-armed by storvsc_timeout() */
+ struct callout callout;
+ struct sema synch_sema; /* Synchronize the request/response if needed */
+};
+
+/*
+ * Per-adapter driver state (the newbus softc).
+ */
+struct storvsc_softc {
+ struct hv_device *hs_dev;
+ /* Pre-allocated requests; populated in storvsc_attach() */
+ LIST_HEAD(, hv_storvsc_request) hs_free_list;
+ /* Mutex handed to cam_sim_alloc() as the SIM lock */
+ struct mtx hs_lock;
+ /* Entry of g_drv_props_table selected for this device */
+ struct storvsc_driver_props *hs_drv_props;
+ int hs_unit;
+ /* Non-zero once storvsc_timeout() has frozen the SIM queue */
+ uint32_t hs_frozen;
+ struct cam_sim *hs_sim;
+ struct cam_path *hs_path;
+ /* Requests sent to the host and not yet completed */
+ uint32_t hs_num_out_reqs;
+ /* Set by storvsc_detach(); blocks new outbound requests */
+ boolean_t hs_destroy;
+ /* When set, the completion path posts hs_drain_sema at zero outstanding */
+ boolean_t hs_drain_notify;
+ struct sema hs_drain_sema;
+ /* Request used by hv_storvsc_channel_init() */
+ struct hv_storvsc_request hs_init_req;
+ /* Request used by hv_storvsc_host_reset() */
+ struct hv_storvsc_request hs_reset_req;
+};
+
+
+/**
+ * HyperV storvsc timeout testing cases:
+ * a. IO returned after first timeout;
+ * b. IO returned after second timeout and queue freeze;
+ * c. IO returned while timer handler is running
+ * The first can be tested by "sg_senddiag -vv /dev/daX",
+ * and the second and third can be done by
+ * "sg_wr_mode -v -p 08 -c 0,1a -m 0,ff /dev/daX".
+ */
+#define HVS_TIMEOUT_TEST 0
+
+/*
+ * Bus/adapter reset functionality on the Hyper-V host is
+ * buggy and it will be disabled until
+ * it can be further tested.
+ */
+#define HVS_HOST_RESET 0
+
+/*
+ * Static properties of one driver flavor; see g_drv_props_table.
+ */
+struct storvsc_driver_props {
+ /* Name passed to cam_sim_alloc() */
+ char *drv_name;
+ /* Description passed to device_set_desc() */
+ char *drv_desc;
+ /* Number of LUNs probed by scan_for_luns() */
+ uint8_t drv_max_luns_per_target;
+ /* Number of requests pre-allocated and the CAM queue depth */
+ uint8_t drv_max_ios_per_target;
+ /* Size passed to hv_vmbus_channel_open() for both ring buffers */
+ uint32_t drv_ringbuffer_size;
+};
+
+/*
+ * Storage device flavor. storvsc_attach() uses the value as an index
+ * into g_drv_props_table, so DRIVER_BLKVSC and DRIVER_STORVSC must keep
+ * the same order as that table's entries.
+ */
+enum hv_storage_type {
+ DRIVER_BLKVSC,
+ DRIVER_STORVSC,
+ DRIVER_UNKNOWN
+};
+
+#define HS_MAX_ADAPTERS 10
+
+/*
+ * Device type GUIDs. In the byte arrays the first three GUID fields
+ * are stored least-significant byte first; the remaining eight bytes
+ * are in the order written in the textual form.
+ */
+
+/* {ba6163d9-04a1-4d29-b605-72e2ffb1dc7f} */
+static const hv_guid gStorVscDeviceType={
+ .data = {0xd9, 0x63, 0x61, 0xba, 0xa1, 0x04, 0x29, 0x4d,
+ 0xb6, 0x05, 0x72, 0xe2, 0xff, 0xb1, 0xdc, 0x7f}
+};
+
+/* {32412632-86cb-44a2-9b5c-50d1417354f5} */
+static const hv_guid gBlkVscDeviceType={
+ .data = {0x32, 0x26, 0x41, 0x32, 0xcb, 0x86, 0xa2, 0x44,
+ 0x9b, 0x5c, 0x50, 0xd1, 0x41, 0x73, 0x54, 0xf5}
+};
+
+/*
+ * Per-flavor properties, indexed by enum hv_storage_type:
+ * entry 0 is DRIVER_BLKVSC, entry 1 is DRIVER_STORVSC.
+ */
+static struct storvsc_driver_props g_drv_props_table[] = {
+ {"blkvsc", "Hyper-V IDE Storage Interface",
+ BLKVSC_MAX_IDE_DISKS_PER_TARGET, BLKVSC_MAX_IO_REQUESTS,
+ STORVSC_RINGBUFFER_SIZE},
+ {"storvsc", "Hyper-V SCSI Storage Interface",
+ STORVSC_MAX_LUNS_PER_TARGET, STORVSC_MAX_IO_REQUESTS,
+ STORVSC_RINGBUFFER_SIZE}
+};
+
+/* Attached adapters; storvsc_attach() stores each softc in the first NULL slot */
+static struct storvsc_softc *hs_softc[HS_MAX_ADAPTERS];
+
+/* static functions */
+/* newbus device methods */
+static int storvsc_probe(device_t dev);
+static int storvsc_attach(device_t dev);
+static int storvsc_detach(device_t dev);
+/* CAM SIM entry points passed to cam_sim_alloc() */
+static void storvsc_poll(struct cam_sim * sim);
+static void storvsc_action(struct cam_sim * sim, union ccb * ccb);
+static void scan_for_luns(struct storvsc_softc * sc);
+static void create_storvsc_request(union ccb *ccb, struct hv_storvsc_request *reqp);
+static void storvsc_free_request(struct storvsc_softc *sc, struct hv_storvsc_request *reqp);
+static enum hv_storage_type storvsc_get_storage_type(device_t dev);
+/* VMBUS channel handling */
+static void hv_storvsc_on_channel_callback(void *context);
+static void hv_storvsc_on_iocompletion( struct storvsc_softc *sc,
+ struct vstor_packet *vstor_packet,
+ struct hv_storvsc_request *request);
+static int hv_storvsc_connect_vsp(struct hv_device *device);
+static void storvsc_io_done(struct hv_storvsc_request *reqp);
+
+/* newbus method table: probe/attach/detach are implemented in this file */
+static device_method_t storvsc_methods[] = {
+ /* Device interface */
+ DEVMETHOD(device_probe, storvsc_probe),
+ DEVMETHOD(device_attach, storvsc_attach),
+ DEVMETHOD(device_detach, storvsc_detach),
+ DEVMETHOD(device_shutdown, bus_generic_shutdown),
+ { 0, 0 }
+};
+
+/* Driver description; the softc size tells newbus how much to allocate per device */
+static driver_t storvsc_driver = {
+ "storvsc", storvsc_methods, sizeof(struct storvsc_softc),
+};
+
+/* Register the driver on the vmbus bus and declare the module dependency on it */
+static devclass_t storvsc_devclass;
+DRIVER_MODULE(storvsc, vmbus, storvsc_driver, storvsc_devclass, 0, 0);
+MODULE_VERSION(storvsc,1);
+MODULE_DEPEND(storvsc, vmbus, 1, 1, 1);
+
+/* Defined elsewhere; storvsc_probe() claims BlkVSC devices only when this is 0 */
+extern int ata_disk_enable;
+
+/**
+ * The host is capable of sending messages to us that are
+ * completely unsolicited. So, we need to address the race
+ * condition where we may be in the process of unloading the
+ * driver when the host may send us an unsolicited message.
+ * We address this issue by implementing a sequentially
+ * consistent protocol:
+ *
+ * 1. Channel callback is invoked while holding the channel lock
+ *    and an unloading driver will reset the channel callback under
+ *    the protection of this channel lock.
+ *
+ * 2. To ensure bounded wait time for unloading a driver, we don't
+ *    permit outgoing traffic once the device is marked as being
+ *    destroyed.
+ *
+ * 3. Once the device is marked as being destroyed, we only
+ *    permit incoming traffic to properly account for
+ *    packets already sent out.
+ *
+ * @param device   Hyper-V device whose softc is wanted
+ * @param outbound TRUE for the send path, FALSE for the receive path
+ * @returns the softc, or NULL when there is none or when the rules
+ *          above forbid the requested direction
+ */
+static inline struct storvsc_softc *
+get_stor_device(struct hv_device *device,
+		boolean_t outbound)
+{
+	struct storvsc_softc *softc = device_get_softc(device->device);
+
+	if (softc == NULL)
+		return NULL;
+
+	/* Outgoing I/O is refused as soon as teardown has started. */
+	if (outbound)
+		return (softc->hs_destroy ? NULL : softc);
+
+	/*
+	 * Incoming traffic during teardown is accepted only while
+	 * previously sent requests are still outstanding.
+	 */
+	if (softc->hs_destroy && (softc->hs_num_out_reqs == 0))
+		return NULL;
+
+	return softc;
+}
+
+/**
+ * @brief send one initialization packet and wait for the host's reply
+ *
+ * The operation (and any operation-specific fields) must already be set
+ * in request->vstor_packet. The reply is copied into the same packet by
+ * hv_storvsc_on_channel_callback() before synch_sema is posted.
+ *
+ * @param dev a Hyper-V device pointer
+ * @param request the initialization request (its synch_sema must be
+ *                initialized)
+ * @returns 0 when the host answered with a successful COMPLETEIO,
+ *          the send/wait error otherwise, or EIO when the host
+ *          answered with an unexpected operation or a non-zero status
+ */
+static int
+hv_storvsc_init_exchange(struct hv_device *dev,
+    struct hv_storvsc_request *request)
+{
+	struct vstor_packet *vstor_packet = &request->vstor_packet;
+	int ret;
+
+	vstor_packet->flags = REQUEST_COMPLETION_FLAG;
+
+	ret = hv_vmbus_channel_send_packet(
+	    dev->channel,
+	    vstor_packet,
+	    sizeof(struct vstor_packet),
+	    (uint64_t)request,
+	    HV_VMBUS_PACKET_TYPE_DATA_IN_BAND,
+	    HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
+	if (ret != 0) {
+		return (ret);
+	}
+
+	/* sema_timedwait() takes ticks, so 5 seconds is 5 * hz. */
+	ret = sema_timedwait(&request->synch_sema, 5 * hz);
+	if (ret != 0) {
+		return (ret);
+	}
+
+	/*
+	 * A reply that is not a successful completion is a failure;
+	 * report it instead of leaving the return value at 0.
+	 */
+	if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO ||
+	    vstor_packet->status != 0) {
+		return (EIO);
+	}
+
+	return (0);
+}
+
+/**
+ * @brief initialize channel connection to parent partition
+ *
+ * Runs the four-step vsc/vsp handshake on the already opened channel:
+ * begin initialization, query protocol version, query properties and
+ * end initialization. The sequence stops at the first step that fails.
+ *
+ * @param dev a Hyper-V device pointer
+ * @returns 0 on success, non-zero error on failure
+ */
+static int
+hv_storvsc_channel_init(struct hv_device *dev)
+{
+	int ret = 0;
+	struct hv_storvsc_request *request;
+	struct vstor_packet *vstor_packet;
+	struct storvsc_softc *sc;
+
+	sc = get_stor_device(dev, TRUE);
+	if (sc == NULL) {
+		return ENODEV;
+	}
+
+	request = &sc->hs_init_req;
+	memset(request, 0, sizeof(struct hv_storvsc_request));
+	vstor_packet = &request->vstor_packet;
+	request->softc = sc;
+
+	/**
+	 * Initiate the vsc/vsp initialization protocol on the open channel
+	 */
+	sema_init(&request->synch_sema, 0, ("stor_synch_sema"));
+
+	vstor_packet->operation = VSTOR_OPERATION_BEGININITIALIZATION;
+	ret = hv_storvsc_init_exchange(dev, request);
+	if (ret != 0) {
+		goto cleanup;
+	}
+
+	/* reuse the packet for version range supported */
+	memset(vstor_packet, 0, sizeof(struct vstor_packet));
+	vstor_packet->operation = VSTOR_OPERATION_QUERYPROTOCOLVERSION;
+	vstor_packet->version.major_minor = VMSTOR_PROTOCOL_VERSION_CURRENT;
+	/* revision is only significant for Windows guests */
+	vstor_packet->version.revision = 0;
+
+	/* TODO: Check returned version */
+	ret = hv_storvsc_init_exchange(dev, request);
+	if (ret != 0) {
+		goto cleanup;
+	}
+
+	/**
+	 * Query channel properties
+	 */
+	memset(vstor_packet, 0, sizeof(struct vstor_packet));
+	vstor_packet->operation = VSTOR_OPERATION_QUERYPROPERTIES;
+
+	/* TODO: Check returned properties */
+	ret = hv_storvsc_init_exchange(dev, request);
+	if (ret != 0) {
+		goto cleanup;
+	}
+
+	memset(vstor_packet, 0, sizeof(struct vstor_packet));
+	vstor_packet->operation = VSTOR_OPERATION_ENDINITIALIZATION;
+
+	ret = hv_storvsc_init_exchange(dev, request);
+
+cleanup:
+	sema_destroy(&request->synch_sema);
+	return (ret);
+}
+
+/**
+ * @brief Open channel connection to parent partition StorVSP driver
+ *
+ * Opens the VMBUS channel with hv_storvsc_on_channel_callback() as its
+ * callback and then runs the initialization handshake. If the handshake
+ * fails, the channel is closed again so that it is not left open with
+ * the callback still installed.
+ *
+ * @param dev pointer to a Hyper-V device
+ * @returns 0 on success, non-zero error on failure
+ */
+static int
+hv_storvsc_connect_vsp(struct hv_device *dev)
+{
+	int ret = 0;
+	struct vmstor_chan_props props;
+	struct storvsc_softc *sc;
+
+	sc = device_get_softc(dev->device);
+
+	memset(&props, 0, sizeof(struct vmstor_chan_props));
+
+	/*
+	 * Open the channel; the same ring buffer size is used for
+	 * both directions.
+	 */
+	ret = hv_vmbus_channel_open(
+	    dev->channel,
+	    sc->hs_drv_props->drv_ringbuffer_size,
+	    sc->hs_drv_props->drv_ringbuffer_size,
+	    (void *)&props,
+	    sizeof(struct vmstor_chan_props),
+	    hv_storvsc_on_channel_callback,
+	    dev);
+
+	if (ret != 0) {
+		return ret;
+	}
+
+	ret = hv_storvsc_channel_init(dev);
+	if (ret != 0) {
+		/* Undo the open so the caller sees an all-or-nothing result */
+		hv_vmbus_channel_close(dev->channel);
+	}
+
+	return (ret);
+}
+
+#if HVS_HOST_RESET
+/**
+ * @brief ask the host to reset the bus and wait for its reply
+ *
+ * Only compiled when HVS_HOST_RESET is non-zero.
+ *
+ * @param dev a Hyper-V device pointer
+ * @returns 0 on success, non-zero error on failure
+ */
+static int
+hv_storvsc_host_reset(struct hv_device *dev)
+{
+	int ret = 0;
+	struct storvsc_softc *sc;
+
+	struct hv_storvsc_request *request;
+	struct vstor_packet *vstor_packet;
+
+	sc = get_stor_device(dev, TRUE);
+	if (sc == NULL) {
+		return ENODEV;
+	}
+
+	/*
+	 * hs_reset_req is reused for every reset and the channel callback
+	 * copies the host's reply into it, so clear it before building
+	 * the new request.
+	 */
+	request = &sc->hs_reset_req;
+	memset(request, 0, sizeof(struct hv_storvsc_request));
+	request->softc = sc;
+	vstor_packet = &request->vstor_packet;
+
+	sema_init(&request->synch_sema, 0, "stor synch sema");
+
+	vstor_packet->operation = VSTOR_OPERATION_RESETBUS;
+	vstor_packet->flags = REQUEST_COMPLETION_FLAG;
+
+	ret = hv_vmbus_channel_send_packet(dev->channel,
+	    vstor_packet,
+	    sizeof(struct vstor_packet),
+	    (uint64_t)request,
+	    HV_VMBUS_PACKET_TYPE_DATA_IN_BAND,
+	    HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
+
+	if (ret != 0) {
+		goto cleanup;
+	}
+
+	/* sema_timedwait() takes ticks, so 5 seconds is 5 * hz. */
+	ret = sema_timedwait(&request->synch_sema, 5 * hz);
+
+	if (ret) {
+		goto cleanup;
+	}
+
+
+	/*
+	 * At this point, all outstanding requests in the adapter
+	 * should have been flushed out and return to us
+	 */
+
+cleanup:
+	sema_destroy(&request->synch_sema);
+	return (ret);
+}
+#endif /* HVS_HOST_RESET */
+
+/**
+ * @brief Function to initiate an I/O request
+ *
+ * Fills in the remaining SRB fields and sends the request to the host,
+ * with the data pages when data_buf.length is non-zero. The softc lock
+ * (request->softc->hs_lock) is dropped around the send and taken again
+ * afterwards, so it is expected to be held on entry.
+ *
+ * @param device Hyper-V device pointer
+ * @param request pointer to a request structure
+ * @returns 0 on success, non-zero error on failure
+ */
+static int
+hv_storvsc_io_request(struct hv_device *device,
+		      struct hv_storvsc_request *request)
+{
+	struct storvsc_softc *sc;
+	struct vstor_packet *vstor_packet = &request->vstor_packet;
+	int ret = 0;
+
+	sc = get_stor_device(device, TRUE);
+
+	if (sc == NULL) {
+		return ENODEV;
+	}
+
+	vstor_packet->flags |= REQUEST_COMPLETION_FLAG;
+
+	vstor_packet->vm_srb.length = sizeof(struct vmscsi_req);
+
+	vstor_packet->vm_srb.sense_info_len = SENSE_BUFFER_SIZE;
+
+	vstor_packet->vm_srb.transfer_len = request->data_buf.length;
+
+	vstor_packet->operation = VSTOR_OPERATION_EXECUTESRB;
+
+
+	mtx_unlock(&request->softc->hs_lock);
+	if (request->data_buf.length) {
+		ret = hv_vmbus_channel_send_packet_multipagebuffer(
+		    device->channel,
+		    &request->data_buf,
+		    vstor_packet,
+		    sizeof(struct vstor_packet),
+		    (uint64_t)request);
+
+	} else {
+		ret = hv_vmbus_channel_send_packet(
+		    device->channel,
+		    vstor_packet,
+		    sizeof(struct vstor_packet),
+		    (uint64_t)request,
+		    HV_VMBUS_PACKET_TYPE_DATA_IN_BAND,
+		    HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
+	}
+	mtx_lock(&request->softc->hs_lock);
+
+	if (ret != 0) {
+		/* Terminate the message so it does not run into the next one */
+		printf("Unable to send packet %p ret %d\n", vstor_packet, ret);
+	} else {
+		/*
+		 * NOTE(review): the outstanding-request count is raised
+		 * only after the send, while the lock was dropped; a
+		 * completion arriving first would decrement it before
+		 * this increment. Confirm whether that ordering matters.
+		 */
+		atomic_add_int(&sc->hs_num_out_reqs, 1);
+	}
+
+	return (ret);
+}
+
+
+/**
+ * Process IO_COMPLETION_OPERATION and ready
+ * the result to be completed for upper layer
+ * processing by the CAM layer.
+ *
+ * When the host reports a CHECK CONDITION with valid autosense data,
+ * the sense bytes are copied into request->sense_data and
+ * request->sense_info_len is set to the number of bytes copied;
+ * otherwise sense_info_len is left at 0. The request is then passed to
+ * storvsc_io_done(), the outstanding-request count is decremented and a
+ * draining detach is woken when the count reaches zero.
+ *
+ * @param sc softc of the adapter the reply arrived on
+ * @param vstor_packet the reply received from the host
+ * @param request the request the reply belongs to
+ */
+static void
+hv_storvsc_on_iocompletion(struct storvsc_softc *sc,
+			   struct vstor_packet *vstor_packet,
+			   struct hv_storvsc_request *request)
+{
+	struct vmscsi_req *vm_srb;
+	uint8_t sense_buf_len;
+
+	vm_srb = &vstor_packet->vm_srb;
+
+	/*
+	 * Remember the value the request carried on entry (presumably the
+	 * size of the sense buffer set up when the request was created --
+	 * TODO confirm) before it is reset, so the assertion below checks
+	 * against it rather than against the freshly written 0.
+	 */
+	sense_buf_len = request->sense_info_len;
+	request->sense_info_len = 0;
+
+	if (((vm_srb->scsi_status & 0xFF) == SCSI_STATUS_CHECK_COND) &&
+	    (vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID)) {
+		/* Autosense data available */
+
+		KASSERT(vm_srb->sense_info_len <= sense_buf_len,
+		    ("vm_srb->sense_info_len <= "
+		     "request->sense_info_len"));
+
+		memcpy(request->sense_data, vm_srb->sense_data,
+		    vm_srb->sense_info_len);
+
+		request->sense_info_len = vm_srb->sense_info_len;
+	}
+
+	/* Complete request by passing to the CAM layer */
+	storvsc_io_done(request);
+	atomic_subtract_int(&sc->hs_num_out_reqs, 1);
+	if (sc->hs_drain_notify && (sc->hs_num_out_reqs == 0)) {
+		sema_post(&sc->hs_drain_sema);
+	}
+}
+
+/**
+ * @brief VMBUS channel callback: drain and dispatch received packets
+ *
+ * Receives packets until the channel reports an error or no data.
+ * The request id of each packet is the address of the request it
+ * answers. Replies to the init and reset requests are copied into the
+ * request and its synch_sema is posted; other COMPLETEIO replies are
+ * passed to hv_storvsc_on_iocompletion(). All other operations are
+ * ignored.
+ *
+ * @param context the struct hv_device the channel belongs to
+ */
+static void
+hv_storvsc_on_channel_callback(void *context)
+{
+	int ret = 0;
+	struct hv_device *device = (struct hv_device *)context;
+	struct storvsc_softc *sc;
+	uint32_t bytes_recvd;
+	uint64_t request_id;
+	uint8_t packet[roundup2(sizeof(struct vstor_packet), 8)];
+	struct hv_storvsc_request *request;
+	struct vstor_packet *vstor_packet;
+
+	/* Check the pointer before get_stor_device() dereferences it */
+	KASSERT(device, ("device"));
+
+	sc = get_stor_device(device, FALSE);
+	if (sc == NULL) {
+		return;
+	}
+
+	for (;;) {
+		ret = hv_vmbus_channel_recv_packet(
+		    device->channel,
+		    packet,
+		    roundup2(sizeof(struct vstor_packet), 8),
+		    &bytes_recvd,
+		    &request_id);
+		if ((ret != 0) || (bytes_recvd == 0)) {
+			break;
+		}
+
+		request = (struct hv_storvsc_request *)request_id;
+		KASSERT(request, ("request"));
+
+		if ((request == &sc->hs_init_req) ||
+		    (request == &sc->hs_reset_req)) {
+			/* Synchronous request: hand the reply to the waiter */
+			memcpy(&request->vstor_packet, packet,
+			    sizeof(struct vstor_packet));
+			sema_post(&request->synch_sema);
+			continue;
+		}
+
+		vstor_packet = (struct vstor_packet *)packet;
+		switch(vstor_packet->operation) {
+		case VSTOR_OPERATION_COMPLETEIO:
+			hv_storvsc_on_iocompletion(sc,
+			    vstor_packet, request);
+			break;
+		case VSTOR_OPERATION_REMOVEDEVICE:
+			/* TODO: implement */
+			break;
+		default:
+			break;
+		}
+	}
+}
+
+/**
+ * @brief completion callback for a single LUN scan CCB
+ *
+ * Installed as the cbfcnp of the scan CCB by scan_for_luns(). It wakes
+ * whoever sleeps on the address of that CCB's cbfcnp field, i.e. the
+ * executer of the scan LUN CCB action (cam_periph_runccb).
+ *
+ * @param periph a pointer to a CAM peripheral (not used)
+ * @param done_ccb pointer to CAM control block
+ */
+static void
+storvsc_xptdone(struct cam_periph *periph, union ccb *done_ccb)
+{
+	void *wait_chan = &done_ccb->ccb_h.cbfcnp;
+
+	wakeup(wait_chan);
+}
+
+/**
+ * @brief scan for attached logical unit numbers (LUNs)
+ *
+ * Hyper-V has no backend "device changed" operation that would hand
+ * FreeBSD a list of devices to connect, so storvsc_attach() has to
+ * probe for them. There is a single SCSI target (target 0); every LUN
+ * from 0 up to the driver's drv_max_luns_per_target is scanned with an
+ * XPT_SCAN_LUN CCB, one after the other, reusing one path and one CCB.
+ * The softc lock is held for the whole scan.
+ *
+ * @param sc pointer to softc
+ */
+static void
+scan_for_luns(struct storvsc_softc *sc)
+{
+	struct cam_path *bus_path = sc->hs_path;
+	struct cam_path *lun_path = NULL;
+	union ccb *scan_ccb;
+	cam_status cs;
+	int lun = 0;
+	int error;
+
+	scan_ccb = malloc(sizeof(union ccb), M_CAMXPT, M_WAITOK);
+	lun_path = malloc(sizeof(*lun_path), M_CAMXPT, M_WAITOK);
+
+	mtx_lock(&sc->hs_lock);
+	do {
+		/* Start every iteration from clean path and CCB storage */
+		bzero(lun_path, sizeof(*lun_path));
+		bzero(scan_ccb, sizeof(*scan_ccb));
+
+		cs = xpt_compile_path(lun_path, xpt_periph,
+		    bus_path->bus->path_id, 0, lun);
+		if (cs != CAM_REQ_CMP) {
+			mtx_unlock(&sc->hs_lock);
+			xpt_print(bus_path, "scan_for_lunYYY: can't compile"
+			    " path, 0x%p can't continue\n",
+			    sc->hs_path);
+			free(scan_ccb, M_CAMXPT);
+			free(lun_path, M_CAMXPT);
+			return;
+		}
+
+		xpt_setup_ccb(&scan_ccb->ccb_h, lun_path, 5);
+		scan_ccb->ccb_h.func_code = XPT_SCAN_LUN;
+		scan_ccb->ccb_h.cbfcnp = storvsc_xptdone;
+		scan_ccb->crcn.flags = CAM_FLAG_NONE;
+
+		error = cam_periph_runccb(scan_ccb, NULL,
+		    CAM_FLAG_NONE, 0, NULL);
+		KASSERT(error == 0, ("cam_periph_runccb failed %d\n", error));
+		xpt_release_path(lun_path);
+
+		lun++;
+	} while (lun < sc->hs_drv_props->drv_max_luns_per_target);
+	mtx_unlock(&sc->hs_lock);
+
+	free(scan_ccb, M_CAMXPT);
+	free(lun_path, M_CAMXPT);
+}
+
+/**
+ * @brief StorVSC probe function
+ *
+ * Device probe function. A StorVSC device is always claimed. A BlkVSC
+ * (paravirtual IDE) device is claimed only when ata_disk_enable is 0,
+ * i.e. when it has not been left to the emulated ATA/IDE driver.
+ * Anything else is rejected.
+ *
+ * @param dev a device
+ * @returns 0 on success, ENXIO if not a matching StorVSC device
+ */
+static int
+storvsc_probe(device_t dev)
+{
+	switch (storvsc_get_storage_type(dev)) {
+	case DRIVER_STORVSC:
+		return (0);
+	case DRIVER_BLKVSC:
+		return ((ata_disk_enable == 0) ? 0 : ENXIO);
+	default:
+		return (ENXIO);
+	}
+}
+
+/**
+ * @brief StorVSC attach function
+ *
+ * Function responsible for allocating per-device structures,
+ * setting up CAM interfaces and scanning for available LUNs to
+ * be used for SCSI device peripherals.
+ *
+ * The root mount is held for the duration of the call. On failure
+ * everything set up so far is undone: the channel is closed if it was
+ * opened, the pre-allocated requests are freed and the softc's mutex
+ * and semaphore are destroyed.
+ *
+ * @param dev a device
+ * @returns 0 on success or an error on failure
+ */
+static int
+storvsc_attach(device_t dev)
+{
+	struct hv_device *hv_dev = vmbus_get_devctx(dev);
+	enum hv_storage_type stor_type;
+	struct storvsc_softc *sc;
+	struct cam_devq *devq;
+	int ret, i;
+	int chan_open = 0;
+	struct hv_storvsc_request *reqp;
+	struct root_hold_token *root_mount_token = NULL;
+
+	/*
+	 * We need to serialize storvsc attach calls.
+	 */
+	root_mount_token = root_mount_hold("storvsc");
+
+	/*
+	 * The two earliest failures happen before anything in the softc
+	 * has been initialized (and sc may be NULL), so they must not go
+	 * through the softc teardown below.
+	 */
+	sc = device_get_softc(dev);
+	if (sc == NULL) {
+		ret = ENOMEM;
+		goto fail;
+	}
+
+	stor_type = storvsc_get_storage_type(dev);
+
+	if (stor_type == DRIVER_UNKNOWN) {
+		ret = ENODEV;
+		goto fail;
+	}
+
+	bzero(sc, sizeof(struct storvsc_softc));
+
+	/* fill in driver specific properties */
+	sc->hs_drv_props = &g_drv_props_table[stor_type];
+
+	/* fill in device specific properties */
+	sc->hs_unit = device_get_unit(dev);
+	sc->hs_dev = hv_dev;
+	device_set_desc(dev, g_drv_props_table[stor_type].drv_desc);
+
+	LIST_INIT(&sc->hs_free_list);
+	mtx_init(&sc->hs_lock, "hvslck", NULL, MTX_DEF);
+
+	/* Pre-allocate one request per permitted outstanding I/O */
+	for (i = 0; i < sc->hs_drv_props->drv_max_ios_per_target; ++i) {
+		reqp = malloc(sizeof(struct hv_storvsc_request),
+		    M_DEVBUF, M_WAITOK|M_ZERO);
+		reqp->softc = sc;
+
+		LIST_INSERT_HEAD(&sc->hs_free_list, reqp, link);
+	}
+
+	sc->hs_destroy = FALSE;
+	sc->hs_drain_notify = FALSE;
+	sema_init(&sc->hs_drain_sema, 0, "Store Drain Sema");
+
+	/*
+	 * NOTE(review): if this call fails after it opened the channel,
+	 * closing it is left to hv_storvsc_connect_vsp() -- confirm.
+	 */
+	ret = hv_storvsc_connect_vsp(hv_dev);
+	if (ret != 0) {
+		goto cleanup;
+	}
+	chan_open = 1;
+
+	/*
+	 * Create the device queue.
+	 * Hyper-V maps each target to one SCSI HBA
+	 */
+	devq = cam_simq_alloc(sc->hs_drv_props->drv_max_ios_per_target);
+	if (devq == NULL) {
+		device_printf(dev, "Failed to alloc device queue\n");
+		ret = ENOMEM;
+		goto cleanup;
+	}
+
+	sc->hs_sim = cam_sim_alloc(storvsc_action,
+	    storvsc_poll,
+	    sc->hs_drv_props->drv_name,
+	    sc,
+	    sc->hs_unit,
+	    &sc->hs_lock, 1,
+	    sc->hs_drv_props->drv_max_ios_per_target,
+	    devq);
+
+	if (sc->hs_sim == NULL) {
+		device_printf(dev, "Failed to alloc sim\n");
+		cam_simq_free(devq);
+		ret = ENOMEM;
+		goto cleanup;
+	}
+
+	mtx_lock(&sc->hs_lock);
+	/* bus_id is set to 0, need to get it from VMBUS channel query? */
+	if (xpt_bus_register(sc->hs_sim, dev, 0) != CAM_SUCCESS) {
+		cam_sim_free(sc->hs_sim, /*free_devq*/TRUE);
+		mtx_unlock(&sc->hs_lock);
+		device_printf(dev, "Unable to register SCSI bus\n");
+		ret = ENXIO;
+		goto cleanup;
+	}
+
+	if (xpt_create_path(&sc->hs_path, /*periph*/NULL,
+	    cam_sim_path(sc->hs_sim),
+	    CAM_TARGET_WILDCARD, CAM_LUN_WILDCARD) != CAM_REQ_CMP) {
+		xpt_bus_deregister(cam_sim_path(sc->hs_sim));
+		cam_sim_free(sc->hs_sim, /*free_devq*/TRUE);
+		mtx_unlock(&sc->hs_lock);
+		device_printf(dev, "Unable to create path\n");
+		ret = ENXIO;
+		goto cleanup;
+	}
+
+	mtx_unlock(&sc->hs_lock);
+	scan_for_luns(sc);
+
+	/*
+	 * Record the adapter in the first free slot. The index is
+	 * checked before the element is read, and nothing is stored
+	 * when the table is full.
+	 */
+	for (i = 0; (i < HS_MAX_ADAPTERS) && (hs_softc[i] != NULL); i++)
+		continue;
+	KASSERT(i < HS_MAX_ADAPTERS, ("storvsc_attach: hs_softc full\n"));
+	if (i < HS_MAX_ADAPTERS) {
+		hs_softc[i] = sc;
+	} else {
+		device_printf(dev, "hs_softc table full\n");
+	}
+
+	root_mount_rel(root_mount_token);
+	return (0);
+
+
+cleanup:
+	/* Close the channel first so its callback can no longer use sc */
+	if (chan_open) {
+		hv_vmbus_channel_close(hv_dev->channel);
+	}
+	while (!LIST_EMPTY(&sc->hs_free_list)) {
+		reqp = LIST_FIRST(&sc->hs_free_list);
+		LIST_REMOVE(reqp, link);
+		free(reqp, M_DEVBUF);
+	}
+	sema_destroy(&sc->hs_drain_sema);
+	mtx_destroy(&sc->hs_lock);
+fail:
+	root_mount_rel(root_mount_token);
+	return (ret);
+}
+
+/**
+ * @brief StorVSC device detach function
+ *
+ * This function is responsible for safely detaching a
+ * StorVSC device. This includes waiting for inbound responses
+ * to complete and freeing associated per-device structures.
+ *
+ * @param dev a device
+ * returns 0 on success
+ */
+static int
+storvsc_detach(device_t dev)
+{
+	struct storvsc_softc *sc = device_get_softc(dev);
+	struct hv_storvsc_request *reqp = NULL;
+	struct hv_device *hv_device = vmbus_get_devctx(dev);
+	int i;
+
+	/* Mark the device as going away under the inbound channel lock */
+	mtx_lock(&hv_device->channel->inbound_lock);
+	sc->hs_destroy = TRUE;
+	mtx_unlock(&hv_device->channel->inbound_lock);
+
+	/*
+	 * At this point, all outbound traffic should be disabled. We
+	 * only allow inbound traffic (responses) to proceed so that
+	 * outstanding requests can be completed.
+	 *
+	 * The completion path posts hs_drain_sema only when it takes the
+	 * outstanding count to zero, so waiting is correct only while
+	 * requests are still outstanding; with none outstanding nothing
+	 * would ever post and the wait would never return.
+	 */
+
+	sc->hs_drain_notify = TRUE;
+	if (sc->hs_num_out_reqs != 0) {
+		sema_wait(&sc->hs_drain_sema);
+	}
+	sc->hs_drain_notify = FALSE;
+
+	/*
+	 * Since we have already drained, we don't need to busy wait.
+	 * The call to close the channel will reset the callback
+	 * under the protection of the incoming channel lock.
+	 */
+
+	hv_vmbus_channel_close(hv_device->channel);
+
+	/* Drop the adapter from the global table; the softc is going away */
+	for (i = 0; i < HS_MAX_ADAPTERS; i++) {
+		if (hs_softc[i] == sc) {
+			hs_softc[i] = NULL;
+		}
+	}
+
+	mtx_lock(&sc->hs_lock);
+	while (!LIST_EMPTY(&sc->hs_free_list)) {
+		reqp = LIST_FIRST(&sc->hs_free_list);
+		LIST_REMOVE(reqp, link);
+
+		free(reqp, M_DEVBUF);
+	}
+	mtx_unlock(&sc->hs_lock);
+	return (0);
+}
+
+#if HVS_TIMEOUT_TEST
+/**
+ * @brief unit test for timed out operations
+ *
+ * This function provides unit testing capability to simulate
+ * timed out operations. Recompilation with HVS_TIMEOUT_TEST=1
+ * is required.
+ *
+ * The function does nothing unless the first CDB byte of the request
+ * equals 'opcode'. Otherwise it re-issues the request and, when 'wait'
+ * is set, waits up to 60 seconds for the I/O to return and reports
+ * what it observed.
+ *
+ * NOTE(review): this code uses reqp->event.mtx and reqp->event.cv, but
+ * struct hv_storvsc_request as declared in this file has no 'event'
+ * member, so it looks stale and may not build when enabled -- confirm.
+ *
+ * @param reqp pointer to a request structure
+ * @param opcode SCSI operation being performed
+ * @param wait if 1, wait for I/O to complete
+ */
+static void
+storvsc_timeout_test(struct hv_storvsc_request *reqp,
+ uint8_t opcode, int wait)
+{
+ int ret;
+ union ccb *ccb = reqp->ccb;
+ struct storvsc_softc *sc = reqp->softc;
+
+ /* Only the request carrying the opcode under test is exercised */
+ if (reqp->vstor_packet.vm_srb.cdb[0] != opcode) {
+ return;
+ }
+
+ if (wait) {
+ mtx_lock(&reqp->event.mtx);
+ }
+ ret = hv_storvsc_io_request(sc->hs_dev, reqp);
+ if (ret != 0) {
+ if (wait) {
+ mtx_unlock(&reqp->event.mtx);
+ }
+ printf("%s: io_request failed with %d.\n",
+ __func__, ret);
+ /* Fail the CCB and return the request to the free list */
+ ccb->ccb_h.status = CAM_PROVIDE_FAIL;
+ mtx_lock(&sc->hs_lock);
+ storvsc_free_request(sc, reqp);
+ xpt_done(ccb);
+ mtx_unlock(&sc->hs_lock);
+ return;
+ }
+
+ if (wait) {
+ xpt_print(ccb->ccb_h.path,
+ "%u: %s: waiting for IO return.\n",
+ ticks, __func__);
+ /* Wait at most 60 seconds (60*hz ticks) */
+ ret = cv_timedwait(&reqp->event.cv, &reqp->event.mtx, 60*hz);
+ mtx_unlock(&reqp->event.mtx);
+ xpt_print(ccb->ccb_h.path, "%u: %s: %s.\n",
+ ticks, __func__, (ret == 0)?
+ "IO return detected" :
+ "IO return not detected");
+ /*
+ * Now both the timer handler and io done are running
+ * simultaneously. We want to confirm the io done always
+ * finishes after the timer handler exits. So reqp used by
+ * timer handler is not freed or stale. Do busy loop for
+ * another 1/10 second to make sure io done does
+ * wait for the timer handler to complete.
+ */
+ DELAY(100*1000);
+ mtx_lock(&sc->hs_lock);
+ xpt_print(ccb->ccb_h.path,
+ "%u: %s: finishing, queue frozen %d, "
+ "ccb status 0x%x scsi_status 0x%x.\n",
+ ticks, __func__, sc->hs_frozen,
+ ccb->ccb_h.status,
+ ccb->csio.scsi_status);
+ mtx_unlock(&sc->hs_lock);
+ }
+}
+#endif /* HVS_TIMEOUT_TEST */
+
+/**
+ * @brief timeout handler for requests
+ *
+ * This function is called as a result of a callout expiring.
+ * On the first expiry of a request the event is logged and the callout
+ * is re-armed for another full CCB timeout period. On any later expiry
+ * the SIM queue is frozen, unless it is already frozen.
+ *
+ * @param arg pointer to a request
+ */
+static void
+storvsc_timeout(void *arg)
+{
+	struct hv_storvsc_request *reqp = arg;
+	struct storvsc_softc *sc = reqp->softc;
+	union ccb *ccb = reqp->ccb;
+
+	if (reqp->retries != 0) {
+		/* Not the first expiry: stop feeding the adapter */
+		mtx_lock(&sc->hs_lock);
+		xpt_print(ccb->ccb_h.path,
+		    "%u: IO (reqp = 0x%p) did not return for %u seconds, %s.\n",
+		    ticks, reqp, ccb->ccb_h.timeout * (reqp->retries+1) / 1000,
+		    (sc->hs_frozen == 0)?
+		    "freezing the queue" : "the queue is already frozen");
+		if (sc->hs_frozen == 0) {
+			sc->hs_frozen = 1;
+			xpt_freeze_simq(xpt_path_sim(ccb->ccb_h.path), 1);
+		}
+		mtx_unlock(&sc->hs_lock);
+
+#if HVS_TIMEOUT_TEST
+		storvsc_timeout_test(reqp, MODE_SELECT_10, 1);
+#endif
+		return;
+	}
+
+	/* First expiry: report it and give the I/O one more period */
+	mtx_lock(&sc->hs_lock);
+	xpt_print(ccb->ccb_h.path,
+	    "%u: IO timed out (req=0x%p), wait for another %u secs.\n",
+	    ticks, reqp, ccb->ccb_h.timeout / 1000);
+	cam_error_print(ccb, CAM_ESF_ALL, CAM_EPF_ALL);
+	mtx_unlock(&sc->hs_lock);
+
+	reqp->retries++;
+	/* ccb_h.timeout is in milliseconds; convert to ticks */
+	callout_reset(&reqp->callout,
+	    (ccb->ccb_h.timeout * hz) / 1000,
+	    storvsc_timeout, reqp);
+#if HVS_TIMEOUT_TEST
+	storvsc_timeout_test(reqp, SEND_DIAGNOSTIC, 0);
+#endif
+}
+
+/**
+ * @brief StorVSC device poll function
+ *
+ * This function is responsible for servicing requests when
+ * interrupts are disabled (i.e when we are dumping core.)
+ *
+ * @param sim a pointer to a CAM SCSI interface module
+ */
+static void
+storvsc_poll(struct cam_sim *sim)
+{
+ struct storvsc_softc *sc = cam_sim_softc(sim);
+
+ mtx_assert(&sc->hs_lock, MA_OWNED);
+ mtx_unlock(&sc->hs_lock);
+ hv_storvsc_on_channel_callback(sc->hs_dev);
+ mtx_lock(&sc->hs_lock);
+}
+
+/**
+ * @brief StorVSC device action function
+ *
+ * This function is responsible for handling SCSI operations which
+ * are passed from the CAM layer. The requests are in the form of
+ * CAM control blocks which indicate the action being performed.
+ * Not all actions require converting the request to a VSCSI protocol
+ * message - these actions can be responded to by this driver.
+ * Requests which are destined for a backend storage device are converted
+ * to a VSCSI protocol message and sent on the channel connection associated
+ * with this device.
+ *
+ * @param sim pointer to a CAM SCSI interface module
+ * @param ccb pointer to a CAM control block
+ */
+static void
+storvsc_action(struct cam_sim *sim, union ccb *ccb)
+{
+ struct storvsc_softc *sc = cam_sim_softc(sim);
+ int res;
+
+ mtx_assert(&sc->hs_lock, MA_OWNED);
+ switch (ccb->ccb_h.func_code) {
+ case XPT_PATH_INQ: {
+ struct ccb_pathinq *cpi = &ccb->cpi;
+
+ cpi->version_num = 1;
+ cpi->hba_inquiry = PI_TAG_ABLE|PI_SDTR_ABLE;
+ cpi->target_sprt = 0;
+ cpi->hba_misc = PIM_NOBUSRESET;
+ cpi->hba_eng_cnt = 0;
+ cpi->max_target = STORVSC_MAX_TARGETS;
+ cpi->max_lun = sc->hs_drv_props->drv_max_luns_per_target;
+ cpi->initiator_id = 0;
+ cpi->bus_id = cam_sim_bus(sim);
+ cpi->base_transfer_speed = 300000;
+ cpi->transport = XPORT_SAS;
+ cpi->transport_version = 0;
+ cpi->protocol = PROTO_SCSI;
+ cpi->protocol_version = SCSI_REV_SPC2;
+ strncpy(cpi->sim_vid, "FreeBSD", SIM_IDLEN);
+ strncpy(cpi->hba_vid, sc->hs_drv_props->drv_name, HBA_IDLEN);
+ strncpy(cpi->dev_name, cam_sim_name(sim), DEV_IDLEN);
+ cpi->unit_number = cam_sim_unit(sim);
+
+ ccb->ccb_h.status = CAM_REQ_CMP;
+ xpt_done(ccb);
+ return;
+ }
+ case XPT_GET_TRAN_SETTINGS: {
+ struct ccb_trans_settings *cts = &ccb->cts;
+
+ cts->transport = XPORT_SAS;
+ cts->transport_version = 0;
+ cts->protocol = PROTO_SCSI;
+ cts->protocol_version = SCSI_REV_SPC2;
+
+ /* enable tag queuing and disconnected mode */
+ cts->proto_specific.valid = CTS_SCSI_VALID_TQ;
+ cts->proto_specific.scsi.valid = CTS_SCSI_VALID_TQ;
+ cts->proto_specific.scsi.flags = CTS_SCSI_FLAGS_TAG_ENB;
+ cts->xport_specific.valid = CTS_SPI_VALID_DISC;
+ cts->xport_specific.spi.flags = CTS_SPI_FLAGS_DISC_ENB;
+
+ ccb->ccb_h.status = CAM_REQ_CMP;
+ xpt_done(ccb);
+ return;
+ }
+ case XPT_SET_TRAN_SETTINGS: {
+ ccb->ccb_h.status = CAM_REQ_CMP;
+ xpt_done(ccb);
+ return;
+ }
+ case XPT_CALC_GEOMETRY:{
+ cam_calc_geometry(&ccb->ccg, 1);
+ xpt_done(ccb);
+ return;
+ }
+ case XPT_RESET_BUS:
+ case XPT_RESET_DEV:{
+#if HVS_HOST_RESET
+ if ((res = hv_storvsc_host_reset(sc->hs_dev)) != 0) {
+ xpt_print(ccb->ccb_h.path,
+ "hv_storvsc_host_reset failed with %d\n", res);
+ ccb->ccb_h.status = CAM_PROVIDE_FAIL;
+ xpt_done(ccb);
+ return;
+ }
+ ccb->ccb_h.status = CAM_REQ_CMP;
+ xpt_done(ccb);
+ return;
+#else
+ xpt_print(ccb->ccb_h.path,
+ "%s reset not supported.\n",
+ (ccb->ccb_h.func_code == XPT_RESET_BUS)?
+ "bus" : "dev");
+ ccb->ccb_h.status = CAM_REQ_INVALID;
+ xpt_done(ccb);
+ return;
+#endif /* HVS_HOST_RESET */
+ }
+ case XPT_SCSI_IO:
+ case XPT_IMMED_NOTIFY: {
+ struct hv_storvsc_request *reqp = NULL;
+
+ if (ccb->csio.cdb_len == 0) {
+ panic("cdl_len is 0\n");
+ }
+
+ if (LIST_EMPTY(&sc->hs_free_list)) {
+ ccb->ccb_h.status = CAM_REQUEUE_REQ;
+ if (sc->hs_frozen == 0) {
+ sc->hs_frozen = 1;
+ xpt_freeze_simq(sim, /* count*/1);
+ }
+ xpt_done(ccb);
+ return;
+ }
+
+ reqp = LIST_FIRST(&sc->hs_free_list);
+ LIST_REMOVE(reqp, link);
+
+ bzero(reqp, sizeof(struct hv_storvsc_request));
+ reqp->softc = sc;
+
+ ccb->ccb_h.status |= CAM_SIM_QUEUED;
+ create_storvsc_request(ccb, reqp);
+
+ if (ccb->ccb_h.timeout != CAM_TIME_INFINITY) {
+ callout_init(&reqp->callout, CALLOUT_MPSAFE);
+ callout_reset(&reqp->callout,
+ (ccb->ccb_h.timeout * hz) / 1000,
+ storvsc_timeout, reqp);
+#if HVS_TIMEOUT_TEST
+ cv_init(&reqp->event.cv, "storvsc timeout cv");
+ mtx_init(&reqp->event.mtx, "storvsc timeout mutex",
+ NULL, MTX_DEF);
+ switch (reqp->vstor_packet.vm_srb.cdb[0]) {
+ case MODE_SELECT_10:
+ case SEND_DIAGNOSTIC:
+ /* To have timer send the request. */
+ return;
+ default:
+ break;
+ }
+#endif /* HVS_TIMEOUT_TEST */
+ }
+
+ if ((res = hv_storvsc_io_request(sc->hs_dev, reqp)) != 0) {
+ xpt_print(ccb->ccb_h.path,
+ "hv_storvsc_io_request failed with %d\n", res);
+ ccb->ccb_h.status = CAM_PROVIDE_FAIL;
+ storvsc_free_request(sc, reqp);
+ xpt_done(ccb);
+ return;
+ }
+ return;
+ }
+
+ default:
+ ccb->ccb_h.status = CAM_REQ_INVALID;
+ xpt_done(ccb);
+ return;
+ }
+}
+
+/**
+ * @brief Fill in a request structure based on a CAM control block
+ *
+ * Fills in a request structure based on the contents of a CAM control
+ * block. The request structure holds the payload information for
+ * VSCSI protocol request.
+ *
+ * @param ccb pointer to a CAM contorl block
+ * @param reqp pointer to a request structure
+ */
+static void
+create_storvsc_request(union ccb *ccb, struct hv_storvsc_request *reqp)
+{
+ struct ccb_scsiio *csio = &ccb->csio;
+ uint64_t phys_addr;
+ uint32_t bytes_to_copy = 0;
+ uint32_t pfn_num = 0;
+ uint32_t pfn;
+
+ /* refer to struct vmscsi_req for meanings of these two fields */
+ reqp->vstor_packet.vm_srb.port =
+ cam_sim_unit(xpt_path_sim(ccb->ccb_h.path));
+ reqp->vstor_packet.vm_srb.path_id =
+ cam_sim_bus(xpt_path_sim(ccb->ccb_h.path));
+
+ reqp->vstor_packet.vm_srb.target_id = ccb->ccb_h.target_id;
+ reqp->vstor_packet.vm_srb.lun = ccb->ccb_h.target_lun;
+
+ reqp->vstor_packet.vm_srb.cdb_len = csio->cdb_len;
+ if(ccb->ccb_h.flags & CAM_CDB_POINTER) {
+ memcpy(&reqp->vstor_packet.vm_srb.cdb, csio->cdb_io.cdb_ptr,
+ csio->cdb_len);
+ } else {
+ memcpy(&reqp->vstor_packet.vm_srb.cdb, csio->cdb_io.cdb_bytes,
+ csio->cdb_len);
+ }
+
+ switch (ccb->ccb_h.flags & CAM_DIR_MASK) {
+ case CAM_DIR_OUT:
+ reqp->vstor_packet.vm_srb.data_in = WRITE_TYPE;
+ break;
+ case CAM_DIR_IN:
+ reqp->vstor_packet.vm_srb.data_in = READ_TYPE;
+ break;
+ case CAM_DIR_NONE:
+ reqp->vstor_packet.vm_srb.data_in = UNKNOWN_TYPE;
+ break;
+ default:
+ reqp->vstor_packet.vm_srb.data_in = UNKNOWN_TYPE;
+ break;
+ }
+
+ reqp->sense_data = &csio->sense_data;
+ reqp->sense_info_len = csio->sense_len;
+
+ reqp->ccb = ccb;
+ /*
+ KASSERT((ccb->ccb_h.flags & CAM_SCATTER_VALID) == 0,
+ ("ccb is scatter gather valid\n"));
+ */
+ if (csio->dxfer_len != 0) {
+ reqp->data_buf.length = csio->dxfer_len;
+ bytes_to_copy = csio->dxfer_len;
+ phys_addr = vtophys(csio->data_ptr);
+ reqp->data_buf.offset = phys_addr - trunc_page(phys_addr);
+ }
+
+ while (bytes_to_copy != 0) {
+ int bytes, page_offset;
+ phys_addr = vtophys(&csio->data_ptr[reqp->data_buf.length -
+ bytes_to_copy]);
+ pfn = phys_addr >> PAGE_SHIFT;
+ reqp->data_buf.pfn_array[pfn_num] = pfn;
+ page_offset = phys_addr - trunc_page(phys_addr);
+
+ bytes = min(PAGE_SIZE - page_offset, bytes_to_copy);
+
+ bytes_to_copy -= bytes;
+ pfn_num++;
+ }
+}
+
+/**
+ * @brief completion function before returning to CAM
+ *
+ * I/O process has been completed and the result needs
+ * to be passed to the CAM layer.
+ * Free resources related to this request.
+ *
+ * @param reqp pointer to a request structure
+ */
+static void
+storvsc_io_done(struct hv_storvsc_request *reqp)
+{
+ union ccb *ccb = reqp->ccb;
+ struct ccb_scsiio *csio = &ccb->csio;
+ struct storvsc_softc *sc = reqp->softc;
+ struct vmscsi_req *vm_srb = &reqp->vstor_packet.vm_srb;
+
+ if (reqp->retries > 0) {
+ mtx_lock(&sc->hs_lock);
+#if HVS_TIMEOUT_TEST
+ xpt_print(ccb->ccb_h.path,
+ "%u: IO returned after timeout, "
+ "waking up timer handler if any.\n", ticks);
+ mtx_lock(&reqp->event.mtx);
+ cv_signal(&reqp->event.cv);
+ mtx_unlock(&reqp->event.mtx);
+#endif
+ reqp->retries = 0;
+ xpt_print(ccb->ccb_h.path,
+ "%u: IO returned after timeout, "
+ "stopping timer if any.\n", ticks);
+ mtx_unlock(&sc->hs_lock);
+ }
+
+ /*
+ * callout_drain() will wait for the timer handler to finish
+ * if it is running. So we don't need any lock to synchronize
+ * between this routine and the timer handler.
+ * Note that we need to make sure reqp is not freed when timer
+ * handler is using or will use it.
+ */
+ if (ccb->ccb_h.timeout != CAM_TIME_INFINITY) {
+ callout_drain(&reqp->callout);
+ }
+
+ ccb->ccb_h.status &= ~CAM_SIM_QUEUED;
+ ccb->ccb_h.status &= ~CAM_STATUS_MASK;
+ if (vm_srb->scsi_status == SCSI_STATUS_OK) {
+ ccb->ccb_h.status |= CAM_REQ_CMP;
+ } else {
+ mtx_lock(&sc->hs_lock);
+ xpt_print(ccb->ccb_h.path,
+ "srovsc scsi_status = %d\n",
+ vm_srb->scsi_status);
+ mtx_unlock(&sc->hs_lock);
+ ccb->ccb_h.status |= CAM_SCSI_STATUS_ERROR;
+ }
+
+ ccb->csio.scsi_status = (vm_srb->scsi_status & 0xFF);
+ ccb->csio.resid = ccb->csio.dxfer_len - vm_srb->transfer_len;
+
+ if (reqp->sense_info_len != 0) {
+ csio->sense_resid = csio->sense_len - reqp->sense_info_len;
+ ccb->ccb_h.status |= CAM_AUTOSNS_VALID;
+ }
+
+ mtx_lock(&sc->hs_lock);
+ if (reqp->softc->hs_frozen == 1) {
+ xpt_print(ccb->ccb_h.path,
+ "%u: storvsc unfreezing softc 0x%p.\n",
+ ticks, reqp->softc);
+ ccb->ccb_h.status |= CAM_RELEASE_SIMQ;
+ reqp->softc->hs_frozen = 0;
+ }
+ storvsc_free_request(sc, reqp);
+ xpt_done(ccb);
+ mtx_unlock(&sc->hs_lock);
+}
+
+/**
+ * @brief Free a request structure
+ *
+ * Free a request structure by returning it to the free list
+ *
+ * @param sc pointer to a softc
+ * @param reqp pointer to a request structure
+ */
+static void
+storvsc_free_request(struct storvsc_softc *sc, struct hv_storvsc_request *reqp)
+{
+
+ LIST_INSERT_HEAD(&sc->hs_free_list, reqp, link);
+}
+
+/**
+ * @brief Determine type of storage device from GUID
+ *
+ * Using the type GUID, determine if this is a StorVSC (paravirtual
+ * SCSI or BlkVSC (paravirtual IDE) device.
+ *
+ * @param dev a device
+ * returns an enum
+ */
+static enum hv_storage_type
+storvsc_get_storage_type(device_t dev)
+{
+ const char *p = vmbus_get_type(dev);
+
+ if (!memcmp(p, &gBlkVscDeviceType, sizeof(hv_guid))) {
+ return DRIVER_BLKVSC;
+ } else if (!memcmp(p, &gStorVscDeviceType, sizeof(hv_guid))) {
+ return DRIVER_STORVSC;
+ }
+ return (DRIVER_UNKNOWN);
+}
+
diff --git a/sys/dev/hyperv/storvsc/hv_vstorage.h b/sys/dev/hyperv/storvsc/hv_vstorage.h
new file mode 100644
index 000000000000..d01d08411471
--- /dev/null
+++ b/sys/dev/hyperv/storvsc/hv_vstorage.h
@@ -0,0 +1,231 @@
+/*-
+ * Copyright (c) 2009-2012 Microsoft Corp.
+ * Copyright (c) 2012 NetApp Inc.
+ * Copyright (c) 2012 Citrix Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice unmodified, this list of conditions, and the following
+ * disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __HV_VSTORAGE_H__
+#define __HV_VSTORAGE_H__
+
+/*
+ * Major/minor macros. Minor version is in LSB, meaning that earlier flat
+ * version numbers will be interpreted as "0.x" (i.e., 1 becomes 0.1).
+ */
+
+#define VMSTOR_PROTOCOL_MAJOR(VERSION_) (((VERSION_) >> 8) & 0xff)
+#define VMSTOR_PROTOCOL_MINOR(VERSION_) (((VERSION_) ) & 0xff)
+#define VMSTOR_PROTOCOL_VERSION(MAJOR_, MINOR_) ((((MAJOR_) & 0xff) << 8) | \
+ (((MINOR_) & 0xff) ))
+
+/*
+ * Invalid version.
+ */
+#define VMSTOR_INVALID_PROTOCOL_VERSION -1
+
+/*
+ * Version history:
+ * V1 Beta 0.1
+ * V1 RC < 2008/1/31 1.0
+ * V1 RC > 2008/1/31 2.0
+ */
+
+#define VMSTOR_PROTOCOL_VERSION_CURRENT VMSTOR_PROTOCOL_VERSION(2, 0)
+
+/**
+ * Packet structure ops describing virtual storage requests.
+ */
+enum vstor_packet_ops {
+ VSTOR_OPERATION_COMPLETEIO = 1,
+ VSTOR_OPERATION_REMOVEDEVICE = 2,
+ VSTOR_OPERATION_EXECUTESRB = 3,
+ VSTOR_OPERATION_RESETLUN = 4,
+ VSTOR_OPERATION_RESETADAPTER = 5,
+ VSTOR_OPERATION_RESETBUS = 6,
+ VSTOR_OPERATION_BEGININITIALIZATION = 7,
+ VSTOR_OPERATION_ENDINITIALIZATION = 8,
+ VSTOR_OPERATION_QUERYPROTOCOLVERSION = 9,
+ VSTOR_OPERATION_QUERYPROPERTIES = 10,
+ VSTOR_OPERATION_MAXIMUM = 10
+};
+
+
+/*
+ * Platform neutral description of a scsi request -
+ * this remains the same across the write regardless of 32/64 bit
+ * note: it's patterned off the Windows DDK SCSI_PASS_THROUGH structure
+ */
+
+#define CDB16GENERIC_LENGTH 0x10
+#define SENSE_BUFFER_SIZE 0x12
+#define MAX_DATA_BUFFER_LENGTH_WITH_PADDING 0x14
+
+struct vmscsi_req {
+ uint16_t length;
+ uint8_t srb_status;
+ uint8_t scsi_status;
+
+ /* HBA number, set to the order number detected by initiator. */
+ uint8_t port;
+ /* SCSI bus number or bus_id, different from CAM's path_id. */
+ uint8_t path_id;
+
+ uint8_t target_id;
+ uint8_t lun;
+
+ uint8_t cdb_len;
+ uint8_t sense_info_len;
+ uint8_t data_in;
+ uint8_t reserved;
+
+ uint32_t transfer_len;
+
+ union {
+ uint8_t cdb[CDB16GENERIC_LENGTH];
+
+ uint8_t sense_data[SENSE_BUFFER_SIZE];
+
+ uint8_t reserved_array[MAX_DATA_BUFFER_LENGTH_WITH_PADDING];
+ };
+
+} __packed;
+
+/**
+ * This structure is sent during the initialization phase to get the different
+ * properties of the channel.
+ */
+
+struct vmstor_chan_props {
+ uint16_t proto_ver;
+ uint8_t path_id;
+ uint8_t target_id;
+
+ /**
+ * Note: port number is only really known on the client side
+ */
+ uint32_t port;
+ uint32_t flags;
+ uint32_t max_transfer_bytes;
+
+ /**
+ * This id is unique for each channel and will correspond with
+ * vendor specific data in the inquiry_ata
+ */
+ uint64_t unique_id;
+
+} __packed;
+
+/**
+ * This structure is sent during the storage protocol negotiations.
+ */
+
+struct vmstor_proto_ver
+{
+ /**
+ * Major (MSW) and minor (LSW) version numbers.
+ */
+ uint16_t major_minor;
+
+ uint16_t revision; /* always zero */
+} __packed;
+
+/**
+ * Channel Property Flags
+ */
+
+#define STORAGE_CHANNEL_REMOVABLE_FLAG 0x1
+#define STORAGE_CHANNEL_EMULATED_IDE_FLAG 0x2
+
+
+ struct vstor_packet {
+ /**
+ * Requested operation type
+ * NOTE(review): the storage size of an enum is chosen by the
+ * compiler; the layout presumably assumes 32 bits -- confirm.
+ */
+ enum vstor_packet_ops operation;
+
+ /*
+ * Flags - see below for values
+ * (REQUEST_COMPLETION_FLAG; VSC_LEGAL_FLAGS is the set a VSC may use)
+ */
+ uint32_t flags;
+
+ /**
+ * Status of the request returned from the server side.
+ */
+ uint32_t status;
+
+ union /* anonymous: exactly one payload is meaningful per packet */
+ {
+ /**
+ * Structure used to forward SCSI commands from the client to
+ * the server.
+ */
+ struct vmscsi_req vm_srb;
+
+ /**
+ * Structure used to query channel properties.
+ */
+ struct vmstor_chan_props chan_props;
+
+ /**
+ * Used during version negotiations.
+ */
+ struct vmstor_proto_ver version;
+ };
+
+ } __packed;
+
+
+/**
+ * SRB (SCSI Request Block) Status Codes
+ */
+#define SRB_STATUS_PENDING 0x00
+#define SRB_STATUS_SUCCESS 0x01
+#define SRB_STATUS_ABORTED 0x02
+#define SRB_STATUS_ABORT_FAILED 0x03
+#define SRB_STATUS_ERROR 0x04
+#define SRB_STATUS_BUSY 0x05
+
+/**
+ * SRB Status Masks (can be combined with above status codes)
+ */
+#define SRB_STATUS_QUEUE_FROZEN 0x40
+#define SRB_STATUS_AUTOSENSE_VALID 0x80
+
+
+/**
+ * Packet flags
+ */
+
+/**
+ * This flag indicates that the server should send back a completion for this
+ * packet.
+ */
+#define REQUEST_COMPLETION_FLAG 0x1
+
+/**
+ * This is the set of flags that the vsc can set in any packets it sends
+ */
+#define VSC_LEGAL_FLAGS (REQUEST_COMPLETION_FLAG)
+
+#endif /* __HV_VSTORAGE_H__ */
diff --git a/sys/dev/hyperv/utilities/hv_util.c b/sys/dev/hyperv/utilities/hv_util.c
new file mode 100644
index 000000000000..9ad4370285ff
--- /dev/null
+++ b/sys/dev/hyperv/utilities/hv_util.c
@@ -0,0 +1,492 @@
+/*-
+ * Copyright (c) 2009-2012 Microsoft Corp.
+ * Copyright (c) 2012 NetApp Inc.
+ * Copyright (c) 2012 Citrix Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice unmodified, this list of conditions, and the following
+ * disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * A common driver for all hyper-V util services.
+ */
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/bus.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/reboot.h>
+#include <sys/timetc.h>
+
+#include <dev/hyperv/include/hyperv.h>
+
+ #define HV_SHUT_DOWN 0 /* HV_* service ids index service_table[] and receive_buffer[] */
+ #define HV_TIME_SYNCH 1
+ #define HV_HEART_BEAT 2
+ #define HV_KVP 3
+ #define HV_MAX_UTIL_SERVICES 4 /* number of entries, not a service id */
+
+ #define HV_NANO_SEC 1000000000L /* 10^ 9 nanosecs = 1 sec */
+
+ #define HV_WLTIMEDELTA 116444736000000000L /* in 100ns unit */ /* subtracted from the host time in hv_set_host_time; presumably the 1601->1970 epoch offset -- confirm */
+ #define HV_ICTIMESYNCFLAG_PROBE 0
+ #define HV_ICTIMESYNCFLAG_SYNC 1 /* tested as a bit mask in hv_adj_guesttime */
+ #define HV_ICTIMESYNCFLAG_SAMPLE 2 /* tested as a bit mask in hv_adj_guesttime */
+
+ typedef struct hv_vmbus_service { /* one entry per utility service; instances live in service_table[] */
+ hv_guid guid; /* Hyper-V GUID */
+ char* name; /* name of service */
+ boolean_t enabled; /* service enabled */
+ hv_work_queue* work_queue; /* background work queue */
+ /*
+ * function to initialize service
+ * (optional: hv_util_attach only calls it when it is not NULL;
+ * returns 0 on success)
+ */
+ int (*init)(struct hv_vmbus_service *);
+ /*
+ * function to process Hyper-V messages
+ * (hv_util_attach registers it with hv_vmbus_channel_open and passes
+ * the channel as its argument)
+ */
+ void (*callback)(void *);
+ } hv_vmbus_service;
+
+static void hv_shutdown_cb(void *context);
+static void hv_heartbeat_cb(void *context);
+static void hv_timesync_cb(void *context);
+static void hv_kvp_cb(void *context);
+
+static int hv_timesync_init(hv_vmbus_service *serv);
+
+/**
+ * Note: GUID codes below are predefined by the host hypervisor
+ * (Hyper-V and Azure)interface and required for correct operation.
+ */
+static hv_vmbus_service service_table[] = {
+ /* Shutdown Service */
+ { .guid.data = {0x31, 0x60, 0x0B, 0X0E, 0x13, 0x52, 0x34, 0x49,
+ 0x81, 0x8B, 0x38, 0XD9, 0x0C, 0xED, 0x39, 0xDB},
+ .name = "Hyper-V Shutdown Service\n",
+ .enabled = TRUE,
+ .callback = hv_shutdown_cb,
+ },
+
+ /* Time Synch Service */
+ { .guid.data = {0x30, 0xe6, 0x27, 0x95, 0xae, 0xd0, 0x7b, 0x49,
+ 0xad, 0xce, 0xe8, 0x0a, 0xb0, 0x17, 0x5c, 0xaf},
+ .name = "Hyper-V Time Synch Service\n",
+ .enabled = TRUE,
+ .init = hv_timesync_init,
+ .callback = hv_timesync_cb,
+ },
+
+ /* Heartbeat Service */
+ { .guid.data = {0x39, 0x4f, 0x16, 0x57, 0x15, 0x91, 0x78, 0x4e,
+ 0xab, 0x55, 0x38, 0x2f, 0x3b, 0xd5, 0x42, 0x2d},
+ .name = "Hyper-V Heartbeat Service\n",
+ .enabled = TRUE,
+ .callback = hv_heartbeat_cb,
+
+ },
+
+ /* KVP (Key Value Pair) Service */
+ { .guid.data = {0xe7, 0xf4, 0xa0, 0xa9, 0x45, 0x5a, 0x96, 0x4d,
+ 0xb8, 0x27, 0x8a, 0x84, 0x1e, 0x8c, 0x3, 0xe6},
+ .name = "Hyper-V KVP Service\n",
+ .enabled = FALSE,
+ .callback = hv_kvp_cb,
+ },
+};
+
+/**
+ * Receive buffer pointers, there is one buffer per utility service. The
+ * buffer is allocated during attach().
+ */
+static uint8_t* receive_buffer[HV_MAX_UTIL_SERVICES];
+
+struct hv_ictimesync_data {
+ uint64_t parenttime;
+ uint64_t childtime;
+ uint64_t roundtriptime;
+ uint8_t flags;
+} __packed;
+
+ /**
+  * Create the work queue used to apply time updates outside the channel
+  * callback.
+  *
+  * @param serv service table entry being initialized
+  * @returns 0 on success, ENOMEM if the work queue could not be created
+  */
+ static int hv_timesync_init(hv_vmbus_service *serv)
+ {
+ 	hv_work_queue *wq = hv_work_queue_create("Time Sync");
+
+ 	serv->work_queue = wq;
+ 	return ((wq != NULL) ? 0 : ENOMEM);
+ }
+
+ /**
+  * Answer an IC version negotiation message in place.
+  *
+  * The negotiate payload is located in @buf right after the pipe header and
+  * the icmsg header. If the host offered two framework versions and the
+  * second one has major number 3, version 3.0 is selected; otherwise 1.0.
+  * The chosen version is written to both of the first two version slots
+  * and both version counts are set to 1.
+  *
+  * @param icmsghdrp icmsg header inside @buf; its icmsgsize is set to 0x10
+  * @param negop     ignored on entry (callers pass NULL); recomputed from @buf
+  * @param buf       start of the received packet
+  */
+ static void
+ hv_negotiate_version(
+ 	struct hv_vmbus_icmsg_hdr* icmsghdrp,
+ 	struct hv_vmbus_icmsg_negotiate* negop,
+ 	uint8_t* buf)
+ {
+ 	int major;
+
+ 	icmsghdrp->icmsgsize = 0x10;
+
+ 	negop = (struct hv_vmbus_icmsg_negotiate *)&buf[
+ 	    sizeof(struct hv_vmbus_pipe_hdr) +
+ 	    sizeof(struct hv_vmbus_icmsg_hdr)];
+
+ 	/* Decide before overwriting the fields the decision is based on. */
+ 	major = (negop->icframe_vercnt == 2 &&
+ 	    negop->icversion_data[1].major == 3) ? 3 : 1;
+
+ 	negop->icversion_data[0].major = major;
+ 	negop->icversion_data[0].minor = 0;
+ 	negop->icversion_data[1].major = major;
+ 	negop->icversion_data[1].minor = 0;
+
+ 	negop->icframe_vercnt = 1;
+ 	negop->icmsg_vercnt = 1;
+ }
+
+ static void hv_kvp_cb(void *context) /* KVP channel callback: empty stub; the KVP entry in service_table is disabled */
+ {
+ }
+
+/**
+ * Set host time based on time sync message from host
+ */
+static void
+hv_set_host_time(void *context)
+{
+ uint64_t hosttime = (uint64_t)context;
+ struct timespec ts, host_ts;
+ int64_t tns, host_tns, tmp, tsec;
+
+ nanotime(&ts);
+ tns = ts.tv_sec * HV_NANO_SEC + ts.tv_nsec;
+ host_tns = (hosttime - HV_WLTIMEDELTA) * 100;
+
+ tmp = host_tns;
+ tsec = tmp / HV_NANO_SEC;
+ host_ts.tv_nsec = (long) (tmp - (tsec * HV_NANO_SEC));
+ host_ts.tv_sec = tsec;
+
+ /* force time sync with host after reboot, restore, etc. */
+ mtx_lock(&Giant);
+ tc_setclock(&host_ts);
+ resettodr();
+ mtx_unlock(&Giant);
+}
+
+/**
+ * @brief Synchronize time with host after reboot, restore, etc.
+ *
+ * ICTIMESYNCFLAG_SYNC flag bit indicates reboot, restore events of the VM.
+ * After reboot the flag ICTIMESYNCFLAG_SYNC is included in the first time
+ * message after the timesync channel is opened. Since the hv_utils module is
+ * loaded after hv_vmbus, the first message is usually missed. The other
+ * thing is, systime is automatically set to emulated hardware clock which may
+ * not be UTC time or in the same time zone. So, to override these effects, we
+ * use the first 50 time samples for initial system time setting.
+ */
+static inline
+void hv_adj_guesttime(uint64_t hosttime, uint8_t flags)
+{
+ static int scnt = 50;
+
+ if ((flags & HV_ICTIMESYNCFLAG_SYNC) != 0) {
+ hv_queue_work_item(service_table[HV_TIME_SYNCH].work_queue,
+ hv_set_host_time, (void *) hosttime);
+ return;
+ }
+
+ if ((flags & HV_ICTIMESYNCFLAG_SAMPLE) != 0 && scnt > 0) {
+ scnt--;
+ hv_queue_work_item(service_table[HV_TIME_SYNCH].work_queue,
+ hv_set_host_time, (void *) hosttime);
+ }
+}
+
+/**
+ * Time Sync Channel message handler
+ */
+static void
+hv_timesync_cb(void *context)
+{
+ hv_vmbus_channel* channel = context;
+ hv_vmbus_icmsg_hdr* icmsghdrp;
+ uint32_t recvlen;
+ uint64_t requestId;
+ int ret;
+ uint8_t* time_buf;
+ struct hv_ictimesync_data* timedatap;
+
+ time_buf = receive_buffer[HV_TIME_SYNCH];
+
+ ret = hv_vmbus_channel_recv_packet(channel, time_buf,
+ PAGE_SIZE, &recvlen, &requestId);
+
+ if ((ret == 0) && recvlen > 0) {
+ icmsghdrp = (struct hv_vmbus_icmsg_hdr *) &time_buf[
+ sizeof(struct hv_vmbus_pipe_hdr)];
+
+ if (icmsghdrp->icmsgtype == HV_ICMSGTYPE_NEGOTIATE) {
+ hv_negotiate_version(icmsghdrp, NULL, time_buf);
+ } else {
+ timedatap = (struct hv_ictimesync_data *) &time_buf[
+ sizeof(struct hv_vmbus_pipe_hdr) +
+ sizeof(struct hv_vmbus_icmsg_hdr)];
+ hv_adj_guesttime(timedatap->parenttime, timedatap->flags);
+ }
+
+ icmsghdrp->icflags = HV_ICMSGHDRFLAG_TRANSACTION
+ | HV_ICMSGHDRFLAG_RESPONSE;
+
+ hv_vmbus_channel_send_packet(channel, time_buf,
+ recvlen, requestId,
+ HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, 0);
+ }
+}
+
+/**
+ * Shutdown
+ */
+static void
+hv_shutdown_cb(void *context)
+{
+ uint8_t* buf;
+ hv_vmbus_channel* channel = context;
+ uint8_t execute_shutdown = 0;
+ hv_vmbus_icmsg_hdr* icmsghdrp;
+ uint32_t recv_len;
+ uint64_t request_id;
+ int ret;
+ hv_vmbus_shutdown_msg_data* shutdown_msg;
+
+ buf = receive_buffer[HV_SHUT_DOWN];
+
+ ret = hv_vmbus_channel_recv_packet(channel, buf, PAGE_SIZE,
+ &recv_len, &request_id);
+
+ if ((ret == 0) && recv_len > 0) {
+
+ icmsghdrp = (struct hv_vmbus_icmsg_hdr *)
+ &buf[sizeof(struct hv_vmbus_pipe_hdr)];
+
+ if (icmsghdrp->icmsgtype == HV_ICMSGTYPE_NEGOTIATE) {
+ hv_negotiate_version(icmsghdrp, NULL, buf);
+
+ } else {
+ shutdown_msg =
+ (struct hv_vmbus_shutdown_msg_data *)
+ &buf[sizeof(struct hv_vmbus_pipe_hdr) +
+ sizeof(struct hv_vmbus_icmsg_hdr)];
+
+ switch (shutdown_msg->flags) {
+ case 0:
+ case 1:
+ icmsghdrp->status = HV_S_OK;
+ execute_shutdown = 1;
+ if(bootverbose)
+ printf("Shutdown request received -"
+ " graceful shutdown initiated\n");
+ break;
+ default:
+ icmsghdrp->status = HV_E_FAIL;
+ execute_shutdown = 0;
+ printf("Shutdown request received -"
+ " Invalid request\n");
+ break;
+ }
+ }
+
+ icmsghdrp->icflags = HV_ICMSGHDRFLAG_TRANSACTION |
+ HV_ICMSGHDRFLAG_RESPONSE;
+
+ hv_vmbus_channel_send_packet(channel, buf,
+ recv_len, request_id,
+ HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, 0);
+ }
+
+ if (execute_shutdown)
+ shutdown_nice(RB_POWEROFF);
+}
+
+/**
+ * Process heartbeat message
+ */
+static void
+hv_heartbeat_cb(void *context)
+{
+ uint8_t* buf;
+ hv_vmbus_channel* channel = context;
+ uint32_t recvlen;
+ uint64_t requestid;
+ int ret;
+
+ struct hv_vmbus_heartbeat_msg_data* heartbeat_msg;
+ struct hv_vmbus_icmsg_hdr* icmsghdrp;
+
+ buf = receive_buffer[HV_HEART_BEAT];
+
+ ret = hv_vmbus_channel_recv_packet(channel, buf, PAGE_SIZE, &recvlen,
+ &requestid);
+
+ if ((ret == 0) && recvlen > 0) {
+
+ icmsghdrp = (struct hv_vmbus_icmsg_hdr *)
+ &buf[sizeof(struct hv_vmbus_pipe_hdr)];
+
+ if (icmsghdrp->icmsgtype == HV_ICMSGTYPE_NEGOTIATE) {
+ hv_negotiate_version(icmsghdrp, NULL, buf);
+
+ } else {
+ heartbeat_msg =
+ (struct hv_vmbus_heartbeat_msg_data *)
+ &buf[sizeof(struct hv_vmbus_pipe_hdr) +
+ sizeof(struct hv_vmbus_icmsg_hdr)];
+
+ heartbeat_msg->seq_num += 1;
+ }
+
+ icmsghdrp->icflags = HV_ICMSGHDRFLAG_TRANSACTION |
+ HV_ICMSGHDRFLAG_RESPONSE;
+
+ hv_vmbus_channel_send_packet(channel, buf, recvlen, requestid,
+ HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, 0);
+ }
+}
+
+
+ /**
+  * Probe: match the device's type GUID against the enabled utility services.
+  *
+  * On a match the matching service_table entry is installed as the device
+  * softc, so attach/detach can find the service again.
+  *
+  * @param dev device being probed
+  * @returns 0 if the device is one of the enabled services, ENXIO otherwise
+  */
+ static int
+ hv_util_probe(device_t dev)
+ {
+ 	/* The device type does not change while probing: fetch it once. */
+ 	const char *type_guid = vmbus_get_type(dev);
+ 	int i;
+
+ 	for (i = 0; i < HV_MAX_UTIL_SERVICES; i++) {
+ 		if (!service_table[i].enabled)
+ 			continue;
+ 		if (memcmp(type_guid, &service_table[i].guid,
+ 		    sizeof(hv_guid)) == 0) {
+ 			/* Service GUIDs are distinct, so the first match is the only one. */
+ 			device_set_softc(dev, (void *) (&service_table[i]));
+ 			return (0);
+ 		}
+ 	}
+
+ 	return (ENXIO);
+ }
+
+ /**
+  * Attach: allocate the service's receive buffer, run its optional init
+  * hook and open the vmbus channel with the service callback.
+  *
+  * The softc is the service_table entry chosen by hv_util_probe(); its
+  * position in the table selects the receive_buffer[] slot.
+  *
+  * @param dev device being attached
+  * @returns 0 on success; ENODEV if the init hook fails; otherwise the
+  *          error returned by hv_vmbus_channel_open(). On failure the
+  *          receive buffer and any work queue created by the init hook
+  *          are released.
+  */
+ static int
+ hv_util_attach(device_t dev)
+ {
+ 	struct hv_device* hv_dev;
+ 	struct hv_vmbus_service* service;
+ 	int ret;
+ 	size_t receive_buffer_offset;
+
+ 	hv_dev = vmbus_get_devctx(dev);
+ 	service = device_get_softc(dev);
+ 	receive_buffer_offset = service - &service_table[0];
+ 	/* service->name already ends with '\n'; do not add a second one. */
+ 	device_printf(dev, "Hyper-V Service attaching: %s", service->name);
+ 	receive_buffer[receive_buffer_offset] =
+ 	    malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK | M_ZERO);
+
+ 	if (service->init != NULL) {
+ 		ret = service->init(service);
+ 		if (ret) {
+ 			ret = ENODEV;
+ 			goto error0;
+ 		}
+ 	}
+
+ 	ret = hv_vmbus_channel_open(hv_dev->channel, 2 * PAGE_SIZE,
+ 	    2 * PAGE_SIZE, NULL, 0,
+ 	    service->callback, hv_dev->channel);
+
+ 	if (ret)
+ 		goto error0;
+
+ 	return (0);
+
+ error0:
+ 	/*
+ 	 * Undo the init hook: without this, a work queue created by init()
+ 	 * would leak when the channel fails to open, since detach is not
+ 	 * run for a device that failed to attach.
+ 	 */
+ 	if (service->work_queue != NULL) {
+ 		hv_work_queue_close(service->work_queue);
+ 		service->work_queue = NULL;
+ 	}
+
+ 	free(receive_buffer[receive_buffer_offset], M_DEVBUF);
+ 	receive_buffer[receive_buffer_offset] = NULL;
+
+ 	return (ret);
+ }
+
+ /**
+  * Detach: close the channel first so the service callback can no longer
+  * run, then release the service's work queue (if any) and its receive
+  * buffer.
+  *
+  * @param dev device being detached
+  * @returns 0 always
+  */
+ static int
+ hv_util_detach(device_t dev)
+ {
+ 	struct hv_device* hv_dev;
+ 	struct hv_vmbus_service* service;
+ 	size_t receive_buffer_offset;
+
+ 	hv_dev = vmbus_get_devctx(dev);
+
+ 	hv_vmbus_channel_close(hv_dev->channel);
+ 	service = device_get_softc(dev);
+ 	receive_buffer_offset = service - &service_table[0];
+
+ 	if (service->work_queue != NULL) {
+ 		hv_work_queue_close(service->work_queue);
+ 		/*
+ 		 * service points into the static service_table, which
+ 		 * outlives this device: do not leave a stale pointer behind.
+ 		 */
+ 		service->work_queue = NULL;
+ 	}
+
+ 	free(receive_buffer[receive_buffer_offset], M_DEVBUF);
+ 	receive_buffer[receive_buffer_offset] = NULL;
+
+ 	return (0);
+ }
+
+ static void hv_util_init(void) /* hook registered through SYSINIT at the end of this file; currently does nothing */
+ {
+ }
+
+ /**
+  * Module event handler.
+  *
+  * No event (load, unload or any other) needs any work here; every event
+  * is accepted.
+  *
+  * @returns 0 always
+  */
+ static int hv_util_modevent(module_t mod, int event, void *arg)
+ {
+ 	(void)mod;
+ 	(void)event;
+ 	(void)arg;
+
+ 	return (0);
+ }
+
+ static device_method_t util_methods[] = { /* newbus method table shared by all utility services */
+ /* Device interface */
+ DEVMETHOD(device_probe, hv_util_probe),
+ DEVMETHOD(device_attach, hv_util_attach),
+ DEVMETHOD(device_detach, hv_util_detach),
+ DEVMETHOD(device_shutdown, bus_generic_shutdown),
+ { 0, 0 } } /* all-zero entry terminates the table */
+ ;
+
+ static driver_t util_driver = { "hyperv-utils", util_methods, 0 }; /* softc size 0: hv_util_probe installs a service_table entry as softc */
+
+ static devclass_t util_devclass;
+
+ DRIVER_MODULE(hv_utils, vmbus, util_driver, util_devclass, hv_util_modevent, 0); /* attaches under the vmbus bus */
+ MODULE_VERSION(hv_utils, 1);
+ MODULE_DEPEND(hv_utils, vmbus, 1, 1, 1);
+
+ SYSINIT(hv_util_initx, SI_SUB_RUN_SCHEDULER, SI_ORDER_MIDDLE + 1,
+ hv_util_init, NULL); /* hv_util_init is currently an empty function */
diff --git a/sys/dev/hyperv/vmbus/hv_channel.c b/sys/dev/hyperv/vmbus/hv_channel.c
new file mode 100644
index 000000000000..17dfd7611235
--- /dev/null
+++ b/sys/dev/hyperv/vmbus/hv_channel.c
@@ -0,0 +1,842 @@
+/*-
+ * Copyright (c) 2009-2012 Microsoft Corp.
+ * Copyright (c) 2012 NetApp Inc.
+ * Copyright (c) 2012 Citrix Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice unmodified, this list of conditions, and the following
+ * disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/mutex.h>
+#include <machine/bus.h>
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/pmap.h>
+
+#include "hv_vmbus_priv.h"
+
+/*
+ * Forward declarations of the file-local helpers.
+ *
+ * For vmbus_channel_create_gpadl_header(), contig_buffer must be both
+ * physically and virtually contiguous and size must be a multiple of the
+ * page size.
+ */
+static int	vmbus_channel_create_gpadl_header(void *contig_buffer,
+		    uint32_t size, hv_vmbus_channel_msg_info **msg_info,
+		    uint32_t *message_count);
+static void	vmbus_channel_set_event(hv_vmbus_channel *channel);
+
+/**
+ * @brief Trigger an event notification on the specified channel
+ *
+ * Channels without an allocated monitor are signalled through
+ * hv_vmbus_set_event(). Otherwise the channel's bit is set in the send
+ * interrupt page and its monitor bit is set in the child-to-parent
+ * monitor page.
+ */
+static void
+vmbus_channel_set_event(hv_vmbus_channel *channel)
+{
+	hv_vmbus_monitor_page *child_to_parent;
+	uint32_t *interrupt_words;
+
+	if (!channel->offer_msg.monitor_allocated) {
+		hv_vmbus_set_event(channel->offer_msg.child_rel_id);
+		return;
+	}
+
+	/* One bit per channel, 32 channels per uint32_t word. */
+	interrupt_words =
+	    (uint32_t *)hv_vmbus_g_connection.send_interrupt_page;
+	synch_set_bit(channel->offer_msg.child_rel_id & 31,
+	    interrupt_words + (channel->offer_msg.child_rel_id >> 5));
+
+	/* The page after the first one is the child-to-parent monitor page. */
+	child_to_parent =
+	    ((hv_vmbus_monitor_page *)hv_vmbus_g_connection.monitor_pages) + 1;
+
+	synch_set_bit(channel->monitor_bit,
+	    (uint32_t *)&child_to_parent->
+		trigger_group[channel->monitor_group].pending);
+}
+
+/**
+ * @brief Open the specified channel
+ *
+ * Allocates one contiguous region that holds the outbound (send) ring
+ * followed by the inbound (receive) ring, establishes a GPADL for it,
+ * posts an open-channel request to the host and waits up to five seconds
+ * for the reply.
+ *
+ * @param new_channel             channel to open
+ * @param send_ring_buffer_size   size in bytes of the outbound ring
+ * @param recv_ring_buffer_size   size in bytes of the inbound ring
+ * @param user_data               optional bytes copied into the open message
+ * @param user_data_len           number of bytes in user_data (0 for none)
+ * @param pfn_on_channel_callback callback stored in the channel
+ * @param context                 argument stored for the callback
+ *
+ * @return 0 on success, ENOMEM when an allocation fails, ENXIO when the
+ *         host reports a non-zero open status, otherwise the error from
+ *         establishing the GPADL, posting the message or waiting for the
+ *         reply.
+ *
+ * NOTE(review): as before, a failure after the ring buffer has been
+ * allocated leaves the ring buffer (and any established GPADL) attached to
+ * the channel; confirm which caller is expected to release them.
+ * NOTE(review): user_data_len is not checked against the size of the
+ * message's user_data field; confirm callers never exceed it.
+ */
+int
+hv_vmbus_channel_open(
+	hv_vmbus_channel*		new_channel,
+	uint32_t			send_ring_buffer_size,
+	uint32_t			recv_ring_buffer_size,
+	void*				user_data,
+	uint32_t			user_data_len,
+	hv_vmbus_pfn_channel_callback	pfn_on_channel_callback,
+	void*				context)
+{
+	int ret = 0;
+	void *in, *out;
+	hv_vmbus_channel_open_channel *open_msg;
+	hv_vmbus_channel_msg_info *open_info;
+
+	new_channel->on_channel_callback = pfn_on_channel_callback;
+	new_channel->channel_callback_context = context;
+
+	/* Allocate the ring buffer */
+	out = contigmalloc((send_ring_buffer_size + recv_ring_buffer_size),
+	    M_DEVBUF, M_ZERO, 0UL, BUS_SPACE_MAXADDR, PAGE_SIZE, 0);
+	KASSERT(out != NULL,
+	    ("Error VMBUS: contigmalloc failed to allocate Ring Buffer!"));
+	if (out == NULL)
+		return (ENOMEM);
+
+	/* The inbound ring starts right after the outbound ring. */
+	in = ((uint8_t *) out + send_ring_buffer_size);
+
+	new_channel->ring_buffer_pages = out;
+	new_channel->ring_buffer_page_count = (send_ring_buffer_size
+	    + recv_ring_buffer_size) >> PAGE_SHIFT;
+
+	hv_vmbus_ring_buffer_init(
+	    &new_channel->outbound,
+	    out,
+	    send_ring_buffer_size);
+
+	hv_vmbus_ring_buffer_init(
+	    &new_channel->inbound,
+	    in,
+	    recv_ring_buffer_size);
+
+	/*
+	 * Establish the gpadl for the ring buffer
+	 */
+	new_channel->ring_buffer_gpadl_handle = 0;
+
+	ret = hv_vmbus_channel_establish_gpadl(new_channel,
+	    new_channel->outbound.ring_buffer,
+	    send_ring_buffer_size + recv_ring_buffer_size,
+	    &new_channel->ring_buffer_gpadl_handle);
+
+	/* Without a GPADL the host cannot reach the rings: do not go on. */
+	if (ret != 0)
+		return (ret);
+
+	/*
+	 * Create and init the channel open message. The buffer is zeroed so
+	 * that fields not set below do not carry stale heap contents to the
+	 * host.
+	 */
+	open_info = (hv_vmbus_channel_msg_info*) malloc(
+	    sizeof(hv_vmbus_channel_msg_info) +
+		sizeof(hv_vmbus_channel_open_channel),
+	    M_DEVBUF,
+	    M_NOWAIT | M_ZERO);
+	KASSERT(open_info != NULL,
+	    ("Error VMBUS: malloc failed to allocate Open Channel message!"));
+
+	if (open_info == NULL)
+		return (ENOMEM);
+
+	sema_init(&open_info->wait_sema, 0, "Open Info Sema");
+
+	open_msg = (hv_vmbus_channel_open_channel*) open_info->msg;
+	open_msg->header.message_type = HV_CHANNEL_MESSAGE_OPEN_CHANNEL;
+	open_msg->open_id = new_channel->offer_msg.child_rel_id;
+	open_msg->child_rel_id = new_channel->offer_msg.child_rel_id;
+	open_msg->ring_buffer_gpadl_handle =
+	    new_channel->ring_buffer_gpadl_handle;
+	open_msg->downstream_ring_buffer_page_offset = send_ring_buffer_size
+	    >> PAGE_SHIFT;
+	open_msg->server_context_area_gpadl_handle = 0;
+
+	if (user_data_len)
+		memcpy(open_msg->user_data, user_data, user_data_len);
+
+	/* Queue the request so the response handler can find it. */
+	mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock);
+	TAILQ_INSERT_TAIL(
+	    &hv_vmbus_g_connection.channel_msg_anchor,
+	    open_info,
+	    msg_list_entry);
+	mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock);
+
+	ret = hv_vmbus_post_message(
+	    open_msg, sizeof(hv_vmbus_channel_open_channel));
+
+	if (ret != 0)
+		goto cleanup;
+
+	/* The timeout is expressed in ticks: wait five seconds. */
+	ret = sema_timedwait(&open_info->wait_sema, 5 * hz);
+
+	if (ret)
+		goto cleanup;
+
+	if (open_info->response.open_result.status == 0) {
+		if(bootverbose)
+			printf("VMBUS: channel <%p> open success.\n", new_channel);
+	} else {
+		if(bootverbose)
+			printf("Error VMBUS: channel <%p> open failed - %d!\n",
+			    new_channel, open_info->response.open_result.status);
+		/* The host refused the open: report it to the caller. */
+		ret = ENXIO;
+	}
+
+cleanup:
+	mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock);
+	TAILQ_REMOVE(
+	    &hv_vmbus_g_connection.channel_msg_anchor,
+	    open_info,
+	    msg_list_entry);
+	mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock);
+	sema_destroy(&open_info->wait_sema);
+	free(open_info, M_DEVBUF);
+
+	return (ret);
+}
+
+/**
+ * @brief Create the GPADL message(s) describing the specified buffer
+ *
+ * Builds a GPADL header message carrying as many page frame numbers as fit
+ * in one channel message. Page frame numbers that do not fit are placed in
+ * GPADL body messages linked on the header's sub-message list.
+ *
+ * @param contig_buffer  start of the buffer; consecutive page frame
+ *                       numbers are generated from its physical address
+ * @param size           buffer size in bytes (page-size multiple)
+ * @param msg_info       on success receives the header message
+ * @param message_count  on success receives the number of messages built
+ *                       (the header plus every body)
+ *
+ * @return 0 on success or ENOMEM; on failure everything allocated here is
+ *         released and the output parameters are left untouched.
+ */
+static int
+vmbus_channel_create_gpadl_header(
+	void*				contig_buffer,
+	uint32_t			size,	/* page-size multiple */
+	hv_vmbus_channel_msg_info**	msg_info,
+	uint32_t*			message_count)
+{
+	int i;
+	int page_count;
+	unsigned long long pfn;
+	uint32_t msg_size;
+	uint32_t msgs;
+	hv_vmbus_channel_gpadl_header *gpa_header;
+	hv_vmbus_channel_gpadl_body *gpadl_body;
+	hv_vmbus_channel_msg_info *msg_header;
+	hv_vmbus_channel_msg_info *msg_body;
+	int pfn_sum, pfn_count, pfn_left, pfn_curr, pfn_size;
+
+	page_count = size >> PAGE_SHIFT;
+	pfn = hv_get_phys_addr(contig_buffer) >> PAGE_SHIFT;
+
+	/* How many page frame numbers fit in the header message? */
+	pfn_size = HV_MAX_SIZE_CHANNEL_MESSAGE
+	    - sizeof(hv_vmbus_channel_gpadl_header)
+	    - sizeof(hv_gpa_range);
+	pfn_count = pfn_size / sizeof(uint64_t);
+
+	/* The header carries everything when the buffer is small enough. */
+	if (pfn_count > page_count)
+		pfn_count = page_count;
+
+	/* Fill in the header */
+	msg_size = sizeof(hv_vmbus_channel_msg_info)
+	    + sizeof(hv_vmbus_channel_gpadl_header)
+	    + sizeof(hv_gpa_range)
+	    + pfn_count * sizeof(uint64_t);
+	msg_header = malloc(msg_size, M_DEVBUF, M_NOWAIT | M_ZERO);
+	KASSERT(
+	    msg_header != NULL,
+	    ("Error VMBUS: malloc failed to allocate Gpadl Message!"));
+	if (msg_header == NULL)
+		return (ENOMEM);
+
+	/* Always give the header a valid (possibly empty) body list. */
+	TAILQ_INIT(&msg_header->sub_msg_list_anchor);
+	msg_header->message_size = msg_size;
+
+	gpa_header = (hv_vmbus_channel_gpadl_header*) msg_header->msg;
+	gpa_header->range_count = 1;
+	/* The range length always describes the whole buffer. */
+	gpa_header->range_buf_len = sizeof(hv_gpa_range)
+	    + page_count * sizeof(uint64_t);
+	gpa_header->range[0].byte_offset = 0;
+	gpa_header->range[0].byte_count = size;
+	for (i = 0; i < pfn_count; i++) {
+		gpa_header->range[0].pfn_array[i] = pfn + i;
+	}
+
+	msgs = 1;
+	pfn_sum = pfn_count;
+	pfn_left = page_count - pfn_count;
+
+	/*
+	 * Figure out how many pfns fit in one body message
+	 */
+	pfn_size = HV_MAX_SIZE_CHANNEL_MESSAGE
+	    - sizeof(hv_vmbus_channel_gpadl_body);
+	pfn_count = pfn_size / sizeof(uint64_t);
+
+	/*
+	 * Fill in the bodies, if any are needed
+	 */
+	while (pfn_left > 0) {
+		pfn_curr = (pfn_left > pfn_count) ? pfn_count : pfn_left;
+
+		msg_size = sizeof(hv_vmbus_channel_msg_info) +
+		    sizeof(hv_vmbus_channel_gpadl_body) +
+		    pfn_curr * sizeof(uint64_t);
+		msg_body = malloc(msg_size, M_DEVBUF, M_NOWAIT | M_ZERO);
+		KASSERT(
+		    msg_body != NULL,
+		    ("Error VMBUS: malloc failed to allocate Gpadl msg_body!"));
+		if (msg_body == NULL) {
+			/* Do not leak the header or the bodies built so far. */
+			while ((msg_body = TAILQ_FIRST(
+			    &msg_header->sub_msg_list_anchor)) != NULL) {
+				TAILQ_REMOVE(
+				    &msg_header->sub_msg_list_anchor,
+				    msg_body,
+				    msg_list_entry);
+				free(msg_body, M_DEVBUF);
+			}
+			free(msg_header, M_DEVBUF);
+			return (ENOMEM);
+		}
+
+		msg_body->message_size = msg_size;
+		msgs++;
+		gpadl_body = (hv_vmbus_channel_gpadl_body*) msg_body->msg;
+		for (i = 0; i < pfn_curr; i++) {
+			gpadl_body->pfn[i] = pfn + pfn_sum + i;
+		}
+
+		TAILQ_INSERT_TAIL(
+		    &msg_header->sub_msg_list_anchor,
+		    msg_body,
+		    msg_list_entry);
+		pfn_sum += pfn_curr;
+		pfn_left -= pfn_curr;
+	}
+
+	*msg_info = msg_header;
+	*message_count = msgs;
+
+	return (0);
+}
+
+/**
+ * @brief Establish a GPADL for the specified buffer
+ *
+ * Reserves a new GPADL handle, posts the GPADL header message and any body
+ * messages to the host, and waits up to five seconds for the host's reply.
+ *
+ * @param channel        channel the GPADL is created for
+ * @param contig_buffer  start of the buffer to describe
+ * @param size           buffer size in bytes (page-size multiple)
+ * @param gpadl_handle   receives the new handle; written only on success
+ *
+ * @return 0 on success, otherwise the error from building the messages,
+ *         posting them or waiting for the reply.
+ */
+int
+hv_vmbus_channel_establish_gpadl(
+	hv_vmbus_channel*	channel,
+	void*			contig_buffer,
+	uint32_t		size, /* page-size multiple */
+	uint32_t*		gpadl_handle)
+
+{
+	int ret = 0;
+	hv_vmbus_channel_gpadl_header *gpadl_msg;
+	hv_vmbus_channel_gpadl_body *gpadl_body;
+	hv_vmbus_channel_msg_info *msg_info;
+	hv_vmbus_channel_msg_info *sub_msg_info;
+	uint32_t msg_count;
+	uint32_t next_gpadl_handle;
+
+	/*
+	 * Reserve a handle with a single atomic fetch-and-add so that two
+	 * concurrent callers can never obtain the same value. A handle
+	 * reserved for a request that later fails is simply skipped; giving
+	 * it back could hand out a value another caller already holds.
+	 */
+	next_gpadl_handle = atomic_fetchadd_int(
+	    (volatile u_int *)&hv_vmbus_g_connection.next_gpadl_handle, 1);
+
+	ret = vmbus_channel_create_gpadl_header(
+	    contig_buffer, size, &msg_info, &msg_count);
+
+	if (ret != 0)	/* allocation failed: nothing to clean up */
+		return (ret);
+
+	sema_init(&msg_info->wait_sema, 0, "Open Info Sema");
+	gpadl_msg = (hv_vmbus_channel_gpadl_header*) msg_info->msg;
+	gpadl_msg->header.message_type = HV_CHANNEL_MESSAGEL_GPADL_HEADER;
+	gpadl_msg->child_rel_id = channel->offer_msg.child_rel_id;
+	gpadl_msg->gpadl = next_gpadl_handle;
+
+	/* Queue the request so the response handler can find it. */
+	mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock);
+	TAILQ_INSERT_TAIL(
+	    &hv_vmbus_g_connection.channel_msg_anchor,
+	    msg_info,
+	    msg_list_entry);
+	mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock);
+
+	/* Only the payload that follows the bookkeeping header is posted. */
+	ret = hv_vmbus_post_message(
+	    gpadl_msg,
+	    msg_info->message_size -
+		(uint32_t) sizeof(hv_vmbus_channel_msg_info));
+
+	if (ret != 0)
+		goto cleanup;
+
+	if (msg_count > 1) {
+		TAILQ_FOREACH(sub_msg_info,
+		    &msg_info->sub_msg_list_anchor, msg_list_entry) {
+			gpadl_body =
+			    (hv_vmbus_channel_gpadl_body*) sub_msg_info->msg;
+
+			gpadl_body->header.message_type =
+			    HV_CHANNEL_MESSAGE_GPADL_BODY;
+			gpadl_body->gpadl = next_gpadl_handle;
+
+			ret = hv_vmbus_post_message(
+			    gpadl_body,
+			    sub_msg_info->message_size
+				- (uint32_t) sizeof(hv_vmbus_channel_msg_info));
+			/* if the post message failed, give up and clean up */
+			if (ret != 0)
+				goto cleanup;
+		}
+	}
+
+	/* The timeout is expressed in ticks: wait five seconds. */
+	ret = sema_timedwait(&msg_info->wait_sema, 5 * hz);
+	if (ret != 0)
+		goto cleanup;
+
+	*gpadl_handle = gpadl_msg->gpadl;
+
+cleanup:
+
+	mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock);
+	TAILQ_REMOVE(&hv_vmbus_g_connection.channel_msg_anchor,
+	    msg_info, msg_list_entry);
+	mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock);
+
+	/*
+	 * Release the body messages as well as the header. The sub-message
+	 * list is only walked when bodies were reported, because only then
+	 * is it known to have been initialised.
+	 */
+	if (msg_count > 1) {
+		while ((sub_msg_info = TAILQ_FIRST(
+		    &msg_info->sub_msg_list_anchor)) != NULL) {
+			TAILQ_REMOVE(&msg_info->sub_msg_list_anchor,
+			    sub_msg_info, msg_list_entry);
+			free(sub_msg_info, M_DEVBUF);
+		}
+	}
+
+	sema_destroy(&msg_info->wait_sema);
+	free(msg_info, M_DEVBUF);
+
+	return (ret);
+}
+
+/**
+ * @brief Teardown the specified GPADL handle
+ *
+ * Posts a GPADL teardown request for the handle and waits up to five
+ * seconds for the host's reply.
+ *
+ * @param channel       channel that owns the GPADL
+ * @param gpadl_handle  handle to tear down
+ *
+ * @return 0 on success, ENOMEM when the request cannot be allocated,
+ *         otherwise the error from posting the message or waiting for the
+ *         reply.
+ */
+int
+hv_vmbus_channel_teardown_gpdal(
+	hv_vmbus_channel*	channel,
+	uint32_t		gpadl_handle)
+{
+	int ret = 0;
+	hv_vmbus_channel_gpadl_teardown *msg;
+	hv_vmbus_channel_msg_info *info;
+
+	/* Zeroed so that unset message bytes are not posted to the host. */
+	info = (hv_vmbus_channel_msg_info *)
+	    malloc(sizeof(hv_vmbus_channel_msg_info) +
+		sizeof(hv_vmbus_channel_gpadl_teardown),
+		M_DEVBUF, M_NOWAIT | M_ZERO);
+	KASSERT(info != NULL,
+	    ("Error VMBUS: malloc failed to allocate Gpadl Teardown Msg!"));
+	if (info == NULL) {
+		/*
+		 * Nothing has been queued or initialised yet, so return
+		 * directly: the common cleanup path would dereference the
+		 * NULL pointer.
+		 */
+		return (ENOMEM);
+	}
+
+	sema_init(&info->wait_sema, 0, "Open Info Sema");
+
+	msg = (hv_vmbus_channel_gpadl_teardown*) info->msg;
+
+	msg->header.message_type = HV_CHANNEL_MESSAGE_GPADL_TEARDOWN;
+	msg->child_rel_id = channel->offer_msg.child_rel_id;
+	msg->gpadl = gpadl_handle;
+
+	/* Queue the request so the response handler can find it. */
+	mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock);
+	TAILQ_INSERT_TAIL(&hv_vmbus_g_connection.channel_msg_anchor,
+	    info, msg_list_entry);
+	mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock);
+
+	ret = hv_vmbus_post_message(msg,
+	    sizeof(hv_vmbus_channel_gpadl_teardown));
+	if (ret != 0)
+		goto cleanup;
+
+	/* The timeout is expressed in ticks: wait five seconds. */
+	ret = sema_timedwait(&info->wait_sema, 5 * hz);
+
+cleanup:
+	/*
+	 * Reached after a reply, a timeout or a failed post: dequeue and
+	 * release the request in every case.
+	 */
+	mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock);
+	TAILQ_REMOVE(&hv_vmbus_g_connection.channel_msg_anchor,
+	    info, msg_list_entry);
+	mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock);
+	sema_destroy(&info->wait_sema);
+	free(info, M_DEVBUF);
+
+	return (ret);
+}
+
+/**
+ * @brief Close the specified channel
+ *
+ * Clears the channel callback, posts a close-channel message to the host,
+ * tears down the ring buffer GPADL when one exists, and releases the ring
+ * buffers. A channel in the open state is also removed from the global
+ * channel list and freed.
+ *
+ * NOTE(review): if the close message cannot be allocated the function
+ * returns without releasing anything, as it did before; confirm whether a
+ * best-effort teardown is wanted in that case.
+ */
+void
+hv_vmbus_channel_close(hv_vmbus_channel *channel)
+{
+	hv_vmbus_channel_close_channel *msg;
+	hv_vmbus_channel_msg_info *info;
+
+	/* Stop handing inbound data to the driver. */
+	mtx_lock(&channel->inbound_lock);
+	channel->on_channel_callback = NULL;
+	mtx_unlock(&channel->inbound_lock);
+
+	/*
+	 * Send a closing message. The buffer is zeroed so that unset
+	 * message bytes are not posted to the host.
+	 */
+	info = (hv_vmbus_channel_msg_info *)
+	    malloc(sizeof(hv_vmbus_channel_msg_info) +
+		sizeof(hv_vmbus_channel_close_channel),
+		M_DEVBUF, M_NOWAIT | M_ZERO);
+	KASSERT(info != NULL, ("VMBUS: malloc failed hv_vmbus_channel_close!"));
+	if(info == NULL)
+		return;
+
+	msg = (hv_vmbus_channel_close_channel*) info->msg;
+	msg->header.message_type = HV_CHANNEL_MESSAGE_CLOSE_CHANNEL;
+	msg->child_rel_id = channel->offer_msg.child_rel_id;
+
+	/* Best effort: local teardown continues even if the post fails. */
+	(void) hv_vmbus_post_message(
+	    msg, sizeof(hv_vmbus_channel_close_channel));
+
+	/* Tear down the gpadl for the channel's ring buffer */
+	if (channel->ring_buffer_gpadl_handle) {
+		hv_vmbus_channel_teardown_gpdal(channel,
+		    channel->ring_buffer_gpadl_handle);
+	}
+
+	/* TODO: Send a msg to release the childRelId */
+
+	/* cleanup the ring buffers for this channel */
+	hv_ring_buffer_cleanup(&channel->outbound);
+	hv_ring_buffer_cleanup(&channel->inbound);
+
+	/*
+	 * contigfree() takes the size in bytes; the channel records the
+	 * size of the region as a page count, so convert it back.
+	 */
+	contigfree(
+	    channel->ring_buffer_pages,
+	    (unsigned long) channel->ring_buffer_page_count << PAGE_SHIFT,
+	    M_DEVBUF);
+
+	free(info, M_DEVBUF);
+
+	/*
+	 * If we are closing the channel during an error path in
+	 * opening the channel, don't free the channel
+	 * since the caller will free the channel
+	 */
+	if (channel->state == HV_CHANNEL_OPEN_STATE) {
+		mtx_lock_spin(&hv_vmbus_g_connection.channel_lock);
+		TAILQ_REMOVE(
+		    &hv_vmbus_g_connection.channel_anchor,
+		    channel,
+		    list_entry);
+		mtx_unlock_spin(&hv_vmbus_g_connection.channel_lock);
+
+		hv_vmbus_free_vmbus_channel(channel);
+	}
+
+}
+
+/**
+ * @brief Send the specified buffer on the given channel
+ *
+ * The packet written to the outbound ring consists of a packet descriptor,
+ * the caller's buffer and zero padding up to the next 8-byte boundary.
+ * After a successful write the host is signalled unless the outbound
+ * ring's interrupt mask is set.
+ *
+ * @return the result of the ring buffer write (0 on success)
+ */
+int
+hv_vmbus_channel_send_packet(
+	hv_vmbus_channel*	channel,
+	void*			buffer,
+	uint32_t		buffer_len,
+	uint64_t		request_id,
+	hv_vmbus_packet_type	type,
+	uint32_t		flags)
+{
+	hv_vm_packet_descriptor header;
+	hv_vmbus_sg_buffer_list sg[3];
+	uint64_t zero_pad = 0;
+	uint32_t raw_len;
+	uint32_t padded_len;
+	int error;
+
+	raw_len = sizeof(hv_vm_packet_descriptor) + buffer_len;
+	padded_len = HV_ALIGN_UP(raw_len, sizeof(uint64_t));
+
+	/* Descriptor; offsets and lengths are counted in 8-byte units. */
+	header.type = type;
+	header.flags = flags;
+	header.data_offset8 = sizeof(hv_vm_packet_descriptor) >> 3;
+	header.length8 = (uint16_t) (padded_len >> 3);
+	header.transaction_id = request_id;
+
+	/* Scatter list: descriptor, payload, alignment padding. */
+	sg[0].data = &header;
+	sg[0].length = sizeof(hv_vm_packet_descriptor);
+	sg[1].data = buffer;
+	sg[1].length = buffer_len;
+	sg[2].data = &zero_pad;
+	sg[2].length = padded_len - raw_len;
+
+	error = hv_ring_buffer_write(&channel->outbound, sg, 3);
+	if (error != 0)
+		return (error);
+
+	/* TODO: We should determine if this is optional */
+	if (!hv_vmbus_get_ring_buffer_interrupt_mask(&channel->outbound))
+		vmbus_channel_set_event(channel);
+
+	return (0);
+}
+
+/**
+ * @brief Send a range of single-page buffer packets using
+ * a GPADL Direct packet type
+ *
+ * The descriptor carries one range entry per page buffer and is shortened
+ * to the number of entries actually used. It is followed in the ring by
+ * the caller's buffer and zero padding up to the next 8-byte boundary.
+ *
+ * @return EINVAL when page_count exceeds HV_MAX_PAGE_BUFFER_COUNT,
+ *         otherwise the result of the ring buffer write (0 on success)
+ */
+int
+hv_vmbus_channel_send_packet_pagebuffer(
+	hv_vmbus_channel*	channel,
+	hv_vmbus_page_buffer	page_buffers[],
+	uint32_t		page_count,
+	void*			buffer,
+	uint32_t		buffer_len,
+	uint64_t		request_id)
+{
+	hv_vmbus_channel_packet_page_buffer header;
+	hv_vmbus_sg_buffer_list sg[3];
+	uint64_t zero_pad = 0;
+	uint32_t header_len;
+	uint32_t raw_len;
+	uint32_t padded_len;
+	uint32_t idx;
+	int error;
+
+	if (page_count > HV_MAX_PAGE_BUFFER_COUNT)
+		return (EINVAL);
+
+	/*
+	 * hv_vmbus_channel_packet_page_buffer is sized for the maximum
+	 * number of ranges; trim the unused entries off the end.
+	 */
+	header_len = sizeof(hv_vmbus_channel_packet_page_buffer) -
+	    ((HV_MAX_PAGE_BUFFER_COUNT - page_count) *
+	    sizeof(hv_vmbus_page_buffer));
+	raw_len = header_len + buffer_len;
+	padded_len = HV_ALIGN_UP(raw_len, sizeof(uint64_t));
+
+	/* Descriptor; offsets and lengths are counted in 8-byte units. */
+	header.type = HV_VMBUS_PACKET_TYPE_DATA_USING_GPA_DIRECT;
+	header.flags = HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED;
+	header.data_offset8 = header_len >> 3;
+	header.length8 = (uint16_t) (padded_len >> 3);
+	header.transaction_id = request_id;
+	header.range_count = page_count;
+
+	for (idx = 0; idx < page_count; idx++) {
+		header.range[idx].length = page_buffers[idx].length;
+		header.range[idx].offset = page_buffers[idx].offset;
+		header.range[idx].pfn = page_buffers[idx].pfn;
+	}
+
+	/* Scatter list: descriptor, payload, alignment padding. */
+	sg[0].data = &header;
+	sg[0].length = header_len;
+	sg[1].data = buffer;
+	sg[1].length = buffer_len;
+	sg[2].data = &zero_pad;
+	sg[2].length = padded_len - raw_len;
+
+	error = hv_ring_buffer_write(&channel->outbound, sg, 3);
+	if (error != 0)
+		return (error);
+
+	/* TODO: We should determine if this is optional */
+	if (!hv_vmbus_get_ring_buffer_interrupt_mask(&channel->outbound))
+		vmbus_channel_set_event(channel);
+
+	return (0);
+}
+
+/**
+ * @brief Send a multi-page buffer packet using a GPADL Direct packet type
+ *
+ * The descriptor carries a single range whose page frame array is copied
+ * from multi_page_buffer and is shortened to the number of pages actually
+ * spanned. It is followed in the ring by the caller's buffer and zero
+ * padding up to the next 8-byte boundary.
+ *
+ * @return EINVAL when the buffer spans no pages or more than
+ *         HV_MAX_MULTIPAGE_BUFFER_COUNT pages, otherwise the result of the
+ *         ring buffer write (0 on success)
+ */
+int
+hv_vmbus_channel_send_packet_multipagebuffer(
+	hv_vmbus_channel*		channel,
+	hv_vmbus_multipage_buffer*	multi_page_buffer,
+	void*				buffer,
+	uint32_t			buffer_len,
+	uint64_t			request_id)
+{
+	hv_vmbus_channel_packet_multipage_buffer header;
+	hv_vmbus_sg_buffer_list sg[3];
+	uint64_t zero_pad = 0;
+	uint32_t pages;
+	uint32_t header_len;
+	uint32_t raw_len;
+	uint32_t padded_len;
+	int error;
+
+	pages = HV_NUM_PAGES_SPANNED(
+	    multi_page_buffer->offset,
+	    multi_page_buffer->length);
+
+	if (pages == 0 || pages > HV_MAX_MULTIPAGE_BUFFER_COUNT)
+		return (EINVAL);
+
+	/*
+	 * hv_vmbus_channel_packet_multipage_buffer is sized for the maximum
+	 * number of pages; trim the unused entries off the end.
+	 */
+	header_len = sizeof(hv_vmbus_channel_packet_multipage_buffer) -
+	    ((HV_MAX_MULTIPAGE_BUFFER_COUNT - pages) * sizeof(uint64_t));
+	raw_len = header_len + buffer_len;
+	padded_len = HV_ALIGN_UP(raw_len, sizeof(uint64_t));
+
+	/* Descriptor; offsets and lengths are counted in 8-byte units. */
+	header.type = HV_VMBUS_PACKET_TYPE_DATA_USING_GPA_DIRECT;
+	header.flags = HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED;
+	header.data_offset8 = header_len >> 3;
+	header.length8 = (uint16_t) (padded_len >> 3);
+	header.transaction_id = request_id;
+	header.range_count = 1;
+
+	header.range.length = multi_page_buffer->length;
+	header.range.offset = multi_page_buffer->offset;
+	memcpy(header.range.pfn_array, multi_page_buffer->pfn_array,
+	    pages * sizeof(uint64_t));
+
+	/* Scatter list: descriptor, payload, alignment padding. */
+	sg[0].data = &header;
+	sg[0].length = header_len;
+	sg[1].data = buffer;
+	sg[1].length = buffer_len;
+	sg[2].data = &zero_pad;
+	sg[2].length = padded_len - raw_len;
+
+	error = hv_ring_buffer_write(&channel->outbound, sg, 3);
+	if (error != 0)
+		return (error);
+
+	/* TODO: We should determine if this is optional */
+	if (!hv_vmbus_get_ring_buffer_interrupt_mask(&channel->outbound))
+		vmbus_channel_set_event(channel);
+
+	return (0);
+}
+
+/**
+ * @brief Retrieve the user packet on the specified channel
+ *
+ * Peeks at the next packet descriptor on the inbound ring and copies the
+ * payload that follows the descriptor into Buffer.
+ *
+ * @param buffer_actual_len  receives the payload length; 0 when the peek
+ *                           does not succeed
+ * @param request_id         receives the packet's transaction id; 0 when
+ *                           nothing was copied
+ *
+ * @return EINVAL when the payload does not fit in buffer_len (the required
+ *         length is still reported), 0 otherwise. The result of the ring
+ *         buffer read itself is not reported.
+ */
+int
+hv_vmbus_channel_recv_packet(
+	hv_vmbus_channel*	channel,
+	void*			Buffer,
+	uint32_t		buffer_len,
+	uint32_t*		buffer_actual_len,
+	uint64_t*		request_id)
+{
+	hv_vm_packet_descriptor header;
+	uint32_t total_len;
+	uint32_t payload_offset;
+	uint32_t payload_len;
+
+	*buffer_actual_len = 0;
+	*request_id = 0;
+
+	/* A failed peek is reported as success with a length of zero. */
+	if (hv_ring_buffer_peek(&channel->inbound, &header,
+	    sizeof(hv_vm_packet_descriptor)) != 0)
+		return (0);
+
+	/* The descriptor counts lengths and offsets in 8-byte units. */
+	total_len = header.length8 << 3;
+	payload_offset = header.data_offset8 << 3;
+	payload_len = total_len - payload_offset;
+
+	*buffer_actual_len = payload_len;
+
+	if (payload_len > buffer_len)
+		return (EINVAL);
+
+	*request_id = header.transaction_id;
+
+	/* Copy over the packet to the user buffer */
+	(void) hv_ring_buffer_read(&channel->inbound, Buffer, payload_len,
+	    payload_offset);
+
+	return (0);
+}
+
+/**
+ * @brief Retrieve the raw packet on the specified channel
+ *
+ * Peeks at the next packet descriptor on the inbound ring and copies the
+ * whole packet, descriptor included, into buffer.
+ *
+ * @param buffer_actual_len  receives the packet length; 0 when the peek
+ *                           does not succeed
+ * @param request_id         receives the packet's transaction id; 0 when
+ *                           nothing was copied
+ *
+ * @return ENOBUFS when the packet does not fit in buffer_len (the required
+ *         length is still reported), 0 otherwise. The result of the ring
+ *         buffer read itself is not reported.
+ */
+int
+hv_vmbus_channel_recv_packet_raw(
+	hv_vmbus_channel*	channel,
+	void*			buffer,
+	uint32_t		buffer_len,
+	uint32_t*		buffer_actual_len,
+	uint64_t*		request_id)
+{
+	hv_vm_packet_descriptor header;
+	uint32_t total_len;
+
+	*buffer_actual_len = 0;
+	*request_id = 0;
+
+	/* A failed peek is reported as success with a length of zero. */
+	if (hv_ring_buffer_peek(&channel->inbound, &header,
+	    sizeof(hv_vm_packet_descriptor)) != 0)
+		return (0);
+
+	/* The descriptor counts the packet length in 8-byte units. */
+	total_len = header.length8 << 3;
+
+	*buffer_actual_len = total_len;
+
+	if (total_len > buffer_len)
+		return (ENOBUFS);
+
+	*request_id = header.transaction_id;
+
+	/* Copy over the entire packet to the user buffer */
+	(void) hv_ring_buffer_read(&channel->inbound, buffer, total_len, 0);
+
+	return (0);
+}
diff --git a/sys/dev/hyperv/vmbus/hv_channel_mgmt.c b/sys/dev/hyperv/vmbus/hv_channel_mgmt.c
new file mode 100644
index 000000000000..011e305709e6
--- /dev/null
+++ b/sys/dev/hyperv/vmbus/hv_channel_mgmt.c
@@ -0,0 +1,680 @@
+/*-
+ * Copyright (c) 2009-2012 Microsoft Corp.
+ * Copyright (c) 2012 NetApp Inc.
+ * Copyright (c) 2012 Citrix Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice unmodified, this list of conditions, and the following
+ * disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/mbuf.h>
+
+#include "hv_vmbus_priv.h"
+
+/* Signature of a function that handles one kind of channel message. */
+typedef void (*hv_pfn_channel_msg_handler)(hv_vmbus_channel_msg_header* msg);
+
+/*
+ * One row of the channel message dispatch table: a message type and the
+ * handler for it. The handler is NULL for types that have none.
+ */
+typedef struct hv_vmbus_channel_msg_table_entry {
+ hv_vmbus_channel_msg_type messageType;
+ hv_pfn_channel_msg_handler messageHandler;
+} hv_vmbus_channel_msg_table_entry;
+
+/*
+ * Internal functions: the handlers referenced by the channel message
+ * dispatch table, and the work-item routine that processes an offer.
+ */
+
+static void vmbus_channel_on_offer(hv_vmbus_channel_msg_header* hdr);
+static void vmbus_channel_on_open_result(hv_vmbus_channel_msg_header* hdr);
+static void vmbus_channel_on_offer_rescind(hv_vmbus_channel_msg_header* hdr);
+static void vmbus_channel_on_gpadl_created(hv_vmbus_channel_msg_header* hdr);
+static void vmbus_channel_on_gpadl_torndown(hv_vmbus_channel_msg_header* hdr);
+static void vmbus_channel_on_offers_delivered(hv_vmbus_channel_msg_header* hdr);
+static void vmbus_channel_on_version_response(hv_vmbus_channel_msg_header* hdr);
+static void vmbus_channel_process_offer(void *context);
+
+/**
+ * Channel message dispatch table
+ *
+ * Each row pairs a channel message type with the function that handles it;
+ * rows with a NULL handler are message types this file does not handle.
+ *
+ * NOTE(review): the rows appear to be listed in message-type order so that
+ * the table can be indexed by type - confirm against the message type enum
+ * and the dispatcher before adding or reordering rows.
+ */
+hv_vmbus_channel_msg_table_entry
+ g_channel_message_table[HV_CHANNEL_MESSAGE_COUNT] = {
+ { HV_CHANNEL_MESSAGE_INVALID, NULL },
+ { HV_CHANNEL_MESSAGE_OFFER_CHANNEL, vmbus_channel_on_offer },
+ { HV_CHANNEL_MESSAGE_RESCIND_CHANNEL_OFFER,
+ vmbus_channel_on_offer_rescind },
+ { HV_CHANNEL_MESSAGE_REQUEST_OFFERS, NULL },
+ { HV_CHANNEL_MESSAGE_ALL_OFFERS_DELIVERED,
+ vmbus_channel_on_offers_delivered },
+ { HV_CHANNEL_MESSAGE_OPEN_CHANNEL, NULL },
+ { HV_CHANNEL_MESSAGE_OPEN_CHANNEL_RESULT,
+ vmbus_channel_on_open_result },
+ { HV_CHANNEL_MESSAGE_CLOSE_CHANNEL, NULL },
+ { HV_CHANNEL_MESSAGEL_GPADL_HEADER, NULL },
+ { HV_CHANNEL_MESSAGE_GPADL_BODY, NULL },
+ { HV_CHANNEL_MESSAGE_GPADL_CREATED,
+ vmbus_channel_on_gpadl_created },
+ { HV_CHANNEL_MESSAGE_GPADL_TEARDOWN, NULL },
+ { HV_CHANNEL_MESSAGE_GPADL_TORNDOWN,
+ vmbus_channel_on_gpadl_torndown },
+ { HV_CHANNEL_MESSAGE_REL_ID_RELEASED, NULL },
+ { HV_CHANNEL_MESSAGE_INITIATED_CONTACT, NULL },
+ { HV_CHANNEL_MESSAGE_VERSION_RESPONSE,
+ vmbus_channel_on_version_response },
+ { HV_CHANNEL_MESSAGE_UNLOAD, NULL }
+};
+
+
+/**
+ * Implementation of the work abstraction.
+ *
+ * Task handler that runs one queued work item: the item's callback is
+ * invoked with its context and the item is then freed. When the owning
+ * queue has a semaphore, the callback runs between a wait and a post on
+ * it, which serializes work execution.
+ */
+static void
+work_item_callback(void *work, int pending)
+{
+	struct hv_work_item *item = work;
+
+	if (item->wq->work_sema != NULL)
+		sema_wait(item->wq->work_sema);
+
+	item->callback(item->context);
+
+	if (item->wq->work_sema != NULL)
+		sema_post(item->wq->work_sema);
+
+	free(item, M_DEVBUF);
+}
+
+/**
+ * @brief Create a work queue backed by a single taskqueue thread
+ *
+ * The queue named "vmbusQ" runs at PI_DISK priority without a
+ * serialization semaphore. Every other queue runs at PI_NET priority and
+ * shares the global control semaphore.
+ *
+ * @param name  base name of the queue; the taskqueue is named
+ *              "hv_<name>_<id>"
+ *
+ * @return the new work queue, or NULL when allocation, taskqueue creation
+ *         or thread start-up fails
+ */
+struct hv_work_queue*
+hv_work_queue_create(char* name)
+{
+	static unsigned int	qid = 0;
+	char			qname[64];
+	int			pri;
+	struct hv_work_queue*	wq;
+
+	wq = malloc(sizeof(struct hv_work_queue), M_DEVBUF, M_NOWAIT | M_ZERO);
+	KASSERT(wq != NULL, ("Error VMBUS: Failed to allocate work_queue\n"));
+	if (wq == NULL)
+		return (NULL);
+
+	/*
+	 * We use the work abstraction to handle messages
+	 * coming from the host and these are typically offers.
+	 * Some FreeBSD drivers appear to have a concurrency issue
+	 * where probe/attach needs to be serialized. We ensure that
+	 * by having only one thread process work elements in a
+	 * specific queue by serializing work execution.
+	 */
+	if (strcmp(name, "vmbusQ") == 0) {
+		pri = PI_DISK;
+	} else { /* control */
+		pri = PI_NET;
+		/*
+		 * Initialize the semaphore for this queue by pointing
+		 * to the global semaphore used for synchronizing all
+		 * control messages.
+		 */
+		wq->work_sema = &hv_vmbus_g_connection.control_sema;
+	}
+
+	/* Bounded formatting: a long name is truncated, never overflowed. */
+	snprintf(qname, sizeof(qname), "hv_%s_%u", name, qid);
+
+	/*
+	 * Fixme: FreeBSD 8.2 has a different prototype for
+	 * taskqueue_create(), and for certain other taskqueue functions.
+	 * We need to research the implications of these changes.
+	 * Fixme: Not sure when the changes were introduced.
+	 */
+	wq->queue = taskqueue_create(qname, M_NOWAIT, taskqueue_thread_enqueue,
+	    &wq->queue
+	    #if __FreeBSD_version < 800000
+	    , &wq->proc
+	    #endif
+	    );
+
+	if (wq->queue == NULL) {
+		free(wq, M_DEVBUF);
+		return (NULL);
+	}
+
+	if (taskqueue_start_threads(&wq->queue, 1, pri, "%s taskq", qname)) {
+		taskqueue_free(wq->queue);
+		free(wq, M_DEVBUF);
+		return (NULL);
+	}
+
+	/* The id only advances for queues that were actually created. */
+	qid++;
+
+	return (wq);
+}
+
+/**
+ * @brief Free a work queue: its taskqueue first, then the queue object
+ */
+void
+hv_work_queue_close(struct hv_work_queue *wq)
+{
+ /*
+ * TODO (KYS): the taskqueue should be drained explicitly before
+ * the hv_work_queue is closed; no drain call is made here yet.
+ */
+ taskqueue_free(wq->queue);
+ free(wq, M_DEVBUF);
+}
+
+/**
+ * @brief Create a work item and queue it for execution
+ *
+ * @param wq        work queue that will run the item
+ * @param callback  function invoked when the item runs
+ * @param context   argument passed to callback
+ *
+ * @return 0 on success, ENOMEM when the item cannot be allocated,
+ *         otherwise the error returned by taskqueue_enqueue()
+ */
+int
+hv_queue_work_item(
+	struct hv_work_queue *wq,
+	void (*callback)(void *), void *context)
+{
+	int ret;
+	struct hv_work_item *w = malloc(sizeof(struct hv_work_item),
+	    M_DEVBUF, M_NOWAIT | M_ZERO);
+	KASSERT(w != NULL, ("Error VMBUS: Failed to allocate WorkItem\n"));
+	if (w == NULL)
+		return (ENOMEM);
+
+	w->callback = callback;
+	w->context = context;
+	w->wq = wq;
+
+	TASK_INIT(&w->work, 0, work_item_callback, w);
+
+	ret = taskqueue_enqueue(wq->queue, &w->work);
+	if (ret != 0) {
+		/*
+		 * The item is normally freed by work_item_callback(); if it
+		 * was never queued that will not happen, so free it here.
+		 */
+		free(w, M_DEVBUF);
+	}
+
+	return (ret);
+}
+
+/**
+ * @brief Rescind the offer by initiating a device removal
+ *
+ * Work-item routine; context is the channel whose device is unregistered.
+ */
+static void
+vmbus_channel_process_rescind_offer(void *context)
+{
+	hv_vmbus_channel *rescinded = context;
+
+	hv_vmbus_child_device_unregister(rescinded->device);
+}
+
+/**
+ * @brief Allocate and initialize a vmbus channel object
+ *
+ * The channel is zero-filled, its inbound lock is initialized and a
+ * "control" work queue is created for it.
+ *
+ * @return the new channel, or NULL when the channel or its work queue
+ *         cannot be allocated
+ */
+hv_vmbus_channel*
+hv_vmbus_allocate_channel(void)
+{
+	hv_vmbus_channel *chan;
+
+	chan = malloc(sizeof(hv_vmbus_channel), M_DEVBUF, M_NOWAIT | M_ZERO);
+	KASSERT(chan != NULL, ("Error VMBUS: Failed to allocate channel!"));
+	if (chan == NULL)
+		return (NULL);
+
+	mtx_init(&chan->inbound_lock, "channel inbound", NULL, MTX_DEF);
+
+	chan->control_work_queue = hv_work_queue_create("control");
+	if (chan->control_work_queue != NULL)
+		return (chan);
+
+	/* No work queue: undo the partial construction. */
+	mtx_destroy(&chan->inbound_lock);
+	free(chan, M_DEVBUF);
+
+	return (NULL);
+}
+
+/**
+ * @brief Release the vmbus channel object itself
+ *
+ * Work-item routine; context is the channel. Its control work queue is
+ * closed and the channel memory is freed.
+ */
+static inline void
+ReleaseVmbusChannel(void *context)
+{
+	hv_vmbus_channel *doomed = context;
+
+	hv_work_queue_close(doomed->control_work_queue);
+	free(doomed, M_DEVBUF);
+}
+
+/**
+ * @brief Release the resources used by the vmbus channel object
+ *
+ * Destroys the channel's inbound lock and queues ReleaseVmbusChannel() on
+ * the global vmbus work queue to free the rest.
+ *
+ * NOTE(review): the result of hv_queue_work_item() is not checked, so the
+ * channel would not be freed if queueing fails - confirm this is
+ * acceptable.
+ */
+void
+hv_vmbus_free_vmbus_channel(hv_vmbus_channel* channel)
+{
+ mtx_destroy(&channel->inbound_lock);
+ /*
+ * We have to release the channel's workqueue/thread in
+ * the vmbus's workqueue/thread context
+ * ie we can't destroy ourselves
+ */
+ hv_queue_work_item(hv_vmbus_g_connection.work_queue,
+ ReleaseVmbusChannel, (void *) channel);
+}
+
+/**
+ * @brief Process the offer by creating a channel/device
+ * associated with this offer
+ *
+ * Work-item routine; context is the newly allocated channel. An offer
+ * whose interface type and instance match a channel already on the global
+ * list is discarded. Otherwise the channel is added to the list, a child
+ * device is created for it and registered on the bus.
+ */
+static void
+vmbus_channel_process_offer(void *context)
+{
+	hv_vmbus_channel *offered = context;
+	hv_vmbus_channel *known;
+	boolean_t is_duplicate = FALSE;
+
+	/*
+	 * Make sure this is a new offer
+	 */
+	mtx_lock_spin(&hv_vmbus_g_connection.channel_lock);
+
+	TAILQ_FOREACH(known, &hv_vmbus_g_connection.channel_anchor,
+	    list_entry) {
+		if (memcmp(&known->offer_msg.offer.interface_type,
+		    &offered->offer_msg.offer.interface_type,
+		    sizeof(hv_guid)) != 0)
+			continue;
+		if (memcmp(&known->offer_msg.offer.interface_instance,
+		    &offered->offer_msg.offer.interface_instance,
+		    sizeof(hv_guid)) != 0)
+			continue;
+
+		is_duplicate = TRUE;
+		break;
+	}
+
+	if (!is_duplicate) {
+		/* Insert at tail */
+		TAILQ_INSERT_TAIL(
+		    &hv_vmbus_g_connection.channel_anchor,
+		    offered,
+		    list_entry);
+	}
+	mtx_unlock_spin(&hv_vmbus_g_connection.channel_lock);
+
+	if (is_duplicate) {
+		hv_vmbus_free_vmbus_channel(offered);
+		return;
+	}
+
+	/*
+	 * Start the process of binding this offer to the driver
+	 * (We need to set the device field before calling
+	 * hv_vmbus_child_device_add())
+	 */
+	offered->device = hv_vmbus_child_device_create(
+	    offered->offer_msg.offer.interface_type,
+	    offered->offer_msg.offer.interface_instance, offered);
+
+	/*
+	 * TODO - the HV_CHANNEL_OPEN_STATE flag should not be set below
+	 * but in the "open" channel request. The failure handling below
+	 * doesn't take into account that a channel
+	 * may have been opened successfully
+	 */
+
+	/*
+	 * Add the new device to the bus. This will kick off device-driver
+	 * binding which eventually invokes the device driver's AddDevice()
+	 * method.
+	 */
+	if (hv_vmbus_child_device_register(offered->device) == 0) {
+		/*
+		 * This state is used to indicate a successful open
+		 * so that when we do close the channel normally,
+		 * we can clean up properly
+		 */
+		offered->state = HV_CHANNEL_OPEN_STATE;
+		return;
+	}
+
+	/* Registration failed: take the channel off the list and free it. */
+	mtx_lock_spin(&hv_vmbus_g_connection.channel_lock);
+	TAILQ_REMOVE(
+	    &hv_vmbus_g_connection.channel_anchor,
+	    offered,
+	    list_entry);
+	mtx_unlock_spin(&hv_vmbus_g_connection.channel_lock);
+	hv_vmbus_free_vmbus_channel(offered);
+}
+
+/**
+ * @brief Handler for channel offers from Hyper-V/Azure
+ *
+ * Handler for channel offers from vmbus in parent partition. For each
+ * offer a channel object is allocated, the offer message is saved in it
+ * and a work item is queued on that channel's control work queue so that
+ * vmbus_channel_process_offer() handles the offer there. This routine
+ * itself does not filter offers by interface type. If the channel object
+ * cannot be allocated the offer is dropped.
+ *
+ * @param hdr the received message, interpreted as a
+ * hv_vmbus_channel_offer_channel
+ */
+static void
+vmbus_channel_on_offer(hv_vmbus_channel_msg_header* hdr)
+{
+	hv_vmbus_channel_offer_channel*	offer;
+	hv_vmbus_channel*		new_channel;
+
+	offer = (hv_vmbus_channel_offer_channel*) hdr;
+
+	/* Allocate the channel object and save this offer */
+	new_channel = hv_vmbus_allocate_channel();
+	if (new_channel == NULL)
+		return;
+
+	memcpy(&new_channel->offer_msg, offer,
+	    sizeof(hv_vmbus_channel_offer_channel));
+
+	/* Split the monitor id into a group index and a bit within it. */
+	new_channel->monitor_group = (uint8_t) offer->monitor_id / 32;
+	new_channel->monitor_bit = (uint8_t) offer->monitor_id % 32;
+
+	/* TODO: Make sure the offer comes from our parent partition */
+	hv_queue_work_item(
+	    new_channel->control_work_queue,
+	    vmbus_channel_process_offer,
+	    new_channel);
+}
+
+/**
+ * @brief Rescind offer handler.
+ *
+ * Looks up the channel named by the message's child relative id and, if
+ * one is found, queues vmbus_channel_process_rescind_offer() on that
+ * channel's control work queue.
+ */
+static void
+vmbus_channel_on_offer_rescind(hv_vmbus_channel_msg_header* hdr)
+{
+	hv_vmbus_channel_rescind_offer *msg =
+	    (hv_vmbus_channel_rescind_offer *) hdr;
+	hv_vmbus_channel *target;
+
+	target = hv_vmbus_get_channel_from_rel_id(msg->child_rel_id);
+	if (target != NULL) {
+		hv_queue_work_item(target->control_work_queue,
+		    vmbus_channel_process_rescind_offer, target);
+	}
+}
+
+/**
+ *
+ * @brief Invoked when all offers have been delivered.
+ *
+ * The body is empty: the message (hdr) is accepted and ignored.
+ */
+static void
+vmbus_channel_on_offers_delivered(hv_vmbus_channel_msg_header* hdr)
+{
+}
+
+/**
+ * @brief Open result handler.
+ *
+ * Runs when a response to a channel open request arrives.  The pending
+ * message list is searched for the open request with the same child
+ * relative id and open id; the response is copied into that entry and
+ * the thread waiting on it is signalled.
+ */
+static void
+vmbus_channel_on_open_result(hv_vmbus_channel_msg_header* hdr)
+{
+	hv_vmbus_channel_open_result *reply =
+	    (hv_vmbus_channel_open_result *) hdr;
+	hv_vmbus_channel_msg_info *pending;
+	hv_vmbus_channel_msg_header *req_hdr;
+	hv_vmbus_channel_open_channel *req;
+
+	mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock);
+	TAILQ_FOREACH(pending, &hv_vmbus_g_connection.channel_msg_anchor,
+	    msg_list_entry) {
+		req_hdr = (hv_vmbus_channel_msg_header *) pending->msg;
+		if (req_hdr->message_type != HV_CHANNEL_MESSAGE_OPEN_CHANNEL)
+			continue;
+
+		req = (hv_vmbus_channel_open_channel *) pending->msg;
+		if (req->child_rel_id != reply->child_rel_id ||
+		    req->open_id != reply->open_id)
+			continue;
+
+		/* Matching request: hand over the result, wake the waiter. */
+		memcpy(&pending->response.open_result, reply,
+		    sizeof(hv_vmbus_channel_open_result));
+		sema_post(&pending->wait_sema);
+		break;
+	}
+	mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock);
+}
+
+/**
+ * @brief GPADL created handler.
+ *
+ * Runs when a response to a gpadl create request arrives.  The pending
+ * message list is searched for the gpadl header request with the same
+ * child relative id and gpadl handle; the response is copied into that
+ * entry and the thread waiting on it is signalled.
+ */
+static void
+vmbus_channel_on_gpadl_created(hv_vmbus_channel_msg_header* hdr)
+{
+	hv_vmbus_channel_gpadl_created *reply =
+	    (hv_vmbus_channel_gpadl_created *) hdr;
+	hv_vmbus_channel_msg_info *pending;
+	hv_vmbus_channel_msg_header *req_hdr;
+	hv_vmbus_channel_gpadl_header *req;
+
+	mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock);
+	TAILQ_FOREACH(pending, &hv_vmbus_g_connection.channel_msg_anchor,
+	    msg_list_entry) {
+		req_hdr = (hv_vmbus_channel_msg_header *) pending->msg;
+		if (req_hdr->message_type != HV_CHANNEL_MESSAGEL_GPADL_HEADER)
+			continue;
+
+		req = (hv_vmbus_channel_gpadl_header *) req_hdr;
+		if (reply->child_rel_id != req->child_rel_id ||
+		    reply->gpadl != req->gpadl)
+			continue;
+
+		/* Matching request: hand over the result, wake the waiter. */
+		memcpy(&pending->response.gpadl_created, reply,
+		    sizeof(hv_vmbus_channel_gpadl_created));
+		sema_post(&pending->wait_sema);
+		break;
+	}
+	mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock);
+}
+
+/**
+ * @brief GPADL torndown handler.
+ *
+ * Runs when a response to a gpadl teardown request arrives.  The pending
+ * message list is searched for the teardown request with the same gpadl
+ * handle; the response is copied into that entry and the thread waiting
+ * on it is signalled.
+ */
+static void
+vmbus_channel_on_gpadl_torndown(hv_vmbus_channel_msg_header* hdr)
+{
+	hv_vmbus_channel_gpadl_torndown *reply =
+	    (hv_vmbus_channel_gpadl_torndown *) hdr;
+	hv_vmbus_channel_msg_info *pending;
+	hv_vmbus_channel_msg_header *req_hdr;
+	hv_vmbus_channel_gpadl_teardown *req;
+
+	mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock);
+	TAILQ_FOREACH(pending, &hv_vmbus_g_connection.channel_msg_anchor,
+	    msg_list_entry) {
+		req_hdr = (hv_vmbus_channel_msg_header *) pending->msg;
+		if (req_hdr->message_type != HV_CHANNEL_MESSAGE_GPADL_TEARDOWN)
+			continue;
+
+		req = (hv_vmbus_channel_gpadl_teardown *) req_hdr;
+		if (reply->gpadl != req->gpadl)
+			continue;
+
+		/* Matching request: hand over the result, wake the waiter. */
+		memcpy(&pending->response.gpadl_torndown, reply,
+		    sizeof(hv_vmbus_channel_gpadl_torndown));
+		sema_post(&pending->wait_sema);
+		break;
+	}
+	mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock);
+}
+
+/**
+ * @brief Version response handler.
+ *
+ * This is invoked when we received a response
+ * to our initiate contact request. Find the matching request, copy the
+ * response and signal the requesting thread.
+ *
+ * The response carries nothing that is compared against a request here,
+ * so every pending initiate-contact request on the list receives a copy
+ * of the response and is signalled (the walk does not stop at the first
+ * one).
+ */
+static void
+vmbus_channel_on_version_response(hv_vmbus_channel_msg_header* hdr)
+{
+	hv_vmbus_channel_msg_info*		msg_info;
+	hv_vmbus_channel_msg_header*		requestHeader;
+	hv_vmbus_channel_version_response*	versionResponse;
+
+	versionResponse = (hv_vmbus_channel_version_response*)hdr;
+
+	mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock);
+	TAILQ_FOREACH(msg_info, &hv_vmbus_g_connection.channel_msg_anchor,
+	    msg_list_entry) {
+		requestHeader = (hv_vmbus_channel_msg_header*) msg_info->msg;
+		if (requestHeader->message_type
+		    == HV_CHANNEL_MESSAGE_INITIATED_CONTACT) {
+			memcpy(&msg_info->response.version_response,
+			    versionResponse,
+			    sizeof(hv_vmbus_channel_version_response));
+			sema_post(&msg_info->wait_sema);
+		}
+	}
+	mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock);
+}
+
+/**
+ * @brief Handler for channel protocol messages.
+ *
+ * This is invoked in the vmbus worker thread context.
+ *
+ * The message type found in the payload selects an entry of
+ * g_channel_message_table; types at or beyond HV_CHANNEL_MESSAGE_COUNT
+ * and types without a handler are ignored.  The message buffer is freed
+ * here in every case.
+ *
+ * @param context the hv_vmbus_message to dispatch; ownership passes to
+ * this routine
+ */
+void
+hv_vmbus_on_channel_message(void *context)
+{
+	hv_vmbus_message*		msg;
+	hv_vmbus_channel_msg_header*	hdr;
+
+	msg = (hv_vmbus_message*) context;
+	hdr = (hv_vmbus_channel_msg_header*) msg->u.payload;
+
+	if (hdr->message_type < HV_CHANNEL_MESSAGE_COUNT &&
+	    g_channel_message_table[hdr->message_type].messageHandler) {
+		g_channel_message_table[hdr->message_type].messageHandler(hdr);
+	}
+
+	/* Free the msg that was allocated in VmbusOnMsgDPC() */
+	free(msg, M_DEVBUF);
+}
+
+/**
+ * @brief Send a request to get all our pending offers.
+ *
+ * @return ENOMEM when the request buffer cannot be allocated, otherwise
+ * the result of hv_vmbus_post_message()
+ */
+int
+hv_vmbus_request_channel_offers(void)
+{
+	int				ret;
+	hv_vmbus_channel_msg_header*	msg;
+	hv_vmbus_channel_msg_info*	msg_info;
+
+	/*
+	 * M_ZERO matters: only message_type is assigned below, and the
+	 * whole header is posted to the host, so any other header bytes
+	 * would otherwise be uninitialized kernel memory.
+	 */
+	msg_info = (hv_vmbus_channel_msg_info *)
+	    malloc(sizeof(hv_vmbus_channel_msg_info)
+	    + sizeof(hv_vmbus_channel_msg_header), M_DEVBUF,
+	    M_NOWAIT | M_ZERO);
+
+	if (msg_info == NULL) {
+		if(bootverbose)
+			printf("Error VMBUS: malloc failed for Request Offers\n");
+		return (ENOMEM);
+	}
+
+	msg = (hv_vmbus_channel_msg_header*) msg_info->msg;
+	msg->message_type = HV_CHANNEL_MESSAGE_REQUEST_OFFERS;
+
+	ret = hv_vmbus_post_message(msg, sizeof(hv_vmbus_channel_msg_header));
+
+	/* msg_info is known to be non-NULL here. */
+	free(msg_info, M_DEVBUF);
+
+	return (ret);
+}
+
+/**
+ * @brief Release channels that are unattached/unconnected (i.e., no drivers associated)
+ *
+ * As written, every channel still on the connection's channel list is
+ * removed, its device is unregistered and the channel is released.
+ *
+ * The channel list lock is a spin mutex, so it is held only while a
+ * channel is unlinked from the list.  The unregister and free calls are
+ * made with the lock dropped: code running under a spin mutex must not
+ * sleep or acquire sleepable locks.
+ * NOTE(review): hv_vmbus_child_device_unregister() is not visible from
+ * here; it is assumed that it may block - confirm.
+ */
+void
+hv_vmbus_release_unattached_channels(void)
+{
+	hv_vmbus_channel *channel;
+
+	mtx_lock_spin(&hv_vmbus_g_connection.channel_lock);
+
+	while (!TAILQ_EMPTY(&hv_vmbus_g_connection.channel_anchor)) {
+		channel = TAILQ_FIRST(&hv_vmbus_g_connection.channel_anchor);
+		TAILQ_REMOVE(&hv_vmbus_g_connection.channel_anchor,
+		    channel, list_entry);
+
+		/* The channel is off the list; nobody else can find it. */
+		mtx_unlock_spin(&hv_vmbus_g_connection.channel_lock);
+
+		hv_vmbus_child_device_unregister(channel->device);
+		hv_vmbus_free_vmbus_channel(channel);
+
+		/* Re-take the lock before looking at the list again. */
+		mtx_lock_spin(&hv_vmbus_g_connection.channel_lock);
+	}
+	mtx_unlock_spin(&hv_vmbus_g_connection.channel_lock);
+}
diff --git a/sys/dev/hyperv/vmbus/hv_connection.c b/sys/dev/hyperv/vmbus/hv_connection.c
new file mode 100644
index 000000000000..c8e0b48ac65c
--- /dev/null
+++ b/sys/dev/hyperv/vmbus/hv_connection.c
@@ -0,0 +1,431 @@
+/*-
+ * Copyright (c) 2009-2012 Microsoft Corp.
+ * Copyright (c) 2012 NetApp Inc.
+ * Copyright (c) 2012 Citrix Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice unmodified, this list of conditions, and the following
+ * disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mutex.h>
+#include <sys/systm.h>
+#include <machine/bus.h>
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/pmap.h>
+
+#include "hv_vmbus_priv.h"
+
+/*
+ * Globals
+ *
+ * hv_vmbus_g_connection is the single vmbus connection object used by the
+ * routines in this file.  It starts out HV_DISCONNECTED; its work queue,
+ * semaphore, locks, lists and shared pages are set up by
+ * hv_vmbus_connect().
+ * NOTE(review): the reason for the 0xE1E10 starting gpadl handle is not
+ * visible here.
+ */
+hv_vmbus_connection hv_vmbus_g_connection =
+ { .connect_state = HV_DISCONNECTED,
+ .next_gpadl_handle = 0xE1E10, };
+
+/**
+ * Send a connect request on the partition service connection
+ *
+ * Sets up the connection's work queue, control semaphore, message and
+ * channel lists with their locks, the interrupt page and the two monitor
+ * pages, then posts an initiate-contact message and waits up to five
+ * seconds for the version response.
+ *
+ * @return 0 on success; -1 when the connection is not in the
+ * HV_DISCONNECTED state; ENOMEM when an allocation fails; the error from
+ * hv_vmbus_post_message() when the request cannot be posted;
+ * ECONNREFUSED when no response reporting a supported version was
+ * received (this includes the timeout case, because the zero-filled
+ * response area then still reads as "not supported").
+ */
+int
+hv_vmbus_connect(void)
+{
+	int					ret = 0;
+	hv_vmbus_channel_msg_info*		msg_info = NULL;
+	hv_vmbus_channel_initiate_contact*	msg;
+
+	/**
+	 * Make sure we are not connecting or connected
+	 */
+	if (hv_vmbus_g_connection.connect_state != HV_DISCONNECTED) {
+		return (-1);
+	}
+
+	/**
+	 * Initialize the vmbus connection
+	 */
+	hv_vmbus_g_connection.connect_state = HV_CONNECTING;
+	hv_vmbus_g_connection.work_queue = hv_work_queue_create("vmbusQ");
+	sema_init(&hv_vmbus_g_connection.control_sema, 1, "control_sema");
+
+	TAILQ_INIT(&hv_vmbus_g_connection.channel_msg_anchor);
+	mtx_init(&hv_vmbus_g_connection.channel_msg_lock, "vmbus channel msg",
+	    NULL, MTX_SPIN);
+
+	TAILQ_INIT(&hv_vmbus_g_connection.channel_anchor);
+	mtx_init(&hv_vmbus_g_connection.channel_lock, "vmbus channel",
+	    NULL, MTX_SPIN);
+
+	/**
+	 * Setup the vmbus event connection for channel interrupt abstraction
+	 * stuff
+	 */
+	hv_vmbus_g_connection.interrupt_page = contigmalloc(
+	    PAGE_SIZE, M_DEVBUF,
+	    M_NOWAIT | M_ZERO, 0UL,
+	    BUS_SPACE_MAXADDR,
+	    PAGE_SIZE, 0);
+	KASSERT(hv_vmbus_g_connection.interrupt_page != NULL,
+	    ("Error VMBUS: malloc failed to allocate Channel"
+	    " Request Event message!"));
+	if (hv_vmbus_g_connection.interrupt_page == NULL) {
+		ret = ENOMEM;
+		goto cleanup;
+	}
+
+	/* First half of the page: receive flags; second half: send flags. */
+	hv_vmbus_g_connection.recv_interrupt_page =
+	    hv_vmbus_g_connection.interrupt_page;
+
+	hv_vmbus_g_connection.send_interrupt_page =
+	    ((uint8_t *) hv_vmbus_g_connection.interrupt_page +
+	    (PAGE_SIZE >> 1));
+
+	/**
+	 * Set up the monitor notification facility. The 1st page for
+	 * parent->child and the 2nd page for child->parent
+	 */
+	hv_vmbus_g_connection.monitor_pages = contigmalloc(
+	    2 * PAGE_SIZE,
+	    M_DEVBUF,
+	    M_NOWAIT | M_ZERO,
+	    0UL,
+	    BUS_SPACE_MAXADDR,
+	    PAGE_SIZE,
+	    0);
+	KASSERT(hv_vmbus_g_connection.monitor_pages != NULL,
+	    ("Error VMBUS: malloc failed to allocate Monitor Pages!"));
+	if (hv_vmbus_g_connection.monitor_pages == NULL) {
+		ret = ENOMEM;
+		goto cleanup;
+	}
+
+	msg_info = (hv_vmbus_channel_msg_info*)
+	    malloc(sizeof(hv_vmbus_channel_msg_info) +
+	    sizeof(hv_vmbus_channel_initiate_contact),
+	    M_DEVBUF, M_NOWAIT | M_ZERO);
+	KASSERT(msg_info != NULL,
+	    ("Error VMBUS: malloc failed for Initiate Contact message!"));
+	if (msg_info == NULL) {
+		ret = ENOMEM;
+		goto cleanup;
+	}
+
+	sema_init(&msg_info->wait_sema, 0, "Msg Info Sema");
+	msg = (hv_vmbus_channel_initiate_contact*) msg_info->msg;
+
+	msg->header.message_type = HV_CHANNEL_MESSAGE_INITIATED_CONTACT;
+	msg->vmbus_version_requested = HV_VMBUS_REVISION_NUMBER;
+
+	msg->interrupt_page = hv_get_phys_addr(
+	    hv_vmbus_g_connection.interrupt_page);
+
+	msg->monitor_page_1 = hv_get_phys_addr(
+	    hv_vmbus_g_connection.monitor_pages);
+
+	msg->monitor_page_2 =
+	    hv_get_phys_addr(
+	    ((uint8_t *) hv_vmbus_g_connection.monitor_pages
+	    + PAGE_SIZE));
+
+	/**
+	 * Add to list before we send the request since we may receive the
+	 * response before returning from this routine
+	 */
+	mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock);
+
+	TAILQ_INSERT_TAIL(
+	    &hv_vmbus_g_connection.channel_msg_anchor,
+	    msg_info,
+	    msg_list_entry);
+
+	mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock);
+
+	ret = hv_vmbus_post_message(
+	    msg,
+	    sizeof(hv_vmbus_channel_initiate_contact));
+
+	if (ret != 0) {
+		mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock);
+		TAILQ_REMOVE(
+		    &hv_vmbus_g_connection.channel_msg_anchor,
+		    msg_info,
+		    msg_list_entry);
+		mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock);
+		goto cleanup;
+	}
+
+	/**
+	 * Wait for the connection response.  The timeout is expressed in
+	 * clock ticks, so five seconds is 5 * hz whatever the tick rate.
+	 */
+	ret = sema_timedwait(&msg_info->wait_sema, 5 * hz);
+
+	mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock);
+	TAILQ_REMOVE(
+	    &hv_vmbus_g_connection.channel_msg_anchor,
+	    msg_info,
+	    msg_list_entry);
+	mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock);
+
+	/**
+	 * Check if successful
+	 */
+	if (msg_info->response.version_response.version_supported) {
+		hv_vmbus_g_connection.connect_state = HV_CONNECTED;
+	} else {
+		ret = ECONNREFUSED;
+		goto cleanup;
+	}
+
+	sema_destroy(&msg_info->wait_sema);
+	free(msg_info, M_DEVBUF);
+
+	return (0);
+
+	/*
+	 * Cleanup after failure!
+	 */
+ cleanup:
+
+	hv_vmbus_g_connection.connect_state = HV_DISCONNECTED;
+
+	hv_work_queue_close(hv_vmbus_g_connection.work_queue);
+	sema_destroy(&hv_vmbus_g_connection.control_sema);
+	mtx_destroy(&hv_vmbus_g_connection.channel_lock);
+	mtx_destroy(&hv_vmbus_g_connection.channel_msg_lock);
+
+	/*
+	 * The recv/send pointers alias the interrupt page; clear them so
+	 * that hv_vmbus_on_events() and hv_vmbus_set_event() never see a
+	 * pointer into freed memory.
+	 */
+	hv_vmbus_g_connection.recv_interrupt_page = NULL;
+	hv_vmbus_g_connection.send_interrupt_page = NULL;
+
+	if (hv_vmbus_g_connection.interrupt_page != NULL) {
+		contigfree(
+		    hv_vmbus_g_connection.interrupt_page,
+		    PAGE_SIZE,
+		    M_DEVBUF);
+		hv_vmbus_g_connection.interrupt_page = NULL;
+	}
+
+	if (hv_vmbus_g_connection.monitor_pages != NULL) {
+		contigfree(
+		    hv_vmbus_g_connection.monitor_pages,
+		    2 * PAGE_SIZE,
+		    M_DEVBUF);
+		hv_vmbus_g_connection.monitor_pages = NULL;
+	}
+
+	if (msg_info) {
+		sema_destroy(&msg_info->wait_sema);
+		free(msg_info, M_DEVBUF);
+	}
+
+	return (ret);
+}
+
+/**
+ * Send a disconnect request on the partition service connection
+ *
+ * Posts an unload message and then releases what hv_vmbus_connect() set
+ * up: the interrupt page, the monitor pages, both list locks, the work
+ * queue and the control semaphore.
+ *
+ * NOTE(review): destroying channel_lock here mirrors the failure path of
+ * hv_vmbus_connect(); it assumes no caller touches the channel list after
+ * disconnecting - confirm against the bus detach sequence.
+ *
+ * @return ENOMEM when the unload message cannot be allocated (nothing is
+ * torn down in that case), otherwise the result of
+ * hv_vmbus_post_message()
+ */
+int
+hv_vmbus_disconnect(void)
+{
+	int			 ret = 0;
+	hv_vmbus_channel_unload* msg;
+
+	msg = malloc(sizeof(hv_vmbus_channel_unload),
+	    M_DEVBUF, M_NOWAIT | M_ZERO);
+	KASSERT(msg != NULL,
+	    ("Error VMBUS: malloc failed to allocate Channel Unload Msg!"));
+	if (msg == NULL)
+		return (ENOMEM);
+
+	msg->message_type = HV_CHANNEL_MESSAGE_UNLOAD;
+
+	ret = hv_vmbus_post_message(msg, sizeof(hv_vmbus_channel_unload));
+
+	/*
+	 * The recv/send pointers alias the interrupt page; clear them
+	 * before the page goes away so hv_vmbus_on_events() sees NULL
+	 * rather than freed memory.
+	 */
+	hv_vmbus_g_connection.recv_interrupt_page = NULL;
+	hv_vmbus_g_connection.send_interrupt_page = NULL;
+
+	if (hv_vmbus_g_connection.interrupt_page != NULL) {
+		contigfree(hv_vmbus_g_connection.interrupt_page,
+		    PAGE_SIZE, M_DEVBUF);
+		hv_vmbus_g_connection.interrupt_page = NULL;
+	}
+
+	/* The two monitor pages were allocated as one contiguous block. */
+	if (hv_vmbus_g_connection.monitor_pages != NULL) {
+		contigfree(hv_vmbus_g_connection.monitor_pages,
+		    2 * PAGE_SIZE, M_DEVBUF);
+		hv_vmbus_g_connection.monitor_pages = NULL;
+	}
+
+	mtx_destroy(&hv_vmbus_g_connection.channel_msg_lock);
+
+	hv_work_queue_close(hv_vmbus_g_connection.work_queue);
+	sema_destroy(&hv_vmbus_g_connection.control_sema);
+
+	/* Lets a later hv_vmbus_connect() initialize the lock afresh. */
+	mtx_destroy(&hv_vmbus_g_connection.channel_lock);
+
+	hv_vmbus_g_connection.connect_state = HV_DISCONNECTED;
+
+	free(msg, M_DEVBUF);
+
+	return (ret);
+}
+
+/**
+ * Get the channel object given its child relative id (ie channel id)
+ *
+ * Returns the first listed channel whose offer carries rel_id, or NULL
+ * when no listed channel does.
+ */
+hv_vmbus_channel*
+hv_vmbus_get_channel_from_rel_id(uint32_t rel_id)
+{
+	hv_vmbus_channel *cursor;
+
+	/*
+	 * TODO:
+	 * Consider optimization where relids are stored in a fixed size array
+	 * and channels are accessed without the need to take this lock or search
+	 * the list.
+	 */
+	mtx_lock_spin(&hv_vmbus_g_connection.channel_lock);
+	/* TAILQ_FOREACH leaves the cursor NULL when nothing matched. */
+	TAILQ_FOREACH(cursor, &hv_vmbus_g_connection.channel_anchor,
+	    list_entry) {
+		if (cursor->offer_msg.child_rel_id == rel_id)
+			break;
+	}
+	mtx_unlock_spin(&hv_vmbus_g_connection.channel_lock);
+
+	return (cursor);
+}
+
+/**
+ * Process a channel event notification
+ *
+ * Looks up the channel for relid and, if it exists and has a callback
+ * installed, runs that callback with the channel's callback context.
+ */
+static void
+VmbusProcessChannelEvent(uint32_t relid)
+{
+	hv_vmbus_channel *target = hv_vmbus_get_channel_from_rel_id(relid);
+
+	if (target == NULL)
+		return;
+
+	/**
+	 * A packet may arrive while the relevant driver is being
+	 * unloaded.  The callback is therefore dispatched with the
+	 * channel's inbound lock held; the unloading driver takes the
+	 * same lock to set the callback to NULL, which closes the window.
+	 */
+	mtx_lock(&target->inbound_lock);
+	if (target->on_channel_callback != NULL)
+		target->on_channel_callback(target->channel_callback_context);
+	mtx_unlock(&target->inbound_lock);
+}
+
+/**
+ * Handler for events
+ *
+ * Scans the receive interrupt page one 32-bit word at a time, up to
+ * HV_MAX_NUM_CHANNELS_SUPPORTED bits.  Each set bit is cleared and, except
+ * for bit 0 of word 0 (the vmbus channel protocol message), the channel
+ * whose relative id equals the bit position is processed.
+ */
+void
+hv_vmbus_on_events(void *arg)
+{
+	uint32_t *flags = hv_vmbus_g_connection.recv_interrupt_page;
+	int word_limit = HV_MAX_NUM_CHANNELS_SUPPORTED >> 5;
+	int word;
+	int bit;
+	int rel_id;
+
+	if (flags == NULL)
+		return;
+
+	for (word = 0; word < word_limit; word++) {
+		/* Skip words without any pending event. */
+		if (!flags[word])
+			continue;
+
+		for (bit = 0; bit < 32; bit++) {
+			if (!synch_test_and_clear_bit(bit,
+			    (uint32_t *) &flags[word]))
+				continue;
+
+			rel_id = (word << 5) + bit;
+			/*
+			 * rel_id 0 is the special case of a vmbus channel
+			 * protocol msg; it is not dispatched from here.
+			 */
+			if (rel_id != 0)
+				VmbusProcessChannelEvent(rel_id);
+		}
+	}
+}
+
+/**
+ * Send a msg on the vmbus's message connection
+ *
+ * The post is retried, with a growing delay between attempts, for as
+ * long as the hypervisor answers HV_STATUS_INSUFFICIENT_BUFFERS and
+ * entries remain in the delay table.  Returns the status of the last
+ * attempt.
+ */
+int
+hv_vmbus_post_message(void *buffer, size_t bufferLen)
+{
+	/* NetScaler delays from previous code were consolidated here */
+	static int delayAmount[] = {100, 100, 100, 500, 500, 5000, 5000, 5000};
+	hv_vmbus_connection_id conn;
+	unsigned attempt = 0;
+	int status = 0;
+
+	while (attempt < sizeof(delayAmount)/sizeof(delayAmount[0])) {
+		conn.as_uint32_t = 0;
+		conn.u.id = HV_VMBUS_MESSAGE_CONNECTION_ID;
+		status = hv_vmbus_post_msg_via_msg_ipc(conn, 1, buffer,
+		    bufferLen);
+		if (status != HV_STATUS_INSUFFICIENT_BUFFERS)
+			break;
+		/* TODO: KYS We should use a blocking wait call */
+		DELAY(delayAmount[attempt]);
+		attempt++;
+	}
+
+	KASSERT(status == 0, ("Error VMBUS: Message Post Failed\n"));
+
+	return (status);
+}
+
+/**
+ * Send an event notification to the parent
+ *
+ * Sets the bit for child_rel_id in the send interrupt page and then
+ * signals the event; returns the result of hv_vmbus_signal_event().
+ */
+int
+hv_vmbus_set_event(uint32_t child_rel_id)
+{
+	uint32_t *send_flags =
+	    (uint32_t *)hv_vmbus_g_connection.send_interrupt_page;
+
+	/* Each uint32_t represents 32 channels */
+	synch_set_bit(child_rel_id & 31, send_flags + (child_rel_id >> 5));
+
+	return (hv_vmbus_signal_event());
+}
+
diff --git a/sys/dev/hyperv/vmbus/hv_hv.c b/sys/dev/hyperv/vmbus/hv_hv.c
new file mode 100644
index 000000000000..0e73bdc4bf4e
--- /dev/null
+++ b/sys/dev/hyperv/vmbus/hv_hv.c
@@ -0,0 +1,515 @@
+/*-
+ * Copyright (c) 2009-2012 Microsoft Corp.
+ * Copyright (c) 2012 NetApp Inc.
+ * Copyright (c) 2012 Citrix Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice unmodified, this list of conditions, and the following
+ * disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * Implements low-level interactions with Hyper-V/Azure
+ */
+
+#include <sys/param.h>
+#include <sys/malloc.h>
+#include <sys/pcpu.h>
+#include <sys/timetc.h>
+#include <machine/bus.h>
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/pmap.h>
+
+
+#include "hv_vmbus_priv.h"
+
+#define HV_X64_MSR_GUEST_OS_ID 0x40000000 /* MSR written with the guest OS id; the same value is also passed to do_cpuid() as a leaf in this file */
+#define HV_X64_CPUID_MIN 0x40000005 /* lowest regs[0] accepted when probing for Hyper-V */
+#define HV_X64_CPUID_MAX 0x4000ffff /* highest regs[0] accepted when probing for Hyper-V */
+#define HV_X64_MSR_TIME_REF_COUNT 0x40000020 /* MSR read by hv_get_timecount() */
+
+#define HV_NANOSECONDS_PER_SEC 1000000000L /* used in the hv_timecounter initializer */
+
+
+/* Forward declaration: referenced by the hv_timecounter initializer. */
+static u_int hv_get_timecount(struct timecounter *tc);
+
+/*
+ * Execute CPUID for leaf 'op'.  The value found in *ecx on entry is
+ * loaded into ECX as input; on return *eax, *ebx, *ecx and *edx hold the
+ * four result registers.
+ *
+ * The ECX input operand is tied to output operand 2 and takes the value
+ * *ecx; passing the pointer itself would load address bits into ECX.
+ */
+static inline void
+do_cpuid_inline(unsigned int op, unsigned int *eax, unsigned int *ebx,
+    unsigned int *ecx, unsigned int *edx)
+{
+	__asm__ __volatile__("cpuid"
+	    : "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx)
+	    : "0" (op), "2" (*ecx));
+}
+
+/**
+ * Globals
+ *
+ * hv_vmbus_g_context starts with the SynIC marked uninitialized and with
+ * no hypercall page or signal-event buffers; hv_vmbus_init() fills in the
+ * hypercall page and the signal-event fields.
+ * hv_timecounter (below) is the timecounter that hv_vmbus_init()
+ * registers through tc_init(); its counter is read by
+ * hv_get_timecount().
+ */
+hv_vmbus_context hv_vmbus_g_context = {
+ .syn_ic_initialized = FALSE,
+ .hypercall_page = NULL,
+ .signal_event_param = NULL,
+ .signal_event_buffer = NULL,
+};
+
+static struct timecounter hv_timecounter = {
+ hv_get_timecount, 0, ~0u, HV_NANOSECONDS_PER_SEC/100, "Hyper-V", HV_NANOSECONDS_PER_SEC/100
+};
+
+/*
+ * Timecounter read routine: returns the current value of the
+ * HV_X64_MSR_TIME_REF_COUNT MSR narrowed to u_int.  The tc argument is
+ * not used.
+ */
+static u_int
+hv_get_timecount(struct timecounter *tc)
+{
+	return ((u_int)hv_vmbus_read_msr(HV_X64_MSR_TIME_REF_COUNT));
+}
+
+/**
+ * @brief Query the cpuid for presence of windows hypervisor
+ *
+ * @return 1 when CPUID reports a hypervisor whose leaf range and
+ * "Microsoft Hv" signature match, 0 otherwise
+ */
+int
+hv_vmbus_query_hypervisor_presence(void)
+{
+	u_int regs[4];
+
+	/* Leaf 1, bit 31 of regs[2]: set when a hypervisor is detected. */
+	do_cpuid(1, regs);
+	if ((regs[2] & 0x80000000) == 0)
+		return (0);
+
+	/*
+	 * Make sure this really is Hyper-V by looking at the CPUID info.
+	 * HV_X64_MSR_GUEST_OS_ID is used here as the CPUID leaf number.
+	 */
+	do_cpuid(HV_X64_MSR_GUEST_OS_ID, regs);
+	if (regs[0] < HV_X64_CPUID_MIN || regs[0] > HV_X64_CPUID_MAX)
+		return (0);
+
+	return (memcmp("Microsoft Hv", &regs[1], 12) == 0);
+}
+
+/**
+ * @brief Get version of the windows hypervisor
+ *
+ * Issues the vendor/max-function query, the interface query and, when the
+ * reported maximum leaf allows it, the version query.  Only the eax value
+ * of the first query is kept; the register values returned by the later
+ * queries are not used.
+ *
+ * @return the eax value of the vendor/max-function query (maxLeaf)
+ */
+static int
+hv_vmbus_get_hypervisor_version(void)
+{
+ unsigned int eax;
+ unsigned int ebx;
+ unsigned int ecx;
+ unsigned int edx;
+ unsigned int maxLeaf;
+ unsigned int op;
+
+ /*
+ * It is assumed that this is called after confirming that
+ * Viridian is present
+ * Query id and revision.
+ */
+ eax = 0;
+ ebx = 0;
+ ecx = 0;
+ edx = 0;
+ op = HV_CPU_ID_FUNCTION_HV_VENDOR_AND_MAX_FUNCTION;
+ do_cpuid_inline(op, &eax, &ebx, &ecx, &edx);
+
+ maxLeaf = eax;
+ eax = 0;
+ ebx = 0;
+ ecx = 0;
+ edx = 0;
+ op = HV_CPU_ID_FUNCTION_HV_INTERFACE;
+ do_cpuid_inline(op, &eax, &ebx, &ecx, &edx);
+
+ if (maxLeaf >= HV_CPU_ID_FUNCTION_MS_HV_VERSION) {
+ eax = 0;
+ ebx = 0;
+ ecx = 0;
+ edx = 0;
+ op = HV_CPU_ID_FUNCTION_MS_HV_VERSION;
+ do_cpuid_inline(op, &eax, &ebx, &ecx, &edx);
+ }
+ return (maxLeaf);
+}
+
+/**
+ * @brief Invoke the specified hypercall
+ *
+ * Calls through hv_vmbus_g_context.hypercall_page, passing the control
+ * word and the physical addresses of input and output (0 is passed for a
+ * NULL pointer).
+ *
+ * __x86_64__ build: control goes in rcx, the input address in rdx, the
+ * output address is moved into r8, and the status is read from rax.
+ * Other build: each 64-bit value is split into a high:low register pair -
+ * control in edx:eax, input address in ebx:ecx, output address in
+ * edi:esi - and the status is read back from edx:eax.
+ *
+ * NOTE(review): on x86_64, r8 is loaded in a separate asm statement from
+ * the call; nothing ties the two together, so this relies on the compiler
+ * not touching r8 in between - confirm.
+ *
+ * @return the 64-bit status value produced by the hypercall
+ */
+static uint64_t
+hv_vmbus_do_hypercall(uint64_t control, void* input, void* output)
+{
+#ifdef __x86_64__
+ uint64_t hv_status = 0;
+ uint64_t input_address = (input) ? hv_get_phys_addr(input) : 0;
+ uint64_t output_address = (output) ? hv_get_phys_addr(output) : 0;
+ volatile void* hypercall_page = hv_vmbus_g_context.hypercall_page;
+
+ __asm__ __volatile__ ("mov %0, %%r8" : : "r" (output_address): "r8");
+ __asm__ __volatile__ ("call *%3" : "=a"(hv_status):
+ "c" (control), "d" (input_address),
+ "m" (hypercall_page));
+ return (hv_status);
+#else
+ uint32_t control_high = control >> 32;
+ uint32_t control_low = control & 0xFFFFFFFF;
+ uint32_t hv_status_high = 1;
+ uint32_t hv_status_low = 1;
+ uint64_t input_address = (input) ? hv_get_phys_addr(input) : 0;
+ uint32_t input_address_high = input_address >> 32;
+ uint32_t input_address_low = input_address & 0xFFFFFFFF;
+ uint64_t output_address = (output) ? hv_get_phys_addr(output) : 0;
+ uint32_t output_address_high = output_address >> 32;
+ uint32_t output_address_low = output_address & 0xFFFFFFFF;
+ volatile void* hypercall_page = hv_vmbus_g_context.hypercall_page;
+
+ __asm__ __volatile__ ("call *%8" : "=d"(hv_status_high),
+ "=a"(hv_status_low) : "d" (control_high),
+ "a" (control_low), "b" (input_address_high),
+ "c" (input_address_low),
+ "D"(output_address_high),
+ "S"(output_address_low), "m" (hypercall_page));
+ return (hv_status_low | ((uint64_t)hv_status_high << 32));
+#endif /* __x86_64__ */
+}
+
+/**
+ * @brief Main initialization routine.
+ *
+ * This routine must be called
+ * before any other routines in here are called
+ *
+ * Clears the per-CPU SynIC page tables, verifies that Hyper-V is present,
+ * writes the guest OS id, installs the hypercall page, prepares the
+ * aligned signal-event parameter block and registers the Hyper-V
+ * timecounter.
+ *
+ * @return 0 on success, ENOTSUP on any failure (Hyper-V not detected,
+ * allocation failure, or the hypercall page not being enabled)
+ */
+int
+hv_vmbus_init(void)
+{
+	hv_vmbus_x64_msr_hypercall_contents	hypercall_msr;
+	uint64_t				os_guest_info;
+	void*					virt_addr = 0;
+
+	memset(
+	    hv_vmbus_g_context.syn_ic_event_page,
+	    0,
+	    sizeof(hv_vmbus_handle) * MAXCPU);
+
+	memset(
+	    hv_vmbus_g_context.syn_ic_msg_page,
+	    0,
+	    sizeof(hv_vmbus_handle) * MAXCPU);
+
+	if (!hv_vmbus_query_hypervisor_presence())
+		goto cleanup;
+
+	/* The reported maximum leaf is not needed here. */
+	(void) hv_vmbus_get_hypervisor_version();
+
+	/*
+	 * Write our OS info
+	 */
+	os_guest_info = HV_FREEBSD_GUEST_ID;
+	hv_vmbus_write_msr(HV_X64_MSR_GUEST_OS_ID, os_guest_info);
+	hv_vmbus_g_context.guest_id = os_guest_info;
+
+	/*
+	 * See if the hypercall page is already set
+	 */
+	hypercall_msr.as_uint64_t = hv_vmbus_read_msr(HV_X64_MSR_HYPERCALL);
+	virt_addr = malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT | M_ZERO);
+	KASSERT(virt_addr != NULL,
+	    ("Error VMBUS: malloc failed to allocate page during init!"));
+	if (virt_addr == NULL)
+		goto cleanup;
+
+	hypercall_msr.enable = 1;
+	hypercall_msr.guest_physical_address =
+	    (hv_get_phys_addr(virt_addr) >> PAGE_SHIFT);
+	hv_vmbus_write_msr(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64_t);
+
+	/*
+	 * Confirm that hypercall page did get set up
+	 */
+	hypercall_msr.as_uint64_t = 0;
+	hypercall_msr.as_uint64_t = hv_vmbus_read_msr(HV_X64_MSR_HYPERCALL);
+
+	if (!hypercall_msr.enable)
+		goto cleanup;
+
+	hv_vmbus_g_context.hypercall_page = virt_addr;
+
+	/*
+	 * Setup the global signal event param for the signal event hypercall
+	 */
+	hv_vmbus_g_context.signal_event_buffer =
+	    malloc(sizeof(hv_vmbus_input_signal_event_buffer), M_DEVBUF,
+	    M_ZERO | M_NOWAIT);
+	KASSERT(hv_vmbus_g_context.signal_event_buffer != NULL,
+	    ("Error VMBUS: Failed to allocate signal_event_buffer\n"));
+	if (hv_vmbus_g_context.signal_event_buffer == NULL)
+		goto cleanup;
+
+	/* The parameter block lives at the aligned address in the buffer. */
+	hv_vmbus_g_context.signal_event_param =
+	    (hv_vmbus_input_signal_event*)
+	    (HV_ALIGN_UP((unsigned long)
+	    hv_vmbus_g_context.signal_event_buffer,
+	    HV_HYPERCALL_PARAM_ALIGN));
+	hv_vmbus_g_context.signal_event_param->connection_id.as_uint32_t = 0;
+	hv_vmbus_g_context.signal_event_param->connection_id.u.id =
+	    HV_VMBUS_EVENT_CONNECTION_ID;
+	hv_vmbus_g_context.signal_event_param->flag_number = 0;
+	hv_vmbus_g_context.signal_event_param->rsvd_z = 0;
+
+	tc_init(&hv_timecounter); /* register virtual timecount */
+
+	return (0);
+
+ cleanup:
+	if (virt_addr != NULL) {
+		if (hypercall_msr.enable) {
+			hypercall_msr.as_uint64_t = 0;
+			hv_vmbus_write_msr(HV_X64_MSR_HYPERCALL,
+			    hypercall_msr.as_uint64_t);
+		}
+
+		/*
+		 * The global may already point at this page (failure after
+		 * the page was published); clear it so that nothing, and in
+		 * particular hv_vmbus_cleanup(), uses or frees it again.
+		 */
+		hv_vmbus_g_context.hypercall_page = NULL;
+		free(virt_addr, M_DEVBUF);
+	}
+	return (ENOTSUP);
+}
+
+/**
+ * @brief Cleanup routine, called normally during driver unloading or exiting
+ *
+ * Frees the signal-event buffer and, when the recorded guest id is the
+ * FreeBSD guest id and a hypercall page is installed, clears the
+ * hypercall MSR and frees that page.
+ */
+void
+hv_vmbus_cleanup(void)
+{
+	hv_vmbus_x64_msr_hypercall_contents msr;
+
+	if (hv_vmbus_g_context.signal_event_buffer != NULL) {
+		free(hv_vmbus_g_context.signal_event_buffer, M_DEVBUF);
+		hv_vmbus_g_context.signal_event_buffer = NULL;
+		/* The parameter block pointed into the buffer just freed. */
+		hv_vmbus_g_context.signal_event_param = NULL;
+	}
+
+	if (hv_vmbus_g_context.guest_id != HV_FREEBSD_GUEST_ID ||
+	    hv_vmbus_g_context.hypercall_page == NULL)
+		return;
+
+	msr.as_uint64_t = 0;
+	hv_vmbus_write_msr(HV_X64_MSR_HYPERCALL, msr.as_uint64_t);
+	free(hv_vmbus_g_context.hypercall_page, M_DEVBUF);
+	hv_vmbus_g_context.hypercall_page = NULL;
+}
+
+/**
+ * @brief Post a message using the hypervisor message IPC.
+ * (This involves a hypercall.)
+ *
+ * The payload is copied into a temporary, aligned post-message block that
+ * is freed again before returning.
+ *
+ * @return EMSGSIZE when payload_size exceeds
+ * HV_MESSAGE_PAYLOAD_BYTE_COUNT, ENOMEM when the block cannot be
+ * allocated, otherwise the low 16 bits of the hypercall status
+ */
+hv_vmbus_status
+hv_vmbus_post_msg_via_msg_ipc(
+	hv_vmbus_connection_id	connection_id,
+	hv_vmbus_msg_type	message_type,
+	void*			payload,
+	size_t			payload_size)
+{
+	/* The leading 8 bytes leave room for aligning 'msg' upwards. */
+	struct alignedinput {
+		uint64_t			alignment8;
+		hv_vmbus_input_post_message	msg;
+	};
+
+	void *raw;
+	hv_vmbus_input_post_message *post;
+	hv_vmbus_status result;
+
+	if (payload_size > HV_MESSAGE_PAYLOAD_BYTE_COUNT)
+		return (EMSGSIZE);
+
+	raw = malloc(sizeof(struct alignedinput), M_DEVBUF,
+	    M_ZERO | M_NOWAIT);
+	KASSERT(raw != NULL,
+	    ("Error VMBUS: malloc failed to allocate message buffer!"));
+	if (raw == NULL)
+		return (ENOMEM);
+
+	post = (hv_vmbus_input_post_message *)
+	    (HV_ALIGN_UP((size_t) raw, HV_HYPERCALL_PARAM_ALIGN));
+
+	post->connection_id = connection_id;
+	post->message_type = message_type;
+	post->payload_size = payload_size;
+	memcpy((void *) post->payload, payload, payload_size);
+
+	result = hv_vmbus_do_hypercall(HV_CALL_POST_MESSAGE, post, 0) & 0xFFFF;
+
+	/* Free the original allocation, not the aligned pointer. */
+	free(raw, M_DEVBUF);
+	return (result);
+}
+
+/**
+ * @brief Signal an event through the hypervisor event IPC (one hypercall).
+ *
+ * The hypercall input is the globally prepared
+ * hv_vmbus_g_context.signal_event_param block.
+ *
+ * @return the low 16 bits of the hypercall result.
+ */
+hv_vmbus_status
+hv_vmbus_signal_event()
+{
+	return (hv_vmbus_do_hypercall(HV_CALL_SIGNAL_EVENT,
+	    hv_vmbus_g_context.signal_event_param, 0) & 0xFFFF);
+}
+
+/**
+ * @brief Set up the synthetic interrupt controller (SynIC) for this CPU.
+ *
+ * Allocates the per-CPU message and event pages, programs their page
+ * frame numbers into the SIMP/SIEFP MSRs, routes the VMBus message SINT to
+ * the supplied interrupt vector and finally sets the global SynIC enable
+ * bit.  Does nothing when no hypercall page has been set up or when
+ * running on a CPU other than 0 (SMP is not supported yet).
+ *
+ * @param irq_arg  pointer to a uint32_t holding the interrupt vector
+ */
+void
+hv_vmbus_synic_init(void *irq_arg)
+
+{
+	int			cpu;
+	uint32_t		irq_vector;
+	hv_vmbus_synic_simp	simp;
+	hv_vmbus_synic_siefp	siefp;
+	hv_vmbus_synic_scontrol sctrl;
+	hv_vmbus_synic_sint	shared_sint;
+	uint64_t		version;
+
+	irq_vector = *((uint32_t *) (irq_arg));
+	cpu = PCPU_GET(cpuid);
+
+	if (hv_vmbus_g_context.hypercall_page == NULL)
+		return;
+
+	/*
+	 * KYS: Looks like we can only initialize on cpu0; don't we support
+	 * SMP guests?
+	 *
+	 * TODO: Need to add SMP support for FreeBSD V9
+	 */
+
+	if (cpu != 0)
+		return;
+
+	/*
+	 * TODO: Check the version
+	 */
+	version = hv_vmbus_read_msr(HV_X64_MSR_SVERSION);
+
+	hv_vmbus_g_context.syn_ic_msg_page[cpu] =
+	    malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT | M_ZERO);
+	KASSERT(hv_vmbus_g_context.syn_ic_msg_page[cpu] != NULL,
+	    ("Error VMBUS: malloc failed for allocating page!"));
+	if (hv_vmbus_g_context.syn_ic_msg_page[cpu] == NULL)
+		return;		/* nothing allocated yet, nothing to undo */
+
+	hv_vmbus_g_context.syn_ic_event_page[cpu] =
+	    malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT | M_ZERO);
+	KASSERT(hv_vmbus_g_context.syn_ic_event_page[cpu] != NULL,
+	    ("Error VMBUS: malloc failed to allocate page!"));
+	if (hv_vmbus_g_context.syn_ic_event_page[cpu] == NULL)
+		goto cleanup;
+
+	/*
+	 * Setup the Synic's message page
+	 */
+
+	simp.as_uint64_t = hv_vmbus_read_msr(HV_X64_MSR_SIMP);
+	simp.simp_enabled = 1;
+	simp.base_simp_gpa = ((hv_get_phys_addr(
+	    hv_vmbus_g_context.syn_ic_msg_page[cpu])) >> PAGE_SHIFT);
+
+	hv_vmbus_write_msr(HV_X64_MSR_SIMP, simp.as_uint64_t);
+
+	/*
+	 * Setup the Synic's event page
+	 */
+	siefp.as_uint64_t = hv_vmbus_read_msr(HV_X64_MSR_SIEFP);
+	siefp.siefp_enabled = 1;
+	siefp.base_siefp_gpa = ((hv_get_phys_addr(
+	    hv_vmbus_g_context.syn_ic_event_page[cpu])) >> PAGE_SHIFT);
+
+	hv_vmbus_write_msr(HV_X64_MSR_SIEFP, siefp.as_uint64_t);
+
+	/*
+	 * Start from an all-zero register image so that the bits we do not
+	 * set explicitly are not stack garbage when written to the MSR.
+	 */
+	shared_sint.as_uint64_t = 0;
+	shared_sint.vector = irq_vector; /*HV_SHARED_SINT_IDT_VECTOR + 0x20; */
+	shared_sint.masked = FALSE;
+	shared_sint.auto_eoi = FALSE;
+
+	hv_vmbus_write_msr(
+	    HV_X64_MSR_SINT0 + HV_VMBUS_MESSAGE_SINT,
+	    shared_sint.as_uint64_t);
+
+	/* Enable the global synic bit */
+	sctrl.as_uint64_t = hv_vmbus_read_msr(HV_X64_MSR_SCONTROL);
+	sctrl.enable = 1;
+
+	hv_vmbus_write_msr(HV_X64_MSR_SCONTROL, sctrl.as_uint64_t);
+
+	hv_vmbus_g_context.syn_ic_initialized = TRUE;
+
+	return;
+
+	cleanup:
+
+	/*
+	 * Only reached when the event page allocation failed, so the message
+	 * page is the single live allocation.  Free it exactly once and drop
+	 * the stale pointer.
+	 */
+	free(hv_vmbus_g_context.syn_ic_msg_page[cpu], M_DEVBUF);
+	hv_vmbus_g_context.syn_ic_msg_page[cpu] = NULL;
+}
+
+/**
+ * @brief Cleanup routine for hv_vmbus_synic_init()
+ *
+ * Masks the VMBus message SINT, disables the SynIC message and event
+ * pages in their MSRs and releases the two per-CPU pages.  Does nothing
+ * if the SynIC was never initialized or when running on a CPU other
+ * than 0.
+ *
+ * @param arg  unused
+ */
+void hv_vmbus_synic_cleanup(void *arg)
+{
+	hv_vmbus_synic_sint	shared_sint;
+	hv_vmbus_synic_simp	simp;
+	hv_vmbus_synic_siefp	siefp;
+	int			cpu = PCPU_GET(cpuid);
+
+	if (!hv_vmbus_g_context.syn_ic_initialized)
+		return;
+
+	if (cpu != 0)
+		return; /* TODO: XXXKYS: SMP? */
+
+	shared_sint.as_uint64_t = hv_vmbus_read_msr(
+	    HV_X64_MSR_SINT0 + HV_VMBUS_MESSAGE_SINT);
+
+	shared_sint.masked = 1;
+
+	/*
+	 * Disable the interrupt
+	 */
+	hv_vmbus_write_msr(
+	    HV_X64_MSR_SINT0 + HV_VMBUS_MESSAGE_SINT,
+	    shared_sint.as_uint64_t);
+
+	simp.as_uint64_t = hv_vmbus_read_msr(HV_X64_MSR_SIMP);
+	simp.simp_enabled = 0;
+	simp.base_simp_gpa = 0;
+
+	hv_vmbus_write_msr(HV_X64_MSR_SIMP, simp.as_uint64_t);
+
+	siefp.as_uint64_t = hv_vmbus_read_msr(HV_X64_MSR_SIEFP);
+	siefp.siefp_enabled = 0;
+	siefp.base_siefp_gpa = 0;
+
+	hv_vmbus_write_msr(HV_X64_MSR_SIEFP, siefp.as_uint64_t);
+
+	/*
+	 * hv_vmbus_synic_init() obtains these pages with malloc(9), so they
+	 * must go back through free(9); contigfree() is only valid for
+	 * memory obtained from contigmalloc().
+	 */
+	free(hv_vmbus_g_context.syn_ic_msg_page[cpu], M_DEVBUF);
+	free(hv_vmbus_g_context.syn_ic_event_page[cpu], M_DEVBUF);
+}
+
diff --git a/sys/dev/hyperv/vmbus/hv_ring_buffer.c b/sys/dev/hyperv/vmbus/hv_ring_buffer.c
new file mode 100644
index 000000000000..f7c1965c8334
--- /dev/null
+++ b/sys/dev/hyperv/vmbus/hv_ring_buffer.c
@@ -0,0 +1,440 @@
+/*-
+ * Copyright (c) 2009-2012 Microsoft Corp.
+ * Copyright (c) 2012 NetApp Inc.
+ * Copyright (c) 2012 Citrix Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice unmodified, this list of conditions, and the following
+ * disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#include <sys/param.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+
+#include "hv_vmbus_priv.h"
+
+/*
+ * Amount of space to write to, given read index r, write index w and ring
+ * data size z.  The whole expansion is parenthesized so the macro can be
+ * used safely inside a larger expression.
+ */
+#define	HV_BYTES_AVAIL_TO_WRITE(r, w, z)	(((w) >= (r)) ?		\
+				((z) - ((w) - (r))) : ((r) - (w)))
+
+/**
+ * @brief Report how many bytes of the ring can currently be read and
+ * how many can be written.
+ *
+ * @param rbi    ring buffer to inspect
+ * @param read   out: bytes available to read
+ * @param write  out: bytes available to write
+ */
+static inline void
+get_ring_buffer_avail_bytes(
+	hv_vmbus_ring_buffer_info*	rbi,
+	uint32_t*			read,
+	uint32_t*			write)
+{
+	/* Snapshot each index once: read index first, then write index. */
+	uint32_t rd_idx = rbi->ring_buffer->read_index;
+	uint32_t wr_idx = rbi->ring_buffer->write_index;
+
+	*write = HV_BYTES_AVAIL_TO_WRITE(rd_idx, wr_idx, rbi->ring_data_size);
+	/* Whatever is not writable is readable. */
+	*read = rbi->ring_data_size - *write;
+}
+
+/**
+ * @brief Return the current write index of the ring buffer.
+ */
+static inline uint32_t
+get_next_write_location(hv_vmbus_ring_buffer_info* ring_info)
+{
+	return (ring_info->ring_buffer->write_index);
+}
+
+/**
+ * @brief Store a new write index into the ring buffer header.
+ */
+static inline void
+set_next_write_location(
+	hv_vmbus_ring_buffer_info*	ring_info,
+	uint32_t			next_write_location)
+{
+	hv_vmbus_ring_buffer *ring = ring_info->ring_buffer;
+
+	ring->write_index = next_write_location;
+}
+
+/**
+ * @brief Return the current read index of the ring buffer.
+ */
+static inline uint32_t
+get_next_read_location(hv_vmbus_ring_buffer_info* ring_info)
+{
+	return (ring_info->ring_buffer->read_index);
+}
+
+/**
+ * @brief Return the read index advanced by @p offset bytes, wrapped to the
+ * ring data size.  Lets the caller skip over leading bytes.
+ */
+static inline uint32_t
+get_next_read_location_with_offset(
+	hv_vmbus_ring_buffer_info*	ring_info,
+	uint32_t			offset)
+{
+	/* The sum is deliberately held in 32 bits before the modulo. */
+	uint32_t pos = ring_info->ring_buffer->read_index;
+
+	pos = pos + offset;
+	return (pos % ring_info->ring_data_size);
+}
+
+/**
+ * @brief Store a new read index into the ring buffer header.
+ */
+static inline void
+set_next_read_location(
+	hv_vmbus_ring_buffer_info*	ring_info,
+	uint32_t			next_read_location)
+{
+	hv_vmbus_ring_buffer *ring = ring_info->ring_buffer;
+
+	ring->read_index = next_read_location;
+}
+
+/**
+ * @brief Return a pointer to the first data byte of the ring, i.e. the
+ * `buffer` member of the ring buffer header.
+ */
+static inline void *
+get_ring_buffer(hv_vmbus_ring_buffer_info* ring_info)
+{
+	hv_vmbus_ring_buffer *ring = ring_info->ring_buffer;
+
+	return ((void *) ring->buffer);
+}
+
+/**
+ * @brief Return the number of data bytes in the ring (ring_data_size).
+ */
+static inline uint32_t
+get_ring_buffer_size(hv_vmbus_ring_buffer_info* ring_info)
+{
+	uint32_t data_size = ring_info->ring_data_size;
+
+	return (data_size);
+}
+
+/**
+ * @brief Return the ring's current write index placed in the upper 32 bits
+ * of a uint64_t.
+ *
+ * Only the write index is encoded; the lower 32 bits are always zero.
+ */
+static inline uint64_t
+get_ring_buffer_indices(hv_vmbus_ring_buffer_info* ring_info)
+{
+	uint64_t write_idx = ring_info->ring_buffer->write_index;
+
+	return (write_idx << 32);
+}
+
+/*
+ * Copy src_len bytes from src into the ring starting at start_write_offset;
+ * returns the offset following the copied data.
+ */
+static uint32_t copy_to_ring_buffer(
+ hv_vmbus_ring_buffer_info* ring_info,
+ uint32_t start_write_offset,
+ char* src,
+ uint32_t src_len);
+
+/*
+ * Copy dest_len bytes out of the ring starting at start_read_offset into
+ * dest; returns the offset following the copied data.
+ */
+static uint32_t copy_from_ring_buffer(
+ hv_vmbus_ring_buffer_info* ring_info,
+ char* dest,
+ uint32_t dest_len,
+ uint32_t start_read_offset);
+
+
+/**
+ * @brief Return the interrupt_mask field of the ring buffer header.
+ */
+uint32_t
+hv_vmbus_get_ring_buffer_interrupt_mask(hv_vmbus_ring_buffer_info *rbi)
+{
+	hv_vmbus_ring_buffer *ring = rbi->ring_buffer;
+
+	return (ring->interrupt_mask);
+}
+
+/**
+ * @brief Prepare a ring buffer descriptor over caller-supplied memory.
+ *
+ * The descriptor is zeroed, pointed at @p buffer (whose start is treated
+ * as the ring header), both indices are reset to 0, the sizes are recorded
+ * and the spin lock protecting the ring is created.
+ *
+ * @param ring_info   descriptor to initialize
+ * @param buffer      memory holding the ring header followed by the data
+ * @param buffer_len  total length of @p buffer in bytes
+ * @return always 0
+ */
+int
+hv_vmbus_ring_buffer_init(
+	hv_vmbus_ring_buffer_info*	ring_info,
+	void*				buffer,
+	uint32_t			buffer_len)
+{
+	hv_vmbus_ring_buffer *ring = (hv_vmbus_ring_buffer*) buffer;
+
+	memset(ring_info, 0, sizeof(*ring_info));
+
+	ring_info->ring_buffer = ring;
+	ring->write_index = 0;
+	ring->read_index = 0;
+
+	/* The data area is whatever remains after the header. */
+	ring_info->ring_size = buffer_len;
+	ring_info->ring_data_size = buffer_len - sizeof(hv_vmbus_ring_buffer);
+
+	mtx_init(&ring_info->ring_lock, "vmbus ring buffer", NULL, MTX_SPIN);
+
+	return (0);
+}
+
+/**
+ * @brief Tear down a ring buffer descriptor by destroying its spin lock.
+ *
+ * The ring memory itself is not touched here.
+ */
+void
+hv_ring_buffer_cleanup(hv_vmbus_ring_buffer_info* ring_info)
+{
+	mtx_destroy(&(ring_info->ring_lock));
+}
+
+/**
+ * @brief Write to the ring buffer.
+ *
+ * Copies every scatter/gather element into the ring back to back, appends
+ * an 8-byte trailer and then publishes the new write index.  The whole
+ * operation runs under the ring's spin lock.
+ *
+ * @param out_ring_info    ring to write into
+ * @param sg_buffers       array of (data, length) elements to copy
+ * @param sg_buffer_count  number of elements in sg_buffers
+ * @return 0 on success, EAGAIN when the ring lacks room for the packet
+ */
+int
+hv_ring_buffer_write(
+ hv_vmbus_ring_buffer_info* out_ring_info,
+ hv_vmbus_sg_buffer_list sg_buffers[],
+ uint32_t sg_buffer_count)
+{
+ int i = 0;
+ uint32_t byte_avail_to_write;
+ uint32_t byte_avail_to_read;
+ uint32_t total_bytes_to_write = 0;
+
+ volatile uint32_t next_write_location;
+ uint64_t prev_indices = 0;
+
+ /* Sum the payload lengths before taking the lock. */
+ for (i = 0; i < sg_buffer_count; i++) {
+ total_bytes_to_write += sg_buffers[i].length;
+ }
+
+ /* Account for the 8-byte trailer written after the payload. */
+ total_bytes_to_write += sizeof(uint64_t);
+
+ mtx_lock_spin(&out_ring_info->ring_lock);
+
+ get_ring_buffer_avail_bytes(out_ring_info, &byte_avail_to_read,
+ &byte_avail_to_write);
+
+ /*
+ * If there is only room for the packet, assume it is full.
+ * Otherwise, the next time around, we think the ring buffer
+ * is empty since the read index == write index
+ */
+
+ if (byte_avail_to_write <= total_bytes_to_write) {
+
+ mtx_unlock_spin(&out_ring_info->ring_lock);
+ return (EAGAIN);
+ }
+
+ /*
+ * Write to the ring buffer
+ */
+ next_write_location = get_next_write_location(out_ring_info);
+
+ for (i = 0; i < sg_buffer_count; i++) {
+ next_write_location = copy_to_ring_buffer(out_ring_info,
+ next_write_location, (char *) sg_buffers[i].data,
+ sg_buffers[i].length);
+ }
+
+ /*
+ * Set previous packet start: the trailer holds the write index as it
+ * was before this packet, shifted into the upper 32 bits.
+ */
+ prev_indices = get_ring_buffer_indices(out_ring_info);
+
+ next_write_location = copy_to_ring_buffer(
+ out_ring_info, next_write_location,
+ (char *) &prev_indices, sizeof(uint64_t));
+
+ /*
+ * Make sure we flush all writes before updating the writeIndex
+ */
+ wmb();
+
+ /*
+ * Now, update the write location
+ */
+ set_next_write_location(out_ring_info, next_write_location);
+
+ mtx_unlock_spin(&out_ring_info->ring_lock);
+
+ return (0);
+}
+
+/**
+ * @brief Copy bytes from the head of the ring without consuming them.
+ *
+ * @param in_ring_info  ring to read from
+ * @param buffer        destination for the copied bytes
+ * @param buffer_len    number of bytes to copy
+ * @return 0 on success, EAGAIN when fewer than buffer_len bytes are
+ *         available to read
+ */
+int
+hv_ring_buffer_peek(
+	hv_vmbus_ring_buffer_info*	in_ring_info,
+	void*				buffer,
+	uint32_t			buffer_len)
+{
+	uint32_t readable;
+	uint32_t writable;
+	int error = EAGAIN;
+
+	mtx_lock_spin(&in_ring_info->ring_lock);
+
+	get_ring_buffer_avail_bytes(in_ring_info, &readable, &writable);
+
+	if (readable >= buffer_len) {
+		/* The read index is left untouched: this is only a peek. */
+		(void) copy_from_ring_buffer(in_ring_info, (char *)buffer,
+		    buffer_len, get_next_read_location(in_ring_info));
+		error = 0;
+	}
+
+	mtx_unlock_spin(&in_ring_info->ring_lock);
+
+	return (error);
+}
+
+/**
+ * @brief Read and advance the read index.
+ *
+ * Skips offset bytes from the current read index, copies buffer_len bytes
+ * into buffer, consumes the 8-byte trailer that follows the packet and
+ * then publishes the new read index.  Runs under the ring's spin lock.
+ *
+ * @param in_ring_info  ring to read from
+ * @param buffer        destination for the copied bytes
+ * @param buffer_len    number of bytes to copy; must be non-zero
+ * @param offset        bytes to skip before copying
+ * @return 0 on success, EINVAL for a zero buffer_len, EAGAIN when fewer
+ *         than buffer_len bytes are available to read
+ */
+int
+hv_ring_buffer_read(
+ hv_vmbus_ring_buffer_info* in_ring_info,
+ void* buffer,
+ uint32_t buffer_len,
+ uint32_t offset)
+{
+ uint32_t bytes_avail_to_write;
+ uint32_t bytes_avail_to_read;
+ uint32_t next_read_location = 0;
+ uint64_t prev_indices = 0;
+
+ /* buffer_len is unsigned, so this rejects exactly buffer_len == 0. */
+ if (buffer_len <= 0)
+ return (EINVAL);
+
+ mtx_lock_spin(&in_ring_info->ring_lock);
+
+ get_ring_buffer_avail_bytes(
+ in_ring_info, &bytes_avail_to_read,
+ &bytes_avail_to_write);
+
+ /*
+ * Make sure there is something to read
+ *
+ * NOTE(review): only buffer_len is compared here; the skipped offset
+ * and the 8-byte trailer consumed below are not included in the check.
+ */
+ if (bytes_avail_to_read < buffer_len) {
+ mtx_unlock_spin(&in_ring_info->ring_lock);
+ return (EAGAIN);
+ }
+
+ next_read_location = get_next_read_location_with_offset(
+ in_ring_info,
+ offset);
+
+ next_read_location = copy_from_ring_buffer(
+ in_ring_info,
+ (char *) buffer,
+ buffer_len,
+ next_read_location);
+
+ /* Consume the trailer; its value is read but not used afterwards. */
+ next_read_location = copy_from_ring_buffer(
+ in_ring_info,
+ (char *) &prev_indices,
+ sizeof(uint64_t),
+ next_read_location);
+
+ /*
+ * Make sure all reads are done before we update the read index since
+ * the writer may start writing to the read area once the read index
+ * is updated.
+ *
+ * NOTE(review): the barrier used is wmb() although the stated goal is
+ * to order earlier reads - confirm it is strong enough.
+ */
+ wmb();
+
+ /*
+ * Update the read index
+ */
+ set_next_read_location(in_ring_info, next_read_location);
+
+ mtx_unlock_spin(&in_ring_info->ring_lock);
+
+ return (0);
+}
+
+/**
+ * @brief Copy a linear source buffer into the ring, splitting the copy in
+ * two when it runs past the end of the ring data area.
+ *
+ * The caller must already have verified that there is enough room; only
+ * wrap-around on the ring (destination) side is handled.
+ *
+ * @return the ring offset immediately after the copied bytes
+ */
+uint32_t
+copy_to_ring_buffer(
+	hv_vmbus_ring_buffer_info*	ring_info,
+	uint32_t			start_write_offset,
+	char*				src,
+	uint32_t			src_len)
+{
+	char *base = get_ring_buffer(ring_info);
+	uint32_t capacity = get_ring_buffer_size(ring_info);
+	uint32_t tail_room = capacity - start_write_offset;
+	uint32_t head_len = (src_len > tail_room) ? tail_room : src_len;
+
+	/* First piece: from the start offset up to the end of the ring. */
+	memcpy(base + start_write_offset, src, head_len);
+
+	/* Second piece, only on wrap-around: continue at the ring start. */
+	if (head_len != src_len)
+		memcpy(base, src + head_len, src_len - head_len);
+
+	return ((start_write_offset + src_len) % capacity);
+}
+
+/**
+ * @brief Copy bytes out of the ring into a linear destination buffer,
+ * splitting the copy in two when it runs past the end of the ring data
+ * area.
+ *
+ * The caller must already have verified that enough data is present; only
+ * wrap-around on the ring (source) side is handled.
+ *
+ * @return the ring offset immediately after the copied bytes
+ */
+uint32_t
+copy_from_ring_buffer(
+	hv_vmbus_ring_buffer_info*	ring_info,
+	char*				dest,
+	uint32_t			dest_len,
+	uint32_t			start_read_offset)
+{
+	char *base = get_ring_buffer(ring_info);
+	uint32_t capacity = get_ring_buffer_size(ring_info);
+	uint32_t tail_avail = capacity - start_read_offset;
+	uint32_t head_len = (dest_len > tail_avail) ? tail_avail : dest_len;
+
+	/* First piece: from the start offset up to the end of the ring. */
+	memcpy(dest, base + start_read_offset, head_len);
+
+	/* Second piece, only on wrap-around: continue at the ring start. */
+	if (head_len != dest_len)
+		memcpy(dest + head_len, base, dest_len - head_len);
+
+	return ((start_read_offset + dest_len) % capacity);
+}
+
diff --git a/sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c b/sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c
new file mode 100644
index 000000000000..4dfddd3e30fa
--- /dev/null
+++ b/sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c
@@ -0,0 +1,583 @@
+/*-
+ * Copyright (c) 2009-2012 Microsoft Corp.
+ * Copyright (c) 2012 NetApp Inc.
+ * Copyright (c) 2012 Citrix Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice unmodified, this list of conditions, and the following
+ * disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * VM Bus Driver Implementation
+ */
+
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/sysctl.h>
+#include <sys/syslog.h>
+#include <sys/systm.h>
+#include <sys/rtprio.h>
+#include <sys/interrupt.h>
+#include <sys/sx.h>
+#include <sys/taskqueue.h>
+#include <sys/mutex.h>
+#include <sys/smp.h>
+
+#include <machine/resource.h>
+#include <sys/rman.h>
+
+#include <machine/stdarg.h>
+#include <machine/intr_machdep.h>
+#include <sys/pcpu.h>
+
+#include "hv_vmbus_priv.h"
+
+
+/* IRQ number requested for the VMBus interrupt. */
+#define VMBUS_IRQ 0x5
+
+/* Interrupt events and handler cookies filled in by swi_add(). */
+static struct intr_event *hv_msg_intr_event;
+static struct intr_event *hv_event_intr_event;
+static void *msg_swintr;
+static void *event_swintr;
+/* The vmbus device recorded at attach time; parent of all child devices. */
+static device_t vmbus_devp;
+/* Cookie returned by bus_setup_intr(), needed for bus_teardown_intr(). */
+static void *vmbus_cookiep;
+/* Resource id and resource of the allocated IRQ. */
+static int vmbus_rid;
+struct resource *intr_res;
+static int vmbus_irq = VMBUS_IRQ;
+/* Set to 1 once vmbus_bus_init() has been entered. */
+static int vmbus_inited;
+
+/**
+ * @brief Software interrupt thread routine to handle channel messages from
+ * the hypervisor.
+ *
+ * Repeatedly takes the message in the VMBus SINT slot of this CPU's SynIC
+ * message page, queues a heap copy of it for hv_vmbus_on_channel_message,
+ * marks the slot empty and, if the hypervisor flagged more messages as
+ * pending, writes the EOM MSR.  Stops when the slot is empty.
+ *
+ * @param dummy  unused
+ */
+static void
+vmbus_msg_swintr(void *dummy)
+{
+ int cpu;
+ void* page_addr;
+ hv_vmbus_message* msg;
+ hv_vmbus_message* copied;
+
+ cpu = PCPU_GET(cpuid);
+ page_addr = hv_vmbus_g_context.syn_ic_msg_page[cpu];
+ /* Index the page as an array of messages; pick the VMBus SINT slot. */
+ msg = (hv_vmbus_message*) page_addr + HV_VMBUS_MESSAGE_SINT;
+
+ for (;;) {
+ if (msg->header.message_type == HV_MESSAGE_TYPE_NONE) {
+ break; /* no message */
+ } else {
+ copied = malloc(sizeof(hv_vmbus_message),
+ M_DEVBUF, M_NOWAIT);
+ KASSERT(copied != NULL,
+ ("Error VMBUS: malloc failed to allocate"
+ " hv_vmbus_message!"));
+ /*
+ * NOTE(review): on allocation failure the slot is left
+ * untouched and the loop retries at once, so this spins
+ * until malloc succeeds - confirm this is intended.
+ */
+ if (copied == NULL)
+ continue;
+ memcpy(copied, msg, sizeof(hv_vmbus_message));
+ hv_queue_work_item(hv_vmbus_g_connection.work_queue,
+ hv_vmbus_on_channel_message, copied);
+ }
+
+ /* Mark the slot free so another message can be delivered. */
+ msg->header.message_type = HV_MESSAGE_TYPE_NONE;
+
+ /*
+ * Make sure the write to message_type (ie set to
+ * HV_MESSAGE_TYPE_NONE) happens before we read the
+ * message_pending and EOMing. Otherwise, the EOMing will
+ * not deliver any more messages
+ * since there is no empty slot
+ */
+ wmb();
+
+ if (msg->header.message_flags.message_pending) {
+ /*
+ * This will cause message queue rescan to possibly
+ * deliver another msg from the hypervisor
+ */
+ hv_vmbus_write_msr(HV_X64_MSR_EOM, 0);
+ }
+ }
+}
+
+/**
+ * @brief Interrupt filter routine for VMBUS.
+ *
+ * Decides which kind of VMBUS work is pending - an event, a channel
+ * message, or both - and schedules the matching software interrupt.
+ * As an interrupt filter, this function runs in a very restricted
+ * environment.  From the manpage for bus_setup_intr(9):
+ *
+ *   In this restricted environment, care must be taken to account for all
+ *   races.  A careful analysis of races should be done as well.  It is
+ *   generally cheaper to take an extra interrupt, for example, than to
+ *   protect variables with spinlocks.  Read, modify, write cycles of
+ *   hardware registers need to be carefully analyzed if other threads are
+ *   accessing the same registers.
+ *
+ * @param unused  not used
+ * @return always FILTER_HANDLED
+ */
+static int
+hv_vmbus_isr(void *unused)
+{
+	hv_vmbus_synic_event_flags*	event_flags;
+	hv_vmbus_message*		sint_msg;
+	int				cur_cpu;
+
+	cur_cpu = PCPU_GET(cpuid);
+	/* (Temporary limit) */
+	KASSERT(cur_cpu == 0,
+	    ("hv_vmbus_isr: Interrupt on CPU other than zero"));
+
+	/*
+	 * The Windows team has advised that we check for events
+	 * before checking for messages. This is the way they do it
+	 * in Windows when running as a guest in Hyper-V
+	 */
+	event_flags = &((hv_vmbus_synic_event_flags*)
+	    hv_vmbus_g_context.syn_ic_event_page[cur_cpu])
+	    [HV_VMBUS_MESSAGE_SINT];
+
+	/* Since we are a child, we only need to check bit 0 */
+	if (synch_test_and_clear_bit(0, &event_flags->flags32[0]))
+		swi_sched(event_swintr, 0);
+
+	/* Check whether the message slot actually holds a message. */
+	sint_msg = &((hv_vmbus_message*)
+	    hv_vmbus_g_context.syn_ic_msg_page[cur_cpu])
+	    [HV_VMBUS_MESSAGE_SINT];
+
+	if (sint_msg->header.message_type != HV_MESSAGE_TYPE_NONE)
+		swi_sched(msg_swintr, 0);
+
+	return FILTER_HANDLED;
+}
+
+/**
+ * @brief Bus method: read an instance variable of a vmbus child device.
+ *
+ * @param dev     the bus device (unused)
+ * @param child   child whose ivars hold a struct hv_device
+ * @param index   one of the HV_VMBUS_IVAR_* selectors
+ * @param result  out: address of the class id, address of the device id,
+ *                the device context pointer, or the child's device_t
+ * @return 0 on success, ENOENT for an unknown selector
+ */
+static int
+vmbus_read_ivar(
+	device_t	dev,
+	device_t	child,
+	int		index,
+	uintptr_t*	result)
+{
+	struct hv_device *ctx = device_get_ivars(child);
+	uintptr_t value;
+
+	switch (index) {
+	case HV_VMBUS_IVAR_TYPE:
+		value = (uintptr_t) &ctx->class_id;
+		break;
+	case HV_VMBUS_IVAR_INSTANCE:
+		value = (uintptr_t) &ctx->device_id;
+		break;
+	case HV_VMBUS_IVAR_DEVCTX:
+		value = (uintptr_t) ctx;
+		break;
+	case HV_VMBUS_IVAR_NODE:
+		value = (uintptr_t) ctx->device;
+		break;
+	default:
+		/* Unknown selector: leave *result untouched. */
+		return (ENOENT);
+	}
+
+	*result = value;
+	return (0);
+}
+
+/**
+ * @brief Bus method: write an instance variable of a vmbus child device.
+ *
+ * Every known ivar is read-only, so nothing is ever stored.
+ *
+ * @return EINVAL for a known (read-only) selector, ENOENT otherwise
+ */
+static int
+vmbus_write_ivar(
+	device_t	dev,
+	device_t	child,
+	int		index,
+	uintptr_t	value)
+{
+	if (index == HV_VMBUS_IVAR_TYPE ||
+	    index == HV_VMBUS_IVAR_INSTANCE ||
+	    index == HV_VMBUS_IVAR_DEVCTX ||
+	    index == HV_VMBUS_IVAR_NODE) {
+		/* read-only */
+		return (EINVAL);
+	}
+
+	return (ENOENT);
+}
+
+/**
+ * @brief Allocate and fill in a child device context for a channel.
+ *
+ * @param type      class GUID copied into the new context
+ * @param instance  instance GUID copied into the new context
+ * @param channel   channel the device is bound to
+ * @return the zero-initialized, populated context, or NULL when the
+ *         allocation fails
+ */
+struct hv_device*
+hv_vmbus_child_device_create(
+	hv_guid			type,
+	hv_guid			instance,
+	hv_vmbus_channel*	channel)
+{
+	hv_device* new_dev;
+
+	new_dev = malloc(sizeof(hv_device), M_DEVBUF, M_NOWAIT | M_ZERO);
+	KASSERT(new_dev != NULL,
+	    ("Error VMBUS: malloc failed to allocate hv_device!"));
+
+	if (new_dev != NULL) {
+		new_dev->channel = channel;
+		memcpy(&new_dev->class_id, &type, sizeof(hv_guid));
+		memcpy(&new_dev->device_id, &instance, sizeof(hv_guid));
+	}
+
+	return (new_dev);
+}
+
+/**
+ * @brief Print the class GUID of a device as 32 hex digits when booting
+ * verbosely; does nothing otherwise.
+ *
+ * @param dev  device whose class_id is printed
+ */
+static void
+print_dev_guid(struct hv_device *dev)
+{
+	/* 16 GUID bytes, two hex digits each, plus the terminating NUL. */
+	char guid_name[2 * 16 + 1];
+	int i;
+
+	/* Skip the formatting work entirely when nothing will be printed. */
+	if (!bootverbose)
+		return;
+
+	for (i = 0; i < 16; i++)
+		snprintf(&guid_name[2 * i], sizeof(guid_name) - 2 * i,
+		    "%02x", dev->class_id.data[i]);
+
+	printf("VMBUS: Class ID: %s\n", guid_name);
+}
+
+/**
+ * @brief Add a child device under the vmbus device and try to attach a
+ * driver to it.
+ *
+ * @param child_dev  device context; its `device` field is set to the newly
+ *                   created device_t
+ * @return ENOMEM when the child device cannot be created, otherwise 0.
+ *         The result of device_probe_and_attach() is deliberately not
+ *         propagated (presumably so a device without a loaded driver is
+ *         still kept - confirm against callers).
+ */
+int
+hv_vmbus_child_device_register(struct hv_device *child_dev)
+{
+	device_t child;
+
+	print_dev_guid(child_dev);
+
+	child = device_add_child(vmbus_devp, NULL, -1);
+	/* Never hand a NULL device to device_set_ivars()/probe below. */
+	if (child == NULL)
+		return (ENOMEM);
+
+	child_dev->device = child;
+	device_set_ivars(child, child_dev);
+
+	mtx_lock(&Giant);
+	(void) device_probe_and_attach(child);
+	mtx_unlock(&Giant);
+
+	return (0);
+}
+
+/**
+ * @brief Delete a child device from the vmbus device, holding Giant
+ * around the call.
+ *
+ * XXXKYS: Ensure that this is the opposite of device_add_child()
+ *
+ * @return the result of device_delete_child()
+ */
+int
+hv_vmbus_child_device_unregister(struct hv_device *child_dev)
+{
+	int error;
+
+	mtx_lock(&Giant);
+	error = device_delete_child(vmbus_devp, child_dev->device);
+	mtx_unlock(&Giant);
+
+	return (error);
+}
+
+/**
+ * @brief Device identify method: make sure the parent has a "vmbus" child.
+ *
+ * The child is added only when unit 0 does not exist yet, so running the
+ * identify step again does not create duplicate vmbus devices.
+ */
+static void
+vmbus_identify(driver_t *driver, device_t parent)
+{
+	if (device_find_child(parent, "vmbus", 0) == NULL)
+		BUS_ADD_CHILD(parent, 0, "vmbus", 0);
+}
+
+/**
+ * @brief Device probe method: succeed only when a hypervisor is detected.
+ *
+ * @return 0 (and sets the device description) when
+ *         hv_vmbus_query_hypervisor_presence() reports true, else ENXIO
+ */
+static int
+vmbus_probe(device_t dev)
+{
+	if (bootverbose)
+		device_printf(dev, "VMBUS: probe\n");
+
+	if (hv_vmbus_query_hypervisor_presence()) {
+		device_set_desc(dev, "Vmbus Devices");
+		return (0);
+	}
+
+	return (ENXIO);
+}
+
+/**
+ * @brief Main vmbus driver initialization routine.
+ *
+ * Here, we
+ * - initialize the vmbus driver context
+ * - setup various driver entry points
+ * - invoke the vmbus hv main init routine
+ * - get the irq resource
+ * - invoke the vmbus to add the vmbus root device
+ * - setup the vmbus root device
+ * - retrieve the channel offers
+ *
+ * Resources are acquired in order and released in reverse through the
+ * cleanup4..cleanup labels on failure.
+ *
+ * @return 0 on success (or if already entered once), otherwise the error
+ *         of the step that failed
+ */
+static int
+vmbus_bus_init(void)
+{
+ /*
+ * Local layout used to pull io_vector out of the interrupt source.
+ * NOTE(review): presumably mirrors a private kernel structure and must
+ * stay in sync with it - confirm.
+ */
+ struct ioapic_intsrc {
+ struct intsrc io_intsrc;
+ u_int io_irq;
+ u_int io_intpin:8;
+ u_int io_vector:8;
+ u_int io_cpu:8;
+ u_int io_activehi:1;
+ u_int io_edgetrigger:1;
+ u_int io_masked:1;
+ int io_bus:4;
+ uint32_t io_lowreg;
+ };
+
+ int ret;
+ unsigned int vector = 0;
+ struct intsrc *isrc;
+ struct ioapic_intsrc *intpin;
+
+ if (vmbus_inited)
+ return (0);
+
+ /*
+ * NOTE(review): the flag is set before anything has succeeded and is
+ * not cleared on the failure paths below, so a failed attempt is never
+ * retried.
+ */
+ vmbus_inited = 1;
+
+ ret = hv_vmbus_init();
+
+ if (ret) {
+ if(bootverbose)
+ printf("Error VMBUS: Hypervisor Initialization Failed!\n");
+ return (ret);
+ }
+
+ /* Software interrupt that drains channel messages. */
+ ret = swi_add(&hv_msg_intr_event, "hv_msg", vmbus_msg_swintr,
+ NULL, SWI_CLOCK, 0, &msg_swintr);
+
+ if (ret)
+ goto cleanup;
+
+ /*
+ * Message SW interrupt handler checks a per-CPU page and
+ * thus the thread needs to be bound to CPU-0 - which is where
+ * all interrupts are processed.
+ */
+ ret = intr_event_bind(hv_msg_intr_event, 0);
+
+ if (ret)
+ goto cleanup1;
+
+ /* Software interrupt that handles channel events. */
+ ret = swi_add(&hv_event_intr_event, "hv_event", hv_vmbus_on_events,
+ NULL, SWI_CLOCK, 0, &event_swintr);
+
+ if (ret)
+ goto cleanup1;
+
+ intr_res = bus_alloc_resource(vmbus_devp,
+ SYS_RES_IRQ, &vmbus_rid, vmbus_irq, vmbus_irq, 1, RF_ACTIVE);
+
+ if (intr_res == NULL) {
+ ret = ENOMEM; /* XXXKYS: Need a better errno */
+ goto cleanup2;
+ }
+
+ /*
+ * Setup interrupt filter handler
+ */
+ ret = bus_setup_intr(vmbus_devp, intr_res,
+ INTR_TYPE_NET | INTR_MPSAFE, hv_vmbus_isr, NULL,
+ NULL, &vmbus_cookiep);
+
+ if (ret != 0)
+ goto cleanup3;
+
+ /* Deliver the interrupt on CPU 0 only. */
+ ret = bus_bind_intr(vmbus_devp, intr_res, 0);
+ if (ret != 0)
+ goto cleanup4;
+
+ isrc = intr_lookup_source(vmbus_irq);
+ if ((isrc == NULL) || (isrc->is_event == NULL)) {
+ ret = EINVAL;
+ goto cleanup4;
+ }
+
+ /* vector = isrc->is_event->ie_vector; */
+ intpin = (struct ioapic_intsrc *)isrc;
+ vector = intpin->io_vector;
+
+ if(bootverbose)
+ printf("VMBUS: irq 0x%x vector 0x%x\n", vmbus_irq, vector);
+
+ /**
+ * Notify the hypervisor of our irq.
+ */
+
+ smp_rendezvous(NULL, hv_vmbus_synic_init, NULL, &vector);
+
+ /**
+ * Connect to VMBus in the root partition
+ *
+ * NOTE(review): if this fails, the cleanup path below does not call
+ * hv_vmbus_synic_cleanup() to undo the SynIC setup done above.
+ */
+ ret = hv_vmbus_connect();
+
+ if (ret)
+ goto cleanup4;
+
+ hv_vmbus_request_channel_offers();
+ return (ret);
+
+ cleanup4:
+
+ /*
+ * remove swi, bus and intr resource
+ */
+ bus_teardown_intr(vmbus_devp, intr_res, vmbus_cookiep);
+
+ cleanup3:
+
+ bus_release_resource(vmbus_devp, SYS_RES_IRQ, vmbus_rid, intr_res);
+
+ cleanup2:
+ swi_remove(event_swintr);
+
+ cleanup1:
+ swi_remove(msg_swintr);
+
+ cleanup:
+ hv_vmbus_cleanup();
+
+ return (ret);
+}
+
+/**
+ * @brief Device attach method: remember the vmbus device and, when the
+ * system is no longer cold, run the bus initialization right away.
+ *
+ * @return always 0
+ */
+static int
+vmbus_attach(device_t dev)
+{
+	if (bootverbose)
+		device_printf(dev, "VMBUS: attach dev: %p\n", dev);
+	vmbus_devp = dev;
+
+	/*
+	 * While `cold` is still set, thread scheduling is not possible yet,
+	 * so initialization is left for later.
+	 */
+	if (cold)
+		return (0);
+
+	vmbus_bus_init();
+	return (0);
+}
+
+/**
+ * @brief SYSINIT hook: run the bus initialization once the system is no
+ * longer cold (thread scheduling is possible).
+ */
+static void
+vmbus_init(void)
+{
+	if (cold)
+		return;
+
+	vmbus_bus_init();
+}
+
+/**
+ * @brief Undo vmbus_bus_init(): drop channels, disconnect from the
+ * hypervisor, shut down the SynIC and release interrupt resources.
+ */
+static void
+vmbus_bus_exit(void)
+{
+ hv_vmbus_release_unattached_channels();
+ hv_vmbus_disconnect();
+
+ /* Runs the SynIC cleanup on every CPU. */
+ smp_rendezvous(NULL, hv_vmbus_synic_cleanup, NULL, NULL);
+
+ /*
+ * NOTE(review): global state is released here, before the interrupt
+ * handler is torn down below - confirm no interrupt can still arrive.
+ */
+ hv_vmbus_cleanup();
+
+ /* remove swi, bus and intr resource */
+ bus_teardown_intr(vmbus_devp, intr_res, vmbus_cookiep);
+
+ bus_release_resource(vmbus_devp, SYS_RES_IRQ, vmbus_rid, intr_res);
+
+ swi_remove(msg_swintr);
+ swi_remove(event_swintr);
+
+ return;
+}
+
+/**
+ * @brief Thin wrapper that performs the bus teardown.
+ */
+static void
+vmbus_exit(void)
+{
+ vmbus_bus_exit();
+}
+
+/**
+ * @brief Device detach method: tear the bus down.
+ *
+ * @return always 0
+ */
+static int
+vmbus_detach(device_t dev)
+{
+ vmbus_exit();
+ return (0);
+}
+
+/**
+ * @brief Module load hook: only logs when booting verbosely.
+ */
+static void
+vmbus_mod_load(void)
+{
+	if (!bootverbose)
+		return;
+
+	printf("VMBUS: load\n");
+}
+
+/**
+ * @brief Module unload hook: only logs when booting verbosely.
+ */
+static void
+vmbus_mod_unload(void)
+{
+	if (!bootverbose)
+		return;
+
+	printf("VMBUS: unload\n");
+}
+
+/**
+ * @brief Module event handler: dispatches load and unload notifications
+ * to their hooks and ignores every other event.
+ *
+ * @return always 0
+ */
+static int
+vmbus_modevent(module_t mod, int what, void *arg)
+{
+	if (what == MOD_LOAD)
+		vmbus_mod_load();
+	else if (what == MOD_UNLOAD)
+		vmbus_mod_unload();
+
+	return (0);
+}
+
+/*
+ * Method table of the vmbus driver: device lifecycle entry points plus the
+ * bus methods exposed to child devices.
+ */
+static device_method_t vmbus_methods[] = {
+ /** Device interface */
+ DEVMETHOD(device_identify, vmbus_identify),
+ DEVMETHOD(device_probe, vmbus_probe),
+ DEVMETHOD(device_attach, vmbus_attach),
+ DEVMETHOD(device_detach, vmbus_detach),
+ DEVMETHOD(device_shutdown, bus_generic_shutdown),
+ DEVMETHOD(device_suspend, bus_generic_suspend),
+ DEVMETHOD(device_resume, bus_generic_resume),
+
+ /** Bus interface */
+ DEVMETHOD(bus_add_child, bus_generic_add_child),
+ DEVMETHOD(bus_print_child, bus_generic_print_child),
+ DEVMETHOD(bus_read_ivar, vmbus_read_ivar),
+ DEVMETHOD(bus_write_ivar, vmbus_write_ivar),
+
+ /* Terminating entry. */
+ { 0, 0 } };
+
+static char driver_name[] = "vmbus";
+/* Third member (softc size) is 0. */
+static driver_t vmbus_driver = { driver_name, vmbus_methods,0, };
+
+
+devclass_t vmbus_devclass;
+
+/* Register the driver under the nexus bus with vmbus_modevent as handler. */
+DRIVER_MODULE(vmbus, nexus, vmbus_driver, vmbus_devclass, vmbus_modevent, 0);
+MODULE_VERSION(vmbus,1);
+
+/* TODO: We want to be earlier than SI_SUB_VFS */
+SYSINIT(vmb_init, SI_SUB_VFS, SI_ORDER_MIDDLE, vmbus_init, NULL);
+
diff --git a/sys/dev/hyperv/vmbus/hv_vmbus_priv.h b/sys/dev/hyperv/vmbus/hv_vmbus_priv.h
new file mode 100644
index 000000000000..739acb158dfc
--- /dev/null
+++ b/sys/dev/hyperv/vmbus/hv_vmbus_priv.h
@@ -0,0 +1,770 @@
+/*-
+ * Copyright (c) 2009-2012 Microsoft Corp.
+ * Copyright (c) 2012 NetApp Inc.
+ * Copyright (c) 2012 Citrix Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice unmodified, this list of conditions, and the following
+ * disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __HYPERV_PRIV_H__
+#define __HYPERV_PRIV_H__
+
+#include <sys/param.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/sema.h>
+
+#include <dev/hyperv/include/hyperv.h>
+
+
+/*
+ * Status codes for hypervisor operations.
+ */
+
+typedef uint16_t hv_vmbus_status;
+
+#define HV_MESSAGE_SIZE (256)
+#define HV_MESSAGE_PAYLOAD_BYTE_COUNT (240)
+#define HV_MESSAGE_PAYLOAD_QWORD_COUNT (30)
+#define HV_ANY_VP (0xFFFFFFFF)
+
+/*
+ * Synthetic interrupt controller flag constants.
+ */
+
+#define HV_EVENT_FLAGS_COUNT (256 * 8)
+#define HV_EVENT_FLAGS_BYTE_COUNT (256)
+#define HV_EVENT_FLAGS_DWORD_COUNT (256 / sizeof(uint32_t))
+
+/*
+ * MessageId: HV_STATUS_INSUFFICIENT_BUFFERS
+ * MessageText:
+ * You did not supply enough message buffers to send a message.
+ */
+
+#define HV_STATUS_INSUFFICIENT_BUFFERS ((uint16_t)0x0013)
+
+typedef void (*hv_vmbus_channel_callback)(void *context);
+
+typedef struct {
+ void* data;
+ uint32_t length;
+} hv_vmbus_sg_buffer_list;
+
+typedef struct {
+ uint32_t current_interrupt_mask;
+ uint32_t current_read_index;
+ uint32_t current_write_index;
+ uint32_t bytes_avail_to_read;
+ uint32_t bytes_avail_to_write;
+} hv_vmbus_ring_buffer_debug_info;
+
+typedef struct {
+ uint32_t rel_id;
+ hv_vmbus_channel_state state;
+ hv_guid interface_type;
+ hv_guid interface_instance;
+ uint32_t monitor_id;
+ uint32_t server_monitor_pending;
+ uint32_t server_monitor_latency;
+ uint32_t server_monitor_connection_id;
+ uint32_t client_monitor_pending;
+ uint32_t client_monitor_latency;
+ uint32_t client_monitor_connection_id;
+ hv_vmbus_ring_buffer_debug_info inbound;
+ hv_vmbus_ring_buffer_debug_info outbound;
+} hv_vmbus_channel_debug_info;
+
+typedef union {
+ hv_vmbus_channel_version_supported version_supported;
+ hv_vmbus_channel_open_result open_result;
+ hv_vmbus_channel_gpadl_torndown gpadl_torndown;
+ hv_vmbus_channel_gpadl_created gpadl_created;
+ hv_vmbus_channel_version_response version_response;
+} hv_vmbus_channel_msg_response;
+
+/*
+ * Represents each channel msg on the vmbus connection
+ * This is a variable-size data structure depending on
+ * the msg type itself
+ */
+typedef struct hv_vmbus_channel_msg_info {
+ /*
+ * Bookkeeping stuff
+ */
+ TAILQ_ENTRY(hv_vmbus_channel_msg_info) msg_list_entry;
+ /*
+ * So far, this is only used to handle
+ * gpadl body message
+ */
+ TAILQ_HEAD(, hv_vmbus_channel_msg_info) sub_msg_list_anchor;
+ /*
+ * Synchronize the request/response if
+ * needed.
+ * KYS: Use a semaphore for now.
+ * Not perf critical.
+ */
+ struct sema wait_sema;
+ hv_vmbus_channel_msg_response response;
+ uint32_t message_size;
+ /**
+ * The channel message that goes out on
+ * the "wire". It will contain at
+ * minimum the
+ * hv_vmbus_channel_msg_header
+ * header.
+ */
+ unsigned char msg[0];
+} hv_vmbus_channel_msg_info;
+
+/*
+ * The format must be the same as hv_vm_data_gpa_direct
+ */
+typedef struct hv_vmbus_channel_packet_page_buffer {
+ uint16_t type;
+ uint16_t data_offset8;
+ uint16_t length8;
+ uint16_t flags;
+ uint64_t transaction_id;
+ uint32_t reserved;
+ uint32_t range_count;
+ hv_vmbus_page_buffer range[HV_MAX_PAGE_BUFFER_COUNT];
+} __packed hv_vmbus_channel_packet_page_buffer;
+
+/*
+ * The format must be the same as hv_vm_data_gpa_direct
+ */
+typedef struct hv_vmbus_channel_packet_multipage_buffer {
+ uint16_t type;
+ uint16_t data_offset8;
+ uint16_t length8;
+ uint16_t flags;
+ uint64_t transaction_id;
+ uint32_t reserved;
+ uint32_t range_count; /* Always 1 in this case */
+ hv_vmbus_multipage_buffer range;
+} __packed hv_vmbus_channel_packet_multipage_buffer;
+
+enum {
+ HV_VMBUS_MESSAGE_CONNECTION_ID = 1,
+ HV_VMBUS_MESSAGE_PORT_ID = 1,
+ HV_VMBUS_EVENT_CONNECTION_ID = 2,
+ HV_VMBUS_EVENT_PORT_ID = 2,
+ HV_VMBUS_MONITOR_CONNECTION_ID = 3,
+ HV_VMBUS_MONITOR_PORT_ID = 3,
+ HV_VMBUS_MESSAGE_SINT = 2
+};
+
+#define HV_PRESENT_BIT 0x80000000
+
+#define HV_HYPERCALL_PARAM_ALIGN sizeof(uint64_t)
+
+/*
+ * Connection identifier type
+ */
+typedef union {
+ uint32_t as_uint32_t;
+ struct {
+ uint32_t id:24;
+ uint32_t reserved:8;
+ } u;
+
+} __packed hv_vmbus_connection_id;
+
+/*
+ * Definition of the hv_vmbus_signal_event hypercall input structure
+ */
+typedef struct {
+ hv_vmbus_connection_id connection_id;
+ uint16_t flag_number;
+ uint16_t rsvd_z;
+} __packed hv_vmbus_input_signal_event;
+
+typedef struct {
+ uint64_t align8;
+ hv_vmbus_input_signal_event event;
+} __packed hv_vmbus_input_signal_event_buffer;
+
+typedef struct {
+ uint64_t guest_id;
+ void* hypercall_page;
+ hv_bool_uint8_t syn_ic_initialized;
+ /*
+ * This is used as an input param to the HV_CALL_SIGNAL_EVENT hypercall.
+ * The input param is immutable in our usage and
+ * must be dynamic mem (vs stack or global).
+ */
+ hv_vmbus_input_signal_event_buffer *signal_event_buffer;
+ /*
+ * The 8-byte aligned hv_vmbus_input_signal_event within the buffer above
+ */
+ hv_vmbus_input_signal_event *signal_event_param;
+
+ hv_vmbus_handle syn_ic_msg_page[MAXCPU]; /* array sized by MAXCPU */
+ hv_vmbus_handle syn_ic_event_page[MAXCPU]; /* array sized by MAXCPU */
+} hv_vmbus_context;
+
+/*
+ * Define hypervisor message types
+ */
+typedef enum {
+
+ HV_MESSAGE_TYPE_NONE = 0x00000000,
+
+ /*
+ * Memory access messages
+ */
+ HV_MESSAGE_TYPE_UNMAPPED_GPA = 0x80000000,
+ HV_MESSAGE_TYPE_GPA_INTERCEPT = 0x80000001,
+
+ /*
+ * Timer notification messages
+ */
+ HV_MESSAGE_TIMER_EXPIRED = 0x80000010,
+
+ /*
+ * Error messages
+ */
+ HV_MESSAGE_TYPE_INVALID_VP_REGISTER_VALUE = 0x80000020,
+ HV_MESSAGE_TYPE_UNRECOVERABLE_EXCEPTION = 0x80000021,
+ HV_MESSAGE_TYPE_UNSUPPORTED_FEATURE = 0x80000022,
+
+ /*
+ * Trace buffer complete messages
+ */
+ HV_MESSAGE_TYPE_EVENT_LOG_BUFFER_COMPLETE = 0x80000040,
+
+ /*
+ * Platform-specific processor intercept messages
+ */
+ HV_MESSAGE_TYPE_X64_IO_PORT_INTERCEPT = 0x80010000,
+ HV_MESSAGE_TYPE_X64_MSR_INTERCEPT = 0x80010001,
+ HV_MESSAGE_TYPE_X64_CPU_INTERCEPT = 0x80010002,
+ HV_MESSAGE_TYPE_X64_EXCEPTION_INTERCEPT = 0x80010003,
+ HV_MESSAGE_TYPE_X64_APIC_EOI = 0x80010004,
+ HV_MESSAGE_TYPE_X64_LEGACY_FP_ERROR = 0x80010005
+
+} hv_vmbus_msg_type;
+
+/*
+ * Define port identifier type
+ */
+typedef union _hv_vmbus_port_id {
+ uint32_t as_uint32_t;
+ struct {
+ uint32_t id:24;
+ uint32_t reserved:8;
+ } u ;
+} hv_vmbus_port_id;
+
+/*
+ * Define synthetic interrupt controller message flag
+ */
+typedef union {
+ uint8_t as_uint8_t;
+ struct {
+ uint8_t message_pending:1;
+ uint8_t reserved:7;
+ };
+} hv_vmbus_msg_flags;
+
+typedef uint64_t hv_vmbus_partition_id;
+
+/*
+ * Define synthetic interrupt controller message header
+ */
+typedef struct {
+ hv_vmbus_msg_type message_type;
+ uint8_t payload_size;
+ hv_vmbus_msg_flags message_flags;
+ uint8_t reserved[2];
+ union {
+ hv_vmbus_partition_id sender;
+ hv_vmbus_port_id port;
+ } u;
+} hv_vmbus_msg_header;
+
+/*
+ * Define synthetic interrupt controller message format
+ */
+typedef struct {
+ hv_vmbus_msg_header header;
+ union {
+ uint64_t payload[HV_MESSAGE_PAYLOAD_QWORD_COUNT];
+ } u ;
+} hv_vmbus_message;
+
+/*
+ * The maximum number of channels is bounded by the interrupt page, which
+ * is PAGE_SIZE bytes: one half is for the send endpoint interrupt and the
+ * other half is for the receive endpoint interrupt.
+ *
+ * Note: with a 4096-byte page, (PAGE_SIZE >> 1) << 3 is 16384 channels.
+ * The expansion is parenthesized so it is safe inside larger expressions.
+ */
+#define HV_MAX_NUM_CHANNELS	((PAGE_SIZE >> 1) << 3)
+
+/*
+ * (The value here must be a multiple of 32)
+ */
+#define HV_MAX_NUM_CHANNELS_SUPPORTED 256
+
+/*
+ * VM Bus connection states
+ */
+typedef enum {
+ HV_DISCONNECTED,
+ HV_CONNECTING,
+ HV_CONNECTED,
+ HV_DISCONNECTING
+} hv_vmbus_connect_state;
+
+#define HV_MAX_SIZE_CHANNEL_MESSAGE HV_MESSAGE_PAYLOAD_BYTE_COUNT
+
+
+typedef struct {
+ hv_vmbus_connect_state connect_state;
+ uint32_t next_gpadl_handle;
+ /**
+ * Represents channel interrupts. Each bit position
+ * represents a channel.
+ * When a channel sends an interrupt via VMBUS, it
+ * finds its bit in the send_interrupt_page, sets it and
+ * calls Hv to generate a port event. The other end
+ * receives the port event and parses the
+ * recv_interrupt_page to see which bit is set.
+ */
+ void *interrupt_page;
+ void *send_interrupt_page;
+ void *recv_interrupt_page;
+ /*
+ * 2 pages - the 1st page is for parent->child
+ * notification and the 2nd is for child->parent
+ * notification
+ */
+ void *monitor_pages;
+ TAILQ_HEAD(, hv_vmbus_channel_msg_info) channel_msg_anchor;
+ struct mtx channel_msg_lock; /* presumably guards channel_msg_anchor - confirm */
+ /**
+ * List of channels
+ */
+ TAILQ_HEAD(, hv_vmbus_channel) channel_anchor;
+ struct mtx channel_lock; /* presumably guards channel_anchor - confirm */
+
+ hv_vmbus_handle work_queue;
+ struct sema control_sema;
+} hv_vmbus_connection;
+
+/*
+ * Declare the MSR used to identify the guest OS
+ */
+#define HV_X64_MSR_GUEST_OS_ID 0x40000000
+
+typedef union {
+ uint64_t as_uint64_t;
+ struct {
+ uint64_t build_number : 16;
+ uint64_t service_version : 8; /* Service Pack, etc. */
+ uint64_t minor_version : 8;
+ uint64_t major_version : 8;
+ /*
+ * HV_GUEST_OS_MICROSOFT_IDS (If Vendor=MS)
+ * HV_GUEST_OS_VENDOR
+ */
+ uint64_t os_id : 8;
+ uint64_t vendor_id : 16;
+ };
+} hv_vmbus_x64_msr_guest_os_id_contents;
+
+/*
+ * Declare the MSR used to setup pages used to communicate with the hypervisor
+ */
+#define HV_X64_MSR_HYPERCALL 0x40000001
+
+typedef union {
+ uint64_t as_uint64_t;
+ struct {
+ uint64_t enable :1;
+ uint64_t reserved :11;
+ uint64_t guest_physical_address :52;
+ };
+} hv_vmbus_x64_msr_hypercall_contents;
+
+typedef union {
+ uint32_t as_uint32_t;
+ struct {
+ uint32_t group_enable :4;
+ uint32_t rsvd_z :28;
+ };
+} hv_vmbus_monitor_trigger_state;
+
+typedef union {
+ uint64_t as_uint64_t;
+ struct {
+ uint32_t pending;
+ uint32_t armed;
+ };
+} hv_vmbus_monitor_trigger_group;
+
+typedef struct {
+ hv_vmbus_connection_id connection_id;
+ uint16_t flag_number;
+ uint16_t rsvd_z;
+} hv_vmbus_monitor_parameter;
+
+/*
+ * hv_vmbus_monitor_page Layout (byte offsets in the left column are hexadecimal)
+ * ------------------------------------------------------
+ * | 0 | trigger_state (4 bytes) | Rsvd1 (4 bytes) |
+ * | 8 | trigger_group[0] |
+ * | 10 | trigger_group[1] |
+ * | 18 | trigger_group[2] |
+ * | 20 | trigger_group[3] |
+ * | 28 | Rsvd2[0] |
+ * | 30 | Rsvd2[1] |
+ * | 38 | Rsvd2[2] |
+ * | 40 | next_check_time[0][0] | next_check_time[0][1] |
+ * | ... |
+ * | 240 | latency[0][0..3] |
+ * | 340 | Rsvd3[0] |
+ * | 440 | parameter[0][0] |
+ * | 448 | parameter[0][1] |
+ * | ... |
+ * | 840 | Rsvd4[0] |
+ * ------------------------------------------------------
+ */
+
+typedef struct {
+ hv_vmbus_monitor_trigger_state trigger_state; /* offset 0x0 */
+ uint32_t rsvd_z1;
+
+ hv_vmbus_monitor_trigger_group trigger_group[4]; /* offset 0x8 */
+ uint64_t rsvd_z2[3];
+
+ int32_t next_check_time[4][32]; /* offset 0x40 */
+
+ uint16_t latency[4][32]; /* offset 0x240 */
+ uint64_t rsvd_z3[32];
+
+ hv_vmbus_monitor_parameter parameter[4][32]; /* offset 0x440 */
+
+ uint8_t rsvd_z4[1984]; /* 0x840 + 1984 = 0x1000: pads the struct to 4096 bytes */
+} hv_vmbus_monitor_page;
+
+/*
+ * The below CPUID leaves are present if VersionAndFeatures.HypervisorPresent
+ * is set by CPUID(HV_CPU_ID_FUNCTION_VERSION_AND_FEATURES).
+ */
+typedef enum {
+ HV_CPU_ID_FUNCTION_VERSION_AND_FEATURES = 0x00000001,
+ HV_CPU_ID_FUNCTION_HV_VENDOR_AND_MAX_FUNCTION = 0x40000000,
+ HV_CPU_ID_FUNCTION_HV_INTERFACE = 0x40000001,
+ /*
+ * The remaining functions depend on the value
+ * of hv_cpu_id_function_interface
+ */
+ HV_CPU_ID_FUNCTION_MS_HV_VERSION = 0x40000002,
+ HV_CPU_ID_FUNCTION_MS_HV_FEATURES = 0x40000003,
+ HV_CPU_ID_FUNCTION_MS_HV_ENLIGHTENMENT_INFORMATION = 0x40000004,
+ HV_CPU_ID_FUNCTION_MS_HV_IMPLEMENTATION_LIMITS = 0x40000005
+
+} hv_vmbus_cpuid_function;
+
+/*
+ * Define the format of the SIMP register
+ */
+typedef union {
+ uint64_t as_uint64_t;
+ struct {
+ uint64_t simp_enabled : 1;
+ uint64_t preserved : 11;
+ uint64_t base_simp_gpa : 52;
+ };
+} hv_vmbus_synic_simp;
+
+/*
+ * Define the format of the SIEFP register
+ */
+typedef union {
+ uint64_t as_uint64_t;
+ struct {
+ uint64_t siefp_enabled : 1;
+ uint64_t preserved : 11;
+ uint64_t base_siefp_gpa : 52;
+ };
+} hv_vmbus_synic_siefp;
+
+/*
+ * Define synthetic interrupt source
+ */
+typedef union {
+ uint64_t as_uint64_t;
+ struct {
+ uint64_t vector : 8;
+ uint64_t reserved1 : 8;
+ uint64_t masked : 1;
+ uint64_t auto_eoi : 1;
+ uint64_t reserved2 : 46;
+ };
+} hv_vmbus_synic_sint;
+
+/*
+ * Define syn_ic control register
+ */
+typedef union _hv_vmbus_synic_scontrol {
+ uint64_t as_uint64_t;
+ struct {
+ uint64_t enable : 1;
+ uint64_t reserved : 63;
+ };
+} hv_vmbus_synic_scontrol;
+
+/*
+ * Define the hv_vmbus_post_message hypercall input structure
+ */
+typedef struct {
+ hv_vmbus_connection_id connection_id;
+ uint32_t reserved;
+ hv_vmbus_msg_type message_type;
+ uint32_t payload_size;
+ uint64_t payload[HV_MESSAGE_PAYLOAD_QWORD_COUNT];
+} hv_vmbus_input_post_message;
+
+/*
+ * Define the synthetic interrupt controller event flags format
+ */
+typedef union {
+ uint8_t flags8[HV_EVENT_FLAGS_BYTE_COUNT];
+ uint32_t flags32[HV_EVENT_FLAGS_DWORD_COUNT];
+} hv_vmbus_synic_event_flags;
+
+
+/*
+ * Define synthetic interrupt controller model specific registers
+ */
+#define HV_X64_MSR_SCONTROL (0x40000080)
+#define HV_X64_MSR_SVERSION (0x40000081)
+#define HV_X64_MSR_SIEFP (0x40000082)
+#define HV_X64_MSR_SIMP (0x40000083)
+#define HV_X64_MSR_EOM (0x40000084)
+
+#define HV_X64_MSR_SINT0 (0x40000090)
+#define HV_X64_MSR_SINT1 (0x40000091)
+#define HV_X64_MSR_SINT2 (0x40000092)
+#define HV_X64_MSR_SINT3 (0x40000093)
+#define HV_X64_MSR_SINT4 (0x40000094)
+#define HV_X64_MSR_SINT5 (0x40000095)
+#define HV_X64_MSR_SINT6 (0x40000096)
+#define HV_X64_MSR_SINT7 (0x40000097)
+#define HV_X64_MSR_SINT8 (0x40000098)
+#define HV_X64_MSR_SINT9 (0x40000099)
+#define HV_X64_MSR_SINT10 (0x4000009A)
+#define HV_X64_MSR_SINT11 (0x4000009B)
+#define HV_X64_MSR_SINT12 (0x4000009C)
+#define HV_X64_MSR_SINT13 (0x4000009D)
+#define HV_X64_MSR_SINT14 (0x4000009E)
+#define HV_X64_MSR_SINT15 (0x4000009F)
+
+/*
+ * Declare the various hypercall operations
+ */
+typedef enum {
+ HV_CALL_POST_MESSAGE = 0x005c,
+ HV_CALL_SIGNAL_EVENT = 0x005d,
+} hv_vmbus_call_code;
+
+/**
+ * Global variables
+ */
+
+extern hv_vmbus_context hv_vmbus_g_context;
+extern hv_vmbus_connection hv_vmbus_g_connection;
+
+
+/*
+ * Private, VM Bus functions
+ */
+
+int hv_vmbus_ring_buffer_init(
+ hv_vmbus_ring_buffer_info *ring_info,
+ void *buffer,
+ uint32_t buffer_len);
+
+void hv_ring_buffer_cleanup(
+ hv_vmbus_ring_buffer_info *ring_info);
+
+int hv_ring_buffer_write(
+ hv_vmbus_ring_buffer_info *ring_info,
+ hv_vmbus_sg_buffer_list sg_buffers[],
+ uint32_t sg_buff_count);
+
+int hv_ring_buffer_peek(
+ hv_vmbus_ring_buffer_info *ring_info,
+ void *buffer,
+ uint32_t buffer_len);
+
+int hv_ring_buffer_read(
+ hv_vmbus_ring_buffer_info *ring_info,
+ void *buffer,
+ uint32_t buffer_len,
+ uint32_t offset);
+
+uint32_t hv_vmbus_get_ring_buffer_interrupt_mask(
+ hv_vmbus_ring_buffer_info *ring_info);
+
+void hv_vmbus_dump_ring_info(
+ hv_vmbus_ring_buffer_info *ring_info,
+ char *prefix);
+
+hv_vmbus_channel* hv_vmbus_allocate_channel(void);
+void hv_vmbus_free_vmbus_channel(hv_vmbus_channel *channel);
+void hv_vmbus_on_channel_message(void *context);
+int hv_vmbus_request_channel_offers(void);
+void hv_vmbus_release_unattached_channels(void);
+int hv_vmbus_init(void);
+void hv_vmbus_cleanup(void);
+
+uint16_t hv_vmbus_post_msg_via_msg_ipc(
+ hv_vmbus_connection_id connection_id,
+ hv_vmbus_msg_type message_type,
+ void *payload,
+ size_t payload_size);
+
+uint16_t hv_vmbus_signal_event(void);
+void hv_vmbus_synic_init(void *irq_arg);
+void hv_vmbus_synic_cleanup(void *arg);
+int hv_vmbus_query_hypervisor_presence(void);
+
+struct hv_device* hv_vmbus_child_device_create(
+ hv_guid device_type,
+ hv_guid device_instance,
+ hv_vmbus_channel *channel);
+
+int hv_vmbus_child_device_register(
+ struct hv_device *child_dev);
+int hv_vmbus_child_device_unregister(
+ struct hv_device *child_dev);
+hv_vmbus_channel* hv_vmbus_get_channel_from_rel_id(uint32_t rel_id);
+
+/**
+ * Connection interfaces
+ */
+int hv_vmbus_connect(void);
+int hv_vmbus_disconnect(void);
+int hv_vmbus_post_message(void *buffer, size_t buf_size);
+int hv_vmbus_set_event(uint32_t child_rel_id);
+void hv_vmbus_on_events(void *);
+
+/*
+ * static inline functions
+ * (with some helper macros for reading/writing to model specific registers)
+ */
+
+#ifdef __x86_64__
+/* Read MSR 'reg' into lvalue 'v'; the low half arrives in EAX, the high in EDX. */
+#define HV_VMBUS_READ_MSR(reg, v) do { \
+	uint32_t hv_msr_hi_, hv_msr_lo_; \
+	__asm__ __volatile__("rdmsr" \
+	    : "=a" (hv_msr_lo_), "=d" (hv_msr_hi_) \
+	    : "c" (reg)); \
+	(v) = (((uint64_t)hv_msr_hi_) << 32) | hv_msr_lo_; \
+} while (0)
+/* Write 'v' to MSR 'reg'; 'v' is evaluated once, then split into two halves. */
+#define HV_VMBUS_WRITE_MSR(reg, v) do { \
+	uint64_t hv_msr_val_ = (uint64_t)(v); \
+	uint32_t hv_msr_lo_ = (uint32_t)(hv_msr_val_ & 0xFFFFFFFF); \
+	uint32_t hv_msr_hi_ = (uint32_t)(hv_msr_val_ >> 32); \
+	__asm__ __volatile__("wrmsr" \
+	    : /* no outputs */ \
+	    : "c" (reg), "a" (hv_msr_lo_), "d" (hv_msr_hi_)); \
+} while (0)
+
+#else
+/* Otherwise the 64-bit operand is bound as a whole through the "A" constraint. */
+#define HV_VMBUS_READ_MSR(reg, v) \
+	__asm__ __volatile__("rdmsr" \
+	    : "=A" (v) \
+	    : "c" (reg))
+
+#define HV_VMBUS_WRITE_MSR(reg, v) \
+	__asm__ __volatile__("wrmsr" \
+	    : /* no outputs */ \
+	    : "c" (reg), "A" ((uint64_t)(v)))
+
+#endif
+
+static inline unsigned long long
+hv_vmbus_read_msr(int msr)
+{
+	unsigned long long msr_value;	/* assigned by the macro below */
+	HV_VMBUS_READ_MSR(msr, msr_value);
+	return (msr_value);
+}
+
+static inline void
+hv_vmbus_write_msr(int msr, uint64_t val)
+{
+	/* Typed function wrapper around HV_VMBUS_WRITE_MSR. */
+	HV_VMBUS_WRITE_MSR(msr, val);
+}
+
+/*
+ * The guest OS needs to register the guest ID with the hypervisor.
+ * The guest ID is a 64 bit entity and the structure of this ID is
+ * specified in the Hyper-V specification:
+ *
+ * http://msdn.microsoft.com/en-us/library/windows/
+ * hardware/ff542653%28v=vs.85%29.aspx
+ *
+ * While the current guideline does not specify how FreeBSD guest ID(s)
+ * need to be generated, our plan is to publish guidelines for
+ * FreeBSD and other guest operating systems that are currently hosted
+ * on Hyper-V. The implementation here conforms to these as yet
+ * unpublished guidelines.
+ *
+ * Bit(s)
+ * 63 - Indicates if the OS is Open Source or not; 1 is Open Source
+ * 62:56 - OS type; Linux is 0x100, FreeBSD is 0x200 (values relative to bit 48)
+ * 55:48 - Distro specific identification
+ * 47:16 - FreeBSD kernel version number
+ * 15:0 - Distro specific identification
+ *
+ */
+
+#define HV_FREEBSD_VENDOR_ID 0x8200
+#define HV_FREEBSD_GUEST_ID hv_generate_guest_id(0,0)
+
+static inline uint64_t
+hv_generate_guest_id(uint8_t distro_id_part1, uint16_t distro_id_part2)
+{
+	/* The vendor word and the first distro byte share the top 16 bits. */
+	const uint64_t top16 =
+	    (uint64_t)HV_FREEBSD_VENDOR_ID | (uint64_t)distro_id_part1;
+	/* __FreeBSD_version is expected from param.h. */
+	const uint64_t version = (uint64_t)(__FreeBSD_version);
+
+	return ((top16 << 48) | (version << 16) | (uint64_t)distro_id_part2);
+}
+
+
+#endif /* __HYPERV_PRIV_H__ */
diff --git a/sys/modules/Makefile b/sys/modules/Makefile
new file mode 100644
index 000000000000..5f15a1c96314
--- /dev/null
+++ b/sys/modules/Makefile
@@ -0,0 +1,845 @@
+# $FreeBSD$
+
+.include <bsd.own.mk>
+
+# Modules that include binary-only blobs of microcode should be selectable by
+# MK_SOURCELESS_UCODE option (see below).
+
+SUBDIR= \
+ ${_3dfx} \
+ ${_3dfx_linux} \
+ ${_aac} \
+ accf_data \
+ accf_dns \
+ accf_http \
+ acl_nfs4 \
+ acl_posix1e \
+ ${_acpi} \
+ ae \
+ ${_aesni} \
+ age \
+ ${_agp} \
+ aha \
+ ${_ahb} \
+ ahci \
+ ${_aic} \
+ aic7xxx \
+ aio \
+ alc \
+ ale \
+ alq \
+ ${_amdsbwd} \
+ ${_amdtemp} \
+ amr \
+ ${_an} \
+ ${_aout} \
+ ${_apm} \
+ ${_arcmsr} \
+ ${_arcnet} \
+ ${_asmc} \
+ ${_asr} \
+ ata \
+ ath \
+ ath_pci \
+ ${_auxio} \
+ ${_bce} \
+ bfe \
+ bge \
+ ${_bxe} \
+ ${_bios} \
+ ${_bktr} \
+ ${_bm} \
+ bridgestp \
+ bwi \
+ bwn \
+ cam \
+ ${_canbepm} \
+ ${_canbus} \
+ ${_cardbus} \
+ ${_carp} \
+ cas \
+ ${_cbb} \
+ cc \
+ cd9660 \
+ cd9660_iconv \
+ ${_ce} \
+ ${_cfi} \
+ ${_ciss} \
+ ${_cm} \
+ ${_cmx} \
+ ${_coff} \
+ ${_coretemp} \
+ ${_cp} \
+ ${_cpsw} \
+ ${_cpuctl} \
+ ${_cpufreq} \
+ ${_crypto} \
+ ${_cryptodev} \
+ ${_cs} \
+ ${_ct} \
+ ${_ctau} \
+ ${_cxgb} \
+ cxgbe \
+ ${_cyclic} \
+ dc \
+ dcons \
+ dcons_crom \
+ de \
+ ${_dpms} \
+ ${_dpt} \
+ ${_drm} \
+ ${_drm2} \
+ ${_dtrace} \
+ dummynet \
+ ${_ed} \
+ ${_elink} \
+ ${_em} \
+ en \
+ ${_ep} \
+ ${_epic} \
+ esp \
+ ${_et} \
+ ${_ex} \
+ ${_exca} \
+ ${_ext2fs} \
+ ${_fatm} \
+ fdc \
+ fdescfs \
+ ${_fe} \
+ ${_filemon} \
+ firewire \
+ firmware \
+ fuse \
+ ${_fxp} \
+ gem \
+ geom \
+ ${_glxiic} \
+ ${_glxsb} \
+ hatm \
+ hifn \
+ hme \
+ ${_hpt27xx} \
+ ${_hptiop} \
+ ${_hptmv} \
+ ${_hptrr} \
+ hwpmc \
+ ${_hyperv} \
+ ${_i2c} \
+ ${_ibcs2} \
+ ${_ichwd} \
+ ${_ida} \
+ ${_ie} \
+ if_bridge \
+ if_disc \
+ if_edsc \
+ if_ef \
+ if_epair \
+ if_faith \
+ if_gif \
+ ${_if_gre} \
+ if_lagg \
+ ${_if_ndis} \
+ if_stf \
+ if_tap \
+ if_tun \
+ if_vlan \
+ ${_igb} \
+ ${_iir} \
+ ${_io} \
+ ${_ipdivert} \
+ ${_ipfilter} \
+ ${_ipfw} \
+ ipfw_nat \
+ ${_ipmi} \
+ ip6_mroute_mod \
+ ip_mroute_mod \
+ ${_ips} \
+ ${_ipw} \
+ ${_ipwfw} \
+ ${_isci} \
+ iscsi \
+ isp \
+ ${_ispfw} \
+ ${_iwi} \
+ ${_iwifw} \
+ ${_iwn} \
+ ${_iwnfw} \
+ ${_ixgb} \
+ ${_ixgbe} \
+ jme \
+ joy \
+ kbdmux \
+ kgssapi \
+ kgssapi_krb5 \
+ khelp \
+ krpc \
+ ksyms \
+ le \
+ lge \
+ libalias \
+ libiconv \
+ libmbpool \
+ libmchain \
+ ${_lindev} \
+ ${_linprocfs} \
+ ${_linsysfs} \
+ ${_linux} \
+ lmc \
+ lpt \
+ mac_biba \
+ mac_bsdextended \
+ mac_ifoff \
+ mac_lomac \
+ mac_mls \
+ mac_none \
+ mac_partition \
+ mac_portacl \
+ mac_seeotheruids \
+ mac_stub \
+ mac_test \
+ malo \
+ mcd \
+ md \
+ mem \
+ mfi \
+ mii \
+ mlx \
+ ${_mlx4} \
+ ${_mlx4ib} \
+ ${_mlxen} \
+ ${_mly} \
+ mmc \
+ mmcsd \
+ mps \
+ mpt \
+ mqueue \
+ msdosfs \
+ msdosfs_iconv \
+ ${_mse} \
+ msk \
+ ${_mthca} \
+ mvs \
+ mwl \
+ ${_mwlfw} \
+ mxge \
+ my \
+ ${_nandfs} \
+ ${_nandsim} \
+ ${_ncp} \
+ ${_ncv} \
+ ${_ndis} \
+ ${_netgraph} \
+ ${_nfe} \
+ nfs_common \
+ nfscl \
+ nfsclient \
+ nfscommon \
+ nfsd \
+ nfslock \
+ nfslockd \
+ nfsserver \
+ nfssvc \
+ nge \
+ nmdm \
+ ${_nsp} \
+ nullfs \
+ ${_nvd} \
+ ${_nve} \
+ ${_nvme} \
+ ${_nvram} \
+ ${_nxge} \
+ ${_opensolaris} \
+ oce \
+ ${_padlock} \
+ patm \
+ ${_pccard} \
+ ${_pcfclock} \
+ pcn \
+ ${_pf} \
+ ${_pflog} \
+ ${_pfsync} \
+ plip \
+ ${_pmc} \
+ ppbus \
+ ppc \
+ ppi \
+ pps \
+ procfs \
+ pseudofs \
+ ${_pst} \
+ pty \
+ puc \
+ ${_qlxgb} \
+ ral \
+ ${_ralfw} \
+ ${_random} \
+ rc4 \
+ ${_rdma} \
+ re \
+ reiserfs \
+ rl \
+ ${_runfw} \
+ ${_s3} \
+ ${_safe} \
+ ${_sbni} \
+ scc \
+ scd \
+ ${_scsi_low} \
+ sdhci \
+ sdhci_pci \
+ sem \
+ send \
+ ${_sf} \
+ ${_sfxge} \
+ sge \
+ siba_bwn \
+ siftr \
+ siis \
+ sis \
+ sk \
+ ${_sn} \
+ ${_snc} \
+ snp \
+ ${_sound} \
+ ${_speaker} \
+ ${_splash} \
+ ${_sppp} \
+ ste \
+ ${_stg} \
+ stge \
+ ${_streams} \
+ ${_svr4} \
+ ${_sym} \
+ ${_syscons} \
+ sysvipc \
+ ${_ti} \
+ tl \
+ tmpfs \
+ ${_toecore} \
+ ${_tpm} \
+ trm \
+ ${_twa} \
+ twe \
+ tws \
+ tx \
+ ${_txp} \
+ uart \
+ ubsec \
+ udf \
+ udf_iconv \
+ ufs \
+ unionfs \
+ usb \
+ utopia \
+ ${_vesa} \
+ ${_virtio} \
+ vge \
+ ${_viawd} \
+ vkbd \
+ ${_vmm} \
+ ${_vpo} \
+ vr \
+ vte \
+ vx \
+ ${_vxge} \
+ wb \
+ ${_wbwd} \
+ ${_wi} \
+ wlan \
+ wlan_acl \
+ wlan_amrr \
+ wlan_ccmp \
+ wlan_rssadapt \
+ wlan_tkip \
+ wlan_wep \
+ wlan_xauth \
+ ${_wpi} \
+ ${_wpifw} \
+ ${_x86bios} \
+ ${_xe} \
+ xl \
+ ${_zfs} \
+ zlib \
+
+.if ${MACHINE_CPUARCH} == "i386" || ${MACHINE_CPUARCH} == "amd64"
+_filemon= filemon
+.endif
+
+.if ${MACHINE_CPUARCH} != "powerpc" && ${MACHINE_CPUARCH} != "arm" && \
+ ${MACHINE_CPUARCH} != "mips"
+_syscons= syscons
+_vpo= vpo
+.endif
+
+.if ${MACHINE_CPUARCH} != "arm" && ${MACHINE_CPUARCH} != "mips"
+# no BUS_SPACE_UNSPECIFIED
+# No barrier instruction support (specific to this driver)
+_sym= sym
+# intr_disable() is a macro, causes problems
+.if ${MK_SOURCELESS_UCODE} != "no"
+_cxgb= cxgb
+.endif
+.endif
+
+.if ${MK_CRYPT} != "no" || defined(ALL_MODULES)
+.if exists(${.CURDIR}/../opencrypto)
+_crypto= crypto
+_cryptodev= cryptodev
+.endif
+.if exists(${.CURDIR}/../crypto)
+_random= random
+.endif
+.endif
+
+.if (${MK_INET_SUPPORT} != "no" || ${MK_INET6_SUPPORT} != "no") || \
+ defined(ALL_MODULES)
+_carp= carp
+_toecore= toecore
+.endif
+
+.if ${MK_INET_SUPPORT} != "no" || defined(ALL_MODULES)
+_if_gre= if_gre
+.endif
+
+.if ${MK_IPFILTER} != "no" || defined(ALL_MODULES)
+_ipfilter= ipfilter
+.endif
+
+.if ${MK_INET_SUPPORT} != "no" || defined(ALL_MODULES)
+_ipdivert= ipdivert
+_ipfw= ipfw
+.endif
+
+.if ${MK_NAND} != "no" || defined(ALL_MODULES)
+_nandfs= nandfs
+_nandsim= nandsim
+.endif
+
+.if ${MK_NETGRAPH} != "no" || defined(ALL_MODULES)
+_netgraph= netgraph
+.endif
+
+.if (${MK_PF} != "no" && (${MK_INET_SUPPORT} != "no" || \
+ ${MK_INET6_SUPPORT} != "no")) || defined(ALL_MODULES)
+_pf= pf
+_pflog= pflog
+.if ${MK_INET_SUPPORT} != "no"
+_pfsync= pfsync
+.endif
+.endif
+
+.if ${MK_SOURCELESS_UCODE} != "no"
+_bce= bce
+_fatm= fatm
+_fxp= fxp
+_ispfw= ispfw
+_mwlfw= mwlfw
+_ralfw= ralfw
+_runfw= runfw
+_sf= sf
+_sn= sn
+_ti= ti
+_txp= txp
+.endif
+
+.if ${MACHINE_CPUARCH} == "i386"
+# XXX some of these can move to the general case when de-i386'ed
+# XXX some of these can move now, but are untested on other architectures.
+_3dfx= 3dfx
+_3dfx_linux= 3dfx_linux
+_agp= agp
+_aic= aic
+_an= an
+_aout= aout
+_apm= apm
+_arcnet= arcnet
+_bktr= bktr
+_bxe= bxe
+_cardbus= cardbus
+_cbb= cbb
+.if ${MK_SOURCELESS_UCODE} != "no"
+_ce= ce
+.endif
+_coff= coff
+.if ${MK_SOURCELESS_UCODE} != "no"
+_cp= cp
+.endif
+_cpuctl= cpuctl
+_cpufreq= cpufreq
+_cs= cs
+.if ${MK_CDDL} != "no" || defined(ALL_MODULES)
+_cyclic= cyclic
+.endif
+_dpms= dpms
+_drm= drm
+_drm2= drm2
+.if ${MK_CDDL} != "no" || defined(ALL_MODULES)
+_dtrace= dtrace
+.endif
+_ed= ed
+_elink= elink
+_em= em
+_ep= ep
+_et= et
+_exca= exca
+_ext2fs= ext2fs
+_fe= fe
+_glxiic= glxiic
+_glxsb= glxsb
+_i2c= i2c
+_ibcs2= ibcs2
+_ie= ie
+_if_ndis= if_ndis
+_igb= igb
+_io= io
+_lindev= lindev
+_linprocfs= linprocfs
+_linsysfs= linsysfs
+_linux= linux
+_mse= mse
+.if ${MK_OFED} != "no" || defined(ALL_MODULES)
+_mlx4= mlx4
+_mlx4ib= mlx4ib
+_mlxen= mlxen
+_mthca= mthca
+.endif
+_ncv= ncv
+_ndis= ndis
+_nsp= nsp
+.if ${MK_CDDL} != "no" || defined(ALL_MODULES)
+_opensolaris= opensolaris
+.endif
+_pccard= pccard
+_pcfclock= pcfclock
+_pst= pst
+_rdma= rdma
+_safe= safe
+_sbni= sbni
+_scsi_low= scsi_low
+_sound= sound
+_speaker= speaker
+_splash= splash
+_sppp= sppp
+_stg= stg
+_streams= streams
+_svr4= svr4
+_vxge= vxge
+_wbwd= wbwd
+_wi= wi
+_xe= xe
+.if ${MK_ZFS} != "no" || defined(ALL_MODULES)
+_zfs= zfs
+.endif
+.if ${MACHINE} == "i386"
+_aac= aac
+_acpi= acpi
+.if ${MK_CRYPT} != "no" || defined(ALL_MODULES)
+_aesni= aesni
+.endif
+_ahb= ahb
+_amdsbwd= amdsbwd
+_amdtemp= amdtemp
+_arcmsr= arcmsr
+_asmc= asmc
+_asr= asr
+_bios= bios
+_ciss= ciss
+_cm= cm
+_cmx= cmx
+_coretemp= coretemp
+.if ${MK_SOURCELESS_UCODE} != "no"
+_ctau= ctau
+.endif
+_dpt= dpt
+_ex= ex
+.if ${MK_SOURCELESS_HOST} != "no"
+_hpt27xx= hpt27xx
+.endif
+_hptiop= hptiop
+.if ${MK_SOURCELESS_HOST} != "no"
+_hptmv= hptmv
+_hptrr= hptrr
+.endif
+_ichwd= ichwd
+_ida= ida
+_iir= iir
+_ipmi= ipmi
+_ips= ips
+_ipw= ipw
+.if ${MK_SOURCELESS_UCODE} != "no"
+_ipwfw= ipwfw
+.endif
+_isci= isci
+_iwi= iwi
+.if ${MK_SOURCELESS_UCODE} != "no"
+_iwifw= iwifw
+.endif
+_iwn= iwn
+.if ${MK_SOURCELESS_UCODE} != "no"
+_iwnfw= iwnfw
+.endif
+_ixgb= ixgb
+_ixgbe= ixgbe
+_mly= mly
+_nfe= nfe
+_nvd= nvd
+.if ${MK_SOURCELESS_HOST} != "no"
+_nve= nve
+.endif
+_nvme= nvme
+_nvram= nvram
+_nxge= nxge
+_tpm= tpm
+_viawd= viawd
+_wpi= wpi
+.if ${MK_SOURCELESS_UCODE} != "no"
+_wpifw= wpifw
+.endif
+.if ${MK_CRYPT} != "no" || defined(ALL_MODULES)
+_padlock= padlock
+.endif
+_s3= s3
+_twa= twa
+_vesa= vesa
+_virtio= virtio
+_x86bios= x86bios
+.elif ${MACHINE} == "pc98"
+_canbepm= canbepm
+_canbus= canbus
+_ct= ct
+_pmc= pmc
+_snc= snc
+.endif
+.endif
+
+.if ${MACHINE_CPUARCH} == "amd64"
+_aac= aac
+_aout= aout
+_acpi= acpi
+.if ${MK_CRYPT} != "no" || defined(ALL_MODULES)
+_aesni= aesni
+.endif
+_agp= agp
+_an= an
+_amdsbwd= amdsbwd
+_amdtemp= amdtemp
+_arcmsr= arcmsr
+_asmc= asmc
+_bktr= bktr
+_bxe= bxe
+_cardbus= cardbus
+_cbb= cbb
+_cmx= cmx
+_ciss= ciss
+_coretemp= coretemp
+_cpuctl= cpuctl
+_cpufreq= cpufreq
+.if ${MK_CDDL} != "no" || defined(ALL_MODULES)
+_cyclic= cyclic
+.endif
+_dpms= dpms
+_drm= drm
+_drm2= drm2
+.if ${MK_CDDL} != "no" || defined(ALL_MODULES)
+_dtrace= dtrace
+.endif
+_ed= ed
+_et= et
+_em= em
+_exca= exca
+_ext2fs= ext2fs
+.if ${MK_SOURCELESS_HOST} != "no"
+_hpt27xx= hpt27xx
+.endif
+_hptiop= hptiop
+.if ${MK_SOURCELESS_HOST} != "no"
+_hptmv= hptmv
+_hptrr= hptrr
+.endif
+_hyperv= hyperv
+_i2c= i2c
+_ichwd= ichwd
+_ida= ida
+_if_ndis= if_ndis
+_igb= igb
+_iir= iir
+_io= io
+_ipmi= ipmi
+_ips= ips
+_ipw= ipw
+.if ${MK_SOURCELESS_UCODE} != "no"
+_ipwfw= ipwfw
+.endif
+_isci= isci
+_iwi= iwi
+.if ${MK_SOURCELESS_UCODE} != "no"
+_iwifw= iwifw
+.endif
+_iwn= iwn
+.if ${MK_SOURCELESS_UCODE} != "no"
+_iwnfw= iwnfw
+.endif
+_ixgb= ixgb
+_ixgbe= ixgbe
+_lindev= lindev
+_linprocfs= linprocfs
+_linsysfs= linsysfs
+_linux= linux
+_mly= mly
+.if ${MK_OFED} != "no" || defined(ALL_MODULES)
+_mlx4= mlx4
+_mlx4ib= mlx4ib
+_mlxen= mlxen
+_mthca= mthca
+.endif
+_ndis= ndis
+_nfe= nfe
+_nvd= nvd
+.if ${MK_SOURCELESS_HOST} != "no"
+_nve= nve
+.endif
+_nvme= nvme
+_nvram= nvram
+_nxge= nxge
+.if ${MK_CDDL} != "no" || defined(ALL_MODULES)
+_opensolaris= opensolaris
+.endif
+.if ${MK_CRYPT} != "no" || defined(ALL_MODULES)
+_padlock= padlock
+.endif
+_pccard= pccard
+_qlxgb= qlxgb
+_rdma= rdma
+_s3= s3
+_safe= safe
+_scsi_low= scsi_low
+_sfxge= sfxge
+_sound= sound
+_speaker= speaker
+_splash= splash
+_sppp= sppp
+_tpm= tpm
+_twa= twa
+_vesa= vesa
+_viawd= viawd
+_virtio= virtio
+_vmm= vmm
+_vxge= vxge
+_x86bios= x86bios
+_wbwd= wbwd
+_wi= wi
+_wpi= wpi
+.if ${MK_SOURCELESS_UCODE} != "no"
+_wpifw= wpifw
+.endif
+.if ${MK_ZFS} != "no" || defined(ALL_MODULES)
+_zfs= zfs
+.endif
+.endif
+
+.if ${MACHINE_CPUARCH} == "arm"
+_cfi= cfi
+_cpsw= cpsw
+.endif
+
+.if ${MACHINE_CPUARCH} == "ia64"
+_aac= aac
+_aic= aic
+_an= an
+_arcnet= arcnet
+_asr= asr
+_bktr= bktr
+_cardbus= cardbus
+_cbb= cbb
+_ciss= ciss
+_cm= cm
+_cmx= cmx
+_coff= coff
+_cpufreq= cpufreq
+_dpt= dpt
+_em= em
+_ep= ep
+_et= et
+_exca= exca
+_fe= fe
+_hptiop= hptiop
+_ida= ida
+_igb= igb
+_iir= iir
+_ips= ips
+_mly= mly
+_pccard= pccard
+_scsi_low= scsi_low
+_sound= sound
+_splash= splash
+_sppp= sppp
+_streams= streams
+_tpm= tpm
+_twa= twa
+_wi= wi
+_xe= xe
+.endif
+
+.if ${MACHINE_CPUARCH} == "powerpc"
+_agp= agp
+_an= an
+_bm= bm
+_cardbus= cardbus
+_cbb= cbb
+_cfi= cfi
+_cpufreq= cpufreq
+_drm= drm
+_exca= exca
+_nvram= powermac_nvram
+_pccard= pccard
+_sound= sound
+_cyclic= cyclic
+_dtrace= dtrace
+_opensolaris= opensolaris
+.endif
+
+.if ${MACHINE_ARCH} == "powerpc64"
+.if ${MK_CDDL} != "no" || defined(ALL_MODULES)
+_cyclic= cyclic
+_dtrace= dtrace
+_opensolaris= opensolaris
+.endif
+.if ${MK_ZFS} != "no" || defined(ALL_MODULES)
+_zfs= zfs
+.endif
+.endif
+
+.if ${MACHINE_CPUARCH} == "sparc64"
+_auxio= auxio
+_em= em
+_epic= epic
+_i2c= i2c
+_igb= igb
+.if ${MK_CDDL} != "no" || defined(ALL_MODULES)
+_opensolaris= opensolaris
+.endif
+_sound= sound
+.if ${MK_ZFS} != "no" || defined(ALL_MODULES)
+_zfs= zfs
+.endif
+.endif
+
+.if defined(MODULES_OVERRIDE) && !defined(ALL_MODULES)
+SUBDIR=${MODULES_OVERRIDE}
+.endif
+
+.for reject in ${WITHOUT_MODULES}
+SUBDIR:= ${SUBDIR:N${reject}}
+.endfor
+
+# Calling kldxref(8) for each module is expensive.
+.if !defined(NO_XREF)
+.MAKEFLAGS+= -DNO_XREF
+afterinstall:
+ @if type kldxref >/dev/null 2>&1; then \
+ ${ECHO} kldxref ${DESTDIR}${KMODDIR}; \
+ kldxref ${DESTDIR}${KMODDIR}; \
+ fi
+.endif
+
+.include <bsd.subdir.mk>
diff --git a/sys/modules/hyperv/Makefile b/sys/modules/hyperv/Makefile
new file mode 100644
index 000000000000..3bae26ac4030
--- /dev/null
+++ b/sys/modules/hyperv/Makefile
@@ -0,0 +1,10 @@
+# $FreeBSD$
+#
+# Descends into each Hyper-V module directory listed in SUBDIR.
+
+SUBDIR= vmbus
+SUBDIR+= netvsc
+SUBDIR+= storvsc
+SUBDIR+= utilities
+
+.include <bsd.subdir.mk>
diff --git a/sys/modules/hyperv/netvsc/Makefile b/sys/modules/hyperv/netvsc/Makefile
new file mode 100644
index 000000000000..bfe7d8232f6f
--- /dev/null
+++ b/sys/modules/hyperv/netvsc/Makefile
@@ -0,0 +1,17 @@
+# $FreeBSD$
+#
+# Builds the hv_netvsc kernel module from the sources in
+# sys/dev/hyperv/netvsc.
+
+.PATH: ${.CURDIR}/../../../dev/hyperv/netvsc
+
+KMOD= hv_netvsc
+
+SRCS= hv_net_vsc.c
+SRCS+= hv_netvsc_drv_freebsd.c
+SRCS+= hv_rndis_filter.c
+
+CFLAGS+= -I${.CURDIR}/../../../dev/hyperv/include
+CFLAGS+= -I${.CURDIR}/../../../dev/hyperv/netvsc
+
+.include <bsd.kmod.mk>
diff --git a/sys/modules/hyperv/storvsc/Makefile b/sys/modules/hyperv/storvsc/Makefile
new file mode 100644
index 000000000000..fa3e3de86172
--- /dev/null
+++ b/sys/modules/hyperv/storvsc/Makefile
@@ -0,0 +1,17 @@
+# $FreeBSD$
+#
+# Builds the hv_storvsc kernel module from the sources in
+# sys/dev/hyperv/storvsc.
+
+.PATH: ${.CURDIR}/../../../dev/hyperv/storvsc
+
+KMOD= hv_storvsc
+
+SRCS= hv_storvsc_drv_freebsd.c
+SRCS+= hv_vstorage.h
+
+CFLAGS+= -I${.CURDIR}/../../../dev/hyperv/include
+CFLAGS+= -I${.CURDIR}/../../../dev/hyperv/vmbus
+CFLAGS+= -I${.CURDIR}/../../../dev/hyperv/storvsc
+
+.include <bsd.kmod.mk>
diff --git a/sys/modules/hyperv/utilities/Makefile b/sys/modules/hyperv/utilities/Makefile
new file mode 100644
index 000000000000..e2351a348c71
--- /dev/null
+++ b/sys/modules/hyperv/utilities/Makefile
@@ -0,0 +1,14 @@
+# $FreeBSD$
+#
+# Builds the hv_utils kernel module from sys/dev/hyperv/utilities.
+
+.PATH: ${.CURDIR}/../../../dev/hyperv/utilities
+
+KMOD= hv_utils
+
+SRCS= hv_util.c
+
+CFLAGS+= -I${.CURDIR}/../../../dev/hyperv/include
+CFLAGS+= -I${.CURDIR}/../../../dev/hyperv/vmbus
+
+.include <bsd.kmod.mk>
diff --git a/sys/modules/hyperv/vmbus/Makefile b/sys/modules/hyperv/vmbus/Makefile
new file mode 100644
index 000000000000..df8464e02174
--- /dev/null
+++ b/sys/modules/hyperv/vmbus/Makefile
@@ -0,0 +1,23 @@
+# $FreeBSD$
+#
+# Builds the hv_vmbus kernel module.  Sources are searched for in both
+# sys/dev/hyperv/vmbus and sys/dev/hyperv/utilities.
+
+.PATH: ${.CURDIR}/../../../dev/hyperv/vmbus
+.PATH: ${.CURDIR}/../../../dev/hyperv/utilities
+
+KMOD= hv_vmbus
+
+SRCS= hv_channel.c
+SRCS+= hv_channel_mgmt.c
+SRCS+= hv_connection.c
+SRCS+= hv_hv.c
+SRCS+= hv_ring_buffer.c
+SRCS+= hv_vmbus_drv_freebsd.c
+SRCS+= hv_vmbus_priv.h
+
+CFLAGS+= -I${.CURDIR}/../../../dev/hyperv/include
+CFLAGS+= -I${.CURDIR}/../../../dev/hyperv/vmbus
+CFLAGS+= -I${.CURDIR}/../../../dev/hyperv/utilities
+
+.include <bsd.kmod.mk>