From: "Butler, Siobhan A" <siobhan.a.butler@intel.com>
To: Stephen Hemminger <stephen@networkplumber.org>,
"alexmay@microsoft.com" <alexmay@microsoft.com>
Cc: "dev@dpdk.org" <dev@dpdk.org>, Stas Egorov <segorov@mirantis.com>,
Stephen Hemminger <shemming@brocade.com>
Subject: Re: [dpdk-dev] [PATCH v4 5/7] hv: poll mode driver
Date: Tue, 21 Apr 2015 19:34:39 +0000
Message-ID: <0C5AFCA4B3408848ADF2A3073F7D8CC86D5A6905@IRSMSX109.ger.corp.intel.com>
In-Reply-To: <1429637564-5656-6-git-send-email-stephen@networkplumber.org>
Hi Stephen,
Will you have documentation to go along with these changes?
Thanks
Siobhan
> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Stephen
> Hemminger
> Sent: Tuesday, April 21, 2015 6:33 PM
> To: alexmay@microsoft.com
> Cc: dev@dpdk.org; Stas Egorov; Stephen Hemminger
> Subject: [dpdk-dev] [PATCH v4 5/7] hv: poll mode driver
>
> From: Stephen Hemminger <shemming@brocade.com>
>
> This is a new poll mode driver for the Hyper-V virtual network
> interface.
>
> Signed-off-by: Stas Egorov <segorov@mirantis.com>
> Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
> ---
> lib/Makefile | 1 +
> lib/librte_pmd_hyperv/Makefile | 28 +
> lib/librte_pmd_hyperv/hyperv.h | 169 ++++
> lib/librte_pmd_hyperv/hyperv_drv.c | 1653 +++++++++++++++++++++++++++++++++
> lib/librte_pmd_hyperv/hyperv_drv.h | 558 +++++++++++
> lib/librte_pmd_hyperv/hyperv_ethdev.c | 332 +++++++
> lib/librte_pmd_hyperv/hyperv_logs.h | 69 ++
> lib/librte_pmd_hyperv/hyperv_rxtx.c | 403 ++++++++
> lib/librte_pmd_hyperv/hyperv_rxtx.h | 35 +
> mk/rte.app.mk | 4 +
> 10 files changed, 3252 insertions(+)
> create mode 100644 lib/librte_pmd_hyperv/Makefile
> create mode 100644 lib/librte_pmd_hyperv/hyperv.h
> create mode 100644 lib/librte_pmd_hyperv/hyperv_drv.c
> create mode 100644 lib/librte_pmd_hyperv/hyperv_drv.h
> create mode 100644 lib/librte_pmd_hyperv/hyperv_ethdev.c
> create mode 100644 lib/librte_pmd_hyperv/hyperv_logs.h
> create mode 100644 lib/librte_pmd_hyperv/hyperv_rxtx.c
> create mode 100644 lib/librte_pmd_hyperv/hyperv_rxtx.h
>
> diff --git a/lib/Makefile b/lib/Makefile
> index d94355d..6c1daf2 100644
> --- a/lib/Makefile
> +++ b/lib/Makefile
> @@ -47,6 +47,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_I40E_PMD) += librte_pmd_i40e
> DIRS-$(CONFIG_RTE_LIBRTE_FM10K_PMD) += librte_pmd_fm10k
> DIRS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += librte_pmd_mlx4
> DIRS-$(CONFIG_RTE_LIBRTE_ENIC_PMD) += librte_pmd_enic
> +DIRS-$(CONFIG_RTE_LIBRTE_HV_PMD) += librte_pmd_hyperv
> DIRS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += librte_pmd_bond
> DIRS-$(CONFIG_RTE_LIBRTE_PMD_RING) += librte_pmd_ring
> DIRS-$(CONFIG_RTE_LIBRTE_PMD_PCAP) += librte_pmd_pcap
> diff --git a/lib/librte_pmd_hyperv/Makefile b/lib/librte_pmd_hyperv/Makefile
> new file mode 100644
> index 0000000..4ba08c8
> --- /dev/null
> +++ b/lib/librte_pmd_hyperv/Makefile
> @@ -0,0 +1,28 @@
> +# BSD LICENSE
> +#
> +# Copyright(c) 2013-2015 Brocade Communications Systems, Inc.
> +# All rights reserved.
> +
> +include $(RTE_SDK)/mk/rte.vars.mk
> +
> +#
> +# library name
> +#
> +LIB = librte_pmd_hyperv.a
> +
> +CFLAGS += -O3
> +CFLAGS += $(WERROR_FLAGS)
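> +
> +# Note: these sources build only when CONFIG_RTE_LIBRTE_HV_PMD is
> +# enabled in the target configuration (the option is presumably added
> +# elsewhere in this series).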
> +
> +#
> +# all source are stored in SRCS-y
> +#
> +SRCS-$(CONFIG_RTE_LIBRTE_HV_PMD) += hyperv_ethdev.c
> +SRCS-$(CONFIG_RTE_LIBRTE_HV_PMD) += hyperv_rxtx.c
> +SRCS-$(CONFIG_RTE_LIBRTE_HV_PMD) += hyperv_drv.c
> +
> +# this lib depends upon:
> +DEPDIRS-$(CONFIG_RTE_LIBRTE_HV_PMD) += lib/librte_eal lib/librte_ether
> +DEPDIRS-$(CONFIG_RTE_LIBRTE_HV_PMD) += lib/librte_mempool lib/librte_mbuf
> +DEPDIRS-$(CONFIG_RTE_LIBRTE_HV_PMD) += lib/librte_malloc
> +
> +include $(RTE_SDK)/mk/rte.lib.mk
> diff --git a/lib/librte_pmd_hyperv/hyperv.h b/lib/librte_pmd_hyperv/hyperv.h
> new file mode 100644
> index 0000000..5f66d8a
> --- /dev/null
> +++ b/lib/librte_pmd_hyperv/hyperv.h
> @@ -0,0 +1,169 @@
> +/*-
> + * Copyright (c) 2013-2015 Brocade Communications Systems, Inc.
> + * All rights reserved.
> + */
> +
> +#ifndef _HYPERV_H_
> +#define _HYPERV_H_
> +
> +#include <sys/param.h>
> +#include <rte_log.h>
> +#include <rte_debug.h>
> +#include <rte_ether.h>
> +#include <rte_ethdev.h>
> +#include <rte_malloc.h>
> +#include <rte_memzone.h>
> +#include <rte_cycles.h>
> +#include <rte_dev.h>
> +
> +#include "hyperv_logs.h"
> +
> +#define PAGE_SHIFT 12
> +#define PAGE_SIZE (1 << PAGE_SHIFT)
> +
> +/*
> + * Tunable ethdev params
> + */
> +#define HV_MIN_RX_BUF_SIZE 1024
> +#define HV_MAX_RX_PKT_LEN 4096
> +#define HV_MAX_MAC_ADDRS 1
> +#define HV_MAX_RX_QUEUES 1
> +#define HV_MAX_TX_QUEUES 1
> +#define HV_MAX_PKT_BURST 32
> +#define HV_MAX_LINK_REQ 10
> +
> +/*
> + * List of resources mapped from kspace
> + * need to be the same as defined in hv_uio.c
> + */
> +enum {
> + TXRX_RING_MAP,
> + INT_PAGE_MAP,
> + MON_PAGE_MAP,
> + RECV_BUF_MAP
> +};
> +
> +/*
> + * Statistics
> + */
> +struct hv_stats {
> + uint64_t opkts;
> + uint64_t obytes;
> + uint64_t oerrors;
> +
> + uint64_t ipkts;
> + uint64_t ibytes;
> + uint64_t ierrors;
> + uint64_t rx_nombuf;
> +};
> +
> +struct hv_data;
> +struct netvsc_packet;
> +struct rndis_msg;
> +typedef void (*receive_callback_t)(struct hv_data *hv, struct rndis_msg *msg,
> +			struct netvsc_packet *pkt);
> +
> +/*
> + * Main driver structure
> + */
> +struct hv_data {
> + int vmbus_device;
> + uint8_t monitor_bit;
> + uint8_t monitor_group;
> + uint8_t kernel_initialized;
> + int uio_fd;
> +	/* Flag indicates channel state. If closed, RX/TX shouldn't work further */
> + uint8_t closed;
> + /* Flag indicates whether HALT rndis request was received by host */
> + uint8_t hlt_req_sent;
> + /* Flag indicates pending state for HALT request */
> + uint8_t hlt_req_pending;
> + /* Counter for RNDIS requests */
> + uint32_t new_request_id;
> + /* State of RNDIS device */
> + uint8_t rndis_dev_state;
> +	/* Number of transmitted packets not yet completed by Hyper-V */
> + int num_outstanding_sends;
> + /* Max pkt len to fit in rx mbufs */
> + uint32_t max_rx_pkt_len;
> +
> + uint8_t jumbo_frame_support;
> +
> + struct hv_vmbus_ring_buffer *in;
> + struct hv_vmbus_ring_buffer *out;
> +
> + /* Size of each ring_buffer(in/out) */
> + uint32_t rb_size;
> + /* Size of data in each ring_buffer(in/out) */
> + uint32_t rb_data_size;
> +
> + void *int_page;
> + struct hv_vmbus_monitor_page *monitor_pages;
> + void *recv_interrupt_page;
> + void *send_interrupt_page;
> + void *ring_pages;
> + void *recv_buf;
> +
> + uint8_t link_req_cnt;
> + uint32_t link_status;
> + uint8_t hw_mac_addr[ETHER_ADDR_LEN];
> + struct rndis_request *req;
> + struct netvsc_packet *netvsc_packet;
> + struct nvsp_msg *rx_comp_msg;
> + struct hv_rx_queue *rxq;
> + struct hv_tx_queue *txq;
> + struct hv_vm_packet_descriptor *desc;
> + receive_callback_t receive_callback;
> + int pkt_rxed;
> +
> + uint32_t debug;
> + struct hv_stats stats;
> +};
> +
> +/*
> + * Extern functions declarations
> + */
> +int hyperv_dev_tx_queue_setup(struct rte_eth_dev *dev,
> + uint16_t queue_idx,
> + uint16_t nb_desc,
> + unsigned int socket_id,
> + const struct rte_eth_txconf *tx_conf);
> +
> +void hyperv_dev_tx_queue_release(void *ptxq);
> +
> +int hyperv_dev_rx_queue_setup(struct rte_eth_dev *dev,
> + uint16_t queue_idx,
> + uint16_t nb_desc,
> + unsigned int socket_id,
> + const struct rte_eth_rxconf *rx_conf,
> + struct rte_mempool *mp);
> +
> +void hyperv_dev_rx_queue_release(void *prxq);
> +
> +uint16_t
> +hyperv_recv_pkts(void *prxq,
> + struct rte_mbuf **rx_pkts, uint16_t nb_pkts);
> +
> +uint16_t
> +hyperv_xmit_pkts(void *ptxq,
> + struct rte_mbuf **tx_pkts, uint16_t nb_pkts);
> +
> +int hv_rf_on_device_add(struct hv_data *hv);
> +int hv_rf_on_device_remove(struct hv_data *hv);
> +int hv_rf_on_send(struct hv_data *hv, struct netvsc_packet *pkt);
> +int hv_rf_on_open(struct hv_data *hv);
> +int hv_rf_on_close(struct hv_data *hv);
> +int hv_rf_set_device_mac(struct hv_data *hv, uint8_t *mac);
> +void hyperv_start_rx(struct hv_data *hv);
> +void hyperv_stop_rx(struct hv_data *hv);
> +int hyperv_get_buffer(struct hv_data *hv, void *buffer, uint32_t bufferlen);
> +void hyperv_scan_comps(struct hv_data *hv, int allow_rx_drop);
> +uint8_t hyperv_get_link_status(struct hv_data *hv);
> +int hyperv_set_rx_mode(struct hv_data *hv, uint8_t promisc, uint8_t mcast);
> +
> +inline int rte_hv_dev_atomic_write_link_status(struct rte_eth_dev *dev,
> + struct rte_eth_link *link);
> +inline int rte_hv_dev_atomic_read_link_status(struct rte_eth_dev *dev,
> + struct rte_eth_link *link);
> +
> +#endif /* _HYPERV_H_ */
> diff --git a/lib/librte_pmd_hyperv/hyperv_drv.c b/lib/librte_pmd_hyperv/hyperv_drv.c
> new file mode 100644
> index 0000000..4a37966
> --- /dev/null
> +++ b/lib/librte_pmd_hyperv/hyperv_drv.c
> @@ -0,0 +1,1653 @@
> +/*-
> + * Copyright (c) 2009-2012 Microsoft Corp.
> + * Copyright (c) 2010-2012 Citrix Inc.
> + * Copyright (c) 2012 NetApp Inc.
> + * Copyright (c) 2013-2015 Brocade Communications Systems, Inc.
> + * All rights reserved.
> + *
> + * Redistribution and use in source and binary forms, with or without
> + * modification, are permitted provided that the following conditions
> + * are met:
> + * 1. Redistributions of source code must retain the above copyright
> + * notice unmodified, this list of conditions, and the following
> + * disclaimer.
> + * 2. Redistributions in binary form must reproduce the above copyright
> + * notice, this list of conditions and the following disclaimer in the
> + * documentation and/or other materials provided with the distribution.
> + *
> + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
> + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
> + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
> + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
> + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
> + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
> + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> + *
> + */
> +
> +#include "hyperv.h"
> +#include "hyperv_drv.h"
> +#include "hyperv_rxtx.h"
> +
> +#define LOOP_CNT 10000
> +#define MAC_STRLEN 14
> +#define MAC_PARAM_STR "NetworkAddress"
> +
> +#define hex "0123456789abcdef"
> +#define high(x) hex[(x & 0xf0) >> 4]
> +#define low(x) hex[x & 0x0f]
> +
> +static int hv_rf_on_receive(struct hv_data *hv, struct netvsc_packet *pkt);
> +
> +/*
> + * Ring buffer
> + */
> +
> +/* Amount of space to write to */
> +#define HV_BYTES_AVAIL_TO_WRITE(r, w, z) \
> + (((w) >= (r)) ? ((z) - ((w) - (r))) : ((r) - (w)))
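> +/*
> + * Example: with read index r = 100, write index w = 300 and a data
> + * area of z = 1024 bytes, a writer may use z - (w - r) = 824 bytes;
> + * once w wraps below r, the free space is simply r - w.
> + */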
> +
> +/*
> + * Get number of bytes available to read and to write to
> + * for the specified ring buffer
> + */
> +static inline void
> +get_ring_buffer_avail_bytes(
> + struct hv_data *hv,
> + struct hv_vmbus_ring_buffer *ring_buffer,
> + uint32_t *read,
> + uint32_t *write)
> +{
> + rte_compiler_barrier();
> +
> + /*
> + * Capture the read/write indices before they changed
> + */
> + uint32_t read_loc = ring_buffer->read_index;
> + uint32_t write_loc = ring_buffer->write_index;
> +
> + *write = HV_BYTES_AVAIL_TO_WRITE(
> + read_loc, write_loc, hv->rb_data_size);
> + *read = hv->rb_data_size - *write;
> +}
> +
> +/*
> + * Helper routine to copy from source to ring buffer.
> + *
> + * Assume there is enough room. Handles wrap-around in dest case only!
> + */
> +static uint32_t
> +copy_to_ring_buffer(
> + struct hv_vmbus_ring_buffer *ring_buffer,
> + uint32_t ring_buffer_size,
> + uint32_t start_write_offset,
> + char *src,
> + uint32_t src_len)
> +{
> + char *ring_buf = (char *)ring_buffer->buffer;
> + uint32_t fragLen;
> +
> + if (src_len > ring_buffer_size - start_write_offset) {
> + /* wrap-around detected! */
> + fragLen = ring_buffer_size - start_write_offset;
> + rte_memcpy(ring_buf + start_write_offset, src, fragLen);
> + rte_memcpy(ring_buf, src + fragLen, src_len - fragLen);
> + } else {
> + rte_memcpy(ring_buf + start_write_offset, src, src_len);
> + }
> +
> + start_write_offset += src_len;
> + start_write_offset %= ring_buffer_size;
> +
> + return start_write_offset;
> +}
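> +
> +/*
> + * Example of the wrap-around path above: with ring_buffer_size = 1024,
> + * start_write_offset = 1000 and src_len = 100, the first 24 bytes land
> + * at offset 1000, the remaining 76 bytes at offset 0, and the new
> + * write offset (1000 + 100) % 1024 = 76 is returned.
> + */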
> +
> +/*
> + * Helper routine to copy to dest from ring buffer.
> + *
> + * Assume there is enough room. Handles wrap-around in src case only!
> + */
> +static uint32_t
> +copy_from_ring_buffer(
> + struct hv_data *hv,
> + struct hv_vmbus_ring_buffer *ring_buffer,
> + char *dest,
> + uint32_t dest_len,
> + uint32_t start_read_offset)
> +{
> + uint32_t fragLen;
> + char *ring_buf = (char *)ring_buffer->buffer;
> +
> + if (dest_len > hv->rb_data_size - start_read_offset) {
> + /* wrap-around detected at the src */
> + fragLen = hv->rb_data_size - start_read_offset;
> + rte_memcpy(dest, ring_buf + start_read_offset, fragLen);
> + rte_memcpy(dest + fragLen, ring_buf, dest_len - fragLen);
> + } else {
> + rte_memcpy(dest, ring_buf + start_read_offset, dest_len);
> + }
> +
> + start_read_offset += dest_len;
> + start_read_offset %= hv->rb_data_size;
> +
> + return start_read_offset;
> +}
> +
> +/*
> + * Write to the ring buffer.
> + */
> +static int
> +hv_ring_buffer_write(
> + struct hv_data *hv,
> + struct hv_vmbus_sg_buffer_list sg_buffers[],
> + uint32_t sg_buffer_count)
> +{
> + struct hv_vmbus_ring_buffer *ring_buffer = hv->out;
> + uint32_t i = 0;
> + uint32_t byte_avail_to_write;
> + uint32_t byte_avail_to_read;
> + uint32_t total_bytes_to_write = 0;
> + volatile uint32_t next_write_location;
> + uint64_t prev_indices = 0;
> +
> + for (i = 0; i < sg_buffer_count; i++)
> + total_bytes_to_write += sg_buffers[i].length;
> +
> + total_bytes_to_write += sizeof(uint64_t);
> +
> + get_ring_buffer_avail_bytes(hv, ring_buffer, &byte_avail_to_read,
> + &byte_avail_to_write);
> +
> + /*
> + * If there is only room for the packet, assume it is full.
> + * Otherwise, the next time around, we think the ring buffer
> + * is empty since the read index == write index
> + */
> + if (byte_avail_to_write <= total_bytes_to_write) {
> + PMD_PERROR_LOG(hv, DBG_RB,
> +			"byte_avail_to_write = %u, total_bytes_to_write = %u",
> + byte_avail_to_write, total_bytes_to_write);
> + return -EAGAIN;
> + }
> +
> + /*
> + * Write to the ring buffer
> + */
> + next_write_location = ring_buffer->write_index;
> +
> + for (i = 0; i < sg_buffer_count; i++) {
> + next_write_location = copy_to_ring_buffer(ring_buffer,
> + hv->rb_data_size, next_write_location,
> +			(char *) sg_buffers[i].data, sg_buffers[i].length);
> + }
> +
> + /*
> + * Set previous packet start
> + */
> + prev_indices = (uint64_t)ring_buffer->write_index << 32;
> +
> + next_write_location = copy_to_ring_buffer(
> + ring_buffer, hv->rb_data_size, next_write_location,
> + (char *) &prev_indices, sizeof(uint64_t));
> +
> + /*
> + * Make sure we flush all writes before updating the writeIndex
> + */
> + rte_compiler_barrier();
> +
> + /*
> + * Now, update the write location
> + */
> + ring_buffer->write_index = next_write_location;
> +
> + return 0;
> +}
> +
> +/*
> + * Read without advancing the read index.
> + */
> +static int
> +hv_ring_buffer_peek(struct hv_data *hv, void *buffer, uint32_t buffer_len)
> +{
> + struct hv_vmbus_ring_buffer *ring_buffer = hv->in;
> + uint32_t bytesAvailToWrite;
> + uint32_t bytesAvailToRead;
> +
> + get_ring_buffer_avail_bytes(hv, ring_buffer,
> + &bytesAvailToRead,
> + &bytesAvailToWrite);
> +
> + /* Make sure there is something to read */
> + if (bytesAvailToRead < buffer_len)
> + return -EAGAIN;
> +
> + copy_from_ring_buffer(hv, ring_buffer,
> + (char *)buffer, buffer_len, ring_buffer->read_index);
> +
> + return 0;
> +}
> +
> +/*
> + * Read and advance the read index.
> + */
> +static int
> +hv_ring_buffer_read(struct hv_data *hv, void *buffer,
> + uint32_t buffer_len, uint32_t offset)
> +{
> + struct hv_vmbus_ring_buffer *ring_buffer = hv->in;
> + uint32_t bytes_avail_to_write;
> + uint32_t bytes_avail_to_read;
> + uint32_t next_read_location = 0;
> + uint64_t prev_indices = 0;
> +
> + if (buffer_len <= 0)
> + return -EINVAL;
> +
> + get_ring_buffer_avail_bytes(
> + hv,
> + ring_buffer,
> + &bytes_avail_to_read,
> + &bytes_avail_to_write);
> +
> + /*
> + * Make sure there is something to read
> + */
> + if (bytes_avail_to_read < buffer_len) {
> +		PMD_PERROR_LOG(hv, DBG_RB, "bytes_avail_to_read = %u, buffer_len = %u",
> + bytes_avail_to_read, buffer_len);
> + return -EAGAIN;
> + }
> +
> +	next_read_location = (ring_buffer->read_index + offset) % hv->rb_data_size;
> +
> + next_read_location = copy_from_ring_buffer(
> + hv,
> + ring_buffer,
> + (char *) buffer,
> + buffer_len,
> + next_read_location);
> +
> + next_read_location = copy_from_ring_buffer(
> + hv,
> + ring_buffer,
> + (char *) &prev_indices,
> + sizeof(uint64_t),
> + next_read_location);
> +
> + /*
> +	 * Make sure all reads are done before we update the read index since
> + * the writer may start writing to the read area once the read index
> + * is updated.
> + */
> + rte_compiler_barrier();
> +
> + /*
> + * Update the read index
> + */
> + ring_buffer->read_index = next_read_location;
> +
> + return 0;
> +}
> +
> +/*
> + * VMBus
> + */
> +
> +/*
> + * Retrieve the raw packet on the specified channel
> + */
> +static int
> +hv_vmbus_channel_recv_packet_raw(struct hv_data *hv, void *buffer,
> + uint32_t buffer_len,
> + uint32_t *buffer_actual_len,
> + uint64_t *request_id,
> + int mode)
> +{
> + int ret;
> + uint32_t packetLen;
> + struct hv_vm_packet_descriptor desc;
> +
> + *buffer_actual_len = 0;
> + *request_id = 0;
> +
> + ret = hv_ring_buffer_peek(hv, &desc,
> + sizeof(struct hv_vm_packet_descriptor));
> +
> + if (ret != 0)
> + return 0;
> +
> +	if ((desc.type == HV_VMBUS_PACKET_TYPE_DATA_USING_TRANSFER_PAGES
> +			&& !(mode & 1)) ||
> +			((desc.type == HV_VMBUS_PACKET_TYPE_COMPLETION) && !(mode & 2))) {
> + return -1;
> + }
> +
> + packetLen = desc.length8 << 3;
> +
> + *buffer_actual_len = packetLen;
> +
> + if (unlikely(packetLen > buffer_len)) {
> +		PMD_PERROR_LOG(hv, DBG_RX, "The buffer desc is too big, will drop it");
> + return -ENOMEM;
> + }
> +
> + *request_id = desc.transaction_id;
> +
> + /* Copy over the entire packet to the user buffer */
> + ret = hv_ring_buffer_read(hv, buffer, packetLen, 0);
> +
> + return 0;
> +}
> +
> +/*
> + * Trigger an event notification on the specified channel
> + */
> +static void
> +vmbus_channel_set_event(struct hv_data *hv)
> +{
> +	/* Here we assume channel->offer_msg.monitor_allocated == 1;
> +	 * otherwise this driver will not work */
> + /* Each uint32_t represents 32 channels */
> +	__sync_or_and_fetch(((uint32_t *)hv->send_interrupt_page
> +		+ ((hv->vmbus_device >> 5))), 1 << (hv->vmbus_device & 31));
> +	__sync_or_and_fetch((uint32_t *)&hv->monitor_pages->
> +		trigger_group[hv->monitor_group].u.pending, 1 << hv->monitor_bit);
> +}
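> +
> +/*
> + * Example of the bit arithmetic above: for vmbus_device = 37 the word
> + * index is 37 >> 5 = 1 and the bit index is 37 & 31 = 5, i.e. bit 5
> + * of the second uint32_t of the send interrupt page is set.
> + */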
> +
> +/**
> + * @brief Send the specified buffer on the given channel
> + */
> +static int
> +hv_vmbus_channel_send_packet(struct hv_data *hv, void *buffer,
> + uint32_t buffer_len, uint64_t request_id,
> + enum hv_vmbus_packet_type type,
> + uint32_t flags)
> +{
> + struct hv_vmbus_sg_buffer_list buffer_list[3];
> + struct hv_vm_packet_descriptor desc;
> + uint32_t packet_len_aligned;
> + uint64_t aligned_data;
> + uint32_t packet_len;
> + int ret = 0;
> + uint32_t old_write = hv->out->write_index;
> +
> + packet_len = sizeof(struct hv_vm_packet_descriptor) + buffer_len;
> + packet_len_aligned = HV_ALIGN_UP(packet_len, sizeof(uint64_t));
> + aligned_data = 0;
> +
> + /* Setup the descriptor */
> +	desc.type = type;   /* HV_VMBUS_PACKET_TYPE_DATA_IN_BAND */
> +	desc.flags = flags; /* HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED */
> + /* in 8-bytes granularity */
> + desc.data_offset8 = sizeof(struct hv_vm_packet_descriptor) >> 3;
> + desc.length8 = (uint16_t) (packet_len_aligned >> 3);
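> +	/*
> +	 * Example: this 16-byte descriptor gives data_offset8 = 2; a
> +	 * 100-byte payload makes packet_len = 116, aligned up to 120,
> +	 * so length8 = 15.
> +	 */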
> + desc.transaction_id = request_id;
> +
> + buffer_list[0].data = &desc;
> + buffer_list[0].length = sizeof(struct hv_vm_packet_descriptor);
> +
> + buffer_list[1].data = buffer;
> + buffer_list[1].length = buffer_len;
> +
> + buffer_list[2].data = &aligned_data;
> + buffer_list[2].length = packet_len_aligned - packet_len;
> +
> + ret = hv_ring_buffer_write(hv, buffer_list, 3);
> +
> + rte_mb();
> +	if (!ret && !hv->out->interrupt_mask && hv->out->read_index == old_write)
> + vmbus_channel_set_event(hv);
> +
> + return ret;
> +}
> +
> +/*
> + * Send a range of single-page buffer packets using
> + * a GPADL Direct packet type
> + */
> +static int
> +hv_vmbus_channel_send_packet_pagebuffer(
> + struct hv_data *hv,
> + struct hv_vmbus_page_buffer page_buffers[],
> + uint32_t page_count,
> + void *buffer,
> + uint32_t buffer_len,
> + uint64_t request_id)
> +{
> +
> + int ret = 0;
> + uint32_t packet_len, packetLen_aligned, descSize, i = 0;
> + struct hv_vmbus_sg_buffer_list buffer_list[3];
> + struct hv_vmbus_channel_packet_page_buffer desc;
> + uint64_t alignedData = 0;
> + uint32_t old_write = hv->out->write_index;
> +
> + if (page_count > HV_MAX_PAGE_BUFFER_COUNT) {
> +		PMD_PERROR_LOG(hv, DBG_VMBUS, "page_count %u exceeds the limit",
> +			       page_count);
> + return -EINVAL;
> + }
> +
> + /*
> +	 * Adjust the size down since hv_vmbus_channel_packet_page_buffer
> + * is the largest size we support
> + */
> + descSize = sizeof(struct hv_vmbus_channel_packet_page_buffer) -
> + ((HV_MAX_PAGE_BUFFER_COUNT - page_count) *
> + sizeof(struct hv_vmbus_page_buffer));
> + packet_len = descSize + buffer_len;
> + packetLen_aligned = HV_ALIGN_UP(packet_len, sizeof(uint64_t));
> +
> + /* Setup the descriptor */
> + desc.type = HV_VMBUS_PACKET_TYPE_DATA_USING_GPA_DIRECT;
> + desc.flags =
> HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED;
> + desc.data_offset8 = descSize >> 3; /* in 8-bytes granularity */
> + desc.length8 = (uint16_t) (packetLen_aligned >> 3);
> + desc.transaction_id = request_id;
> + desc.range_count = page_count;
> +
> + for (i = 0; i < page_count; i++) {
> + desc.range[i].length = page_buffers[i].length;
> + desc.range[i].offset = page_buffers[i].offset;
> + desc.range[i].pfn = page_buffers[i].pfn;
> + }
> +
> + buffer_list[0].data = &desc;
> + buffer_list[0].length = descSize;
> +
> + buffer_list[1].data = buffer;
> + buffer_list[1].length = buffer_len;
> +
> + buffer_list[2].data = &alignedData;
> + buffer_list[2].length = packetLen_aligned - packet_len;
> +
> + ret = hv_ring_buffer_write(hv, buffer_list, 3);
> + if (likely(ret == 0))
> + ++hv->num_outstanding_sends;
> +
> + rte_mb();
> + if (!ret && !hv->out->interrupt_mask &&
> + hv->out->read_index == old_write)
> + vmbus_channel_set_event(hv);
> +
> + return ret;
> +}
> +
> +/*
> + * NetVSC
> + */
> +
> +/*
> + * Net VSC on send
> + * Sends a packet on the specified Hyper-V device.
> + * Returns 0 on success, non-zero on failure.
> + */
> +static int
> +hv_nv_on_send(struct hv_data *hv, struct netvsc_packet *pkt)
> +{
> + struct nvsp_msg send_msg;
> + int ret;
> +
> + send_msg.msg_type = nvsp_msg_1_type_send_rndis_pkt;
> + if (pkt->is_data_pkt) {
> + /* 0 is RMC_DATA */
> + send_msg.msgs.send_rndis_pkt.chan_type = 0;
> + } else {
> + /* 1 is RMC_CONTROL */
> + send_msg.msgs.send_rndis_pkt.chan_type = 1;
> + }
> +
> + /* Not using send buffer section */
> + send_msg.msgs.send_rndis_pkt.send_buf_section_idx =
> + 0xFFFFFFFF;
> + send_msg.msgs.send_rndis_pkt.send_buf_section_size = 0;
> +
> + if (likely(pkt->page_buf_count)) {
> + ret = hv_vmbus_channel_send_packet_pagebuffer(hv,
> + pkt->page_buffers, pkt->page_buf_count,
> + &send_msg, sizeof(struct nvsp_msg),
> +			(uint64_t)pkt->is_data_pkt ? (hv->txq->tx_tail + 1) : 0);
> + } else {
> +		PMD_PERROR_LOG(hv, DBG_TX, "pkt->page_buf_count value can't be zero");
> + ret = -1;
> + }
> +
> + return ret;
> +}
> +
> +/*
> + * Net VSC on receive
> + *
> + * This function deals exclusively with virtual addresses.
> + */
> +static void
> +hv_nv_on_receive(struct hv_data *hv, struct hv_vm_packet_descriptor *pkt)
> +{
> + struct hv_vm_transfer_page_packet_header *vm_xfer_page_pkt;
> + struct nvsp_msg *nvsp_msg_pkt;
> + struct netvsc_packet *net_vsc_pkt = NULL;
> + unsigned long start;
> + int count, i;
> +
> + nvsp_msg_pkt = (struct nvsp_msg *)((unsigned long)pkt
> + + (pkt->data_offset8 << 3));
> +
> + /* Make sure this is a valid nvsp packet */
> +	if (unlikely(nvsp_msg_pkt->msg_type != nvsp_msg_1_type_send_rndis_pkt)) {
> + PMD_PERROR_LOG(hv, DBG_RX, "NVSP packet is not valid");
> + return;
> + }
> +
> +	vm_xfer_page_pkt = (struct hv_vm_transfer_page_packet_header *)pkt;
> +
> + if (unlikely(vm_xfer_page_pkt->transfer_page_set_id
> + != NETVSC_RECEIVE_BUFFER_ID)) {
> +		PMD_PERROR_LOG(hv, DBG_RX, "transfer_page_set_id is not valid");
> + return;
> + }
> +
> + count = vm_xfer_page_pkt->range_count;
> +
> + /*
> + * Initialize the netvsc packet
> + */
> + for (i = 0; i < count; ++i) {
> + net_vsc_pkt = hv->netvsc_packet;
> +
> + net_vsc_pkt->tot_data_buf_len =
> + vm_xfer_page_pkt->ranges[i].byte_count;
> + net_vsc_pkt->page_buf_count = 1;
> +
> + net_vsc_pkt->page_buffers[0].length =
> + vm_xfer_page_pkt->ranges[i].byte_count;
> +
> + /* The virtual address of the packet in the receive buffer */
> + start = ((unsigned long)hv->recv_buf +
> + vm_xfer_page_pkt->ranges[i].byte_offset);
> +
> + /* Page number of the virtual page containing packet start */
> + net_vsc_pkt->page_buffers[0].pfn = start >> PAGE_SHIFT;
> +
> + /* Calculate the page relative offset */
> + net_vsc_pkt->page_buffers[0].offset =
> +			vm_xfer_page_pkt->ranges[i].byte_offset & (PAGE_SIZE - 1);
> +
> + /*
> + * In this implementation, we are dealing with virtual
> + * addresses exclusively. Since we aren't using physical
> + * addresses at all, we don't care if a packet crosses a
> + * page boundary. For this reason, the original code to
> + * check for and handle page crossings has been removed.
> + */
> +
> + /*
> + * Pass it to the upper layer. The receive completion call
> + * has been moved into this function.
> + */
> + hv_rf_on_receive(hv, net_vsc_pkt);
> + }
> + /* Send a receive completion packet to RNDIS device (ie NetVsp) */
> +	hv_vmbus_channel_send_packet(hv, hv->rx_comp_msg, sizeof(struct nvsp_msg),
> + vm_xfer_page_pkt->d.transaction_id,
> + HV_VMBUS_PACKET_TYPE_COMPLETION, 0);
> +}
> +
> +/*
> + * Net VSC on send completion
> + */
> +static void
> +hv_nv_on_send_completion(struct hv_data *hv, struct hv_vm_packet_descriptor *pkt)
> +{
> + struct nvsp_msg *nvsp_msg_pkt;
> +
> + nvsp_msg_pkt =
> +		(struct nvsp_msg *)((unsigned long)pkt + (pkt->data_offset8 << 3));
> +
> +	if (likely(nvsp_msg_pkt->msg_type == nvsp_msg_1_type_send_rndis_pkt_complete)) {
> +
> + if (unlikely(hv->hlt_req_pending))
> + hv->hlt_req_sent = 1;
> + else
> + if (pkt->transaction_id)
> + ++hv->txq->tx_free;
> + --hv->num_outstanding_sends;
> + return;
> + }
> +	PMD_PINFO_LOG(hv, DBG_TX, "unhandled completion (kernel request or similar)");
> +}
> +
> +/*
> + * Analogue of bsd hv_nv_on_channel_callback
> + */
> +static void
> +hv_nv_complete_request(struct hv_data *hv, struct rndis_request *request)
> +{
> + uint32_t bytes_rxed, cnt = 0;
> + uint64_t request_id;
> + struct hv_vm_packet_descriptor *desc;
> + uint8_t *buffer;
> + int bufferlen = NETVSC_PACKET_SIZE;
> + int ret = 0;
> +
> + PMD_INIT_FUNC_TRACE();
> +
> + hv->req = request;
> +
> + buffer = rte_malloc(NULL, bufferlen, RTE_CACHE_LINE_SIZE);
> + if (!buffer) {
> +		PMD_PERROR_LOG(hv, DBG_LOAD, "failed to allocate packet");
> + return;
> + }
> +
> + do {
> + rte_delay_us(1);
> + ret = hv_vmbus_channel_recv_packet_raw(hv,
> +			buffer, bufferlen, &bytes_rxed, &request_id, 3);
> + if (ret == 0) {
> + if (bytes_rxed > 0) {
> +				desc = (struct hv_vm_packet_descriptor *)buffer;
> +
> + switch (desc->type) {
> +				case HV_VMBUS_PACKET_TYPE_COMPLETION:
> +					hv_nv_on_send_completion(hv, desc);
> +					break;
> +				case HV_VMBUS_PACKET_TYPE_DATA_USING_TRANSFER_PAGES:
> + hv_nv_on_receive(hv, desc);
> + break;
> + default:
> + break;
> + }
> + PMD_PDEBUG_LOG(hv, DBG_LOAD,
> +					"Made %d attempts until non-empty data was received",
> + cnt);
> + cnt = 0;
> + } else {
> + cnt++;
> + }
> + } else if (ret == -ENOMEM) {
> + /* Handle large packet */
> + PMD_PDEBUG_LOG(hv, DBG_LOAD,
> + "recv_packet_raw returned -ENOMEM");
> + rte_free(buffer);
> +			buffer = rte_malloc(NULL, bytes_rxed, RTE_CACHE_LINE_SIZE);
> + if (buffer == NULL) {
> +				PMD_PERROR_LOG(hv, DBG_LOAD, "failed to allocate buffer");
> + break;
> + }
> + bufferlen = bytes_rxed;
> + } else {
> +			PMD_PERROR_LOG(hv, DBG_LOAD, "Unexpected return code (%d)", ret);
> + }
> + if (!hv->req) {
> +			PMD_PINFO_LOG(hv, DBG_LOAD, "Single request processed");
> + break;
> + }
> + if (cnt >= LOOP_CNT) {
> +			PMD_PERROR_LOG(hv, DBG_LOAD, "Emergency break from the loop");
> + break;
> + }
> + if (hv->hlt_req_sent) {
> +			PMD_PINFO_LOG(hv, DBG_LOAD, "Halt request processed");
> + break;
> + }
> + /* The field hv->req->response_msg.ndis_msg_type
> + * should be set to non-zero value when response received
> + */
> + } while (!hv->req->response_msg.ndis_msg_type);
> +
> + rte_free(buffer);
> +}
> +
> +/*
> + * RNDIS
> + */
> +
> +/*
> + * Create new RNDIS request
> + */
> +static inline struct rndis_request *
> +hv_rndis_request(struct hv_data *hv, uint32_t message_type,
> + uint32_t message_length)
> +{
> + struct rndis_request *request;
> + struct rndis_msg *rndis_mesg;
> + struct rndis_set_request *set;
> + char mz_name[RTE_MEMZONE_NAMESIZE];
> + uint32_t size;
> +
> + PMD_INIT_FUNC_TRACE();
> +
> + request = rte_zmalloc("rndis_req", sizeof(struct rndis_request),
> + RTE_CACHE_LINE_SIZE);
> +
> + if (!request)
> + return NULL;
> +
> +	sprintf(mz_name, "hv_%d_%u_%d_%p", hv->vmbus_device, message_type,
> + hv->new_request_id, request);
> +
> + size = MAX(message_length, sizeof(struct rndis_msg));
> +
> +	request->request_msg_memzone = rte_memzone_reserve_aligned(mz_name,
> +		size, rte_lcore_to_socket_id(rte_lcore_id()), 0, PAGE_SIZE);
> + if (!request->request_msg_memzone) {
> +		PMD_PERROR_LOG(hv, DBG_LOAD, "memzone_reserve failed");
> + rte_free(request);
> + return NULL;
> + }
> + request->request_msg = request->request_msg_memzone->addr;
> + rndis_mesg = request->request_msg;
> + rndis_mesg->ndis_msg_type = message_type;
> + rndis_mesg->msg_len = message_length;
> +
> + /*
> + * Set the request id. This field is always after the rndis header
> + * for request/response packet types so we just use the set_request
> + * as a template.
> + */
> + set = &rndis_mesg->msg.set_request;
> + hv->new_request_id++;
> + set->request_id = hv->new_request_id;
> +
> + return request;
> +}
> +
> +/*
> + * RNDIS filter
> + */
> +
> +static void
> +hv_rf_receive_response(
> + struct hv_data *hv,
> + struct rndis_msg *response)
> +{
> + struct rndis_request *request = hv->req;
> +
> + PMD_INIT_FUNC_TRACE();
> +
> + if (response->msg_len <= sizeof(struct rndis_msg)) {
> + rte_memcpy(&request->response_msg, response,
> + response->msg_len);
> + } else {
> +		if (response->ndis_msg_type == REMOTE_NDIS_INITIALIZE_CMPLT) {
> + request->response_msg.msg.init_complete.status =
> + STATUS_BUFFER_OVERFLOW;
> + }
> +		PMD_PERROR_LOG(hv, DBG_LOAD, "response buffer overflow\n");
> + }
> +}
> +
> +/*
> + * RNDIS filter receive indicate status
> + */
> +static void
> +hv_rf_receive_indicate_status(struct hv_data *hv, struct rndis_msg *response)
> +{
> +	struct rndis_indicate_status *indicate = &response->msg.indicate_status;
> +
> + PMD_INIT_FUNC_TRACE();
> +
> + if (indicate->status == RNDIS_STATUS_MEDIA_CONNECT)
> + hv->link_status = 1;
> + else if (indicate->status == RNDIS_STATUS_MEDIA_DISCONNECT)
> + hv->link_status = 0;
> + else if (indicate->status == RNDIS_STATUS_INVALID_DATA)
> +		PMD_PERROR_LOG(hv, DBG_RX, "Invalid data in RNDIS message");
> + else
> +		PMD_PERROR_LOG(hv, DBG_RX, "Unsupported status: %u", indicate->status);
> +}
> +
> +/*
> + * RNDIS filter receive data
> + */
> +static void
> +hv_rf_receive_data(struct hv_data *hv, struct rndis_msg *msg,
> + struct netvsc_packet *pkt)
> +{
> + struct rte_mbuf *m_new;
> + struct hv_rx_queue *rxq = hv->rxq;
> + struct rndis_packet *rndis_pkt;
> + uint32_t data_offset;
> +
> + if (unlikely(hv->closed))
> + return;
> +
> + rndis_pkt = &msg->msg.packet;
> +
> + if (unlikely(hv->max_rx_pkt_len < rndis_pkt->data_length)) {
> +		PMD_PWARN_LOG(hv, DBG_RX, "Packet is too large (%db), dropping.",
> + rndis_pkt->data_length);
> + ++hv->stats.ierrors;
> + return;
> + }
> +
> + /* Remove rndis header, then pass data packet up the stack */
> + data_offset = RNDIS_HEADER_SIZE + rndis_pkt->data_offset;
> +
> + /* L2 frame length, with L2 header, not including CRC */
> + pkt->tot_data_buf_len = rndis_pkt->data_length;
> + pkt->page_buffers[0].offset += data_offset;
> + /* Buffer length now L2 frame length plus trailing junk */
> + pkt->page_buffers[0].length -= data_offset;
> +
> + pkt->vlan_tci = 0;
> +
> + /*
> + * Just put data into appropriate mbuf, all further work will be done
> + * by the upper layer (mbuf replacement, index adjustment, etc)
> + */
> + m_new = rxq->sw_ring[rxq->rx_tail];
> + if (++rxq->rx_tail == rxq->nb_rx_desc)
> + rxq->rx_tail = 0;
> +
> + /*
> + * Copy the received packet to mbuf.
> +	 * The copy is required since the memory pointed to by netvsc_packet
> + * cannot be reallocated
> + */
> + uint8_t *vaddr = (uint8_t *)
> + (pkt->page_buffers[0].pfn << PAGE_SHIFT)
> + + pkt->page_buffers[0].offset;
> +
> + m_new->nb_segs = 1;
> + m_new->pkt_len = m_new->data_len = pkt->tot_data_buf_len;
> +	rte_memcpy(rte_pktmbuf_mtod(m_new, void *), vaddr, m_new->data_len);
> +
> + if (pkt->vlan_tci) {
> + m_new->vlan_tci = pkt->vlan_tci;
> + m_new->ol_flags |= PKT_RX_VLAN_PKT;
> + }
> +
> + hv->pkt_rxed = 1;
> +}
> +
> +/*
> + * RNDIS filter receive data, jumbo frames support
> + */
> +static void
> +hv_rf_receive_data_sg(struct hv_data *hv, struct rndis_msg *msg,
> + struct netvsc_packet *pkt)
> +{
> + struct rte_mbuf *m_new;
> + struct hv_rx_queue *rxq = hv->rxq;
> + struct rndis_packet *rndis_pkt;
> + uint32_t data_offset;
> +
> + if (unlikely(hv->closed))
> + return;
> +
> + rndis_pkt = &msg->msg.packet;
> +
> + /* Remove rndis header, then pass data packet up the stack */
> + data_offset = RNDIS_HEADER_SIZE + rndis_pkt->data_offset;
> +
> + /* L2 frame length, with L2 header, not including CRC */
> + pkt->tot_data_buf_len = rndis_pkt->data_length;
> + pkt->page_buffers[0].offset += data_offset;
> + /* Buffer length now L2 frame length plus trailing junk */
> + pkt->page_buffers[0].length -= data_offset;
> +
> + pkt->vlan_tci = 0;
> +
> + /*
> + * Just put data into appropriate mbuf, all further work will be done
> + * by the upper layer (mbuf replacement, index adjustment, etc)
> + */
> + m_new = rxq->sw_ring[rxq->rx_tail];
> + if (++rxq->rx_tail == rxq->nb_rx_desc)
> + rxq->rx_tail = 0;
> +
> + /*
> + * Copy the received packet to mbuf.
> +	 * The copy is required since the memory pointed to by netvsc_packet
> + * cannot be reallocated
> + */
> + uint8_t *vaddr = (uint8_t *)
> + (pkt->page_buffers[0].pfn << PAGE_SHIFT)
> + + pkt->page_buffers[0].offset;
> +
> + /* Scatter-gather emulation */
> + uint32_t carry_len = pkt->tot_data_buf_len;
> + struct rte_mbuf *m_next;
> +
> + m_new->pkt_len = carry_len;
> + m_new->nb_segs = (carry_len - 1) / hv->max_rx_pkt_len + 1;
> +
> + while (1) {
> + m_new->data_len = MIN(carry_len, hv->max_rx_pkt_len);
> + rte_memcpy(rte_pktmbuf_mtod(m_new, void *),
> + vaddr, m_new->data_len);
> + vaddr += m_new->data_len;
> +
> + if (carry_len <= hv->max_rx_pkt_len)
> + break;
> +
> + carry_len -= hv->max_rx_pkt_len;
> + m_next = rxq->sw_ring[rxq->rx_tail];
> + if (++rxq->rx_tail == rxq->nb_rx_desc)
> + rxq->rx_tail = 0;
> + m_new->next = m_next;
> + m_new = m_next;
> + }
> +
> + if (pkt->vlan_tci) {
> + m_new->vlan_tci = pkt->vlan_tci;
> + m_new->ol_flags |= PKT_RX_VLAN_PKT;
> + }
> +
> + hv->pkt_rxed = 1;
> +}
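> +
> +/*
> + * Example of the segment math above: with max_rx_pkt_len = 4096
> + * (HV_MAX_RX_PKT_LEN), a 9000-byte frame gives nb_segs =
> + * (9000 - 1) / 4096 + 1 = 3, filled as 4096 + 4096 + 808 bytes.
> + */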
> +
> +static int
> +hv_rf_send_request(struct hv_data *hv, struct rndis_request *request)
> +{
> + struct netvsc_packet *packet;
> +
> + PMD_INIT_FUNC_TRACE();
> + /* Set up the packet to send it */
> + packet = &request->pkt;
> +
> + packet->is_data_pkt = 0;
> + packet->tot_data_buf_len = request->request_msg->msg_len;
> + packet->page_buf_count = 1;
> +
> + packet->page_buffers[0].pfn =
> +		(request->request_msg_memzone->phys_addr) >> PAGE_SHIFT;
> + packet->page_buffers[0].length = request->request_msg->msg_len;
> + packet->page_buffers[0].offset =
> + (unsigned long)request->request_msg & (PAGE_SIZE - 1);
> +
> + return hv_nv_on_send(hv, packet);
> +}
> +
> +static void u8_to_u16(const char *src, int len, char *dst)
> +{
> + int i;
> +
> + for (i = 0; i < len; ++i) {
> + dst[2 * i] = src[i];
> + dst[2 * i + 1] = 0;
> + }
> +}
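> +
> +/*
> + * u8_to_u16() expands each ASCII byte to a little-endian 16-bit code
> + * unit, e.g. u8_to_u16("NetworkAddress", MAC_STRLEN, dst) yields the
> + * 28-byte UTF-16 parameter name the host expects.
> + */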
> +
> +int
> +hv_rf_set_device_mac(struct hv_data *hv, uint8_t *macaddr)
> +{
> + struct rndis_request *request;
> + struct rndis_set_request *set_request;
> + struct rndis_config_parameter_info *info;
> + struct rndis_set_complete *set_complete;
> + char mac_str[2*ETHER_ADDR_LEN+1];
> + wchar_t *param_value, *param_name;
> + uint32_t status;
> +	uint32_t message_len = sizeof(struct rndis_config_parameter_info) +
> +		2 * MAC_STRLEN + 4 * ETHER_ADDR_LEN;
> + int ret, i;
> +
> + request = hv_rndis_request(hv, REMOTE_NDIS_SET_MSG,
> +		RNDIS_MESSAGE_SIZE(struct rndis_set_request) + message_len);
> + if (!request)
> + return -ENOMEM;
> +
> + set_request = &request->request_msg->msg.set_request;
> + set_request->oid = RNDIS_OID_GEN_RNDIS_CONFIG_PARAMETER;
> + set_request->device_vc_handle = 0;
> + set_request->info_buffer_offset = sizeof(struct rndis_set_request);
> + set_request->info_buffer_length = message_len;
> +
> + info = (struct rndis_config_parameter_info *)((ulong)set_request +
> + set_request->info_buffer_offset);
> + info->parameter_type = RNDIS_CONFIG_PARAM_TYPE_STRING;
> + info->parameter_name_offset =
> + sizeof(struct rndis_config_parameter_info);
> + info->parameter_name_length = 2 * MAC_STRLEN;
> + info->parameter_value_offset =
> +		info->parameter_name_offset + info->parameter_name_length;
> + /* Multiply by 2 because of string representation and by 2
> + * because of utf16 representation
> + */
> + info->parameter_value_length = 4 * ETHER_ADDR_LEN;
> +	param_name = (wchar_t *)((ulong)info + info->parameter_name_offset);
> +	param_value = (wchar_t *)((ulong)info + info->parameter_value_offset);
> +
> + u8_to_u16(MAC_PARAM_STR, MAC_STRLEN, (char *)param_name);
> + for (i = 0; i < ETHER_ADDR_LEN; ++i) {
> + mac_str[2*i] = high(macaddr[i]);
> + mac_str[2*i+1] = low(macaddr[i]);
> + }
> +
> +	u8_to_u16((const char *)mac_str, 2 * ETHER_ADDR_LEN, (char *)param_value);
> +
> + ret = hv_rf_send_request(hv, request);
> + if (ret)
> + goto cleanup;
> +
> + request->response_msg.msg.set_complete.status = 0xFFFF;
> + hv_nv_complete_request(hv, request);
> + set_complete = &request->response_msg.msg.set_complete;
> + if (set_complete->status == 0xFFFF) {
> +		/* Host is not responding, we can't free the request in this case */
> + ret = -1;
> +		PMD_PERROR_LOG(hv, DBG_LOAD, "Host is not responding");
> + goto exit;
> + }
> + /* Response received, check status */
> + status = set_complete->status;
> + if (status) {
> + /* Bad response status, return error */
> +		PMD_PERROR_LOG(hv, DBG_LOAD, "set_complete->status = %u\n", status);
> + ret = -EINVAL;
> + }
> +
> +cleanup:
> + rte_free(request);
> +exit:
> + return ret;
> +}
> +
> +/*
> + * RNDIS filter on receive
> + */
> +static int
> +hv_rf_on_receive(struct hv_data *hv, struct netvsc_packet *pkt)
> +{
> + struct rndis_msg rndis_mesg;
> + struct rndis_msg *rndis_hdr;
> +
> + /* Shift virtual page number to form virtual page address */
> +	rndis_hdr = (struct rndis_msg *)(pkt->page_buffers[0].pfn << PAGE_SHIFT);
> +
> + rndis_hdr = (void *)((unsigned long)rndis_hdr
> + + pkt->page_buffers[0].offset);
> +
> + /*
> + * Make sure we got a valid rndis message
> + * Fixme: There seems to be a bug in set completion msg where
> + * its msg_len is 16 bytes but the byte_count field in the
> + * xfer page range shows 52 bytes
> + */
> + if (unlikely(pkt->tot_data_buf_len != rndis_hdr->msg_len)) {
> + ++hv->stats.ierrors;
> + PMD_PERROR_LOG(hv, DBG_RX,
> + "invalid rndis message? (expected %u "
> + "bytes got %u)... dropping this message",
> + rndis_hdr->msg_len, pkt->tot_data_buf_len);
> + return -1;
> + }
> +
> + rte_memcpy(&rndis_mesg, rndis_hdr,
> + (rndis_hdr->msg_len > sizeof(struct rndis_msg)) ?
> + sizeof(struct rndis_msg) : rndis_hdr->msg_len);
> +
> + switch (rndis_mesg.ndis_msg_type) {
> +
> + /* data message */
> + case REMOTE_NDIS_PACKET_MSG:
> + hv->receive_callback(hv, &rndis_mesg, pkt);
> + break;
> + /* completion messages */
> + case REMOTE_NDIS_INITIALIZE_CMPLT:
> + case REMOTE_NDIS_QUERY_CMPLT:
> + case REMOTE_NDIS_SET_CMPLT:
> + case REMOTE_NDIS_RESET_CMPLT:
> + case REMOTE_NDIS_KEEPALIVE_CMPLT:
> + hv_rf_receive_response(hv, &rndis_mesg);
> + break;
> + /* notification message */
> + case REMOTE_NDIS_INDICATE_STATUS_MSG:
> + hv_rf_receive_indicate_status(hv, &rndis_mesg);
> + break;
> + default:
> +		PMD_PERROR_LOG(hv, DBG_RX, "hv_rf_on_receive(): Unknown msg_type 0x%x",
> + rndis_mesg.ndis_msg_type);
> + break;
> + }
> +
> + return 0;
> +}
> +
> +/*
> + * RNDIS filter on send
> + */
> +int
> +hv_rf_on_send(struct hv_data *hv, struct netvsc_packet *pkt)
> +{
> + struct rndis_msg *rndis_mesg;
> + struct rndis_packet *rndis_pkt;
> + uint32_t rndis_msg_size;
> +
> + /* Add the rndis header */
> + rndis_mesg = (struct rndis_msg *)pkt->extension;
> +
> + memset(rndis_mesg, 0, sizeof(struct rndis_msg));
> +
> + rndis_msg_size = RNDIS_MESSAGE_SIZE(struct rndis_packet);
> +
> + rndis_mesg->ndis_msg_type = REMOTE_NDIS_PACKET_MSG;
> + rndis_mesg->msg_len = pkt->tot_data_buf_len + rndis_msg_size;
> +
> + rndis_pkt = &rndis_mesg->msg.packet;
> + rndis_pkt->data_offset = sizeof(struct rndis_packet);
> + rndis_pkt->data_length = pkt->tot_data_buf_len;
> +
> + pkt->is_data_pkt = 1;
> +
> + /*
> + * Invoke netvsc send. If return status is bad, the caller now
> + * resets the context pointers before retrying.
> + */
> + return hv_nv_on_send(hv, pkt);
> +}
> +
> +static int
> +hv_rf_init_device(struct hv_data *hv)
> +{
> + struct rndis_request *request;
> + struct rndis_initialize_request *init;
> + struct rndis_initialize_complete *init_complete;
> + uint32_t status;
> + int ret;
> +
> + PMD_INIT_FUNC_TRACE();
> +
> + request = hv_rndis_request(hv, REMOTE_NDIS_INITIALIZE_MSG,
> + RNDIS_MESSAGE_SIZE(struct rndis_initialize_request));
> + if (!request) {
> + ret = -1;
> + goto cleanup;
> + }
> +
> + /* Set up the rndis set */
> + init = &request->request_msg->msg.init_request;
> + init->major_version = RNDIS_MAJOR_VERSION;
> + init->minor_version = RNDIS_MINOR_VERSION;
> + /*
> + * Per the RNDIS document, this should be set to the max MTU
> + * plus the header size. However, 2048 works fine, so leaving
> + * it as is.
> + */
> + init->max_xfer_size = 2048;
> +
> + hv->rndis_dev_state = RNDIS_DEV_INITIALIZING;
> +
> + ret = hv_rf_send_request(hv, request);
> + if (ret != 0) {
> + hv->rndis_dev_state = RNDIS_DEV_UNINITIALIZED;
> + goto cleanup;
> + }
> +
> +	/* Put -1 here to ensure that Hyper-V really answered us */
> + request->response_msg.msg.init_complete.status = -1;
> + hv_nv_complete_request(hv, request);
> +
> + init_complete = &request->response_msg.msg.init_complete;
> + status = init_complete->status;
> + if (status == 0) {
> +		PMD_PINFO_LOG(hv, DBG_LOAD, "Remote NDIS device is initialized");
> + hv->rndis_dev_state = RNDIS_DEV_INITIALIZED;
> + ret = 0;
> + } else {
> +		PMD_PINFO_LOG(hv, DBG_LOAD, "Remote NDIS device left uninitialized");
> + hv->rndis_dev_state = RNDIS_DEV_UNINITIALIZED;
> + ret = -1;
> + }
> +
> +cleanup:
> + rte_free(request);
> +
> + return ret;
> +}
> +
> +/*
> + * RNDIS filter query device
> + */
> +static int
> +hv_rf_query_device(struct hv_data *hv, uint32_t oid, void *result,
> + uint32_t result_size)
> +{
> + struct rndis_request *request;
> + struct rndis_query_request *query;
> + struct rndis_query_complete *query_complete;
> + int ret = 0;
> +
> + PMD_INIT_FUNC_TRACE();
> +
> + request = hv_rndis_request(hv, REMOTE_NDIS_QUERY_MSG,
> + RNDIS_MESSAGE_SIZE(struct rndis_query_request));
> + if (request == NULL) {
> + ret = -1;
> + goto cleanup;
> + }
> +
> + /* Set up the rndis query */
> + query = &request->request_msg->msg.query_request;
> + query->oid = oid;
> + query->info_buffer_offset = sizeof(struct rndis_query_request);
> + query->info_buffer_length = 0;
> + query->device_vc_handle = 0;
> +
> + ret = hv_rf_send_request(hv, request);
> + if (ret != 0) {
> +		PMD_PERROR_LOG(hv, DBG_TX, "RNDISFILTER request failed to send!");
> + goto cleanup;
> + }
> +
> + hv_nv_complete_request(hv, request);
> +
> + /* Copy the response back */
> + query_complete = &request->response_msg.msg.query_complete;
> +
> + if (query_complete->info_buffer_length > result_size) {
> + ret = -EINVAL;
> + goto cleanup;
> + }
> +
> + rte_memcpy(result, (void *)((unsigned long)query_complete +
> + query_complete->info_buffer_offset),
> + query_complete->info_buffer_length);
> +
> +cleanup:
> + rte_free(request);
> +
> + return ret;
> +}
> +
> +/*
> + * RNDIS filter query device MAC address
> + */
> +static inline int
> +hv_rf_query_device_mac(struct hv_data *hv)
> +{
> + uint32_t size = HW_MACADDR_LEN;
> +
> +	int ret = hv_rf_query_device(hv, RNDIS_OID_802_3_PERMANENT_ADDRESS,
> + &hv->hw_mac_addr, size);
> +	PMD_PDEBUG_LOG(hv, DBG_TX, "MAC: %02x:%02x:%02x:%02x:%02x:%02x, ret = %d",
> +		hv->hw_mac_addr[0], hv->hw_mac_addr[1], hv->hw_mac_addr[2],
> +		hv->hw_mac_addr[3], hv->hw_mac_addr[4], hv->hw_mac_addr[5],
> +		ret);
> + return ret;
> +}
> +
> +/*
> + * RNDIS filter query device link status
> + */
> +static inline int
> +hv_rf_query_device_link_status(struct hv_data *hv)
> +{
> + uint32_t size = sizeof(uint32_t);
> +	/* Set all bits to 1 so we can tell whether the host actually updated the value */
> + uint32_t status = -1;
> +
> +	int ret = hv_rf_query_device(hv, RNDIS_OID_GEN_MEDIA_CONNECT_STATUS,
> + &status, size);
> + hv->link_status = status ? 0 : 1;
> + PMD_PDEBUG_LOG(hv, DBG_TX, "Link Status: %s",
> + hv->link_status ? "Up" : "Down");
> + return ret;
> +}
> +
> +int
> +hv_rf_on_device_add(struct hv_data *hv)
> +{
> + int ret;
> +
> + PMD_INIT_FUNC_TRACE();
> +
> + hv->closed = 0;
> +	hv->rb_data_size = hv->rb_size - sizeof(struct hv_vmbus_ring_buffer);
> +	PMD_PDEBUG_LOG(hv, DBG_LOAD, "hv->rb_data_size = %u", hv->rb_data_size);
> +
> + if (unlikely(hv->in->interrupt_mask == 0)) {
> +		PMD_PINFO_LOG(hv, DBG_LOAD, "Disabling interrupts from host");
> + hv->in->interrupt_mask = 1;
> + rte_mb();
> + }
> +
> + hv->netvsc_packet = rte_zmalloc("", sizeof(struct netvsc_packet),
> + RTE_CACHE_LINE_SIZE);
> + if (hv->netvsc_packet == NULL)
> + return -ENOMEM;
> + hv->netvsc_packet->is_data_pkt = 1;
> +
> + hv->rx_comp_msg = rte_zmalloc("", sizeof(struct nvsp_msg),
> + RTE_CACHE_LINE_SIZE);
> + if (hv->rx_comp_msg == NULL)
> + return -ENOMEM;
> +
> +	hv->rx_comp_msg->msg_type = nvsp_msg_1_type_send_rndis_pkt_complete;
> + hv->rx_comp_msg->msgs.send_rndis_pkt_complete.status =
> + nvsp_status_success;
> +
> + memset(&hv->stats, 0, sizeof(struct hv_stats));
> +
> + hv->receive_callback = hv_rf_receive_data;
> +
> +	/* Complete requests that were sent from the kernel-space part */
> + hv_nv_complete_request(hv, NULL);
> + hv_nv_complete_request(hv, NULL);
> +
> + hv->rndis_dev_state = RNDIS_DEV_UNINITIALIZED;
> +
> + /* Send the rndis initialization message */
> + ret = hv_rf_init_device(hv);
> + if (ret != 0) {
> + PMD_PERROR_LOG(hv, DBG_LOAD, "rndis init failed!");
> + hv_rf_on_device_remove(hv);
> + return ret;
> + }
> +
> + /* Get the mac address */
> + ret = hv_rf_query_device_mac(hv);
> + if (ret != 0) {
> +		PMD_PERROR_LOG(hv, DBG_LOAD, "rndis query mac failed!");
> + hv_rf_on_device_remove(hv);
> + return ret;
> + }
> +
> + return ret;
> +}
> +
> +#define HALT_COMPLETION_WAIT_COUNT 25
> +
> +/*
> + * RNDIS filter halt device
> + */
> +static int
> +hv_rf_halt_device(struct hv_data *hv)
> +{
> + struct rndis_request *request;
> + struct rndis_halt_request *halt;
> + int i, ret;
> +
> + PMD_INIT_FUNC_TRACE();
> +
> + /* Attempt to do a rndis device halt */
> + request = hv_rndis_request(hv, REMOTE_NDIS_HALT_MSG,
> + RNDIS_MESSAGE_SIZE(struct rndis_halt_request));
> + if (!request) {
> +		PMD_PERROR_LOG(hv, DBG_LOAD, "Unable to create RNDIS_HALT request");
> + return -1;
> + }
> +
> + /* initialize "poor man's semaphore" */
> + hv->hlt_req_sent = 0;
> +
> + /* Set up the rndis set */
> + halt = &request->request_msg->msg.halt_request;
> + hv->new_request_id++;
> + halt->request_id = hv->new_request_id;
> +
> + ret = hv_rf_send_request(hv, request);
> + if (ret) {
> +		PMD_PERROR_LOG(hv, DBG_LOAD, "Failed to send RNDIS_HALT request: %d",
> + ret);
> + return ret;
> + }
> +
> + /*
> + * Wait for halt response from halt callback. We must wait for
> + * the transaction response before freeing the request and other
> + * resources.
> + */
> + for (i = HALT_COMPLETION_WAIT_COUNT; i > 0; i--) {
> + hv_nv_complete_request(hv, request);
> + if (hv->hlt_req_sent != 0) {
> +			PMD_PDEBUG_LOG(hv, DBG_LOAD, "Completed HALT request on try %d",
> +				       HALT_COMPLETION_WAIT_COUNT - i + 1);
> + break;
> + }
> + }
> + hv->hlt_req_sent = 0;
> + if (i == 0) {
> +		PMD_PERROR_LOG(hv, DBG_LOAD, "RNDIS_HALT request was not completed!");
> + rte_free(request);
> + return -1;
> + }
> +
> + hv->rndis_dev_state = RNDIS_DEV_UNINITIALIZED;
> +
> + rte_free(request);
> +
> + return 0;
> +}
> +
> +#define HV_TX_DRAIN_TRIES 50
> +static inline int
> +hyperv_tx_drain(struct hv_data *hv)
> +{
> + int i = HV_TX_DRAIN_TRIES;
> +
> +	PMD_PDEBUG_LOG(hv, DBG_LOAD, "Waiting for TXs to be completed...");
> + while (hv->num_outstanding_sends > 0 && --i) {
> + hv_nv_complete_request(hv, NULL);
> + rte_delay_ms(100);
> + }
> +
> + return hv->num_outstanding_sends;
> +}
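> +
> +/*
> + * The drain budget above is HV_TX_DRAIN_TRIES * 100 ms = 5 seconds;
> + * a non-zero return value means the host still holds uncompleted TXs.
> + */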
> +
> +/*
> + * RNDIS filter on device remove
> + */
> +int
> +hv_rf_on_device_remove(struct hv_data *hv)
> +{
> + int ret;
> +
> + PMD_INIT_FUNC_TRACE();
> + hv->closed = 1;
> + if (hyperv_tx_drain(hv) > 0) {
> + /* Hypervisor is not responding, exit with error here */
> +		PMD_PWARN_LOG(hv, DBG_LOAD, "Can't drain TX queue: no response");
> + return -EAGAIN;
> + }
> +	PMD_PDEBUG_LOG(hv, DBG_LOAD, "TX queue is empty, can halt the device");
> +
> + /* Halt and release the rndis device */
> + hv->hlt_req_pending = 1;
> + ret = hv_rf_halt_device(hv);
> + hv->hlt_req_pending = 0;
> +
> + rte_free(hv->netvsc_packet);
> +
> + return ret;
> +}
> +
> +/*
> + * RNDIS filter set packet filter
> + * Sends an rndis request with the new filter, then waits for a response
> + * from the host.
> + * Returns zero on success, non-zero on failure.
> + */
> +static int
> +hv_rf_set_packet_filter(struct hv_data *hv, uint32_t new_filter)
> +{
> + struct rndis_request *request;
> + struct rndis_set_request *set;
> + struct rndis_set_complete *set_complete;
> + uint32_t status;
> + int ret;
> +
> + PMD_INIT_FUNC_TRACE();
> +
> + request = hv_rndis_request(hv, REMOTE_NDIS_SET_MSG,
> +		RNDIS_MESSAGE_SIZE(struct rndis_set_request) + sizeof(uint32_t));
> + if (!request) {
> + ret = -1;
> + goto cleanup;
> + }
> +
> + /* Set up the rndis set */
> + set = &request->request_msg->msg.set_request;
> + set->oid = RNDIS_OID_GEN_CURRENT_PACKET_FILTER;
> + set->info_buffer_length = sizeof(uint32_t);
> + set->info_buffer_offset = sizeof(struct rndis_set_request);
> +
> +	rte_memcpy((void *)((unsigned long)set + sizeof(struct rndis_set_request)),
> + &new_filter, sizeof(uint32_t));
> +
> + ret = hv_rf_send_request(hv, request);
> + if (ret)
> + goto cleanup;
> +
> + /*
> + * Wait for the response from the host.
> + */
> + request->response_msg.msg.set_complete.status = 0xFFFF;
> + hv_nv_complete_request(hv, request);
> +
> + set_complete = &request->response_msg.msg.set_complete;
> + if (set_complete->status == 0xFFFF) {
> +		/* Host is not responding, we can't free the request in this case */
> + ret = -1;
> + goto exit;
> + }
> + /* Response received, check status */
> + status = set_complete->status;
> + if (status)
> + /* Bad response status, return error */
> + ret = -2;
> +
> +cleanup:
> + rte_free(request);
> +exit:
> + return ret;
> +}
> +
> +/*
> + * RNDIS filter open device
> + */
> +int
> +hv_rf_on_open(struct hv_data *hv)
> +{
> + int ret;
> +
> + if (hv->closed)
> + return 0;
> +
> + if (hv->jumbo_frame_support)
> + hv->receive_callback = hv_rf_receive_data_sg;
> +
> + ret = hyperv_set_rx_mode(hv, 1, 0);
> + if (!ret) {
> +		PMD_PDEBUG_LOG(hv, DBG_LOAD, "RNDIS device opened");
> + hv->rndis_dev_state = RNDIS_DEV_DATAINITIALIZED;
> + } else
> +		PMD_PERROR_LOG(hv, DBG_LOAD, "RNDIS device is left unopened");
> +
> + return ret;
> +}
> +
> +/*
> + * RNDIS filter on close
> + */
> +int
> +hv_rf_on_close(struct hv_data *hv)
> +{
> + int ret;
> +
> + PMD_INIT_FUNC_TRACE();
> +
> + if (hv->closed)
> + return 0;
> +
> + if (hv->rndis_dev_state != RNDIS_DEV_DATAINITIALIZED) {
> +		PMD_PDEBUG_LOG(hv, DBG_LOAD, "RNDIS device state should be"
> +			       " RNDIS_DEV_DATAINITIALIZED, but now it is %u",
> +			       hv->rndis_dev_state);
> + return 0;
> + }
> +
> + ret = hv_rf_set_packet_filter(hv, 0);
> + if (!ret) {
> + PMD_PDEBUG_LOG(hv, DBG_LOAD, "RNDIS device closed");
> + hv->rndis_dev_state = RNDIS_DEV_INITIALIZED;
> + } else
> +		PMD_PDEBUG_LOG(hv, DBG_LOAD, "RNDIS device is left unclosed");
> +
> + return ret;
> +}
> +
> +/*
> + * RX Flow
> + */
> +int
> +hyperv_get_buffer(struct hv_data *hv, void *buffer, uint32_t bufferlen)
> +{
> + uint32_t bytes_rxed;
> + uint64_t request_id;
> + struct hv_vm_packet_descriptor *desc;
> +
> + int ret = hv_vmbus_channel_recv_packet_raw(hv, buffer, bufferlen,
> + &bytes_rxed, &request_id, 1);
> + if (likely(ret == 0)) {
> + if (bytes_rxed) {
> + desc = (struct hv_vm_packet_descriptor *)buffer;
> +
> +			if (likely(desc->type == HV_VMBUS_PACKET_TYPE_DATA_USING_TRANSFER_PAGES)) {
> + hv->pkt_rxed = 0;
> + hv_nv_on_receive(hv, desc);
> + return hv->pkt_rxed;
> + }
> + }
> + }
> +
> + return 0;
> +}
> +
> +/*
> + * TX completions handler
> + */
> +void
> +hyperv_scan_comps(struct hv_data *hv, int allow_rx_drop)
> +{
> + uint32_t bytes_rxed;
> + uint64_t request_id;
> +
> + while (1) {
> +		int ret = hv_vmbus_channel_recv_packet_raw(hv, hv->desc, PAGE_SIZE,
> + &bytes_rxed, &request_id, 2 | allow_rx_drop);
> +
> + if (ret != 0 || !bytes_rxed)
> + break;
> +
> +		if (likely(hv->desc->type == HV_VMBUS_PACKET_TYPE_COMPLETION))
> + hv_nv_on_send_completion(hv, hv->desc);
> + }
> +}
> +
> +/*
> + * Get link status
> + */
> +uint8_t
> +hyperv_get_link_status(struct hv_data *hv)
> +{
> + if (hv_rf_query_device_link_status(hv))
> + return 2;
> + return hv->link_status;
> +}
> +
> +/*
> + * Set/Reset RX mode
> + */
> +int
> +hyperv_set_rx_mode(struct hv_data *hv, uint8_t promisc, uint8_t mcast)
> +{
> + PMD_INIT_FUNC_TRACE();
> +
> + if (!promisc) {
> + return hv_rf_set_packet_filter(hv,
> + NDIS_PACKET_TYPE_BROADCAST |
> +				(mcast ? NDIS_PACKET_TYPE_ALL_MULTICAST : 0) |
> + NDIS_PACKET_TYPE_DIRECTED);
> + }
> +
> +	return hv_rf_set_packet_filter(hv, NDIS_PACKET_TYPE_PROMISCUOUS);
> +}
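> +
> +/*
> + * Example: hyperv_set_rx_mode(hv, 0, 1) programs the host filter to
> + * DIRECTED | BROADCAST | ALL_MULTICAST, while any non-zero promisc
> + * argument switches the vNIC to NDIS_PACKET_TYPE_PROMISCUOUS.
> + */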
> diff --git a/lib/librte_pmd_hyperv/hyperv_drv.h b/lib/librte_pmd_hyperv/hyperv_drv.h
> new file mode 100644
> index 0000000..22acad5
> --- /dev/null
> +++ b/lib/librte_pmd_hyperv/hyperv_drv.h
> @@ -0,0 +1,558 @@
> +/*-
> + * Copyright (c) 2009-2012 Microsoft Corp.
> + * Copyright (c) 2010-2012 Citrix Inc.
> + * Copyright (c) 2012 NetApp Inc.
> + * Copyright (c) 2013-2015 Brocade Communications Systems, Inc.
> + * All rights reserved.
> + *
> + * Redistribution and use in source and binary forms, with or without
> + * modification, are permitted provided that the following conditions
> + * are met:
> + * 1. Redistributions of source code must retain the above copyright
> + * notice unmodified, this list of conditions, and the following
> + * disclaimer.
> + * 2. Redistributions in binary form must reproduce the above copyright
> + * notice, this list of conditions and the following disclaimer in the
> + * documentation and/or other materials provided with the distribution.
> + *
> + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
> + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
> + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
> + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
> + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
> + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
> + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> + *
> + */
> +
> +#ifndef _HYPERV_DRV_H_
> +#define _HYPERV_DRV_H_
> +
> +/*
> + * Definitions from hyperv.h
> + */
> +#define HW_MACADDR_LEN 6
> +#define HV_MAX_PAGE_BUFFER_COUNT 19
> +
> +#define HV_ALIGN_UP(value, align) \
> + (((value) & (align-1)) ? \
> + (((value) + (align-1)) & ~(align-1)) : (value))
> +
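HV_ALIGN_UP() rounds `value` up to the next multiple of `align` (a
power of two), leaving already-aligned values untouched; for example:

    /* HV_ALIGN_UP(100, 8)  -> 104                      */
    /* HV_ALIGN_UP(96, 8)   -> 96  (already aligned)    */
    /* HV_ALIGN_UP(1, 4096) -> 4096 (one full page)     */
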
> +/*
> + * Connection identifier type
> + */
> +union hv_vmbus_connection_id {
> + uint32_t as_uint32_t;
> + struct {
> + uint32_t id:24;
> + uint32_t reserved:8;
> + } u;
> +
> +} __attribute__((packed));
> +
> +union hv_vmbus_monitor_trigger_state {
> + uint32_t as_uint32_t;
> + struct {
> + uint32_t group_enable:4;
> + uint32_t rsvd_z:28;
> + } u;
> +};
> +
> +union hv_vmbus_monitor_trigger_group {
> + uint64_t as_uint64_t;
> + struct {
> + uint32_t pending;
> + uint32_t armed;
> + } u;
> +};
> +
> +struct hv_vmbus_monitor_parameter {
> + union hv_vmbus_connection_id connection_id;
> + uint16_t flag_number;
> + uint16_t rsvd_z;
> +};
> +
> +/*
> + * hv_vmbus_monitor_page Layout
> + * ------------------------------------------------------
> + * | 0 | trigger_state (4 bytes) | Rsvd1 (4 bytes) |
> + * | 8 | trigger_group[0] |
> + * | 10 | trigger_group[1] |
> + * | 18 | trigger_group[2] |
> + * | 20 | trigger_group[3] |
> + * | 28 | Rsvd2[0] |
> + * | 30 | Rsvd2[1] |
> + * | 38 | Rsvd2[2] |
> + * | 40 | next_check_time[0][0] | next_check_time[0][1] |
> + * | ... |
> + * | 240 | latency[0][0..3] |
> + * | 340 | Rsvz3[0] |
> + * | 440 | parameter[0][0] |
> + * | 448 | parameter[0][1] |
> + * | ... |
> + * | 840 | Rsvd4[0] |
> + * ------------------------------------------------------
> + */
> +
> +struct hv_vmbus_monitor_page {
> + union hv_vmbus_monitor_trigger_state trigger_state;
> + uint32_t rsvd_z1;
> +
> + union hv_vmbus_monitor_trigger_group trigger_group[4];
> + uint64_t rsvd_z2[3];
> +
> + int32_t next_check_time[4][32];
> +
> + uint16_t latency[4][32];
> + uint64_t rsvd_z3[32];
> +
> + struct hv_vmbus_monitor_parameter parameter[4][32];
> +
> + uint8_t rsvd_z4[1984];
> +};
> +
> +enum hv_vmbus_packet_type {
> + HV_VMBUS_PACKET_TYPE_DATA_USING_TRANSFER_PAGES = 0x7,
> + HV_VMBUS_PACKET_TYPE_DATA_USING_GPA_DIRECT = 0x9,
> + HV_VMBUS_PACKET_TYPE_COMPLETION = 0xb,
> +};
> +
> +#define HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED 1
> +
> +struct hv_vm_packet_descriptor {
> + uint16_t type;
> + uint16_t data_offset8;
> + uint16_t length8;
> + uint16_t flags;
> + uint64_t transaction_id;
> +} __attribute__((packed));
> +
> +struct hv_vm_transfer_page {
> + uint32_t byte_count;
> + uint32_t byte_offset;
> +} __attribute__((packed));
> +
> +struct hv_vm_transfer_page_packet_header {
> + struct hv_vm_packet_descriptor d;
> + uint16_t transfer_page_set_id;
> + uint8_t sender_owns_set;
> + uint8_t reserved;
> + uint32_t range_count;
> + struct hv_vm_transfer_page ranges[1];
> +} __attribute__((packed));
> +
> +struct hv_vmbus_ring_buffer {
> + volatile uint32_t write_index;
> + volatile uint32_t read_index;
> + /*
> + * NOTE: The interrupt_mask field is used only for channels, but
> + * vmbus connection also uses this data structure
> + */
> + volatile uint32_t interrupt_mask;
> + /* pad it to PAGE_SIZE so that data starts on a page */
> + uint8_t reserved[4084];
> +
> + /*
> + * WARNING: Ring data starts here + ring_data_start_offset
> + * !!! DO NOT place any fields below this !!!
> + */
> + uint8_t buffer[0]; /* doubles as interrupt mask
> */
> +} __attribute__((packed));
> +
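A note on the ring layout for reviewers: write_index and read_index are
byte offsets into buffer[], and free space is computed modulo the data
size. A minimal sketch of that computation (an illustration only; the
driver's actual ring handling lives in hyperv_drv.c, which defines its
own helpers):

    static inline uint32_t
    hv_ring_bytes_avail_to_write(const struct hv_vmbus_ring_buffer *rb,
                                 uint32_t ring_data_size)
    {
        uint32_t w = rb->write_index, r = rb->read_index;

        /* writer must never catch the reader, so full != empty */
        return (r > w) ? (r - w) : (ring_data_size - (w - r));
    }
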
> +struct hv_vmbus_page_buffer {
> + uint32_t length;
> + uint32_t offset;
> + uint64_t pfn;
> +} __attribute__((packed));
> +
> +/*
> + * Definitions from hv_vmbus_priv.h
> + */
> +struct hv_vmbus_sg_buffer_list {
> + void *data;
> + uint32_t length;
> +};
> +
> +struct hv_vmbus_channel_packet_page_buffer {
> + uint16_t type;
> + uint16_t data_offset8;
> + uint16_t length8;
> + uint16_t flags;
> + uint64_t transaction_id;
> + uint32_t reserved;
> + uint32_t range_count;
> + struct hv_vmbus_page_buffer range[HV_MAX_PAGE_BUFFER_COUNT];
> +} __attribute__((packed));
> +
> +/*
> + * Definitions from hv_net_vsc.h
> + */
> +#define NETVSC_PACKET_MAXPAGE 16
> +#define NETVSC_PACKET_SIZE 256
> +
> +/*
> + * This message is used by both the VSP and the VSC to complete
> + * a RNDIS message to the opposite channel endpoint. At this
> + * point, the initiator of this message cannot use any resources
> + * associated with the original RNDIS packet.
> + */
> +enum nvsp_status_ {
> + nvsp_status_none = 0,
> + nvsp_status_success,
> + nvsp_status_failure,
> +};
> +
> +struct nvsp_1_msg_send_rndis_pkt_complete {
> + uint32_t status;
> +} __attribute__((packed));
> +
> +enum nvsp_msg_type {
> + /*
> + * Version 1 Messages
> + */
> + nvsp_msg_1_type_send_ndis_vers = 100,
> +
> + nvsp_msg_1_type_send_rx_buf,
> + nvsp_msg_1_type_send_rx_buf_complete,
> + nvsp_msg_1_type_revoke_rx_buf,
> +
> + nvsp_msg_1_type_send_send_buf,
> + nvsp_msg_1_type_send_send_buf_complete,
> + nvsp_msg_1_type_revoke_send_buf,
> +
> + nvsp_msg_1_type_send_rndis_pkt,
> + nvsp_msg_1_type_send_rndis_pkt_complete,
> +};
> +
> +struct nvsp_1_msg_send_rndis_pkt {
> + /*
> + * This field is specified by RNDIS. They assume there's
> + * two different channels of communication. However,
> + * the Network VSP only has one. Therefore, the channel
> + * travels with the RNDIS packet.
> + */
> + uint32_t chan_type;
> +
> + /*
> + * This field is used to send part or all of the data
> + * through a send buffer. This value specifies an
> + * index into the send buffer. If the index is
> + * 0xFFFFFFFF, then the send buffer is not being used
> + * and all of the data was sent through other VMBus
> + * mechanisms.
> + */
> + uint32_t send_buf_section_idx;
> + uint32_t send_buf_section_size;
> +} __attribute__((packed));
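Given that this PMD always sends packet data as GPA-direct page
buffers, send_buf_section_idx should always carry the 0xFFFFFFFF
sentinel described above. A hedged sketch of filling this header
(field values are illustrative, not taken from the patch):

    /* Illustration only: mark the send buffer as unused, since this
     * driver transmits via VMBus page buffers instead. */
    struct nvsp_msg msg = {
        .msg_type = nvsp_msg_1_type_send_rndis_pkt,
    };

    msg.msgs.send_rndis_pkt.chan_type = 0; /* 0 = data channel */
    msg.msgs.send_rndis_pkt.send_buf_section_idx = 0xFFFFFFFF;
    msg.msgs.send_rndis_pkt.send_buf_section_size = 0;
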
> +
> +/*
> + * ALL Messages
> + */
> +struct nvsp_msg {
> + uint32_t msg_type;
> + union {
> + struct nvsp_1_msg_send_rndis_pkt send_rndis_pkt;
> + struct nvsp_1_msg_send_rndis_pkt_complete send_rndis_pkt_complete;
> + /* size is set like in linux kernel driver */
> + uint8_t raw[24];
> + } msgs;
> +} __attribute__((packed));
> +
> +#define NETVSC_RECEIVE_BUFFER_ID 0xcafe
> +
> +struct netvsc_packet {
> + uint8_t is_data_pkt; /* One byte */
> + uint8_t ext_pages;
> + uint16_t vlan_tci;
> +
> + void *extension;
> + uint64_t extension_phys_addr;
> + uint32_t tot_data_buf_len;
> + uint32_t page_buf_count;
> + struct hv_vmbus_page_buffer page_buffers[NETVSC_PACKET_MAXPAGE];
> +};
> +
> +/*
> + * Definitions from hv_rndis.h
> + */
> +#define RNDIS_MAJOR_VERSION 0x00000001
> +#define RNDIS_MINOR_VERSION 0x00000000
> +
> +#define STATUS_BUFFER_OVERFLOW (0x80000005L)
> +
> +/*
> + * Remote NDIS message types
> + */
> +#define REMOTE_NDIS_PACKET_MSG 0x00000001
> +#define REMOTE_NDIS_INITIALIZE_MSG 0x00000002
> +#define REMOTE_NDIS_HALT_MSG 0x00000003
> +#define REMOTE_NDIS_QUERY_MSG 0x00000004
> +#define REMOTE_NDIS_SET_MSG 0x00000005
> +#define REMOTE_NDIS_RESET_MSG 0x00000006
> +#define REMOTE_NDIS_INDICATE_STATUS_MSG 0x00000007
> +#define REMOTE_NDIS_KEEPALIVE_MSG 0x00000008
> +/*
> + * Remote NDIS message completion types
> + */
> +#define REMOTE_NDIS_INITIALIZE_CMPLT 0x80000002
> +#define REMOTE_NDIS_QUERY_CMPLT 0x80000004
> +#define REMOTE_NDIS_SET_CMPLT 0x80000005
> +#define REMOTE_NDIS_RESET_CMPLT 0x80000006
> +#define REMOTE_NDIS_KEEPALIVE_CMPLT 0x80000008
> +
> +#define RNDIS_OID_GEN_MEDIA_CONNECT_STATUS 0x00010114
> +#define RNDIS_OID_GEN_CURRENT_PACKET_FILTER 0x0001010E
> +#define RNDIS_OID_802_3_PERMANENT_ADDRESS 0x01010101
> +#define RNDIS_OID_802_3_CURRENT_ADDRESS 0x01010102
> +#define RNDIS_OID_GEN_RNDIS_CONFIG_PARAMETER 0x0001021B
> +
> +#define RNDIS_CONFIG_PARAM_TYPE_STRING 2
> +/* extended info after the RNDIS request message */
> +#define RNDIS_EXT_LEN 100
> +/*
> + * Packet extension field contents associated with a Data message.
> + */
> +struct rndis_per_packet_info {
> + uint32_t size;
> + uint32_t type;
> + uint32_t per_packet_info_offset;
> +};
> +
> +#define ieee_8021q_info 6
> +
> +struct ndis_8021q_info {
> + union {
> + struct {
> + uint32_t user_pri:3; /* User Priority */
> + uint32_t cfi:1; /* Canonical Format ID */
> + uint32_t vlan_id:12;
> + uint32_t reserved:16;
> + } s1;
> + uint32_t value;
> + } u1;
> +};
> +
> +/* Format of Information buffer passed in a SetRequest for the OID */
> +/* OID_GEN_RNDIS_CONFIG_PARAMETER. */
> +struct rndis_config_parameter_info {
> + uint32_t parameter_name_offset;
> + uint32_t parameter_name_length;
> + uint32_t parameter_type;
> + uint32_t parameter_value_offset;
> + uint32_t parameter_value_length;
> +};
> +
> +/*
> + * NdisInitialize message
> + */
> +struct rndis_initialize_request {
> + /* RNDIS request ID */
> + uint32_t request_id;
> + uint32_t major_version;
> + uint32_t minor_version;
> + uint32_t max_xfer_size;
> +};
> +
> +/*
> + * Response to NdisInitialize
> + */
> +struct rndis_initialize_complete {
> + /* RNDIS request ID */
> + uint32_t request_id;
> + /* RNDIS status */
> + uint32_t status;
> + uint32_t major_version;
> + uint32_t minor_version;
> + uint32_t device_flags;
> + /* RNDIS medium */
> + uint32_t medium;
> + uint32_t max_pkts_per_msg;
> + uint32_t max_xfer_size;
> + uint32_t pkt_align_factor;
> + uint32_t af_list_offset;
> + uint32_t af_list_size;
> +};
> +
> +/*
> + * NdisSetRequest message
> + */
> +struct rndis_set_request {
> + /* RNDIS request ID */
> + uint32_t request_id;
> + /* RNDIS OID */
> + uint32_t oid;
> + uint32_t info_buffer_length;
> + uint32_t info_buffer_offset;
> + /* RNDIS handle */
> + uint32_t device_vc_handle;
> +};
> +
> +/*
> + * Response to NdisSetRequest
> + */
> +struct rndis_set_complete {
> + /* RNDIS request ID */
> + uint32_t request_id;
> + /* RNDIS status */
> + uint32_t status;
> +};
> +
> +/*
> + * NdisQueryRequest message
> + */
> +struct rndis_query_request {
> + /* RNDIS request ID */
> + uint32_t request_id;
> + /* RNDIS OID */
> + uint32_t oid;
> + uint32_t info_buffer_length;
> + uint32_t info_buffer_offset;
> + /* RNDIS handle */
> + uint32_t device_vc_handle;
> +};
> +
> +/*
> + * Response to NdisQueryRequest
> + */
> +struct rndis_query_complete {
> + /* RNDIS request ID */
> + uint32_t request_id;
> + /* RNDIS status */
> + uint32_t status;
> + uint32_t info_buffer_length;
> + uint32_t info_buffer_offset;
> +};
> +
> +/*
> + * Data message. All offset fields contain byte offsets from the beginning
> + * of the rndis_packet structure. All length fields are in bytes.
> + * VcHandle is set to 0 for connectionless data, otherwise it
> + * contains the VC handle.
> + */
> +struct rndis_packet {
> + uint32_t data_offset;
> + uint32_t data_length;
> + uint32_t oob_data_offset;
> + uint32_t oob_data_length;
> + uint32_t num_oob_data_elements;
> + uint32_t per_pkt_info_offset;
> + uint32_t per_pkt_info_length;
> + /* RNDIS handle */
> + uint32_t vc_handle;
> + uint32_t reserved;
> +};
> +
> +/*
> + * NdisHalt message
> + */
> +struct rndis_halt_request {
> + /* RNDIS request ID */
> + uint32_t request_id;
> +};
> +
> +/*
> + * NdisMIndicateStatus message
> + */
> +struct rndis_indicate_status {
> + /* RNDIS status */
> + uint32_t status;
> + uint32_t status_buf_length;
> + uint32_t status_buf_offset;
> +};
> +
> +#define RNDIS_STATUS_MEDIA_CONNECT (0x4001000BL)
> +#define RNDIS_STATUS_MEDIA_DISCONNECT (0x4001000CL)
> +#define RNDIS_STATUS_INVALID_DATA (0xC0010015L)
> +
> +/*
> + * union with all of the RNDIS messages
> + */
> +union rndis_msg_container {
> + struct rndis_initialize_request init_request;
> + struct rndis_initialize_complete init_complete;
> + struct rndis_set_request set_request;
> + struct rndis_set_complete set_complete;
> + struct rndis_query_request query_request;
> + struct rndis_query_complete query_complete;
> + struct rndis_packet packet;
> + struct rndis_halt_request halt_request;
> + struct rndis_indicate_status indicate_status;
> +#if 0
> + rndis_keepalive_request keepalive_request;
> + rndis_reset_request reset_request;
> + rndis_reset_complete reset_complete;
> + rndis_keepalive_complete keepalive_complete;
> + rcondis_mp_create_vc co_miniport_create_vc;
> + rcondis_mp_delete_vc co_miniport_delete_vc;
> + rcondis_indicate_status co_miniport_status;
> + rcondis_mp_activate_vc_request co_miniport_activate_vc;
> + rcondis_mp_deactivate_vc_request co_miniport_deactivate_vc;
> + rcondis_mp_create_vc_complete co_miniport_create_vc_complete;
> + rcondis_mp_delete_vc_complete co_miniport_delete_vc_complete;
> + rcondis_mp_activate_vc_complete co_miniport_activate_vc_complete;
> + rcondis_mp_deactivate_vc_complete co_miniport_deactivate_vc_complete;
> +#endif
> + uint32_t packet_ex[16]; /* to pad the union size */
> +};
> +
> +struct rndis_msg {
> + uint32_t ndis_msg_type;
> +
> + /*
> + * Total length of this message, from the beginning
> + * of the rndis_msg struct, in bytes.
> + */
> + uint32_t msg_len;
> +
> + /* Actual message */
> + union rndis_msg_container msg;
> +};
> +
> +#define RNDIS_HEADER_SIZE (sizeof(struct rndis_msg) - sizeof(union rndis_msg_container))
> +
> +#define NDIS_PACKET_TYPE_DIRECTED 0x00000001
> +#define NDIS_PACKET_TYPE_MULTICAST 0x00000002
> +#define NDIS_PACKET_TYPE_ALL_MULTICAST 0x00000004
> +#define NDIS_PACKET_TYPE_BROADCAST 0x00000008
> +#define NDIS_PACKET_TYPE_SOURCE_ROUTING 0x00000010
> +#define NDIS_PACKET_TYPE_PROMISCUOUS 0x00000020
> +
> +/*
> + * get the size of an RNDIS message. Pass in the message type,
> + * rndis_set_request, rndis_packet for example
> + */
> +#define RNDIS_MESSAGE_SIZE(message) \
> + (sizeof(message) + (sizeof(struct rndis_msg) - sizeof(union rndis_msg_container)))
> +
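In other words, RNDIS_MESSAGE_SIZE() is the payload size plus the fixed
8-byte type/length preamble (RNDIS_HEADER_SIZE). For example:

    /* header (8 bytes) + sizeof(struct rndis_set_request) */
    uint32_t len = RNDIS_MESSAGE_SIZE(struct rndis_set_request);
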
> +
> +/*
> + * Definitions from hv_rndis_filter.h
> + */
> +enum {
> + RNDIS_DEV_UNINITIALIZED = 0,
> + RNDIS_DEV_INITIALIZING,
> + RNDIS_DEV_INITIALIZED,
> + RNDIS_DEV_DATAINITIALIZED,
> +};
> +
> +struct rndis_request {
> + /* assumed a fixed size response here. */
> + struct rndis_msg response_msg;
> +
> + /* Simplify allocation by having a netvsc packet inline */
> + struct netvsc_packet pkt;
> + /* set additional buffer since packet can cross page boundary */
> + struct hv_vmbus_page_buffer buffer;
> + /* assumed a fixed size request here. */
> + struct rndis_msg *request_msg;
> + const struct rte_memzone *request_msg_memzone;
> +};
> +
> +struct rndis_filter_packet {
> + struct rndis_msg message;
> +};
> +
> +#endif /* _HYPERV_DRV_H_ */
> diff --git a/lib/librte_pmd_hyperv/hyperv_ethdev.c
> b/lib/librte_pmd_hyperv/hyperv_ethdev.c
> new file mode 100644
> index 0000000..7b909db
> --- /dev/null
> +++ b/lib/librte_pmd_hyperv/hyperv_ethdev.c
> @@ -0,0 +1,332 @@
> +/*-
> + * Copyright (c) 2013-2015 Brocade Communications Systems, Inc.
> + * All rights reserved.
> + */
> +
> +#include <assert.h>
> +#include <unistd.h>
> +#include "hyperv.h"
> +
> +static struct rte_vmbus_id vmbus_id_hyperv_map[] = {
> + {
> + .device_id = 0x0,
> + },
> +};
> +
> +static void
> +hyperv_dev_info_get(__rte_unused struct rte_eth_dev *dev,
> + struct rte_eth_dev_info *dev_info)
> +{
> + PMD_INIT_FUNC_TRACE();
> + dev_info->max_rx_queues = HV_MAX_RX_QUEUES;
> + dev_info->max_tx_queues = HV_MAX_TX_QUEUES;
> + dev_info->min_rx_bufsize = HV_MIN_RX_BUF_SIZE;
> + dev_info->max_rx_pktlen = HV_MAX_RX_PKT_LEN;
> + dev_info->max_mac_addrs = HV_MAX_MAC_ADDRS;
> +}
> +
> +inline int
> +rte_hv_dev_atomic_write_link_status(struct rte_eth_dev *dev,
> + struct rte_eth_link *link)
> +{
> + struct rte_eth_link *dst = &(dev->data->dev_link);
> + struct rte_eth_link *src = link;
> +
> + if (rte_atomic64_cmpset((uint64_t *)dst, *(uint64_t *)dst,
> + *(uint64_t *)src) == 0)
> + return -1;
> +
> + return 0;
> +}
> +
> +inline int
> +rte_hv_dev_atomic_read_link_status(struct rte_eth_dev *dev,
> + struct rte_eth_link *link)
> +{
> + struct rte_eth_link *dst = link;
> + struct rte_eth_link *src = &(dev->data->dev_link);
> +
> + if (rte_atomic64_cmpset((uint64_t *)dst, *(uint64_t *)dst,
> + *(uint64_t *)src) == 0)
> + return -1;
> +
> + return 0;
> +}
> +
> +/* return 0 means link status changed, -1 means not changed */
> +static int
> +hyperv_dev_link_update(struct rte_eth_dev *dev,
> + __rte_unused int wait_to_complete)
> +{
> + uint8_t ret;
> + struct rte_eth_link old, link;
> + struct hv_data *hv = dev->data->dev_private;
> +
> + PMD_INIT_FUNC_TRACE();
> + memset(&old, 0, sizeof(old));
> + memset(&link, 0, sizeof(link));
> + rte_hv_dev_atomic_read_link_status(dev, &old);
> + if (!hv->link_status && (hv->link_req_cnt == HV_MAX_LINK_REQ)) {
> + ret = hyperv_get_link_status(hv);
> + if (ret > 1)
> + return -1;
> + hv->link_req_cnt = 0;
> + }
> + link.link_duplex = ETH_LINK_FULL_DUPLEX;
> + link.link_speed = ETH_LINK_SPEED_10000;
> + link.link_status = hv->link_status;
> + hv->link_req_cnt++;
> + rte_hv_dev_atomic_write_link_status(dev, &link);
> +
> + return (old.link_status == link.link_status) ? -1 : 0;
> +}
> +
> +static int
> +hyperv_dev_configure(struct rte_eth_dev *dev)
> +{
> + struct hv_data *hv = dev->data->dev_private;
> + const struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
> +
> + PMD_INIT_FUNC_TRACE();
> +
> + rte_memcpy(dev->data->mac_addrs->addr_bytes, hv->hw_mac_addr,
> + ETHER_ADDR_LEN);
> + hv->jumbo_frame_support = rxmode->jumbo_frame;
> +
> + return 0;
> +}
> +
> +static int
> +hyperv_init(struct rte_eth_dev *dev)
> +{
> + struct hv_data *hv = dev->data->dev_private;
> + struct rte_vmbus_device *vmbus_dev;
> +
> + vmbus_dev = dev->vmbus_dev;
> + hv->uio_fd = vmbus_dev->uio_fd;
> + hv->kernel_initialized = 1;
> + hv->vmbus_device = vmbus_dev->id.device_id;
> + hv->monitor_bit = (uint8_t)(vmbus_dev->vmbus_monitor_id % 32);
> + hv->monitor_group = (uint8_t)(vmbus_dev->vmbus_monitor_id / 32);
> + PMD_PDEBUG_LOG(hv, DBG_LOAD, "hyperv_init for vmbus device %d",
> + vmbus_dev->id.device_id);
> +
> + /* get the memory mappings */
> + hv->ring_pages = vmbus_dev->mem_resource[TXRX_RING_MAP].addr;
> + hv->int_page = vmbus_dev->mem_resource[INT_PAGE_MAP].addr;
> + hv->monitor_pages = (struct hv_vmbus_monitor_page *)
> + vmbus_dev->mem_resource[MON_PAGE_MAP].addr;
> + hv->recv_buf = vmbus_dev->mem_resource[RECV_BUF_MAP].addr;
> + assert(hv->ring_pages);
> + assert(hv->int_page);
> + assert(hv->monitor_pages);
> + assert(hv->recv_buf);
> +
> + /* separate send/recv int_pages */
> + hv->recv_interrupt_page = hv->int_page;
> +
> + hv->send_interrupt_page =
> + ((uint8_t *) hv->int_page + (PAGE_SIZE >> 1));
> +
> + /* retrieve in/out ring_buffers */
> + hv->out = hv->ring_pages;
> + hv->in = (void *)((uint64_t)hv->out +
> + (vmbus_dev->mem_resource[TXRX_RING_MAP].len / 2));
> + hv->rb_size = vmbus_dev->mem_resource[TXRX_RING_MAP].len / 2;
> +
> + dev->rx_pkt_burst = hyperv_recv_pkts;
> + dev->tx_pkt_burst = hyperv_xmit_pkts;
> +
> + return hv_rf_on_device_add(hv);
> +}
> +
> +#define HV_DEV_ID (hv->vmbus_device << 1)
> +#define HV_MTU (dev->data->dev_conf.rxmode.max_rx_pkt_len << 9)
> +
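These two macros pack the command word that hyperv_dev_start() and
hyperv_dev_stop() write to the UIO fd: bit 0 selects attach (0) or
detach (1), the device id sits above it, and the MTU occupies bits 9
and up. An illustrative decode, assuming the hv_uio kernel module
splits the word the same way (the exact field widths are not confirmed
by this patch):

    /* hypothetical decode of cmd = HV_DEV_ID | HV_MTU */
    int detach = cmd & 0x1;         /* bit 0: 0 = attach, 1 = detach */
    int dev_id = (cmd >> 1) & 0xff; /* field width is an assumption  */
    int mtu    = cmd >> 9;          /* max_rx_pkt_len                */
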
> +static int
> +hyperv_dev_start(struct rte_eth_dev *dev)
> +{
> + int ret;
> + uint32_t cmd;
> + ssize_t bytes;
> + struct hv_data *hv = dev->data->dev_private;
> +
> + PMD_INIT_FUNC_TRACE();
> + if (!hv->kernel_initialized) {
> + cmd = HV_DEV_ID | HV_MTU;
> + bytes = write(hv->uio_fd, &cmd, sizeof(uint32_t));
> + if (bytes < (ssize_t)sizeof(uint32_t)) {
> + PMD_PERROR_LOG(hv, DBG_LOAD, "write on uio_fd %d failed",
> + hv->uio_fd);
> + return -1;
> + }
> + ret = vmbus_uio_map_resource(dev->vmbus_dev);
> + if (ret < 0) {
> + PMD_PERROR_LOG(hv, DBG_LOAD, "Failed to map
> resources");
> + return ret;
> + }
> + ret = hyperv_init(dev);
> + if (ret)
> + return ret;
> + }
> + ret = hv_rf_on_open(hv);
> + if (ret) {
> + PMD_PERROR_LOG(hv, DBG_LOAD, "hv_rf_on_open
> failed");
> + return ret;
> + }
> + hv->link_req_cnt = HV_MAX_LINK_REQ;
> +
> + return ret;
> +}
> +
> +static void
> +hyperv_dev_stop(struct rte_eth_dev *dev)
> +{
> + struct hv_data *hv = dev->data->dev_private;
> + uint32_t cmd;
> + ssize_t bytes;
> +
> + PMD_INIT_FUNC_TRACE();
> + if (!hv->closed) {
> + hv_rf_on_close(hv);
> + hv_rf_on_device_remove(hv);
> + if (hv->kernel_initialized) {
> + cmd = 1 | HV_DEV_ID;
> + bytes = write(hv->uio_fd, &cmd, sizeof(uint32_t));
> + if (bytes > 0)
> + hv->kernel_initialized = 0;
> + else
> + PMD_PWARN_LOG(hv, DBG_LOAD, "write to
> uio_fd %d failed: (%zu)b",
> + hv->uio_fd, bytes);
> + }
> + hv->link_status = 0;
> + }
> +}
> +
> +static void
> +hyperv_dev_close(struct rte_eth_dev *dev)
> +{
> + PMD_INIT_FUNC_TRACE();
> + hyperv_dev_stop(dev);
> +}
> +
> +static void
> +hyperv_dev_promisc_enable(struct rte_eth_dev *dev)
> +{
> + struct hv_data *hv = dev->data->dev_private;
> +
> + PMD_INIT_FUNC_TRACE();
> + hyperv_set_rx_mode(hv, 1, dev->data->all_multicast);
> +}
> +
> +static void
> +hyperv_dev_promisc_disable(struct rte_eth_dev *dev)
> +{
> + struct hv_data *hv = dev->data->dev_private;
> +
> + PMD_INIT_FUNC_TRACE();
> + hyperv_set_rx_mode(hv, 0, dev->data->all_multicast);
> +}
> +
> +static void
> +hyperv_dev_allmulticast_enable(struct rte_eth_dev *dev)
> +{
> + struct hv_data *hv = dev->data->dev_private;
> +
> + PMD_INIT_FUNC_TRACE();
> + hyperv_set_rx_mode(hv, dev->data->promiscuous, 1);
> +}
> +
> +static void
> +hyperv_dev_allmulticast_disable(struct rte_eth_dev *dev)
> +{
> + struct hv_data *hv = dev->data->dev_private;
> +
> + PMD_INIT_FUNC_TRACE();
> + hyperv_set_rx_mode(hv, dev->data->promiscuous, 0);
> +}
> +
> +static void
> +hyperv_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
> +{
> + struct hv_data *hv = dev->data->dev_private;
> + struct hv_stats *st = &hv->stats;
> +
> + PMD_INIT_FUNC_TRACE();
> +
> + memset(stats, 0, sizeof(struct rte_eth_stats));
> +
> + stats->opackets = st->opkts;
> + stats->obytes = st->obytes;
> + stats->oerrors = st->oerrors;
> + stats->ipackets = st->ipkts;
> + stats->ibytes = st->ibytes;
> + stats->ierrors = st->ierrors;
> + stats->rx_nombuf = st->rx_nombuf;
> +}
> +
> +static struct eth_dev_ops hyperv_eth_dev_ops = {
> + .dev_configure = hyperv_dev_configure,
> + .dev_start = hyperv_dev_start,
> + .dev_stop = hyperv_dev_stop,
> + .dev_infos_get = hyperv_dev_info_get,
> + .rx_queue_release = hyperv_dev_rx_queue_release,
> + .tx_queue_release = hyperv_dev_tx_queue_release,
> + .rx_queue_setup = hyperv_dev_rx_queue_setup,
> + .tx_queue_setup = hyperv_dev_tx_queue_setup,
> + .dev_close = hyperv_dev_close,
> + .promiscuous_enable = hyperv_dev_promisc_enable,
> + .promiscuous_disable = hyperv_dev_promisc_disable,
> + .allmulticast_enable = hyperv_dev_allmulticast_enable,
> + .allmulticast_disable = hyperv_dev_allmulticast_disable,
> + .link_update = hyperv_dev_link_update,
> + .stats_get = hyperv_dev_stats_get,
> +};
> +
> +static int
> +eth_hyperv_dev_init(struct rte_eth_dev *eth_dev)
> +{
> + int ret;
> +
> + PMD_INIT_FUNC_TRACE();
> +
> + eth_dev->dev_ops = &hyperv_eth_dev_ops;
> + eth_dev->data->mac_addrs = rte_malloc("mac_addrs",
> + sizeof(struct ether_addr),
> + RTE_CACHE_LINE_SIZE);
> + if (!eth_dev->data->mac_addrs) {
> + PMD_PERROR_LOG(hv, DBG_LOAD, "unable to allocate
> memory for mac addrs");
> + return -1;
> + }
> +
> + ret = hyperv_init(eth_dev);
> +
> + return ret;
> +}
> +
> +static struct eth_driver rte_hyperv_pmd = {
> + .vmbus_drv = {
> + .name = "rte_hyperv_pmd",
> + .module_name = "hv_uio",
> + .id_table = vmbus_id_hyperv_map,
> + },
> + .bus_type = RTE_BUS_VMBUS,
> + .eth_dev_init = eth_hyperv_dev_init,
> + .dev_private_size = sizeof(struct hv_data),
> +};
> +
> +static int
> +rte_hyperv_pmd_init(const char *name __rte_unused,
> + const char *param __rte_unused)
> +{
> + rte_eth_driver_register(&rte_hyperv_pmd);
> + return 0;
> +}
> +
> +static struct rte_driver rte_hyperv_driver = {
> + .type = PMD_PDEV,
> + .init = rte_hyperv_pmd_init,
> +};
> +
> +PMD_REGISTER_DRIVER(rte_hyperv_driver);
> diff --git a/lib/librte_pmd_hyperv/hyperv_logs.h
> b/lib/librte_pmd_hyperv/hyperv_logs.h
> new file mode 100644
> index 0000000..1b96468
> --- /dev/null
> +++ b/lib/librte_pmd_hyperv/hyperv_logs.h
> @@ -0,0 +1,69 @@
> +/*-
> + * Copyright(c) 2013-2015 Brocade Communications Systems, Inc.
> + * All rights reserved.
> + */
> +
> +#ifndef _HYPERV_LOGS_H_
> +#define _HYPERV_LOGS_H_
> +
> +#ifdef RTE_LIBRTE_HV_DEBUG_INIT
> +#define PMD_INIT_LOG(level, fmt, args...) \
> + RTE_LOG(level, PMD, "%s(): " fmt "\n", __func__, ## args)
> +#define PMD_INIT_FUNC_TRACE() PMD_INIT_LOG(DEBUG, " >>")
> +#else
> +#define PMD_INIT_LOG(level, fmt, args...) do { } while (0)
> +#define PMD_INIT_FUNC_TRACE() do { } while (0)
> +#endif
> +
> +#ifdef RTE_LIBRTE_HV_DEBUG
> +
> +#define RTE_DBG_LOAD INIT
> +#define RTE_DBG_STATS STATS
> +#define RTE_DBG_TX TX
> +#define RTE_DBG_RX RX
> +#define RTE_DBG_MBUF MBUF
> +#define RTE_DBG_ASSERT ASRT
> +#define RTE_DBG_RB RB
> +#define RTE_DBG_VMBUS VMBUS
> +#define RTE_DBG_ALL ALL
> +
> +#define STR(x) #x
> +
> +#define HV_RTE_LOG(hv, codepath, level, fmt, args...) \
> + RTE_LOG(level, PMD, "[%d]: %-6s: %s: " fmt "\n", \
> + hv->vmbus_device, STR(codepath), __func__, ## args)
> +
> +#define PMD_PDEBUG_LOG(hv, codepath, fmt, args...) \
> +do { \
> + if (unlikely(hv->debug & (codepath))) \
> + HV_RTE_LOG(hv, RTE_##codepath, DEBUG, fmt, ## args); \
> +} while (0)
> +
> +#define PMD_PINFO_LOG(hv, codepath, fmt, args...) \
> +do { \
> + if (unlikely(hv->debug & (codepath))) \
> + HV_RTE_LOG(hv, RTE_##codepath, INFO, fmt, ## args); \
> +} while (0)
> +
> +#define PMD_PWARN_LOG(hv, codepath, fmt, args...) \
> +do { \
> + if (unlikely(hv->debug & (codepath))) \
> + HV_RTE_LOG(hv, RTE_##codepath, WARNING, fmt, ## args); \
> +} while (0)
> +
> +#define PMD_PERROR_LOG(hv, codepath, fmt, args...) \
> +do { \
> + if (unlikely(hv->debug & (codepath))) \
> + HV_RTE_LOG(hv, RTE_##codepath, ERR, fmt, ## args); \
> +} while (0)
> +#else
> +#define HV_RTE_LOG(level, fmt, args...) do { } while (0)
> +#define PMD_PDEBUG_LOG(fmt, args...) do { } while (0)
> +#define PMD_PINFO_LOG(fmt, args...) do { } while (0)
> +#define PMD_PWARN_LOG(fmt, args...) do { } while (0)
> +#define PMD_PERROR_LOG(fmt, args...) do { } while (0)
> +#undef RTE_LIBRTE_HV_DEBUG_TX
> +#undef RTE_LIBRTE_HV_DEBUG_RX
> +#endif
> +
> +#endif /* _HYPERV_LOGS_H_ */
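Usage note: every call site passes the hv context plus a codepath
class, so tracing can be enabled per device and per path through
hv->debug, e.g.:

    /* only printed when hv->debug has the RX bit set */
    PMD_PDEBUG_LOG(hv, DBG_RX, "received %u segments", segs);
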
> diff --git a/lib/librte_pmd_hyperv/hyperv_rxtx.c
> b/lib/librte_pmd_hyperv/hyperv_rxtx.c
> new file mode 100644
> index 0000000..9e423d0
> --- /dev/null
> +++ b/lib/librte_pmd_hyperv/hyperv_rxtx.c
> @@ -0,0 +1,403 @@
> +/*-
> + * Copyright(c) 2013-2015 Brocade Communications Systems, Inc.
> + * All rights reserved.
> + */
> +
> +#include "hyperv.h"
> +#include "hyperv_rxtx.h"
> +#include "hyperv_drv.h"
> +
> +#define RTE_MBUF_DATA_DMA_ADDR(mb) \
> + ((uint64_t)((mb)->buf_physaddr + (mb)->data_off))
> +
> +#define RPPI_SIZE (sizeof(struct rndis_per_packet_info)\
> + + sizeof(struct ndis_8021q_info))
> +#define RNDIS_OFF (sizeof(struct netvsc_packet) + RPPI_SIZE)
> +#define TX_PKT_SIZE (RNDIS_OFF + sizeof(struct rndis_filter_packet) * 2)
> +
> +static inline struct rte_mbuf *
> +hv_rxmbuf_alloc(struct rte_mempool *mp)
> +{
> + return __rte_mbuf_raw_alloc(mp);
> +}
> +
> +static inline int
> +hyperv_has_rx_work(struct hv_data *hv)
> +{
> + return hv->in->read_index != hv->in->write_index;
> +}
> +
> +#ifndef DEFAULT_TX_FREE_THRESHOLD
> +#define DEFAULT_TX_FREE_THRESHOLD 32
> +#endif
> +
> +int
> +hyperv_dev_tx_queue_setup(struct rte_eth_dev *dev,
> + uint16_t queue_idx,
> + uint16_t nb_desc,
> + unsigned int socket_id,
> + const struct rte_eth_txconf *tx_conf)
> +
> +{
> + struct hv_data *hv = dev->data->dev_private;
> + const struct rte_memzone *tz;
> + struct hv_tx_queue *txq;
> + char tz_name[RTE_MEMZONE_NAMESIZE];
> + uint32_t i, delta = 0, new_delta;
> + struct netvsc_packet *pkt;
> +
> + PMD_INIT_FUNC_TRACE();
> +
> + txq = rte_zmalloc_socket("ethdev TX queue", sizeof(struct
> hv_tx_queue),
> + RTE_CACHE_LINE_SIZE, socket_id);
> + if (txq == NULL) {
> + PMD_PERROR_LOG(hv, DBG_LOAD, "rte_zmalloc for
> tx_queue failed");
> + return -ENOMEM;
> + }
> +
> + if (tx_conf->tx_free_thresh >= nb_desc) {
> + PMD_PERROR_LOG(hv, DBG_LOAD,
> + "tx_free_thresh should be less than nb_desc");
> + rte_free(txq);
> + return -EINVAL;
> + }
> + txq->tx_free_thresh = (tx_conf->tx_free_thresh ?
> + tx_conf->tx_free_thresh : DEFAULT_TX_FREE_THRESHOLD);
> + txq->pkts = rte_calloc_socket("TX pkts", nb_desc, sizeof(void *),
> + RTE_CACHE_LINE_SIZE, socket_id);
> + if (txq->pkts == NULL) {
> + PMD_PERROR_LOG(hv, DBG_LOAD, "rte_zmalloc for pkts
> failed");
> + return -ENOMEM;
> + }
> + sprintf(tz_name, "hv_%d_%u_%u", hv->vmbus_device, queue_idx,
> socket_id);
> + tz = rte_memzone_reserve_aligned(tz_name,
> + (uint32_t)nb_desc * TX_PKT_SIZE,
> +
> rte_lcore_to_socket_id(rte_lcore_id()),
> + 0, PAGE_SIZE);
> + if (tz == NULL) {
> + PMD_PERROR_LOG(hv, DBG_LOAD, "netvsc packet ring alloc
> fail");
> + return -ENOMEM;
> + }
> + for (i = 0; i < nb_desc; i++) {
> + pkt = txq->pkts[i] = (struct netvsc_packet *)
> + ((uint8_t *)tz->addr + i * TX_PKT_SIZE + delta);
> + pkt->extension = (uint8_t *)tz->addr + i * TX_PKT_SIZE +
> + RNDIS_OFF + delta;
> + if (!pkt->extension) {
> + PMD_PERROR_LOG(hv, DBG_TX,
> + "pkt->extension is NULL for %d-th pkt", i);
> + return -EINVAL;
> + }
> + pkt->extension_phys_addr =
> + tz->phys_addr + i * TX_PKT_SIZE + RNDIS_OFF + delta;
> + pkt->ext_pages = 1;
> + pkt->page_buffers[0].pfn =
> + pkt->extension_phys_addr >> PAGE_SHIFT;
> + pkt->page_buffers[0].offset =
> + (unsigned long)pkt->extension & (PAGE_SIZE - 1);
> + pkt->page_buffers[0].length =
> + RNDIS_MESSAGE_SIZE(struct rndis_packet);
> + /* if the RNDIS header would cross a page boundary,
> + * shift it to the start of the next page */
> + if (pkt->page_buffers[0].offset +
> + pkt->page_buffers[0].length > PAGE_SIZE) {
> + new_delta = PAGE_SIZE - pkt->page_buffers[0].offset;
> + pkt->page_buffers[0].pfn++;
> + delta += new_delta;
> + pkt->page_buffers[0].offset = 0;
> + pkt->extension = (uint8_t *)pkt->extension + new_delta;
> + pkt->extension_phys_addr += new_delta;
> + }
> + }
> + txq->sw_ring = rte_calloc_socket("txq_sw_ring",
> + nb_desc, sizeof(struct rte_mbuf *),
> + RTE_CACHE_LINE_SIZE, socket_id);
> + if (txq->sw_ring == NULL) {
> + hyperv_dev_tx_queue_release(txq);
> + return -ENOMEM;
> + }
> + txq->port_id = dev->data->port_id;
> + txq->nb_tx_desc = txq->tx_avail = nb_desc;
> + txq->hv = hv;
> + dev->data->tx_queues[queue_idx] = txq;
> + hv->txq = txq;
> +
> + return 0;
> +}
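To make the page-boundary handling in the allocation loop concrete,
here is a worked instance with illustrative sizes (the real TX_PKT_SIZE
depends on the struct sizes, so these numbers are assumptions, not
computed values):

    /* Assume TX_PKT_SIZE = 600 and RNDIS_OFF = 400, with a 100-byte
     * RNDIS header and PAGE_SIZE = 4096. For i = 6 the header would
     * start at offset 6*600 + 400 = 4000 and end at 4100 > 4096, so
     * new_delta = 4096 - 4000 = 96: the header moves to offset 0 of
     * the next page (pfn++), and the accumulated delta shifts every
     * later packet by the same amount. */
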
> +
> +void
> +hyperv_dev_tx_queue_release(void *ptxq)
> +{
> + struct hv_tx_queue *txq = ptxq;
> +
> + PMD_INIT_FUNC_TRACE();
> + if (txq == NULL)
> + return;
> + rte_free(txq->sw_ring);
> + rte_free(txq->pkts);
> + rte_free(txq);
> +}
> +
> +int
> +hyperv_dev_rx_queue_setup(struct rte_eth_dev *dev,
> + uint16_t queue_idx,
> + uint16_t nb_desc,
> + unsigned int socket_id,
> + const struct rte_eth_rxconf *rx_conf,
> + struct rte_mempool *mp)
> +{
> + uint16_t i;
> + struct hv_rx_queue *rxq;
> + struct rte_mbuf *mbuf;
> + struct hv_data *hv = dev->data->dev_private;
> +
> + PMD_INIT_FUNC_TRACE();
> +
> + rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct
> hv_rx_queue),
> + RTE_CACHE_LINE_SIZE, socket_id);
> + if (rxq == NULL) {
> + PMD_PERROR_LOG(hv, DBG_LOAD,
> + "rte_zmalloc for rx_queue failed!");
> + return -ENOMEM;
> + }
> + hv->desc = rxq->desc = rte_zmalloc_socket(NULL, PAGE_SIZE,
> + RTE_CACHE_LINE_SIZE, socket_id);
> + if (rxq->desc == NULL) {
> + PMD_PERROR_LOG(hv, DBG_LOAD,
> + "rte_zmalloc for vmbus_desc failed!");
> + hyperv_dev_rx_queue_release(rxq);
> + return -ENOMEM;
> + }
> + rxq->sw_ring = rte_calloc_socket("rxq->sw_ring",
> + nb_desc, sizeof(struct rte_mbuf *),
> + RTE_CACHE_LINE_SIZE, socket_id);
> + if (rxq->sw_ring == NULL) {
> + hyperv_dev_rx_queue_release(rxq);
> + return -ENOMEM;
> + }
> +
> + for (i = 0; i < nb_desc; i++) {
> + mbuf = hv_rxmbuf_alloc(mp);
> + if (mbuf == NULL) {
> + PMD_PERROR_LOG(hv, DBG_LOAD, "RX mbuf alloc
> failed");
> + return -ENOMEM;
> + }
> +
> + mbuf->nb_segs = 1;
> + mbuf->next = NULL;
> + mbuf->port = dev->data->port_id;
> + rxq->sw_ring[i] = mbuf;
> + }
> +
> + rxq->mb_pool = mp;
> + rxq->nb_rx_desc = nb_desc;
> + rxq->rx_head = 0;
> + rxq->rx_tail = 0;
> + rxq->rx_free_thresh = rx_conf->rx_free_thresh;
> + rxq->port_id = dev->data->port_id;
> + rxq->hv = hv;
> + dev->data->rx_queues[queue_idx] = rxq;
> + hv->rxq = rxq;
> + hv->max_rx_pkt_len = mp->elt_size -
> + (sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM);
> +
> + return 0;
> +}
> +
> +void
> +hyperv_dev_rx_queue_release(void *prxq)
> +{
> + struct hv_rx_queue *rxq = prxq;
> +
> + PMD_INIT_FUNC_TRACE();
> + if (rxq == NULL)
> + return;
> + rte_free(rxq->sw_ring);
> + rte_free(rxq->desc);
> + rte_free(rxq);
> +}
> +
> +uint16_t
> +hyperv_recv_pkts(void *prxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
> +{
> + struct hv_rx_queue *rxq = prxq;
> + struct hv_data *hv = rxq->hv;
> + struct rte_mbuf *new_mb, *rx_mbuf, *first_mbuf;
> + uint16_t nb_rx = 0;
> + uint16_t segs, i;
> +
> + if (unlikely(hv->closed))
> + return 0;
> +
> + nb_pkts = MIN(nb_pkts, HV_MAX_PKT_BURST);
> + hyperv_scan_comps(hv, 0);
> +
> + while (nb_rx < nb_pkts) {
> + /*
> + * if there are no mbufs in sw_ring,
> + * we need to trigger receive procedure
> + */
> + if (rxq->rx_head == rxq->rx_tail) {
> + if (!hyperv_has_rx_work(hv))
> + break;
> +
> + if (unlikely(!hyperv_get_buffer(hv, rxq->desc, PAGE_SIZE))) {
> + hyperv_scan_comps(hv, 0);
> + continue;
> + }
> + }
> +
> + /*
> + * Now the received data is in the sw_ring of our rxq;
> + * extract it and replace it in sw_ring with a new mbuf.
> + */
> + rx_mbuf = first_mbuf = rxq->sw_ring[rxq->rx_head];
> + segs = first_mbuf->nb_segs;
> + for (i = 0; i < segs; ++i) {
> + new_mb = hv_rxmbuf_alloc(rxq->mb_pool);
> + if (unlikely(!new_mb)) {
> + PMD_PERROR_LOG(hv, DBG_RX, "mbuf alloc
> fail");
> + ++hv->stats.rx_nombuf;
> + return nb_rx;
> + }
> +
> + rx_mbuf = rxq->sw_ring[rxq->rx_head];
> + rxq->sw_ring[rxq->rx_head] = new_mb;
> +
> + if (++rxq->rx_head == rxq->nb_rx_desc)
> + rxq->rx_head = 0;
> +
> + rx_mbuf->ol_flags |= PKT_RX_IPV4_HDR;
> + rx_mbuf->port = rxq->port_id;
> + }
> + rx_mbuf->next = NULL;
> +
> + rx_pkts[nb_rx++] = first_mbuf;
> + ++hv->stats.ipkts;
> + hv->stats.ibytes += first_mbuf->pkt_len;
> + }
> +
> + return nb_rx;
> +}
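For completeness, a minimal sketch of driving this burst function
through the standard ethdev API (nothing driver-specific is assumed
beyond HV_MAX_PKT_BURST from hyperv.h):

    uint8_t port_id = 0; /* assumed: the hyperv port's id */
    struct rte_mbuf *bufs[HV_MAX_PKT_BURST];
    uint16_t n, j;

    n = rte_eth_rx_burst(port_id, 0, bufs, HV_MAX_PKT_BURST);
    for (j = 0; j < n; j++)
        rte_pktmbuf_free(bufs[j]); /* process, then free */
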
> +
> +static void hyperv_txeof(struct hv_tx_queue *txq)
> +{
> + struct rte_mbuf *mb, *mb_next;
> +
> + txq->tx_avail += txq->tx_free;
> + while (txq->tx_free) {
> + --txq->tx_free;
> + mb = txq->sw_ring[txq->tx_head];
> + while (mb) {
> + mb_next = mb->next;
> + rte_mempool_put(mb->pool, mb);
> + mb = mb_next;
> + }
> + if (++txq->tx_head == txq->nb_tx_desc)
> + txq->tx_head = 0;
> + }
> +}
> +
> +uint16_t
> +hyperv_xmit_pkts(void *ptxq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
> +{
> + struct hv_tx_queue *txq = ptxq;
> + struct hv_data *hv = txq->hv;
> + struct netvsc_packet *packet;
> + struct rte_mbuf *m;
> + uint32_t data_pages;
> + uint64_t first_data_page;
> + uint32_t total_len;
> + uint32_t len;
> + uint16_t i, nb_tx;
> + uint8_t rndis_pages;
> + int ret;
> +
> + if (unlikely(hv->closed))
> + return 0;
> +
> + for (nb_tx = 0; nb_tx < nb_pkts; ++nb_tx) {
> + hyperv_scan_comps(hv, 0);
> + /* Determine if the descriptor ring needs to be cleaned. */
> + if (txq->tx_free > txq->tx_free_thresh)
> + hyperv_txeof(txq);
> +
> + if (!txq->tx_avail) {
> + hyperv_scan_comps(hv, 1);
> + hyperv_txeof(txq);
> + if (!txq->tx_avail) {
> + PMD_PWARN_LOG(hv, DBG_TX, "No TX
> mbuf available");
> + break;
> + }
> + }
> + m = tx_pkts[nb_tx];
> + len = m->data_len;
> + total_len = m->pkt_len;
> + first_data_page = RTE_MBUF_DATA_DMA_ADDR(m) >> PAGE_SHIFT;
> + data_pages = ((RTE_MBUF_DATA_DMA_ADDR(m) + len - 1) >> PAGE_SHIFT) -
> + first_data_page + 1;
> +
> + packet = txq->pkts[txq->tx_tail];
> + rndis_pages = packet->ext_pages;
> +
> + txq->sw_ring[txq->tx_tail] = m;
> + packet->tot_data_buf_len = total_len;
> + packet->page_buffers[rndis_pages].pfn =
> + RTE_MBUF_DATA_DMA_ADDR(m) >> PAGE_SHIFT;
> + packet->page_buffers[rndis_pages].offset =
> + RTE_MBUF_DATA_DMA_ADDR(m) & (PAGE_SIZE - 1);
> + if (data_pages == 1)
> + packet->page_buffers[rndis_pages].length = len;
> + else
> + packet->page_buffers[rndis_pages].length = PAGE_SIZE -
> + packet->page_buffers[rndis_pages].offset;
> +
> + for (i = 1; i < data_pages; ++i) {
> + packet->page_buffers[rndis_pages + i].pfn =
> + first_data_page + i;
> + packet->page_buffers[rndis_pages + i].offset = 0;
> + packet->page_buffers[rndis_pages + i].length = PAGE_SIZE;
> + }
> + if (data_pages > 1)
> + packet->page_buffers[rndis_pages - 1 + data_pages].length =
> + ((rte_pktmbuf_mtod(m, unsigned long) + len - 1)
> + & (PAGE_SIZE - 1)) + 1;
> +
> + uint16_t index = data_pages + rndis_pages;
> +
> + for (i = 1; i < m->nb_segs; ++i) {
> + m = m->next;
> + len = m->data_len;
> + first_data_page = RTE_MBUF_DATA_DMA_ADDR(m) >> PAGE_SHIFT;
> + data_pages = ((RTE_MBUF_DATA_DMA_ADDR(m) + len - 1)
> + >> PAGE_SHIFT) - first_data_page + 1;
> + packet->page_buffers[index].pfn =
> + RTE_MBUF_DATA_DMA_ADDR(m) >> PAGE_SHIFT;
> + packet->page_buffers[index].offset =
> + rte_pktmbuf_mtod(m, unsigned long)
> + & (PAGE_SIZE - 1);
> + packet->page_buffers[index].length = m->data_len;
> + if (data_pages > 1) {
> + /* It can be 2 in case of usual mbuf_size=2048 */
> + packet->page_buffers[index].length = PAGE_SIZE -
> + packet->page_buffers[index].offset;
> + packet->page_buffers[++index].offset = 0;
> + packet->page_buffers[index].pfn =
> + packet->page_buffers[index - 1].pfn + 1;
> + packet->page_buffers[index].length = m->data_len -
> + packet->page_buffers[index - 1].length;
> + }
> + ++index;
> + }
> + packet->page_buf_count = index;
> +
> + ret = hv_rf_on_send(hv, packet);
> + if (likely(ret == 0)) {
> + ++hv->stats.opkts;
> + hv->stats.obytes += total_len;
> + if (++txq->tx_tail == txq->nb_tx_desc)
> + txq->tx_tail = 0;
> + --txq->tx_avail;
> + } else {
> + ++hv->stats.oerrors;
> + PMD_PERROR_LOG(hv, DBG_TX, "TX ring buffer is
> busy");
> + }
> + }
> +
> + return nb_tx;
> +}
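As a worked example of the page_buffers[] scatter list this loop
builds (addresses are made up; rndis_pages is 1, as set up in the
queue-setup code above):

    /* Assume a single-segment frame, len = 3000, whose payload
     * starts at phys 0x10BB8 (offset 0xBB8 = 3000 in page 0x10):
     *
     * page_buffers[1] = { .pfn = 0x10, .offset = 3000,
     *                     .length = 4096 - 3000 = 1096 }
     * page_buffers[2] = { .pfn = 0x11, .offset = 0,
     *                     .length = 3000 - 1096 = 1904 }
     *
     * tot_data_buf_len = 3000, page_buf_count = 3 (incl. the
     * RNDIS header in page_buffers[0]). */
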
> diff --git a/lib/librte_pmd_hyperv/hyperv_rxtx.h
> b/lib/librte_pmd_hyperv/hyperv_rxtx.h
> new file mode 100644
> index 0000000..c45a704
> --- /dev/null
> +++ b/lib/librte_pmd_hyperv/hyperv_rxtx.h
> @@ -0,0 +1,35 @@
> +/*-
> + * Copyright(c) 2013-2015 Brocade Communications Systems, Inc.
> + * All rights reserved.
> + */
> +
> +/**
> + * Structure associated with each TX queue.
> + */
> +struct hv_tx_queue {
> + struct netvsc_packet **pkts;
> + struct rte_mbuf **sw_ring;
> + uint16_t nb_tx_desc;
> + uint16_t tx_avail;
> + uint16_t tx_head;
> + uint16_t tx_tail;
> + uint16_t tx_free_thresh;
> + uint16_t tx_free;
> + uint8_t port_id;
> + struct hv_data *hv;
> +} __rte_cache_aligned;
> +
> +/**
> + * Structure associated with each RX queue.
> + */
> +struct hv_rx_queue {
> + struct rte_mempool *mb_pool;
> + struct rte_mbuf **sw_ring;
> + uint16_t nb_rx_desc;
> + uint16_t rx_head;
> + uint16_t rx_tail;
> + uint16_t rx_free_thresh;
> + uint8_t port_id;
> + struct hv_data *hv;
> + struct hv_vm_packet_descriptor *desc;
> +} __rte_cache_aligned;
> diff --git a/mk/rte.app.mk b/mk/rte.app.mk
> index 62a76ae..e0416d1 100644
> --- a/mk/rte.app.mk
> +++ b/mk/rte.app.mk
> @@ -133,6 +133,10 @@ LDLIBS += -lm
> LDLIBS += -lrt
> endif
>
> +ifeq ($(CONFIG_RTE_LIBRTE_HV_PMD),y)
> +LDLIBS += -lrte_pmd_hyperv
> +endif
> +
> ifeq ($(CONFIG_RTE_LIBRTE_VHOST), y)
> LDLIBS += -lrte_vhost
> endif
> --
> 2.1.4