Blame - src/vnet/devices/virtio/vhost_user_input.c - fdio/vpp

blob: 26140484d2e45403a96eca9d95961f89e3faaa4b [file] [log] [blame]

Mohsin Kazmi	e7cde31	2018-06-26 17:20:11 +0200	[diff] [blame]	1	/*
				2	*------------------------------------------------------------------
				3	* vhost-user-input
				4	*
				5	* Copyright (c) 2014-2018 Cisco and/or its affiliates.
				6	* Licensed under the Apache License, Version 2.0 (the "License");
				7	* you may not use this file except in compliance with the License.
				8	* You may obtain a copy of the License at:
				9	*
				10	* http://www.apache.org/licenses/LICENSE-2.0
				11	*
				12	* Unless required by applicable law or agreed to in writing, software
				13	* distributed under the License is distributed on an "AS IS" BASIS,
				14	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				15	* See the License for the specific language governing permissions and
				16	* limitations under the License.
				17	*------------------------------------------------------------------
				18	*/
				19
				20	#include <fcntl.h> /* for open */
				21	#include <sys/ioctl.h>
				22	#include <sys/socket.h>
				23	#include <sys/un.h>
				24	#include <sys/stat.h>
				25	#include <sys/types.h>
				26	#include <sys/uio.h> /* for iovec */
				27	#include <netinet/in.h>
				28	#include <sys/vfs.h>
				29
				30	#include <linux/if_arp.h>
				31	#include <linux/if_tun.h>
				32
				33	#include <vlib/vlib.h>
				34	#include <vlib/unix/unix.h>
				35
				36	#include <vnet/ip/ip.h>
				37
				38	#include <vnet/ethernet/ethernet.h>
				39	#include <vnet/devices/devices.h>
				40	#include <vnet/feature/feature.h>
				41
				42	#include <vnet/devices/virtio/vhost_user.h>
				43	#include <vnet/devices/virtio/vhost_user_inline.h>
				44
/*
 * When an RX queue is down but active, received packets
 * must be discarded. This value controls up to how many
 * packets will be discarded during each round.
 */
#define VHOST_USER_DOWN_DISCARD_COUNT 256

/*
 * When the number of available buffers gets under this threshold,
 * RX node will start discarding packets.
 */
#define VHOST_USER_RX_BUFFER_STARVATION 32

/*
 * On the receive side, the host should free descriptors as soon
 * as possible in order to avoid TX drop in the VM.
 * This value controls the number of copy operations that are stacked
 * before copy is done for all and descriptors are given back to
 * the guest.
 * The value 64 was obtained by testing (48 and 128 were not as good).
 */
#define VHOST_USER_RX_COPY_THRESHOLD 64

/* Registration object of the "vhost-user-input" node (initialized below). */
vlib_node_registration_t vhost_user_input_node;

/*
 * X-macro listing every error counter of the input node as
 * _(SYMBOL, "description"). The order of the entries defines both the
 * enum values and the index of the matching string, so the two tables
 * generated from it below always stay in sync.
 */
#define foreach_vhost_user_input_func_error \
  _(NO_ERROR, "no error") \
  _(NO_BUFFER, "no available buffer") \
  _(MMAP_FAIL, "mmap failure") \
  _(INDIRECT_OVERFLOW, "indirect descriptor overflows table") \
  _(UNDERSIZED_FRAME, "undersized ethernet frame received (< 14 bytes)") \
  _(FULL_RX_QUEUE, "full rx queue (possible driver tx drop)")

/* Error counter indices: one VHOST_USER_INPUT_FUNC_ERROR_<SYMBOL> per entry. */
typedef enum
{
#define _(f,s) VHOST_USER_INPUT_FUNC_ERROR_##f,
  foreach_vhost_user_input_func_error
#undef _
    VHOST_USER_INPUT_FUNC_N_ERROR,	/* number of error counters */
} vhost_user_input_func_error_t;

/* Human readable descriptions, indexed by vhost_user_input_func_error_t. */
static __clib_unused char *vhost_user_input_func_error_strings[] = {
#define _(n,s) s,
  foreach_vhost_user_input_func_error
#undef _
};
				91
				92	static_always_inline void
				93	vhost_user_rx_trace (vhost_trace_t * t,
				94	vhost_user_intf_t * vui, u16 qid,
				95	vlib_buffer_t * b, vhost_user_vring_t * txvq)
				96	{
				97	vhost_user_main_t *vum = &vhost_user_main;
				98	u32 last_avail_idx = txvq->last_avail_idx;
				99	u32 desc_current = txvq->avail->ring[last_avail_idx & txvq->qsz_mask];
				100	vring_desc_t *hdr_desc = 0;
				101	virtio_net_hdr_mrg_rxbuf_t *hdr;
				102	u32 hint = 0;
				103
				104	memset (t, 0, sizeof (*t));
				105	t->device_index = vui - vum->vhost_user_interfaces;
				106	t->qid = qid;
				107
				108	hdr_desc = &txvq->desc[desc_current];
				109	if (txvq->desc[desc_current].flags & VIRTQ_DESC_F_INDIRECT)
				110	{
				111	t->virtio_ring_flags \|= 1 << VIRTIO_TRACE_F_INDIRECT;
				112	/* Header is the first here */
				113	hdr_desc = map_guest_mem (vui, txvq->desc[desc_current].addr, &hint);
				114	}
				115	if (txvq->desc[desc_current].flags & VIRTQ_DESC_F_NEXT)
				116	{
				117	t->virtio_ring_flags \|= 1 << VIRTIO_TRACE_F_SIMPLE_CHAINED;
				118	}
				119	if (!(txvq->desc[desc_current].flags & VIRTQ_DESC_F_NEXT) &&
				120	!(txvq->desc[desc_current].flags & VIRTQ_DESC_F_INDIRECT))
				121	{
				122	t->virtio_ring_flags \|= 1 << VIRTIO_TRACE_F_SINGLE_DESC;
				123	}
				124
				125	t->first_desc_len = hdr_desc ? hdr_desc->len : 0;
				126
				127	if (!hdr_desc \|\| !(hdr = map_guest_mem (vui, hdr_desc->addr, &hint)))
				128	{
				129	t->virtio_ring_flags \|= 1 << VIRTIO_TRACE_F_MAP_ERROR;
				130	}
				131	else
				132	{
				133	u32 len = vui->virtio_net_hdr_sz;
				134	memcpy (&t->hdr, hdr, len > hdr_desc->len ? hdr_desc->len : len);
				135	}
				136	}
				137
				138	static_always_inline u32
				139	vhost_user_input_copy (vhost_user_intf_t * vui, vhost_copy_t * cpy,
				140	u16 copy_len, u32 * map_hint)
				141	{
				142	void src0, src1, src2, src3;
				143	if (PREDICT_TRUE (copy_len >= 4))
				144	{
				145	if (PREDICT_FALSE (!(src2 = map_guest_mem (vui, cpy[0].src, map_hint))))
				146	return 1;
				147	if (PREDICT_FALSE (!(src3 = map_guest_mem (vui, cpy[1].src, map_hint))))
				148	return 1;
				149
				150	while (PREDICT_TRUE (copy_len >= 4))
				151	{
				152	src0 = src2;
				153	src1 = src3;
				154
				155	if (PREDICT_FALSE
				156	(!(src2 = map_guest_mem (vui, cpy[2].src, map_hint))))
				157	return 1;
				158	if (PREDICT_FALSE
				159	(!(src3 = map_guest_mem (vui, cpy[3].src, map_hint))))
				160	return 1;
				161
				162	CLIB_PREFETCH (src2, 64, LOAD);
				163	CLIB_PREFETCH (src3, 64, LOAD);
				164
				165	clib_memcpy ((void *) cpy[0].dst, src0, cpy[0].len);
				166	clib_memcpy ((void *) cpy[1].dst, src1, cpy[1].len);
				167	copy_len -= 2;
				168	cpy += 2;
				169	}
				170	}
				171	while (copy_len)
				172	{
				173	if (PREDICT_FALSE (!(src0 = map_guest_mem (vui, cpy->src, map_hint))))
				174	return 1;
				175	clib_memcpy ((void *) cpy->dst, src0, cpy->len);
				176	copy_len -= 1;
				177	cpy += 1;
				178	}
				179	return 0;
				180	}
				181
				182	/**
				183	* Try to discard packets from the tx ring (VPP RX path).
				184	* Returns the number of discarded packets.
				185	*/
				186	static_always_inline u32
				187	vhost_user_rx_discard_packet (vlib_main_t * vm,
				188	vhost_user_intf_t * vui,
				189	vhost_user_vring_t * txvq, u32 discard_max)
				190	{
				191	/*
				192	* On the RX side, each packet corresponds to one descriptor
				193	* (it is the same whether it is a shallow descriptor, chained, or indirect).
				194	* Therefore, discarding a packet is like discarding a descriptor.
				195	*/
				196	u32 discarded_packets = 0;
				197	u32 avail_idx = txvq->avail->idx;
				198	while (discarded_packets != discard_max)
				199	{
				200	if (avail_idx == txvq->last_avail_idx)
				201	goto out;
				202
				203	u16 desc_chain_head =
				204	txvq->avail->ring[txvq->last_avail_idx & txvq->qsz_mask];
				205	txvq->last_avail_idx++;
				206	txvq->used->ring[txvq->last_used_idx & txvq->qsz_mask].id =
				207	desc_chain_head;
				208	txvq->used->ring[txvq->last_used_idx & txvq->qsz_mask].len = 0;
				209	vhost_user_log_dirty_ring (vui, txvq,
				210	ring[txvq->last_used_idx & txvq->qsz_mask]);
				211	txvq->last_used_idx++;
				212	discarded_packets++;
				213	}
				214
				215	out:
				216	CLIB_MEMORY_BARRIER ();
				217	txvq->used->idx = txvq->last_used_idx;
				218	vhost_user_log_dirty_ring (vui, txvq, idx);
				219	return discarded_packets;
				220	}
				221
				222	/*
				223	* In case of overflow, we need to rewind the array of allocated buffers.
				224	*/
				225	static __clib_unused void
				226	vhost_user_input_rewind_buffers (vlib_main_t * vm,
				227	vhost_cpu_t * cpu, vlib_buffer_t * b_head)
				228	{
				229	u32 bi_current = cpu->rx_buffers[cpu->rx_buffers_len];
				230	vlib_buffer_t *b_current = vlib_get_buffer (vm, bi_current);
				231	b_current->current_length = 0;
				232	b_current->flags = 0;
				233	while (b_current != b_head)
				234	{
				235	cpu->rx_buffers_len++;
				236	bi_current = cpu->rx_buffers[cpu->rx_buffers_len];
				237	b_current = vlib_get_buffer (vm, bi_current);
				238	b_current->current_length = 0;
				239	b_current->flags = 0;
				240	}
				241	cpu->rx_buffers_len++;
				242	}
				243
				244	static __clib_unused u32
				245	vhost_user_if_input (vlib_main_t * vm,
				246	vhost_user_main_t * vum,
				247	vhost_user_intf_t * vui,
				248	u16 qid, vlib_node_runtime_t * node,
				249	vnet_hw_interface_rx_mode mode)
				250	{
				251	vhost_user_vring_t *txvq = &vui->vrings[VHOST_VRING_IDX_TX (qid)];
				252	u16 n_rx_packets = 0;
				253	u32 n_rx_bytes = 0;
				254	u16 n_left;
				255	u32 n_left_to_next, *to_next;
				256	u32 next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
				257	u32 n_trace = vlib_get_trace_count (vm, node);
				258	u32 map_hint = 0;
Damjan Marion	067cd62	2018-07-11 12:47:43 +0200	[diff] [blame]	259	u16 thread_index = vm->thread_index;
Mohsin Kazmi	e7cde31	2018-06-26 17:20:11 +0200	[diff] [blame]	260	u16 copy_len = 0;
				261
Yichen Wang	28812a0	2018-08-28 23:05:27 -0700	[diff] [blame]	262	/* The descriptor table is not ready yet */
				263	if (PREDICT_FALSE (txvq->avail == 0))
				264	return 0;
				265
Mohsin Kazmi	e7cde31	2018-06-26 17:20:11 +0200	[diff] [blame]	266	{
				267	/* do we have pending interrupts ? */
				268	vhost_user_vring_t *rxvq = &vui->vrings[VHOST_VRING_IDX_RX (qid)];
				269	f64 now = vlib_time_now (vm);
				270
				271	if ((txvq->n_since_last_int) && (txvq->int_deadline < now))
				272	vhost_user_send_call (vm, txvq);
				273
				274	if ((rxvq->n_since_last_int) && (rxvq->int_deadline < now))
				275	vhost_user_send_call (vm, rxvq);
				276	}
				277
				278	/*
				279	* For adaptive mode, it is optimized to reduce interrupts.
				280	* If the scheduler switches the input node to polling due
				281	* to burst of traffic, we tell the driver no interrupt.
				282	* When the traffic subsides, the scheduler switches the node back to
				283	* interrupt mode. We must tell the driver we want interrupt.
				284	*/
				285	if (PREDICT_FALSE (mode == VNET_HW_INTERFACE_RX_MODE_ADAPTIVE))
				286	{
				287	if ((node->flags &
				288	VLIB_NODE_FLAG_SWITCH_FROM_POLLING_TO_INTERRUPT_MODE) \|\|
				289	!(node->flags &
				290	VLIB_NODE_FLAG_SWITCH_FROM_INTERRUPT_TO_POLLING_MODE))
				291	/* Tell driver we want notification */
				292	txvq->used->flags = 0;
				293	else
				294	/* Tell driver we don't want notification */
				295	txvq->used->flags = VRING_USED_F_NO_NOTIFY;
				296	}
				297
				298	if (PREDICT_FALSE (txvq->avail->flags & 0xFFFE))
				299	return 0;
				300
				301	n_left = (u16) (txvq->avail->idx - txvq->last_avail_idx);
				302
				303	/* nothing to do */
				304	if (PREDICT_FALSE (n_left == 0))
				305	return 0;
				306
				307	if (PREDICT_FALSE (!vui->admin_up \|\| !(txvq->enabled)))
				308	{
				309	/*
				310	* Discard input packet if interface is admin down or vring is not
				311	* enabled.
				312	* "For example, for a networking device, in the disabled state
				313	* client must not supply any new RX packets, but must process
				314	* and discard any TX packets."
				315	*/
				316	vhost_user_rx_discard_packet (vm, vui, txvq,
				317	VHOST_USER_DOWN_DISCARD_COUNT);
				318	return 0;
				319	}
				320
				321	if (PREDICT_FALSE (n_left == (txvq->qsz_mask + 1)))
				322	{
				323	/*
				324	* Informational error logging when VPP is not
				325	* receiving packets fast enough.
				326	*/
				327	vlib_error_count (vm, node->node_index,
				328	VHOST_USER_INPUT_FUNC_ERROR_FULL_RX_QUEUE, 1);
				329	}
				330
				331	if (n_left > VLIB_FRAME_SIZE)
				332	n_left = VLIB_FRAME_SIZE;
				333
				334	/*
				335	* For small packets (<2kB), we will not need more than one vlib buffer
				336	* per packet. In case packets are bigger, we will just yeld at some point
				337	* in the loop and come back later. This is not an issue as for big packet,
				338	* processing cost really comes from the memory copy.
				339	* The assumption is that big packets will fit in 40 buffers.
				340	*/
				341	if (PREDICT_FALSE (vum->cpus[thread_index].rx_buffers_len < n_left + 1 \|\|
				342	vum->cpus[thread_index].rx_buffers_len < 40))
				343	{
				344	u32 curr_len = vum->cpus[thread_index].rx_buffers_len;
				345	vum->cpus[thread_index].rx_buffers_len +=
				346	vlib_buffer_alloc_from_free_list (vm,
				347	vum->cpus[thread_index].rx_buffers +
				348	curr_len,
				349	VHOST_USER_RX_BUFFERS_N - curr_len,
				350	VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX);
				351
				352	if (PREDICT_FALSE
				353	(vum->cpus[thread_index].rx_buffers_len <
				354	VHOST_USER_RX_BUFFER_STARVATION))
				355	{
				356	/* In case of buffer starvation, discard some packets from the queue
				357	* and log the event.
				358	* We keep doing best effort for the remaining packets. */
				359	u32 flush = (n_left + 1 > vum->cpus[thread_index].rx_buffers_len) ?
				360	n_left + 1 - vum->cpus[thread_index].rx_buffers_len : 1;
				361	flush = vhost_user_rx_discard_packet (vm, vui, txvq, flush);
				362
				363	n_left -= flush;
				364	vlib_increment_simple_counter (vnet_main.
				365	interface_main.sw_if_counters +
				366	VNET_INTERFACE_COUNTER_DROP,
				367	vlib_get_thread_index (),
				368	vui->sw_if_index, flush);
				369
				370	vlib_error_count (vm, vhost_user_input_node.index,
				371	VHOST_USER_INPUT_FUNC_ERROR_NO_BUFFER, flush);
				372	}
				373	}
				374
				375	while (n_left > 0)
				376	{
				377	vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
				378
				379	while (n_left > 0 && n_left_to_next > 0)
				380	{
				381	vlib_buffer_t b_head, b_current;
				382	u32 bi_current;
				383	u16 desc_current;
				384	u32 desc_data_offset;
				385	vring_desc_t *desc_table = txvq->desc;
				386
				387	if (PREDICT_FALSE (vum->cpus[thread_index].rx_buffers_len <= 1))
				388	{
				389	/* Not enough rx_buffers
				390	* Note: We yeld on 1 so we don't need to do an additional
				391	* check for the next buffer prefetch.
				392	*/
				393	n_left = 0;
				394	break;
				395	}
				396
				397	desc_current =
				398	txvq->avail->ring[txvq->last_avail_idx & txvq->qsz_mask];
				399	vum->cpus[thread_index].rx_buffers_len--;
				400	bi_current = (vum->cpus[thread_index].rx_buffers)
				401	[vum->cpus[thread_index].rx_buffers_len];
				402	b_head = b_current = vlib_get_buffer (vm, bi_current);
				403	to_next[0] = bi_current; //We do that now so we can forget about bi_current
				404	to_next++;
				405	n_left_to_next--;
				406
				407	vlib_prefetch_buffer_with_index (vm,
				408	(vum->
				409	cpus[thread_index].rx_buffers)
				410	[vum->cpus[thread_index].
				411	rx_buffers_len - 1], LOAD);
				412
				413	/* Just preset the used descriptor id and length for later */
				414	txvq->used->ring[txvq->last_used_idx & txvq->qsz_mask].id =
				415	desc_current;
				416	txvq->used->ring[txvq->last_used_idx & txvq->qsz_mask].len = 0;
				417	vhost_user_log_dirty_ring (vui, txvq,
				418	ring[txvq->last_used_idx &
				419	txvq->qsz_mask]);
				420
				421	/* The buffer should already be initialized */
				422	b_head->total_length_not_including_first_buffer = 0;
				423	b_head->flags \|= VLIB_BUFFER_TOTAL_LENGTH_VALID;
				424
				425	if (PREDICT_FALSE (n_trace))
				426	{
				427	//TODO: next_index is not exactly known at that point
				428	vlib_trace_buffer (vm, node, next_index, b_head,
				429	/* follow_chain */ 0);
				430	vhost_trace_t *t0 =
				431	vlib_add_trace (vm, node, b_head, sizeof (t0[0]));
				432	vhost_user_rx_trace (t0, vui, qid, b_head, txvq);
				433	n_trace--;
				434	vlib_set_trace_count (vm, node, n_trace);
				435	}
				436
				437	/* This depends on the setup but is very consistent
				438	* So I think the CPU branch predictor will make a pretty good job
				439	* at optimizing the decision. */
				440	if (txvq->desc[desc_current].flags & VIRTQ_DESC_F_INDIRECT)
				441	{
				442	desc_table = map_guest_mem (vui, txvq->desc[desc_current].addr,
				443	&map_hint);
				444	desc_current = 0;
				445	if (PREDICT_FALSE (desc_table == 0))
				446	{
				447	vlib_error_count (vm, node->node_index,
				448	VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL, 1);
				449	goto out;
				450	}
				451	}
				452
				453	if (PREDICT_TRUE (vui->is_any_layout) \|\|
				454	(!(desc_table[desc_current].flags & VIRTQ_DESC_F_NEXT)))
				455	{
				456	/* ANYLAYOUT or single buffer */
				457	desc_data_offset = vui->virtio_net_hdr_sz;
				458	}
				459	else
				460	{
				461	/* CSR case without ANYLAYOUT, skip 1st buffer */
				462	desc_data_offset = desc_table[desc_current].len;
				463	}
				464
				465	while (1)
				466	{
				467	/* Get more input if necessary. Or end of packet. */
				468	if (desc_data_offset == desc_table[desc_current].len)
				469	{
				470	if (PREDICT_FALSE (desc_table[desc_current].flags &
				471	VIRTQ_DESC_F_NEXT))
				472	{
				473	desc_current = desc_table[desc_current].next;
				474	desc_data_offset = 0;
				475	}
				476	else
				477	{
				478	goto out;
				479	}
				480	}
				481
				482	/* Get more output if necessary. Or end of packet. */
				483	if (PREDICT_FALSE
				484	(b_current->current_length == VLIB_BUFFER_DATA_SIZE))
				485	{
				486	if (PREDICT_FALSE
				487	(vum->cpus[thread_index].rx_buffers_len == 0))
				488	{
				489	/* Cancel speculation */
				490	to_next--;
				491	n_left_to_next++;
				492
				493	/*
				494	* Checking if there are some left buffers.
				495	* If not, just rewind the used buffers and stop.
				496	* Note: Scheduled copies are not cancelled. This is
				497	* not an issue as they would still be valid. Useless,
				498	* but valid.
				499	*/
				500	vhost_user_input_rewind_buffers (vm,
				501	&vum->cpus
				502	[thread_index],
				503	b_head);
				504	n_left = 0;
				505	goto stop;
				506	}
				507
				508	/* Get next output */
				509	vum->cpus[thread_index].rx_buffers_len--;
				510	u32 bi_next =
				511	(vum->cpus[thread_index].rx_buffers)[vum->cpus
				512	[thread_index].rx_buffers_len];
				513	b_current->next_buffer = bi_next;
				514	b_current->flags \|= VLIB_BUFFER_NEXT_PRESENT;
				515	bi_current = bi_next;
				516	b_current = vlib_get_buffer (vm, bi_current);
				517	}
				518
				519	/* Prepare a copy order executed later for the data */
				520	vhost_copy_t *cpy = &vum->cpus[thread_index].copy[copy_len];
				521	copy_len++;
				522	u32 desc_data_l =
				523	desc_table[desc_current].len - desc_data_offset;
				524	cpy->len = VLIB_BUFFER_DATA_SIZE - b_current->current_length;
				525	cpy->len = (cpy->len > desc_data_l) ? desc_data_l : cpy->len;
				526	cpy->dst = (uword) (vlib_buffer_get_current (b_current) +
				527	b_current->current_length);
				528	cpy->src = desc_table[desc_current].addr + desc_data_offset;
				529
				530	desc_data_offset += cpy->len;
				531
				532	b_current->current_length += cpy->len;
				533	b_head->total_length_not_including_first_buffer += cpy->len;
				534	}
				535
				536	out:
				537	CLIB_PREFETCH (&n_left, sizeof (n_left), LOAD);
				538
				539	n_rx_bytes += b_head->total_length_not_including_first_buffer;
				540	n_rx_packets++;
				541
				542	b_head->total_length_not_including_first_buffer -=
				543	b_head->current_length;
				544
				545	/* consume the descriptor and return it as used */
				546	txvq->last_avail_idx++;
				547	txvq->last_used_idx++;
				548
				549	VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b_head);
				550
				551	vnet_buffer (b_head)->sw_if_index[VLIB_RX] = vui->sw_if_index;
				552	vnet_buffer (b_head)->sw_if_index[VLIB_TX] = (u32) ~ 0;
				553	b_head->error = 0;
				554
				555	{
				556	u32 next0 = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
				557
				558	/* redirect if feature path enabled */
				559	vnet_feature_start_device_input_x1 (vui->sw_if_index, &next0,
				560	b_head);
				561
				562	u32 bi = to_next[-1]; //Cannot use to_next[-1] in the macro
				563	vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
				564	to_next, n_left_to_next,
				565	bi, next0);
				566	}
				567
				568	n_left--;
				569
				570	/*
				571	* Although separating memory copies from virtio ring parsing
				572	* is beneficial, we can offer to perform the copies from time
				573	* to time in order to free some space in the ring.
				574	*/
				575	if (PREDICT_FALSE (copy_len >= VHOST_USER_RX_COPY_THRESHOLD))
				576	{
				577	if (PREDICT_FALSE
				578	(vhost_user_input_copy (vui, vum->cpus[thread_index].copy,
				579	copy_len, &map_hint)))
				580	{
				581	vlib_error_count (vm, node->node_index,
				582	VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL, 1);
				583	}
				584	copy_len = 0;
				585
				586	/* give buffers back to driver */
				587	CLIB_MEMORY_BARRIER ();
				588	txvq->used->idx = txvq->last_used_idx;
				589	vhost_user_log_dirty_ring (vui, txvq, idx);
				590	}
				591	}
				592	stop:
				593	vlib_put_next_frame (vm, node, next_index, n_left_to_next);
				594	}
				595
				596	/* Do the memory copies */
				597	if (PREDICT_FALSE
				598	(vhost_user_input_copy (vui, vum->cpus[thread_index].copy,
				599	copy_len, &map_hint)))
				600	{
				601	vlib_error_count (vm, node->node_index,
				602	VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL, 1);
				603	}
				604
				605	/* give buffers back to driver */
				606	CLIB_MEMORY_BARRIER ();
				607	txvq->used->idx = txvq->last_used_idx;
				608	vhost_user_log_dirty_ring (vui, txvq, idx);
				609
				610	/* interrupt (call) handling */
				611	if ((txvq->callfd_idx != ~0) &&
				612	!(txvq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
				613	{
				614	txvq->n_since_last_int += n_rx_packets;
				615
				616	if (txvq->n_since_last_int > vum->coalesce_frames)
				617	vhost_user_send_call (vm, txvq);
				618	}
				619
				620	/* increase rx counters */
				621	vlib_increment_combined_counter
				622	(vnet_main.interface_main.combined_sw_if_counters
				623	+ VNET_INTERFACE_COUNTER_RX,
				624	vlib_get_thread_index (), vui->sw_if_index, n_rx_packets, n_rx_bytes);
				625
				626	vnet_device_increment_rx_packets (thread_index, n_rx_packets);
				627
				628	return n_rx_packets;
				629	}
				630
				631	VLIB_NODE_FN (vhost_user_input_node) (vlib_main_t * vm,
				632	vlib_node_runtime_t * node,
				633	vlib_frame_t * frame)
				634	{
				635	vhost_user_main_t *vum = &vhost_user_main;
				636	uword n_rx_packets = 0;
				637	vhost_user_intf_t *vui;
				638	vnet_device_input_runtime_t *rt =
				639	(vnet_device_input_runtime_t *) node->runtime_data;
				640	vnet_device_and_queue_t *dq;
				641
				642	vec_foreach (dq, rt->devices_and_queues)
				643	{
				644	if (clib_smp_swap (&dq->interrupt_pending, 0) \|\|
				645	(node->state == VLIB_NODE_STATE_POLLING))
				646	{
				647	vui =
				648	pool_elt_at_index (vum->vhost_user_interfaces, dq->dev_instance);
				649	n_rx_packets = vhost_user_if_input (vm, vum, vui, dq->queue_id, node,
				650	dq->mode);
				651	}
				652	}
				653
				654	return n_rx_packets;
				655	}
				656
Mohsin Kazmi	e7cde31	2018-06-26 17:20:11 +0200	[diff] [blame]	657	/* INDENT-OFF */
				658	VLIB_REGISTER_NODE (vhost_user_input_node) = {
				659	.type = VLIB_NODE_TYPE_INPUT,
				660	.name = "vhost-user-input",
				661	.sibling_of = "device-input",
				662
				663	/* Will be enabled if/when hardware is detected. */
				664	.state = VLIB_NODE_STATE_DISABLED,
				665
				666	.format_buffer = format_ethernet_header_with_length,
				667	.format_trace = format_vhost_trace,
				668
				669	.n_errors = VHOST_USER_INPUT_FUNC_N_ERROR,
				670	.error_strings = vhost_user_input_func_error_strings,
				671	};
				672	/* INDENT-ON */
Mohsin Kazmi	e7cde31	2018-06-26 17:20:11 +0200	[diff] [blame]	673
				674	/*
				675	* fd.io coding-style-patch-verification: ON
				676	*
				677	* Local Variables:
				678	* eval: (c-set-style "gnu")
				679	* End:
				680	*/