blob: 7470f15b1c38376a1dcdc5a8fcb5fd3a330370ad [file] [log] [blame]
Damjan Marion00a9dca2016-08-17 17:05:46 +02001/*
Ed Warnickecb9cada2015-12-08 15:45:58 -07002 *------------------------------------------------------------------
3 * vhost.c - vhost-user
4 *
5 * Copyright (c) 2014 Cisco and/or its affiliates.
6 * Licensed under the Apache License, Version 2.0 (the "License");
7 * you may not use this file except in compliance with the License.
8 * You may obtain a copy of the License at:
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 *------------------------------------------------------------------
18 */
19
20#include <fcntl.h> /* for open */
21#include <sys/ioctl.h>
22#include <sys/socket.h>
23#include <sys/un.h>
24#include <sys/stat.h>
25#include <sys/types.h>
26#include <sys/uio.h> /* for iovec */
27#include <netinet/in.h>
28#include <sys/vfs.h>
29
30#include <linux/if_arp.h>
31#include <linux/if_tun.h>
32
33#include <vlib/vlib.h>
34#include <vlib/unix/unix.h>
35
36#include <vnet/ip/ip.h>
37
38#include <vnet/ethernet/ethernet.h>
Damjan Marion8bdc63b2016-11-02 14:48:21 +010039#include <vnet/devices/devices.h>
Damjan Marion22311502016-10-28 20:30:15 +020040#include <vnet/feature/feature.h>
Ed Warnickecb9cada2015-12-08 15:45:58 -070041
42#include <vnet/devices/virtio/vhost-user.h>
43
Billy McFalla92501a2016-11-23 12:45:29 -050044/**
45 * @file
46 * @brief vHost User Device Driver.
47 *
48 * This file contains the source code for vHost User interface.
49 */
50
51
Pierre Pfister116ea4b2016-11-08 15:49:28 +000052#define VHOST_DEBUG_VQ 0
Ed Warnickecb9cada2015-12-08 15:45:58 -070053
Steven388e51a2017-06-01 12:49:23 -070054#define DBG_SOCK(args...) \
55 { \
56 vhost_user_main_t *_vum = &vhost_user_main; \
57 if (_vum->debug) \
58 clib_warning(args); \
59 };
Ed Warnickecb9cada2015-12-08 15:45:58 -070060
Pierre Pfister116ea4b2016-11-08 15:49:28 +000061#if VHOST_DEBUG_VQ == 1
Ed Warnickecb9cada2015-12-08 15:45:58 -070062#define DBG_VQ(args...) clib_warning(args);
63#else
64#define DBG_VQ(args...)
65#endif
66
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +000067/*
68 * When an RX queue is down but active, received packets
69 * must be discarded. This value controls up to how many
70 * packets will be discarded during each round.
71 */
72#define VHOST_USER_DOWN_DISCARD_COUNT 256
73
74/*
75 * When the number of available buffers gets under this threshold,
76 * RX node will start discarding packets.
77 */
78#define VHOST_USER_RX_BUFFER_STARVATION 32
79
80/*
81 * On the receive side, the host should free descriptors as soon
82 * as possible in order to avoid TX drop in the VM.
83 * This value controls the number of copy operations that are stacked
84 * before copy is done for all and descriptors are given back to
85 * the guest.
86 * The value 64 was obtained by testing (48 and 128 were not as good).
87 */
88#define VHOST_USER_RX_COPY_THRESHOLD 64
Stevend7727532017-06-09 18:49:17 -070089/*
90 * On the transmit side, we keep processing the buffers from vlib in the while
91 * loop and prepare the copy order to be executed later. However, the static
92 * array which we keep the copy order is limited to VHOST_USER_COPY_ARRAY_N
93 * entries. In order to not corrupt memory, we have to do the copy when the
94 * static array reaches the copy threshold. We subtract 40 in case the code
95 * goes into the inner loop for a maximum of 64k frames which may require
96 * more array entries.
97 */
98#define VHOST_USER_TX_COPY_THRESHOLD (VHOST_USER_COPY_ARRAY_N - 40)
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +000099
Pierre Pfisterdbb3c252016-11-22 10:33:34 +0000100#define UNIX_GET_FD(unixfd_idx) \
101 (unixfd_idx != ~0) ? \
Damjan Marion56dd5432017-09-08 19:52:02 +0200102 pool_elt_at_index (file_main.file_pool, \
Pierre Pfisterdbb3c252016-11-22 10:33:34 +0000103 unixfd_idx)->file_descriptor : -1;
104
Pierre Pfister116ea4b2016-11-08 15:49:28 +0000105#define foreach_virtio_trace_flags \
106 _ (SIMPLE_CHAINED, 0, "Simple descriptor chaining") \
107 _ (SINGLE_DESC, 1, "Single descriptor packet") \
108 _ (INDIRECT, 2, "Indirect descriptor") \
109 _ (MAP_ERROR, 4, "Memory mapping error")
110
111typedef enum
112{
113#define _(n,i,s) VIRTIO_TRACE_F_##n,
114 foreach_virtio_trace_flags
115#undef _
116} virtio_trace_flag_t;
117
Ed Warnickecb9cada2015-12-08 15:45:58 -0700118vlib_node_registration_t vhost_user_input_node;
119
120#define foreach_vhost_user_tx_func_error \
Pierre Pfister328e99b2016-02-12 13:18:42 +0000121 _(NONE, "no error") \
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +0000122 _(NOT_READY, "vhost vring not ready") \
123 _(DOWN, "vhost interface is down") \
Ed Warnickecb9cada2015-12-08 15:45:58 -0700124 _(PKT_DROP_NOBUF, "tx packet drops (no available descriptors)") \
Pierre Pfisterba1d0462016-07-27 16:38:20 +0100125 _(PKT_DROP_NOMRG, "tx packet drops (cannot merge descriptors)") \
126 _(MMAP_FAIL, "mmap failure") \
127 _(INDIRECT_OVERFLOW, "indirect descriptor table overflow")
Ed Warnickecb9cada2015-12-08 15:45:58 -0700128
Damjan Marion00a9dca2016-08-17 17:05:46 +0200129typedef enum
130{
Ed Warnickecb9cada2015-12-08 15:45:58 -0700131#define _(f,s) VHOST_USER_TX_FUNC_ERROR_##f,
132 foreach_vhost_user_tx_func_error
133#undef _
Damjan Marion00a9dca2016-08-17 17:05:46 +0200134 VHOST_USER_TX_FUNC_N_ERROR,
Ed Warnickecb9cada2015-12-08 15:45:58 -0700135} vhost_user_tx_func_error_t;
136
Damjan Marion00a9dca2016-08-17 17:05:46 +0200137static char *vhost_user_tx_func_error_strings[] = {
Ed Warnickecb9cada2015-12-08 15:45:58 -0700138#define _(n,s) s,
Damjan Marion00a9dca2016-08-17 17:05:46 +0200139 foreach_vhost_user_tx_func_error
Ed Warnickecb9cada2015-12-08 15:45:58 -0700140#undef _
141};
142
143#define foreach_vhost_user_input_func_error \
144 _(NO_ERROR, "no error") \
Pierre Pfister328e99b2016-02-12 13:18:42 +0000145 _(NO_BUFFER, "no available buffer") \
146 _(MMAP_FAIL, "mmap failure") \
Pierre Pfisterba1d0462016-07-27 16:38:20 +0100147 _(INDIRECT_OVERFLOW, "indirect descriptor overflows table") \
148 _(UNDERSIZED_FRAME, "undersized ethernet frame received (< 14 bytes)") \
149 _(FULL_RX_QUEUE, "full rx queue (possible driver tx drop)")
Ed Warnickecb9cada2015-12-08 15:45:58 -0700150
Damjan Marion00a9dca2016-08-17 17:05:46 +0200151typedef enum
152{
Ed Warnickecb9cada2015-12-08 15:45:58 -0700153#define _(f,s) VHOST_USER_INPUT_FUNC_ERROR_##f,
154 foreach_vhost_user_input_func_error
155#undef _
Damjan Marion00a9dca2016-08-17 17:05:46 +0200156 VHOST_USER_INPUT_FUNC_N_ERROR,
Ed Warnickecb9cada2015-12-08 15:45:58 -0700157} vhost_user_input_func_error_t;
158
Damjan Marion00a9dca2016-08-17 17:05:46 +0200159static char *vhost_user_input_func_error_strings[] = {
Ed Warnickecb9cada2015-12-08 15:45:58 -0700160#define _(n,s) s,
Damjan Marion00a9dca2016-08-17 17:05:46 +0200161 foreach_vhost_user_input_func_error
Ed Warnickecb9cada2015-12-08 15:45:58 -0700162#undef _
163};
164
Damjan Marion00a9dca2016-08-17 17:05:46 +0200165/* *INDENT-OFF* */
Ed Warnickecb9cada2015-12-08 15:45:58 -0700166static vhost_user_main_t vhost_user_main = {
167 .mtu_bytes = 1518,
168};
169
170VNET_HW_INTERFACE_CLASS (vhost_interface_class, static) = {
171 .name = "vhost-user",
172};
Damjan Marion00a9dca2016-08-17 17:05:46 +0200173/* *INDENT-ON* */
Ed Warnickecb9cada2015-12-08 15:45:58 -0700174
Damjan Marion00a9dca2016-08-17 17:05:46 +0200175static u8 *
176format_vhost_user_interface_name (u8 * s, va_list * args)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700177{
178 u32 i = va_arg (*args, u32);
179 u32 show_dev_instance = ~0;
Damjan Marion00a9dca2016-08-17 17:05:46 +0200180 vhost_user_main_t *vum = &vhost_user_main;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700181
182 if (i < vec_len (vum->show_dev_instance_by_real_dev_instance))
183 show_dev_instance = vum->show_dev_instance_by_real_dev_instance[i];
184
185 if (show_dev_instance != ~0)
186 i = show_dev_instance;
187
188 s = format (s, "VirtualEthernet0/0/%d", i);
189 return s;
190}
191
/**
 * @brief Renumber a vhost-user interface for display purposes.
 *
 * Records a mapping from the real device instance to the instance that
 * format_vhost_user_interface_name() will show.
 *
 * @param hi hardware interface being renumbered
 * @param new_dev_instance instance number to display from now on
 * @return 0 (always succeeds)
 */
static int
vhost_user_name_renumber (vnet_hw_interface_t * hi, u32 new_dev_instance)
{
  // FIXME: check if the new dev instance is already used
  vhost_user_main_t *vum = &vhost_user_main;
  /* Grow the mapping vector on demand; unmapped slots stay ~0. */
  vec_validate_init_empty (vum->show_dev_instance_by_real_dev_instance,
			   hi->dev_instance, ~0);

  vum->show_dev_instance_by_real_dev_instance[hi->dev_instance] =
    new_dev_instance;

  DBG_SOCK ("renumbered vhost-user interface dev_instance %d to %d",
	    hi->dev_instance, new_dev_instance);

  return 0;
}
208
/**
 * @brief Translate a guest physical address to a vlib virtual address.
 *
 * @param vui  vhost-user interface whose memory regions are searched
 * @param addr guest physical address to translate
 * @param hint in/out: region index to try first; updated on a successful
 *             lookup so subsequent calls for nearby addresses take the
 *             fast path
 * @return pointer into the mmap'ed region, or 0 if no region covers addr
 */
static_always_inline void *
map_guest_mem (vhost_user_intf_t * vui, uword addr, u32 * hint)
{
  int i = *hint;

  /* Fast path: the hinted region still covers this address. */
  if (PREDICT_TRUE ((vui->regions[i].guest_phys_addr <= addr) &&
		    ((vui->regions[i].guest_phys_addr +
		      vui->regions[i].memory_size) > addr)))
    {
      return (void *) (vui->region_mmap_addr[i] + addr -
		       vui->regions[i].guest_phys_addr);
    }
#if __SSE4_2__
  /* Vectorized search: test addr against the lo/hi bounds of all 8
     regions, two regions per 128-bit compare. `al` is addr+1 so that
     cmpgt(al, lo) implements lo <= addr. */
  __m128i rl, rh, al, ah, r;
  al = _mm_set1_epi64x (addr + 1);
  ah = _mm_set1_epi64x (addr);

  rl = _mm_loadu_si128 ((__m128i *) & vui->region_guest_addr_lo[0]);
  rl = _mm_cmpgt_epi64 (al, rl);
  rh = _mm_loadu_si128 ((__m128i *) & vui->region_guest_addr_hi[0]);
  rh = _mm_cmpgt_epi64 (rh, ah);
  r = _mm_and_si128 (rl, rh);

  rl = _mm_loadu_si128 ((__m128i *) & vui->region_guest_addr_lo[2]);
  rl = _mm_cmpgt_epi64 (al, rl);
  rh = _mm_loadu_si128 ((__m128i *) & vui->region_guest_addr_hi[2]);
  rh = _mm_cmpgt_epi64 (rh, ah);
  r = _mm_blend_epi16 (r, _mm_and_si128 (rl, rh), 0x22);

  rl = _mm_loadu_si128 ((__m128i *) & vui->region_guest_addr_lo[4]);
  rl = _mm_cmpgt_epi64 (al, rl);
  rh = _mm_loadu_si128 ((__m128i *) & vui->region_guest_addr_hi[4]);
  rh = _mm_cmpgt_epi64 (rh, ah);
  r = _mm_blend_epi16 (r, _mm_and_si128 (rl, rh), 0x44);

  rl = _mm_loadu_si128 ((__m128i *) & vui->region_guest_addr_lo[6]);
  rl = _mm_cmpgt_epi64 (al, rl);
  rh = _mm_loadu_si128 ((__m128i *) & vui->region_guest_addr_hi[6]);
  rh = _mm_cmpgt_epi64 (rh, ah);
  r = _mm_blend_epi16 (r, _mm_and_si128 (rl, rh), 0x88);

  /* Pack the per-region match results into one byte each, then take the
     first set bit. OR-ing in 1<<MAX_NREGIONS bounds ctz when nothing
     matched. */
  r = _mm_shuffle_epi8 (r, _mm_set_epi64x (0, 0x0e060c040a020800));
  i = __builtin_ctzll (_mm_movemask_epi8 (r) |
		       (1 << VHOST_MEMORY_MAX_NREGIONS));

  if (i < vui->nregions)
    {
      *hint = i;
      return (void *) (vui->region_mmap_addr[i] + addr -
		       vui->regions[i].guest_phys_addr);
    }

#else
  /* Scalar fallback: linear scan of the configured regions. */
  for (i = 0; i < vui->nregions; i++)
    {
      if ((vui->regions[i].guest_phys_addr <= addr) &&
	  ((vui->regions[i].guest_phys_addr + vui->regions[i].memory_size) >
	   addr))
	{
	  *hint = i;
	  return (void *) (vui->region_mmap_addr[i] + addr -
			   vui->regions[i].guest_phys_addr);
	}
    }
#endif
  DBG_VQ ("failed to map guest mem addr %llx", addr);
  *hint = 0;
  return 0;
}
277
Damjan Marion00a9dca2016-08-17 17:05:46 +0200278static inline void *
279map_user_mem (vhost_user_intf_t * vui, uword addr)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700280{
281 int i;
Damjan Marion00a9dca2016-08-17 17:05:46 +0200282 for (i = 0; i < vui->nregions; i++)
283 {
284 if ((vui->regions[i].userspace_addr <= addr) &&
285 ((vui->regions[i].userspace_addr + vui->regions[i].memory_size) >
286 addr))
287 {
288 return (void *) (vui->region_mmap_addr[i] + addr -
289 vui->regions[i].userspace_addr);
290 }
291 }
Ed Warnickecb9cada2015-12-08 15:45:58 -0700292 return 0;
293}
294
/**
 * @brief Return the filesystem block size for the given file descriptor.
 *
 * For a hugetlbfs-backed fd this is the huge page size, which callers use
 * to round mmap/munmap lengths. The original code ignored the fstatfs()
 * return value and could read an uninitialized statfs buffer on failure
 * (e.g. bad fd); we now fall back to a conventional 4K page size so the
 * result is always usable.
 *
 * @param fd open file descriptor (typically a memory-region fd)
 * @return block size in bytes, or 4096 if fstatfs() fails
 */
static long
get_huge_page_size (int fd)
{
  struct statfs s = { 0 };

  if (fstatfs (fd, &s) != 0)
    return 4096;		/* fstatfs failed; assume standard page size */

  return s.f_bsize;
}
302
/**
 * @brief Unmap and close every guest memory region of the interface.
 *
 * The munmap length is (size + mmap_offset) rounded up to the huge page
 * size of the backing fd, matching what was originally mapped. Resets
 * vui->nregions to 0 when done.
 */
static void
unmap_all_mem_regions (vhost_user_intf_t * vui)
{
  int i, r;
  for (i = 0; i < vui->nregions; i++)
    {
      /* (void *)-1 marks a region that was never successfully mapped */
      if (vui->region_mmap_addr[i] != (void *) -1)
	{

	  long page_sz = get_huge_page_size (vui->region_mmap_fd[i]);

	  /* Round (size + offset) up to a whole number of pages. */
	  ssize_t map_sz = (vui->regions[i].memory_size +
			    vui->regions[i].mmap_offset +
			    page_sz - 1) & ~(page_sz - 1);

	  /* The mapping started mmap_offset bytes before the stored addr. */
	  r =
	    munmap (vui->region_mmap_addr[i] - vui->regions[i].mmap_offset,
		    map_sz);

	  DBG_SOCK
	    ("unmap memory region %d addr 0x%lx len 0x%lx page_sz 0x%x", i,
	     vui->region_mmap_addr[i], map_sz, page_sz);

	  vui->region_mmap_addr[i] = (void *) -1;

	  if (r == -1)
	    {
	      clib_warning ("failed to unmap memory region (errno %d)",
			    errno);
	    }
	  close (vui->region_mmap_fd[i]);
	}
    }
  vui->nregions = 0;
}
338
/**
 * @brief Assign a TX queue (guest RX vring) to every worker thread.
 *
 * Round-robins the started+enabled RX vrings over all vlib mains. When
 * there are fewer usable vrings than threads the assignment wraps around,
 * meaning several threads share a queue, so use_tx_spinlock is set.
 * When no vring is usable at all, every thread is pointed at qid 0.
 */
static void
vhost_user_tx_thread_placement (vhost_user_intf_t * vui)
{
  //Let's try to assign one queue to each thread
  u32 qid = 0;
  u32 thread_index = 0;
  vui->use_tx_spinlock = 0;
  while (1)
    {
      for (qid = 0; qid < VHOST_VRING_MAX_N / 2; qid++)
	{
	  vhost_user_vring_t *rxvq = &vui->vrings[VHOST_VRING_IDX_RX (qid)];
	  if (!rxvq->started || !rxvq->enabled)
	    continue;

	  vui->per_cpu_tx_qid[thread_index] = qid;
	  thread_index++;
	  /* Every thread has a queue: done. */
	  if (thread_index == vlib_get_thread_main ()->n_vlib_mains)
	    return;
	}
      //We need to loop, meaning the spinlock has to be used
      vui->use_tx_spinlock = 1;
      if (thread_index == 0)
	{
	  //Could not find a single valid one
	  for (thread_index = 0;
	       thread_index < vlib_get_thread_main ()->n_vlib_mains;
	       thread_index++)
	    {
	      vui->per_cpu_tx_qid[thread_index] = 0;
	    }
	  return;
	}
    }
}
374
/**
 * @brief Unassign existing interface/queue to thread mappings and re-assign
 * new interface/queue to thread mappings
 *
 * Runs in three phases over all vhost-user interfaces:
 *  1. unassign every previously mapped rx queue and clear rx_queues,
 *  2. rebuild rx_queues from the vrings that are currently started,
 *  3. assign each queue to an input thread and restore its rx mode.
 */
static void
vhost_user_rx_thread_placement ()
{
  vhost_user_main_t *vum = &vhost_user_main;
  vhost_user_intf_t *vui;
  vhost_user_vring_t *txvq;
  vnet_main_t *vnm = vnet_get_main ();
  u32 qid;
  int rv;
  u16 *queue;

  // Scrap all existing mappings for all interfaces/queues
  /* *INDENT-OFF* */
  pool_foreach (vui, vum->vhost_user_interfaces, {
      vec_foreach (queue, vui->rx_queues)
	{
	  rv = vnet_hw_interface_unassign_rx_thread (vnm, vui->hw_if_index,
						     *queue);
	  if (rv)
	    clib_warning ("Warning: unable to unassign interface %d, "
			  "queue %d: rc=%d", vui->hw_if_index, *queue, rv);
	}
      vec_reset_length (vui->rx_queues);
  });
  /* *INDENT-ON* */

  // Create the rx_queues for all interfaces
  /* *INDENT-OFF* */
  pool_foreach (vui, vum->vhost_user_interfaces, {
      for (qid = 0; qid < VHOST_VRING_MAX_N / 2; qid++)
	{
	  /* The guest's TX virtqueue is our RX side. */
	  txvq = &vui->vrings[VHOST_VRING_IDX_TX (qid)];
	  if (txvq->started)
	    {
	      if (txvq->mode == VNET_HW_INTERFACE_RX_MODE_UNKNOWN)
		/* Set polling as the default */
		txvq->mode = VNET_HW_INTERFACE_RX_MODE_POLLING;
	      vec_add1 (vui->rx_queues, qid);
	    }
	}
  });
  /* *INDENT-ON* */

  // Assign new mappings for all interfaces/queues
  /* *INDENT-OFF* */
  pool_foreach (vui, vum->vhost_user_interfaces, {
      vnet_hw_interface_set_input_node (vnm, vui->hw_if_index,
					vhost_user_input_node.index);
      vec_foreach (queue, vui->rx_queues)
	{
	  /* ~0 lets the framework pick the thread. */
	  vnet_hw_interface_assign_rx_thread (vnm, vui->hw_if_index, *queue,
					      ~0);
	  txvq = &vui->vrings[VHOST_VRING_IDX_TX (*queue)];
	  rv = vnet_hw_interface_set_rx_mode (vnm, vui->hw_if_index, *queue,
					      txvq->mode);
	  if (rv)
	    clib_warning ("Warning: unable to set rx mode for interface %d, "
			  "queue %d: rc=%d", vui->hw_if_index, *queue, rv);
	}
  });
  /* *INDENT-ON* */
}
441
442/** @brief Returns whether at least one TX and one RX vring are enabled */
443int
444vhost_user_intf_ready (vhost_user_intf_t * vui)
445{
446 int i, found[2] = { }; //RX + TX
447
448 for (i = 0; i < VHOST_VRING_MAX_N; i++)
449 if (vui->vrings[i].started && vui->vrings[i].enabled)
450 found[i & 1] = 1;
451
452 return found[0] && found[1];
453}
454
455static void
456vhost_user_update_iface_state (vhost_user_intf_t * vui)
457{
458 /* if we have pointers to descriptor table, go up */
459 int is_up = vhost_user_intf_ready (vui);
460 if (is_up != vui->is_up)
461 {
462 DBG_SOCK ("interface %d %s", vui->sw_if_index,
463 is_up ? "ready" : "down");
464 vnet_hw_interface_set_flags (vnet_get_main (), vui->hw_if_index,
465 is_up ? VNET_HW_INTERFACE_FLAG_LINK_UP :
466 0);
467 vui->is_up = is_up;
468 }
469 vhost_user_rx_thread_placement ();
470 vhost_user_tx_thread_placement (vui);
471}
Ed Warnickecb9cada2015-12-08 15:45:58 -0700472
Steven7312cc72017-03-15 21:18:55 -0700473static void
474vhost_user_set_interrupt_pending (vhost_user_intf_t * vui, u32 ifq)
475{
Stevenf3b53642017-05-01 14:03:02 -0700476 u32 qid;
477 vnet_main_t *vnm = vnet_get_main ();
Stevene4dcba82017-04-04 16:56:54 -0700478
479 qid = ifq & 0xff;
Stevenf3b53642017-05-01 14:03:02 -0700480 if ((qid & 1) == 0)
481 /* Only care about the odd number, or TX, virtqueue */
Stevene4dcba82017-04-04 16:56:54 -0700482 return;
Steven7312cc72017-03-15 21:18:55 -0700483
484 if (vhost_user_intf_ready (vui))
Stevenf3b53642017-05-01 14:03:02 -0700485 // qid >> 1 is to convert virtqueue number to vring queue index
486 vnet_device_input_set_interrupt_pending (vnm, vui->hw_if_index, qid >> 1);
Steven7312cc72017-03-15 21:18:55 -0700487}
488
Damjan Marion00a9dca2016-08-17 17:05:46 +0200489static clib_error_t *
Damjan Marion56dd5432017-09-08 19:52:02 +0200490vhost_user_callfd_read_ready (clib_file_t * uf)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700491{
Damjan Marion00a9dca2016-08-17 17:05:46 +0200492 __attribute__ ((unused)) int n;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700493 u8 buff[8];
Steven7312cc72017-03-15 21:18:55 -0700494
Damjan Marion00a9dca2016-08-17 17:05:46 +0200495 n = read (uf->file_descriptor, ((char *) &buff), 8);
Steven7312cc72017-03-15 21:18:55 -0700496
Ed Warnickecb9cada2015-12-08 15:45:58 -0700497 return 0;
498}
499
/**
 * @brief Input-ready callback for a vring's kick eventfd.
 *
 * Drains the eventfd, marks the vring started on first kick (updating
 * interface state under the worker barrier), then flags the queue as
 * interrupt-pending. uf->private_data encodes
 * (interface pool index << 8) | virtqueue number.
 */
static clib_error_t *
vhost_user_kickfd_read_ready (clib_file_t * uf)
{
  __attribute__ ((unused)) int n;
  u8 buff[8];
  vhost_user_intf_t *vui =
    pool_elt_at_index (vhost_user_main.vhost_user_interfaces,
		       uf->private_data >> 8);
  u32 qid = uf->private_data & 0xff;

  n = read (uf->file_descriptor, ((char *) &buff), 8);
  DBG_SOCK ("if %d KICK queue %d", uf->private_data >> 8, qid);
  if (!vui->vrings[qid].started ||
      (vhost_user_intf_ready (vui) != vui->is_up))
    {
      /* Interface state is changing: stop workers while we flip it. */
      vlib_worker_thread_barrier_sync (vlib_get_main ());
      vui->vrings[qid].started = 1;
      vhost_user_update_iface_state (vui);
      vlib_worker_thread_barrier_release (vlib_get_main ());
    }

  vhost_user_set_interrupt_pending (vui, uf->private_data);
  return 0;
}
524
/**
 * @brief Try once to lock the vring
 * @return 0 on success, non-zero on failure.
 */
static inline int
vhost_user_vring_try_lock (vhost_user_intf_t * vui, u32 qid)
{
  /* Atomic test-and-set with acquire semantics; returns the old value,
     so non-zero means someone else already holds the lock. */
  return __sync_lock_test_and_set (vui->vring_locks[qid], 1);
}
534
535/**
536 * @brief Spin until the vring is successfully locked
537 */
538static inline void
539vhost_user_vring_lock (vhost_user_intf_t * vui, u32 qid)
540{
541 while (vhost_user_vring_try_lock (vui, qid))
542 ;
543}
544
/**
 * @brief Unlock the vring lock
 */
static inline void
vhost_user_vring_unlock (vhost_user_intf_t * vui, u32 qid)
{
  /* Plain store releases the spinlock taken via __sync_lock_test_and_set.
     NOTE(review): no explicit release barrier here — appears to rely on
     x86 store ordering; confirm before porting to weaker memory models. */
  *vui->vring_locks[qid] = 0;
}
553
554static inline void
555vhost_user_vring_init (vhost_user_intf_t * vui, u32 qid)
556{
557 vhost_user_vring_t *vring = &vui->vrings[qid];
558 memset (vring, 0, sizeof (*vring));
Pierre Pfisterdbb3c252016-11-22 10:33:34 +0000559 vring->kickfd_idx = ~0;
560 vring->callfd_idx = ~0;
Pierre Pfistere21c5282016-09-21 08:04:59 +0100561 vring->errfd = -1;
562
563 /*
564 * We have a bug with some qemu 2.5, and this may be a fix.
565 * Feel like interpretation holy text, but this is from vhost-user.txt.
566 * "
567 * One queue pair is enabled initially. More queues are enabled
568 * dynamically, by sending message VHOST_USER_SET_VRING_ENABLE.
569 * "
570 * Don't know who's right, but this is what DPDK does.
571 */
572 if (qid == 0 || qid == 1)
573 vring->enabled = 1;
574}
575
576static inline void
577vhost_user_vring_close (vhost_user_intf_t * vui, u32 qid)
578{
579 vhost_user_vring_t *vring = &vui->vrings[qid];
Pierre Pfisterdbb3c252016-11-22 10:33:34 +0000580 if (vring->kickfd_idx != ~0)
Pierre Pfistere21c5282016-09-21 08:04:59 +0100581 {
Damjan Marion56dd5432017-09-08 19:52:02 +0200582 clib_file_t *uf = pool_elt_at_index (file_main.file_pool,
Pierre Pfistere21c5282016-09-21 08:04:59 +0100583 vring->kickfd_idx);
Damjan Marion56dd5432017-09-08 19:52:02 +0200584 clib_file_del (&file_main, uf);
Pierre Pfisterdbb3c252016-11-22 10:33:34 +0000585 vring->kickfd_idx = ~0;
Pierre Pfistere21c5282016-09-21 08:04:59 +0100586 }
Pierre Pfisterdbb3c252016-11-22 10:33:34 +0000587 if (vring->callfd_idx != ~0)
Pierre Pfistere21c5282016-09-21 08:04:59 +0100588 {
Damjan Marion56dd5432017-09-08 19:52:02 +0200589 clib_file_t *uf = pool_elt_at_index (file_main.file_pool,
Pierre Pfistere21c5282016-09-21 08:04:59 +0100590 vring->callfd_idx);
Damjan Marion56dd5432017-09-08 19:52:02 +0200591 clib_file_del (&file_main, uf);
Pierre Pfisterdbb3c252016-11-22 10:33:34 +0000592 vring->callfd_idx = ~0;
Pierre Pfistere21c5282016-09-21 08:04:59 +0100593 }
594 if (vring->errfd != -1)
Stevenf6dae052017-03-09 23:49:32 -0800595 {
596 close (vring->errfd);
597 vring->errfd = -1;
598 }
Pierre Pfistere21c5282016-09-21 08:04:59 +0100599 vhost_user_vring_init (vui, qid);
600}
601
Damjan Marion00a9dca2016-08-17 17:05:46 +0200602static inline void
603vhost_user_if_disconnect (vhost_user_intf_t * vui)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700604{
Damjan Marion00a9dca2016-08-17 17:05:46 +0200605 vnet_main_t *vnm = vnet_get_main ();
Ed Warnickecb9cada2015-12-08 15:45:58 -0700606 int q;
607
Damjan Marion00a9dca2016-08-17 17:05:46 +0200608 vnet_hw_interface_set_flags (vnm, vui->hw_if_index, 0);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700609
Damjan Marion56dd5432017-09-08 19:52:02 +0200610 if (vui->clib_file_index != ~0)
Damjan Marion00a9dca2016-08-17 17:05:46 +0200611 {
Damjan Marion56dd5432017-09-08 19:52:02 +0200612 clib_file_del (&file_main, file_main.file_pool + vui->clib_file_index);
613 vui->clib_file_index = ~0;
Damjan Marion00a9dca2016-08-17 17:05:46 +0200614 }
Ed Warnickecb9cada2015-12-08 15:45:58 -0700615
Ed Warnickecb9cada2015-12-08 15:45:58 -0700616 vui->is_up = 0;
Steve Shin44489572016-09-22 12:08:55 -0700617
Pierre Pfistere21c5282016-09-21 08:04:59 +0100618 for (q = 0; q < VHOST_VRING_MAX_N; q++)
619 vhost_user_vring_close (vui, q);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700620
Damjan Marion00a9dca2016-08-17 17:05:46 +0200621 unmap_all_mem_regions (vui);
622 DBG_SOCK ("interface ifindex %d disconnected", vui->sw_if_index);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700623}
624
Yoann Desmouceaux4667c222016-02-24 22:51:00 +0100625#define VHOST_LOG_PAGE 0x1000
/**
 * @brief Mark guest pages covering [addr, addr+len) dirty in the log.
 *
 * Writes into the log bitmap shared with the front-end so modified guest
 * memory can be tracked (live migration). No-op when no log base is set
 * or VHOST_F_LOG_ALL was not negotiated.
 *
 * @param is_host_address when set, addr is a qemu userspace address and
 *        is first translated back through the memory regions.
 */
static_always_inline void
vhost_user_log_dirty_pages_2 (vhost_user_intf_t * vui,
			      u64 addr, u64 len, u8 is_host_address)
{
  if (PREDICT_TRUE (vui->log_base_addr == 0
		    || !(vui->features & (1 << FEAT_VHOST_F_LOG_ALL))))
    {
      return;
    }
  if (is_host_address)
    {
      addr = pointer_to_uword (map_user_mem (vui, (uword) addr));
    }
  /* Refuse to write past the end of the log region. */
  if (PREDICT_FALSE ((addr + len - 1) / VHOST_LOG_PAGE / 8 >= vui->log_size))
    {
      DBG_SOCK ("vhost_user_log_dirty_pages(): out of range\n");
      return;
    }

  /* Ensure data writes are visible before the corresponding log bits. */
  CLIB_MEMORY_BARRIER ();
  u64 page = addr / VHOST_LOG_PAGE;
  while (page * VHOST_LOG_PAGE < addr + len)
    {
      /* One bit per VHOST_LOG_PAGE-sized page. */
      ((u8 *) vui->log_base_addr)[page / 8] |= 1 << page % 8;
      page++;
    }
}
653
/** @brief Log dirty pages for a guest-physical address range. */
static_always_inline void
vhost_user_log_dirty_pages (vhost_user_intf_t * vui, u64 addr, u64 len)
{
  vhost_user_log_dirty_pages_2 (vui, addr, len, 0);
}
659
Yoann Desmouceaux4667c222016-02-24 22:51:00 +0100660#define vhost_user_log_dirty_ring(vui, vq, member) \
Yoann Desmouceauxfe2da0e2016-03-08 14:54:28 +0100661 if (PREDICT_FALSE(vq->log_used)) { \
Damjan Marion8d281b32016-08-24 14:32:39 +0200662 vhost_user_log_dirty_pages(vui, vq->log_guest_addr + STRUCT_OFFSET_OF(vring_used_t, member), \
Yoann Desmouceauxfe2da0e2016-03-08 14:54:28 +0100663 sizeof(vq->used->member)); \
664 }
Yoann Desmouceaux4667c222016-02-24 22:51:00 +0100665
Damjan Marion00a9dca2016-08-17 17:05:46 +0200666static clib_error_t *
Damjan Marion56dd5432017-09-08 19:52:02 +0200667vhost_user_socket_read (clib_file_t * uf)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700668{
669 int n, i;
670 int fd, number_of_fds = 0;
671 int fds[VHOST_MEMORY_MAX_NREGIONS];
672 vhost_user_msg_t msg;
673 struct msghdr mh;
674 struct iovec iov[1];
Damjan Marion00a9dca2016-08-17 17:05:46 +0200675 vhost_user_main_t *vum = &vhost_user_main;
676 vhost_user_intf_t *vui;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700677 struct cmsghdr *cmsg;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700678 u8 q;
Damjan Marion56dd5432017-09-08 19:52:02 +0200679 clib_file_t template = { 0 };
Damjan Marion00a9dca2016-08-17 17:05:46 +0200680 vnet_main_t *vnm = vnet_get_main ();
Ed Warnickecb9cada2015-12-08 15:45:58 -0700681
Pierre Pfisterdbb3c252016-11-22 10:33:34 +0000682 vui = pool_elt_at_index (vum->vhost_user_interfaces, uf->private_data);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700683
Damjan Marion00a9dca2016-08-17 17:05:46 +0200684 char control[CMSG_SPACE (VHOST_MEMORY_MAX_NREGIONS * sizeof (int))];
Ed Warnickecb9cada2015-12-08 15:45:58 -0700685
Damjan Marion00a9dca2016-08-17 17:05:46 +0200686 memset (&mh, 0, sizeof (mh));
687 memset (control, 0, sizeof (control));
Ed Warnickecb9cada2015-12-08 15:45:58 -0700688
Damjan Marion00a9dca2016-08-17 17:05:46 +0200689 for (i = 0; i < VHOST_MEMORY_MAX_NREGIONS; i++)
Damjan Mariona290d7c2016-08-16 12:37:24 +0200690 fds[i] = -1;
691
Ed Warnickecb9cada2015-12-08 15:45:58 -0700692 /* set the payload */
693 iov[0].iov_base = (void *) &msg;
694 iov[0].iov_len = VHOST_USER_MSG_HDR_SZ;
695
696 mh.msg_iov = iov;
697 mh.msg_iovlen = 1;
698 mh.msg_control = control;
Damjan Marion00a9dca2016-08-17 17:05:46 +0200699 mh.msg_controllen = sizeof (control);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700700
Damjan Marion00a9dca2016-08-17 17:05:46 +0200701 n = recvmsg (uf->file_descriptor, &mh, 0);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700702
Pierre Pfistere21c5282016-09-21 08:04:59 +0100703 /* Stop workers to avoid end of the world */
704 vlib_worker_thread_barrier_sync (vlib_get_main ());
705
Ed Warnickecb9cada2015-12-08 15:45:58 -0700706 if (n != VHOST_USER_MSG_HDR_SZ)
Pierre Pfistere21c5282016-09-21 08:04:59 +0100707 {
708 if (n == -1)
709 {
710 DBG_SOCK ("recvmsg returned error %d %s", errno, strerror (errno));
711 }
712 else
713 {
714 DBG_SOCK ("n (%d) != VHOST_USER_MSG_HDR_SZ (%d)",
715 n, VHOST_USER_MSG_HDR_SZ);
716 }
717 goto close_socket;
718 }
Ed Warnickecb9cada2015-12-08 15:45:58 -0700719
Damjan Marion00a9dca2016-08-17 17:05:46 +0200720 if (mh.msg_flags & MSG_CTRUNC)
721 {
Pierre Pfistere21c5282016-09-21 08:04:59 +0100722 DBG_SOCK ("MSG_CTRUNC is set");
Damjan Marion00a9dca2016-08-17 17:05:46 +0200723 goto close_socket;
724 }
Ed Warnickecb9cada2015-12-08 15:45:58 -0700725
Damjan Marion00a9dca2016-08-17 17:05:46 +0200726 cmsg = CMSG_FIRSTHDR (&mh);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700727
728 if (cmsg && (cmsg->cmsg_len > 0) && (cmsg->cmsg_level == SOL_SOCKET) &&
729 (cmsg->cmsg_type == SCM_RIGHTS) &&
Damjan Marion00a9dca2016-08-17 17:05:46 +0200730 (cmsg->cmsg_len - CMSG_LEN (0) <=
731 VHOST_MEMORY_MAX_NREGIONS * sizeof (int)))
732 {
733 number_of_fds = (cmsg->cmsg_len - CMSG_LEN (0)) / sizeof (int);
734 clib_memcpy (fds, CMSG_DATA (cmsg), number_of_fds * sizeof (int));
735 }
Ed Warnickecb9cada2015-12-08 15:45:58 -0700736
Damjan Marion00a9dca2016-08-17 17:05:46 +0200737 /* version 1, no reply bit set */
738 if ((msg.flags & 7) != 1)
739 {
740 DBG_SOCK ("malformed message received. closing socket");
741 goto close_socket;
742 }
Ed Warnickecb9cada2015-12-08 15:45:58 -0700743
744 {
Pierre Pfistere21c5282016-09-21 08:04:59 +0100745 int rv;
746 rv =
747 read (uf->file_descriptor, ((char *) &msg) + VHOST_USER_MSG_HDR_SZ,
748 msg.size);
749 if (rv < 0)
750 {
751 DBG_SOCK ("read failed %s", strerror (errno));
752 goto close_socket;
753 }
754 else if (rv != msg.size)
755 {
756 DBG_SOCK ("message too short (read %dB should be %dB)", rv, msg.size);
757 goto close_socket;
758 }
Ed Warnickecb9cada2015-12-08 15:45:58 -0700759 }
760
Damjan Marion00a9dca2016-08-17 17:05:46 +0200761 switch (msg.request)
762 {
Ed Warnickecb9cada2015-12-08 15:45:58 -0700763 case VHOST_USER_GET_FEATURES:
Ed Warnickecb9cada2015-12-08 15:45:58 -0700764 msg.flags |= 4;
Pierre Pfistere21c5282016-09-21 08:04:59 +0100765 msg.u64 = (1ULL << FEAT_VIRTIO_NET_F_MRG_RXBUF) |
766 (1ULL << FEAT_VIRTIO_NET_F_CTRL_VQ) |
767 (1ULL << FEAT_VIRTIO_F_ANY_LAYOUT) |
768 (1ULL << FEAT_VIRTIO_F_INDIRECT_DESC) |
769 (1ULL << FEAT_VHOST_F_LOG_ALL) |
770 (1ULL << FEAT_VIRTIO_NET_F_GUEST_ANNOUNCE) |
771 (1ULL << FEAT_VIRTIO_NET_F_MQ) |
772 (1ULL << FEAT_VHOST_USER_F_PROTOCOL_FEATURES) |
773 (1ULL << FEAT_VIRTIO_F_VERSION_1);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700774 msg.u64 &= vui->feature_mask;
Damjan Marion00a9dca2016-08-17 17:05:46 +0200775 msg.size = sizeof (msg.u64);
Pierre Pfistere21c5282016-09-21 08:04:59 +0100776 DBG_SOCK ("if %d msg VHOST_USER_GET_FEATURES - reply 0x%016llx",
777 vui->hw_if_index, msg.u64);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700778 break;
779
780 case VHOST_USER_SET_FEATURES:
Damjan Marion00a9dca2016-08-17 17:05:46 +0200781 DBG_SOCK ("if %d msg VHOST_USER_SET_FEATURES features 0x%016llx",
782 vui->hw_if_index, msg.u64);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700783
784 vui->features = msg.u64;
Yoann Desmouceaux4667c222016-02-24 22:51:00 +0100785
Pierre Pfistere21c5282016-09-21 08:04:59 +0100786 if (vui->features &
787 ((1 << FEAT_VIRTIO_NET_F_MRG_RXBUF) |
788 (1ULL << FEAT_VIRTIO_F_VERSION_1)))
Damjan Marion00a9dca2016-08-17 17:05:46 +0200789 vui->virtio_net_hdr_sz = 12;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700790 else
Damjan Marion00a9dca2016-08-17 17:05:46 +0200791 vui->virtio_net_hdr_sz = 10;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700792
Damjan Marion00a9dca2016-08-17 17:05:46 +0200793 vui->is_any_layout =
794 (vui->features & (1 << FEAT_VIRTIO_F_ANY_LAYOUT)) ? 1 : 0;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700795
796 ASSERT (vui->virtio_net_hdr_sz < VLIB_BUFFER_PRE_DATA_SIZE);
Damjan Marion00a9dca2016-08-17 17:05:46 +0200797 vnet_hw_interface_set_flags (vnm, vui->hw_if_index, 0);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700798 vui->is_up = 0;
799
Pierre Pfistere21c5282016-09-21 08:04:59 +0100800 /*for (q = 0; q < VHOST_VRING_MAX_N; q++)
801 vhost_user_vring_close(&vui->vrings[q]); */
Ed Warnickecb9cada2015-12-08 15:45:58 -0700802
803 break;
804
805 case VHOST_USER_SET_MEM_TABLE:
Damjan Marion00a9dca2016-08-17 17:05:46 +0200806 DBG_SOCK ("if %d msg VHOST_USER_SET_MEM_TABLE nregions %d",
807 vui->hw_if_index, msg.memory.nregions);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700808
809 if ((msg.memory.nregions < 1) ||
Damjan Marion00a9dca2016-08-17 17:05:46 +0200810 (msg.memory.nregions > VHOST_MEMORY_MAX_NREGIONS))
811 {
Ed Warnickecb9cada2015-12-08 15:45:58 -0700812
Damjan Marion00a9dca2016-08-17 17:05:46 +0200813 DBG_SOCK ("number of mem regions must be between 1 and %i",
814 VHOST_MEMORY_MAX_NREGIONS);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700815
Damjan Marion00a9dca2016-08-17 17:05:46 +0200816 goto close_socket;
817 }
Ed Warnickecb9cada2015-12-08 15:45:58 -0700818
Damjan Marion00a9dca2016-08-17 17:05:46 +0200819 if (msg.memory.nregions != number_of_fds)
820 {
821 DBG_SOCK ("each memory region must have FD");
822 goto close_socket;
823 }
824 unmap_all_mem_regions (vui);
825 for (i = 0; i < msg.memory.nregions; i++)
826 {
827 clib_memcpy (&(vui->regions[i]), &msg.memory.regions[i],
828 sizeof (vhost_user_memory_region_t));
Ed Warnickecb9cada2015-12-08 15:45:58 -0700829
Damjan Marion00a9dca2016-08-17 17:05:46 +0200830 long page_sz = get_huge_page_size (fds[i]);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700831
Damjan Marion00a9dca2016-08-17 17:05:46 +0200832 /* align size to 2M page */
833 ssize_t map_sz = (vui->regions[i].memory_size +
834 vui->regions[i].mmap_offset +
Pierre Pfisterbed54892017-04-20 15:34:00 +0200835 page_sz - 1) & ~(page_sz - 1);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700836
Damjan Marion00a9dca2016-08-17 17:05:46 +0200837 vui->region_mmap_addr[i] = mmap (0, map_sz, PROT_READ | PROT_WRITE,
838 MAP_SHARED, fds[i], 0);
Damjan Marion37623702016-09-20 11:25:27 +0200839 vui->region_guest_addr_lo[i] = vui->regions[i].guest_phys_addr;
840 vui->region_guest_addr_hi[i] = vui->regions[i].guest_phys_addr +
841 vui->regions[i].memory_size;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700842
Damjan Marion00a9dca2016-08-17 17:05:46 +0200843 DBG_SOCK
844 ("map memory region %d addr 0 len 0x%lx fd %d mapped 0x%lx "
845 "page_sz 0x%x", i, map_sz, fds[i], vui->region_mmap_addr[i],
846 page_sz);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700847
Damjan Marion00a9dca2016-08-17 17:05:46 +0200848 if (vui->region_mmap_addr[i] == MAP_FAILED)
849 {
850 clib_warning ("failed to map memory. errno is %d", errno);
851 goto close_socket;
852 }
853 vui->region_mmap_addr[i] += vui->regions[i].mmap_offset;
854 vui->region_mmap_fd[i] = fds[i];
855 }
Ed Warnickecb9cada2015-12-08 15:45:58 -0700856 vui->nregions = msg.memory.nregions;
857 break;
858
859 case VHOST_USER_SET_VRING_NUM:
Damjan Marion00a9dca2016-08-17 17:05:46 +0200860 DBG_SOCK ("if %d msg VHOST_USER_SET_VRING_NUM idx %d num %d",
861 vui->hw_if_index, msg.state.index, msg.state.num);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700862
Damjan Marion00a9dca2016-08-17 17:05:46 +0200863 if ((msg.state.num > 32768) || /* maximum ring size is 32768 */
864 (msg.state.num == 0) || /* it cannot be zero */
Pierre Pfistere21c5282016-09-21 08:04:59 +0100865 ((msg.state.num - 1) & msg.state.num)) /* must be power of 2 */
Damjan Marion00a9dca2016-08-17 17:05:46 +0200866 goto close_socket;
Steven97878892017-08-29 09:23:26 -0700867 vui->vrings[msg.state.index].qsz_mask = msg.state.num - 1;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700868 break;
869
870 case VHOST_USER_SET_VRING_ADDR:
Damjan Marion00a9dca2016-08-17 17:05:46 +0200871 DBG_SOCK ("if %d msg VHOST_USER_SET_VRING_ADDR idx %d",
872 vui->hw_if_index, msg.state.index);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700873
Pierre Pfistere21c5282016-09-21 08:04:59 +0100874 if (msg.state.index >= VHOST_VRING_MAX_N)
875 {
876 DBG_SOCK ("invalid vring index VHOST_USER_SET_VRING_ADDR:"
877 " %d >= %d", msg.state.index, VHOST_VRING_MAX_N);
878 goto close_socket;
879 }
880
881 if (msg.size < sizeof (msg.addr))
882 {
883 DBG_SOCK ("vhost message is too short (%d < %d)",
884 msg.size, sizeof (msg.addr));
885 goto close_socket;
886 }
887
Damjan Marion00a9dca2016-08-17 17:05:46 +0200888 vui->vrings[msg.state.index].desc = (vring_desc_t *)
889 map_user_mem (vui, msg.addr.desc_user_addr);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700890 vui->vrings[msg.state.index].used = (vring_used_t *)
Damjan Marion00a9dca2016-08-17 17:05:46 +0200891 map_user_mem (vui, msg.addr.used_user_addr);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700892 vui->vrings[msg.state.index].avail = (vring_avail_t *)
Damjan Marion00a9dca2016-08-17 17:05:46 +0200893 map_user_mem (vui, msg.addr.avail_user_addr);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700894
895 if ((vui->vrings[msg.state.index].desc == NULL) ||
Damjan Marion00a9dca2016-08-17 17:05:46 +0200896 (vui->vrings[msg.state.index].used == NULL) ||
897 (vui->vrings[msg.state.index].avail == NULL))
898 {
899 DBG_SOCK ("failed to map user memory for hw_if_index %d",
900 vui->hw_if_index);
901 goto close_socket;
902 }
Ed Warnickecb9cada2015-12-08 15:45:58 -0700903
Yoann Desmouceaux4667c222016-02-24 22:51:00 +0100904 vui->vrings[msg.state.index].log_guest_addr = msg.addr.log_guest_addr;
Yoann Desmouceauxfe2da0e2016-03-08 14:54:28 +0100905 vui->vrings[msg.state.index].log_used =
Damjan Marion00a9dca2016-08-17 17:05:46 +0200906 (msg.addr.flags & (1 << VHOST_VRING_F_LOG)) ? 1 : 0;
Yoann Desmouceaux4667c222016-02-24 22:51:00 +0100907
908 /* Spec says: If VHOST_USER_F_PROTOCOL_FEATURES has not been negotiated,
Damjan Marion00a9dca2016-08-17 17:05:46 +0200909 the ring is initialized in an enabled state. */
Damjan Marion00a9dca2016-08-17 17:05:46 +0200910 if (!(vui->features & (1 << FEAT_VHOST_USER_F_PROTOCOL_FEATURES)))
911 {
912 vui->vrings[msg.state.index].enabled = 1;
913 }
Yoann Desmouceaux4667c222016-02-24 22:51:00 +0100914
Ed Warnickecb9cada2015-12-08 15:45:58 -0700915 vui->vrings[msg.state.index].last_used_idx =
Damjan Marion10eb1ea2016-10-13 10:02:19 +0200916 vui->vrings[msg.state.index].last_avail_idx =
Damjan Marion00a9dca2016-08-17 17:05:46 +0200917 vui->vrings[msg.state.index].used->idx;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700918
Stevenf3b53642017-05-01 14:03:02 -0700919 /* tell driver that we don't want interrupts */
920 vui->vrings[msg.state.index].used->flags = VRING_USED_F_NO_NOTIFY;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700921 break;
922
923 case VHOST_USER_SET_OWNER:
Damjan Marion00a9dca2016-08-17 17:05:46 +0200924 DBG_SOCK ("if %d msg VHOST_USER_SET_OWNER", vui->hw_if_index);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700925 break;
926
927 case VHOST_USER_RESET_OWNER:
Damjan Marion00a9dca2016-08-17 17:05:46 +0200928 DBG_SOCK ("if %d msg VHOST_USER_RESET_OWNER", vui->hw_if_index);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700929 break;
930
931 case VHOST_USER_SET_VRING_CALL:
Steven388e51a2017-06-01 12:49:23 -0700932 DBG_SOCK ("if %d msg VHOST_USER_SET_VRING_CALL %d",
Damjan Marion00a9dca2016-08-17 17:05:46 +0200933 vui->hw_if_index, msg.u64);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700934
935 q = (u8) (msg.u64 & 0xFF);
936
Pierre Pfistere21c5282016-09-21 08:04:59 +0100937 /* if there is old fd, delete and close it */
Pierre Pfisterdbb3c252016-11-22 10:33:34 +0000938 if (vui->vrings[q].callfd_idx != ~0)
Pierre Pfistere21c5282016-09-21 08:04:59 +0100939 {
Damjan Marion56dd5432017-09-08 19:52:02 +0200940 clib_file_t *uf = pool_elt_at_index (file_main.file_pool,
Pierre Pfistere21c5282016-09-21 08:04:59 +0100941 vui->vrings[q].callfd_idx);
Damjan Marion56dd5432017-09-08 19:52:02 +0200942 clib_file_del (&file_main, uf);
Pierre Pfisterdbb3c252016-11-22 10:33:34 +0000943 vui->vrings[q].callfd_idx = ~0;
Pierre Pfistere21c5282016-09-21 08:04:59 +0100944 }
945
Steven49a04b92017-07-29 08:56:08 -0700946 if (!(msg.u64 & VHOST_USER_VRING_NOFD_MASK))
Damjan Marion00a9dca2016-08-17 17:05:46 +0200947 {
948 if (number_of_fds != 1)
Damjan Marion00a9dca2016-08-17 17:05:46 +0200949 {
Pierre Pfistere21c5282016-09-21 08:04:59 +0100950 DBG_SOCK ("More than one fd received !");
951 goto close_socket;
Damjan Marion00a9dca2016-08-17 17:05:46 +0200952 }
Pierre Pfistere21c5282016-09-21 08:04:59 +0100953
Damjan Marion00a9dca2016-08-17 17:05:46 +0200954 template.read_function = vhost_user_callfd_read_ready;
955 template.file_descriptor = fds[0];
Pierre Pfisterdbb3c252016-11-22 10:33:34 +0000956 template.private_data =
957 ((vui - vhost_user_main.vhost_user_interfaces) << 8) + q;
Damjan Marion56dd5432017-09-08 19:52:02 +0200958 vui->vrings[q].callfd_idx = clib_file_add (&file_main, &template);
Damjan Marion00a9dca2016-08-17 17:05:46 +0200959 }
Ed Warnickecb9cada2015-12-08 15:45:58 -0700960 else
Pierre Pfisterdbb3c252016-11-22 10:33:34 +0000961 vui->vrings[q].callfd_idx = ~0;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700962 break;
963
964 case VHOST_USER_SET_VRING_KICK:
Steven388e51a2017-06-01 12:49:23 -0700965 DBG_SOCK ("if %d msg VHOST_USER_SET_VRING_KICK %d",
Damjan Marion00a9dca2016-08-17 17:05:46 +0200966 vui->hw_if_index, msg.u64);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700967
968 q = (u8) (msg.u64 & 0xFF);
969
Pierre Pfisterdbb3c252016-11-22 10:33:34 +0000970 if (vui->vrings[q].kickfd_idx != ~0)
Pierre Pfistere21c5282016-09-21 08:04:59 +0100971 {
Damjan Marion56dd5432017-09-08 19:52:02 +0200972 clib_file_t *uf = pool_elt_at_index (file_main.file_pool,
Pierre Pfisterdbb3c252016-11-22 10:33:34 +0000973 vui->vrings[q].kickfd_idx);
Damjan Marion56dd5432017-09-08 19:52:02 +0200974 clib_file_del (&file_main, uf);
Pierre Pfisterdbb3c252016-11-22 10:33:34 +0000975 vui->vrings[q].kickfd_idx = ~0;
Pierre Pfistere21c5282016-09-21 08:04:59 +0100976 }
977
Steven49a04b92017-07-29 08:56:08 -0700978 if (!(msg.u64 & VHOST_USER_VRING_NOFD_MASK))
Damjan Marion00a9dca2016-08-17 17:05:46 +0200979 {
980 if (number_of_fds != 1)
Pierre Pfistere21c5282016-09-21 08:04:59 +0100981 {
982 DBG_SOCK ("More than one fd received !");
983 goto close_socket;
984 }
Ed Warnickecb9cada2015-12-08 15:45:58 -0700985
Pierre Pfistere21c5282016-09-21 08:04:59 +0100986 template.read_function = vhost_user_kickfd_read_ready;
987 template.file_descriptor = fds[0];
988 template.private_data =
Pierre Pfisterdbb3c252016-11-22 10:33:34 +0000989 (((uword) (vui - vhost_user_main.vhost_user_interfaces)) << 8) +
990 q;
Damjan Marion56dd5432017-09-08 19:52:02 +0200991 vui->vrings[q].kickfd_idx = clib_file_add (&file_main, &template);
Damjan Marion00a9dca2016-08-17 17:05:46 +0200992 }
Ed Warnickecb9cada2015-12-08 15:45:58 -0700993 else
Pierre Pfistere21c5282016-09-21 08:04:59 +0100994 {
Pierre Pfisterdbb3c252016-11-22 10:33:34 +0000995 //When no kickfd is set, the queue is initialized as started
996 vui->vrings[q].kickfd_idx = ~0;
Pierre Pfistere21c5282016-09-21 08:04:59 +0100997 vui->vrings[q].started = 1;
998 }
999
Ed Warnickecb9cada2015-12-08 15:45:58 -07001000 break;
1001
1002 case VHOST_USER_SET_VRING_ERR:
Steven388e51a2017-06-01 12:49:23 -07001003 DBG_SOCK ("if %d msg VHOST_USER_SET_VRING_ERR %d",
Damjan Marion00a9dca2016-08-17 17:05:46 +02001004 vui->hw_if_index, msg.u64);
Ed Warnickecb9cada2015-12-08 15:45:58 -07001005
1006 q = (u8) (msg.u64 & 0xFF);
1007
Pierre Pfistere21c5282016-09-21 08:04:59 +01001008 if (vui->vrings[q].errfd != -1)
1009 close (vui->vrings[q].errfd);
1010
Steven49a04b92017-07-29 08:56:08 -07001011 if (!(msg.u64 & VHOST_USER_VRING_NOFD_MASK))
Damjan Marion00a9dca2016-08-17 17:05:46 +02001012 {
1013 if (number_of_fds != 1)
1014 goto close_socket;
Ed Warnickecb9cada2015-12-08 15:45:58 -07001015
Pierre Pfistere21c5282016-09-21 08:04:59 +01001016 vui->vrings[q].errfd = fds[0];
Damjan Marion00a9dca2016-08-17 17:05:46 +02001017 }
Ed Warnickecb9cada2015-12-08 15:45:58 -07001018 else
Pierre Pfistere21c5282016-09-21 08:04:59 +01001019 vui->vrings[q].errfd = -1;
Ed Warnickecb9cada2015-12-08 15:45:58 -07001020
Ed Warnickecb9cada2015-12-08 15:45:58 -07001021 break;
1022
1023 case VHOST_USER_SET_VRING_BASE:
Damjan Marion00a9dca2016-08-17 17:05:46 +02001024 DBG_SOCK ("if %d msg VHOST_USER_SET_VRING_BASE idx %d num %d",
1025 vui->hw_if_index, msg.state.index, msg.state.num);
Ed Warnickecb9cada2015-12-08 15:45:58 -07001026
1027 vui->vrings[msg.state.index].last_avail_idx = msg.state.num;
1028 break;
1029
1030 case VHOST_USER_GET_VRING_BASE:
Pierre Pfistere21c5282016-09-21 08:04:59 +01001031 if (msg.state.index >= VHOST_VRING_MAX_N)
1032 {
1033 DBG_SOCK ("invalid vring index VHOST_USER_GET_VRING_BASE:"
1034 " %d >= %d", msg.state.index, VHOST_VRING_MAX_N);
1035 goto close_socket;
1036 }
1037
Stevenf6dae052017-03-09 23:49:32 -08001038 /*
1039 * Copy last_avail_idx from the vring before closing it because
1040 * closing the vring also initializes the vring last_avail_idx
1041 */
Yoann Desmouceaux4667c222016-02-24 22:51:00 +01001042 msg.state.num = vui->vrings[msg.state.index].last_avail_idx;
Ed Warnickecb9cada2015-12-08 15:45:58 -07001043 msg.flags |= 4;
Damjan Marion00a9dca2016-08-17 17:05:46 +02001044 msg.size = sizeof (msg.state);
Stevenf6dae052017-03-09 23:49:32 -08001045
1046 /* Spec says: Client must [...] stop ring upon receiving VHOST_USER_GET_VRING_BASE. */
1047 vhost_user_vring_close (vui, msg.state.index);
Steven388e51a2017-06-01 12:49:23 -07001048 DBG_SOCK ("if %d msg VHOST_USER_GET_VRING_BASE idx %d num %d",
1049 vui->hw_if_index, msg.state.index, msg.state.num);
Ed Warnickecb9cada2015-12-08 15:45:58 -07001050 break;
1051
1052 case VHOST_USER_NONE:
Damjan Marion00a9dca2016-08-17 17:05:46 +02001053 DBG_SOCK ("if %d msg VHOST_USER_NONE", vui->hw_if_index);
Ed Warnickecb9cada2015-12-08 15:45:58 -07001054
1055 break;
1056
1057 case VHOST_USER_SET_LOG_BASE:
Damjan Marion00a9dca2016-08-17 17:05:46 +02001058 {
1059 DBG_SOCK ("if %d msg VHOST_USER_SET_LOG_BASE", vui->hw_if_index);
Ed Warnickecb9cada2015-12-08 15:45:58 -07001060
Damjan Marion00a9dca2016-08-17 17:05:46 +02001061 if (msg.size != sizeof (msg.log))
1062 {
1063 DBG_SOCK
1064 ("invalid msg size for VHOST_USER_SET_LOG_BASE: %d instead of %d",
1065 msg.size, sizeof (msg.log));
1066 goto close_socket;
1067 }
1068
1069 if (!
1070 (vui->protocol_features & (1 << VHOST_USER_PROTOCOL_F_LOG_SHMFD)))
1071 {
1072 DBG_SOCK
1073 ("VHOST_USER_PROTOCOL_F_LOG_SHMFD not set but VHOST_USER_SET_LOG_BASE received");
1074 goto close_socket;
1075 }
1076
1077 fd = fds[0];
1078 /* align size to 2M page */
1079 long page_sz = get_huge_page_size (fd);
1080 ssize_t map_sz =
Pierre Pfisterbed54892017-04-20 15:34:00 +02001081 (msg.log.size + msg.log.offset + page_sz - 1) & ~(page_sz - 1);
Damjan Marion00a9dca2016-08-17 17:05:46 +02001082
1083 vui->log_base_addr = mmap (0, map_sz, PROT_READ | PROT_WRITE,
1084 MAP_SHARED, fd, 0);
1085
1086 DBG_SOCK
1087 ("map log region addr 0 len 0x%lx off 0x%lx fd %d mapped 0x%lx",
1088 map_sz, msg.log.offset, fd, vui->log_base_addr);
1089
1090 if (vui->log_base_addr == MAP_FAILED)
1091 {
1092 clib_warning ("failed to map memory. errno is %d", errno);
1093 goto close_socket;
1094 }
1095
1096 vui->log_base_addr += msg.log.offset;
1097 vui->log_size = msg.log.size;
1098
1099 msg.flags |= 4;
1100 msg.size = sizeof (msg.u64);
1101
1102 break;
Yoann Desmouceaux4667c222016-02-24 22:51:00 +01001103 }
1104
Ed Warnickecb9cada2015-12-08 15:45:58 -07001105 case VHOST_USER_SET_LOG_FD:
Damjan Marion00a9dca2016-08-17 17:05:46 +02001106 DBG_SOCK ("if %d msg VHOST_USER_SET_LOG_FD", vui->hw_if_index);
Ed Warnickecb9cada2015-12-08 15:45:58 -07001107
1108 break;
1109
Yoann Desmouceaux4667c222016-02-24 22:51:00 +01001110 case VHOST_USER_GET_PROTOCOL_FEATURES:
Yoann Desmouceaux4667c222016-02-24 22:51:00 +01001111 msg.flags |= 4;
Pierre Pfistere21c5282016-09-21 08:04:59 +01001112 msg.u64 = (1 << VHOST_USER_PROTOCOL_F_LOG_SHMFD) |
1113 (1 << VHOST_USER_PROTOCOL_F_MQ);
Damjan Marion00a9dca2016-08-17 17:05:46 +02001114 msg.size = sizeof (msg.u64);
Steven388e51a2017-06-01 12:49:23 -07001115 DBG_SOCK
1116 ("if %d msg VHOST_USER_GET_PROTOCOL_FEATURES - reply 0x%016llx",
1117 vui->hw_if_index, msg.u64);
Yoann Desmouceaux4667c222016-02-24 22:51:00 +01001118 break;
1119
1120 case VHOST_USER_SET_PROTOCOL_FEATURES:
Steven388e51a2017-06-01 12:49:23 -07001121 DBG_SOCK
1122 ("if %d msg VHOST_USER_SET_PROTOCOL_FEATURES features 0x%016llx",
1123 vui->hw_if_index, msg.u64);
Yoann Desmouceaux4667c222016-02-24 22:51:00 +01001124
1125 vui->protocol_features = msg.u64;
1126
1127 break;
1128
Pierre Pfistere21c5282016-09-21 08:04:59 +01001129 case VHOST_USER_GET_QUEUE_NUM:
Pierre Pfistere21c5282016-09-21 08:04:59 +01001130 msg.flags |= 4;
1131 msg.u64 = VHOST_VRING_MAX_N;
1132 msg.size = sizeof (msg.u64);
Steven388e51a2017-06-01 12:49:23 -07001133 DBG_SOCK ("if %d msg VHOST_USER_GET_QUEUE_NUM - reply %d",
1134 vui->hw_if_index, msg.u64);
Pierre Pfistere21c5282016-09-21 08:04:59 +01001135 break;
1136
Yoann Desmouceaux4667c222016-02-24 22:51:00 +01001137 case VHOST_USER_SET_VRING_ENABLE:
Pierre Pfistere21c5282016-09-21 08:04:59 +01001138 DBG_SOCK ("if %d VHOST_USER_SET_VRING_ENABLE: %s queue %d",
1139 vui->hw_if_index, msg.state.num ? "enable" : "disable",
1140 msg.state.index);
1141 if (msg.state.index >= VHOST_VRING_MAX_N)
1142 {
1143 DBG_SOCK ("invalid vring index VHOST_USER_SET_VRING_ENABLE:"
1144 " %d >= %d", msg.state.index, VHOST_VRING_MAX_N);
1145 goto close_socket;
1146 }
1147
Yoann Desmouceaux4667c222016-02-24 22:51:00 +01001148 vui->vrings[msg.state.index].enabled = msg.state.num;
1149 break;
1150
Ed Warnickecb9cada2015-12-08 15:45:58 -07001151 default:
Damjan Marion00a9dca2016-08-17 17:05:46 +02001152 DBG_SOCK ("unknown vhost-user message %d received. closing socket",
1153 msg.request);
Ed Warnickecb9cada2015-12-08 15:45:58 -07001154 goto close_socket;
Damjan Marion00a9dca2016-08-17 17:05:46 +02001155 }
Ed Warnickecb9cada2015-12-08 15:45:58 -07001156
Ed Warnickecb9cada2015-12-08 15:45:58 -07001157 /* if we need to reply */
1158 if (msg.flags & 4)
Damjan Marion00a9dca2016-08-17 17:05:46 +02001159 {
1160 n =
1161 send (uf->file_descriptor, &msg, VHOST_USER_MSG_HDR_SZ + msg.size, 0);
Ed Warnickecb9cada2015-12-08 15:45:58 -07001162 if (n != (msg.size + VHOST_USER_MSG_HDR_SZ))
Pierre Pfistere21c5282016-09-21 08:04:59 +01001163 {
1164 DBG_SOCK ("could not send message response");
1165 goto close_socket;
1166 }
Damjan Marion00a9dca2016-08-17 17:05:46 +02001167 }
Ed Warnickecb9cada2015-12-08 15:45:58 -07001168
Pierre Pfistere21c5282016-09-21 08:04:59 +01001169 vhost_user_update_iface_state (vui);
1170 vlib_worker_thread_barrier_release (vlib_get_main ());
Ed Warnickecb9cada2015-12-08 15:45:58 -07001171 return 0;
1172
1173close_socket:
Damjan Marion00a9dca2016-08-17 17:05:46 +02001174 vhost_user_if_disconnect (vui);
Pierre Pfistere21c5282016-09-21 08:04:59 +01001175 vhost_user_update_iface_state (vui);
1176 vlib_worker_thread_barrier_release (vlib_get_main ());
Ed Warnickecb9cada2015-12-08 15:45:58 -07001177 return 0;
1178}
1179
Damjan Marion00a9dca2016-08-17 17:05:46 +02001180static clib_error_t *
Damjan Marion56dd5432017-09-08 19:52:02 +02001181vhost_user_socket_error (clib_file_t * uf)
Ed Warnickecb9cada2015-12-08 15:45:58 -07001182{
Pierre Pfistere21c5282016-09-21 08:04:59 +01001183 vlib_main_t *vm = vlib_get_main ();
Damjan Marion00a9dca2016-08-17 17:05:46 +02001184 vhost_user_main_t *vum = &vhost_user_main;
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00001185 vhost_user_intf_t *vui =
1186 pool_elt_at_index (vum->vhost_user_interfaces, uf->private_data);
Ed Warnickecb9cada2015-12-08 15:45:58 -07001187
Pierre Pfistere21c5282016-09-21 08:04:59 +01001188 DBG_SOCK ("socket error on if %d", vui->sw_if_index);
1189 vlib_worker_thread_barrier_sync (vm);
Damjan Marion00a9dca2016-08-17 17:05:46 +02001190 vhost_user_if_disconnect (vui);
Pierre Pfistere21c5282016-09-21 08:04:59 +01001191 vhost_user_rx_thread_placement ();
1192 vlib_worker_thread_barrier_release (vm);
Ed Warnickecb9cada2015-12-08 15:45:58 -07001193 return 0;
1194}
1195
Damjan Marion00a9dca2016-08-17 17:05:46 +02001196static clib_error_t *
Damjan Marion56dd5432017-09-08 19:52:02 +02001197vhost_user_socksvr_accept_ready (clib_file_t * uf)
Ed Warnickecb9cada2015-12-08 15:45:58 -07001198{
1199 int client_fd, client_len;
1200 struct sockaddr_un client;
Damjan Marion56dd5432017-09-08 19:52:02 +02001201 clib_file_t template = { 0 };
Damjan Marion00a9dca2016-08-17 17:05:46 +02001202 vhost_user_main_t *vum = &vhost_user_main;
1203 vhost_user_intf_t *vui;
Ed Warnickecb9cada2015-12-08 15:45:58 -07001204
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00001205 vui = pool_elt_at_index (vum->vhost_user_interfaces, uf->private_data);
Ed Warnickecb9cada2015-12-08 15:45:58 -07001206
Damjan Marion00a9dca2016-08-17 17:05:46 +02001207 client_len = sizeof (client);
Ed Warnickecb9cada2015-12-08 15:45:58 -07001208 client_fd = accept (uf->file_descriptor,
Damjan Marion00a9dca2016-08-17 17:05:46 +02001209 (struct sockaddr *) &client,
1210 (socklen_t *) & client_len);
Ed Warnickecb9cada2015-12-08 15:45:58 -07001211
1212 if (client_fd < 0)
Damjan Marion00a9dca2016-08-17 17:05:46 +02001213 return clib_error_return_unix (0, "accept");
Ed Warnickecb9cada2015-12-08 15:45:58 -07001214
Pierre Pfistere21c5282016-09-21 08:04:59 +01001215 DBG_SOCK ("New client socket for vhost interface %d", vui->sw_if_index);
Ed Warnickecb9cada2015-12-08 15:45:58 -07001216 template.read_function = vhost_user_socket_read;
1217 template.error_function = vhost_user_socket_error;
1218 template.file_descriptor = client_fd;
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00001219 template.private_data = vui - vhost_user_main.vhost_user_interfaces;
Damjan Marion56dd5432017-09-08 19:52:02 +02001220 vui->clib_file_index = clib_file_add (&file_main, &template);
Ed Warnickecb9cada2015-12-08 15:45:58 -07001221 return 0;
1222}
1223
1224static clib_error_t *
1225vhost_user_init (vlib_main_t * vm)
1226{
Damjan Marion00a9dca2016-08-17 17:05:46 +02001227 clib_error_t *error;
1228 vhost_user_main_t *vum = &vhost_user_main;
1229 vlib_thread_main_t *tm = vlib_get_thread_main ();
Ed Warnickecb9cada2015-12-08 15:45:58 -07001230
1231 error = vlib_call_init_function (vm, ip4_init);
1232 if (error)
1233 return error;
1234
Ed Warnickecb9cada2015-12-08 15:45:58 -07001235 vum->coalesce_frames = 32;
1236 vum->coalesce_time = 1e-3;
1237
Pierre Pfistere21c5282016-09-21 08:04:59 +01001238 vec_validate (vum->cpus, tm->n_vlib_mains - 1);
Ed Warnickecb9cada2015-12-08 15:45:58 -07001239
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00001240 vhost_cpu_t *cpu;
1241 vec_foreach (cpu, vum->cpus)
1242 {
1243 /* This is actually not necessary as validate already zeroes it
1244 * Just keeping the loop here for later because I am lazy. */
1245 cpu->rx_buffers_len = 0;
1246 }
1247
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00001248 vum->random = random_default_seed ();
1249
Steven5445f5f2017-04-25 16:16:00 -07001250 mhash_init_c_string (&vum->if_index_by_sock_name, sizeof (uword));
1251
Ed Warnickecb9cada2015-12-08 15:45:58 -07001252 return 0;
1253}
1254
1255VLIB_INIT_FUNCTION (vhost_user_init);
1256
Damjan Marion00a9dca2016-08-17 17:05:46 +02001257static u8 *
Pierre Pfister116ea4b2016-11-08 15:49:28 +00001258format_vhost_trace (u8 * s, va_list * va)
Ed Warnickecb9cada2015-12-08 15:45:58 -07001259{
1260 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
1261 CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
Damjan Marion00a9dca2016-08-17 17:05:46 +02001262 CLIB_UNUSED (vnet_main_t * vnm) = vnet_get_main ();
1263 vhost_user_main_t *vum = &vhost_user_main;
Pierre Pfister116ea4b2016-11-08 15:49:28 +00001264 vhost_trace_t *t = va_arg (*va, vhost_trace_t *);
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00001265 vhost_user_intf_t *vui = pool_elt_at_index (vum->vhost_user_interfaces,
1266 t->device_index);
Ed Warnickecb9cada2015-12-08 15:45:58 -07001267
Damjan Marion00a9dca2016-08-17 17:05:46 +02001268 vnet_sw_interface_t *sw = vnet_get_sw_interface (vnm, vui->sw_if_index);
Ed Warnickecb9cada2015-12-08 15:45:58 -07001269
Christophe Fontained3c008d2017-10-02 18:10:54 +02001270 u32 indent = format_get_indent (s);
Ed Warnickecb9cada2015-12-08 15:45:58 -07001271
Pierre Pfister116ea4b2016-11-08 15:49:28 +00001272 s = format (s, "%U %U queue %d\n", format_white_space, indent,
Pierre Pfistere21c5282016-09-21 08:04:59 +01001273 format_vnet_sw_interface_name, vnm, sw, t->qid);
Ed Warnickecb9cada2015-12-08 15:45:58 -07001274
Pierre Pfister116ea4b2016-11-08 15:49:28 +00001275 s = format (s, "%U virtio flags:\n", format_white_space, indent);
1276#define _(n,i,st) \
1277 if (t->virtio_ring_flags & (1 << VIRTIO_TRACE_F_##n)) \
1278 s = format (s, "%U %s %s\n", format_white_space, indent, #n, st);
1279 foreach_virtio_trace_flags
1280#undef _
1281 s = format (s, "%U virtio_net_hdr first_desc_len %u\n",
1282 format_white_space, indent, t->first_desc_len);
1283
1284 s = format (s, "%U flags 0x%02x gso_type %u\n",
Damjan Marion00a9dca2016-08-17 17:05:46 +02001285 format_white_space, indent,
Pierre Pfister116ea4b2016-11-08 15:49:28 +00001286 t->hdr.hdr.flags, t->hdr.hdr.gso_type);
1287
1288 if (vui->virtio_net_hdr_sz == 12)
1289 s = format (s, "%U num_buff %u",
1290 format_white_space, indent, t->hdr.num_buffers);
Ed Warnickecb9cada2015-12-08 15:45:58 -07001291
1292 return s;
1293}
1294
Damjan Marion00a9dca2016-08-17 17:05:46 +02001295void
Pierre Pfister116ea4b2016-11-08 15:49:28 +00001296vhost_user_rx_trace (vhost_trace_t * t,
1297 vhost_user_intf_t * vui, u16 qid,
1298 vlib_buffer_t * b, vhost_user_vring_t * txvq)
Ed Warnickecb9cada2015-12-08 15:45:58 -07001299{
Damjan Marion00a9dca2016-08-17 17:05:46 +02001300 vhost_user_main_t *vum = &vhost_user_main;
Pierre Pfister116ea4b2016-11-08 15:49:28 +00001301 u32 last_avail_idx = txvq->last_avail_idx;
Steven97878892017-08-29 09:23:26 -07001302 u32 desc_current = txvq->avail->ring[last_avail_idx & txvq->qsz_mask];
Pierre Pfister116ea4b2016-11-08 15:49:28 +00001303 vring_desc_t *hdr_desc = 0;
1304 virtio_net_hdr_mrg_rxbuf_t *hdr;
1305 u32 hint = 0;
Ed Warnickecb9cada2015-12-08 15:45:58 -07001306
Pierre Pfister116ea4b2016-11-08 15:49:28 +00001307 memset (t, 0, sizeof (*t));
1308 t->device_index = vui - vum->vhost_user_interfaces;
1309 t->qid = qid;
Ed Warnickecb9cada2015-12-08 15:45:58 -07001310
Pierre Pfister116ea4b2016-11-08 15:49:28 +00001311 hdr_desc = &txvq->desc[desc_current];
1312 if (txvq->desc[desc_current].flags & VIRTQ_DESC_F_INDIRECT)
Damjan Marion00a9dca2016-08-17 17:05:46 +02001313 {
Pierre Pfister116ea4b2016-11-08 15:49:28 +00001314 t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_INDIRECT;
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00001315 /* Header is the first here */
Pierre Pfister116ea4b2016-11-08 15:49:28 +00001316 hdr_desc = map_guest_mem (vui, txvq->desc[desc_current].addr, &hint);
1317 }
1318 if (txvq->desc[desc_current].flags & VIRTQ_DESC_F_NEXT)
1319 {
1320 t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_SIMPLE_CHAINED;
1321 }
1322 if (!(txvq->desc[desc_current].flags & VIRTQ_DESC_F_NEXT) &&
1323 !(txvq->desc[desc_current].flags & VIRTQ_DESC_F_INDIRECT))
1324 {
1325 t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_SINGLE_DESC;
1326 }
Ed Warnickecb9cada2015-12-08 15:45:58 -07001327
Pierre Pfister116ea4b2016-11-08 15:49:28 +00001328 t->first_desc_len = hdr_desc ? hdr_desc->len : 0;
Ed Warnickecb9cada2015-12-08 15:45:58 -07001329
Pierre Pfister116ea4b2016-11-08 15:49:28 +00001330 if (!hdr_desc || !(hdr = map_guest_mem (vui, hdr_desc->addr, &hint)))
1331 {
1332 t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_MAP_ERROR;
1333 }
1334 else
1335 {
1336 u32 len = vui->virtio_net_hdr_sz;
1337 memcpy (&t->hdr, hdr, len > hdr_desc->len ? hdr_desc->len : len);
Damjan Marion00a9dca2016-08-17 17:05:46 +02001338 }
Ed Warnickecb9cada2015-12-08 15:45:58 -07001339}
1340
Damjan Marion00a9dca2016-08-17 17:05:46 +02001341static inline void
1342vhost_user_send_call (vlib_main_t * vm, vhost_user_vring_t * vq)
Ed Warnickecb9cada2015-12-08 15:45:58 -07001343{
Damjan Marion00a9dca2016-08-17 17:05:46 +02001344 vhost_user_main_t *vum = &vhost_user_main;
1345 u64 x = 1;
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00001346 int fd = UNIX_GET_FD (vq->callfd_idx);
Stevenf3b53642017-05-01 14:03:02 -07001347 int rv;
1348
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00001349 rv = write (fd, &x, sizeof (x));
Stevenf3b53642017-05-01 14:03:02 -07001350 if (rv <= 0)
1351 {
1352 clib_unix_warning
1353 ("Error: Could not write to unix socket for callfd %d", fd);
1354 return;
1355 }
1356
Damjan Marion00a9dca2016-08-17 17:05:46 +02001357 vq->n_since_last_int = 0;
1358 vq->int_deadline = vlib_time_now (vm) + vum->coalesce_time;
Ed Warnickecb9cada2015-12-08 15:45:58 -07001359}
1360
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00001361static_always_inline u32
1362vhost_user_input_copy (vhost_user_intf_t * vui, vhost_copy_t * cpy,
1363 u16 copy_len, u32 * map_hint)
1364{
1365 void *src0, *src1, *src2, *src3;
1366 if (PREDICT_TRUE (copy_len >= 4))
1367 {
1368 if (PREDICT_FALSE (!(src2 = map_guest_mem (vui, cpy[0].src, map_hint))))
1369 return 1;
1370 if (PREDICT_FALSE (!(src3 = map_guest_mem (vui, cpy[1].src, map_hint))))
1371 return 1;
1372
1373 while (PREDICT_TRUE (copy_len >= 4))
1374 {
1375 src0 = src2;
1376 src1 = src3;
1377
1378 if (PREDICT_FALSE
1379 (!(src2 = map_guest_mem (vui, cpy[2].src, map_hint))))
1380 return 1;
1381 if (PREDICT_FALSE
1382 (!(src3 = map_guest_mem (vui, cpy[3].src, map_hint))))
1383 return 1;
1384
1385 CLIB_PREFETCH (src2, 64, LOAD);
1386 CLIB_PREFETCH (src3, 64, LOAD);
1387
1388 clib_memcpy ((void *) cpy[0].dst, src0, cpy[0].len);
1389 clib_memcpy ((void *) cpy[1].dst, src1, cpy[1].len);
1390 copy_len -= 2;
1391 cpy += 2;
1392 }
1393 }
1394 while (copy_len)
1395 {
1396 if (PREDICT_FALSE (!(src0 = map_guest_mem (vui, cpy->src, map_hint))))
1397 return 1;
1398 clib_memcpy ((void *) cpy->dst, src0, cpy->len);
1399 copy_len -= 1;
1400 cpy += 1;
1401 }
1402 return 0;
1403}
1404
1405/**
1406 * Try to discard packets from the tx ring (VPP RX path).
1407 * Returns the number of discarded packets.
1408 */
1409u32
1410vhost_user_rx_discard_packet (vlib_main_t * vm,
1411 vhost_user_intf_t * vui,
1412 vhost_user_vring_t * txvq, u32 discard_max)
1413{
1414 /*
1415 * On the RX side, each packet corresponds to one descriptor
1416 * (it is the same whether it is a shallow descriptor, chained, or indirect).
1417 * Therefore, discarding a packet is like discarding a descriptor.
1418 */
1419 u32 discarded_packets = 0;
1420 u32 avail_idx = txvq->avail->idx;
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00001421 while (discarded_packets != discard_max)
1422 {
1423 if (avail_idx == txvq->last_avail_idx)
1424 goto out;
1425
1426 u16 desc_chain_head =
Steven97878892017-08-29 09:23:26 -07001427 txvq->avail->ring[txvq->last_avail_idx & txvq->qsz_mask];
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00001428 txvq->last_avail_idx++;
Steven97878892017-08-29 09:23:26 -07001429 txvq->used->ring[txvq->last_used_idx & txvq->qsz_mask].id =
1430 desc_chain_head;
1431 txvq->used->ring[txvq->last_used_idx & txvq->qsz_mask].len = 0;
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00001432 vhost_user_log_dirty_ring (vui, txvq,
Steven97878892017-08-29 09:23:26 -07001433 ring[txvq->last_used_idx & txvq->qsz_mask]);
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00001434 txvq->last_used_idx++;
1435 discarded_packets++;
1436 }
1437
1438out:
1439 CLIB_MEMORY_BARRIER ();
1440 txvq->used->idx = txvq->last_used_idx;
1441 vhost_user_log_dirty_ring (vui, txvq, idx);
1442 return discarded_packets;
1443}
1444
1445/*
1446 * In case of overflow, we need to rewind the array of allocated buffers.
1447 */
1448static void
1449vhost_user_input_rewind_buffers (vlib_main_t * vm,
1450 vhost_cpu_t * cpu, vlib_buffer_t * b_head)
1451{
1452 u32 bi_current = cpu->rx_buffers[cpu->rx_buffers_len];
1453 vlib_buffer_t *b_current = vlib_get_buffer (vm, bi_current);
1454 b_current->current_length = 0;
1455 b_current->flags = 0;
1456 while (b_current != b_head)
1457 {
1458 cpu->rx_buffers_len++;
1459 bi_current = cpu->rx_buffers[cpu->rx_buffers_len];
1460 b_current = vlib_get_buffer (vm, bi_current);
1461 b_current->current_length = 0;
1462 b_current->flags = 0;
1463 }
Steven95827e42017-05-18 21:22:00 -07001464 cpu->rx_buffers_len++;
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00001465}
Yoann Desmouceaux4667c222016-02-24 22:51:00 +01001466
/**
 * Per-queue device input function: drain the guest's TX vring (which is
 * VPP's RX direction) for one vhost-user interface.
 *
 * Descriptors are parsed and copy orders are scheduled into the per-thread
 * copy array; the actual guest-memory copies are executed in batches by
 * vhost_user_input_copy(), both when the array fills up and once at the
 * end. Completed descriptors are returned on the used ring and the guest
 * is kicked subject to interrupt coalescing.
 *
 * @param qid  queue id (TX/RX vring pair index)
 * @param mode rx mode (polling / interrupt / adaptive) for this queue
 * @return number of packets enqueued to the graph on this call
 */
static u32
vhost_user_if_input (vlib_main_t * vm,
		     vhost_user_main_t * vum,
		     vhost_user_intf_t * vui,
		     u16 qid, vlib_node_runtime_t * node,
		     vnet_hw_interface_rx_mode mode)
{
  vhost_user_vring_t *txvq = &vui->vrings[VHOST_VRING_IDX_TX (qid)];
  u16 n_rx_packets = 0;
  u32 n_rx_bytes = 0;
  u16 n_left;
  u32 n_left_to_next, *to_next;
  u32 next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
  u32 n_trace = vlib_get_trace_count (vm, node);
  u32 map_hint = 0;		/* guest-memory region lookup hint */
  u16 thread_index = vlib_get_thread_index ();
  u16 copy_len = 0;		/* pending entries in the per-thread copy array */

  {
    /* do we have pending interrupts ? Flush overdue coalesced kicks for
     * both directions of this queue pair. */
    vhost_user_vring_t *rxvq = &vui->vrings[VHOST_VRING_IDX_RX (qid)];
    f64 now = vlib_time_now (vm);

    if ((txvq->n_since_last_int) && (txvq->int_deadline < now))
      vhost_user_send_call (vm, txvq);

    if ((rxvq->n_since_last_int) && (rxvq->int_deadline < now))
      vhost_user_send_call (vm, rxvq);
  }

  /*
   * For adaptive mode, it is optimized to reduce interrupts.
   * If the scheduler switches the input node to polling due
   * to burst of traffic, we tell the driver no interrupt.
   * When the traffic subsides, the scheduler switches the node back to
   * interrupt mode. We must tell the driver we want interrupt.
   */
  if (PREDICT_FALSE (mode == VNET_HW_INTERFACE_RX_MODE_ADAPTIVE))
    {
      if ((node->flags &
	   VLIB_NODE_FLAG_SWITCH_FROM_POLLING_TO_INTERRUPT_MODE) ||
	  !(node->flags &
	    VLIB_NODE_FLAG_SWITCH_FROM_INTERRUPT_TO_POLLING_MODE))
	/* Tell driver we want notification */
	txvq->used->flags = 0;
      else
	/* Tell driver we don't want notification */
	txvq->used->flags = VRING_USED_F_NO_NOTIFY;
    }

  /* Only the low bit (NO_INTERRUPT) of avail->flags is valid; any other
   * bit set means the ring state is not trustworthy yet. */
  if (PREDICT_FALSE (txvq->avail->flags & 0xFFFE))
    return 0;

  /* u16 arithmetic handles index wrap-around naturally. */
  n_left = (u16) (txvq->avail->idx - txvq->last_avail_idx);

  /* nothing to do */
  if (PREDICT_FALSE (n_left == 0))
    return 0;

  if (PREDICT_FALSE (!vui->admin_up || !(txvq->enabled)))
    {
      /*
       * Discard input packet if interface is admin down or vring is not
       * enabled.
       * "For example, for a networking device, in the disabled state
       * client must not supply any new RX packets, but must process
       * and discard any TX packets."
       */
      vhost_user_rx_discard_packet (vm, vui, txvq,
				    VHOST_USER_DOWN_DISCARD_COUNT);
      return 0;
    }

  if (PREDICT_FALSE (n_left == (txvq->qsz_mask + 1)))
    {
      /*
       * Informational error logging when VPP is not
       * receiving packets fast enough.
       */
      vlib_error_count (vm, node->node_index,
			VHOST_USER_INPUT_FUNC_ERROR_FULL_RX_QUEUE, 1);
    }

  /* Never process more than one frame's worth per invocation. */
  if (n_left > VLIB_FRAME_SIZE)
    n_left = VLIB_FRAME_SIZE;

  /*
   * For small packets (<2kB), we will not need more than one vlib buffer
   * per packet. In case packets are bigger, we will just yield at some point
   * in the loop and come back later. This is not an issue as for big packet,
   * processing cost really comes from the memory copy.
   * The assumption is that big packets will fit in 40 buffers.
   */
  if (PREDICT_FALSE (vum->cpus[thread_index].rx_buffers_len < n_left + 1 ||
		     vum->cpus[thread_index].rx_buffers_len < 40))
    {
      /* Top up the per-thread buffer stack. */
      u32 curr_len = vum->cpus[thread_index].rx_buffers_len;
      vum->cpus[thread_index].rx_buffers_len +=
	vlib_buffer_alloc_from_free_list (vm,
					  vum->cpus[thread_index].rx_buffers +
					  curr_len,
					  VHOST_USER_RX_BUFFERS_N - curr_len,
					  VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX);

      if (PREDICT_FALSE
	  (vum->cpus[thread_index].rx_buffers_len <
	   VHOST_USER_RX_BUFFER_STARVATION))
	{
	  /* In case of buffer starvation, discard some packets from the queue
	   * and log the event.
	   * We keep doing best effort for the remaining packets. */
	  u32 flush = (n_left + 1 > vum->cpus[thread_index].rx_buffers_len) ?
	    n_left + 1 - vum->cpus[thread_index].rx_buffers_len : 1;
	  flush = vhost_user_rx_discard_packet (vm, vui, txvq, flush);

	  n_left -= flush;
	  vlib_increment_simple_counter (vnet_main.
					 interface_main.sw_if_counters +
					 VNET_INTERFACE_COUNTER_DROP,
					 vlib_get_thread_index (),
					 vui->sw_if_index, flush);

	  vlib_error_count (vm, vhost_user_input_node.index,
			    VHOST_USER_INPUT_FUNC_ERROR_NO_BUFFER, flush);
	}
    }

  while (n_left > 0)
    {
      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);

      while (n_left > 0 && n_left_to_next > 0)
	{
	  vlib_buffer_t *b_head, *b_current;
	  u32 bi_current;
	  u16 desc_current;
	  u32 desc_data_offset;
	  vring_desc_t *desc_table = txvq->desc;

	  if (PREDICT_FALSE (vum->cpus[thread_index].rx_buffers_len <= 1))
	    {
	      /* Not enough rx_buffers
	       * Note: We yield on 1 so we don't need to do an additional
	       * check for the next buffer prefetch.
	       */
	      n_left = 0;
	      break;
	    }

	  desc_current =
	    txvq->avail->ring[txvq->last_avail_idx & txvq->qsz_mask];
	  /* Pop a buffer from the per-thread stack. */
	  vum->cpus[thread_index].rx_buffers_len--;
	  bi_current = (vum->cpus[thread_index].rx_buffers)
	    [vum->cpus[thread_index].rx_buffers_len];
	  b_head = b_current = vlib_get_buffer (vm, bi_current);
	  to_next[0] = bi_current;	//We do that now so we can forget about bi_current
	  to_next++;
	  n_left_to_next--;

	  vlib_prefetch_buffer_with_index (vm,
					   (vum->
					    cpus[thread_index].rx_buffers)
					   [vum->cpus[thread_index].
					    rx_buffers_len - 1], LOAD);

	  /* Just preset the used descriptor id and length for later */
	  txvq->used->ring[txvq->last_used_idx & txvq->qsz_mask].id =
	    desc_current;
	  txvq->used->ring[txvq->last_used_idx & txvq->qsz_mask].len = 0;
	  vhost_user_log_dirty_ring (vui, txvq,
				     ring[txvq->last_used_idx &
					  txvq->qsz_mask]);

	  /* The buffer should already be initialized */
	  b_head->total_length_not_including_first_buffer = 0;
	  b_head->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;

	  if (PREDICT_FALSE (n_trace))
	    {
	      //TODO: next_index is not exactly known at that point
	      vlib_trace_buffer (vm, node, next_index, b_head,
				 /* follow_chain */ 0);
	      vhost_trace_t *t0 =
		vlib_add_trace (vm, node, b_head, sizeof (t0[0]));
	      vhost_user_rx_trace (t0, vui, qid, b_head, txvq);
	      n_trace--;
	      vlib_set_trace_count (vm, node, n_trace);
	    }

	  /* This depends on the setup but is very consistent
	   * So I think the CPU branch predictor will make a pretty good job
	   * at optimizing the decision. */
	  if (txvq->desc[desc_current].flags & VIRTQ_DESC_F_INDIRECT)
	    {
	      /* Indirect: switch the descriptor table to the guest-mapped
	       * indirect table and restart at its first entry. */
	      desc_table = map_guest_mem (vui, txvq->desc[desc_current].addr,
					  &map_hint);
	      desc_current = 0;
	      if (PREDICT_FALSE (desc_table == 0))
		{
		  vlib_error_count (vm, node->node_index,
				    VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL, 1);
		  goto out;
		}
	    }

	  if (PREDICT_TRUE (vui->is_any_layout) ||
	      (!(desc_table[desc_current].flags & VIRTQ_DESC_F_NEXT)))
	    {
	      /* ANYLAYOUT or single buffer: virtio-net header shares the
	       * first descriptor with the data, so just skip past it. */
	      desc_data_offset = vui->virtio_net_hdr_sz;
	    }
	  else
	    {
	      /* CSR case without ANYLAYOUT, skip 1st buffer entirely
	       * (it holds only the virtio-net header). */
	      desc_data_offset = desc_table[desc_current].len;
	    }

	  while (1)
	    {
	      /* Get more input if necessary. Or end of packet. */
	      if (desc_data_offset == desc_table[desc_current].len)
		{
		  if (PREDICT_FALSE (desc_table[desc_current].flags &
				     VIRTQ_DESC_F_NEXT))
		    {
		      desc_current = desc_table[desc_current].next;
		      desc_data_offset = 0;
		    }
		  else
		    {
		      goto out;
		    }
		}

	      /* Get more output if necessary. Or end of packet. */
	      if (PREDICT_FALSE
		  (b_current->current_length == VLIB_BUFFER_DATA_SIZE))
		{
		  if (PREDICT_FALSE
		      (vum->cpus[thread_index].rx_buffers_len == 0))
		    {
		      /* Cancel speculation */
		      to_next--;
		      n_left_to_next++;

		      /*
		       * Checking if there are some left buffers.
		       * If not, just rewind the used buffers and stop.
		       * Note: Scheduled copies are not cancelled. This is
		       * not an issue as they would still be valid. Useless,
		       * but valid.
		       */
		      vhost_user_input_rewind_buffers (vm,
						       &vum->cpus
						       [thread_index],
						       b_head);
		      n_left = 0;
		      goto stop;
		    }

		  /* Get next output */
		  vum->cpus[thread_index].rx_buffers_len--;
		  u32 bi_next =
		    (vum->cpus[thread_index].rx_buffers)[vum->cpus
							 [thread_index].rx_buffers_len];
		  b_current->next_buffer = bi_next;
		  b_current->flags |= VLIB_BUFFER_NEXT_PRESENT;
		  bi_current = bi_next;
		  b_current = vlib_get_buffer (vm, bi_current);
		}

	      /* Prepare a copy order executed later for the data */
	      vhost_copy_t *cpy = &vum->cpus[thread_index].copy[copy_len];
	      copy_len++;
	      u32 desc_data_l =
		desc_table[desc_current].len - desc_data_offset;
	      /* Copy whichever is smaller: remaining descriptor data or
	       * remaining room in the current vlib buffer. */
	      cpy->len = VLIB_BUFFER_DATA_SIZE - b_current->current_length;
	      cpy->len = (cpy->len > desc_data_l) ? desc_data_l : cpy->len;
	      cpy->dst = (uword) (vlib_buffer_get_current (b_current) +
				  b_current->current_length);
	      cpy->src = desc_table[desc_current].addr + desc_data_offset;

	      desc_data_offset += cpy->len;

	      b_current->current_length += cpy->len;
	      b_head->total_length_not_including_first_buffer += cpy->len;
	    }

	out:
	  CLIB_PREFETCH (&n_left, sizeof (n_left), LOAD);

	  n_rx_bytes += b_head->total_length_not_including_first_buffer;
	  n_rx_packets++;

	  /* The field must exclude the head's own data; it accumulated the
	   * full packet length above, so subtract the head length here. */
	  b_head->total_length_not_including_first_buffer -=
	    b_head->current_length;

	  /* consume the descriptor and return it as used */
	  txvq->last_avail_idx++;
	  txvq->last_used_idx++;

	  VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b_head);

	  vnet_buffer (b_head)->sw_if_index[VLIB_RX] = vui->sw_if_index;
	  vnet_buffer (b_head)->sw_if_index[VLIB_TX] = (u32) ~ 0;
	  b_head->error = 0;

	  {
	    u32 next0 = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;

	    /* redirect if feature path enabled */
	    vnet_feature_start_device_input_x1 (vui->sw_if_index, &next0,
						b_head);

	    u32 bi = to_next[-1];	//Cannot use to_next[-1] in the macro
	    vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
					     to_next, n_left_to_next,
					     bi, next0);
	  }

	  n_left--;

	  /*
	   * Although separating memory copies from virtio ring parsing
	   * is beneficial, we can offer to perform the copies from time
	   * to time in order to free some space in the ring.
	   */
	  if (PREDICT_FALSE (copy_len >= VHOST_USER_RX_COPY_THRESHOLD))
	    {
	      if (PREDICT_FALSE
		  (vhost_user_input_copy (vui, vum->cpus[thread_index].copy,
					  copy_len, &map_hint)))
		{
		  vlib_error_count (vm, node->node_index,
				    VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL, 1);
		}
	      copy_len = 0;

	      /* give buffers back to driver */
	      CLIB_MEMORY_BARRIER ();
	      txvq->used->idx = txvq->last_used_idx;
	      vhost_user_log_dirty_ring (vui, txvq, idx);
	    }
	}
    stop:
      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
    }

  /* Do the memory copies */
  if (PREDICT_FALSE
      (vhost_user_input_copy (vui, vum->cpus[thread_index].copy,
			      copy_len, &map_hint)))
    {
      vlib_error_count (vm, node->node_index,
			VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL, 1);
    }

  /* give buffers back to driver */
  CLIB_MEMORY_BARRIER ();
  txvq->used->idx = txvq->last_used_idx;
  vhost_user_log_dirty_ring (vui, txvq, idx);

  /* interrupt (call) handling */
  if ((txvq->callfd_idx != ~0) &&
      !(txvq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
    {
      txvq->n_since_last_int += n_rx_packets;

      if (txvq->n_since_last_int > vum->coalesce_frames)
	vhost_user_send_call (vm, txvq);
    }

  /* increase rx counters */
  vlib_increment_combined_counter
    (vnet_main.interface_main.combined_sw_if_counters
     + VNET_INTERFACE_COUNTER_RX,
     vlib_get_thread_index (), vui->sw_if_index, n_rx_packets, n_rx_bytes);

  vnet_device_increment_rx_packets (thread_index, n_rx_packets);

  return n_rx_packets;
}
1849
1850static uword
1851vhost_user_input (vlib_main_t * vm,
Damjan Marion00a9dca2016-08-17 17:05:46 +02001852 vlib_node_runtime_t * node, vlib_frame_t * f)
Ed Warnickecb9cada2015-12-08 15:45:58 -07001853{
Damjan Marion00a9dca2016-08-17 17:05:46 +02001854 vhost_user_main_t *vum = &vhost_user_main;
Ed Warnickecb9cada2015-12-08 15:45:58 -07001855 uword n_rx_packets = 0;
Steven7312cc72017-03-15 21:18:55 -07001856 vhost_user_intf_t *vui;
Stevenf3b53642017-05-01 14:03:02 -07001857 vnet_device_input_runtime_t *rt =
1858 (vnet_device_input_runtime_t *) node->runtime_data;
1859 vnet_device_and_queue_t *dq;
Pierre Pfistere21c5282016-09-21 08:04:59 +01001860
Stevenf3b53642017-05-01 14:03:02 -07001861 vec_foreach (dq, rt->devices_and_queues)
1862 {
1863 if (clib_smp_swap (&dq->interrupt_pending, 0) ||
1864 (node->state == VLIB_NODE_STATE_POLLING))
Steven7312cc72017-03-15 21:18:55 -07001865 {
Stevenf3b53642017-05-01 14:03:02 -07001866 vui =
1867 pool_elt_at_index (vum->vhost_user_interfaces, dq->dev_instance);
1868 n_rx_packets = vhost_user_if_input (vm, vum, vui, dq->queue_id, node,
1869 dq->mode);
Steven7312cc72017-03-15 21:18:55 -07001870 }
Stevenf3b53642017-05-01 14:03:02 -07001871 }
Steven7312cc72017-03-15 21:18:55 -07001872
Ed Warnickecb9cada2015-12-08 15:45:58 -07001873 return n_rx_packets;
1874}
1875
/* *INDENT-OFF* */
/* Graph node registration for the vhost-user RX path. Registered as a
 * sibling of "device-input" so it shares that node's next-node arcs. */
VLIB_REGISTER_NODE (vhost_user_input_node) = {
  .function = vhost_user_input,
  .type = VLIB_NODE_TYPE_INPUT,
  .name = "vhost-user-input",
  .sibling_of = "device-input",

  /* Will be enabled if/when hardware is detected. */
  .state = VLIB_NODE_STATE_DISABLED,

  .format_buffer = format_ethernet_header_with_length,
  .format_trace = format_vhost_trace,

  .n_errors = VHOST_USER_INPUT_FUNC_N_ERROR,
  .error_strings = vhost_user_input_func_error_strings,
};

/* Emit CPU-specific variants of the node function and select at runtime. */
VLIB_NODE_FUNCTION_MULTIARCH (vhost_user_input_node, vhost_user_input)
/* *INDENT-ON* */
Damjan Marion1c80e832016-05-11 23:07:18 +02001895
Pierre Pfister116ea4b2016-11-08 15:49:28 +00001896
1897void
1898vhost_user_tx_trace (vhost_trace_t * t,
1899 vhost_user_intf_t * vui, u16 qid,
1900 vlib_buffer_t * b, vhost_user_vring_t * rxvq)
1901{
1902 vhost_user_main_t *vum = &vhost_user_main;
Pierre Pfister116ea4b2016-11-08 15:49:28 +00001903 u32 last_avail_idx = rxvq->last_avail_idx;
Steven97878892017-08-29 09:23:26 -07001904 u32 desc_current = rxvq->avail->ring[last_avail_idx & rxvq->qsz_mask];
Pierre Pfister116ea4b2016-11-08 15:49:28 +00001905 vring_desc_t *hdr_desc = 0;
1906 u32 hint = 0;
1907
1908 memset (t, 0, sizeof (*t));
1909 t->device_index = vui - vum->vhost_user_interfaces;
1910 t->qid = qid;
1911
1912 hdr_desc = &rxvq->desc[desc_current];
1913 if (rxvq->desc[desc_current].flags & VIRTQ_DESC_F_INDIRECT)
1914 {
1915 t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_INDIRECT;
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00001916 /* Header is the first here */
Pierre Pfister116ea4b2016-11-08 15:49:28 +00001917 hdr_desc = map_guest_mem (vui, rxvq->desc[desc_current].addr, &hint);
1918 }
1919 if (rxvq->desc[desc_current].flags & VIRTQ_DESC_F_NEXT)
1920 {
1921 t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_SIMPLE_CHAINED;
1922 }
1923 if (!(rxvq->desc[desc_current].flags & VIRTQ_DESC_F_NEXT) &&
1924 !(rxvq->desc[desc_current].flags & VIRTQ_DESC_F_INDIRECT))
1925 {
1926 t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_SINGLE_DESC;
1927 }
1928
1929 t->first_desc_len = hdr_desc ? hdr_desc->len : 0;
1930}
1931
/**
 * Execute a batch of copy orders scheduled on the VPP TX path
 * (vlib buffers -> guest memory), mirroring vhost_user_input_copy()
 * with the mapping done on the destination side and dirty-page logging
 * added for live migration.
 *
 * @param vui      vhost-user interface (provides the guest memory regions)
 * @param cpy      array of copy orders (local src, guest dst address, len)
 * @param copy_len number of entries in @c cpy
 * @param map_hint region-lookup hint carried across map_guest_mem() calls
 * @return 0 on success, 1 if any guest destination failed to map
 *         (earlier copies in the batch may already have been performed)
 */
static_always_inline u32
vhost_user_tx_copy (vhost_user_intf_t * vui, vhost_copy_t * cpy,
		    u16 copy_len, u32 * map_hint)
{
  void *dst0, *dst1, *dst2, *dst3;
  if (PREDICT_TRUE (copy_len >= 4))
    {
      /* Software pipeline: map two destinations ahead of the pair being
       * copied, and prefetch the upcoming sources. */
      if (PREDICT_FALSE (!(dst2 = map_guest_mem (vui, cpy[0].dst, map_hint))))
	return 1;
      if (PREDICT_FALSE (!(dst3 = map_guest_mem (vui, cpy[1].dst, map_hint))))
	return 1;
      while (PREDICT_TRUE (copy_len >= 4))
	{
	  /* Destinations mapped on the previous iteration become current. */
	  dst0 = dst2;
	  dst1 = dst3;

	  if (PREDICT_FALSE
	      (!(dst2 = map_guest_mem (vui, cpy[2].dst, map_hint))))
	    return 1;
	  if (PREDICT_FALSE
	      (!(dst3 = map_guest_mem (vui, cpy[3].dst, map_hint))))
	    return 1;

	  CLIB_PREFETCH ((void *) cpy[2].src, 64, LOAD);
	  CLIB_PREFETCH ((void *) cpy[3].src, 64, LOAD);

	  clib_memcpy (dst0, (void *) cpy[0].src, cpy[0].len);
	  clib_memcpy (dst1, (void *) cpy[1].src, cpy[1].len);

	  /* Mark written guest pages dirty for live-migration logging. */
	  vhost_user_log_dirty_pages_2 (vui, cpy[0].dst, cpy[0].len, 1);
	  vhost_user_log_dirty_pages_2 (vui, cpy[1].dst, cpy[1].len, 1);
	  copy_len -= 2;
	  cpy += 2;
	}
    }
  /* Tail loop: remaining (<4) entries, one at a time. */
  while (copy_len)
    {
      if (PREDICT_FALSE (!(dst0 = map_guest_mem (vui, cpy->dst, map_hint))))
	return 1;
      clib_memcpy (dst0, (void *) cpy->src, cpy->len);
      vhost_user_log_dirty_pages_2 (vui, cpy->dst, cpy->len, 1);
      copy_len -= 1;
      cpy += 1;
    }
  return 0;
}
1978
1979
Ed Warnickecb9cada2015-12-08 15:45:58 -07001980static uword
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00001981vhost_user_tx (vlib_main_t * vm,
1982 vlib_node_runtime_t * node, vlib_frame_t * frame)
Ed Warnickecb9cada2015-12-08 15:45:58 -07001983{
Damjan Marion00a9dca2016-08-17 17:05:46 +02001984 u32 *buffers = vlib_frame_args (frame);
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00001985 u32 n_left = frame->n_vectors;
Damjan Marion00a9dca2016-08-17 17:05:46 +02001986 vhost_user_main_t *vum = &vhost_user_main;
Damjan Marion00a9dca2016-08-17 17:05:46 +02001987 vnet_interface_output_runtime_t *rd = (void *) node->runtime_data;
1988 vhost_user_intf_t *vui =
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00001989 pool_elt_at_index (vum->vhost_user_interfaces, rd->dev_instance);
Pierre Pfistere21c5282016-09-21 08:04:59 +01001990 u32 qid = ~0;
1991 vhost_user_vring_t *rxvq;
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00001992 u8 error;
Damjan Marion586afd72017-04-05 19:18:20 +02001993 u32 thread_index = vlib_get_thread_index ();
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00001994 u32 map_hint = 0;
1995 u8 retry = 8;
1996 u16 copy_len;
1997 u16 tx_headers_len;
Pierre Pfisterba1d0462016-07-27 16:38:20 +01001998
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00001999 if (PREDICT_FALSE (!vui->admin_up))
2000 {
2001 error = VHOST_USER_TX_FUNC_ERROR_DOWN;
2002 goto done3;
2003 }
2004
2005 if (PREDICT_FALSE (!vui->is_up))
Damjan Marion00a9dca2016-08-17 17:05:46 +02002006 {
2007 error = VHOST_USER_TX_FUNC_ERROR_NOT_READY;
Pierre Pfistere21c5282016-09-21 08:04:59 +01002008 goto done3;
Damjan Marion00a9dca2016-08-17 17:05:46 +02002009 }
Damjan Marion920ecc22016-01-12 18:34:24 +01002010
Pierre Pfistere21c5282016-09-21 08:04:59 +01002011 qid =
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00002012 VHOST_VRING_IDX_RX (*vec_elt_at_index
Stevend7727532017-06-09 18:49:17 -07002013 (vui->per_cpu_tx_qid, thread_index));
Pierre Pfistere21c5282016-09-21 08:04:59 +01002014 rxvq = &vui->vrings[qid];
2015 if (PREDICT_FALSE (vui->use_tx_spinlock))
2016 vhost_user_vring_lock (vui, qid);
Ed Warnickecb9cada2015-12-08 15:45:58 -07002017
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00002018retry:
2019 error = VHOST_USER_TX_FUNC_ERROR_NONE;
2020 tx_headers_len = 0;
2021 copy_len = 0;
2022 while (n_left > 0)
Damjan Marion00a9dca2016-08-17 17:05:46 +02002023 {
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00002024 vlib_buffer_t *b0, *current_b0;
2025 u16 desc_head, desc_index, desc_len;
2026 vring_desc_t *desc_table;
2027 uword buffer_map_addr;
2028 u32 buffer_len;
2029 u16 bytes_left;
Ed Warnickecb9cada2015-12-08 15:45:58 -07002030
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00002031 if (PREDICT_TRUE (n_left > 1))
2032 vlib_prefetch_buffer_with_index (vm, buffers[1], LOAD);
2033
2034 b0 = vlib_get_buffer (vm, buffers[0]);
2035
2036 if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
Pierre Pfister116ea4b2016-11-08 15:49:28 +00002037 {
Damjan Marion586afd72017-04-05 19:18:20 +02002038 vum->cpus[thread_index].current_trace =
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00002039 vlib_add_trace (vm, node, b0,
Damjan Marion586afd72017-04-05 19:18:20 +02002040 sizeof (*vum->cpus[thread_index].current_trace));
2041 vhost_user_tx_trace (vum->cpus[thread_index].current_trace,
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00002042 vui, qid / 2, b0, rxvq);
2043 }
Pierre Pfister116ea4b2016-11-08 15:49:28 +00002044
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00002045 if (PREDICT_FALSE (rxvq->last_avail_idx == rxvq->avail->idx))
2046 {
2047 error = VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOBUF;
2048 goto done;
2049 }
Ed Warnickecb9cada2015-12-08 15:45:58 -07002050
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00002051 desc_table = rxvq->desc;
2052 desc_head = desc_index =
Steven97878892017-08-29 09:23:26 -07002053 rxvq->avail->ring[rxvq->last_avail_idx & rxvq->qsz_mask];
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00002054
2055 /* Go deeper in case of indirect descriptor
2056 * I don't know of any driver providing indirect for RX. */
2057 if (PREDICT_FALSE (rxvq->desc[desc_head].flags & VIRTQ_DESC_F_INDIRECT))
2058 {
2059 if (PREDICT_FALSE
2060 (rxvq->desc[desc_head].len < sizeof (vring_desc_t)))
Pierre Pfisterba1d0462016-07-27 16:38:20 +01002061 {
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00002062 error = VHOST_USER_TX_FUNC_ERROR_INDIRECT_OVERFLOW;
Pierre Pfisterba1d0462016-07-27 16:38:20 +01002063 goto done;
2064 }
2065 if (PREDICT_FALSE
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00002066 (!(desc_table =
2067 map_guest_mem (vui, rxvq->desc[desc_index].addr,
2068 &map_hint))))
Pierre Pfisterba1d0462016-07-27 16:38:20 +01002069 {
2070 error = VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL;
2071 goto done;
2072 }
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00002073 desc_index = 0;
Damjan Marion00a9dca2016-08-17 17:05:46 +02002074 }
Ed Warnickecb9cada2015-12-08 15:45:58 -07002075
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00002076 desc_len = vui->virtio_net_hdr_sz;
2077 buffer_map_addr = desc_table[desc_index].addr;
2078 buffer_len = desc_table[desc_index].len;
2079
2080 {
2081 // Get a header from the header array
2082 virtio_net_hdr_mrg_rxbuf_t *hdr =
Damjan Marion586afd72017-04-05 19:18:20 +02002083 &vum->cpus[thread_index].tx_headers[tx_headers_len];
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00002084 tx_headers_len++;
2085 hdr->hdr.flags = 0;
2086 hdr->hdr.gso_type = 0;
2087 hdr->num_buffers = 1; //This is local, no need to check
2088
2089 // Prepare a copy order executed later for the header
Damjan Marion586afd72017-04-05 19:18:20 +02002090 vhost_copy_t *cpy = &vum->cpus[thread_index].copy[copy_len];
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00002091 copy_len++;
2092 cpy->len = vui->virtio_net_hdr_sz;
2093 cpy->dst = buffer_map_addr;
2094 cpy->src = (uword) hdr;
2095 }
2096
2097 buffer_map_addr += vui->virtio_net_hdr_sz;
2098 buffer_len -= vui->virtio_net_hdr_sz;
2099 bytes_left = b0->current_length;
2100 current_b0 = b0;
2101 while (1)
2102 {
2103 if (buffer_len == 0)
2104 { //Get new output
2105 if (desc_table[desc_index].flags & VIRTQ_DESC_F_NEXT)
2106 {
2107 //Next one is chained
2108 desc_index = desc_table[desc_index].next;
2109 buffer_map_addr = desc_table[desc_index].addr;
2110 buffer_len = desc_table[desc_index].len;
2111 }
2112 else if (vui->virtio_net_hdr_sz == 12) //MRG is available
2113 {
2114 virtio_net_hdr_mrg_rxbuf_t *hdr =
Damjan Marion586afd72017-04-05 19:18:20 +02002115 &vum->cpus[thread_index].tx_headers[tx_headers_len - 1];
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00002116
2117 //Move from available to used buffer
Steven97878892017-08-29 09:23:26 -07002118 rxvq->used->ring[rxvq->last_used_idx & rxvq->qsz_mask].id =
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00002119 desc_head;
Steven97878892017-08-29 09:23:26 -07002120 rxvq->used->ring[rxvq->last_used_idx & rxvq->qsz_mask].len =
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00002121 desc_len;
2122 vhost_user_log_dirty_ring (vui, rxvq,
2123 ring[rxvq->last_used_idx &
Steven97878892017-08-29 09:23:26 -07002124 rxvq->qsz_mask]);
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00002125
2126 rxvq->last_avail_idx++;
2127 rxvq->last_used_idx++;
2128 hdr->num_buffers++;
2129 desc_len = 0;
2130
2131 if (PREDICT_FALSE
2132 (rxvq->last_avail_idx == rxvq->avail->idx))
2133 {
2134 //Dequeue queued descriptors for this packet
2135 rxvq->last_used_idx -= hdr->num_buffers - 1;
2136 rxvq->last_avail_idx -= hdr->num_buffers - 1;
2137 error = VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOBUF;
2138 goto done;
2139 }
2140
2141 desc_table = rxvq->desc;
2142 desc_head = desc_index =
Steven97878892017-08-29 09:23:26 -07002143 rxvq->avail->ring[rxvq->last_avail_idx & rxvq->qsz_mask];
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00002144 if (PREDICT_FALSE
2145 (rxvq->desc[desc_head].flags & VIRTQ_DESC_F_INDIRECT))
2146 {
2147 //It is seriously unlikely that a driver will put indirect descriptor
2148 //after non-indirect descriptor.
2149 if (PREDICT_FALSE
2150 (rxvq->desc[desc_head].len < sizeof (vring_desc_t)))
2151 {
2152 error = VHOST_USER_TX_FUNC_ERROR_INDIRECT_OVERFLOW;
2153 goto done;
2154 }
2155 if (PREDICT_FALSE
2156 (!(desc_table =
2157 map_guest_mem (vui,
2158 rxvq->desc[desc_index].addr,
2159 &map_hint))))
2160 {
2161 error = VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL;
2162 goto done;
2163 }
2164 desc_index = 0;
2165 }
2166 buffer_map_addr = desc_table[desc_index].addr;
2167 buffer_len = desc_table[desc_index].len;
2168 }
2169 else
2170 {
2171 error = VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOMRG;
2172 goto done;
2173 }
2174 }
2175
2176 {
Damjan Marion586afd72017-04-05 19:18:20 +02002177 vhost_copy_t *cpy = &vum->cpus[thread_index].copy[copy_len];
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00002178 copy_len++;
2179 cpy->len = bytes_left;
2180 cpy->len = (cpy->len > buffer_len) ? buffer_len : cpy->len;
2181 cpy->dst = buffer_map_addr;
2182 cpy->src = (uword) vlib_buffer_get_current (current_b0) +
2183 current_b0->current_length - bytes_left;
2184
2185 bytes_left -= cpy->len;
2186 buffer_len -= cpy->len;
2187 buffer_map_addr += cpy->len;
2188 desc_len += cpy->len;
2189
Pierre Pfister14ac8012016-12-08 07:58:47 +00002190 CLIB_PREFETCH (&rxvq->desc, CLIB_CACHE_LINE_BYTES, LOAD);
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00002191 }
2192
2193 // Check if vlib buffer has more data. If not, get more or break.
2194 if (PREDICT_TRUE (!bytes_left))
2195 {
2196 if (PREDICT_FALSE
2197 (current_b0->flags & VLIB_BUFFER_NEXT_PRESENT))
2198 {
2199 current_b0 = vlib_get_buffer (vm, current_b0->next_buffer);
2200 bytes_left = current_b0->current_length;
2201 }
2202 else
2203 {
2204 //End of packet
2205 break;
2206 }
2207 }
2208 }
2209
2210 //Move from available to used ring
Steven97878892017-08-29 09:23:26 -07002211 rxvq->used->ring[rxvq->last_used_idx & rxvq->qsz_mask].id = desc_head;
2212 rxvq->used->ring[rxvq->last_used_idx & rxvq->qsz_mask].len = desc_len;
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00002213 vhost_user_log_dirty_ring (vui, rxvq,
Steven97878892017-08-29 09:23:26 -07002214 ring[rxvq->last_used_idx & rxvq->qsz_mask]);
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00002215 rxvq->last_avail_idx++;
2216 rxvq->last_used_idx++;
2217
2218 if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
2219 {
Damjan Marion586afd72017-04-05 19:18:20 +02002220 vum->cpus[thread_index].current_trace->hdr =
2221 vum->cpus[thread_index].tx_headers[tx_headers_len - 1];
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00002222 }
2223
2224 n_left--; //At the end for error counting when 'goto done' is invoked
Stevend7727532017-06-09 18:49:17 -07002225
2226 /*
2227 * Do the copy periodically to prevent
2228 * vum->cpus[thread_index].copy array overflow and corrupt memory
2229 */
2230 if (PREDICT_FALSE (copy_len >= VHOST_USER_TX_COPY_THRESHOLD))
2231 {
2232 if (PREDICT_FALSE
2233 (vhost_user_tx_copy (vui, vum->cpus[thread_index].copy,
2234 copy_len, &map_hint)))
2235 {
2236 vlib_error_count (vm, node->node_index,
2237 VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL, 1);
2238 }
2239 copy_len = 0;
2240
2241 /* give buffers back to driver */
2242 CLIB_MEMORY_BARRIER ();
2243 rxvq->used->idx = rxvq->last_used_idx;
2244 vhost_user_log_dirty_ring (vui, rxvq, idx);
2245 }
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00002246 buffers++;
2247 }
2248
2249done:
2250 //Do the memory copies
2251 if (PREDICT_FALSE
Damjan Marion586afd72017-04-05 19:18:20 +02002252 (vhost_user_tx_copy (vui, vum->cpus[thread_index].copy,
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00002253 copy_len, &map_hint)))
2254 {
Steven95827e42017-05-18 21:22:00 -07002255 vlib_error_count (vm, node->node_index,
2256 VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL, 1);
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00002257 }
2258
2259 CLIB_MEMORY_BARRIER ();
2260 rxvq->used->idx = rxvq->last_used_idx;
2261 vhost_user_log_dirty_ring (vui, rxvq, idx);
2262
2263 /*
2264 * When n_left is set, error is always set to something too.
2265 * In case error is due to lack of remaining buffers, we go back up and
2266 * retry.
2267 * The idea is that it is better to waste some time on packets
2268 * that have been processed already than dropping them and get
2269 * more fresh packets with a good likelyhood that they will be dropped too.
2270 * This technique also gives more time to VM driver to pick-up packets.
2271 * In case the traffic flows from physical to virtual interfaces, this
2272 * technique will end-up leveraging the physical NIC buffer in order to
2273 * absorb the VM's CPU jitter.
2274 */
2275 if (n_left && (error == VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOBUF) && retry)
2276 {
2277 retry--;
2278 goto retry;
Damjan Marion00a9dca2016-08-17 17:05:46 +02002279 }
Ed Warnickecb9cada2015-12-08 15:45:58 -07002280
Ed Warnickecb9cada2015-12-08 15:45:58 -07002281 /* interrupt (call) handling */
Steven7312cc72017-03-15 21:18:55 -07002282 if ((rxvq->callfd_idx != ~0) &&
2283 !(rxvq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
Damjan Marion00a9dca2016-08-17 17:05:46 +02002284 {
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00002285 rxvq->n_since_last_int += frame->n_vectors - n_left;
Ed Warnickecb9cada2015-12-08 15:45:58 -07002286
Damjan Marion00a9dca2016-08-17 17:05:46 +02002287 if (rxvq->n_since_last_int > vum->coalesce_frames)
2288 vhost_user_send_call (vm, rxvq);
2289 }
Ed Warnickecb9cada2015-12-08 15:45:58 -07002290
Pierre Pfistere21c5282016-09-21 08:04:59 +01002291 vhost_user_vring_unlock (vui, qid);
Ed Warnickecb9cada2015-12-08 15:45:58 -07002292
Pierre Pfistere21c5282016-09-21 08:04:59 +01002293done3:
Damjan Marion00a9dca2016-08-17 17:05:46 +02002294 if (PREDICT_FALSE (n_left && error != VHOST_USER_TX_FUNC_ERROR_NONE))
2295 {
2296 vlib_error_count (vm, node->node_index, error, n_left);
2297 vlib_increment_simple_counter
2298 (vnet_main.interface_main.sw_if_counters
2299 + VNET_INTERFACE_COUNTER_DROP,
Stevend7727532017-06-09 18:49:17 -07002300 thread_index, vui->sw_if_index, n_left);
Damjan Marion00a9dca2016-08-17 17:05:46 +02002301 }
Pierre Pfister328e99b2016-02-12 13:18:42 +00002302
Ed Warnickecb9cada2015-12-08 15:45:58 -07002303 vlib_buffer_free (vm, vlib_frame_args (frame), frame->n_vectors);
2304 return frame->n_vectors;
2305}
2306
/**
 * @brief Process node that fires coalesced guest interrupts (calls).
 *
 * Sleeps until either an event arrives or the current timeout expires,
 * then walks every vhost-user interface/queue and sends the pending
 * "call" notification for any vring whose interrupt deadline has passed.
 * The next wakeup is scheduled for the earliest not-yet-expired deadline,
 * clamped below by 1 ms and effectively disabled (100-year timeout) when
 * VHOST_USER_EVENT_STOP_TIMER is received.
 */
static uword
vhost_user_send_interrupt_process (vlib_main_t * vm,
				   vlib_node_runtime_t * rt, vlib_frame_t * f)
{
  vhost_user_intf_t *vui;
  f64 timeout = 3153600000.0 /* 100 years */ ;
  uword event_type, *event_data = 0;
  vhost_user_main_t *vum = &vhost_user_main;
  u16 *queue;
  f64 now, poll_time_remaining;
  f64 next_timeout;
  u8 stop_timer = 0;

  while (1)
    {
      poll_time_remaining =
	vlib_process_wait_for_event_or_clock (vm, timeout);
      event_type = vlib_process_get_events (vm, &event_data);
      vec_reset_length (event_data);

      /*
       * Use the remaining timeout if it is less than coalesce time to avoid
       * resetting the existing timer in the middle of expiration
       */
      timeout = poll_time_remaining;
      if (vlib_process_suspend_time_is_zero (timeout) ||
	  (timeout > vum->coalesce_time))
	timeout = vum->coalesce_time;

      now = vlib_time_now (vm);
      switch (event_type)
	{
	case VHOST_USER_EVENT_STOP_TIMER:
	  stop_timer = 1;
	  break;

	case VHOST_USER_EVENT_START_TIMER:
	  stop_timer = 0;
	  /* If a timer is already pending, keep it; otherwise fall through
	   * and scan the queues immediately. */
	  if (!vlib_process_suspend_time_is_zero (poll_time_remaining))
	    break;
	  /* fall through */

	case ~0:
	  /* *INDENT-OFF* */
	  pool_foreach (vui, vum->vhost_user_interfaces, {
	      next_timeout = timeout;
	      vec_foreach (queue, vui->rx_queues)
		{
		  vhost_user_vring_t *rxvq =
		    &vui->vrings[VHOST_VRING_IDX_RX (*queue)];
		  vhost_user_vring_t *txvq =
		    &vui->vrings[VHOST_VRING_IDX_TX (*queue)];

		  /* Send the call now if the deadline passed, otherwise
		   * remember how long until it does. */
		  if (txvq->n_since_last_int)
		    {
		      if (now >= txvq->int_deadline)
			vhost_user_send_call (vm, txvq);
		      else
			next_timeout = txvq->int_deadline - now;
		    }

		  if (rxvq->n_since_last_int)
		    {
		      if (now >= rxvq->int_deadline)
			vhost_user_send_call (vm, rxvq);
		      else
			next_timeout = rxvq->int_deadline - now;
		    }

		  /* Track the earliest pending deadline across all queues */
		  if ((next_timeout < timeout) && (next_timeout > 0.0))
		    timeout = next_timeout;
		}
	  });
	  /* *INDENT-ON* */
	  break;

	default:
	  clib_warning ("BUG: unhandled event type %d", event_type);
	  break;
	}
      /* No less than 1 millisecond */
      if (timeout < 1e-3)
	timeout = 1e-3;
      if (stop_timer)
	timeout = 3153600000.0;
    }
  return 0;
}
2395
/* *INDENT-OFF* */
/* Process node driving coalesced interrupt delivery; signalled with
 * VHOST_USER_EVENT_START_TIMER / VHOST_USER_EVENT_STOP_TIMER. */
VLIB_REGISTER_NODE (vhost_user_send_interrupt_node,static) = {
    .function = vhost_user_send_interrupt_process,
    .type = VLIB_NODE_TYPE_PROCESS,
    .name = "vhost-user-send-interrupt-process",
};
/* *INDENT-ON* */
2403
/**
 * @brief Rx-mode change callback (polling / interrupt / adaptive).
 *
 * Maintains vum->ifq_count, the number of interrupt-capable queues, and
 * starts/stops the send-interrupt process timer on the 0->1 and 1->0
 * transitions. Also programs the vring "used" flags so the guest knows
 * whether it must kick us (polling suppresses notifications).
 *
 * @return NULL on success, clib error if the driver did not supply a
 *         kickfd (interrupt mode impossible) or the mode is unknown.
 */
static clib_error_t *
vhost_user_interface_rx_mode_change (vnet_main_t * vnm, u32 hw_if_index,
				     u32 qid, vnet_hw_interface_rx_mode mode)
{
  vlib_main_t *vm = vnm->vlib_main;
  vnet_hw_interface_t *hif = vnet_get_hw_interface (vnm, hw_if_index);
  vhost_user_main_t *vum = &vhost_user_main;
  vhost_user_intf_t *vui =
    pool_elt_at_index (vum->vhost_user_interfaces, hif->dev_instance);
  vhost_user_vring_t *txvq = &vui->vrings[VHOST_VRING_IDX_TX (qid)];

  if ((mode == VNET_HW_INTERFACE_RX_MODE_INTERRUPT) ||
      (mode == VNET_HW_INTERFACE_RX_MODE_ADAPTIVE))
    {
      if (txvq->kickfd_idx == ~0)
	{
	  // We cannot support interrupt mode if the driver opts out
	  return clib_error_return (0, "Driver does not support interrupt");
	}
      if (txvq->mode == VNET_HW_INTERFACE_RX_MODE_POLLING)
	{
	  vum->ifq_count++;
	  // Start the timer if this is the first encounter on interrupt
	  // interface/queue
	  if ((vum->ifq_count == 1) &&
	      (vum->coalesce_time > 0.0) && (vum->coalesce_frames > 0))
	    vlib_process_signal_event (vm,
				       vhost_user_send_interrupt_node.index,
				       VHOST_USER_EVENT_START_TIMER, 0);
	}
    }
  else if (mode == VNET_HW_INTERFACE_RX_MODE_POLLING)
    {
      /* Only decrement if this queue was previously counted as interrupt */
      if (((txvq->mode == VNET_HW_INTERFACE_RX_MODE_INTERRUPT) ||
	   (txvq->mode == VNET_HW_INTERFACE_RX_MODE_ADAPTIVE)) &&
	  vum->ifq_count)
	{
	  vum->ifq_count--;
	  // Stop the timer if there is no more interrupt interface/queue
	  if ((vum->ifq_count == 0) &&
	      (vum->coalesce_time > 0.0) && (vum->coalesce_frames > 0))
	    vlib_process_signal_event (vm,
				       vhost_user_send_interrupt_node.index,
				       VHOST_USER_EVENT_STOP_TIMER, 0);
	}
    }

  txvq->mode = mode;
  /* Tell the guest whether notifications (kicks) are needed */
  if (mode == VNET_HW_INTERFACE_RX_MODE_POLLING)
    txvq->used->flags = VRING_USED_F_NO_NOTIFY;
  else if ((mode == VNET_HW_INTERFACE_RX_MODE_ADAPTIVE) ||
	   (mode == VNET_HW_INTERFACE_RX_MODE_INTERRUPT))
    txvq->used->flags = 0;
  else
    {
      clib_warning ("BUG: unhandled mode %d changed for if %d queue %d", mode,
		    hw_if_index, qid);
      return clib_error_return (0, "unsupported");
    }

  return 0;
}
2466
Ed Warnickecb9cada2015-12-08 15:45:58 -07002467static clib_error_t *
Damjan Marion00a9dca2016-08-17 17:05:46 +02002468vhost_user_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index,
2469 u32 flags)
Ed Warnickecb9cada2015-12-08 15:45:58 -07002470{
Damjan Marion00a9dca2016-08-17 17:05:46 +02002471 vnet_hw_interface_t *hif = vnet_get_hw_interface (vnm, hw_if_index);
Ed Warnickecb9cada2015-12-08 15:45:58 -07002472 uword is_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
Damjan Marion00a9dca2016-08-17 17:05:46 +02002473 vhost_user_main_t *vum = &vhost_user_main;
2474 vhost_user_intf_t *vui =
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00002475 pool_elt_at_index (vum->vhost_user_interfaces, hif->dev_instance);
Ed Warnickecb9cada2015-12-08 15:45:58 -07002476
2477 vui->admin_up = is_up;
2478
Yoann Desmouceaux35df2e12017-09-20 11:00:42 +02002479 if (is_up && vui->is_up)
Ed Warnickecb9cada2015-12-08 15:45:58 -07002480 vnet_hw_interface_set_flags (vnm, vui->hw_if_index,
Damjan Marion00a9dca2016-08-17 17:05:46 +02002481 VNET_HW_INTERFACE_FLAG_LINK_UP);
Ed Warnickecb9cada2015-12-08 15:45:58 -07002482
2483 return /* no error */ 0;
2484}
2485
/* *INDENT-OFF* */
/* vnet device class for vhost-user: wires the TX function, admin up/down
 * and rx-mode-change callbacks, naming/renumbering and trace formatting. */
VNET_DEVICE_CLASS (vhost_user_dev_class,static) = {
  .name = "vhost-user",
  .tx_function = vhost_user_tx,
  .tx_function_n_errors = VHOST_USER_TX_FUNC_N_ERROR,
  .tx_function_error_strings = vhost_user_tx_func_error_strings,
  .format_device_name = format_vhost_user_interface_name,
  .name_renumber = vhost_user_name_renumber,
  .admin_up_down_function = vhost_user_interface_admin_up_down,
  .rx_mode_change_function = vhost_user_interface_rx_mode_change,
  .format_tx_trace = format_vhost_trace,
};

VLIB_DEVICE_TX_FUNCTION_MULTIARCH (vhost_user_dev_class,
				   vhost_user_tx)
/* *INDENT-ON* */
/**
 * @brief Housekeeping process for vhost-user client connections.
 *
 * Every 3 seconds, for each interface configured in client mode
 * (unix_server_index == ~0):
 *  - if not yet connected, try to connect() to the backend's unix socket
 *    (non-blocking connect so a stuck peer cannot hang VPP);
 *  - if connected, probe the socket with getsockopt(SO_ERROR) and tear the
 *    connection down if it reports an error.
 *
 * A single sockfd is kept across iterations so a failed socket() call is
 * not repeated per interface; it is handed to clib_file_add on success.
 */
static uword
vhost_user_process (vlib_main_t * vm,
		    vlib_node_runtime_t * rt, vlib_frame_t * f)
{
  vhost_user_main_t *vum = &vhost_user_main;
  vhost_user_intf_t *vui;
  struct sockaddr_un sun;
  int sockfd;
  clib_file_t template = { 0 };
  f64 timeout = 3153600000.0 /* 100 years */ ;
  uword *event_data = 0;

  sockfd = -1;
  sun.sun_family = AF_UNIX;
  template.read_function = vhost_user_socket_read;
  template.error_function = vhost_user_socket_error;

  while (1)
    {
      vlib_process_wait_for_event_or_clock (vm, timeout);
      vlib_process_get_events (vm, &event_data);
      vec_reset_length (event_data);

      /* Re-scan every 3 seconds after the first wakeup */
      timeout = 3.0;

      /* *INDENT-OFF* */
      pool_foreach (vui, vum->vhost_user_interfaces, {

	  if (vui->unix_server_index == ~0) { //Nothing to do for server sockets
	      if (vui->clib_file_index == ~0)
		{
		  /* Reuse a previously created (unconsumed) sockfd if any */
		  if ((sockfd < 0) &&
		      ((sockfd = socket (AF_UNIX, SOCK_STREAM, 0)) < 0))
		    {
		      /*
		       * 1st time error or new error for this interface,
		       * spit out the message and record the error
		       */
		      if (!vui->sock_errno || (vui->sock_errno != errno))
			{
			  clib_unix_warning
			    ("Error: Could not open unix socket for %s",
			     vui->sock_filename);
			  vui->sock_errno = errno;
			}
		      continue;
		    }

		  /* try to connect */
		  strncpy (sun.sun_path, (char *) vui->sock_filename,
			   sizeof (sun.sun_path) - 1);

		  /* Avoid hanging VPP if the other end does not accept */
		  if (fcntl(sockfd, F_SETFL, O_NONBLOCK) < 0)
		      clib_unix_warning ("fcntl");

		  if (connect (sockfd, (struct sockaddr *) &sun,
			       sizeof (struct sockaddr_un)) == 0)
		    {
		      /* Set the socket to blocking as it was before */
		      if (fcntl(sockfd, F_SETFL, 0) < 0)
			clib_unix_warning ("fcntl2");

		      vui->sock_errno = 0;
		      template.file_descriptor = sockfd;
		      template.private_data =
			  vui - vhost_user_main.vhost_user_interfaces;
		      vui->clib_file_index = clib_file_add (&file_main, &template);

		      /* This sockfd is considered consumed */
		      sockfd = -1;
		    }
		  else
		    {
		      vui->sock_errno = errno;
		    }
		}
	      else
		{
		  /* check if socket is alive */
		  int error = 0;
		  socklen_t len = sizeof (error);
		  int fd = UNIX_GET_FD(vui->clib_file_index);
		  int retval =
		      getsockopt (fd, SOL_SOCKET, SO_ERROR, &error, &len);

		  if (retval)
		    {
		      DBG_SOCK ("getsockopt returned %d", retval);
		      vhost_user_if_disconnect (vui);
		    }
		}
	  }
      });
      /* *INDENT-ON* */
    }
  return 0;
}
2601
/* *INDENT-OFF* */
/* Process node that periodically reconnects client-mode sockets and
 * health-checks established vhost-user connections. */
VLIB_REGISTER_NODE (vhost_user_process_node,static) = {
    .function = vhost_user_process,
    .type = VLIB_NODE_TYPE_PROCESS,
    .name = "vhost-user-process",
};
/* *INDENT-ON* */
Ed Warnickecb9cada2015-12-08 15:45:58 -07002609
/**
 * Disables and resets the interface structure.
 * It can then be either initialized again, or removed from used interfaces.
 *
 * Tears down in order: socket connection, vring state, per-vring spinlocks,
 * the listening server socket (server mode only, including unlinking the
 * socket file), and finally the sock-name -> if-index hash entry.
 */
static void
vhost_user_term_if (vhost_user_intf_t * vui)
{
  int q;
  vhost_user_main_t *vum = &vhost_user_main;

  // disconnect interface sockets
  vhost_user_if_disconnect (vui);
  vhost_user_update_iface_state (vui);

  /* Free the per-vring spinlocks allocated in vhost_user_vui_init */
  for (q = 0; q < VHOST_VRING_MAX_N; q++)
    {
      clib_mem_free ((void *) vui->vring_locks[q]);
    }

  if (vui->unix_server_index != ~0)
    {
      //Close server socket
      clib_file_t *uf = pool_elt_at_index (file_main.file_pool,
					   vui->unix_server_index);
      clib_file_del (&file_main, uf);
      vui->unix_server_index = ~0;
      unlink (vui->sock_filename);
    }

  mhash_unset (&vum->if_index_by_sock_name, vui->sock_filename,
	       &vui->if_index);
}
Ed Warnickecb9cada2015-12-08 15:45:58 -07002642
/**
 * @brief Delete a vhost-user interface.
 *
 * Validates that sw_if_index really is a vhost-user device, releases any
 * interrupt-mode accounting (stopping the send-interrupt timer when the
 * last interrupt queue goes away), tears down socket/vring state, and
 * removes the ethernet interface and pool entry.
 *
 * @return 0 on success, VNET_API_ERROR_INVALID_SW_IF_INDEX if the index
 *         does not refer to a vhost-user interface.
 */
int
vhost_user_delete_if (vnet_main_t * vnm, vlib_main_t * vm, u32 sw_if_index)
{
  vhost_user_main_t *vum = &vhost_user_main;
  vhost_user_intf_t *vui;
  int rv = 0;
  vnet_hw_interface_t *hwif;
  u16 *queue;

  if (!(hwif = vnet_get_sup_hw_interface (vnm, sw_if_index)) ||
      hwif->dev_class_index != vhost_user_dev_class.index)
    return VNET_API_ERROR_INVALID_SW_IF_INDEX;

  DBG_SOCK ("Deleting vhost-user interface %s (instance %d)",
	    hwif->name, hwif->dev_instance);

  vui = pool_elt_at_index (vum->vhost_user_interfaces, hwif->dev_instance);

  /* Release interrupt-queue accounting for every rx queue of this
   * interface; stop the global timer when the count reaches zero. */
  vec_foreach (queue, vui->rx_queues)
  {
    vhost_user_vring_t *txvq;

    txvq = &vui->vrings[VHOST_VRING_IDX_TX (*queue)];
    if ((vum->ifq_count > 0) &&
	((txvq->mode == VNET_HW_INTERFACE_RX_MODE_INTERRUPT) ||
	 (txvq->mode == VNET_HW_INTERFACE_RX_MODE_ADAPTIVE)))
      {
	vum->ifq_count--;
	// Stop the timer if there is no more interrupt interface/queue
	if ((vum->ifq_count == 0) &&
	    (vum->coalesce_time > 0.0) && (vum->coalesce_frames > 0))
	  {
	    vlib_process_signal_event (vm,
				       vhost_user_send_interrupt_node.index,
				       VHOST_USER_EVENT_STOP_TIMER, 0);
	    break;
	  }
      }
  }

  // Disable and reset interface
  vhost_user_term_if (vui);

  // Reset renumbered iface
  if (hwif->dev_instance <
      vec_len (vum->show_dev_instance_by_real_dev_instance))
    vum->show_dev_instance_by_real_dev_instance[hwif->dev_instance] = ~0;

  // Delete ethernet interface
  ethernet_delete_interface (vnm, vui->hw_if_index);

  // Back to pool
  pool_put (vum->vhost_user_interfaces, vui);

  return rv;
}
2699
/**
 * @brief Main-loop exit hook: delete all vhost-user interfaces.
 *
 * Runs under the worker-thread barrier so no worker is mid-packet while
 * sockets and vrings are torn down. Deleting every interface also unlinks
 * the server socket files created by this process.
 */
static clib_error_t *
vhost_user_exit (vlib_main_t * vm)
{
  vnet_main_t *vnm = vnet_get_main ();
  vhost_user_main_t *vum = &vhost_user_main;
  vhost_user_intf_t *vui;

  vlib_worker_thread_barrier_sync (vlib_get_main ());
  /* *INDENT-OFF* */
  pool_foreach (vui, vum->vhost_user_interfaces, {
      vhost_user_delete_if (vnm, vm, vui->sw_if_index);
  });
  /* *INDENT-ON* */
  vlib_worker_thread_barrier_release (vlib_get_main ());
  return 0;
}

VLIB_MAIN_LOOP_EXIT_FUNCTION (vhost_user_exit);
2718
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00002719/**
2720 * Open server unix socket on specified sock_filename.
2721 */
Damjan Marion00a9dca2016-08-17 17:05:46 +02002722static int
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00002723vhost_user_init_server_sock (const char *sock_filename, int *sock_fd)
Ed Warnickecb9cada2015-12-08 15:45:58 -07002724{
Pierre Pfister5afccb22016-07-25 14:32:02 +01002725 int rv = 0;
Damjan Marion00a9dca2016-08-17 17:05:46 +02002726 struct sockaddr_un un = { };
Ed Warnickecb9cada2015-12-08 15:45:58 -07002727 int fd;
2728 /* create listening socket */
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00002729 if ((fd = socket (AF_UNIX, SOCK_STREAM, 0)) < 0)
2730 return VNET_API_ERROR_SYSCALL_ERROR_1;
Ed Warnickecb9cada2015-12-08 15:45:58 -07002731
2732 un.sun_family = AF_UNIX;
Damjan Marion00a9dca2016-08-17 17:05:46 +02002733 strncpy ((char *) un.sun_path, (char *) sock_filename,
2734 sizeof (un.sun_path) - 1);
Ed Warnickecb9cada2015-12-08 15:45:58 -07002735
2736 /* remove if exists */
Damjan Marion00a9dca2016-08-17 17:05:46 +02002737 unlink ((char *) sock_filename);
Ed Warnickecb9cada2015-12-08 15:45:58 -07002738
Damjan Marion00a9dca2016-08-17 17:05:46 +02002739 if (bind (fd, (struct sockaddr *) &un, sizeof (un)) == -1)
2740 {
2741 rv = VNET_API_ERROR_SYSCALL_ERROR_2;
2742 goto error;
2743 }
Ed Warnickecb9cada2015-12-08 15:45:58 -07002744
Damjan Marion00a9dca2016-08-17 17:05:46 +02002745 if (listen (fd, 1) == -1)
2746 {
2747 rv = VNET_API_ERROR_SYSCALL_ERROR_3;
2748 goto error;
2749 }
Ed Warnickecb9cada2015-12-08 15:45:58 -07002750
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00002751 *sock_fd = fd;
2752 return 0;
Ed Warnickecb9cada2015-12-08 15:45:58 -07002753
2754error:
Damjan Marion00a9dca2016-08-17 17:05:46 +02002755 close (fd);
Ed Warnickecb9cada2015-12-08 15:45:58 -07002756 return rv;
2757}
2758
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00002759/**
2760 * Create ethernet interface for vhost user interface.
2761 */
Damjan Marion00a9dca2016-08-17 17:05:46 +02002762static void
2763vhost_user_create_ethernet (vnet_main_t * vnm, vlib_main_t * vm,
2764 vhost_user_intf_t * vui, u8 * hwaddress)
Ed Warnickecb9cada2015-12-08 15:45:58 -07002765{
Damjan Marion00a9dca2016-08-17 17:05:46 +02002766 vhost_user_main_t *vum = &vhost_user_main;
Ed Warnickecb9cada2015-12-08 15:45:58 -07002767 u8 hwaddr[6];
Damjan Marion00a9dca2016-08-17 17:05:46 +02002768 clib_error_t *error;
Ed Warnickecb9cada2015-12-08 15:45:58 -07002769
2770 /* create hw and sw interface */
Damjan Marion00a9dca2016-08-17 17:05:46 +02002771 if (hwaddress)
2772 {
2773 clib_memcpy (hwaddr, hwaddress, 6);
2774 }
2775 else
2776 {
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00002777 random_u32 (&vum->random);
2778 clib_memcpy (hwaddr + 2, &vum->random, sizeof (vum->random));
Damjan Marion00a9dca2016-08-17 17:05:46 +02002779 hwaddr[0] = 2;
2780 hwaddr[1] = 0xfe;
2781 }
Ed Warnickecb9cada2015-12-08 15:45:58 -07002782
2783 error = ethernet_register_interface
2784 (vnm,
2785 vhost_user_dev_class.index,
Damjan Marion00a9dca2016-08-17 17:05:46 +02002786 vui - vum->vhost_user_interfaces /* device instance */ ,
2787 hwaddr /* ethernet address */ ,
2788 &vui->hw_if_index, 0 /* flag change */ );
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00002789
Ed Warnickecb9cada2015-12-08 15:45:58 -07002790 if (error)
2791 clib_error_report (error);
Pierre Pfister328e99b2016-02-12 13:18:42 +00002792
2793 vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, vui->hw_if_index);
2794 hi->max_l3_packet_bytes[VLIB_RX] = hi->max_l3_packet_bytes[VLIB_TX] = 9000;
Ed Warnickecb9cada2015-12-08 15:45:58 -07002795}
2796
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00002797/*
2798 * Initialize vui with specified attributes
2799 */
Damjan Marion00a9dca2016-08-17 17:05:46 +02002800static void
2801vhost_user_vui_init (vnet_main_t * vnm,
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00002802 vhost_user_intf_t * vui,
2803 int server_sock_fd,
Damjan Marion00a9dca2016-08-17 17:05:46 +02002804 const char *sock_filename,
Stevenf3b53642017-05-01 14:03:02 -07002805 u64 feature_mask, u32 * sw_if_index)
Ed Warnickecb9cada2015-12-08 15:45:58 -07002806{
Damjan Marion00a9dca2016-08-17 17:05:46 +02002807 vnet_sw_interface_t *sw;
Yoann Desmouceaux4667c222016-02-24 22:51:00 +01002808 int q;
Steven5445f5f2017-04-25 16:16:00 -07002809 vhost_user_main_t *vum = &vhost_user_main;
Stevenf3b53642017-05-01 14:03:02 -07002810 vnet_hw_interface_t *hw;
Ed Warnickecb9cada2015-12-08 15:45:58 -07002811
Stevenf3b53642017-05-01 14:03:02 -07002812 hw = vnet_get_hw_interface (vnm, vui->hw_if_index);
2813 sw = vnet_get_hw_sw_interface (vnm, vui->hw_if_index);
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00002814 if (server_sock_fd != -1)
2815 {
Damjan Marion56dd5432017-09-08 19:52:02 +02002816 clib_file_t template = { 0 };
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00002817 template.read_function = vhost_user_socksvr_accept_ready;
2818 template.file_descriptor = server_sock_fd;
Steven5445f5f2017-04-25 16:16:00 -07002819 template.private_data = vui - vum->vhost_user_interfaces; //hw index
Damjan Marion56dd5432017-09-08 19:52:02 +02002820 vui->unix_server_index = clib_file_add (&file_main, &template);
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00002821 }
2822 else
2823 {
2824 vui->unix_server_index = ~0;
2825 }
2826
Ed Warnickecb9cada2015-12-08 15:45:58 -07002827 vui->sw_if_index = sw->sw_if_index;
Damjan Marion00a9dca2016-08-17 17:05:46 +02002828 strncpy (vui->sock_filename, sock_filename,
2829 ARRAY_LEN (vui->sock_filename) - 1);
Ed Warnickecb9cada2015-12-08 15:45:58 -07002830 vui->sock_errno = 0;
2831 vui->is_up = 0;
2832 vui->feature_mask = feature_mask;
Damjan Marion56dd5432017-09-08 19:52:02 +02002833 vui->clib_file_index = ~0;
Yoann Desmouceaux4667c222016-02-24 22:51:00 +01002834 vui->log_base_addr = 0;
Steven5445f5f2017-04-25 16:16:00 -07002835 vui->if_index = vui - vum->vhost_user_interfaces;
2836 mhash_set_mem (&vum->if_index_by_sock_name, vui->sock_filename,
2837 &vui->if_index, 0);
Yoann Desmouceaux4667c222016-02-24 22:51:00 +01002838
Pierre Pfistere21c5282016-09-21 08:04:59 +01002839 for (q = 0; q < VHOST_VRING_MAX_N; q++)
2840 vhost_user_vring_init (vui, q);
Ed Warnickecb9cada2015-12-08 15:45:58 -07002841
Stevenf3b53642017-05-01 14:03:02 -07002842 hw->flags |= VNET_HW_INTERFACE_FLAG_SUPPORTS_INT_MODE;
Damjan Marion00a9dca2016-08-17 17:05:46 +02002843 vnet_hw_interface_set_flags (vnm, vui->hw_if_index, 0);
Ed Warnickecb9cada2015-12-08 15:45:58 -07002844
2845 if (sw_if_index)
Damjan Marion00a9dca2016-08-17 17:05:46 +02002846 *sw_if_index = vui->sw_if_index;
Ed Warnickecb9cada2015-12-08 15:45:58 -07002847
Pierre Pfistere21c5282016-09-21 08:04:59 +01002848 for (q = 0; q < VHOST_VRING_MAX_N; q++)
Damjan Marion00a9dca2016-08-17 17:05:46 +02002849 {
Pierre Pfistere21c5282016-09-21 08:04:59 +01002850 vui->vring_locks[q] = clib_mem_alloc_aligned (CLIB_CACHE_LINE_BYTES,
2851 CLIB_CACHE_LINE_BYTES);
2852 memset ((void *) vui->vring_locks[q], 0, CLIB_CACHE_LINE_BYTES);
Damjan Marion00a9dca2016-08-17 17:05:46 +02002853 }
Pierre Pfistere21c5282016-09-21 08:04:59 +01002854
2855 vec_validate (vui->per_cpu_tx_qid,
2856 vlib_get_thread_main ()->n_vlib_mains - 1);
2857 vhost_user_tx_thread_placement (vui);
Ed Warnickecb9cada2015-12-08 15:45:58 -07002858}
2859
Damjan Marion00a9dca2016-08-17 17:05:46 +02002860int
2861vhost_user_create_if (vnet_main_t * vnm, vlib_main_t * vm,
2862 const char *sock_filename,
2863 u8 is_server,
2864 u32 * sw_if_index,
2865 u64 feature_mask,
Stevenf3b53642017-05-01 14:03:02 -07002866 u8 renumber, u32 custom_dev_instance, u8 * hwaddr)
Ed Warnickecb9cada2015-12-08 15:45:58 -07002867{
Damjan Marion00a9dca2016-08-17 17:05:46 +02002868 vhost_user_intf_t *vui = NULL;
Ed Warnickecb9cada2015-12-08 15:45:58 -07002869 u32 sw_if_idx = ~0;
Ed Warnickecb9cada2015-12-08 15:45:58 -07002870 int rv = 0;
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00002871 int server_sock_fd = -1;
Steven7312cc72017-03-15 21:18:55 -07002872 vhost_user_main_t *vum = &vhost_user_main;
Steven5445f5f2017-04-25 16:16:00 -07002873 uword *if_index;
Ed Warnickecb9cada2015-12-08 15:45:58 -07002874
Wojciech Dec3cd9eed2017-01-03 10:38:37 +01002875 if (sock_filename == NULL || !(strlen (sock_filename) > 0))
2876 {
2877 return VNET_API_ERROR_INVALID_ARGUMENT;
2878 }
2879
Steven5445f5f2017-04-25 16:16:00 -07002880 if_index = mhash_get (&vum->if_index_by_sock_name, (void *) sock_filename);
2881 if (if_index)
2882 {
2883 if (sw_if_index)
2884 {
2885 vui = &vum->vhost_user_interfaces[*if_index];
2886 *sw_if_index = vui->sw_if_index;
2887 }
2888 return VNET_API_ERROR_IF_ALREADY_EXISTS;
2889 }
2890
Damjan Marion00a9dca2016-08-17 17:05:46 +02002891 if (is_server)
2892 {
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00002893 if ((rv =
2894 vhost_user_init_server_sock (sock_filename, &server_sock_fd)) != 0)
Damjan Marion00a9dca2016-08-17 17:05:46 +02002895 {
2896 return rv;
2897 }
2898 }
Ed Warnickecb9cada2015-12-08 15:45:58 -07002899
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00002900 pool_get (vhost_user_main.vhost_user_interfaces, vui);
Ed Warnickecb9cada2015-12-08 15:45:58 -07002901
Pierre Pfisteref65cb02016-02-19 13:52:44 +00002902 vhost_user_create_ethernet (vnm, vm, vui, hwaddr);
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00002903 vhost_user_vui_init (vnm, vui, server_sock_fd, sock_filename,
Stevenf3b53642017-05-01 14:03:02 -07002904 feature_mask, &sw_if_idx);
Ed Warnickecb9cada2015-12-08 15:45:58 -07002905
Damjan Marion00a9dca2016-08-17 17:05:46 +02002906 if (renumber)
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00002907 vnet_interface_name_renumber (sw_if_idx, custom_dev_instance);
Ed Warnickecb9cada2015-12-08 15:45:58 -07002908
2909 if (sw_if_index)
Damjan Marion00a9dca2016-08-17 17:05:46 +02002910 *sw_if_index = sw_if_idx;
Ed Warnickecb9cada2015-12-08 15:45:58 -07002911
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00002912 // Process node must connect
2913 vlib_process_signal_event (vm, vhost_user_process_node.index, 0, 0);
Steven7312cc72017-03-15 21:18:55 -07002914
Ed Warnickecb9cada2015-12-08 15:45:58 -07002915 return rv;
2916}
2917
Damjan Marion00a9dca2016-08-17 17:05:46 +02002918int
2919vhost_user_modify_if (vnet_main_t * vnm, vlib_main_t * vm,
2920 const char *sock_filename,
2921 u8 is_server,
2922 u32 sw_if_index,
Stevenf3b53642017-05-01 14:03:02 -07002923 u64 feature_mask, u8 renumber, u32 custom_dev_instance)
Ed Warnickecb9cada2015-12-08 15:45:58 -07002924{
Damjan Marion00a9dca2016-08-17 17:05:46 +02002925 vhost_user_main_t *vum = &vhost_user_main;
2926 vhost_user_intf_t *vui = NULL;
Ed Warnickecb9cada2015-12-08 15:45:58 -07002927 u32 sw_if_idx = ~0;
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00002928 int server_sock_fd = -1;
Ed Warnickecb9cada2015-12-08 15:45:58 -07002929 int rv = 0;
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00002930 vnet_hw_interface_t *hwif;
Steven5445f5f2017-04-25 16:16:00 -07002931 uword *if_index;
Ed Warnickecb9cada2015-12-08 15:45:58 -07002932
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00002933 if (!(hwif = vnet_get_sup_hw_interface (vnm, sw_if_index)) ||
2934 hwif->dev_class_index != vhost_user_dev_class.index)
2935 return VNET_API_ERROR_INVALID_SW_IF_INDEX;
Ed Warnickecb9cada2015-12-08 15:45:58 -07002936
Steven5445f5f2017-04-25 16:16:00 -07002937 if (sock_filename == NULL || !(strlen (sock_filename) > 0))
2938 return VNET_API_ERROR_INVALID_ARGUMENT;
2939
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00002940 vui = vec_elt_at_index (vum->vhost_user_interfaces, hwif->dev_instance);
Ed Warnickecb9cada2015-12-08 15:45:58 -07002941
Steven5445f5f2017-04-25 16:16:00 -07002942 /*
2943 * Disallow changing the interface to have the same path name
2944 * as other interface
2945 */
2946 if_index = mhash_get (&vum->if_index_by_sock_name, (void *) sock_filename);
2947 if (if_index && (*if_index != vui->if_index))
2948 return VNET_API_ERROR_IF_ALREADY_EXISTS;
2949
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00002950 // First try to open server socket
Damjan Marion00a9dca2016-08-17 17:05:46 +02002951 if (is_server)
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00002952 if ((rv = vhost_user_init_server_sock (sock_filename,
2953 &server_sock_fd)) != 0)
2954 return rv;
Ed Warnickecb9cada2015-12-08 15:45:58 -07002955
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00002956 vhost_user_term_if (vui);
2957 vhost_user_vui_init (vnm, vui, server_sock_fd,
Stevenf3b53642017-05-01 14:03:02 -07002958 sock_filename, feature_mask, &sw_if_idx);
Ed Warnickecb9cada2015-12-08 15:45:58 -07002959
Damjan Marion00a9dca2016-08-17 17:05:46 +02002960 if (renumber)
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00002961 vnet_interface_name_renumber (sw_if_idx, custom_dev_instance);
Ed Warnickecb9cada2015-12-08 15:45:58 -07002962
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00002963 // Process node must connect
2964 vlib_process_signal_event (vm, vhost_user_process_node.index, 0, 0);
Steven7312cc72017-03-15 21:18:55 -07002965
Ed Warnickecb9cada2015-12-08 15:45:58 -07002966 return rv;
2967}
2968
2969clib_error_t *
2970vhost_user_connect_command_fn (vlib_main_t * vm,
Damjan Marion00a9dca2016-08-17 17:05:46 +02002971 unformat_input_t * input,
2972 vlib_cli_command_t * cmd)
Ed Warnickecb9cada2015-12-08 15:45:58 -07002973{
Damjan Marion00a9dca2016-08-17 17:05:46 +02002974 unformat_input_t _line_input, *line_input = &_line_input;
2975 u8 *sock_filename = NULL;
Ed Warnickecb9cada2015-12-08 15:45:58 -07002976 u32 sw_if_index;
2977 u8 is_server = 0;
Pierre Pfistere21c5282016-09-21 08:04:59 +01002978 u64 feature_mask = (u64) ~ (0ULL);
Ed Warnickecb9cada2015-12-08 15:45:58 -07002979 u8 renumber = 0;
2980 u32 custom_dev_instance = ~0;
Pierre Pfisteref65cb02016-02-19 13:52:44 +00002981 u8 hwaddr[6];
2982 u8 *hw = NULL;
Billy McFalla9a20e72017-02-15 11:39:12 -05002983 clib_error_t *error = NULL;
Ed Warnickecb9cada2015-12-08 15:45:58 -07002984
2985 /* Get a line of input. */
Damjan Marion00a9dca2016-08-17 17:05:46 +02002986 if (!unformat_user (input, unformat_line_input, line_input))
Ed Warnickecb9cada2015-12-08 15:45:58 -07002987 return 0;
2988
Damjan Marion00a9dca2016-08-17 17:05:46 +02002989 while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
2990 {
2991 if (unformat (line_input, "socket %s", &sock_filename))
2992 ;
2993 else if (unformat (line_input, "server"))
2994 is_server = 1;
2995 else if (unformat (line_input, "feature-mask 0x%llx", &feature_mask))
2996 ;
2997 else
2998 if (unformat
2999 (line_input, "hwaddr %U", unformat_ethernet_address, hwaddr))
3000 hw = hwaddr;
3001 else if (unformat (line_input, "renumber %d", &custom_dev_instance))
3002 {
3003 renumber = 1;
3004 }
3005 else
Billy McFalla9a20e72017-02-15 11:39:12 -05003006 {
3007 error = clib_error_return (0, "unknown input `%U'",
3008 format_unformat_error, line_input);
3009 goto done;
3010 }
Ed Warnickecb9cada2015-12-08 15:45:58 -07003011 }
Ed Warnickecb9cada2015-12-08 15:45:58 -07003012
Damjan Marion00a9dca2016-08-17 17:05:46 +02003013 vnet_main_t *vnm = vnet_get_main ();
Ed Warnickecb9cada2015-12-08 15:45:58 -07003014
Pierre Pfister5afccb22016-07-25 14:32:02 +01003015 int rv;
Damjan Marion00a9dca2016-08-17 17:05:46 +02003016 if ((rv = vhost_user_create_if (vnm, vm, (char *) sock_filename,
3017 is_server, &sw_if_index, feature_mask,
Stevenf3b53642017-05-01 14:03:02 -07003018 renumber, custom_dev_instance, hw)))
Damjan Marion00a9dca2016-08-17 17:05:46 +02003019 {
Billy McFalla9a20e72017-02-15 11:39:12 -05003020 error = clib_error_return (0, "vhost_user_create_if returned %d", rv);
3021 goto done;
Damjan Marion00a9dca2016-08-17 17:05:46 +02003022 }
Ed Warnickecb9cada2015-12-08 15:45:58 -07003023
Damjan Marion00a9dca2016-08-17 17:05:46 +02003024 vlib_cli_output (vm, "%U\n", format_vnet_sw_if_index_name, vnet_get_main (),
3025 sw_if_index);
Billy McFalla9a20e72017-02-15 11:39:12 -05003026
3027done:
3028 vec_free (sock_filename);
3029 unformat_free (line_input);
3030
3031 return error;
Ed Warnickecb9cada2015-12-08 15:45:58 -07003032}
3033
3034clib_error_t *
3035vhost_user_delete_command_fn (vlib_main_t * vm,
Damjan Marion00a9dca2016-08-17 17:05:46 +02003036 unformat_input_t * input,
3037 vlib_cli_command_t * cmd)
Ed Warnickecb9cada2015-12-08 15:45:58 -07003038{
Damjan Marion00a9dca2016-08-17 17:05:46 +02003039 unformat_input_t _line_input, *line_input = &_line_input;
Ed Warnickecb9cada2015-12-08 15:45:58 -07003040 u32 sw_if_index = ~0;
Pierre Pfisterece983d2016-11-21 12:52:22 +00003041 vnet_main_t *vnm = vnet_get_main ();
Billy McFalla9a20e72017-02-15 11:39:12 -05003042 clib_error_t *error = NULL;
Ed Warnickecb9cada2015-12-08 15:45:58 -07003043
3044 /* Get a line of input. */
Damjan Marion00a9dca2016-08-17 17:05:46 +02003045 if (!unformat_user (input, unformat_line_input, line_input))
Ed Warnickecb9cada2015-12-08 15:45:58 -07003046 return 0;
3047
Damjan Marion00a9dca2016-08-17 17:05:46 +02003048 while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
3049 {
3050 if (unformat (line_input, "sw_if_index %d", &sw_if_index))
3051 ;
Pierre Pfisterece983d2016-11-21 12:52:22 +00003052 else if (unformat
3053 (line_input, "%U", unformat_vnet_sw_interface, vnm,
3054 &sw_if_index))
3055 {
3056 vnet_hw_interface_t *hwif =
3057 vnet_get_sup_hw_interface (vnm, sw_if_index);
3058 if (hwif == NULL ||
3059 vhost_user_dev_class.index != hwif->dev_class_index)
Billy McFalla9a20e72017-02-15 11:39:12 -05003060 {
3061 error = clib_error_return (0, "Not a vhost interface");
3062 goto done;
3063 }
Pierre Pfisterece983d2016-11-21 12:52:22 +00003064 }
Damjan Marion00a9dca2016-08-17 17:05:46 +02003065 else
Billy McFalla9a20e72017-02-15 11:39:12 -05003066 {
3067 error = clib_error_return (0, "unknown input `%U'",
3068 format_unformat_error, line_input);
3069 goto done;
3070 }
Damjan Marion00a9dca2016-08-17 17:05:46 +02003071 }
Billy McFalla9a20e72017-02-15 11:39:12 -05003072
Damjan Marion00a9dca2016-08-17 17:05:46 +02003073 vhost_user_delete_if (vnm, vm, sw_if_index);
Billy McFalla9a20e72017-02-15 11:39:12 -05003074
3075done:
3076 unformat_free (line_input);
3077
3078 return error;
Ed Warnickecb9cada2015-12-08 15:45:58 -07003079}
3080
Damjan Marion00a9dca2016-08-17 17:05:46 +02003081int
3082vhost_user_dump_ifs (vnet_main_t * vnm, vlib_main_t * vm,
3083 vhost_user_intf_details_t ** out_vuids)
Ed Warnickecb9cada2015-12-08 15:45:58 -07003084{
3085 int rv = 0;
Damjan Marion00a9dca2016-08-17 17:05:46 +02003086 vhost_user_main_t *vum = &vhost_user_main;
3087 vhost_user_intf_t *vui;
3088 vhost_user_intf_details_t *r_vuids = NULL;
3089 vhost_user_intf_details_t *vuid = NULL;
3090 u32 *hw_if_indices = 0;
3091 vnet_hw_interface_t *hi;
Ed Warnickecb9cada2015-12-08 15:45:58 -07003092 u8 *s = NULL;
3093 int i;
3094
3095 if (!out_vuids)
Damjan Marion00a9dca2016-08-17 17:05:46 +02003096 return -1;
Ed Warnickecb9cada2015-12-08 15:45:58 -07003097
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00003098 pool_foreach (vui, vum->vhost_user_interfaces,
3099 vec_add1 (hw_if_indices, vui->hw_if_index);
3100 );
Ed Warnickecb9cada2015-12-08 15:45:58 -07003101
Damjan Marion00a9dca2016-08-17 17:05:46 +02003102 for (i = 0; i < vec_len (hw_if_indices); i++)
3103 {
3104 hi = vnet_get_hw_interface (vnm, hw_if_indices[i]);
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00003105 vui = pool_elt_at_index (vum->vhost_user_interfaces, hi->dev_instance);
Ed Warnickecb9cada2015-12-08 15:45:58 -07003106
Damjan Marion00a9dca2016-08-17 17:05:46 +02003107 vec_add2 (r_vuids, vuid, 1);
3108 vuid->sw_if_index = vui->sw_if_index;
3109 vuid->virtio_net_hdr_sz = vui->virtio_net_hdr_sz;
3110 vuid->features = vui->features;
Damjan Marion00a9dca2016-08-17 17:05:46 +02003111 vuid->num_regions = vui->nregions;
Marek Gradzki0578cd12017-02-13 14:19:51 +01003112 vuid->is_server = vui->unix_server_index != ~0;
Damjan Marion00a9dca2016-08-17 17:05:46 +02003113 vuid->sock_errno = vui->sock_errno;
3114 strncpy ((char *) vuid->sock_filename, (char *) vui->sock_filename,
3115 ARRAY_LEN (vuid->sock_filename) - 1);
Ed Warnickecb9cada2015-12-08 15:45:58 -07003116
Damjan Marion00a9dca2016-08-17 17:05:46 +02003117 s = format (s, "%v%c", hi->name, 0);
Ed Warnickecb9cada2015-12-08 15:45:58 -07003118
Damjan Marion00a9dca2016-08-17 17:05:46 +02003119 strncpy ((char *) vuid->if_name, (char *) s,
3120 ARRAY_LEN (vuid->if_name) - 1);
3121 _vec_len (s) = 0;
3122 }
Ed Warnickecb9cada2015-12-08 15:45:58 -07003123
3124 vec_free (s);
3125 vec_free (hw_if_indices);
3126
3127 *out_vuids = r_vuids;
3128
3129 return rv;
3130}
3131
/*
 * CLI handler for "show vhost-user [<interface> ...] [descriptors|desc]".
 * With no interface argument, dumps every vhost-user interface; with
 * "descriptors", additionally dumps each started vring's descriptor table.
 */
clib_error_t *
show_vhost_user_command_fn (vlib_main_t * vm,
			    unformat_input_t * input,
			    vlib_cli_command_t * cmd)
{
  clib_error_t *error = 0;
  vnet_main_t *vnm = vnet_get_main ();
  vhost_user_main_t *vum = &vhost_user_main;
  vhost_user_intf_t *vui;
  u32 hw_if_index, *hw_if_indices = 0;
  vnet_hw_interface_t *hi;
  u16 *queue;
  u32 ci;
  int i, j, q;
  int show_descr = 0;
  /* name/bit pairs used to pretty-print feature bitmaps below */
  struct feat_struct
  {
    u8 bit;
    char *str;
  };
  struct feat_struct *feat_entry;

  /* Table of virtio feature names, generated from foreach_virtio_net_feature;
   * terminated by a NULL str sentinel. */
  static struct feat_struct feat_array[] = {
#define _(s,b) { .str = #s, .bit = b, },
    foreach_virtio_net_feature
#undef _
    {.str = NULL}
  };

#define foreach_protocol_feature \
  _(VHOST_USER_PROTOCOL_F_MQ) \
  _(VHOST_USER_PROTOCOL_F_LOG_SHMFD)

  /* Table of vhost-user protocol feature names; NULL-terminated as above. */
  static struct feat_struct proto_feat_array[] = {
#define _(s) { .str = #s, .bit = s},
    foreach_protocol_feature
#undef _
    {.str = NULL}
  };

  /* Parse optional interface names and the "descriptors"/"desc" flag. */
  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
    {
      if (unformat
	  (input, "%U", unformat_vnet_hw_interface, vnm, &hw_if_index))
	{
	  vec_add1 (hw_if_indices, hw_if_index);
	}
      else if (unformat (input, "descriptors") || unformat (input, "desc"))
	show_descr = 1;
      else
	{
	  error = clib_error_return (0, "unknown input `%U'",
				     format_unformat_error, input);
	  goto done;
	}
    }
  /* No interface given: show all vhost-user interfaces. */
  if (vec_len (hw_if_indices) == 0)
    {
      pool_foreach (vui, vum->vhost_user_interfaces,
		    vec_add1 (hw_if_indices, vui->hw_if_index);
	  );
    }
  vlib_cli_output (vm, "Virtio vhost-user interfaces");
  vlib_cli_output (vm, "Global:\n coalesce frames %d time %e",
		   vum->coalesce_frames, vum->coalesce_time);
  vlib_cli_output (vm, " number of rx virtqueues in interrupt mode: %d",
		   vum->ifq_count);

  for (i = 0; i < vec_len (hw_if_indices); i++)
    {
      hi = vnet_get_hw_interface (vnm, hw_if_indices[i]);
      vui = pool_elt_at_index (vum->vhost_user_interfaces, hi->dev_instance);
      vlib_cli_output (vm, "Interface: %s (ifindex %d)",
		       hi->name, hw_if_indices[i]);

      vlib_cli_output (vm, "virtio_net_hdr_sz %d\n"
		       " features mask (0x%llx): \n"
		       " features (0x%llx): \n",
		       vui->virtio_net_hdr_sz, vui->feature_mask,
		       vui->features);

      /* Print the name of each negotiated virtio feature bit. */
      feat_entry = (struct feat_struct *) &feat_array;
      while (feat_entry->str)
	{
	  if (vui->features & (1ULL << feat_entry->bit))
	    vlib_cli_output (vm, "   %s (%d)", feat_entry->str,
			     feat_entry->bit);
	  feat_entry++;
	}

      /* Same for the vhost-user protocol feature bits. */
      vlib_cli_output (vm, "  protocol features (0x%llx)",
		       vui->protocol_features);
      feat_entry = (struct feat_struct *) &proto_feat_array;
      while (feat_entry->str)
	{
	  if (vui->protocol_features & (1ULL << feat_entry->bit))
	    vlib_cli_output (vm, "   %s (%d)", feat_entry->str,
			     feat_entry->bit);
	  feat_entry++;
	}

      vlib_cli_output (vm, "\n");

      vlib_cli_output (vm, " socket filename %s type %s errno \"%s\"\n\n",
		       vui->sock_filename,
		       (vui->unix_server_index != ~0) ? "server" : "client",
		       strerror (vui->sock_errno));

      /* Which worker thread polls each rx queue, and in which rx mode. */
      vlib_cli_output (vm, " rx placement: ");

      vec_foreach (queue, vui->rx_queues)
      {
	/* NOTE: this inner vnm intentionally shadows the outer one. */
	vnet_main_t *vnm = vnet_get_main ();
	uword thread_index;
	vnet_hw_interface_rx_mode mode;

	thread_index = vnet_get_device_input_thread_index (vnm,
							   vui->hw_if_index,
							   *queue);
	vnet_hw_interface_get_rx_mode (vnm, vui->hw_if_index, *queue, &mode);
	vlib_cli_output (vm, "   thread %d on vring %d, %U\n",
			 thread_index, VHOST_VRING_IDX_TX (*queue),
			 format_vnet_hw_interface_rx_mode, mode);
      }

      /* Per-thread tx queue assignment. */
      vlib_cli_output (vm, " tx placement: %s\n",
		       vui->use_tx_spinlock ? "spin-lock" : "lock-free");

      vec_foreach_index (ci, vui->per_cpu_tx_qid)
      {
	vlib_cli_output (vm, "   thread %d on vring %d\n", ci,
			 VHOST_VRING_IDX_RX (vui->per_cpu_tx_qid[ci]));
      }

      vlib_cli_output (vm, "\n");

      /* Guest memory regions shared over the vhost-user socket. */
      vlib_cli_output (vm, " Memory regions (total %d)\n", vui->nregions);

      if (vui->nregions)
	{
	  vlib_cli_output (vm,
			   " region fd guest_phys_addr memory_size userspace_addr mmap_offset mmap_addr\n");
	  vlib_cli_output (vm,
			   " ====== ===== ================== ================== ================== ================== ==================\n");
	}
      for (j = 0; j < vui->nregions; j++)
	{
	  vlib_cli_output (vm,
			   "  %d %-5d 0x%016lx 0x%016lx 0x%016lx 0x%016lx 0x%016lx\n",
			   j, vui->region_mmap_fd[j],
			   vui->regions[j].guest_phys_addr,
			   vui->regions[j].memory_size,
			   vui->regions[j].userspace_addr,
			   vui->regions[j].mmap_offset,
			   pointer_to_uword (vui->region_mmap_addr[j]));
	}
      /* Per-vring state; only vrings the guest has started are shown. */
      for (q = 0; q < VHOST_VRING_MAX_N; q++)
	{
	  if (!vui->vrings[q].started)
	    continue;

	  /* Odd vring indices are RX, even are TX (from VPP's viewpoint). */
	  vlib_cli_output (vm, "\n Virtqueue %d (%s%s)\n", q,
			   (q & 1) ? "RX" : "TX",
			   vui->vrings[q].enabled ? "" : " disabled");

	  vlib_cli_output (vm,
			   " qsz %d last_avail_idx %d last_used_idx %d\n",
			   vui->vrings[q].qsz_mask + 1,
			   vui->vrings[q].last_avail_idx,
			   vui->vrings[q].last_used_idx);

	  if (vui->vrings[q].avail && vui->vrings[q].used)
	    vlib_cli_output (vm,
			     " avail.flags %x avail.idx %d used.flags %x used.idx %d\n",
			     vui->vrings[q].avail->flags,
			     vui->vrings[q].avail->idx,
			     vui->vrings[q].used->flags,
			     vui->vrings[q].used->idx);

	  int kickfd = UNIX_GET_FD (vui->vrings[q].kickfd_idx);
	  int callfd = UNIX_GET_FD (vui->vrings[q].callfd_idx);
	  vlib_cli_output (vm, " kickfd %d callfd %d errfd %d\n",
			   kickfd, callfd, vui->vrings[q].errfd);

	  if (show_descr)
	    {
	      /* Full descriptor table dump, one line per descriptor. */
	      vlib_cli_output (vm, "\n descriptor table:\n");
	      vlib_cli_output (vm,
			       " id addr len flags next user_addr\n");
	      vlib_cli_output (vm,
			       " ===== ================== ===== ====== ===== ==================\n");
	      for (j = 0; j < vui->vrings[q].qsz_mask + 1; j++)
		{
		  u32 mem_hint = 0;
		  vlib_cli_output (vm,
				   " %-5d 0x%016lx %-5d 0x%04x %-5d 0x%016lx\n",
				   j, vui->vrings[q].desc[j].addr,
				   vui->vrings[q].desc[j].len,
				   vui->vrings[q].desc[j].flags,
				   vui->vrings[q].desc[j].next,
				   pointer_to_uword (map_guest_mem
						     (vui,
						      vui->vrings[q].desc[j].
						      addr, &mem_hint)));
		}
	    }
	}
      vlib_cli_output (vm, "\n");
    }
done:
  vec_free (hw_if_indices);
  return error;
}
3345
Damjan Marion8d281b32016-08-24 14:32:39 +02003346/*
3347 * CLI functions
3348 */
3349
Billy McFalla92501a2016-11-23 12:45:29 -05003350/*?
3351 * Create a vHost User interface. Once created, a new virtual interface
3352 * will exist with the name '<em>VirtualEthernet0/0/x</em>', where '<em>x</em>'
3353 * is the next free index.
3354 *
3355 * There are several parameters associated with a vHost interface:
3356 *
3357 * - <b>socket <socket-filename></b> - Name of the linux socket used by QEMU/VM and
3358 * VPP to manage the vHost interface. If socket does not already exist, VPP will
3359 * create the socket.
3360 *
3361 * - <b>server</b> - Optional flag to indicate that VPP should be the server for the
3362 * linux socket. If not provided, VPP will be the client.
3363 *
3364 * - <b>feature-mask <hex></b> - Optional virtio/vhost feature set negotiated at
3365 * startup. By default, all supported features will be advertised. Otherwise,
3366 * provide the set of features desired.
3367 * - 0x000008000 (15) - VIRTIO_NET_F_MRG_RXBUF
3368 * - 0x000020000 (17) - VIRTIO_NET_F_CTRL_VQ
3369 * - 0x000200000 (21) - VIRTIO_NET_F_GUEST_ANNOUNCE
3370 * - 0x000400000 (22) - VIRTIO_NET_F_MQ
3371 * - 0x004000000 (26) - VHOST_F_LOG_ALL
3372 * - 0x008000000 (27) - VIRTIO_F_ANY_LAYOUT
3373 * - 0x010000000 (28) - VIRTIO_F_INDIRECT_DESC
3374 * - 0x040000000 (30) - VHOST_USER_F_PROTOCOL_FEATURES
3375 * - 0x100000000 (32) - VIRTIO_F_VERSION_1
3376 *
3377 * - <b>hwaddr <mac-addr></b> - Optional ethernet address, can be in either
3378 * X:X:X:X:X:X unix or X.X.X cisco format.
3379 *
3380 * - <b>renumber <dev_instance></b> - Optional parameter which allows the instance
3381 * in the name to be specified. If instance already exists, name will be used
3382 * anyway and multiple instances will have the same name. Use with caution.
3383 *
Steven7312cc72017-03-15 21:18:55 -07003384 * - <b>mode [interrupt | polling]</b> - Optional parameter specifying
3385 * the input thread polling policy.
3386 *
Billy McFalla92501a2016-11-23 12:45:29 -05003387 * @cliexpar
3388 * Example of how to create a vhost interface with VPP as the client and all features enabled:
3389 * @cliexstart{create vhost-user socket /tmp/vhost1.sock}
3390 * VirtualEthernet0/0/0
3391 * @cliexend
3392 * Example of how to create a vhost interface with VPP as the server and with just
3393 * multiple queues enabled:
3394 * @cliexstart{create vhost-user socket /tmp/vhost2.sock server feature-mask 0x40400000}
3395 * VirtualEthernet0/0/1
3396 * @cliexend
3397 * Once the vHost interface is created, enable the interface using:
3398 * @cliexcmd{set interface state VirtualEthernet0/0/0 up}
3399?*/
Damjan Marion8d281b32016-08-24 14:32:39 +02003400/* *INDENT-OFF* */
3401VLIB_CLI_COMMAND (vhost_user_connect_command, static) = {
3402 .path = "create vhost-user",
Steven7312cc72017-03-15 21:18:55 -07003403 .short_help = "create vhost-user socket <socket-filename> [server] "
Stevenf3b53642017-05-01 14:03:02 -07003404 "[feature-mask <hex>] [hwaddr <mac-addr>] [renumber <dev_instance>] ",
Damjan Marion8d281b32016-08-24 14:32:39 +02003405 .function = vhost_user_connect_command_fn,
3406};
Billy McFalla92501a2016-11-23 12:45:29 -05003407/* *INDENT-ON* */
Damjan Marion8d281b32016-08-24 14:32:39 +02003408
Billy McFalla92501a2016-11-23 12:45:29 -05003409/*?
3410 * Delete a vHost User interface using the interface name or the
Dave Barach13ad1f02017-03-26 19:36:18 -04003411 * software interface index. Use the '<em>show interface</em>'
Billy McFalla92501a2016-11-23 12:45:29 -05003412 * command to determine the software interface index. On deletion,
3413 * the linux socket will not be deleted.
3414 *
3415 * @cliexpar
3416 * Example of how to delete a vhost interface by name:
3417 * @cliexcmd{delete vhost-user VirtualEthernet0/0/1}
3418 * Example of how to delete a vhost interface by software interface index:
3419 * @cliexcmd{delete vhost-user sw_if_index 1}
3420?*/
/* *INDENT-OFF* */
/* CLI registration for "delete vhost-user"; handled by
 * vhost_user_delete_command_fn. */
VLIB_CLI_COMMAND (vhost_user_delete_command, static) = {
    .path = "delete vhost-user",
    .short_help = "delete vhost-user {<interface> | sw_if_index <sw_idx>}",
    .function = vhost_user_delete_command_fn,
};
3427
Billy McFalla92501a2016-11-23 12:45:29 -05003428/*?
3429 * Display the attributes of a single vHost User interface (provide interface
3430 * name), multiple vHost User interfaces (provide a list of interface names seperated
3431 * by spaces) or all Vhost User interfaces (omit an interface name to display all
3432 * vHost interfaces).
3433 *
3434 * @cliexpar
3435 * @parblock
3436 * Example of how to display a vhost interface:
3437 * @cliexstart{show vhost-user VirtualEthernet0/0/0}
3438 * Virtio vhost-user interfaces
3439 * Global:
3440 * coalesce frames 32 time 1e-3
3441 * Interface: VirtualEthernet0/0/0 (ifindex 1)
3442 * virtio_net_hdr_sz 12
3443 * features mask (0xffffffffffffffff):
3444 * features (0x50408000):
3445 * VIRTIO_NET_F_MRG_RXBUF (15)
3446 * VIRTIO_NET_F_MQ (22)
3447 * VIRTIO_F_INDIRECT_DESC (28)
3448 * VHOST_USER_F_PROTOCOL_FEATURES (30)
3449 * protocol features (0x3)
3450 * VHOST_USER_PROTOCOL_F_MQ (0)
3451 * VHOST_USER_PROTOCOL_F_LOG_SHMFD (1)
3452 *
3453 * socket filename /tmp/vhost1.sock type client errno "Success"
3454 *
3455 * rx placement:
3456 * thread 1 on vring 1
3457 * thread 1 on vring 5
3458 * thread 2 on vring 3
3459 * thread 2 on vring 7
3460 * tx placement: spin-lock
3461 * thread 0 on vring 0
3462 * thread 1 on vring 2
3463 * thread 2 on vring 0
3464 *
3465 * Memory regions (total 2)
3466 * region fd guest_phys_addr memory_size userspace_addr mmap_offset mmap_addr
3467 * ====== ===== ================== ================== ================== ================== ==================
3468 * 0 60 0x0000000000000000 0x00000000000a0000 0x00002aaaaac00000 0x0000000000000000 0x00002aab2b400000
3469 * 1 61 0x00000000000c0000 0x000000003ff40000 0x00002aaaaacc0000 0x00000000000c0000 0x00002aababcc0000
3470 *
3471 * Virtqueue 0 (TX)
3472 * qsz 256 last_avail_idx 0 last_used_idx 0
3473 * avail.flags 1 avail.idx 128 used.flags 1 used.idx 0
3474 * kickfd 62 callfd 64 errfd -1
3475 *
3476 * Virtqueue 1 (RX)
3477 * qsz 256 last_avail_idx 0 last_used_idx 0
3478 * avail.flags 1 avail.idx 0 used.flags 1 used.idx 0
3479 * kickfd 65 callfd 66 errfd -1
3480 *
3481 * Virtqueue 2 (TX)
3482 * qsz 256 last_avail_idx 0 last_used_idx 0
3483 * avail.flags 1 avail.idx 128 used.flags 1 used.idx 0
3484 * kickfd 63 callfd 70 errfd -1
3485 *
3486 * Virtqueue 3 (RX)
3487 * qsz 256 last_avail_idx 0 last_used_idx 0
3488 * avail.flags 1 avail.idx 0 used.flags 1 used.idx 0
3489 * kickfd 72 callfd 74 errfd -1
3490 *
3491 * Virtqueue 4 (TX disabled)
3492 * qsz 256 last_avail_idx 0 last_used_idx 0
3493 * avail.flags 1 avail.idx 0 used.flags 1 used.idx 0
3494 * kickfd 76 callfd 78 errfd -1
3495 *
3496 * Virtqueue 5 (RX disabled)
3497 * qsz 256 last_avail_idx 0 last_used_idx 0
3498 * avail.flags 1 avail.idx 0 used.flags 1 used.idx 0
3499 * kickfd 80 callfd 82 errfd -1
3500 *
3501 * Virtqueue 6 (TX disabled)
3502 * qsz 256 last_avail_idx 0 last_used_idx 0
3503 * avail.flags 1 avail.idx 0 used.flags 1 used.idx 0
3504 * kickfd 84 callfd 86 errfd -1
3505 *
3506 * Virtqueue 7 (RX disabled)
3507 * qsz 256 last_avail_idx 0 last_used_idx 0
3508 * avail.flags 1 avail.idx 0 used.flags 1 used.idx 0
3509 * kickfd 88 callfd 90 errfd -1
3510 *
3511 * @cliexend
3512 *
3513 * The optional '<em>descriptors</em>' parameter will display the same output as
3514 * the previous example but will include the descriptor table for each queue.
3515 * The output is truncated below:
3516 * @cliexstart{show vhost-user VirtualEthernet0/0/0 descriptors}
3517 * Virtio vhost-user interfaces
3518 * Global:
3519 * coalesce frames 32 time 1e-3
3520 * Interface: VirtualEthernet0/0/0 (ifindex 1)
3521 * virtio_net_hdr_sz 12
3522 * features mask (0xffffffffffffffff):
3523 * features (0x50408000):
3524 * VIRTIO_NET_F_MRG_RXBUF (15)
3525 * VIRTIO_NET_F_MQ (22)
3526 * :
3527 * Virtqueue 0 (TX)
3528 * qsz 256 last_avail_idx 0 last_used_idx 0
3529 * avail.flags 1 avail.idx 128 used.flags 1 used.idx 0
3530 * kickfd 62 callfd 64 errfd -1
3531 *
3532 * descriptor table:
3533 * id addr len flags next user_addr
3534 * ===== ================== ===== ====== ===== ==================
3535 * 0 0x0000000010b6e974 2060 0x0002 1 0x00002aabbc76e974
3536 * 1 0x0000000010b6e034 2060 0x0002 2 0x00002aabbc76e034
3537 * 2 0x0000000010b6d6f4 2060 0x0002 3 0x00002aabbc76d6f4
3538 * 3 0x0000000010b6cdb4 2060 0x0002 4 0x00002aabbc76cdb4
3539 * 4 0x0000000010b6c474 2060 0x0002 5 0x00002aabbc76c474
3540 * 5 0x0000000010b6bb34 2060 0x0002 6 0x00002aabbc76bb34
3541 * 6 0x0000000010b6b1f4 2060 0x0002 7 0x00002aabbc76b1f4
3542 * 7 0x0000000010b6a8b4 2060 0x0002 8 0x00002aabbc76a8b4
3543 * 8 0x0000000010b69f74 2060 0x0002 9 0x00002aabbc769f74
3544 * 9 0x0000000010b69634 2060 0x0002 10 0x00002aabbc769634
3545 * 10 0x0000000010b68cf4 2060 0x0002 11 0x00002aabbc768cf4
3546 * :
3547 * 249 0x0000000000000000 0 0x0000 250 0x00002aab2b400000
3548 * 250 0x0000000000000000 0 0x0000 251 0x00002aab2b400000
3549 * 251 0x0000000000000000 0 0x0000 252 0x00002aab2b400000
3550 * 252 0x0000000000000000 0 0x0000 253 0x00002aab2b400000
3551 * 253 0x0000000000000000 0 0x0000 254 0x00002aab2b400000
3552 * 254 0x0000000000000000 0 0x0000 255 0x00002aab2b400000
3553 * 255 0x0000000000000000 0 0x0000 32768 0x00002aab2b400000
3554 *
3555 * Virtqueue 1 (RX)
3556 * qsz 256 last_avail_idx 0 last_used_idx 0
3557 * :
3558 * @cliexend
3559 * @endparblock
3560?*/
/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_vhost_user_command, static) = {
  .path = "show vhost-user",
  .short_help = "show vhost-user [<interface> [<interface> [..]]] [descriptors]",
  .function = show_vhost_user_command_fn,	/* handler defined earlier in this file */
};
/* *INDENT-ON* */
Damjan Marion8d281b32016-08-24 14:32:39 +02003568
Steven388e51a2017-06-01 12:49:23 -07003569clib_error_t *
3570debug_vhost_user_command_fn (vlib_main_t * vm,
3571 unformat_input_t * input,
3572 vlib_cli_command_t * cmd)
3573{
3574 unformat_input_t _line_input, *line_input = &_line_input;
3575 clib_error_t *error = NULL;
3576 vhost_user_main_t *vum = &vhost_user_main;
Steven2ee2d572017-07-21 16:38:41 -07003577 u8 onoff = 0;
3578 u8 input_found = 0;
Steven388e51a2017-06-01 12:49:23 -07003579
3580 /* Get a line of input. */
3581 if (!unformat_user (input, unformat_line_input, line_input))
Steven2ee2d572017-07-21 16:38:41 -07003582 return clib_error_return (0, "missing argument");
Steven388e51a2017-06-01 12:49:23 -07003583
3584 while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
3585 {
Steven2ee2d572017-07-21 16:38:41 -07003586 if (input_found)
3587 {
3588 error = clib_error_return (0, "unknown input `%U'",
3589 format_unformat_error, line_input);
3590 goto done;
3591 }
3592
Steven388e51a2017-06-01 12:49:23 -07003593 if (unformat (line_input, "on"))
Steven2ee2d572017-07-21 16:38:41 -07003594 {
3595 input_found = 1;
3596 onoff = 1;
3597 }
Steven388e51a2017-06-01 12:49:23 -07003598 else if (unformat (line_input, "off"))
Steven2ee2d572017-07-21 16:38:41 -07003599 {
3600 input_found = 1;
3601 onoff = 0;
3602 }
Steven388e51a2017-06-01 12:49:23 -07003603 else
Steven2ee2d572017-07-21 16:38:41 -07003604 {
3605 error = clib_error_return (0, "unknown input `%U'",
3606 format_unformat_error, line_input);
3607 goto done;
3608 }
Steven388e51a2017-06-01 12:49:23 -07003609 }
3610
Steven2ee2d572017-07-21 16:38:41 -07003611 vum->debug = onoff;
3612
3613done:
Steven388e51a2017-06-01 12:49:23 -07003614 unformat_free (line_input);
3615
3616 return error;
3617}
3618
/* *INDENT-OFF* */
/* CLI registration: "debug vhost-user <on | off>" toggles the global
 * vhost-user debug flag via debug_vhost_user_command_fn above. */
VLIB_CLI_COMMAND (debug_vhost_user_command, static) = {
  .path = "debug vhost-user",
  .short_help = "debug vhost-user <on | off>",
  .function = debug_vhost_user_command_fn,
};
/* *INDENT-ON* */
3626
Ed Warnickecb9cada2015-12-08 15:45:58 -07003627static clib_error_t *
3628vhost_user_config (vlib_main_t * vm, unformat_input_t * input)
3629{
Damjan Marion00a9dca2016-08-17 17:05:46 +02003630 vhost_user_main_t *vum = &vhost_user_main;
Ed Warnickecb9cada2015-12-08 15:45:58 -07003631
3632 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3633 {
3634 if (unformat (input, "coalesce-frames %d", &vum->coalesce_frames))
Damjan Marion00a9dca2016-08-17 17:05:46 +02003635 ;
Ed Warnickecb9cada2015-12-08 15:45:58 -07003636 else if (unformat (input, "coalesce-time %f", &vum->coalesce_time))
Damjan Marion00a9dca2016-08-17 17:05:46 +02003637 ;
Ed Warnickecb9cada2015-12-08 15:45:58 -07003638 else if (unformat (input, "dont-dump-memory"))
Damjan Marion00a9dca2016-08-17 17:05:46 +02003639 vum->dont_dump_vhost_user_memory = 1;
Ed Warnickecb9cada2015-12-08 15:45:58 -07003640 else
Damjan Marion00a9dca2016-08-17 17:05:46 +02003641 return clib_error_return (0, "unknown input `%U'",
3642 format_unformat_error, input);
Ed Warnickecb9cada2015-12-08 15:45:58 -07003643 }
3644
3645 return 0;
3646}
3647
/* Register vhost_user_config as the parser for the
 * "vhost-user { ... }" startup-configuration section. */
VLIB_CONFIG_FUNCTION (vhost_user_config, "vhost-user");
3650
3651void
3652vhost_user_unmap_all (void)
3653{
Damjan Marion00a9dca2016-08-17 17:05:46 +02003654 vhost_user_main_t *vum = &vhost_user_main;
3655 vhost_user_intf_t *vui;
Ed Warnickecb9cada2015-12-08 15:45:58 -07003656
3657 if (vum->dont_dump_vhost_user_memory)
3658 {
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00003659 pool_foreach (vui, vum->vhost_user_interfaces,
3660 unmap_all_mem_regions (vui);
3661 );
Ed Warnickecb9cada2015-12-08 15:45:58 -07003662 }
3663}
Damjan Marion00a9dca2016-08-17 17:05:46 +02003664
3665/*
3666 * fd.io coding-style-patch-verification: ON
3667 *
3668 * Local Variables:
3669 * eval: (c-set-style "gnu")
3670 * End:
3671 */