/*
 *------------------------------------------------------------------
 * vhost.c - vhost-user
 *
 * Copyright (c) 2014 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *------------------------------------------------------------------
 */

#include <fcntl.h>		/* for open */
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/uio.h>		/* for iovec */
#include <netinet/in.h>
#include <sys/vfs.h>

#include <linux/if_arp.h>
#include <linux/if_tun.h>

#include <vlib/vlib.h>
#include <vlib/unix/unix.h>

#include <vnet/ip/ip.h>

#include <vnet/ethernet/ethernet.h>
#include <vnet/devices/devices.h>
#include <vnet/feature/feature.h>

#include <vnet/devices/virtio/vhost-user.h>

/**
 * @file
 * @brief vHost User Device Driver.
 *
 * This file contains the source code for the vHost User interface.
 */

#define VHOST_DEBUG_VQ 0

#define DBG_SOCK(args...)                       \
  {                                             \
    vhost_user_main_t *_vum = &vhost_user_main; \
    if (_vum->debug)                            \
      clib_warning(args);                       \
  };

#if VHOST_DEBUG_VQ == 1
#define DBG_VQ(args...) clib_warning(args);
#else
#define DBG_VQ(args...)
#endif

/*
 * When an RX queue is down but active, received packets
 * must be discarded. This value controls up to how many
 * packets will be discarded during each round.
 */
#define VHOST_USER_DOWN_DISCARD_COUNT 256

/*
 * When the number of available buffers gets under this threshold,
 * the RX node will start discarding packets.
 */
#define VHOST_USER_RX_BUFFER_STARVATION 32

/*
 * On the receive side, the host should free descriptors as soon
 * as possible in order to avoid TX drops in the VM.
 * This value controls the number of copy operations that are stacked
 * before the copy is done for all of them and the descriptors are
 * given back to the guest.
 * The value 64 was obtained by testing (48 and 128 were not as good).
 */
#define VHOST_USER_RX_COPY_THRESHOLD 64
/*
 * On the transmit side, we keep processing the buffers from vlib in the while
 * loop and prepare the copy order to be executed later. However, the static
 * array in which we keep the copy order is limited to VHOST_USER_COPY_ARRAY_N
 * entries. In order to not corrupt memory, we have to do the copy when the
 * static array reaches the copy threshold. We subtract 40 in case the code
 * goes into the inner loop for a maximum of 64k frames, which may require
 * more array entries.
 */
#define VHOST_USER_TX_COPY_THRESHOLD (VHOST_USER_COPY_ARRAY_N - 40)

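/*
 * Resolve a clib_file pool index to its underlying OS file descriptor,
 * or -1 when the index is unset (~0). Note that the macro argument is
 * evaluated twice, so it must not have side effects.
 */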
#define UNIX_GET_FD(unixfd_idx) \
    (unixfd_idx != ~0) ? \
	pool_elt_at_index (file_main.file_pool, \
			   unixfd_idx)->file_descriptor : -1;

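/*
 * Per-packet trace flags. The _() X-macro list below is expanded into the
 * virtio_trace_flag_t enum; the description strings are intended for the
 * packet trace formatter.
 */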
#define foreach_virtio_trace_flags \
  _ (SIMPLE_CHAINED, 0, "Simple descriptor chaining") \
  _ (SINGLE_DESC,  1, "Single descriptor packet") \
  _ (INDIRECT, 2, "Indirect descriptor") \
  _ (MAP_ERROR, 4, "Memory mapping error")

typedef enum
{
#define _(n,i,s) VIRTIO_TRACE_F_##n,
  foreach_virtio_trace_flags
#undef _
} virtio_trace_flag_t;

vlib_node_registration_t vhost_user_input_node;

#define foreach_vhost_user_tx_func_error      \
  _(NONE, "no error")  \
  _(NOT_READY, "vhost vring not ready")  \
  _(DOWN, "vhost interface is down")  \
  _(PKT_DROP_NOBUF, "tx packet drops (no available descriptors)")  \
  _(PKT_DROP_NOMRG, "tx packet drops (cannot merge descriptors)")  \
  _(MMAP_FAIL, "mmap failure") \
  _(INDIRECT_OVERFLOW, "indirect descriptor table overflow")

typedef enum
{
#define _(f,s) VHOST_USER_TX_FUNC_ERROR_##f,
  foreach_vhost_user_tx_func_error
#undef _
    VHOST_USER_TX_FUNC_N_ERROR,
} vhost_user_tx_func_error_t;

static char *vhost_user_tx_func_error_strings[] = {
#define _(n,s) s,
  foreach_vhost_user_tx_func_error
#undef _
};

#define foreach_vhost_user_input_func_error \
  _(NO_ERROR, "no error")  \
  _(NO_BUFFER, "no available buffer")  \
  _(MMAP_FAIL, "mmap failure")  \
  _(INDIRECT_OVERFLOW, "indirect descriptor overflows table")  \
  _(UNDERSIZED_FRAME, "undersized ethernet frame received (< 14 bytes)") \
  _(FULL_RX_QUEUE, "full rx queue (possible driver tx drop)")

typedef enum
{
#define _(f,s) VHOST_USER_INPUT_FUNC_ERROR_##f,
  foreach_vhost_user_input_func_error
#undef _
    VHOST_USER_INPUT_FUNC_N_ERROR,
} vhost_user_input_func_error_t;

static char *vhost_user_input_func_error_strings[] = {
#define _(n,s) s,
  foreach_vhost_user_input_func_error
#undef _
};

/* *INDENT-OFF* */
static vhost_user_main_t vhost_user_main = {
  .mtu_bytes = 1518,
};

VNET_HW_INTERFACE_CLASS (vhost_interface_class, static) = {
  .name = "vhost-user",
};
/* *INDENT-ON* */

static u8 *
format_vhost_user_interface_name (u8 * s, va_list * args)
{
  u32 i = va_arg (*args, u32);
  u32 show_dev_instance = ~0;
  vhost_user_main_t *vum = &vhost_user_main;

  if (i < vec_len (vum->show_dev_instance_by_real_dev_instance))
    show_dev_instance = vum->show_dev_instance_by_real_dev_instance[i];

  if (show_dev_instance != ~0)
    i = show_dev_instance;

  s = format (s, "VirtualEthernet0/0/%d", i);
  return s;
}

static int
vhost_user_name_renumber (vnet_hw_interface_t * hi, u32 new_dev_instance)
{
  // FIXME: check if the new dev instance is already used
  vhost_user_main_t *vum = &vhost_user_main;
  vec_validate_init_empty (vum->show_dev_instance_by_real_dev_instance,
                           hi->dev_instance, ~0);

  vum->show_dev_instance_by_real_dev_instance[hi->dev_instance] =
    new_dev_instance;

  DBG_SOCK ("renumbered vhost-user interface dev_instance %d to %d",
            hi->dev_instance, new_dev_instance);

  return 0;
}

static_always_inline void *
map_guest_mem (vhost_user_intf_t * vui, uword addr, u32 * hint)
{
  int i = *hint;
  if (PREDICT_TRUE ((vui->regions[i].guest_phys_addr <= addr) &&
                    ((vui->regions[i].guest_phys_addr +
                      vui->regions[i].memory_size) > addr)))
    {
      return (void *) (vui->region_mmap_addr[i] + addr -
                       vui->regions[i].guest_phys_addr);
    }
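  /*
   * Slow path: search all mapped regions for one whose
   * [guest_phys_addr, guest_phys_addr + memory_size) range contains addr.
   * On x86 with SSE4.2 (and on aarch64 with NEON) the region bounds are
   * compared in parallel and the first matching region index is recovered
   * from the resulting bit mask with count-trailing-zeros.
   */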
#if __SSE4_2__
  __m128i rl, rh, al, ah, r;
  al = _mm_set1_epi64x (addr + 1);
  ah = _mm_set1_epi64x (addr);

  rl = _mm_loadu_si128 ((__m128i *) & vui->region_guest_addr_lo[0]);
  rl = _mm_cmpgt_epi64 (al, rl);
  rh = _mm_loadu_si128 ((__m128i *) & vui->region_guest_addr_hi[0]);
  rh = _mm_cmpgt_epi64 (rh, ah);
  r = _mm_and_si128 (rl, rh);

  rl = _mm_loadu_si128 ((__m128i *) & vui->region_guest_addr_lo[2]);
  rl = _mm_cmpgt_epi64 (al, rl);
  rh = _mm_loadu_si128 ((__m128i *) & vui->region_guest_addr_hi[2]);
  rh = _mm_cmpgt_epi64 (rh, ah);
  r = _mm_blend_epi16 (r, _mm_and_si128 (rl, rh), 0x22);

  rl = _mm_loadu_si128 ((__m128i *) & vui->region_guest_addr_lo[4]);
  rl = _mm_cmpgt_epi64 (al, rl);
  rh = _mm_loadu_si128 ((__m128i *) & vui->region_guest_addr_hi[4]);
  rh = _mm_cmpgt_epi64 (rh, ah);
  r = _mm_blend_epi16 (r, _mm_and_si128 (rl, rh), 0x44);

  rl = _mm_loadu_si128 ((__m128i *) & vui->region_guest_addr_lo[6]);
  rl = _mm_cmpgt_epi64 (al, rl);
  rh = _mm_loadu_si128 ((__m128i *) & vui->region_guest_addr_hi[6]);
  rh = _mm_cmpgt_epi64 (rh, ah);
  r = _mm_blend_epi16 (r, _mm_and_si128 (rl, rh), 0x88);

  r = _mm_shuffle_epi8 (r, _mm_set_epi64x (0, 0x0e060c040a020800));
  i = __builtin_ctzll (_mm_movemask_epi8 (r) |
                       (1 << VHOST_MEMORY_MAX_NREGIONS));

  if (i < vui->nregions)
    {
      *hint = i;
      return (void *) (vui->region_mmap_addr[i] + addr -
                       vui->regions[i].guest_phys_addr);
    }
#elif __aarch64__ && __ARM_NEON
  uint64x2_t al, ah, rl, rh, r;
  uint32_t u32 = 0;

  al = vdupq_n_u64 (addr + 1);
  ah = vdupq_n_u64 (addr);

  /* First iteration */
  rl = vld1q_u64 (&vui->region_guest_addr_lo[0]);
  rl = vcgtq_u64 (al, rl);
  rh = vld1q_u64 (&vui->region_guest_addr_hi[0]);
  rh = vcgtq_u64 (rh, ah);
  r = vandq_u64 (rl, rh);
  u32 |= (vgetq_lane_u8 (vreinterpretq_u8_u64 (r), 0) & 0x1);
  u32 |= ((vgetq_lane_u8 (vreinterpretq_u8_u64 (r), 8) & 0x1) << 1);

  if (u32)
    {
      i = __builtin_ctzll (u32);
      goto vhost_map_guest_mem_done;
    }

  /* Second iteration */
  rl = vld1q_u64 (&vui->region_guest_addr_lo[2]);
  rl = vcgtq_u64 (al, rl);
  rh = vld1q_u64 (&vui->region_guest_addr_hi[2]);
  rh = vcgtq_u64 (rh, ah);
  r = vandq_u64 (rl, rh);
  u32 |= ((vgetq_lane_u8 (vreinterpretq_u8_u64 (r), 0) & 0x1) << 2);
  u32 |= ((vgetq_lane_u8 (vreinterpretq_u8_u64 (r), 8) & 0x1) << 3);

  if (u32)
    {
      i = __builtin_ctzll (u32);
      goto vhost_map_guest_mem_done;
    }

  /* Third iteration */
  rl = vld1q_u64 (&vui->region_guest_addr_lo[4]);
  rl = vcgtq_u64 (al, rl);
  rh = vld1q_u64 (&vui->region_guest_addr_hi[4]);
  rh = vcgtq_u64 (rh, ah);
  r = vandq_u64 (rl, rh);
  u32 |= ((vgetq_lane_u8 (vreinterpretq_u8_u64 (r), 0) & 0x1) << 4);
  u32 |= ((vgetq_lane_u8 (vreinterpretq_u8_u64 (r), 8) & 0x1) << 5);

  if (u32)
    {
      i = __builtin_ctzll (u32);
      goto vhost_map_guest_mem_done;
    }

  /* Fourth iteration */
  rl = vld1q_u64 (&vui->region_guest_addr_lo[6]);
  rl = vcgtq_u64 (al, rl);
  rh = vld1q_u64 (&vui->region_guest_addr_hi[6]);
  rh = vcgtq_u64 (rh, ah);
  r = vandq_u64 (rl, rh);
  u32 |= ((vgetq_lane_u8 (vreinterpretq_u8_u64 (r), 0) & 0x1) << 6);
  u32 |= ((vgetq_lane_u8 (vreinterpretq_u8_u64 (r), 8) & 0x1) << 7);

  i = __builtin_ctzll (u32 | (1 << VHOST_MEMORY_MAX_NREGIONS));

vhost_map_guest_mem_done:
  if (i < vui->nregions)
    {
      *hint = i;
      return (void *) (vui->region_mmap_addr[i] + addr -
                       vui->regions[i].guest_phys_addr);
    }
#else
  for (i = 0; i < vui->nregions; i++)
    {
      if ((vui->regions[i].guest_phys_addr <= addr) &&
          ((vui->regions[i].guest_phys_addr + vui->regions[i].memory_size) >
           addr))
        {
          *hint = i;
          return (void *) (vui->region_mmap_addr[i] + addr -
                           vui->regions[i].guest_phys_addr);
        }
    }
#endif
  DBG_VQ ("failed to map guest mem addr %llx", addr);
  *hint = 0;
  return 0;
}

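/*
 * Translate an address in the vhost-user client (QEMU) process virtual
 * address space, as supplied in VHOST_USER_SET_VRING_ADDR, into a pointer
 * within the locally mmap'ed region. Returns 0 when no region matches.
 */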
static inline void *
map_user_mem (vhost_user_intf_t * vui, uword addr)
{
  int i;
  for (i = 0; i < vui->nregions; i++)
    {
      if ((vui->regions[i].userspace_addr <= addr) &&
          ((vui->regions[i].userspace_addr + vui->regions[i].memory_size) >
           addr))
        {
          return (void *) (vui->region_mmap_addr[i] + addr -
                           vui->regions[i].userspace_addr);
        }
    }
  return 0;
}

static long
get_huge_page_size (int fd)
{
  struct statfs s;
  fstatfs (fd, &s);
  return s.f_bsize;
}

static void
unmap_all_mem_regions (vhost_user_intf_t * vui)
{
  int i, r;
  for (i = 0; i < vui->nregions; i++)
    {
      if (vui->region_mmap_addr[i] != MAP_FAILED)
        {
          long page_sz = get_huge_page_size (vui->region_mmap_fd[i]);

          ssize_t map_sz = (vui->regions[i].memory_size +
                            vui->regions[i].mmap_offset +
                            page_sz - 1) & ~(page_sz - 1);

          r =
            munmap (vui->region_mmap_addr[i] - vui->regions[i].mmap_offset,
                    map_sz);

          DBG_SOCK
            ("unmap memory region %d addr 0x%lx len 0x%lx page_sz 0x%x", i,
             vui->region_mmap_addr[i], map_sz, page_sz);

          vui->region_mmap_addr[i] = MAP_FAILED;

          if (r == -1)
            {
              clib_warning ("failed to unmap memory region (errno %d)",
                            errno);
            }
          close (vui->region_mmap_fd[i]);
        }
    }
  vui->nregions = 0;
}

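/**
 * @brief Map worker threads to TX queues.
 *
 * Assigns one started-and-enabled guest RX vring (the queue VPP transmits
 * into) to each worker thread when possible; when there are fewer usable
 * queues than threads, queues are shared and TX must take a spinlock.
 */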
static void
vhost_user_tx_thread_placement (vhost_user_intf_t * vui)
{
  // Let's try to assign one queue to each thread
  u32 qid = 0;
  u32 thread_index = 0;
  vui->use_tx_spinlock = 0;
  while (1)
    {
      for (qid = 0; qid < VHOST_VRING_MAX_N / 2; qid++)
        {
          vhost_user_vring_t *rxvq = &vui->vrings[VHOST_VRING_IDX_RX (qid)];
          if (!rxvq->started || !rxvq->enabled)
            continue;

          vui->per_cpu_tx_qid[thread_index] = qid;
          thread_index++;
          if (thread_index == vlib_get_thread_main ()->n_vlib_mains)
            return;
        }
      // We need to loop, meaning the spinlock has to be used
      vui->use_tx_spinlock = 1;
      if (thread_index == 0)
        {
          // Could not find a single valid queue
          for (thread_index = 0;
               thread_index < vlib_get_thread_main ()->n_vlib_mains;
               thread_index++)
            {
              vui->per_cpu_tx_qid[thread_index] = 0;
            }
          return;
        }
    }
}

/**
 * @brief Unassign the existing interface/queue-to-thread mappings and
 * re-assign new ones.
 */
static void
vhost_user_rx_thread_placement ()
{
  vhost_user_main_t *vum = &vhost_user_main;
  vhost_user_intf_t *vui;
  vhost_user_vring_t *txvq;
  vnet_main_t *vnm = vnet_get_main ();
  u32 qid;
  int rv;
  u16 *queue;

  // Scrap all existing mappings for all interfaces/queues
  /* *INDENT-OFF* */
  pool_foreach (vui, vum->vhost_user_interfaces, {
    vec_foreach (queue, vui->rx_queues)
      {
        rv = vnet_hw_interface_unassign_rx_thread (vnm, vui->hw_if_index,
                                                   *queue);
        if (rv)
          clib_warning ("Warning: unable to unassign interface %d, "
                        "queue %d: rc=%d", vui->hw_if_index, *queue, rv);
      }
    vec_reset_length (vui->rx_queues);
  });
  /* *INDENT-ON* */

  // Create the rx_queues for all interfaces
  /* *INDENT-OFF* */
  pool_foreach (vui, vum->vhost_user_interfaces, {
    for (qid = 0; qid < VHOST_VRING_MAX_N / 2; qid++)
      {
        txvq = &vui->vrings[VHOST_VRING_IDX_TX (qid)];
        if (txvq->started)
          {
            if (txvq->mode == VNET_HW_INTERFACE_RX_MODE_UNKNOWN)
              /* Set polling as the default */
              txvq->mode = VNET_HW_INTERFACE_RX_MODE_POLLING;
            vec_add1 (vui->rx_queues, qid);
          }
      }
  });
  /* *INDENT-ON* */

  // Assign new mappings for all interfaces/queues
  /* *INDENT-OFF* */
  pool_foreach (vui, vum->vhost_user_interfaces, {
    vnet_hw_interface_set_input_node (vnm, vui->hw_if_index,
                                      vhost_user_input_node.index);
    vec_foreach (queue, vui->rx_queues)
      {
        vnet_hw_interface_assign_rx_thread (vnm, vui->hw_if_index, *queue,
                                            ~0);
        txvq = &vui->vrings[VHOST_VRING_IDX_TX (*queue)];
        rv = vnet_hw_interface_set_rx_mode (vnm, vui->hw_if_index, *queue,
                                            txvq->mode);
        if (rv)
          clib_warning ("Warning: unable to set rx mode for interface %d, "
                        "queue %d: rc=%d", vui->hw_if_index, *queue, rv);
      }
  });
  /* *INDENT-ON* */
}

/** @brief Returns whether at least one TX and one RX vring are enabled */
int
vhost_user_intf_ready (vhost_user_intf_t * vui)
{
  int i, found[2] = { };	// RX + TX

  for (i = 0; i < VHOST_VRING_MAX_N; i++)
    if (vui->vrings[i].started && vui->vrings[i].enabled)
      found[i & 1] = 1;

  return found[0] && found[1];
}

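/*
 * Re-evaluate the interface after a vring state change: bring the link up
 * or down depending on vhost_user_intf_ready() and refresh the RX and TX
 * queue-to-thread placement.
 */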
static void
vhost_user_update_iface_state (vhost_user_intf_t * vui)
{
  /* if we have pointers to descriptor table, go up */
  int is_up = vhost_user_intf_ready (vui);
  if (is_up != vui->is_up)
    {
      DBG_SOCK ("interface %d %s", vui->sw_if_index,
                is_up ? "ready" : "down");
      vnet_hw_interface_set_flags (vnet_get_main (), vui->hw_if_index,
                                   is_up ? VNET_HW_INTERFACE_FLAG_LINK_UP :
                                   0);
      vui->is_up = is_up;
    }
  vhost_user_rx_thread_placement ();
  vhost_user_tx_thread_placement (vui);
}

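/*
 * Called from the kickfd handler: when the guest kicks a TX virtqueue
 * (odd index) and the interface is ready, schedule interrupt-mode RX
 * processing of the corresponding vring on the device input node.
 */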
static void
vhost_user_set_interrupt_pending (vhost_user_intf_t * vui, u32 ifq)
{
  u32 qid;
  vnet_main_t *vnm = vnet_get_main ();

  qid = ifq & 0xff;
  if ((qid & 1) == 0)
    /* Only care about the odd number, or TX, virtqueue */
    return;

  if (vhost_user_intf_ready (vui))
    // qid >> 1 is to convert virtqueue number to vring queue index
    vnet_device_input_set_interrupt_pending (vnm, vui->hw_if_index, qid >> 1);
}

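/*
 * Handler for the vring call eventfd: simply drain the 8-byte eventfd
 * counter so the descriptor does not stay readable.
 */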
static clib_error_t *
vhost_user_callfd_read_ready (clib_file_t * uf)
{
  __attribute__ ((unused)) int n;
  u8 buff[8];

  n = read (uf->file_descriptor, ((char *) &buff), 8);

  return 0;
}

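/*
 * Handler for the vring kick eventfd: drain the eventfd, mark the vring as
 * started if needed (updating the interface state under the worker thread
 * barrier), and schedule interrupt-mode processing for that queue.
 */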
static clib_error_t *
vhost_user_kickfd_read_ready (clib_file_t * uf)
{
  __attribute__ ((unused)) int n;
  u8 buff[8];
  vhost_user_intf_t *vui =
    pool_elt_at_index (vhost_user_main.vhost_user_interfaces,
                       uf->private_data >> 8);
  u32 qid = uf->private_data & 0xff;

  n = read (uf->file_descriptor, ((char *) &buff), 8);
  DBG_SOCK ("if %d KICK queue %d", uf->private_data >> 8, qid);
  if (!vui->vrings[qid].started ||
      (vhost_user_intf_ready (vui) != vui->is_up))
    {
      vlib_worker_thread_barrier_sync (vlib_get_main ());
      vui->vrings[qid].started = 1;
      vhost_user_update_iface_state (vui);
      vlib_worker_thread_barrier_release (vlib_get_main ());
    }

  vhost_user_set_interrupt_pending (vui, uf->private_data);
  return 0;
}

/**
 * @brief Try once to lock the vring
 * @return 0 on success, non-zero on failure.
 */
static inline int
vhost_user_vring_try_lock (vhost_user_intf_t * vui, u32 qid)
{
  return __sync_lock_test_and_set (vui->vring_locks[qid], 1);
}

/**
 * @brief Spin until the vring is successfully locked
 */
static inline void
vhost_user_vring_lock (vhost_user_intf_t * vui, u32 qid)
{
  while (vhost_user_vring_try_lock (vui, qid))
    ;
}

/**
 * @brief Unlock the vring lock
 */
static inline void
vhost_user_vring_unlock (vhost_user_intf_t * vui, u32 qid)
{
  *vui->vring_locks[qid] = 0;
}

static inline void
vhost_user_vring_init (vhost_user_intf_t * vui, u32 qid)
{
  vhost_user_vring_t *vring = &vui->vrings[qid];
  memset (vring, 0, sizeof (*vring));
  vring->kickfd_idx = ~0;
  vring->callfd_idx = ~0;
  vring->errfd = -1;

  /*
   * We have a bug with some qemu 2.5, and this may be a fix.
   * It feels like interpreting holy text, but this is from vhost-user.txt.
   * "
   * One queue pair is enabled initially. More queues are enabled
   * dynamically, by sending message VHOST_USER_SET_VRING_ENABLE.
   * "
   * Don't know who's right, but this is what DPDK does.
   */
  if (qid == 0 || qid == 1)
    vring->enabled = 1;
}

static inline void
vhost_user_vring_close (vhost_user_intf_t * vui, u32 qid)
{
  vhost_user_vring_t *vring = &vui->vrings[qid];
  if (vring->kickfd_idx != ~0)
    {
      clib_file_t *uf = pool_elt_at_index (file_main.file_pool,
                                           vring->kickfd_idx);
      clib_file_del (&file_main, uf);
      vring->kickfd_idx = ~0;
    }
  if (vring->callfd_idx != ~0)
    {
      clib_file_t *uf = pool_elt_at_index (file_main.file_pool,
                                           vring->callfd_idx);
      clib_file_del (&file_main, uf);
      vring->callfd_idx = ~0;
    }
  if (vring->errfd != -1)
    {
      close (vring->errfd);
      vring->errfd = -1;
    }
  vhost_user_vring_init (vui, qid);
}

static inline void
vhost_user_if_disconnect (vhost_user_intf_t * vui)
{
  vnet_main_t *vnm = vnet_get_main ();
  int q;

  vnet_hw_interface_set_flags (vnm, vui->hw_if_index, 0);

  if (vui->clib_file_index != ~0)
    {
      clib_file_del (&file_main, file_main.file_pool + vui->clib_file_index);
      vui->clib_file_index = ~0;
    }

  vui->is_up = 0;

  for (q = 0; q < VHOST_VRING_MAX_N; q++)
    vhost_user_vring_close (vui, q);

  unmap_all_mem_regions (vui);
  DBG_SOCK ("interface ifindex %d disconnected", vui->sw_if_index);
}

#define VHOST_LOG_PAGE 0x1000
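/*
 * Dirty page logging for live migration: when FEAT_VHOST_F_LOG_ALL is
 * negotiated and a log region is mapped, set one bit per VHOST_LOG_PAGE
 * (4 KiB) page touched so the client can copy those pages to the
 * migration destination. is_host_address selects whether addr is a guest
 * physical address or a client (QEMU) virtual address.
 */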
static_always_inline void
vhost_user_log_dirty_pages_2 (vhost_user_intf_t * vui,
                              u64 addr, u64 len, u8 is_host_address)
{
  if (PREDICT_TRUE (vui->log_base_addr == 0
                    || !(vui->features & (1 << FEAT_VHOST_F_LOG_ALL))))
    {
      return;
    }
  if (is_host_address)
    {
      addr = pointer_to_uword (map_user_mem (vui, (uword) addr));
    }
  if (PREDICT_FALSE ((addr + len - 1) / VHOST_LOG_PAGE / 8 >= vui->log_size))
    {
      DBG_SOCK ("vhost_user_log_dirty_pages(): out of range\n");
      return;
    }

  CLIB_MEMORY_BARRIER ();
  u64 page = addr / VHOST_LOG_PAGE;
  while (page * VHOST_LOG_PAGE < addr + len)
    {
      ((u8 *) vui->log_base_addr)[page / 8] |= 1 << page % 8;
      page++;
    }
}

static_always_inline void
vhost_user_log_dirty_pages (vhost_user_intf_t * vui, u64 addr, u64 len)
{
  vhost_user_log_dirty_pages_2 (vui, addr, len, 0);
}

#define vhost_user_log_dirty_ring(vui, vq, member) \
  if (PREDICT_FALSE(vq->log_used)) { \
    vhost_user_log_dirty_pages(vui, vq->log_guest_addr + STRUCT_OFFSET_OF(vring_used_t, member), \
                               sizeof(vq->used->member)); \
  }

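/*
 * Read and process a single vhost-user protocol message from the unix
 * socket: a fixed-size header followed by a request-specific payload, with
 * any shared-memory file descriptors passed as SCM_RIGHTS ancillary data.
 * Workers are held under the thread barrier while the message is applied,
 * and the socket is closed on any malformed message.
 */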
static clib_error_t *
vhost_user_socket_read (clib_file_t * uf)
{
  int n, i;
  int fd, number_of_fds = 0;
  int fds[VHOST_MEMORY_MAX_NREGIONS];
  vhost_user_msg_t msg;
  struct msghdr mh;
  struct iovec iov[1];
  vhost_user_main_t *vum = &vhost_user_main;
  vhost_user_intf_t *vui;
  struct cmsghdr *cmsg;
  u8 q;
  clib_file_t template = { 0 };
  vnet_main_t *vnm = vnet_get_main ();

  vui = pool_elt_at_index (vum->vhost_user_interfaces, uf->private_data);

  char control[CMSG_SPACE (VHOST_MEMORY_MAX_NREGIONS * sizeof (int))];

  memset (&mh, 0, sizeof (mh));
  memset (control, 0, sizeof (control));

  for (i = 0; i < VHOST_MEMORY_MAX_NREGIONS; i++)
    fds[i] = -1;

  /* set the payload */
  iov[0].iov_base = (void *) &msg;
  iov[0].iov_len = VHOST_USER_MSG_HDR_SZ;

  mh.msg_iov = iov;
  mh.msg_iovlen = 1;
  mh.msg_control = control;
  mh.msg_controllen = sizeof (control);

  n = recvmsg (uf->file_descriptor, &mh, 0);

  /* Stop workers to avoid end of the world */
  vlib_worker_thread_barrier_sync (vlib_get_main ());

  if (n != VHOST_USER_MSG_HDR_SZ)
    {
      if (n == -1)
        {
          DBG_SOCK ("recvmsg returned error %d %s", errno, strerror (errno));
        }
      else
        {
          DBG_SOCK ("n (%d) != VHOST_USER_MSG_HDR_SZ (%d)",
                    n, VHOST_USER_MSG_HDR_SZ);
        }
      goto close_socket;
    }

  if (mh.msg_flags & MSG_CTRUNC)
    {
      DBG_SOCK ("MSG_CTRUNC is set");
      goto close_socket;
    }

  cmsg = CMSG_FIRSTHDR (&mh);

  if (cmsg && (cmsg->cmsg_len > 0) && (cmsg->cmsg_level == SOL_SOCKET) &&
      (cmsg->cmsg_type == SCM_RIGHTS) &&
      (cmsg->cmsg_len - CMSG_LEN (0) <=
       VHOST_MEMORY_MAX_NREGIONS * sizeof (int)))
    {
      number_of_fds = (cmsg->cmsg_len - CMSG_LEN (0)) / sizeof (int);
      clib_memcpy (fds, CMSG_DATA (cmsg), number_of_fds * sizeof (int));
    }

  /* version 1, no reply bit set */
  if ((msg.flags & 7) != 1)
    {
      DBG_SOCK ("malformed message received. closing socket");
      goto close_socket;
    }

  {
    int rv;
    rv =
      read (uf->file_descriptor, ((char *) &msg) + VHOST_USER_MSG_HDR_SZ,
            msg.size);
    if (rv < 0)
      {
        DBG_SOCK ("read failed %s", strerror (errno));
        goto close_socket;
      }
    else if (rv != msg.size)
      {
        DBG_SOCK ("message too short (read %dB should be %dB)", rv, msg.size);
        goto close_socket;
      }
  }

  switch (msg.request)
    {
    case VHOST_USER_GET_FEATURES:
      msg.flags |= 4;
      msg.u64 = (1ULL << FEAT_VIRTIO_NET_F_MRG_RXBUF) |
        (1ULL << FEAT_VIRTIO_NET_F_CTRL_VQ) |
        (1ULL << FEAT_VIRTIO_F_ANY_LAYOUT) |
        (1ULL << FEAT_VIRTIO_F_INDIRECT_DESC) |
        (1ULL << FEAT_VHOST_F_LOG_ALL) |
        (1ULL << FEAT_VIRTIO_NET_F_GUEST_ANNOUNCE) |
        (1ULL << FEAT_VIRTIO_NET_F_MQ) |
        (1ULL << FEAT_VHOST_USER_F_PROTOCOL_FEATURES) |
        (1ULL << FEAT_VIRTIO_F_VERSION_1);
      msg.u64 &= vui->feature_mask;
      msg.size = sizeof (msg.u64);
      DBG_SOCK ("if %d msg VHOST_USER_GET_FEATURES - reply 0x%016llx",
                vui->hw_if_index, msg.u64);
      break;

    case VHOST_USER_SET_FEATURES:
      DBG_SOCK ("if %d msg VHOST_USER_SET_FEATURES features 0x%016llx",
                vui->hw_if_index, msg.u64);

      vui->features = msg.u64;

      if (vui->features &
          ((1 << FEAT_VIRTIO_NET_F_MRG_RXBUF) |
           (1ULL << FEAT_VIRTIO_F_VERSION_1)))
        vui->virtio_net_hdr_sz = 12;
      else
        vui->virtio_net_hdr_sz = 10;

      vui->is_any_layout =
        (vui->features & (1 << FEAT_VIRTIO_F_ANY_LAYOUT)) ? 1 : 0;

      ASSERT (vui->virtio_net_hdr_sz < VLIB_BUFFER_PRE_DATA_SIZE);
      vnet_hw_interface_set_flags (vnm, vui->hw_if_index, 0);
      vui->is_up = 0;

      /*for (q = 0; q < VHOST_VRING_MAX_N; q++)
         vhost_user_vring_close(&vui->vrings[q]); */

      break;

    case VHOST_USER_SET_MEM_TABLE:
      DBG_SOCK ("if %d msg VHOST_USER_SET_MEM_TABLE nregions %d",
                vui->hw_if_index, msg.memory.nregions);

      if ((msg.memory.nregions < 1) ||
          (msg.memory.nregions > VHOST_MEMORY_MAX_NREGIONS))
        {
          DBG_SOCK ("number of mem regions must be between 1 and %i",
                    VHOST_MEMORY_MAX_NREGIONS);

          goto close_socket;
        }

      if (msg.memory.nregions != number_of_fds)
        {
          DBG_SOCK ("each memory region must have FD");
          goto close_socket;
        }
      unmap_all_mem_regions (vui);
      for (i = 0; i < msg.memory.nregions; i++)
        {
          clib_memcpy (&(vui->regions[i]), &msg.memory.regions[i],
                       sizeof (vhost_user_memory_region_t));

          long page_sz = get_huge_page_size (fds[i]);

          /* align size to page */
          ssize_t map_sz = (vui->regions[i].memory_size +
                            vui->regions[i].mmap_offset +
                            page_sz - 1) & ~(page_sz - 1);

          vui->region_mmap_addr[i] = mmap (0, map_sz, PROT_READ | PROT_WRITE,
                                           MAP_SHARED, fds[i], 0);
          vui->region_guest_addr_lo[i] = vui->regions[i].guest_phys_addr;
          vui->region_guest_addr_hi[i] = vui->regions[i].guest_phys_addr +
            vui->regions[i].memory_size;

          DBG_SOCK
            ("map memory region %d addr 0 len 0x%lx fd %d mapped 0x%lx "
             "page_sz 0x%x", i, map_sz, fds[i], vui->region_mmap_addr[i],
             page_sz);

          if (vui->region_mmap_addr[i] == MAP_FAILED)
            {
              clib_warning ("failed to map memory. errno is %d", errno);
              goto close_socket;
            }
          vui->region_mmap_addr[i] += vui->regions[i].mmap_offset;
          vui->region_mmap_fd[i] = fds[i];

          vui->nregions++;
        }
      break;

    case VHOST_USER_SET_VRING_NUM:
      DBG_SOCK ("if %d msg VHOST_USER_SET_VRING_NUM idx %d num %d",
                vui->hw_if_index, msg.state.index, msg.state.num);

      if ((msg.state.num > 32768) ||	/* maximum ring size is 32768 */
          (msg.state.num == 0) ||	/* it cannot be zero */
          ((msg.state.num - 1) & msg.state.num))	/* must be power of 2 */
        goto close_socket;
      vui->vrings[msg.state.index].qsz_mask = msg.state.num - 1;
      break;

    case VHOST_USER_SET_VRING_ADDR:
      DBG_SOCK ("if %d msg VHOST_USER_SET_VRING_ADDR idx %d",
                vui->hw_if_index, msg.state.index);

      if (msg.state.index >= VHOST_VRING_MAX_N)
        {
          DBG_SOCK ("invalid vring index VHOST_USER_SET_VRING_ADDR:"
                    " %d >= %d", msg.state.index, VHOST_VRING_MAX_N);
          goto close_socket;
        }

      if (msg.size < sizeof (msg.addr))
        {
          DBG_SOCK ("vhost message is too short (%d < %d)",
                    msg.size, sizeof (msg.addr));
          goto close_socket;
        }

      vui->vrings[msg.state.index].desc = (vring_desc_t *)
        map_user_mem (vui, msg.addr.desc_user_addr);
      vui->vrings[msg.state.index].used = (vring_used_t *)
        map_user_mem (vui, msg.addr.used_user_addr);
      vui->vrings[msg.state.index].avail = (vring_avail_t *)
        map_user_mem (vui, msg.addr.avail_user_addr);

      if ((vui->vrings[msg.state.index].desc == NULL) ||
          (vui->vrings[msg.state.index].used == NULL) ||
          (vui->vrings[msg.state.index].avail == NULL))
        {
          DBG_SOCK ("failed to map user memory for hw_if_index %d",
                    vui->hw_if_index);
          goto close_socket;
        }

      vui->vrings[msg.state.index].log_guest_addr = msg.addr.log_guest_addr;
      vui->vrings[msg.state.index].log_used =
        (msg.addr.flags & (1 << VHOST_VRING_F_LOG)) ? 1 : 0;

      /* Spec says: If VHOST_USER_F_PROTOCOL_FEATURES has not been negotiated,
         the ring is initialized in an enabled state. */
      if (!(vui->features & (1 << FEAT_VHOST_USER_F_PROTOCOL_FEATURES)))
        {
          vui->vrings[msg.state.index].enabled = 1;
        }

      vui->vrings[msg.state.index].last_used_idx =
        vui->vrings[msg.state.index].last_avail_idx =
        vui->vrings[msg.state.index].used->idx;

      /* tell driver that we don't want interrupts */
      vui->vrings[msg.state.index].used->flags = VRING_USED_F_NO_NOTIFY;
      break;

    case VHOST_USER_SET_OWNER:
      DBG_SOCK ("if %d msg VHOST_USER_SET_OWNER", vui->hw_if_index);
      break;

    case VHOST_USER_RESET_OWNER:
      DBG_SOCK ("if %d msg VHOST_USER_RESET_OWNER", vui->hw_if_index);
      break;

    case VHOST_USER_SET_VRING_CALL:
      DBG_SOCK ("if %d msg VHOST_USER_SET_VRING_CALL %d",
                vui->hw_if_index, msg.u64);

      q = (u8) (msg.u64 & 0xFF);

      /* if there is an old fd, delete and close it */
      if (vui->vrings[q].callfd_idx != ~0)
        {
          clib_file_t *uf = pool_elt_at_index (file_main.file_pool,
                                               vui->vrings[q].callfd_idx);
          clib_file_del (&file_main, uf);
          vui->vrings[q].callfd_idx = ~0;
        }

      if (!(msg.u64 & VHOST_USER_VRING_NOFD_MASK))
        {
          if (number_of_fds != 1)
            {
              DBG_SOCK ("More than one fd received !");
              goto close_socket;
            }

          template.read_function = vhost_user_callfd_read_ready;
          template.file_descriptor = fds[0];
          template.private_data =
            ((vui - vhost_user_main.vhost_user_interfaces) << 8) + q;
          vui->vrings[q].callfd_idx = clib_file_add (&file_main, &template);
        }
      else
        vui->vrings[q].callfd_idx = ~0;
      break;

    case VHOST_USER_SET_VRING_KICK:
      DBG_SOCK ("if %d msg VHOST_USER_SET_VRING_KICK %d",
                vui->hw_if_index, msg.u64);

      q = (u8) (msg.u64 & 0xFF);

      if (vui->vrings[q].kickfd_idx != ~0)
        {
          clib_file_t *uf = pool_elt_at_index (file_main.file_pool,
                                               vui->vrings[q].kickfd_idx);
          clib_file_del (&file_main, uf);
          vui->vrings[q].kickfd_idx = ~0;
        }

      if (!(msg.u64 & VHOST_USER_VRING_NOFD_MASK))
        {
          if (number_of_fds != 1)
            {
              DBG_SOCK ("More than one fd received !");
              goto close_socket;
            }

          template.read_function = vhost_user_kickfd_read_ready;
          template.file_descriptor = fds[0];
          template.private_data =
            (((uword) (vui - vhost_user_main.vhost_user_interfaces)) << 8) +
            q;
          vui->vrings[q].kickfd_idx = clib_file_add (&file_main, &template);
        }
      else
        {
          // When no kickfd is set, the queue is initialized as started
          vui->vrings[q].kickfd_idx = ~0;
          vui->vrings[q].started = 1;
        }

      break;

    case VHOST_USER_SET_VRING_ERR:
      DBG_SOCK ("if %d msg VHOST_USER_SET_VRING_ERR %d",
                vui->hw_if_index, msg.u64);

      q = (u8) (msg.u64 & 0xFF);

      if (vui->vrings[q].errfd != -1)
        close (vui->vrings[q].errfd);

      if (!(msg.u64 & VHOST_USER_VRING_NOFD_MASK))
        {
          if (number_of_fds != 1)
            goto close_socket;

          vui->vrings[q].errfd = fds[0];
        }
      else
        vui->vrings[q].errfd = -1;

      break;

    case VHOST_USER_SET_VRING_BASE:
      DBG_SOCK ("if %d msg VHOST_USER_SET_VRING_BASE idx %d num %d",
                vui->hw_if_index, msg.state.index, msg.state.num);

      vui->vrings[msg.state.index].last_avail_idx = msg.state.num;
      break;

    case VHOST_USER_GET_VRING_BASE:
      if (msg.state.index >= VHOST_VRING_MAX_N)
        {
          DBG_SOCK ("invalid vring index VHOST_USER_GET_VRING_BASE:"
                    " %d >= %d", msg.state.index, VHOST_VRING_MAX_N);
          goto close_socket;
        }

      /*
       * Copy last_avail_idx from the vring before closing it because
       * closing the vring also initializes the vring last_avail_idx
       */
      msg.state.num = vui->vrings[msg.state.index].last_avail_idx;
      msg.flags |= 4;
      msg.size = sizeof (msg.state);

      /* Spec says: Client must [...] stop ring upon receiving VHOST_USER_GET_VRING_BASE. */
      vhost_user_vring_close (vui, msg.state.index);
      DBG_SOCK ("if %d msg VHOST_USER_GET_VRING_BASE idx %d num %d",
                vui->hw_if_index, msg.state.index, msg.state.num);
      break;

    case VHOST_USER_NONE:
      DBG_SOCK ("if %d msg VHOST_USER_NONE", vui->hw_if_index);

      break;

    case VHOST_USER_SET_LOG_BASE:
      {
        DBG_SOCK ("if %d msg VHOST_USER_SET_LOG_BASE", vui->hw_if_index);

        if (msg.size != sizeof (msg.log))
          {
            DBG_SOCK
              ("invalid msg size for VHOST_USER_SET_LOG_BASE: %d instead of %d",
               msg.size, sizeof (msg.log));
            goto close_socket;
          }

        if (!
            (vui->protocol_features & (1 << VHOST_USER_PROTOCOL_F_LOG_SHMFD)))
          {
            DBG_SOCK
              ("VHOST_USER_PROTOCOL_F_LOG_SHMFD not set but VHOST_USER_SET_LOG_BASE received");
            goto close_socket;
          }

        fd = fds[0];
        /* align size to page */
        long page_sz = get_huge_page_size (fd);
        ssize_t map_sz =
          (msg.log.size + msg.log.offset + page_sz - 1) & ~(page_sz - 1);

        vui->log_base_addr = mmap (0, map_sz, PROT_READ | PROT_WRITE,
                                   MAP_SHARED, fd, 0);

        DBG_SOCK
          ("map log region addr 0 len 0x%lx off 0x%lx fd %d mapped 0x%lx",
           map_sz, msg.log.offset, fd, vui->log_base_addr);

        if (vui->log_base_addr == MAP_FAILED)
          {
            clib_warning ("failed to map memory. errno is %d", errno);
            goto close_socket;
          }

        vui->log_base_addr += msg.log.offset;
        vui->log_size = msg.log.size;

        msg.flags |= 4;
        msg.size = sizeof (msg.u64);

        break;
      }

    case VHOST_USER_SET_LOG_FD:
      DBG_SOCK ("if %d msg VHOST_USER_SET_LOG_FD", vui->hw_if_index);

      break;

    case VHOST_USER_GET_PROTOCOL_FEATURES:
      msg.flags |= 4;
      msg.u64 = (1 << VHOST_USER_PROTOCOL_F_LOG_SHMFD) |
        (1 << VHOST_USER_PROTOCOL_F_MQ);
      msg.size = sizeof (msg.u64);
      DBG_SOCK
        ("if %d msg VHOST_USER_GET_PROTOCOL_FEATURES - reply 0x%016llx",
         vui->hw_if_index, msg.u64);
      break;

    case VHOST_USER_SET_PROTOCOL_FEATURES:
      DBG_SOCK
        ("if %d msg VHOST_USER_SET_PROTOCOL_FEATURES features 0x%016llx",
         vui->hw_if_index, msg.u64);

      vui->protocol_features = msg.u64;

      break;

    case VHOST_USER_GET_QUEUE_NUM:
      msg.flags |= 4;
      msg.u64 = VHOST_VRING_MAX_N;
      msg.size = sizeof (msg.u64);
      DBG_SOCK ("if %d msg VHOST_USER_GET_QUEUE_NUM - reply %d",
                vui->hw_if_index, msg.u64);
      break;

    case VHOST_USER_SET_VRING_ENABLE:
      DBG_SOCK ("if %d VHOST_USER_SET_VRING_ENABLE: %s queue %d",
                vui->hw_if_index, msg.state.num ? "enable" : "disable",
                msg.state.index);
      if (msg.state.index >= VHOST_VRING_MAX_N)
        {
          DBG_SOCK ("invalid vring index VHOST_USER_SET_VRING_ENABLE:"
                    " %d >= %d", msg.state.index, VHOST_VRING_MAX_N);
          goto close_socket;
        }

      vui->vrings[msg.state.index].enabled = msg.state.num;
      break;

    default:
      DBG_SOCK ("unknown vhost-user message %d received. closing socket",
                msg.request);
      goto close_socket;
    }

  /* if we need to reply */
  if (msg.flags & 4)
    {
      n =
        send (uf->file_descriptor, &msg, VHOST_USER_MSG_HDR_SZ + msg.size, 0);
      if (n != (msg.size + VHOST_USER_MSG_HDR_SZ))
        {
          DBG_SOCK ("could not send message response");
          goto close_socket;
        }
    }

  vhost_user_update_iface_state (vui);
  vlib_worker_thread_barrier_release (vlib_get_main ());
  return 0;

close_socket:
  vhost_user_if_disconnect (vui);
  vhost_user_update_iface_state (vui);
  vlib_worker_thread_barrier_release (vlib_get_main ());
  return 0;
}

static clib_error_t *
vhost_user_socket_error (clib_file_t * uf)
{
  vlib_main_t *vm = vlib_get_main ();
  vhost_user_main_t *vum = &vhost_user_main;
  vhost_user_intf_t *vui =
    pool_elt_at_index (vum->vhost_user_interfaces, uf->private_data);

  DBG_SOCK ("socket error on if %d", vui->sw_if_index);
  vlib_worker_thread_barrier_sync (vm);
  vhost_user_if_disconnect (vui);
  vhost_user_rx_thread_placement ();
  vlib_worker_thread_barrier_release (vm);
  return 0;
}

Damjan Marion00a9dca2016-08-17 17:05:46 +02001266static clib_error_t *
Damjan Marion56dd5432017-09-08 19:52:02 +02001267vhost_user_socksvr_accept_ready (clib_file_t * uf)
Ed Warnickecb9cada2015-12-08 15:45:58 -07001268{
1269 int client_fd, client_len;
1270 struct sockaddr_un client;
Damjan Marion56dd5432017-09-08 19:52:02 +02001271 clib_file_t template = { 0 };
Damjan Marion00a9dca2016-08-17 17:05:46 +02001272 vhost_user_main_t *vum = &vhost_user_main;
1273 vhost_user_intf_t *vui;
Ed Warnickecb9cada2015-12-08 15:45:58 -07001274
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00001275 vui = pool_elt_at_index (vum->vhost_user_interfaces, uf->private_data);
Ed Warnickecb9cada2015-12-08 15:45:58 -07001276
Damjan Marion00a9dca2016-08-17 17:05:46 +02001277 client_len = sizeof (client);
Ed Warnickecb9cada2015-12-08 15:45:58 -07001278 client_fd = accept (uf->file_descriptor,
Damjan Marion00a9dca2016-08-17 17:05:46 +02001279 (struct sockaddr *) &client,
1280 (socklen_t *) & client_len);
Ed Warnickecb9cada2015-12-08 15:45:58 -07001281
1282 if (client_fd < 0)
Damjan Marion00a9dca2016-08-17 17:05:46 +02001283 return clib_error_return_unix (0, "accept");
Ed Warnickecb9cada2015-12-08 15:45:58 -07001284
Pierre Pfistere21c5282016-09-21 08:04:59 +01001285 DBG_SOCK ("New client socket for vhost interface %d", vui->sw_if_index);
Ed Warnickecb9cada2015-12-08 15:45:58 -07001286 template.read_function = vhost_user_socket_read;
1287 template.error_function = vhost_user_socket_error;
1288 template.file_descriptor = client_fd;
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00001289 template.private_data = vui - vhost_user_main.vhost_user_interfaces;
Damjan Marion56dd5432017-09-08 19:52:02 +02001290 vui->clib_file_index = clib_file_add (&file_main, &template);
Ed Warnickecb9cada2015-12-08 15:45:58 -07001291 return 0;
1292}
1293
1294static clib_error_t *
1295vhost_user_init (vlib_main_t * vm)
1296{
Damjan Marion00a9dca2016-08-17 17:05:46 +02001297 clib_error_t *error;
1298 vhost_user_main_t *vum = &vhost_user_main;
1299 vlib_thread_main_t *tm = vlib_get_thread_main ();
Ed Warnickecb9cada2015-12-08 15:45:58 -07001300
1301 error = vlib_call_init_function (vm, ip4_init);
1302 if (error)
1303 return error;
1304
Ed Warnickecb9cada2015-12-08 15:45:58 -07001305 vum->coalesce_frames = 32;
1306 vum->coalesce_time = 1e-3;
1307
Pierre Pfistere21c5282016-09-21 08:04:59 +01001308 vec_validate (vum->cpus, tm->n_vlib_mains - 1);
Ed Warnickecb9cada2015-12-08 15:45:58 -07001309
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00001310 vhost_cpu_t *cpu;
1311 vec_foreach (cpu, vum->cpus)
1312 {
1313 /* This is actually not necessary as validate already zeroes it
1314 * Just keeping the loop here for later because I am lazy. */
1315 cpu->rx_buffers_len = 0;
1316 }
1317
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00001318 vum->random = random_default_seed ();
1319
Steven5445f5f2017-04-25 16:16:00 -07001320 mhash_init_c_string (&vum->if_index_by_sock_name, sizeof (uword));
1321
Ed Warnickecb9cada2015-12-08 15:45:58 -07001322 return 0;
1323}
1324
1325VLIB_INIT_FUNCTION (vhost_user_init);
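/*
 * Note on the defaults above (illustrative): coalesce_frames and
 * coalesce_time bound how often the guest is interrupted. With
 * coalesce_frames = 32 and coalesce_time = 1e-3, a vring is kicked either
 * as soon as 32 packets have accumulated since the last interrupt, or when
 * the 1 ms deadline kept in int_deadline expires (handled by the
 * vhost-user-send-interrupt-process node further down), whichever comes
 * first.
 */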
1326
Damjan Marion00a9dca2016-08-17 17:05:46 +02001327static u8 *
Pierre Pfister116ea4b2016-11-08 15:49:28 +00001328format_vhost_trace (u8 * s, va_list * va)
Ed Warnickecb9cada2015-12-08 15:45:58 -07001329{
1330 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
1331 CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
Damjan Marion00a9dca2016-08-17 17:05:46 +02001332 CLIB_UNUSED (vnet_main_t * vnm) = vnet_get_main ();
1333 vhost_user_main_t *vum = &vhost_user_main;
Pierre Pfister116ea4b2016-11-08 15:49:28 +00001334 vhost_trace_t *t = va_arg (*va, vhost_trace_t *);
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00001335 vhost_user_intf_t *vui = pool_elt_at_index (vum->vhost_user_interfaces,
1336 t->device_index);
Ed Warnickecb9cada2015-12-08 15:45:58 -07001337
Damjan Marion00a9dca2016-08-17 17:05:46 +02001338 vnet_sw_interface_t *sw = vnet_get_sw_interface (vnm, vui->sw_if_index);
Ed Warnickecb9cada2015-12-08 15:45:58 -07001339
Christophe Fontained3c008d2017-10-02 18:10:54 +02001340 u32 indent = format_get_indent (s);
Ed Warnickecb9cada2015-12-08 15:45:58 -07001341
Pierre Pfister116ea4b2016-11-08 15:49:28 +00001342 s = format (s, "%U %U queue %d\n", format_white_space, indent,
Pierre Pfistere21c5282016-09-21 08:04:59 +01001343 format_vnet_sw_interface_name, vnm, sw, t->qid);
Ed Warnickecb9cada2015-12-08 15:45:58 -07001344
Pierre Pfister116ea4b2016-11-08 15:49:28 +00001345 s = format (s, "%U virtio flags:\n", format_white_space, indent);
1346#define _(n,i,st) \
1347 if (t->virtio_ring_flags & (1 << VIRTIO_TRACE_F_##n)) \
1348 s = format (s, "%U %s %s\n", format_white_space, indent, #n, st);
1349 foreach_virtio_trace_flags
1350#undef _
1351 s = format (s, "%U virtio_net_hdr first_desc_len %u\n",
1352 format_white_space, indent, t->first_desc_len);
1353
1354 s = format (s, "%U flags 0x%02x gso_type %u\n",
Damjan Marion00a9dca2016-08-17 17:05:46 +02001355 format_white_space, indent,
Pierre Pfister116ea4b2016-11-08 15:49:28 +00001356 t->hdr.hdr.flags, t->hdr.hdr.gso_type);
1357
1358 if (vui->virtio_net_hdr_sz == 12)
1359 s = format (s, "%U num_buff %u",
1360 format_white_space, indent, t->hdr.num_buffers);
Ed Warnickecb9cada2015-12-08 15:45:58 -07001361
1362 return s;
1363}
1364
Damjan Marion00a9dca2016-08-17 17:05:46 +02001365void
Pierre Pfister116ea4b2016-11-08 15:49:28 +00001366vhost_user_rx_trace (vhost_trace_t * t,
1367 vhost_user_intf_t * vui, u16 qid,
1368 vlib_buffer_t * b, vhost_user_vring_t * txvq)
Ed Warnickecb9cada2015-12-08 15:45:58 -07001369{
Damjan Marion00a9dca2016-08-17 17:05:46 +02001370 vhost_user_main_t *vum = &vhost_user_main;
Pierre Pfister116ea4b2016-11-08 15:49:28 +00001371 u32 last_avail_idx = txvq->last_avail_idx;
Steven97878892017-08-29 09:23:26 -07001372 u32 desc_current = txvq->avail->ring[last_avail_idx & txvq->qsz_mask];
Pierre Pfister116ea4b2016-11-08 15:49:28 +00001373 vring_desc_t *hdr_desc = 0;
1374 virtio_net_hdr_mrg_rxbuf_t *hdr;
1375 u32 hint = 0;
Ed Warnickecb9cada2015-12-08 15:45:58 -07001376
Pierre Pfister116ea4b2016-11-08 15:49:28 +00001377 memset (t, 0, sizeof (*t));
1378 t->device_index = vui - vum->vhost_user_interfaces;
1379 t->qid = qid;
Ed Warnickecb9cada2015-12-08 15:45:58 -07001380
Pierre Pfister116ea4b2016-11-08 15:49:28 +00001381 hdr_desc = &txvq->desc[desc_current];
1382 if (txvq->desc[desc_current].flags & VIRTQ_DESC_F_INDIRECT)
Damjan Marion00a9dca2016-08-17 17:05:46 +02001383 {
Pierre Pfister116ea4b2016-11-08 15:49:28 +00001384 t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_INDIRECT;
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00001385 /* Header is the first here */
Pierre Pfister116ea4b2016-11-08 15:49:28 +00001386 hdr_desc = map_guest_mem (vui, txvq->desc[desc_current].addr, &hint);
1387 }
1388 if (txvq->desc[desc_current].flags & VIRTQ_DESC_F_NEXT)
1389 {
1390 t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_SIMPLE_CHAINED;
1391 }
1392 if (!(txvq->desc[desc_current].flags & VIRTQ_DESC_F_NEXT) &&
1393 !(txvq->desc[desc_current].flags & VIRTQ_DESC_F_INDIRECT))
1394 {
1395 t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_SINGLE_DESC;
1396 }
Ed Warnickecb9cada2015-12-08 15:45:58 -07001397
Pierre Pfister116ea4b2016-11-08 15:49:28 +00001398 t->first_desc_len = hdr_desc ? hdr_desc->len : 0;
Ed Warnickecb9cada2015-12-08 15:45:58 -07001399
Pierre Pfister116ea4b2016-11-08 15:49:28 +00001400 if (!hdr_desc || !(hdr = map_guest_mem (vui, hdr_desc->addr, &hint)))
1401 {
1402 t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_MAP_ERROR;
1403 }
1404 else
1405 {
1406 u32 len = vui->virtio_net_hdr_sz;
1407 memcpy (&t->hdr, hdr, len > hdr_desc->len ? hdr_desc->len : len);
Damjan Marion00a9dca2016-08-17 17:05:46 +02001408 }
Ed Warnickecb9cada2015-12-08 15:45:58 -07001409}
1410
Damjan Marion00a9dca2016-08-17 17:05:46 +02001411static inline void
1412vhost_user_send_call (vlib_main_t * vm, vhost_user_vring_t * vq)
Ed Warnickecb9cada2015-12-08 15:45:58 -07001413{
Damjan Marion00a9dca2016-08-17 17:05:46 +02001414 vhost_user_main_t *vum = &vhost_user_main;
1415 u64 x = 1;
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00001416 int fd = UNIX_GET_FD (vq->callfd_idx);
Stevenf3b53642017-05-01 14:03:02 -07001417 int rv;
1418
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00001419 rv = write (fd, &x, sizeof (x));
Stevenf3b53642017-05-01 14:03:02 -07001420 if (rv <= 0)
1421 {
1422 clib_unix_warning
1423 ("Error: Could not write to unix socket for callfd %d", fd);
1424 return;
1425 }
1426
Damjan Marion00a9dca2016-08-17 17:05:46 +02001427 vq->n_since_last_int = 0;
1428 vq->int_deadline = vlib_time_now (vm) + vum->coalesce_time;
Ed Warnickecb9cada2015-12-08 15:45:58 -07001429}
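
/*
 * Illustrative sketch (not called by the driver): how the data path uses
 * vhost_user_send_call() together with the coalescing parameters. The guest
 * can opt out of interrupts entirely by setting VRING_AVAIL_F_NO_INTERRUPT
 * in the avail ring flags. Only names defined in this file are used here.
 */
static_always_inline void
vhost_user_coalesce_kick_sketch (vlib_main_t * vm, vhost_user_vring_t * vq,
				 u16 n_packets)
{
  vhost_user_main_t *vum = &vhost_user_main;

  if ((vq->callfd_idx == ~0) ||
      (vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
    return;			/* no call fd, or guest asked for no interrupts */

  vq->n_since_last_int += n_packets;
  if (vq->n_since_last_int > vum->coalesce_frames)
    vhost_user_send_call (vm, vq);	/* kick the guest now */
  /* otherwise vhost-user-send-interrupt-process kicks at vq->int_deadline */
}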
1430
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00001431static_always_inline u32
1432vhost_user_input_copy (vhost_user_intf_t * vui, vhost_copy_t * cpy,
1433 u16 copy_len, u32 * map_hint)
1434{
1435 void *src0, *src1, *src2, *src3;
1436 if (PREDICT_TRUE (copy_len >= 4))
1437 {
1438 if (PREDICT_FALSE (!(src2 = map_guest_mem (vui, cpy[0].src, map_hint))))
1439 return 1;
1440 if (PREDICT_FALSE (!(src3 = map_guest_mem (vui, cpy[1].src, map_hint))))
1441 return 1;
1442
1443 while (PREDICT_TRUE (copy_len >= 4))
1444 {
1445 src0 = src2;
1446 src1 = src3;
1447
1448 if (PREDICT_FALSE
1449 (!(src2 = map_guest_mem (vui, cpy[2].src, map_hint))))
1450 return 1;
1451 if (PREDICT_FALSE
1452 (!(src3 = map_guest_mem (vui, cpy[3].src, map_hint))))
1453 return 1;
1454
1455 CLIB_PREFETCH (src2, 64, LOAD);
1456 CLIB_PREFETCH (src3, 64, LOAD);
1457
1458 clib_memcpy ((void *) cpy[0].dst, src0, cpy[0].len);
1459 clib_memcpy ((void *) cpy[1].dst, src1, cpy[1].len);
1460 copy_len -= 2;
1461 cpy += 2;
1462 }
1463 }
1464 while (copy_len)
1465 {
1466 if (PREDICT_FALSE (!(src0 = map_guest_mem (vui, cpy->src, map_hint))))
1467 return 1;
1468 clib_memcpy ((void *) cpy->dst, src0, cpy->len);
1469 copy_len -= 1;
1470 cpy += 1;
1471 }
1472 return 0;
1473}
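
/*
 * Design note (sketch): the ring-parsing loop below does not copy packet
 * data itself. It only queues "copy orders" in the per-thread copy[] array;
 * vhost_user_input_copy() above then executes them in batches, which lets
 * the guest pages of the next two entries be prefetched while the current
 * pair is copied. Queuing an order boils down to (names in the snippet are
 * illustrative):
 *
 *   vhost_copy_t *cpy = &vum->cpus[thread_index].copy[copy_len++];
 *   cpy->len = bytes_to_copy;
 *   cpy->dst = (uword) dst_in_vlib_buffer;
 *   cpy->src = guest_address_of_descriptor_data;
 *
 * The batch is flushed when copy_len reaches VHOST_USER_RX_COPY_THRESHOLD
 * or at the end of the input routine.
 */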
1474
1475/**
1476 * Try to discard packets from the tx ring (VPP RX path).
1477 * Returns the number of discarded packets.
1478 */
1479u32
1480vhost_user_rx_discard_packet (vlib_main_t * vm,
1481 vhost_user_intf_t * vui,
1482 vhost_user_vring_t * txvq, u32 discard_max)
1483{
1484 /*
1485 * On the RX side, each packet corresponds to one descriptor
1486 * (it is the same whether it is a shallow descriptor, chained, or indirect).
1487 * Therefore, discarding a packet is like discarding a descriptor.
1488 */
1489 u32 discarded_packets = 0;
1490 u32 avail_idx = txvq->avail->idx;
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00001491 while (discarded_packets != discard_max)
1492 {
1493 if (avail_idx == txvq->last_avail_idx)
1494 goto out;
1495
1496 u16 desc_chain_head =
Steven97878892017-08-29 09:23:26 -07001497 txvq->avail->ring[txvq->last_avail_idx & txvq->qsz_mask];
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00001498 txvq->last_avail_idx++;
Steven97878892017-08-29 09:23:26 -07001499 txvq->used->ring[txvq->last_used_idx & txvq->qsz_mask].id =
1500 desc_chain_head;
1501 txvq->used->ring[txvq->last_used_idx & txvq->qsz_mask].len = 0;
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00001502 vhost_user_log_dirty_ring (vui, txvq,
Steven97878892017-08-29 09:23:26 -07001503 ring[txvq->last_used_idx & txvq->qsz_mask]);
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00001504 txvq->last_used_idx++;
1505 discarded_packets++;
1506 }
1507
1508out:
1509 CLIB_MEMORY_BARRIER ();
1510 txvq->used->idx = txvq->last_used_idx;
1511 vhost_user_log_dirty_ring (vui, txvq, idx);
1512 return discarded_packets;
1513}
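
/*
 * Note on ring indexing: last_avail_idx and last_used_idx are free-running
 * 16-bit counters; the actual slot is obtained by masking with qsz_mask
 * (ring size minus one), so u16 wrap-around is harmless. For example, with
 * a 256-entry ring, index 65535 maps to slot 255 and the next increment
 * wraps to slot 0. The sketch below (not called by the driver) shows how
 * the number of pending packets on the guest TX ring is derived from these
 * counters, as done at the top of the input routine.
 */
static_always_inline u16
vhost_user_tx_ring_pending_sketch (vhost_user_vring_t * txvq)
{
  /* u16 arithmetic makes the wrap-around harmless */
  return (u16) (txvq->avail->idx - txvq->last_avail_idx);
}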
1514
1515/*
1516 * In case of overflow, we need to rewind the array of allocated buffers.
1517 */
1518static void
1519vhost_user_input_rewind_buffers (vlib_main_t * vm,
1520 vhost_cpu_t * cpu, vlib_buffer_t * b_head)
1521{
1522 u32 bi_current = cpu->rx_buffers[cpu->rx_buffers_len];
1523 vlib_buffer_t *b_current = vlib_get_buffer (vm, bi_current);
1524 b_current->current_length = 0;
1525 b_current->flags = 0;
1526 while (b_current != b_head)
1527 {
1528 cpu->rx_buffers_len++;
1529 bi_current = cpu->rx_buffers[cpu->rx_buffers_len];
1530 b_current = vlib_get_buffer (vm, bi_current);
1531 b_current->current_length = 0;
1532 b_current->flags = 0;
1533 }
Steven95827e42017-05-18 21:22:00 -07001534 cpu->rx_buffers_len++;
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00001535}
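
/*
 * Note (descriptive): the per-thread rx_buffers vector behaves like a LIFO
 * stack of pre-allocated buffer indices. The input loop below pops an index
 * by decrementing rx_buffers_len; the rewind helper above pushes indices
 * back (incrementing rx_buffers_len) when a packet cannot be completed, so
 * the buffers are reused for the next packet instead of being leaked.
 */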
Yoann Desmouceaux4667c222016-02-24 22:51:00 +01001536
Damjan Marion00a9dca2016-08-17 17:05:46 +02001537static u32
1538vhost_user_if_input (vlib_main_t * vm,
1539 vhost_user_main_t * vum,
Pierre Pfistere21c5282016-09-21 08:04:59 +01001540 vhost_user_intf_t * vui,
Stevenf3b53642017-05-01 14:03:02 -07001541 u16 qid, vlib_node_runtime_t * node,
1542 vnet_hw_interface_rx_mode mode)
Ed Warnickecb9cada2015-12-08 15:45:58 -07001543{
Pierre Pfistere21c5282016-09-21 08:04:59 +01001544 vhost_user_vring_t *txvq = &vui->vrings[VHOST_VRING_IDX_TX (qid)];
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00001545 u16 n_rx_packets = 0;
1546 u32 n_rx_bytes = 0;
1547 u16 n_left;
Damjan Marion00a9dca2016-08-17 17:05:46 +02001548 u32 n_left_to_next, *to_next;
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00001549 u32 next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
1550 u32 n_trace = vlib_get_trace_count (vm, node);
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00001551 u32 map_hint = 0;
Damjan Marion586afd72017-04-05 19:18:20 +02001552 u16 thread_index = vlib_get_thread_index ();
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00001553 u16 copy_len = 0;
Ed Warnickecb9cada2015-12-08 15:45:58 -07001554
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00001555 {
1556 /* do we have pending interrupts ? */
1557 vhost_user_vring_t *rxvq = &vui->vrings[VHOST_VRING_IDX_RX (qid)];
1558 f64 now = vlib_time_now (vm);
Ed Warnickecb9cada2015-12-08 15:45:58 -07001559
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00001560 if ((txvq->n_since_last_int) && (txvq->int_deadline < now))
1561 vhost_user_send_call (vm, txvq);
1562
1563 if ((rxvq->n_since_last_int) && (rxvq->int_deadline < now))
1564 vhost_user_send_call (vm, rxvq);
1565 }
Ed Warnickecb9cada2015-12-08 15:45:58 -07001566
Stevenf3b53642017-05-01 14:03:02 -07001567 /*
1568   * In adaptive mode, we try to minimize the number of interrupts.
1569   * If the scheduler switches the input node to polling due to a burst
1570   * of traffic, we tell the driver not to send interrupts.
1571   * When the traffic subsides and the scheduler switches the node back
1572   * to interrupt mode, we tell the driver we want interrupts again.
1573 */
1574 if (PREDICT_FALSE (mode == VNET_HW_INTERFACE_RX_MODE_ADAPTIVE))
1575 {
1576 if ((node->flags &
1577 VLIB_NODE_FLAG_SWITCH_FROM_POLLING_TO_INTERRUPT_MODE) ||
1578 !(node->flags &
1579 VLIB_NODE_FLAG_SWITCH_FROM_INTERRUPT_TO_POLLING_MODE))
1580 /* Tell driver we want notification */
1581 txvq->used->flags = 0;
1582 else
1583 /* Tell driver we don't want notification */
1584 txvq->used->flags = VRING_USED_F_NO_NOTIFY;
1585 }
1586
Damjan Marion00a9dca2016-08-17 17:05:46 +02001587 if (PREDICT_FALSE (txvq->avail->flags & 0xFFFE))
Ed Warnickecb9cada2015-12-08 15:45:58 -07001588 return 0;
1589
Pierre Pfisterba1d0462016-07-27 16:38:20 +01001590 n_left = (u16) (txvq->avail->idx - txvq->last_avail_idx);
1591
Ed Warnickecb9cada2015-12-08 15:45:58 -07001592 /* nothing to do */
Pierre Pfisterba1d0462016-07-27 16:38:20 +01001593 if (PREDICT_FALSE (n_left == 0))
Ed Warnickecb9cada2015-12-08 15:45:58 -07001594 return 0;
1595
Pierre Pfistere21c5282016-09-21 08:04:59 +01001596 if (PREDICT_FALSE (!vui->admin_up || !(txvq->enabled)))
Pierre Pfisterba1d0462016-07-27 16:38:20 +01001597 {
Pierre Pfistere21c5282016-09-21 08:04:59 +01001598 /*
1599 * Discard input packet if interface is admin down or vring is not
1600 * enabled.
1601 * "For example, for a networking device, in the disabled state
1602 * client must not supply any new RX packets, but must process
1603 * and discard any TX packets."
1604 */
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00001605 vhost_user_rx_discard_packet (vm, vui, txvq,
1606 VHOST_USER_DOWN_DISCARD_COUNT);
Damjan Marion00a9dca2016-08-17 17:05:46 +02001607 return 0;
1608 }
Ed Warnickecb9cada2015-12-08 15:45:58 -07001609
Steven97878892017-08-29 09:23:26 -07001610 if (PREDICT_FALSE (n_left == (txvq->qsz_mask + 1)))
Pierre Pfistere21c5282016-09-21 08:04:59 +01001611 {
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00001612 /*
1613 * Informational error logging when VPP is not
1614 * receiving packets fast enough.
1615 */
Pierre Pfistere21c5282016-09-21 08:04:59 +01001616 vlib_error_count (vm, node->node_index,
1617 VHOST_USER_INPUT_FUNC_ERROR_FULL_RX_QUEUE, 1);
1618 }
1619
Pierre Pfister328e99b2016-02-12 13:18:42 +00001620 if (n_left > VLIB_FRAME_SIZE)
1621 n_left = VLIB_FRAME_SIZE;
1622
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00001623 /*
1624 * For small packets (<2kB), we will not need more than one vlib buffer
1625    * per packet. In case packets are bigger, we will just yield at some point
1626    * in the loop and come back later. This is not an issue as, for big packets,
1627    * the processing cost really comes from the memory copy.
Pierre Pfister56a86842017-10-05 14:24:05 +02001628 * The assumption is that big packets will fit in 40 buffers.
Pierre Pfister328e99b2016-02-12 13:18:42 +00001629 */
Pierre Pfister56a86842017-10-05 14:24:05 +02001630 if (PREDICT_FALSE (vum->cpus[thread_index].rx_buffers_len < n_left + 1 ||
1631 vum->cpus[thread_index].rx_buffers_len < 40))
Damjan Marion00a9dca2016-08-17 17:05:46 +02001632 {
Damjan Marion586afd72017-04-05 19:18:20 +02001633 u32 curr_len = vum->cpus[thread_index].rx_buffers_len;
1634 vum->cpus[thread_index].rx_buffers_len +=
Damjan Marion00a9dca2016-08-17 17:05:46 +02001635 vlib_buffer_alloc_from_free_list (vm,
Damjan Marion586afd72017-04-05 19:18:20 +02001636 vum->cpus[thread_index].rx_buffers +
Pierre Pfisterba1d0462016-07-27 16:38:20 +01001637 curr_len,
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00001638 VHOST_USER_RX_BUFFERS_N - curr_len,
Damjan Marion00a9dca2016-08-17 17:05:46 +02001639 VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX);
Ed Warnickecb9cada2015-12-08 15:45:58 -07001640
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00001641 if (PREDICT_FALSE
Damjan Marion586afd72017-04-05 19:18:20 +02001642 (vum->cpus[thread_index].rx_buffers_len <
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00001643 VHOST_USER_RX_BUFFER_STARVATION))
Damjan Marion00a9dca2016-08-17 17:05:46 +02001644 {
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00001645 /* In case of buffer starvation, discard some packets from the queue
1646 * and log the event.
1647 * We keep doing best effort for the remaining packets. */
Damjan Marion586afd72017-04-05 19:18:20 +02001648 u32 flush = (n_left + 1 > vum->cpus[thread_index].rx_buffers_len) ?
1649 n_left + 1 - vum->cpus[thread_index].rx_buffers_len : 1;
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00001650 flush = vhost_user_rx_discard_packet (vm, vui, txvq, flush);
1651
1652 n_left -= flush;
1653 vlib_increment_simple_counter (vnet_main.
1654 interface_main.sw_if_counters +
1655 VNET_INTERFACE_COUNTER_DROP,
Damjan Marion586afd72017-04-05 19:18:20 +02001656 vlib_get_thread_index (),
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00001657 vui->sw_if_index, flush);
1658
1659 vlib_error_count (vm, vhost_user_input_node.index,
1660 VHOST_USER_INPUT_FUNC_ERROR_NO_BUFFER, flush);
Damjan Marion00a9dca2016-08-17 17:05:46 +02001661 }
1662 }
Ed Warnickecb9cada2015-12-08 15:45:58 -07001663
Damjan Marion00a9dca2016-08-17 17:05:46 +02001664 while (n_left > 0)
1665 {
1666 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
Ed Warnickecb9cada2015-12-08 15:45:58 -07001667
Damjan Marion00a9dca2016-08-17 17:05:46 +02001668 while (n_left > 0 && n_left_to_next > 0)
1669 {
1670 vlib_buffer_t *b_head, *b_current;
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00001671 u32 bi_current;
1672 u16 desc_current;
1673 u32 desc_data_offset;
1674 vring_desc_t *desc_table = txvq->desc;
Damjan Marion00a9dca2016-08-17 17:05:46 +02001675
Damjan Marion586afd72017-04-05 19:18:20 +02001676 if (PREDICT_FALSE (vum->cpus[thread_index].rx_buffers_len <= 1))
Pierre Pfisterba1d0462016-07-27 16:38:20 +01001677 {
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00001678 /* Not enough rx_buffers
1679	       * Note: We yield on 1 so we don't need to do an additional
1680 * check for the next buffer prefetch.
1681 */
1682 n_left = 0;
1683 break;
Pierre Pfisterba1d0462016-07-27 16:38:20 +01001684 }
1685
Steven97878892017-08-29 09:23:26 -07001686 desc_current =
1687 txvq->avail->ring[txvq->last_avail_idx & txvq->qsz_mask];
Damjan Marion586afd72017-04-05 19:18:20 +02001688 vum->cpus[thread_index].rx_buffers_len--;
1689 bi_current = (vum->cpus[thread_index].rx_buffers)
1690 [vum->cpus[thread_index].rx_buffers_len];
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00001691 b_head = b_current = vlib_get_buffer (vm, bi_current);
1692 to_next[0] = bi_current; //We do that now so we can forget about bi_current
1693 to_next++;
1694 n_left_to_next--;
1695
1696 vlib_prefetch_buffer_with_index (vm,
Damjan Marion586afd72017-04-05 19:18:20 +02001697 (vum->
1698 cpus[thread_index].rx_buffers)
1699 [vum->cpus[thread_index].
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00001700 rx_buffers_len - 1], LOAD);
1701
1702 /* Just preset the used descriptor id and length for later */
Steven97878892017-08-29 09:23:26 -07001703 txvq->used->ring[txvq->last_used_idx & txvq->qsz_mask].id =
1704 desc_current;
1705 txvq->used->ring[txvq->last_used_idx & txvq->qsz_mask].len = 0;
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00001706 vhost_user_log_dirty_ring (vui, txvq,
Steven97878892017-08-29 09:23:26 -07001707 ring[txvq->last_used_idx &
1708 txvq->qsz_mask]);
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00001709
1710 /* The buffer should already be initialized */
1711 b_head->total_length_not_including_first_buffer = 0;
1712 b_head->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
1713
Pierre Pfister116ea4b2016-11-08 15:49:28 +00001714 if (PREDICT_FALSE (n_trace))
1715 {
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00001716 //TODO: next_index is not exactly known at that point
Pierre Pfister116ea4b2016-11-08 15:49:28 +00001717 vlib_trace_buffer (vm, node, next_index, b_head,
1718 /* follow_chain */ 0);
1719 vhost_trace_t *t0 =
1720 vlib_add_trace (vm, node, b_head, sizeof (t0[0]));
1721 vhost_user_rx_trace (t0, vui, qid, b_head, txvq);
1722 n_trace--;
1723 vlib_set_trace_count (vm, node, n_trace);
1724 }
Damjan Marion00a9dca2016-08-17 17:05:46 +02001725
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00001726 /* This depends on the setup but is very consistent
1727	   * So I think the CPU branch predictor will do a pretty good job
1728 * at optimizing the decision. */
1729 if (txvq->desc[desc_current].flags & VIRTQ_DESC_F_INDIRECT)
1730 {
1731 desc_table = map_guest_mem (vui, txvq->desc[desc_current].addr,
1732 &map_hint);
1733 desc_current = 0;
1734 if (PREDICT_FALSE (desc_table == 0))
1735 {
Steven95827e42017-05-18 21:22:00 -07001736 vlib_error_count (vm, node->node_index,
1737 VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL, 1);
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00001738 goto out;
1739 }
1740 }
1741
Damjan Marion00a9dca2016-08-17 17:05:46 +02001742 if (PREDICT_TRUE (vui->is_any_layout) ||
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00001743 (!(desc_table[desc_current].flags & VIRTQ_DESC_F_NEXT)))
Damjan Marion00a9dca2016-08-17 17:05:46 +02001744 {
1745 /* ANYLAYOUT or single buffer */
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00001746 desc_data_offset = vui->virtio_net_hdr_sz;
Damjan Marion00a9dca2016-08-17 17:05:46 +02001747 }
1748 else
1749 {
1750 /* CSR case without ANYLAYOUT, skip 1st buffer */
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00001751 desc_data_offset = desc_table[desc_current].len;
Pierre Pfisterba1d0462016-07-27 16:38:20 +01001752 }
1753
Damjan Marion00a9dca2016-08-17 17:05:46 +02001754 while (1)
1755 {
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00001756 /* Get more input if necessary. Or end of packet. */
1757 if (desc_data_offset == desc_table[desc_current].len)
Damjan Marion00a9dca2016-08-17 17:05:46 +02001758 {
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00001759 if (PREDICT_FALSE (desc_table[desc_current].flags &
1760 VIRTQ_DESC_F_NEXT))
Damjan Marion00a9dca2016-08-17 17:05:46 +02001761 {
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00001762 desc_current = desc_table[desc_current].next;
1763 desc_data_offset = 0;
1764 }
1765 else
1766 {
1767 goto out;
Damjan Marion00a9dca2016-08-17 17:05:46 +02001768 }
1769 }
Ed Warnickecb9cada2015-12-08 15:45:58 -07001770
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00001771 /* Get more output if necessary. Or end of packet. */
1772 if (PREDICT_FALSE
1773 (b_current->current_length == VLIB_BUFFER_DATA_SIZE))
1774 {
1775 if (PREDICT_FALSE
Damjan Marion586afd72017-04-05 19:18:20 +02001776 (vum->cpus[thread_index].rx_buffers_len == 0))
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00001777 {
Steven62411e72017-02-03 09:30:37 -08001778 /* Cancel speculation */
1779 to_next--;
1780 n_left_to_next++;
1781
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00001782 /*
1783		      * Check whether there are any buffers left.
1784 * If not, just rewind the used buffers and stop.
1785 * Note: Scheduled copies are not cancelled. This is
1786 * not an issue as they would still be valid. Useless,
1787 * but valid.
1788 */
1789 vhost_user_input_rewind_buffers (vm,
Damjan Marion586afd72017-04-05 19:18:20 +02001790 &vum->cpus
1791 [thread_index],
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00001792 b_head);
1793 n_left = 0;
1794 goto stop;
1795 }
1796
1797 /* Get next output */
Damjan Marion586afd72017-04-05 19:18:20 +02001798 vum->cpus[thread_index].rx_buffers_len--;
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00001799 u32 bi_next =
Damjan Marion586afd72017-04-05 19:18:20 +02001800 (vum->cpus[thread_index].rx_buffers)[vum->cpus
1801 [thread_index].rx_buffers_len];
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00001802 b_current->next_buffer = bi_next;
1803 b_current->flags |= VLIB_BUFFER_NEXT_PRESENT;
1804 bi_current = bi_next;
1805 b_current = vlib_get_buffer (vm, bi_current);
1806 }
1807
1808 /* Prepare a copy order executed later for the data */
Damjan Marion586afd72017-04-05 19:18:20 +02001809 vhost_copy_t *cpy = &vum->cpus[thread_index].copy[copy_len];
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00001810 copy_len++;
1811 u32 desc_data_l =
1812 desc_table[desc_current].len - desc_data_offset;
1813 cpy->len = VLIB_BUFFER_DATA_SIZE - b_current->current_length;
1814 cpy->len = (cpy->len > desc_data_l) ? desc_data_l : cpy->len;
Steven025d4152017-05-16 21:26:13 -07001815 cpy->dst = (uword) (vlib_buffer_get_current (b_current) +
1816 b_current->current_length);
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00001817 cpy->src = desc_table[desc_current].addr + desc_data_offset;
1818
1819 desc_data_offset += cpy->len;
1820
1821 b_current->current_length += cpy->len;
1822 b_head->total_length_not_including_first_buffer += cpy->len;
Damjan Marion00a9dca2016-08-17 17:05:46 +02001823 }
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00001824
Pierre Pfisterba1d0462016-07-27 16:38:20 +01001825 out:
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00001826 CLIB_PREFETCH (&n_left, sizeof (n_left), LOAD);
1827
1828 n_rx_bytes += b_head->total_length_not_including_first_buffer;
1829 n_rx_packets++;
1830
1831 b_head->total_length_not_including_first_buffer -=
1832 b_head->current_length;
Ed Warnickecb9cada2015-12-08 15:45:58 -07001833
Damjan Marion00a9dca2016-08-17 17:05:46 +02001834 /* consume the descriptor and return it as used */
1835 txvq->last_avail_idx++;
Damjan Marion00a9dca2016-08-17 17:05:46 +02001836 txvq->last_used_idx++;
Ed Warnickecb9cada2015-12-08 15:45:58 -07001837
Damjan Marion00a9dca2016-08-17 17:05:46 +02001838 VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b_head);
Ed Warnickecb9cada2015-12-08 15:45:58 -07001839
Damjan Marion00a9dca2016-08-17 17:05:46 +02001840 vnet_buffer (b_head)->sw_if_index[VLIB_RX] = vui->sw_if_index;
1841 vnet_buffer (b_head)->sw_if_index[VLIB_TX] = (u32) ~ 0;
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00001842 b_head->error = 0;
Ed Warnickecb9cada2015-12-08 15:45:58 -07001843
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00001844 {
1845 u32 next0 = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
Pierre Pfister328e99b2016-02-12 13:18:42 +00001846
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00001847 /* redirect if feature path enabled */
1848 vnet_feature_start_device_input_x1 (vui->sw_if_index, &next0,
Damjan Marion35af9e52017-03-06 12:02:50 +01001849 b_head);
Damjan Marion22311502016-10-28 20:30:15 +02001850
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00001851 u32 bi = to_next[-1]; //Cannot use to_next[-1] in the macro
1852 vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1853 to_next, n_left_to_next,
1854 bi, next0);
1855 }
Damjan Marion22311502016-10-28 20:30:15 +02001856
Damjan Marion00a9dca2016-08-17 17:05:46 +02001857 n_left--;
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00001858
1859 /*
1860 * Although separating memory copies from virtio ring parsing
1861 * is beneficial, we can offer to perform the copies from time
1862 * to time in order to free some space in the ring.
1863 */
1864 if (PREDICT_FALSE (copy_len >= VHOST_USER_RX_COPY_THRESHOLD))
Pierre Pfistere21c5282016-09-21 08:04:59 +01001865 {
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00001866 if (PREDICT_FALSE
Damjan Marion586afd72017-04-05 19:18:20 +02001867 (vhost_user_input_copy (vui, vum->cpus[thread_index].copy,
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00001868 copy_len, &map_hint)))
1869 {
Steven95827e42017-05-18 21:22:00 -07001870 vlib_error_count (vm, node->node_index,
1871 VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL, 1);
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00001872 }
1873 copy_len = 0;
1874
1875 /* give buffers back to driver */
1876 CLIB_MEMORY_BARRIER ();
1877 txvq->used->idx = txvq->last_used_idx;
1878 vhost_user_log_dirty_ring (vui, txvq, idx);
Pierre Pfistere21c5282016-09-21 08:04:59 +01001879 }
Damjan Marion00a9dca2016-08-17 17:05:46 +02001880 }
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00001881 stop:
Damjan Marion00a9dca2016-08-17 17:05:46 +02001882 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
Ed Warnickecb9cada2015-12-08 15:45:58 -07001883 }
1884
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00001885 /* Do the memory copies */
1886 if (PREDICT_FALSE
Damjan Marion586afd72017-04-05 19:18:20 +02001887 (vhost_user_input_copy (vui, vum->cpus[thread_index].copy,
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00001888 copy_len, &map_hint)))
1889 {
Steven95827e42017-05-18 21:22:00 -07001890 vlib_error_count (vm, node->node_index,
1891 VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL, 1);
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00001892 }
Pierre Pfister328e99b2016-02-12 13:18:42 +00001893
1894 /* give buffers back to driver */
Damjan Marion00a9dca2016-08-17 17:05:46 +02001895 CLIB_MEMORY_BARRIER ();
Pierre Pfister328e99b2016-02-12 13:18:42 +00001896 txvq->used->idx = txvq->last_used_idx;
Damjan Marion00a9dca2016-08-17 17:05:46 +02001897 vhost_user_log_dirty_ring (vui, txvq, idx);
Pierre Pfister328e99b2016-02-12 13:18:42 +00001898
Ed Warnickecb9cada2015-12-08 15:45:58 -07001899 /* interrupt (call) handling */
Steven7312cc72017-03-15 21:18:55 -07001900 if ((txvq->callfd_idx != ~0) &&
1901 !(txvq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
Damjan Marion00a9dca2016-08-17 17:05:46 +02001902 {
1903 txvq->n_since_last_int += n_rx_packets;
Ed Warnickecb9cada2015-12-08 15:45:58 -07001904
Damjan Marion00a9dca2016-08-17 17:05:46 +02001905 if (txvq->n_since_last_int > vum->coalesce_frames)
1906 vhost_user_send_call (vm, txvq);
1907 }
Ed Warnickecb9cada2015-12-08 15:45:58 -07001908
1909 /* increase rx counters */
1910 vlib_increment_combined_counter
Damjan Marion00a9dca2016-08-17 17:05:46 +02001911 (vnet_main.interface_main.combined_sw_if_counters
1912 + VNET_INTERFACE_COUNTER_RX,
Damjan Marion586afd72017-04-05 19:18:20 +02001913 vlib_get_thread_index (), vui->sw_if_index, n_rx_packets, n_rx_bytes);
Ed Warnickecb9cada2015-12-08 15:45:58 -07001914
Damjan Marion586afd72017-04-05 19:18:20 +02001915 vnet_device_increment_rx_packets (thread_index, n_rx_packets);
Damjan Marionb3bb1012017-02-28 21:55:28 +01001916
Ed Warnickecb9cada2015-12-08 15:45:58 -07001917 return n_rx_packets;
1918}
1919
1920static uword
1921vhost_user_input (vlib_main_t * vm,
Damjan Marion00a9dca2016-08-17 17:05:46 +02001922 vlib_node_runtime_t * node, vlib_frame_t * f)
Ed Warnickecb9cada2015-12-08 15:45:58 -07001923{
Damjan Marion00a9dca2016-08-17 17:05:46 +02001924 vhost_user_main_t *vum = &vhost_user_main;
Ed Warnickecb9cada2015-12-08 15:45:58 -07001925 uword n_rx_packets = 0;
Steven7312cc72017-03-15 21:18:55 -07001926 vhost_user_intf_t *vui;
Stevenf3b53642017-05-01 14:03:02 -07001927 vnet_device_input_runtime_t *rt =
1928 (vnet_device_input_runtime_t *) node->runtime_data;
1929 vnet_device_and_queue_t *dq;
Pierre Pfistere21c5282016-09-21 08:04:59 +01001930
Stevenf3b53642017-05-01 14:03:02 -07001931 vec_foreach (dq, rt->devices_and_queues)
1932 {
1933 if (clib_smp_swap (&dq->interrupt_pending, 0) ||
1934 (node->state == VLIB_NODE_STATE_POLLING))
Steven7312cc72017-03-15 21:18:55 -07001935 {
Stevenf3b53642017-05-01 14:03:02 -07001936 vui =
1937 pool_elt_at_index (vum->vhost_user_interfaces, dq->dev_instance);
1938	  n_rx_packets += vhost_user_if_input (vm, vum, vui, dq->queue_id,
1939					       node, dq->mode);
Steven7312cc72017-03-15 21:18:55 -07001940 }
Stevenf3b53642017-05-01 14:03:02 -07001941 }
Steven7312cc72017-03-15 21:18:55 -07001942
Ed Warnickecb9cada2015-12-08 15:45:58 -07001943 return n_rx_packets;
1944}
1945
Damjan Marion00a9dca2016-08-17 17:05:46 +02001946/* *INDENT-OFF* */
Ed Warnickecb9cada2015-12-08 15:45:58 -07001947VLIB_REGISTER_NODE (vhost_user_input_node) = {
1948 .function = vhost_user_input,
1949 .type = VLIB_NODE_TYPE_INPUT,
1950 .name = "vhost-user-input",
Damjan Marion51327ac2016-11-09 11:59:42 +01001951 .sibling_of = "device-input",
Ed Warnickecb9cada2015-12-08 15:45:58 -07001952
1953 /* Will be enabled if/when hardware is detected. */
1954 .state = VLIB_NODE_STATE_DISABLED,
1955
1956 .format_buffer = format_ethernet_header_with_length,
Pierre Pfister116ea4b2016-11-08 15:49:28 +00001957 .format_trace = format_vhost_trace,
Ed Warnickecb9cada2015-12-08 15:45:58 -07001958
1959 .n_errors = VHOST_USER_INPUT_FUNC_N_ERROR,
1960 .error_strings = vhost_user_input_func_error_strings,
Ed Warnickecb9cada2015-12-08 15:45:58 -07001961};
1962
Damjan Marion1c80e832016-05-11 23:07:18 +02001963VLIB_NODE_FUNCTION_MULTIARCH (vhost_user_input_node, vhost_user_input)
Damjan Marion00a9dca2016-08-17 17:05:46 +02001964/* *INDENT-ON* */
Damjan Marion1c80e832016-05-11 23:07:18 +02001965
Pierre Pfister116ea4b2016-11-08 15:49:28 +00001966
1967void
1968vhost_user_tx_trace (vhost_trace_t * t,
1969 vhost_user_intf_t * vui, u16 qid,
1970 vlib_buffer_t * b, vhost_user_vring_t * rxvq)
1971{
1972 vhost_user_main_t *vum = &vhost_user_main;
Pierre Pfister116ea4b2016-11-08 15:49:28 +00001973 u32 last_avail_idx = rxvq->last_avail_idx;
Steven97878892017-08-29 09:23:26 -07001974 u32 desc_current = rxvq->avail->ring[last_avail_idx & rxvq->qsz_mask];
Pierre Pfister116ea4b2016-11-08 15:49:28 +00001975 vring_desc_t *hdr_desc = 0;
1976 u32 hint = 0;
1977
1978 memset (t, 0, sizeof (*t));
1979 t->device_index = vui - vum->vhost_user_interfaces;
1980 t->qid = qid;
1981
1982 hdr_desc = &rxvq->desc[desc_current];
1983 if (rxvq->desc[desc_current].flags & VIRTQ_DESC_F_INDIRECT)
1984 {
1985 t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_INDIRECT;
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00001986 /* Header is the first here */
Pierre Pfister116ea4b2016-11-08 15:49:28 +00001987 hdr_desc = map_guest_mem (vui, rxvq->desc[desc_current].addr, &hint);
1988 }
1989 if (rxvq->desc[desc_current].flags & VIRTQ_DESC_F_NEXT)
1990 {
1991 t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_SIMPLE_CHAINED;
1992 }
1993 if (!(rxvq->desc[desc_current].flags & VIRTQ_DESC_F_NEXT) &&
1994 !(rxvq->desc[desc_current].flags & VIRTQ_DESC_F_INDIRECT))
1995 {
1996 t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_SINGLE_DESC;
1997 }
1998
1999 t->first_desc_len = hdr_desc ? hdr_desc->len : 0;
2000}
2001
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00002002static_always_inline u32
2003vhost_user_tx_copy (vhost_user_intf_t * vui, vhost_copy_t * cpy,
2004 u16 copy_len, u32 * map_hint)
2005{
2006 void *dst0, *dst1, *dst2, *dst3;
2007 if (PREDICT_TRUE (copy_len >= 4))
2008 {
2009 if (PREDICT_FALSE (!(dst2 = map_guest_mem (vui, cpy[0].dst, map_hint))))
2010 return 1;
2011 if (PREDICT_FALSE (!(dst3 = map_guest_mem (vui, cpy[1].dst, map_hint))))
2012 return 1;
2013 while (PREDICT_TRUE (copy_len >= 4))
2014 {
2015 dst0 = dst2;
2016 dst1 = dst3;
2017
2018 if (PREDICT_FALSE
2019 (!(dst2 = map_guest_mem (vui, cpy[2].dst, map_hint))))
2020 return 1;
2021 if (PREDICT_FALSE
2022 (!(dst3 = map_guest_mem (vui, cpy[3].dst, map_hint))))
2023 return 1;
2024
2025 CLIB_PREFETCH ((void *) cpy[2].src, 64, LOAD);
2026 CLIB_PREFETCH ((void *) cpy[3].src, 64, LOAD);
2027
2028 clib_memcpy (dst0, (void *) cpy[0].src, cpy[0].len);
2029 clib_memcpy (dst1, (void *) cpy[1].src, cpy[1].len);
2030
2031 vhost_user_log_dirty_pages_2 (vui, cpy[0].dst, cpy[0].len, 1);
2032 vhost_user_log_dirty_pages_2 (vui, cpy[1].dst, cpy[1].len, 1);
2033 copy_len -= 2;
2034 cpy += 2;
2035 }
2036 }
2037 while (copy_len)
2038 {
2039 if (PREDICT_FALSE (!(dst0 = map_guest_mem (vui, cpy->dst, map_hint))))
2040 return 1;
2041 clib_memcpy (dst0, (void *) cpy->src, cpy->len);
2042 vhost_user_log_dirty_pages_2 (vui, cpy->dst, cpy->len, 1);
2043 copy_len -= 1;
2044 cpy += 1;
2045 }
2046 return 0;
2047}
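
/*
 * Note: unlike vhost_user_input_copy(), this routine writes into guest
 * memory, so each copy is also recorded with vhost_user_log_dirty_pages_2().
 * When the master has negotiated VHOST_USER_PROTOCOL_F_LOG_SHMFD (advertised
 * earlier in the message handler), this log is what lets it track which
 * guest pages were modified, typically for live migration.
 */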
2048
2049
Ed Warnickecb9cada2015-12-08 15:45:58 -07002050static uword
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00002051vhost_user_tx (vlib_main_t * vm,
2052 vlib_node_runtime_t * node, vlib_frame_t * frame)
Ed Warnickecb9cada2015-12-08 15:45:58 -07002053{
Damjan Marion00a9dca2016-08-17 17:05:46 +02002054 u32 *buffers = vlib_frame_args (frame);
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00002055 u32 n_left = frame->n_vectors;
Damjan Marion00a9dca2016-08-17 17:05:46 +02002056 vhost_user_main_t *vum = &vhost_user_main;
Damjan Marion00a9dca2016-08-17 17:05:46 +02002057 vnet_interface_output_runtime_t *rd = (void *) node->runtime_data;
2058 vhost_user_intf_t *vui =
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00002059 pool_elt_at_index (vum->vhost_user_interfaces, rd->dev_instance);
Pierre Pfistere21c5282016-09-21 08:04:59 +01002060 u32 qid = ~0;
2061 vhost_user_vring_t *rxvq;
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00002062 u8 error;
Damjan Marion586afd72017-04-05 19:18:20 +02002063 u32 thread_index = vlib_get_thread_index ();
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00002064 u32 map_hint = 0;
2065 u8 retry = 8;
2066 u16 copy_len;
2067 u16 tx_headers_len;
Pierre Pfisterba1d0462016-07-27 16:38:20 +01002068
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00002069 if (PREDICT_FALSE (!vui->admin_up))
2070 {
2071 error = VHOST_USER_TX_FUNC_ERROR_DOWN;
2072 goto done3;
2073 }
2074
2075 if (PREDICT_FALSE (!vui->is_up))
Damjan Marion00a9dca2016-08-17 17:05:46 +02002076 {
2077 error = VHOST_USER_TX_FUNC_ERROR_NOT_READY;
Pierre Pfistere21c5282016-09-21 08:04:59 +01002078 goto done3;
Damjan Marion00a9dca2016-08-17 17:05:46 +02002079 }
Damjan Marion920ecc22016-01-12 18:34:24 +01002080
Pierre Pfistere21c5282016-09-21 08:04:59 +01002081 qid =
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00002082 VHOST_VRING_IDX_RX (*vec_elt_at_index
Stevend7727532017-06-09 18:49:17 -07002083 (vui->per_cpu_tx_qid, thread_index));
Pierre Pfistere21c5282016-09-21 08:04:59 +01002084 rxvq = &vui->vrings[qid];
2085 if (PREDICT_FALSE (vui->use_tx_spinlock))
2086 vhost_user_vring_lock (vui, qid);
Ed Warnickecb9cada2015-12-08 15:45:58 -07002087
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00002088retry:
2089 error = VHOST_USER_TX_FUNC_ERROR_NONE;
2090 tx_headers_len = 0;
2091 copy_len = 0;
2092 while (n_left > 0)
Damjan Marion00a9dca2016-08-17 17:05:46 +02002093 {
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00002094 vlib_buffer_t *b0, *current_b0;
2095 u16 desc_head, desc_index, desc_len;
2096 vring_desc_t *desc_table;
2097 uword buffer_map_addr;
2098 u32 buffer_len;
2099 u16 bytes_left;
Ed Warnickecb9cada2015-12-08 15:45:58 -07002100
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00002101 if (PREDICT_TRUE (n_left > 1))
2102 vlib_prefetch_buffer_with_index (vm, buffers[1], LOAD);
2103
2104 b0 = vlib_get_buffer (vm, buffers[0]);
2105
2106 if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
Pierre Pfister116ea4b2016-11-08 15:49:28 +00002107 {
Damjan Marion586afd72017-04-05 19:18:20 +02002108 vum->cpus[thread_index].current_trace =
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00002109 vlib_add_trace (vm, node, b0,
Damjan Marion586afd72017-04-05 19:18:20 +02002110 sizeof (*vum->cpus[thread_index].current_trace));
2111 vhost_user_tx_trace (vum->cpus[thread_index].current_trace,
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00002112 vui, qid / 2, b0, rxvq);
2113 }
Pierre Pfister116ea4b2016-11-08 15:49:28 +00002114
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00002115 if (PREDICT_FALSE (rxvq->last_avail_idx == rxvq->avail->idx))
2116 {
2117 error = VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOBUF;
2118 goto done;
2119 }
Ed Warnickecb9cada2015-12-08 15:45:58 -07002120
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00002121 desc_table = rxvq->desc;
2122 desc_head = desc_index =
Steven97878892017-08-29 09:23:26 -07002123 rxvq->avail->ring[rxvq->last_avail_idx & rxvq->qsz_mask];
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00002124
2125 /* Go deeper in case of indirect descriptor
2126 * I don't know of any driver providing indirect for RX. */
2127 if (PREDICT_FALSE (rxvq->desc[desc_head].flags & VIRTQ_DESC_F_INDIRECT))
2128 {
2129 if (PREDICT_FALSE
2130 (rxvq->desc[desc_head].len < sizeof (vring_desc_t)))
Pierre Pfisterba1d0462016-07-27 16:38:20 +01002131 {
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00002132 error = VHOST_USER_TX_FUNC_ERROR_INDIRECT_OVERFLOW;
Pierre Pfisterba1d0462016-07-27 16:38:20 +01002133 goto done;
2134 }
2135 if (PREDICT_FALSE
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00002136 (!(desc_table =
2137 map_guest_mem (vui, rxvq->desc[desc_index].addr,
2138 &map_hint))))
Pierre Pfisterba1d0462016-07-27 16:38:20 +01002139 {
2140 error = VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL;
2141 goto done;
2142 }
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00002143 desc_index = 0;
Damjan Marion00a9dca2016-08-17 17:05:46 +02002144 }
Ed Warnickecb9cada2015-12-08 15:45:58 -07002145
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00002146 desc_len = vui->virtio_net_hdr_sz;
2147 buffer_map_addr = desc_table[desc_index].addr;
2148 buffer_len = desc_table[desc_index].len;
2149
2150 {
2151 // Get a header from the header array
2152 virtio_net_hdr_mrg_rxbuf_t *hdr =
Damjan Marion586afd72017-04-05 19:18:20 +02002153 &vum->cpus[thread_index].tx_headers[tx_headers_len];
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00002154 tx_headers_len++;
2155 hdr->hdr.flags = 0;
2156 hdr->hdr.gso_type = 0;
2157 hdr->num_buffers = 1; //This is local, no need to check
2158
2159 // Prepare a copy order executed later for the header
Damjan Marion586afd72017-04-05 19:18:20 +02002160 vhost_copy_t *cpy = &vum->cpus[thread_index].copy[copy_len];
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00002161 copy_len++;
2162 cpy->len = vui->virtio_net_hdr_sz;
2163 cpy->dst = buffer_map_addr;
2164 cpy->src = (uword) hdr;
2165 }
2166
2167 buffer_map_addr += vui->virtio_net_hdr_sz;
2168 buffer_len -= vui->virtio_net_hdr_sz;
2169 bytes_left = b0->current_length;
2170 current_b0 = b0;
2171 while (1)
2172 {
2173 if (buffer_len == 0)
2174 { //Get new output
2175 if (desc_table[desc_index].flags & VIRTQ_DESC_F_NEXT)
2176 {
2177 //Next one is chained
2178 desc_index = desc_table[desc_index].next;
2179 buffer_map_addr = desc_table[desc_index].addr;
2180 buffer_len = desc_table[desc_index].len;
2181 }
2182 else if (vui->virtio_net_hdr_sz == 12) //MRG is available
2183 {
2184 virtio_net_hdr_mrg_rxbuf_t *hdr =
Damjan Marion586afd72017-04-05 19:18:20 +02002185 &vum->cpus[thread_index].tx_headers[tx_headers_len - 1];
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00002186
2187 //Move from available to used buffer
Steven97878892017-08-29 09:23:26 -07002188 rxvq->used->ring[rxvq->last_used_idx & rxvq->qsz_mask].id =
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00002189 desc_head;
Steven97878892017-08-29 09:23:26 -07002190 rxvq->used->ring[rxvq->last_used_idx & rxvq->qsz_mask].len =
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00002191 desc_len;
2192 vhost_user_log_dirty_ring (vui, rxvq,
2193 ring[rxvq->last_used_idx &
Steven97878892017-08-29 09:23:26 -07002194 rxvq->qsz_mask]);
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00002195
2196 rxvq->last_avail_idx++;
2197 rxvq->last_used_idx++;
2198 hdr->num_buffers++;
2199 desc_len = 0;
2200
2201 if (PREDICT_FALSE
2202 (rxvq->last_avail_idx == rxvq->avail->idx))
2203 {
2204 //Dequeue queued descriptors for this packet
2205 rxvq->last_used_idx -= hdr->num_buffers - 1;
2206 rxvq->last_avail_idx -= hdr->num_buffers - 1;
2207 error = VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOBUF;
2208 goto done;
2209 }
2210
2211 desc_table = rxvq->desc;
2212 desc_head = desc_index =
Steven97878892017-08-29 09:23:26 -07002213 rxvq->avail->ring[rxvq->last_avail_idx & rxvq->qsz_mask];
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00002214 if (PREDICT_FALSE
2215 (rxvq->desc[desc_head].flags & VIRTQ_DESC_F_INDIRECT))
2216 {
2217		      //It is seriously unlikely that a driver will put an indirect
2218		      //descriptor after a non-indirect one.
2219 if (PREDICT_FALSE
2220 (rxvq->desc[desc_head].len < sizeof (vring_desc_t)))
2221 {
2222 error = VHOST_USER_TX_FUNC_ERROR_INDIRECT_OVERFLOW;
2223 goto done;
2224 }
2225 if (PREDICT_FALSE
2226 (!(desc_table =
2227 map_guest_mem (vui,
2228 rxvq->desc[desc_index].addr,
2229 &map_hint))))
2230 {
2231 error = VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL;
2232 goto done;
2233 }
2234 desc_index = 0;
2235 }
2236 buffer_map_addr = desc_table[desc_index].addr;
2237 buffer_len = desc_table[desc_index].len;
2238 }
2239 else
2240 {
2241 error = VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOMRG;
2242 goto done;
2243 }
2244 }
2245
2246 {
Damjan Marion586afd72017-04-05 19:18:20 +02002247 vhost_copy_t *cpy = &vum->cpus[thread_index].copy[copy_len];
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00002248 copy_len++;
2249 cpy->len = bytes_left;
2250 cpy->len = (cpy->len > buffer_len) ? buffer_len : cpy->len;
2251 cpy->dst = buffer_map_addr;
2252 cpy->src = (uword) vlib_buffer_get_current (current_b0) +
2253 current_b0->current_length - bytes_left;
2254
2255 bytes_left -= cpy->len;
2256 buffer_len -= cpy->len;
2257 buffer_map_addr += cpy->len;
2258 desc_len += cpy->len;
2259
Pierre Pfister14ac8012016-12-08 07:58:47 +00002260 CLIB_PREFETCH (&rxvq->desc, CLIB_CACHE_LINE_BYTES, LOAD);
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00002261 }
2262
2263 // Check if vlib buffer has more data. If not, get more or break.
2264 if (PREDICT_TRUE (!bytes_left))
2265 {
2266 if (PREDICT_FALSE
2267 (current_b0->flags & VLIB_BUFFER_NEXT_PRESENT))
2268 {
2269 current_b0 = vlib_get_buffer (vm, current_b0->next_buffer);
2270 bytes_left = current_b0->current_length;
2271 }
2272 else
2273 {
2274 //End of packet
2275 break;
2276 }
2277 }
2278 }
2279
2280 //Move from available to used ring
Steven97878892017-08-29 09:23:26 -07002281 rxvq->used->ring[rxvq->last_used_idx & rxvq->qsz_mask].id = desc_head;
2282 rxvq->used->ring[rxvq->last_used_idx & rxvq->qsz_mask].len = desc_len;
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00002283 vhost_user_log_dirty_ring (vui, rxvq,
Steven97878892017-08-29 09:23:26 -07002284 ring[rxvq->last_used_idx & rxvq->qsz_mask]);
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00002285 rxvq->last_avail_idx++;
2286 rxvq->last_used_idx++;
2287
2288 if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
2289 {
Damjan Marion586afd72017-04-05 19:18:20 +02002290 vum->cpus[thread_index].current_trace->hdr =
2291 vum->cpus[thread_index].tx_headers[tx_headers_len - 1];
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00002292 }
2293
2294 n_left--; //At the end for error counting when 'goto done' is invoked
Stevend7727532017-06-09 18:49:17 -07002295
2296 /*
2297 * Do the copy periodically to prevent
2298       * vum->cpus[thread_index].copy array overflow and memory corruption
2299 */
2300 if (PREDICT_FALSE (copy_len >= VHOST_USER_TX_COPY_THRESHOLD))
2301 {
2302 if (PREDICT_FALSE
2303 (vhost_user_tx_copy (vui, vum->cpus[thread_index].copy,
2304 copy_len, &map_hint)))
2305 {
2306 vlib_error_count (vm, node->node_index,
2307 VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL, 1);
2308 }
2309 copy_len = 0;
2310
2311 /* give buffers back to driver */
2312 CLIB_MEMORY_BARRIER ();
2313 rxvq->used->idx = rxvq->last_used_idx;
2314 vhost_user_log_dirty_ring (vui, rxvq, idx);
2315 }
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00002316 buffers++;
2317 }
2318
2319done:
2320 //Do the memory copies
2321 if (PREDICT_FALSE
Damjan Marion586afd72017-04-05 19:18:20 +02002322 (vhost_user_tx_copy (vui, vum->cpus[thread_index].copy,
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00002323 copy_len, &map_hint)))
2324 {
Steven95827e42017-05-18 21:22:00 -07002325 vlib_error_count (vm, node->node_index,
2326 VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL, 1);
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00002327 }
2328
2329 CLIB_MEMORY_BARRIER ();
2330 rxvq->used->idx = rxvq->last_used_idx;
2331 vhost_user_log_dirty_ring (vui, rxvq, idx);
2332
2333 /*
2334 * When n_left is set, error is always set to something too.
2335 * In case error is due to lack of remaining buffers, we go back up and
2336 * retry.
2337   * The idea is that it is better to spend some extra time on packets
2338   * that have already been processed than to drop them and fetch fresh
2339   * packets that are likely to be dropped too.
2340   * This technique also gives the VM driver more time to pick up packets.
2341 * In case the traffic flows from physical to virtual interfaces, this
2342 * technique will end-up leveraging the physical NIC buffer in order to
2343 * absorb the VM's CPU jitter.
2344 */
2345 if (n_left && (error == VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOBUF) && retry)
2346 {
2347 retry--;
2348 goto retry;
Damjan Marion00a9dca2016-08-17 17:05:46 +02002349 }
Ed Warnickecb9cada2015-12-08 15:45:58 -07002350
Ed Warnickecb9cada2015-12-08 15:45:58 -07002351 /* interrupt (call) handling */
Steven7312cc72017-03-15 21:18:55 -07002352 if ((rxvq->callfd_idx != ~0) &&
2353 !(rxvq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
Damjan Marion00a9dca2016-08-17 17:05:46 +02002354 {
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +00002355 rxvq->n_since_last_int += frame->n_vectors - n_left;
Ed Warnickecb9cada2015-12-08 15:45:58 -07002356
Damjan Marion00a9dca2016-08-17 17:05:46 +02002357 if (rxvq->n_since_last_int > vum->coalesce_frames)
2358 vhost_user_send_call (vm, rxvq);
2359 }
Ed Warnickecb9cada2015-12-08 15:45:58 -07002360
Pierre Pfistere21c5282016-09-21 08:04:59 +01002361 vhost_user_vring_unlock (vui, qid);
Ed Warnickecb9cada2015-12-08 15:45:58 -07002362
Pierre Pfistere21c5282016-09-21 08:04:59 +01002363done3:
Damjan Marion00a9dca2016-08-17 17:05:46 +02002364 if (PREDICT_FALSE (n_left && error != VHOST_USER_TX_FUNC_ERROR_NONE))
2365 {
2366 vlib_error_count (vm, node->node_index, error, n_left);
2367 vlib_increment_simple_counter
2368 (vnet_main.interface_main.sw_if_counters
2369 + VNET_INTERFACE_COUNTER_DROP,
Stevend7727532017-06-09 18:49:17 -07002370 thread_index, vui->sw_if_index, n_left);
Damjan Marion00a9dca2016-08-17 17:05:46 +02002371 }
Pierre Pfister328e99b2016-02-12 13:18:42 +00002372
Ed Warnickecb9cada2015-12-08 15:45:58 -07002373 vlib_buffer_free (vm, vlib_frame_args (frame), frame->n_vectors);
2374 return frame->n_vectors;
2375}
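
/*
 * Worked example for the mergeable-buffer path above (assuming, for
 * illustration, 2048-byte guest RX buffers): with mergeable RX buffers
 * negotiated, virtio_net_hdr_sz is 12 and the trailing num_buffers field of
 * virtio_net_hdr_mrg_rxbuf_t tells the guest how many descriptor chains one
 * packet was spread over. A 3000-byte packet would then consume two chains
 * and be completed with hdr->num_buffers = 2. Without mergeable buffers, a
 * packet that does not fit in a single chain is dropped with
 * VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOMRG.
 */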
2376
Stevenf3b53642017-05-01 14:03:02 -07002377static uword
2378vhost_user_send_interrupt_process (vlib_main_t * vm,
2379 vlib_node_runtime_t * rt, vlib_frame_t * f)
2380{
2381 vhost_user_intf_t *vui;
2382 f64 timeout = 3153600000.0 /* 100 years */ ;
2383 uword event_type, *event_data = 0;
2384 vhost_user_main_t *vum = &vhost_user_main;
2385 u16 *queue;
2386 f64 now, poll_time_remaining;
2387 f64 next_timeout;
2388 u8 stop_timer = 0;
2389
2390 while (1)
2391 {
2392 poll_time_remaining =
2393 vlib_process_wait_for_event_or_clock (vm, timeout);
2394 event_type = vlib_process_get_events (vm, &event_data);
2395 vec_reset_length (event_data);
2396
2397 /*
2398 * Use the remaining timeout if it is less than coalesce time to avoid
2399 * resetting the existing timer in the middle of expiration
2400 */
2401 timeout = poll_time_remaining;
2402 if (vlib_process_suspend_time_is_zero (timeout) ||
2403 (timeout > vum->coalesce_time))
2404 timeout = vum->coalesce_time;
2405
2406 now = vlib_time_now (vm);
2407 switch (event_type)
2408 {
2409 case VHOST_USER_EVENT_STOP_TIMER:
2410 stop_timer = 1;
2411 break;
2412
2413 case VHOST_USER_EVENT_START_TIMER:
2414 stop_timer = 0;
2415 if (!vlib_process_suspend_time_is_zero (poll_time_remaining))
2416 break;
2417 /* fall through */
2418
2419 case ~0:
2420 /* *INDENT-OFF* */
2421 pool_foreach (vui, vum->vhost_user_interfaces, {
2422 next_timeout = timeout;
2423 vec_foreach (queue, vui->rx_queues)
2424 {
2425 vhost_user_vring_t *rxvq =
2426 &vui->vrings[VHOST_VRING_IDX_RX (*queue)];
2427 vhost_user_vring_t *txvq =
2428 &vui->vrings[VHOST_VRING_IDX_TX (*queue)];
2429
2430 if (txvq->n_since_last_int)
2431 {
2432 if (now >= txvq->int_deadline)
2433 vhost_user_send_call (vm, txvq);
2434 else
2435 next_timeout = txvq->int_deadline - now;
2436 }
2437
2438 if (rxvq->n_since_last_int)
2439 {
2440 if (now >= rxvq->int_deadline)
2441 vhost_user_send_call (vm, rxvq);
2442 else
2443 next_timeout = rxvq->int_deadline - now;
2444 }
2445
2446 if ((next_timeout < timeout) && (next_timeout > 0.0))
2447 timeout = next_timeout;
2448 }
2449 });
2450 /* *INDENT-ON* */
2451 break;
2452
2453 default:
2454 clib_warning ("BUG: unhandled event type %d", event_type);
2455 break;
2456 }
2457 /* No less than 1 millisecond */
2458 if (timeout < 1e-3)
2459 timeout = 1e-3;
2460 if (stop_timer)
2461 timeout = 3153600000.0;
2462 }
2463 return 0;
2464}
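
/*
 * Timing sketch: while at least one queue is in interrupt or adaptive mode
 * (vum->ifq_count > 0, see the rx-mode handler below), this process wakes up
 * roughly every coalesce_time (1 ms by default), sends a call for every
 * vring whose int_deadline has passed, and shortens the next timeout to the
 * earliest remaining deadline. When the last such queue goes away,
 * VHOST_USER_EVENT_STOP_TIMER parks the process on the ~100 year timeout.
 */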
2465
2466/* *INDENT-OFF* */
2467VLIB_REGISTER_NODE (vhost_user_send_interrupt_node,static) = {
2468 .function = vhost_user_send_interrupt_process,
2469 .type = VLIB_NODE_TYPE_PROCESS,
2470 .name = "vhost-user-send-interrupt-process",
2471};
2472/* *INDENT-ON* */
2473
2474static clib_error_t *
2475vhost_user_interface_rx_mode_change (vnet_main_t * vnm, u32 hw_if_index,
2476 u32 qid, vnet_hw_interface_rx_mode mode)
2477{
2478 vlib_main_t *vm = vnm->vlib_main;
2479 vnet_hw_interface_t *hif = vnet_get_hw_interface (vnm, hw_if_index);
2480 vhost_user_main_t *vum = &vhost_user_main;
2481 vhost_user_intf_t *vui =
2482 pool_elt_at_index (vum->vhost_user_interfaces, hif->dev_instance);
2483 vhost_user_vring_t *txvq = &vui->vrings[VHOST_VRING_IDX_TX (qid)];
2484
2485 if ((mode == VNET_HW_INTERFACE_RX_MODE_INTERRUPT) ||
2486 (mode == VNET_HW_INTERFACE_RX_MODE_ADAPTIVE))
2487 {
Steven49a04b92017-07-29 08:56:08 -07002488 if (txvq->kickfd_idx == ~0)
2489 {
2490 // We cannot support interrupt mode if the driver opts out
2491 return clib_error_return (0, "Driver does not support interrupt");
2492 }
Stevenf3b53642017-05-01 14:03:02 -07002493 if (txvq->mode == VNET_HW_INTERFACE_RX_MODE_POLLING)
2494 {
2495 vum->ifq_count++;
2496 // Start the timer if this is the first encounter on interrupt
2497 // interface/queue
2498 if ((vum->ifq_count == 1) &&
2499 (vum->coalesce_time > 0.0) && (vum->coalesce_frames > 0))
2500 vlib_process_signal_event (vm,
2501 vhost_user_send_interrupt_node.index,
2502 VHOST_USER_EVENT_START_TIMER, 0);
2503 }
2504 }
2505 else if (mode == VNET_HW_INTERFACE_RX_MODE_POLLING)
2506 {
2507 if (((txvq->mode == VNET_HW_INTERFACE_RX_MODE_INTERRUPT) ||
2508 (txvq->mode == VNET_HW_INTERFACE_RX_MODE_ADAPTIVE)) &&
2509 vum->ifq_count)
2510 {
2511 vum->ifq_count--;
2512 // Stop the timer if there is no more interrupt interface/queue
2513 if ((vum->ifq_count == 0) &&
2514 (vum->coalesce_time > 0.0) && (vum->coalesce_frames > 0))
2515 vlib_process_signal_event (vm,
2516 vhost_user_send_interrupt_node.index,
2517 VHOST_USER_EVENT_STOP_TIMER, 0);
2518 }
2519 }
2520
2521 txvq->mode = mode;
2522 if (mode == VNET_HW_INTERFACE_RX_MODE_POLLING)
2523 txvq->used->flags = VRING_USED_F_NO_NOTIFY;
2524 else if ((mode == VNET_HW_INTERFACE_RX_MODE_ADAPTIVE) ||
2525 (mode == VNET_HW_INTERFACE_RX_MODE_INTERRUPT))
2526 txvq->used->flags = 0;
2527 else
2528 {
2529 clib_warning ("BUG: unhandled mode %d changed for if %d queue %d", mode,
2530 hw_if_index, qid);
2531 return clib_error_return (0, "unsupported");
2532 }
2533
2534 return 0;
2535}
2536
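/**
 * Admin up/down handler. The hardware link is only reported up once both
 * the admin state and the vhost-user session (vui->is_up) are up.
 */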
Ed Warnickecb9cada2015-12-08 15:45:58 -07002537static clib_error_t *
Damjan Marion00a9dca2016-08-17 17:05:46 +02002538vhost_user_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index,
2539 u32 flags)
Ed Warnickecb9cada2015-12-08 15:45:58 -07002540{
Damjan Marion00a9dca2016-08-17 17:05:46 +02002541 vnet_hw_interface_t *hif = vnet_get_hw_interface (vnm, hw_if_index);
Ed Warnickecb9cada2015-12-08 15:45:58 -07002542 uword is_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
Damjan Marion00a9dca2016-08-17 17:05:46 +02002543 vhost_user_main_t *vum = &vhost_user_main;
2544 vhost_user_intf_t *vui =
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00002545 pool_elt_at_index (vum->vhost_user_interfaces, hif->dev_instance);
Ed Warnickecb9cada2015-12-08 15:45:58 -07002546
2547 vui->admin_up = is_up;
2548
Yoann Desmouceaux35df2e12017-09-20 11:00:42 +02002549 if (is_up && vui->is_up)
Ed Warnickecb9cada2015-12-08 15:45:58 -07002550 vnet_hw_interface_set_flags (vnm, vui->hw_if_index,
Damjan Marion00a9dca2016-08-17 17:05:46 +02002551 VNET_HW_INTERFACE_FLAG_LINK_UP);
Ed Warnickecb9cada2015-12-08 15:45:58 -07002552
2553 return /* no error */ 0;
2554}
2555
Damjan Marion00a9dca2016-08-17 17:05:46 +02002556/* *INDENT-OFF* */
Ed Warnickecb9cada2015-12-08 15:45:58 -07002557VNET_DEVICE_CLASS (vhost_user_dev_class,static) = {
2558 .name = "vhost-user",
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00002559 .tx_function = vhost_user_tx,
Ed Warnickecb9cada2015-12-08 15:45:58 -07002560 .tx_function_n_errors = VHOST_USER_TX_FUNC_N_ERROR,
2561 .tx_function_error_strings = vhost_user_tx_func_error_strings,
2562 .format_device_name = format_vhost_user_interface_name,
2563 .name_renumber = vhost_user_name_renumber,
2564 .admin_up_down_function = vhost_user_interface_admin_up_down,
Stevenf3b53642017-05-01 14:03:02 -07002565 .rx_mode_change_function = vhost_user_interface_rx_mode_change,
Pierre Pfister116ea4b2016-11-08 15:49:28 +00002566 .format_tx_trace = format_vhost_trace,
Ed Warnickecb9cada2015-12-08 15:45:58 -07002567};
2568
Damjan Marion1c80e832016-05-11 23:07:18 +02002569VLIB_DEVICE_TX_FUNCTION_MULTIARCH (vhost_user_dev_class,
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00002570 vhost_user_tx)
Damjan Marion00a9dca2016-08-17 17:05:46 +02002571/* *INDENT-ON* */
Damjan Marion1c80e832016-05-11 23:07:18 +02002572
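/**
 * Background process driving client-mode (re)connection.
 * Every few seconds it walks all interfaces: for client sockets that are not
 * yet connected it attempts a non-blocking connect() to the configured path
 * and registers a read handler on success; for connected sockets it probes
 * liveness with getsockopt(SO_ERROR) and disconnects the interface when the
 * probe fails.
 */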
Ed Warnickecb9cada2015-12-08 15:45:58 -07002573static uword
2574vhost_user_process (vlib_main_t * vm,
Damjan Marion00a9dca2016-08-17 17:05:46 +02002575 vlib_node_runtime_t * rt, vlib_frame_t * f)
Ed Warnickecb9cada2015-12-08 15:45:58 -07002576{
Damjan Marion00a9dca2016-08-17 17:05:46 +02002577 vhost_user_main_t *vum = &vhost_user_main;
2578 vhost_user_intf_t *vui;
2579 struct sockaddr_un sun;
2580 int sockfd;
Damjan Marion56dd5432017-09-08 19:52:02 +02002581 clib_file_t template = { 0 };
Damjan Marion00a9dca2016-08-17 17:05:46 +02002582 f64 timeout = 3153600000.0 /* 100 years */ ;
2583 uword *event_data = 0;
Ed Warnickecb9cada2015-12-08 15:45:58 -07002584
Steven0d150bb2017-03-22 12:05:19 -07002585 sockfd = -1;
Damjan Marion00a9dca2016-08-17 17:05:46 +02002586 sun.sun_family = AF_UNIX;
2587 template.read_function = vhost_user_socket_read;
2588 template.error_function = vhost_user_socket_error;
Ed Warnickecb9cada2015-12-08 15:45:58 -07002589
Damjan Marion00a9dca2016-08-17 17:05:46 +02002590 while (1)
2591 {
2592 vlib_process_wait_for_event_or_clock (vm, timeout);
2593 vlib_process_get_events (vm, &event_data);
2594 vec_reset_length (event_data);
2595
2596 timeout = 3.0;
2597
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00002598 /* *INDENT-OFF* */
2599 pool_foreach (vui, vum->vhost_user_interfaces, {
Damjan Marion00a9dca2016-08-17 17:05:46 +02002600
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00002601 if (vui->unix_server_index == ~0) { //Nothing to do for server sockets
Damjan Marion56dd5432017-09-08 19:52:02 +02002602 if (vui->clib_file_index == ~0)
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00002603 {
Steven0d150bb2017-03-22 12:05:19 -07002604 if ((sockfd < 0) &&
2605 ((sockfd = socket (AF_UNIX, SOCK_STREAM, 0)) < 0))
2606 {
2607 /*
2608 * 1st time error or new error for this interface,
2609 * spit out the message and record the error
2610 */
2611 if (!vui->sock_errno || (vui->sock_errno != errno))
2612 {
2613 clib_unix_warning
2614 ("Error: Could not open unix socket for %s",
2615 vui->sock_filename);
2616 vui->sock_errno = errno;
2617 }
2618 continue;
2619 }
2620
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00002621 /* try to connect */
2622 strncpy (sun.sun_path, (char *) vui->sock_filename,
2623 sizeof (sun.sun_path) - 1);
Damjan Marion00a9dca2016-08-17 17:05:46 +02002624
Andrew Yourtchenko0c3d4672017-01-03 16:52:22 +00002625 /* Avoid hanging VPP if the other end does not accept */
Dave Barach8f544962017-01-18 10:23:22 -05002626 if (fcntl(sockfd, F_SETFL, O_NONBLOCK) < 0)
2627 clib_unix_warning ("fcntl");
2628
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00002629 if (connect (sockfd, (struct sockaddr *) &sun,
2630 sizeof (struct sockaddr_un)) == 0)
2631 {
Andrew Yourtchenko0c3d4672017-01-03 16:52:22 +00002632 /* Set the socket to blocking as it was before */
Dave Barach8f544962017-01-18 10:23:22 -05002633 if (fcntl(sockfd, F_SETFL, 0) < 0)
2634 clib_unix_warning ("fcntl2");
2635
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00002636 vui->sock_errno = 0;
2637 template.file_descriptor = sockfd;
2638 template.private_data =
2639 vui - vhost_user_main.vhost_user_interfaces;
Damjan Marion56dd5432017-09-08 19:52:02 +02002640 vui->clib_file_index = clib_file_add (&file_main, &template);
Damjan Marion00a9dca2016-08-17 17:05:46 +02002641
Steven0d150bb2017-03-22 12:05:19 -07002642 /* This sockfd is considered consumed */
2643 sockfd = -1;
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00002644 }
2645 else
2646 {
2647 vui->sock_errno = errno;
2648 }
2649 }
2650 else
2651 {
2652 /* check if socket is alive */
2653 int error = 0;
2654 socklen_t len = sizeof (error);
Damjan Marion56dd5432017-09-08 19:52:02 +02002655 int fd = UNIX_GET_FD(vui->clib_file_index);
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00002656 int retval =
2657 getsockopt (fd, SOL_SOCKET, SO_ERROR, &error, &len);
Damjan Marion00a9dca2016-08-17 17:05:46 +02002658
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00002659 if (retval)
2660 {
2661 DBG_SOCK ("getsockopt returned %d", retval);
2662 vhost_user_if_disconnect (vui);
2663 }
2664 }
Damjan Marion00a9dca2016-08-17 17:05:46 +02002665 }
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00002666 });
2667 /* *INDENT-ON* */
Damjan Marion00a9dca2016-08-17 17:05:46 +02002668 }
2669 return 0;
Ed Warnickecb9cada2015-12-08 15:45:58 -07002670}
2671
Damjan Marion00a9dca2016-08-17 17:05:46 +02002672/* *INDENT-OFF* */
Ed Warnickecb9cada2015-12-08 15:45:58 -07002673VLIB_REGISTER_NODE (vhost_user_process_node,static) = {
2674 .function = vhost_user_process,
2675 .type = VLIB_NODE_TYPE_PROCESS,
2676 .name = "vhost-user-process",
2677};
Damjan Marion00a9dca2016-08-17 17:05:46 +02002678/* *INDENT-ON* */
Ed Warnickecb9cada2015-12-08 15:45:58 -07002679
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00002680/**
2681 * Disables and resets the interface structure.
2682 * It can then either be initialized again or removed from the used interfaces.
2683 */
2684static void
2685vhost_user_term_if (vhost_user_intf_t * vui)
Ed Warnickecb9cada2015-12-08 15:45:58 -07002686{
Ole Troan553a4112017-01-10 10:07:04 +01002687 int q;
Steven5445f5f2017-04-25 16:16:00 -07002688 vhost_user_main_t *vum = &vhost_user_main;
Ole Troan553a4112017-01-10 10:07:04 +01002689
Ed Warnickecb9cada2015-12-08 15:45:58 -07002690 // disconnect interface sockets
Damjan Marion00a9dca2016-08-17 17:05:46 +02002691 vhost_user_if_disconnect (vui);
Pierre Pfisterfbb2ef62016-11-16 02:43:29 +00002692 vhost_user_update_iface_state (vui);
Ed Warnickecb9cada2015-12-08 15:45:58 -07002693
Ole Troan553a4112017-01-10 10:07:04 +01002694 for (q = 0; q < VHOST_VRING_MAX_N; q++)
2695 {
2696 clib_mem_free ((void *) vui->vring_locks[q]);
2697 }
2698
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00002699 if (vui->unix_server_index != ~0)
2700 {
2701 //Close server socket
Damjan Marion56dd5432017-09-08 19:52:02 +02002702 clib_file_t *uf = pool_elt_at_index (file_main.file_pool,
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00002703 vui->unix_server_index);
Damjan Marion56dd5432017-09-08 19:52:02 +02002704 clib_file_del (&file_main, uf);
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00002705 vui->unix_server_index = ~0;
Steven53129422017-04-21 13:31:50 -07002706 unlink (vui->sock_filename);
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00002707 }
Steven5445f5f2017-04-25 16:16:00 -07002708
2709 mhash_unset (&vum->if_index_by_sock_name, vui->sock_filename,
2710 &vui->if_index);
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00002711}
Ed Warnickecb9cada2015-12-08 15:45:58 -07002712
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00002713int
2714vhost_user_delete_if (vnet_main_t * vnm, vlib_main_t * vm, u32 sw_if_index)
2715{
2716 vhost_user_main_t *vum = &vhost_user_main;
2717 vhost_user_intf_t *vui;
2718 int rv = 0;
2719 vnet_hw_interface_t *hwif;
Stevenf3b53642017-05-01 14:03:02 -07002720 u16 *queue;
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00002721
2722 if (!(hwif = vnet_get_sup_hw_interface (vnm, sw_if_index)) ||
2723 hwif->dev_class_index != vhost_user_dev_class.index)
2724 return VNET_API_ERROR_INVALID_SW_IF_INDEX;
2725
2726 DBG_SOCK ("Deleting vhost-user interface %s (instance %d)",
2727 hwif->name, hwif->dev_instance);
2728
2729 vui = pool_elt_at_index (vum->vhost_user_interfaces, hwif->dev_instance);
2730
Stevenf3b53642017-05-01 14:03:02 -07002731 vec_foreach (queue, vui->rx_queues)
2732 {
2733 vhost_user_vring_t *txvq;
2734
2735 txvq = &vui->vrings[VHOST_VRING_IDX_TX (*queue)];
2736 if ((vum->ifq_count > 0) &&
2737 ((txvq->mode == VNET_HW_INTERFACE_RX_MODE_INTERRUPT) ||
2738 (txvq->mode == VNET_HW_INTERFACE_RX_MODE_ADAPTIVE)))
2739 {
2740 vum->ifq_count--;
2741 // Stop the timer if there is no more interrupt interface/queue
2742 if ((vum->ifq_count == 0) &&
2743 (vum->coalesce_time > 0.0) && (vum->coalesce_frames > 0))
2744 {
2745 vlib_process_signal_event (vm,
2746 vhost_user_send_interrupt_node.index,
2747 VHOST_USER_EVENT_STOP_TIMER, 0);
2748 break;
2749 }
2750 }
2751 }
2752
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00002753 // Disable and reset interface
2754 vhost_user_term_if (vui);
2755
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00002756 // Reset renumbered iface
2757 if (hwif->dev_instance <
2758 vec_len (vum->show_dev_instance_by_real_dev_instance))
2759 vum->show_dev_instance_by_real_dev_instance[hwif->dev_instance] = ~0;
2760
2761 // Delete ethernet interface
Ed Warnickecb9cada2015-12-08 15:45:58 -07002762 ethernet_delete_interface (vnm, vui->hw_if_index);
Wojciech Decd8e47872017-01-17 21:45:11 +01002763
2764 // Back to pool
2765 pool_put (vum->vhost_user_interfaces, vui);
2766
Ed Warnickecb9cada2015-12-08 15:45:58 -07002767 return rv;
2768}
2769
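/*
 * Main-loop exit handler (registered below): delete every vhost-user
 * interface under the worker thread barrier so sockets and interface state
 * are torn down cleanly on VPP shutdown.
 */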
Steven53129422017-04-21 13:31:50 -07002770static clib_error_t *
2771vhost_user_exit (vlib_main_t * vm)
2772{
2773 vnet_main_t *vnm = vnet_get_main ();
2774 vhost_user_main_t *vum = &vhost_user_main;
2775 vhost_user_intf_t *vui;
2776
Steven41748862017-04-25 13:49:51 -07002777 vlib_worker_thread_barrier_sync (vlib_get_main ());
Steven53129422017-04-21 13:31:50 -07002778 /* *INDENT-OFF* */
2779 pool_foreach (vui, vum->vhost_user_interfaces, {
2780 vhost_user_delete_if (vnm, vm, vui->sw_if_index);
2781 });
2782 /* *INDENT-ON* */
Steven41748862017-04-25 13:49:51 -07002783 vlib_worker_thread_barrier_release (vlib_get_main ());
Steven53129422017-04-21 13:31:50 -07002784 return 0;
2785}
2786
2787VLIB_MAIN_LOOP_EXIT_FUNCTION (vhost_user_exit);
2788
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00002789/**
2790 * Open a server unix socket on the specified sock_filename.
2791 */
Damjan Marion00a9dca2016-08-17 17:05:46 +02002792static int
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00002793vhost_user_init_server_sock (const char *sock_filename, int *sock_fd)
Ed Warnickecb9cada2015-12-08 15:45:58 -07002794{
Pierre Pfister5afccb22016-07-25 14:32:02 +01002795 int rv = 0;
Damjan Marion00a9dca2016-08-17 17:05:46 +02002796 struct sockaddr_un un = { };
Ed Warnickecb9cada2015-12-08 15:45:58 -07002797 int fd;
2798 /* create listening socket */
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00002799 if ((fd = socket (AF_UNIX, SOCK_STREAM, 0)) < 0)
2800 return VNET_API_ERROR_SYSCALL_ERROR_1;
Ed Warnickecb9cada2015-12-08 15:45:58 -07002801
2802 un.sun_family = AF_UNIX;
Damjan Marion00a9dca2016-08-17 17:05:46 +02002803 strncpy ((char *) un.sun_path, (char *) sock_filename,
2804 sizeof (un.sun_path) - 1);
Ed Warnickecb9cada2015-12-08 15:45:58 -07002805
2806 /* remove if exists */
Damjan Marion00a9dca2016-08-17 17:05:46 +02002807 unlink ((char *) sock_filename);
Ed Warnickecb9cada2015-12-08 15:45:58 -07002808
Damjan Marion00a9dca2016-08-17 17:05:46 +02002809 if (bind (fd, (struct sockaddr *) &un, sizeof (un)) == -1)
2810 {
2811 rv = VNET_API_ERROR_SYSCALL_ERROR_2;
2812 goto error;
2813 }
Ed Warnickecb9cada2015-12-08 15:45:58 -07002814
Damjan Marion00a9dca2016-08-17 17:05:46 +02002815 if (listen (fd, 1) == -1)
2816 {
2817 rv = VNET_API_ERROR_SYSCALL_ERROR_3;
2818 goto error;
2819 }
Ed Warnickecb9cada2015-12-08 15:45:58 -07002820
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00002821 *sock_fd = fd;
2822 return 0;
Ed Warnickecb9cada2015-12-08 15:45:58 -07002823
2824error:
Damjan Marion00a9dca2016-08-17 17:05:46 +02002825 close (fd);
Ed Warnickecb9cada2015-12-08 15:45:58 -07002826 return rv;
2827}
2828
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00002829/**
2830 * Create the ethernet interface for a vhost-user interface.
2831 */
Damjan Marion00a9dca2016-08-17 17:05:46 +02002832static void
2833vhost_user_create_ethernet (vnet_main_t * vnm, vlib_main_t * vm,
2834 vhost_user_intf_t * vui, u8 * hwaddress)
Ed Warnickecb9cada2015-12-08 15:45:58 -07002835{
Damjan Marion00a9dca2016-08-17 17:05:46 +02002836 vhost_user_main_t *vum = &vhost_user_main;
Ed Warnickecb9cada2015-12-08 15:45:58 -07002837 u8 hwaddr[6];
Damjan Marion00a9dca2016-08-17 17:05:46 +02002838 clib_error_t *error;
Ed Warnickecb9cada2015-12-08 15:45:58 -07002839
2840 /* create hw and sw interface */
Damjan Marion00a9dca2016-08-17 17:05:46 +02002841 if (hwaddress)
2842 {
2843 clib_memcpy (hwaddr, hwaddress, 6);
2844 }
2845 else
2846 {
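      /* No MAC given: generate a locally administered address 02:fe:xx:xx:xx:xx
       * using random bytes for the lower four octets. */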
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00002847 random_u32 (&vum->random);
2848 clib_memcpy (hwaddr + 2, &vum->random, sizeof (vum->random));
Damjan Marion00a9dca2016-08-17 17:05:46 +02002849 hwaddr[0] = 2;
2850 hwaddr[1] = 0xfe;
2851 }
Ed Warnickecb9cada2015-12-08 15:45:58 -07002852
2853 error = ethernet_register_interface
2854 (vnm,
2855 vhost_user_dev_class.index,
Damjan Marion00a9dca2016-08-17 17:05:46 +02002856 vui - vum->vhost_user_interfaces /* device instance */ ,
2857 hwaddr /* ethernet address */ ,
2858 &vui->hw_if_index, 0 /* flag change */ );
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00002859
Ed Warnickecb9cada2015-12-08 15:45:58 -07002860 if (error)
2861 clib_error_report (error);
Pierre Pfister328e99b2016-02-12 13:18:42 +00002862
2863 vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, vui->hw_if_index);
2864 hi->max_l3_packet_bytes[VLIB_RX] = hi->max_l3_packet_bytes[VLIB_TX] = 9000;
Ed Warnickecb9cada2015-12-08 15:45:58 -07002865}
2866
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00002867/*
2868 * Initialize vui with the specified attributes
2869 */
Damjan Marion00a9dca2016-08-17 17:05:46 +02002870static void
2871vhost_user_vui_init (vnet_main_t * vnm,
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00002872 vhost_user_intf_t * vui,
2873 int server_sock_fd,
Damjan Marion00a9dca2016-08-17 17:05:46 +02002874 const char *sock_filename,
Stevenf3b53642017-05-01 14:03:02 -07002875 u64 feature_mask, u32 * sw_if_index)
Ed Warnickecb9cada2015-12-08 15:45:58 -07002876{
Damjan Marion00a9dca2016-08-17 17:05:46 +02002877 vnet_sw_interface_t *sw;
Yoann Desmouceaux4667c222016-02-24 22:51:00 +01002878 int q;
Steven5445f5f2017-04-25 16:16:00 -07002879 vhost_user_main_t *vum = &vhost_user_main;
Stevenf3b53642017-05-01 14:03:02 -07002880 vnet_hw_interface_t *hw;
Ed Warnickecb9cada2015-12-08 15:45:58 -07002881
Stevenf3b53642017-05-01 14:03:02 -07002882 hw = vnet_get_hw_interface (vnm, vui->hw_if_index);
2883 sw = vnet_get_hw_sw_interface (vnm, vui->hw_if_index);
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00002884 if (server_sock_fd != -1)
2885 {
Damjan Marion56dd5432017-09-08 19:52:02 +02002886 clib_file_t template = { 0 };
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00002887 template.read_function = vhost_user_socksvr_accept_ready;
2888 template.file_descriptor = server_sock_fd;
Steven5445f5f2017-04-25 16:16:00 -07002889 template.private_data = vui - vum->vhost_user_interfaces; //hw index
Damjan Marion56dd5432017-09-08 19:52:02 +02002890 vui->unix_server_index = clib_file_add (&file_main, &template);
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00002891 }
2892 else
2893 {
2894 vui->unix_server_index = ~0;
2895 }
2896
Ed Warnickecb9cada2015-12-08 15:45:58 -07002897 vui->sw_if_index = sw->sw_if_index;
Damjan Marion00a9dca2016-08-17 17:05:46 +02002898 strncpy (vui->sock_filename, sock_filename,
2899 ARRAY_LEN (vui->sock_filename) - 1);
Ed Warnickecb9cada2015-12-08 15:45:58 -07002900 vui->sock_errno = 0;
2901 vui->is_up = 0;
2902 vui->feature_mask = feature_mask;
Damjan Marion56dd5432017-09-08 19:52:02 +02002903 vui->clib_file_index = ~0;
Yoann Desmouceaux4667c222016-02-24 22:51:00 +01002904 vui->log_base_addr = 0;
Steven5445f5f2017-04-25 16:16:00 -07002905 vui->if_index = vui - vum->vhost_user_interfaces;
2906 mhash_set_mem (&vum->if_index_by_sock_name, vui->sock_filename,
2907 &vui->if_index, 0);
Yoann Desmouceaux4667c222016-02-24 22:51:00 +01002908
Pierre Pfistere21c5282016-09-21 08:04:59 +01002909 for (q = 0; q < VHOST_VRING_MAX_N; q++)
2910 vhost_user_vring_init (vui, q);
Ed Warnickecb9cada2015-12-08 15:45:58 -07002911
Stevenf3b53642017-05-01 14:03:02 -07002912 hw->flags |= VNET_HW_INTERFACE_FLAG_SUPPORTS_INT_MODE;
Damjan Marion00a9dca2016-08-17 17:05:46 +02002913 vnet_hw_interface_set_flags (vnm, vui->hw_if_index, 0);
Ed Warnickecb9cada2015-12-08 15:45:58 -07002914
2915 if (sw_if_index)
Damjan Marion00a9dca2016-08-17 17:05:46 +02002916 *sw_if_index = vui->sw_if_index;
Ed Warnickecb9cada2015-12-08 15:45:58 -07002917
Pierre Pfistere21c5282016-09-21 08:04:59 +01002918 for (q = 0; q < VHOST_VRING_MAX_N; q++)
Damjan Marion00a9dca2016-08-17 17:05:46 +02002919 {
Pierre Pfistere21c5282016-09-21 08:04:59 +01002920 vui->vring_locks[q] = clib_mem_alloc_aligned (CLIB_CACHE_LINE_BYTES,
2921 CLIB_CACHE_LINE_BYTES);
2922 memset ((void *) vui->vring_locks[q], 0, CLIB_CACHE_LINE_BYTES);
Damjan Marion00a9dca2016-08-17 17:05:46 +02002923 }
Pierre Pfistere21c5282016-09-21 08:04:59 +01002924
2925 vec_validate (vui->per_cpu_tx_qid,
2926 vlib_get_thread_main ()->n_vlib_mains - 1);
2927 vhost_user_tx_thread_placement (vui);
Ed Warnickecb9cada2015-12-08 15:45:58 -07002928}
2929
Damjan Marion00a9dca2016-08-17 17:05:46 +02002930int
2931vhost_user_create_if (vnet_main_t * vnm, vlib_main_t * vm,
2932 const char *sock_filename,
2933 u8 is_server,
2934 u32 * sw_if_index,
2935 u64 feature_mask,
Stevenf3b53642017-05-01 14:03:02 -07002936 u8 renumber, u32 custom_dev_instance, u8 * hwaddr)
Ed Warnickecb9cada2015-12-08 15:45:58 -07002937{
Damjan Marion00a9dca2016-08-17 17:05:46 +02002938 vhost_user_intf_t *vui = NULL;
Ed Warnickecb9cada2015-12-08 15:45:58 -07002939 u32 sw_if_idx = ~0;
Ed Warnickecb9cada2015-12-08 15:45:58 -07002940 int rv = 0;
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00002941 int server_sock_fd = -1;
Steven7312cc72017-03-15 21:18:55 -07002942 vhost_user_main_t *vum = &vhost_user_main;
Steven5445f5f2017-04-25 16:16:00 -07002943 uword *if_index;
Ed Warnickecb9cada2015-12-08 15:45:58 -07002944
Wojciech Dec3cd9eed2017-01-03 10:38:37 +01002945 if (sock_filename == NULL || !(strlen (sock_filename) > 0))
2946 {
2947 return VNET_API_ERROR_INVALID_ARGUMENT;
2948 }
2949
Steven5445f5f2017-04-25 16:16:00 -07002950 if_index = mhash_get (&vum->if_index_by_sock_name, (void *) sock_filename);
2951 if (if_index)
2952 {
2953 if (sw_if_index)
2954 {
2955 vui = &vum->vhost_user_interfaces[*if_index];
2956 *sw_if_index = vui->sw_if_index;
2957 }
2958 return VNET_API_ERROR_IF_ALREADY_EXISTS;
2959 }
2960
Damjan Marion00a9dca2016-08-17 17:05:46 +02002961 if (is_server)
2962 {
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00002963 if ((rv =
2964 vhost_user_init_server_sock (sock_filename, &server_sock_fd)) != 0)
Damjan Marion00a9dca2016-08-17 17:05:46 +02002965 {
2966 return rv;
2967 }
2968 }
Ed Warnickecb9cada2015-12-08 15:45:58 -07002969
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00002970 pool_get (vhost_user_main.vhost_user_interfaces, vui);
Ed Warnickecb9cada2015-12-08 15:45:58 -07002971
Pierre Pfisteref65cb02016-02-19 13:52:44 +00002972 vhost_user_create_ethernet (vnm, vm, vui, hwaddr);
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00002973 vhost_user_vui_init (vnm, vui, server_sock_fd, sock_filename,
Stevenf3b53642017-05-01 14:03:02 -07002974 feature_mask, &sw_if_idx);
Ed Warnickecb9cada2015-12-08 15:45:58 -07002975
Damjan Marion00a9dca2016-08-17 17:05:46 +02002976 if (renumber)
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00002977 vnet_interface_name_renumber (sw_if_idx, custom_dev_instance);
Ed Warnickecb9cada2015-12-08 15:45:58 -07002978
2979 if (sw_if_index)
Damjan Marion00a9dca2016-08-17 17:05:46 +02002980 *sw_if_index = sw_if_idx;
Ed Warnickecb9cada2015-12-08 15:45:58 -07002981
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00002982 // Process node must connect
2983 vlib_process_signal_event (vm, vhost_user_process_node.index, 0, 0);
Steven7312cc72017-03-15 21:18:55 -07002984
Ed Warnickecb9cada2015-12-08 15:45:58 -07002985 return rv;
2986}
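/*
 * Illustrative call of vhost_user_create_if() above (a sketch only; the
 * socket path is an example): create a client-mode interface with all
 * features advertised, no renumbering and an auto-generated MAC:
 *
 *   u32 sw_if_index;
 *   int rv = vhost_user_create_if (vnet_get_main (), vm,
 *                                  "/var/run/vpp/vhost1.sock",
 *                                  0,          // is_server: client mode
 *                                  &sw_if_index,
 *                                  (u64) ~0,   // feature_mask: all features
 *                                  0, ~0,      // renumber, custom_dev_instance
 *                                  0);         // hwaddr: NULL, generate one
 */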
2987
Damjan Marion00a9dca2016-08-17 17:05:46 +02002988int
2989vhost_user_modify_if (vnet_main_t * vnm, vlib_main_t * vm,
2990 const char *sock_filename,
2991 u8 is_server,
2992 u32 sw_if_index,
Stevenf3b53642017-05-01 14:03:02 -07002993 u64 feature_mask, u8 renumber, u32 custom_dev_instance)
Ed Warnickecb9cada2015-12-08 15:45:58 -07002994{
Damjan Marion00a9dca2016-08-17 17:05:46 +02002995 vhost_user_main_t *vum = &vhost_user_main;
2996 vhost_user_intf_t *vui = NULL;
Ed Warnickecb9cada2015-12-08 15:45:58 -07002997 u32 sw_if_idx = ~0;
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00002998 int server_sock_fd = -1;
Ed Warnickecb9cada2015-12-08 15:45:58 -07002999 int rv = 0;
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00003000 vnet_hw_interface_t *hwif;
Steven5445f5f2017-04-25 16:16:00 -07003001 uword *if_index;
Ed Warnickecb9cada2015-12-08 15:45:58 -07003002
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00003003 if (!(hwif = vnet_get_sup_hw_interface (vnm, sw_if_index)) ||
3004 hwif->dev_class_index != vhost_user_dev_class.index)
3005 return VNET_API_ERROR_INVALID_SW_IF_INDEX;
Ed Warnickecb9cada2015-12-08 15:45:58 -07003006
Steven5445f5f2017-04-25 16:16:00 -07003007 if (sock_filename == NULL || !(strlen (sock_filename) > 0))
3008 return VNET_API_ERROR_INVALID_ARGUMENT;
3009
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00003010 vui = vec_elt_at_index (vum->vhost_user_interfaces, hwif->dev_instance);
Ed Warnickecb9cada2015-12-08 15:45:58 -07003011
Steven5445f5f2017-04-25 16:16:00 -07003012 /*
3013 * Disallow changing the interface to have the same path name
3014 * as another interface
3015 */
3016 if_index = mhash_get (&vum->if_index_by_sock_name, (void *) sock_filename);
3017 if (if_index && (*if_index != vui->if_index))
3018 return VNET_API_ERROR_IF_ALREADY_EXISTS;
3019
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00003020 // First try to open server socket
Damjan Marion00a9dca2016-08-17 17:05:46 +02003021 if (is_server)
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00003022 if ((rv = vhost_user_init_server_sock (sock_filename,
3023 &server_sock_fd)) != 0)
3024 return rv;
Ed Warnickecb9cada2015-12-08 15:45:58 -07003025
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00003026 vhost_user_term_if (vui);
3027 vhost_user_vui_init (vnm, vui, server_sock_fd,
Stevenf3b53642017-05-01 14:03:02 -07003028 sock_filename, feature_mask, &sw_if_idx);
Ed Warnickecb9cada2015-12-08 15:45:58 -07003029
Damjan Marion00a9dca2016-08-17 17:05:46 +02003030 if (renumber)
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00003031 vnet_interface_name_renumber (sw_if_idx, custom_dev_instance);
Ed Warnickecb9cada2015-12-08 15:45:58 -07003032
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00003033 // Process node must connect
3034 vlib_process_signal_event (vm, vhost_user_process_node.index, 0, 0);
Steven7312cc72017-03-15 21:18:55 -07003035
Ed Warnickecb9cada2015-12-08 15:45:58 -07003036 return rv;
3037}
3038
3039clib_error_t *
3040vhost_user_connect_command_fn (vlib_main_t * vm,
Damjan Marion00a9dca2016-08-17 17:05:46 +02003041 unformat_input_t * input,
3042 vlib_cli_command_t * cmd)
Ed Warnickecb9cada2015-12-08 15:45:58 -07003043{
Damjan Marion00a9dca2016-08-17 17:05:46 +02003044 unformat_input_t _line_input, *line_input = &_line_input;
3045 u8 *sock_filename = NULL;
Ed Warnickecb9cada2015-12-08 15:45:58 -07003046 u32 sw_if_index;
3047 u8 is_server = 0;
Pierre Pfistere21c5282016-09-21 08:04:59 +01003048 u64 feature_mask = (u64) ~ (0ULL);
Ed Warnickecb9cada2015-12-08 15:45:58 -07003049 u8 renumber = 0;
3050 u32 custom_dev_instance = ~0;
Pierre Pfisteref65cb02016-02-19 13:52:44 +00003051 u8 hwaddr[6];
3052 u8 *hw = NULL;
Billy McFalla9a20e72017-02-15 11:39:12 -05003053 clib_error_t *error = NULL;
Ed Warnickecb9cada2015-12-08 15:45:58 -07003054
3055 /* Get a line of input. */
Damjan Marion00a9dca2016-08-17 17:05:46 +02003056 if (!unformat_user (input, unformat_line_input, line_input))
Ed Warnickecb9cada2015-12-08 15:45:58 -07003057 return 0;
3058
Damjan Marion00a9dca2016-08-17 17:05:46 +02003059 while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
3060 {
3061 if (unformat (line_input, "socket %s", &sock_filename))
3062 ;
3063 else if (unformat (line_input, "server"))
3064 is_server = 1;
3065 else if (unformat (line_input, "feature-mask 0x%llx", &feature_mask))
3066 ;
3067 else
3068 if (unformat
3069 (line_input, "hwaddr %U", unformat_ethernet_address, hwaddr))
3070 hw = hwaddr;
3071 else if (unformat (line_input, "renumber %d", &custom_dev_instance))
3072 {
3073 renumber = 1;
3074 }
3075 else
Billy McFalla9a20e72017-02-15 11:39:12 -05003076 {
3077 error = clib_error_return (0, "unknown input `%U'",
3078 format_unformat_error, line_input);
3079 goto done;
3080 }
Ed Warnickecb9cada2015-12-08 15:45:58 -07003081 }
Ed Warnickecb9cada2015-12-08 15:45:58 -07003082
Damjan Marion00a9dca2016-08-17 17:05:46 +02003083 vnet_main_t *vnm = vnet_get_main ();
Ed Warnickecb9cada2015-12-08 15:45:58 -07003084
Pierre Pfister5afccb22016-07-25 14:32:02 +01003085 int rv;
Damjan Marion00a9dca2016-08-17 17:05:46 +02003086 if ((rv = vhost_user_create_if (vnm, vm, (char *) sock_filename,
3087 is_server, &sw_if_index, feature_mask,
Stevenf3b53642017-05-01 14:03:02 -07003088 renumber, custom_dev_instance, hw)))
Damjan Marion00a9dca2016-08-17 17:05:46 +02003089 {
Billy McFalla9a20e72017-02-15 11:39:12 -05003090 error = clib_error_return (0, "vhost_user_create_if returned %d", rv);
3091 goto done;
Damjan Marion00a9dca2016-08-17 17:05:46 +02003092 }
Ed Warnickecb9cada2015-12-08 15:45:58 -07003093
Damjan Marion00a9dca2016-08-17 17:05:46 +02003094 vlib_cli_output (vm, "%U\n", format_vnet_sw_if_index_name, vnet_get_main (),
3095 sw_if_index);
Billy McFalla9a20e72017-02-15 11:39:12 -05003096
3097done:
3098 vec_free (sock_filename);
3099 unformat_free (line_input);
3100
3101 return error;
Ed Warnickecb9cada2015-12-08 15:45:58 -07003102}
3103
3104clib_error_t *
3105vhost_user_delete_command_fn (vlib_main_t * vm,
Damjan Marion00a9dca2016-08-17 17:05:46 +02003106 unformat_input_t * input,
3107 vlib_cli_command_t * cmd)
Ed Warnickecb9cada2015-12-08 15:45:58 -07003108{
Damjan Marion00a9dca2016-08-17 17:05:46 +02003109 unformat_input_t _line_input, *line_input = &_line_input;
Ed Warnickecb9cada2015-12-08 15:45:58 -07003110 u32 sw_if_index = ~0;
Pierre Pfisterece983d2016-11-21 12:52:22 +00003111 vnet_main_t *vnm = vnet_get_main ();
Billy McFalla9a20e72017-02-15 11:39:12 -05003112 clib_error_t *error = NULL;
Ed Warnickecb9cada2015-12-08 15:45:58 -07003113
3114 /* Get a line of input. */
Damjan Marion00a9dca2016-08-17 17:05:46 +02003115 if (!unformat_user (input, unformat_line_input, line_input))
Ed Warnickecb9cada2015-12-08 15:45:58 -07003116 return 0;
3117
Damjan Marion00a9dca2016-08-17 17:05:46 +02003118 while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
3119 {
3120 if (unformat (line_input, "sw_if_index %d", &sw_if_index))
3121 ;
Pierre Pfisterece983d2016-11-21 12:52:22 +00003122 else if (unformat
3123 (line_input, "%U", unformat_vnet_sw_interface, vnm,
3124 &sw_if_index))
3125 {
3126 vnet_hw_interface_t *hwif =
3127 vnet_get_sup_hw_interface (vnm, sw_if_index);
3128 if (hwif == NULL ||
3129 vhost_user_dev_class.index != hwif->dev_class_index)
Billy McFalla9a20e72017-02-15 11:39:12 -05003130 {
3131 error = clib_error_return (0, "Not a vhost interface");
3132 goto done;
3133 }
Pierre Pfisterece983d2016-11-21 12:52:22 +00003134 }
Damjan Marion00a9dca2016-08-17 17:05:46 +02003135 else
Billy McFalla9a20e72017-02-15 11:39:12 -05003136 {
3137 error = clib_error_return (0, "unknown input `%U'",
3138 format_unformat_error, line_input);
3139 goto done;
3140 }
Damjan Marion00a9dca2016-08-17 17:05:46 +02003141 }
Billy McFalla9a20e72017-02-15 11:39:12 -05003142
Damjan Marion00a9dca2016-08-17 17:05:46 +02003143 vhost_user_delete_if (vnm, vm, sw_if_index);
Billy McFalla9a20e72017-02-15 11:39:12 -05003144
3145done:
3146 unformat_free (line_input);
3147
3148 return error;
Ed Warnickecb9cada2015-12-08 15:45:58 -07003149}
3150
Damjan Marion00a9dca2016-08-17 17:05:46 +02003151int
3152vhost_user_dump_ifs (vnet_main_t * vnm, vlib_main_t * vm,
3153 vhost_user_intf_details_t ** out_vuids)
Ed Warnickecb9cada2015-12-08 15:45:58 -07003154{
3155 int rv = 0;
Damjan Marion00a9dca2016-08-17 17:05:46 +02003156 vhost_user_main_t *vum = &vhost_user_main;
3157 vhost_user_intf_t *vui;
3158 vhost_user_intf_details_t *r_vuids = NULL;
3159 vhost_user_intf_details_t *vuid = NULL;
3160 u32 *hw_if_indices = 0;
3161 vnet_hw_interface_t *hi;
Ed Warnickecb9cada2015-12-08 15:45:58 -07003162 u8 *s = NULL;
3163 int i;
3164
3165 if (!out_vuids)
Damjan Marion00a9dca2016-08-17 17:05:46 +02003166 return -1;
Ed Warnickecb9cada2015-12-08 15:45:58 -07003167
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00003168 pool_foreach (vui, vum->vhost_user_interfaces,
3169 vec_add1 (hw_if_indices, vui->hw_if_index);
3170 );
Ed Warnickecb9cada2015-12-08 15:45:58 -07003171
Damjan Marion00a9dca2016-08-17 17:05:46 +02003172 for (i = 0; i < vec_len (hw_if_indices); i++)
3173 {
3174 hi = vnet_get_hw_interface (vnm, hw_if_indices[i]);
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00003175 vui = pool_elt_at_index (vum->vhost_user_interfaces, hi->dev_instance);
Ed Warnickecb9cada2015-12-08 15:45:58 -07003176
Damjan Marion00a9dca2016-08-17 17:05:46 +02003177 vec_add2 (r_vuids, vuid, 1);
3178 vuid->sw_if_index = vui->sw_if_index;
3179 vuid->virtio_net_hdr_sz = vui->virtio_net_hdr_sz;
3180 vuid->features = vui->features;
Damjan Marion00a9dca2016-08-17 17:05:46 +02003181 vuid->num_regions = vui->nregions;
Marek Gradzki0578cd12017-02-13 14:19:51 +01003182 vuid->is_server = vui->unix_server_index != ~0;
Damjan Marion00a9dca2016-08-17 17:05:46 +02003183 vuid->sock_errno = vui->sock_errno;
3184 strncpy ((char *) vuid->sock_filename, (char *) vui->sock_filename,
3185 ARRAY_LEN (vuid->sock_filename) - 1);
Ed Warnickecb9cada2015-12-08 15:45:58 -07003186
Damjan Marion00a9dca2016-08-17 17:05:46 +02003187 s = format (s, "%v%c", hi->name, 0);
Ed Warnickecb9cada2015-12-08 15:45:58 -07003188
Damjan Marion00a9dca2016-08-17 17:05:46 +02003189 strncpy ((char *) vuid->if_name, (char *) s,
3190 ARRAY_LEN (vuid->if_name) - 1);
3191 _vec_len (s) = 0;
3192 }
Ed Warnickecb9cada2015-12-08 15:45:58 -07003193
3194 vec_free (s);
3195 vec_free (hw_if_indices);
3196
3197 *out_vuids = r_vuids;
3198
3199 return rv;
3200}
3201
3202clib_error_t *
3203show_vhost_user_command_fn (vlib_main_t * vm,
Damjan Marion00a9dca2016-08-17 17:05:46 +02003204 unformat_input_t * input,
3205 vlib_cli_command_t * cmd)
Ed Warnickecb9cada2015-12-08 15:45:58 -07003206{
Damjan Marion00a9dca2016-08-17 17:05:46 +02003207 clib_error_t *error = 0;
3208 vnet_main_t *vnm = vnet_get_main ();
3209 vhost_user_main_t *vum = &vhost_user_main;
3210 vhost_user_intf_t *vui;
3211 u32 hw_if_index, *hw_if_indices = 0;
3212 vnet_hw_interface_t *hi;
Stevenf3b53642017-05-01 14:03:02 -07003213 u16 *queue;
Pierre Pfistere21c5282016-09-21 08:04:59 +01003214 u32 ci;
Ed Warnickecb9cada2015-12-08 15:45:58 -07003215 int i, j, q;
3216 int show_descr = 0;
Damjan Marion00a9dca2016-08-17 17:05:46 +02003217 struct feat_struct
3218 {
3219 u8 bit;
3220 char *str;
3221 };
Ed Warnickecb9cada2015-12-08 15:45:58 -07003222 struct feat_struct *feat_entry;
3223
3224 static struct feat_struct feat_array[] = {
3225#define _(s,b) { .str = #s, .bit = b, },
Damjan Marion00a9dca2016-08-17 17:05:46 +02003226 foreach_virtio_net_feature
Ed Warnickecb9cada2015-12-08 15:45:58 -07003227#undef _
Damjan Marion00a9dca2016-08-17 17:05:46 +02003228 {.str = NULL}
Ed Warnickecb9cada2015-12-08 15:45:58 -07003229 };
3230
Pierre Pfistere21c5282016-09-21 08:04:59 +01003231#define foreach_protocol_feature \
3232 _(VHOST_USER_PROTOCOL_F_MQ) \
3233 _(VHOST_USER_PROTOCOL_F_LOG_SHMFD)
3234
3235 static struct feat_struct proto_feat_array[] = {
3236#define _(s) { .str = #s, .bit = s},
3237 foreach_protocol_feature
3238#undef _
3239 {.str = NULL}
3240 };
3241
Damjan Marion00a9dca2016-08-17 17:05:46 +02003242 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3243 {
3244 if (unformat
3245 (input, "%U", unformat_vnet_hw_interface, vnm, &hw_if_index))
3246 {
3247 vec_add1 (hw_if_indices, hw_if_index);
Damjan Marion00a9dca2016-08-17 17:05:46 +02003248 }
3249 else if (unformat (input, "descriptors") || unformat (input, "desc"))
3250 show_descr = 1;
3251 else
3252 {
3253 error = clib_error_return (0, "unknown input `%U'",
3254 format_unformat_error, input);
3255 goto done;
3256 }
Ed Warnickecb9cada2015-12-08 15:45:58 -07003257 }
Damjan Marion00a9dca2016-08-17 17:05:46 +02003258 if (vec_len (hw_if_indices) == 0)
3259 {
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00003260 pool_foreach (vui, vum->vhost_user_interfaces,
3261 vec_add1 (hw_if_indices, vui->hw_if_index);
3262 );
Ed Warnickecb9cada2015-12-08 15:45:58 -07003263 }
Damjan Marion00a9dca2016-08-17 17:05:46 +02003264 vlib_cli_output (vm, "Virtio vhost-user interfaces");
Pierre Pfistere21c5282016-09-21 08:04:59 +01003265 vlib_cli_output (vm, "Global:\n coalesce frames %d time %e",
Damjan Marion00a9dca2016-08-17 17:05:46 +02003266 vum->coalesce_frames, vum->coalesce_time);
Stevenf3b53642017-05-01 14:03:02 -07003267 vlib_cli_output (vm, " number of rx virtqueues in interrupt mode: %d",
3268 vum->ifq_count);
Damjan Marion00a9dca2016-08-17 17:05:46 +02003269
3270 for (i = 0; i < vec_len (hw_if_indices); i++)
3271 {
3272 hi = vnet_get_hw_interface (vnm, hw_if_indices[i]);
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00003273 vui = pool_elt_at_index (vum->vhost_user_interfaces, hi->dev_instance);
Damjan Marion00a9dca2016-08-17 17:05:46 +02003274 vlib_cli_output (vm, "Interface: %s (ifindex %d)",
3275 hi->name, hw_if_indices[i]);
3276
Pierre Pfistere21c5282016-09-21 08:04:59 +01003277 vlib_cli_output (vm, "virtio_net_hdr_sz %d\n"
3278 " features mask (0x%llx): \n"
3279 " features (0x%llx): \n",
3280 vui->virtio_net_hdr_sz, vui->feature_mask,
3281 vui->features);
Damjan Marion00a9dca2016-08-17 17:05:46 +02003282
3283 feat_entry = (struct feat_struct *) &feat_array;
3284 while (feat_entry->str)
3285 {
Pierre Pfistere21c5282016-09-21 08:04:59 +01003286 if (vui->features & (1ULL << feat_entry->bit))
3287 vlib_cli_output (vm, " %s (%d)", feat_entry->str,
3288 feat_entry->bit);
3289 feat_entry++;
3290 }
3291
3292 vlib_cli_output (vm, " protocol features (0x%llx)",
3293 vui->protocol_features);
3294 feat_entry = (struct feat_struct *) &proto_feat_array;
3295 while (feat_entry->str)
3296 {
3297 if (vui->protocol_features & (1ULL << feat_entry->bit))
Damjan Marion00a9dca2016-08-17 17:05:46 +02003298 vlib_cli_output (vm, " %s (%d)", feat_entry->str,
3299 feat_entry->bit);
3300 feat_entry++;
3301 }
3302
3303 vlib_cli_output (vm, "\n");
3304
Damjan Marion00a9dca2016-08-17 17:05:46 +02003305 vlib_cli_output (vm, " socket filename %s type %s errno \"%s\"\n\n",
3306 vui->sock_filename,
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00003307 (vui->unix_server_index != ~0) ? "server" : "client",
Damjan Marion00a9dca2016-08-17 17:05:46 +02003308 strerror (vui->sock_errno));
3309
Pierre Pfistere21c5282016-09-21 08:04:59 +01003310 vlib_cli_output (vm, " rx placement: ");
Stevenf3b53642017-05-01 14:03:02 -07003311
3312 vec_foreach (queue, vui->rx_queues)
Pierre Pfistere21c5282016-09-21 08:04:59 +01003313 {
Stevenf3b53642017-05-01 14:03:02 -07003314 vnet_main_t *vnm = vnet_get_main ();
3315 uword thread_index;
3316 vnet_hw_interface_rx_mode mode;
3317
3318 thread_index = vnet_get_device_input_thread_index (vnm,
3319 vui->hw_if_index,
3320 *queue);
3321 vnet_hw_interface_get_rx_mode (vnm, vui->hw_if_index, *queue, &mode);
3322 vlib_cli_output (vm, " thread %d on vring %d, %U\n",
3323 thread_index, VHOST_VRING_IDX_TX (*queue),
3324 format_vnet_hw_interface_rx_mode, mode);
Pierre Pfistere21c5282016-09-21 08:04:59 +01003325 }
3326
3327 vlib_cli_output (vm, " tx placement: %s\n",
3328 vui->use_tx_spinlock ? "spin-lock" : "lock-free");
3329
3330 vec_foreach_index (ci, vui->per_cpu_tx_qid)
3331 {
3332 vlib_cli_output (vm, " thread %d on vring %d\n", ci,
3333 VHOST_VRING_IDX_RX (vui->per_cpu_tx_qid[ci]));
3334 }
3335
3336 vlib_cli_output (vm, "\n");
3337
Damjan Marion00a9dca2016-08-17 17:05:46 +02003338 vlib_cli_output (vm, " Memory regions (total %d)\n", vui->nregions);
3339
3340 if (vui->nregions)
3341 {
3342 vlib_cli_output (vm,
3343 " region fd guest_phys_addr memory_size userspace_addr mmap_offset mmap_addr\n");
3344 vlib_cli_output (vm,
3345 " ====== ===== ================== ================== ================== ================== ==================\n");
3346 }
3347 for (j = 0; j < vui->nregions; j++)
3348 {
3349 vlib_cli_output (vm,
3350 " %d %-5d 0x%016lx 0x%016lx 0x%016lx 0x%016lx 0x%016lx\n",
3351 j, vui->region_mmap_fd[j],
3352 vui->regions[j].guest_phys_addr,
3353 vui->regions[j].memory_size,
3354 vui->regions[j].userspace_addr,
3355 vui->regions[j].mmap_offset,
3356 pointer_to_uword (vui->region_mmap_addr[j]));
3357 }
Pierre Pfistere21c5282016-09-21 08:04:59 +01003358 for (q = 0; q < VHOST_VRING_MAX_N; q++)
Damjan Marion00a9dca2016-08-17 17:05:46 +02003359 {
Pierre Pfistere21c5282016-09-21 08:04:59 +01003360 if (!vui->vrings[q].started)
3361 continue;
3362
3363 vlib_cli_output (vm, "\n Virtqueue %d (%s%s)\n", q,
3364 (q & 1) ? "RX" : "TX",
3365 vui->vrings[q].enabled ? "" : " disabled");
Damjan Marion00a9dca2016-08-17 17:05:46 +02003366
3367 vlib_cli_output (vm,
3368 " qsz %d last_avail_idx %d last_used_idx %d\n",
Steven97878892017-08-29 09:23:26 -07003369 vui->vrings[q].qsz_mask + 1,
3370 vui->vrings[q].last_avail_idx,
Damjan Marion00a9dca2016-08-17 17:05:46 +02003371 vui->vrings[q].last_used_idx);
3372
3373 if (vui->vrings[q].avail && vui->vrings[q].used)
3374 vlib_cli_output (vm,
3375 " avail.flags %x avail.idx %d used.flags %x used.idx %d\n",
3376 vui->vrings[q].avail->flags,
3377 vui->vrings[q].avail->idx,
3378 vui->vrings[q].used->flags,
3379 vui->vrings[q].used->idx);
3380
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00003381 int kickfd = UNIX_GET_FD (vui->vrings[q].kickfd_idx);
3382 int callfd = UNIX_GET_FD (vui->vrings[q].callfd_idx);
Damjan Marion00a9dca2016-08-17 17:05:46 +02003383 vlib_cli_output (vm, " kickfd %d callfd %d errfd %d\n",
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00003384 kickfd, callfd, vui->vrings[q].errfd);
Damjan Marion00a9dca2016-08-17 17:05:46 +02003385
3386 if (show_descr)
3387 {
3388 vlib_cli_output (vm, "\n descriptor table:\n");
3389 vlib_cli_output (vm,
3390 " id addr len flags next user_addr\n");
3391 vlib_cli_output (vm,
3392 " ===== ================== ===== ====== ===== ==================\n");
Steven97878892017-08-29 09:23:26 -07003393 for (j = 0; j < vui->vrings[q].qsz_mask + 1; j++)
Damjan Marion00a9dca2016-08-17 17:05:46 +02003394 {
Pierre Pfister11f92052016-09-21 08:08:55 +01003395 u32 mem_hint = 0;
Damjan Marion00a9dca2016-08-17 17:05:46 +02003396 vlib_cli_output (vm,
3397 " %-5d 0x%016lx %-5d 0x%04x %-5d 0x%016lx\n",
3398 j, vui->vrings[q].desc[j].addr,
3399 vui->vrings[q].desc[j].len,
3400 vui->vrings[q].desc[j].flags,
3401 vui->vrings[q].desc[j].next,
3402 pointer_to_uword (map_guest_mem
3403 (vui,
Pierre Pfisterba1d0462016-07-27 16:38:20 +01003404 vui->vrings[q].desc[j].
Pierre Pfister11f92052016-09-21 08:08:55 +01003405 addr, &mem_hint)));
Damjan Marion00a9dca2016-08-17 17:05:46 +02003406 }
3407 }
3408 }
3409 vlib_cli_output (vm, "\n");
3410 }
Ed Warnickecb9cada2015-12-08 15:45:58 -07003411done:
3412 vec_free (hw_if_indices);
3413 return error;
3414}
3415
Damjan Marion8d281b32016-08-24 14:32:39 +02003416/*
3417 * CLI functions
3418 */
3419
Billy McFalla92501a2016-11-23 12:45:29 -05003420/*?
3421 * Create a vHost User interface. Once created, a new virtual interface
3422 * will exist with the name '<em>VirtualEthernet0/0/x</em>', where '<em>x</em>'
3423 * is the next free index.
3424 *
3425 * There are several parameters associated with a vHost interface:
3426 *
Billy McFall28cf3b72018-01-15 17:54:52 -05003427 * - <b>socket <socket-filename></b> - Name of the linux socket used by the hypervisor
3428 * and VPP to manage the vHost interface. If in '<em>server</em>' mode, VPP will
3429 * create the socket if it does not already exist. If in '<em>client</em>' mode,
3430 * the hypervisor will create the socket if it does not already exist. The VPP code
3431 * is indifferent to the file location. However, if SELinux is enabled, then the
3432 * socket needs to be created in '<em>/var/run/vpp/</em>'.
Billy McFalla92501a2016-11-23 12:45:29 -05003433 *
Billy McFall28cf3b72018-01-15 17:54:52 -05003434 * - <b>server</b> - Optional flag to indicate that VPP should be the server for
3435 * the linux socket. If not provided, VPP will be the client. In '<em>server</em>'
3436 * mode, the VM can be reset without tearing down the vHost Interface. In
3437 * '<em>client</em>' mode, VPP can be reset without bringing down the VM and
3438 * tearing down the vHost Interface.
Billy McFalla92501a2016-11-23 12:45:29 -05003439 *
3440 * - <b>feature-mask <hex></b> - Optional virtio/vhost feature set negotiated at
Billy McFall28cf3b72018-01-15 17:54:52 -05003441 * startup. <b>This is intended for debugging only.</b> It is recommended that this
3442 * parameter not be used except by experienced users. By default, all supported
3443 * features will be advertised. Otherwise, provide the set of features desired.
Billy McFalla92501a2016-11-23 12:45:29 -05003444 * - 0x000008000 (15) - VIRTIO_NET_F_MRG_RXBUF
3445 * - 0x000020000 (17) - VIRTIO_NET_F_CTRL_VQ
3446 * - 0x000200000 (21) - VIRTIO_NET_F_GUEST_ANNOUNCE
3447 * - 0x000400000 (22) - VIRTIO_NET_F_MQ
3448 * - 0x004000000 (26) - VHOST_F_LOG_ALL
3449 * - 0x008000000 (27) - VIRTIO_F_ANY_LAYOUT
3450 * - 0x010000000 (28) - VIRTIO_F_INDIRECT_DESC
3451 * - 0x040000000 (30) - VHOST_USER_F_PROTOCOL_FEATURES
3452 * - 0x100000000 (32) - VIRTIO_F_VERSION_1
3453 *
3454 * - <b>hwaddr <mac-addr></b> - Optional ethernet address, can be in either
3455 * X:X:X:X:X:X unix or X.X.X cisco format.
3456 *
3457 * - <b>renumber <dev_instance></b> - Optional parameter which allows the instance
3458 * in the name to be specified. If the instance already exists, the name will be
3459 * used anyway and multiple instances will have the same name. Use with caution.
3460 *
3461 * @cliexpar
3462 * Example of how to create a vhost interface with VPP as the client and all features enabled:
Billy McFall28cf3b72018-01-15 17:54:52 -05003463 * @cliexstart{create vhost-user socket /var/run/vpp/vhost1.sock}
Billy McFalla92501a2016-11-23 12:45:29 -05003464 * VirtualEthernet0/0/0
3465 * @cliexend
3466 * Example of how to create a vhost interface with VPP as the server and with just
3467 * multiple queues enabled:
Billy McFall28cf3b72018-01-15 17:54:52 -05003468 * @cliexstart{create vhost-user socket /var/run/vpp/vhost2.sock server feature-mask 0x40400000}
Billy McFalla92501a2016-11-23 12:45:29 -05003469 * VirtualEthernet0/0/1
3470 * @cliexend
3471 * Once the vHost interface is created, enable the interface using:
3472 * @cliexcmd{set interface state VirtualEthernet0/0/0 up}
3473?*/
Damjan Marion8d281b32016-08-24 14:32:39 +02003474/* *INDENT-OFF* */
3475VLIB_CLI_COMMAND (vhost_user_connect_command, static) = {
3476 .path = "create vhost-user",
Steven7312cc72017-03-15 21:18:55 -07003477 .short_help = "create vhost-user socket <socket-filename> [server] "
Stevenf3b53642017-05-01 14:03:02 -07003478 "[feature-mask <hex>] [hwaddr <mac-addr>] [renumber <dev_instance>] ",
Damjan Marion8d281b32016-08-24 14:32:39 +02003479 .function = vhost_user_connect_command_fn,
3480};
Billy McFalla92501a2016-11-23 12:45:29 -05003481/* *INDENT-ON* */
Damjan Marion8d281b32016-08-24 14:32:39 +02003482
Billy McFalla92501a2016-11-23 12:45:29 -05003483/*?
3484 * Delete a vHost User interface using the interface name or the
Dave Barach13ad1f02017-03-26 19:36:18 -04003485 * software interface index. Use the '<em>show interface</em>'
Billy McFalla92501a2016-11-23 12:45:29 -05003486 * command to determine the software interface index. On deletion,
3487 * the linux socket will not be deleted.
3488 *
3489 * @cliexpar
3490 * Example of how to delete a vhost interface by name:
3491 * @cliexcmd{delete vhost-user VirtualEthernet0/0/1}
3492 * Example of how to delete a vhost interface by software interface index:
3493 * @cliexcmd{delete vhost-user sw_if_index 1}
3494?*/
3495/* *INDENT-OFF* */
Damjan Marion8d281b32016-08-24 14:32:39 +02003496VLIB_CLI_COMMAND (vhost_user_delete_command, static) = {
3497 .path = "delete vhost-user",
Billy McFalla92501a2016-11-23 12:45:29 -05003498 .short_help = "delete vhost-user {<interface> | sw_if_index <sw_idx>}",
Damjan Marion8d281b32016-08-24 14:32:39 +02003499 .function = vhost_user_delete_command_fn,
3500};
3501
Billy McFalla92501a2016-11-23 12:45:29 -05003502/*?
3503 * Display the attributes of a single vHost User interface (provide interface
3504 * name), multiple vHost User interfaces (provide a list of interface names separated
3505 * by spaces) or all vHost User interfaces (omit an interface name to display all
3506 * vHost interfaces).
3507 *
3508 * @cliexpar
3509 * @parblock
3510 * Example of how to display a vhost interface:
3511 * @cliexstart{show vhost-user VirtualEthernet0/0/0}
3512 * Virtio vhost-user interfaces
3513 * Global:
3514 * coalesce frames 32 time 1e-3
3515 * Interface: VirtualEthernet0/0/0 (ifindex 1)
3516 * virtio_net_hdr_sz 12
3517 * features mask (0xffffffffffffffff):
3518 * features (0x50408000):
3519 * VIRTIO_NET_F_MRG_RXBUF (15)
3520 * VIRTIO_NET_F_MQ (22)
3521 * VIRTIO_F_INDIRECT_DESC (28)
3522 * VHOST_USER_F_PROTOCOL_FEATURES (30)
3523 * protocol features (0x3)
3524 * VHOST_USER_PROTOCOL_F_MQ (0)
3525 * VHOST_USER_PROTOCOL_F_LOG_SHMFD (1)
3526 *
Billy McFall28cf3b72018-01-15 17:54:52 -05003527 * socket filename /var/run/vpp/vhost1.sock type client errno "Success"
Billy McFalla92501a2016-11-23 12:45:29 -05003528 *
3529 * rx placement:
3530 * thread 1 on vring 1
3531 * thread 1 on vring 5
3532 * thread 2 on vring 3
3533 * thread 2 on vring 7
3534 * tx placement: spin-lock
3535 * thread 0 on vring 0
3536 * thread 1 on vring 2
3537 * thread 2 on vring 0
3538 *
3539 * Memory regions (total 2)
3540 * region fd guest_phys_addr memory_size userspace_addr mmap_offset mmap_addr
3541 * ====== ===== ================== ================== ================== ================== ==================
3542 * 0 60 0x0000000000000000 0x00000000000a0000 0x00002aaaaac00000 0x0000000000000000 0x00002aab2b400000
3543 * 1 61 0x00000000000c0000 0x000000003ff40000 0x00002aaaaacc0000 0x00000000000c0000 0x00002aababcc0000
3544 *
3545 * Virtqueue 0 (TX)
3546 * qsz 256 last_avail_idx 0 last_used_idx 0
3547 * avail.flags 1 avail.idx 128 used.flags 1 used.idx 0
3548 * kickfd 62 callfd 64 errfd -1
3549 *
3550 * Virtqueue 1 (RX)
3551 * qsz 256 last_avail_idx 0 last_used_idx 0
3552 * avail.flags 1 avail.idx 0 used.flags 1 used.idx 0
3553 * kickfd 65 callfd 66 errfd -1
3554 *
3555 * Virtqueue 2 (TX)
3556 * qsz 256 last_avail_idx 0 last_used_idx 0
3557 * avail.flags 1 avail.idx 128 used.flags 1 used.idx 0
3558 * kickfd 63 callfd 70 errfd -1
3559 *
3560 * Virtqueue 3 (RX)
3561 * qsz 256 last_avail_idx 0 last_used_idx 0
3562 * avail.flags 1 avail.idx 0 used.flags 1 used.idx 0
3563 * kickfd 72 callfd 74 errfd -1
3564 *
3565 * Virtqueue 4 (TX disabled)
3566 * qsz 256 last_avail_idx 0 last_used_idx 0
3567 * avail.flags 1 avail.idx 0 used.flags 1 used.idx 0
3568 * kickfd 76 callfd 78 errfd -1
3569 *
3570 * Virtqueue 5 (RX disabled)
3571 * qsz 256 last_avail_idx 0 last_used_idx 0
3572 * avail.flags 1 avail.idx 0 used.flags 1 used.idx 0
3573 * kickfd 80 callfd 82 errfd -1
3574 *
3575 * Virtqueue 6 (TX disabled)
3576 * qsz 256 last_avail_idx 0 last_used_idx 0
3577 * avail.flags 1 avail.idx 0 used.flags 1 used.idx 0
3578 * kickfd 84 callfd 86 errfd -1
3579 *
3580 * Virtqueue 7 (RX disabled)
3581 * qsz 256 last_avail_idx 0 last_used_idx 0
3582 * avail.flags 1 avail.idx 0 used.flags 1 used.idx 0
3583 * kickfd 88 callfd 90 errfd -1
3584 *
3585 * @cliexend
3586 *
3587 * The optional '<em>descriptors</em>' parameter will display the same output as
3588 * the previous example but will include the descriptor table for each queue.
3589 * The output is truncated below:
3590 * @cliexstart{show vhost-user VirtualEthernet0/0/0 descriptors}
3591 * Virtio vhost-user interfaces
3592 * Global:
3593 * coalesce frames 32 time 1e-3
3594 * Interface: VirtualEthernet0/0/0 (ifindex 1)
3595 * virtio_net_hdr_sz 12
3596 * features mask (0xffffffffffffffff):
3597 * features (0x50408000):
3598 * VIRTIO_NET_F_MRG_RXBUF (15)
3599 * VIRTIO_NET_F_MQ (22)
3600 * :
3601 * Virtqueue 0 (TX)
3602 * qsz 256 last_avail_idx 0 last_used_idx 0
3603 * avail.flags 1 avail.idx 128 used.flags 1 used.idx 0
3604 * kickfd 62 callfd 64 errfd -1
3605 *
3606 * descriptor table:
3607 * id addr len flags next user_addr
3608 * ===== ================== ===== ====== ===== ==================
3609 * 0 0x0000000010b6e974 2060 0x0002 1 0x00002aabbc76e974
3610 * 1 0x0000000010b6e034 2060 0x0002 2 0x00002aabbc76e034
3611 * 2 0x0000000010b6d6f4 2060 0x0002 3 0x00002aabbc76d6f4
3612 * 3 0x0000000010b6cdb4 2060 0x0002 4 0x00002aabbc76cdb4
3613 * 4 0x0000000010b6c474 2060 0x0002 5 0x00002aabbc76c474
3614 * 5 0x0000000010b6bb34 2060 0x0002 6 0x00002aabbc76bb34
3615 * 6 0x0000000010b6b1f4 2060 0x0002 7 0x00002aabbc76b1f4
3616 * 7 0x0000000010b6a8b4 2060 0x0002 8 0x00002aabbc76a8b4
3617 * 8 0x0000000010b69f74 2060 0x0002 9 0x00002aabbc769f74
3618 * 9 0x0000000010b69634 2060 0x0002 10 0x00002aabbc769634
3619 * 10 0x0000000010b68cf4 2060 0x0002 11 0x00002aabbc768cf4
3620 * :
3621 * 249 0x0000000000000000 0 0x0000 250 0x00002aab2b400000
3622 * 250 0x0000000000000000 0 0x0000 251 0x00002aab2b400000
3623 * 251 0x0000000000000000 0 0x0000 252 0x00002aab2b400000
3624 * 252 0x0000000000000000 0 0x0000 253 0x00002aab2b400000
3625 * 253 0x0000000000000000 0 0x0000 254 0x00002aab2b400000
3626 * 254 0x0000000000000000 0 0x0000 255 0x00002aab2b400000
3627 * 255 0x0000000000000000 0 0x0000 32768 0x00002aab2b400000
3628 *
3629 * Virtqueue 1 (RX)
3630 * qsz 256 last_avail_idx 0 last_used_idx 0
3631 * :
3632 * @cliexend
3633 * @endparblock
3634?*/
3635/* *INDENT-OFF* */
Damjan Marion8d281b32016-08-24 14:32:39 +02003636VLIB_CLI_COMMAND (show_vhost_user_command, static) = {
3637 .path = "show vhost-user",
Billy McFalla92501a2016-11-23 12:45:29 -05003638 .short_help = "show vhost-user [<interface> [<interface> [..]]] [descriptors]",
Damjan Marion8d281b32016-08-24 14:32:39 +02003639 .function = show_vhost_user_command_fn,
3640};
3641/* *INDENT-ON* */
Damjan Marion8d281b32016-08-24 14:32:39 +02003642
Steven388e51a2017-06-01 12:49:23 -07003643clib_error_t *
3644debug_vhost_user_command_fn (vlib_main_t * vm,
3645 unformat_input_t * input,
3646 vlib_cli_command_t * cmd)
3647{
3648 unformat_input_t _line_input, *line_input = &_line_input;
3649 clib_error_t *error = NULL;
3650 vhost_user_main_t *vum = &vhost_user_main;
Steven2ee2d572017-07-21 16:38:41 -07003651 u8 onoff = 0;
3652 u8 input_found = 0;
Steven388e51a2017-06-01 12:49:23 -07003653
3654 /* Get a line of input. */
3655 if (!unformat_user (input, unformat_line_input, line_input))
Steven2ee2d572017-07-21 16:38:41 -07003656 return clib_error_return (0, "missing argument");
Steven388e51a2017-06-01 12:49:23 -07003657
3658 while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
3659 {
Steven2ee2d572017-07-21 16:38:41 -07003660 if (input_found)
3661 {
3662 error = clib_error_return (0, "unknown input `%U'",
3663 format_unformat_error, line_input);
3664 goto done;
3665 }
3666
Steven388e51a2017-06-01 12:49:23 -07003667 if (unformat (line_input, "on"))
Steven2ee2d572017-07-21 16:38:41 -07003668 {
3669 input_found = 1;
3670 onoff = 1;
3671 }
Steven388e51a2017-06-01 12:49:23 -07003672 else if (unformat (line_input, "off"))
Steven2ee2d572017-07-21 16:38:41 -07003673 {
3674 input_found = 1;
3675 onoff = 0;
3676 }
Steven388e51a2017-06-01 12:49:23 -07003677 else
Steven2ee2d572017-07-21 16:38:41 -07003678 {
3679 error = clib_error_return (0, "unknown input `%U'",
3680 format_unformat_error, line_input);
3681 goto done;
3682 }
Steven388e51a2017-06-01 12:49:23 -07003683 }
3684
Steven2ee2d572017-07-21 16:38:41 -07003685 vum->debug = onoff;
3686
3687done:
Steven388e51a2017-06-01 12:49:23 -07003688 unformat_free (line_input);
3689
3690 return error;
3691}
3692
3693/* *INDENT-OFF* */
3694VLIB_CLI_COMMAND (debug_vhost_user_command, static) = {
3695 .path = "debug vhost-user",
3696 .short_help = "debug vhost-user <on | off>",
3697 .function = debug_vhost_user_command_fn,
3698};
3699/* *INDENT-ON* */
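/*
 * Example (illustrative): from the VPP CLI, "debug vhost-user on" sets
 * vum->debug and enables vhost-user debug logging; "debug vhost-user off"
 * disables it again.
 */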
3700
Ed Warnickecb9cada2015-12-08 15:45:58 -07003701static clib_error_t *
3702vhost_user_config (vlib_main_t * vm, unformat_input_t * input)
3703{
Damjan Marion00a9dca2016-08-17 17:05:46 +02003704 vhost_user_main_t *vum = &vhost_user_main;
Ed Warnickecb9cada2015-12-08 15:45:58 -07003705
3706 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3707 {
3708 if (unformat (input, "coalesce-frames %d", &vum->coalesce_frames))
Damjan Marion00a9dca2016-08-17 17:05:46 +02003709 ;
Ed Warnickecb9cada2015-12-08 15:45:58 -07003710 else if (unformat (input, "coalesce-time %f", &vum->coalesce_time))
Damjan Marion00a9dca2016-08-17 17:05:46 +02003711 ;
Ed Warnickecb9cada2015-12-08 15:45:58 -07003712 else if (unformat (input, "dont-dump-memory"))
Damjan Marion00a9dca2016-08-17 17:05:46 +02003713 vum->dont_dump_vhost_user_memory = 1;
Ed Warnickecb9cada2015-12-08 15:45:58 -07003714 else
Damjan Marion00a9dca2016-08-17 17:05:46 +02003715 return clib_error_return (0, "unknown input `%U'",
3716 format_unformat_error, input);
Ed Warnickecb9cada2015-12-08 15:45:58 -07003717 }
3718
3719 return 0;
3720}
3721
3722/* vhost-user { ... } configuration. */
3723VLIB_CONFIG_FUNCTION (vhost_user_config, "vhost-user");
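/*
 * Illustrative startup.conf stanza (values are examples only; the parameter
 * names are those parsed by vhost_user_config above):
 *
 *   vhost-user {
 *     coalesce-frames 32
 *     coalesce-time 1e-3
 *     dont-dump-memory
 *   }
 */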
3724
3725void
3726vhost_user_unmap_all (void)
3727{
Damjan Marion00a9dca2016-08-17 17:05:46 +02003728 vhost_user_main_t *vum = &vhost_user_main;
3729 vhost_user_intf_t *vui;
Ed Warnickecb9cada2015-12-08 15:45:58 -07003730
3731 if (vum->dont_dump_vhost_user_memory)
3732 {
Pierre Pfisterdbb3c252016-11-22 10:33:34 +00003733 pool_foreach (vui, vum->vhost_user_interfaces,
3734 unmap_all_mem_regions (vui);
3735 );
Ed Warnickecb9cada2015-12-08 15:45:58 -07003736 }
3737}
Damjan Marion00a9dca2016-08-17 17:05:46 +02003738
3739/*
3740 * fd.io coding-style-patch-verification: ON
3741 *
3742 * Local Variables:
3743 * eval: (c-set-style "gnu")
3744 * End:
3745 */