/*
 *------------------------------------------------------------------
 * vhost-user-output
 *
 * Copyright (c) 2014-2018 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *------------------------------------------------------------------
 */

#include <stddef.h>
#include <fcntl.h>		/* for open */
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/uio.h>		/* for iovec */
#include <netinet/in.h>
#include <sys/vfs.h>

#include <linux/if_arp.h>
#include <linux/if_tun.h>

#include <vlib/vlib.h>
#include <vlib/unix/unix.h>

#include <vnet/ip/ip.h>

#include <vnet/ethernet/ethernet.h>
#include <vnet/devices/devices.h>
#include <vnet/feature/feature.h>

#include <vnet/devices/virtio/vhost_user.h>
#include <vnet/devices/virtio/vhost_user_inline.h>

#include <vnet/gso/hdr_offset_parser.h>
/*
 * On the transmit side, we keep processing the buffers from vlib in the while
 * loop and prepare the copy order to be executed later. However, the static
 * array in which we keep the copy order is limited to VHOST_USER_COPY_ARRAY_N
 * entries. In order to not corrupt memory, we have to do the copy when the
 * static array reaches the copy threshold. We subtract 40 in case the code
 * goes into the inner loop for a maximum of 64k frames which may require
 * more array entries. We subtract 200 because our default buffer size is
 * 2048 and the default desc len is likely 1536. While it takes fewer than 40
 * vlib buffers for a jumbo frame, it may take twice as many descriptors
 * for the same jumbo frame. Use 200 for the extra headroom.
 */
#define VHOST_USER_TX_COPY_THRESHOLD (VHOST_USER_COPY_ARRAY_N - 200)

extern vnet_device_class_t vhost_user_device_class;

#define foreach_vhost_user_tx_func_error		\
  _(NONE, "no error")					\
  _(NOT_READY, "vhost vring not ready")			\
  _(DOWN, "vhost interface is down")			\
  _(PKT_DROP_NOBUF, "tx packet drops (no available descriptors)")	\
  _(PKT_DROP_NOMRG, "tx packet drops (cannot merge descriptors)")	\
  _(MMAP_FAIL, "mmap failure")				\
  _(INDIRECT_OVERFLOW, "indirect descriptor table overflow")

typedef enum
{
#define _(f,s) VHOST_USER_TX_FUNC_ERROR_##f,
  foreach_vhost_user_tx_func_error
#undef _
  VHOST_USER_TX_FUNC_N_ERROR,
} vhost_user_tx_func_error_t;

static __clib_unused char *vhost_user_tx_func_error_strings[] = {
#define _(n,s) s,
  foreach_vhost_user_tx_func_error
#undef _
};

static __clib_unused u8 *
format_vhost_user_interface_name (u8 * s, va_list * args)
{
  u32 i = va_arg (*args, u32);
  u32 show_dev_instance = ~0;
  vhost_user_main_t *vum = &vhost_user_main;

  if (i < vec_len (vum->show_dev_instance_by_real_dev_instance))
    show_dev_instance = vum->show_dev_instance_by_real_dev_instance[i];

  if (show_dev_instance != ~0)
    i = show_dev_instance;

  s = format (s, "VirtualEthernet0/0/%d", i);
  return s;
}

static __clib_unused int
vhost_user_name_renumber (vnet_hw_interface_t * hi, u32 new_dev_instance)
{
  // FIXME: check if the new dev instance is already used
  vhost_user_main_t *vum = &vhost_user_main;
  vhost_user_intf_t *vui = pool_elt_at_index (vum->vhost_user_interfaces,
					      hi->dev_instance);

  vec_validate_init_empty (vum->show_dev_instance_by_real_dev_instance,
			   hi->dev_instance, ~0);

  vum->show_dev_instance_by_real_dev_instance[hi->dev_instance] =
    new_dev_instance;

  vu_log_debug (vui, "renumbered vhost-user interface dev_instance %d to %d",
		hi->dev_instance, new_dev_instance);

  return 0;
}

/**
 * @brief Try once to lock the vring
 * @return 0 on success, non-zero on failure.
 */
static_always_inline int
vhost_user_vring_try_lock (vhost_user_intf_t * vui, u32 qid)
{
  return clib_atomic_test_and_set (vui->vring_locks[qid]);
}

/**
 * @brief Spin until the vring is successfully locked
 */
static_always_inline void
vhost_user_vring_lock (vhost_user_intf_t * vui, u32 qid)
{
  while (vhost_user_vring_try_lock (vui, qid))
    ;
}

/**
 * @brief Unlock the vring lock
 */
static_always_inline void
vhost_user_vring_unlock (vhost_user_intf_t * vui, u32 qid)
{
  clib_atomic_release (vui->vring_locks[qid]);
}

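/* Record a per-packet TX trace entry (split ring): note whether the head
 * descriptor is indirect, chained or a single descriptor, and remember the
 * length of the first descriptor. */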
static_always_inline void
vhost_user_tx_trace (vhost_trace_t * t,
		     vhost_user_intf_t * vui, u16 qid,
		     vlib_buffer_t * b, vhost_user_vring_t * rxvq)
{
  vhost_user_main_t *vum = &vhost_user_main;
  u32 last_avail_idx = rxvq->last_avail_idx;
  u32 desc_current = rxvq->avail->ring[last_avail_idx & rxvq->qsz_mask];
  vring_desc_t *hdr_desc = 0;
  u32 hint = 0;

  clib_memset (t, 0, sizeof (*t));
  t->device_index = vui - vum->vhost_user_interfaces;
  t->qid = qid;

  hdr_desc = &rxvq->desc[desc_current];
  if (rxvq->desc[desc_current].flags & VRING_DESC_F_INDIRECT)
    {
      t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_INDIRECT;
      /* Header is the first here */
      hdr_desc = map_guest_mem (vui, rxvq->desc[desc_current].addr, &hint);
    }
  if (rxvq->desc[desc_current].flags & VRING_DESC_F_NEXT)
    {
      t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_SIMPLE_CHAINED;
    }
  if (!(rxvq->desc[desc_current].flags & VRING_DESC_F_NEXT) &&
      !(rxvq->desc[desc_current].flags & VRING_DESC_F_INDIRECT))
    {
      t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_SINGLE_DESC;
    }

  t->first_desc_len = hdr_desc ? hdr_desc->len : 0;
}

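/*
 * Execute the copy orders queued by the TX loop. Guest destinations are
 * mapped two entries ahead of the pair being copied so the map_guest_mem()
 * work overlaps with the memcpy, and the upcoming source cache lines are
 * prefetched. Returns non-zero if any guest address fails to map.
 */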
static_always_inline u32
vhost_user_tx_copy (vhost_user_intf_t * vui, vhost_copy_t * cpy,
		    u16 copy_len, u32 * map_hint)
{
  void *dst0, *dst1, *dst2, *dst3;
  if (PREDICT_TRUE (copy_len >= 4))
    {
      if (PREDICT_FALSE (!(dst2 = map_guest_mem (vui, cpy[0].dst, map_hint))))
	return 1;
      if (PREDICT_FALSE (!(dst3 = map_guest_mem (vui, cpy[1].dst, map_hint))))
	return 1;
      while (PREDICT_TRUE (copy_len >= 4))
	{
	  dst0 = dst2;
	  dst1 = dst3;

	  if (PREDICT_FALSE
	      (!(dst2 = map_guest_mem (vui, cpy[2].dst, map_hint))))
	    return 1;
	  if (PREDICT_FALSE
	      (!(dst3 = map_guest_mem (vui, cpy[3].dst, map_hint))))
	    return 1;

	  CLIB_PREFETCH ((void *) cpy[2].src, 64, LOAD);
	  CLIB_PREFETCH ((void *) cpy[3].src, 64, LOAD);

	  clib_memcpy_fast (dst0, (void *) cpy[0].src, cpy[0].len);
	  clib_memcpy_fast (dst1, (void *) cpy[1].src, cpy[1].len);

	  vhost_user_log_dirty_pages_2 (vui, cpy[0].dst, cpy[0].len, 1);
	  vhost_user_log_dirty_pages_2 (vui, cpy[1].dst, cpy[1].len, 1);
	  copy_len -= 2;
	  cpy += 2;
	}
    }
  while (copy_len)
    {
      if (PREDICT_FALSE (!(dst0 = map_guest_mem (vui, cpy->dst, map_hint))))
	return 1;
      clib_memcpy_fast (dst0, (void *) cpy->src, cpy->len);
      vhost_user_log_dirty_pages_2 (vui, cpy->dst, cpy->len, 1);
      copy_len -= 1;
      cpy += 1;
    }
  return 0;
}

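/*
 * Translate the vlib buffer offload flags into the virtio_net_hdr the guest
 * expects: recompute the IPv4 header checksum when requested, point
 * csum_start / csum_offset at the L4 checksum field for partial checksum
 * offload, and fill in gso_type / gso_size when the guest negotiated the
 * matching TSO/UFO features.
 */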
static_always_inline void
vhost_user_handle_tx_offload (vhost_user_intf_t * vui, vlib_buffer_t * b,
			      virtio_net_hdr_t * hdr)
{
  generic_header_offset_t gho = { 0 };
  int is_ip4 = b->flags & VNET_BUFFER_F_IS_IP4;
  int is_ip6 = b->flags & VNET_BUFFER_F_IS_IP6;

  ASSERT (!(is_ip4 && is_ip6));
  vnet_generic_header_offset_parser (b, &gho, 1 /* l2 */ , is_ip4, is_ip6);
  if (b->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM)
    {
      ip4_header_t *ip4;

      ip4 =
	(ip4_header_t *) (vlib_buffer_get_current (b) + gho.l3_hdr_offset);
      ip4->checksum = ip4_header_checksum (ip4);
    }

  /* checksum offload */
  if (b->flags & VNET_BUFFER_F_OFFLOAD_UDP_CKSUM)
    {
      hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
      hdr->csum_start = gho.l4_hdr_offset;
      hdr->csum_offset = offsetof (udp_header_t, checksum);
      udp_header_t *udp =
	(udp_header_t *) (vlib_buffer_get_current (b) + gho.l4_hdr_offset);
      udp->checksum = 0;
    }
  else if (b->flags & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM)
    {
      hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
      hdr->csum_start = gho.l4_hdr_offset;
      hdr->csum_offset = offsetof (tcp_header_t, checksum);
      tcp_header_t *tcp =
	(tcp_header_t *) (vlib_buffer_get_current (b) + gho.l4_hdr_offset);
      tcp->checksum = 0;
    }

  /* GSO offload */
  if (b->flags & VNET_BUFFER_F_GSO)
    {
      if (b->flags & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM)
	{
	  if (is_ip4 &&
	      (vui->features & VIRTIO_FEATURE (VIRTIO_NET_F_GUEST_TSO4)))
	    {
	      hdr->gso_size = vnet_buffer2 (b)->gso_size;
	      hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
	    }
	  else if (is_ip6 &&
		   (vui->features & VIRTIO_FEATURE (VIRTIO_NET_F_GUEST_TSO6)))
	    {
	      hdr->gso_size = vnet_buffer2 (b)->gso_size;
	      hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
	    }
	}
      else if ((vui->features & VIRTIO_FEATURE (VIRTIO_NET_F_GUEST_UFO)) &&
	       (b->flags & VNET_BUFFER_F_OFFLOAD_UDP_CKSUM))
	{
	  hdr->gso_size = vnet_buffer2 (b)->gso_size;
	  hdr->gso_type = VIRTIO_NET_HDR_GSO_UDP;
	}
    }
}

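/*
 * Return the processed packed-ring descriptors to the guest. The AVAIL/USED
 * flag bits of every descriptor are set (or cleared) according to the
 * current used wrap counter; the head descriptor's flags are written last so
 * the guest never observes a partially completed chain. Finally, raise the
 * call interrupt if the guest has not disabled events and enough packets
 * have been coalesced.
 */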
static_always_inline void
vhost_user_mark_desc_available (vlib_main_t * vm, vhost_user_vring_t * rxvq,
				u16 * n_descs_processed, u8 chained,
				vlib_frame_t * frame, u32 n_left)
{
  u16 desc_idx, flags;
  vring_packed_desc_t *desc_table = rxvq->packed_desc;
  u16 last_used_idx = rxvq->last_used_idx;

  if (PREDICT_FALSE (*n_descs_processed == 0))
    return;

  if (rxvq->used_wrap_counter)
    flags = desc_table[last_used_idx & rxvq->qsz_mask].flags |
      (VRING_DESC_F_AVAIL | VRING_DESC_F_USED);
  else
    flags = desc_table[last_used_idx & rxvq->qsz_mask].flags &
      ~(VRING_DESC_F_AVAIL | VRING_DESC_F_USED);

  vhost_user_advance_last_used_idx (rxvq);

  for (desc_idx = 1; desc_idx < *n_descs_processed; desc_idx++)
    {
      if (rxvq->used_wrap_counter)
	desc_table[rxvq->last_used_idx & rxvq->qsz_mask].flags |=
	  (VRING_DESC_F_AVAIL | VRING_DESC_F_USED);
      else
	desc_table[rxvq->last_used_idx & rxvq->qsz_mask].flags &=
	  ~(VRING_DESC_F_AVAIL | VRING_DESC_F_USED);
      vhost_user_advance_last_used_idx (rxvq);
    }

  desc_table[last_used_idx & rxvq->qsz_mask].flags = flags;

  *n_descs_processed = 0;

  if (chained)
    {
      vring_packed_desc_t *desc_table = rxvq->packed_desc;

      while (desc_table[rxvq->last_used_idx & rxvq->qsz_mask].flags &
	     VRING_DESC_F_NEXT)
	vhost_user_advance_last_used_idx (rxvq);

      /* Advance past the current chained table entries */
      vhost_user_advance_last_used_idx (rxvq);
    }

  /* interrupt (call) handling */
  if ((rxvq->callfd_idx != ~0) &&
      (rxvq->avail_event->flags != VRING_EVENT_F_DISABLE))
    {
      vhost_user_main_t *vum = &vhost_user_main;

      rxvq->n_since_last_int += frame->n_vectors - n_left;
      if (rxvq->n_since_last_int > vum->coalesce_frames)
	vhost_user_send_call (vm, rxvq);
    }
}

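/* Packed-ring variant of vhost_user_tx_trace(): record whether the head
 * descriptor is indirect, chained or single, and its length. */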
static_always_inline void
vhost_user_tx_trace_packed (vhost_trace_t * t, vhost_user_intf_t * vui,
			    u16 qid, vlib_buffer_t * b,
			    vhost_user_vring_t * rxvq)
{
  vhost_user_main_t *vum = &vhost_user_main;
  u32 last_avail_idx = rxvq->last_avail_idx;
  u32 desc_current = last_avail_idx & rxvq->qsz_mask;
  vring_packed_desc_t *hdr_desc = 0;
  u32 hint = 0;

  clib_memset (t, 0, sizeof (*t));
  t->device_index = vui - vum->vhost_user_interfaces;
  t->qid = qid;

  hdr_desc = &rxvq->packed_desc[desc_current];
  if (rxvq->packed_desc[desc_current].flags & VRING_DESC_F_INDIRECT)
    {
      t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_INDIRECT;
      /* Header is the first here */
      hdr_desc = map_guest_mem (vui, rxvq->packed_desc[desc_current].addr,
				&hint);
    }
  if (rxvq->packed_desc[desc_current].flags & VRING_DESC_F_NEXT)
    {
      t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_SIMPLE_CHAINED;
    }
  if (!(rxvq->packed_desc[desc_current].flags & VRING_DESC_F_NEXT) &&
      !(rxvq->packed_desc[desc_current].flags & VRING_DESC_F_INDIRECT))
    {
      t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_SINGLE_DESC;
    }

  t->first_desc_len = hdr_desc ? hdr_desc->len : 0;
}

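/*
 * TX path for packed (VIRTIO_F_RING_PACKED) virtqueues: for every vlib
 * buffer, grab the next available descriptor chain (walking indirect tables
 * or chained descriptors as needed), queue copy orders for the virtio-net
 * header and the payload, and periodically flush the copies and hand the
 * descriptors back to the guest via vhost_user_mark_desc_available().
 */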
static_always_inline uword
vhost_user_device_class_packed (vlib_main_t * vm, vlib_node_runtime_t * node,
				vlib_frame_t * frame)
{
  u32 *buffers = vlib_frame_vector_args (frame);
  u32 n_left = frame->n_vectors;
  vhost_user_main_t *vum = &vhost_user_main;
  vnet_interface_output_runtime_t *rd = (void *) node->runtime_data;
  vhost_user_intf_t *vui =
    pool_elt_at_index (vum->vhost_user_interfaces, rd->dev_instance);
  u32 qid;
  vhost_user_vring_t *rxvq;
  u8 error;
  u32 thread_index = vm->thread_index;
  vhost_cpu_t *cpu = &vum->cpus[thread_index];
  u32 map_hint = 0;
  u8 retry = 8;
  u16 copy_len;
  u16 tx_headers_len;
  vring_packed_desc_t *desc_table;
  u32 or_flags;
  u16 desc_head, desc_index, desc_len;
  u16 n_descs_processed;
  u8 indirect, chained;

  qid = VHOST_VRING_IDX_RX (*vec_elt_at_index (vui->per_cpu_tx_qid,
					       thread_index));
  rxvq = &vui->vrings[qid];

retry:
  error = VHOST_USER_TX_FUNC_ERROR_NONE;
  tx_headers_len = 0;
  copy_len = 0;
  n_descs_processed = 0;

  while (n_left > 0)
    {
      vlib_buffer_t *b0, *current_b0;
      uword buffer_map_addr;
      u32 buffer_len;
      u16 bytes_left;
      u32 total_desc_len = 0;
      u16 n_entries = 0;

      indirect = 0;
      chained = 0;
      if (PREDICT_TRUE (n_left > 1))
	vlib_prefetch_buffer_with_index (vm, buffers[1], LOAD);

      b0 = vlib_get_buffer (vm, buffers[0]);
      if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
	{
	  cpu->current_trace = vlib_add_trace (vm, node, b0,
					       sizeof (*cpu->current_trace));
	  vhost_user_tx_trace_packed (cpu->current_trace, vui, qid / 2, b0,
				      rxvq);
	}

      desc_table = rxvq->packed_desc;
      desc_head = desc_index = rxvq->last_avail_idx & rxvq->qsz_mask;
      if (PREDICT_FALSE (!vhost_user_packed_desc_available (rxvq, desc_head)))
	{
	  error = VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOBUF;
	  goto done;
	}
      /*
       * Go deeper in case of indirect descriptor.
       * To test it, turn off mrg_rxbuf.
       */
      if (desc_table[desc_head].flags & VRING_DESC_F_INDIRECT)
	{
	  indirect = 1;
	  if (PREDICT_FALSE (desc_table[desc_head].len <
			     sizeof (vring_packed_desc_t)))
	    {
	      error = VHOST_USER_TX_FUNC_ERROR_INDIRECT_OVERFLOW;
	      goto done;
	    }
	  n_entries = desc_table[desc_head].len >> 4;
	  desc_table = map_guest_mem (vui, desc_table[desc_index].addr,
				      &map_hint);
	  if (PREDICT_FALSE (desc_table == 0))
	    {
	      error = VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL;
	      goto done;
	    }
	  desc_index = 0;
	}
      else if (rxvq->packed_desc[desc_head].flags & VRING_DESC_F_NEXT)
	chained = 1;

      desc_len = vui->virtio_net_hdr_sz;
      buffer_map_addr = desc_table[desc_index].addr;
      buffer_len = desc_table[desc_index].len;

      /* Get a header from the header array */
      virtio_net_hdr_mrg_rxbuf_t *hdr = &cpu->tx_headers[tx_headers_len];
      tx_headers_len++;
      hdr->hdr.flags = 0;
      hdr->hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE;
      hdr->num_buffers = 1;

      or_flags = (b0->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM) ||
	(b0->flags & VNET_BUFFER_F_OFFLOAD_UDP_CKSUM) ||
	(b0->flags & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM);

      /* Guest supports csum offload and buffer requires checksum offload? */
      if (or_flags &&
	  (vui->features & VIRTIO_FEATURE (VIRTIO_NET_F_GUEST_CSUM)))
	vhost_user_handle_tx_offload (vui, b0, &hdr->hdr);

      /* Prepare a copy order executed later for the header */
      ASSERT (copy_len < VHOST_USER_COPY_ARRAY_N);
      vhost_copy_t *cpy = &cpu->copy[copy_len];
      copy_len++;
      cpy->len = vui->virtio_net_hdr_sz;
      cpy->dst = buffer_map_addr;
      cpy->src = (uword) hdr;

      buffer_map_addr += vui->virtio_net_hdr_sz;
      buffer_len -= vui->virtio_net_hdr_sz;
      bytes_left = b0->current_length;
      current_b0 = b0;
      while (1)
	{
	  if (buffer_len == 0)
	    {
	      /* Get new output */
	      if (chained)
		{
		  /*
		   * Next one is chained
		   * Test it with both indirect and mrg_rxbuf off
		   */
		  if (PREDICT_FALSE (!(desc_table[desc_index].flags &
				       VRING_DESC_F_NEXT)))
		    {
		      /*
		       * Last descriptor in chain.
		       * Dequeue queued descriptors for this packet
		       */
		      vhost_user_dequeue_chained_descs (rxvq,
							&n_descs_processed);
		      error = VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOBUF;
		      goto done;
		    }
		  vhost_user_advance_last_avail_idx (rxvq);
		  desc_index = rxvq->last_avail_idx & rxvq->qsz_mask;
		  n_descs_processed++;
		  buffer_map_addr = desc_table[desc_index].addr;
		  buffer_len = desc_table[desc_index].len;
		  total_desc_len += desc_len;
		  desc_len = 0;
		}
	      else if (indirect)
		{
		  /*
		   * Indirect table
		   * Test it with mrg_rxbuf off
		   */
		  if (PREDICT_TRUE (n_entries > 0))
		    n_entries--;
		  else
		    {
		      /* Dequeue queued descriptors for this packet */
		      vhost_user_dequeue_chained_descs (rxvq,
							&n_descs_processed);
		      error = VHOST_USER_TX_FUNC_ERROR_INDIRECT_OVERFLOW;
		      goto done;
		    }
		  total_desc_len += desc_len;
		  desc_index = (desc_index + 1) & rxvq->qsz_mask;
		  buffer_map_addr = desc_table[desc_index].addr;
		  buffer_len = desc_table[desc_index].len;
		  desc_len = 0;
		}
	      else if (vui->virtio_net_hdr_sz == 12)
		{
		  /*
		   * MRG is available
		   * This is the default setting for the guest VM
		   */
		  virtio_net_hdr_mrg_rxbuf_t *hdr =
		    &cpu->tx_headers[tx_headers_len - 1];

		  desc_table[desc_index].len = desc_len;
		  vhost_user_advance_last_avail_idx (rxvq);
		  desc_head = desc_index =
		    rxvq->last_avail_idx & rxvq->qsz_mask;
		  hdr->num_buffers++;
		  n_descs_processed++;
		  desc_len = 0;

		  if (PREDICT_FALSE (!vhost_user_packed_desc_available
				     (rxvq, desc_index)))
		    {
		      /* Dequeue queued descriptors for this packet */
		      vhost_user_dequeue_descs (rxvq, hdr,
						&n_descs_processed);
		      error = VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOBUF;
		      goto done;
		    }

		  buffer_map_addr = desc_table[desc_index].addr;
		  buffer_len = desc_table[desc_index].len;
		}
	      else
		{
		  error = VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOMRG;
		  goto done;
		}
	    }

	  ASSERT (copy_len < VHOST_USER_COPY_ARRAY_N);
	  vhost_copy_t *cpy = &cpu->copy[copy_len];
	  copy_len++;
	  cpy->len = bytes_left;
	  cpy->len = (cpy->len > buffer_len) ? buffer_len : cpy->len;
	  cpy->dst = buffer_map_addr;
	  cpy->src = (uword) vlib_buffer_get_current (current_b0) +
	    current_b0->current_length - bytes_left;

	  bytes_left -= cpy->len;
	  buffer_len -= cpy->len;
	  buffer_map_addr += cpy->len;
	  desc_len += cpy->len;

	  CLIB_PREFETCH (&rxvq->packed_desc, CLIB_CACHE_LINE_BYTES, LOAD);

	  /* Check if vlib buffer has more data. If not, get more or break */
	  if (PREDICT_TRUE (!bytes_left))
	    {
	      if (PREDICT_FALSE
		  (current_b0->flags & VLIB_BUFFER_NEXT_PRESENT))
		{
		  current_b0 = vlib_get_buffer (vm, current_b0->next_buffer);
		  bytes_left = current_b0->current_length;
		}
	      else
		{
		  /* End of packet */
		  break;
		}
	    }
	}

      /* Move from available to used ring */
      total_desc_len += desc_len;
      rxvq->packed_desc[desc_head].len = total_desc_len;

      vhost_user_advance_last_avail_table_idx (vui, rxvq, chained);
      n_descs_processed++;

      if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
	cpu->current_trace->hdr = cpu->tx_headers[tx_headers_len - 1];

      n_left--;

      /*
       * Do the copy periodically to prevent
       * cpu->copy array overflow and corrupt memory
       */
      if (PREDICT_FALSE (copy_len >= VHOST_USER_TX_COPY_THRESHOLD) || chained)
	{
	  if (PREDICT_FALSE (vhost_user_tx_copy (vui, cpu->copy, copy_len,
						 &map_hint)))
	    vlib_error_count (vm, node->node_index,
			      VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL, 1);
	  copy_len = 0;

	  /* give buffers back to driver */
	  vhost_user_mark_desc_available (vm, rxvq, &n_descs_processed,
					  chained, frame, n_left);
	}

      buffers++;
    }

done:
  if (PREDICT_TRUE (copy_len))
    {
      if (PREDICT_FALSE (vhost_user_tx_copy (vui, cpu->copy, copy_len,
					     &map_hint)))
	vlib_error_count (vm, node->node_index,
			  VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL, 1);

      vhost_user_mark_desc_available (vm, rxvq, &n_descs_processed, chained,
				      frame, n_left);
    }

  /*
   * When n_left is set, error is always set to something too.
   * In case error is due to lack of remaining buffers, we go back up and
   * retry.
   * The idea is that it is better to waste some time on packets
   * that have been processed already than to drop them and get fresh
   * packets with a good likelihood that they will be dropped too.
   * This technique also gives more time to the VM driver to pick up packets.
   * In case the traffic flows from physical to virtual interfaces, this
   * technique will end up leveraging the physical NIC buffer in order to
   * absorb the VM's CPU jitter.
   */
  if (n_left && (error == VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOBUF) && retry)
    {
      retry--;
      goto retry;
    }

  vhost_user_vring_unlock (vui, qid);

  if (PREDICT_FALSE (n_left && error != VHOST_USER_TX_FUNC_ERROR_NONE))
    {
      vlib_error_count (vm, node->node_index, error, n_left);
      vlib_increment_simple_counter
	(vnet_main.interface_main.sw_if_counters +
	 VNET_INTERFACE_COUNTER_DROP, thread_index, vui->sw_if_index, n_left);
    }

  vlib_buffer_free (vm, vlib_frame_vector_args (frame), frame->n_vectors);
  return frame->n_vectors;
}

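/*
 * Per-interface TX function (split ring). Each vlib buffer in the frame is
 * copied into one or more guest descriptors through queued copy orders,
 * which are flushed in batches by vhost_user_tx_copy(). Interfaces that
 * negotiated the packed ring layout are diverted to
 * vhost_user_device_class_packed() above.
 */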
VNET_DEVICE_CLASS_TX_FN (vhost_user_device_class) (vlib_main_t * vm,
						   vlib_node_runtime_t *
						   node, vlib_frame_t * frame)
{
  u32 *buffers = vlib_frame_vector_args (frame);
  u32 n_left = frame->n_vectors;
  vhost_user_main_t *vum = &vhost_user_main;
  vnet_interface_output_runtime_t *rd = (void *) node->runtime_data;
  vhost_user_intf_t *vui =
    pool_elt_at_index (vum->vhost_user_interfaces, rd->dev_instance);
  u32 qid = ~0;
  vhost_user_vring_t *rxvq;
  u8 error;
  u32 thread_index = vm->thread_index;
  vhost_cpu_t *cpu = &vum->cpus[thread_index];
  u32 map_hint = 0;
  u8 retry = 8;
  u16 copy_len;
  u16 tx_headers_len;
  u32 or_flags;

  if (PREDICT_FALSE (!vui->admin_up))
    {
      error = VHOST_USER_TX_FUNC_ERROR_DOWN;
      goto done3;
    }

  if (PREDICT_FALSE (!vui->is_ready))
    {
      error = VHOST_USER_TX_FUNC_ERROR_NOT_READY;
      goto done3;
    }

  qid = VHOST_VRING_IDX_RX (*vec_elt_at_index (vui->per_cpu_tx_qid,
					       thread_index));
  rxvq = &vui->vrings[qid];
  if (PREDICT_FALSE (rxvq->avail == 0))
    {
      error = VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL;
      goto done3;
    }

  if (PREDICT_FALSE (vui->use_tx_spinlock))
    vhost_user_vring_lock (vui, qid);

  if (vhost_user_is_packed_ring_supported (vui))
    return (vhost_user_device_class_packed (vm, node, frame));

retry:
  error = VHOST_USER_TX_FUNC_ERROR_NONE;
  tx_headers_len = 0;
  copy_len = 0;
  while (n_left > 0)
    {
      vlib_buffer_t *b0, *current_b0;
      u16 desc_head, desc_index, desc_len;
      vring_desc_t *desc_table;
      uword buffer_map_addr;
      u32 buffer_len;
      u16 bytes_left;

      if (PREDICT_TRUE (n_left > 1))
	vlib_prefetch_buffer_with_index (vm, buffers[1], LOAD);

      b0 = vlib_get_buffer (vm, buffers[0]);

      if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
	{
	  cpu->current_trace = vlib_add_trace (vm, node, b0,
					       sizeof (*cpu->current_trace));
	  vhost_user_tx_trace (cpu->current_trace, vui, qid / 2, b0, rxvq);
	}

      if (PREDICT_FALSE (rxvq->last_avail_idx == rxvq->avail->idx))
	{
	  error = VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOBUF;
	  goto done;
	}

      desc_table = rxvq->desc;
      desc_head = desc_index =
	rxvq->avail->ring[rxvq->last_avail_idx & rxvq->qsz_mask];

      /* Go deeper in case of indirect descriptor
       * I don't know of any driver providing indirect for RX. */
      if (PREDICT_FALSE (rxvq->desc[desc_head].flags & VRING_DESC_F_INDIRECT))
	{
	  if (PREDICT_FALSE
	      (rxvq->desc[desc_head].len < sizeof (vring_desc_t)))
	    {
	      error = VHOST_USER_TX_FUNC_ERROR_INDIRECT_OVERFLOW;
	      goto done;
	    }
	  if (PREDICT_FALSE
	      (!(desc_table =
		 map_guest_mem (vui, rxvq->desc[desc_index].addr,
				&map_hint))))
	    {
	      error = VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL;
	      goto done;
	    }
	  desc_index = 0;
	}

      desc_len = vui->virtio_net_hdr_sz;
      buffer_map_addr = desc_table[desc_index].addr;
      buffer_len = desc_table[desc_index].len;

      {
	// Get a header from the header array
	virtio_net_hdr_mrg_rxbuf_t *hdr = &cpu->tx_headers[tx_headers_len];
	tx_headers_len++;
	hdr->hdr.flags = 0;
	hdr->hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE;
	hdr->num_buffers = 1;	//This is local, no need to check

	or_flags = (b0->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM) ||
	  (b0->flags & VNET_BUFFER_F_OFFLOAD_UDP_CKSUM) ||
	  (b0->flags & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM);

	/* Guest supports csum offload and buffer requires checksum offload? */
	if (or_flags
	    && (vui->features & VIRTIO_FEATURE (VIRTIO_NET_F_GUEST_CSUM)))
	  vhost_user_handle_tx_offload (vui, b0, &hdr->hdr);

	// Prepare a copy order executed later for the header
	ASSERT (copy_len < VHOST_USER_COPY_ARRAY_N);
	vhost_copy_t *cpy = &cpu->copy[copy_len];
	copy_len++;
	cpy->len = vui->virtio_net_hdr_sz;
	cpy->dst = buffer_map_addr;
	cpy->src = (uword) hdr;
      }

      buffer_map_addr += vui->virtio_net_hdr_sz;
      buffer_len -= vui->virtio_net_hdr_sz;
      bytes_left = b0->current_length;
      current_b0 = b0;
      while (1)
	{
	  if (buffer_len == 0)
	    {			//Get new output
	      if (desc_table[desc_index].flags & VRING_DESC_F_NEXT)
		{
		  //Next one is chained
		  desc_index = desc_table[desc_index].next;
		  buffer_map_addr = desc_table[desc_index].addr;
		  buffer_len = desc_table[desc_index].len;
		}
	      else if (vui->virtio_net_hdr_sz == 12)	//MRG is available
		{
		  virtio_net_hdr_mrg_rxbuf_t *hdr =
		    &cpu->tx_headers[tx_headers_len - 1];

		  //Move from available to used buffer
		  rxvq->used->ring[rxvq->last_used_idx & rxvq->qsz_mask].id =
		    desc_head;
		  rxvq->used->ring[rxvq->last_used_idx & rxvq->qsz_mask].len =
		    desc_len;
		  vhost_user_log_dirty_ring (vui, rxvq,
					     ring[rxvq->last_used_idx &
						  rxvq->qsz_mask]);

		  rxvq->last_avail_idx++;
		  rxvq->last_used_idx++;
		  hdr->num_buffers++;
		  desc_len = 0;

		  if (PREDICT_FALSE
		      (rxvq->last_avail_idx == rxvq->avail->idx))
		    {
		      //Dequeue queued descriptors for this packet
		      rxvq->last_used_idx -= hdr->num_buffers - 1;
		      rxvq->last_avail_idx -= hdr->num_buffers - 1;
		      error = VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOBUF;
		      goto done;
		    }

		  desc_table = rxvq->desc;
		  desc_head = desc_index =
		    rxvq->avail->ring[rxvq->last_avail_idx & rxvq->qsz_mask];
		  if (PREDICT_FALSE
		      (rxvq->desc[desc_head].flags & VRING_DESC_F_INDIRECT))
		    {
		      //It is seriously unlikely that a driver will put indirect descriptor
		      //after non-indirect descriptor.
		      if (PREDICT_FALSE
			  (rxvq->desc[desc_head].len < sizeof (vring_desc_t)))
			{
			  error = VHOST_USER_TX_FUNC_ERROR_INDIRECT_OVERFLOW;
			  goto done;
			}
		      if (PREDICT_FALSE
			  (!(desc_table =
			     map_guest_mem (vui,
					    rxvq->desc[desc_index].addr,
					    &map_hint))))
			{
			  error = VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL;
			  goto done;
			}
		      desc_index = 0;
		    }
		  buffer_map_addr = desc_table[desc_index].addr;
		  buffer_len = desc_table[desc_index].len;
		}
	      else
		{
		  error = VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOMRG;
		  goto done;
		}
	    }

	  {
	    ASSERT (copy_len < VHOST_USER_COPY_ARRAY_N);
	    vhost_copy_t *cpy = &cpu->copy[copy_len];
	    copy_len++;
	    cpy->len = bytes_left;
	    cpy->len = (cpy->len > buffer_len) ? buffer_len : cpy->len;
	    cpy->dst = buffer_map_addr;
	    cpy->src = (uword) vlib_buffer_get_current (current_b0) +
	      current_b0->current_length - bytes_left;

	    bytes_left -= cpy->len;
	    buffer_len -= cpy->len;
	    buffer_map_addr += cpy->len;
	    desc_len += cpy->len;

	    CLIB_PREFETCH (&rxvq->desc, CLIB_CACHE_LINE_BYTES, LOAD);
	  }

	  // Check if vlib buffer has more data. If not, get more or break.
	  if (PREDICT_TRUE (!bytes_left))
	    {
	      if (PREDICT_FALSE
		  (current_b0->flags & VLIB_BUFFER_NEXT_PRESENT))
		{
		  current_b0 = vlib_get_buffer (vm, current_b0->next_buffer);
		  bytes_left = current_b0->current_length;
		}
	      else
		{
		  //End of packet
		  break;
		}
	    }
	}

      //Move from available to used ring
      rxvq->used->ring[rxvq->last_used_idx & rxvq->qsz_mask].id = desc_head;
      rxvq->used->ring[rxvq->last_used_idx & rxvq->qsz_mask].len = desc_len;
      vhost_user_log_dirty_ring (vui, rxvq,
				 ring[rxvq->last_used_idx & rxvq->qsz_mask]);
      rxvq->last_avail_idx++;
      rxvq->last_used_idx++;

      if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
	{
	  cpu->current_trace->hdr = cpu->tx_headers[tx_headers_len - 1];
	}

      n_left--;			//At the end for error counting when 'goto done' is invoked

      /*
       * Do the copy periodically to prevent
       * cpu->copy array overflow and corrupt memory
       */
      if (PREDICT_FALSE (copy_len >= VHOST_USER_TX_COPY_THRESHOLD))
	{
	  if (PREDICT_FALSE (vhost_user_tx_copy (vui, cpu->copy, copy_len,
						 &map_hint)))
	    {
	      vlib_error_count (vm, node->node_index,
				VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL, 1);
	    }
	  copy_len = 0;

	  /* give buffers back to driver */
	  CLIB_MEMORY_BARRIER ();
	  rxvq->used->idx = rxvq->last_used_idx;
	  vhost_user_log_dirty_ring (vui, rxvq, idx);
	}
      buffers++;
    }

done:
  //Do the memory copies
  if (PREDICT_FALSE (vhost_user_tx_copy (vui, cpu->copy, copy_len,
					 &map_hint)))
    {
      vlib_error_count (vm, node->node_index,
			VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL, 1);
    }

  CLIB_MEMORY_BARRIER ();
  rxvq->used->idx = rxvq->last_used_idx;
  vhost_user_log_dirty_ring (vui, rxvq, idx);

  /*
   * When n_left is set, error is always set to something too.
   * In case error is due to lack of remaining buffers, we go back up and
   * retry.
   * The idea is that it is better to waste some time on packets
   * that have been processed already than to drop them and get fresh
   * packets with a good likelihood that they will be dropped too.
   * This technique also gives more time to the VM driver to pick up packets.
   * In case the traffic flows from physical to virtual interfaces, this
   * technique will end up leveraging the physical NIC buffer in order to
   * absorb the VM's CPU jitter.
   */
  if (n_left && (error == VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOBUF) && retry)
    {
      retry--;
      goto retry;
    }

  /* interrupt (call) handling */
  if ((rxvq->callfd_idx != ~0) &&
      !(rxvq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
    {
      rxvq->n_since_last_int += frame->n_vectors - n_left;

      if (rxvq->n_since_last_int > vum->coalesce_frames)
	vhost_user_send_call (vm, rxvq);
    }

  vhost_user_vring_unlock (vui, qid);

done3:
  if (PREDICT_FALSE (n_left && error != VHOST_USER_TX_FUNC_ERROR_NONE))
    {
      vlib_error_count (vm, node->node_index, error, n_left);
      vlib_increment_simple_counter
	(vnet_main.interface_main.sw_if_counters
	 + VNET_INTERFACE_COUNTER_DROP,
	 thread_index, vui->sw_if_index, n_left);
    }

  vlib_buffer_free (vm, vlib_frame_vector_args (frame), frame->n_vectors);
  return frame->n_vectors;
}

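/*
 * Called when the RX mode of a queue changes between polling and
 * interrupt/adaptive. Keeps the global count of interrupt-mode queues so the
 * coalescing timer process is started with the first such queue and stopped
 * with the last one, and updates the vring used->flags so the guest knows
 * whether it still needs to kick us.
 */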
static __clib_unused clib_error_t *
vhost_user_interface_rx_mode_change (vnet_main_t * vnm, u32 hw_if_index,
				     u32 qid, vnet_hw_interface_rx_mode mode)
{
  vlib_main_t *vm = vnm->vlib_main;
  vnet_hw_interface_t *hif = vnet_get_hw_interface (vnm, hw_if_index);
  vhost_user_main_t *vum = &vhost_user_main;
  vhost_user_intf_t *vui =
    pool_elt_at_index (vum->vhost_user_interfaces, hif->dev_instance);
  vhost_user_vring_t *txvq = &vui->vrings[VHOST_VRING_IDX_TX (qid)];

  if ((mode == VNET_HW_INTERFACE_RX_MODE_INTERRUPT) ||
      (mode == VNET_HW_INTERFACE_RX_MODE_ADAPTIVE))
    {
      if (txvq->kickfd_idx == ~0)
	{
	  // We cannot support interrupt mode if the driver opts out
	  return clib_error_return (0, "Driver does not support interrupt");
	}
      if (txvq->mode == VNET_HW_INTERFACE_RX_MODE_POLLING)
	{
	  vum->ifq_count++;
	  // Start the timer if this is the first encounter on interrupt
	  // interface/queue
	  if ((vum->ifq_count == 1) &&
	      (vum->coalesce_time > 0.0) && (vum->coalesce_frames > 0))
	    vlib_process_signal_event (vm,
				       vhost_user_send_interrupt_node.index,
				       VHOST_USER_EVENT_START_TIMER, 0);
	}
    }
  else if (mode == VNET_HW_INTERFACE_RX_MODE_POLLING)
    {
      if (((txvq->mode == VNET_HW_INTERFACE_RX_MODE_INTERRUPT) ||
	   (txvq->mode == VNET_HW_INTERFACE_RX_MODE_ADAPTIVE)) &&
	  vum->ifq_count)
	{
	  vum->ifq_count--;
	  // Stop the timer if there is no more interrupt interface/queue
	  if ((vum->ifq_count == 0) &&
	      (vum->coalesce_time > 0.0) && (vum->coalesce_frames > 0))
	    vlib_process_signal_event (vm,
				       vhost_user_send_interrupt_node.index,
				       VHOST_USER_EVENT_STOP_TIMER, 0);
	}
    }

  txvq->mode = mode;
  if (mode == VNET_HW_INTERFACE_RX_MODE_POLLING)
    txvq->used->flags = VRING_USED_F_NO_NOTIFY;
  else if ((mode == VNET_HW_INTERFACE_RX_MODE_ADAPTIVE) ||
	   (mode == VNET_HW_INTERFACE_RX_MODE_INTERRUPT))
    txvq->used->flags = 0;
  else
    {
      vu_log_err (vui, "unhandled mode %d changed for if %d queue %d", mode,
		  hw_if_index, qid);
      return clib_error_return (0, "unsupported");
    }

  return 0;
}

static __clib_unused clib_error_t *
vhost_user_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index,
				    u32 flags)
{
  vnet_hw_interface_t *hif = vnet_get_hw_interface (vnm, hw_if_index);
  vhost_user_main_t *vum = &vhost_user_main;
  vhost_user_intf_t *vui =
    pool_elt_at_index (vum->vhost_user_interfaces, hif->dev_instance);
  u8 link_old, link_new;

  link_old = vui_is_link_up (vui);

  vui->admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;

  link_new = vui_is_link_up (vui);

  if (link_old != link_new)
    vnet_hw_interface_set_flags (vnm, vui->hw_if_index, link_new ?
				 VNET_HW_INTERFACE_FLAG_LINK_UP : 0);

  return /* no error */ 0;
}

/* *INDENT-OFF* */
VNET_DEVICE_CLASS (vhost_user_device_class) = {
  .name = "vhost-user",
  .tx_function_n_errors = VHOST_USER_TX_FUNC_N_ERROR,
  .tx_function_error_strings = vhost_user_tx_func_error_strings,
  .format_device_name = format_vhost_user_interface_name,
  .name_renumber = vhost_user_name_renumber,
  .admin_up_down_function = vhost_user_interface_admin_up_down,
  .rx_mode_change_function = vhost_user_interface_rx_mode_change,
  .format_tx_trace = format_vhost_trace,
};
/* *INDENT-ON* */

/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */