/*
 *------------------------------------------------------------------
 * vhost-user-output
 *
 * Copyright (c) 2014-2018 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *------------------------------------------------------------------
 */

#include <stddef.h>
#include <fcntl.h>		/* for open */
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/uio.h>		/* for iovec */
#include <netinet/in.h>
#include <sys/vfs.h>

#include <linux/if_arp.h>
#include <linux/if_tun.h>

#include <vlib/vlib.h>
#include <vlib/unix/unix.h>

#include <vnet/ethernet/ethernet.h>
#include <vnet/devices/devices.h>
#include <vnet/feature/feature.h>

#include <vnet/devices/virtio/vhost_user.h>
#include <vnet/devices/virtio/vhost_user_inline.h>

#include <vnet/gso/hdr_offset_parser.h>
/*
 * On the transmit side, we keep processing the buffers from vlib in the while
 * loop and prepare the copy order to be executed later. However, the static
 * array in which we keep the copy order is limited to VHOST_USER_COPY_ARRAY_N
 * entries. In order not to corrupt memory, we have to do the copy when the
 * static array reaches the copy threshold. The inner loop may be entered for
 * a frame of up to 64k bytes, which may require more array entries. We
 * subtract 200 because our default buffer size is 2048 and the default desc
 * len is likely 1536: while such a jumbo frame takes fewer than 40 vlib
 * buffers, it may take twice as many descriptors. Use 200 for the extra
 * head room.
 */
#define VHOST_USER_TX_COPY_THRESHOLD (VHOST_USER_COPY_ARRAY_N - 200)

extern vnet_device_class_t vhost_user_device_class;

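/*
 * Per-node error counters. The foreach macro below is expanded twice: once
 * into the error enum and once into the matching human-readable strings that
 * are registered with the device class at the bottom of this file.
 */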
#define foreach_vhost_user_tx_func_error      \
  _(NONE, "no error")  \
  _(NOT_READY, "vhost vring not ready")  \
  _(DOWN, "vhost interface is down")  \
  _(PKT_DROP_NOBUF, "tx packet drops (no available descriptors)")  \
  _(PKT_DROP_NOMRG, "tx packet drops (cannot merge descriptors)")  \
  _(MMAP_FAIL, "mmap failure") \
  _(INDIRECT_OVERFLOW, "indirect descriptor table overflow")

typedef enum
{
#define _(f,s) VHOST_USER_TX_FUNC_ERROR_##f,
  foreach_vhost_user_tx_func_error
#undef _
  VHOST_USER_TX_FUNC_N_ERROR,
} vhost_user_tx_func_error_t;

static __clib_unused char *vhost_user_tx_func_error_strings[] = {
#define _(n,s) s,
  foreach_vhost_user_tx_func_error
#undef _
};

static __clib_unused u8 *
format_vhost_user_interface_name (u8 * s, va_list * args)
{
  u32 i = va_arg (*args, u32);
  u32 show_dev_instance = ~0;
  vhost_user_main_t *vum = &vhost_user_main;

  if (i < vec_len (vum->show_dev_instance_by_real_dev_instance))
    show_dev_instance = vum->show_dev_instance_by_real_dev_instance[i];

  if (show_dev_instance != ~0)
    i = show_dev_instance;

  s = format (s, "VirtualEthernet0/0/%d", i);
  return s;
}

static __clib_unused int
vhost_user_name_renumber (vnet_hw_interface_t * hi, u32 new_dev_instance)
{
  // FIXME: check if the new dev instance is already used
  vhost_user_main_t *vum = &vhost_user_main;
  vhost_user_intf_t *vui = pool_elt_at_index (vum->vhost_user_interfaces,
                                              hi->dev_instance);

  vec_validate_init_empty (vum->show_dev_instance_by_real_dev_instance,
                           hi->dev_instance, ~0);

  vum->show_dev_instance_by_real_dev_instance[hi->dev_instance] =
    new_dev_instance;

  vu_log_debug (vui, "renumbered vhost-user interface dev_instance %d to %d",
                hi->dev_instance, new_dev_instance);

  return 0;
}

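/*
 * Per-vring spinlock helpers. The TX functions below take this lock only
 * when vui->use_tx_spinlock is set, i.e. when the same guest RX queue may be
 * written to by more than one VPP worker thread at a time.
 */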
/**
 * @brief Try once to lock the vring
 * @return 0 on success, non-zero on failure.
 */
static_always_inline int
vhost_user_vring_try_lock (vhost_user_intf_t * vui, u32 qid)
{
  return clib_atomic_test_and_set (vui->vring_locks[qid]);
}

/**
 * @brief Spin until the vring is successfully locked
 */
static_always_inline void
vhost_user_vring_lock (vhost_user_intf_t * vui, u32 qid)
{
  while (vhost_user_vring_try_lock (vui, qid))
    ;
}

/**
 * @brief Unlock the vring lock
 */
static_always_inline void
vhost_user_vring_unlock (vhost_user_intf_t * vui, u32 qid)
{
  clib_atomic_release (vui->vring_locks[qid]);
}

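/*
 * Capture a per-packet TX trace record: which interface/queue was used and
 * whether the head descriptor of the chosen chain is indirect, chained, or a
 * single descriptor.
 */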
static_always_inline void
vhost_user_tx_trace (vhost_trace_t * t,
                     vhost_user_intf_t * vui, u16 qid,
                     vlib_buffer_t * b, vhost_user_vring_t * rxvq)
{
  vhost_user_main_t *vum = &vhost_user_main;
  u32 last_avail_idx = rxvq->last_avail_idx;
  u32 desc_current = rxvq->avail->ring[last_avail_idx & rxvq->qsz_mask];
  vring_desc_t *hdr_desc = 0;
  u32 hint = 0;

  clib_memset (t, 0, sizeof (*t));
  t->device_index = vui - vum->vhost_user_interfaces;
  t->qid = qid;

  hdr_desc = &rxvq->desc[desc_current];
  if (rxvq->desc[desc_current].flags & VRING_DESC_F_INDIRECT)
    {
      t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_INDIRECT;
      /* Header is the first here */
      hdr_desc = map_guest_mem (vui, rxvq->desc[desc_current].addr, &hint);
    }
  if (rxvq->desc[desc_current].flags & VRING_DESC_F_NEXT)
    {
      t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_SIMPLE_CHAINED;
    }
  if (!(rxvq->desc[desc_current].flags & VRING_DESC_F_NEXT) &&
      !(rxvq->desc[desc_current].flags & VRING_DESC_F_INDIRECT))
    {
      t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_SINGLE_DESC;
    }

  t->first_desc_len = hdr_desc ? hdr_desc->len : 0;
}

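/*
 * Execute the copy list built by the TX loop. Each entry's guest physical
 * destination is translated with map_guest_mem() (map_hint is a lookup hint
 * reused across calls), the data is copied, and the touched guest pages are
 * recorded in the dirty log (vhost_user_log_dirty_pages_2) so that a master
 * which enabled logging, e.g. for live migration, sees the writes. The main
 * loop handles two copies per iteration and prefetches the next two sources.
 * Returns non-zero if any destination address fails to map.
 */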
static_always_inline u32
vhost_user_tx_copy (vhost_user_intf_t * vui, vhost_copy_t * cpy,
                    u16 copy_len, u32 * map_hint)
{
  void *dst0, *dst1, *dst2, *dst3;
  if (PREDICT_TRUE (copy_len >= 4))
    {
      if (PREDICT_FALSE (!(dst2 = map_guest_mem (vui, cpy[0].dst, map_hint))))
        return 1;
      if (PREDICT_FALSE (!(dst3 = map_guest_mem (vui, cpy[1].dst, map_hint))))
        return 1;
      while (PREDICT_TRUE (copy_len >= 4))
        {
          dst0 = dst2;
          dst1 = dst3;

          if (PREDICT_FALSE
              (!(dst2 = map_guest_mem (vui, cpy[2].dst, map_hint))))
            return 1;
          if (PREDICT_FALSE
              (!(dst3 = map_guest_mem (vui, cpy[3].dst, map_hint))))
            return 1;

          CLIB_PREFETCH ((void *) cpy[2].src, 64, LOAD);
          CLIB_PREFETCH ((void *) cpy[3].src, 64, LOAD);

          clib_memcpy_fast (dst0, (void *) cpy[0].src, cpy[0].len);
          clib_memcpy_fast (dst1, (void *) cpy[1].src, cpy[1].len);

          vhost_user_log_dirty_pages_2 (vui, cpy[0].dst, cpy[0].len, 1);
          vhost_user_log_dirty_pages_2 (vui, cpy[1].dst, cpy[1].len, 1);
          copy_len -= 2;
          cpy += 2;
        }
    }
  while (copy_len)
    {
      if (PREDICT_FALSE (!(dst0 = map_guest_mem (vui, cpy->dst, map_hint))))
        return 1;
      clib_memcpy_fast (dst0, (void *) cpy->src, cpy->len);
      vhost_user_log_dirty_pages_2 (vui, cpy->dst, cpy->len, 1);
      copy_len -= 1;
      cpy += 1;
    }
  return 0;
}

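/*
 * Translate VPP buffer offload flags into the virtio-net header seen by the
 * guest: recompute the IPv4 header checksum when IP checksum offload was
 * requested, set VIRTIO_NET_HDR_F_NEEDS_CSUM with csum_start/csum_offset for
 * TCP or UDP checksum offload, and fill gso_type/gso_size when the buffer is
 * marked for GSO and the guest negotiated the matching TSO4/TSO6/UFO feature.
 */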
static_always_inline void
vhost_user_handle_tx_offload (vhost_user_intf_t * vui, vlib_buffer_t * b,
                              virtio_net_hdr_t * hdr)
{
  generic_header_offset_t gho = { 0 };
  int is_ip4 = b->flags & VNET_BUFFER_F_IS_IP4;
  int is_ip6 = b->flags & VNET_BUFFER_F_IS_IP6;

  ASSERT (!(is_ip4 && is_ip6));
  vnet_generic_header_offset_parser (b, &gho, 1 /* l2 */ , is_ip4, is_ip6);
  if (b->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM)
    {
      ip4_header_t *ip4;

      ip4 =
        (ip4_header_t *) (vlib_buffer_get_current (b) + gho.l3_hdr_offset);
      ip4->checksum = ip4_header_checksum (ip4);
    }

  /* checksum offload */
  if (b->flags & VNET_BUFFER_F_OFFLOAD_UDP_CKSUM)
    {
      hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
      hdr->csum_start = gho.l4_hdr_offset;
      hdr->csum_offset = offsetof (udp_header_t, checksum);
    }
  else if (b->flags & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM)
    {
      hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
      hdr->csum_start = gho.l4_hdr_offset;
      hdr->csum_offset = offsetof (tcp_header_t, checksum);
    }

  /* GSO offload */
  if (b->flags & VNET_BUFFER_F_GSO)
    {
      if (b->flags & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM)
        {
          if (is_ip4 &&
              (vui->features & VIRTIO_FEATURE (VIRTIO_NET_F_GUEST_TSO4)))
            {
              hdr->gso_size = vnet_buffer2 (b)->gso_size;
              hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
            }
          else if (is_ip6 &&
                   (vui->features & VIRTIO_FEATURE (VIRTIO_NET_F_GUEST_TSO6)))
            {
              hdr->gso_size = vnet_buffer2 (b)->gso_size;
              hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
            }
        }
      else if ((vui->features & VIRTIO_FEATURE (VIRTIO_NET_F_GUEST_UFO)) &&
               (b->flags & VNET_BUFFER_F_OFFLOAD_UDP_CKSUM))
        {
          hdr->gso_size = vnet_buffer2 (b)->gso_size;
          hdr->gso_type = VIRTIO_NET_HDR_GSO_UDP;
        }
    }
}

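/*
 * Hand processed descriptors back to the guest on a packed ring. Each
 * descriptor's AVAIL/USED flag bits are set to match the current used wrap
 * counter (both set or both clear, per the packed-ring convention used here),
 * and the head descriptor's flags are written last so the guest never
 * observes a partially completed batch. Finally, send the interrupt ("call")
 * if the guest has not disabled events and enough packets have accumulated
 * since the last one.
 */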
static_always_inline void
vhost_user_mark_desc_available (vlib_main_t * vm, vhost_user_vring_t * rxvq,
                                u16 * n_descs_processed, u8 chained,
                                vlib_frame_t * frame, u32 n_left)
{
  u16 desc_idx, flags;
  vring_packed_desc_t *desc_table = rxvq->packed_desc;
  u16 last_used_idx = rxvq->last_used_idx;

  if (PREDICT_FALSE (*n_descs_processed == 0))
    return;

  if (rxvq->used_wrap_counter)
    flags = desc_table[last_used_idx & rxvq->qsz_mask].flags |
      (VRING_DESC_F_AVAIL | VRING_DESC_F_USED);
  else
    flags = desc_table[last_used_idx & rxvq->qsz_mask].flags &
      ~(VRING_DESC_F_AVAIL | VRING_DESC_F_USED);

  vhost_user_advance_last_used_idx (rxvq);

  for (desc_idx = 1; desc_idx < *n_descs_processed; desc_idx++)
    {
      if (rxvq->used_wrap_counter)
        desc_table[rxvq->last_used_idx & rxvq->qsz_mask].flags |=
          (VRING_DESC_F_AVAIL | VRING_DESC_F_USED);
      else
        desc_table[rxvq->last_used_idx & rxvq->qsz_mask].flags &=
          ~(VRING_DESC_F_AVAIL | VRING_DESC_F_USED);
      vhost_user_advance_last_used_idx (rxvq);
    }

  desc_table[last_used_idx & rxvq->qsz_mask].flags = flags;

  *n_descs_processed = 0;

  if (chained)
    {
      vring_packed_desc_t *desc_table = rxvq->packed_desc;

      while (desc_table[rxvq->last_used_idx & rxvq->qsz_mask].flags &
             VRING_DESC_F_NEXT)
        vhost_user_advance_last_used_idx (rxvq);

      /* Advance past the current chained table entries */
      vhost_user_advance_last_used_idx (rxvq);
    }

  /* interrupt (call) handling */
  if ((rxvq->callfd_idx != ~0) &&
      (rxvq->avail_event->flags != VRING_EVENT_F_DISABLE))
    {
      vhost_user_main_t *vum = &vhost_user_main;

      rxvq->n_since_last_int += frame->n_vectors - n_left;
      if (rxvq->n_since_last_int > vum->coalesce_frames)
        vhost_user_send_call (vm, rxvq);
    }
}

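/*
 * Packed-ring variant of vhost_user_tx_trace(): the descriptor to record is
 * addressed directly by last_avail_idx instead of through the avail ring.
 */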
static_always_inline void
vhost_user_tx_trace_packed (vhost_trace_t * t, vhost_user_intf_t * vui,
                            u16 qid, vlib_buffer_t * b,
                            vhost_user_vring_t * rxvq)
{
  vhost_user_main_t *vum = &vhost_user_main;
  u32 last_avail_idx = rxvq->last_avail_idx;
  u32 desc_current = last_avail_idx & rxvq->qsz_mask;
  vring_packed_desc_t *hdr_desc = 0;
  u32 hint = 0;

  clib_memset (t, 0, sizeof (*t));
  t->device_index = vui - vum->vhost_user_interfaces;
  t->qid = qid;

  hdr_desc = &rxvq->packed_desc[desc_current];
  if (rxvq->packed_desc[desc_current].flags & VRING_DESC_F_INDIRECT)
    {
      t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_INDIRECT;
      /* Header is the first here */
      hdr_desc = map_guest_mem (vui, rxvq->packed_desc[desc_current].addr,
                                &hint);
    }
  if (rxvq->packed_desc[desc_current].flags & VRING_DESC_F_NEXT)
    {
      t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_SIMPLE_CHAINED;
    }
  if (!(rxvq->packed_desc[desc_current].flags & VRING_DESC_F_NEXT) &&
      !(rxvq->packed_desc[desc_current].flags & VRING_DESC_F_INDIRECT))
    {
      t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_SINGLE_DESC;
    }

  t->first_desc_len = hdr_desc ? hdr_desc->len : 0;
}

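/*
 * TX path for packed rings. For each vlib buffer: grab the next available
 * descriptor (following indirect tables and NEXT-chained descriptors, or
 * taking extra descriptors when mergeable RX buffers are in use), queue up a
 * copy of the virtio-net header followed by the packet data, and periodically
 * flush the copy list and hand the consumed descriptors back to the guest via
 * vhost_user_mark_desc_available().
 */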
static_always_inline uword
vhost_user_device_class_packed (vlib_main_t * vm, vlib_node_runtime_t * node,
                                vlib_frame_t * frame)
{
  u32 *buffers = vlib_frame_vector_args (frame);
  u32 n_left = frame->n_vectors;
  vhost_user_main_t *vum = &vhost_user_main;
  vnet_interface_output_runtime_t *rd = (void *) node->runtime_data;
  vhost_user_intf_t *vui =
    pool_elt_at_index (vum->vhost_user_interfaces, rd->dev_instance);
  u32 qid;
  vhost_user_vring_t *rxvq;
  u8 error;
  u32 thread_index = vm->thread_index;
  vhost_cpu_t *cpu = &vum->cpus[thread_index];
  u32 map_hint = 0;
  u8 retry = 8;
  u16 copy_len;
  u16 tx_headers_len;
  vring_packed_desc_t *desc_table;
  u32 or_flags;
  u16 desc_head, desc_index, desc_len;
  u16 n_descs_processed;
  u8 indirect, chained;

  qid = VHOST_VRING_IDX_RX (*vec_elt_at_index (vui->per_cpu_tx_qid,
                                               thread_index));
  rxvq = &vui->vrings[qid];

retry:
  error = VHOST_USER_TX_FUNC_ERROR_NONE;
  tx_headers_len = 0;
  copy_len = 0;
  n_descs_processed = 0;

  while (n_left > 0)
    {
      vlib_buffer_t *b0, *current_b0;
      uword buffer_map_addr;
      u32 buffer_len;
      u16 bytes_left;
      u32 total_desc_len = 0;
      u16 n_entries = 0;

      indirect = 0;
      chained = 0;
      if (PREDICT_TRUE (n_left > 1))
        vlib_prefetch_buffer_with_index (vm, buffers[1], LOAD);

      b0 = vlib_get_buffer (vm, buffers[0]);
      if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
        {
          cpu->current_trace = vlib_add_trace (vm, node, b0,
                                               sizeof (*cpu->current_trace));
          vhost_user_tx_trace_packed (cpu->current_trace, vui, qid / 2, b0,
                                      rxvq);
        }

      desc_table = rxvq->packed_desc;
      desc_head = desc_index = rxvq->last_avail_idx & rxvq->qsz_mask;
      if (PREDICT_FALSE (!vhost_user_packed_desc_available (rxvq, desc_head)))
        {
          error = VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOBUF;
          goto done;
        }
      /*
       * Go deeper in case of indirect descriptor.
       * To test it, turn off mrg_rxbuf.
       */
      if (desc_table[desc_head].flags & VRING_DESC_F_INDIRECT)
        {
          indirect = 1;
          if (PREDICT_FALSE (desc_table[desc_head].len <
                             sizeof (vring_packed_desc_t)))
            {
              error = VHOST_USER_TX_FUNC_ERROR_INDIRECT_OVERFLOW;
              goto done;
            }
          n_entries = desc_table[desc_head].len >> 4;
          desc_table = map_guest_mem (vui, desc_table[desc_index].addr,
                                      &map_hint);
          if (PREDICT_FALSE (desc_table == 0))
            {
              error = VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL;
              goto done;
            }
          desc_index = 0;
        }
      else if (rxvq->packed_desc[desc_head].flags & VRING_DESC_F_NEXT)
        chained = 1;

      desc_len = vui->virtio_net_hdr_sz;
      buffer_map_addr = desc_table[desc_index].addr;
      buffer_len = desc_table[desc_index].len;

      /* Get a header from the header array */
      virtio_net_hdr_mrg_rxbuf_t *hdr = &cpu->tx_headers[tx_headers_len];
      tx_headers_len++;
      hdr->hdr.flags = 0;
      hdr->hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE;
      hdr->num_buffers = 1;

      or_flags = (b0->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM) ||
        (b0->flags & VNET_BUFFER_F_OFFLOAD_UDP_CKSUM) ||
        (b0->flags & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM);

      /* Guest supports csum offload and buffer requires checksum offload? */
      if (or_flags &&
          (vui->features & VIRTIO_FEATURE (VIRTIO_NET_F_GUEST_CSUM)))
        vhost_user_handle_tx_offload (vui, b0, &hdr->hdr);

      /* Prepare a copy order executed later for the header */
      ASSERT (copy_len < VHOST_USER_COPY_ARRAY_N);
      vhost_copy_t *cpy = &cpu->copy[copy_len];
      copy_len++;
      cpy->len = vui->virtio_net_hdr_sz;
      cpy->dst = buffer_map_addr;
      cpy->src = (uword) hdr;

      buffer_map_addr += vui->virtio_net_hdr_sz;
      buffer_len -= vui->virtio_net_hdr_sz;
      bytes_left = b0->current_length;
      current_b0 = b0;
      while (1)
        {
          if (buffer_len == 0)
            {
              /* Get new output */
              if (chained)
                {
                  /*
                   * Next one is chained
                   * Test it with both indirect and mrg_rxbuf off
                   */
                  if (PREDICT_FALSE (!(desc_table[desc_index].flags &
                                       VRING_DESC_F_NEXT)))
                    {
                      /*
                       * Last descriptor in chain.
                       * Dequeue queued descriptors for this packet
                       */
                      vhost_user_dequeue_chained_descs (rxvq,
                                                        &n_descs_processed);
                      error = VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOBUF;
                      goto done;
                    }
                  vhost_user_advance_last_avail_idx (rxvq);
                  desc_index = rxvq->last_avail_idx & rxvq->qsz_mask;
                  n_descs_processed++;
                  buffer_map_addr = desc_table[desc_index].addr;
                  buffer_len = desc_table[desc_index].len;
                  total_desc_len += desc_len;
                  desc_len = 0;
                }
              else if (indirect)
                {
                  /*
                   * Indirect table
                   * Test it with mrg_rxbuf off
                   */
                  if (PREDICT_TRUE (n_entries > 0))
                    n_entries--;
                  else
                    {
                      /* Dequeue queued descriptors for this packet */
                      vhost_user_dequeue_chained_descs (rxvq,
                                                        &n_descs_processed);
                      error = VHOST_USER_TX_FUNC_ERROR_INDIRECT_OVERFLOW;
                      goto done;
                    }
                  total_desc_len += desc_len;
                  desc_index = (desc_index + 1) & rxvq->qsz_mask;
                  buffer_map_addr = desc_table[desc_index].addr;
                  buffer_len = desc_table[desc_index].len;
                  desc_len = 0;
                }
              else if (vui->virtio_net_hdr_sz == 12)
                {
                  /*
                   * MRG is available
                   * This is the default setting for the guest VM
                   */
                  virtio_net_hdr_mrg_rxbuf_t *hdr =
                    &cpu->tx_headers[tx_headers_len - 1];

                  desc_table[desc_index].len = desc_len;
                  vhost_user_advance_last_avail_idx (rxvq);
                  desc_head = desc_index =
                    rxvq->last_avail_idx & rxvq->qsz_mask;
                  hdr->num_buffers++;
                  n_descs_processed++;
                  desc_len = 0;

                  if (PREDICT_FALSE (!vhost_user_packed_desc_available
                                     (rxvq, desc_index)))
                    {
                      /* Dequeue queued descriptors for this packet */
                      vhost_user_dequeue_descs (rxvq, hdr,
                                                &n_descs_processed);
                      error = VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOBUF;
                      goto done;
                    }

                  buffer_map_addr = desc_table[desc_index].addr;
                  buffer_len = desc_table[desc_index].len;
                }
              else
                {
                  error = VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOMRG;
                  goto done;
                }
            }

          ASSERT (copy_len < VHOST_USER_COPY_ARRAY_N);
          vhost_copy_t *cpy = &cpu->copy[copy_len];
          copy_len++;
          cpy->len = bytes_left;
          cpy->len = (cpy->len > buffer_len) ? buffer_len : cpy->len;
          cpy->dst = buffer_map_addr;
          cpy->src = (uword) vlib_buffer_get_current (current_b0) +
            current_b0->current_length - bytes_left;

          bytes_left -= cpy->len;
          buffer_len -= cpy->len;
          buffer_map_addr += cpy->len;
          desc_len += cpy->len;

          CLIB_PREFETCH (&rxvq->packed_desc, CLIB_CACHE_LINE_BYTES, LOAD);

          /* Check if vlib buffer has more data. If not, get more or break */
          if (PREDICT_TRUE (!bytes_left))
            {
              if (PREDICT_FALSE
                  (current_b0->flags & VLIB_BUFFER_NEXT_PRESENT))
                {
                  current_b0 = vlib_get_buffer (vm, current_b0->next_buffer);
                  bytes_left = current_b0->current_length;
                }
              else
                {
                  /* End of packet */
                  break;
                }
            }
        }

      /* Move from available to used ring */
      total_desc_len += desc_len;
      rxvq->packed_desc[desc_head].len = total_desc_len;

      vhost_user_advance_last_avail_table_idx (vui, rxvq, chained);
      n_descs_processed++;

      if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
        cpu->current_trace->hdr = cpu->tx_headers[tx_headers_len - 1];

      n_left--;

      /*
       * Do the copy periodically to prevent the cpu->copy array from
       * overflowing and corrupting memory.
       */
      if (PREDICT_FALSE (copy_len >= VHOST_USER_TX_COPY_THRESHOLD) || chained)
        {
          if (PREDICT_FALSE (vhost_user_tx_copy (vui, cpu->copy, copy_len,
                                                 &map_hint)))
            vlib_error_count (vm, node->node_index,
                              VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL, 1);
          copy_len = 0;

          /* give buffers back to driver */
          vhost_user_mark_desc_available (vm, rxvq, &n_descs_processed,
                                          chained, frame, n_left);
        }

      buffers++;
    }

done:
  if (PREDICT_TRUE (copy_len))
    {
      if (PREDICT_FALSE (vhost_user_tx_copy (vui, cpu->copy, copy_len,
                                             &map_hint)))
        vlib_error_count (vm, node->node_index,
                          VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL, 1);

      vhost_user_mark_desc_available (vm, rxvq, &n_descs_processed, chained,
                                      frame, n_left);
    }

  /*
   * When n_left is set, error is always set to something too.
   * In case the error is due to lack of remaining buffers, we go back up and
   * retry.
   * The idea is that it is better to waste some time on packets that have
   * already been processed than to drop them and get more fresh packets with
   * a good likelihood that they will be dropped too.
   * This technique also gives the VM driver more time to pick up packets.
   * In case the traffic flows from physical to virtual interfaces, this
   * technique will end up leveraging the physical NIC buffers in order to
   * absorb the VM's CPU jitter.
   */
  if (n_left && (error == VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOBUF) && retry)
    {
      retry--;
      goto retry;
    }

  vhost_user_vring_unlock (vui, qid);

  if (PREDICT_FALSE (n_left && error != VHOST_USER_TX_FUNC_ERROR_NONE))
    {
      vlib_error_count (vm, node->node_index, error, n_left);
      vlib_increment_simple_counter
        (vnet_main.interface_main.sw_if_counters +
         VNET_INTERFACE_COUNTER_DROP, thread_index, vui->sw_if_index, n_left);
    }

  vlib_buffer_free (vm, vlib_frame_vector_args (frame), frame->n_vectors);
  return frame->n_vectors;
}

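/*
 * TX function for split rings. Buffers handed to this node are copied into
 * the guest's RX vring: for each packet the next available descriptor chain
 * is fetched, a copy of the virtio-net header and of the packet data is
 * queued (spilling into chained descriptors or additional mergeable RX
 * buffers as needed), and the copies are executed in batches by
 * vhost_user_tx_copy() before the used index is published to the guest.
 * Packed-ring interfaces are diverted to vhost_user_device_class_packed().
 */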
VNET_DEVICE_CLASS_TX_FN (vhost_user_device_class) (vlib_main_t * vm,
                                                   vlib_node_runtime_t *
                                                   node, vlib_frame_t * frame)
{
  u32 *buffers = vlib_frame_vector_args (frame);
  u32 n_left = frame->n_vectors;
  vhost_user_main_t *vum = &vhost_user_main;
  vnet_interface_output_runtime_t *rd = (void *) node->runtime_data;
  vhost_user_intf_t *vui =
    pool_elt_at_index (vum->vhost_user_interfaces, rd->dev_instance);
  u32 qid = ~0;
  vhost_user_vring_t *rxvq;
  u8 error;
  u32 thread_index = vm->thread_index;
  vhost_cpu_t *cpu = &vum->cpus[thread_index];
  u32 map_hint = 0;
  u8 retry = 8;
  u16 copy_len;
  u16 tx_headers_len;
  u32 or_flags;

  if (PREDICT_FALSE (!vui->admin_up))
    {
      error = VHOST_USER_TX_FUNC_ERROR_DOWN;
      goto done3;
    }

  if (PREDICT_FALSE (!vui->is_ready))
    {
      error = VHOST_USER_TX_FUNC_ERROR_NOT_READY;
      goto done3;
    }

  qid = VHOST_VRING_IDX_RX (*vec_elt_at_index (vui->per_cpu_tx_qid,
                                               thread_index));
  rxvq = &vui->vrings[qid];
  if (PREDICT_FALSE (rxvq->avail == 0))
    {
      error = VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL;
      goto done3;
    }

  if (PREDICT_FALSE (vui->use_tx_spinlock))
    vhost_user_vring_lock (vui, qid);

  if (vhost_user_is_packed_ring_supported (vui))
    return (vhost_user_device_class_packed (vm, node, frame));

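  /*
   * The loop below bails out with PKT_DROP_NOBUF when the guest has no more
   * available descriptors; in that case we loop back here up to 8 times
   * (see 'retry' above) before counting the remaining packets as drops.
   */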
retry:
  error = VHOST_USER_TX_FUNC_ERROR_NONE;
  tx_headers_len = 0;
  copy_len = 0;
  while (n_left > 0)
    {
      vlib_buffer_t *b0, *current_b0;
      u16 desc_head, desc_index, desc_len;
      vring_desc_t *desc_table;
      uword buffer_map_addr;
      u32 buffer_len;
      u16 bytes_left;

      if (PREDICT_TRUE (n_left > 1))
        vlib_prefetch_buffer_with_index (vm, buffers[1], LOAD);

      b0 = vlib_get_buffer (vm, buffers[0]);

      if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
        {
          cpu->current_trace = vlib_add_trace (vm, node, b0,
                                               sizeof (*cpu->current_trace));
          vhost_user_tx_trace (cpu->current_trace, vui, qid / 2, b0, rxvq);
        }

      if (PREDICT_FALSE (rxvq->last_avail_idx == rxvq->avail->idx))
        {
          error = VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOBUF;
          goto done;
        }

      desc_table = rxvq->desc;
      desc_head = desc_index =
        rxvq->avail->ring[rxvq->last_avail_idx & rxvq->qsz_mask];

      /* Go deeper in case of indirect descriptor
       * I don't know of any driver providing indirect for RX. */
      if (PREDICT_FALSE (rxvq->desc[desc_head].flags & VRING_DESC_F_INDIRECT))
        {
          if (PREDICT_FALSE
              (rxvq->desc[desc_head].len < sizeof (vring_desc_t)))
            {
              error = VHOST_USER_TX_FUNC_ERROR_INDIRECT_OVERFLOW;
              goto done;
            }
          if (PREDICT_FALSE
              (!(desc_table =
                 map_guest_mem (vui, rxvq->desc[desc_index].addr,
                                &map_hint))))
            {
              error = VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL;
              goto done;
            }
          desc_index = 0;
        }

      desc_len = vui->virtio_net_hdr_sz;
      buffer_map_addr = desc_table[desc_index].addr;
      buffer_len = desc_table[desc_index].len;

      {
        // Get a header from the header array
        virtio_net_hdr_mrg_rxbuf_t *hdr = &cpu->tx_headers[tx_headers_len];
        tx_headers_len++;
        hdr->hdr.flags = 0;
        hdr->hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE;
        hdr->num_buffers = 1;   //This is local, no need to check

        or_flags = (b0->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM) ||
          (b0->flags & VNET_BUFFER_F_OFFLOAD_UDP_CKSUM) ||
          (b0->flags & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM);

        /* Guest supports csum offload and buffer requires checksum offload? */
        if (or_flags
            && (vui->features & VIRTIO_FEATURE (VIRTIO_NET_F_GUEST_CSUM)))
          vhost_user_handle_tx_offload (vui, b0, &hdr->hdr);

        // Prepare a copy order executed later for the header
        ASSERT (copy_len < VHOST_USER_COPY_ARRAY_N);
        vhost_copy_t *cpy = &cpu->copy[copy_len];
        copy_len++;
        cpy->len = vui->virtio_net_hdr_sz;
        cpy->dst = buffer_map_addr;
        cpy->src = (uword) hdr;
      }

      buffer_map_addr += vui->virtio_net_hdr_sz;
      buffer_len -= vui->virtio_net_hdr_sz;
      bytes_left = b0->current_length;
      current_b0 = b0;
      while (1)
        {
          if (buffer_len == 0)
            {                   //Get new output
              if (desc_table[desc_index].flags & VRING_DESC_F_NEXT)
                {
                  //Next one is chained
                  desc_index = desc_table[desc_index].next;
                  buffer_map_addr = desc_table[desc_index].addr;
                  buffer_len = desc_table[desc_index].len;
                }
              else if (vui->virtio_net_hdr_sz == 12)    //MRG is available
                {
                  virtio_net_hdr_mrg_rxbuf_t *hdr =
                    &cpu->tx_headers[tx_headers_len - 1];

                  //Move from available to used buffer
                  rxvq->used->ring[rxvq->last_used_idx & rxvq->qsz_mask].id =
                    desc_head;
                  rxvq->used->ring[rxvq->last_used_idx & rxvq->qsz_mask].len =
                    desc_len;
                  vhost_user_log_dirty_ring (vui, rxvq,
                                             ring[rxvq->last_used_idx &
                                                  rxvq->qsz_mask]);

                  rxvq->last_avail_idx++;
                  rxvq->last_used_idx++;
                  hdr->num_buffers++;
                  desc_len = 0;

                  if (PREDICT_FALSE
                      (rxvq->last_avail_idx == rxvq->avail->idx))
                    {
                      //Dequeue queued descriptors for this packet
                      rxvq->last_used_idx -= hdr->num_buffers - 1;
                      rxvq->last_avail_idx -= hdr->num_buffers - 1;
                      error = VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOBUF;
                      goto done;
                    }

                  desc_table = rxvq->desc;
                  desc_head = desc_index =
                    rxvq->avail->ring[rxvq->last_avail_idx & rxvq->qsz_mask];
                  if (PREDICT_FALSE
                      (rxvq->desc[desc_head].flags & VRING_DESC_F_INDIRECT))
                    {
                      //It is seriously unlikely that a driver will put indirect descriptor
                      //after non-indirect descriptor.
                      if (PREDICT_FALSE
                          (rxvq->desc[desc_head].len < sizeof (vring_desc_t)))
                        {
                          error = VHOST_USER_TX_FUNC_ERROR_INDIRECT_OVERFLOW;
                          goto done;
                        }
                      if (PREDICT_FALSE
                          (!(desc_table =
                             map_guest_mem (vui,
                                            rxvq->desc[desc_index].addr,
                                            &map_hint))))
                        {
                          error = VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL;
                          goto done;
                        }
                      desc_index = 0;
                    }
                  buffer_map_addr = desc_table[desc_index].addr;
                  buffer_len = desc_table[desc_index].len;
                }
              else
                {
                  error = VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOMRG;
                  goto done;
                }
            }

          {
            ASSERT (copy_len < VHOST_USER_COPY_ARRAY_N);
            vhost_copy_t *cpy = &cpu->copy[copy_len];
            copy_len++;
            cpy->len = bytes_left;
            cpy->len = (cpy->len > buffer_len) ? buffer_len : cpy->len;
            cpy->dst = buffer_map_addr;
            cpy->src = (uword) vlib_buffer_get_current (current_b0) +
              current_b0->current_length - bytes_left;

            bytes_left -= cpy->len;
            buffer_len -= cpy->len;
            buffer_map_addr += cpy->len;
            desc_len += cpy->len;

            CLIB_PREFETCH (&rxvq->desc, CLIB_CACHE_LINE_BYTES, LOAD);
          }

          // Check if vlib buffer has more data. If not, get more or break.
          if (PREDICT_TRUE (!bytes_left))
            {
              if (PREDICT_FALSE
                  (current_b0->flags & VLIB_BUFFER_NEXT_PRESENT))
                {
                  current_b0 = vlib_get_buffer (vm, current_b0->next_buffer);
                  bytes_left = current_b0->current_length;
                }
              else
                {
                  //End of packet
                  break;
                }
            }
        }

      //Move from available to used ring
      rxvq->used->ring[rxvq->last_used_idx & rxvq->qsz_mask].id = desc_head;
      rxvq->used->ring[rxvq->last_used_idx & rxvq->qsz_mask].len = desc_len;
      vhost_user_log_dirty_ring (vui, rxvq,
                                 ring[rxvq->last_used_idx & rxvq->qsz_mask]);
      rxvq->last_avail_idx++;
      rxvq->last_used_idx++;

      if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
        {
          cpu->current_trace->hdr = cpu->tx_headers[tx_headers_len - 1];
        }

      n_left--;                 //At the end for error counting when 'goto done' is invoked

      /*
       * Do the copy periodically to prevent the cpu->copy array from
       * overflowing and corrupting memory.
       */
      if (PREDICT_FALSE (copy_len >= VHOST_USER_TX_COPY_THRESHOLD))
        {
          if (PREDICT_FALSE (vhost_user_tx_copy (vui, cpu->copy, copy_len,
                                                 &map_hint)))
            {
              vlib_error_count (vm, node->node_index,
                                VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL, 1);
            }
          copy_len = 0;

          /* give buffers back to driver */
          CLIB_MEMORY_BARRIER ();
          rxvq->used->idx = rxvq->last_used_idx;
          vhost_user_log_dirty_ring (vui, rxvq, idx);
        }
      buffers++;
    }

done:
  //Do the memory copies
  if (PREDICT_FALSE (vhost_user_tx_copy (vui, cpu->copy, copy_len,
                                         &map_hint)))
    {
      vlib_error_count (vm, node->node_index,
                        VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL, 1);
    }

  CLIB_MEMORY_BARRIER ();
  rxvq->used->idx = rxvq->last_used_idx;
  vhost_user_log_dirty_ring (vui, rxvq, idx);

  /*
   * When n_left is set, error is always set to something too.
   * In case the error is due to lack of remaining buffers, we go back up and
   * retry.
   * The idea is that it is better to waste some time on packets that have
   * already been processed than to drop them and get more fresh packets with
   * a good likelihood that they will be dropped too.
   * This technique also gives the VM driver more time to pick up packets.
   * In case the traffic flows from physical to virtual interfaces, this
   * technique will end up leveraging the physical NIC buffers in order to
   * absorb the VM's CPU jitter.
   */
  if (n_left && (error == VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOBUF) && retry)
    {
      retry--;
      goto retry;
    }

  /* interrupt (call) handling */
  if ((rxvq->callfd_idx != ~0) &&
      !(rxvq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
    {
      rxvq->n_since_last_int += frame->n_vectors - n_left;

      if (rxvq->n_since_last_int > vum->coalesce_frames)
        vhost_user_send_call (vm, rxvq);
    }

  vhost_user_vring_unlock (vui, qid);

done3:
  if (PREDICT_FALSE (n_left && error != VHOST_USER_TX_FUNC_ERROR_NONE))
    {
      vlib_error_count (vm, node->node_index, error, n_left);
      vlib_increment_simple_counter
        (vnet_main.interface_main.sw_if_counters
         + VNET_INTERFACE_COUNTER_DROP,
         thread_index, vui->sw_if_index, n_left);
    }

  vlib_buffer_free (vm, vlib_frame_vector_args (frame), frame->n_vectors);
  return frame->n_vectors;
}

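/*
 * RX-mode change callback (polling / interrupt / adaptive). Note the
 * direction swap: what VPP calls the RX queue is the guest's TX vring, hence
 * the VHOST_VRING_IDX_TX lookup. Interrupt and adaptive modes require the
 * guest driver to supply a kick fd, and the first/last queue switched into
 * interrupt mode starts/stops the shared interrupt-coalescing timer process
 * (when coalescing is configured).
 */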
static __clib_unused clib_error_t *
vhost_user_interface_rx_mode_change (vnet_main_t * vnm, u32 hw_if_index,
                                     u32 qid, vnet_hw_if_rx_mode mode)
{
  vlib_main_t *vm = vnm->vlib_main;
  vnet_hw_interface_t *hif = vnet_get_hw_interface (vnm, hw_if_index);
  vhost_user_main_t *vum = &vhost_user_main;
  vhost_user_intf_t *vui =
    pool_elt_at_index (vum->vhost_user_interfaces, hif->dev_instance);
  vhost_user_vring_t *txvq = &vui->vrings[VHOST_VRING_IDX_TX (qid)];

  if ((mode == VNET_HW_IF_RX_MODE_INTERRUPT) ||
      (mode == VNET_HW_IF_RX_MODE_ADAPTIVE))
    {
      if (txvq->kickfd_idx == ~0)
        {
          // We cannot support interrupt mode if the driver opts out
          return clib_error_return (0, "Driver does not support interrupt");
        }
      if (txvq->mode == VNET_HW_IF_RX_MODE_POLLING)
        {
          vum->ifq_count++;
          // Start the timer if this is the first encounter on interrupt
          // interface/queue
          if ((vum->ifq_count == 1) &&
              (vum->coalesce_time > 0.0) && (vum->coalesce_frames > 0))
            vlib_process_signal_event (vm,
                                       vhost_user_send_interrupt_node.index,
                                       VHOST_USER_EVENT_START_TIMER, 0);
        }
    }
  else if (mode == VNET_HW_IF_RX_MODE_POLLING)
    {
      if (((txvq->mode == VNET_HW_IF_RX_MODE_INTERRUPT) ||
           (txvq->mode == VNET_HW_IF_RX_MODE_ADAPTIVE)) && vum->ifq_count)
        {
          vum->ifq_count--;
          // Stop the timer if there is no more interrupt interface/queue
          if ((vum->ifq_count == 0) &&
              (vum->coalesce_time > 0.0) && (vum->coalesce_frames > 0))
            vlib_process_signal_event (vm,
                                       vhost_user_send_interrupt_node.index,
                                       VHOST_USER_EVENT_STOP_TIMER, 0);
        }
    }

  txvq->mode = mode;
  if (mode == VNET_HW_IF_RX_MODE_POLLING)
    txvq->used->flags = VRING_USED_F_NO_NOTIFY;
  else if ((mode == VNET_HW_IF_RX_MODE_ADAPTIVE) ||
           (mode == VNET_HW_IF_RX_MODE_INTERRUPT))
    txvq->used->flags = 0;
  else
    {
      vu_log_err (vui, "unhandled mode %d changed for if %d queue %d", mode,
                  hw_if_index, qid);
      return clib_error_return (0, "unsupported");
    }

  return 0;
}

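/*
 * Admin up/down callback. The hardware link state is recomputed via
 * vui_is_link_up() before and after the admin flag changes, and is only
 * pushed to vnet when it actually changed.
 */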
static __clib_unused clib_error_t *
vhost_user_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index,
                                    u32 flags)
{
  vnet_hw_interface_t *hif = vnet_get_hw_interface (vnm, hw_if_index);
  vhost_user_main_t *vum = &vhost_user_main;
  vhost_user_intf_t *vui =
    pool_elt_at_index (vum->vhost_user_interfaces, hif->dev_instance);
  u8 link_old, link_new;

  link_old = vui_is_link_up (vui);

  vui->admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;

  link_new = vui_is_link_up (vui);

  if (link_old != link_new)
    vnet_hw_interface_set_flags (vnm, vui->hw_if_index, link_new ?
                                 VNET_HW_INTERFACE_FLAG_LINK_UP : 0);

  return /* no error */ 0;
}

/* *INDENT-OFF* */
VNET_DEVICE_CLASS (vhost_user_device_class) = {
  .name = "vhost-user",
  .tx_function_n_errors = VHOST_USER_TX_FUNC_N_ERROR,
  .tx_function_error_strings = vhost_user_tx_func_error_strings,
  .format_device_name = format_vhost_user_interface_name,
  .name_renumber = vhost_user_name_renumber,
  .admin_up_down_function = vhost_user_interface_admin_up_down,
  .rx_mode_change_function = vhost_user_interface_rx_mode_change,
  .format_tx_trace = format_vhost_trace,
};

/* *INDENT-ON* */

/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */