/*
 *------------------------------------------------------------------
 * vhost-user-output
 *
 * Copyright (c) 2014-2018 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *------------------------------------------------------------------
 */
19
Steven Luong4208a4c2019-05-06 08:51:56 -070020#include <stddef.h>
Mohsin Kazmie7cde312018-06-26 17:20:11 +020021#include <fcntl.h> /* for open */
22#include <sys/ioctl.h>
23#include <sys/socket.h>
24#include <sys/un.h>
25#include <sys/stat.h>
26#include <sys/types.h>
27#include <sys/uio.h> /* for iovec */
28#include <netinet/in.h>
29#include <sys/vfs.h>
30
31#include <linux/if_arp.h>
32#include <linux/if_tun.h>
33
34#include <vlib/vlib.h>
35#include <vlib/unix/unix.h>
36
Mohsin Kazmie7cde312018-06-26 17:20:11 +020037#include <vnet/ethernet/ethernet.h>
38#include <vnet/devices/devices.h>
39#include <vnet/feature/feature.h>
Mohsin Kazmif5462362021-02-23 15:55:04 +010040#include <vnet/ip/ip_psh_cksum.h>
Mohsin Kazmie7cde312018-06-26 17:20:11 +020041
42#include <vnet/devices/virtio/vhost_user.h>
43#include <vnet/devices/virtio/vhost_user_inline.h>
44
Mohsin Kazmi0b042092020-04-17 16:50:56 +000045#include <vnet/gso/hdr_offset_parser.h>
/*
 * On the transmit side, we keep processing the buffers from vlib in the
 * while loop and prepare the copy order to be executed later. However, the
 * static array in which we keep the copy order is limited to
 * VHOST_USER_COPY_ARRAY_N entries. To avoid corrupting memory, we have to
 * do the copy when the static array reaches the copy threshold, leaving
 * enough headroom for the entries the inner loop may still add for one
 * maximum-sized (64k) frame. We subtract 200 because our default buffer
 * size is 2048 while the default descriptor length is likely 1536: a jumbo
 * frame that fits in fewer than 40 vlib buffers may need roughly twice as
 * many guest descriptors, and 200 gives comfortable extra headroom.
 */
#define VHOST_USER_TX_COPY_THRESHOLD (VHOST_USER_COPY_ARRAY_N - 200)
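
/*
 * A rough worked example (a sketch using the defaults mentioned above): a
 * 64 KB TSO frame spans about 65535 / 2048 = 32 vlib buffers, but with
 * 1536-byte guest descriptors it can consume up to 65535 / 1536 = 43
 * descriptor entries, each needing its own copy-order slot. The 200-entry
 * margin therefore covers several such worst-case packets queued between
 * threshold checks.
 */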

extern vnet_device_class_t vhost_user_device_class;

#define foreach_vhost_user_tx_func_error                            \
  _(NONE, "no error")                                               \
  _(NOT_READY, "vhost vring not ready")                             \
  _(DOWN, "vhost interface is down")                                \
  _(PKT_DROP_NOBUF, "tx packet drops (no available descriptors)")   \
  _(PKT_DROP_NOMRG, "tx packet drops (cannot merge descriptors)")   \
  _(MMAP_FAIL, "mmap failure")                                      \
  _(INDIRECT_OVERFLOW, "indirect descriptor table overflow")

typedef enum
{
#define _(f,s) VHOST_USER_TX_FUNC_ERROR_##f,
  foreach_vhost_user_tx_func_error
#undef _
    VHOST_USER_TX_FUNC_N_ERROR,
} vhost_user_tx_func_error_t;

static __clib_unused char *vhost_user_tx_func_error_strings[] = {
#define _(n,s) s,
  foreach_vhost_user_tx_func_error
#undef _
};

static __clib_unused u8 *
format_vhost_user_interface_name (u8 * s, va_list * args)
{
  u32 i = va_arg (*args, u32);
  u32 show_dev_instance = ~0;
  vhost_user_main_t *vum = &vhost_user_main;

  if (i < vec_len (vum->show_dev_instance_by_real_dev_instance))
    show_dev_instance = vum->show_dev_instance_by_real_dev_instance[i];

  if (show_dev_instance != ~0)
    i = show_dev_instance;

  s = format (s, "VirtualEthernet0/0/%d", i);
  return s;
}

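/*
 * Renumbering only changes what the interface name formatter above prints:
 * the real dev_instance is mapped to a display instance through
 * show_dev_instance_by_real_dev_instance. For example, renumbering real
 * instance 0 to 5 makes the interface show up as VirtualEthernet0/0/5.
 */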
static __clib_unused int
vhost_user_name_renumber (vnet_hw_interface_t * hi, u32 new_dev_instance)
{
  // FIXME: check if the new dev instance is already used
  vhost_user_main_t *vum = &vhost_user_main;
  vhost_user_intf_t *vui = pool_elt_at_index (vum->vhost_user_interfaces,
					      hi->dev_instance);

  vec_validate_init_empty (vum->show_dev_instance_by_real_dev_instance,
			   hi->dev_instance, ~0);

  vum->show_dev_instance_by_real_dev_instance[hi->dev_instance] =
    new_dev_instance;

  vu_log_debug (vui, "renumbered vhost-user interface dev_instance %d to %d",
		hi->dev_instance, new_dev_instance);

  return 0;
}

static_always_inline void
vhost_user_tx_trace (vhost_trace_t * t,
		     vhost_user_intf_t * vui, u16 qid,
		     vlib_buffer_t * b, vhost_user_vring_t * rxvq)
{
  vhost_user_main_t *vum = &vhost_user_main;
  u32 last_avail_idx = rxvq->last_avail_idx;
  u32 desc_current = rxvq->avail->ring[last_avail_idx & rxvq->qsz_mask];
  vring_desc_t *hdr_desc = 0;
  u32 hint = 0;

  clib_memset (t, 0, sizeof (*t));
  t->device_index = vui - vum->vhost_user_interfaces;
  t->qid = qid;

  hdr_desc = &rxvq->desc[desc_current];
  if (rxvq->desc[desc_current].flags & VRING_DESC_F_INDIRECT)
    {
      t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_INDIRECT;
      /* Header is the first here */
      hdr_desc = map_guest_mem (vui, rxvq->desc[desc_current].addr, &hint);
    }
  if (rxvq->desc[desc_current].flags & VRING_DESC_F_NEXT)
    {
      t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_SIMPLE_CHAINED;
    }
  if (!(rxvq->desc[desc_current].flags & VRING_DESC_F_NEXT) &&
      !(rxvq->desc[desc_current].flags & VRING_DESC_F_INDIRECT))
    {
      t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_SINGLE_DESC;
    }

  t->first_desc_len = hdr_desc ? hdr_desc->len : 0;
}

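/*
 * vhost_user_tx_copy () drains the queued copy orders. The main loop is
 * software-pipelined: the guest addresses of copies n+2 and n+3 are mapped
 * (and their sources prefetched) while copies n and n+1 are performed, so
 * the map_guest_mem () latency overlaps with the memcpy work. A non-zero
 * return means a guest address failed to map; the caller counts it as an
 * MMAP_FAIL error.
 */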
static_always_inline u32
vhost_user_tx_copy (vhost_user_intf_t * vui, vhost_copy_t * cpy,
		    u16 copy_len, u32 * map_hint)
{
  void *dst0, *dst1, *dst2, *dst3;
  if (PREDICT_TRUE (copy_len >= 4))
    {
      if (PREDICT_FALSE (!(dst2 = map_guest_mem (vui, cpy[0].dst, map_hint))))
	return 1;
      if (PREDICT_FALSE (!(dst3 = map_guest_mem (vui, cpy[1].dst, map_hint))))
	return 1;
      while (PREDICT_TRUE (copy_len >= 4))
	{
	  dst0 = dst2;
	  dst1 = dst3;

	  if (PREDICT_FALSE
	      (!(dst2 = map_guest_mem (vui, cpy[2].dst, map_hint))))
	    return 1;
	  if (PREDICT_FALSE
	      (!(dst3 = map_guest_mem (vui, cpy[3].dst, map_hint))))
	    return 1;

	  clib_prefetch_load ((void *) cpy[2].src);
	  clib_prefetch_load ((void *) cpy[3].src);

	  clib_memcpy_fast (dst0, (void *) cpy[0].src, cpy[0].len);
	  clib_memcpy_fast (dst1, (void *) cpy[1].src, cpy[1].len);

	  vhost_user_log_dirty_pages_2 (vui, cpy[0].dst, cpy[0].len, 1);
	  vhost_user_log_dirty_pages_2 (vui, cpy[1].dst, cpy[1].len, 1);
	  copy_len -= 2;
	  cpy += 2;
	}
    }
  while (copy_len)
    {
      if (PREDICT_FALSE (!(dst0 = map_guest_mem (vui, cpy->dst, map_hint))))
	return 1;
      clib_memcpy_fast (dst0, (void *) cpy->src, cpy->len);
      vhost_user_log_dirty_pages_2 (vui, cpy->dst, cpy->len, 1);
      copy_len -= 1;
      cpy += 1;
    }
  return 0;
}

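/*
 * vhost_user_handle_tx_offload () translates VPP buffer offload flags into
 * a virtio_net_hdr the guest understands. Per virtio checksum-offload
 * semantics, the device seeds the L4 checksum field with the pseudo-header
 * checksum and points csum_start/csum_offset at the bytes the guest must
 * finish. As an illustrative example (not taken from a trace): for a
 * TCP/IPv4 packet with a 14-byte Ethernet header and a 20-byte IP header,
 * csum_start would be 34 and csum_offset 16, i.e.
 * offsetof (tcp_header_t, checksum).
 */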
static_always_inline void
vhost_user_handle_tx_offload (vhost_user_intf_t * vui, vlib_buffer_t * b,
			      virtio_net_hdr_t * hdr)
{
  generic_header_offset_t gho = { 0 };
  int is_ip4 = b->flags & VNET_BUFFER_F_IS_IP4;
  int is_ip6 = b->flags & VNET_BUFFER_F_IS_IP6;
  vnet_buffer_oflags_t oflags = vnet_buffer (b)->oflags;
  u16 psh_cksum = 0;
  ip4_header_t *ip4 = 0;
  ip6_header_t *ip6 = 0;

  ASSERT (!(is_ip4 && is_ip6));
  vnet_generic_header_offset_parser (b, &gho, 1 /* l2 */ , is_ip4, is_ip6);
  if (oflags & VNET_BUFFER_OFFLOAD_F_IP_CKSUM)
    {
      ip4 =
	(ip4_header_t *) (vlib_buffer_get_current (b) + gho.l3_hdr_offset);
      ip4->checksum = ip4_header_checksum (ip4);
      psh_cksum = ip4_pseudo_header_cksum (ip4);
    }
  else
    {
      ip6 = (ip6_header_t *) (vlib_buffer_get_current (b) + gho.l3_hdr_offset);
      psh_cksum = ip6_pseudo_header_cksum (ip6);
    }

  /* checksum offload */
  if (oflags & VNET_BUFFER_OFFLOAD_F_UDP_CKSUM)
    {
      udp_header_t *udp =
	(udp_header_t *) (vlib_buffer_get_current (b) + gho.l4_hdr_offset);
      udp->checksum = psh_cksum;
      hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
      hdr->csum_start = gho.l4_hdr_offset;
      hdr->csum_offset = offsetof (udp_header_t, checksum);
    }
  else if (oflags & VNET_BUFFER_OFFLOAD_F_TCP_CKSUM)
    {
      tcp_header_t *tcp =
	(tcp_header_t *) (vlib_buffer_get_current (b) + gho.l4_hdr_offset);
      tcp->checksum = psh_cksum;
      hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
      hdr->csum_start = gho.l4_hdr_offset;
      hdr->csum_offset = offsetof (tcp_header_t, checksum);
    }

  /* GSO offload */
  if (b->flags & VNET_BUFFER_F_GSO)
    {
      if (oflags & VNET_BUFFER_OFFLOAD_F_TCP_CKSUM)
	{
	  if (is_ip4 &&
	      (vui->features & VIRTIO_FEATURE (VIRTIO_NET_F_GUEST_TSO4)))
	    {
	      hdr->gso_size = vnet_buffer2 (b)->gso_size;
	      hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
	    }
	  else if (is_ip6 &&
		   (vui->features & VIRTIO_FEATURE (VIRTIO_NET_F_GUEST_TSO6)))
	    {
	      hdr->gso_size = vnet_buffer2 (b)->gso_size;
	      hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
	    }
	}
      else if ((vui->features & VIRTIO_FEATURE (VIRTIO_NET_F_GUEST_UFO)) &&
	       (oflags & VNET_BUFFER_OFFLOAD_F_UDP_CKSUM))
	{
	  hdr->gso_size = vnet_buffer2 (b)->gso_size;
	  hdr->gso_type = VIRTIO_NET_HDR_GSO_UDP;
	}
    }
}

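/*
 * In a packed virtqueue, descriptor ownership is signaled with the
 * VRING_DESC_F_AVAIL/VRING_DESC_F_USED flag pair rather than separate
 * avail/used rings. Roughly: while the used wrap counter is 1, the device
 * marks a descriptor used by setting both flags; after the ring wraps, it
 * clears both. Note below that the flags of the first descriptor in the
 * batch are computed up front but written last, so the guest never
 * observes a partially returned batch.
 */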
static_always_inline void
vhost_user_mark_desc_available (vlib_main_t * vm, vhost_user_intf_t * vui,
				vhost_user_vring_t * rxvq,
				u16 * n_descs_processed, u8 chained,
				vlib_frame_t * frame, u32 n_left)
{
  u16 desc_idx, flags;
  vring_packed_desc_t *desc_table = rxvq->packed_desc;
  u16 last_used_idx = rxvq->last_used_idx;

  if (PREDICT_FALSE (*n_descs_processed == 0))
    return;

  if (rxvq->used_wrap_counter)
    flags = desc_table[last_used_idx & rxvq->qsz_mask].flags |
      (VRING_DESC_F_AVAIL | VRING_DESC_F_USED);
  else
    flags = desc_table[last_used_idx & rxvq->qsz_mask].flags &
      ~(VRING_DESC_F_AVAIL | VRING_DESC_F_USED);

  vhost_user_advance_last_used_idx (rxvq);

  for (desc_idx = 1; desc_idx < *n_descs_processed; desc_idx++)
    {
      if (rxvq->used_wrap_counter)
	desc_table[rxvq->last_used_idx & rxvq->qsz_mask].flags |=
	  (VRING_DESC_F_AVAIL | VRING_DESC_F_USED);
      else
	desc_table[rxvq->last_used_idx & rxvq->qsz_mask].flags &=
	  ~(VRING_DESC_F_AVAIL | VRING_DESC_F_USED);
      vhost_user_advance_last_used_idx (rxvq);
    }

  desc_table[last_used_idx & rxvq->qsz_mask].flags = flags;

  *n_descs_processed = 0;

  if (chained)
    {
      vring_packed_desc_t *desc_table = rxvq->packed_desc;

      while (desc_table[rxvq->last_used_idx & rxvq->qsz_mask].flags &
	     VRING_DESC_F_NEXT)
	vhost_user_advance_last_used_idx (rxvq);

      /* Advance past the current chained table entries */
      vhost_user_advance_last_used_idx (rxvq);
    }

  /* interrupt (call) handling */
  if ((rxvq->callfd_idx != ~0) &&
      (rxvq->avail_event->flags != VRING_EVENT_F_DISABLE))
    {
      vhost_user_main_t *vum = &vhost_user_main;

      rxvq->n_since_last_int += frame->n_vectors - n_left;
      if (rxvq->n_since_last_int > vum->coalesce_frames)
	vhost_user_send_call (vm, vui, rxvq);
    }
}

static_always_inline void
vhost_user_tx_trace_packed (vhost_trace_t * t, vhost_user_intf_t * vui,
			    u16 qid, vlib_buffer_t * b,
			    vhost_user_vring_t * rxvq)
{
  vhost_user_main_t *vum = &vhost_user_main;
  u32 last_avail_idx = rxvq->last_avail_idx;
  u32 desc_current = last_avail_idx & rxvq->qsz_mask;
  vring_packed_desc_t *hdr_desc = 0;
  u32 hint = 0;

  clib_memset (t, 0, sizeof (*t));
  t->device_index = vui - vum->vhost_user_interfaces;
  t->qid = qid;

  hdr_desc = &rxvq->packed_desc[desc_current];
  if (rxvq->packed_desc[desc_current].flags & VRING_DESC_F_INDIRECT)
    {
      t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_INDIRECT;
      /* Header is the first here */
      hdr_desc = map_guest_mem (vui, rxvq->packed_desc[desc_current].addr,
				&hint);
    }
  if (rxvq->packed_desc[desc_current].flags & VRING_DESC_F_NEXT)
    {
      t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_SIMPLE_CHAINED;
    }
  if (!(rxvq->packed_desc[desc_current].flags & VRING_DESC_F_NEXT) &&
      !(rxvq->packed_desc[desc_current].flags & VRING_DESC_F_INDIRECT))
    {
      t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_SINGLE_DESC;
    }

  t->first_desc_len = hdr_desc ? hdr_desc->len : 0;
}

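/*
 * Packed-ring transmit path. For each vlib buffer: read the head
 * descriptor (following indirect tables when present), queue a copy order
 * for the virtio net header, then queue copy orders for the payload,
 * crossing into chained or merged-rxbuf descriptors as either side runs
 * out of space. Copies are flushed and descriptors handed back to the
 * guest whenever the copy array approaches VHOST_USER_TX_COPY_THRESHOLD.
 */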
static_always_inline uword
vhost_user_device_class_packed (vlib_main_t *vm, vlib_node_runtime_t *node,
				vlib_frame_t *frame, vhost_user_intf_t *vui,
				vhost_user_vring_t *rxvq)
{
  u32 *buffers = vlib_frame_vector_args (frame);
  u32 n_left = frame->n_vectors;
  vhost_user_main_t *vum = &vhost_user_main;
  u32 qid = rxvq->qid;
  u8 error;
  u32 thread_index = vm->thread_index;
  vhost_cpu_t *cpu = &vum->cpus[thread_index];
  u32 map_hint = 0;
  u8 retry = 8;
  u16 copy_len;
  u16 tx_headers_len;
  vring_packed_desc_t *desc_table;
  u32 or_flags;
  u16 desc_head, desc_index, desc_len;
  u16 n_descs_processed;
  u8 indirect, chained;

retry:
  error = VHOST_USER_TX_FUNC_ERROR_NONE;
  tx_headers_len = 0;
  copy_len = 0;
  n_descs_processed = 0;

  while (n_left > 0)
    {
      vlib_buffer_t *b0, *current_b0;
      uword buffer_map_addr;
      u32 buffer_len;
      u16 bytes_left;
      u32 total_desc_len = 0;
      u16 n_entries = 0;

      indirect = 0;
      chained = 0;
      if (PREDICT_TRUE (n_left > 1))
	vlib_prefetch_buffer_with_index (vm, buffers[1], LOAD);

      b0 = vlib_get_buffer (vm, buffers[0]);
      if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
	{
	  cpu->current_trace = vlib_add_trace (vm, node, b0,
					       sizeof (*cpu->current_trace));
	  vhost_user_tx_trace_packed (cpu->current_trace, vui, qid / 2, b0,
				      rxvq);
	}

      desc_table = rxvq->packed_desc;
      desc_head = desc_index = rxvq->last_avail_idx & rxvq->qsz_mask;
      if (PREDICT_FALSE (!vhost_user_packed_desc_available (rxvq, desc_head)))
	{
	  error = VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOBUF;
	  goto done;
	}
      /*
       * Go deeper in case of indirect descriptor.
       * To test it, turn off mrg_rxbuf.
       */
      if (desc_table[desc_head].flags & VRING_DESC_F_INDIRECT)
	{
	  indirect = 1;
	  if (PREDICT_FALSE (desc_table[desc_head].len <
			     sizeof (vring_packed_desc_t)))
	    {
	      error = VHOST_USER_TX_FUNC_ERROR_INDIRECT_OVERFLOW;
	      goto done;
	    }
	  n_entries = desc_table[desc_head].len >> 4;
	  desc_table = map_guest_mem (vui, desc_table[desc_index].addr,
				      &map_hint);
	  if (PREDICT_FALSE (desc_table == 0))
	    {
	      error = VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL;
	      goto done;
	    }
	  desc_index = 0;
	}
      else if (rxvq->packed_desc[desc_head].flags & VRING_DESC_F_NEXT)
	chained = 1;

      desc_len = vui->virtio_net_hdr_sz;
      buffer_map_addr = desc_table[desc_index].addr;
      buffer_len = desc_table[desc_index].len;

      /* Get a header from the header array */
      virtio_net_hdr_mrg_rxbuf_t *hdr = &cpu->tx_headers[tx_headers_len];
      tx_headers_len++;
      hdr->hdr.flags = 0;
      hdr->hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE;
      hdr->num_buffers = 1;

      or_flags = (b0->flags & VNET_BUFFER_F_OFFLOAD);

      /* Guest supports csum offload and buffer requires checksum offload? */
      if (or_flags &&
	  (vui->features & VIRTIO_FEATURE (VIRTIO_NET_F_GUEST_CSUM)))
	vhost_user_handle_tx_offload (vui, b0, &hdr->hdr);

      /* Prepare a copy order executed later for the header */
      ASSERT (copy_len < VHOST_USER_COPY_ARRAY_N);
      vhost_copy_t *cpy = &cpu->copy[copy_len];
      copy_len++;
      cpy->len = vui->virtio_net_hdr_sz;
      cpy->dst = buffer_map_addr;
      cpy->src = (uword) hdr;

      buffer_map_addr += vui->virtio_net_hdr_sz;
      buffer_len -= vui->virtio_net_hdr_sz;
      bytes_left = b0->current_length;
      current_b0 = b0;
      while (1)
	{
	  if (buffer_len == 0)
	    {
	      /* Get new output */
	      if (chained)
		{
		  /*
		   * Next one is chained
		   * Test it with both indirect and mrg_rxbuf off
		   */
		  if (PREDICT_FALSE (!(desc_table[desc_index].flags &
				       VRING_DESC_F_NEXT)))
		    {
		      /*
		       * Last descriptor in chain.
		       * Dequeue queued descriptors for this packet
		       */
		      vhost_user_dequeue_chained_descs (rxvq,
							&n_descs_processed);
		      error = VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOBUF;
		      goto done;
		    }
		  vhost_user_advance_last_avail_idx (rxvq);
		  desc_index = rxvq->last_avail_idx & rxvq->qsz_mask;
		  n_descs_processed++;
		  buffer_map_addr = desc_table[desc_index].addr;
		  buffer_len = desc_table[desc_index].len;
		  total_desc_len += desc_len;
		  desc_len = 0;
		}
	      else if (indirect)
		{
		  /*
		   * Indirect table
		   * Test it with mrg_rxbuf off
		   */
		  if (PREDICT_TRUE (n_entries > 0))
		    n_entries--;
		  else
		    {
		      /* Dequeue queued descriptors for this packet */
		      vhost_user_dequeue_chained_descs (rxvq,
							&n_descs_processed);
		      error = VHOST_USER_TX_FUNC_ERROR_INDIRECT_OVERFLOW;
		      goto done;
		    }
		  total_desc_len += desc_len;
		  desc_index = (desc_index + 1) & rxvq->qsz_mask;
		  buffer_map_addr = desc_table[desc_index].addr;
		  buffer_len = desc_table[desc_index].len;
		  desc_len = 0;
		}
	      else if (vui->virtio_net_hdr_sz == 12)
		{
		  /*
		   * MRG is available
		   * This is the default setting for the guest VM
		   */
		  virtio_net_hdr_mrg_rxbuf_t *hdr =
		    &cpu->tx_headers[tx_headers_len - 1];

		  desc_table[desc_index].len = desc_len;
		  vhost_user_advance_last_avail_idx (rxvq);
		  desc_head = desc_index =
		    rxvq->last_avail_idx & rxvq->qsz_mask;
		  hdr->num_buffers++;
		  n_descs_processed++;
		  desc_len = 0;

		  if (PREDICT_FALSE (!vhost_user_packed_desc_available
				     (rxvq, desc_index)))
		    {
		      /* Dequeue queued descriptors for this packet */
		      vhost_user_dequeue_descs (rxvq, hdr,
						&n_descs_processed);
		      error = VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOBUF;
		      goto done;
		    }

		  buffer_map_addr = desc_table[desc_index].addr;
		  buffer_len = desc_table[desc_index].len;
		}
	      else
		{
		  error = VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOMRG;
		  goto done;
		}
	    }

	  ASSERT (copy_len < VHOST_USER_COPY_ARRAY_N);
	  vhost_copy_t *cpy = &cpu->copy[copy_len];
	  copy_len++;
	  cpy->len = bytes_left;
	  cpy->len = (cpy->len > buffer_len) ? buffer_len : cpy->len;
	  cpy->dst = buffer_map_addr;
	  cpy->src = (uword) vlib_buffer_get_current (current_b0) +
	    current_b0->current_length - bytes_left;

	  bytes_left -= cpy->len;
	  buffer_len -= cpy->len;
	  buffer_map_addr += cpy->len;
	  desc_len += cpy->len;

	  clib_prefetch_load (&rxvq->packed_desc);

	  /* Check if vlib buffer has more data. If not, get more or break */
	  if (PREDICT_TRUE (!bytes_left))
	    {
	      if (PREDICT_FALSE
		  (current_b0->flags & VLIB_BUFFER_NEXT_PRESENT))
		{
		  current_b0 = vlib_get_buffer (vm, current_b0->next_buffer);
		  bytes_left = current_b0->current_length;
		}
	      else
		{
		  /* End of packet */
		  break;
		}
	    }
	}

      /* Move from available to used ring */
      total_desc_len += desc_len;
      rxvq->packed_desc[desc_head].len = total_desc_len;

      vhost_user_advance_last_avail_table_idx (vui, rxvq, chained);
      n_descs_processed++;

      if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
	cpu->current_trace->hdr = cpu->tx_headers[tx_headers_len - 1];

      n_left--;

      /*
       * Do the copy periodically to prevent
       * cpu->copy array overflow and corrupt memory
       */
      if (PREDICT_FALSE (copy_len >= VHOST_USER_TX_COPY_THRESHOLD) || chained)
	{
	  if (PREDICT_FALSE (vhost_user_tx_copy (vui, cpu->copy, copy_len,
						 &map_hint)))
	    vlib_error_count (vm, node->node_index,
			      VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL, 1);
	  copy_len = 0;

	  /* give buffers back to driver */
	  vhost_user_mark_desc_available (vm, vui, rxvq, &n_descs_processed,
					  chained, frame, n_left);
	}

      buffers++;
    }

done:
  if (PREDICT_TRUE (copy_len))
    {
      if (PREDICT_FALSE (vhost_user_tx_copy (vui, cpu->copy, copy_len,
					     &map_hint)))
	vlib_error_count (vm, node->node_index,
			  VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL, 1);

      vhost_user_mark_desc_available (vm, vui, rxvq, &n_descs_processed,
				      chained, frame, n_left);
    }

  /*
   * When n_left is set, error is always set to something too.
   * In case the error is due to lack of remaining buffers, we go back up
   * and retry.
   * The idea is that it is better to waste some time on packets that have
   * already been processed than to drop them and fetch more fresh packets
   * with a good likelihood that they will be dropped too.
   * This technique also gives the VM driver more time to pick up packets.
   * In case the traffic flows from physical to virtual interfaces, this
   * technique will end up leveraging the physical NIC buffer in order to
   * absorb the VM's CPU jitter.
   */
  if (n_left && (error == VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOBUF) && retry)
    {
      retry--;
      goto retry;
    }

  clib_spinlock_unlock (&rxvq->vring_lock);

  if (PREDICT_FALSE (n_left && error != VHOST_USER_TX_FUNC_ERROR_NONE))
    {
      vlib_error_count (vm, node->node_index, error, n_left);
      vlib_increment_simple_counter
	(vnet_main.interface_main.sw_if_counters +
	 VNET_INTERFACE_COUNTER_DROP, thread_index, vui->sw_if_index, n_left);
    }

  vlib_buffer_free (vm, vlib_frame_vector_args (frame), frame->n_vectors);
  return frame->n_vectors;
}

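/*
 * Device-class TX function (split-ring path). The frame's scalar arguments
 * identify the target queue; the vring spinlock is taken only when several
 * worker threads share that queue. If the interface negotiated packed
 * rings, the work is handed off to vhost_user_device_class_packed () above,
 * which inherits the lock and releases it on exit.
 */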
VNET_DEVICE_CLASS_TX_FN (vhost_user_device_class) (vlib_main_t * vm,
						   vlib_node_runtime_t *
						   node, vlib_frame_t * frame)
{
  u32 *buffers = vlib_frame_vector_args (frame);
  u32 n_left = frame->n_vectors;
  vhost_user_main_t *vum = &vhost_user_main;
  vnet_interface_output_runtime_t *rd = (void *) node->runtime_data;
  vhost_user_intf_t *vui =
    pool_elt_at_index (vum->vhost_user_interfaces, rd->dev_instance);
  u32 qid;
  vhost_user_vring_t *rxvq;
  u8 error;
  u32 thread_index = vm->thread_index;
  vhost_cpu_t *cpu = &vum->cpus[thread_index];
  u32 map_hint = 0;
  u8 retry = 8;
  u16 copy_len;
  u16 tx_headers_len;
  u32 or_flags;
  vnet_hw_if_tx_frame_t *tf = vlib_frame_scalar_args (frame);

  if (PREDICT_FALSE (!vui->admin_up))
    {
      error = VHOST_USER_TX_FUNC_ERROR_DOWN;
      goto done3;
    }

  if (PREDICT_FALSE (!vui->is_ready))
    {
      error = VHOST_USER_TX_FUNC_ERROR_NOT_READY;
      goto done3;
    }

  qid = VHOST_VRING_IDX_RX (tf->queue_id);
  rxvq = &vui->vrings[qid];
  ASSERT (tf->queue_id == rxvq->qid);

  if (PREDICT_FALSE (rxvq->avail == 0))
    {
      error = VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL;
      goto done3;
    }
  if (tf->shared_queue)
    clib_spinlock_lock (&rxvq->vring_lock);

  if (vhost_user_is_packed_ring_supported (vui))
    return (vhost_user_device_class_packed (vm, node, frame, vui, rxvq));

retry:
  error = VHOST_USER_TX_FUNC_ERROR_NONE;
  tx_headers_len = 0;
  copy_len = 0;
  while (n_left > 0)
    {
      vlib_buffer_t *b0, *current_b0;
      u16 desc_head, desc_index, desc_len;
      vring_desc_t *desc_table;
      uword buffer_map_addr;
      u32 buffer_len;
      u16 bytes_left;

      if (PREDICT_TRUE (n_left > 1))
	vlib_prefetch_buffer_with_index (vm, buffers[1], LOAD);

      b0 = vlib_get_buffer (vm, buffers[0]);

      if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
	{
	  cpu->current_trace = vlib_add_trace (vm, node, b0,
					       sizeof (*cpu->current_trace));
	  vhost_user_tx_trace (cpu->current_trace, vui, qid / 2, b0, rxvq);
	}

      if (PREDICT_FALSE (rxvq->last_avail_idx == rxvq->avail->idx))
	{
	  error = VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOBUF;
	  goto done;
	}

      desc_table = rxvq->desc;
      desc_head = desc_index =
	rxvq->avail->ring[rxvq->last_avail_idx & rxvq->qsz_mask];

      /* Go deeper in case of indirect descriptor
       * I don't know of any driver providing indirect for RX. */
      if (PREDICT_FALSE (rxvq->desc[desc_head].flags & VRING_DESC_F_INDIRECT))
	{
	  if (PREDICT_FALSE
	      (rxvq->desc[desc_head].len < sizeof (vring_desc_t)))
	    {
	      error = VHOST_USER_TX_FUNC_ERROR_INDIRECT_OVERFLOW;
	      goto done;
	    }
	  if (PREDICT_FALSE
	      (!(desc_table =
		 map_guest_mem (vui, rxvq->desc[desc_index].addr,
				&map_hint))))
	    {
	      error = VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL;
	      goto done;
	    }
	  desc_index = 0;
	}

      desc_len = vui->virtio_net_hdr_sz;
      buffer_map_addr = desc_table[desc_index].addr;
      buffer_len = desc_table[desc_index].len;

      {
	// Get a header from the header array
	virtio_net_hdr_mrg_rxbuf_t *hdr = &cpu->tx_headers[tx_headers_len];
	tx_headers_len++;
	hdr->hdr.flags = 0;
	hdr->hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE;
	hdr->num_buffers = 1;	//This is local, no need to check

	or_flags = (b0->flags & VNET_BUFFER_F_OFFLOAD);

	/* Guest supports csum offload and buffer requires checksum offload? */
	if (or_flags
	    && (vui->features & VIRTIO_FEATURE (VIRTIO_NET_F_GUEST_CSUM)))
	  vhost_user_handle_tx_offload (vui, b0, &hdr->hdr);

	// Prepare a copy order executed later for the header
	ASSERT (copy_len < VHOST_USER_COPY_ARRAY_N);
	vhost_copy_t *cpy = &cpu->copy[copy_len];
	copy_len++;
	cpy->len = vui->virtio_net_hdr_sz;
	cpy->dst = buffer_map_addr;
	cpy->src = (uword) hdr;
      }

      buffer_map_addr += vui->virtio_net_hdr_sz;
      buffer_len -= vui->virtio_net_hdr_sz;
      bytes_left = b0->current_length;
      current_b0 = b0;
      while (1)
	{
	  if (buffer_len == 0)
	    {			//Get new output
	      if (desc_table[desc_index].flags & VRING_DESC_F_NEXT)
		{
		  //Next one is chained
		  desc_index = desc_table[desc_index].next;
		  buffer_map_addr = desc_table[desc_index].addr;
		  buffer_len = desc_table[desc_index].len;
		}
	      else if (vui->virtio_net_hdr_sz == 12)	//MRG is available
		{
		  virtio_net_hdr_mrg_rxbuf_t *hdr =
		    &cpu->tx_headers[tx_headers_len - 1];

		  //Move from available to used buffer
		  rxvq->used->ring[rxvq->last_used_idx & rxvq->qsz_mask].id =
		    desc_head;
		  rxvq->used->ring[rxvq->last_used_idx & rxvq->qsz_mask].len =
		    desc_len;
		  vhost_user_log_dirty_ring (vui, rxvq,
					     ring[rxvq->last_used_idx &
						  rxvq->qsz_mask]);

		  rxvq->last_avail_idx++;
		  rxvq->last_used_idx++;
		  hdr->num_buffers++;
		  desc_len = 0;

		  if (PREDICT_FALSE
		      (rxvq->last_avail_idx == rxvq->avail->idx))
		    {
		      //Dequeue queued descriptors for this packet
		      rxvq->last_used_idx -= hdr->num_buffers - 1;
		      rxvq->last_avail_idx -= hdr->num_buffers - 1;
		      error = VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOBUF;
		      goto done;
		    }

		  desc_table = rxvq->desc;
		  desc_head = desc_index =
		    rxvq->avail->ring[rxvq->last_avail_idx & rxvq->qsz_mask];
		  if (PREDICT_FALSE
		      (rxvq->desc[desc_head].flags & VRING_DESC_F_INDIRECT))
		    {
		      //It is seriously unlikely that a driver will put an
		      //indirect descriptor after a non-indirect descriptor.
		      if (PREDICT_FALSE
			  (rxvq->desc[desc_head].len < sizeof (vring_desc_t)))
			{
			  error = VHOST_USER_TX_FUNC_ERROR_INDIRECT_OVERFLOW;
			  goto done;
			}
		      if (PREDICT_FALSE
			  (!(desc_table =
			     map_guest_mem (vui,
					    rxvq->desc[desc_index].addr,
					    &map_hint))))
			{
			  error = VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL;
			  goto done;
			}
		      desc_index = 0;
		    }
		  buffer_map_addr = desc_table[desc_index].addr;
		  buffer_len = desc_table[desc_index].len;
		}
	      else
		{
		  error = VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOMRG;
		  goto done;
		}
	    }

	  {
	    ASSERT (copy_len < VHOST_USER_COPY_ARRAY_N);
	    vhost_copy_t *cpy = &cpu->copy[copy_len];
	    copy_len++;
	    cpy->len = bytes_left;
	    cpy->len = (cpy->len > buffer_len) ? buffer_len : cpy->len;
	    cpy->dst = buffer_map_addr;
	    cpy->src = (uword) vlib_buffer_get_current (current_b0) +
	      current_b0->current_length - bytes_left;

	    bytes_left -= cpy->len;
	    buffer_len -= cpy->len;
	    buffer_map_addr += cpy->len;
	    desc_len += cpy->len;

	    clib_prefetch_load (&rxvq->desc);
	  }

	  // Check if vlib buffer has more data. If not, get more or break.
	  if (PREDICT_TRUE (!bytes_left))
	    {
	      if (PREDICT_FALSE
		  (current_b0->flags & VLIB_BUFFER_NEXT_PRESENT))
		{
		  current_b0 = vlib_get_buffer (vm, current_b0->next_buffer);
		  bytes_left = current_b0->current_length;
		}
	      else
		{
		  //End of packet
		  break;
		}
	    }
	}

      //Move from available to used ring
      rxvq->used->ring[rxvq->last_used_idx & rxvq->qsz_mask].id = desc_head;
      rxvq->used->ring[rxvq->last_used_idx & rxvq->qsz_mask].len = desc_len;
      vhost_user_log_dirty_ring (vui, rxvq,
				 ring[rxvq->last_used_idx & rxvq->qsz_mask]);
      rxvq->last_avail_idx++;
      rxvq->last_used_idx++;

      if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
	{
	  cpu->current_trace->hdr = cpu->tx_headers[tx_headers_len - 1];
	}

      n_left--;			//At the end for error counting when 'goto done' is invoked

      /*
       * Do the copy periodically to prevent
       * cpu->copy array overflow and corrupt memory
       */
      if (PREDICT_FALSE (copy_len >= VHOST_USER_TX_COPY_THRESHOLD))
	{
	  if (PREDICT_FALSE (vhost_user_tx_copy (vui, cpu->copy, copy_len,
						 &map_hint)))
	    {
	      vlib_error_count (vm, node->node_index,
				VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL, 1);
	    }
	  copy_len = 0;

	  /* give buffers back to driver */
	  CLIB_MEMORY_BARRIER ();
	  rxvq->used->idx = rxvq->last_used_idx;
	  vhost_user_log_dirty_ring (vui, rxvq, idx);
	}
      buffers++;
    }

done:
  //Do the memory copies
  if (PREDICT_FALSE (vhost_user_tx_copy (vui, cpu->copy, copy_len,
					 &map_hint)))
    {
      vlib_error_count (vm, node->node_index,
			VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL, 1);
    }

  CLIB_MEMORY_BARRIER ();
  rxvq->used->idx = rxvq->last_used_idx;
  vhost_user_log_dirty_ring (vui, rxvq, idx);

  /*
   * When n_left is set, error is always set to something too.
   * In case the error is due to lack of remaining buffers, we go back up
   * and retry.
   * The idea is that it is better to waste some time on packets that have
   * already been processed than to drop them and fetch more fresh packets
   * with a good likelihood that they will be dropped too.
   * This technique also gives the VM driver more time to pick up packets.
   * In case the traffic flows from physical to virtual interfaces, this
   * technique will end up leveraging the physical NIC buffer in order to
   * absorb the VM's CPU jitter.
   */
  if (n_left && (error == VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOBUF) && retry)
    {
      retry--;
      goto retry;
    }

  /* interrupt (call) handling */
  if ((rxvq->callfd_idx != ~0) &&
      !(rxvq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
    {
      rxvq->n_since_last_int += frame->n_vectors - n_left;

      if (rxvq->n_since_last_int > vum->coalesce_frames)
	vhost_user_send_call (vm, vui, rxvq);
    }

  clib_spinlock_unlock (&rxvq->vring_lock);

done3:
  if (PREDICT_FALSE (n_left && error != VHOST_USER_TX_FUNC_ERROR_NONE))
    {
      vlib_error_count (vm, node->node_index, error, n_left);
      vlib_increment_simple_counter
	(vnet_main.interface_main.sw_if_counters
	 + VNET_INTERFACE_COUNTER_DROP,
	 thread_index, vui->sw_if_index, n_left);
    }

  vlib_buffer_free (vm, vlib_frame_vector_args (frame), frame->n_vectors);
  return frame->n_vectors;
}

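/*
 * Note on naming: from the device's point of view, a guest TX ring is our
 * RX queue, so an "rx mode" change on queue qid applies to
 * vrings[VHOST_VRING_IDX_TX (qid)]. Switching to interrupt or adaptive
 * mode requires the guest to have supplied a kick fd; the per-thread
 * polling-queue count and the global interrupt-queue count drive the
 * start/stop of the interrupt-coalescing timer process.
 */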
static __clib_unused clib_error_t *
vhost_user_interface_rx_mode_change (vnet_main_t * vnm, u32 hw_if_index,
				     u32 qid, vnet_hw_if_rx_mode mode)
{
  vlib_main_t *vm = vnm->vlib_main;
  vnet_hw_interface_t *hif = vnet_get_hw_interface (vnm, hw_if_index);
  vhost_user_main_t *vum = &vhost_user_main;
  vhost_user_intf_t *vui =
    pool_elt_at_index (vum->vhost_user_interfaces, hif->dev_instance);
  vhost_user_vring_t *txvq = &vui->vrings[VHOST_VRING_IDX_TX (qid)];
  vhost_cpu_t *cpu;

  if (mode == txvq->mode)
    return 0;

  if ((mode != VNET_HW_IF_RX_MODE_POLLING) &&
      (mode != VNET_HW_IF_RX_MODE_ADAPTIVE) &&
      (mode != VNET_HW_IF_RX_MODE_INTERRUPT))
    {
      vu_log_err (vui, "unhandled mode %d changed for if %d queue %d", mode,
		  hw_if_index, qid);
      return clib_error_return (0, "unsupported");
    }

  if (txvq->thread_index == ~0)
    return clib_error_return (0, "Queue initialization is not finished yet");

  cpu = vec_elt_at_index (vum->cpus, txvq->thread_index);
  if ((mode == VNET_HW_IF_RX_MODE_INTERRUPT) ||
      (mode == VNET_HW_IF_RX_MODE_ADAPTIVE))
    {
      if (txvq->kickfd_idx == ~0)
	{
	  // We cannot support interrupt mode if the driver opts out
	  return clib_error_return (0, "Driver does not support interrupt");
	}
      if (txvq->mode == VNET_HW_IF_RX_MODE_POLLING)
	{
	  ASSERT (cpu->polling_q_count != 0);
	  if (cpu->polling_q_count)
	    cpu->polling_q_count--;
	  vum->ifq_count++;
	  // Start the timer if this is the first encounter on interrupt
	  // interface/queue
	  if ((vum->ifq_count == 1) &&
	      ((vum->coalesce_time > 0.0) || (vum->coalesce_frames > 0)))
	    vlib_process_signal_event (vm,
				       vhost_user_send_interrupt_node.index,
				       VHOST_USER_EVENT_START_TIMER, 0);
	}
    }
  else if (mode == VNET_HW_IF_RX_MODE_POLLING)
    {
      if (((txvq->mode == VNET_HW_IF_RX_MODE_INTERRUPT) ||
	   (txvq->mode == VNET_HW_IF_RX_MODE_ADAPTIVE)) && vum->ifq_count)
	{
	  cpu->polling_q_count++;
	  vum->ifq_count--;
	  // Stop the timer if there is no more interrupt interface/queue
	  if (vum->ifq_count == 0)
	    vlib_process_signal_event (vm,
				       vhost_user_send_interrupt_node.index,
				       VHOST_USER_EVENT_STOP_TIMER, 0);
	}
    }

  txvq->mode = mode;
  vhost_user_set_operation_mode (vui, txvq);

  return 0;
}

static __clib_unused clib_error_t *
vhost_user_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index,
				    u32 flags)
{
  vnet_hw_interface_t *hif = vnet_get_hw_interface (vnm, hw_if_index);
  vhost_user_main_t *vum = &vhost_user_main;
  vhost_user_intf_t *vui =
    pool_elt_at_index (vum->vhost_user_interfaces, hif->dev_instance);
  u8 link_old, link_new;

  link_old = vui_is_link_up (vui);

  vui->admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;

  link_new = vui_is_link_up (vui);

  if (link_old != link_new)
    vnet_hw_interface_set_flags (vnm, vui->hw_if_index, link_new ?
				 VNET_HW_INTERFACE_FLAG_LINK_UP : 0);

  return /* no error */ 0;
}

/* *INDENT-OFF* */
VNET_DEVICE_CLASS (vhost_user_device_class) = {
  .name = "vhost-user",
  .tx_function_n_errors = VHOST_USER_TX_FUNC_N_ERROR,
  .tx_function_error_strings = vhost_user_tx_func_error_strings,
  .format_device_name = format_vhost_user_interface_name,
  .name_renumber = vhost_user_name_renumber,
  .admin_up_down_function = vhost_user_interface_admin_up_down,
  .rx_mode_change_function = vhost_user_interface_rx_mode_change,
  .format_tx_trace = format_vhost_trace,
};

/* *INDENT-ON* */

/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */