/*
 * Copyright (c) 2015 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef __VIRTIO_VHOST_USER_H__
#define __VIRTIO_VHOST_USER_H__

#include <vnet/devices/virtio/virtio_std.h>
#include <vnet/devices/virtio/vhost_std.h>

/* vhost-user data structures */

/* Capacity of the regions[] arrays used by the memory-table structures. */
#define VHOST_MEMORY_MAX_NREGIONS 8
/*
 * NOTE(review): presumably the byte size of the fixed message header
 * (request, flags, size) -- confirm against the users of vhost_user_msg_t.
 */
#define VHOST_USER_MSG_HDR_SZ 12
#define VHOST_VRING_INIT_MQ_PAIR_SZ 8 //8TX + 8RX

/*
 * qid is one byte in size in the spec. Please see VHOST_USER_SET_VRING_CALL,
 * VHOST_USER_SET_VRING_KICK, and VHOST_USER_SET_VRING_ERR.
 * The max number for q pair is naturally 128.
 */
#define VHOST_VRING_MAX_MQ_PAIR_SZ 128
/*
 * Map a queue-pair number to a vring index: the RX macro yields the even
 * slot (2 * qid), the TX macro yields the odd slot (2 * qid + 1).
 */
#define VHOST_VRING_IDX_RX(qid) (2 * (qid))
#define VHOST_VRING_IDX_TX(qid) (2 * (qid) + 1)

#define VHOST_USER_VRING_NOFD_MASK 0x100

/* Bit positions (not masks) of the protocol features. */
#define VHOST_USER_PROTOCOL_F_MQ 0
#define VHOST_USER_PROTOCOL_F_LOG_SHMFD 1
#define VHOST_VRING_F_LOG 0

/* Mask built from the two protocol feature bit positions above. */
#define VHOST_USER_PROTOCOL_FEATURES ((1ULL << VHOST_USER_PROTOCOL_F_MQ) | \
                                      (1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD))

/*
 * Per-device logging helpers.
 *
 * Each macro calls vlib_log() with vhost_user_main.log_default and prefixes
 * the message with the device's hardware interface name (formatted via
 * format_vnet_hw_if_index_name from dev->hw_if_index).
 *
 *   dev  pointer to an object with an hw_if_index member
 *   f    format string; must be a string literal because it is
 *        concatenated with the "%U: " prefix
 *   ...  arguments consumed by f
 *
 * The bodies are wrapped in do { } while (0) so that a call followed by a
 * semicolon is exactly one statement and is safe in an unbraced if/else.
 */
#define vu_log_debug(dev, f, ...) \
  do \
    { \
      vlib_log (VLIB_LOG_LEVEL_DEBUG, vhost_user_main.log_default, \
                "%U: " f, format_vnet_hw_if_index_name, vnet_get_main (), \
                (dev)->hw_if_index, ##__VA_ARGS__); \
    } \
  while (0)

#define vu_log_warn(dev, f, ...) \
  do \
    { \
      vlib_log (VLIB_LOG_LEVEL_WARNING, vhost_user_main.log_default, \
                "%U: " f, format_vnet_hw_if_index_name, vnet_get_main (), \
                (dev)->hw_if_index, ##__VA_ARGS__); \
    } \
  while (0)

#define vu_log_err(dev, f, ...) \
  do \
    { \
      vlib_log (VLIB_LOG_LEVEL_ERR, vhost_user_main.log_default, \
                "%U: " f, format_vnet_hw_if_index_name, vnet_get_main (), \
                (dev)->hw_if_index, ##__VA_ARGS__); \
    } \
  while (0)

/*
 * Yield the file_descriptor of the file_main.file_pool element at index
 * unixfd_idx, or -1 when the index equals ~0.
 * The argument is evaluated exactly once (GNU statement expression).
 */
#define UNIX_GET_FD(unixfd_idx) ({ \
    typeof(unixfd_idx) unix_get_fd_idx_ = (unixfd_idx); \
    (unix_get_fd_idx_ != ~0) ? \
        pool_elt_at_index (file_main.file_pool, \
                           unix_get_fd_idx_)->file_descriptor : -1; })

/*
 * X-macro table of trace flags: _ (name, index, description).
 */
#define foreach_virtio_trace_flags \
  _ (SIMPLE_CHAINED, 0, "Simple descriptor chaining") \
  _ (SINGLE_DESC, 1, "Single descriptor packet") \
  _ (INDIRECT, 2, "Indirect descriptor") \
  _ (MAP_ERROR, 4, "Memory mapping error")

/*
 * One enumerator per table row. Each enumerator takes the index listed in
 * the table, so the non-contiguous MAP_ERROR entry really is 4 instead of
 * silently becoming the next sequential value.
 */
typedef enum
{
#define _(n,i,s) VIRTIO_TRACE_F_##n = (i),
  foreach_virtio_trace_flags
#undef _
} virtio_trace_flag_t;

/* Checksum, UFO, TSO4 and TSO6 feature bits for the host direction. */
#define FEATURE_VIRTIO_NET_F_HOST_TSO_FEATURE_BITS \
  (VIRTIO_FEATURE (VIRTIO_NET_F_CSUM) | \
   VIRTIO_FEATURE (VIRTIO_NET_F_HOST_UFO) | \
   VIRTIO_FEATURE (VIRTIO_NET_F_HOST_TSO4) | \
   VIRTIO_FEATURE (VIRTIO_NET_F_HOST_TSO6))

/* Checksum, UFO, TSO4 and TSO6 feature bits for the guest direction. */
#define FEATURE_VIRTIO_NET_F_GUEST_TSO_FEATURE_BITS \
  (VIRTIO_FEATURE (VIRTIO_NET_F_GUEST_CSUM) | \
   VIRTIO_FEATURE (VIRTIO_NET_F_GUEST_UFO) | \
   VIRTIO_FEATURE (VIRTIO_NET_F_GUEST_TSO4) | \
   VIRTIO_FEATURE (VIRTIO_NET_F_GUEST_TSO6))

/* Union of the host and guest masks above. */
#define FEATURE_VIRTIO_NET_F_HOST_GUEST_TSO_FEATURE_BITS \
  (FEATURE_VIRTIO_NET_F_HOST_TSO_FEATURE_BITS | \
   FEATURE_VIRTIO_NET_F_GUEST_TSO_FEATURE_BITS)


101typedef struct
102{
103 char *sock_filename;
104 u64 feature_mask;
105 u32 custom_dev_instance;
106 u8 hwaddr[6];
107 u8 renumber;
108 u8 is_server;
109 u8 enable_gso;
110 u8 enable_packed;
111 u8 enable_event_idx;
Steven Luongd6361c72021-01-26 23:44:19 -0800112 u8 use_custom_mac;
Steven Luong27ba5002020-11-17 13:30:44 -0800113
114 /* return */
115 u32 sw_if_index;
116} vhost_user_create_if_args_t;
117
Damjan Marion00a9dca2016-08-17 17:05:46 +0200118int vhost_user_create_if (vnet_main_t * vnm, vlib_main_t * vm,
Steven Luong27ba5002020-11-17 13:30:44 -0800119 vhost_user_create_if_args_t * args);
Damjan Marion00a9dca2016-08-17 17:05:46 +0200120int vhost_user_modify_if (vnet_main_t * vnm, vlib_main_t * vm,
Steven Luong27ba5002020-11-17 13:30:44 -0800121 vhost_user_create_if_args_t * args);
Damjan Marion00a9dca2016-08-17 17:05:46 +0200122int vhost_user_delete_if (vnet_main_t * vnm, vlib_main_t * vm,
123 u32 sw_if_index);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700124
Pierre Pfistere21c5282016-09-21 08:04:59 +0100125/* *INDENT-OFF* */
Damjan Marion00a9dca2016-08-17 17:05:46 +0200126typedef struct vhost_user_memory_region
127{
Ed Warnickecb9cada2015-12-08 15:45:58 -0700128 u64 guest_phys_addr;
129 u64 memory_size;
130 u64 userspace_addr;
131 u64 mmap_offset;
Pierre Pfistere21c5282016-09-21 08:04:59 +0100132} __attribute ((packed)) vhost_user_memory_region_t;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700133
Damjan Marion00a9dca2016-08-17 17:05:46 +0200134typedef struct vhost_user_memory
135{
Ed Warnickecb9cada2015-12-08 15:45:58 -0700136 u32 nregions;
137 u32 padding;
138 vhost_user_memory_region_t regions[VHOST_MEMORY_MAX_NREGIONS];
Pierre Pfistere21c5282016-09-21 08:04:59 +0100139} __attribute ((packed)) vhost_user_memory_t;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700140
/*
 * vhost-user request codes. VHOST_USER_MAX is not a request; it is one past
 * the last request code defined here.
 */
typedef enum vhost_user_req
{
  VHOST_USER_NONE = 0,
  VHOST_USER_GET_FEATURES = 1,
  VHOST_USER_SET_FEATURES = 2,
  VHOST_USER_SET_OWNER = 3,
  VHOST_USER_RESET_OWNER = 4,
  VHOST_USER_SET_MEM_TABLE = 5,
  VHOST_USER_SET_LOG_BASE = 6,
  VHOST_USER_SET_LOG_FD = 7,
  VHOST_USER_SET_VRING_NUM = 8,
  VHOST_USER_SET_VRING_ADDR = 9,
  VHOST_USER_SET_VRING_BASE = 10,
  VHOST_USER_GET_VRING_BASE = 11,
  VHOST_USER_SET_VRING_KICK = 12,
  VHOST_USER_SET_VRING_CALL = 13,
  VHOST_USER_SET_VRING_ERR = 14,
  VHOST_USER_GET_PROTOCOL_FEATURES = 15,
  VHOST_USER_SET_PROTOCOL_FEATURES = 16,
  VHOST_USER_GET_QUEUE_NUM = 17,
  VHOST_USER_SET_VRING_ENABLE = 18,
  VHOST_USER_MAX
} vhost_user_req_t;

Ed Warnickecb9cada2015-12-08 15:45:58 -0700165typedef struct vhost_user_msg {
Damjan Marion00a9dca2016-08-17 17:05:46 +0200166 vhost_user_req_t request;
167 u32 flags;
168 u32 size;
169 union
170 {
171 u64 u64;
172 vhost_vring_state_t state;
173 vhost_vring_addr_t addr;
174 vhost_user_memory_t memory;
175 vhost_user_log_t log;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700176 };
177} __attribute ((packed)) vhost_user_msg_t;
Damjan Marion00a9dca2016-08-17 17:05:46 +0200178/* *INDENT-ON* */
Ed Warnickecb9cada2015-12-08 15:45:58 -0700179
Damjan Marion00a9dca2016-08-17 17:05:46 +0200180typedef struct
181{
Pierre Pfistere21c5282016-09-21 08:04:59 +0100182 CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
Steven97878892017-08-29 09:23:26 -0700183 u16 qsz_mask;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700184 u16 last_avail_idx;
185 u16 last_used_idx;
Pierre Pfistere21c5282016-09-21 08:04:59 +0100186 u16 n_since_last_int;
Steven Luongbc0d9ff2020-03-23 09:34:59 -0700187 union
188 {
189 vring_desc_t *desc;
190 vring_packed_desc_t *packed_desc;
191 };
192 union
193 {
194 vring_avail_t *avail;
195 vring_desc_event_t *avail_event;
196 };
197 union
198 {
199 vring_used_t *used;
200 vring_desc_event_t *used_event;
201 };
Steven Luong4442f7c2019-10-02 07:33:48 -0700202 uword desc_user_addr;
203 uword used_user_addr;
204 uword avail_user_addr;
Pierre Pfistere21c5282016-09-21 08:04:59 +0100205 f64 int_deadline;
206 u8 started;
207 u8 enabled;
208 u8 log_used;
Steven Luong2c1084a2020-12-10 20:44:22 -0800209 clib_spinlock_t vring_lock;
210
Pierre Pfistere21c5282016-09-21 08:04:59 +0100211 //Put non-runtime in a different cache line
212 CLIB_CACHE_LINE_ALIGN_MARK (cacheline1);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700213 int errfd;
214 u32 callfd_idx;
Pierre Pfistere21c5282016-09-21 08:04:59 +0100215 u32 kickfd_idx;
216 u64 log_guest_addr;
Stevenf3b53642017-05-01 14:03:02 -0700217
218 /* The rx queue policy (interrupt/adaptive/polling) for this queue */
219 u32 mode;
Steven Luong67f935e2019-02-01 10:23:56 -0800220
221 /*
222 * It contains the device queue number. -1 if it does not. The idea is
223 * to not invoke vnet_hw_interface_assign_rx_thread and
224 * vnet_hw_interface_unassign_rx_thread more than once for the duration of
225 * the interface even if it is disconnected and reconnected.
226 */
227 i16 qid;
Steven Luongbc0d9ff2020-03-23 09:34:59 -0700228
229 u16 used_wrap_counter;
230 u16 avail_wrap_counter;
Steven Luong27ba5002020-11-17 13:30:44 -0800231 u16 last_kick;
232 u8 first_kick;
Damjan Marion94100532020-11-06 23:25:57 +0100233 u32 queue_index;
Steven Luong38071b12021-04-21 09:54:34 -0700234 u32 thread_index;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700235} vhost_user_vring_t;
236
/* Event identifiers for starting and stopping a timer. */
#define VHOST_USER_EVENT_START_TIMER 1
#define VHOST_USER_EVENT_STOP_TIMER 2

Damjan Marion00a9dca2016-08-17 17:05:46 +0200240typedef struct
241{
242 CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
Juraj Slobodab192feb2018-10-01 12:42:07 +0200243 u32 is_ready;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700244 u32 admin_up;
Pierre Pfisterdbb3c252016-11-22 10:33:34 +0000245 u32 unix_server_index;
Damjan Marion56dd5432017-09-08 19:52:02 +0200246 u32 clib_file_index;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700247 char sock_filename[256];
248 int sock_errno;
Steven5445f5f2017-04-25 16:16:00 -0700249 uword if_index;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700250 u32 hw_if_index, sw_if_index;
Damjan Marion00a9dca2016-08-17 17:05:46 +0200251
Pierre Pfistere21c5282016-09-21 08:04:59 +0100252 //Feature negotiation
Ed Warnickecb9cada2015-12-08 15:45:58 -0700253 u64 features;
254 u64 feature_mask;
Yoann Desmouceaux4667c222016-02-24 22:51:00 +0100255 u64 protocol_features;
Pierre Pfistere21c5282016-09-21 08:04:59 +0100256
257 //Memory region information
258 u32 nregions;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700259 vhost_user_memory_region_t regions[VHOST_MEMORY_MAX_NREGIONS];
Damjan Marion00a9dca2016-08-17 17:05:46 +0200260 void *region_mmap_addr[VHOST_MEMORY_MAX_NREGIONS];
Damjan Marion37623702016-09-20 11:25:27 +0200261 u64 region_guest_addr_lo[VHOST_MEMORY_MAX_NREGIONS];
262 u64 region_guest_addr_hi[VHOST_MEMORY_MAX_NREGIONS];
Ed Warnickecb9cada2015-12-08 15:45:58 -0700263 u32 region_mmap_fd[VHOST_MEMORY_MAX_NREGIONS];
Pierre Pfistere21c5282016-09-21 08:04:59 +0100264
265 //Virtual rings
Steven Luong2c1084a2020-12-10 20:44:22 -0800266 vhost_user_vring_t *vrings;
267
268 /*
269 * vrings is a dynamic array. It may have more elements than it is
270 * currently used. num_qid indicates the current total qid's in the
271 * vrings. For example, vec_len(vrings) = 64, num_qid = 60, so the
272 * current valid/used qid is (0, 59) in the vrings array.
273 */
274 u32 num_qid;
Pierre Pfistere21c5282016-09-21 08:04:59 +0100275
Ed Warnickecb9cada2015-12-08 15:45:58 -0700276 int virtio_net_hdr_sz;
277 int is_any_layout;
Yoann Desmouceaux4667c222016-02-24 22:51:00 +0100278
Damjan Marion00a9dca2016-08-17 17:05:46 +0200279 void *log_base_addr;
Yoann Desmouceaux4667c222016-02-24 22:51:00 +0100280 u64 log_size;
Pierre Pfistere21c5282016-09-21 08:04:59 +0100281
Steven Luong4208a4c2019-05-06 08:51:56 -0700282 u8 enable_gso;
Steven Luongbc0d9ff2020-03-23 09:34:59 -0700283
284 /* Packed ring configured */
285 u8 enable_packed;
286
Steven Luong27ba5002020-11-17 13:30:44 -0800287 u8 enable_event_idx;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700288} vhost_user_intf_t;
289
/*
 * Loop headers over the vring indices of an interface, bounded by
 * vui->num_qid:
 *   FOR_ALL_VHOST_TXQ     odd indices  (1, 3, 5, ...)
 *   FOR_ALL_VHOST_RXQ     even indices (0, 2, 4, ...)
 *   FOR_ALL_VHOST_RX_TXQ  every index  (0, 1, 2, ...)
 *
 *   qid  modifiable integer lvalue used as the loop variable
 *   vui  pointer to an object with a num_qid member; may be any pointer
 *        expression (for example &dev), since it is parenthesized here
 */
#define FOR_ALL_VHOST_TXQ(qid, vui) \
  for ((qid) = 1; (qid) < (vui)->num_qid; (qid) += 2)

#define FOR_ALL_VHOST_RXQ(qid, vui) \
  for ((qid) = 0; (qid) < (vui)->num_qid; (qid) += 2)

#define FOR_ALL_VHOST_RX_TXQ(qid, vui) \
  for ((qid) = 0; (qid) < (vui)->num_qid; (qid)++)

Damjan Marion00a9dca2016-08-17 17:05:46 +0200296typedef struct
297{
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +0000298 uword dst;
299 uword src;
300 u32 len;
301} vhost_copy_t;
302
303typedef struct
304{
305 u16 qid; /** The interface queue index (Not the virtio vring idx) */
306 u16 device_index; /** The device index */
307 u32 virtio_ring_flags; /** Runtime queue flags **/
308 u16 first_desc_len; /** Length of the first data descriptor **/
309 virtio_net_hdr_mrg_rxbuf_t hdr; /** Virtio header **/
310} vhost_trace_t;
311
/* Capacities of the rx_buffers[] and copy[] arrays in vhost_cpu_t. */
#define VHOST_USER_RX_BUFFERS_N (2 * VLIB_FRAME_SIZE + 2)
#define VHOST_USER_COPY_ARRAY_N (4 * VLIB_FRAME_SIZE)

315typedef struct
316{
Pierre Pfisterd3eb90e2016-11-29 15:36:14 +0000317 u32 rx_buffers_len;
318 u32 rx_buffers[VHOST_USER_RX_BUFFERS_N];
319
320 virtio_net_hdr_mrg_rxbuf_t tx_headers[VLIB_FRAME_SIZE];
321 vhost_copy_t copy[VHOST_USER_COPY_ARRAY_N];
322
323 /* This is here so it doesn't end-up
324 * using stack or registers. */
325 vhost_trace_t *current_trace;
Steven Luongbc0d9ff2020-03-23 09:34:59 -0700326
327 u32 *to_next_list;
328 vlib_buffer_t **rx_buffers_pdesc;
Steven Luong38071b12021-04-21 09:54:34 -0700329 u32 polling_q_count;
Pierre Pfistere21c5282016-09-21 08:04:59 +0100330} vhost_cpu_t;
331
332typedef struct
333{
Steven5445f5f2017-04-25 16:16:00 -0700334 mhash_t if_index_by_sock_name;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700335 u32 mtu_bytes;
Damjan Marion00a9dca2016-08-17 17:05:46 +0200336 vhost_user_intf_t *vhost_user_interfaces;
Damjan Marion00a9dca2016-08-17 17:05:46 +0200337 u32 *show_dev_instance_by_real_dev_instance;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700338 u32 coalesce_frames;
339 f64 coalesce_time;
340 int dont_dump_vhost_user_memory;
Damjan Marion0dafaa72016-09-20 23:21:02 +0200341
Pierre Pfistere21c5282016-09-21 08:04:59 +0100342 /** Per-CPU data for vhost-user */
343 vhost_cpu_t *cpus;
Pierre Pfisterdbb3c252016-11-22 10:33:34 +0000344
345 /** Pseudo random iterator */
346 u32 random;
Steven7312cc72017-03-15 21:18:55 -0700347
Stevenf3b53642017-05-01 14:03:02 -0700348 /* The number of rx interface/queue pairs in interrupt mode */
349 u32 ifq_count;
Steven388e51a2017-06-01 12:49:23 -0700350
Jerome Tollet2f54c272018-10-02 11:41:11 +0200351 /* logging */
352 vlib_log_class_t log_default;
Steven Luong4208a4c2019-05-06 08:51:56 -0700353
354 /* gso interface count */
355 u32 gso_count;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700356} vhost_user_main_t;
357
Damjan Marion00a9dca2016-08-17 17:05:46 +0200358typedef struct
359{
360 u8 if_name[64];
361 u32 sw_if_index;
362 u32 virtio_net_hdr_sz;
363 u64 features;
364 u8 is_server;
365 u8 sock_filename[256];
366 u32 num_regions;
367 int sock_errno;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700368} vhost_user_intf_details_t;
369
Damjan Marion00a9dca2016-08-17 17:05:46 +0200370int vhost_user_dump_ifs (vnet_main_t * vnm, vlib_main_t * vm,
371 vhost_user_intf_details_t ** out_vuids);
Steven Luong38071b12021-04-21 09:54:34 -0700372void vhost_user_set_operation_mode (vhost_user_intf_t *vui,
373 vhost_user_vring_t *txvq);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700374
Mohsin Kazmie7cde312018-06-26 17:20:11 +0200375extern vlib_node_registration_t vhost_user_send_interrupt_node;
376extern vnet_device_class_t vhost_user_device_class;
377extern vlib_node_registration_t vhost_user_input_node;
378extern vhost_user_main_t vhost_user_main;
379
#endif

/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */