Damjan Marion | 8389fb9 | 2017-10-13 18:29:53 +0200 | [diff] [blame] | 1 | /* |
| 2 | *------------------------------------------------------------------ |
| 3 | * Copyright (c) 2017 Cisco and/or its affiliates. |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | * you may not use this file except in compliance with the License. |
| 6 | * You may obtain a copy of the License at: |
| 7 | * |
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | * |
| 10 | * Unless required by applicable law or agreed to in writing, software |
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | * See the License for the specific language governing permissions and |
| 14 | * limitations under the License. |
| 15 | *------------------------------------------------------------------ |
| 16 | */ |
| 17 | |
| 18 | #include <sys/types.h> |
| 19 | #include <sys/stat.h> |
| 20 | #include <fcntl.h> |
| 21 | #include <net/if.h> |
| 22 | #include <linux/if_tun.h> |
| 23 | #include <sys/ioctl.h> |
| 24 | #include <linux/virtio_net.h> |
| 25 | #include <linux/vhost.h> |
| 26 | #include <sys/eventfd.h> |
| 27 | |
| 28 | #include <vlib/vlib.h> |
Mohsin Kazmi | d6c15af | 2018-10-23 18:00:47 +0200 | [diff] [blame] | 29 | #include <vlib/pci/pci.h> |
Damjan Marion | 8389fb9 | 2017-10-13 18:29:53 +0200 | [diff] [blame] | 30 | #include <vlib/unix/unix.h> |
| 31 | #include <vnet/ethernet/ethernet.h> |
Milan Lenco | 73e7f42 | 2017-12-14 10:04:25 +0100 | [diff] [blame] | 32 | #include <vnet/ip/ip4_packet.h> |
| 33 | #include <vnet/ip/ip6_packet.h> |
Damjan Marion | 8389fb9 | 2017-10-13 18:29:53 +0200 | [diff] [blame] | 34 | #include <vnet/devices/virtio/virtio.h> |
Mohsin Kazmi | d6c15af | 2018-10-23 18:00:47 +0200 | [diff] [blame] | 35 | #include <vnet/devices/virtio/pci.h> |
Damjan Marion | 8389fb9 | 2017-10-13 18:29:53 +0200 | [diff] [blame] | 36 | |
| 37 | virtio_main_t virtio_main; |
| 38 | |
/*
 * Issue ioctl request `a` on `fd` with the remaining arguments.
 *
 * On failure a unix error is stored in the enclosing function's `err`
 * variable and control jumps to its `error` label, so both must exist
 * at every use site.
 *
 * The body is wrapped in do/while (0) so the expansion is exactly one
 * statement and stays correct inside an unbraced if/else.
 */
#define _IOCTL(fd,a,...)                                        \
  do                                                            \
    {                                                           \
      if (ioctl (fd, a, __VA_ARGS__) < 0)                       \
        {                                                       \
          err = clib_error_return_unix (0, "ioctl(" #a ")");    \
          goto error;                                           \
        }                                                       \
    }                                                           \
  while (0)
| 45 | |
| 46 | static clib_error_t * |
| 47 | call_read_ready (clib_file_t * uf) |
| 48 | { |
| 49 | virtio_main_t *nm = &virtio_main; |
| 50 | vnet_main_t *vnm = vnet_get_main (); |
| 51 | u16 qid = uf->private_data & 0xFFFF; |
| 52 | virtio_if_t *vif = |
| 53 | vec_elt_at_index (nm->interfaces, uf->private_data >> 16); |
| 54 | u64 b; |
| 55 | |
| 56 | CLIB_UNUSED (ssize_t size) = read (uf->file_descriptor, &b, sizeof (b)); |
| 57 | if ((qid & 1) == 0) |
| 58 | vnet_device_input_set_interrupt_pending (vnm, vif->hw_if_index, qid); |
| 59 | |
| 60 | return 0; |
| 61 | } |
| 62 | |
| 63 | |
| 64 | clib_error_t * |
| 65 | virtio_vring_init (vlib_main_t * vm, virtio_if_t * vif, u16 idx, u16 sz) |
| 66 | { |
| 67 | clib_error_t *err = 0; |
| 68 | virtio_vring_t *vring; |
Damjan Marion | 7074961 | 2017-12-04 11:23:58 +0100 | [diff] [blame] | 69 | struct vhost_vring_state state = { 0 }; |
| 70 | struct vhost_vring_addr addr = { 0 }; |
| 71 | struct vhost_vring_file file = { 0 }; |
Damjan Marion | 8389fb9 | 2017-10-13 18:29:53 +0200 | [diff] [blame] | 72 | clib_file_t t = { 0 }; |
| 73 | int i; |
| 74 | |
| 75 | if (!is_pow2 (sz)) |
| 76 | return clib_error_return (0, "ring size must be power of 2"); |
| 77 | |
| 78 | if (sz > 32768) |
| 79 | return clib_error_return (0, "ring size must be 32768 or lower"); |
| 80 | |
| 81 | if (sz == 0) |
| 82 | sz = 256; |
| 83 | |
| 84 | vec_validate_aligned (vif->vrings, idx, CLIB_CACHE_LINE_BYTES); |
| 85 | vring = vec_elt_at_index (vif->vrings, idx); |
| 86 | |
| 87 | i = sizeof (struct vring_desc) * sz; |
| 88 | i = round_pow2 (i, CLIB_CACHE_LINE_BYTES); |
| 89 | vring->desc = clib_mem_alloc_aligned (i, CLIB_CACHE_LINE_BYTES); |
Dave Barach | b7b9299 | 2018-10-17 10:38:51 -0400 | [diff] [blame] | 90 | clib_memset (vring->desc, 0, i); |
Damjan Marion | 8389fb9 | 2017-10-13 18:29:53 +0200 | [diff] [blame] | 91 | |
| 92 | i = sizeof (struct vring_avail) + sz * sizeof (vring->avail->ring[0]); |
| 93 | i = round_pow2 (i, CLIB_CACHE_LINE_BYTES); |
| 94 | vring->avail = clib_mem_alloc_aligned (i, CLIB_CACHE_LINE_BYTES); |
Dave Barach | b7b9299 | 2018-10-17 10:38:51 -0400 | [diff] [blame] | 95 | clib_memset (vring->avail, 0, i); |
Damjan Marion | 8389fb9 | 2017-10-13 18:29:53 +0200 | [diff] [blame] | 96 | // tell kernel that we don't need interrupt |
| 97 | vring->avail->flags = VIRTIO_RING_FLAG_MASK_INT; |
| 98 | |
| 99 | i = sizeof (struct vring_used) + sz * sizeof (struct vring_used_elem); |
| 100 | i = round_pow2 (i, CLIB_CACHE_LINE_BYTES); |
| 101 | vring->used = clib_mem_alloc_aligned (i, CLIB_CACHE_LINE_BYTES); |
Dave Barach | b7b9299 | 2018-10-17 10:38:51 -0400 | [diff] [blame] | 102 | clib_memset (vring->used, 0, i); |
Damjan Marion | 8389fb9 | 2017-10-13 18:29:53 +0200 | [diff] [blame] | 103 | |
| 104 | ASSERT (vring->buffers == 0); |
Damjan Marion | c58408c | 2018-01-18 14:54:04 +0100 | [diff] [blame] | 105 | vec_validate_aligned (vring->buffers, sz, CLIB_CACHE_LINE_BYTES); |
Mohsin Kazmi | d6c15af | 2018-10-23 18:00:47 +0200 | [diff] [blame] | 106 | ASSERT (vring->indirect_buffers == 0); |
| 107 | vec_validate_aligned (vring->indirect_buffers, sz, CLIB_CACHE_LINE_BYTES); |
| 108 | if (idx % 2) |
| 109 | { |
| 110 | u32 n_alloc = 0; |
| 111 | do |
| 112 | { |
| 113 | if (n_alloc < sz) |
| 114 | n_alloc = |
| 115 | vlib_buffer_alloc (vm, vring->indirect_buffers + n_alloc, |
| 116 | sz - n_alloc); |
| 117 | } |
| 118 | while (n_alloc != sz); |
| 119 | } |
Damjan Marion | 8389fb9 | 2017-10-13 18:29:53 +0200 | [diff] [blame] | 120 | |
| 121 | vring->size = sz; |
| 122 | vring->call_fd = eventfd (0, EFD_NONBLOCK | EFD_CLOEXEC); |
| 123 | vring->kick_fd = eventfd (0, EFD_CLOEXEC); |
| 124 | |
| 125 | t.read_function = call_read_ready; |
| 126 | t.file_descriptor = vring->call_fd; |
| 127 | t.private_data = vif->dev_instance << 16 | idx; |
Damjan Marion | ceab788 | 2018-01-19 20:56:12 +0100 | [diff] [blame] | 128 | t.description = format (0, "%U vring %u", format_virtio_device_name, |
| 129 | vif->dev_instance, idx); |
Damjan Marion | 8389fb9 | 2017-10-13 18:29:53 +0200 | [diff] [blame] | 130 | vring->call_file_index = clib_file_add (&file_main, &t); |
| 131 | |
| 132 | state.index = idx; |
| 133 | state.num = sz; |
| 134 | _IOCTL (vif->fd, VHOST_SET_VRING_NUM, &state); |
| 135 | |
| 136 | addr.index = idx; |
| 137 | addr.flags = 0; |
| 138 | addr.desc_user_addr = pointer_to_uword (vring->desc); |
| 139 | addr.avail_user_addr = pointer_to_uword (vring->avail); |
| 140 | addr.used_user_addr = pointer_to_uword (vring->used); |
| 141 | _IOCTL (vif->fd, VHOST_SET_VRING_ADDR, &addr); |
| 142 | |
| 143 | file.index = idx; |
| 144 | file.fd = vring->kick_fd; |
| 145 | _IOCTL (vif->fd, VHOST_SET_VRING_KICK, &file); |
| 146 | file.fd = vring->call_fd; |
| 147 | _IOCTL (vif->fd, VHOST_SET_VRING_CALL, &file); |
| 148 | file.fd = vif->tap_fd; |
| 149 | _IOCTL (vif->fd, VHOST_NET_SET_BACKEND, &file); |
| 150 | |
| 151 | error: |
| 152 | return err; |
| 153 | } |
| 154 | |
Mohsin Kazmi | d6c15af | 2018-10-23 18:00:47 +0200 | [diff] [blame] | 155 | inline void |
Steven | a624dbe | 2018-01-09 11:13:29 -0800 | [diff] [blame] | 156 | virtio_free_rx_buffers (vlib_main_t * vm, virtio_vring_t * vring) |
Damjan Marion | 8389fb9 | 2017-10-13 18:29:53 +0200 | [diff] [blame] | 157 | { |
Steven | a624dbe | 2018-01-09 11:13:29 -0800 | [diff] [blame] | 158 | u16 used = vring->desc_in_use; |
Steven | 074f698 | 2018-03-30 22:18:11 -0700 | [diff] [blame] | 159 | u16 last = vring->last_used_idx; |
Steven | a624dbe | 2018-01-09 11:13:29 -0800 | [diff] [blame] | 160 | u16 mask = vring->size - 1; |
| 161 | |
| 162 | while (used) |
| 163 | { |
Steven | 074f698 | 2018-03-30 22:18:11 -0700 | [diff] [blame] | 164 | vlib_buffer_free (vm, &vring->buffers[last & mask], 1); |
| 165 | last++; |
Steven | a624dbe | 2018-01-09 11:13:29 -0800 | [diff] [blame] | 166 | used--; |
| 167 | } |
| 168 | } |
| 169 | |
| 170 | clib_error_t * |
| 171 | virtio_vring_free (vlib_main_t * vm, virtio_if_t * vif, u32 idx) |
| 172 | { |
Damjan Marion | 8389fb9 | 2017-10-13 18:29:53 +0200 | [diff] [blame] | 173 | virtio_vring_t *vring = vec_elt_at_index (vif->vrings, idx); |
Steven | a624dbe | 2018-01-09 11:13:29 -0800 | [diff] [blame] | 174 | |
| 175 | clib_file_del_by_index (&file_main, vring->call_file_index); |
| 176 | close (vring->kick_fd); |
| 177 | close (vring->call_fd); |
| 178 | if (vring->used) |
| 179 | { |
| 180 | if ((idx & 1) == 1) |
| 181 | virtio_free_used_desc (vm, vring); |
| 182 | else |
| 183 | virtio_free_rx_buffers (vm, vring); |
| 184 | clib_mem_free (vring->used); |
| 185 | } |
Damjan Marion | 8389fb9 | 2017-10-13 18:29:53 +0200 | [diff] [blame] | 186 | if (vring->desc) |
| 187 | clib_mem_free (vring->desc); |
| 188 | if (vring->avail) |
| 189 | clib_mem_free (vring->avail); |
Mohsin Kazmi | d6c15af | 2018-10-23 18:00:47 +0200 | [diff] [blame] | 190 | if (vring->queue_id % 2) |
| 191 | { |
| 192 | vlib_buffer_free_no_next (vm, vring->indirect_buffers, vring->size); |
| 193 | } |
Damjan Marion | 8389fb9 | 2017-10-13 18:29:53 +0200 | [diff] [blame] | 194 | vec_free (vring->buffers); |
Mohsin Kazmi | d6c15af | 2018-10-23 18:00:47 +0200 | [diff] [blame] | 195 | vec_free (vring->indirect_buffers); |
Damjan Marion | 8389fb9 | 2017-10-13 18:29:53 +0200 | [diff] [blame] | 196 | return 0; |
| 197 | } |
| 198 | |
Mohsin Kazmi | 80659b4 | 2019-01-31 13:18:00 +0000 | [diff] [blame] | 199 | void |
| 200 | virtio_vring_set_numa_node (vlib_main_t * vm, virtio_if_t * vif, u32 idx) |
| 201 | { |
| 202 | vnet_main_t *vnm = vnet_get_main (); |
| 203 | u32 thread_index; |
| 204 | virtio_vring_t *vring = vec_elt_at_index (vif->vrings, idx); |
| 205 | thread_index = |
| 206 | vnet_get_device_input_thread_index (vnm, vif->hw_if_index, |
| 207 | vring->queue_id); |
| 208 | vring->buffer_pool_index = |
| 209 | vlib_buffer_pool_get_default_for_numa (vm, |
| 210 | vlib_mains |
| 211 | [thread_index]->numa_node); |
| 212 | } |
| 213 | |
Mohsin Kazmi | d6c15af | 2018-10-23 18:00:47 +0200 | [diff] [blame] | 214 | inline void |
| 215 | virtio_set_net_hdr_size (virtio_if_t * vif) |
| 216 | { |
| 217 | if (vif->features & VIRTIO_FEATURE (VIRTIO_NET_F_MRG_RXBUF) || |
| 218 | vif->features & VIRTIO_FEATURE (VIRTIO_F_VERSION_1)) |
| 219 | vif->virtio_net_hdr_sz = sizeof (struct virtio_net_hdr_v1); |
| 220 | else |
| 221 | vif->virtio_net_hdr_sz = sizeof (struct virtio_net_hdr); |
| 222 | } |
| 223 | |
| 224 | inline void |
| 225 | virtio_show (vlib_main_t * vm, u32 * hw_if_indices, u8 show_descr, u32 type) |
| 226 | { |
| 227 | u32 i, j, hw_if_index; |
| 228 | virtio_if_t *vif; |
| 229 | vnet_main_t *vnm = &vnet_main; |
| 230 | virtio_main_t *mm = &virtio_main; |
| 231 | virtio_vring_t *vring; |
| 232 | struct feat_struct |
| 233 | { |
| 234 | u8 bit; |
| 235 | char *str; |
| 236 | }; |
| 237 | struct feat_struct *feat_entry; |
| 238 | |
| 239 | static struct feat_struct feat_array[] = { |
| 240 | #define _(s,b) { .str = #s, .bit = b, }, |
| 241 | foreach_virtio_net_features |
| 242 | #undef _ |
| 243 | {.str = NULL} |
| 244 | }; |
| 245 | |
| 246 | struct feat_struct *flag_entry; |
| 247 | static struct feat_struct flags_array[] = { |
| 248 | #define _(b,e,s) { .bit = b, .str = s, }, |
| 249 | foreach_virtio_if_flag |
| 250 | #undef _ |
| 251 | {.str = NULL} |
| 252 | }; |
| 253 | |
| 254 | if (!hw_if_indices) |
| 255 | return; |
| 256 | |
| 257 | for (hw_if_index = 0; hw_if_index < vec_len (hw_if_indices); hw_if_index++) |
| 258 | { |
| 259 | vnet_hw_interface_t *hi = |
| 260 | vnet_get_hw_interface (vnm, hw_if_indices[hw_if_index]); |
| 261 | vif = pool_elt_at_index (mm->interfaces, hi->dev_instance); |
| 262 | if (vif->type != type) |
| 263 | continue; |
| 264 | vlib_cli_output (vm, "Interface: %U (ifindex %d)", |
| 265 | format_vnet_hw_if_index_name, vnm, |
| 266 | hw_if_indices[hw_if_index], vif->hw_if_index); |
| 267 | if (type == VIRTIO_IF_TYPE_PCI) |
| 268 | { |
| 269 | vlib_cli_output (vm, " PCI Address: %U", format_vlib_pci_addr, |
| 270 | &vif->pci_addr); |
| 271 | } |
| 272 | if (type == VIRTIO_IF_TYPE_TAP) |
| 273 | { |
| 274 | if (vif->host_if_name) |
| 275 | vlib_cli_output (vm, " name \"%s\"", vif->host_if_name); |
| 276 | if (vif->net_ns) |
| 277 | vlib_cli_output (vm, " host-ns \"%s\"", vif->net_ns); |
| 278 | vlib_cli_output (vm, " fd %d", vif->fd); |
| 279 | vlib_cli_output (vm, " tap-fd %d", vif->tap_fd); |
| 280 | } |
| 281 | vlib_cli_output (vm, " Mac Address: %U", format_ethernet_address, |
| 282 | vif->mac_addr); |
| 283 | vlib_cli_output (vm, " Device instance: %u", vif->dev_instance); |
| 284 | vlib_cli_output (vm, " flags 0x%x", vif->flags); |
| 285 | flag_entry = (struct feat_struct *) &flags_array; |
| 286 | while (flag_entry->str) |
| 287 | { |
| 288 | if (vif->flags & (1ULL << flag_entry->bit)) |
| 289 | vlib_cli_output (vm, " %s (%d)", flag_entry->str, |
| 290 | flag_entry->bit); |
| 291 | flag_entry++; |
| 292 | } |
| 293 | if (type == VIRTIO_IF_TYPE_PCI) |
| 294 | { |
| 295 | device_status (vm, vif); |
| 296 | } |
| 297 | vlib_cli_output (vm, " features 0x%lx", vif->features); |
| 298 | feat_entry = (struct feat_struct *) &feat_array; |
| 299 | while (feat_entry->str) |
| 300 | { |
| 301 | if (vif->features & (1ULL << feat_entry->bit)) |
| 302 | vlib_cli_output (vm, " %s (%d)", feat_entry->str, |
| 303 | feat_entry->bit); |
| 304 | feat_entry++; |
| 305 | } |
| 306 | vlib_cli_output (vm, " remote-features 0x%lx", vif->remote_features); |
| 307 | feat_entry = (struct feat_struct *) &feat_array; |
| 308 | while (feat_entry->str) |
| 309 | { |
| 310 | if (vif->remote_features & (1ULL << feat_entry->bit)) |
| 311 | vlib_cli_output (vm, " %s (%d)", feat_entry->str, |
| 312 | feat_entry->bit); |
| 313 | feat_entry++; |
| 314 | } |
| 315 | vec_foreach_index (i, vif->vrings) |
| 316 | { |
| 317 | // RX = 0, TX = 1 |
| 318 | vring = vec_elt_at_index (vif->vrings, i); |
| 319 | vlib_cli_output (vm, " Virtqueue (%s)", (i & 1) ? "TX" : "RX"); |
| 320 | vlib_cli_output (vm, |
| 321 | " qsz %d, last_used_idx %d, desc_next %d, desc_in_use %d", |
| 322 | vring->size, vring->last_used_idx, vring->desc_next, |
| 323 | vring->desc_in_use); |
| 324 | vlib_cli_output (vm, |
| 325 | " avail.flags 0x%x avail.idx %d used.flags 0x%x used.idx %d", |
| 326 | vring->avail->flags, vring->avail->idx, |
| 327 | vring->used->flags, vring->used->idx); |
| 328 | if (type == VIRTIO_IF_TYPE_TAP) |
| 329 | { |
| 330 | vlib_cli_output (vm, " kickfd %d, callfd %d", vring->kick_fd, |
| 331 | vring->call_fd); |
| 332 | } |
| 333 | if (show_descr) |
| 334 | { |
| 335 | vlib_cli_output (vm, "\n descriptor table:\n"); |
| 336 | vlib_cli_output (vm, |
| 337 | " id addr len flags next user_addr\n"); |
| 338 | vlib_cli_output (vm, |
| 339 | " ===== ================== ===== ====== ===== ==================\n"); |
| 340 | vring = vif->vrings; |
| 341 | for (j = 0; j < vring->size; j++) |
| 342 | { |
| 343 | struct vring_desc *desc = &vring->desc[j]; |
| 344 | vlib_cli_output (vm, |
| 345 | " %-5d 0x%016lx %-5d 0x%04x %-5d 0x%016lx\n", |
| 346 | j, desc->addr, |
| 347 | desc->len, |
| 348 | desc->flags, desc->next, desc->addr); |
| 349 | } |
| 350 | } |
| 351 | } |
| 352 | } |
| 353 | |
| 354 | } |
| 355 | |
Damjan Marion | 8389fb9 | 2017-10-13 18:29:53 +0200 | [diff] [blame] | 356 | /* |
| 357 | * fd.io coding-style-patch-verification: ON |
| 358 | * |
| 359 | * Local Variables: |
| 360 | * eval: (c-set-style "gnu") |
| 361 | * End: |
| 362 | */ |