/*
 *------------------------------------------------------------------
 * tuntap.c - kernel stack (reverse) punt/inject path
 *
 * Copyright (c) 2009 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *------------------------------------------------------------------
 */
19
20#include <fcntl.h> /* for open */
21#include <sys/ioctl.h>
22#include <sys/socket.h>
23#include <sys/stat.h>
24#include <sys/types.h>
25#include <sys/uio.h> /* for iovec */
26#include <netinet/in.h>
27
28#include <linux/if_arp.h>
29#include <linux/if_tun.h>
30
31#include <vlib/vlib.h>
32#include <vlib/unix/unix.h>
33
34#include <vnet/ip/ip.h>
35
36#include <vnet/ethernet/ethernet.h>
37
38#if DPDK == 1
39#include <vnet/devices/dpdk/dpdk.h>
40#endif
41
42static vnet_device_class_t tuntap_dev_class;
43static vnet_hw_interface_class_t tuntap_interface_class;
44
45static void tuntap_punt_frame (vlib_main_t * vm,
46 vlib_node_runtime_t * node,
47 vlib_frame_t * frame);
48static void tuntap_nopunt_frame (vlib_main_t * vm,
49 vlib_node_runtime_t * node,
50 vlib_frame_t * frame);
51
/*
 * This driver runs in one of two distinct modes:
 *  - "punt/inject" mode, where packets not otherwise processed by the
 *    forwarding path are sent to the Linux kernel stack, and
 *  - "normal interface" mode, where the Linux kernel stack is treated
 *    as a peer.
 *
 * By default, we select punt/inject mode.
 */
61
62typedef struct {
63 u32 sw_if_index;
64 u8 is_v6;
65 u8 addr[16];
66} subif_address_t;
67
68typedef struct {
69 /* Vector of iovecs for readv/writev calls. */
70 struct iovec * iovecs;
71
72 /* Vector of VLIB rx buffers to use. We allocate them in blocks
73 of VLIB_FRAME_SIZE (256). */
74 u32 * rx_buffers;
75
76 /* File descriptors for /dev/net/tun and provisioning socket. */
77 int dev_net_tun_fd, dev_tap_fd;
78
79 /* Create a "tap" [ethernet] encaps device */
80 int is_ether;
81
82 /* 1 if a "normal" routed intfc, 0 if a punt/inject interface */
83
84 int have_normal_interface;
85
86 /* tap device destination MAC address. Required, or Linux drops pkts */
87 u8 ether_dst_mac[6];
88
89 /* Interface MTU in bytes and # of default sized buffers. */
90 u32 mtu_bytes, mtu_buffers;
91
92 /* Linux interface name for tun device. */
93 char * tun_name;
94
95 /* Pool of subinterface addresses */
96 subif_address_t *subifs;
97
98 /* Hash for subif addresses */
99 mhash_t subif_mhash;
100
101 u32 unix_file_index;
102
103 /* For the "normal" interface, if configured */
104 u32 hw_if_index, sw_if_index;
105
106} tuntap_main_t;
107
108static tuntap_main_t tuntap_main = {
109 .tun_name = "vnet",
110
111 /* Suitable defaults for an Ethernet-like tun/tap device */
112 .mtu_bytes = 4096 + 256,
113};
114
115/*
116 * tuntap_tx
117 * Output node, writes the buffers comprising the incoming frame
118 * to the tun/tap device, aka hands them to the Linux kernel stack.
119 *
120 */
121static uword
122tuntap_tx (vlib_main_t * vm,
123 vlib_node_runtime_t * node,
124 vlib_frame_t * frame)
125{
126 u32 * buffers = vlib_frame_args (frame);
127 uword n_packets = frame->n_vectors;
128 tuntap_main_t * tm = &tuntap_main;
129 int i;
130
131 for (i = 0; i < n_packets; i++)
132 {
133 struct iovec * iov;
134 vlib_buffer_t * b;
135 uword l;
136
137 b = vlib_get_buffer (vm, buffers[i]);
138
139 if (tm->is_ether && (!tm->have_normal_interface))
140 {
141 vlib_buffer_reset(b);
Damjan Marionf1213b82016-03-13 02:22:06 +0100142 clib_memcpy (vlib_buffer_get_current (b), tm->ether_dst_mac, 6);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700143 }
144
145 /* Re-set iovecs if present. */
146 if (tm->iovecs)
147 _vec_len (tm->iovecs) = 0;
148
149 /* VLIB buffer chain -> Unix iovec(s). */
150 vec_add2 (tm->iovecs, iov, 1);
151 iov->iov_base = b->data + b->current_data;
152 iov->iov_len = l = b->current_length;
153
154 if (PREDICT_FALSE (b->flags & VLIB_BUFFER_NEXT_PRESENT))
155 {
156 do {
157 b = vlib_get_buffer (vm, b->next_buffer);
158
159 vec_add2 (tm->iovecs, iov, 1);
160
161 iov->iov_base = b->data + b->current_data;
162 iov->iov_len = b->current_length;
163 l += b->current_length;
164 } while (b->flags & VLIB_BUFFER_NEXT_PRESENT);
165 }
166
167 if (writev (tm->dev_net_tun_fd, tm->iovecs, vec_len (tm->iovecs)) < l)
168 clib_unix_warning ("writev");
169 }
170
171 /* The normal interface path flattens the buffer chain */
172 if (tm->have_normal_interface)
173 vlib_buffer_free_no_next (vm, buffers, n_packets);
174 else
175 vlib_buffer_free (vm, buffers, n_packets);
176
177 return n_packets;
178}
179
180VLIB_REGISTER_NODE (tuntap_tx_node,static) = {
181 .function = tuntap_tx,
182 .name = "tuntap-tx",
183 .type = VLIB_NODE_TYPE_INTERNAL,
184 .vector_size = 4,
185};
186
/* Next-node indices used by tuntap_rx(); see tuntap_rx_node.next_nodes. */
enum
{
  TUNTAP_RX_NEXT_IP4_INPUT = 0,   /* first nibble of packet is 0x4 */
  TUNTAP_RX_NEXT_IP6_INPUT,       /* first nibble of packet is 0x6 */
  TUNTAP_RX_NEXT_ETHERNET_INPUT,  /* tap (ethernet) device */
  TUNTAP_RX_NEXT_DROP,            /* unknown type, or interface admin-down */
  TUNTAP_RX_N_NEXT,               /* number of next nodes */
};
194
195static uword
196tuntap_rx (vlib_main_t * vm,
197 vlib_node_runtime_t * node,
198 vlib_frame_t * frame)
199{
200 tuntap_main_t * tm = &tuntap_main;
201 vlib_buffer_t * b;
202 u32 bi;
Damjan Marion19010202016-03-24 17:17:47 +0100203 const uword buffer_size = VLIB_BUFFER_DATA_SIZE;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700204#if DPDK == 0
Ed Warnickecb9cada2015-12-08 15:45:58 -0700205 u32 free_list_index = VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX;
206#else
207 dpdk_main_t * dm = &dpdk_main;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700208 u32 free_list_index = dm->vlib_buffer_free_list_index;
209#endif
210
211 /* Make sure we have some RX buffers. */
212 {
213 uword n_left = vec_len (tm->rx_buffers);
214 uword n_alloc;
215
216 if (n_left < VLIB_FRAME_SIZE / 2)
217 {
218 if (! tm->rx_buffers)
219 vec_alloc (tm->rx_buffers, VLIB_FRAME_SIZE);
220
221 n_alloc = vlib_buffer_alloc_from_free_list
222 (vm, tm->rx_buffers + n_left, VLIB_FRAME_SIZE - n_left,
223 free_list_index);
224 _vec_len (tm->rx_buffers) = n_left + n_alloc;
225 }
226 }
227
228 /* Allocate RX buffers from end of rx_buffers.
229 Turn them into iovecs to pass to readv. */
230 {
231 uword i_rx = vec_len (tm->rx_buffers) - 1;
232 vlib_buffer_t * b;
233 word i, n_bytes_left, n_bytes_in_packet;
234
235 /* We should have enough buffers left for an MTU sized packet. */
236 ASSERT (vec_len (tm->rx_buffers) >= tm->mtu_buffers);
237
238 vec_validate (tm->iovecs, tm->mtu_buffers - 1);
239 for (i = 0; i < tm->mtu_buffers; i++)
240 {
241 b = vlib_get_buffer (vm, tm->rx_buffers[i_rx - i]);
242 tm->iovecs[i].iov_base = b->data;
243 tm->iovecs[i].iov_len = buffer_size;
244 }
245
246 n_bytes_left = readv (tm->dev_net_tun_fd, tm->iovecs, tm->mtu_buffers);
247 n_bytes_in_packet = n_bytes_left;
248 if (n_bytes_left <= 0)
249 {
250 if (errno != EAGAIN)
251 clib_unix_warning ("readv %d", n_bytes_left);
252 return 0;
253 }
254
255 bi = tm->rx_buffers[i_rx];
256
257 while (1)
258 {
259#if DPDK == 1
260 struct rte_mbuf * mb;
261#endif
262 b = vlib_get_buffer (vm, tm->rx_buffers[i_rx]);
263#if DPDK == 1
Damjan Marion19010202016-03-24 17:17:47 +0100264 mb = rte_mbuf_from_vlib_buffer(b);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700265#endif
266 b->flags = 0;
267 b->current_data = 0;
268 b->current_length = n_bytes_left < buffer_size ? n_bytes_left : buffer_size;
269
270 n_bytes_left -= buffer_size;
271#if DPDK == 1
272 rte_pktmbuf_data_len (mb) = b->current_length;
273#endif
274
275 if (n_bytes_left <= 0)
276 {
277#if DPDK == 1
278 rte_pktmbuf_pkt_len (mb) = n_bytes_in_packet;
279#endif
280 break;
281 }
282
283 i_rx--;
284 b->flags |= VLIB_BUFFER_NEXT_PRESENT;
285 b->next_buffer = tm->rx_buffers[i_rx];
286#if DPDK == 1
287 ASSERT(0);
288 // ((struct rte_pktmbuf *)(b->mb))->next =
289 // vlib_get_buffer (vm, tm->rx_buffers[i_rx])->mb;
290#endif
291 }
292
293 /* Interface counters for tuntap interface. */
294 vlib_increment_combined_counter
295 (vnet_main.interface_main.combined_sw_if_counters
296 + VNET_INTERFACE_COUNTER_RX,
297 os_get_cpu_number(),
298 tm->sw_if_index,
299 1, n_bytes_in_packet);
300
301 _vec_len (tm->rx_buffers) = i_rx;
302 }
303
304 b = vlib_get_buffer (vm, bi);
305
306 {
307 u32 next_index;
308 uword n_trace = vlib_get_trace_count (vm, node);
309
310 vnet_buffer (b)->sw_if_index[VLIB_RX] = tm->sw_if_index;
311 vnet_buffer (b)->sw_if_index[VLIB_TX] = (u32)~0;
312
313 /*
314 * Turn this on if you run into
315 * "bad monkey" contexts, and you want to know exactly
316 * which nodes they've visited...
317 */
318 if (VLIB_BUFFER_TRACE_TRAJECTORY)
319 b->pre_data[0] = 0;
320
321 b->error = node->errors[0];
322
323 if (tm->is_ether)
324 {
325 next_index = TUNTAP_RX_NEXT_ETHERNET_INPUT;
326 }
327 else
328 switch (b->data[0] & 0xf0)
329 {
330 case 0x40:
331 next_index = TUNTAP_RX_NEXT_IP4_INPUT;
332 break;
333 case 0x60:
334 next_index = TUNTAP_RX_NEXT_IP6_INPUT;
335 break;
336 default:
337 next_index = TUNTAP_RX_NEXT_DROP;
338 break;
339 }
340
341 /* The linux kernel couldn't care less if our interface is up */
342 if (tm->have_normal_interface)
343 {
344 vnet_main_t *vnm = vnet_get_main();
345 vnet_sw_interface_t * si;
346 si = vnet_get_sw_interface (vnm, tm->sw_if_index);
347 if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
348 next_index = TUNTAP_RX_NEXT_DROP;
349 }
350
351 vlib_set_next_frame_buffer (vm, node, next_index, bi);
352
353 if (n_trace > 0)
354 {
355 vlib_trace_buffer (vm, node, next_index,
356 b, /* follow_chain */ 1);
357 vlib_set_trace_count (vm, node, n_trace - 1);
358 }
359 }
360
361 return 1;
362}
363
/* Error counter names for tuntap-rx; index 0 is used as b->error. */
static char *tuntap_rx_error_strings[] = {
  [0] = "unknown packet type",
};
367
368VLIB_REGISTER_NODE (tuntap_rx_node,static) = {
369 .function = tuntap_rx,
370 .name = "tuntap-rx",
371 .type = VLIB_NODE_TYPE_INPUT,
372 .state = VLIB_NODE_STATE_INTERRUPT,
373 .vector_size = 4,
374 .n_errors = 1,
375 .error_strings = tuntap_rx_error_strings,
376
377 .n_next_nodes = TUNTAP_RX_N_NEXT,
378 .next_nodes = {
379 [TUNTAP_RX_NEXT_IP4_INPUT] = "ip4-input-no-checksum",
380 [TUNTAP_RX_NEXT_IP6_INPUT] = "ip6-input",
381 [TUNTAP_RX_NEXT_DROP] = "error-drop",
382 [TUNTAP_RX_NEXT_ETHERNET_INPUT] = "ethernet-input",
383 },
384};
385
386/* Gets called when file descriptor is ready from epoll. */
387static clib_error_t * tuntap_read_ready (unix_file_t * uf)
388{
389 vlib_main_t * vm = vlib_get_main();
390 vlib_node_set_interrupt_pending (vm, tuntap_rx_node.index);
391 return 0;
392}
393
394/*
395 * tuntap_exit
396 * Clean up the tun/tap device
397 */
398
399static clib_error_t *
400tuntap_exit (vlib_main_t * vm)
401{
402 tuntap_main_t *tm = &tuntap_main;
403 struct ifreq ifr;
404 int sfd;
405
406 /* Not present. */
407 if (! tm->dev_net_tun_fd || tm->dev_net_tun_fd < 0)
408 return 0;
409
410 sfd = socket (AF_INET, SOCK_STREAM, 0);
411 if (sfd < 0)
412 clib_unix_warning("provisioning socket");
413
414 memset(&ifr, 0, sizeof (ifr));
415 strncpy (ifr.ifr_name, tm->tun_name, sizeof (ifr.ifr_name)-1);
416
417 /* get flags, modify to bring down interface... */
418 if (ioctl (sfd, SIOCGIFFLAGS, &ifr) < 0)
419 clib_unix_warning ("SIOCGIFFLAGS");
420
421 ifr.ifr_flags &= ~(IFF_UP | IFF_RUNNING);
422
423 if (ioctl (sfd, SIOCSIFFLAGS, &ifr) < 0)
424 clib_unix_warning ("SIOCSIFFLAGS");
425
426 /* Turn off persistence */
427 if (ioctl (tm->dev_net_tun_fd, TUNSETPERSIST, 0) < 0)
428 clib_unix_warning ("TUNSETPERSIST");
429 close(tm->dev_tap_fd);
Dave Barachf9c231e2016-08-05 10:10:18 -0400430 if (tm->dev_net_tun_fd >= 0)
431 close(tm->dev_net_tun_fd);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700432 close (sfd);
433
434 return 0;
435}
436
437VLIB_MAIN_LOOP_EXIT_FUNCTION (tuntap_exit);
438
439static clib_error_t *
440tuntap_config (vlib_main_t * vm, unformat_input_t * input)
441{
442 tuntap_main_t *tm = &tuntap_main;
443 clib_error_t * error = 0;
444 struct ifreq ifr;
445 u8 * name;
446 int flags = IFF_TUN | IFF_NO_PI;
447 int is_enabled = 0, is_ether = 0, have_normal_interface = 0;
Damjan Marion19010202016-03-24 17:17:47 +0100448 const uword buffer_size = VLIB_BUFFER_DATA_SIZE;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700449
450 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
451 {
452 if (unformat (input, "mtu %d", &tm->mtu_bytes))
453 ;
454 else if (unformat (input, "enable"))
455 is_enabled = 1;
456 else if (unformat (input, "disable"))
457 is_enabled = 0;
458 else if (unformat (input, "ethernet") ||
459 unformat (input, "ether"))
460 is_ether = 1;
461 else if (unformat (input, "have-normal-interface") ||
462 unformat (input, "have-normal"))
463 have_normal_interface = 1;
464 else if (unformat (input, "name %s", &name))
465 tm->tun_name = (char *) name;
466 else
467 return clib_error_return (0, "unknown input `%U'",
468 format_unformat_error, input);
469 }
470
471 tm->dev_net_tun_fd = -1;
472 tm->dev_tap_fd = -1;
473
474 if (is_enabled == 0)
475 return 0;
476
477 if (geteuid())
478 {
479 clib_warning ("tuntap disabled: must be superuser");
480 return 0;
481 }
482
483 tm->is_ether = is_ether;
484 tm->have_normal_interface = have_normal_interface;
485
486 if (is_ether)
487 flags = IFF_TAP | IFF_NO_PI;
488
489 if ((tm->dev_net_tun_fd = open ("/dev/net/tun", O_RDWR)) < 0)
490 {
491 error = clib_error_return_unix (0, "open /dev/net/tun");
492 goto done;
493 }
494
495 memset (&ifr, 0, sizeof (ifr));
496 strncpy(ifr.ifr_name, tm->tun_name, sizeof(ifr.ifr_name)-1);
497 ifr.ifr_flags = flags;
498 if (ioctl (tm->dev_net_tun_fd, TUNSETIFF, (void *)&ifr) < 0)
499 {
500 error = clib_error_return_unix (0, "ioctl TUNSETIFF");
501 goto done;
502 }
503
504 /* Make it persistent, at least until we split. */
505 if (ioctl (tm->dev_net_tun_fd, TUNSETPERSIST, 1) < 0)
506 {
507 error = clib_error_return_unix (0, "TUNSETPERSIST");
508 goto done;
509 }
510
511 /* Open a provisioning socket */
512 if ((tm->dev_tap_fd = socket(PF_PACKET, SOCK_RAW,
513 htons(ETH_P_ALL))) < 0 )
514 {
515 error = clib_error_return_unix (0, "socket");
516 goto done;
517 }
518
519 /* Find the interface index. */
520 {
521 struct ifreq ifr;
522 struct sockaddr_ll sll;
523
524 memset (&ifr, 0, sizeof(ifr));
525 strncpy (ifr.ifr_name, tm->tun_name, sizeof(ifr.ifr_name)-1);
526 if (ioctl (tm->dev_tap_fd, SIOCGIFINDEX, &ifr) < 0 )
527 {
528 error = clib_error_return_unix (0, "ioctl SIOCGIFINDEX");
529 goto done;
530 }
531
532 /* Bind the provisioning socket to the interface. */
533 memset(&sll, 0, sizeof(sll));
534 sll.sll_family = AF_PACKET;
535 sll.sll_ifindex = ifr.ifr_ifindex;
536 sll.sll_protocol = htons(ETH_P_ALL);
537
538 if (bind(tm->dev_tap_fd, (struct sockaddr*) &sll, sizeof(sll)) < 0)
539 {
540 error = clib_error_return_unix (0, "bind");
541 goto done;
542 }
543 }
544
545 /* non-blocking I/O on /dev/tapX */
546 {
547 int one = 1;
548 if (ioctl (tm->dev_net_tun_fd, FIONBIO, &one) < 0)
549 {
550 error = clib_error_return_unix (0, "ioctl FIONBIO");
551 goto done;
552 }
553 }
554
555 tm->mtu_buffers = (tm->mtu_bytes + (buffer_size - 1)) / buffer_size;
556
557 ifr.ifr_mtu = tm->mtu_bytes;
558 if (ioctl (tm->dev_tap_fd, SIOCSIFMTU, &ifr) < 0)
559 {
560 error = clib_error_return_unix (0, "ioctl SIOCSIFMTU");
561 goto done;
562 }
563
564 /* get flags, modify to bring up interface... */
565 if (ioctl (tm->dev_tap_fd, SIOCGIFFLAGS, &ifr) < 0)
566 {
567 error = clib_error_return_unix (0, "ioctl SIOCGIFFLAGS");
568 goto done;
569 }
570
571 ifr.ifr_flags |= (IFF_UP | IFF_RUNNING);
572
573 if (ioctl (tm->dev_tap_fd, SIOCSIFFLAGS, &ifr) < 0)
574 {
575 error = clib_error_return_unix (0, "ioctl SIOCSIFFLAGS");
576 goto done;
577 }
578
579 if (is_ether)
580 {
581 if (ioctl (tm->dev_tap_fd, SIOCGIFHWADDR, &ifr) < 0)
582 {
583 error = clib_error_return_unix (0, "ioctl SIOCGIFHWADDR");
584 goto done;
585 }
586 else
Damjan Marionf1213b82016-03-13 02:22:06 +0100587 clib_memcpy (tm->ether_dst_mac, ifr.ifr_hwaddr.sa_data, 6);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700588 }
589
590 if (have_normal_interface)
591 {
592 vnet_main_t *vnm = vnet_get_main();
593 error = ethernet_register_interface
594 (vnm,
595 tuntap_dev_class.index,
596 0 /* device instance */,
597 tm->ether_dst_mac /* ethernet address */,
598 &tm->hw_if_index,
599 0 /* flag change */);
600 if (error)
601 clib_error_report (error);
602 tm->sw_if_index = tm->hw_if_index;
603 vm->os_punt_frame = tuntap_nopunt_frame;
604 }
605 else
606 {
607 vnet_main_t *vnm = vnet_get_main();
608 vnet_hw_interface_t * hi;
609
610 vm->os_punt_frame = tuntap_punt_frame;
611
612 tm->hw_if_index = vnet_register_interface
613 (vnm,
614 tuntap_dev_class.index, 0 /* device instance */,
615 tuntap_interface_class.index, 0);
616 hi = vnet_get_hw_interface (vnm, tm->hw_if_index);
617 tm->sw_if_index = hi->sw_if_index;
618
619 /* Interface is always up. */
620 vnet_hw_interface_set_flags (vnm, tm->hw_if_index,
621 VNET_HW_INTERFACE_FLAG_LINK_UP);
622 vnet_sw_interface_set_flags (vnm, tm->sw_if_index,
623 VNET_SW_INTERFACE_FLAG_ADMIN_UP);
624 }
625
626 {
627 unix_file_t template = {0};
628 template.read_function = tuntap_read_ready;
629 template.file_descriptor = tm->dev_net_tun_fd;
630 tm->unix_file_index = unix_file_add (&unix_main, &template);
631 }
632
633 done:
634 if (error)
635 {
636 if (tm->dev_net_tun_fd >= 0)
637 close (tm->dev_net_tun_fd);
638 if (tm->dev_tap_fd >= 0)
639 close (tm->dev_tap_fd);
640 }
641
642 return error;
643}
644
645VLIB_CONFIG_FUNCTION (tuntap_config, "tuntap");
646
647void
648tuntap_ip4_add_del_interface_address (ip4_main_t * im,
649 uword opaque,
650 u32 sw_if_index,
651 ip4_address_t * address,
652 u32 address_length,
653 u32 if_address_index,
654 u32 is_delete)
655{
656 tuntap_main_t * tm = &tuntap_main;
657 struct ifreq ifr;
658 subif_address_t subif_addr, * ap;
659 uword * p;
660
661 /* Tuntap disabled, or using a "normal" interface. */
662 if (tm->have_normal_interface || tm->dev_tap_fd < 0)
663 return;
664
665 /* See if we already know about this subif */
666 memset (&subif_addr, 0, sizeof (subif_addr));
667 subif_addr.sw_if_index = sw_if_index;
Damjan Marionf1213b82016-03-13 02:22:06 +0100668 clib_memcpy (&subif_addr.addr, address, sizeof (*address));
Ed Warnickecb9cada2015-12-08 15:45:58 -0700669
670 p = mhash_get (&tm->subif_mhash, &subif_addr);
671
672 if (p)
673 ap = pool_elt_at_index (tm->subifs, p[0]);
674 else
675 {
676 pool_get (tm->subifs, ap);
677 *ap = subif_addr;
678 mhash_set (&tm->subif_mhash, ap, ap - tm->subifs, 0);
679 }
680
681 /* Use subif pool index to select alias device. */
682 memset (&ifr, 0, sizeof (ifr));
683 snprintf (ifr.ifr_name, sizeof(ifr.ifr_name),
684 "%s:%d", tm->tun_name, (int)(ap - tm->subifs));
685
686 if (! is_delete)
687 {
688 struct sockaddr_in * sin;
689
690 sin = (struct sockaddr_in *)&ifr.ifr_addr;
691
692 /* Set ipv4 address, netmask. */
693 sin->sin_family = AF_INET;
Damjan Marionf1213b82016-03-13 02:22:06 +0100694 clib_memcpy (&sin->sin_addr.s_addr, address, 4);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700695 if (ioctl (tm->dev_tap_fd, SIOCSIFADDR, &ifr) < 0)
696 clib_unix_warning ("ioctl SIOCSIFADDR");
697
698 sin->sin_addr.s_addr = im->fib_masks[address_length];
699 if (ioctl (tm->dev_tap_fd, SIOCSIFNETMASK, &ifr) < 0)
700 clib_unix_warning ("ioctl SIOCSIFNETMASK");
701 }
702 else
703 {
704 mhash_unset (&tm->subif_mhash, &subif_addr, 0 /* old value ptr */);
705 pool_put (tm->subifs, ap);
706 }
707
708 /* get flags, modify to bring up interface... */
709 if (ioctl (tm->dev_tap_fd, SIOCGIFFLAGS, &ifr) < 0)
710 clib_unix_warning ("ioctl SIOCGIFFLAGS");
711
712 if (is_delete)
713 ifr.ifr_flags &= ~(IFF_UP | IFF_RUNNING);
714 else
715 ifr.ifr_flags |= (IFF_UP | IFF_RUNNING);
716
717 if (ioctl (tm->dev_tap_fd, SIOCSIFFLAGS, &ifr) < 0)
718 clib_unix_warning ("ioctl SIOCSIFFLAGS");
719}
720
721/*
722 * $$$$ gross workaround for a known #include bug
723 * #include <linux/ipv6.h> causes multiple definitions if
724 * netinet/in.h is also included.
725 */
726struct in6_ifreq {
727 struct in6_addr ifr6_addr;
728 u32 ifr6_prefixlen;
729 int ifr6_ifindex;
730};
731
732/*
733 * Both the v6 interface address API and the way ifconfig
734 * displays subinterfaces differ from their v4 couterparts.
735 * The code given here seems to work but YMMV.
736 */
737void
738tuntap_ip6_add_del_interface_address (ip6_main_t * im,
739 uword opaque,
740 u32 sw_if_index,
741 ip6_address_t * address,
742 u32 address_length,
743 u32 if_address_index,
744 u32 is_delete)
745{
746 tuntap_main_t * tm = &tuntap_main;
747 struct ifreq ifr;
748 struct in6_ifreq ifr6;
749 subif_address_t subif_addr, * ap;
750 uword * p;
751
752 /* Tuntap disabled, or using a "normal" interface. */
753 if (tm->have_normal_interface || tm->dev_tap_fd < 0)
754 return;
755
756 /* See if we already know about this subif */
757 memset (&subif_addr, 0, sizeof (subif_addr));
758 subif_addr.sw_if_index = sw_if_index;
759 subif_addr.is_v6 = 1;
Damjan Marionf1213b82016-03-13 02:22:06 +0100760 clib_memcpy (&subif_addr.addr, address, sizeof (*address));
Ed Warnickecb9cada2015-12-08 15:45:58 -0700761
762 p = mhash_get (&tm->subif_mhash, &subif_addr);
763
764 if (p)
765 ap = pool_elt_at_index (tm->subifs, p[0]);
766 else
767 {
768 pool_get (tm->subifs, ap);
769 *ap = subif_addr;
770 mhash_set (&tm->subif_mhash, ap, ap - tm->subifs, 0);
771 }
772
773 /* Use subif pool index to select alias device. */
774 memset (&ifr, 0, sizeof (ifr));
775 memset (&ifr6, 0, sizeof (ifr6));
776 snprintf (ifr.ifr_name, sizeof(ifr.ifr_name),
777 "%s:%d", tm->tun_name, (int)(ap - tm->subifs));
778
779 if (! is_delete)
780 {
781 int sockfd = socket (AF_INET6, SOCK_STREAM, 0);
782 if (sockfd < 0)
783 clib_unix_warning ("get ifindex socket");
784
785 if (ioctl (sockfd, SIOGIFINDEX, &ifr) < 0)
786 clib_unix_warning ("get ifindex");
787
788 ifr6.ifr6_ifindex = ifr.ifr_ifindex;
789 ifr6.ifr6_prefixlen = address_length;
Damjan Marionf1213b82016-03-13 02:22:06 +0100790 clib_memcpy (&ifr6.ifr6_addr, address, 16);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700791
792 if (ioctl (sockfd, SIOCSIFADDR, &ifr6) < 0)
793 clib_unix_warning ("set address");
794
795 close (sockfd);
796 }
797 else
798 {
799 int sockfd = socket (AF_INET6, SOCK_STREAM, 0);
800 if (sockfd < 0)
801 clib_unix_warning ("get ifindex socket");
802
803 if (ioctl (sockfd, SIOGIFINDEX, &ifr) < 0)
804 clib_unix_warning ("get ifindex");
805
806 ifr6.ifr6_ifindex = ifr.ifr_ifindex;
807 ifr6.ifr6_prefixlen = address_length;
Damjan Marionf1213b82016-03-13 02:22:06 +0100808 clib_memcpy (&ifr6.ifr6_addr, address, 16);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700809
810 if (ioctl (sockfd, SIOCDIFADDR, &ifr6) < 0)
811 clib_unix_warning ("del address");
812
Dave Barachf9c231e2016-08-05 10:10:18 -0400813 if (sockfd >= 0)
814 close (sockfd);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700815
816 mhash_unset (&tm->subif_mhash, &subif_addr, 0 /* old value ptr */);
817 pool_put (tm->subifs, ap);
818 }
819}
820
821static void
822tuntap_punt_frame (vlib_main_t * vm,
823 vlib_node_runtime_t * node,
824 vlib_frame_t * frame)
825{
826 tuntap_tx (vm, node, frame);
827 vlib_frame_free (vm, node, frame);
828}
829
830static void
831tuntap_nopunt_frame (vlib_main_t * vm,
832 vlib_node_runtime_t * node,
833 vlib_frame_t * frame)
834{
835 u32 * buffers = vlib_frame_args (frame);
836 uword n_packets = frame->n_vectors;
837 vlib_buffer_free (vm, buffers, n_packets);
838 vlib_frame_free (vm, node, frame);
839}
840
841VNET_HW_INTERFACE_CLASS (tuntap_interface_class,static) = {
842 .name = "tuntap",
843};
844
845static u8 * format_tuntap_interface_name (u8 * s, va_list * args)
846{
847 u32 i = va_arg (*args, u32);
848
849 s = format (s, "tuntap-%d", i);
850 return s;
851}
852
853static uword
854tuntap_intfc_tx (vlib_main_t * vm,
855 vlib_node_runtime_t * node,
856 vlib_frame_t * frame)
857{
858 tuntap_main_t * tm = &tuntap_main;
859 u32 * buffers = vlib_frame_args (frame);
860 uword n_buffers = frame->n_vectors;
861
862 /* Normal interface transmit happens only on the normal interface... */
863 if (tm->have_normal_interface)
864 return tuntap_tx (vm, node, frame);
865
866 vlib_buffer_free (vm, buffers, n_buffers);
867 return n_buffers;
868}
869
870VNET_DEVICE_CLASS (tuntap_dev_class,static) = {
871 .name = "tuntap",
872 .tx_function = tuntap_intfc_tx,
873 .format_device_name = format_tuntap_interface_name,
874};
875
876static clib_error_t *
877tuntap_init (vlib_main_t * vm)
878{
879 clib_error_t * error;
880 ip4_main_t * im4 = &ip4_main;
881 ip6_main_t * im6 = &ip6_main;
882 ip4_add_del_interface_address_callback_t cb4;
883 ip6_add_del_interface_address_callback_t cb6;
884 tuntap_main_t * tm = &tuntap_main;
885
886 error = vlib_call_init_function (vm, ip4_init);
887 if (error)
888 return error;
889
890 mhash_init (&tm->subif_mhash, sizeof (u32), sizeof(subif_address_t));
891
892 cb4.function = tuntap_ip4_add_del_interface_address;
893 cb4.function_opaque = 0;
894 vec_add1 (im4->add_del_interface_address_callbacks, cb4);
895
896 cb6.function = tuntap_ip6_add_del_interface_address;
897 cb6.function_opaque = 0;
898 vec_add1 (im6->add_del_interface_address_callbacks, cb6);
899
900 return 0;
901}
902
903VLIB_INIT_FUNCTION (tuntap_init);