blob: 7464d4e629da007b0df221cba569cc5d200bfcb4 [file] [log] [blame]
/*
 *------------------------------------------------------------------
 * af_packet.c - linux kernel packet interface
 *
 * Copyright (c) 2016 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *------------------------------------------------------------------
 */
19
20#include <linux/if_ether.h>
21#include <linux/if_packet.h>
22
23#include <vlib/vlib.h>
24#include <vlib/unix/unix.h>
25#include <vnet/ip/ip.h>
26#include <vnet/ethernet/ethernet.h>
27
28#include <vnet/devices/af_packet/af_packet.h>
29
/* Set to 1 to make DBG_SOCK () emit diagnostics through clib_warning (). */
#define AF_PACKET_DEBUG_SOCKET	0

/* Geometry of the TX ring requested from the kernel (PACKET_TX_RING). */
#define AF_PACKET_TX_FRAMES_PER_BLOCK	1024
#define AF_PACKET_TX_FRAME_SIZE		(2048 * 5)
#define AF_PACKET_TX_BLOCK_NR		1
#define AF_PACKET_TX_FRAME_NR		(AF_PACKET_TX_BLOCK_NR * \
					 AF_PACKET_TX_FRAMES_PER_BLOCK)
#define AF_PACKET_TX_BLOCK_SIZE		(AF_PACKET_TX_FRAME_SIZE * \
					 AF_PACKET_TX_FRAMES_PER_BLOCK)

/* Geometry of the RX ring requested from the kernel (PACKET_RX_RING). */
#define AF_PACKET_RX_FRAMES_PER_BLOCK	1024
#define AF_PACKET_RX_FRAME_SIZE		(2048 * 5)
#define AF_PACKET_RX_BLOCK_NR		1
#define AF_PACKET_RX_FRAME_NR		(AF_PACKET_RX_BLOCK_NR * \
					 AF_PACKET_RX_FRAMES_PER_BLOCK)
#define AF_PACKET_RX_BLOCK_SIZE		(AF_PACKET_RX_FRAME_SIZE * \
					 AF_PACKET_RX_FRAMES_PER_BLOCK)

/*
 * Debug trace helper.  Both variants expand to exactly one statement that
 * still needs the caller's trailing ';', so DBG_SOCK (...); is safe inside
 * an unbraced if/else as well as inside a block.
 */
#if AF_PACKET_DEBUG_SOCKET == 1
#define DBG_SOCK(...) do { clib_warning (__VA_ARGS__); } while (0)
#else
#define DBG_SOCK(...) do { } while (0)
#endif

/* defined in net/if.h but clashes with dpdk headers */
unsigned int if_nametoindex (const char *ifname);

typedef struct tpacket_req tpacket_req_t;
58
59static u32
Damjan Marion00a9dca2016-08-17 17:05:46 +020060af_packet_eth_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hi,
61 u32 flags)
Damjan Marion83243a02016-02-29 13:09:30 +010062{
63 /* nothing for now */
64 return 0;
65}
66
Damjan Marion00a9dca2016-08-17 17:05:46 +020067static clib_error_t *
68af_packet_fd_read_ready (unix_file_t * uf)
Damjan Marion83243a02016-02-29 13:09:30 +010069{
Damjan Marion00a9dca2016-08-17 17:05:46 +020070 af_packet_main_t *apm = &af_packet_main;
Damjan Marioneb743fa2017-03-20 16:34:15 +010071 vnet_main_t *vnm = vnet_get_main ();
Damjan Marion83243a02016-02-29 13:09:30 +010072 u32 idx = uf->private_data;
Damjan Marioneb743fa2017-03-20 16:34:15 +010073 af_packet_if_t *apif = pool_elt_at_index (apm->interfaces, idx);
Damjan Marion83243a02016-02-29 13:09:30 +010074
Damjan Marion00a9dca2016-08-17 17:05:46 +020075 apm->pending_input_bitmap =
76 clib_bitmap_set (apm->pending_input_bitmap, idx, 1);
Damjan Marion83243a02016-02-29 13:09:30 +010077
78 /* Schedule the rx node */
Damjan Marioneb743fa2017-03-20 16:34:15 +010079 vnet_device_input_set_interrupt_pending (vnm, apif->hw_if_index, 0);
Damjan Marion83243a02016-02-29 13:09:30 +010080
81 return 0;
82}
83
84static int
Damjan Marion00a9dca2016-08-17 17:05:46 +020085create_packet_v2_sock (u8 * name, tpacket_req_t * rx_req,
86 tpacket_req_t * tx_req, int *fd, u8 ** ring)
Damjan Marion83243a02016-02-29 13:09:30 +010087{
88 int ret, err;
89 struct sockaddr_ll sll;
90 uint host_if_index;
91 int ver = TPACKET_V2;
Damjan Marion00a9dca2016-08-17 17:05:46 +020092 socklen_t req_sz = sizeof (struct tpacket_req);
Damjan Marion83243a02016-02-29 13:09:30 +010093 u32 ring_sz = rx_req->tp_block_size * rx_req->tp_block_nr +
Damjan Marion00a9dca2016-08-17 17:05:46 +020094 tx_req->tp_block_size * tx_req->tp_block_nr;
Damjan Marion83243a02016-02-29 13:09:30 +010095
Damjan Marion00a9dca2016-08-17 17:05:46 +020096 host_if_index = if_nametoindex ((const char *) name);
Damjan Marion83243a02016-02-29 13:09:30 +010097
98 if (!host_if_index)
99 {
Damjan Marion00a9dca2016-08-17 17:05:46 +0200100 DBG_SOCK ("Wrong host interface name");
Damjan Marion83243a02016-02-29 13:09:30 +0100101 ret = VNET_API_ERROR_INVALID_INTERFACE;
102 goto error;
103 }
104
Damjan Marion00a9dca2016-08-17 17:05:46 +0200105 if ((*fd = socket (AF_PACKET, SOCK_RAW, htons (ETH_P_ALL))) < 0)
Damjan Marion83243a02016-02-29 13:09:30 +0100106 {
Damjan Marion00a9dca2016-08-17 17:05:46 +0200107 DBG_SOCK ("Failed to create socket");
Damjan Marion83243a02016-02-29 13:09:30 +0100108 ret = VNET_API_ERROR_SYSCALL_ERROR_1;
109 goto error;
110 }
111
Damjan Marion00a9dca2016-08-17 17:05:46 +0200112 if ((err =
113 setsockopt (*fd, SOL_PACKET, PACKET_VERSION, &ver, sizeof (ver))) < 0)
Damjan Marion83243a02016-02-29 13:09:30 +0100114 {
Damjan Marion00a9dca2016-08-17 17:05:46 +0200115 DBG_SOCK ("Failed to set rx packet interface version");
Damjan Marion83243a02016-02-29 13:09:30 +0100116 ret = VNET_API_ERROR_SYSCALL_ERROR_1;
117 goto error;
118 }
119
120 int opt = 1;
Damjan Marion00a9dca2016-08-17 17:05:46 +0200121 if ((err =
122 setsockopt (*fd, SOL_PACKET, PACKET_LOSS, &opt, sizeof (opt))) < 0)
Damjan Marion83243a02016-02-29 13:09:30 +0100123 {
Damjan Marion00a9dca2016-08-17 17:05:46 +0200124 DBG_SOCK ("Failed to set packet tx ring error handling option");
Damjan Marion83243a02016-02-29 13:09:30 +0100125 ret = VNET_API_ERROR_SYSCALL_ERROR_1;
126 goto error;
127 }
128
Damjan Marion00a9dca2016-08-17 17:05:46 +0200129 if ((err =
130 setsockopt (*fd, SOL_PACKET, PACKET_RX_RING, rx_req, req_sz)) < 0)
Damjan Marion83243a02016-02-29 13:09:30 +0100131 {
Damjan Marion00a9dca2016-08-17 17:05:46 +0200132 DBG_SOCK ("Failed to set packet rx ring options");
Damjan Marion83243a02016-02-29 13:09:30 +0100133 ret = VNET_API_ERROR_SYSCALL_ERROR_1;
134 goto error;
135 }
136
Damjan Marion00a9dca2016-08-17 17:05:46 +0200137 if ((err =
138 setsockopt (*fd, SOL_PACKET, PACKET_TX_RING, tx_req, req_sz)) < 0)
Damjan Marion83243a02016-02-29 13:09:30 +0100139 {
Damjan Marion00a9dca2016-08-17 17:05:46 +0200140 DBG_SOCK ("Failed to set packet rx ring options");
Damjan Marion83243a02016-02-29 13:09:30 +0100141 ret = VNET_API_ERROR_SYSCALL_ERROR_1;
142 goto error;
143 }
144
Damjan Marion00a9dca2016-08-17 17:05:46 +0200145 *ring =
146 mmap (NULL, ring_sz, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_LOCKED, *fd,
147 0);
Damjan Marion83243a02016-02-29 13:09:30 +0100148 if (*ring == MAP_FAILED)
149 {
Damjan Marion00a9dca2016-08-17 17:05:46 +0200150 DBG_SOCK ("mmap failure");
Damjan Marion83243a02016-02-29 13:09:30 +0100151 ret = VNET_API_ERROR_SYSCALL_ERROR_1;
152 goto error;
153 }
154
Damjan Marion00a9dca2016-08-17 17:05:46 +0200155 memset (&sll, 0, sizeof (sll));
Damjan Marion83243a02016-02-29 13:09:30 +0100156 sll.sll_family = PF_PACKET;
Damjan Marion00a9dca2016-08-17 17:05:46 +0200157 sll.sll_protocol = htons (ETH_P_ALL);
Damjan Marion83243a02016-02-29 13:09:30 +0100158 sll.sll_ifindex = host_if_index;
159
Damjan Marion00a9dca2016-08-17 17:05:46 +0200160 if ((err = bind (*fd, (struct sockaddr *) &sll, sizeof (sll))) < 0)
Damjan Marion83243a02016-02-29 13:09:30 +0100161 {
Damjan Marion00a9dca2016-08-17 17:05:46 +0200162 DBG_SOCK ("Failed to bind rx packet socket (error %d)", err);
Damjan Marion83243a02016-02-29 13:09:30 +0100163 ret = VNET_API_ERROR_SYSCALL_ERROR_1;
164 goto error;
165 }
166
167 return 0;
168error:
Dave Barach16ad6ae2016-07-28 17:55:30 -0400169 if (*fd >= 0)
Damjan Marion00a9dca2016-08-17 17:05:46 +0200170 close (*fd);
Peter Leidba76f22016-04-08 08:16:31 -0700171 *fd = -1;
Damjan Marion83243a02016-02-29 13:09:30 +0100172 return ret;
173}
174
175int
Damjan Marion00a9dca2016-08-17 17:05:46 +0200176af_packet_create_if (vlib_main_t * vm, u8 * host_if_name, u8 * hw_addr_set,
177 u32 * sw_if_index)
Damjan Marion83243a02016-02-29 13:09:30 +0100178{
Damjan Marion00a9dca2016-08-17 17:05:46 +0200179 af_packet_main_t *apm = &af_packet_main;
Damjan Marion83243a02016-02-29 13:09:30 +0100180 int ret, fd = -1;
Damjan Marion00a9dca2016-08-17 17:05:46 +0200181 struct tpacket_req *rx_req = 0;
182 struct tpacket_req *tx_req = 0;
183 u8 *ring = 0;
184 af_packet_if_t *apif = 0;
Damjan Marion83243a02016-02-29 13:09:30 +0100185 u8 hw_addr[6];
Damjan Marion00a9dca2016-08-17 17:05:46 +0200186 clib_error_t *error;
187 vnet_sw_interface_t *sw;
Mohsin KAZMIcf751ec2017-01-18 11:59:45 +0100188 vlib_thread_main_t *tm = vlib_get_thread_main ();
Damjan Marion00a9dca2016-08-17 17:05:46 +0200189 vnet_main_t *vnm = vnet_get_main ();
190 uword *p;
Damjan Marion83243a02016-02-29 13:09:30 +0100191 uword if_index;
Ivan Kellybfe737a2016-10-07 18:02:43 +0200192 u8 *host_if_name_dup = vec_dup (host_if_name);
Damjan Marion83243a02016-02-29 13:09:30 +0100193
194 p = mhash_get (&apm->if_index_by_host_if_name, host_if_name);
195 if (p)
196 {
197 return VNET_API_ERROR_SUBIF_ALREADY_EXISTS;
198 }
199
Damjan Marion00a9dca2016-08-17 17:05:46 +0200200 vec_validate (rx_req, 0);
Damjan Marion83243a02016-02-29 13:09:30 +0100201 rx_req->tp_block_size = AF_PACKET_RX_BLOCK_SIZE;
202 rx_req->tp_frame_size = AF_PACKET_RX_FRAME_SIZE;
203 rx_req->tp_block_nr = AF_PACKET_RX_BLOCK_NR;
204 rx_req->tp_frame_nr = AF_PACKET_RX_FRAME_NR;
205
Damjan Marion00a9dca2016-08-17 17:05:46 +0200206 vec_validate (tx_req, 0);
Damjan Marion83243a02016-02-29 13:09:30 +0100207 tx_req->tp_block_size = AF_PACKET_TX_BLOCK_SIZE;
208 tx_req->tp_frame_size = AF_PACKET_TX_FRAME_SIZE;
209 tx_req->tp_block_nr = AF_PACKET_TX_BLOCK_NR;
210 tx_req->tp_frame_nr = AF_PACKET_TX_FRAME_NR;
211
Damjan Marion00a9dca2016-08-17 17:05:46 +0200212 ret = create_packet_v2_sock (host_if_name, rx_req, tx_req, &fd, &ring);
Damjan Marion83243a02016-02-29 13:09:30 +0100213
214 if (ret != 0)
215 goto error;
216
217 /* So far everything looks good, let's create interface */
Damjan Marion048ee2e2016-03-16 22:59:21 +0100218 pool_get (apm->interfaces, apif);
Damjan Marion83243a02016-02-29 13:09:30 +0100219 if_index = apif - apm->interfaces;
220
221 apif->fd = fd;
222 apif->rx_ring = ring;
223 apif->tx_ring = ring + rx_req->tp_block_size * rx_req->tp_block_nr;
224 apif->rx_req = rx_req;
225 apif->tx_req = tx_req;
Ivan Kellybfe737a2016-10-07 18:02:43 +0200226 apif->host_if_name = host_if_name_dup;
Dave Barach13f3c452016-03-29 11:56:41 -0400227 apif->per_interface_next_index = ~0;
Peter Leidba76f22016-04-08 08:16:31 -0700228 apif->next_tx_frame = 0;
229 apif->next_rx_frame = 0;
Damjan Marion83243a02016-02-29 13:09:30 +0100230
Mohsin KAZMIcf751ec2017-01-18 11:59:45 +0100231 if (tm->n_vlib_mains > 1)
Damjan Marion1927da22017-03-27 17:08:20 +0200232 clib_spinlock_init (&apif->lockp);
Mohsin KAZMIcf751ec2017-01-18 11:59:45 +0100233
Damjan Marion83243a02016-02-29 13:09:30 +0100234 {
Damjan Marion00a9dca2016-08-17 17:05:46 +0200235 unix_file_t template = { 0 };
Damjan Marion83243a02016-02-29 13:09:30 +0100236 template.read_function = af_packet_fd_read_ready;
237 template.file_descriptor = fd;
238 template.private_data = if_index;
239 template.flags = UNIX_FILE_EVENT_EDGE_TRIGGERED;
240 apif->unix_file_index = unix_file_add (&unix_main, &template);
241 }
242
243 /*use configured or generate random MAC address */
244 if (hw_addr_set)
Damjan Marion00a9dca2016-08-17 17:05:46 +0200245 clib_memcpy (hw_addr, hw_addr_set, 6);
Damjan Marion83243a02016-02-29 13:09:30 +0100246 else
247 {
Damjan Marion00a9dca2016-08-17 17:05:46 +0200248 f64 now = vlib_time_now (vm);
Damjan Marion83243a02016-02-29 13:09:30 +0100249 u32 rnd;
250 rnd = (u32) (now * 1e6);
251 rnd = random_u32 (&rnd);
252
Damjan Marion00a9dca2016-08-17 17:05:46 +0200253 clib_memcpy (hw_addr + 2, &rnd, sizeof (rnd));
Damjan Marion83243a02016-02-29 13:09:30 +0100254 hw_addr[0] = 2;
255 hw_addr[1] = 0xfe;
256 }
257
Damjan Marion00a9dca2016-08-17 17:05:46 +0200258 error = ethernet_register_interface (vnm, af_packet_device_class.index,
259 if_index, hw_addr, &apif->hw_if_index,
260 af_packet_eth_flag_change);
Damjan Marion83243a02016-02-29 13:09:30 +0100261
262 if (error)
263 {
Damjan Marion00a9dca2016-08-17 17:05:46 +0200264 memset (apif, 0, sizeof (*apif));
265 pool_put (apm->interfaces, apif);
Damjan Marion83243a02016-02-29 13:09:30 +0100266 clib_error_report (error);
267 ret = VNET_API_ERROR_SYSCALL_ERROR_1;
268 goto error;
269 }
270
271 sw = vnet_get_hw_sw_interface (vnm, apif->hw_if_index);
272 apif->sw_if_index = sw->sw_if_index;
Damjan Marion153646e2017-04-05 18:15:45 +0200273 vnet_set_device_input_node (vnm, apif->hw_if_index,
274 af_packet_input_node.index);
275 vnet_device_input_assign_thread (vnm, apif->hw_if_index, 0, /* queue */
Damjan Marioneb743fa2017-03-20 16:34:15 +0100276 ~0 /* any cpu */ );
Damjan Marion153646e2017-04-05 18:15:45 +0200277 vnet_device_input_set_mode (vnm, apif->hw_if_index, 0,
278 VNET_DEVICE_INPUT_MODE_INTERRUPT);
Damjan Marion83243a02016-02-29 13:09:30 +0100279
280 vnet_hw_interface_set_flags (vnm, apif->hw_if_index,
281 VNET_HW_INTERFACE_FLAG_LINK_UP);
282
Ivan Kellybfe737a2016-10-07 18:02:43 +0200283 mhash_set_mem (&apm->if_index_by_host_if_name, host_if_name_dup, &if_index,
284 0);
Pierre Pfister78ea9c22016-05-23 12:51:54 +0100285 if (sw_if_index)
286 *sw_if_index = apif->sw_if_index;
Mohsin KAZMIcf751ec2017-01-18 11:59:45 +0100287
Damjan Marion83243a02016-02-29 13:09:30 +0100288 return 0;
289
290error:
Ivan Kellybfe737a2016-10-07 18:02:43 +0200291 vec_free (host_if_name_dup);
Damjan Marion00a9dca2016-08-17 17:05:46 +0200292 vec_free (rx_req);
293 vec_free (tx_req);
Damjan Marion83243a02016-02-29 13:09:30 +0100294 return ret;
295}
296
Peter Leidba76f22016-04-08 08:16:31 -0700297int
Damjan Marion00a9dca2016-08-17 17:05:46 +0200298af_packet_delete_if (vlib_main_t * vm, u8 * host_if_name)
Peter Leidba76f22016-04-08 08:16:31 -0700299{
Damjan Marion00a9dca2016-08-17 17:05:46 +0200300 vnet_main_t *vnm = vnet_get_main ();
Peter Leidba76f22016-04-08 08:16:31 -0700301 af_packet_main_t *apm = &af_packet_main;
302 af_packet_if_t *apif;
303 uword *p;
304 uword if_index;
305 u32 ring_sz;
306
Damjan Marion00a9dca2016-08-17 17:05:46 +0200307 p = mhash_get (&apm->if_index_by_host_if_name, host_if_name);
308 if (p == NULL)
309 {
310 clib_warning ("Host interface %s does not exist", host_if_name);
311 return VNET_API_ERROR_SYSCALL_ERROR_1;
312 }
313 apif = pool_elt_at_index (apm->interfaces, p[0]);
Peter Leidba76f22016-04-08 08:16:31 -0700314 if_index = apif - apm->interfaces;
315
316 /* bring down the interface */
Damjan Marion00a9dca2016-08-17 17:05:46 +0200317 vnet_hw_interface_set_flags (vnm, apif->hw_if_index, 0);
Peter Leidba76f22016-04-08 08:16:31 -0700318
319 /* clean up */
Damjan Marion00a9dca2016-08-17 17:05:46 +0200320 if (apif->unix_file_index != ~0)
321 {
322 unix_file_del (&unix_main, unix_main.file_pool + apif->unix_file_index);
323 apif->unix_file_index = ~0;
324 }
Eyal Barif298ecf2016-09-19 18:47:39 +0300325 else
326 close (apif->fd);
327
Peter Leidba76f22016-04-08 08:16:31 -0700328 ring_sz = apif->rx_req->tp_block_size * apif->rx_req->tp_block_nr +
Damjan Marion00a9dca2016-08-17 17:05:46 +0200329 apif->tx_req->tp_block_size * apif->tx_req->tp_block_nr;
330 if (munmap (apif->rx_ring, ring_sz))
331 clib_warning ("Host interface %s could not free rx/tx ring",
332 host_if_name);
Peter Leidba76f22016-04-08 08:16:31 -0700333 apif->rx_ring = NULL;
334 apif->tx_ring = NULL;
Peter Leidba76f22016-04-08 08:16:31 -0700335 apif->fd = -1;
336
Damjan Marion00a9dca2016-08-17 17:05:46 +0200337 vec_free (apif->rx_req);
Peter Leidba76f22016-04-08 08:16:31 -0700338 apif->rx_req = NULL;
Damjan Marion00a9dca2016-08-17 17:05:46 +0200339 vec_free (apif->tx_req);
Peter Leidba76f22016-04-08 08:16:31 -0700340 apif->tx_req = NULL;
341
Damjan Marion00a9dca2016-08-17 17:05:46 +0200342 vec_free (apif->host_if_name);
Peter Leidba76f22016-04-08 08:16:31 -0700343 apif->host_if_name = NULL;
344
Damjan Marion00a9dca2016-08-17 17:05:46 +0200345 mhash_unset (&apm->if_index_by_host_if_name, host_if_name, &if_index);
Peter Leidba76f22016-04-08 08:16:31 -0700346
Damjan Marion00a9dca2016-08-17 17:05:46 +0200347 ethernet_delete_interface (vnm, apif->hw_if_index);
Peter Leidba76f22016-04-08 08:16:31 -0700348
Damjan Marion00a9dca2016-08-17 17:05:46 +0200349 pool_put (apm->interfaces, apif);
Peter Leidba76f22016-04-08 08:16:31 -0700350
351 return 0;
352}
353
Damjan Marion83243a02016-02-29 13:09:30 +0100354static clib_error_t *
355af_packet_init (vlib_main_t * vm)
356{
Damjan Marion00a9dca2016-08-17 17:05:46 +0200357 af_packet_main_t *apm = &af_packet_main;
Damjan Marion553f6bd2016-09-07 11:54:22 +0200358 vlib_thread_main_t *tm = vlib_get_thread_main ();
Damjan Marion83243a02016-02-29 13:09:30 +0100359
360 memset (apm, 0, sizeof (af_packet_main_t));
361
362 mhash_init_vec_string (&apm->if_index_by_host_if_name, sizeof (uword));
363
Damjan Marion553f6bd2016-09-07 11:54:22 +0200364 vec_validate_aligned (apm->rx_buffers, tm->n_vlib_mains - 1,
365 CLIB_CACHE_LINE_BYTES);
366
Damjan Marion83243a02016-02-29 13:09:30 +0100367 return 0;
368}
369
370VLIB_INIT_FUNCTION (af_packet_init);
Damjan Marion00a9dca2016-08-17 17:05:46 +0200371
/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */