/*
 *------------------------------------------------------------------
 * af_packet.c - linux kernel packet interface
 *
 * Copyright (c) 2016 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *------------------------------------------------------------------
 */
19
20#include <linux/if_ether.h>
21#include <linux/if_packet.h>
22
23#include <vlib/vlib.h>
24#include <vlib/unix/unix.h>
25#include <vnet/ip/ip.h>
26#include <vnet/ethernet/ethernet.h>
27
28#include <vnet/devices/af_packet/af_packet.h>
29
/* Set to 1 to have DBG_SOCK () emit socket setup diagnostics. */
#define AF_PACKET_DEBUG_SOCKET 0

/*
 * TX ring geometry handed to PACKET_TX_RING: a single block holding
 * 1024 frames of 10240 bytes each.
 */
#define AF_PACKET_TX_FRAMES_PER_BLOCK 1024
#define AF_PACKET_TX_FRAME_SIZE (2048 * 5)
#define AF_PACKET_TX_BLOCK_NR 1
#define AF_PACKET_TX_FRAME_NR (AF_PACKET_TX_BLOCK_NR * \
                               AF_PACKET_TX_FRAMES_PER_BLOCK)
#define AF_PACKET_TX_BLOCK_SIZE (AF_PACKET_TX_FRAME_SIZE * \
                                 AF_PACKET_TX_FRAMES_PER_BLOCK)

/*
 * RX ring geometry handed to PACKET_RX_RING: same layout as the TX ring.
 */
#define AF_PACKET_RX_FRAMES_PER_BLOCK 1024
#define AF_PACKET_RX_FRAME_SIZE (2048 * 5)
#define AF_PACKET_RX_BLOCK_NR 1
#define AF_PACKET_RX_FRAME_NR (AF_PACKET_RX_BLOCK_NR * \
                               AF_PACKET_RX_FRAMES_PER_BLOCK)
#define AF_PACKET_RX_BLOCK_SIZE (AF_PACKET_RX_FRAME_SIZE * \
                                 AF_PACKET_RX_FRAMES_PER_BLOCK)

/*
 * Debug trace helper. The expansion deliberately carries no trailing
 * semicolon: the call site supplies it, so "DBG_SOCK (...);" is exactly one
 * statement and stays safe inside an unbraced if/else.
 */
#if AF_PACKET_DEBUG_SOCKET == 1
#define DBG_SOCK(args...) clib_warning(args)
#else
#define DBG_SOCK(args...)
#endif

/* Declared in net/if.h, which cannot be included here because it clashes
 * with the dpdk headers. */
unsigned int if_nametoindex (const char *ifname);

/* Shorthand for the kernel's packet ring request descriptor. */
typedef struct tpacket_req tpacket_req_t;
58
59static u32
Damjan Marion00a9dca2016-08-17 17:05:46 +020060af_packet_eth_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hi,
61 u32 flags)
Damjan Marion83243a02016-02-29 13:09:30 +010062{
63 /* nothing for now */
64 return 0;
65}
66
Damjan Marion00a9dca2016-08-17 17:05:46 +020067static clib_error_t *
68af_packet_fd_read_ready (unix_file_t * uf)
Damjan Marion83243a02016-02-29 13:09:30 +010069{
Damjan Marion00a9dca2016-08-17 17:05:46 +020070 af_packet_main_t *apm = &af_packet_main;
Damjan Marioneb743fa2017-03-20 16:34:15 +010071 vnet_main_t *vnm = vnet_get_main ();
Damjan Marion83243a02016-02-29 13:09:30 +010072 u32 idx = uf->private_data;
Damjan Marioneb743fa2017-03-20 16:34:15 +010073 af_packet_if_t *apif = pool_elt_at_index (apm->interfaces, idx);
Damjan Marion83243a02016-02-29 13:09:30 +010074
Damjan Marion00a9dca2016-08-17 17:05:46 +020075 apm->pending_input_bitmap =
76 clib_bitmap_set (apm->pending_input_bitmap, idx, 1);
Damjan Marion83243a02016-02-29 13:09:30 +010077
78 /* Schedule the rx node */
Damjan Marioneb743fa2017-03-20 16:34:15 +010079 vnet_device_input_set_interrupt_pending (vnm, apif->hw_if_index, 0);
Damjan Marion83243a02016-02-29 13:09:30 +010080
81 return 0;
82}
83
84static int
Damjan Marion00a9dca2016-08-17 17:05:46 +020085create_packet_v2_sock (u8 * name, tpacket_req_t * rx_req,
86 tpacket_req_t * tx_req, int *fd, u8 ** ring)
Damjan Marion83243a02016-02-29 13:09:30 +010087{
88 int ret, err;
89 struct sockaddr_ll sll;
90 uint host_if_index;
91 int ver = TPACKET_V2;
Damjan Marion00a9dca2016-08-17 17:05:46 +020092 socklen_t req_sz = sizeof (struct tpacket_req);
Damjan Marion83243a02016-02-29 13:09:30 +010093 u32 ring_sz = rx_req->tp_block_size * rx_req->tp_block_nr +
Damjan Marion00a9dca2016-08-17 17:05:46 +020094 tx_req->tp_block_size * tx_req->tp_block_nr;
Damjan Marion83243a02016-02-29 13:09:30 +010095
Damjan Marion00a9dca2016-08-17 17:05:46 +020096 host_if_index = if_nametoindex ((const char *) name);
Damjan Marion83243a02016-02-29 13:09:30 +010097
98 if (!host_if_index)
99 {
Damjan Marion00a9dca2016-08-17 17:05:46 +0200100 DBG_SOCK ("Wrong host interface name");
Damjan Marion83243a02016-02-29 13:09:30 +0100101 ret = VNET_API_ERROR_INVALID_INTERFACE;
102 goto error;
103 }
104
Damjan Marion00a9dca2016-08-17 17:05:46 +0200105 if ((*fd = socket (AF_PACKET, SOCK_RAW, htons (ETH_P_ALL))) < 0)
Damjan Marion83243a02016-02-29 13:09:30 +0100106 {
Damjan Marion00a9dca2016-08-17 17:05:46 +0200107 DBG_SOCK ("Failed to create socket");
Damjan Marion83243a02016-02-29 13:09:30 +0100108 ret = VNET_API_ERROR_SYSCALL_ERROR_1;
109 goto error;
110 }
111
Damjan Marion00a9dca2016-08-17 17:05:46 +0200112 if ((err =
113 setsockopt (*fd, SOL_PACKET, PACKET_VERSION, &ver, sizeof (ver))) < 0)
Damjan Marion83243a02016-02-29 13:09:30 +0100114 {
Damjan Marion00a9dca2016-08-17 17:05:46 +0200115 DBG_SOCK ("Failed to set rx packet interface version");
Damjan Marion83243a02016-02-29 13:09:30 +0100116 ret = VNET_API_ERROR_SYSCALL_ERROR_1;
117 goto error;
118 }
119
120 int opt = 1;
Damjan Marion00a9dca2016-08-17 17:05:46 +0200121 if ((err =
122 setsockopt (*fd, SOL_PACKET, PACKET_LOSS, &opt, sizeof (opt))) < 0)
Damjan Marion83243a02016-02-29 13:09:30 +0100123 {
Damjan Marion00a9dca2016-08-17 17:05:46 +0200124 DBG_SOCK ("Failed to set packet tx ring error handling option");
Damjan Marion83243a02016-02-29 13:09:30 +0100125 ret = VNET_API_ERROR_SYSCALL_ERROR_1;
126 goto error;
127 }
128
Damjan Marion00a9dca2016-08-17 17:05:46 +0200129 if ((err =
130 setsockopt (*fd, SOL_PACKET, PACKET_RX_RING, rx_req, req_sz)) < 0)
Damjan Marion83243a02016-02-29 13:09:30 +0100131 {
Damjan Marion00a9dca2016-08-17 17:05:46 +0200132 DBG_SOCK ("Failed to set packet rx ring options");
Damjan Marion83243a02016-02-29 13:09:30 +0100133 ret = VNET_API_ERROR_SYSCALL_ERROR_1;
134 goto error;
135 }
136
Damjan Marion00a9dca2016-08-17 17:05:46 +0200137 if ((err =
138 setsockopt (*fd, SOL_PACKET, PACKET_TX_RING, tx_req, req_sz)) < 0)
Damjan Marion83243a02016-02-29 13:09:30 +0100139 {
Damjan Marion00a9dca2016-08-17 17:05:46 +0200140 DBG_SOCK ("Failed to set packet rx ring options");
Damjan Marion83243a02016-02-29 13:09:30 +0100141 ret = VNET_API_ERROR_SYSCALL_ERROR_1;
142 goto error;
143 }
144
Damjan Marion00a9dca2016-08-17 17:05:46 +0200145 *ring =
146 mmap (NULL, ring_sz, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_LOCKED, *fd,
147 0);
Damjan Marion83243a02016-02-29 13:09:30 +0100148 if (*ring == MAP_FAILED)
149 {
Damjan Marion00a9dca2016-08-17 17:05:46 +0200150 DBG_SOCK ("mmap failure");
Damjan Marion83243a02016-02-29 13:09:30 +0100151 ret = VNET_API_ERROR_SYSCALL_ERROR_1;
152 goto error;
153 }
154
Damjan Marion00a9dca2016-08-17 17:05:46 +0200155 memset (&sll, 0, sizeof (sll));
Damjan Marion83243a02016-02-29 13:09:30 +0100156 sll.sll_family = PF_PACKET;
Damjan Marion00a9dca2016-08-17 17:05:46 +0200157 sll.sll_protocol = htons (ETH_P_ALL);
Damjan Marion83243a02016-02-29 13:09:30 +0100158 sll.sll_ifindex = host_if_index;
159
Damjan Marion00a9dca2016-08-17 17:05:46 +0200160 if ((err = bind (*fd, (struct sockaddr *) &sll, sizeof (sll))) < 0)
Damjan Marion83243a02016-02-29 13:09:30 +0100161 {
Damjan Marion00a9dca2016-08-17 17:05:46 +0200162 DBG_SOCK ("Failed to bind rx packet socket (error %d)", err);
Damjan Marion83243a02016-02-29 13:09:30 +0100163 ret = VNET_API_ERROR_SYSCALL_ERROR_1;
164 goto error;
165 }
166
167 return 0;
168error:
Dave Barach16ad6ae2016-07-28 17:55:30 -0400169 if (*fd >= 0)
Damjan Marion00a9dca2016-08-17 17:05:46 +0200170 close (*fd);
Peter Leidba76f22016-04-08 08:16:31 -0700171 *fd = -1;
Damjan Marion83243a02016-02-29 13:09:30 +0100172 return ret;
173}
174
175int
Damjan Marion00a9dca2016-08-17 17:05:46 +0200176af_packet_create_if (vlib_main_t * vm, u8 * host_if_name, u8 * hw_addr_set,
177 u32 * sw_if_index)
Damjan Marion83243a02016-02-29 13:09:30 +0100178{
Damjan Marion00a9dca2016-08-17 17:05:46 +0200179 af_packet_main_t *apm = &af_packet_main;
Damjan Marion83243a02016-02-29 13:09:30 +0100180 int ret, fd = -1;
Damjan Marion00a9dca2016-08-17 17:05:46 +0200181 struct tpacket_req *rx_req = 0;
182 struct tpacket_req *tx_req = 0;
183 u8 *ring = 0;
184 af_packet_if_t *apif = 0;
Damjan Marion83243a02016-02-29 13:09:30 +0100185 u8 hw_addr[6];
Damjan Marion00a9dca2016-08-17 17:05:46 +0200186 clib_error_t *error;
187 vnet_sw_interface_t *sw;
Mohsin KAZMIcf751ec2017-01-18 11:59:45 +0100188 vlib_thread_main_t *tm = vlib_get_thread_main ();
Damjan Marion00a9dca2016-08-17 17:05:46 +0200189 vnet_main_t *vnm = vnet_get_main ();
190 uword *p;
Damjan Marion83243a02016-02-29 13:09:30 +0100191 uword if_index;
Ivan Kellybfe737a2016-10-07 18:02:43 +0200192 u8 *host_if_name_dup = vec_dup (host_if_name);
Damjan Marion83243a02016-02-29 13:09:30 +0100193
194 p = mhash_get (&apm->if_index_by_host_if_name, host_if_name);
195 if (p)
196 {
197 return VNET_API_ERROR_SUBIF_ALREADY_EXISTS;
198 }
199
Damjan Marion00a9dca2016-08-17 17:05:46 +0200200 vec_validate (rx_req, 0);
Damjan Marion83243a02016-02-29 13:09:30 +0100201 rx_req->tp_block_size = AF_PACKET_RX_BLOCK_SIZE;
202 rx_req->tp_frame_size = AF_PACKET_RX_FRAME_SIZE;
203 rx_req->tp_block_nr = AF_PACKET_RX_BLOCK_NR;
204 rx_req->tp_frame_nr = AF_PACKET_RX_FRAME_NR;
205
Damjan Marion00a9dca2016-08-17 17:05:46 +0200206 vec_validate (tx_req, 0);
Damjan Marion83243a02016-02-29 13:09:30 +0100207 tx_req->tp_block_size = AF_PACKET_TX_BLOCK_SIZE;
208 tx_req->tp_frame_size = AF_PACKET_TX_FRAME_SIZE;
209 tx_req->tp_block_nr = AF_PACKET_TX_BLOCK_NR;
210 tx_req->tp_frame_nr = AF_PACKET_TX_FRAME_NR;
211
Damjan Marion00a9dca2016-08-17 17:05:46 +0200212 ret = create_packet_v2_sock (host_if_name, rx_req, tx_req, &fd, &ring);
Damjan Marion83243a02016-02-29 13:09:30 +0100213
214 if (ret != 0)
215 goto error;
216
217 /* So far everything looks good, let's create interface */
Damjan Marion048ee2e2016-03-16 22:59:21 +0100218 pool_get (apm->interfaces, apif);
Damjan Marion83243a02016-02-29 13:09:30 +0100219 if_index = apif - apm->interfaces;
220
221 apif->fd = fd;
222 apif->rx_ring = ring;
223 apif->tx_ring = ring + rx_req->tp_block_size * rx_req->tp_block_nr;
224 apif->rx_req = rx_req;
225 apif->tx_req = tx_req;
Ivan Kellybfe737a2016-10-07 18:02:43 +0200226 apif->host_if_name = host_if_name_dup;
Dave Barach13f3c452016-03-29 11:56:41 -0400227 apif->per_interface_next_index = ~0;
Peter Leidba76f22016-04-08 08:16:31 -0700228 apif->next_tx_frame = 0;
229 apif->next_rx_frame = 0;
Damjan Marion83243a02016-02-29 13:09:30 +0100230
Mohsin KAZMIcf751ec2017-01-18 11:59:45 +0100231 if (tm->n_vlib_mains > 1)
232 {
233 apif->lockp = clib_mem_alloc_aligned (CLIB_CACHE_LINE_BYTES,
234 CLIB_CACHE_LINE_BYTES);
235 memset ((void *) apif->lockp, 0, CLIB_CACHE_LINE_BYTES);
236 }
237
Damjan Marion83243a02016-02-29 13:09:30 +0100238 {
Damjan Marion00a9dca2016-08-17 17:05:46 +0200239 unix_file_t template = { 0 };
Damjan Marion83243a02016-02-29 13:09:30 +0100240 template.read_function = af_packet_fd_read_ready;
241 template.file_descriptor = fd;
242 template.private_data = if_index;
243 template.flags = UNIX_FILE_EVENT_EDGE_TRIGGERED;
244 apif->unix_file_index = unix_file_add (&unix_main, &template);
245 }
246
247 /*use configured or generate random MAC address */
248 if (hw_addr_set)
Damjan Marion00a9dca2016-08-17 17:05:46 +0200249 clib_memcpy (hw_addr, hw_addr_set, 6);
Damjan Marion83243a02016-02-29 13:09:30 +0100250 else
251 {
Damjan Marion00a9dca2016-08-17 17:05:46 +0200252 f64 now = vlib_time_now (vm);
Damjan Marion83243a02016-02-29 13:09:30 +0100253 u32 rnd;
254 rnd = (u32) (now * 1e6);
255 rnd = random_u32 (&rnd);
256
Damjan Marion00a9dca2016-08-17 17:05:46 +0200257 clib_memcpy (hw_addr + 2, &rnd, sizeof (rnd));
Damjan Marion83243a02016-02-29 13:09:30 +0100258 hw_addr[0] = 2;
259 hw_addr[1] = 0xfe;
260 }
261
Damjan Marion00a9dca2016-08-17 17:05:46 +0200262 error = ethernet_register_interface (vnm, af_packet_device_class.index,
263 if_index, hw_addr, &apif->hw_if_index,
264 af_packet_eth_flag_change);
Damjan Marion83243a02016-02-29 13:09:30 +0100265
266 if (error)
267 {
Damjan Marion00a9dca2016-08-17 17:05:46 +0200268 memset (apif, 0, sizeof (*apif));
269 pool_put (apm->interfaces, apif);
Damjan Marion83243a02016-02-29 13:09:30 +0100270 clib_error_report (error);
271 ret = VNET_API_ERROR_SYSCALL_ERROR_1;
272 goto error;
273 }
274
275 sw = vnet_get_hw_sw_interface (vnm, apif->hw_if_index);
276 apif->sw_if_index = sw->sw_if_index;
Damjan Marioneb743fa2017-03-20 16:34:15 +0100277 vnet_set_device_input_node (apif->hw_if_index, af_packet_input_node.index);
278 vnet_device_input_assign_thread (apif->hw_if_index, 0, /* queue */
279 ~0 /* any cpu */ );
Damjan Marion83243a02016-02-29 13:09:30 +0100280
281 vnet_hw_interface_set_flags (vnm, apif->hw_if_index,
282 VNET_HW_INTERFACE_FLAG_LINK_UP);
283
Ivan Kellybfe737a2016-10-07 18:02:43 +0200284 mhash_set_mem (&apm->if_index_by_host_if_name, host_if_name_dup, &if_index,
285 0);
Pierre Pfister78ea9c22016-05-23 12:51:54 +0100286 if (sw_if_index)
287 *sw_if_index = apif->sw_if_index;
Mohsin KAZMIcf751ec2017-01-18 11:59:45 +0100288
Damjan Marion83243a02016-02-29 13:09:30 +0100289 return 0;
290
291error:
Ivan Kellybfe737a2016-10-07 18:02:43 +0200292 vec_free (host_if_name_dup);
Damjan Marion00a9dca2016-08-17 17:05:46 +0200293 vec_free (rx_req);
294 vec_free (tx_req);
Damjan Marion83243a02016-02-29 13:09:30 +0100295 return ret;
296}
297
Peter Leidba76f22016-04-08 08:16:31 -0700298int
Damjan Marion00a9dca2016-08-17 17:05:46 +0200299af_packet_delete_if (vlib_main_t * vm, u8 * host_if_name)
Peter Leidba76f22016-04-08 08:16:31 -0700300{
Damjan Marion00a9dca2016-08-17 17:05:46 +0200301 vnet_main_t *vnm = vnet_get_main ();
Peter Leidba76f22016-04-08 08:16:31 -0700302 af_packet_main_t *apm = &af_packet_main;
303 af_packet_if_t *apif;
304 uword *p;
305 uword if_index;
306 u32 ring_sz;
307
Damjan Marion00a9dca2016-08-17 17:05:46 +0200308 p = mhash_get (&apm->if_index_by_host_if_name, host_if_name);
309 if (p == NULL)
310 {
311 clib_warning ("Host interface %s does not exist", host_if_name);
312 return VNET_API_ERROR_SYSCALL_ERROR_1;
313 }
314 apif = pool_elt_at_index (apm->interfaces, p[0]);
Peter Leidba76f22016-04-08 08:16:31 -0700315 if_index = apif - apm->interfaces;
316
317 /* bring down the interface */
Damjan Marion00a9dca2016-08-17 17:05:46 +0200318 vnet_hw_interface_set_flags (vnm, apif->hw_if_index, 0);
Peter Leidba76f22016-04-08 08:16:31 -0700319
320 /* clean up */
Damjan Marion00a9dca2016-08-17 17:05:46 +0200321 if (apif->unix_file_index != ~0)
322 {
323 unix_file_del (&unix_main, unix_main.file_pool + apif->unix_file_index);
324 apif->unix_file_index = ~0;
325 }
Eyal Barif298ecf2016-09-19 18:47:39 +0300326 else
327 close (apif->fd);
328
Peter Leidba76f22016-04-08 08:16:31 -0700329 ring_sz = apif->rx_req->tp_block_size * apif->rx_req->tp_block_nr +
Damjan Marion00a9dca2016-08-17 17:05:46 +0200330 apif->tx_req->tp_block_size * apif->tx_req->tp_block_nr;
331 if (munmap (apif->rx_ring, ring_sz))
332 clib_warning ("Host interface %s could not free rx/tx ring",
333 host_if_name);
Peter Leidba76f22016-04-08 08:16:31 -0700334 apif->rx_ring = NULL;
335 apif->tx_ring = NULL;
Peter Leidba76f22016-04-08 08:16:31 -0700336 apif->fd = -1;
337
Damjan Marion00a9dca2016-08-17 17:05:46 +0200338 vec_free (apif->rx_req);
Peter Leidba76f22016-04-08 08:16:31 -0700339 apif->rx_req = NULL;
Damjan Marion00a9dca2016-08-17 17:05:46 +0200340 vec_free (apif->tx_req);
Peter Leidba76f22016-04-08 08:16:31 -0700341 apif->tx_req = NULL;
342
Damjan Marion00a9dca2016-08-17 17:05:46 +0200343 vec_free (apif->host_if_name);
Peter Leidba76f22016-04-08 08:16:31 -0700344 apif->host_if_name = NULL;
345
Damjan Marion00a9dca2016-08-17 17:05:46 +0200346 mhash_unset (&apm->if_index_by_host_if_name, host_if_name, &if_index);
Peter Leidba76f22016-04-08 08:16:31 -0700347
Damjan Marion00a9dca2016-08-17 17:05:46 +0200348 ethernet_delete_interface (vnm, apif->hw_if_index);
Peter Leidba76f22016-04-08 08:16:31 -0700349
Damjan Marion00a9dca2016-08-17 17:05:46 +0200350 pool_put (apm->interfaces, apif);
Peter Leidba76f22016-04-08 08:16:31 -0700351
352 return 0;
353}
354
Damjan Marion83243a02016-02-29 13:09:30 +0100355static clib_error_t *
356af_packet_init (vlib_main_t * vm)
357{
Damjan Marion00a9dca2016-08-17 17:05:46 +0200358 af_packet_main_t *apm = &af_packet_main;
Damjan Marion553f6bd2016-09-07 11:54:22 +0200359 vlib_thread_main_t *tm = vlib_get_thread_main ();
Damjan Marion83243a02016-02-29 13:09:30 +0100360
361 memset (apm, 0, sizeof (af_packet_main_t));
362
363 mhash_init_vec_string (&apm->if_index_by_host_if_name, sizeof (uword));
364
Damjan Marion553f6bd2016-09-07 11:54:22 +0200365 vec_validate_aligned (apm->rx_buffers, tm->n_vlib_mains - 1,
366 CLIB_CACHE_LINE_BYTES);
367
Damjan Marion83243a02016-02-29 13:09:30 +0100368 return 0;
369}
370
371VLIB_INIT_FUNCTION (af_packet_init);
Damjan Marion00a9dca2016-08-17 17:05:46 +0200372
/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */