blob: ac6fc05a627bdb6127595f4751c8847a37006735 [file] [log] [blame]
Damjan Marion83243a02016-02-29 13:09:30 +01001/*
2 *------------------------------------------------------------------
3 * af_packet.c - linux kernel packet interface
4 *
5 * Copyright (c) 2016 Cisco and/or its affiliates.
6 * Licensed under the Apache License, Version 2.0 (the "License");
7 * you may not use this file except in compliance with the License.
8 * You may obtain a copy of the License at:
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 *------------------------------------------------------------------
18 */
19
20#include <linux/if_ether.h>
21#include <linux/if_packet.h>
Mohsin Kazmia50a14c2018-04-25 15:58:05 +020022#include <sys/ioctl.h>
23#include <net/if.h>
Ray Kinsellac855b732017-04-21 12:24:43 +010024#include <dirent.h>
Ray Kinsella7bfa1192017-05-15 11:52:43 +010025#include <sys/stat.h>
26#include <sys/types.h>
27#include <fcntl.h>
Damjan Marion83243a02016-02-29 13:09:30 +010028
Damjan Marion01914ce2017-09-14 19:04:50 +020029#include <vppinfra/linux/sysfs.h>
Damjan Marion83243a02016-02-29 13:09:30 +010030#include <vlib/vlib.h>
31#include <vlib/unix/unix.h>
32#include <vnet/ip/ip.h>
Aloys Augustine39376e2021-03-29 22:08:09 +020033#include <vnet/devices/netlink.h>
Damjan Marion83243a02016-02-29 13:09:30 +010034#include <vnet/ethernet/ethernet.h>
Mohammed Hawari85c19432020-12-18 16:29:45 +010035#include <vnet/interface/rx_queue_funcs.h>
Mohsin Kazmi5a7aa512022-03-25 14:27:45 +000036#include <vnet/interface/tx_queue_funcs.h>
Damjan Marion83243a02016-02-29 13:09:30 +010037
38#include <vnet/devices/af_packet/af_packet.h>
39
/* Global state of the af_packet driver. */
af_packet_main_t af_packet_main;

/* Hardware interface class registered for interfaces created in IP mode;
 * it carries the point-to-point class flag. */
VNET_HW_INTERFACE_CLASS (af_packet_ip_device_hw_interface_class, static) = {
  .name = "af-packet-ip-device",
  .flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P,
};

/* Default TX ring geometry, used when the create arguments leave the
 * corresponding value at 0. */
#define AF_PACKET_DEFAULT_TX_FRAMES_PER_BLOCK 1024
#define AF_PACKET_DEFAULT_TX_FRAME_SIZE (2048 * 33) // GSO packet of 64KB
#define AF_PACKET_TX_BLOCK_NR 1

/* Default RX ring geometry, used when the create arguments leave the
 * corresponding value at 0. */
#define AF_PACKET_DEFAULT_RX_FRAMES_PER_BLOCK 32
#define AF_PACKET_DEFAULT_RX_FRAME_SIZE 2048
#define AF_PACKET_RX_BLOCK_NR 160

/*defined in net/if.h but clashes with dpdk headers */
unsigned int if_nametoindex (const char *ifname);
Damjan Marion83243a02016-02-29 13:09:30 +010057
Damjan Marion88a9c0e2022-01-06 21:14:08 +010058static clib_error_t *
Damjan Marion1cd0e5d2022-01-17 14:49:17 +010059af_packet_eth_set_max_frame_size (vnet_main_t *vnm, vnet_hw_interface_t *hi,
60 u32 frame_size)
Damjan Marion83243a02016-02-29 13:09:30 +010061{
Damjan Marion88a9c0e2022-01-06 21:14:08 +010062 clib_error_t *error, *rv;
Ray Kinsella7bfa1192017-05-15 11:52:43 +010063 af_packet_main_t *apm = &af_packet_main;
Damjan Marion88a9c0e2022-01-06 21:14:08 +010064 af_packet_if_t *apif = pool_elt_at_index (apm->interfaces, hi->dev_instance);
Ray Kinsella7bfa1192017-05-15 11:52:43 +010065
Damjan Marion1cd0e5d2022-01-17 14:49:17 +010066 error = vnet_netlink_set_link_mtu (apif->host_if_index,
67 frame_size + hi->frame_overhead);
Damjan Marion88a9c0e2022-01-06 21:14:08 +010068
69 if (error)
Ray Kinsella7bfa1192017-05-15 11:52:43 +010070 {
Damjan Marion88a9c0e2022-01-06 21:14:08 +010071 vlib_log_err (apm->log_class, "netlink failed to change MTU: %U",
72 format_clib_error, error);
73 rv = vnet_error (VNET_ERR_SYSCALL_ERROR_1, "netlink error: %U",
74 format_clib_error, error);
75 clib_error_free (error);
76 return rv;
Ray Kinsella7bfa1192017-05-15 11:52:43 +010077 }
Damjan Marion88a9c0e2022-01-06 21:14:08 +010078 else
Damjan Marion1cd0e5d2022-01-17 14:49:17 +010079 apif->host_mtu = frame_size + hi->frame_overhead;
Damjan Marion83243a02016-02-29 13:09:30 +010080 return 0;
81}
82
Nathan Skrzypczakffc6bdc2021-02-01 17:13:59 +010083static int
84af_packet_read_mtu (af_packet_if_t *apif)
85{
86 af_packet_main_t *apm = &af_packet_main;
87 clib_error_t *error;
Aloys Augustine39376e2021-03-29 22:08:09 +020088 error = vnet_netlink_get_link_mtu (apif->host_if_index, &apif->host_mtu);
Nathan Skrzypczakffc6bdc2021-02-01 17:13:59 +010089 if (error)
90 {
Aloys Augustine39376e2021-03-29 22:08:09 +020091 vlib_log_err (apm->log_class, "netlink failed to get MTU: %U",
Nathan Skrzypczakffc6bdc2021-02-01 17:13:59 +010092 format_clib_error, error);
93 clib_error_free (error);
94 return VNET_API_ERROR_SYSCALL_ERROR_1;
95 }
96 return 0;
97}
98
Damjan Marion00a9dca2016-08-17 17:05:46 +020099static clib_error_t *
Damjan Marion56dd5432017-09-08 19:52:02 +0200100af_packet_fd_read_ready (clib_file_t * uf)
Damjan Marion83243a02016-02-29 13:09:30 +0100101{
Damjan Marioneb743fa2017-03-20 16:34:15 +0100102 vnet_main_t *vnm = vnet_get_main ();
Damjan Marion83243a02016-02-29 13:09:30 +0100103
Damjan Marion83243a02016-02-29 13:09:30 +0100104 /* Schedule the rx node */
Mohsin Kazmi5a7aa512022-03-25 14:27:45 +0000105 vnet_hw_if_rx_queue_set_int_pending (vnm, uf->private_data);
Damjan Marion83243a02016-02-29 13:09:30 +0100106 return 0;
107}
108
109static int
Ray Kinsellac855b732017-04-21 12:24:43 +0100110is_bridge (const u8 * host_if_name)
111{
112 u8 *s;
113 DIR *dir = NULL;
114
115 s = format (0, "/sys/class/net/%s/bridge%c", host_if_name, 0);
116 dir = opendir ((char *) s);
117 vec_free (s);
118
119 if (dir)
120 {
121 closedir (dir);
122 return 0;
123 }
124
125 return -1;
126}
127
Mohsin Kazmi5a7aa512022-03-25 14:27:45 +0000128static void
129af_packet_set_rx_queues (vlib_main_t *vm, af_packet_if_t *apif)
130{
131 vnet_main_t *vnm = vnet_get_main ();
132 af_packet_queue_t *rx_queue;
133
134 vnet_hw_if_set_input_node (vnm, apif->hw_if_index,
135 af_packet_input_node.index);
136
137 vec_foreach (rx_queue, apif->rx_queues)
138 {
139 rx_queue->queue_index = vnet_hw_if_register_rx_queue (
140 vnm, apif->hw_if_index, rx_queue->queue_id, VNET_HW_IF_RXQ_THREAD_ANY);
141
142 {
143 clib_file_t template = { 0 };
144 template.read_function = af_packet_fd_read_ready;
145 template.file_descriptor = rx_queue->fd;
146 template.private_data = rx_queue->queue_index;
147 template.flags = UNIX_FILE_EVENT_EDGE_TRIGGERED;
148 template.description =
149 format (0, "%U queue %u", format_af_packet_device_name,
150 apif->dev_instance, rx_queue->queue_id);
151 rx_queue->clib_file_index = clib_file_add (&file_main, &template);
152 }
153 vnet_hw_if_set_rx_queue_file_index (vnm, rx_queue->queue_index,
154 rx_queue->clib_file_index);
155 vnet_hw_if_set_rx_queue_mode (vnm, rx_queue->queue_index,
156 VNET_HW_IF_RX_MODE_INTERRUPT);
157 rx_queue->mode = VNET_HW_IF_RX_MODE_INTERRUPT;
158 }
159 vnet_hw_if_update_runtime_data (vnm, apif->hw_if_index);
160}
161
162static void
163af_packet_set_tx_queues (vlib_main_t *vm, af_packet_if_t *apif)
164{
165 vnet_main_t *vnm = vnet_get_main ();
166 af_packet_main_t *apm = &af_packet_main;
167 af_packet_queue_t *tx_queue;
168
169 vec_foreach (tx_queue, apif->tx_queues)
170 {
171 tx_queue->queue_index = vnet_hw_if_register_tx_queue (
172 vnm, apif->hw_if_index, tx_queue->queue_id);
173 }
174
175 if (apif->num_txqs == 0)
176 {
177 vlib_log_err (apm->log_class, "Interface %U has 0 txq",
178 format_vnet_hw_if_index_name, vnm, apif->hw_if_index);
179 return;
180 }
181
182 for (u32 j = 0; j < vlib_get_n_threads (); j++)
183 {
184 u32 qi = apif->tx_queues[j % apif->num_txqs].queue_index;
185 vnet_hw_if_tx_queue_assign_thread (vnm, qi, j);
186 }
187
188 vnet_hw_if_update_runtime_data (vnm, apif->hw_if_index);
189}
190
Ray Kinsellac855b732017-04-21 12:24:43 +0100191static int
Mohsin Kazmi219cbcb2022-03-18 16:58:31 +0000192create_packet_v3_sock (int host_if_index, tpacket_req3_t *rx_req,
Mohsin Kazmi5a7aa512022-03-25 14:27:45 +0000193 tpacket_req3_t *tx_req, int *fd, af_packet_ring_t *ring,
194 u32 *hdrlen_ptr, u8 *is_cksum_gso_enabled,
195 u32 fanout_id, u8 is_fanout)
Damjan Marion83243a02016-02-29 13:09:30 +0100196{
Mohsin Kazmiacba9f72018-05-17 15:42:27 +0200197 af_packet_main_t *apm = &af_packet_main;
Damjan Marion83243a02016-02-29 13:09:30 +0100198 struct sockaddr_ll sll;
Mohsin Kazmi219cbcb2022-03-18 16:58:31 +0000199 socklen_t req_sz = sizeof (tpacket_req3_t);
200 int ret;
201 int ver = TPACKET_V3;
202 u32 hdrlen = 0;
203 u32 len = sizeof (hdrlen);
Mohsin Kazmi5a7aa512022-03-25 14:27:45 +0000204 u32 ring_sz = 0;
205
206 if (rx_req)
207 ring_sz += rx_req->tp_block_size * rx_req->tp_block_nr;
208
209 if (tx_req)
210 ring_sz += tx_req->tp_block_size * tx_req->tp_block_nr;
Damjan Marion83243a02016-02-29 13:09:30 +0100211
Damjan Marion00a9dca2016-08-17 17:05:46 +0200212 if ((*fd = socket (AF_PACKET, SOCK_RAW, htons (ETH_P_ALL))) < 0)
Damjan Marion83243a02016-02-29 13:09:30 +0100213 {
Mohsin Kazmi5a7aa512022-03-25 14:27:45 +0000214 vlib_log_err (apm->log_class,
215 "Failed to create AF_PACKET socket: %s (errno %d)",
216 strerror (errno), errno);
Damjan Marion83243a02016-02-29 13:09:30 +0100217 ret = VNET_API_ERROR_SYSCALL_ERROR_1;
218 goto error;
219 }
Damjan Marionc5170202017-10-11 14:15:47 +0200220
Chaoyu Jinafb19302018-03-13 07:37:41 -0700221 /* bind before rx ring is cfged so we don't receive packets from other interfaces */
Dave Barachb7b92992018-10-17 10:38:51 -0400222 clib_memset (&sll, 0, sizeof (sll));
Chaoyu Jinafb19302018-03-13 07:37:41 -0700223 sll.sll_family = PF_PACKET;
224 sll.sll_protocol = htons (ETH_P_ALL);
225 sll.sll_ifindex = host_if_index;
jackiechen19857fa41602019-05-07 18:59:13 +0800226 if (bind (*fd, (struct sockaddr *) &sll, sizeof (sll)) < 0)
Chaoyu Jinafb19302018-03-13 07:37:41 -0700227 {
Mohsin Kazmi5a7aa512022-03-25 14:27:45 +0000228 vlib_log_err (apm->log_class,
229 "Failed to bind rx packet socket: %s (errno %d)",
230 strerror (errno), errno);
Chaoyu Jinafb19302018-03-13 07:37:41 -0700231 ret = VNET_API_ERROR_SYSCALL_ERROR_1;
232 goto error;
233 }
234
jackiechen19857fa41602019-05-07 18:59:13 +0800235 if (setsockopt (*fd, SOL_PACKET, PACKET_VERSION, &ver, sizeof (ver)) < 0)
Damjan Marion83243a02016-02-29 13:09:30 +0100236 {
Mohsin Kazmi5a7aa512022-03-25 14:27:45 +0000237 vlib_log_err (apm->log_class,
238 "Failed to set rx packet interface version: %s (errno %d)",
239 strerror (errno), errno);
Damjan Marion83243a02016-02-29 13:09:30 +0100240 ret = VNET_API_ERROR_SYSCALL_ERROR_1;
241 goto error;
242 }
243
Mohsin Kazmi219cbcb2022-03-18 16:58:31 +0000244 if (getsockopt (*fd, SOL_PACKET, PACKET_HDRLEN, &hdrlen, &len) < 0)
245 {
Mohsin Kazmi5a7aa512022-03-25 14:27:45 +0000246 vlib_log_err (
Mohsin Kazmi219cbcb2022-03-18 16:58:31 +0000247 apm->log_class,
248 "Failed to get packet hdr len error handling option: %s (errno %d)",
249 strerror (errno), errno);
250 ret = VNET_API_ERROR_SYSCALL_ERROR_1;
251 goto error;
252 }
253 else
254 *hdrlen_ptr = hdrlen;
255
Damjan Marionc5170202017-10-11 14:15:47 +0200256 int opt = 1;
jackiechen19857fa41602019-05-07 18:59:13 +0800257 if (setsockopt (*fd, SOL_PACKET, PACKET_LOSS, &opt, sizeof (opt)) < 0)
Damjan Marion83243a02016-02-29 13:09:30 +0100258 {
Mohsin Kazmi5a7aa512022-03-25 14:27:45 +0000259 vlib_log_err (
Mohsin Kazmi219cbcb2022-03-18 16:58:31 +0000260 apm->log_class,
261 "Failed to set packet tx ring error handling option: %s (errno %d)",
262 strerror (errno), errno);
Damjan Marion83243a02016-02-29 13:09:30 +0100263 ret = VNET_API_ERROR_SYSCALL_ERROR_1;
264 goto error;
265 }
266
Mohsin Kazmic1fd17b2022-03-22 21:40:04 +0000267 int opt2 = 1;
268 if (setsockopt (*fd, SOL_PACKET, PACKET_VNET_HDR, &opt2, sizeof (opt2)) < 0)
269 {
270 vlib_log_debug (
271 apm->log_class,
272 "Failed to set packet vnet hdr error handling option: %s (errno %d)",
273 strerror (errno), errno);
274 }
275 else
276 *is_cksum_gso_enabled = 1;
277
Florin Coras2dc942e2021-11-22 21:34:56 -0800278#if defined(PACKET_QDISC_BYPASS)
279 /* Introduced with Linux 3.14 so the ifdef should eventually be removed */
Mohammed Hawarieed6fc92021-09-06 11:48:17 +0200280 if (setsockopt (*fd, SOL_PACKET, PACKET_QDISC_BYPASS, &opt, sizeof (opt)) <
281 0)
282 {
283 vlib_log_debug (apm->log_class,
284 "Failed to set qdisc bypass error "
285 "handling option: %s (errno %d)",
286 strerror (errno), errno);
287 }
Florin Coras2dc942e2021-11-22 21:34:56 -0800288#endif
Mohammed Hawarieed6fc92021-09-06 11:48:17 +0200289
Mohsin Kazmi5a7aa512022-03-25 14:27:45 +0000290 if (is_fanout)
Damjan Marion83243a02016-02-29 13:09:30 +0100291 {
Mohsin Kazmi5a7aa512022-03-25 14:27:45 +0000292 int fanout = ((fanout_id & 0xffff) | ((PACKET_FANOUT_HASH) << 16));
293 if (setsockopt (*fd, SOL_PACKET, PACKET_FANOUT, &fanout,
294 sizeof (fanout)) < 0)
295 {
296 vlib_log_err (apm->log_class,
297 "Failed to set fanout options: %s (errno %d)",
298 strerror (errno), errno);
299 ret = VNET_API_ERROR_SYSCALL_ERROR_1;
300 goto error;
301 }
302 }
303
304 if (rx_req)
305 if (setsockopt (*fd, SOL_PACKET, PACKET_RX_RING, rx_req, req_sz) < 0)
306 {
307 vlib_log_err (apm->log_class,
jackiechen19857fa41602019-05-07 18:59:13 +0800308 "Failed to set packet rx ring options: %s (errno %d)",
309 strerror (errno), errno);
Mohsin Kazmi5a7aa512022-03-25 14:27:45 +0000310 ret = VNET_API_ERROR_SYSCALL_ERROR_1;
311 goto error;
312 }
Damjan Marion83243a02016-02-29 13:09:30 +0100313
Mohsin Kazmi5a7aa512022-03-25 14:27:45 +0000314 if (tx_req)
315 if (setsockopt (*fd, SOL_PACKET, PACKET_TX_RING, tx_req, req_sz) < 0)
316 {
317 vlib_log_err (apm->log_class,
jackiechen19857fa41602019-05-07 18:59:13 +0800318 "Failed to set packet tx ring options: %s (errno %d)",
319 strerror (errno), errno);
Mohsin Kazmi5a7aa512022-03-25 14:27:45 +0000320 ret = VNET_API_ERROR_SYSCALL_ERROR_1;
321 goto error;
322 }
323
324 ring->ring_start_addr = mmap (NULL, ring_sz, PROT_READ | PROT_WRITE,
325 MAP_SHARED | MAP_LOCKED, *fd, 0);
326 if (ring->ring_start_addr == MAP_FAILED)
327 {
328 vlib_log_err (apm->log_class, "mmap failure: %s (errno %d)",
329 strerror (errno), errno);
Damjan Marion83243a02016-02-29 13:09:30 +0100330 ret = VNET_API_ERROR_SYSCALL_ERROR_1;
331 goto error;
332 }
333
Mohsin Kazmi5a7aa512022-03-25 14:27:45 +0000334 ring->ring_size = ring_sz;
Damjan Marion83243a02016-02-29 13:09:30 +0100335
Damjan Marion83243a02016-02-29 13:09:30 +0100336 return 0;
337error:
Dave Barach16ad6ae2016-07-28 17:55:30 -0400338 if (*fd >= 0)
jackiechen19857fa41602019-05-07 18:59:13 +0800339 {
340 close (*fd);
341 *fd = -1;
342 }
Damjan Marion83243a02016-02-29 13:09:30 +0100343 return ret;
344}
345
346int
Mohsin Kazmi5a7aa512022-03-25 14:27:45 +0000347af_packet_queue_init (vlib_main_t *vm, af_packet_if_t *apif,
348 af_packet_create_if_arg_t *arg,
349 af_packet_queue_t *rx_queue, af_packet_queue_t *tx_queue,
350 u8 queue_id, u8 is_fanout)
351{
352 af_packet_main_t *apm = &af_packet_main;
353 tpacket_req3_t *rx_req = 0;
354 tpacket_req3_t *tx_req = 0;
355 int ret, fd = -1;
356 af_packet_ring_t ring = { 0 };
357 u8 *ring_addr = 0;
358 u32 rx_frames_per_block, tx_frames_per_block;
359 u32 rx_frame_size, tx_frame_size;
360 u32 hdrlen = 0;
361 u32 i = 0;
362 u8 is_cksum_gso_enabled = 0;
363
364 if (rx_queue)
365 {
366 rx_frames_per_block = arg->rx_frames_per_block ?
367 arg->rx_frames_per_block :
368 AF_PACKET_DEFAULT_RX_FRAMES_PER_BLOCK;
369
370 rx_frame_size = arg->rx_frame_size ? arg->rx_frame_size :
371 AF_PACKET_DEFAULT_RX_FRAME_SIZE;
372 vec_validate (rx_queue->rx_req, 0);
373 rx_queue->rx_req->tp_block_size = rx_frame_size * rx_frames_per_block;
374 rx_queue->rx_req->tp_frame_size = rx_frame_size;
375 rx_queue->rx_req->tp_block_nr = AF_PACKET_RX_BLOCK_NR;
376 rx_queue->rx_req->tp_frame_nr =
377 AF_PACKET_RX_BLOCK_NR * rx_frames_per_block;
378 rx_queue->rx_req->tp_retire_blk_tov = 1; // 1 ms block timout
379 rx_queue->rx_req->tp_feature_req_word = 0;
380 rx_queue->rx_req->tp_sizeof_priv = 0;
381 rx_req = rx_queue->rx_req;
382 }
383
384 if (tx_queue)
385 {
386 tx_frames_per_block = arg->tx_frames_per_block ?
387 arg->tx_frames_per_block :
388 AF_PACKET_DEFAULT_TX_FRAMES_PER_BLOCK;
389 tx_frame_size = arg->tx_frame_size ? arg->tx_frame_size :
390 AF_PACKET_DEFAULT_TX_FRAME_SIZE;
391
392 vec_validate (tx_queue->tx_req, 0);
393 tx_queue->tx_req->tp_block_size = tx_frame_size * tx_frames_per_block;
394 tx_queue->tx_req->tp_frame_size = tx_frame_size;
395 tx_queue->tx_req->tp_block_nr = AF_PACKET_TX_BLOCK_NR;
396 tx_queue->tx_req->tp_frame_nr =
397 AF_PACKET_TX_BLOCK_NR * tx_frames_per_block;
398 tx_queue->tx_req->tp_retire_blk_tov = 0;
399 tx_queue->tx_req->tp_sizeof_priv = 0;
400 tx_queue->tx_req->tp_feature_req_word = 0;
401 tx_req = tx_queue->tx_req;
402 }
403
404 ret = create_packet_v3_sock (apif->host_if_index, rx_req, tx_req, &fd, &ring,
405 &hdrlen, &is_cksum_gso_enabled,
406 apif->dev_instance, is_fanout);
407
408 if (ret != 0)
409 goto error;
410
411 vec_add1 (apif->rings, ring);
412 ring_addr = ring.ring_start_addr;
413
414 if (rx_queue)
415 {
416 rx_queue->fd = fd;
417 vec_validate (rx_queue->rx_ring, rx_queue->rx_req->tp_block_nr - 1);
418 vec_foreach_index (i, rx_queue->rx_ring)
419 {
420 rx_queue->rx_ring[i] =
421 ring_addr + i * rx_queue->rx_req->tp_block_size;
422 }
423
424 rx_queue->next_rx_block = 0;
425 rx_queue->queue_id = queue_id;
426 rx_queue->is_rx_pending = 0;
427 ring_addr = ring_addr + rx_queue->rx_req->tp_block_size *
428 rx_queue->rx_req->tp_block_nr;
429 }
430
431 if (tx_queue)
432 {
433 tx_queue->fd = fd;
434 vec_validate (tx_queue->tx_ring, tx_queue->tx_req->tp_block_nr - 1);
435 vec_foreach_index (i, tx_queue->tx_ring)
436 {
437 tx_queue->tx_ring[i] =
438 ring_addr + i * tx_queue->tx_req->tp_block_size;
439 }
440
441 tx_queue->next_tx_frame = 0;
442 tx_queue->queue_id = queue_id;
443 clib_spinlock_init (&tx_queue->lockp);
444 }
445
446 if (queue_id == 0)
447 {
448 apif->hdrlen = hdrlen;
449 apif->is_cksum_gso_enabled = is_cksum_gso_enabled;
450 }
451
452 return 0;
453error:
454 vlib_log_err (apm->log_class, "Failed to set queue %u error", queue_id);
455 vec_free (rx_queue->rx_req);
456 vec_free (tx_queue->tx_req);
457 return ret;
458}
459
460int
461af_packet_device_init (vlib_main_t *vm, af_packet_if_t *apif,
462 af_packet_create_if_arg_t *args)
463{
464 af_packet_main_t *apm = &af_packet_main;
465 af_packet_queue_t *rx_queue = 0;
466 af_packet_queue_t *tx_queue = 0;
467 u16 nq = clib_min (args->num_rxqs, args->num_txqs);
468 u16 i = 0;
469 int ret = 0;
470 u8 is_fanout = (args->num_rxqs > 1) ? 1 : 0;
471
472 vec_validate (apif->rx_queues, args->num_rxqs - 1);
473 vec_validate (apif->tx_queues, args->num_txqs - 1);
474
475 for (; i < nq; i++)
476 {
477 rx_queue = vec_elt_at_index (apif->rx_queues, i);
478 tx_queue = vec_elt_at_index (apif->tx_queues, i);
479 ret = af_packet_queue_init (vm, apif, args, rx_queue, tx_queue, i,
480 is_fanout);
481 if (ret != 0)
482 goto error;
483 }
484
485 if (args->num_rxqs > args->num_txqs)
486 {
487 for (; i < args->num_rxqs; i++)
488 {
489 rx_queue = vec_elt_at_index (apif->rx_queues, i);
490 ret =
491 af_packet_queue_init (vm, apif, args, rx_queue, 0, i, is_fanout);
492 if (ret != 0)
493 goto error;
494 }
495 }
496 else if (args->num_txqs > args->num_rxqs)
497 {
498 for (; i < args->num_txqs; i++)
499 {
500 tx_queue = vec_elt_at_index (apif->tx_queues, i);
501 ret = af_packet_queue_init (vm, apif, args, 0, tx_queue, i, 0);
502 if (ret != 0)
503 goto error;
504 }
505 }
506
507 apif->num_rxqs = args->num_rxqs;
508 apif->num_txqs = args->num_txqs;
509
510 return 0;
511error:
512 vlib_log_err (apm->log_class, "Failed to init device error");
513 return ret;
514}
515
516int
Nathan Skrzypczak7d0e30b2021-06-23 11:28:39 +0200517af_packet_create_if (af_packet_create_if_arg_t *arg)
Damjan Marion83243a02016-02-29 13:09:30 +0100518{
Damjan Marion00a9dca2016-08-17 17:05:46 +0200519 af_packet_main_t *apm = &af_packet_main;
Nathan Skrzypczak7d0e30b2021-06-23 11:28:39 +0200520 vlib_main_t *vm = vlib_get_main ();
Mohsin Kazmi5a7aa512022-03-25 14:27:45 +0000521 int fd2 = -1;
Mohsin Kazmia50a14c2018-04-25 15:58:05 +0200522 struct ifreq ifr;
Damjan Marion00a9dca2016-08-17 17:05:46 +0200523 af_packet_if_t *apif = 0;
Damjan Marion83243a02016-02-29 13:09:30 +0100524 u8 hw_addr[6];
Damjan Marion00a9dca2016-08-17 17:05:46 +0200525 vnet_sw_interface_t *sw;
526 vnet_main_t *vnm = vnet_get_main ();
Mohsin Kazmic1fd17b2022-03-22 21:40:04 +0000527 vnet_hw_if_caps_t caps = VNET_HW_IF_CAP_INT_MODE;
Damjan Marion00a9dca2016-08-17 17:05:46 +0200528 uword *p;
Damjan Marion83243a02016-02-29 13:09:30 +0100529 uword if_index;
jackiechen19857fa41602019-05-07 18:59:13 +0800530 u8 *host_if_name_dup = 0;
Ray Kinsellac855b732017-04-21 12:24:43 +0100531 int host_if_index = -1;
Mohsin Kazmi5a7aa512022-03-25 14:27:45 +0000532 int ret = 0;
Damjan Marion83243a02016-02-29 13:09:30 +0100533
Nathan Skrzypczak7d0e30b2021-06-23 11:28:39 +0200534 p = mhash_get (&apm->if_index_by_host_if_name, arg->host_if_name);
Damjan Marion83243a02016-02-29 13:09:30 +0100535 if (p)
536 {
Mohsin Kazmi43fc6882018-03-22 23:45:23 +0100537 apif = vec_elt_at_index (apm->interfaces, p[0]);
Nathan Skrzypczak7d0e30b2021-06-23 11:28:39 +0200538 arg->sw_if_index = apif->sw_if_index;
Mohsin Kazmi43fc6882018-03-22 23:45:23 +0100539 return VNET_API_ERROR_IF_ALREADY_EXISTS;
Damjan Marion83243a02016-02-29 13:09:30 +0100540 }
541
Nathan Skrzypczak7d0e30b2021-06-23 11:28:39 +0200542 host_if_name_dup = vec_dup (arg->host_if_name);
543
Mohsin Kazmia50a14c2018-04-25 15:58:05 +0200544 /*
545 * make sure host side of interface is 'UP' before binding AF_PACKET
546 * socket on it.
547 */
548 if ((fd2 = socket (AF_UNIX, SOCK_DGRAM, 0)) < 0)
Ray Kinsellac855b732017-04-21 12:24:43 +0100549 {
jackiechen19857fa41602019-05-07 18:59:13 +0800550 vlib_log_debug (apm->log_class,
551 "Failed to create AF_UNIX socket: %s (errno %d)",
552 strerror (errno), errno);
Mohsin Kazmia50a14c2018-04-25 15:58:05 +0200553 ret = VNET_API_ERROR_SYSCALL_ERROR_1;
554 goto error;
555 }
556
Nathan Skrzypczak7d0e30b2021-06-23 11:28:39 +0200557 clib_memcpy (ifr.ifr_name, (const char *) arg->host_if_name,
558 vec_len (arg->host_if_name));
jackiechen19857fa41602019-05-07 18:59:13 +0800559 if (ioctl (fd2, SIOCGIFINDEX, &ifr) < 0)
Mohsin Kazmia50a14c2018-04-25 15:58:05 +0200560 {
Nathan Skrzypczak7d0e30b2021-06-23 11:28:39 +0200561 vlib_log_debug (
562 apm->log_class,
563 "Failed to retrieve the interface (%s) index: %s (errno %d)",
564 arg->host_if_name, strerror (errno), errno);
jackiechen19857fa41602019-05-07 18:59:13 +0800565 ret = VNET_API_ERROR_INVALID_INTERFACE;
566 goto error;
Ray Kinsellac855b732017-04-21 12:24:43 +0100567 }
568
Mohsin Kazmia50a14c2018-04-25 15:58:05 +0200569 host_if_index = ifr.ifr_ifindex;
jackiechen19857fa41602019-05-07 18:59:13 +0800570 if (ioctl (fd2, SIOCGIFFLAGS, &ifr) < 0)
Mohsin Kazmia50a14c2018-04-25 15:58:05 +0200571 {
jackiechen19857fa41602019-05-07 18:59:13 +0800572 vlib_log_debug (apm->log_class,
573 "Failed to get the active flag: %s (errno %d)",
574 strerror (errno), errno);
575 ret = VNET_API_ERROR_SYSCALL_ERROR_1;
Mohsin Kazmia50a14c2018-04-25 15:58:05 +0200576 goto error;
577 }
578
579 if (!(ifr.ifr_flags & IFF_UP))
580 {
581 ifr.ifr_flags |= IFF_UP;
jackiechen19857fa41602019-05-07 18:59:13 +0800582 if (ioctl (fd2, SIOCSIFFLAGS, &ifr) < 0)
Mohsin Kazmia50a14c2018-04-25 15:58:05 +0200583 {
jackiechen19857fa41602019-05-07 18:59:13 +0800584 vlib_log_debug (apm->log_class,
585 "Failed to set the active flag: %s (errno %d)",
586 strerror (errno), errno);
587 ret = VNET_API_ERROR_SYSCALL_ERROR_1;
Mohsin Kazmia50a14c2018-04-25 15:58:05 +0200588 goto error;
589 }
590 }
591
592 if (fd2 > -1)
jackiechen19857fa41602019-05-07 18:59:13 +0800593 {
594 close (fd2);
595 fd2 = -1;
596 }
Mohsin Kazmia50a14c2018-04-25 15:58:05 +0200597
Nathan Skrzypczak7d0e30b2021-06-23 11:28:39 +0200598 ret = is_bridge (arg->host_if_name);
Ray Kinsellac855b732017-04-21 12:24:43 +0100599 if (ret == 0) /* is a bridge, ignore state */
600 host_if_index = -1;
601
Damjan Marion83243a02016-02-29 13:09:30 +0100602 /* So far everything looks good, let's create interface */
Damjan Marion048ee2e2016-03-16 22:59:21 +0100603 pool_get (apm->interfaces, apif);
Damjan Marion83243a02016-02-29 13:09:30 +0100604 if_index = apif - apm->interfaces;
605
Mohsin Kazmi5a7aa512022-03-25 14:27:45 +0000606 apif->dev_instance = if_index;
Ray Kinsellac855b732017-04-21 12:24:43 +0100607 apif->host_if_index = host_if_index;
Ivan Kellybfe737a2016-10-07 18:02:43 +0200608 apif->host_if_name = host_if_name_dup;
Dave Barach13f3c452016-03-29 11:56:41 -0400609 apif->per_interface_next_index = ~0;
Mohsin Kazmicae84fa2021-10-08 15:10:49 +0000610 apif->mode = arg->mode;
Mohsin Kazmi5a7aa512022-03-25 14:27:45 +0000611
612 ret = af_packet_device_init (vm, apif, arg);
613 if (ret != 0)
614 goto error;
Damjan Marion83243a02016-02-29 13:09:30 +0100615
Nathan Skrzypczakffc6bdc2021-02-01 17:13:59 +0100616 ret = af_packet_read_mtu (apif);
617 if (ret != 0)
618 goto error;
619
Mohsin KAZMIcf751ec2017-01-18 11:59:45 +0100620
Nathan Skrzypczakb225b0a2021-10-26 16:11:38 +0200621 if (apif->mode != AF_PACKET_IF_MODE_IP)
Mohsin Kazmicae84fa2021-10-08 15:10:49 +0000622 {
Damjan Marion5c954c42022-01-06 20:36:14 +0100623 vnet_eth_interface_registration_t eir = {};
Mohsin Kazmicae84fa2021-10-08 15:10:49 +0000624 /*use configured or generate random MAC address */
625 if (arg->hw_addr)
626 clib_memcpy (hw_addr, arg->hw_addr, 6);
627 else
628 {
629 f64 now = vlib_time_now (vm);
630 u32 rnd;
631 rnd = (u32) (now * 1e6);
632 rnd = random_u32 (&rnd);
633
634 clib_memcpy (hw_addr + 2, &rnd, sizeof (rnd));
635 hw_addr[0] = 2;
636 hw_addr[1] = 0xfe;
637 }
638
Damjan Marion5c954c42022-01-06 20:36:14 +0100639 eir.dev_class_index = af_packet_device_class.index;
Mohsin Kazmi5a7aa512022-03-25 14:27:45 +0000640 eir.dev_instance = apif->dev_instance;
Damjan Marion5c954c42022-01-06 20:36:14 +0100641 eir.address = hw_addr;
Damjan Marion1cd0e5d2022-01-17 14:49:17 +0100642 eir.cb.set_max_frame_size = af_packet_eth_set_max_frame_size;
Damjan Marion5c954c42022-01-06 20:36:14 +0100643 apif->hw_if_index = vnet_eth_register_interface (vnm, &eir);
Mohsin Kazmicae84fa2021-10-08 15:10:49 +0000644 }
Damjan Marion83243a02016-02-29 13:09:30 +0100645 else
646 {
Mohsin Kazmicae84fa2021-10-08 15:10:49 +0000647 apif->hw_if_index = vnet_register_interface (
Mohsin Kazmi5a7aa512022-03-25 14:27:45 +0000648 vnm, af_packet_device_class.index, apif->dev_instance,
649 af_packet_ip_device_hw_interface_class.index, apif->dev_instance);
Damjan Marion83243a02016-02-29 13:09:30 +0100650 }
Mohsin Kazmi5a7aa512022-03-25 14:27:45 +0000651
Damjan Marion83243a02016-02-29 13:09:30 +0100652 sw = vnet_get_hw_sw_interface (vnm, apif->hw_if_index);
653 apif->sw_if_index = sw->sw_if_index;
Mohsin Kazmi5a7aa512022-03-25 14:27:45 +0000654
655 af_packet_set_rx_queues (vm, apif);
656 af_packet_set_tx_queues (vm, apif);
Damjan Marion44036902017-04-28 12:29:15 +0200657
Mohsin Kazmic1fd17b2022-03-22 21:40:04 +0000658 if (apif->is_cksum_gso_enabled)
659 caps |= VNET_HW_IF_CAP_TCP_GSO | VNET_HW_IF_CAP_TX_IP4_CKSUM |
660 VNET_HW_IF_CAP_TX_TCP_CKSUM | VNET_HW_IF_CAP_TX_UDP_CKSUM;
661
662 vnet_hw_if_set_caps (vnm, apif->hw_if_index, caps);
Damjan Marion83243a02016-02-29 13:09:30 +0100663 vnet_hw_interface_set_flags (vnm, apif->hw_if_index,
664 VNET_HW_INTERFACE_FLAG_LINK_UP);
665
Ivan Kellybfe737a2016-10-07 18:02:43 +0200666 mhash_set_mem (&apm->if_index_by_host_if_name, host_if_name_dup, &if_index,
667 0);
Nathan Skrzypczak7d0e30b2021-06-23 11:28:39 +0200668 arg->sw_if_index = apif->sw_if_index;
Mohsin KAZMIcf751ec2017-01-18 11:59:45 +0100669
Damjan Marion83243a02016-02-29 13:09:30 +0100670 return 0;
671
672error:
Mohsin Kazmia50a14c2018-04-25 15:58:05 +0200673 if (fd2 > -1)
jackiechen19857fa41602019-05-07 18:59:13 +0800674 {
675 close (fd2);
676 fd2 = -1;
677 }
Ivan Kellybfe737a2016-10-07 18:02:43 +0200678 vec_free (host_if_name_dup);
Mohsin Kazmi5a7aa512022-03-25 14:27:45 +0000679 memset (apif, 0, sizeof (*apif));
680 pool_put (apm->interfaces, apif);
Damjan Marion83243a02016-02-29 13:09:30 +0100681 return ret;
682}
683
Mohsin Kazmi5a7aa512022-03-25 14:27:45 +0000684static int
685af_packet_rx_queue_free (af_packet_if_t *apif, af_packet_queue_t *rx_queue)
686{
687 clib_file_del_by_index (&file_main, rx_queue->clib_file_index);
688 close (rx_queue->fd);
689 rx_queue->fd = -1;
690 rx_queue->rx_ring = NULL;
691 vec_free (rx_queue->rx_req);
692 rx_queue->rx_req = NULL;
693 return 0;
694}
695
696static int
697af_packet_tx_queue_free (af_packet_if_t *apif, af_packet_queue_t *tx_queue)
698{
699 close (tx_queue->fd);
700 tx_queue->fd = -1;
701 clib_spinlock_free (&tx_queue->lockp);
702 tx_queue->tx_ring = NULL;
703 vec_free (tx_queue->tx_req);
704 tx_queue->tx_req = NULL;
705 return 0;
706}
707
708static int
709af_packet_ring_free (af_packet_if_t *apif, af_packet_ring_t *ring)
710{
711 af_packet_main_t *apm = &af_packet_main;
712
713 if (ring)
714 {
715 // FIXME: unmap the memory
716 if (munmap (ring->ring_start_addr, ring->ring_size))
717 vlib_log_warn (apm->log_class,
718 "Host interface %s could not free ring %p of size %u",
719 apif->host_if_name, ring->ring_start_addr,
720 ring->ring_size);
721 else
722 ring->ring_start_addr = 0;
723 }
724
725 return 0;
726}
727
Peter Leidba76f22016-04-08 08:16:31 -0700728int
Nathan Skrzypczak7d0e30b2021-06-23 11:28:39 +0200729af_packet_delete_if (u8 *host_if_name)
Peter Leidba76f22016-04-08 08:16:31 -0700730{
Damjan Marion00a9dca2016-08-17 17:05:46 +0200731 vnet_main_t *vnm = vnet_get_main ();
Peter Leidba76f22016-04-08 08:16:31 -0700732 af_packet_main_t *apm = &af_packet_main;
733 af_packet_if_t *apif;
Mohsin Kazmi5a7aa512022-03-25 14:27:45 +0000734 af_packet_queue_t *rx_queue;
735 af_packet_queue_t *tx_queue;
736 af_packet_ring_t *ring;
Peter Leidba76f22016-04-08 08:16:31 -0700737 uword *p;
Peter Leidba76f22016-04-08 08:16:31 -0700738
Damjan Marion00a9dca2016-08-17 17:05:46 +0200739 p = mhash_get (&apm->if_index_by_host_if_name, host_if_name);
740 if (p == NULL)
741 {
Mohsin Kazmiacba9f72018-05-17 15:42:27 +0200742 vlib_log_warn (apm->log_class, "Host interface %s does not exist",
743 host_if_name);
Damjan Marion00a9dca2016-08-17 17:05:46 +0200744 return VNET_API_ERROR_SYSCALL_ERROR_1;
745 }
746 apif = pool_elt_at_index (apm->interfaces, p[0]);
Peter Leidba76f22016-04-08 08:16:31 -0700747
748 /* bring down the interface */
Damjan Marion00a9dca2016-08-17 17:05:46 +0200749 vnet_hw_interface_set_flags (vnm, apif->hw_if_index, 0);
Peter Leidba76f22016-04-08 08:16:31 -0700750
751 /* clean up */
Mohsin Kazmi5a7aa512022-03-25 14:27:45 +0000752 vec_foreach (rx_queue, apif->rx_queues)
753 af_packet_rx_queue_free (apif, rx_queue);
754 vec_foreach (tx_queue, apif->tx_queues)
755 af_packet_tx_queue_free (apif, tx_queue);
756 vec_foreach (ring, apif->rings)
757 af_packet_ring_free (apif, ring);
Eyal Barif298ecf2016-09-19 18:47:39 +0300758
Mohsin Kazmi5a7aa512022-03-25 14:27:45 +0000759 vec_free (apif->rx_queues);
760 apif->rx_queues = NULL;
761 vec_free (apif->tx_queues);
762 apif->tx_queues = NULL;
763 vec_free (apif->rings);
764 apif->rings = NULL;
Peter Leidba76f22016-04-08 08:16:31 -0700765
Damjan Marion00a9dca2016-08-17 17:05:46 +0200766 vec_free (apif->host_if_name);
Peter Leidba76f22016-04-08 08:16:31 -0700767 apif->host_if_name = NULL;
Ray Kinsellac855b732017-04-21 12:24:43 +0100768 apif->host_if_index = -1;
Peter Leidba76f22016-04-08 08:16:31 -0700769
Mohsin Kazmi5a7aa512022-03-25 14:27:45 +0000770 mhash_unset (&apm->if_index_by_host_if_name, host_if_name, p);
Peter Leidba76f22016-04-08 08:16:31 -0700771
Nathan Skrzypczakb225b0a2021-10-26 16:11:38 +0200772 if (apif->mode != AF_PACKET_IF_MODE_IP)
Mohsin Kazmicae84fa2021-10-08 15:10:49 +0000773 ethernet_delete_interface (vnm, apif->hw_if_index);
774 else
775 vnet_delete_hw_interface (vnm, apif->hw_if_index);
Peter Leidba76f22016-04-08 08:16:31 -0700776
Mohsin Kazmi5a7aa512022-03-25 14:27:45 +0000777 memset (apif, 0, sizeof (*apif));
Damjan Marion00a9dca2016-08-17 17:05:46 +0200778 pool_put (apm->interfaces, apif);
Peter Leidba76f22016-04-08 08:16:31 -0700779
780 return 0;
781}
782
Jakub Grajciar92b02752017-10-20 13:37:28 +0200783int
Nathan Skrzypczak7d0e30b2021-06-23 11:28:39 +0200784af_packet_set_l4_cksum_offload (u32 sw_if_index, u8 set)
Jakub Grajciar92b02752017-10-20 13:37:28 +0200785{
Mohsin Kazmic1fd17b2022-03-22 21:40:04 +0000786 // deprecated ...
Jakub Grajciar92b02752017-10-20 13:37:28 +0200787 return 0;
788}
789
Mohsin Kazmi04e0bb22018-05-28 18:55:37 +0200790int
791af_packet_dump_ifs (af_packet_if_detail_t ** out_af_packet_ifs)
792{
793 af_packet_main_t *apm = &af_packet_main;
794 af_packet_if_t *apif;
795 af_packet_if_detail_t *r_af_packet_ifs = NULL;
796 af_packet_if_detail_t *af_packet_if = NULL;
797
Damjan Marionb2c31b62020-12-13 21:47:40 +0100798 pool_foreach (apif, apm->interfaces)
799 {
Jakub Grajciar3b2db902019-08-26 11:25:52 +0200800 vec_add2 (r_af_packet_ifs, af_packet_if, 1);
801 af_packet_if->sw_if_index = apif->sw_if_index;
802 if (apif->host_if_name)
803 {
804 clib_memcpy (af_packet_if->host_if_name, apif->host_if_name,
805 MIN (ARRAY_LEN (af_packet_if->host_if_name) - 1,
806 strlen ((const char *) apif->host_if_name)));
807 }
Damjan Marionb2c31b62020-12-13 21:47:40 +0100808 }
Mohsin Kazmi04e0bb22018-05-28 18:55:37 +0200809
810 *out_af_packet_ifs = r_af_packet_ifs;
811
812 return 0;
813}
814
Damjan Marion83243a02016-02-29 13:09:30 +0100815static clib_error_t *
816af_packet_init (vlib_main_t * vm)
817{
Damjan Marion00a9dca2016-08-17 17:05:46 +0200818 af_packet_main_t *apm = &af_packet_main;
Damjan Marion553f6bd2016-09-07 11:54:22 +0200819 vlib_thread_main_t *tm = vlib_get_thread_main ();
Damjan Marion83243a02016-02-29 13:09:30 +0100820
Dave Barachb7b92992018-10-17 10:38:51 -0400821 clib_memset (apm, 0, sizeof (af_packet_main_t));
Damjan Marion83243a02016-02-29 13:09:30 +0100822
823 mhash_init_vec_string (&apm->if_index_by_host_if_name, sizeof (uword));
824
Damjan Marion553f6bd2016-09-07 11:54:22 +0200825 vec_validate_aligned (apm->rx_buffers, tm->n_vlib_mains - 1,
826 CLIB_CACHE_LINE_BYTES);
827
Mohsin Kazmiacba9f72018-05-17 15:42:27 +0200828 apm->log_class = vlib_log_register_class ("af_packet", 0);
829 vlib_log_debug (apm->log_class, "initialized");
830
Damjan Marion83243a02016-02-29 13:09:30 +0100831 return 0;
832}
833
834VLIB_INIT_FUNCTION (af_packet_init);
Damjan Marion00a9dca2016-08-17 17:05:46 +0200835
836/*
837 * fd.io coding-style-patch-verification: ON
838 *
839 * Local Variables:
840 * eval: (c-set-style "gnu")
841 * End:
842 */