blob: 2e72f5faad48a61b349971cc59f533971e1624f7 [file] [log] [blame]
/*
 * Copyright (c) 2015 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
15#ifndef __included_dpdk_h__
16#define __included_dpdk_h__
17
18/* $$$$ We should rename always_inline -> clib_always_inline */
19#undef always_inline
20
21#include <rte_config.h>
22
23#include <rte_common.h>
24#include <rte_dev.h>
25#include <rte_log.h>
26#include <rte_memory.h>
Ed Warnickecb9cada2015-12-08 15:45:58 -070027#include <rte_memzone.h>
28#include <rte_tailq.h>
29#include <rte_eal.h>
30#include <rte_per_lcore.h>
31#include <rte_launch.h>
32#include <rte_atomic.h>
33#include <rte_cycles.h>
34#include <rte_prefetch.h>
35#include <rte_lcore.h>
36#include <rte_per_lcore.h>
37#include <rte_branch_prediction.h>
38#include <rte_interrupts.h>
39#include <rte_pci.h>
40#include <rte_random.h>
41#include <rte_debug.h>
42#include <rte_ether.h>
43#include <rte_ethdev.h>
44#include <rte_ring.h>
45#include <rte_mempool.h>
46#include <rte_mbuf.h>
Damjan Marione90892e2016-02-23 19:20:28 +010047#ifdef RTE_LIBRTE_KNI
Ed Warnickecb9cada2015-12-08 15:45:58 -070048#include <rte_kni.h>
Damjan Marione90892e2016-02-23 19:20:28 +010049#endif
Ed Warnickecb9cada2015-12-08 15:45:58 -070050#include <rte_virtio_net.h>
51#include <rte_pci_dev_ids.h>
52#include <rte_version.h>
John Lod9bf9ab2016-02-25 11:17:55 -050053#include <rte_eth_bond.h>
Jasvinder Singh85ecc812016-07-21 17:02:19 +010054#include <rte_sched.h>
Ed Warnickecb9cada2015-12-08 15:45:58 -070055
56#include <vnet/unix/pcap.h>
57#include <vnet/devices/virtio/vhost-user.h>
58
/*
 * (Re)define always_inline.  Builds with CLIB_DEBUG > 0 get a plain
 * "static inline"; all other builds additionally force inlining through
 * the __always_inline__ attribute.
 */
#if CLIB_DEBUG > 0
#define always_inline static inline
#else
#define always_inline static inline __attribute__ ((__always_inline__))
#endif
64
Damjan Marion1f0da172016-07-13 22:44:18 +020065#if RTE_VERSION < RTE_VERSION_NUM(16, 7, 0, 0)
66#define DPDK_VHOST_USER 1
67#else
68#define DPDK_VHOST_USER 0
69#endif
70
Damjan Marionc68b4cb2016-05-25 20:11:33 +020071#include <vlib/pci/pci.h>
72
/* Evaluates to 16384. */
#define NB_MBUF   (16<<10)
Ed Warnickecb9cada2015-12-08 15:45:58 -070074
Damjan Marionb8abf872016-03-14 20:02:35 +010075extern vnet_device_class_t dpdk_device_class;
Jean-Mickael Guerin8941ec22016-03-04 14:14:21 +010076extern vlib_node_registration_t dpdk_input_node;
Jean-Mickael Guerin8941ec22016-03-04 14:14:21 +010077extern vlib_node_registration_t handoff_dispatch_node;
Ed Warnickecb9cada2015-12-08 15:45:58 -070078
/*
 * X-macro list of (driver name string, enum suffix) pairs.  The user
 * supplies a two-argument _() macro before expanding it; dpdk_pmd_t uses
 * the second argument to build its VNET_DPDK_PMD_* enumerators.
 */
#define foreach_dpdk_pmd            \
  _ ("rte_nicvf_pmd", THUNDERX)     \
  _ ("rte_em_pmd", E1000EM)         \
  _ ("rte_igb_pmd", IGB)            \
  _ ("rte_igbvf_pmd", IGBVF)        \
  _ ("rte_ixgbe_pmd", IXGBE)        \
  _ ("rte_ixgbevf_pmd", IXGBEVF)    \
  _ ("rte_i40e_pmd", I40E)          \
  _ ("rte_i40evf_pmd", I40EVF)      \
  _ ("rte_virtio_pmd", VIRTIO)      \
  _ ("rte_enic_pmd", ENIC)          \
  _ ("rte_vmxnet3_pmd", VMXNET3)    \
  _ ("AF_PACKET PMD", AF_PACKET)    \
  _ ("rte_bond_pmd", BOND)          \
  _ ("rte_pmd_fm10k", FM10K)        \
  _ ("rte_cxgbe_pmd", CXGBE)        \
  _ ("rte_dpaa2_dpni", DPAA2)
Ed Warnickecb9cada2015-12-08 15:45:58 -070096
Damjan Marion25665672016-08-16 18:56:08 +020097typedef enum
98{
Ed Warnickecb9cada2015-12-08 15:45:58 -070099 VNET_DPDK_PMD_NONE,
100#define _(s,f) VNET_DPDK_PMD_##f,
101 foreach_dpdk_pmd
102#undef _
Damjan Marion25665672016-08-16 18:56:08 +0200103 VNET_DPDK_PMD_UNKNOWN, /* must be last */
Ed Warnickecb9cada2015-12-08 15:45:58 -0700104} dpdk_pmd_t;
105
/* Port type recorded in dpdk_device_t.port_type. */
typedef enum
{
  VNET_DPDK_PORT_TYPE_ETH_1G,
  VNET_DPDK_PORT_TYPE_ETH_10G,
  VNET_DPDK_PORT_TYPE_ETH_40G,
  VNET_DPDK_PORT_TYPE_ETH_BOND,
  VNET_DPDK_PORT_TYPE_ETH_SWITCH,
  VNET_DPDK_PORT_TYPE_AF_PACKET,
  VNET_DPDK_PORT_TYPE_UNKNOWN,
} dpdk_port_type_t;
116
Damjan Marion25665672016-08-16 18:56:08 +0200117typedef struct
118{
Ed Warnickecb9cada2015-12-08 15:45:58 -0700119 f64 deadline;
Damjan Marion25665672016-08-16 18:56:08 +0200120 vlib_frame_t *frame;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700121} dpdk_frame_t;
122
#define DPDK_EFD_MAX_DISCARD_RATE 10
124
Damjan Marion25665672016-08-16 18:56:08 +0200125typedef struct
126{
Ed Warnickecb9cada2015-12-08 15:45:58 -0700127 u16 last_burst_sz;
128 u16 max_burst_sz;
129 u32 full_frames_cnt;
130 u32 consec_full_frames_cnt;
131 u32 congestion_cnt;
132 u64 last_poll_time;
133 u64 max_poll_delay;
134 u32 discard_cnt;
135 u32 total_packet_cnt;
136} dpdk_efd_agent_t;
137
#if DPDK_VHOST_USER
/* Per-vring state; dpdk_vu_intf_t holds an array of these. */
typedef struct
{
  int callfd;
  int kickfd;
  int errfd;
  int enabled;
  u32 callfd_idx;
  u32 n_since_last_int;
  f64 int_deadline;
  u64 packets;
  u64 bytes;
} dpdk_vu_vring;

/* vhost-user interface state; referenced from dpdk_device_t.vu_intf. */
typedef struct
{
  u32 is_up;
  u32 unix_fd;
  u32 unix_file_index;
  u32 client_fd;
  char sock_filename[256];
  int sock_errno;
  u8 sock_is_server;
  u8 active;

  u64 feature_mask;
  u32 num_vrings;
  dpdk_vu_vring vrings[VHOST_MAX_QUEUE_PAIRS * 2];
  u64 region_addr[VHOST_MEMORY_MAX_NREGIONS];
  u32 region_fd[VHOST_MEMORY_MAX_NREGIONS];
  u64 region_offset[VHOST_MEMORY_MAX_NREGIONS];
} dpdk_vu_intf_t;
#endif
Ed Warnickecb9cada2015-12-08 15:45:58 -0700171
Damjan Marion25665672016-08-16 18:56:08 +0200172typedef void (*dpdk_flowcontrol_callback_t) (vlib_main_t * vm,
173 u32 hw_if_index, u32 n_packets);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700174
175/*
176 * The header for the tx_vector in dpdk_device_t.
177 * Head and tail are indexes into the tx_vector and are of type
178 * u64 so they never overflow.
179 */
Damjan Marion25665672016-08-16 18:56:08 +0200180typedef struct
181{
Ed Warnickecb9cada2015-12-08 15:45:58 -0700182 u64 tx_head;
183 u64 tx_tail;
184} tx_ring_hdr_t;
185
Damjan Marion25665672016-08-16 18:56:08 +0200186typedef struct
187{
Jasvinder Singh85ecc812016-07-21 17:02:19 +0100188 struct rte_ring *swq;
189
190 u64 hqos_field0_slabmask;
191 u32 hqos_field0_slabpos;
192 u32 hqos_field0_slabshr;
193 u64 hqos_field1_slabmask;
194 u32 hqos_field1_slabpos;
195 u32 hqos_field1_slabshr;
196 u64 hqos_field2_slabmask;
197 u32 hqos_field2_slabpos;
198 u32 hqos_field2_slabshr;
199 u32 hqos_tc_table[64];
200} dpdk_device_hqos_per_worker_thread_t;
201
202typedef struct
203{
204 struct rte_ring **swq;
205 struct rte_mbuf **pkts_enq;
206 struct rte_mbuf **pkts_deq;
207 struct rte_sched_port *hqos;
208 u32 hqos_burst_enq;
209 u32 hqos_burst_deq;
210 u32 pkts_enq_len;
211 u32 swq_pos;
212} dpdk_device_hqos_per_hqos_thread_t;
213
214typedef struct
215{
Damjan Marion25665672016-08-16 18:56:08 +0200216 CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
Damjan Marion85cdbd02016-02-12 18:00:23 +0100217 volatile u32 **lockp;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700218
219 /* Instance ID */
220 u32 device_index;
221
222 u32 vlib_hw_if_index;
223 u32 vlib_sw_if_index;
224
225 /* next node index if we decide to steal the rx graph arc */
226 u32 per_interface_next_index;
227
228 /* dpdk rte_mbuf rx and tx vectors, VLIB_FRAME_SIZE */
Damjan Marion25665672016-08-16 18:56:08 +0200229 struct rte_mbuf ***tx_vectors; /* one per worker thread */
230 struct rte_mbuf ***rx_vectors;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700231
232 /* vector of traced contexts, per device */
Damjan Marion25665672016-08-16 18:56:08 +0200233 u32 *d_trace_buffers;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700234
Ed Warnickecb9cada2015-12-08 15:45:58 -0700235 dpdk_pmd_t pmd:8;
236 i8 cpu_socket;
237
Damjan Marionb28e4982016-08-22 22:34:38 +0200238 u16 flags;
Damjan Marion56431702016-09-19 13:18:09 +0200239#define DPDK_DEVICE_FLAG_ADMIN_UP (1 << 0)
240#define DPDK_DEVICE_FLAG_PROMISC (1 << 1)
241#define DPDK_DEVICE_FLAG_PMD (1 << 2)
242#define DPDK_DEVICE_FLAG_KNI (1 << 3)
243#define DPDK_DEVICE_FLAG_VHOST_USER (1 << 4)
Damjan Mariona7cc4472016-09-19 13:31:49 +0200244#define DPDK_DEVICE_FLAG_HAVE_SUBIF (1 << 5)
Jasvinder Singh85ecc812016-07-21 17:02:19 +0100245#define DPDK_DEVICE_FLAG_HQOS (1 << 6)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700246
Damjan Marionb58598b2016-09-19 13:44:37 +0200247 u16 nb_tx_desc;
Damjan Marion25665672016-08-16 18:56:08 +0200248 CLIB_CACHE_LINE_ALIGN_MARK (cacheline1);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700249
Damjan Marion25665672016-08-16 18:56:08 +0200250 u8 *interface_name_suffix;
Damjan Marion32f7bc12016-07-21 03:42:37 -0700251
Damjan Mariona7cc4472016-09-19 13:31:49 +0200252 /* number of sub-interfaces */
253 u16 num_subifs;
254
Ed Warnickecb9cada2015-12-08 15:45:58 -0700255 /* PMD related */
256 u16 tx_q_used;
257 u16 rx_q_used;
258 u16 nb_rx_desc;
Damjan Marion25665672016-08-16 18:56:08 +0200259 u16 *cpu_socket_id_by_queue;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700260 struct rte_eth_conf port_conf;
261 struct rte_eth_txconf tx_conf;
262
Jasvinder Singh85ecc812016-07-21 17:02:19 +0100263 /* HQoS related */
264 dpdk_device_hqos_per_worker_thread_t *hqos_wt;
265 dpdk_device_hqos_per_hqos_thread_t *hqos_ht;
266
Ed Warnickecb9cada2015-12-08 15:45:58 -0700267 /* KNI related */
268 struct rte_kni *kni;
269 u8 kni_port_id;
270
Damjan Marion1f0da172016-07-13 22:44:18 +0200271#if DPDK_VHOST_USER
Ed Warnickecb9cada2015-12-08 15:45:58 -0700272 /* vhost-user related */
273 u32 vu_if_id;
Damjan Marion25665672016-08-16 18:56:08 +0200274 struct virtio_net vu_vhost_dev;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700275 u32 vu_is_running;
276 dpdk_vu_intf_t *vu_intf;
Damjan Marion1f0da172016-07-13 22:44:18 +0200277#endif
Ed Warnickecb9cada2015-12-08 15:45:58 -0700278
279 /* af_packet */
280 u8 af_packet_port_id;
281
282 struct rte_eth_link link;
283 f64 time_last_link_update;
284
285 struct rte_eth_stats stats;
286 struct rte_eth_stats last_stats;
Sean Hopea4f16a02016-03-28 13:11:31 -0400287 struct rte_eth_stats last_cleared_stats;
Damjan Marion1f0da172016-07-13 22:44:18 +0200288#if RTE_VERSION >= RTE_VERSION_NUM(16, 7, 0, 0)
Damjan Marion25665672016-08-16 18:56:08 +0200289 struct rte_eth_xstat *xstats;
290 struct rte_eth_xstat *last_cleared_xstats;
Damjan Marion1f0da172016-07-13 22:44:18 +0200291#else
Damjan Marion25665672016-08-16 18:56:08 +0200292 struct rte_eth_xstats *xstats;
293 struct rte_eth_xstats *last_cleared_xstats;
Damjan Marion1f0da172016-07-13 22:44:18 +0200294#endif
Ed Warnickecb9cada2015-12-08 15:45:58 -0700295 f64 time_last_stats_update;
296 dpdk_port_type_t port_type;
297
298 dpdk_efd_agent_t efd_agent;
Damjan Marion25665672016-08-16 18:56:08 +0200299 u8 need_txlock; /* Used by VNET_DPDK_DEV_VHOST_USER */
Ed Warnickecb9cada2015-12-08 15:45:58 -0700300} dpdk_device_t;
301
#define DPDK_STATS_POLL_INTERVAL      (10.0)
#define DPDK_MIN_STATS_POLL_INTERVAL  (0.001)	/* 1msec */

#define DPDK_LINK_POLL_INTERVAL       (3.0)
#define DPDK_MIN_LINK_POLL_INTERVAL   (0.001)	/* 1msec */
Ed Warnickecb9cada2015-12-08 15:45:58 -0700307
Damjan Marion25665672016-08-16 18:56:08 +0200308typedef struct
309{
310 CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700311
312 /* total input packet counter */
313 u64 aggregate_rx_packets;
314} dpdk_worker_t;
315
Damjan Marion25665672016-08-16 18:56:08 +0200316typedef struct
317{
Jasvinder Singh85ecc812016-07-21 17:02:19 +0100318 CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
319
320 /* total input packet counter */
321 u64 aggregate_rx_packets;
322} dpdk_hqos_thread_t;
323
324typedef struct
325{
Ed Warnickecb9cada2015-12-08 15:45:58 -0700326 u32 device;
327 u16 queue_id;
328} dpdk_device_and_queue_t;
329
/* Early-Fast-Discard (EFD) */
#define DPDK_EFD_DISABLED                       0
#define DPDK_EFD_DISCARD_ENABLED                (1 << 0)
#define DPDK_EFD_MONITOR_ENABLED                (1 << 1)
#define DPDK_EFD_DROPALL_ENABLED                (1 << 2)

#define DPDK_EFD_DEFAULT_DEVICE_QUEUE_HI_THRESH_PCT    90
#define DPDK_EFD_DEFAULT_CONSEC_FULL_FRAMES_HI_THRESH  6
338
Damjan Marion25665672016-08-16 18:56:08 +0200339typedef struct dpdk_efd_t
340{
Ed Warnickecb9cada2015-12-08 15:45:58 -0700341 u16 enabled;
342 u16 queue_hi_thresh;
343 u16 consec_full_frames_hi_thresh;
344 u16 pad;
345} dpdk_efd_t;
346
/* Defaults to 0 unless already defined before this point. */
#ifndef DPDK_HQOS_DBG_BYPASS
#define DPDK_HQOS_DBG_BYPASS 0
#endif
350
351typedef struct dpdk_device_config_hqos_t
352{
353 u32 hqos_thread;
354 u32 hqos_thread_valid;
355
356 u32 swq_size;
357 u32 burst_enq;
358 u32 burst_deq;
359
360 u32 pktfield0_slabpos;
361 u32 pktfield1_slabpos;
362 u32 pktfield2_slabpos;
363 u64 pktfield0_slabmask;
364 u64 pktfield1_slabmask;
365 u64 pktfield2_slabmask;
366 u32 tc_table[64];
367
368 struct rte_sched_port_params port;
369 struct rte_sched_subport_params *subport;
370 struct rte_sched_pipe_params *pipe;
371 uint32_t *pipe_map;
372} dpdk_device_config_hqos_t;
373
374int dpdk_hqos_validate_mask (u64 mask, u32 n);
375void dpdk_device_config_hqos_pipe_profile_default (dpdk_device_config_hqos_t *
376 hqos, u32 pipe_profile_id);
377void dpdk_device_config_hqos_default (dpdk_device_config_hqos_t * hqos);
378clib_error_t *dpdk_port_setup_hqos (dpdk_device_t * xd,
379 dpdk_device_config_hqos_t * hqos);
380void dpdk_hqos_metadata_set (dpdk_device_hqos_per_worker_thread_t * hqos,
381 struct rte_mbuf **pkts, u32 n_pkts);
382
/*
 * X-macro list of per-device config item names; dpdk_device_config_t
 * expands it with a one-argument _() to declare one uword member per item.
 */
#define foreach_dpdk_device_config_item \
  _ (num_rx_queues) \
  _ (num_tx_queues) \
  _ (num_rx_desc) \
  _ (num_tx_desc) \
  _ (rss_fn)
Damjan Marionc68b4cb2016-05-25 20:11:33 +0200389
Damjan Marion25665672016-08-16 18:56:08 +0200390typedef struct
391{
392 vlib_pci_addr_t pci_addr;
393 u8 is_blacklisted;
394 u8 vlan_strip_offload;
John Loce3e9712016-07-07 13:54:44 -0400395#define DPDK_DEVICE_VLAN_STRIP_DEFAULT 0
396#define DPDK_DEVICE_VLAN_STRIP_OFF 1
397#define DPDK_DEVICE_VLAN_STRIP_ON 2
398
Damjan Marionc68b4cb2016-05-25 20:11:33 +0200399#define _(x) uword x;
400 foreach_dpdk_device_config_item
401#undef _
Damjan Marion0b140722016-06-14 00:36:09 +0200402 clib_bitmap_t * workers;
Jasvinder Singh85ecc812016-07-21 17:02:19 +0100403 u32 hqos_enabled;
404 dpdk_device_config_hqos_t hqos;
Damjan Marionc68b4cb2016-05-25 20:11:33 +0200405} dpdk_device_config_t;
406
Damjan Marion25665672016-08-16 18:56:08 +0200407typedef struct
408{
Ed Warnickecb9cada2015-12-08 15:45:58 -0700409
Damjan Marion64ae6692016-05-25 18:40:13 +0200410 /* Config stuff */
Damjan Marion25665672016-08-16 18:56:08 +0200411 u8 **eal_init_args;
412 u8 *eal_init_args_str;
413 u8 *uio_driver_name;
Damjan Marion64ae6692016-05-25 18:40:13 +0200414 u8 no_multi_seg;
415 u8 enable_tcp_udp_checksum;
416
417 /* Required config parameters */
418 u8 coremask_set_manually;
419 u8 nchannels_set_manually;
420 u32 coremask;
421 u32 nchannels;
422 u32 num_mbufs;
Damjan Marion25665672016-08-16 18:56:08 +0200423 u8 num_kni; /* while kni_init allows u32, port_id in callback fn is only u8 */
Damjan Marion64ae6692016-05-25 18:40:13 +0200424
425 /*
426 * format interface names ala xxxEthernet%d/%d/%d instead of
Damjan Marion137c7c62016-07-06 22:52:49 +0200427 * xxxEthernet%x/%x/%x.
Damjan Marion64ae6692016-05-25 18:40:13 +0200428 */
429 u8 interface_name_format_decimal;
430
431 /* virtio vhost-user switch */
432 u8 use_virtio_vhost;
433
434 /* vhost-user coalescence frames config */
435 u32 vhost_coalesce_frames;
436 f64 vhost_coalesce_time;
437
Damjan Marionc68b4cb2016-05-25 20:11:33 +0200438 /* per-device config */
439 dpdk_device_config_t default_devconf;
Damjan Marion25665672016-08-16 18:56:08 +0200440 dpdk_device_config_t *dev_confs;
441 uword *device_config_index_by_pci_addr;
Damjan Marionc68b4cb2016-05-25 20:11:33 +0200442
Damjan Marion64ae6692016-05-25 18:40:13 +0200443} dpdk_config_main_t;
444
445dpdk_config_main_t dpdk_config_main;
446
Damjan Marion25665672016-08-16 18:56:08 +0200447typedef struct
448{
Damjan Marion64ae6692016-05-25 18:40:13 +0200449
Ed Warnickecb9cada2015-12-08 15:45:58 -0700450 /* Devices */
Damjan Marion25665672016-08-16 18:56:08 +0200451 dpdk_device_t *devices;
452 dpdk_device_and_queue_t **devices_by_cpu;
Jasvinder Singh85ecc812016-07-21 17:02:19 +0100453 dpdk_device_and_queue_t **devices_by_hqos_cpu;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700454
455 /* per-thread recycle lists */
Damjan Marion25665672016-08-16 18:56:08 +0200456 u32 **recycle;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700457
Dave Barachd81566f2016-02-15 11:34:13 -0500458 /* buffer flags template, configurable to enable/disable tcp / udp cksum */
459 u32 buffer_flags_template;
460
Ed Warnickecb9cada2015-12-08 15:45:58 -0700461 /* flow control callback. If 0 then flow control is disabled */
462 dpdk_flowcontrol_callback_t flowcontrol_callback;
463
464 /* vlib buffer free list, must be same size as an rte_mbuf */
465 u32 vlib_buffer_free_list_index;
466
Ed Warnickecb9cada2015-12-08 15:45:58 -0700467 /* dpdk worker "threads" */
Damjan Marion25665672016-08-16 18:56:08 +0200468 dpdk_worker_t *workers;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700469
Jasvinder Singh85ecc812016-07-21 17:02:19 +0100470 /* dpdk HQoS "threads" */
471 dpdk_hqos_thread_t *hqos_threads;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700472
473 /* Ethernet input node index */
474 u32 ethernet_input_node_index;
475
Ed Warnickecb9cada2015-12-08 15:45:58 -0700476 /* pcap tracing [only works if (CLIB_DEBUG > 0)] */
477 int tx_pcap_enable;
478 pcap_main_t pcap_main;
Damjan Marion25665672016-08-16 18:56:08 +0200479 u8 *pcap_filename;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700480 u32 pcap_sw_if_index;
481 u32 pcap_pkts_to_capture;
482
Ed Warnickecb9cada2015-12-08 15:45:58 -0700483 /* hashes */
Damjan Marion25665672016-08-16 18:56:08 +0200484 uword *dpdk_device_by_kni_port_id;
485 uword *vu_sw_if_index_by_listener_fd;
486 uword *vu_sw_if_index_by_sock_fd;
487 u32 *vu_inactive_interfaces_device_index;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700488
489 u32 next_vu_if_id;
490
491 /* efd (early-fast-discard) settings */
492 dpdk_efd_t efd;
493
494 /*
495 * flag indicating that a posted admin up/down
496 * (via post_sw_interface_set_flags) is in progress
497 */
498 u8 admin_up_down_in_progress;
499
Damjan Marionc68b4cb2016-05-25 20:11:33 +0200500 u8 use_rss;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700501
502 /* which cpus are running dpdk-input */
503 int input_cpu_first_index;
504 int input_cpu_count;
505
Jasvinder Singh85ecc812016-07-21 17:02:19 +0100506 /* which cpus are running I/O TX */
507 int hqos_cpu_first_index;
508 int hqos_cpu_count;
509
Bud Grise02301ef2016-02-24 16:09:05 -0500510 /* control interval of dpdk link state and stat polling */
511 f64 link_state_poll_interval;
512 f64 stat_poll_interval;
513
Dave Barach08602d12016-06-04 14:10:59 -0400514 /* Sleep for this many MS after each device poll */
515 u32 poll_sleep;
516
Ed Warnickecb9cada2015-12-08 15:45:58 -0700517 /* convenience */
Damjan Marion25665672016-08-16 18:56:08 +0200518 vlib_main_t *vlib_main;
519 vnet_main_t *vnet_main;
520 dpdk_config_main_t *conf;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700521} dpdk_main_t;
522
523dpdk_main_t dpdk_main;
524
/* RX next-node selectors; DPDK_RX_N_NEXT is the number of selectors. */
typedef enum
{
  DPDK_RX_NEXT_IP4_INPUT,
  DPDK_RX_NEXT_IP6_INPUT,
  DPDK_RX_NEXT_MPLS_INPUT,
  DPDK_RX_NEXT_ETHERNET_INPUT,
  DPDK_RX_NEXT_DROP,
  DPDK_RX_N_NEXT,
} dpdk_rx_next_t;
534
Damjan Marion25665672016-08-16 18:56:08 +0200535typedef struct
536{
Damjan Marion7f620972016-02-25 16:00:11 +0100537 u32 buffer_index;
538 u16 device_index;
539 u8 queue_index;
540 struct rte_mbuf mb;
541 /* Copy of VLIB buffer; packet data stored in pre_data. */
542 vlib_buffer_t buffer;
543} dpdk_tx_dma_trace_t;
544
Damjan Marion25665672016-08-16 18:56:08 +0200545typedef struct
546{
Damjan Marion7f620972016-02-25 16:00:11 +0100547 u32 buffer_index;
548 u16 device_index;
549 u16 queue_index;
550 struct rte_mbuf mb;
Damjan Marion25665672016-08-16 18:56:08 +0200551 vlib_buffer_t buffer; /* Copy of VLIB buffer; pkt data stored in pre_data. */
552 u8 data[256]; /* First 256 data bytes, used for hexdump */
Damjan Marion7f620972016-02-25 16:00:11 +0100553} dpdk_rx_dma_trace_t;
554
Ed Warnickecb9cada2015-12-08 15:45:58 -0700555void vnet_buffer_needs_dpdk_mb (vlib_buffer_t * b);
556
557void dpdk_set_next_node (dpdk_rx_next_t, char *);
558
Damjan Marion25665672016-08-16 18:56:08 +0200559clib_error_t *dpdk_set_mac_address (vnet_hw_interface_t * hi, char *address);
Christian Dechamplain2073cfe2016-02-19 12:26:57 -0500560
Damjan Marion25665672016-08-16 18:56:08 +0200561clib_error_t *dpdk_set_mc_filter (vnet_hw_interface_t * hi,
562 struct ether_addr mc_addr_vec[], int naddr);
Christian Dechamplain2073cfe2016-02-19 12:26:57 -0500563
Ed Warnickecb9cada2015-12-08 15:45:58 -0700564void dpdk_thread_input (dpdk_main_t * dm, dpdk_device_t * xd);
565
Damjan Marion25665672016-08-16 18:56:08 +0200566clib_error_t *dpdk_port_setup (dpdk_main_t * dm, dpdk_device_t * xd);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700567
Damjan Marion25665672016-08-16 18:56:08 +0200568void dpdk_set_flowcontrol_callback (vlib_main_t * vm,
569 dpdk_flowcontrol_callback_t callback);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700570
571u32 dpdk_interface_tx_vector (vlib_main_t * vm, u32 dev_instance);
572
Damjan Marion25665672016-08-16 18:56:08 +0200573void set_efd_bitmap (u8 * bitmap, u32 value, u32 op);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700574
Damjan Marion25665672016-08-16 18:56:08 +0200575struct rte_mbuf *dpdk_replicate_packet_mb (vlib_buffer_t * b);
576struct rte_mbuf *dpdk_zerocopy_replicate_packet_mb (vlib_buffer_t * b);
Keith Burns (alagalah)52fc44d2016-03-25 09:38:50 -0700577
/*
 * X-macro list of (enum suffix, description string) pairs; dpdk_error_t
 * expands it to build the DPDK_ERROR_* enumerators.
 */
#define foreach_dpdk_error                                              \
  _(NONE, "no error")                                                   \
  _(RX_PACKET_ERROR, "Rx packet errors")                                \
  _(RX_BAD_FCS, "Rx bad fcs")                                           \
  _(L4_CHECKSUM_ERROR, "Rx L4 checksum errors")                         \
  _(IP_CHECKSUM_ERROR, "Rx ip checksum errors")                         \
  _(RX_ALLOC_FAIL, "rx buf alloc from free list failed")                \
  _(RX_ALLOC_NO_PHYSMEM, "rx buf alloc failed no physmem")              \
  _(RX_ALLOC_DROP_PKTS, "rx packets dropped due to alloc error")        \
  _(IPV4_EFD_DROP_PKTS, "IPV4 Early Fast Discard rx drops")             \
  _(IPV6_EFD_DROP_PKTS, "IPV6 Early Fast Discard rx drops")             \
  _(MPLS_EFD_DROP_PKTS, "MPLS Early Fast Discard rx drops")             \
  _(VLAN_EFD_DROP_PKTS, "VLAN Early Fast Discard rx drops")
591
Damjan Marion25665672016-08-16 18:56:08 +0200592typedef enum
593{
Ed Warnickecb9cada2015-12-08 15:45:58 -0700594#define _(f,s) DPDK_ERROR_##f,
595 foreach_dpdk_error
596#undef _
Damjan Marion25665672016-08-16 18:56:08 +0200597 DPDK_N_ERROR,
Ed Warnickecb9cada2015-12-08 15:45:58 -0700598} dpdk_error_t;
599
600/*
601 * Increment EFD drop counter
602 */
Damjan Marion25665672016-08-16 18:56:08 +0200603static_always_inline void
604increment_efd_drop_counter (vlib_main_t * vm, u32 counter_index, u32 count)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700605{
Damjan Marion25665672016-08-16 18:56:08 +0200606 vlib_node_t *my_n;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700607
Damjan Marion25665672016-08-16 18:56:08 +0200608 my_n = vlib_get_node (vm, dpdk_input_node.index);
609 vm->error_main.counters[my_n->error_heap_index + counter_index] += count;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700610}
611
Bud Grise02301ef2016-02-24 16:09:05 -0500612int dpdk_set_stat_poll_interval (f64 interval);
613int dpdk_set_link_state_poll_interval (f64 interval);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700614void dpdk_update_link_state (dpdk_device_t * xd, f64 now);
Damjan Marion25665672016-08-16 18:56:08 +0200615void dpdk_device_lock_init (dpdk_device_t * xd);
616void dpdk_device_lock_free (dpdk_device_t * xd);
617void dpdk_efd_update_counters (dpdk_device_t * xd, u32 n_buffers,
618 u16 enabled);
619u32 is_efd_discardable (vlib_thread_main_t * tm, vlib_buffer_t * b0,
620 struct rte_mbuf *mb);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700621
Damjan Marion1f0da172016-07-13 22:44:18 +0200622#if DPDK_VHOST_USER
Ed Warnickecb9cada2015-12-08 15:45:58 -0700623/* dpdk vhost-user interrupt management */
Damjan Marion25665672016-08-16 18:56:08 +0200624u8 dpdk_vhost_user_want_interrupt (dpdk_device_t * xd, int idx);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700625void dpdk_vhost_user_send_interrupt (vlib_main_t * vm, dpdk_device_t * xd,
Damjan Marion25665672016-08-16 18:56:08 +0200626 int idx);
Damjan Marion1f0da172016-07-13 22:44:18 +0200627#endif
Ed Warnickecb9cada2015-12-08 15:45:58 -0700628
629
Damjan Marion25665672016-08-16 18:56:08 +0200630static inline u64
631vnet_get_aggregate_rx_packets (void)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700632{
Damjan Marion25665672016-08-16 18:56:08 +0200633 dpdk_main_t *dm = &dpdk_main;
634 u64 sum = 0;
635 dpdk_worker_t *dw;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700636
Damjan Marion25665672016-08-16 18:56:08 +0200637 vec_foreach (dw, dm->workers) sum += dw->aggregate_rx_packets;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700638
Damjan Marion25665672016-08-16 18:56:08 +0200639 return sum;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700640}
641
642void dpdk_rx_trace (dpdk_main_t * dm,
Damjan Marion25665672016-08-16 18:56:08 +0200643 vlib_node_runtime_t * node,
644 dpdk_device_t * xd,
645 u16 queue_id, u32 * buffers, uword n_buffers);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700646
#define EFD_OPERATION_LESS_THAN          0
#define EFD_OPERATION_GREATER_OR_EQUAL   1
649
Damjan Marion25665672016-08-16 18:56:08 +0200650void efd_config (u32 enabled,
651 u32 ip_prec, u32 ip_op,
652 u32 mpls_exp, u32 mpls_op, u32 vlan_cos, u32 vlan_op);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700653
Damjan Marion25665672016-08-16 18:56:08 +0200654void post_sw_interface_set_flags (vlib_main_t * vm, u32 sw_if_index,
655 u32 flags);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700656
#if DPDK_VHOST_USER
typedef struct vhost_user_memory vhost_user_memory_t;

void dpdk_vhost_user_process_init (void **ctx);
void dpdk_vhost_user_process_cleanup (void *ctx);
uword dpdk_vhost_user_process_if (vlib_main_t * vm, dpdk_device_t * xd,
                                  void *ctx);

/* vhost-user calls */
int dpdk_vhost_user_create_if (vnet_main_t * vnm, vlib_main_t * vm,
                               const char *sock_filename,
                               u8 is_server,
                               u32 * sw_if_index,
                               u64 feature_mask,
                               u8 renumber, u32 custom_dev_instance,
                               u8 * hwaddr);
int dpdk_vhost_user_modify_if (vnet_main_t * vnm, vlib_main_t * vm,
                               const char *sock_filename,
                               u8 is_server,
                               u32 sw_if_index,
                               u64 feature_mask,
                               u8 renumber, u32 custom_dev_instance);
int dpdk_vhost_user_delete_if (vnet_main_t * vnm, vlib_main_t * vm,
                               u32 sw_if_index);
int dpdk_vhost_user_dump_ifs (vnet_main_t * vnm, vlib_main_t * vm,
                              vhost_user_intf_details_t ** out_vuids);
#endif
Ed Warnickecb9cada2015-12-08 15:45:58 -0700684
685u32 dpdk_get_admin_up_down_in_progress (void);
686
Todd Foggoa (tfoggoa)a30d40d2016-03-17 16:54:30 -0400687u32 dpdk_num_mbufs (void);
688
Damjan Marion25665672016-08-16 18:56:08 +0200689dpdk_pmd_t dpdk_get_pmd_type (vnet_hw_interface_t * hi);
Todd Foggoa (tfoggoa)a30d40d2016-03-17 16:54:30 -0400690
Damjan Marion25665672016-08-16 18:56:08 +0200691i8 dpdk_get_cpu_socket (vnet_hw_interface_t * hi);
Todd Foggoa (tfoggoa)a30d40d2016-03-17 16:54:30 -0400692
Damjan Marion25665672016-08-16 18:56:08 +0200693void *dpdk_input_multiarch_select ();
694void *dpdk_input_rss_multiarch_select ();
695void *dpdk_input_efd_multiarch_select ();
Ed Warnickecb9cada2015-12-08 15:45:58 -0700696
Damjan Marion25665672016-08-16 18:56:08 +0200697clib_error_t *dpdk_get_hw_interface_stats (u32 hw_if_index,
698 struct rte_eth_stats *dest);
Sean Hopea4f16a02016-03-28 13:11:31 -0400699
Damjan Marion7f620972016-02-25 16:00:11 +0100700format_function_t format_dpdk_device_name;
701format_function_t format_dpdk_device;
702format_function_t format_dpdk_tx_dma_trace;
703format_function_t format_dpdk_rx_dma_trace;
704format_function_t format_dpdk_rte_mbuf;
705format_function_t format_dpdk_rx_rte_mbuf;
706unformat_function_t unformat_socket_mem;
Damjan Marion25665672016-08-16 18:56:08 +0200707clib_error_t *unformat_rss_fn (unformat_input_t * input, uword * rss_fn);
Jasvinder Singh85ecc812016-07-21 17:02:19 +0100708clib_error_t *unformat_hqos (unformat_input_t * input,
709 dpdk_device_config_hqos_t * hqos);
Damjan Marion7f620972016-02-25 16:00:11 +0100710
Damjan Marionaa04a0f2016-04-01 12:34:24 +0200711
/*
 * Take the address of each enabled driver's init entry point so that the
 * symbol is referenced from this translation unit.  Nothing is called:
 * each expansion only stores the function address in an unused volatile
 * local.  Which drivers are referenced depends on the RTE_LIBRTE_*
 * configuration macros; with none defined the body is empty.
 */
static inline void
dpdk_pmd_constructor_init (void)
{
  /* Add references to DPDK Driver Constructor functions to get the dynamic
   * loader to pull in the driver library & run the constructors.
   */
  /* The trailing ';' is part of the macro: uses below have none. */
#define _(d)                                            \
  do {                                                  \
    void devinitfn_ ##d(void);                          \
    __attribute__((unused)) void (* volatile pf)(void); \
    pf = devinitfn_ ##d;                                \
  } while(0);

#ifdef RTE_LIBRTE_EM_PMD
  _(em_pmd_drv)
#endif
#ifdef RTE_LIBRTE_IGB_PMD
  _(pmd_igb_drv)
#endif
#ifdef RTE_LIBRTE_IXGBE_PMD
  _(rte_ixgbe_driver)
#endif
#ifdef RTE_LIBRTE_I40E_PMD
  _(rte_i40e_driver) _(rte_i40evf_driver)
#endif
#ifdef RTE_LIBRTE_FM10K_PMD
  _(rte_fm10k_driver)
#endif
#ifdef RTE_LIBRTE_VIRTIO_PMD
  _(rte_virtio_driver)
#endif
#ifdef RTE_LIBRTE_VMXNET3_PMD
  _(rte_vmxnet3_driver)
#endif
#ifdef RTE_LIBRTE_VICE_PMD
  _(rte_vice_driver)
#endif
#ifdef RTE_LIBRTE_ENIC_PMD
  _(rte_enic_driver)
#endif
#ifdef RTE_LIBRTE_PMD_AF_PACKET
  _(pmd_af_packet_drv)
#endif
#ifdef RTE_LIBRTE_CXGBE_PMD
  _(rte_cxgbe_driver)
#endif
#ifdef RTE_LIBRTE_PMD_BOND
  _(bond_drv)
#endif
#ifdef RTE_LIBRTE_DPAA2_PMD
  _(pmd_dpaa2_drv)
#endif
#undef _
/*
 * At the moment, the ThunderX NIC driver doesn't have
 * an entry point named "devinitfn_rte_xxx_driver"
 */
#define _(d)                                            \
  do {                                                  \
    void d(void);                                       \
    __attribute__((unused)) void (* volatile pf)(void); \
    pf = d;                                             \
  } while(0);
#ifdef RTE_LIBRTE_THUNDERVNIC_PMD
  _(rte_nicvf_pmd_init)
#endif
#undef _
}
780
Todd Foggoae3eefff2016-05-20 22:10:34 -0400781uword
782admin_up_down_process (vlib_main_t * vm,
Damjan Marion25665672016-08-16 18:56:08 +0200783 vlib_node_runtime_t * rt, vlib_frame_t * f);
Todd Foggoae3eefff2016-05-20 22:10:34 -0400784
Ed Warnickecb9cada2015-12-08 15:45:58 -0700785#endif /* __included_dpdk_h__ */
Damjan Marion25665672016-08-16 18:56:08 +0200786
/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */