af_xdp: add option to claim all available rx queues
Type: feature
Change-Id: I97176c2c90ea664a68078b3a7b7d44eb237a7f13
Signed-off-by: Benoît Ganne <bganne@cisco.com>
diff --git a/src/plugins/af_xdp/af_xdp.api b/src/plugins/af_xdp/af_xdp.api
index 765af68..14f51d8 100644
--- a/src/plugins/af_xdp/af_xdp.api
+++ b/src/plugins/af_xdp/af_xdp.api
@@ -31,7 +31,7 @@
@param context - sender context, to match reply w/ request
@param host_if - Linux netdev interface name
@param name - new af_xdp interface name (optional)
- @param rxq_num - number of receive queues (optional)
+ @param rxq_num - number of receive queues. 65535 can be used as a special value to request all available queues (optional)
@param rxq_size - receive queue size (optional)
@param txq_size - transmit queue size (optional)
@param mode - operation mode (optional)
@@ -50,7 +50,7 @@
u16 txq_size [default=0];
vl_api_af_xdp_mode_t mode [default=0];
string prog[256];
- option vat_help = "<host-if linux-ifname> [name ifname] [rx-queue-size size] [tx-queue-size size] [num-rx-queues num] [prog pathname] [zero-copy|no-zero-copy]";
+ option vat_help = "<host-if linux-ifname> [name ifname] [rx-queue-size size] [tx-queue-size size] [num-rx-queues <num|all>] [prog pathname] [zero-copy|no-zero-copy]";
option status="in_progress";
};
diff --git a/src/plugins/af_xdp/af_xdp.h b/src/plugins/af_xdp/af_xdp.h
index fd990ec..3bd53ad 100644
--- a/src/plugins/af_xdp/af_xdp.h
+++ b/src/plugins/af_xdp/af_xdp.h
@@ -22,6 +22,8 @@
#include <vnet/interface.h>
#include <bpf/xsk.h>
+#define AF_XDP_NUM_RX_QUEUES_ALL ((u16)-1)
+
#define af_xdp_log(lvl, dev, f, ...) \
vlib_log(lvl, af_xdp_main.log_class, "%v: " f, (dev)->name, ##__VA_ARGS__)
diff --git a/src/plugins/af_xdp/af_xdp_doc.md b/src/plugins/af_xdp/af_xdp_doc.md
index 40a3596..76d653f 100644
--- a/src/plugins/af_xdp/af_xdp_doc.md
+++ b/src/plugins/af_xdp/af_xdp_doc.md
@@ -17,8 +17,13 @@
(4096-bytes on most systems) minus 256-bytes, and they are additional
limitations depending upon specific Linux device drivers.
As a rule of thumb, a MTU of 3000-bytes or less should be safe.
-Furthermore, upon UMEM creation, the kernel allocates a physically-contiguous structure, whose size is proportional to the number of 4KB pages contained in the UMEM. That allocation might fail when the number of buffers allocated by VPP is too high. That number can be controlled with the `buffers { buffers-per-numa }` configuration option.
-Finally, note that because of this limitation, this plugin is unlikely to be compatible with the use of 1GB hugepages.
+Furthermore, upon UMEM creation, the kernel allocates a
+physically-contiguous structure, whose size is proportional to the number
+of 4KB pages contained in the UMEM. That allocation might fail when
+the number of buffers allocated by VPP is too high. That number can be
+controlled with the `buffers { buffers-per-numa }` configuration option.
+Finally, note that because of this limitation, this plugin is unlikely
+to be compatible with the use of 1GB hugepages.
## Requirements
The Linux kernel interface must be up and have enough queues before
@@ -31,9 +36,10 @@
will usually be several RX queues (typically 1 per core) and packets are
spread accross queues by RSS. In order to receive consistent traffic,
you **must** program the NIC dispatching accordingly. The simplest way
-to get all the packets is to reconfigure the Linux kernel driver to use
-only `num_rx_queues` RX queues (ie all NIC queues will be associated
-with the AF_XDP socket):
+to get all the packets is to specify `num-rx-queues all` to grab all
+available queues or to reconfigure the Linux kernel driver to use only
+`num_rx_queues` RX queues (ie all NIC queues will be associated with
+the AF_XDP socket):
```
~# ethtool -L <iface> combined <num_rx_queues>
```
@@ -57,25 +63,21 @@
## Security considerations
When creating an AF_XDP interface, it will receive all packets arriving
-to the NIC RX queue #0. You need to configure the Linux kernel NIC
-driver properly to ensure that only intented packets will arrive in
-this queue. There is no way to filter the packets after-the-fact using
-eg. netfilter or eBPF.
+to the NIC RX queues `[0, num_rx_queues[`. You need to configure the Linux
+kernel NIC driver properly to ensure that only intended packets will
+arrive in these queues. There is no way to filter the packets after-the-fact
+using eg. netfilter or eBPF.
## Quickstart
-1. Setup the Linux kernel interface (enp216s0f0 here) to use 4 queues:
-```
-~# ethtool -L enp216s0f0 combined 4
-```
-2. Put the Linux kernel interface up and in promiscuous mode:
+1. Put the Linux kernel interface up and in promiscuous mode:
```
~# ip l set dev enp216s0f0 promisc on up
```
-3. Create the AF_XDP interface:
+2. Create the AF_XDP interface:
```
-~# vppctl create int af_xdp host-if enp216s0f0 num-rx-queues 4
+~# vppctl create int af_xdp host-if enp216s0f0 num-rx-queues all
```
-4. Use the interface as usual, eg.:
+3. Use the interface as usual, eg.:
```
~# vppctl set int ip addr enp216s0f0/0 1.1.1.1/24
~# vppctl set int st enp216s0f0/0 up
diff --git a/src/plugins/af_xdp/cli.c b/src/plugins/af_xdp/cli.c
index 5fe7c2e..d5f21d4 100644
--- a/src/plugins/af_xdp/cli.c
+++ b/src/plugins/af_xdp/cli.c
@@ -47,7 +47,7 @@
/* *INDENT-OFF* */
VLIB_CLI_COMMAND (af_xdp_create_command, static) = {
.path = "create interface af_xdp",
- .short_help = "create interface af_xdp <host-if linux-ifname> [name ifname] [rx-queue-size size] [tx-queue-size size] [num-rx-queues num] [prog pathname] [zero-copy|no-zero-copy]",
+ .short_help = "create interface af_xdp <host-if linux-ifname> [name ifname] [rx-queue-size size] [tx-queue-size size] [num-rx-queues <num|all>] [prog pathname] [zero-copy|no-zero-copy]",
.function = af_xdp_create_command_fn,
};
/* *INDENT-ON* */
diff --git a/src/plugins/af_xdp/device.c b/src/plugins/af_xdp/device.c
index c750e8d..5090d3a 100644
--- a/src/plugins/af_xdp/device.c
+++ b/src/plugins/af_xdp/device.c
@@ -172,14 +172,27 @@
af_xdp_create_queue (vlib_main_t * vm, af_xdp_create_if_args_t * args,
af_xdp_device_t * ad, int qid, int rxq_num, int txq_num)
{
- struct xsk_umem **umem = vec_elt_at_index (ad->umem, qid);
- struct xsk_socket **xsk = vec_elt_at_index (ad->xsk, qid);
- af_xdp_rxq_t *rxq = vec_elt_at_index (ad->rxqs, qid);
- af_xdp_txq_t *txq = vec_elt_at_index (ad->txqs, qid);
+ struct xsk_umem **umem;
+ struct xsk_socket **xsk;
+ af_xdp_rxq_t *rxq;
+ af_xdp_txq_t *txq;
struct xsk_umem_config umem_config;
struct xsk_socket_config sock_config;
struct xdp_options opt;
socklen_t optlen;
+
+ vec_validate_aligned (ad->umem, qid, CLIB_CACHE_LINE_BYTES);
+ umem = vec_elt_at_index (ad->umem, qid);
+
+ vec_validate_aligned (ad->xsk, qid, CLIB_CACHE_LINE_BYTES);
+ xsk = vec_elt_at_index (ad->xsk, qid);
+
+ vec_validate_aligned (ad->rxqs, qid, CLIB_CACHE_LINE_BYTES);
+ rxq = vec_elt_at_index (ad->rxqs, qid);
+
+ vec_validate_aligned (ad->txqs, qid, CLIB_CACHE_LINE_BYTES);
+ txq = vec_elt_at_index (ad->txqs, qid);
+
/*
* fq and cq must always be allocated even if unused
* whereas rx and tx indicates whether we want rxq, txq, or both
@@ -333,10 +346,6 @@
goto err1;
q_num = clib_max (rxq_num, txq_num);
- vec_validate_aligned (ad->rxqs, q_num - 1, CLIB_CACHE_LINE_BYTES);
- vec_validate_aligned (ad->txqs, q_num - 1, CLIB_CACHE_LINE_BYTES);
- vec_validate_aligned (ad->umem, q_num - 1, CLIB_CACHE_LINE_BYTES);
- vec_validate_aligned (ad->xsk, q_num - 1, CLIB_CACHE_LINE_BYTES);
ad->txq_num = txq_num;
for (i = 0; i < q_num; i++)
{
@@ -345,10 +354,10 @@
/*
* queue creation failed
* it is only a fatal error if we could not create the number of rx
- * queues requested explicitely by the user
+ * queues requested explicitly by the user and the user did not
+ * request 'all'
* we might create less tx queues than workers but this is ok
*/
- af_xdp_txq_t *txq;
/* fixup vectors length */
vec_set_len (ad->umem, i);
@@ -356,15 +365,17 @@
vec_set_len (ad->rxqs, i);
vec_set_len (ad->txqs, i);
- if (i < rxq_num)
+ if (i < rxq_num && AF_XDP_NUM_RX_QUEUES_ALL != rxq_num)
goto err1; /* failed creating requested rxq: fatal error, bailing out */
- /*
- * we created all rxq but failed some txq: not an error but
- * initialize lock for shared txq
- */
- ad->txq_num = i;
- vec_foreach (txq, ad->txqs) clib_spinlock_init (&txq->lock);
+ if (i < txq_num)
+ {
+ /* we created fewer txq than threads: not an error, but initialize lock for shared txq */
+ af_xdp_txq_t *txq;
+ ad->txq_num = i;
+ vec_foreach (txq, ad->txqs) clib_spinlock_init (&txq->lock);
+ }
+
args->rv = 0;
clib_error_free (args->error);
break;
@@ -404,7 +415,7 @@
vnet_hw_interface_set_input_node (vnm, ad->hw_if_index,
af_xdp_input_node.index);
- for (i = 0; i < rxq_num; i++)
+ for (i = 0; i < vec_len (ad->rxqs); i++)
{
af_xdp_rxq_t *rxq = vec_elt_at_index (ad->rxqs, i);
clib_file_t f = {
diff --git a/src/plugins/af_xdp/unformat.c b/src/plugins/af_xdp/unformat.c
index 154d459..b229246 100644
--- a/src/plugins/af_xdp/unformat.c
+++ b/src/plugins/af_xdp/unformat.c
@@ -40,6 +40,8 @@
;
else if (unformat (line_input, "tx-queue-size %u", &args->txq_size))
;
+ else if (unformat (line_input, "num-rx-queues all"))
+ args->rxq_num = AF_XDP_NUM_RX_QUEUES_ALL;
else if (unformat (line_input, "num-rx-queues %u", &args->rxq_num))
;
else if (unformat (line_input, "prog %s", &args->prog))