bond: tx perf improvement, part trois
Introduce bond_tx_inline which takes lb as a constant for gcc to do the optimization
The number appears a tad better for 256 bytes frame.
with the patch
--------------
Thread 2 vpp_wk_1 (lcore 3)
Time 4.3, average vectors/node 224.00, last 128 main loops 40.00 per node 222.61
vector rates in 8.4836e6, out 1.6967e7, drop 0.0000e0, punt 0.0000e0
Name State Calls Vectors Suspends Clocks Vectors/Call
BondEthernet0-output active 141054 36109824 0 2.51e1 256.00
BondEthernet0-tx active 141054 36109824 0 2.55e1 256.00
TenGigabitEthernet6/0/0-output active 141054 18055469 0 9.43e0 128.00
TenGigabitEthernet6/0/0-tx active 141054 18055469 0 6.97e1 128.00
TenGigabitEthernet6/0/1-output active 141054 18054355 0 9.54e0 127.99
TenGigabitEthernet6/0/1-tx active 141054 18054355 0 7.05e1 127.99
bond-input active 141054 36109824 0 1.76e1 256.00
dpdk-input polling 70527 36109824 0 5.03e1 512.00
ethernet-input active 141054 36109824 0 6.12e1 256.00
ip4-input active 141054 36109824 0 3.26e1 256.00
ip4-lookup active 141054 36109824 0 2.94e1 256.00
ip4-rewrite active 141054 36109824 0 3.27e1 256.00
without the patch
-----------------
Thread 2 vpp_wk_1 (lcore 3)
Time 4.3, average vectors/node 224.00, last 128 main loops 40.00 per node 222.61
vector rates in 8.4443e6, out 1.6889e7, drop 0.0000e0, punt 0.0000e0
Name State Calls Vectors Suspends Clocks Vectors/Call
BondEthernet0-output active 142744 36542464 0 2.51e1 256.00
BondEthernet0-tx active 142744 36542464 0 2.67e1 256.00
TenGigabitEthernet6/0/0-output active 142744 18270813 0 9.19e0 127.99
TenGigabitEthernet6/0/0-tx active 142744 18270813 0 6.98e1 127.99
TenGigabitEthernet6/0/1-output active 142744 18271651 0 9.43e0 128.00
TenGigabitEthernet6/0/1-tx active 142744 18271651 0 7.02e1 128.00
bond-input active 142744 36542464 0 1.76e1 256.00
dpdk-input polling 71372 36542464 0 5.08e1 512.00
ethernet-input active 142744 36542464 0 6.15e1 256.00
ip4-input active 142744 36542464 0 3.23e1 256.00
ip4-lookup active 142744 36542464 0 2.96e1 256.00
ip4-rewrite active 142744 36542464 0 3.28e1 256.00
Change-Id: I9fd43eda3c735cbff680ac6d2f01ecdae81f0eda
Signed-off-by: Damjan Marion <damarion@cisco.com>
diff --git a/src/vnet/bonding/device.c b/src/vnet/bonding/device.c
index 79ca2fa..8a78728 100644
--- a/src/vnet/bonding/device.c
+++ b/src/vnet/bonding/device.c
@@ -379,63 +379,28 @@
return 0;
}
-static bond_load_balance_func_t bond_load_balance_table[] = {
-#define _(v,f,s, p) { bond_load_balance_##p },
- foreach_bond_lb_algo
-#undef _
-};
-
-VNET_DEVICE_CLASS_TX_FN (bond_dev_class) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
+static_always_inline void
+bond_tx_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * frame, bond_if_t * bif,
+ uword slave_count, u32 lb_alg)
{
- vnet_interface_output_runtime_t *rund = (void *) node->runtime_data;
bond_main_t *bm = &bond_main;
- bond_if_t *bif = pool_elt_at_index (bm->interfaces, rund->dev_instance);
- vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
- u32 *from = vlib_frame_vector_args (frame);
- ethernet_header_t *eth;
- u32 n_left;
- u32 sw_if_index;
+ vnet_main_t *vnm = vnet_get_main ();
+ u16 thread_index = vm->thread_index;
bond_packet_trace_t *t0;
uword n_trace = vlib_get_trace_count (vm, node);
- u16 thread_index = vm->thread_index;
- vnet_main_t *vnm = vnet_get_main ();
u32 *to_next;
vlib_frame_t *f;
- uword slave_count;
+ ethernet_header_t *eth;
+ vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
+ u32 *from = vlib_frame_vector_args (frame);
+ u32 n_left = frame->n_vectors;
+ u32 sw_if_index;
u32 port0 = 0, port1 = 0, port2 = 0, port3 = 0;
bond_per_thread_data_t *ptd = vec_elt_at_index (bm->per_thread_data,
thread_index);
- if (PREDICT_FALSE (bif->admin_up == 0))
- {
- vlib_buffer_free (vm, vlib_frame_args (frame), frame->n_vectors);
- vlib_increment_simple_counter (vnet_main.interface_main.sw_if_counters +
- VNET_INTERFACE_COUNTER_DROP,
- thread_index, bif->sw_if_index,
- frame->n_vectors);
- vlib_error_count (vm, node->node_index, BOND_TX_ERROR_IF_DOWN,
- frame->n_vectors);
- return frame->n_vectors;
- }
-
- n_left = frame->n_vectors;
vlib_get_buffers (vm, from, bufs, n_left);
-
- slave_count = vec_len (bif->active_slaves);
- if (PREDICT_FALSE (slave_count == 0))
- {
- vlib_buffer_free (vm, vlib_frame_args (frame), frame->n_vectors);
- vlib_increment_simple_counter (vnet_main.interface_main.sw_if_counters +
- VNET_INTERFACE_COUNTER_DROP,
- thread_index, bif->sw_if_index,
- frame->n_vectors);
- vlib_error_count (vm, node->node_index, BOND_TX_ERROR_NO_SLAVE,
- frame->n_vectors);
- return frame->n_vectors;
- }
-
b = bufs;
while (n_left >= 4)
{
@@ -464,22 +429,72 @@
if (PREDICT_TRUE (slave_count > 1))
{
- port0 =
- (bond_load_balance_table[bif->lb]).load_balance (vm, node,
- bif, b[0],
- slave_count);
- port1 =
- (bond_load_balance_table[bif->lb]).load_balance (vm, node,
- bif, b[1],
- slave_count);
- port2 =
- (bond_load_balance_table[bif->lb]).load_balance (vm, node,
- bif, b[2],
- slave_count);
- port3 =
- (bond_load_balance_table[bif->lb]).load_balance (vm, node,
- bif, b[3],
- slave_count);
+ if (lb_alg == BOND_LB_L2)
+ {
+ port0 = bond_load_balance_l2 (vm, node, bif, b[0], slave_count);
+ port1 = bond_load_balance_l2 (vm, node, bif, b[1], slave_count);
+ port2 = bond_load_balance_l2 (vm, node, bif, b[2], slave_count);
+ port3 = bond_load_balance_l2 (vm, node, bif, b[3], slave_count);
+ }
+ else if (lb_alg == BOND_LB_L34)
+ {
+ port0 = bond_load_balance_l34 (vm, node, bif, b[0],
+ slave_count);
+ port1 = bond_load_balance_l34 (vm, node, bif, b[1],
+ slave_count);
+ port2 = bond_load_balance_l34 (vm, node, bif, b[2],
+ slave_count);
+ port3 = bond_load_balance_l34 (vm, node, bif, b[3],
+ slave_count);
+ }
+ else if (lb_alg == BOND_LB_L23)
+ {
+ port0 = bond_load_balance_l23 (vm, node, bif, b[0],
+ slave_count);
+ port1 = bond_load_balance_l23 (vm, node, bif, b[1],
+ slave_count);
+ port2 = bond_load_balance_l23 (vm, node, bif, b[2],
+ slave_count);
+ port3 = bond_load_balance_l23 (vm, node, bif, b[3],
+ slave_count);
+ }
+ else if (lb_alg == BOND_LB_RR)
+ {
+ port0 = bond_load_balance_round_robin (vm, node, bif, b[0],
+ slave_count);
+ port1 = bond_load_balance_round_robin (vm, node, bif, b[1],
+ slave_count);
+ port2 = bond_load_balance_round_robin (vm, node, bif, b[2],
+ slave_count);
+ port3 = bond_load_balance_round_robin (vm, node, bif, b[3],
+ slave_count);
+ }
+ else if (lb_alg == BOND_LB_BC)
+ {
+ port0 = bond_load_balance_broadcast (vm, node, bif, b[0],
+ slave_count);
+ port1 = bond_load_balance_broadcast (vm, node, bif, b[1],
+ slave_count);
+ port2 = bond_load_balance_broadcast (vm, node, bif, b[2],
+ slave_count);
+ port3 = bond_load_balance_broadcast (vm, node, bif, b[3],
+ slave_count);
+ }
+ else if (lb_alg == BOND_LB_AB)
+ {
+ port0 = bond_load_balance_active_backup (vm, node, bif, b[0],
+ slave_count);
+ port1 = bond_load_balance_active_backup (vm, node, bif, b[1],
+ slave_count);
+ port2 = bond_load_balance_active_backup (vm, node, bif, b[2],
+ slave_count);
+ port3 = bond_load_balance_active_backup (vm, node, bif, b[3],
+ slave_count);
+ }
+ else
+ {
+ ASSERT (0);
+ }
}
sif_if_index0 = *vec_elt_at_index (bif->active_slaves, port0);
@@ -574,9 +589,42 @@
VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b[0]);
if (PREDICT_TRUE (slave_count > 1))
- port0 =
- (bond_load_balance_table[bif->lb]).load_balance (vm, node, bif,
- b[0], slave_count);
+ {
+ if (bif->lb == BOND_LB_L2)
+ {
+ port0 = bond_load_balance_l2 (vm, node, bif, b[0], slave_count);
+ }
+ else if (bif->lb == BOND_LB_L34)
+ {
+ port0 = bond_load_balance_l34 (vm, node, bif, b[0],
+ slave_count);
+ }
+ else if (bif->lb == BOND_LB_L23)
+ {
+ port0 = bond_load_balance_l23 (vm, node, bif, b[0],
+ slave_count);
+ }
+ else if (bif->lb == BOND_LB_RR)
+ {
+ port0 = bond_load_balance_round_robin (vm, node, bif, b[0],
+ slave_count);
+ }
+ else if (bif->lb == BOND_LB_BC)
+ {
+ port0 = bond_load_balance_broadcast (vm, node, bif, b[0],
+ slave_count);
+ }
+ else if (bif->lb == BOND_LB_AB)
+ {
+ port0 = bond_load_balance_active_backup (vm, node, bif, b[0],
+ slave_count);
+ }
+ else
+ {
+ ASSERT (0);
+ }
+ }
+
sif_if_index0 = *vec_elt_at_index (bif->active_slaves, port0);
/* Do the tracing before the old interface is overwritten */
@@ -622,6 +670,57 @@
vlib_increment_simple_counter (vnet_main.interface_main.sw_if_counters
+ VNET_INTERFACE_COUNTER_TX, thread_index,
bif->sw_if_index, frame->n_vectors);
+}
+
+VNET_DEVICE_CLASS_TX_FN (bond_dev_class) (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ vnet_interface_output_runtime_t *rund = (void *) node->runtime_data;
+ bond_main_t *bm = &bond_main;
+ u16 thread_index = vm->thread_index;
+ bond_if_t *bif = pool_elt_at_index (bm->interfaces, rund->dev_instance);
+ uword slave_count;
+
+ if (PREDICT_FALSE (bif->admin_up == 0))
+ {
+ vlib_buffer_free (vm, vlib_frame_args (frame), frame->n_vectors);
+ vlib_increment_simple_counter (vnet_main.interface_main.sw_if_counters +
+ VNET_INTERFACE_COUNTER_DROP,
+ thread_index, bif->sw_if_index,
+ frame->n_vectors);
+ vlib_error_count (vm, node->node_index, BOND_TX_ERROR_IF_DOWN,
+ frame->n_vectors);
+ return frame->n_vectors;
+ }
+
+ slave_count = vec_len (bif->active_slaves);
+ if (PREDICT_FALSE (slave_count == 0))
+ {
+ vlib_buffer_free (vm, vlib_frame_args (frame), frame->n_vectors);
+ vlib_increment_simple_counter (vnet_main.interface_main.sw_if_counters +
+ VNET_INTERFACE_COUNTER_DROP,
+ thread_index, bif->sw_if_index,
+ frame->n_vectors);
+ vlib_error_count (vm, node->node_index, BOND_TX_ERROR_NO_SLAVE,
+ frame->n_vectors);
+ return frame->n_vectors;
+ }
+
+ if (bif->lb == BOND_LB_L2)
+ bond_tx_inline (vm, node, frame, bif, slave_count, BOND_LB_L2);
+ else if (bif->lb == BOND_LB_L34)
+ bond_tx_inline (vm, node, frame, bif, slave_count, BOND_LB_L34);
+ else if (bif->lb == BOND_LB_L23)
+ bond_tx_inline (vm, node, frame, bif, slave_count, BOND_LB_L23);
+ else if (bif->lb == BOND_LB_RR)
+ bond_tx_inline (vm, node, frame, bif, slave_count, BOND_LB_RR);
+ else if (bif->lb == BOND_LB_BC)
+ bond_tx_inline (vm, node, frame, bif, slave_count, BOND_LB_BC);
+ else if (bif->lb == BOND_LB_AB)
+ bond_tx_inline (vm, node, frame, bif, slave_count, BOND_LB_AB);
+ else
+ ASSERT (0);
return frame->n_vectors;
}
diff --git a/src/vnet/bonding/node.h b/src/vnet/bonding/node.h
index 6b13a46..e1359d0 100644
--- a/src/vnet/bonding/node.h
+++ b/src/vnet/bonding/node.h
@@ -51,13 +51,13 @@
/* configurable load-balances */
#define foreach_bond_lb \
_ (2, L23, "l23", l23) \
- _ (1, l34 , "l34", l34) \
+ _ (1, L34 , "l34", l34) \
_ (0, L2, "l2", l2)
/* load-balance functions implemented in bond-output */
#define foreach_bond_lb_algo \
_ (0, L2, "l2", l2) \
- _ (1, l34 , "l34", l34) \
+ _ (1, L34 , "l34", l34) \
_ (2, L23, "l23", l23) \
_ (3, RR, "round-robin", round_robin) \
_ (4, BC, "broadcast", broadcast) \