bonding: add support for numa-only in lacp mode

If numa-only is set and at least one slave is on the local numa
node, only slaves on the local numa node transmit packets;
otherwise the bond interface works as usual.

CLI change:
create bond mode lacp [load-balance { l2 | l23 | l34 } {numa-only}]
[hw-addr <mac-address>] [id <if-id>]

The new member "u8 numa_only;" is also added to bond_create_if_args_t.

Type: feature

Change-Id: Icdccedafb0738d8c9d4a5acce909ce562428c071
Signed-off-by: Zhiyong Yang <zhiyong.yang@intel.com>
diff --git a/src/vnet/bonding/bond.api b/src/vnet/bonding/bond.api
index e779453..e699267 100644
--- a/src/vnet/bonding/bond.api
+++ b/src/vnet/bonding/bond.api
@@ -19,7 +19,7 @@
     the bonding device driver
 */
 
-option version = "1.0.0";
+option version = "1.0.1";
 
 /** \brief Initialize a new bond interface with the given paramters
     @param client_index - opaque cookie to identify the sender
@@ -29,6 +29,7 @@
     @param mac_address - mac addr to assign to the interface if use_custom_mac is set
     @param mode - mode, required (1=round-robin, 2=active-backup, 3=xor, 4=broadcastcast, 5=lacp)
     @param lb - load balance, optional (0=l2, 1=l34, 2=l23) valid for xor and lacp modes. Otherwise ignored
+    @param numa_only - if set, packets are transmitted only by LAG members on the local numa node when at least one such member exists; otherwise the bond works as usual. Used in lacp mode.
 */
 define bond_create
 {
@@ -39,6 +40,7 @@
   u8 mac_address[6];
   u8 mode;
   u8 lb;
+  u8 numa_only;
 };
 
 /** \brief Reply for bond create reply
@@ -118,6 +120,7 @@
     @param interface_name - name of interface
     @param mode - bonding mode
     @param lb - load balance algo
+    @param numa_only - enable local numa TX for lacp mode
     @param active_slaves - active slaves count
     @param slaves - config slave count
 */
@@ -129,6 +132,7 @@
   u8 interface_name[64];
   u8 mode;
   u8 lb;
+  u8 numa_only;
   u32 active_slaves;
   u32 slaves;
 };
diff --git a/src/vnet/bonding/bond_api.c b/src/vnet/bonding/bond_api.c
index 07c2cbc..8e18423 100644
--- a/src/vnet/bonding/bond_api.c
+++ b/src/vnet/bonding/bond_api.c
@@ -83,6 +83,7 @@
 
   ap->mode = mp->mode;
   ap->lb = mp->lb;
+  ap->numa_only = mp->numa_only;
   bond_create_if (vm, ap);
 
   int rv = ap->rv;
@@ -149,6 +150,7 @@
 		    strlen ((const char *) bond_if->interface_name)));
   mp->mode = bond_if->mode;
   mp->lb = bond_if->lb;
+  mp->numa_only = bond_if->numa_only;
   mp->active_slaves = htonl (bond_if->active_slaves);
   mp->slaves = htonl (bond_if->slaves);
 
diff --git a/src/vnet/bonding/cli.c b/src/vnet/bonding/cli.c
index cb344c6..371e3c1 100644
--- a/src/vnet/bonding/cli.c
+++ b/src/vnet/bonding/cli.c
@@ -47,6 +47,17 @@
 	  }
 	vec_del1 (bif->active_slaves, i);
 	hash_unset (bif->active_slave_by_sw_if_index, sif->sw_if_index);
+	if (sif->lacp_enabled && bif->numa_only)
+	  {
+	    /* For lacp mode, the first bif->n_numa_slaves entries of
+	       active_slaves are the slaves on the local numa node, so
+	       removing an index below that count must decrement it */
+	    if (i < bif->n_numa_slaves)
+	      {
+		bif->n_numa_slaves--;
+		ASSERT (bif->n_numa_slaves >= 0);
+	      }
+	  }
 	break;
       }
   }
@@ -104,7 +115,18 @@
     {
       hash_set (bif->active_slave_by_sw_if_index, sif->sw_if_index,
 		sif->sw_if_index);
-      vec_add1 (bif->active_slaves, sif->sw_if_index);
+
+      if ((sif->lacp_enabled && bif->numa_only)
+	  && (vm->numa_node == hw->numa_node))
+	{
+	  vec_insert_elts (bif->active_slaves, &sif->sw_if_index, 1,
+			   bif->n_numa_slaves);
+	  bif->n_numa_slaves++;
+	}
+      else
+	{
+	  vec_add1 (bif->active_slaves, sif->sw_if_index);
+	}
 
       /* First slave becomes active? */
       if ((vec_len (bif->active_slaves) == 1) &&
@@ -168,6 +190,7 @@
                      strlen ((const char *) hi->name)));
     bondif->mode = bif->mode;
     bondif->lb = bif->lb;
+    bondif->numa_only = bif->numa_only;
     bondif->active_slaves = vec_len (bif->active_slaves);
     bondif->slaves = vec_len (bif->slaves);
   );
@@ -388,6 +411,7 @@
   sw = vnet_get_hw_sw_interface (vnm, bif->hw_if_index);
   bif->sw_if_index = sw->sw_if_index;
   bif->group = bif->sw_if_index;
+  bif->numa_only = args->numa_only;
   if (vlib_get_thread_main ()->n_vlib_mains > 1)
     clib_spinlock_init (&bif->lockp);
 
@@ -428,6 +452,14 @@
 	args.hw_addr_set = 1;
       else if (unformat (line_input, "id %u", &args.id))
 	;
+      else if (unformat (line_input, "numa-only"))
+	{
+	  if (args.mode == BOND_MODE_LACP)
+	    args.numa_only = 1;
+	  else
+	    return clib_error_return (0,
+				      "Only lacp mode supports numa-only so far!");
+	}
       else
 	return clib_error_return (0, "unknown input `%U'",
 				  format_unformat_error, input);
@@ -446,7 +478,7 @@
 VLIB_CLI_COMMAND (bond_create_command, static) = {
   .path = "create bond",
   .short_help = "create bond mode {round-robin | active-backup | broadcast | "
-    "{lacp | xor} [load-balance { l2 | l23 | l34 }]} [hw-addr <mac-address>] "
+    "{lacp | xor} [load-balance { l2 | l23 | l34 } {numa-only}]} [hw-addr <mac-address>] "
     "[id <if-id>]",
   .function = bond_create_command_fn,
 };
diff --git a/src/vnet/bonding/device.c b/src/vnet/bonding/device.c
index b76396a..c9e8b4a 100644
--- a/src/vnet/bonding/device.c
+++ b/src/vnet/bonding/device.c
@@ -699,6 +699,11 @@
       goto done;
     }
 
+  /* If at least one slave is on the local numa node, only slaves on the local
+     numa node transmit packets when bif->numa_only is enabled */
+  if (bif->n_numa_slaves >= 1)
+    n_slaves = bif->n_numa_slaves;
+
   if (bif->lb == BOND_LB_L2)
     bond_tx_inline (vm, bif, bufs, hashes, n_left, n_slaves, BOND_LB_L2);
   else if (bif->lb == BOND_LB_L34)
diff --git a/src/vnet/bonding/node.h b/src/vnet/bonding/node.h
index 1782150..94b43dc 100644
--- a/src/vnet/bonding/node.h
+++ b/src/vnet/bonding/node.h
@@ -82,6 +82,7 @@
   u8 hw_addr[6];
   u8 mode;
   u8 lb;
+  u8 numa_only;
   /* return */
   u32 sw_if_index;
   int rv;
@@ -117,6 +118,7 @@
   u8 interface_name[64];
   u8 mode;
   u8 lb;
+  u8 numa_only;
   u32 active_slaves;
   u32 slaves;
 } bond_interface_details_t;
@@ -187,6 +189,14 @@
   lacp_port_info_t actor;
   u8 individual_aggregator;
 
+  /* If the numa_only flag is set, only slaves on the local numa
+     node transmit in lacp mode when at least one such slave exists;
+     otherwise the bond works as usual. */
+  u8 numa_only;
+
+  /* Number of active slaves on the local numa node (lacp mode) */
+  word n_numa_slaves;
+
   u32 group;
   uword *port_number_bitmap;
   u8 use_custom_mac;