bonding: add support for numa-only in lacp mode

If numa-only is set, only slaves on the local numa node transmit
pkts, provided at least one such slave exists; otherwise the bond
interface works as usual.

CLI change:
create bond mode lacp [load-balance { l2 | l23 | l34 } {numa-only}]
[hw-addr <mac-address>] [id <if-id>]

The new member "u8 numa_only;" is also added to bond_create_if_args_t.

Type: feature

Change-Id: Icdccedafb0738d8c9d4a5acce909ce562428c071
Signed-off-by: Zhiyong Yang <zhiyong.yang@intel.com>
diff --git a/src/vat/api_format.c b/src/vat/api_format.c
index a8d4a0d..d231310 100644
--- a/src/vat/api_format.c
+++ b/src/vat/api_format.c
@@ -7769,6 +7769,7 @@
   u8 lb;
   u8 mode_is_set = 0;
   u32 id = ~0;
+  u8 numa_only = 0;
 
   clib_memset (mac_address, 0, sizeof (mac_address));
   lb = BOND_LB_L2;
@@ -7784,6 +7785,8 @@
       else if (unformat (i, "hw-addr %U", unformat_ethernet_address,
 			 mac_address))
 	custom_mac = 1;
+      else if (unformat (i, "numa-only"))
+	numa_only = 1;
       else if (unformat (i, "id %u", &id))
 	;
       else
@@ -7804,6 +7807,7 @@
   mp->mode = mode;
   mp->lb = lb;
   mp->id = htonl (id);
+  mp->numa_only = numa_only;
 
   if (custom_mac)
     clib_memcpy (mp->mac_address, mac_address, 6);
diff --git a/src/vnet/bonding/bond.api b/src/vnet/bonding/bond.api
index e779453..e699267 100644
--- a/src/vnet/bonding/bond.api
+++ b/src/vnet/bonding/bond.api
@@ -19,7 +19,7 @@
     the bonding device driver
 */
 
-option version = "1.0.0";
+option version = "1.0.1";
 
 /** \brief Initialize a new bond interface with the given paramters
     @param client_index - opaque cookie to identify the sender
@@ -29,6 +29,7 @@
     @param mac_address - mac addr to assign to the interface if use_custom_mac is set
     @param mode - mode, required (1=round-robin, 2=active-backup, 3=xor, 4=broadcastcast, 5=lacp)
     @param lb - load balance, optional (0=l2, 1=l34, 2=l23) valid for xor and lacp modes. Otherwise ignored
+    @param numa_only - if set, pkts are transmitted only by LAG members on the local numa node when at least one such member exists; otherwise the bond works as usual.
 */
 define bond_create
 {
@@ -39,6 +40,7 @@
   u8 mac_address[6];
   u8 mode;
   u8 lb;
+  u8 numa_only;
 };
 
 /** \brief Reply for bond create reply
@@ -118,6 +120,7 @@
     @param interface_name - name of interface
     @param mode - bonding mode
     @param lb - load balance algo
+    @param numa_only - enable local numa TX for lacp mode
     @param active_slaves - active slaves count
     @param slaves - config slave count
 */
@@ -129,6 +132,7 @@
   u8 interface_name[64];
   u8 mode;
   u8 lb;
+  u8 numa_only;
   u32 active_slaves;
   u32 slaves;
 };
diff --git a/src/vnet/bonding/bond_api.c b/src/vnet/bonding/bond_api.c
index 07c2cbc..8e18423 100644
--- a/src/vnet/bonding/bond_api.c
+++ b/src/vnet/bonding/bond_api.c
@@ -83,6 +83,7 @@
 
   ap->mode = mp->mode;
   ap->lb = mp->lb;
+  ap->numa_only = mp->numa_only;
   bond_create_if (vm, ap);
 
   int rv = ap->rv;
@@ -149,6 +150,7 @@
 		    strlen ((const char *) bond_if->interface_name)));
   mp->mode = bond_if->mode;
   mp->lb = bond_if->lb;
+  mp->numa_only = bond_if->numa_only;
   mp->active_slaves = htonl (bond_if->active_slaves);
   mp->slaves = htonl (bond_if->slaves);
 
diff --git a/src/vnet/bonding/cli.c b/src/vnet/bonding/cli.c
index cb344c6..371e3c1 100644
--- a/src/vnet/bonding/cli.c
+++ b/src/vnet/bonding/cli.c
@@ -47,6 +47,17 @@
 	  }
 	vec_del1 (bif->active_slaves, i);
 	hash_unset (bif->active_slave_by_sw_if_index, sif->sw_if_index);
+	if (sif->lacp_enabled && bif->numa_only)
+	  {
+	    /* For lacp mode, if the removed slave is on the local numa node,
+	       bif->n_numa_slaves should be decreased by 1 because the first
+	       bif->n_numa_slaves entries are all slaves on local numa node */
+	    if (i < bif->n_numa_slaves)
+	      {
+		bif->n_numa_slaves--;
+		ASSERT (bif->n_numa_slaves >= 0);
+	      }
+	  }
 	break;
       }
   }
@@ -104,7 +115,18 @@
     {
       hash_set (bif->active_slave_by_sw_if_index, sif->sw_if_index,
 		sif->sw_if_index);
-      vec_add1 (bif->active_slaves, sif->sw_if_index);
+
+      if ((sif->lacp_enabled && bif->numa_only)
+	  && (vm->numa_node == hw->numa_node))
+	{
+	  vec_insert_elts (bif->active_slaves, &sif->sw_if_index, 1,
+			   bif->n_numa_slaves);
+	  bif->n_numa_slaves++;
+	}
+      else
+	{
+	  vec_add1 (bif->active_slaves, sif->sw_if_index);
+	}
 
       /* First slave becomes active? */
       if ((vec_len (bif->active_slaves) == 1) &&
@@ -168,6 +190,7 @@
                      strlen ((const char *) hi->name)));
     bondif->mode = bif->mode;
     bondif->lb = bif->lb;
+    bondif->numa_only = bif->numa_only;
     bondif->active_slaves = vec_len (bif->active_slaves);
     bondif->slaves = vec_len (bif->slaves);
   );
@@ -388,6 +411,7 @@
   sw = vnet_get_hw_sw_interface (vnm, bif->hw_if_index);
   bif->sw_if_index = sw->sw_if_index;
   bif->group = bif->sw_if_index;
+  bif->numa_only = args->numa_only;
   if (vlib_get_thread_main ()->n_vlib_mains > 1)
     clib_spinlock_init (&bif->lockp);
 
@@ -428,6 +452,14 @@
 	args.hw_addr_set = 1;
       else if (unformat (line_input, "id %u", &args.id))
 	;
+      else if (unformat (line_input, "numa-only"))
+	{
+	  if (args.mode == BOND_MODE_LACP)
+	    args.numa_only = 1;
+	  else
+	    return clib_error_return (0,
+				      "Only lacp mode supports numa-only so far!");
+	}
       else
 	return clib_error_return (0, "unknown input `%U'",
 				  format_unformat_error, input);
@@ -446,7 +478,7 @@
 VLIB_CLI_COMMAND (bond_create_command, static) = {
   .path = "create bond",
   .short_help = "create bond mode {round-robin | active-backup | broadcast | "
-    "{lacp | xor} [load-balance { l2 | l23 | l34 }]} [hw-addr <mac-address>] "
+    "{lacp | xor} [load-balance { l2 | l23 | l34 } {numa-only}]} [hw-addr <mac-address>] "
     "[id <if-id>]",
   .function = bond_create_command_fn,
 };
diff --git a/src/vnet/bonding/device.c b/src/vnet/bonding/device.c
index b76396a..c9e8b4a 100644
--- a/src/vnet/bonding/device.c
+++ b/src/vnet/bonding/device.c
@@ -699,6 +699,11 @@
       goto done;
     }
 
+  /* if there is at least one slave on the local numa node, only slaves on
+     that node will transmit pkts when bif->numa_only is enabled */
+  if (bif->n_numa_slaves >= 1)
+    n_slaves = bif->n_numa_slaves;
+
   if (bif->lb == BOND_LB_L2)
     bond_tx_inline (vm, bif, bufs, hashes, n_left, n_slaves, BOND_LB_L2);
   else if (bif->lb == BOND_LB_L34)
diff --git a/src/vnet/bonding/node.h b/src/vnet/bonding/node.h
index 1782150..94b43dc 100644
--- a/src/vnet/bonding/node.h
+++ b/src/vnet/bonding/node.h
@@ -82,6 +82,7 @@
   u8 hw_addr[6];
   u8 mode;
   u8 lb;
+  u8 numa_only;
   /* return */
   u32 sw_if_index;
   int rv;
@@ -117,6 +118,7 @@
   u8 interface_name[64];
   u8 mode;
   u8 lb;
+  u8 numa_only;
   u32 active_slaves;
   u32 slaves;
 } bond_interface_details_t;
@@ -187,6 +189,14 @@
   lacp_port_info_t actor;
   u8 individual_aggregator;
 
+  /* If the flag numa_only is set, only slaves on the local numa
+     node transmit in lacp mode when at least one such slave exists;
+     otherwise the bond works as usual. */
+  u8 numa_only;
+
+  /* How many slaves on local numa node are there in lacp mode? */
+  word n_numa_slaves;
+
   u32 group;
   uword *port_number_bitmap;
   u8 use_custom_mac;
diff --git a/src/vpp/api/custom_dump.c b/src/vpp/api/custom_dump.c
index 95f0267..c6427f2 100644
--- a/src/vpp/api/custom_dump.c
+++ b/src/vpp/api/custom_dump.c
@@ -665,6 +665,8 @@
     s = format (s, "mode %U ", format_bond_mode, mp->mode);
   if (mp->lb)
     s = format (s, "lb %U ", format_bond_load_balance, mp->lb);
+  if (mp->numa_only)
+    s = format (s, "numa-only is set in lacp mode");
   if (mp->id != ~0)
     s = format (s, "id %u ", ntohl (mp->id));
   FINISH;
diff --git a/test/test_bond.py b/test/test_bond.py
index 03f0eea..d1ae77a 100644
--- a/test/test_bond.py
+++ b/test/test_bond.py
@@ -65,6 +65,7 @@
         bond0 = VppBondInterface(self,
                                  mode=3,
                                  lb=1,
+                                 numa_only=0,
                                  use_custom_mac=1,
                                  mac_address=mac)
         bond0.add_vpp_config()
diff --git a/test/vpp_bond_interface.py b/test/vpp_bond_interface.py
index 153f114..f05a07b 100644
--- a/test/vpp_bond_interface.py
+++ b/test/vpp_bond_interface.py
@@ -5,19 +5,21 @@
 class VppBondInterface(VppInterface):
     """VPP bond interface."""
 
-    def __init__(self, test, mode, lb=0,
+    def __init__(self, test, mode, lb=0, numa_only=0,
                  use_custom_mac=0, mac_address=''):
 
         """ Create VPP Bond interface """
         super(VppBondInterface, self).__init__(test)
         self.mode = mode
         self.lb = lb
+        self.numa_only = numa_only
         self.use_custom_mac = use_custom_mac
         self.mac_address = mac_address
 
     def add_vpp_config(self):
         r = self.test.vapi.bond_create(self.mode,
                                        self.lb,
+                                       self.numa_only,
                                        self.use_custom_mac,
                                        self.mac_address)
         self.set_sw_if_index(r.sw_if_index)
diff --git a/test/vpp_papi_provider.py b/test/vpp_papi_provider.py
index c639832..40c6045 100644
--- a/test/vpp_papi_provider.py
+++ b/test/vpp_papi_provider.py
@@ -2202,12 +2202,14 @@
             self,
             mode,
             lb,
+            numa_only,
             use_custom_mac,
             mac_address='',
             interface_id=0xFFFFFFFF):
         """
         :param mode: mode
         :param lb: load balance
+        :param numa_only: tx on local numa node for lacp mode
         :param use_custom_mac: use custom mac
         :param mac_address: mac address
         :param interface_id: custom interface ID
@@ -2216,6 +2218,7 @@
             self.papi.bond_create,
             {'mode': mode,
              'lb': lb,
+             'numa_only': numa_only,
              'use_custom_mac': use_custom_mac,
              'mac_address': mac_address,
              'id': interface_id