L3 proxy FIB source for container networking

Change-Id: I4164c4c19c8dbfd73e6ddf94a12056325cc093b9
Signed-off-by: Neale Ranns <nranns@cisco.com>
Signed-off-by: Andrew Yourtchenko <ayourtch@gmail.com>
diff --git a/src/vnet.am b/src/vnet.am
index d76441f..5186eaa 100644
--- a/src/vnet.am
+++ b/src/vnet.am
@@ -1057,6 +1057,7 @@
   vnet/dpo/interface_tx_dpo.c  			\
   vnet/dpo/mpls_disposition.c   		\
   vnet/dpo/mpls_label_dpo.c			\
+  vnet/dpo/l3_proxy_dpo.c			\
   vnet/dpo/l2_bridge_dpo.c
 
 nobase_include_HEADERS +=			\
diff --git a/src/vnet/adj/adj.c b/src/vnet/adj/adj.c
index f849691..5f7fe74 100644
--- a/src/vnet/adj/adj.c
+++ b/src/vnet/adj/adj.c
@@ -208,6 +208,16 @@
     pool_put(adj_pool, adj);
 }
 
+u32
+adj_dpo_get_urpf (const dpo_id_t *dpo)
+{
+    /*
+     * the uRPF interface of an adjacency is the one in its rewrite header
+     */
+    const ip_adjacency_t *adj = adj_get(dpo->dpoi_index);
+    return (adj->rewrite_header.sw_if_index);
+}
+
 void
 adj_lock (adj_index_t adj_index)
 {
diff --git a/src/vnet/adj/adj_glean.c b/src/vnet/adj/adj_glean.c
index 8d86e2a..82023f1 100644
--- a/src/vnet/adj/adj_glean.c
+++ b/src/vnet/adj/adj_glean.c
@@ -251,6 +251,7 @@
     .dv_lock = adj_dpo_lock,
     .dv_unlock = adj_dpo_unlock,
     .dv_format = format_adj_glean,
+    .dv_get_urpf = adj_dpo_get_urpf,
 };
 
 /**
diff --git a/src/vnet/adj/adj_internal.h b/src/vnet/adj/adj_internal.h
index 2c123c5..ca41cb2 100644
--- a/src/vnet/adj/adj_internal.h
+++ b/src/vnet/adj/adj_internal.h
@@ -109,4 +109,6 @@
 extern void adj_mcast_remove(fib_protocol_t proto,
 			     u32 sw_if_index);
 
+extern u32 adj_dpo_get_urpf(const dpo_id_t *dpo);
+
 #endif
diff --git a/src/vnet/adj/adj_mcast.c b/src/vnet/adj/adj_mcast.c
index da06cd0..00a12ad 100644
--- a/src/vnet/adj/adj_mcast.c
+++ b/src/vnet/adj/adj_mcast.c
@@ -387,11 +387,13 @@
     .dv_lock = adj_dpo_lock,
     .dv_unlock = adj_dpo_unlock,
     .dv_format = format_adj_mcast,
+    .dv_get_urpf = adj_dpo_get_urpf,
 };
 const static dpo_vft_t adj_mcast_midchain_dpo_vft = {
     .dv_lock = adj_dpo_lock,
     .dv_unlock = adj_dpo_unlock,
     .dv_format = format_adj_mcast_midchain,
+    .dv_get_urpf = adj_dpo_get_urpf,
 };
 
 /**
diff --git a/src/vnet/adj/adj_midchain.c b/src/vnet/adj/adj_midchain.c
index e9a510b..370fa46 100644
--- a/src/vnet/adj/adj_midchain.c
+++ b/src/vnet/adj/adj_midchain.c
@@ -615,6 +615,7 @@
     .dv_lock = adj_dpo_lock,
     .dv_unlock = adj_dpo_unlock,
     .dv_format = format_adj_midchain,
+    .dv_get_urpf = adj_dpo_get_urpf,
 };
 
 /**
diff --git a/src/vnet/adj/adj_nbr.c b/src/vnet/adj/adj_nbr.c
index 3d450d1..fc7a7fc 100644
--- a/src/vnet/adj/adj_nbr.c
+++ b/src/vnet/adj/adj_nbr.c
@@ -1047,11 +1047,13 @@
     .dv_unlock = adj_dpo_unlock,
     .dv_format = format_adj_nbr,
     .dv_mem_show = adj_mem_show,
+    .dv_get_urpf = adj_dpo_get_urpf,
 };
 const static dpo_vft_t adj_nbr_incompl_dpo_vft = {
     .dv_lock = adj_dpo_lock,
     .dv_unlock = adj_dpo_unlock,
     .dv_format = format_adj_nbr_incomplete,
+    .dv_get_urpf = adj_dpo_get_urpf,
 };
 
 /**
diff --git a/src/vnet/dpo/dpo.c b/src/vnet/dpo/dpo.c
index e94f347..7658132 100644
--- a/src/vnet/dpo/dpo.c
+++ b/src/vnet/dpo/dpo.c
@@ -41,6 +41,7 @@
 #include <vnet/dpo/interface_tx_dpo.h>
 #include <vnet/dpo/mpls_disposition.h>
 #include <vnet/dpo/l2_bridge_dpo.h>
+#include <vnet/dpo/l3_proxy_dpo.h>
 
 /**
  * Array of char* names for the DPO types and protos
@@ -345,6 +346,17 @@
     dpo_vfts[dpo->dpoi_type].dv_unlock(dpo);
 }
 
+u32
+dpo_get_urpf(const dpo_id_t *dpo)
+{
+    /* invalid DPOs, and types with no uRPF callback, have no interface */
+    if (!dpo_id_is_valid(dpo) ||
+        (NULL == dpo_vfts[dpo->dpoi_type].dv_get_urpf))
+    {
+        return (~0);
+    }
+    return (dpo_vfts[dpo->dpoi_type].dv_get_urpf(dpo));
+}
 
 static u32
 dpo_get_next_node (dpo_type_t child_type,
@@ -525,6 +537,7 @@
     interface_tx_dpo_module_init();
     mpls_disp_dpo_module_init();
     l2_bridge_dpo_module_init();
+    l3_proxy_dpo_module_init();
 
     return (NULL);
 }
diff --git a/src/vnet/dpo/dpo.h b/src/vnet/dpo/dpo.h
index d1309c1..304b433 100644
--- a/src/vnet/dpo/dpo.h
+++ b/src/vnet/dpo/dpo.h
@@ -115,6 +115,7 @@
     DPO_INTERFACE_RX,
     DPO_INTERFACE_TX,
     DPO_L2_BRIDGE,
+    DPO_L3_PROXY,
     DPO_LAST,
 } __attribute__((packed)) dpo_type_t;
 
@@ -142,7 +143,8 @@
     [DPO_MFIB_ENTRY] = "dpo-mfib_entry", \
     [DPO_INTERFACE_RX] = "dpo-interface-rx",	\
     [DPO_INTERFACE_TX] = "dpo-interface-tx",	\
-    [DPO_L2_BRIDGE] = "dpo-l2-bridge"	\
+    [DPO_L2_BRIDGE] = "dpo-l2-bridge",	\
+    [DPO_L3_PROXY] = "dpo-l3-proxy",	\
 }
 
 /**
@@ -310,10 +312,10 @@
  * @param child_node
  *  The VLIB grpah node index to create an arc from to the parent
  *
- * @parem dpo
+ * @param dpo
  *  This is the DPO to stack and set.
  *
- * @paren parent_dpo
+ * @param parent_dpo
  *  The parent DPO to stack onto.
  */ 
 extern void dpo_stack_from_node(u32 child_node,
@@ -321,6 +323,16 @@
                                 const dpo_id_t *parent);
 
 /**
+ * Get a uRPF interface for the DPO
+ *
+ * @param dpo
+ *  The DPO from which to get the uRPF interface
+ *
+ * @return valid SW interface index or ~0
+ */
+extern u32 dpo_get_urpf(const dpo_id_t *dpo);
+
+/**
  * @brief  A lock function registered for a DPO type
  */
 typedef void (*dpo_lock_fn_t)(dpo_id_t *dpo);
@@ -342,6 +354,12 @@
 typedef u32* (*dpo_get_next_node_t)(const dpo_id_t *dpo);
 
 /**
+ * @brief Given a DPO instance return an interface that can
+ * be used in an uRPF check
+ */
+typedef u32 (*dpo_get_urpf_t)(const dpo_id_t *dpo);
+
+/**
  * @brief A virtual function table regisitered for a DPO type
  */
 typedef struct dpo_vft_t_
@@ -369,6 +387,10 @@
      * function
      */
     dpo_get_next_node_t dv_get_next_node;
+    /**
+     * Get uRPF interface
+     */
+    dpo_get_urpf_t dv_get_urpf;
 } dpo_vft_t;
 
 
diff --git a/src/vnet/dpo/l3_proxy_dpo.c b/src/vnet/dpo/l3_proxy_dpo.c
new file mode 100644
index 0000000..ea3db71
--- /dev/null
+++ b/src/vnet/dpo/l3_proxy_dpo.c
@@ -0,0 +1,175 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @brief
+ * The data-path object representing l3_proxying the packet, i.e. it's for-us
+ */
+#include <vlib/vlib.h>
+#include <vnet/ip/ip.h>
+#include <vnet/dpo/l3_proxy_dpo.h>
+
+/**
+ * @brief pool of all l3_proxy DPOs
+ */
+l3_proxy_dpo_t *l3_proxy_dpo_pool;
+
+static l3_proxy_dpo_t *
+l3_proxy_dpo_alloc (void)
+{
+    l3_proxy_dpo_t *new_l3p;
+
+    /* take a cache-line aligned element from the pool and zero it */
+    pool_get_aligned(l3_proxy_dpo_pool, new_l3p, CLIB_CACHE_LINE_BYTES);
+    memset(new_l3p, 0, sizeof(l3_proxy_dpo_t));
+    return (new_l3p);
+}
+
+static l3_proxy_dpo_t *
+l3_proxy_dpo_get_from_dpo (const dpo_id_t *dpo)
+{
+    ASSERT(DPO_L3_PROXY == dpo->dpoi_type);
+
+    return (l3_proxy_dpo_get(dpo->dpoi_index));
+}
+
+
+/*
+ * l3_proxy_dpo_add_or_lock
+ *
+ * Allocate a new l3_proxy DPO that records sw_if_index, the interface
+ * reported for uRPF checks, and set the caller's dpo to reference it.
+ * A new object is allocated on every call; none is looked up or shared.
+ */
+void
+l3_proxy_dpo_add_or_lock (dpo_proto_t proto,
+                          u32 sw_if_index,
+                          dpo_id_t *dpo)
+{
+    l3_proxy_dpo_t *l3p;
+
+    l3p = l3_proxy_dpo_alloc();
+
+    l3p->l3p_sw_if_index = sw_if_index;
+
+    dpo_set(dpo, DPO_L3_PROXY, proto, (l3p - l3_proxy_dpo_pool));
+}
+
+static void
+l3_proxy_dpo_lock (dpo_id_t *dpo)
+{
+    /* one more reference on the object this DPO ID points at */
+    l3_proxy_dpo_t *locked = l3_proxy_dpo_get_from_dpo(dpo);
+
+    locked->l3p_locks += 1;
+}
+
+static void
+l3_proxy_dpo_unlock (dpo_id_t *dpo)
+{
+    l3_proxy_dpo_t *released = l3_proxy_dpo_get_from_dpo(dpo);
+
+    /* objects that are still referenced stay in the pool */
+    if (0 != --released->l3p_locks)
+    {
+        return;
+    }
+
+    pool_put(l3_proxy_dpo_pool, released);
+}
+
+static u32
+l3_proxy_dpo_get_urpf (const dpo_id_t *dpo)
+{
+    /*
+     * the uRPF interface is the one stored in the l3_proxy object
+     */
+    const l3_proxy_dpo_t *l3p = l3_proxy_dpo_get_from_dpo(dpo);
+    return (l3p->l3p_sw_if_index);
+}
+
+static u8*
+format_l3_proxy_dpo (u8 *s, va_list *ap)
+{
+    CLIB_UNUSED(index_t index) = va_arg(*ap, index_t);
+    CLIB_UNUSED(u32 indent) = va_arg(*ap, u32);
+    vnet_main_t * vnm = vnet_get_main();
+    l3_proxy_dpo_t *l3p;
+
+    /* the index may refer to an object already returned to the pool */
+    if (pool_is_free_index(l3_proxy_dpo_pool, index))
+    {
+        return (format(s, "dpo-l3_proxy DELETED"));
+    }
+
+    l3p = l3_proxy_dpo_get(index);
+
+    /* without an interface there is only the type name to print */
+    if (~0 == l3p->l3p_sw_if_index)
+    {
+        return (format(s, "dpo-l3-proxy"));
+    }
+
+    return (format(s, "dpo-l3_proxy: %U",
+                   format_vnet_sw_interface_name, vnm,
+                   vnet_get_sw_interface(vnm, l3p->l3p_sw_if_index)));
+}
+
+static void
+l3_proxy_dpo_mem_show (void)
+{
+    fib_show_memory_usage("L3 Proxy",
+			  pool_elts(l3_proxy_dpo_pool),
+			  pool_len(l3_proxy_dpo_pool),
+			  sizeof(l3_proxy_dpo_t));
+}
+
+const static dpo_vft_t l3_proxy_vft = {
+    .dv_lock = l3_proxy_dpo_lock,
+    .dv_unlock = l3_proxy_dpo_unlock,
+    .dv_format = format_l3_proxy_dpo,
+    .dv_get_urpf = l3_proxy_dpo_get_urpf,
+    .dv_mem_show = l3_proxy_dpo_mem_show,
+};
+
+/**
+ * @brief The per-protocol VLIB graph nodes that are assigned to a l3_proxy
+ *        object.
+ *
+ * this means that these graph nodes are ones from which a l3_proxy is the
+ * parent object in the DPO-graph.
+ */
+const static char* const l3_proxy_ip4_nodes[] =
+{
+    "ip4-local",
+    NULL,
+};
+const static char* const l3_proxy_ip6_nodes[] =
+{
+    "ip6-local",
+    NULL,
+};
+
+const static char* const * const l3_proxy_nodes[DPO_PROTO_NUM] =
+{
+    [DPO_PROTO_IP4]  = l3_proxy_ip4_nodes,
+    [DPO_PROTO_IP6]  = l3_proxy_ip6_nodes,
+    [DPO_PROTO_MPLS] = NULL,
+};
+
+void
+l3_proxy_dpo_module_init (void)
+{
+    dpo_register(DPO_L3_PROXY, &l3_proxy_vft, l3_proxy_nodes);
+}
diff --git a/src/vnet/dpo/l3_proxy_dpo.h b/src/vnet/dpo/l3_proxy_dpo.h
new file mode 100644
index 0000000..f17ace5
--- /dev/null
+++ b/src/vnet/dpo/l3_proxy_dpo.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @brief
+ * The data-path object representing L3 proxy. An L3 proxy is when VPP has
+ * an address in the FIB that is also assigned to an attached host.
+ */
+
+#ifndef __L3_PROXY_DPO_H__
+#define __L3_PROXY_DPO_H__
+
+#include <vnet/dpo/dpo.h>
+#include <vnet/ip/ip6.h>
+
+typedef struct l3_proxy_dpo_t_
+{
+    /**
+     * The Software interface index on which traffic is l3_proxied
+     */
+    u32 l3p_sw_if_index;
+
+    /**
+     * number of locks.
+     */
+    u16 l3p_locks;
+} l3_proxy_dpo_t;
+
+extern void l3_proxy_dpo_add_or_lock (dpo_proto_t proto,
+                                      u32 sw_if_index,
+                                      dpo_id_t *dpo);
+
+extern void l3_proxy_dpo_module_init(void);
+
+/**
+ * @brief pool of all l3_proxy DPOs (defined in l3_proxy_dpo.c)
+ */
+extern l3_proxy_dpo_t *l3_proxy_dpo_pool;
+
+static inline l3_proxy_dpo_t *
+l3_proxy_dpo_get (index_t index)
+{
+    return (pool_elt_at_index(l3_proxy_dpo_pool, index));
+}
+
+#endif
diff --git a/src/vnet/fib/fib_entry.h b/src/vnet/fib/fib_entry.h
index 2f6e37f..7e4b52a 100644
--- a/src/vnet/fib/fib_entry.h
+++ b/src/vnet/fib/fib_entry.h
@@ -43,6 +43,10 @@
      */
     FIB_SOURCE_CLASSIFY,
     /**
+     * A route that is being 'proxied' on behalf of another device
+     */
+    FIB_SOURCE_PROXY,
+    /**
      * Route added as a result of interface configuration.
      * this will also come from the API/CLI, but the distinction is
      * that is from confiiguration on an interface, not a 'ip route' command
@@ -136,6 +140,7 @@
 #define FIB_SOURCES {					\
     [FIB_SOURCE_SPECIAL] = "special",			\
     [FIB_SOURCE_INTERFACE] = "interface",		\
+    [FIB_SOURCE_PROXY] = "proxy",                       \
     [FIB_SOURCE_API] = "API",			        \
     [FIB_SOURCE_CLI] = "CLI",			        \
     [FIB_SOURCE_ADJ] = "adjacency",			\
diff --git a/src/vnet/fib/fib_entry_src_special.c b/src/vnet/fib/fib_entry_src_special.c
index e979e18..c3e4fe5 100644
--- a/src/vnet/fib/fib_entry_src_special.c
+++ b/src/vnet/fib/fib_entry_src_special.c
@@ -67,4 +67,5 @@
     fib_entry_src_register(FIB_SOURCE_SIXRD, &special_src_vft);
     fib_entry_src_register(FIB_SOURCE_CLASSIFY, &special_src_vft);
     fib_entry_src_register(FIB_SOURCE_AE, &special_src_vft);
+    fib_entry_src_register(FIB_SOURCE_PROXY, &special_src_vft);
 }
diff --git a/src/vnet/fib/fib_path.c b/src/vnet/fib/fib_path.c
index 7b713a4..926b2f3 100644
--- a/src/vnet/fib/fib_path.c
+++ b/src/vnet/fib/fib_path.c
@@ -1854,20 +1854,21 @@
 
     case FIB_PATH_TYPE_EXCLUSIVE:
     case FIB_PATH_TYPE_SPECIAL:
-	/*
+    {
+        /*
 	 * these path types may link to an adj, if that's what
 	 * the clinet gave
 	 */
-	if (dpo_is_adj(&path->fp_dpo))
+        u32 rpf_sw_if_index;
+
+        rpf_sw_if_index = dpo_get_urpf(&path->fp_dpo);
+
+        if (~0 != rpf_sw_if_index)
 	{
-	    ip_adjacency_t *adj;
-
-	    adj = adj_get(path->fp_dpo.dpoi_index);
-
-	    fib_urpf_list_append(urpf, adj->rewrite_header.sw_if_index);
+	    fib_urpf_list_append(urpf, rpf_sw_if_index);
 	}
 	break;
-
+    }
     case FIB_PATH_TYPE_DEAG:
     case FIB_PATH_TYPE_RECEIVE:
     case FIB_PATH_TYPE_INTF_RX:
diff --git a/src/vnet/ip/lookup.c b/src/vnet/ip/lookup.c
index 856c494..61350b4 100644
--- a/src/vnet/ip/lookup.c
+++ b/src/vnet/ip/lookup.c
@@ -49,6 +49,7 @@
 #include <vnet/dpo/punt_dpo.h>
 #include <vnet/dpo/receive_dpo.h>
 #include <vnet/dpo/ip_null_dpo.h>
+#include <vnet/dpo/l3_proxy_dpo.h>
 #include <vnet/ip/ip6_neighbor.h>
 
 /**
@@ -1433,6 +1434,122 @@
 };
 /* *INDENT-ON* */
 
+/**
+ * @brief CLI handler for "ip container": add, or with "del" remove, a
+ * FIB_SOURCE_PROXY host route for the given address.
+ *
+ * Parses one address (IPv4 or IPv6), one interface and an optional "del"
+ * keyword from the command line. On add, an L3 proxy DPO recording the
+ * interface is installed as an exclusive entry for the host prefix.
+ *
+ * @return NULL on success (or when no input line is available), otherwise
+ *         an error describing the unparsable or missing argument.
+ */
+clib_error_t *
+ip_container_cmd (vlib_main_t * vm,
+		  unformat_input_t * main_input, vlib_cli_command_t * cmd)
+{
+  unformat_input_t _line_input, *line_input = &_line_input;
+  clib_error_t *error = NULL;
+  fib_prefix_t pfx;
+
+  u32 is_del;
+  u32 addr_set;
+  vnet_main_t *vnm;
+  u32 fib_index;
+  u32 sw_if_index;
+
+  vnm = vnet_get_main ();
+  is_del = 0;
+  addr_set = 0;
+  sw_if_index = ~0;
+
+  /* start from a zeroed prefix so no stale stack bytes reach the FIB */
+  memset (&pfx, 0, sizeof (pfx));
+
+  /* Get a line of input. */
+  if (!unformat_user (main_input, unformat_line_input, line_input))
+    return 0;
+
+  while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (line_input, "%U", unformat_ip4_address, &pfx.fp_addr.ip4))
+	{
+	  pfx.fp_proto = FIB_PROTOCOL_IP4;
+	  pfx.fp_len = 32;
+	  addr_set = 1;
+	}
+      else if (unformat (line_input, "%U",
+			 unformat_ip6_address, &pfx.fp_addr.ip6))
+	{
+	  pfx.fp_proto = FIB_PROTOCOL_IP6;
+	  pfx.fp_len = 128;
+	  addr_set = 1;
+	}
+      else if (unformat (line_input, "%U",
+			 unformat_vnet_sw_interface, vnm, &sw_if_index))
+	;
+      else if (unformat (line_input, "del"))
+	is_del = 1;
+      else
+	{
+	  error = clib_error_return (0, "unknown input '%U'",
+				     format_unformat_error, line_input);
+	  goto done;
+	}
+    }
+
+  if (~0 == sw_if_index)
+    {
+      error = clib_error_return (0, "no interface");
+      goto done;
+    }
+  if (!addr_set)
+    {
+      error = clib_error_return (0, "no address");
+      goto done;
+    }
+
+  /*
+   * NOTE(review): the helper name suggests it returns a table ID, yet the
+   * value is used as a FIB index below - confirm this is the intended lookup.
+   */
+  fib_index = fib_table_get_table_id_for_sw_if_index (pfx.fp_proto,
+						      sw_if_index);
+
+  if (is_del)
+    fib_table_entry_special_remove (fib_index, &pfx, FIB_SOURCE_PROXY);
+  else
+    {
+      dpo_id_t proxy_dpo = DPO_INVALID;
+
+      l3_proxy_dpo_add_or_lock (fib_proto_to_dpo (pfx.fp_proto),
+				sw_if_index, &proxy_dpo);
+
+      fib_table_entry_special_dpo_add (fib_index,
+				       &pfx,
+				       FIB_SOURCE_PROXY,
+				       FIB_ENTRY_FLAG_EXCLUSIVE, &proxy_dpo);
+
+      /* the FIB entry keeps its own lock; drop this function's reference */
+      dpo_reset (&proxy_dpo);
+    }
+
+done:
+  unformat_free (line_input);
+
+  return (error);
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (ip_container_command_node, static) = {
+  .path = "ip container",
+  .function = ip_container_cmd,
+  .short_help = "ip container <address> <interface> [del]",
+  .is_mp_safe = 1,
+};
+/* *INDENT-ON* */
+
 /*
  * fd.io coding-style-patch-verification: ON
  *