LISP: Map-server fallback feature

Change-Id: I1356296e1a85b5d532f45ba70572b2184ac3f6fb
Signed-off-by: Filip Tehlar <ftehlar@cisco.com>
diff --git a/src/vnet/lisp-cp/control.c b/src/vnet/lisp-cp/control.c
index a85656b..72af525 100644
--- a/src/vnet/lisp-cp/control.c
+++ b/src/vnet/lisp-cp/control.c
@@ -670,6 +670,9 @@
       memset (ms, 0, sizeof (*ms));
       ip_address_copy (&ms->address, addr);
       vec_add1 (lcm->map_servers, ms[0]);
+
+      if (vec_len (lcm->map_servers) == 1)
+	lcm->do_map_server_election = 1;
     }
   else
     {
@@ -678,6 +681,9 @@
 	  ms = vec_elt_at_index (lcm->map_servers, i);
 	  if (!ip_address_cmp (&ms->address, addr))
 	    {
+	      if (!ip_address_cmp (&ms->address, &lcm->active_map_server))
+		lcm->do_map_server_election = 1;
+
 	      vec_del1 (lcm->map_servers, i);
 	      break;
 	    }
@@ -1496,6 +1502,26 @@
   return 0;
 }
 
+/* Set the map-register fallback threshold; a zero value is rejected. */
+int
+vnet_lisp_map_register_fallback_threshold_set (u32 value)
+{
+  lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+
+  if (0 == value)
+    return VNET_API_ERROR_INVALID_ARGUMENT;
+
+  lcm->max_expired_map_registers = value;
+  return 0;
+}
+
+/* Return the currently configured map-register fallback threshold. */
+u32
+vnet_lisp_map_register_fallback_threshold_get (void)
+{
+  return vnet_lisp_cp_get_main ()->max_expired_map_registers;
+}
+
 /**
  * Configure Proxy-ETR
  *
@@ -2342,24 +2368,29 @@
   r->retries_num = 0;
 }
 
-static int
-elect_map_resolver (lisp_cp_main_t * lcm)
-{
-  lisp_msmr_t *mr;
+#define foreach_msmr \
+  _(server) \
+  _(resolver)
 
-  vec_foreach (mr, lcm->map_resolvers)
-  {
-    if (!mr->is_down)
-      {
-	ip_address_copy (&lcm->active_map_resolver, &mr->address);
-	lcm->do_map_resolver_election = 0;
-	return 1;
-      }
-  }
-  return 0;
+#define _(name) \
+static int                                                              \
+elect_map_ ## name (lisp_cp_main_t * lcm)                               \
+{                                                                       \
+  lisp_msmr_t *mr;                                                      \
+  vec_foreach (mr, lcm->map_ ## name ## s)                              \
+  {                                                                     \
+    if (!mr->is_down)                                                   \
+      {                                                                 \
+	ip_address_copy (&lcm->active_map_ ##name, &mr->address);       \
+	lcm->do_map_ ## name ## _election = 0;                          \
+	return 1;                                                       \
+      }                                                                 \
+  }                                                                     \
+  return 0;                                                             \
 }
-
-static void
+foreach_msmr
+#undef _
+  static void
 free_map_register_records (mapping_t * maps)
 {
   mapping_t *map;
@@ -2488,31 +2519,32 @@
   return b;
 }
 
-static int
-get_egress_map_resolver_ip (lisp_cp_main_t * lcm, ip_address_t * ip)
-{
-  lisp_msmr_t *mr;
-  while (lcm->do_map_resolver_election
-	 | (0 == ip_fib_get_first_egress_ip_for_dst (lcm,
-						     &lcm->active_map_resolver,
-						     ip)))
-    {
-      if (0 == elect_map_resolver (lcm))
-	/* all map resolvers are down */
-	{
-	  /* restart MR checking by marking all of them up */
-	  vec_foreach (mr, lcm->map_resolvers) mr->is_down = 0;
-	  return -1;
-	}
-    }
-  return 0;
+#define _(name) \
+static int                                                              \
+get_egress_map_ ##name## _ip (lisp_cp_main_t * lcm, ip_address_t * ip)  \
+{                                                                       \
+  lisp_msmr_t *mr;                                                      \
+  while (lcm->do_map_ ## name ## _election                              \
+	 | (0 == ip_fib_get_first_egress_ip_for_dst                     \
+            (lcm, &lcm->active_map_ ##name, ip)))                       \
+    {                                                                   \
+      if (0 == elect_map_ ## name (lcm))                                \
+	/* all map resolvers/servers are down */                        \
+	{                                                               \
+	  /* restart MR/MS checking by marking all of them up */        \
+	  vec_foreach (mr, lcm->map_ ## name ## s) mr->is_down = 0;     \
+	  return -1;                                                    \
+	}                                                               \
+    }                                                                   \
+  return 0;                                                             \
 }
 
+foreach_msmr
+#undef _
 /* CP output statistics */
 #define foreach_lisp_cp_output_error                  \
 _(MAP_REGISTERS_SENT, "map-registers sent")           \
 _(RLOC_PROBES_SENT, "rloc-probes sent")
-
 static char *lisp_cp_output_error_strings[] = {
 #define _(sym,string) string,
   foreach_lisp_cp_output_error
@@ -2588,7 +2620,6 @@
   f->n_vectors = 1;
   vlib_put_frame_to_node (lcm->vlib_main, next_index, f);
 
-  hash_set (lcm->map_register_messages_by_nonce, nonce, 0);
   return 0;
 }
 
@@ -2642,28 +2673,18 @@
 static int
 send_map_register (lisp_cp_main_t * lcm, u8 want_map_notif)
 {
+  pending_map_register_t *pmr;
   u32 bi, map_registers_sent = 0;
   vlib_buffer_t *b;
   ip_address_t sloc;
   vlib_frame_t *f;
   u64 nonce = 0;
   u32 next_index, *to_next;
-  ip_address_t *ms = 0;
   mapping_t *records, *r, *group, *k;
 
-  // TODO: support multiple map servers and do election
-  if (0 == vec_len (lcm->map_servers))
+  if (get_egress_map_server_ip (lcm, &sloc) < 0)
     return -1;
 
-  ms = &lcm->map_servers[0].address;
-
-  if (0 == ip_fib_get_first_egress_ip_for_dst (lcm, ms, &sloc))
-    {
-      clib_warning ("no eligible interface address found for %U!",
-		    format_ip_address, &lcm->map_servers[0]);
-      return -1;
-    }
-
   records = build_map_register_record_list (lcm);
   if (!records)
     return -1;
@@ -2692,15 +2713,15 @@
 	  }
       }
 
-    b = build_map_register (lcm, &sloc, ms, &nonce, want_map_notif, group,
-			    key_id, key, &bi);
+    b = build_map_register (lcm, &sloc, &lcm->active_map_server, &nonce,
+			    want_map_notif, group, key_id, key, &bi);
     vec_free (group);
     if (!b)
       continue;
 
     vnet_buffer (b)->sw_if_index[VLIB_TX] = 0;
 
-    next_index = (ip_addr_version (&lcm->active_map_resolver) == IP4) ?
+    next_index = (ip_addr_version (&lcm->active_map_server) == IP4) ?
       ip4_lookup_node.index : ip6_lookup_node.index;
 
     f = vlib_get_frame_to_node (lcm->vlib_main, next_index);
@@ -2712,7 +2733,11 @@
     vlib_put_frame_to_node (lcm->vlib_main, next_index, f);
     map_registers_sent++;
 
-    hash_set (lcm->map_register_messages_by_nonce, nonce, 0);
+    pool_get (lcm->pending_map_registers_pool, pmr);
+    memset (pmr, 0, sizeof (*pmr));
+    pmr->time_to_expire = PENDING_MREG_EXPIRATION_TIME;
+    hash_set (lcm->map_register_messages_by_nonce, nonce,
+	      pmr - lcm->pending_map_registers_pool);
   }
   free_map_register_records (records);
 
@@ -3488,7 +3513,11 @@
     }
 
   a->is_free = 1;
+  pool_put_index (lcm->pending_map_registers_pool, pmr_index[0]);
   hash_unset (lcm->map_register_messages_by_nonce, a->nonce);
+
+  /* reset the expired map-register counter */
+  lcm->expired_map_registers = 0;
 }
 
 static mapping_t *
@@ -3635,8 +3664,8 @@
   if (!is_auth_data_valid (mnotif_hdr, vlib_buffer_get_tail (b)
 			   - (u8 *) mnotif_hdr, key_id, key))
     {
-      clib_warning ("Map-notify auth data verification failed for nonce %lu!",
-		    a->nonce);
+      clib_warning ("Map-notify auth data verification failed for nonce "
+		    "0x%lx!", a->nonce);
       map_records_arg_free (a);
       return 0;
     }
@@ -4000,9 +4029,11 @@
   lcm->lisp_pitr = 0;
   lcm->flags = 0;
   memset (&lcm->active_map_resolver, 0, sizeof (lcm->active_map_resolver));
+  memset (&lcm->active_map_server, 0, sizeof (lcm->active_map_server));
 
   gid_dictionary_init (&lcm->mapping_index_by_gid);
   lcm->do_map_resolver_election = 1;
+  lcm->do_map_server_election = 1;
   lcm->map_request_mode = MR_MODE_DST_ONLY;
 
   num_threads = 1 /* main thread */  + vtm->n_threads;
@@ -4021,6 +4052,8 @@
   timing_wheel_init (&lcm->wheel, now, vm->clib_time.clocks_per_second);
   lcm->nsh_map_index = ~0;
   lcm->map_register_ttl = MAP_REGISTER_DEFAULT_TTL;
+  lcm->max_expired_map_registers = MAX_EXPIRED_MAP_REGISTERS_DEFAULT;
+  lcm->expired_map_registers = 0;
   return 0;
 }
 
@@ -4180,7 +4213,7 @@
   /* *INDENT-ON* */
 
   vec_foreach (pmr_index, to_be_removed)
-    pool_put_index (lcm->pending_map_requests_by_nonce, pmr_index[0]);
+    pool_put_index (lcm->pending_map_requests_pool, pmr_index[0]);
 
   vec_free (to_be_removed);
 }
@@ -4201,15 +4234,98 @@
     }
 }
 
+static int
+update_pending_map_register (pending_map_register_t * r, f64 dt, u8 * del_all)
+{
+  lisp_cp_main_t *lcm = vnet_lisp_cp_get_main ();
+  lisp_msmr_t *ms;
+  del_all[0] = 0;
+  /* age r by dt; returns 1 to keep r, 0 when the caller must drop it */
+  r->time_to_expire -= dt;
+  /* an expired entry counts towards the map-server fallback threshold */
+  if (r->time_to_expire < 0)
+    {
+      lcm->expired_map_registers++;
+      /* threshold reached: give up on the currently active map server */
+      if (lcm->expired_map_registers >= lcm->max_expired_map_registers)
+	{
+	  ms = get_map_server (&lcm->active_map_server);
+	  if (!ms)
+	    {
+	      clib_warning ("Map server %U not found - probably deleted "
+			    "by the user recently.", format_ip_address,
+			    &lcm->active_map_server);
+	    }
+	  else
+	    {
+	      clib_warning ("map server %U is unreachable, ignoring",
+			    format_ip_address, &lcm->active_map_server);
+
+	      /* mark current map server unavailable so it won't be
+	       * elected next time */
+	      ms->is_down = 1;
+	      ms->last_update = vlib_time_now (lcm->vlib_main);
+	    }
+	  /* elect_map_server picks the first server not marked down */
+	  elect_map_server (lcm);
+	  /* NOTE(review): next call re-zeroes del_all - confirm caller stops */
+	  /* indication for deleting all pending map registers */
+	  del_all[0] = 1;
+	  lcm->expired_map_registers = 0;
+	  return 0;
+	}
+      else
+	{
+	  /* below threshold: only this pending map register is dropped */
+	  return 0;
+	}
+    }
+  return 1;
+}
+
 static void
 update_map_register (lisp_cp_main_t * lcm, f64 dt)
 {
+  u32 *to_be_removed = 0, *pmr_index;
   static f64 time_left = QUICK_MAP_REGISTER_INTERVAL;
   static u64 mreg_sent_counter = 0;
 
+  pending_map_register_t *pmr;
+  u8 del_all = 0;
+
   if (!lcm->is_enabled || !lcm->map_registering)
     return;
 
+  /* *INDENT-OFF* */
+  pool_foreach (pmr, lcm->pending_map_registers_pool,
+  ({
+    if (!update_pending_map_register (pmr, dt, &del_all))
+    {
+      if (del_all)
+        break;
+      vec_add1 (to_be_removed, pmr - lcm->pending_map_registers_pool);
+    }
+  }));
+  /* *INDENT-ON* */
+
+  if (del_all)
+    {
+      /* delete all pending map register messages so they won't
+       * trigger another map server election.. */
+      pool_free (lcm->pending_map_registers_pool);
+      hash_free (lcm->map_register_messages_by_nonce);
+
+      /* ..and trigger registration against next map server (if any) */
+      time_left = 0;
+    }
+  else
+    {
+      vec_foreach (pmr_index, to_be_removed)
+	pool_put_index (lcm->pending_map_registers_pool, pmr_index[0]);
+    }
+
+  vec_free (to_be_removed);
+
   time_left -= dt;
   if (time_left <= 0)
     {
diff --git a/src/vnet/lisp-cp/control.h b/src/vnet/lisp-cp/control.h
index d030e58..7b0380f 100644
--- a/src/vnet/lisp-cp/control.h
+++ b/src/vnet/lisp-cp/control.h
@@ -25,18 +25,22 @@
 #define PENDING_MREQ_EXPIRATION_TIME        3.0	/* seconds */
 #define PENDING_MREQ_QUEUE_LEN              5
 
-#define PENDING_MREG_EXPIRATION_TIME        3.0	/* seconds */
 #define RLOC_PROBING_INTERVAL               60.0
 
 /* when map-registration is enabled "quick registration" takes place first.
    In this mode ETR sends map-register messages at an increased frequency
    until specified message count is reached */
-#define QUICK_MAP_REGISTER_MSG_COUNT        3
+#define QUICK_MAP_REGISTER_MSG_COUNT        5
 #define QUICK_MAP_REGISTER_INTERVAL         3.0
 
 /* normal map-register period */
 #define MAP_REGISTER_INTERVAL               60.0
 
+/* expired map-registers tolerated before electing the next map server */
+#define MAX_EXPIRED_MAP_REGISTERS_DEFAULT   3
+
+#define PENDING_MREG_EXPIRATION_TIME        3.0	/* seconds */
+
 /* 24 hours */
 #define MAP_REGISTER_DEFAULT_TTL            86400
 
@@ -53,6 +57,11 @@
 
 typedef struct
 {
+  f64 time_to_expire;
+} pending_map_register_t;
+
+typedef struct
+{
   gid_address_t leid;
   gid_address_t reid;
   u8 is_src_dst;
@@ -180,6 +189,9 @@
   /* pool of pending map requests */
   pending_map_request_t *pending_map_requests_pool;
 
+  /* pool of pending map registers */
+  pending_map_register_t *pending_map_registers_pool;
+
   /* hash map of sent map register messages */
   uword *map_register_messages_by_nonce;
 
@@ -194,8 +206,10 @@
    * since the vector may be modified during request resend/retry procedure
    * and break things :-) */
   ip_address_t active_map_resolver;
+  ip_address_t active_map_server;
 
   u8 do_map_resolver_election;
+  u8 do_map_server_election;
 
   /* map-request  locator set index */
   u32 mreq_itr_rlocs;
@@ -241,6 +255,10 @@
   /* TTL used for all mappings when registering */
   u32 map_register_ttl;
 
+  /* control variables for map server election */
+  u32 max_expired_map_registers;
+  u32 expired_map_registers;
+
   /* commodity */
   ip4_main_t *im4;
   ip6_main_t *im6;
@@ -367,6 +385,8 @@
 int vnet_lisp_nsh_set_locator_set (u8 * locator_set_name, u8 is_add);
 int vnet_lisp_map_register_set_ttl (u32 ttl);
 u32 vnet_lisp_map_register_get_ttl (void);
+int vnet_lisp_map_register_fallback_threshold_set (u32 value);
+u32 vnet_lisp_map_register_fallback_threshold_get (void);
 
 map_records_arg_t *parse_map_reply (vlib_buffer_t * b);
 
diff --git a/src/vnet/lisp-cp/one.api b/src/vnet/lisp-cp/one.api
index 5087c63..3fcc9da 100644
--- a/src/vnet/lisp-cp/one.api
+++ b/src/vnet/lisp-cp/one.api
@@ -942,6 +942,26 @@
   u8 is_en;
 };
 
+autoreply define one_map_register_fallback_threshold
+{
+  u32 client_index;
+  u32 context;
+  u32 value;	/* new threshold, network byte order; 0 is rejected */
+};
+/* request the currently configured map-register fallback threshold */
+define show_one_map_register_fallback_threshold
+{
+  u32 client_index;
+  u32 context;
+};
+/* reply carrying the currently configured fallback threshold */
+define show_one_map_register_fallback_threshold_reply
+{
+  u32 context;
+  i32 retval;
+  u32 value;	/* current threshold, network byte order */
+};
+
 /*
  * Local Variables:
  * eval: (c-set-style "gnu")
diff --git a/src/vnet/lisp-cp/one_api.c b/src/vnet/lisp-cp/one_api.c
index db443b5..6117f93 100644
--- a/src/vnet/lisp-cp/one_api.c
+++ b/src/vnet/lisp-cp/one_api.c
@@ -87,6 +87,8 @@
 _(ONE_ENABLE_DISABLE, one_enable_disable)                               \
 _(ONE_RLOC_PROBE_ENABLE_DISABLE, one_rloc_probe_enable_disable)         \
 _(ONE_MAP_REGISTER_ENABLE_DISABLE, one_map_register_enable_disable)     \
+_(ONE_MAP_REGISTER_FALLBACK_THRESHOLD,                                  \
+  one_map_register_fallback_threshold)                                  \
 _(ONE_ADD_DEL_REMOTE_MAPPING, one_add_del_remote_mapping)               \
 _(ONE_ADD_DEL_ADJACENCY, one_add_del_adjacency)                         \
 _(ONE_PITR_SET_LOCATOR_SET, one_pitr_set_locator_set)                   \
@@ -106,6 +108,8 @@
 _(SHOW_ONE_RLOC_PROBE_STATE, show_one_rloc_probe_state)                 \
 _(SHOW_ONE_MAP_REGISTER_STATE, show_one_map_register_state)             \
 _(SHOW_ONE_MAP_REGISTER_TTL, show_one_map_register_ttl)                 \
+_(SHOW_ONE_MAP_REGISTER_FALLBACK_THRESHOLD,                             \
+  show_one_map_register_fallback_threshold)                             \
 _(SHOW_ONE_STATUS, show_one_status)                                     \
 _(ONE_ADD_DEL_MAP_REQUEST_ITR_RLOCS,                                    \
   one_add_del_map_request_itr_rlocs)                                    \
@@ -1604,6 +1608,35 @@
   vec_free (entries);
 }
 
+/* API handler: set the fallback threshold (sent in network byte order). */
+static void
+  vl_api_one_map_register_fallback_threshold_t_handler
+  (vl_api_one_map_register_fallback_threshold_t * mp)
+{
+  vl_api_one_map_register_fallback_threshold_reply_t *rmp;
+  u32 value = clib_net_to_host_u32 (mp->value);
+  int rv = vnet_lisp_map_register_fallback_threshold_set (value);
+  /* answer with the _REPLY message id, as the show handler does */
+  REPLY_MACRO (VL_API_ONE_MAP_REGISTER_FALLBACK_THRESHOLD_REPLY);
+}
+
+/* API handler: report the configured map-register fallback threshold. */
+static void
+  vl_api_show_one_map_register_fallback_threshold_t_handler
+  (vl_api_show_one_map_register_fallback_threshold_t * mp)
+{
+  vl_api_show_one_map_register_fallback_threshold_reply_t *rmp;
+  u32 threshold = vnet_lisp_map_register_fallback_threshold_get ();
+  int rv = 0;
+
+  /* *INDENT-OFF* */
+  REPLY_MACRO2 (VL_API_SHOW_ONE_MAP_REGISTER_FALLBACK_THRESHOLD_REPLY,
+  ({
+    rmp->value = clib_host_to_net_u32 (threshold);
+  }));
+  /* *INDENT-ON* */
+}
+
 /*
  * one_api_hookup
  * Add vpe's API message handlers to the table.
diff --git a/src/vnet/lisp-cp/one_cli.c b/src/vnet/lisp-cp/one_cli.c
index 3b41189..e165f71 100644
--- a/src/vnet/lisp-cp/one_cli.c
+++ b/src/vnet/lisp-cp/one_cli.c
@@ -777,6 +777,70 @@
 };
 /* *INDENT-ON* */
 
+/* CLI handler: print the configured map-register fallback threshold. */
+static clib_error_t *
+lisp_map_register_fallback_threshold_show_command_fn (vlib_main_t * vm,
+						      unformat_input_t *
+						      input,
+						      vlib_cli_command_t * cmd)
+{
+  u32 val = vnet_lisp_map_register_fallback_threshold_get ();
+  vlib_cli_output (vm, "map register fallback threshold value: %u", val);
+  return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (one_map_register_fallback_threshold_show_command) = {
+    .path = "show one map-register fallback-threshold",
+    .short_help = "show one map-register fallback-threshold",
+    .function = lisp_map_register_fallback_threshold_show_command_fn,
+};
+
+/* *INDENT-ON* */
+
+static clib_error_t *
+lisp_map_register_fallback_threshold_command_fn (vlib_main_t * vm,
+						 unformat_input_t * input,
+						 vlib_cli_command_t * cmd)
+{
+  unformat_input_t _line_input, *line_input = &_line_input;
+  clib_error_t *error = 0;
+  u32 val = 0;
+  int rv = 0;
+  /* CLI handler: parse <count> and store it as the fallback threshold */
+  /* Get a line of input; without one the command returns without error. */
+  if (!unformat_user (input, unformat_line_input, line_input))
+    return 0;
+  /* the last number parsed wins; any other token is a parse error */
+  while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (line_input, "%d", &val))
+	;
+      else
+	{
+	  error = clib_error_return (0, "parse error");
+	  goto done;
+	}
+    }
+  /* the setter rejects 0, which is also what val holds if nothing parsed */
+  rv = vnet_lisp_map_register_fallback_threshold_set (val);
+  if (rv)
+    {
+      error = clib_error_return (0, "setting fallback threshold failed!");
+    }
+
+done:
+  unformat_free (line_input);
+  return error;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (one_map_register_fallback_threshold_command) = {
+    .path = "one map-register fallback-threshold",
+    .short_help = "one map-register fallback-threshold <count>",
+    .function = lisp_map_register_fallback_threshold_command_fn,
+};
+/* *INDENT-ON* */
 
 static clib_error_t *
 lisp_pitr_set_locator_set_command_fn (vlib_main_t * vm,