vlib: make runtime_data thread-local

Change-Id: I4aa3e7e42fb81211de1aed07dc7befee87a1e18b
Signed-off-by: Damjan Marion <damarion@cisco.com>
diff --git a/src/vlib/init.h b/src/vlib/init.h
index 4fa5b30..12db3f9 100644
--- a/src/vlib/init.h
+++ b/src/vlib/init.h
@@ -109,6 +109,7 @@
 }
 
 #define VLIB_INIT_FUNCTION(x) VLIB_DECLARE_INIT_FUNCTION(x,init)
+#define VLIB_WORKER_INIT_FUNCTION(x) VLIB_DECLARE_INIT_FUNCTION(x,worker_init)
 
 #define VLIB_MAIN_LOOP_ENTER_FUNCTION(x) \
   VLIB_DECLARE_INIT_FUNCTION(x,main_loop_enter)
diff --git a/src/vlib/main.h b/src/vlib/main.h
index a6d50b3..98bc823 100644
--- a/src/vlib/main.h
+++ b/src/vlib/main.h
@@ -162,6 +162,7 @@
 
   /* List of init functions to call, setup by constructors */
   _vlib_init_function_list_elt_t *init_function_registrations;
+  _vlib_init_function_list_elt_t *worker_init_function_registrations;
   _vlib_init_function_list_elt_t *main_loop_enter_function_registrations;
   _vlib_init_function_list_elt_t *main_loop_exit_function_registrations;
   _vlib_init_function_list_elt_t *api_init_function_registrations;
diff --git a/src/vlib/node.c b/src/vlib/node.c
index c419a13..dc0a4de 100644
--- a/src/vlib/node.c
+++ b/src/vlib/node.c
@@ -434,9 +434,7 @@
       rt->errors[i] = vlib_error_set (n->index, i);
 
     STATIC_ASSERT_SIZEOF (vlib_node_runtime_t, 128);
-    ASSERT (vec_len (n->runtime_data) <=
-	    sizeof (vlib_node_runtime_t) -
-	    STRUCT_OFFSET_OF (vlib_node_runtime_t, runtime_data));
+    ASSERT (vec_len (n->runtime_data) <= VLIB_NODE_RUNTIME_DATA_SIZE);
 
     if (vec_len (n->runtime_data) > 0)
       clib_memcpy (rt->runtime_data, n->runtime_data,
diff --git a/src/vlib/node.h b/src/vlib/node.h
index b624e9d..2a532cc 100644
--- a/src/vlib/node.h
+++ b/src/vlib/node.h
@@ -411,65 +411,68 @@
 
 typedef struct vlib_node_runtime_t
 {
-  CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
-  /* Node function to call. */
-  vlib_node_function_t *function;
+  CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);	/**< cacheline mark */
 
-  /* Vector of errors for this node. */
-  vlib_error_t *errors;
+  vlib_node_function_t *function;	/**< Node function to call. */
 
-  /* Number of clock cycles. */
-  u32 clocks_since_last_overflow;
+  vlib_error_t *errors;			/**< Vector of errors for this node. */
 
-  /* Maximum clock cycle for an invocation. */
-  u32 max_clock;
+  u32 clocks_since_last_overflow;	/**< Number of clock cycles. */
 
-  /* Number of vectors in the recorded max_clock. */
-  u32 max_clock_n;
+  u32 max_clock;			/**< Maximum clock cycle for an
+					  invocation. */
 
-  /* Number of calls. */
-  u32 calls_since_last_overflow;
+  u32 max_clock_n;			/**< Number of vectors in the recorded
+					  max_clock. */
 
-  /* Number of vector elements processed by this node. */
-  u32 vectors_since_last_overflow;
+  u32 calls_since_last_overflow;	/**< Number of calls. */
 
-  /* Start of next frames for this node. */
-  u32 next_frame_index;
+  u32 vectors_since_last_overflow;	/**< Number of vector elements
+					  processed by this node. */
 
-  /* Node index. */
-  u32 node_index;
+  u32 next_frame_index;			/**< Start of next frames for this
+					  node. */
 
-  /* For input nodes: decremented on each main loop interation until it reaches zero
-     and function is called.  Allows some input nodes to be called
-     more than others. */
-  u32 input_main_loops_per_call;
+  u32 node_index;			/**< Node index. */
 
-  /* Saved main loop counter of last dispatch of this node. */
-  u32 main_loop_count_last_dispatch;
+  u32 input_main_loops_per_call;	/**< For input nodes: decremented
+					  on each main loop iteration until
+					  it reaches zero and function is
+					  called.  Allows some input nodes to
+					  be called more than others. */
+
+  u32 main_loop_count_last_dispatch;	/**< Saved main loop counter of last
+					  dispatch of this node. */
 
   u32 main_loop_vector_stats[2];
 
-  /* Copy of main node flags. */
-  u16 flags;
+  u16 flags;				/**< Copy of main node flags. */
 
-  /* Input node state. */
-  u16 state;
+  u16 state;				/**< Input node state. */
 
   u16 n_next_nodes;
 
-  /* Next frame index that vector arguments were last enqueued to
-     last time this node ran.  Set to zero before first run
-     of this node. */
-  u16 cached_next_index;
+  u16 cached_next_index;		/**< Next frame index that vector
+					  arguments were last enqueued to
+					  last time this node ran. Set to
+					  zero before first run of this
+					  node. */
 
-  /* CPU this node runs on */
-  u16 cpu_index;
+  u16 cpu_index;			/**< CPU this node runs on */
 
-  /* Function dependent node-runtime. */
-  u8 runtime_data[0];
+  u8 runtime_data[0];			/**< Function dependent
+					  node-runtime data. This data is
+					  thread local, and it is not
+					  cloned from main thread. It needs
+					  to be initialized for each thread
+					  before it is used unless
+					  runtime_data template exists in
+					  vlib_node_t. */
 }
 vlib_node_runtime_t;
 
+#define VLIB_NODE_RUNTIME_DATA_SIZE	(sizeof (vlib_node_runtime_t) - STRUCT_OFFSET_OF (vlib_node_runtime_t, runtime_data))
+
 typedef struct
 {
   /* Number of allocated frames for this scalar/vector size. */
diff --git a/src/vlib/threads.c b/src/vlib/threads.c
index 07dbff3..3756c3f 100644
--- a/src/vlib/threads.c
+++ b/src/vlib/threads.c
@@ -633,6 +633,8 @@
 	      vm_clone->cpu_index = worker_thread_index;
 	      vm_clone->heap_base = w->thread_mheap;
 	      vm_clone->mbuf_alloc_list = 0;
+	      vm_clone->init_functions_called =
+		hash_create (0, /* value bytes */ 0);
 	      memset (&vm_clone->random_buffer, 0,
 		      sizeof (vm_clone->random_buffer));
 
@@ -674,11 +676,33 @@
 		}
 	      nm_clone->nodes_by_type[VLIB_NODE_TYPE_INTERNAL] =
 		vec_dup (nm->nodes_by_type[VLIB_NODE_TYPE_INTERNAL]);
+	      vec_foreach (rt, nm_clone->nodes_by_type[VLIB_NODE_TYPE_INTERNAL])
+	      {
+		vlib_node_t *n = vlib_get_node (vm, rt->node_index);
+		rt->cpu_index = vm_clone->cpu_index;
+		/* copy node's runtime_data template, bounded by its length */
+		if (vec_len (n->runtime_data) > 0)
+		  clib_memcpy (rt->runtime_data, n->runtime_data,
+			       vec_len (n->runtime_data));
+		else if (CLIB_DEBUG > 0)
+		  memset (rt->runtime_data, 0xfe,
+			  VLIB_NODE_RUNTIME_DATA_SIZE);
+	      }
 
 	      nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT] =
 		vec_dup (nm->nodes_by_type[VLIB_NODE_TYPE_INPUT]);
 	      vec_foreach (rt, nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT])
+	      {
+		vlib_node_t *n = vlib_get_node (vm, rt->node_index);
 		rt->cpu_index = vm_clone->cpu_index;
+		/* copy node's runtime_data template, bounded by its length */
+		if (vec_len (n->runtime_data) > 0)
+		  clib_memcpy (rt->runtime_data, n->runtime_data,
+			       vec_len (n->runtime_data));
+		else if (CLIB_DEBUG > 0)
+		  memset (rt->runtime_data, 0xfe,
+			  VLIB_NODE_RUNTIME_DATA_SIZE);
+	      }
 
 	      nm_clone->processes = vec_dup (nm->processes);
 
@@ -926,26 +950,51 @@
 	clib_mem_free (old_nodes_clone[j]);
       vec_free (old_nodes_clone);
 
-      vec_free (nm_clone->nodes_by_type[VLIB_NODE_TYPE_INTERNAL]);
 
+      /* re-clone internal nodes */
+      old_rt = nm_clone->nodes_by_type[VLIB_NODE_TYPE_INTERNAL];
       nm_clone->nodes_by_type[VLIB_NODE_TYPE_INTERNAL] =
 	vec_dup (nm->nodes_by_type[VLIB_NODE_TYPE_INTERNAL]);
 
-      /* clone input node runtime */
-      old_rt = nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT];
-
-      nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT] =
-	vec_dup (nm->nodes_by_type[VLIB_NODE_TYPE_INPUT]);
-
-      vec_foreach (rt, nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT])
+      vec_foreach (rt, nm_clone->nodes_by_type[VLIB_NODE_TYPE_INTERNAL])
       {
+	vlib_node_t *n = vlib_get_node (vm, rt->node_index);
 	rt->cpu_index = vm_clone->cpu_index;
+	if (vec_len (n->runtime_data) > 0) /* existing rt restored later */
+	  clib_memcpy (rt->runtime_data, n->runtime_data,
+		       vec_len (n->runtime_data));
       }
 
       for (j = 0; j < vec_len (old_rt); j++)
 	{
 	  rt = vlib_node_get_runtime (vm_clone, old_rt[j].node_index);
 	  rt->state = old_rt[j].state;
+	  clib_memcpy (rt->runtime_data, old_rt[j].runtime_data,
+		       VLIB_NODE_RUNTIME_DATA_SIZE);
+	}
+
+      vec_free (old_rt);
+
+      /* re-clone input nodes */
+      old_rt = nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT];
+      nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT] =
+	vec_dup (nm->nodes_by_type[VLIB_NODE_TYPE_INPUT]);
+
+      vec_foreach (rt, nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT])
+      {
+	vlib_node_t *n = vlib_get_node (vm, rt->node_index);
+	rt->cpu_index = vm_clone->cpu_index;
+	if (vec_len (n->runtime_data) > 0) /* existing rt restored later */
+	  clib_memcpy (rt->runtime_data, n->runtime_data,
+		       vec_len (n->runtime_data));
+      }
+
+      for (j = 0; j < vec_len (old_rt); j++)
+	{
+	  rt = vlib_node_get_runtime (vm_clone, old_rt[j].node_index);
+	  rt->state = old_rt[j].state;
+	  clib_memcpy (rt->runtime_data, old_rt[j].runtime_data,
+		       VLIB_NODE_RUNTIME_DATA_SIZE);
 	}
 
       vec_free (old_rt);
@@ -1342,6 +1391,7 @@
   vlib_worker_thread_t *w = (vlib_worker_thread_t *) arg;
   vlib_thread_main_t *tm = vlib_get_thread_main ();
   vlib_main_t *vm = vlib_get_main ();
+  clib_error_t *e;
 
   ASSERT (vm->cpu_index == os_get_cpu_number ());
 
@@ -1349,6 +1399,11 @@
   clib_time_init (&vm->clib_time);
   clib_mem_set_heap (w->thread_mheap);
 
+  e = vlib_call_init_exit_functions
+    (vm, vm->worker_init_function_registrations, 1 /* call_once */ );
+  if (e)
+    clib_error_report (e);
+
   /* Wait until the dpdk init sequence is complete */
   while (tm->extern_thread_mgmt && tm->worker_thread_release == 0)
     vlib_worker_thread_barrier_check ();
diff --git a/src/vnet/gre/node.c b/src/vnet/gre/node.c
index 86f7a6e..dd16db5 100644
--- a/src/vnet/gre/node.c
+++ b/src/vnet/gre/node.c
@@ -448,7 +448,6 @@
 {
   gre_main_t * em = &gre_main;
   gre_protocol_info_t * pi;
-  gre_input_runtime_t * rt;
   u16 * n;
 
   {
@@ -464,10 +463,13 @@
 				       node_index);
 
   /* Setup gre protocol -> next index sparse vector mapping. */
-  rt = vlib_node_get_runtime_data (vm, gre_input_node.index);
-  n = sparse_vec_validate (rt->next_by_protocol, 
-                           clib_host_to_net_u16 (protocol));
-  n[0] = pi->next_index;
+  foreach_vlib_main ({
+    gre_input_runtime_t * rt;
+    rt = vlib_node_get_runtime_data (this_vlib_main, gre_input_node.index);
+    n = sparse_vec_validate (rt->next_by_protocol,
+                             clib_host_to_net_u16 (protocol));
+    n[0] = pi->next_index;
+  });
 }
 
 static void
@@ -529,3 +531,17 @@
 }
 
 VLIB_INIT_FUNCTION (gre_input_init);
+
+static clib_error_t * gre_input_worker_init (vlib_main_t * vm)
+{
+  /* gre_input_node runtime data belonging to the vlib_main passed in */
+  gre_input_runtime_t * worker_rt =
+    vlib_node_get_runtime_data (vm, gre_input_node.index);
+  /* allocate a new protocol -> next index sparse vector for it */
+  worker_rt->next_by_protocol = sparse_vec_new
+    (/* elt bytes */ sizeof (worker_rt->next_by_protocol[0]),
+     /* bits in index */ BITS (((gre_header_t *) 0)->protocol));
+  return 0;
+}
+
+VLIB_WORKER_INIT_FUNCTION (gre_input_worker_init);
diff --git a/src/vnet/hdlc/node.c b/src/vnet/hdlc/node.c
index 4fe0296..57e04c8 100644
--- a/src/vnet/hdlc/node.c
+++ b/src/vnet/hdlc/node.c
@@ -285,18 +285,9 @@
   .unformat_buffer = unformat_hdlc_header,
 };
 
-static clib_error_t * hdlc_input_init (vlib_main_t * vm)
+static clib_error_t * hdlc_input_runtime_init (vlib_main_t * vm)
 {
   hdlc_input_runtime_t * rt;
-
-  {
-    clib_error_t * error = vlib_call_init_function (vm, hdlc_init);
-    if (error)
-      clib_error_report (error);
-  }
-
-  hdlc_setup_node (vm, hdlc_input_node.index);
-
   rt = vlib_node_get_runtime_data (vm, hdlc_input_node.index);
 
   rt->next_by_protocol = sparse_vec_new
@@ -313,7 +304,23 @@
   return 0;
 }
 
+static clib_error_t * hdlc_input_init (vlib_main_t * vm)
+{
+  /* Call hdlc_init first and report any error it returns. */
+  clib_error_t * error = vlib_call_init_function (vm, hdlc_init);
+  if (error)
+    clib_error_report (error);
+
+  hdlc_setup_node (vm, hdlc_input_node.index);
+  /*
+   * Initialize the hdlc_input_node runtime data for vm and hand any
+   * error back to the caller instead of discarding it.
+   */
+  return hdlc_input_runtime_init (vm);
+}
+
 VLIB_INIT_FUNCTION (hdlc_input_init);
+VLIB_WORKER_INIT_FUNCTION (hdlc_input_runtime_init);
 
 void
 hdlc_register_input_protocol (vlib_main_t * vm,
diff --git a/src/vnet/l2/l2_input_classify.c b/src/vnet/l2/l2_input_classify.c
index 497df19..485b9ab 100644
--- a/src/vnet/l2/l2_input_classify.c
+++ b/src/vnet/l2/l2_input_classify.c
@@ -505,6 +505,21 @@
 
 VLIB_INIT_FUNCTION (l2_input_classify_init);
 
+clib_error_t *
+l2_input_classify_worker_init (vlib_main_t * vm)
+{
+  /* Point the node runtime data of this vlib_main at the global mains. */
+  l2_input_classify_runtime_t *worker_rt =
+    vlib_node_get_runtime_data (vm, l2_input_classify_node.index);
+
+  worker_rt->l2cm = &l2_input_classify_main;
+  /* same classifier main pointer that the global main structure holds */
+  worker_rt->vcm = l2_input_classify_main.vnet_classify_main;
+
+  return 0;
+}
+
+VLIB_WORKER_INIT_FUNCTION (l2_input_classify_worker_init);
 
 /** Enable/disable l2 input classification on a specific interface. */
 void
diff --git a/src/vnet/l2/l2_output_classify.c b/src/vnet/l2/l2_output_classify.c
index 832be1a..c1bdadd 100644
--- a/src/vnet/l2/l2_output_classify.c
+++ b/src/vnet/l2/l2_output_classify.c
@@ -505,6 +505,22 @@
 
 VLIB_INIT_FUNCTION (l2_output_classify_init);
 
+clib_error_t *
+l2_output_classify_worker_init (vlib_main_t * vm)
+{
+  /* Point the node runtime data of this vlib_main at the global mains. */
+  l2_output_classify_runtime_t *worker_rt =
+    vlib_node_get_runtime_data (vm, l2_output_classify_node.index);
+
+  worker_rt->l2cm = &l2_output_classify_main;
+  /* same classifier main pointer that the global main structure holds */
+  worker_rt->vcm = l2_output_classify_main.vnet_classify_main;
+
+  return 0;
+}
+
+VLIB_WORKER_INIT_FUNCTION (l2_output_classify_worker_init);
+
 /** Enable/disable l2 input classification on a specific interface. */
 void
 vnet_l2_output_classify_enable_disable (u32 sw_if_index, int enable_disable)
diff --git a/src/vnet/l2tp/l2tp.c b/src/vnet/l2tp/l2tp.c
index 2d32339..cb94d7e 100644
--- a/src/vnet/l2tp/l2tp.c
+++ b/src/vnet/l2tp/l2tp.c
@@ -747,6 +747,16 @@
 
 VLIB_INIT_FUNCTION (l2tp_init);
 
+clib_error_t *
+l2tp_worker_init (vlib_main_t * vm)
+{
+  /* NOTE(review): presumably sets up this thread's encap runtime - confirm */
+  l2tp_encap_init (vm);
+  return 0;
+}
+
+VLIB_WORKER_INIT_FUNCTION (l2tp_worker_init);
+
 /*
  * fd.io coding-style-patch-verification: ON
  *
diff --git a/src/vnet/mpls/node.c b/src/vnet/mpls/node.c
index 1810091..03bfaf5 100644
--- a/src/vnet/mpls/node.c
+++ b/src/vnet/mpls/node.c
@@ -301,3 +301,16 @@
 }
 
 VLIB_INIT_FUNCTION (mpls_input_init);
+
+static clib_error_t * mpls_input_worker_init (vlib_main_t * vm)
+{
+  /* mpls_input_node runtime data belonging to the vlib_main passed in */
+  mpls_input_runtime_t * worker_rt =
+    vlib_node_get_runtime_data (vm, mpls_input_node.index);
+  worker_rt->mpls_main = &mpls_main;
+  worker_rt->last_label = (u32) ~0;
+  worker_rt->last_inner_fib_index = worker_rt->last_outer_fib_index = 0;
+  return 0;
+}
+
+VLIB_WORKER_INIT_FUNCTION (mpls_input_worker_init);
diff --git a/src/vnet/ppp/node.c b/src/vnet/ppp/node.c
index 4f1f6a7..2f6e0c3 100644
--- a/src/vnet/ppp/node.c
+++ b/src/vnet/ppp/node.c
@@ -295,18 +295,10 @@
 /* *INDENT-ON* */
 
 static clib_error_t *
-ppp_input_init (vlib_main_t * vm)
+ppp_input_runtime_init (vlib_main_t * vm)
 {
   ppp_input_runtime_t *rt;
 
-  {
-    clib_error_t *error = vlib_call_init_function (vm, ppp_init);
-    if (error)
-      clib_error_report (error);
-  }
-
-  ppp_setup_node (vm, ppp_input_node.index);
-
   rt = vlib_node_get_runtime_data (vm, ppp_input_node.index);
 
   rt->next_by_protocol = sparse_vec_new
@@ -323,7 +315,24 @@
   return 0;
 }
 
+static clib_error_t *
+ppp_input_init (vlib_main_t * vm)
+{
+  /* Call ppp_init first and report any error it returns. */
+  clib_error_t *error = vlib_call_init_function (vm, ppp_init);
+  if (error)
+    clib_error_report (error);
+
+  ppp_setup_node (vm, ppp_input_node.index);
+  /*
+   * Initialize the ppp_input_node runtime data for vm and hand any error
+   * back to the caller instead of discarding it.
+   */
+  return ppp_input_runtime_init (vm);
+}
+
 VLIB_INIT_FUNCTION (ppp_input_init);
+VLIB_WORKER_INIT_FUNCTION (ppp_input_runtime_init);
 
 void
 ppp_register_input_protocol (vlib_main_t * vm,
diff --git a/src/vnet/tcp/tcp_syn_filter4.c b/src/vnet/tcp/tcp_syn_filter4.c
index c7605a3..9b2a8ac 100644
--- a/src/vnet/tcp/tcp_syn_filter4.c
+++ b/src/vnet/tcp/tcp_syn_filter4.c
@@ -450,18 +450,21 @@
 
   if (enable_disable)
     {
-      vlib_main_t *vm = vlib_get_main ();
       syn_filter4_runtime_t *rt;
 
-      rt = vlib_node_get_runtime_data (vm, syn_filter4_node.index);
-      vec_validate (rt->syn_counts, 1023);
-      /*
-       * Given perfect disperson / optimal hashing results:
-       * Allow 128k (successful) syns/sec. 1024, buckets each of which
-       * absorb 128 syns before filtering. Reset table once a second.
-       * Reality bites, lets try resetting once every 100ms.
-       */
-      rt->reset_interval = 0.1;	/* reset interval in seconds */
+      /* *INDENT-OFF* */
+      foreach_vlib_main ({
+	rt = vlib_node_get_runtime_data (this_vlib_main, syn_filter4_node.index);
+	vec_validate (rt->syn_counts, 1023);
+	/*
+	 * Given perfect dispersion / optimal hashing results:
+	 * Allow 128k (successful) syns/sec. 1024 buckets, each of which
+	 * absorbs 128 syns before filtering. Reset table once a second.
+	 * Reality bites, let's try resetting once every 100ms.
+	 */
+	rt->reset_interval = 0.1;	/* reset interval in seconds */
+      });
+      /* *INDENT-ON* */
     }
 
   rv = vnet_feature_enable_disable ("ip4-local", "syn-filter-4",
diff --git a/src/vnet/udp/udp_local.c b/src/vnet/udp/udp_local.c
index 6b239f7..3a60b29 100644
--- a/src/vnet/udp/udp_local.c
+++ b/src/vnet/udp/udp_local.c
@@ -520,11 +520,15 @@
 				       : udp6_input_node.index, node_index);
 
   /* Setup udp protocol -> next index sparse vector mapping. */
-  rt = vlib_node_get_runtime_data
-    (vm, is_ip4 ? udp4_input_node.index : udp6_input_node.index);
-  n = sparse_vec_validate (rt->next_by_dst_port,
-			   clib_host_to_net_u16 (dst_port));
-  n[0] = pi->next_index;
+  /* *INDENT-OFF* */
+  foreach_vlib_main({
+    rt = vlib_node_get_runtime_data
+      (this_vlib_main, is_ip4 ? udp4_input_node.index : udp6_input_node.index);
+    n = sparse_vec_validate (rt->next_by_dst_port,
+			     clib_host_to_net_u16 (dst_port));
+    n[0] = pi->next_index;
+  });
+  /* *INDENT-ON* */
 }
 
 void
@@ -541,11 +545,15 @@
     return;
 
   /* Kill the mapping. Don't bother killing the pi, it may be back. */
-  rt = vlib_node_get_runtime_data
-    (vm, is_ip4 ? udp4_input_node.index : udp6_input_node.index);
-  n = sparse_vec_validate (rt->next_by_dst_port,
-			   clib_host_to_net_u16 (dst_port));
-  n[0] = SPARSE_VEC_INVALID_INDEX;
+  /* *INDENT-OFF* */
+  foreach_vlib_main({
+    rt = vlib_node_get_runtime_data
+      (this_vlib_main, is_ip4 ? udp4_input_node.index : udp6_input_node.index);
+    n = sparse_vec_validate (rt->next_by_dst_port,
+			     clib_host_to_net_u16 (dst_port));
+    n[0] = SPARSE_VEC_INVALID_INDEX;
+  });
+  /* *INDENT-ON* */
 }
 
 void
@@ -604,10 +612,27 @@
   pn->unformat_edit = unformat_pg_udp_header;
 }
 
+static void
+udp_local_node_runtime_init (vlib_main_t * vm)
+{
+  /* udp4 and udp6 input nodes each get their own, identically built state */
+  u32 node_indices[2] = { udp4_input_node.index, udp6_input_node.index };
+  int i;
+
+  for (i = 0; i < 2; i++)
+    {
+      udp_input_runtime_t *rt =
+	vlib_node_get_runtime_data (vm, node_indices[i]);
+      rt->next_by_dst_port = sparse_vec_new
+	( /* elt bytes */ sizeof (rt->next_by_dst_port[0]),
+	 /* bits in index */ BITS (((udp_header_t *) 0)->dst_port));
+      rt->punt_unknown = 0;
+    }
+}
+
 clib_error_t *
 udp_local_init (vlib_main_t * vm)
 {
-  udp_input_runtime_t *rt;
   udp_main_t *um = &udp_main;
   int i;
 
@@ -628,27 +653,13 @@
   udp_setup_node (vm, udp4_input_node.index);
   udp_setup_node (vm, udp6_input_node.index);
 
-  rt = vlib_node_get_runtime_data (vm, udp4_input_node.index);
-
-  rt->next_by_dst_port = sparse_vec_new
-    ( /* elt bytes */ sizeof (rt->next_by_dst_port[0]),
-     /* bits in index */ BITS (((udp_header_t *) 0)->dst_port));
-
-  rt->punt_unknown = 0;
+  udp_local_node_runtime_init (vm);
 
 #define _(n,s) add_dst_port (um, UDP_DST_PORT_##s, #s, 1 /* is_ip4 */);
   foreach_udp4_dst_port
 #undef _
-    rt = vlib_node_get_runtime_data (vm, udp6_input_node.index);
-
-  rt->next_by_dst_port = sparse_vec_new
-    ( /* elt bytes */ sizeof (rt->next_by_dst_port[0]),
-     /* bits in index */ BITS (((udp_header_t *) 0)->dst_port));
-
-  rt->punt_unknown = 0;
-
 #define _(n,s) add_dst_port (um, UDP_DST_PORT_##s, #s, 0 /* is_ip4 */);
-  foreach_udp6_dst_port
+    foreach_udp6_dst_port
 #undef _
     ip4_register_protocol (IP_PROTOCOL_UDP, udp4_input_node.index);
   /* Note: ip6 differs from ip4, UDP is hotwired to ip6-udp-lookup */
@@ -657,6 +668,15 @@
 
 VLIB_INIT_FUNCTION (udp_local_init);
 
+clib_error_t *
+udp_local_worker_init (vlib_main_t * vm)
+{
+  udp_local_node_runtime_init (vm);	/* udp4/udp6 input runtime for vm */
+  return 0;
+}
+
+VLIB_WORKER_INIT_FUNCTION (udp_local_worker_init);
+
 /*
  * fd.io coding-style-patch-verification: ON
  *