Use thread local storage for thread index

This patch deprecates stack-based thread identification, and also
removes the requirement that thread stacks are adjacent.

Finally, possibly annoying for some folks, it renames
all occurrences of cpu_index and cpu_number to thread
index. Using the word "cpu" is misleading here, as a thread
can be migrated to a different CPU, and it is also not
related to the Linux CPU index.

Change-Id: I68cdaf661e701d2336fc953dcb9978d10a70f7c1
Signed-off-by: Damjan Marion <damarion@cisco.com>
diff --git a/src/examples/srv6-sample-localsid/node.c b/src/examples/srv6-sample-localsid/node.c
index 7bae9cd..e83e235 100644
--- a/src/examples/srv6-sample-localsid/node.c
+++ b/src/examples/srv6-sample-localsid/node.c
@@ -114,7 +114,7 @@
   from = vlib_frame_vector_args (frame);
   n_left_from = frame->n_vectors;
   next_index = node->cached_next_index;
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
 
   while (n_left_from > 0)
   {
@@ -168,7 +168,7 @@
       /* This increments the SRv6 per LocalSID counters.*/
       vlib_increment_combined_counter
         (((next0 == SRV6_SAMPLE_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) : &(sm->sr_ls_valid_counters)),
-        cpu_index,
+        thread_index,
         ls0 - sm->localsids,
         1, vlib_buffer_length_in_chain (vm, b0));
 
diff --git a/src/plugins/dpdk/buffer.c b/src/plugins/dpdk/buffer.c
index 2765c29..c80b3fa 100644
--- a/src/plugins/dpdk/buffer.c
+++ b/src/plugins/dpdk/buffer.c
@@ -132,7 +132,7 @@
   u32 merge_index;
   int i;
 
-  ASSERT (os_get_cpu_number () == 0);
+  ASSERT (vlib_get_thread_index () == 0);
 
   f = vlib_buffer_get_free_list (vm, free_list_index);
 
diff --git a/src/plugins/dpdk/device/device.c b/src/plugins/dpdk/device/device.c
index 50b2668..9166124 100644
--- a/src/plugins/dpdk/device/device.c
+++ b/src/plugins/dpdk/device/device.c
@@ -243,7 +243,7 @@
   ASSERT (ring->tx_tail == 0);
 
   n_retry = 16;
-  queue_id = vm->cpu_index;
+  queue_id = vm->thread_index;
 
   do
     {
@@ -266,7 +266,7 @@
 	{
 	  /* no wrap, transmit in one burst */
 	  dpdk_device_hqos_per_worker_thread_t *hqos =
-	    &xd->hqos_wt[vm->cpu_index];
+	    &xd->hqos_wt[vm->thread_index];
 
 	  ASSERT (hqos->swq != NULL);
 
@@ -332,7 +332,7 @@
 		     vlib_buffer_t * b, u32 bi, struct rte_mbuf **mbp)
 {
   dpdk_main_t *dm = &dpdk_main;
-  u32 my_cpu = vm->cpu_index;
+  u32 my_cpu = vm->thread_index;
   struct rte_mbuf *mb_new;
 
   if (PREDICT_FALSE (b->flags & VLIB_BUFFER_RECYCLE) == 0)
@@ -376,7 +376,7 @@
   tx_ring_hdr_t *ring;
   u32 n_on_ring;
 
-  my_cpu = vm->cpu_index;
+  my_cpu = vm->thread_index;
 
   queue_id = my_cpu;
 
diff --git a/src/plugins/dpdk/device/dpdk_priv.h b/src/plugins/dpdk/device/dpdk_priv.h
index dd40ff4..52b4ca4 100644
--- a/src/plugins/dpdk/device/dpdk_priv.h
+++ b/src/plugins/dpdk/device/dpdk_priv.h
@@ -79,7 +79,7 @@
 {
   vlib_simple_counter_main_t *cm;
   vnet_main_t *vnm = vnet_get_main ();
-  u32 my_cpu = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
   u64 rxerrors, last_rxerrors;
 
   /* only update counters for PMD interfaces */
@@ -96,7 +96,7 @@
       cm = vec_elt_at_index (vnm->interface_main.sw_if_counters,
 			     VNET_INTERFACE_COUNTER_RX_NO_BUF);
 
-      vlib_increment_simple_counter (cm, my_cpu, xd->vlib_sw_if_index,
+      vlib_increment_simple_counter (cm, thread_index, xd->vlib_sw_if_index,
 				     xd->stats.rx_nombuf -
 				     xd->last_stats.rx_nombuf);
     }
@@ -107,7 +107,7 @@
       cm = vec_elt_at_index (vnm->interface_main.sw_if_counters,
 			     VNET_INTERFACE_COUNTER_RX_MISS);
 
-      vlib_increment_simple_counter (cm, my_cpu, xd->vlib_sw_if_index,
+      vlib_increment_simple_counter (cm, thread_index, xd->vlib_sw_if_index,
 				     xd->stats.imissed -
 				     xd->last_stats.imissed);
     }
@@ -119,7 +119,7 @@
       cm = vec_elt_at_index (vnm->interface_main.sw_if_counters,
 			     VNET_INTERFACE_COUNTER_RX_ERROR);
 
-      vlib_increment_simple_counter (cm, my_cpu, xd->vlib_sw_if_index,
+      vlib_increment_simple_counter (cm, thread_index, xd->vlib_sw_if_index,
 				     rxerrors - last_rxerrors);
     }
 
diff --git a/src/plugins/dpdk/device/init.c b/src/plugins/dpdk/device/init.c
index 538db6c..7eaf8da 100755
--- a/src/plugins/dpdk/device/init.c
+++ b/src/plugins/dpdk/device/init.c
@@ -324,7 +324,7 @@
   int rv;
   int j;
 
-  ASSERT (os_get_cpu_number () == 0);
+  ASSERT (vlib_get_thread_index () == 0);
 
   if (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP)
     {
diff --git a/src/plugins/dpdk/device/node.c b/src/plugins/dpdk/device/node.c
index e740fd1..b10e0fa 100644
--- a/src/plugins/dpdk/device/node.c
+++ b/src/plugins/dpdk/device/node.c
@@ -283,7 +283,7 @@
  */
 static_always_inline u32
 dpdk_device_input (dpdk_main_t * dm, dpdk_device_t * xd,
-		   vlib_node_runtime_t * node, u32 cpu_index, u16 queue_id,
+		   vlib_node_runtime_t * node, u32 thread_index, u16 queue_id,
 		   int maybe_multiseg)
 {
   u32 n_buffers;
@@ -294,7 +294,7 @@
   uword n_rx_bytes = 0;
   u32 n_trace, trace_cnt __attribute__ ((unused));
   vlib_buffer_free_list_t *fl;
-  vlib_buffer_t *bt = vec_elt_at_index (dm->buffer_templates, cpu_index);
+  vlib_buffer_t *bt = vec_elt_at_index (dm->buffer_templates, thread_index);
 
   if ((xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) == 0)
     return 0;
@@ -306,7 +306,7 @@
       return 0;
     }
 
-  vec_reset_length (xd->d_trace_buffers[cpu_index]);
+  vec_reset_length (xd->d_trace_buffers[thread_index]);
   trace_cnt = n_trace = vlib_get_trace_count (vm, node);
 
   if (n_trace > 0)
@@ -318,7 +318,7 @@
 	{
 	  struct rte_mbuf *mb = xd->rx_vectors[queue_id][mb_index++];
 	  vlib_buffer_t *b = vlib_buffer_from_rte_mbuf (mb);
-	  vec_add1 (xd->d_trace_buffers[cpu_index],
+	  vec_add1 (xd->d_trace_buffers[thread_index],
 		    vlib_get_buffer_index (vm, b));
 	}
     }
@@ -546,20 +546,22 @@
       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
     }
 
-  if (PREDICT_FALSE (vec_len (xd->d_trace_buffers[cpu_index]) > 0))
+  if (PREDICT_FALSE (vec_len (xd->d_trace_buffers[thread_index]) > 0))
     {
-      dpdk_rx_trace (dm, node, xd, queue_id, xd->d_trace_buffers[cpu_index],
-		     vec_len (xd->d_trace_buffers[cpu_index]));
-      vlib_set_trace_count (vm, node, n_trace -
-			    vec_len (xd->d_trace_buffers[cpu_index]));
+      dpdk_rx_trace (dm, node, xd, queue_id,
+		     xd->d_trace_buffers[thread_index],
+		     vec_len (xd->d_trace_buffers[thread_index]));
+      vlib_set_trace_count (vm, node,
+			    n_trace -
+			    vec_len (xd->d_trace_buffers[thread_index]));
     }
 
   vlib_increment_combined_counter
     (vnet_get_main ()->interface_main.combined_sw_if_counters
      + VNET_INTERFACE_COUNTER_RX,
-     cpu_index, xd->vlib_sw_if_index, mb_index, n_rx_bytes);
+     thread_index, xd->vlib_sw_if_index, mb_index, n_rx_bytes);
 
-  vnet_device_increment_rx_packets (cpu_index, mb_index);
+  vnet_device_increment_rx_packets (thread_index, mb_index);
 
   return mb_index;
 }
@@ -630,19 +632,19 @@
   dpdk_device_t *xd;
   uword n_rx_packets = 0;
   dpdk_device_and_queue_t *dq;
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
 
   /*
    * Poll all devices on this cpu for input/interrupts.
    */
   /* *INDENT-OFF* */
-  vec_foreach (dq, dm->devices_by_cpu[cpu_index])
+  vec_foreach (dq, dm->devices_by_cpu[thread_index])
     {
       xd = vec_elt_at_index(dm->devices, dq->device);
       if (xd->flags & DPDK_DEVICE_FLAG_MAYBE_MULTISEG)
-        n_rx_packets += dpdk_device_input (dm, xd, node, cpu_index, dq->queue_id, /* maybe_multiseg */ 1);
+        n_rx_packets += dpdk_device_input (dm, xd, node, thread_index, dq->queue_id, /* maybe_multiseg */ 1);
       else
-        n_rx_packets += dpdk_device_input (dm, xd, node, cpu_index, dq->queue_id, /* maybe_multiseg */ 0);
+        n_rx_packets += dpdk_device_input (dm, xd, node, thread_index, dq->queue_id, /* maybe_multiseg */ 0);
     }
   /* *INDENT-ON* */
 
diff --git a/src/plugins/dpdk/hqos/hqos.c b/src/plugins/dpdk/hqos/hqos.c
index a288fca..8b251be 100644
--- a/src/plugins/dpdk/hqos/hqos.c
+++ b/src/plugins/dpdk/hqos/hqos.c
@@ -397,7 +397,7 @@
 dpdk_hqos_thread_internal_hqos_dbg_bypass (vlib_main_t * vm)
 {
   dpdk_main_t *dm = &dpdk_main;
-  u32 cpu_index = vm->cpu_index;
+  u32 thread_index = vm->thread_index;
   u32 dev_pos;
 
   dev_pos = 0;
@@ -405,12 +405,12 @@
     {
       vlib_worker_thread_barrier_check ();
 
-      u32 n_devs = vec_len (dm->devices_by_hqos_cpu[cpu_index]);
+      u32 n_devs = vec_len (dm->devices_by_hqos_cpu[thread_index]);
       if (dev_pos >= n_devs)
 	dev_pos = 0;
 
       dpdk_device_and_queue_t *dq =
-	vec_elt_at_index (dm->devices_by_hqos_cpu[cpu_index], dev_pos);
+	vec_elt_at_index (dm->devices_by_hqos_cpu[thread_index], dev_pos);
       dpdk_device_t *xd = vec_elt_at_index (dm->devices, dq->device);
 
       dpdk_device_hqos_per_hqos_thread_t *hqos = xd->hqos_ht;
@@ -479,7 +479,7 @@
 dpdk_hqos_thread_internal (vlib_main_t * vm)
 {
   dpdk_main_t *dm = &dpdk_main;
-  u32 cpu_index = vm->cpu_index;
+  u32 thread_index = vm->thread_index;
   u32 dev_pos;
 
   dev_pos = 0;
@@ -487,7 +487,7 @@
     {
       vlib_worker_thread_barrier_check ();
 
-      u32 n_devs = vec_len (dm->devices_by_hqos_cpu[cpu_index]);
+      u32 n_devs = vec_len (dm->devices_by_hqos_cpu[thread_index]);
       if (PREDICT_FALSE (n_devs == 0))
 	{
 	  dev_pos = 0;
@@ -497,7 +497,7 @@
 	dev_pos = 0;
 
       dpdk_device_and_queue_t *dq =
-	vec_elt_at_index (dm->devices_by_hqos_cpu[cpu_index], dev_pos);
+	vec_elt_at_index (dm->devices_by_hqos_cpu[thread_index], dev_pos);
       dpdk_device_t *xd = vec_elt_at_index (dm->devices, dq->device);
 
       dpdk_device_hqos_per_hqos_thread_t *hqos = xd->hqos_ht;
@@ -586,7 +586,7 @@
 
   vm = vlib_get_main ();
 
-  ASSERT (vm->cpu_index == os_get_cpu_number ());
+  ASSERT (vm->thread_index == vlib_get_thread_index ());
 
   clib_time_init (&vm->clib_time);
   clib_mem_set_heap (w->thread_mheap);
@@ -595,7 +595,7 @@
   while (tm->worker_thread_release == 0)
     vlib_worker_thread_barrier_check ();
 
-  if (vec_len (dm->devices_by_hqos_cpu[vm->cpu_index]) == 0)
+  if (vec_len (dm->devices_by_hqos_cpu[vm->thread_index]) == 0)
     return
       clib_error
       ("current I/O TX thread does not have any devices assigned to it");
diff --git a/src/plugins/dpdk/ipsec/cli.c b/src/plugins/dpdk/ipsec/cli.c
index cd0a603..3ae8c9b 100644
--- a/src/plugins/dpdk/ipsec/cli.c
+++ b/src/plugins/dpdk/ipsec/cli.c
@@ -42,8 +42,8 @@
   for (i = 0; i < tm->n_vlib_mains; i++)
     {
       uword key, data;
-      u32 cpu_index = vlib_mains[i]->cpu_index;
-      crypto_worker_main_t *cwm = &dcm->workers_main[cpu_index];
+      u32 thread_index = vlib_mains[i]->thread_index;
+      crypto_worker_main_t *cwm = &dcm->workers_main[thread_index];
       u8 *s = 0;
 
       if (skip_master)
@@ -57,7 +57,7 @@
 	  i32 last_cdev = -1;
 	  crypto_qp_data_t *qpd;
 
-	  s = format (s, "%u\t", cpu_index);
+	  s = format (s, "%u\t", thread_index);
 
 	  /* *INDENT-OFF* */
 	  vec_foreach (qpd, cwm->qp_data)
@@ -95,7 +95,7 @@
 	    cap.sym.auth.algo = p_key->auth_algo;
 	    check_algo_is_supported (&cap, auth_str);
 	    vlib_cli_output (vm, "%u\t%10s\t%15s\t%3s\t%u\t%u\n",
-			     vlib_mains[i]->cpu_index, cipher_str, auth_str,
+			     vlib_mains[i]->thread_index, cipher_str, auth_str,
 			     p_key->is_outbound ? "out" : "in",
 			     cwm->qp_data[data].dev_id,
 			     cwm->qp_data[data].qp_id);
diff --git a/src/plugins/dpdk/ipsec/crypto_node.c b/src/plugins/dpdk/ipsec/crypto_node.c
index dc3452b..a3c4590 100644
--- a/src/plugins/dpdk/ipsec/crypto_node.c
+++ b/src/plugins/dpdk/ipsec/crypto_node.c
@@ -171,9 +171,9 @@
 dpdk_crypto_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
 		      vlib_frame_t * frame)
 {
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
   dpdk_crypto_main_t *dcm = &dpdk_crypto_main;
-  crypto_worker_main_t *cwm = &dcm->workers_main[cpu_index];
+  crypto_worker_main_t *cwm = &dcm->workers_main[thread_index];
   crypto_qp_data_t *qpd;
   u32 n_deq = 0;
 
diff --git a/src/plugins/dpdk/ipsec/esp.h b/src/plugins/dpdk/ipsec/esp.h
index 320295b..56f0c756 100644
--- a/src/plugins/dpdk/ipsec/esp.h
+++ b/src/plugins/dpdk/ipsec/esp.h
@@ -170,9 +170,9 @@
 create_sym_sess (ipsec_sa_t * sa, crypto_sa_session_t * sa_sess,
 		 u8 is_outbound)
 {
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
   dpdk_crypto_main_t *dcm = &dpdk_crypto_main;
-  crypto_worker_main_t *cwm = &dcm->workers_main[cpu_index];
+  crypto_worker_main_t *cwm = &dcm->workers_main[thread_index];
   struct rte_crypto_sym_xform cipher_xform = { 0 };
   struct rte_crypto_sym_xform auth_xform = { 0 };
   struct rte_crypto_sym_xform *xfs;
diff --git a/src/plugins/dpdk/ipsec/esp_decrypt.c b/src/plugins/dpdk/ipsec/esp_decrypt.c
index 286e03f..bab76e3 100644
--- a/src/plugins/dpdk/ipsec/esp_decrypt.c
+++ b/src/plugins/dpdk/ipsec/esp_decrypt.c
@@ -88,7 +88,7 @@
 {
   u32 n_left_from, *from, *to_next, next_index;
   ipsec_main_t *im = &ipsec_main;
-  u32 cpu_index = os_get_cpu_number();
+  u32 thread_index = vlib_get_thread_index();
   dpdk_crypto_main_t * dcm = &dpdk_crypto_main;
   dpdk_esp_main_t * em = &dpdk_esp_main;
   u32 i;
@@ -104,7 +104,7 @@
       return n_left_from;
     }
 
-  crypto_worker_main_t *cwm = vec_elt_at_index(dcm->workers_main, cpu_index);
+  crypto_worker_main_t *cwm = vec_elt_at_index(dcm->workers_main, thread_index);
   u32 n_qps = vec_len(cwm->qp_data);
   struct rte_crypto_op ** cops_to_enq[n_qps];
   u32 n_cop_qp[n_qps], * bi_to_enq[n_qps];
diff --git a/src/plugins/dpdk/ipsec/esp_encrypt.c b/src/plugins/dpdk/ipsec/esp_encrypt.c
index 5b03de7..f996d7d 100644
--- a/src/plugins/dpdk/ipsec/esp_encrypt.c
+++ b/src/plugins/dpdk/ipsec/esp_encrypt.c
@@ -93,7 +93,7 @@
 {
   u32 n_left_from, *from, *to_next, next_index;
   ipsec_main_t *im = &ipsec_main;
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
   dpdk_crypto_main_t *dcm = &dpdk_crypto_main;
   dpdk_esp_main_t *em = &dpdk_esp_main;
   u32 i;
@@ -111,7 +111,8 @@
       return n_left_from;
     }
 
-  crypto_worker_main_t *cwm = vec_elt_at_index (dcm->workers_main, cpu_index);
+  crypto_worker_main_t *cwm =
+    vec_elt_at_index (dcm->workers_main, thread_index);
   u32 n_qps = vec_len (cwm->qp_data);
   struct rte_crypto_op **cops_to_enq[n_qps];
   u32 n_cop_qp[n_qps], *bi_to_enq[n_qps];
diff --git a/src/plugins/dpdk/ipsec/ipsec.c b/src/plugins/dpdk/ipsec/ipsec.c
index b0aaaae..5d8f4fb 100644
--- a/src/plugins/dpdk/ipsec/ipsec.c
+++ b/src/plugins/dpdk/ipsec/ipsec.c
@@ -289,7 +289,7 @@
 	      if (!map)
 		{
 		  clib_warning ("unable to create hash table for worker %u",
-				vlib_mains[i]->cpu_index);
+				vlib_mains[i]->thread_index);
 		  goto error;
 		}
 	      cwm->algo_qp_map = map;
diff --git a/src/plugins/dpdk/ipsec/ipsec.h b/src/plugins/dpdk/ipsec/ipsec.h
index 28bffc8..f0f793c 100644
--- a/src/plugins/dpdk/ipsec/ipsec.h
+++ b/src/plugins/dpdk/ipsec/ipsec.h
@@ -95,8 +95,8 @@
 crypto_alloc_cops ()
 {
   dpdk_crypto_main_t *dcm = &dpdk_crypto_main;
-  u32 cpu_index = os_get_cpu_number ();
-  crypto_worker_main_t *cwm = &dcm->workers_main[cpu_index];
+  u32 thread_index = vlib_get_thread_index ();
+  crypto_worker_main_t *cwm = &dcm->workers_main[thread_index];
   unsigned socket_id = rte_socket_id ();
   crypto_qp_data_t *qpd;
 
diff --git a/src/plugins/dpdk/main.c b/src/plugins/dpdk/main.c
index 7ee2a78..942b8b2 100644
--- a/src/plugins/dpdk/main.c
+++ b/src/plugins/dpdk/main.c
@@ -39,7 +39,7 @@
    * thread then do not intercept. (Must not be called from an
    * independent pthread).
    */
-  if (os_get_cpu_number () == 0)
+  if (vlib_get_thread_index () == 0)
     {
       /*
        * We're in the vlib main thread or a vlib process. Make sure
diff --git a/src/plugins/flowperpkt/l2_node.c b/src/plugins/flowperpkt/l2_node.c
index 1c2f681..fdaf81d 100644
--- a/src/plugins/flowperpkt/l2_node.c
+++ b/src/plugins/flowperpkt/l2_node.c
@@ -102,7 +102,7 @@
 		       u8 * src_mac, u8 * dst_mac,
 		       u16 ethertype, u64 timestamp, u16 length, int do_flush)
 {
-  u32 my_cpu_number = vm->cpu_index;
+  u32 my_cpu_number = vm->thread_index;
   flow_report_main_t *frm = &flow_report_main;
   ip4_header_t *ip;
   udp_header_t *udp;
diff --git a/src/plugins/flowperpkt/node.c b/src/plugins/flowperpkt/node.c
index f77f087..0277682 100644
--- a/src/plugins/flowperpkt/node.c
+++ b/src/plugins/flowperpkt/node.c
@@ -101,7 +101,7 @@
 			 u32 src_address, u32 dst_address,
 			 u8 tos, u64 timestamp, u16 length, int do_flush)
 {
-  u32 my_cpu_number = vm->cpu_index;
+  u32 my_cpu_number = vm->thread_index;
   flow_report_main_t *frm = &flow_report_main;
   ip4_header_t *ip;
   udp_header_t *udp;
diff --git a/src/plugins/ioam/export-common/ioam_export.h b/src/plugins/ioam/export-common/ioam_export.h
index 2bf3fd5..9de0d13 100644
--- a/src/plugins/ioam/export-common/ioam_export.h
+++ b/src/plugins/ioam/export-common/ioam_export.h
@@ -477,8 +477,8 @@
   from = vlib_frame_vector_args (F);                                           \
   n_left_from = (F)->n_vectors;                                                \
   next_index = (N)->cached_next_index;                                         \
-  while (__sync_lock_test_and_set ((EM)->lockp[(VM)->cpu_index], 1));          \
-  my_buf = ioam_export_get_my_buffer (EM, (VM)->cpu_index);                    \
+  while (__sync_lock_test_and_set ((EM)->lockp[(VM)->thread_index], 1));       \
+  my_buf = ioam_export_get_my_buffer (EM, (VM)->thread_index);                 \
   my_buf->touched_at = vlib_time_now (VM);                                     \
   while (n_left_from > 0)                                                      \
     {                                                                          \
@@ -620,7 +620,7 @@
     }                                                                          \
   vlib_node_increment_counter (VM, export_node.index,                          \
 			       EXPORT_ERROR_RECORDED, pkts_recorded);          \
-  *(EM)->lockp[(VM)->cpu_index] = 0;                                           \
+  *(EM)->lockp[(VM)->thread_index] = 0;                                        \
 } while(0)
 
 #endif /* __included_ioam_export_h__ */
diff --git a/src/plugins/ioam/ip6/ioam_cache_tunnel_select_node.c b/src/plugins/ioam/ip6/ioam_cache_tunnel_select_node.c
index a56dc04..0cf742c 100644
--- a/src/plugins/ioam/ip6/ioam_cache_tunnel_select_node.c
+++ b/src/plugins/ioam/ip6/ioam_cache_tunnel_select_node.c
@@ -396,7 +396,7 @@
 					      clib_net_to_host_u32
 					      (tcp0->seq_number) + 1,
 					      no_of_responses, now,
-					      vm->cpu_index, &pool_index0))
+					      vm->thread_index, &pool_index0))
 		    {
 		      cache_ts_added++;
 		    }
@@ -419,7 +419,7 @@
 	      e2e =
 		(ioam_e2e_cache_option_t *) ((u8 *) hbh0 +
 					     cm->rewrite_pool_index_offset);
-	      e2e->pool_id = (u8) vm->cpu_index;
+	      e2e->pool_id = (u8) vm->thread_index;
 	      e2e->pool_index = pool_index0;
 	      ioam_e2e_id_rewrite_handler ((ioam_e2e_id_option_t *)
 					   ((u8 *) e2e +
@@ -455,7 +455,7 @@
 					      clib_net_to_host_u32
 					      (tcp1->seq_number) + 1,
 					      no_of_responses, now,
-					      vm->cpu_index, &pool_index1))
+					      vm->thread_index, &pool_index1))
 		    {
 		      cache_ts_added++;
 		    }
@@ -479,7 +479,7 @@
 	      e2e =
 		(ioam_e2e_cache_option_t *) ((u8 *) hbh1 +
 					     cm->rewrite_pool_index_offset);
-	      e2e->pool_id = (u8) vm->cpu_index;
+	      e2e->pool_id = (u8) vm->thread_index;
 	      e2e->pool_index = pool_index1;
 	      ioam_e2e_id_rewrite_handler ((ioam_e2e_id_option_t *)
 					   ((u8 *) e2e +
@@ -562,7 +562,7 @@
 					      clib_net_to_host_u32
 					      (tcp0->seq_number) + 1,
 					      no_of_responses, now,
-					      vm->cpu_index, &pool_index0))
+					      vm->thread_index, &pool_index0))
 		    {
 		      cache_ts_added++;
 		    }
@@ -585,7 +585,7 @@
 	      e2e =
 		(ioam_e2e_cache_option_t *) ((u8 *) hbh0 +
 					     cm->rewrite_pool_index_offset);
-	      e2e->pool_id = (u8) vm->cpu_index;
+	      e2e->pool_id = (u8) vm->thread_index;
 	      e2e->pool_index = pool_index0;
 	      ioam_e2e_id_rewrite_handler ((ioam_e2e_id_option_t *)
 					   ((u8 *) e2e +
@@ -701,7 +701,7 @@
   ioam_cache_main_t *cm = &ioam_cache_main;
   int i;
   u32 pool_index;
-  u32 thread_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
   u32 count = 0;
 
   for (i = 0; i < vec_len (expired_timers); i++)
@@ -724,7 +724,7 @@
 				  vlib_frame_t * f)
 {
   ioam_cache_main_t *cm = &ioam_cache_main;
-  u32 my_thread_index = os_get_cpu_number ();
+  u32 my_thread_index = vlib_get_thread_index ();
   struct timespec ts, tsrem;
 
   tw_timer_expire_timers_16t_2w_512sl (&cm->timer_wheels[my_thread_index],
diff --git a/src/plugins/ixge/ixge.c b/src/plugins/ixge/ixge.c
index f3c5cc0..08f5b69 100644
--- a/src/plugins/ixge/ixge.c
+++ b/src/plugins/ixge/ixge.c
@@ -1887,7 +1887,7 @@
   vlib_increment_combined_counter (vnet_main.
 				   interface_main.combined_sw_if_counters +
 				   VNET_INTERFACE_COUNTER_RX,
-				   0 /* cpu_index */ ,
+				   0 /* thread_index */ ,
 				   xd->vlib_sw_if_index, n_packets,
 				   dq->rx.n_bytes);
 
diff --git a/src/plugins/lb/lb.c b/src/plugins/lb/lb.c
index add8123..addc2a4 100644
--- a/src/plugins/lb/lb.c
+++ b/src/plugins/lb/lb.c
@@ -63,11 +63,11 @@
   s = format(s, " #vips: %u\n", pool_elts(lbm->vips));
   s = format(s, " #ass: %u\n", pool_elts(lbm->ass) - 1);
 
-  u32 cpu_index;
-  for(cpu_index = 0; cpu_index < tm->n_vlib_mains; cpu_index++ ) {
-    lb_hash_t *h = lbm->per_cpu[cpu_index].sticky_ht;
+  u32 thread_index;
+  for(thread_index = 0; thread_index < tm->n_vlib_mains; thread_index++ ) {
+    lb_hash_t *h = lbm->per_cpu[thread_index].sticky_ht;
     if (h) {
-      s = format(s, "core %d\n", cpu_index);
+      s = format(s, "core %d\n", thread_index);
       s = format(s, "  timeout: %ds\n", h->timeout);
       s = format(s, "  usage: %d / %d\n", lb_hash_elts(h, lb_hash_time_now(vlib_get_main())),  lb_hash_size(h));
     }
diff --git a/src/plugins/lb/node.c b/src/plugins/lb/node.c
index 8b763c5..3171148 100644
--- a/src/plugins/lb/node.c
+++ b/src/plugins/lb/node.c
@@ -60,10 +60,10 @@
   return s;
 }
 
-lb_hash_t *lb_get_sticky_table(u32 cpu_index)
+lb_hash_t *lb_get_sticky_table(u32 thread_index)
 {
   lb_main_t *lbm = &lb_main;
-  lb_hash_t *sticky_ht = lbm->per_cpu[cpu_index].sticky_ht;
+  lb_hash_t *sticky_ht = lbm->per_cpu[thread_index].sticky_ht;
   //Check if size changed
   if (PREDICT_FALSE(sticky_ht && (lbm->per_cpu_sticky_buckets != lb_hash_nbuckets(sticky_ht))))
     {
@@ -71,8 +71,8 @@
       lb_hash_bucket_t *b;
       u32 i;
       lb_hash_foreach_entry(sticky_ht, b, i) {
-	vlib_refcount_add(&lbm->as_refcount, cpu_index, b->value[i], -1);
-	vlib_refcount_add(&lbm->as_refcount, cpu_index, 0, 1);
+	vlib_refcount_add(&lbm->as_refcount, thread_index, b->value[i], -1);
+	vlib_refcount_add(&lbm->as_refcount, thread_index, 0, 1);
       }
 
       lb_hash_free(sticky_ht);
@@ -81,8 +81,8 @@
 
   //Create if necessary
   if (PREDICT_FALSE(sticky_ht == NULL)) {
-    lbm->per_cpu[cpu_index].sticky_ht = lb_hash_alloc(lbm->per_cpu_sticky_buckets, lbm->flow_timeout);
-    sticky_ht = lbm->per_cpu[cpu_index].sticky_ht;
+    lbm->per_cpu[thread_index].sticky_ht = lb_hash_alloc(lbm->per_cpu_sticky_buckets, lbm->flow_timeout);
+    sticky_ht = lbm->per_cpu[thread_index].sticky_ht;
     clib_warning("Regenerated sticky table %p", sticky_ht);
   }
 
@@ -153,10 +153,10 @@
 {
   lb_main_t *lbm = &lb_main;
   u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
-  u32 cpu_index = os_get_cpu_number();
+  u32 thread_index = vlib_get_thread_index();
   u32 lb_time = lb_hash_time_now(vm);
 
-  lb_hash_t *sticky_ht = lb_get_sticky_table(cpu_index);
+  lb_hash_t *sticky_ht = lb_get_sticky_table(thread_index);
   from = vlib_frame_vector_args (frame);
   n_left_from = frame->n_vectors;
   next_index = node->cached_next_index;
@@ -240,9 +240,9 @@
 	  //Configuration may be changed, vectors resized, etc...
 
 	  //Dereference previously used
-	  vlib_refcount_add(&lbm->as_refcount, cpu_index,
+	  vlib_refcount_add(&lbm->as_refcount, thread_index,
 			    lb_hash_available_value(sticky_ht, hash0, available_index0), -1);
-	  vlib_refcount_add(&lbm->as_refcount, cpu_index,
+	  vlib_refcount_add(&lbm->as_refcount, thread_index,
 			    asindex0, 1);
 
 	  //Add sticky entry
@@ -260,7 +260,7 @@
 	}
 
       vlib_increment_simple_counter(&lbm->vip_counters[counter],
-				    cpu_index,
+				    thread_index,
 				    vnet_buffer (p0)->ip.adj_index[VLIB_TX],
 				    1);
 
diff --git a/src/plugins/lb/refcount.c b/src/plugins/lb/refcount.c
index 22415c8..6f01ab5 100644
--- a/src/plugins/lb/refcount.c
+++ b/src/plugins/lb/refcount.c
@@ -31,10 +31,10 @@
 {
   u64 count = 0;
   vlib_thread_main_t *tm = vlib_get_thread_main ();
-  u32 cpu_index;
-  for (cpu_index = 0; cpu_index < tm->n_vlib_mains; cpu_index++) {
-    if (r->per_cpu[cpu_index].length > index)
-      count += r->per_cpu[cpu_index].counters[index];
+  u32 thread_index;
+  for (thread_index = 0; thread_index < tm->n_vlib_mains; thread_index++) {
+    if (r->per_cpu[thread_index].length > index)
+      count += r->per_cpu[thread_index].counters[index];
   }
   return count;
 }
diff --git a/src/plugins/lb/refcount.h b/src/plugins/lb/refcount.h
index 8c26e7b..dcfcb3f 100644
--- a/src/plugins/lb/refcount.h
+++ b/src/plugins/lb/refcount.h
@@ -45,9 +45,9 @@
 void __vlib_refcount_resize(vlib_refcount_per_cpu_t *per_cpu, u32 size);
 
 static_always_inline
-void vlib_refcount_add(vlib_refcount_t *r, u32 cpu_index, u32 counter_index, i32 v)
+void vlib_refcount_add(vlib_refcount_t *r, u32 thread_index, u32 counter_index, i32 v)
 {
-  vlib_refcount_per_cpu_t *per_cpu = &r->per_cpu[cpu_index];
+  vlib_refcount_per_cpu_t *per_cpu = &r->per_cpu[thread_index];
   if (PREDICT_FALSE(counter_index >= per_cpu->length))
     __vlib_refcount_resize(per_cpu, clib_max(counter_index + 16, per_cpu->length * 2));
 
diff --git a/src/plugins/memif/node.c b/src/plugins/memif/node.c
index 659d5df..cee1f3d 100644
--- a/src/plugins/memif/node.c
+++ b/src/plugins/memif/node.c
@@ -94,7 +94,7 @@
   u32 n_rx_bytes = 0;
   u32 *to_next = 0;
   u32 n_free_bufs;
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
   u32 bi0, bi1;
   vlib_buffer_t *b0, *b1;
   u16 ring_size = 1 << mif->log2_ring_size;
@@ -105,14 +105,15 @@
   if (mif->per_interface_next_index != ~0)
     next_index = mif->per_interface_next_index;
 
-  n_free_bufs = vec_len (nm->rx_buffers[cpu_index]);
+  n_free_bufs = vec_len (nm->rx_buffers[thread_index]);
   if (PREDICT_FALSE (n_free_bufs < ring_size))
     {
-      vec_validate (nm->rx_buffers[cpu_index], ring_size + n_free_bufs - 1);
+      vec_validate (nm->rx_buffers[thread_index],
+		    ring_size + n_free_bufs - 1);
       n_free_bufs +=
-	vlib_buffer_alloc (vm, &nm->rx_buffers[cpu_index][n_free_bufs],
+	vlib_buffer_alloc (vm, &nm->rx_buffers[thread_index][n_free_bufs],
 			   ring_size);
-      _vec_len (nm->rx_buffers[cpu_index]) = n_free_bufs;
+      _vec_len (nm->rx_buffers[thread_index]) = n_free_bufs;
     }
 
   head = ring->head;
@@ -158,15 +159,15 @@
 			     CLIB_CACHE_LINE_BYTES, LOAD);
 	    }
 	  /* get empty buffer */
-	  u32 last_buf = vec_len (nm->rx_buffers[cpu_index]) - 1;
-	  bi0 = nm->rx_buffers[cpu_index][last_buf];
-	  bi1 = nm->rx_buffers[cpu_index][last_buf - 1];
-	  _vec_len (nm->rx_buffers[cpu_index]) -= 2;
+	  u32 last_buf = vec_len (nm->rx_buffers[thread_index]) - 1;
+	  bi0 = nm->rx_buffers[thread_index][last_buf];
+	  bi1 = nm->rx_buffers[thread_index][last_buf - 1];
+	  _vec_len (nm->rx_buffers[thread_index]) -= 2;
 
 	  if (last_buf > 4)
 	    {
-	      memif_prefetch (vm, nm->rx_buffers[cpu_index][last_buf - 2]);
-	      memif_prefetch (vm, nm->rx_buffers[cpu_index][last_buf - 3]);
+	      memif_prefetch (vm, nm->rx_buffers[thread_index][last_buf - 2]);
+	      memif_prefetch (vm, nm->rx_buffers[thread_index][last_buf - 3]);
 	    }
 
 	  /* enqueue buffer */
@@ -256,9 +257,9 @@
       while (num_slots && n_left_to_next)
 	{
 	  /* get empty buffer */
-	  u32 last_buf = vec_len (nm->rx_buffers[cpu_index]) - 1;
-	  bi0 = nm->rx_buffers[cpu_index][last_buf];
-	  _vec_len (nm->rx_buffers[cpu_index]) = last_buf;
+	  u32 last_buf = vec_len (nm->rx_buffers[thread_index]) - 1;
+	  bi0 = nm->rx_buffers[thread_index][last_buf];
+	  _vec_len (nm->rx_buffers[thread_index]) = last_buf;
 
 	  /* enqueue buffer */
 	  to_next[0] = bi0;
@@ -315,7 +316,7 @@
   ring->tail = head;
 
   vlib_increment_combined_counter (vnm->interface_main.combined_sw_if_counters
-				   + VNET_INTERFACE_COUNTER_RX, cpu_index,
+				   + VNET_INTERFACE_COUNTER_RX, thread_index,
 				   mif->hw_if_index, n_rx_packets,
 				   n_rx_bytes);
 
@@ -327,7 +328,7 @@
 		vlib_frame_t * frame)
 {
   u32 n_rx_packets = 0;
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
   memif_main_t *nm = &memif_main;
   memif_if_t *mif;
 
@@ -337,7 +338,7 @@
       if (mif->flags & MEMIF_IF_FLAG_ADMIN_UP &&
 	  mif->flags & MEMIF_IF_FLAG_CONNECTED &&
 	  (mif->if_index % nm->input_cpu_count) ==
-	  (cpu_index - nm->input_cpu_first_index))
+	  (thread_index - nm->input_cpu_first_index))
 	{
 	  if (mif->flags & MEMIF_IF_FLAG_IS_SLAVE)
 	    n_rx_packets +=
diff --git a/src/plugins/snat/in2out.c b/src/plugins/snat/in2out.c
index b496136..e5ee965 100644
--- a/src/plugins/snat/in2out.c
+++ b/src/plugins/snat/in2out.c
@@ -212,7 +212,7 @@
                       snat_session_t ** sessionp,
                       vlib_node_runtime_t * node,
                       u32 next0,
-                      u32 cpu_index)
+                      u32 thread_index)
 {
   snat_user_t *u;
   snat_user_key_t user_key;
@@ -246,27 +246,27 @@
   if (clib_bihash_search_8_8 (&sm->user_hash, &kv0, &value0))
     {
       /* no, make a new one */
-      pool_get (sm->per_thread_data[cpu_index].users, u);
+      pool_get (sm->per_thread_data[thread_index].users, u);
       memset (u, 0, sizeof (*u));
       u->addr = ip0->src_address;
       u->fib_index = rx_fib_index0;
 
-      pool_get (sm->per_thread_data[cpu_index].list_pool, per_user_list_head_elt);
+      pool_get (sm->per_thread_data[thread_index].list_pool, per_user_list_head_elt);
 
       u->sessions_per_user_list_head_index = per_user_list_head_elt -
-        sm->per_thread_data[cpu_index].list_pool;
+        sm->per_thread_data[thread_index].list_pool;
 
-      clib_dlist_init (sm->per_thread_data[cpu_index].list_pool,
+      clib_dlist_init (sm->per_thread_data[thread_index].list_pool,
                        u->sessions_per_user_list_head_index);
 
-      kv0.value = u - sm->per_thread_data[cpu_index].users;
+      kv0.value = u - sm->per_thread_data[thread_index].users;
 
       /* add user */
       clib_bihash_add_del_8_8 (&sm->user_hash, &kv0, 1 /* is_add */);
     }
   else
     {
-      u = pool_elt_at_index (sm->per_thread_data[cpu_index].users,
+      u = pool_elt_at_index (sm->per_thread_data[thread_index].users,
                              value0.value);
     }
 
@@ -276,25 +276,25 @@
       /* Remove the oldest dynamic translation */
       do {
           oldest_per_user_translation_list_index =
-            clib_dlist_remove_head (sm->per_thread_data[cpu_index].list_pool,
+            clib_dlist_remove_head (sm->per_thread_data[thread_index].list_pool,
                                     u->sessions_per_user_list_head_index);
 
           ASSERT (oldest_per_user_translation_list_index != ~0);
 
           /* add it back to the end of the LRU list */
-          clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
+          clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
                               u->sessions_per_user_list_head_index,
                               oldest_per_user_translation_list_index);
           /* Get the list element */
           oldest_per_user_translation_list_elt =
-            pool_elt_at_index (sm->per_thread_data[cpu_index].list_pool,
+            pool_elt_at_index (sm->per_thread_data[thread_index].list_pool,
                                oldest_per_user_translation_list_index);
 
           /* Get the session index from the list element */
           session_index = oldest_per_user_translation_list_elt->value;
 
           /* Get the session */
-          s = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions,
+          s = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
                                  session_index);
       } while (snat_is_session_static (s));
 
@@ -346,7 +346,7 @@
         }
 
       /* Create a new session */
-      pool_get (sm->per_thread_data[cpu_index].sessions, s);
+      pool_get (sm->per_thread_data[thread_index].sessions, s);
       memset (s, 0, sizeof (*s));
       
       s->outside_address_index = address_index;
@@ -362,22 +362,22 @@
         }
 
       /* Create list elts */
-      pool_get (sm->per_thread_data[cpu_index].list_pool,
+      pool_get (sm->per_thread_data[thread_index].list_pool,
                 per_user_translation_list_elt);
-      clib_dlist_init (sm->per_thread_data[cpu_index].list_pool,
+      clib_dlist_init (sm->per_thread_data[thread_index].list_pool,
                        per_user_translation_list_elt -
-                       sm->per_thread_data[cpu_index].list_pool);
+                       sm->per_thread_data[thread_index].list_pool);
 
       per_user_translation_list_elt->value =
-        s - sm->per_thread_data[cpu_index].sessions;
+        s - sm->per_thread_data[thread_index].sessions;
       s->per_user_index = per_user_translation_list_elt -
-                          sm->per_thread_data[cpu_index].list_pool;
+                          sm->per_thread_data[thread_index].list_pool;
       s->per_user_list_head_index = u->sessions_per_user_list_head_index;
 
-      clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
+      clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
                           s->per_user_list_head_index,
                           per_user_translation_list_elt -
-                          sm->per_thread_data[cpu_index].list_pool);
+                          sm->per_thread_data[thread_index].list_pool);
    }
   
   s->in2out = *key0;
@@ -388,12 +388,12 @@
 
   /* Add to translation hashes */
   kv0.key = s->in2out.as_u64;
-  kv0.value = s - sm->per_thread_data[cpu_index].sessions;
+  kv0.value = s - sm->per_thread_data[thread_index].sessions;
   if (clib_bihash_add_del_8_8 (&sm->in2out, &kv0, 1 /* is_add */))
       clib_warning ("in2out key add failed");
   
   kv0.key = s->out2in.as_u64;
-  kv0.value = s - sm->per_thread_data[cpu_index].sessions;
+  kv0.value = s - sm->per_thread_data[thread_index].sessions;
   
   if (clib_bihash_add_del_8_8 (&sm->out2in, &kv0, 1 /* is_add */))
       clib_warning ("out2in key add failed");
@@ -403,7 +403,7 @@
   worker_by_out_key.port = s->out2in.port;
   worker_by_out_key.fib_index = s->out2in.fib_index;
   kv0.key = worker_by_out_key.as_u64;
-  kv0.value = cpu_index;
+  kv0.value = thread_index;
   clib_bihash_add_del_8_8 (&sm->worker_by_out, &kv0, 1);
 
   /* log NAT event */
@@ -465,7 +465,7 @@
  *
  * @param[in,out] sm             SNAT main
  * @param[in,out] node           SNAT node runtime
- * @param[in] cpu_index          CPU index
+ * @param[in] thread_index       thread index
  * @param[in,out] b0             buffer containing packet to be translated
  * @param[out] p_key             address and port before NAT translation
  * @param[out] p_value           address and port after NAT translation
@@ -473,7 +473,7 @@
  * @param d                      optional parameter
  */
 u32 icmp_match_in2out_slow(snat_main_t *sm, vlib_node_runtime_t *node,
-                           u32 cpu_index, vlib_buffer_t *b0,
+                           u32 thread_index, vlib_buffer_t *b0,
                            snat_session_key_t *p_key,
                            snat_session_key_t *p_value,
                            u8 *p_dont_translate, void *d)
@@ -524,13 +524,13 @@
         }
 
       next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
-                         &s0, node, next0, cpu_index);
+                         &s0, node, next0, thread_index);
 
       if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
         goto out;
     }
   else
-    s0 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions,
+    s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
                             value0.value);
 
 out:
@@ -548,7 +548,7 @@
  *
  * @param[in] sm                 SNAT main
  * @param[in,out] node           SNAT node runtime
- * @param[in] cpu_index          CPU index
+ * @param[in] thread_index       thread index
  * @param[in,out] b0             buffer containing packet to be translated
  * @param[out] p_key             address and port before NAT translation
  * @param[out] p_value           address and port after NAT translation
@@ -556,7 +556,7 @@
  * @param d                      optional parameter
  */
 u32 icmp_match_in2out_fast(snat_main_t *sm, vlib_node_runtime_t *node,
-                           u32 cpu_index, vlib_buffer_t *b0,
+                           u32 thread_index, vlib_buffer_t *b0,
                            snat_session_key_t *p_key,
                            snat_session_key_t *p_value,
                            u8 *p_dont_translate, void *d)
@@ -624,7 +624,7 @@
                                u32 rx_fib_index0,
                                vlib_node_runtime_t * node,
                                u32 next0,
-                               u32 cpu_index,
+                               u32 thread_index,
                                void *d)
 {
   snat_session_key_t key0, sm0;
@@ -641,7 +641,7 @@
 
   echo0 = (icmp_echo_header_t *)(icmp0+1);
 
-  next0_tmp = sm->icmp_match_in2out_cb(sm, node, cpu_index, b0,
+  next0_tmp = sm->icmp_match_in2out_cb(sm, node, thread_index, b0,
                                        &key0, &sm0, &dont_translate, d);
   if (next0_tmp != ~0)
     next0 = next0_tmp;
@@ -847,11 +847,11 @@
                                          vlib_node_runtime_t * node,
                                          u32 next0,
                                          f64 now,
-                                         u32 cpu_index,
+                                         u32 thread_index,
                                          snat_session_t ** p_s0)
 {
   next0 = icmp_in2out(sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
-                      next0, cpu_index, p_s0);
+                      next0, thread_index, p_s0);
   snat_session_t * s0 = *p_s0;
   if (PREDICT_TRUE(next0 != SNAT_IN2OUT_NEXT_DROP && s0))
     {
@@ -862,9 +862,9 @@
       /* Per-user LRU list maintenance for dynamic translations */
       if (!snat_is_session_static (s0))
         {
-          clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool,
+          clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
                              s0->per_user_index);
-          clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
+          clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
                               s0->per_user_list_head_index,
                               s0->per_user_index);
         }
@@ -884,7 +884,7 @@
   snat_runtime_t * rt = (snat_runtime_t *)node->runtime_data;
   f64 now = vlib_time_now (vm);
   u32 stats_node_index;
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
 
   stats_node_index = is_slow_path ? snat_in2out_slowpath_node.index :
     snat_in2out_node.index;
@@ -977,7 +977,7 @@
                 {
                   next0 = icmp_in2out_slow_path 
                     (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, 
-                     node, next0, now, cpu_index, &s0);
+                     node, next0, now, thread_index, &s0);
                   goto trace00;
                 }
             }
@@ -1006,7 +1006,7 @@
                     goto trace00;
 
                   next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
-                                     &s0, node, next0, cpu_index);
+                                     &s0, node, next0, thread_index);
                   if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
                     goto trace00;
                 }
@@ -1017,7 +1017,7 @@
                 }
             }
           else
-            s0 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions,
+            s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
                                     value0.value);
 
           old_addr0 = ip0->src_address.as_u32;
@@ -1063,9 +1063,9 @@
           /* Per-user LRU list maintenance for dynamic translation */
           if (!snat_is_session_static (s0))
             {
-              clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool,
+              clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
                                  s0->per_user_index);
-              clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
+              clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
                                   s0->per_user_list_head_index,
                                   s0->per_user_index);
             }
@@ -1081,7 +1081,7 @@
               t->next_index = next0;
                   t->session_index = ~0;
               if (s0)
-                t->session_index = s0 - sm->per_thread_data[cpu_index].sessions;
+                t->session_index = s0 - sm->per_thread_data[thread_index].sessions;
             }
 
           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
@@ -1117,7 +1117,7 @@
                 {
                   next1 = icmp_in2out_slow_path 
                     (sm, b1, ip1, icmp1, sw_if_index1, rx_fib_index1, node,
-                     next1, now, cpu_index, &s1);
+                     next1, now, thread_index, &s1);
                   goto trace01;
                 }
             }
@@ -1146,7 +1146,7 @@
                     goto trace01;
 
                   next1 = slow_path (sm, b1, ip1, rx_fib_index1, &key1,
-                                     &s1, node, next1, cpu_index);
+                                     &s1, node, next1, thread_index);
                   if (PREDICT_FALSE (next1 == SNAT_IN2OUT_NEXT_DROP))
                     goto trace01;
                 }
@@ -1157,7 +1157,7 @@
                 }
             }
           else
-            s1 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions,
+            s1 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
                                     value1.value);
 
           old_addr1 = ip1->src_address.as_u32;
@@ -1203,9 +1203,9 @@
           /* Per-user LRU list maintenance for dynamic translation */
           if (!snat_is_session_static (s1))
             {
-              clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool,
+              clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
                                  s1->per_user_index);
-              clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
+              clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
                                   s1->per_user_list_head_index,
                                   s1->per_user_index);
             }
@@ -1220,7 +1220,7 @@
               t->next_index = next1;
               t->session_index = ~0;
               if (s1)
-                t->session_index = s1 - sm->per_thread_data[cpu_index].sessions;
+                t->session_index = s1 - sm->per_thread_data[thread_index].sessions;
             }
 
           pkts_processed += next1 != SNAT_IN2OUT_NEXT_DROP;
@@ -1292,7 +1292,7 @@
                 {
                   next0 = icmp_in2out_slow_path 
                     (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
-                     next0, now, cpu_index, &s0);
+                     next0, now, thread_index, &s0);
                   goto trace0;
                 }
             }
@@ -1321,7 +1321,7 @@
                     goto trace0;
 
                   next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
-                                     &s0, node, next0, cpu_index);
+                                     &s0, node, next0, thread_index);
 
                   if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
                     goto trace0;
@@ -1333,7 +1333,7 @@
                 }
             }
           else
-            s0 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions,
+            s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
                                     value0.value);
 
           old_addr0 = ip0->src_address.as_u32;
@@ -1379,9 +1379,9 @@
           /* Per-user LRU list maintenance for dynamic translation */
           if (!snat_is_session_static (s0))
             {
-              clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool,
+              clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
                                  s0->per_user_index);
-              clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
+              clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
                                   s0->per_user_list_head_index,
                                   s0->per_user_index);
             }
@@ -1397,7 +1397,7 @@
               t->next_index = next0;
                   t->session_index = ~0;
               if (s0)
-                t->session_index = s0 - sm->per_thread_data[cpu_index].sessions;
+                t->session_index = s0 - sm->per_thread_data[thread_index].sessions;
             }
 
           pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
@@ -2010,7 +2010,7 @@
   u32 n_left_to_next_worker = 0, *to_next_worker = 0;
   u32 next_worker_index = 0;
   u32 current_worker_index = ~0;
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
 
   ASSERT (vec_len (sm->workers));
 
@@ -2048,7 +2048,7 @@
 
       next_worker_index = sm->worker_in2out_cb(ip0, rx_fib_index0);
 
-      if (PREDICT_FALSE (next_worker_index != cpu_index))
+      if (PREDICT_FALSE (next_worker_index != thread_index))
         {
           do_handoff = 1;
 
diff --git a/src/plugins/snat/out2in.c b/src/plugins/snat/out2in.c
index 656e42d..5d308d7 100644
--- a/src/plugins/snat/out2in.c
+++ b/src/plugins/snat/out2in.c
@@ -129,7 +129,7 @@
                                    snat_session_key_t in2out,
                                    snat_session_key_t out2in,
                                    vlib_node_runtime_t * node,
-                                   u32 cpu_index)
+                                   u32 thread_index)
 {
   snat_user_t *u;
   snat_user_key_t user_key;
@@ -146,36 +146,36 @@
   if (clib_bihash_search_8_8 (&sm->user_hash, &kv0, &value0))
     {
       /* no, make a new one */
-      pool_get (sm->per_thread_data[cpu_index].users, u);
+      pool_get (sm->per_thread_data[thread_index].users, u);
       memset (u, 0, sizeof (*u));
       u->addr = in2out.addr;
       u->fib_index = in2out.fib_index;
 
-      pool_get (sm->per_thread_data[cpu_index].list_pool,
+      pool_get (sm->per_thread_data[thread_index].list_pool,
                 per_user_list_head_elt);
 
       u->sessions_per_user_list_head_index = per_user_list_head_elt -
-        sm->per_thread_data[cpu_index].list_pool;
+        sm->per_thread_data[thread_index].list_pool;
 
-      clib_dlist_init (sm->per_thread_data[cpu_index].list_pool,
+      clib_dlist_init (sm->per_thread_data[thread_index].list_pool,
                        u->sessions_per_user_list_head_index);
 
-      kv0.value = u - sm->per_thread_data[cpu_index].users;
+      kv0.value = u - sm->per_thread_data[thread_index].users;
 
       /* add user */
       clib_bihash_add_del_8_8 (&sm->user_hash, &kv0, 1 /* is_add */);
 
       /* add non-traslated packets worker lookup */
-      kv0.value = cpu_index;
+      kv0.value = thread_index;
       clib_bihash_add_del_8_8 (&sm->worker_by_in, &kv0, 1);
     }
   else
     {
-      u = pool_elt_at_index (sm->per_thread_data[cpu_index].users,
+      u = pool_elt_at_index (sm->per_thread_data[thread_index].users,
                              value0.value);
     }
 
-  pool_get (sm->per_thread_data[cpu_index].sessions, s);
+  pool_get (sm->per_thread_data[thread_index].sessions, s);
   memset (s, 0, sizeof (*s));
 
   s->outside_address_index = ~0;
@@ -183,22 +183,22 @@
   u->nstaticsessions++;
 
   /* Create list elts */
-  pool_get (sm->per_thread_data[cpu_index].list_pool,
+  pool_get (sm->per_thread_data[thread_index].list_pool,
             per_user_translation_list_elt);
-  clib_dlist_init (sm->per_thread_data[cpu_index].list_pool,
+  clib_dlist_init (sm->per_thread_data[thread_index].list_pool,
                    per_user_translation_list_elt -
-                   sm->per_thread_data[cpu_index].list_pool);
+                   sm->per_thread_data[thread_index].list_pool);
 
   per_user_translation_list_elt->value =
-    s - sm->per_thread_data[cpu_index].sessions;
+    s - sm->per_thread_data[thread_index].sessions;
   s->per_user_index =
-    per_user_translation_list_elt - sm->per_thread_data[cpu_index].list_pool;
+    per_user_translation_list_elt - sm->per_thread_data[thread_index].list_pool;
   s->per_user_list_head_index = u->sessions_per_user_list_head_index;
 
-  clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
+  clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
                       s->per_user_list_head_index,
                       per_user_translation_list_elt -
-                      sm->per_thread_data[cpu_index].list_pool);
+                      sm->per_thread_data[thread_index].list_pool);
 
   s->in2out = in2out;
   s->out2in = out2in;
@@ -206,12 +206,12 @@
 
   /* Add to translation hashes */
   kv0.key = s->in2out.as_u64;
-  kv0.value = s - sm->per_thread_data[cpu_index].sessions;
+  kv0.value = s - sm->per_thread_data[thread_index].sessions;
   if (clib_bihash_add_del_8_8 (&sm->in2out, &kv0, 1 /* is_add */))
       clib_warning ("in2out key add failed");
 
   kv0.key = s->out2in.as_u64;
-  kv0.value = s - sm->per_thread_data[cpu_index].sessions;
+  kv0.value = s - sm->per_thread_data[thread_index].sessions;
 
   if (clib_bihash_add_del_8_8 (&sm->out2in, &kv0, 1 /* is_add */))
       clib_warning ("out2in key add failed");
@@ -298,7 +298,7 @@
  *
  * @param[in,out] sm             SNAT main
  * @param[in,out] node           SNAT node runtime
- * @param[in] cpu_index          CPU index
+ * @param[in] thread_index       thread index
  * @param[in,out] b0             buffer containing packet to be translated
  * @param[out] p_key             address and port before NAT translation
  * @param[out] p_value           address and port after NAT translation
@@ -306,7 +306,7 @@
  * @param d                      optional parameter
  */
 u32 icmp_match_out2in_slow(snat_main_t *sm, vlib_node_runtime_t *node,
-                           u32 cpu_index, vlib_buffer_t *b0,
+                           u32 thread_index, vlib_buffer_t *b0,
                            snat_session_key_t *p_key,
                            snat_session_key_t *p_value,
                            u8 *p_dont_translate, void *d)
@@ -366,7 +366,7 @@
 
       /* Create session initiated by host from external network */
       s0 = create_session_for_static_mapping(sm, b0, sm0, key0,
-                                             node, cpu_index);
+                                             node, thread_index);
 
       if (!s0)
         {
@@ -375,7 +375,7 @@
         }
     }
   else
-    s0 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions,
+    s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
                             value0.value);
 
 out:
@@ -393,7 +393,7 @@
  *
  * @param[in] sm                 SNAT main
  * @param[in,out] node           SNAT node runtime
- * @param[in] cpu_index          CPU index
+ * @param[in] thread_index       thread index
  * @param[in,out] b0             buffer containing packet to be translated
  * @param[out] p_key             address and port before NAT translation
  * @param[out] p_value           address and port after NAT translation
@@ -401,7 +401,7 @@
  * @param d                      optional parameter
  */
 u32 icmp_match_out2in_fast(snat_main_t *sm, vlib_node_runtime_t *node,
-                           u32 cpu_index, vlib_buffer_t *b0,
+                           u32 thread_index, vlib_buffer_t *b0,
                            snat_session_key_t *p_key,
                            snat_session_key_t *p_value,
                            u8 *p_dont_translate, void *d)
@@ -460,7 +460,7 @@
                                u32 rx_fib_index0,
                                vlib_node_runtime_t * node,
                                u32 next0,
-                               u32 cpu_index,
+                               u32 thread_index,
                                void *d)
 {
   snat_session_key_t key0, sm0;
@@ -477,7 +477,7 @@
 
   echo0 = (icmp_echo_header_t *)(icmp0+1);
 
-  next0_tmp = sm->icmp_match_out2in_cb(sm, node, cpu_index, b0,
+  next0_tmp = sm->icmp_match_out2in_cb(sm, node, thread_index, b0,
                                        &key0, &sm0, &dont_translate, d);
   if (next0_tmp != ~0)
     next0 = next0_tmp;
@@ -589,11 +589,11 @@
                                          u32 rx_fib_index0,
                                          vlib_node_runtime_t * node,
                                          u32 next0, f64 now,
-                                         u32 cpu_index,
+                                         u32 thread_index,
                                          snat_session_t ** p_s0)
 {
   next0 = icmp_out2in(sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
-                      next0, cpu_index, p_s0);
+                      next0, thread_index, p_s0);
   snat_session_t * s0 = *p_s0;
   if (PREDICT_TRUE(next0 != SNAT_OUT2IN_NEXT_DROP && s0))
     {
@@ -604,9 +604,9 @@
       /* Per-user LRU list maintenance for dynamic translation */
       if (!snat_is_session_static (s0))
         {
-          clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool,
+          clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
                              s0->per_user_index);
-          clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
+          clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
                               s0->per_user_list_head_index,
                               s0->per_user_index);
         }
@@ -624,7 +624,7 @@
   u32 pkts_processed = 0;
   snat_main_t * sm = &snat_main;
   f64 now = vlib_time_now (vm);
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
 
   from = vlib_frame_vector_args (frame);
   n_left_from = frame->n_vectors;
@@ -712,7 +712,7 @@
             {
               next0 = icmp_out2in_slow_path 
                 (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node, 
-                 next0, now, cpu_index, &s0);
+                 next0, now, thread_index, &s0);
               goto trace0;
             }
 
@@ -743,7 +743,7 @@
 
               /* Create session initiated by host from external network */
               s0 = create_session_for_static_mapping(sm, b0, sm0, key0, node,
-                                                     cpu_index);
+                                                     thread_index);
               if (!s0)
                 {
                   b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
@@ -752,7 +752,7 @@
                 }
             }
           else
-            s0 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions,
+            s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
                                     value0.value);
 
           old_addr0 = ip0->dst_address.as_u32;
@@ -796,9 +796,9 @@
           /* Per-user LRU list maintenance for dynamic translation */
           if (!snat_is_session_static (s0))
             {
-              clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool,
+              clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
                                  s0->per_user_index);
-              clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
+              clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
                                   s0->per_user_list_head_index,
                                   s0->per_user_index);
             }
@@ -813,7 +813,7 @@
               t->next_index = next0;
               t->session_index = ~0;
               if (s0)
-                t->session_index = s0 - sm->per_thread_data[cpu_index].sessions;
+                t->session_index = s0 - sm->per_thread_data[thread_index].sessions;
             }
 
           pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP;
@@ -847,7 +847,7 @@
             {
               next1 = icmp_out2in_slow_path 
                 (sm, b1, ip1, icmp1, sw_if_index1, rx_fib_index1, node, 
-                 next1, now, cpu_index, &s1);
+                 next1, now, thread_index, &s1);
               goto trace1;
             }
 
@@ -878,7 +878,7 @@
 
               /* Create session initiated by host from external network */
               s1 = create_session_for_static_mapping(sm, b1, sm1, key1, node,
-                                                     cpu_index);
+                                                     thread_index);
               if (!s1)
                 {
                   b1->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
@@ -887,7 +887,7 @@
                 }
             }
           else
-            s1 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions,
+            s1 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
                                     value1.value);
 
           old_addr1 = ip1->dst_address.as_u32;
@@ -931,9 +931,9 @@
           /* Per-user LRU list maintenance for dynamic translation */
           if (!snat_is_session_static (s1))
             {
-              clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool,
+              clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
                                  s1->per_user_index);
-              clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
+              clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
                                   s1->per_user_list_head_index,
                                   s1->per_user_index);
             }
@@ -948,7 +948,7 @@
               t->next_index = next1;
               t->session_index = ~0;
               if (s1)
-                t->session_index = s1 - sm->per_thread_data[cpu_index].sessions;
+                t->session_index = s1 - sm->per_thread_data[thread_index].sessions;
             }
 
           pkts_processed += next1 != SNAT_OUT2IN_NEXT_DROP;
@@ -1016,7 +1016,7 @@
             {
               next0 = icmp_out2in_slow_path 
                 (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node, 
-                 next0, now, cpu_index, &s0);
+                 next0, now, thread_index, &s0);
               goto trace00;
             }
 
@@ -1048,7 +1048,7 @@
 
               /* Create session initiated by host from external network */
               s0 = create_session_for_static_mapping(sm, b0, sm0, key0, node,
-                                                     cpu_index);
+                                                     thread_index);
               if (!s0)
                 {
                   b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
@@ -1057,7 +1057,7 @@
                 }
             }
           else
-            s0 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions,
+            s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions,
                                     value0.value);
 
           old_addr0 = ip0->dst_address.as_u32;
@@ -1101,9 +1101,9 @@
           /* Per-user LRU list maintenance for dynamic translation */
           if (!snat_is_session_static (s0))
             {
-              clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool,
+              clib_dlist_remove (sm->per_thread_data[thread_index].list_pool,
                                  s0->per_user_index);
-              clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
+              clib_dlist_addtail (sm->per_thread_data[thread_index].list_pool,
                                   s0->per_user_list_head_index,
                                   s0->per_user_index);
             }
@@ -1118,7 +1118,7 @@
               t->next_index = next0;
               t->session_index = ~0;
               if (s0)
-                t->session_index = s0 - sm->per_thread_data[cpu_index].sessions;
+                t->session_index = s0 - sm->per_thread_data[thread_index].sessions;
             }
 
           pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP;
@@ -1599,7 +1599,7 @@
   u32 n_left_to_next_worker = 0, *to_next_worker = 0;
   u32 next_worker_index = 0;
   u32 current_worker_index = ~0;
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
 
   ASSERT (vec_len (sm->workers));
 
@@ -1637,7 +1637,7 @@
 
       next_worker_index = sm->worker_out2in_cb(ip0, rx_fib_index0);
 
-      if (PREDICT_FALSE (next_worker_index != cpu_index))
+      if (PREDICT_FALSE (next_worker_index != thread_index))
         {
           do_handoff = 1;
 
diff --git a/src/plugins/snat/snat.h b/src/plugins/snat/snat.h
index 017825c..f4e1c5c 100644
--- a/src/plugins/snat/snat.h
+++ b/src/plugins/snat/snat.h
@@ -221,7 +221,7 @@
 
 typedef u32 snat_icmp_match_function_t (struct snat_main_s *sm,
                                         vlib_node_runtime_t *node,
-                                        u32 cpu_index,
+                                        u32 thread_index,
                                         vlib_buffer_t *b0,
                                         snat_session_key_t *p_key,
                                         snat_session_key_t *p_value,
@@ -402,22 +402,22 @@
 } tcp_udp_header_t;
 
 u32 icmp_match_in2out_fast(snat_main_t *sm, vlib_node_runtime_t *node,
-                           u32 cpu_index, vlib_buffer_t *b0,
+                           u32 thread_index, vlib_buffer_t *b0,
                            snat_session_key_t *p_key,
                            snat_session_key_t *p_value,
                            u8 *p_dont_translate, void *d);
 u32 icmp_match_in2out_slow(snat_main_t *sm, vlib_node_runtime_t *node,
-                           u32 cpu_index, vlib_buffer_t *b0,
+                           u32 thread_index, vlib_buffer_t *b0,
                            snat_session_key_t *p_key,
                            snat_session_key_t *p_value,
                            u8 *p_dont_translate, void *d);
 u32 icmp_match_out2in_fast(snat_main_t *sm, vlib_node_runtime_t *node,
-                           u32 cpu_index, vlib_buffer_t *b0,
+                           u32 thread_index, vlib_buffer_t *b0,
                            snat_session_key_t *p_key,
                            snat_session_key_t *p_value,
                            u8 *p_dont_translate, void *d);
 u32 icmp_match_out2in_slow(snat_main_t *sm, vlib_node_runtime_t *node,
-                           u32 cpu_index, vlib_buffer_t *b0,
+                           u32 thread_index, vlib_buffer_t *b0,
                            snat_session_key_t *p_key,
                            snat_session_key_t *p_value,
                            u8 *p_dont_translate, void *d);
diff --git a/src/vlib/buffer.c b/src/vlib/buffer.c
index a517a59..be3b41e 100644
--- a/src/vlib/buffer.c
+++ b/src/vlib/buffer.c
@@ -299,7 +299,7 @@
   if (CLIB_DEBUG == 0)
     return;
 
-  ASSERT (os_get_cpu_number () == 0);
+  ASSERT (vlib_get_thread_index () == 0);
 
   /* smp disaster check */
   if (vec_len (vlib_mains) > 1)
@@ -355,7 +355,7 @@
   vlib_buffer_free_list_t *f;
   int i;
 
-  ASSERT (os_get_cpu_number () == 0);
+  ASSERT (vlib_get_thread_index () == 0);
 
   if (!is_default && pool_elts (bm->buffer_free_list_pool) == 0)
     {
@@ -474,7 +474,7 @@
   u32 merge_index;
   int i;
 
-  ASSERT (os_get_cpu_number () == 0);
+  ASSERT (vlib_get_thread_index () == 0);
 
   f = vlib_buffer_get_free_list (vm, free_list_index);
 
diff --git a/src/vlib/buffer_funcs.h b/src/vlib/buffer_funcs.h
index 394c336..328660a 100644
--- a/src/vlib/buffer_funcs.h
+++ b/src/vlib/buffer_funcs.h
@@ -209,7 +209,7 @@
 vlib_buffer_is_known (vlib_main_t * vm, u32 buffer_index)
 {
   vlib_buffer_main_t *bm = vm->buffer_main;
-  ASSERT (os_get_cpu_number () == 0);
+  ASSERT (vlib_get_thread_index () == 0);
 
   uword *p = hash_get (bm->buffer_known_hash, buffer_index);
   return p ? p[0] : VLIB_BUFFER_UNKNOWN;
@@ -221,7 +221,7 @@
 			     vlib_buffer_known_state_t state)
 {
   vlib_buffer_main_t *bm = vm->buffer_main;
-  ASSERT (os_get_cpu_number () == 0);
+  ASSERT (vlib_get_thread_index () == 0);
   hash_set (bm->buffer_known_hash, buffer_index, state);
 }
 
diff --git a/src/vlib/cli.c b/src/vlib/cli.c
index f853f65..3cc9507 100644
--- a/src/vlib/cli.c
+++ b/src/vlib/cli.c
@@ -709,7 +709,7 @@
     {
         /* *INDENT-OFF* */
         foreach_vlib_main({
-          heap = clib_per_cpu_mheaps[this_vlib_main->cpu_index];
+          heap = clib_per_cpu_mheaps[this_vlib_main->thread_index];
           mheap = mheap_header(heap);
           mheap->flags |= MHEAP_FLAG_VALIDATE;
           // Turn off small object cache because it delays detection of errors
@@ -722,7 +722,7 @@
     {
         /* *INDENT-OFF* */
         foreach_vlib_main({
-          heap = clib_per_cpu_mheaps[this_vlib_main->cpu_index];
+          heap = clib_per_cpu_mheaps[this_vlib_main->thread_index];
           mheap = mheap_header(heap);
           mheap->flags &= ~MHEAP_FLAG_VALIDATE;
           mheap->flags |= MHEAP_FLAG_SMALL_OBJECT_CACHE;
@@ -733,7 +733,7 @@
     {
         /* *INDENT-OFF* */
         foreach_vlib_main({
-          heap = clib_per_cpu_mheaps[this_vlib_main->cpu_index];
+          heap = clib_per_cpu_mheaps[this_vlib_main->thread_index];
           mheap = mheap_header(heap);
           mheap_validate(heap);
         });
diff --git a/src/vlib/counter.h b/src/vlib/counter.h
index 17a8521..60e2055 100644
--- a/src/vlib/counter.h
+++ b/src/vlib/counter.h
@@ -70,17 +70,17 @@
 
 /** Increment a simple counter
     @param cm - (vlib_simple_counter_main_t *) simple counter main pointer
-    @param cpu_index - (u32) the current cpu index
+    @param thread_index - (u32) the current thread index
     @param index - (u32) index of the counter to increment
     @param increment - (u64) quantitiy to add to the counter
 */
 always_inline void
 vlib_increment_simple_counter (vlib_simple_counter_main_t * cm,
-			       u32 cpu_index, u32 index, u64 increment)
+			       u32 thread_index, u32 index, u64 increment)
 {
   counter_t *my_counters;
 
-  my_counters = cm->counters[cpu_index];
+  my_counters = cm->counters[thread_index];
   my_counters[index] += increment;
 }
 
@@ -201,7 +201,7 @@
 
 /** Increment a combined counter
     @param cm - (vlib_combined_counter_main_t *) comined counter main pointer
-    @param cpu_index - (u32) the current cpu index
+    @param thread_index - (u32) the current thread index
     @param index - (u32) index of the counter to increment
     @param packet_increment - (u64) number of packets to add to the counter
     @param byte_increment - (u64) number of bytes to add to the counter
@@ -209,13 +209,13 @@
 
 always_inline void
 vlib_increment_combined_counter (vlib_combined_counter_main_t * cm,
-				 u32 cpu_index,
+				 u32 thread_index,
 				 u32 index, u64 n_packets, u64 n_bytes)
 {
   vlib_counter_t *my_counters;
 
   /* Use this CPU's counter array */
-  my_counters = cm->counters[cpu_index];
+  my_counters = cm->counters[thread_index];
 
   my_counters[index].packets += n_packets;
   my_counters[index].bytes += n_bytes;
@@ -224,14 +224,14 @@
 /** Pre-fetch a per-thread combined counter for the given object index */
 always_inline void
 vlib_prefetch_combined_counter (const vlib_combined_counter_main_t * cm,
-				u32 cpu_index, u32 index)
+				u32 thread_index, u32 index)
 {
   vlib_counter_t *cpu_counters;
 
   /*
    * This CPU's index is assumed to already be in cache
    */
-  cpu_counters = cm->counters[cpu_index];
+  cpu_counters = cm->counters[thread_index];
   CLIB_PREFETCH (cpu_counters + index, CLIB_CACHE_LINE_BYTES, STORE);
 }
 
diff --git a/src/vlib/error.c b/src/vlib/error.c
index a2c2317..e4ed4ee 100644
--- a/src/vlib/error.c
+++ b/src/vlib/error.c
@@ -149,7 +149,7 @@
   vlib_node_t *n = vlib_get_node (vm, node_index);
   uword l;
 
-  ASSERT (os_get_cpu_number () == 0);
+  ASSERT (vlib_get_thread_index () == 0);
 
   /* Free up any previous error strings. */
   if (n->n_errors > 0)
diff --git a/src/vlib/global_funcs.h b/src/vlib/global_funcs.h
index f51ec38..9dd01fb 100644
--- a/src/vlib/global_funcs.h
+++ b/src/vlib/global_funcs.h
@@ -23,7 +23,7 @@
 vlib_get_main (void)
 {
   vlib_main_t *vm;
-  vm = vlib_mains[os_get_cpu_number ()];
+  vm = vlib_mains[vlib_get_thread_index ()];
   ASSERT (vm);
   return vm;
 }
diff --git a/src/vlib/main.c b/src/vlib/main.c
index b22203f..422d3e2 100644
--- a/src/vlib/main.c
+++ b/src/vlib/main.c
@@ -136,18 +136,18 @@
   else
     {
       f = clib_mem_alloc_aligned_no_fail (n, VLIB_FRAME_ALIGN);
-      f->cpu_index = vm->cpu_index;
+      f->thread_index = vm->thread_index;
       fi = vlib_frame_index_no_check (vm, f);
     }
 
   /* Poison frame when debugging. */
   if (CLIB_DEBUG > 0)
     {
-      u32 save_cpu_index = f->cpu_index;
+      u32 save_thread_index = f->thread_index;
 
       memset (f, 0xfe, n);
 
-      f->cpu_index = save_cpu_index;
+      f->thread_index = save_thread_index;
     }
 
   /* Insert magic number. */
@@ -517,7 +517,7 @@
 	   * a dangling frame reference. Each thread has its own copy of
 	   * the next_frames vector.
 	   */
-	  if (0 && r->cpu_index != next_runtime->cpu_index)
+	  if (0 && r->thread_index != next_runtime->thread_index)
 	    {
 	      nf->frame_index = ~0;
 	      nf->flags &= ~(VLIB_FRAME_PENDING | VLIB_FRAME_IS_ALLOCATED);
@@ -866,7 +866,7 @@
 				  : evm->node_call_elog_event_types,
 				  node_index),
 		/* track */
-		(vm->cpu_index ? &vlib_worker_threads[vm->cpu_index].
+		(vm->thread_index ? &vlib_worker_threads[vm->thread_index].
 		 elog_track : &em->default_track),
 		/* data to log */ n_vectors);
 }
@@ -963,7 +963,7 @@
 
   vm->cpu_time_last_node_dispatch = last_time_stamp;
 
-  if (1 /* || vm->cpu_index == node->cpu_index */ )
+  if (1 /* || vm->thread_index == node->thread_index */ )
     {
       vlib_main_t *stat_vm;
 
@@ -1029,7 +1029,7 @@
 	  {
 	    u32 node_name, vector_length, is_polling;
 	  } *ed;
-	  vlib_worker_thread_t *w = vlib_worker_threads + vm->cpu_index;
+	  vlib_worker_thread_t *w = vlib_worker_threads + vm->thread_index;
 #endif
 
 	  if ((dispatch_state == VLIB_NODE_STATE_INTERRUPT
diff --git a/src/vlib/main.h b/src/vlib/main.h
index 0197b4f..329bf07 100644
--- a/src/vlib/main.h
+++ b/src/vlib/main.h
@@ -156,7 +156,7 @@
   uword *init_functions_called;
 
   /* to compare with node runtime */
-  u32 cpu_index;
+  u32 thread_index;
 
   void **mbuf_alloc_list;
 
diff --git a/src/vlib/node.c b/src/vlib/node.c
index dc0a4de..bbd3a42 100644
--- a/src/vlib/node.c
+++ b/src/vlib/node.c
@@ -99,7 +99,7 @@
   vlib_pending_frame_t *pf;
   i32 i, j, n_insert;
 
-  ASSERT (os_get_cpu_number () == 0);
+  ASSERT (vlib_get_thread_index () == 0);
 
   vlib_worker_thread_barrier_sync (vm);
 
diff --git a/src/vlib/node.h b/src/vlib/node.h
index fc7e7da..1e2f4c3 100644
--- a/src/vlib/node.h
+++ b/src/vlib/node.h
@@ -344,8 +344,8 @@
   /* Number of vector elements currently in frame. */
   u16 n_vectors;
 
-  /* Owner cpuid / heap id */
-  u16 cpu_index;
+  /* Owner thread / heap id */
+  u16 thread_index;
 
   /* Scalar and vector arguments to next node. */
   u8 arguments[0];
@@ -459,7 +459,7 @@
 					  zero before first run of this
 					  node. */
 
-  u16 cpu_index;			/**< CPU this node runs on */
+  u16 thread_index;			/**< thread this node runs on */
 
   u8 runtime_data[0];			/**< Function dependent
 					  node-runtime data. This data is
diff --git a/src/vlib/node_funcs.h b/src/vlib/node_funcs.h
index 1f7d94e..54e3687 100644
--- a/src/vlib/node_funcs.h
+++ b/src/vlib/node_funcs.h
@@ -201,9 +201,9 @@
 vlib_get_frame_no_check (vlib_main_t * vm, uword frame_index)
 {
   vlib_frame_t *f;
-  u32 cpu_index = frame_index & VLIB_CPU_MASK;
+  u32 thread_index = frame_index & VLIB_CPU_MASK;
   u32 offset = frame_index & VLIB_OFFSET_MASK;
-  vm = vlib_mains[cpu_index];
+  vm = vlib_mains[thread_index];
   f = vm->heap_base + offset;
   return f;
 }
@@ -215,10 +215,10 @@
 
   ASSERT (((uword) f & VLIB_CPU_MASK) == 0);
 
-  vm = vlib_mains[f->cpu_index];
+  vm = vlib_mains[f->thread_index];
 
   i = ((u8 *) f - (u8 *) vm->heap_base);
-  return i | f->cpu_index;
+  return i | f->thread_index;
 }
 
 always_inline vlib_frame_t *
diff --git a/src/vlib/threads.c b/src/vlib/threads.c
index ef3a24d..4a111f8 100644
--- a/src/vlib/threads.c
+++ b/src/vlib/threads.c
@@ -35,27 +35,12 @@
 vlib_worker_thread_t *vlib_worker_threads;
 vlib_thread_main_t vlib_thread_main;
 
+__thread uword vlib_thread_index = 0;
+
 uword
 os_get_cpu_number (void)
 {
-  void *sp;
-  uword n;
-  u32 len;
-
-  len = vec_len (vlib_thread_stacks);
-  if (len == 0)
-    return 0;
-
-  /* Get any old stack address. */
-  sp = &sp;
-
-  n = ((uword) sp - (uword) vlib_thread_stacks[0])
-    >> VLIB_LOG2_THREAD_STACK_SIZE;
-
-  /* "processes" have their own stacks, and they always run in thread 0 */
-  n = n >= len ? 0 : n;
-
-  return n;
+  return vlib_thread_index;
 }
 
 uword
@@ -275,21 +260,6 @@
   return 0;
 }
 
-vlib_worker_thread_t *
-vlib_alloc_thread (vlib_main_t * vm)
-{
-  vlib_worker_thread_t *w;
-
-  if (vec_len (vlib_worker_threads) >= vec_len (vlib_thread_stacks))
-    {
-      clib_warning ("out of worker threads... Quitting...");
-      exit (1);
-    }
-  vec_add2 (vlib_worker_threads, w, 1);
-  w->thread_stack = vlib_thread_stacks[w - vlib_worker_threads];
-  return w;
-}
-
 vlib_frame_queue_t *
 vlib_frame_queue_alloc (int nelts)
 {
@@ -427,7 +397,7 @@
       f64 b4 = vlib_time_now_ticks (vm, before);
       vlib_worker_thread_barrier_check (vm, b4);
       /* Bad idea. Dequeue -> enqueue -> dequeue -> trouble */
-      // vlib_frame_queue_dequeue (vm->cpu_index, vm, nm);
+      // vlib_frame_queue_dequeue (vm->thread_index, vm, nm);
     }
 
   elt = fq->elts + (new_tail & (fq->nelts - 1));
@@ -497,6 +467,8 @@
   w->lwp = syscall (SYS_gettid);
   w->thread_id = pthread_self ();
 
+  vlib_thread_index = w - vlib_worker_threads;
+
   rv = (void *) clib_calljmp
     ((uword (*)(uword)) w->thread_function,
      (uword) arg, w->thread_stack + VLIB_THREAD_STACK_SIZE);
@@ -610,7 +582,9 @@
 		  mheap_alloc (0 /* use VM */ , tr->mheap_size);
 	      else
 		w->thread_mheap = main_heap;
-	      w->thread_stack = vlib_thread_stacks[w - vlib_worker_threads];
+
+	      w->thread_stack =
+		vlib_thread_stack_init (w - vlib_worker_threads);
 	      w->thread_function = tr->function;
 	      w->thread_function_arg = w;
 	      w->instance_id = k;
@@ -630,7 +604,7 @@
 	      vm_clone = clib_mem_alloc (sizeof (*vm_clone));
 	      clib_memcpy (vm_clone, vlib_mains[0], sizeof (*vm_clone));
 
-	      vm_clone->cpu_index = worker_thread_index;
+	      vm_clone->thread_index = worker_thread_index;
 	      vm_clone->heap_base = w->thread_mheap;
 	      vm_clone->mbuf_alloc_list = 0;
 	      vm_clone->init_functions_called =
@@ -679,7 +653,7 @@
 	      vec_foreach (rt, nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT])
 	      {
 		vlib_node_t *n = vlib_get_node (vm, rt->node_index);
-		rt->cpu_index = vm_clone->cpu_index;
+		rt->thread_index = vm_clone->thread_index;
 		/* copy initial runtime_data from node */
 		if (n->runtime_data && n->runtime_data_bytes > 0)
 		  clib_memcpy (rt->runtime_data, n->runtime_data,
@@ -692,7 +666,7 @@
 	      vec_foreach (rt, nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT])
 	      {
 		vlib_node_t *n = vlib_get_node (vm, rt->node_index);
-		rt->cpu_index = vm_clone->cpu_index;
+		rt->thread_index = vm_clone->thread_index;
 		/* copy initial runtime_data from node */
 		if (n->runtime_data && n->runtime_data_bytes > 0)
 		  clib_memcpy (rt->runtime_data, n->runtime_data,
@@ -756,7 +730,8 @@
 		  mheap_alloc (0 /* use VM */ , tr->mheap_size);
 	      else
 		w->thread_mheap = main_heap;
-	      w->thread_stack = vlib_thread_stacks[w - vlib_worker_threads];
+	      w->thread_stack =
+		vlib_thread_stack_init (w - vlib_worker_threads);
 	      w->thread_function = tr->function;
 	      w->thread_function_arg = w;
 	      w->instance_id = j;
@@ -827,7 +802,7 @@
 				  uword n_calls,
 				  uword n_vectors, uword n_clocks);
 
-  ASSERT (os_get_cpu_number () == 0);
+  ASSERT (vlib_get_thread_index () == 0);
 
   if (vec_len (vlib_mains) == 1)
     return;
@@ -835,7 +810,7 @@
   vm = vlib_mains[0];
   nm = &vm->node_main;
 
-  ASSERT (os_get_cpu_number () == 0);
+  ASSERT (vlib_get_thread_index () == 0);
   ASSERT (*vlib_worker_threads->wait_at_barrier == 1);
 
   /*
@@ -955,7 +930,7 @@
       vec_foreach (rt, nm_clone->nodes_by_type[VLIB_NODE_TYPE_INTERNAL])
       {
 	vlib_node_t *n = vlib_get_node (vm, rt->node_index);
-	rt->cpu_index = vm_clone->cpu_index;
+	rt->thread_index = vm_clone->thread_index;
 	/* copy runtime_data, will be overwritten later for existing rt */
 	if (n->runtime_data && n->runtime_data_bytes > 0)
 	  clib_memcpy (rt->runtime_data, n->runtime_data,
@@ -981,7 +956,7 @@
       vec_foreach (rt, nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT])
       {
 	vlib_node_t *n = vlib_get_node (vm, rt->node_index);
-	rt->cpu_index = vm_clone->cpu_index;
+	rt->thread_index = vm_clone->thread_index;
 	/* copy runtime_data, will be overwritten later for existing rt */
 	if (n->runtime_data && n->runtime_data_bytes > 0)
 	  clib_memcpy (rt->runtime_data, n->runtime_data,
@@ -1180,7 +1155,7 @@
   if (vlib_mains == 0)
     return;
 
-  ASSERT (os_get_cpu_number () == 0);
+  ASSERT (vlib_get_thread_index () == 0);
   vlib_worker_thread_barrier_sync (vm);
 
   switch (which)
@@ -1212,7 +1187,7 @@
 
   vlib_worker_threads[0].barrier_sync_count++;
 
-  ASSERT (os_get_cpu_number () == 0);
+  ASSERT (vlib_get_thread_index () == 0);
 
   deadline = vlib_time_now (vm) + BARRIER_SYNC_TIMEOUT;
 
@@ -1260,7 +1235,7 @@
 int
 vlib_frame_queue_dequeue (vlib_main_t * vm, vlib_frame_queue_main_t * fqm)
 {
-  u32 thread_id = vm->cpu_index;
+  u32 thread_id = vm->thread_index;
   vlib_frame_queue_t *fq = fqm->vlib_frame_queues[thread_id];
   vlib_frame_queue_elt_t *elt;
   u32 *from, *to;
@@ -1393,7 +1368,7 @@
   vlib_main_t *vm = vlib_get_main ();
   clib_error_t *e;
 
-  ASSERT (vm->cpu_index == os_get_cpu_number ());
+  ASSERT (vm->thread_index == vlib_get_thread_index ());
 
   vlib_worker_thread_init (w);
   clib_time_init (&vm->clib_time);
diff --git a/src/vlib/threads.h b/src/vlib/threads.h
index eca4fc2..101d3d4 100644
--- a/src/vlib/threads.h
+++ b/src/vlib/threads.h
@@ -153,8 +153,6 @@
 /* Called early, in thread 0's context */
 clib_error_t *vlib_thread_init (vlib_main_t * vm);
 
-vlib_worker_thread_t *vlib_alloc_thread (vlib_main_t * vm);
-
 int vlib_frame_queue_enqueue (vlib_main_t * vm, u32 node_runtime_index,
 			      u32 frame_queue_index, vlib_frame_t * frame,
 			      vlib_frame_queue_msg_type_t type);
@@ -183,12 +181,19 @@
 void vlib_worker_thread_barrier_sync (vlib_main_t * vm);
 void vlib_worker_thread_barrier_release (vlib_main_t * vm);
 
+extern __thread uword vlib_thread_index;
+static_always_inline uword
+vlib_get_thread_index (void)
+{
+  return vlib_thread_index;
+}
+
 always_inline void
 vlib_smp_unsafe_warning (void)
 {
   if (CLIB_DEBUG > 0)
     {
-      if (os_get_cpu_number ())
+      if (vlib_get_thread_index ())
 	fformat (stderr, "%s: SMP unsafe warning...\n", __FUNCTION__);
     }
 }
@@ -331,21 +336,21 @@
 }
 
 always_inline u32
-vlib_get_worker_cpu_index (u32 worker_index)
+vlib_get_worker_thread_index (u32 worker_index)
 {
   return worker_index + 1;
 }
 
 always_inline u32
-vlib_get_worker_index (u32 cpu_index)
+vlib_get_worker_index (u32 thread_index)
 {
-  return cpu_index - 1;
+  return thread_index - 1;
 }
 
 always_inline u32
 vlib_get_current_worker_index ()
 {
-  return os_get_cpu_number () - 1;
+  return vlib_get_thread_index () - 1;
 }
 
 static inline void
@@ -467,6 +472,8 @@
   return elt;
 }
 
+u8 *vlib_thread_stack_init (uword thread_index);
+
 int vlib_thread_cb_register (struct vlib_main_t *vm,
 			     vlib_thread_callbacks_t * cb);
 
diff --git a/src/vlib/unix/cj.c b/src/vlib/unix/cj.c
index 33ba163..7c1e947 100644
--- a/src/vlib/unix/cj.c
+++ b/src/vlib/unix/cj.c
@@ -48,7 +48,7 @@
 
   r = (cj_record_t *) & (cjm->records[new_tail & (cjm->num_records - 1)]);
   r->time = vlib_time_now (cjm->vlib_main);
-  r->cpu = os_get_cpu_number ();
+  r->thread_index = vlib_get_thread_index ();
   r->type = type;
   r->data[0] = pointer_to_uword (data0);
   r->data[1] = pointer_to_uword (data1);
@@ -133,7 +133,8 @@
 cj_dump_one_record (cj_record_t * r)
 {
   fprintf (stderr, "[%d]: %10.6f T%02d %llx %llx\n",
-	   r->cpu, r->time, r->type, (long long unsigned int) r->data[0],
+	   r->thread_index, r->time, r->type,
+	   (long long unsigned int) r->data[0],
 	   (long long unsigned int) r->data[1]);
 }
 
@@ -161,7 +162,7 @@
   index = (cjm->tail + 1) & (cjm->num_records - 1);
   r = &(cjm->records[index]);
 
-  if (r->cpu != (u32) ~ 0)
+  if (r->thread_index != (u32) ~ 0)
     {
       /* Yes, dump from tail + 1 to the end */
       for (i = index; i < cjm->num_records; i++)
diff --git a/src/vlib/unix/cj.h b/src/vlib/unix/cj.h
index 67626af..d0a1d46 100644
--- a/src/vlib/unix/cj.h
+++ b/src/vlib/unix/cj.h
@@ -23,7 +23,7 @@
 typedef struct
 {
   f64 time;
-  u32 cpu;
+  u32 thread_index;
   u32 type;
   u64 data[2];
 } cj_record_t;
diff --git a/src/vlib/unix/main.c b/src/vlib/unix/main.c
index 6b96cc0..db5ddd6 100644
--- a/src/vlib/unix/main.c
+++ b/src/vlib/unix/main.c
@@ -510,13 +510,28 @@
   return i;
 }
 
+u8 *
+vlib_thread_stack_init (uword thread_index)
+{
+  vec_validate (vlib_thread_stacks, thread_index);
+  vlib_thread_stacks[thread_index] = clib_mem_alloc_aligned
+    (VLIB_THREAD_STACK_SIZE, VLIB_THREAD_STACK_SIZE);
+
+  /*
+   * Disallow writes to the bottom page of the stack, to
+   * catch stack overflows.
+   */
+  if (mprotect (vlib_thread_stacks[thread_index],
+		clib_mem_get_page_size (), PROT_READ) < 0)
+    clib_unix_warning ("thread stack");
+  return vlib_thread_stacks[thread_index];
+}
+
 int
 vlib_unix_main (int argc, char *argv[])
 {
   vlib_main_t *vm = &vlib_global_main;	/* one and only time for this! */
-  vlib_thread_main_t *tm = &vlib_thread_main;
   unformat_input_t input;
-  u8 *thread_stacks;
   clib_error_t *e;
   int i;
 
@@ -548,29 +563,9 @@
     }
   unformat_free (&input);
 
-  /*
-   * allocate n x VLIB_THREAD_STACK_SIZE stacks, aligned to a
-   * VLIB_THREAD_STACK_SIZE boundary
-   * See also: os_get_cpu_number() in vlib/vlib/threads.c
-   */
-  thread_stacks = clib_mem_alloc_aligned
-    ((uword) tm->n_thread_stacks * VLIB_THREAD_STACK_SIZE,
-     VLIB_THREAD_STACK_SIZE);
+  vlib_thread_stack_init (0);
 
-  vec_validate (vlib_thread_stacks, tm->n_thread_stacks - 1);
-  for (i = 0; i < vec_len (vlib_thread_stacks); i++)
-    {
-      vlib_thread_stacks[i] = thread_stacks;
-
-      /*
-       * Disallow writes to the bottom page of the stack, to
-       * catch stack overflows.
-       */
-      if (mprotect (thread_stacks, clib_mem_get_page_size (), PROT_READ) < 0)
-	clib_unix_warning ("thread stack");
-
-      thread_stacks += VLIB_THREAD_STACK_SIZE;
-    }
+  vlib_thread_index = 0;
 
   i = clib_calljmp (thread0, (uword) vm,
 		    (void *) (vlib_thread_stacks[0] +
diff --git a/src/vnet/adj/adj_l2.c b/src/vnet/adj/adj_l2.c
index f68e54e..20d70dd 100644
--- a/src/vnet/adj/adj_l2.c
+++ b/src/vnet/adj/adj_l2.c
@@ -52,7 +52,7 @@
 {
     u32 * from = vlib_frame_vector_args (frame);
     u32 n_left_from, n_left_to_next, * to_next, next_index;
-    u32 cpu_index = os_get_cpu_number();
+    u32 thread_index = vlib_get_thread_index();
     ethernet_main_t * em = &ethernet_main;
 
     n_left_from = frame->n_vectors;
@@ -93,7 +93,7 @@
             vnet_buffer(p0)->sw_if_index[VLIB_TX] = adj0->rewrite_header.sw_if_index;
 
 	    vlib_increment_combined_counter(&adjacency_counters,
-                                            cpu_index,
+                                            thread_index,
                                             adj_index0,
                                             /* packet increment */ 0,
                                             /* byte increment */ rw_len0);
diff --git a/src/vnet/adj/adj_midchain.c b/src/vnet/adj/adj_midchain.c
index e8087f0..5756de4 100644
--- a/src/vnet/adj/adj_midchain.c
+++ b/src/vnet/adj/adj_midchain.c
@@ -49,7 +49,7 @@
     u32 next_index;
     vnet_main_t *vnm = vnet_get_main ();
     vnet_interface_main_t *im = &vnm->interface_main;
-    u32 cpu_index = vm->cpu_index;
+    u32 thread_index = vm->thread_index;
 
     /* Vector of buffer / pkt indices we're supposed to process */
     from = vlib_frame_vector_args (frame);
@@ -124,13 +124,13 @@
 	    {
 		vlib_increment_combined_counter (im->combined_sw_if_counters
 						 + VNET_INTERFACE_COUNTER_TX,
-						 cpu_index,
+						 thread_index,
 						 adj0->rewrite_header.sw_if_index,
 						 1,
 						 vlib_buffer_length_in_chain (vm, b0));
 		vlib_increment_combined_counter (im->combined_sw_if_counters
 						 + VNET_INTERFACE_COUNTER_TX,
-						 cpu_index,
+						 thread_index,
 						 adj1->rewrite_header.sw_if_index,
 						 1,
 						 vlib_buffer_length_in_chain (vm, b1));
@@ -181,7 +181,7 @@
 	    {
 		vlib_increment_combined_counter (im->combined_sw_if_counters
 						 + VNET_INTERFACE_COUNTER_TX,
-						 cpu_index,
+						 thread_index,
 						 adj0->rewrite_header.sw_if_index,
 						 1,
 						 vlib_buffer_length_in_chain (vm, b0));
diff --git a/src/vnet/adj/adj_nsh.c b/src/vnet/adj/adj_nsh.c
index 9a0f9d8..128570b 100644
--- a/src/vnet/adj/adj_nsh.c
+++ b/src/vnet/adj/adj_nsh.c
@@ -53,7 +53,7 @@
 {
     u32 * from = vlib_frame_vector_args (frame);
     u32 n_left_from, n_left_to_next, * to_next, next_index;
-    u32 cpu_index = os_get_cpu_number();
+    u32 thread_index = vlib_get_thread_index();
 
     n_left_from = frame->n_vectors;
     next_index = node->cached_next_index;
@@ -94,7 +94,7 @@
             vnet_buffer(p0)->ip.save_rewrite_length = rw_len0;
 
             vlib_increment_combined_counter(&adjacency_counters,
-                                            cpu_index,
+                                            thread_index,
                                             adj_index0,
                                             /* packet increment */ 0,
                                             /* byte increment */ rw_len0);
diff --git a/src/vnet/classify/vnet_classify.c b/src/vnet/classify/vnet_classify.c
index 98842a4..70a189b 100644
--- a/src/vnet/classify/vnet_classify.c
+++ b/src/vnet/classify/vnet_classify.c
@@ -251,12 +251,12 @@
   vnet_classify_entry_##size##_t * working_copy##size = 0;
   foreach_size_in_u32x4;
 #undef _
-  u32 cpu_number = os_get_cpu_number();
+  u32 thread_index = vlib_get_thread_index();
 
-  if (cpu_number >= vec_len (t->working_copies))
+  if (thread_index >= vec_len (t->working_copies))
     {
       oldheap = clib_mem_set_heap (t->mheap);
-      vec_validate (t->working_copies, cpu_number);
+      vec_validate (t->working_copies, thread_index);
       clib_mem_set_heap (oldheap);
     }
 
@@ -265,7 +265,7 @@
    * updates from multiple threads will not result in sporadic, spurious
    * lookup failures. 
    */
-  working_copy = t->working_copies[cpu_number];
+  working_copy = t->working_copies[thread_index];
 
   t->saved_bucket.as_u64 = b->as_u64;
   oldheap = clib_mem_set_heap (t->mheap);
@@ -290,7 +290,7 @@
         default:
           abort();
         }
-      t->working_copies[cpu_number] = working_copy;
+      t->working_copies[thread_index] = working_copy;
     }
 
   _vec_len(working_copy) = (1<<b->log2_pages)*t->entries_per_page;
@@ -318,7 +318,7 @@
   working_bucket.offset = vnet_classify_get_offset (t, working_copy);
   CLIB_MEMORY_BARRIER();
   b->as_u64 = working_bucket.as_u64;
-  t->working_copies[cpu_number] = working_copy;
+  t->working_copies[thread_index] = working_copy;
 }
 
 static vnet_classify_entry_t *
@@ -387,7 +387,7 @@
   int i;
   u64 hash, new_hash;
   u32 new_log2_pages;
-  u32 cpu_number = os_get_cpu_number();
+  u32 thread_index = vlib_get_thread_index();
   u8 * key_minus_skip;
 
   ASSERT ((add_v->flags & VNET_CLASSIFY_ENTRY_FREE) == 0);
@@ -498,7 +498,7 @@
   new_log2_pages = t->saved_bucket.log2_pages + 1;
 
  expand_again:
-  working_copy = t->working_copies[cpu_number];
+  working_copy = t->working_copies[thread_index];
   new_v = split_and_rehash (t, working_copy, new_log2_pages);
 
   if (new_v == 0)
diff --git a/src/vnet/cop/ip4_whitelist.c b/src/vnet/cop/ip4_whitelist.c
index 6ef3d7d..1b5e336 100644
--- a/src/vnet/cop/ip4_whitelist.c
+++ b/src/vnet/cop/ip4_whitelist.c
@@ -60,7 +60,7 @@
   cop_feature_type_t next_index;
   cop_main_t *cm = &cop_main;
   vlib_combined_counter_main_t * vcm = &load_balance_main.lbm_via_counters;
-  u32 cpu_index = vm->cpu_index;
+  u32 thread_index = vm->thread_index;
 
   from = vlib_frame_vector_args (frame);
   n_left_from = frame->n_vectors;
@@ -177,12 +177,12 @@
           dpo1 = load_balance_get_bucket_i(lb1, 0);
 
           vlib_increment_combined_counter
-              (vcm, cpu_index, lb_index0, 1,
+              (vcm, thread_index, lb_index0, 1,
                vlib_buffer_length_in_chain (vm, b0)
                + sizeof(ethernet_header_t));
 
           vlib_increment_combined_counter
-              (vcm, cpu_index, lb_index1, 1,
+              (vcm, thread_index, lb_index1, 1,
                vlib_buffer_length_in_chain (vm, b1)
                + sizeof(ethernet_header_t));
 
@@ -273,7 +273,7 @@
           dpo0 = load_balance_get_bucket_i(lb0, 0);
 
           vlib_increment_combined_counter 
-              (vcm, cpu_index, lb_index0, 1,
+              (vcm, thread_index, lb_index0, 1,
                vlib_buffer_length_in_chain (vm, b0) 
                + sizeof(ethernet_header_t));
 
diff --git a/src/vnet/cop/ip6_whitelist.c b/src/vnet/cop/ip6_whitelist.c
index c2e16cc..f3fe62e 100644
--- a/src/vnet/cop/ip6_whitelist.c
+++ b/src/vnet/cop/ip6_whitelist.c
@@ -61,7 +61,7 @@
   cop_main_t *cm = &cop_main;
   ip6_main_t * im6 = &ip6_main;
   vlib_combined_counter_main_t * vcm = &load_balance_main.lbm_via_counters;
-  u32 cpu_index = vm->cpu_index;
+  u32 thread_index = vm->thread_index;
 
   from = vlib_frame_vector_args (frame);
   n_left_from = frame->n_vectors;
@@ -153,12 +153,12 @@
           dpo1 = load_balance_get_bucket_i(lb1, 0);
 
           vlib_increment_combined_counter 
-              (vcm, cpu_index, lb_index0, 1,
+              (vcm, thread_index, lb_index0, 1,
                vlib_buffer_length_in_chain (vm, b0) 
                + sizeof(ethernet_header_t));
 
           vlib_increment_combined_counter 
-              (vcm, cpu_index, lb_index1, 1,
+              (vcm, thread_index, lb_index1, 1,
                vlib_buffer_length_in_chain (vm, b1)
                + sizeof(ethernet_header_t));
 
@@ -233,7 +233,7 @@
           dpo0 = load_balance_get_bucket_i(lb0, 0);
 
           vlib_increment_combined_counter 
-              (vcm, cpu_index, lb_index0, 1,
+              (vcm, thread_index, lb_index0, 1,
                vlib_buffer_length_in_chain (vm, b0) 
                + sizeof(ethernet_header_t));
 
diff --git a/src/vnet/devices/af_packet/node.c b/src/vnet/devices/af_packet/node.c
index ba337f3..7698010 100644
--- a/src/vnet/devices/af_packet/node.c
+++ b/src/vnet/devices/af_packet/node.c
@@ -124,7 +124,7 @@
   u32 frame_num = apif->rx_req->tp_frame_nr;
   u8 *block_start = apif->rx_ring + block * block_size;
   uword n_trace = vlib_get_trace_count (vm, node);
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
   u32 n_buffer_bytes = vlib_buffer_free_list_buffer_size (vm,
 							  VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX);
   u32 min_bufs = apif->rx_req->tp_frame_size / n_buffer_bytes;
@@ -132,15 +132,15 @@
   if (apif->per_interface_next_index != ~0)
     next_index = apif->per_interface_next_index;
 
-  n_free_bufs = vec_len (apm->rx_buffers[cpu_index]);
+  n_free_bufs = vec_len (apm->rx_buffers[thread_index]);
   if (PREDICT_FALSE (n_free_bufs < VLIB_FRAME_SIZE))
     {
-      vec_validate (apm->rx_buffers[cpu_index],
+      vec_validate (apm->rx_buffers[thread_index],
 		    VLIB_FRAME_SIZE + n_free_bufs - 1);
       n_free_bufs +=
-	vlib_buffer_alloc (vm, &apm->rx_buffers[cpu_index][n_free_bufs],
+	vlib_buffer_alloc (vm, &apm->rx_buffers[thread_index][n_free_bufs],
 			   VLIB_FRAME_SIZE);
-      _vec_len (apm->rx_buffers[cpu_index]) = n_free_bufs;
+      _vec_len (apm->rx_buffers[thread_index]) = n_free_bufs;
     }
 
   rx_frame = apif->next_rx_frame;
@@ -163,11 +163,11 @@
 	    {
 	      /* grab free buffer */
 	      u32 last_empty_buffer =
-		vec_len (apm->rx_buffers[cpu_index]) - 1;
+		vec_len (apm->rx_buffers[thread_index]) - 1;
 	      prev_bi0 = bi0;
-	      bi0 = apm->rx_buffers[cpu_index][last_empty_buffer];
+	      bi0 = apm->rx_buffers[thread_index][last_empty_buffer];
 	      b0 = vlib_get_buffer (vm, bi0);
-	      _vec_len (apm->rx_buffers[cpu_index]) = last_empty_buffer;
+	      _vec_len (apm->rx_buffers[thread_index]) = last_empty_buffer;
 	      n_free_bufs--;
 
 	      /* copy data */
@@ -236,9 +236,9 @@
   vlib_increment_combined_counter
     (vnet_get_main ()->interface_main.combined_sw_if_counters
      + VNET_INTERFACE_COUNTER_RX,
-     os_get_cpu_number (), apif->hw_if_index, n_rx_packets, n_rx_bytes);
+     vlib_get_thread_index (), apif->hw_if_index, n_rx_packets, n_rx_bytes);
 
-  vnet_device_increment_rx_packets (cpu_index, n_rx_packets);
+  vnet_device_increment_rx_packets (thread_index, n_rx_packets);
   return n_rx_packets;
 }
 
diff --git a/src/vnet/devices/devices.c b/src/vnet/devices/devices.c
index 4164522..5e5e812 100644
--- a/src/vnet/devices/devices.c
+++ b/src/vnet/devices/devices.c
@@ -104,7 +104,7 @@
 
 void
 vnet_device_input_assign_thread (u32 hw_if_index,
-				 u16 queue_id, uword cpu_index)
+				 u16 queue_id, uword thread_index)
 {
   vnet_main_t *vnm = vnet_get_main ();
   vnet_device_main_t *vdm = &vnet_device_main;
@@ -115,19 +115,19 @@
 
   ASSERT (hw->input_node_index > 0);
 
-  if (vdm->first_worker_cpu_index == 0)
-    cpu_index = 0;
+  if (vdm->first_worker_thread_index == 0)
+    thread_index = 0;
 
-  if (cpu_index != 0 &&
-      (cpu_index < vdm->first_worker_cpu_index ||
-       cpu_index > vdm->last_worker_cpu_index))
+  if (thread_index != 0 &&
+      (thread_index < vdm->first_worker_thread_index ||
+       thread_index > vdm->last_worker_thread_index))
     {
-      cpu_index = vdm->next_worker_cpu_index++;
-      if (vdm->next_worker_cpu_index > vdm->last_worker_cpu_index)
-	vdm->next_worker_cpu_index = vdm->first_worker_cpu_index;
+      thread_index = vdm->next_worker_thread_index++;
+      if (vdm->next_worker_thread_index > vdm->last_worker_thread_index)
+	vdm->next_worker_thread_index = vdm->first_worker_thread_index;
     }
 
-  vm = vlib_mains[cpu_index];
+  vm = vlib_mains[thread_index];
   rt = vlib_node_get_runtime_data (vm, hw->input_node_index);
 
   vec_add2 (rt->devices_and_queues, dq, 1);
@@ -136,33 +136,33 @@
   dq->queue_id = queue_id;
 
   vec_sort_with_function (rt->devices_and_queues, vnet_device_queue_sort);
-  vec_validate (hw->input_node_cpu_index_by_queue, queue_id);
-  hw->input_node_cpu_index_by_queue[queue_id] = cpu_index;
+  vec_validate (hw->input_node_thread_index_by_queue, queue_id);
+  hw->input_node_thread_index_by_queue[queue_id] = thread_index;
 }
 
 static int
 vnet_device_input_unassign_thread (u32 hw_if_index, u16 queue_id,
-				   uword cpu_index)
+				   uword thread_index)
 {
   vnet_main_t *vnm = vnet_get_main ();
   vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);
   vnet_device_input_runtime_t *rt;
   vnet_device_and_queue_t *dq;
-  uword old_cpu_index;
+  uword old_thread_index;
 
-  if (hw->input_node_cpu_index_by_queue == 0)
+  if (hw->input_node_thread_index_by_queue == 0)
     return VNET_API_ERROR_INVALID_INTERFACE;
 
-  if (vec_len (hw->input_node_cpu_index_by_queue) < queue_id + 1)
+  if (vec_len (hw->input_node_thread_index_by_queue) < queue_id + 1)
     return VNET_API_ERROR_INVALID_INTERFACE;
 
-  old_cpu_index = hw->input_node_cpu_index_by_queue[queue_id];
+  old_thread_index = hw->input_node_thread_index_by_queue[queue_id];
 
-  if (old_cpu_index == cpu_index)
+  if (old_thread_index == thread_index)
     return 0;
 
   rt =
-    vlib_node_get_runtime_data (vlib_mains[old_cpu_index],
+    vlib_node_get_runtime_data (vlib_mains[old_thread_index],
 				hw->input_node_index);
 
   vec_foreach (dq, rt->devices_and_queues)
@@ -240,7 +240,7 @@
   vnet_device_main_t *vdm = &vnet_device_main;
   u32 hw_if_index = (u32) ~ 0;
   u32 queue_id = (u32) 0;
-  u32 cpu_index = (u32) ~ 0;
+  u32 thread_index = (u32) ~ 0;
   int rv;
 
   if (!unformat_user (input, unformat_line_input, line_input))
@@ -253,10 +253,10 @@
 	;
       else if (unformat (line_input, "queue %d", &queue_id))
 	;
-      else if (unformat (line_input, "main", &cpu_index))
-	cpu_index = 0;
-      else if (unformat (line_input, "worker %d", &cpu_index))
-	cpu_index += vdm->first_worker_cpu_index;
+      else if (unformat (line_input, "main", &thread_index))
+	thread_index = 0;
+      else if (unformat (line_input, "worker %d", &thread_index))
+	thread_index += vdm->first_worker_thread_index;
       else
 	{
 	  error = clib_error_return (0, "parse error: '%U'",
@@ -271,16 +271,17 @@
   if (hw_if_index == (u32) ~ 0)
     return clib_error_return (0, "please specify valid interface name");
 
-  if (cpu_index > vdm->last_worker_cpu_index)
+  if (thread_index > vdm->last_worker_thread_index)
     return clib_error_return (0,
 			      "please specify valid worker thread or main");
 
-  rv = vnet_device_input_unassign_thread (hw_if_index, queue_id, cpu_index);
+  rv =
+    vnet_device_input_unassign_thread (hw_if_index, queue_id, thread_index);
 
   if (rv)
     return clib_error_return (0, "not found");
 
-  vnet_device_input_assign_thread (hw_if_index, queue_id, cpu_index);
+  vnet_device_input_assign_thread (hw_if_index, queue_id, thread_index);
 
   return 0;
 }
@@ -326,9 +327,9 @@
   tr = p ? (vlib_thread_registration_t *) p[0] : 0;
   if (tr && tr->count > 0)
     {
-      vdm->first_worker_cpu_index = tr->first_index;
-      vdm->next_worker_cpu_index = tr->first_index;
-      vdm->last_worker_cpu_index = tr->first_index + tr->count - 1;
+      vdm->first_worker_thread_index = tr->first_index;
+      vdm->next_worker_thread_index = tr->first_index;
+      vdm->last_worker_thread_index = tr->first_index + tr->count - 1;
     }
   return 0;
 }
diff --git a/src/vnet/devices/devices.h b/src/vnet/devices/devices.h
index bbb29fe..966f830 100644
--- a/src/vnet/devices/devices.h
+++ b/src/vnet/devices/devices.h
@@ -50,9 +50,9 @@
 typedef struct
 {
   vnet_device_per_worker_data_t *workers;
-  uword first_worker_cpu_index;
-  uword last_worker_cpu_index;
-  uword next_worker_cpu_index;
+  uword first_worker_thread_index;
+  uword last_worker_thread_index;
+  uword next_worker_thread_index;
 } vnet_device_main_t;
 
 typedef struct
@@ -80,7 +80,7 @@
 }
 
 void vnet_device_input_assign_thread (u32 hw_if_index, u16 queue_id,
-				      uword cpu_index);
+				      uword thread_index);
 
 static inline u64
 vnet_get_aggregate_rx_packets (void)
@@ -95,12 +95,12 @@
 }
 
 static inline void
-vnet_device_increment_rx_packets (u32 cpu_index, u64 count)
+vnet_device_increment_rx_packets (u32 thread_index, u64 count)
 {
   vnet_device_main_t *vdm = &vnet_device_main;
   vnet_device_per_worker_data_t *pwd;
 
-  pwd = vec_elt_at_index (vdm->workers, cpu_index);
+  pwd = vec_elt_at_index (vdm->workers, thread_index);
   pwd->aggregate_rx_packets += count;
 }
 
@@ -117,9 +117,9 @@
 {
   vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);
 
-  ASSERT (queue_id < vec_len (hw->input_node_cpu_index_by_queue));
-  u32 cpu_index = hw->input_node_cpu_index_by_queue[queue_id];
-  vlib_node_set_interrupt_pending (vlib_mains[cpu_index],
+  ASSERT (queue_id < vec_len (hw->input_node_thread_index_by_queue));
+  u32 thread_index = hw->input_node_thread_index_by_queue[queue_id];
+  vlib_node_set_interrupt_pending (vlib_mains[thread_index],
 				   hw->input_node_index);
 }
 
diff --git a/src/vnet/devices/netmap/node.c b/src/vnet/devices/netmap/node.c
index 68ea783..e120eea 100644
--- a/src/vnet/devices/netmap/node.c
+++ b/src/vnet/devices/netmap/node.c
@@ -98,22 +98,22 @@
   u32 n_free_bufs;
   struct netmap_ring *ring;
   int cur_ring;
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
   u32 n_buffer_bytes = vlib_buffer_free_list_buffer_size (vm,
 							  VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX);
 
   if (nif->per_interface_next_index != ~0)
     next_index = nif->per_interface_next_index;
 
-  n_free_bufs = vec_len (nm->rx_buffers[cpu_index]);
+  n_free_bufs = vec_len (nm->rx_buffers[thread_index]);
   if (PREDICT_FALSE (n_free_bufs < VLIB_FRAME_SIZE))
     {
-      vec_validate (nm->rx_buffers[cpu_index],
+      vec_validate (nm->rx_buffers[thread_index],
 		    VLIB_FRAME_SIZE + n_free_bufs - 1);
       n_free_bufs +=
-	vlib_buffer_alloc (vm, &nm->rx_buffers[cpu_index][n_free_bufs],
+	vlib_buffer_alloc (vm, &nm->rx_buffers[thread_index][n_free_bufs],
 			   VLIB_FRAME_SIZE);
-      _vec_len (nm->rx_buffers[cpu_index]) = n_free_bufs;
+      _vec_len (nm->rx_buffers[thread_index]) = n_free_bufs;
     }
 
   cur_ring = nif->first_rx_ring;
@@ -163,11 +163,11 @@
 		  vlib_buffer_t *b0;
 		  /* grab free buffer */
 		  u32 last_empty_buffer =
-		    vec_len (nm->rx_buffers[cpu_index]) - 1;
+		    vec_len (nm->rx_buffers[thread_index]) - 1;
 		  prev_bi0 = bi0;
-		  bi0 = nm->rx_buffers[cpu_index][last_empty_buffer];
+		  bi0 = nm->rx_buffers[thread_index][last_empty_buffer];
 		  b0 = vlib_get_buffer (vm, bi0);
-		  _vec_len (nm->rx_buffers[cpu_index]) = last_empty_buffer;
+		  _vec_len (nm->rx_buffers[thread_index]) = last_empty_buffer;
 		  n_free_bufs--;
 
 		  /* copy data */
@@ -247,9 +247,9 @@
   vlib_increment_combined_counter
     (vnet_get_main ()->interface_main.combined_sw_if_counters
      + VNET_INTERFACE_COUNTER_RX,
-     os_get_cpu_number (), nif->hw_if_index, n_rx_packets, n_rx_bytes);
+     vlib_get_thread_index (), nif->hw_if_index, n_rx_packets, n_rx_bytes);
 
-  vnet_device_increment_rx_packets (cpu_index, n_rx_packets);
+  vnet_device_increment_rx_packets (thread_index, n_rx_packets);
 
   return n_rx_packets;
 }
@@ -260,7 +260,7 @@
 {
   int i;
   u32 n_rx_packets = 0;
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
   netmap_main_t *nm = &netmap_main;
   netmap_if_t *nmi;
 
@@ -269,7 +269,7 @@
       nmi = vec_elt_at_index (nm->interfaces, i);
       if (nmi->is_admin_up &&
 	  (i % nm->input_cpu_count) ==
-	  (cpu_index - nm->input_cpu_first_index))
+	  (thread_index - nm->input_cpu_first_index))
 	n_rx_packets += netmap_device_input_fn (vm, node, frame, nmi);
     }
 
diff --git a/src/vnet/devices/ssvm/node.c b/src/vnet/devices/ssvm/node.c
index a6c9dfd..539b416 100644
--- a/src/vnet/devices/ssvm/node.c
+++ b/src/vnet/devices/ssvm/node.c
@@ -89,7 +89,7 @@
   ethernet_header_t *eh0;
   u16 type0;
   u32 n_rx_bytes = 0, l3_offset0;
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
   u32 trace_cnt __attribute__ ((unused)) = vlib_get_trace_count (vm, node);
   volatile u32 *lock;
   u32 *elt_indices;
@@ -284,10 +284,10 @@
 
   vlib_increment_combined_counter
     (vnet_get_main ()->interface_main.combined_sw_if_counters
-     + VNET_INTERFACE_COUNTER_RX, cpu_index,
+     + VNET_INTERFACE_COUNTER_RX, thread_index,
      intfc->vlib_hw_if_index, rx_queue_index, n_rx_bytes);
 
-  vnet_device_increment_rx_packets (cpu_index, rx_queue_index);
+  vnet_device_increment_rx_packets (thread_index, rx_queue_index);
 
   return rx_queue_index;
 }
diff --git a/src/vnet/devices/virtio/vhost-user.c b/src/vnet/devices/virtio/vhost-user.c
index 00807dc..5e720f6 100644
--- a/src/vnet/devices/virtio/vhost-user.c
+++ b/src/vnet/devices/virtio/vhost-user.c
@@ -331,7 +331,7 @@
 {
   //Let's try to assign one queue to each thread
   u32 qid = 0;
-  u32 cpu_index = 0;
+  u32 thread_index = 0;
   vui->use_tx_spinlock = 0;
   while (1)
     {
@@ -341,20 +341,21 @@
 	  if (!rxvq->started || !rxvq->enabled)
 	    continue;
 
-	  vui->per_cpu_tx_qid[cpu_index] = qid;
-	  cpu_index++;
-	  if (cpu_index == vlib_get_thread_main ()->n_vlib_mains)
+	  vui->per_cpu_tx_qid[thread_index] = qid;
+	  thread_index++;
+	  if (thread_index == vlib_get_thread_main ()->n_vlib_mains)
 	    return;
 	}
       //We need to loop, meaning the spinlock has to be used
       vui->use_tx_spinlock = 1;
-      if (cpu_index == 0)
+      if (thread_index == 0)
 	{
 	  //Could not find a single valid one
-	  for (cpu_index = 0;
-	       cpu_index < vlib_get_thread_main ()->n_vlib_mains; cpu_index++)
+	  for (thread_index = 0;
+	       thread_index < vlib_get_thread_main ()->n_vlib_mains;
+	       thread_index++)
 	    {
-	      vui->per_cpu_tx_qid[cpu_index] = 0;
+	      vui->per_cpu_tx_qid[thread_index] = 0;
 	    }
 	  return;
 	}
@@ -368,7 +369,7 @@
   vhost_user_intf_t *vui;
   vhost_cpu_t *vhc;
   u32 *workers = 0;
-  u32 cpu_index;
+  u32 thread_index;
   vlib_main_t *vm;
 
   //Let's list all workers cpu indexes
@@ -400,9 +401,9 @@
 	    continue;
 
 	  i %= vec_len (vui_workers);
-	  cpu_index = vui_workers[i];
+	  thread_index = vui_workers[i];
 	  i++;
-	  vhc = &vum->cpus[cpu_index];
+	  vhc = &vum->cpus[thread_index];
 
 	  iaq.qid = qid;
 	  iaq.vhost_iface_index = vui - vum->vhost_user_interfaces;
@@ -429,14 +430,14 @@
     vhc->operation_mode = mode;
   }
 
-  for (cpu_index = vum->input_cpu_first_index;
-       cpu_index < vum->input_cpu_first_index + vum->input_cpu_count;
-       cpu_index++)
+  for (thread_index = vum->input_cpu_first_index;
+       thread_index < vum->input_cpu_first_index + vum->input_cpu_count;
+       thread_index++)
     {
       vlib_node_state_t state = VLIB_NODE_STATE_POLLING;
 
-      vhc = &vum->cpus[cpu_index];
-      vm = vlib_mains ? vlib_mains[cpu_index] : &vlib_global_main;
+      vhc = &vum->cpus[thread_index];
+      vm = vlib_mains ? vlib_mains[thread_index] : &vlib_global_main;
       switch (vhc->operation_mode)
 	{
 	case VHOST_USER_INTERRUPT_MODE:
@@ -532,7 +533,7 @@
 {
   vhost_user_main_t *vum = &vhost_user_main;
   vhost_cpu_t *vhc;
-  u32 cpu_index;
+  u32 thread_index;
   vhost_iface_and_queue_t *vhiq;
   vlib_main_t *vm;
   u32 ifq2;
@@ -553,8 +554,8 @@
 	  if ((vhiq->vhost_iface_index == (ifq >> 8)) &&
 	      (VHOST_VRING_IDX_TX (vhiq->qid) == (ifq & 0xff)))
 	    {
-	      cpu_index = vhc - vum->cpus;
-	      vm = vlib_mains ? vlib_mains[cpu_index] : &vlib_global_main;
+	      thread_index = vhc - vum->cpus;
+	      vm = vlib_mains ? vlib_mains[thread_index] : &vlib_global_main;
 	      /*
 	       * Convert RX virtqueue number in the lower byte to vring
 	       * queue index for the input node process. Top bytes contain
@@ -1592,7 +1593,7 @@
   u32 n_trace = vlib_get_trace_count (vm, node);
   u16 qsz_mask;
   u32 map_hint = 0;
-  u16 cpu_index = os_get_cpu_number ();
+  u16 thread_index = vlib_get_thread_index ();
   u16 copy_len = 0;
 
   {
@@ -1651,32 +1652,32 @@
    * in the loop and come back later. This is not an issue as for big packet,
    * processing cost really comes from the memory copy.
    */
-  if (PREDICT_FALSE (vum->cpus[cpu_index].rx_buffers_len < n_left + 1))
+  if (PREDICT_FALSE (vum->cpus[thread_index].rx_buffers_len < n_left + 1))
     {
-      u32 curr_len = vum->cpus[cpu_index].rx_buffers_len;
-      vum->cpus[cpu_index].rx_buffers_len +=
+      u32 curr_len = vum->cpus[thread_index].rx_buffers_len;
+      vum->cpus[thread_index].rx_buffers_len +=
 	vlib_buffer_alloc_from_free_list (vm,
-					  vum->cpus[cpu_index].rx_buffers +
+					  vum->cpus[thread_index].rx_buffers +
 					  curr_len,
 					  VHOST_USER_RX_BUFFERS_N - curr_len,
 					  VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX);
 
       if (PREDICT_FALSE
-	  (vum->cpus[cpu_index].rx_buffers_len <
+	  (vum->cpus[thread_index].rx_buffers_len <
 	   VHOST_USER_RX_BUFFER_STARVATION))
 	{
 	  /* In case of buffer starvation, discard some packets from the queue
 	   * and log the event.
 	   * We keep doing best effort for the remaining packets. */
-	  u32 flush = (n_left + 1 > vum->cpus[cpu_index].rx_buffers_len) ?
-	    n_left + 1 - vum->cpus[cpu_index].rx_buffers_len : 1;
+	  u32 flush = (n_left + 1 > vum->cpus[thread_index].rx_buffers_len) ?
+	    n_left + 1 - vum->cpus[thread_index].rx_buffers_len : 1;
 	  flush = vhost_user_rx_discard_packet (vm, vui, txvq, flush);
 
 	  n_left -= flush;
 	  vlib_increment_simple_counter (vnet_main.
 					 interface_main.sw_if_counters +
 					 VNET_INTERFACE_COUNTER_DROP,
-					 os_get_cpu_number (),
+					 vlib_get_thread_index (),
 					 vui->sw_if_index, flush);
 
 	  vlib_error_count (vm, vhost_user_input_node.index,
@@ -1696,7 +1697,7 @@
 	  u32 desc_data_offset;
 	  vring_desc_t *desc_table = txvq->desc;
 
-	  if (PREDICT_FALSE (vum->cpus[cpu_index].rx_buffers_len <= 1))
+	  if (PREDICT_FALSE (vum->cpus[thread_index].rx_buffers_len <= 1))
 	    {
 	      /* Not enough rx_buffers
 	       * Note: We yeld on 1 so we don't need to do an additional
@@ -1707,17 +1708,18 @@
 	    }
 
 	  desc_current = txvq->avail->ring[txvq->last_avail_idx & qsz_mask];
-	  vum->cpus[cpu_index].rx_buffers_len--;
-	  bi_current = (vum->cpus[cpu_index].rx_buffers)
-	    [vum->cpus[cpu_index].rx_buffers_len];
+	  vum->cpus[thread_index].rx_buffers_len--;
+	  bi_current = (vum->cpus[thread_index].rx_buffers)
+	    [vum->cpus[thread_index].rx_buffers_len];
 	  b_head = b_current = vlib_get_buffer (vm, bi_current);
 	  to_next[0] = bi_current;	//We do that now so we can forget about bi_current
 	  to_next++;
 	  n_left_to_next--;
 
 	  vlib_prefetch_buffer_with_index (vm,
-					   (vum->cpus[cpu_index].rx_buffers)
-					   [vum->cpus[cpu_index].
+					   (vum->
+					    cpus[thread_index].rx_buffers)
+					   [vum->cpus[thread_index].
 					    rx_buffers_len - 1], LOAD);
 
 	  /* Just preset the used descriptor id and length for later */
@@ -1791,7 +1793,7 @@
 		  (b_current->current_length == VLIB_BUFFER_DATA_SIZE))
 		{
 		  if (PREDICT_FALSE
-		      (vum->cpus[cpu_index].rx_buffers_len == 0))
+		      (vum->cpus[thread_index].rx_buffers_len == 0))
 		    {
 		      /* Cancel speculation */
 		      to_next--;
@@ -1805,17 +1807,18 @@
 		       * but valid.
 		       */
 		      vhost_user_input_rewind_buffers (vm,
-						       &vum->cpus[cpu_index],
+						       &vum->cpus
+						       [thread_index],
 						       b_head);
 		      n_left = 0;
 		      goto stop;
 		    }
 
 		  /* Get next output */
-		  vum->cpus[cpu_index].rx_buffers_len--;
+		  vum->cpus[thread_index].rx_buffers_len--;
 		  u32 bi_next =
-		    (vum->cpus[cpu_index].rx_buffers)[vum->cpus
-						      [cpu_index].rx_buffers_len];
+		    (vum->cpus[thread_index].rx_buffers)[vum->cpus
+							 [thread_index].rx_buffers_len];
 		  b_current->next_buffer = bi_next;
 		  b_current->flags |= VLIB_BUFFER_NEXT_PRESENT;
 		  bi_current = bi_next;
@@ -1823,7 +1826,7 @@
 		}
 
 	      /* Prepare a copy order executed later for the data */
-	      vhost_copy_t *cpy = &vum->cpus[cpu_index].copy[copy_len];
+	      vhost_copy_t *cpy = &vum->cpus[thread_index].copy[copy_len];
 	      copy_len++;
 	      u32 desc_data_l =
 		desc_table[desc_current].len - desc_data_offset;
@@ -1880,7 +1883,7 @@
 	  if (PREDICT_FALSE (copy_len >= VHOST_USER_RX_COPY_THRESHOLD))
 	    {
 	      if (PREDICT_FALSE
-		  (vhost_user_input_copy (vui, vum->cpus[cpu_index].copy,
+		  (vhost_user_input_copy (vui, vum->cpus[thread_index].copy,
 					  copy_len, &map_hint)))
 		{
 		  clib_warning
@@ -1905,7 +1908,7 @@
 
   /* Do the memory copies */
   if (PREDICT_FALSE
-      (vhost_user_input_copy (vui, vum->cpus[cpu_index].copy,
+      (vhost_user_input_copy (vui, vum->cpus[thread_index].copy,
 			      copy_len, &map_hint)))
     {
       clib_warning ("Memory mapping error on interface hw_if_index=%d "
@@ -1933,9 +1936,9 @@
   vlib_increment_combined_counter
     (vnet_main.interface_main.combined_sw_if_counters
      + VNET_INTERFACE_COUNTER_RX,
-     os_get_cpu_number (), vui->sw_if_index, n_rx_packets, n_rx_bytes);
+     vlib_get_thread_index (), vui->sw_if_index, n_rx_packets, n_rx_bytes);
 
-  vnet_device_increment_rx_packets (cpu_index, n_rx_packets);
+  vnet_device_increment_rx_packets (thread_index, n_rx_packets);
 
   return n_rx_packets;
 }
@@ -1946,15 +1949,15 @@
 {
   vhost_user_main_t *vum = &vhost_user_main;
   uword n_rx_packets = 0;
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
   vhost_iface_and_queue_t *vhiq;
   vhost_user_intf_t *vui;
   vhost_cpu_t *vhc;
 
-  vhc = &vum->cpus[cpu_index];
+  vhc = &vum->cpus[thread_index];
   if (PREDICT_TRUE (vhc->operation_mode == VHOST_USER_POLLING_MODE))
     {
-      vec_foreach (vhiq, vum->cpus[cpu_index].rx_queues)
+      vec_foreach (vhiq, vum->cpus[thread_index].rx_queues)
       {
 	vui = &vum->vhost_user_interfaces[vhiq->vhost_iface_index];
 	n_rx_packets += vhost_user_if_input (vm, vum, vui, vhiq->qid, node);
@@ -2096,7 +2099,7 @@
   vhost_user_vring_t *rxvq;
   u16 qsz_mask;
   u8 error;
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
   u32 map_hint = 0;
   u8 retry = 8;
   u16 copy_len;
@@ -2116,7 +2119,7 @@
 
   qid =
     VHOST_VRING_IDX_RX (*vec_elt_at_index
-			(vui->per_cpu_tx_qid, os_get_cpu_number ()));
+			(vui->per_cpu_tx_qid, vlib_get_thread_index ()));
   rxvq = &vui->vrings[qid];
   if (PREDICT_FALSE (vui->use_tx_spinlock))
     vhost_user_vring_lock (vui, qid);
@@ -2143,10 +2146,10 @@
 
       if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
 	{
-	  vum->cpus[cpu_index].current_trace =
+	  vum->cpus[thread_index].current_trace =
 	    vlib_add_trace (vm, node, b0,
-			    sizeof (*vum->cpus[cpu_index].current_trace));
-	  vhost_user_tx_trace (vum->cpus[cpu_index].current_trace,
+			    sizeof (*vum->cpus[thread_index].current_trace));
+	  vhost_user_tx_trace (vum->cpus[thread_index].current_trace,
 			       vui, qid / 2, b0, rxvq);
 	}
 
@@ -2188,14 +2191,14 @@
       {
 	// Get a header from the header array
 	virtio_net_hdr_mrg_rxbuf_t *hdr =
-	  &vum->cpus[cpu_index].tx_headers[tx_headers_len];
+	  &vum->cpus[thread_index].tx_headers[tx_headers_len];
 	tx_headers_len++;
 	hdr->hdr.flags = 0;
 	hdr->hdr.gso_type = 0;
 	hdr->num_buffers = 1;	//This is local, no need to check
 
 	// Prepare a copy order executed later for the header
-	vhost_copy_t *cpy = &vum->cpus[cpu_index].copy[copy_len];
+	vhost_copy_t *cpy = &vum->cpus[thread_index].copy[copy_len];
 	copy_len++;
 	cpy->len = vui->virtio_net_hdr_sz;
 	cpy->dst = buffer_map_addr;
@@ -2220,7 +2223,7 @@
 	      else if (vui->virtio_net_hdr_sz == 12)	//MRG is available
 		{
 		  virtio_net_hdr_mrg_rxbuf_t *hdr =
-		    &vum->cpus[cpu_index].tx_headers[tx_headers_len - 1];
+		    &vum->cpus[thread_index].tx_headers[tx_headers_len - 1];
 
 		  //Move from available to used buffer
 		  rxvq->used->ring[rxvq->last_used_idx & qsz_mask].id =
@@ -2282,7 +2285,7 @@
 	    }
 
 	  {
-	    vhost_copy_t *cpy = &vum->cpus[cpu_index].copy[copy_len];
+	    vhost_copy_t *cpy = &vum->cpus[thread_index].copy[copy_len];
 	    copy_len++;
 	    cpy->len = bytes_left;
 	    cpy->len = (cpy->len > buffer_len) ? buffer_len : cpy->len;
@@ -2325,8 +2328,8 @@
 
       if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
 	{
-	  vum->cpus[cpu_index].current_trace->hdr =
-	    vum->cpus[cpu_index].tx_headers[tx_headers_len - 1];
+	  vum->cpus[thread_index].current_trace->hdr =
+	    vum->cpus[thread_index].tx_headers[tx_headers_len - 1];
 	}
 
       n_left--;			//At the end for error counting when 'goto done' is invoked
@@ -2336,7 +2339,7 @@
 done:
   //Do the memory copies
   if (PREDICT_FALSE
-      (vhost_user_tx_copy (vui, vum->cpus[cpu_index].copy,
+      (vhost_user_tx_copy (vui, vum->cpus[thread_index].copy,
 			   copy_len, &map_hint)))
     {
       clib_warning ("Memory mapping error on interface hw_if_index=%d "
@@ -2386,7 +2389,7 @@
       vlib_increment_simple_counter
 	(vnet_main.interface_main.sw_if_counters
 	 + VNET_INTERFACE_COUNTER_DROP,
-	 os_get_cpu_number (), vui->sw_if_index, n_left);
+	 vlib_get_thread_index (), vui->sw_if_index, n_left);
     }
 
   vlib_buffer_free (vm, vlib_frame_args (frame), frame->n_vectors);
@@ -2773,11 +2776,11 @@
 	case ~0:
 	  vec_foreach (vhc, vum->cpus)
 	  {
-	    u32 cpu_index = vhc - vum->cpus;
+	    u32 thread_index = vhc - vum->cpus;
 	    f64 next_timeout;
 
 	    next_timeout = timeout;
-	    vec_foreach (vhiq, vum->cpus[cpu_index].rx_queues)
+	    vec_foreach (vhiq, vum->cpus[thread_index].rx_queues)
 	    {
 	      vui = &vum->vhost_user_interfaces[vhiq->vhost_iface_index];
 	      vhost_user_vring_t *rxvq =
diff --git a/src/vnet/dpo/lookup_dpo.c b/src/vnet/dpo/lookup_dpo.c
index e94e871..97ad0a4 100644
--- a/src/vnet/dpo/lookup_dpo.c
+++ b/src/vnet/dpo/lookup_dpo.c
@@ -266,7 +266,7 @@
                        int table_from_interface)
 {
     u32 n_left_from, next_index, * from, * to_next;
-    u32 cpu_index = os_get_cpu_number();
+    u32 thread_index = vlib_get_thread_index();
     vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters;
 
     from = vlib_frame_vector_args (from_frame);
@@ -407,10 +407,10 @@
 	    vnet_buffer(b1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
 
 	    vlib_increment_combined_counter
-		(cm, cpu_index, lbi0, 1,
+		(cm, thread_index, lbi0, 1,
 		 vlib_buffer_length_in_chain (vm, b0));
 	    vlib_increment_combined_counter
-		(cm, cpu_index, lbi1, 1,
+		(cm, thread_index, lbi1, 1,
 		 vlib_buffer_length_in_chain (vm, b1));
 
 	    if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
@@ -511,7 +511,7 @@
 	    vnet_buffer(b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
 
 	    vlib_increment_combined_counter
-		(cm, cpu_index, lbi0, 1,
+		(cm, thread_index, lbi0, 1,
 		 vlib_buffer_length_in_chain (vm, b0));
 
 	    if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
@@ -606,7 +606,7 @@
 {
     vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters;
     u32 n_left_from, next_index, * from, * to_next;
-    u32 cpu_index = os_get_cpu_number();
+    u32 thread_index = vlib_get_thread_index();
 
     from = vlib_frame_vector_args (from_frame);
     n_left_from = from_frame->n_vectors;
@@ -749,10 +749,10 @@
 	    vnet_buffer(b1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
 
 	    vlib_increment_combined_counter
-		(cm, cpu_index, lbi0, 1,
+		(cm, thread_index, lbi0, 1,
 		 vlib_buffer_length_in_chain (vm, b0));
 	    vlib_increment_combined_counter
-		(cm, cpu_index, lbi1, 1,
+		(cm, thread_index, lbi1, 1,
 		 vlib_buffer_length_in_chain (vm, b1));
 
 	    if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
@@ -853,7 +853,7 @@
 	    vnet_buffer(b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
 
 	    vlib_increment_combined_counter
-		(cm, cpu_index, lbi0, 1,
+		(cm, thread_index, lbi0, 1,
 		 vlib_buffer_length_in_chain (vm, b0));
 
 	    if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
@@ -930,7 +930,7 @@
                        int table_from_interface)
 {
     u32 n_left_from, next_index, * from, * to_next;
-    u32 cpu_index = os_get_cpu_number();
+    u32 thread_index = vlib_get_thread_index();
     vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters;
 
     from = vlib_frame_vector_args (from_frame);
@@ -994,7 +994,7 @@
             vnet_buffer(b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
 
             vlib_increment_combined_counter
-                (cm, cpu_index, lbi0, 1,
+                (cm, thread_index, lbi0, 1,
                  vlib_buffer_length_in_chain (vm, b0));
 
 	    if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) 
diff --git a/src/vnet/dpo/replicate_dpo.c b/src/vnet/dpo/replicate_dpo.c
index a9f334b..e25ceae 100644
--- a/src/vnet/dpo/replicate_dpo.c
+++ b/src/vnet/dpo/replicate_dpo.c
@@ -627,7 +627,7 @@
     vlib_combined_counter_main_t * cm = &replicate_main.repm_counters;
     replicate_main_t * rm = &replicate_main;
     u32 n_left_from, * from, * to_next, next_index;
-    u32 cpu_index = os_get_cpu_number();
+    u32 thread_index = vlib_get_thread_index();
 
     from = vlib_frame_vector_args (frame);
     n_left_from = frame->n_vectors;
@@ -657,12 +657,12 @@
             rep0 = replicate_get(repi0);
 
             vlib_increment_combined_counter(
-                cm, cpu_index, repi0, 1,
+                cm, thread_index, repi0, 1,
                 vlib_buffer_length_in_chain(vm, b0));
 
-	    vec_validate (rm->clones[cpu_index], rep0->rep_n_buckets - 1);
+	    vec_validate (rm->clones[thread_index], rep0->rep_n_buckets - 1);
 
-	    num_cloned = vlib_buffer_clone (vm, bi0, rm->clones[cpu_index], rep0->rep_n_buckets, 128);
+	    num_cloned = vlib_buffer_clone (vm, bi0, rm->clones[thread_index], rep0->rep_n_buckets, 128);
 
 	    if (num_cloned != rep0->rep_n_buckets)
 	      {
@@ -673,7 +673,7 @@
 
             for (bucket = 0; bucket < num_cloned; bucket++)
             {
-                ci0 = rm->clones[cpu_index][bucket];
+                ci0 = rm->clones[thread_index][bucket];
                 c0 = vlib_get_buffer(vm, ci0);
 
                 to_next[0] = ci0;
@@ -700,7 +700,7 @@
 		    vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
 		  }
             }
-	    vec_reset_length (rm->clones[cpu_index]);
+	    vec_reset_length (rm->clones[thread_index]);
         }
 
         vlib_put_next_frame (vm, node, next_index, n_left_to_next);
diff --git a/src/vnet/ethernet/arp.c b/src/vnet/ethernet/arp.c
index ee75750..c74a097 100644
--- a/src/vnet/ethernet/arp.c
+++ b/src/vnet/ethernet/arp.c
@@ -1771,7 +1771,7 @@
 				    * a)
 {
   vnet_main_t *vm = vnet_get_main ();
-  ASSERT (os_get_cpu_number () == 0);
+  ASSERT (vlib_get_thread_index () == 0);
 
   if (a->flags & ETHERNET_ARP_ARGS_REMOVE)
     vnet_arp_unset_ip4_over_ethernet_internal (vm, a);
diff --git a/src/vnet/ethernet/interface.c b/src/vnet/ethernet/interface.c
index 9894e3c..335e3f9 100644
--- a/src/vnet/ethernet/interface.c
+++ b/src/vnet/ethernet/interface.c
@@ -362,7 +362,7 @@
   u32 next_index = VNET_SIMULATED_ETHERNET_TX_NEXT_ETHERNET_INPUT;
   u32 i, next_node_index, bvi_flag, sw_if_index;
   u32 n_pkts = 0, n_bytes = 0;
-  u32 cpu_index = vm->cpu_index;
+  u32 thread_index = vm->thread_index;
   vnet_main_t *vnm = vnet_get_main ();
   vnet_interface_main_t *im = &vnm->interface_main;
   vlib_node_main_t *nm = &vm->node_main;
@@ -420,8 +420,9 @@
 
       /* increment TX interface stat */
       vlib_increment_combined_counter (im->combined_sw_if_counters +
-				       VNET_INTERFACE_COUNTER_TX, cpu_index,
-				       sw_if_index, n_pkts, n_bytes);
+				       VNET_INTERFACE_COUNTER_TX,
+				       thread_index, sw_if_index, n_pkts,
+				       n_bytes);
     }
 
   return n_left_from;
diff --git a/src/vnet/ethernet/node.c b/src/vnet/ethernet/node.c
index b699e38..f7787ed 100755
--- a/src/vnet/ethernet/node.c
+++ b/src/vnet/ethernet/node.c
@@ -291,7 +291,7 @@
   vlib_node_runtime_t *error_node;
   u32 n_left_from, next_index, *from, *to_next;
   u32 stats_sw_if_index, stats_n_packets, stats_n_bytes;
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
   u32 cached_sw_if_index = ~0;
   u32 cached_is_l2 = 0;		/* shut up gcc */
   vnet_hw_interface_t *hi = NULL;	/* used for main interface only */
@@ -510,7 +510,7 @@
 						     interface_main.combined_sw_if_counters
 						     +
 						     VNET_INTERFACE_COUNTER_RX,
-						     cpu_index,
+						     thread_index,
 						     new_sw_if_index0, 1,
 						     len0);
 		  if (new_sw_if_index1 != old_sw_if_index1
@@ -519,7 +519,7 @@
 						     interface_main.combined_sw_if_counters
 						     +
 						     VNET_INTERFACE_COUNTER_RX,
-						     cpu_index,
+						     thread_index,
 						     new_sw_if_index1, 1,
 						     len1);
 
@@ -530,7 +530,7 @@
 			  vlib_increment_combined_counter
 			    (vnm->interface_main.combined_sw_if_counters
 			     + VNET_INTERFACE_COUNTER_RX,
-			     cpu_index,
+			     thread_index,
 			     stats_sw_if_index,
 			     stats_n_packets, stats_n_bytes);
 			  stats_n_packets = stats_n_bytes = 0;
@@ -696,13 +696,13 @@
 		    vlib_increment_combined_counter
 		      (vnm->interface_main.combined_sw_if_counters
 		       + VNET_INTERFACE_COUNTER_RX,
-		       cpu_index, new_sw_if_index0, 1, len0);
+		       thread_index, new_sw_if_index0, 1, len0);
 		  if (stats_n_packets > 0)
 		    {
 		      vlib_increment_combined_counter
 			(vnm->interface_main.combined_sw_if_counters
 			 + VNET_INTERFACE_COUNTER_RX,
-			 cpu_index,
+			 thread_index,
 			 stats_sw_if_index, stats_n_packets, stats_n_bytes);
 		      stats_n_packets = stats_n_bytes = 0;
 		    }
@@ -734,7 +734,7 @@
       vlib_increment_combined_counter
 	(vnm->interface_main.combined_sw_if_counters
 	 + VNET_INTERFACE_COUNTER_RX,
-	 cpu_index, stats_sw_if_index, stats_n_packets, stats_n_bytes);
+	 thread_index, stats_sw_if_index, stats_n_packets, stats_n_bytes);
       node->runtime_data[0] = stats_sw_if_index;
     }
 
diff --git a/src/vnet/gre/node.c b/src/vnet/gre/node.c
index 2683586..acf15f2 100644
--- a/src/vnet/gre/node.c
+++ b/src/vnet/gre/node.c
@@ -75,7 +75,7 @@
   u64 cached_tunnel_key6[4];
   u32 cached_tunnel_sw_if_index = 0, tunnel_sw_if_index = 0;
 
-  u32 cpu_index = os_get_cpu_number();
+  u32 thread_index = vlib_get_thread_index();
   u32 len;
   vnet_interface_main_t *im = &gm->vnet_main->interface_main;
 
@@ -257,7 +257,7 @@
           len = vlib_buffer_length_in_chain (vm, b0);
           vlib_increment_combined_counter (im->combined_sw_if_counters
                                            + VNET_INTERFACE_COUNTER_RX,
-                                           cpu_index,
+                                           thread_index,
                                            tunnel_sw_if_index,
                                            1 /* packets */,
                                            len /* bytes */);
@@ -324,7 +324,7 @@
           len = vlib_buffer_length_in_chain (vm, b1);
           vlib_increment_combined_counter (im->combined_sw_if_counters
                                            + VNET_INTERFACE_COUNTER_RX,
-                                           cpu_index,
+                                           thread_index,
                                            tunnel_sw_if_index,
                                            1 /* packets */,
                                            len /* bytes */);
@@ -502,7 +502,7 @@
           len = vlib_buffer_length_in_chain (vm, b0);
           vlib_increment_combined_counter (im->combined_sw_if_counters
                                            + VNET_INTERFACE_COUNTER_RX,
-                                           cpu_index,
+                                           thread_index,
                                            tunnel_sw_if_index,
                                            1 /* packets */,
                                            len /* bytes */);
diff --git a/src/vnet/interface.h b/src/vnet/interface.h
index a1ea2d6..08f08b1 100644
--- a/src/vnet/interface.h
+++ b/src/vnet/interface.h
@@ -468,7 +468,7 @@
   u32 input_node_index;
 
   /* input node cpu index by queue */
-  u32 *input_node_cpu_index_by_queue;
+  u32 *input_node_thread_index_by_queue;
 
 } vnet_hw_interface_t;
 
diff --git a/src/vnet/interface_output.c b/src/vnet/interface_output.c
index 03f2cdc..663dc30 100644
--- a/src/vnet/interface_output.c
+++ b/src/vnet/interface_output.c
@@ -196,7 +196,7 @@
  */
 static_always_inline void
 incr_output_stats (vnet_main_t * vnm,
-		   u32 cpu_index,
+		   u32 thread_index,
 		   u32 length,
 		   u32 sw_if_index,
 		   u32 * last_sw_if_index, u32 * n_packets, u32 * n_bytes)
@@ -216,7 +216,7 @@
 
 	  vlib_increment_combined_counter (im->combined_sw_if_counters
 					   + VNET_INTERFACE_COUNTER_TX,
-					   cpu_index,
+					   thread_index,
 					   *last_sw_if_index,
 					   *n_packets, *n_bytes);
 	}
@@ -240,7 +240,7 @@
   u32 n_left_to_tx, *from, *from_end, *to_tx;
   u32 n_bytes, n_buffers, n_packets;
   u32 last_sw_if_index;
-  u32 cpu_index = vm->cpu_index;
+  u32 thread_index = vm->thread_index;
 
   n_buffers = frame->n_vectors;
 
@@ -266,7 +266,7 @@
 
       cm = vec_elt_at_index (vnm->interface_main.sw_if_counters,
 			     VNET_INTERFACE_COUNTER_TX_ERROR);
-      vlib_increment_simple_counter (cm, cpu_index,
+      vlib_increment_simple_counter (cm, thread_index,
 				     rt->sw_if_index, n_buffers);
       return vlib_error_drop_buffers (vm, node, from,
 				      /* buffer stride */ 1,
@@ -341,18 +341,18 @@
 		  from += 1;
 		  to_tx += n_buffers;
 		  n_left_to_tx -= n_buffers;
-		  incr_output_stats (vnm, cpu_index, n_slow_bytes,
+		  incr_output_stats (vnm, thread_index, n_slow_bytes,
 				     vnet_buffer (b)->sw_if_index[VLIB_TX],
 				     &last_sw_if_index, &n_packets, &n_bytes);
 		}
 	    }
 	  else
 	    {
-	      incr_output_stats (vnm, cpu_index,
+	      incr_output_stats (vnm, thread_index,
 				 vlib_buffer_length_in_chain (vm, b0),
 				 vnet_buffer (b0)->sw_if_index[VLIB_TX],
 				 &last_sw_if_index, &n_packets, &n_bytes);
-	      incr_output_stats (vnm, cpu_index,
+	      incr_output_stats (vnm, thread_index,
 				 vlib_buffer_length_in_chain (vm, b0),
 				 vnet_buffer (b1)->sw_if_index[VLIB_TX],
 				 &last_sw_if_index, &n_packets, &n_bytes);
@@ -396,7 +396,7 @@
 	      to_tx += n_buffers;
 	      n_left_to_tx -= n_buffers;
 	    }
-	  incr_output_stats (vnm, cpu_index,
+	  incr_output_stats (vnm, thread_index,
 			     vlib_buffer_length_in_chain (vm, b0),
 			     vnet_buffer (b0)->sw_if_index[VLIB_TX],
 			     &last_sw_if_index, &n_packets, &n_bytes);
@@ -408,7 +408,7 @@
     }
 
   /* Final update of interface stats. */
-  incr_output_stats (vnm, cpu_index, 0, ~0,	/* ~0 will flush stats */
+  incr_output_stats (vnm, thread_index, 0, ~0,	/* ~0 will flush stats */
 		     &last_sw_if_index, &n_packets, &n_bytes);
 
   return n_buffers;
@@ -428,7 +428,7 @@
   u32 n_left_to_tx, *from, *from_end, *to_tx;
   u32 n_bytes, n_buffers, n_packets;
   u32 n_bytes_b0, n_bytes_b1, n_bytes_b2, n_bytes_b3;
-  u32 cpu_index = vm->cpu_index;
+  u32 thread_index = vm->thread_index;
   vnet_interface_main_t *im = &vnm->interface_main;
   u32 next_index = VNET_INTERFACE_OUTPUT_NEXT_TX;
   u32 current_config_index = ~0;
@@ -458,7 +458,7 @@
 
       cm = vec_elt_at_index (vnm->interface_main.sw_if_counters,
 			     VNET_INTERFACE_COUNTER_TX_ERROR);
-      vlib_increment_simple_counter (cm, cpu_index,
+      vlib_increment_simple_counter (cm, thread_index,
 				     rt->sw_if_index, n_buffers);
 
       return vlib_error_drop_buffers (vm, node, from,
@@ -558,7 +558,7 @@
 	    {
 	      vlib_increment_combined_counter (im->combined_sw_if_counters +
 					       VNET_INTERFACE_COUNTER_TX,
-					       cpu_index, tx_swif0, 1,
+					       thread_index, tx_swif0, 1,
 					       n_bytes_b0);
 	    }
 
@@ -567,7 +567,7 @@
 
 	      vlib_increment_combined_counter (im->combined_sw_if_counters +
 					       VNET_INTERFACE_COUNTER_TX,
-					       cpu_index, tx_swif1, 1,
+					       thread_index, tx_swif1, 1,
 					       n_bytes_b1);
 	    }
 
@@ -576,7 +576,7 @@
 
 	      vlib_increment_combined_counter (im->combined_sw_if_counters +
 					       VNET_INTERFACE_COUNTER_TX,
-					       cpu_index, tx_swif2, 1,
+					       thread_index, tx_swif2, 1,
 					       n_bytes_b2);
 	    }
 	  if (PREDICT_FALSE (tx_swif3 != rt->sw_if_index))
@@ -584,7 +584,7 @@
 
 	      vlib_increment_combined_counter (im->combined_sw_if_counters +
 					       VNET_INTERFACE_COUNTER_TX,
-					       cpu_index, tx_swif3, 1,
+					       thread_index, tx_swif3, 1,
 					       n_bytes_b3);
 	    }
 	}
@@ -623,7 +623,7 @@
 
 	      vlib_increment_combined_counter (im->combined_sw_if_counters +
 					       VNET_INTERFACE_COUNTER_TX,
-					       cpu_index, tx_swif0, 1,
+					       thread_index, tx_swif0, 1,
 					       n_bytes_b0);
 	    }
 	}
@@ -634,7 +634,7 @@
   /* Update main interface stats. */
   vlib_increment_combined_counter (im->combined_sw_if_counters
 				   + VNET_INTERFACE_COUNTER_TX,
-				   cpu_index,
+				   thread_index,
 				   rt->sw_if_index, n_packets, n_bytes);
   return n_buffers;
 }
@@ -893,7 +893,7 @@
   u32 current_sw_if_index, n_errors_current_sw_if_index;
   u64 current_counter;
   vlib_simple_counter_main_t *cm;
-  u32 cpu_index = vm->cpu_index;
+  u32 thread_index = vm->thread_index;
 
   static vlib_error_t memory[VNET_ERROR_N_DISPOSITION];
   static char memory_init[VNET_ERROR_N_DISPOSITION];
@@ -965,19 +965,19 @@
 	  current_counter -= 2;
 	  n_errors_current_sw_if_index -= 2;
 
-	  vlib_increment_simple_counter (cm, cpu_index, sw_if_index0, 1);
-	  vlib_increment_simple_counter (cm, cpu_index, sw_if_index1, 1);
+	  vlib_increment_simple_counter (cm, thread_index, sw_if_index0, 1);
+	  vlib_increment_simple_counter (cm, thread_index, sw_if_index1, 1);
 
 	  /* Increment super-interface drop/punt counters for
 	     sub-interfaces. */
 	  sw_if0 = vnet_get_sw_interface (vnm, sw_if_index0);
 	  vlib_increment_simple_counter
-	    (cm, cpu_index, sw_if0->sup_sw_if_index,
+	    (cm, thread_index, sw_if0->sup_sw_if_index,
 	     sw_if0->sup_sw_if_index != sw_if_index0);
 
 	  sw_if1 = vnet_get_sw_interface (vnm, sw_if_index1);
 	  vlib_increment_simple_counter
-	    (cm, cpu_index, sw_if1->sup_sw_if_index,
+	    (cm, thread_index, sw_if1->sup_sw_if_index,
 	     sw_if1->sup_sw_if_index != sw_if_index1);
 
 	  em->counters[current_counter_index] = current_counter;
@@ -1013,11 +1013,12 @@
       sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
 
       /* Increment drop/punt counters. */
-      vlib_increment_simple_counter (cm, cpu_index, sw_if_index0, 1);
+      vlib_increment_simple_counter (cm, thread_index, sw_if_index0, 1);
 
       /* Increment super-interface drop/punt counters for sub-interfaces. */
       sw_if0 = vnet_get_sw_interface (vnm, sw_if_index0);
-      vlib_increment_simple_counter (cm, cpu_index, sw_if0->sup_sw_if_index,
+      vlib_increment_simple_counter (cm, thread_index,
+				     sw_if0->sup_sw_if_index,
 				     sw_if0->sup_sw_if_index != sw_if_index0);
 
       if (PREDICT_FALSE (e0 != current_error))
@@ -1041,12 +1042,12 @@
     {
       vnet_sw_interface_t *si;
 
-      vlib_increment_simple_counter (cm, cpu_index, current_sw_if_index,
+      vlib_increment_simple_counter (cm, thread_index, current_sw_if_index,
 				     n_errors_current_sw_if_index);
 
       si = vnet_get_sw_interface (vnm, current_sw_if_index);
       if (si->sup_sw_if_index != current_sw_if_index)
-	vlib_increment_simple_counter (cm, cpu_index, si->sup_sw_if_index,
+	vlib_increment_simple_counter (cm, thread_index, si->sup_sw_if_index,
 				       n_errors_current_sw_if_index);
     }
 
diff --git a/src/vnet/ip/ip4_forward.c b/src/vnet/ip/ip4_forward.c
index ee1703e..fdfe7f6 100644
--- a/src/vnet/ip/ip4_forward.c
+++ b/src/vnet/ip/ip4_forward.c
@@ -75,7 +75,7 @@
   vlib_combined_counter_main_t *cm = &load_balance_main.lbm_to_counters;
   u32 n_left_from, n_left_to_next, *from, *to_next;
   ip_lookup_next_t next;
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
 
   from = vlib_frame_vector_args (frame);
   n_left_from = frame->n_vectors;
@@ -292,19 +292,19 @@
 	  vnet_buffer (p3)->ip.adj_index[VLIB_TX] = dpo3->dpoi_index;
 
 	  vlib_increment_combined_counter
-	    (cm, cpu_index, lb_index0, 1,
+	    (cm, thread_index, lb_index0, 1,
 	     vlib_buffer_length_in_chain (vm, p0)
 	     + sizeof (ethernet_header_t));
 	  vlib_increment_combined_counter
-	    (cm, cpu_index, lb_index1, 1,
+	    (cm, thread_index, lb_index1, 1,
 	     vlib_buffer_length_in_chain (vm, p1)
 	     + sizeof (ethernet_header_t));
 	  vlib_increment_combined_counter
-	    (cm, cpu_index, lb_index2, 1,
+	    (cm, thread_index, lb_index2, 1,
 	     vlib_buffer_length_in_chain (vm, p2)
 	     + sizeof (ethernet_header_t));
 	  vlib_increment_combined_counter
-	    (cm, cpu_index, lb_index3, 1,
+	    (cm, thread_index, lb_index3, 1,
 	     vlib_buffer_length_in_chain (vm, p3)
 	     + sizeof (ethernet_header_t));
 
@@ -392,7 +392,7 @@
 	  vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
 
 	  vlib_increment_combined_counter
-	    (cm, cpu_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
+	    (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
 
 	  from += 1;
 	  to_next += 1;
@@ -479,7 +479,7 @@
   vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
   u32 n_left_from, n_left_to_next, *from, *to_next;
   ip_lookup_next_t next;
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
 
   from = vlib_frame_vector_args (frame);
   n_left_from = frame->n_vectors;
@@ -584,9 +584,9 @@
 	  vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
 
 	  vlib_increment_combined_counter
-	    (cm, cpu_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
+	    (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
 	  vlib_increment_combined_counter
-	    (cm, cpu_index, lbi1, 1, vlib_buffer_length_in_chain (vm, p1));
+	    (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, p1));
 
 	  vlib_validate_buffer_enqueue_x2 (vm, node, next,
 					   to_next, n_left_to_next,
@@ -639,7 +639,7 @@
 	  vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
 
 	  vlib_increment_combined_counter
-	    (cm, cpu_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
+	    (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
 
 	  vlib_validate_buffer_enqueue_x1 (vm, node, next,
 					   to_next, n_left_to_next,
@@ -2330,7 +2330,7 @@
 
   n_left_from = frame->n_vectors;
   next_index = node->cached_next_index;
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
 
   while (n_left_from > 0)
     {
@@ -2379,9 +2379,9 @@
 	  if (do_counters)
 	    {
 	      vlib_prefetch_combined_counter (&adjacency_counters,
-					      cpu_index, adj_index0);
+					      thread_index, adj_index0);
 	      vlib_prefetch_combined_counter (&adjacency_counters,
-					      cpu_index, adj_index1);
+					      thread_index, adj_index1);
 	    }
 
 	  ip0 = vlib_buffer_get_current (p0);
@@ -2527,13 +2527,13 @@
 	    {
 	      vlib_increment_combined_counter
 		(&adjacency_counters,
-		 cpu_index,
+		 thread_index,
 		 adj_index0, 1,
 		 vlib_buffer_length_in_chain (vm, p0) + rw_len0);
 
 	      vlib_increment_combined_counter
 		(&adjacency_counters,
-		 cpu_index,
+		 thread_index,
 		 adj_index1, 1,
 		 vlib_buffer_length_in_chain (vm, p1) + rw_len1);
 	    }
@@ -2618,7 +2618,7 @@
 
 	  if (do_counters)
 	    vlib_prefetch_combined_counter (&adjacency_counters,
-					    cpu_index, adj_index0);
+					    thread_index, adj_index0);
 
 	  /* Guess we are only writing on simple Ethernet header. */
 	  vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
@@ -2637,7 +2637,7 @@
 	  if (do_counters)
 	    vlib_increment_combined_counter
 	      (&adjacency_counters,
-	       cpu_index, adj_index0, 1,
+	       thread_index, adj_index0, 1,
 	       vlib_buffer_length_in_chain (vm, p0) + rw_len0);
 
 	  /* Check MTU of outgoing interface. */
diff --git a/src/vnet/ip/ip4_input.c b/src/vnet/ip/ip4_input.c
index ba200a9..3b08f4b 100644
--- a/src/vnet/ip/ip4_input.c
+++ b/src/vnet/ip/ip4_input.c
@@ -85,7 +85,7 @@
   vlib_node_runtime_t *error_node =
     vlib_node_get_runtime (vm, ip4_input_node.index);
   vlib_simple_counter_main_t *cm;
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
 
   from = vlib_frame_vector_args (frame);
   n_left_from = frame->n_vectors;
@@ -178,8 +178,8 @@
 	  vnet_feature_arc_start (arc0, sw_if_index0, &next0, p0);
 	  vnet_feature_arc_start (arc1, sw_if_index1, &next1, p1);
 
-	  vlib_increment_simple_counter (cm, cpu_index, sw_if_index0, 1);
-	  vlib_increment_simple_counter (cm, cpu_index, sw_if_index1, 1);
+	  vlib_increment_simple_counter (cm, thread_index, sw_if_index0, 1);
+	  vlib_increment_simple_counter (cm, thread_index, sw_if_index1, 1);
 
 	  /* Punt packets with options or wrong version. */
 	  if (PREDICT_FALSE (ip0->ip_version_and_header_length != 0x45))
@@ -299,7 +299,7 @@
 	  vnet_buffer (p0)->ip.adj_index[VLIB_RX] = ~0;
 	  vnet_feature_arc_start (arc0, sw_if_index0, &next0, p0);
 
-	  vlib_increment_simple_counter (cm, cpu_index, sw_if_index0, 1);
+	  vlib_increment_simple_counter (cm, thread_index, sw_if_index0, 1);
 
 	  /* Punt packets with options or wrong version. */
 	  if (PREDICT_FALSE (ip0->ip_version_and_header_length != 0x45))
diff --git a/src/vnet/ip/ip6_forward.c b/src/vnet/ip/ip6_forward.c
index c120f12..c2fc4f8 100644
--- a/src/vnet/ip/ip6_forward.c
+++ b/src/vnet/ip/ip6_forward.c
@@ -74,7 +74,7 @@
   vlib_combined_counter_main_t *cm = &load_balance_main.lbm_to_counters;
   u32 n_left_from, n_left_to_next, *from, *to_next;
   ip_lookup_next_t next;
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
 
   from = vlib_frame_vector_args (frame);
   n_left_from = frame->n_vectors;
@@ -185,9 +185,9 @@
 	  vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
 
 	  vlib_increment_combined_counter
-	    (cm, cpu_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
+	    (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
 	  vlib_increment_combined_counter
-	    (cm, cpu_index, lbi1, 1, vlib_buffer_length_in_chain (vm, p1));
+	    (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, p1));
 
 	  from += 2;
 	  to_next += 2;
@@ -291,7 +291,7 @@
 	  vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
 
 	  vlib_increment_combined_counter
-	    (cm, cpu_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
+	    (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
 
 	  from += 1;
 	  to_next += 1;
@@ -703,7 +703,7 @@
   vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
   u32 n_left_from, n_left_to_next, *from, *to_next;
   ip_lookup_next_t next;
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
   ip6_main_t *im = &ip6_main;
 
   from = vlib_frame_vector_args (frame);
@@ -824,9 +824,9 @@
 	  vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
 
 	  vlib_increment_combined_counter
-	    (cm, cpu_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
+	    (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
 	  vlib_increment_combined_counter
-	    (cm, cpu_index, lbi1, 1, vlib_buffer_length_in_chain (vm, p1));
+	    (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, p1));
 
 	  vlib_validate_buffer_enqueue_x2 (vm, node, next,
 					   to_next, n_left_to_next,
@@ -886,7 +886,7 @@
 	    }
 
 	  vlib_increment_combined_counter
-	    (cm, cpu_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
+	    (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
 
 	  vlib_validate_buffer_enqueue_x1 (vm, node, next,
 					   to_next, n_left_to_next,
@@ -1897,7 +1897,7 @@
 
   n_left_from = frame->n_vectors;
   next_index = node->cached_next_index;
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
 
   while (n_left_from > 0)
     {
@@ -2019,11 +2019,11 @@
 	    {
 	      vlib_increment_combined_counter
 		(&adjacency_counters,
-		 cpu_index, adj_index0, 1,
+		 thread_index, adj_index0, 1,
 		 vlib_buffer_length_in_chain (vm, p0) + rw_len0);
 	      vlib_increment_combined_counter
 		(&adjacency_counters,
-		 cpu_index, adj_index1, 1,
+		 thread_index, adj_index1, 1,
 		 vlib_buffer_length_in_chain (vm, p1) + rw_len1);
 	    }
 
@@ -2156,7 +2156,7 @@
 	    {
 	      vlib_increment_combined_counter
 		(&adjacency_counters,
-		 cpu_index, adj_index0, 1,
+		 thread_index, adj_index0, 1,
 		 vlib_buffer_length_in_chain (vm, p0) + rw_len0);
 	    }
 
diff --git a/src/vnet/ip/ip6_input.c b/src/vnet/ip/ip6_input.c
index 2030608..ffdc472 100644
--- a/src/vnet/ip/ip6_input.c
+++ b/src/vnet/ip/ip6_input.c
@@ -82,7 +82,7 @@
   vlib_node_runtime_t *error_node =
     vlib_node_get_runtime (vm, ip6_input_node.index);
   vlib_simple_counter_main_t *cm;
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
 
   from = vlib_frame_vector_args (frame);
   n_left_from = frame->n_vectors;
@@ -171,8 +171,8 @@
 	  vnet_feature_arc_start (arc0, sw_if_index0, &next0, p0);
 	  vnet_feature_arc_start (arc1, sw_if_index1, &next1, p1);
 
-	  vlib_increment_simple_counter (cm, cpu_index, sw_if_index0, 1);
-	  vlib_increment_simple_counter (cm, cpu_index, sw_if_index1, 1);
+	  vlib_increment_simple_counter (cm, thread_index, sw_if_index0, 1);
+	  vlib_increment_simple_counter (cm, thread_index, sw_if_index1, 1);
 
 	  error0 = error1 = IP6_ERROR_NONE;
 
@@ -270,7 +270,7 @@
 	  vnet_buffer (p0)->ip.adj_index[VLIB_RX] = ~0;
 	  vnet_feature_arc_start (arc0, sw_if_index0, &next0, p0);
 
-	  vlib_increment_simple_counter (cm, cpu_index, sw_if_index0, 1);
+	  vlib_increment_simple_counter (cm, thread_index, sw_if_index0, 1);
 	  error0 = IP6_ERROR_NONE;
 
 	  /* Version != 6?  Drop it. */
diff --git a/src/vnet/ip/ip6_neighbor.c b/src/vnet/ip/ip6_neighbor.c
index 5d1fb6f..2af546d 100644
--- a/src/vnet/ip/ip6_neighbor.c
+++ b/src/vnet/ip/ip6_neighbor.c
@@ -581,7 +581,7 @@
   u32 next_index;
   pending_resolution_t *pr, *mc;
 
-  if (os_get_cpu_number ())
+  if (vlib_get_thread_index ())
     {
       set_unset_ip6_neighbor_rpc (vm, sw_if_index, a, link_layer_address,
 				  1 /* set new neighbor */ , is_static,
@@ -722,7 +722,7 @@
   uword *p;
   int rv = 0;
 
-  if (os_get_cpu_number ())
+  if (vlib_get_thread_index ())
     {
       set_unset_ip6_neighbor_rpc (vm, sw_if_index, a, link_layer_address,
 				  0 /* unset */ , 0, 0);
diff --git a/src/vnet/ipsec/esp.h b/src/vnet/ipsec/esp.h
index 50cac80..799003b 100644
--- a/src/vnet/ipsec/esp.h
+++ b/src/vnet/ipsec/esp.h
@@ -282,8 +282,8 @@
 	   u8 * data, int data_len, u8 * signature, u8 use_esn, u32 seq_hi)
 {
   esp_main_t *em = &esp_main;
-  u32 cpu_index = os_get_cpu_number ();
-  HMAC_CTX *ctx = &(em->per_thread_data[cpu_index].hmac_ctx);
+  u32 thread_index = vlib_get_thread_index ();
+  HMAC_CTX *ctx = &(em->per_thread_data[thread_index].hmac_ctx);
   const EVP_MD *md = NULL;
   unsigned int len;
 
@@ -292,10 +292,10 @@
   if (PREDICT_FALSE (em->esp_integ_algs[alg].md == 0))
     return 0;
 
-  if (PREDICT_FALSE (alg != em->per_thread_data[cpu_index].last_integ_alg))
+  if (PREDICT_FALSE (alg != em->per_thread_data[thread_index].last_integ_alg))
     {
       md = em->esp_integ_algs[alg].md;
-      em->per_thread_data[cpu_index].last_integ_alg = alg;
+      em->per_thread_data[thread_index].last_integ_alg = alg;
     }
 
   HMAC_Init (ctx, key, key_len, md);
diff --git a/src/vnet/ipsec/esp_decrypt.c b/src/vnet/ipsec/esp_decrypt.c
index 7289b26..925d2b4 100644
--- a/src/vnet/ipsec/esp_decrypt.c
+++ b/src/vnet/ipsec/esp_decrypt.c
@@ -85,8 +85,8 @@
 		     u8 * in, u8 * out, size_t in_len, u8 * key, u8 * iv)
 {
   esp_main_t *em = &esp_main;
-  u32 cpu_index = os_get_cpu_number ();
-  EVP_CIPHER_CTX *ctx = &(em->per_thread_data[cpu_index].decrypt_ctx);
+  u32 thread_index = vlib_get_thread_index ();
+  EVP_CIPHER_CTX *ctx = &(em->per_thread_data[thread_index].decrypt_ctx);
   const EVP_CIPHER *cipher = NULL;
   int out_len;
 
@@ -95,10 +95,11 @@
   if (PREDICT_FALSE (em->esp_crypto_algs[alg].type == 0))
     return;
 
-  if (PREDICT_FALSE (alg != em->per_thread_data[cpu_index].last_decrypt_alg))
+  if (PREDICT_FALSE
+      (alg != em->per_thread_data[thread_index].last_decrypt_alg))
     {
       cipher = em->esp_crypto_algs[alg].type;
-      em->per_thread_data[cpu_index].last_decrypt_alg = alg;
+      em->per_thread_data[thread_index].last_decrypt_alg = alg;
     }
 
   EVP_DecryptInit_ex (ctx, cipher, NULL, key, iv);
@@ -117,11 +118,11 @@
   u32 *recycle = 0;
   from = vlib_frame_vector_args (from_frame);
   n_left_from = from_frame->n_vectors;
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
 
   ipsec_alloc_empty_buffers (vm, im);
 
-  u32 *empty_buffers = im->empty_buffers[cpu_index];
+  u32 *empty_buffers = im->empty_buffers[thread_index];
 
   if (PREDICT_FALSE (vec_len (empty_buffers) < n_left_from))
     {
diff --git a/src/vnet/ipsec/esp_encrypt.c b/src/vnet/ipsec/esp_encrypt.c
index 44ae229..b2bc4e0 100644
--- a/src/vnet/ipsec/esp_encrypt.c
+++ b/src/vnet/ipsec/esp_encrypt.c
@@ -88,8 +88,8 @@
 		     u8 * in, u8 * out, size_t in_len, u8 * key, u8 * iv)
 {
   esp_main_t *em = &esp_main;
-  u32 cpu_index = os_get_cpu_number ();
-  EVP_CIPHER_CTX *ctx = &(em->per_thread_data[cpu_index].encrypt_ctx);
+  u32 thread_index = vlib_get_thread_index ();
+  EVP_CIPHER_CTX *ctx = &(em->per_thread_data[thread_index].encrypt_ctx);
   const EVP_CIPHER *cipher = NULL;
   int out_len;
 
@@ -98,10 +98,11 @@
   if (PREDICT_FALSE (em->esp_crypto_algs[alg].type == IPSEC_CRYPTO_ALG_NONE))
     return;
 
-  if (PREDICT_FALSE (alg != em->per_thread_data[cpu_index].last_encrypt_alg))
+  if (PREDICT_FALSE
+      (alg != em->per_thread_data[thread_index].last_encrypt_alg))
     {
       cipher = em->esp_crypto_algs[alg].type;
-      em->per_thread_data[cpu_index].last_encrypt_alg = alg;
+      em->per_thread_data[thread_index].last_encrypt_alg = alg;
     }
 
   EVP_EncryptInit_ex (ctx, cipher, NULL, key, iv);
@@ -119,11 +120,11 @@
   n_left_from = from_frame->n_vectors;
   ipsec_main_t *im = &ipsec_main;
   u32 *recycle = 0;
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
 
   ipsec_alloc_empty_buffers (vm, im);
 
-  u32 *empty_buffers = im->empty_buffers[cpu_index];
+  u32 *empty_buffers = im->empty_buffers[thread_index];
 
   if (PREDICT_FALSE (vec_len (empty_buffers) < n_left_from))
     {
diff --git a/src/vnet/ipsec/ikev2.c b/src/vnet/ipsec/ikev2.c
index 2c1074d..3f9978a 100644
--- a/src/vnet/ipsec/ikev2.c
+++ b/src/vnet/ipsec/ikev2.c
@@ -303,16 +303,16 @@
 ikev2_delete_sa (ikev2_sa_t * sa)
 {
   ikev2_main_t *km = &ikev2_main;
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
   uword *p;
 
   ikev2_sa_free_all_vec (sa);
 
-  p = hash_get (km->per_thread_data[cpu_index].sa_by_rspi, sa->rspi);
+  p = hash_get (km->per_thread_data[thread_index].sa_by_rspi, sa->rspi);
   if (p)
     {
-      hash_unset (km->per_thread_data[cpu_index].sa_by_rspi, sa->rspi);
-      pool_put (km->per_thread_data[cpu_index].sas, sa);
+      hash_unset (km->per_thread_data[thread_index].sa_by_rspi, sa->rspi);
+      pool_put (km->per_thread_data[thread_index].sas, sa);
     }
 }
 
@@ -776,29 +776,31 @@
   ikev2_sa_t *tmp;
   u32 i, *delete = 0;
   ikev2_child_sa_t *c;
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
 
   if (!sa->initial_contact)
     return;
 
   /* find old IKE SAs with the same authenticated identity */
   /* *INDENT-OFF* */
-  pool_foreach (tmp, km->per_thread_data[cpu_index].sas, ({
+  pool_foreach (tmp, km->per_thread_data[thread_index].sas, ({
         if (tmp->i_id.type != sa->i_id.type ||
             vec_len(tmp->i_id.data) != vec_len(sa->i_id.data) ||
             memcmp(sa->i_id.data, tmp->i_id.data, vec_len(sa->i_id.data)))
           continue;
 
         if (sa->rspi != tmp->rspi)
-          vec_add1(delete, tmp - km->per_thread_data[cpu_index].sas);
+          vec_add1(delete, tmp - km->per_thread_data[thread_index].sas);
   }));
   /* *INDENT-ON* */
 
   for (i = 0; i < vec_len (delete); i++)
     {
-      tmp = pool_elt_at_index (km->per_thread_data[cpu_index].sas, delete[i]);
-      vec_foreach (c, tmp->childs)
-	ikev2_delete_tunnel_interface (km->vnet_main, tmp, c);
+      tmp =
+	pool_elt_at_index (km->per_thread_data[thread_index].sas, delete[i]);
+      vec_foreach (c,
+		   tmp->childs) ikev2_delete_tunnel_interface (km->vnet_main,
+							       tmp, c);
       ikev2_delete_sa (tmp);
     }
 
@@ -1922,10 +1924,10 @@
 {
   ikev2_main_t *km = &ikev2_main;
   ikev2_sa_t *sa;
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
 
   /* *INDENT-OFF* */
-  pool_foreach (sa, km->per_thread_data[cpu_index].sas, ({
+  pool_foreach (sa, km->per_thread_data[thread_index].sas, ({
     if (sa->ispi == clib_net_to_host_u64(ike->ispi) &&
         sa->iaddr.as_u32 == iaddr.as_u32 &&
         sa->raddr.as_u32 == raddr.as_u32)
@@ -2036,7 +2038,7 @@
   u32 n_left_from, *from, *to_next;
   ikev2_next_t next_index;
   ikev2_main_t *km = &ikev2_main;
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
 
   from = vlib_frame_vector_args (frame);
   n_left_from = frame->n_vectors;
@@ -2134,11 +2136,14 @@
 		      if (sa0->state == IKEV2_STATE_SA_INIT)
 			{
 			  /* add SA to the pool */
-			  pool_get (km->per_thread_data[cpu_index].sas, sa0);
+			  pool_get (km->per_thread_data[thread_index].sas,
+				    sa0);
 			  clib_memcpy (sa0, &sa, sizeof (*sa0));
-			  hash_set (km->per_thread_data[cpu_index].sa_by_rspi,
+			  hash_set (km->
+				    per_thread_data[thread_index].sa_by_rspi,
 				    sa0->rspi,
-				    sa0 - km->per_thread_data[cpu_index].sas);
+				    sa0 -
+				    km->per_thread_data[thread_index].sas);
 			}
 		      else
 			{
@@ -2169,11 +2174,11 @@
 		  if (sa0->state == IKEV2_STATE_SA_INIT)
 		    {
 		      /* add SA to the pool */
-		      pool_get (km->per_thread_data[cpu_index].sas, sa0);
+		      pool_get (km->per_thread_data[thread_index].sas, sa0);
 		      clib_memcpy (sa0, &sa, sizeof (*sa0));
-		      hash_set (km->per_thread_data[cpu_index].sa_by_rspi,
+		      hash_set (km->per_thread_data[thread_index].sa_by_rspi,
 				sa0->rspi,
-				sa0 - km->per_thread_data[cpu_index].sas);
+				sa0 - km->per_thread_data[thread_index].sas);
 		    }
 		  else
 		    {
@@ -2184,12 +2189,13 @@
 	  else if (ike0->exchange == IKEV2_EXCHANGE_IKE_AUTH)
 	    {
 	      uword *p;
-	      p = hash_get (km->per_thread_data[cpu_index].sa_by_rspi,
+	      p = hash_get (km->per_thread_data[thread_index].sa_by_rspi,
 			    clib_net_to_host_u64 (ike0->rspi));
 	      if (p)
 		{
-		  sa0 = pool_elt_at_index (km->per_thread_data[cpu_index].sas,
-					   p[0]);
+		  sa0 =
+		    pool_elt_at_index (km->per_thread_data[thread_index].sas,
+				       p[0]);
 
 		  r = ikev2_retransmit_resp (sa0, ike0);
 		  if (r == 1)
@@ -2240,12 +2246,13 @@
 	  else if (ike0->exchange == IKEV2_EXCHANGE_INFORMATIONAL)
 	    {
 	      uword *p;
-	      p = hash_get (km->per_thread_data[cpu_index].sa_by_rspi,
+	      p = hash_get (km->per_thread_data[thread_index].sa_by_rspi,
 			    clib_net_to_host_u64 (ike0->rspi));
 	      if (p)
 		{
-		  sa0 = pool_elt_at_index (km->per_thread_data[cpu_index].sas,
-					   p[0]);
+		  sa0 =
+		    pool_elt_at_index (km->per_thread_data[thread_index].sas,
+				       p[0]);
 
 		  r = ikev2_retransmit_resp (sa0, ike0);
 		  if (r == 1)
@@ -2305,12 +2312,13 @@
 	  else if (ike0->exchange == IKEV2_EXCHANGE_CREATE_CHILD_SA)
 	    {
 	      uword *p;
-	      p = hash_get (km->per_thread_data[cpu_index].sa_by_rspi,
+	      p = hash_get (km->per_thread_data[thread_index].sa_by_rspi,
 			    clib_net_to_host_u64 (ike0->rspi));
 	      if (p)
 		{
-		  sa0 = pool_elt_at_index (km->per_thread_data[cpu_index].sas,
-					   p[0]);
+		  sa0 =
+		    pool_elt_at_index (km->per_thread_data[thread_index].sas,
+				       p[0]);
 
 		  r = ikev2_retransmit_resp (sa0, ike0);
 		  if (r == 1)
diff --git a/src/vnet/ipsec/ipsec.h b/src/vnet/ipsec/ipsec.h
index 58f0f14..c884e36 100644
--- a/src/vnet/ipsec/ipsec.h
+++ b/src/vnet/ipsec/ipsec.h
@@ -324,21 +324,21 @@
 always_inline void
 ipsec_alloc_empty_buffers (vlib_main_t * vm, ipsec_main_t * im)
 {
-  u32 cpu_index = os_get_cpu_number ();
-  uword l = vec_len (im->empty_buffers[cpu_index]);
+  u32 thread_index = vlib_get_thread_index ();
+  uword l = vec_len (im->empty_buffers[thread_index]);
   uword n_alloc = 0;
 
   if (PREDICT_FALSE (l < VLIB_FRAME_SIZE))
     {
-      if (!im->empty_buffers[cpu_index])
+      if (!im->empty_buffers[thread_index])
 	{
-	  vec_alloc (im->empty_buffers[cpu_index], 2 * VLIB_FRAME_SIZE);
+	  vec_alloc (im->empty_buffers[thread_index], 2 * VLIB_FRAME_SIZE);
 	}
 
-      n_alloc = vlib_buffer_alloc (vm, im->empty_buffers[cpu_index] + l,
+      n_alloc = vlib_buffer_alloc (vm, im->empty_buffers[thread_index] + l,
 				   2 * VLIB_FRAME_SIZE - l);
 
-      _vec_len (im->empty_buffers[cpu_index]) = l + n_alloc;
+      _vec_len (im->empty_buffers[thread_index]) = l + n_alloc;
     }
 }
 
diff --git a/src/vnet/ipsec/ipsec_if.c b/src/vnet/ipsec/ipsec_if.c
index dc88200..ed12489 100644
--- a/src/vnet/ipsec/ipsec_if.c
+++ b/src/vnet/ipsec/ipsec_if.c
@@ -99,7 +99,7 @@
 ipsec_add_del_tunnel_if_rpc_callback (ipsec_add_del_tunnel_args_t * a)
 {
   vnet_main_t *vnm = vnet_get_main ();
-  ASSERT (os_get_cpu_number () == 0);
+  ASSERT (vlib_get_thread_index () == 0);
 
   return ipsec_add_del_tunnel_if_internal (vnm, a);
 }
diff --git a/src/vnet/l2/l2_bvi.h b/src/vnet/l2/l2_bvi.h
index dd1130a..e21a161 100644
--- a/src/vnet/l2/l2_bvi.h
+++ b/src/vnet/l2/l2_bvi.h
@@ -97,7 +97,7 @@
   vlib_increment_combined_counter
     (vnet_main->interface_main.combined_sw_if_counters
      + VNET_INTERFACE_COUNTER_RX,
-     vlib_main->cpu_index,
+     vlib_main->thread_index,
      vnet_buffer (b0)->sw_if_index[VLIB_RX],
      1, vlib_buffer_length_in_chain (vlib_main, b0));
   return TO_BVI_ERR_OK;
diff --git a/src/vnet/l2/l2_input.c b/src/vnet/l2/l2_input.c
index 041ff38..e5d6878 100644
--- a/src/vnet/l2/l2_input.c
+++ b/src/vnet/l2/l2_input.c
@@ -117,7 +117,7 @@
 static_always_inline void
 classify_and_dispatch (vlib_main_t * vm,
 		       vlib_node_runtime_t * node,
-		       u32 cpu_index,
+		       u32 thread_index,
 		       l2input_main_t * msm, vlib_buffer_t * b0, u32 * next0)
 {
   /*
@@ -237,7 +237,7 @@
   u32 n_left_from, *from, *to_next;
   l2input_next_t next_index;
   l2input_main_t *msm = &l2input_main;
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
 
   from = vlib_frame_vector_args (frame);
   n_left_from = frame->n_vectors;	/* number of packets to process */
@@ -350,10 +350,10 @@
 	  vlib_node_increment_counter (vm, l2input_node.index,
 				       L2INPUT_ERROR_L2INPUT, 4);
 
-	  classify_and_dispatch (vm, node, cpu_index, msm, b0, &next0);
-	  classify_and_dispatch (vm, node, cpu_index, msm, b1, &next1);
-	  classify_and_dispatch (vm, node, cpu_index, msm, b2, &next2);
-	  classify_and_dispatch (vm, node, cpu_index, msm, b3, &next3);
+	  classify_and_dispatch (vm, node, thread_index, msm, b0, &next0);
+	  classify_and_dispatch (vm, node, thread_index, msm, b1, &next1);
+	  classify_and_dispatch (vm, node, thread_index, msm, b2, &next2);
+	  classify_and_dispatch (vm, node, thread_index, msm, b3, &next3);
 
 	  /* verify speculative enqueues, maybe switch current next frame */
 	  /* if next0==next1==next_index then nothing special needs to be done */
@@ -393,7 +393,7 @@
 	  vlib_node_increment_counter (vm, l2input_node.index,
 				       L2INPUT_ERROR_L2INPUT, 1);
 
-	  classify_and_dispatch (vm, node, cpu_index, msm, b0, &next0);
+	  classify_and_dispatch (vm, node, thread_index, msm, b0, &next0);
 
 	  /* verify speculative enqueue, maybe switch current next frame */
 	  vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
diff --git a/src/vnet/l2/l2_output.c b/src/vnet/l2/l2_output.c
index 00f2257..e17b2a1 100644
--- a/src/vnet/l2/l2_output.c
+++ b/src/vnet/l2/l2_output.c
@@ -643,11 +643,11 @@
 
   hw0 = vnet_get_sup_hw_interface (vnet_main, sw_if_index);
 
-  uword cpu_number;
+  uword thread_index;
 
-  cpu_number = os_get_cpu_number ();
+  thread_index = vlib_get_thread_index ();
 
-  if (cpu_number)
+  if (thread_index)
     {
       u32 oldflags;
 
diff --git a/src/vnet/l2tp/decap.c b/src/vnet/l2tp/decap.c
index e898693..4610412 100644
--- a/src/vnet/l2tp/decap.c
+++ b/src/vnet/l2tp/decap.c
@@ -149,7 +149,7 @@
 
   /* per-mapping byte stats include the ethernet header */
   vlib_increment_combined_counter (&lm->counter_main,
-				   os_get_cpu_number (),
+				   vlib_get_thread_index (),
 				   counter_index, 1 /* packet_increment */ ,
 				   vlib_buffer_length_in_chain (vm, b) +
 				   sizeof (ethernet_header_t));
diff --git a/src/vnet/l2tp/encap.c b/src/vnet/l2tp/encap.c
index ed7a958..dcdfde4 100644
--- a/src/vnet/l2tp/encap.c
+++ b/src/vnet/l2tp/encap.c
@@ -124,7 +124,7 @@
 
   /* per-mapping byte stats include the ethernet header */
   vlib_increment_combined_counter (&lm->counter_main,
-				   os_get_cpu_number (),
+				   vlib_get_thread_index (),
 				   counter_index, 1 /* packet_increment */ ,
 				   vlib_buffer_length_in_chain (vm, b));
 
diff --git a/src/vnet/l2tp/l2tp.c b/src/vnet/l2tp/l2tp.c
index cb94d7e..3dedc44 100644
--- a/src/vnet/l2tp/l2tp.c
+++ b/src/vnet/l2tp/l2tp.c
@@ -157,7 +157,7 @@
   u32 session_index;
   u32 counter_index;
   u32 nincr = 0;
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
 
   /* *INDENT-OFF* */
   pool_foreach (session, lm->sessions,
@@ -167,11 +167,11 @@
       session_index_to_counter_index (session_index,
                                       SESSION_COUNTER_USER_TO_NETWORK);
     vlib_increment_combined_counter (&lm->counter_main,
-                                     cpu_index,
+                                     thread_index,
                                      counter_index,
                                      1/*pkt*/, 1111 /*bytes*/);
     vlib_increment_combined_counter (&lm->counter_main,
-                                     cpu_index,
+                                     thread_index,
                                      counter_index+1,
                                      1/*pkt*/, 2222 /*bytes*/);
     nincr++;
diff --git a/src/vnet/lisp-gpe/decap.c b/src/vnet/lisp-gpe/decap.c
index d887a95..6876971 100644
--- a/src/vnet/lisp-gpe/decap.c
+++ b/src/vnet/lisp-gpe/decap.c
@@ -103,7 +103,7 @@
 }
 
 static_always_inline void
-incr_decap_stats (vnet_main_t * vnm, u32 cpu_index, u32 length,
+incr_decap_stats (vnet_main_t * vnm, u32 thread_index, u32 length,
 		  u32 sw_if_index, u32 * last_sw_if_index, u32 * n_packets,
 		  u32 * n_bytes)
 {
@@ -122,7 +122,7 @@
 
 	  vlib_increment_combined_counter (im->combined_sw_if_counters +
 					   VNET_INTERFACE_COUNTER_RX,
-					   cpu_index, *last_sw_if_index,
+					   thread_index, *last_sw_if_index,
 					   *n_packets, *n_bytes);
 	}
       *last_sw_if_index = sw_if_index;
@@ -150,11 +150,11 @@
 lisp_gpe_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
 		       vlib_frame_t * from_frame, u8 is_v4)
 {
-  u32 n_left_from, next_index, *from, *to_next, cpu_index;
+  u32 n_left_from, next_index, *from, *to_next, thread_index;
   u32 n_bytes = 0, n_packets = 0, last_sw_if_index = ~0, drops = 0;
   lisp_gpe_main_t *lgm = vnet_lisp_gpe_get_main ();
 
-  cpu_index = os_get_cpu_number ();
+  thread_index = vlib_get_thread_index ();
   from = vlib_frame_vector_args (from_frame);
   n_left_from = from_frame->n_vectors;
 
@@ -267,7 +267,7 @@
 
 	  if (si0)
 	    {
-	      incr_decap_stats (lgm->vnet_main, cpu_index,
+	      incr_decap_stats (lgm->vnet_main, thread_index,
 				vlib_buffer_length_in_chain (vm, b0), si0[0],
 				&last_sw_if_index, &n_packets, &n_bytes);
 	      vnet_buffer (b0)->sw_if_index[VLIB_RX] = si0[0];
@@ -282,7 +282,7 @@
 
 	  if (si1)
 	    {
-	      incr_decap_stats (lgm->vnet_main, cpu_index,
+	      incr_decap_stats (lgm->vnet_main, thread_index,
 				vlib_buffer_length_in_chain (vm, b1), si1[0],
 				&last_sw_if_index, &n_packets, &n_bytes);
 	      vnet_buffer (b1)->sw_if_index[VLIB_RX] = si1[0];
@@ -397,7 +397,7 @@
 
 	  if (si0)
 	    {
-	      incr_decap_stats (lgm->vnet_main, cpu_index,
+	      incr_decap_stats (lgm->vnet_main, thread_index,
 				vlib_buffer_length_in_chain (vm, b0), si0[0],
 				&last_sw_if_index, &n_packets, &n_bytes);
 	      vnet_buffer (b0)->sw_if_index[VLIB_RX] = si0[0];
@@ -430,7 +430,7 @@
     }
 
   /* flush iface stats */
-  incr_decap_stats (lgm->vnet_main, cpu_index, 0, ~0, &last_sw_if_index,
+  incr_decap_stats (lgm->vnet_main, thread_index, 0, ~0, &last_sw_if_index,
 		    &n_packets, &n_bytes);
   vlib_node_increment_counter (vm, lisp_gpe_ip4_input_node.index,
 			       LISP_GPE_ERROR_NO_TUNNEL, drops);
diff --git a/src/vnet/lldp/lldp_input.c b/src/vnet/lldp/lldp_input.c
index 762743d..e88f6fd 100644
--- a/src/vnet/lldp/lldp_input.c
+++ b/src/vnet/lldp/lldp_input.c
@@ -35,7 +35,7 @@
 static void
 lldp_rpc_update_peer_cb (const lldp_intf_update_t * a)
 {
-  ASSERT (os_get_cpu_number () == 0);
+  ASSERT (vlib_get_thread_index () == 0);
 
   lldp_intf_t *n = lldp_get_intf (&lldp_main, a->hw_if_index);
   if (!n)
diff --git a/src/vnet/map/ip4_map.c b/src/vnet/map/ip4_map.c
index 1a20d70..e39b6f1 100644
--- a/src/vnet/map/ip4_map.c
+++ b/src/vnet/map/ip4_map.c
@@ -248,7 +248,7 @@
   next_index = node->cached_next_index;
   map_main_t *mm = &map_main;
   vlib_combined_counter_main_t *cm = mm->domain_counters;
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
 
   while (n_left_from > 0)
     {
@@ -377,7 +377,7 @@
 					       ip40) ?
 		    IP4_MAP_NEXT_IP6_REWRITE : next0;
 		  vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX,
-						   cpu_index,
+						   thread_index,
 						   map_domain_index0, 1,
 						   clib_net_to_host_u16
 						   (ip6h0->payload_length) +
@@ -409,7 +409,7 @@
 					       ip41) ?
 		    IP4_MAP_NEXT_IP6_REWRITE : next1;
 		  vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX,
-						   cpu_index,
+						   thread_index,
 						   map_domain_index1, 1,
 						   clib_net_to_host_u16
 						   (ip6h1->payload_length) +
@@ -520,7 +520,7 @@
 					       ip40) ?
 		    IP4_MAP_NEXT_IP6_REWRITE : next0;
 		  vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX,
-						   cpu_index,
+						   thread_index,
 						   map_domain_index0, 1,
 						   clib_net_to_host_u16
 						   (ip6h0->payload_length) +
@@ -564,7 +564,7 @@
   next_index = node->cached_next_index;
   map_main_t *mm = &map_main;
   vlib_combined_counter_main_t *cm = mm->domain_counters;
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
   u32 *fragments_to_drop = NULL;
   u32 *fragments_to_loopback = NULL;
 
@@ -694,8 +694,8 @@
 	    {
 	      if (error0 == MAP_ERROR_NONE)
 		vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX,
-						 cpu_index, map_domain_index0,
-						 1,
+						 thread_index,
+						 map_domain_index0, 1,
 						 clib_net_to_host_u16
 						 (ip60->payload_length) + 40);
 	      next0 =
diff --git a/src/vnet/map/ip4_map_t.c b/src/vnet/map/ip4_map_t.c
index b63d76b..5f2bcbf 100644
--- a/src/vnet/map/ip4_map_t.c
+++ b/src/vnet/map/ip4_map_t.c
@@ -477,7 +477,7 @@
   n_left_from = frame->n_vectors;
   next_index = node->cached_next_index;
   vlib_combined_counter_main_t *cm = map_main.domain_counters;
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
 
   while (n_left_from > 0)
     {
@@ -520,7 +520,7 @@
 	  if (PREDICT_TRUE (error0 == MAP_ERROR_NONE))
 	    {
 	      vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX,
-					       cpu_index,
+					       thread_index,
 					       vnet_buffer (p0)->map_t.
 					       map_domain_index, 1, len0);
 	    }
@@ -1051,7 +1051,7 @@
   n_left_from = frame->n_vectors;
   next_index = node->cached_next_index;
   vlib_combined_counter_main_t *cm = map_main.domain_counters;
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
 
   while (n_left_from > 0)
     {
@@ -1158,7 +1158,7 @@
 	      (error0 == MAP_ERROR_NONE && next0 != IP4_MAPT_NEXT_MAPT_ICMP))
 	    {
 	      vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX,
-					       cpu_index,
+					       thread_index,
 					       vnet_buffer (p0)->map_t.
 					       map_domain_index, 1,
 					       clib_net_to_host_u16 (ip40->
@@ -1169,7 +1169,7 @@
 	      (error1 == MAP_ERROR_NONE && next1 != IP4_MAPT_NEXT_MAPT_ICMP))
 	    {
 	      vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX,
-					       cpu_index,
+					       thread_index,
 					       vnet_buffer (p1)->map_t.
 					       map_domain_index, 1,
 					       clib_net_to_host_u16 (ip41->
@@ -1252,7 +1252,7 @@
 	      (error0 == MAP_ERROR_NONE && next0 != IP4_MAPT_NEXT_MAPT_ICMP))
 	    {
 	      vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX,
-					       cpu_index,
+					       thread_index,
 					       vnet_buffer (p0)->map_t.
 					       map_domain_index, 1,
 					       clib_net_to_host_u16 (ip40->
diff --git a/src/vnet/map/ip6_map.c b/src/vnet/map/ip6_map.c
index f7eb768..63ada96 100644
--- a/src/vnet/map/ip6_map.c
+++ b/src/vnet/map/ip6_map.c
@@ -172,7 +172,7 @@
     vlib_node_get_runtime (vm, ip6_map_node.index);
   map_main_t *mm = &map_main;
   vlib_combined_counter_main_t *cm = mm->domain_counters;
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
 
   from = vlib_frame_vector_args (frame);
   n_left_from = frame->n_vectors;
@@ -319,7 +319,7 @@
 			IP6_MAP_NEXT_IP4_REWRITE : next0;
 		    }
 		  vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_RX,
-						   cpu_index,
+						   thread_index,
 						   map_domain_index0, 1,
 						   clib_net_to_host_u16
 						   (ip40->length));
@@ -352,7 +352,7 @@
 			IP6_MAP_NEXT_IP4_REWRITE : next1;
 		    }
 		  vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_RX,
-						   cpu_index,
+						   thread_index,
 						   map_domain_index1, 1,
 						   clib_net_to_host_u16
 						   (ip41->length));
@@ -505,7 +505,7 @@
 			IP6_MAP_NEXT_IP4_REWRITE : next0;
 		    }
 		  vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_RX,
-						   cpu_index,
+						   thread_index,
 						   map_domain_index0, 1,
 						   clib_net_to_host_u16
 						   (ip40->length));
@@ -820,7 +820,7 @@
     vlib_node_get_runtime (vm, ip6_map_ip4_reass_node.index);
   map_main_t *mm = &map_main;
   vlib_combined_counter_main_t *cm = mm->domain_counters;
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
   u32 *fragments_to_drop = NULL;
   u32 *fragments_to_loopback = NULL;
 
@@ -958,8 +958,8 @@
 	    {
 	      if (error0 == MAP_ERROR_NONE)
 		vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_RX,
-						 cpu_index, map_domain_index0,
-						 1,
+						 thread_index,
+						 map_domain_index0, 1,
 						 clib_net_to_host_u16
 						 (ip40->length));
 	      next0 =
@@ -1015,7 +1015,7 @@
   vlib_node_runtime_t *error_node =
     vlib_node_get_runtime (vm, ip6_map_icmp_relay_node.index);
   map_main_t *mm = &map_main;
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
   u16 *fragment_ids, *fid;
 
   from = vlib_frame_vector_args (frame);
@@ -1143,7 +1143,8 @@
 	  ip_csum_t sum = ip_incremental_checksum (0, new_icmp40, nlen - 20);
 	  new_icmp40->checksum = ~ip_csum_fold (sum);
 
-	  vlib_increment_simple_counter (&mm->icmp_relayed, cpu_index, 0, 1);
+	  vlib_increment_simple_counter (&mm->icmp_relayed, thread_index, 0,
+					 1);
 
 	error:
 	  if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
diff --git a/src/vnet/map/ip6_map_t.c b/src/vnet/map/ip6_map_t.c
index eb3996c..9915167 100644
--- a/src/vnet/map/ip6_map_t.c
+++ b/src/vnet/map/ip6_map_t.c
@@ -448,7 +448,7 @@
   n_left_from = frame->n_vectors;
   next_index = node->cached_next_index;
   vlib_combined_counter_main_t *cm = map_main.domain_counters;
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
 
   while (n_left_from > 0)
     {
@@ -493,7 +493,7 @@
 	  if (PREDICT_TRUE (error0 == MAP_ERROR_NONE))
 	    {
 	      vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_RX,
-					       cpu_index,
+					       thread_index,
 					       vnet_buffer (p0)->
 					       map_t.map_domain_index, 1,
 					       len0);
@@ -1051,7 +1051,7 @@
   vlib_node_runtime_t *error_node =
     vlib_node_get_runtime (vm, ip6_map_t_node.index);
   vlib_combined_counter_main_t *cm = map_main.domain_counters;
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
 
   from = vlib_frame_vector_args (frame);
   n_left_from = frame->n_vectors;
@@ -1218,7 +1218,7 @@
 	      (error0 == MAP_ERROR_NONE && next0 != IP6_MAPT_NEXT_MAPT_ICMP))
 	    {
 	      vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_RX,
-					       cpu_index,
+					       thread_index,
 					       vnet_buffer (p0)->
 					       map_t.map_domain_index, 1,
 					       clib_net_to_host_u16
@@ -1229,7 +1229,7 @@
 	      (error1 == MAP_ERROR_NONE && next1 != IP6_MAPT_NEXT_MAPT_ICMP))
 	    {
 	      vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_RX,
-					       cpu_index,
+					       thread_index,
 					       vnet_buffer (p1)->
 					       map_t.map_domain_index, 1,
 					       clib_net_to_host_u16
@@ -1403,7 +1403,7 @@
 	      (error0 == MAP_ERROR_NONE && next0 != IP6_MAPT_NEXT_MAPT_ICMP))
 	    {
 	      vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_RX,
-					       cpu_index,
+					       thread_index,
 					       vnet_buffer (p0)->
 					       map_t.map_domain_index, 1,
 					       clib_net_to_host_u16
diff --git a/src/vnet/mpls/mpls_input.c b/src/vnet/mpls/mpls_input.c
index 893c451..1b9bdd0 100644
--- a/src/vnet/mpls/mpls_input.c
+++ b/src/vnet/mpls/mpls_input.c
@@ -76,7 +76,7 @@
   u32 n_left_from, next_index, * from, * to_next;
   mpls_input_runtime_t * rt;
   mpls_main_t * mm;
-  u32 cpu_index = os_get_cpu_number();
+  u32 thread_index = vlib_get_thread_index();
   vlib_simple_counter_main_t * cm;
   vnet_main_t * vnm = vnet_get_main();
 
@@ -151,7 +151,7 @@
               next0 = MPLS_INPUT_NEXT_LOOKUP;
               vnet_feature_arc_start(mm->input_feature_arc_index,
                                      sw_if_index0, &next0, b0);
-              vlib_increment_simple_counter (cm, cpu_index, sw_if_index0, 1);
+              vlib_increment_simple_counter (cm, thread_index, sw_if_index0, 1);
           }
 
           if (PREDICT_FALSE(h1[3] == 0))
@@ -164,7 +164,7 @@
               next1 = MPLS_INPUT_NEXT_LOOKUP;
               vnet_feature_arc_start(mm->input_feature_arc_index,
                                      sw_if_index1, &next1, b1);
-              vlib_increment_simple_counter (cm, cpu_index, sw_if_index1, 1);
+              vlib_increment_simple_counter (cm, thread_index, sw_if_index1, 1);
           }
 
           if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
@@ -215,7 +215,7 @@
             {
               next0 = MPLS_INPUT_NEXT_LOOKUP;
 	      vnet_feature_arc_start(mm->input_feature_arc_index, sw_if_index0, &next0, b0);
-              vlib_increment_simple_counter (cm, cpu_index, sw_if_index0, 1);
+              vlib_increment_simple_counter (cm, thread_index, sw_if_index0, 1);
             }
 
           if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) 
diff --git a/src/vnet/mpls/mpls_lookup.c b/src/vnet/mpls/mpls_lookup.c
index 475bb20..ace6a70 100644
--- a/src/vnet/mpls/mpls_lookup.c
+++ b/src/vnet/mpls/mpls_lookup.c
@@ -67,7 +67,7 @@
   vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters;
   u32 n_left_from, next_index, * from, * to_next;
   mpls_main_t * mm = &mpls_main;
-  u32 cpu_index = os_get_cpu_number();
+  u32 thread_index = vlib_get_thread_index();
 
   from = vlib_frame_vector_args (from_frame);
   n_left_from = from_frame->n_vectors;
@@ -220,16 +220,16 @@
           vnet_buffer (b3)->ip.adj_index[VLIB_TX] = dpo3->dpoi_index;
 
           vlib_increment_combined_counter
-              (cm, cpu_index, lbi0, 1,
+              (cm, thread_index, lbi0, 1,
                vlib_buffer_length_in_chain (vm, b0));
           vlib_increment_combined_counter
-              (cm, cpu_index, lbi1, 1,
+              (cm, thread_index, lbi1, 1,
                vlib_buffer_length_in_chain (vm, b1));
           vlib_increment_combined_counter
-              (cm, cpu_index, lbi2, 1,
+              (cm, thread_index, lbi2, 1,
                vlib_buffer_length_in_chain (vm, b2));
           vlib_increment_combined_counter
-              (cm, cpu_index, lbi3, 1,
+              (cm, thread_index, lbi3, 1,
                vlib_buffer_length_in_chain (vm, b3));
 
           /*
@@ -351,7 +351,7 @@
           vnet_buffer (b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
 
           vlib_increment_combined_counter
-              (cm, cpu_index, lbi0, 1,
+              (cm, thread_index, lbi0, 1,
                vlib_buffer_length_in_chain (vm, b0));
 
           /*
@@ -440,7 +440,7 @@
 {
   vlib_combined_counter_main_t * cm = &load_balance_main.lbm_via_counters;
   u32 n_left_from, n_left_to_next, * from, * to_next;
-  u32 cpu_index = os_get_cpu_number();
+  u32 thread_index = vlib_get_thread_index();
   u32 next;
 
   from = vlib_frame_vector_args (frame);
@@ -536,10 +536,10 @@
           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
 
           vlib_increment_combined_counter
-              (cm, cpu_index, lbi0, 1,
+              (cm, thread_index, lbi0, 1,
                vlib_buffer_length_in_chain (vm, p0));
           vlib_increment_combined_counter
-              (cm, cpu_index, lbi1, 1,
+              (cm, thread_index, lbi1, 1,
                vlib_buffer_length_in_chain (vm, p1));
 
           if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED))
@@ -597,7 +597,7 @@
           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
 
           vlib_increment_combined_counter
-              (cm, cpu_index, lbi0, 1,
+              (cm, thread_index, lbi0, 1,
                vlib_buffer_length_in_chain (vm, p0));
 
           vlib_validate_buffer_enqueue_x1 (vm, node, next,
diff --git a/src/vnet/mpls/mpls_output.c b/src/vnet/mpls/mpls_output.c
index 08018fd..d90dec2 100644
--- a/src/vnet/mpls/mpls_output.c
+++ b/src/vnet/mpls/mpls_output.c
@@ -64,12 +64,12 @@
                     vlib_frame_t * from_frame,
 		    int is_midchain)
 {
-  u32 n_left_from, next_index, * from, * to_next, cpu_index;
+  u32 n_left_from, next_index, * from, * to_next, thread_index;
   vlib_node_runtime_t * error_node;
   u32 n_left_to_next;
   mpls_main_t *mm;
 
-  cpu_index = os_get_cpu_number();
+  thread_index = vlib_get_thread_index();
   error_node = vlib_node_get_runtime (vm, mpls_output_node.index);
   from = vlib_frame_vector_args (from_frame);
   n_left_from = from_frame->n_vectors;
@@ -137,13 +137,13 @@
           /* Bump the adj counters for packet and bytes */
           vlib_increment_combined_counter
               (&adjacency_counters,
-               cpu_index,
+               thread_index,
                adj_index0,
                1,
                vlib_buffer_length_in_chain (vm, p0) + rw_len0);
           vlib_increment_combined_counter
               (&adjacency_counters,
-               cpu_index,
+               thread_index,
                adj_index1,
                1,
                vlib_buffer_length_in_chain (vm, p1) + rw_len1);
@@ -245,7 +245,7 @@
           
           vlib_increment_combined_counter
               (&adjacency_counters,
-               cpu_index,
+               thread_index,
                adj_index0,
                1,
                vlib_buffer_length_in_chain (vm, p0) + rw_len0);
diff --git a/src/vnet/pg/input.c b/src/vnet/pg/input.c
index 2649798..597ae06 100644
--- a/src/vnet/pg/input.c
+++ b/src/vnet/pg/input.c
@@ -893,7 +893,7 @@
 
     vlib_increment_combined_counter (im->combined_sw_if_counters
 				     + VNET_INTERFACE_COUNTER_RX,
-				     os_get_cpu_number (),
+				     vlib_get_thread_index (),
 				     si->sw_if_index, n_buffers, length_sum);
   }
 
@@ -1266,7 +1266,7 @@
 	    l += vlib_buffer_index_length_in_chain (vm, buffers[i]);
 	  vlib_increment_combined_counter (im->combined_sw_if_counters
 					   + VNET_INTERFACE_COUNTER_RX,
-					   os_get_cpu_number (),
+					   vlib_get_thread_index (),
 					   si->sw_if_index, n_alloc, l);
 	  s->current_replay_packet_index += n_alloc;
 	  s->current_replay_packet_index %=
diff --git a/src/vnet/replication.c b/src/vnet/replication.c
index 86d922b..233a8c2 100644
--- a/src/vnet/replication.c
+++ b/src/vnet/replication.c
@@ -31,16 +31,16 @@
 {
   replication_main_t *rm = &replication_main;
   replication_context_t *ctx;
-  uword cpu_number = vm->cpu_index;
+  uword thread_index = vm->thread_index;
   ip4_header_t *ip;
   u32 ctx_id;
 
   /* Allocate a context, reserve context 0 */
-  if (PREDICT_FALSE (rm->contexts[cpu_number] == 0))
-    pool_get_aligned (rm->contexts[cpu_number], ctx, CLIB_CACHE_LINE_BYTES);
+  if (PREDICT_FALSE (rm->contexts[thread_index] == 0))
+    pool_get_aligned (rm->contexts[thread_index], ctx, CLIB_CACHE_LINE_BYTES);
 
-  pool_get_aligned (rm->contexts[cpu_number], ctx, CLIB_CACHE_LINE_BYTES);
-  ctx_id = ctx - rm->contexts[cpu_number];
+  pool_get_aligned (rm->contexts[thread_index], ctx, CLIB_CACHE_LINE_BYTES);
+  ctx_id = ctx - rm->contexts[thread_index];
 
   /* Save state from vlib buffer */
   ctx->saved_free_list_index = b0->free_list_index;
@@ -94,11 +94,11 @@
 {
   replication_main_t *rm = &replication_main;
   replication_context_t *ctx;
-  uword cpu_number = vm->cpu_index;
+  uword thread_index = vm->thread_index;
   ip4_header_t *ip;
 
   /* Get access to the replication context */
-  ctx = pool_elt_at_index (rm->contexts[cpu_number], b0->recycle_count);
+  ctx = pool_elt_at_index (rm->contexts[thread_index], b0->recycle_count);
 
   /* Restore vnet buffer state */
   clib_memcpy (vnet_buffer (b0), ctx->vnet_buffer,
@@ -133,7 +133,7 @@
       b0->flags &= ~VLIB_BUFFER_RECYCLE;
 
       /* Free context back to its pool */
-      pool_put (rm->contexts[cpu_number], ctx);
+      pool_put (rm->contexts[thread_index], ctx);
     }
 
   return ctx;
@@ -160,7 +160,7 @@
   replication_main_t *rm = &replication_main;
   replication_context_t *ctx;
   u32 feature_node_index = 0;
-  uword cpu_number = vm->cpu_index;
+  uword thread_index = vm->thread_index;
 
   /*
    * All buffers in the list are destined to the same recycle node.
@@ -172,7 +172,7 @@
     {
       bi0 = fl->buffers[0];
       b0 = vlib_get_buffer (vm, bi0);
-      ctx = pool_elt_at_index (rm->contexts[cpu_number], b0->recycle_count);
+      ctx = pool_elt_at_index (rm->contexts[thread_index], b0->recycle_count);
       feature_node_index = ctx->recycle_node_index;
     }
 
diff --git a/src/vnet/replication.h b/src/vnet/replication.h
index 5dc554c..ce4b3ff 100644
--- a/src/vnet/replication.h
+++ b/src/vnet/replication.h
@@ -100,7 +100,7 @@
   replication_main_t *rm = &replication_main;
 
   return replication_is_recycled (b0) ?
-    pool_elt_at_index (rm->contexts[os_get_cpu_number ()],
+    pool_elt_at_index (rm->contexts[vlib_get_thread_index ()],
 		       b0->recycle_count) : 0;
 }
 
diff --git a/src/vnet/session/node.c b/src/vnet/session/node.c
index b86e87d..dd211c5 100644
--- a/src/vnet/session/node.c
+++ b/src/vnet/session/node.c
@@ -311,7 +311,7 @@
   unix_shared_memory_queue_t *q;
   application_t *app;
   int n_tx_packets = 0;
-  u32 my_thread_index = vm->cpu_index;
+  u32 my_thread_index = vm->thread_index;
   int i, rv;
   f64 now = vlib_time_now (vm);
 
diff --git a/src/vnet/sr/sr_localsid.c b/src/vnet/sr/sr_localsid.c
index 2e3d56d..6d72a50 100755
--- a/src/vnet/sr/sr_localsid.c
+++ b/src/vnet/sr/sr_localsid.c
@@ -887,7 +887,7 @@
   from = vlib_frame_vector_args (from_frame);
   n_left_from = from_frame->n_vectors;
   next_index = node->cached_next_index;
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
 
   while (n_left_from > 0)
     {
@@ -974,26 +974,26 @@
 	  vlib_increment_combined_counter
 	    (((next0 ==
 	       SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) :
-	      &(sm->sr_ls_valid_counters)), cpu_index, ls0 - sm->localsids, 1,
-	     vlib_buffer_length_in_chain (vm, b0));
+	      &(sm->sr_ls_valid_counters)), thread_index, ls0 - sm->localsids,
+	     1, vlib_buffer_length_in_chain (vm, b0));
 
 	  vlib_increment_combined_counter
 	    (((next1 ==
 	       SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) :
-	      &(sm->sr_ls_valid_counters)), cpu_index, ls1 - sm->localsids, 1,
-	     vlib_buffer_length_in_chain (vm, b1));
+	      &(sm->sr_ls_valid_counters)), thread_index, ls1 - sm->localsids,
+	     1, vlib_buffer_length_in_chain (vm, b1));
 
 	  vlib_increment_combined_counter
 	    (((next2 ==
 	       SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) :
-	      &(sm->sr_ls_valid_counters)), cpu_index, ls2 - sm->localsids, 1,
-	     vlib_buffer_length_in_chain (vm, b2));
+	      &(sm->sr_ls_valid_counters)), thread_index, ls2 - sm->localsids,
+	     1, vlib_buffer_length_in_chain (vm, b2));
 
 	  vlib_increment_combined_counter
 	    (((next3 ==
 	       SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) :
-	      &(sm->sr_ls_valid_counters)), cpu_index, ls3 - sm->localsids, 1,
-	     vlib_buffer_length_in_chain (vm, b3));
+	      &(sm->sr_ls_valid_counters)), thread_index, ls3 - sm->localsids,
+	     1, vlib_buffer_length_in_chain (vm, b3));
 
 	  vlib_validate_buffer_enqueue_x4 (vm, node, next_index, to_next,
 					   n_left_to_next, bi0, bi1, bi2, bi3,
@@ -1062,8 +1062,8 @@
 	  vlib_increment_combined_counter
 	    (((next0 ==
 	       SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) :
-	      &(sm->sr_ls_valid_counters)), cpu_index, ls0 - sm->localsids, 1,
-	     vlib_buffer_length_in_chain (vm, b0));
+	      &(sm->sr_ls_valid_counters)), thread_index, ls0 - sm->localsids,
+	     1, vlib_buffer_length_in_chain (vm, b0));
 
 	  vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
 					   n_left_to_next, bi0, next0);
@@ -1103,7 +1103,7 @@
   from = vlib_frame_vector_args (from_frame);
   n_left_from = from_frame->n_vectors;
   next_index = node->cached_next_index;
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
 
   while (n_left_from > 0)
     {
@@ -1205,26 +1205,26 @@
 	  vlib_increment_combined_counter
 	    (((next0 ==
 	       SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) :
-	      &(sm->sr_ls_valid_counters)), cpu_index, ls0 - sm->localsids, 1,
-	     vlib_buffer_length_in_chain (vm, b0));
+	      &(sm->sr_ls_valid_counters)), thread_index, ls0 - sm->localsids,
+	     1, vlib_buffer_length_in_chain (vm, b0));
 
 	  vlib_increment_combined_counter
 	    (((next1 ==
 	       SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) :
-	      &(sm->sr_ls_valid_counters)), cpu_index, ls1 - sm->localsids, 1,
-	     vlib_buffer_length_in_chain (vm, b1));
+	      &(sm->sr_ls_valid_counters)), thread_index, ls1 - sm->localsids,
+	     1, vlib_buffer_length_in_chain (vm, b1));
 
 	  vlib_increment_combined_counter
 	    (((next2 ==
 	       SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) :
-	      &(sm->sr_ls_valid_counters)), cpu_index, ls2 - sm->localsids, 1,
-	     vlib_buffer_length_in_chain (vm, b2));
+	      &(sm->sr_ls_valid_counters)), thread_index, ls2 - sm->localsids,
+	     1, vlib_buffer_length_in_chain (vm, b2));
 
 	  vlib_increment_combined_counter
 	    (((next3 ==
 	       SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) :
-	      &(sm->sr_ls_valid_counters)), cpu_index, ls3 - sm->localsids, 1,
-	     vlib_buffer_length_in_chain (vm, b3));
+	      &(sm->sr_ls_valid_counters)), thread_index, ls3 - sm->localsids,
+	     1, vlib_buffer_length_in_chain (vm, b3));
 
 	  vlib_validate_buffer_enqueue_x4 (vm, node, next_index, to_next,
 					   n_left_to_next, bi0, bi1, bi2, bi3,
@@ -1295,8 +1295,8 @@
 	  vlib_increment_combined_counter
 	    (((next0 ==
 	       SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) :
-	      &(sm->sr_ls_valid_counters)), cpu_index, ls0 - sm->localsids, 1,
-	     vlib_buffer_length_in_chain (vm, b0));
+	      &(sm->sr_ls_valid_counters)), thread_index, ls0 - sm->localsids,
+	     1, vlib_buffer_length_in_chain (vm, b0));
 
 	  vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
 					   n_left_to_next, bi0, next0);
diff --git a/src/vnet/tcp/builtin_client.c b/src/vnet/tcp/builtin_client.c
index e370506..c1567aa 100644
--- a/src/vnet/tcp/builtin_client.c
+++ b/src/vnet/tcp/builtin_client.c
@@ -174,7 +174,7 @@
     pthread_sigmask (SIG_SETMASK, &s, 0);
   }
 
-  clib_per_cpu_mheaps[os_get_cpu_number ()] = clib_per_cpu_mheaps[0];
+  clib_per_cpu_mheaps[vlib_get_thread_index ()] = clib_per_cpu_mheaps[0];
 
   while (1)
     {
diff --git a/src/vnet/tcp/tcp.c b/src/vnet/tcp/tcp.c
index b2a371e..b6c3482 100644
--- a/src/vnet/tcp/tcp.c
+++ b/src/vnet/tcp/tcp.c
@@ -646,10 +646,10 @@
 void
 tcp_timer_keep_handler (u32 conn_index)
 {
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
   tcp_connection_t *tc;
 
-  tc = tcp_connection_get (conn_index, cpu_index);
+  tc = tcp_connection_get (conn_index, thread_index);
   tc->timers[TCP_TIMER_KEEP] = TCP_TIMER_HANDLE_INVALID;
 
   tcp_connection_close (tc);
@@ -675,10 +675,10 @@
 void
 tcp_timer_waitclose_handler (u32 conn_index)
 {
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
   tcp_connection_t *tc;
 
-  tc = tcp_connection_get (conn_index, cpu_index);
+  tc = tcp_connection_get (conn_index, thread_index);
   tc->timers[TCP_TIMER_WAITCLOSE] = TCP_TIMER_HANDLE_INVALID;
 
   /* Session didn't come back with a close(). Send FIN either way
diff --git a/src/vnet/tcp/tcp_debug.h b/src/vnet/tcp/tcp_debug.h
index 0090e15..eaca672 100644
--- a/src/vnet/tcp/tcp_debug.h
+++ b/src/vnet/tcp/tcp_debug.h
@@ -343,7 +343,7 @@
     }                                                           	\
   else                                                          	\
     {                                                           	\
-      u32 _thread_index = os_get_cpu_number ();                 	\
+      u32 _thread_index = vlib_get_thread_index ();                 	\
       _tc = tcp_connection_get (_tc_index, _thread_index);      	\
     }                                                           	\
   ELOG_TYPE_DECLARE (_e) =                                      	\
diff --git a/src/vnet/tcp/tcp_input.c b/src/vnet/tcp/tcp_input.c
index a8224dc..7e9fa47 100644
--- a/src/vnet/tcp/tcp_input.c
+++ b/src/vnet/tcp/tcp_input.c
@@ -1142,7 +1142,7 @@
 			  vlib_frame_t * from_frame, int is_ip4)
 {
   u32 n_left_from, next_index, *from, *to_next;
-  u32 my_thread_index = vm->cpu_index, errors = 0;
+  u32 my_thread_index = vm->thread_index, errors = 0;
   tcp_main_t *tm = vnet_get_tcp_main ();
 
   from = vlib_frame_vector_args (from_frame);
@@ -1332,7 +1332,7 @@
 {
   tcp_main_t *tm = vnet_get_tcp_main ();
   u32 n_left_from, next_index, *from, *to_next;
-  u32 my_thread_index = vm->cpu_index, errors = 0;
+  u32 my_thread_index = vm->thread_index, errors = 0;
   u8 sst = is_ip4 ? SESSION_TYPE_IP4_TCP : SESSION_TYPE_IP6_TCP;
 
   from = vlib_frame_vector_args (from_frame);
@@ -1634,7 +1634,7 @@
 {
   tcp_main_t *tm = vnet_get_tcp_main ();
   u32 n_left_from, next_index, *from, *to_next;
-  u32 my_thread_index = vm->cpu_index, errors = 0;
+  u32 my_thread_index = vm->thread_index, errors = 0;
 
   from = vlib_frame_vector_args (from_frame);
   n_left_from = from_frame->n_vectors;
@@ -1989,7 +1989,7 @@
 		     vlib_frame_t * from_frame, int is_ip4)
 {
   u32 n_left_from, next_index, *from, *to_next;
-  u32 my_thread_index = vm->cpu_index;
+  u32 my_thread_index = vm->thread_index;
   tcp_main_t *tm = vnet_get_tcp_main ();
   u8 sst = is_ip4 ? SESSION_TYPE_IP4_TCP : SESSION_TYPE_IP6_TCP;
 
@@ -2243,7 +2243,7 @@
 		    vlib_frame_t * from_frame, int is_ip4)
 {
   u32 n_left_from, next_index, *from, *to_next;
-  u32 my_thread_index = vm->cpu_index;
+  u32 my_thread_index = vm->thread_index;
   tcp_main_t *tm = vnet_get_tcp_main ();
 
   from = vlib_frame_vector_args (from_frame);
diff --git a/src/vnet/tcp/tcp_output.c b/src/vnet/tcp/tcp_output.c
index ea157bd..e18bfad 100644
--- a/src/vnet/tcp/tcp_output.c
+++ b/src/vnet/tcp/tcp_output.c
@@ -387,8 +387,8 @@
 #define tcp_get_free_buffer_index(tm, bidx)                             \
 do {                                                                    \
   u32 *my_tx_buffers, n_free_buffers;                                   \
-  u32 cpu_index = os_get_cpu_number();                             	\
-  my_tx_buffers = tm->tx_buffers[cpu_index];                            \
+  u32 thread_index = vlib_get_thread_index();                             	\
+  my_tx_buffers = tm->tx_buffers[thread_index];                            \
   if (PREDICT_FALSE(vec_len (my_tx_buffers) == 0))                      \
     {                                                                   \
       n_free_buffers = 32;      /* TODO config or macro */              \
@@ -396,7 +396,7 @@
       _vec_len(my_tx_buffers) = vlib_buffer_alloc_from_free_list (      \
           tm->vlib_main, my_tx_buffers, n_free_buffers,                 \
           VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX);                         \
-      tm->tx_buffers[cpu_index] = my_tx_buffers;                        \
+      tm->tx_buffers[thread_index] = my_tx_buffers;                        \
     }                                                                   \
   /* buffer shortage */                                                 \
   if (PREDICT_FALSE (vec_len (my_tx_buffers) == 0))                     \
@@ -408,8 +408,8 @@
 #define tcp_return_buffer(tm)						\
 do {									\
   u32 *my_tx_buffers;							\
-  u32 cpu_index = os_get_cpu_number();                             	\
-  my_tx_buffers = tm->tx_buffers[cpu_index];                          	\
+  u32 thread_index = vlib_get_thread_index();                             	\
+  my_tx_buffers = tm->tx_buffers[thread_index];                          	\
   _vec_len (my_tx_buffers) +=1;						\
 } while (0)
 
@@ -942,7 +942,7 @@
 void
 tcp_timer_delack_handler (u32 index)
 {
-  u32 thread_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
   tcp_connection_t *tc;
 
   tc = tcp_connection_get (index, thread_index);
@@ -1022,7 +1022,7 @@
 {
   tcp_main_t *tm = vnet_get_tcp_main ();
   vlib_main_t *vm = vlib_get_main ();
-  u32 thread_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
   tcp_connection_t *tc;
   vlib_buffer_t *b;
   u32 bi, snd_space, n_bytes;
@@ -1152,7 +1152,7 @@
 {
   tcp_main_t *tm = vnet_get_tcp_main ();
   vlib_main_t *vm = vlib_get_main ();
-  u32 thread_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
   tcp_connection_t *tc;
   vlib_buffer_t *b;
   u32 bi, n_bytes;
@@ -1313,7 +1313,7 @@
 		     vlib_frame_t * from_frame, int is_ip4)
 {
   u32 n_left_from, next_index, *from, *to_next;
-  u32 my_thread_index = vm->cpu_index;
+  u32 my_thread_index = vm->thread_index;
 
   from = vlib_frame_vector_args (from_frame);
   n_left_from = from_frame->n_vectors;
@@ -1524,7 +1524,7 @@
 			 vlib_frame_t * from_frame, u8 is_ip4)
 {
   u32 n_left_from, next_index, *from, *to_next;
-  u32 my_thread_index = vm->cpu_index;
+  u32 my_thread_index = vm->thread_index;
 
   from = vlib_frame_vector_args (from_frame);
   n_left_from = from_frame->n_vectors;
diff --git a/src/vnet/udp/udp_input.c b/src/vnet/udp/udp_input.c
index 4b22109..810278e 100644
--- a/src/vnet/udp/udp_input.c
+++ b/src/vnet/udp/udp_input.c
@@ -70,7 +70,7 @@
   udp4_uri_input_next_t next_index;
   udp_uri_main_t *um = vnet_get_udp_main ();
   session_manager_main_t *smm = vnet_get_session_manager_main ();
-  u32 my_thread_index = vm->cpu_index;
+  u32 my_thread_index = vm->thread_index;
   u8 my_enqueue_epoch;
   u32 *session_indices_to_enqueue;
   static u32 serial_number;
diff --git a/src/vnet/unix/tapcli.c b/src/vnet/unix/tapcli.c
index fb1a8ba..0fc62f6 100644
--- a/src/vnet/unix/tapcli.c
+++ b/src/vnet/unix/tapcli.c
@@ -366,7 +366,7 @@
       vlib_increment_combined_counter (
           vnet_main.interface_main.combined_sw_if_counters
           + VNET_INTERFACE_COUNTER_RX,
-          os_get_cpu_number(), ti->sw_if_index,
+          vlib_get_thread_index(), ti->sw_if_index,
           1, n_bytes_in_packet);
 
       if (PREDICT_FALSE(n_trace > 0)) {
diff --git a/src/vnet/unix/tuntap.c b/src/vnet/unix/tuntap.c
index 2cfcc92..ac67465 100644
--- a/src/vnet/unix/tuntap.c
+++ b/src/vnet/unix/tuntap.c
@@ -189,7 +189,7 @@
   /* Update tuntap interface output stats. */
   vlib_increment_combined_counter (im->combined_sw_if_counters
 				   + VNET_INTERFACE_COUNTER_TX,
-				   vm->cpu_index,
+				   vm->thread_index,
 				   tm->sw_if_index, n_packets, n_bytes);
 
 
@@ -297,7 +297,7 @@
     vlib_increment_combined_counter
         (vnet_main.interface_main.combined_sw_if_counters
          + VNET_INTERFACE_COUNTER_RX,
-         os_get_cpu_number(),
+         vlib_get_thread_index(),
          tm->sw_if_index,
          1, n_bytes_in_packet);
 
diff --git a/src/vnet/vxlan-gpe/decap.c b/src/vnet/vxlan-gpe/decap.c
index 22ab4b6..d4fe423 100644
--- a/src/vnet/vxlan-gpe/decap.c
+++ b/src/vnet/vxlan-gpe/decap.c
@@ -115,7 +115,7 @@
   vxlan4_gpe_tunnel_key_t last_key4;
   vxlan6_gpe_tunnel_key_t last_key6;
   u32 pkts_decapsulated = 0;
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
   u32 stats_sw_if_index, stats_n_packets, stats_n_bytes;
 
   if (is_ip4)
@@ -342,7 +342,7 @@
         if (stats_n_packets)
           vlib_increment_combined_counter (
               im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX,
-              cpu_index, stats_sw_if_index, stats_n_packets, stats_n_bytes);
+              thread_index, stats_sw_if_index, stats_n_packets, stats_n_bytes);
         stats_n_packets = 1;
         stats_n_bytes = len0;
         stats_sw_if_index = sw_if_index0;
@@ -427,7 +427,7 @@
         if (stats_n_packets)
           vlib_increment_combined_counter (
               im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX,
-              cpu_index, stats_sw_if_index, stats_n_packets, stats_n_bytes);
+              thread_index, stats_sw_if_index, stats_n_packets, stats_n_bytes);
         stats_n_packets = 1;
         stats_n_bytes = len1;
         stats_sw_if_index = sw_if_index1;
@@ -588,7 +588,7 @@
         if (stats_n_packets)
           vlib_increment_combined_counter (
               im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX,
-              cpu_index, stats_sw_if_index, stats_n_packets, stats_n_bytes);
+              thread_index, stats_sw_if_index, stats_n_packets, stats_n_bytes);
         stats_n_packets = 1;
         stats_n_bytes = len0;
         stats_sw_if_index = sw_if_index0;
@@ -615,7 +615,7 @@
   if (stats_n_packets)
   {
     vlib_increment_combined_counter (
-        im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX, cpu_index,
+        im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX, thread_index,
         stats_sw_if_index, stats_n_packets, stats_n_bytes);
     node->runtime_data[0] = stats_sw_if_index;
   }
diff --git a/src/vnet/vxlan-gpe/encap.c b/src/vnet/vxlan-gpe/encap.c
index 3a486e5..67ed94b 100644
--- a/src/vnet/vxlan-gpe/encap.c
+++ b/src/vnet/vxlan-gpe/encap.c
@@ -151,7 +151,7 @@
   vnet_main_t * vnm = ngm->vnet_main;
   vnet_interface_main_t * im = &vnm->interface_main;
   u32 pkts_encapsulated = 0;
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
   u32 stats_sw_if_index, stats_n_packets, stats_n_bytes;
 
   from = vlib_frame_vector_args (from_frame);
@@ -253,7 +253,7 @@
           if (stats_n_packets)
             vlib_increment_combined_counter (
                 im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX,
-                cpu_index, stats_sw_if_index, stats_n_packets, stats_n_bytes);
+                thread_index, stats_sw_if_index, stats_n_packets, stats_n_bytes);
           stats_sw_if_index = sw_if_index0;
           stats_n_packets = 2;
           stats_n_bytes = len0 + len1;
@@ -262,10 +262,10 @@
         {
           vlib_increment_combined_counter (
               im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX,
-              cpu_index, sw_if_index0, 1, len0);
+              thread_index, sw_if_index0, 1, len0);
           vlib_increment_combined_counter (
               im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX,
-              cpu_index, sw_if_index1, 1, len1);
+              thread_index, sw_if_index1, 1, len1);
         }
       }
 
@@ -335,7 +335,7 @@
         if (stats_n_packets)
           vlib_increment_combined_counter (
               im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX,
-              cpu_index, stats_sw_if_index, stats_n_packets, stats_n_bytes);
+              thread_index, stats_sw_if_index, stats_n_packets, stats_n_bytes);
         stats_n_packets = 1;
         stats_n_bytes = len0;
         stats_sw_if_index = sw_if_index0;
@@ -359,7 +359,7 @@
   if (stats_n_packets)
   {
     vlib_increment_combined_counter (
-        im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX, cpu_index,
+        im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX, thread_index,
         stats_sw_if_index, stats_n_packets, stats_n_bytes);
     node->runtime_data[0] = stats_sw_if_index;
   }
diff --git a/src/vnet/vxlan/decap.c b/src/vnet/vxlan/decap.c
index 514b2c9..2acb1f6 100644
--- a/src/vnet/vxlan/decap.c
+++ b/src/vnet/vxlan/decap.c
@@ -81,7 +81,7 @@
   vxlan4_tunnel_key_t last_key4;
   vxlan6_tunnel_key_t last_key6;
   u32 pkts_decapsulated = 0;
-  u32 cpu_index = os_get_cpu_number();
+  u32 thread_index = vlib_get_thread_index();
   u32 stats_sw_if_index, stats_n_packets, stats_n_bytes;
 
   if (is_ip4)
@@ -314,7 +314,7 @@
 	      if (stats_n_packets)
 		vlib_increment_combined_counter 
 		  (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX,
-		   cpu_index, stats_sw_if_index, 
+		   thread_index, stats_sw_if_index, 
 		   stats_n_packets, stats_n_bytes);
 	      stats_n_packets = 1;
 	      stats_n_bytes = len0;
@@ -468,7 +468,7 @@
 	      if (stats_n_packets)
 		vlib_increment_combined_counter 
 		  (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX,
-		   cpu_index, stats_sw_if_index, 
+		   thread_index, stats_sw_if_index, 
 		   stats_n_packets, stats_n_bytes);
 	      stats_n_packets = 1;
 	      stats_n_bytes = len1;
@@ -674,7 +674,7 @@
 	      if (stats_n_packets)
 		vlib_increment_combined_counter 
 		  (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX,
-		   cpu_index, stats_sw_if_index, 
+		   thread_index, stats_sw_if_index, 
 		   stats_n_packets, stats_n_bytes);
 	      stats_n_packets = 1;
 	      stats_n_bytes = len0;
@@ -711,7 +711,7 @@
     {
       vlib_increment_combined_counter 
 	(im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX,
-	 cpu_index, stats_sw_if_index, stats_n_packets, stats_n_bytes);
+	 thread_index, stats_sw_if_index, stats_n_packets, stats_n_bytes);
       node->runtime_data[0] = stats_sw_if_index;
     }
 
diff --git a/src/vnet/vxlan/encap.c b/src/vnet/vxlan/encap.c
index 5b63064..4cfbbc2 100644
--- a/src/vnet/vxlan/encap.c
+++ b/src/vnet/vxlan/encap.c
@@ -77,7 +77,7 @@
   vnet_interface_main_t * im = &vnm->interface_main;
   u32 pkts_encapsulated = 0;
   u16 old_l0 = 0, old_l1 = 0;
-  u32 cpu_index = os_get_cpu_number();
+  u32 thread_index = vlib_get_thread_index();
   u32 stats_sw_if_index, stats_n_packets, stats_n_bytes;
   u32 sw_if_index0 = 0, sw_if_index1 = 0;
   u32 next0 = 0, next1 = 0;
@@ -301,7 +301,7 @@
 		  if (stats_n_packets) 
 		    vlib_increment_combined_counter 
 		      (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX,
-		       cpu_index, stats_sw_if_index, 
+		       thread_index, stats_sw_if_index, 
 		       stats_n_packets, stats_n_bytes);
 		  stats_sw_if_index = sw_if_index0;
 		  stats_n_packets = 2;
@@ -311,10 +311,10 @@
 	        {
 		  vlib_increment_combined_counter 
 		      (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX,
-		       cpu_index, sw_if_index0, 1, len0);
+		       thread_index, sw_if_index0, 1, len0);
 		  vlib_increment_combined_counter 
 		      (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX,
-		       cpu_index, sw_if_index1, 1, len1);
+		       thread_index, sw_if_index1, 1, len1);
 		}
 	    }
 
@@ -464,7 +464,7 @@
 	      if (stats_n_packets)
 		vlib_increment_combined_counter 
 		  (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX,
-		   cpu_index, stats_sw_if_index, 
+		   thread_index, stats_sw_if_index, 
 		   stats_n_packets, stats_n_bytes);
 	      stats_n_packets = 1;
 	      stats_n_bytes = len0;
@@ -496,7 +496,7 @@
     {
       vlib_increment_combined_counter 
 	(im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX,
-	 cpu_index, stats_sw_if_index, stats_n_packets, stats_n_bytes);
+	 thread_index, stats_sw_if_index, stats_n_packets, stats_n_bytes);
       node->runtime_data[0] = stats_sw_if_index;
     }
 
diff --git a/src/vpp/stats/stats.c b/src/vpp/stats/stats.c
index 042d02e..4309cd5 100644
--- a/src/vpp/stats/stats.c
+++ b/src/vpp/stats/stats.c
@@ -66,14 +66,14 @@
 void
 dslock (stats_main_t * sm, int release_hint, int tag)
 {
-  u32 thread_id;
+  u32 thread_index;
   data_structure_lock_t *l = sm->data_structure_lock;
 
   if (PREDICT_FALSE (l == 0))
     return;
 
-  thread_id = os_get_cpu_number ();
-  if (l->lock && l->thread_id == thread_id)
+  thread_index = vlib_get_thread_index ();
+  if (l->lock && l->thread_index == thread_index)
     {
       l->count++;
       return;
@@ -85,7 +85,7 @@
   while (__sync_lock_test_and_set (&l->lock, 1))
     /* zzzz */ ;
   l->tag = tag;
-  l->thread_id = thread_id;
+  l->thread_index = thread_index;
   l->count = 1;
 }
 
@@ -99,14 +99,14 @@
 void
 dsunlock (stats_main_t * sm)
 {
-  u32 thread_id;
+  u32 thread_index;
   data_structure_lock_t *l = sm->data_structure_lock;
 
   if (PREDICT_FALSE (l == 0))
     return;
 
-  thread_id = os_get_cpu_number ();
-  ASSERT (l->lock && l->thread_id == thread_id);
+  thread_index = vlib_get_thread_index ();
+  ASSERT (l->lock && l->thread_index == thread_index);
   l->count--;
   if (l->count == 0)
     {
diff --git a/src/vpp/stats/stats.h b/src/vpp/stats/stats.h
index 118115b..024dc78 100644
--- a/src/vpp/stats/stats.h
+++ b/src/vpp/stats/stats.h
@@ -30,7 +30,7 @@
 {
   volatile u32 lock;
   volatile u32 release_hint;
-  u32 thread_id;
+  u32 thread_index;
   u32 count;
   int tag;
 } data_structure_lock_t;