Use thread local storage for thread index

This patch deprecates stack-based thread identification.
It also removes the requirement that thread stacks be adjacent.

Finally, possibly annoying for some folks, it renames
all occurrences of cpu_index and cpu_number to thread
index. Using the word "cpu" is misleading here, as a thread
can be migrated to a different CPU, and it is also not
related to the Linux cpu index.

Change-Id: I68cdaf661e701d2336fc953dcb9978d10a70f7c1
Signed-off-by: Damjan Marion <damarion@cisco.com>
diff --git a/src/vnet/map/ip4_map_t.c b/src/vnet/map/ip4_map_t.c
index b63d76b..5f2bcbf 100644
--- a/src/vnet/map/ip4_map_t.c
+++ b/src/vnet/map/ip4_map_t.c
@@ -477,7 +477,7 @@
   n_left_from = frame->n_vectors;
   next_index = node->cached_next_index;
   vlib_combined_counter_main_t *cm = map_main.domain_counters;
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
 
   while (n_left_from > 0)
     {
@@ -520,7 +520,7 @@
 	  if (PREDICT_TRUE (error0 == MAP_ERROR_NONE))
 	    {
 	      vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX,
-					       cpu_index,
+					       thread_index,
 					       vnet_buffer (p0)->map_t.
 					       map_domain_index, 1, len0);
 	    }
@@ -1051,7 +1051,7 @@
   n_left_from = frame->n_vectors;
   next_index = node->cached_next_index;
   vlib_combined_counter_main_t *cm = map_main.domain_counters;
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
 
   while (n_left_from > 0)
     {
@@ -1158,7 +1158,7 @@
 	      (error0 == MAP_ERROR_NONE && next0 != IP4_MAPT_NEXT_MAPT_ICMP))
 	    {
 	      vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX,
-					       cpu_index,
+					       thread_index,
 					       vnet_buffer (p0)->map_t.
 					       map_domain_index, 1,
 					       clib_net_to_host_u16 (ip40->
@@ -1169,7 +1169,7 @@
 	      (error1 == MAP_ERROR_NONE && next1 != IP4_MAPT_NEXT_MAPT_ICMP))
 	    {
 	      vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX,
-					       cpu_index,
+					       thread_index,
 					       vnet_buffer (p1)->map_t.
 					       map_domain_index, 1,
 					       clib_net_to_host_u16 (ip41->
@@ -1252,7 +1252,7 @@
 	      (error0 == MAP_ERROR_NONE && next0 != IP4_MAPT_NEXT_MAPT_ICMP))
 	    {
 	      vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX,
-					       cpu_index,
+					       thread_index,
 					       vnet_buffer (p0)->map_t.
 					       map_domain_index, 1,
 					       clib_net_to_host_u16 (ip40->