Use thread local storage for thread index

This patch deprecates stack-based thread identification,
Also removes requirement that thread stacks are adjacent.

Finally, possibly annoying for some folks, it renames
all occurences of cpu_index and cpu_number with thread
index. Using word "cpu" is misleading here as thread can
be migrated ti different CPU, and also it is not related
to linux cpu index.

Change-Id: I68cdaf661e701d2336fc953dcb9978d10a70f7c1
Signed-off-by: Damjan Marion <damarion@cisco.com>
diff --git a/src/vnet/tcp/builtin_client.c b/src/vnet/tcp/builtin_client.c
index e370506..c1567aa 100644
--- a/src/vnet/tcp/builtin_client.c
+++ b/src/vnet/tcp/builtin_client.c
@@ -174,7 +174,7 @@
     pthread_sigmask (SIG_SETMASK, &s, 0);
   }
 
-  clib_per_cpu_mheaps[os_get_cpu_number ()] = clib_per_cpu_mheaps[0];
+  clib_per_cpu_mheaps[vlib_get_thread_index ()] = clib_per_cpu_mheaps[0];
 
   while (1)
     {
diff --git a/src/vnet/tcp/tcp.c b/src/vnet/tcp/tcp.c
index b2a371e..b6c3482 100644
--- a/src/vnet/tcp/tcp.c
+++ b/src/vnet/tcp/tcp.c
@@ -646,10 +646,10 @@
 void
 tcp_timer_keep_handler (u32 conn_index)
 {
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
   tcp_connection_t *tc;
 
-  tc = tcp_connection_get (conn_index, cpu_index);
+  tc = tcp_connection_get (conn_index, thread_index);
   tc->timers[TCP_TIMER_KEEP] = TCP_TIMER_HANDLE_INVALID;
 
   tcp_connection_close (tc);
@@ -675,10 +675,10 @@
 void
 tcp_timer_waitclose_handler (u32 conn_index)
 {
-  u32 cpu_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
   tcp_connection_t *tc;
 
-  tc = tcp_connection_get (conn_index, cpu_index);
+  tc = tcp_connection_get (conn_index, thread_index);
   tc->timers[TCP_TIMER_WAITCLOSE] = TCP_TIMER_HANDLE_INVALID;
 
   /* Session didn't come back with a close(). Send FIN either way
diff --git a/src/vnet/tcp/tcp_debug.h b/src/vnet/tcp/tcp_debug.h
index 0090e15..eaca672 100644
--- a/src/vnet/tcp/tcp_debug.h
+++ b/src/vnet/tcp/tcp_debug.h
@@ -343,7 +343,7 @@
     }                                                           	\
   else                                                          	\
     {                                                           	\
-      u32 _thread_index = os_get_cpu_number ();                 	\
+      u32 _thread_index = vlib_get_thread_index ();                 	\
       _tc = tcp_connection_get (_tc_index, _thread_index);      	\
     }                                                           	\
   ELOG_TYPE_DECLARE (_e) =                                      	\
diff --git a/src/vnet/tcp/tcp_input.c b/src/vnet/tcp/tcp_input.c
index a8224dc..7e9fa47 100644
--- a/src/vnet/tcp/tcp_input.c
+++ b/src/vnet/tcp/tcp_input.c
@@ -1142,7 +1142,7 @@
 			  vlib_frame_t * from_frame, int is_ip4)
 {
   u32 n_left_from, next_index, *from, *to_next;
-  u32 my_thread_index = vm->cpu_index, errors = 0;
+  u32 my_thread_index = vm->thread_index, errors = 0;
   tcp_main_t *tm = vnet_get_tcp_main ();
 
   from = vlib_frame_vector_args (from_frame);
@@ -1332,7 +1332,7 @@
 {
   tcp_main_t *tm = vnet_get_tcp_main ();
   u32 n_left_from, next_index, *from, *to_next;
-  u32 my_thread_index = vm->cpu_index, errors = 0;
+  u32 my_thread_index = vm->thread_index, errors = 0;
   u8 sst = is_ip4 ? SESSION_TYPE_IP4_TCP : SESSION_TYPE_IP6_TCP;
 
   from = vlib_frame_vector_args (from_frame);
@@ -1634,7 +1634,7 @@
 {
   tcp_main_t *tm = vnet_get_tcp_main ();
   u32 n_left_from, next_index, *from, *to_next;
-  u32 my_thread_index = vm->cpu_index, errors = 0;
+  u32 my_thread_index = vm->thread_index, errors = 0;
 
   from = vlib_frame_vector_args (from_frame);
   n_left_from = from_frame->n_vectors;
@@ -1989,7 +1989,7 @@
 		     vlib_frame_t * from_frame, int is_ip4)
 {
   u32 n_left_from, next_index, *from, *to_next;
-  u32 my_thread_index = vm->cpu_index;
+  u32 my_thread_index = vm->thread_index;
   tcp_main_t *tm = vnet_get_tcp_main ();
   u8 sst = is_ip4 ? SESSION_TYPE_IP4_TCP : SESSION_TYPE_IP6_TCP;
 
@@ -2243,7 +2243,7 @@
 		    vlib_frame_t * from_frame, int is_ip4)
 {
   u32 n_left_from, next_index, *from, *to_next;
-  u32 my_thread_index = vm->cpu_index;
+  u32 my_thread_index = vm->thread_index;
   tcp_main_t *tm = vnet_get_tcp_main ();
 
   from = vlib_frame_vector_args (from_frame);
diff --git a/src/vnet/tcp/tcp_output.c b/src/vnet/tcp/tcp_output.c
index ea157bd..e18bfad 100644
--- a/src/vnet/tcp/tcp_output.c
+++ b/src/vnet/tcp/tcp_output.c
@@ -387,8 +387,8 @@
 #define tcp_get_free_buffer_index(tm, bidx)                             \
 do {                                                                    \
   u32 *my_tx_buffers, n_free_buffers;                                   \
-  u32 cpu_index = os_get_cpu_number();                             	\
-  my_tx_buffers = tm->tx_buffers[cpu_index];                            \
+  u32 thread_index = vlib_get_thread_index();                             	\
+  my_tx_buffers = tm->tx_buffers[thread_index];                            \
   if (PREDICT_FALSE(vec_len (my_tx_buffers) == 0))                      \
     {                                                                   \
       n_free_buffers = 32;      /* TODO config or macro */              \
@@ -396,7 +396,7 @@
       _vec_len(my_tx_buffers) = vlib_buffer_alloc_from_free_list (      \
           tm->vlib_main, my_tx_buffers, n_free_buffers,                 \
           VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX);                         \
-      tm->tx_buffers[cpu_index] = my_tx_buffers;                        \
+      tm->tx_buffers[thread_index] = my_tx_buffers;                        \
     }                                                                   \
   /* buffer shortage */                                                 \
   if (PREDICT_FALSE (vec_len (my_tx_buffers) == 0))                     \
@@ -408,8 +408,8 @@
 #define tcp_return_buffer(tm)						\
 do {									\
   u32 *my_tx_buffers;							\
-  u32 cpu_index = os_get_cpu_number();                             	\
-  my_tx_buffers = tm->tx_buffers[cpu_index];                          	\
+  u32 thread_index = vlib_get_thread_index();                             	\
+  my_tx_buffers = tm->tx_buffers[thread_index];                          	\
   _vec_len (my_tx_buffers) +=1;						\
 } while (0)
 
@@ -942,7 +942,7 @@
 void
 tcp_timer_delack_handler (u32 index)
 {
-  u32 thread_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
   tcp_connection_t *tc;
 
   tc = tcp_connection_get (index, thread_index);
@@ -1022,7 +1022,7 @@
 {
   tcp_main_t *tm = vnet_get_tcp_main ();
   vlib_main_t *vm = vlib_get_main ();
-  u32 thread_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
   tcp_connection_t *tc;
   vlib_buffer_t *b;
   u32 bi, snd_space, n_bytes;
@@ -1152,7 +1152,7 @@
 {
   tcp_main_t *tm = vnet_get_tcp_main ();
   vlib_main_t *vm = vlib_get_main ();
-  u32 thread_index = os_get_cpu_number ();
+  u32 thread_index = vlib_get_thread_index ();
   tcp_connection_t *tc;
   vlib_buffer_t *b;
   u32 bi, n_bytes;
@@ -1313,7 +1313,7 @@
 		     vlib_frame_t * from_frame, int is_ip4)
 {
   u32 n_left_from, next_index, *from, *to_next;
-  u32 my_thread_index = vm->cpu_index;
+  u32 my_thread_index = vm->thread_index;
 
   from = vlib_frame_vector_args (from_frame);
   n_left_from = from_frame->n_vectors;
@@ -1524,7 +1524,7 @@
 			 vlib_frame_t * from_frame, u8 is_ip4)
 {
   u32 n_left_from, next_index, *from, *to_next;
-  u32 my_thread_index = vm->cpu_index;
+  u32 my_thread_index = vm->thread_index;
 
   from = vlib_frame_vector_args (from_frame);
   n_left_from = from_frame->n_vectors;