Use thread local storage for thread index
This patch deprecates stack-based thread identification.
It also removes the requirement that thread stacks are adjacent.
Finally, possibly annoying for some folks, it renames
all occurrences of cpu_index and cpu_number to thread
index. Using the word "cpu" is misleading here, as a thread can
be migrated to a different CPU, and it is also not related
to the Linux CPU index.
Change-Id: I68cdaf661e701d2336fc953dcb9978d10a70f7c1
Signed-off-by: Damjan Marion <damarion@cisco.com>
diff --git a/src/vnet/devices/virtio/vhost-user.c b/src/vnet/devices/virtio/vhost-user.c
index 00807dc..5e720f6 100644
--- a/src/vnet/devices/virtio/vhost-user.c
+++ b/src/vnet/devices/virtio/vhost-user.c
@@ -331,7 +331,7 @@
{
//Let's try to assign one queue to each thread
u32 qid = 0;
- u32 cpu_index = 0;
+ u32 thread_index = 0;
vui->use_tx_spinlock = 0;
while (1)
{
@@ -341,20 +341,21 @@
if (!rxvq->started || !rxvq->enabled)
continue;
- vui->per_cpu_tx_qid[cpu_index] = qid;
- cpu_index++;
- if (cpu_index == vlib_get_thread_main ()->n_vlib_mains)
+ vui->per_cpu_tx_qid[thread_index] = qid;
+ thread_index++;
+ if (thread_index == vlib_get_thread_main ()->n_vlib_mains)
return;
}
//We need to loop, meaning the spinlock has to be used
vui->use_tx_spinlock = 1;
- if (cpu_index == 0)
+ if (thread_index == 0)
{
//Could not find a single valid one
- for (cpu_index = 0;
- cpu_index < vlib_get_thread_main ()->n_vlib_mains; cpu_index++)
+ for (thread_index = 0;
+ thread_index < vlib_get_thread_main ()->n_vlib_mains;
+ thread_index++)
{
- vui->per_cpu_tx_qid[cpu_index] = 0;
+ vui->per_cpu_tx_qid[thread_index] = 0;
}
return;
}
@@ -368,7 +369,7 @@
vhost_user_intf_t *vui;
vhost_cpu_t *vhc;
u32 *workers = 0;
- u32 cpu_index;
+ u32 thread_index;
vlib_main_t *vm;
//Let's list all workers cpu indexes
@@ -400,9 +401,9 @@
continue;
i %= vec_len (vui_workers);
- cpu_index = vui_workers[i];
+ thread_index = vui_workers[i];
i++;
- vhc = &vum->cpus[cpu_index];
+ vhc = &vum->cpus[thread_index];
iaq.qid = qid;
iaq.vhost_iface_index = vui - vum->vhost_user_interfaces;
@@ -429,14 +430,14 @@
vhc->operation_mode = mode;
}
- for (cpu_index = vum->input_cpu_first_index;
- cpu_index < vum->input_cpu_first_index + vum->input_cpu_count;
- cpu_index++)
+ for (thread_index = vum->input_cpu_first_index;
+ thread_index < vum->input_cpu_first_index + vum->input_cpu_count;
+ thread_index++)
{
vlib_node_state_t state = VLIB_NODE_STATE_POLLING;
- vhc = &vum->cpus[cpu_index];
- vm = vlib_mains ? vlib_mains[cpu_index] : &vlib_global_main;
+ vhc = &vum->cpus[thread_index];
+ vm = vlib_mains ? vlib_mains[thread_index] : &vlib_global_main;
switch (vhc->operation_mode)
{
case VHOST_USER_INTERRUPT_MODE:
@@ -532,7 +533,7 @@
{
vhost_user_main_t *vum = &vhost_user_main;
vhost_cpu_t *vhc;
- u32 cpu_index;
+ u32 thread_index;
vhost_iface_and_queue_t *vhiq;
vlib_main_t *vm;
u32 ifq2;
@@ -553,8 +554,8 @@
if ((vhiq->vhost_iface_index == (ifq >> 8)) &&
(VHOST_VRING_IDX_TX (vhiq->qid) == (ifq & 0xff)))
{
- cpu_index = vhc - vum->cpus;
- vm = vlib_mains ? vlib_mains[cpu_index] : &vlib_global_main;
+ thread_index = vhc - vum->cpus;
+ vm = vlib_mains ? vlib_mains[thread_index] : &vlib_global_main;
/*
* Convert RX virtqueue number in the lower byte to vring
* queue index for the input node process. Top bytes contain
@@ -1592,7 +1593,7 @@
u32 n_trace = vlib_get_trace_count (vm, node);
u16 qsz_mask;
u32 map_hint = 0;
- u16 cpu_index = os_get_cpu_number ();
+ u16 thread_index = vlib_get_thread_index ();
u16 copy_len = 0;
{
@@ -1651,32 +1652,32 @@
* in the loop and come back later. This is not an issue as for big packet,
* processing cost really comes from the memory copy.
*/
- if (PREDICT_FALSE (vum->cpus[cpu_index].rx_buffers_len < n_left + 1))
+ if (PREDICT_FALSE (vum->cpus[thread_index].rx_buffers_len < n_left + 1))
{
- u32 curr_len = vum->cpus[cpu_index].rx_buffers_len;
- vum->cpus[cpu_index].rx_buffers_len +=
+ u32 curr_len = vum->cpus[thread_index].rx_buffers_len;
+ vum->cpus[thread_index].rx_buffers_len +=
vlib_buffer_alloc_from_free_list (vm,
- vum->cpus[cpu_index].rx_buffers +
+ vum->cpus[thread_index].rx_buffers +
curr_len,
VHOST_USER_RX_BUFFERS_N - curr_len,
VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX);
if (PREDICT_FALSE
- (vum->cpus[cpu_index].rx_buffers_len <
+ (vum->cpus[thread_index].rx_buffers_len <
VHOST_USER_RX_BUFFER_STARVATION))
{
/* In case of buffer starvation, discard some packets from the queue
* and log the event.
* We keep doing best effort for the remaining packets. */
- u32 flush = (n_left + 1 > vum->cpus[cpu_index].rx_buffers_len) ?
- n_left + 1 - vum->cpus[cpu_index].rx_buffers_len : 1;
+ u32 flush = (n_left + 1 > vum->cpus[thread_index].rx_buffers_len) ?
+ n_left + 1 - vum->cpus[thread_index].rx_buffers_len : 1;
flush = vhost_user_rx_discard_packet (vm, vui, txvq, flush);
n_left -= flush;
vlib_increment_simple_counter (vnet_main.
interface_main.sw_if_counters +
VNET_INTERFACE_COUNTER_DROP,
- os_get_cpu_number (),
+ vlib_get_thread_index (),
vui->sw_if_index, flush);
vlib_error_count (vm, vhost_user_input_node.index,
@@ -1696,7 +1697,7 @@
u32 desc_data_offset;
vring_desc_t *desc_table = txvq->desc;
- if (PREDICT_FALSE (vum->cpus[cpu_index].rx_buffers_len <= 1))
+ if (PREDICT_FALSE (vum->cpus[thread_index].rx_buffers_len <= 1))
{
/* Not enough rx_buffers
* Note: We yeld on 1 so we don't need to do an additional
@@ -1707,17 +1708,18 @@
}
desc_current = txvq->avail->ring[txvq->last_avail_idx & qsz_mask];
- vum->cpus[cpu_index].rx_buffers_len--;
- bi_current = (vum->cpus[cpu_index].rx_buffers)
- [vum->cpus[cpu_index].rx_buffers_len];
+ vum->cpus[thread_index].rx_buffers_len--;
+ bi_current = (vum->cpus[thread_index].rx_buffers)
+ [vum->cpus[thread_index].rx_buffers_len];
b_head = b_current = vlib_get_buffer (vm, bi_current);
to_next[0] = bi_current; //We do that now so we can forget about bi_current
to_next++;
n_left_to_next--;
vlib_prefetch_buffer_with_index (vm,
- (vum->cpus[cpu_index].rx_buffers)
- [vum->cpus[cpu_index].
+ (vum->
+ cpus[thread_index].rx_buffers)
+ [vum->cpus[thread_index].
rx_buffers_len - 1], LOAD);
/* Just preset the used descriptor id and length for later */
@@ -1791,7 +1793,7 @@
(b_current->current_length == VLIB_BUFFER_DATA_SIZE))
{
if (PREDICT_FALSE
- (vum->cpus[cpu_index].rx_buffers_len == 0))
+ (vum->cpus[thread_index].rx_buffers_len == 0))
{
/* Cancel speculation */
to_next--;
@@ -1805,17 +1807,18 @@
* but valid.
*/
vhost_user_input_rewind_buffers (vm,
- &vum->cpus[cpu_index],
+ &vum->cpus
+ [thread_index],
b_head);
n_left = 0;
goto stop;
}
/* Get next output */
- vum->cpus[cpu_index].rx_buffers_len--;
+ vum->cpus[thread_index].rx_buffers_len--;
u32 bi_next =
- (vum->cpus[cpu_index].rx_buffers)[vum->cpus
- [cpu_index].rx_buffers_len];
+ (vum->cpus[thread_index].rx_buffers)[vum->cpus
+ [thread_index].rx_buffers_len];
b_current->next_buffer = bi_next;
b_current->flags |= VLIB_BUFFER_NEXT_PRESENT;
bi_current = bi_next;
@@ -1823,7 +1826,7 @@
}
/* Prepare a copy order executed later for the data */
- vhost_copy_t *cpy = &vum->cpus[cpu_index].copy[copy_len];
+ vhost_copy_t *cpy = &vum->cpus[thread_index].copy[copy_len];
copy_len++;
u32 desc_data_l =
desc_table[desc_current].len - desc_data_offset;
@@ -1880,7 +1883,7 @@
if (PREDICT_FALSE (copy_len >= VHOST_USER_RX_COPY_THRESHOLD))
{
if (PREDICT_FALSE
- (vhost_user_input_copy (vui, vum->cpus[cpu_index].copy,
+ (vhost_user_input_copy (vui, vum->cpus[thread_index].copy,
copy_len, &map_hint)))
{
clib_warning
@@ -1905,7 +1908,7 @@
/* Do the memory copies */
if (PREDICT_FALSE
- (vhost_user_input_copy (vui, vum->cpus[cpu_index].copy,
+ (vhost_user_input_copy (vui, vum->cpus[thread_index].copy,
copy_len, &map_hint)))
{
clib_warning ("Memory mapping error on interface hw_if_index=%d "
@@ -1933,9 +1936,9 @@
vlib_increment_combined_counter
(vnet_main.interface_main.combined_sw_if_counters
+ VNET_INTERFACE_COUNTER_RX,
- os_get_cpu_number (), vui->sw_if_index, n_rx_packets, n_rx_bytes);
+ vlib_get_thread_index (), vui->sw_if_index, n_rx_packets, n_rx_bytes);
- vnet_device_increment_rx_packets (cpu_index, n_rx_packets);
+ vnet_device_increment_rx_packets (thread_index, n_rx_packets);
return n_rx_packets;
}
@@ -1946,15 +1949,15 @@
{
vhost_user_main_t *vum = &vhost_user_main;
uword n_rx_packets = 0;
- u32 cpu_index = os_get_cpu_number ();
+ u32 thread_index = vlib_get_thread_index ();
vhost_iface_and_queue_t *vhiq;
vhost_user_intf_t *vui;
vhost_cpu_t *vhc;
- vhc = &vum->cpus[cpu_index];
+ vhc = &vum->cpus[thread_index];
if (PREDICT_TRUE (vhc->operation_mode == VHOST_USER_POLLING_MODE))
{
- vec_foreach (vhiq, vum->cpus[cpu_index].rx_queues)
+ vec_foreach (vhiq, vum->cpus[thread_index].rx_queues)
{
vui = &vum->vhost_user_interfaces[vhiq->vhost_iface_index];
n_rx_packets += vhost_user_if_input (vm, vum, vui, vhiq->qid, node);
@@ -2096,7 +2099,7 @@
vhost_user_vring_t *rxvq;
u16 qsz_mask;
u8 error;
- u32 cpu_index = os_get_cpu_number ();
+ u32 thread_index = vlib_get_thread_index ();
u32 map_hint = 0;
u8 retry = 8;
u16 copy_len;
@@ -2116,7 +2119,7 @@
qid =
VHOST_VRING_IDX_RX (*vec_elt_at_index
- (vui->per_cpu_tx_qid, os_get_cpu_number ()));
+ (vui->per_cpu_tx_qid, vlib_get_thread_index ()));
rxvq = &vui->vrings[qid];
if (PREDICT_FALSE (vui->use_tx_spinlock))
vhost_user_vring_lock (vui, qid);
@@ -2143,10 +2146,10 @@
if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
{
- vum->cpus[cpu_index].current_trace =
+ vum->cpus[thread_index].current_trace =
vlib_add_trace (vm, node, b0,
- sizeof (*vum->cpus[cpu_index].current_trace));
- vhost_user_tx_trace (vum->cpus[cpu_index].current_trace,
+ sizeof (*vum->cpus[thread_index].current_trace));
+ vhost_user_tx_trace (vum->cpus[thread_index].current_trace,
vui, qid / 2, b0, rxvq);
}
@@ -2188,14 +2191,14 @@
{
// Get a header from the header array
virtio_net_hdr_mrg_rxbuf_t *hdr =
- &vum->cpus[cpu_index].tx_headers[tx_headers_len];
+ &vum->cpus[thread_index].tx_headers[tx_headers_len];
tx_headers_len++;
hdr->hdr.flags = 0;
hdr->hdr.gso_type = 0;
hdr->num_buffers = 1; //This is local, no need to check
// Prepare a copy order executed later for the header
- vhost_copy_t *cpy = &vum->cpus[cpu_index].copy[copy_len];
+ vhost_copy_t *cpy = &vum->cpus[thread_index].copy[copy_len];
copy_len++;
cpy->len = vui->virtio_net_hdr_sz;
cpy->dst = buffer_map_addr;
@@ -2220,7 +2223,7 @@
else if (vui->virtio_net_hdr_sz == 12) //MRG is available
{
virtio_net_hdr_mrg_rxbuf_t *hdr =
- &vum->cpus[cpu_index].tx_headers[tx_headers_len - 1];
+ &vum->cpus[thread_index].tx_headers[tx_headers_len - 1];
//Move from available to used buffer
rxvq->used->ring[rxvq->last_used_idx & qsz_mask].id =
@@ -2282,7 +2285,7 @@
}
{
- vhost_copy_t *cpy = &vum->cpus[cpu_index].copy[copy_len];
+ vhost_copy_t *cpy = &vum->cpus[thread_index].copy[copy_len];
copy_len++;
cpy->len = bytes_left;
cpy->len = (cpy->len > buffer_len) ? buffer_len : cpy->len;
@@ -2325,8 +2328,8 @@
if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
{
- vum->cpus[cpu_index].current_trace->hdr =
- vum->cpus[cpu_index].tx_headers[tx_headers_len - 1];
+ vum->cpus[thread_index].current_trace->hdr =
+ vum->cpus[thread_index].tx_headers[tx_headers_len - 1];
}
n_left--; //At the end for error counting when 'goto done' is invoked
@@ -2336,7 +2339,7 @@
done:
//Do the memory copies
if (PREDICT_FALSE
- (vhost_user_tx_copy (vui, vum->cpus[cpu_index].copy,
+ (vhost_user_tx_copy (vui, vum->cpus[thread_index].copy,
copy_len, &map_hint)))
{
clib_warning ("Memory mapping error on interface hw_if_index=%d "
@@ -2386,7 +2389,7 @@
vlib_increment_simple_counter
(vnet_main.interface_main.sw_if_counters
+ VNET_INTERFACE_COUNTER_DROP,
- os_get_cpu_number (), vui->sw_if_index, n_left);
+ vlib_get_thread_index (), vui->sw_if_index, n_left);
}
vlib_buffer_free (vm, vlib_frame_args (frame), frame->n_vectors);
@@ -2773,11 +2776,11 @@
case ~0:
vec_foreach (vhc, vum->cpus)
{
- u32 cpu_index = vhc - vum->cpus;
+ u32 thread_index = vhc - vum->cpus;
f64 next_timeout;
next_timeout = timeout;
- vec_foreach (vhiq, vum->cpus[cpu_index].rx_queues)
+ vec_foreach (vhiq, vum->cpus[thread_index].rx_queues)
{
vui = &vum->vhost_user_interfaces[vhiq->vhost_iface_index];
vhost_user_vring_t *rxvq =