64 bit per-thread counters
after:
TenGigabitEthernet5/0/1-output active 107522 17375708 0 7.22e0 161.60
TenGigabitEthernet5/0/1-tx active 107522 17375708 0 6.93e1 161.60
ip4-input-no-checksum active 107522 17375708 0 2.52e1 161.60
ip4-lookup active 107522 17375708 0 3.10e1 161.60
ip4-rewrite active 107522 17375708 0 2.52e1 161.60
before:
TenGigabitEthernet5/0/1-output active 433575 110995200 0 6.95e0 256.00
TenGigabitEthernet5/0/1-tx active 433575 110995200 0 7.14e1 256.00
ip4-input-no-checksum active 433575 110995200 0 2.66e1 256.00
ip4-lookup active 433575 110995200 0 3.29e1 256.00
ip4-rewrite active 433575 110995200 0 2.59e1 256.00
Change-Id: I46405bd22189f48a39f06e3443bb7e13f410b539
Signed-off-by: Neale Ranns <nranns@cisco.com>
diff --git a/src/vlib/counter.c b/src/vlib/counter.c
index 9f66e04..62f4bd6 100644
--- a/src/vlib/counter.c
+++ b/src/vlib/counter.c
@@ -42,56 +42,36 @@
void
vlib_clear_simple_counters (vlib_simple_counter_main_t * cm)
{
+ counter_t *my_counters;
uword i, j;
- u16 *my_minis;
- for (i = 0; i < vec_len (cm->minis); i++)
+ for (i = 0; i < vec_len (cm->counters); i++)
{
- my_minis = cm->minis[i];
+ my_counters = cm->counters[i];
- for (j = 0; j < vec_len (my_minis); j++)
+ for (j = 0; j < vec_len (my_counters); j++)
{
- cm->maxi[j] += my_minis[j];
- my_minis[j] = 0;
+ my_counters[j] = 0;
}
}
-
- j = vec_len (cm->maxi);
- if (j > 0)
- vec_validate (cm->value_at_last_clear, j - 1);
- for (i = 0; i < j; i++)
- cm->value_at_last_clear[i] = cm->maxi[i];
}
void
vlib_clear_combined_counters (vlib_combined_counter_main_t * cm)
{
+ vlib_counter_t *my_counters;
uword i, j;
- vlib_mini_counter_t *my_minis;
- for (i = 0; i < vec_len (cm->minis); i++)
+ for (i = 0; i < vec_len (cm->counters); i++)
{
- my_minis = cm->minis[i];
+ my_counters = cm->counters[i];
- for (j = 0; j < vec_len (my_minis); j++)
+ for (j = 0; j < vec_len (my_counters); j++)
{
- cm->maxi[j].packets += my_minis[j].packets;
- cm->maxi[j].bytes += my_minis[j].bytes;
- my_minis[j].packets = 0;
- my_minis[j].bytes = 0;
+ my_counters[j].packets = 0;
+ my_counters[j].bytes = 0;
}
}
-
- j = vec_len (cm->maxi);
- if (j > 0)
- vec_validate (cm->value_at_last_clear, j - 1);
-
- for (i = 0; i < j; i++)
- {
- vlib_counter_t *c = vec_elt_at_index (cm->value_at_last_clear, i);
-
- c[0] = cm->maxi[i];
- }
}
void
@@ -100,10 +80,9 @@
vlib_thread_main_t *tm = vlib_get_thread_main ();
int i;
- vec_validate (cm->minis, tm->n_vlib_mains - 1);
+ vec_validate (cm->counters, tm->n_vlib_mains - 1);
for (i = 0; i < tm->n_vlib_mains; i++)
- vec_validate_aligned (cm->minis[i], index, CLIB_CACHE_LINE_BYTES);
- vec_validate_aligned (cm->maxi, index, CLIB_CACHE_LINE_BYTES);
+ vec_validate_aligned (cm->counters[i], index, CLIB_CACHE_LINE_BYTES);
}
void
@@ -112,10 +91,23 @@
vlib_thread_main_t *tm = vlib_get_thread_main ();
int i;
- vec_validate (cm->minis, tm->n_vlib_mains - 1);
+ vec_validate (cm->counters, tm->n_vlib_mains - 1);
for (i = 0; i < tm->n_vlib_mains; i++)
- vec_validate_aligned (cm->minis[i], index, CLIB_CACHE_LINE_BYTES);
- vec_validate_aligned (cm->maxi, index, CLIB_CACHE_LINE_BYTES);
+ vec_validate_aligned (cm->counters[i], index, CLIB_CACHE_LINE_BYTES);
+}
+
+u32
+vlib_combined_counter_n_counters (const vlib_combined_counter_main_t * cm)
+{
+ ASSERT (cm->counters);
+ return (vec_len (cm->counters[0]));
+}
+
+u32
+vlib_simple_counter_n_counters (const vlib_simple_counter_main_t * cm)
+{
+ ASSERT (cm->counters);
+ return (vec_len (cm->counters[0]));
}
void
diff --git a/src/vlib/counter.h b/src/vlib/counter.h
index abfa89e..17a8521 100644
--- a/src/vlib/counter.h
+++ b/src/vlib/counter.h
@@ -44,59 +44,48 @@
Optimized thread-safe counters.
- Each vlib_[simple|combined]_counter_main_t consists of a single
- vector of thread-safe / atomically-updated u64 counters [the
- "maxi" vector], and a (u16 **) per-thread vector [the "minis"
- vector] of narrow, per-thread counters.
+ Each vlib_[simple|combined]_counter_main_t consists of a per-thread
+ vector of per-object counters.
- The idea is to drastically reduce the number of atomic operations.
- In the case of packet counts, we divide the number of atomic ops
- by 2**16, etc.
+ The idea is to eliminate the need for atomic operations entirely.
*/
+/** 64bit counters */
+typedef u64 counter_t;
+
/** A collection of simple counters */
typedef struct
{
- u16 **minis; /**< Per-thread u16 non-atomic counters */
- u64 *maxi; /**< Shared wide counters */
- u64 *value_at_last_clear; /**< Counter values as of last clear. */
- u64 *value_at_last_serialize; /**< Values as of last serialize. */
+ counter_t **counters; /**< Per-thread u64 non-atomic counters */
+ counter_t *value_at_last_serialize; /**< Values as of last serialize. */
u32 last_incremental_serialize_index; /**< Last counter index
serialized incrementally. */
char *name; /**< The counter collection's name. */
} vlib_simple_counter_main_t;
+/** The number of counters (not the number of per-thread counters) */
+u32 vlib_simple_counter_n_counters (const vlib_simple_counter_main_t * cm);
+
/** Increment a simple counter
@param cm - (vlib_simple_counter_main_t *) simple counter main pointer
@param cpu_index - (u32) the current cpu index
@param index - (u32) index of the counter to increment
- @param increment - (u32) quantitiy to add to the counter
+ @param increment - (u64) quantity to add to the counter
*/
always_inline void
vlib_increment_simple_counter (vlib_simple_counter_main_t * cm,
- u32 cpu_index, u32 index, u32 increment)
+ u32 cpu_index, u32 index, u64 increment)
{
- u16 *my_minis;
- u16 *mini;
- u32 old, new;
+ counter_t *my_counters;
- my_minis = cm->minis[cpu_index];
- mini = vec_elt_at_index (my_minis, index);
- old = mini[0];
- new = old + increment;
- mini[0] = new;
-
- if (PREDICT_FALSE (mini[0] != new))
- {
- __sync_fetch_and_add (&cm->maxi[index], new);
- my_minis[index] = 0;
- }
+ my_counters = cm->counters[cpu_index];
+ my_counters[index] += increment;
}
/** Get the value of a simple counter
- Scrapes the entire set of mini counters. Innacurate unless
+ Scrapes the entire set of per-thread counters. Inaccurate unless
worker threads which might increment the counter are
barrier-synchronized
@@ -104,30 +93,21 @@
@param index - (u32) index of the counter to fetch
@returns - (u64) current counter value
*/
-always_inline u64
+always_inline counter_t
vlib_get_simple_counter (vlib_simple_counter_main_t * cm, u32 index)
{
- u16 *my_minis, *mini;
- u64 v;
+ counter_t *my_counters;
+ counter_t v;
int i;
- ASSERT (index < vec_len (cm->maxi));
+ ASSERT (index < vlib_simple_counter_n_counters (cm));
v = 0;
- for (i = 0; i < vec_len (cm->minis); i++)
+ for (i = 0; i < vec_len (cm->counters); i++)
{
- my_minis = cm->minis[i];
- mini = vec_elt_at_index (my_minis, index);
- v += mini[0];
- }
-
- v += cm->maxi[index];
-
- if (index < vec_len (cm->value_at_last_clear))
- {
- ASSERT (v >= cm->value_at_last_clear[index]);
- v -= cm->value_at_last_clear[index];
+ my_counters = cm->counters[i];
+ v += my_counters[index];
}
return v;
@@ -142,29 +122,24 @@
always_inline void
vlib_zero_simple_counter (vlib_simple_counter_main_t * cm, u32 index)
{
- u16 *my_minis;
+ counter_t *my_counters;
int i;
- ASSERT (index < vec_len (cm->maxi));
+ ASSERT (index < vlib_simple_counter_n_counters (cm));
- for (i = 0; i < vec_len (cm->minis); i++)
+ for (i = 0; i < vec_len (cm->counters); i++)
{
- my_minis = cm->minis[i];
- my_minis[index] = 0;
+ my_counters = cm->counters[i];
+ my_counters[index] = 0;
}
-
- cm->maxi[index] = 0;
-
- if (index < vec_len (cm->value_at_last_clear))
- cm->value_at_last_clear[index] = 0;
}
/** Combined counter to hold both packets and byte differences.
*/
typedef struct
{
- u64 packets; /**< packet counter */
- u64 bytes; /**< byte counter */
+ counter_t packets; /**< packet counter */
+ counter_t bytes; /**< byte counter */
} vlib_counter_t;
/** Add two combined counters, results in the first counter
@@ -201,24 +176,19 @@
a->packets = a->bytes = 0;
}
-/** Mini combined counter */
-typedef struct
-{
- u16 packets; /**< Packet count */
- i16 bytes; /**< Byte count */
-} vlib_mini_counter_t;
-
/** A collection of combined counters */
typedef struct
{
- vlib_mini_counter_t **minis; /**< Per-thread u16 non-atomic counter pairs */
- vlib_counter_t *maxi; /**< Shared wide counter pairs */
- vlib_counter_t *value_at_last_clear; /**< Counter values as of last clear. */
+ vlib_counter_t **counters; /**< Per-thread u64 non-atomic counter pairs */
vlib_counter_t *value_at_last_serialize; /**< Counter values as of last serialize. */
u32 last_incremental_serialize_index; /**< Last counter index serialized incrementally. */
char *name; /**< The counter collection's name. */
} vlib_combined_counter_main_t;
+/** The number of counters (not the number of per-thread counters) */
+u32 vlib_combined_counter_n_counters (const vlib_combined_counter_main_t *
+ cm);
+
/** Clear a collection of simple counters
@param cm - (vlib_simple_counter_main_t *) collection to clear
*/
@@ -233,62 +203,41 @@
@param cm - (vlib_combined_counter_main_t *) comined counter main pointer
@param cpu_index - (u32) the current cpu index
@param index - (u32) index of the counter to increment
- @param packet_increment - (u32) number of packets to add to the counter
- @param byte_increment - (u32) number of bytes to add to the counter
+ @param packet_increment - (u64) number of packets to add to the counter
+ @param byte_increment - (u64) number of bytes to add to the counter
*/
always_inline void
vlib_increment_combined_counter (vlib_combined_counter_main_t * cm,
u32 cpu_index,
- u32 index,
- u32 packet_increment, u32 byte_increment)
+ u32 index, u64 n_packets, u64 n_bytes)
{
- vlib_mini_counter_t *my_minis, *mini;
- u32 old_packets, new_packets;
- i32 old_bytes, new_bytes;
+ vlib_counter_t *my_counters;
- /* Use this CPU's mini counter array */
- my_minis = cm->minis[cpu_index];
+ /* Use this CPU's counter array */
+ my_counters = cm->counters[cpu_index];
- mini = vec_elt_at_index (my_minis, index);
- old_packets = mini->packets;
- old_bytes = mini->bytes;
-
- new_packets = old_packets + packet_increment;
- new_bytes = old_bytes + byte_increment;
-
- mini->packets = new_packets;
- mini->bytes = new_bytes;
-
- /* Bytes always overflow before packets.. */
- if (PREDICT_FALSE (mini->bytes != new_bytes))
- {
- vlib_counter_t *maxi = vec_elt_at_index (cm->maxi, index);
-
- __sync_fetch_and_add (&maxi->packets, new_packets);
- __sync_fetch_and_add (&maxi->bytes, new_bytes);
-
- mini->packets = 0;
- mini->bytes = 0;
- }
+ my_counters[index].packets += n_packets;
+ my_counters[index].bytes += n_bytes;
}
-#define vlib_prefetch_combined_counter(_cm, _cpu_index, _index) \
-{ \
- vlib_mini_counter_t *_cpu_minis; \
- \
- /* \
- * This CPU's mini index is assumed to already be in cache \
- */ \
- _cpu_minis = (_cm)->minis[(_cpu_index)]; \
- CLIB_PREFETCH(_cpu_minis + (_index), \
- sizeof(*_cpu_minis), \
- STORE); \
+/** Pre-fetch a per-thread combined counter for the given object index */
+always_inline void
+vlib_prefetch_combined_counter (const vlib_combined_counter_main_t * cm,
+ u32 cpu_index, u32 index)
+{
+ vlib_counter_t *cpu_counters;
+
+ /*
+ * This CPU's index is assumed to already be in cache
+ */
+ cpu_counters = cm->counters[cpu_index];
+ CLIB_PREFETCH (cpu_counters + index, CLIB_CACHE_LINE_BYTES, STORE);
}
/** Get the value of a combined counter, never called in the speed path
- Scrapes the entire set of mini counters. Innacurate unless
+ Scrapes the entire set of per-thread counters. Inaccurate unless
worker threads which might increment the counter are
barrier-synchronized
@@ -298,35 +247,27 @@
*/
static inline void
-vlib_get_combined_counter (vlib_combined_counter_main_t * cm,
+vlib_get_combined_counter (const vlib_combined_counter_main_t * cm,
u32 index, vlib_counter_t * result)
{
- vlib_mini_counter_t *my_minis, *mini;
- vlib_counter_t *maxi;
+ vlib_counter_t *my_counters, *counter;
int i;
result->packets = 0;
result->bytes = 0;
- for (i = 0; i < vec_len (cm->minis); i++)
+ for (i = 0; i < vec_len (cm->counters); i++)
{
- my_minis = cm->minis[i];
+ my_counters = cm->counters[i];
- mini = vec_elt_at_index (my_minis, index);
- result->packets += mini->packets;
- result->bytes += mini->bytes;
+ counter = vec_elt_at_index (my_counters, index);
+ result->packets += counter->packets;
+ result->bytes += counter->bytes;
}
-
- maxi = vec_elt_at_index (cm->maxi, index);
- result->packets += maxi->packets;
- result->bytes += maxi->bytes;
-
- if (index < vec_len (cm->value_at_last_clear))
- vlib_counter_sub (result, &cm->value_at_last_clear[index]);
}
/** Clear a combined counter
- Clears the set of per-thread u16 counters, and the shared vlib_counter_t
+ Clears the set of per-thread counters.
@param cm - (vlib_combined_counter_main_t *) combined counter main pointer
@param index - (u32) index of the counter to clear
@@ -334,21 +275,17 @@
always_inline void
vlib_zero_combined_counter (vlib_combined_counter_main_t * cm, u32 index)
{
- vlib_mini_counter_t *mini, *my_minis;
+ vlib_counter_t *my_counters, *counter;
int i;
- for (i = 0; i < vec_len (cm->minis); i++)
+ for (i = 0; i < vec_len (cm->counters); i++)
{
- my_minis = cm->minis[i];
+ my_counters = cm->counters[i];
- mini = vec_elt_at_index (my_minis, index);
- mini->packets = 0;
- mini->bytes = 0;
+ counter = vec_elt_at_index (my_counters, index);
+ counter->packets = 0;
+ counter->bytes = 0;
}
-
- vlib_counter_zero (&cm->maxi[index]);
- if (index < vec_len (cm->value_at_last_clear))
- vlib_counter_zero (&cm->value_at_last_clear[index]);
}
/** validate a simple counter
diff --git a/src/vnet/ip/ip4_forward.c b/src/vnet/ip/ip4_forward.c
index 0dad61d..7352c2e 100644
--- a/src/vnet/ip/ip4_forward.c
+++ b/src/vnet/ip/ip4_forward.c
@@ -2375,6 +2375,17 @@
adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
+ /*
+ * pre-fetch the per-adjacency counters
+ */
+ if (do_counters)
+ {
+ vlib_prefetch_combined_counter (&adjacency_counters,
+ cpu_index, adj_index0);
+ vlib_prefetch_combined_counter (&adjacency_counters,
+ cpu_index, adj_index1);
+ }
+
/* We should never rewrite a pkt using the MISS adjacency */
ASSERT (adj_index0 && adj_index1);
@@ -2480,17 +2491,6 @@
rewrite_header.max_l3_packet_bytes ? IP4_ERROR_MTU_EXCEEDED :
error1);
- /*
- * pre-fetch the per-adjacency counters
- */
- if (do_counters)
- {
- vlib_prefetch_combined_counter (&adjacency_counters,
- cpu_index, adj_index0);
- vlib_prefetch_combined_counter (&adjacency_counters,
- cpu_index, adj_index1);
- }
-
/* Don't adjust the buffer for ttl issue; icmp-error node wants
* to see the IP headerr */
if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
@@ -2624,8 +2624,9 @@
p0->flags &= ~VNET_BUFFER_LOCALLY_ORIGINATED;
}
- vlib_prefetch_combined_counter (&adjacency_counters,
- cpu_index, adj_index0);
+ if (do_counters)
+ vlib_prefetch_combined_counter (&adjacency_counters,
+ cpu_index, adj_index0);
/* Guess we are only writing on simple Ethernet header. */
vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
@@ -2641,10 +2642,11 @@
rw_len0 = adj0[0].rewrite_header.data_bytes;
vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
- vlib_increment_combined_counter
- (&adjacency_counters,
- cpu_index,
- adj_index0, 1, vlib_buffer_length_in_chain (vm, p0) + rw_len0);
+ if (do_counters)
+ vlib_increment_combined_counter
+ (&adjacency_counters,
+ cpu_index, adj_index0, 1,
+ vlib_buffer_length_in_chain (vm, p0) + rw_len0);
/* Check MTU of outgoing interface. */
error0 = (vlib_buffer_length_in_chain (vm, p0)
diff --git a/src/vnet/map/map.c b/src/vnet/map/map.c
index 7006b1d..99305af 100644
--- a/src/vnet/map/map.c
+++ b/src/vnet/map/map.c
@@ -1304,7 +1304,7 @@
{
which = cm - mm->domain_counters;
- for (i = 0; i < vec_len (cm->maxi); i++)
+ for (i = 0; i < vlib_combined_counter_n_counters (cm); i++)
{
vlib_get_combined_counter (cm, i, &v);
total_pkts[which] += v.packets;
diff --git a/src/vnet/map/map_api.c b/src/vnet/map/map_api.c
index 7febeb3..d618e7a 100644
--- a/src/vnet/map/map_api.c
+++ b/src/vnet/map/map_api.c
@@ -211,7 +211,7 @@
{
which = cm - mm->domain_counters;
- for (i = 0; i < vec_len (cm->maxi); i++)
+ for (i = 0; i < vlib_combined_counter_n_counters (cm); i++)
{
vlib_get_combined_counter (cm, i, &v);
total_pkts[which] += v.packets;
diff --git a/src/vnet/rewrite.c b/src/vnet/rewrite.c
index c4a171c..47fb74d 100644
--- a/src/vnet/rewrite.c
+++ b/src/vnet/rewrite.c
@@ -79,7 +79,7 @@
if (NULL != si)
s = format (s, "%U: ", format_vnet_sw_interface_name, vnm, si);
else
- s = format (s, "DELETED");
+ s = format (s, "DELETED:%d", rw->sw_if_index);
}
/* Format rewrite string. */
diff --git a/src/vpp/api/api.c b/src/vpp/api/api.c
index d8301fa..8df4040 100644
--- a/src/vpp/api/api.c
+++ b/src/vpp/api/api.c
@@ -849,7 +849,7 @@
{
which = cm - im->combined_sw_if_counters;
- for (i = 0; i < vec_len (cm->maxi); i++)
+ for (i = 0; i < vlib_combined_counter_n_counters (cm); i++)
{
vlib_get_combined_counter (cm, i, &v);
total_pkts[which] += v.packets;
diff --git a/src/vpp/stats/stats.c b/src/vpp/stats/stats.c
index c46d441..1927da0 100644
--- a/src/vpp/stats/stats.c
+++ b/src/vpp/stats/stats.c
@@ -134,7 +134,7 @@
vlib_simple_counter_main_t *cm;
u32 items_this_message = 0;
u64 v, *vp = 0;
- int i;
+ int i, n_counts;
/*
* Prevent interface registration from expanding / moving the vectors...
@@ -144,13 +144,13 @@
vec_foreach (cm, im->sw_if_counters)
{
-
- for (i = 0; i < vec_len (cm->maxi); i++)
+ n_counts = vlib_simple_counter_n_counters (cm);
+ for (i = 0; i < n_counts; i++)
{
if (mp == 0)
{
items_this_message = clib_min (SIMPLE_COUNTER_BATCH_SIZE,
- vec_len (cm->maxi) - i);
+ n_counts - i);
mp = vl_msg_api_alloc_as_if_client
(sizeof (*mp) + items_this_message * sizeof (v));
@@ -189,19 +189,19 @@
vlib_combined_counter_main_t *cm;
u32 items_this_message = 0;
vlib_counter_t v, *vp = 0;
- int i;
+ int i, n_counts;
vnet_interface_counter_lock (im);
vec_foreach (cm, im->combined_sw_if_counters)
{
-
- for (i = 0; i < vec_len (cm->maxi); i++)
+ n_counts = vlib_combined_counter_n_counters (cm);
+ for (i = 0; i < n_counts; i++)
{
if (mp == 0)
{
items_this_message = clib_min (COMBINED_COUNTER_BATCH_SIZE,
- vec_len (cm->maxi) - i);
+ n_counts - i);
mp = vl_msg_api_alloc_as_if_client
(sizeof (*mp) + items_this_message * sizeof (v));