| /* SPDX-License-Identifier: Apache-2.0 |
| * Copyright(c) 2021 Cisco Systems, Inc. |
| */ |
| |
| #include <vppinfra/format.h> |
| #include <vppinfra/vector/test/test.h> |
| #include <vppinfra/error.h> |
| |
| test_main_t test_main; |
| |
| int |
| test_march_supported (clib_march_variant_type_t type) |
| { |
| #define _(s, n) \ |
| if (CLIB_MARCH_VARIANT_TYPE_##s == type) \ |
| return clib_cpu_march_priority_##s (); |
| foreach_march_variant |
| #undef _ |
| return 0; |
| } |
| |
| clib_error_t * |
| test_funct (test_main_t *tm) |
| { |
| for (int i = 0; i < CLIB_MARCH_TYPE_N_VARIANTS; i++) |
| { |
| test_registration_t *r = tm->registrations[i]; |
| |
| if (r == 0 || test_march_supported (i) < 0) |
| continue; |
| |
| fformat (stdout, "\nMultiarch Variant: %U\n", format_march_variant, i); |
| fformat (stdout, |
| "-------------------------------------------------------\n"); |
| while (r) |
| { |
| clib_error_t *err; |
| if (tm->filter && strstr (r->name, (char *) tm->filter) == 0) |
| goto next; |
| err = (r->fn) (0); |
| fformat (stdout, "%-50s %s\n", r->name, err ? "FAIL" : "PASS"); |
| if (err) |
| { |
| clib_error_report (err); |
| fformat (stdout, "\n"); |
| } |
| next: |
| r = r->next; |
| } |
| } |
| |
| fformat (stdout, "\n"); |
| return 0; |
| } |
| |
| #define TEST_PERF_MAX_EVENTS 7 |
| typedef struct |
| { |
| char *name; |
| char *desc; |
| u64 config[TEST_PERF_MAX_EVENTS]; |
| u32 type; |
| u8 n_events; |
| format_function_t *format_fn; |
| } test_perf_event_bundle_t; |
| |
| static u8 * |
| format_test_perf_bundle_default (u8 *s, va_list *args) |
| { |
| test_main_t *tm = &test_main; |
| test_perf_event_bundle_t __clib_unused *b = |
| va_arg (*args, test_perf_event_bundle_t *); |
| test_perf_t *tp = va_arg (*args, test_perf_t *); |
| u64 *data = va_arg (*args, u64 *); |
| |
| if (tm->ref_clock > 0) |
| { |
| if (data) |
| s = format (s, "%8.1f", tm->ref_clock * data[0] / data[1] / 1e9); |
| else |
| s = format (s, "%8s", "Freq"); |
| } |
| |
| if (data) |
| s = format (s, "%5.2f", (f64) data[2] / data[0]); |
| else |
| s = format (s, "%5s", "IPC"); |
| |
| if (data) |
| s = format (s, "%8.2f", (f64) data[0] / tp->n_ops); |
| else |
| s = format (s, "%8s", "Clks/Op"); |
| |
| if (data) |
| s = format (s, "%8.2f", (f64) data[2] / tp->n_ops); |
| else |
| s = format (s, "%8s", "Inst/Op"); |
| |
| if (data) |
| s = format (s, "%9.2f", (f64) data[3] / tp->n_ops); |
| else |
| s = format (s, "%9s", "Brnch/Op"); |
| |
| if (data) |
| s = format (s, "%10.2f", (f64) data[4] / tp->n_ops); |
| else |
| s = format (s, "%10s", "BrMiss/Op"); |
| return s; |
| } |
| |
| static u8 * |
| format_test_perf_bundle_core_power (u8 *s, va_list *args) |
| { |
| test_perf_event_bundle_t __clib_unused *b = |
| va_arg (*args, test_perf_event_bundle_t *); |
| test_perf_t __clib_unused *tp = va_arg (*args, test_perf_t *); |
| u64 *data = va_arg (*args, u64 *); |
| |
| if (data) |
| s = format (s, "%7.1f %%", (f64) 100 * data[1] / data[0]); |
| else |
| s = format (s, "%9s", "Level 0"); |
| |
| if (data) |
| s = format (s, "%8.1f %%", (f64) 100 * data[2] / data[0]); |
| else |
| s = format (s, "%9s", "Level 1"); |
| |
| if (data) |
| s = format (s, "%7.1f %%", (f64) 100 * data[3] / data[0]); |
| else |
| s = format (s, "%9s", "Level 2"); |
| |
| return s; |
| } |
| |
| test_perf_event_bundle_t perf_bundles[] = { |
| { |
| .name = "default", |
| .desc = "IPC, Clocks/Operatiom, Instr/Operation, Branch Total & Miss", |
| .type = PERF_TYPE_HARDWARE, |
| .config[0] = PERF_COUNT_HW_CPU_CYCLES, |
| .config[1] = PERF_COUNT_HW_REF_CPU_CYCLES, |
| .config[2] = PERF_COUNT_HW_INSTRUCTIONS, |
| .config[3] = PERF_COUNT_HW_BRANCH_INSTRUCTIONS, |
| .config[4] = PERF_COUNT_HW_BRANCH_MISSES, |
| .n_events = 5, |
| .format_fn = format_test_perf_bundle_default, |
| } |
| #ifdef __x86_64__ |
| #define PERF_INTEL_CODE(event, umask) ((event) | (umask) << 8) |
| , |
| { |
| .name = "core-power", |
| .desc = |
| "Core cycles where the core was running under specific turbo schedule.", |
| .type = PERF_TYPE_RAW, |
| .config[0] = PERF_INTEL_CODE (0x3c, 0x00), |
| .config[1] = PERF_INTEL_CODE (0x28, 0x07), |
| .config[2] = PERF_INTEL_CODE (0x28, 0x18), |
| .config[3] = PERF_INTEL_CODE (0x28, 0x20), |
| .config[4] = PERF_INTEL_CODE (0x28, 0x40), |
| .n_events = 5, |
| .format_fn = format_test_perf_bundle_core_power, |
| } |
| #endif |
| }; |
| |
| #ifdef __linux__ |
| clib_error_t * |
| test_perf (test_main_t *tm) |
| { |
| clib_error_t *err = 0; |
| test_perf_event_bundle_t *b = 0; |
| int group_fd = -1, fds[TEST_PERF_MAX_EVENTS]; |
| u64 count[TEST_PERF_MAX_EVENTS + 3] = {}; |
| struct perf_event_attr pe = { |
| .size = sizeof (struct perf_event_attr), |
| .disabled = 1, |
| .exclude_kernel = 1, |
| .exclude_hv = 1, |
| .pinned = 1, |
| .exclusive = 1, |
| .read_format = (PERF_FORMAT_GROUP | PERF_FORMAT_TOTAL_TIME_ENABLED | |
| PERF_FORMAT_TOTAL_TIME_RUNNING), |
| }; |
| |
| for (int i = 0; i < TEST_PERF_MAX_EVENTS; i++) |
| fds[i] = -1; |
| |
| tm->ref_clock = os_cpu_clock_frequency (); |
| |
| if (tm->bundle) |
| { |
| for (int i = 0; i < ARRAY_LEN (perf_bundles); i++) |
| if (strncmp ((char *) tm->bundle, perf_bundles[i].name, |
| vec_len (tm->bundle)) == 0) |
| { |
| b = perf_bundles + i; |
| break; |
| } |
| if (b == 0) |
| return clib_error_return (0, "Unknown bundle '%s'", tm->bundle); |
| } |
| else |
| b = perf_bundles; |
| |
| for (int i = 0; i < b->n_events; i++) |
| { |
| pe.config = b->config[i]; |
| pe.type = b->type; |
| int fd = syscall (__NR_perf_event_open, &pe, /* pid */ 0, /* cpu */ -1, |
| /* group_fd */ group_fd, /* flags */ 0); |
| if (fd < 0) |
| { |
| err = clib_error_return_unix (0, "perf_event_open"); |
| goto done; |
| } |
| |
| if (group_fd == -1) |
| { |
| group_fd = fd; |
| pe.pinned = 0; |
| pe.exclusive = 0; |
| } |
| fds[i] = fd; |
| } |
| fformat (stdout, "Warming up...\n"); |
| for (u64 i = 0; i < (u64) tm->ref_clock; i++) |
| asm inline("" : : "r"(i * i) : "memory"); |
| |
| for (int i = 0; i < CLIB_MARCH_TYPE_N_VARIANTS; i++) |
| { |
| test_registration_t *r = tm->registrations[i]; |
| |
| if (r == 0 || test_march_supported (i) < 0) |
| continue; |
| |
| fformat (stdout, "\nMultiarch Variant: %U\n", format_march_variant, i); |
| fformat (stdout, |
| "-------------------------------------------------------\n"); |
| while (r) |
| { |
| if (r->perf_tests) |
| { |
| test_perf_t *pt = r->perf_tests; |
| if (tm->filter && strstr (r->name, (char *) tm->filter) == 0) |
| goto next; |
| fformat (stdout, "%-22s%-12s%U\n", r->name, "OpType", |
| b->format_fn, b, pt, 0UL); |
| do |
| { |
| u32 read_size = (b->n_events + 3) * sizeof (u64); |
| for (int i = 0; i < tm->repeat; i++) |
| { |
| test_perf_event_reset (group_fd); |
| pt->fn (group_fd, pt); |
| if ((read (group_fd, &count, read_size) != read_size)) |
| { |
| err = clib_error_return_unix (0, "read"); |
| goto done; |
| } |
| if (count[1] != count[2]) |
| clib_warning ( |
| "perf counters were not running all the time." |
| #ifdef __x86_64__ |
| "\nConsider turning NMI watchdog off ('sysctl -w " |
| "kernel.nmi_watchdog=0')." |
| #endif |
| ); |
| fformat (stdout, " %-20s%-12s%U\n", pt->name, |
| pt->op_name ? pt->op_name : "", b->format_fn, b, |
| pt, count + 3); |
| } |
| } |
| while ((++pt)->fn); |
| } |
| next: |
| r = r->next; |
| } |
| } |
| |
| done: |
| for (int i = 0; i < TEST_PERF_MAX_EVENTS; i++) |
| if (fds[i] != -1) |
| close (fds[i]); |
| return err; |
| } |
| #endif |
| |
| int |
| main (int argc, char *argv[]) |
| { |
| test_main_t *tm = &test_main; |
| unformat_input_t _i = {}, *i = &_i; |
| clib_mem_init (0, 64ULL << 20); |
| clib_error_t *err; |
| int perf = 0; |
| |
| /* defaults */ |
| tm->repeat = 3; |
| |
| unformat_init_command_line (i, argv); |
| |
| while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) |
| { |
| if (unformat (i, "perf")) |
| perf = 1; |
| else if (unformat (i, "filter %s", &tm->filter)) |
| ; |
| else if (unformat (i, "bundle %s", &tm->bundle)) |
| ; |
| else if (unformat (i, "repeat %d", &tm->repeat)) |
| ; |
| else |
| { |
| clib_warning ("unknown input '%U'", format_unformat_error, i); |
| exit (1); |
| } |
| } |
| |
| if (perf) |
| err = test_perf (tm); |
| else |
| err = test_funct (tm); |
| |
| if (err) |
| { |
| clib_error_report (err); |
| fformat (stderr, "\n"); |
| return 1; |
| } |
| return 0; |
| } |
| |
| void * |
| test_mem_alloc (uword size) |
| { |
| void *rv; |
| size = round_pow2 (size, CLIB_CACHE_LINE_BYTES); |
| rv = clib_mem_alloc_aligned (size, CLIB_CACHE_LINE_BYTES); |
| clib_memset_u8 (rv, 0, size); |
| return rv; |
| } |
| |
| void * |
| test_mem_alloc_and_fill_inc_u8 (uword size, u8 start, u8 mask) |
| { |
| u8 *rv; |
| mask = mask ? mask : 0xff; |
| size = round_pow2 (size, CLIB_CACHE_LINE_BYTES); |
| rv = clib_mem_alloc_aligned (size, CLIB_CACHE_LINE_BYTES); |
| for (uword i = 0; i < size; i++) |
| rv[i] = ((u8) i + start) & mask; |
| return rv; |
| } |
| |
| void * |
| test_mem_alloc_and_splat (uword elt_size, uword n_elts, void *elt) |
| { |
| u8 *rv, *e; |
| uword data_size = elt_size * n_elts; |
| uword alloc_size = round_pow2 (data_size, CLIB_CACHE_LINE_BYTES); |
| e = rv = clib_mem_alloc_aligned (alloc_size, CLIB_CACHE_LINE_BYTES); |
| while (e - rv < data_size) |
| { |
| clib_memcpy_fast (e, elt, elt_size); |
| e += elt_size; |
| } |
| |
| if (data_size < alloc_size) |
| clib_memset_u8 (e, 0, alloc_size - data_size); |
| return rv; |
| } |
| |
/* Release a buffer by handing `p` to clib_mem_free ().  Counterpart of the
   test_mem_alloc* () helpers, which allocate through
   clib_mem_alloc_aligned ().  */
void
test_mem_free (void *p)
{
  clib_mem_free (p);
}