/*
 * kernel/time/timer_stats.c
 *
 * Collect timer usage statistics.
 *
 * Copyright(C) 2006, Red Hat, Inc., Ingo Molnar
 * Copyright(C) 2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
 *
 * timer_stats is based on timer_top, a similar functionality which was part of
 * Con Kolivas' dyntick patch set. It was developed by Daniel Petrini at the
 * Instituto Nokia de Tecnologia - INdT - Manaus. timer_top's design was based
 * on dynamic allocation of the statistics entries and linear-search-based
 * lookup combined with a global lock, rather than the static array, hash
 * and per-CPU locking which is used by timer_stats. It was written for the
 * pre-hrtimer kernel code and therefore did not take hrtimers into account.
 * Nevertheless it provided the base for the timer_stats implementation and
 * was a helpful source of inspiration. Kudos to Daniel and the Nokia folks
 * for this effort.
 *
 * timer_top.c is
 *	Copyright (C) 2005 Instituto Nokia de Tecnologia - INdT - Manaus
 *	Written by Daniel Petrini <d.pensator@gmail.com>
 *	timer_top.c was released under the GNU General Public License version 2
 *
 * We export the addresses of the timer functions and the number of times
 * they were called, along with the pid and cmdline of the owning process
 * where applicable.
 *
 * Start/stop data collection:
 * # echo [1|0] >/proc/timer_stats
 *
 * Display the information collected so far:
 * # cat /proc/timer_stats
 *
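 * Example output (format as produced by tstats_show() below; the
 * counts, PIDs and function names are purely illustrative):
 *
 *   Timer Stats Version: v0.3
 *   Sample period: 3.888 s
 *   Collection: active
 *     12,     0 swapper          hrtimer_stop_sched_tick (hrtimer_sched_tick)
 *     15,     1 swapper          hcd_submit_urb (rh_timer_func)
 *   27 total events, 6.944 events/sec
 *
 * A 'D' following the event count marks a deferrable timer.
 *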
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/proc_fs.h>
#include <linux/module.h>
#include <linux/spinlock.h>
#include <linux/sched.h>
#include <linux/seq_file.h>
#include <linux/kallsyms.h>

#include <asm/uaccess.h>

/*
 * This is our basic unit of interest: a timer expiry event identified
 * by the timer, its start/expire functions and the PID of the task that
 * started the timer. We count the number of times an event happens:
 */
struct entry {
	/*
	 * Hash list:
	 */
	struct entry *next;

	/*
	 * Hash keys:
	 */
	void *timer;
	void *start_func;
	void *expire_func;
	pid_t pid;

	/*
	 * Number of timeout events:
	 */
	unsigned long count;
	u32 flags;

	/*
	 * We save the command-line string to preserve
	 * this information past task exit:
	 */
	char comm[TASK_COMM_LEN + 1];

} ____cacheline_aligned_in_smp;

/*
 * Spinlock protecting the tables - not taken during lookup:
 */
static DEFINE_RAW_SPINLOCK(table_lock);

/*
 * Per-CPU lookup locks for fast hash lookup:
 */
static DEFINE_PER_CPU(raw_spinlock_t, tstats_lookup_lock);

/*
 * Mutex to serialize state changes with show-stats activities:
 */
static DEFINE_MUTEX(show_mutex);

/*
 * Collection status, active/inactive:
 */
int __read_mostly timer_stats_active;

/*
 * Beginning/end timestamps of measurement:
 */
static ktime_t time_start, time_stop;

/*
 * tstat entry structs only get allocated while collection is
 * active and never freed during that time - this simplifies
 * things quite a bit.
 *
 * They get freed when a new collection period is started.
 */
#define MAX_ENTRIES_BITS	10
#define MAX_ENTRIES		(1UL << MAX_ENTRIES_BITS)

static unsigned long nr_entries;
static struct entry entries[MAX_ENTRIES];

static atomic_t overflow_count;

/*
 * The entries are in a hash-table, for fast lookup:
 */
#define TSTAT_HASH_BITS		(MAX_ENTRIES_BITS - 1)
#define TSTAT_HASH_SIZE		(1UL << TSTAT_HASH_BITS)
#define TSTAT_HASH_MASK		(TSTAT_HASH_SIZE - 1)

#define __tstat_hashfn(entry)						\
	(((unsigned long)(entry)->timer ^				\
	  (unsigned long)(entry)->start_func ^				\
	  (unsigned long)(entry)->expire_func ^				\
	  (unsigned long)(entry)->pid) & TSTAT_HASH_MASK)

#define tstat_hashentry(entry)	(tstat_hash_table + __tstat_hashfn(entry))

static struct entry *tstat_hash_table[TSTAT_HASH_SIZE] __read_mostly;

static void reset_entries(void)
{
	nr_entries = 0;
	memset(entries, 0, sizeof(entries));
	memset(tstat_hash_table, 0, sizeof(tstat_hash_table));
	atomic_set(&overflow_count, 0);
}

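/*
 * Grab the next free slot from the static entries[] array. Returns
 * NULL once all MAX_ENTRIES slots are used up; the caller accounts
 * for that via overflow_count. Called under table_lock, see
 * tstat_lookup():
 */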
static struct entry *alloc_entry(void)
{
	if (nr_entries >= MAX_ENTRIES)
		return NULL;

	return entries + nr_entries++;
}

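/*
 * Two entries are considered equal when all four hash keys
 * (timer, start_func, expire_func, pid) match:
 */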
static int match_entries(struct entry *entry1, struct entry *entry2)
{
	return entry1->timer == entry2->timer &&
	       entry1->start_func == entry2->start_func &&
	       entry1->expire_func == entry2->expire_func &&
	       entry1->pid == entry2->pid;
}

/*
 * Look up whether an entry matching this item is present
 * in the hash already. Must be called with irqs off and the
 * lookup lock held:
 */
static struct entry *tstat_lookup(struct entry *entry, char *comm)
{
	struct entry **head, *curr, *prev;

	head = tstat_hashentry(entry);
	curr = *head;

	/*
	 * The fastpath is when the entry is already hashed; we do this
	 * with the lookup lock held, but with the table lock not held:
	 */
	while (curr) {
		if (match_entries(curr, entry))
			return curr;

		curr = curr->next;
	}
	/*
	 * Slowpath: allocate, set up and link a new hash entry:
	 */
	prev = NULL;
	curr = *head;

	raw_spin_lock(&table_lock);
	/*
	 * Make sure we have not raced with another CPU:
	 */
	while (curr) {
		if (match_entries(curr, entry))
			goto out_unlock;

		prev = curr;
		curr = curr->next;
	}

	curr = alloc_entry();
	if (curr) {
		*curr = *entry;
		curr->count = 0;
		curr->next = NULL;
		memcpy(curr->comm, comm, TASK_COMM_LEN);

		smp_mb(); /* Ensure that curr is initialized before insert */

		if (prev)
			prev->next = curr;
		else
			*head = curr;
	}
 out_unlock:
	raw_spin_unlock(&table_lock);

	return curr;
}

/**
 * timer_stats_update_stats - Update the statistics for a timer.
 * @timer:	pointer to either a timer_list or a hrtimer
 * @pid:	the pid of the task which set up the timer
 * @startf:	pointer to the function which did the timer setup
 * @timerf:	pointer to the timer callback function of the timer
 * @comm:	name of the process which set up the timer
 * @tflags:	the flags field of the timer
 *
 * If the timer is already registered, the event counter is
 * incremented. Otherwise the timer is registered in a free slot.
 */
void timer_stats_update_stats(void *timer, pid_t pid, void *startf,
			      void *timerf, char *comm, u32 tflags)
{
	/*
	 * It doesn't matter which lock we take:
	 */
	raw_spinlock_t *lock;
	struct entry *entry, input;
	unsigned long flags;

	if (likely(!timer_stats_active))
		return;

	lock = &per_cpu(tstats_lookup_lock, raw_smp_processor_id());

	input.timer = timer;
	input.start_func = startf;
	input.expire_func = timerf;
	input.pid = pid;
	input.flags = tflags;

	raw_spin_lock_irqsave(lock, flags);
	if (!timer_stats_active)
		goto out_unlock;

	entry = tstat_lookup(&input, comm);
	if (likely(entry))
		entry->count++;
	else
		atomic_inc(&overflow_count);

 out_unlock:
	raw_spin_unlock_irqrestore(lock, flags);
}

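/*
 * Print the symbol name for an address, falling back to the raw
 * pointer value when the symbol cannot be resolved:
 */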
static void print_name_offset(struct seq_file *m, unsigned long addr)
{
	char symname[KSYM_NAME_LEN];

	if (lookup_symbol_name(addr, symname) < 0)
		seq_printf(m, "<%p>", (void *)addr);
	else
		seq_printf(m, "%s", symname);
}

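/*
 * Render the collected statistics for /proc/timer_stats: sample
 * period, overflow count, one line per entry (deferrable timers are
 * tagged with a 'D' after the count) and the total event rate:
 */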
static int tstats_show(struct seq_file *m, void *v)
{
	struct timespec period;
	struct entry *entry;
	unsigned long ms;
	long events = 0;
	ktime_t time;
	int i;

	mutex_lock(&show_mutex);
	/*
	 * If still active then calculate up to now:
	 */
	if (timer_stats_active)
		time_stop = ktime_get();

	time = ktime_sub(time_stop, time_start);

	period = ktime_to_timespec(time);
	ms = period.tv_nsec / 1000000;

	seq_puts(m, "Timer Stats Version: v0.3\n");
	seq_printf(m, "Sample period: %ld.%03ld s\n", period.tv_sec, ms);
	if (atomic_read(&overflow_count))
		seq_printf(m, "Overflow: %d entries\n", atomic_read(&overflow_count));
	seq_printf(m, "Collection: %s\n", timer_stats_active ? "active" : "inactive");

	for (i = 0; i < nr_entries; i++) {
		entry = entries + i;
		if (entry->flags & TIMER_DEFERRABLE) {
			seq_printf(m, "%4luD, %5d %-16s ",
				entry->count, entry->pid, entry->comm);
		} else {
			seq_printf(m, " %4lu, %5d %-16s ",
				entry->count, entry->pid, entry->comm);
		}

		print_name_offset(m, (unsigned long)entry->start_func);
		seq_puts(m, " (");
		print_name_offset(m, (unsigned long)entry->expire_func);
		seq_puts(m, ")\n");

		events += entry->count;
	}

	ms += period.tv_sec * 1000;
	if (!ms)
		ms = 1;

	if (events && period.tv_sec)
		seq_printf(m, "%ld total events, %ld.%03ld events/sec\n",
			   events, events * 1000 / ms,
			   (events * 1000000 / ms) % 1000);
	else
		seq_printf(m, "%ld total events\n", events);

	mutex_unlock(&show_mutex);

	return 0;
}

/*
 * After a state change, make sure all concurrent lookup/update
 * activities have stopped:
 */
static void sync_access(void)
{
	unsigned long flags;
	int cpu;

	for_each_online_cpu(cpu) {
		raw_spinlock_t *lock = &per_cpu(tstats_lookup_lock, cpu);

		raw_spin_lock_irqsave(lock, flags);
		/* nothing */
		raw_spin_unlock_irqrestore(lock, flags);
	}
}

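/*
 * /proc/timer_stats write handler: '1' starts a fresh collection
 * period, '0' stops the current one. Anything else is rejected
 * with -EINVAL:
 */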
static ssize_t tstats_write(struct file *file, const char __user *buf,
			    size_t count, loff_t *offs)
{
	char ctl[2];

	if (count != 2 || *offs)
		return -EINVAL;

	if (copy_from_user(ctl, buf, count))
		return -EFAULT;

	mutex_lock(&show_mutex);
	switch (ctl[0]) {
	case '0':
		if (timer_stats_active) {
			timer_stats_active = 0;
			time_stop = ktime_get();
			sync_access();
		}
		break;
	case '1':
		if (!timer_stats_active) {
			reset_entries();
			time_start = ktime_get();
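			/* Make the reset and start time visible before enabling: */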
			smp_mb();
			timer_stats_active = 1;
		}
		break;
	default:
		count = -EINVAL;
	}
	mutex_unlock(&show_mutex);

	return count;
}

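/*
 * The whole report is produced in one pass, hence single_open():
 */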
static int tstats_open(struct inode *inode, struct file *filp)
{
	return single_open(filp, tstats_show, NULL);
}

static const struct file_operations tstats_fops = {
	.open		= tstats_open,
	.read		= seq_read,
	.write		= tstats_write,
	.llseek		= seq_lseek,
	.release	= single_release,
};

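/*
 * Initialize the per-CPU lookup locks for all possible CPUs:
 */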
void __init init_timer_stats(void)
{
	int cpu;

	for_each_possible_cpu(cpu)
		raw_spin_lock_init(&per_cpu(tstats_lookup_lock, cpu));
}

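/*
 * Create the /proc/timer_stats entry once procfs is up:
 */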
static int __init init_tstats_procfs(void)
{
	struct proc_dir_entry *pe;

	pe = proc_create("timer_stats", 0644, NULL, &tstats_fops);
	if (!pe)
		return -ENOMEM;
	return 0;
}
__initcall(init_tstats_procfs);