/**
 * @file cpu_buffer.c
 *
 * @remark Copyright 2002-2009 OProfile authors
 * @remark Read the file COPYING
 *
 * @author John Levon <levon@movementarian.org>
 * @author Barry Kasindorf <barry.kasindorf@amd.com>
 * @author Robert Richter <robert.richter@amd.com>
 *
 * Each CPU has a local buffer that stores PC value/event
 * pairs. We also log context switches when we notice them.
 * Eventually each CPU's buffer is processed into the global
 * event buffer by sync_buffer().
 *
 * We use a local buffer for two reasons: an NMI or similar
 * interrupt cannot synchronise, and high sampling rates
 * would lead to catastrophic global synchronisation if
 * a global buffer was used.
 */

#include <linux/sched.h>
#include <linux/oprofile.h>
#include <linux/errno.h>

#include "event_buffer.h"
#include "cpu_buffer.h"
#include "buffer_sync.h"
#include "oprof.h"

#define OP_BUFFER_FLAGS 0

static struct ring_buffer *op_ring_buffer;
DEFINE_PER_CPU(struct oprofile_cpu_buffer, op_cpu_buffer);

static void wq_sync_buffer(struct work_struct *work);

#define DEFAULT_TIMER_EXPIRE (HZ / 10)
static int work_enabled;

unsigned long oprofile_get_cpu_buffer_size(void)
{
	return oprofile_cpu_buffer_size;
}

void oprofile_cpu_buffer_inc_smpl_lost(void)
{
	struct oprofile_cpu_buffer *cpu_buf = this_cpu_ptr(&op_cpu_buffer);

	cpu_buf->sample_lost_overflow++;
}

void free_cpu_buffers(void)
{
	if (op_ring_buffer)
		ring_buffer_free(op_ring_buffer);
	op_ring_buffer = NULL;
}

#define RB_EVENT_HDR_SIZE 4

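/*
 * Allocate one shared ring buffer sized for oprofile_cpu_buffer_size
 * samples (plus per-event header overhead) and reset the per-CPU
 * bookkeeping for every possible CPU.
 */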
int alloc_cpu_buffers(void)
{
	int i;

	unsigned long buffer_size = oprofile_cpu_buffer_size;
	unsigned long byte_size = buffer_size * (sizeof(struct op_sample) +
						 RB_EVENT_HDR_SIZE);

	op_ring_buffer = ring_buffer_alloc(byte_size, OP_BUFFER_FLAGS);
	if (!op_ring_buffer)
		goto fail;

	for_each_possible_cpu(i) {
		struct oprofile_cpu_buffer *b = &per_cpu(op_cpu_buffer, i);

		b->last_task = NULL;
		b->last_is_kernel = -1;
		b->tracing = 0;
		b->buffer_size = buffer_size;
		b->sample_received = 0;
		b->sample_lost_overflow = 0;
		b->backtrace_aborted = 0;
		b->sample_invalid_eip = 0;
		b->cpu = i;
		INIT_DELAYED_WORK(&b->work, wq_sync_buffer);
	}
	return 0;

fail:
	free_cpu_buffers();
	return -ENOMEM;
}

void start_cpu_work(void)
{
	int i;

	work_enabled = 1;

	for_each_online_cpu(i) {
		struct oprofile_cpu_buffer *b = &per_cpu(op_cpu_buffer, i);

		/*
		 * Spread the work out by one jiffy per cpu so the work
		 * items don't all fire at once.
		 */
		schedule_delayed_work_on(i, &b->work, DEFAULT_TIMER_EXPIRE + i);
	}
}

void end_cpu_work(void)
{
	work_enabled = 0;
}

void flush_cpu_work(void)
{
	int i;

	for_each_online_cpu(i) {
		struct oprofile_cpu_buffer *b = &per_cpu(op_cpu_buffer, i);

		/* these work items are per-cpu, no need for flush_sync */
		flush_delayed_work(&b->work);
	}
}

/*
 * This function prepares the cpu buffer to write a sample.
 *
 * Struct op_entry is used during operations on the ring buffer while
 * struct op_sample contains the data that is stored in the ring
 * buffer. The entry may be uninitialized on input. The function
 * reserves a data array of the given size. Use
 * op_cpu_buffer_write_commit() after preparing the sample. On error a
 * null pointer is returned, otherwise a pointer to the reserved
 * sample. See the usage sketch after op_cpu_buffer_write_commit()
 * below.
 */
struct op_sample
*op_cpu_buffer_write_reserve(struct op_entry *entry, unsigned long size)
{
	entry->event = ring_buffer_lock_reserve
		(op_ring_buffer, sizeof(struct op_sample) +
		 size * sizeof(entry->sample->data[0]));
	if (!entry->event)
		return NULL;
	entry->sample = ring_buffer_event_data(entry->event);
	entry->size = size;
	entry->data = entry->sample->data;

	return entry->sample;
}

int op_cpu_buffer_write_commit(struct op_entry *entry)
{
	return ring_buffer_unlock_commit(op_ring_buffer, entry->event);
}

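/*
 * Minimal write-side usage sketch (illustration only, not part of the
 * original driver; the record layout mirrors op_add_code() below):
 *
 *	struct op_entry entry;
 *	struct op_sample *sample;
 *
 *	sample = op_cpu_buffer_write_reserve(&entry, 1);
 *	if (!sample)
 *		return -ENOMEM;
 *	sample->eip = ESCAPE_CODE;
 *	sample->event = USER_CTX_SWITCH;
 *	op_cpu_buffer_add_data(&entry, (unsigned long)current);
 *	return op_cpu_buffer_write_commit(&entry);
 */
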
struct op_sample *op_cpu_buffer_read_entry(struct op_entry *entry, int cpu)
{
	struct ring_buffer_event *e;
	e = ring_buffer_consume(op_ring_buffer, cpu, NULL, NULL);
	if (!e)
		return NULL;

	entry->event = e;
	entry->sample = ring_buffer_event_data(e);
	entry->size = (ring_buffer_event_length(e) - sizeof(struct op_sample))
		/ sizeof(entry->sample->data[0]);
	entry->data = entry->sample->data;
	return entry->sample;
}

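/*
 * Read-side sketch (illustration only; process() is a hypothetical
 * helper): a consumer such as sync_buffer() can drain one cpu's
 * entries roughly like this, using op_cpu_buffer_entries() below for
 * the count:
 *
 *	unsigned long avail = op_cpu_buffer_entries(cpu);
 *	struct op_entry entry;
 *	struct op_sample *sample;
 *
 *	while (avail-- && (sample = op_cpu_buffer_read_entry(&entry, cpu)))
 *		process(sample);
 */
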
unsigned long op_cpu_buffer_entries(int cpu)
{
	return ring_buffer_entries_cpu(op_ring_buffer, cpu);
}

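/*
 * Emit an ESCAPE_CODE record that flags state changes: the start of a
 * backtrace (TRACE_BEGIN), kernel/user transitions (KERNEL_CTX_SWITCH,
 * IS_KERNEL) and task switches (USER_CTX_SWITCH). For a task switch
 * the task pointer is appended as one data word; if nothing changed,
 * no record is written.
 */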
static int
op_add_code(struct oprofile_cpu_buffer *cpu_buf, unsigned long backtrace,
	    int is_kernel, struct task_struct *task)
{
	struct op_entry entry;
	struct op_sample *sample;
	unsigned long flags;
	int size;

	flags = 0;

	if (backtrace)
		flags |= TRACE_BEGIN;

	/* notice a switch from user->kernel or vice versa */
	is_kernel = !!is_kernel;
	if (cpu_buf->last_is_kernel != is_kernel) {
		cpu_buf->last_is_kernel = is_kernel;
		flags |= KERNEL_CTX_SWITCH;
		if (is_kernel)
			flags |= IS_KERNEL;
	}

	/* notice a task switch */
	if (cpu_buf->last_task != task) {
		cpu_buf->last_task = task;
		flags |= USER_CTX_SWITCH;
	}

	if (!flags)
		/* nothing to do */
		return 0;

	if (flags & USER_CTX_SWITCH)
		size = 1;
	else
		size = 0;

	sample = op_cpu_buffer_write_reserve(&entry, size);
	if (!sample)
		return -ENOMEM;

	sample->eip = ESCAPE_CODE;
	sample->event = flags;

	if (size)
		op_cpu_buffer_add_data(&entry, (unsigned long)task);

	op_cpu_buffer_write_commit(&entry);

	return 0;
}

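/*
 * Write a single pc/event pair as one sample with no extra data words.
 */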
static inline int
op_add_sample(struct oprofile_cpu_buffer *cpu_buf,
	      unsigned long pc, unsigned long event)
{
	struct op_entry entry;
	struct op_sample *sample;

	sample = op_cpu_buffer_write_reserve(&entry, 0);
	if (!sample)
		return -ENOMEM;

	sample->eip = pc;
	sample->event = event;

	return op_cpu_buffer_write_commit(&entry);
}

/*
 * This must be safe from any context.
 *
 * is_kernel is needed because on some architectures you cannot
 * tell if you are in kernel or user space simply by looking at
 * pc. We tag this in the buffer by generating kernel enter/exit
 * events whenever is_kernel changes.
 */
static int
log_sample(struct oprofile_cpu_buffer *cpu_buf, unsigned long pc,
	   unsigned long backtrace, int is_kernel, unsigned long event,
	   struct task_struct *task)
{
	struct task_struct *tsk = task ? task : current;
	cpu_buf->sample_received++;

	if (pc == ESCAPE_CODE) {
		cpu_buf->sample_invalid_eip++;
		return 0;
	}

	if (op_add_code(cpu_buf, backtrace, is_kernel, tsk))
		goto fail;

	if (op_add_sample(cpu_buf, pc, event))
		goto fail;

	return 1;

fail:
	cpu_buf->sample_lost_overflow++;
	return 0;
}

static inline void oprofile_begin_trace(struct oprofile_cpu_buffer *cpu_buf)
{
	cpu_buf->tracing = 1;
}

static inline void oprofile_end_trace(struct oprofile_cpu_buffer *cpu_buf)
{
	cpu_buf->tracing = 0;
}

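/*
 * Log one sample on the current cpu's buffer and, if a backtrace depth
 * is configured, let oprofile_ops.backtrace() add the trace entries
 * (up to the configured depth) while the buffer's tracing flag is set.
 */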
static inline void
__oprofile_add_ext_sample(unsigned long pc, struct pt_regs * const regs,
			  unsigned long event, int is_kernel,
			  struct task_struct *task)
{
	struct oprofile_cpu_buffer *cpu_buf = this_cpu_ptr(&op_cpu_buffer);
	unsigned long backtrace = oprofile_backtrace_depth;

	/*
	 * if log_sample() fails we can't backtrace since we lost the
	 * source of this event
	 */
	if (!log_sample(cpu_buf, pc, backtrace, is_kernel, event, task))
		/* failed */
		return;

	if (!backtrace)
		return;

	oprofile_begin_trace(cpu_buf);
	oprofile_ops.backtrace(regs, backtrace);
	oprofile_end_trace(cpu_buf);
}

void oprofile_add_ext_hw_sample(unsigned long pc, struct pt_regs * const regs,
				unsigned long event, int is_kernel,
				struct task_struct *task)
{
	__oprofile_add_ext_sample(pc, regs, event, is_kernel, task);
}

void oprofile_add_ext_sample(unsigned long pc, struct pt_regs * const regs,
			     unsigned long event, int is_kernel)
{
	__oprofile_add_ext_sample(pc, regs, event, is_kernel, NULL);
}

void oprofile_add_sample(struct pt_regs * const regs, unsigned long event)
{
	int is_kernel;
	unsigned long pc;

	if (likely(regs)) {
		is_kernel = !user_mode(regs);
		pc = profile_pc(regs);
	} else {
		is_kernel = 0;    /* This value will not be used */
		pc = ESCAPE_CODE; /* as this causes an early return. */
	}

	__oprofile_add_ext_sample(pc, regs, event, is_kernel, NULL);
}

/*
 * Add samples with data to the ring buffer.
 *
 * Use oprofile_add_data(&entry, val) to add data and
 * oprofile_write_commit(&entry) to commit the sample; see the usage
 * sketch after oprofile_write_commit() below.
 */
void
oprofile_write_reserve(struct op_entry *entry, struct pt_regs * const regs,
		       unsigned long pc, int code, int size)
{
	struct op_sample *sample;
	int is_kernel = !user_mode(regs);
	struct oprofile_cpu_buffer *cpu_buf = this_cpu_ptr(&op_cpu_buffer);

	cpu_buf->sample_received++;

	/* no backtraces for samples with data */
	if (op_add_code(cpu_buf, 0, is_kernel, current))
		goto fail;

	sample = op_cpu_buffer_write_reserve(entry, size + 2);
	if (!sample)
		goto fail;
	sample->eip = ESCAPE_CODE;
	sample->event = 0;	/* no flags */

	op_cpu_buffer_add_data(entry, code);
	op_cpu_buffer_add_data(entry, pc);

	return;

fail:
	entry->event = NULL;
	cpu_buf->sample_lost_overflow++;
}

int oprofile_add_data(struct op_entry *entry, unsigned long val)
{
	if (!entry->event)
		return 0;
	return op_cpu_buffer_add_data(entry, val);
}

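/*
 * Store a 64-bit value as two 32-bit data words, low word first.  The
 * entry must have room for both words or nothing is added.
 */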
int oprofile_add_data64(struct op_entry *entry, u64 val)
{
	if (!entry->event)
		return 0;
	if (op_cpu_buffer_get_size(entry) < 2)
		/*
		 * the function returns 0 to indicate a buffer that is
		 * too small, even if some space is left
		 */
		return 0;
	if (!op_cpu_buffer_add_data(entry, (u32)val))
		return 0;
	return op_cpu_buffer_add_data(entry, (u32)(val >> 32));
}

int oprofile_write_commit(struct op_entry *entry)
{
	if (!entry->event)
		return -EINVAL;
	return op_cpu_buffer_write_commit(entry);
}

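/*
 * Usage sketch for the data-sample interface (illustration only; the
 * variable names are hypothetical).  Room is reserved for three data
 * words: one for oprofile_add_data() and two for oprofile_add_data64():
 *
 *	struct op_entry entry;
 *
 *	oprofile_write_reserve(&entry, regs, pc, code, 3);
 *	oprofile_add_data(&entry, small_value);
 *	oprofile_add_data64(&entry, wide_value);
 *	oprofile_write_commit(&entry);
 */
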
void oprofile_add_pc(unsigned long pc, int is_kernel, unsigned long event)
{
	struct oprofile_cpu_buffer *cpu_buf = this_cpu_ptr(&op_cpu_buffer);
	log_sample(cpu_buf, pc, 0, is_kernel, event, NULL);
}

void oprofile_add_trace(unsigned long pc)
{
	struct oprofile_cpu_buffer *cpu_buf = this_cpu_ptr(&op_cpu_buffer);

	if (!cpu_buf->tracing)
		return;

	/*
	 * A broken frame can give an eip with the same value as an
	 * escape code, abort the trace if we get it.
	 */
	if (pc == ESCAPE_CODE)
		goto fail;

	if (op_add_sample(cpu_buf, pc, 0))
		goto fail;

	return;
fail:
	cpu_buf->tracing = 0;
	cpu_buf->backtrace_aborted++;
	return;
}

/*
 * This serves to avoid cpu buffer overflow, and makes sure
 * the task mortuary progresses.
 *
 * By using schedule_delayed_work_on and then schedule_delayed_work
 * we guarantee this will stay on the correct cpu.
 */
static void wq_sync_buffer(struct work_struct *work)
{
	struct oprofile_cpu_buffer *b =
		container_of(work, struct oprofile_cpu_buffer, work.work);
	if (b->cpu != smp_processor_id() && !cpu_online(b->cpu)) {
		cancel_delayed_work(&b->work);
		return;
	}
	sync_buffer(b->cpu);

	/* don't re-add the work if we're shutting down */
	if (work_enabled)
		schedule_delayed_work(&b->work, DEFAULT_TIMER_EXPIRE);
}
463}