blob: 88428ee8f2ebaba47d21639c480b691b6f5e4572 [file] [log] [blame]
/*
Copyright (c) 2010 Cisco and/or its affiliates.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <vppinfra/error.h>
#include <vppinfra/unix.h>
#include <vppinfra/elog.h>
#include <vppinfra/format.h>
#include <vppinfra/os.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <time.h>
typedef enum
{
RUNNING = 0,
WAKEUP,
} sched_event_type_t;
typedef struct
{
u32 cpu;
u8 *task;
u32 pid;
f64 timestamp;
sched_event_type_t type;
} sched_event_t;
void
kelog_init (elog_main_t * em, char *kernel_tracer, u32 n_events)
{
int enable_fd, current_tracer_fd, data_fd;
int len;
struct timespec ts, ts2;
char *trace_enable = "/debug/tracing/tracing_enabled";
char *current_tracer = "/debug/tracing/current_tracer";
char *trace_data = "/debug/tracing/trace";
f64 realtime, monotonic;
f64 freq, secs_per_clock;
ASSERT (kernel_tracer);
/*$$$$ fixme */
n_events = 1 << 18;
/* init first so we won't hurt ourselves if we bail */
elog_init (em, n_events);
enable_fd = open (trace_enable, O_RDWR);
if (enable_fd < 0)
{
clib_warning ("Couldn't open %s", trace_enable);
return;
}
/* disable kernel tracing */
if (write (enable_fd, "0\n", 2) != 2)
{
clib_unix_warning ("disable tracing");
close (enable_fd);
return;
}
/*
* open + clear the data buffer.
* see .../linux/kernel/trace/trace.c:tracing_open()
*/
data_fd = open (trace_data, O_RDWR | O_TRUNC);
if (data_fd < 0)
{
clib_warning ("Couldn't open+clear %s", trace_data);
return;
}
close (data_fd);
/* configure tracing */
current_tracer_fd = open (current_tracer, O_RDWR);
if (current_tracer_fd < 0)
{
clib_warning ("Couldn't open %s", current_tracer);
close (enable_fd);
return;
}
len = strlen (kernel_tracer);
if (write (current_tracer_fd, kernel_tracer, len) != len)
{
clib_unix_warning ("configure trace");
close (current_tracer_fd);
close (enable_fd);
return;
}
close (current_tracer_fd);
/*
* The kernel event log uses CLOCK_MONOTONIC timestamps,
* not CLOCK_REALTIME timestamps. These differ by a constant
* but the constant is not available in user mode.
* This estimate will be off by one syscall round-trip.
*/
clib_time_init (&em->cpu_timer);
em->init_time.cpu = em->cpu_timer.init_cpu_time;
syscall (SYS_clock_gettime, CLOCK_MONOTONIC, &ts);
/* enable kernel tracing */
if (write (enable_fd, "1\n", 2) != 2)
{
clib_unix_warning ("enable tracing");
close (enable_fd);
return;
}
close (enable_fd);
}
u8 *
format_sched_event (u8 * s, va_list * va)
{
sched_event_t *e = va_arg (*va, sched_event_t *);
s = format (s, "cpu %d task %10s type %s timestamp %12.6f\n",
e->cpu, e->task, e->type ? "WAKEUP " : "RUNNING", e->timestamp);
return s;
}
sched_event_t *
parse_sched_switch_trace (u8 * tdata, u32 * index)
{
u8 *cp = tdata + *index;
u8 *limit = tdata + vec_len (tdata);
int colons;
static sched_event_t event;
sched_event_t *e = &event;
static u8 *task_name;
u32 secs, usecs;
int i;
again:
/* eat leading w/s */
while (cp < limit && (*cp == ' ' && *cp == '\t'))
cp++;
if (cp == limit)
return 0;
/* header line */
if (*cp == '#')
{
while (cp < limit && (*cp != '\n'))
cp++;
if (*cp == '\n')
{
cp++;
goto again;
}
clib_warning ("bugger 0");
return 0;
}
while (cp < limit && *cp != ']')
cp++;
if (*cp == 0)
return 0;
if (*cp != ']')
{
clib_warning ("bugger 0.1");
return 0;
}
cp++;
while (cp < limit && (*cp == ' ' && *cp == '\t'))
cp++;
if (cp == limit)
{
clib_warning ("bugger 0.2");
return 0;
}
secs = atoi (cp);
while (cp < limit && (*cp != '.'))
cp++;
if (cp == limit)
{
clib_warning ("bugger 0.3");
return 0;
}
cp++;
usecs = atoi (cp);
e->timestamp = ((f64) secs) + ((f64) usecs) * 1e-6;
/* eat up to third colon */
for (i = 0; i < 3; i++)
{
while (cp < limit && *cp != ':')
cp++;
cp++;
}
--cp;
if (*cp != ':')
{
clib_warning ("bugger 1");
return 0;
}
/* aim at '>' (switch-to) / '+' (wakeup) */
cp += 5;
if (cp >= limit)
{
clib_warning ("bugger 2");
return 0;
}
if (*cp == '>')
e->type = RUNNING;
else if (*cp == '+')
e->type = WAKEUP;
else
{
clib_warning ("bugger 3");
return 0;
}
cp += 3;
if (cp >= limit)
{
clib_warning ("bugger 4");
return 0;
}
e->cpu = atoi (cp);
cp += 4;
if (cp >= limit)
{
clib_warning ("bugger 4");
return 0;
}
while (cp < limit && (*cp == ' ' || *cp == '\t'))
cp++;
e->pid = atoi (cp);
for (i = 0; i < 2; i++)
{
while (cp < limit && *cp != ':')
cp++;
cp++;
}
--cp;
if (*cp != ':')
{
clib_warning ("bugger 5");
return 0;
}
cp += 3;
if (cp >= limit)
{
clib_warning ("bugger 6");
return 0;
}
while (cp < limit && (*cp != ' ' && *cp != '\n'))
{
vec_add1 (task_name, *cp);
cp++;
}
vec_add1 (task_name, 0);
/* _vec_len() = 0 in caller */
e->task = task_name;
if (cp < limit)
cp++;
*index = cp - tdata;
return e;
}
static u32
elog_id_for_pid (elog_main_t * em, u8 * name, u32 pid)
{
uword *p, r;
mhash_t *h = &em->string_table_hash;
if (!em->string_table_hash.hash)
mhash_init (h, sizeof (uword), sizeof (pid));
p = mhash_get (h, &pid);
if (p)
return p[0];
r = elog_string (em, "%s(%d)", name, pid);
mhash_set (h, &pid, r, /* old_value */ 0);
return r;
}
void
kelog_collect_sched_switch_trace (elog_main_t * em)
{
int enable_fd, data_fd;
char *trace_enable = "/debug/tracing/tracing_enabled";
char *trace_data = "/debug/tracing/trace";
u8 *data = 0;
u8 *dp;
int bytes, total_bytes;
u32 pos;
sched_event_t *evt;
u64 nsec_to_add;
u32 index;
f64 clocks_per_sec;
enable_fd = open (trace_enable, O_RDWR);
if (enable_fd < 0)
{
clib_warning ("Couldn't open %s", trace_enable);
return;
}
/* disable kernel tracing */
if (write (enable_fd, "0\n", 2) != 2)
{
clib_unix_warning ("disable tracing");
close (enable_fd);
return;
}
close (enable_fd);
/* Read the trace data */
data_fd = open (trace_data, O_RDWR);
if (data_fd < 0)
{
clib_warning ("Couldn't open %s", trace_data);
return;
}
/*
* Extract trace into a vector. Note that seq_printf() [kernel]
* is not guaranteed to produce 4096 bytes at a time.
*/
vec_validate (data, 4095);
total_bytes = 0;
pos = 0;
while (1)
{
bytes = read (data_fd, data + pos, 4096);
if (bytes <= 0)
break;
total_bytes += bytes;
_vec_len (data) = total_bytes;
pos = vec_len (data);
vec_validate (data, vec_len (data) + 4095);
}
vec_add1 (data, 0);
/* Synthesize events */
em->is_enabled = 1;
index = 0;
while ((evt = parse_sched_switch_trace (data, &index)))
{
u64 fake_cpu_clock;
fake_cpu_clock = evt->timestamp * em->cpu_timer.clocks_per_second;
{
ELOG_TYPE_DECLARE (e) =
{
.format = "%d: %s %s",.format_args = "i4T4t4",.n_enum_strings =
2,.enum_strings =
{
"running", "wakeup",}
,};
struct
{
u32 cpu, string_table_offset, which;
} *ed;
ed = elog_event_data_not_inline (em, &__ELOG_TYPE_VAR (e),
&em->default_track, fake_cpu_clock);
ed->cpu = evt->cpu;
ed->string_table_offset = elog_id_for_pid (em, evt->task, evt->pid);
ed->which = evt->type;
}
_vec_len (evt->task) = 0;
}
em->is_enabled = 0;
}
/*
* fd.io coding-style-patch-verification: ON
*
* Local Variables:
* eval: (c-set-style "gnu")
* End:
*/