blob: 88428ee8f2ebaba47d21639c480b691b6f5e4572 [file] [log] [blame]
Ed Warnickecb9cada2015-12-08 15:45:58 -07001/*
2 Copyright (c) 2010 Cisco and/or its affiliates.
3
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15*/
16
17#include <vppinfra/error.h>
18#include <vppinfra/unix.h>
19#include <vppinfra/elog.h>
20#include <vppinfra/format.h>
21#include <vppinfra/os.h>
22
23#include <sys/types.h>
24#include <sys/stat.h>
25#include <fcntl.h>
26#include <time.h>
27
28typedef enum
Dave Barachc3799992016-08-15 11:12:27 -040029{
30 RUNNING = 0,
31 WAKEUP,
32} sched_event_type_t;
Ed Warnickecb9cada2015-12-08 15:45:58 -070033
Dave Barachc3799992016-08-15 11:12:27 -040034typedef struct
Ed Warnickecb9cada2015-12-08 15:45:58 -070035{
36 u32 cpu;
37 u8 *task;
38 u32 pid;
39 f64 timestamp;
40 sched_event_type_t type;
41} sched_event_t;
42
Dave Barachc3799992016-08-15 11:12:27 -040043void
44kelog_init (elog_main_t * em, char *kernel_tracer, u32 n_events)
Ed Warnickecb9cada2015-12-08 15:45:58 -070045{
46 int enable_fd, current_tracer_fd, data_fd;
47 int len;
48 struct timespec ts, ts2;
49 char *trace_enable = "/debug/tracing/tracing_enabled";
50 char *current_tracer = "/debug/tracing/current_tracer";
51 char *trace_data = "/debug/tracing/trace";
52 f64 realtime, monotonic;
53 f64 freq, secs_per_clock;
54
55 ASSERT (kernel_tracer);
56
57 /*$$$$ fixme */
Dave Barachc3799992016-08-15 11:12:27 -040058 n_events = 1 << 18;
Ed Warnickecb9cada2015-12-08 15:45:58 -070059
60 /* init first so we won't hurt ourselves if we bail */
61 elog_init (em, n_events);
62
63 enable_fd = open (trace_enable, O_RDWR);
64 if (enable_fd < 0)
65 {
66 clib_warning ("Couldn't open %s", trace_enable);
67 return;
68 }
69 /* disable kernel tracing */
Dave Barachc3799992016-08-15 11:12:27 -040070 if (write (enable_fd, "0\n", 2) != 2)
Ed Warnickecb9cada2015-12-08 15:45:58 -070071 {
72 clib_unix_warning ("disable tracing");
Dave Barachc3799992016-08-15 11:12:27 -040073 close (enable_fd);
Ed Warnickecb9cada2015-12-08 15:45:58 -070074 return;
75 }
Dave Barachc3799992016-08-15 11:12:27 -040076
77 /*
Ed Warnickecb9cada2015-12-08 15:45:58 -070078 * open + clear the data buffer.
79 * see .../linux/kernel/trace/trace.c:tracing_open()
80 */
81 data_fd = open (trace_data, O_RDWR | O_TRUNC);
Dave Barachc3799992016-08-15 11:12:27 -040082 if (data_fd < 0)
Ed Warnickecb9cada2015-12-08 15:45:58 -070083 {
84 clib_warning ("Couldn't open+clear %s", trace_data);
85 return;
86 }
Dave Barachc3799992016-08-15 11:12:27 -040087 close (data_fd);
Ed Warnickecb9cada2015-12-08 15:45:58 -070088
89 /* configure tracing */
90 current_tracer_fd = open (current_tracer, O_RDWR);
91
92 if (current_tracer_fd < 0)
93 {
94 clib_warning ("Couldn't open %s", current_tracer);
Dave Barachc3799992016-08-15 11:12:27 -040095 close (enable_fd);
Ed Warnickecb9cada2015-12-08 15:45:58 -070096 return;
97 }
98
Dave Barachc3799992016-08-15 11:12:27 -040099 len = strlen (kernel_tracer);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700100
Dave Barachc3799992016-08-15 11:12:27 -0400101 if (write (current_tracer_fd, kernel_tracer, len) != len)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700102 {
103 clib_unix_warning ("configure trace");
Dave Barachc3799992016-08-15 11:12:27 -0400104 close (current_tracer_fd);
105 close (enable_fd);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700106 return;
107 }
Ed Warnickecb9cada2015-12-08 15:45:58 -0700108
Dave Barachc3799992016-08-15 11:12:27 -0400109 close (current_tracer_fd);
110
111 /*
Ed Warnickecb9cada2015-12-08 15:45:58 -0700112 * The kernel event log uses CLOCK_MONOTONIC timestamps,
113 * not CLOCK_REALTIME timestamps. These differ by a constant
114 * but the constant is not available in user mode.
115 * This estimate will be off by one syscall round-trip.
116 */
117 clib_time_init (&em->cpu_timer);
118 em->init_time.cpu = em->cpu_timer.init_cpu_time;
119 syscall (SYS_clock_gettime, CLOCK_MONOTONIC, &ts);
Dave Barachc3799992016-08-15 11:12:27 -0400120
Ed Warnickecb9cada2015-12-08 15:45:58 -0700121 /* enable kernel tracing */
Dave Barachc3799992016-08-15 11:12:27 -0400122 if (write (enable_fd, "1\n", 2) != 2)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700123 {
124 clib_unix_warning ("enable tracing");
Dave Barachc3799992016-08-15 11:12:27 -0400125 close (enable_fd);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700126 return;
127 }
128
Dave Barachc3799992016-08-15 11:12:27 -0400129 close (enable_fd);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700130}
131
132
Dave Barachc3799992016-08-15 11:12:27 -0400133u8 *
134format_sched_event (u8 * s, va_list * va)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700135{
136 sched_event_t *e = va_arg (*va, sched_event_t *);
137
138 s = format (s, "cpu %d task %10s type %s timestamp %12.6f\n",
Dave Barachc3799992016-08-15 11:12:27 -0400139 e->cpu, e->task, e->type ? "WAKEUP " : "RUNNING", e->timestamp);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700140
141 return s;
142}
143
Dave Barachc3799992016-08-15 11:12:27 -0400144sched_event_t *
145parse_sched_switch_trace (u8 * tdata, u32 * index)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700146{
147 u8 *cp = tdata + *index;
Dave Barachc3799992016-08-15 11:12:27 -0400148 u8 *limit = tdata + vec_len (tdata);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700149 int colons;
150 static sched_event_t event;
151 sched_event_t *e = &event;
152 static u8 *task_name;
153 u32 secs, usecs;
154 int i;
155
Dave Barachc3799992016-08-15 11:12:27 -0400156again:
Ed Warnickecb9cada2015-12-08 15:45:58 -0700157 /* eat leading w/s */
158 while (cp < limit && (*cp == ' ' && *cp == '\t'))
159 cp++;
160 if (cp == limit)
161 return 0;
Dave Barachc3799992016-08-15 11:12:27 -0400162
Ed Warnickecb9cada2015-12-08 15:45:58 -0700163 /* header line */
164 if (*cp == '#')
165 {
166 while (cp < limit && (*cp != '\n'))
Dave Barachc3799992016-08-15 11:12:27 -0400167 cp++;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700168 if (*cp == '\n')
Dave Barachc3799992016-08-15 11:12:27 -0400169 {
170 cp++;
171 goto again;
172 }
Ed Warnickecb9cada2015-12-08 15:45:58 -0700173 clib_warning ("bugger 0");
174 return 0;
175 }
176
177 while (cp < limit && *cp != ']')
178 cp++;
179
180 if (*cp == 0)
181 return 0;
182
183 if (*cp != ']')
184 {
185 clib_warning ("bugger 0.1");
186 return 0;
187 }
Dave Barachc3799992016-08-15 11:12:27 -0400188
Ed Warnickecb9cada2015-12-08 15:45:58 -0700189 cp++;
190 while (cp < limit && (*cp == ' ' && *cp == '\t'))
191 cp++;
192 if (cp == limit)
193 {
194 clib_warning ("bugger 0.2");
195 return 0;
196 }
Dave Barachc3799992016-08-15 11:12:27 -0400197
198 secs = atoi (cp);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700199
200 while (cp < limit && (*cp != '.'))
201 cp++;
202
203 if (cp == limit)
204 {
205 clib_warning ("bugger 0.3");
206 return 0;
207 }
Dave Barachc3799992016-08-15 11:12:27 -0400208
Ed Warnickecb9cada2015-12-08 15:45:58 -0700209 cp++;
210
211 usecs = atoi (cp);
212
Dave Barachc3799992016-08-15 11:12:27 -0400213 e->timestamp = ((f64) secs) + ((f64) usecs) * 1e-6;
214
Ed Warnickecb9cada2015-12-08 15:45:58 -0700215 /* eat up to third colon */
216 for (i = 0; i < 3; i++)
217 {
218 while (cp < limit && *cp != ':')
Dave Barachc3799992016-08-15 11:12:27 -0400219 cp++;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700220 cp++;
221 }
222 --cp;
223 if (*cp != ':')
224 {
225 clib_warning ("bugger 1");
226 return 0;
227 }
228 /* aim at '>' (switch-to) / '+' (wakeup) */
229 cp += 5;
230 if (cp >= limit)
231 {
232 clib_warning ("bugger 2");
233 return 0;
234 }
235 if (*cp == '>')
236 e->type = RUNNING;
237 else if (*cp == '+')
238 e->type = WAKEUP;
239 else
240 {
241 clib_warning ("bugger 3");
242 return 0;
243 }
244
245 cp += 3;
Dave Barachc3799992016-08-15 11:12:27 -0400246 if (cp >= limit)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700247 {
248 clib_warning ("bugger 4");
249 return 0;
250 }
Dave Barachc3799992016-08-15 11:12:27 -0400251
Ed Warnickecb9cada2015-12-08 15:45:58 -0700252 e->cpu = atoi (cp);
253 cp += 4;
Dave Barachc3799992016-08-15 11:12:27 -0400254
255 if (cp >= limit)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700256 {
257 clib_warning ("bugger 4");
258 return 0;
259 }
260 while (cp < limit && (*cp == ' ' || *cp == '\t'))
261 cp++;
262
263 e->pid = atoi (cp);
Dave Barachc3799992016-08-15 11:12:27 -0400264
Ed Warnickecb9cada2015-12-08 15:45:58 -0700265 for (i = 0; i < 2; i++)
266 {
267 while (cp < limit && *cp != ':')
Dave Barachc3799992016-08-15 11:12:27 -0400268 cp++;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700269 cp++;
270 }
271 --cp;
272 if (*cp != ':')
273 {
274 clib_warning ("bugger 5");
275 return 0;
276 }
277
278 cp += 3;
Dave Barachc3799992016-08-15 11:12:27 -0400279 if (cp >= limit)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700280 {
281 clib_warning ("bugger 6");
282 return 0;
283 }
284 while (cp < limit && (*cp != ' ' && *cp != '\n'))
285 {
Dave Barachc3799992016-08-15 11:12:27 -0400286 vec_add1 (task_name, *cp);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700287 cp++;
288 }
Dave Barachc3799992016-08-15 11:12:27 -0400289 vec_add1 (task_name, 0);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700290 /* _vec_len() = 0 in caller */
291 e->task = task_name;
292
293 if (cp < limit)
Dave Barachc3799992016-08-15 11:12:27 -0400294 cp++;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700295
296 *index = cp - tdata;
297 return e;
298}
299
Dave Barachc3799992016-08-15 11:12:27 -0400300static u32
301elog_id_for_pid (elog_main_t * em, u8 * name, u32 pid)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700302{
Dave Barachc3799992016-08-15 11:12:27 -0400303 uword *p, r;
304 mhash_t *h = &em->string_table_hash;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700305
Dave Barachc3799992016-08-15 11:12:27 -0400306 if (!em->string_table_hash.hash)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700307 mhash_init (h, sizeof (uword), sizeof (pid));
Dave Barachc3799992016-08-15 11:12:27 -0400308
Ed Warnickecb9cada2015-12-08 15:45:58 -0700309 p = mhash_get (h, &pid);
310 if (p)
311 return p[0];
312 r = elog_string (em, "%s(%d)", name, pid);
313 mhash_set (h, &pid, r, /* old_value */ 0);
314 return r;
315}
316
Dave Barachc3799992016-08-15 11:12:27 -0400317void
318kelog_collect_sched_switch_trace (elog_main_t * em)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700319{
320 int enable_fd, data_fd;
321 char *trace_enable = "/debug/tracing/tracing_enabled";
322 char *trace_data = "/debug/tracing/trace";
323 u8 *data = 0;
324 u8 *dp;
325 int bytes, total_bytes;
326 u32 pos;
327 sched_event_t *evt;
328 u64 nsec_to_add;
329 u32 index;
330 f64 clocks_per_sec;
Dave Barachc3799992016-08-15 11:12:27 -0400331
Ed Warnickecb9cada2015-12-08 15:45:58 -0700332 enable_fd = open (trace_enable, O_RDWR);
333 if (enable_fd < 0)
334 {
335 clib_warning ("Couldn't open %s", trace_enable);
336 return;
337 }
338 /* disable kernel tracing */
Dave Barachc3799992016-08-15 11:12:27 -0400339 if (write (enable_fd, "0\n", 2) != 2)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700340 {
341 clib_unix_warning ("disable tracing");
Dave Barachc3799992016-08-15 11:12:27 -0400342 close (enable_fd);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700343 return;
344 }
Dave Barachc3799992016-08-15 11:12:27 -0400345 close (enable_fd);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700346
347 /* Read the trace data */
348 data_fd = open (trace_data, O_RDWR);
349 if (data_fd < 0)
350 {
351 clib_warning ("Couldn't open %s", trace_data);
352 return;
353 }
354
Dave Barachc3799992016-08-15 11:12:27 -0400355 /*
Ed Warnickecb9cada2015-12-08 15:45:58 -0700356 * Extract trace into a vector. Note that seq_printf() [kernel]
357 * is not guaranteed to produce 4096 bytes at a time.
358 */
359 vec_validate (data, 4095);
360 total_bytes = 0;
361 pos = 0;
362 while (1)
363 {
Dave Barachc3799992016-08-15 11:12:27 -0400364 bytes = read (data_fd, data + pos, 4096);
365 if (bytes <= 0)
366 break;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700367
368 total_bytes += bytes;
Dave Barachc3799992016-08-15 11:12:27 -0400369 _vec_len (data) = total_bytes;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700370
Dave Barachc3799992016-08-15 11:12:27 -0400371 pos = vec_len (data);
372 vec_validate (data, vec_len (data) + 4095);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700373 }
Dave Barachc3799992016-08-15 11:12:27 -0400374 vec_add1 (data, 0);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700375
376 /* Synthesize events */
377 em->is_enabled = 1;
378
379 index = 0;
380 while ((evt = parse_sched_switch_trace (data, &index)))
381 {
382 u64 fake_cpu_clock;
383
384 fake_cpu_clock = evt->timestamp * em->cpu_timer.clocks_per_second;
385 {
Dave Barachc3799992016-08-15 11:12:27 -0400386 ELOG_TYPE_DECLARE (e) =
387 {
388 .format = "%d: %s %s",.format_args = "i4T4t4",.n_enum_strings =
389 2,.enum_strings =
390 {
391 "running", "wakeup",}
392 ,};
393 struct
394 {
395 u32 cpu, string_table_offset, which;
396 } *ed;
397
398 ed = elog_event_data_not_inline (em, &__ELOG_TYPE_VAR (e),
399 &em->default_track, fake_cpu_clock);
400 ed->cpu = evt->cpu;
401 ed->string_table_offset = elog_id_for_pid (em, evt->task, evt->pid);
402 ed->which = evt->type;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700403 }
Dave Barachc3799992016-08-15 11:12:27 -0400404 _vec_len (evt->task) = 0;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700405 }
406 em->is_enabled = 0;
407}
Dave Barachc3799992016-08-15 11:12:27 -0400408
409/*
410 * fd.io coding-style-patch-verification: ON
411 *
412 * Local Variables:
413 * eval: (c-set-style "gnu")
414 * End:
415 */