File-copy from v4.4.100
This is the result of 'cp' from a linux-stable tree with the 'v4.4.100'
tag checked out (commit 26d6298789e695c9f627ce49a7bbd2286405798a) on
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
Please refer to that tree for all history prior to this point.
Change-Id: I8a9ee2aea93cd29c52c847d0ce33091a73ae6afe
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
new file mode 100644
index 0000000..edd638b
--- /dev/null
+++ b/samples/bpf/Makefile
@@ -0,0 +1,79 @@
+# kbuild trick to avoid linker error. Can be omitted if a module is built.
+obj- := dummy.o
+
+# List of programs to build
+hostprogs-y := test_verifier test_maps
+hostprogs-y += sock_example
+hostprogs-y += fds_example
+hostprogs-y += sockex1
+hostprogs-y += sockex2
+hostprogs-y += sockex3
+hostprogs-y += tracex1
+hostprogs-y += tracex2
+hostprogs-y += tracex3
+hostprogs-y += tracex4
+hostprogs-y += tracex5
+hostprogs-y += tracex6
+hostprogs-y += trace_output
+hostprogs-y += lathist
+
+test_verifier-objs := test_verifier.o libbpf.o
+test_maps-objs := test_maps.o libbpf.o
+sock_example-objs := sock_example.o libbpf.o
+fds_example-objs := bpf_load.o libbpf.o fds_example.o
+sockex1-objs := bpf_load.o libbpf.o sockex1_user.o
+sockex2-objs := bpf_load.o libbpf.o sockex2_user.o
+sockex3-objs := bpf_load.o libbpf.o sockex3_user.o
+tracex1-objs := bpf_load.o libbpf.o tracex1_user.o
+tracex2-objs := bpf_load.o libbpf.o tracex2_user.o
+tracex3-objs := bpf_load.o libbpf.o tracex3_user.o
+tracex4-objs := bpf_load.o libbpf.o tracex4_user.o
+tracex5-objs := bpf_load.o libbpf.o tracex5_user.o
+tracex6-objs := bpf_load.o libbpf.o tracex6_user.o
+trace_output-objs := bpf_load.o libbpf.o trace_output_user.o
+lathist-objs := bpf_load.o libbpf.o lathist_user.o
+
+# Tell kbuild to always build the programs
+always := $(hostprogs-y)
+always += sockex1_kern.o
+always += sockex2_kern.o
+always += sockex3_kern.o
+always += tracex1_kern.o
+always += tracex2_kern.o
+always += tracex3_kern.o
+always += tracex4_kern.o
+always += tracex5_kern.o
+always += tracex6_kern.o
+always += trace_output_kern.o
+always += tcbpf1_kern.o
+always += lathist_kern.o
+
+HOSTCFLAGS += -I$(objtree)/usr/include
+
+HOSTCFLAGS_bpf_load.o += -I$(objtree)/usr/include -Wno-unused-variable
+HOSTLOADLIBES_fds_example += -lelf
+HOSTLOADLIBES_sockex1 += -lelf
+HOSTLOADLIBES_sockex2 += -lelf
+HOSTLOADLIBES_sockex3 += -lelf
+HOSTLOADLIBES_tracex1 += -lelf
+HOSTLOADLIBES_tracex2 += -lelf
+HOSTLOADLIBES_tracex3 += -lelf
+HOSTLOADLIBES_tracex4 += -lelf -lrt
+HOSTLOADLIBES_tracex5 += -lelf
+HOSTLOADLIBES_tracex6 += -lelf
+HOSTLOADLIBES_trace_output += -lelf -lrt
+HOSTLOADLIBES_lathist += -lelf
+
+# point this to your LLVM backend with bpf support
+LLC=$(srctree)/tools/bpf/llvm/bld/Debug+Asserts/bin/llc
+
+# asm/sysreg.h inline assmbly used by it is incompatible with llvm.
+# But, ehere is not easy way to fix it, so just exclude it since it is
+# useless for BPF samples.
+$(obj)/%.o: $(src)/%.c
+ clang $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(EXTRA_CFLAGS) \
+ -D__KERNEL__ -D__ASM_SYSREG_H -Wno-unused-value -Wno-pointer-sign \
+ -O2 -emit-llvm -c $< -o -| $(LLC) -march=bpf -filetype=obj -o $@
+ clang $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(EXTRA_CFLAGS) \
+ -D__KERNEL__ -D__ASM_SYSREG_H -Wno-unused-value -Wno-pointer-sign \
+ -O2 -emit-llvm -c $< -o -| $(LLC) -march=bpf -filetype=asm -o $@.s
diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h
new file mode 100644
index 0000000..7ad19e1
--- /dev/null
+++ b/samples/bpf/bpf_helpers.h
@@ -0,0 +1,108 @@
+#ifndef __BPF_HELPERS_H
+#define __BPF_HELPERS_H
+
+/* helper macro to place programs, maps, license in
+ * different sections in elf_bpf file. Section names
+ * are interpreted by elf_bpf loader
+ */
+#define SEC(NAME) __attribute__((section(NAME), used))
+
+/* helper functions called from eBPF programs written in C */
+static void *(*bpf_map_lookup_elem)(void *map, void *key) =
+ (void *) BPF_FUNC_map_lookup_elem;
+static int (*bpf_map_update_elem)(void *map, void *key, void *value,
+ unsigned long long flags) =
+ (void *) BPF_FUNC_map_update_elem;
+static int (*bpf_map_delete_elem)(void *map, void *key) =
+ (void *) BPF_FUNC_map_delete_elem;
+static int (*bpf_probe_read)(void *dst, int size, void *unsafe_ptr) =
+ (void *) BPF_FUNC_probe_read;
+static unsigned long long (*bpf_ktime_get_ns)(void) =
+ (void *) BPF_FUNC_ktime_get_ns;
+static int (*bpf_trace_printk)(const char *fmt, int fmt_size, ...) =
+ (void *) BPF_FUNC_trace_printk;
+static void (*bpf_tail_call)(void *ctx, void *map, int index) =
+ (void *) BPF_FUNC_tail_call;
+static unsigned long long (*bpf_get_smp_processor_id)(void) =
+ (void *) BPF_FUNC_get_smp_processor_id;
+static unsigned long long (*bpf_get_current_pid_tgid)(void) =
+ (void *) BPF_FUNC_get_current_pid_tgid;
+static unsigned long long (*bpf_get_current_uid_gid)(void) =
+ (void *) BPF_FUNC_get_current_uid_gid;
+static int (*bpf_get_current_comm)(void *buf, int buf_size) =
+ (void *) BPF_FUNC_get_current_comm;
+static int (*bpf_perf_event_read)(void *map, int index) =
+ (void *) BPF_FUNC_perf_event_read;
+static int (*bpf_clone_redirect)(void *ctx, int ifindex, int flags) =
+ (void *) BPF_FUNC_clone_redirect;
+static int (*bpf_redirect)(int ifindex, int flags) =
+ (void *) BPF_FUNC_redirect;
+static int (*bpf_perf_event_output)(void *ctx, void *map, int index, void *data, int size) =
+ (void *) BPF_FUNC_perf_event_output;
+
+/* llvm builtin functions that eBPF C program may use to
+ * emit BPF_LD_ABS and BPF_LD_IND instructions
+ */
+struct sk_buff;
+unsigned long long load_byte(void *skb,
+ unsigned long long off) asm("llvm.bpf.load.byte");
+unsigned long long load_half(void *skb,
+ unsigned long long off) asm("llvm.bpf.load.half");
+unsigned long long load_word(void *skb,
+ unsigned long long off) asm("llvm.bpf.load.word");
+
+/* a helper structure used by eBPF C program
+ * to describe map attributes to elf_bpf loader
+ */
+struct bpf_map_def {
+ unsigned int type;
+ unsigned int key_size;
+ unsigned int value_size;
+ unsigned int max_entries;
+};
+
+static int (*bpf_skb_store_bytes)(void *ctx, int off, void *from, int len, int flags) =
+ (void *) BPF_FUNC_skb_store_bytes;
+static int (*bpf_l3_csum_replace)(void *ctx, int off, int from, int to, int flags) =
+ (void *) BPF_FUNC_l3_csum_replace;
+static int (*bpf_l4_csum_replace)(void *ctx, int off, int from, int to, int flags) =
+ (void *) BPF_FUNC_l4_csum_replace;
+
+#if defined(__x86_64__)
+
+#define PT_REGS_PARM1(x) ((x)->di)
+#define PT_REGS_PARM2(x) ((x)->si)
+#define PT_REGS_PARM3(x) ((x)->dx)
+#define PT_REGS_PARM4(x) ((x)->cx)
+#define PT_REGS_PARM5(x) ((x)->r8)
+#define PT_REGS_RET(x) ((x)->sp)
+#define PT_REGS_FP(x) ((x)->bp)
+#define PT_REGS_RC(x) ((x)->ax)
+#define PT_REGS_SP(x) ((x)->sp)
+
+#elif defined(__s390x__)
+
+#define PT_REGS_PARM1(x) ((x)->gprs[2])
+#define PT_REGS_PARM2(x) ((x)->gprs[3])
+#define PT_REGS_PARM3(x) ((x)->gprs[4])
+#define PT_REGS_PARM4(x) ((x)->gprs[5])
+#define PT_REGS_PARM5(x) ((x)->gprs[6])
+#define PT_REGS_RET(x) ((x)->gprs[14])
+#define PT_REGS_FP(x) ((x)->gprs[11]) /* Works only with CONFIG_FRAME_POINTER */
+#define PT_REGS_RC(x) ((x)->gprs[2])
+#define PT_REGS_SP(x) ((x)->gprs[15])
+
+#elif defined(__aarch64__)
+
+#define PT_REGS_PARM1(x) ((x)->regs[0])
+#define PT_REGS_PARM2(x) ((x)->regs[1])
+#define PT_REGS_PARM3(x) ((x)->regs[2])
+#define PT_REGS_PARM4(x) ((x)->regs[3])
+#define PT_REGS_PARM5(x) ((x)->regs[4])
+#define PT_REGS_RET(x) ((x)->regs[30])
+#define PT_REGS_FP(x) ((x)->regs[29]) /* Works only with CONFIG_FRAME_POINTER */
+#define PT_REGS_RC(x) ((x)->regs[0])
+#define PT_REGS_SP(x) ((x)->sp)
+
+#endif
+#endif
diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c
new file mode 100644
index 0000000..da86a8e
--- /dev/null
+++ b/samples/bpf/bpf_load.c
@@ -0,0 +1,345 @@
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <libelf.h>
+#include <gelf.h>
+#include <errno.h>
+#include <unistd.h>
+#include <string.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <linux/bpf.h>
+#include <linux/filter.h>
+#include <linux/perf_event.h>
+#include <sys/syscall.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <poll.h>
+#include <ctype.h>
+#include "libbpf.h"
+#include "bpf_helpers.h"
+#include "bpf_load.h"
+
+#define DEBUGFS "/sys/kernel/debug/tracing/"
+
+static char license[128];
+static int kern_version;
+static bool processed_sec[128];
+int map_fd[MAX_MAPS];
+int prog_fd[MAX_PROGS];
+int event_fd[MAX_PROGS];
+int prog_cnt;
+int prog_array_fd = -1;
+
+static int populate_prog_array(const char *event, int prog_fd)
+{
+ int ind = atoi(event), err;
+
+ err = bpf_update_elem(prog_array_fd, &ind, &prog_fd, BPF_ANY);
+ if (err < 0) {
+ printf("failed to store prog_fd in prog_array\n");
+ return -1;
+ }
+ return 0;
+}
+
+static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
+{
+ bool is_socket = strncmp(event, "socket", 6) == 0;
+ bool is_kprobe = strncmp(event, "kprobe/", 7) == 0;
+ bool is_kretprobe = strncmp(event, "kretprobe/", 10) == 0;
+ enum bpf_prog_type prog_type;
+ char buf[256];
+ int fd, efd, err, id;
+ struct perf_event_attr attr = {};
+
+ attr.type = PERF_TYPE_TRACEPOINT;
+ attr.sample_type = PERF_SAMPLE_RAW;
+ attr.sample_period = 1;
+ attr.wakeup_events = 1;
+
+ if (is_socket) {
+ prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
+ } else if (is_kprobe || is_kretprobe) {
+ prog_type = BPF_PROG_TYPE_KPROBE;
+ } else {
+ printf("Unknown event '%s'\n", event);
+ return -1;
+ }
+
+ fd = bpf_prog_load(prog_type, prog, size, license, kern_version);
+ if (fd < 0) {
+ printf("bpf_prog_load() err=%d\n%s", errno, bpf_log_buf);
+ return -1;
+ }
+
+ prog_fd[prog_cnt++] = fd;
+
+ if (is_socket) {
+ event += 6;
+ if (*event != '/')
+ return 0;
+ event++;
+ if (!isdigit(*event)) {
+ printf("invalid prog number\n");
+ return -1;
+ }
+ return populate_prog_array(event, fd);
+ }
+
+ if (is_kprobe || is_kretprobe) {
+ if (is_kprobe)
+ event += 7;
+ else
+ event += 10;
+
+ if (*event == 0) {
+ printf("event name cannot be empty\n");
+ return -1;
+ }
+
+ if (isdigit(*event))
+ return populate_prog_array(event, fd);
+
+ snprintf(buf, sizeof(buf),
+ "echo '%c:%s %s' >> /sys/kernel/debug/tracing/kprobe_events",
+ is_kprobe ? 'p' : 'r', event, event);
+ err = system(buf);
+ if (err < 0) {
+ printf("failed to create kprobe '%s' error '%s'\n",
+ event, strerror(errno));
+ return -1;
+ }
+ }
+
+ strcpy(buf, DEBUGFS);
+ strcat(buf, "events/kprobes/");
+ strcat(buf, event);
+ strcat(buf, "/id");
+
+ efd = open(buf, O_RDONLY, 0);
+ if (efd < 0) {
+ printf("failed to open event %s\n", event);
+ return -1;
+ }
+
+ err = read(efd, buf, sizeof(buf));
+ if (err < 0 || err >= sizeof(buf)) {
+ printf("read from '%s' failed '%s'\n", event, strerror(errno));
+ return -1;
+ }
+
+ close(efd);
+
+ buf[err] = 0;
+ id = atoi(buf);
+ attr.config = id;
+
+ efd = perf_event_open(&attr, -1/*pid*/, 0/*cpu*/, -1/*group_fd*/, 0);
+ if (efd < 0) {
+ printf("event %d fd %d err %s\n", id, efd, strerror(errno));
+ return -1;
+ }
+ event_fd[prog_cnt - 1] = efd;
+ ioctl(efd, PERF_EVENT_IOC_ENABLE, 0);
+ ioctl(efd, PERF_EVENT_IOC_SET_BPF, fd);
+
+ return 0;
+}
+
+static int load_maps(struct bpf_map_def *maps, int len)
+{
+ int i;
+
+ for (i = 0; i < len / sizeof(struct bpf_map_def); i++) {
+
+ map_fd[i] = bpf_create_map(maps[i].type,
+ maps[i].key_size,
+ maps[i].value_size,
+ maps[i].max_entries);
+ if (map_fd[i] < 0)
+ return 1;
+
+ if (maps[i].type == BPF_MAP_TYPE_PROG_ARRAY)
+ prog_array_fd = map_fd[i];
+ }
+ return 0;
+}
+
+static int get_sec(Elf *elf, int i, GElf_Ehdr *ehdr, char **shname,
+ GElf_Shdr *shdr, Elf_Data **data)
+{
+ Elf_Scn *scn;
+
+ scn = elf_getscn(elf, i);
+ if (!scn)
+ return 1;
+
+ if (gelf_getshdr(scn, shdr) != shdr)
+ return 2;
+
+ *shname = elf_strptr(elf, ehdr->e_shstrndx, shdr->sh_name);
+ if (!*shname || !shdr->sh_size)
+ return 3;
+
+ *data = elf_getdata(scn, 0);
+ if (!*data || elf_getdata(scn, *data) != NULL)
+ return 4;
+
+ return 0;
+}
+
+static int parse_relo_and_apply(Elf_Data *data, Elf_Data *symbols,
+ GElf_Shdr *shdr, struct bpf_insn *insn)
+{
+ int i, nrels;
+
+ nrels = shdr->sh_size / shdr->sh_entsize;
+
+ for (i = 0; i < nrels; i++) {
+ GElf_Sym sym;
+ GElf_Rel rel;
+ unsigned int insn_idx;
+
+ gelf_getrel(data, i, &rel);
+
+ insn_idx = rel.r_offset / sizeof(struct bpf_insn);
+
+ gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &sym);
+
+ if (insn[insn_idx].code != (BPF_LD | BPF_IMM | BPF_DW)) {
+ printf("invalid relo for insn[%d].code 0x%x\n",
+ insn_idx, insn[insn_idx].code);
+ return 1;
+ }
+ insn[insn_idx].src_reg = BPF_PSEUDO_MAP_FD;
+ insn[insn_idx].imm = map_fd[sym.st_value / sizeof(struct bpf_map_def)];
+ }
+
+ return 0;
+}
+
+int load_bpf_file(char *path)
+{
+ int fd, i;
+ Elf *elf;
+ GElf_Ehdr ehdr;
+ GElf_Shdr shdr, shdr_prog;
+ Elf_Data *data, *data_prog, *symbols = NULL;
+ char *shname, *shname_prog;
+
+ if (elf_version(EV_CURRENT) == EV_NONE)
+ return 1;
+
+ fd = open(path, O_RDONLY, 0);
+ if (fd < 0)
+ return 1;
+
+ elf = elf_begin(fd, ELF_C_READ, NULL);
+
+ if (!elf)
+ return 1;
+
+ if (gelf_getehdr(elf, &ehdr) != &ehdr)
+ return 1;
+
+ /* clear all kprobes */
+ i = system("echo \"\" > /sys/kernel/debug/tracing/kprobe_events");
+
+ /* scan over all elf sections to get license and map info */
+ for (i = 1; i < ehdr.e_shnum; i++) {
+
+ if (get_sec(elf, i, &ehdr, &shname, &shdr, &data))
+ continue;
+
+ if (0) /* helpful for llvm debugging */
+ printf("section %d:%s data %p size %zd link %d flags %d\n",
+ i, shname, data->d_buf, data->d_size,
+ shdr.sh_link, (int) shdr.sh_flags);
+
+ if (strcmp(shname, "license") == 0) {
+ processed_sec[i] = true;
+ memcpy(license, data->d_buf, data->d_size);
+ } else if (strcmp(shname, "version") == 0) {
+ processed_sec[i] = true;
+ if (data->d_size != sizeof(int)) {
+ printf("invalid size of version section %zd\n",
+ data->d_size);
+ return 1;
+ }
+ memcpy(&kern_version, data->d_buf, sizeof(int));
+ } else if (strcmp(shname, "maps") == 0) {
+ processed_sec[i] = true;
+ if (load_maps(data->d_buf, data->d_size))
+ return 1;
+ } else if (shdr.sh_type == SHT_SYMTAB) {
+ symbols = data;
+ }
+ }
+
+ /* load programs that need map fixup (relocations) */
+ for (i = 1; i < ehdr.e_shnum; i++) {
+
+ if (get_sec(elf, i, &ehdr, &shname, &shdr, &data))
+ continue;
+ if (shdr.sh_type == SHT_REL) {
+ struct bpf_insn *insns;
+
+ if (get_sec(elf, shdr.sh_info, &ehdr, &shname_prog,
+ &shdr_prog, &data_prog))
+ continue;
+
+ insns = (struct bpf_insn *) data_prog->d_buf;
+
+ processed_sec[shdr.sh_info] = true;
+ processed_sec[i] = true;
+
+ if (parse_relo_and_apply(data, symbols, &shdr, insns))
+ continue;
+
+ if (memcmp(shname_prog, "kprobe/", 7) == 0 ||
+ memcmp(shname_prog, "kretprobe/", 10) == 0 ||
+ memcmp(shname_prog, "socket", 6) == 0)
+ load_and_attach(shname_prog, insns, data_prog->d_size);
+ }
+ }
+
+ /* load programs that don't use maps */
+ for (i = 1; i < ehdr.e_shnum; i++) {
+
+ if (processed_sec[i])
+ continue;
+
+ if (get_sec(elf, i, &ehdr, &shname, &shdr, &data))
+ continue;
+
+ if (memcmp(shname, "kprobe/", 7) == 0 ||
+ memcmp(shname, "kretprobe/", 10) == 0 ||
+ memcmp(shname, "socket", 6) == 0)
+ load_and_attach(shname, data->d_buf, data->d_size);
+ }
+
+ close(fd);
+ return 0;
+}
+
+void read_trace_pipe(void)
+{
+ int trace_fd;
+
+ trace_fd = open(DEBUGFS "trace_pipe", O_RDONLY, 0);
+ if (trace_fd < 0)
+ return;
+
+ while (1) {
+ static char buf[4096];
+ ssize_t sz;
+
+ sz = read(trace_fd, buf, sizeof(buf));
+ if (sz > 0) {
+ buf[sz] = 0;
+ puts(buf);
+ }
+ }
+}
diff --git a/samples/bpf/bpf_load.h b/samples/bpf/bpf_load.h
new file mode 100644
index 0000000..cbd7c2b
--- /dev/null
+++ b/samples/bpf/bpf_load.h
@@ -0,0 +1,27 @@
+#ifndef __BPF_LOAD_H
+#define __BPF_LOAD_H
+
+#define MAX_MAPS 32
+#define MAX_PROGS 32
+
+extern int map_fd[MAX_MAPS];
+extern int prog_fd[MAX_PROGS];
+extern int event_fd[MAX_PROGS];
+
+/* parses elf file compiled by llvm .c->.o
+ * . parses 'maps' section and creates maps via BPF syscall
+ * . parses 'license' section and passes it to syscall
+ * . parses elf relocations for BPF maps and adjusts BPF_LD_IMM64 insns by
+ * storing map_fd into insn->imm and marking such insns as BPF_PSEUDO_MAP_FD
+ * . loads eBPF programs via BPF syscall
+ *
+ * One ELF file can contain multiple BPF programs which will be loaded
+ * and their FDs stored stored in prog_fd array
+ *
+ * returns zero on success
+ */
+int load_bpf_file(char *path);
+
+void read_trace_pipe(void);
+
+#endif
diff --git a/samples/bpf/fds_example.c b/samples/bpf/fds_example.c
new file mode 100644
index 0000000..e2fd16c
--- /dev/null
+++ b/samples/bpf/fds_example.c
@@ -0,0 +1,183 @@
+#include <linux/unistd.h>
+#include <linux/bpf.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <string.h>
+#include <assert.h>
+#include <errno.h>
+
+#include <sys/types.h>
+#include <sys/socket.h>
+
+#include "bpf_load.h"
+#include "libbpf.h"
+
+#define BPF_F_PIN (1 << 0)
+#define BPF_F_GET (1 << 1)
+#define BPF_F_PIN_GET (BPF_F_PIN | BPF_F_GET)
+
+#define BPF_F_KEY (1 << 2)
+#define BPF_F_VAL (1 << 3)
+#define BPF_F_KEY_VAL (BPF_F_KEY | BPF_F_VAL)
+
+#define BPF_M_UNSPEC 0
+#define BPF_M_MAP 1
+#define BPF_M_PROG 2
+
+static void usage(void)
+{
+ printf("Usage: fds_example [...]\n");
+ printf(" -F <file> File to pin/get object\n");
+ printf(" -P |- pin object\n");
+ printf(" -G `- get object\n");
+ printf(" -m eBPF map mode\n");
+ printf(" -k <key> |- map key\n");
+ printf(" -v <value> `- map value\n");
+ printf(" -p eBPF prog mode\n");
+ printf(" -o <object> `- object file\n");
+ printf(" -h Display this help.\n");
+}
+
+static int bpf_map_create(void)
+{
+ return bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(uint32_t),
+ sizeof(uint32_t), 1024);
+}
+
+static int bpf_prog_create(const char *object)
+{
+ static const struct bpf_insn insns[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ };
+
+ if (object) {
+ assert(!load_bpf_file((char *)object));
+ return prog_fd[0];
+ } else {
+ return bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER,
+ insns, sizeof(insns), "GPL", 0);
+ }
+}
+
+static int bpf_do_map(const char *file, uint32_t flags, uint32_t key,
+ uint32_t value)
+{
+ int fd, ret;
+
+ if (flags & BPF_F_PIN) {
+ fd = bpf_map_create();
+ printf("bpf: map fd:%d (%s)\n", fd, strerror(errno));
+ assert(fd > 0);
+
+ ret = bpf_obj_pin(fd, file);
+ printf("bpf: pin ret:(%d,%s)\n", ret, strerror(errno));
+ assert(ret == 0);
+ } else {
+ fd = bpf_obj_get(file);
+ printf("bpf: get fd:%d (%s)\n", fd, strerror(errno));
+ assert(fd > 0);
+ }
+
+ if ((flags & BPF_F_KEY_VAL) == BPF_F_KEY_VAL) {
+ ret = bpf_update_elem(fd, &key, &value, 0);
+ printf("bpf: fd:%d u->(%u:%u) ret:(%d,%s)\n", fd, key, value,
+ ret, strerror(errno));
+ assert(ret == 0);
+ } else if (flags & BPF_F_KEY) {
+ ret = bpf_lookup_elem(fd, &key, &value);
+ printf("bpf: fd:%d l->(%u):%u ret:(%d,%s)\n", fd, key, value,
+ ret, strerror(errno));
+ assert(ret == 0);
+ }
+
+ return 0;
+}
+
+static int bpf_do_prog(const char *file, uint32_t flags, const char *object)
+{
+ int fd, sock, ret;
+
+ if (flags & BPF_F_PIN) {
+ fd = bpf_prog_create(object);
+ printf("bpf: prog fd:%d (%s)\n", fd, strerror(errno));
+ assert(fd > 0);
+
+ ret = bpf_obj_pin(fd, file);
+ printf("bpf: pin ret:(%d,%s)\n", ret, strerror(errno));
+ assert(ret == 0);
+ } else {
+ fd = bpf_obj_get(file);
+ printf("bpf: get fd:%d (%s)\n", fd, strerror(errno));
+ assert(fd > 0);
+ }
+
+ sock = open_raw_sock("lo");
+ assert(sock > 0);
+
+ ret = setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, &fd, sizeof(fd));
+ printf("bpf: sock:%d <- fd:%d attached ret:(%d,%s)\n", sock, fd,
+ ret, strerror(errno));
+ assert(ret == 0);
+
+ return 0;
+}
+
+int main(int argc, char **argv)
+{
+ const char *file = NULL, *object = NULL;
+ uint32_t key = 0, value = 0, flags = 0;
+ int opt, mode = BPF_M_UNSPEC;
+
+ while ((opt = getopt(argc, argv, "F:PGmk:v:po:")) != -1) {
+ switch (opt) {
+ /* General args */
+ case 'F':
+ file = optarg;
+ break;
+ case 'P':
+ flags |= BPF_F_PIN;
+ break;
+ case 'G':
+ flags |= BPF_F_GET;
+ break;
+ /* Map-related args */
+ case 'm':
+ mode = BPF_M_MAP;
+ break;
+ case 'k':
+ key = strtoul(optarg, NULL, 0);
+ flags |= BPF_F_KEY;
+ break;
+ case 'v':
+ value = strtoul(optarg, NULL, 0);
+ flags |= BPF_F_VAL;
+ break;
+ /* Prog-related args */
+ case 'p':
+ mode = BPF_M_PROG;
+ break;
+ case 'o':
+ object = optarg;
+ break;
+ default:
+ goto out;
+ }
+ }
+
+ if (!(flags & BPF_F_PIN_GET) || !file)
+ goto out;
+
+ switch (mode) {
+ case BPF_M_MAP:
+ return bpf_do_map(file, flags, key, value);
+ case BPF_M_PROG:
+ return bpf_do_prog(file, flags, object);
+ }
+out:
+ usage();
+ return -1;
+}
diff --git a/samples/bpf/lathist_kern.c b/samples/bpf/lathist_kern.c
new file mode 100644
index 0000000..18fa088
--- /dev/null
+++ b/samples/bpf/lathist_kern.c
@@ -0,0 +1,99 @@
+/* Copyright (c) 2013-2015 PLUMgrid, http://plumgrid.com
+ * Copyright (c) 2015 BMW Car IT GmbH
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <linux/version.h>
+#include <linux/ptrace.h>
+#include <uapi/linux/bpf.h>
+#include "bpf_helpers.h"
+
+#define MAX_ENTRIES 20
+#define MAX_CPU 4
+
+/* We need to stick to static allocated memory (an array instead of
+ * hash table) because managing dynamic memory from the
+ * trace_preempt_[on|off] tracepoints hooks is not supported.
+ */
+
+struct bpf_map_def SEC("maps") my_map = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(int),
+ .value_size = sizeof(u64),
+ .max_entries = MAX_CPU,
+};
+
+SEC("kprobe/trace_preempt_off")
+int bpf_prog1(struct pt_regs *ctx)
+{
+ int cpu = bpf_get_smp_processor_id();
+ u64 *ts = bpf_map_lookup_elem(&my_map, &cpu);
+
+ if (ts)
+ *ts = bpf_ktime_get_ns();
+
+ return 0;
+}
+
+static unsigned int log2(unsigned int v)
+{
+ unsigned int r;
+ unsigned int shift;
+
+ r = (v > 0xFFFF) << 4; v >>= r;
+ shift = (v > 0xFF) << 3; v >>= shift; r |= shift;
+ shift = (v > 0xF) << 2; v >>= shift; r |= shift;
+ shift = (v > 0x3) << 1; v >>= shift; r |= shift;
+ r |= (v >> 1);
+
+ return r;
+}
+
+static unsigned int log2l(unsigned long v)
+{
+ unsigned int hi = v >> 32;
+
+ if (hi)
+ return log2(hi) + 32;
+ else
+ return log2(v);
+}
+
+struct bpf_map_def SEC("maps") my_lat = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(int),
+ .value_size = sizeof(long),
+ .max_entries = MAX_CPU * MAX_ENTRIES,
+};
+
+SEC("kprobe/trace_preempt_on")
+int bpf_prog2(struct pt_regs *ctx)
+{
+ u64 *ts, cur_ts, delta;
+ int key, cpu;
+ long *val;
+
+ cpu = bpf_get_smp_processor_id();
+ ts = bpf_map_lookup_elem(&my_map, &cpu);
+ if (!ts)
+ return 0;
+
+ cur_ts = bpf_ktime_get_ns();
+ delta = log2l(cur_ts - *ts);
+
+ if (delta > MAX_ENTRIES - 1)
+ delta = MAX_ENTRIES - 1;
+
+ key = cpu * MAX_ENTRIES + delta;
+ val = bpf_map_lookup_elem(&my_lat, &key);
+ if (val)
+ __sync_fetch_and_add((long *)val, 1);
+
+ return 0;
+
+}
+
+char _license[] SEC("license") = "GPL";
+u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/samples/bpf/lathist_user.c b/samples/bpf/lathist_user.c
new file mode 100644
index 0000000..65da8c1
--- /dev/null
+++ b/samples/bpf/lathist_user.c
@@ -0,0 +1,103 @@
+/* Copyright (c) 2013-2015 PLUMgrid, http://plumgrid.com
+ * Copyright (c) 2015 BMW Car IT GmbH
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <linux/bpf.h>
+#include "libbpf.h"
+#include "bpf_load.h"
+
+#define MAX_ENTRIES 20
+#define MAX_CPU 4
+#define MAX_STARS 40
+
+struct cpu_hist {
+ long data[MAX_ENTRIES];
+ long max;
+};
+
+static struct cpu_hist cpu_hist[MAX_CPU];
+
+static void stars(char *str, long val, long max, int width)
+{
+ int i;
+
+ for (i = 0; i < (width * val / max) - 1 && i < width - 1; i++)
+ str[i] = '*';
+ if (val > max)
+ str[i - 1] = '+';
+ str[i] = '\0';
+}
+
+static void print_hist(void)
+{
+ char starstr[MAX_STARS];
+ struct cpu_hist *hist;
+ int i, j;
+
+ /* clear screen */
+ printf("\033[2J");
+
+ for (j = 0; j < MAX_CPU; j++) {
+ hist = &cpu_hist[j];
+
+ /* ignore CPUs without data (maybe offline?) */
+ if (hist->max == 0)
+ continue;
+
+ printf("CPU %d\n", j);
+ printf(" latency : count distribution\n");
+ for (i = 1; i <= MAX_ENTRIES; i++) {
+ stars(starstr, hist->data[i - 1], hist->max, MAX_STARS);
+ printf("%8ld -> %-8ld : %-8ld |%-*s|\n",
+ (1l << i) >> 1, (1l << i) - 1,
+ hist->data[i - 1], MAX_STARS, starstr);
+ }
+ }
+}
+
+static void get_data(int fd)
+{
+ long key, value;
+ int c, i;
+
+ for (i = 0; i < MAX_CPU; i++)
+ cpu_hist[i].max = 0;
+
+ for (c = 0; c < MAX_CPU; c++) {
+ for (i = 0; i < MAX_ENTRIES; i++) {
+ key = c * MAX_ENTRIES + i;
+ bpf_lookup_elem(fd, &key, &value);
+
+ cpu_hist[c].data[i] = value;
+ if (value > cpu_hist[c].max)
+ cpu_hist[c].max = value;
+ }
+ }
+}
+
+int main(int argc, char **argv)
+{
+ char filename[256];
+
+ snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+
+ if (load_bpf_file(filename)) {
+ printf("%s", bpf_log_buf);
+ return 1;
+ }
+
+ while (1) {
+ get_data(map_fd[1]);
+ print_hist();
+ sleep(5);
+ }
+
+ return 0;
+}
diff --git a/samples/bpf/libbpf.c b/samples/bpf/libbpf.c
new file mode 100644
index 0000000..65a8d48
--- /dev/null
+++ b/samples/bpf/libbpf.c
@@ -0,0 +1,154 @@
+/* eBPF mini library */
+#include <stdlib.h>
+#include <stdio.h>
+#include <linux/unistd.h>
+#include <unistd.h>
+#include <string.h>
+#include <linux/netlink.h>
+#include <linux/bpf.h>
+#include <errno.h>
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <linux/if_packet.h>
+#include <arpa/inet.h>
+#include "libbpf.h"
+
+static __u64 ptr_to_u64(void *ptr)
+{
+ return (__u64) (unsigned long) ptr;
+}
+
+int bpf_create_map(enum bpf_map_type map_type, int key_size, int value_size,
+ int max_entries)
+{
+ union bpf_attr attr = {
+ .map_type = map_type,
+ .key_size = key_size,
+ .value_size = value_size,
+ .max_entries = max_entries
+ };
+
+ return syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
+}
+
+int bpf_update_elem(int fd, void *key, void *value, unsigned long long flags)
+{
+ union bpf_attr attr = {
+ .map_fd = fd,
+ .key = ptr_to_u64(key),
+ .value = ptr_to_u64(value),
+ .flags = flags,
+ };
+
+ return syscall(__NR_bpf, BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));
+}
+
+int bpf_lookup_elem(int fd, void *key, void *value)
+{
+ union bpf_attr attr = {
+ .map_fd = fd,
+ .key = ptr_to_u64(key),
+ .value = ptr_to_u64(value),
+ };
+
+ return syscall(__NR_bpf, BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr));
+}
+
+int bpf_delete_elem(int fd, void *key)
+{
+ union bpf_attr attr = {
+ .map_fd = fd,
+ .key = ptr_to_u64(key),
+ };
+
+ return syscall(__NR_bpf, BPF_MAP_DELETE_ELEM, &attr, sizeof(attr));
+}
+
+int bpf_get_next_key(int fd, void *key, void *next_key)
+{
+ union bpf_attr attr = {
+ .map_fd = fd,
+ .key = ptr_to_u64(key),
+ .next_key = ptr_to_u64(next_key),
+ };
+
+ return syscall(__NR_bpf, BPF_MAP_GET_NEXT_KEY, &attr, sizeof(attr));
+}
+
+#define ROUND_UP(x, n) (((x) + (n) - 1u) & ~((n) - 1u))
+
+char bpf_log_buf[LOG_BUF_SIZE];
+
+int bpf_prog_load(enum bpf_prog_type prog_type,
+ const struct bpf_insn *insns, int prog_len,
+ const char *license, int kern_version)
+{
+ union bpf_attr attr = {
+ .prog_type = prog_type,
+ .insns = ptr_to_u64((void *) insns),
+ .insn_cnt = prog_len / sizeof(struct bpf_insn),
+ .license = ptr_to_u64((void *) license),
+ .log_buf = ptr_to_u64(bpf_log_buf),
+ .log_size = LOG_BUF_SIZE,
+ .log_level = 1,
+ };
+
+ /* assign one field outside of struct init to make sure any
+ * padding is zero initialized
+ */
+ attr.kern_version = kern_version;
+
+ bpf_log_buf[0] = 0;
+
+ return syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
+}
+
+int bpf_obj_pin(int fd, const char *pathname)
+{
+ union bpf_attr attr = {
+ .pathname = ptr_to_u64((void *)pathname),
+ .bpf_fd = fd,
+ };
+
+ return syscall(__NR_bpf, BPF_OBJ_PIN, &attr, sizeof(attr));
+}
+
+int bpf_obj_get(const char *pathname)
+{
+ union bpf_attr attr = {
+ .pathname = ptr_to_u64((void *)pathname),
+ };
+
+ return syscall(__NR_bpf, BPF_OBJ_GET, &attr, sizeof(attr));
+}
+
+int open_raw_sock(const char *name)
+{
+ struct sockaddr_ll sll;
+ int sock;
+
+ sock = socket(PF_PACKET, SOCK_RAW | SOCK_NONBLOCK | SOCK_CLOEXEC, htons(ETH_P_ALL));
+ if (sock < 0) {
+ printf("cannot create raw socket\n");
+ return -1;
+ }
+
+ memset(&sll, 0, sizeof(sll));
+ sll.sll_family = AF_PACKET;
+ sll.sll_ifindex = if_nametoindex(name);
+ sll.sll_protocol = htons(ETH_P_ALL);
+ if (bind(sock, (struct sockaddr *)&sll, sizeof(sll)) < 0) {
+ printf("bind to %s: %s\n", name, strerror(errno));
+ close(sock);
+ return -1;
+ }
+
+ return sock;
+}
+
+int perf_event_open(struct perf_event_attr *attr, int pid, int cpu,
+ int group_fd, unsigned long flags)
+{
+ return syscall(__NR_perf_event_open, attr, pid, cpu,
+ group_fd, flags);
+}
diff --git a/samples/bpf/libbpf.h b/samples/bpf/libbpf.h
new file mode 100644
index 0000000..014aacf
--- /dev/null
+++ b/samples/bpf/libbpf.h
@@ -0,0 +1,201 @@
+/* eBPF mini library */
+#ifndef __LIBBPF_H
+#define __LIBBPF_H
+
+struct bpf_insn;
+
+int bpf_create_map(enum bpf_map_type map_type, int key_size, int value_size,
+ int max_entries);
+int bpf_update_elem(int fd, void *key, void *value, unsigned long long flags);
+int bpf_lookup_elem(int fd, void *key, void *value);
+int bpf_delete_elem(int fd, void *key);
+int bpf_get_next_key(int fd, void *key, void *next_key);
+
+int bpf_prog_load(enum bpf_prog_type prog_type,
+ const struct bpf_insn *insns, int insn_len,
+ const char *license, int kern_version);
+
+int bpf_obj_pin(int fd, const char *pathname);
+int bpf_obj_get(const char *pathname);
+
+#define LOG_BUF_SIZE 65536
+extern char bpf_log_buf[LOG_BUF_SIZE];
+
+/* ALU ops on registers, bpf_add|sub|...: dst_reg += src_reg */
+
+#define BPF_ALU64_REG(OP, DST, SRC) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU64 | BPF_OP(OP) | BPF_X, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = 0, \
+ .imm = 0 })
+
+#define BPF_ALU32_REG(OP, DST, SRC) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU | BPF_OP(OP) | BPF_X, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = 0, \
+ .imm = 0 })
+
+/* ALU ops on immediates, bpf_add|sub|...: dst_reg += imm32 */
+
+#define BPF_ALU64_IMM(OP, DST, IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU64 | BPF_OP(OP) | BPF_K, \
+ .dst_reg = DST, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = IMM })
+
+#define BPF_ALU32_IMM(OP, DST, IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU | BPF_OP(OP) | BPF_K, \
+ .dst_reg = DST, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = IMM })
+
+/* Short form of mov, dst_reg = src_reg */
+
+#define BPF_MOV64_REG(DST, SRC) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU64 | BPF_MOV | BPF_X, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = 0, \
+ .imm = 0 })
+
+#define BPF_MOV32_REG(DST, SRC) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU | BPF_MOV | BPF_X, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = 0, \
+ .imm = 0 })
+
+/* Short form of mov, dst_reg = imm32 */
+
+#define BPF_MOV64_IMM(DST, IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU64 | BPF_MOV | BPF_K, \
+ .dst_reg = DST, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = IMM })
+
+/* BPF_LD_IMM64 macro encodes single 'load 64-bit immediate' insn */
+#define BPF_LD_IMM64(DST, IMM) \
+ BPF_LD_IMM64_RAW(DST, 0, IMM)
+
+#define BPF_LD_IMM64_RAW(DST, SRC, IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_LD | BPF_DW | BPF_IMM, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = 0, \
+ .imm = (__u32) (IMM) }), \
+ ((struct bpf_insn) { \
+ .code = 0, /* zero is reserved opcode */ \
+ .dst_reg = 0, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = ((__u64) (IMM)) >> 32 })
+
+#ifndef BPF_PSEUDO_MAP_FD
+# define BPF_PSEUDO_MAP_FD 1
+#endif
+
+/* pseudo BPF_LD_IMM64 insn used to refer to process-local map_fd */
+#define BPF_LD_MAP_FD(DST, MAP_FD) \
+ BPF_LD_IMM64_RAW(DST, BPF_PSEUDO_MAP_FD, MAP_FD)
+
+
+/* Direct packet access, R0 = *(uint *) (skb->data + imm32) */
+
+#define BPF_LD_ABS(SIZE, IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_LD | BPF_SIZE(SIZE) | BPF_ABS, \
+ .dst_reg = 0, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = IMM })
+
+/* Memory load, dst_reg = *(uint *) (src_reg + off16) */
+
+#define BPF_LDX_MEM(SIZE, DST, SRC, OFF) \
+ ((struct bpf_insn) { \
+ .code = BPF_LDX | BPF_SIZE(SIZE) | BPF_MEM, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = OFF, \
+ .imm = 0 })
+
+/* Memory store, *(uint *) (dst_reg + off16) = src_reg */
+
+#define BPF_STX_MEM(SIZE, DST, SRC, OFF) \
+ ((struct bpf_insn) { \
+ .code = BPF_STX | BPF_SIZE(SIZE) | BPF_MEM, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = OFF, \
+ .imm = 0 })
+
+/* Memory store, *(uint *) (dst_reg + off16) = imm32 */
+
+#define BPF_ST_MEM(SIZE, DST, OFF, IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_ST | BPF_SIZE(SIZE) | BPF_MEM, \
+ .dst_reg = DST, \
+ .src_reg = 0, \
+ .off = OFF, \
+ .imm = IMM })
+
+/* Conditional jumps against registers, if (dst_reg 'op' src_reg) goto pc + off16 */
+
+#define BPF_JMP_REG(OP, DST, SRC, OFF) \
+ ((struct bpf_insn) { \
+ .code = BPF_JMP | BPF_OP(OP) | BPF_X, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = OFF, \
+ .imm = 0 })
+
+/* Conditional jumps against immediates, if (dst_reg 'op' imm32) goto pc + off16 */
+
+#define BPF_JMP_IMM(OP, DST, IMM, OFF) \
+ ((struct bpf_insn) { \
+ .code = BPF_JMP | BPF_OP(OP) | BPF_K, \
+ .dst_reg = DST, \
+ .src_reg = 0, \
+ .off = OFF, \
+ .imm = IMM })
+
+/* Raw code statement block */
+
+#define BPF_RAW_INSN(CODE, DST, SRC, OFF, IMM) \
+ ((struct bpf_insn) { \
+ .code = CODE, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = OFF, \
+ .imm = IMM })
+
+/* Program exit */
+
+#define BPF_EXIT_INSN() \
+ ((struct bpf_insn) { \
+ .code = BPF_JMP | BPF_EXIT, \
+ .dst_reg = 0, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = 0 })
+
+/* create RAW socket and bind to interface 'name' */
+int open_raw_sock(const char *name);
+
+struct perf_event_attr;
+int perf_event_open(struct perf_event_attr *attr, int pid, int cpu,
+ int group_fd, unsigned long flags);
+#endif
diff --git a/samples/bpf/sock_example.c b/samples/bpf/sock_example.c
new file mode 100644
index 0000000..a0ce251
--- /dev/null
+++ b/samples/bpf/sock_example.c
@@ -0,0 +1,101 @@
+/* eBPF example program:
+ * - creates arraymap in kernel with key 4 bytes and value 8 bytes
+ *
+ * - loads eBPF program:
+ * r0 = skb->data[ETH_HLEN + offsetof(struct iphdr, protocol)];
+ * *(u32*)(fp - 4) = r0;
+ * // assuming packet is IPv4, lookup ip->proto in a map
+ * value = bpf_map_lookup_elem(map_fd, fp - 4);
+ * if (value)
+ * (*(u64*)value) += 1;
+ *
+ * - attaches this program to eth0 raw socket
+ *
+ * - every second user space reads map[tcp], map[udp], map[icmp] to see
+ * how many packets of given protocol were seen on eth0
+ */
+#include <stdio.h>
+#include <unistd.h>
+#include <assert.h>
+#include <linux/bpf.h>
+#include <string.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <sys/socket.h>
+#include <arpa/inet.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
+#include <stddef.h>
+#include "libbpf.h"
+
+static int test_sock(void)
+{
+ int sock = -1, map_fd, prog_fd, i, key;
+ long long value = 0, tcp_cnt, udp_cnt, icmp_cnt;
+
+ map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(key), sizeof(value),
+ 256);
+ if (map_fd < 0) {
+ printf("failed to create map '%s'\n", strerror(errno));
+ goto cleanup;
+ }
+
+ struct bpf_insn prog[] = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_LD_ABS(BPF_B, ETH_HLEN + offsetof(struct iphdr, protocol) /* R0 = ip->proto */),
+ BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4), /* *(u32 *)(fp - 4) = r0 */
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), /* r2 = fp - 4 */
+ BPF_LD_MAP_FD(BPF_REG_1, map_fd),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_1, 1), /* r1 = 1 */
+ BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_DW, BPF_REG_0, BPF_REG_1, 0, 0), /* xadd r0 += r1 */
+ BPF_MOV64_IMM(BPF_REG_0, 0), /* r0 = 0 */
+ BPF_EXIT_INSN(),
+ };
+
+ prog_fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, prog, sizeof(prog),
+ "GPL", 0);
+ if (prog_fd < 0) {
+ printf("failed to load prog '%s'\n", strerror(errno));
+ goto cleanup;
+ }
+
+ sock = open_raw_sock("lo");
+
+ if (setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, &prog_fd,
+ sizeof(prog_fd)) < 0) {
+ printf("setsockopt %s\n", strerror(errno));
+ goto cleanup;
+ }
+
+ for (i = 0; i < 10; i++) {
+ key = IPPROTO_TCP;
+ assert(bpf_lookup_elem(map_fd, &key, &tcp_cnt) == 0);
+
+ key = IPPROTO_UDP;
+ assert(bpf_lookup_elem(map_fd, &key, &udp_cnt) == 0);
+
+ key = IPPROTO_ICMP;
+ assert(bpf_lookup_elem(map_fd, &key, &icmp_cnt) == 0);
+
+ printf("TCP %lld UDP %lld ICMP %lld packets\n",
+ tcp_cnt, udp_cnt, icmp_cnt);
+ sleep(1);
+ }
+
+cleanup:
+ /* maps, programs, raw sockets will auto cleanup on process exit */
+ return 0;
+}
+
+int main(void)
+{
+ FILE *f;
+
+ f = popen("ping -c5 localhost", "r");
+ (void)f;
+
+ return test_sock();
+}
diff --git a/samples/bpf/sockex1_kern.c b/samples/bpf/sockex1_kern.c
new file mode 100644
index 0000000..ed18e9a
--- /dev/null
+++ b/samples/bpf/sockex1_kern.c
@@ -0,0 +1,29 @@
+#include <uapi/linux/bpf.h>
+#include <uapi/linux/if_ether.h>
+#include <uapi/linux/if_packet.h>
+#include <uapi/linux/ip.h>
+#include "bpf_helpers.h"
+
+struct bpf_map_def SEC("maps") my_map = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(u32),
+ .value_size = sizeof(long),
+ .max_entries = 256,
+};
+
+SEC("socket1")
+int bpf_prog1(struct __sk_buff *skb)
+{
+ int index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
+ long *value;
+
+ if (skb->pkt_type != PACKET_OUTGOING)
+ return 0;
+
+ value = bpf_map_lookup_elem(&my_map, &index);
+ if (value)
+ __sync_fetch_and_add(value, skb->len);
+
+ return 0;
+}
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/sockex1_user.c b/samples/bpf/sockex1_user.c
new file mode 100644
index 0000000..678ce46
--- /dev/null
+++ b/samples/bpf/sockex1_user.c
@@ -0,0 +1,49 @@
+#include <stdio.h>
+#include <assert.h>
+#include <linux/bpf.h>
+#include "libbpf.h"
+#include "bpf_load.h"
+#include <unistd.h>
+#include <arpa/inet.h>
+
+int main(int ac, char **argv)
+{
+ char filename[256];
+ FILE *f;
+ int i, sock;
+
+ snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+
+ if (load_bpf_file(filename)) {
+ printf("%s", bpf_log_buf);
+ return 1;
+ }
+
+ sock = open_raw_sock("lo");
+
+ assert(setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, prog_fd,
+ sizeof(prog_fd[0])) == 0);
+
+ f = popen("ping -c5 localhost", "r");
+ (void) f;
+
+ for (i = 0; i < 5; i++) {
+ long long tcp_cnt, udp_cnt, icmp_cnt;
+ int key;
+
+ key = IPPROTO_TCP;
+ assert(bpf_lookup_elem(map_fd[0], &key, &tcp_cnt) == 0);
+
+ key = IPPROTO_UDP;
+ assert(bpf_lookup_elem(map_fd[0], &key, &udp_cnt) == 0);
+
+ key = IPPROTO_ICMP;
+ assert(bpf_lookup_elem(map_fd[0], &key, &icmp_cnt) == 0);
+
+ printf("TCP %lld UDP %lld ICMP %lld bytes\n",
+ tcp_cnt, udp_cnt, icmp_cnt);
+ sleep(1);
+ }
+
+ return 0;
+}
diff --git a/samples/bpf/sockex2_kern.c b/samples/bpf/sockex2_kern.c
new file mode 100644
index 0000000..ba0e177
--- /dev/null
+++ b/samples/bpf/sockex2_kern.c
@@ -0,0 +1,221 @@
+#include <uapi/linux/bpf.h>
+#include "bpf_helpers.h"
+#include <uapi/linux/in.h>
+#include <uapi/linux/if.h>
+#include <uapi/linux/if_ether.h>
+#include <uapi/linux/ip.h>
+#include <uapi/linux/ipv6.h>
+#include <uapi/linux/if_tunnel.h>
+#define IP_MF 0x2000
+#define IP_OFFSET 0x1FFF
+
+struct vlan_hdr {
+ __be16 h_vlan_TCI;
+ __be16 h_vlan_encapsulated_proto;
+};
+
+struct flow_keys {
+ __be32 src;
+ __be32 dst;
+ union {
+ __be32 ports;
+ __be16 port16[2];
+ };
+ __u16 thoff;
+ __u8 ip_proto;
+};
+
+static inline int proto_ports_offset(__u64 proto)
+{
+ switch (proto) {
+ case IPPROTO_TCP:
+ case IPPROTO_UDP:
+ case IPPROTO_DCCP:
+ case IPPROTO_ESP:
+ case IPPROTO_SCTP:
+ case IPPROTO_UDPLITE:
+ return 0;
+ case IPPROTO_AH:
+ return 4;
+ default:
+ return 0;
+ }
+}
+
+static inline int ip_is_fragment(struct __sk_buff *ctx, __u64 nhoff)
+{
+ return load_half(ctx, nhoff + offsetof(struct iphdr, frag_off))
+ & (IP_MF | IP_OFFSET);
+}
+
+static inline __u32 ipv6_addr_hash(struct __sk_buff *ctx, __u64 off)
+{
+ __u64 w0 = load_word(ctx, off);
+ __u64 w1 = load_word(ctx, off + 4);
+ __u64 w2 = load_word(ctx, off + 8);
+ __u64 w3 = load_word(ctx, off + 12);
+
+ return (__u32)(w0 ^ w1 ^ w2 ^ w3);
+}
+
+static inline __u64 parse_ip(struct __sk_buff *skb, __u64 nhoff, __u64 *ip_proto,
+ struct flow_keys *flow)
+{
+ __u64 verlen;
+
+ if (unlikely(ip_is_fragment(skb, nhoff)))
+ *ip_proto = 0;
+ else
+ *ip_proto = load_byte(skb, nhoff + offsetof(struct iphdr, protocol));
+
+ if (*ip_proto != IPPROTO_GRE) {
+ flow->src = load_word(skb, nhoff + offsetof(struct iphdr, saddr));
+ flow->dst = load_word(skb, nhoff + offsetof(struct iphdr, daddr));
+ }
+
+ verlen = load_byte(skb, nhoff + 0/*offsetof(struct iphdr, ihl)*/);
+ if (likely(verlen == 0x45))
+ nhoff += 20;
+ else
+ nhoff += (verlen & 0xF) << 2;
+
+ return nhoff;
+}
+
+static inline __u64 parse_ipv6(struct __sk_buff *skb, __u64 nhoff, __u64 *ip_proto,
+ struct flow_keys *flow)
+{
+ *ip_proto = load_byte(skb,
+ nhoff + offsetof(struct ipv6hdr, nexthdr));
+ flow->src = ipv6_addr_hash(skb,
+ nhoff + offsetof(struct ipv6hdr, saddr));
+ flow->dst = ipv6_addr_hash(skb,
+ nhoff + offsetof(struct ipv6hdr, daddr));
+ nhoff += sizeof(struct ipv6hdr);
+
+ return nhoff;
+}
+
+static inline bool flow_dissector(struct __sk_buff *skb, struct flow_keys *flow)
+{
+ __u64 nhoff = ETH_HLEN;
+ __u64 ip_proto;
+ __u64 proto = load_half(skb, 12);
+ int poff;
+
+ if (proto == ETH_P_8021AD) {
+ proto = load_half(skb, nhoff + offsetof(struct vlan_hdr,
+ h_vlan_encapsulated_proto));
+ nhoff += sizeof(struct vlan_hdr);
+ }
+
+ if (proto == ETH_P_8021Q) {
+ proto = load_half(skb, nhoff + offsetof(struct vlan_hdr,
+ h_vlan_encapsulated_proto));
+ nhoff += sizeof(struct vlan_hdr);
+ }
+
+ if (likely(proto == ETH_P_IP))
+ nhoff = parse_ip(skb, nhoff, &ip_proto, flow);
+ else if (proto == ETH_P_IPV6)
+ nhoff = parse_ipv6(skb, nhoff, &ip_proto, flow);
+ else
+ return false;
+
+ switch (ip_proto) {
+ case IPPROTO_GRE: {
+ struct gre_hdr {
+ __be16 flags;
+ __be16 proto;
+ };
+
+ __u64 gre_flags = load_half(skb,
+ nhoff + offsetof(struct gre_hdr, flags));
+ __u64 gre_proto = load_half(skb,
+ nhoff + offsetof(struct gre_hdr, proto));
+
+ if (gre_flags & (GRE_VERSION|GRE_ROUTING))
+ break;
+
+ proto = gre_proto;
+ nhoff += 4;
+ if (gre_flags & GRE_CSUM)
+ nhoff += 4;
+ if (gre_flags & GRE_KEY)
+ nhoff += 4;
+ if (gre_flags & GRE_SEQ)
+ nhoff += 4;
+
+ if (proto == ETH_P_8021Q) {
+ proto = load_half(skb,
+ nhoff + offsetof(struct vlan_hdr,
+ h_vlan_encapsulated_proto));
+ nhoff += sizeof(struct vlan_hdr);
+ }
+
+ if (proto == ETH_P_IP)
+ nhoff = parse_ip(skb, nhoff, &ip_proto, flow);
+ else if (proto == ETH_P_IPV6)
+ nhoff = parse_ipv6(skb, nhoff, &ip_proto, flow);
+ else
+ return false;
+ break;
+ }
+ case IPPROTO_IPIP:
+ nhoff = parse_ip(skb, nhoff, &ip_proto, flow);
+ break;
+ case IPPROTO_IPV6:
+ nhoff = parse_ipv6(skb, nhoff, &ip_proto, flow);
+ break;
+ default:
+ break;
+ }
+
+ flow->ip_proto = ip_proto;
+ poff = proto_ports_offset(ip_proto);
+ if (poff >= 0) {
+ nhoff += poff;
+ flow->ports = load_word(skb, nhoff);
+ }
+
+ flow->thoff = (__u16) nhoff;
+
+ return true;
+}
+
+struct pair {
+ long packets;
+ long bytes;
+};
+
+struct bpf_map_def SEC("maps") hash_map = {
+ .type = BPF_MAP_TYPE_HASH,
+ .key_size = sizeof(__be32),
+ .value_size = sizeof(struct pair),
+ .max_entries = 1024,
+};
+
+SEC("socket2")
+int bpf_prog2(struct __sk_buff *skb)
+{
+ struct flow_keys flow;
+ struct pair *value;
+ u32 key;
+
+ if (!flow_dissector(skb, &flow))
+ return 0;
+
+ key = flow.dst;
+ value = bpf_map_lookup_elem(&hash_map, &key);
+ if (value) {
+ __sync_fetch_and_add(&value->packets, 1);
+ __sync_fetch_and_add(&value->bytes, skb->len);
+ } else {
+ struct pair val = {1, skb->len};
+
+ bpf_map_update_elem(&hash_map, &key, &val, BPF_ANY);
+ }
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/sockex2_user.c b/samples/bpf/sockex2_user.c
new file mode 100644
index 0000000..29a276d
--- /dev/null
+++ b/samples/bpf/sockex2_user.c
@@ -0,0 +1,49 @@
+#include <stdio.h>
+#include <assert.h>
+#include <linux/bpf.h>
+#include "libbpf.h"
+#include "bpf_load.h"
+#include <unistd.h>
+#include <arpa/inet.h>
+
+struct pair {
+ __u64 packets;
+ __u64 bytes;
+};
+
+int main(int ac, char **argv)
+{
+ char filename[256];
+ FILE *f;
+ int i, sock;
+
+ snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+
+ if (load_bpf_file(filename)) {
+ printf("%s", bpf_log_buf);
+ return 1;
+ }
+
+ sock = open_raw_sock("lo");
+
+ assert(setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, prog_fd,
+ sizeof(prog_fd[0])) == 0);
+
+ f = popen("ping -c5 localhost", "r");
+ (void) f;
+
+ for (i = 0; i < 5; i++) {
+ int key = 0, next_key;
+ struct pair value;
+
+ while (bpf_get_next_key(map_fd[0], &key, &next_key) == 0) {
+ bpf_lookup_elem(map_fd[0], &next_key, &value);
+ printf("ip %s bytes %lld packets %lld\n",
+ inet_ntoa((struct in_addr){htonl(next_key)}),
+ value.bytes, value.packets);
+ key = next_key;
+ }
+ sleep(1);
+ }
+ return 0;
+}
diff --git a/samples/bpf/sockex3_kern.c b/samples/bpf/sockex3_kern.c
new file mode 100644
index 0000000..41ae2fd
--- /dev/null
+++ b/samples/bpf/sockex3_kern.c
@@ -0,0 +1,290 @@
+/* Copyright (c) 2015 PLUMgrid, http://plumgrid.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <uapi/linux/bpf.h>
+#include "bpf_helpers.h"
+#include <uapi/linux/in.h>
+#include <uapi/linux/if.h>
+#include <uapi/linux/if_ether.h>
+#include <uapi/linux/ip.h>
+#include <uapi/linux/ipv6.h>
+#include <uapi/linux/if_tunnel.h>
+#include <uapi/linux/mpls.h>
+#define IP_MF 0x2000
+#define IP_OFFSET 0x1FFF
+
+#define PROG(F) SEC("socket/"__stringify(F)) int bpf_func_##F
+
+struct bpf_map_def SEC("maps") jmp_table = {
+ .type = BPF_MAP_TYPE_PROG_ARRAY,
+ .key_size = sizeof(u32),
+ .value_size = sizeof(u32),
+ .max_entries = 8,
+};
+
+#define PARSE_VLAN 1
+#define PARSE_MPLS 2
+#define PARSE_IP 3
+#define PARSE_IPV6 4
+
+/* protocol dispatch routine.
+ * It tail-calls next BPF program depending on eth proto
+ * Note, we could have used:
+ * bpf_tail_call(skb, &jmp_table, proto);
+ * but it would need large prog_array
+ */
+static inline void parse_eth_proto(struct __sk_buff *skb, u32 proto)
+{
+ switch (proto) {
+ case ETH_P_8021Q:
+ case ETH_P_8021AD:
+ bpf_tail_call(skb, &jmp_table, PARSE_VLAN);
+ break;
+ case ETH_P_MPLS_UC:
+ case ETH_P_MPLS_MC:
+ bpf_tail_call(skb, &jmp_table, PARSE_MPLS);
+ break;
+ case ETH_P_IP:
+ bpf_tail_call(skb, &jmp_table, PARSE_IP);
+ break;
+ case ETH_P_IPV6:
+ bpf_tail_call(skb, &jmp_table, PARSE_IPV6);
+ break;
+ }
+}
+
+struct vlan_hdr {
+ __be16 h_vlan_TCI;
+ __be16 h_vlan_encapsulated_proto;
+};
+
+struct flow_keys {
+ __be32 src;
+ __be32 dst;
+ union {
+ __be32 ports;
+ __be16 port16[2];
+ };
+ __u32 ip_proto;
+};
+
+static inline int ip_is_fragment(struct __sk_buff *ctx, __u64 nhoff)
+{
+ return load_half(ctx, nhoff + offsetof(struct iphdr, frag_off))
+ & (IP_MF | IP_OFFSET);
+}
+
+static inline __u32 ipv6_addr_hash(struct __sk_buff *ctx, __u64 off)
+{
+ __u64 w0 = load_word(ctx, off);
+ __u64 w1 = load_word(ctx, off + 4);
+ __u64 w2 = load_word(ctx, off + 8);
+ __u64 w3 = load_word(ctx, off + 12);
+
+ return (__u32)(w0 ^ w1 ^ w2 ^ w3);
+}
+
+struct globals {
+ struct flow_keys flow;
+};
+
+struct bpf_map_def SEC("maps") percpu_map = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(struct globals),
+ .max_entries = 32,
+};
+
+/* user poor man's per_cpu until native support is ready */
+static struct globals *this_cpu_globals(void)
+{
+ u32 key = bpf_get_smp_processor_id();
+
+ return bpf_map_lookup_elem(&percpu_map, &key);
+}
+
+/* some simple stats for user space consumption */
+struct pair {
+ __u64 packets;
+ __u64 bytes;
+};
+
+struct bpf_map_def SEC("maps") hash_map = {
+ .type = BPF_MAP_TYPE_HASH,
+ .key_size = sizeof(struct flow_keys),
+ .value_size = sizeof(struct pair),
+ .max_entries = 1024,
+};
+
+static void update_stats(struct __sk_buff *skb, struct globals *g)
+{
+ struct flow_keys key = g->flow;
+ struct pair *value;
+
+ value = bpf_map_lookup_elem(&hash_map, &key);
+ if (value) {
+ __sync_fetch_and_add(&value->packets, 1);
+ __sync_fetch_and_add(&value->bytes, skb->len);
+ } else {
+ struct pair val = {1, skb->len};
+
+ bpf_map_update_elem(&hash_map, &key, &val, BPF_ANY);
+ }
+}
+
+static __always_inline void parse_ip_proto(struct __sk_buff *skb,
+ struct globals *g, __u32 ip_proto)
+{
+ __u32 nhoff = skb->cb[0];
+ int poff;
+
+ switch (ip_proto) {
+ case IPPROTO_GRE: {
+ struct gre_hdr {
+ __be16 flags;
+ __be16 proto;
+ };
+
+ __u32 gre_flags = load_half(skb,
+ nhoff + offsetof(struct gre_hdr, flags));
+ __u32 gre_proto = load_half(skb,
+ nhoff + offsetof(struct gre_hdr, proto));
+
+ if (gre_flags & (GRE_VERSION|GRE_ROUTING))
+ break;
+
+ nhoff += 4;
+ if (gre_flags & GRE_CSUM)
+ nhoff += 4;
+ if (gre_flags & GRE_KEY)
+ nhoff += 4;
+ if (gre_flags & GRE_SEQ)
+ nhoff += 4;
+
+ skb->cb[0] = nhoff;
+ parse_eth_proto(skb, gre_proto);
+ break;
+ }
+ case IPPROTO_IPIP:
+ parse_eth_proto(skb, ETH_P_IP);
+ break;
+ case IPPROTO_IPV6:
+ parse_eth_proto(skb, ETH_P_IPV6);
+ break;
+ case IPPROTO_TCP:
+ case IPPROTO_UDP:
+ g->flow.ports = load_word(skb, nhoff);
+ case IPPROTO_ICMP:
+ g->flow.ip_proto = ip_proto;
+ update_stats(skb, g);
+ break;
+ default:
+ break;
+ }
+}
+
+PROG(PARSE_IP)(struct __sk_buff *skb)
+{
+ struct globals *g = this_cpu_globals();
+ __u32 nhoff, verlen, ip_proto;
+
+ if (!g)
+ return 0;
+
+ nhoff = skb->cb[0];
+
+ if (unlikely(ip_is_fragment(skb, nhoff)))
+ return 0;
+
+ ip_proto = load_byte(skb, nhoff + offsetof(struct iphdr, protocol));
+
+ if (ip_proto != IPPROTO_GRE) {
+ g->flow.src = load_word(skb, nhoff + offsetof(struct iphdr, saddr));
+ g->flow.dst = load_word(skb, nhoff + offsetof(struct iphdr, daddr));
+ }
+
+ verlen = load_byte(skb, nhoff + 0/*offsetof(struct iphdr, ihl)*/);
+ nhoff += (verlen & 0xF) << 2;
+
+ skb->cb[0] = nhoff;
+ parse_ip_proto(skb, g, ip_proto);
+ return 0;
+}
+
+PROG(PARSE_IPV6)(struct __sk_buff *skb)
+{
+ struct globals *g = this_cpu_globals();
+ __u32 nhoff, ip_proto;
+
+ if (!g)
+ return 0;
+
+ nhoff = skb->cb[0];
+
+ ip_proto = load_byte(skb,
+ nhoff + offsetof(struct ipv6hdr, nexthdr));
+ g->flow.src = ipv6_addr_hash(skb,
+ nhoff + offsetof(struct ipv6hdr, saddr));
+ g->flow.dst = ipv6_addr_hash(skb,
+ nhoff + offsetof(struct ipv6hdr, daddr));
+ nhoff += sizeof(struct ipv6hdr);
+
+ skb->cb[0] = nhoff;
+ parse_ip_proto(skb, g, ip_proto);
+ return 0;
+}
+
+PROG(PARSE_VLAN)(struct __sk_buff *skb)
+{
+ __u32 nhoff, proto;
+
+ nhoff = skb->cb[0];
+
+ proto = load_half(skb, nhoff + offsetof(struct vlan_hdr,
+ h_vlan_encapsulated_proto));
+ nhoff += sizeof(struct vlan_hdr);
+ skb->cb[0] = nhoff;
+
+ parse_eth_proto(skb, proto);
+
+ return 0;
+}
+
+PROG(PARSE_MPLS)(struct __sk_buff *skb)
+{
+ __u32 nhoff, label;
+
+ nhoff = skb->cb[0];
+
+ label = load_word(skb, nhoff);
+ nhoff += sizeof(struct mpls_label);
+ skb->cb[0] = nhoff;
+
+ if (label & MPLS_LS_S_MASK) {
+ __u8 verlen = load_byte(skb, nhoff);
+ if ((verlen & 0xF0) == 4)
+ parse_eth_proto(skb, ETH_P_IP);
+ else
+ parse_eth_proto(skb, ETH_P_IPV6);
+ } else {
+ parse_eth_proto(skb, ETH_P_MPLS_UC);
+ }
+
+ return 0;
+}
+
+SEC("socket/0")
+int main_prog(struct __sk_buff *skb)
+{
+ __u32 nhoff = ETH_HLEN;
+ __u32 proto = load_half(skb, 12);
+
+ skb->cb[0] = nhoff;
+ parse_eth_proto(skb, proto);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/sockex3_user.c b/samples/bpf/sockex3_user.c
new file mode 100644
index 0000000..2617772
--- /dev/null
+++ b/samples/bpf/sockex3_user.c
@@ -0,0 +1,66 @@
+#include <stdio.h>
+#include <assert.h>
+#include <linux/bpf.h>
+#include "libbpf.h"
+#include "bpf_load.h"
+#include <unistd.h>
+#include <arpa/inet.h>
+
+struct flow_keys {
+ __be32 src;
+ __be32 dst;
+ union {
+ __be32 ports;
+ __be16 port16[2];
+ };
+ __u32 ip_proto;
+};
+
+struct pair {
+ __u64 packets;
+ __u64 bytes;
+};
+
+int main(int argc, char **argv)
+{
+ char filename[256];
+ FILE *f;
+ int i, sock;
+
+ snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+
+ if (load_bpf_file(filename)) {
+ printf("%s", bpf_log_buf);
+ return 1;
+ }
+
+ sock = open_raw_sock("lo");
+
+ assert(setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, &prog_fd[4],
+ sizeof(__u32)) == 0);
+
+ if (argc > 1)
+ f = popen("ping -c5 localhost", "r");
+ else
+ f = popen("netperf -l 4 localhost", "r");
+ (void) f;
+
+ for (i = 0; i < 5; i++) {
+ struct flow_keys key = {}, next_key;
+ struct pair value;
+
+ sleep(1);
+ printf("IP src.port -> dst.port bytes packets\n");
+ while (bpf_get_next_key(map_fd[2], &key, &next_key) == 0) {
+ bpf_lookup_elem(map_fd[2], &next_key, &value);
+ printf("%s.%05d -> %s.%05d %12lld %12lld\n",
+ inet_ntoa((struct in_addr){htonl(next_key.src)}),
+ next_key.port16[0],
+ inet_ntoa((struct in_addr){htonl(next_key.dst)}),
+ next_key.port16[1],
+ value.bytes, value.packets);
+ key = next_key;
+ }
+ }
+ return 0;
+}
diff --git a/samples/bpf/tcbpf1_kern.c b/samples/bpf/tcbpf1_kern.c
new file mode 100644
index 0000000..fa051b3
--- /dev/null
+++ b/samples/bpf/tcbpf1_kern.c
@@ -0,0 +1,89 @@
+#include <uapi/linux/bpf.h>
+#include <uapi/linux/if_ether.h>
+#include <uapi/linux/if_packet.h>
+#include <uapi/linux/ip.h>
+#include <uapi/linux/in.h>
+#include <uapi/linux/tcp.h>
+#include <uapi/linux/filter.h>
+#include <uapi/linux/pkt_cls.h>
+#include "bpf_helpers.h"
+
+/* compiler workaround */
+#define _htonl __builtin_bswap32
+
+static inline void set_dst_mac(struct __sk_buff *skb, char *mac)
+{
+ bpf_skb_store_bytes(skb, 0, mac, ETH_ALEN, 1);
+}
+
+#define IP_CSUM_OFF (ETH_HLEN + offsetof(struct iphdr, check))
+#define TOS_OFF (ETH_HLEN + offsetof(struct iphdr, tos))
+
+static inline void set_ip_tos(struct __sk_buff *skb, __u8 new_tos)
+{
+ __u8 old_tos = load_byte(skb, TOS_OFF);
+
+ bpf_l3_csum_replace(skb, IP_CSUM_OFF, htons(old_tos), htons(new_tos), 2);
+ bpf_skb_store_bytes(skb, TOS_OFF, &new_tos, sizeof(new_tos), 0);
+}
+
+#define TCP_CSUM_OFF (ETH_HLEN + sizeof(struct iphdr) + offsetof(struct tcphdr, check))
+#define IP_SRC_OFF (ETH_HLEN + offsetof(struct iphdr, saddr))
+
+#define IS_PSEUDO 0x10
+
+static inline void set_tcp_ip_src(struct __sk_buff *skb, __u32 new_ip)
+{
+ __u32 old_ip = _htonl(load_word(skb, IP_SRC_OFF));
+
+ bpf_l4_csum_replace(skb, TCP_CSUM_OFF, old_ip, new_ip, IS_PSEUDO | sizeof(new_ip));
+ bpf_l3_csum_replace(skb, IP_CSUM_OFF, old_ip, new_ip, sizeof(new_ip));
+ bpf_skb_store_bytes(skb, IP_SRC_OFF, &new_ip, sizeof(new_ip), 0);
+}
+
+#define TCP_DPORT_OFF (ETH_HLEN + sizeof(struct iphdr) + offsetof(struct tcphdr, dest))
+static inline void set_tcp_dest_port(struct __sk_buff *skb, __u16 new_port)
+{
+ __u16 old_port = htons(load_half(skb, TCP_DPORT_OFF));
+
+ bpf_l4_csum_replace(skb, TCP_CSUM_OFF, old_port, new_port, sizeof(new_port));
+ bpf_skb_store_bytes(skb, TCP_DPORT_OFF, &new_port, sizeof(new_port), 0);
+}
+
+SEC("classifier")
+int bpf_prog1(struct __sk_buff *skb)
+{
+ __u8 proto = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
+ long *value;
+
+ if (proto == IPPROTO_TCP) {
+ set_ip_tos(skb, 8);
+ set_tcp_ip_src(skb, 0xA010101);
+ set_tcp_dest_port(skb, 5001);
+ }
+
+ return 0;
+}
+SEC("redirect_xmit")
+int _redirect_xmit(struct __sk_buff *skb)
+{
+ return bpf_redirect(skb->ifindex + 1, 0);
+}
+SEC("redirect_recv")
+int _redirect_recv(struct __sk_buff *skb)
+{
+ return bpf_redirect(skb->ifindex + 1, 1);
+}
+SEC("clone_redirect_xmit")
+int _clone_redirect_xmit(struct __sk_buff *skb)
+{
+ bpf_clone_redirect(skb, skb->ifindex + 1, 0);
+ return TC_ACT_SHOT;
+}
+SEC("clone_redirect_recv")
+int _clone_redirect_recv(struct __sk_buff *skb)
+{
+ bpf_clone_redirect(skb, skb->ifindex + 1, 1);
+ return TC_ACT_SHOT;
+}
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/test_maps.c b/samples/bpf/test_maps.c
new file mode 100644
index 0000000..6299ee9
--- /dev/null
+++ b/samples/bpf/test_maps.c
@@ -0,0 +1,291 @@
+/*
+ * Testsuite for eBPF maps
+ *
+ * Copyright (c) 2014 PLUMgrid, http://plumgrid.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <stdio.h>
+#include <unistd.h>
+#include <linux/bpf.h>
+#include <errno.h>
+#include <string.h>
+#include <assert.h>
+#include <sys/wait.h>
+#include <stdlib.h>
+#include "libbpf.h"
+
+/* sanity tests for map API */
+static void test_hashmap_sanity(int i, void *data)
+{
+ long long key, next_key, value;
+ int map_fd;
+
+ map_fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value), 2);
+ if (map_fd < 0) {
+ printf("failed to create hashmap '%s'\n", strerror(errno));
+ exit(1);
+ }
+
+ key = 1;
+ value = 1234;
+ /* insert key=1 element */
+ assert(bpf_update_elem(map_fd, &key, &value, BPF_ANY) == 0);
+
+ value = 0;
+ /* BPF_NOEXIST means: add new element if it doesn't exist */
+ assert(bpf_update_elem(map_fd, &key, &value, BPF_NOEXIST) == -1 &&
+ /* key=1 already exists */
+ errno == EEXIST);
+
+ assert(bpf_update_elem(map_fd, &key, &value, -1) == -1 && errno == EINVAL);
+
+ /* check that key=1 can be found */
+ assert(bpf_lookup_elem(map_fd, &key, &value) == 0 && value == 1234);
+
+ key = 2;
+ /* check that key=2 is not found */
+ assert(bpf_lookup_elem(map_fd, &key, &value) == -1 && errno == ENOENT);
+
+ /* BPF_EXIST means: update existing element */
+ assert(bpf_update_elem(map_fd, &key, &value, BPF_EXIST) == -1 &&
+ /* key=2 is not there */
+ errno == ENOENT);
+
+ /* insert key=2 element */
+ assert(bpf_update_elem(map_fd, &key, &value, BPF_NOEXIST) == 0);
+
+ /* key=1 and key=2 were inserted, check that key=0 cannot be inserted
+ * due to max_entries limit
+ */
+ key = 0;
+ assert(bpf_update_elem(map_fd, &key, &value, BPF_NOEXIST) == -1 &&
+ errno == E2BIG);
+
+ /* check that key = 0 doesn't exist */
+ assert(bpf_delete_elem(map_fd, &key) == -1 && errno == ENOENT);
+
+ /* iterate over two elements */
+ assert(bpf_get_next_key(map_fd, &key, &next_key) == 0 &&
+ (next_key == 1 || next_key == 2));
+ assert(bpf_get_next_key(map_fd, &next_key, &next_key) == 0 &&
+ (next_key == 1 || next_key == 2));
+ assert(bpf_get_next_key(map_fd, &next_key, &next_key) == -1 &&
+ errno == ENOENT);
+
+ /* delete both elements */
+ key = 1;
+ assert(bpf_delete_elem(map_fd, &key) == 0);
+ key = 2;
+ assert(bpf_delete_elem(map_fd, &key) == 0);
+ assert(bpf_delete_elem(map_fd, &key) == -1 && errno == ENOENT);
+
+ key = 0;
+ /* check that map is empty */
+ assert(bpf_get_next_key(map_fd, &key, &next_key) == -1 &&
+ errno == ENOENT);
+ close(map_fd);
+}
+
+static void test_arraymap_sanity(int i, void *data)
+{
+ int key, next_key, map_fd;
+ long long value;
+
+ map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(key), sizeof(value), 2);
+ if (map_fd < 0) {
+ printf("failed to create arraymap '%s'\n", strerror(errno));
+ exit(1);
+ }
+
+ key = 1;
+ value = 1234;
+ /* insert key=1 element */
+ assert(bpf_update_elem(map_fd, &key, &value, BPF_ANY) == 0);
+
+ value = 0;
+ assert(bpf_update_elem(map_fd, &key, &value, BPF_NOEXIST) == -1 &&
+ errno == EEXIST);
+
+ /* check that key=1 can be found */
+ assert(bpf_lookup_elem(map_fd, &key, &value) == 0 && value == 1234);
+
+ key = 0;
+ /* check that key=0 is also found and zero initialized */
+ assert(bpf_lookup_elem(map_fd, &key, &value) == 0 && value == 0);
+
+
+ /* key=0 and key=1 were inserted, check that key=2 cannot be inserted
+ * due to max_entries limit
+ */
+ key = 2;
+ assert(bpf_update_elem(map_fd, &key, &value, BPF_EXIST) == -1 &&
+ errno == E2BIG);
+
+ /* check that key = 2 doesn't exist */
+ assert(bpf_lookup_elem(map_fd, &key, &value) == -1 && errno == ENOENT);
+
+ /* iterate over two elements */
+ assert(bpf_get_next_key(map_fd, &key, &next_key) == 0 &&
+ next_key == 0);
+ assert(bpf_get_next_key(map_fd, &next_key, &next_key) == 0 &&
+ next_key == 1);
+ assert(bpf_get_next_key(map_fd, &next_key, &next_key) == -1 &&
+ errno == ENOENT);
+
+ /* delete shouldn't succeed */
+ key = 1;
+ assert(bpf_delete_elem(map_fd, &key) == -1 && errno == EINVAL);
+
+ close(map_fd);
+}
+
+#define MAP_SIZE (32 * 1024)
+static void test_map_large(void)
+{
+ struct bigkey {
+ int a;
+ char b[116];
+ long long c;
+ } key;
+ int map_fd, i, value;
+
+ /* allocate 4Mbyte of memory */
+ map_fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value),
+ MAP_SIZE);
+ if (map_fd < 0) {
+ printf("failed to create large map '%s'\n", strerror(errno));
+ exit(1);
+ }
+
+ for (i = 0; i < MAP_SIZE; i++) {
+ key = (struct bigkey) {.c = i};
+ value = i;
+ assert(bpf_update_elem(map_fd, &key, &value, BPF_NOEXIST) == 0);
+ }
+ key.c = -1;
+ assert(bpf_update_elem(map_fd, &key, &value, BPF_NOEXIST) == -1 &&
+ errno == E2BIG);
+
+ /* iterate through all elements */
+ for (i = 0; i < MAP_SIZE; i++)
+ assert(bpf_get_next_key(map_fd, &key, &key) == 0);
+ assert(bpf_get_next_key(map_fd, &key, &key) == -1 && errno == ENOENT);
+
+ key.c = 0;
+ assert(bpf_lookup_elem(map_fd, &key, &value) == 0 && value == 0);
+ key.a = 1;
+ assert(bpf_lookup_elem(map_fd, &key, &value) == -1 && errno == ENOENT);
+
+ close(map_fd);
+}
+
+/* fork N children and wait for them to complete */
+static void run_parallel(int tasks, void (*fn)(int i, void *data), void *data)
+{
+ pid_t pid[tasks];
+ int i;
+
+ for (i = 0; i < tasks; i++) {
+ pid[i] = fork();
+ if (pid[i] == 0) {
+ fn(i, data);
+ exit(0);
+ } else if (pid[i] == -1) {
+ printf("couldn't spawn #%d process\n", i);
+ exit(1);
+ }
+ }
+ for (i = 0; i < tasks; i++) {
+ int status;
+
+ assert(waitpid(pid[i], &status, 0) == pid[i]);
+ assert(status == 0);
+ }
+}
+
+static void test_map_stress(void)
+{
+ run_parallel(100, test_hashmap_sanity, NULL);
+ run_parallel(100, test_arraymap_sanity, NULL);
+}
+
+#define TASKS 1024
+#define DO_UPDATE 1
+#define DO_DELETE 0
+static void do_work(int fn, void *data)
+{
+ int map_fd = ((int *)data)[0];
+ int do_update = ((int *)data)[1];
+ int i;
+ int key, value;
+
+ for (i = fn; i < MAP_SIZE; i += TASKS) {
+ key = value = i;
+ if (do_update)
+ assert(bpf_update_elem(map_fd, &key, &value, BPF_NOEXIST) == 0);
+ else
+ assert(bpf_delete_elem(map_fd, &key) == 0);
+ }
+}
+
+static void test_map_parallel(void)
+{
+ int i, map_fd, key = 0, value = 0;
+ int data[2];
+
+ map_fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value),
+ MAP_SIZE);
+ if (map_fd < 0) {
+ printf("failed to create map for parallel test '%s'\n",
+ strerror(errno));
+ exit(1);
+ }
+
+ data[0] = map_fd;
+ data[1] = DO_UPDATE;
+ /* use the same map_fd in children to add elements to this map
+ * child_0 adds key=0, key=1024, key=2048, ...
+ * child_1 adds key=1, key=1025, key=2049, ...
+ * child_1023 adds key=1023, ...
+ */
+ run_parallel(TASKS, do_work, data);
+
+ /* check that key=0 is already there */
+ assert(bpf_update_elem(map_fd, &key, &value, BPF_NOEXIST) == -1 &&
+ errno == EEXIST);
+
+ /* check that all elements were inserted */
+ key = -1;
+ for (i = 0; i < MAP_SIZE; i++)
+ assert(bpf_get_next_key(map_fd, &key, &key) == 0);
+ assert(bpf_get_next_key(map_fd, &key, &key) == -1 && errno == ENOENT);
+
+ /* another check for all elements */
+ for (i = 0; i < MAP_SIZE; i++) {
+ key = MAP_SIZE - i - 1;
+ assert(bpf_lookup_elem(map_fd, &key, &value) == 0 &&
+ value == key);
+ }
+
+ /* now let's delete all elemenets in parallel */
+ data[1] = DO_DELETE;
+ run_parallel(TASKS, do_work, data);
+
+ /* nothing should be left */
+ key = -1;
+ assert(bpf_get_next_key(map_fd, &key, &key) == -1 && errno == ENOENT);
+}
+
+int main(void)
+{
+ test_hashmap_sanity(0, NULL);
+ test_arraymap_sanity(0, NULL);
+ test_map_large();
+ test_map_parallel();
+ test_map_stress();
+ printf("test_maps: OK\n");
+ return 0;
+}
diff --git a/samples/bpf/test_verifier.c b/samples/bpf/test_verifier.c
new file mode 100644
index 0000000..563c507
--- /dev/null
+++ b/samples/bpf/test_verifier.c
@@ -0,0 +1,1311 @@
+/*
+ * Testsuite for eBPF verifier
+ *
+ * Copyright (c) 2014 PLUMgrid, http://plumgrid.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <stdio.h>
+#include <unistd.h>
+#include <linux/bpf.h>
+#include <errno.h>
+#include <linux/unistd.h>
+#include <string.h>
+#include <linux/filter.h>
+#include <stddef.h>
+#include <stdbool.h>
+#include <sys/resource.h>
+#include "libbpf.h"
+
+#define MAX_INSNS 512
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x)))
+
+#define MAX_FIXUPS 8
+
+struct bpf_test {
+ const char *descr;
+ struct bpf_insn insns[MAX_INSNS];
+ int fixup[MAX_FIXUPS];
+ int prog_array_fixup[MAX_FIXUPS];
+ const char *errstr;
+ const char *errstr_unpriv;
+ enum {
+ UNDEF,
+ ACCEPT,
+ REJECT
+ } result, result_unpriv;
+ enum bpf_prog_type prog_type;
+};
+
+static struct bpf_test tests[] = {
+ {
+ "add+sub+mul",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_1, 1),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 2),
+ BPF_MOV64_IMM(BPF_REG_2, 3),
+ BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -1),
+ BPF_ALU64_IMM(BPF_MUL, BPF_REG_1, 3),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ },
+ {
+ "unreachable",
+ .insns = {
+ BPF_EXIT_INSN(),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "unreachable",
+ .result = REJECT,
+ },
+ {
+ "unreachable2",
+ .insns = {
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "unreachable",
+ .result = REJECT,
+ },
+ {
+ "out of range jump",
+ .insns = {
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "jump out of range",
+ .result = REJECT,
+ },
+ {
+ "out of range jump2",
+ .insns = {
+ BPF_JMP_IMM(BPF_JA, 0, 0, -2),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "jump out of range",
+ .result = REJECT,
+ },
+ {
+ "test1 ld_imm64",
+ .insns = {
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
+ BPF_LD_IMM64(BPF_REG_0, 0),
+ BPF_LD_IMM64(BPF_REG_0, 0),
+ BPF_LD_IMM64(BPF_REG_0, 1),
+ BPF_LD_IMM64(BPF_REG_0, 1),
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid BPF_LD_IMM insn",
+ .errstr_unpriv = "R1 pointer comparison",
+ .result = REJECT,
+ },
+ {
+ "test2 ld_imm64",
+ .insns = {
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
+ BPF_LD_IMM64(BPF_REG_0, 0),
+ BPF_LD_IMM64(BPF_REG_0, 0),
+ BPF_LD_IMM64(BPF_REG_0, 1),
+ BPF_LD_IMM64(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid BPF_LD_IMM insn",
+ .errstr_unpriv = "R1 pointer comparison",
+ .result = REJECT,
+ },
+ {
+ "test3 ld_imm64",
+ .insns = {
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
+ BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, 0, 0, 0),
+ BPF_LD_IMM64(BPF_REG_0, 0),
+ BPF_LD_IMM64(BPF_REG_0, 0),
+ BPF_LD_IMM64(BPF_REG_0, 1),
+ BPF_LD_IMM64(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_ld_imm64 insn",
+ .result = REJECT,
+ },
+ {
+ "test4 ld_imm64",
+ .insns = {
+ BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, 0, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_ld_imm64 insn",
+ .result = REJECT,
+ },
+ {
+ "test5 ld_imm64",
+ .insns = {
+ BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, 0, 0, 0),
+ },
+ .errstr = "invalid bpf_ld_imm64 insn",
+ .result = REJECT,
+ },
+ {
+ "no bpf_exit",
+ .insns = {
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_0, BPF_REG_2),
+ },
+ .errstr = "jump out of range",
+ .result = REJECT,
+ },
+ {
+ "loop (back-edge)",
+ .insns = {
+ BPF_JMP_IMM(BPF_JA, 0, 0, -1),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "back-edge",
+ .result = REJECT,
+ },
+ {
+ "loop2 (back-edge)",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, -4),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "back-edge",
+ .result = REJECT,
+ },
+ {
+ "conditional loop",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, -3),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "back-edge",
+ .result = REJECT,
+ },
+ {
+ "read uninitialized register",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R2 !read_ok",
+ .result = REJECT,
+ },
+ {
+ "read invalid register",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_0, -1),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R15 is invalid",
+ .result = REJECT,
+ },
+ {
+ "program doesn't init R0 before exit",
+ .insns = {
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_2, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R0 !read_ok",
+ .result = REJECT,
+ },
+ {
+ "program doesn't init R0 before exit in all branches",
+ .insns = {
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 2),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R0 !read_ok",
+ .errstr_unpriv = "R1 pointer comparison",
+ .result = REJECT,
+ },
+ {
+ "stack out of bounds",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, 8, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid stack",
+ .result = REJECT,
+ },
+ {
+ "invalid call insn1",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL | BPF_X, 0, 0, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "BPF_CALL uses reserved",
+ .result = REJECT,
+ },
+ {
+ "invalid call insn2",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 1, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "BPF_CALL uses reserved",
+ .result = REJECT,
+ },
+ {
+ "invalid function call",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, 1234567),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid func 1234567",
+ .result = REJECT,
+ },
+ {
+ "uninitialized stack1",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_EXIT_INSN(),
+ },
+ .fixup = {2},
+ .errstr = "invalid indirect read from stack",
+ .result = REJECT,
+ },
+ {
+ "uninitialized stack2",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, -8),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid read from stack",
+ .result = REJECT,
+ },
+ {
+ "check valid spill/fill",
+ .insns = {
+ /* spill R1(ctx) into stack */
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
+
+ /* fill it back into R2 */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -8),
+
+ /* should be able to access R0 = *(R2 + 8) */
+ /* BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, 8), */
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "R0 leaks addr",
+ .result = ACCEPT,
+ .result_unpriv = REJECT,
+ },
+ {
+ "check corrupted spill/fill",
+ .insns = {
+ /* spill R1(ctx) into stack */
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
+
+ /* mess up with R1 pointer on stack */
+ BPF_ST_MEM(BPF_B, BPF_REG_10, -7, 0x23),
+
+ /* fill back into R0 should fail */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
+
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "attempt to corrupt spilled",
+ .errstr = "corrupted spill",
+ .result = REJECT,
+ },
+ {
+ "invalid src register in STX",
+ .insns = {
+ BPF_STX_MEM(BPF_B, BPF_REG_10, -1, -1),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R15 is invalid",
+ .result = REJECT,
+ },
+ {
+ "invalid dst register in STX",
+ .insns = {
+ BPF_STX_MEM(BPF_B, 14, BPF_REG_10, -1),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R14 is invalid",
+ .result = REJECT,
+ },
+ {
+ "invalid dst register in ST",
+ .insns = {
+ BPF_ST_MEM(BPF_B, 14, -1, -1),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R14 is invalid",
+ .result = REJECT,
+ },
+ {
+ "invalid src register in LDX",
+ .insns = {
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, 12, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R12 is invalid",
+ .result = REJECT,
+ },
+ {
+ "invalid dst register in LDX",
+ .insns = {
+ BPF_LDX_MEM(BPF_B, 11, BPF_REG_1, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R11 is invalid",
+ .result = REJECT,
+ },
+ {
+ "junk insn",
+ .insns = {
+ BPF_RAW_INSN(0, 0, 0, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid BPF_LD_IMM",
+ .result = REJECT,
+ },
+ {
+ "junk insn2",
+ .insns = {
+ BPF_RAW_INSN(1, 0, 0, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "BPF_LDX uses reserved fields",
+ .result = REJECT,
+ },
+ {
+ "junk insn3",
+ .insns = {
+ BPF_RAW_INSN(-1, 0, 0, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid BPF_ALU opcode f0",
+ .result = REJECT,
+ },
+ {
+ "junk insn4",
+ .insns = {
+ BPF_RAW_INSN(-1, -1, -1, -1, -1),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid BPF_ALU opcode f0",
+ .result = REJECT,
+ },
+ {
+ "junk insn5",
+ .insns = {
+ BPF_RAW_INSN(0x7f, -1, -1, -1, -1),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "BPF_ALU uses reserved fields",
+ .result = REJECT,
+ },
+ {
+ "misaligned read from stack",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, -4),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "misaligned access",
+ .result = REJECT,
+ },
+ {
+ "invalid map_fd for function call",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_delete_elem),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "fd 0 is not pointing to valid bpf_map",
+ .result = REJECT,
+ },
+ {
+ "don't check return value before access",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup = {3},
+ .errstr = "R0 invalid mem access 'map_value_or_null'",
+ .result = REJECT,
+ },
+ {
+ "access memory with incorrect alignment",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 4, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup = {3},
+ .errstr = "misaligned access",
+ .result = REJECT,
+ },
+ {
+ "sometimes access memory with incorrect alignment",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+ BPF_EXIT_INSN(),
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup = {3},
+ .errstr = "R0 invalid mem access",
+ .errstr_unpriv = "R0 leaks addr",
+ .result = REJECT,
+ },
+ {
+ "jump test 1",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -8),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, -8, 0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 1, 1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, -16, 1),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 2, 1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, -8, 2),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 3, 1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, -16, 3),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 4, 1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, -8, 4),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 5, 1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, -32, 5),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "R1 pointer comparison",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
+ {
+ "jump test 2",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 2),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, -8, 0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 14),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 1, 2),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, -16, 0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 11),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 2, 2),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, -32, 0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 8),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 3, 2),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, -40, 0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 5),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 4, 2),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, -48, 0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 2),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 5, 1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, -56, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "R1 pointer comparison",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
+ {
+ "jump test 3",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 3),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, -8, 0),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 19),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 1, 3),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, -16, 0),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 15),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 2, 3),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, -32, 0),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -32),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 11),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 3, 3),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, -40, 0),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -40),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 7),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 4, 3),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, -48, 0),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -48),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 3),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 5, 0),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, -56, 0),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -56),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_delete_elem),
+ BPF_EXIT_INSN(),
+ },
+ .fixup = {24},
+ .errstr_unpriv = "R1 pointer comparison",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
+ {
+ "jump test 4",
+ .insns = {
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 1),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 2),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 3),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 4),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 1),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 2),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 3),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 4),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 1),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 2),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 3),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 4),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 1),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 2),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 3),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 4),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 1),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 2),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 3),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 4),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 1),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 2),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 3),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 4),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 1),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 2),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 3),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 4),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 1),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 2),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 3),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 4),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 1),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 2),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 3),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 4),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "R1 pointer comparison",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
+ {
+ "jump test 5",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_2),
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 2),
+ BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, -8),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 2),
+ BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_2, -8),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 2),
+ BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, -8),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 2),
+ BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_2, -8),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 2),
+ BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, -8),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 2),
+ BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_2, -8),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 2),
+ BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, -8),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 2),
+ BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_2, -8),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 2),
+ BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, -8),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 2),
+ BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_2, -8),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "R1 pointer comparison",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
+ {
+ "access skb fields ok",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, len)),
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 1),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, mark)),
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 1),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, pkt_type)),
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 1),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, queue_mapping)),
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, protocol)),
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, vlan_present)),
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, vlan_tci)),
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ },
+ {
+ "access skb fields bad1",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -4),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ },
+ {
+ "access skb fields bad2",
+ .insns = {
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 9),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, pkt_type)),
+ BPF_EXIT_INSN(),
+ },
+ .fixup = {4},
+ .errstr = "different pointers",
+ .errstr_unpriv = "R1 pointer comparison",
+ .result = REJECT,
+ },
+ {
+ "access skb fields bad3",
+ .insns = {
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 2),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, pkt_type)),
+ BPF_EXIT_INSN(),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, -12),
+ },
+ .fixup = {6},
+ .errstr = "different pointers",
+ .errstr_unpriv = "R1 pointer comparison",
+ .result = REJECT,
+ },
+ {
+ "access skb fields bad4",
+ .insns = {
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 3),
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1,
+ offsetof(struct __sk_buff, len)),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, -13),
+ },
+ .fixup = {7},
+ .errstr = "different pointers",
+ .errstr_unpriv = "R1 pointer comparison",
+ .result = REJECT,
+ },
+ {
+ "check skb->mark is not writeable by sockets",
+ .insns = {
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_1,
+ offsetof(struct __sk_buff, mark)),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .errstr_unpriv = "R1 leaks addr",
+ .result = REJECT,
+ },
+ {
+ "check skb->tc_index is not writeable by sockets",
+ .insns = {
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_1,
+ offsetof(struct __sk_buff, tc_index)),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .errstr_unpriv = "R1 leaks addr",
+ .result = REJECT,
+ },
+ {
+ "check non-u32 access to cb",
+ .insns = {
+ BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[0])),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .errstr_unpriv = "R1 leaks addr",
+ .result = REJECT,
+ },
+ {
+ "check out of range skb->cb access",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[0]) + 256),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .errstr_unpriv = "",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SCHED_ACT,
+ },
+ {
+ "write skb fields from socket prog",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[4])),
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 1),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, mark)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, tc_index)),
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 1),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[0])),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[2])),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .errstr_unpriv = "R1 leaks addr",
+ .result_unpriv = REJECT,
+ },
+ {
+ "write skb fields from tc_cls_act prog",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[0])),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, mark)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, tc_index)),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, tc_index)),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[3])),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "PTR_TO_STACK store/load",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -10),
+ BPF_ST_MEM(BPF_DW, BPF_REG_1, 2, 0xfaceb00c),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 2),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ },
+ {
+ "PTR_TO_STACK store/load - bad alignment on off",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_1, 2, 0xfaceb00c),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 2),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "misaligned access off -6 size 8",
+ },
+ {
+ "PTR_TO_STACK store/load - bad alignment on reg",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -10),
+ BPF_ST_MEM(BPF_DW, BPF_REG_1, 8, 0xfaceb00c),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 8),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "misaligned access off -2 size 8",
+ },
+ {
+ "PTR_TO_STACK store/load - out of bounds low",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -80000),
+ BPF_ST_MEM(BPF_DW, BPF_REG_1, 8, 0xfaceb00c),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 8),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "invalid stack off=-79992 size=8",
+ },
+ {
+ "PTR_TO_STACK store/load - out of bounds high",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_1, 8, 0xfaceb00c),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 8),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "invalid stack off=0 size=8",
+ },
+ {
+ "unpriv: return pointer",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_10),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "R0 leaks addr",
+ },
+ {
+ "unpriv: add const to pointer",
+ .insns = {
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "R1 pointer arithmetic",
+ },
+ {
+ "unpriv: add pointer to pointer",
+ .insns = {
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_10),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "R1 pointer arithmetic",
+ },
+ {
+ "unpriv: neg pointer",
+ .insns = {
+ BPF_ALU64_IMM(BPF_NEG, BPF_REG_1, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "R1 pointer arithmetic",
+ },
+ {
+ "unpriv: cmp pointer with const",
+ .insns = {
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "R1 pointer comparison",
+ },
+ {
+ "unpriv: cmp pointer with pointer",
+ .insns = {
+ BPF_JMP_REG(BPF_JEQ, BPF_REG_1, BPF_REG_10, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "R10 pointer comparison",
+ },
+ {
+ "unpriv: check that printk is disallowed",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_MOV64_IMM(BPF_REG_2, 8),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_1),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_trace_printk),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "unknown func 6",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
+ {
+ "unpriv: pass pointer to helper function",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_2),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_update_elem),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup = {3},
+ .errstr_unpriv = "R4 leaks addr",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
+ {
+ "unpriv: indirectly pass pointer on stack to helper function",
+ .insns = {
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_10, -8),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup = {3},
+ .errstr = "invalid indirect read from stack off -8+0 size 8",
+ .result = REJECT,
+ },
+ {
+ "unpriv: mangle pointer on stack 1",
+ .insns = {
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_10, -8),
+ BPF_ST_MEM(BPF_W, BPF_REG_10, -8, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "attempt to corrupt spilled",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
+ {
+ "unpriv: mangle pointer on stack 2",
+ .insns = {
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_10, -8),
+ BPF_ST_MEM(BPF_B, BPF_REG_10, -1, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "attempt to corrupt spilled",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
+ {
+ "unpriv: read pointer from stack in small chunks",
+ .insns = {
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_10, -8),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_10, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid size",
+ .result = REJECT,
+ },
+ {
+ "unpriv: write pointer into ctx",
+ .insns = {
+ BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "R1 leaks addr",
+ .result_unpriv = REJECT,
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ },
+ {
+ "unpriv: write pointer into map elem value",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+ BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup = {3},
+ .errstr_unpriv = "R0 leaks addr",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
+ {
+ "unpriv: partial copy of pointer",
+ .insns = {
+ BPF_MOV32_REG(BPF_REG_1, BPF_REG_10),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "R10 partial copy",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
+ {
+ "unpriv: pass pointer to tail_call",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_1),
+ BPF_LD_MAP_FD(BPF_REG_2, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tail_call),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_array_fixup = {1},
+ .errstr_unpriv = "R3 leaks addr into helper",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
+ {
+ "unpriv: cmp map pointer with zero",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_1, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup = {1},
+ .errstr_unpriv = "R1 pointer comparison",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
+ {
+ "unpriv: write into frame pointer",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_10, BPF_REG_1),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "frame pointer is read only",
+ .result = REJECT,
+ },
+ {
+ "unpriv: cmp of frame pointer",
+ .insns = {
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_10, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "R10 pointer comparison",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
+ {
+ "unpriv: cmp of stack pointer",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_2, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "R2 pointer comparison",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
+ {
+ "unpriv: obfuscate stack pointer",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "R2 pointer arithmetic",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
+};
+
+static int probe_filter_length(struct bpf_insn *fp)
+{
+ int len = 0;
+
+ for (len = MAX_INSNS - 1; len > 0; --len)
+ if (fp[len].code != 0 || fp[len].imm != 0)
+ break;
+
+ return len + 1;
+}
+
+static int create_map(void)
+{
+ int map_fd;
+
+ map_fd = bpf_create_map(BPF_MAP_TYPE_HASH,
+ sizeof(long long), sizeof(long long), 1024);
+ if (map_fd < 0)
+ printf("failed to create map '%s'\n", strerror(errno));
+
+ return map_fd;
+}
+
+static int create_prog_array(void)
+{
+ int map_fd;
+
+ map_fd = bpf_create_map(BPF_MAP_TYPE_PROG_ARRAY,
+ sizeof(int), sizeof(int), 4);
+ if (map_fd < 0)
+ printf("failed to create prog_array '%s'\n", strerror(errno));
+
+ return map_fd;
+}
+
+static int test(void)
+{
+ int prog_fd, i, pass_cnt = 0, err_cnt = 0;
+ bool unpriv = geteuid() != 0;
+
+ for (i = 0; i < ARRAY_SIZE(tests); i++) {
+ struct bpf_insn *prog = tests[i].insns;
+ int prog_type = tests[i].prog_type;
+ int prog_len = probe_filter_length(prog);
+ int *fixup = tests[i].fixup;
+ int *prog_array_fixup = tests[i].prog_array_fixup;
+ int expected_result;
+ const char *expected_errstr;
+ int map_fd = -1, prog_array_fd = -1;
+
+ if (*fixup) {
+ map_fd = create_map();
+
+ do {
+ prog[*fixup].imm = map_fd;
+ fixup++;
+ } while (*fixup);
+ }
+ if (*prog_array_fixup) {
+ prog_array_fd = create_prog_array();
+
+ do {
+ prog[*prog_array_fixup].imm = prog_array_fd;
+ prog_array_fixup++;
+ } while (*prog_array_fixup);
+ }
+ printf("#%d %s ", i, tests[i].descr);
+
+ prog_fd = bpf_prog_load(prog_type ?: BPF_PROG_TYPE_SOCKET_FILTER,
+ prog, prog_len * sizeof(struct bpf_insn),
+ "GPL", 0);
+
+ if (unpriv && tests[i].result_unpriv != UNDEF)
+ expected_result = tests[i].result_unpriv;
+ else
+ expected_result = tests[i].result;
+
+ if (unpriv && tests[i].errstr_unpriv)
+ expected_errstr = tests[i].errstr_unpriv;
+ else
+ expected_errstr = tests[i].errstr;
+
+ if (expected_result == ACCEPT) {
+ if (prog_fd < 0) {
+ printf("FAIL\nfailed to load prog '%s'\n",
+ strerror(errno));
+ printf("%s", bpf_log_buf);
+ err_cnt++;
+ goto fail;
+ }
+ } else {
+ if (prog_fd >= 0) {
+ printf("FAIL\nunexpected success to load\n");
+ printf("%s", bpf_log_buf);
+ err_cnt++;
+ goto fail;
+ }
+ if (strstr(bpf_log_buf, expected_errstr) == 0) {
+ printf("FAIL\nunexpected error message: %s",
+ bpf_log_buf);
+ err_cnt++;
+ goto fail;
+ }
+ }
+
+ pass_cnt++;
+ printf("OK\n");
+fail:
+ if (map_fd >= 0)
+ close(map_fd);
+ if (prog_array_fd >= 0)
+ close(prog_array_fd);
+ close(prog_fd);
+
+ }
+ printf("Summary: %d PASSED, %d FAILED\n", pass_cnt, err_cnt);
+
+ return 0;
+}
+
+int main(void)
+{
+ struct rlimit r = {1 << 20, 1 << 20};
+
+ setrlimit(RLIMIT_MEMLOCK, &r);
+ return test();
+}
diff --git a/samples/bpf/trace_output_kern.c b/samples/bpf/trace_output_kern.c
new file mode 100644
index 0000000..9b96f4f
--- /dev/null
+++ b/samples/bpf/trace_output_kern.c
@@ -0,0 +1,30 @@
+#include <linux/ptrace.h>
+#include <linux/version.h>
+#include <uapi/linux/bpf.h>
+#include "bpf_helpers.h"
+
+struct bpf_map_def SEC("maps") my_map = {
+ .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
+ .key_size = sizeof(int),
+ .value_size = sizeof(u32),
+ .max_entries = 2,
+};
+
+SEC("kprobe/sys_write")
+int bpf_prog1(struct pt_regs *ctx)
+{
+ struct S {
+ u64 pid;
+ u64 cookie;
+ } data;
+
+ data.pid = bpf_get_current_pid_tgid();
+ data.cookie = 0x12345678;
+
+ bpf_perf_event_output(ctx, &my_map, 0, &data, sizeof(data));
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/samples/bpf/trace_output_user.c b/samples/bpf/trace_output_user.c
new file mode 100644
index 0000000..661a7d0
--- /dev/null
+++ b/samples/bpf/trace_output_user.c
@@ -0,0 +1,196 @@
+/* This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <sys/ioctl.h>
+#include <linux/perf_event.h>
+#include <linux/bpf.h>
+#include <errno.h>
+#include <assert.h>
+#include <sys/syscall.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <time.h>
+#include <signal.h>
+#include "libbpf.h"
+#include "bpf_load.h"
+
+static int pmu_fd;
+
+int page_size;
+int page_cnt = 8;
+volatile struct perf_event_mmap_page *header;
+
+typedef void (*print_fn)(void *data, int size);
+
+static int perf_event_mmap(int fd)
+{
+ void *base;
+ int mmap_size;
+
+ page_size = getpagesize();
+ mmap_size = page_size * (page_cnt + 1);
+
+ base = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+ if (base == MAP_FAILED) {
+ printf("mmap err\n");
+ return -1;
+ }
+
+ header = base;
+ return 0;
+}
+
+static int perf_event_poll(int fd)
+{
+ struct pollfd pfd = { .fd = fd, .events = POLLIN };
+
+ return poll(&pfd, 1, 1000);
+}
+
+struct perf_event_sample {
+ struct perf_event_header header;
+ __u32 size;
+ char data[];
+};
+
+void perf_event_read(print_fn fn)
+{
+ __u64 data_tail = header->data_tail;
+ __u64 data_head = header->data_head;
+ __u64 buffer_size = page_cnt * page_size;
+ void *base, *begin, *end;
+ char buf[256];
+
+ asm volatile("" ::: "memory"); /* in real code it should be smp_rmb() */
+ if (data_head == data_tail)
+ return;
+
+ base = ((char *)header) + page_size;
+
+ begin = base + data_tail % buffer_size;
+ end = base + data_head % buffer_size;
+
+ while (begin != end) {
+ struct perf_event_sample *e;
+
+ e = begin;
+ if (begin + e->header.size > base + buffer_size) {
+ long len = base + buffer_size - begin;
+
+ assert(len < e->header.size);
+ memcpy(buf, begin, len);
+ memcpy(buf + len, base, e->header.size - len);
+ e = (void *) buf;
+ begin = base + e->header.size - len;
+ } else if (begin + e->header.size == base + buffer_size) {
+ begin = base;
+ } else {
+ begin += e->header.size;
+ }
+
+ if (e->header.type == PERF_RECORD_SAMPLE) {
+ fn(e->data, e->size);
+ } else if (e->header.type == PERF_RECORD_LOST) {
+ struct {
+ struct perf_event_header header;
+ __u64 id;
+ __u64 lost;
+ } *lost = (void *) e;
+ printf("lost %lld events\n", lost->lost);
+ } else {
+ printf("unknown event type=%d size=%d\n",
+ e->header.type, e->header.size);
+ }
+ }
+
+ __sync_synchronize(); /* smp_mb() */
+ header->data_tail = data_head;
+}
+
+static __u64 time_get_ns(void)
+{
+ struct timespec ts;
+
+ clock_gettime(CLOCK_MONOTONIC, &ts);
+ return ts.tv_sec * 1000000000ull + ts.tv_nsec;
+}
+
+static __u64 start_time;
+
+#define MAX_CNT 100000ll
+
+static void print_bpf_output(void *data, int size)
+{
+ static __u64 cnt;
+ struct {
+ __u64 pid;
+ __u64 cookie;
+ } *e = data;
+
+ if (e->cookie != 0x12345678) {
+ printf("BUG pid %llx cookie %llx sized %d\n",
+ e->pid, e->cookie, size);
+ kill(0, SIGINT);
+ }
+
+ cnt++;
+
+ if (cnt == MAX_CNT) {
+ printf("recv %lld events per sec\n",
+ MAX_CNT * 1000000000ll / (time_get_ns() - start_time));
+ kill(0, SIGINT);
+ }
+}
+
+static void test_bpf_perf_event(void)
+{
+ struct perf_event_attr attr = {
+ .sample_type = PERF_SAMPLE_RAW,
+ .type = PERF_TYPE_SOFTWARE,
+ .config = PERF_COUNT_SW_BPF_OUTPUT,
+ };
+ int key = 0;
+
+ pmu_fd = perf_event_open(&attr, -1/*pid*/, 0/*cpu*/, -1/*group_fd*/, 0);
+
+ assert(pmu_fd >= 0);
+ assert(bpf_update_elem(map_fd[0], &key, &pmu_fd, BPF_ANY) == 0);
+ ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0);
+}
+
+int main(int argc, char **argv)
+{
+ char filename[256];
+ FILE *f;
+
+ snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+
+ if (load_bpf_file(filename)) {
+ printf("%s", bpf_log_buf);
+ return 1;
+ }
+
+ test_bpf_perf_event();
+
+ if (perf_event_mmap(pmu_fd) < 0)
+ return 1;
+
+ f = popen("taskset 1 dd if=/dev/zero of=/dev/null", "r");
+ (void) f;
+
+ start_time = time_get_ns();
+ for (;;) {
+ perf_event_poll(pmu_fd);
+ perf_event_read(print_bpf_output);
+ }
+
+ return 0;
+}
diff --git a/samples/bpf/tracex1_kern.c b/samples/bpf/tracex1_kern.c
new file mode 100644
index 0000000..3f450a8
--- /dev/null
+++ b/samples/bpf/tracex1_kern.c
@@ -0,0 +1,50 @@
+/* Copyright (c) 2013-2015 PLUMgrid, http://plumgrid.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <uapi/linux/bpf.h>
+#include <linux/version.h>
+#include "bpf_helpers.h"
+
+#define _(P) ({typeof(P) val = 0; bpf_probe_read(&val, sizeof(val), &P); val;})
+
+/* kprobe is NOT a stable ABI
+ * kernel functions can be removed, renamed or completely change semantics.
+ * Number of arguments and their positions can change, etc.
+ * In such case this bpf+kprobe example will no longer be meaningful
+ */
+SEC("kprobe/__netif_receive_skb_core")
+int bpf_prog1(struct pt_regs *ctx)
+{
+ /* attaches to kprobe netif_receive_skb,
+ * looks for packets on loobpack device and prints them
+ */
+ char devname[IFNAMSIZ] = {};
+ struct net_device *dev;
+ struct sk_buff *skb;
+ int len;
+
+ /* non-portable! works for the given kernel only */
+ skb = (struct sk_buff *) PT_REGS_PARM1(ctx);
+
+ dev = _(skb->dev);
+
+ len = _(skb->len);
+
+ bpf_probe_read(devname, sizeof(devname), dev->name);
+
+ if (devname[0] == 'l' && devname[1] == 'o') {
+ char fmt[] = "skb %p len %d\n";
+ /* using bpf_trace_printk() for DEBUG ONLY */
+ bpf_trace_printk(fmt, sizeof(fmt), skb, len);
+ }
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/samples/bpf/tracex1_user.c b/samples/bpf/tracex1_user.c
new file mode 100644
index 0000000..31a4818
--- /dev/null
+++ b/samples/bpf/tracex1_user.c
@@ -0,0 +1,25 @@
+#include <stdio.h>
+#include <linux/bpf.h>
+#include <unistd.h>
+#include "libbpf.h"
+#include "bpf_load.h"
+
+int main(int ac, char **argv)
+{
+ FILE *f;
+ char filename[256];
+
+ snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+
+ if (load_bpf_file(filename)) {
+ printf("%s", bpf_log_buf);
+ return 1;
+ }
+
+ f = popen("taskset 1 ping -c5 localhost", "r");
+ (void) f;
+
+ read_trace_pipe();
+
+ return 0;
+}
diff --git a/samples/bpf/tracex2_kern.c b/samples/bpf/tracex2_kern.c
new file mode 100644
index 0000000..b32367c
--- /dev/null
+++ b/samples/bpf/tracex2_kern.c
@@ -0,0 +1,100 @@
+/* Copyright (c) 2013-2015 PLUMgrid, http://plumgrid.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/version.h>
+#include <uapi/linux/bpf.h>
+#include "bpf_helpers.h"
+
+struct bpf_map_def SEC("maps") my_map = {
+ .type = BPF_MAP_TYPE_HASH,
+ .key_size = sizeof(long),
+ .value_size = sizeof(long),
+ .max_entries = 1024,
+};
+
+/* kprobe is NOT a stable ABI. If kernel internals change this bpf+kprobe
+ * example will no longer be meaningful
+ */
+SEC("kprobe/kfree_skb")
+int bpf_prog2(struct pt_regs *ctx)
+{
+ long loc = 0;
+ long init_val = 1;
+ long *value;
+
+ /* x64/s390x specific: read ip of kfree_skb caller.
+ * non-portable version of __builtin_return_address(0)
+ */
+ bpf_probe_read(&loc, sizeof(loc), (void *)PT_REGS_RET(ctx));
+
+ value = bpf_map_lookup_elem(&my_map, &loc);
+ if (value)
+ *value += 1;
+ else
+ bpf_map_update_elem(&my_map, &loc, &init_val, BPF_ANY);
+ return 0;
+}
+
+static unsigned int log2(unsigned int v)
+{
+ unsigned int r;
+ unsigned int shift;
+
+ r = (v > 0xFFFF) << 4; v >>= r;
+ shift = (v > 0xFF) << 3; v >>= shift; r |= shift;
+ shift = (v > 0xF) << 2; v >>= shift; r |= shift;
+ shift = (v > 0x3) << 1; v >>= shift; r |= shift;
+ r |= (v >> 1);
+ return r;
+}
+
+static unsigned int log2l(unsigned long v)
+{
+ unsigned int hi = v >> 32;
+ if (hi)
+ return log2(hi) + 32;
+ else
+ return log2(v);
+}
+
+struct hist_key {
+ char comm[16];
+ u64 pid_tgid;
+ u64 uid_gid;
+ u32 index;
+};
+
+struct bpf_map_def SEC("maps") my_hist_map = {
+ .type = BPF_MAP_TYPE_HASH,
+ .key_size = sizeof(struct hist_key),
+ .value_size = sizeof(long),
+ .max_entries = 1024,
+};
+
+SEC("kprobe/sys_write")
+int bpf_prog3(struct pt_regs *ctx)
+{
+ long write_size = PT_REGS_PARM3(ctx);
+ long init_val = 1;
+ long *value;
+ struct hist_key key = {};
+
+ key.index = log2l(write_size);
+ key.pid_tgid = bpf_get_current_pid_tgid();
+ key.uid_gid = bpf_get_current_uid_gid();
+ bpf_get_current_comm(&key.comm, sizeof(key.comm));
+
+ value = bpf_map_lookup_elem(&my_hist_map, &key);
+ if (value)
+ __sync_fetch_and_add(value, 1);
+ else
+ bpf_map_update_elem(&my_hist_map, &key, &init_val, BPF_ANY);
+ return 0;
+}
+char _license[] SEC("license") = "GPL";
+u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/samples/bpf/tracex2_user.c b/samples/bpf/tracex2_user.c
new file mode 100644
index 0000000..cd0241c
--- /dev/null
+++ b/samples/bpf/tracex2_user.c
@@ -0,0 +1,144 @@
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <linux/bpf.h>
+#include <string.h>
+#include "libbpf.h"
+#include "bpf_load.h"
+
+#define MAX_INDEX 64
+#define MAX_STARS 38
+
+static void stars(char *str, long val, long max, int width)
+{
+ int i;
+
+ for (i = 0; i < (width * val / max) - 1 && i < width - 1; i++)
+ str[i] = '*';
+ if (val > max)
+ str[i - 1] = '+';
+ str[i] = '\0';
+}
+
+struct task {
+ char comm[16];
+ __u64 pid_tgid;
+ __u64 uid_gid;
+};
+
+struct hist_key {
+ struct task t;
+ __u32 index;
+};
+
+#define SIZE sizeof(struct task)
+
+static void print_hist_for_pid(int fd, void *task)
+{
+ struct hist_key key = {}, next_key;
+ char starstr[MAX_STARS];
+ long value;
+ long data[MAX_INDEX] = {};
+ int max_ind = -1;
+ long max_value = 0;
+ int i, ind;
+
+ while (bpf_get_next_key(fd, &key, &next_key) == 0) {
+ if (memcmp(&next_key, task, SIZE)) {
+ key = next_key;
+ continue;
+ }
+ bpf_lookup_elem(fd, &next_key, &value);
+ ind = next_key.index;
+ data[ind] = value;
+ if (value && ind > max_ind)
+ max_ind = ind;
+ if (value > max_value)
+ max_value = value;
+ key = next_key;
+ }
+
+ printf(" syscall write() stats\n");
+ printf(" byte_size : count distribution\n");
+ for (i = 1; i <= max_ind + 1; i++) {
+ stars(starstr, data[i - 1], max_value, MAX_STARS);
+ printf("%8ld -> %-8ld : %-8ld |%-*s|\n",
+ (1l << i) >> 1, (1l << i) - 1, data[i - 1],
+ MAX_STARS, starstr);
+ }
+}
+
+static void print_hist(int fd)
+{
+ struct hist_key key = {}, next_key;
+ static struct task tasks[1024];
+ int task_cnt = 0;
+ int i;
+
+ while (bpf_get_next_key(fd, &key, &next_key) == 0) {
+ int found = 0;
+
+ for (i = 0; i < task_cnt; i++)
+ if (memcmp(&tasks[i], &next_key, SIZE) == 0)
+ found = 1;
+ if (!found)
+ memcpy(&tasks[task_cnt++], &next_key, SIZE);
+ key = next_key;
+ }
+
+ for (i = 0; i < task_cnt; i++) {
+ printf("\npid %d cmd %s uid %d\n",
+ (__u32) tasks[i].pid_tgid,
+ tasks[i].comm,
+ (__u32) tasks[i].uid_gid);
+ print_hist_for_pid(fd, &tasks[i]);
+ }
+
+}
+
+static void int_exit(int sig)
+{
+ print_hist(map_fd[1]);
+ exit(0);
+}
+
+int main(int ac, char **argv)
+{
+ char filename[256];
+ long key, next_key, value;
+ FILE *f;
+ int i;
+
+ snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+
+ signal(SIGINT, int_exit);
+
+ /* start 'ping' in the background to have some kfree_skb events */
+ f = popen("ping -c5 localhost", "r");
+ (void) f;
+
+ /* start 'dd' in the background to have plenty of 'write' syscalls */
+ f = popen("dd if=/dev/zero of=/dev/null count=5000000", "r");
+ (void) f;
+
+ if (load_bpf_file(filename)) {
+ printf("%s", bpf_log_buf);
+ return 1;
+ }
+
+ for (i = 0; i < 5; i++) {
+ key = 0;
+ while (bpf_get_next_key(map_fd[0], &key, &next_key) == 0) {
+ bpf_lookup_elem(map_fd[0], &next_key, &value);
+ printf("location 0x%lx count %ld\n", next_key, value);
+ key = next_key;
+ }
+ if (key)
+ printf("\n");
+ sleep(1);
+ }
+ print_hist(map_fd[1]);
+
+ return 0;
+}
diff --git a/samples/bpf/tracex3_kern.c b/samples/bpf/tracex3_kern.c
new file mode 100644
index 0000000..bf337fb
--- /dev/null
+++ b/samples/bpf/tracex3_kern.c
@@ -0,0 +1,89 @@
+/* Copyright (c) 2013-2015 PLUMgrid, http://plumgrid.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/version.h>
+#include <uapi/linux/bpf.h>
+#include "bpf_helpers.h"
+
+struct bpf_map_def SEC("maps") my_map = {
+ .type = BPF_MAP_TYPE_HASH,
+ .key_size = sizeof(long),
+ .value_size = sizeof(u64),
+ .max_entries = 4096,
+};
+
+/* kprobe is NOT a stable ABI. If kernel internals change this bpf+kprobe
+ * example will no longer be meaningful
+ */
+SEC("kprobe/blk_mq_start_request")
+int bpf_prog1(struct pt_regs *ctx)
+{
+ long rq = PT_REGS_PARM1(ctx);
+ u64 val = bpf_ktime_get_ns();
+
+ bpf_map_update_elem(&my_map, &rq, &val, BPF_ANY);
+ return 0;
+}
+
+static unsigned int log2l(unsigned long long n)
+{
+#define S(k) if (n >= (1ull << k)) { i += k; n >>= k; }
+ int i = -(n == 0);
+ S(32); S(16); S(8); S(4); S(2); S(1);
+ return i;
+#undef S
+}
+
+#define SLOTS 100
+
+struct bpf_map_def SEC("maps") lat_map = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(u32),
+ .value_size = sizeof(u64),
+ .max_entries = SLOTS,
+};
+
+SEC("kprobe/blk_update_request")
+int bpf_prog2(struct pt_regs *ctx)
+{
+ long rq = PT_REGS_PARM1(ctx);
+ u64 *value, l, base;
+ u32 index;
+
+ value = bpf_map_lookup_elem(&my_map, &rq);
+ if (!value)
+ return 0;
+
+ u64 cur_time = bpf_ktime_get_ns();
+ u64 delta = cur_time - *value;
+
+ bpf_map_delete_elem(&my_map, &rq);
+
+ /* the lines below are computing index = log10(delta)*10
+ * using integer arithmetic
+ * index = 29 ~ 1 usec
+ * index = 59 ~ 1 msec
+ * index = 89 ~ 1 sec
+ * index = 99 ~ 10sec or more
+ * log10(x)*10 = log2(x)*10/log2(10) = log2(x)*3
+ */
+ l = log2l(delta);
+ base = 1ll << l;
+ index = (l * 64 + (delta - base) * 64 / base) * 3 / 64;
+
+ if (index >= SLOTS)
+ index = SLOTS - 1;
+
+ value = bpf_map_lookup_elem(&lat_map, &index);
+ if (value)
+ __sync_fetch_and_add((long *)value, 1);
+
+ return 0;
+}
+char _license[] SEC("license") = "GPL";
+u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/samples/bpf/tracex3_user.c b/samples/bpf/tracex3_user.c
new file mode 100644
index 0000000..0aaa933
--- /dev/null
+++ b/samples/bpf/tracex3_user.c
@@ -0,0 +1,150 @@
+/* Copyright (c) 2013-2015 PLUMgrid, http://plumgrid.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <unistd.h>
+#include <stdbool.h>
+#include <string.h>
+#include <linux/bpf.h>
+#include "libbpf.h"
+#include "bpf_load.h"
+
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x)))
+
+#define SLOTS 100
+
+static void clear_stats(int fd)
+{
+ __u32 key;
+ __u64 value = 0;
+
+ for (key = 0; key < SLOTS; key++)
+ bpf_update_elem(fd, &key, &value, BPF_ANY);
+}
+
+const char *color[] = {
+ "\033[48;5;255m",
+ "\033[48;5;252m",
+ "\033[48;5;250m",
+ "\033[48;5;248m",
+ "\033[48;5;246m",
+ "\033[48;5;244m",
+ "\033[48;5;242m",
+ "\033[48;5;240m",
+ "\033[48;5;238m",
+ "\033[48;5;236m",
+ "\033[48;5;234m",
+ "\033[48;5;232m",
+};
+const int num_colors = ARRAY_SIZE(color);
+
+const char nocolor[] = "\033[00m";
+
+const char *sym[] = {
+ " ",
+ " ",
+ ".",
+ ".",
+ "*",
+ "*",
+ "o",
+ "o",
+ "O",
+ "O",
+ "#",
+ "#",
+};
+
+bool full_range = false;
+bool text_only = false;
+
+static void print_banner(void)
+{
+ if (full_range)
+ printf("|1ns |10ns |100ns |1us |10us |100us"
+ " |1ms |10ms |100ms |1s |10s\n");
+ else
+ printf("|1us |10us |100us |1ms |10ms "
+ "|100ms |1s |10s\n");
+}
+
+static void print_hist(int fd)
+{
+ __u32 key;
+ __u64 value;
+ __u64 cnt[SLOTS];
+ __u64 max_cnt = 0;
+ __u64 total_events = 0;
+
+ for (key = 0; key < SLOTS; key++) {
+ value = 0;
+ bpf_lookup_elem(fd, &key, &value);
+ cnt[key] = value;
+ total_events += value;
+ if (value > max_cnt)
+ max_cnt = value;
+ }
+ clear_stats(fd);
+ for (key = full_range ? 0 : 29; key < SLOTS; key++) {
+ int c = num_colors * cnt[key] / (max_cnt + 1);
+
+ if (text_only)
+ printf("%s", sym[c]);
+ else
+ printf("%s %s", color[c], nocolor);
+ }
+ printf(" # %lld\n", total_events);
+}
+
+int main(int ac, char **argv)
+{
+ char filename[256];
+ int i;
+
+ snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+
+ if (load_bpf_file(filename)) {
+ printf("%s", bpf_log_buf);
+ return 1;
+ }
+
+ for (i = 1; i < ac; i++) {
+ if (strcmp(argv[i], "-a") == 0) {
+ full_range = true;
+ } else if (strcmp(argv[i], "-t") == 0) {
+ text_only = true;
+ } else if (strcmp(argv[i], "-h") == 0) {
+ printf("Usage:\n"
+ " -a display wider latency range\n"
+ " -t text only\n");
+ return 1;
+ }
+ }
+
+ printf(" heatmap of IO latency\n");
+ if (text_only)
+ printf(" %s", sym[num_colors - 1]);
+ else
+ printf(" %s %s", color[num_colors - 1], nocolor);
+ printf(" - many events with this latency\n");
+
+ if (text_only)
+ printf(" %s", sym[0]);
+ else
+ printf(" %s %s", color[0], nocolor);
+ printf(" - few events\n");
+
+ for (i = 0; ; i++) {
+ if (i % 20 == 0)
+ print_banner();
+ print_hist(map_fd[1]);
+ sleep(2);
+ }
+
+ return 0;
+}
diff --git a/samples/bpf/tracex4_kern.c b/samples/bpf/tracex4_kern.c
new file mode 100644
index 0000000..ac46714
--- /dev/null
+++ b/samples/bpf/tracex4_kern.c
@@ -0,0 +1,54 @@
+/* Copyright (c) 2015 PLUMgrid, http://plumgrid.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <linux/ptrace.h>
+#include <linux/version.h>
+#include <uapi/linux/bpf.h>
+#include "bpf_helpers.h"
+
+struct pair {
+ u64 val;
+ u64 ip;
+};
+
+struct bpf_map_def SEC("maps") my_map = {
+ .type = BPF_MAP_TYPE_HASH,
+ .key_size = sizeof(long),
+ .value_size = sizeof(struct pair),
+ .max_entries = 1000000,
+};
+
+/* kprobe is NOT a stable ABI. If kernel internals change this bpf+kprobe
+ * example will no longer be meaningful
+ */
+SEC("kprobe/kmem_cache_free")
+int bpf_prog1(struct pt_regs *ctx)
+{
+ long ptr = PT_REGS_PARM2(ctx);
+
+ bpf_map_delete_elem(&my_map, &ptr);
+ return 0;
+}
+
+SEC("kretprobe/kmem_cache_alloc_node")
+int bpf_prog2(struct pt_regs *ctx)
+{
+ long ptr = PT_REGS_RC(ctx);
+ long ip = 0;
+
+ /* get ip address of kmem_cache_alloc_node() caller */
+ bpf_probe_read(&ip, sizeof(ip), (void *)(PT_REGS_FP(ctx) + sizeof(ip)));
+
+ struct pair v = {
+ .val = bpf_ktime_get_ns(),
+ .ip = ip,
+ };
+
+ bpf_map_update_elem(&my_map, &ptr, &v, BPF_ANY);
+ return 0;
+}
+char _license[] SEC("license") = "GPL";
+u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/samples/bpf/tracex4_user.c b/samples/bpf/tracex4_user.c
new file mode 100644
index 0000000..bc4a3bd
--- /dev/null
+++ b/samples/bpf/tracex4_user.c
@@ -0,0 +1,69 @@
+/* Copyright (c) 2015 PLUMgrid, http://plumgrid.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <unistd.h>
+#include <stdbool.h>
+#include <string.h>
+#include <time.h>
+#include <linux/bpf.h>
+#include "libbpf.h"
+#include "bpf_load.h"
+
+struct pair {
+ long long val;
+ __u64 ip;
+};
+
+static __u64 time_get_ns(void)
+{
+ struct timespec ts;
+
+ clock_gettime(CLOCK_MONOTONIC, &ts);
+ return ts.tv_sec * 1000000000ull + ts.tv_nsec;
+}
+
+static void print_old_objects(int fd)
+{
+ long long val = time_get_ns();
+ __u64 key, next_key;
+ struct pair v;
+
+ key = write(1, "\e[1;1H\e[2J", 12); /* clear screen */
+
+ key = -1;
+ while (bpf_get_next_key(map_fd[0], &key, &next_key) == 0) {
+ bpf_lookup_elem(map_fd[0], &next_key, &v);
+ key = next_key;
+ if (val - v.val < 1000000000ll)
+ /* object was allocated more then 1 sec ago */
+ continue;
+ printf("obj 0x%llx is %2lldsec old was allocated at ip %llx\n",
+ next_key, (val - v.val) / 1000000000ll, v.ip);
+ }
+}
+
+int main(int ac, char **argv)
+{
+ char filename[256];
+ int i;
+
+ snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+
+ if (load_bpf_file(filename)) {
+ printf("%s", bpf_log_buf);
+ return 1;
+ }
+
+ for (i = 0; ; i++) {
+ print_old_objects(map_fd[1]);
+ sleep(1);
+ }
+
+ return 0;
+}
diff --git a/samples/bpf/tracex5_kern.c b/samples/bpf/tracex5_kern.c
new file mode 100644
index 0000000..b3f4295
--- /dev/null
+++ b/samples/bpf/tracex5_kern.c
@@ -0,0 +1,75 @@
+/* Copyright (c) 2015 PLUMgrid, http://plumgrid.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <linux/ptrace.h>
+#include <linux/version.h>
+#include <uapi/linux/bpf.h>
+#include <uapi/linux/seccomp.h>
+#include "bpf_helpers.h"
+
+#define PROG(F) SEC("kprobe/"__stringify(F)) int bpf_func_##F
+
+struct bpf_map_def SEC("maps") progs = {
+ .type = BPF_MAP_TYPE_PROG_ARRAY,
+ .key_size = sizeof(u32),
+ .value_size = sizeof(u32),
+ .max_entries = 1024,
+};
+
+SEC("kprobe/seccomp_phase1")
+int bpf_prog1(struct pt_regs *ctx)
+{
+ struct seccomp_data sd = {};
+
+ bpf_probe_read(&sd, sizeof(sd), (void *)PT_REGS_PARM1(ctx));
+
+ /* dispatch into next BPF program depending on syscall number */
+ bpf_tail_call(ctx, &progs, sd.nr);
+
+ /* fall through -> unknown syscall */
+ if (sd.nr >= __NR_getuid && sd.nr <= __NR_getsid) {
+ char fmt[] = "syscall=%d (one of get/set uid/pid/gid)\n";
+ bpf_trace_printk(fmt, sizeof(fmt), sd.nr);
+ }
+ return 0;
+}
+
+/* we jump here when syscall number == __NR_write */
+PROG(__NR_write)(struct pt_regs *ctx)
+{
+ struct seccomp_data sd = {};
+
+ bpf_probe_read(&sd, sizeof(sd), (void *)PT_REGS_PARM1(ctx));
+ if (sd.args[2] == 512) {
+ char fmt[] = "write(fd=%d, buf=%p, size=%d)\n";
+ bpf_trace_printk(fmt, sizeof(fmt),
+ sd.args[0], sd.args[1], sd.args[2]);
+ }
+ return 0;
+}
+
+PROG(__NR_read)(struct pt_regs *ctx)
+{
+ struct seccomp_data sd = {};
+
+ bpf_probe_read(&sd, sizeof(sd), (void *)PT_REGS_PARM1(ctx));
+ if (sd.args[2] > 128 && sd.args[2] <= 1024) {
+ char fmt[] = "read(fd=%d, buf=%p, size=%d)\n";
+ bpf_trace_printk(fmt, sizeof(fmt),
+ sd.args[0], sd.args[1], sd.args[2]);
+ }
+ return 0;
+}
+
+PROG(__NR_mmap)(struct pt_regs *ctx)
+{
+ char fmt[] = "mmap\n";
+ bpf_trace_printk(fmt, sizeof(fmt));
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/samples/bpf/tracex5_user.c b/samples/bpf/tracex5_user.c
new file mode 100644
index 0000000..a04dd3c
--- /dev/null
+++ b/samples/bpf/tracex5_user.c
@@ -0,0 +1,46 @@
+#include <stdio.h>
+#include <linux/bpf.h>
+#include <unistd.h>
+#include <linux/filter.h>
+#include <linux/seccomp.h>
+#include <sys/prctl.h>
+#include "libbpf.h"
+#include "bpf_load.h"
+
+/* install fake seccomp program to enable seccomp code path inside the kernel,
+ * so that our kprobe attached to seccomp_phase1() can be triggered
+ */
+static void install_accept_all_seccomp(void)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)(sizeof(filter)/sizeof(filter[0])),
+ .filter = filter,
+ };
+ if (prctl(PR_SET_SECCOMP, 2, &prog))
+ perror("prctl");
+}
+
+int main(int ac, char **argv)
+{
+ FILE *f;
+ char filename[256];
+
+ snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+
+ if (load_bpf_file(filename)) {
+ printf("%s", bpf_log_buf);
+ return 1;
+ }
+
+ install_accept_all_seccomp();
+
+ f = popen("dd if=/dev/zero of=/dev/null count=5", "r");
+ (void) f;
+
+ read_trace_pipe();
+
+ return 0;
+}
diff --git a/samples/bpf/tracex6_kern.c b/samples/bpf/tracex6_kern.c
new file mode 100644
index 0000000..be479c4
--- /dev/null
+++ b/samples/bpf/tracex6_kern.c
@@ -0,0 +1,27 @@
+#include <linux/ptrace.h>
+#include <linux/version.h>
+#include <uapi/linux/bpf.h>
+#include "bpf_helpers.h"
+
+struct bpf_map_def SEC("maps") my_map = {
+ .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
+ .key_size = sizeof(int),
+ .value_size = sizeof(u32),
+ .max_entries = 32,
+};
+
+SEC("kprobe/sys_write")
+int bpf_prog1(struct pt_regs *ctx)
+{
+ u64 count;
+ u32 key = bpf_get_smp_processor_id();
+ char fmt[] = "CPU-%d %llu\n";
+
+ count = bpf_perf_event_read(&my_map, key);
+ bpf_trace_printk(fmt, sizeof(fmt), key, count);
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/samples/bpf/tracex6_user.c b/samples/bpf/tracex6_user.c
new file mode 100644
index 0000000..8ea4976
--- /dev/null
+++ b/samples/bpf/tracex6_user.c
@@ -0,0 +1,72 @@
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <sys/ioctl.h>
+#include <linux/perf_event.h>
+#include <linux/bpf.h>
+#include "libbpf.h"
+#include "bpf_load.h"
+
+#define SAMPLE_PERIOD 0x7fffffffffffffffULL
+
+static void test_bpf_perf_event(void)
+{
+ int nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
+ int *pmu_fd = malloc(nr_cpus * sizeof(int));
+ int status, i;
+
+ struct perf_event_attr attr_insn_pmu = {
+ .freq = 0,
+ .sample_period = SAMPLE_PERIOD,
+ .inherit = 0,
+ .type = PERF_TYPE_HARDWARE,
+ .read_format = 0,
+ .sample_type = 0,
+ .config = 0,/* PMU: cycles */
+ };
+
+ for (i = 0; i < nr_cpus; i++) {
+ pmu_fd[i] = perf_event_open(&attr_insn_pmu, -1/*pid*/, i/*cpu*/, -1/*group_fd*/, 0);
+ if (pmu_fd[i] < 0) {
+ printf("event syscall failed\n");
+ goto exit;
+ }
+
+ bpf_update_elem(map_fd[0], &i, &pmu_fd[i], BPF_ANY);
+ ioctl(pmu_fd[i], PERF_EVENT_IOC_ENABLE, 0);
+ }
+
+ status = system("ls > /dev/null");
+ if (status)
+ goto exit;
+ status = system("sleep 2");
+ if (status)
+ goto exit;
+
+exit:
+ for (i = 0; i < nr_cpus; i++)
+ close(pmu_fd[i]);
+ close(map_fd[0]);
+ free(pmu_fd);
+}
+
+int main(int argc, char **argv)
+{
+ char filename[256];
+
+ snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+
+ if (load_bpf_file(filename)) {
+ printf("%s", bpf_log_buf);
+ return 1;
+ }
+
+ test_bpf_perf_event();
+ read_trace_pipe();
+
+ return 0;
+}