From 2bf8c7e735ac8ab81eebd672caee41c77ec1b793 Mon Sep 17 00:00:00 2001
From: "Daniel T. Lee" <danieltimlee@gmail.com>
Date: Tue, 18 Aug 2020 14:16:41 +0900
Subject: samples: bpf: Fix broken bpf programs due to removed symbol

Since commit f1394b798814 ("block: mark blk_account_io_completion
static"), the symbol blk_account_io_completion() has been static, so it
is no longer possible to attach a kprobe to it. Several samples are
currently broken for this reason.

As a fix, attach the kprobe events to blk_account_io_done() instead, so
that the samples behave as they did before.

Signed-off-by: Daniel T. Lee <danieltimlee@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20200818051641.21724-1-danieltimlee@gmail.com
---
 samples/bpf/task_fd_query_kern.c | 2 +-
 samples/bpf/task_fd_query_user.c | 2 +-
 samples/bpf/tracex3_kern.c       | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'samples')

diff --git a/samples/bpf/task_fd_query_kern.c b/samples/bpf/task_fd_query_kern.c
index 278ade5427c8..c821294e1774 100644
--- a/samples/bpf/task_fd_query_kern.c
+++ b/samples/bpf/task_fd_query_kern.c
@@ -10,7 +10,7 @@ int bpf_prog1(struct pt_regs *ctx)
 	return 0;
 }
 
-SEC("kretprobe/blk_account_io_completion")
+SEC("kretprobe/blk_account_io_done")
 int bpf_prog2(struct pt_regs *ctx)
 {
 	return 0;
diff --git a/samples/bpf/task_fd_query_user.c b/samples/bpf/task_fd_query_user.c
index ff2e9c1c7266..4a74531dc403 100644
--- a/samples/bpf/task_fd_query_user.c
+++ b/samples/bpf/task_fd_query_user.c
@@ -314,7 +314,7 @@ int main(int argc, char **argv)
 	/* test two functions in the corresponding *_kern.c file */
 	CHECK_AND_RET(test_debug_fs_kprobe(0, "blk_mq_start_request",
 					   BPF_FD_TYPE_KPROBE));
-	CHECK_AND_RET(test_debug_fs_kprobe(1, "blk_account_io_completion",
+	CHECK_AND_RET(test_debug_fs_kprobe(1, "blk_account_io_done",
 					   BPF_FD_TYPE_KRETPROBE));
 
 	/* test nondebug fs kprobe */
diff --git a/samples/bpf/tracex3_kern.c b/samples/bpf/tracex3_kern.c
index 659613c19a82..710a4410b2fb 100644
--- a/samples/bpf/tracex3_kern.c
+++ b/samples/bpf/tracex3_kern.c
@@ -49,7 +49,7 @@ struct {
 	__uint(max_entries, SLOTS);
 } lat_map SEC(".maps");
 
-SEC("kprobe/blk_account_io_completion")
+SEC("kprobe/blk_account_io_done")
 int bpf_prog2(struct pt_regs *ctx)
 {
 	long rq = PT_REGS_PARM1(ctx);
-- 
cgit v1.2.3


From 35a8b6dd339f04cbcb0b2d085334263542a12b70 Mon Sep 17 00:00:00 2001
From: "Daniel T. Lee" <danieltimlee@gmail.com>
Date: Sun, 23 Aug 2020 17:53:32 +0900
Subject: samples: bpf: Cleanup bpf_load.o from Makefile

Commit cc7f641d637b ("samples: bpf: Refactor BPF map performance
test with libbpf") omitted the removal of bpf_load.o from the Makefile,
so this commit removes bpf_load.o from the object list of the target
that no longer uses it.

Fixes: cc7f641d637b ("samples: bpf: Refactor BPF map performance test with libbpf")
Signed-off-by: Daniel T. Lee <danieltimlee@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200823085334.9413-2-danieltimlee@gmail.com
---
 samples/bpf/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'samples')

diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index f87ee02073ba..0cac89230c6d 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -76,7 +76,7 @@ trace_output-objs := bpf_load.o trace_output_user.o $(TRACE_HELPERS)
 lathist-objs := bpf_load.o lathist_user.o
 offwaketime-objs := bpf_load.o offwaketime_user.o $(TRACE_HELPERS)
 spintest-objs := bpf_load.o spintest_user.o $(TRACE_HELPERS)
-map_perf_test-objs := bpf_load.o map_perf_test_user.o
+map_perf_test-objs := map_perf_test_user.o
 test_overhead-objs := bpf_load.o test_overhead_user.o
 test_cgrp2_array_pin-objs := test_cgrp2_array_pin.o
 test_cgrp2_attach-objs := test_cgrp2_attach.o
-- 
cgit v1.2.3


From 3677d0a13171bb1dc8db0af84d48dea14a899962 Mon Sep 17 00:00:00 2001
From: "Daniel T. Lee" <danieltimlee@gmail.com>
Date: Sun, 23 Aug 2020 17:53:33 +0900
Subject: samples: bpf: Refactor kprobe tracing programs with libbpf

To address the increasing fragmentation of the bpf loader programs,
this commit refactors the existing kprobe tracing programs to use the
libbpf bpf loader instead of bpf_load.o, the loader that is specific to
samples/bpf.

    - For kprobe events pointing to system calls, the SYSCALL() macro in
    trace_common.h was used.
    - Adding a kprobe event and attaching a bpf program to it was done
    through bpf_program__attach().
    - The existing BPF map definitions have been converted to the new
    BTF-defined map format.

Signed-off-by: Daniel T. Lee <danieltimlee@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200823085334.9413-3-danieltimlee@gmail.com
---
 samples/bpf/Makefile                              | 10 ++--
 samples/bpf/lathist_kern.c                        | 24 ++++----
 samples/bpf/lathist_user.c                        | 42 ++++++++++++--
 samples/bpf/spintest_kern.c                       | 36 ++++++------
 samples/bpf/spintest_user.c                       | 68 ++++++++++++++++++-----
 samples/bpf/test_current_task_under_cgroup_kern.c | 27 ++++-----
 samples/bpf/test_current_task_under_cgroup_user.c | 52 +++++++++++++----
 samples/bpf/test_probe_write_user_kern.c          | 12 ++--
 samples/bpf/test_probe_write_user_user.c          | 49 ++++++++++++----
 samples/bpf/trace_output_kern.c                   | 15 ++---
 samples/bpf/trace_output_user.c                   | 55 ++++++++++++------
 11 files changed, 272 insertions(+), 118 deletions(-)

(limited to 'samples')

diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 0cac89230c6d..c74d477474e2 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -71,11 +71,11 @@ tracex4-objs := tracex4_user.o
 tracex5-objs := tracex5_user.o $(TRACE_HELPERS)
 tracex6-objs := tracex6_user.o
 tracex7-objs := tracex7_user.o
-test_probe_write_user-objs := bpf_load.o test_probe_write_user_user.o
-trace_output-objs := bpf_load.o trace_output_user.o $(TRACE_HELPERS)
-lathist-objs := bpf_load.o lathist_user.o
+test_probe_write_user-objs := test_probe_write_user_user.o
+trace_output-objs := trace_output_user.o $(TRACE_HELPERS)
+lathist-objs := lathist_user.o
 offwaketime-objs := bpf_load.o offwaketime_user.o $(TRACE_HELPERS)
-spintest-objs := bpf_load.o spintest_user.o $(TRACE_HELPERS)
+spintest-objs := spintest_user.o $(TRACE_HELPERS)
 map_perf_test-objs := map_perf_test_user.o
 test_overhead-objs := bpf_load.o test_overhead_user.o
 test_cgrp2_array_pin-objs := test_cgrp2_array_pin.o
@@ -86,7 +86,7 @@ xdp1-objs := xdp1_user.o
 # reuse xdp1 source intentionally
 xdp2-objs := xdp1_user.o
 xdp_router_ipv4-objs := xdp_router_ipv4_user.o
-test_current_task_under_cgroup-objs := bpf_load.o $(CGROUP_HELPERS) \
+test_current_task_under_cgroup-objs := $(CGROUP_HELPERS) \
 				       test_current_task_under_cgroup_user.o
 trace_event-objs := trace_event_user.o $(TRACE_HELPERS)
 sampleip-objs := sampleip_user.o $(TRACE_HELPERS)
diff --git a/samples/bpf/lathist_kern.c b/samples/bpf/lathist_kern.c
index ca9c2e4e69aa..4adfcbbe6ef4 100644
--- a/samples/bpf/lathist_kern.c
+++ b/samples/bpf/lathist_kern.c
@@ -18,12 +18,12 @@
  * trace_preempt_[on|off] tracepoints hooks is not supported.
  */
 
-struct bpf_map_def SEC("maps") my_map = {
-	.type = BPF_MAP_TYPE_ARRAY,
-	.key_size = sizeof(int),
-	.value_size = sizeof(u64),
-	.max_entries = MAX_CPU,
-};
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__type(key, int);
+	__type(value, u64);
+	__uint(max_entries, MAX_CPU);
+} my_map SEC(".maps");
 
 SEC("kprobe/trace_preempt_off")
 int bpf_prog1(struct pt_regs *ctx)
@@ -61,12 +61,12 @@ static unsigned int log2l(unsigned long v)
 		return log2(v);
 }
 
-struct bpf_map_def SEC("maps") my_lat = {
-	.type = BPF_MAP_TYPE_ARRAY,
-	.key_size = sizeof(int),
-	.value_size = sizeof(long),
-	.max_entries = MAX_CPU * MAX_ENTRIES,
-};
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__type(key, int);
+	__type(value, long);
+	__uint(max_entries, MAX_CPU * MAX_ENTRIES);
+} my_lat SEC(".maps");
 
 SEC("kprobe/trace_preempt_on")
 int bpf_prog2(struct pt_regs *ctx)
diff --git a/samples/bpf/lathist_user.c b/samples/bpf/lathist_user.c
index 2ff2839a52d5..7d8ff2418303 100644
--- a/samples/bpf/lathist_user.c
+++ b/samples/bpf/lathist_user.c
@@ -6,9 +6,8 @@
 #include <unistd.h>
 #include <stdlib.h>
 #include <signal.h>
-#include <linux/bpf.h>
+#include <bpf/libbpf.h>
 #include <bpf/bpf.h>
-#include "bpf_load.h"
 
 #define MAX_ENTRIES	20
 #define MAX_CPU		4
@@ -81,20 +80,51 @@ static void get_data(int fd)
 
 int main(int argc, char **argv)
 {
+	struct bpf_link *links[2];
+	struct bpf_program *prog;
+	struct bpf_object *obj;
 	char filename[256];
+	int map_fd, i = 0;
 
 	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+	obj = bpf_object__open_file(filename, NULL);
+	if (libbpf_get_error(obj)) {
+		fprintf(stderr, "ERROR: opening BPF object file failed\n");
+		return 0;
+	}
+
+	/* load BPF program */
+	if (bpf_object__load(obj)) {
+		fprintf(stderr, "ERROR: loading BPF object file failed\n");
+		goto cleanup;
+	}
 
-	if (load_bpf_file(filename)) {
-		printf("%s", bpf_log_buf);
-		return 1;
+	map_fd = bpf_object__find_map_fd_by_name(obj, "my_lat");
+	if (map_fd < 0) {
+		fprintf(stderr, "ERROR: finding a map in obj file failed\n");
+		goto cleanup;
+	}
+
+	bpf_object__for_each_program(prog, obj) {
+		links[i] = bpf_program__attach(prog);
+		if (libbpf_get_error(links[i])) {
+			fprintf(stderr, "ERROR: bpf_program__attach failed\n");
+			links[i] = NULL;
+			goto cleanup;
+		}
+		i++;
 	}
 
 	while (1) {
-		get_data(map_fd[1]);
+		get_data(map_fd);
 		print_hist();
 		sleep(5);
 	}
 
+cleanup:
+	for (i--; i >= 0; i--)
+		bpf_link__destroy(links[i]);
+
+	bpf_object__close(obj);
 	return 0;
 }
diff --git a/samples/bpf/spintest_kern.c b/samples/bpf/spintest_kern.c
index f508af357251..455da77319d9 100644
--- a/samples/bpf/spintest_kern.c
+++ b/samples/bpf/spintest_kern.c
@@ -12,25 +12,25 @@
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>
 
-struct bpf_map_def SEC("maps") my_map = {
-	.type = BPF_MAP_TYPE_HASH,
-	.key_size = sizeof(long),
-	.value_size = sizeof(long),
-	.max_entries = 1024,
-};
-struct bpf_map_def SEC("maps") my_map2 = {
-	.type = BPF_MAP_TYPE_PERCPU_HASH,
-	.key_size = sizeof(long),
-	.value_size = sizeof(long),
-	.max_entries = 1024,
-};
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__type(key, long);
+	__type(value, long);
+	__uint(max_entries, 1024);
+} my_map SEC(".maps");
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_HASH);
+	__uint(key_size, sizeof(long));
+	__uint(value_size, sizeof(long));
+	__uint(max_entries, 1024);
+} my_map2 SEC(".maps");
 
-struct bpf_map_def SEC("maps") stackmap = {
-	.type = BPF_MAP_TYPE_STACK_TRACE,
-	.key_size = sizeof(u32),
-	.value_size = PERF_MAX_STACK_DEPTH * sizeof(u64),
-	.max_entries = 10000,
-};
+struct {
+	__uint(type, BPF_MAP_TYPE_STACK_TRACE);
+	__uint(key_size, sizeof(u32));
+	__uint(value_size, PERF_MAX_STACK_DEPTH * sizeof(u64));
+	__uint(max_entries, 10000);
+} stackmap SEC(".maps");
 
 #define PROG(foo) \
 int foo(struct pt_regs *ctx) \
diff --git a/samples/bpf/spintest_user.c b/samples/bpf/spintest_user.c
index fb430ea2ef51..847da9284fa8 100644
--- a/samples/bpf/spintest_user.c
+++ b/samples/bpf/spintest_user.c
@@ -1,40 +1,77 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <stdio.h>
 #include <unistd.h>
-#include <linux/bpf.h>
 #include <string.h>
 #include <assert.h>
 #include <sys/resource.h>
 #include <bpf/libbpf.h>
-#include "bpf_load.h"
+#include <bpf/bpf.h>
 #include "trace_helpers.h"
 
 int main(int ac, char **argv)
 {
 	struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
+	char filename[256], symbol[256];
+	struct bpf_object *obj = NULL;
+	struct bpf_link *links[20];
 	long key, next_key, value;
-	char filename[256];
+	struct bpf_program *prog;
+	int map_fd, i, j = 0;
+	const char *title;
 	struct ksym *sym;
-	int i;
 
-	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
-	setrlimit(RLIMIT_MEMLOCK, &r);
+	if (setrlimit(RLIMIT_MEMLOCK, &r)) {
+		perror("setrlimit(RLIMIT_MEMLOCK)");
+		return 1;
+	}
 
 	if (load_kallsyms()) {
 		printf("failed to process /proc/kallsyms\n");
 		return 2;
 	}
 
-	if (load_bpf_file(filename)) {
-		printf("%s", bpf_log_buf);
-		return 1;
+	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+	obj = bpf_object__open_file(filename, NULL);
+	if (libbpf_get_error(obj)) {
+		fprintf(stderr, "ERROR: opening BPF object file failed\n");
+		obj = NULL;
+		goto cleanup;
+	}
+
+	/* load BPF program */
+	if (bpf_object__load(obj)) {
+		fprintf(stderr, "ERROR: loading BPF object file failed\n");
+		goto cleanup;
+	}
+
+	map_fd = bpf_object__find_map_fd_by_name(obj, "my_map");
+	if (map_fd < 0) {
+		fprintf(stderr, "ERROR: finding a map in obj file failed\n");
+		goto cleanup;
+	}
+
+	bpf_object__for_each_program(prog, obj) {
+		title = bpf_program__title(prog, false);
+		if (sscanf(title, "kprobe/%s", symbol) != 1)
+			continue;
+
+		/* Attach prog only when symbol exists */
+		if (ksym_get_addr(symbol)) {
+			links[j] = bpf_program__attach(prog);
+			if (libbpf_get_error(links[j])) {
+				fprintf(stderr, "bpf_program__attach failed\n");
+				links[j] = NULL;
+				goto cleanup;
+			}
+			j++;
+		}
 	}
 
 	for (i = 0; i < 5; i++) {
 		key = 0;
 		printf("kprobing funcs:");
-		while (bpf_map_get_next_key(map_fd[0], &key, &next_key) == 0) {
-			bpf_map_lookup_elem(map_fd[0], &next_key, &value);
+		while (bpf_map_get_next_key(map_fd, &key, &next_key) == 0) {
+			bpf_map_lookup_elem(map_fd, &next_key, &value);
 			assert(next_key == value);
 			sym = ksym_search(value);
 			key = next_key;
@@ -48,10 +85,15 @@ int main(int ac, char **argv)
 		if (key)
 			printf("\n");
 		key = 0;
-		while (bpf_map_get_next_key(map_fd[0], &key, &next_key) == 0)
-			bpf_map_delete_elem(map_fd[0], &next_key);
+		while (bpf_map_get_next_key(map_fd, &key, &next_key) == 0)
+			bpf_map_delete_elem(map_fd, &next_key);
 		sleep(1);
 	}
 
+cleanup:
+	for (j--; j >= 0; j--)
+		bpf_link__destroy(links[j]);
+
+	bpf_object__close(obj);
 	return 0;
 }
diff --git a/samples/bpf/test_current_task_under_cgroup_kern.c b/samples/bpf/test_current_task_under_cgroup_kern.c
index 6dc4f41bb6cb..fbd43e2bb4d3 100644
--- a/samples/bpf/test_current_task_under_cgroup_kern.c
+++ b/samples/bpf/test_current_task_under_cgroup_kern.c
@@ -10,23 +10,24 @@
 #include <linux/version.h>
 #include <bpf/bpf_helpers.h>
 #include <uapi/linux/utsname.h>
+#include "trace_common.h"
 
-struct bpf_map_def SEC("maps") cgroup_map = {
-	.type			= BPF_MAP_TYPE_CGROUP_ARRAY,
-	.key_size		= sizeof(u32),
-	.value_size		= sizeof(u32),
-	.max_entries	= 1,
-};
+struct {
+	__uint(type, BPF_MAP_TYPE_CGROUP_ARRAY);
+	__uint(key_size, sizeof(u32));
+	__uint(value_size, sizeof(u32));
+	__uint(max_entries, 1);
+} cgroup_map SEC(".maps");
 
-struct bpf_map_def SEC("maps") perf_map = {
-	.type			= BPF_MAP_TYPE_ARRAY,
-	.key_size		= sizeof(u32),
-	.value_size		= sizeof(u64),
-	.max_entries	= 1,
-};
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__type(key, u32);
+	__type(value, u64);
+	__uint(max_entries, 1);
+} perf_map SEC(".maps");
 
 /* Writes the last PID that called sync to a map at index 0 */
-SEC("kprobe/sys_sync")
+SEC("kprobe/" SYSCALL(sys_sync))
 int bpf_prog1(struct pt_regs *ctx)
 {
 	u64 pid = bpf_get_current_pid_tgid();
diff --git a/samples/bpf/test_current_task_under_cgroup_user.c b/samples/bpf/test_current_task_under_cgroup_user.c
index 06e9f8ce42e2..ac251a417f45 100644
--- a/samples/bpf/test_current_task_under_cgroup_user.c
+++ b/samples/bpf/test_current_task_under_cgroup_user.c
@@ -4,10 +4,9 @@
 
 #define _GNU_SOURCE
 #include <stdio.h>
-#include <linux/bpf.h>
 #include <unistd.h>
 #include <bpf/bpf.h>
-#include "bpf_load.h"
+#include <bpf/libbpf.h>
 #include "cgroup_helpers.h"
 
 #define CGROUP_PATH		"/my-cgroup"
@@ -15,13 +14,44 @@
 int main(int argc, char **argv)
 {
 	pid_t remote_pid, local_pid = getpid();
-	int cg2, idx = 0, rc = 0;
+	struct bpf_link *link = NULL;
+	struct bpf_program *prog;
+	int cg2, idx = 0, rc = 1;
+	struct bpf_object *obj;
 	char filename[256];
+	int map_fd[2];
 
 	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
-	if (load_bpf_file(filename)) {
-		printf("%s", bpf_log_buf);
-		return 1;
+	obj = bpf_object__open_file(filename, NULL);
+	if (libbpf_get_error(obj)) {
+		fprintf(stderr, "ERROR: opening BPF object file failed\n");
+		return 0;
+	}
+
+	prog = bpf_object__find_program_by_name(obj, "bpf_prog1");
+	if (!prog) {
+		printf("finding a prog in obj file failed\n");
+		goto cleanup;
+	}
+
+	/* load BPF program */
+	if (bpf_object__load(obj)) {
+		fprintf(stderr, "ERROR: loading BPF object file failed\n");
+		goto cleanup;
+	}
+
+	map_fd[0] = bpf_object__find_map_fd_by_name(obj, "cgroup_map");
+	map_fd[1] = bpf_object__find_map_fd_by_name(obj, "perf_map");
+	if (map_fd[0] < 0 || map_fd[1] < 0) {
+		fprintf(stderr, "ERROR: finding a map in obj file failed\n");
+		goto cleanup;
+	}
+
+	link = bpf_program__attach(prog);
+	if (libbpf_get_error(link)) {
+		fprintf(stderr, "ERROR: bpf_program__attach failed\n");
+		link = NULL;
+		goto cleanup;
 	}
 
 	if (setup_cgroup_environment())
@@ -70,12 +100,14 @@ int main(int argc, char **argv)
 		goto err;
 	}
 
-	goto out;
-err:
-	rc = 1;
+	rc = 0;
 
-out:
+err:
 	close(cg2);
 	cleanup_cgroup_environment();
+
+cleanup:
+	bpf_link__destroy(link);
+	bpf_object__close(obj);
 	return rc;
 }
diff --git a/samples/bpf/test_probe_write_user_kern.c b/samples/bpf/test_probe_write_user_kern.c
index fd651a65281e..220a96438d75 100644
--- a/samples/bpf/test_probe_write_user_kern.c
+++ b/samples/bpf/test_probe_write_user_kern.c
@@ -13,12 +13,12 @@
 #include <bpf/bpf_core_read.h>
 #include "trace_common.h"
 
-struct bpf_map_def SEC("maps") dnat_map = {
-	.type = BPF_MAP_TYPE_HASH,
-	.key_size = sizeof(struct sockaddr_in),
-	.value_size = sizeof(struct sockaddr_in),
-	.max_entries = 256,
-};
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__type(key, struct sockaddr_in);
+	__type(value, struct sockaddr_in);
+	__uint(max_entries, 256);
+} dnat_map SEC(".maps");
 
 /* kprobe is NOT a stable ABI
  * kernel functions can be removed, renamed or completely change semantics.
diff --git a/samples/bpf/test_probe_write_user_user.c b/samples/bpf/test_probe_write_user_user.c
index 045eb5e30f54..00ccfb834e45 100644
--- a/samples/bpf/test_probe_write_user_user.c
+++ b/samples/bpf/test_probe_write_user_user.c
@@ -1,21 +1,22 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <stdio.h>
 #include <assert.h>
-#include <linux/bpf.h>
 #include <unistd.h>
 #include <bpf/bpf.h>
-#include "bpf_load.h"
+#include <bpf/libbpf.h>
 #include <sys/socket.h>
-#include <string.h>
 #include <netinet/in.h>
 #include <arpa/inet.h>
 
 int main(int ac, char **argv)
 {
-	int serverfd, serverconnfd, clientfd;
-	socklen_t sockaddr_len;
-	struct sockaddr serv_addr, mapped_addr, tmp_addr;
 	struct sockaddr_in *serv_addr_in, *mapped_addr_in, *tmp_addr_in;
+	struct sockaddr serv_addr, mapped_addr, tmp_addr;
+	int serverfd, serverconnfd, clientfd, map_fd;
+	struct bpf_link *link = NULL;
+	struct bpf_program *prog;
+	struct bpf_object *obj;
+	socklen_t sockaddr_len;
 	char filename[256];
 	char *ip;
 
@@ -24,10 +25,35 @@ int main(int ac, char **argv)
 	tmp_addr_in = (struct sockaddr_in *)&tmp_addr;
 
 	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+	obj = bpf_object__open_file(filename, NULL);
+	if (libbpf_get_error(obj)) {
+		fprintf(stderr, "ERROR: opening BPF object file failed\n");
+		return 0;
+	}
+
+	prog = bpf_object__find_program_by_name(obj, "bpf_prog1");
+	if (libbpf_get_error(prog)) {
+		fprintf(stderr, "ERROR: finding a prog in obj file failed\n");
+		goto cleanup;
+	}
+
+	/* load BPF program */
+	if (bpf_object__load(obj)) {
+		fprintf(stderr, "ERROR: loading BPF object file failed\n");
+		goto cleanup;
+	}
+
+	map_fd = bpf_object__find_map_fd_by_name(obj, "dnat_map");
+	if (map_fd < 0) {
+		fprintf(stderr, "ERROR: finding a map in obj file failed\n");
+		goto cleanup;
+	}
 
-	if (load_bpf_file(filename)) {
-		printf("%s", bpf_log_buf);
-		return 1;
+	link = bpf_program__attach(prog);
+	if (libbpf_get_error(link)) {
+		fprintf(stderr, "ERROR: bpf_program__attach failed\n");
+		link = NULL;
+		goto cleanup;
 	}
 
 	assert((serverfd = socket(AF_INET, SOCK_STREAM, 0)) > 0);
@@ -51,7 +77,7 @@ int main(int ac, char **argv)
 	mapped_addr_in->sin_port = htons(5555);
 	mapped_addr_in->sin_addr.s_addr = inet_addr("255.255.255.255");
 
-	assert(!bpf_map_update_elem(map_fd[0], &mapped_addr, &serv_addr, BPF_ANY));
+	assert(!bpf_map_update_elem(map_fd, &mapped_addr, &serv_addr, BPF_ANY));
 
 	assert(listen(serverfd, 5) == 0);
 
@@ -75,5 +101,8 @@ int main(int ac, char **argv)
 	/* Is the server's getsockname = the socket getpeername */
 	assert(memcmp(&serv_addr, &tmp_addr, sizeof(struct sockaddr_in)) == 0);
 
+cleanup:
+	bpf_link__destroy(link);
+	bpf_object__close(obj);
 	return 0;
 }
diff --git a/samples/bpf/trace_output_kern.c b/samples/bpf/trace_output_kern.c
index 1d7d422cae6f..b64815af0943 100644
--- a/samples/bpf/trace_output_kern.c
+++ b/samples/bpf/trace_output_kern.c
@@ -2,15 +2,16 @@
 #include <linux/version.h>
 #include <uapi/linux/bpf.h>
 #include <bpf/bpf_helpers.h>
+#include "trace_common.h"
 
-struct bpf_map_def SEC("maps") my_map = {
-	.type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
-	.key_size = sizeof(int),
-	.value_size = sizeof(u32),
-	.max_entries = 2,
-};
+struct {
+	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+	__uint(key_size, sizeof(int));
+	__uint(value_size, sizeof(u32));
+	__uint(max_entries, 2);
+} my_map SEC(".maps");
 
-SEC("kprobe/sys_write")
+SEC("kprobe/" SYSCALL(sys_write))
 int bpf_prog1(struct pt_regs *ctx)
 {
 	struct S {
diff --git a/samples/bpf/trace_output_user.c b/samples/bpf/trace_output_user.c
index 60a17dd05345..364b98764d54 100644
--- a/samples/bpf/trace_output_user.c
+++ b/samples/bpf/trace_output_user.c
@@ -1,23 +1,10 @@
 // SPDX-License-Identifier: GPL-2.0-only
 #include <stdio.h>
-#include <unistd.h>
-#include <stdlib.h>
-#include <stdbool.h>
-#include <string.h>
 #include <fcntl.h>
 #include <poll.h>
-#include <linux/perf_event.h>
-#include <linux/bpf.h>
-#include <errno.h>
-#include <assert.h>
-#include <sys/syscall.h>
-#include <sys/ioctl.h>
-#include <sys/mman.h>
 #include <time.h>
 #include <signal.h>
 #include <bpf/libbpf.h>
-#include "bpf_load.h"
-#include "perf-sys.h"
 
 static __u64 time_get_ns(void)
 {
@@ -57,20 +44,48 @@ static void print_bpf_output(void *ctx, int cpu, void *data, __u32 size)
 int main(int argc, char **argv)
 {
 	struct perf_buffer_opts pb_opts = {};
+	struct bpf_link *link = NULL;
+	struct bpf_program *prog;
 	struct perf_buffer *pb;
+	struct bpf_object *obj;
+	int map_fd, ret = 0;
 	char filename[256];
 	FILE *f;
-	int ret;
 
 	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+	obj = bpf_object__open_file(filename, NULL);
+	if (libbpf_get_error(obj)) {
+		fprintf(stderr, "ERROR: opening BPF object file failed\n");
+		return 0;
+	}
 
-	if (load_bpf_file(filename)) {
-		printf("%s", bpf_log_buf);
-		return 1;
+	/* load BPF program */
+	if (bpf_object__load(obj)) {
+		fprintf(stderr, "ERROR: loading BPF object file failed\n");
+		goto cleanup;
+	}
+
+	map_fd = bpf_object__find_map_fd_by_name(obj, "my_map");
+	if (map_fd < 0) {
+		fprintf(stderr, "ERROR: finding a map in obj file failed\n");
+		goto cleanup;
+	}
+
+	prog = bpf_object__find_program_by_name(obj, "bpf_prog1");
+	if (libbpf_get_error(prog)) {
+		fprintf(stderr, "ERROR: finding a prog in obj file failed\n");
+		goto cleanup;
+	}
+
+	link = bpf_program__attach(prog);
+	if (libbpf_get_error(link)) {
+		fprintf(stderr, "ERROR: bpf_program__attach failed\n");
+		link = NULL;
+		goto cleanup;
 	}
 
 	pb_opts.sample_cb = print_bpf_output;
-	pb = perf_buffer__new(map_fd[0], 8, &pb_opts);
+	pb = perf_buffer__new(map_fd, 8, &pb_opts);
 	ret = libbpf_get_error(pb);
 	if (ret) {
 		printf("failed to setup perf_buffer: %d\n", ret);
@@ -84,5 +99,9 @@ int main(int argc, char **argv)
 	while ((ret = perf_buffer__poll(pb, 1000)) >= 0 && cnt < MAX_CNT) {
 	}
 	kill(0, SIGINT);
+
+cleanup:
+	bpf_link__destroy(link);
+	bpf_object__close(obj);
 	return ret;
 }
-- 
cgit v1.2.3


From f0c328f8af5d920a68f8217aec76d9a45288cef1 Mon Sep 17 00:00:00 2001
From: "Daniel T. Lee" <danieltimlee@gmail.com>
Date: Sun, 23 Aug 2020 17:53:34 +0900
Subject: samples: bpf: Refactor tracepoint tracing programs with libbpf

To address the increasing fragmentation of the bpf loader programs,
this commit refactors the existing tracepoint tracing programs to use
the libbpf bpf loader instead of bpf_load.o, the loader that is specific
to samples/bpf.

    - Adding a tracepoint event and attaching a bpf program to it was done
    through bpf_program__attach().
    - The existing BPF map definitions have been converted to the new
    BTF-defined map format.

Signed-off-by: Daniel T. Lee <danieltimlee@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200823085334.9413-4-danieltimlee@gmail.com
---
 samples/bpf/Makefile           |  6 ++--
 samples/bpf/cpustat_kern.c     | 36 +++++++++++------------
 samples/bpf/cpustat_user.c     | 47 +++++++++++++++++++++++++-----
 samples/bpf/offwaketime_kern.c | 52 ++++++++++++++++-----------------
 samples/bpf/offwaketime_user.c | 66 ++++++++++++++++++++++++++++++++----------
 samples/bpf/syscall_tp_kern.c  | 24 +++++++--------
 samples/bpf/syscall_tp_user.c  | 54 ++++++++++++++++++++++++++--------
 7 files changed, 192 insertions(+), 93 deletions(-)

(limited to 'samples')

diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index c74d477474e2..a6d3646b3818 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -74,7 +74,7 @@ tracex7-objs := tracex7_user.o
 test_probe_write_user-objs := test_probe_write_user_user.o
 trace_output-objs := trace_output_user.o $(TRACE_HELPERS)
 lathist-objs := lathist_user.o
-offwaketime-objs := bpf_load.o offwaketime_user.o $(TRACE_HELPERS)
+offwaketime-objs := offwaketime_user.o $(TRACE_HELPERS)
 spintest-objs := spintest_user.o $(TRACE_HELPERS)
 map_perf_test-objs := map_perf_test_user.o
 test_overhead-objs := bpf_load.o test_overhead_user.o
@@ -100,8 +100,8 @@ xdp_redirect_map-objs := xdp_redirect_map_user.o
 xdp_redirect_cpu-objs := bpf_load.o xdp_redirect_cpu_user.o
 xdp_monitor-objs := bpf_load.o xdp_monitor_user.o
 xdp_rxq_info-objs := xdp_rxq_info_user.o
-syscall_tp-objs := bpf_load.o syscall_tp_user.o
-cpustat-objs := bpf_load.o cpustat_user.o
+syscall_tp-objs := syscall_tp_user.o
+cpustat-objs := cpustat_user.o
 xdp_adjust_tail-objs := xdp_adjust_tail_user.o
 xdpsock-objs := xdpsock_user.o
 xdp_fwd-objs := xdp_fwd_user.o
diff --git a/samples/bpf/cpustat_kern.c b/samples/bpf/cpustat_kern.c
index a86a19d5f033..5aefd19cdfa1 100644
--- a/samples/bpf/cpustat_kern.c
+++ b/samples/bpf/cpustat_kern.c
@@ -51,28 +51,28 @@ static int cpu_opps[] = { 208000, 432000, 729000, 960000, 1200000 };
 #define MAP_OFF_PSTATE_IDX	3
 #define MAP_OFF_NUM		4
 
-struct bpf_map_def SEC("maps") my_map = {
-	.type = BPF_MAP_TYPE_ARRAY,
-	.key_size = sizeof(u32),
-	.value_size = sizeof(u64),
-	.max_entries = MAX_CPU * MAP_OFF_NUM,
-};
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__type(key, u32);
+	__type(value, u64);
+	__uint(max_entries, MAX_CPU * MAP_OFF_NUM);
+} my_map SEC(".maps");
 
 /* cstate_duration records duration time for every idle state per CPU */
-struct bpf_map_def SEC("maps") cstate_duration = {
-	.type = BPF_MAP_TYPE_ARRAY,
-	.key_size = sizeof(u32),
-	.value_size = sizeof(u64),
-	.max_entries = MAX_CPU * MAX_CSTATE_ENTRIES,
-};
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__type(key, u32);
+	__type(value, u64);
+	__uint(max_entries, MAX_CPU * MAX_CSTATE_ENTRIES);
+} cstate_duration SEC(".maps");
 
 /* pstate_duration records duration time for every operating point per CPU */
-struct bpf_map_def SEC("maps") pstate_duration = {
-	.type = BPF_MAP_TYPE_ARRAY,
-	.key_size = sizeof(u32),
-	.value_size = sizeof(u64),
-	.max_entries = MAX_CPU * MAX_PSTATE_ENTRIES,
-};
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__type(key, u32);
+	__type(value, u64);
+	__uint(max_entries, MAX_CPU * MAX_PSTATE_ENTRIES);
+} pstate_duration SEC(".maps");
 
 /*
  * The trace events for cpu_idle and cpu_frequency are taken from:
diff --git a/samples/bpf/cpustat_user.c b/samples/bpf/cpustat_user.c
index 869a99406dbf..96675985e9e0 100644
--- a/samples/bpf/cpustat_user.c
+++ b/samples/bpf/cpustat_user.c
@@ -9,7 +9,6 @@
 #include <string.h>
 #include <unistd.h>
 #include <fcntl.h>
-#include <linux/bpf.h>
 #include <locale.h>
 #include <sys/types.h>
 #include <sys/stat.h>
@@ -18,7 +17,9 @@
 #include <sys/wait.h>
 
 #include <bpf/bpf.h>
-#include "bpf_load.h"
+#include <bpf/libbpf.h>
+
+static int cstate_map_fd, pstate_map_fd;
 
 #define MAX_CPU			8
 #define MAX_PSTATE_ENTRIES	5
@@ -181,21 +182,50 @@ static void int_exit(int sig)
 {
 	cpu_stat_inject_cpu_idle_event();
 	cpu_stat_inject_cpu_frequency_event();
-	cpu_stat_update(map_fd[1], map_fd[2]);
+	cpu_stat_update(cstate_map_fd, pstate_map_fd);
 	cpu_stat_print();
 	exit(0);
 }
 
 int main(int argc, char **argv)
 {
+	struct bpf_link *link = NULL;
+	struct bpf_program *prog;
+	struct bpf_object *obj;
 	char filename[256];
 	int ret;
 
 	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+	obj = bpf_object__open_file(filename, NULL);
+	if (libbpf_get_error(obj)) {
+		fprintf(stderr, "ERROR: opening BPF object file failed\n");
+		return 0;
+	}
 
-	if (load_bpf_file(filename)) {
-		printf("%s", bpf_log_buf);
-		return 1;
+	prog = bpf_object__find_program_by_name(obj, "bpf_prog1");
+	if (!prog) {
+		printf("finding a prog in obj file failed\n");
+		goto cleanup;
+	}
+
+	/* load BPF program */
+	if (bpf_object__load(obj)) {
+		fprintf(stderr, "ERROR: loading BPF object file failed\n");
+		goto cleanup;
+	}
+
+	cstate_map_fd = bpf_object__find_map_fd_by_name(obj, "cstate_duration");
+	pstate_map_fd = bpf_object__find_map_fd_by_name(obj, "pstate_duration");
+	if (cstate_map_fd < 0 || pstate_map_fd < 0) {
+		fprintf(stderr, "ERROR: finding a map in obj file failed\n");
+		goto cleanup;
+	}
+
+	link = bpf_program__attach(prog);
+	if (libbpf_get_error(link)) {
+		fprintf(stderr, "ERROR: bpf_program__attach failed\n");
+		link = NULL;
+		goto cleanup;
 	}
 
 	ret = cpu_stat_inject_cpu_idle_event();
@@ -210,10 +240,13 @@ int main(int argc, char **argv)
 	signal(SIGTERM, int_exit);
 
 	while (1) {
-		cpu_stat_update(map_fd[1], map_fd[2]);
+		cpu_stat_update(cstate_map_fd, pstate_map_fd);
 		cpu_stat_print();
 		sleep(5);
 	}
 
+cleanup:
+	bpf_link__destroy(link);
+	bpf_object__close(obj);
 	return 0;
 }
diff --git a/samples/bpf/offwaketime_kern.c b/samples/bpf/offwaketime_kern.c
index e74ee1cd4b9c..14b792915a9c 100644
--- a/samples/bpf/offwaketime_kern.c
+++ b/samples/bpf/offwaketime_kern.c
@@ -28,38 +28,38 @@ struct key_t {
 	u32 tret;
 };
 
-struct bpf_map_def SEC("maps") counts = {
-	.type = BPF_MAP_TYPE_HASH,
-	.key_size = sizeof(struct key_t),
-	.value_size = sizeof(u64),
-	.max_entries = 10000,
-};
-
-struct bpf_map_def SEC("maps") start = {
-	.type = BPF_MAP_TYPE_HASH,
-	.key_size = sizeof(u32),
-	.value_size = sizeof(u64),
-	.max_entries = 10000,
-};
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__type(key, struct key_t);
+	__type(value, u64);
+	__uint(max_entries, 10000);
+} counts SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__type(key, u32);
+	__type(value, u64);
+	__uint(max_entries, 10000);
+} start SEC(".maps");
 
 struct wokeby_t {
 	char name[TASK_COMM_LEN];
 	u32 ret;
 };
 
-struct bpf_map_def SEC("maps") wokeby = {
-	.type = BPF_MAP_TYPE_HASH,
-	.key_size = sizeof(u32),
-	.value_size = sizeof(struct wokeby_t),
-	.max_entries = 10000,
-};
-
-struct bpf_map_def SEC("maps") stackmap = {
-	.type = BPF_MAP_TYPE_STACK_TRACE,
-	.key_size = sizeof(u32),
-	.value_size = PERF_MAX_STACK_DEPTH * sizeof(u64),
-	.max_entries = 10000,
-};
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__type(key, u32);
+	__type(value, struct wokeby_t);
+	__uint(max_entries, 10000);
+} wokeby SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_STACK_TRACE);
+	__uint(key_size, sizeof(u32));
+	__uint(value_size, PERF_MAX_STACK_DEPTH * sizeof(u64));
+	__uint(max_entries, 10000);
+} stackmap SEC(".maps");
 
 #define STACKID_FLAGS (0 | BPF_F_FAST_STACK_CMP)
 
diff --git a/samples/bpf/offwaketime_user.c b/samples/bpf/offwaketime_user.c
index 51c7da5341cc..5734cfdaaacb 100644
--- a/samples/bpf/offwaketime_user.c
+++ b/samples/bpf/offwaketime_user.c
@@ -5,19 +5,19 @@
 #include <unistd.h>
 #include <stdlib.h>
 #include <signal.h>
-#include <linux/bpf.h>
-#include <string.h>
 #include <linux/perf_event.h>
 #include <errno.h>
-#include <assert.h>
 #include <stdbool.h>
 #include <sys/resource.h>
 #include <bpf/libbpf.h>
-#include "bpf_load.h"
+#include <bpf/bpf.h>
 #include "trace_helpers.h"
 
 #define PRINT_RAW_ADDR 0
 
+/* counts, stackmap */
+static int map_fd[2];
+
 static void print_ksym(__u64 addr)
 {
 	struct ksym *sym;
@@ -52,14 +52,14 @@ static void print_stack(struct key_t *key, __u64 count)
 	int i;
 
 	printf("%s;", key->target);
-	if (bpf_map_lookup_elem(map_fd[3], &key->tret, ip) != 0) {
+	if (bpf_map_lookup_elem(map_fd[1], &key->tret, ip) != 0) {
 		printf("---;");
 	} else {
 		for (i = PERF_MAX_STACK_DEPTH - 1; i >= 0; i--)
 			print_ksym(ip[i]);
 	}
 	printf("-;");
-	if (bpf_map_lookup_elem(map_fd[3], &key->wret, ip) != 0) {
+	if (bpf_map_lookup_elem(map_fd[1], &key->wret, ip) != 0) {
 		printf("---;");
 	} else {
 		for (i = 0; i < PERF_MAX_STACK_DEPTH; i++)
@@ -96,23 +96,54 @@ static void int_exit(int sig)
 int main(int argc, char **argv)
 {
 	struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
+	struct bpf_object *obj = NULL;
+	struct bpf_link *links[2];
+	struct bpf_program *prog;
+	int delay = 1, i = 0;
 	char filename[256];
-	int delay = 1;
-
-	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
-	setrlimit(RLIMIT_MEMLOCK, &r);
 
-	signal(SIGINT, int_exit);
-	signal(SIGTERM, int_exit);
+	if (setrlimit(RLIMIT_MEMLOCK, &r)) {
+		perror("setrlimit(RLIMIT_MEMLOCK)");
+		return 1;
+	}
 
 	if (load_kallsyms()) {
 		printf("failed to process /proc/kallsyms\n");
 		return 2;
 	}
 
-	if (load_bpf_file(filename)) {
-		printf("%s", bpf_log_buf);
-		return 1;
+	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+	obj = bpf_object__open_file(filename, NULL);
+	if (libbpf_get_error(obj)) {
+		fprintf(stderr, "ERROR: opening BPF object file failed\n");
+		obj = NULL;
+		goto cleanup;
+	}
+
+	/* load BPF program */
+	if (bpf_object__load(obj)) {
+		fprintf(stderr, "ERROR: loading BPF object file failed\n");
+		goto cleanup;
+	}
+
+	map_fd[0] = bpf_object__find_map_fd_by_name(obj, "counts");
+	map_fd[1] = bpf_object__find_map_fd_by_name(obj, "stackmap");
+	if (map_fd[0] < 0 || map_fd[1] < 0) {
+		fprintf(stderr, "ERROR: finding a map in obj file failed\n");
+		goto cleanup;
+	}
+
+	signal(SIGINT, int_exit);
+	signal(SIGTERM, int_exit);
+
+	bpf_object__for_each_program(prog, obj) {
+		links[i] = bpf_program__attach(prog);
+		if (libbpf_get_error(links[i])) {
+			fprintf(stderr, "ERROR: bpf_program__attach failed\n");
+			links[i] = NULL;
+			goto cleanup;
+		}
+		i++;
 	}
 
 	if (argc > 1)
@@ -120,5 +151,10 @@ int main(int argc, char **argv)
 	sleep(delay);
 	print_stacks(map_fd[0]);
 
+cleanup:
+	for (i--; i >= 0; i--)
+		bpf_link__destroy(links[i]);
+
+	bpf_object__close(obj);
 	return 0;
 }
diff --git a/samples/bpf/syscall_tp_kern.c b/samples/bpf/syscall_tp_kern.c
index 5a62b03b1f88..50231c2eff9c 100644
--- a/samples/bpf/syscall_tp_kern.c
+++ b/samples/bpf/syscall_tp_kern.c
@@ -18,19 +18,19 @@ struct syscalls_exit_open_args {
 	long ret;
 };
 
-struct bpf_map_def SEC("maps") enter_open_map = {
-	.type = BPF_MAP_TYPE_ARRAY,
-	.key_size = sizeof(u32),
-	.value_size = sizeof(u32),
-	.max_entries = 1,
-};
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__type(key, u32);
+	__type(value, u32);
+	__uint(max_entries, 1);
+} enter_open_map SEC(".maps");
 
-struct bpf_map_def SEC("maps") exit_open_map = {
-	.type = BPF_MAP_TYPE_ARRAY,
-	.key_size = sizeof(u32),
-	.value_size = sizeof(u32),
-	.max_entries = 1,
-};
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__type(key, u32);
+	__type(value, u32);
+	__uint(max_entries, 1);
+} exit_open_map SEC(".maps");
 
 static __always_inline void count(void *map)
 {
diff --git a/samples/bpf/syscall_tp_user.c b/samples/bpf/syscall_tp_user.c
index 57014bab7cbe..76a1d00128fb 100644
--- a/samples/bpf/syscall_tp_user.c
+++ b/samples/bpf/syscall_tp_user.c
@@ -5,16 +5,12 @@
 #include <unistd.h>
 #include <fcntl.h>
 #include <stdlib.h>
-#include <signal.h>
-#include <linux/bpf.h>
 #include <string.h>
 #include <linux/perf_event.h>
 #include <errno.h>
-#include <assert.h>
-#include <stdbool.h>
 #include <sys/resource.h>
+#include <bpf/libbpf.h>
 #include <bpf/bpf.h>
-#include "bpf_load.h"
 
 /* This program verifies bpf attachment to tracepoint sys_enter_* and sys_exit_*.
  * This requires kernel CONFIG_FTRACE_SYSCALLS to be set.
@@ -49,16 +45,44 @@ static void verify_map(int map_id)
 
 static int test(char *filename, int num_progs)
 {
-	int i, fd, map0_fds[num_progs], map1_fds[num_progs];
+	int map0_fds[num_progs], map1_fds[num_progs], fd, i, j = 0;
+	struct bpf_link *links[num_progs * 4];
+	struct bpf_object *objs[num_progs];
+	struct bpf_program *prog;
 
 	for (i = 0; i < num_progs; i++) {
-		if (load_bpf_file(filename)) {
-			fprintf(stderr, "%s", bpf_log_buf);
-			return 1;
+		objs[i] = bpf_object__open_file(filename, NULL);
+		if (libbpf_get_error(objs[i])) {
+			fprintf(stderr, "opening BPF object file failed\n");
+			objs[i] = NULL;
+			goto cleanup;
 		}
-		printf("prog #%d: map ids %d %d\n", i, map_fd[0], map_fd[1]);
-		map0_fds[i] = map_fd[0];
-		map1_fds[i] = map_fd[1];
+
+		/* load BPF program */
+		if (bpf_object__load(objs[i])) {
+			fprintf(stderr, "loading BPF object file failed\n");
+			goto cleanup;
+		}
+
+		map0_fds[i] = bpf_object__find_map_fd_by_name(objs[i],
+							      "enter_open_map");
+		map1_fds[i] = bpf_object__find_map_fd_by_name(objs[i],
+							      "exit_open_map");
+		if (map0_fds[i] < 0 || map1_fds[i] < 0) {
+			fprintf(stderr, "finding a map in obj file failed\n");
+			goto cleanup;
+		}
+
+		bpf_object__for_each_program(prog, objs[i]) {
+			links[j] = bpf_program__attach(prog);
+			if (libbpf_get_error(links[j])) {
+				fprintf(stderr, "bpf_program__attach failed\n");
+				links[j] = NULL;
+				goto cleanup;
+			}
+			j++;
+		}
+		printf("prog #%d: map ids %d %d\n", i, map0_fds[i], map1_fds[i]);
 	}
 
 	/* current load_bpf_file has perf_event_open default pid = -1
@@ -80,6 +104,12 @@ static int test(char *filename, int num_progs)
 		verify_map(map1_fds[i]);
 	}
 
+cleanup:
+	for (j--; j >= 0; j--)
+		bpf_link__destroy(links[j]);
+
+	for (i--; i >= 0; i--)
+		bpf_object__close(objs[i]);
 	return 0;
 }
 
-- 
cgit v1.2.3


From 35149b2c048e43562a598fd8ff91467d429bc666 Mon Sep 17 00:00:00 2001
From: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
Date: Fri, 28 Aug 2020 10:26:28 +0200
Subject: samples/bpf: Add new sample xsk_fwd.c
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This sample code illustrates the packet forwarding between multiple
AF_XDP sockets in multi-threading environment. All the threads and
sockets are sharing a common buffer pool, with each socket having
its own private buffer cache. The sockets are created with the
xsk_socket__create_shared() function, which allows multiple AF_XDP
sockets to share the same UMEM object.

Example 1: Single thread handling two sockets. Packets received
from socket A (on top of interface IFA, queue QA) are forwarded
to socket B (on top of interface IFB, queue QB) and vice-versa.
The thread is affinitized to CPU core C:

./xsk_fwd -i IFA -q QA -i IFB -q QB -c C

Example 2: Two threads, each handling two sockets. Packets from
socket A are sent to socket B (by thread X), packets
from socket B are sent to socket A (by thread X); packets from
socket C are sent to socket D (by thread Y), packets from socket
D are sent to socket C (by thread Y). The two threads are bound
to CPU cores CX and CY:

./xsk_fwd -i IFA -q QA -i IFB -q QB -i IFC -q QC -i IFD -q QD -c CX -c CY

Signed-off-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Björn Töpel <bjorn.topel@intel.com>
Link: https://lore.kernel.org/bpf/1598603189-32145-15-git-send-email-magnus.karlsson@intel.com
---
 samples/bpf/Makefile  |    3 +
 samples/bpf/xsk_fwd.c | 1085 +++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 1088 insertions(+)
 create mode 100644 samples/bpf/xsk_fwd.c

(limited to 'samples')

diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index a6d3646b3818..4f1ed0e3cf9f 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -48,6 +48,7 @@ tprogs-y += syscall_tp
 tprogs-y += cpustat
 tprogs-y += xdp_adjust_tail
 tprogs-y += xdpsock
+tprogs-y += xsk_fwd
 tprogs-y += xdp_fwd
 tprogs-y += task_fd_query
 tprogs-y += xdp_sample_pkts
@@ -104,6 +105,7 @@ syscall_tp-objs := syscall_tp_user.o
 cpustat-objs := cpustat_user.o
 xdp_adjust_tail-objs := xdp_adjust_tail_user.o
 xdpsock-objs := xdpsock_user.o
+xsk_fwd-objs := xsk_fwd.o
 xdp_fwd-objs := xdp_fwd_user.o
 task_fd_query-objs := bpf_load.o task_fd_query_user.o $(TRACE_HELPERS)
 xdp_sample_pkts-objs := xdp_sample_pkts_user.o $(TRACE_HELPERS)
@@ -203,6 +205,7 @@ TPROGLDLIBS_trace_output	+= -lrt
 TPROGLDLIBS_map_perf_test	+= -lrt
 TPROGLDLIBS_test_overhead	+= -lrt
 TPROGLDLIBS_xdpsock		+= -pthread
+TPROGLDLIBS_xsk_fwd		+= -pthread
 
 # Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline:
 #  make M=samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang
diff --git a/samples/bpf/xsk_fwd.c b/samples/bpf/xsk_fwd.c
new file mode 100644
index 000000000000..1cd97c84c337
--- /dev/null
+++ b/samples/bpf/xsk_fwd.c
@@ -0,0 +1,1085 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2020 Intel Corporation. */
+
+#define _GNU_SOURCE
+#include <poll.h>
+#include <pthread.h>
+#include <signal.h>
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/resource.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <time.h>
+#include <unistd.h>
+#include <getopt.h>
+#include <netinet/ether.h>
+#include <net/if.h>
+
+#include <linux/bpf.h>
+#include <linux/if_link.h>
+#include <linux/if_xdp.h>
+
+#include <bpf/libbpf.h>
+#include <bpf/xsk.h>
+#include <bpf/bpf.h>
+
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+
+typedef __u64 u64;
+typedef __u32 u32;
+typedef __u16 u16;
+typedef __u8  u8;
+
+/* This program illustrates the packet forwarding between multiple AF_XDP
+ * sockets in multi-threaded environment. All threads are sharing a common
+ * buffer pool, with each socket having its own private buffer cache.
+ *
+ * Example 1: Single thread handling two sockets. The packets received by socket
+ * A (interface IFA, queue QA) are forwarded to socket B (interface IFB, queue
+ * QB), while the packets received by socket B are forwarded to socket A. The
+ * thread is running on CPU core X:
+ *
+ *         ./xsk_fwd -i IFA -q QA -i IFB -q QB -c X
+ *
+ * Example 2: Two threads, each handling two sockets. The thread running on CPU
+ * core X forwards all the packets received by socket A to socket B, and all the
+ * packets received by socket B to socket A. The thread running on CPU core Y is
+ * performing the same packet forwarding between sockets C and D:
+ *
+ *         ./xsk_fwd -i IFA -q QA -i IFB -q QB -i IFC -q QC -i IFD -q QD
+ *         -c CX -c CY
+ */
+
+/*
+ * Buffer pool and buffer cache
+ *
+ * For packet forwarding, the packet buffers are typically allocated from the
+ * pool for packet reception and freed back to the pool for further reuse once
+ * the packet transmission is completed.
+ *
+ * The buffer pool is shared between multiple threads. In order to minimize the
+ * access latency to the shared buffer pool, each thread creates one (or
+ * several) buffer caches, which, unlike the buffer pool, are private to the
+ * thread that creates them and therefore cannot be shared with other threads.
+ * The access to the shared pool is only needed either (A) when the cache gets
+ * empty due to repeated buffer allocations and it needs to be replenished from
+ * the pool, or (B) when the cache gets full due to repeated buffer free and it
+ * needs to be flushed back to the pool.
+ *
+ * In a packet forwarding system, a packet received on any input port can
+ * potentially be transmitted on any output port, depending on the forwarding
+ * configuration. For AF_XDP sockets, for this to work with zero-copy of the
+ * packet buffers, it is required that the buffer pool memory fits into the
+ * UMEM area shared by all the sockets.
+ */
+
+struct bpool_params {
+	u32 n_buffers;
+	u32 buffer_size;
+	int mmap_flags;
+
+	u32 n_users_max;
+	u32 n_buffers_per_slab;
+};
+
+/* This buffer pool implementation organizes the buffers into equally sized
+ * slabs of *n_buffers_per_slab*. Initially, there are *n_slabs* slabs in the
+ * pool that are completely filled with buffer pointers (full slabs).
+ *
+ * Each buffer cache has a slab for buffer allocation and a slab for buffer
+ * free, with both of these slabs initially empty. When the cache's allocation
+ * slab goes empty, it is swapped with one of the available full slabs from the
+ * pool, if any is available. When the cache's free slab goes full, it is
+ * swapped for one of the empty slabs from the pool, which is guaranteed to
+ * succeed.
+ *
+ * Partially filled slabs never get traded between the cache and the pool
+ * (except when the cache itself is destroyed), which enables fast operation
+ * through pointer swapping.
+ */
+struct bpool {
+	struct bpool_params params;
+	pthread_mutex_t lock;
+	void *addr;
+
+	u64 **slabs;
+	u64 **slabs_reserved;
+	u64 *buffers;
+	u64 *buffers_reserved;
+
+	u64 n_slabs;
+	u64 n_slabs_reserved;
+	u64 n_buffers;
+
+	u64 n_slabs_available;
+	u64 n_slabs_reserved_available;
+
+	struct xsk_umem_config umem_cfg;
+	struct xsk_ring_prod umem_fq;
+	struct xsk_ring_cons umem_cq;
+	struct xsk_umem *umem;
+};
+
+static struct bpool *
+bpool_init(struct bpool_params *params,
+	   struct xsk_umem_config *umem_cfg)
+{
+	struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
+	u64 n_slabs, n_slabs_reserved, n_buffers, n_buffers_reserved;
+	u64 slabs_size, slabs_reserved_size;
+	u64 buffers_size, buffers_reserved_size;
+	u64 total_size, i;
+	struct bpool *bp;
+	u8 *p;
+	int status;
+
+	/* mmap prep. */
+	if (setrlimit(RLIMIT_MEMLOCK, &r))
+		return NULL;
+
+	/* bpool internals dimensioning. */
+	n_slabs = (params->n_buffers + params->n_buffers_per_slab - 1) /
+		params->n_buffers_per_slab;
+	n_slabs_reserved = params->n_users_max * 2;
+	n_buffers = n_slabs * params->n_buffers_per_slab;
+	n_buffers_reserved = n_slabs_reserved * params->n_buffers_per_slab;
+
+	slabs_size = n_slabs * sizeof(u64 *);
+	slabs_reserved_size = n_slabs_reserved * sizeof(u64 *);
+	buffers_size = n_buffers * sizeof(u64);
+	buffers_reserved_size = n_buffers_reserved * sizeof(u64);
+
+	total_size = sizeof(struct bpool) +
+		slabs_size + slabs_reserved_size +
+		buffers_size + buffers_reserved_size;
+
+	/* bpool memory allocation. */
+	p = calloc(total_size, sizeof(u8));
+	if (!p)
+		return NULL;
+
+	/* bpool memory initialization. */
+	bp = (struct bpool *)p;
+	memcpy(&bp->params, params, sizeof(*params));
+	bp->params.n_buffers = n_buffers;
+
+	bp->slabs = (u64 **)&p[sizeof(struct bpool)];
+	bp->slabs_reserved = (u64 **)&p[sizeof(struct bpool) +
+		slabs_size];
+	bp->buffers = (u64 *)&p[sizeof(struct bpool) +
+		slabs_size + slabs_reserved_size];
+	bp->buffers_reserved = (u64 *)&p[sizeof(struct bpool) +
+		slabs_size + slabs_reserved_size + buffers_size];
+
+	bp->n_slabs = n_slabs;
+	bp->n_slabs_reserved = n_slabs_reserved;
+	bp->n_buffers = n_buffers;
+
+	for (i = 0; i < n_slabs; i++)
+		bp->slabs[i] = &bp->buffers[i * params->n_buffers_per_slab];
+	bp->n_slabs_available = n_slabs;
+
+	for (i = 0; i < n_slabs_reserved; i++)
+		bp->slabs_reserved[i] = &bp->buffers_reserved[i *
+			params->n_buffers_per_slab];
+	bp->n_slabs_reserved_available = n_slabs_reserved;
+
+	for (i = 0; i < n_buffers; i++)
+		bp->buffers[i] = i * params->buffer_size;
+
+	/* lock. */
+	status = pthread_mutex_init(&bp->lock, NULL);
+	if (status) {
+		free(p);
+		return NULL;
+	}
+
+	/* mmap. */
+	bp->addr = mmap(NULL,
+			n_buffers * params->buffer_size,
+			PROT_READ | PROT_WRITE,
+			MAP_PRIVATE | MAP_ANONYMOUS | params->mmap_flags,
+			-1,
+			0);
+	if (bp->addr == MAP_FAILED) {
+		pthread_mutex_destroy(&bp->lock);
+		free(p);
+		return NULL;
+	}
+
+	/* umem. */
+	status = xsk_umem__create(&bp->umem,
+				  bp->addr,
+				  bp->params.n_buffers * bp->params.buffer_size,
+				  &bp->umem_fq,
+				  &bp->umem_cq,
+				  umem_cfg);
+	if (status) {
+		munmap(bp->addr, bp->params.n_buffers * bp->params.buffer_size);
+		pthread_mutex_destroy(&bp->lock);
+		free(p);
+		return NULL;
+	}
+	memcpy(&bp->umem_cfg, umem_cfg, sizeof(*umem_cfg));
+
+	return bp;
+}
+
+static void
+bpool_free(struct bpool *bp)
+{
+	if (!bp)
+		return;
+
+	xsk_umem__delete(bp->umem);
+	munmap(bp->addr, bp->params.n_buffers * bp->params.buffer_size);
+	pthread_mutex_destroy(&bp->lock);
+	free(bp);
+}
+
+struct bcache {
+	struct bpool *bp;
+
+	u64 *slab_cons;
+	u64 *slab_prod;
+
+	u64 n_buffers_cons;
+	u64 n_buffers_prod;
+};
+
+static u32
+bcache_slab_size(struct bcache *bc)
+{
+	struct bpool *bp = bc->bp;
+
+	return bp->params.n_buffers_per_slab;
+}
+
+static struct bcache *
+bcache_init(struct bpool *bp)
+{
+	struct bcache *bc;
+
+	bc = calloc(1, sizeof(struct bcache));
+	if (!bc)
+		return NULL;
+
+	bc->bp = bp;
+	bc->n_buffers_cons = 0;
+	bc->n_buffers_prod = 0;
+
+	pthread_mutex_lock(&bp->lock);
+	if (bp->n_slabs_reserved_available == 0) {
+		pthread_mutex_unlock(&bp->lock);
+		free(bc);
+		return NULL;
+	}
+
+	bc->slab_cons = bp->slabs_reserved[bp->n_slabs_reserved_available - 1];
+	bc->slab_prod = bp->slabs_reserved[bp->n_slabs_reserved_available - 2];
+	bp->n_slabs_reserved_available -= 2;
+	pthread_mutex_unlock(&bp->lock);
+
+	return bc;
+}
+
+static void
+bcache_free(struct bcache *bc)
+{
+	struct bpool *bp;
+
+	if (!bc)
+		return;
+
+	/* In order to keep this example simple, the case of freeing any
+	 * existing buffers from the cache back to the pool is ignored.
+	 */
+
+	bp = bc->bp;
+	pthread_mutex_lock(&bp->lock);
+	bp->slabs_reserved[bp->n_slabs_reserved_available] = bc->slab_prod;
+	bp->slabs_reserved[bp->n_slabs_reserved_available + 1] = bc->slab_cons;
+	bp->n_slabs_reserved_available += 2;
+	pthread_mutex_unlock(&bp->lock);
+
+	free(bc);
+}
+
+/* To work correctly, the implementation requires that the *n_buffers* input
+ * argument is never greater than the buffer pool's *n_buffers_per_slab*. This
+ * is typically the case, with one exception taking place when a large number
+ * of buffers are allocated at init time (e.g. for the UMEM fill queue setup).
+ */
+static inline u32
+bcache_cons_check(struct bcache *bc, u32 n_buffers)
+{
+	struct bpool *bp = bc->bp;
+	u64 n_buffers_per_slab = bp->params.n_buffers_per_slab;
+	u64 n_buffers_cons = bc->n_buffers_cons;
+	u64 n_slabs_available;
+	u64 *slab_full;
+
+	/*
+	 * Consumer slab is not empty: Use what's available locally. Do not
+	 * look for more buffers from the pool when the ask can only be
+	 * partially satisfied.
+	 */
+	if (n_buffers_cons)
+		return (n_buffers_cons < n_buffers) ?
+			n_buffers_cons :
+			n_buffers;
+
+	/*
+	 * Consumer slab is empty: look to trade the current consumer slab
+	 * (empty) for a full slab from the pool, if any is available.
+	 */
+	pthread_mutex_lock(&bp->lock);
+	n_slabs_available = bp->n_slabs_available;
+	if (!n_slabs_available) {
+		pthread_mutex_unlock(&bp->lock);
+		return 0;
+	}
+
+	n_slabs_available--;
+	slab_full = bp->slabs[n_slabs_available];
+	bp->slabs[n_slabs_available] = bc->slab_cons;
+	bp->n_slabs_available = n_slabs_available;
+	pthread_mutex_unlock(&bp->lock);
+
+	bc->slab_cons = slab_full;
+	bc->n_buffers_cons = n_buffers_per_slab;
+	return n_buffers;
+}
+
+static inline u64
+bcache_cons(struct bcache *bc)
+{
+	u64 n_buffers_cons = bc->n_buffers_cons - 1;
+	u64 buffer;
+
+	buffer = bc->slab_cons[n_buffers_cons];
+	bc->n_buffers_cons = n_buffers_cons;
+	return buffer;
+}
+
+static inline void
+bcache_prod(struct bcache *bc, u64 buffer)
+{
+	struct bpool *bp = bc->bp;
+	u64 n_buffers_per_slab = bp->params.n_buffers_per_slab;
+	u64 n_buffers_prod = bc->n_buffers_prod;
+	u64 n_slabs_available;
+	u64 *slab_empty;
+
+	/*
+	 * Producer slab is not yet full: store the current buffer to it.
+	 */
+	if (n_buffers_prod < n_buffers_per_slab) {
+		bc->slab_prod[n_buffers_prod] = buffer;
+		bc->n_buffers_prod = n_buffers_prod + 1;
+		return;
+	}
+
+	/*
+	 * Producer slab is full: trade the cache's current producer slab
+	 * (full) for an empty slab from the pool, then store the current
+	 * buffer to the new producer slab. As one full slab exists in the
+	 * cache, it is guaranteed that there is at least one empty slab
+	 * available in the pool.
+	 */
+	pthread_mutex_lock(&bp->lock);
+	n_slabs_available = bp->n_slabs_available;
+	slab_empty = bp->slabs[n_slabs_available];
+	bp->slabs[n_slabs_available] = bc->slab_prod;
+	bp->n_slabs_available = n_slabs_available + 1;
+	pthread_mutex_unlock(&bp->lock);
+
+	slab_empty[0] = buffer;
+	bc->slab_prod = slab_empty;
+	bc->n_buffers_prod = 1;
+}
+
+/*
+ * Port
+ *
+ * Each of the forwarding ports sits on top of an AF_XDP socket. In order for
+ * packet forwarding to happen with no packet buffer copy, all the sockets need
+ * to share the same UMEM area, which is used as the buffer pool memory.
+ */
+#ifndef MAX_BURST_RX
+#define MAX_BURST_RX 64
+#endif
+
+#ifndef MAX_BURST_TX
+#define MAX_BURST_TX 64
+#endif
+
+struct burst_rx {
+	u64 addr[MAX_BURST_RX];
+	u32 len[MAX_BURST_RX];
+};
+
+struct burst_tx {
+	u64 addr[MAX_BURST_TX];
+	u32 len[MAX_BURST_TX];
+	u32 n_pkts;
+};
+
+struct port_params {
+	struct xsk_socket_config xsk_cfg;
+	struct bpool *bp;
+	const char *iface;
+	u32 iface_queue;
+};
+
+struct port {
+	struct port_params params;
+
+	struct bcache *bc;
+
+	struct xsk_ring_cons rxq;
+	struct xsk_ring_prod txq;
+	struct xsk_ring_prod umem_fq;
+	struct xsk_ring_cons umem_cq;
+	struct xsk_socket *xsk;
+	int umem_fq_initialized;
+
+	u64 n_pkts_rx;
+	u64 n_pkts_tx;
+};
+
+static void
+port_free(struct port *p)
+{
+	if (!p)
+		return;
+
+	/* To keep this example simple, the code to free the buffers from the
+	 * socket's receive and transmit queues, as well as from the UMEM fill
+	 * and completion queues, is not included.
+	 */
+
+	if (p->xsk)
+		xsk_socket__delete(p->xsk);
+
+	bcache_free(p->bc);
+
+	free(p);
+}
+
+static struct port *
+port_init(struct port_params *params)
+{
+	struct port *p;
+	u32 umem_fq_size, pos = 0;
+	int status, i;
+
+	/* Memory allocation and initialization. */
+	p = calloc(sizeof(struct port), 1);
+	if (!p)
+		return NULL;
+
+	memcpy(&p->params, params, sizeof(p->params));
+	umem_fq_size = params->bp->umem_cfg.fill_size;
+
+	/* bcache. */
+	p->bc = bcache_init(params->bp);
+	if (!p->bc ||
+	    (bcache_slab_size(p->bc) < umem_fq_size) ||
+	    (bcache_cons_check(p->bc, umem_fq_size) < umem_fq_size)) {
+		port_free(p);
+		return NULL;
+	}
+
+	/* xsk socket. */
+	status = xsk_socket__create_shared(&p->xsk,
+					   params->iface,
+					   params->iface_queue,
+					   params->bp->umem,
+					   &p->rxq,
+					   &p->txq,
+					   &p->umem_fq,
+					   &p->umem_cq,
+					   &params->xsk_cfg);
+	if (status) {
+		port_free(p);
+		return NULL;
+	}
+
+	/* umem fq. */
+	xsk_ring_prod__reserve(&p->umem_fq, umem_fq_size, &pos);
+
+	for (i = 0; i < umem_fq_size; i++)
+		*xsk_ring_prod__fill_addr(&p->umem_fq, pos + i) =
+			bcache_cons(p->bc);
+
+	xsk_ring_prod__submit(&p->umem_fq, umem_fq_size);
+	p->umem_fq_initialized = 1;
+
+	return p;
+}
+
+static inline u32
+port_rx_burst(struct port *p, struct burst_rx *b)
+{
+	u32 n_pkts, pos, i;
+
+	/* Free buffers for FQ replenish. */
+	n_pkts = ARRAY_SIZE(b->addr);
+
+	n_pkts = bcache_cons_check(p->bc, n_pkts);
+	if (!n_pkts)
+		return 0;
+
+	/* RXQ. */
+	n_pkts = xsk_ring_cons__peek(&p->rxq, n_pkts, &pos);
+	if (!n_pkts) {
+		if (xsk_ring_prod__needs_wakeup(&p->umem_fq)) {
+			struct pollfd pollfd = {
+				.fd = xsk_socket__fd(p->xsk),
+				.events = POLLIN,
+			};
+
+			poll(&pollfd, 1, 0);
+		}
+		return 0;
+	}
+
+	for (i = 0; i < n_pkts; i++) {
+		b->addr[i] = xsk_ring_cons__rx_desc(&p->rxq, pos + i)->addr;
+		b->len[i] = xsk_ring_cons__rx_desc(&p->rxq, pos + i)->len;
+	}
+
+	xsk_ring_cons__release(&p->rxq, n_pkts);
+	p->n_pkts_rx += n_pkts;
+
+	/* UMEM FQ. */
+	for ( ; ; ) {
+		int status;
+
+		status = xsk_ring_prod__reserve(&p->umem_fq, n_pkts, &pos);
+		if (status == n_pkts)
+			break;
+
+		if (xsk_ring_prod__needs_wakeup(&p->umem_fq)) {
+			struct pollfd pollfd = {
+				.fd = xsk_socket__fd(p->xsk),
+				.events = POLLIN,
+			};
+
+			poll(&pollfd, 1, 0);
+		}
+	}
+
+	for (i = 0; i < n_pkts; i++)
+		*xsk_ring_prod__fill_addr(&p->umem_fq, pos + i) =
+			bcache_cons(p->bc);
+
+	xsk_ring_prod__submit(&p->umem_fq, n_pkts);
+
+	return n_pkts;
+}
+
+static inline void
+port_tx_burst(struct port *p, struct burst_tx *b)
+{
+	u32 n_pkts, pos, i;
+	int status;
+
+	/* UMEM CQ. */
+	n_pkts = p->params.bp->umem_cfg.comp_size;
+
+	n_pkts = xsk_ring_cons__peek(&p->umem_cq, n_pkts, &pos);
+
+	for (i = 0; i < n_pkts; i++) {
+		u64 addr = *xsk_ring_cons__comp_addr(&p->umem_cq, pos + i);
+
+		bcache_prod(p->bc, addr);
+	}
+
+	xsk_ring_cons__release(&p->umem_cq, n_pkts);
+
+	/* TXQ. */
+	n_pkts = b->n_pkts;
+
+	for ( ; ; ) {
+		status = xsk_ring_prod__reserve(&p->txq, n_pkts, &pos);
+		if (status == n_pkts)
+			break;
+
+		if (xsk_ring_prod__needs_wakeup(&p->txq))
+			sendto(xsk_socket__fd(p->xsk), NULL, 0, MSG_DONTWAIT,
+			       NULL, 0);
+	}
+
+	for (i = 0; i < n_pkts; i++) {
+		xsk_ring_prod__tx_desc(&p->txq, pos + i)->addr = b->addr[i];
+		xsk_ring_prod__tx_desc(&p->txq, pos + i)->len = b->len[i];
+	}
+
+	xsk_ring_prod__submit(&p->txq, n_pkts);
+	if (xsk_ring_prod__needs_wakeup(&p->txq))
+		sendto(xsk_socket__fd(p->xsk), NULL, 0, MSG_DONTWAIT, NULL, 0);
+	p->n_pkts_tx += n_pkts;
+}
+
+/*
+ * Thread
+ *
+ * Packet forwarding threads.
+ */
+#ifndef MAX_PORTS_PER_THREAD
+#define MAX_PORTS_PER_THREAD 16
+#endif
+
+struct thread_data {
+	struct port *ports_rx[MAX_PORTS_PER_THREAD];
+	struct port *ports_tx[MAX_PORTS_PER_THREAD];
+	u32 n_ports_rx;
+	struct burst_rx burst_rx;
+	struct burst_tx burst_tx[MAX_PORTS_PER_THREAD];
+	u32 cpu_core_id;
+	int quit;
+};
+
+static void swap_mac_addresses(void *data)
+{
+	struct ether_header *eth = (struct ether_header *)data;
+	struct ether_addr *src_addr = (struct ether_addr *)&eth->ether_shost;
+	struct ether_addr *dst_addr = (struct ether_addr *)&eth->ether_dhost;
+	struct ether_addr tmp;
+
+	tmp = *src_addr;
+	*src_addr = *dst_addr;
+	*dst_addr = tmp;
+}
+
+static void *
+thread_func(void *arg)
+{
+	struct thread_data *t = arg;
+	cpu_set_t cpu_cores;
+	u32 i;
+
+	CPU_ZERO(&cpu_cores);
+	CPU_SET(t->cpu_core_id, &cpu_cores);
+	pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpu_cores);
+	/* Round-robin over the RX ports; n_ports_rx need not be a power of 2. */
+	for (i = 0; !t->quit; i = (i + 1 < t->n_ports_rx) ? i + 1 : 0) {
+		struct port *port_rx = t->ports_rx[i];
+		struct port *port_tx = t->ports_tx[i];
+		struct burst_rx *brx = &t->burst_rx;
+		struct burst_tx *btx = &t->burst_tx[i];
+		u32 n_pkts, j;
+
+		/* RX. */
+		n_pkts = port_rx_burst(port_rx, brx);
+		if (!n_pkts)
+			continue;
+
+		/* Process & TX. */
+		for (j = 0; j < n_pkts; j++) {
+			u64 addr = xsk_umem__add_offset_to_addr(brx->addr[j]);
+			u8 *pkt = xsk_umem__get_data(port_rx->params.bp->addr,
+						     addr);
+
+			swap_mac_addresses(pkt);
+
+			btx->addr[btx->n_pkts] = brx->addr[j];
+			btx->len[btx->n_pkts] = brx->len[j];
+			btx->n_pkts++;
+
+			if (btx->n_pkts == MAX_BURST_TX) {
+				port_tx_burst(port_tx, btx);
+				btx->n_pkts = 0;
+			}
+		}
+	}
+
+	return NULL;
+}
+
+/*
+ * Process
+ */
+static const struct bpool_params bpool_params_default = {
+	.n_buffers = 64 * 1024,
+	.buffer_size = XSK_UMEM__DEFAULT_FRAME_SIZE,
+	.mmap_flags = 0,
+
+	.n_users_max = 16,
+	.n_buffers_per_slab = XSK_RING_PROD__DEFAULT_NUM_DESCS * 2,
+};
+
+static const struct xsk_umem_config umem_cfg_default = {
+	.fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS * 2,
+	.comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
+	.frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE,
+	.frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM,
+	.flags = 0,
+};
+
+static const struct port_params port_params_default = {
+	.xsk_cfg = {
+		.rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
+		.tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
+		.libbpf_flags = 0,
+		.xdp_flags = XDP_FLAGS_DRV_MODE,
+		.bind_flags = XDP_USE_NEED_WAKEUP | XDP_ZEROCOPY,
+	},
+
+	.bp = NULL,
+	.iface = NULL,
+	.iface_queue = 0,
+};
+
+#ifndef MAX_PORTS
+#define MAX_PORTS 64
+#endif
+
+#ifndef MAX_THREADS
+#define MAX_THREADS 64
+#endif
+
+static struct bpool_params bpool_params;
+static struct xsk_umem_config umem_cfg;
+static struct bpool *bp;
+
+static struct port_params port_params[MAX_PORTS];
+static struct port *ports[MAX_PORTS];
+static u64 n_pkts_rx[MAX_PORTS];
+static u64 n_pkts_tx[MAX_PORTS];
+static int n_ports;
+
+static pthread_t threads[MAX_THREADS];
+static struct thread_data thread_data[MAX_THREADS];
+static int n_threads;
+
+static void
+print_usage(char *prog_name)
+{
+	const char *usage =
+		"Usage:\n"
+		"\t%s [ -b SIZE ] -c CORE -i INTERFACE [ -q QUEUE ]\n"
+		"\n"
+		"-c CORE        CPU core to run a packet forwarding thread\n"
+		"               on. May be invoked multiple times.\n"
+		"\n"
+		"-b SIZE        Number of buffers in the buffer pool shared\n"
+		"               by all the forwarding threads. Default: %u.\n"
+		"\n"
+		"-i INTERFACE   Network interface. Each (INTERFACE, QUEUE)\n"
+		"               pair specifies one forwarding port. May be\n"
+		"               invoked multiple times.\n"
+		"\n"
+		"-q QUEUE       Network interface queue for RX and TX. Each\n"
+		"               (INTERFACE, QUEUE) pair specified one\n"
+		"               forwarding port. Default: %u. May be invoked\n"
+		"               multiple times.\n"
+		"\n";
+	printf(usage,
+	       prog_name,
+	       bpool_params_default.n_buffers,
+	       port_params_default.iface_queue);
+}
+
+static int
+parse_args(int argc, char **argv)
+{
+	struct option lgopts[] = {
+		{ NULL,  0, 0, 0 }
+	};
+	int opt, option_index;
+
+	/* Parse the options; each case below must be listed in the optstring. */
+	for ( ; ;) {
+		opt = getopt_long(argc, argv, "b:c:i:q:", lgopts, &option_index);
+		if (opt == EOF)
+			break;
+
+		switch (opt) {
+		case 'b':
+			bpool_params.n_buffers = atoi(optarg);
+			break;
+
+		case 'c':
+			if (n_threads == MAX_THREADS) {
+				printf("Max number of threads (%d) reached.\n",
+				       MAX_THREADS);
+				return -1;
+			}
+
+			thread_data[n_threads].cpu_core_id = atoi(optarg);
+			n_threads++;
+			break;
+
+		case 'i':
+			if (n_ports == MAX_PORTS) {
+				printf("Max number of ports (%d) reached.\n",
+				       MAX_PORTS);
+				return -1;
+			}
+
+			port_params[n_ports].iface = optarg;
+			port_params[n_ports].iface_queue = 0;
+			n_ports++;
+			break;
+
+		case 'q':
+			if (n_ports == 0) {
+				printf("No port specified for queue.\n");
+				return -1;
+			}
+			port_params[n_ports - 1].iface_queue = atoi(optarg);
+			break;
+
+		default:
+			printf("Illegal argument.\n");
+			return -1;
+		}
+	}
+
+	optind = 1; /* reset getopt lib */
+
+	/* Check the input arguments. */
+	if (!n_ports) {
+		printf("No ports specified.\n");
+		return -1;
+	}
+
+	if (!n_threads) {
+		printf("No threads specified.\n");
+		return -1;
+	}
+
+	if (n_ports % n_threads) {
+		printf("Ports cannot be evenly distributed to threads.\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+static void
+print_port(u32 port_id)
+{
+	struct port *port = ports[port_id];
+
+	printf("Port %u: interface = %s, queue = %u\n",
+	       port_id, port->params.iface, port->params.iface_queue);
+}
+
+static void
+print_thread(u32 thread_id)
+{
+	struct thread_data *t = &thread_data[thread_id];
+	u32 i;
+
+	printf("Thread %u (CPU core %u): ",
+	       thread_id, t->cpu_core_id);
+
+	for (i = 0; i < t->n_ports_rx; i++) {
+		struct port *port_rx = t->ports_rx[i];
+		struct port *port_tx = t->ports_tx[i];
+
+		printf("(%s, %u) -> (%s, %u), ",
+		       port_rx->params.iface,
+		       port_rx->params.iface_queue,
+		       port_tx->params.iface,
+		       port_tx->params.iface_queue);
+	}
+
+	printf("\n");
+}
+
+static void
+print_port_stats_separator(void)
+{
+	printf("+-%4s-+-%12s-+-%13s-+-%12s-+-%13s-+\n",
+	       "----",
+	       "------------",
+	       "---------