From 62b8cea62e8bad0511260faab8e8de04c76a69af Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Thu, 7 Feb 2019 11:29:24 -0800
Subject: tools/bpf: add missing strings.h include

A few files in libbpf use the bzero() function (declared in the strings.h header)
but don't include that header. When libbpf is added as a dependency of pahole,
this nondeterministically causes warnings on some machines:

bpf.c:225:2: warning: implicit declaration of function 'bzero' [-Wimplicit-function-declaration]
  bzero(&attr, sizeof(attr));
    ^~~~~

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Reported-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/lib/bpf/bpf.c    | 1 +
 tools/lib/bpf/btf.c    | 1 +
 tools/lib/bpf/libbpf.c | 1 +
 3 files changed, 3 insertions(+)

diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index 3defad77dc7a..92fd27fe0599 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -22,6 +22,7 @@
  */
 
 #include <stdlib.h>
+#include <strings.h>
 #include <memory.h>
 #include <unistd.h>
 #include <asm/unistd.h>
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index ab6528c935a1..4324eb47d214 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -4,6 +4,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <strings.h>
 #include <unistd.h>
 #include <errno.h>
 #include <linux/err.h>
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 47969aa0faf8..8d64ada5f728 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -18,6 +18,7 @@
 #include <libgen.h>
 #include <inttypes.h>
 #include <string.h>
+#include <strings.h>
 #include <unistd.h>
 #include <fcntl.h>
 #include <errno.h>
-- 
cgit v1.2.3


From a4021a3579c52d5a5131820aeb94f531a7b082a7 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Thu, 7 Feb 2019 09:34:51 -0800
Subject: tools/bpf: add log_level to bpf_load_program_attr

The kernel verifier has three levels of logs:
    0: no logs
    1: logs mostly useful
  > 1: verbose

Current libbpf API functions bpf_load_program_xattr() and
bpf_load_program() cannot specify log_level.
The bcc, however, provides an interface for user to
specify log_level 2 for verbose output.

This patch adds log_level to struct
bpf_load_program_attr, so users, including bcc, can use
bpf_load_program_xattr() to change log_level. The
supported log_level values are 0, 1, and 2.

The bpf selftest test_sock.c is modified to enable log_level = 2.
If the "verbose" in test_sock.c is changed to true,
the test will output logs like below:
  $ ./test_sock
  func#0 @0
  0: R1=ctx(id=0,off=0,imm=0) R10=fp0,call_-1
  0: (bf) r6 = r1
  1: R1=ctx(id=0,off=0,imm=0) R6_w=ctx(id=0,off=0,imm=0) R10=fp0,call_-1
  1: (61) r7 = *(u32 *)(r6 +28)
  invalid bpf_context access off=28 size=4

  Test case: bind4 load with invalid access: src_ip6 .. [PASS]
  ...
  Test case: bind6 allow all .. [PASS]
  Summary: 16 PASSED, 0 FAILED

Some test_sock tests are negative tests, so the verbose verifier
log will be printed for them, as shown above.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/lib/bpf/bpf.c                     | 22 +++++++++++++++++-----
 tools/lib/bpf/bpf.h                     |  1 +
 tools/testing/selftests/bpf/test_sock.c |  9 ++++++++-
 3 files changed, 26 insertions(+), 6 deletions(-)

diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index 92fd27fe0599..a5261f39e2bd 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -215,10 +215,15 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
 {
 	void *finfo = NULL, *linfo = NULL;
 	union bpf_attr attr;
+	__u32 log_level;
 	__u32 name_len;
 	int fd;
 
-	if (!load_attr)
+	if (!load_attr || !log_buf != !log_buf_sz)
+		return -EINVAL;
+
+	log_level = load_attr->log_level;
+	if (log_level > 2 || (log_level && !log_buf))
 		return -EINVAL;
 
 	name_len = load_attr->name ? strlen(load_attr->name) : 0;
@@ -229,9 +234,16 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
 	attr.insn_cnt = (__u32)load_attr->insns_cnt;
 	attr.insns = ptr_to_u64(load_attr->insns);
 	attr.license = ptr_to_u64(load_attr->license);
-	attr.log_buf = ptr_to_u64(NULL);
-	attr.log_size = 0;
-	attr.log_level = 0;
+
+	attr.log_level = log_level;
+	if (log_level) {
+		attr.log_buf = ptr_to_u64(log_buf);
+		attr.log_size = log_buf_sz;
+	} else {
+		attr.log_buf = ptr_to_u64(NULL);
+		attr.log_size = 0;
+	}
+
 	attr.kern_version = load_attr->kern_version;
 	attr.prog_ifindex = load_attr->prog_ifindex;
 	attr.prog_btf_fd = load_attr->prog_btf_fd;
@@ -287,7 +299,7 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
 			goto done;
 	}
 
-	if (!log_buf || !log_buf_sz)
+	if (log_level || !log_buf)
 		goto done;
 
 	/* Try again with log */
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index ed09eed2dc3b..6ffdd79bea89 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -85,6 +85,7 @@ struct bpf_load_program_attr {
 	__u32 line_info_rec_size;
 	const void *line_info;
 	__u32 line_info_cnt;
+	__u32 log_level;
 };
 
 /* Flags to direct loading requirements */
diff --git a/tools/testing/selftests/bpf/test_sock.c b/tools/testing/selftests/bpf/test_sock.c
index 561ffb6d6433..fb679ac3d4b0 100644
--- a/tools/testing/selftests/bpf/test_sock.c
+++ b/tools/testing/selftests/bpf/test_sock.c
@@ -20,6 +20,7 @@
 #define MAX_INSNS	512
 
 char bpf_log_buf[BPF_LOG_BUF_SIZE];
+static bool verbose = false;
 
 struct sock_test {
 	const char *descr;
@@ -325,6 +326,7 @@ static int load_sock_prog(const struct bpf_insn *prog,
 			  enum bpf_attach_type attach_type)
 {
 	struct bpf_load_program_attr attr;
+	int ret;
 
 	memset(&attr, 0, sizeof(struct bpf_load_program_attr));
 	attr.prog_type = BPF_PROG_TYPE_CGROUP_SOCK;
@@ -332,8 +334,13 @@ static int load_sock_prog(const struct bpf_insn *prog,
 	attr.insns = prog;
 	attr.insns_cnt = probe_prog_length(attr.insns);
 	attr.license = "GPL";
+	attr.log_level = 2;
 
-	return bpf_load_program_xattr(&attr, bpf_log_buf, BPF_LOG_BUF_SIZE);
+	ret = bpf_load_program_xattr(&attr, bpf_log_buf, BPF_LOG_BUF_SIZE);
+	if (verbose && ret < 0)
+		fprintf(stderr, "%s\n", bpf_log_buf);
+
+	return ret;
 }
 
 static int attach_sock_prog(int cgfd, int progfd,
-- 
cgit v1.2.3


From d29d87f7e61226c339d1212beff6b82f653acd67 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Fri, 8 Feb 2019 11:19:36 -0800
Subject: btf: separate btf creation and loading

This change splits the previous btf__new() functionality, which both
constructed struct btf and loaded it into the kernel, into two:
- btf__new() just creates and initializes struct btf
- btf__load() attempts to load existing struct btf into kernel

btf__free will still close BTF fd, if it was ever loaded successfully
into kernel.

This change allows users of libbpf to manipulate BTF using its API,
without the need to unnecessarily load it into kernel.

One of the intended use cases is pahole, which will do DWARF to BTF
conversion and then use libbpf to do type deduplication, while then
handling ELF sections overwriting and other concerns on its own.

Fixes: 2d3feca8c44f ("bpf: btf: print map dump and lookup with btf info")
Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/lib/bpf/btf.c      | 54 +++++++++++++++++++++++++++---------------------
 tools/lib/bpf/btf.h      |  1 +
 tools/lib/bpf/libbpf.c   |  2 +-
 tools/lib/bpf/libbpf.map |  1 +
 4 files changed, 33 insertions(+), 25 deletions(-)

diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 4324eb47d214..46db0a3b5cb7 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -367,8 +367,6 @@ void btf__free(struct btf *btf)
 
 struct btf *btf__new(__u8 *data, __u32 size)
 {
-	__u32 log_buf_size = 0;
-	char *log_buf = NULL;
 	struct btf *btf;
 	int err;
 
@@ -378,15 +376,6 @@ struct btf *btf__new(__u8 *data, __u32 size)
 
 	btf->fd = -1;
 
-	log_buf = malloc(BPF_LOG_BUF_SIZE);
-	if (!log_buf) {
-		err = -ENOMEM;
-		goto done;
-	}
-
-	*log_buf = 0;
-	log_buf_size = BPF_LOG_BUF_SIZE;
-
 	btf->data = malloc(size);
 	if (!btf->data) {
 		err = -ENOMEM;
@@ -396,17 +385,6 @@ struct btf *btf__new(__u8 *data, __u32 size)
 	memcpy(btf->data, data, size);
 	btf->data_size = size;
 
-	btf->fd = bpf_load_btf(btf->data, btf->data_size,
-			       log_buf, log_buf_size, false);
-
-	if (btf->fd == -1) {
-		err = -errno;
-		pr_warning("Error loading BTF: %s(%d)\n", strerror(errno), errno);
-		if (log_buf && *log_buf)
-			pr_warning("%s\n", log_buf);
-		goto done;
-	}
-
 	err = btf_parse_hdr(btf);
 	if (err)
 		goto done;
@@ -418,8 +396,6 @@ struct btf *btf__new(__u8 *data, __u32 size)
 	err = btf_parse_type_sec(btf);
 
 done:
-	free(log_buf);
-
 	if (err) {
 		btf__free(btf);
 		return ERR_PTR(err);
@@ -428,6 +404,36 @@ done:
 	return btf;
 }
 
+int btf__load(struct btf *btf)
+{
+	__u32 log_buf_size = BPF_LOG_BUF_SIZE;
+	char *log_buf = NULL;
+	int err = 0;
+
+	if (btf->fd >= 0)
+		return -EEXIST;
+
+	log_buf = malloc(log_buf_size);
+	if (!log_buf)
+		return -ENOMEM;
+
+	*log_buf = 0;
+
+	btf->fd = bpf_load_btf(btf->data, btf->data_size,
+			       log_buf, log_buf_size, false);
+	if (btf->fd < 0) {
+		err = -errno;
+		pr_warning("Error loading BTF: %s(%d)\n", strerror(errno), errno);
+		if (*log_buf)
+			pr_warning("%s\n", log_buf);
+		goto done;
+	}
+
+done:
+	free(log_buf);
+	return err;
+}
+
 int btf__fd(const struct btf *btf)
 {
 	return btf->fd;
diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
index b393da90cc85..f55b7bc98d9e 100644
--- a/tools/lib/bpf/btf.h
+++ b/tools/lib/bpf/btf.h
@@ -57,6 +57,7 @@ struct btf_ext_header {
 
 LIBBPF_API void btf__free(struct btf *btf);
 LIBBPF_API struct btf *btf__new(__u8 *data, __u32 size);
+LIBBPF_API int btf__load(struct btf *btf);
 LIBBPF_API __s32 btf__find_by_name(const struct btf *btf,
 				   const char *type_name);
 LIBBPF_API __u32 btf__get_nr_types(const struct btf *btf);
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 8d64ada5f728..e3c39edfb9d3 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -836,7 +836,7 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags)
 			obj->efile.maps_shndx = idx;
 		else if (strcmp(name, BTF_ELF_SEC) == 0) {
 			obj->btf = btf__new(data->d_buf, data->d_size);
-			if (IS_ERR(obj->btf)) {
+			if (IS_ERR(obj->btf) || btf__load(obj->btf)) {
 				pr_warning("Error loading ELF section %s: %ld. Ignored and continue.\n",
 					   BTF_ELF_SEC, PTR_ERR(obj->btf));
 				obj->btf = NULL;
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index 89c1149e32ee..f5372df143f4 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -137,6 +137,7 @@ LIBBPF_0.0.2 {
 		btf__get_map_kv_tids;
 		btf__get_nr_types;
 		btf__get_strings;
+		btf__load;
 		btf_ext__free;
 		btf_ext__func_info_rec_size;
 		btf_ext__line_info_rec_size;
-- 
cgit v1.2.3


From 02c874460f3d9213096323ac8a937fb486a4e70d Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Fri, 8 Feb 2019 11:19:37 -0800
Subject: btf: expose API to work with raw btf data

This patch exposes a new API, btf__get_raw_data(), that returns a read-only
pointer to the raw BTF data held by struct btf, along with its size. This is
useful for external programs that need to manipulate raw data, e.g., pahole
using btf__dedup() to deduplicate BTF type info and then writing it back to a file.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/lib/bpf/btf.c      | 6 ++++++
 tools/lib/bpf/btf.h      | 1 +
 tools/lib/bpf/libbpf.map | 1 +
 3 files changed, 8 insertions(+)

diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 46db0a3b5cb7..4fba0aa989df 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -439,6 +439,12 @@ int btf__fd(const struct btf *btf)
 	return btf->fd;
 }
 
+const void *btf__get_raw_data(const struct btf *btf, __u32 *size)
+{
+	*size = btf->data_size;
+	return btf->data;
+}
+
 void btf__get_strings(const struct btf *btf, const char **strings,
 		      __u32 *str_len)
 {
diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
index f55b7bc98d9e..10fe412461fe 100644
--- a/tools/lib/bpf/btf.h
+++ b/tools/lib/bpf/btf.h
@@ -66,6 +66,7 @@ LIBBPF_API const struct btf_type *btf__type_by_id(const struct btf *btf,
 LIBBPF_API __s64 btf__resolve_size(const struct btf *btf, __u32 type_id);
 LIBBPF_API int btf__resolve_type(const struct btf *btf, __u32 type_id);
 LIBBPF_API int btf__fd(const struct btf *btf);
+LIBBPF_API const void *btf__get_raw_data(const struct btf *btf, __u32 *size);
 LIBBPF_API void btf__get_strings(const struct btf *btf, const char **strings,
 				 __u32 *str_len);
 LIBBPF_API const char *btf__name_by_offset(const struct btf *btf, __u32 offset);
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index f5372df143f4..9e10467f8cbb 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -136,6 +136,7 @@ LIBBPF_0.0.2 {
 		btf__dedup;
 		btf__get_map_kv_tids;
 		btf__get_nr_types;
+		btf__get_raw_data;
 		btf__get_strings;
 		btf__load;
 		btf_ext__free;
-- 
cgit v1.2.3


From ae4ab4b4117d23da49f04a7e1fe82a41e6074eeb Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Fri, 8 Feb 2019 11:19:38 -0800
Subject: btf: expose API to work with raw btf_ext data

This patch changes struct btf_ext to retain the original data in a single
contiguous block of memory, which makes it possible to expose a
btf_ext__get_raw_data() interface similar to btf__get_raw_data(), giving
users of libbpf access to the raw representation of the .BTF.ext section.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Acked-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/lib/bpf/btf.c      | 102 +++++++++++++++++++++++++----------------------
 tools/lib/bpf/btf.h      |   2 +
 tools/lib/bpf/libbpf.map |   1 +
 3 files changed, 57 insertions(+), 48 deletions(-)

diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 4fba0aa989df..f6b724ed1bdd 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -42,9 +42,8 @@ struct btf {
 
 struct btf_ext_info {
 	/*
-	 * info points to a deep copy of the individual info section
-	 * (e.g. func_info and line_info) from the .BTF.ext.
-	 * It does not include the __u32 rec_size.
+	 * info points to the individual info section (e.g. func_info and
+	 * line_info) from the .BTF.ext. It does not include the __u32 rec_size.
 	 */
 	void *info;
 	__u32 rec_size;
@@ -52,8 +51,13 @@ struct btf_ext_info {
 };
 
 struct btf_ext {
+	union {
+		struct btf_ext_header *hdr;
+		void *data;
+	};
 	struct btf_ext_info func_info;
 	struct btf_ext_info line_info;
+	__u32 data_size;
 };
 
 struct btf_ext_info_sec {
@@ -596,7 +600,7 @@ int btf__get_map_kv_tids(const struct btf *btf, const char *map_name,
 	return 0;
 }
 
-struct btf_ext_sec_copy_param {
+struct btf_ext_sec_setup_param {
 	__u32 off;
 	__u32 len;
 	__u32 min_rec_size;
@@ -604,20 +608,14 @@ struct btf_ext_sec_copy_param {
 	const char *desc;
 };
 
-static int btf_ext_copy_info(struct btf_ext *btf_ext,
-			     __u8 *data, __u32 data_size,
-			     struct btf_ext_sec_copy_param *ext_sec)
+static int btf_ext_setup_info(struct btf_ext *btf_ext,
+			      struct btf_ext_sec_setup_param *ext_sec)
 {
-	const struct btf_ext_header *hdr = (struct btf_ext_header *)data;
 	const struct btf_ext_info_sec *sinfo;
 	struct btf_ext_info *ext_info;
 	__u32 info_left, record_size;
 	/* The start of the info sec (including the __u32 record_size). */
-	const void *info;
-
-	/* data and data_size do not include btf_ext_header from now on */
-	data = data + hdr->hdr_len;
-	data_size -= hdr->hdr_len;
+	void *info;
 
 	if (ext_sec->off & 0x03) {
 		pr_debug(".BTF.ext %s section is not aligned to 4 bytes\n",
@@ -625,16 +623,15 @@ static int btf_ext_copy_info(struct btf_ext *btf_ext,
 		return -EINVAL;
 	}
 
-	if (data_size < ext_sec->off ||
-	    ext_sec->len > data_size - ext_sec->off) {
+	info = btf_ext->data + btf_ext->hdr->hdr_len + ext_sec->off;
+	info_left = ext_sec->len;
+
+	if (btf_ext->data + btf_ext->data_size < info + ext_sec->len) {
 		pr_debug("%s section (off:%u len:%u) is beyond the end of the ELF section .BTF.ext\n",
-		     ext_sec->desc, ext_sec->off, ext_sec->len);
+			 ext_sec->desc, ext_sec->off, ext_sec->len);
 		return -EINVAL;
 	}
 
-	info = data + ext_sec->off;
-	info_left = ext_sec->len;
-
 	/* At least a record size */
 	if (info_left < sizeof(__u32)) {
 		pr_debug(".BTF.ext %s record size not found\n", ext_sec->desc);
@@ -646,7 +643,7 @@ static int btf_ext_copy_info(struct btf_ext *btf_ext,
 	if (record_size < ext_sec->min_rec_size ||
 	    record_size & 0x03) {
 		pr_debug("%s section in .BTF.ext has invalid record size %u\n",
-		     ext_sec->desc, record_size);
+			 ext_sec->desc, record_size);
 		return -EINVAL;
 	}
 
@@ -692,42 +689,35 @@ static int btf_ext_copy_info(struct btf_ext *btf_ext,
 	ext_info = ext_sec->ext_info;
 	ext_info->len = ext_sec->len - sizeof(__u32);
 	ext_info->rec_size = record_size;
-	ext_info->info = malloc(ext_info->len);
-	if (!ext_info->info)
-		return -ENOMEM;
-	memcpy(ext_info->info, info + sizeof(__u32), ext_info->len);
+	ext_info->info = info + sizeof(__u32);
 
 	return 0;
 }
 
-static int btf_ext_copy_func_info(struct btf_ext *btf_ext,
-				  __u8 *data, __u32 data_size)
+static int btf_ext_setup_func_info(struct btf_ext *btf_ext)
 {
-	const struct btf_ext_header *hdr = (struct btf_ext_header *)data;
-	struct btf_ext_sec_copy_param param = {
-		.off = hdr->func_info_off,
-		.len = hdr->func_info_len,
+	struct btf_ext_sec_setup_param param = {
+		.off = btf_ext->hdr->func_info_off,
+		.len = btf_ext->hdr->func_info_len,
 		.min_rec_size = sizeof(struct bpf_func_info_min),
 		.ext_info = &btf_ext->func_info,
 		.desc = "func_info"
 	};
 
-	return btf_ext_copy_info(btf_ext, data, data_size, &param);
+	return btf_ext_setup_info(btf_ext, &param);
 }
 
-static int btf_ext_copy_line_info(struct btf_ext *btf_ext,
-				  __u8 *data, __u32 data_size)
+static int btf_ext_setup_line_info(struct btf_ext *btf_ext)
 {
-	const struct btf_ext_header *hdr = (struct btf_ext_header *)data;
-	struct btf_ext_sec_copy_param param = {
-		.off = hdr->line_info_off,
-		.len = hdr->line_info_len,
+	struct btf_ext_sec_setup_param param = {
+		.off = btf_ext->hdr->line_info_off,
+		.len = btf_ext->hdr->line_info_len,
 		.min_rec_size = sizeof(struct bpf_line_info_min),
 		.ext_info = &btf_ext->line_info,
 		.desc = "line_info",
 	};
 
-	return btf_ext_copy_info(btf_ext, data, data_size, &param);
+	return btf_ext_setup_info(btf_ext, &param);
 }
 
 static int btf_ext_parse_hdr(__u8 *data, __u32 data_size)
@@ -767,9 +757,7 @@ void btf_ext__free(struct btf_ext *btf_ext)
 {
 	if (!btf_ext)
 		return;
-
-	free(btf_ext->func_info.info);
-	free(btf_ext->line_info.info);
+	free(btf_ext->data);
 	free(btf_ext);
 }
 
@@ -786,13 +774,23 @@ struct btf_ext *btf_ext__new(__u8 *data, __u32 size)
 	if (!btf_ext)
 		return ERR_PTR(-ENOMEM);
 
-	err = btf_ext_copy_func_info(btf_ext, data, size);
-	if (err) {
-		btf_ext__free(btf_ext);
-		return ERR_PTR(err);
+	btf_ext->data_size = size;
+	btf_ext->data = malloc(size);
+	if (!btf_ext->data) {
+		err = -ENOMEM;
+		goto done;
 	}
+	memcpy(btf_ext->data, data, size);
+
+	err = btf_ext_setup_func_info(btf_ext);
+	if (err)
+		goto done;
 
-	err = btf_ext_copy_line_info(btf_ext, data, size);
+	err = btf_ext_setup_line_info(btf_ext);
+	if (err)
+		goto done;
+
+done:
 	if (err) {
 		btf_ext__free(btf_ext);
 		return ERR_PTR(err);
@@ -801,6 +799,12 @@ struct btf_ext *btf_ext__new(__u8 *data, __u32 size)
 	return btf_ext;
 }
 
+const void *btf_ext__get_raw_data(const struct btf_ext *btf_ext, __u32 *size)
+{
+	*size = btf_ext->data_size;
+	return btf_ext->data;
+}
+
 static int btf_ext_reloc_info(const struct btf *btf,
 			      const struct btf_ext_info *ext_info,
 			      const char *sec_name, __u32 insns_cnt,
@@ -849,7 +853,8 @@ static int btf_ext_reloc_info(const struct btf *btf,
 	return -ENOENT;
 }
 
-int btf_ext__reloc_func_info(const struct btf *btf, const struct btf_ext *btf_ext,
+int btf_ext__reloc_func_info(const struct btf *btf,
+			     const struct btf_ext *btf_ext,
 			     const char *sec_name, __u32 insns_cnt,
 			     void **func_info, __u32 *cnt)
 {
@@ -857,7 +862,8 @@ int btf_ext__reloc_func_info(const struct btf *btf, const struct btf_ext *btf_ex
 				  insns_cnt, func_info, cnt);
 }
 
-int btf_ext__reloc_line_info(const struct btf *btf, const struct btf_ext *btf_ext,
+int btf_ext__reloc_line_info(const struct btf *btf,
+			     const struct btf_ext *btf_ext,
 			     const char *sec_name, __u32 insns_cnt,
 			     void **line_info, __u32 *cnt)
 {
diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
index 10fe412461fe..0306b54d54eb 100644
--- a/tools/lib/bpf/btf.h
+++ b/tools/lib/bpf/btf.h
@@ -78,6 +78,8 @@ LIBBPF_API int btf__get_map_kv_tids(const struct btf *btf, const char *map_name,
 
 LIBBPF_API struct btf_ext *btf_ext__new(__u8 *data, __u32 size);
 LIBBPF_API void btf_ext__free(struct btf_ext *btf_ext);
+LIBBPF_API const void *btf_ext__get_raw_data(const struct btf_ext* btf_ext,
+					     __u32 *size);
 LIBBPF_API int btf_ext__reloc_func_info(const struct btf *btf,
 					const struct btf_ext *btf_ext,
 					const char *sec_name, __u32 insns_cnt,
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index 9e10467f8cbb..eb78c7c261d9 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -141,6 +141,7 @@ LIBBPF_0.0.2 {
 		btf__load;
 		btf_ext__free;
 		btf_ext__func_info_rec_size;
+		btf_ext__get_raw_data;
 		btf_ext__line_info_rec_size;
 		btf_ext__new;
 		btf_ext__reloc_func_info;
-- 
cgit v1.2.3


From 49b57e0d01db73c99f86d68480fb9b4014bb1060 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Fri, 8 Feb 2019 11:19:39 -0800
Subject: tools/bpf: remove btf__get_strings() superseded by raw data API

Now that we have btf__get_raw_data() it's trivial for tests to iterate
over all strings for testing purposes, which eliminates the need for
btf__get_strings() API.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Acked-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/lib/bpf/btf.c                    |  7 ------
 tools/lib/bpf/btf.h                    |  2 --
 tools/lib/bpf/libbpf.map               |  1 -
 tools/testing/selftests/bpf/test_btf.c | 39 ++++++++++++++++++++++------------
 4 files changed, 26 insertions(+), 23 deletions(-)

diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index f6b724ed1bdd..6953fedb88ff 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -449,13 +449,6 @@ const void *btf__get_raw_data(const struct btf *btf, __u32 *size)
 	return btf->data;
 }
 
-void btf__get_strings(const struct btf *btf, const char **strings,
-		      __u32 *str_len)
-{
-	*strings = btf->strings;
-	*str_len = btf->hdr->str_len;
-}
-
 const char *btf__name_by_offset(const struct btf *btf, __u32 offset)
 {
 	if (offset < btf->hdr->str_len)
diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
index 0306b54d54eb..94bbc249b0f1 100644
--- a/tools/lib/bpf/btf.h
+++ b/tools/lib/bpf/btf.h
@@ -67,8 +67,6 @@ LIBBPF_API __s64 btf__resolve_size(const struct btf *btf, __u32 type_id);
 LIBBPF_API int btf__resolve_type(const struct btf *btf, __u32 type_id);
 LIBBPF_API int btf__fd(const struct btf *btf);
 LIBBPF_API const void *btf__get_raw_data(const struct btf *btf, __u32 *size);
-LIBBPF_API void btf__get_strings(const struct btf *btf, const char **strings,
-				 __u32 *str_len);
 LIBBPF_API const char *btf__name_by_offset(const struct btf *btf, __u32 offset);
 LIBBPF_API int btf__get_from_id(__u32 id, struct btf **btf);
 LIBBPF_API int btf__get_map_kv_tids(const struct btf *btf, const char *map_name,
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index eb78c7c261d9..5fc8222209f8 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -137,7 +137,6 @@ LIBBPF_0.0.2 {
 		btf__get_map_kv_tids;
 		btf__get_nr_types;
 		btf__get_raw_data;
-		btf__get_strings;
 		btf__load;
 		btf_ext__free;
 		btf_ext__func_info_rec_size;
diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c
index 447acc34db94..bbcacba39590 100644
--- a/tools/testing/selftests/bpf/test_btf.c
+++ b/tools/testing/selftests/bpf/test_btf.c
@@ -5882,15 +5882,17 @@ static void dump_btf_strings(const char *strs, __u32 len)
 static int do_test_dedup(unsigned int test_num)
 {
 	const struct btf_dedup_test *test = &dedup_tests[test_num - 1];
-	int err = 0, i;
-	__u32 test_nr_types, expect_nr_types, test_str_len, expect_str_len;
-	void *raw_btf;
-	unsigned int raw_btf_size;
+	__u32 test_nr_types, expect_nr_types, test_btf_size, expect_btf_size;
+	const struct btf_header *test_hdr, *expect_hdr;
 	struct btf *test_btf = NULL, *expect_btf = NULL;
+	const void *test_btf_data, *expect_btf_data;
 	const char *ret_test_next_str, *ret_expect_next_str;
 	const char *test_strs, *expect_strs;
 	const char *test_str_cur, *test_str_end;
 	const char *expect_str_cur, *expect_str_end;
+	unsigned int raw_btf_size;
+	void *raw_btf;
+	int err = 0, i;
 
 	fprintf(stderr, "BTF dedup test[%u] (%s):", test_num, test->descr);
 
@@ -5927,23 +5929,34 @@ static int do_test_dedup(unsigned int test_num)
 		goto done;
 	}
 
-	btf__get_strings(test_btf, &test_strs, &test_str_len);
-	btf__get_strings(expect_btf, &expect_strs, &expect_str_len);
-	if (CHECK(test_str_len != expect_str_len,
-		  "test_str_len:%u != expect_str_len:%u",
-		  test_str_len, expect_str_len)) {
+	test_btf_data = btf__get_raw_data(test_btf, &test_btf_size);
+	expect_btf_data = btf__get_raw_data(expect_btf, &expect_btf_size);
+	if (CHECK(test_btf_size != expect_btf_size,
+		  "test_btf_size:%u != expect_btf_size:%u",
+		  test_btf_size, expect_btf_size)) {
+		err = -1;
+		goto done;
+	}
+
+	test_hdr = test_btf_data;
+	test_strs = test_btf_data + test_hdr->str_off;
+	expect_hdr = expect_btf_data;
+	expect_strs = expect_btf_data + expect_hdr->str_off;
+	if (CHECK(test_hdr->str_len != expect_hdr->str_len,
+		  "test_hdr->str_len:%u != expect_hdr->str_len:%u",
+		  test_hdr->str_len, expect_hdr->str_len)) {
 		fprintf(stderr, "\ntest strings:\n");
-		dump_btf_strings(test_strs, test_str_len);
+		dump_btf_strings(test_strs, test_hdr->str_len);
 		fprintf(stderr, "\nexpected strings:\n");
-		dump_btf_strings(expect_strs, expect_str_len);
+		dump_btf_strings(expect_strs, expect_hdr->str_len);
 		err = -1;
 		goto done;
 	}
 
 	test_str_cur = test_strs;
-	test_str_end = test_strs + test_str_len;
+	test_str_end = test_strs + test_hdr->str_len;
 	expect_str_cur = expect_strs;
-	expect_str_end = expect_strs + expect_str_len;
+	expect_str_end = expect_strs + expect_hdr->str_len;
 	while (test_str_cur < test_str_end && expect_str_cur < expect_str_end) {
 		size_t test_len, expect_len;
 
-- 
cgit v1.2.3


From 5f4566498dee5e38e36a015a968c22ed21568f0b Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Fri, 8 Feb 2019 22:25:54 -0800
Subject: bpf: Fix narrow load on a bpf_sock returned from sk_lookup()

By adding this test to test_verifier:
{
	"reference tracking: access sk->src_ip4 (narrow load)",
	.insns = {
	BPF_SK_LOOKUP,
	BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
	BPF_LDX_MEM(BPF_H, BPF_REG_2, BPF_REG_0, offsetof(struct bpf_sock, src_ip4) + 2),
	BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
	BPF_EMIT_CALL(BPF_FUNC_sk_release),
	BPF_EXIT_INSN(),
	},
	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
	.result = ACCEPT,
},

The above test loads 2 bytes from sk->src_ip4 where
sk is obtained by bpf_sk_lookup_tcp().

It hits an internal verifier error from convert_ctx_accesses():
[root@arch-fb-vm1 bpf]# ./test_verifier 665 665
Failed to load prog 'Invalid argument'!
0: (b7) r2 = 0
1: (63) *(u32 *)(r10 -8) = r2
2: (7b) *(u64 *)(r10 -16) = r2
3: (7b) *(u64 *)(r10 -24) = r2
4: (7b) *(u64 *)(r10 -32) = r2
5: (7b) *(u64 *)(r10 -40) = r2
6: (7b) *(u64 *)(r10 -48) = r2
7: (bf) r2 = r10
8: (07) r2 += -48
9: (b7) r3 = 36
10: (b7) r4 = 0
11: (b7) r5 = 0
12: (85) call bpf_sk_lookup_tcp#84
13: (bf) r6 = r0
14: (15) if r0 == 0x0 goto pc+3
 R0=sock(id=1,off=0,imm=0) R6=sock(id=1,off=0,imm=0) R10=fp0,call_-1 fp-8=????0000 fp-16=0000mmmm fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmmmmmm fp-48=mmmmmmmm refs=1
15: (69) r2 = *(u16 *)(r0 +26)
16: (bf) r1 = r6
17: (85) call bpf_sk_release#86
18: (95) exit

from 14 to 18: safe
processed 20 insns (limit 131072), stack depth 48
bpf verifier is misconfigured
Summary: 0 PASSED, 0 SKIPPED, 1 FAILED

The bpf_sock_is_valid_access() is expecting src_ip4 can be narrowly
loaded (meaning load any 1 or 2 bytes of the src_ip4) by
marking info->ctx_field_size.  However, this marked
ctx_field_size is not used.  This patch fixes it.

Due to the recent refactoring in test_verifier,
this new test will be added to the bpf-next branch
(together with the bpf_tcp_sock patchset)
to avoid merge conflict.

Fixes: c64b7983288e ("bpf: Add PTR_TO_SOCKET verifier type")
Cc: Joe Stringer <joe@wand.net.nz>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Acked-by: Joe Stringer <joe@wand.net.nz>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 kernel/bpf/verifier.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index b63bc77af2d1..516dfc6d78de 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1640,12 +1640,13 @@ static int check_flow_keys_access(struct bpf_verifier_env *env, int off,
 	return 0;
 }
 
-static int check_sock_access(struct bpf_verifier_env *env, u32 regno, int off,
-			     int size, enum bpf_access_type t)
+static int check_sock_access(struct bpf_verifier_env *env, int insn_idx,
+			     u32 regno, int off, int size,
+			     enum bpf_access_type t)
 {
 	struct bpf_reg_state *regs = cur_regs(env);
 	struct bpf_reg_state *reg = &regs[regno];
-	struct bpf_insn_access_aux info;
+	struct bpf_insn_access_aux info = {};
 
 	if (reg->smin_value < 0) {
 		verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
@@ -1659,6 +1660,8 @@ static int check_sock_access(struct bpf_verifier_env *env, u32 regno, int off,
 		return -EACCES;
 	}
 
+	env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size;
+
 	return 0;
 }
 
@@ -2055,7 +2058,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
 			verbose(env, "cannot write into socket\n");
 			return -EACCES;
 		}
-		err = check_sock_access(env, regno, off, size, t);
+		err = check_sock_access(env, insn_idx, regno, off, size, t);
 		if (!err && value_regno >= 0)
 			mark_reg_unknown(env, regs, value_regno);
 	} else {
-- 
cgit v1.2.3


From 46f8bc92758c6259bcf945e9216098661c1587cd Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Sat, 9 Feb 2019 23:22:20 -0800
Subject: bpf: Add a bpf_sock pointer to __sk_buff and a bpf_sk_fullsock helper

In kernel, it is common to check "skb->sk && sk_fullsock(skb->sk)"
before accessing the fields in sock.  For example, in __netdev_pick_tx:

static u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb,
			    struct net_device *sb_dev)
{
	/* ... */

	struct sock *sk = skb->sk;

		if (queue_index != new_index && sk &&
		    sk_fullsock(sk) &&
		    rcu_access_pointer(sk->sk_dst_cache))
			sk_tx_queue_set(sk, new_index);

	/* ... */

	return queue_index;
}

This patch adds a "struct bpf_sock *sk" pointer to "struct __sk_buff".
A few of the convert_ctx_access() implementations in filter.c,
e.g. sock_ops_convert_ctx_access(), already access the sock_common
fields of skb->sk.

"__sk_buff->sk" is a PTR_TO_SOCK_COMMON_OR_NULL in the verifier.
Some of the fields in "bpf_sock" will not be directly
accessible through the "__sk_buff->sk" pointer.  Access is limited
by the new "bpf_sock_common_is_valid_access()".
e.g. The existing "type", "protocol", "mark" and "priority" in bpf_sock
     are not allowed.

The newly added "struct bpf_sock *bpf_sk_fullsock(struct bpf_sock *sk)"
can be used to get a sk with all accessible fields in "bpf_sock".
This helper is added to both cg_skb and sched_(cls|act).

int cg_skb_foo(struct __sk_buff *skb) {
	struct bpf_sock *sk;

	sk = skb->sk;
	if (!sk)
		return 1;

	sk = bpf_sk_fullsock(sk);
	if (!sk)
		return 1;

	if (sk->family != AF_INET6 || sk->protocol != IPPROTO_TCP)
		return 1;

	/* some_traffic_shaping(); */

	return 1;
}

(1) The sk is read only

(2) There is no new "struct bpf_sock_common" introduced.

(3) Future kernel sock's members could be added to bpf_sock only
    instead of repeatedly adding at multiple places like currently
    in bpf_sock_ops_md, bpf_sock_addr_md, sk_reuseport_md...etc.

(4) After "sk = skb->sk", the reg holding sk is in type
    PTR_TO_SOCK_COMMON_OR_NULL.

(5) After bpf_sk_fullsock(), the return type will be in type
    PTR_TO_SOCKET_OR_NULL which is the same as the return type of
    bpf_sk_lookup_xxx().

    However, bpf_sk_fullsock() does not take refcnt.  The
    acquire_reference_state() is only depending on the return type now.
    To avoid it, a new is_acquire_function() is checked before calling
    acquire_reference_state().

(6) The WARN_ON in "release_reference_state()" is no longer an
    internal verifier bug.

    When reg->id is not found in state->refs[], it means the
    bpf_prog does something wrong like
    "bpf_sk_release(bpf_sk_fullsock(skb->sk))" where reference has
    never been acquired by calling "bpf_sk_fullsock(skb->sk)".

    A -EINVAL and a verbose are done instead of WARN_ON.  A test is
    added to the test_verifier in a later patch.

    Since the WARN_ON in "release_reference_state()" is no longer
    needed, "__release_reference_state()" is folded into
    "release_reference_state()" also.

Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf.h      |  12 +++++
 include/uapi/linux/bpf.h |  12 ++++-
 kernel/bpf/verifier.c    | 132 +++++++++++++++++++++++++++++++++--------------
 net/core/filter.c        |  42 +++++++++++++++
 4 files changed, 157 insertions(+), 41 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index bd169a7bcc93..a60463b45b54 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -194,6 +194,7 @@ enum bpf_arg_type {
 	ARG_ANYTHING,		/* any (initialized) argument is ok */
 	ARG_PTR_TO_SOCKET,	/* pointer to bpf_sock */
 	ARG_PTR_TO_SPIN_LOCK,	/* pointer to bpf_spin_lock */
+	ARG_PTR_TO_SOCK_COMMON,	/* pointer to sock_common */
 };
 
 /* type of values returned from helper functions */
@@ -256,6 +257,8 @@ enum bpf_reg_type {
 	PTR_TO_FLOW_KEYS,	 /* reg points to bpf_flow_keys */
 	PTR_TO_SOCKET,		 /* reg points to struct bpf_sock */
 	PTR_TO_SOCKET_OR_NULL,	 /* reg points to struct bpf_sock or NULL */
+	PTR_TO_SOCK_COMMON,	 /* reg points to sock_common */
+	PTR_TO_SOCK_COMMON_OR_NULL, /* reg points to sock_common or NULL */
 };
 
 /* The information passed from prog-specific *_is_valid_access
@@ -920,6 +923,9 @@ void bpf_user_rnd_init_once(void);
 u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
 
 #if defined(CONFIG_NET)
+bool bpf_sock_common_is_valid_access(int off, int size,
+				     enum bpf_access_type type,
+				     struct bpf_insn_access_aux *info);
 bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type,
 			      struct bpf_insn_access_aux *info);
 u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
@@ -928,6 +934,12 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
 				struct bpf_prog *prog,
 				u32 *target_size);
 #else
+static inline bool bpf_sock_common_is_valid_access(int off, int size,
+						   enum bpf_access_type type,
+						   struct bpf_insn_access_aux *info)
+{
+	return false;
+}
 static inline bool bpf_sock_is_valid_access(int off, int size,
 					    enum bpf_access_type type,
 					    struct bpf_insn_access_aux *info)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 1777fa0c61e4..5d79cba74ddc 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2329,6 +2329,14 @@ union bpf_attr {
  *		"**y**".
  *	Return
  *		0
+ *
+ * struct bpf_sock *bpf_sk_fullsock(struct bpf_sock *sk)
+ *	Description
+ *		This helper gets a **struct bpf_sock** pointer such
+ *		that all the fields in bpf_sock can be accessed.
+ *	Return
+ *		A **struct bpf_sock** pointer on success, or NULL in
+ *		case of failure.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -2425,7 +2433,8 @@ union bpf_attr {
 	FN(msg_pop_data),		\
 	FN(rc_pointer_rel),		\
 	FN(spin_lock),			\
-	FN(spin_unlock),
+	FN(spin_unlock),		\
+	FN(sk_fullsock),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
@@ -2545,6 +2554,7 @@ struct __sk_buff {
 	__u64 tstamp;
 	__u32 wire_len;
 	__u32 gso_segs;
+	__bpf_md_ptr(struct bpf_sock *, sk);
 };
 
 struct bpf_tunnel_key {
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 516dfc6d78de..b755d55a3791 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -331,10 +331,17 @@ static bool type_is_pkt_pointer(enum bpf_reg_type type)
 	       type == PTR_TO_PACKET_META;
 }
 
+static bool type_is_sk_pointer(enum bpf_reg_type type)
+{
+	return type == PTR_TO_SOCKET ||
+		type == PTR_TO_SOCK_COMMON;
+}
+
 static bool reg_type_may_be_null(enum bpf_reg_type type)
 {
 	return type == PTR_TO_MAP_VALUE_OR_NULL ||
-	       type == PTR_TO_SOCKET_OR_NULL;
+	       type == PTR_TO_SOCKET_OR_NULL ||
+	       type == PTR_TO_SOCK_COMMON_OR_NULL;
 }
 
 static bool type_is_refcounted(enum bpf_reg_type type)
@@ -377,6 +384,12 @@ static bool is_release_function(enum bpf_func_id func_id)
 	return func_id == BPF_FUNC_sk_release;
 }
 
+static bool is_acquire_function(enum bpf_func_id func_id)
+{
+	return func_id == BPF_FUNC_sk_lookup_tcp ||
+		func_id == BPF_FUNC_sk_lookup_udp;
+}
+
 /* string representation of 'enum bpf_reg_type' */
 static const char * const reg_type_str[] = {
 	[NOT_INIT]		= "?",
@@ -392,6 +405,8 @@ static const char * const reg_type_str[] = {
 	[PTR_TO_FLOW_KEYS]	= "flow_keys",
 	[PTR_TO_SOCKET]		= "sock",
 	[PTR_TO_SOCKET_OR_NULL] = "sock_or_null",
+	[PTR_TO_SOCK_COMMON]	= "sock_common",
+	[PTR_TO_SOCK_COMMON_OR_NULL] = "sock_common_or_null",
 };
 
 static char slot_type_char[] = {
@@ -618,13 +633,10 @@ static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx)
 }
 
 /* release function corresponding to acquire_reference_state(). Idempotent. */
-static int __release_reference_state(struct bpf_func_state *state, int ptr_id)
+static int release_reference_state(struct bpf_func_state *state, int ptr_id)
 {
 	int i, last_idx;
 
-	if (!ptr_id)
-		return -EFAULT;
-
 	last_idx = state->acquired_refs - 1;
 	for (i = 0; i < state->acquired_refs; i++) {
 		if (state->refs[i].id == ptr_id) {
@@ -636,21 +648,7 @@ static int __release_reference_state(struct bpf_func_state *state, int ptr_id)
 			return 0;
 		}
 	}
-	return -EFAULT;
-}
-
-/* variation on the above for cases where we expect that there must be an
- * outstanding reference for the specified ptr_id.
- */
-static int release_reference_state(struct bpf_verifier_env *env, int ptr_id)
-{
-	struct bpf_func_state *state = cur_func(env);
-	int err;
-
-	err = __release_reference_state(state, ptr_id);
-	if (WARN_ON_ONCE(err != 0))
-		verbose(env, "verifier internal error: can't release reference\n");
-	return err;
+	return -EINVAL;
 }
 
 static int transfer_reference_state(struct bpf_func_state *dst,
@@ -1209,6 +1207,8 @@ static bool is_spillable_regtype(enum bpf_reg_type type)
 	case CONST_PTR_TO_MAP:
 	case PTR_TO_SOCKET:
 	case PTR_TO_SOCKET_OR_NULL:
+	case PTR_TO_SOCK_COMMON:
+	case PTR_TO_SOCK_COMMON_OR_NULL:
 		return true;
 	default:
 		return false;
@@ -1647,6 +1647,7 @@ static int check_sock_access(struct bpf_verifier_env *env, int insn_idx,
 	struct bpf_reg_state *regs = cur_regs(env);
 	struct bpf_reg_state *reg = &regs[regno];
 	struct bpf_insn_access_aux info = {};
+	bool valid;
 
 	if (reg->smin_value < 0) {
 		verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
@@ -1654,15 +1655,28 @@ static int check_sock_access(struct bpf_verifier_env *env, int insn_idx,
 		return -EACCES;
 	}
 
-	if (!bpf_sock_is_valid_access(off, size, t, &info)) {
-		verbose(env, "invalid bpf_sock access off=%d size=%d\n",
-			off, size);
-		return -EACCES;
+	switch (reg->type) {
+	case PTR_TO_SOCK_COMMON:
+		valid = bpf_sock_common_is_valid_access(off, size, t, &info);
+		break;
+	case PTR_TO_SOCKET:
+		valid = bpf_sock_is_valid_access(off, size, t, &info);
+		break;
+	default:
+		valid = false;
 	}
 
-	env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size;
 
-	return 0;
+	if (valid) {
+		env->insn_aux_data[insn_idx].ctx_field_size =
+			info.ctx_field_size;
+		return 0;
+	}
+
+	verbose(env, "R%d invalid %s access off=%d size=%d\n",
+		regno, reg_type_str[reg->type], off, size);
+
+	return -EACCES;
 }
 
 static bool __is_pointer_value(bool allow_ptr_leaks,
@@ -1688,8 +1702,14 @@ static bool is_ctx_reg(struct bpf_verifier_env *env, int regno)
 {
 	const struct bpf_reg_state *reg = reg_state(env, regno);
 
-	return reg->type == PTR_TO_CTX ||
-	       reg->type == PTR_TO_SOCKET;
+	return reg->type == PTR_TO_CTX;
+}
+
+static bool is_sk_reg(struct bpf_verifier_env *env, int regno)
+{
+	const struct bpf_reg_state *reg = reg_state(env, regno);
+
+	return type_is_sk_pointer(reg->type);
 }
 
 static bool is_pkt_reg(struct bpf_verifier_env *env, int regno)
@@ -1800,6 +1820,9 @@ static int check_ptr_alignment(struct bpf_verifier_env *env,
 	case PTR_TO_SOCKET:
 		pointer_desc = "sock ";
 		break;
+	case PTR_TO_SOCK_COMMON:
+		pointer_desc = "sock_common ";
+		break;
 	default:
 		break;
 	}
@@ -2003,11 +2026,14 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
 			 * PTR_TO_PACKET[_META,_END]. In the latter
 			 * case, we know the offset is zero.
 			 */
-			if (reg_type == SCALAR_VALUE)
+			if (reg_type == SCALAR_VALUE) {
 				mark_reg_unknown(env, regs, value_regno);
-			else
+			} else {
 				mark_reg_known_zero(env, regs,
 						    value_regno);
+				if (reg_type_may_be_null(reg_type))
+					regs[value_regno].id = ++env->id_gen;
+			}
 			regs[value_regno].type = reg_type;
 		}
 
@@ -2053,9 +2079,10 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
 		err = check_flow_keys_access(env, off, size);
 		if (!err && t == BPF_READ && value_regno >= 0)
 			mark_reg_unknown(env, regs, value_regno);
-	} else if (reg->type == PTR_TO_SOCKET) {
+	} else if (type_is_sk_pointer(reg->type)) {
 		if (t == BPF_WRITE) {
-			verbose(env, "cannot write into socket\n");
+			verbose(env, "R%d cannot write into %s\n",
+				regno, reg_type_str[reg->type]);
 			return -EACCES;
 		}
 		err = check_sock_access(env, insn_idx, regno, off, size, t);
@@ -2102,7 +2129,8 @@ static int check_xadd(struct bpf_verifier_env *env, int insn_idx, struct bpf_ins
 
 	if (is_ctx_reg(env, insn->dst_reg) ||
 	    is_pkt_reg(env, insn->dst_reg) ||
-	    is_flow_key_reg(env, insn->dst_reg)) {
+	    is_flow_key_reg(env, insn->dst_reg) ||
+	    is_sk_reg(env, insn->dst_reg)) {
 		verbose(env, "BPF_XADD stores into R%d %s is not allowed\n",
 			insn->dst_reg,
 			reg_type_str[reg_state(env, insn->dst_reg)->type]);
@@ -2369,6 +2397,11 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
 		err = check_ctx_reg(env, reg, regno);
 		if (err < 0)
 			return err;
+	} else if (arg_type == ARG_PTR_TO_SOCK_COMMON) {
+		expected_type = PTR_TO_SOCK_COMMON;
+		/* Any sk pointer can be ARG_PTR_TO_SOCK_COMMON */
+		if (!type_is_sk_pointer(type))
+			goto err_type;
 	} else if (arg_type == ARG_PTR_TO_SOCKET) {
 		expected_type = PTR_TO_SOCKET;
 		if (type != expected_type)
@@ -2783,7 +2816,7 @@ static int release_reference(struct bpf_verifier_env *env,
 	for (i = 0; i <= vstate->curframe; i++)
 		release_reg_references(env, vstate->frame[i], meta->ptr_id);
 
-	return release_reference_state(env, meta->ptr_id);
+	return release_reference_state(cur_func(env), meta->ptr_id);
 }
 
 static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
@@ -3049,8 +3082,11 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
 		}
 	} else if (is_release_function(func_id)) {
 		err = release_reference(env, &meta);
-		if (err)
+		if (err) {
+			verbose(env, "func %s#%d reference has not been acquired before\n",
+				func_id_name(func_id), func_id);
 			return err;
+		}
 	}
 
 	regs = cur_regs(env);
@@ -3099,12 +3135,19 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
 			regs[BPF_REG_0].id = ++env->id_gen;
 		}
 	} else if (fn->ret_type == RET_PTR_TO_SOCKET_OR_NULL) {
-		int id = acquire_reference_state(env, insn_idx);
-		if (id < 0)
-			return id;
 		mark_reg_known_zero(env, regs, BPF_REG_0);
 		regs[BPF_REG_0].type = PTR_TO_SOCKET_OR_NULL;
-		regs[BPF_REG_0].id = id;
+		if (is_acquire_function(func_id)) {
+			int id = acquire_reference_state(env, insn_idx);
+
+			if (id < 0)
+				return id;
+			/* For release_reference() */
+			regs[BPF_REG_0].id = id;
+		} else {
+			/* For mark_ptr_or_null_reg() */
+			regs[BPF_REG_0].id = ++env->id_gen;
+		}
 	} else {
 		verbose(env, "unknown return type %d of func %s#%d\n",
 			fn->ret_type, func_id_name(func_id), func_id);
@@ -3364,6 +3407,8 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
 	case PTR_TO_PACKET_END:
 	case PTR_TO_SOCKET:
 	case PTR_TO_SOCKET_OR_NULL:
+	case PTR_TO_SOCK_COMMON:
+	case PTR_TO_SOCK_COMMON_OR_NULL:
 		verbose(env, "R%d pointer arithmetic on %s prohibited\n",
 			dst, reg_type_str[ptr_reg->type]);
 		return -EACCES;
@@ -4597,6 +4642,8 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state,
 			}
 		} else if (reg->type == PTR_TO_SOCKET_OR_NULL) {
 			reg->type = PTR_TO_SOCKET;
+		} else if (reg->type == PTR_TO_SOCK_COMMON_OR_NULL) {
+			reg->type = PTR_TO_SOCK_COMMON;
 		}
 		if (is_null || !(reg_is_refcounted(reg) ||
 				 reg_may_point_to_spin_lock(reg))) {
@@ -4621,7 +4668,7 @@ static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno,
 	int i, j;
 
 	if (reg_is_refcounted_or_null(&regs[regno]) && is_null)
-		__release_reference_state(state, id);
+		release_reference_state(state, id);
 
 	for (i = 0; i < MAX_BPF_REG; i++)
 		mark_ptr_or_null_reg(state, &regs[i], id, is_null);
@@ -5790,6 +5837,8 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
 	case PTR_TO_FLOW_KEYS:
 	case PTR_TO_SOCKET:
 	case PTR_TO_SOCKET_OR_NULL:
+	case PTR_TO_SOCK_COMMON:
+	case PTR_TO_SOCK_COMMON_OR_NULL:
 		/* Only valid matches are exact, which memcmp() above
 		 * would have accepted
 		 */
@@ -6110,6 +6159,8 @@ static bool reg_type_mismatch_ok(enum bpf_reg_type type)
 	case PTR_TO_CTX:
 	case PTR_TO_SOCKET:
 	case PTR_TO_SOCKET_OR_NULL:
+	case PTR_TO_SOCK_COMMON:
+	case PTR_TO_SOCK_COMMON_OR_NULL:
 		return false;
 	default:
 		return true;
@@ -7112,6 +7163,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
 			convert_ctx_access = ops->convert_ctx_access;
 			break;
 		case PTR_TO_SOCKET:
+		case PTR_TO_SOCK_COMMON:
 			convert_ctx_access = bpf_sock_convert_ctx_access;
 			break;
 		default:
diff --git a/net/core/filter.c b/net/core/filter.c
index 3a49f68eda10..401d2e0aebf8 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1793,6 +1793,20 @@ static const struct bpf_func_proto bpf_skb_pull_data_proto = {
 	.arg2_type	= ARG_ANYTHING,
 };
 
+BPF_CALL_1(bpf_sk_fullsock, struct sock *, sk)
+{
+	sk = sk_to_full_sk(sk);
+
+	return sk_fullsock(sk) ? (unsigned long)sk : (unsigned long)NULL;
+}
+
+static const struct bpf_func_proto bpf_sk_fullsock_proto = {
+	.func		= bpf_sk_fullsock,
+	.gpl_only	= false,
+	.ret_type	= RET_PTR_TO_SOCKET_OR_NULL,
+	.arg1_type	= ARG_PTR_TO_SOCK_COMMON,
+};
+
 static inline int sk_skb_try_make_writable(struct sk_buff *skb,
 					   unsigned int write_len)
 {
@@ -5406,6 +5420,8 @@ cg_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 	switch (func_id) {
 	case BPF_FUNC_get_local_storage:
 		return &bpf_get_local_storage_proto;
+	case BPF_FUNC_sk_fullsock:
+		return &bpf_sk_fullsock_proto;
 	default:
 		return sk_filter_func_proto(func_id, prog);
 	}
@@ -5477,6 +5493,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_get_socket_uid_proto;
 	case BPF_FUNC_fib_lookup:
 		return &bpf_skb_fib_lookup_proto;
+	case BPF_FUNC_sk_fullsock:
+		return &bpf_sk_fullsock_proto;
 #ifdef CONFIG_XFRM
 	case BPF_FUNC_skb_get_xfrm_state:
 		return &bpf_skb_get_xfrm_state_proto;
@@ -5764,6 +5782,11 @@ static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type
 		if (size != sizeof(__u64))
 			return false;
 		break;
+	case offsetof(struct __sk_buff, sk):
+		if (type == BPF_WRITE || size != sizeof(__u64))
+			return false;
+		info->reg_type = PTR_TO_SOCK_COMMON_OR_NULL;
+		break;
 	default:
 		/* Only narrow read access allowed for now. */
 		if (type == BPF_WRITE) {
@@ -5950,6 +5973,18 @@ static bool __sock_filter_check_size(int off, int size,
 	return size == size_default;
 }
 
+bool bpf_sock_common_is_valid_access(int off, int size,
+				     enum bpf_access_type type,
+				     struct bpf_insn_access_aux *info)
+{
+	switch (off) {
+	case bpf_ctx_range_till(struct bpf_sock, type, priority):
+		return false;
+	default:
+		return bpf_sock_is_valid_access(off, size, type, info);
+	}
+}
+
 bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type,
 			      struct bpf_insn_access_aux *info)
 {
@@ -6748,6 +6783,13 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
 		off += offsetof(struct qdisc_skb_cb, pkt_len);
 		*target_size = 4;
 		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, off);
+		break;
+
+	case offsetof(struct __sk_buff, sk):
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
+				      si->dst_reg, si->src_reg,
+				      offsetof(struct sk_buff, sk));
+		break;
 	}
 
 	return insn - insn_buf;
-- 
cgit v1.2.3


From aa65d6960a98fc15a96ce361b26e9fd55c9bccc5 Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Sat, 9 Feb 2019 23:22:21 -0800
Subject: bpf: Add state, dst_ip4, dst_ip6 and dst_port to bpf_sock

This patch adds "state", "dst_ip4", "dst_ip6" and "dst_port" to the
bpf_sock.  The userspace has already been using "state",
e.g. inet_diag (ss -t) and getsockopt(TCP_INFO).

This patch also allows narrow load on the following existing fields:
"family", "type", "protocol" and "src_port".  Unlike IP address,
the load offset is restricted to the first byte for them but it
can be relaxed later if there is a use case.

This patch also folds __sock_filter_check_size() into
bpf_sock_is_valid_access() since it is not called
from anywhere else.  All bpf_sock checking is in
one place.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/uapi/linux/bpf.h | 17 ++++-----
 net/core/filter.c        | 99 +++++++++++++++++++++++++++++++++++++-----------
 2 files changed, 85 insertions(+), 31 deletions(-)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 5d79cba74ddc..d8f91777c5b6 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2606,15 +2606,14 @@ struct bpf_sock {
 	__u32 protocol;
 	__u32 mark;
 	__u32 priority;
-	__u32 src_ip4;		/* Allows 1,2,4-byte read.
-				 * Stored in network byte order.
-				 */
-	__u32 src_ip6[4];	/* Allows 1,2,4-byte read.
-				 * Stored in network byte order.
-				 */
-	__u32 src_port;		/* Allows 4-byte read.
-				 * Stored in host byte order
-				 */
+	/* IP address also allows 1 and 2 bytes access */
+	__u32 src_ip4;
+	__u32 src_ip6[4];
+	__u32 src_port;		/* host byte order */
+	__u32 dst_port;		/* network byte order */
+	__u32 dst_ip4;
+	__u32 dst_ip6[4];
+	__u32 state;
 };
 
 struct bpf_sock_tuple {
diff --git a/net/core/filter.c b/net/core/filter.c
index 401d2e0aebf8..01bb64bf2b5e 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -5958,21 +5958,6 @@ full_access:
 	return true;
 }
 
-static bool __sock_filter_check_size(int off, int size,
-				     struct bpf_insn_access_aux *info)
-{
-	const int size_default = sizeof(__u32);
-
-	switch (off) {
-	case bpf_ctx_range(struct bpf_sock, src_ip4):
-	case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]):
-		bpf_ctx_record_field_size(info, size_default);
-		return bpf_ctx_narrow_access_ok(off, size, size_default);
-	}
-
-	return size == size_default;
-}
-
 bool bpf_sock_common_is_valid_access(int off, int size,
 				     enum bpf_access_type type,
 				     struct bpf_insn_access_aux *info)
@@ -5988,13 +5973,29 @@ bool bpf_sock_common_is_valid_access(int off, int size,
 bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type,
 			      struct bpf_insn_access_aux *info)
 {
+	const int size_default = sizeof(__u32);
+
 	if (off < 0 || off >= sizeof(struct bpf_sock))
 		return false;
 	if (off % size != 0)
 		return false;
-	if (!__sock_filter_check_size(off, size, info))
-		return false;
-	return true;
+
+	switch (off) {
+	case offsetof(struct bpf_sock, state):
+	case offsetof(struct bpf_sock, family):
+	case offsetof(struct bpf_sock, type):
+	case offsetof(struct bpf_sock, protocol):
+	case offsetof(struct bpf_sock, dst_port):
+	case offsetof(struct bpf_sock, src_port):
+	case bpf_ctx_range(struct bpf_sock, src_ip4):
+	case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]):
+	case bpf_ctx_range(struct bpf_sock, dst_ip4):
+	case bpf_ctx_range_till(struct bpf_sock, dst_ip6[0], dst_ip6[3]):
+		bpf_ctx_record_field_size(info, size_default);
+		return bpf_ctx_narrow_access_ok(off, size, size_default);
+	}
+
+	return size == size_default;
 }
 
 static bool sock_filter_is_valid_access(int off, int size,
@@ -6838,24 +6839,32 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
 		break;
 
 	case offsetof(struct bpf_sock, family):
-		BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_family) != 2);
-
-		*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
-				      offsetof(struct sock, sk_family));
+		*insn++ = BPF_LDX_MEM(
+			BPF_FIELD_SIZEOF(struct sock_common, skc_family),
+			si->dst_reg, si->src_reg,
+			bpf_target_off(struct sock_common,
+				       skc_family,
+				       FIELD_SIZEOF(struct sock_common,
+						    skc_family),
+				       target_size));
 		break;
 
 	case offsetof(struct bpf_sock, type):
+		BUILD_BUG_ON(HWEIGHT32(SK_FL_TYPE_MASK) != BITS_PER_BYTE * 2);
 		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
 				      offsetof(struct sock, __sk_flags_offset));
 		*insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_TYPE_MASK);
 		*insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, SK_FL_TYPE_SHIFT);
+		*target_size = 2;
 		break;
 
 	case offsetof(struct bpf_sock, protocol):
+		BUILD_BUG_ON(HWEIGHT32(SK_FL_PROTO_MASK) != BITS_PER_BYTE);
 		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
 				      offsetof(struct sock, __sk_flags_offset));
 		*insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_PROTO_MASK);
 		*insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, SK_FL_PROTO_SHIFT);
+		*target_size = 1;
 		break;
 
 	case offsetof(struct bpf_sock, src_ip4):
@@ -6867,6 +6876,15 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
 				       target_size));
 		break;
 
+	case offsetof(struct bpf_sock, dst_ip4):
+		*insn++ = BPF_LDX_MEM(
+			BPF_SIZE(si->code), si->dst_reg, si->src_reg,
+			bpf_target_off(struct sock_common, skc_daddr,
+				       FIELD_SIZEOF(struct sock_common,
+						    skc_daddr),
+				       target_size));
+		break;
+
 	case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]):
 #if IS_ENABLED(CONFIG_IPV6)
 		off = si->off;
@@ -6885,6 +6903,23 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
 #endif
 		break;
 
+	case bpf_ctx_range_till(struct bpf_sock, dst_ip6[0], dst_ip6[3]):
+#if IS_ENABLED(CONFIG_IPV6)
+		off = si->off;
+		off -= offsetof(struct bpf_sock, dst_ip6[0]);
+		*insn++ = BPF_LDX_MEM(
+			BPF_SIZE(si->code), si->dst_reg, si->src_reg,
+			bpf_target_off(struct sock_common,
+				       skc_v6_daddr.s6_addr32[0],
+				       FIELD_SIZEOF(struct sock_common,
+						    skc_v6_daddr.s6_addr32[0]),
+				       target_size) + off);
+#else
+		*insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
+		*target_size = 4;
+#endif
+		break;
+
 	case offsetof(struct bpf_sock, src_port):
 		*insn++ = BPF_LDX_MEM(
 			BPF_FIELD_SIZEOF(struct sock_common, skc_num),
@@ -6894,6 +6929,26 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
 						    skc_num),
 				       target_size));
 		break;
+
+	case offsetof(struct bpf_sock, dst_port):
+		*insn++ = BPF_LDX_MEM(
+			BPF_FIELD_SIZEOF(struct sock_common, skc_dport),
+			si->dst_reg, si->src_reg,
+			bpf_target_off(struct sock_common, skc_dport,
+				       FIELD_SIZEOF(struct sock_common,
+						    skc_dport),
+				       target_size));
+		break;
+
+	case offsetof(struct bpf_sock, state):
+		*insn++ = BPF_LDX_MEM(
+			BPF_FIELD_SIZEOF(struct sock_common, skc_state),
+			si->dst_reg, si->src_reg,
+			bpf_target_off(struct sock_common, skc_state,
+				       FIELD_SIZEOF(struct sock_common,
+						    skc_state),
+				       target_size));
+		break;
 	}
 
 	return insn - insn_buf;
-- 
cgit v1.2.3


From 9b1f3d6e5af295a72deb5e3f04db07a6a58be72e Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Sat, 9 Feb 2019 23:22:23 -0800
Subject: bpf: Refactor sock_ops_convert_ctx_access

The next patch will introduce a new "struct bpf_tcp_sock" which
exposes the same tcp_sock's fields already exposed in
"struct bpf_sock_ops".

This patch refactors the existing convert_ctx_access() code for
"struct bpf_sock_ops" to get them ready to be reused for
"struct bpf_tcp_sock".  The "rtt_min" is not refactored
in this patch because its handling is different from other
fields.

The SOCK_OPS_GET_TCP_SOCK_FIELD is new. All other SOCK_OPS_XXX_FIELD
changes are code move only.

Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 net/core/filter.c | 287 ++++++++++++++++++++++++------------------------------
 1 file changed, 127 insertions(+), 160 deletions(-)

diff --git a/net/core/filter.c b/net/core/filter.c
index 01bb64bf2b5e..c0d7b9ef279f 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -5030,6 +5030,54 @@ static const struct bpf_func_proto bpf_lwt_seg6_adjust_srh_proto = {
 };
 #endif /* CONFIG_IPV6_SEG6_BPF */
 
+#define CONVERT_COMMON_TCP_SOCK_FIELDS(md_type, CONVERT)		\
+do {									\
+	switch (si->off) {						\
+	case offsetof(md_type, snd_cwnd):				\
+		CONVERT(snd_cwnd); break;				\
+	case offsetof(md_type, srtt_us):				\
+		CONVERT(srtt_us); break;				\
+	case offsetof(md_type, snd_ssthresh):				\
+		CONVERT(snd_ssthresh); break;				\
+	case offsetof(md_type, rcv_nxt):				\
+		CONVERT(rcv_nxt); break;				\
+	case offsetof(md_type, snd_nxt):				\
+		CONVERT(snd_nxt); break;				\
+	case offsetof(md_type, snd_una):				\
+		CONVERT(snd_una); break;				\
+	case offsetof(md_type, mss_cache):				\
+		CONVERT(mss_cache); break;				\
+	case offsetof(md_type, ecn_flags):				\
+		CONVERT(ecn_flags); break;				\
+	case offsetof(md_type, rate_delivered):				\
+		CONVERT(rate_delivered); break;				\
+	case offsetof(md_type, rate_interval_us):			\
+		CONVERT(rate_interval_us); break;			\
+	case offsetof(md_type, packets_out):				\
+		CONVERT(packets_out); break;				\
+	case offsetof(md_type, retrans_out):				\
+		CONVERT(retrans_out); break;				\
+	case offsetof(md_type, total_retrans):				\
+		CONVERT(total_retrans); break;				\
+	case offsetof(md_type, segs_in):				\
+		CONVERT(segs_in); break;				\
+	case offsetof(md_type, data_segs_in):				\
+		CONVERT(data_segs_in); break;				\
+	case offsetof(md_type, segs_out):				\
+		CONVERT(segs_out); break;				\
+	case offsetof(md_type, data_segs_out):				\
+		CONVERT(data_segs_out); break;				\
+	case offsetof(md_type, lost_out):				\
+		CONVERT(lost_out); break;				\
+	case offsetof(md_type, sacked_out):				\
+		CONVERT(sacked_out); break;				\
+	case offsetof(md_type, bytes_received):				\
+		CONVERT(bytes_received); break;				\
+	case offsetof(md_type, bytes_acked):				\
+		CONVERT(bytes_acked); break;				\
+	}								\
+} while (0)
+
 #ifdef CONFIG_INET
 static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple,
 			      int dif, int sdif, u8 family, u8 proto)
@@ -7196,6 +7244,85 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
 	struct bpf_insn *insn = insn_buf;
 	int off;
 
+/* Helper macro for adding read access to tcp_sock or sock fields. */
+#define SOCK_OPS_GET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ)			      \
+	do {								      \
+		BUILD_BUG_ON(FIELD_SIZEOF(OBJ, OBJ_FIELD) >		      \
+			     FIELD_SIZEOF(struct bpf_sock_ops, BPF_FIELD));   \
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(			      \
+						struct bpf_sock_ops_kern,     \
+						is_fullsock),		      \
+				      si->dst_reg, si->src_reg,		      \
+				      offsetof(struct bpf_sock_ops_kern,      \
+					       is_fullsock));		      \
+		*insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 2);	      \
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(			      \
+						struct bpf_sock_ops_kern, sk),\
+				      si->dst_reg, si->src_reg,		      \
+				      offsetof(struct bpf_sock_ops_kern, sk));\
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(OBJ,		      \
+						       OBJ_FIELD),	      \
+				      si->dst_reg, si->dst_reg,		      \
+				      offsetof(OBJ, OBJ_FIELD));	      \
+	} while (0)
+
+#define SOCK_OPS_GET_TCP_SOCK_FIELD(FIELD) \
+		SOCK_OPS_GET_FIELD(FIELD, FIELD, struct tcp_sock)
+
+/* Helper macro for adding write access to tcp_sock or sock fields.
+ * The macro is called with two registers, dst_reg which contains a pointer
+ * to ctx (context) and src_reg which contains the value that should be
+ * stored. However, we need an additional register since we cannot overwrite
+ * dst_reg because it may be used later in the program.
+ * Instead we "borrow" one of the other register. We first save its value
+ * into a new (temp) field in bpf_sock_ops_kern, use it, and then restore
+ * it at the end of the macro.
+ */
+#define SOCK_OPS_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ)			      \
+	do {								      \
+		int reg = BPF_REG_9;					      \
+		BUILD_BUG_ON(FIELD_SIZEOF(OBJ, OBJ_FIELD) >		      \
+			     FIELD_SIZEOF(struct bpf_sock_ops, BPF_FIELD));   \
+		if (si->dst_reg == reg || si->src_reg == reg)		      \
+			reg--;						      \
+		if (si->dst_reg == reg || si->src_reg == reg)		      \
+			reg--;						      \
+		*insn++ = BPF_STX_MEM(BPF_DW, si->dst_reg, reg,		      \
+				      offsetof(struct bpf_sock_ops_kern,      \
+					       temp));			      \
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(			      \
+						struct bpf_sock_ops_kern,     \
+						is_fullsock),		      \
+				      reg, si->dst_reg,			      \
+				      offsetof(struct bpf_sock_ops_kern,      \
+					       is_fullsock));		      \
+		*insn++ = BPF_JMP_IMM(BPF_JEQ, reg, 0, 2);		      \
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(			      \
+						struct bpf_sock_ops_kern, sk),\
+				      reg, si->dst_reg,			      \
+				      offsetof(struct bpf_sock_ops_kern, sk));\
+		*insn++ = BPF_STX_MEM(BPF_FIELD_SIZEOF(OBJ, OBJ_FIELD),	      \
+				      reg, si->src_reg,			      \
+				      offsetof(OBJ, OBJ_FIELD));	      \
+		*insn++ = BPF_LDX_MEM(BPF_DW, reg, si->dst_reg,		      \
+				      offsetof(struct bpf_sock_ops_kern,      \
+					       temp));			      \
+	} while (0)
+
+#define SOCK_OPS_GET_OR_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ, TYPE)	      \
+	do {								      \
+		if (TYPE == BPF_WRITE)					      \
+			SOCK_OPS_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ);	      \
+		else							      \
+			SOCK_OPS_GET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ);	      \
+	} while (0)
+
+	CONVERT_COMMON_TCP_SOCK_FIELDS(struct bpf_sock_ops,
+				       SOCK_OPS_GET_TCP_SOCK_FIELD);
+
+	if (insn > insn_buf)
+		return insn - insn_buf;
+
 	switch (si->off) {
 	case offsetof(struct bpf_sock_ops, op) ...
 	     offsetof(struct bpf_sock_ops, replylong[3]):
@@ -7353,175 +7480,15 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
 				      FIELD_SIZEOF(struct minmax_sample, t));
 		break;
 
-/* Helper macro for adding read access to tcp_sock or sock fields. */
-#define SOCK_OPS_GET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ)			      \
-	do {								      \
-		BUILD_BUG_ON(FIELD_SIZEOF(OBJ, OBJ_FIELD) >		      \
-			     FIELD_SIZEOF(struct bpf_sock_ops, BPF_FIELD));   \
-		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(			      \
-						struct bpf_sock_ops_kern,     \
-						is_fullsock),		      \
-