summary refs log tree commit diff stats
diff options
context:
space:
mode:
author David S. Miller <davem@davemloft.net> 2019-11-02 15:27:42 -0700
committer David S. Miller <davem@davemloft.net> 2019-11-02 15:29:58 -0700
commit ae8a76fb8b5d03fa2adc7249dc6131ba6a0c6119 (patch)
tree b197a7452b46abf51ffab8485236ccab69664d5c
parent d31e95585ca697fb31440c6fe30113adc85ecfbd (diff)
parent 358fdb456288d48874d44a064a82bfb0d9963fa0 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Alexei Starovoitov says: ==================== pull-request: bpf-next 2019-11-02 The following pull-request contains BPF updates for your *net-next* tree. We've added 30 non-merge commits during the last 7 day(s) which contain a total of 41 files changed, 1864 insertions(+), 474 deletions(-). The main changes are: 1) Fix long standing user vs kernel access issue by introducing bpf_probe_read_user() and bpf_probe_read_kernel() helpers, from Daniel. 2) Accelerated xskmap lookup, from Björn and Maciej. 3) Support for automatic map pinning in libbpf, from Toke. 4) Cleanup of BTF-enabled raw tracepoints, from Alexei. 5) Various fixes to libbpf and selftests. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- Documentation/bpf/index.rst 9
-rw-r--r-- Documentation/bpf/s390.rst 205
-rw-r--r-- arch/x86/mm/Makefile 2
-rw-r--r-- arch/x86/mm/maccess.c 43
-rw-r--r-- include/linux/bpf.h 30
-rw-r--r-- include/linux/bpf_types.h 1
-rw-r--r-- include/linux/uaccess.h 16
-rw-r--r-- include/net/xdp_sock.h 51
-rw-r--r-- include/uapi/linux/bpf.h 124
-rw-r--r-- kernel/bpf/core.c 12
-rw-r--r-- kernel/bpf/syscall.c 6
-rw-r--r-- kernel/bpf/verifier.c 39
-rw-r--r-- kernel/bpf/xskmap.c 112
-rw-r--r-- kernel/trace/bpf_trace.c 231
-rw-r--r-- lib/test_bpf.c 112
-rw-r--r-- mm/maccess.c 70
-rw-r--r-- net/xdp/xsk.c 33
-rw-r--r-- samples/bpf/map_perf_test_kern.c 4
-rw-r--r-- samples/bpf/test_map_in_map_kern.c 4
-rw-r--r-- samples/bpf/test_probe_write_user_kern.c 2
-rw-r--r-- tools/include/uapi/linux/bpf.h 124
-rw-r--r-- tools/lib/bpf/bpf.c 8
-rw-r--r-- tools/lib/bpf/bpf.h 5
-rw-r--r-- tools/lib/bpf/bpf_helpers.h 6
-rw-r--r-- tools/lib/bpf/libbpf.c 466
-rw-r--r-- tools/lib/bpf/libbpf.h 23
-rw-r--r-- tools/lib/bpf/libbpf.map 5
-rw-r--r-- tools/lib/bpf/libbpf_probes.c 1
-rw-r--r-- tools/lib/bpf/xsk.c 83
-rw-r--r-- tools/testing/selftests/bpf/Makefile 16
-rw-r--r-- tools/testing/selftests/bpf/prog_tests/pinning.c 210
-rw-r--r-- tools/testing/selftests/bpf/prog_tests/probe_user.c 78
-rw-r--r-- tools/testing/selftests/bpf/progs/kfree_skb.c 4
-rw-r--r-- tools/testing/selftests/bpf/progs/pyperf.h 67
-rw-r--r-- tools/testing/selftests/bpf/progs/strobemeta.h 36
-rw-r--r-- tools/testing/selftests/bpf/progs/test_pinning.c 31
-rw-r--r-- tools/testing/selftests/bpf/progs/test_pinning_invalid.c 16
-rw-r--r-- tools/testing/selftests/bpf/progs/test_probe_user.c 26
-rw-r--r-- tools/testing/selftests/bpf/progs/test_tcp_estats.c 2
-rwxr-xr-x tools/testing/selftests/bpf/test_offload.py 3
-rw-r--r-- tools/testing/selftests/bpf/test_sysctl.c 23
41 files changed, 1863 insertions, 476 deletions
diff --git a/Documentation/bpf/index.rst b/Documentation/bpf/index.rst
index 801a6ed3f2e5..4f5410b61441 100644
--- a/Documentation/bpf/index.rst
+++ b/Documentation/bpf/index.rst
@@ -47,6 +47,15 @@ Program types
prog_flow_dissector
+Testing BPF
+===========
+
+.. toctree::
+ :maxdepth: 1
+
+ s390
+
+
.. Links:
.. _Documentation/networking/filter.txt: ../networking/filter.txt
.. _man-pages: https://www.kernel.org/doc/man-pages/
diff --git a/Documentation/bpf/s390.rst b/Documentation/bpf/s390.rst
new file mode 100644
index 000000000000..21ecb309daea
--- /dev/null
+++ b/Documentation/bpf/s390.rst
@@ -0,0 +1,205 @@
+===================
+Testing BPF on s390
+===================
+
+1. Introduction
+***************
+
+IBM Z is a family of mainframe computers descended from the IBM System/360 of
+1964. It is supported by the Linux kernel under the name "s390". This
+document describes how to test BPF in an s390 QEMU guest.
+
+2. One-time setup
+*****************
+
+The following is required to build and run the test suite:
+
+ * s390 GCC
+ * s390 development headers and libraries
+ * Clang with BPF support
+ * QEMU with s390 support
+ * Disk image with s390 rootfs
+
+Debian supports installing compiler and libraries for s390 out of the box.
+Users of other distros may use debootstrap in order to set up a Debian chroot::
+
+ sudo debootstrap \
+ --variant=minbase \
+ --include=sudo \
+ testing \
+ ./s390-toolchain
+ sudo mount --rbind /dev ./s390-toolchain/dev
+ sudo mount --rbind /proc ./s390-toolchain/proc
+ sudo mount --rbind /sys ./s390-toolchain/sys
+ sudo chroot ./s390-toolchain
+
+Once on Debian, the build prerequisites can be installed as follows::
+
+ sudo dpkg --add-architecture s390x
+ sudo apt-get update
+ sudo apt-get install \
+ bc \
+ bison \
+ cmake \
+ debootstrap \
+ dwarves \
+ flex \
+ g++ \
+ gcc \
+ g++-s390x-linux-gnu \
+ gcc-s390x-linux-gnu \
+ gdb-multiarch \
+ git \
+ make \
+ python3 \
+ qemu-system-misc \
+ qemu-utils \
+ rsync \
+ libcap-dev:s390x \
+ libelf-dev:s390x \
+ libncurses-dev
+
+Latest Clang targeting BPF can be installed as follows::
+
+ git clone https://github.com/llvm/llvm-project.git
+ ln -s ../../clang llvm-project/llvm/tools/
+ mkdir llvm-project-build
+ cd llvm-project-build
+ cmake \
+ -DLLVM_TARGETS_TO_BUILD=BPF \
+ -DCMAKE_BUILD_TYPE=Release \
+ -DCMAKE_INSTALL_PREFIX=/opt/clang-bpf \
+ ../llvm-project/llvm
+ make
+ sudo make install
+ export PATH=/opt/clang-bpf/bin:$PATH
+
+The disk image can be prepared using a loopback mount and debootstrap::
+
+ qemu-img create -f raw ./s390.img 1G
+ sudo losetup -f ./s390.img
+ sudo mkfs.ext4 /dev/loopX
+ mkdir ./s390.rootfs
+ sudo mount /dev/loopX ./s390.rootfs
+ sudo debootstrap \
+ --foreign \
+ --arch=s390x \
+ --variant=minbase \
+ --include=" \
+ iproute2, \
+ iputils-ping, \
+ isc-dhcp-client, \
+ kmod, \
+ libcap2, \
+ libelf1, \
+ netcat, \
+ procps" \
+ testing \
+ ./s390.rootfs
+ sudo umount ./s390.rootfs
+ sudo losetup -d /dev/loopX
+
+3. Compilation
+**************
+
+In addition to the usual Kconfig options required to run the BPF test suite, it
+is also helpful to select::
+
+ CONFIG_NET_9P=y
+ CONFIG_9P_FS=y
+ CONFIG_NET_9P_VIRTIO=y
+ CONFIG_VIRTIO_PCI=y
+
+as that would enable a very easy way to share files with the s390 virtual
+machine.
+
+Compiling kernel, modules and testsuite, as well as preparing gdb scripts to
+simplify debugging, can be done using the following commands::
+
+ make ARCH=s390 CROSS_COMPILE=s390x-linux-gnu- menuconfig
+ make ARCH=s390 CROSS_COMPILE=s390x-linux-gnu- bzImage modules scripts_gdb
+ make ARCH=s390 CROSS_COMPILE=s390x-linux-gnu- \
+ -C tools/testing/selftests \
+ TARGETS=bpf \
+ INSTALL_PATH=$PWD/tools/testing/selftests/kselftest_install \
+ install
+
+4. Running the test suite
+*************************
+
+The virtual machine can be started as follows::
+
+ qemu-system-s390x \
+ -cpu max,zpci=on \
+ -smp 2 \
+ -m 4G \
+ -kernel linux/arch/s390/boot/compressed/vmlinux \
+ -drive file=./s390.img,if=virtio,format=raw \
+ -nographic \
+ -append 'root=/dev/vda rw console=ttyS1' \
+ -virtfs local,path=./linux,security_model=none,mount_tag=linux \
+ -object rng-random,filename=/dev/urandom,id=rng0 \
+ -device virtio-rng-ccw,rng=rng0 \
+ -netdev user,id=net0 \
+ -device virtio-net-ccw,netdev=net0
+
+When using this on a real IBM Z, ``-enable-kvm`` may be added for better
+performance. When starting the virtual machine for the first time, disk image
+setup must be finalized using the following command::
+
+ /debootstrap/debootstrap --second-stage
+
+Directory with the code built on the host as well as ``/proc`` and ``/sys``
+need to be mounted as follows::
+
+ mkdir -p /linux
+ mount -t 9p linux /linux
+ mount -t proc proc /proc
+ mount -t sysfs sys /sys
+
+After that, the test suite can be run using the following commands::
+
+ cd /linux/tools/testing/selftests/kselftest_install
+ ./run_kselftest.sh
+
+As usual, tests can also be run individually::
+
+ cd /linux/tools/testing/selftests/bpf
+ ./test_verifier
+
+5. Debugging
+************
+
+It is possible to debug the s390 kernel using QEMU GDB stub, which is activated
+by passing ``-s`` to QEMU.
+
+It is preferable to turn KASLR off, so that gdb would know where to find the
+kernel image in memory, by building the kernel with::
+
+ CONFIG_RANDOMIZE_BASE=n
+
+GDB can then be attached using the following command::
+
+ gdb-multiarch -ex 'target remote localhost:1234' ./vmlinux
+
+6. Network
+**********
+
+In case one needs to use the network in the virtual machine in order to e.g.
+install additional packages, it can be configured using::
+
+ dhclient eth0
+
+7. Links
+********
+
+This document is a compilation of techniques, whose more comprehensive
+descriptions can be found by following these links:
+
+- `Debootstrap <https://wiki.debian.org/EmDebian/CrossDebootstrap>`_
+- `Multiarch <https://wiki.debian.org/Multiarch/HOWTO>`_
+- `Building LLVM <https://llvm.org/docs/CMake.html>`_
+- `Cross-compiling the kernel <https://wiki.gentoo.org/wiki/Embedded_Handbook/General/Cross-compiling_the_kernel>`_
+- `QEMU s390x Guest Support <https://wiki.qemu.org/Documentation/Platforms/S390X>`_
+- `Plan 9 folder sharing over Virtio <https://wiki.qemu.org/Documentation/9psetup>`_
+- `Using GDB with QEMU <https://wiki.osdev.org/Kernel_Debugging#Use_GDB_with_QEMU>`_
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 84373dc9b341..bbc68a54795e 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -13,7 +13,7 @@ CFLAGS_REMOVE_mem_encrypt_identity.o = -pg
endif
obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
- pat.o pgtable.o physaddr.o setup_nx.o tlb.o cpu_entry_area.o
+ pat.o pgtable.o physaddr.o setup_nx.o tlb.o cpu_entry_area.o maccess.o
# Make sure __phys_addr has no stackprotector
nostackp := $(call cc-option, -fno-stack-protector)
diff --git a/arch/x86/mm/maccess.c b/arch/x86/mm/maccess.c
new file mode 100644
index 000000000000..f5b85bdc0535
--- /dev/null
+++ b/arch/x86/mm/maccess.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/uaccess.h>
+#include <linux/kernel.h>
+
+#ifdef CONFIG_X86_64
+static __always_inline u64 canonical_address(u64 vaddr, u8 vaddr_bits)
+{
+ return ((s64)vaddr << (64 - vaddr_bits)) >> (64 - vaddr_bits);
+}
+
+static __always_inline bool invalid_probe_range(u64 vaddr)
+{
+ /*
+ * Range covering the highest possible canonical userspace address
+ * as well as non-canonical address range. For the canonical range
+ * we also need to include the userspace guard page.
+ */
+ return vaddr < TASK_SIZE_MAX + PAGE_SIZE ||
+ canonical_address(vaddr, boot_cpu_data.x86_virt_bits) != vaddr;
+}
+#else
+static __always_inline bool invalid_probe_range(u64 vaddr)
+{
+ return vaddr < TASK_SIZE_MAX;
+}
+#endif
+
+long probe_kernel_read_strict(void *dst, const void *src, size_t size)
+{
+ if (unlikely(invalid_probe_range((unsigned long)src)))
+ return -EFAULT;
+
+ return __probe_kernel_read(dst, src, size);
+}
+
+long strncpy_from_unsafe_strict(char *dst, const void *unsafe_addr, long count)
+{
+ if (unlikely(invalid_probe_range((unsigned long)unsafe_addr)))
+ return -EFAULT;
+
+ return __strncpy_from_unsafe(dst, unsafe_addr, count);
+}
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 171be30fe0ae..7c7f518811a6 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -373,6 +373,11 @@ enum bpf_cgroup_storage_type {
#define MAX_BPF_CGROUP_STORAGE_TYPE __BPF_CGROUP_STORAGE_MAX
+/* The longest tracepoint has 12 args.
+ * See include/trace/bpf_probe.h
+ */
+#define MAX_BPF_FUNC_ARGS 12
+
struct bpf_prog_stats {
u64 cnt;
u64 nsecs;
@@ -1004,31 +1009,6 @@ static inline int sock_map_get_from_fd(const union bpf_attr *attr,
}
#endif
-#if defined(CONFIG_XDP_SOCKETS)
-struct xdp_sock;
-struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map, u32 key);
-int __xsk_map_redirect(struct bpf_map *map, struct xdp_buff *xdp,
- struct xdp_sock *xs);
-void __xsk_map_flush(struct bpf_map *map);
-#else
-struct xdp_sock;
-static inline struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map,
- u32 key)
-{
- return NULL;
-}
-
-static inline int __xsk_map_redirect(struct bpf_map *map, struct xdp_buff *xdp,
- struct xdp_sock *xs)
-{
- return -EOPNOTSUPP;
-}
-
-static inline void __xsk_map_flush(struct bpf_map *map)
-{
-}
-#endif
-
#if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL)
void bpf_sk_reuseport_detach(struct sock *sk);
int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, void *key,
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index 36a9c2325176..de14872b01ba 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -26,6 +26,7 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_TRACEPOINT, tracepoint)
BPF_PROG_TYPE(BPF_PROG_TYPE_PERF_EVENT, perf_event)
BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT, raw_tracepoint)
BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, raw_tracepoint_writable)
+BPF_PROG_TYPE(BPF_PROG_TYPE_TRACING, tracing)
#endif
#ifdef CONFIG_CGROUP_BPF
BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_DEVICE, cg_dev)
diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index d4ee6e942562..67f016010aad 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -311,6 +311,7 @@ copy_struct_from_user(void *dst, size_t ksize, const void __user *src,
* happens, handle that and return -EFAULT.
*/
extern long probe_kernel_read(void *dst, const void *src, size_t size);
+extern long probe_kernel_read_strict(void *dst, const void *src, size_t size);
extern long __probe_kernel_read(void *dst, const void *src, size_t size);
/*
@@ -337,7 +338,22 @@ extern long __probe_user_read(void *dst, const void __user *src, size_t size);
extern long notrace probe_kernel_write(void *dst, const void *src, size_t size);
extern long notrace __probe_kernel_write(void *dst, const void *src, size_t size);
+/*
+ * probe_user_write(): safely attempt to write to a location in user space
+ * @dst: address to write to
+ * @src: pointer to the data that shall be written
+ * @size: size of the data chunk
+ *
+ * Safely write to address @dst from the buffer at @src. If a kernel fault
+ * happens, handle that and return -EFAULT.
+ */
+extern long notrace probe_user_write(void __user *dst, const void *src, size_t size);
+extern long notrace __probe_user_write(void __user *dst, const void *src, size_t size);
+
extern long strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count);
+extern long strncpy_from_unsafe_strict(char *dst, const void *unsafe_addr,
+ long count);
+extern long __strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count);
extern long strncpy_from_unsafe_user(char *dst, const void __user *unsafe_addr,
long count);
extern long strnlen_unsafe_user(const void __user *unsafe_addr, long count);
diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index c9398ce7960f..e3780e4b74e1 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -69,7 +69,14 @@ struct xdp_umem {
/* Nodes are linked in the struct xdp_sock map_list field, and used to
* track which maps a certain socket resides in.
*/
-struct xsk_map;
+
+struct xsk_map {
+ struct bpf_map map;
+ struct list_head __percpu *flush_list;
+ spinlock_t lock; /* Synchronize map updates */
+ struct xdp_sock *xsk_map[];
+};
+
struct xsk_map_node {
struct list_head node;
struct xsk_map *map;
@@ -109,8 +116,6 @@ struct xdp_sock {
struct xdp_buff;
#ifdef CONFIG_XDP_SOCKETS
int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp);
-int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp);
-void xsk_flush(struct xdp_sock *xs);
bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs);
/* Used from netdev driver */
bool xsk_umem_has_addrs(struct xdp_umem *umem, u32 cnt);
@@ -134,6 +139,22 @@ void xsk_map_try_sock_delete(struct xsk_map *map, struct xdp_sock *xs,
struct xdp_sock **map_entry);
int xsk_map_inc(struct xsk_map *map);
void xsk_map_put(struct xsk_map *map);
+int __xsk_map_redirect(struct bpf_map *map, struct xdp_buff *xdp,
+ struct xdp_sock *xs);
+void __xsk_map_flush(struct bpf_map *map);
+
+static inline struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map,
+ u32 key)
+{
+ struct xsk_map *m = container_of(map, struct xsk_map, map);
+ struct xdp_sock *xs;
+
+ if (key >= map->max_entries)
+ return NULL;
+
+ xs = READ_ONCE(m->xsk_map[key]);
+ return xs;
+}
static inline u64 xsk_umem_extract_addr(u64 addr)
{
@@ -224,15 +245,6 @@ static inline int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
return -ENOTSUPP;
}
-static inline int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
-{
- return -ENOTSUPP;
-}
-
-static inline void xsk_flush(struct xdp_sock *xs)
-{
-}
-
static inline bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs)
{
return false;
@@ -357,6 +369,21 @@ static inline u64 xsk_umem_adjust_offset(struct xdp_umem *umem, u64 handle,
return 0;
}
+static inline int __xsk_map_redirect(struct bpf_map *map, struct xdp_buff *xdp,
+ struct xdp_sock *xs)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline void __xsk_map_flush(struct bpf_map *map)
+{
+}
+
+static inline struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map,
+ u32 key)
+{
+ return NULL;
+}
#endif /* CONFIG_XDP_SOCKETS */
#endif /* _LINUX_XDP_SOCK_H */
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 4af8b0819a32..df6809a76404 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -173,6 +173,7 @@ enum bpf_prog_type {
BPF_PROG_TYPE_CGROUP_SYSCTL,
BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE,
BPF_PROG_TYPE_CGROUP_SOCKOPT,
+ BPF_PROG_TYPE_TRACING,
};
enum bpf_attach_type {
@@ -199,6 +200,7 @@ enum bpf_attach_type {
BPF_CGROUP_UDP6_RECVMSG,
BPF_CGROUP_GETSOCKOPT,
BPF_CGROUP_SETSOCKOPT,
+ BPF_TRACE_RAW_TP,
__MAX_BPF_ATTACH_TYPE
};
@@ -561,10 +563,13 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_probe_read(void *dst, u32 size, const void *src)
+ * int bpf_probe_read(void *dst, u32 size, const void *unsafe_ptr)
* Description
* For tracing programs, safely attempt to read *size* bytes from
- * address *src* and store the data in *dst*.
+ * kernel space address *unsafe_ptr* and store the data in *dst*.
+ *
+ * Generally, use bpf_probe_read_user() or bpf_probe_read_kernel()
+ * instead.
* Return
* 0 on success, or a negative error in case of failure.
*
@@ -1426,45 +1431,14 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_probe_read_str(void *dst, int size, const void *unsafe_ptr)
+ * int bpf_probe_read_str(void *dst, u32 size, const void *unsafe_ptr)
* Description
- * Copy a NUL terminated string from an unsafe address
- * *unsafe_ptr* to *dst*. The *size* should include the
- * terminating NUL byte. In case the string length is smaller than
- * *size*, the target is not padded with further NUL bytes. If the
- * string length is larger than *size*, just *size*-1 bytes are
- * copied and the last byte is set to NUL.
- *
- * On success, the length of the copied string is returned. This
- * makes this helper useful in tracing programs for reading
- * strings, and more importantly to get its length at runtime. See
- * the following snippet:
- *
- * ::
- *
- * SEC("kprobe/sys_open")
- * void bpf_sys_open(struct pt_regs *ctx)
- * {
- * char buf[PATHLEN]; // PATHLEN is defined to 256
- * int res = bpf_probe_read_str(buf, sizeof(buf),
- * ctx->di);
- *
- * // Consume buf, for example push it to
- * // userspace via bpf_perf_event_output(); we
- * // can use res (the string length) as event
- * // size, after checking its boundaries.
- * }
- *
- * In comparison, using **bpf_probe_read()** helper here instead
- * to read the string would require to estimate the length at
- * compile time, and would often result in copying more memory
- * than necessary.
+ * Copy a NUL terminated string from an unsafe kernel address
+ * *unsafe_ptr* to *dst*. See bpf_probe_read_kernel_str() for
+ * more details.
*
- * Another useful use case is when parsing individual process
- * arguments or individual environment variables navigating
- * *current*\ **->mm->arg_start** and *current*\
- * **->mm->env_start**: using this helper and the return value,
- * one can quickly iterate at the right offset of the memory area.
+ * Generally, use bpf_probe_read_user_str() or bpf_probe_read_kernel_str()
+ * instead.
* Return
* On success, the strictly positive length of the string,
* including the trailing NUL character. On error, a negative
@@ -2775,6 +2749,72 @@ union bpf_attr {
* restricted to raw_tracepoint bpf programs.
* Return
* 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_probe_read_user(void *dst, u32 size, const void *unsafe_ptr)
+ * Description
+ * Safely attempt to read *size* bytes from user space address
+ * *unsafe_ptr* and store the data in *dst*.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_probe_read_kernel(void *dst, u32 size, const void *unsafe_ptr)
+ * Description
+ * Safely attempt to read *size* bytes from kernel space address
+ * *unsafe_ptr* and store the data in *dst*.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_probe_read_user_str(void *dst, u32 size, const void *unsafe_ptr)
+ * Description
+ * Copy a NUL terminated string from an unsafe user address
+ * *unsafe_ptr* to *dst*. The *size* should include the
+ * terminating NUL byte. In case the string length is smaller than
+ * *size*, the target is not padded with further NUL bytes. If the
+ * string length is larger than *size*, just *size*-1 bytes are
+ * copied and the last byte is set to NUL.
+ *
+ * On success, the length of the copied string is returned. This
+ * makes this helper useful in tracing programs for reading
+ * strings, and more importantly to get its length at runtime. See
+ * the following snippet:
+ *
+ * ::
+ *
+ * SEC("kprobe/sys_open")
+ * void bpf_sys_open(struct pt_regs *ctx)
+ * {
+ * char buf[PATHLEN]; // PATHLEN is defined to 256
+ * int res = bpf_probe_read_user_str(buf, sizeof(buf),
+ * ctx->di);
+ *
+ * // Consume buf, for example push it to
+ * // userspace via bpf_perf_event_output(); we
+ * // can use res (the string length) as event
+ * // size, after checking its boundaries.
+ * }
+ *
+ * In comparison, using the **bpf_probe_read_user()** helper here
+ * instead to read the string would require estimating the length
+ * at compile time, and would often result in copying more memory
+ * than necessary.
+ *
+ * Another useful use case is when parsing individual process
+ * arguments or individual environment variables navigating
+ * *current*\ **->mm->arg_start** and *current*\
+ * **->mm->env_start**: using this helper and the return value,
+ * one can quickly iterate at the right offset of the memory area.
+ * Return
+ * On success, the strictly positive length of the string,
+ * including the trailing NUL character. On error, a negative
+ * value.
+ *
+ * int bpf_probe_read_kernel_str(void *dst, u32 size, const void *unsafe_ptr)
+ * Description
+ * Copy a NUL terminated string from an unsafe kernel address *unsafe_ptr*
+ * to *dst*. Same semantics as with bpf_probe_read_user_str() apply.
+ * Return
+ * On success, the strictly positive length of the string, including
+ * the trailing NUL character. On error, a negative value.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -2888,7 +2928,11 @@ union bpf_attr {
FN(sk_storage_delete), \
FN(send_signal), \
FN(tcp_gen_syncookie), \
- FN(skb_output),
+ FN(skb_output), \
+ FN(probe_read_user), \
+ FN(probe_read_kernel), \
+ FN(probe_read_user_str), \
+ FN(probe_read_kernel_str),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 658d68d409a4..97e37d82a1cc 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -668,9 +668,6 @@ static struct bpf_prog *bpf_prog_kallsyms_find(unsigned long addr)
{
struct latch_tree_node *n;
- if (!bpf_jit_kallsyms_enabled())
- return NULL;
-
n = latch_tree_find((void *)addr, &bpf_tree, &bpf_tree_ops);
return n ?
container_of(n, struct bpf_prog_aux, ksym_tnode)->prog :
@@ -1309,11 +1306,12 @@ bool bpf_opcode_in_insntable(u8 code)
}
#ifndef CONFIG_BPF_JIT_ALWAYS_ON
-u64 __weak bpf_probe_read(void * dst, u32 size, const void * unsafe_ptr)
+u64 __weak bpf_probe_read_kernel(void *dst, u32 size, const void *unsafe_ptr)
{
memset(dst, 0, size);
return -EFAULT;
}
+
/**
* __bpf_prog_run - run eBPF program on a given context
* @regs: is the array of MAX_BPF_EXT_REG eBPF pseudo-registers
@@ -1569,9 +1567,9 @@ out:
LDST(W, u32)
LDST(DW, u64)
#undef LDST
-#define LDX_PROBE(SIZEOP, SIZE) \
- LDX_PROBE_MEM_##SIZEOP: \
- bpf_probe_read(&DST, SIZE, (const void *)(long) SRC); \
+#define LDX_PROBE(SIZEOP, SIZE) \
+ LDX_PROBE_MEM_##SIZEOP: \
+ bpf_probe_read_kernel(&DST, SIZE, (const void *)(long) SRC); \
CONT;
LDX_PROBE(B, 1)
LDX_PROBE(H, 2)
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 57eacd5fc24a..6d9ce95e5a8d 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1579,7 +1579,7 @@ bpf_prog_load_check_attach(enum bpf_prog_type prog_type,
u32 btf_id)
{
switch (prog_type) {
- case BPF_PROG_TYPE_RAW_TRACEPOINT:
+ case BPF_PROG_TYPE_TRACING:
if (btf_id > BTF_MAX_TYPE)
return -EINVAL;
break;