diff options
105 files changed, 3058 insertions, 1797 deletions
diff --git a/Documentation/process/adding-syscalls.rst b/Documentation/process/adding-syscalls.rst index 8cc25a06f353..314c8bf6f2a2 100644 --- a/Documentation/process/adding-syscalls.rst +++ b/Documentation/process/adding-syscalls.rst @@ -222,7 +222,7 @@ your new syscall number may get adjusted to resolve conflicts. The file ``kernel/sys_ni.c`` provides a fallback stub implementation of each system call, returning ``-ENOSYS``. Add your new system call here too:: - cond_syscall(sys_xyzzy); + COND_SYSCALL(xyzzy); Your new kernel functionality, and the system call that controls it, should normally be optional, so add a ``CONFIG`` option (typically to @@ -487,6 +487,38 @@ patchset, for the convenience of reviewers. The man page should be cc'ed to linux-man@vger.kernel.org For more details, see https://www.kernel.org/doc/man-pages/patches.html + +Do not call System Calls in the Kernel +-------------------------------------- + +System calls are, as stated above, interaction points between userspace and +the kernel. Therefore, system call functions such as ``sys_xyzzy()`` or +``compat_sys_xyzzy()`` should only be called from userspace via the syscall +table, but not from elsewhere in the kernel. If the syscall functionality is +useful to be used within the kernel, needs to be shared between an old and a +new syscall, or needs to be shared between a syscall and its compatibility +variant, it should be implemented by means of a "helper" function (such as +``kern_xyzzy()``). This kernel function may then be called within the +syscall stub (``sys_xyzzy()``), the compatibility syscall stub +(``compat_sys_xyzzy()``), and/or other kernel code. + +At least on 64-bit x86, it will be a hard requirement from v4.17 onwards to not +call system call functions in the kernel. It uses a different calling +convention for system calls where ``struct pt_regs`` is decoded on-the-fly in a +syscall wrapper which then hands processing over to the actual syscall function. +This means that only those parameters which are actually needed for a specific +syscall are passed on during syscall entry, instead of filling in six CPU +registers with random user space content all the time (which may cause serious +trouble down the call chain). + +Moreover, rules on how data may be accessed may differ between kernel data and +user data. This is another reason why calling ``sys_xyzzy()`` is generally a +bad idea. + +Exceptions to this rule are only allowed in architecture-specific overrides, +architecture-specific compatibility wrappers, or other code in arch/. + + References and Sources ---------------------- diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c index fa1a392ca9a2..89faa6f4de47 100644 --- a/arch/alpha/kernel/osf_sys.c +++ b/arch/alpha/kernel/osf_sys.c @@ -189,7 +189,7 @@ SYSCALL_DEFINE6(osf_mmap, unsigned long, addr, unsigned long, len, goto out; if (off & ~PAGE_MASK) goto out; - ret = sys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT); + ret = ksys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT); out: return ret; } diff --git a/arch/arm/kernel/sys_arm.c b/arch/arm/kernel/sys_arm.c index 3151f5623d0e..bdf7514204ab 100644 --- a/arch/arm/kernel/sys_arm.c +++ b/arch/arm/kernel/sys_arm.c @@ -35,5 +35,5 @@ asmlinkage long sys_arm_fadvise64_64(int fd, int advice, loff_t offset, loff_t len) { - return sys_fadvise64_64(fd, offset, len, advice); + return ksys_fadvise64_64(fd, offset, len, advice); } diff --git a/arch/arm64/kernel/sys.c b/arch/arm64/kernel/sys.c index 26fe8ea93ea2..72981bae10eb 100644 --- a/arch/arm64/kernel/sys.c +++ b/arch/arm64/kernel/sys.c @@ -34,7 +34,7 @@ asmlinkage long sys_mmap(unsigned long addr, unsigned long len, if (offset_in_page(off) != 0) return -EINVAL; - return sys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT); + return ksys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT); } SYSCALL_DEFINE1(arm64_personality, unsigned int, personality) diff --git a/arch/ia64/kernel/sys_ia64.c b/arch/ia64/kernel/sys_ia64.c index 085adfcc74a4..9ebe1d633abc 100644 --- a/arch/ia64/kernel/sys_ia64.c +++ b/arch/ia64/kernel/sys_ia64.c @@ -139,7 +139,7 @@ int ia64_mmap_check(unsigned long addr, unsigned long len, asmlinkage unsigned long sys_mmap2 (unsigned long addr, unsigned long len, int prot, int flags, int fd, long pgoff) { - addr = sys_mmap_pgoff(addr, len, prot, flags, fd, pgoff); + addr = ksys_mmap_pgoff(addr, len, prot, flags, fd, pgoff); if (!IS_ERR((void *) addr)) force_successful_syscall_return(); return addr; @@ -151,7 +151,7 @@ sys_mmap (unsigned long addr, unsigned long len, int prot, int flags, int fd, lo if (offset_in_page(off) != 0) return -EINVAL; - addr = sys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT); + addr = ksys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT); if (!IS_ERR((void *) addr)) force_successful_syscall_return(); return addr; diff --git a/arch/m68k/kernel/sys_m68k.c b/arch/m68k/kernel/sys_m68k.c index 27e10af5153a..6363ec83a290 100644 --- a/arch/m68k/kernel/sys_m68k.c +++ b/arch/m68k/kernel/sys_m68k.c @@ -46,7 +46,7 @@ asmlinkage long sys_mmap2(unsigned long addr, unsigned long len, * so we need to shift the argument down by 1; m68k mmap64(3) * (in libc) expects the last argument of mmap2 in 4Kb units. */ - return sys_mmap_pgoff(addr, len, prot, flags, fd, pgoff); + return ksys_mmap_pgoff(addr, len, prot, flags, fd, pgoff); } /* Convert virtual (user) address VADDR to physical address PADDR */ diff --git a/arch/microblaze/kernel/sys_microblaze.c b/arch/microblaze/kernel/sys_microblaze.c index f1e1f666ddde..ed9f34da1a2a 100644 --- a/arch/microblaze/kernel/sys_microblaze.c +++ b/arch/microblaze/kernel/sys_microblaze.c @@ -40,7 +40,7 @@ SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len, if (pgoff & ~PAGE_MASK) return -EINVAL; - return sys_mmap_pgoff(addr, len, prot, flags, fd, pgoff >> PAGE_SHIFT); + return ksys_mmap_pgoff(addr, len, prot, flags, fd, pgoff >> PAGE_SHIFT); } SYSCALL_DEFINE6(mmap2, unsigned long, addr, unsigned long, len, @@ -50,6 +50,6 @@ SYSCALL_DEFINE6(mmap2, unsigned long, addr, unsigned long, len, if (pgoff & (~PAGE_MASK >> 12)) return -EINVAL; - return sys_mmap_pgoff(addr, len, prot, flags, fd, - pgoff >> (PAGE_SHIFT - 12)); + return ksys_mmap_pgoff(addr, len, prot, flags, fd, + pgoff >> (PAGE_SHIFT - 12)); } diff --git a/arch/mips/kernel/linux32.c b/arch/mips/kernel/linux32.c index b332f6fc1e72..318f1c05c5b3 100644 --- a/arch/mips/kernel/linux32.c +++ b/arch/mips/kernel/linux32.c @@ -67,8 +67,8 @@ SYSCALL_DEFINE6(32_mmap2, unsigned long, addr, unsigned long, len, { if (pgoff & (~PAGE_MASK >> 12)) return -EINVAL; - return sys_mmap_pgoff(addr, len, prot, flags, fd, - pgoff >> (PAGE_SHIFT-12)); + return ksys_mmap_pgoff(addr, len, prot, flags, fd, + pgoff >> (PAGE_SHIFT-12)); } #define RLIM_INFINITY32 0x7fffffff @@ -82,13 +82,13 @@ struct rlimit32 { SYSCALL_DEFINE4(32_truncate64, const char __user *, path, unsigned long, __dummy, unsigned long, a2, unsigned long, a3) { - return sys_truncate(path, merge_64(a2, a3)); + return ksys_truncate(path, merge_64(a2, a3)); } SYSCALL_DEFINE4(32_ftruncate64, unsigned long, fd, unsigned long, __dummy, unsigned long, a2, unsigned long, a3) { - return sys_ftruncate(fd, merge_64(a2, a3)); + return ksys_ftruncate(fd, merge_64(a2, a3)); } SYSCALL_DEFINE5(32_llseek, unsigned int, fd, unsigned int, offset_high, @@ -105,13 +105,13 @@ SYSCALL_DEFINE5(32_llseek, unsigned int, fd, unsigned int, offset_high, SYSCALL_DEFINE6(32_pread, unsigned long, fd, char __user *, buf, size_t, count, unsigned long, unused, unsigned long, a4, unsigned long, a5) { - return sys_pread64(fd, buf, count, merge_64(a4, a5)); + return ksys_pread64(fd, buf, count, merge_64(a4, a5)); } SYSCALL_DEFINE6(32_pwrite, unsigned int, fd, const char __user *, buf, size_t, count, u32, unused, u64, a4, u64, a5) { - return sys_pwrite64(fd, buf, count, merge_64(a4, a5)); + return ksys_pwrite64(fd, buf, count, merge_64(a4, a5)); } SYSCALL_DEFINE1(32_personality, unsigned long, personality) @@ -131,7 +131,7 @@ SYSCALL_DEFINE1(32_personality, unsigned long, personality) asmlinkage ssize_t sys32_readahead(int fd, u32 pad0, u64 a2, u64 a3, size_t count) { - return sys_readahead(fd, merge_64(a2, a3), count); + return ksys_readahead(fd, merge_64(a2, a3), count); } asmlinkage long sys32_sync_file_range(int fd, int __pad, @@ -139,7 +139,7 @@ asmlinkage long sys32_sync_file_range(int fd, int __pad, unsigned long a4, unsigned long a5, int flags) { - return sys_sync_file_range(fd, + return ksys_sync_file_range(fd, merge_64(a2, a3), merge_64(a4, a5), flags); } @@ -149,7 +149,7 @@ asmlinkage long sys32_fadvise64_64(int fd, int __pad, unsigned long a4, unsigned long a5, int flags) { - return sys_fadvise64_64(fd, + return ksys_fadvise64_64(fd, merge_64(a2, a3), merge_64(a4, a5), flags); } @@ -157,6 +157,6 @@ asmlinkage long sys32_fadvise64_64(int fd, int __pad, asmlinkage long sys32_fallocate(int fd, int mode, unsigned offset_a2, unsigned offset_a3, unsigned len_a4, unsigned len_a5) { - return sys_fallocate(fd, mode, merge_64(offset_a2, offset_a3), - merge_64(len_a4, len_a5)); + return ksys_fallocate(fd, mode, merge_64(offset_a2, offset_a3), + merge_64(len_a4, len_a5)); } diff --git a/arch/mips/kernel/syscall.c b/arch/mips/kernel/syscall.c index 58c6f634b550..69c17b549fd3 100644 --- a/arch/mips/kernel/syscall.c +++ b/arch/mips/kernel/syscall.c @@ -63,7 +63,8 @@ SYSCALL_DEFINE6(mips_mmap, unsigned long, addr, unsigned long, len, { if (offset & ~PAGE_MASK) return -EINVAL; - return sys_mmap_pgoff(addr, len, prot, flags, fd, offset >> PAGE_SHIFT); + return ksys_mmap_pgoff(addr, len, prot, flags, fd, + offset >> PAGE_SHIFT); } SYSCALL_DEFINE6(mips_mmap2, unsigned long, addr, unsigned long, len, @@ -73,7 +74,8 @@ SYSCALL_DEFINE6(mips_mmap2, unsigned long, addr, unsigned long, len, if (pgoff & (~PAGE_MASK >> 12)) return -EINVAL; - return sys_mmap_pgoff(addr, len, prot, flags, fd, pgoff >> (PAGE_SHIFT-12)); + return ksys_mmap_pgoff(addr, len, prot, flags, fd, + pgoff >> (PAGE_SHIFT - 12)); } save_static_function(sys_fork); diff --git a/arch/parisc/kernel/sys_parisc.c b/arch/parisc/kernel/sys_parisc.c index 378a754ca186..8c99ebbe2bac 100644 --- a/arch/parisc/kernel/sys_parisc.c +++ b/arch/parisc/kernel/sys_parisc.c @@ -270,8 +270,8 @@ asmlinkage unsigned long sys_mmap2(unsigned long addr, unsigned long len, { /* Make sure the shift for mmap2 is constant (12), no matter what PAGE_SIZE we have. */ - return sys_mmap_pgoff(addr, len, prot, flags, fd, - pgoff >> (PAGE_SHIFT - 12)); + return ksys_mmap_pgoff(addr, len, prot, flags, fd, + pgoff >> (PAGE_SHIFT - 12)); } asmlinkage unsigned long sys_mmap(unsigned long addr, unsigned long len, @@ -279,7 +279,7 @@ asmlinkage unsigned long sys_mmap(unsigned long addr, unsigned long len, unsigned long offset) { if (!(offset & ~PAGE_MASK)) { - return sys_mmap_pgoff(addr, len, prot, flags, fd, + return ksys_mmap_pgoff(addr, len, prot, flags, fd, offset >> PAGE_SHIFT); } else { return -EINVAL; @@ -292,24 +292,24 @@ asmlinkage unsigned long sys_mmap(unsigned long addr, unsigned long len, asmlinkage long parisc_truncate64(const char __user * path, unsigned int high, unsigned int low) { - return sys_truncate(path, (long)high << 32 | low); + return ksys_truncate(path, (long)high << 32 | low); |