From f630c7c6f10546ebff15c3a856e7949feb7a2372 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Mon, 14 Dec 2020 19:03:14 -0800 Subject: kthread: add kthread_work tracepoints While migrating some code from wq to kthread_worker, I found that I missed the execute_start/end tracepoints. So add similar tracepoints for kthread_work. And for completeness, queue_work tracepoint (although this one differs slightly from the matching workqueue tracepoint). Link: https://lkml.kernel.org/r/20201010180323.126634-1-robdclark@gmail.com Signed-off-by: Rob Clark Cc: Rob Clark Cc: Steven Rostedt Cc: Ingo Molnar Cc: "Peter Zijlstra (Intel)" Cc: Phil Auld Cc: Valentin Schneider Cc: Thara Gopinath Cc: Randy Dunlap Cc: Vincent Donnefort Cc: Mel Gorman Cc: Jens Axboe Cc: Marcelo Tosatti Cc: Frederic Weisbecker Cc: Ilias Stamatis Cc: Liang Chen Cc: Ben Dooks Cc: Peter Zijlstra Cc: "J. Bruce Fields" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/trace/events/sched.h | 84 ++++++++++++++++++++++++++++++++++++++++++++ kernel/kthread.c | 9 +++++ 2 files changed, 93 insertions(+) diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index c96a4337afe6..5039af667645 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -5,6 +5,7 @@ #if !defined(_TRACE_SCHED_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_SCHED_H +#include #include #include #include @@ -51,6 +52,89 @@ TRACE_EVENT(sched_kthread_stop_ret, TP_printk("ret=%d", __entry->ret) ); +/** + * sched_kthread_work_queue_work - called when a work gets queued + * @worker: pointer to the kthread_worker + * @work: pointer to struct kthread_work + * + * This event occurs when a work is queued immediately or once a + * delayed work is actually queued (ie: once the delay has been + * reached). 
+ */ +TRACE_EVENT(sched_kthread_work_queue_work, + + TP_PROTO(struct kthread_worker *worker, + struct kthread_work *work), + + TP_ARGS(worker, work), + + TP_STRUCT__entry( + __field( void *, work ) + __field( void *, function) + __field( void *, worker) + ), + + TP_fast_assign( + __entry->work = work; + __entry->function = work->func; + __entry->worker = worker; + ), + + TP_printk("work struct=%p function=%ps worker=%p", + __entry->work, __entry->function, __entry->worker) +); + +/** + * sched_kthread_work_execute_start - called immediately before the work callback + * @work: pointer to struct kthread_work + * + * Allows to track kthread work execution. + */ +TRACE_EVENT(sched_kthread_work_execute_start, + + TP_PROTO(struct kthread_work *work), + + TP_ARGS(work), + + TP_STRUCT__entry( + __field( void *, work ) + __field( void *, function) + ), + + TP_fast_assign( + __entry->work = work; + __entry->function = work->func; + ), + + TP_printk("work struct %p: function %ps", __entry->work, __entry->function) +); + +/** + * sched_kthread_work_execute_end - called immediately after the work callback + * @work: pointer to struct kthread_work + * @function: pointer to worker function + * + * Allows to track kthread work execution. 
+ */ +TRACE_EVENT(sched_kthread_work_execute_end, + + TP_PROTO(struct kthread_work *work, kthread_work_func_t function), + + TP_ARGS(work, function), + + TP_STRUCT__entry( + __field( void *, work ) + __field( void *, function) + ), + + TP_fast_assign( + __entry->work = work; + __entry->function = function; + ), + + TP_printk("work struct %p: function %ps", __entry->work, __entry->function) +); + /* * Tracepoint for waking up a task: */ diff --git a/kernel/kthread.c b/kernel/kthread.c index 933a625621b8..34516b0a6eb7 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -704,8 +704,15 @@ repeat: raw_spin_unlock_irq(&worker->lock); if (work) { + kthread_work_func_t func = work->func; __set_current_state(TASK_RUNNING); + trace_sched_kthread_work_execute_start(work); work->func(work); + /* + * Avoid dereferencing work after this point. The trace + * event only cares about the address. + */ + trace_sched_kthread_work_execute_end(work, func); } else if (!freezing(current)) schedule(); @@ -834,6 +841,8 @@ static void kthread_insert_work(struct kthread_worker *worker, { kthread_insert_work_sanity_check(worker, work); + trace_sched_kthread_work_queue_work(worker, work); + list_add_tail(&work->node, pos); work->worker = worker; if (!worker->current_work && likely(worker->task)) -- cgit v1.2.3 From ebb2bdcef8a00d59b27d3532c423110559821e1d Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Mon, 14 Dec 2020 19:03:18 -0800 Subject: kthread_worker: document CPU hotplug handling The kthread worker API is simple. In short, it allows to create, use, and destroy workers. kthread_create_worker_on_cpu() just allows to bind a newly created worker to a given CPU. It is up to the API user how to handle CPU hotplug. They have to decide how to handle pending work items, prevent queuing new ones, and restore the functionality when the CPU goes off and on. There are few catches: + The CPU affinity gets lost when it is scheduled on an offline CPU. 
+ The worker might not exist when the CPU was off when the user created the workers. A good practice is to implement two CPU hotplug callbacks and destroy/create the worker when CPU goes down/up. Mention this in the function description. [akpm@linux-foundation.org: grammar tweaks] Link: https://lore.kernel.org/r/20201028073031.4536-1-qiang.zhang@windriver.com Link: https://lkml.kernel.org/r/20201102101039.19227-1-pmladek@suse.com Reported-by: Zhang Qiang Signed-off-by: Petr Mladek Cc: Tejun Heo Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/kthread.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/kernel/kthread.c b/kernel/kthread.c index 34516b0a6eb7..97e053ade74a 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -793,7 +793,25 @@ EXPORT_SYMBOL(kthread_create_worker); * A good practice is to add the cpu number also into the worker name. * For example, use kthread_create_worker_on_cpu(cpu, "helper/%d", cpu). * - * Returns a pointer to the allocated worker on success, ERR_PTR(-ENOMEM) + * CPU hotplug: + * The kthread worker API is simple and generic. It just provides a way + * to create, use, and destroy workers. + * + * It is up to the API user how to handle CPU hotplug. They have to decide + * how to handle pending work items, prevent queuing new ones, and + * restore the functionality when the CPU goes off and on. There are a + * few catches: + * + * - CPU affinity gets lost when it is scheduled on an offline CPU. + * + * - The worker might not exist when the CPU was off when the user + * created the workers. + * + * Good practice is to implement two CPU hotplug callbacks and to + * destroy/create the worker when the CPU goes down/up. + * + * Return: + * The pointer to the allocated worker on success, ERR_PTR(-ENOMEM) * when the needed structures could not get allocated, and ERR_PTR(-EINTR) * when the worker was SIGKILLed. 
*/ -- cgit v1.2.3 From a85cbe6159ffc973e5702f70a3bd5185f8f3c38d Mon Sep 17 00:00:00 2001 From: Petr Vorel Date: Mon, 14 Dec 2020 19:03:21 -0800 Subject: uapi: move constants from <linux/kernel.h> to <linux/const.h> and include <linux/const.h> in UAPI headers instead of <linux/kernel.h>. The reason is to avoid indirect <linux/sysinfo.h> include when using some network headers: <linux/netlink.h> or others -> <linux/kernel.h> -> <linux/sysinfo.h>. This indirect include causes on MUSL redefinition of struct sysinfo when included both <sys/sysinfo.h> and some of UAPI headers: In file included from x86_64-buildroot-linux-musl/sysroot/usr/include/linux/kernel.h:5, from x86_64-buildroot-linux-musl/sysroot/usr/include/linux/netlink.h:5, from ../include/tst_netlink.h:14, from tst_crypto.c:13: x86_64-buildroot-linux-musl/sysroot/usr/include/linux/sysinfo.h:8:8: error: redefinition of `struct sysinfo' struct sysinfo { ^~~~~~~ In file included from ../include/tst_safe_macros.h:15, from ../include/tst_test.h:93, from tst_crypto.c:11: x86_64-buildroot-linux-musl/sysroot/usr/include/sys/sysinfo.h:10:8: note: originally defined here Link: https://lkml.kernel.org/r/20201015190013.8901-1-petr.vorel@gmail.com Signed-off-by: Petr Vorel Suggested-by: Rich Felker Acked-by: Rich Felker Cc: Peter Korsgaard Cc: Baruch Siach Cc: Florian Weimer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/uapi/linux/const.h | 5 +++++ include/uapi/linux/ethtool.h | 2 +- include/uapi/linux/kernel.h | 9 +-------- include/uapi/linux/lightnvm.h | 2 +- include/uapi/linux/mroute6.h | 2 +- include/uapi/linux/netfilter/x_tables.h | 2 +- include/uapi/linux/netlink.h | 2 +- include/uapi/linux/sysctl.h | 2 +- 8 files changed, 12 insertions(+), 14 deletions(-) diff --git a/include/uapi/linux/const.h b/include/uapi/linux/const.h index 5ed721ad5b19..af2a44c08683 100644 --- a/include/uapi/linux/const.h +++ b/include/uapi/linux/const.h @@ -28,4 +28,9 @@ #define _BITUL(x) (_UL(1) << (x)) #define _BITULL(x) (_ULL(1) << (x)) +#define __ALIGN_KERNEL(x, a) __ALIGN_KERNEL_MASK(x, (typeof(x))(a) - 1) +#define __ALIGN_KERNEL_MASK(x, mask) (((x) + (mask)) & 
~(mask)) + +#define __KERNEL_DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d)) + #endif /* _UAPI_LINUX_CONST_H */ diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 9ca87bc73c44..cde753bb2093 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -14,7 +14,7 @@ #ifndef _UAPI_LINUX_ETHTOOL_H #define _UAPI_LINUX_ETHTOOL_H -#include +#include #include #include diff --git a/include/uapi/linux/kernel.h b/include/uapi/linux/kernel.h index 0ff8f7477847..fadf2db71fe8 100644 --- a/include/uapi/linux/kernel.h +++ b/include/uapi/linux/kernel.h @@ -3,13 +3,6 @@ #define _UAPI_LINUX_KERNEL_H #include - -/* - * 'kernel.h' contains some often-used function prototypes etc - */ -#define __ALIGN_KERNEL(x, a) __ALIGN_KERNEL_MASK(x, (typeof(x))(a) - 1) -#define __ALIGN_KERNEL_MASK(x, mask) (((x) + (mask)) & ~(mask)) - -#define __KERNEL_DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d)) +#include #endif /* _UAPI_LINUX_KERNEL_H */ diff --git a/include/uapi/linux/lightnvm.h b/include/uapi/linux/lightnvm.h index f9a1be7fc696..ead2e72e5c88 100644 --- a/include/uapi/linux/lightnvm.h +++ b/include/uapi/linux/lightnvm.h @@ -21,7 +21,7 @@ #define _UAPI_LINUX_LIGHTNVM_H #ifdef __KERNEL__ -#include +#include #include #else /* __KERNEL__ */ #include diff --git a/include/uapi/linux/mroute6.h b/include/uapi/linux/mroute6.h index c36177a86516..a1fd6173e2db 100644 --- a/include/uapi/linux/mroute6.h +++ b/include/uapi/linux/mroute6.h @@ -2,7 +2,7 @@ #ifndef _UAPI__LINUX_MROUTE6_H #define _UAPI__LINUX_MROUTE6_H -#include +#include #include #include #include /* For struct sockaddr_in6. 
*/ diff --git a/include/uapi/linux/netfilter/x_tables.h b/include/uapi/linux/netfilter/x_tables.h index a8283f7dbc51..b8c6bb233ac1 100644 --- a/include/uapi/linux/netfilter/x_tables.h +++ b/include/uapi/linux/netfilter/x_tables.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ #ifndef _UAPI_X_TABLES_H #define _UAPI_X_TABLES_H -#include +#include #include #define XT_FUNCTION_MAXNAMELEN 30 diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h index c3816ff7bfc3..3d94269bbfa8 100644 --- a/include/uapi/linux/netlink.h +++ b/include/uapi/linux/netlink.h @@ -2,7 +2,7 @@ #ifndef _UAPI__LINUX_NETLINK_H #define _UAPI__LINUX_NETLINK_H -#include +#include #include /* for __kernel_sa_family_t */ #include diff --git a/include/uapi/linux/sysctl.h b/include/uapi/linux/sysctl.h index 27c1ed2822e6..458179df9b27 100644 --- a/include/uapi/linux/sysctl.h +++ b/include/uapi/linux/sysctl.h @@ -23,7 +23,7 @@ #ifndef _UAPI_LINUX_SYSCTL_H #define _UAPI_LINUX_SYSCTL_H -#include +#include #include #include -- cgit v1.2.3 From 483e6417aea5c6d278e6aa7facc50d961d95a0a9 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Mon, 14 Dec 2020 19:03:24 -0800 Subject: ide/falcon: remove in_interrupt() usage falconide_get_lock() is called by ide_lock_host() and its caller (ide_issue_rq()) has already a might_sleep() check. stdma_lock() has wait_event() which also has a might_sleep() check. Remove the in_interrupt() check. Link: https://lkml.kernel.org/r/20201113161021.2217361-2-bigeasy@linutronix.de Signed-off-by: Sebastian Andrzej Siewior Cc: "David S. 
Miller" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/ide/falconide.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/ide/falconide.c b/drivers/ide/falconide.c index dbeb2605e5f6..77af4c1a3f38 100644 --- a/drivers/ide/falconide.c +++ b/drivers/ide/falconide.c @@ -51,8 +51,6 @@ static void falconide_release_lock(void) static void falconide_get_lock(irq_handler_t handler, void *data) { if (falconide_intr_lock == 0) { - if (in_interrupt() > 0) - panic("Falcon IDE hasn't ST-DMA lock in interrupt"); stdma_lock(handler, data); falconide_intr_lock = 1; } -- cgit v1.2.3 From ec680c1990e70c44d6b4452300a62b15f5c51f9c Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Mon, 14 Dec 2020 19:03:27 -0800 Subject: ide: remove BUG_ON(in_interrupt() || irqs_disabled()) from ide_unregister() In the discussion about preempt count consistency across kernel configurations: https://lore.kernel.org/r/20200914204209.256266093@linutronix.de/ it was concluded that the usage of in_interrupt() and related context checks should be removed from non-core code. Both BUG_ON()s in ide-probe.c were introduced in commit 4015c949fb465 ("[PATCH] update ide core") when ide_unregister() was extended with semaphore based locking. Both checks won't complain about disabled preemption which is also wrong. The might_sleep() in today's mutex_lock() will complain about the misuses. Remove the BUG_ON() statements. Link: https://lkml.kernel.org/r/20201120092421.1023428-3-bigeasy@linutronix.de Signed-off-by: Sebastian Andrzej Siewior Acked-by: Jens Axboe Cc: "David S. 
Miller" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/ide/ide-probe.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c index 1ddc45a04418..430b29e0abdb 100644 --- a/drivers/ide/ide-probe.c +++ b/drivers/ide/ide-probe.c @@ -1592,9 +1592,6 @@ EXPORT_SYMBOL_GPL(ide_port_unregister_devices); static void ide_unregister(ide_hwif_t *hwif) { - BUG_ON(in_interrupt()); - BUG_ON(irqs_disabled()); - mutex_lock(&ide_cfg_mtx); if (hwif->present) { -- cgit v1.2.3 From 3f10c2fa40e444b8cacf82adcbbcd3602b99a645 Mon Sep 17 00:00:00 2001 From: Alex Shi Date: Mon, 14 Dec 2020 19:03:31 -0800 Subject: fs/ntfs: remove unused variables We actually don't use these variables, so remove them to avoid gcc warning: fs/ntfs/file.c:326:14: warning: variable `base_ni' set but not used [-Wunused-but-set-variable] fs/ntfs/logfile.c:481:21: warning: variable `log_page_mask' set but not used [-Wunused-but-set-variable] Link: https://lkml.kernel.org/r/1604821092-54631-1-git-send-email-alex.shi@linux.alibaba.com Signed-off-by: Alex Shi Acked-by: Anton Altaparmakov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ntfs/file.c | 5 +---- fs/ntfs/logfile.c | 3 +-- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index f42967b738eb..e5aab265dff1 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c @@ -323,7 +323,7 @@ static ssize_t ntfs_prepare_file_for_write(struct kiocb *iocb, unsigned long flags; struct file *file = iocb->ki_filp; struct inode *vi = file_inode(file); - ntfs_inode *base_ni, *ni = NTFS_I(vi); + ntfs_inode *ni = NTFS_I(vi); ntfs_volume *vol = ni->vol; ntfs_debug("Entering for i_ino 0x%lx, attribute type 0x%x, pos " @@ -365,9 +365,6 @@ static ssize_t ntfs_prepare_file_for_write(struct kiocb *iocb, err = -EOPNOTSUPP; goto out; } - base_ni = ni; - if (NInoAttr(ni)) - base_ni = ni->ext.base_ntfs_ino; err = file_remove_privs(file); if (unlikely(err)) goto out; diff 
--git a/fs/ntfs/logfile.c b/fs/ntfs/logfile.c index a0c40f1be7ac..bc1bf217b38e 100644 --- a/fs/ntfs/logfile.c +++ b/fs/ntfs/logfile.c @@ -478,7 +478,7 @@ bool ntfs_check_logfile(struct inode *log_vi, RESTART_PAGE_HEADER **rp) u8 *kaddr = NULL; RESTART_PAGE_HEADER *rstr1_ph = NULL; RESTART_PAGE_HEADER *rstr2_ph = NULL; - int log_page_size, log_page_mask, err; + int log_page_size, err; bool logfile_is_empty = true; u8 log_page_bits; @@ -501,7 +501,6 @@ bool ntfs_check_logfile(struct inode *log_vi, RESTART_PAGE_HEADER **rp) log_page_size = DefaultLogPageSize; else log_page_size = PAGE_SIZE; - log_page_mask = log_page_size - 1; /* * Use ntfs_ffs() instead of ffs() to enable the compiler to * optimize log_page_size and log_page_bits into constants. -- cgit v1.2.3 From 4dad18f47767f97f859fad84a8c2c8ee8323c2b9 Mon Sep 17 00:00:00 2001 From: Alex Shi Date: Mon, 14 Dec 2020 19:03:34 -0800 Subject: fs/ntfs: remove unused variable attr_len This variable isn't used anymore, remove it to skip W=1 warning: fs/ntfs/inode.c:2350:6: warning: variable `attr_len' set but not used [-Wunused-but-set-variable] Link: https://lkml.kernel.org/r/4194376f-898b-b602-81c3-210567712092@linux.alibaba.com Signed-off-by: Alex Shi Acked-by: Anton Altaparmakov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ntfs/inode.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c index caf563981532..f7e4cbc26eaf 100644 --- a/fs/ntfs/inode.c +++ b/fs/ntfs/inode.c @@ -2347,7 +2347,6 @@ int ntfs_truncate(struct inode *vi) ATTR_RECORD *a; const char *te = " Leaving file length out of sync with i_size."; int err, mp_size, size_change, alloc_change; - u32 attr_len; ntfs_debug("Entering for inode 0x%lx.", vi->i_ino); BUG_ON(NInoAttr(ni)); @@ -2721,7 +2720,6 @@ do_non_resident_truncate: * this cannot fail since we are making the attribute smaller thus by * definition there is enough space to do so. 
*/ - attr_len = le32_to_cpu(a->length); err = ntfs_attr_record_resize(m, a, mp_size + le16_to_cpu(a->data.non_resident.mapping_pairs_offset)); BUG_ON(err); -- cgit v1.2.3 From a0823b5e4434d349c92ec5f7cec0c6e98788d9b6 Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Mon, 14 Dec 2020 19:03:37 -0800 Subject: fs/ocfs2/cluster/tcp.c: remove unneeded break A break is not needed if it is preceded by a goto Link: https://lkml.kernel.org/r/20201019175216.2329-1-trix@redhat.com Signed-off-by: Tom Rix Acked-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Gang He Cc: Jun Piao Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/cluster/tcp.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index 79a231719460..3bd8119bed5e 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c @@ -1198,7 +1198,6 @@ static int o2net_process_message(struct o2net_sock_container *sc, msglog(hdr, "bad magic\n"); ret = -EINVAL; goto out; - break; } /* find a handler for it */ -- cgit v1.2.3 From 45680967ee29e67b62e6800a8780440b840a0b1f Mon Sep 17 00:00:00 2001 From: Mauricio Faria de Oliveira Date: Mon, 14 Dec 2020 19:03:40 -0800 Subject: ocfs2: ratelimit the 'max lookup times reached' notice Running stress-ng on ocfs2 completely fills the kernel log with 'max lookup times reached, filesystem may have nested directories.' Let's ratelimit this message as done with others in the code. Test-case: # mkfs.ocfs2 --mount local $DEV # mount $DEV $MNT # cd $MNT # dmesg -C # stress-ng --dirdeep 1 --dirdeep-ops 1000 # dmesg | grep -c 'max lookup times reached' Before: # dmesg -C # stress-ng --dirdeep 1 --dirdeep-ops 1000 ... stress-ng: info: [11116] successful run completed in 3.03s # dmesg | grep -c 'max lookup times reached' 967 After: # dmesg -C # stress-ng --dirdeep 1 --dirdeep-ops 1000 ... 
stress-ng: info: [739] successful run completed in 0.96s # dmesg | grep -c 'max lookup times reached' 10 # dmesg [ 259.086086] ocfs2_check_if_ancestor: 1990 callbacks suppressed [ 259.086092] (stress-ng-dirde,740,1):ocfs2_check_if_ancestor:1091 max lookup times reached, filesystem may have nested directories, src inode: 18007, dest inode: 17940. ... Link: https://lkml.kernel.org/r/20201001224417.478263-1-mfo@canonical.com Signed-off-by: Mauricio Faria de Oliveira Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Gang He Cc: Jun Piao Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/namei.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index c46bf7f581a1..2a237ab00453 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -1088,8 +1088,8 @@ static int ocfs2_check_if_ancestor(struct ocfs2_super *osb, child_inode_no = parent_inode_no; if (++i >= MAX_LOOKUP_TIMES) { - mlog(ML_NOTICE, "max lookup times reached, filesystem " - "may have nested directories, " + mlog_ratelimited(ML_NOTICE, "max lookup times reached, " + "filesystem may have nested directories, " "src inode: %llu, dest inode: %llu.\n", (unsigned long long)src_inode_no, (unsigned long long)dest_inode_no); -- cgit v1.2.3 From a86ecfa6a873e42286398b2a594cfa9e4ec10322 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 14 Dec 2020 19:03:44 -0800 Subject: arch/Kconfig: fix spelling mistakes There are a few spelling mistakes in the Kconfig comments and help text. Fix these. 
Link: https://lkml.kernel.org/r/20201207155004.171962-1-colin.king@canonical.com Signed-off-by: Colin Ian King Acked-by: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/Kconfig | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/Kconfig b/arch/Kconfig index ba4e966484ab..4f654c149709 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -261,7 +261,7 @@ config ARCH_HAS_SET_DIRECT_MAP # # Select if the architecture provides the arch_dma_set_uncached symbol to -# either provide an uncached segement alias for a DMA allocation, or +# either provide an uncached segment alias for a DMA allocation, or # to remap the page tables in place. # config ARCH_HAS_DMA_SET_UNCACHED @@ -314,14 +314,14 @@ config ARCH_32BIT_OFF_T config HAVE_ASM_MODVERSIONS bool help - This symbol should be selected by an architecure if it provides + This symbol should be selected by an architecture if it provides to support the module versioning for symbols exported from assembly code. config HAVE_REGS_AND_STACK_ACCESS_API bool help - This symbol should be selected by an architecure if it supports + This symbol should be selected by an architecture if it supports the API needed to access registers and stack entries from pt_regs, declared in asm/ptrace.h For example the kprobes-based event tracer needs this API. 
@@ -336,7 +336,7 @@ config HAVE_RSEQ config HAVE_FUNCTION_ARG_ACCESS_API bool help - This symbol should be selected by an architecure if it supports + This symbol should be selected by an architecture if it supports the API needed to access function arguments from pt_regs, declared in asm/ptrace.h -- cgit v1.2.3 From 7714304f3ba16af9cf52952d182d031d04b62d6d Mon Sep 17 00:00:00 2001 From: Hui Su Date: Mon, 14 Dec 2020 19:03:47 -0800 Subject: mm/slab_common.c: use list_for_each_entry in dump_unreclaimable_slab() dump_unreclaimable_slab() acquires the slab_mutex first, and it won't remove any slab_caches list entry when iterating the slab_caches lists. Thus we do not need list_for_each_entry_safe here, which exists to guard against removal of list entries. Link: https://lkml.kernel.org/r/20200926043440.GA180545@rlk Signed-off-by: Hui Su Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/slab_common.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/slab_common.c b/mm/slab_common.c index f9ccd5dc13f3..0cd2821b7066 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -978,7 +978,7 @@ static int slab_show(struct seq_file *m, void *p) void dump_unreclaimable_slab(void) { - struct kmem_cache *s, *s2; + struct kmem_cache *s; struct slabinfo sinfo; /* @@ -996,7 +996,7 @@ void dump_unreclaimable_slab(void) pr_info("Unreclaimable slab info:\n"); pr_info("Name Used Total\n"); - list_for_each_entry_safe(s, s2, &slab_caches, list) { + list_for_each_entry(s, &slab_caches, list) { if (s->flags & SLAB_RECLAIM_ACCOUNT) continue; -- cgit v1.2.3 From 15d5de496bebfd7c0261987423480e98d1a14495 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 14 Dec 2020 19:03:50 -0800 Subject: mm: slab: clarify krealloc()'s behavior with __GFP_ZERO Patch series "slab: provide and use krealloc_array()", v3. 
Andy brought to my attention the fact that users allocating an array of equally sized elements should check if the size multiplication doesn't overflow. This is why we have helpers like kmalloc_array(). However we don't have krealloc_array() equivalent and there are many users who do their own multiplication when calling krealloc() for arrays. This series provides krealloc_array() and uses it in a couple places. A separate series will follow adding devm_krealloc_array() which is needed in the xilinx adc driver. This patch (of 9): __GFP_ZERO is ignored by krealloc() (unless we fall-back to kmalloc() path, in which case it's honored). Point that out in the kerneldoc. Link: https://lkml.kernel.org/r/20201109110654.12547-1-brgl@bgdev.pl Link: https://lkml.kernel.org/r/20201109110654.12547-2-brgl@bgdev.pl Signed-off-by: Bartosz Golaszewski Cc: Andy Shevchenko Cc: Sumit Semwal Cc: Gustavo Padovan Cc: Christian König Cc: Mauro Carvalho Chehab Cc: Borislav Petkov Cc: Tony Luck Cc: James Morse Cc: Robert Richter Cc: Maarten Lankhorst Cc: Maxime Ripard Cc: Thomas Zimmermann Cc: David Airlie Cc: Daniel Vetter Cc: Alexander Shishkin Cc: Linus Walleij Cc: "Michael S . Tsirkin" Cc: Jason Wang Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Jaroslav Kysela Cc: Takashi Iwai Cc: Borislav Petkov Cc: Daniel Vetter Cc: Takashi Iwai Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/slab_common.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mm/slab_common.c b/mm/slab_common.c index 0cd2821b7066..2f2b55c2798e 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -1091,9 +1091,9 @@ static __always_inline void *__do_krealloc(const void *p, size_t new_size, * @flags: the type of memory to allocate. * * The contents of the object pointed to are preserved up to the - * lesser of the new and old sizes. If @p is %NULL, krealloc() - * behaves exactly like kmalloc(). 
If @new_size is 0 and @p is not a - * %NULL pointer, the object pointed to is freed. + * lesser of the new and old sizes (__GFP_ZERO flag is effectively ignored). + * If @p is %NULL, krealloc() behaves exactly like kmalloc(). If @new_size + * is 0 and @p is not a %NULL pointer, the object pointed to is freed. * * Return: pointer to the allocated memory or %NULL in case of error */ -- cgit v1.2.3 From f0dbd2bd1c22c6670e83ddcd46a9beb8b575e86d Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 14 Dec 2020 19:03:55 -0800 Subject: mm: slab: provide krealloc_array() When allocating an array of elements, users should check for multiplication overflow or preferably use one of the provided helpers like: kmalloc_array(). There's no krealloc_array() counterpart but there are many users who use regular krealloc() to reallocate arrays. Let's provide an actual krealloc_array() implementation. While at it: add some documentation regarding krealloc. Link: https://lkml.kernel.org/r/20201109110654.12547-3-brgl@bgdev.pl Signed-off-by: Bartosz Golaszewski Acked-by: Vlastimil Babka Cc: Alexander Shishkin Cc: Andy Shevchenko Cc: Borislav Petkov Cc: Borislav Petkov Cc: Christian König Cc: Christoph Lameter Cc: Daniel Vetter Cc: Daniel Vetter Cc: David Airlie Cc: David Rientjes Cc: Gustavo Padovan Cc: James Morse Cc: Jaroslav Kysela Cc: Jason Wang Cc: Joonsoo Kim Cc: Linus Walleij Cc: Maarten Lankhorst Cc: Mauro Carvalho Chehab Cc: Maxime Ripard Cc: "Michael S . 
Tsirkin" Cc: Pekka Enberg Cc: Robert Richter Cc: Sumit Semwal Cc: Takashi Iwai Cc: Takashi Iwai Cc: Thomas Zimmermann Cc: Tony Luck Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/core-api/memory-allocation.rst | 4 ++++ include/linux/slab.h | 18 ++++++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/Documentation/core-api/memory-allocation.rst b/Documentation/core-api/memory-allocation.rst index 4446a1ac36cc..5954ddf6ee13 100644 --- a/Documentation/core-api/memory-allocation.rst +++ b/Documentation/core-api/memory-allocation.rst @@ -147,6 +147,10 @@ The address of a chunk allocated with `kmalloc` is aligned to at least ARCH_KMALLOC_MINALIGN bytes. For sizes which are a power of two, the alignment is also guaranteed to be at least the respective size. +Chunks allocated with kmalloc() can be resized with krealloc(). Similarly +to kmalloc_array(): a helper for resizing arrays is provided in the form of +krealloc_array(). + For large allocations you can use vmalloc() and vzalloc(), or directly request pages from the page allocator. The memory allocated by `vmalloc` and related functions is not physically contiguous. diff --git a/include/linux/slab.h b/include/linux/slab.h index dd6897f62010..be4ba5867ac5 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -592,6 +592,24 @@ static inline void *kmalloc_array(size_t n, size_t size, gfp_t flags) return __kmalloc(bytes, flags); } +/** + * krealloc_array - reallocate memory for an array. 
+ * @p: pointer to the memory chunk to reallocate + * @new_n: new number of elements to alloc + * @new_size: new size of a single member of the array + * @flags: the type of memory to allocate (see kmalloc) + */ +static __must_check inline void * +krealloc_array(void *p, size_t new_n, size_t new_size, gfp_t flags) +{ + size_t bytes; + + if (unlikely(check_mul_overflow(new_n, new_size, &bytes))) + return NULL; + + return krealloc(p, bytes, flags); +} + /** * kcalloc - allocate memory for an array. The memory is set to zero. * @n: number of elements. -- cgit v1.2.3 From 64f0bd11696508feff896215c74496e3e9af617e Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 14 Dec 2020 19:03:59 -0800 Subject: ALSA: pcm: use krealloc_array() Use the helper that checks for overflows internally instead of manually calculating the size of the new array. Link: https://lkml.kernel.org/r/20201109110654.12547-4-brgl@bgdev.pl Signed-off-by: Bartosz Golaszewski Reviewed-by: Takashi Iwai Cc: Alexander Shishkin Cc: Andy Shevchenko Cc: Borislav Petkov Cc: Borislav Petkov Cc: Christian König Cc: Christoph Lameter Cc: Daniel Vetter Cc: Daniel Vetter Cc: David Airlie Cc: David Rientjes Cc: Gustavo Padovan Cc: James Morse Cc: Jaroslav Kysela Cc: Jason Wang Cc: Joonsoo Kim Cc: Linus Walleij Cc: Maarten Lankhorst Cc: Mauro Carvalho Chehab Cc: Maxime Ripard Cc: "Michael S . 
Tsirkin" Cc: Pekka Enberg Cc: Robert Richter Cc: Sumit Semwal Cc: Takashi Iwai Cc: Thomas Zimmermann Cc: Tony Luck Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- sound/core/pcm_lib.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c index bda3514c7b2d..b7e3d8f44511 100644 --- a/sound/core/pcm_lib.c +++ b/sound/core/pcm_lib.c @@ -1129,8 +1129,8 @@ int snd_pcm_hw_rule_add(struct snd_pcm_runtime *runtime, unsigned int cond, if (constrs->rules_num >= constrs->rules_all) { struct snd_pcm_hw_rule *new; unsigned int new_rules = constrs->rules_all + 16; - new = krealloc(constrs->rules, new_rules * sizeof(*c), - GFP_KERNEL); + new = krealloc_array(constrs->rules, new_rules, + sizeof(*c), GFP_KERNEL); if (!new) { va_end(args); return -ENOMEM; -- cgit v1.2.3 From 3a99974872ccb2f625c68a7444e7575f1c7fd06d Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 14 Dec 2020 19:04:03 -0800 Subject: vhost: vringh: use krealloc_array() Use the helper that checks for overflows internally instead of manually calculating the size of the new array. Link: https://lkml.kernel.org/r/20201109110654.12547-5-brgl@bgdev.pl Signed-off-by: Bartosz Golaszewski Acked-by: Michael S. 
Tsirkin Cc: Alexander Shishkin Cc: Andy Shevchenko Cc: Borislav Petkov Cc: Borislav Petkov Cc: Christian König Cc: Christoph Lameter Cc: Daniel Vetter Cc: Daniel Vetter Cc: David Airlie Cc: David Rientjes Cc: Gustavo Padovan Cc: James Morse Cc: Jaroslav Kysela Cc: Jason Wang Cc: Joonsoo Kim Cc: Linus Walleij Cc: Maarten Lankhorst Cc: Mauro Carvalho Chehab Cc: Maxime Ripard Cc: Pekka Enberg Cc: Robert Richter Cc: Sumit Semwal Cc: Takashi Iwai Cc: Takashi Iwai Cc: Thomas Zimmermann Cc: Tony Luck Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/vhost/vringh.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/vhost/vringh.c b/drivers/vhost/vringh.c index b7403ba8e7f7..85d85faba058 100644 --- a/drivers/vhost/vringh.c +++ b/drivers/vhost/vringh.c @@ -198,7 +198,8 @@ static int resize_iovec(struct vringh_kiov *iov, gfp_t gfp) flag = (iov->max_num & VRINGH_IOV_ALLOCATED); if (flag) - new = krealloc(iov->iov, new_num * sizeof(struct iovec), gfp); + new = krealloc_array(iov->iov, new_num, + sizeof(struct iovec), gfp); else { new = kmalloc_array(new_num, sizeof(struct iovec), gfp); if (new) { -- cgit v1.2.3 From 2207994d014e60e74cbec216a1a0c6aab06ccc91 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 14 Dec 2020 19:04:08 -0800 Subject: pinctrl: use krealloc_array() Use the helper that checks for overflows internally instead of manually calculating the size of the new array. Link: https://lkml.kernel.org/r/20201109110654.12547-6-brgl@bgdev.pl Signed-off-by: Bartosz Golaszewski Reviewed-by: Andy Shevchenko Cc: Alexander Shishkin Cc: Borislav Petkov Cc: Borislav Petkov Cc: Christian König Cc: Christoph Lameter Cc: Daniel Vetter Cc: Daniel Vetter Cc: David Airlie Cc: David Rientjes Cc: Gustavo Padovan Cc: James Morse Cc: Jaroslav Kysela Cc: Jason Wang Cc: Joonsoo Kim Cc: Linus Walleij Cc: Maarten Lankhorst Cc: Mauro Carvalho Chehab Cc: Maxime Ripard Cc: "Michael S .
Tsirkin" Cc: Pekka Enberg Cc: Robert Richter Cc: Sumit Semwal Cc: Takashi Iwai Cc: Takashi Iwai Cc: Thomas Zimmermann Cc: Tony Luck Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/pinctrl/pinctrl-utils.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/pinctrl-utils.c b/drivers/pinctrl/pinctrl-utils.c index f2bcbf62c03d..93df0d4c0a24 100644 --- a/drivers/pinctrl/pinctrl-utils.c +++ b/drivers/pinctrl/pinctrl-utils.c @@ -39,7 +39,7 @@ int pinctrl_utils_reserve_map(struct pinctrl_dev *pctldev, if (old_num >= new_num) return 0; - new_map = krealloc(*map, sizeof(*new_map) * new_num, GFP_KERNEL); + new_map = krealloc_array(*map, new_num, sizeof(*new_map), GFP_KERNEL); if (!new_map) { dev_err(pctldev->dev, "krealloc(map) failed\n"); return -ENOMEM; -- cgit v1.2.3 From af11be05b6d0b35da45f6457ef1f871634a50b1f Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 14 Dec 2020 19:04:12 -0800 Subject: edac: ghes: use krealloc_array() Use the helper that checks for overflows internally instead of manually calculating the size of the new array. Link: https://lkml.kernel.org/r/20201109110654.12547-7-brgl@bgdev.pl Signed-off-by: Bartosz Golaszewski Acked-by: Borislav Petkov Cc: Alexander Shishkin Cc: Andy Shevchenko Cc: Borislav Petkov Cc: Christian König Cc: Christoph Lameter Cc: Daniel Vetter Cc: Daniel Vetter Cc: David Airlie Cc: David Rientjes Cc: Gustavo Padovan Cc: James Morse Cc: Jaroslav Kysela Cc: Jason Wang Cc: Joonsoo Kim Cc: Linus Walleij Cc: Maarten Lankhorst Cc: Mauro Carvalho Chehab Cc: Maxime Ripard Cc: "Michael S .
Tsirkin" Cc: Pekka Enberg Cc: Robert Richter Cc: Sumit Semwal Cc: Takashi Iwai Cc: Takashi Iwai Cc: Thomas Zimmermann Cc: Tony Luck Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/edac/ghes_edac.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/edac/ghes_edac.c b/drivers/edac/ghes_edac.c index a918ca93e4f7..6d1ddecbf0da 100644 --- a/drivers/edac/ghes_edac.c +++ b/drivers/edac/ghes_edac.c @@ -207,8 +207,8 @@ static void enumerate_dimms(const struct dmi_header *dh, void *arg) if (!hw->num_dimms || !(hw->num_dimms % 16)) { struct dimm_info *new; - new = krealloc(hw->dimms, (hw->num_dimms + 16) * sizeof(struct dimm_info), - GFP_KERNEL); + new = krealloc_array(hw->dimms, hw->num_dimms + 16, + sizeof(struct dimm_info), GFP_KERNEL); if (!new) { WARN_ON_ONCE(1); return; -- cgit v1.2.3 From 32ce25539d18fe04427c7305fdc49076f04660ac Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 14 Dec 2020 19:04:16 -0800 Subject: drm: atomic: use krealloc_array() Use the helper that checks for overflows internally instead of manually calculating the size of the new array. Link: https://lkml.kernel.org/r/20201109110654.12547-8-brgl@bgdev.pl Signed-off-by: Bartosz Golaszewski Acked-by: Daniel Vetter Cc: Alexander Shishkin Cc: Andy Shevchenko Cc: Borislav Petkov Cc: Borislav Petkov Cc: Christian König Cc: Christoph Lameter Cc: Daniel Vetter Cc: David Airlie Cc: David Rientjes Cc: Gustavo Padovan Cc: James Morse Cc: Jaroslav Kysela Cc: Jason Wang Cc: Joonsoo Kim Cc: Linus Walleij Cc: Maarten Lankhorst Cc: Mauro Carvalho Chehab Cc: Maxime Ripard Cc: "Michael S .
Tsirkin" Cc: Pekka Enberg Cc: Robert Richter Cc: Sumit Semwal Cc: Takashi Iwai Cc: Takashi Iwai Cc: Thomas Zimmermann Cc: Tony Luck Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/gpu/drm/drm_atomic.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c index 58527f151984..09ad6a2ec17b 100644 --- a/drivers/gpu/drm/drm_atomic.c +++ b/drivers/gpu/drm/drm_atomic.c @@ -960,7 +960,8 @@ drm_atomic_get_connector_state(struct drm_atomic_state *state, struct __drm_connnectors_state *c; int alloc = max(index + 1, config->num_connector); - c = krealloc(state->connectors, alloc * sizeof(*state->connectors), GFP_KERNEL); + c = krealloc_array(state->connectors, alloc, + sizeof(*state->connectors), GFP_KERNEL); if (!c) return ERR_PTR(-ENOMEM); -- cgit v1.2.3 From f8f7e2bfb1182b7d60f38a72bff1a50130186c6b Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 14 Dec 2020 19:04:21 -0800 Subject: hwtracing: intel: use krealloc_array() Use the helper that checks for overflows internally instead of manually calculating the size of the new array. Link: https://lkml.kernel.org/r/20201109110654.12547-9-brgl@bgdev.pl Signed-off-by: Bartosz Golaszewski Cc: Alexander Shishkin Cc: Andy Shevchenko Cc: Borislav Petkov Cc: Borislav Petkov Cc: Christian König Cc: Christoph Lameter Cc: Daniel Vetter Cc: Daniel Vetter Cc: David Airlie Cc: David Rientjes Cc: Gustavo Padovan Cc: James Morse Cc: Jaroslav Kysela Cc: Jason Wang Cc: Joonsoo Kim Cc: Linus Walleij Cc: Maarten Lankhorst Cc: Mauro Carvalho Chehab Cc: Maxime Ripard Cc: "Michael S .
Tsirkin" Cc: Pekka Enberg Cc: Robert Richter Cc: Sumit Semwal Cc: Takashi Iwai Cc: Takashi Iwai Cc: Thomas Zimmermann Cc: Tony Luck Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/hwtracing/intel_th/msu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwtracing/intel_th/msu.c b/drivers/hwtracing/intel_th/msu.c index 3a77551fb4fc..7d95242db900 100644 --- a/drivers/hwtracing/intel_th/msu.c +++ b/drivers/hwtracing/intel_th/msu.c @@ -2002,7 +2002,7 @@ nr_pages_store(struct device *dev, struct device_attribute *attr, } nr_wins++; - rewin = krealloc(win, sizeof(*win) * nr_wins, GFP_KERNEL); + rewin = krealloc_array(win, nr_wins, sizeof(*win), GFP_KERNEL); if (!rewin) { kfree(win); return -ENOMEM; -- cgit v1.2.3 From a47fc51d8e1e9ce0f2d8fd9e5197649f00bac4ca Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 14 Dec 2020 19:04:25 -0800 Subject: dma-buf: use krealloc_array() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use the helper that checks for overflows internally instead of manually calculating the size of the new array. Link: https://lkml.kernel.org/r/20201109110654.12547-10-brgl@bgdev.pl Signed-off-by: Bartosz Golaszewski Acked-by: Christian König Cc: Alexander Shishkin Cc: Andy Shevchenko Cc: Borislav Petkov Cc: Borislav Petkov Cc: Christoph Lameter Cc: Daniel Vetter Cc: Daniel Vetter Cc: David Airlie Cc: David Rientjes Cc: Gustavo Padovan Cc: James Morse Cc: Jaroslav Kysela Cc: Jason Wang Cc: Joonsoo Kim Cc: Linus Walleij Cc: Maarten Lankhorst Cc: Mauro Carvalho Chehab Cc: Maxime Ripard Cc: "Michael S . 
Tsirkin" Cc: Pekka Enberg Cc: Robert Richter Cc: Sumit Semwal Cc: Takashi Iwai Cc: Takashi Iwai Cc: Thomas Zimmermann Cc: Tony Luck Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/dma-buf/sync_file.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/dma-buf/sync_file.c b/drivers/dma-buf/sync_file.c index 5a5a1da01a00..20d9bddbb985 100644 --- a/drivers/dma-buf/sync_file.c +++ b/drivers/dma-buf/sync_file.c @@ -270,8 +270,7 @@ static struct sync_file *sync_file_merge(const char *name, struct sync_file *a, fences[i++] = dma_fence_get(a_fences[0]); if (num_fences > i) { - nfences = krealloc(fences, i * sizeof(*fences), - GFP_KERNEL); + nfences = krealloc_array(fences, i, sizeof(*fences), GFP_KERNEL); if (!nfences) goto err; -- cgit v1.2.3 From 0c06dd75514327be4b1c22b109341ff7dfeeff98 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Mon, 14 Dec 2020 19:04:29 -0800 Subject: mm, slab, slub: clear the slab_cache field when freeing page The page allocator expects that page->mapping is NULL for a page being freed. SLAB and SLUB use the slab_cache field which is in union with mapping, but before freeing the page, the field is referenced with the "mapping" name when set to NULL. It's IMHO more correct (albeit functionally the same) to use the slab_cache name as that's the field we use in SL*B, and document why we clear it in a comment (we don't clear fields such as s_mem or freelist, as page allocator doesn't care about those). While using the 'mapping' name would automagically keep the code correct if the unions in struct page changed, such changes should be done consciously and needed changes evaluated - the comment should help with that. 
Link: https://lkml.kernel.org/r/20201210160020.21562-1-vbabka@suse.cz Signed-off-by: Vlastimil Babka Acked-by: David Rientjes Acked-by: Joonsoo Kim Cc: Christoph Lameter Cc: Pekka Enberg Cc: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/slab.c | 3 ++- mm/slub.c | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/mm/slab.c b/mm/slab.c index b1113561b98b..2e67a513b0c9 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -1399,7 +1399,8 @@ static void kmem_freepages(struct kmem_cache *cachep, struct page *page) __ClearPageSlabPfmemalloc(page); __ClearPageSlab(page); page_mapcount_reset(page); - page->mapping = NULL; + /* In union with page->mapping where page allocator expects NULL */ + page->slab_cache = NULL; if (current->reclaim_state) current->reclaim_state->reclaimed_slab += 1 << order; diff --git a/mm/slub.c b/mm/slub.c index 34dcc09e2ec9..2098a544c4e2 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1836,8 +1836,8 @@ static void __free_slab(struct kmem_cache *s, struct page *page) __ClearPageSlabPfmemalloc(page); __ClearPageSlab(page); - - page->mapping = NULL; + /* In union with page->mapping where page allocator expects NULL */ + page->slab_cache = NULL; if (current->reclaim_state) current->reclaim_state->reclaimed_slab += pages; unaccount_slab_page(page, order, s); -- cgit v1.2.3 From a32d654db543843a5ffb248feaec1a909718addd Mon Sep 17 00:00:00 2001 From: Alexander Popov Date: Mon, 14 Dec 2020 19:04:33 -0800 Subject: mm/slab: perform init_on_free earlier Currently in CONFIG_SLAB init_on_free happens too late, and heap objects go to the heap quarantine not being erased. Let's move init_on_free clearing before calling kasan_slab_free(). In that case heap quarantine will store erased objects, similarly to CONFIG_SLUB=y behavior.
Link: https://lkml.kernel.org/r/20201210183729.1261524-1-alex.popov@linux.com Signed-off-by: Alexander Popov Reviewed-by: Alexander Potapenko Acked-by: David Rientjes Acked-by: Joonsoo Kim Cc: Christoph Lameter Cc: Pekka Enberg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/slab.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/mm/slab.c b/mm/slab.c index 2e67a513b0c9..176b65e2157d 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -3417,6 +3417,9 @@ free_done: static __always_inline void __cache_free(struct kmem_cache *cachep, void *objp, unsigned long caller) { + if (unlikely(slab_want_init_on_free(cachep))) + memset(objp, 0, cachep->object_size); + /* Put the object into the quarantine, don't touch it for now. */ if (kasan_slab_free(cachep, objp, _RET_IP_)) return; @@ -3435,8 +3438,6 @@ void ___cache_free(struct kmem_cache *cachep, void *objp, struct array_cache *ac = cpu_cache_get(cachep); check_irq_off(); - if (unlikely(slab_want_init_on_free(cachep))) - memset(objp, 0, cachep->object_size); kmemleak_free_recursive(objp, cachep->flags); objp = cache_free_debugcheck(cachep, objp, caller); memcg_slab_free_hook(cachep, &objp, 1); -- cgit v1.2.3 From 965c484815f591737fb546628704d4c362317705 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Mon, 14 Dec 2020 19:04:36 -0800 Subject: mm, slub: use kmem_cache_debug_flags() in deactivate_slab() Commit 9cf7a1118365 ("mm/slub: make add_full() condition more explicit") replaced an unnecessarily generic kmem_cache_debug(s) check with an explicit check of SLAB_STORE_USER and #ifdef CONFIG_SLUB_DEBUG. We can achieve the same specific check with the recently added kmem_cache_debug_flags() which removes the #ifdef and restores the no-branch-overhead benefit of static key check when slub debugging is not enabled. 
Link: https://lkml.kernel.org/r/3ef24214-38c7-1238-8296-88caf7f48ab6@suse.cz Signed-off-by: Vlastimil Babka Cc: Abel Wu Cc: Christopher Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Liu Xiang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/slub.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index 2098a544c4e2..79afc8a38ebf 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2245,8 +2245,7 @@ redo: } } else { m = M_FULL; -#ifdef CONFIG_SLUB_DEBUG - if ((s->flags & SLAB_STORE_USER) && !lock) { + if (kmem_cache_debug_flags(s, SLAB_STORE_USER) && !lock) { lock = 1; /* * This also ensures that the scanning of full @@ -2255,7 +2254,6 @@ redo: */ spin_lock(&n->list_lock); } -#endif } if (l != m) { -- cgit v1.2.3 From 045ab8c9487ba099eade6578621e2af4a0d5ba0c Mon Sep 17 00:00:00 2001 From: Bharata B Rao Date: Mon, 14 Dec 2020 19:04:40 -0800 Subject: mm/slub: let number of online CPUs determine the slub page order The page order of the slab that gets chosen for a given slab cache depends on the number of objects that can be fit in the slab while meeting other requirements. We start with a value of minimum objects based on nr_cpu_ids that is driven by possible number of CPUs and hence could be higher than the actual number of CPUs present in the system. This leads to calculate_order() choosing a page order that is on the higher side leading to increased slab memory consumption on systems that have bigger page sizes. Hence rely on the number of online CPUs when determining the minimum objects, thereby increasing the chances of choosing a lower conservative page order for the slab. Vlastimil said: "Ideally, we would react to hotplug events and update existing caches accordingly. But for that, recalculation of order for existing caches would have to be made safe, while not affecting hot paths.
We have removed the sysfs interface with 32a6f409b693 ("mm, slub: remove runtime allocation order changes") as it didn't seem easy and worth the trouble. In case somebody wants to start with a large order right from the boot because they know they will hotplug lots of cpus later, they can use slub_min_objects= boot param to override this heuristic. So in case this change regresses somebody's performance, there's a way around it and thus the risk is low IMHO" Link: https://lkml.kernel.org/r/20201118082759.1413056-1-bharata@linux.ibm.com Signed-off-by: Bharata B Rao Acked-by: Vlastimil Babka Acked-by: Roman Gushchin Acked-by: David Rientjes Cc: Christoph Lameter Cc: Joonsoo Kim Cc: Shakeel Butt Cc: Johannes Weiner Cc: Aneesh Kumar K.V Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/slub.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/slub.c b/mm/slub.c index 79afc8a38ebf..6326b98c2164 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -3431,7 +3431,7 @@ static inline int calculate_order(unsigned int size) */ min_objects = slub_min_objects; if (!min_objects) - min_objects = 4 * (fls(nr_cpu_ids) + 1); + min_objects = 4 * (fls(num_online_cpus()) + 1); max_objects = order_objects(slub_max_order, size); min_objects = min(min_objects, max_objects); -- cgit v1.2.3 From 7d18dd75a8afc072aabc77f2a9c3df94cdc53f33 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 14 Dec 2020 19:04:43 -0800 Subject: device-dax/kmem: use struct_size() Linus notes the kernel has had a nice helper for the 'size of struct with variable array member at the end' operation for a couple years now, use it. 
Link: http://lore.kernel.org/r/CAHk-=wgNTLbvAD8mNTvh+GQyapNWeX20PXhU_+frqEvVq4298w@mail.gmail.com Link: https://lkml.kernel.org/r/160288261564.3242821.6055291930923876456.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams Reported-by: Linus Torvalds Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/dax/kmem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dax/kmem.c b/drivers/dax/kmem.c index b4368c5b6a0c..403ec42472d1 100644 --- a/drivers/dax/kmem.c +++ b/drivers/dax/kmem.c @@ -61,7 +61,7 @@ static int dev_dax_kmem_probe(struct dev_dax *dev_dax) return -EINVAL; } - data = kzalloc(sizeof(*data) + sizeof(struct resource *) * dev_dax->nr_range, GFP_KERNEL); + data = kzalloc(struct_size(data, res, dev_dax->nr_range), GFP_KERNEL); if (!data) return -ENOMEM; -- cgit v1.2.3 From 7fb7ab6d618a4dc7ea3f3eafc92388a35b4f8894 Mon Sep 17 00:00:00 2001 From: Zhenhua Huang Date: Mon, 14 Dec 2020 19:04:46 -0800 Subject: mm: fix page_owner initializing issue for arm32 Page owner information for pages used by page owner itself is missing on arm32 targets. The reason is that dummy_handle and failure_handle are not initialized correctly. Buddy allocator is used to initialize these two handles. However, buddy allocator is not ready when page owner calls it. This change fixes that by initializing page owner after buddy initialization. The working flow before and after this change is: original logic: 1. allocated memory for page_ext(using memblock). 2. invoke the init callback of page_ext_ops like page_owner(using buddy allocator). 3. initialize buddy. after this change: 1. allocated memory for page_ext(using memblock). 2. initialize buddy. 3. invoke the init callback of page_ext_ops like page_owner(using buddy allocator).
with the change, failure/dummy_handle can get its correct value and page owner output for example has the one for page owner itself: Page allocated via order 2, mask 0x6202c0(GFP_USER|__GFP_NOWARN), pid 1006, ts 67278156558 ns PFN 543776 type Unmovable Block 531 type Unmovable Flags 0x0() init_page_owner+0x28/0x2f8 invoke_init_callbacks_flatmem+0x24/0x34 start_kernel+0x33c/0x5d8 Link: https://lkml.kernel.org/r/1603104925-5888-1-git-send-email-zhenhuah@codeaurora.org Signed-off-by: Zhenhua Huang Acked-by: Vlastimil Babka Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page_ext.h | 8 ++++++++ init/main.c | 2 ++ mm/page_ext.c | 10 ++++++++-- 3 files changed, 18 insertions(+), 2 deletions(-) diff --git a/include/linux/page_ext.h b/include/linux/page_ext.h index cfce186f0c4e..aff81ba31bd8 100644 --- a/include/linux/page_ext.h +++ b/include/linux/page_ext.h @@ -44,8 +44,12 @@ static inline void page_ext_init_flatmem(void) { } extern void page_ext_init(void); +static inline void page_ext_init_flatmem_late(void) +{ +} #else extern void page_ext_init_flatmem(void); +extern void page_ext_init_flatmem_late(void); static inline void page_ext_init(void) { } @@ -76,6 +80,10 @@ static inline void page_ext_init(void) { } +static inline void page_ext_init_flatmem_late(void) +{ +} + static inline void page_ext_init_flatmem(void) { } diff --git a/init/main.c b/init/main.c index 32b2a8affafd..82ae0d1345a0 100644 --- a/init/main.c +++ b/init/main.c @@ -828,6 +828,8 @@ static void __init mm_init(void) init_debug_pagealloc(); report_meminit(); mem_init(); + /* page_owner must be initialized after buddy is ready */ + page_ext_init_flatmem_late(); kmem_cache_init(); kmemleak_init(); pgtable_init(); diff --git a/mm/page_ext.c