summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/admin-guide/kernel-parameters.txt12
-rw-r--r--drivers/gpu/drm/drm_dp_mst_topology.c2
-rw-r--r--include/linux/rculist.h4
-rw-r--r--include/linux/rcutiny.h1
-rw-r--r--include/linux/rcutree.h1
-rw-r--r--include/linux/torture.h2
-rw-r--r--kernel/rcu/rcu.h2
-rw-r--r--kernel/rcu/rcuperf.c5
-rw-r--r--kernel/rcu/rcutorture.c63
-rw-r--r--kernel/rcu/srcutree.c21
-rw-r--r--kernel/rcu/tree.c113
-rw-r--r--kernel/rcu/tree.h1
-rw-r--r--kernel/rcu/tree_exp.h19
-rw-r--r--kernel/rcu/tree_plugin.h19
-rw-r--r--kernel/rcu/tree_stall.h104
-rw-r--r--kernel/rcu/update.c14
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kcsan-collapse.sh22
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh16
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-recheck.sh9
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh52
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm.sh11
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE102
22 files changed, 381 insertions, 114 deletions
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index aaa867860433..828ff975fbc6 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -4210,12 +4210,24 @@
Duration of CPU stall (s) to test RCU CPU stall
warnings, zero to disable.
+ rcutorture.stall_cpu_block= [KNL]
+ Sleep while stalling if set. This will result
+ in warnings from preemptible RCU in addition
+ to any other stall-related activity.
+
rcutorture.stall_cpu_holdoff= [KNL]
Time to wait (s) after boot before inducing stall.
rcutorture.stall_cpu_irqsoff= [KNL]
Disable interrupts while stalling if set.
+ rcutorture.stall_gp_kthread= [KNL]
+ Duration (s) of forced sleep within RCU
+ grace-period kthread to test RCU CPU stall
+ warnings, zero to disable. If both stall_cpu
+ and stall_gp_kthread are specified, the
+ kthread is starved first, then the CPU.
+
rcutorture.stat_interval= [KNL]
Time (s) between statistics printk()s.
diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c
index 70c4b7afed12..c40986728734 100644
--- a/drivers/gpu/drm/drm_dp_mst_topology.c
+++ b/drivers/gpu/drm/drm_dp_mst_topology.c
@@ -5494,7 +5494,7 @@ struct drm_dp_aux *drm_dp_mst_dsc_aux_for_port(struct drm_dp_mst_port *port)
{
struct drm_dp_mst_port *immediate_upstream_port;
struct drm_dp_mst_port *fec_port;
- struct drm_dp_desc desc = { 0 };
+ struct drm_dp_desc desc = { };
u8 endpoint_fec;
u8 endpoint_dsc;
diff --git a/include/linux/rculist.h b/include/linux/rculist.h
index 8214cdc715f2..7375bb3da140 100644
--- a/include/linux/rculist.h
+++ b/include/linux/rculist.h
@@ -371,7 +371,7 @@ static inline void list_splice_tail_init_rcu(struct list_head *list,
* @pos: the type * to use as a loop cursor.
* @head: the head for your list.
* @member: the name of the list_head within the struct.
- * @cond...: optional lockdep expression if called from non-RCU protection.
+ * @cond: optional lockdep expression if called from non-RCU protection.
*
* This list-traversal primitive may safely run concurrently with
* the _rcu list-mutation primitives such as list_add_rcu()
@@ -646,7 +646,7 @@ static inline void hlist_add_behind_rcu(struct hlist_node *n,
* @pos: the type * to use as a loop cursor.
* @head: the head for your list.
* @member: the name of the hlist_node within the struct.
- * @cond...: optional lockdep expression if called from non-RCU protection.
+ * @cond: optional lockdep expression if called from non-RCU protection.
*
* This list-traversal primitive may safely run concurrently with
* the _rcu list-mutation primitives such as hlist_add_head_rcu()
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index d77e11186afd..3465ba704a11 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -87,6 +87,7 @@ static inline bool rcu_inkernel_boot_has_ended(void) { return true; }
static inline bool rcu_is_watching(void) { return true; }
static inline void rcu_momentary_dyntick_idle(void) { }
static inline void kfree_rcu_scheduler_running(void) { }
+static inline bool rcu_gp_might_be_stalled(void) { return false; }
/* Avoid RCU read-side critical sections leaking across. */
static inline void rcu_all_qs(void) { barrier(); }
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index 45f3f66bb04d..fbc26274af4d 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -39,6 +39,7 @@ void rcu_barrier(void);
bool rcu_eqs_special_set(int cpu);
void rcu_momentary_dyntick_idle(void);
void kfree_rcu_scheduler_running(void);
+bool rcu_gp_might_be_stalled(void);
unsigned long get_state_synchronize_rcu(void);
void cond_synchronize_rcu(unsigned long oldstate);
diff --git a/include/linux/torture.h b/include/linux/torture.h
index 6241f59e2d6f..629b66e6c161 100644
--- a/include/linux/torture.h
+++ b/include/linux/torture.h
@@ -89,7 +89,7 @@ void _torture_stop_kthread(char *m, struct task_struct **tp);
#ifdef CONFIG_PREEMPTION
#define torture_preempt_schedule() preempt_schedule()
#else
-#define torture_preempt_schedule()
+#define torture_preempt_schedule() do { } while (0)
#endif
#endif /* __LINUX_TORTURE_H */
diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index 296f9262d119..cf66a3ccd757 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -457,6 +457,7 @@ void do_trace_rcu_torture_read(const char *rcutorturename,
unsigned long secs,
unsigned long c_old,
unsigned long c);
+void rcu_gp_set_torture_wait(int duration);
#else
static inline void rcutorture_get_gp_data(enum rcutorture_type test_type,
int *flags, unsigned long *gp_seq)
@@ -474,6 +475,7 @@ void do_trace_rcu_torture_read(const char *rcutorturename,
#define do_trace_rcu_torture_read(rcutorturename, rhp, secs, c_old, c) \
do { } while (0)
#endif
+static inline void rcu_gp_set_torture_wait(int duration) { }
#endif
#if IS_ENABLED(CONFIG_RCU_TORTURE_TEST) || IS_MODULE(CONFIG_RCU_TORTURE_TEST)
diff --git a/kernel/rcu/rcuperf.c b/kernel/rcu/rcuperf.c
index a4a8d097d84d..16dd1e6b7c09 100644
--- a/kernel/rcu/rcuperf.c
+++ b/kernel/rcu/rcuperf.c
@@ -88,6 +88,7 @@ torture_param(bool, shutdown, RCUPERF_SHUTDOWN,
torture_param(int, verbose, 1, "Enable verbose debugging printk()s");
torture_param(int, writer_holdoff, 0, "Holdoff (us) between GPs, zero to disable");
torture_param(int, kfree_rcu_test, 0, "Do we run a kfree_rcu() perf test?");
+torture_param(int, kfree_mult, 1, "Multiple of kfree_obj size to allocate.");
static char *perf_type = "rcu";
module_param(perf_type, charp, 0444);
@@ -635,7 +636,7 @@ kfree_perf_thread(void *arg)
}
for (i = 0; i < kfree_alloc_num; i++) {
- alloc_ptr = kmalloc(sizeof(struct kfree_obj), GFP_KERNEL);
+ alloc_ptr = kmalloc(kfree_mult * sizeof(struct kfree_obj), GFP_KERNEL);
if (!alloc_ptr)
return -ENOMEM;
@@ -722,6 +723,8 @@ kfree_perf_init(void)
schedule_timeout_uninterruptible(1);
}
+ pr_alert("kfree object size=%lu\n", kfree_mult * sizeof(struct kfree_obj));
+
kfree_reader_tasks = kcalloc(kfree_nrealthreads, sizeof(kfree_reader_tasks[0]),
GFP_KERNEL);
if (kfree_reader_tasks == NULL) {
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 0bec9254959b..efb792e13fca 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -52,6 +52,18 @@
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Paul E. McKenney <paulmck@linux.ibm.com> and Josh Triplett <josh@joshtriplett.org>");
+#ifndef data_race
+#define data_race(expr) \
+ ({ \
+ expr; \
+ })
+#endif
+#ifndef ASSERT_EXCLUSIVE_WRITER
+#define ASSERT_EXCLUSIVE_WRITER(var) do { } while (0)
+#endif
+#ifndef ASSERT_EXCLUSIVE_ACCESS
+#define ASSERT_EXCLUSIVE_ACCESS(var) do { } while (0)
+#endif
/* Bits for ->extendables field, extendables param, and related definitions. */
#define RCUTORTURE_RDR_SHIFT 8 /* Put SRCU index in upper bits. */
@@ -103,6 +115,9 @@ torture_param(int, stall_cpu, 0, "Stall duration (s), zero to disable.");
torture_param(int, stall_cpu_holdoff, 10,
"Time to wait before starting stall (s).");
torture_param(int, stall_cpu_irqsoff, 0, "Disable interrupts while stalling.");
+torture_param(int, stall_cpu_block, 0, "Sleep while stalling.");
+torture_param(int, stall_gp_kthread, 0,
+ "Grace-period kthread stall duration (s).");
torture_param(int, stat_interval, 60,
"Number of seconds between stats printk()s");
torture_param(int, stutter, 5, "Number of seconds to run/halt test");
@@ -905,7 +920,7 @@ static int rcu_torture_boost(void *arg)
/* Wait for the next test interval. */
oldstarttime = boost_starttime;
- while (ULONG_CMP_LT(jiffies, oldstarttime)) {
+ while (time_before(jiffies, oldstarttime)) {
schedule_timeout_interruptible(oldstarttime - jiffies);
stutter_wait("rcu_torture_boost");
if (torture_must_stop())
@@ -915,7 +930,7 @@ static int rcu_torture_boost(void *arg)
/* Do one boost-test interval. */
endtime = oldstarttime + test_boost_duration * HZ;
call_rcu_time = jiffies;
- while (ULONG_CMP_LT(jiffies, endtime)) {
+ while (time_before(jiffies, endtime)) {
/* If we don't have a callback in flight, post one. */
if (!smp_load_acquire(&rbi.inflight)) {
/* RCU core before ->inflight = 1. */
@@ -986,7 +1001,7 @@ rcu_torture_fqs(void *arg)
VERBOSE_TOROUT_STRING("rcu_torture_fqs task started");
do {
fqs_resume_time = jiffies + fqs_stutter * HZ;
- while (ULONG_CMP_LT(jiffies, fqs_resume_time) &&
+ while (time_before(jiffies, fqs_resume_time) &&
!kthread_should_stop()) {
schedule_timeout_interruptible(1);
}
@@ -1525,9 +1540,9 @@ rcu_torture_stats_print(void)
atomic_long_read(&n_rcu_torture_timers));
torture_onoff_stats();
pr_cont("barrier: %ld/%ld:%ld\n",
- n_barrier_successes,
- n_barrier_attempts,
- n_rcu_torture_barrier_error);
+ data_race(n_barrier_successes),
+ data_race(n_barrier_attempts),
+ data_race(n_rcu_torture_barrier_error));
pr_alert("%s%s ", torture_type, TORTURE_FLAG);
if (atomic_read(&n_rcu_torture_mberror) ||
@@ -1617,6 +1632,7 @@ rcu_torture_print_module_parms(struct rcu_torture_ops *cur_ops, const char *tag)
"test_boost=%d/%d test_boost_interval=%d "
"test_boost_duration=%d shutdown_secs=%d "
"stall_cpu=%d stall_cpu_holdoff=%d stall_cpu_irqsoff=%d "
+ "stall_cpu_block=%d "
"n_barrier_cbs=%d "
"onoff_interval=%d onoff_holdoff=%d\n",
torture_type, tag, nrealreaders, nfakewriters,
@@ -1625,6 +1641,7 @@ rcu_torture_print_module_parms(struct rcu_torture_ops *cur_ops, const char *tag)
test_boost, cur_ops->can_boost,
test_boost_interval, test_boost_duration, shutdown_secs,
stall_cpu, stall_cpu_holdoff, stall_cpu_irqsoff,
+ stall_cpu_block,
n_barrier_cbs,
onoff_interval, onoff_holdoff);
}
@@ -1680,6 +1697,7 @@ static int rcutorture_booster_init(unsigned int cpu)
*/
static int rcu_torture_stall(void *args)
{
+ int idx;
unsigned long stop_at;
VERBOSE_TOROUT_STRING("rcu_torture_stall task started");
@@ -1688,26 +1706,37 @@ static int rcu_torture_stall(void *args)
schedule_timeout_interruptible(stall_cpu_holdoff * HZ);
VERBOSE_TOROUT_STRING("rcu_torture_stall end holdoff");
}
- if (!kthread_should_stop()) {
+ if (!kthread_should_stop() && stall_gp_kthread > 0) {
+ VERBOSE_TOROUT_STRING("rcu_torture_stall begin GP stall");
+ rcu_gp_set_torture_wait(stall_gp_kthread * HZ);
+ for (idx = 0; idx < stall_gp_kthread + 2; idx++) {
+ if (kthread_should_stop())
+ break;
+ schedule_timeout_uninterruptible(HZ);
+ }
+ }
+ if (!kthread_should_stop() && stall_cpu > 0) {
+ VERBOSE_TOROUT_STRING("rcu_torture_stall begin CPU stall");
stop_at = ktime_get_seconds() + stall_cpu;
/* RCU CPU stall is expected behavior in following code. */
- rcu_read_lock();
+ idx = cur_ops->readlock();
if (stall_cpu_irqsoff)
local_irq_disable();
- else
+ else if (!stall_cpu_block)
preempt_disable();
pr_alert("rcu_torture_stall start on CPU %d.\n",
- smp_processor_id());
+ raw_smp_processor_id());
while (ULONG_CMP_LT((unsigned long)ktime_get_seconds(),
stop_at))
- continue; /* Induce RCU CPU stall warning. */
+ if (stall_cpu_block)
+ schedule_timeout_uninterruptible(HZ);
if (stall_cpu_irqsoff)
local_irq_enable();
- else
+ else if (!stall_cpu_block)
preempt_enable();
- rcu_read_unlock();
- pr_alert("rcu_torture_stall end.\n");
+ cur_ops->readunlock(idx);
}
+ pr_alert("rcu_torture_stall end.\n");
torture_shutdown_absorb("rcu_torture_stall");
while (!kthread_should_stop())
schedule_timeout_interruptible(10 * HZ);
@@ -1717,7 +1746,7 @@ static int rcu_torture_stall(void *args)
/* Spawn CPU-stall kthread, if stall_cpu specified. */
static int __init rcu_torture_stall_init(void)
{
- if (stall_cpu <= 0)
+ if (stall_cpu <= 0 && stall_gp_kthread <= 0)
return 0;
return torture_create_kthread(rcu_torture_stall, NULL, stall_task);
}
@@ -1773,8 +1802,8 @@ struct rcu_fwd {
unsigned long rcu_launder_gp_seq_start;
};
-struct rcu_fwd *rcu_fwds;
-bool rcu_fwd_emergency_stop;
+static struct rcu_fwd *rcu_fwds;
+static bool rcu_fwd_emergency_stop;
static void rcu_torture_fwd_cb_hist(struct rcu_fwd *rfp)
{
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index 0c71505f0e19..6d3ef700fb0e 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -29,6 +29,19 @@
#include "rcu.h"
#include "rcu_segcblist.h"
+#ifndef data_race
+#define data_race(expr) \
+ ({ \
+ expr; \
+ })
+#endif
+#ifndef ASSERT_EXCLUSIVE_WRITER
+#define ASSERT_EXCLUSIVE_WRITER(var) do { } while (0)
+#endif
+#ifndef ASSERT_EXCLUSIVE_ACCESS
+#define ASSERT_EXCLUSIVE_ACCESS(var) do { } while (0)
+#endif
+
/* Holdoff in nanoseconds for auto-expediting. */
#define DEFAULT_SRCU_EXP_HOLDOFF (25 * 1000)
static ulong exp_holdoff = DEFAULT_SRCU_EXP_HOLDOFF;
@@ -1268,8 +1281,8 @@ void srcu_torture_stats_print(struct srcu_struct *ssp, char *tt, char *tf)
struct srcu_data *sdp;
sdp = per_cpu_ptr(ssp->sda, cpu);
- u0 = sdp->srcu_unlock_count[!idx];
- u1 = sdp->srcu_unlock_count[idx];
+ u0 = data_race(sdp->srcu_unlock_count[!idx]);
+ u1 = data_race(sdp->srcu_unlock_count[idx]);
/*
* Make sure that a lock is always counted if the corresponding
@@ -1277,8 +1290,8 @@ void srcu_torture_stats_print(struct srcu_struct *ssp, char *tt, char *tf)
*/
smp_rmb();
- l0 = sdp->srcu_lock_count[!idx];
- l1 = sdp->srcu_lock_count[idx];
+ l0 = data_race(sdp->srcu_lock_count[!idx]);
+ l1 = data_race(sdp->srcu_lock_count[idx]);
c0 = l0 - u0;
c1 = l1 - u1;
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 573fd78a7bca..f288477ee1c2 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -113,7 +113,7 @@ static struct rcu_state rcu_state = {
static bool dump_tree;
module_param(dump_tree, bool, 0444);
/* By default, use RCU_SOFTIRQ instead of rcuc kthreads. */
-static bool use_softirq = 1;
+static bool use_softirq = true;
module_param(use_softirq, bool, 0444);
/* Control rcu_node-tree auto-balancing at boot time. */
static bool rcu_fanout_exact;
@@ -1275,7 +1275,7 @@ static bool rcu_start_this_gp(struct rcu_node *rnp_start, struct rcu_data *rdp,
trace_rcu_this_gp(rnp, rdp, gp_seq_req, TPS("NoGPkthread"));
goto unlock_out;
}
- trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq, TPS("newreq"));
+ trace_rcu_grace_period(rcu_state.name, data_race(rcu_state.gp_seq), TPS("newreq"));
ret = true; /* Caller must wake GP kthread. */
unlock_out:
/* Push furthest requested GP to leaf node and rcu_data structure. */
@@ -1531,6 +1531,31 @@ static void rcu_gp_slow(int delay)
schedule_timeout_uninterruptible(delay);
}
+static unsigned long sleep_duration;
+
+/* Allow rcutorture to stall the grace-period kthread. */
+void rcu_gp_set_torture_wait(int duration)
+{
+ if (IS_ENABLED(CONFIG_RCU_TORTURE_TEST) && duration > 0)
+ WRITE_ONCE(sleep_duration, duration);
+}
+EXPORT_SYMBOL_GPL(rcu_gp_set_torture_wait);
+
+/* Actually implement the aforementioned wait. */
+static void rcu_gp_torture_wait(void)
+{
+ unsigned long duration;
+
+ if (!IS_ENABLED(CONFIG_RCU_TORTURE_TEST))
+ return;
+ duration = xchg(&sleep_duration, 0UL);
+ if (duration > 0) {
+ pr_alert("%s: Waiting %lu jiffies\n", __func__, duration);
+ schedule_timeout_uninterruptible(duration);
+ pr_alert("%s: Wait complete\n", __func__);
+ }
+}
+
/*
* Initialize a new grace period. Return false if no grace period required.
*/
@@ -1564,6 +1589,7 @@ static bool rcu_gp_init(void)
record_gp_stall_check_time();
/* Record GP times before starting GP, hence rcu_seq_start(). */
rcu_seq_start(&rcu_state.gp_seq);
+ ASSERT_EXCLUSIVE_WRITER(rcu_state.gp_seq);
trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq, TPS("start"));
raw_spin_unlock_irq_rcu_node(rnp);
@@ -1669,12 +1695,16 @@ static bool rcu_gp_fqs_check_wake(int *gfp)
{
struct rcu_node *rnp = rcu_get_root();
- /* Someone like call_rcu() requested a force-quiescent-state scan. */
+ // If under overload conditions, force an immediate FQS scan.
+ if (*gfp & RCU_GP_FLAG_OVLD)
+ return true;
+
+ // Someone like call_rcu() requested a force-quiescent-state scan.
*gfp = READ_ONCE(rcu_state.gp_flags);
if (*gfp & RCU_GP_FLAG_FQS)
return true;
- /* The current grace period has completed. */
+ // The current grace period has completed.
if (!READ_ONCE(rnp->qsmask) && !rcu_preempt_blocked_readers_cgp(rnp))
return true;
@@ -1712,13 +1742,15 @@ static void rcu_gp_fqs(bool first_time)
static void rcu_gp_fqs_loop(void)
{
bool first_gp_fqs;
- int gf;
+ int gf = 0;
unsigned long j;
int ret;
struct rcu_node *rnp = rcu_get_root();
first_gp_fqs = true;
j = READ_ONCE(jiffies_till_first_fqs);
+ if (rcu_state.cbovld)
+ gf = RCU_GP_FLAG_OVLD;
ret = 0;
for (;;) {
if (!ret) {
@@ -1731,6 +1763,7 @@ static void rcu_gp_fqs_loop(void)
rcu_state.gp_state = RCU_GP_WAIT_FQS;
ret = swait_event_idle_timeout_exclusive(
rcu_state.gp_wq, rcu_gp_fqs_check_wake(&gf), j);
+ rcu_gp_torture_wait();
rcu_state.gp_state = RCU_GP_DOING_FQS;
/* Locking provides needed memory barriers. */
/* If grace period done, leave loop. */
@@ -1738,12 +1771,16 @@ static void rcu_gp_fqs_loop(void)
!rcu_preempt_blocked_readers_cgp(rnp))
break;
/* If time for quiescent-state forcing, do it. */
- if (ULONG_CMP_GE(jiffies, rcu_state.jiffies_force_qs) ||
+ if (!time_after(rcu_state.jiffies_force_qs, jiffies) ||
(gf & RCU_GP_FLAG_FQS)) {
trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq,
TPS("fqsstart"));
rcu_gp_fqs(first_gp_fqs);
- first_gp_fqs = false;
+ gf = 0;
+ if (first_gp_fqs) {
+ first_gp_fqs = false;
+ gf = rcu_state.cbovld ? RCU_GP_FLAG_OVLD : 0;
+ }
trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq,
TPS("fqsend"));
cond_resched_tasks_rcu_qs();
@@ -1763,6 +1800,7 @@ static void rcu_gp_fqs_loop(void)
j = 1;
else
j = rcu_state.jiffies_force_qs - j;
+ gf = 0;
}
}
}
@@ -1839,6 +1877,7 @@ static void rcu_gp_cleanup(void)
/* Declare grace period done, trace first to use old GP number. */
trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq, TPS("end"));
rcu_seq_end(&rcu_state.gp_seq);
+ ASSERT_EXCLUSIVE_WRITER(rcu_state.gp_seq);
rcu_state.gp_state = RCU_GP_IDLE;
/* Check for GP requests since above loop. */
rdp = this_cpu_ptr(&rcu_data);
@@ -1879,6 +1918,7 @@ static int __noreturn rcu_gp_kthread(void *unused)
swait_event_idle_exclusive(rcu_state.gp_wq,
READ_ONCE(rcu_state.gp_flags) &
RCU_GP_FLAG_INIT);
+ rcu_gp_torture_wait();
rcu_state.gp_state = RCU_GP_DONE_GPS;
/* Locking provides needed memory barrier. */
if (rcu_gp_init())
@@ -2869,6 +2909,8 @@ struct kfree_rcu_cpu {
struct delayed_work monitor_work;
bool monitor_todo;
bool initialized;
+ // Number of objects for which GP not started
+ int count;
};
static DEFINE_PER_CPU(struct kfree_rcu_cpu, krc);
@@ -2982,6 +3024,8 @@ static inline bool queue_kfree_rcu_work(struct kfree_rcu_cpu *krcp)
krcp->head = NULL;
}
+ WRITE_ONCE(krcp->count, 0);
+
/*
* One work is per one batch, so there are two "free channels",
* "bhead_free" and "head_free" the batch can handle. It can be
@@ -3118,6 +3162,8 @@ void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
krcp->head = head;
}
+ WRITE_ONCE(krcp->count, krcp->count + 1);
+
// Set timer to drain after KFREE_DRAIN_JIFFIES.
if (rcu_scheduler_active == RCU_SCHEDULER_RUNNING &&
!krcp->monitor_todo) {
@@ -3132,6 +3178,56 @@ unlock_return:
}
EXPORT_SYMBOL_GPL(kfree_call_rcu);
+static unsigned long
+kfree_rcu_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
+{
+ int cpu;
+ unsigned long count = 0;
+
+ /* Snapshot count of all CPUs */
+ for_each_online_cpu(cpu) {
+ struct kfree_rcu_cpu *krcp = per_cpu_ptr(&krc, cpu);
+
+ count += READ_ONCE(krcp->count);
+ }
+
+ return count;
+}
+
+static unsigned long
+kfree_rcu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
+{
+ int cpu, freed = 0;
+ unsigned long flags;
+
+ for_each_online_cpu(cpu) {
+ int count;
+ struct kfree_rcu_cpu *krcp = per_cpu_ptr(&krc, cpu);
+
+ count = krcp->count;
+ spin_lock_irqsave(&krcp->lock, flags);
+ if (krcp->monitor_todo)
+ kfree_rcu_drain_unlock(krcp, flags);
+ else
+ spin_unlock_irqrestore(&krcp->lock, flags);
+
+ sc->nr_to_scan -= count;
+ freed += count;
+
+ if (sc->nr_to_scan <= 0)
+ break;
+ }
+
+ return freed;
+}
+
+static struct shrinker kfree_rcu_shrinker = {
+ .count_objects = kfree_rcu_shrink_count,
+ .scan_objects = kfree_rcu_shrink_scan,
+ .batch = 0,
+ .seeks = DEFAULT_SEEKS,
+};
+
void __init kfree_rcu_scheduler_running(void)
{
int cpu;
@@ -3657,6 +3753,7 @@ void rcu_cpu_starting(unsigned int cpu)
nbits = bitmap_weight(&oldmask, BITS_PER_LONG);
/* Allow lockless access for expedited grace periods. */
smp_store_release(&rcu_state.ncpus, rcu_state.ncpus + nbits); /* ^^^ */
+ ASSERT_EXCLUSIVE_WRITER(rcu_state.ncpus);
rcu_gpnum_ovf(rnp, rdp); /* Offline-induced counter wrap? */
rdp->rcu_onl_gp_seq = READ_ONCE(rcu_state.gp_seq);
rdp->rcu_onl_gp_flags = READ_ONCE(rcu_state.gp_flags);
@@ -4052,6 +4149,8 @@ static void __init kfree_rcu_batch_init(void)
INIT_DELAYED_WORK(&krcp->monitor_work, kfree_rcu_monitor);
krcp->initialized = true;
}
+ if (register_shrinker(&kfree_rcu_shrinker))
+ pr_err("Failed to register kfree_rcu() shrinker!\n");
}
void __init rcu_init(void)
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 29ba79989802..43991a40b084 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -359,6 +359,7 @@ struct rcu_state {
/* Values for rcu_state structure's gp_flags field. */
#define RCU_GP_FLAG_INIT 0x1 /* Need grace-period initialization. */
#define RCU_GP_FLAG_FQS 0x2 /* Need grace-period quiescent-state forcing. */
+#define RCU_GP_FLAG_OVLD 0x4 /* Experiencing callback overload. */
/* Values for rcu_state structure's gp_state field. */
#define RCU_GP_IDLE 0 /* Initial state and no GP in progress. */
diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h
index 0e5ccb330f9d..72952edad1e4 100644
--- a/kernel/rcu/tree_exp.h
+++ b/kernel/rcu/tree_exp.h
@@ -150,7 +150,7 @@ static void __maybe_unused sync_exp_reset_tree(void)
static bool sync_rcu_exp_done(struct rcu_node *rnp)
{
raw_lockdep_assert_held_rcu_node(rnp);
- return rnp->exp_tasks == NULL &&
+ return READ_ONCE(rnp->exp_tasks) == NULL &&
READ_ONCE(rnp->expmask) == 0;
}
@@ -373,7 +373,7 @@ static void sync_rcu_exp_select_node_cpus(struct work_struct *wp)
* until such time as the ->expmask bits are cleared.
*/
if (rcu_preempt_has_tasks(rnp))
- rnp->exp_tasks = rnp->blkd_tasks.next;
+ WRITE_ONCE(rnp->exp_tasks, rnp->blkd_tasks.next);
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
/* IPI the remaining CPUs for expedited quiescent state. */
@@ -542,8 +542,8 @@ static void synchronize_rcu_expedited_wait(void)
}
pr_cont(" } %lu jiffies s: %lu root: %#lx/%c\n",
jiffies - jiffies_start, rcu_state.expedited_sequence,
- READ_ONCE(rnp_root->expmask),
- ".T"[!!rnp_root->exp_tasks]);
+ data_race(rnp_root->expmask),
+ ".T"[!!data_race(rnp_root->exp_tasks)]);
if (ndetected) {
pr_err("blocking rcu_node structures:");
rcu_for_each_node_breadth_first(rnp) {
@@ -553,8 +553,8 @@ static void synchronize_rcu_expedited_wait(void)
continue;
pr_cont(" l=%u:%d-%d:%#lx/%c",
rnp->level, rnp->grplo, rnp->grphi,
- READ_ONCE(rnp->expmask),
- ".T"[!!rnp->exp_tasks]);
+ data_race(rnp->expmask),
+ ".T"[!!data_race(rnp->exp_tasks)]);
}
pr_cont("\n");
}
@@ -700,17 +700,20 @@ static void sync_sched_exp_online_cleanup(int cpu)
*/
static int rcu_print_task_exp_stall(struct rcu_node *rnp)
{
- struct task_struct *t;
+ unsigned long flags;
int ndetected = 0;
+ struct task_struct *t;
- if (!rnp->exp_tasks)
+ if (!READ_ONCE(rnp->exp_tasks))
return 0;
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
t = list_entry(rnp->exp_tasks->prev,
struct task_struct, rcu_node_entry);
list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) {
pr_cont(" P%d", t->pid);
ndetected++;
}
+ raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
return ndetected;
}
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 4cef7e3bca69..50caa3fcbad2 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -226,7 +226,7 @@ static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp)
WARN_ON_ONCE(rnp->completedqs == rnp->gp_seq);
}
if (!rnp->exp_tasks && (blkd_state & RCU_EXP_BLKD))
- rnp->exp_tasks = &t->rcu_node_entry;
+ WRITE_ONCE(rnp->exp_tasks, &t->rcu_node_entry);
WARN_ON_ONCE(!(blkd_state & RCU_GP_BLKD) !=
!(rnp->qsmask & rdp->grpmask));
WARN_ON_ONCE(!(blkd_state & RCU_EXP_BLKD) !=
@@ -493,12 +493,12 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags)
if (&t->rcu_node_entry == rnp->gp_tasks)
WRITE_ONCE(rnp->gp_tasks, np);
if (&t->rcu_node_entry == rnp->exp_tasks)
- rnp->exp_tasks = np;
+ WRITE_ONCE(rnp->exp_tasks, np);
if (IS_ENABLED(CONFIG_RCU_BOOST)) {
/* Snapshot ->boost_mtx ownership w/rnp->lock held. */
drop_boost_mutex = rt_mutex_owner(&rnp->boost_mtx) == t;
if (&t->rcu_node_entry == rnp->boost_tasks)
- rnp->boost_tasks = np;
+ WRITE_ONCE(rnp->boost_tasks, np);
}
/*
@@ -746,8 +746,8 @@ dump_blkd_tasks(struct rcu_node *rnp, int ncheck)
pr_info("%s: %d:%d ->qsmask %#lx ->qsmaskinit %#lx ->qsmaskinitnext %#lx\n",
__func__, rnp1->grplo, rnp1->grphi, rnp1->qsmask, rnp1->qsmaskinit, rnp1->qsmaskinitnext);
pr_info("%s: ->gp_tasks %p ->boost_tasks %p ->exp_tasks %p\n",
- __func__, READ_ONCE(rnp->gp_tasks), rnp->boost_tasks,
- rnp->exp_tasks);
+ __func__, READ_ONCE(rnp->gp_tasks), data_race(rnp->boost_tasks),
+ READ_ONCE(rnp->exp_tasks));
pr_info("%s: ->blkd_tasks", __func__);
i = 0;
list_for_each(lhp, &rnp->blkd_tasks) {
@@ -1021,7 +1021,8 @@ static int rcu_boost_kthread(void *arg)
for (;;) {
WRITE_ONCE(rnp->boost_kthread_status, RCU_KTHREAD_WAITING);
trace_rcu_utilization(TPS("End boost kthread@rcu_wait"));
- rcu_wait(rnp->boost_tasks || rnp->exp_tasks);
+ rcu_wait(READ_ONCE(rnp->boost_tasks) ||
+ READ_ONCE(rnp->exp_tasks));
trace_rcu_utilization(TPS("Start boost kthread@rcu_wait"));
WRITE_ONCE(rnp->boost_kthread_status, RCU_KTHREAD_RUNNING);
more2boost = rcu_boost(rnp);
@@ -1064,9 +1065,9 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
(rnp->gp_tasks != NULL &&
rnp->boost_tasks == NULL &&
rnp->qsmask == 0 &&
- (ULONG_CMP_GE(jiffies, rnp->boost_time) || rcu_state.cbovld))) {
+ (!time_after(rnp->boost_time, jiffies) || rcu_state.cbovld))) {
if (rnp->exp_tasks == NULL)
- rnp->boost_tasks = rnp->gp_tasks;
+ WRITE_ONCE(rnp->boost_tasks, rnp->gp_tasks);
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
rcu_wake_cond(rnp->boost_kthread_task,
READ_ONCE(rnp->boost_kthread_status));
@@ -2521,7 +2522,7 @@ static bool rcu_nohz_full_cpu(void)
#ifdef CONFIG_NO_HZ_FULL
if (tick_nohz_full_cpu(smp_processor_id()) &&
(!rcu_gp_in_progress() ||
- ULONG_CMP_LT(jiffies, READ_ONCE(rcu_state.gp_start) + HZ)))
+ time_before(jiffies, READ_ONCE(rcu_state.gp_start) + HZ)))
return true;
#endif /* #ifdef CONFIG_NO_HZ_FULL */
return false;
diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h
index e1c68a74574f..ae76bd329582 100644
--- a/kernel/rcu/tree_stall.h
+++ b/kernel/rcu/tree_stall.h
@@ -15,10 +15,12 @@
int sysctl_panic_on_rcu_stall __read_mostly;
#ifdef CONFIG_PROVE_RCU
-#define RCU_STALL_DELAY_DELTA (5 * HZ)
+#define RCU_STALL_DELAY_DELTA (5 * HZ)
#else
-#define RCU_STALL_DELAY_DELTA 0
+#define RCU_STALL_DELAY_DELTA 0
#endif
+#define RCU_STALL_MIGHT_DIV 8
+#define RCU_STALL_MIGHT_MIN (2 * HZ)
/* Limit-check stall timeouts specified at boottime and runtime. */
int rcu_jiffies_till_stall_check(void)
@@ -40,6 +42,36 @@ int rcu_jiffies_till_stall_check(void)
}
EXPORT_SYMBOL_GPL(rcu_jiffies_till_stall_check);
+/**
+ * rcu_gp_might_be_stalled - Is it likely that the grace period is stalled?
+ *
+ * Returns @true if the current grace period is sufficiently old that
+ * it is reasonable to assume that it might be stalled. This can be
+ * useful when deciding whether to allocate memory to enable RCU-mediated
+ * freeing on the one hand or just invoking synchronize_rcu() on the other.
+ * The latter is preferable when the grace period is stalled.
+ *
+ * Note that sampling of the .gp_start and .gp_seq fields must be done
+ * carefully to avoid false positives at the beginnings and ends of
+ * grace periods.
+ */
+bool rcu_gp_might_be_stalled(void)
+{
+ unsigned long d = rcu_jiffies_till_stall_check() / RCU_STALL_MIGHT_DIV;
+ unsigned long j = jiffies;
+
+ if (d < RCU_STALL_MIGHT_MIN)
+ d = RCU_STALL_MIGHT_MIN;
+ smp_mb(); // jiffies before .gp_seq to avoid false positives.
+ if (!rcu_gp_in_progress())
+ return false;
+ // Long delays at this point avoids false positive, but a delay
+ // of ULONG_MAX/4 jiffies voids your no-false-positive warranty.
+ smp_mb(); // .gp_seq before second .gp_start
+ // And ditto here.
+ return !time_before(j, READ_ONCE(rcu_state.gp_start) + d);
+}
+
/* Don't do RCU CPU stall warnings during long sysrq printouts. */
void rcu_sysrq_start(void)
{
@@ -104,8 +136,8 @@ static void record_gp_stall_check_time(void)
WR