summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2015-11-03 15:40:38 -0800
committer: Linus Torvalds <torvalds@linux-foundation.org> 2015-11-03 15:40:38 -0800
commit: 281422869942c19f05a08d4017c633d08d390938 (patch)
tree: 78e1fb9155e3e82697f96ce04e7b40e8f7c5147c
parent: f5a8160c1e055c0fd8d16a5b3ac97c638365b0db (diff)
parent: b33e18f61bd18227a456016a77b1a968f5bc1d65 (diff)
Merge branch 'core-rcu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull RCU changes from Ingo Molnar:
 "The main changes in this cycle were:

   - Improvements to expedited grace periods (Paul E McKenney)

   - Performance improvements to and locktorture tests for percpu-rwsem
     (Oleg Nesterov, Paul E McKenney)

   - Torture-test changes (Paul E McKenney, Davidlohr Bueso)

   - Documentation updates (Paul E McKenney)

   - Miscellaneous fixes (Paul E McKenney, Boqun Feng, Oleg Nesterov,
     Patrick Marlier)"

* 'core-rcu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (54 commits)
  fs/writeback, rcu: Don't use list_entry_rcu() for pointer offsetting in bdi_split_work_to_wbs()
  rcu: Better hotplug handling for synchronize_sched_expedited()
  rcu: Enable stall warnings for synchronize_rcu_expedited()
  rcu: Add tasks to expedited stall-warning messages
  rcu: Add online/offline info to expedited stall warning message
  rcu: Consolidate expedited CPU selection
  rcu: Prepare for consolidating expedited CPU selection
  cpu: Remove try_get_online_cpus()
  rcu: Stop excluding CPU hotplug in synchronize_sched_expedited()
  rcu: Stop silencing lockdep false positive for expedited grace periods
  rcu: Switch synchronize_sched_expedited() to IPI
  locktorture: Fix module unwind when bad torture_type specified
  torture: Forgive non-plural arguments
  rcutorture: Fix unused-function warning for torturing_tasks()
  rcutorture: Fix module unwind when bad torture_type specified
  rcu_sync: Cleanup the CONFIG_PROVE_RCU checks
  locking/percpu-rwsem: Clean up the lockdep annotations in percpu_down_read()
  locking/percpu-rwsem: Fix the comments outdated by rcu_sync
  locking/percpu-rwsem: Make use of the rcu_sync infrastructure
  locking/percpu-rwsem: Make percpu_free_rwsem() after kzalloc() safe
  ...
-rw-r--r--  Documentation/RCU/stallwarn.txt | 7
-rw-r--r--  Documentation/RCU/torture.txt | 39
-rw-r--r--  Documentation/RCU/trace.txt | 32
-rw-r--r--  Documentation/RCU/whatisRCU.txt | 6
-rw-r--r--  Documentation/kernel-parameters.txt | 9
-rw-r--r--  Documentation/locking/locktorture.txt | 3
-rw-r--r--  Documentation/memory-barriers.txt | 12
-rw-r--r--  fs/fs-writeback.c | 4
-rw-r--r--  include/linux/cpu.h | 2
-rw-r--r--  include/linux/list.h | 5
-rw-r--r--  include/linux/list_bl.h | 5
-rw-r--r--  include/linux/list_nulls.h | 3
-rw-r--r--  include/linux/percpu-rwsem.h | 3
-rw-r--r--  include/linux/rcu_sync.h | 86
-rw-r--r--  include/linux/rculist.h | 5
-rw-r--r--  include/linux/rcupdate.h | 59
-rw-r--r--  include/linux/rcutiny.h | 3
-rw-r--r--  include/linux/rcutree.h | 2
-rw-r--r--  include/linux/sched.h | 10
-rw-r--r--  include/linux/srcu.h | 5
-rw-r--r--  kernel/cpu.c | 13
-rw-r--r--  kernel/exit.c | 2
-rw-r--r--  kernel/locking/locktorture.c | 164
-rw-r--r--  kernel/locking/percpu-rwsem.c | 90
-rw-r--r--  kernel/rcu/Makefile | 2
-rw-r--r--  kernel/rcu/rcutorture.c | 16
-rw-r--r--  kernel/rcu/srcu.c | 4
-rw-r--r--  kernel/rcu/sync.c | 223
-rw-r--r--  kernel/rcu/tiny.c | 8
-rw-r--r--  kernel/rcu/tree.c | 512
-rw-r--r--  kernel/rcu/tree.h | 69
-rw-r--r--  kernel/rcu/tree_plugin.h | 437
-rw-r--r--  kernel/rcu/tree_trace.c | 10
-rw-r--r--  kernel/rcu/update.c | 2
-rw-r--r--  kernel/sched/core.c | 1
-rw-r--r--  kernel/torture.c | 1
-rwxr-xr-x  tools/testing/selftests/rcutorture/bin/kvm.sh | 6
-rw-r--r--  tools/testing/selftests/rcutorture/configs/lock/CFLIST | 4
-rw-r--r--  tools/testing/selftests/rcutorture/configs/lock/LOCK05 | 6
-rw-r--r--  tools/testing/selftests/rcutorture/configs/lock/LOCK05.boot | 1
-rw-r--r--  tools/testing/selftests/rcutorture/configs/lock/LOCK06 | 6
-rw-r--r--  tools/testing/selftests/rcutorture/configs/lock/LOCK06.boot | 1
42 files changed, 1333 insertions, 545 deletions
diff --git a/Documentation/RCU/stallwarn.txt b/Documentation/RCU/stallwarn.txt
index efb9454875ab..0f7fb4298e7e 100644
--- a/Documentation/RCU/stallwarn.txt
+++ b/Documentation/RCU/stallwarn.txt
@@ -205,6 +205,13 @@ o For !CONFIG_PREEMPT kernels, a CPU looping anywhere in the
behavior, you might need to replace some of the cond_resched()
calls with calls to cond_resched_rcu_qs().
+o Booting Linux using a console connection that is too slow to
+ keep up with the boot-time console-message rate. For example,
+ a 115Kbaud serial console can be -way- too slow to keep up
+ with boot-time message rates, and will frequently result in
+ RCU CPU stall warning messages. Especially if you have added
+ debug printk()s.
+
o Anything that prevents RCU's grace-period kthreads from running.
This can result in the "All QSes seen" console-log message.
This message will include information on when the kthread last
diff --git a/Documentation/RCU/torture.txt b/Documentation/RCU/torture.txt
index dac02a6219b1..118e7c176ce7 100644
--- a/Documentation/RCU/torture.txt
+++ b/Documentation/RCU/torture.txt
@@ -166,40 +166,27 @@ test_no_idle_hz Whether or not to test the ability of RCU to operate in
torture_type The type of RCU to test, with string values as follows:
- "rcu": rcu_read_lock(), rcu_read_unlock() and call_rcu().
-
- "rcu_sync": rcu_read_lock(), rcu_read_unlock(), and
- synchronize_rcu().
-
- "rcu_expedited": rcu_read_lock(), rcu_read_unlock(), and
- synchronize_rcu_expedited().
+ "rcu": rcu_read_lock(), rcu_read_unlock() and call_rcu(),
+ along with expedited, synchronous, and polling
+ variants.
"rcu_bh": rcu_read_lock_bh(), rcu_read_unlock_bh(), and
- call_rcu_bh().
-
- "rcu_bh_sync": rcu_read_lock_bh(), rcu_read_unlock_bh(),
- and synchronize_rcu_bh().
+ call_rcu_bh(), along with expedited and synchronous
+ variants.
- "rcu_bh_expedited": rcu_read_lock_bh(), rcu_read_unlock_bh(),
- and synchronize_rcu_bh_expedited().
+ "rcu_busted": This tests an intentionally incorrect version
+ of RCU in order to help test rcutorture itself.
"srcu": srcu_read_lock(), srcu_read_unlock() and
- call_srcu().
-
- "srcu_sync": srcu_read_lock(), srcu_read_unlock() and
- synchronize_srcu().
-
- "srcu_expedited": srcu_read_lock(), srcu_read_unlock() and
- synchronize_srcu_expedited().
+ call_srcu(), along with expedited and
+ synchronous variants.
"sched": preempt_disable(), preempt_enable(), and
- call_rcu_sched().
-
- "sched_sync": preempt_disable(), preempt_enable(), and
- synchronize_sched().
+ call_rcu_sched(), along with expedited,
+ synchronous, and polling variants.
- "sched_expedited": preempt_disable(), preempt_enable(), and
- synchronize_sched_expedited().
+ "tasks": voluntary context switch and call_rcu_tasks(),
+ along with expedited and synchronous variants.
Defaults to "rcu".
diff --git a/Documentation/RCU/trace.txt b/Documentation/RCU/trace.txt
index 97f17e9decda..ec6998b1b6d0 100644
--- a/Documentation/RCU/trace.txt
+++ b/Documentation/RCU/trace.txt
@@ -56,14 +56,14 @@ rcuboost:
The output of "cat rcu/rcu_preempt/rcudata" looks as follows:
- 0!c=30455 g=30456 pq=1/0 qp=1 dt=126535/140000000000000/0 df=2002 of=4 ql=0/0 qs=N... b=10 ci=74572 nci=0 co=1131 ca=716
- 1!c=30719 g=30720 pq=1/0 qp=0 dt=132007/140000000000000/0 df=1874 of=10 ql=0/0 qs=N... b=10 ci=123209 nci=0 co=685 ca=982
- 2!c=30150 g=30151 pq=1/1 qp=1 dt=138537/140000000000000/0 df=1707 of=8 ql=0/0 qs=N... b=10 ci=80132 nci=0 co=1328 ca=1458
- 3 c=31249 g=31250 pq=1/1 qp=0 dt=107255/140000000000000/0 df=1749 of=6 ql=0/450 qs=NRW. b=10 ci=151700 nci=0 co=509 ca=622
- 4!c=29502 g=29503 pq=1/0 qp=1 dt=83647/140000000000000/0 df=965 of=5 ql=0/0 qs=N... b=10 ci=65643 nci=0 co=1373 ca=1521
- 5 c=31201 g=31202 pq=1/0 qp=1 dt=70422/0/0 df=535 of=7 ql=0/0 qs=.... b=10 ci=58500 nci=0 co=764 ca=698
- 6!c=30253 g=30254 pq=1/0 qp=1 dt=95363/140000000000000/0 df=780 of=5 ql=0/0 qs=N... b=10 ci=100607 nci=0 co=1414 ca=1353
- 7 c=31178 g=31178 pq=1/0 qp=0 dt=91536/0/0 df=547 of=4 ql=0/0 qs=.... b=10 ci=109819 nci=0 co=1115 ca=969
+ 0!c=30455 g=30456 cnq=1/0:1 dt=126535/140000000000000/0 df=2002 of=4 ql=0/0 qs=N... b=10 ci=74572 nci=0 co=1131 ca=716
+ 1!c=30719 g=30720 cnq=1/0:0 dt=132007/140000000000000/0 df=1874 of=10 ql=0/0 qs=N... b=10 ci=123209 nci=0 co=685 ca=982
+ 2!c=30150 g=30151 cnq=1/1:1 dt=138537/140000000000000/0 df=1707 of=8 ql=0/0 qs=N... b=10 ci=80132 nci=0 co=1328 ca=1458
+ 3 c=31249 g=31250 cnq=1/1:0 dt=107255/140000000000000/0 df=1749 of=6 ql=0/450 qs=NRW. b=10 ci=151700 nci=0 co=509 ca=622
+ 4!c=29502 g=29503 cnq=1/0:1 dt=83647/140000000000000/0 df=965 of=5 ql=0/0 qs=N... b=10 ci=65643 nci=0 co=1373 ca=1521
+ 5 c=31201 g=31202 cnq=1/0:1 dt=70422/0/0 df=535 of=7 ql=0/0 qs=.... b=10 ci=58500 nci=0 co=764 ca=698
+ 6!c=30253 g=30254 cnq=1/0:1 dt=95363/140000000000000/0 df=780 of=5 ql=0/0 qs=N... b=10 ci=100607 nci=0 co=1414 ca=1353
+ 7 c=31178 g=31178 cnq=1/0:0 dt=91536/0/0 df=547 of=4 ql=0/0 qs=.... b=10 ci=109819 nci=0 co=1115 ca=969
This file has one line per CPU, or eight for this 8-CPU system.
The fields are as follows:
@@ -188,14 +188,14 @@ o "ca" is the number of RCU callbacks that have been adopted by this
Kernels compiled with CONFIG_RCU_BOOST=y display the following from
/debug/rcu/rcu_preempt/rcudata:
- 0!c=12865 g=12866 pq=1/0 qp=1 dt=83113/140000000000000/0 df=288 of=11 ql=0/0 qs=N... kt=0/O ktl=944 b=10 ci=60709 nci=0 co=748 ca=871
- 1 c=14407 g=14408 pq=1/0 qp=0 dt=100679/140000000000000/0 df=378 of=7 ql=0/119 qs=NRW. kt=0/W ktl=9b6 b=10 ci=109740 nci=0 co=589 ca=485
- 2 c=14407 g=14408 pq=1/0 qp=0 dt=105486/0/0 df=90 of=9 ql=0/89 qs=NRW. kt=0/W ktl=c0c b=10 ci=83113 nci=0 co=533 ca=490
- 3 c=14407 g=14408 pq=1/0 qp=0 dt=107138/0/0 df=142 of=8 ql=0/188 qs=NRW. kt=0/W ktl=b96 b=10 ci=121114 nci=0 co=426 ca=290
- 4 c=14405 g=14406 pq=1/0 qp=1 dt=50238/0/0 df=706 of=7 ql=0/0 qs=.... kt=0/W ktl=812 b=10 ci=34929 nci=0 co=643 ca=114
- 5!c=14168 g=14169 pq=1/0 qp=0 dt=45465/140000000000000/0 df=161 of=11 ql=0/0 qs=N... kt=0/O ktl=b4d b=10 ci=47712 nci=0 co=677 ca=722
- 6 c=14404 g=14405 pq=1/0 qp=0 dt=59454/0/0 df=94 of=6 ql=0/0 qs=.... kt=0/W ktl=e57 b=10 ci=55597 nci=0 co=701 ca=811
- 7 c=14407 g=14408 pq=1/0 qp=1 dt=68850/0/0 df=31 of=8 ql=0/0 qs=.... kt=0/W ktl=14bd b=10 ci=77475 nci=0 co=508 ca=1042
+ 0!c=12865 g=12866 cnq=1/0:1 dt=83113/140000000000000/0 df=288 of=11 ql=0/0 qs=N... kt=0/O ktl=944 b=10 ci=60709 nci=0 co=748 ca=871
+ 1 c=14407 g=14408 cnq=1/0:0 dt=100679/140000000000000/0 df=378 of=7 ql=0/119 qs=NRW. kt=0/W ktl=9b6 b=10 ci=109740 nci=0 co=589 ca=485
+ 2 c=14407 g=14408 cnq=1/0:0 dt=105486/0/0 df=90 of=9 ql=0/89 qs=NRW. kt=0/W ktl=c0c b=10 ci=83113 nci=0 co=533 ca=490
+ 3 c=14407 g=14408 cnq=1/0:0 dt=107138/0/0 df=142 of=8 ql=0/188 qs=NRW. kt=0/W ktl=b96 b=10 ci=121114 nci=0 co=426 ca=290
+ 4 c=14405 g=14406 cnq=1/0:1 dt=50238/0/0 df=706 of=7 ql=0/0 qs=.... kt=0/W ktl=812 b=10 ci=34929 nci=0 co=643 ca=114
+ 5!c=14168 g=14169 cnq=1/0:0 dt=45465/140000000000000/0 df=161 of=11 ql=0/0 qs=N... kt=0/O ktl=b4d b=10 ci=47712 nci=0 co=677 ca=722
+ 6 c=14404 g=14405 cnq=1/0:0 dt=59454/0/0 df=94 of=6 ql=0/0 qs=.... kt=0/W ktl=e57 b=10 ci=55597 nci=0 co=701 ca=811
+ 7 c=14407 g=14408 cnq=1/0:1 dt=68850/0/0 df=31 of=8 ql=0/0 qs=.... kt=0/W ktl=14bd b=10 ci=77475 nci=0 co=508 ca=1042
This is similar to the output discussed above, but contains the following
additional fields:
diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt
index adc2184009c5..dc49c6712b17 100644
--- a/Documentation/RCU/whatisRCU.txt
+++ b/Documentation/RCU/whatisRCU.txt
@@ -364,7 +364,7 @@ uses of RCU may be found in listRCU.txt, arrayRCU.txt, and NMI-RCU.txt.
};
DEFINE_SPINLOCK(foo_mutex);
- struct foo *gbl_foo;
+ struct foo __rcu *gbl_foo;
/*
* Create a new struct foo that is the same as the one currently
@@ -386,7 +386,7 @@ uses of RCU may be found in listRCU.txt, arrayRCU.txt, and NMI-RCU.txt.
new_fp = kmalloc(sizeof(*new_fp), GFP_KERNEL);
spin_lock(&foo_mutex);
- old_fp = gbl_foo;
+ old_fp = rcu_dereference_protected(gbl_foo, lockdep_is_held(&foo_mutex));
*new_fp = *old_fp;
new_fp->a = new_a;
rcu_assign_pointer(gbl_foo, new_fp);
@@ -487,7 +487,7 @@ The foo_update_a() function might then be written as follows:
new_fp = kmalloc(sizeof(*new_fp), GFP_KERNEL);
spin_lock(&foo_mutex);
- old_fp = gbl_foo;
+ old_fp = rcu_dereference_protected(gbl_foo, lockdep_is_held(&foo_mutex));
*new_fp = *old_fp;
new_fp->a = new_a;
rcu_assign_pointer(gbl_foo, new_fp);
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 50fc09b623f6..046832ef14ce 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -3089,9 +3089,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
cache-to-cache transfer latencies.
rcutree.rcu_fanout_leaf= [KNL]
- Increase the number of CPUs assigned to each
- leaf rcu_node structure. Useful for very large
- systems.
+ Change the number of CPUs assigned to each
+ leaf rcu_node structure. Useful for very
+ large systems, which will choose the value 64,
+ and for NUMA systems with large remote-access
+ latencies, which will choose a value aligned
+ with the appropriate hardware boundaries.
rcutree.jiffies_till_sched_qs= [KNL]
Set required age in jiffies for a
diff --git a/Documentation/locking/locktorture.txt b/Documentation/locking/locktorture.txt
index 619f2bb136a5..a2ef3a929bf1 100644
--- a/Documentation/locking/locktorture.txt
+++ b/Documentation/locking/locktorture.txt
@@ -52,6 +52,9 @@ torture_type Type of lock to torture. By default, only spinlocks will
o "mutex_lock": mutex_lock() and mutex_unlock() pairs.
+ o "rtmutex_lock": rtmutex_lock() and rtmutex_unlock()
+ pairs. Kernel must have CONFIG_RT_MUTEXES=y.
+
o "rwsem_lock": read/write down() and up() semaphore pairs.
torture_runnable Start locktorture at boot time in the case where the
diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt
index 2ba8461b0631..8e7cf9ad3db1 100644
--- a/Documentation/memory-barriers.txt
+++ b/Documentation/memory-barriers.txt
@@ -1710,6 +1710,17 @@ There are some more advanced barrier functions:
operations" subsection for information on where to use these.
+ (*) lockless_dereference();
+ This can be thought of as a pointer-fetch wrapper around the
+ smp_read_barrier_depends() data-dependency barrier.
+
+ This is also similar to rcu_dereference(), but in cases where
+ object lifetime is handled by some mechanism other than RCU, for
+ example, when the objects are removed only when the system goes down.
+ In addition, lockless_dereference() is used in some data structures
+ that can be used both with and without RCU.
+
+
(*) dma_wmb();
(*) dma_rmb();
@@ -1789,7 +1800,6 @@ The Linux kernel has a number of locking constructs:
(*) mutexes
(*) semaphores
(*) R/W semaphores
- (*) RCU
In all cases there are variants on "ACQUIRE" operations and "RELEASE" operations
for each construct. These operations all imply certain barriers:
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 29e4599f6fc1..7378169e90be 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -779,8 +779,8 @@ static void bdi_split_work_to_wbs(struct backing_dev_info *bdi,
bool skip_if_busy)
{
struct bdi_writeback *last_wb = NULL;
- struct bdi_writeback *wb = list_entry_rcu(&bdi->wb_list,
- struct bdi_writeback, bdi_node);
+ struct bdi_writeback *wb = list_entry(&bdi->wb_list,
+ struct bdi_writeback, bdi_node);
might_sleep();
restart:
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index 23c30bdcca86..d2ca8c38f9c4 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -228,7 +228,6 @@ extern struct bus_type cpu_subsys;
extern void cpu_hotplug_begin(void);
extern void cpu_hotplug_done(void);
extern void get_online_cpus(void);
-extern bool try_get_online_cpus(void);
extern void put_online_cpus(void);
extern void cpu_hotplug_disable(void);
extern void cpu_hotplug_enable(void);
@@ -246,7 +245,6 @@ int cpu_down(unsigned int cpu);
static inline void cpu_hotplug_begin(void) {}
static inline void cpu_hotplug_done(void) {}
#define get_online_cpus() do { } while (0)
-#define try_get_online_cpus() true
#define put_online_cpus() do { } while (0)
#define cpu_hotplug_disable() do { } while (0)
#define cpu_hotplug_enable() do { } while (0)
diff --git a/include/linux/list.h b/include/linux/list.h
index 3e3e64a61002..993395a2e55c 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -87,7 +87,7 @@ static inline void list_add_tail(struct list_head *new, struct list_head *head)
static inline void __list_del(struct list_head * prev, struct list_head * next)
{
next->prev = prev;
- prev->next = next;
+ WRITE_ONCE(prev->next, next);
}
/**
@@ -615,7 +615,8 @@ static inline void __hlist_del(struct hlist_node *n)
{
struct hlist_node *next = n->next;
struct hlist_node **pprev = n->pprev;
- *pprev = next;
+
+ WRITE_ONCE(*pprev, next);
if (next)
next->pprev = pprev;
}
diff --git a/include/linux/list_bl.h b/include/linux/list_bl.h
index 2eb88556c5c5..8132214e8efd 100644
--- a/include/linux/list_bl.h
+++ b/include/linux/list_bl.h
@@ -93,9 +93,10 @@ static inline void __hlist_bl_del(struct hlist_bl_node *n)
LIST_BL_BUG_ON((unsigned long)n & LIST_BL_LOCKMASK);
/* pprev may be `first`, so be careful not to lose the lock bit */
- *pprev = (struct hlist_bl_node *)
+ WRITE_ONCE(*pprev,
+ (struct hlist_bl_node *)
((unsigned long)next |
- ((unsigned long)*pprev & LIST_BL_LOCKMASK));
+ ((unsigned long)*pprev & LIST_BL_LOCKMASK)));
if (next)
next->pprev = pprev;
}
diff --git a/include/linux/list_nulls.h b/include/linux/list_nulls.h
index f266661d2666..444d2b1313bd 100644
--- a/include/linux/list_nulls.h
+++ b/include/linux/list_nulls.h
@@ -76,7 +76,8 @@ static inline void __hlist_nulls_del(struct hlist_nulls_node *n)
{
struct hlist_nulls_node *next = n->next;
struct hlist_nulls_node **pprev = n->pprev;
- *pprev = next;
+
+ WRITE_ONCE(*pprev, next);
if (!is_a_nulls(next))
next->pprev = pprev;
}
diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h
index 834c4e52cb2d..c2fa3ecb0dce 100644
--- a/include/linux/percpu-rwsem.h
+++ b/include/linux/percpu-rwsem.h
@@ -5,11 +5,12 @@
#include <linux/rwsem.h>
#include <linux/percpu.h>
#include <linux/wait.h>
+#include <linux/rcu_sync.h>
#include <linux/lockdep.h>
struct percpu_rw_semaphore {
+ struct rcu_sync rss;
unsigned int __percpu *fast_read_ctr;
- atomic_t write_ctr;
struct rw_semaphore rw_sem;
atomic_t slow_read_ctr;
wait_queue_head_t write_waitq;
diff --git a/include/linux/rcu_sync.h b/include/linux/rcu_sync.h
new file mode 100644
index 000000000000..a63a33e6196e
--- /dev/null
+++ b/include/linux/rcu_sync.h
@@ -0,0 +1,86 @@
+/*
+ * RCU-based infrastructure for lightweight reader-writer locking
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * Copyright (c) 2015, Red Hat, Inc.
+ *
+ * Author: Oleg Nesterov <oleg@redhat.com>
+ */
+
+#ifndef _LINUX_RCU_SYNC_H_
+#define _LINUX_RCU_SYNC_H_
+
+#include <linux/wait.h>
+#include <linux/rcupdate.h>
+
+enum rcu_sync_type { RCU_SYNC, RCU_SCHED_SYNC, RCU_BH_SYNC };
+
+/* Structure to mediate between updaters and fastpath-using readers. */
+struct rcu_sync {
+ int gp_state;
+ int gp_count;
+ wait_queue_head_t gp_wait;
+
+ int cb_state;
+ struct rcu_head cb_head;
+
+ enum rcu_sync_type gp_type;
+};
+
+extern void rcu_sync_lockdep_assert(struct rcu_sync *);
+
+/**
+ * rcu_sync_is_idle() - Are readers permitted to use their fastpaths?
+ * @rsp: Pointer to rcu_sync structure to use for synchronization
+ *
+ * Returns true if readers are permitted to use their fastpaths.
+ * Must be invoked within an RCU read-side critical section whose
+ * flavor matches that of the rcu_sync structure.
+ */
+static inline bool rcu_sync_is_idle(struct rcu_sync *rsp)
+{
+#ifdef CONFIG_PROVE_RCU
+ rcu_sync_lockdep_assert(rsp);
+#endif
+ return !rsp->gp_state; /* GP_IDLE */
+}
+
+extern void rcu_sync_init(struct rcu_sync *, enum rcu_sync_type);
+extern void rcu_sync_enter(struct rcu_sync *);
+extern void rcu_sync_exit(struct rcu_sync *);
+extern void rcu_sync_dtor(struct rcu_sync *);
+
+#define __RCU_SYNC_INITIALIZER(name, type) { \
+ .gp_state = 0, \
+ .gp_count = 0, \
+ .gp_wait = __WAIT_QUEUE_HEAD_INITIALIZER(name.gp_wait), \
+ .cb_state = 0, \
+ .gp_type = type, \
+ }
+
+#define __DEFINE_RCU_SYNC(name, type) /* Define a statically initialized struct rcu_sync called name. */ \
+ struct rcu_sync name = __RCU_SYNC_INITIALIZER(name, type)
+
+#define DEFINE_RCU_SYNC(name) \
+ __DEFINE_RCU_SYNC(name, RCU_SYNC)
+
+#define DEFINE_RCU_SCHED_SYNC(name) \
+ __DEFINE_RCU_SYNC(name, RCU_SCHED_SYNC)
+
+#define DEFINE_RCU_BH_SYNC(name) \
+ __DEFINE_RCU_SYNC(name, RCU_BH_SYNC)
+
+#endif /* _LINUX_RCU_SYNC_H_ */
diff --git a/include/linux/rculist.h b/include/linux/rculist.h
index 17c6b1f84a77..5ed540986019 100644
--- a/include/linux/rculist.h
+++ b/include/linux/rculist.h
@@ -247,10 +247,7 @@ static inline void list_splice_init_rcu(struct list_head *list,
* primitives such as list_add_rcu() as long as it's guarded by rcu_read_lock().
*/
#define list_entry_rcu(ptr, type, member) \
-({ \
- typeof(*ptr) __rcu *__ptr = (typeof(*ptr) __rcu __force *)ptr; \
- container_of((typeof(ptr))rcu_dereference_raw(__ptr), type, member); \
-})
+ container_of(lockless_dereference(ptr), type, member)
/**
* Where are list_empty_rcu() and list_first_entry_rcu()?
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 581abf848566..a0189ba67fde 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -160,7 +160,7 @@ void do_trace_rcu_torture_read(const char *rcutorturename,
* more than one CPU).
*/
void call_rcu(struct rcu_head *head,
- void (*func)(struct rcu_head *head));
+ rcu_callback_t func);
#else /* #ifdef CONFIG_PREEMPT_RCU */
@@ -191,7 +191,7 @@ void call_rcu(struct rcu_head *head,
* memory ordering guarantees.
*/
void call_rcu_bh(struct rcu_head *head,
- void (*func)(struct rcu_head *head));
+ rcu_callback_t func);
/**
* call_rcu_sched() - Queue an RCU callback for invocation after sched grace period.
@@ -213,7 +213,7 @@ void call_rcu_bh(struct rcu_head *head,
* memory ordering guarantees.
*/
void call_rcu_sched(struct rcu_head *head,
- void (*func)(struct rcu_head *rcu));
+ rcu_callback_t func);
void synchronize_sched(void);
@@ -274,7 +274,7 @@ do { \
* See the description of call_rcu() for more detailed information on
* memory ordering guarantees.
*/
-void call_rcu_tasks(struct rcu_head *head, void (*func)(struct rcu_head *head));
+void call_rcu_tasks(struct rcu_head *head, rcu_callback_t func);
void synchronize_rcu_tasks(void);
void rcu_barrier_tasks(void);
@@ -297,12 +297,14 @@ void synchronize_rcu(void);
static inline void __rcu_read_lock(void)
{
- preempt_disable();
+ if (IS_ENABLED(CONFIG_PREEMPT_COUNT))
+ preempt_disable();
}
static inline void __rcu_read_unlock(void)
{
- preempt_enable();
+ if (IS_ENABLED(CONFIG_PREEMPT_COUNT))
+ preempt_enable();
}
static inline void synchronize_rcu(void)
@@ -535,29 +537,9 @@ static inline int rcu_read_lock_sched_held(void)
#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
-/* Deprecate rcu_lockdep_assert(): Use RCU_LOCKDEP_WARN() instead. */
-static inline void __attribute((deprecated)) deprecate_rcu_lockdep_assert(void)
-{
-}
-
#ifdef CONFIG_PROVE_RCU
/**
- * rcu_lockdep_assert - emit lockdep splat if specified condition not met
- * @c: condition to check
- * @s: informative message
- */
-#define rcu_lockdep_assert(c, s) \
- do { \
- static bool __section(.data.unlikely) __warned; \
- deprecate_rcu_lockdep_assert(); \
- if (debug_lockdep_rcu_enabled() && !__warned && !(c)) { \
- __warned = true; \
- lockdep_rcu_suspicious(__FILE__, __LINE__, s); \
- } \
- } while (0)
-
-/**
* RCU_LOCKDEP_WARN - emit lockdep splat if specified condition is met
* @c: condition to check
* @s: informative message
@@ -594,7 +576,6 @@ static inline void rcu_preempt_sleep_check(void)
#else /* #ifdef CONFIG_PROVE_RCU */
-#define rcu_lockdep_assert(c, s) deprecate_rcu_lockdep_assert()
#define RCU_LOCKDEP_WARN(c, s) do { } while (0)
#define rcu_sleep_check() do { } while (0)
@@ -811,6 +792,28 @@ static inline void rcu_preempt_sleep_check(void)
#define rcu_dereference_sched(p) rcu_dereference_sched_check(p, 0)
/**
+ * rcu_pointer_handoff() - Hand off a pointer from RCU to other mechanism
+ * @p: The pointer to hand off
+ *
+ * This is simply an identity function, but it documents where a pointer
+ * is handed off from RCU to some other synchronization mechanism, for
+ * example, reference counting or locking. In C11, it would map to
+ * kill_dependency(). It could be used as follows:
+ *
+ * rcu_read_lock();
+ * p = rcu_dereference(gp);
+ * long_lived = is_long_lived(p);
+ * if (long_lived) {
+ * if (!atomic_inc_not_zero(p->refcnt))
+ * long_lived = false;
+ * else
+ * p = rcu_pointer_handoff(p);
+ * }
+ * rcu_read_unlock();
+ */
+#define rcu_pointer_handoff(p) (p)
+
+/**
* rcu_read_lock() - mark the beginning of an RCU read-side critical section
*
* When synchronize_rcu() is invoked on one CPU while other CPUs
@@ -1065,7 +1068,7 @@ static inline notrace void rcu_read_unlock_sched_notrace(void)
#define __kfree_rcu(head, offset) \
do { \
BUILD_BUG_ON(!__is_kfree_rcu_offset(offset)); \
- kfree_call_rcu(head, (void (*)(struct rcu_head *))(unsigned long)(offset)); \
+ kfree_call_rcu(head, (rcu_callback_t)(unsigned long)(offset)); \
} while (0)
/**
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index ff968b7af3a4..4c1aaf9cce7b 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -83,7 +83,7 @@ static inline void synchronize_sched_expedited(void)
}
static inline void kfree_call_rcu(struct rcu_head *head,
- void (*func)(struct rcu_head *rcu))
+ rcu_callback_t func)
{
call_rcu(head, func);
}
@@ -216,6 +216,7 @@ static inline bool rcu_is_watching(void)
static inline void rcu_all_qs(void)
{
+ barrier(); /* Avoid RCU read-side critical sections leaking across. */
}
#endif /* __LINUX_RCUTINY_H */
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index 5abec82f325e..60d15a080d7c 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -48,7 +48,7 @@ void synchronize_rcu_bh(void);
void synchronize_sched_expedited(void);
void synchronize_rcu_expedited(void);
-void kfree_call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu));
+void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func);
/**
* synchronize_rcu_bh_expedited - Brute-force RCU-bh grace period
diff --git a/include/linux/sched.h b/include/linux/sched.h
index f87559df5b75..56667292d1e4 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1345,10 +1345,12 @@ struct sched_dl_entity {
union rcu_special {
struct {
- bool blocked;
- bool need_qs;
- } b;
- short s;
+ u8 blocked;
+ u8 need_qs;
+ u8 exp_need_qs;
+ u8 pad; /* Otherwise the compiler can store garbage here. */
+ } b; /* Bits. */
+ u32 s; /* Set of bits. */
};
struct rcu_node;