summaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-03-30 16:17:15 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2020-03-30 16:17:15 -0700
commit4b9fd8a829a1eec7442e38afff21d610604de56a (patch)
tree32757737b7eecbe8c5be7606d8dcec883d39f1e6 /kernel
parenta776c270a0b2fad6715cb714187e4290cadb9237 (diff)
parentf1e67e355c2aafeddf1eac31335709236996d2fe (diff)
Merge branch 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull locking updates from Ingo Molnar: "The main changes in this cycle were: - Continued user-access cleanups in the futex code. - percpu-rwsem rewrite that uses its own waitqueue and atomic_t instead of an embedded rwsem. This addresses a couple of weaknesses, but the primary motivation was complications on the -rt kernel. - Introduce raw lock nesting detection on lockdep (CONFIG_PROVE_RAW_LOCK_NESTING=y), document the raw_lock vs. normal lock differences. This too originates from -rt. - Reuse lockdep zapped chain_hlocks entries, to conserve RAM footprint on distro-ish kernels running into the "BUG: MAX_LOCKDEP_CHAIN_HLOCKS too low!" depletion of the lockdep chain-entries pool. - Misc cleanups, smaller fixes and enhancements - see the changelog for details" * 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (55 commits) fs/buffer: Make BH_Uptodate_Lock bit_spin_lock a regular spinlock_t thermal/x86_pkg_temp: Make pkg_temp_lock a raw_spinlock_t Documentation/locking/locktypes: Minor copy editor fixes Documentation/locking/locktypes: Further clarifications and wordsmithing m68knommu: Remove mm.h include from uaccess_no.h x86: get rid of user_atomic_cmpxchg_inatomic() generic arch_futex_atomic_op_inuser() doesn't need access_ok() x86: don't reload after cmpxchg in unsafe_atomic_op2() loop x86: convert arch_futex_atomic_op_inuser() to user_access_begin/user_access_end() objtool: whitelist __sanitizer_cov_trace_switch() [parisc, s390, sparc64] no need for access_ok() in futex handling sh: no need of access_ok() in arch_futex_atomic_op_inuser() futex: arch_futex_atomic_op_inuser() calling conventions change completion: Use lockdep_assert_RT_in_threaded_ctx() in complete_all() lockdep: Add posixtimer context tracing bits lockdep: Annotate irq_work lockdep: Add hrtimer context tracing bits lockdep: Introduce wait-type checks completion: Use simple wait queues sched/swait: Prepare usage in completions ...
Diffstat (limited to 'kernel')
-rw-r--r--kernel/cpu.c4
-rw-r--r--kernel/exit.c1
-rw-r--r--kernel/futex.c107
-rw-r--r--kernel/irq/handle.c7
-rw-r--r--kernel/irq_work.c2
-rw-r--r--kernel/locking/lockdep.c674
-rw-r--r--kernel/locking/lockdep_internals.h14
-rw-r--r--kernel/locking/lockdep_proc.c31
-rw-r--r--kernel/locking/mutex-debug.c2
-rw-r--r--kernel/locking/percpu-rwsem.c194
-rw-r--r--kernel/locking/rwsem.c9
-rw-r--r--kernel/locking/rwsem.h10
-rw-r--r--kernel/locking/spinlock_debug.c6
-rw-r--r--kernel/rcu/tree.c1
-rw-r--r--kernel/rcu/update.c24
-rw-r--r--kernel/sched/completion.c36
-rw-r--r--kernel/sched/sched.h3
-rw-r--r--kernel/sched/swait.c15
-rw-r--r--kernel/time/hrtimer.c6
-rw-r--r--kernel/time/jiffies.c7
-rw-r--r--kernel/time/posix-cpu-timers.c6
-rw-r--r--kernel/time/tick-common.c10
-rw-r--r--kernel/time/tick-sched.c20
-rw-r--r--kernel/time/timekeeping.c6
-rw-r--r--kernel/time/timekeeping.h3
25 files changed, 837 insertions, 361 deletions
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 9c706af713fb..221bf6a9e98a 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -331,12 +331,12 @@ void lockdep_assert_cpus_held(void)
static void lockdep_acquire_cpus_lock(void)
{
- rwsem_acquire(&cpu_hotplug_lock.rw_sem.dep_map, 0, 0, _THIS_IP_);
+ rwsem_acquire(&cpu_hotplug_lock.dep_map, 0, 0, _THIS_IP_);
}
static void lockdep_release_cpus_lock(void)
{
- rwsem_release(&cpu_hotplug_lock.rw_sem.dep_map, _THIS_IP_);
+ rwsem_release(&cpu_hotplug_lock.dep_map, _THIS_IP_);
}
/*
diff --git a/kernel/exit.c b/kernel/exit.c
index 0b81b26a872a..764960fabfa1 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -258,6 +258,7 @@ void rcuwait_wake_up(struct rcuwait *w)
wake_up_process(task);
rcu_read_unlock();
}
+EXPORT_SYMBOL_GPL(rcuwait_wake_up);
/*
* Determine if a process group is "orphaned", according to the POSIX
diff --git a/kernel/futex.c b/kernel/futex.c
index 82dfacb3250e..b59532862bc0 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -135,8 +135,7 @@
*
* Where (A) orders the waiters increment and the futex value read through
* atomic operations (see hb_waiters_inc) and where (B) orders the write
- * to futex and the waiters read -- this is done by the barriers for both
- * shared and private futexes in get_futex_key_refs().
+ * to futex and the waiters read (see hb_waiters_pending()).
*
* This yields the following case (where X:=waiters, Y:=futex):
*
@@ -331,17 +330,6 @@ static void compat_exit_robust_list(struct task_struct *curr);
static inline void compat_exit_robust_list(struct task_struct *curr) { }
#endif
-static inline void futex_get_mm(union futex_key *key)
-{
- mmgrab(key->private.mm);
- /*
- * Ensure futex_get_mm() implies a full barrier such that
- * get_futex_key() implies a full barrier. This is relied upon
- * as smp_mb(); (B), see the ordering comment above.
- */
- smp_mb__after_atomic();
-}
-
/*
* Reflects a new waiter being added to the waitqueue.
*/
@@ -370,6 +358,10 @@ static inline void hb_waiters_dec(struct futex_hash_bucket *hb)
static inline int hb_waiters_pending(struct futex_hash_bucket *hb)
{
#ifdef CONFIG_SMP
+ /*
+ * Full barrier (B), see the ordering comment above.
+ */
+ smp_mb();
return atomic_read(&hb->waiters);
#else
return 1;
@@ -407,69 +399,6 @@ static inline int match_futex(union futex_key *key1, union futex_key *key2)
&& key1->both.offset == key2->both.offset);
}
-/*
- * Take a reference to the resource addressed by a key.
- * Can be called while holding spinlocks.
- *
- */
-static void get_futex_key_refs(union futex_key *key)
-{
- if (!key->both.ptr)
- return;
-
- /*
- * On MMU less systems futexes are always "private" as there is no per
- * process address space. We need the smp wmb nevertheless - yes,
- * arch/blackfin has MMU less SMP ...
- */
- if (!IS_ENABLED(CONFIG_MMU)) {
- smp_mb(); /* explicit smp_mb(); (B) */
- return;
- }
-
- switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
- case FUT_OFF_INODE:
- smp_mb(); /* explicit smp_mb(); (B) */
- break;
- case FUT_OFF_MMSHARED:
- futex_get_mm(key); /* implies smp_mb(); (B) */
- break;
- default:
- /*
- * Private futexes do not hold reference on an inode or
- * mm, therefore the only purpose of calling get_futex_key_refs
- * is because we need the barrier for the lockless waiter check.
- */
- smp_mb(); /* explicit smp_mb(); (B) */
- }
-}
-
-/*
- * Drop a reference to the resource addressed by a key.
- * The hash bucket spinlock must not be held. This is
- * a no-op for private futexes, see comment in the get
- * counterpart.
- */
-static void drop_futex_key_refs(union futex_key *key)
-{
- if (!key->both.ptr) {
- /* If we're here then we tried to put a key we failed to get */
- WARN_ON_ONCE(1);
- return;
- }
-
- if (!IS_ENABLED(CONFIG_MMU))
- return;
-
- switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
- case FUT_OFF_INODE:
- break;
- case FUT_OFF_MMSHARED:
- mmdrop(key->private.mm);
- break;
- }
-}
-
enum futex_access {
FUTEX_READ,
FUTEX_WRITE
@@ -601,7 +530,6 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, enum futex_a
if (!fshared) {
key->private.mm = mm;
key->private.address = address;
- get_futex_key_refs(key); /* implies smp_mb(); (B) */
return 0;
}
@@ -741,8 +669,6 @@ again:
rcu_read_unlock();
}
- get_futex_key_refs(key); /* implies smp_mb(); (B) */
-
out:
put_page(page);
return err;
@@ -750,7 +676,6 @@ out:
static inline void put_futex_key(union futex_key *key)
{
- drop_futex_key_refs(key);
}
/**
@@ -1740,10 +1665,9 @@ static int futex_atomic_op_inuser(unsigned int encoded_op, u32 __user *uaddr)
oparg = 1 << oparg;
}
- if (!access_ok(uaddr, sizeof(u32)))
- return -EFAULT;
-
+ pagefault_disable();
ret = arch_futex_atomic_op_inuser(op, oparg, &oldval, uaddr);
+ pagefault_enable();
if (ret)
return ret;
@@ -1885,7 +1809,6 @@ void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1,
plist_add(&q->list, &hb2->chain);
q->lock_ptr = &hb2->lock;
}
- get_futex_key_refs(key2);
q->key = *key2;
}
@@ -1907,7 +1830,6 @@ static inline
void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
struct futex_hash_bucket *hb)
{
- get_futex_key_refs(key);
q->key = *key;
__unqueue_futex(q);
@@ -2018,7 +1940,7 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
u32 *cmpval, int requeue_pi)
{
union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
- int drop_count = 0, task_count = 0, ret;
+ int task_count = 0, ret;
struct futex_pi_state *pi_state = NULL;
struct futex_hash_bucket *hb1, *hb2;
struct futex_q *this, *next;
@@ -2139,7 +2061,6 @@ retry_private:
*/
if (ret > 0) {
WARN_ON(pi_state);
- drop_count++;
task_count++;
/*
* If we acquired the lock, then the user space value
@@ -2259,7 +2180,6 @@ retry_private:
* doing so.
*/
requeue_pi_wake_futex(this, &key2, hb2);
- drop_count++;
continue;
} else if (ret) {
/*
@@ -2280,7 +2200,6 @@ retry_private:
}
}
requeue_futex(this, hb1, hb2, &key2);
- drop_count++;
}
/*
@@ -2295,15 +2214,6 @@ out_unlock:
wake_up_q(&wake_q);
hb_waiters_dec(hb2);
- /*
- * drop_futex_key_refs() must be called outside the spinlocks. During
- * the requeue we moved futex_q's from the hash bucket at key1 to the
- * one at key2 and updated their key pointer. We no longer need to
- * hold the references to key1.
- */
- while (--drop_count >= 0)
- drop_futex_key_refs(&key1);
-
out_put_keys:
put_futex_key(&key2);
out_put_key1:
@@ -2433,7 +2343,6 @@ retry:
ret = 1;
}
- drop_futex_key_refs(&q->key);
return ret;
}
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index a4ace611f47f..16ee716e8291 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -145,6 +145,13 @@ irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc, unsigned int *flags
for_each_action_of_desc(desc, action) {
irqreturn_t res;
+ /*
+ * If this IRQ would be threaded under force_irqthreads, mark it so.
+ */
+ if (irq_settings_can_thread(desc) &&
+ !(action->flags & (IRQF_NO_THREAD | IRQF_PERCPU | IRQF_ONESHOT)))
+ trace_hardirq_threaded();
+
trace_irq_handler_entry(irq, action);
res = action->handler(irq, action->dev_id);
trace_irq_handler_exit(irq, action, res);
diff --git a/kernel/irq_work.c b/kernel/irq_work.c
index 828cc30774bc..48b5d1b6af4d 100644
--- a/kernel/irq_work.c
+++ b/kernel/irq_work.c
@@ -153,7 +153,9 @@ static void irq_work_run_list(struct llist_head *list)
*/
flags = atomic_fetch_andnot(IRQ_WORK_PENDING, &work->flags);
+ lockdep_irq_work_enter(work);
work->func(work);
+ lockdep_irq_work_exit(work);
/*
* Clear the BUSY bit and return to the free state if
* no-one else claimed it meanwhile.
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 32406ef0d6a2..0ebf9807d971 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -84,12 +84,39 @@ module_param(lock_stat, int, 0644);
* to use a raw spinlock - we really dont want the spinlock
* code to recurse back into the lockdep code...
*/
-static arch_spinlock_t lockdep_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
+static arch_spinlock_t __lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
+static struct task_struct *__owner;
+
+static inline void lockdep_lock(void)
+{
+ DEBUG_LOCKS_WARN_ON(!irqs_disabled());
+
+ arch_spin_lock(&__lock);
+ __owner = current;
+ current->lockdep_recursion++;
+}
+
+static inline void lockdep_unlock(void)
+{
+ if (debug_locks && DEBUG_LOCKS_WARN_ON(__owner != current))
+ return;
+
+ current->lockdep_recursion--;
+ __owner = NULL;
+ arch_spin_unlock(&__lock);
+}
+
+static inline bool lockdep_assert_locked(void)
+{
+ return DEBUG_LOCKS_WARN_ON(__owner != current);
+}
+
static struct task_struct *lockdep_selftest_task_struct;
+
static int graph_lock(void)
{
- arch_spin_lock(&lockdep_lock);
+ lockdep_lock();
/*
* Make sure that if another CPU detected a bug while
* walking the graph we dont change it (while the other
@@ -97,27 +124,15 @@ static int graph_lock(void)
* dropped already)
*/
if (!debug_locks) {
- arch_spin_unlock(&lockdep_lock);
+ lockdep_unlock();
return 0;
}
- /* prevent any recursions within lockdep from causing deadlocks */
- current->lockdep_recursion++;
return 1;
}
-static inline int graph_unlock(void)
+static inline void graph_unlock(void)
{
- if (debug_locks && !arch_spin_is_locked(&lockdep_lock)) {
- /*
- * The lockdep graph lock isn't locked while we expect it to
- * be, we're confused now, bye!
- */
- return DEBUG_LOCKS_WARN_ON(1);
- }
-
- current->lockdep_recursion--;
- arch_spin_unlock(&lockdep_lock);
- return 0;
+ lockdep_unlock();
}
/*
@@ -128,7 +143,7 @@ static inline int debug_locks_off_graph_unlock(void)
{
int ret = debug_locks_off();
- arch_spin_unlock(&lockdep_lock);
+ lockdep_unlock();
return ret;
}
@@ -147,6 +162,7 @@ static DECLARE_BITMAP(list_entries_in_use, MAX_LOCKDEP_ENTRIES);
#define KEYHASH_SIZE (1UL << KEYHASH_BITS)
static struct hlist_head lock_keys_hash[KEYHASH_SIZE];
unsigned long nr_lock_classes;
+unsigned long nr_zapped_classes;
#ifndef CONFIG_DEBUG_LOCKDEP
static
#endif
@@ -377,18 +393,31 @@ void lockdep_init_task(struct task_struct *task)
task->lockdep_recursion = 0;
}
+/*
+ * Split the recrursion counter in two to readily detect 'off' vs recursion.
+ */
+#define LOCKDEP_RECURSION_BITS 16
+#define LOCKDEP_OFF (1U << LOCKDEP_RECURSION_BITS)
+#define LOCKDEP_RECURSION_MASK (LOCKDEP_OFF - 1)
+
void lockdep_off(void)
{
- current->lockdep_recursion++;
+ current->lockdep_recursion += LOCKDEP_OFF;
}
EXPORT_SYMBOL(lockdep_off);
void lockdep_on(void)
{
- current->lockdep_recursion--;
+ current->lockdep_recursion -= LOCKDEP_OFF;
}
EXPORT_SYMBOL(lockdep_on);
+static inline void lockdep_recursion_finish(void)
+{
+ if (WARN_ON_ONCE(--current->lockdep_recursion))
+ current->lockdep_recursion = 0;
+}
+
void lockdep_set_selftest_task(struct task_struct *task)
{
lockdep_selftest_task_struct = task;
@@ -575,6 +604,7 @@ static const char *usage_str[] =
#include "lockdep_states.h"
#undef LOCKDEP_STATE
[LOCK_USED] = "INITIAL USE",
+ [LOCK_USAGE_STATES] = "IN-NMI",
};
#endif
@@ -653,7 +683,9 @@ static void print_lock_name(struct lock_class *class)
printk(KERN_CONT " (");
__print_lock_name(class);
- printk(KERN_CONT "){%s}", usage);
+ printk(KERN_CONT "){%s}-{%hd:%hd}", usage,
+ class->wait_type_outer ?: class->wait_type_inner,
+ class->wait_type_inner);
}
static void print_lockdep_cache(struct lockdep_map *lock)
@@ -787,6 +819,7 @@ static int count_matching_names(struct lock_class *new_class)
return count + 1;
}
+/* used from NMI context -- must be lockless */
static inline struct lock_class *
look_up_lock_class(const struct lockdep_map *lock, unsigned int subclass)
{
@@ -1070,13 +1103,15 @@ static inline void check_data_structures(void) { }
#endif /* CONFIG_DEBUG_LOCKDEP */
+static void init_chain_block_buckets(void);
+
/*
* Initialize the lock_classes[] array elements, the free_lock_classes list
* and also the delayed_free structure.
*/
static void init_data_structures_once(void)
{
- static bool ds_initialized, rcu_head_initialized;
+ static bool __read_mostly ds_initialized, rcu_head_initialized;
int i;
if (likely(rcu_head_initialized))
@@ -1100,6 +1135,7 @@ static void init_data_structures_once(void)
INIT_LIST_HEAD(&lock_classes[i].locks_after);
INIT_LIST_HEAD(&lock_classes[i].locks_before);
}
+ init_chain_block_buckets();
}
static inline struct hlist_head *keyhashentry(const struct lock_class_key *key)
@@ -1230,6 +1266,8 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force)
WARN_ON_ONCE(!list_empty(&class->locks_before));
WARN_ON_ONCE(!list_empty(&class->locks_after));
class->name_version = count_matching_names(class);
+ class->wait_type_inner = lock->wait_type_inner;
+ class->wait_type_outer = lock->wait_type_outer;
/*
* We use RCU's safe list-add method to make
* parallel walking of the hash-list safe:
@@ -1469,6 +1507,8 @@ static int __bfs(struct lock_list *source_entry,
struct circular_queue *cq = &lock_cq;
int ret = 1;
+ lockdep_assert_locked();
+
if (match(source_entry, data)) {
*target_entry = source_entry;
ret = 0;
@@ -1491,8 +1531,6 @@ static int __bfs(struct lock_list *source_entry,
head = get_dep_list(lock, offset);
- DEBUG_LOCKS_WARN_ON(!irqs_disabled());
-
list_for_each_entry_rcu(entry, head, entry) {
if (!lock_accessed(entry)) {
unsigned int cq_depth;
@@ -1719,9 +1757,9 @@ unsigned long lockdep_count_forward_deps(struct lock_class *class)
this.class = class;
raw_local_irq_save(flags);
- arch_spin_lock(&lockdep_lock);
+ lockdep_lock();
ret = __lockdep_count_forward_deps(&this);
- arch_spin_unlock(&lockdep_lock);
+ lockdep_unlock();
raw_local_irq_restore(flags);
return ret;
@@ -1746,9 +1784,9 @@ unsigned long lockdep_count_backward_deps(struct lock_class *class)
this.class = class;
raw_local_irq_save(flags);
- arch_spin_lock(&lockdep_lock);
+ lockdep_lock();
ret = __lockdep_count_backward_deps(&this);
- arch_spin_unlock(&lockdep_lock);
+ lockdep_unlock();
raw_local_irq_restore(flags);
return ret;
@@ -2298,18 +2336,6 @@ static int check_irq_usage(struct task_struct *curr, struct held_lock *prev,
return 0;
}
-static void inc_chains(void)
-{
- if (current->hardirq_context)
- nr_hardirq_chains++;
- else {
- if (current->softirq_context)
- nr_softirq_chains++;
- else
- nr_process_chains++;
- }
-}
-
#else
static inline int check_irq_usage(struct task_struct *curr,
@@ -2317,13 +2343,27 @@ static inline int check_irq_usage(struct task_struct *curr,
{
return 1;
}
+#endif /* CONFIG_TRACE_IRQFLAGS */
-static inline void inc_chains(void)
+static void inc_chains(int irq_context)
{
- nr_process_chains++;
+ if (irq_context & LOCK_CHAIN_HARDIRQ_CONTEXT)
+ nr_hardirq_chains++;
+ else if (irq_context & LOCK_CHAIN_SOFTIRQ_CONTEXT)
+ nr_softirq_chains++;
+ else
+ nr_process_chains++;
}
-#endif /* CONFIG_TRACE_IRQFLAGS */
+static void dec_chains(int irq_context)
+{
+ if (irq_context & LOCK_CHAIN_HARDIRQ_CONTEXT)
+ nr_hardirq_chains--;
+ else if (irq_context & LOCK_CHAIN_SOFTIRQ_CONTEXT)
+ nr_softirq_chains--;
+ else
+ nr_process_chains--;
+}
static void
print_deadlock_scenario(struct held_lock *nxt, struct held_lock *prv)
@@ -2622,8 +2662,235 @@ out_bug:
struct lock_chain lock_chains[MAX_LOCKDEP_CHAINS];
static DECLARE_BITMAP(lock_chains_in_use, MAX_LOCKDEP_CHAINS);
-int nr_chain_hlocks;
static u16 chain_hlocks[MAX_LOCKDEP_CHAIN_HLOCKS];
+unsigned long nr_zapped_lock_chains;
+unsigned int nr_free_chain_hlocks; /* Free chain_hlocks in buckets */
+unsigned int nr_lost_chain_hlocks; /* Lost chain_hlocks */
+unsigned int nr_large_chain_blocks; /* size > MAX_CHAIN_BUCKETS */
+
+/*
+ * The first 2 chain_hlocks entries in the chain block in the bucket
+ * list contains the following meta data:
+ *
+ * entry[0]:
+ * Bit 15 - always set to 1 (it is not a class index)
+ * Bits 0-14 - upper 15 bits of the next block index
+ * entry[1] - lower 16 bits of next block index
+ *
+ * A next block index of all 1 bits means it is the end of the list.
+ *
+ * On the unsized bucket (bucket-0), the 3rd and 4th entries contain
+ * the chain block size:
+ *
+ * entry[2] - upper 16 bits of the chain block size
+ * entry[3] - lower 16 bits of the chain block size
+ */
+#define MAX_CHAIN_BUCKETS 16
+#define CHAIN_BLK_FLAG (1U << 15)
+#define CHAIN_BLK_LIST_END 0xFFFFU
+
+static int chain_block_buckets[MAX_CHAIN_BUCKETS];
+
+static inline int size_to_bucket(int size)
+{
+ if (size > MAX_CHAIN_BUCKETS)
+ return 0;
+
+ return size - 1;
+}
+
+/*
+ * Iterate all the chain blocks in a bucket.
+ */
+#define for_each_chain_block(bucket, prev, curr) \
+ for ((prev) = -1, (curr) = chain_block_buckets[bucket]; \
+ (curr) >= 0; \
+ (prev) = (curr), (curr) = chain_block_next(curr))
+
+/*
+ * next block or -1
+ */
+static inline int chain_block_next(int offset)
+{
+ int next = chain_hlocks[offset];
+
+ WARN_ON_ONCE(!(next & CHAIN_BLK_FLAG));
+
+ if (next == CHAIN_BLK_LIST_END)
+ return -1;
+
+ next &= ~CHAIN_BLK_FLAG;
+ next <<= 16;
+ next |= chain_hlocks[offset + 1];
+
+ return next;
+}
+
+/*
+ * bucket-0 only
+ */
+static inline int chain_block_size(int offset)
+{
+ return (chain_hlocks[offset + 2] << 16) | chain_hlocks[offset + 3];
+}
+
+static inline void init_chain_block(int offset, int next, int bucket, int size)
+{
+ chain_hlocks[offset] = (next >> 16) | CHAIN_BLK_FLAG;
+ chain_hlocks[offset + 1] = (u16)next;
+
+ if (size && !bucket) {
+ chain_hlocks[offset + 2] = size >> 16;
+ chain_hlocks[offset + 3] = (u16)size;
+ }
+}
+
+static inline void add_chain_block(int offset, int size)
+{
+ int bucket = size_to_bucket(size);
+ int next = chain_block_buckets[bucket];
+ int prev, curr;
+
+ if (unlikely(size < 2)) {
+ /*
+ * We can't store single entries on the freelist. Leak them.
+ *
+ * One possible way out would be to uniquely mark them, other
+ * than with CHAIN_BLK_FLAG, such that we can recover them when
+ * the block before it is re-added.
+ */
+ if (size)
+ nr_lost_chain_hlocks++;
+ return;
+ }
+
+ nr_free_chain_hlocks += size;
+ if (!bucket) {
+ nr_large_chain_blocks++;
+
+ /*
+ * Variable sized, sort large to small.
+ */
+ for_each_chain_block(0, prev, curr) {
+ if (size >= chain_block_size(curr))
+ break;
+ }
+ init_chain_block(offset, curr, 0, size);
+ if (prev < 0)
+ chain_block_buckets[0] = offset;
+ else
+ init_chain_block(prev, offset, 0, 0);
+ return;
+ }
+ /*
+ * Fixed size, add to head.
+ */
+ init_chain_block(offset, next, bucket, size);
+ chain_block_buckets[bucket] = offset;
+}
+
+/*
+ * Only the first block in the list can be deleted.
+ *
+ * For the variable size bucket[0], the first block (the largest one) is
+ * returned, broken up and put back into the pool. So if a chain block of
+ * length > MAX_CHAIN_BUCKETS is ever used and zapped, it will just be
+ * queued up after the primordial chain block and never be used until the
+ * hlock entries in the primordial chain block is almost used up. That
+ * causes fragmentation and reduce allocation efficiency. That can be
+ * monitored by looking at the "large chain blocks" number in lockdep_stats.
+ */
+static inline void del_chain_block(int bucket, int size, int next)
+{
+ nr_free_chain_hlocks -= size;
+ chain_block_buckets[bucket] = next;
+
+ if (!bucket)
+ nr_large_chain_blocks--;
+}
+
+static void init_chain_block_buckets(void)
+{
+ int i;
+
+ for (i = 0; i < MAX_CHAIN_BUCKETS; i++)
+ chain_block_buckets[i] = -1;
+
+ add_chain_block(0, ARRAY_SIZE(chain_hlocks));
+}
+
+/*
+ * Return offset of a chain block of the right size or -1 if not found.
+ *
+ * Fairly simple worst-fit allocator with the addition of a number of size
+ * specific free lists.
+ */
+static int alloc_chain_hlocks(int req)
+{
+ int bucket, curr, size;
+
+ /*
+ * We rely on the MSB to act as an escape bit to denote freelist
+ * pointers. Make sure this bit isn't set in 'normal' class_idx usage.
+ */
+ BUILD_BUG_ON((MAX_LOCKDEP_KEYS-1) & CHAIN_BLK_FLAG);
+
+ init_data_structures_once();
+
+ if (nr_free_chain_hlocks < req)
+ return -1;
+
+ /*
+ * We require a minimum of 2 (u16) entries to encode a freelist
+ * 'pointer'.
+ */
+ req = max(req, 2);
+ bucket = size_to_bucket(req);
+ curr = chain_block_buckets[bucket];
+
+ if (bucket) {
+ if (curr >= 0) {
+ del_chain_block(bucket, req, chain_block_next(curr));
+ return curr;
+ }
+ /* Try bucket 0 */
+ curr = chain_block_buckets[0];
+ }
+
+ /*
+ * The variable sized freelist is sorted by size; the first entry is
+ * the largest. Use it if it fits.
+ */
+ if (curr >= 0) {
+ size = chain_block_size(curr);
+ if (likely(size >= req)) {
+ del_chain_block(0, size, chain_block_next(curr));
+ add_chain_block(curr + req, size - req);
+ return curr;
+ }
+ }
+
+ /*
+ * Last resort, split a block in a larger sized bucket.
+ */
+ for (size = MAX_CHAIN_BUCKETS; size > req; size--) {
+ bucket = size_to_bucket(size);
+ curr = chain_block_buckets[bucket];
+ if (curr < 0)
+ continue;
+
+ del_chain_block(bucket, size, chain_block_next(curr));
+ add_chain_block(curr + req, size - req);
+ return curr;
+ }
+
+ return -1;
+}
+
+static inline void free_chain_hlocks(int base, int size)
+{
+ add_chain_block(base, max(size, 2));
+}
struct lock_class *lock_chain_get_class(struct lock_chain *chain, int i)
{
@@ -2803,7 +3070,7 @@ static inline int add_chain_cache(struct task_struct *curr,
* disabled to make this an IRQ-safe lock.. for recursion reasons
* lockdep won't complain about its own locking errors.
*/
- if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
+ if (lockdep_assert_locked())
return 0;
chain = alloc_lock_chain();
@@ -2824,15 +3091,8 @@ static inline int add_chain_cache(struct task_struct *curr,
BUILD_BUG_ON((1UL << 6) <= ARRAY_SIZE(curr->held_locks));
BUILD_BUG_ON((1UL << 8*sizeof(chain_hlocks[0])) <= ARRAY_SIZE(lock_classes));
- if (likely(nr_chain_hlocks + chain->depth <= MAX_LOCKDEP_CHAIN_HLOCKS)) {
- chain->base = nr_chain_hlocks;
- for (j = 0; j < chain->depth - 1; j++, i++) {
- int lock_id = curr->held_locks[i].class_idx;
- chain_hlocks[chain->base + j] = lock_id;
- }
- chain_hlocks[chain->base + j] = class - lock_classes;
- nr_chain_hlocks += chain->depth;
- } else {
+ j = alloc_chain_hlocks(chain->depth);
+ if (j < 0) {
if (!debug_locks_off_graph_unlock())
return 0;
@@ -2841,9 +3101,16 @@ static inline int add_chain_cache(struct task_struct *curr,
return 0;
}
+ chain->base = j;
+ for (j = 0; j < chain->depth - 1; j++, i++) {
+ int lock_id = curr->held_locks[i].class_idx;
+
+ chain_hlocks[chain->base + j] = lock_id;
+ }
+ chain_hlocks[chain->base + j] = class - lock_classes;
hlist_add_head_rcu(&chain->entry, hash_head);
debug_atomic_inc(chain_lookup_misses);
- inc_chains();
+ inc_chains(chain->irq_context);
return 1;
}
@@ -2987,6 +3254,8 @@ static inline int validate_chain(struct task_struct *curr,
{
return 1;
}
+
+static void init_chain_block_buckets(void) { }
#endif /* CONFIG_PROVE_LOCKING */
/*
@@ -3429,9 +3698,9 @@ void lockdep_hardirqs_on(unsigned long ip)
if (DEBUG_LOCKS_WARN_ON(current->hardirq_context))
return;
- current->lockdep_recursion = 1;
+ current->lockdep_recursion++;
__trace_hardirqs_on_caller(ip);
- current->lockdep_recursion = 0;
+ lockdep_recursion_finish();
}
NOKPROBE_SYMBOL(lockdep_hardirqs_on);
@@ -3487,7 +3756,7 @@ void trace_softirqs_on(unsigned long ip)
return;
}
- current->lockdep_recursion = 1;
+ current->lockdep_recursion++;
/*
* We'll do an OFF -> ON transition:
*/
@@ -3502,7 +3771,7 @@ void trace_softirqs_on(unsigned long ip)
*/
if (curr->hardirqs_enabled)
mark_held_locks(curr, LOCK_ENABLED_SOFTIRQ);
- current->lockdep_recursion = 0;
+ lockdep_recursion_finish();
}
/*
@@ -3596,7 +3865,8 @@ lock_used:
static inline unsigned int task_irq_context(struct task_struct *task)
{
- return 2 * !!task->hardirq_context + !!task->softirq_context;
+ return LOCK_CHAIN_HARDIRQ_CONTEXT * !!task->hardirq_context +
+ LOCK_CHAIN_SOFTIRQ_CONTEXT * !!task->softirq_context;
}
static int separate_irq_context(struct task_struct *curr,
@@ -3682,6 +3952,113 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this,
return ret;
}
+static int
+print_lock_invalid_wait_context(struct task_struct *curr,
+ struct held_lock *hlock)
+{
+ if (!debug_locks_off())
+ return 0;
+ if (debug_locks_silent)
+ return 0;
+
+ pr_warn("\n");
+ pr_warn("=============================\n");
+ pr_warn("[ BUG: Invalid wait context ]\n");
+ print_kernel_ident();
+ pr_warn("-----------------------------\n");
+
+ pr_warn("%s/%d is trying to lock:\n", curr->comm, task_pid_nr(curr));
+ print_lock(hlock);
+
+ pr_warn("other info that might help us debug this:\n");
+ lockdep_print_held_locks(curr);
+
+ pr_warn("stack backtrace:\n");
+ dump_stack();
+
+ return 0;
+}
+
+/*
+ * Verify the wait_type context.
+ *
+ * This check validates