diff options
author | Ingo Molnar <mingo@kernel.org> | 2017-08-26 09:19:13 +0200 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2017-08-26 09:19:13 +0200 |
commit | 413d63d71b222108d19703f3fd5cf9108652a730 (patch) | |
tree | 4680de6aebb6430dc5f3d9327f86d65149e6b5ae /kernel | |
parent | d6c8103b0265d8db30e20e948a4f06382bbdaea7 (diff) | |
parent | 90a6cd503982bfd33ce8c70eb49bd2dd33bc6325 (diff) |
Merge branch 'linus' into x86/mm to pick up fixes and to fix conflicts
Conflicts:
arch/x86/kernel/head64.c
arch/x86/mm/mmap.c
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel')
35 files changed, 490 insertions, 183 deletions
diff --git a/kernel/audit.c b/kernel/audit.c index 833267bbd80b..6dd556931739 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -641,6 +641,7 @@ static int auditd_send_unicast_skb(struct sk_buff *skb) ac = rcu_dereference(auditd_conn); if (!ac) { rcu_read_unlock(); + kfree_skb(skb); rc = -ECONNREFUSED; goto err; } diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c index 62d686d96581..9eb8b3511636 100644 --- a/kernel/audit_watch.c +++ b/kernel/audit_watch.c @@ -66,7 +66,7 @@ static struct fsnotify_group *audit_watch_group; /* fsnotify events we care about. */ #define AUDIT_FS_WATCH (FS_MOVE | FS_CREATE | FS_DELETE | FS_DELETE_SELF |\ - FS_MOVE_SELF | FS_EVENT_ON_CHILD) + FS_MOVE_SELF | FS_EVENT_ON_CHILD | FS_UNMOUNT) static void audit_free_parent(struct audit_parent *parent) { @@ -457,13 +457,15 @@ void audit_remove_watch_rule(struct audit_krule *krule) list_del(&krule->rlist); if (list_empty(&watch->rules)) { + /* + * audit_remove_watch() drops our reference to 'parent' which + * can get freed. Grab our own reference to be safe. + */ + audit_get_parent(parent); audit_remove_watch(watch); - - if (list_empty(&parent->watches)) { - audit_get_parent(parent); + if (list_empty(&parent->watches)) fsnotify_destroy_mark(&parent->mark, audit_watch_group); - audit_put_parent(parent); - } + audit_put_parent(parent); } } diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 045646da97cc..6c772adabad2 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1289,7 +1289,7 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog, info_len = min_t(u32, sizeof(info), info_len); if (copy_from_user(&info, uinfo, info_len)) - return err; + return -EFAULT; info.type = prog->type; info.id = prog->aux->id; @@ -1312,7 +1312,7 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog, } ulen = info.xlated_prog_len; - info.xlated_prog_len = bpf_prog_size(prog->len); + info.xlated_prog_len = bpf_prog_insn_size(prog); if (info.xlated_prog_len && ulen) { uinsns = u64_to_user_ptr(info.xlated_prog_insns); ulen = min_t(u32, info.xlated_prog_len, ulen); diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 6a86723c5b64..664d93972373 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -504,6 +504,7 @@ static void reset_reg_range_values(struct bpf_reg_state *regs, u32 regno) { regs[regno].min_value = BPF_REGISTER_MIN_RANGE; regs[regno].max_value = BPF_REGISTER_MAX_RANGE; + regs[regno].value_from_signed = false; regs[regno].min_align = 0; } @@ -777,12 +778,13 @@ static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, return -EACCES; } -static bool is_pointer_value(struct bpf_verifier_env *env, int regno) +static bool __is_pointer_value(bool allow_ptr_leaks, + const struct bpf_reg_state *reg) { - if (env->allow_ptr_leaks) + if (allow_ptr_leaks) return false; - switch (env->cur_state.regs[regno].type) { + switch (reg->type) { case UNKNOWN_VALUE: case CONST_IMM: return false; @@ -791,6 +793,11 @@ static bool is_pointer_value(struct bpf_verifier_env *env, int regno) } } +static bool is_pointer_value(struct bpf_verifier_env *env, int regno) +{ + return __is_pointer_value(env->allow_ptr_leaks, &env->cur_state.regs[regno]); +} + static int check_pkt_ptr_alignment(const struct bpf_reg_state *reg, int off, int size, bool strict) { @@ -1832,10 +1839,24 @@ static void adjust_reg_min_max_vals(struct bpf_verifier_env *env, dst_align = dst_reg->min_align; /* We don't know anything about what was done to this register, mark it - * as unknown. + * as unknown. Also, if both derived bounds came from signed/unsigned + * mixed compares and one side is unbounded, we cannot really do anything + * with them as boundaries cannot be trusted. Thus, arithmetic of two + * regs of such kind will get invalidated bounds on the dst side. */ - if (min_val == BPF_REGISTER_MIN_RANGE && - max_val == BPF_REGISTER_MAX_RANGE) { + if ((min_val == BPF_REGISTER_MIN_RANGE && + max_val == BPF_REGISTER_MAX_RANGE) || + (BPF_SRC(insn->code) == BPF_X && + ((min_val != BPF_REGISTER_MIN_RANGE && + max_val == BPF_REGISTER_MAX_RANGE) || + (min_val == BPF_REGISTER_MIN_RANGE && + max_val != BPF_REGISTER_MAX_RANGE) || + (dst_reg->min_value != BPF_REGISTER_MIN_RANGE && + dst_reg->max_value == BPF_REGISTER_MAX_RANGE) || + (dst_reg->min_value == BPF_REGISTER_MIN_RANGE && + dst_reg->max_value != BPF_REGISTER_MAX_RANGE)) && + regs[insn->dst_reg].value_from_signed != + regs[insn->src_reg].value_from_signed)) { reset_reg_range_values(regs, insn->dst_reg); return; } @@ -1844,10 +1865,12 @@ static void adjust_reg_min_max_vals(struct bpf_verifier_env *env, * do our normal operations to the register, we need to set the values * to the min/max since they are undefined. */ - if (min_val == BPF_REGISTER_MIN_RANGE) - dst_reg->min_value = BPF_REGISTER_MIN_RANGE; - if (max_val == BPF_REGISTER_MAX_RANGE) - dst_reg->max_value = BPF_REGISTER_MAX_RANGE; + if (opcode != BPF_SUB) { + if (min_val == BPF_REGISTER_MIN_RANGE) + dst_reg->min_value = BPF_REGISTER_MIN_RANGE; + if (max_val == BPF_REGISTER_MAX_RANGE) + dst_reg->max_value = BPF_REGISTER_MAX_RANGE; + } switch (opcode) { case BPF_ADD: @@ -1858,10 +1881,17 @@ static void adjust_reg_min_max_vals(struct bpf_verifier_env *env, dst_reg->min_align = min(src_align, dst_align); break; case BPF_SUB: + /* If one of our values was at the end of our ranges, then the + * _opposite_ value in the dst_reg goes to the end of our range. + */ + if (min_val == BPF_REGISTER_MIN_RANGE) + dst_reg->max_value = BPF_REGISTER_MAX_RANGE; + if (max_val == BPF_REGISTER_MAX_RANGE) + dst_reg->min_value = BPF_REGISTER_MIN_RANGE; if (dst_reg->min_value != BPF_REGISTER_MIN_RANGE) - dst_reg->min_value -= min_val; + dst_reg->min_value -= max_val; if (dst_reg->max_value != BPF_REGISTER_MAX_RANGE) - dst_reg->max_value -= max_val; + dst_reg->max_value -= min_val; dst_reg->min_align = min(src_align, dst_align); break; case BPF_MUL: @@ -2023,6 +2053,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) regs[insn->dst_reg].max_value = insn->imm; regs[insn->dst_reg].min_value = insn->imm; regs[insn->dst_reg].min_align = calc_align(insn->imm); + regs[insn->dst_reg].value_from_signed = false; } } else if (opcode > BPF_END) { @@ -2198,40 +2229,63 @@ static void reg_set_min_max(struct bpf_reg_state *true_reg, struct bpf_reg_state *false_reg, u64 val, u8 opcode) { + bool value_from_signed = true; + bool is_range = true; + switch (opcode) { case BPF_JEQ: /* If this is false then we know nothing Jon Snow, but if it is * true then we know for sure. */ true_reg->max_value = true_reg->min_value = val; + is_range = false; break; case BPF_JNE: /* If this is true we know nothing Jon Snow, but if it is false * we know the value for sure; */ false_reg->max_value = false_reg->min_value = val; + is_range = false; break; case BPF_JGT: - /* Unsigned comparison, the minimum value is 0. */ - false_reg->min_value = 0; + value_from_signed = false; /* fallthrough */ case BPF_JSGT: + if (true_reg->value_from_signed != value_from_signed) + reset_reg_range_values(true_reg, 0); + if (false_reg->value_from_signed != value_from_signed) + reset_reg_range_values(false_reg, 0); + if (opcode == BPF_JGT) { + /* Unsigned comparison, the minimum value is 0. */ + false_reg->min_value = 0; + } /* If this is false then we know the maximum val is val, * otherwise we know the min val is val+1. */ false_reg->max_value = val; + false_reg->value_from_signed = value_from_signed; true_reg->min_value = val + 1; + true_reg->value_from_signed = value_from_signed; break; case BPF_JGE: - /* Unsigned comparison, the minimum value is 0. */ - false_reg->min_value = 0; + value_from_signed = false; /* fallthrough */ case BPF_JSGE: + if (true_reg->value_from_signed != value_from_signed) + reset_reg_range_values(true_reg, 0); + if (false_reg->value_from_signed != value_from_signed) + reset_reg_range_values(false_reg, 0); + if (opcode == BPF_JGE) { + /* Unsigned comparison, the minimum value is 0. */ + false_reg->min_value = 0; + } /* If this is false then we know the maximum value is val - 1, * otherwise we know the mimimum value is val. */ false_reg->max_value = val - 1; + false_reg->value_from_signed = value_from_signed; true_reg->min_value = val; + true_reg->value_from_signed = value_from_signed; break; default: break; @@ -2239,6 +2293,12 @@ static void reg_set_min_max(struct bpf_reg_state *true_reg, check_reg_overflow(false_reg); check_reg_overflow(true_reg); + if (is_range) { + if (__is_pointer_value(false, false_reg)) + reset_reg_range_values(false_reg, 0); + if (__is_pointer_value(false, true_reg)) + reset_reg_range_values(true_reg, 0); + } } /* Same as above, but for the case that dst_reg is a CONST_IMM reg and src_reg @@ -2248,41 +2308,64 @@ static void reg_set_min_max_inv(struct bpf_reg_state *true_reg, struct bpf_reg_state *false_reg, u64 val, u8 opcode) { + bool value_from_signed = true; + bool is_range = true; + switch (opcode) { case BPF_JEQ: /* If this is false then we know nothing Jon Snow, but if it is * true then we know for sure. */ true_reg->max_value = true_reg->min_value = val; + is_range = false; break; case BPF_JNE: /* If this is true we know nothing Jon Snow, but if it is false * we know the value for sure; */ false_reg->max_value = false_reg->min_value = val; + is_range = false; break; case BPF_JGT: - /* Unsigned comparison, the minimum value is 0. */ - true_reg->min_value = 0; + value_from_signed = false; /* fallthrough */ case BPF_JSGT: + if (true_reg->value_from_signed != value_from_signed) + reset_reg_range_values(true_reg, 0); + if (false_reg->value_from_signed != value_from_signed) + reset_reg_range_values(false_reg, 0); + if (opcode == BPF_JGT) { + /* Unsigned comparison, the minimum value is 0. */ + true_reg->min_value = 0; + } /* * If this is false, then the val is <= the register, if it is * true the register <= to the val. */ false_reg->min_value = val; + false_reg->value_from_signed = value_from_signed; true_reg->max_value = val - 1; + true_reg->value_from_signed = value_from_signed; break; case BPF_JGE: - /* Unsigned comparison, the minimum value is 0. */ - true_reg->min_value = 0; + value_from_signed = false; /* fallthrough */ case BPF_JSGE: + if (true_reg->value_from_signed != value_from_signed) + reset_reg_range_values(true_reg, 0); + if (false_reg->value_from_signed != value_from_signed) + reset_reg_range_values(false_reg, 0); + if (opcode == BPF_JGE) { + /* Unsigned comparison, the minimum value is 0. */ + true_reg->min_value = 0; + } /* If this is false then constant < register, if it is true then * the register < constant. */ false_reg->min_value = val + 1; + false_reg->value_from_signed = value_from_signed; true_reg->max_value = val; + true_reg->value_from_signed = value_from_signed; break; default: break; @@ -2290,6 +2373,12 @@ static void reg_set_min_max_inv(struct bpf_reg_state *true_reg, check_reg_overflow(false_reg); check_reg_overflow(true_reg); + if (is_range) { + if (__is_pointer_value(false, false_reg)) + reset_reg_range_values(false_reg, 0); + if (__is_pointer_value(false, true_reg)) + reset_reg_range_values(true_reg, 0); + } } static void mark_map_reg(struct bpf_reg_state *regs, u32 regno, u32 id, diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h index 793565c05742..8b4c3c2f2509 100644 --- a/kernel/cgroup/cgroup-internal.h +++ b/kernel/cgroup/cgroup-internal.h @@ -33,6 +33,9 @@ struct cgroup_taskset { struct list_head src_csets; struct list_head dst_csets; + /* the number of tasks in the set */ + int nr_tasks; + /* the subsys currently being processed */ int ssid; diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 620794a20a33..df2e0f14a95d 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -2006,6 +2006,8 @@ static void cgroup_migrate_add_task(struct task_struct *task, if (!cset->mg_src_cgrp) return; + mgctx->tset.nr_tasks++; + list_move_tail(&task->cg_list, &cset->mg_tasks); if (list_empty(&cset->mg_node)) list_add_tail(&cset->mg_node, @@ -2094,21 +2096,19 @@ static int cgroup_migrate_execute(struct cgroup_mgctx *mgctx) struct css_set *cset, *tmp_cset; int ssid, failed_ssid, ret; - /* methods shouldn't be called if no task is actually migrating */ - if (list_empty(&tset->src_csets)) - return 0; - /* check that we can legitimately attach to the cgroup */ - do_each_subsys_mask(ss, ssid, mgctx->ss_mask) { - if (ss->can_attach) { - tset->ssid = ssid; - ret = ss->can_attach(tset); - if (ret) { - failed_ssid = ssid; - goto out_cancel_attach; + if (tset->nr_tasks) { + do_each_subsys_mask(ss, ssid, mgctx->ss_mask) { + if (ss->can_attach) { + tset->ssid = ssid; + ret = ss->can_attach(tset); + if (ret) { + failed_ssid = ssid; + goto out_cancel_attach; + } } - } - } while_each_subsys_mask(); + } while_each_subsys_mask(); + } /* * Now that we're guaranteed success, proceed to move all tasks to @@ -2137,25 +2137,29 @@ static int cgroup_migrate_execute(struct cgroup_mgctx *mgctx) */ tset->csets = &tset->dst_csets; - do_each_subsys_mask(ss, ssid, mgctx->ss_mask) { - if (ss->attach) { - tset->ssid = ssid; - ss->attach(tset); - } - } while_each_subsys_mask(); + if (tset->nr_tasks) { + do_each_subsys_mask(ss, ssid, mgctx->ss_mask) { + if (ss->attach) { + tset->ssid = ssid; + ss->attach(tset); + } + } while_each_subsys_mask(); + } ret = 0; goto out_release_tset; out_cancel_attach: - do_each_subsys_mask(ss, ssid, mgctx->ss_mask) { - if (ssid == failed_ssid) - break; - if (ss->cancel_attach) { - tset->ssid = ssid; - ss->cancel_attach(tset); - } - } while_each_subsys_mask(); + if (tset->nr_tasks) { + do_each_subsys_mask(ss, ssid, mgctx->ss_mask) { + if (ssid == failed_ssid) + break; + if (ss->cancel_attach) { + tset->ssid = ssid; + ss->cancel_attach(tset); + } + } while_each_subsys_mask(); + } out_release_tset: spin_lock_irq(&css_set_lock); list_splice_init(&tset->dst_csets, &tset->src_csets); @@ -2997,11 +3001,11 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, cgrp->subtree_control &= ~disable; ret = cgroup_apply_control(cgrp); - cgroup_finalize_control(cgrp, ret); + if (ret) + goto out_unlock; kernfs_activate(cgrp->kn); - ret = 0; out_unlock: cgroup_kn_unlock(of->kn); return ret ?: nbytes; @@ -4669,6 +4673,10 @@ int __init cgroup_init(void) if (ss->bind) ss->bind(init_css_set.subsys[ssid]); + + mutex_lock(&cgroup_mutex); + css_populate_dir(init_css_set.subsys[ssid]); + mutex_unlock(&cgroup_mutex); } /* init_css_set.subsys[] has been updated, re-hash */ diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index ca8376e5008c..8d5151688504 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -63,6 +63,7 @@ #include <linux/cgroup.h> #include <linux/wait.h> +DEFINE_STATIC_KEY_FALSE(cpusets_pre_enable_key); DEFINE_STATIC_KEY_FALSE(cpusets_enabled_key); /* See "Frequency meter" comments, below. */ diff --git a/kernel/events/core.c b/kernel/events/core.c index 1538df9b2b65..ee20d4c546b5 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1452,6 +1452,13 @@ static enum event_type_t get_event_type(struct perf_event *event) lockdep_assert_held(&ctx->lock); + /* + * It's 'group type', really, because if our group leader is + * pinned, so are we. + */ + if (event->group_leader != event) + event = event->group_leader; + event_type = event->attr.pinned ? EVENT_PINNED : EVENT_FLEXIBLE; if (!ctx->task) event_type |= EVENT_CPU; @@ -2210,6 +2217,33 @@ static int group_can_go_on(struct perf_event *event, return can_add_hw; } +/* + * Complement to update_event_times(). This computes the tstamp_* values to + * continue 'enabled' state from @now, and effectively discards the time + * between the prior tstamp_stopped and now (as we were in the OFF state, or + * just switched (context) time base). + * + * This further assumes '@event->state == INACTIVE' (we just came from OFF) and + * cannot have been scheduled in yet. And going into INACTIVE state means + * '@event->tstamp_stopped = @now'. + * + * Thus given the rules of update_event_times(): + * + * total_time_enabled = tstamp_stopped - tstamp_enabled + * total_time_running = tstamp_stopped - tstamp_running + * + * We can insert 'tstamp_stopped == now' and reverse them to compute new + * tstamp_* values. + */ +static void __perf_event_enable_time(struct perf_event *event, u64 now) +{ + WARN_ON_ONCE(event->state != PERF_EVENT_STATE_INACTIVE); + + event->tstamp_stopped = now; + event->tstamp_enabled = now - event->total_time_enabled; + event->tstamp_running = now - event->total_time_running; +} + static void add_event_to_ctx(struct perf_event *event, struct perf_event_context *ctx) { @@ -2217,9 +2251,12 @@ static void add_event_to_ctx(struct perf_event *event, list_add_event(event, ctx); perf_group_attach(event); - event->tstamp_enabled = tstamp; - event->tstamp_running = tstamp; - event->tstamp_stopped = tstamp; + /* + * We can be called with event->state == STATE_OFF when we create with + * .disabled = 1. In that case the IOC_ENABLE will call this function. + */ + if (event->state == PERF_EVENT_STATE_INACTIVE) + __perf_event_enable_time(event, tstamp); } static void ctx_sched_out(struct perf_event_context *ctx, @@ -2464,10 +2501,11 @@ static void __perf_event_mark_enabled(struct perf_event *event) u64 tstamp = perf_event_time(event); event->state = PERF_EVENT_STATE_INACTIVE; - event->tstamp_enabled = tstamp - event->total_time_enabled; + __perf_event_enable_time(event, tstamp); list_for_each_entry(sub, &event->sibling_list, group_entry) { + /* XXX should not be > INACTIVE if event isn't */ if (sub->state >= PERF_EVENT_STATE_INACTIVE) - sub->tstamp_enabled = tstamp - sub->total_time_enabled; + __perf_event_enable_time(sub, tstamp); } } @@ -4378,7 +4416,9 @@ EXPORT_SYMBOL_GPL(perf_event_read_value); static int __perf_read_group_add(struct perf_event *leader, u64 read_format, u64 *values) { + struct perf_event_context *ctx = leader->ctx; struct perf_event *sub; + unsigned long flags; int n = 1; /* skip @nr */ int ret; @@ -4408,12 +4448,15 @@ static int __perf_read_group_add(struct perf_event *leader, if (read_format & PERF_FORMAT_ID) values[n++] = primary_event_id(leader); + raw_spin_lock_irqsave(&ctx->lock, flags); + list_for_each_entry(sub, &leader->sibling_list, group_entry) { values[n++] += perf_event_count(sub); if (read_format & PERF_FORMAT_ID) values[n++] = primary_event_id(sub); } + raw_spin_unlock_irqrestore(&ctx->lock, flags); return 0; } @@ -5078,7 +5121,7 @@ static void perf_mmap_open(struct vm_area_struct *vma) atomic_inc(&event->rb->aux_mmap_count); if (event->pmu->event_mapped) - event->pmu->event_mapped(event); + event->pmu->event_mapped(event, vma->vm_mm); } static void perf_pmu_output_stop(struct perf_event *event); @@ -5101,7 +5144,7 @@ static void perf_mmap_close(struct vm_area_struct *vma) unsigned long size = perf_data_size(rb); if (event->pmu->event_unmapped) - event->pmu->event_unmapped(event); + event->pmu->event_unmapped(event, vma->vm_mm); /* * rb->aux_mmap_count will always drop before rb->mmap_count and @@ -5399,7 +5442,7 @@ aux_unlock: vma->vm_ops = &perf_mmap_vmops; if (event->pmu->event_mapped) - event->pmu->event_mapped(event); + event->pmu->event_mapped(event, vma->vm_mm); return ret; } @@ -7321,21 +7364,6 @@ int perf_event_account_interrupt(struct perf_event *event) return __perf_event_account_interrupt(event, 1); } -static bool sample_is_allowed(struct perf_event *event, struct pt_regs *regs) -{ - /* - * Due to interrupt latency (AKA "skid"), we may enter the - * kernel before taking an overflow, even if the PMU is only - * counting user events. - * To avoid leaking information to userspace, we must always - * reject kernel samples when exclude_kernel is set. - */ - if (event->attr.exclude_kernel && !user_mode(regs)) - return false; - - return true; -} - /* * Generic event overflow handling, sampling. */ @@ -7357,12 +7385,6 @@ static int __perf_event_overflow(struct perf_event *event, ret = __perf_event_account_interrupt(event, throttle); /* - * For security, drop the skid kernel samples if necessary. - */ - if (!sample_is_allowed(event, regs)) - return ret; - - /* * XXX event_limit might not quite work as expected on inherited * events */ diff --git a/kernel/fork.c b/kernel/fork.c index 17921b0390b4..e075b7780421 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -807,7 +807,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p, mm_init_aio(mm); mm_init_owner(mm, p); mmu_notifier_mm_init(mm); - clear_tlb_flush_pending(mm); + init_tlb_flush_pending(mm); #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS mm->pmd_huge_pte = NULL; #endif diff --git a/kernel/futex.c b/kernel/futex.c index c934689043b2..f50b434756c1 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -212,7 +212,7 @@ struct futex_pi_state { atomic_t refcount; union futex_key key; -}; +} __randomize_layout; /** * struct futex_q - The hashed futex queue entry, one per waiting task @@ -246,7 +246,7 @@ struct futex_q { struct rt_mutex_waiter *rt_waiter; union futex_key *requeue_pi_key; u32 bitset; -}; +} __randomize_layout; static const struct futex_q futex_q_init = { /* list gets initialized in queue_me()*/ @@ -670,13 +670,14 @@ again: * this reference was taken by ihold under the page lock * pinning the inode in place so i_lock was unnecessary. The * only way for this check to fail is if the inode was - * truncated in parallel so warn for now if this happens. + * truncated in parallel which is almost certainly an + * application bug. In such a case, just retry. * * We are not calling into get_futex_key_refs() in file-backed * cases, therefore a successful atomic_inc return below will * guarantee that get_futex_key() will still imply smp_mb(); (B). */ - if (WARN_ON_ONCE(!atomic_inc_not_zero(&inode->i_count))) { + if (!atomic_inc_not_zero(&inode->i_count)) { rcu_read_unlock(); put_page(page); diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index d171bc57e1e0..3675c6004f2a 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -170,21 +170,11 @@ static void irq_state_clr_disabled(struct irq_desc *desc) irqd_clear(&desc->irq_data, IRQD_IRQ_DISABLED); } -static void irq_state_set_disabled(struct irq_desc *desc) -{ - irqd_set(&desc->irq_data, IRQD_IRQ_DISABLED); -} - static void irq_state_clr_masked(struct irq_desc *desc) { irqd_clear(&desc->irq_data, IRQD_IRQ_MASKED); } -static void irq_state_set_masked(struct irq_desc *desc) -{ - irqd_set(&desc->irq_data, IRQD_IRQ_MASKED); -} - static void irq_state_clr_started(struct irq_desc *desc) { irqd_clear(&desc->irq_data, IRQD_IRQ_STARTED); @@ -1010,7 +1000,7 @@ EXPORT_SYMBOL_GPL(irq_set_chip_and_handler_name); void irq_modify_status(unsigned int irq, unsigned long clr, unsigned long set) { - unsigned long flags; + unsigned long flags, trigger, tmp; struct irq_desc *desc = irq_get_desc_lock(irq, &flags, 0); if (!desc) @@ -1024,6 +1014,8 @@ void irq_modify_status(unsigned int irq, unsigned long clr, unsigned long set) irq_settings_clr_and_set(desc, clr, set); + trigger = irqd_get_trigger_type(&desc->irq_data); + irqd_clear(&desc->irq_data, IRQD_NO_BALANCING | IRQD_PER_CPU | IRQD_TRIGGER_MASK | IRQD_LEVEL | IRQD_MOVE_PCNTXT); if (irq_settings_has_no_balance_set(desc)) @@ -1035,7 +1027,11 @@ void irq_modify_status(unsigned int irq, unsigned long clr, unsigned long set) if (irq_settings_is_level(desc)) irqd_set(&desc->irq_data, IRQD_LEVEL); - irqd_set(&desc->irq_data, irq_settings_get_trigger_mask(desc)); + tmp = irq_settings_get_trigger_mask(desc); + if (tmp != IRQ_TYPE_NONE) + trigger = tmp; + + irqd_set(&desc->irq_data, trigger); irq_put_desc_unlock(desc, flags); } diff --git a/kernel/irq/cpuhotplug.c b/kernel/irq/cpuhotplug.c index aee8f7ec40af..638eb9c83d9f 100644 --- a/kernel/irq/cpuhotplug.c +++ b/kernel/irq/cpuhotplug.c @@ -95,8 +95,13 @@ static bool migrate_one_irq(struct irq_desc *desc) affinity = cpu_online_mask; brokeaff = true; } - - err = irq_do_set_affinity(d, affinity, true); + /* + * Do not set the force argument of irq_do_set_affinity() as this + * disables the masking of offline CPUs from the supplied affinity + * mask and therefore might keep/reassign the irq to the outgoing + * CPU. + */ + err = irq_do_set_affinity(d, affinity, false); if (err) { pr_warn_ratelimited("IRQ%u: set affinity failed(%d).\n", d->irq, err); diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h index dbfba9933ed2..a2c48058354c 100644 |