diff options
Diffstat (limited to 'kernel/events')
-rw-r--r-- | kernel/events/core.c | 141 | ||||
-rw-r--r-- | kernel/events/ring_buffer.c | 24 |
2 files changed, 129 insertions, 36 deletions
diff --git a/kernel/events/core.c b/kernel/events/core.c index 00a014670ed0..6cb6d685191d 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -2666,6 +2666,25 @@ perf_install_in_context(struct perf_event_context *ctx, */ smp_store_release(&event->ctx, ctx); + /* + * perf_event_attr::disabled events will not run and can be initialized + * without IPI. Except when this is the first event for the context, in + * that case we need the magic of the IPI to set ctx->is_active. + * + * The IOC_ENABLE that is sure to follow the creation of a disabled + * event will issue the IPI and reprogram the hardware. + */ + if (__perf_effective_state(event) == PERF_EVENT_STATE_OFF && ctx->nr_events) { + raw_spin_lock_irq(&ctx->lock); + if (ctx->task == TASK_TOMBSTONE) { + raw_spin_unlock_irq(&ctx->lock); + return; + } + add_event_to_ctx(event, ctx); + raw_spin_unlock_irq(&ctx->lock); + return; + } + if (!task) { cpu_function_call(cpu, __perf_install_in_context, event); return; @@ -3204,10 +3223,21 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn, raw_spin_lock(&ctx->lock); raw_spin_lock_nested(&next_ctx->lock, SINGLE_DEPTH_NESTING); if (context_equiv(ctx, next_ctx)) { + struct pmu *pmu = ctx->pmu; + WRITE_ONCE(ctx->task, next); WRITE_ONCE(next_ctx->task, task); - swap(ctx->task_ctx_data, next_ctx->task_ctx_data); + /* + * PMU specific parts of task perf context can require + * additional synchronization. As an example of such + * synchronization see implementation details of Intel + * LBR call stack data profiling; + */ + if (pmu->swap_task_ctx) + pmu->swap_task_ctx(ctx, next_ctx); + else + swap(ctx->task_ctx_data, next_ctx->task_ctx_data); /* * RCU_INIT_POINTER here is safe because we've not @@ -4229,8 +4259,9 @@ find_get_context(struct pmu *pmu, struct task_struct *task, if (!task) { /* Must be root to operate on a CPU event: */ - if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN)) - return ERR_PTR(-EACCES); + err = perf_allow_cpu(&event->attr); + if (err) + return ERR_PTR(err); cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu); ctx = &cpuctx->ctx; @@ -4539,6 +4570,8 @@ static void _free_event(struct perf_event *event) unaccount_event(event); + security_perf_event_free(event); + if (event->rb) { /* * Can happen when we close an event with re-directed output. @@ -4992,6 +5025,10 @@ perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) struct perf_event_context *ctx; int ret; + ret = security_perf_event_read(event); + if (ret) + return ret; + ctx = perf_event_ctx_lock(event); ret = __perf_read(event, buf, count); perf_event_ctx_unlock(event, ctx); @@ -5256,6 +5293,11 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) struct perf_event_context *ctx; long ret; + /* Treat ioctl like writes as it is likely a mutating operation. */ + ret = security_perf_event_write(event); + if (ret) + return ret; + ctx = perf_event_ctx_lock(event); ret = _perf_ioctl(event, cmd, arg); perf_event_ctx_unlock(event, ctx); @@ -5721,6 +5763,10 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) if (!(vma->vm_flags & VM_SHARED)) return -EINVAL; + ret = security_perf_event_read(event); + if (ret) + return ret; + vma_size = vma->vm_end - vma->vm_start; if (vma->vm_pgoff == 0) { @@ -5846,7 +5892,7 @@ accounting: lock_limit >>= PAGE_SHIFT; locked = atomic64_read(&vma->vm_mm->pinned_vm) + extra; - if ((locked > lock_limit) && perf_paranoid_tracepoint_raw() && + if ((locked > lock_limit) && perf_is_paranoid() && !capable(CAP_IPC_LOCK)) { ret = -EPERM; goto unlock; @@ -10034,7 +10080,7 @@ static struct lock_class_key cpuctx_lock; int perf_pmu_register(struct pmu *pmu, const char *name, int type) { - int cpu, ret; + int cpu, ret, max = PERF_TYPE_MAX; mutex_lock(&pmus_lock); ret = -ENOMEM; @@ -10047,12 +10093,17 @@ int perf_pmu_register(struct pmu *pmu, const char *name, int type) goto skip_type; pmu->name = name; - if (type < 0) { - type = idr_alloc(&pmu_idr, pmu, PERF_TYPE_MAX, 0, GFP_KERNEL); - if (type < 0) { - ret = type; + if (type != PERF_TYPE_SOFTWARE) { + if (type >= 0) + max = type; + + ret = idr_alloc(&pmu_idr, pmu, max, 0, GFP_KERNEL); + if (ret < 0) goto free_pdc; - } + + WARN_ON(type >= 0 && ret != type); + + type = ret; } pmu->type = type; @@ -10129,7 +10180,16 @@ got_cpu_context: if (!pmu->event_idx) pmu->event_idx = perf_event_idx_default; - list_add_rcu(&pmu->entry, &pmus); + /* + * Ensure the TYPE_SOFTWARE PMUs are at the head of the list, + * since these cannot be in the IDR. This way the linear search + * is fast, provided a valid software event is provided. + */ + if (type == PERF_TYPE_SOFTWARE || !name) + list_add_rcu(&pmu->entry, &pmus); + else + list_add_tail_rcu(&pmu->entry, &pmus); + atomic_set(&pmu->exclusive_cnt, 0); ret = 0; unlock: @@ -10142,7 +10202,7 @@ free_dev: put_device(pmu->dev); free_idr: - if (pmu->type >= PERF_TYPE_MAX) + if (pmu->type != PERF_TYPE_SOFTWARE) idr_remove(&pmu_idr, pmu->type); free_pdc: @@ -10164,7 +10224,7 @@ void perf_pmu_unregister(struct pmu *pmu) synchronize_rcu(); free_percpu(pmu->pmu_disable_count); - if (pmu->type >= PERF_TYPE_MAX) + if (pmu->type != PERF_TYPE_SOFTWARE) idr_remove(&pmu_idr, pmu->type); if (pmu_bus_running) { if (pmu->nr_addr_filters) @@ -10234,9 +10294,8 @@ static int perf_try_init_event(struct pmu *pmu, struct perf_event *event) static struct pmu *perf_init_event(struct perf_event *event) { + int idx, type, ret; struct pmu *pmu; - int idx; - int ret; idx = srcu_read_lock(&pmus_srcu); @@ -10249,12 +10308,27 @@ static struct pmu *perf_init_event(struct perf_event *event) } rcu_read_lock(); - pmu = idr_find(&pmu_idr, event->attr.type); + /* + * PERF_TYPE_HARDWARE and PERF_TYPE_HW_CACHE + * are often aliases for PERF_TYPE_RAW. + */ + type = event->attr.type; + if (type == PERF_TYPE_HARDWARE || type == PERF_TYPE_HW_CACHE) + type = PERF_TYPE_RAW; + +again: + pmu = idr_find(&pmu_idr, type); rcu_read_unlock(); if (pmu) { ret = perf_try_init_event(pmu, event); + if (ret == -ENOENT && event->attr.type != type) { + type = event->attr.type; + goto again; + } + if (ret) pmu = ERR_PTR(ret); + goto unlock; } @@ -10589,11 +10663,20 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, } } + err = security_perf_event_alloc(event); + if (err) + goto err_callchain_buffer; + /* symmetric to unaccount_event() in _free_event() */ account_event(event); return event; +err_callchain_buffer: + if (!event->parent) { + if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) + put_callchain_buffers(); + } err_addr_filters: kfree(event->addr_filter_ranges); @@ -10682,9 +10765,11 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr, attr->branch_sample_type = mask; } /* privileged levels capture (kernel, hv): check permissions */ - if ((mask & PERF_SAMPLE_BRANCH_PERM_PLM) - && perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN)) - return -EACCES; + if (mask & PERF_SAMPLE_BRANCH_PERM_PLM) { + ret = perf_allow_kernel(attr); + if (ret) + return ret; + } } if (attr->sample_type & PERF_SAMPLE_REGS_USER) { @@ -10897,13 +10982,19 @@ SYSCALL_DEFINE5(perf_event_open, if (flags & ~PERF_FLAG_ALL) return -EINVAL; + /* Do we allow access to perf_event_open(2) ? */ + err = security_perf_event_open(&attr, PERF_SECURITY_OPEN); + if (err) + return err; + err = perf_copy_attr(attr_uptr, &attr); if (err) return err; if (!attr.exclude_kernel) { - if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN)) - return -EACCES; + err = perf_allow_kernel(&attr); + if (err) + return err; } if (attr.namespaces) { @@ -10920,9 +11011,11 @@ SYSCALL_DEFINE5(perf_event_open, } /* Only privileged users can get physical addresses */ - if ((attr.sample_type & PERF_SAMPLE_PHYS_ADDR) && - perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN)) - return -EACCES; + if ((attr.sample_type & PERF_SAMPLE_PHYS_ADDR)) { + err = perf_allow_kernel(&attr); + if (err) + return err; + } err = security_locked_down(LOCKDOWN_PERF); if (err && (attr.sample_type & PERF_SAMPLE_REGS_INTR)) diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index ffb59a4ef4ff..246c83ac5643 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -754,6 +754,14 @@ static void *perf_mmap_alloc_page(int cpu) return page_address(page); } +static void perf_mmap_free_page(void *addr) +{ + struct page *page = virt_to_page(addr); + + page->mapping = NULL; + __free_page(page); +} + struct ring_buffer *rb_alloc(int nr_pages, long watermark, int cpu, int flags) { struct ring_buffer *rb; @@ -788,9 +796,9 @@ struct ring_buffer *rb_alloc(int nr_pages, long watermark, int cpu, int flags) fail_data_pages: for (i--; i >= 0; i--) - free_page((unsigned long)rb->data_pages[i]); + perf_mmap_free_page(rb->data_pages[i]); - free_page((unsigned long)rb->user_page); + perf_mmap_free_page(rb->user_page); fail_user_page: kfree(rb); @@ -799,21 +807,13 @@ fail: return NULL; } -static void perf_mmap_free_page(unsigned long addr) -{ - struct page *page = virt_to_page((void *)addr); - - page->mapping = NULL; - __free_page(page); -} - void rb_free(struct ring_buffer *rb) { int i; - perf_mmap_free_page((unsigned long)rb->user_page); + perf_mmap_free_page(rb->user_page); for (i = 0; i < rb->nr_pages; i++) - perf_mmap_free_page((unsigned long)rb->data_pages[i]); + perf_mmap_free_page(rb->data_pages[i]); kfree(rb); } |