Diffstat (limited to 'kernel/events')
-rw-r--r--  kernel/events/core.c        | 141
-rw-r--r--  kernel/events/ring_buffer.c |  24
2 files changed, 129 insertions(+), 36 deletions(-)
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 00a014670ed0..6cb6d685191d 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -2666,6 +2666,25 @@ perf_install_in_context(struct perf_event_context *ctx,
*/
smp_store_release(&event->ctx, ctx);
+ /*
+ * perf_event_attr::disabled events will not run and can be initialized
+ * without IPI. Except when this is the first event for the context, in
+ * that case we need the magic of the IPI to set ctx->is_active.
+ *
+ * The IOC_ENABLE that is sure to follow the creation of a disabled
+ * event will issue the IPI and reprogram the hardware.
+ */
+ if (__perf_effective_state(event) == PERF_EVENT_STATE_OFF && ctx->nr_events) {
+ raw_spin_lock_irq(&ctx->lock);
+ if (ctx->task == TASK_TOMBSTONE) {
+ raw_spin_unlock_irq(&ctx->lock);
+ return;
+ }
+ add_event_to_ctx(event, ctx);
+ raw_spin_unlock_irq(&ctx->lock);
+ return;
+ }
+
if (!task) {
cpu_function_call(cpu, __perf_install_in_context, event);
return;
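
For context, the pattern the comment above refers to looks roughly like this from userspace (illustrative sketch, not part of the patch): the event is created disabled, so installing it into the context can skip the cross-CPU call, and the PERF_EVENT_IOC_ENABLE that follows does the IPI and hardware programming.

#include <linux/perf_event.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <unistd.h>

static int open_disabled_cycles(pid_t pid, int cpu)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.config = PERF_COUNT_HW_CPU_CYCLES;
	attr.disabled = 1;	/* created OFF: the install above can skip the IPI */

	return syscall(__NR_perf_event_open, &attr, pid, cpu, -1, 0);
}

/* later: ioctl(fd, PERF_EVENT_IOC_ENABLE, 0) issues the IPI and programs the PMU */
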
@@ -3204,10 +3223,21 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn,
raw_spin_lock(&ctx->lock);
raw_spin_lock_nested(&next_ctx->lock, SINGLE_DEPTH_NESTING);
if (context_equiv(ctx, next_ctx)) {
+ struct pmu *pmu = ctx->pmu;
+
WRITE_ONCE(ctx->task, next);
WRITE_ONCE(next_ctx->task, task);
- swap(ctx->task_ctx_data, next_ctx->task_ctx_data);
+ /*
+ * PMU specific parts of task perf context can require
+ * additional synchronization. As an example of such
+ * synchronization see implementation details of Intel
+ * LBR call stack data profiling;
+ */
+ if (pmu->swap_task_ctx)
+ pmu->swap_task_ctx(ctx, next_ctx);
+ else
+ swap(ctx->task_ctx_data, next_ctx->task_ctx_data);
/*
* RCU_INIT_POINTER here is safe because we've not
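
For reference, the optional callback tested above has roughly this shape in struct pmu, and a PMU-side implementation (hypothetical example, not from this patch) would look like:

	/* include/linux/perf_event.h, struct pmu (sketch of the optional hook): */
	void (*swap_task_ctx)		(struct perf_event_context *prev,
					 struct perf_event_context *next);
					/* optional */

/*
 * Hypothetical PMU-side implementation: a driver that keeps per-task state
 * in task_ctx_data can perform the swap itself, together with whatever
 * extra synchronization it needs (e.g. LBR call-stack user counts on Intel).
 */
static void example_pmu_swap_task_ctx(struct perf_event_context *prev,
				      struct perf_event_context *next)
{
	swap(prev->task_ctx_data, next->task_ctx_data);
	/* ...plus PMU-private fixups under the driver's own locking... */
}
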
@@ -4229,8 +4259,9 @@ find_get_context(struct pmu *pmu, struct task_struct *task,
if (!task) {
/* Must be root to operate on a CPU event: */
- if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
- return ERR_PTR(-EACCES);
+ err = perf_allow_cpu(&event->attr);
+ if (err)
+ return ERR_PTR(err);
cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
ctx = &cpuctx->ctx;
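
perf_allow_cpu() here (and the perf_allow_kernel() used further down) replaces the old perf_paranoid_*() capability checks with helpers that also consult the LSM hook. They are defined in include/linux/perf_event.h roughly as follows (shown for reference, not part of this hunk):

static inline int perf_allow_kernel(struct perf_event_attr *attr)
{
	if (sysctl_perf_event_paranoid > 1 && !capable(CAP_SYS_ADMIN))
		return -EACCES;

	return security_perf_event_open(attr, PERF_SECURITY_KERNEL);
}

static inline int perf_allow_cpu(struct perf_event_attr *attr)
{
	if (sysctl_perf_event_paranoid > 0 && !capable(CAP_SYS_ADMIN))
		return -EACCES;

	return security_perf_event_open(attr, PERF_SECURITY_CPU);
}
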
@@ -4539,6 +4570,8 @@ static void _free_event(struct perf_event *event)
unaccount_event(event);
+ security_perf_event_free(event);
+
if (event->rb) {
/*
* Can happen when we close an event with re-directed output.
@@ -4992,6 +5025,10 @@ perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
struct perf_event_context *ctx;
int ret;
+ ret = security_perf_event_read(event);
+ if (ret)
+ return ret;
+
ctx = perf_event_ctx_lock(event);
ret = __perf_read(event, buf, count);
perf_event_ctx_unlock(event, ctx);
@@ -5256,6 +5293,11 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
struct perf_event_context *ctx;
long ret;
+ /* Treat ioctl like writes as it is likely a mutating operation. */
+ ret = security_perf_event_write(event);
+ if (ret)
+ return ret;
+
ctx = perf_event_ctx_lock(event);
ret = _perf_ioctl(event, cmd, arg);
perf_event_ctx_unlock(event, ctx);
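
The security_perf_event_{open,alloc,free,read,write}() calls added throughout this file are thin LSM wrappers; in security/security.c they look roughly like this (reference sketch, not part of the diff):

#ifdef CONFIG_PERF_EVENTS
int security_perf_event_open(struct perf_event_attr *attr, int type)
{
	return call_int_hook(perf_event_open, 0, attr, type);
}

int security_perf_event_alloc(struct perf_event *event)
{
	return call_int_hook(perf_event_alloc, 0, event);
}

void security_perf_event_free(struct perf_event *event)
{
	call_void_hook(perf_event_free, event);
}

int security_perf_event_read(struct perf_event *event)
{
	return call_int_hook(perf_event_read, 0, event);
}

int security_perf_event_write(struct perf_event *event)
{
	return call_int_hook(perf_event_write, 0, event);
}
#endif /* CONFIG_PERF_EVENTS */
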
@@ -5721,6 +5763,10 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
if (!(vma->vm_flags & VM_SHARED))
return -EINVAL;
+ ret = security_perf_event_read(event);
+ if (ret)
+ return ret;
+
vma_size = vma->vm_end - vma->vm_start;
if (vma->vm_pgoff == 0) {
@@ -5846,7 +5892,7 @@ accounting:
lock_limit >>= PAGE_SHIFT;
locked = atomic64_read(&vma->vm_mm->pinned_vm) + extra;
- if ((locked > lock_limit) && perf_paranoid_tracepoint_raw() &&
+ if ((locked > lock_limit) && perf_is_paranoid() &&
!capable(CAP_IPC_LOCK)) {
ret = -EPERM;
goto unlock;
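
perf_is_paranoid() is the renamed, attribute-free form of the old perf_paranoid_tracepoint_raw() check; as a sketch (include/linux/perf_event.h), it reduces to:

static inline bool perf_is_paranoid(void)
{
	return sysctl_perf_event_paranoid > -1;
}
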
@@ -10034,7 +10080,7 @@ static struct lock_class_key cpuctx_lock;
int perf_pmu_register(struct pmu *pmu, const char *name, int type)
{
- int cpu, ret;
+ int cpu, ret, max = PERF_TYPE_MAX;
mutex_lock(&pmus_lock);
ret = -ENOMEM;
@@ -10047,12 +10093,17 @@ int perf_pmu_register(struct pmu *pmu, const char *name, int type)
goto skip_type;
pmu->name = name;
- if (type < 0) {
- type = idr_alloc(&pmu_idr, pmu, PERF_TYPE_MAX, 0, GFP_KERNEL);
- if (type < 0) {
- ret = type;
+ if (type != PERF_TYPE_SOFTWARE) {
+ if (type >= 0)
+ max = type;
+
+ ret = idr_alloc(&pmu_idr, pmu, max, 0, GFP_KERNEL);
+ if (ret < 0)
goto free_pdc;
- }
+
+ WARN_ON(type >= 0 && ret != type);
+
+ type = ret;
}
pmu->type = type;
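
Two notes on the idr_alloc() call above, assuming the usual IDR semantics: end == 0 means "no upper bound", so a fixed positive type asks for exactly that slot (the WARN_ON catches the case where it was already taken), while a negative type from the caller keeps max at PERF_TYPE_MAX and yields a dynamic id. Illustrative callers (hypothetical driver names, not part of this patch):

ret = perf_pmu_register(&my_uncore_pmu, "my_uncore", -1);	/* dynamic id, allocated at or above PERF_TYPE_MAX */
ret = perf_pmu_register(&my_fixed_pmu, "my_fixed", MY_FIXED_TYPE); /* fixed id, now also tracked in the IDR */
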
@@ -10129,7 +10180,16 @@ got_cpu_context:
if (!pmu->event_idx)
pmu->event_idx = perf_event_idx_default;
- list_add_rcu(&pmu->entry, &pmus);
+ /*
+ * Ensure the TYPE_SOFTWARE PMUs are at the head of the list,
+ * since these cannot be in the IDR. This way the linear search
+ * is fast, provided a valid software event is provided.
+ */
+ if (type == PERF_TYPE_SOFTWARE || !name)
+ list_add_rcu(&pmu->entry, &pmus);
+ else
+ list_add_tail_rcu(&pmu->entry, &pmus);
+
atomic_set(&pmu->exclusive_cnt, 0);
ret = 0;
unlock:
@@ -10142,7 +10202,7 @@ free_dev:
put_device(pmu->dev);
free_idr:
- if (pmu->type >= PERF_TYPE_MAX)
+ if (pmu->type != PERF_TYPE_SOFTWARE)
idr_remove(&pmu_idr, pmu->type);
free_pdc:
@@ -10164,7 +10224,7 @@ void perf_pmu_unregister(struct pmu *pmu)
synchronize_rcu();
free_percpu(pmu->pmu_disable_count);
- if (pmu->type >= PERF_TYPE_MAX)
+ if (pmu->type != PERF_TYPE_SOFTWARE)
idr_remove(&pmu_idr, pmu->type);
if (pmu_bus_running) {
if (pmu->nr_addr_filters)
@@ -10234,9 +10294,8 @@ static int perf_try_init_event(struct pmu *pmu, struct perf_event *event)
static struct pmu *perf_init_event(struct perf_event *event)
{
+ int idx, type, ret;
struct pmu *pmu;
- int idx;
- int ret;
idx = srcu_read_lock(&pmus_srcu);
@@ -10249,12 +10308,27 @@ static struct pmu *perf_init_event(struct perf_event *event)
}
rcu_read_lock();
- pmu = idr_find(&pmu_idr, event->attr.type);
+ /*
+ * PERF_TYPE_HARDWARE and PERF_TYPE_HW_CACHE
+ * are often aliases for PERF_TYPE_RAW.
+ */
+ type = event->attr.type;
+ if (type == PERF_TYPE_HARDWARE || type == PERF_TYPE_HW_CACHE)
+ type = PERF_TYPE_RAW;
+
+again:
+ pmu = idr_find(&pmu_idr, type);
rcu_read_unlock();
if (pmu) {
ret = perf_try_init_event(pmu, event);
+ if (ret == -ENOENT && event->attr.type != type) {
+ type = event->attr.type;
+ goto again;
+ }
+
if (ret)
pmu = ERR_PTR(ret);
+
goto unlock;
}
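
The HARDWARE/HW_CACHE to RAW aliasing works because the architecture's core PMU owns the PERF_TYPE_RAW slot in the IDR; on x86, for example, the core driver registers itself roughly as:

/* arch/x86/events/core.c (roughly): HARDWARE/HW_CACHE events hit this PMU
 * directly via the IDR and only fall back to the linear search above when
 * perf_try_init_event() returns -ENOENT. */
perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
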
@@ -10589,11 +10663,20 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
}
}
+ err = security_perf_event_alloc(event);
+ if (err)
+ goto err_callchain_buffer;
+
/* symmetric to unaccount_event() in _free_event() */
account_event(event);
return event;
+err_callchain_buffer:
+ if (!event->parent) {
+ if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)
+ put_callchain_buffers();
+ }
err_addr_filters:
kfree(event->addr_filter_ranges);
@@ -10682,9 +10765,11 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr,
attr->branch_sample_type = mask;
}
/* privileged levels capture (kernel, hv): check permissions */
- if ((mask & PERF_SAMPLE_BRANCH_PERM_PLM)
- && perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN))
- return -EACCES;
+ if (mask & PERF_SAMPLE_BRANCH_PERM_PLM) {
+ ret = perf_allow_kernel(attr);
+ if (ret)
+ return ret;
+ }
}
if (attr->sample_type & PERF_SAMPLE_REGS_USER) {
@@ -10897,13 +10982,19 @@ SYSCALL_DEFINE5(perf_event_open,
if (flags & ~PERF_FLAG_ALL)
return -EINVAL;
+ /* Do we allow access to perf_event_open(2) ? */
+ err = security_perf_event_open(&attr, PERF_SECURITY_OPEN);
+ if (err)
+ return err;
+
err = perf_copy_attr(attr_uptr, &attr);
if (err)
return err;
if (!attr.exclude_kernel) {
- if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN))
- return -EACCES;
+ err = perf_allow_kernel(&attr);
+ if (err)
+ return err;
}
if (attr.namespaces) {
@@ -10920,9 +11011,11 @@ SYSCALL_DEFINE5(perf_event_open,
}
/* Only privileged users can get physical addresses */
- if ((attr.sample_type & PERF_SAMPLE_PHYS_ADDR) &&
- perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN))
- return -EACCES;
+ if ((attr.sample_type & PERF_SAMPLE_PHYS_ADDR)) {
+ err = perf_allow_kernel(&attr);
+ if (err)
+ return err;
+ }
err = security_locked_down(LOCKDOWN_PERF);
if (err && (attr.sample_type & PERF_SAMPLE_REGS_INTR))
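
For reference, the PERF_SECURITY_* values passed to the open hook above tell the LSM what kind of access is being requested; they are defined alongside the perf_allow_*() helpers, roughly:

/* Access to perf_event_open(2) syscall. */
#define PERF_SECURITY_OPEN		0

/* Finer grained perf_event_open(2) access control. */
#define PERF_SECURITY_CPU		1
#define PERF_SECURITY_KERNEL		2
#define PERF_SECURITY_TRACEPOINT	3
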
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index ffb59a4ef4ff..246c83ac5643 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -754,6 +754,14 @@ static void *perf_mmap_alloc_page(int cpu)
return page_address(page);
}
+static void perf_mmap_free_page(void *addr)
+{
+ struct page *page = virt_to_page(addr);
+
+ page->mapping = NULL;
+ __free_page(page);
+}
+
struct ring_buffer *rb_alloc(int nr_pages, long watermark, int cpu, int flags)
{
struct ring_buffer *rb;
@@ -788,9 +796,9 @@ struct ring_buffer *rb_alloc(int nr_pages, long watermark, int cpu, int flags)
fail_data_pages:
for (i--; i >= 0; i--)
- free_page((unsigned long)rb->data_pages[i]);
+ perf_mmap_free_page(rb->data_pages[i]);
- free_page((unsigned long)rb->user_page);
+ perf_mmap_free_page(rb->user_page);
fail_user_page:
kfree(rb);
@@ -799,21 +807,13 @@ fail:
return NULL;
}
-static void perf_mmap_free_page(unsigned long addr)
-{
- struct page *page = virt_to_page((void *)addr);
-
- page->mapping = NULL;
- __free_page(page);
-}
-
void rb_free(struct ring_buffer *rb)
{
int i;
- perf_mmap_free_page((unsigned long)rb->user_page);
+ perf_mmap_free_page(rb->user_page);
for (i = 0; i < rb->nr_pages; i++)
- perf_mmap_free_page((unsigned long)rb->data_pages[i]);
+ perf_mmap_free_page(rb->data_pages[i]);
kfree(rb);
}