Diffstat (limited to 'kernel/events')
-rw-r--r--  kernel/events/core.c | 189
1 file changed, 105 insertions(+), 84 deletions(-)
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 2fabc0627165..9a5f339a0e2d 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -153,7 +153,7 @@ enum event_type_t {
*/
struct static_key_deferred perf_sched_events __read_mostly;
static DEFINE_PER_CPU(atomic_t, perf_cgroup_events);
-static DEFINE_PER_CPU(atomic_t, perf_branch_stack_events);
+static DEFINE_PER_CPU(int, perf_sched_cb_usages);
static atomic_t nr_mmap_events __read_mostly;
static atomic_t nr_comm_events __read_mostly;
@@ -905,6 +905,15 @@ static void get_ctx(struct perf_event_context *ctx)
WARN_ON(!atomic_inc_not_zero(&ctx->refcount));
}
+static void free_ctx(struct rcu_head *head)
+{
+ struct perf_event_context *ctx;
+
+ ctx = container_of(head, struct perf_event_context, rcu_head);
+ kfree(ctx->task_ctx_data);
+ kfree(ctx);
+}
+
static void put_ctx(struct perf_event_context *ctx)
{
if (atomic_dec_and_test(&ctx->refcount)) {
@@ -912,7 +921,7 @@ static void put_ctx(struct perf_event_context *ctx)
put_ctx(ctx->parent_ctx);
if (ctx->task)
put_task_struct(ctx->task);
- kfree_rcu(ctx, rcu_head);
+ call_rcu(&ctx->rcu_head, free_ctx);
}
}
@@ -1239,9 +1248,6 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
if (is_cgroup_event(event))
ctx->nr_cgroups++;
- if (has_branch_stack(event))
- ctx->nr_branch_stack++;
-
list_add_rcu(&event->event_entry, &ctx->event_list);
ctx->nr_events++;
if (event->attr.inherit_stat)
@@ -1408,9 +1414,6 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx)
cpuctx->cgrp = NULL;
}
- if (has_branch_stack(event))
- ctx->nr_branch_stack--;
-
ctx->nr_events--;
if (event->attr.inherit_stat)
ctx->nr_stat--;
@@ -1881,6 +1884,10 @@ event_sched_in(struct perf_event *event,
perf_pmu_disable(event->pmu);
+ event->tstamp_running += tstamp - event->tstamp_stopped;
+
+ perf_set_shadow_time(event, ctx, tstamp);
+
if (event->pmu->add(event, PERF_EF_START)) {
event->state = PERF_EVENT_STATE_INACTIVE;
event->oncpu = -1;
@@ -1888,10 +1895,6 @@ event_sched_in(struct perf_event *event,
goto out;
}
- event->tstamp_running += tstamp - event->tstamp_stopped;
-
- perf_set_shadow_time(event, ctx, tstamp);
-
if (!is_software_event(event))
cpuctx->active_oncpu++;
if (!ctx->nr_active++)
@@ -2559,6 +2562,9 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn,
next->perf_event_ctxp[ctxn] = ctx;
ctx->task = next;
next_ctx->task = task;
+
+ swap(ctx->task_ctx_data, next_ctx->task_ctx_data);
+
do_switch = 0;
perf_event_sync_stat(ctx, next_ctx);
@@ -2577,6 +2583,56 @@ unlock:
}
}
+void perf_sched_cb_dec(struct pmu *pmu)
+{
+ this_cpu_dec(perf_sched_cb_usages);
+}
+
+void perf_sched_cb_inc(struct pmu *pmu)
+{
+ this_cpu_inc(perf_sched_cb_usages);
+}
+
+/*
+ * This function provides the context switch callback to the lower code
+ * layer. It is invoked ONLY when the context switch callback is enabled.
+ */
+static void perf_pmu_sched_task(struct task_struct *prev,
+ struct task_struct *next,
+ bool sched_in)
+{
+ struct perf_cpu_context *cpuctx;
+ struct pmu *pmu;
+ unsigned long flags;
+
+ if (prev == next)
+ return;
+
+ local_irq_save(flags);
+
+ rcu_read_lock();
+
+ list_for_each_entry_rcu(pmu, &pmus, entry) {
+ if (pmu->sched_task) {
+ cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
+
+ perf_ctx_lock(cpuctx, cpuctx->task_ctx);
+
+ perf_pmu_disable(pmu);
+
+ pmu->sched_task(cpuctx->task_ctx, sched_in);
+
+ perf_pmu_enable(pmu);
+
+ perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
+ }
+ }
+
+ rcu_read_unlock();
+
+ local_irq_restore(flags);
+}
+
#define for_each_task_context_nr(ctxn) \
for ((ctxn) = 0; (ctxn) < perf_nr_task_contexts; (ctxn)++)
@@ -2596,6 +2652,9 @@ void __perf_event_task_sched_out(struct task_struct *task,
{
int ctxn;
+ if (__this_cpu_read(perf_sched_cb_usages))
+ perf_pmu_sched_task(task, next, false);
+
for_each_task_context_nr(ctxn)
perf_event_context_sched_out(task, ctxn, next);
@@ -2755,64 +2814,6 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx,
}
/*
- * When sampling the branck stack in system-wide, it may be necessary
- * to flush the stack on context switch. This happens when the branch
- * stack does not tag its entries with the pid of the current task.
- * Otherwise it becomes impossible to associate a branch entry with a
- * task. This ambiguity is more likely to appear when the branch stack
- * supports priv level filtering and the user sets it to monitor only
- * at the user level (which could be a useful measurement in system-wide
- * mode). In that case, the risk is high of having a branch stack with
- * branch from multiple tasks. Flushing may mean dropping the existing
- * entries or stashing them somewhere in the PMU specific code layer.
- *
- * This function provides the context switch callback to the lower code
- * layer. It is invoked ONLY when there is at least one system-wide context
- * with at least one active event using taken branch sampling.
- */
-static void perf_branch_stack_sched_in(struct task_struct *prev,
- struct task_struct *task)
-{
- struct perf_cpu_context *cpuctx;
- struct pmu *pmu;
- unsigned long flags;
-
- /* no need to flush branch stack if not changing task */
- if (prev == task)
- return;
-
- local_irq_save(flags);
-
- rcu_read_lock();
-
- list_for_each_entry_rcu(pmu, &pmus, entry) {
- cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
-
- /*
- * check if the context has at least one
- * event using PERF_SAMPLE_BRANCH_STACK
- */
- if (cpuctx->ctx.nr_branch_stack > 0
- && pmu->flush_branch_stack) {
-
- perf_ctx_lock(cpuctx, cpuctx->task_ctx);
-
- perf_pmu_disable(pmu);
-
- pmu->flush_branch_stack();
-
- perf_pmu_enable(pmu);
-
- perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
- }
- }
-
- rcu_read_unlock();
-
- local_irq_restore(flags);
-}
-
-/*
* Called from scheduler to add the events of the current task
* with interrupts disabled.
*
@@ -2844,9 +2845,8 @@ void __perf_event_task_sched_in(struct task_struct *prev,
if (atomic_read(this_cpu_ptr(&perf_cgroup_events)))
perf_cgroup_sched_in(prev, task);
- /* check for system-wide branch_stack events */
- if (atomic_read(this_cpu_ptr(&perf_branch_stack_events)))
- perf_branch_stack_sched_in(prev, task);
+ if (__this_cpu_read(perf_sched_cb_usages))
+ perf_pmu_sched_task(prev, task, true);
}
static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count)
@@ -3321,12 +3321,15 @@ errout:
* Returns a matching context with refcount and pincount.
*/
static struct perf_event_context *
-find_get_context(struct pmu *pmu, struct task_struct *task, int cpu)
+find_get_context(struct pmu *pmu, struct task_struct *task,
+ struct perf_event *event)
{
struct perf_event_context *ctx, *clone_ctx = NULL;
struct perf_cpu_context *cpuctx;
+ void *task_ctx_data = NULL;
unsigned long flags;
int ctxn, err;
+ int cpu = event->cpu;
if (!task) {
/* Must be root to operate on a CPU event: */
@@ -3354,11 +3357,24 @@ find_get_context(struct pmu *pmu, struct task_struct *task, int cpu)
if (ctxn < 0)
goto errout;
+ if (event->attach_state & PERF_ATTACH_TASK_DATA) {
+ task_ctx_data = kzalloc(pmu->task_ctx_size, GFP_KERNEL);
+ if (!task_ctx_data) {
+ err = -ENOMEM;
+ goto errout;
+ }
+ }
+
retry:
ctx = perf_lock_task_context(task, ctxn, &flags);
if (ctx) {
clone_ctx = unclone_ctx(ctx);
++ctx->pin_count;
+
+ if (task_ctx_data && !ctx->task_ctx_data) {
+ ctx->task_ctx_data = task_ctx_data;
+ task_ctx_data = NULL;
+ }
raw_spin_unlock_irqrestore(&ctx->lock, flags);
if (clone_ctx)
@@ -3369,6 +3385,11 @@ retry:
if (!ctx)
goto errout;
+ if (task_ctx_data) {
+ ctx->task_ctx_data = task_ctx_data;
+ task_ctx_data = NULL;
+ }
+
err = 0;
mutex_lock(&task->perf_event_mutex);
/*
@@ -3395,9 +3416,11 @@ retry:
}
}
+ kfree(task_ctx_data);
return ctx;
errout:
+ kfree(task_ctx_data);
return ERR_PTR(err);
}
@@ -3423,10 +3446,6 @@ static void unaccount_event_cpu(struct perf_event *event, int cpu)
if (event->parent)
return;
- if (has_branch_stack(event)) {
- if (!(event->attach_state & PERF_ATTACH_TASK))
- atomic_dec(&per_cpu(perf_branch_stack_events, cpu));
- }
if (is_cgroup_event(event))
atomic_dec(&per_cpu(perf_cgroup_events, cpu));
}
@@ -6133,6 +6152,7 @@ static int perf_swevent_add(struct perf_event *event, int flags)
}
hlist_add_head_rcu(&event->hlist_entry, head);
+ perf_event_update_userpage(event);
return 0;
}
@@ -6602,6 +6622,7 @@ static int cpu_clock_event_add(struct perf_event *event, int flags)
{
if (flags & PERF_EF_START)
cpu_clock_event_start(event, flags);
+ perf_event_update_userpage(event);
return 0;
}
@@ -6676,6 +6697,7 @@ static int task_clock_event_add(struct perf_event *event, int flags)
{
if (flags & PERF_EF_START)
task_clock_event_start(event, flags);
+ perf_event_update_userpage(event);
return 0;
}
@@ -7089,10 +7111,6 @@ static void account_event_cpu(struct perf_event *event, int cpu)
if (event->parent)
return;
- if (has_branch_stack(event)) {
- if (!(event->attach_state & PERF_ATTACH_TASK))
- atomic_inc(&per_cpu(perf_branch_stack_events, cpu));
- }
if (is_cgroup_event(event))
atomic_inc(&per_cpu(perf_cgroup_events, cpu));
}
@@ -7224,6 +7242,9 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
if (attr->inherit && (attr->read_format & PERF_FORMAT_GROUP))
goto err_ns;
+ if (!has_branch_stack(event))
+ event->attr.branch_sample_type = 0;
+
pmu = perf_init_event(event);
if (!pmu)
goto err_ns;
@@ -7586,7 +7607,7 @@ SYSCALL_DEFINE5(perf_event_open,
/*
* Get the target context (task or percpu):
*/
- ctx = find_get_context(pmu, task, event->cpu);
+ ctx = find_get_context(pmu, task, event);
if (IS_ERR(ctx)) {
err = PTR_ERR(ctx);
goto err_alloc;
@@ -7792,7 +7813,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
account_event(event);
- ctx = find_get_context(event->pmu, task, cpu);
+ ctx = find_get_context(event->pmu, task, event);
if (IS_ERR(ctx)) {
err = PTR_ERR(ctx);
goto err_free;
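
For illustration only, a minimal sketch of how a PMU driver might use the context-switch plumbing this patch introduces: the perf_sched_cb_inc()/perf_sched_cb_dec() helpers, the struct pmu ->sched_task and ->task_ctx_size fields, and the PERF_ATTACH_TASK_DATA attach flag that makes find_get_context() allocate ctx->task_ctx_data. The "foo" PMU, its task-context layout and the save/restore helpers below are hypothetical stand-ins, not code from this patch.

#include <linux/perf_event.h>

/* Per-task state the core allocates for us (pmu->task_ctx_size bytes). */
struct foo_task_ctx {
	u64	saved_hw_state;	/* illustrative: state carried across context switches */
};

/* Hypothetical stand-ins for real hardware save/restore code. */
static void foo_save_hw_state(struct foo_task_ctx *tctx) { }
static void foo_restore_hw_state(struct foo_task_ctx *tctx) { }

/*
 * Invoked from perf_pmu_sched_task() on every context switch while the
 * per-CPU perf_sched_cb_usages count is non-zero.
 */
static void foo_sched_task(struct perf_event_context *ctx, bool sched_in)
{
	struct foo_task_ctx *tctx = ctx ? ctx->task_ctx_data : NULL;

	if (!tctx)
		return;

	if (sched_in)
		foo_restore_hw_state(tctx);
	else
		foo_save_hw_state(tctx);
}

static int foo_event_init(struct perf_event *event)
{
	if (event->attr.type != event->pmu->type)
		return -ENOENT;

	/* Ask find_get_context() to allocate ctx->task_ctx_data for task events. */
	event->attach_state |= PERF_ATTACH_TASK_DATA;
	return 0;
}

static int foo_event_add(struct perf_event *event, int flags)
{
	/* Enable the context-switch callback while this event is scheduled. */
	perf_sched_cb_inc(event->ctx->pmu);
	/* Real code would program the hardware counter here. */
	return 0;
}

static void foo_event_del(struct perf_event *event, int flags)
{
	perf_sched_cb_dec(event->ctx->pmu);
}

static struct pmu foo_pmu = {
	.task_ctx_size	= sizeof(struct foo_task_ctx),
	.sched_task	= foo_sched_task,
	.event_init	= foo_event_init,
	.add		= foo_event_add,
	.del		= foo_event_del,
	/* .start, .stop, .read, etc. omitted for brevity. */
};

Note that perf_pmu_sched_task() only invokes pmus with a non-NULL ->sched_task, and __perf_event_task_sched_in()/_out() only call it while the per-CPU perf_sched_cb_usages counter is raised, so a driver that never calls perf_sched_cb_inc() adds no cost to the context-switch path.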