diff options
author | Steven Rostedt (VMware) <rostedt@goodmis.org> | 2018-03-09 13:19:28 -0500 |
---|---|---|
committer | Steven Rostedt (VMware) <rostedt@goodmis.org> | 2018-03-14 12:35:39 -0400 |
commit | 80765597bc587feae8dbc8ce97a0f32e12a6e625 (patch) | |
tree | 0a3823a6531798777eed0317f59660ac5495a73b /kernel/trace | |
parent | 478325f188657d0e503d1f88cdaf516c792352c5 (diff) |
tracing: Rewrite filter logic to be simpler and faster
Al Viro reviewed the filter logic of ftrace trace events and found it to be
very troubling. It creates a binary tree based on the logic operators and
walks it during tracing. He sent myself and Tom Zanussi a long explanation
(and formal proof) of how to do the string parsing better and end up with a
program array that can be simply iterated to come up with the correct
results.
I took his ideas and his pseudo code and rewrote the filter logic based on
them. In doing so, I was able to remove a lot of code, and have a much more
condensed filter logic in the process. I wrote a very long comment
describing the methadology that Al proposed in my own words. For more info
on how this works, read the comment above predicate_parse().
Suggested-by: Al Viro <viro@ZenIV.linux.org.uk>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Diffstat (limited to 'kernel/trace')
-rw-r--r-- | kernel/trace/trace.h | 15 | ||||
-rw-r--r-- | kernel/trace/trace_events_filter.c | 2161 |
2 files changed, 979 insertions, 1197 deletions
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 9de3e2a2f042..6fb46a06c9dc 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -1216,12 +1216,11 @@ struct ftrace_event_field { int is_signed; }; +struct prog_entry; + struct event_filter { - int n_preds; /* Number assigned */ - int a_preds; /* allocated */ - struct filter_pred __rcu *preds; - struct filter_pred __rcu *root; - char *filter_string; + struct prog_entry __rcu *prog; + char *filter_string; }; struct event_subsystem { @@ -1413,12 +1412,8 @@ struct filter_pred { unsigned short *ops; struct ftrace_event_field *field; int offset; - int not; + int not; int op; - unsigned short index; - unsigned short parent; - unsigned short left; - unsigned short right; }; static inline bool is_string_field(struct ftrace_event_field *field) diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 9d383f4383dc..703a416aa5c2 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -33,60 +33,52 @@ "# Only events with the given fields will be affected.\n" \ "# If no events are modified, an error message will be displayed here" +/* Due to token parsing '<=' must be before '<' and '>=' must be before '>' */ #define OPS \ - C( OP_OR, "||", 1 ), \ - C( OP_AND, "&&", 2 ), \ - C( OP_GLOB, "~", 4 ), \ - C( OP_NE, "!=", 4 ), \ - C( OP_EQ, "==", 4 ), \ - C( OP_LT, "<", 5 ), \ - C( OP_LE, "<=", 5 ), \ - C( OP_GT, ">", 5 ), \ - C( OP_GE, ">=", 5 ), \ - C( OP_BAND, "&", 6 ), \ - C( OP_NOT, "!", 6 ), \ - C( OP_NONE, "OP_NONE", 0 ), \ - C( OP_OPEN_PAREN, "(", 0 ), \ - C( OP_MAX, NULL, 0 ) + C( OP_GLOB, "~" ), \ + C( OP_NE, "!=" ), \ + C( OP_EQ, "==" ), \ + C( OP_LE, "<=" ), \ + C( OP_LT, "<" ), \ + C( OP_GE, ">=" ), \ + C( OP_GT, ">" ), \ + C( OP_BAND, "&" ), \ + C( OP_MAX, NULL ) #undef C -#define C(a, b, c) a +#define C(a, b) a enum filter_op_ids { OPS }; -struct filter_op { - int id; - char *string; - int precedence; -}; - #undef C -#define C(a, b, c) { a, b, c } +#define C(a, b) b -static struct filter_op filter_ops[] = { OPS }; +static const char * ops[] = { OPS }; /* - * pred functions are OP_LT, OP_LE, OP_GT, OP_GE, and OP_BAND + * pred functions are OP_LE, OP_LT, OP_GE, OP_GT, and OP_BAND * pred_funcs_##type below must match the order of them above. */ -#define PRED_FUNC_START OP_LT +#define PRED_FUNC_START OP_LE #define PRED_FUNC_MAX (OP_BAND - PRED_FUNC_START) #define ERRORS \ - C( NONE, "No error"), \ - C( INVALID_OP, "Invalid operator"), \ - C( UNBALANCED_PAREN, "Unbalanced parens"), \ - C( TOO_MANY_OPERANDS, "Too many operands"), \ - C( OPERAND_TOO_LONG, "Operand too long"), \ - C( FIELD_NOT_FOUND, "Field not found"), \ - C( ILLEGAL_FIELD_OP, "Illegal operation for field type"), \ - C( ILLEGAL_INTVAL, "Illegal integer value"), \ - C( BAD_SUBSYS_FILTER, "Couldn't find or set field in one of a subsystem's events"), \ - C( TOO_MANY_PREDS, "Too many terms in predicate expression"), \ - C( MISSING_FIELD, "Missing field name and/or value"), \ - C( INVALID_FILTER, "Meaningless filter expression"), \ - C( IP_FIELD_ONLY, "Only 'ip' field is supported for function trace"), \ - C( ILLEGAL_NOT_OP, "Illegal use of '!'"), + C(NONE, "No error"), \ + C(INVALID_OP, "Invalid operator"), \ + C(TOO_MANY_OPEN, "Too many '('"), \ + C(TOO_MANY_CLOSE, "Too few '('"), \ + C(MISSING_QUOTE, "Missing matching quote"), \ + C(OPERAND_TOO_LONG, "Operand too long"), \ + C(EXPECT_STRING, "Expecting string field"), \ + C(EXPECT_DIGIT, "Expecting numeric field"), \ + C(ILLEGAL_FIELD_OP, "Illegal operation for field type"), \ + C(FIELD_NOT_FOUND, "Field not found"), \ + C(ILLEGAL_INTVAL, "Illegal integer value"), \ + C(BAD_SUBSYS_FILTER, "Couldn't find or set field in one of a subsystem's events"), \ + C(TOO_MANY_PREDS, "Too many terms in predicate expression"), \ + C(INVALID_FILTER, "Meaningless filter expression"), \ + C(IP_FIELD_ONLY, "Only 'ip' field is supported for function trace"), \ + C(INVALID_VALUE, "Invalid value (did you forget quotes)?"), #undef C #define C(a, b) FILT_ERR_##a @@ -98,84 +90,535 @@ enum { ERRORS }; static char *err_text[] = { ERRORS }; -struct opstack_op { - enum filter_op_ids op; - struct list_head list; -}; +/* Called after a '!' character but "!=" and "!~" are not "not"s */ +static bool is_not(const char *str) +{ + switch (str[1]) { + case '=': + case '~': + return false; + } + return true; +} -struct postfix_elt { - enum filter_op_ids op; - char *operand; - struct list_head list; +/** + * prog_entry - a singe entry in the filter program + * @target: Index to jump to on a branch (actually one minus the index) + * @when_to_branch: The value of the result of the predicate to do a branch + * @pred: The predicate to execute. + */ +struct prog_entry { + int target; + int when_to_branch; + struct filter_pred *pred; }; -struct filter_parse_state { - struct filter_op *ops; - struct list_head opstack; - struct list_head postfix; +/** + * update_preds- assign a program entry a label target + * @prog: The program array + * @N: The index of the current entry in @prog + * @when_to_branch: What to assign a program entry for its branch condition + * + * The program entry at @N has a target that points to the index of a program + * entry that can have its target and when_to_branch fields updated. + * Update the current program entry denoted by index @N target field to be + * that of the updated entry. This will denote the entry to update if + * we are processing an "||" after an "&&" + */ +static void update_preds(struct prog_entry *prog, int N, int invert) +{ + int t, s; + + t = prog[N].target; + s = prog[t].target; + prog[t].when_to_branch = invert; + prog[t].target = N; + prog[N].target = s; +} + +struct filter_parse_error { int lasterr; int lasterr_pos; - - struct { - char *string; - unsigned int cnt; - unsigned int tail; - } infix; - - struct { - char string[MAX_FILTER_STR_VAL]; - int pos; - unsigned int tail; - } operand; }; -struct pred_stack { - struct filter_pred **preds; - int index; +static void parse_error(struct filter_parse_error *pe, int err, int pos) +{ + pe->lasterr = err; + pe->lasterr_pos = pos; +} + +typedef int (*parse_pred_fn)(const char *str, void *data, int pos, + struct filter_parse_error *pe, + struct filter_pred **pred); + +enum { + INVERT = 1, + PROCESS_AND = 2, + PROCESS_OR = 4, }; -/* If not of not match is equal to not of not, then it is a match */ +/* + * Without going into a formal proof, this explains the method that is used in + * parsing the logical expressions. + * + * For example, if we have: "a && !(!b || (c && g)) || d || e && !f" + * The first pass will convert it into the following program: + * + * n1: r=a; l1: if (!r) goto l4; + * n2: r=b; l2: if (!r) goto l4; + * n3: r=c; r=!r; l3: if (r) goto l4; + * n4: r=g; r=!r; l4: if (r) goto l5; + * n5: r=d; l5: if (r) goto T + * n6: r=e; l6: if (!r) goto l7; + * n7: r=f; r=!r; l7: if (!r) goto F + * T: return TRUE + * F: return FALSE + * + * To do this, we use a data structure to represent each of the above + * predicate and conditions that has: + * + * predicate, when_to_branch, invert, target + * + * The "predicate" will hold the function to determine the result "r". + * The "when_to_branch" denotes what "r" should be if a branch is to be taken + * "&&" would contain "!r" or (0) and "||" would contain "r" or (1). + * The "invert" holds whether the value should be reversed before testing. + * The "target" contains the label "l#" to jump to. + * + * A stack is created to hold values when parentheses are used. + * + * To simplify the logic, the labels will start at 0 and not 1. + * + * The possible invert values are 1 and 0. The number of "!"s that are in scope + * before the predicate determines the invert value, if the number is odd then + * the invert value is 1 and 0 otherwise. This means the invert value only + * needs to be toggled when a new "!" is introduced compared to what is stored + * on the stack, where parentheses were used. + * + * The top of the stack and "invert" are initialized to zero. + * + * ** FIRST PASS ** + * + * #1 A loop through all the tokens is done: + * + * #2 If the token is an "(", the stack is push, and the current stack value + * gets the current invert value, and the loop continues to the next token. + * The top of the stack saves the "invert" value to keep track of what + * the current inversion is. As "!(a && !b || c)" would require all + * predicates being affected separately by the "!" before the parentheses. + * And that would end up being equivalent to "(!a || b) && !c" + * + * #3 If the token is an "!", the current "invert" value gets inverted, and + * the loop continues. Note, if the next token is a predicate, then + * this "invert" value is only valid for the current program entry, + * and does not affect other predicates later on. + * + * The only other acceptable token is the predicate string. + * + * #4 A new entry into the program is added saving: the predicate and the + * current value of "invert". The target is currently assigned to the + * previous program index (this will not be its final value). + * + * #5 We now enter another loop and look at the next token. The only valid + * tokens are ")", "&&", "||" or end of the input string "\0". + * + * #6 The invert variable is reset to the current value saved on the top of + * the stack. + * + * #7 The top of the stack holds not only the current invert value, but also + * if a "&&" or "||" needs to be processed. Note, the "&&" takes higher + * precedence than "||". That is "a && b || c && d" is equivalent to + * "(a && b) || (c && d)". Thus the first thing to do is to see if "&&" needs + * to be processed. This is the case if an "&&" was the last token. If it was + * then we call update_preds(). This takes the program, the current index in + * the program, and the current value of "invert". More will be described + * below about this function. + * + * #8 If the next token is "&&" then we set a flag in the top of the stack + * that denotes that "&&" needs to be processed, break out of this loop + * and continue with the outer loop. + * + * #9 Otherwise, if a "||" needs to be processed then update_preds() is called. + * This is called with the program, the current index in the program, but + * this time with an inverted value of "invert" (that is !invert). This is + * because the value taken will become the "when_to_branch" value of the + * program. + * Note, this is called when the next token is not an "&&". As stated before, + * "&&" takes higher precedence, and "||" should not be processed yet if the + * next logical operation is "&&". + * + * #10 If the next token is "||" then we set a flag in the top of the stack + * that denotes that "||" needs to be processed, break out of this loop + * and continue with the outer loop. + * + * #11 If this is the end of the input string "\0" then we break out of both + * loops. + * + * #12 Otherwise, the next token is ")", where we pop the stack and continue + * this inner loop. + * + * Now to discuss the update_pred() function, as that is key to the setting up + * of the program. Remember the "target" of the program is initialized to the + * previous index and not the "l" label. The target holds the index into the + * program that gets affected by the operand. Thus if we have something like + * "a || b && c", when we process "a" the target will be "-1" (undefined). + * When we process "b", its target is "0", which is the index of "a", as that's + * the predicate that is affected by "||". But because the next token after "b" + * is "&&" we don't call update_preds(). Instead continue to "c". As the + * next token after "c" is not "&&" but the end of input, we first process the + * "&&" by calling update_preds() for the "&&" then we process the "||" by + * callin updates_preds() with the values for processing "||". + * + * What does that mean? What update_preds() does is to first save the "target" + * of the program entry indexed by the current program entry's "target" + * (remember the "target" is initialized to previous program entry), and then + * sets that "target" to the current index which represents the label "l#". + * That entry's "when_to_branch" is set to the value passed in (the "invert" + * or "!invert"). Then it sets the current program entry's target to the saved + * "target" value (the old value of the program that had its "target" updated + * to the label). + * + * Looking back at "a || b && c", we have the following steps: + * "a" - prog[0] = { "a", X, -1 } // pred, when_to_branch, target + * "||" - flag that we need to process "||"; continue outer loop + * "b" - prog[1] = { "b", X, 0 } + * "&&" - flag that we need to process "&&"; continue outer loop + * (Notice we did not process "||") + * "c" - prog[2] = { "c", X, 1 } + * update_preds(prog, 2, 0); // invert = 0 as we are processing "&&" + * t = prog[2].target; // t = 1 + * s = prog[t].target; // s = 0 + * prog[t].target = 2; // Set target to "l2" + * prog[t].when_to_branch = 0; + * prog[2].target = s; + * update_preds(prog, 2, 1); // invert = 1 as we are now processing "||" + * t = prog[2].target; // t = 0 + * s = prog[t].target; // s = -1 + * prog[t].target = 2; // Set target to "l2" + * prog[t].when_to_branch = 1; + * prog[2].target = s; + * + * #13 Which brings us to the final step of the first pass, which is to set + * the last program entry's when_to_branch and target, which will be + * when_to_branch = 0; target = N; ( the label after the program entry after + * the last program entry processed above). + * + * If we denote "TRUE" to be the entry after the last program entry processed, + * and "FALSE" the program entry after that, we are now done with the first + * pass. + * + * Making the above "a || b && c" have a progam of: + * prog[0] = { "a", 1, 2 } + * prog[1] = { "b", 0, 2 } + * prog[2] = { "c", 0, 3 } + * + * Which translates into: + * n0: r = a; l0: if (r) goto l2; + * n1: r = b; l1: if (!r) goto l2; + * n2: r = c; l2: if (!r) goto l3; // Which is the same as "goto F;" + * T: return TRUE; l3: + * F: return FALSE + * + * Although, after the first pass, the program is correct, it is + * inefficient. The simple sample of "a || b && c" could be easily been + * converted into: + * n0: r = a; if (r) goto T + * n1: r = b; if (!r) goto F + * n2: r = c; if (!r) goto F + * T: return TRUE; + * F: return FALSE; + * + * The First Pass is over the input string. The next too passes are over + * the program itself. + * + * ** SECOND PASS ** + * + * Which brings us to the second pass. If a jump to a label has the + * same condition as that label, it can instead jump to its target. + * The original example of "a && !(!b || (c && g)) || d || e && !f" + * where the first pass gives us: + * + * n1: r=a; l1: if (!r) goto l4; + * n2: r=b; l2: if (!r) goto l4; + * n3: r=c; r=!r; l3: if (r) goto l4; + * n4: r=g; r=!r; l4: if (r) goto l5; + * n5: r=d; l5: if (r) goto T + * n6: r=e; l6: if (!r) goto l7; + * n7: r=f; r=!r; l7: if (!r) goto F: + * T: return TRUE; + * F: return FALSE + * + * We can see that "l3: if (r) goto l4;" and at l4, we have "if (r) goto l5;". + * And "l5: if (r) goto T", we could optimize this by converting l3 and l4 + * to go directly to T. To accomplish this, we start from the last + * entry in the program and work our way back. If the target of the entry + * has the same "when_to_branch" then we could use that entry's target. + * Doing this, the above would end up as: + * + * n1: r=a; l1: if (!r) goto l4; + * n2: r=b; l2: if (!r) goto l4; + * n3: r=c; r=!r; l3: if (r) goto T; + * n4: r=g; r=!r; l4: if (r) goto T; + * n5: r=d; l5: if (r) goto T; + * n6: r=e; l6: if (!r) goto F; + * n7: r=f; r=!r; l7: if (!r) goto F; + * T: return TRUE + * F: return FALSE + * + * In that same pass, if the "when_to_branch" doesn't match, we can simply + * go to the program entry after the label. That is, "l2: if (!r) goto l4;" + * where "l4: if (r) goto T;", then we can convert l2 to be: + * "l2: if (!r) goto n5;". + * + * This will have the second pass give us: + * n1: r=a; l1: if (!r) goto n5; + * n2: r=b; l2: if (!r) goto n5; + * n3: r=c; r=!r; l3: if (r) goto T; + * n4: r=g; r=!r; l4: if (r) goto T; + * n5: r=d; l5: if (r) goto T + * n6: r=e; l6: if (!r) goto F; + * n7: r=f; r=!r; l7: if (!r) goto F + * T: return TRUE + * F: return FALSE + * + * Notice, all the "l#" labels are no longer used, and they can now + * be discarded. + * + * ** THIRD PASS ** + * + * For the third pass we deal with the inverts. As they simply just + * make the "when_to_branch" get inverted, a simple loop over the + * program to that does: "when_to_branch ^= invert;" will do the + * job, leaving us with: + * n1: r=a; if (!r) goto n5; + * n2: r=b; if (!r) goto n5; + * n3: r=c: if (!r) goto T; + * n4: r=g; if (!r) goto T; + * n5: r=d; if (r) goto T + * n6: r=e; if (!r) goto F; + * n7: r=f; if (r) goto F + * T: return TRUE + * F: return FALSE + * + * As "r = a; if (!r) goto n5;" is obviously the same as + * "if (!a) goto n5;" without doing anything we can interperate the + * program as: + * n1: if (!a) goto n5; + * n2: if (!b) goto n5; + * n3: if (!c) goto T; + * n4: if (!g) goto T; + * n5: if (d) goto T + * n6: if (!e) goto F; + * n7: if (f) goto F + * T: return TRUE + * F: return FALSE + * + * Since the inverts are discarded at the end, there's no reason to store + * them in the program array (and waste memory). A separate array to hold + * the inverts is used and freed at the end. + */ +static struct prog_entry * +predicate_parse(const char *str, int nr_parens, int nr_preds, + parse_pred_fn parse_pred, void *data, + struct filter_parse_error *pe) +{ + struct prog_entry *prog_stack; + struct prog_entry *prog; + const char *ptr = str; + char *inverts = NULL; + int *op_stack; + int *top; + int invert = 0; + int ret = -ENOMEM; + int len; + int N = 0; + int i; + + nr_preds += 2; /* For TRUE and FALSE */ + + op_stack = kmalloc(sizeof(*op_stack) * nr_parens, GFP_KERNEL); + if (!op_stack) + return ERR_PTR(-ENOMEM); + prog_stack = kmalloc(sizeof(*prog_stack) * nr_preds, GFP_KERNEL); + if (!prog_stack) { + parse_error(pe, -ENOMEM, 0); + goto out_free; + } + inverts = kmalloc(sizeof(*inverts) * nr_preds, GFP_KERNEL); + if (!inverts) { + parse_error(pe, -ENOMEM, 0); + goto out_free; + } + + top = op_stack; + prog = prog_stack; + *top = 0; + + /* First pass */ + while (*ptr) { /* #1 */ + const char *next = ptr++; + + if (isspace(*next)) + continue; + + switch (*next) { + case '(': /* #2 */ + if (top - op_stack > nr_parens) + return ERR_PTR(-EINVAL); + *(++top) = invert; + continue; + case '!': /* #3 */ + if (!is_not(next)) + break; + invert = !invert; + continue; + } + + if (N >= nr_preds) { + parse_error(pe, FILT_ERR_TOO_MANY_PREDS, next - str); + goto out_free; + } + + inverts[N] = invert; /* #4 */ + prog[N].target = N-1; + + len = parse_pred(next, data, ptr - str, pe, &prog[N].pred); + if (len < 0) { + ret = len; + goto out_free; + } + ptr = next + len; + + N++; + + ret = -1; + while (1) { /* #5 */ + next = ptr++; + if (isspace(*next)) + continue; + + switch (*next) { + case ')': + case '\0': + break; + case '&': + case '|': + if (next[1] == next[0]) { + ptr++; + break; + } + default: + parse_error(pe, FILT_ERR_TOO_MANY_PREDS, + next - str); + goto out_free; + } + + invert = *top & INVERT; + + if (*top & PROCESS_AND) { /* #7 */ + update_preds(prog, N - 1, invert); + *top &= ~PROCESS_AND; + } + if (*next == '&') { /* #8 */ + *top |= PROCESS_AND; + break; + } + if (*top & PROCESS_OR) { /* #9 */ + update_preds(prog, N - 1, !invert); + *top &= ~PROCESS_OR; + } + if (*next == '|') { /* #10 */ + *top |= PROCESS_OR; + break; + } + if (!*next) /* #11 */ + goto out; + + if (top == op_stack) { + ret = -1; + /* Too few '(' */ + parse_error(pe, FILT_ERR_TOO_MANY_CLOSE, ptr - str); + goto out_free; + } + top--; /* #12 */ + } + } + out: + if (top != op_stack) { + /* Too many '(' */ + parse_error(pe, FILT_ERR_TOO_MANY_OPEN, ptr - str); + goto out_free; + } + + prog[N].pred = NULL; /* #13 */ + prog[N].target = 1; /* TRUE */ + prog[N+1].pred = NULL; + prog[N+1].target = 0; /* FALSE */ + prog[N-1].target = N; + prog[N-1].when_to_branch = false; + + /* Second Pass */ + for (i = N-1 ; i--; ) { + int target = prog[i].target; + if (prog[i].when_to_branch == prog[target].when_to_branch) + prog[i].target = prog[target].target; + } + + /* Third Pass */ + for (i = 0; i < N; i++) { + invert = inverts[i] ^ prog[i].when_to_branch; + prog[i].when_to_branch = invert; + /* Make sure the program always moves forward */ + if (WARN_ON(prog[i].target <= i)) { + ret = -EINVAL; + goto out_free; + } + } + + return prog; +out_free: + kfree(op_stack); + kfree(prog_stack); + kfree(inverts); + return ERR_PTR(ret); +} + #define DEFINE_COMPARISON_PRED(type) \ static int filter_pred_LT_##type(struct filter_pred *pred, void *event) \ { \ type *addr = (type *)(event + pred->offset); \ type val = (type)pred->val; \ - int match = (*addr < val); \ - return !!match == !pred->not; \ + return *addr < val; \ } \ static int filter_pred_LE_##type(struct filter_pred *pred, void *event) \ { \ type *addr = (type *)(event + pred->offset); \ type val = (type)pred->val; \ - int match = (*addr <= val); \ - return !!match == !pred->not; \ + return *addr <= val; \ } \ static int filter_pred_GT_##type(struct filter_pred *pred, void *event) \ { \ type *addr = (type *)(event + pred->offset); \ type val = (type)pred->val; \ - int match = (*addr > val); \ - return !!match == !pred->not; \ + return *addr > val; \ } \ static int filter_pred_GE_##type(struct filter_pred *pred, void *event) \ { \ type *addr = (type *)(event + pred->offset); \ type val = (type)pred->val; \ - int match = (*addr >= val); \ - return !!match == !pred->not; \ + return *addr >= val; \ } \ static int filter_pred_BAND_##type(struct filter_pred *pred, void *event) \ { \ type *addr = (type *)(event + pred->offset); \ type val = (type)pred->val; \ - int match = !!(*addr & val); \ - return match == !pred->not; \ + return !!(*addr & val); \ } \ static const filter_pred_fn_t pred_funcs_##type[] = { \ - filter_pred_LT_##type, \ filter_pred_LE_##type, \ - filter_pred_GT_##type, \ + filter_pred_LT_##type, \ filter_pred_GE_##type, \ + filter_pred_GT_##type, \ filter_pred_BAND_##type, \ }; @@ -261,44 +704,36 @@ static int filter_pred_strloc(struct filter_pred *pred, void *event) static int filter_pred_cpu(struct filter_pred *pred, void *event) { int cpu, cmp; - int match = 0; cpu = raw_smp_processor_id(); cmp = pred->val; switch (pred->op) { case OP_EQ: - match = cpu == cmp; - break; + return cpu == cmp; + case OP_NE: + return cpu != cmp; case OP_LT: - match = cpu < cmp; - break; + return cpu < cmp; case OP_LE: - match = cpu <= cmp; - break; + return cpu <= cmp; case OP_GT: - match = cpu > cmp; - break; + return cpu > cmp; case OP_GE: - match = cpu >= cmp; - break; + return cpu >= cmp; default: - break; + return 0; } - - return !!match == !pred->not; } /* Filter predicate for COMM. */ static int filter_pred_comm(struct filter_pred *pred, void *event) { - int cmp, match; + int cmp; cmp = pred->regex.match(current->comm, &pred->regex, - pred->regex.field_len); - match = cmp ^ pred->not; - - return match; + TASK_COMM_LEN); + return cmp ^ pred->not; } static int filter_pred_none(struct filter_pred *pred, void *event) @@ -355,6 +790,7 @@ static int regex_match_glob(char *str, struct regex *r, int len __maybe_unused) return 1; return 0; } + /** * filter_parse_regex - parse a basic regex * @buff: the raw regex @@ -415,10 +851,9 @@ static void filter_build_regex(struct filter_pred *pred) struct regex *r = &pred->regex; char *search; enum regex_type type = MATCH_FULL; - int not = 0; if (pred->op == OP_GLOB) { - type = filter_parse_regex(r->pattern, r->len, &search, ¬); + type = filter_parse_regex(r->pattern, r->len, &search, &pred->not); r->len = strlen(search); memmove(r->pattern, search, r->len+1); } @@ -440,210 +875,32 @@ static void filter_build_regex(struct filter_pred *pred) r->match = regex_match_glob; break; } - - pred->not ^= not; -} - -enum move_type { - MOVE_DOWN, - MOVE_UP_FROM_LEFT, - MOVE_UP_FROM_RIGHT -}; - -static struct filter_pred * -get_pred_parent(struct filter_pred *pred, struct filter_pred *preds, - int index, enum move_type *move) -{ - if (pred->parent & FILTER_PRED_IS_RIGHT) - *move = MOVE_UP_FROM_RIGHT; - else - *move = MOVE_UP_FROM_LEFT; - pred = &preds[pred->parent & ~FILTER_PRED_IS_RIGHT]; - - return pred; -} - -enum walk_return { - WALK_PRED_ABORT, - WALK_PRED_PARENT, - WALK_PRED_DEFAULT, -}; - -typedef int (*filter_pred_walkcb_t) (enum move_type move, - struct filter_pred *pred, - int *err, void *data); - -static int walk_pred_tree(struct filter_pred *preds, - struct filter_pred *root, - filter_pred_walkcb_t cb, void *data) -{ - struct filter_pred *pred = root; - enum move_type move = MOVE_DOWN; - int done = 0; - - if (!preds) - return -EINVAL; - - do { - int err = 0, ret; - - ret = cb(move, pred, &err, data); - if (ret == WALK_PRED_ABORT) - return err; - if (ret == WALK_PRED_PARENT) - goto get_parent; - - switch (move) { - case MOVE_DOWN: - if (pred->left != FILTER_PRED_INVALID) { - pred = &preds[pred->left]; - continue; - } - goto get_parent; - case MOVE_UP_FROM_LEFT: - pred = &preds[pred->right]; - move = MOVE_DOWN; - continue; - case MOVE_UP_FROM_RIGHT: - get_parent: - if (pred == root) - break; - pred = get_pred_parent(pred, preds, - pred->parent, - &move); - continue; - } - done = 1; - } while (!done); - - /* We are fine. */ - return 0; -} - -/* - * A series of AND or ORs where found together. Instead of - * climbing up and down the tree branches, an array of the - * ops were made in order of checks. We can just move across - * the array and short circuit if needed. - */ -static int process_ops(struct filter_pred *preds, - struct filter_pred *op, void *rec) -{ - struct filter_pred *pred; - int match = 0; - int type; - int i; - - /* - * Micro-optimization: We set type to true if op - * is an OR and false otherwise (AND). Then we - * just need to test if the match is equal to - * the type, and if it is, we can short circuit the - * rest of the checks: - * - * if ((match && op->op == OP_OR) || - * (!match && op->op == OP_AND)) - * return match; - */ - type = op->op == OP_OR; - - for (i = 0; i < op->val; i++) { - pred = &preds[op->ops[i]]; - if (!WARN_ON_ONCE(!pred->fn)) - match = pred->fn(pred, rec); - if (!!match == type) - break; - } - /* If not of not match is equal to not of not, then it is a match */ - return !!match == !op->not; -} - -struct filter_match_preds_data { - struct filter_pred *preds; - int match; - void *rec; -}; - -static int filter_match_preds_cb(enum move_type move, struct filter_pred *pred, - int *err, void *data) -{ - struct filter_match_preds_data *d = data; - - *err = 0; - switch (move) { - case MOVE_DOWN: - /* only AND and OR have children */ - if (pred->left != FILTER_PRED_INVALID) { - /* If ops is set, then it was folded. */ - if (!pred->ops) - return WALK_PRED_DEFAULT; - /* We can treat folded ops as a leaf node */ - d->match = process_ops(d->preds, pred, d->rec); - } else { - if (!WARN_ON_ONCE(!pred->fn)) - d->match = pred->fn(pred, d->rec); - } - - return WALK_PRED_PARENT; - case MOVE_UP_FROM_LEFT: - /* - * Check for short circuits. - * - * Optimization: !!match == (pred->op == OP_OR) - * is the same as: - * if ((match && pred->op == OP_OR) || - * (!match && pred->op == OP_AND)) - */ - if (!!d->match == (pred->op == OP_OR)) - return WALK_PRED_PARENT; - break; - case MOVE_UP_FROM_RIGHT: - break; - } - - return WALK_PRED_DEFAULT; } /* return 1 if event matches, 0 otherwise (discard) */ int filter_match_preds(struct event_filter *filter, void *rec) { - struct filter_pred *preds; - struct filter_pred *root; - struct filter_match_preds_data data = { - /* match is currently meaningless */ - .match = -1, - .rec = rec, - }; - int n_preds, ret; + struct prog_entry *prog; + int i; /* no filter is considered a match */ if (!filter) return 1; - n_preds = filter->n_preds; - if (!n_preds) - return 1; - - /* - * n_preds, root and filter->preds are protect with preemption disabled. - */ - root = rcu_dereference_sched(filter->root); - if (!root) + prog = rcu_dereference_sched(filter->prog); + if (!prog) return 1; - data.preds = preds = rcu_dereference_sched(filter->preds); - ret = walk_pred_tree(preds, root, filter_match_preds_cb, &data); - WARN_ON(ret); - return data.match; + for (i = 0; prog[i].pred; i++) { + struct filter_pred *pred = prog[i].pred; + int match = pred->fn(pred, rec); + if (match == prog[i].when_to_branch) + i = prog[i].target; + } + return prog[i].target; } EXPORT_SYMBOL_GPL(filter_match_preds); -static void parse_error(struct filter_parse_state *ps, int err, int pos) -{ - ps->lasterr = err; - ps->lasterr_pos = pos; -} - static void remove_filter_string(struct event_filter *filter) { if (!filter) @@ -653,11 +910,11 @@ static void remove_filter_string(struct event_filter *filter) filter->filter_string = NULL; } -static void append_filter_err(struct filter_parse_state *ps, +static void append_filter_err(struct filter_parse_error *pe, struct event_filter *filter) { struct trace_seq *s; - int pos = ps->lasterr_pos; + int pos = pe->lasterr_pos; char *buf; int len; @@ -671,11 +928,19 @@ static void append_filter_err(struct filter_parse_state *ps, len = strlen(filter->filter_string); if (pos > len) - len = pos; + pos = len; + + /* indexing is off by one */ + if (pos) + pos++; trace_seq_puts(s, filter->filter_string); - trace_seq_printf(s, "\n%*s", pos, "^"); - trace_seq_printf(s, "\nparse_error: %s\n", err_text[ps->lasterr]); + if (pe->lasterr > 0) { + trace_seq_printf(s, "\n%*s", pos, "^"); + trace_seq_printf(s, "\nparse_error: %s\n", err_text[pe->lasterr]); + } else { + trace_seq_printf(s, "\nError: (%d)\n", pe->lasterr); + } trace_seq_putc(s, 0); buf = kmemdup_nul(s->buffer, s->seq.len, GFP_KERNEL); if (buf) { @@ -715,108 +980,18 @@ void print_subsystem_event_filter(struct event_subsystem *system, mutex_unlock(&event_mutex); } -static int __alloc_pred_stack(struct pred_stack *stack, int n_preds) -{ - stack->preds = kcalloc(n_preds + 1, sizeof(*stack->preds), GFP_KERNEL); - if (!stack->preds) - return -ENOMEM; - stack->index = n_preds; - return 0; -} - -static void __free_pred_stack(struct pred_stack *stack) -{ - kfree(stack->preds); - stack->index = 0; -} - -static int __push_pred_stack(struct pred_stack *stack, - struct filter_pred *pred) -{ - int index = stack->index; - - if (WARN_ON(index == 0)) - return -ENOSPC; - - stack->preds[--index] = pred; - stack->index = index; - return 0; -} - -static struct filter_pred * -__pop_pred_stack(struct pred_stack *stack) -{ - struct filter_pred *pred; - int index = stack->index; - - pred = stack->preds[index++]; - if (!pred) - return NULL; - - stack->index = index; - return pred; -} - -static int filter_set_pred(struct event_filter *filter, - int idx, - struct pred_stack *stack, - struct filter_pred *src) -{ - struct filter_pred *dest = &filter->preds[idx]; - struct filter_pred *left; - struct filter_pred *right; - - *dest = *src; - dest->index = idx; - - if (dest->op == OP_OR || dest->op == OP_AND) { - right = __pop_pred_stack(stack); |