summaryrefslogtreecommitdiffstats
path: root/peekaboo/ruleset/expressions.py
diff options
context:
space:
mode:
Diffstat (limited to 'peekaboo/ruleset/expressions.py')
-rw-r--r--peekaboo/ruleset/expressions.py515
1 files changed, 515 insertions, 0 deletions
diff --git a/peekaboo/ruleset/expressions.py b/peekaboo/ruleset/expressions.py
new file mode 100644
index 0000000..e4fe0ad
--- /dev/null
+++ b/peekaboo/ruleset/expressions.py
@@ -0,0 +1,515 @@
+###############################################################################
+# #
+# Peekaboo Extended Email Attachment Behavior Observation Owl #
+# #
+# ruleset/ #
+# expressions.py #
+###############################################################################
+# #
+# Copyright (C) 2016-2019 science + computing ag #
+# Based on pyparsing's eval_arith.py.
+# Copyright 2009, 2011 Paul McGuire
+# #
+# This program is free software: you can redistribute it and/or modify #
+# it under the terms of the GNU General Public License as published by #
+# the Free Software Foundation, either version 3 of the License, or (at #
+# your option) any later version. #
+# #
+# This program is distributed in the hope that it will be useful, but #
+# WITHOUT ANY WARRANTY; without even the implied warranty of #
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU #
+# General Public License for more details. #
+# #
+# You should have received a copy of the GNU General Public License #
+# along with this program. If not, see <http://www.gnu.org/licenses/>. #
+# #
+###############################################################################
+
+""" A simple expression grammar used for writing generic rules. """
+
+from future.builtins import super
+
+import logging
+import operator
+import re
+from pyparsing import nums, alphas, alphanums, Word, Combine, Suppress, \
+ oneOf, opAssoc, infixNotation, Literal, Keyword, Group, \
+ delimitedList, QuotedString, ParserElement, ParseException
+from peekaboo.ruleset import Result
+
+
+logger = logging.getLogger(__name__)
+
+
class EvalBase(object):
    """ Base class of evaluatable objects providing common infrastructure.

    It stores the parsed token(s), offers a conversion hook for subclasses
    and keeps the evaluation context which links a child expression to its
    parent so that feedback can travel upwards. """
    def __init__(self, tokens):
        """ Just store the tokens for later evaluation. Expects all relevant
        tokens to be grouped together in the first element of the token list
        passed. This is the default for operand+operator+operand+... constructs
        with infixNotation and can be forced for others using Group():

        rvallist = Group(Suppress('[') + delimitedList(rval) + Suppress(']'))
        """
        self.value = self.token = tokens[0]
        self.context = None
        self.convert()
        self.string_repr_format = "(%s)"

    def convert(self):
        """ Method to (optionally) convert the input token(s) into something
        else. Particularly used for conversion to base types. """
        self.value = self.token

    def feedback(self, info):
        """ Accept and process feedback from evaluation children. """
        # by default propagate feedback upwards if we have a context
        if self.context and 'parent' in self.context:
            self.context['parent'].feedback(info)

    def subeval(self, expression, update=None):
        """ Evaluate a subexpression with an updated evaluation context
        containing common metadata such as that we're its parent and optional
        additional data. The context of this object is copied (shallowly) so
        that the additions are only visible to the subexpression. """
        context = self.context.copy()
        context['parent'] = self
        if update:
            context.update(update)
        return expression.eval(context)

    def set_context(self, context):
        """ Save an evaluation context internally for later use by e.g.
        feedback(). """
        self.context = context

    def eval(self, context):
        """ Evaluate the object content against a context. Just return the
        stored (and optionally converted) value by default and remember our
        context for possible feedback to our parent or from our children. """
        self.set_context(context)
        return self.value

    def __str__(self):
        """ Render the token(s) wrapped in the delimiters of this object. """
        # A single string token must be used as a whole. Iterating it would
        # render it character by character, e.g. "foo" as "f o o".
        # type(u"") additionally covers unicode on python2.
        if isinstance(self.token, (str, type(u""))):
            content = self.token
        else:
            content = " ".join(["%s" % x for x in self.token])
        return self.string_repr_format % (content,)
+
+
class EvalBoolean(EvalBase):
    """ Evaluatable wrapper around a parsed boolean constant. """
    def convert(self):
        """ Turn the literal into a bool: only the exact token "True" yields
        True, every other token yields False. """
        logger.debug("Boolean: %s", self.value)
        is_true = (self.token == "True")
        self.value = is_true
+
+
class EvalInteger(EvalBase):
    """ Evaluatable wrapper around a parsed integer constant. """
    def convert(self):
        """ Convert the stored token into an int. """
        literal = self.token
        logger.debug("Integer: %s", literal)
        self.value = int(literal)
+
+
class EvalReal(EvalBase):
    """ Evaluatable wrapper around a parsed real constant. """
    def convert(self):
        """ Convert the stored token into a float. """
        literal = self.token
        logger.debug("Real: %s", literal)
        self.value = float(literal)
+
+
class EvalString(EvalBase):
    """ Evaluatable wrapper around a parsed string constant. """
    def convert(self):
        """ Use the token unchanged as the value, only logging it. """
        text = self.token
        logger.debug("String: %s", text)
        self.value = text
+
+
class OperatorRegex(object):
    """ A class implementing operators on regular expressions.

    Equality against a string is implemented as re.match and membership as
    re.search. If the other operand is a list or set, the operation succeeds
    as soon as any of its elements matches. Objects are hashable (by their
    pattern) so they can be collected in sets. """
    def __init__(self, string):
        """ Compile the given pattern string into a regular expression. """
        self.regex = re.compile(string)

    @staticmethod
    def compare_op_impl(function, other):
        """ Implement handling of iterable operands.

        @param function: matching function of a compiled regular expression
                         (match or search).
        @param other: a single value or a list or set of values to match.
        @returns: True if the value or any element of the list or set matches,
                  False otherwise. """
        if isinstance(other, (list, set)):
            for val in other:
                logger.debug("Regular expression match: %s == %s",
                             function, val)
                if function(val):
                    return True

            return False

        # normalise the match object (or None) to a plain boolean so scalar
        # and iterable operands yield the same type of result
        return bool(function(other))

    def __eq__(self, other):
        """ Implement equality using re.match. Two regex objects are equal if
        their patterns are identical. """
        if isinstance(other, OperatorRegex):
            # needed for hash-based containers: a regex cannot be matched
            # against another regex object, so compare the patterns instead
            return self.regex.pattern == other.regex.pattern

        logger.debug("Regular expression match: %s == %s", self.regex, other)
        return self.compare_op_impl(self.regex.match, other)

    def __ne__(self, other):
        """ Implement inequality as the inverse of equality. python2 does not
        derive this from __eq__ automatically. """
        return not self.__eq__(other)

    def __hash__(self):
        """ Hash by pattern. Defining __eq__ alone makes objects unhashable on
        python3 which would prevent their use as set elements. """
        return hash(self.regex.pattern)

    def __contains__(self, other):
        """ Implement membership using re.search """
        logger.debug("Regular expression search: %s in %s", self.regex, other)
        return self.compare_op_impl(self.regex.search, other)
+
+
class EvalRegex(EvalBase):
    """ Evaluatable wrapper around a parsed regular expression. """
    def convert(self):
        """ Wrap the pattern token into an OperatorRegex object. """
        pattern = self.token
        logger.debug("Regular expression: %s", pattern)
        self.value = OperatorRegex(pattern)

    def eval(self, context):
        """ Remember the context, report via feedback() that a regular
        expression was encountered and return the regex operator object. """
        self.set_context(context)
        notification = {'regex_parsed': True}
        self.feedback(notification)
        return self.value
+
+
class RegexIterableMixIn(object):
    """ Common functionality for lists and sets containing regular expressions
    with different behaviour of membership operators. """
    def __eq__(self, other):
        """ Consider us equal to the other operand if any of our elements
        equals it. A non-iterable operand is treated as a list of one. """
        if not isinstance(other, (list, set)):
            other = [other]

        # in contrast to normal lists, a list of regexes compared to a list
        # of strings is considered equal if any regex matches any string
        for regex in self:
            logger.debug("Eval regex: %s == %s", regex, other)
            if regex == other:
                return True

        return False

    def __ne__(self, other):
        """ Implement inequality as the inverse of our equality. Without this
        the __ne__ of the list or set base class would be used, which
        compares element-wise and contradicts __eq__ above. """
        return not self.__eq__(other)

    # mutable containers with custom equality must not be hashable; python3
    # implies this when __eq__ is defined, state it explicitly for python2
    __hash__ = None

    def __contains__(self, item):
        """ Report membership if the item is contained in any of our
        elements. """
        for regex in self:
            logger.debug("Eval regex: %s in %s", regex, item)
            # we implement "regex in string" of our grammar as "string in
            # regex" so that our overridden operator
            # regex.__contains__(string) is called and searching can be
            # done. Otherwise error "TypeError: 'in <string>' requires
            # string as left operand, not OperatorRegex" would ensue.
            if item in regex:
                return True

        return False
+
+
class RegexList(RegexIterableMixIn, list):
    """ List of regular expressions whose equality and membership operators
    are provided by RegexIterableMixIn instead of the plain list ones. """
+
+
class RegexSet(RegexIterableMixIn, set):
    """ Set of regular expressions whose equality and membership operators
    are provided by RegexIterableMixIn instead of the plain set ones. """
+
+
class EvalRegexIterableMixIn(object):
    """ Shared behaviour of evaluatable iterables which may contain regular
    expressions. """
    def __init__(self, tokens):
        """ Initialise the next class in line and start out assuming that no
        regular expressions are contained. """
        super().__init__(tokens)
        self.contains_regexes = False

    def feedback(self, info):
        """ Note that we contain regular expressions if a child reports
        'regex_parsed'. That key is consumed here, whatever remains is handed
        on to the next feedback handler. """
        absent = object()
        if info.pop('regex_parsed', absent) is not absent:
            self.contains_regexes = True

        super().feedback(info)
+
+
class EvalList(EvalRegexIterableMixIn, EvalBase):
    """ Evaluatable wrapper around a parsed list. """
    def __init__(self, token):
        """ Set up the object and render it in square brackets. """
        super().__init__(token)
        self.string_repr_format = "[%s]"

    def eval(self, context):
        """ Evaluate all elements in order and return them as a list, or as a
        RegexList if an element reported a regular expression meanwhile. """
        self.set_context(context)
        logger.debug("List: %s", self.value)
        elements = [self.subeval(element) for element in self.value]
        # the flag is set by feedback during the evaluation above, so it must
        # only be checked afterwards
        return RegexList(elements) if self.contains_regexes else elements
+
+
class EvalSet(EvalRegexIterableMixIn, EvalBase):
    """ Evaluatable wrapper around a parsed set. """
    def __init__(self, token):
        """ Set up the object and render it in curly braces. """
        super().__init__(token)
        self.string_repr_format = "{%s}"

    def eval(self, context):
        """ Evaluate all elements and return them as a set, or as a RegexSet
        if an element reported a regular expression meanwhile. """
        self.set_context(context)
        logger.debug("Set: %s", self.value)
        elements = {self.subeval(element) for element in self.value}
        # the flag is set by feedback during the evaluation above, so it must
        # only be checked afterwards
        return RegexSet(elements) if self.contains_regexes else elements
+
+
class IdentifierMissingException(KeyError):
    """ Raised when an identifier used in an expression cannot be looked up in
    the evaluation context. """
+
+
class EvalIdentifier(EvalBase):
    """ Evaluatable wrapper around a parsed object name. """
    def eval(self, context):
        """ Resolve the identifier. If the context flags a member access,
        return the bare name, otherwise the value of the variable of that name
        from the context.

        @raises IdentifierMissingException: if the lookup fails. """
        name = self.value
        logger.debug("Identifier: %s", name)
        if context.get('member'):
            return name

        try:
            resolved = context['variables'][name]
        except KeyError as error:
            raise IdentifierMissingException(error.args[0])

        return resolved
+
+
class EvalResult(EvalBase):
    """ Evaluatable wrapper around an analysis result name. """
    def convert(self):
        """ Translate the token into a Result. The short forms 'fail' and
        'ignore' have explicit mappings, every other token is looked up in
        Result by name. """
        logger.debug("Result: %s", self.token)
        short_forms = {
            'fail': Result.failed,
            'ignore': Result.ignored,
        }

        name = self.token
        self.value = short_forms[name] if name in short_forms else Result[name]
+
+
class EvalModifier(EvalBase):
    """ Evaluates single-operand modifier expressions: explicit sign,
    bitwise inversion and logical negation. """
    def __init__(self, tokens):
        """ Split the grouped tokens into the operator and its operand. """
        super().__init__(tokens)
        modifier, operand = tokens[0]
        self.operator = modifier
        self.value = operand

    def eval(self, context):
        """ Evaluate the operand and apply the modifier to it.

        @raises ValueError: if the operator is not one of +, -, ~ and not. """
        self.set_context(context)
        # the operand is evaluated before the operator is looked at
        operand = self.subeval(self.value)
        handlers = {
            '+': lambda x: x,
            '-': lambda x: -x,
            '~': lambda x: ~x,
            'not': lambda x: not x,
        }
        if self.operator not in handlers:
            raise ValueError('Invalid operator %s' % self.operator)

        return handlers[self.operator](operand)
+
+
class EvalPower(EvalBase):
    """ Evaluates exponentiation expressions. """
    def eval(self, context):
        """ Evaluate a chain of powers from right to left, i.e. a ** b ** c
        is computed as a ** (b ** c). """
        self.set_context(context)
        # start with the rightmost operand, then walk the remaining operands
        # backwards, skipping the operator tokens in between
        exponent = self.subeval(self.value[-1])
        remaining_bases = self.value[-3::-2]
        for base in remaining_bases:
            exponent = self.subeval(base) ** exponent

        return exponent
+
+
def operator_operands(tokenlist):
    """ Generator yielding (operator, operand) pairs from a flat sequence of
    alternating operators and operands. A trailing element without a partner
    is dropped. """
    tokens = iter(tokenlist)
    for op in tokens:
        try:
            operand = next(tokens)
        except StopIteration:
            return

        yield (op, operand)
+
+
class EvalArith(EvalBase):
    """ Class to evaluate typical arithmetic and bitwise operations like
    addition, multiplication, division, shifts and bitwise and/xor/or
    expressions as well as member access (.) and implication (->). Operator
    precedence is handled by the order in which they're evaluated by the
    parser, i.e. given to infixNotation. """
    def eval(self, context):
        """ Evaluate the operands from left to right, combining them using
        the operators in between.

        @raises ValueError: if an unknown operator is encountered. """
        self.set_context(context)

        # Plain binary operators. Deliberately not the augmented assignment
        # variants (+= etc.): those would modify a mutable left operand in
        # place, e.g. a list taken from the context variables.
        binary_operators = {
            '+': operator.add,
            '-': operator.sub,
            '*': operator.mul,
            # spelled out instead of operator.truediv so the division
            # semantics of the running interpreter apply
            '/': lambda a, b: a / b,
            '//': operator.floordiv,
            '%': operator.mod,
            '<<': operator.lshift,
            '>>': operator.rshift,
            # accepted by the grammar of ExpressionParser as well
            '&': operator.and_,
            '^': operator.xor,
            '|': operator.or_,
        }

        ret = self.subeval(self.value[0])
        for op, val in operator_operands(self.value[1:]):
            if op in binary_operators:
                ret = binary_operators[op](ret, self.subeval(val))
            elif op == '.':
                # the right-hand side is evaluated as a member name, not as a
                # variable
                ret = getattr(ret, self.subeval(val, update={'member': True}))
            elif op == "->":
                # implication: only evaluate the right-hand side if the
                # left-hand side is true
                if ret:
                    ret = self.subeval(val)
                else:
                    ret = None
            else:
                raise ValueError('Invalid operator %s' % op)

        return ret
+
+
class EvalLogic(EvalBase):
    """ Class to evaluate comparison and boolean logic expressions """
    def __init__(self, tokens):
        """ Store the tokens and set up the mapping of operator tokens to the
        functions implementing them. """
        super().__init__(tokens)
        self.operator_map = {
            "<": operator.lt,
            "<=": operator.le,
            ">": operator.gt,
            ">=": operator.ge,
            "==": operator.eq,
            "!=": operator.ne,
            "in": EvalLogic.in_,
            "not in": EvalLogic.not_in,
            "is": operator.is_,
            "is not": operator.is_not,
            "isdisjoint": lambda a, b: a.isdisjoint(b),
            # logical, not bitwise. eval() short-circuits these itself and
            # does not go through the map for them.
            "and": lambda a, b: bool(a and b),
            "or": lambda a, b: bool(a or b),
        }

    @staticmethod
    def in_(a, b):
        """ Literally implement membership test. Make it a static method so we
        can do identity checks. Do not use operator.contains because it needs
        operands swapped. """
        return a in b

    @staticmethod
    def not_in(a, b):
        """ Naively implement non-membership test. """
        return a not in b

    @staticmethod
    def handle_regexes(function, val1, val2):
        """ Special handling of equality and membership checks for regular
        expressions.

        @param function: the function implementing the operator.
        @param val1: left operand.
        @param val2: right operand.
        @returns: result of applying the operator to the operands. """
        regex_types = (OperatorRegex, RegexIterableMixIn)
        if function in (operator.eq, operator.ne):
            if isinstance(val2, regex_types):
                # swap operands around in case the first does not contain any
                # regex but the other does to reliably reroute to our
                # overridden __eq__ operator, just do that always to keep
                # checks simple since (in)equality is commutative anyway
                val1, val2 = val2, val1

            if function is operator.ne and isinstance(val1, regex_types):
                # express inequality through the overridden equality so the
                # result does not depend on which __ne__ the regex object
                # happens to inherit
                return not (val1 == val2)
        elif (function in (EvalLogic.in_, EvalLogic.not_in)
              and isinstance(val1, regex_types)):
            # "<regex> in <string>|<list-of-strings>" of our grammar directly
            # implemented using the "in" operator would call
            # <string>|<list-of-strings>.__contains__(<regex>) which we cannot
            # override with reasonable effort. To get a call of
            # <regex>.__contains__(<string>|<list-of-strings>) we need to
            # switch operands. Otherwise error "TypeError: 'in <string>'
            # requires string as left operand, not OperatorRegex" would ensue.
            val1, val2 = val2, val1

        # nothing special
        return function(val1, val2)

    def eval(self, context):
        """ Evaluate the expression to a boolean.

        Comparisons are chained: "a < b < c" is true if every neighbouring
        pair compares true, and evaluation stops at the first pair that does
        not. "and" and "or" are combined logically from left to right and
        only evaluate their right-hand side if it can still change the
        result. """
        self.set_context(context)
        val1 = self.subeval(self.value[0])
        result = True
        for op, parseobj in operator_operands(self.value[1:]):
            if op in ("and", "or"):
                logger.debug("Logic: %s %s ...", val1, op)
                if op == "and":
                    result = bool(val1) and bool(self.subeval(parseobj))
                else:
                    result = bool(val1) or bool(self.subeval(parseobj))

                # carry the combined truth value into the next operation
                val1 = result
                continue

            val2 = self.subeval(parseobj)
            logger.debug("Comparison: %s %s %s", val1, op, val2)
            function = self.operator_map[op]
            if not self.handle_regexes(function, val1, val2):
                return False

            val1 = val2

        return result
+
+
class ExpressionParser(object):
    """ Define and run the parser. """
    def __init__(self):
        """ Build the expression grammar and attach the parse actions which
        turn parsed tokens into evaluatable objects. """
        # speed up infixNotation considerably at the price of some cache memory
        ParserElement.enablePackrat()

        boolean = Keyword('True') | Keyword('False')
        integer = Word(nums)
        real = Combine(Word(nums) + "." + Word(nums))
        string = (QuotedString('"', escChar='\\')
                  | QuotedString("'", escChar='\\'))
        regex = QuotedString('/', escChar='\\')
        identifier = Word(alphas, alphanums + '_')
        dereference = infixNotation(identifier, [
            (Literal('.'), 2, opAssoc.LEFT, EvalArith),
        ])
        result = (Keyword('bad') | Keyword('fail') | Keyword('good')
                  | Keyword('ignore') | Keyword('unknown'))
        # real needs to be tried before integer because an integer is a
        # prefix of every real
        rval = boolean | real | integer | string | regex | result | dereference
        rvallist = Group(Suppress('[') + delimitedList(rval) + Suppress(']'))
        rvalset = Group(Suppress('{') + delimitedList(rval) + Suppress('}'))
        operand = rval | rvallist | rvalset

        # parse actions replace the parsed tokens with an instantiated object
        # which we can later call into for evaluation of its content
        boolean.setParseAction(EvalBoolean)
        integer.setParseAction(EvalInteger)
        real.setParseAction(EvalReal)
        string.setParseAction(EvalString)
        regex.setParseAction(EvalRegex)
        identifier.setParseAction(EvalIdentifier)
        result.setParseAction(EvalResult)
        rvallist.setParseAction(EvalList)
        rvalset.setParseAction(EvalSet)

        identity_test = Keyword('is') + ~Keyword('not') | Combine(
            Keyword('is') + Keyword('not'), adjacent=False, joinString=' ')
        membership_test = Keyword('in') | Combine(
            Keyword('not') + Keyword('in'), adjacent=False, joinString=' ')
        comparison_op = oneOf('< <= > >= != == isdisjoint')
        comparison = identity_test | membership_test | comparison_op

        # operators listed from highest to lowest precedence
        self.parser = infixNotation(operand, [
            (Literal('**'), 2, opAssoc.LEFT, EvalPower),
            (oneOf('+ - ~'), 1, opAssoc.RIGHT, EvalModifier),
            (oneOf('* / // %'), 2, opAssoc.LEFT, EvalArith),
            (oneOf('+ -'), 2, opAssoc.LEFT, EvalArith),
            (oneOf('<< >>'), 2, opAssoc.LEFT, EvalArith),
            (Literal('&'), 2, opAssoc.LEFT, EvalArith),
            (Literal('^'), 2, opAssoc.LEFT, EvalArith),
            (Literal('|'), 2, opAssoc.LEFT, EvalArith),
            (comparison, 2, opAssoc.LEFT, EvalLogic),
            (Keyword('not'), 1, opAssoc.RIGHT, EvalModifier),
            (Keyword('and'), 2, opAssoc.LEFT, EvalLogic),
            (Keyword('or'), 2, opAssoc.LEFT, EvalLogic),
            (Keyword('->'), 2, opAssoc.LEFT, EvalArith),
        ])

    @staticmethod
    def _format_parse_error(expression, loc, col):
        """ Build the parse error message with the offending character of the
        expression highlighted.

        @param expression: the expression that failed to parse.
        @param loc: zero-based index of the error position in the expression.
        @param col: one-based column of the error to report to the user.
        @returns: the message as a string. """
        # slices instead of an index so that an error position at or beyond
        # the end of the expression yields an empty highlight rather than an
        # IndexError
        return "Expression parse error near character %d: %s>>%s<<%s" % (
            col, expression[:loc], expression[loc:loc + 1],
            expression[loc + 1:])

    def parse(self, expression):
        """ Parse an expression and return an object supporting evaluation of
        that expression against a context.

        @raises SyntaxError: if the expression cannot be parsed. """
        try:
            return self.parser.parseString(expression, parseAll=True)[0]
        except ParseException as parse_error:
            # col is one-based and thus unsuitable as an index into the
            # expression, use the zero-based loc for that
            raise SyntaxError(self._format_parse_error(
                expression, parse_error.loc, parse_error.col))
+
+
if __name__ == '__main__':
    # small demonstration: parse a sample expression and print the result
    demo_parser = ExpressionParser()
    print(demo_parser.parse('foo == (bar - blub)'))