Validate ruleset config

Validate the ruleset configuration at startup to inform the user about misconfiguration and exit immediately instead of giving warnings during seemingly normal operation. This also gives us a chance to pre-compile regexes for more efficient matching later on. We give rules a new method get_config() which retrieves their configuration. This is called for each rule by the new method validate_rule_config() of the ruleset engine to catch errors. This way the layout and extent of the configuration are still completely governed by the rule, and we can interview it about its happiness with what's provided in the configuration file. As an incidental cleanup, merge class PeekabooRulesetConfig into PeekabooConfigParser because there's nothing left where it could and would need to help the rules with an abstraction of the config file. Also switch class PeekabooConfig to be a subclass of PeekabooConfigParser so it can (potentially) benefit from the list parsing code there. By moving the special log level and by-default-type getters over there as well, we end up with nicely generic config classes that can benefit directly from improvements in the configparser module. Update the test suite to test and use this new functionality. Incidentally, remove the convoluted inheritance-based config testing layout in favour of creating subclasses of the config classes.
author: Michael Weiser <michael.weiser@gmx.de> 2019-04-18 08:57:15 +0000
committer: Michael Weiser <michael.weiser@gmx.de> 2019-04-25 12:20:20 +0000
commit: 77d2ff1348a6f1ade05b54b297fc771abd0e14cd (patch)
tree: e7914e3ad89f36b65aced27f53132244f894832f
parent: e6c44a8ca3c2216904731e4d889e53473691c0dc (diff)
10 files changed, 586 insertions, 432 deletions
diff --git a/peekaboo/config.py b/peekaboo/config.py
index 9215011..51a4870 100644
--- a/peekaboo/config.py
+++ b/peekaboo/config.py
@@ -26,6 +26,7 @@
 defaults as well as reading a configuration file. """
 
 
+import re
 import sys
 import logging
 import configparser
@@ -55,8 +56,160 @@ class PeekabooConfigParser( # pylint: disable=too-many-ancestors
                 'Configuration file "%s" can not be parsed: %s' %
                 (config_file, cperror))
 
+        self.lists = {}
+        self.relists = {}
+
+    def getlist(self, section, option, raw=False, vars=None, fallback=None):
+        """ Special getter where multiple options in the config file
+        distinguished by a .<no> suffix form a list. Matches the signature for
+        configparser getters. """
+        # cache results because the following is somewhat inefficient
+        if section not in self.lists:
+            self.lists[section] = {}
+
+        if option in self.lists[section]:
+            return self.lists[section][option]
+
+        if section not in self:
+            self.lists[section][option] = fallback
+            return fallback
+
+        # Go over all options in this section we want to allow "holes" in
+        # the lists, i.e setting.1, setting.2 but no setting.3 followed by
+        # setting.4. We use here that ConfigParser retains option order from
+        # the file.
+        value = []
+        for setting in self[section]:
+            if not setting.startswith(option):
+                continue
+
+            # Parse 'setting' into (key) and 'setting.subscript' into
+            # (key, subscript) and use it to determine if this setting is a
+            # list. Note how we do not use the subscript at all here.
+            name_parts = setting.split('.')
+            key = name_parts[0]
+            is_list = len(name_parts) > 1
+
+            if key != option:
+                continue
+
+            if not is_list:
+                raise PeekabooConfigException(
+                    'Option %s in section %s is supposed to be a list '
+                    'but given as individual setting' % (setting, section))
+
+            # Potential further checks:
+            # - There are no duplicate settings with ConfigParser. The last
+            #   one always wins.
+
+            value.append(self[section].get(setting, raw=raw, vars=vars))
+
+        # it's not gonna get any better on the next call, so cache even the
+        # default
+        if not value:
+            value = fallback
+
+        self.lists[section][option] = value
+        return value
+
+    def getrelist(self, section, option, raw=False, vars=None, fallback=None):
+        """ Special getter for lists of regular expressions. Returns the
+        compiled expression objects in a list ready for matching and searching.
+        """
+        if section not in self.relists:
+            self.relists[section] = {}
+
+        if option in self.relists[section]:
+            return self.relists[section][option]
+
+        if section not in self:
+            self.relists[section][option] = fallback
+            return fallback
+
+        strlist = self[section].getlist(option, raw=raw, vars=vars,
+                                        fallback=fallback)
+        if strlist is None:
+            self.relists[section][option] = None
+            return None
+
+        compiled_res = []
+        for regex in strlist:
+            try:
+                compiled_res.append(re.compile(regex))
+            except (ValueError, TypeError) as error:
+                raise PeekabooConfigException(
+                    'Failed to compile regular expression "%s" (section %s, '
+                    'option %s): %s' % (re, section, option, error))
+
+        # it's not gonna get any better on the next call, so cache even the
+        # default
+        if not compiled_res:
+            compiled_res = fallback
+
+        self.relists[section][option] = compiled_res
+        return compiled_res
+
+    def get_log_level(self, section, option, raw=False, vars=None,
+                      fallback=None):
+        """ Get the log level from the configuration file and parse the string
+        into a logging loglevel such as logging.CRITICAL. Raises config
+        exception if the log level is unknown. Options identical to get(). """
+        levels = {
+            'CRITICAL': logging.CRITICAL,
+            'ERROR': logging.ERROR,
+            'WARNING': logging.WARNING,
+            'INFO': logging.INFO,
+            'DEBUG': logging.DEBUG
+        }
+
+        level = self.get(section, option, raw=raw, vars=vars, fallback=None)
+        if level is None:
+            return fallback
 
-class PeekabooConfig(object): # pylint: disable=too-many-instance-attributes
+        if level not in levels:
+            raise PeekabooConfigException('Unknown log level %s' % level)
+
+        return levels[level]
+
+    def get_by_default_type(self, section, option, fallback=None,
+                            option_type=None):
+        """ Get an option from the configuration file parser. Automatically
+        detects the type from the type of the default if given and calls the
+        right getter method to coerce the value to the correct type.
+
+        @param section: Which section to look for option in.
+        @type section: string
+        @param option: The option to read.
+        @type option: string
+        @param fallback: (optional) Default value to return if option is not
+                         found. Defaults itself to None so that the method will
+                         return None if the option is not found.
+        @type fallback: int, bool, str or None.
+        @param option_type: Override the option type.
+        @type option_type: int, bool, str or None. """
+        if option_type is None and fallback is not None:
+            option_type = type(fallback)
+
+        getter = {
+            int: self.getint,
+            bool: self.getboolean,
+            str: self.get,
+            None: self.get,
+        }
+
+        try:
+            return getter[option_type](section, option)
+        except configparser.NoSectionError:
+            logger.debug('Configuration section %s not found - using '
+                         'default %s', section, fallback)
+        except configparser.NoOptionError:
+            logger.debug('Configuration option %s not found in section '
+                         '%s - using default: %s', option, section, fallback)
+
+        return fallback
+
+
+class PeekabooConfig(PeekabooConfigParser):
     """ This class represents the Peekaboo configuration. """
     def __init__(self, config_file=None, log_level=None):
         """ Initialise the configuration with defaults, overwrite with command
@@ -142,21 +295,21 @@ class PeekabooConfig(object): # pylint: disable=too-many-instance-attributes
         # read configuration file. Note that we require a configuration file
         # here. We may change that if we decide that we want to allow the user
         # to run us with the above defaults only.
-        self.__config = PeekabooConfigParser(self.config_file)
+        PeekabooConfigParser.__init__(self, self.config_file)
 
         # overwrite above defaults in our member variables via indirect access
         settings = vars(self)
         for (option, config_names) in config_options.items():
             # maybe use special getter
-            get = self.get
+            getter = self.get_by_default_type
             if len(config_names) == 3:
-                get = config_names[2]
+                getter = config_names[2]
 
             # e.g.:
             # self.log_format = self.get('logging', 'log_format',
             #                            self.log_format)
-            settings[option] = get(config_names[0], config_names[1],
-                                   settings[option])
+            settings[option] = getter(
+                config_names[0], config_names[1], fallback=settings[option])
 
         # Update logging with what we just parsed from the config
         self.setup_logging()
@@ -164,63 +317,6 @@ class PeekabooConfig(object): # pylint: disable=too-many-instance-attributes
         # here we could overwrite defaults and config file with additional
         # command line arguments if required
 
-    def get(self, section, option, default=None, option_type=None):
-        """ Get an option from the configuration file parser. Automatically
-        detects the type from the type of the default if given and calls the
-        right getter method to coerce the value to the correct type.
-
-        @param section: Which section to look for option in.
-        @type section: string
-        @param option: The option to read.
-        @type option: string
-        @param default: (optional) Default value to return if option is not
-                        found. Defaults itself to None so that the method will
-                        return None if the option is not found.
-        @type default: int, bool, str or None.
-        @param option_type: Override the option type.
-        @type option_type: int, bool, str or None. """
-        if option_type is None and default is not None:
-            option_type = type(default)
-
-        getter = {
-            int: self.__config.getint,
-            bool: self.__config.getboolean,
-            str: self.__config.get,
-            None: self.__config.get,
-        }
-
-        try:
-            return getter[option_type](section, option)
-        except configparser.NoSectionError:
-            logger.debug('Configuration section %s not found - using '
-                         'default %s', section, default)
-        except configparser.NoOptionError:
-            logger.debug('Configuration option %s not found in section '
-                         '%s - using default: %s', option, section, default)
-
-        return default
-
-    def get_log_level(self, section, option, default=None):
-        """ Get the log level from the configuration file and parse the string
-        into a logging loglevel such as logging.CRITICAL. Raises config
-        exception if the log level is unknown. Options identical to get(). """
-        levels = {
-            'CRITICAL': logging.CRITICAL,
-            'ERROR': logging.ERROR,
-            'WARNING': logging.WARNING,
-            'INFO': logging.INFO,
-            'DEBUG': logging.DEBUG
-        }
-
-        level = self.get(section, option, None)
-        if level is None:
-            return default
-
-        if level not in levels:
-            raise PeekabooConfigException('Unknown log level %s' % level)
-
-        return levels[level]
-
     def setup_logging(self):
         """ Setup logging to console by reconfiguring the root logger so that
         it affects all loggers everywhere.  """
@@ -248,69 +344,3 @@ class PeekabooConfig(object): # pylint: disable=too-many-instance-attributes
         return '<PeekabooConfig(%s)>' % settings
 
     __repr__ = __str__
-
-
-class PeekabooRulesetConfig(object):
-    """
-    This class represents the ruleset configuration file "ruleset.conf".
-
-    The ruleset configuration is stored as a dictionary in the form of
-    ruleset_config[rule_name][config_option] = value | [value1, value2, ...]
-
-    @since: 1.6
-    """
-    def __init__(self, config_file):
-        self.config_file = config_file
-        self.ruleset_config = {}
-
-        config = PeekabooConfigParser(self.config_file)
-        sections = config.sections()
-        for section in sections:
-            self.ruleset_config[section] = {}
-
-        for section in sections:
-            for setting in config.options(section):
-                # Parse 'setting' into (key) and 'setting.subscript' into
-                # (key, subscript) and use it to determine if this setting is a
-                # list. Note how we do not use the subscript at all here.
-                name_parts = setting.split('.')
-                key = name_parts[0]
-                is_list = len(name_parts) > 1
-
-                saved_val = self.ruleset_config[section].get(key)
-                if saved_val is None and is_list:
-                    saved_val = []
-
-                # If the setting wants to add to a list the saved or freshly
-                # initialised value from above should be a list. Otherwise it
-                # should of course not be.
-                if is_list != isinstance(saved_val, list):
-                    raise PeekabooConfigException(
-                        'Setting %s in section %s specified as list as well '
-                        'as individual setting' % (setting, section))
-
-                # Potential further checks:
-                # - There are no duplicate settings with ConfigParser. The last
-                #   one always wins.
-
-                if is_list:
-                    saved_val.append(config.get(section, setting))
-                else:
-                    saved_val = config.get(section, setting)
-
-                self.ruleset_config[section][key] = saved_val
-
-    def rule_config(self, rule):
-        """ Get the configuration for a rule.
-
-        @param rule: Name of the rule whose configuration to return.
-        @type rule: string
-        @return: dict of rule configuration settings or None if no
-                 configuration is present. """
-        return self.ruleset_config.get(rule)
-
-    def __str__(self):
-        return '<PeekabooRulesetConfiguration(filepath="%s", %s)>' % \
-            (self.config_file, self.ruleset_config)
-
-    __repr__ = __str__
diff --git a/peekaboo/daemon.py b/peekaboo/daemon.py
index c7fbf5e..13169a4 100644
--- a/peekaboo/daemon.py
+++ b/peekaboo/daemon.py
@@ -38,12 +38,14 @@ from argparse import ArgumentParser
 from sdnotify import SystemdNotifier
 from sqlalchemy.exc import SQLAlchemyError
 from peekaboo import PEEKABOO_OWL, __version__
-from peekaboo.config import PeekabooConfig, PeekabooRulesetConfig
+from peekaboo.config import PeekabooConfig, PeekabooConfigParser
 from peekaboo.db import PeekabooDatabase
 from peekaboo.queuing import JobQueue
+from peekaboo.ruleset.engine import RulesetEngine
 from peekaboo.sample import SampleFactory
 from peekaboo.server import PeekabooServer
-from peekaboo.exceptions import PeekabooDatabaseError, PeekabooConfigException
+from peekaboo.exceptions import PeekabooDatabaseError, \
+        PeekabooConfigException, PeekabooRulesetConfigError
 from peekaboo.toolbox.cuckoo import CuckooEmbed, CuckooApi
 
 
@@ -321,12 +323,22 @@ def run():
     # workers of the job queue need the ruleset configuration to create the
     # ruleset engine with it
     try:
-        ruleset_config = PeekabooRulesetConfig(config.ruleset_config)
-        logger.debug(ruleset_config)
+        ruleset_config = PeekabooConfigParser(config.ruleset_config)
     except PeekabooConfigException as error:
         logging.critical(error)
         sys.exit(1)
 
+    # verify the ruleset configuration by spawning a ruleset engine and having
+    # it verify it
+    try:
+        engine = RulesetEngine(ruleset_config, db_con)
+    except (KeyError, ValueError, PeekabooConfigException) as error:
+        logging.critical('Ruleset configuration error: %s', error)
+        sys.exit(1)
+    except PeekabooRulesetConfigError as error:
+        logging.critical(error)
+        sys.exit(1)
+
     job_queue = JobQueue(
         worker_count=config.worker_count, ruleset_config=ruleset_config,
         db_con=db_con,
diff --git a/peekaboo/exceptions.py b/peekaboo/exceptions.py
index 1ee8097..298cd21 100644
--- a/peekaboo/exceptions.py
+++ b/peekaboo/exceptions.py
@@ -40,6 +40,11 @@ class PeekabooRulesetException(PeekabooException):
     pass
 
 
+class PeekabooRulesetConfigError(PeekabooException):
+    """ Used to signal that a rule is unhappy with its configuration. """
+    pass
+
+
 class PeekabooAnalysisDeferred(PeekabooRulesetException):
     """ Analysis has been deferred to a later point in time.
 
diff --git a/peekaboo/locale/de/LC_MESSAGES/peekaboo.mo b/peekaboo/locale/de/LC_MESSAGES/peekaboo.mo
index 6cca488..c89b24b 100644
--- a/peekaboo/locale/de/LC_MESSAGES/peekaboo.mo
+++ b/peekaboo/locale/de/LC_MESSAGES/peekaboo.mo
diff --git a/peekaboo/locale/de/LC_MESSAGES/peekaboo.po b/peekaboo/locale/de/LC_MESSAGES/peekaboo.po
index 3a2be9f..d82f699 100644
--- a/peekaboo/locale/de/LC_MESSAGES/peekaboo.po
+++ b/peekaboo/locale/de/LC_MESSAGES/peekaboo.po
@@ -6,7 +6,7 @@ msgid ""
 msgstr ""
 "Project-Id-Version: PeekabooAV 1.6.2\n"
 "Report-Msgid-Bugs-To: EMAIL@ADDRESS\n"
-"POT-Creation-Date: 2019-04-17 09:12+0000\n"
+"POT-Creation-Date: 2019-04-17 09:26+0000\n"
 "PO-Revision-Date: 2019-02-14 22:02+0000\n"
 "Last-Translator: Michael Weiser <michael.weiser@gmx.de>\n"
 "Language: de\n"
@@ -100,55 +100,55 @@ msgstr "Ja"
 msgid "No"
 msgstr "Nein"
 
-#: peekaboo/ruleset/engine.py:97
+#: peekaboo/ruleset/engine.py:118
 msgid "Rule aborted with error"
 msgstr "Regel mit Fehler abgebrochen"
 
-#: peekaboo/ruleset/rules.py:86
+#: peekaboo/ruleset/rules.py:133
 msgid "File is not yet known to the system"
 msgstr "Datei ist dem System noch nicht bekannt"
 
-#: peekaboo/ruleset/rules.py:106
+#: peekaboo/ruleset/rules.py:154
 #, python-format
 msgid "Failure to determine sample file size: %s"
 msgstr "Ermittlung der Dateigröße fehlgeschlagen: %s"
 
-#: peekaboo/ruleset/rules.py:111
+#: peekaboo/ruleset/rules.py:159
 #, python-format
 msgid "File has more than %d bytes"
 msgstr "Datei hat mehr als %d bytes"
 
-#: peekaboo/ruleset/rules.py:116
+#: peekaboo/ruleset/rules.py:165
 #, python-format
-msgid "File is more than %d bytes long"
-msgstr "Datei ist nur %d bytes lang"
+msgid "File is only %d bytes long"
+msgstr ""
 
-#: peekaboo/ruleset/rules.py:135
+#: peekaboo/ruleset/rules.py:187
 msgid "File type is on whitelist"
 msgstr "Dateityp ist auf Whitelist"
 
-#: peekaboo/ruleset/rules.py:139
+#: peekaboo/ruleset/rules.py:191
 msgid "File type is not on whitelist"
 msgstr "Dateityp ist nicht auf Whitelist"
 
-#: peekaboo/ruleset/rules.py:158
+#: peekaboo/ruleset/rules.py:213
 msgid "File type is on the list of types to analyze"
 msgstr "Dateityp ist auf der Liste der zu analysiserenden Typen"
 
-#: peekaboo/ruleset/rules.py:163
+#: peekaboo/ruleset/rules.py:218
 #, python-format
 msgid "File type is not on the list of types to analyse (%s)"
 msgstr "Dateityp ist nicht auf der Liste der zu analysierenden Typen (%s)"
 
-#: peekaboo/ruleset/rules.py:176
+#: peekaboo/ruleset/rules.py:231
 msgid "The file contains an Office macro"
 msgstr "Die Datei beinhaltet ein Office-Makro"
 
-#: peekaboo/ruleset/rules.py:180
+#: peekaboo/ruleset/rules.py:235
 msgid "The file does not contain a recognizable Office macro"
 msgstr "Die Datei beinhaltet kein erkennbares Office-Makro"
 
-#: peekaboo/ruleset/rules.py:210 peekaboo/ruleset/rules.py:332
+#: peekaboo/ruleset/rules.py:265 peekaboo/ruleset/rules.py:402
 msgid ""
 "Behavioral analysis by Cuckoo has produced an error and did not finish "
 "successfully"
@@ -156,48 +156,40 @@ msgstr ""
 "Die Verhaltensanalyse durch Cuckoo hat einen Fehler produziert und konnte"
 " nicht erfolgreich abgeschlossen werden"
 
-#: peekaboo/ruleset/rules.py:244
-msgid "Empty list of malicious signatures"
-msgstr "Leere Liste schädlicher Signaturen"
-
-#: peekaboo/ruleset/rules.py:262
+#: peekaboo/ruleset/rules.py:322
 msgid "No signature suggesting malware detected"
 msgstr "Keine Signatur erkannt die auf Schadcode hindeutet"
 
-#: peekaboo/ruleset/rules.py:267
+#: peekaboo/ruleset/rules.py:327
 #, python-format
 msgid "The following signatures have been recognized: %s"
 msgstr "Folgende Signaturen wurden erkannt: %s"
 
-#: peekaboo/ruleset/rules.py:284
+#: peekaboo/ruleset/rules.py:346
 #, python-format
 msgid "Cuckoo score >= %s: %s"
 msgstr ""
 
-#: peekaboo/ruleset/rules.py:289
+#: peekaboo/ruleset/rules.py:351
 #, python-format
 msgid "Cuckoo score < %s: %s"
 msgstr ""
 
-#: peekaboo/ruleset/rules.py:305
-msgid "Empty domain list"
-msgstr "Leere Domainliste"
-
-#: peekaboo/ruleset/rules.py:310
+#: peekaboo/ruleset/rules.py:375
 #, python-format
 msgid "The file attempts to contact at least one domain on the blacklist (%s)"
 msgstr ""
 "Die Datei versucht mindestens eine Domain aus der Blacklist zu "
 "kontaktieren (%s)"
 
-#: peekaboo/ruleset/rules.py:316
+#: peekaboo/ruleset/rules.py:381
 msgid "File does not seem to attempt contact with domains on the blacklist"
 msgstr "Datei scheint keine Domains aus der Blacklist kontaktieren zu wollen"
 
-#: peekaboo/ruleset/rules.py:352
+#: peekaboo/ruleset/rules.py:418
 msgid "Behavioral analysis by Cuckoo completed successfully"
 msgstr "Die Verhaltensanalyse durch Cuckoo wurde erfolgreich abgeschlossen"
 
-#: peekaboo/ruleset/rules.py:369
+#: peekaboo/ruleset/rules.py:435
 msgid "File does not seem to exhibit recognizable malicious behaviour"
 msgstr "Datei scheint keine erkennbaren Schadroutinen zu starten"
diff --git a/peekaboo/locale/peekaboo.pot b/peekaboo/locale/peekaboo.pot
index d2231b7..56b6113 100644
--- a/peekaboo/locale/peekaboo.pot
+++ b/peekaboo/locale/peekaboo.pot
@@ -8,7 +8,7 @@ msgid ""
 msgstr ""
 "Project-Id-Version: PROJECT VERSION\n"
 "Report-Msgid-Bugs-To: EMAIL@ADDRESS\n"
-"POT-Creation-Date: 2019-04-17 09:12+0000\n"
+"POT-Creation-Date: 2019-04-17 09:26+0000\n"
 "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
 "Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
 "Language-Team: LANGUAGE <LL@li.org>\n"
@@ -99,101 +99,93 @@ msgstr ""
 msgid "No"
 msgstr ""
 
-#: peekaboo/ruleset/engine.py:97
+#: peekaboo/ruleset/engine.py:118
 msgid "Rule aborted with error"
 msgstr ""
 
-#: peekaboo/ruleset/rules.py:86
+#: peekaboo/ruleset/rules.py:133
 msgid "File is not yet known to the system"
 msgstr ""
 
-#: peekaboo/ruleset/rules.py:106
+#: peekaboo/ruleset/rules.py:154
 #, python-format
 msgid "Failure to determine sample file size: %s"
 msgstr ""
 
-#: peekaboo/ruleset/rules.py:111
+#: peekaboo/ruleset/rules.py:159
 #, python-format
 msgid "File has more than %d bytes"
 msgstr ""
 
-#: peekaboo/ruleset/rules.py:116
+#: peekaboo/ruleset/rules.py:165
 #, python-format
-msgid "File is more than %d bytes long"
+msgid "File is only %d bytes long"
 msgstr ""
 
-#: peekaboo/ruleset/rules.py:135
+#: peekaboo/ruleset/rules.py:187
 msgid "File type is on whitelist"
 msgstr ""
 
-#: peekaboo/ruleset/rules.py:139
+#: peekaboo/ruleset/rules.py:191
 msgid "File type is not on whitelist"
 msgstr ""
 
-#: peekaboo/ruleset/rules.py:158
+#: peekaboo/ruleset/rules.py:213
 msgid "File type is on the list of types to analyze"
 msgstr ""
 
-#: peekaboo/ruleset/rules.py:163
+#: peekaboo/ruleset/rules.py:218
 #, python-format
 msgid "File type is not on the list of types to analyse (%s)"
 msgstr ""
 
-#: peekaboo/ruleset/rules.py:176
+#: peekaboo/ruleset/rules.py:231
 msgid "The file contains an Office macro"
 msgstr ""
 
-#: peekaboo/ruleset/rules.py:180
+#: peekaboo/ruleset/rules.py:235
 msgid "The file does not contain a recognizable Office macro"
 msgstr ""
 
-#: peekaboo/ruleset/rules.py:210 peekaboo/ruleset/rules.py:332
+#: peekaboo/ruleset/rules.py:265 peekaboo/ruleset/rules.py:402
 msgid ""
 "Behavioral analysis by Cuckoo has produced an error and did not finish "
 "successfully"
 msgstr ""
 
-#: peekaboo/ruleset/rules.py:244
-msgid "Empty list of malicious signatures"
-msgstr ""
-
-#: peekaboo/ruleset/rules.py:262
+#: peekaboo/ruleset/rules.py:322
 msgid "No signature suggesting malware detected"
 msgstr ""
 
-#: peekaboo/ruleset/rules.py:267
+#: peekaboo/ruleset/rules.py:327
 #, python-format
 msgid "The following signatures have been recognized: %s"
 msgstr ""
 
-#: peekaboo/ruleset/rules.py:284
+#: peekaboo/ruleset/rules.py:346
 #, python-format
 msgid "Cuckoo score >= %s: %s"
 msgstr ""
 
-#: peekaboo/ruleset/rules.py:289
+#: peekaboo/ruleset/rules.py:351
 #, python-format
 msgid "Cuckoo score < %s: %s"
 msgstr ""
 
-#: peekaboo/ruleset/rules.py:305
-msgid "Empty domain list"
-msgstr ""
-
-#: peekaboo/ruleset/rules.py:310
+#: peekaboo/ruleset/rules.py:375
 #, python-format
 msgid "The file attempts to contact at least one domain on the blacklist (%s)"
 msgstr ""
 
-#: peekaboo/ruleset/rules.py:316
+#: peekaboo/ruleset/rules.py:381
 msgid "File does not seem to attempt contact with domains on the blacklist"
 msgstr ""
 
-#: peekaboo/ruleset/rules.py:352
+#: peekaboo/ruleset/rules.py:418
 msgid "Behavioral analysis by Cuckoo completed successfully"
 msgstr ""
 
-#: peekaboo/ruleset/rules.py:369
+#: peekaboo/ruleset/rules.py:435
 msgid "File does not seem to exhibit recognizable malicious behaviour"
 msgstr ""
 
diff --git a/peekaboo/queuing.py b/peekaboo/queuing.py
index 8a4215f..160a4b1 100644
--- a/peekaboo/queuing.py
+++ b/peekaboo/queuing.py
@@ -384,9 +384,9 @@ class Worker(Thread):
                 self.job_queue.done(sample.sha256sum)
                 continue
 
-            engine = RulesetEngine(sample, self.ruleset_config, self.db_con)
+            engine = RulesetEngine(self.ruleset_config, self.db_con)
             try:
-                engine.run()
+                engine.run(sample)
             except PeekabooAnalysisDeferred:
                 logger.debug("Report for sample %s still pending", sample)
                 continue
diff --git a/peekaboo/ruleset/engine.py b/peekaboo/ruleset/engine.py
index 86b95e8..376cc08 100644
--- a/peekaboo/ruleset/engine.py
+++ b/peekaboo/ruleset/engine.py
@@ -28,7 +28,8 @@ import logging
 from peekaboo.ruleset import Result, RuleResult
 from peekaboo.ruleset.rules import *
 from peekaboo.toolbox.peekabooyar import ContainsPeekabooYarRule
-from peekaboo.exceptions import PeekabooAnalysisDeferred
+from peekaboo.exceptions import PeekabooAnalysisDeferred, \
+        PeekabooConfigException, PeekabooRulesetConfigError
 
 
 logger = logging.getLogger(__name__)
@@ -54,19 +55,51 @@ class RulesetEngine(object):
         FinalRule
     ]
 
-    def __init__(self, sample, ruleset_config, db_con):
-        self.sample = sample
+    def __init__(self, ruleset_config, db_con):
         self.config = ruleset_config
         self.db_con = db_con
 
         # create a lookup table from rule name to class
-        self.rules = {}
+        self.rule_classes = {}
         for known_rule in self.known_rules:
-            self.rules[known_rule.rule_name] = known_rule
+            self.rule_classes[known_rule.rule_name] = known_rule
 
-    def run(self):
-        for rule in self.config.rule_config('rules').get('rule'):
-            result = self.__exec_rule(self.sample, self.rules[rule])
+        try:
+            self.enabled_rules = self.config.getlist('rules', 'rule')
+        except PeekabooConfigException as error:
+            raise PeekabooRulesetConfigError(
+                'Ruleset configuration error: %s' % error)
+
+        self.validate_rule_config()
+
+    def validate_rule_config(self):
+        """ Validate the rule configuration in various ways.
+
+        @returns: None
+        @raises PeekabooRulesetConfigError: if configuration errors are found
+        """
+        if not self.enabled_rules:
+            raise PeekabooRulesetConfigError(
+                'No enabled rules found, check ruleset config.')
+
+        # check if unknown rules are enabled
+        known_rule_names = self.rule_classes.keys()
+        unknown_rules = set(self.enabled_rules) - set(known_rule_names)
+        if unknown_rules:
+            raise PeekabooRulesetConfigError(
+                'Unknown rule(s) enabled: %s' % ', '.join(unknown_rules))
+
+        config_sections = []
+        for rule in self.enabled_rules:
+            # not passing database connection. Needs revisiting if a rule
+            # ever wants to retrieve configuration from the database. For
+            # now at least rule constructor and get_config() need to be
+            # able to cope without it.
+            rule = self.rule_classes[rule](self.config)
+
+    def run(self, sample):
+        for rule in self.enabled_rules:
+            result = self.__exec_rule(sample, self.rule_classes[rule])
             if not result.further_analysis:
                 return
 
@@ -80,8 +113,7 @@ class RulesetEngine(object):
         logger.debug("Processing rule '%s' for %s" % (rule_name, sample))
 
         try:
-            rule_config = self.config.rule_config(rule_name)
-            rule = rule_class(config=rule_config, db_con=self.db_con)
+            rule = rule_class(config=self.config, db_con=self.db_con)
             result = rule.evaluate(sample)
             sample.add_rule_result(result)
         except PeekabooAnalysisDeferred:
diff --git a/peekaboo/ruleset/rules.py b/peekaboo/ruleset/rules.py
index a631cc1..90f9dcb 100644
--- a/peekaboo/ruleset/rules.py
+++ b/peekaboo/ruleset/rules.py
@@ -30,7 +30,7 @@ import re
 import logging
 from peekaboo.ruleset import Result, RuleResult
 from peekaboo.exceptions import PeekabooAnalysisDeferred, \
-        CuckooSubmitFailedException
+        CuckooSubmitFailedException, PeekabooRulesetConfigError
 
 
 logger = logging.getLogger(__name__)
@@ -45,13 +45,10 @@ class Rule(object):
     def __init__(self, config=None, db_con=None):
         """ Initialize common configuration and resources """
         self.db_con = db_con
+        self.config = config
 
-        # initialise and retain config as empty dict if no rule config is given
-        # to us so the rule can rely on it and does not need to do any type
-        # checking
-        self.config = {}
-        if config is not None:
-            self.config = config
+        # initialise and validate configuration
+        self.get_config()
 
     def result(self, result, reason, further_analysis):
         """ Construct a RuleResult for returning to the engine. """
@@ -68,6 +65,56 @@ class Rule(object):
         """
         raise NotImplementedError
 
+    def get_config(self):
+        """ Extract this rule's configuration out of the ruleset configuration
+        object given at creation. To be overridden by child classes if they
+        have configuration options. """
+        # pass
+
+    # the following getters are somewhat boilerplate but unavoidable for now.
+    # They serve the purpose of keeping config access specifics out of rules for
+    # the sake of readablility.
+    def get_config_value(self, getter, option, *args, **kwargs):
+        """ Get a configuation value for this rule from the ruleset
+        configuration. Getter routine and option name to be provided by caller.
+        The rule's name is always used as configuration section name.
+
+        @param getter: getter routine to use
+        @type getter: getter method of PeekabooConfigParser
+        @param option: name of option to read
+        @type option: string
+        @param args, kwargs: additional arguments passed to the getter routine,
+                             such as fallback.
+
+        @returns: configuration value read from config
+        """
+        # additional common logic to go here
+        return getter(self.rule_name, option, *args, **kwargs)
+
+    def get_config_int(self, option, default=None):
+        """ Get an integer from the ruleset configuration. See get_config_value
+        for parameters. """
+        return self.get_config_value(
+            self.config.getint, option, fallback=default)
+
+    def get_config_float(self, option, default=None):
+        """ Get a float from the ruleset configuration. See get_config_value
+        for parameters. """
+        return self.get_config_value(
+            self.config.getfloat, option, fallback=default)
+
+    def get_config_list(self, option, default=None):
+        """ Get a list from the ruleset configuration. See get_config_value
+        for parameters. """
+        return self.get_config_value(
+            self.config.getlist, option, fallback=default)
+
+    def get_config_relist(self, option, default=None):
+        """ Get a list of compiled regular expressions from the ruleset. See
+        get_config_value for parameters. """
+        return self.get_config_value(
+            self.config.getrelist, option, fallback=default)
+
 
 class KnownRule(Rule):
     """ A rule determining if a sample is known by looking at the database for
@@ -92,12 +139,13 @@ class FileLargerThanRule(Rule):
     """
     rule_name = 'file_larger_than'
 
+    def get_config(self):
+        self.size_threshold = self.get_config_int('bytes', 5)
+
     def evaluate(self, sample):
         """ Evaluate whether the sample is larger than a certain threshold.
         Advise the engine to stop processing if the size is below the
         threshold. """
-        size = int(self.config.get('bytes', 5))
-
         try:
             sample_size = sample.file_size
         except OSError as oserr:
@@ -106,14 +154,15 @@ class FileLargerThanRule(Rule):
                 _("Failure to determine sample file size: %s") % oserr,
                 False)
 
-        if sample_size > size:
+        if sample_size > self.size_threshold:
             return self.result(Result.unknown,
-                               _("File has more than %d bytes") % size,
+                               _("File has more than %d bytes")
+                               % self.size_threshold,
                                True)
 
         return self.result(
             Result.ignored,
-            _("File is more than %d bytes long") % s
author	Michael Weiser <michael.weiser@gmx.de>	2019-04-18 08:57:15 +0000
committer	Michael Weiser <michael.weiser@gmx.de>	2019-04-25 12:20:20 +0000
commit	77d2ff1348a6f1ade05b54b297fc771abd0e14cd (patch)
tree	e7914e3ad89f36b65aced27f53132244f894832f
parent	e6c44a8ca3c2216904731e4d889e53473691c0dc (diff)