summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMichael Weiser <michael.weiser@gmx.de>2019-02-11 18:53:34 +0000
committerMichael Weiser <michael.weiser@gmx.de>2019-02-13 08:27:45 +0000
commit78977fc0e815c90803f9cd905bf3096311845ea4 (patch)
tree165ed3282239061655d2c4c8303edbf57b52d216
parent4532f933d42bc5c63918dfa1eb24101976df6e70 (diff)
Refactor report evaluation for submission error handling
If Cuckoo analysis fails we used to report the sample as bad, i.e. as containing a virus which is not exactly true and might arguably be seen as a false positive. Instead we should report the sample as failed and let whoever called us deal with it however they want. This changes sample reporting from a two-state (good, bad) logic to three-state (good, bad, failed). If Cuckoo itself reports to us that it was not able to complete the analysis we can easily accomplish this by returning Result.failed from rule cuckoo_analysis_failed. For this we turn the hitherto unused Result.checked result into Result.failed and reorder slightly. The numerical value here is important to achieve correct ordering of result "worseness": 1. If any rule reports "bad" the sample has been positively identified as containing something malevolent and this should overrule any other finding. 2. If any rule reports "failed" then we don't know what it might have found and it could be "bad". So "failed" should be the next worst finding after "bad". 3. If any rule reports "good" then it positively identified the content as non-malevolent. 4. "unknown" should mean that we tried to identify the content but couldn't reach any conclusion either way. 5. "unchecked" should mean that we haven't even tried finding out anything about the sample (yet). For cases where actual submission to Cuckoo failed we need to catch our own CuckooAnalysisFailedException raised by Cuckoo.submit(). Since this was hidden in the report acquisition code way inside Sample, we need to move submission and report handling to the ruleset. Here we provide a common base class CuckooRule implementing the evaluate() method in a way that checks for the Cuckoo report to be present and submitting the sample to Cuckoo if not. If the report is present (i.e. 
has been reported back by Cuckoo later on) the actual rules' evaluate_report() method is called by CuckooRule's evaluate(), nicely modelling that those rules actually only evaluate the Cuckoo report and not the sample itself. This by the way gets rid of the cuckoo_report attribute of sample, replacing it with private member __cuckoo_report and a setter Sample.register_cuckoo_report() called by Cuckoo.resubmit_with_report().
-rw-r--r--peekaboo/queuing.py8
-rw-r--r--peekaboo/ruleset/__init__.py4
-rw-r--r--peekaboo/ruleset/engine.py7
-rw-r--r--peekaboo/ruleset/rules.py81
-rw-r--r--peekaboo/sample.py35
-rw-r--r--peekaboo/toolbox/cuckoo.py7
-rw-r--r--test.py4
7 files changed, 99 insertions, 47 deletions
diff --git a/peekaboo/queuing.py b/peekaboo/queuing.py
index ef4eabd..4500f4e 100644
--- a/peekaboo/queuing.py
+++ b/peekaboo/queuing.py
@@ -27,6 +27,7 @@ import logging
from threading import Thread, Event, Lock
from queue import Queue, Empty
from time import sleep
+from peekaboo.ruleset import Result
from peekaboo.ruleset.engine import RulesetEngine
from peekaboo.exceptions import CuckooReportPendingException
@@ -309,8 +310,11 @@ class Worker(Thread):
engine.run()
engine.report()
- logger.debug('Saving results to database')
- self.db_con.analysis_save(sample)
+ if sample.get_result() != Result.failed:
+ logger.debug('Saving results to database')
+ self.db_con.analysis_save(sample)
+ else:
+ logger.debug('Not saving results of failed analysis')
sample.remove_from_connection_map()
self.job_queue.done(sample.sha256sum)
diff --git a/peekaboo/ruleset/__init__.py b/peekaboo/ruleset/__init__.py
index ce486cb..60e9e87 100644
--- a/peekaboo/ruleset/__init__.py
+++ b/peekaboo/ruleset/__init__.py
@@ -35,8 +35,8 @@ class Result(Enum):
unchecked = 1
unknown = 2
ignored = 3
- checked = 4
- good = 5
+ good = 4
+ failed = 5
bad = 6
def __ge__(self, other):
diff --git a/peekaboo/ruleset/engine.py b/peekaboo/ruleset/engine.py
index e0b5776..9e5f4a9 100644
--- a/peekaboo/ruleset/engine.py
+++ b/peekaboo/ruleset/engine.py
@@ -146,11 +146,10 @@ def dump_processing_info(sample):
logger.exception(e)
# Cuckoo report
- if sample.has_attr('cuckoo_report'):
- report = sample.get_attr('cuckoo_report').raw
-
+ report = sample.cuckoo_report
+ if report:
try:
with open(os.path.join(dump_dir, filename + '_cuckoo_report.json'), 'w+') as f:
- json.dump(report, f, indent = 1)
+ json.dump(report.raw, f, indent=1)
except Exception as e:
logger.exception(e)
diff --git a/peekaboo/ruleset/rules.py b/peekaboo/ruleset/rules.py
index a29596d..48851cb 100644
--- a/peekaboo/ruleset/rules.py
+++ b/peekaboo/ruleset/rules.py
@@ -29,6 +29,8 @@
import re
import logging
from peekaboo.ruleset import Result, RuleResult
+from peekaboo.exceptions import CuckooReportPendingException, \
+ CuckooAnalysisFailedException
logger = logging.getLogger(__name__)
@@ -62,7 +64,8 @@ class Rule(object):
assessment (i.e. the rule's name) and whether to continue
analysis or not.
"""
- raise NotImplemented
+ raise NotImplementedError
+
class KnownRule(Rule):
""" A rule determining if a sample is known by looking at the database for
@@ -152,12 +155,55 @@ class FileTypeOnGreylistRule(Rule):
False)
-class CuckooEvilSigRule(Rule):
+class CuckooRule(Rule):
+ """ A common base class for rules that evaluate the Cuckoo report. """
+ def evaluate(self, sample):
+ """ If a report is present for the sample in question we call method
+ evaluate_report() implemented by subclasses to evaluate it for
+ findings. Otherwise we submit the sample to Cuckoo and raise
+ CuckooReportPendingException to abort the current run of the ruleset
+ until the report arrives. If submission to Cuckoo fails we will
+ ourselves report the sample as failed.
+
+ @param sample: The sample to evaluate.
+ @raises CuckooReportPendingException: if the sample was submitted to
+ Cuckoo
+ @returns: RuleResult containing verdict.
+ """
+ report = sample.cuckoo_report
+ if report is None:
+ try:
+ job_id = sample.submit_to_cuckoo()
+ except CuckooAnalysisFailedException:
+ return self.result(
+ Result.failed,
+ "Die Verhaltensanalyse durch Cuckoo hat einen "
+ "Fehler produziert und konnte nicht erfolgreich "
+ "abgeschlossen werden",
+ False)
+
+ logger.info('Sample submitted to Cuckoo. Job ID: %s. '
+ 'Sample: %s', job_id, sample)
+ raise CuckooReportPendingException()
+
+ # call report evaluation function if we get here
+ return self.evaluate_report(report)
+
+ def evaluate_report(self, report):
+ """ Evaluate a Cuckoo report.
+
+ @param report: The Cuckoo report.
+ @returns: RuleResult containing verdict.
+ """
+ raise NotImplementedError
+
+
+class CuckooEvilSigRule(CuckooRule):
""" A rule evaluating the signatures from the Cuckoo report against a list
of signatures considered bad. """
rule_name = 'cuckoo_evil_sig'
- def evaluate(self, sample):
+ def evaluate_report(self, report):
""" Evaluate the sample against signatures that if matched mark a
sample as bad. """
# list all installed signatures
@@ -169,10 +215,9 @@ class CuckooEvilSigRule(Rule):
"Leere Liste schaedlicher Signaturen",
True)
- sigs = []
-
# look through matched signatures
- for descr in sample.cuckoo_report.signatures:
+ sigs = []
+ for descr in report.signatures:
logger.debug(descr['description'])
sigs.append(descr['description'])
@@ -195,25 +240,25 @@ class CuckooEvilSigRule(Rule):
False)
-class CuckooScoreRule(Rule):
+class CuckooScoreRule(CuckooRule):
""" A rule checking the score reported by Cuckoo against a configurable
threshold. """
rule_name = 'cuckoo_score'
- def evaluate(self, sample):
+ def evaluate_report(self, report):
""" Evaluate the score reported by Cuckoo against the threshold from
the configuration and report sample as bad if above. """
threshold = float(self.config.get('higher_than', 4.0))
- if sample.cuckoo_report.score >= threshold:
+ if report.score >= threshold:
return self.result(Result.bad,
"Cuckoo score >= %s: %s" %
- (threshold, sample.cuckoo_report.score),
+ (threshold, report.score),
False)
return self.result(Result.unknown,
"Cuckoo score < %s: %s" %
- (threshold, sample.cuckoo_report.score),
+ (threshold, report.score),
True)
@@ -234,12 +279,12 @@ class OfficeMacroRule(Rule):
True)
-class RequestsEvilDomainRule(Rule):
+class RequestsEvilDomainRule(CuckooRule):
""" A rule checking the domains reported as requested by the sample by
Cuckoo against a blacklist. """
rule_name = 'requests_evil_domain'
- def evaluate(self, sample):
+ def evaluate_report(self, report):
""" Report the sample as bad if one of the requested domains is on our
list of evil domains. """
evil_domains = self.config.get('domain', ())
@@ -247,7 +292,7 @@ class RequestsEvilDomainRule(Rule):
logger.warn("Empty evil domain list, check ruleset config.")
return self.result(Result.unknown, "Leere Domainliste", True)
- for domain in sample.cuckoo_report.requested_domains:
+ for domain in report.requested_domains:
if domain in evil_domains:
return self.result(Result.bad,
"Die Datei versucht mindestens eine Domain "
@@ -261,15 +306,15 @@ class RequestsEvilDomainRule(Rule):
True)
-class CuckooAnalysisFailedRule(Rule):
+class CuckooAnalysisFailedRule(CuckooRule):
""" A rule checking the final status reported by Cuckoo for success. """
rule_name = 'cuckoo_analysis_failed'
- def evaluate(self, sample):
+ def evaluate_report(self, report):
""" Report the sample as bad if the Cuckoo indicates that the analysis
has failed. """
- if sample.cuckoo_report.analysis_failed:
- return self.result(Result.bad,
+ if report.analysis_failed:
+ return self.result(Result.failed,
"Die Verhaltensanalyse durch Cuckoo hat einen "
"Fehler produziert und konnte nicht erfolgreich "
"abgeschlossen werden",
diff --git a/peekaboo/sample.py b/peekaboo/sample.py
index cd4e6e4..4da7a1f 100644
--- a/peekaboo/sample.py
+++ b/peekaboo/sample.py
@@ -31,8 +31,6 @@ import shutil
import logging
import tempfile
from datetime import datetime
-from peekaboo.exceptions import CuckooReportPendingException, \
- CuckooAnalysisFailedException
from peekaboo.toolbox.sampletools import next_job_hash
from peekaboo.toolbox.files import guess_mime_type_from_file_contents, \
guess_mime_type_from_filename
@@ -88,6 +86,7 @@ class Sample(object):
# sha256sum.suffix
self.__submit_path = None
self.__cuckoo_job_id = -1
+ self.__cuckoo_report = None
self.__result = ruleset.Result.unchecked
self.__reason = None
self.__report = [] # Peekaboo's own report
@@ -383,19 +382,25 @@ class Sample(object):
@property
def cuckoo_report(self):
- if not self.has_attr('cuckoo_report'):
- try:
- logger.debug("Submitting %s to Cuckoo" % self.__submit_path)
- self.__cuckoo_job_id = self.__cuckoo.submit(self.__submit_path)
- message = 'Erfolgreich an Cuckoo gegeben %s als Job %d\n' \
- % (self, self.__cuckoo_job_id)
- self.__report.append(message)
- logger.info('Sample submitted to Cuckoo. Job ID: %s. '
- 'Sample: %s' % (self.__cuckoo_job_id, self))
- raise CuckooReportPendingException()
- except CuckooAnalysisFailedException as e:
- logger.exception(e)
- return self.get_attr('cuckoo_report')
+ """ Returns the cuckoo report """
+ return self.__cuckoo_report
+
+ def submit_to_cuckoo(self):
+ """ Submit the sample to Cuckoo for analysis and record job id.
+
+ @raises: CuckooAnalsisFailedException if submission failed
+ @returns: cuckoo job id
+ """
+ logger.debug("Submitting %s to Cuckoo", self.__submit_path)
+ self.__cuckoo_job_id = self.__cuckoo.submit(self.__submit_path)
+ message = 'Erfolgreich an Cuckoo gegeben %s als Job %d\n' \
+ % (self, self.__cuckoo_job_id)
+ self.__report.append(message)
+ return self.__cuckoo_job_id
+
+ def register_cuckoo_report(self, report):
+ """ Records a Cuckoo report for later evaluation. """
+ self.__cuckoo_report = report
def __close_socket(self):
logger.debug('Closing socket connection.')
diff --git a/peekaboo/toolbox/cuckoo.py b/peekaboo/toolbox/cuckoo.py
index e26d5db..88b69a1 100644
--- a/peekaboo/toolbox/cuckoo.py
+++ b/peekaboo/toolbox/cuckoo.py
@@ -54,14 +54,14 @@ class Cuckoo:
logger.debug('Requesting Cuckoo report for sample %s' % sample)
report = self.get_report(job_id)
- # do not set the sample attribute if we were unable to get the
- # report because e.g. it was corrupted or the API connection
+ # do not register the report with the sample if we were unable to
+ # get it because e.g. it was corrupted or the API connection
# failed. This will cause the sample to be resubmitted to Cuckoo
# upon the next try to access the report.
# TODO: This can cause an endless loop.
if report is not None:
reportobj = CuckooReport(report)
- sample.set_attr('cuckoo_report', reportobj)
+ sample.register_cuckoo_report(reportobj)
self.job_queue.submit(sample, self.__class__)
logger.debug("Remaining connections: %d" % self.connection_map.size())
@@ -116,7 +116,6 @@ class CuckooEmbed(Cuckoo):
raise CuckooAnalysisFailedException(e)
if not p.returncode == 0:
- # TODO: tell opponent on socket that file has not been checked.
raise CuckooAnalysisFailedException('cuckoo submit returned a non-zero return code.')
else:
out, err = p.communicate()
diff --git a/test.py b/test.py
index a574aa4..489c1a2 100644
--- a/test.py
+++ b/test.py
@@ -333,7 +333,7 @@ class TestDatabase(unittest.TestCase):
job_hash_regex=cls.conf.job_hash_regex, keep_mail_data=False)
cls.sample = cls.factory.make_sample(os.path.realpath(__file__))
result = RuleResult('Unittest',
- Result.checked,
+ Result.failed,
'This is just a test case.',
further_analysis=False)
cls.sample.add_rule_result(result)
@@ -347,7 +347,7 @@ class TestDatabase(unittest.TestCase):
""" Test retrieval of analysis results. """
sample_info = self.db_con.sample_info_fetch(self.sample)
self.assertEqual(sample_info.sha256sum, self.sample.sha256sum)
- self.assertEqual(sample_info.result, Result.checked)
+ self.assertEqual(sample_info.result, Result.failed)
self.assertEqual(sample_info.reason, 'This is just a test case.')
def test_5_in_flight_no_cluster(self):