author     Michael Weiser <michael.weiser@gmx.de>     2019-04-17 09:08:20 +0000
committer  GitHub <noreply@github.com>                2019-04-17 09:08:20 +0000
commit     a0a694ecde0cbc8171763a938e2f0c4f581f832b (patch)
tree       ff5d6f088535d03b526c5a73bde4e9f0edf68b9c
parent     91fe84aea999d38cab12a4cf6338f29bb34a4c02 (diff)
parent     5158956e66af1f55a3d07422abe5e617d83bd614 (diff)
Merge pull request #74 from michaelweiser/cuckoo-hang
Cuckoo hang
-rw-r--r--  peekaboo.conf.sample                          17
-rw-r--r--  peekaboo/config.py                             2
-rw-r--r--  peekaboo/daemon.py                             3
-rw-r--r--  peekaboo/db.py                                25
-rw-r--r--  peekaboo/debug.py                              2
-rw-r--r--  peekaboo/exceptions.py                        21
-rw-r--r--  peekaboo/locale/de/LC_MESSAGES/peekaboo.mo    bin 4173 -> 4286 bytes
-rw-r--r--  peekaboo/locale/de/LC_MESSAGES/peekaboo.po    15
-rw-r--r--  peekaboo/locale/peekaboo.pot                  15
-rw-r--r--  peekaboo/queuing.py                            8
-rw-r--r--  peekaboo/ruleset/engine.py                     4
-rw-r--r--  peekaboo/ruleset/rules.py                     17
-rw-r--r--  peekaboo/sample.py                            11
-rw-r--r--  peekaboo/server.py                             9
-rw-r--r--  peekaboo/toolbox/cuckoo.py                   100
15 files changed, 175 insertions, 74 deletions
diff --git a/peekaboo.conf.sample b/peekaboo.conf.sample
index cf27c75..4698db7 100644
--- a/peekaboo.conf.sample
+++ b/peekaboo.conf.sample
@@ -4,7 +4,7 @@
 #
-#[global]
+[global]
 #user : peekaboo
 #group : peekaboo
 #socket_file : /var/run/peekaboo/peekaboo.sock
@@ -29,14 +29,14 @@
 #report_locale : <unset>
 
-#[ruleset]
+[ruleset]
 #config : /opt/peekaboo/etc/ruleset.conf
 
 #
 # Logging configuration
 #
-#[logging]
+[logging]
 # log_level
 # possible values: CRITICAL | ERROR | WARNING | INFO | DEBUG
 #log_level : INFO
@@ -49,7 +49,7 @@
 #
 # Database configuration
 #
-#[db]
+[db]
 # SQLite
 #url : sqlite:////var/lib/peekaboo/peekaboo.db
 # MySQL (recommended)
@@ -57,10 +57,15 @@
 # PostgreSQL
 # url : postgresql://user:password@host:port/database
 
+# Enable additional logging by the SQLAlchemy database module beyond Peekaboo's
+# own logging. Can be considered another set of debug logging even beyond
+# Peekaboo's DEBUG log level.
+#log_level : WARNING
+
 #
 # Cuckoo specific settings
 #
-#[cuckoo]
+[cuckoo]
 # mode has two options:
 # embed : for legacy embeded mode
 # api : to access cuckoo via the REST api
@@ -75,7 +80,7 @@
 #url : http://127.0.0.1:8090
 #poll_interval : 5
 
-#[cluster]
+[cluster]
 # if multiple instances are to run in parallel and avoid concurrent analysis of
 # the same sample, set instance_id to a nonzero positive unique integer value
 # on each instance and use the same networked DBMS instance (MySQL or
diff --git a/peekaboo/config.py b/peekaboo/config.py
index f99c398..407207d 100644
--- a/peekaboo/config.py
+++ b/peekaboo/config.py
@@ -85,6 +85,7 @@ class PeekabooConfig(object): # pylint: disable=too-many-instance-attributes
         self.processing_info_dir = '/var/lib/peekaboo/malware_reports'
         self.report_locale = None
         self.db_url = 'sqlite:////var/lib/peekaboo/peekaboo.db'
+        self.db_log_level = logging.WARNING
         self.config_file = '/opt/peekaboo/etc/peekaboo.conf'
         self.ruleset_config = '/opt/peekaboo/etc/ruleset.conf'
         self.cuckoo_mode = "api"
@@ -117,6 +118,7 @@ class PeekabooConfig(object): # pylint: disable=too-many-instance-attributes
             'processing_info_dir': ['global', 'processing_info_dir'],
             'report_locale': ['global', 'report_locale'],
             'db_url': ['db', 'url'],
+            'db_log_level': ['db', 'log_level', self.get_log_level],
             'ruleset_config': ['ruleset', 'config'],
             'cuckoo_mode': ['cuckoo', 'mode'],
             'cuckoo_url': ['cuckoo', 'url'],
diff --git a/peekaboo/daemon.py b/peekaboo/daemon.py
index bc77580..9da2013 100644
--- a/peekaboo/daemon.py
+++ b/peekaboo/daemon.py
@@ -281,7 +281,8 @@ def run():
     try:
         db_con = PeekabooDatabase(
             db_url=config.db_url, instance_id=config.cluster_instance_id,
-            stale_in_flight_threshold=config.cluster_stale_in_flight_threshold)
+            stale_in_flight_threshold=config.cluster_stale_in_flight_threshold,
+            log_level=config.db_log_level)
     except PeekabooDatabaseError as error:
         logging.critical(error)
         sys.exit(1)
diff --git a/peekaboo/db.py b/peekaboo/db.py
index 4761492..6130281 100644
--- a/peekaboo/db.py
+++ b/peekaboo/db.py
@@ -86,6 +86,17 @@ class InFlightSample(Base):
     instance_id = Column(Integer, nullable=False)
     start_time = Column(DateTime, nullable=False)
 
+    def __str__(self):
+        return (
+            '<InFlightSample(sha256sum="%s", instance_id="%s", '
+            'start_time="%s")>'
+            % (self.sha256sum,
+               self.instance_id,
+               self.start_time.strftime("%Y%m%dT%H%M%S"))
+        )
+
+    __repr__ = __str__
+
 
 class SampleInfo(Base):
     """
@@ -153,7 +164,8 @@ class PeekabooDatabase(object):
     @author: Sebastian Deiss
     """
     def __init__(self, db_url, instance_id=0,
-                 stale_in_flight_threshold=1*60*60):
+                 stale_in_flight_threshold=1*60*60,
+                 log_level=logging.WARNING):
         """
         Initialize the Peekaboo database handler.
 
@@ -166,6 +178,8 @@ class PeekabooDatabase(object):
        @param stale_in_flight_threshold: Number of seconds after which a in
                                          flight marker is considered stale and
                                          deleted or ignored.
        """
+        logging.getLogger('sqlalchemy.engine').setLevel(log_level)
+
         self.__engine = create_engine(db_url, pool_recycle=1)
         session_factory = sessionmaker(bind=self.__engine)
         self.__session = scoped_session(session_factory)
@@ -389,6 +403,15 @@ class PeekabooDatabase(object):
                      '(%d seconds)', self.stale_in_flight_threshold)
 
         try:
+            # the loop triggers the query, so only do it if debugging is
+            # enabled
+            if logger.isEnabledFor(logging.DEBUG):
+                # obviously there's a race between logging and actual delete
+                # here, use with caution, compare with actual number of markers
+                # cleared below before relying on it for debugging
+                for stale in query:
+                    logger.debug('Stale in-flight marker to clear: %s', stale)
+
             # delete() is not queued and goes to the DB before commit()
             cleared = query.delete()
             session.commit()
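The new [db] log_level option above ends up, via PeekabooConfig and PeekabooDatabase, as the level of the 'sqlalchemy.engine' logger. A minimal stand-alone sketch of that mechanism (not code from this commit; the in-memory SQLite URL and log format are illustrative only):

    import logging

    from sqlalchemy import create_engine, text

    # comparable to what PeekabooDatabase now does with the configured level:
    # at INFO the 'sqlalchemy.engine' logger emits every SQL statement,
    # at DEBUG it additionally emits result rows
    logging.basicConfig(format='%(name)s: %(message)s')
    logging.getLogger('sqlalchemy.engine').setLevel(logging.INFO)

    engine = create_engine('sqlite:///:memory:')
    with engine.connect() as connection:
        connection.execute(text('SELECT 1'))  # statement now appears in the log

Keeping this separate from Peekaboo's own log_level means SQL tracing can be switched on without drowning the rest of the daemon's DEBUG output.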
diff --git a/peekaboo/debug.py b/peekaboo/debug.py
index 6b78136..3fca8bb 100644
--- a/peekaboo/debug.py
+++ b/peekaboo/debug.py
@@ -43,7 +43,7 @@ class PeekabooDebugger(Thread):
         self.socket = None
         self.poll_interval = 5
         self.bufsize = 4096
-        Thread.__init__(self)
+        Thread.__init__(self, name="Debugger")
 
     def wait_for_input(self, socket, timeout):
         r = []
diff --git a/peekaboo/exceptions.py b/peekaboo/exceptions.py
index bc11ec0..1ee8097 100644
--- a/peekaboo/exceptions.py
+++ b/peekaboo/exceptions.py
@@ -40,9 +40,26 @@ class PeekabooRulesetException(PeekabooException):
     pass
 
 
-class CuckooReportPendingException(PeekabooRulesetException):
+class PeekabooAnalysisDeferred(PeekabooRulesetException):
+    """ Analysis has been deferred to a later point in time.
+
+    An exception signifying that analysis has been deferred to a later point in
+    time. Ruleset processing will be aborted (without error). Useful if we're
+    waiting for someone to finish their analysis and defer our interpretation
+    of their findings until they become available, most notably the Cuckoo
+    report.
+
+    Not an exception in the traditional sense since it does not indicate an
+    error but actually influences control flow instead. Somewhat questionable
+    in that regard.
+
+    The raiser becomes owner of the sample and is responsible to appropriately
+    resubmit it into Peekaboo once it wants processing to continue. That should
+    take into account that the ruleset will be rerun from the very beginning.
+    """
     pass
 
 
-class CuckooAnalysisFailedException(PeekabooException):
+class CuckooSubmitFailedException(PeekabooException):
+    """ An exception raised if submitting a job to Cuckoo fails. """
     pass
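PeekabooAnalysisDeferred is used purely for control flow: CuckooRule raises it after submitting a sample and the worker catches it and moves on, as the rules.py and queuing.py hunks further down show. A simplified, self-contained sketch of that pattern (stand-in classes and helpers, not the actual Peekaboo code):

    class PeekabooAnalysisDeferred(Exception):
        """ Not an error: signals that this run of the ruleset should stop. """

    def submit_to_cuckoo(sample):
        """ Placeholder for the real submission; the report arrives later. """

    def cuckoo_rule(sample):
        if sample.get('cuckoo_report') is None:
            submit_to_cuckoo(sample)
            # abort this ruleset run; the sample is resubmitted with the report
            raise PeekabooAnalysisDeferred()
        return 'verdict derived from report'

    def worker_loop(samples):
        for sample in samples:
            try:
                verdict = cuckoo_rule(sample)
            except PeekabooAnalysisDeferred:
                # the submitter now owns the sample and will requeue it; the
                # ruleset is rerun from the beginning on resubmission
                continue
            print(verdict)

    worker_loop([{'cuckoo_report': None}, {'cuckoo_report': {'score': 0.0}}])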
""" pass diff --git a/peekaboo/locale/de/LC_MESSAGES/peekaboo.mo b/peekaboo/locale/de/LC_MESSAGES/peekaboo.mo Binary files differindex 1af9ffb..dab15b2 100644 --- a/peekaboo/locale/de/LC_MESSAGES/peekaboo.mo +++ b/peekaboo/locale/de/LC_MESSAGES/peekaboo.mo diff --git a/peekaboo/locale/de/LC_MESSAGES/peekaboo.po b/peekaboo/locale/de/LC_MESSAGES/peekaboo.po index 044406f..86ff0ac 100644 --- a/peekaboo/locale/de/LC_MESSAGES/peekaboo.po +++ b/peekaboo/locale/de/LC_MESSAGES/peekaboo.po @@ -6,7 +6,7 @@ msgid "" msgstr "" "Project-Id-Version: PeekabooAV 1.6.2\n" "Report-Msgid-Bugs-To: EMAIL@ADDRESS\n" -"POT-Creation-Date: 2019-04-08 12:24+0000\n" +"POT-Creation-Date: 2019-04-15 15:41+0000\n" "PO-Revision-Date: 2019-02-14 22:02+0000\n" "Last-Translator: Michael Weiser <michael.weiser@gmx.de>\n" "Language: de\n" @@ -17,25 +17,30 @@ msgstr "" "Content-Transfer-Encoding: 8bit\n" "Generated-By: Babel 2.4.0\n" -#: peekaboo/queuing.py:375 +#: peekaboo/queuing.py:390 msgid "Sample initialization failed" msgstr "Initialisierung der zu analysierenden Datei fehlgeschlagen" -#: peekaboo/sample.py:186 +#: peekaboo/sample.py:189 #, python-format msgid "File \"%s\" %s is being analyzed" msgstr "Datei \"%s\" %s wird analysiert" -#: peekaboo/sample.py:239 +#: peekaboo/sample.py:242 #, python-format msgid "File \"%s\" is considered \"%s\"" msgstr "Die Datei \"%s\" wird als \"%s\" betrachtet" -#: peekaboo/sample.py:299 +#: peekaboo/sample.py:302 #, python-format msgid "File \"%s\": %s" msgstr "Datei \"%s\": %s" +#: peekaboo/sample.py:498 +#, python-format +msgid "Sample %s successfully submitted to Cuckoo as job %d" +msgstr "Erfolgreich an Cuckoo gegeben %s als Job %d" + #: peekaboo/server.py:143 msgid "Hello, this is Peekaboo." msgstr "Hallo das ist Peekaboo" diff --git a/peekaboo/locale/peekaboo.pot b/peekaboo/locale/peekaboo.pot index b7631fe..8551395 100644 --- a/peekaboo/locale/peekaboo.pot +++ b/peekaboo/locale/peekaboo.pot @@ -8,7 +8,7 @@ msgid "" msgstr "" "Project-Id-Version: PROJECT VERSION\n" "Report-Msgid-Bugs-To: EMAIL@ADDRESS\n" -"POT-Creation-Date: 2019-04-08 12:24+0000\n" +"POT-Creation-Date: 2019-04-15 15:41+0000\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME <EMAIL@ADDRESS>\n" "Language-Team: LANGUAGE <LL@li.org>\n" @@ -17,25 +17,30 @@ msgstr "" "Content-Transfer-Encoding: 8bit\n" "Generated-By: Babel 2.4.0\n" -#: peekaboo/queuing.py:375 +#: peekaboo/queuing.py:390 msgid "Sample initialization failed" msgstr "" -#: peekaboo/sample.py:186 +#: peekaboo/sample.py:189 #, python-format msgid "File \"%s\" %s is being analyzed" msgstr "" -#: peekaboo/sample.py:239 +#: peekaboo/sample.py:242 #, python-format msgid "File \"%s\" is considered \"%s\"" msgstr "" -#: peekaboo/sample.py:299 +#: peekaboo/sample.py:302 #, python-format msgid "File \"%s\": %s" msgstr "" +#: peekaboo/sample.py:498 +#, python-format +msgid "Sample %s successfully submitted to Cuckoo as job %d" +msgstr "" + #: peekaboo/server.py:143 msgid "Hello, this is Peekaboo." 
msgstr "" diff --git a/peekaboo/queuing.py b/peekaboo/queuing.py index e80cedf..02c3d59 100644 --- a/peekaboo/queuing.py +++ b/peekaboo/queuing.py @@ -29,7 +29,7 @@ from queue import Queue, Empty from time import sleep from peekaboo.ruleset import Result, RuleResult from peekaboo.ruleset.engine import RulesetEngine -from peekaboo.exceptions import CuckooReportPendingException, \ +from peekaboo.exceptions import PeekabooAnalysisDeferred, \ PeekabooDatabaseError @@ -317,7 +317,7 @@ class ClusterDuplicateHandler(Thread): self.shutdown_requested.clear() self.job_queue = job_queue self.interval = interval - Thread.__init__(self) + Thread.__init__(self, name="ClusterDuplicateHandler") def run(self): logger.debug("Cluster duplicate handler started.") @@ -354,7 +354,7 @@ class Worker(Thread): self.job_queue = job_queue self.ruleset_config = ruleset_config self.db_con = db_con - Thread.__init__(self) + Thread.__init__(self, name="Worker-%d" % wid) def run(self): self.running_flag.set() @@ -395,7 +395,7 @@ class Worker(Thread): engine = RulesetEngine(sample, self.ruleset_config, self.db_con) try: engine.run() - except CuckooReportPendingException: + except PeekabooAnalysisDeferred: logger.debug("Report for sample %s still pending", sample) continue diff --git a/peekaboo/ruleset/engine.py b/peekaboo/ruleset/engine.py index 3370c33..2d6467d 100644 --- a/peekaboo/ruleset/engine.py +++ b/peekaboo/ruleset/engine.py @@ -28,7 +28,7 @@ import logging from peekaboo.ruleset import Result, RuleResult from peekaboo.ruleset.rules import * from peekaboo.toolbox.peekabooyar import ContainsPeekabooYarRule -from peekaboo.exceptions import CuckooReportPendingException +from peekaboo.exceptions import PeekabooAnalysisDeferred logger = logging.getLogger(__name__) @@ -89,7 +89,7 @@ class RulesetEngine(object): further_analysis=True) sample.add_rule_result(result) - except CuckooReportPendingException as e: + except PeekabooAnalysisDeferred: # in case the Sample is requesting the Cuckoo report raise # catch all other exceptions for this rule diff --git a/peekaboo/ruleset/rules.py b/peekaboo/ruleset/rules.py index e7eff77..120e4f5 100644 --- a/peekaboo/ruleset/rules.py +++ b/peekaboo/ruleset/rules.py @@ -29,8 +29,8 @@ import re import logging from peekaboo.ruleset import Result, RuleResult -from peekaboo.exceptions import CuckooReportPendingException, \ - CuckooAnalysisFailedException +from peekaboo.exceptions import PeekabooAnalysisDeferred, \ + CuckooSubmitFailedException logger = logging.getLogger(__name__) @@ -186,20 +186,23 @@ class CuckooRule(Rule): """ If a report is present for the sample in question we call method evaluate_report() implemented by subclasses to evaluate it for findings. Otherwise we submit the sample to Cuckoo and raise - CuckooReportPendingException to abort the current run of the ruleset + PeekabooAnalysisDeferred to abort the current run of the ruleset until the report arrives. If submission to Cuckoo fails we will ourselves report the sample as failed. @param sample: The sample to evaluate. - @raises CuckooReportPendingException: if the sample was submitted to - Cuckoo + @raises PeekabooAnalysisDeferred: if the sample was submitted to Cuckoo @returns: RuleResult containing verdict. 
""" report = sample.cuckoo_report if report is None: try: job_id = sample.submit_to_cuckoo() - except CuckooAnalysisFailedException: + except CuckooSubmitFailedException as failed: + logger.error("Submit to Cuckoo failed: %s", failed) + # exception message intentionally not present in message + # delivered back to client as to not disclose internal + # information, should request user to contact admin instead return self.result( Result.failed, _("Behavioral analysis by Cuckoo has produced an error " @@ -208,7 +211,7 @@ class CuckooRule(Rule): logger.info('Sample submitted to Cuckoo. Job ID: %s. ' 'Sample: %s', job_id, sample) - raise CuckooReportPendingException() + raise PeekabooAnalysisDeferred() # call report evaluation function if we get here return self.evaluate_report(report) diff --git a/peekaboo/sample.py b/peekaboo/sample.py index 8b4b071..7cb9f03 100644 --- a/peekaboo/sample.py +++ b/peekaboo/sample.py @@ -336,8 +336,10 @@ class Sample(object): peekaboo_report = os.path.join(dump_dir, filename + '_report.txt') try: with open(peekaboo_report, 'w+') as pr_file: - pr_file.write('\n'.join(self.__report)) - pr_file.write('\n'.join(self.__internal_report)) + if self.__report: + pr_file.write('\n'.join(self.__report + [""])) + if self.__internal_report: + pr_file.write('\n'.join(self.__internal_report + [""])) except (OSError, IOError) as error: logger.error('Failure to write report file %s: %s', peekaboo_report, error) @@ -492,8 +494,9 @@ class Sample(object): """ logger.debug("Submitting %s to Cuckoo", self.__submit_path) self.__cuckoo_job_id = self.__cuckoo.submit(self) - self.__internal_report.append('Erfolgreich an Cuckoo gegeben %s als ' - 'Job %d' % (self, self.__cuckoo_job_id)) + self.__internal_report.append( + _('Sample %s successfully submitted to Cuckoo as job %d') + % (self, self.__cuckoo_job_id)) return self.__cuckoo_job_id def register_cuckoo_report(self, report): diff --git a/peekaboo/server.py b/peekaboo/server.py index ec86e80..9cff8de 100644 --- a/peekaboo/server.py +++ b/peekaboo/server.py @@ -32,7 +32,7 @@ import os import stat import socket import socketserver -from threading import Thread, Event +from threading import Thread, Event, current_thread from peekaboo.ruleset import Result @@ -126,6 +126,11 @@ class PeekabooStreamRequestHandler(socketserver.StreamRequestHandler): @author: Sebastian Deiss """ def setup(self): + # rename thread for higher log message clarity + thread = current_thread() + # keep trailing thread number by replacing just the base name + thread.name = thread.name.replace('Thread-', 'Request-') + socketserver.StreamRequestHandler.setup(self) self.job_queue = self.server.job_queue self.sample_factory = self.server.sample_factory @@ -391,7 +396,7 @@ class PeekabooServer(object): sample_factory=sample_factory, request_queue_size=request_queue_size) - self.runner = Thread(target=self.server.serve_forever) + self.runner = Thread(target=self.server.serve_forever, name="Server") self.runner.start() os.chmod(sock_file, diff --git a/peekaboo/toolbox/cuckoo.py b/peekaboo/toolbox/cuckoo.py index d62fc85..1ede5f7 100644 --- a/peekaboo/toolbox/cuckoo.py +++ b/peekaboo/toolbox/cuckoo.py @@ -30,11 +30,15 @@ import locale import logging import json import subprocess -import requests import random -from twisted.internet import protocol, reactor, process + from time import sleep -from peekaboo.exceptions import CuckooAnalysisFailedException +from threading import RLock + +import requests +from twisted.internet import protocol, reactor, process + +from 
diff --git a/peekaboo/toolbox/cuckoo.py b/peekaboo/toolbox/cuckoo.py
index d62fc85..1ede5f7 100644
--- a/peekaboo/toolbox/cuckoo.py
+++ b/peekaboo/toolbox/cuckoo.py
@@ -30,11 +30,15 @@ import locale
 import logging
 import json
 import subprocess
-import requests
 import random
-from twisted.internet import protocol, reactor, process
+
 from time import sleep
-from peekaboo.exceptions import CuckooAnalysisFailedException
+from threading import RLock
+
+import requests
+from twisted.internet import protocol, reactor, process
+
+from peekaboo.exceptions import CuckooSubmitFailedException
 
 logger = logging.getLogger(__name__)
 
@@ -46,13 +50,44 @@ class Cuckoo:
         self.job_queue = job_queue
         self.shutdown_requested = False
         self.running_jobs = {}
+        self.running_jobs_lock = RLock()
+
+    def register_running_job(self, job_id, sample):
+        """ Register a job as running. Detect if another sample has already
+        been registered with the same job ID which obviously must never happen
+        because it corrupts our internal housekeeping. Guarded by a lock
+        because multiple worker threads will call this routine and check for
+        collision and update of job log might otherwise race each other.
+
+        @param job_id: ID of the job to register as running.
+        @type job_id: int
+        @param sample: Sample object to associate with this job ID
+        @type sample: Sample
+
+        @returns: None
+        @raises: CuckooSubmitFailedException on job id collision """
+        with self.running_jobs_lock:
+            if (job_id in self.running_jobs and
+                    self.running_jobs[job_id] is not sample):
+                raise CuckooSubmitFailedException(
+                    'A job with ID %d is already registered as running '
+                    'for sample %s' % (job_id, self.running_jobs[job_id]))
+
+            self.running_jobs[job_id] = sample
 
     def resubmit_with_report(self, job_id):
+        """ Resubmit a sample to the job queue after the report became
+        available. Retrieves the report from Cuckoo.
+
+        @param job_id: ID of job which has finished.
+        @type job_id: int
+
+        @returns: None """
         logger.debug("Analysis done for task #%d" % job_id)
-        # thread-safe, no locking required, revisit if splitting into
-        # multiple operations
-        sample = self.running_jobs.pop(job_id, None)
+
+        with self.running_jobs_lock:
+            sample = self.running_jobs.pop(job_id, None)
+
         if sample is None:
             logger.debug('No sample found for job ID %d', job_id)
             return None
@@ -125,35 +160,34 @@ class CuckooEmbed(Cuckoo):
                                  stderr=subprocess.PIPE,
                                  universal_newlines=True)
             p.wait()
-        except Exception as e:
-            raise CuckooAnalysisFailedException(e)
+        except Exception as error:
+            raise CuckooSubmitFailedException(error)
 
         if not p.returncode == 0:
-            raise CuckooAnalysisFailedException('cuckoo submit returned a non-zero return code.')
-        else:
-            out, err = p.communicate()
-            logger.debug("cuckoo submit STDOUT: %s", out)
-            logger.debug("cuckoo submit STDERR: %s", err)
-
-            match = None
-            pattern_no = 0
-            for pattern in self.job_id_patterns:
-                match = re.search(pattern, out)
-                if match is not None:
-                    logger.debug('Pattern %d matched.' % pattern_no)
-                    break
+            raise CuckooSubmitFailedException(
+                'cuckoo submit returned a non-zero return code.')
 
-                pattern_no += 1
-
+        out, err = p.communicate()
+        logger.debug("cuckoo submit STDOUT: %s", out)
+        logger.debug("cuckoo submit STDERR: %s", err)
+
+        match = None
+        pattern_no = 0
+        for pattern in self.job_id_patterns:
+            match = re.search(pattern, out)
             if match is not None:
-                job_id = int(match.group(1))
-                # thread-safe, no locking required, revisit if splitting into
-                # multiple operations
-                self.running_jobs[job_id] = sample
-                return job_id
+                logger.debug('Pattern %d matched.', pattern_no)
+                break
+
+            pattern_no += 1
+
+        if match is not None:
+            job_id = int(match.group(1))
+            self.register_running_job(job_id, sample)
+            return job_id
 
-        raise CuckooAnalysisFailedException(
-            'Unable to extract job ID from given string %s' % out)
+        raise CuckooSubmitFailedException(
+            'Unable to extract job ID from given string %s' % out)
 
     def get_report(self, job_id):
         path = os.path.join(self.cuckoo_storage,
@@ -271,11 +305,9 @@ class CuckooApi(Cuckoo):
         task_id = response["task_id"]
 
         if task_id > 0:
-            # thread-safe, no locking required, revisit if splitting into
-            # multiple operations
-            self.running_jobs[task_id] = sample
+            self.register_running_job(task_id, sample)
             return task_id
 
-        raise CuckooAnalysisFailedException(
+        raise CuckooSubmitFailedException(
             'Unable to extract job ID from given string %s' % response)
 
     def get_report(self, job_id):
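The point of running_jobs_lock above is that checking for a job-ID collision and storing the sample must happen as one atomic step across worker threads. A stripped-down, stand-alone sketch of that check-and-set pattern (the registry class and exception are simplified stand-ins for the real Cuckoo and Sample objects):

    import threading

    class CuckooSubmitFailedException(Exception):
        pass

    class JobRegistry:
        def __init__(self):
            self.running_jobs = {}
            # RLock so a caller that already holds the lock may re-enter
            self.running_jobs_lock = threading.RLock()

        def register_running_job(self, job_id, sample):
            # collision check and update must be atomic, otherwise two worker
            # threads could silently overwrite each other's registration
            with self.running_jobs_lock:
                if (job_id in self.running_jobs
                        and self.running_jobs[job_id] is not sample):
                    raise CuckooSubmitFailedException(
                        'job ID %d already registered for another sample' % job_id)
                self.running_jobs[job_id] = sample

        def deregister(self, job_id):
            with self.running_jobs_lock:
                return self.running_jobs.pop(job_id, None)

    registry = JobRegistry()
    registry.register_running_job(42, object())  # first registration succeeds
    print(registry.deregister(42) is not None)   # True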