diff options
author | Michael Weiser <michael.weiser@gmx.de> | 2019-04-08 17:11:52 +0000 |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-04-08 17:11:52 +0000 |
commit | 91fe84aea999d38cab12a4cf6338f29bb34a4c02 (patch) | |
tree | 3291a2abb244850e3e3b044e87f0ca98ab403f3a | |
parent | aaf0e1ae4b6e10398530affc95d3d9305e38f39a (diff) | |
parent | 203fd19cbbf82f8d7f652b179569aca08b6ba0e1 (diff) |
Merge pull request #73 from michaelweiser/encoding
Encoding
-rw-r--r-- | peekaboo/config.py | 6 | ||||
-rw-r--r-- | peekaboo/daemon.py | 6 | ||||
-rw-r--r-- | peekaboo/locale/de/LC_MESSAGES/peekaboo.mo | bin | 4173 -> 4173 bytes | |||
-rw-r--r-- | peekaboo/locale/de/LC_MESSAGES/peekaboo.po | 10 | ||||
-rw-r--r-- | peekaboo/queuing.py | 12 | ||||
-rw-r--r-- | peekaboo/ruleset/rules.py | 4 | ||||
-rw-r--r-- | peekaboo/sample.py | 19 | ||||
-rw-r--r-- | peekaboo/server.py | 8 | ||||
-rw-r--r-- | peekaboo/toolbox/cuckoo.py | 46 |
9 files changed, 68 insertions, 43 deletions
diff --git a/peekaboo/config.py b/peekaboo/config.py index e98b929..f99c398 100644 --- a/peekaboo/config.py +++ b/peekaboo/config.py @@ -247,7 +247,7 @@ class PeekabooConfig(object): # pylint: disable=too-many-instance-attributes if not option.startswith('_'): settings[option] = value - return '<PeekabooConfig(%s)>' % str(settings) + return '<PeekabooConfig(%s)>' % settings __repr__ = __str__ @@ -333,7 +333,7 @@ class PeekabooRulesetConfig(object): return config.get('enabled', True) def __str__(self): - return str('<PeekabooRulesetConfiguration(filepath="%s", %s)>' % - (self.config_file, self.ruleset_config)) + return '<PeekabooRulesetConfiguration(filepath="%s", %s)>' % \ + (self.config_file, self.ruleset_config) __repr__ = __str__ diff --git a/peekaboo/daemon.py b/peekaboo/daemon.py index 736f883..bc77580 100644 --- a/peekaboo/daemon.py +++ b/peekaboo/daemon.py @@ -271,7 +271,11 @@ def run(): logger.debug('Installing report message translations') translation = gettext.translation(locale_domain, locale_dir, languages, fallback=True) - translation.install() + # python2's gettext needs to be told explicitly to return unicode strings + loc_kwargs = {} + if sys.version_info[0] < 3: + loc_kwargs = {'unicode': True} + translation.install(loc_kwargs) # establish a connection to the database try: diff --git a/peekaboo/locale/de/LC_MESSAGES/peekaboo.mo b/peekaboo/locale/de/LC_MESSAGES/peekaboo.mo Binary files differ index b0efe60..1af9ffb 100644 --- a/peekaboo/locale/de/LC_MESSAGES/peekaboo.mo +++ b/peekaboo/locale/de/LC_MESSAGES/peekaboo.mo diff --git a/peekaboo/locale/de/LC_MESSAGES/peekaboo.po b/peekaboo/locale/de/LC_MESSAGES/peekaboo.po index c4749fb..044406f 100644 --- a/peekaboo/locale/de/LC_MESSAGES/peekaboo.po +++ b/peekaboo/locale/de/LC_MESSAGES/peekaboo.po @@ -43,15 +43,15 @@ msgstr "Hallo das ist Peekaboo" #: peekaboo/server.py:176 #, fuzzy msgid "Error: Invalid JSON in request." -msgstr "FEHLER: Ungueltiges JSON." +msgstr "FEHLER: Ungültiges JSON." 
#: peekaboo/server.py:181 msgid "ERROR: Invalid data structure." -msgstr "FEHLER: Ungueltiges Datenformat." +msgstr "FEHLER: Ungültiges Datenformat." #: peekaboo/server.py:188 msgid "ERROR: Incomplete data structure." -msgstr "FEHLER: Unvollstaendige Datenstruktur." +msgstr "FEHLER: Unvollständige Datenstruktur." #: peekaboo/server.py:195 msgid "ERROR: Path does not exist or no permission to access it." @@ -111,7 +111,7 @@ msgstr "Datei ist dem System noch nicht bekannt" #: peekaboo/ruleset/rules.py:104 #, python-format msgid "Failure to determine sample file size: %s" -msgstr "Ermittlung der Dateigroesse fehlgeschlagen: %s" +msgstr "Ermittlung der Dateigröße fehlgeschlagen: %s" #: peekaboo/ruleset/rules.py:109 #, python-format @@ -158,7 +158,7 @@ msgstr "" #: peekaboo/ruleset/rules.py:239 msgid "Empty list of malicious signatures" -msgstr "Leere Liste schaedlicher Signaturen" +msgstr "Leere Liste schädlicher Signaturen" #: peekaboo/ruleset/rules.py:257 msgid "No signature suggesting malware detected" diff --git a/peekaboo/queuing.py b/peekaboo/queuing.py index ea21d97..e80cedf 100644 --- a/peekaboo/queuing.py +++ b/peekaboo/queuing.py @@ -100,7 +100,7 @@ class JobQueue: @raises Full: if the queue is full. """ sample_hash = sample.sha256sum - sample_str = str(sample) + sample_str = "%s" % sample duplicate = None cluster_duplicate = None resubmit = None @@ -180,7 +180,7 @@ class JobQueue: return False if locked: - sample_str = str(sample_duplicates[0]) + sample_str = "%s" % sample_duplicates[0] if self.duplicates.get(sample_hash) is not None: logger.error("Possible backlog corruption for sample " "%s! Please file a bug report. Trying to " @@ -232,12 +232,12 @@ class JobQueue: # already known and process them quickly now that the first # instance has gone through full analysis. Therefore we can ignore # them here. 
- if not self.duplicates.has_key(sample_hash): + if sample_hash not in self.duplicates: return # submit all samples which have accumulated in the backlog for s in self.duplicates[sample_hash]['duplicates']: - submitted_duplicates.append(str(s)) + submitted_duplicates.append("%s" % s) self.jobs.put(s, True, self.queue_timeout) sample = self.duplicates[sample_hash]['master'] @@ -246,7 +246,7 @@ class JobQueue: except PeekabooDatabaseError as dberr: logger.error(dberr) - sample_str = str(sample) + sample_str = "%s" % sample del self.duplicates[sample_hash] logger.debug("Cleared sample %s from in-flight list" % sample_str) @@ -294,7 +294,7 @@ class JobQueue: # wait for workers to end interval = 1 - for attempt in range(1, timeout / interval + 1): + for attempt in range(1, timeout // interval + 1): still_running = [] for worker in self.workers: if worker.running: diff --git a/peekaboo/ruleset/rules.py b/peekaboo/ruleset/rules.py index f112a70..e7eff77 100644 --- a/peekaboo/ruleset/rules.py +++ b/peekaboo/ruleset/rules.py @@ -159,7 +159,7 @@ class FileTypeOnGreylistRule(Rule): return self.result(Result.unknown, _("File type is not on the list of types to " - "analyse (%s)") % (str(sample.mimetypes)), + "analyse (%s)") % sample.mimetypes, False) @@ -248,7 +248,7 @@ class CuckooEvilSigRule(CuckooRule): # check if there is a "bad" signatures and return bad matched_bad_sigs = [] for sig in bad_sigs: - match = re.search(sig, str(sigs)) + match = re.search(sig, "\n".join(sigs)) if match: matched_bad_sigs.append(sig) diff --git a/peekaboo/sample.py b/peekaboo/sample.py index 7449e79..8b4b071 100644 --- a/peekaboo/sample.py +++ b/peekaboo/sample.py @@ -32,6 +32,9 @@ import shutil import string import logging import tempfile +# python 3's open with encoding parameter and implicit usage of the system +# locale-specified encoding +from builtins import open from datetime import datetime from peekaboo.toolbox.files import guess_mime_type_from_file_contents, \ 
guess_mime_type_from_filename @@ -295,8 +298,8 @@ class Sample(object): """ Add a rule result to the sample. This also adds a message about this to the report and updates the overall analysis result (so far). """ - logger.debug('Adding rule result %s' % str(res)) - self.__report.append(_("File \"%s\": %s") % (self.__filename, str(res))) + logger.debug('Adding rule result %s', res) + self.__report.append(_("File \"%s\": %s") % (self.__filename, res)) logger.debug("Current overall result: %s, new rule result: %s", self.__result, res.result) @@ -332,9 +335,9 @@ class Sample(object): # Peekaboo's report peekaboo_report = os.path.join(dump_dir, filename + '_report.txt') try: - with open(peekaboo_report, 'w+') as f: - f.write('\n'.join(self.__report)) - f.write('\n'.join(self.__internal_report)) + with open(peekaboo_report, 'w+') as pr_file: + pr_file.write('\n'.join(self.__report)) + pr_file.write('\n'.join(self.__internal_report)) except (OSError, IOError) as error: logger.error('Failure to write report file %s: %s', peekaboo_report, error) @@ -357,8 +360,10 @@ class Sample(object): cuckoo_report = os.path.join(dump_dir, filename + '_cuckoo_report.json') try: - with open(cuckoo_report, 'w+') as f: - json.dump(self.__cuckoo_report.raw, f, indent=1) + with open(cuckoo_report, 'wb+') as cr_json_file: + cr_json = json.dumps(self.__cuckoo_report.raw, + indent=1, ensure_ascii=True) + cr_json_file.write(cr_json.encode('ascii')) except (OSError, IOError) as error: logger.error('Failure to dump json report to %s: %s', cuckoo_report, error) diff --git a/peekaboo/server.py b/peekaboo/server.py index 1595d8d..ec86e80 100644 --- a/peekaboo/server.py +++ b/peekaboo/server.py @@ -184,7 +184,7 @@ class PeekabooStreamRequestHandler(socketserver.StreamRequestHandler): submitted = [] for part in parts: - if not part.has_key('full_name'): + if 'full_name' not in part: self.talk_back(_('ERROR: Incomplete data structure.')) logger.error('Incomplete data structure.') return None @@ -342,12 
+342,14 @@ class PeekabooStreamRequestHandler(socketserver.StreamRequestHandler): @returns: True on successful sending of all messages, False on error of sending and None specifically if sending failed because the client closed the connection. """ - if isinstance(msgs, str): + if not isinstance(msgs, (list, tuple)): msgs = (msgs, ) for msg in msgs: try: - self.request.sendall('%s\n' % msg) + # FIXME: Hard-coded, arbitrary encoding since we have no + # clearly defined protocol here. + self.request.sendall(('%s\n' % msg).encode('utf-8')) except IOError as ioerror: if ioerror.errno == errno.EPIPE: logger.warning('Client closed connection on us: %s', diff --git a/peekaboo/toolbox/cuckoo.py b/peekaboo/toolbox/cuckoo.py index 9b2372e..d62fc85 100644 --- a/peekaboo/toolbox/cuckoo.py +++ b/peekaboo/toolbox/cuckoo.py @@ -26,6 +26,7 @@ import re import os +import locale import logging import json import subprocess @@ -115,9 +116,14 @@ class CuckooEmbed(Cuckoo): # cuckoo_submit is a list, make a copy as to not modify the # original value proc = self.cuckoo_submit.split(' ') + [sample.submit_path] + + # universal_newlines opens channels to child in text mode and + # returns strings instead of bytes in return which we do to avoid + # the need to handle decoding ourselves p = subprocess.Popen(proc, stdout=subprocess.PIPE, - stderr=subprocess.PIPE) + stderr=subprocess.PIPE, + universal_newlines=True) p.wait() except Exception as e: raise CuckooAnalysisFailedException(e) @@ -126,15 +132,13 @@ class CuckooEmbed(Cuckoo): raise CuckooAnalysisFailedException('cuckoo submit returned a non-zero return code.') else: out, err = p.communicate() - logger.debug("cuckoo submit STDOUT: %s" % out) - logger.debug("cuckoo submit STDERR: %s" % err) - - response = out.replace("\n", "") + logger.debug("cuckoo submit STDOUT: %s", out) + logger.debug("cuckoo submit STDERR: %s", err) match = None pattern_no = 0 for pattern in self.job_id_patterns: - match = re.search(pattern, response) + match = 
re.search(pattern, out) if match is not None: logger.debug('Pattern %d matched.' % pattern_no) break @@ -149,7 +153,7 @@ class CuckooEmbed(Cuckoo): return job_id raise CuckooAnalysisFailedException( - 'Unable to extract job ID from given string %s' % response) + 'Unable to extract job ID from given string %s' % out) def get_report(self, job_id): path = os.path.join(self.cuckoo_storage, @@ -321,6 +325,7 @@ class CuckooServer(protocol.ProcessProtocol): """ def __init__(self, cuckoo): self.cuckoo = cuckoo + self.encoding = locale.getpreferredencoding() def connectionMade(self): logger.info('Connected. Cuckoo PID: %s' % self.transport.pid) @@ -328,11 +333,19 @@ class CuckooServer(protocol.ProcessProtocol): def outReceived(self, data): """ on receiving output on STDOUT from Cuckoo """ - logger.debug('STDOUT %s' % str(data)) + # explicit decoding: The program is sending us stuff and because it's + # just stdout/stderr we have no defined protocol, no structure and no + # guaranteed encoding. Normally we'd tell popen to open in text mode + # which would automatically apply the system encoding. With Twisted + # there doesn't seem to be that option. But since it's our child, we + # can (hopefully) assume that it uses our locale settings. So we use + # the default encoding as returned by our interpreter. 
+ logger.debug('STDOUT %s', data.decode(self.encoding)) def errReceived(self, data): """ on receiving output on STDERR from Cuckoo """ - logger.debug('STDERR %s' % str(data.replace('\n', ''))) + content = data.decode(self.encoding) + logger.debug('STDERR %s', content.replace('\n', '')) # # FILE SUBMITTED @@ -341,11 +354,12 @@ class CuckooServer(protocol.ProcessProtocol): # 2016-04-12 09:14:06,984 [lib.cuckoo.core.scheduler] INFO: Starting # analysis of FILE "cuckoo.png" (task #201, options "") # INFO: Starting analysis of FILE ".bashrc" (task #4, options "") - m = re.match('.*INFO: Starting analysis of FILE \"(.*)\" \(task #([0-9]*), options .*', data) + match = re.match(r'.*INFO: Starting analysis of FILE "(.*)" ' + r'\(task #([0-9]*), options .*', content) - if m: - logger.info("File submitted: task #%s, filename %s" % (m.group(2), - m.group(1))) + if match: + logger.info("File submitted: task #%s, filename %s", + match.group(2), match.group(1)) # # ANALYSIS DONE @@ -353,7 +367,7 @@ class CuckooServer(protocol.ProcessProtocol): # 2016-04-12 09:25:27,824 [lib.cuckoo.core.scheduler] INFO: Task #202: # reports generation completed ... m = re.match(".*INFO: Task #([0-9]*): reports generation completed.*", - data) + content) if m: job_id = int(m.group(1)) self.cuckoo.resubmit_with_report(job_id) @@ -371,11 +385,11 @@ class CuckooServer(protocol.ProcessProtocol): self.cuckoo.shut_down(1) def processExited(self, reason): - logger.info("Cuckoo exited with status %s" % str(reason.value.exitCode)) + logger.info("Cuckoo exited with status %s", reason.value.exitCode) self.cuckoo.shut_down() def processEnded(self, reason): - logger.info("Cuckoo ended with status %s" % str(reason.value.exitCode)) + logger.info("Cuckoo ended with status %s", reason.value.exitCode) self.cuckoo.shut_down() |