summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Michael Weiser <michael.weiser@gmx.de> 2019-04-08 17:11:52 +0000
committer GitHub <noreply@github.com> 2019-04-08 17:11:52 +0000
commit 91fe84aea999d38cab12a4cf6338f29bb34a4c02 (patch)
tree 3291a2abb244850e3e3b044e87f0ca98ab403f3a
parent aaf0e1ae4b6e10398530affc95d3d9305e38f39a (diff)
parent 203fd19cbbf82f8d7f652b179569aca08b6ba0e1 (diff)
Merge pull request #73 from michaelweiser/encoding
Encoding
-rw-r--r-- peekaboo/config.py 6
-rw-r--r-- peekaboo/daemon.py 6
-rw-r--r-- peekaboo/locale/de/LC_MESSAGES/peekaboo.mo bin 4173 -> 4173 bytes
-rw-r--r-- peekaboo/locale/de/LC_MESSAGES/peekaboo.po 10
-rw-r--r-- peekaboo/queuing.py 12
-rw-r--r-- peekaboo/ruleset/rules.py 4
-rw-r--r-- peekaboo/sample.py 19
-rw-r--r-- peekaboo/server.py 8
-rw-r--r-- peekaboo/toolbox/cuckoo.py 46
9 files changed, 68 insertions, 43 deletions
diff --git a/peekaboo/config.py b/peekaboo/config.py
index e98b929..f99c398 100644
--- a/peekaboo/config.py
+++ b/peekaboo/config.py
@@ -247,7 +247,7 @@ class PeekabooConfig(object): # pylint: disable=too-many-instance-attributes
if not option.startswith('_'):
settings[option] = value
- return '<PeekabooConfig(%s)>' % str(settings)
+ return '<PeekabooConfig(%s)>' % settings
__repr__ = __str__
@@ -333,7 +333,7 @@ class PeekabooRulesetConfig(object):
return config.get('enabled', True)
def __str__(self):
- return str('<PeekabooRulesetConfiguration(filepath="%s", %s)>' %
- (self.config_file, self.ruleset_config))
+ return '<PeekabooRulesetConfiguration(filepath="%s", %s)>' % \
+ (self.config_file, self.ruleset_config)
__repr__ = __str__
diff --git a/peekaboo/daemon.py b/peekaboo/daemon.py
index 736f883..bc77580 100644
--- a/peekaboo/daemon.py
+++ b/peekaboo/daemon.py
@@ -271,7 +271,11 @@ def run():
logger.debug('Installing report message translations')
translation = gettext.translation(locale_domain, locale_dir, languages,
fallback=True)
- translation.install()
+ # python2's gettext needs to be told explicitly to return unicode strings
+ loc_kwargs = {}
+ if sys.version_info[0] < 3:
+ loc_kwargs = {'unicode': True}
+ translation.install(loc_kwargs)
# establish a connection to the database
try:
diff --git a/peekaboo/locale/de/LC_MESSAGES/peekaboo.mo b/peekaboo/locale/de/LC_MESSAGES/peekaboo.mo
index b0efe60..1af9ffb 100644
--- a/peekaboo/locale/de/LC_MESSAGES/peekaboo.mo
+++ b/peekaboo/locale/de/LC_MESSAGES/peekaboo.mo
Binary files differ
diff --git a/peekaboo/locale/de/LC_MESSAGES/peekaboo.po b/peekaboo/locale/de/LC_MESSAGES/peekaboo.po
index c4749fb..044406f 100644
--- a/peekaboo/locale/de/LC_MESSAGES/peekaboo.po
+++ b/peekaboo/locale/de/LC_MESSAGES/peekaboo.po
@@ -43,15 +43,15 @@ msgstr "Hallo das ist Peekaboo"
#: peekaboo/server.py:176
#, fuzzy
msgid "Error: Invalid JSON in request."
-msgstr "FEHLER: Ungueltiges JSON."
+msgstr "FEHLER: Ungültiges JSON."
#: peekaboo/server.py:181
msgid "ERROR: Invalid data structure."
-msgstr "FEHLER: Ungueltiges Datenformat."
+msgstr "FEHLER: Ungültiges Datenformat."
#: peekaboo/server.py:188
msgid "ERROR: Incomplete data structure."
-msgstr "FEHLER: Unvollstaendige Datenstruktur."
+msgstr "FEHLER: Unvollständige Datenstruktur."
#: peekaboo/server.py:195
msgid "ERROR: Path does not exist or no permission to access it."
@@ -111,7 +111,7 @@ msgstr "Datei ist dem System noch nicht bekannt"
#: peekaboo/ruleset/rules.py:104
#, python-format
msgid "Failure to determine sample file size: %s"
-msgstr "Ermittlung der Dateigroesse fehlgeschlagen: %s"
+msgstr "Ermittlung der Dateigröße fehlgeschlagen: %s"
#: peekaboo/ruleset/rules.py:109
#, python-format
@@ -158,7 +158,7 @@ msgstr ""
#: peekaboo/ruleset/rules.py:239
msgid "Empty list of malicious signatures"
-msgstr "Leere Liste schaedlicher Signaturen"
+msgstr "Leere Liste schädlicher Signaturen"
#: peekaboo/ruleset/rules.py:257
msgid "No signature suggesting malware detected"
diff --git a/peekaboo/queuing.py b/peekaboo/queuing.py
index ea21d97..e80cedf 100644
--- a/peekaboo/queuing.py
+++ b/peekaboo/queuing.py
@@ -100,7 +100,7 @@ class JobQueue:
@raises Full: if the queue is full.
"""
sample_hash = sample.sha256sum
- sample_str = str(sample)
+ sample_str = "%s" % sample
duplicate = None
cluster_duplicate = None
resubmit = None
@@ -180,7 +180,7 @@ class JobQueue:
return False
if locked:
- sample_str = str(sample_duplicates[0])
+ sample_str = "%s" % sample_duplicates[0]
if self.duplicates.get(sample_hash) is not None:
logger.error("Possible backlog corruption for sample "
"%s! Please file a bug report. Trying to "
@@ -232,12 +232,12 @@ class JobQueue:
# already known and process them quickly now that the first
# instance has gone through full analysis. Therefore we can ignore
# them here.
- if not self.duplicates.has_key(sample_hash):
+ if sample_hash not in self.duplicates:
return
# submit all samples which have accumulated in the backlog
for s in self.duplicates[sample_hash]['duplicates']:
- submitted_duplicates.append(str(s))
+ submitted_duplicates.append("%s" % s)
self.jobs.put(s, True, self.queue_timeout)
sample = self.duplicates[sample_hash]['master']
@@ -246,7 +246,7 @@ class JobQueue:
except PeekabooDatabaseError as dberr:
logger.error(dberr)
- sample_str = str(sample)
+ sample_str = "%s" % sample
del self.duplicates[sample_hash]
logger.debug("Cleared sample %s from in-flight list" % sample_str)
@@ -294,7 +294,7 @@ class JobQueue:
# wait for workers to end
interval = 1
- for attempt in range(1, timeout / interval + 1):
+ for attempt in range(1, timeout // interval + 1):
still_running = []
for worker in self.workers:
if worker.running:
diff --git a/peekaboo/ruleset/rules.py b/peekaboo/ruleset/rules.py
index f112a70..e7eff77 100644
--- a/peekaboo/ruleset/rules.py
+++ b/peekaboo/ruleset/rules.py
@@ -159,7 +159,7 @@ class FileTypeOnGreylistRule(Rule):
return self.result(Result.unknown,
_("File type is not on the list of types to "
- "analyse (%s)") % (str(sample.mimetypes)),
+ "analyse (%s)") % sample.mimetypes,
False)
@@ -248,7 +248,7 @@ class CuckooEvilSigRule(CuckooRule):
# check if there are any "bad" signatures and return bad
matched_bad_sigs = []
for sig in bad_sigs:
- match = re.search(sig, str(sigs))
+ match = re.search(sig, "\n".join(sigs))
if match:
matched_bad_sigs.append(sig)
diff --git a/peekaboo/sample.py b/peekaboo/sample.py
index 7449e79..8b4b071 100644
--- a/peekaboo/sample.py
+++ b/peekaboo/sample.py
@@ -32,6 +32,9 @@ import shutil
import string
import logging
import tempfile
+# use python 3's open(), which accepts an encoding parameter and otherwise
+# implicitly uses the encoding specified by the system locale
+from builtins import open
from datetime import datetime
from peekaboo.toolbox.files import guess_mime_type_from_file_contents, \
guess_mime_type_from_filename
@@ -295,8 +298,8 @@ class Sample(object):
""" Add a rule result to the sample. This also adds a message about
this to the report and updates the overall analysis result (so far).
"""
- logger.debug('Adding rule result %s' % str(res))
- self.__report.append(_("File \"%s\": %s") % (self.__filename, str(res)))
+ logger.debug('Adding rule result %s', res)
+ self.__report.append(_("File \"%s\": %s") % (self.__filename, res))
logger.debug("Current overall result: %s, new rule result: %s",
self.__result, res.result)
@@ -332,9 +335,9 @@ class Sample(object):
# Peekaboo's report
peekaboo_report = os.path.join(dump_dir, filename + '_report.txt')
try:
- with open(peekaboo_report, 'w+') as f:
- f.write('\n'.join(self.__report))
- f.write('\n'.join(self.__internal_report))
+ with open(peekaboo_report, 'w+') as pr_file:
+ pr_file.write('\n'.join(self.__report))
+ pr_file.write('\n'.join(self.__internal_report))
except (OSError, IOError) as error:
logger.error('Failure to write report file %s: %s',
peekaboo_report, error)
@@ -357,8 +360,10 @@ class Sample(object):
cuckoo_report = os.path.join(dump_dir,
filename + '_cuckoo_report.json')
try:
- with open(cuckoo_report, 'w+') as f:
- json.dump(self.__cuckoo_report.raw, f, indent=1)
+ with open(cuckoo_report, 'wb+') as cr_json_file:
+ cr_json = json.dumps(self.__cuckoo_report.raw,
+ indent=1, ensure_ascii=True)
+ cr_json_file.write(cr_json.encode('ascii'))
except (OSError, IOError) as error:
logger.error('Failure to dump json report to %s: %s',
cuckoo_report, error)
diff --git a/peekaboo/server.py b/peekaboo/server.py
index 1595d8d..ec86e80 100644
--- a/peekaboo/server.py
+++ b/peekaboo/server.py
@@ -184,7 +184,7 @@ class PeekabooStreamRequestHandler(socketserver.StreamRequestHandler):
submitted = []
for part in parts:
- if not part.has_key('full_name'):
+ if 'full_name' not in part:
self.talk_back(_('ERROR: Incomplete data structure.'))
logger.error('Incomplete data structure.')
return None
@@ -342,12 +342,14 @@ class PeekabooStreamRequestHandler(socketserver.StreamRequestHandler):
@returns: True on successful sending of all messages, False on error of
sending and None specifically if sending failed because the
client closed the connection. """
- if isinstance(msgs, str):
+ if not isinstance(msgs, (list, tuple)):
msgs = (msgs, )
for msg in msgs:
try:
- self.request.sendall('%s\n' % msg)
+ # FIXME: Hard-coded, arbitrary encoding since we have no
+ # clearly defined protocol here.
+ self.request.sendall(('%s\n' % msg).encode('utf-8'))
except IOError as ioerror:
if ioerror.errno == errno.EPIPE:
logger.warning('Client closed connection on us: %s',
diff --git a/peekaboo/toolbox/cuckoo.py b/peekaboo/toolbox/cuckoo.py
index 9b2372e..d62fc85 100644
--- a/peekaboo/toolbox/cuckoo.py
+++ b/peekaboo/toolbox/cuckoo.py
@@ -26,6 +26,7 @@
import re
import os
+import locale
import logging
import json
import subprocess
@@ -115,9 +116,14 @@ class CuckooEmbed(Cuckoo):
# cuckoo_submit is split into a new argument list here so as not to
# modify the original value
proc = self.cuckoo_submit.split(' ') + [sample.submit_path]
+
+ # universal_newlines opens the pipes to the child in text mode so
+ # that we get strings instead of bytes back, which saves us from
+ # having to handle decoding ourselves
p = subprocess.Popen(proc,
stdout=subprocess.PIPE,
- stderr=subprocess.PIPE)
+ stderr=subprocess.PIPE,
+ universal_newlines=True)
p.wait()
except Exception as e:
raise CuckooAnalysisFailedException(e)
@@ -126,15 +132,13 @@ class CuckooEmbed(Cuckoo):
raise CuckooAnalysisFailedException('cuckoo submit returned a non-zero return code.')
else:
out, err = p.communicate()
- logger.debug("cuckoo submit STDOUT: %s" % out)
- logger.debug("cuckoo submit STDERR: %s" % err)
-
- response = out.replace("\n", "")
+ logger.debug("cuckoo submit STDOUT: %s", out)
+ logger.debug("cuckoo submit STDERR: %s", err)
match = None
pattern_no = 0
for pattern in self.job_id_patterns:
- match = re.search(pattern, response)
+ match = re.search(pattern, out)
if match is not None:
logger.debug('Pattern %d matched.' % pattern_no)
break
@@ -149,7 +153,7 @@ class CuckooEmbed(Cuckoo):
return job_id
raise CuckooAnalysisFailedException(
- 'Unable to extract job ID from given string %s' % response)
+ 'Unable to extract job ID from given string %s' % out)
def get_report(self, job_id):
path = os.path.join(self.cuckoo_storage,
@@ -321,6 +325,7 @@ class CuckooServer(protocol.ProcessProtocol):
"""
def __init__(self, cuckoo):
self.cuckoo = cuckoo
+ self.encoding = locale.getpreferredencoding()
def connectionMade(self):
logger.info('Connected. Cuckoo PID: %s' % self.transport.pid)
@@ -328,11 +333,19 @@ class CuckooServer(protocol.ProcessProtocol):
def outReceived(self, data):
""" on receiving output on STDOUT from Cuckoo """
- logger.debug('STDOUT %s' % str(data))
+ # explicit decoding: The program is sending us stuff and because it's
+ # just stdout/stderr we have no defined protocol, no structure and no
+ # guaranteed encoding. Normally we'd tell popen to open in text mode
+ # which would automatically apply the system encoding. With Twisted
+ # there doesn't seem to be that option. But since it's our child, we
+ # can (hopefully) assume that it uses our locale settings. So we use
+ # the default encoding as returned by our interpreter.
+ logger.debug('STDOUT %s', data.decode(self.encoding))
def errReceived(self, data):
""" on receiving output on STDERR from Cuckoo """
- logger.debug('STDERR %s' % str(data.replace('\n', '')))
+ content = data.decode(self.encoding)
+ logger.debug('STDERR %s', content.replace('\n', ''))
#
# FILE SUBMITTED
@@ -341,11 +354,12 @@ class CuckooServer(protocol.ProcessProtocol):
# 2016-04-12 09:14:06,984 [lib.cuckoo.core.scheduler] INFO: Starting
# analysis of FILE "cuckoo.png" (task #201, options "")
# INFO: Starting analysis of FILE ".bashrc" (task #4, options "")
- m = re.match('.*INFO: Starting analysis of FILE \"(.*)\" \(task #([0-9]*), options .*', data)
+ match = re.match(r'.*INFO: Starting analysis of FILE "(.*)" '
+ r'\(task #([0-9]*), options .*', content)
- if m:
- logger.info("File submitted: task #%s, filename %s" % (m.group(2),
- m.group(1)))
+ if match:
+ logger.info("File submitted: task #%s, filename %s",
+ match.group(2), match.group(1))
#
# ANALYSIS DONE
@@ -353,7 +367,7 @@ class CuckooServer(protocol.ProcessProtocol):
# 2016-04-12 09:25:27,824 [lib.cuckoo.core.scheduler] INFO: Task #202:
# reports generation completed ...
m = re.match(".*INFO: Task #([0-9]*): reports generation completed.*",
- data)
+ content)
if m:
job_id = int(m.group(1))
self.cuckoo.resubmit_with_report(job_id)
@@ -371,11 +385,11 @@ class CuckooServer(protocol.ProcessProtocol):
self.cuckoo.shut_down(1)
def processExited(self, reason):
- logger.info("Cuckoo exited with status %s" % str(reason.value.exitCode))
+ logger.info("Cuckoo exited with status %s", reason.value.exitCode)
self.cuckoo.shut_down()
def processEnded(self, reason):
- logger.info("Cuckoo ended with status %s" % str(reason.value.exitCode))
+ logger.info("Cuckoo ended with status %s", reason.value.exitCode)
self.cuckoo.shut_down()