summaryrefslogtreecommitdiffstats
path: root/peekaboo
diff options
context:
space:
mode:
authorMichael Weiser <michael.weiser@gmx.de>2019-04-08 14:25:07 +0000
committerMichael Weiser <michael.weiser@gmx.de>2019-04-08 16:48:29 +0000
commit2c773c853dd9bfd098113e031bc513d3e1c8e31f (patch)
treed2995edb902a3da80ac0b05d6b53c04632705279 /peekaboo
parent1120b049cde0aa0d673fb7b54dae83d3e13da329 (diff)
Avoid encoding errors using encoding-aware open
Writing the report file in dump_processing_info() would throw exceptions such as: "UnicodeEncodeError: 'ascii' codec can't encode character u'\xfc' in position 37: ordinal not in range(128)" if anything in the report was not plain ASCII, such as an input file name containing umlauts. Solve this by importing the open function from the builtins module of the future package, i.e. use python3 semantics where files implicitly use the system locale for input and output encoding. write() then accepts what is unicode() in python2 and plain strings in python3 and automatically encodes them for the output file's encoding. As a side-effect this throws up a problem where the json module insists on returning a bytearray (or rather str() in python2), which write() will not accept, failing with: "TypeError: write() argument 1 must be unicode, not str". This can be side-stepped nicely by opening the JSON dump file with flag 'b'. But this then fails on python3 because that one expects a bytes-like object for binary output, which we produce by explicitly encoding to 'ascii'. This works because json by default ensures its output is ASCII-clean. But for paranoia's sake we add the ensure_ascii option anyway. This workaround (including the 'b' on open) can go away when we stop supporting python2.
Diffstat (limited to 'peekaboo')
-rw-r--r--peekaboo/sample.py15
1 files changed, 10 insertions, 5 deletions
diff --git a/peekaboo/sample.py b/peekaboo/sample.py
index f6ae9b6..8b4b071 100644
--- a/peekaboo/sample.py
+++ b/peekaboo/sample.py
@@ -32,6 +32,9 @@ import shutil
import string
import logging
import tempfile
+# python 3's open with encoding parameter and implicit usage of the system
+# locale-specified encoding
+from builtins import open
from datetime import datetime
from peekaboo.toolbox.files import guess_mime_type_from_file_contents, \
guess_mime_type_from_filename
@@ -332,9 +335,9 @@ class Sample(object):
# Peekaboo's report
peekaboo_report = os.path.join(dump_dir, filename + '_report.txt')
try:
- with open(peekaboo_report, 'w+') as f:
- f.write('\n'.join(self.__report))
- f.write('\n'.join(self.__internal_report))
+ with open(peekaboo_report, 'w+') as pr_file:
+ pr_file.write('\n'.join(self.__report))
+ pr_file.write('\n'.join(self.__internal_report))
except (OSError, IOError) as error:
logger.error('Failure to write report file %s: %s',
peekaboo_report, error)
@@ -357,8 +360,10 @@ class Sample(object):
cuckoo_report = os.path.join(dump_dir,
filename + '_cuckoo_report.json')
try:
- with open(cuckoo_report, 'w+') as f:
- json.dump(self.__cuckoo_report.raw, f, indent=1)
+ with open(cuckoo_report, 'wb+') as cr_json_file:
+ cr_json = json.dumps(self.__cuckoo_report.raw,
+ indent=1, ensure_ascii=True)
+ cr_json_file.write(cr_json.encode('ascii'))
except (OSError, IOError) as error:
logger.error('Failure to dump json report to %s: %s',
cuckoo_report, error)