diff options
| field | value | date |
|---|---|---|
| author | Sebastian Deiss <sebastian.deiss@atos.net> | 2018-06-28 11:24:32 +0200 |
| committer | Sebastian Deiss <sebastian.deiss@atos.net> | 2018-06-28 11:24:32 +0200 |
| commit | 257d0acf559584f2bbeac173ecfd789dbd2e3a8f (patch) | |
| tree | fa61ef698c1f94025edbffdc7b7fa80c1a15b799 | |
| parent | eb8033e109ff61627e02309499c2478b43e96389 (diff) | |
Improve MIME type determination by using multiple sources
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | peekaboo/ruleset/rules.py | 4 |
| -rw-r--r-- | peekaboo/sample.py | 52 |
| -rw-r--r-- | peekaboo/toolbox/files.py | 12 |
| -rw-r--r-- | test.py | 2 |
4 files changed, 43 insertions, 27 deletions
diff --git a/peekaboo/ruleset/rules.py b/peekaboo/ruleset/rules.py index adf154e..607a48c 100644 --- a/peekaboo/ruleset/rules.py +++ b/peekaboo/ruleset/rules.py @@ -80,7 +80,7 @@ def file_type_on_whitelist(config, s): whitelist = config['file_type_on_whitelist']['whitelist'] - if s.mimetype in whitelist: + if set(s.mimetypes).issubset(set(whitelist)): return RuleResult(position, result=Result.ignored, reason="Dateityp ist auf Whitelist", @@ -99,7 +99,7 @@ def file_type_on_greylist(config, s): greylist = config['file_type_on_greylist']['greylist'] - if s.mimetype in greylist: + if set(s.mimetypes).issubset(set(greylist)): return RuleResult(position, result=Result.unknown, reason="Dateityp ist auf der Liste der zu analysiserenden Typen", diff --git a/peekaboo/sample.py b/peekaboo/sample.py index 68209d0..1a8b623 100644 --- a/peekaboo/sample.py +++ b/peekaboo/sample.py @@ -34,7 +34,8 @@ from peekaboo.config import get_config from peekaboo.exceptions import CuckooReportPendingException, \ CuckooAnalysisFailedException from peekaboo.toolbox.sampletools import SampleMetaInfo, ConnectionMap, next_job_hash -from peekaboo.toolbox.files import chown2me, guess_mime_type_from_file_contents +from peekaboo.toolbox.files import chown2me, guess_mime_type_from_file_contents, \ + guess_mime_type_from_filename from peekaboo.toolbox.ms_office import has_office_macros from peekaboo.toolbox.cuckoo import submit_to_cuckoo import peekaboo.ruleset as ruleset @@ -298,14 +299,14 @@ class Sample(object): return self.get_attr('file_extension') @property - def mimetype(self): + def mimetypes(self): """ Can not be cached (hard to determine if known/complete). 
determine mime on original p[0-9]* file later result will be "inode/symlink" """ - mime_type = None + mime_types = [] smime = { 'p7s': [ @@ -321,32 +322,35 @@ class Sample(object): declared_mt = self.__meta_info.get_mime_type() if declared_mt is not None: logger.debug('Sample declared as "%s"' % declared_mt) - mime_type = declared_mt + mime_types.append(declared_mt) except Exception as e: logger.exception(e) + declared_mt = None if self.meta_info_loaded: - logger.error('Cannot get mime type from meta info although meta info is loaded.') - - detected_mime_type = guess_mime_type_from_file_contents(self.__path) - if detected_mime_type != mime_type: - logger.debug( - 'Detected MIME type does not match declared MIME Type: declared: %s, detected: %s.' - % (mime_type, detected_mime_type) - ) - # check if the sample is an smime signature (smime.p7s) - # If so, don't overwrite the MIME type since we do not want to analyse S/MIME signatures. - try: - declared_filename = self.get_attr('meta_info_name_declared') - except KeyError: - declared_filename = self.__filename - if declared_filename == 'smime.p7s' and mime_type in smime['p7s']: - logger.info('Using declared MIME type over detected one for S/MIME signatures.') - else: - logger.debug('Overwriting declared MIME Type with "%s"' % detected_mime_type) - mime_type = detected_mime_type + logger.error('Cannot get MIME type from meta info although meta info is loaded.') + + try: + declared_filename = self.get_attr('meta_info_name_declared') + except KeyError: + declared_filename = self.__filename + + content_based_mime_type = guess_mime_type_from_file_contents(self.__path) + if content_based_mime_type is not None and content_based_mime_type not in mime_types: + mime_types.append(content_based_mime_type) + + name_based_mime_type = guess_mime_type_from_filename(declared_filename) + if name_based_mime_type is not None and name_based_mime_type not in mime_types: + mime_types.append(name_based_mime_type) + + logger.debug('Determined 
MIME Types: %s' % mime_types) + # check if the sample is an S/MIME signature (smime.p7s) + # If so, don't overwrite the MIME type since we do not want to analyse S/MIME signatures. + if declared_filename == 'smime.p7s' and declared_mt in smime['p7s']: + logger.info('S/MIME signature detected. Using declared MIME type over detected ones.') + mime_types = [declared_mt] if not self.has_attr('mimetypes'): - self.set_attr('mimetypes', mime_type) + self.set_attr('mimetypes', mime_types) return self.get_attr('mimetypes') diff --git a/peekaboo/toolbox/files.py b/peekaboo/toolbox/files.py index 4896474..1bb6471 100644 --- a/peekaboo/toolbox/files.py +++ b/peekaboo/toolbox/files.py @@ -27,6 +27,7 @@ import logging import subprocess import magic +import mimetypes from peekaboo.config import get_config @@ -51,3 +52,14 @@ def guess_mime_type_from_file_contents(file_path): mt = magic.from_file(file_path, mime=True) if mt: return mt + + +def guess_mime_type_from_filename(file_path): + """ Guess the type of a file based on its filename or URL. """ + if not mimetypes.inited: + mimetypes.init() + mimetypes.add_type('application/javascript', '.jse') + + mt = mimetypes.guess_type(file_path)[0] + if mt: + return mt @@ -171,7 +171,7 @@ class TestSample(unittest.TestCase): def test_sample_attributes(self): self.assertEqual(self.sample.get_filename(), 'test.py') self.assertEqual(self.sample.file_extension, 'py') - self.assertTrue(self.sample.mimetype, 'text/x-python') + self.assertTrue(set(['text/x-python']).issubset(set(self.sample.mimetypes))) self.assertIsNotNone(self.sample.sha256sum) self.assertEqual(self.sample.job_id, -1) self.assertEqual(self.sample.get_result(), Result.unchecked) |