diff options
author | Michael Weiser <michael.weiser@gmx.de> | 2019-02-14 16:28:28 +0000 |
---|---|---|
committer | Michael Weiser <michael.weiser@gmx.de> | 2019-02-14 16:35:53 +0000 |
commit | 7412e890c0207d8c25568f7cad25f9358e7fea82 (patch) | |
tree | 8bd12fbd40a964c511270e625a4365722574fef2 | |
parent | 005a4e174d04a107b5c93b58d613edc0bfff1f67 (diff) |
Make job hash regex greedy
Since we only ever want to extract an identifier from the input file
path, the rest of the regex only needs to provide anchors inside the
path to identify the job hash. The match can therefore be greedy, which
allows the configured regex to be shorter and less error-prone.
-rw-r--r-- | peekaboo.conf.sample | 2 | ||||
-rw-r--r-- | peekaboo/config.py | 2 | ||||
-rw-r--r-- | peekaboo/sample.py | 2 | ||||
-rw-r--r-- | test.py | 19 |
4 files changed, 17 insertions, 8 deletions
diff --git a/peekaboo.conf.sample b/peekaboo.conf.sample index 9d29653..7c9fea1 100644 --- a/peekaboo.conf.sample +++ b/peekaboo.conf.sample @@ -12,7 +12,7 @@ #interpreter : /usr/bin/python -u #worker_count : 3 #sample_base_dir : /tmp -#job_hash_regex : /var/lib/amavis/tmp/([^/]+)/parts.* +#job_hash_regex : /amavis/tmp/([^/]+)/parts/ # 'yes' or 'no' to use Peekaboo's debug module, which allows # additional code execution at runtime. #use_debug_module : no diff --git a/peekaboo/config.py b/peekaboo/config.py index 903b565..f921846 100644 --- a/peekaboo/config.py +++ b/peekaboo/config.py @@ -79,7 +79,7 @@ class PeekabooConfig(object): # pylint: disable=too-many-instance-attributes self.interpreter = '/usr/bin/python -u' self.worker_count = 3 self.sample_base_dir = '/tmp' - self.job_hash_regex = '/var/lib/amavis/tmp/([^/]+)/parts.*' + self.job_hash_regex = '/amavis/tmp/([^/]+)/parts/' self.use_debug_module = False self.keep_mail_data = False self.db_url = 'sqlite:////var/lib/peekaboo/peekaboo.db' diff --git a/peekaboo/sample.py b/peekaboo/sample.py index c46d13c..2f5bbfb 100644 --- a/peekaboo/sample.py +++ b/peekaboo/sample.py @@ -266,7 +266,7 @@ class Sample(object): return job_hash def get_job_hash(self): - job_hash = re.sub(self.__job_hash_regex, r'\1', + job_hash = re.sub('.*%s.*' % self.__job_hash_regex, r'\1', self.__path) if job_hash == self.__path: # regex did not match. @@ -83,7 +83,7 @@ class TestDefaultConfig(TestConfig): self.assertEqual(self.config.worker_count, 3) self.assertEqual(self.config.sample_base_dir, '/tmp') self.assertEqual( - self.config.job_hash_regex, '/var/lib/amavis/tmp/([^/]+)/parts.*') + self.config.job_hash_regex, '/amavis/tmp/([^/]+)/parts/') self.assertEqual(self.config.use_debug_module, False) self.assertEqual(self.config.keep_mail_data, False) self.assertEqual( @@ -299,7 +299,7 @@ class PeekabooDummyConfig(object): """ A dummy configuration for the test cases. 
""" def __init__(self): """ Initialize dummy configuration """ - self.job_hash_regex = r'/var/lib/amavis/tmp/([^/]+)/parts.*' + self.job_hash_regex = r'/amavis/tmp/([^/]+)/parts/' self.sample_base_dir = '/tmp' def get(self, option, default): @@ -488,11 +488,20 @@ class TestSample(unittest.TestCase): def test_job_hash_regex(self): """ Test extraction of the job hash from the working directory path. """ - path_with_job_hash = '/var/lib/amavis/tmp/amavis-20170831T132736-07759-iSI0rJ4b/parts' + job_hash = 'amavis-20170831T132736-07759-iSI0rJ4b' + path_with_job_hash = '/d/var/lib/amavis/tmp/%s/parts/file' % job_hash sample = self.factory.make_sample(path_with_job_hash) - job_hash = sample.get_job_hash() - self.assertEqual(job_hash, 'amavis-20170831T132736-07759-iSI0rJ4b', + self.assertEqual(job_hash, sample.get_job_hash(), 'Job hash regex is not working') + + legacy_factory = SampleFactory( + cuckoo=None, base_dir=self.conf.sample_base_dir, + job_hash_regex=r'/var/lib/amavis/tmp/([^/]+)/parts.*', + keep_mail_data=False) + sample = legacy_factory.make_sample(path_with_job_hash) + self.assertEqual(job_hash, sample.get_job_hash(), + 'Job hash regex is not working') + job_hash = self.sample.get_job_hash() self.assertIn('peekaboo-run_analysis', job_hash) |