summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Michael Weiser <michael.weiser@gmx.de> 2019-02-14 16:28:28 +0000
committer Michael Weiser <michael.weiser@gmx.de> 2019-02-14 16:35:53 +0000
commit 7412e890c0207d8c25568f7cad25f9358e7fea82 (patch)
tree 8bd12fbd40a964c511270e625a4365722574fef2
parent 005a4e174d04a107b5c93b58d613edc0bfff1f67 (diff)
Make job hash regex greedy
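Since we only ever want to extract an identifier from the input file path, the configured regexp only needs to provide anchors inside the path that identify the job hash; it does not have to describe the whole path. The match can therefore be greedy: the configured pattern is wrapped in a leading and trailing `.*` when it is applied, which allows the configured expression to be shorter and less error-prone.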
-rw-r--r-- peekaboo.conf.sample 2
-rw-r--r-- peekaboo/config.py 2
-rw-r--r-- peekaboo/sample.py 2
-rw-r--r-- test.py 19
4 files changed, 17 insertions, 8 deletions
diff --git a/peekaboo.conf.sample b/peekaboo.conf.sample
index 9d29653..7c9fea1 100644
--- a/peekaboo.conf.sample
+++ b/peekaboo.conf.sample
@@ -12,7 +12,7 @@
#interpreter : /usr/bin/python -u
#worker_count : 3
#sample_base_dir : /tmp
-#job_hash_regex : /var/lib/amavis/tmp/([^/]+)/parts.*
+#job_hash_regex : /amavis/tmp/([^/]+)/parts/
# 'yes' or 'no' to use Peekaboo's debug module, which allows
# additional code execution at runtime.
#use_debug_module : no
diff --git a/peekaboo/config.py b/peekaboo/config.py
index 903b565..f921846 100644
--- a/peekaboo/config.py
+++ b/peekaboo/config.py
@@ -79,7 +79,7 @@ class PeekabooConfig(object): # pylint: disable=too-many-instance-attributes
self.interpreter = '/usr/bin/python -u'
self.worker_count = 3
self.sample_base_dir = '/tmp'
- self.job_hash_regex = '/var/lib/amavis/tmp/([^/]+)/parts.*'
+ self.job_hash_regex = '/amavis/tmp/([^/]+)/parts/'
self.use_debug_module = False
self.keep_mail_data = False
self.db_url = 'sqlite:////var/lib/peekaboo/peekaboo.db'
diff --git a/peekaboo/sample.py b/peekaboo/sample.py
index c46d13c..2f5bbfb 100644
--- a/peekaboo/sample.py
+++ b/peekaboo/sample.py
@@ -266,7 +266,7 @@ class Sample(object):
return job_hash
def get_job_hash(self):
- job_hash = re.sub(self.__job_hash_regex, r'\1',
+ job_hash = re.sub('.*%s.*' % self.__job_hash_regex, r'\1',
self.__path)
if job_hash == self.__path:
# regex did not match.
diff --git a/test.py b/test.py
index a589297..be9d814 100644
--- a/test.py
+++ b/test.py
@@ -83,7 +83,7 @@ class TestDefaultConfig(TestConfig):
self.assertEqual(self.config.worker_count, 3)
self.assertEqual(self.config.sample_base_dir, '/tmp')
self.assertEqual(
- self.config.job_hash_regex, '/var/lib/amavis/tmp/([^/]+)/parts.*')
+ self.config.job_hash_regex, '/amavis/tmp/([^/]+)/parts/')
self.assertEqual(self.config.use_debug_module, False)
self.assertEqual(self.config.keep_mail_data, False)
self.assertEqual(
@@ -299,7 +299,7 @@ class PeekabooDummyConfig(object):
""" A dummy configuration for the test cases. """
def __init__(self):
""" Initialize dummy configuration """
- self.job_hash_regex = r'/var/lib/amavis/tmp/([^/]+)/parts.*'
+ self.job_hash_regex = r'/amavis/tmp/([^/]+)/parts/'
self.sample_base_dir = '/tmp'
def get(self, option, default):
@@ -488,11 +488,20 @@ class TestSample(unittest.TestCase):
def test_job_hash_regex(self):
""" Test extraction of the job hash from the working directory path.
"""
- path_with_job_hash = '/var/lib/amavis/tmp/amavis-20170831T132736-07759-iSI0rJ4b/parts'
+ job_hash = 'amavis-20170831T132736-07759-iSI0rJ4b'
+ path_with_job_hash = '/d/var/lib/amavis/tmp/%s/parts/file' % job_hash
sample = self.factory.make_sample(path_with_job_hash)
- job_hash = sample.get_job_hash()
- self.assertEqual(job_hash, 'amavis-20170831T132736-07759-iSI0rJ4b',
+ self.assertEqual(job_hash, sample.get_job_hash(),
'Job hash regex is not working')
+
+ legacy_factory = SampleFactory(
+ cuckoo=None, base_dir=self.conf.sample_base_dir,
+ job_hash_regex=r'/var/lib/amavis/tmp/([^/]+)/parts.*',
+ keep_mail_data=False)
+ sample = legacy_factory.make_sample(path_with_job_hash)
+ self.assertEqual(job_hash, sample.get_job_hash(),
+ 'Job hash regex is not working')
+
job_hash = self.sample.get_job_hash()
self.assertIn('peekaboo-run_analysis', job_hash)