Re-implemented queuing system and mapping between socket <-> sample(s)

Also, the toolbox has been improved further by moving specific parts to own modules. Some formatting was performed too.
author: Sebastian Deiss <sebastian.deiss@atos.net> 2017-10-16 15:44:52 +0200
committer: Sebastian Deiss <sebastian.deiss@atos.net> 2017-10-16 15:44:52 +0200
commit: 14076642c7547a4ac5b48010ea3a61b887caba48 (patch)
tree: e9cd434de3d5f90c3b2d42ee541cd75e806c4781
parent: 417da10b33b3e885389dac1270d8d93bc8cf2fc7 (diff)
8 files changed, 414 insertions, 166 deletions
diff --git a/peekaboo/daemon.py b/peekaboo/daemon.py
index 535f321..4bf28a0 100644
--- a/peekaboo/daemon.py
+++ b/peekaboo/daemon.py
@@ -38,9 +38,9 @@ from peekaboo import _owl, __version__
 from peekaboo.config import parse_config, get_config
 from peekaboo.db import PeekabooDatabase
 from peekaboo.toolbox.cuckoo import CuckooManager
-import peekaboo.pjobs as pjobs
-import peekaboo.sample as sample
-
+from peekaboo.toolbox.sampletools import ConnectionMap
+from peekaboo.queuing import JobQueue, create_workers
+from peekaboo.sample import make_sample
 
 logger = logging.getLogger(__name__)
 
@@ -53,7 +53,7 @@ class PeekabooStreamServer(SocketServer.ThreadingUnixStreamServer):
     """
     def __init__(self, server_address, request_handler_cls, bind_and_activate=True):
         self.config = get_config()
-        self.workers = pjobs.Workers(self.config.worker_count)
+        create_workers(self.config.worker_count)
         # We can only accept 2 * worker_count connections.
         self.request_queue_size = self.config.worker_count * 2
         self.allow_reuse_address = True
@@ -76,70 +76,70 @@ class PeekabooStreamServer(SocketServer.ThreadingUnixStreamServer):
 
 
 class PeekabooStreamRequestHandler(SocketServer.StreamRequestHandler):
-    def __init__(self, request, client_address, server):
-        self.workers = server.workers
-        SocketServer.StreamRequestHandler.__init__(self, request, client_address, server)
-
     def handle(self):
+        """
+        Handles an analyses request. The path of the directory / file to analyse must
+        be written to the corresponding socket.
+        The maximum buffer size is 1024 bytes.
+        """
         self.request.sendall('Hallo das ist Peekaboo\n\n')
-        # receive directory path
         path = self.request.recv(1024).rstrip()
-        logger.info("Received %s" % path)
+        logger.info("Got analyses request for %s" % path)
 
         if not os.path.exists(path):
-            self.request.sendall("ERROR: path from amavis doesn't exist or no "
-                                 "permission to access it")
-            logger.error('Path from amavis doesn\'t exist or no '
-                         'permission to access it')
+            self.request.sendall(
+                'FEHLER: Pfad existiert nicht oder Zugriff verweigert.'
+            )
+            logger.error("ERROR: Path does not exist or no permission to access it.")
         else:
-            # close connection if there is nothing to analyze
             for_analysis = []
             if os.path.isfile(path):
-                sample = self._make_sample(path, self.request)
+                sample = make_sample(path, self.request)
                 if sample:
                     for_analysis.append(sample)
             else:
-                # walk recursively through entries in directory
+                # walk recursively through entries in the given directory.
                 for dirname, __, filenames in os.walk(path):
                     for filename in filenames:
                         logger.debug("Found file %s" % filename)
-                        p = os.path.join(dirname, filename)
-                        sample = self._make_sample(p, self.request)
+                        f = os.path.join(dirname, filename)
+                        sample = make_sample(f, self.request)
                         if sample:
                             for_analysis.append(sample)
 
-            # introduced after issue where results were reported
-            # before all file could be added
-            for s in for_analysis:
-                pjobs.Jobs.add_job(self.request, s)
-                self.workers.submit_job(s, self.__class__)
+            # introduced after an issue where results were reported
+            # before all files could be added.
+            for sample in for_analysis:
+                ConnectionMap.add(self.request, sample)
+                JobQueue.submit(sample, self.__class__)
 
     # TODO: do cleanup work here in finish()
 
-    def _make_sample(self, p, conn):
-        logger.debug("Looking at file %s" % p)
-        if not os.path.isfile(p):
-            logger.debug('%s is not a file' % p)
-            return None
-        s = sample.Sample(conn, p)
-        logger.debug('Created sample %s' % s)
-
-        return s
-
 
 def run():
     """ Runs the daemon. """
     arg_parser = ArgumentParser()
-    arg_parser.add_argument('-c', '--config', action='store', required=False,
-                            default=os.path.join('./peekaboo.conf'),
-                            help='The configuration file for Peekaboo.')
-    arg_parser.add_argument('-d', '--debug', action='store_true', required=False,
-                            help="Run Peekaboo in debug mode regardless of what's "
-                                 "specified in the configuration.",
-                            default=False)
-    arg_parser.add_argument('-D', '--daemon', action='store_true', required=False,
-                            help='Run Peekaboo in daemon mode (suppresses the logo to be written to STDOUT).',
-                            default=False)
+    arg_parser.add_argument(
+        '-c', '--config',
+        action='store',
+        required=False,
+        default=os.path.join('./peekaboo.conf'),
+        help='The configuration file for Peekaboo.'
+    )
+    arg_parser.add_argument(
+        '-d', '--debug',
+        action='store_true',
+        required=False,
+        default=False,
+        help="Run Peekaboo in debug mode regardless of what's specified in the configuration."
+    )
+    arg_parser.add_argument(
+        '-D', '--daemon',
+        action='store_true',
+        required=False,
+        default=False,
+        help='Run Peekaboo in daemon mode (suppresses the logo to be written to STDOUT).'
+    )
     args = arg_parser.parse_args()
 
     if not args.daemon:
@@ -173,7 +173,7 @@ def run():
         peekaboo_debugger()
 
     if os.getuid() == 0:
-        logger.warning('Peekaboo should not run as root')
+        logger.warning('Peekaboo should not run as root.')
         # drop privileges to user
         os.setgid(grp.getgrnam(config.group)[2])
         os.setuid(pwd.getpwnam(config.user)[2])
diff --git a/peekaboo/queuing.py b/peekaboo/queuing.py
new file mode 100644
index 0000000..9550d7a
--- /dev/null
+++ b/peekaboo/queuing.py
@@ -0,0 +1,96 @@
+###############################################################################
+#                                                                             #
+# Peekaboo Extended Email Attachment Behavior Observation Owl                 #
+#                                                                             #
+# queuing.py                                                                  #
+###############################################################################
+#                                                                             #
+# Copyright (C) 2016-2017  science + computing ag                             #
+#                                                                             #
+# This program is free software: you can redistribute it and/or modify        #
+# it under the terms of the GNU General Public License as published by        #
+# the Free Software Foundation, either version 3 of the License, or (at       #
+# your option) any later version.                                             #
+#                                                                             #
+# This program is distributed in the hope that it will be useful, but         #
+# WITHOUT ANY WARRANTY; without even the implied warranty of                  #
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU           #
+# General Public License for more details.                                    #
+#                                                                             #
+# You should have received a copy of the GNU General Public License           #
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.       #
+#                                                                             #
+###############################################################################
+
+
+import logging
+from threading import Thread
+from Queue import Queue
+from peekaboo.ruleset.engine import evaluate
+from peekaboo.exceptions import CuckooReportPendingException
+
+
+logger = logging.getLogger(__name__)
+
+
+def create_workers(worker_count=4):
+    for i in range(0, worker_count):
+        logger.debug("Create Worker %d" % i)
+        w = Worker()
+        JobQueue.workers.append(w)
+        w.start()
+
+
+class JobQueue(object):
+    """
+    Peekaboo's queuing system.
+
+    @author: Sebastian Deiss
+    """
+    workers = []
+    jobs = Queue()
+
+    @staticmethod
+    def submit(sample, submitter, timeout=300):
+        """
+        Adds a Sample object to the job queue.
+        If the queue is full, we block for 300 seconds and then throw an exception.
+
+        :param sample: The Sample object to add to the queue.
+        :param submitter: The name of the class / module that wants to submit the sample.
+        :param timeout: Block until timeout is reached and then trow an exception
+                        if the job has not been submitted.
+        :raises Full: if the queue is full.
+        """
+        logger.debug("New sample submitted to job queue by %s. %s" % (submitter, sample))
+        # thread safe most likely no race condition possible
+        JobQueue.jobs.put(sample, True, timeout)
+
+
+class Worker(Thread):
+    """
+    A Worker to process a sample.
+
+    @author: Sebastian Deiss
+    """
+    def __init__(self):
+        self.active = True
+        Thread.__init__(self)
+
+    def run(self):
+        while self.active:
+            logger.debug('Worker is ready')
+            s = JobQueue.jobs.get(True)  # wait blocking for next job (thread safe)
+            logger.debug('Processing sample %s' % str(s))
+
+            s.init()
+
+            try:
+                evaluate(s)
+            except CuckooReportPendingException:
+                pass
+            except Exception as e:
+                logger.exception(e)
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        self.active = False
diff --git a/peekaboo/sample.py b/peekaboo/sample.py
index cad9bd2..60e4414 100644
--- a/peekaboo/sample.py
+++ b/peekaboo/sample.py
@@ -31,21 +31,37 @@ import shutil
 import logging
 from datetime import datetime
 from peekaboo.config import get_config
-from peekaboo.exceptions import CuckooReportPendingException, CuckooAnalysisFailedException
-from peekaboo.toolbox.sampletools import SampleMetaInfo, \
-                                         next_job_hash, \
-                                         chown2me, \
-                                         guess_mime_type_from_file_contents, \
-                                         guess_mime_type_from_filename
+from peekaboo.exceptions import CuckooReportPendingException, \
+                                CuckooAnalysisFailedException
+from peekaboo.toolbox.sampletools import SampleMetaInfo, next_job_hash
+from peekaboo.toolbox.files import chown2me, guess_mime_type_from_filename, \
+                                   guess_mime_type_from_file_contents
 from peekaboo.toolbox.ms_office import has_office_macros
 from peekaboo.toolbox.cuckoo import submit_to_cuckoo
-import peekaboo.pjobs as pjobs
+from peekaboo.toolbox.sampletools import ConnectionMap
 import peekaboo.ruleset as ruleset
 
 
 logger = logging.getLogger(__name__)
 
 
+def make_sample(file, socket):
+    """
+    Create a Sample object from a given file.
+
+    :param file: Path to the file to create a Sample object from.
+    :param socket: An optional socket to write the report to.
+    :return: A sample object representing the given file or None if the file does not exist.
+    """
+    logger.debug("Looking at file %s" % file)
+    if not os.path.isfile(file):
+        logger.debug('%s is not a file' % file)
+        return None
+    s = Sample(file, socket)
+    logger.debug('Created sample %s' % s)
+    return s
+
+
 class Sample(object):
     """
     This class handles and describes samples to be analysed by Peekaboo.
@@ -60,8 +76,7 @@ class Sample(object):
     @author: Felix Bauer
     @author: Sebastian Deiss
     """
-    def __init__(self, sock, file_path):
-        self.__socket = sock
+    def __init__(self, file_path, sock=None):
         self.__path = file_path
         self.__config = get_config()
         self.__db_con = self.__config.get_db_con()
@@ -73,6 +88,7 @@ class Sample(object):
         self.__symlink = None
         self.__result = ruleset.Result.unchecked
         self.__report = []  # Peekaboo's own report
+        self.__socket = sock
         # Additional attributes for a sample object (e. g. dump info)
         self.__attributes = {}
         self.initalized = False
@@ -116,13 +132,7 @@ class Sample(object):
         message = "Datei \"%s\" %s wird analysiert\n" % (self.__filename,
                                                          self.sha256sum)
         self.__report.append(message)
-        try:
-            self.__socket.send(message)
-        except IOError as e:
-            if e.errno == errno.EPIPE:
-                logger.warning('Unable send message "%s". Broken pipe.' % message)
-            else:
-                logger.exception(e)
+        self.__send_message(message)
 
     def get_attr(self, key):
         """
@@ -186,7 +196,11 @@ class Sample(object):
         else:
             logger.debug('Saving results to database')
             self.__db_con.sample_info_update(self)
-        self._cleanup()
+        if self.__socket is not None:
+            ConnectionMap.remove(self.__socket, self)
+        if not ConnectionMap.has_connection(self.__socket):
+            self.__cleanup_temp_files()
+            self.__close_socket()
 
     def add_rule_result(self, res):
         logger.debug('Adding rule result %s' % str(res))
@@ -338,7 +352,7 @@ class Sample(object):
                 message = 'Erfolgreich an Cuckoo gegeben %s als Job %d\n' \
                           % (self, job_id)
                 self.__report.append(message)
-                logger.info('Sample %s submitted to Cuckoo. Job ID: %s' % (self, job_id))
+                logger.info('Sample submitted to Cuckoo. Job ID: %s. Sample: %s' % (job_id, self))
                 raise CuckooReportPendingException()
             except CuckooAnalysisFailedException as e:
                 logger.exception(e)
@@ -366,7 +380,7 @@ class Sample(object):
             os.mkdir(os.path.join(self.__config.sample_base_dir,
                                   job_hash))
 
-        logger.debug("job_hash: %s" % job_hash)
+        logger.debug("Job hash for this sample: %s" % job_hash)
         return job_hash
 
     def load_meta_info(self, meta_info_file):
@@ -378,7 +392,7 @@ class Sample(object):
                 logger.debug('meta_info_%s = %s' % (info[0], info[1]))
                 self.set_attr('meta_info_' + info[0], info[1])
             self.meta_info_loaded = True
-        except Exception as e:
+        except Exception:
             logger.info('No metadata available for file %s' % self.__path)
 
     def __create_symlink(self):
@@ -394,26 +408,20 @@ class Sample(object):
 
         os.symlink(orig, self.__symlink)
 
-
-###################################################################
     def report(self):
+        """
+        Create the report for this sample. The report is saved as a list of
+        strings and is available via get_report(). Also, if a socket connection was
+        supplied to the sample the report messages are also written to the socket.
+        """
         # TODO: move to rule processing engine.
-        """ report result to socket connection """
         self.determine_result()
 
         for rule_result in self.get_attr('rule_results'):
             message = "Datei \"%s\": %s\n" % (self.__filename, str(rule_result))
             self.__report.append(message)
-            logger.info('Connection send: %s ' % message)
-            try:
-                self.__socket.send(message)
-            except IOError as e:
-                if e.errno == errno.EPIPE:
-                    logger.warning('Unable send message "%s". Broken pipe.' % message)
-                else:
-                    logger.exception(e)
+            self.__send_message(message)
 
-        # check if result still init value inProgress
         if self.__result == ruleset.Result.inProgress:
             logger.warning('Ruleset result forces to unchecked.')
             self.__result = ruleset.Result.unchecked
@@ -421,44 +429,44 @@ class Sample(object):
         message = "Die Datei \"%s\" wurde als \"%s\" eingestuft\n\n" \
                   % (self.__filename, self.__result.name)
         self.__report.append(message)
-        logger.debug('Connection send: %s ' % message)
-        if self.__socket:
-            try:
-                self.__socket.send(message)
-            except IOError as e:
-                if e.errno == errno.EPIPE:
-                    logger.warning('Unable send message "%s". Broken pipe.' % message)
-                else:
-                    logger.exception(e)
-
-    def _cleanup(self):
-        # TODO: move elsewhere.
-        if pjobs.Jobs.remove_job(self.__socket, self) <= 0:
-            # returns count of remaining samples for this connection
-            logger.debug('Closing connection.')
-            # delete all files created by dump_info
-            try:
-                logger.debug("Deleting tempdir %s" % self.__wd)
-                shutil.rmtree(self.__wd)
-            except OSError as e:
-                logger.error("OSError while clean up %s: %s"
-                             % (self.__wd, str(e)))
-            if not os.path.isdir(self.__wd):
-                logger.debug('Clean up of %s complete' % self.__wd)
-            else:
-                logger.info('Clean up of %s failed' % self.__wd)
+        self.__send_message(message)
 
-            try:
+    def __close_socket(self):
+        logger.debug('Closing socket connection.')
+        try:
+            if self.__socket is not None:
                 self.__socket.close()
-            except EnvironmentError as e:
-                # base class for exceptions that can occur outside the Python system.
-                # e. g. IOError, OSError
-                if e.errno == errno.EPIPE:
-                    logger.warning('Unable to close the socket. Broken pipe.')
-                else:
-                    logger.exception(e)
+        except EnvironmentError as e:
+            # base class for exceptions that can occur outside the Python system.
+            # e. g. IOError, OSError
+            if e.errno == errno.EPIPE:
+                logger.warning('Unable to close the socket. Broken pipe.')
+            else:
+                logger.exception(e)
+
+    def __cleanup_temp_files(self):
+        try:
+            logger.debug("Deleting tempdir %s" % self.__wd)
+            shutil.rmtree(self.__wd)
+        except OSError as e:
+            logger.exception(e)
+
+    def __send_message(self, msg):
+        """
+        Write a message to the socket.
 
-###################################################################
+        :param msg: The message to send (max. 1024 bytes).
+        """
+        if self.__socket is None:
+            return
+        try:
+            self.__socket.send(msg)
+            logger.debug('Message send: %s ' % msg)
+        except IOError as e:
+            if e.errno == errno.EPIPE:
+                logger.warning('Unable send message "%s". Broken pipe.' % msg)
+            else:
+                logger.exception(e)
 
     def __str__(self):
         meta_info_loaded = 'no'
diff --git a/peekaboo/toolbox/cuckoo.py b/peekaboo/toolbox/cuckoo.py
index 9927909..38e6198 100644
--- a/peekaboo/toolbox/cuckoo.py
+++ b/peekaboo/toolbox/cuckoo.py
@@ -33,7 +33,8 @@ from twisted.internet import protocol
 from peekaboo import MultiRegexMatcher
 from peekaboo.config import get_config
 from peekaboo.exceptions import CuckooAnalysisFailedException
-import peekaboo.pjobs as pjobs
+from peekaboo.toolbox.sampletools import ConnectionMap
+from peekaboo.queuing import JobQueue
 
 
 logger = logging.getLogger(__name__)
@@ -64,7 +65,7 @@ def submit_to_cuckoo(sample):
         out, err = p.communicate()
         logger.debug("cuckoo submit STDOUT: %s" % out)
         logger.debug("cuckoo submit STDERR: %s" % err)
-        # process output to get jobID
+        # process output to get job ID
         patterns = list()
         # Example: Success: File "/var/lib/peekaboo/.bashrc" added as task with ID #4
         patterns.append(".*Success.*: File .* added as task with ID #([0-9]*).*")
@@ -133,19 +134,17 @@ class CuckooManager(protocol.ProcessProtocol):
         if m:
             job_id = int(m.group(1))
             logger.info("Analysis done for task #%d" % job_id)
-
-            logger.debug("Queued jobs %d" % pjobs.Jobs.length())
-            sample = pjobs.Jobs.get_sample_by_job_id(job_id)
+            logger.debug("Remaining connections: %d" % ConnectionMap.size())
+            sample = ConnectionMap.get_sample_by_job_id(job_id)
             if sample:
                 logger.debug('Requesting Cuckoo report for sample %s' % sample)
                 self.__report = CuckooReport(job_id)
                 sample.set_attr('cuckoo_report', self.__report)
                 sample.set_attr('cuckoo_json_report_file', self.__report.file_path)
-
-                pjobs.Workers.submit_job(sample, self.__class__)
-                logger.debug("Queued jobs %d" % pjobs.Jobs.length())
+                JobQueue.submit(sample, self.__class__)
+                logger.debug("Remaining connections: %d" % ConnectionMap.size())
             else:
-                logger.info('No job found for ID %d' % job_id)
+                logger.info('No connection found for ID %d' % job_id)
 
     def inConnectionLost(self):
         logger.debug("Cuckoo closed STDIN")
@@ -160,11 +159,11 @@ class CuckooManager(protocol.ProcessProtocol):
         os._exit(1)
 
     def processExited(self, reason):
-        logger.info("Cuckoo exited status %d" % reason.value.exitCode)
+        logger.info("Cuckoo exited with status %d" % reason.value.exitCode)
         os._exit(0)
 
     def processEnded(self, reason):
-        logger.info("Cuckoo ended status %d" % reason.value.exitCode)
+        logger.info("Cuckoo ended with status %d" % reason.value.exitCode)
         os._exit(0)
 
 
diff --git a/peekaboo/toolbox/files.py b/peekaboo/toolbox/files.py
new file mode 100644
index 0000000..6a43828
--- /dev/null
+++ b/peekaboo/toolbox/files.py
@@ -0,0 +1,65 @@
+###############################################################################
+#                                                                             #
+# Peekaboo Extended Email Attachment Behavior Observation Owl                 #
+#                                                                             #
+# toolbox/                                                                    #
+#         files.py                                                            #
+###############################################################################
+#                                                                             #
+# Copyright (C) 2016-2017  science + computing ag                             #
+#                                                                             #
+# This program is free software: you can redistribute it and/or modify        #
+# it under the terms of the GNU General Public License as published by        #
+# the Free Software Foundation, either version 3 of the License, or (at       #
+# your option) any later version.                                             #
+#                                                                             #
+# This program is distributed in the hope that it will be useful, but         #
+# WITHOUT ANY WARRANTY; without even the implied warranty of                  #
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU           #
+# General Public License for more details.                                    #
+#                                                                             #
+# You should have received a copy of the GNU General Public License           #
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.       #
+#                                                                             #
+###############################################################################
+
+
+import logging
+import subprocess
+import mimetypes
+import magic
+from peekaboo.config import get_config
+
+
+logger = logging.getLogger(__name__)
+
+
+def chown2me():
+    """ Acquire ownership of all directories under /tmp with the prefix "amavis-". """
+    # TODO: Find a better solution to acquire ownership and only for the directory currently in usse.
+    logger.debug('Invoking chown2me...')
+    config = get_config()
+    proc = subprocess.Popen(config.chown2me_exec,
+                            stdout=subprocess.PIPE,
+                            stderr=subprocess.PIPE)
+    proc.wait()
+    if proc.returncode != 0:
+        logger.error('chown2me exited with code %d' % proc.returncode)
+
+
+def guess_mime_type_from_filename(file_path):
+    """ Guess the type of a file based on its filename or URL. """
+    if not mimetypes.inited:
+        mimetypes.init()
+        mimetypes.add_type('application/javascript', '.jse')
+
+    mt = mimetypes.guess_type(file_path)[0]
+    if mt:
+        return mt
+
+
+def guess_mime_type_from_file_contents(file_path):
+    """  Get type from file magic bytes. """
+    mt = magic.from_file(file_path, mime=True)
+    if mt:
+        return mt
diff --git a/peekaboo/toolbox/plugins/oneanalysis.py b/peekaboo/toolbox/plugins/oneanalysis.py
index dabee86..4b42377 100644
--- a/peekaboo/toolbox/plugins/oneanalysis.py
+++ b/peekaboo/toolbox/plugins/oneanalysis.py
@@ -29,9 +29,10 @@ import threading
 import traceback
 import sys
 import logging
-import peekaboo.pjobs
+import peekaboo.queuing
 from peekaboo.ruleset import RuleResult
 from peekaboo.exceptions import CuckooReportPendingException
+from peekaboo.toolbox.sampletools import ConnectionMap
 
 
 logger = logging.getLogger(__name__)
@@ -69,7 +70,7 @@ class OneAnalysis(object):
                                   further_analysis=True)
 
             l = []
-            for sample in peekaboo.pjobs.Jobs.get_samples_by_sha256(s.sha256sum):
+            for sample in ConnectionMap.get_samples_by_sha256(s.sha256sum):
                 if sample != s:
                     if not sample.has_attr('pending') or not sample.get_attr('pending') is True:
                         l.append(sample)
@@ -93,8 +94,8 @@ class OneAnalysis(object):
     def queue_identical_samples(self, s):
         with self.__in_use:
             logger.debug("queueing identical samples")
-            for sample in peekaboo.pjobs.Jobs.get_samples_by_sha256(s.sha256sum):
+            for sample in ConnectionMap.get_samples_by_sha256(s.sha256sum):
                 pending = sample.get_attr('pending')
                 if pending:
                     sample.set_attr('pending', False)
-                    peekaboo.pjobs.Workers.submit_job(sample, 'OneAnalysis')
+                    peekaboo.queuing.JobQueue.submit(sample, self.__class__)
diff --git a/peekaboo/toolbox/sampletools.py b/peekaboo/toolbox/sampletools.py
index 55f34a7..0df42fe 100644
--- a/peekaboo/toolbox/sampletools.py
+++ b/peekaboo/toolbox/sampletools.py
@@ -26,13 +26,11 @@
 
 import logging
 import string
-import subprocess
-import mimetypes
-import magic
+import threading
 from random import choice
 from datetime import datetime
 from ConfigParser import SafeConfigParser
-from peekaboo.config import get_config
+from peekaboo.ruleset import Result
 
 
 logger = logging.getLogger(__name__)
@@ -70,6 +68,118 @@ class SampleMetaInfo(object):
         return '<SampleMetaInfo(%s)>' % str(self.meta_info)
 
 
+class ConnectionMap(object):
+    """
+    Maps socket objects with one or more samples.
+    This is required for the reporting so we know which
+    Sample objects belong to which socket connection.
+
+    @author: Sebastian Deiss
+    """
+    __lock = threading.RLock()
+    __map = {}
+
+    @staticmethod
+    def add(socket, sample):
+        with ConnectionMap.__lock:
+            logger.debug('Registered sample for connection %s' % socket)
+            if ConnectionMap.has_connection(socket):
+                ConnectionMap.__map[socket].append(sample)
+            else:
+                ConnectionMap.__map[socket] = [sample]
+            return ConnectionMap.size()
+
+    @staticmethod
+    def remove(socket, sample):
+        with ConnectionMap.__lock:
+            if ConnectionMap.has_connection(socket):
+                logger.debug(
+                    'Removing sample for connection %s, Sample: %s' % (socket, sample)
+                )
+                ConnectionMap.__map[socket].remove(sample)
+                if len(ConnectionMap.__map[socket]) == 0:
+                    ConnectionMap.__map.pop(socket)
+                    logger.debug('Removing connection: %s' % socket)
+            else:
+                logger.debug(
+                    'Connection does not exist.'
+                    'Connection: %s, Sample: %s, Map: %s'
+                     % (socket, sample, ConnectionMap.__map)
+                )
+            return ConnectionMap.size()
+
+    @staticmethod
+    def size():
+        return len(ConnectionMap.__map)
+
+    @staticmethod
+    def _dump():
+        return ConnectionMap.__map
+
+    @staticmethod
+    def has_connection(socket):
+        if socket in ConnectionMap.__map.keys():
+            return True
+        return False
+
+    @staticmethod
+    def get_sample_by_job_id(job_id):
+        with ConnectionMap.__lock:
+            logger.debug("Searching for a sample with job ID %d" % job_id)
+            matching_sample = None
+            for __, samples in ConnectionMap.__map.iteritems():
+                logger.debug('Samples for this connection: %s' % samples)
+                for sample in samples:
+                    if job_id == sample.job_id:
+                        logger.debug('Found %s for job ID %d' % (sample, job_id))
+                        return sample
+
+    @staticmethod
+    def get_sample_by_sha256(sha256sum):
+        with ConnectionMap.__lock:
+            logger.debug(
+                'Searching for a sample with SHA-256 checksum %s' % sha256sum
+            )
+            matching_sample = None
+            for __, samples in ConnectionMap.__map.iteritems():
+                logger.debug('Samples for this connection: %s' % samples)
+                for sample in samples:
+                    if sha256sum == sample.sha256sum:
+                        logger.debug(
+                            'Found %s for SHA-256 hash %s' % (sample, sha256sum)
+                        )
+                        return sample
+
+    @staticmethod
+    def get_samples_by_sha256(sha256sum):
+        with ConnectionMap.__lock:
+            logger.debug('Searching for all samples with SHA-256 checksum %s' % sha256sum)
+            matching_samples = []
+            for __, samples in ConnectionMap.__map.iteritems():
+                logger.debug('Samples for this connection: %s' % samples)
+                for sample in samples:
+                    if sha256sum == sample.sha256sum:
+                        logger.debug('Found %s for SHA-256 hash %s' % (sample, sha256sum))
+                        matching_samples.append(sample)
+            return matching_samples
+
+    @staticmethod
+    def get_samples_by_connection(socket):
+        with ConnectionMap.__lock:
+            matching_samples = []
+            for sock in ConnectionMap.__map.iteritems():
+                if sock == socket:
+                    matching_samples = ConnectionMap.__map[sock]
+            return matching_samples
+
+    @staticmethod
+    def in_progress(sha256sum):
+        sample = ConnectionMap.get_sample_by_sha256(sha256sum)
+        if sample is not None and sample.get_result() == Result.inProgress:
+            return True
+        return False
+
+
 def next_job_hash(size=8):
     """
     Generates a job hash (default: 8 characters).
@@ -86,34 +196,3 @@ def next_job_hash(size=8):
         for _ in range(size)
     )
     return job_hash
-
-
-def chown2me():
-    """ Acquire ownership of all directories under /tmp with the prefix "amavis-". """
-    # TODO: Find a better solution to acquire ownership and only for the directory currently in usse.
-    logger.debug('Invoking chown2me...')
-    config = get_config()
-    proc = subprocess.Popen(config.chown2me_exec,
-                            stdout=subprocess.PIPE,
-                            stderr=subprocess.PIPE)
-    proc.wait()
-    if proc.returncode != 0:
-        logger.error('chown2me exited with code %d' % proc.returncode)
-
-
-def guess_mime_type_from_filename(file_path):
-    """ Guess the type of a file based on its filename or URL. """
-    if not mimetypes.inited:
-        mimetypes.init()
-        mimetypes.add_type('application/javascript', '.jse')
-
-    mt = mimetypes.guess_type(file_path)[0]
-    if mt:
-        return mt
-
-
-def guess_mime_type_from_file_contents(file_path):
-    """  Get type from file magic bytes. """
-    mt = magic.from_file(file_path, mime=True)
-    if mt:
-        return mt
diff --git a/test.py b/test.py
index 5d7abd2..015e09f 100644
--- a/test.py
+++ b/test.py
@@ -95,7 +95,7 @@ class TestDatabase(unittest.TestCase):
         db_con = PeekabooDatabase('sqlite:///' + cls.test_db)
         cls.conf.set_db_con(db_con)
         _set_config(cls.conf)
-        cls.sample = Sample(None, os.path.realpath(__file__))
+        cls.sample = Sample(os.path.realpath(__file__))
         result = RuleResult('Unittest',
                             Result.unknown,
                             'This is just a test case.',
@@ -154,7 +154,7 @@ class TestSample(unittest.TestCase):
         db_con = PeekabooDatabase('sqlite:///' + cls.test_db)
         cls.conf.set_db_con(db_con)
         _set_config(cls.conf)
-        cls.sample = Sample(None, os.path.realpath(__file__))
+        cls.sample = Sample(os.path.realpath(__file__))
 
     def test_attribute_dict(self):
         self.sample.set_attr('Unittest', 'Hello World!')
@@ -165,7 +165,7 @@ class TestSample(unittest.TestCase):
 
     def test_job_hash_regex(self):
         path_with_job_hash = '/var/lib/amavis/tmp/amavis-20170831T132736-07759-iSI0rJ4b/parts'
-        sample = Sample(None, path_with_job_hash)
+        sample = Sample(path_with_job_hash)
         job_hash = sample.get_job_hash()
         self.assertEqual(job_hash, 'amavis-20170831T132736-07759-iSI0rJ4b',
                          'Job hash regex is not working')
@@ -213,7 +213,7 @@ class TestSample(unittest.TestCase):
         test_meta_info += 'queue_id      :\n'
         with open('./junk.info', 'w+') as f:
             f.write(test_meta_info)
-        sample = Sample(None, 'junk')
+        sample = Sample('junk')
         self.assertEqual(sample.file_extension, '')
         sample.load_meta_info('./junk.info')
         self.assertEqual(sample.file_extension, 'docx')
author	Sebastian Deiss <sebastian.deiss@atos.net>	2017-10-16 15:44:52 +0200
committer	Sebastian Deiss <sebastian.deiss@atos.net>	2017-10-16 15:44:52 +0200
commit	14076642c7547a4ac5b48010ea3a61b887caba48 (patch)
tree	e9cd434de3d5f90c3b2d42ee541cd75e806c4781
parent	417da10b33b3e885389dac1270d8d93bc8cf2fc7 (diff)