summaryrefslogtreecommitdiffstats
path: root/peekaboo/toolbox/cuckoo.py
blob: 86b08f5575a78076fccfd9132f7578e41ac074a7 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
###############################################################################
#                                                                             #
# Peekaboo Extended Email Attachment Behavior Observation Owl                 #
#                                                                             #
# toolbox/                                                                    #
#         cuckoo.py                                                           #
###############################################################################
#                                                                             #
# Copyright (C) 2016-2018  science + computing ag                             #
#                                                                             #
# This program is free software: you can redistribute it and/or modify        #
# it under the terms of the GNU General Public License as published by        #
# the Free Software Foundation, either version 3 of the License, or (at       #
# your option) any later version.                                             #
#                                                                             #
# This program is distributed in the hope that it will be useful, but         #
# WITHOUT ANY WARRANTY; without even the implied warranty of                  #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU           #
# General Public License for more details.                                    #
#                                                                             #
# You should have received a copy of the GNU General Public License           #
# along with this program.  If not, see <http://www.gnu.org/licenses/>.       #
#                                                                             #
###############################################################################


import re
import os
import logging
import json
import subprocess
from twisted.internet import protocol
from peekaboo import MultiRegexMatcher
from peekaboo.config import get_config
from peekaboo.exceptions import CuckooAnalysisFailedException
from peekaboo.toolbox.sampletools import ConnectionMap
from peekaboo.queuing import JobQueue


logger = logging.getLogger(__name__)


def submit_to_cuckoo(sample):
    """
    Submit a file or directory to Cuckoo for behavioural analysis.

    Runs the configured Cuckoo submit command with ``sample`` appended as
    its last argument and extracts the task ID from the command's standard
    output.

    :param sample: Path to a file or a directory.
    :return: The job ID used by Cuckoo to identify this analysis task.
    :raises CuckooAnalysisFailedException: if the submit command cannot be
            run, exits with a non-zero return code, or prints no
            recognisable task ID.
    """
    config = get_config()
    # Build a fresh argument list. Appending to config.cuckoo_submit itself
    # would modify the shared configuration, so every later submission
    # would also carry all previously submitted samples on its command line.
    command = list(config.cuckoo_submit) + [sample]
    try:
        p = subprocess.Popen(command,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE)
        # communicate() drains both pipes while waiting for the process.
        # Calling wait() first can deadlock once the child has filled a
        # pipe buffer that nobody is reading.
        out, err = p.communicate()
    except Exception as e:
        raise CuckooAnalysisFailedException(e)

    if p.returncode != 0:
        # TODO: tell opponent on socket that file has not been checked.
        raise CuckooAnalysisFailedException('cuckoo submit returned a non-zero return code.')

    logger.debug("cuckoo submit STDOUT: %s", out)
    logger.debug("cuckoo submit STDERR: %s", err)

    # process output to get job ID
    patterns = [
        # Example: Success: File "/var/lib/peekaboo/.bashrc" added as task with ID #4
        ".*Success.*: File .* added as task with ID #([0-9]*).*",
        ".*added as task with ID ([0-9]*).*",
    ]
    matcher = MultiRegexMatcher(patterns)
    response = out.replace("\n", "")
    m = matcher.match(response)

    # The ID group may legally match an empty string ([0-9]*); treat that
    # like "no match" instead of letting int('') raise a ValueError.
    if m and m.group(1):
        # Only report the matched pattern once we know there was a match.
        logger.debug('Pattern %d matched.', matcher.matched_pattern)
        return int(m.group(1))

    raise CuckooAnalysisFailedException(
        'Unable to extract job ID from given string %s' % response
    )


class CuckooServer(protocol.ProcessProtocol):
    """
    Class that is used by twisted.internet.reactor to process Cuckoo
    output and process its behavior.

    Cuckoo's STDERR is scanned line by line for two log messages: the
    start of an analysis (only logged) and the completion of report
    generation (triggers loading of the report and re-queuing of the
    sample).

    Usage:
    srv = CuckooServer()
    reactor.spawnProcess(srv, 'python2', ['python2', '/path/to/cuckoo.py'])
    reactor.run()

    @author: Felix Bauer
    @author: Sebastian Deiss
    """
    # FILE SUBMITTED
    #
    # 2016-04-12 09:14:06,984 [lib.cuckoo.core.scheduler] INFO: Starting
    # analysis of FILE "cuckoo.png" (task #201, options "")
    # INFO: Starting analysis of FILE ".bashrc" (task #4, options "")
    _FILE_SUBMITTED = re.compile(
        r'.*INFO: Starting analysis of FILE "(.*)" \(task #([0-9]*), options .*'
    )

    # ANALYSIS DONE
    #
    # 2016-04-12 09:25:27,824 [lib.cuckoo.core.scheduler] INFO: Task #202:
    # reports generation completed ...
    _ANALYSIS_DONE = re.compile(
        r".*INFO: Task #([0-9]*): reports generation completed.*"
    )

    def __init__(self):
        # Most recently created CuckooReport, or None if none was created.
        self.__report = None

    def connectionMade(self):
        """ on successful start of the Cuckoo process """
        logger.info('Connected. Cuckoo PID: %s' % self.transport.pid)
        return None

    def outReceived(self, data):
        """ on receiving output on STDOUT from Cuckoo """
        logger.debug('STDOUT %s' % str(data))

    def errReceived(self, data):
        """
        on receiving output on STDERR from Cuckoo

        :param data: Chunk of STDERR output; may contain several log lines.
        """
        logger.debug('STDERR %s' % str(data.replace('\n', '')))

        # A single chunk can carry more than one log line. Matching the
        # whole chunk at once only ever looks at its first line because
        # '.' does not match a newline, so each line is examined on its own.
        for line in data.splitlines():
            self._handle_log_line(line)

    def _handle_log_line(self, line):
        """ React to a single line of Cuckoo log output. """
        # File submitted: printed out but has no further effect.
        m = self._FILE_SUBMITTED.match(line)
        if m:
            logger.info("File submitted: task #%s, filename %s" % (m.group(2),
                                                                   m.group(1)))

        m = self._ANALYSIS_DONE.match(line)
        if not m:
            return

        job_id = int(m.group(1))
        logger.debug("Analysis done for task #%d" % job_id)
        logger.debug("Remaining connections: %d" % ConnectionMap.size())
        sample = ConnectionMap.get_sample_by_job_id(job_id)
        if not sample:
            logger.debug('No connection found for ID %d' % job_id)
            return

        logger.debug('Requesting Cuckoo report for sample %s' % sample)
        self.__report = CuckooReport(job_id)
        sample.set_attr('cuckoo_report', self.__report)
        sample.set_attr('cuckoo_json_report_file', self.__report.file_path)
        JobQueue.submit(sample, self.__class__)
        logger.debug("Remaining connections: %d" % ConnectionMap.size())

    def inConnectionLost(self):
        """ Cuckoo closed its STDIN: terminate this process immediately. """
        logger.debug("Cuckoo closed STDIN")
        os._exit(1)

    def outConnectionLost(self):
        """ Cuckoo closed its STDOUT: terminate this process immediately. """
        logger.debug("Cuckoo closed STDOUT")
        os._exit(1)

    def errConnectionLost(self):
        """ Cuckoo closed its STDERR: terminate this process immediately. """
        logger.warning("Cuckoo closed STDERR")
        os._exit(1)

    def processExited(self, reason):
        """ Cuckoo exited: log its exit code and terminate this process. """
        logger.info("Cuckoo exited with status %s" % str(reason.value.exitCode))
        os._exit(0)

    def processEnded(self, reason):
        """ Cuckoo ended: log its exit code and terminate this process. """
        logger.info("Cuckoo ended with status %s" % str(reason.value.exitCode))
        os._exit(0)


class CuckooReport(object):
    """
    Represents a Cuckoo analysis JSON report.

    The report file is read when the object is created. If the file exists
    but does not contain valid JSON, ``report`` stays ``None`` and the
    accessor properties return their empty defaults.

    @author: Sebastian Deiss
    """
    def __init__(self, job_id):
        """
        :param job_id: ID of the Cuckoo task whose report is to be loaded.
        :raises OSError: if no report file exists for this task.
        """
        self.job_id = job_id
        # Path of the JSON report file, set once the file has been found.
        self.file_path = None
        # Parsed JSON report, or None if it could not be parsed.
        self.report = None
        self._parse()

    def _parse(self):
        """
        Reads the JSON report from Cuckoo and stores it in ``self.report``.

        A report that is not valid JSON is logged and leaves ``self.report``
        set to None.

        :raises OSError: if the report file does not exist.
        """
        config = get_config()
        cuckoo_report = os.path.join(
            config.cuckoo_storage,
            'analyses/%d/reports/report.json' % self.job_id
        )

        if not os.path.isfile(cuckoo_report):
            raise OSError('Cuckoo report not found at %s.' % cuckoo_report)

        logger.debug(
            'Accessing Cuckoo report for task %d at %s ',
            self.job_id, cuckoo_report
        )
        self.file_path = cuckoo_report
        with open(cuckoo_report) as data:
            try:
                self.report = json.load(data)
            except ValueError as e:
                logger.exception(e)

    # NOTE: all accessors below catch TypeError in addition to KeyError.
    # self.report is None when the report could not be parsed, and
    # subscripting None raises TypeError rather than KeyError.

    @property
    def requested_domains(self):
        """
        Gets the requested domains from the Cuckoo report.

        :return: The requested domains from the Cuckoo report or an empty
                 list if the report is missing or lacks this information.
        """
        try:
            return [d['request'] for d in self.report['network']['dns']]
        except (KeyError, TypeError):
            return []

    @property
    def signatures(self):
        """
        Gets the triggered signatures from the Cuckoo report.

        :return: The triggered signatures from the Cuckoo report or an
                 empty list if the report is missing or lacks this
                 information.
        """
        try:
            return self.report['signatures']
        except (KeyError, TypeError):
            return []

    @property
    def score(self):
        """
        Gets the score from the Cuckoo report.

        :return: The score from the Cuckoo report or 0.0 if the report is
                 missing or lacks this information.
        """
        try:
            return self.report['info']['score']
        except (KeyError, TypeError):
            return 0.0

    @property
    def errors(self):
        """
        Errors occurred during Cuckoo analysis.

        :return: The errors occurred during Cuckoo analysis or an empty
                 list if the report is missing or lacks this information.
        """
        try:
            return self.report['debug']['errors']
        except (KeyError, TypeError):
            return []

    @property
    def analysis_failed(self):
        """
        Has the Cuckoo analysis failed?

        The analysis counts as successful only if one of the entries in the
        report's Cuckoo debug log contains 'analysis completed successfully'.

        :return: True if the Cuckoo analysis failed, otherwise False.
        """
        errors = self.errors
        if errors:
            logger.warning('Cuckoo produced %d error(s) during processing.',
                           len(errors))
        try:
            log = self.report['debug']['cuckoo']
            return not any(
                'analysis completed successfully' in entry for entry in log
            )
        except (KeyError, TypeError):
            return True