diff options
author | Michael Weiser <michael.weiser@gmx.de> | 2019-05-07 17:54:34 +0000 |
---|---|---|
committer | Michael Weiser <michael.weiser@gmx.de> | 2019-05-09 08:14:56 +0000 |
commit | 652a0515c9de74bd058b6942857e241237007ad8 (patch) | |
tree | c0d1abd986008e2bdc2bfdab39ce689ae81288c4 | |
parent | 82b6931f20ef6d0ce0a975299ad8e4a439aa92f6 (diff) |
Let requests and urllib3 do our retries for us
Use a session configured with various retry parameters and a special
whitelisting retry class to do our retries for us. This offloads all kinds
of error handling we've not even thought of yet to urllib3 while still
staying highly customizable. This also gives us a nice backoff algorithm
when retrying, which we only need to parametrize.
While at it, unroll the __get() method into the one user of POST
(submit()) to make it more readable.
-rw-r--r-- | peekaboo/toolbox/cuckoo.py | 136 |
1 files changed, 97 insertions, 39 deletions
diff --git a/peekaboo/toolbox/cuckoo.py b/peekaboo/toolbox/cuckoo.py index b1df4b7..bb6ffb3 100644 --- a/peekaboo/toolbox/cuckoo.py +++ b/peekaboo/toolbox/cuckoo.py @@ -24,6 +24,8 @@ ############################################################################### +from future.builtins import super # pylint: disable=wrong-import-order + import re import os import locale @@ -31,11 +33,12 @@ import logging import json import subprocess import random +import requests +import urllib3.util.retry from time import sleep from threading import RLock -import requests from twisted.internet import protocol, reactor, process from peekaboo.exceptions import CuckooSubmitFailedException @@ -243,48 +246,87 @@ class CuckooEmbed(Cuckoo): (currently) intentionally a no-op. """ pass + +class WhitelistRetry(urllib3.util.retry.Retry): + """ A Retry class which has a status code whitelist, allowing to retry all + requests not whitelisted in a hard-core, catch-all manner. """ + def __init__(self, status_whitelist=None, **kwargs): + super().__init__(**kwargs) + self.status_whitelist = status_whitelist or set() + + def is_retry(self, method, status_code, has_retry_after=False): + """ Override Retry's is_retry to introduce our status whitelist logic. + """ + # we retry all methods so no check if method is retryable here + + if self.status_whitelist and status_code not in self.status_whitelist: + return True + + return super().is_retry(method, status_code, has_retry_after) + + class CuckooApi(Cuckoo): """ Interfaces with a Cuckoo installation via its REST API. """ - def __init__(self, job_queue, url="http://localhost:8090", poll_interval=5): + def __init__(self, job_queue, url="http://localhost:8090", poll_interval=5, + retries=5, backoff=0.5): Cuckoo.__init__(self, job_queue) self.url = url self.poll_interval = poll_interval + + # urrlib3 backoff formula: + # <backoff factor> * (2 ^ (<retry count so far> - 1)) + # with second try intentionally having no sleep, + # e.g. 
with retry count==5 and backoff factor==0.5: + # try 1: fail, sleep(0.5*2^(1-1)==0.5*2^0==0.5*1==0.5->intentionally + # overridden to 0) + # try 2: fail, sleep(0.5*2^(2-1)==0.5*2^1==1) + # try 3: fail, sleep(0.5*2^(3-1)==0.5*2^2==2) + # try 4: fail, sleep(0.5*2^(4-1)==0.5*2^3==4) + # try 5: fail, abort, sleep would've been 8 before try 6 + # + # Also, use method_whitelist=False to enable POST and other methods for + # retry which aren't by default because they're not considered + # idempotent. We assume that with the REST API a request either + # succeeds or fails without residual effects, making them atomic and + # idempotent. + # + # And finally we retry everything but a 200 response, which admittedly + # is a bit hard-core but serves our purposes for now. + retry_config = WhitelistRetry(total=retries, + backoff_factor=backoff, + method_whitelist=False, + status_whitelist=set([200])) + retry_adapter = requests.adapters.HTTPAdapter(max_retries=retry_config) + self.session = requests.session() + self.session.mount('http://', retry_adapter) + self.session.mount('https://', retry_adapter) + self.reported = self.__status()["tasks"]["reported"] logger.info("Connection to Cuckoo seems to work, %i reported tasks seen", self.reported) - - def __get(self, url, method="get", files=""): - r = "" - logger.debug("Requesting %s, method %s" % (url, method)) - - # try 3 times to get a successfull response - for retry in range(0, 3): - try: - if method == "get": - r = requests.get("%s/%s" % (self.url, url)) - elif method == "post": - r = requests.post("%s/%s" % (self.url, url), files=files) - else: - break - if r.status_code != 200: - continue - else: - return r.json() - except requests.exceptions.Timeout as e: - # Maybe set up for a retry, or continue in a retry loop - print(e) - if e and retry >= 2: - raise e - except requests.exceptions.TooManyRedirects as e: - # Tell the user their URL was bad and try a different one - print(e) - if e and retry >= 2: - raise e - except 
requests.exceptions.RequestException as e: - # catastrophic error. bail. - print(e) - if e and retry >= 2: - raise e - return None + + def __get(self, path): + request_url = "%s/%s" % (self.url, path) + logger.debug("Getting %s", request_url) + + try: + response = self.session.get(request_url) + # all requests exceptions are derived from RequestsException, including + # RetryError, TooManyRedirects and Timeout + except requests.exceptions.RequestException as error: + logger.error('Request to REST API failed: %s', error) + return None + + # no check for status code here since we retry all but 200 + # responses and raise an exception if retries fail + try: + json_resp = response.json() + except ValueError as error: + logger.error( + 'Invalid JSON in response when getting %s: %s', + request_url, error) + return None + + return json_resp def __status(self): return self.__get("cuckoo/status") @@ -293,14 +335,30 @@ class CuckooApi(Cuckoo): path = sample.submit_path filename = os.path.basename(path) files = {"file": (filename, open(path, 'rb'))} - response = self.__get("tasks/create/file", method="post", files=files) + logger.debug("Creating Cuckoo task with content from %s and " + "filename %s", path, filename) + + try: + response = self.session.post( + "%s/tasks/create/file" % self.url, files=files) + except requests.exceptions.RequestException as error: + raise CuckooSubmitFailedException( + 'Error creating Cuckoo task: %s' % error) - task_id = response["task_id"] + try: + json_resp = response.json() + except ValueError as error: + raise CuckooSubmitFailedException( + 'Invalid JSON in response when creating Cuckoo task: %s' + % error) + + task_id = json_resp["task_id"] if task_id > 0: self.register_running_job(task_id, sample) return task_id + raise CuckooSubmitFailedException( - 'Unable to extract job ID from given string %s' % response) + 'Unable to extract job ID from response %s' % json_resp) def get_report(self, job_id): logger.debug("Report from Cuckoo API 
requested, job_id = %d" % job_id) |