summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMichael Weiser <michael.weiser@gmx.de>2019-05-07 17:54:34 +0000
committerMichael Weiser <michael.weiser@gmx.de>2019-05-09 08:14:56 +0000
commit652a0515c9de74bd058b6942857e241237007ad8 (patch)
treec0d1abd986008e2bdc2bfdab39ce689ae81288c4
parent82b6931f20ef6d0ce0a975299ad8e4a439aa92f6 (diff)
Let requests and urllib3 do our retries for us
Use a session configured with various retry parameters and a special whitelisting retry class do our retries for us. This offloads all kinds of error handling we've not even thought of yet to urllib3 while still staying highly customizable. This also gives us a nice backoff algorithm when retrying which we only need to parametrize. While at it, unroll the __get() method into the one user of POST (submit()) to make it more readable.
-rw-r--r--peekaboo/toolbox/cuckoo.py136
1 files changed, 97 insertions, 39 deletions
diff --git a/peekaboo/toolbox/cuckoo.py b/peekaboo/toolbox/cuckoo.py
index b1df4b7..bb6ffb3 100644
--- a/peekaboo/toolbox/cuckoo.py
+++ b/peekaboo/toolbox/cuckoo.py
@@ -24,6 +24,8 @@
###############################################################################
+from future.builtins import super # pylint: disable=wrong-import-order
+
import re
import os
import locale
@@ -31,11 +33,12 @@ import logging
import json
import subprocess
import random
+import requests
+import urllib3.util.retry
from time import sleep
from threading import RLock
-import requests
from twisted.internet import protocol, reactor, process
from peekaboo.exceptions import CuckooSubmitFailedException
@@ -243,48 +246,87 @@ class CuckooEmbed(Cuckoo):
(currently) intentionally a no-op. """
pass
+
+class WhitelistRetry(urllib3.util.retry.Retry):
+ """ A Retry class which has a status code whitelist, allowing to retry all
+ requests not whitelisted in a hard-core, catch-all manner. """
+ def __init__(self, status_whitelist=None, **kwargs):
+ super().__init__(**kwargs)
+ self.status_whitelist = status_whitelist or set()
+
+ def is_retry(self, method, status_code, has_retry_after=False):
+ """ Override Retry's is_retry to introduce our status whitelist logic.
+ """
+ # we retry all methods so no check if method is retryable here
+
+ if self.status_whitelist and status_code not in self.status_whitelist:
+ return True
+
+ return super().is_retry(method, status_code, has_retry_after)
+
+
class CuckooApi(Cuckoo):
""" Interfaces with a Cuckoo installation via its REST API. """
- def __init__(self, job_queue, url="http://localhost:8090", poll_interval=5):
+ def __init__(self, job_queue, url="http://localhost:8090", poll_interval=5,
+ retries=5, backoff=0.5):
Cuckoo.__init__(self, job_queue)
self.url = url
self.poll_interval = poll_interval
+
+ # urrlib3 backoff formula:
+ # <backoff factor> * (2 ^ (<retry count so far> - 1))
+ # with second try intentionally having no sleep,
+ # e.g. with retry count==5 and backoff factor==0.5:
+ # try 1: fail, sleep(0.5*2^(1-1)==0.5*2^0==0.5*1==0.5->intentionally
+ # overridden to 0)
+ # try 2: fail, sleep(0.5*2^(2-1)==0.5*2^1==1)
+ # try 3: fail, sleep(0.5*2^(3-1)==0.5*2^2==2)
+ # try 4: fail, sleep(0.5*2^(4-1)==0.5*2^3==4)
+ # try 5: fail, abort, sleep would've been 8 before try 6
+ #
+ # Also, use method_whitelist=False to enable POST and other methods for
+ # retry which aren't by default because they're not considered
+ # idempotent. We assume that with the REST API a request either
+ # succeeds or fails without residual effects, making them atomic and
+ # idempotent.
+ #
+ # And finally we retry everything but a 200 response, which admittedly
+ # is a bit hard-core but serves our purposes for now.
+ retry_config = WhitelistRetry(total=retries,
+ backoff_factor=backoff,
+ method_whitelist=False,
+ status_whitelist=set([200]))
+ retry_adapter = requests.adapters.HTTPAdapter(max_retries=retry_config)
+ self.session = requests.session()
+ self.session.mount('http://', retry_adapter)
+ self.session.mount('https://', retry_adapter)
+
self.reported = self.__status()["tasks"]["reported"]
logger.info("Connection to Cuckoo seems to work, %i reported tasks seen", self.reported)
-
- def __get(self, url, method="get", files=""):
- r = ""
- logger.debug("Requesting %s, method %s" % (url, method))
-
- # try 3 times to get a successfull response
- for retry in range(0, 3):
- try:
- if method == "get":
- r = requests.get("%s/%s" % (self.url, url))
- elif method == "post":
- r = requests.post("%s/%s" % (self.url, url), files=files)
- else:
- break
- if r.status_code != 200:
- continue
- else:
- return r.json()
- except requests.exceptions.Timeout as e:
- # Maybe set up for a retry, or continue in a retry loop
- print(e)
- if e and retry >= 2:
- raise e
- except requests.exceptions.TooManyRedirects as e:
- # Tell the user their URL was bad and try a different one
- print(e)
- if e and retry >= 2:
- raise e
- except requests.exceptions.RequestException as e:
- # catastrophic error. bail.
- print(e)
- if e and retry >= 2:
- raise e
- return None
+
+ def __get(self, path):
+ request_url = "%s/%s" % (self.url, path)
+ logger.debug("Getting %s", request_url)
+
+ try:
+ response = self.session.get(request_url)
+ # all requests exceptions are derived from RequestsException, including
+ # RetryError, TooManyRedirects and Timeout
+ except requests.exceptions.RequestException as error:
+ logger.error('Request to REST API failed: %s', error)
+ return None
+
+ # no check for status code here since we retry all but 200
+ # responses and raise an exception if retries fail
+ try:
+ json_resp = response.json()
+ except ValueError as error:
+ logger.error(
+ 'Invalid JSON in response when getting %s: %s',
+ request_url, error)
+ return None
+
+ return json_resp
def __status(self):
return self.__get("cuckoo/status")
@@ -293,14 +335,30 @@ class CuckooApi(Cuckoo):
path = sample.submit_path
filename = os.path.basename(path)
files = {"file": (filename, open(path, 'rb'))}
- response = self.__get("tasks/create/file", method="post", files=files)
+ logger.debug("Creating Cuckoo task with content from %s and "
+ "filename %s", path, filename)
+
+ try:
+ response = self.session.post(
+ "%s/tasks/create/file" % self.url, files=files)
+ except requests.exceptions.RequestException as error:
+ raise CuckooSubmitFailedException(
+ 'Error creating Cuckoo task: %s' % error)
- task_id = response["task_id"]
+ try:
+ json_resp = response.json()
+ except ValueError as error:
+ raise CuckooSubmitFailedException(
+ 'Invalid JSON in response when creating Cuckoo task: %s'
+ % error)
+
+ task_id = json_resp["task_id"]
if task_id > 0:
self.register_running_job(task_id, sample)
return task_id
+
raise CuckooSubmitFailedException(
- 'Unable to extract job ID from given string %s' % response)
+ 'Unable to extract job ID from response %s' % json_resp)
def get_report(self, job_id):
logger.debug("Report from Cuckoo API requested, job_id = %d" % job_id)