summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorIlya Mashchenko <ilyamaschenko@gmail.com>2019-03-07 13:49:56 +0300
committerGitHub <noreply@github.com>2019-03-07 13:49:56 +0300
commit2175673e29f8ee35d2e48719c6cf0169ddd31405 (patch)
tree7b4e3f15342bc88d3187184ebf87f85d50e96ccb
parentd66b45ac0fa9918944c91c5ed4b3f4e76fb9f950 (diff)
python.d.plugin: use separate process for initial module checking (#5552)
##### Summary This PR makes (major) changes only to the `python.d.plugin` file. Fixes: #5525 `python.d.plugin` imports a lot of additional packages during initial module initialization/job creation/checking, and there is no way to unimport them, even if they aren't needed. This consumes a relatively large amount of RAM. ___ Memory utilization comparison before/after the PR (one job, `example` module, py3.7.2): > 21.1 => 8.8 MiB ![screenshot_20190305_111837](https://user-images.githubusercontent.com/22274335/53791147-c27a6e00-3f39-11e9-8eaf-8ac3809a3b6e.png) ##### Component Name [`collectors/python.d.plugin`](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/python.d.plugin.in) ##### Additional Information This PR adds a separate process for initial module checking. Logic: - the main process spawns a checker process - the checker process loads every module, loads the module config, creates jobs and runs job.check() for every job; if the check succeeds, it adds the job to the list. - the checker process returns the list of modules and jobs. - the main process loads only active modules, etc.
-rw-r--r--collectors/python.d.plugin/powerdns/powerdns.chart.py1
-rw-r--r--collectors/python.d.plugin/python.d.plugin.in973
-rw-r--r--collectors/python.d.plugin/python_modules/bases/FrameworkServices/SimpleService.py8
-rw-r--r--collectors/python.d.plugin/python_modules/bases/loaders.py20
-rw-r--r--collectors/python.d.plugin/python_modules/bases/loggers.py2
5 files changed, 648 insertions, 356 deletions
diff --git a/collectors/python.d.plugin/powerdns/powerdns.chart.py b/collectors/python.d.plugin/powerdns/powerdns.chart.py
index 7ed1554f65..6e5b42a76e 100644
--- a/collectors/python.d.plugin/powerdns/powerdns.chart.py
+++ b/collectors/python.d.plugin/powerdns/powerdns.chart.py
@@ -125,6 +125,7 @@ class Service(UrlService):
UrlService.__init__(self, configuration=configuration, name=name)
self.order = ORDER
self.definitions = CHARTS
+ self.url = configuration.get('url', 'http://127.0.0.1:8081/api/v1/servers/localhost/statistics')
def check(self):
self._manager = self._build_manager()
diff --git a/collectors/python.d.plugin/python.d.plugin.in b/collectors/python.d.plugin/python.d.plugin.in
index 240c44e0f6..8ce621e0bd 100644
--- a/collectors/python.d.plugin/python.d.plugin.in
+++ b/collectors/python.d.plugin/python.d.plugin.in
@@ -8,426 +8,699 @@ echo "ERROR python IS NOT AVAILABLE IN THIS SYSTEM")" "$0" "$@" # '''
# Author: Ilya Mashchenko (l2isbad)
# SPDX-License-Identifier: GPL-3.0-or-later
+
+import collections
+import copy
import gc
+import multiprocessing
import os
+import re
import sys
+import time
import threading
-from re import sub
-from sys import version_info, argv, stdout
-from time import sleep
+ENV_NETDATA_USER_CONFIG_DIR = 'NETDATA_USER_CONFIG_DIR'
+ENV_NETDATA_STOCK_CONFIG_DIR = 'NETDATA_STOCK_CONFIG_DIR'
+ENV_NETDATA_PLUGINS_DIR = 'NETDATA_PLUGINS_DIR'
+ENV_NETDATA_UPDATE_EVERY = 'NETDATA_UPDATE_EVERY'
+
+
+def dirs():
+ user_config = os.getenv(
+ ENV_NETDATA_USER_CONFIG_DIR,
+ '@configdir_POST@',
+ )
+ stock_config = os.getenv(
+ ENV_NETDATA_STOCK_CONFIG_DIR,
+ '@libconfigdir_POST@',
+ )
+ modules_user_config = os.path.join(user_config, 'python.d')
+ modules_stock_config = os.path.join(stock_config, 'python.d')
+
+ modules = os.path.abspath(
+ os.getenv(
+ ENV_NETDATA_PLUGINS_DIR,
+ os.path.dirname(__file__),
+ ) + '/../python.d'
+ )
+ pythond_packages = os.path.join(modules, 'python_modules')
+
+ return collections.namedtuple(
+ 'Dirs',
+ [
+ 'user_config',
+ 'stock_config',
+ 'modules_user_config',
+ 'modules_stock_config',
+ 'modules',
+ 'pythond_packages',
+ ]
+ )(
+ user_config,
+ stock_config,
+ modules_user_config,
+ modules_stock_config,
+ modules,
+ pythond_packages,
+ )
+
+
+DIRS = dirs()
+
+sys.path.append(DIRS.pythond_packages)
+
+
+from bases.collection import safe_print
+from bases.loggers import PythonDLogger
+from bases.loaders import load_config
+from bases.loaders import load_module as _load_module
-GC_RUN = True
-GC_COLLECT_EVERY = 300
+try:
+ from collections import OrderedDict
+except ImportError:
+ from third_party.ordereddict import OrderedDict
-PY_VERSION = version_info[:2]
-USER_CONFIG_DIR = os.getenv('NETDATA_USER_CONFIG_DIR', '@configdir_POST@')
-STOCK_CONFIG_DIR = os.getenv('NETDATA_STOCK_CONFIG_DIR', '@libconfigdir_POST@')
+END_TASK_MARKER = None
-PLUGINS_USER_CONFIG_DIR = os.path.join(USER_CONFIG_DIR, 'python.d')
-PLUGINS_STOCK_CONFIG_DIR = os.path.join(STOCK_CONFIG_DIR, 'python.d')
+IS_ATTY = sys.stdout.isatty()
+PLUGIN_CONF_FILE = 'python.d.conf'
-PLUGINS_DIR = os.path.abspath(os.getenv(
- 'NETDATA_PLUGINS_DIR',
- os.path.dirname(__file__)) + '/../python.d')
+MODULE_SUFFIX = '.chart.py'
+OBSOLETED_MODULES = (
+ 'apache_cache', # replaced by web_log
+ 'gunicorn_log', # replaced by web_log
+ 'nginx_log', # replaced by web_log
+ 'cpufreq', # rewritten in C
+ 'cpuidle', # rewritten in C
+ 'mdstat', # rewritten in C
+ 'linux_power_supply', # rewritten in C
+)
-PYTHON_MODULES_DIR = os.path.join(PLUGINS_DIR, 'python_modules')
-sys.path.append(PYTHON_MODULES_DIR)
+AVAILABLE_MODULES = [
+ m[:-len(MODULE_SUFFIX)] for m in sorted(os.listdir(DIRS.modules))
+ if m.endswith(MODULE_SUFFIX) and m[:-len(MODULE_SUFFIX)] not in OBSOLETED_MODULES
+]
-from bases.loaders import ModuleAndConfigLoader # noqa: E402
-from bases.loggers import PythonDLogger # noqa: E402
-from bases.collection import setdefault_values, run_and_exit, safe_print # noqa: E402
+PLUGIN_BASE_CONF = {
+ 'enabled': True,
+ 'default_run': True,
+ 'gc_run': True,
+ 'gc_interval': 300,
+}
-try:
- from collections import OrderedDict
-except ImportError:
- from third_party.ordereddict import OrderedDict
+JOB_BASE_CONF = {
+ 'update_every': os.getenv(ENV_NETDATA_UPDATE_EVERY, 1),
+ 'priority': 60000,
+ 'autodetection_retry': 0,
+ 'chart_cleanup': 10,
+ 'penalty': True,
+ 'name': str(),
+}
-IS_ATTY = stdout.isatty()
-BASE_CONFIG = {'update_every': os.getenv('NETDATA_UPDATE_EVERY', 1),
- 'priority': 60000,
- 'autodetection_retry': 0,
- 'chart_cleanup': 10,
- 'penalty': True,
- 'name': str()}
+class HeartBeat(threading.Thread):
+ def __init__(self, every):
+ threading.Thread.__init__(self)
+ self.daemon = True
+ self.every = every
+ def run(self):
+ while True:
+ time.sleep(self.every)
+ if IS_ATTY:
+ continue
+ safe_print('\n')
+
+
+def load_module(name):
+ abs_path = os.path.join(DIRS.modules, '{0}{1}'.format(name, MODULE_SUFFIX))
+ return _load_module(name, abs_path)
+
+
+def multi_path_find(name, paths):
+ for path in paths:
+ abs_name = os.path.join(path, name)
+ if os.path.isfile(abs_name):
+ return abs_name
+ return ''
+
+
+Task = collections.namedtuple(
+ 'Task',
+ [
+ 'module_name',
+ 'explicitly_enabled',
+ ],
+)
+
+Result = collections.namedtuple(
+ 'Result',
+ [
+ 'module_name',
+ 'jobs_configs',
+ ],
+)
+
+
+class ModuleChecker(multiprocessing.Process):
+ def __init__(
+ self,
+ task_queue,
+ result_queue,
+ ):
+ multiprocessing.Process.__init__(self)
+ self.log = PythonDLogger()
+ self.log.job_name = 'checker'
+ self.task_queue = task_queue
+ self.result_queue = result_queue
+
+ def run(self):
+ self.log.info('starting...')
+ HeartBeat(1).start()
+ while self.run_once():
+ pass
+ self.log.info('terminating...')
+
+ def run_once(self):
+ task = self.task_queue.get()
+
+ if task is END_TASK_MARKER:
+ self.task_queue.task_done()
+ self.result_queue.put(END_TASK_MARKER)
+ return False
-MODULE_EXTENSION = '.chart.py'
-OBSOLETE_MODULES = ['apache_cache', 'gunicorn_log', 'nginx_log', 'cpufreq', 'cpuidle', 'mdstat', 'linux_power_supply']
+ result = self.do_task(task)
+ if result:
+ self.result_queue.put(result)
+ self.task_queue.task_done()
+ return True
-def module_ok(m):
- return m.endswith(MODULE_EXTENSION) and m[:-len(MODULE_EXTENSION)] not in OBSOLETE_MODULES
+ def do_task(self, task):
+ self.log.info("{0} : checking".format(task.module_name))
+ # LOAD SOURCE
+ module = Module(task.module_name)
+ try:
+ module.load_source()
+ except Exception as error:
+ self.log.warning("{0} : error on loading source : {1}, skipping module".format(
+ task.module_name,
+ error,
+ ))
+ return None
+ else:
+ self.log.info("{0} : source successfully loaded".format(task.module_name))
-ALL_MODULES = [m for m in sorted(os.listdir(PLUGINS_DIR)) if module_ok(m)]
+ if module.is_disabled_by_default() and not task.explicitly_enabled:
+ self.log.info("{0} : disabled by default".format(task.module_name))
+ return None
+ # LOAD CONFIG
+ paths = [
+ DIRS.modules_user_config,
+ DIRS.modules_stock_config,
+ ]
-def parse_cmd():
- debug = 'debug' in argv[1:]
- trace = 'trace' in argv[1:]
- override_update_every = next((arg for arg in argv[1:] if arg.isdigit() and int(arg) > 1), False)
- modules = [''.join([m, MODULE_EXTENSION]) for m in argv[1:] if ''.join([m, MODULE_EXTENSION]) in ALL_MODULES]
- return debug, trace, override_update_every, modules or ALL_MODULES
+ conf_abs_path = multi_path_find(
+ name='{0}.conf'.format(task.module_name),
+ paths=paths,
+ )
+ if conf_abs_path:
+ self.log.info("{0} : found config file '{1}'".format(task.module_name, conf_abs_path))
+ try:
+ module.load_config(conf_abs_path)
+ except Exception as error:
+ self.log.warning("{0} : error on loading config : {1}, skipping module".format(
+ task.module_name, error))
+ return None
+ else:
+ self.log.info("{0} : config was not found in '{1}', using default 1 job config".format(
+ task.module_name, paths))
+
+ # CHECK JOBS
+ jobs = module.create_jobs()
+ self.log.info("{0} : created {1} job(s) from the config".format(task.module_name, len(jobs)))
+
+ successful_jobs_configs = list()
+ for job in jobs:
+ if job.autodetection_retry() > 0:
+ successful_jobs_configs.append(job.config)
+ self.log.info("{0}[{1}]: autodetection job, will be checked in main".format(task.module_name, job.name))
+ continue
-def multi_job_check(config):
- return next((True for key in config if isinstance(config[key], dict)), False)
+ try:
+ job.init()
+ except Exception as error:
+ self.log.warning("{0}[{1}] : unhandled exception on init : {2}, skipping the job)".format(
+ task.module_name, job.name, error))
+ continue
+ try:
+ ok = job.check()
+ except Exception as error:
+ self.log.warning("{0}[{1}] : unhandled exception on check : {2}, skipping the job".format(
+ task.module_name, job.name, error))
+ continue
-class RawModule:
- def __init__(self, name, path, explicitly_enabled=True):
- self.name = name
- self.path = path
- self.explicitly_enabled = explicitly_enabled
-
-
-class Job(object):
- def __init__(self, initialized_job, job_id):
- """
- :param initialized_job: instance of <Class Service>
- :param job_id: <str>
- """
- self.job = initialized_job
- self.id = job_id # key in Modules.jobs()
- self.module_name = self.job.__module__ # used in Plugin.delete_job()
- self.recheck_every = self.job.configuration.pop('autodetection_retry')
- self.checked = False # used in Plugin.check_job()
- self.created = False # used in Plugin.create_job_charts()
- if self.job.update_every < int(OVERRIDE_UPDATE_EVERY):
- self.job.update_every = int(OVERRIDE_UPDATE_EVERY)
-
- def __getattr__(self, item):
- return getattr(self.job, item)
-
- def __repr__(self):
- return self.job.__repr__()
-
- def is_dead(self):
- return bool(self.ident) and not self.is_alive()
-
- def not_launched(self):
- return not bool(self.ident)
-
- def is_autodetect(self):
- return self.recheck_every
-
-
-class Module(object):
- def __init__(self, service, config):
- """
- :param service: <Module>
- :param config: <dict>
- """
+ if not ok:
+ self.log.info("{0}[{1}] : check failed, skipping the job".format(task.module_name, job.name))
+ continue
+
+ self.log.info("{0}[{1}] : check successful".format(task.module_name, job.name))
+
+ job.config['autodetection_retry'] = job.config['update_every']
+ successful_jobs_configs.append(job.config)
+
+ if not successful_jobs_configs:
+ self.log.info("{0} : all jobs failed, skipping module".format(task.module_name))
+ return None
+
+ return Result(module.source.__name__, successful_jobs_configs)
+
+
+class JobConf(OrderedDict):
+ def __init__(self, *args):
+ OrderedDict.__init__(self, *args)
+
+ def set_defaults_from_module(self, module):
+ for k in [k for k in JOB_BASE_CONF if hasattr(module, k)]:
+ self[k] = getattr(module, k)
+
+ def set_defaults_from_config(self, module_config):
+ for k in [k for k in JOB_BASE_CONF if k in module_config]:
+ self[k] = module_config[k]
+
+ def set_job_name(self, name):
+ self['job_name'] = re.sub(r'\s+', '_', name)
+
+ def set_override_name(self, name):
+ self['override_name'] = re.sub(r'\s+', '_', name)
+
+ def as_dict(self):
+ return copy.deepcopy(OrderedDict(self))
+
+
+class Job:
+ def __init__(
+ self,
+ service,
+ module_name,
+ config,
+ ):
self.service = service
- self.name = service.__name__
- self.config = self.jobs_configurations_builder(config)
- self.jobs = OrderedDict()
- self.counter = 1
+ self.config = config
+ self.module_name = module_name
+ self.name = config['job_name']
+ self.override_name = config['override_name']
+ self.wrapped = None
- self.initialize_jobs()
+ def init(self):
+ self.wrapped = self.service(configuration=self.config.as_dict())
- def __repr__(self):
- return "<Class Module '{name}'>".format(name=self.name)
+ def check(self):
+ return self.wrapped.check()
- def __iter__(self):
- return iter(OrderedDict(self.jobs).values())
+ def post_check(self, min_update_every):
+ if self.wrapped.update_every < min_update_every:
+ self.wrapped.update_every = min_update_every
- def __getitem__(self, item):
- return self.jobs[item]
+ def create(self):
+ return self.wrapped.create()
- def __delitem__(self, key):
- del self.jobs[key]
+ def autodetection_retry(self):
+ return self.config['autodetection_retry']
- def __len__(self):
- return len(self.jobs)
+ def run(self):
+ self.wrapped.run()
- def __bool__(self):
- return bool(self.jobs)
- def __nonzero__(self):
- return self.__bool__()
+class Module:
+ def __init__(self, name):
+ self.name = name
+ self.source = None
+ self.config = dict()
- def jobs_configurations_builder(self, config):
- """
- :param config: <dict>
- :return:
- """
- counter = 0
- job_base_config = dict()
+ def is_disabled_by_default(self):
+ return bool(getattr(self.source, 'disabled_by_default', False))
- for attr in BASE_CONFIG:
- job_base_config[attr] = config.pop(attr, getattr(self.service, attr, BASE_CONFIG[attr]))
+ def load_source(self):
+ self.source = load_module(self.name)
- if not config:
- config = {str(): dict()}
- elif not multi_job_check(config):
- config = {str(): config}
+ def load_config(self, abs_path):
+ self.config = load_config(abs_path) or dict()
- for job_name in config:
- if not isinstance(config[job_name], dict):
- continue
+ def gather_jobs_configs(self):
+ job_names = [v for v in self.config if isinstance(self.config[v], dict)]
- job_config = setdefault_values(config[job_name], base_dict=job_base_config)
- job_name = sub(r'\s+', '_', job_name)
- config[job_name]['name'] = sub(r'\s+', '_', config[job_name]['name'])
- counter += 1
- job_id = 'job' + str(counter).zfill(3)
+ if len(job_names) == 0:
+ job_conf = JobConf(JOB_BASE_CONF)
+ job_conf.set_defaults_from_module(self.source)
+ job_conf.update(self.config)
+ job_conf.set_job_name(self.name)
+ job_conf.set_override_name(job_conf.pop('name'))
+ return [job_conf]
- yield job_id, job_name, job_config
+ configs = list()
+ for job_name in job_names:
+ raw_job_conf = self.config[job_name]
+ job_conf = JobConf(JOB_BASE_CONF)
+ job_conf.set_defaults_from_module(self.source)
+ job_conf.set_defaults_from_config(self.config)
+ job_conf.update(raw_job_conf)
+ job_conf.set_job_name(job_name)
+ job_conf.set_override_name(job_conf.pop('name'))
+ configs.append(job_conf)
- def initialize_jobs(self):
- """
- :return:
- """
- for job_id, job_name, job_config in self.config:
- job_config['job_name'] = job_name
- job_config['override_name'] = job_config.pop('name')
+ return configs
- try:
- initialized_job = self.service.Service(configuration=job_config)
- except Exception as error:
- Logger.error("job initialization: '{module_name} {job_name}' "
- "=> ['FAILED'] ({error})".format(module_name=self.name,
- job_name=job_name,
- error=error))
- continue
- else:
- Logger.debug("job initialization: '{module_name} {job_name}' "
- "=> ['OK']".format(module_name=self.name,
- job_name=job_name or self.name))
- self.jobs[job_id] = Job(initialized_job=initialized_job,
- job_id=job_id)
- del self.config
- del self.service
-
-
-class Plugin(object):
- def __init__(self):
- self.loader = ModuleAndConfigLoader()
- self.modules = OrderedDict()
- self.sleep_time = 1
- self.runs_counter = 0
-
- user_config = os.path.join(USER_CONFIG_DIR, 'python.d.conf')
- stock_config = os.path.join(STOCK_CONFIG_DIR, 'python.d.conf')
-
- Logger.debug("loading '{0}'".format(user_config))
- self.config, error = self.loader.load_config_from_file(user_config)
-
- if error:
- Logger.error("cannot load '{0}': {1}. Will try stock version.".format(user_config, error))
- Logger.debug("loading '{0}'".format(stock_config))
- self.config, error = self.loader.load_config_from_file(stock_config)
- if error:
- Logger.error("cannot load '{0}': {1}".format(stock_config, error))
-
- self.do_gc = self.config.get("gc_run", GC_RUN)
- self.gc_interval = self.config.get("gc_interval", GC_COLLECT_EVERY)
-
- if not self.config.get('enabled', True):
- run_and_exit(Logger.info)('DISABLED in configuration file.')
-
- self.load_and_initialize_modules()
- if not self.modules:
- run_and_exit(Logger.info)('No modules to run. Exit...')
-
- def __iter__(self):
- return iter(OrderedDict(self.modules).values())
-
- @property
- def jobs(self):
- return (job for mod in self for job in mod)
-
- @property
- def dead_jobs(self):
- return (job for job in self.jobs if job.is_dead())
-
- @property
- def autodetect_jobs(self):
- return [job for job in self.jobs if job.not_launched()]
-
- def enabled_modules(self):
- for mod in MODULES_TO_RUN:
- mod_name = mod[:-len(MODULE_EXTENSION)]
- mod_path = os.path.join(PLUGINS_DIR, mod)
- if any(
- [
- self.config.get('default_run', True) and self.config.get(mod_name, True),
- (not self.config.get('default_run')) and self.config.get(mod_name),
- ]
- ):
- yield RawModule(
- name=mod_name,
- path=mod_path,
- explicitly_enabled=self.config.get(mod_name),
- )
-
- def load_and_initialize_modules(self):
- for mod in self.enabled_modules():
-
- # Load module from file ------------------------------------------------------------
- loaded_module, error = self.loader.load_module_from_file(mod.name, mod.path)
- log = Logger.error if error else Logger.debug
- log("module load source: '{module_name}' => [{status}]".format(status='FAILED' if error else 'OK',
- module_name=mod.name))
- if error:
- Logger.error("load source error : {0}".format(error))
- continue
+ def create_jobs(self, jobs_conf=None):
+ return [Job(self.source.Service, self.name, conf) for conf in jobs_conf or self.gather_jobs_configs()]
- # Load module config from file ------------------------------------------------------
- user_config = os.path.join(PLUGINS_USER_CONFIG_DIR, mod.name + '.conf')
- stock_config = os.path.join(PLUGINS_STOCK_CONFIG_DIR, mod.name + '.conf')
- Logger.debug("loading '{0}'".format(user_config))
- loaded_config, error = self.loader.load_config_from_file(user_config)
- if error:
- Logger.error("cannot load '{0}' : {1}. Will try stock version.".format(user_config, error))
- Logger.debug("loading '{0}'".format(stock_config))
- loaded_config, error = self.loader.load_config_from_file(stock_config)
+class JobRunner(threading.Thread):
+ def __init__(self, job):
+ threading.Thread.__init__(self)
+ self.daemon = True
+ self.wrapped = job
- if error:
- Logger.error("cannot load '{0}': {1}".format(stock_config, error))
+ def run(self):
+ self.wrapped.run()
- # Skip disabled modules
- if getattr(loaded_module, 'disabled_by_default', False) and not mod.explicitly_enabled:
- Logger.info("module '{0}' disabled by default".format(loaded_module.__name__))
- continue
- # Module initialization ---------------------------------------------------
+class PluginConf(dict):
+ def __init__(self, *args):
+ dict.__init__(self, *args)
- initialized_module = Module(service=loaded_module, config=loaded_config)
- Logger.debug("module status: '{module_name}' => [{status}] "
- "(jobs: {jobs_number})".format(status='OK' if initialized_module else 'FAILED',
- module_name=initialized_module.name,
- jobs_number=len(initialized_module)))
- if initialized_module:
- self.modules[initialized_module.name] = initialized_module
+ def is_module_enabled(self, module_name, explicit):
+ if module_name in self:
+ return self[module_name]
+ if explicit:
+ return False
+ return self['default_run']
+
+
+class Plugin:
+ def __init__(
+ self,
+ min_update_every=1,
+ modules_to_run=tuple(AVAILABLE_MODULES),
+ ):
+ self.log = PythonDLogger()
+ self.config = PluginConf(PLUGIN_BASE_CONF)
+ self.task_queue = multiprocessing.JoinableQueue()
+ self.result_queue = multiprocessing.JoinableQueue()
+ self.min_update_every = min_update_every
+ self.modules_to_run = modules_to_run
+ self.auto_detection_jobs = list()
+ self.tasks = list()
+ self.results = list()
+ self.checked_jobs = collections.defaultdict(list)
+ self.runs = 0
@staticmethod
- def check_job(job):
- """
- :param job: <Job>
- :return:
- """
- try:
- check_ok = bool(job.check())
- except Exception as error:
- job.error('check() unhandled exception: {error}'.format(error=error))
- return None
- else:
- return check_ok
+ def shutdown():
+ safe_print('DISABLE')
+ exit(0)
- @staticmethod
- def create_job_charts(job):
- """
- :param job: <Job>
- :return:
- """
+ def run(self):
+ jobs = self.create_jobs()
+ if not jobs:
+ return
+
+ for job in self.prepare_jobs(jobs):
+ self.log.info('{0}[{1}] : started in thread'.format(job.module_name, job.name))
+ JobRunner(job).start()
+
+ self.serve()
+
+ def enqueue_tasks(self):
+ for task in self.tasks:
+ self.task_queue.put(task)
+ self.task_queue.put(END_TASK_MARKER)
+
+ def dequeue_results(self):
+ while True:
+ result = self.result_queue.get()
+ self.result_queue.task_done()
+ if result is END_TASK_MARKER:
+ break
+ self.results.append(result)
+
+ def load_config(self):
+ paths = [
+ DIRS.user_config,
+ DIRS.stock_config,
+ ]
+
+ self.log.info("checking for config in {0}".format(paths))
+ abs_path = multi_path_find(name=PLUGIN_CONF_FILE, paths=paths)
+ if not abs_path:
+ self.log.warning('config was not found, using defaults')
+ return True
+
+ self.log.info("config found, loading config '{0}'".format(abs_path))
try:
- create_ok = job.create()
+ config = load_config(abs_path) or dict()
except Exception as error:
- job.error('create() unhandled exception: {error}'.format(error=error))
+ self.log.error('error on loading config : {0}'.format(error))
return False
- else:
- return create_ok
-
- def delete_job(self, job):
- """
- :param job: <Job>
- :return:
- """
- del self.modules[job.module_name][job.id]
-
- def run_check(self):
- checked = list()
- for job in self.jobs:
- if job.name in checked:
- job.info('check() => [DROPPED] (already served by another job)')
- self.delete_job(job)
+
+ self.log.info('config successfully loaded')
+ self.config.update(config)
+ return True
+
+ def setup(self):
+ self.log.info('starting setup')
+ if not self.load_config():
+ return False
+
+ if not self.config['enabled']:
+ self.log.info('disabled in configuration file')
+ return False
+
+ for mod in self.modules_to_run:
+ if self.config.is_module_enabled(mod, False):
+ task = Task(mod, self.config.is_module_enabled(mod, True))
+ self.tasks.append(task)
+ else:
+ self.log.info("{0} : disabled in configuration file".format(mod))
+
+ if not self.tasks:
+ self.log.info('no modules to run')
+ return False
+
+ worker = ModuleChecker(self.task_queue, self.result_queue)
+ self.log.info('starting checker process ({0} module(s) to check)'.format(len(self.tasks)))
+ worker.start()
+
+ # TODO: timeouts?
+ self.enqueue_tasks()
+ self.task_queue.join()
+ self.dequeue_results()
+ self.result_queue.join()
+ self.log.info('stopping checker process')
+ worker.join()
+
+ if not self.results:
+ self.log.info('no modules to run')
+ return False
+
+ self.log.info("setup complete, {0} active module(s) : '{1}'".format(
+ len(self.results),
+ [v.module_name for v in self.results])
+ )
+
+ return True
+
+ def create_jobs(self):
+ jobs = list()
+ for result in self.results:
+ module = Module(result.module_name)
+ try:
+ module.load_source()
+ except Exception as error:
+ self.log.warning("{0} : error on loading module source : {1}, skipping module".format(
+ result.module_name, error))
continue
- ok = self.check_job(job)
- if ok:
- job.info('check() => [OK]')
- checked.append(job.name)
- job.checked = True
+
+ module_jobs = module.create_jobs(result.jobs_configs)
+ self.log.info("{0} : created {1} job(s)".format(module.name, len(module_jobs)))
+ jobs.extend(module_jobs)
+
+ return jobs
+
+ def prepare_jobs(self, jobs):
+ prepared = list()
+
+ for job in jobs:
+ check_name = job.override_name or job.name
+ if check_name in self.checked_jobs[job.module_name]:
+ self.log.info('{0}[{1}] : already served by another job, skipping the job'.format(
+ job.module_name, job.name))
continue
- if not job.is_autodetect() or ok is None:
- job.info('check() => [FAILED]')
- self.delete_job(job)
- else:
- job.info('check() => [RECHECK] (autodetection_retry: {0})'.format(job.recheck_every))
- def run_create(self):
- for job in self.jobs:
- if not job.checked:
- # skip autodetection_retry jobs
+ try:
+ job.init()
+ except Exception as error:
+ self.log.warning("{0}[{1}] : unhandled exception on init : {2}, skipping the job".format(
+ job.module_name, job.name, error))
+ continue
+
+ self.log.info("{0}[{1}] : init successful".format(job.module_name, job.name))
+
+ try:
+ ok = job.check()
+ except Exception as error:
+ self.log.warning("{0}[{1}] : unhandled exception on check : {2}, skipping the job".format(
+ job.module_name, job.name, error))
continue
- ok = self.create_job_charts(job)
- if ok:
- job.debug('create() => [OK] (charts: {0})'.format(len(job.charts)))
- job.created = True
+
+ if not ok:
+ self.log.info('{0}[{1}] : check failed'.format(job.module_name, job.name))
+ if job.autodetection_retry() > 0:
+ self.log.info('{0}[{1}] : will recheck every {2} second(s)'.format(
+ job.module_name, job.name, job.autodetection_retry()))
+ self.auto_detection_jobs.append(job)
continue
- job.error('create() => [FAILED] (charts: {0})'.format(len(job.charts)))
- self.delete_job(job)
- def start(self):
- self.run_check()
- self.run_create()
- for job in self.jobs:
- if job.created:
- job.start()
+ self.log.info('{0}[{1}] : check successful'.format(job.module_name, job.name))
+
+ job.post_check(int(self.min_update_every))
+
+ if not job.create():
+ self.log.info('{0}[{1}] : create failed'.format(job.module_name, job.name))
+
+ self.checked_jobs[job.module_name].append(check_name)
+ prepared.append(job)
+
+ return prepared
+
+ def serve(self):
+ gc_run = self.config['gc_run']
+ gc_interval = self.config['gc_interval']
while True:
- if threading.active_count() <= 1 and not self.autodetect_jobs:
- run_and_exit(Logger.info)('FINISHED')
+ self.runs += 1
- sleep(self.sleep_time)
- self.cleanup()
- self.autodetect_retry()
+ if threading.active_count() <= 3 and not self.auto_detection_jobs:
+ return
- # FIXME: https://github.com/netdata/netdata/issues/3817
- if self.do_gc and self.runs_counter % self.gc_interval == 0:
+ time.sleep(1)
+
+ if gc_run and self.runs % gc_interval == 0:
v = gc.collect()
- Logger.debug("GC full collection run result: {0}".format(v))
-
- # for exiting on SIGPIPE
- if not IS_ATTY: