summary refs log tree commit diff stats
path: root/ngxtop/config_parser.py
diff options
context:
space:
mode:
Diffstat (limited to 'ngxtop/config_parser.py')
-rw-r--r-- ngxtop/config_parser.py 145
1 files changed, 145 insertions, 0 deletions
diff --git a/ngxtop/config_parser.py b/ngxtop/config_parser.py
new file mode 100644
index 0000000..82a2d2a
--- /dev/null
+++ b/ngxtop/config_parser.py
@@ -0,0 +1,145 @@
+"""
+Nginx config parser and pattern builder.
+"""
+import os
+import re
+import subprocess
+
+from pyparsing import Literal, Word, ZeroOrMore, OneOrMore, Group, \
+ printables, quotedString, pythonStyleComment, removeQuotes
+
+from utils import choose_one, error_exit
+
+
+# Regex metacharacters that build_pattern() prefixes with a backslash so they
+# match literally in a log line
+REGEX_SPECIAL_CHARS = r'([\.\*\+\?\|\(\)\{\}\[\]])'
+# Matches a `$variable` reference; group 1 is the variable name without the `$`
+REGEX_LOG_FORMAT_VARIABLE = r'\$([a-z0-9\_]+)'
+# Format string used in this module whenever the format name is 'combined'
+LOG_FORMAT_COMBINED = '$remote_addr - $remote_user [$time_local] ' \
+ '"$request" $status $body_bytes_sent ' \
+ '"$http_referer" "$http_user_agent"'
+
+# common parser elements
+# directive terminator: matched, but suppressed from the parse results
+semicolon = Literal(';').suppress()
+# nginx string parameter can contain any character except: { ; " '
+parameter = Word(''.join(c for c in printables if c not in set('{;"\'')))
+# which can also be quoted (the surrounding quotes are stripped from the result)
+parameter = parameter | quotedString.setParseAction(removeQuotes)
+
+
+def detect_config_path():
+    """
+    Work out where the nginx configuration file lives by running `nginx -V`.
+
+    The build options are searched for `--conf-path=`, then for `--prefix=`
+    (to which '/conf/nginx.conf' is appended); when neither option is present
+    '/etc/nginx/nginx.conf' is returned.
+    :return: detected nginx configuration file path
+    """
+    try:
+        nginx = subprocess.Popen(['nginx', '-V'], stderr=subprocess.PIPE)
+    except OSError:
+        error_exit('Access log file or format was not set and nginx config file cannot be detected. ' +
+                   'Perhaps nginx is not in your PATH?')
+
+    # only stderr is piped, so that is the stream the build options are read from
+    build_info = nginx.communicate()[1].decode('utf-8')
+
+    # options in priority order, each paired with the suffix to append to its value
+    candidates = (
+        (r'--conf-path=(\S*)', ''),
+        (r'--prefix=(\S*)', '/conf/nginx.conf'),
+    )
+    for option, suffix in candidates:
+        found = re.search(option, build_info)
+        if found is not None:
+            return found.group(1) + suffix
+    return '/etc/nginx/nginx.conf'
+
+
+def get_access_logs(config):
+    """
+    Parse config for access_log directives.
+
+    Directives that are switched off (`access_log off;`), that log to syslog,
+    or that carry no parameters at all are skipped.
+    :param config: nginx configuration text to search
+    :return: iterator over ('path', 'format name') tuple of found directives;
+             the format name is 'combined' when the directive does not name one
+    """
+    access_log = Literal("access_log") + ZeroOrMore(parameter) + semicolon
+    access_log.ignore(pythonStyleComment)
+
+    for directive in access_log.searchString(config).asList():
+        # directive[0] is the 'access_log' keyword itself. The grammar accepts
+        # zero parameters, so a bare `access_log;` has no path to index.
+        if len(directive) < 2:
+            continue
+
+        path = directive[1]
+        if path == 'off' or path.startswith('syslog:'):
+            # nothing to process here
+            continue
+
+        format_name = 'combined'
+        # a second parameter containing '=' is a key=value option, not a format name
+        if len(directive) > 2 and '=' not in directive[2]:
+            format_name = directive[2]
+
+        yield path, format_name
+
+
+def get_log_formats(config):
+    """
+    Parse config for log_format directives.
+
+    The string parameters of each directive are concatenated into a single
+    format string. An optional leading `escape=default|json|none` parameter
+    is an nginx option rather than part of the format, so it is left out.
+    :param config: nginx configuration text to search
+    :return: iterator over ('format name', 'format string') tuple of found directives
+    """
+    # log_format name [escape=default|json|none] string ...;
+    log_format = Literal('log_format') + parameter + Group(OneOrMore(parameter)) + semicolon
+    log_format.ignore(pythonStyleComment)
+
+    escape_options = ('escape=default', 'escape=json', 'escape=none')
+
+    for directive in log_format.searchString(config).asList():
+        name = directive[1]
+        parts = directive[2]
+        # only drop the option when at least one real format string follows it
+        if len(parts) > 1 and parts[0] in escape_options:
+            parts = parts[1:]
+        format_string = ''.join(parts)
+        yield name, format_string
+
+
+def detect_log_config(arguments):
+    """
+    Detect access log config (path and format) of nginx. Offer user to select if multiple access logs are detected.
+
+    The config file is taken from arguments['--config'], or detected with
+    detect_config_path() when that value is None. error_exit() is called when
+    the config file does not exist, when it declares no usable access log, or
+    when the chosen access log names a format that is not defined.
+    :param arguments: mapping of command line options; only '--config' is read
+    :return: path and format of detected / selected access log
+    """
+    config = arguments['--config']
+    if config is None:
+        config = detect_config_path()
+    if not os.path.exists(config):
+        error_exit('Nginx config file not found: %s' % config)
+
+    with open(config) as f:
+        config_str = f.read()
+    access_logs = dict(get_access_logs(config_str))
+    if not access_logs:
+        error_exit('Access log file is not provided and ngxtop cannot detect it from your config file (%s).' % config)
+
+    log_formats = dict(get_log_formats(config_str))
+    if len(access_logs) == 1:
+        # dict views cannot be indexed on Python 3, so take the single item
+        # through an iterator; this works on Python 2 as well
+        log_path, format_name = next(iter(access_logs.items()))
+    else:
+        # multiple access logs configured, offer to select one
+        print('Multiple access logs detected in configuration:')
+        # pass a real list so choose_one gets the same type on Python 2 and 3
+        log_path = choose_one(list(access_logs), 'Select access log file to process: ')
+        format_name = access_logs[log_path]
+
+    # 'combined' needs no log_format directive, whichever way the log was
+    # picked, so resolve it before looking at the formats found in the config
+    if format_name == 'combined':
+        return log_path, LOG_FORMAT_COMBINED
+    if format_name not in log_formats:
+        error_exit('Incorrect format name set in config for access log file "%s"' % log_path)
+    return log_path, log_formats[format_name]
+
+
+def build_pattern(log_format):
+    """
+    Compile a regular expression that matches log lines written in a format.
+
+    Regex metacharacters in the format are backslash-escaped, then every
+    `$variable` becomes a named group `(?P<variable>.*)`.
+    :param log_format: format string to parse; the name 'combined' stands for
+                       LOG_FORMAT_COMBINED
+    :return: regular expression to parse given format
+    """
+    fmt = LOG_FORMAT_COMBINED if log_format == 'combined' else log_format
+    # escape first, so the group syntax inserted next is not itself escaped
+    escaped = re.sub(REGEX_SPECIAL_CHARS, r'\\\1', fmt)
+    with_groups = re.sub(REGEX_LOG_FORMAT_VARIABLE, '(?P<\\1>.*)', escaped)
+    return re.compile(with_groups)
+
+
+def extract_variables(log_format):
+    """
+    Yield the name of every `$variable` that appears in a log format string.
+
+    Names are yielded without the leading `$`, in order of appearance.
+    :param log_format: format string to extract; the name 'combined' stands
+                       for LOG_FORMAT_COMBINED
+    :return: iterator over all variables in given format string
+    """
+    fmt = LOG_FORMAT_COMBINED if log_format == 'combined' else log_format
+    for found in re.finditer(REGEX_LOG_FORMAT_VARIABLE, fmt):
+        # group 1 is the variable name captured by the pattern
+        yield found.group(1)
+