summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--README.rst12
-rw-r--r--ngxtop/config_parser.py150
-rwxr-xr-xngxtop/ngxtop.py202
-rw-r--r--ngxtop/utils.py19
-rw-r--r--setup.py2
-rw-r--r--tests/test_config_parser.py60
6 files changed, 279 insertions, 166 deletions
diff --git a/README.rst b/README.rst
index ecb4904..2680597 100644
--- a/README.rst
+++ b/README.rst
@@ -5,8 +5,11 @@
**ngxtop** parses your nginx access log and outputs useful, ``top``-like, metrics of your nginx server.
So you can tell what is happening with your server in real-time.
-Can read from stdin (experimental), useful for remote log files. In this case, if not log
-format is specified, 'combined' will be used.
+``ngxtop`` tries to determine the correct location and format of the nginx access log file by default, so you can just run
+``ngxtop`` and have a close look at all requests coming to your nginx server. But it does not limit you to nginx
+and the default top view. ``ngxtop`` is flexible enough for you to configure and change most of its behaviours.
+You can query for different things, specify your log and format, even parse a remote Apache common access log with ease.
+See sample usages below for some ideas about what you can do with it.
Installation
------------
@@ -51,7 +54,6 @@ Usage
-c <file>, --config <file> allow ngxtop to parse nginx config file for log format and location.
-i <filter-expression>, --filter <filter-expression> filter in, records satisfied given expression are processed.
-p <filter-expression>, --pre-filter <filter-expression> in-filter expression to check in pre-parsing phase.
- -b, --db-dump dump database to disk when finished
Samples
-------
@@ -118,8 +120,8 @@ List 4xx or 5xx responses together with HTTP referer
|-----------+----------+----------------|
| - | 400 | - |
-Output from remote server using log-format option
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Parse apache log from remote server with `common` format
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
::
diff --git a/ngxtop/config_parser.py b/ngxtop/config_parser.py
new file mode 100644
index 0000000..b8e4804
--- /dev/null
+++ b/ngxtop/config_parser.py
@@ -0,0 +1,150 @@
+"""
+Nginx config parser and pattern builder.
+"""
+import os
+import re
+import subprocess
+
+from pyparsing import Literal, Word, ZeroOrMore, OneOrMore, Group, \
+ printables, quotedString, pythonStyleComment, removeQuotes
+
+from utils import choose_one, error_exit
+
+
+# Regex metacharacters that get backslash-escaped before a log format string is turned into a pattern.
+REGEX_SPECIAL_CHARS = r'([\.\*\+\?\|\(\)\{\}\[\]])'
+# Matches a `$name` variable reference; group 1 captures the name (lowercase letters, digits, underscore).
+REGEX_LOG_FORMAT_VARIABLE = r'\$([a-z0-9\_]+)'
+# Format string substituted for the 'combined' format name.
+LOG_FORMAT_COMBINED = '$remote_addr - $remote_user [$time_local] ' \
+ '"$request" $status $body_bytes_sent ' \
+ '"$http_referer" "$http_user_agent"'
+# Format string substituted for the 'common' format name.
+# NOTE(review): the standard Apache Common Log Format ends after the byte count; the trailing
+# quoted $http_x_forwarded_for field looks project-specific -- confirm it matches the logs in use.
+LOG_FORMAT_COMMON = '$remote_addr - $remote_user [$time_local] ' \
+ '"$request" $status $body_bytes_sent ' \
+ '"$http_x_forwarded_for"'
+
+# common parser element
+semicolon = Literal(';').suppress()
+# nginx string parameter can contain any character except: { ; " '
+parameter = Word(''.join(c for c in printables if c not in set('{;"\'')))
+# which can also be quoted
+parameter = parameter | quotedString.setParseAction(removeQuotes)
+
+
+def detect_config_path():
+    """
+    Locate the nginx configuration file by inspecting the build options printed by `nginx -V`.
+    Exits through error_exit when the nginx executable cannot be started.
+    :return: value of --conf-path if present, else <--prefix>/conf/nginx.conf, else /etc/nginx/nginx.conf
+    """
+    try:
+        nginx = subprocess.Popen(['nginx', '-V'], stderr=subprocess.PIPE)
+    except OSError:
+        error_exit('Access log file or format was not set and nginx config file cannot be detected. ' +
+                   'Perhaps nginx is not in your PATH?')
+
+    # only stderr is piped and inspected; stdout of the child is not captured
+    build_info = nginx.communicate()[1].decode('utf-8')
+
+    found = re.search(r'--conf-path=(\S*)', build_info)
+    if found is not None:
+        return found.group(1)
+
+    found = re.search(r'--prefix=(\S*)', build_info)
+    if found is None:
+        return '/etc/nginx/nginx.conf'
+    return found.group(1) + '/conf/nginx.conf'
+
+
+def get_access_logs(config):
+    """
+    Parse config for access_log directives.
+    Directives whose target is `off` or starts with `syslog:`, and directives without
+    any parameter, are skipped because there is no log file to read.
+    :param config: nginx configuration content as a string
+    :return: iterator over ('path', 'format name') tuple of found directives
+    """
+    access_log = Literal("access_log") + ZeroOrMore(parameter) + semicolon
+    access_log.ignore(pythonStyleComment)
+
+    for directive in access_log.searchString(config).asList():
+        # directive[0] is the 'access_log' keyword; ZeroOrMore also matches a bare `access_log;`
+        if len(directive) < 2:
+            continue
+
+        path = directive[1]
+        if path == 'off' or path.startswith('syslog:'):
+            # nothing to process here
+            continue
+
+        format_name = 'combined'
+        # a second parameter containing '=' is an option (e.g. gzip=1), not a format name
+        if len(directive) > 2 and '=' not in directive[2]:
+            format_name = directive[2]
+
+        yield path, format_name
+
+
+def get_log_formats(config):
+    """
+    Find every log_format directive in the given nginx config text.
+    :param config: nginx configuration content as a string
+    :return: iterator over ('format name', 'format string') tuples; a format written as
+             several parameters is concatenated into a single string
+    """
+    # grammar: log_format <name> <param> [<param> ...] ;
+    grammar = Literal('log_format') + parameter + Group(OneOrMore(parameter)) + semicolon
+    grammar.ignore(pythonStyleComment)
+
+    for tokens in grammar.searchString(config).asList():
+        # tokens[0] is the keyword, tokens[1] the name, tokens[2] the grouped format pieces
+        name, pieces = tokens[1], tokens[2]
+        yield name, ''.join(pieces)
+
+
+def detect_log_config(arguments):
+    """
+    Detect access log config (path and format) of nginx. Offer user to select if multiple access logs are detected.
+    Exits through error_exit when the config file is missing, declares no usable access log,
+    or references a format name that is neither 'combined' nor defined by a log_format directive.
+    :param arguments: parsed command line options; only '--config' is read here
+    :return: path and format of detected / selected access log
+    """
+    config = arguments['--config']
+    if config is None:
+        config = detect_config_path()
+    if not os.path.exists(config):
+        error_exit('Nginx config file not found: %s' % config)
+
+    with open(config) as f:
+        config_str = f.read()
+    access_logs = dict(get_access_logs(config_str))
+    if not access_logs:
+        error_exit('Access log file is not provided and ngxtop cannot detect it from your config file (%s).' % config)
+
+    log_formats = dict(get_log_formats(config_str))
+    if len(access_logs) == 1:
+        # next(iter(...)) works for Python 2 lists and Python 3 dict views alike
+        log_path, format_name = next(iter(access_logs.items()))
+    else:
+        # multiple access logs configured, offer to select one
+        print('Multiple access logs detected in configuration:')
+        # hand over a real list so the selection can be indexed on Python 3 as well
+        log_path = choose_one(list(access_logs), 'Select access log file to process: ')
+        format_name = access_logs[log_path]
+
+    # 'combined' is predefined by nginx, so it normally has no log_format directive in the config
+    if format_name == 'combined':
+        return log_path, LOG_FORMAT_COMBINED
+    if format_name not in log_formats:
+        error_exit('Incorrect format name set in config for access log file "%s"' % log_path)
+    return log_path, log_formats[format_name]
+
+
+def build_pattern(log_format):
+    """
+    Compile a regular expression that parses log lines written in the given format.
+    :param log_format: a format string, or one of the names 'combined' / 'common'
+    :return: compiled pattern with one named group per $variable in the format
+    """
+    named_formats = {'combined': LOG_FORMAT_COMBINED, 'common': LOG_FORMAT_COMMON}
+    template = named_formats.get(log_format, log_format)
+    # escape regex metacharacters first, then turn each $variable into a named group
+    escaped = re.sub(REGEX_SPECIAL_CHARS, r'\\\1', template)
+    return re.compile(re.sub(REGEX_LOG_FORMAT_VARIABLE, '(?P<\\1>.*)', escaped))
+
+
+def extract_variables(log_format):
+    """
+    Extract all variables from a log format string.
+    :param log_format: format string to extract, or one of the names 'combined' / 'common'
+    :return: iterator over all variables in given format string
+    """
+    if log_format == 'combined':
+        log_format = LOG_FORMAT_COMBINED
+    elif log_format == 'common':
+        # build_pattern expands this alias too; without it 'common' would yield no variables
+        log_format = LOG_FORMAT_COMMON
+    for match in re.findall(REGEX_LOG_FORMAT_VARIABLE, log_format):
+        yield match
+
diff --git a/ngxtop/ngxtop.py b/ngxtop/ngxtop.py
index 3355891..6e1bdfe 100755
--- a/ngxtop/ngxtop.py
+++ b/ngxtop/ngxtop.py
@@ -8,7 +8,7 @@ Usage:
Options:
-l <file>, --access-log <file> access log file to parse.
- -f <format>, --log-format <format> log format as specify in log_format directive.
+ -f <format>, --log-format <format> log format as specified in log_format directive. [default: combined]
--no-follow ngxtop default behavior is to ignore current lines in log
and only watch for new lines as they are written to the access log.
Use this flag to tell ngxtop to process the current content of the access log instead.
@@ -29,7 +29,6 @@ Options:
-c <file>, --config <file> allow ngxtop to parse nginx config file for log format and location.
-i <filter-expression>, --filter <filter-expression> filter in, records satisfied given expression are processed.
-p <filter-expression>, --pre-filter <filter-expression> in-filter expression to check in pre-parsing phase.
- -b, --db-dump dump database to disk
Examples:
All examples read nginx config file for access log location and format.
@@ -53,25 +52,19 @@ Examples:
Average body bytes sent of 200 responses of requested path begin with 'foo':
$ ngxtop avg bytes_sent --filter 'status == 200 and request_path.startswith("foo")'
- Analyze output from remote machine using 'common' log format
- $ ssh remote_machine tail -f /var/log/apache2/access.log | ngxtop -f common
-
-Available variables for filters:
- remote_addr, remote_user, time_local, request, status, body_bytes_sent, http_referer, http_user_agent
- (if you use 'common' log format, maybe you have http_x_forwarded_for instead of http_user_agent)
-
+ Analyze apache access log from remote machine using 'common' log format
+ $ ssh remote tail -f /var/log/apache2/access.log | ngxtop -f common
"""
from __future__ import print_function
+import atexit
from contextlib import closing
+import curses
import logging
import os
-import re
import sqlite3
-import subprocess
-import threading
import time
-from datetime import date
import sys
+import signal
try:
import urlparse
@@ -81,15 +74,9 @@ except ImportError:
from docopt import docopt
import tabulate
+from config_parser import detect_log_config, detect_config_path, extract_variables, build_pattern
+from utils import error_exit
-REGEX_SPECIAL_CHARS = r'([\.\*\+\?\|\(\)\{\}\[\]])'
-REGEX_LOG_FORMAT_VARIABLE = r'\$([a-z0-9\_]+)'
-LOG_FORMAT_COMBINED = '$remote_addr - $remote_user [$time_local] ' \
- '"$request" $status $body_bytes_sent ' \
- '"$http_referer" "$http_user_agent"'
-LOG_FORMAT_COMMON = '$remote_addr - $remote_user [$time_local] ' \
- '"$request" $status $body_bytes_sent ' \
- '"$http_x_forwarded_for"'
DEFAULT_QUERIES = [
('Summary:',
@@ -123,86 +110,6 @@ DEFAULT_QUERIES = [
DEFAULT_FIELDS = set(['status_type', 'bytes_sent'])
-# =============================
-# Global variable for dbdump
-# =============================
-processor = None
-
-
-# ====================
-# Nginx utilities
-# ====================
-def get_nginx_conf_path():
- """
- Get nginx conf path based on `nginx -V` output
- """
- proc = subprocess.Popen(['nginx', '-V'], stderr=subprocess.PIPE)
- stdout, stderr = proc.communicate()
-
- version_output = stderr.decode('utf-8')
- conf_path_match = re.search(r'--conf-path=(\S*)', version_output)
- if conf_path_match is not None:
- return conf_path_match.group(1)
-
- prefix_match = re.search(r'--prefix=(\S*)', version_output)
- if prefix_match is not None:
- return prefix_match.group(1) + '/conf/nginx.conf'
- return '/etc/nginx/nginx.conf'
-
-
-def extract_nginx_conf(path, log_file=None, log_format=None):
- """
- *experimental* read nginx conf file to extract access log file location and format.
- TODO: rewrite this method to:
- - match all access_log directive to get all possible log files
- - for each log file search the correct log_format
- - if more than one log file, offer user to choose which one
- """
- with open(path) as conf_file:
- conf = conf_file.read()
-
- log_format_directive = re.search(r'log_format\s+(\S+)\s+(.*?);', conf, flags=re.DOTALL)
- log_format_name = log_format_directive.group(1) if log_format_directive else 'combined'
- log_format = log_format_directive.group(2) if log_format_directive else 'combined'
-
- # take care of log format in multiple line
- # only most common case, which encapsulate log format in single quote is handled
- if '\n' in log_format:
- log_format = ''.join(line.strip() for line in log_format.split('\n'))
- if log_format.startswith("'"):
- log_format = log_format.replace("'", "")
-
- access_log_directive = re.search(r'access_log\s+(\S+)\s+%s' % log_format_name, conf)
- # Use the log file from config only when not supplied with --access-log option,
- # else it is overwritten everytime.
- if not log_file:
- log_file = access_log_directive.group(1) if access_log_directive else '/var/log/nginx/access.log'
-
- return log_file, log_format
-
-
-def build_pattern(log_format):
- """
- Take an nginx's log format string and return the required regexp pattern to parse the access log
- """
- if log_format == 'combined':
- return build_pattern(LOG_FORMAT_COMBINED)
- elif log_format == 'common':
- return build_pattern(LOG_FORMAT_COMMON)
- pattern = re.sub(REGEX_SPECIAL_CHARS, r'\\\1', log_format)
- pattern = re.sub(REGEX_LOG_FORMAT_VARIABLE, '(?P<\\1>.*)', pattern)
- return re.compile(pattern)
-
-
-def extract_variables(log_format):
- if log_format == 'combined':
- log_format = LOG_FORMAT_COMBINED
- elif log_format == 'common':
- log_format = LOG_FORMAT_COMMON
- for match in re.findall(REGEX_LOG_FORMAT_VARIABLE, log_format):
- yield match
-
-
# ======================
# generator utilities
# ======================
@@ -226,8 +133,11 @@ def map_field(field, func, dict_sequence):
set the result as new value for that key.
"""
for item in dict_sequence:
- item[field] = func(item.get(field, None))
- yield item
+ try:
+ item[field] = func(item.get(field, None))
+ yield item
+ except ValueError:
+ pass
def add_field(field, func, dict_sequence):
@@ -294,7 +204,7 @@ class SQLProcessor(object):
self.index_fields = index_fields if index_fields is not None else []
self.column_list = ','.join(fields)
self.holder_list = ','.join(':%s' % var for var in fields)
- self.conn = sqlite3.connect(':memory:', check_same_thread=False)
+ self.conn = sqlite3.connect(':memory:')
self.init_db()
def process(self, records):
@@ -303,7 +213,6 @@ class SQLProcessor(object):
with closing(self.conn.cursor()) as cursor:
for r in records:
cursor.execute(insert, r)
- return self.count()
def report(self):
if not self.begin:
@@ -354,9 +263,8 @@ def process_log(lines, pattern, processor, arguments):
if filter_exp:
records = (r for r in records if eval(filter_exp, {}, r))
- total = processor.process(records)
- print(processor.report())
- return total
+ processor.process(records)
+ print(processor.report()) # this will only run when start in --no-follow mode
def build_processor(arguments):
@@ -398,7 +306,7 @@ def build_processor(arguments):
def build_source(access_log, arguments):
# constructing log source
- if (access_log == 'stdin'):
+ if access_log == 'stdin':
lines = sys.stdin
elif arguments['--no-follow']:
lines = open(access_log)
@@ -407,77 +315,53 @@ def build_source(access_log, arguments):
return lines
-def build_reporter(processor, arguments):
+def setup_reporter(processor, arguments):
+ """
+ Install a periodic full-screen report of the processor's results.
+ Does nothing when '--no-follow' is set; otherwise initializes curses and arms a
+ SIGALRM interval timer whose handler redraws the screen with processor.report().
+ :param processor: object whose report() returns the text to display
+ :param arguments: parsed command line options ('--no-follow' and '--interval' are read)
+ """
if arguments['--no-follow']:
- return None
+ return
- def report(interval=float(arguments['--interval'])):
- os.system('cls' if os.name == 'nt' else 'clear')
- while True:
- time.sleep(interval)
- output = processor.report()
- os.system('cls' if os.name == 'nt' else 'clear')
- print(output)
+ scr = curses.initscr()
+ # make sure curses is shut down again when the interpreter exits
+ atexit.register(curses.endwin)
+
+ def print_report(sig, frame):
+ output = processor.report()
+ scr.erase()
+ try:
+ scr.addstr(output)
+ except curses.error:
+ # NOTE(review): presumably raised when the report does not fit the screen -- confirm
+ pass
+ scr.refresh()
- thread = threading.Thread(target=report)
- thread.daemon = True
- return thread
+ signal.signal(signal.SIGALRM, print_report)
+ interval = float(arguments['--interval'])
+ # first alarm after 0.1 second, then one every `interval` seconds
+ signal.setitimer(signal.ITIMER_REAL, 0.1, interval)
def process(arguments):
+ """
+ Resolve the access log source and format from the parsed arguments, then either print
+ the detected settings (`info` command) or parse the log and report on it.
+ Reads from stdin when no access log is given and stdin is not a terminal; otherwise
+ falls back to detect_log_config. Exits through error_exit when the log file is missing.
+ :param arguments: parsed command line options
+ """
- global processor
access_log = arguments['--access-log']
log_format = arguments['--log-format']
- if not access_log and not sys.stdin.isatty():
+ if access_log is None and not sys.stdin.isatty():
+ # assume logs can be fetched directly from stdin when piped
access_log = 'stdin'
- else:
- if access_log is None or log_format is None:
- config = arguments['--config']
- if config is None:
- config = get_nginx_conf_path()
- access_log, log_format = extract_nginx_conf(config, access_log)
- else:
- config = None
-
- # Maybe nginx is not installed, so we'll fix a default log format if not defined here
- if log_format is None:
- log_format = 'combined'
+ if access_log is None:
+ access_log, log_format = detect_log_config(arguments)
logging.info('access_log: %s', access_log)
logging.info('log_format: %s', log_format)
+ # 'stdin' is a sentinel for piped input, not a file on disk, so it must skip the existence check
+ if access_log != 'stdin' and not os.path.exists(access_log):
+ error_exit('access log file "%s" does not exist' % access_log)
if arguments['info']:
- print('configuration file:\n ', config)
+ print('nginx configuration file:\n ', detect_config_path())
print('access log file:\n ', access_log)
print('access log format:\n ', log_format)
print('available variables:\n ', ', '.join(sorted(extract_variables(log_format))))
return
- begin = time.time()
source = build_source(access_log, arguments)
pattern = build_pattern(log_format)
processor = build_processor(arguments)
- reporter = build_reporter(processor, arguments)
- if reporter is not None:
- reporter.start()
- total = process_log(source, pattern, processor, arguments)
- duration = time.time() - begin
- logging.info('Processed %d lines in %.3f seconds, %.2f lines/sec.', total, duration, total / duration)
-
-
-# ================
-# Database dump
-# ================
-def dbdump():
- """
- *experimental* if requested, database is dumped to a file when script is interrupted from keyboard
- Filename is composed from current date and process id
- """
- dbfile = "{}_{}.sql".format(date.today().strftime("%Y%m%d"), os.getpid())
- logging.info("Database dump: %s", dbfile)
- with open(dbfile, 'w') as f:
- for line in processor.conn.iterdump():
- f.write('%s\n' % line)
+ setup_reporter(processor, arguments)
+ process_log(source, pattern, processor, arguments)
def main():
@@ -494,8 +378,6 @@ def main():
try:
process(args)
except KeyboardInterrupt:
- if args['--db-dump']:
- dbdump()
sys.exit(0)
diff --git a/ngxtop/utils.py b/ngxtop/utils.py
new file mode 100644
index 0000000..ef61072
--- /dev/null
+++ b/ngxtop/utils.py
@@ -0,0 +1,19 @@
+import sys
+
+
+def choose_one(choices, prompt):
+    """
+    Print a numbered menu of choices and keep prompting until a valid number is entered.
+    :param choices: iterable of options to pick from (lists, tuples and dict views are accepted)
+    :param prompt: text shown each time the user is asked for a number
+    :return: the selected element of choices
+    :raises ValueError: if choices is empty, since no answer could ever be valid
+    """
+    # materialize so that len() and indexing also work for dict views and iterators
+    choices = list(choices)
+    if not choices:
+        raise ValueError('no choices to select from')
+
+    try:
+        read_line = raw_input  # Python 2
+    except NameError:
+        read_line = input  # Python 3
+
+    for idx, choice in enumerate(choices):
+        print('%d. %s' % (idx + 1, choice))
+    selected = None
+    while selected is None or selected <= 0 or selected > len(choices):
+        try:
+            selected = int(read_line(prompt))
+        except ValueError:
+            # not a number, ask again
+            selected = None
+    return choices[selected - 1]
+
+
+def error_exit(msg, status=1):
+    """
+    Report an error on stderr and terminate the program.
+    :param msg: text written after the 'Error: ' prefix
+    :param status: process exit status, 1 by default
+    """
+    sys.stderr.write('Error: %s\n' % msg)
+    raise SystemExit(status)
\ No newline at end of file
diff --git a/setup.py b/setup.py
index d7e6c35..8f86be9 100644
--- a/setup.py
+++ b/setup.py
@@ -27,7 +27,7 @@ setup(
keywords='cli monitoring nginx system',
packages=['ngxtop'],
- install_requires=['docopt', 'tabulate'],
+ install_requires=['docopt', 'tabulate', 'pyparsing'],
entry_points={
'console_scripts': [
diff --git a/tests/test_config_parser.py b/tests/test_config_parser.py
new file mode 100644
index 0000000..3b6504b
--- /dev/null
+++ b/tests/test_config_parser.py
@@ -0,0 +1,60 @@
+from ngxtop import config_parser
+
+
+def test_get_log_formats():
+ """get_log_formats finds a format spread over several quoted lines and a format with a quoted name."""
+ config = '''
+ http {
+ # ubuntu default, log_format on multiple lines
+ log_format main '$remote_addr - $remote_user [$time_local] "$request" '
+ "$status $body_bytes_sent '$http_referer' "
+ '"$http_user_agent" "$http_x_forwarded_for"';
+
+ # name can also be quoted, and format don't always have to
+ log_format 'te st' $remote_addr;
+ }
+ '''
+ formats = dict(config_parser.get_log_formats(config))
+ assert 'main' in formats
+ # single quotes nested inside a double-quoted piece must survive in the joined format
+ assert "'$http_referer'" in formats['main']
+ assert 'te st' in formats
+
+
+def test_get_access_logs_no_format():
+ """access_log directives without a format name map to 'combined'; syslog and commented ones are skipped."""
+ config = '''
+ http {
+ # ubuntu default
+ access_log /var/log/nginx/access.log;
+
+ # syslog is a valid access log, but we can't follow it
+ access_log syslog:server=address combined;
+
+ # commented
+ # access_log commented;
+
+ server {
+ location / {
+ # has parameter with default format
+ access_log /path/to/log gzip=1;
+ }
+ }
+ }
+ '''
+ logs = dict(config_parser.get_access_logs(config))
+ # only the two file-based, uncommented directives are expected
+ assert len(logs) == 2
+ assert logs['/var/log/nginx/access.log'] == 'combined'
+ assert logs['/path/to/log'] == 'combined'
+
+
+def test_access_logs_with_format_name():
+ """An explicit format name is returned, whether followed by key=value options or written quoted."""
+ config = '''
+ http {
+ access_log /path/to/main.log main gzip=5 buffer=32k flush=1m;
+ server {
+ access_log /path/to/test.log 'te st';
+ }
+ }
+ '''
+ logs = dict(config_parser.get_access_logs(config))
+ assert len(logs) == 2
+ assert logs['/path/to/main.log'] == 'main'
+ assert logs['/path/to/test.log'] == 'te st'