From c0ca54525004de3308cc27077ff907d5f5c1cd09 Mon Sep 17 00:00:00 2001 From: Binh Le Date: Mon, 24 Mar 2014 23:22:42 +0700 Subject: Restructured and prepared for packaging --- .gitignore | 4 + LICENSE.txt | 2 +- README.md | 92 ----------- README.rst | 101 ++++++++++++ ngxtop.py | 442 ---------------------------------------------------- ngxtop/__init__.py | 1 + ngxtop/ngxtop.py | 446 +++++++++++++++++++++++++++++++++++++++++++++++++++++ requirements.txt | 2 - setup.cfg | 0 setup.py | 34 ++++ 10 files changed, 587 insertions(+), 537 deletions(-) create mode 100644 .gitignore delete mode 100644 README.md create mode 100644 README.rst delete mode 100755 ngxtop.py create mode 100644 ngxtop/__init__.py create mode 100755 ngxtop/ngxtop.py delete mode 100644 requirements.txt create mode 100644 setup.cfg create mode 100644 setup.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..57075a8 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +*.py[cod] +.idea +.vagrant +Vagrantfile diff --git a/LICENSE.txt b/LICENSE.txt index 85a8a8a..15a483d 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -1,6 +1,6 @@ The MIT License (MIT) -Copyright (c) [year] [fullname] +Copyright (c) 2014 Binh Le. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/README.md b/README.md deleted file mode 100644 index 237c414..0000000 --- a/README.md +++ /dev/null @@ -1,92 +0,0 @@ -#`ngxtop` - know what is happening to your nginx server in real-time. - -**ngxtop** parses your nginx access log and outputs useful, `top`-like, metrics of your nginx server. - - -## Usage -``` -Usage: - ngxtop [options] - ngxtop [options] (print|top|avg|sum) - ngxtop info - -Options: - -l , --access-log access log file to parse. - -f , --log-format log format as specify in log_format directive. - --no-follow ngxtop default behavior is to ignore current lines in log - and only watch for new lines as they are written to the access log. - Use this flag to tell ngxtop to process the current content of the access log instead. - -t , --interval report interval when running in follow mode [default: 2.0] - - -g , --group-by group by variable [default: request_path] - -w , --having having clause [default: 1] - -o , --order-by order of output for default query [default: count] - -n , --limit limit the number of records included in report for top command [default: 10] - -a ..., --a ... add exp (must be aggregation exp: sum, avg, min, max, etc.) into output - - -v, --verbose more verbose output - -d, --debug print every line and parsed record - -h, --help print this help message. - --version print version information. -``` - -## Sample output - -###Default output - -``` -$ ./ngxtop.py -running for 411 seconds, 64332 records processed: 156.60 req/sec - -Summary: -| count | avg_bytes_sent | 2xx | 3xx | 4xx | 5xx | -|---------+------------------+-------+-------+-------+-------| -| 64332 | 2775.251 | 61262 | 2994 | 71 | 5 | - -Detailed: -| request_path | count | avg_bytes_sent | 2xx | 3xx | 4xx | 5xx | -|------------------------------------------+---------+------------------+-------+-------+-------+-------| -| /abc/xyz/xxxx | 20946 | 434.693 | 20935 | 0 | 11 | 0 | -| /xxxxx.json | 5633 | 1483.723 | 5633 | 0 | 0 | 0 | -| /xxxxx/xxx/xxxxxxxxxxxxx | 3629 | 6835.499 | 3626 | 0 | 3 | 0 | -| /xxxxx/xxx/xxxxxxxx | 3627 | 15971.885 | 3623 | 0 | 4 | 0 | -| /xxxxx/xxx/xxxxxxx | 3624 | 7830.236 | 3621 | 0 | 3 | 0 | -| /static/js/minified/utils.min.js | 3031 | 1781.155 | 2104 | 927 | 0 | 0 | -| /static/js/minified/xxxxxxx.min.v1.js | 2889 | 2210.235 | 2068 | 821 | 0 | 0 | -| /static/tracking/js/xxxxxxxx.js | 2594 | 1325.681 | 1927 | 667 | 0 | 0 | -| /xxxxx/xxx.html | 2521 | 573.597 | 2520 | 0 | 1 | 0 | -| /xxxxx/xxxx.json | 1840 | 800.542 | 1839 | 0 | 1 | 0 | -``` - -###View top source IPs of clients - -``` -$ ./ngxtop.py top remote_addr -running for 20 seconds, 3215 records processed: 159.62 req/sec - -top remote_addr -| remote_addr | count | -|-----------------+---------| -| 118.173.177.161 | 20 | -| 110.78.145.3 | 16 | -| 171.7.153.7 | 16 | -| 180.183.67.155 | 16 | -| 183.89.65.9 | 16 | -| 202.28.182.5 | 16 | -| 1.47.170.12 | 15 | -| 119.46.184.2 | 15 | -| 125.26.135.219 | 15 | -| 125.26.213.203 | 15 | -``` - -###List 4xx or 5xx responses together with HTTP referer - -``` -$ ./ngxtop.py -i 'status >= 400' print request status http_referer -running for 2 seconds, 28 records processed: 13.95 req/sec - -request, status, http_referer: -| request | status | http_referer | -|-----------+----------+----------------| -| - | 400 | - | -``` diff --git a/README.rst b/README.rst new file mode 100644 index 0000000..97c3568 --- /dev/null +++ b/README.rst @@ -0,0 +1,101 @@ +=================================================== +``ngxtop`` - **real-time** metrics for nginx server +=================================================== + +**ngxtop** parses your nginx access log and outputs useful, ``top``-like, metrics of your nginx server. +So you can tell what is happening with your server in real-time. + + +Usage +----- + +:: + + Usage: + ngxtop [options] + ngxtop [options] (print|top|avg|sum) + ngxtop info + + Options: + -l , --access-log access log file to parse. + -f , --log-format log format as specify in log_format directive. + --no-follow ngxtop default behavior is to ignore current lines in log + and only watch for new lines as they are written to the access log. + Use this flag to tell ngxtop to process the current content of the access log instead. + -t , --interval report interval when running in follow mode [default: 2.0] + + -g , --group-by group by variable [default: request_path] + -w , --having having clause [default: 1] + -o , --order-by order of output for default query [default: count] + -n , --limit limit the number of records included in report for top command [default: 10] + -a ..., --a ... add exp (must be aggregation exp: sum, avg, min, max, etc.) into output + + -v, --verbose more verbose output + -d, --debug print every line and parsed record + -h, --help print this help message. + --version print version information. + +Samples +------- + +Default output +~~~~~~~~~~~~~~ + +:: + + $ ./ngxtop.py + running for 411 seconds, 64332 records processed: 156.60 req/sec + + Summary: + | count | avg_bytes_sent | 2xx | 3xx | 4xx | 5xx | + |---------+------------------+-------+-------+-------+-------| + | 64332 | 2775.251 | 61262 | 2994 | 71 | 5 | + + Detailed: + | request_path | count | avg_bytes_sent | 2xx | 3xx | 4xx | 5xx | + |------------------------------------------+---------+------------------+-------+-------+-------+-------| + | /abc/xyz/xxxx | 20946 | 434.693 | 20935 | 0 | 11 | 0 | + | /xxxxx.json | 5633 | 1483.723 | 5633 | 0 | 0 | 0 | + | /xxxxx/xxx/xxxxxxxxxxxxx | 3629 | 6835.499 | 3626 | 0 | 3 | 0 | + | /xxxxx/xxx/xxxxxxxx | 3627 | 15971.885 | 3623 | 0 | 4 | 0 | + | /xxxxx/xxx/xxxxxxx | 3624 | 7830.236 | 3621 | 0 | 3 | 0 | + | /static/js/minified/utils.min.js | 3031 | 1781.155 | 2104 | 927 | 0 | 0 | + | /static/js/minified/xxxxxxx.min.v1.js | 2889 | 2210.235 | 2068 | 821 | 0 | 0 | + | /static/tracking/js/xxxxxxxx.js | 2594 | 1325.681 | 1927 | 667 | 0 | 0 | + | /xxxxx/xxx.html | 2521 | 573.597 | 2520 | 0 | 1 | 0 | + | /xxxxx/xxxx.json | 1840 | 800.542 | 1839 | 0 | 1 | 0 | + +View top source IPs of clients +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +:: + + $ ./ngxtop.py top remote_addr + running for 20 seconds, 3215 records processed: 159.62 req/sec + + top remote_addr + | remote_addr | count | + |-----------------+---------| + | 118.173.177.161 | 20 | + | 110.78.145.3 | 16 | + | 171.7.153.7 | 16 | + | 180.183.67.155 | 16 | + | 183.89.65.9 | 16 | + | 202.28.182.5 | 16 | + | 1.47.170.12 | 15 | + | 119.46.184.2 | 15 | + | 125.26.135.219 | 15 | + | 125.26.213.203 | 15 | + +List 4xx or 5xx responses together with HTTP referer +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +:: + + $ ./ngxtop.py -i 'status >= 400' print request status http_referer + running for 2 seconds, 28 records processed: 13.95 req/sec + + request, status, http_referer: + | request | status | http_referer | + |-----------+----------+----------------| + | - | 400 | - | diff --git a/ngxtop.py b/ngxtop.py deleted file mode 100755 index 6e1cf48..0000000 --- a/ngxtop.py +++ /dev/null @@ -1,442 +0,0 @@ -#!/usr/bin/env python - -"""ngxtop - ad-hoc query for nginx access log. - -Usage: - ngxtop [options] - ngxtop [options] (print|top|avg|sum) ... - ngxtop info - ngxtop [options] query ... - -Options: - -l , --access-log access log file to parse. - -f , --log-format log format as specify in log_format directive. - --no-follow ngxtop default behavior is to ignore current lines in log - and only watch for new lines as they are written to the access log. - Use this flag to tell ngxtop to process the current content of the access log instead. - -t , --interval report interval when running in follow mode [default: 2.0] - - -g , --group-by group by variable [default: request_path] - -w , --having having clause [default: 1] - -o , --order-by order of output for default query [default: count] - -n , --limit limit the number of records included in report for top command [default: 10] - -a ..., --a ... add exp (must be aggregation exp: sum, avg, min, max, etc.) into output - - -v, --verbose more verbose output - -d, --debug print every line and parsed record - -h, --help print this help message. - --version print version information. - - Advanced / experimental options: - -c , --config allow ngxtop to parse nginx config file for log format and location. - -i , --filter filter in, records satisfied given expression are processed. - -p , --pre-filter in-filter expression to check in pre-parsing phase. - -Examples: - All examples read nginx config file for access log location and format. - If you want to specify the access log file and / or log format, use the -f and -a options. - - "top" like view of nginx requests - $ ngxtop - - Top 10 requested path with status 404: - $ ngxtop top request_path --filter 'status == 404' - - Top 10 requests with highest total bytes sent - $ ngxtop --order-by 'avg(bytes_sent) * count' - - Top 10 remote address, e.g., who's hitting you the most - $ ngxtop --group-by remote_addr - - Print requests with 4xx or 5xx status, together with status and http referer - $ ngxtop -i 'status >= 400' print request status http_referer - - Average body bytes sent of 200 responses of requested path begin with 'foo': - $ ngxtop avg bytes_sent --filter 'status == 200 and request_path.startswith("foo")' -""" -from contextlib import closing -import logging -import os -import re -import sqlite3 -import subprocess -import threading -import urlparse -import time -import sys - -from docopt import docopt -import tabulate - - -REGEX_SPECIAL_CHARS = r'([\.\*\+\?\|\(\)\{\}\[\]])' -REGEX_LOG_FORMAT_VARIABLE = r'\$([a-z0-9\_]+)' -LOG_FORMAT_COMBINED = '$remote_addr - $remote_user [$time_local] ' \ - '"$request" $status $body_bytes_sent ' \ - '"$http_referer" "$http_user_agent"' - -DEFAULT_QUERIES = [ - ('Summary:', - '''SELECT - count(1) AS count, - avg(bytes_sent) AS avg_bytes_sent, - count(CASE WHEN status_type = 2 THEN 1 END) AS '2xx', - count(CASE WHEN status_type = 3 THEN 1 END) AS '3xx', - count(CASE WHEN status_type = 4 THEN 1 END) AS '4xx', - count(CASE WHEN status_type = 5 THEN 1 END) AS '5xx' - FROM log - ORDER BY %(--order-by)s DESC - LIMIT %(--limit)s'''), - - ('Detailed:', - '''SELECT - %(--group-by)s, - count(1) AS count, - avg(bytes_sent) AS avg_bytes_sent, - count(CASE WHEN status_type = 2 THEN 1 END) AS '2xx', - count(CASE WHEN status_type = 3 THEN 1 END) AS '3xx', - count(CASE WHEN status_type = 4 THEN 1 END) AS '4xx', - count(CASE WHEN status_type = 5 THEN 1 END) AS '5xx' - FROM log - GROUP BY %(--group-by)s - HAVING %(--having)s - ORDER BY %(--order-by)s DESC - LIMIT %(--limit)s''') -] - -DEFAULT_FIELDS = set(['status_type', 'bytes_sent']) - - -# ==================== -# Nginx utilities -# ==================== -def get_nginx_conf_path(): - """ - Get nginx conf path based on `nginx -V` output - """ - proc = subprocess.Popen(['nginx', '-V'], stderr=subprocess.PIPE) - stdout, stderr = proc.communicate() - - conf_path_match = re.search(r'--conf-path=(\S*)', stderr) - if conf_path_match is not None: - return conf_path_match.group(1) - - prefix_match = re.search(r'--prefix=(\S*)', stderr) - if prefix_match is not None: - return prefix_match.group(1) + '/conf/nginx.conf' - return '/etc/nginx/nginx.conf' - - -def extract_nginx_conf(path, log_file=None, log_format=None): - """ - *experimental* read nginx conf file to extract access log file location and format. - TODO: rewrite this method to: - - match all access_log directive to get all possible log files - - for each log file search the correct log_format - - if more than one log file, offer user to choose which one - """ - with open(path) as conf_file: - conf = conf_file.read() - - log_format_directive = re.search(r'log_format\s+(\S+)\s+(.*?);', conf, flags=re.DOTALL) - log_format_name = log_format_directive.group(1) if log_format_directive else 'combined' - log_format = log_format_directive.group(2) if log_format_directive else 'combined' - - # take care of log format in multiple line - # only most common case, which encapsulate log format in single quote is handled - if '\n' in log_format: - log_format = ''.join(line.strip() for line in log_format.split('\n')) - if log_format.startswith("'"): - log_format = log_format.replace("'", "") - - access_log_directive = re.search(r'access_log\s+(\S+)\s+%s' % log_format_name, conf) - # Use the log file from config only when not supplied with --access-log option, - # else it is overwritten everytime. - if not log_file: - log_file = access_log_directive.group(1) if access_log_directive else 'logs/access.log' - - return log_file, log_format - - -def build_pattern(log_format): - """ - Take an nginx's log format string and return the required regexp pattern to parse the access log - """ - if log_format == 'combined': - return build_pattern(LOG_FORMAT_COMBINED) - pattern = re.sub(REGEX_SPECIAL_CHARS, r'\\\1', log_format) - pattern = re.sub(REGEX_LOG_FORMAT_VARIABLE, '(?P<\\1>.*)', pattern) - return re.compile(pattern) - - -def extract_variables(log_format): - for match in re.findall(REGEX_LOG_FORMAT_VARIABLE, log_format): - yield match - - -# ====================== -# generator utilities -# ====================== -def follow(the_file): - """ - Follow a given file and yield new lines when they are available, like `tail -f`. - """ - with open(the_file) as f: - f.seek(0, 2) # seek to eof - while True: - line = f.readline() - if not line: - time.sleep(0.1) # sleep briefly before trying again - continue - yield line - - -def map_field(field, func, dict_sequence): - """ - Apply given function to value of given key in every dictionary in sequence and - set the result as new value for that key. - """ - for item in dict_sequence: - item[field] = func(item.get(field, None)) - yield item - - -def add_field(field, func, dict_sequence): - """ - Apply given function to the record and store result in given field of current record. - Do nothing if record already contains given field. - """ - for item in dict_sequence: - if field not in item: - item[field] = func(item) - yield item - - -def trace(sequence, phase=''): - for item in sequence: - logging.debug('%s:\n%s', phase, item) - yield item - - -# ====================== -# Access log parsing -# ====================== -def parse_request_path(record): - if 'request_uri' in record: - uri = record['request_uri'] - elif 'request' in record: - uri = ' '.join(record['request'].split(' ')[1:-1]) - else: - uri = None - return urlparse.urlparse(uri).path if uri else None - - -def parse_status_type(record): - return record['status'] / 100 if 'status' in record else None - - -def to_int(value): - return int(value) if value and value != '-' else 0 - - -def to_float(value): - return float(value) if value and value != '-' else 0.0 - - -def parse_log(lines, pattern): - matches = (pattern.match(l) for l in lines) - records = (m.groupdict() for m in matches if m is not None) - records = map_field('status', to_int, records) - records = add_field('status_type', parse_status_type, records) - records = add_field('bytes_sent', lambda r: r['body_bytes_sent'], records) - records = map_field('bytes_sent', to_int, records) - records = map_field('request_time', to_float, records) - records = add_field('request_path', parse_request_path, records) - return records - - -# ================================= -# Records and statistic processor -# ================================= -class SQLProcessor(object): - def __init__(self, report_queries, fields, index_fields=None): - self.begin = False - self.report_queries = report_queries - self.index_fields = index_fields if index_fields is not None else [] - self.column_list = ','.join(fields) - self.holder_list = ','.join(':%s' % var for var in fields) - self.conn = sqlite3.connect(':memory:', check_same_thread=False) - self.init_db() - - def process(self, records): - self.begin = time.time() - insert = 'insert into log (%s) values (%s)' % (self.column_list, self.holder_list) - with closing(self.conn.cursor()) as cursor: - for r in records: - cursor.execute(insert, r) - return self.count() - - def report(self): - if not self.begin: - return '' - count = self.count() - duration = time.time() - self.begin - status = 'running for %.0f seconds, %d records processed: %.2f req/sec' - output = [status % (duration, count, count / duration)] - with closing(self.conn.cursor()) as cursor: - for query in self.report_queries: - if isinstance(query, tuple): - label, query = query - else: - label = '' - cursor.execute(query) - columns = (d[0] for d in cursor.description) - result = tabulate.tabulate(cursor.fetchall(), headers=columns, tablefmt='orgtbl', floatfmt='.3f') - output.append('%s\n%s' % (label, result)) - return '\n\n'.join(output) - - def init_db(self): - create_table = 'create table log (%s)' % self.column_list - with closing(self.conn.cursor()) as cursor: - logging.info('sqlite init: %s', create_table) - cursor.execute(create_table) - for idx, field in enumerate(self.index_fields): - sql = 'create index log_idx%d on log (%s)' % (idx, field) - logging.info('sqlite init: %s', sql) - cursor.execute(sql) - - def count(self): - with closing(self.conn.cursor()) as cursor: - cursor.execute('select count(1) from log') - return cursor.fetchone()[0] - - -# =============== -# Log processing -# =============== -def process_log(lines, pattern, processor, arguments): - pre_filer_exp = arguments['--pre-filter'] - if pre_filer_exp: - lines = (line for line in lines if eval(pre_filer_exp, {}, dict(line=line))) - - records = parse_log(lines, pattern) - - filter_exp = arguments['--filter'] - if filter_exp: - records = (r for r in records if eval(filter_exp, {}, r)) - - total = processor.process(records) - print processor.report() - return total - - -def build_processor(arguments): - fields = arguments[''] - if arguments['print']: - label = ', '.join(fields) + ':' - selections = ', '.join(fields) - query = 'select %s from log group by %s' % (selections, selections) - report_queries = [(label, query)] - elif arguments['top']: - limit = int(arguments['--limit']) - report_queries = [] - for var in fields: - label = 'top %s' % var - query = 'select %s, count(1) as count from log group by %s order by count desc limit %d' % (var, var, limit) - report_queries.append((label, query)) - elif arguments['avg']: - label = 'average %s' % fields - selections = ', '.join('avg(%s)' % var for var in fields) - query = 'select %s from log' % selections - report_queries = [(label, query)] - elif arguments['sum']: - label = 'sum %s' % fields - selections = ', '.join('sum(%s)' % var for var in fields) - query = 'select %s from log' % selections - report_queries = [(label, query)] - elif arguments['query']: - report_queries = arguments[''] - fields = arguments[''] - else: - report_queries = [(name, query % arguments) for name, query in DEFAULT_QUERIES] - fields = DEFAULT_FIELDS.union(set([arguments['--group-by']])) - - for label, query in report_queries: - logging.info('query for "%s":\n %s', label, query) - processor = SQLProcessor(report_queries, fields) - return processor - - -def build_source(access_log, arguments): - # constructing log source - if arguments['--no-follow']: - lines = open(access_log) - else: - lines = follow(access_log) - return lines - - -def build_reporter(processor, arguments): - if arguments['--no-follow']: - return None - - def report(interval=float(arguments['--interval'])): - os.system('cls' if os.name == 'nt' else 'clear') - while True: - time.sleep(interval) - output = processor.report() - os.system('cls' if os.name == 'nt' else 'clear') - print output - - thread = threading.Thread(target=report) - thread.daemon = True - return thread - - -def main(arguments): - access_log = arguments['--access-log'] - log_format = arguments['--log-format'] - if access_log is None or log_format is None: - config = arguments['--config'] - if config is None: - config = get_nginx_conf_path() - access_log, log_format = extract_nginx_conf(config, access_log) - else: - config = None - logging.info('access_log: %s', access_log) - logging.info('log_format: %s', log_format) - - if arguments['info']: - print 'configuration file:\n ', config - print 'access log file:\n ', access_log - print 'access log format:\n ', log_format - print 'available variables:\n ', ', '.join(sorted(extract_variables(log_format))) - return - - begin = time.time() - source = build_source(access_log, arguments) - pattern = build_pattern(log_format) - processor = build_processor(arguments) - reporter = build_reporter(processor, arguments) - if reporter is not None: - reporter.start() - total = process_log(source, pattern, processor, arguments) - duration = time.time() - begin - logging.info('Processed %d lines in %.3f seconds, %.2f lines/sec.', total, duration, total / duration) - - -if __name__ == '__main__': - args = docopt(__doc__, version='xstat 0.1') - - log_level = logging.WARNING - if args['--verbose']: - log_level = logging.INFO - if args['--debug']: - log_level = logging.DEBUG - logging.basicConfig(level=log_level, format='%(levelname)s: %(message)s') - logging.debug('arguments:\n%s', args) - - try: - main(args) - except KeyboardInterrupt: - sys.exit(0) diff --git a/ngxtop/__init__.py b/ngxtop/__init__.py new file mode 100644 index 0000000..7ec1fc3 --- /dev/null +++ b/ngxtop/__init__.py @@ -0,0 +1 @@ +from ngxtop import main \ No newline at end of file diff --git a/ngxtop/ngxtop.py b/ngxtop/ngxtop.py new file mode 100755 index 0000000..b4fd0d5 --- /dev/null +++ b/ngxtop/ngxtop.py @@ -0,0 +1,446 @@ +#!/usr/bin/env python + +"""ngxtop - ad-hoc query for nginx access log. + +Usage: + ngxtop [options] + ngxtop [options] (print|top|avg|sum) ... + ngxtop info + ngxtop [options] query ... + +Options: + -l , --access-log access log file to parse. + -f , --log-format log format as specify in log_format directive. + --no-follow ngxtop default behavior is to ignore current lines in log + and only watch for new lines as they are written to the access log. + Use this flag to tell ngxtop to process the current content of the access log instead. + -t , --interval report interval when running in follow mode [default: 2.0] + + -g , --group-by group by variable [default: request_path] + -w , --having having clause [default: 1] + -o , --order-by order of output for default query [default: count] + -n , --limit limit the number of records included in report for top command [default: 10] + -a ..., --a ... add exp (must be aggregation exp: sum, avg, min, max, etc.) into output + + -v, --verbose more verbose output + -d, --debug print every line and parsed record + -h, --help print this help message. + --version print version information. + + Advanced / experimental options: + -c , --config allow ngxtop to parse nginx config file for log format and location. + -i , --filter filter in, records satisfied given expression are processed. + -p , --pre-filter in-filter expression to check in pre-parsing phase. + +Examples: + All examples read nginx config file for access log location and format. + If you want to specify the access log file and / or log format, use the -f and -a options. + + "top" like view of nginx requests + $ ngxtop + + Top 10 requested path with status 404: + $ ngxtop top request_path --filter 'status == 404' + + Top 10 requests with highest total bytes sent + $ ngxtop --order-by 'avg(bytes_sent) * count' + + Top 10 remote address, e.g., who's hitting you the most + $ ngxtop --group-by remote_addr + + Print requests with 4xx or 5xx status, together with status and http referer + $ ngxtop -i 'status >= 400' print request status http_referer + + Average body bytes sent of 200 responses of requested path begin with 'foo': + $ ngxtop avg bytes_sent --filter 'status == 200 and request_path.startswith("foo")' +""" +from contextlib import closing +import logging +import os +import re +import sqlite3 +import subprocess +import threading +import urlparse +import time +import sys + +from docopt import docopt +import tabulate + + +REGEX_SPECIAL_CHARS = r'([\.\*\+\?\|\(\)\{\}\[\]])' +REGEX_LOG_FORMAT_VARIABLE = r'\$([a-z0-9\_]+)' +LOG_FORMAT_COMBINED = '$remote_addr - $remote_user [$time_local] ' \ + '"$request" $status $body_bytes_sent ' \ + '"$http_referer" "$http_user_agent"' + +DEFAULT_QUERIES = [ + ('Summary:', + '''SELECT + count(1) AS count, + avg(bytes_sent) AS avg_bytes_sent, + count(CASE WHEN status_type = 2 THEN 1 END) AS '2xx', + count(CASE WHEN status_type = 3 THEN 1 END) AS '3xx', + count(CASE WHEN status_type = 4 THEN 1 END) AS '4xx', + count(CASE WHEN status_type = 5 THEN 1 END) AS '5xx' + FROM log + ORDER BY %(--order-by)s DESC + LIMIT %(--limit)s'''), + + ('Detailed:', + '''SELECT + %(--group-by)s, + count(1) AS count, + avg(bytes_sent) AS avg_bytes_sent, + count(CASE WHEN status_type = 2 THEN 1 END) AS '2xx', + count(CASE WHEN status_type = 3 THEN 1 END) AS '3xx', + count(CASE WHEN status_type = 4 THEN 1 END) AS '4xx', + count(CASE WHEN status_type = 5 THEN 1 END) AS '5xx' + FROM log + GROUP BY %(--group-by)s + HAVING %(--having)s + ORDER BY %(--order-by)s DESC + LIMIT %(--limit)s''') +] + +DEFAULT_FIELDS = set(['status_type', 'bytes_sent']) + + +# ==================== +# Nginx utilities +# ==================== +def get_nginx_conf_path(): + """ + Get nginx conf path based on `nginx -V` output + """ + proc = subprocess.Popen(['nginx', '-V'], stderr=subprocess.PIPE) + stdout, stderr = proc.communicate() + + conf_path_match = re.search(r'--conf-path=(\S*)', stderr) + if conf_path_match is not None: + return conf_path_match.group(1) + + prefix_match = re.search(r'--prefix=(\S*)', stderr) + if prefix_match is not None: + return prefix_match.group(1) + '/conf/nginx.conf' + return '/etc/nginx/nginx.conf' + + +def extract_nginx_conf(path, log_file=None, log_format=None): + """ + *experimental* read nginx conf file to extract access log file location and format. + TODO: rewrite this method to: + - match all access_log directive to get all possible log files + - for each log file search the correct log_format + - if more than one log file, offer user to choose which one + """ + with open(path) as conf_file: + conf = conf_file.read() + + log_format_directive = re.search(r'log_format\s+(\S+)\s+(.*?);', conf, flags=re.DOTALL) + log_format_name = log_format_directive.group(1) if log_format_directive else 'combined' + log_format = log_format_directive.group(2) if log_format_directive else 'combined' + + # take care of log format in multiple line + # only most common case, which encapsulate log format in single quote is handled + if '\n' in log_format: + log_format = ''.join(line.strip() for line in log_format.split('\n')) + if log_format.startswith("'"): + log_format = log_format.replace("'", "") + + access_log_directive = re.search(r'access_log\s+(\S+)\s+%s' % log_format_name, conf) + # Use the log file from config only when not supplied with --access-log option, + # else it is overwritten everytime. + if not log_file: + log_file = access_log_directive.group(1) if access_log_directive else 'logs/access.log' + + return log_file, log_format + + +def build_pattern(log_format): + """ + Take an nginx's log format string and return the required regexp pattern to parse the access log + """ + if log_format == 'combined': + return build_pattern(LOG_FORMAT_COMBINED) + pattern = re.sub(REGEX_SPECIAL_CHARS, r'\\\1', log_format) + pattern = re.sub(REGEX_LOG_FORMAT_VARIABLE, '(?P<\\1>.*)', pattern) + return re.compile(pattern) + + +def extract_variables(log_format): + for match in re.findall(REGEX_LOG_FORMAT_VARIABLE, log_format): + yield match + + +# ====================== +# generator utilities +# ====================== +def follow(the_file): + """ + Follow a given file and yield new lines when they are available, like `tail -f`. + """ + with open(the_file) as f: + f.seek(0, 2) # seek to eof + while True: + line = f.readline() + if not line: + time.sleep(0.1) # sleep briefly before trying again + continue + yield line + + +def map_field(field, func, dict_sequence): + """ + Apply given function to value of given key in every dictionary in sequence and + set the result as new value for that key. + """ + for item in dict_sequence: + item[field] = func(item.get(field, None)) + yield item + + +def add_field(field, func, dict_sequence): + """ + Apply given function to the record and store result in given field of current record. + Do nothing if record already contains given field. + """ + for item in dict_sequence: + if field not in item: + item[field] = func(item) + yield item + + +def trace(sequence, phase=''): + for item in sequence: + logging.debug('%s:\n%s', phase, item) + yield item + + +# ====================== +# Access log parsing +# ====================== +def parse_request_path(record): + if 'request_uri' in record: + uri = record['request_uri'] + elif 'request' in record: + uri = ' '.join(record['request'].split(' ')[1:-1]) + else: + uri = None + return urlparse.urlparse(uri).path if uri else None + + +def parse_status_type(record): + return record['status'] / 100 if 'status' in record else None + + +def to_int(value): + return int(value) if value and value != '-' else 0 + + +def to_float(value): + return float(value) if value and value != '-' else 0.0 + + +def parse_log(lines, pattern): + matches = (pattern.match(l) for l in lines) + records = (m.groupdict() for m in matches if m is not None) + records = map_field('status', to_int, records) + records = add_field('status_type', parse_status_type, records) + records = add_field('bytes_sent', lambda r: r['body_bytes_sent'], records) + records = map_field('bytes_sent', to_int, records) + records = map_field('request_time', to_float, records) + records = add_field('request_path', parse_request_path, records) + return records + + +# ================================= +# Records and statistic processor +# ================================= +class SQLProcessor(object): + def __init__(self, report_queries, fields, index_fields=None): + self.begin = False + self.report_queries = report_queries + self.index_fields = index_fields if index_fields is not None else [] + self.column_list = ','.join(fields) + self.holder_list = ','.join(':%s' % var for var in fields) + self.conn = sqlite3.connect(':memory:', check_same_thread=False) + self.init_db() + + def process(self, records): + self.begin = time.time() + insert = 'insert into log (%s) values (%s)' % (self.column_list, self.holder_list) + with closing(self.conn.cursor()) as cursor: + for r in records: + cursor.execute(insert, r) + return self.count() + + def report(self): + if not self.begin: + return '' + count = self.count() + duration = time.time() - self.begin + status = 'running for %.0f seconds, %d records processed: %.2f req/sec' + output = [status % (duration, count, count / duration)] + with closing(self.conn.cursor()) as cursor: + for query in self.report_queries: + if isinstance(query, tuple): + label, query = query + else: + label = '' + cursor.execute(query) + columns = (d[0] for d in cursor.description) + result = tabulate.tabulate(cursor.fetchall(), headers=columns, tablefmt='orgtbl', floatfmt='.3f') + output.append('%s\n%s' % (label, result)) + return '\n\n'.join(output) + + def init_db(self): + create_table = 'create table log (%s)' % self.column_list + with closing(self.conn.cursor()) as cursor: + logging.info('sqlite init: %s', create_table) + cursor.execute(create_table) + for idx, field in enumerate(self.index_fields): + sql = 'create index log_idx%d on log (%s)' % (idx, field) + logging.info('sqlite init: %s', sql) + cursor.execute(sql) + + def count(self): + with closing(self.conn.cursor()) as cursor: + cursor.execute('select count(1) from log') + return cursor.fetchone()[0] + + +# =============== +# Log processing +# =============== +def process_log(lines, pattern, processor, arguments): + pre_filer_exp = arguments['--pre-filter'] + if pre_filer_exp: + lines = (line for line in lines if eval(pre_filer_exp, {}, dict(line=line))) + + records = parse_log(lines, pattern) + + filter_exp = arguments['--filter'] + if filter_exp: + records = (r for r in records if eval(filter_exp, {}, r)) + + total = processor.process(records) + print processor.report() + return total + + +def build_processor(arguments): + fields = arguments[''] + if arguments['print']: + label = ', '.join(fields) + ':' + selections = ', '.join(fields) + query = 'select %s from log group by %s' % (selections, selections) + report_queries = [(label, query)] + elif arguments['top']: + limit = int(arguments['--limit']) + report_queries = [] + for var in fields: + label = 'top %s' % var + query = 'select %s, count(1) as count from log group by %s order by count desc limit %d' % (var, var, limit) + report_queries.append((label, query)) + elif arguments['avg']: + label = 'average %s' % fields + selections = ', '.join('avg(%s)' % var for var in fields) + query = 'select %s from log' % selections + report_queries = [(label, query)] + elif arguments['sum']: + label = 'sum %s' % fields + selections = ', '.join('sum(%s)' % var for var in fields) + query = 'select %s from log' % selections + report_queries = [(label, query)] + elif arguments['query']: + report_queries = arguments[''] + fields = arguments[''] + else: + report_queries = [(name, query % arguments) for name, query in DEFAULT_QUERIES] + fields = DEFAULT_FIELDS.union(set([arguments['--group-by']])) + + for label, query in report_queries: + logging.info('query for "%s":\n %s', label, query) + processor = SQLProcessor(report_queries, fields) + return processor + + +def build_source(access_log, arguments): + # constructing log source + if arguments['--no-follow']: + lines = open(access_log) + else: + lines = follow(access_log) + return lines + + +def build_reporter(processor, arguments): + if arguments['--no-follow']: + return None + + def report(interval=float(arguments['--interval'])): + os.system('cls' if os.name == 'nt' else 'clear') + while True: + time.sleep(interval) + output = processor.report() + os.system('cls' if os.name == 'nt' else 'clear') + print output + + thread = threading.Thread(target=report) + thread.daemon = True + return thread + + +def process(arguments): + access_log = arguments['--access-log'] + log_format = arguments['--log-format'] + if access_log is None or log_format is None: + config = arguments['--config'] + if config is None: + config = get_nginx_conf_path() + access_log, log_format = extract_nginx_conf(config, access_log) + else: + config = None + logging.info('access_log: %s', access_log) + logging.info('log_format: %s', log_format) + + if arguments['info']: + print 'configuration file:\n ', config + print 'access log file:\n ', access_log + print 'access log format:\n ', log_format + print 'available variables:\n ', ', '.join(sorted(extract_variables(log_format))) + return + + begin = time.time() + source = build_source(access_log, arguments) + pattern = build_pattern(log_format) + processor = build_processor(arguments) + reporter = build_reporter(processor, arguments) + if reporter is not None: + reporter.start() + total = process_log(source, pattern, processor, arguments) + duration = time.time() - begin + logging.info('Processed %d lines in %.3f seconds, %.2f lines/sec.', total, duration, total / duration) + + +def main(): + args = docopt(__doc__, version='xstat 0.1') + + log_level = logging.WARNING + if args['--verbose']: + log_level = logging.INFO + if args['--debug']: + log_level = logging.DEBUG + logging.basicConfig(level=log_level, format='%(levelname)s: %(message)s') + logging.debug('arguments:\n%s', args) + + try: + process(args) + except KeyboardInterrupt: + sys.exit(0) + + +if __name__ == '__main__': + main() diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index d68ced8..0000000 --- a/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -docopt -tabulate diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..e69de29 diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..db45f95 --- /dev/null +++ b/setup.py @@ -0,0 +1,34 @@ +from setuptools import setup + +setup( + name='ngxtop', + version='0.0.1', + description='Real-time metrics for nginx server', + long_description=open('README.rst').read(), + license='MIT', + + url='https://github.com/lebinh/ngxtop', + author='Binh Le', + author_email='lebinh.it@gmail.com', + + classifiers=[ + 'Development Status :: 4 - Beta', + 'License :: OSI Approved :: MIT License', + 'Environment :: Console', + 'Intended Audience :: Developers', + 'Intended Audience :: System Administrators', + 'Programming Language :: Python :: 2', + 'Programming Language :: Python :: 2.6', + 'Programming Language :: Python :: 2.7', + ], + keywords='cli monitoring nginx system', + + packages=['ngxtop'], + install_requires=['docopt', 'tabulate'], + + entry_points={ + 'console_scripts': [ + 'ngxtop = ngxtop.ngxtop:main', + ], + }, +) -- cgit v1.2.3