#!/usr/bin/env python
# Copyright (C) 2012-2014 Harel Ben-Attia
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details (doc/LICENSE contains
# a copy of it)
#
#
# Name : q (With respect to The Q Continuum)
# Author : Harel Ben Attia - harelba@gmail.com, harelba @ github, @harelba on twitter
# Requires : python with sqlite3 (standard in python>=2.6)
#
#
# q allows performing SQL-like statements on tabular text data.
#
# Its purpose is to bring SQL expressive power to manipulating text data using the Linux command line.
#
# Full Documentation and details in http://harelba.github.io/q/
#
# Run with --help for command line details
#
q_version = "1.5.0" # not released yet
import os
import sys
import sqlite3
import gzip
import glob
from optparse import OptionParser,OptionGroup
import traceback as tb
import codecs
import locale
import time
import re
from ConfigParser import ConfigParser
import traceback
import csv
import hashlib
DEBUG = False
def get_stdout_encoding(encoding_override=None):
if encoding_override is not None and encoding_override != 'none':
return encoding_override
if sys.stdout.isatty():
return sys.stdout.encoding
else:
return locale.getpreferredencoding()
SHOW_SQL = False
p = ConfigParser()
p.read([os.path.expanduser('~/.qrc'), '.qrc'])
def get_option_with_default(p, option_type, option, default):
if not p.has_option('options', option):
return default
if option_type == 'boolean':
return p.getboolean('options', option)
elif option_type == 'int':
return p.getint('options', option)
elif option_type == 'string':
return p.get('options', option)
elif option_type == 'escaped_string':
return p.get('options', option).decode('string-escape')
else:
raise Exception("Unknown option type")
default_beautify = get_option_with_default(p, 'boolean', 'beautify', False)
default_gzipped = get_option_with_default(p, 'boolean', 'gzipped', False)
default_delimiter = get_option_with_default(
p, 'escaped_string', 'delimiter', None)
default_output_delimiter = get_option_with_default(
p, 'escaped_string', 'output_delimiter', None)
default_skip_header = get_option_with_default(p, 'int', 'skip_header', 0)
default_formatting = get_option_with_default(p, 'string', 'formatting', None)
default_encoding = get_option_with_default(p, 'string', 'encoding', 'UTF-8')
default_output_encoding = get_option_with_default(p, 'string', 'encoding', None)
default_query_encoding = get_option_with_default(p, 'string', 'query_encoding', locale.getpreferredencoding())
default_output_header = get_option_with_default(p, 'string', 'output_header', False)
parser = OptionParser(usage="""
q allows performing SQL-like statements on tabular text data.
Its purpose is to bring SQL expressive power to manipulating text data using the Linux command line.
Basic usage is q "<sql like query>" where table names are just regular file names (Use - to read from standard input)
When the input contains a header row, use -H, and column names will be set according to the header row content. If there isn't a header row, then columns will automatically be named c1..cN.
Column types are detected automatically. Use -A in order to see the column name/type analysis.
Delimiter can be set using the -d (or -t) option. Output delimiter can be set using -D
All sqlite3 SQL constructs are supported.
Examples:
Example 1: ls -ltrd * | q "select c1,count(1) from - group by c1"
This example would print a count of each unique permission string in the current folder.
Example 2: seq 1 1000 | q "select avg(c1),sum(c1) from -"
This example would provide the average and the sum of the numbers in the range 1 to 1000
Example 3: sudo find /tmp -ls | q "select c5,c6,sum(c7)/1024.0/1024 as total from - group by c5,c6 order by total desc"
This example will output the total size in MB per user+group in the /tmp subtree
See the help or https://github.com/harelba/q/ for more details.
""")
#-----------------------------------------------
parser.add_option("-v", "--version", dest="version", default=False, action="store_true",
help="Print version")
#-----------------------------------------------
input_data_option_group = OptionGroup(parser,"Input Data Options")
input_data_option_group.add_option("-H", "--skip-header", dest="skip_header", default=default_skip_header, action="store_true",
help="Skip header row. This has been changed from earlier version - Only one header row is supported, and the header row is used for column naming")
input_data_option_group.add_option("-d", "--delimiter", dest="delimiter", default=default_delimiter,
help