summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rwxr-xr-x bin/q 72
-rwxr-xr-x test/test-suite 177
2 files changed, 239 insertions, 10 deletions
diff --git a/bin/q b/bin/q
index fa5e5c3..edbd310 100755
--- a/bin/q
+++ b/bin/q
@@ -47,11 +47,14 @@ import csv
DEBUG = False
-# Encode stdout properly,
-if sys.stdout.isatty():
- STDOUT = codecs.getwriter(sys.stdout.encoding)(sys.stdout)
-else:
- STDOUT = codecs.getwriter(locale.getpreferredencoding())(sys.stdout)
+def get_stdout_encoding(encoding_override=None):
+ if encoding_override is not None and encoding_override != 'none':
+ return encoding_override
+
+ if sys.stdout.isatty():
+ return sys.stdout.encoding
+ else:
+ return locale.getpreferredencoding()
SHOW_SQL = False
@@ -82,6 +85,8 @@ default_output_delimiter = get_option_with_default(
default_skip_header = get_option_with_default(p, 'int', 'skip_header', 0)
default_formatting = get_option_with_default(p, 'string', 'formatting', None)
default_encoding = get_option_with_default(p, 'string', 'encoding', 'UTF-8')
+default_output_encoding = get_option_with_default(p, 'string', 'encoding', None)
+default_query_encoding = get_option_with_default(p, 'string', 'query_encoding', locale.getpreferredencoding())
parser = OptionParser(usage="""
q allows performing SQL-like statements on tabular text data.
@@ -116,7 +121,7 @@ parser.add_option("-b", "--beautify", dest="beautify", default=default_beautify,
parser.add_option("-z", "--gzipped", dest="gzipped", default=default_gzipped, action="store_true",
help="Data is gzipped. Useful for reading from stdin. For files, .gz means automatic gunzipping")
parser.add_option("-d", "--delimiter", dest="delimiter", default=default_delimiter,
- help="Field delimiter. If none specified, then space is used as the delimiter. If you need multi-character delimiters, run the tool with engine version 1 by adding '-E v1'. Using v1 will also revert to the old behavior where if no delimiter is provided, then any whitespace will be considered as a delimiter.")
+ help="Field delimiter. If none specified, then space is used as the delimiter.")
parser.add_option("-D", "--output-delimiter", dest="output_delimiter", default=default_output_delimiter,
help="Field delimiter for output. If none specified, then the -d delimiter is used if present, or space if no delimiter is specified")
parser.add_option("-t", "--tab-delimited", dest="tab_delimited", default=False, action="store_true",
@@ -129,6 +134,12 @@ parser.add_option("-f", "--formatting", dest="formatting", default=default_forma
help="Output-level formatting, in the format X=fmt,Y=fmt etc, where X,Y are output column numbers (e.g. 1 for first SELECT column etc.")
parser.add_option("-e", "--encoding", dest="encoding", default=default_encoding,
help="Input file encoding. Defaults to UTF-8. set to none for not setting any encoding - faster, but at your own risk...")
+parser.add_option("-E", "--output-encoding", dest="output_encoding", default=default_output_encoding,
+ help="Output encoding. Defaults to 'none', leading to selecting the system/terminal encoding")
+parser.add_option("-Q", "--query-encoding", dest="query_encoding", default=default_query_encoding,
+ help="query text encoding. Experimental. Please send your feedback on this")
+parser.add_option("-q", "--query-filename", dest="query_filename", default=None,
+ help="Read query from the provided filename instead of the command line, possibly using the provided query encoding (using -Q).")
parser.add_option("-v", "--version", dest="version", default=False, action="store_true",
help="Print version")
parser.add_option("-A", "--analyze-only", dest="analyze_only", action='store_true',
@@ -165,7 +176,7 @@ class Sqlite3DB(object):
def update_many(self, sql, params):
try:
if self.show_sql:
- print sql
+ print sql, " params: " + str(params)
self.cursor.executemany(sql, params)
finally:
pass # cursor.close()
@@ -842,19 +853,57 @@ if options.version:
print "q version %s" % q_version
sys.exit(0)
-if len(args) != 1:
- parser.print_help()
+if len(args) > 1:
+ print >>sys.stderr,"Must provide query as one parameter, enclosed in quotes, or through a file with the -f parameter"
+ sys.exit(1)
+
+if len(args) == 0 and options.query_filename is None:
+ print >>sys.stderr,"Must provide a query in the command line, or through the a file with the -f parameter"
+ sys.exit(1)
+
+if options.query_filename is not None:
+ if len(args) != 0:
+ print >>sys.stderr,"Can't provide both a query file and a query on the command line"
+ sys.exit(1)
+ try:
+ f = file(options.query_filename)
+ query_str = f.read()
+ f.close()
+ except:
+ print >>sys.stderr,"Could not read query from file %s" % options.query_filename
+ sys.exit(1)
+else:
+ query_str = args[0]
+
+if options.query_encoding is not None and options.query_encoding != 'none':
+ try:
+ query_str = query_str.decode(options.query_encoding)
+ except:
+ print >>sys.stderr,"Could not decode query using the provided query encoding (%s)" % options.query_encoding
+ sys.exit(3)
+
+query_str = query_str.strip()
+
+if len(query_str) == 0:
+ print >>sys.stderr,"Query cannot be empty"
sys.exit(1)
if options.mode not in ['fluffy', 'relaxed', 'strict']:
print >>sys.stderr, "Parsing mode can be one of fluffy, relaxed or strict"
sys.exit(13)
+output_encoding = get_stdout_encoding(options.output_encoding)
+try:
+ STDOUT = codecs.getwriter(output_encoding)(sys.stdout)
+except:
+ print >>sys.stderr,"Could not create output stream using output encoding %s" % (output_encoding)
+ sys.exit(200)
+
# Create DB object
db = Sqlite3DB()
# Create SQL statment
-sql_object = Sql('%s' % args[0])
+sql_object = Sql('%s' % query_str)
# If the user flagged for a tab-delimited file then set the delimiter to tab
if options.tab_delimited:
@@ -991,6 +1040,9 @@ try:
row_str.append(fmt_str % "")
STDOUT.write(output_delimiter.join(row_str) + "\n")
+except (UnicodeEncodeError, UnicodeError), e:
+ print >>sys.stderr, "Cannot encode data. Error:%s" % e
+ sys.exit(3)
except IOError, e:
if e.errno == 32:
# broken pipe, that's ok
diff --git a/test/test-suite b/test/test-suite
index fcedb3e..f2df122 100755
--- a/test/test-suite
+++ b/test/test-suite
@@ -17,7 +17,11 @@ import sys
import os
import time
from tempfile import NamedTemporaryFile
+import locale
+# q uses this encoding as the default output encoding. Some of the tests use it in order to
+# make sure that the output is correctly encoded
+SYSTEM_ENCODING = locale.getpreferredencoding()
def run_command(cmd_to_run):
p = Popen(cmd_to_run, stdout=PIPE, stderr=PIPE, shell=True)
@@ -77,6 +81,9 @@ header_row_with_spaces = 'name,value 1,value2'
sample_data_with_spaces_with_header = header_row_with_spaces + \
"\n" + sample_data_with_spaces_no_header
+long_value1 = "23683289372328372328373"
+int_value = "2328372328373"
+sample_data_with_long_values = "%s\n%s\n%s" % (long_value1,int_value,int_value)
def one_column_warning(e):
return e[0].startswith('Warning: column count is one')
@@ -421,6 +428,8 @@ class BasicTests(AbstractQTestCase):
self.assertEquals(o[1], 'b')
self.assertEquals(o[2], 'c')
+ self.cleanup(tmpfile)
+
def test_keep_leading_whitespace_in_values(self):
tmpfile = self.create_file_with_data(sample_data_with_spaces_no_header)
cmd = '../bin/q -d , "select c1 from %s" -k' % tmpfile.name
@@ -434,6 +443,8 @@ class BasicTests(AbstractQTestCase):
self.assertEquals(o[1], ' b')
self.assertEquals(o[2], 'c')
+ self.cleanup(tmpfile)
+
def test_no_impact_of_keeping_leading_whitespace_on_integers(self):
tmpfile = self.create_file_with_data(sample_data_with_spaces_no_header)
cmd = '../bin/q -d , "select c2 from %s" -k -A' % tmpfile.name
@@ -448,6 +459,8 @@ class BasicTests(AbstractQTestCase):
self.assertEquals(o[2].strip(), '`c2` - int')
self.assertEquals(o[3].strip(), '`c3` - int')
+ self.cleanup(tmpfile)
+
def test_spaces_in_header_row(self):
tmpfile = self.create_file_with_data(
header_row_with_spaces + "\n" + sample_data_no_header)
@@ -462,6 +475,8 @@ class BasicTests(AbstractQTestCase):
self.assertEquals(o[1], 'b,2')
self.assertEquals(o[2], 'c,')
+ self.cleanup(tmpfile)
+
def test_column_analysis_for_spaces_in_header_row(self):
tmpfile = self.create_file_with_data(
header_row_with_spaces + "\n" + sample_data_no_header)
@@ -477,6 +492,168 @@ class BasicTests(AbstractQTestCase):
self.assertEquals(o[2].strip(), '`value 1` - int')
self.assertEquals(o[3].strip(), '`value2` - int')
+ self.cleanup(tmpfile)
+
+ def test_no_query_in_command_line(self):
+ cmd = '../bin/q -d , ""'
+ retcode, o, e = run_command(cmd)
+
+ self.assertEquals(retcode, 1)
+ self.assertEquals(len(e), 1)
+ self.assertEquals(len(o), 0)
+
+ self.assertEquals(e[0],'Query cannot be empty')
+
+ def test_empty_query_in_command_line(self):
+ cmd = '../bin/q -d , " "'
+ retcode, o, e = run_command(cmd)
+
+ self.assertEquals(retcode, 1)
+ self.assertEquals(len(e), 1)
+ self.assertEquals(len(o), 0)
+
+ self.assertEquals(e[0],'Query cannot be empty')
+
+ def test_too_many_command_line_parameters(self):
+ cmd = '../bin/q -d , select *'
+ retcode, o, e = run_command(cmd)
+
+ self.assertEquals(retcode, 1)
+ self.assertEquals(len(e), 1)
+ self.assertEquals(len(o), 0)
+
+ self.assertTrue(e[0].startswith('Must provide query as one parameter'))
+
+ def test_use_query_file(self):
+ tmp_data_file = self.create_file_with_data(sample_data_with_header)
+ tmp_query_file = self.create_file_with_data("select name from %s" % tmp_data_file.name)
+
+ cmd = '../bin/q -d , -q %s -H' % tmp_query_file.name
+ retcode, o, e = run_command(cmd)
+
+ self.assertEquals(retcode, 0)
+ self.assertEquals(len(e), 0)
+ self.assertEquals(len(o), 3)
+
+ self.assertEquals(o[0], 'a')
+ self.assertEquals(o[1], 'b')
+ self.assertEquals(o[2], 'c')
+
+ self.cleanup(tmp_data_file)
+ self.cleanup(tmp_query_file)
+
+ def test_use_query_file_with_incorrect_query_encoding(self):
+ tmp_data_file = self.create_file_with_data(sample_data_with_header)
+ tmp_query_file = self.create_file_with_data("select name,'Hr\xc3\xa1\xc4\x8d' from %s" % tmp_data_file.name,encoding=None)
+
+ cmd = '../bin/q -d , -q %s -H -Q ascii' % tmp_query_file.name
+ retcode, o, e = run_command(cmd)
+
+ self.assertEquals(retcode,3)
+ self.assertEquals(len(o),0)
+ self.assertEquals(len(e),1)
+
+ self.assertTrue(e[0].startswith('Could not decode query using the provided query encoding'))
+
+ self.cleanup(tmp_data_file)
+ self.cleanup(tmp_query_file)
+
+ def test_use_query_file_with_query_encoding(self):
+ tmp_data_file = self.create_file_with_data(sample_data_with_header)
+ tmp_query_file = self.create_file_with_data("select name,'Hr\xc3\xa1\xc4\x8d' from %s" % tmp_data_file.name,encoding=None)
+
+ cmd = '../bin/q -d , -q %s -H -Q utf-8' % tmp_query_file.name
+ retcode, o, e = run_command(cmd)
+
+ self.assertEquals(retcode, 0)
+ self.assertEquals(len(e), 0)
+ self.assertEquals(len(o), 3)
+
+ self.assertEquals(o[0].decode(SYSTEM_ENCODING), u'a,Hr\xe1\u010d')
+ self.assertEquals(o[1].decode(SYSTEM_ENCODING), u'b,Hr\xe1\u010d')
+ self.assertEquals(o[2].decode(SYSTEM_ENCODING), u'c,Hr\xe1\u010d')
+
+ self.cleanup(tmp_data_file)
+ self.cleanup(tmp_query_file)
+
+ def test_use_query_file_and_command_line(self):
+ tmp_data_file = self.create_file_with_data(sample_data_with_header)
+ tmp_query_file = self.create_file_with_data("select name from %s" % tmp_data_file.name)
+
+ cmd = '../bin/q -d , -q %s -H "select * from ppp"' % tmp_query_file.name
+ retcode, o, e = run_command(cmd)
+
+ self.assertEquals(retcode, 1)
+ self.assertEquals(len(e), 1)
+ self.assertEquals(len(o), 0)
+
+ self.assertTrue(e[0].startswith("Can't provide both a query file and a query on the command line"))
+
+ self.cleanup(tmp_data_file)
+ self.cleanup(tmp_query_file)
+
+ def test_select_output_encoding(self):
+ tmp_data_file = self.create_file_with_data(sample_data_with_header)
+ tmp_query_file = self.create_file_with_data("select 'Hr\xc3\xa1\xc4\x8d' from %s" % tmp_data_file.name,encoding=None)
+
+ for target_encoding in ['utf-8','ibm852']:
+ cmd = '../bin/q -d , -q %s -H -Q utf-8 -E %s' % (tmp_query_file.name,target_encoding)
+ retcode, o, e = run_command(cmd)
+
+ self.assertEquals(retcode, 0)
+ self.assertEquals(len(e), 0)
+ self.assertEquals(len(o), 3)
+
+ self.assertEquals(o[0].decode(target_encoding), u'Hr\xe1\u010d')
+ self.assertEquals(o[1].decode(target_encoding), u'Hr\xe1\u010d')
+ self.assertEquals(o[2].decode(target_encoding), u'Hr\xe1\u010d')
+
+ self.cleanup(tmp_data_file)
+ self.cleanup(tmp_query_file)
+
+ def test_select_failed_output_encoding(self):
+ tmp_data_file = self.create_file_with_data(sample_data_with_header)
+ tmp_query_file = self.create_file_with_data("select 'Hr\xc3\xa1\xc4\x8d' from %s" % tmp_data_file.name,encoding=None)
+
+ cmd = '../bin/q -d , -q %s -H -Q utf-8 -E ascii' % tmp_query_file.name
+ retcode, o, e = run_command(cmd)
+
+ self.assertEquals(retcode, 3)
+ self.assertEquals(len(e), 1)
+ self.assertEquals(len(o), 0)
+
+ self.assertTrue(e[0].startswith('Cannot encode data'))
+
+ self.cleanup(tmp_data_file)
+ self.cleanup(tmp_query_file)
+
+
+ def test_use_query_file_with_empty_query(self):
+ tmp_query_file = self.create_file_with_data(" ")
+
+ cmd = '../bin/q -d , -q %s -H' % tmp_query_file.name
+ retcode, o, e = run_command(cmd)
+
+ self.assertEquals(retcode, 1)
+ self.assertEquals(len(e), 1)
+ self.assertEquals(len(o), 0)
+
+ self.assertTrue(e[0].startswith("Query cannot be empty"))
+
+ self.cleanup(tmp_query_file)
+
+ def test_use_non_existent_query_file(self):
+ cmd = '../bin/q -d , -q non-existent-query-file -H'
+ retcode, o, e = run_command(cmd)
+
+ self.assertEquals(retcode, 1)
+ self.assertEquals(len(e), 1)
+ self.assertEquals(len(o), 0)
+
+ self.assertTrue(e[0].startswith("Could not read query from file"))
+
+
+
class ParsingModeTests(AbstractQTestCase):