summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rwxr-xr-xbin/q15
-rwxr-xr-xtest/test-suite245
2 files changed, 256 insertions, 4 deletions
diff --git a/bin/q b/bin/q
index dac5af7..baad15c 100755
--- a/bin/q
+++ b/bin/q
@@ -27,7 +27,7 @@
#
# Run with --help for command line details
#
-q_version = "1.4.1"
+q_version = "1.5.0" # not released yet
import os
import sys
@@ -141,6 +141,10 @@ input_data_option_group.add_option("-c", "--column-count", dest="column_count",
help="Specific column count when using relaxed or strict mode")
input_data_option_group.add_option("-k", "--keep-leading-whitespace", dest="keep_leading_whitespace_in_values", default=False, action="store_true",
help="Keep leading whitespace in values. Default behavior strips leading whitespace off values, in order to provide out-of-the-box usability for simple use cases. If you need to preserve whitespace, use this flag.")
+input_data_option_group.add_option("--disable-double-double-quoting", dest="disable_double_double_quoting", default=True, action="store_false",
+ help="Disable support for double double-quoting for escaping the double quote character. By default, you can use \"\" inside double quoted fields to escape double quotes. Mainly for backward compatibility.")
+input_data_option_group.add_option("--disable-escaped-double-quoting", dest="disable_escaped_double_quoting", default=True, action="store_false",
+ help="Disable support for escaped double-quoting for escaping the double quote character. By default, you can use \\\" inside double quoted fields to escape double quotes. Mainly for backward compatibility.")
parser.add_option_group(input_data_option_group)
#-----------------------------------------------
output_data_option_group = OptionGroup(parser,"Output Options")
@@ -155,6 +159,7 @@ output_data_option_group.add_option("-f", "--formatting", dest="formatting", def
help="Output-level formatting, in the format X=fmt,Y=fmt etc, where X,Y are output column numbers (e.g. 1 for first SELECT column etc.")
output_data_option_group.add_option("-E", "--output-encoding", dest="output_encoding", default=default_output_encoding,
help="Output encoding. Defaults to 'none', leading to selecting the system/terminal encoding")
+# -M will be added here for supporting output quoting mode in the future
parser.add_option_group(output_data_option_group)
#-----------------------------------------------
query_option_group = OptionGroup(parser,"Query Related Options")
@@ -972,7 +977,13 @@ else:
skip_initial_space = True
q_dialect = {'skipinitialspace': skip_initial_space, 'quoting': 0,
- 'delimiter': options.delimiter, 'quotechar': '"', 'doublequote': False}
+             'delimiter': options.delimiter, 'quotechar': '"'}
+
+# Both option values below are True unless the matching --disable-* flag was
+# passed (the options are declared with default=True and action="store_false"),
+# so a True value means the quoting mechanism stays enabled.
+q_dialect['doublequote'] = options.disable_double_double_quoting
+
+if options.disable_escaped_double_quoting:
+    # Backslash acts as the escape character, so \" inside a quoted field
+    # yields a literal double quote.
+    q_dialect['escapechar'] = '\\'
+
csv.register_dialect('q', **q_dialect)
file_reading_method = 'csv'
diff --git a/test/test-suite b/test/test-suite
index 3eaae0d..2212ff8 100755
--- a/test/test-suite
+++ b/test/test-suite
@@ -27,7 +27,7 @@ def run_command(cmd_to_run):
p = Popen(cmd_to_run, stdout=PIPE, stderr=PIPE, shell=True)
o, e = p.communicate()
# remove last newline
- o = o.strip()
+ o = o.rstrip()
e = e.strip()
# split rows
if o != '':
@@ -72,6 +72,26 @@ sample_data_with_empty_string_no_header = "\n".join(
sample_data_with_header = header_row + "\n" + sample_data_no_header
sample_data_with_missing_header_names = "name,value1\n" + sample_data_no_header
+sample_quoted_data = '''non_quoted regular_double_quoted double_double_quoted escaped_double_quoted multiline_double_double_quoted multiline_escaped_double_quoted
+control-value-1 "control-value-2" control-value-3 "control-value-4" control-value-5 "control-value-6"
+non-quoted-value "this is a quoted value" "this is a ""double double"" quoted value" "this is an escaped \\"quoted value\\"" "this is a double double quoted ""multiline
+ value""." "this is an escaped \\"multiline
+ value\\"."
+control-value-1 "control-value-2" control-value-3 "control-value-4" control-value-5 "control-value-6"
+'''
+
+double_double_quoted_data = '''regular_double_quoted double_double_quoted
+"this is a quoted value" "this is a quoted value with ""double double quotes"""
+'''
+
+escaped_double_quoted_data = '''regular_double_quoted escaped_double_quoted
+"this is a quoted value" "this is a quoted value with \\"escaped double quotes\\""
+'''
+
+combined_quoted_data = '''regular_double_quoted double_double_quoted escaped_double_quoted
+"this is a quoted value" "this is a quoted value with ""double double quotes""" "this is a quoted value with \\"escaped double quotes\\""
+'''
+
# Values with leading whitespace
sample_data_rows_with_spaces = ['a,1,0', ' b, 2,0', 'c,,0']
sample_data_with_spaces_no_header = "\n".join(
@@ -715,8 +735,229 @@ class BasicTests(AbstractQTestCase):
self.assertTrue(e[0].startswith("Could not read query from file"))
-
+    def test_non_quoted_values_in_quoted_data(self):
+        # Selects c1 (the unquoted column) from the quoted sample data and
+        # checks every returned row.
+        tmp_data_file = self.create_file_with_data(sample_quoted_data)
+
+        cmd = '../bin/q -d " " "select c1 from %s"' % tmp_data_file.name
+        retcode, o, e = run_command(cmd)
+
+        self.assertEquals(retcode,0)
+        self.assertEquals(len(e),0)
+        self.assertEquals(len(o),4)
+
+        # assertEquals rather than assertTrue: assertTrue(x, msg) only checks
+        # that x is truthy and uses the second argument as the failure message.
+        self.assertEquals(o[0],'non_quoted')
+        self.assertEquals(o[1],'control-value-1')
+        self.assertEquals(o[2],'non-quoted-value')
+        self.assertEquals(o[3],'control-value-1')
+
+        self.cleanup(tmp_data_file)
+
+    def test_regular_quoted_values_in_quoted_data(self):
+        # Selects c2 (plain double-quoted values) from the quoted sample data
+        # and checks every returned row.
+        tmp_data_file = self.create_file_with_data(sample_quoted_data)
+
+        cmd = '../bin/q -d " " "select c2 from %s"' % tmp_data_file.name
+        retcode, o, e = run_command(cmd)
+
+        self.assertEquals(retcode,0)
+        self.assertEquals(len(e),0)
+        self.assertEquals(len(o),4)
+
+        # assertEquals rather than assertTrue: assertTrue(x, msg) only checks
+        # that x is truthy and uses the second argument as the failure message.
+        self.assertEquals(o[0],'regular_double_quoted')
+        self.assertEquals(o[1],'control-value-2')
+        self.assertEquals(o[2],'this is a quoted value')
+        self.assertEquals(o[3],'control-value-2')
+
+        self.cleanup(tmp_data_file)
+
+    def test_double_double_quoted_values_in_quoted_data(self):
+        # Selects c3 (values escaping quotes as "") from the quoted sample data
+        # and checks every returned row.
+        tmp_data_file = self.create_file_with_data(sample_quoted_data)
+
+        cmd = '../bin/q -d " " "select c3 from %s"' % tmp_data_file.name
+        retcode, o, e = run_command(cmd)
+
+        self.assertEquals(retcode,0)
+        self.assertEquals(len(e),0)
+        self.assertEquals(len(o),4)
+
+        # assertEquals rather than assertTrue: assertTrue(x, msg) only checks
+        # that x is truthy and uses the second argument as the failure message.
+        self.assertEquals(o[0],'double_double_quoted')
+        self.assertEquals(o[1],'control-value-3')
+        self.assertEquals(o[2],'this is a "double double" quoted value')
+        self.assertEquals(o[3],'control-value-3')
+
+        self.cleanup(tmp_data_file)
+
+    def test_escaped_double_quoted_values_in_quoted_data(self):
+        # Selects c4 (values escaping quotes with a backslash) from the quoted
+        # sample data and checks every returned row.
+        tmp_data_file = self.create_file_with_data(sample_quoted_data)
+
+        cmd = '../bin/q -d " " "select c4 from %s"' % tmp_data_file.name
+        retcode, o, e = run_command(cmd)
+
+        self.assertEquals(retcode,0)
+        self.assertEquals(len(e),0)
+        self.assertEquals(len(o),4)
+
+        # assertEquals rather than assertTrue: assertTrue(x, msg) only checks
+        # that x is truthy and uses the second argument as the failure message.
+        self.assertEquals(o[0],'escaped_double_quoted')
+        self.assertEquals(o[1],'control-value-4')
+        self.assertEquals(o[2],'this is an escaped "quoted value"')
+        self.assertEquals(o[3],'control-value-4')
+
+        self.cleanup(tmp_data_file)
+
+    def test_multiline_double_double_quoted_values_in_quoted_data(self):
+        # Selects c5 (a ""-escaped value spanning two lines) from the quoted
+        # sample data, with the embedded newline replaced by '::' in the query.
+        tmp_data_file = self.create_file_with_data(sample_quoted_data)
+
+        # FIXME Need to convert \0a to proper encoding suitable for the person running the tests.
+        cmd = '../bin/q -d " " "select replace(c5,X\'0A\',\'::\') from %s"' % tmp_data_file.name
+        retcode, o, e = run_command(cmd)
+
+        self.assertEquals(retcode,0)
+        self.assertEquals(len(e),0)
+        self.assertEquals(len(o),4)
+
+        # assertEquals rather than assertTrue: assertTrue(x, msg) only checks
+        # that x is truthy and uses the second argument as the failure message.
+        self.assertEquals(o[0],'multiline_double_double_quoted')
+        self.assertEquals(o[1],'control-value-5')
+        # The query replaces the embedded newline with '::', so the expected
+        # value carries '::' instead of '\n'.
+        # NOTE(review): assumes the fixture's continuation line starts with a
+        # single space - confirm against sample_quoted_data.
+        self.assertEquals(o[2],'this is a double double quoted "multiline:: value".')
+        self.assertEquals(o[3],'control-value-5')
+
+        self.cleanup(tmp_data_file)
+
+    def test_multiline_escaped_double_quoted_values_in_quoted_data(self):
+        # Selects c6 (a backslash-escaped value spanning two lines) from the
+        # quoted sample data, with the embedded newline replaced by '::' in the query.
+        tmp_data_file = self.create_file_with_data(sample_quoted_data)
+
+        # FIXME Need to convert \0a to proper encoding suitable for the person running the tests.
+        cmd = '../bin/q -d " " "select replace(c6,X\'0A\',\'::\') from %s"' % tmp_data_file.name
+        retcode, o, e = run_command(cmd)
+
+        self.assertEquals(retcode,0)
+        self.assertEquals(len(e),0)
+        self.assertEquals(len(o),4)
+
+        # assertEquals rather than assertTrue: assertTrue(x, msg) only checks
+        # that x is truthy and uses the second argument as the failure message.
+        self.assertEquals(o[0],'multiline_escaped_double_quoted')
+        self.assertEquals(o[1],'control-value-6')
+        # NOTE(review): assumes the fixture's continuation line starts with a
+        # single space - confirm against sample_quoted_data.
+        self.assertEquals(o[2],'this is an escaped "multiline:: value".')
+        self.assertEquals(o[3],'control-value-6')
+
+        self.cleanup(tmp_data_file)
+
+    def test_disable_double_double_quoted_data_flag__values(self):
+        # Backward-compatibility check only: this test (and the flag it covers)
+        # may be removed completely in the future.
+        tmp_data_file = self.create_file_with_data(double_double_quoted_data)
+
+        # (selected column, expected first output row, expected second output row)
+        expectations = [
+            ('c2', 'double_double_quoted', 'this is a quoted value with "double'),
+            ('c3', '', 'double'),
+            ('c4', '', 'quotes"""'),
+        ]
+
+        for column, first_row, second_row in expectations:
+            cmd = '../bin/q -d " " --disable-double-double-quoting "select %s from %s"' % (column, tmp_data_file.name)
+            retcode, o, e = run_command(cmd)
+
+            self.assertEquals(retcode,0)
+            self.assertEquals(len(e),0)
+            self.assertEquals(len(o),2)
+
+            self.assertEquals(o[0],first_row)
+            self.assertEquals(o[1],second_row)
+
+        self.cleanup(tmp_data_file)
+
+    def test_disable_escaped_double_quoted_data_flag__values(self):
+        # Backward-compatibility check only: this test (and the flag it covers)
+        # may be removed completely in the future.
+        tmp_data_file = self.create_file_with_data(escaped_double_quoted_data)
+
+        # (selected column, expected first output row, expected second output row)
+        expectations = [
+            ('c2', 'escaped_double_quoted', 'this is a quoted value with \\escaped'),
+            ('c3', '', 'double'),
+            ('c4', '', 'quotes\\""'),
+        ]
+
+        for column, first_row, second_row in expectations:
+            cmd = '../bin/q -d " " --disable-escaped-double-quoting "select %s from %s"' % (column, tmp_data_file.name)
+            retcode, o, e = run_command(cmd)
+
+            self.assertEquals(retcode,0)
+            self.assertEquals(len(e),0)
+            self.assertEquals(len(o),2)
+
+            self.assertEquals(o[0],first_row)
+            self.assertEquals(o[1],second_row)
+
+        self.cleanup(tmp_data_file)
+
+    def test_combined_quoted_data_flags__number_of_columns_detected(self):
+        # Backward-compatibility check only: this test (and the flags it covers)
+        # may be removed completely in the future.
+        tmp_data_file = self.create_file_with_data(combined_quoted_data)
+
+        no_double_double = '--disable-double-double-quoting '
+        no_escaped = '--disable-escaped-double-quoting '
+
+        # (flags inserted into the command line, number of fields expected in the -A output)
+        scenarios = [
+            (no_double_double + no_escaped, 7),
+            (no_escaped, 5),
+            (no_double_double, 5),
+            ('', 3), # only 3 fields, which is the correct amount
+        ]
+
+        for flags, expected_field_count in scenarios:
+            cmd = '../bin/q -d " " %s"select * from %s" -A' % (flags, tmp_data_file.name)
+            retcode, o, e = run_command(cmd)
+
+            self.assertEquals(retcode,0)
+            self.assertEquals(len(e),0)
+            o = o[1:] # remove the first "Table for file..." line in the output
+
+            self.assertEquals(len(o),expected_field_count)
+
+        self.cleanup(tmp_data_file)
class ParsingModeTests(AbstractQTestCase):