diff options
-rwxr-xr-x | bin/q | 15 | ||||
-rwxr-xr-x | test/test-suite | 245 |
2 files changed, 256 insertions, 4 deletions
@@ -27,7 +27,7 @@
 #
 # Run with --help for command line details
 #
-q_version = "1.4.1"
+q_version = "1.5.0" # not released yet
 
 import os
 import sys
@@ -141,6 +141,10 @@ input_data_option_group.add_option("-c", "--column-count", dest="column_count",
                                    help="Specific column count when using relaxed or strict mode")
 input_data_option_group.add_option("-k", "--keep-leading-whitespace", dest="keep_leading_whitespace_in_values", default=False, action="store_true",
                                    help="Keep leading whitespace in values. Default behavior strips leading whitespace off values, in order to provide out-of-the-box usability for simple use cases. If you need to preserve whitespace, use this flag.")
+input_data_option_group.add_option("--disable-double-double-quoting", dest="disable_double_double_quoting", default=True, action="store_false",
+                                   help="Disable support for double double-quoting for escaping the double quote character. By default, you can use \"\" inside double quoted fields to escape double quotes. Mainly for backward compatibility.")
+input_data_option_group.add_option("--disable-escaped-double-quoting", dest="disable_escaped_double_quoting", default=True, action="store_false",
+                                   help="Disable support for escaped double-quoting for escaping the double quote character. By default, you can use \\\" inside double quoted fields to escape double quotes. Mainly for backward compatibility.")
 parser.add_option_group(input_data_option_group)
 #-----------------------------------------------
 output_data_option_group = OptionGroup(parser,"Output Options")
@@ -155,6 +159,7 @@ output_data_option_group.add_option("-f", "--formatting", dest="formatting", def
                                    help="Output-level formatting, in the format X=fmt,Y=fmt etc, where X,Y are output column numbers (e.g. 1 for first SELECT column etc.")
 output_data_option_group.add_option("-E", "--output-encoding", dest="output_encoding", default=default_output_encoding,
                                    help="Output encoding. Defaults to 'none', leading to selecting the system/terminal encoding")
+# -M will be added here for supporting output quoting mode in the future
 parser.add_option_group(output_data_option_group)
 #-----------------------------------------------
 query_option_group = OptionGroup(parser,"Query Related Options")
@@ -972,7 +977,13 @@ else:
     skip_initial_space = True
 
 q_dialect = {'skipinitialspace': skip_initial_space, 'quoting': 0,
-             'delimiter': options.delimiter, 'quotechar': '"', 'doublequote': False}
+             'delimiter': options.delimiter, 'quotechar': '"' }
+
+q_dialect['doublequote'] = options.disable_double_double_quoting  # store_false option with default=True: stays True unless --disable-double-double-quoting is passed
+
+if options.disable_escaped_double_quoting:  # likewise True by default; --disable-escaped-double-quoting turns it off
+    q_dialect['escapechar'] = '\\'
+
 csv.register_dialect('q', **q_dialect)
 file_reading_method = 'csv'
 
diff --git a/test/test-suite b/test/test-suite
index 3eaae0d..2212ff8 100755
--- a/test/test-suite
+++ b/test/test-suite
@@ -27,7 +27,7 @@ def run_command(cmd_to_run):
     p = Popen(cmd_to_run, stdout=PIPE, stderr=PIPE, shell=True)
     o, e = p.communicate()
     # remove last newline
-    o = o.strip()
+    o = o.rstrip()
     e = e.strip()
     # split rows
     if o != '':
@@ -72,6 +72,26 @@ sample_data_with_empty_string_no_header = "\n".join(
 sample_data_with_header = header_row + "\n" + sample_data_no_header
 sample_data_with_missing_header_names = "name,value1\n" + sample_data_no_header
 
+sample_quoted_data = '''non_quoted regular_double_quoted double_double_quoted escaped_double_quoted multiline_double_double_quoted multiline_escaped_double_quoted
+control-value-1 "control-value-2" control-value-3 "control-value-4" control-value-5 "control-value-6"
+non-quoted-value "this is a quoted value" "this is a ""double double"" quoted value" "this is an escaped \\"quoted value\\"" "this is a double double quoted ""multiline
+ value""." "this is an escaped \\"multiline
+ value\\"."
+control-value-1 "control-value-2" control-value-3 "control-value-4" control-value-5 "control-value-6"
+'''
+
+double_double_quoted_data = '''regular_double_quoted double_double_quoted
+"this is a quoted value" "this is a quoted value with ""double double quotes"""
+'''
+
+escaped_double_quoted_data = '''regular_double_quoted escaped_double_quoted
+"this is a quoted value" "this is a quoted value with \\"escaped double quotes\\""
+'''
+
+combined_quoted_data = '''regular_double_quoted double_double_quoted escaped_double_quoted
+"this is a quoted value" "this is a quoted value with ""double double quotes""" "this is a quoted value with \\"escaped double quotes\\""
+'''
+
 # Values with leading whitespace
 sample_data_rows_with_spaces = ['a,1,0', '   b,   2,0', 'c,,0']
 sample_data_with_spaces_no_header = "\n".join(
@@ -715,8 +735,229 @@ class BasicTests(AbstractQTestCase):
         self.assertTrue(e[0].startswith("Could not read query from file"))
 
 
-    
+    def test_non_quoted_values_in_quoted_data(self):
+        tmp_data_file = self.create_file_with_data(sample_quoted_data)
+
+        cmd = '../bin/q -d " " "select c1 from %s"' % tmp_data_file.name
+        retcode, o, e = run_command(cmd)
+
+
+        self.assertEquals(retcode,0)
+        self.assertEquals(len(e),0)
+        self.assertEquals(len(o),4)  # the header line of the sample data is returned as a row too
+
+        self.assertEquals(o[0],'non_quoted')
+        self.assertEquals(o[1],'control-value-1')
+        self.assertEquals(o[2],'non-quoted-value')
+        self.assertEquals(o[3],'control-value-1')
+
+        self.cleanup(tmp_data_file)
+
+    def test_regular_quoted_values_in_quoted_data(self):
+        tmp_data_file = self.create_file_with_data(sample_quoted_data)
+
+        cmd = '../bin/q -d " " "select c2 from %s"' % tmp_data_file.name
+        retcode, o, e = run_command(cmd)
+
+        self.assertEquals(retcode,0)
+        self.assertEquals(len(e),0)
+        self.assertEquals(len(o),4)
+
+        self.assertEquals(o[0],'regular_double_quoted')
+        self.assertEquals(o[1],'control-value-2')
+        self.assertEquals(o[2],'this is a quoted value')
+        self.assertEquals(o[3],'control-value-2')
+
+        self.cleanup(tmp_data_file)
+
+    def test_double_double_quoted_values_in_quoted_data(self):
+        tmp_data_file = self.create_file_with_data(sample_quoted_data)
+
+        cmd = '../bin/q -d " " "select c3 from %s"' % tmp_data_file.name
+        retcode, o, e = run_command(cmd)
+
+        self.assertEquals(retcode,0)
+        self.assertEquals(len(e),0)
+        self.assertEquals(len(o),4)
+
+        self.assertEquals(o[0],'double_double_quoted')
+        self.assertEquals(o[1],'control-value-3')
+        self.assertEquals(o[2],'this is a "double double" quoted value')
+        self.assertEquals(o[3],'control-value-3')
+
+        self.cleanup(tmp_data_file)
+
+    def test_escaped_double_quoted_values_in_quoted_data(self):
+        tmp_data_file = self.create_file_with_data(sample_quoted_data)
+
+        cmd = '../bin/q -d " " "select c4 from %s"' % tmp_data_file.name
+        retcode, o, e = run_command(cmd)
+
+        self.assertEquals(retcode,0)
+        self.assertEquals(len(e),0)
+        self.assertEquals(len(o),4)
+
+        self.assertEquals(o[0],'escaped_double_quoted')
+        self.assertEquals(o[1],'control-value-4')
+        self.assertEquals(o[2],'this is an escaped "quoted value"')
+        self.assertEquals(o[3],'control-value-4')
+
+        self.cleanup(tmp_data_file)
+
+    def test_multiline_double_double_quoted_values_in_quoted_data(self):
+        tmp_data_file = self.create_file_with_data(sample_quoted_data)
+
+        # FIXME Need to convert \0a to proper encoding suitable for the person running the tests.
+        cmd = '../bin/q -d " " "select replace(c5,X\'0A\',\'::\') from %s"' % tmp_data_file.name
+        retcode, o, e = run_command(cmd)
+
+        self.assertEquals(retcode,0)
+        self.assertEquals(len(e),0)
+        self.assertEquals(len(o),4)
+
+        self.assertEquals(o[0],'multiline_double_double_quoted')
+        self.assertEquals(o[1],'control-value-5')
+        self.assertEquals(o[2],'this is a double double quoted "multiline:: value".')  # the query replaced the embedded newline with '::'
+        self.assertEquals(o[3],'control-value-5')
+
+        self.cleanup(tmp_data_file)
+
+    def test_multiline_escaped_double_quoted_values_in_quoted_data(self):
+        tmp_data_file = self.create_file_with_data(sample_quoted_data)
+
+        # FIXME Need to convert \0a to proper encoding suitable for the person running the tests.
+        cmd = '../bin/q -d " " "select replace(c6,X\'0A\',\'::\') from %s"' % tmp_data_file.name
+        retcode, o, e = run_command(cmd)
+
+        self.assertEquals(retcode,0)
+        self.assertEquals(len(e),0)
+        self.assertEquals(len(o),4)
+
+        self.assertEquals(o[0],'multiline_escaped_double_quoted')
+        self.assertEquals(o[1],'control-value-6')
+        self.assertEquals(o[2],'this is an escaped "multiline:: value".')  # the query replaced the embedded newline with '::'
+        self.assertEquals(o[3],'control-value-6')
+
+        self.cleanup(tmp_data_file)
+
+    def test_disable_double_double_quoted_data_flag__values(self):
+        # This test (and flag) is meant to verify backward compatibility only. It is possible that
+        # this flag will be removed completely in the future
+
+        tmp_data_file = self.create_file_with_data(double_double_quoted_data)
+
+        cmd = '../bin/q -d " " --disable-double-double-quoting "select c2 from %s"' % tmp_data_file.name
+        retcode, o, e = run_command(cmd)
+
+        self.assertEquals(retcode,0)
+        self.assertEquals(len(e),0)
+        self.assertEquals(len(o),2)
+
+        self.assertEquals(o[0],'double_double_quoted')
+        self.assertEquals(o[1],'this is a quoted value with "double')
+
+        cmd = '../bin/q -d " " --disable-double-double-quoting "select c3 from %s"' % tmp_data_file.name
+        retcode, o, e = run_command(cmd)
+
+        self.assertEquals(retcode,0)
+        self.assertEquals(len(e),0)
+        self.assertEquals(len(o),2)
+
+        self.assertEquals(o[0],'')
+        self.assertEquals(o[1],'double')
+
+        cmd = '../bin/q -d " " --disable-double-double-quoting "select c4 from %s"' % tmp_data_file.name
+        retcode, o, e = run_command(cmd)
+
+        self.assertEquals(retcode,0)
+        self.assertEquals(len(e),0)
+        self.assertEquals(len(o),2)
+
+        self.assertEquals(o[0],'')
+        self.assertEquals(o[1],'quotes"""')
+
+        self.cleanup(tmp_data_file)
+
+    def test_disable_escaped_double_quoted_data_flag__values(self):
+        # This test (and flag) is meant to verify backward compatibility only. It is possible that
+        # this flag will be removed completely in the future
+
+        tmp_data_file = self.create_file_with_data(escaped_double_quoted_data)
+
+        cmd = '../bin/q -d " " --disable-escaped-double-quoting "select c2 from %s"' % tmp_data_file.name
+        retcode, o, e = run_command(cmd)
+
+        self.assertEquals(retcode,0)
+        self.assertEquals(len(e),0)
+        self.assertEquals(len(o),2)
+
+        self.assertEquals(o[0],'escaped_double_quoted')
+        self.assertEquals(o[1],'this is a quoted value with \\escaped')
+
+        cmd = '../bin/q -d " " --disable-escaped-double-quoting "select c3 from %s"' % tmp_data_file.name
+        retcode, o, e = run_command(cmd)
+
+        self.assertEquals(retcode,0)
+        self.assertEquals(len(e),0)
+        self.assertEquals(len(o),2)
+
+        self.assertEquals(o[0],'')
+        self.assertEquals(o[1],'double')
+
+        cmd = '../bin/q -d " " --disable-escaped-double-quoting "select c4 from %s"' % tmp_data_file.name
+        retcode, o, e = run_command(cmd)
+
+        self.assertEquals(retcode,0)
+        self.assertEquals(len(e),0)
+        self.assertEquals(len(o),2)
+
+        self.assertEquals(o[0],'')
+        self.assertEquals(o[1],'quotes\\""')
 
+        self.cleanup(tmp_data_file)
+
+    def test_combined_quoted_data_flags__number_of_columns_detected(self):
+        # This test (and flags) is meant to verify backward compatibility only. It is possible that
+        # these flags will be removed completely in the future
+        tmp_data_file = self.create_file_with_data(combined_quoted_data)
+
+        cmd = '../bin/q -d " " --disable-double-double-quoting --disable-escaped-double-quoting "select * from %s" -A' % tmp_data_file.name
+        retcode, o, e = run_command(cmd)
+
+        self.assertEquals(retcode,0)
+        self.assertEquals(len(e),0)
+        o = o[1:] # remove the first "Table for file..." line in the output
+
+        self.assertEquals(len(o),7) # found 7 fields
+
+        cmd = '../bin/q -d " " --disable-escaped-double-quoting "select * from %s" -A' % tmp_data_file.name
+        retcode, o, e = run_command(cmd)
+
+        self.assertEquals(retcode,0)
+        self.assertEquals(len(e),0)
+        o = o[1:] # remove the first "Table for file..." line in the output
+
+        self.assertEquals(len(o),5) # found 5 fields
+
+        cmd = '../bin/q -d " " --disable-double-double-quoting "select * from %s" -A' % tmp_data_file.name
+        retcode, o, e = run_command(cmd)
+
+        self.assertEquals(retcode,0)
+        self.assertEquals(len(e),0)
+        o = o[1:] # remove the first "Table for file..." line in the output
+
+        self.assertEquals(len(o),5) # found 5 fields
+
+        cmd = '../bin/q -d " " "select * from %s" -A' % tmp_data_file.name
+        retcode, o, e = run_command(cmd)
+
+        self.assertEquals(retcode,0)
+        self.assertEquals(len(e),0)
+        o = o[1:] # remove the first "Table for file..." line in the output
+
+        self.assertEquals(len(o),3) # found only 3 fields, which is the correct amount
+
+        self.cleanup(tmp_data_file)
 
 
 class ParsingModeTests(AbstractQTestCase):