summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Harel Ben-Attia <harelba@gmail.com> 2016-04-02 15:56:17 +0300
committer Harel Ben-Attia <harelba@gmail.com> 2016-04-02 15:56:17 +0300
commit cd8bc6f6f7159071089f3067b39085ab34257a0a (patch)
tree 9e68709cc808392c7adc2cfde0c7d569e9a90ea5
parent 0aa96f216956391ec498dd4204a2f043ac1f2ba6 (diff)
Added a flag to disable automatic column type detection
-rwxr-xr-x bin/q 21
-rwxr-xr-x test/test-suite 69
2 files changed, 85 insertions, 5 deletions
diff --git a/bin/q b/bin/q
index c4f3c45..76c9e4b 100755
--- a/bin/q
+++ b/bin/q
@@ -396,7 +396,7 @@ class LineSplitter(object):
class TableColumnInferer(object):
- def __init__(self, mode, expected_column_count, input_delimiter, skip_header=False):
+ def __init__(self, mode, expected_column_count, input_delimiter, skip_header=False,disable_column_type_detection=False):
self.inferred = False
self.mode = mode
self.rows = []
@@ -404,6 +404,7 @@ class TableColumnInferer(object):
self.header_row = None
self.expected_column_count = expected_column_count
self.input_delimiter = input_delimiter
+ self.disable_column_type_detection = disable_column_type_detection
def analyze(self, col_vals):
if self.inferred:
@@ -427,6 +428,9 @@ class TableColumnInferer(object):
self.do_analysis()
def determine_type_of_value(self, value):
+ if self.disable_column_type_detection:
+ return str
+
if value is not None:
value = value.strip()
if value == '' or value is None:
@@ -677,7 +681,8 @@ class MaterializedFileState(object):
class TableCreator(object):
- def __init__(self, db, filenames_str, line_splitter, skip_header=False, gzipped=False, encoding='UTF-8', mode='fluffy', expected_column_count=None, input_delimiter=None,stdin_file=None,stdin_filename='-'):
+ def __init__(self, db, filenames_str, line_splitter, skip_header=False, gzipped=False, encoding='UTF-8', mode='fluffy', expected_column_count=None, input_delimiter=None,disable_column_type_detection=False,
+ stdin_file=None,stdin_filename='-'):
self.db = db
self.filenames_str = filenames_str
self.skip_header = skip_header
@@ -692,7 +697,7 @@ class TableCreator(object):
self.stdin_filename = stdin_filename
self.column_inferer = TableColumnInferer(
- mode, expected_column_count, input_delimiter, skip_header)
+ mode, expected_column_count, input_delimiter, skip_header,disable_column_type_detection)
# Filled only after table population since we're inferring the table
# creation data
@@ -1073,6 +1078,7 @@ class QInputParams(object):
delimiter=' ',input_encoding='UTF-8',gzipped_input=False,parsing_mode='relaxed',
expected_column_count=None,keep_leading_whitespace_in_values=False,
disable_double_double_quoting=False,disable_escaped_double_quoting=False,
+ disable_column_type_detection=False,
input_quoting_mode='minimal',stdin_file=None,stdin_filename='-'):
self.skip_header = skip_header
self.delimiter = delimiter
@@ -1084,6 +1090,7 @@ class QInputParams(object):
self.disable_double_double_quoting = disable_double_double_quoting
self.disable_escaped_double_quoting = disable_escaped_double_quoting
self.input_quoting_mode = input_quoting_mode
+ self.disable_column_type_detection = disable_column_type_detection
def merged_with(self,input_params):
params = QInputParams(**self.__dict__)
@@ -1154,7 +1161,8 @@ class QTextAsData(object):
table_creator = TableCreator(
self.db, filename, line_splitter, input_params.skip_header, input_params.gzipped_input, input_params.input_encoding,
mode=input_params.parsing_mode, expected_column_count=input_params.expected_column_count,
- input_delimiter=input_params.delimiter,stdin_file = stdin_file,stdin_filename = stdin_filename)
+ input_delimiter=input_params.delimiter,disable_column_type_detection=input_params.disable_column_type_detection,
+ stdin_file = stdin_file,stdin_filename = stdin_filename)
table_creator.populate(dialect_id,stop_after_analysis)
@@ -1542,6 +1550,8 @@ def run_standalone():
help="Disable support for double double-quoting for escaping the double quote character. By default, you can use \"\" inside double quoted fields to escape double quotes. Mainly for backward compatibility.")
input_data_option_group.add_option("--disable-escaped-double-quoting", dest="disable_escaped_double_quoting", default=True, action="store_false",
help="Disable support for escaped double-quoting for escaping the double quote character. By default, you can use \\\" inside double quoted fields to escape double quotes. Mainly for backward compatibility.")
+ input_data_option_group.add_option("--disable-column-type-detection", dest="disable_column_type_detection", default=False, action="store_true",
+ help="Don't detect column types - All columns will be text columns")
input_data_option_group.add_option("-w","--input-quoting-mode",dest="input_quoting_mode",default="minimal",
help="Input quoting mode. Possible values are all, minimal and none. Note the slightly misleading parameter name, and see the matching -W parameter for output quoting.")
parser.add_option_group(input_data_option_group)
@@ -1676,7 +1686,8 @@ def run_standalone():
keep_leading_whitespace_in_values=options.keep_leading_whitespace_in_values,
disable_double_double_quoting=options.disable_double_double_quoting,
disable_escaped_double_quoting=options.disable_escaped_double_quoting,
- input_quoting_mode=options.input_quoting_mode)
+ input_quoting_mode=options.input_quoting_mode,
+ disable_column_type_detection=options.disable_column_type_detection)
q_engine = QTextAsData(default_input_params=default_input_params)
output_params = QOutputParams(
diff --git a/test/test-suite b/test/test-suite
index 65652aa..e041e0d 100755
--- a/test/test-suite
+++ b/test/test-suite
@@ -1694,6 +1694,75 @@ class SqlTests(AbstractQTestCase):
self.cleanup(tmpfile2)
+ def test_disable_column_type_detection(self):
+ tmpfile = self.create_file_with_data('''regular_text,text_with_digits1,text_with_digits2,float_number
+"regular text 1",67,"67",12.3
+"regular text 2",067,"067",22.3
+"regular text 3",123,"123",33.4
+"regular text 4",-123,"-123",0122.2
+''')
+
+ # Check original column type detection
+ cmd = '../bin/q -A -d , -H "select * from %s"' % (tmpfile.name)
+
+ retcode, o, e = run_command(cmd)
+
+ self.assertEquals(retcode, 0)
+ self.assertEquals(len(e), 0)
+ self.assertEquals(len(o), 5)
+
+
+ self.assertEquals(o[0],'Table for file: %s' % tmpfile.name)
+ self.assertEquals(o[1],' `regular_text` - text')
+ self.assertEquals(o[2],' `text_with_digits1` - int')
+ self.assertEquals(o[3],' `text_with_digits2` - int')
+ self.assertEquals(o[4],' `float_number` - float')
+
+ # Check column types detected when actual detection is disabled
+ cmd = '../bin/q -A -d , -H --disable-column-type-detection "select * from %s"' % (tmpfile.name)
+
+ retcode, o, e = run_command(cmd)
+
+ self.assertEquals(retcode, 0)
+ self.assertEquals(len(e), 0)
+ self.assertEquals(len(o), 5)
+
+ self.assertEquals(o[0],'Table for file: %s' % tmpfile.name)
+ self.assertEquals(o[1],' `regular_text` - text')
+ self.assertEquals(o[2],' `text_with_digits1` - text')
+ self.assertEquals(o[3],' `text_with_digits2` - text')
+ self.assertEquals(o[4],' `float_number` - text')
+
+ # Get actual data with regular detection
+ cmd = '../bin/q -d , -H "select * from %s"' % (tmpfile.name)
+
+ retcode, o, e = run_command(cmd)
+
+ self.assertEquals(retcode, 0)
+ self.assertEquals(len(e), 0)
+ self.assertEquals(len(o), 4)
+
+ self.assertEquals(o[0],"regular text 1,67,67,12.3");
+ self.assertEquals(o[1],"regular text 2,67,67,22.3");
+ self.assertEquals(o[2],"regular text 3,123,123,33.4");
+ self.assertEquals(o[3],"regular text 4,-123,-123,122.2");
+
+ # Get actual data without detection
+ cmd = '../bin/q -d , -H --disable-column-type-detection "select * from %s"' % (tmpfile.name)
+
+ retcode, o, e = run_command(cmd)
+
+ self.assertEquals(retcode, 0)
+ self.assertEquals(len(e), 0)
+ self.assertEquals(len(o), 4)
+
+ self.assertEquals(o[0],"regular text 1,67,67,12.3");
+ self.assertEquals(o[1],"regular text 2,067,067,22.3");
+ self.assertEquals(o[2],"regular text 3,123,123,33.4");
+ self.assertEquals(o[3],"regular text 4,-123,-123,0122.2");
+
+ self.cleanup(tmpfile)
+
class BasicModuleTests(AbstractQTestCase):
def test_simple_query(self):