summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Harel Ben-Attia <harelba@gmail.com> 2014-11-23 15:18:23 -0500
committer Harel Ben-Attia <harelba@gmail.com> 2014-11-23 15:18:23 -0500
commit d04a24c8715dc2af32f623c72ee7dd69c2ede198 (patch)
tree 8a753bf054395a1acd14a8ea87725e0ee27e60b8
parent 92d0bb62138b4d5d74f60f157db9dd37071267ca (diff)
Multiple queries on CLI + load data from string API + finer details in table structure response + tests
-rwxr-xr-x bin/q 115
-rwxr-xr-x test/test-suite 144
2 files changed, 205 insertions, 54 deletions
diff --git a/bin/q b/bin/q
index 1fd1020..5f9520b 100755
--- a/bin/q
+++ b/bin/q
@@ -47,6 +47,7 @@ import traceback
import csv
import hashlib
import uuid
+import cStringIO
DEBUG = False
@@ -609,12 +610,13 @@ class TableCreatorState(object):
FULLY_READ = 'FULLY_READ'
class MaterializedFileState(object):
- def __init__(self,filename,f,encoding,dialect):
+ def __init__(self,filename,f,encoding,dialect,is_stdin):
self.filename = filename
self.lines_read = 0
self.f = f
self.encoding = encoding
self.dialect = dialect
+ self.is_stdin = is_stdin
def read_file_using_csv(self):
csv_reader = encoded_csv_reader(self.encoding, self.f, dialect=self.dialect)
@@ -708,7 +710,9 @@ class TableCreator(object):
f = self.open_file(filename)
- mfs = MaterializedFileState(filename,f,self.encoding,dialect)
+ is_stdin = filename == self.stdin_filename
+
+ mfs = MaterializedFileState(filename,f,self.encoding,dialect,is_stdin)
self.materialized_file_dict[filename] = mfs
def _populate(self,dialect,stop_after_analysis=False):
@@ -939,10 +943,19 @@ class QDataLoad(object):
return "DataLoad<'%s' at %s (took %4.3f seconds)>" % (self.filename,self.start_time,self.duration())
__repr__ = __str__
+class QMaterializedFile(object):
+ def __init__(self,filename,is_stdin):
+ self.filename = filename
+ self.is_stdin = is_stdin
+
+ def __str__(self):
+ return "QMaterializedFile<filename=%s,is_stdin=%s>" % (self.filename,self.is_stdin)
+ __repr__ = __str__
+
class QTableStructure(object):
- def __init__(self,filenames_str,materialized_file_list,column_names,column_types):
+ def __init__(self,filenames_str,materialized_files,column_names,column_types):
self.filenames_str = filenames_str
- self.materialized_file_list = materialized_file_list
+ self.materialized_files = materialized_files
self.column_names = column_names
self.column_types = column_types
@@ -1089,6 +1102,14 @@ class QTextAsData(object):
def load_data(self,filename,input_params=QInputParams(),stop_after_analysis=False):
self._load_data(filename,input_params,stop_after_analysis=stop_after_analysis)
+ def load_data_from_string(self,filename,str_data,input_params=QInputParams(),stop_after_analysis=False):
+ sf = cStringIO.StringIO(str_data)
+ try:
+ self._load_data(filename,input_params,stdin_file=sf,stdin_filename=filename,stop_after_analysis=stop_after_analysis)
+ finally:
+ if sf is not None:
+ sf.close()
+
def _ensure_data_is_loaded(self,sql_object,input_params,stdin_file,stdin_filename='-',stop_after_analysis=False):
data_loads = []
@@ -1187,12 +1208,20 @@ class QTextAsData(object):
pass
self.table_creators = {}
+ def _create_materialized_files(self,table_creator):
+ d = table_creator.materialized_file_dict
+ m = {}
+ for filename,mfs in d.iteritems():
+ m[filename] = QMaterializedFile(filename,mfs.is_stdin)
+ return m
+
def _create_table_structures_list(self):
table_structures = []
for filename,table_creator in self.table_creators.iteritems():
column_names = table_creator.column_inferer.get_column_names()
column_types = [self.db.type_names[table_creator.column_inferer.get_column_dict()[k]].lower() for k in column_names]
- table_structure = QTableStructure(table_creator.filenames_str,table_creator.materialized_file_list,column_names,column_types)
+ materialized_files = self._create_materialized_files(table_creator)
+ table_structure = QTableStructure(table_creator.filenames_str,materialized_files,column_names,column_types)
table_structures.append(table_structure)
return table_structures
@@ -1275,6 +1304,22 @@ class QOutputPrinter(object):
print >>f_out," `%s` - %s" % (n,t)
def print_output(self,f_out,f_err,results):
+ try:
+ self._print_output(f_out,f_err,results)
+ except (UnicodeEncodeError, UnicodeError), e:
+ print >>f_err, "Cannot encode data. Error:%s" % e
+ sys.exit(3)
+ except IOError, e:
+ if e.errno == 32:
+ # broken pipe, that's ok
+ pass
+ else:
+ # dont miss other problems for now
+ raise
+ except KeyboardInterrupt:
+ pass
+
+ def _print_output(self,f_out,f_err,results):
self.print_errors_and_warnings(f_err,results)
data = results.data
@@ -1452,13 +1497,9 @@ def run_standalone():
print_credentials()
sys.exit(0)
- if len(args) > 1:
- print >>sys.stderr,"Must provide query as one parameter, enclosed in quotes, or through a file with the -f parameter"
- sys.exit(1)
-
if len(args) == 0 and options.query_filename is None:
print_credentials()
- print >>sys.stderr,"Must provide a query in the command line, or through the a file with the -f parameter"
+ print >>sys.stderr,"Must provide at least one query in the command line, or through the a file with the -f parameter"
sys.exit(1)
if options.query_filename is not None:
@@ -1467,26 +1508,26 @@ def run_standalone():
sys.exit(1)
try:
f = file(options.query_filename)
- query_str = f.read()
+ query_strs = [f.read()]
f.close()
except:
print >>sys.stderr,"Could not read query from file %s" % options.query_filename
sys.exit(1)
else:
- query_str = args[0]
+ query_strs = args
if options.query_encoding is not None and options.query_encoding != 'none':
try:
- query_str = query_str.decode(options.query_encoding)
- except:
- print >>sys.stderr,"Could not decode query using the provided query encoding (%s)" % options.query_encoding
- sys.exit(3)
+ for idx in range(len(query_strs)):
+ query_strs[idx] = query_strs[idx].decode(options.query_encoding).strip()
- query_str = query_str.strip()
+ if len(query_strs[idx]) == 0:
+ print >>sys.stderr,"Query cannot be empty (query number %s)" % (idx+1)
+ sys.exit(1)
- if len(query_str) == 0:
- print >>sys.stderr,"Query cannot be empty"
- sys.exit(1)
+ except Exception,e:
+ print >>sys.stderr,"Could not decode query number %s using the provided query encoding (%s)" % (idx+1,options.query_encoding)
+ sys.exit(3)
if options.mode not in ['fluffy', 'relaxed', 'strict']:
print >>sys.stderr, "Parsing mode can be one of fluffy, relaxed or strict"
@@ -1557,15 +1598,7 @@ def run_standalone():
disable_double_double_quoting=options.disable_double_double_quoting,
disable_escaped_double_quoting=options.disable_escaped_double_quoting,
input_quoting_mode=options.input_quoting_mode)
-
- q_query = QTextAsData(default_input_params=default_input_params)
-
- if options.analyze_only:
- q_output = q_query.analyze(query_str,stdin_file=sys.stdin)
- else:
- q_output = q_query.execute(query_str,stdin_file=sys.stdin)
-
- q_query.unload()
+ q_engine = QTextAsData(default_input_params=default_input_params)
output_params = QOutputParams(
delimiter=options.output_delimiter,
@@ -1573,30 +1606,22 @@ def run_standalone():
output_quoting_mode=options.output_quoting_mode,
formatting=options.formatting,
output_header=options.output_header)
-
q_output_printer = QOutputPrinter(output_params)
- try:
+ for query_str in query_strs:
if options.analyze_only:
+ q_output = q_engine.analyze(query_str,stdin_file=sys.stdin)
q_output_printer.print_analysis(STDOUT,sys.stderr,q_output)
else:
+ q_output = q_engine.execute(query_str,stdin_file=sys.stdin)
q_output_printer.print_output(STDOUT,sys.stderr,q_output)
- except (UnicodeEncodeError, UnicodeError), e:
- print >>sys.stderr, "Cannot encode data. Error:%s" % e
- sys.exit(3)
- except IOError, e:
- if e.errno == 32:
- # broken pipe, that's ok
- pass
- else:
- # dont miss other problems for now
- raise
- except KeyboardInterrupt:
- pass
+ if q_output.status == 'error':
+ sys.exit(q_output.error.errorcode)
+
+ q_engine.unload()
- if q_output.status == 'error':
- sys.exit(q_output.error.errorcode)
+ sys.exit(0)
if __name__ == '__main__':
diff --git a/test/test-suite b/test/test-suite
index 371a986..d627041 100755
--- a/test/test-suite
+++ b/test/test-suite
@@ -10,8 +10,8 @@
#
import unittest
-import pprint
-
+import json
+from json import JSONEncoder
from subprocess import PIPE, Popen, STDOUT
import sys
import os
@@ -596,7 +596,7 @@ class BasicTests(AbstractQTestCase):
self.assertEquals(len(e), 1)
self.assertEquals(len(o), 0)
- self.assertEquals(e[0],'Query cannot be empty')
+ self.assertEquals(e[0],'Query cannot be empty (query number 1)')
def test_empty_query_in_command_line(self):
cmd = '../bin/q -d , " "'
@@ -606,17 +606,50 @@ class BasicTests(AbstractQTestCase):
self.assertEquals(len(e), 1)
self.assertEquals(len(o), 0)
- self.assertEquals(e[0],'Query cannot be empty')
+ self.assertEquals(e[0],'Query cannot be empty (query number 1)')
- def test_too_many_command_line_parameters(self):
- cmd = '../bin/q -d , select *'
+ def test_failure_in_query_stops_processing_queries(self):
+ cmd = '../bin/q -d , "select 500" "select 300" "wrong-query" "select 8000"'
retcode, o, e = run_command(cmd)
self.assertEquals(retcode, 1)
self.assertEquals(len(e), 1)
- self.assertEquals(len(o), 0)
+ self.assertEquals(len(o), 2)
+ self.assertEquals(o[0],'500')
+ self.assertEquals(o[1],'300')
- self.assertTrue(e[0].startswith('Must provide query as one parameter'))
+ def test_multiple_queries_in_command_line(self):
+ cmd = '../bin/q -d , "select 500" "select 300+100" "select 300" "select 200"'
+ retcode, o, e = run_command(cmd)
+
+ self.assertEquals(retcode, 0)
+ self.assertEquals(len(e), 0)
+ self.assertEquals(len(o), 4)
+
+ self.assertEquals(o[0],'500')
+ self.assertEquals(o[1],'400')
+ self.assertEquals(o[2],'300')
+ self.assertEquals(o[3],'200')
+
+ def test_literal_calculation_query(self):
+ cmd = '../bin/q -d , "select 1+40/6"'
+ retcode, o, e = run_command(cmd)
+
+ self.assertEquals(retcode, 0)
+ self.assertEquals(len(e), 0)
+ self.assertEquals(len(o), 1)
+
+ self.assertEquals(o[0],'7')
+
+ def test_literal_calculation_query_float_result(self):
+ cmd = '../bin/q -d , "select 1+40/6.0"'
+ retcode, o, e = run_command(cmd)
+
+ self.assertEquals(retcode, 0)
+ self.assertEquals(len(e), 0)
+ self.assertEquals(len(o), 1)
+
+ self.assertEquals(o[0],'7.66666666667')
def test_use_query_file(self):
tmp_data_file = self.create_file_with_data(sample_data_with_header)
@@ -647,7 +680,7 @@ class BasicTests(AbstractQTestCase):
self.assertEquals(len(o),0)
self.assertEquals(len(e),1)
- self.assertTrue(e[0].startswith('Could not decode query using the provided query encoding'))
+ self.assertTrue(e[0].startswith('Could not decode query number 1 using the provided query encoding (ascii)'))
self.cleanup(tmp_data_file)
self.cleanup(tmp_query_file)
@@ -1872,6 +1905,99 @@ class BasicModuleTests(AbstractQTestCase):
self.assertTrue(q_output.status == 'error')
self.assertTrue(q_output.error.msg.startswith('query error'))
+ def test_execute_response(self):
+ tmpfile = self.create_file_with_data("a b c\n1 2 3\n4 5 6")
+
+ q = QTextAsData()
+
+ q_output = q.execute("select a,c from %s" % tmpfile.name,QInputParams(skip_header=True))
+
+ self.assertTrue(q_output.status == 'ok')
+ self.assertTrue(q_output.error is None)
+ self.assertEquals(len(q_output.warnings),0)
+ self.assertEquals(len(q_output.data),2)
+ self.assertEquals(q_output.data,[ (1,3),(4,6) ])
+ self.assertTrue(q_output.metadata is not None)
+
+ metadata = q_output.metadata
+
+ self.assertEquals(metadata.output_column_name_list, [ 'a','c'])
+ self.assertEquals(len(metadata.data_loads),1)
+ self.assertEquals(len(metadata.table_structures),1)
+
+ table_structure = metadata.table_structures[0]
+
+ self.assertEquals(table_structure.column_names,[ 'a','b','c'])
+ self.assertEquals(table_structure.column_types,[ 'int','int','int'])
+ self.assertEquals(table_structure.filenames_str,tmpfile.name)
+ self.assertTrue(len(table_structure.materialized_files.keys()),1)
+ self.assertTrue(table_structure.materialized_files[tmpfile.name].filename,tmpfile.name)
+ self.assertFalse(table_structure.materialized_files[tmpfile.name].is_stdin)
+
+ self.cleanup(tmpfile)
+
+ def test_analyze_response(self):
+ tmpfile = self.create_file_with_data("a b c\n1 2 3\n4 5 6")
+
+ q = QTextAsData()
+
+ q_output = q.analyze("select a,c from %s" % tmpfile.name,QInputParams(skip_header=True))
+
+ self.assertTrue(q_output.status == 'ok')
+ self.assertTrue(q_output.error is None)
+ self.assertEquals(len(q_output.warnings),0)
+ self.assertEquals(len(q_output.data),2)
+ self.assertEquals(q_output.data,[ (1,3),(4,6) ])
+ self.assertTrue(q_output.metadata is not None)
+
+ metadata = q_output.metadata
+
+ self.assertEquals(metadata.output_column_name_list, [ 'a','c'])
+ self.assertEquals(len(metadata.data_loads),1)
+ self.assertEquals(len(metadata.table_structures),1)
+
+ table_structure = metadata.table_structures[0]
+
+ self.assertEquals(table_structure.column_names,[ 'a','b','c'])
+ self.assertEquals(table_structure.column_types,[ 'int','int','int'])
+ self.assertEquals(table_structure.filenames_str,tmpfile.name)
+ self.assertTrue(len(table_structure.materialized_files.keys()),1)
+ self.assertTrue(table_structure.materialized_files[tmpfile.name].filename,tmpfile.name)
+ self.assertFalse(table_structure.materialized_files[tmpfile.name].is_stdin)
+
+ self.cleanup(tmpfile)
+
+ def test_load_data_from_string(self):
+ input_str = 'column1,column2,column3\n' + '\n'.join(['value1,2.5,value3'] * 1000)
+
+ q = QTextAsData()
+
+ q.load_data_from_string('my_data',input_str,QInputParams(skip_header=True,delimiter=','))
+
+ q_output = q.execute('select column2,column3 from my_data')
+
+ self.assertTrue(q_output.status == 'ok')
+ self.assertTrue(q_output.error is None)
+ self.assertEquals(len(q_output.warnings),0)
+ self.assertTrue(len(q_output.data),1000)
+ self.assertEquals(len(set(q_output.data)),1)
+ self.assertEquals(list(set(q_output.data))[0],(2.5,'value3'))
+
+ metadata = q_output.metadata
+
+ self.assertTrue(metadata.output_column_name_list,['column2','column3'])
+ self.assertEquals(len(metadata.data_loads),0)
+ self.assertTrue(len(metadata.table_structures),1)
+
+ table_structure = metadata.table_structures[0]
+
+ self.assertEquals(table_structure.column_names,['column1','column2','column3'])
+ self.assertEquals(table_structure.column_types,['text','float','text'])
+ self.assertEquals(table_structure.filenames_str,'my_data')
+ self.assertTrue(len(table_structure.materialized_files.keys()),1)
+ self.assertTrue(table_structure.materialized_files['my_data'].filename,'my_data')
+ self.assertTrue(table_structure.materialized_files['my_data'].is_stdin)
+
def suite():
tl = unittest.TestLoader()