From d04a24c8715dc2af32f623c72ee7dd69c2ede198 Mon Sep 17 00:00:00 2001 From: Harel Ben-Attia Date: Sun, 23 Nov 2014 15:18:23 -0500 Subject: Multiple queries on CLI + load data from string API + finer details in table structure response + tests --- bin/q | 115 ++++++++++++++++++++++++++------------------ test/test-suite | 144 ++++++++++++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 205 insertions(+), 54 deletions(-) diff --git a/bin/q b/bin/q index 1fd1020..5f9520b 100755 --- a/bin/q +++ b/bin/q @@ -47,6 +47,7 @@ import traceback import csv import hashlib import uuid +import cStringIO DEBUG = False @@ -609,12 +610,13 @@ class TableCreatorState(object): FULLY_READ = 'FULLY_READ' class MaterializedFileState(object): - def __init__(self,filename,f,encoding,dialect): + def __init__(self,filename,f,encoding,dialect,is_stdin): self.filename = filename self.lines_read = 0 self.f = f self.encoding = encoding self.dialect = dialect + self.is_stdin = is_stdin def read_file_using_csv(self): csv_reader = encoded_csv_reader(self.encoding, self.f, dialect=self.dialect) @@ -708,7 +710,9 @@ class TableCreator(object): f = self.open_file(filename) - mfs = MaterializedFileState(filename,f,self.encoding,dialect) + is_stdin = filename == self.stdin_filename + + mfs = MaterializedFileState(filename,f,self.encoding,dialect,is_stdin) self.materialized_file_dict[filename] = mfs def _populate(self,dialect,stop_after_analysis=False): @@ -939,10 +943,19 @@ class QDataLoad(object): return "DataLoad<'%s' at %s (took %4.3f seconds)>" % (self.filename,self.start_time,self.duration()) __repr__ = __str__ +class QMaterializedFile(object): + def __init__(self,filename,is_stdin): + self.filename = filename + self.is_stdin = is_stdin + + def __str__(self): + return "QMaterializedFile" % (self.filename,self.is_stdin) + __repr__ = __str__ + class QTableStructure(object): - def __init__(self,filenames_str,materialized_file_list,column_names,column_types): + def __init__(self,filenames_str,materialized_files,column_names,column_types): self.filenames_str = filenames_str - self.materialized_file_list = materialized_file_list + self.materialized_files = materialized_files self.column_names = column_names self.column_types = column_types @@ -1089,6 +1102,14 @@ class QTextAsData(object): def load_data(self,filename,input_params=QInputParams(),stop_after_analysis=False): self._load_data(filename,input_params,stop_after_analysis=stop_after_analysis) + def load_data_from_string(self,filename,str_data,input_params=QInputParams(),stop_after_analysis=False): + sf = cStringIO.StringIO(str_data) + try: + self._load_data(filename,input_params,stdin_file=sf,stdin_filename=filename,stop_after_analysis=stop_after_analysis) + finally: + if sf is not None: + sf.close() + def _ensure_data_is_loaded(self,sql_object,input_params,stdin_file,stdin_filename='-',stop_after_analysis=False): data_loads = [] @@ -1187,12 +1208,20 @@ class QTextAsData(object): pass self.table_creators = {} + def _create_materialized_files(self,table_creator): + d = table_creator.materialized_file_dict + m = {} + for filename,mfs in d.iteritems(): + m[filename] = QMaterializedFile(filename,mfs.is_stdin) + return m + def _create_table_structures_list(self): table_structures = [] for filename,table_creator in self.table_creators.iteritems(): column_names = table_creator.column_inferer.get_column_names() column_types = [self.db.type_names[table_creator.column_inferer.get_column_dict()[k]].lower() for k in column_names] - table_structure = QTableStructure(table_creator.filenames_str,table_creator.materialized_file_list,column_names,column_types) + materialized_files = self._create_materialized_files(table_creator) + table_structure = QTableStructure(table_creator.filenames_str,materialized_files,column_names,column_types) table_structures.append(table_structure) return table_structures @@ -1275,6 +1304,22 @@ class QOutputPrinter(object): print >>f_out," `%s` - %s" % (n,t) def print_output(self,f_out,f_err,results): + try: + self._print_output(f_out,f_err,results) + except (UnicodeEncodeError, UnicodeError), e: + print >>f_err, "Cannot encode data. Error:%s" % e + sys.exit(3) + except IOError, e: + if e.errno == 32: + # broken pipe, that's ok + pass + else: + # dont miss other problems for now + raise + except KeyboardInterrupt: + pass + + def _print_output(self,f_out,f_err,results): self.print_errors_and_warnings(f_err,results) data = results.data @@ -1452,13 +1497,9 @@ def run_standalone(): print_credentials() sys.exit(0) - if len(args) > 1: - print >>sys.stderr,"Must provide query as one parameter, enclosed in quotes, or through a file with the -f parameter" - sys.exit(1) - if len(args) == 0 and options.query_filename is None: print_credentials() - print >>sys.stderr,"Must provide a query in the command line, or through the a file with the -f parameter" + print >>sys.stderr,"Must provide at least one query in the command line, or through the a file with the -f parameter" sys.exit(1) if options.query_filename is not None: @@ -1467,26 +1508,26 @@ def run_standalone(): sys.exit(1) try: f = file(options.query_filename) - query_str = f.read() + query_strs = [f.read()] f.close() except: print >>sys.stderr,"Could not read query from file %s" % options.query_filename sys.exit(1) else: - query_str = args[0] + query_strs = args if options.query_encoding is not None and options.query_encoding != 'none': try: - query_str = query_str.decode(options.query_encoding) - except: - print >>sys.stderr,"Could not decode query using the provided query encoding (%s)" % options.query_encoding - sys.exit(3) + for idx in range(len(query_strs)): + query_strs[idx] = query_strs[idx].decode(options.query_encoding).strip() - query_str = query_str.strip() + if len(query_strs[idx]) == 0: + print >>sys.stderr,"Query cannot be empty (query number %s)" % (idx+1) + sys.exit(1) - if len(query_str) == 0: - print >>sys.stderr,"Query cannot be empty" - sys.exit(1) + except Exception,e: + print >>sys.stderr,"Could not decode query number %s using the provided query encoding (%s)" % (idx+1,options.query_encoding) + sys.exit(3) if options.mode not in ['fluffy', 'relaxed', 'strict']: print >>sys.stderr, "Parsing mode can be one of fluffy, relaxed or strict" @@ -1557,15 +1598,7 @@ def run_standalone(): disable_double_double_quoting=options.disable_double_double_quoting, disable_escaped_double_quoting=options.disable_escaped_double_quoting, input_quoting_mode=options.input_quoting_mode) - - q_query = QTextAsData(default_input_params=default_input_params) - - if options.analyze_only: - q_output = q_query.analyze(query_str,stdin_file=sys.stdin) - else: - q_output = q_query.execute(query_str,stdin_file=sys.stdin) - - q_query.unload() + q_engine = QTextAsData(default_input_params=default_input_params) output_params = QOutputParams( delimiter=options.output_delimiter, @@ -1573,30 +1606,22 @@ def run_standalone(): output_quoting_mode=options.output_quoting_mode, formatting=options.formatting, output_header=options.output_header) - q_output_printer = QOutputPrinter(output_params) - try: + for query_str in query_strs: if options.analyze_only: + q_output = q_engine.analyze(query_str,stdin_file=sys.stdin) q_output_printer.print_analysis(STDOUT,sys.stderr,q_output) else: + q_output = q_engine.execute(query_str,stdin_file=sys.stdin) q_output_printer.print_output(STDOUT,sys.stderr,q_output) - except (UnicodeEncodeError, UnicodeError), e: - print >>sys.stderr, "Cannot encode data. Error:%s" % e - sys.exit(3) - except IOError, e: - if e.errno == 32: - # broken pipe, that's ok - pass - else: - # dont miss other problems for now - raise - except KeyboardInterrupt: - pass + if q_output.status == 'error': + sys.exit(q_output.error.errorcode) + + q_engine.unload() - if q_output.status == 'error': - sys.exit(q_output.error.errorcode) + sys.exit(0) if __name__ == '__main__': diff --git a/test/test-suite b/test/test-suite index 371a986..d627041 100755 --- a/test/test-suite +++ b/test/test-suite @@ -10,8 +10,8 @@ # import unittest -import pprint - +import json +from json import JSONEncoder from subprocess import PIPE, Popen, STDOUT import sys import os @@ -596,7 +596,7 @@ class BasicTests(AbstractQTestCase): self.assertEquals(len(e), 1) self.assertEquals(len(o), 0) - self.assertEquals(e[0],'Query cannot be empty') + self.assertEquals(e[0],'Query cannot be empty (query number 1)') def test_empty_query_in_command_line(self): cmd = '../bin/q -d , " "' @@ -606,17 +606,50 @@ class BasicTests(AbstractQTestCase): self.assertEquals(len(e), 1) self.assertEquals(len(o), 0) - self.assertEquals(e[0],'Query cannot be empty') + self.assertEquals(e[0],'Query cannot be empty (query number 1)') - def test_too_many_command_line_parameters(self): - cmd = '../bin/q -d , select *' + def test_failure_in_query_stops_processing_queries(self): + cmd = '../bin/q -d , "select 500" "select 300" "wrong-query" "select 8000"' retcode, o, e = run_command(cmd) self.assertEquals(retcode, 1) self.assertEquals(len(e), 1) - self.assertEquals(len(o), 0) + self.assertEquals(len(o), 2) + self.assertEquals(o[0],'500') + self.assertEquals(o[1],'300') - self.assertTrue(e[0].startswith('Must provide query as one parameter')) + def test_multiple_queries_in_command_line(self): + cmd = '../bin/q -d , "select 500" "select 300+100" "select 300" "select 200"' + retcode, o, e = run_command(cmd) + + self.assertEquals(retcode, 0) + self.assertEquals(len(e), 0) + self.assertEquals(len(o), 4) + + self.assertEquals(o[0],'500') + self.assertEquals(o[1],'400') + self.assertEquals(o[2],'300') + self.assertEquals(o[3],'200') + + def test_literal_calculation_query(self): + cmd = '../bin/q -d , "select 1+40/6"' + retcode, o, e = run_command(cmd) + + self.assertEquals(retcode, 0) + self.assertEquals(len(e), 0) + self.assertEquals(len(o), 1) + + self.assertEquals(o[0],'7') + + def test_literal_calculation_query_float_result(self): + cmd = '../bin/q -d , "select 1+40/6.0"' + retcode, o, e = run_command(cmd) + + self.assertEquals(retcode, 0) + self.assertEquals(len(e), 0) + self.assertEquals(len(o), 1) + + self.assertEquals(o[0],'7.66666666667') def test_use_query_file(self): tmp_data_file = self.create_file_with_data(sample_data_with_header) @@ -647,7 +680,7 @@ class BasicTests(AbstractQTestCase): self.assertEquals(len(o),0) self.assertEquals(len(e),1) - self.assertTrue(e[0].startswith('Could not decode query using the provided query encoding')) + self.assertTrue(e[0].startswith('Could not decode query number 1 using the provided query encoding (ascii)')) self.cleanup(tmp_data_file) self.cleanup(tmp_query_file) @@ -1872,6 +1905,99 @@ class BasicModuleTests(AbstractQTestCase): self.assertTrue(q_output.status == 'error') self.assertTrue(q_output.error.msg.startswith('query error')) + def test_execute_response(self): + tmpfile = self.create_file_with_data("a b c\n1 2 3\n4 5 6") + + q = QTextAsData() + + q_output = q.execute("select a,c from %s" % tmpfile.name,QInputParams(skip_header=True)) + + self.assertTrue(q_output.status == 'ok') + self.assertTrue(q_output.error is None) + self.assertEquals(len(q_output.warnings),0) + self.assertEquals(len(q_output.data),2) + self.assertEquals(q_output.data,[ (1,3),(4,6) ]) + self.assertTrue(q_output.metadata is not None) + + metadata = q_output.metadata + + self.assertEquals(metadata.output_column_name_list, [ 'a','c']) + self.assertEquals(len(metadata.data_loads),1) + self.assertEquals(len(metadata.table_structures),1) + + table_structure = metadata.table_structures[0] + + self.assertEquals(table_structure.column_names,[ 'a','b','c']) + self.assertEquals(table_structure.column_types,[ 'int','int','int']) + self.assertEquals(table_structure.filenames_str,tmpfile.name) + self.assertTrue(len(table_structure.materialized_files.keys()),1) + self.assertTrue(table_structure.materialized_files[tmpfile.name].filename,tmpfile.name) + self.assertFalse(table_structure.materialized_files[tmpfile.name].is_stdin) + + self.cleanup(tmpfile) + + def test_analyze_response(self): + tmpfile = self.create_file_with_data("a b c\n1 2 3\n4 5 6") + + q = QTextAsData() + + q_output = q.analyze("select a,c from %s" % tmpfile.name,QInputParams(skip_header=True)) + + self.assertTrue(q_output.status == 'ok') + self.assertTrue(q_output.error is None) + self.assertEquals(len(q_output.warnings),0) + self.assertEquals(len(q_output.data),2) + self.assertEquals(q_output.data,[ (1,3),(4,6) ]) + self.assertTrue(q_output.metadata is not None) + + metadata = q_output.metadata + + self.assertEquals(metadata.output_column_name_list, [ 'a','c']) + self.assertEquals(len(metadata.data_loads),1) + self.assertEquals(len(metadata.table_structures),1) + + table_structure = metadata.table_structures[0] + + self.assertEquals(table_structure.column_names,[ 'a','b','c']) + self.assertEquals(table_structure.column_types,[ 'int','int','int']) + self.assertEquals(table_structure.filenames_str,tmpfile.name) + self.assertTrue(len(table_structure.materialized_files.keys()),1) + self.assertTrue(table_structure.materialized_files[tmpfile.name].filename,tmpfile.name) + self.assertFalse(table_structure.materialized_files[tmpfile.name].is_stdin) + + self.cleanup(tmpfile) + + def test_load_data_from_string(self): + input_str = 'column1,column2,column3\n' + '\n'.join(['value1,2.5,value3'] * 1000) + + q = QTextAsData() + + q.load_data_from_string('my_data',input_str,QInputParams(skip_header=True,delimiter=',')) + + q_output = q.execute('select column2,column3 from my_data') + + self.assertTrue(q_output.status == 'ok') + self.assertTrue(q_output.error is None) + self.assertEquals(len(q_output.warnings),0) + self.assertTrue(len(q_output.data),1000) + self.assertEquals(len(set(q_output.data)),1) + self.assertEquals(list(set(q_output.data))[0],(2.5,'value3')) + + metadata = q_output.metadata + + self.assertTrue(metadata.output_column_name_list,['column2','column3']) + self.assertEquals(len(metadata.data_loads),0) + self.assertTrue(len(metadata.table_structures),1) + + table_structure = metadata.table_structures[0] + + self.assertEquals(table_structure.column_names,['column1','column2','column3']) + self.assertEquals(table_structure.column_types,['text','float','text']) + self.assertEquals(table_structure.filenames_str,'my_data') + self.assertTrue(len(table_structure.materialized_files.keys()),1) + self.assertTrue(table_structure.materialized_files['my_data'].filename,'my_data') + self.assertTrue(table_structure.materialized_files['my_data'].is_stdin) + def suite(): tl = unittest.TestLoader() -- cgit v1.2.3