#!/usr/bin/env python
#
#
# Simplistic test suite for q.
#
# Currently takes into account the project folder structure for running, so it needs
# to be executed from the current folder
#
#

import unittest
import pprint
from subprocess import PIPE, Popen, STDOUT
import sys
import os
import errno
import time
from tempfile import NamedTemporaryFile
import locale

# q uses this encoding as the default output encoding. Some of the tests use it in order to
# make sure that the output is correctly encoded
SYSTEM_ENCODING = locale.getpreferredencoding()


def _split_rows(text):
    """Strip surrounding whitespace from *text* and split it into rows.

    Returns an empty list when nothing but whitespace was captured.
    """
    text = text.strip()
    if not text:
        return []
    return text.split(os.linesep)


def run_command(cmd_to_run):
    """Run *cmd_to_run* through the shell and capture what it prints.

    Returns a ``(returncode, stdout_rows, stderr_rows)`` tuple in which both
    row lists are the captured streams split on ``os.linesep``.
    """
    p = Popen(cmd_to_run, stdout=PIPE, stderr=PIPE, shell=True)
    o, e = p.communicate()
    return (p.returncode, _split_rows(o), _split_rows(e))


uneven_ls_output = """drwxr-xr-x 2 root root 4096 Jun 11 2012 /selinux
drwxr-xr-x 2 root root 4096 Apr 19 2013 /mnt
drwxr-xr-x 2 root root 4096 Apr 24 2013 /srv
drwx------ 2 root root 16384 Jun 21 2013 /lost+found
lrwxrwxrwx 1 root root 33 Jun 21 2013 /initrd.img.old -> /boot/initrd.img-3.8.0-19-generic
drwxr-xr-x 2 root root 4096 Jun 21 2013 /cdrom
drwxr-xr-x 3 root root 4096 Jun 21 2013 /home
lrwxrwxrwx 1 root root 29 Jun 21 2013 /vmlinuz -> boot/vmlinuz-3.8.0-19-generic
lrwxrwxrwx 1 root root 32 Jun 21 2013 /initrd.img -> boot/initrd.img-3.8.0-19-generic
"""

find_output = """8257537 32 drwxrwxrwt 218 root root 28672 Mar 1 11:00 /tmp
8299123 4 drwxrwxr-x 2 harel harel 4096 Feb 27 10:06 /tmp/1628a3fd-b9fe-4dd1-bcdc-7eb869fe7461/supervisor/stormdist/testTopology3fad644a-54c0-4def-b19e-77ca97941595-1-1393513576
8263229 964 -rw-rw-r-- 1 mapred mapred 984569 Feb 27 10:06 /tmp/1628a3fd-b9fe-4dd1-bcdc-7eb869fe7461/supervisor/stormdist/testTopology3fad644a-54c0-4def-b19e-77ca97941595-1-1393513576/stormcode.ser
8263230 4 -rw-rw-r-- 1 harel harel 1223 Feb 27 10:06 /tmp/1628a3fd-b9fe-4dd1-bcdc-7eb869fe7461/supervisor/stormdist/testTopology3fad644a-54c0-4def-b19e-77ca97941595-1-1393513576/stormconf.ser
8299113 4 drwxrwxr-x 2 harel harel 4096 Feb 27 10:16 /tmp/1628a3fd-b9fe-4dd1-bcdc-7eb869fe7461/supervisor/localstate
8263406 4 -rw-rw-r-- 1 harel harel 2002 Feb 27 10:16 /tmp/1628a3fd-b9fe-4dd1-bcdc-7eb869fe7461/supervisor/localstate/1393514168746
8263476 0 -rw-rw-r-- 1 harel harel 0 Feb 27 10:16 /tmp/1628a3fd-b9fe-4dd1-bcdc-7eb869fe7461/supervisor/localstate/1393514168746.version
8263607 0 -rw-rw-r-- 1 harel harel 0 Feb 27 10:16 /tmp/1628a3fd-b9fe-4dd1-bcdc-7eb869fe7461/supervisor/localstate/1393514169735.version
8263533 0 -rw-rw-r-- 1 harel harel 0 Feb 27 10:16 /tmp/1628a3fd-b9fe-4dd1-bcdc-7eb869fe7461/supervisor/localstate/1393514172733.version
8263604 0 -rw-rw-r-- 1 harel harel 0 Feb 27 10:16 /tmp/1628a3fd-b9fe-4dd1-bcdc-7eb869fe7461/supervisor/localstate/1393514175754.version
"""

header_row = 'name,value1,value2'
sample_data_rows = ['a,1,0', 'b,2,0', 'c,,0']
sample_data_rows_with_empty_string = ['a,aaa,0', 'b,bbb,0', 'c,,0']
sample_data_no_header = "\n".join(sample_data_rows) + "\n"
sample_data_with_empty_string_no_header = "\n".join(
    sample_data_rows_with_empty_string) + "\n"
sample_data_with_header = header_row + "\n" + sample_data_no_header
sample_data_with_missing_header_names = "name,value1\n" + sample_data_no_header

# Values with leading whitespace
sample_data_rows_with_spaces = ['a,1,0', ' b, 2,0', 'c,,0']
sample_data_with_spaces_no_header = "\n".join(
    sample_data_rows_with_spaces) + "\n"

header_row_with_spaces = 'name,value 1,value2'
sample_data_with_spaces_with_header = header_row_with_spaces + \
    "\n" + sample_data_with_spaces_no_header

long_value1 = "23683289372328372328373"
int_value = "2328372328373"
sample_data_with_long_values = "%s\n%s\n%s" % (long_value1, int_value, int_value)


def one_column_warning(e):
    """Return True when the first stderr row is q's one-column warning."""
    return e[0].startswith('Warning: column count is one')


class AbstractQTestCase(unittest.TestCase):
    """Base class providing temporary-file helpers for the q test cases."""

    def create_file_with_data(self, data, encoding='utf-8'):
        """Write *data* to a new named temporary file and return the file object.

        *data* is encoded with *encoding* before being written, unless
        *encoding* is ``None`` or the string ``'none'``, in which case it is
        written as-is. The file is closed before returning (use ``.name`` to
        reach it) and its removal is registered with ``addCleanup`` so that it
        is deleted even when the calling test fails half way through.
        """
        tmpfile = NamedTemporaryFile(delete=False)
        # Register removal first so the file never outlives the test.
        self.addCleanup(self.cleanup, tmpfile)
        try:
            if encoding != 'none' and encoding is not None:
                tmpfile.write(data.encode(encoding))
            else:
                tmpfile.write(data)
        finally:
            tmpfile.close()
        return tmpfile

    def cleanup(self, tmpfile):
        """Remove the file behind *tmpfile*; a file that is already gone is ignored."""
        try:
            os.remove(tmpfile.name)
        except OSError as err:
            # Only "no such file" is expected here (explicit call followed by
            # the registered cleanup); anything else is a real problem.
            if err.errno != errno.ENOENT:
                raise


class BasicTests(AbstractQTestCase):

    def test_basic_aggregation(self):
        retcode, o, e = run_command(
            'seq 1 10 | ../bin/q "select sum(c1),avg(c1) from -"')

        self.assertEqual(retcode, 0)
        self.assertEqual(len(o), 1)
        self.assertEqual(len(e), 1)

        s = sum(xrange(1, 11))
        self.assertEqual(o[0], '%s %s' % (s, s / 10.0))
        self.assertTrue(one_column_warning(e))

    def test_gzipped_file(self):
        # Raw gzip bytes, written without any encoding step.
        tmpfile = self.create_file_with_data(
            '\x1f\x8b\x08\x08\xf2\x18\x12S\x00\x03xxxxxx\x003\xe42\xe22\xe62\xe12\xe52\xe32\xe7\xb2\xe0\xb2\xe424\xe0\x02\x00\xeb\xbf\x8a\x13\x15\x00\x00\x00', encoding='none')

        cmd = '../bin/q -z "select sum(c1),avg(c1) from %s"' % tmpfile.name

        retcode, o, e = run_command(cmd)
        self.assertEqual(retcode, 0)
        self.assertEqual(len(o), 1)
        self.assertEqual(len(e), 1)

        s = sum(xrange(1, 11))
        self.assertEqual(o[0], '%s %s' % (s, s / 10.0))
        self.assertTrue(one_column_warning(e))

    def test_delimition_mistake_with_header(self):
        tmpfile = self.create_file_with_data(sample_data_no_header)

        cmd = '../bin/q -d " " "select * from %s" -H' % tmpfile.name
        retcode, o, e = run_command(cmd)

        self.assertNotEqual(retcode, 0)
        self.assertEqual(len(o), 0)
        self.assertEqual(len(e), 3)

        self.assertTrue(e[0].startswith(
            "Warning: column count is one - did you provide the correct delimiter"))
        self.assertTrue(e[1].startswith("Bad header row"))
        self.assertIn("Column name cannot contain commas", e[2])

    def test_regexp_int_data_handling(self):
        tmpfile = self.create_file_with_data(sample_data_no_header)

        cmd = '../bin/q -d , "select c2 from %s where regexp(\'^1\',c2)"' % tmpfile.name
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 0)
        self.assertEqual(len(o), 1)
        self.assertEqual(len(e), 0)

        self.assertEqual(o[0], "1")

    def test_regexp_null_data_handling(self):
        tmpfile = self.create_file_with_data(sample_data_no_header)

        cmd = '../bin/q -d , "select count(*) from %s where regexp(\'^\',c2)"' % tmpfile.name
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 0)
        self.assertEqual(len(o), 1)
        self.assertEqual(len(e), 0)

        self.assertEqual(o[0], "2")

    def test_select_one_column(self):
        tmpfile = self.create_file_with_data(sample_data_no_header)

        cmd = '../bin/q -d , "select c1 from %s"' % tmpfile.name
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 0)
        self.assertEqual(len(o), 3)
        self.assertEqual(len(e), 0)

        self.assertEqual(" ".join(o), 'a b c')

    def test_tab_delimition_parameter(self):
        tmpfile = self.create_file_with_data(
            sample_data_no_header.replace(",", "\t"))
        cmd = '../bin/q -t "select c1,c2,c3 from %s"' % tmpfile.name
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 0)
        self.assertEqual(len(o), 3)
        self.assertEqual(len(e), 0)

        self.assertEqual(
            o, [row.replace(",", "\t") for row in sample_data_rows])

    def test_tab_delimition_parameter__with_manual_override_attempt(self):
        tmpfile = self.create_file_with_data(
            sample_data_no_header.replace(",", "\t"))
        # Both -t and -d are given; the expected output is still tab-delimited.
        cmd = '../bin/q -t -d , "select c1,c2,c3 from %s"' % tmpfile.name
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 0)
        self.assertEqual(len(o), 3)
        self.assertEqual(len(e), 0)

        self.assertEqual(
            o, [row.replace(",", "\t") for row in sample_data_rows])

    def test_output_delimiter(self):
        tmpfile = self.create_file_with_data(sample_data_no_header)
        cmd = '../bin/q -d , -D "|" "select c1,c2,c3 from %s"' % tmpfile.name
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 0)
        self.assertEqual(len(o), 3)
        self.assertEqual(len(e), 0)

        self.assertEqual(
            o, [row.replace(",", "|") for row in sample_data_rows])

    def test_output_delimiter_tab_parameter(self):
        tmpfile = self.create_file_with_data(sample_data_no_header)
        cmd = '../bin/q -d , -T "select c1,c2,c3 from %s"' % tmpfile.name
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 0)
        self.assertEqual(len(o), 3)
        self.assertEqual(len(e), 0)

        self.assertEqual(
            o, [row.replace(",", "\t") for row in sample_data_rows])

    def test_output_delimiter_tab_parameter__with_manual_override_attempt(self):
        tmpfile = self.create_file_with_data(sample_data_no_header)
        # Both -T and -D are given; the expected output is still tab-delimited.
        cmd = '../bin/q -d , -T -D "|" "select c1,c2,c3 from %s"' % tmpfile.name
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 0)
        self.assertEqual(len(o), 3)
        self.assertEqual(len(e), 0)

        self.assertEqual(
            o, [row.replace(",", "\t") for row in sample_data_rows])

    def test_stdin_input(self):
        cmd = 'printf "%s" | ../bin/q -d , "select c1,c2,c3 from -"' % sample_data_no_header
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 0)
        self.assertEqual(len(o), 3)
        self.assertEqual(len(e), 0)

        self.assertEqual(o, sample_data_rows)

    def test_column_separation(self):
        tmpfile = self.create_file_with_data(sample_data_no_header)
        cmd = '../bin/q -d , "select c1,c2,c3 from %s"' % tmpfile.name
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 0)
        self.assertEqual(len(o), 3)
        self.assertEqual(len(e), 0)

        self.assertEqual(o, sample_data_rows)

    def test_column_analysis(self):
        tmpfile = self.create_file_with_data(sample_data_no_header)

        cmd = '../bin/q -d , "select c1 from %s" -A' % tmpfile.name
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 0)

        self.assertEqual(o[0], 'Table for file: %s' % tmpfile.name)
        self.assertEqual(o[1].strip(), '`c1` - text')
        self.assertEqual(o[2].strip(), '`c2` - int')
        self.assertEqual(o[3].strip(), '`c3` - int')

    def test_column_analysis_no_header(self):
        tmpfile = self.create_file_with_data(sample_data_no_header)

        cmd = '../bin/q -d , "select c1 from %s" -A' % tmpfile.name
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 0)
        self.assertEqual(o[0], 'Table for file: %s' % tmpfile.name)
        self.assertEqual(o[1].strip(), '`c1` - text')
        self.assertEqual(o[2].strip(), '`c2` - int')
        self.assertEqual(o[3].strip(), '`c3` - int')

    def test_header_exception_on_numeric_header_data(self):
        tmpfile = self.create_file_with_data(sample_data_no_header)
        cmd = '../bin/q -d , "select * from %s" -A -H' % tmpfile.name
        retcode, o, e = run_command(cmd)

        self.assertNotEqual(retcode, 0)
        self.assertEqual(len(o), 0)
        self.assertEqual(len(e), 3)
        self.assertIn(
            'Bad header row: Header must contain only strings', e[0])
        self.assertIn("Column name must be a string", e[1])
        self.assertIn("Column name must be a string", e[2])

    def test_column_analysis_with_header(self):
        tmpfile = self.create_file_with_data(sample_data_with_header)
        cmd = '../bin/q -d , "select c1 from %s" -A -H' % tmpfile.name
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 0)
        self.assertEqual(o[0], 'Table for file: %s' % tmpfile.name)
        self.assertEqual(o[1].strip(), '`name` - text')
        self.assertEqual(o[2].strip(), '`value1` - int')
        self.assertEqual(o[3].strip(), '`value2` - int')

    def test_data_with_header(self):
        tmpfile = self.create_file_with_data(sample_data_with_header)
        cmd = '../bin/q -d , "select name from %s" -H' % tmpfile.name
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 0)
        self.assertEqual(len(o), 3)
        self.assertEqual(" ".join(o), "a b c")

    def test_output_header_when_input_header_exists(self):
        tmpfile = self.create_file_with_data(sample_data_with_header)
        cmd = '../bin/q -d , "select name from %s" -H -O' % tmpfile.name
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 0)
        self.assertEqual(len(o), 4)
        self.assertEqual(o, ['name', 'a', 'b', 'c'])

    def test_generated_column_name_warning_when_header_line_exists(self):
        tmpfile = self.create_file_with_data(sample_data_with_header)
        cmd = '../bin/q -d , "select c3 from %s" -H' % tmpfile.name
        retcode, o, e = run_command(cmd)

        self.assertNotEqual(retcode, 0)
        self.assertEqual(len(o), 0)
        self.assertEqual(len(e), 2)
        self.assertIn('no such column: c3', e[0])
        self.assertEqual(
            e[1], 'Warning - There seems to be a "no such column" error, and -H (header line) exists. Please make sure that you are using the column names from the header line and not the default (cXX) column names')

    def test_column_analysis_with_unexpected_header(self):
        tmpfile = self.create_file_with_data(sample_data_with_header)
        cmd = '../bin/q -d , "select c1 from %s" -A' % tmpfile.name
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 0)
        self.assertEqual(len(o), 4)
        self.assertEqual(len(e), 1)
        self.assertEqual(o[0], 'Table for file: %s' % tmpfile.name)
        self.assertEqual(o[1].strip(), '`c1` - text')
        self.assertEqual(o[2].strip(), '`c2` - text')
        self.assertEqual(o[3].strip(), '`c3` - text')

        self.assertEqual(
            e[0], 'Warning - There seems to be header line in the file, but -H has not been specified. All fields will be detected as text fields, and the header line will appear as part of the data')

    def test_empty_data(self):
        tmpfile = self.create_file_with_data('')
        cmd = '../bin/q -d , "select c1 from %s"' % tmpfile.name
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 0)
        self.assertEqual(len(o), 0)
        self.assertEqual(len(e), 1)

        self.assertIn('Warning - data is empty', e[0])

    def test_empty_data_with_header_param(self):
        tmpfile = self.create_file_with_data('')
        cmd = '../bin/q -d , "select c1 from %s" -H' % tmpfile.name
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 0)
        self.assertEqual(len(o), 0)
        self.assertEqual(len(e), 1)

        self.assertIn('Warning - data is empty', e[0])

    def test_one_row_of_data_without_header_param(self):
        tmpfile = self.create_file_with_data(header_row)
        cmd = '../bin/q -d , "select c2 from %s"' % tmpfile.name
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 0)
        self.assertEqual(len(o), 1)
        self.assertEqual(len(e), 0)

        self.assertEqual(o[0], 'value1')

    def test_one_row_of_data_with_header_param(self):
        tmpfile = self.create_file_with_data(header_row)
        cmd = '../bin/q -d , "select c2 from %s" -H' % tmpfile.name
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 0)
        self.assertEqual(len(o), 0)
        self.assertEqual(len(e), 1)

        self.assertIn('Warning - data is empty', e[0])

    def test_dont_leading_keep_whitespace_in_values(self):
        tmpfile = self.create_file_with_data(sample_data_with_spaces_no_header)
        cmd = '../bin/q -d , "select c1 from %s"' % tmpfile.name
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 0)
        self.assertEqual(len(e), 0)
        self.assertEqual(len(o), 3)

        self.assertEqual(o, ['a', 'b', 'c'])

    def test_keep_leading_whitespace_in_values(self):
        tmpfile = self.create_file_with_data(sample_data_with_spaces_no_header)
        cmd = '../bin/q -d , "select c1 from %s" -k' % tmpfile.name
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 0)
        self.assertEqual(len(e), 0)
        self.assertEqual(len(o), 3)

        self.assertEqual(o, ['a', ' b', 'c'])

    def test_no_impact_of_keeping_leading_whitespace_on_integers(self):
        tmpfile = self.create_file_with_data(sample_data_with_spaces_no_header)
        cmd = '../bin/q -d , "select c2 from %s" -k -A' % tmpfile.name
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 0)
        self.assertEqual(len(e), 0)
        self.assertEqual(len(o), 4)

        self.assertEqual(o[0], 'Table for file: %s' % tmpfile.name)
        self.assertEqual(o[1].strip(), '`c1` - text')
        self.assertEqual(o[2].strip(), '`c2` - int')
        self.assertEqual(o[3].strip(), '`c3` - int')

    def test_spaces_in_header_row(self):
        tmpfile = self.create_file_with_data(
            header_row_with_spaces + "\n" + sample_data_no_header)
        # The backslash keeps the shell from treating the backticks as
        # command substitution inside the double-quoted query.
        cmd = '../bin/q -d , "select name,\\`value 1\\` from %s" -H' % tmpfile.name
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 0)
        self.assertEqual(len(e), 0)
        self.assertEqual(len(o), 3)

        self.assertEqual(o, ['a,1', 'b,2', 'c,'])

    def test_column_analysis_for_spaces_in_header_row(self):
        tmpfile = self.create_file_with_data(
            header_row_with_spaces + "\n" + sample_data_no_header)
        # The backslash keeps the shell from treating the backticks as
        # command substitution inside the double-quoted query.
        cmd = '../bin/q -d , "select name,\\`value 1\\` from %s" -H -A' % tmpfile.name
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 0)
        self.assertEqual(len(e), 0)
        self.assertEqual(len(o), 4)

        self.assertEqual(o[0], 'Table for file: %s' % tmpfile.name)
        self.assertEqual(o[1].strip(), '`name` - text')
        self.assertEqual(o[2].strip(), '`value 1` - int')
        self.assertEqual(o[3].strip(), '`value2` - int')

    def test_no_query_in_command_line(self):
        cmd = '../bin/q -d , ""'
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 1)
        self.assertEqual(len(e), 1)
        self.assertEqual(len(o), 0)

        self.assertEqual(e[0], 'Query cannot be empty')

    def test_empty_query_in_command_line(self):
        cmd = '../bin/q -d , " "'
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 1)
        self.assertEqual(len(e), 1)
        self.assertEqual(len(o), 0)

        self.assertEqual(e[0], 'Query cannot be empty')

    def test_too_many_command_line_parameters(self):
        cmd = '../bin/q -d , select *'
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 1)
        self.assertEqual(len(e), 1)
        self.assertEqual(len(o), 0)

        self.assertTrue(e[0].startswith('Must provide query as one parameter'))

    def test_use_query_file(self):
        tmp_data_file = self.create_file_with_data(sample_data_with_header)
        tmp_query_file = self.create_file_with_data(
            "select name from %s" % tmp_data_file.name)

        cmd = '../bin/q -d , -q %s -H' % tmp_query_file.name
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 0)
        self.assertEqual(len(e), 0)
        self.assertEqual(len(o), 3)

        self.assertEqual(o, ['a', 'b', 'c'])

    def test_use_query_file_with_incorrect_query_encoding(self):
        tmp_data_file = self.create_file_with_data(sample_data_with_header)
        # The query holds raw UTF-8 bytes and is written without re-encoding.
        tmp_query_file = self.create_file_with_data(
            "select name,'Hr\xc3\xa1\xc4\x8d' from %s" % tmp_data_file.name, encoding=None)

        cmd = '../bin/q -d , -q %s -H -Q ascii' % tmp_query_file.name
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 3)
        self.assertEqual(len(o), 0)
        self.assertEqual(len(e), 1)

        self.assertTrue(e[0].startswith(
            'Could not decode query using the provided query encoding'))

    def test_output_header_with_non_ascii_names(self):
        tmp_data_file = self.create_file_with_data(sample_data_with_header)
        tmp_query_file = self.create_file_with_data(
            "select name,'Hr\xc3\xa1\xc4\x8d' Hr\xc3\xa1\xc4\x8d from %s" % tmp_data_file.name, encoding=None)

        cmd = '../bin/q -d , -q %s -H -Q utf-8 -O' % tmp_query_file.name
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 0)
        self.assertEqual(len(o), 4)
        self.assertEqual(len(e), 0)

        self.assertEqual(o[0].decode(SYSTEM_ENCODING), u'name,Hr\xe1\u010d')
        self.assertEqual(o[1].decode(SYSTEM_ENCODING), u'a,Hr\xe1\u010d')
        self.assertEqual(o[2].decode(SYSTEM_ENCODING), u'b,Hr\xe1\u010d')
        self.assertEqual(o[3].decode(SYSTEM_ENCODING), u'c,Hr\xe1\u010d')

    def test_use_query_file_with_query_encoding(self):
        tmp_data_file = self.create_file_with_data(sample_data_with_header)
        tmp_query_file = self.create_file_with_data(
            "select name,'Hr\xc3\xa1\xc4\x8d' from %s" % tmp_data_file.name, encoding=None)

        cmd = '../bin/q -d , -q %s -H -Q utf-8' % tmp_query_file.name
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 0)
        self.assertEqual(len(e), 0)
        self.assertEqual(len(o), 3)

        self.assertEqual(o[0].decode(SYSTEM_ENCODING), u'a,Hr\xe1\u010d')
        self.assertEqual(o[1].decode(SYSTEM_ENCODING), u'b,Hr\xe1\u010d')
        self.assertEqual(o[2].decode(SYSTEM_ENCODING), u'c,Hr\xe1\u010d')

    def test_use_query_file_and_command_line(self):
        tmp_data_file = self.create_file_with_data(sample_data_with_header)
        tmp_query_file = self.create_file_with_data(
            "select name from %s" % tmp_data_file.name)

        cmd = '../bin/q -d , -q %s -H "select * from ppp"' % tmp_query_file.name
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 1)
        self.assertEqual(len(e), 1)
        self.assertEqual(len(o), 0)

        self.assertTrue(e[0].startswith(
            "Can't provide both a query file and a query on the command line"))

    def test_select_output_encoding(self):
        tmp_data_file = self.create_file_with_data(sample_data_with_header)
        tmp_query_file = self.create_file_with_data(
            "select 'Hr\xc3\xa1\xc4\x8d' from %s" % tmp_data_file.name, encoding=None)

        for target_encoding in ['utf-8', 'ibm852']:
            cmd = '../bin/q -d , -q %s -H -Q utf-8 -E %s' % (
                tmp_query_file.name, target_encoding)
            retcode, o, e = run_command(cmd)

            self.assertEqual(retcode, 0)
            self.assertEqual(len(e), 0)
            self.assertEqual(len(o), 3)

            self.assertEqual(o[0].decode(target_encoding), u'Hr\xe1\u010d')
            self.assertEqual(o[1].decode(target_encoding), u'Hr\xe1\u010d')
            self.assertEqual(o[2].decode(target_encoding), u'Hr\xe1\u010d')

    def test_select_failed_output_encoding(self):
        tmp_data_file = self.create_file_with_data(sample_data_with_header)
        tmp_query_file = self.create_file_with_data(
            "select 'Hr\xc3\xa1\xc4\x8d' from %s" % tmp_data_file.name, encoding=None)

        cmd = '../bin/q -d , -q %s -H -Q utf-8 -E ascii' % tmp_query_file.name
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 3)
        self.assertEqual(len(e), 1)
        self.assertEqual(len(o), 0)

        self.assertTrue(e[0].startswith('Cannot encode data'))

    def test_use_query_file_with_empty_query(self):
        tmp_query_file = self.create_file_with_data(" ")

        cmd = '../bin/q -d , -q %s -H' % tmp_query_file.name
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 1)
        self.assertEqual(len(e), 1)
        self.assertEqual(len(o), 0)

        self.assertTrue(e[0].startswith("Query cannot be empty"))

    def test_use_non_existent_query_file(self):
        cmd = '../bin/q -d , -q non-existent-query-file -H'
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 1)
        self.assertEqual(len(e), 1)
        self.assertEqual(len(o), 0)

        self.assertTrue(e[0].startswith("Could not read query from file"))


class ParsingModeTests(AbstractQTestCase):

    def test_strict_mode_column_count_mismatch_error(self):
        tmpfile = self.create_file_with_data(uneven_ls_output)
        cmd = '../bin/q -m strict "select count(*) from %s"' % tmpfile.name
        retcode, o, e = run_command(cmd)

        self.assertNotEqual(retcode, 0)
        self.assertEqual(len(o), 0)
        self.assertEqual(len(e), 1)

        self.assertIn("Column Count is expected to identical", e[0])

    def test_strict_mode_too_large_specific_column_count(self):
        tmpfile = self.create_file_with_data(sample_data_no_header)
        cmd = '../bin/q -d , -m strict -c 4 "select count(*) from %s"' % tmpfile.name
        retcode, o, e = run_command(cmd)

        self.assertNotEqual(retcode, 0)
        self.assertEqual(len(o), 0)
        self.assertEqual(len(e), 1)

        self.assertEqual(
            e[0], "Strict mode. Column count is expected to be 4 but is 3")

    def test_strict_mode_too_small_specific_column_count(self):
        tmpfile = self.create_file_with_data(sample_data_no_header)
        cmd = '../bin/q -d , -m strict -c 2 "select count(*) from %s"' % tmpfile.name
        retcode, o, e = run_command(cmd)

        self.assertNotEqual(retcode, 0)
        self.assertEqual(len(o), 0)
        self.assertEqual(len(e), 1)

        self.assertEqual(
            e[0], "Strict mode. Column count is expected to be 2 but is 3")

    def test_relaxed_mode_missing_columns_in_header(self):
        tmpfile = self.create_file_with_data(
            sample_data_with_missing_header_names)
        cmd = '../bin/q -d , -m relaxed "select count(*) from %s" -H -A' % tmpfile.name
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 0)
        self.assertEqual(len(o), 4)
        self.assertEqual(len(e), 0)

        self.assertEqual(o[0], 'Table for file: %s' % tmpfile.name)
        self.assertEqual(o[1].strip(), '`name` - text')
        self.assertEqual(o[2].strip(), '`value1` - int')
        self.assertEqual(o[3].strip(), '`c3` - int')

    def test_strict_mode_missing_columns_in_header(self):
        tmpfile = self.create_file_with_data(
            sample_data_with_missing_header_names)
        cmd = '../bin/q -d , -m strict "select count(*) from %s" -H -A' % tmpfile.name
        retcode, o, e = run_command(cmd)

        self.assertNotEqual(retcode, 0)
        self.assertEqual(len(o), 0)
        self.assertEqual(len(e), 1)

        self.assertEqual(
            e[0], 'Strict mode. Header row contains less columns than expected column count(2 vs 3)')

    def test_output_delimiter_with_missing_fields(self):
        tmpfile = self.create_file_with_data(sample_data_no_header)
        cmd = '../bin/q -d , "select * from %s" -D ";"' % tmpfile.name
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 0)
        self.assertEqual(len(o), 3)
        self.assertEqual(len(e), 0)

        self.assertEqual(o, ['a;1;0', 'b;2;0', 'c;;0'])

    def test_handling_of_null_integers(self):
        tmpfile = self.create_file_with_data(sample_data_no_header)
        cmd = '../bin/q -d , "select avg(c2) from %s"' % tmpfile.name
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 0)
        self.assertEqual(len(o), 1)
        self.assertEqual(len(e), 0)

        self.assertEqual(o[0], '1.5')

    def test_empty_integer_values_converted_to_null(self):
        tmpfile = self.create_file_with_data(sample_data_no_header)
        cmd = '../bin/q -d , "select * from %s where c2 is null"' % tmpfile.name
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 0)
        self.assertEqual(len(o), 1)
        self.assertEqual(len(e), 0)

        self.assertEqual(o[0], 'c,,0')

    def test_empty_string_values_not_converted_to_null(self):
        tmpfile = self.create_file_with_data(
            sample_data_with_empty_string_no_header)
        cmd = '../bin/q -d , "select * from %s where c2 == %s"' % (
            tmpfile.name, "''")
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 0)
        self.assertEqual(len(o), 1)
        self.assertEqual(len(e), 0)

        self.assertEqual(o[0], 'c,,0')

    def test_relaxed_mode_detected_columns(self):
        tmpfile = self.create_file_with_data(uneven_ls_output)
        cmd = '../bin/q -m relaxed "select count(*) from %s" -A' % tmpfile.name
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 0)
        self.assertEqual(len(e), 0)

        # The first row names the table; the remaining rows describe columns
        # as "<name> - <type>".
        column_rows = o[1:]

        self.assertEqual(len(column_rows), 11)

        column_tuples = [x.strip().split(" ") for x in column_rows]
        column_names = [x[0] for x in column_tuples]
        column_types = [x[2] for x in column_tuples]

        self.assertEqual(column_names, ['`c%s`' % x for x in xrange(1, 12)])
        self.assertEqual(column_types, [
            'text', 'int', 'text', 'text', 'int', 'text', 'int', 'int', 'text', 'text', 'text'])

    def test_relaxed_mode_detected_columns_with_specific_column_count(self):
        tmpfile = self.create_file_with_data(uneven_ls_output)
        cmd = '../bin/q -m relaxed "select count(*) from %s" -A -c 9' % tmpfile.name
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 0)
        self.assertEqual(len(e), 0)

        # The first row names the table; the remaining rows describe columns
        # as "<name> - <type>".
        column_rows = o[1:]

        self.assertEqual(len(column_rows), 9)

        column_tuples = [x.strip().split(" ") for x in column_rows]
        column_names = [x[0] for x in column_tuples]
        column_types = [x[2] for x in column_tuples]

        self.assertEqual(column_names, ['`c%s`' % x for x in xrange(1, 10)])
        self.assertEqual(
            column_types, ['text', 'int', 'text', 'text', 'int', 'text', 'int', 'int', 'text'])

    def test_relaxed_mode_last_column_data_with_specific_column_count(self):
        tmpfile = self.create_file_with_data(uneven_ls_output)
        cmd = '../bin/q -m relaxed "select c9 from %s" -c 9' % tmpfile.name
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 0)
        self.assertEqual(len(o), 9)
        self.assertEqual(len(e), 0)

        expected_output = ["/selinux", "/mnt", "/srv", "/lost+found",
                           "/initrd.img.old -> /boot/initrd.img-3.8.0-19-generic",
                           "/cdrom", "/home",
                           "/vmlinuz -> boot/vmlinuz-3.8.0-19-generic",
                           "/initrd.img -> boot/initrd.img-3.8.0-19-generic"]

        self.assertEqual(o, expected_output)

    def test_fluffy_mode(self):
        tmpfile = self.create_file_with_data(uneven_ls_output)
        cmd = '../bin/q -m fluffy "select c9 from %s"' % tmpfile.name
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 0)
        self.assertEqual(len(o), 9)
        self.assertEqual(len(e), 0)

        expected_output = ["/selinux", "/mnt", "/srv", "/lost+found",
                           "/initrd.img.old", "/cdrom", "/home", "/vmlinuz", "/initrd.img"]

        self.assertEqual(o, expected_output)


class FormattingTests(AbstractQTestCase):

    def test_column_formatting(self):
        cmd = 'seq 1 10 | ../bin/q -f 1=%4.3f,2=%4.3f "select sum(c1),avg(c1) from -"'
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 0)
        self.assertEqual(len(o), 1)

        self.assertEqual(o[0], '55.000 5.500')


class SqlTests(AbstractQTestCase):

    def test_find_example(self):
        tmpfile = self.create_file_with_data(find_output)
        cmd = '../bin/q "select c5,c6,sum(c7)/1024.0/1024 as total from %s group by c5,c6 order by total desc"' % tmpfile.name
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 0)
        self.assertEqual(len(o), 3)
        self.assertEqual(len(e), 0)

        self.assertEqual(o[0], 'mapred mapred 0.93895816803')
        self.assertEqual(o[1], 'root root 0.02734375')
        self.assertEqual(o[2], 'harel harel 0.0108880996704')

    def test_join_example(self):
        cmd = '../bin/q "select myfiles.c8,emails.c2 from ../examples/exampledatafile myfiles join ../examples/group-emails-example emails on (myfiles.c4 = emails.c1) where myfiles.c8 = \'ppp\'"'
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 0)
        self.assertEqual(len(o), 2)

        self.assertEqual(o[0], 'ppp dip.1@otherdomain.com')
        self.assertEqual(o[1], 'ppp dip.2@otherdomain.com')

    def test_join_example_with_output_header(self):
        cmd = '../bin/q -O "select myfiles.c8 aaa,emails.c2 bbb from ../examples/exampledatafile myfiles join ../examples/group-emails-example emails on (myfiles.c4 = emails.c1) where myfiles.c8 = \'ppp\'"'
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 0)
        self.assertEqual(len(o), 3)

        self.assertEqual(o[0], 'aaa bbb')
        self.assertEqual(o[1], 'ppp dip.1@otherdomain.com')
        self.assertEqual(o[2], 'ppp dip.2@otherdomain.com')

    def test_self_join1(self):
        tmpfile = self.create_file_with_data(
            "\n".join(["%s 9000" % i for i in range(0, 10)]))
        cmd = '../bin/q "select * from %s a1 join %s a2 on (a1.c1 = a2.c1)"' % (
            tmpfile.name, tmpfile.name)
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 0)
        self.assertEqual(len(e), 0)
        self.assertEqual(len(o), 10)

    def test_self_join_reuses_table(self):
        tmpfile = self.create_file_with_data(
            "\n".join(["%s 9000" % i for i in range(0, 10)]))
        cmd = '../bin/q "select * from %s a1 join %s a2 on (a1.c1 = a2.c1)" -A' % (
            tmpfile.name, tmpfile.name)
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 0)
        self.assertEqual(len(e), 0)
        # A single table description is expected even though the file is
        # referenced twice in the query.
        self.assertEqual(len(o), 3)

        self.assertEqual(o[0], 'Table for file: %s' % tmpfile.name)
        self.assertEqual(o[1], ' `c1` - int')
        self.assertEqual(o[2], ' `c2` - int')

    def test_self_join2(self):
        # Every row shares the same c2 value, so joining on c2 yields the
        # full cross product.
        tmpfile1 = self.create_file_with_data(
            "\n".join(["%s 9000" % i for i in range(0, 10)]))
        cmd = '../bin/q "select * from %s a1 join %s a2 on (a1.c2 = a2.c2)"' % (
            tmpfile1.name, tmpfile1.name)
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 0)
        self.assertEqual(len(e), 0)
        self.assertEqual(len(o), 10 * 10)

        tmpfile2 = self.create_file_with_data(
            "\n".join(["%s 9000" % i for i in range(0, 10)]))
        cmd = '../bin/q "select * from %s a1 join %s a2 on (a1.c2 = a2.c2) join %s a3 on (a1.c2 = a3.c2)"' % (
            tmpfile2.name, tmpfile2.name, tmpfile2.name)
        retcode, o, e = run_command(cmd)

        self.assertEqual(retcode, 0)
        self.assertEqual(len(e), 0)
        self.assertEqual(len(o), 10 * 10 * 10)


def suite():
    """Build one suite holding every test case class defined in this module."""
    tl = unittest.TestLoader()
    basic_stuff = tl.loadTestsFromTestCase(BasicTests)
    parsing_mode = tl.loadTestsFromTestCase(ParsingModeTests)
    sql = tl.loadTestsFromTestCase(SqlTests)
    formatting = tl.loadTestsFromTestCase(FormattingTests)
    return unittest.TestSuite([basic_stuff, parsing_mode, sql, formatting])


if __name__ == '__main__':
    result = unittest.TextTestRunner(verbosity=2).run(suite())
    # Report failures through the exit status so callers (shell scripts, CI)
    # can detect a failing run.
    sys.exit(0 if result.wasSuccessful() else 1)