summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Harel Ben-Attia <harelba@gmail.com> 2018-12-21 18:19:48 +0200
committer: GitHub <noreply@github.com> 2018-12-21 18:19:48 +0200
commit: 28f776ed464c8c125995485cf40f037128354061 (patch)
tree: cb3d0b635d221eec00cc9b8a5a6d71b535bfd4e7
parent: aeb9dae530372b8c7498e5906dac15778f3cab9d (diff)
parent: 746cb9f67657c9d484befbe15862d788b18be876 (diff)
Merge pull request #195 from harelba/more-py3-stuff2.0.2_test12.0.1-test12.0.0
Python 2+3 Compatibility
-rw-r--r-- .travis.yml 8
-rwxr-xr-x bin/q 180
-rw-r--r-- requirements.txt 2
-rwxr-xr-x test/test-all 2
-rwxr-xr-x test/test-suite 1705
5 files changed, 1023 insertions, 874 deletions
diff --git a/.travis.yml b/.travis.yml
index 77becd8..402d9e1 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -3,8 +3,12 @@ python:
- "2.7"
- "3.6"
matrix:
+ include:
+ - python: "3.7"
+ dist: xenial # Needed for Python 3.7
allow_failures:
- python: "3.6"
-install: pip install flake8
-before_script: flake8 ./bin/q --count --select=E901,E999,F821,F822,F823 --show-source --statistics
+ - python: "3.7"
+install: pip install -r requirements.txt
+before_script: flake8 ./bin/q ./test/test-suite --count --select=E901,E999,F821,F822,F823 --show-source --statistics
script: test/test-all
diff --git a/bin/q b/bin/q
index 4a555c9..507991d 100755
--- a/bin/q
+++ b/bin/q
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python
# Copyright (C) 2012-2018 Harel Ben-Attia
#
@@ -27,7 +27,10 @@
#
# Run with --help for command line details
#
+from __future__ import absolute_import
+from __future__ import division
from __future__ import print_function
+
q_version = "1.8"
__all__ = [ 'QTextAsData' ]
@@ -43,13 +46,18 @@ import codecs
import locale
import time
import re
-from ConfigParser import ConfigParser
+from six.moves import configparser, range, filter
import traceback
import csv
import hashlib
import uuid
-import cStringIO
import math
+import six
+import io
+
+if six.PY3:
+ long = int
+ unicode = six.text_type
DEBUG = False
@@ -129,8 +137,7 @@ class Sqlite3DB(object):
for s in self.conn.iterdump():
c.execute(s)
results = c.fetchall()
- #print "executed %s results %s " % (s,results)
- for source_filename_str,tn in table_names_mapping.iteritems():
+ for source_filename_str,tn in six.iteritems(table_names_mapping):
c.execute('alter table `%s` rename to `%s`' % (tn, source_filename_str))
new_db.close()
@@ -179,7 +186,10 @@ class Sqlite3DB(object):
self.cursor.execute(q)
if self.cursor.description is not None:
# we decode the column names, so they can be encoded to any output format later on
- query_column_names = [c[0].decode('utf-8') for c in self.cursor.description]
+ if six.PY2:
+ query_column_names = [unicode(c[0],'utf-8') for c in self.cursor.description]
+ else:
+ query_column_names = [c[0] for c in self.cursor.description]
else:
query_column_names = None
result = self.cursor.fetchall()
@@ -222,7 +232,7 @@ class Sqlite3DB(object):
def generate_create_table(self, table_name, column_names, column_dict):
# Convert dict from python types to db types
column_name_to_db_type = dict(
- (n, self.type_names[t]) for n, t in column_dict.iteritems())
+ (n, self.type_names[t]) for n, t in six.iteritems(column_dict))
column_defs = ','.join(['"%s" %s' % (
n.replace('"', '""'), column_name_to_db_type[n]) for n in column_names])
return 'CREATE TABLE %s (%s)' % (table_name, column_defs)
@@ -418,12 +428,12 @@ class Sql(object):
qtable_name] = effective_table_name
def get_effective_sql(self,original_names=False):
- if len(filter(lambda x: x is None, self.qtable_name_effective_table_names)) != 0:
+ if len(list(filter(lambda x: x is None, self.qtable_name_effective_table_names))) != 0:
raise Exception('There are qtables without effective tables')
effective_sql = [x for x in self.sql_parts]
- for qtable_name, positions in self.qtable_name_positions.iteritems():
+ for qtable_name, positions in six.iteritems(self.qtable_name_positions):
for pos in positions:
if not original_names:
effective_sql[pos] = self.qtable_name_effective_table_names[
@@ -525,8 +535,8 @@ class TableColumnInferer(object):
return type_list[0]
else:
# check for the number of types without nulls,
- type_list_without_nulls = filter(
- lambda x: x is not None, type_list)
+ type_list_without_nulls = list(filter(
+ lambda x: x is not None, type_list))
# If all the sample lines are of the same type,
if len(set(type_list_without_nulls)) == 1:
# return it
@@ -579,7 +589,7 @@ class TableColumnInferer(object):
(v, "Column name must be UTF-8 Compatible"))
continue
# We're checking for column duplication for each field in order to be able to still provide it along with other errors
- if len(filter(lambda x: x == v,value_list)) > 1:
+ if len(list(filter(lambda x: x == v,value_list))) > 1:
entry = (v, "Column name is duplicated")
# Don't duplicate the error report itself
if entry not in column_name_errors:
@@ -611,7 +621,7 @@ class TableColumnInferer(object):
# in relaxed mode, add columns to fill the missing ones
self.header_row = self.header_row + \
['c%s' % (x + len(self.header_row) + 1)
- for x in xrange(self.column_count - len(self.header_row))]
+ for x in range(self.column_count - len(self.header_row))]
elif len(self.header_row) > self.column_count:
if self.mode == 'strict':
raise ColumnCountMismatchException("Strict mode. Header row contains more columns than expected column count (%s vs %s)" % (
@@ -641,7 +651,7 @@ class TableColumnInferer(object):
counts = {}
for column_count in column_count_list:
counts[column_count] = counts.get(column_count, 0) + 1
- return ", ".join(["%s rows with %s columns" % (v, k) for k, v in counts.iteritems()])
+ return six.u(", ").join([six.u("{} rows with {} columns".format(v, k)) for k, v in six.iteritems(counts)])
def _do_strict_analysis(self):
column_count_list = [len(col_vals) for col_vals in self.rows]
@@ -661,7 +671,7 @@ class TableColumnInferer(object):
def infer_column_types(self):
self.column_types = []
self.column_types2 = []
- for column_number in xrange(self.column_count):
+ for column_number in range(self.column_count):
column_value_list = [
row[column_number] if column_number < len(row) else None for row in self.rows]
column_type = self.determine_type_of_value_list(column_value_list)
@@ -693,7 +703,27 @@ class TableColumnInferer(object):
return self.column_types
-def encoded_csv_reader(encoding, f, dialect, **kwargs):
+def py3_encoded_csv_reader(encoding, f, dialect, is_stdin,**kwargs):
+    """Yield rows csv.reader parses from f, translating parse failures into the exception classes raised below; encoding and is_stdin are unused here."""
+    try:
+        for row in csv.reader(f, dialect, **kwargs):
+            yield row
+    except ValueError as e:
+        # Python 3 exceptions have no .message attribute (reading it raises AttributeError), so use str(e).
+        msg = str(e)
+        if msg.startswith('could not convert string to'):
+            raise CouldNotConvertStringToNumericValueException(msg)
+        raise CouldNotParseInputException(msg)
+    except Exception as e:
+        if str(e).startswith("field larger than field limit"):
+            raise ColumnMaxLengthLimitExceededException(str(e))
+        elif 'universal-newline' in str(e):
+            raise UniversalNewlinesExistException()
+        else:
+            raise
+
+
+def py2_encoded_csv_reader(encoding, f, dialect, is_stdin, **kwargs):
try:
csv_reader = csv.reader(f, dialect, **kwargs)
if encoding is not None and encoding != 'none':
@@ -715,6 +745,11 @@ def encoded_csv_reader(encoding, f, dialect, **kwargs):
else:
raise
+if six.PY2:
+ encoded_csv_reader = py2_encoded_csv_reader
+else:
+ encoded_csv_reader = py3_encoded_csv_reader
+
def normalized_filename(filename):
if filename == '-':
return 'stdin'
@@ -743,12 +778,16 @@ class MaterializedFileState(object):
# multiple people.
if self.encoding == 'utf-8-sig' and self.lines_read == 0 and not self.skipped_bom:
try:
- BOM = self.f.read(3)
- if BOM != '\xef\xbb\xbf':
+ if six.PY2:
+ BOM = self.f.read(3)
+ else:
+ BOM = self.f.buffer.read(3)
+
+ if BOM != six.b('\xef\xbb\xbf'):
raise Exception('Value of BOM is not as expected - Value is "%s"' % str(BOM))
except Exception as e:
raise Exception('Tried to skip BOM for "utf-8-sig" encoding and failed. Error message is ' + str(e))
- csv_reader = encoded_csv_reader(self.encoding, self.f, dialect=self.dialect)
+ csv_reader = encoded_csv_reader(self.encoding, self.f, is_stdin=self.is_stdin,dialect=self.dialect)
try:
for col_vals in csv_reader:
self.lines_read += 1
@@ -838,13 +877,19 @@ class TableCreator(object):
raise CannotUnzipStdInException()
else:
if self.gzipped or filename.endswith('.gz'):
- f = gzip.GzipFile(fileobj=file(filename,'rb'))
+ f = codecs.iterdecode(gzip.GzipFile(fileobj=io.open(filename,'rb')),encoding=self.encoding)
else:
- if self.with_universal_newlines:
- file_opening_mode = 'rbU'
+ if six.PY3:
+ if self.with_universal_newlines:
+ f = io.open(filename, 'rU',newline=None,encoding=self.encoding)
+ else:
+ f = io.open(filename, 'r', newline=None, encoding=self.encoding)
else:
- file_opening_mode = 'rb'
- f = file(filename,file_opening_mode)
+ if self.with_universal_newlines:
+ file_opening_mode = 'rbU'
+ else:
+ file_opening_mode = 'rb'
+ f = open(filename, file_opening_mode)
return f
def _pre_populate(self,dialect):
@@ -975,7 +1020,7 @@ class TableCreator(object):
if actual_col_count < expected_col_count:
col_vals = col_vals + \
- [None for x in xrange(expected_col_count - actual_col_count)]
+ [None for x in range(expected_col_count - actual_col_count)]
# in relaxed mode, we merge all extra columns to the last column value
if self.mode == 'relaxed':
@@ -1066,10 +1111,11 @@ class TableCreator(object):
def determine_max_col_lengths(m,output_field_quoting_func,output_delimiter):
if len(m) == 0:
return []
- max_lengths = [0 for x in xrange(0, len(m[0]))]
- for row_index in xrange(0, len(m)):
- for col_index in xrange(0, len(m[0])):
- new_len = len(unicode(output_field_quoting_func(output_delimiter,m[row_index][col_index])))
+ max_lengths = [0 for x in range(0, len(m[0]))]
+ for row_index in range(0, len(m)):
+ for col_index in range(0, len(m[0])):
+ # TODO Optimize this and make sure that py2 hack of float precision is applied here as well
+ new_len = len("{}".format(output_field_quoting_func(output_delimiter,m[row_index][col_index])))
if new_len > max_lengths[col_index]:
max_lengths[col_index] = new_len
return max_lengths
@@ -1272,7 +1318,7 @@ class QTextAsData(object):
self._load_data(filename,input_params,stop_after_analysis=stop_after_analysis)
def load_data_from_string(self,filename,str_data,input_params=QInputParams(),stop_after_analysis=False):
- sf = cStringIO.StringIO(str_data)
+ sf = six.StringIO(str_data)
try:
self._load_data(filename,input_params,stdin_file=sf,stdin_filename=filename,stop_after_analysis=stop_after_analysis)
finally:
@@ -1381,6 +1427,8 @@ class QTextAsData(object):
except KeyboardInterrupt as e:
warnings.append(QWarning(e,"Interrupted"))
except Exception as e:
+ if DEBUG:
+ print(traceback.format_exc())
error = QError(e,repr(e),199)
return QOutput(warnings = warnings,error = error , metadata=QMetadata(table_structures=table_structures,data_loads = data_loads))
@@ -1390,7 +1438,7 @@ class QTextAsData(object):
def unload(self):
- for filename,table_creator in self.table_creators.iteritems():
+ for filename,table_creator in six.iteritems(self.table_creators):
try:
table_creator.drop_table()
except:
@@ -1401,13 +1449,13 @@ class QTextAsData(object):
def _create_materialized_files(self,table_creator):
d = table_creator.materialized_file_dict
m = {}
- for filename,mfs in d.iteritems():
+ for filename,mfs in six.iteritems(d):
m[filename] = QMaterializedFile(filename,mfs.is_stdin)
return m
def _create_table_structures_list(self):
table_structures = []
- for filename,table_creator in self.table_creators.iteritems():
+ for filename,table_creator in six.iteritems(self.table_creators):
column_names = table_creator.column_inferer.get_column_names()
column_types = [self.db.type_names[table_creator.column_inferer.get_column_dict()[k]].lower() for k in column_names]
materialized_files = self._create_materialized_files(table_creator)
@@ -1421,7 +1469,7 @@ class QTextAsData(object):
return q_output
def escape_double_quotes_if_needed(v):
- x = v.replace('"','""')
+ x = v.replace(six.u('"'), six.u('""'))
return x
def quote_none_func(output_delimiter,v):
@@ -1431,22 +1479,22 @@ def quote_minimal_func(output_delimiter,v):
if v is None:
return v
t = type(v)
- if t == str or t == unicode and ((output_delimiter in v) or ('"' in v)):
- return '"%s"' % (escape_double_quotes_if_needed(v))
- return v;
+ if (t == str or t == unicode) and ((output_delimiter in v) or (six.u('"') in v)):
+ return six.u('"{}"').format(escape_double_quotes_if_needed(v))
+ return v
def quote_nonnumeric_func(output_delimiter,v):
if v is None:
return v
if type(v) == str or type(v) == unicode:
- return '"%s"' % (escape_double_quotes_if_needed(v))
- return v;
+ return six.u('"{}"').format(escape_double_quotes_if_needed(v))
+ return v
def quote_all_func(output_delimiter,v):
if type(v) == str or type(v) == unicode:
- return '"%s"' % (escape_double_quotes_if_needed(v))
+ return six.u('"{}"').format(escape_double_quotes_if_needed(v))
else:
- return '"%s"' % v
+ return six.u('"{}"').format(v)
class QOutputParams(object):
def __init__(self,
@@ -1454,12 +1502,14 @@ class QOutputParams(object):
beautify=False,
output_quoting_mode='minimal',
formatting=None,
- output_header=False):
+ output_header=False,
+ encoding=None):
self.delimiter = delimiter
self.beautify = beautify
self.output_quoting_mode = output_quoting_mode
self.formatting = formatting
self.output_header = output_header
+ self.encoding = encoding
def __str__(self):
return "QOutputParams<%s>" % str(self.__dict__)
@@ -1539,7 +1589,7 @@ class QOutputPrinter(object):
formatting_dict = dict(
[(x.split("=")[0], x.split("=")[1]) for x in self.output_params.formatting.split(",")])
else:
- formatting_dict = None
+ formatting_dict = {}
try:
if self.output_params.output_header and results.metadata.output_column_name_list is not None:
@@ -1548,24 +1598,31 @@ class QOutputPrinter(object):
row_str = []
skip_formatting = rownum == 0 and self.output_params.output_header
for i, col in enumerate(row):
- if formatting_dict is not None and str(i + 1) in formatting_dict.keys() and not skip_formatting:
+ if str(i + 1) in formatting_dict.keys() and not skip_formatting:
fmt_str = formatting_dict[str(i + 1)]
else:
if self.output_params.beautify:
- fmt_str = "%%-%ss" % max_lengths[i]
+ fmt_str = six.u("{{0:<{}}}").format(max_lengths[i])
else:
- fmt_str = "%s"
+ fmt_str = six.u("{}")
if col is not None:
- row_str.append(fmt_str % self.output_field_quoting_func(self.output_params.delimiter,col))
+ # Hack for Python 2: the default rendering of a float to a string loses precision; work around it by using repr().
+ if six.PY2 and isinstance(col, float) and str(i+1) not in formatting_dict:
+ col = repr(col)
+ xx = self.output_field_quoting_func(self.output_params.delimiter,col)
+ row_str.append(fmt_str.format(xx))
else:
- row_str.append(fmt_str % "")
+ row_str.append(fmt_str.format(""))
+
- f_out.write(self.output_params.delimiter.join(row_str) + "\n")
+ xxxx = six.u(self.output_params.delimiter).join(row_str) + six.u("\n")
+ f_out.write(xxxx)
except (UnicodeEncodeError, UnicodeError) as e:
print("Cannot encode data. Error:%s" % e, file=sys.stderr)
sys.exit(3)
except TypeError as e:
+ print(traceback.format_exc())
print("Error while formatting output: %s" % e, file=sys.stderr)
sys.exit(4)
except IOError as e:
@@ -1585,7 +1642,7 @@ class QOutputPrinter(object):
pass
def run_standalone():
- p = ConfigParser()
+ p = configparser.ConfigParser()
p.read([os.path.expanduser('~/.qrc'), '.qrc'])
def get_option_with_default(p, option_type, option, default):
@@ -1717,6 +1774,7 @@ def run_standalone():
print_credentials()
sys.exit(0)
+###
if len(args) == 0 and options.query_filename is None:
print_credentials()
print("Must provide at least one query in the command line, or through a file with the -q parameter", file=sys.stderr)
@@ -1727,14 +1785,17 @@ def run_standalone():
print("Can't provide both a query file and a query on the command line", file=sys.stderr)
sys.exit(1)
try:
- f = file(options.query_filename)
+ f = open(options.query_filename,'rb')
query_strs = [f.read()]
f.close()
except:
print("Could not read query from file %s" % options.query_filename, file=sys.stderr)
sys.exit(1)
else:
- query_strs = args
+ if sys.stdin.encoding is not None:
+ query_strs = [x.encode(sys.stdin.encoding) for x in args]
+ else:
+ query_strs = args
if options.query_encoding is not None and options.query_encoding != 'none':
try:
@@ -1748,6 +1809,7 @@ def run_standalone():
except Exception as e:
print("Could not decode query number %s using the provided query encoding (%s)" % (idx+1,options.query_encoding), file=sys.stderr)
sys.exit(3)
+###
if options.mode not in ['fluffy', 'relaxed', 'strict']:
print("Parsing mode can be one of fluffy, relaxed or strict", file=sys.stderr)
@@ -1755,7 +1817,10 @@ def run_standalone():
output_encoding = get_stdout_encoding(options.output_encoding)
try:
- STDOUT = codecs.getwriter(output_encoding)(sys.stdout)
+ if six.PY3:
+ STDOUT = codecs.getwriter(output_encoding)(sys.stdout.buffer)
+ else:
+ STDOUT = codecs.getwriter(output_encoding)(sys.stdout)
except:
print("Could not create output stream using output encoding %s" % (output_encoding), file=sys.stderr)
sys.exit(200)
@@ -1773,11 +1838,11 @@ def run_standalone():
print("Delimiter must be one character only", file=sys.stderr)
sys.exit(5)
- if options.input_quoting_mode not in QTextAsData.input_quoting_modes.keys():
- print("Input quoting mode can only be one of %s. It cannot be set to '%s'" % (",".join(QTextAsData.input_quoting_modes.keys()),options.input_quoting_mode), file=sys.stderr)
+ if options.input_quoting_mode not in list(QTextAsData.input_quoting_modes.keys()):
+ print("Input quoting mode can only be one of %s. It cannot be set to '%s'" % (",".join(sorted(QTextAsData.input_quoting_modes.keys())),options.input_quoting_mode), file=sys.stderr)
sys.exit(55)
- if options.output_quoting_mode not in QOutputPrinter.output_quoting_modes.keys():
+ if options.output_quoting_mode not in list(QOutputPrinter.output_quoting_modes.keys()):
print("Output quoting mode can only be one of %s. It cannot be set to '%s'" % (",".join(QOutputPrinter.output_quoting_modes.keys()),options.input_quoting_mode), file=sys.stderr)
sys.exit(56)
@@ -1851,7 +1916,8 @@ def run_standalone():
beautify=options.beautify,
output_quoting_mode=options.output_quoting_mode,
formatting=options.formatting,
- output_header=options.output_header)
+ output_header=options.output_header,
+ encoding=output_encoding)
q_output_printer = QOutputPrinter(output_params,show_tracebacks=options.verbose)
for query_str in query_strs:
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..6c4193a
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,2 @@
+six==1.11.0
+flake8==3.6.0
diff --git a/test/test-all b/test/test-all
index 94d1f5a..d3dcb02 100755
--- a/test/test-all
+++ b/test/test-all
@@ -9,6 +9,6 @@ trap return_to_original_folder EXIT
pushd $(dirname $0)/
-./test-suite
+./test-suite "$@"
set +e
diff --git a/test/test-suite b/test/test-suite
index b5c0595..e17afcd 100755
--- a/test/test-suite
+++ b/test/test-suite
@@ -20,6 +20,9 @@ import time
from tempfile import NamedTemporaryFile
import locale
import pprint
+import six
+from six.moves import range
+import codecs
sys.path.append(os.path.join(os.path.abspath(os.path.dirname(sys.argv[0])),'..','bin'))
from qtextasdata import QTextAsData,QOutput,QOutputPrinter,QInputParams
@@ -28,24 +31,37 @@ from qtextasdata import QTextAsData,QOutput,QOutputPrinter,QInputParams
# make sure that the output is correctly encoded
SYSTEM_ENCODING = locale.getpreferredencoding()
+
+DEBUG = False
+if len(sys.argv) > 2 and sys.argv[2] == '-v':
+ DEBUG = True
+
def run_command(cmd_to_run):
+ global DEBUG
+ if DEBUG:
+ print("CMD: {}".format(cmd_to_run))
+
p = Popen(cmd_to_run, stdout=PIPE, stderr=PIPE, shell=True)
o, e = p.communicate()
# remove last newline
o = o.rstrip()
e = e.strip()
# split rows
- if o != '':
- o = o.split(os.linesep)
+ if o != six.b(''):
+ o = o.split(six.b(os.linesep))
else:
o = []
- if e != '':
- e = e.split(os.linesep)
+ if e != six.b(''):
+ e = e.split(six.b(os.linesep))
else:
e = []
- return (p.returncode, o, e)
-uneven_ls_output = """drwxr-xr-x 2 root root 4096 Jun 11 2012 /selinux
+ res = (p.returncode, o, e)
+ if DEBUG:
+ print("RESULT:{}".format(res))
+ return res
+
+uneven_ls_output = six.b("""drwxr-xr-x 2 root root 4096 Jun 11 2012 /selinux
drwxr-xr-x 2 root root 4096 Apr 19 2013 /mnt
drwxr-xr-x 2 root root 4096 Apr 24 2013 /srv
drwx------ 2 root root 16384 Jun 21 2013 /lost+found
@@ -54,9 +70,9 @@ drwxr-xr-x 2 root root 4096 Jun 21 2013 /cdrom
drwxr-xr-x 3 root root 4096 Jun 21 2013 /home
lrwxrwxrwx 1 root root 29 Jun 21 2013 /vmlinuz -> boot/vmlinuz-3.8.0-19-generic
lrwxrwxrwx 1 root root 32 Jun 21 2013 /initrd.img -> boot/initrd.img-3.8.0-19-generic
-"""
+""")
-find_output = """8257537 32 drwxrwxrwt 218 root root 28672 Mar 1 11:00 /tmp
+find_output = six.b("""8257537 32 drwxrwxrwt 218 root root 28672 Mar 1 11:00 /tmp
8299123 4 drwxrwxr-x 2 harel harel 4096 Feb 27 10:06 /tmp/1628a3fd-b9fe-4dd1-bcdc-7eb869fe7461/supervisor/stormdist/testTopology3fad644a-54c0-4def-b19e-77ca97941595-1-1393513576
8263229 964 -rw-rw-r-- 1 mapred mapred 984569 Feb 27 10:06 /tmp/1628a3fd-b9fe-4dd1-bcdc-7eb869fe7461/supervisor/stormdist/testTopology3fad644a-54c0-4def-b19e-77ca97941595-1-1393513576/stormcode.ser
8263230 4 -rw-rw-r-- 1 harel harel 1223 Feb 27 10:06 /tmp/1628a3fd-b9fe-4dd1-bcdc-7eb869fe7461/supervisor/stormdist/testTopology3fad644a-54c0-4def-b19e-77ca97941595-1-1393513576/stormconf.ser
@@ -66,73 +82,74 @@ find_output = """8257537 32 drwxrwxrwt 218 root root 28672 Mar 1 1
8263607 0 -rw-rw-r-- 1 harel harel 0 Feb 27 10:16 /tmp/1628a3fd-b9fe-4dd1-bcdc-7eb869fe7461/supervisor/localstate/1393514169735.version
8263533 0 -rw-rw-r-- 1 harel harel 0 Feb 27 10:16 /tmp/1628a3fd-b9fe-4dd1-bcdc-7eb869fe7461/supervisor/localstate/1393514172733.version
8263604 0 -rw-rw-r-- 1 harel harel 0 Feb 27 10:16 /tmp/1628a3fd-b9fe-4dd1-bcdc-7eb869fe7461/supervisor/localstate/1393514175754.version
-"""
-
-header_row = 'name,value1,value2'
-sample_data_rows = ['a,1,0', 'b,2,0', 'c,,0']
-sample_data_rows_with_empty_string = ['a,aaa,0', 'b,bbb,0', 'c,,0']
-sample_data_no_header = "\n".join(sample_data_rows) + "\n"
-sample_data_with_empty_string_no_header = "\n".join(
- sample_data_rows_with_empty_string) + "\n"
-sample_data_with_header = header_row + "\n" + sample_data_no_header
-sample_data_with_missing_header_names = "name,value1\n" + sample_data_no_header
-
-sample_quoted_data = '''non_quoted regular_double_quoted double_double_quoted escaped_double_quoted multiline_double_double_quoted multiline_escaped_double_quoted
+""")
+
+header_row = six.b('name,value1,value2')
+sample_data_rows = [six.b('a,1,0'), six.b('b,2,0'), six.b('c,,0')]
+sample_data_rows_with_empty_string = [six.b('a,aaa,0'), six.b('b,bbb,0'), six.b('c,,0')]
+sample_data_no_header = six.b("\n").join(sample_data_rows) + six.b("\n")
+sample_data_with_empty_string_no_header = six.b("\n").join(
+ sample_data_rows_with_empty_string) + six.b("\n")
+sample_data_with_header = header_row + six.b("\n") + sample_data_no_header
+sample_data_with_missing_header_names = six.b("name,value1\n") + sample_data_no_header
+
+sample_quoted_data = six.b('''non_quoted regular_double_quoted double_double_quoted escaped_double_quoted multiline_double_double_quoted multiline_escaped_double_quoted
control-value-1 "control-value-2" control-value-3 "control-value-4" control-value-5 "control-value-6"
non-quoted-value "this is a quoted value" "this is a ""double double"" quoted value" "this is an escaped \\"quoted value\\"" "this is a double double quoted ""multiline
value""." "this is an escaped \\"multiline
value\\"."
control-value-1 "control-value-2" control-value-3 "control-value-4" control-value-5 "control-value-6"
-'''
+''')
-double_double_quoted_data = '''regular_double_quoted double_double_quoted
+double_double_quoted_data = six.b('''regular_double_quoted double_double_quoted
"this is a quoted value" "this is a quoted value with ""double double quotes"""
-'''
+''')
-escaped_double_quoted_data = '''regular_double_quoted escaped_double_quoted
+escaped_double_quoted_data = six.b('''regular_double_quoted escaped_double_quoted
"this is a quoted value" "this is a quoted value with \\"escaped double quotes\\""
-'''
+''')
-combined_quoted_data = '''regular_double_quoted double_double_quoted escaped_double_quoted
+combined_quoted_data = six.b('''regular_double_quoted double_double_quoted escaped_double_quoted
"this is a quoted value" "this is a quoted value with ""double double quotes""" "this is a quoted value with \\"escaped double quotes\\""
-'''
+''')
-sample_quoted_data2 = '"quoted data" 23\nunquoted-data 54'
+sample_quoted_data2 = six.b('"quoted data" 23\nunquoted-data 54')
-one_column_data = '''data without commas 1
+one_column_data = six.b('''data without commas 1
data without commas 2
-'''
+''')
# Values with leading whitespace
-sample_data_rows_with_spaces = ['a,1,0', ' b, 2,0', 'c,,0']
-sample_data_with_spaces_no_header = "\n".join(
- sample_data_rows_with_spaces) + "\n"
+sample_data_rows_with_spaces = [six.b('a,1,0'), six.b(' b, 2,0'), six.b('c,,0')]
+sample_data_with_spaces_no_header = six.b("\n").join(
+ sample_data_rows_with_spaces) + six.b("\n")
-header_row_with_spaces = 'name,value 1,value2'
+header_row_with_spaces = six.b('name,value 1,value2')
sample_data_with_spaces_with_header = header_row_with_spaces + \
- "\n" + sample_data_with_spaces_no_header
+ six.b("\n") + sample_data_with_spaces_no_header
long_value1 = "23683289372328372328373"
int_value = "2328372328373"
sample_data_with_long_values = "%s\n%s\n%s" % (long_value1,int_value,int_value)
def one_column_warning(e):
- return e[0].startswith('Warning: column count is one')
+ return e[0].startswith(six.b('Warning: column count is one'))
class AbstractQTestCase(unittest.TestCase):
- def create_file_with_data(self, data, encoding='utf-8'):
+ def create_file_with_data(self, data, encoding=None):
+ if encoding is not None:
+ raise Exception('Deprecated: Encoding must be none')
tmpfile = NamedTemporaryFile(delete=False)
- if encoding != 'none' and encoding is not None:
- tmpfile.write(data.encode(encoding))
- else:
- tmpfile.write(data)
+ tmpfile.write(data)
tmpfile.close()
return tmpfile
def cleanup(self, tmpfile):
- os.remove(tmpfile.name)
+ global DEBUG
+ if not DEBUG:
+ os.remove(tmpfile.name)
def random_tmp_filename(self,prefix,postfix):
# TODO Use more robust method for this
@@ -146,16 +163,16 @@ class SaveDbToDiskTests(AbstractQTestCase):
self.assertFalse(os.path.exists(db_filename))
retcode, o, e = run_command('seq 1 1000 | ../bin/q "select count(*) from -" -c 1 -S %s' % db_filename)
-
+
self.assertTrue(retcode == 0)
self.assertTrue(len(o) == 0)
self.assertTrue(len(e) == 5)
- self.assertTrue(e[0].startswith('Going to save data'))
- self.assertTrue(db_filename in e[0])
- self.assertTrue(e[1].startswith('Data has been loaded in'))
- self.assertTrue(e[2].startswith('Saving data to db file'))
- self.assertTrue(e[3].startswith('Data has been saved into'))
- self.assertTrue(e[4] == 'Query to run on the database: select count(*) from `-`;')
+ self.assertTrue(e[0].startswith(six.b('Going to save data')))
+ self.assertTrue(db_filename.encode(sys.stdout.encoding) in e[0])
+ self.assertTrue(e[1].startswith(six.b('Data has been loaded in')))
+ self.assertTrue(e[2].startswith(six.b('Saving data to db file')))
+ self.assertTrue(e[3].startswith(six.b('Data has been saved into')))
+ self.assertTrue(e[4] == six.b('Query to run on the database: select count(*) from `-`;'))
self.assertTrue(os.path.exists(db_filename))
@@ -178,8 +195,8 @@ class SaveDbToDiskTests(AbstractQTestCase):
retcode2, o2, e2 = run_command('seq 1 1000 | ../bin/q "select count(*) from -" -c 1 -S %s' % db_filename)
self.assertTrue(retcode2 != 0)
- self.assertTrue(e2[0].startswith('Going to save data into a disk database'))
- self.assertTrue(e2[1] == 'Disk database file %s already exists.' % db_filename)
+ self.assertTrue(e2[0].startswith(six.b('Going to save data into a disk database')))
+ self.assertTrue(e2[1] == six.b('Disk database file {} already exists.'.format(db_filename)))
os.remove(db_filename)
@@ -193,13 +210,13 @@ class BasicTests(AbstractQTestCase):
self.assertTrue(len(o) == 1)
self.assertTrue(len(e) == 1)
- s = sum(xrange(1, 11))
- self.assertTrue(o[0] == '%s %s' % (s, s / 10.0))
+ s = sum(range(1, 11))
+ self.assertTrue(o[0] == six.b('%s %s' % (s, s / 10.0)))
self.assertTrue(one_column_warning(e))
def test_gzipped_file(self):
tmpfile = self.create_file_with_data(
- '\x1f\x8b\x08\x08\xf2\x18\x12S\x00\x03xxxxxx\x003\xe42\xe22\xe62\xe12\xe52\xe32\xe7\xb2\xe0\xb2\xe424\xe0\x02\x00\xeb\xbf\x8a\x13\x15\x00\x00\x00', encoding='none')
+ six.b('\x1f\x8b\x08\x08\xf2\x18\x12S\x00\x03xxxxxx\x003\xe42\xe22\xe62\xe12\xe52\xe32\xe7\xb2\xe0\xb2\xe424\xe0\x02\x00\xeb\xbf\x8a\x13\x15\x00\x00\x00'))
cmd = '../bin/q -z "select sum(c1),avg(c1) from %s"' % tmpfile.name
@@ -208,15 +225,15 @@ class BasicTests(AbstractQTestCase):
self.assertTrue(len(o) == 1)
self.assertTrue(len(e) == 1)
- s = sum(xrange(1, 11))
- self.assertTrue(o[0] == '%s %s' % (s, s / 10.0))
+ s = sum(range(1, 11))
+ self.assertTrue(o[0] == six.b('%s %s' % (s, s / 10.0)))
self.assertTrue(one_column_warning(e))
self.cleanup(tmpfile)
def test_attempt_to_unzip_stdin(self):
tmpfile = self.create_file_with_data(
- '\x1f\x8b\x08\x08\xf2\x18\x12S\x00\x03xxxxxx\x003\xe42\xe22\xe62\xe12\xe52\xe32\xe7\xb2\xe0\xb2\xe424\xe0\x02\x00\xeb\xbf\x8a\x13\x15\x00\x00\x00', encoding='none')
+ six.b('\x1f\x8b\x08\x08\xf2\x18\x12S\x00\x03xxxxxx\x003\xe42\xe22\xe62\xe12\xe52\xe32\xe7\xb2\xe0\xb2\xe424\xe0\x02\x00\xeb\xbf\x8a\x13\x15\x00\x00\x00'))
cmd = 'cat %s | ../bin/q -z "select sum(c1),avg(c1) from -"' % tmpfile.name
@@ -225,7 +242,7 @@ class BasicTests(AbstractQTestCase):
self.assertTrue(len(o) == 0)
self.assertTrue(len(e) == 1)
- self.assertEquals(e[0],'Cannot decompress standard input. Pipe the input through zcat in order to decompress.')
+ self.assertEqual(e[0],six.b('Cannot decompress standard input. Pipe the input through zcat in order to decompress.'))
self.cleanup(tmpfile)
@@ -235,14 +252,14 @@ class BasicTests(AbstractQTestCase):
cmd = '../bin/q -d " " "select * from %s" -H' % tmpfile.name
retcode, o, e = run_command(cmd)
- self.assertNotEquals(retcode, 0)
- self.assertEquals(len(o), 0)
- self.assertEquals(len(e), 3)
+ self.assertNotEqual(retcode, 0)
+ self.assertEqual(len(o), 0)
+ self.assertEqual(len(e), 3)
self.assertTrue(e[0].startswith(
- "Warning: column count is one - did you provide the correct delimiter"))
- self.assertTrue(e[1].startswith("Bad header row"))
- self.assertTrue("Column name cannot contain commas" in e[2])
+ six.b("Warning: column count is one - did you provide the correct delimiter")))
+ self.assertTrue(e[1].startswith(six.b("Bad header row")))
+ self.assertTrue(six.b("Column name cannot contain commas") in e[2])
self.cleanup(tmpfile)
@@ -252,11 +269,11 @@ class BasicTests(AbstractQTestCase):
cmd = '../bin/q -d , "select c2 from %s where regexp(\'^1\',c2)"' % tmpfile.name
retcode, o, e = run_command(cmd)
- self.assertEquals(retcode, 0)
- self.assertEquals(len(o), 1)
- self.assertEquals(len(e), 0)
+ self.assertEqual(retcode, 0)
+ self.assertEqual(len(o), 1)
+ self.assertEqual(len(e), 0)
- self.assertEquals(o[0],"1")
+ self.assertEqual(o[0],six.b("1"))
self.cleanup(tmpfile)
@@ -266,11 +283,11 @@ class BasicTests(AbstractQTestCase):
cmd = '../bin/q -d , "select count(*) from %s where regexp(\'^\',c2)"' % tmpfile.name
retcode, o, e = run_command(cmd)
- self.assertEquals(retcode, 0)
- self.assertEquals(len(o), 1)
- self.assertEquals(len(e), 0)
+ self.assertEqual(retcode, 0)
+ self.assertEqual(len(o), 1)
+ self.assertEqual(len(e), 0)
- self.assertEquals(o[0],"2")
+ self.assertEqual(o[0],six.b("2"))
self.cleanup(tmpfile)
@@ -280,41 +297,41 @@ class BasicTests(AbstractQTestCase):
cmd = '../bin/q -d , "select c1 from %s"' % tmpfile.name
retcode, o, e = run_command(cmd)
- self.assertEquals(retcode, 0)
- self.assertEquals(len(o), 3)
- self.assertEquals(len(e), 0)
+ self.assertEqual(retcode, 0)
+ self.assertEqual(len(o), 3)
+ self.assertEqual(len(e), 0)<