Merge pull request #195 from harelba/more-py3-stuff [tags: 2.0.2_test1, 2.0.1-test1, 2.0.0]

Python 2+3 Compatibility
author: Harel Ben-Attia <harelba@gmail.com> 2018-12-21 18:19:48 +0200
committer: GitHub <noreply@github.com> 2018-12-21 18:19:48 +0200
commit: 28f776ed464c8c125995485cf40f037128354061 (patch)
tree: cb3d0b635d221eec00cc9b8a5a6d71b535bfd4e7
parent: aeb9dae530372b8c7498e5906dac15778f3cab9d (diff)
parent: 746cb9f67657c9d484befbe15862d788b18be876 (diff)
5 files changed, 1023 insertions, 874 deletions
diff --git a/.travis.yml b/.travis.yml
index 77becd8..402d9e1 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -3,8 +3,12 @@ python:
   - "2.7"
   - "3.6"
 matrix:
+  include:
+    - python: "3.7"
+      dist: xenial  # Needed for python 3.7
   allow_failures:
     - python: "3.6"
-install: pip install flake8
-before_script: flake8 ./bin/q --count --select=E901,E999,F821,F822,F823 --show-source --statistics
+    - python: "3.7"
+install: pip install -r requirements.txt
+before_script: flake8 ./bin/q ./test/test-suite --count --select=E901,E999,F821,F822,F823 --show-source --statistics
 script: test/test-all
diff --git a/bin/q b/bin/q
index 4a555c9..507991d 100755
--- a/bin/q
+++ b/bin/q
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python
 
 #   Copyright (C) 2012-2018 Harel Ben-Attia
 #
@@ -27,7 +27,10 @@
 #
 # Run with --help for command line details
 #
+from __future__ import absolute_import
+from __future__ import division
 from __future__ import print_function
+
 q_version = "1.8"
 
 __all__ = [ 'QTextAsData' ]
@@ -43,13 +46,18 @@ import codecs
 import locale
 import time
 import re
-from ConfigParser import ConfigParser
+from six.moves import configparser, range, filter
 import traceback
 import csv
 import hashlib
 import uuid
-import cStringIO
 import math
+import six
+import io
+
+if six.PY3:
+    long = int
+    unicode = six.text_type
 
 DEBUG = False
 
@@ -129,8 +137,7 @@ class Sqlite3DB(object):
         for s in self.conn.iterdump():
             c.execute(s)
             results = c.fetchall()
-            #print "executed %s results %s " % (s,results)
-        for source_filename_str,tn in table_names_mapping.iteritems():
+        for source_filename_str,tn in six.iteritems(table_names_mapping):
             c.execute('alter table `%s` rename to `%s`' % (tn, source_filename_str))
         new_db.close()
 
@@ -179,7 +186,10 @@ class Sqlite3DB(object):
             self.cursor.execute(q)
             if self.cursor.description is not None:
                 # we decode the column names, so they can be encoded to any output format later on
-                query_column_names = [c[0].decode('utf-8') for c in self.cursor.description]
+                if six.PY2:
+                    query_column_names = [unicode(c[0],'utf-8') for c in self.cursor.description]
+                else:
+                    query_column_names = [c[0] for c in self.cursor.description]
             else:
                 query_column_names = None
             result = self.cursor.fetchall()
@@ -222,7 +232,7 @@ class Sqlite3DB(object):
     def generate_create_table(self, table_name, column_names, column_dict):
         # Convert dict from python types to db types
         column_name_to_db_type = dict(
-            (n, self.type_names[t]) for n, t in column_dict.iteritems())
+            (n, self.type_names[t]) for n, t in six.iteritems(column_dict))
         column_defs = ','.join(['"%s" %s' % (
             n.replace('"', '""'), column_name_to_db_type[n]) for n in column_names])
         return 'CREATE TABLE %s (%s)' % (table_name, column_defs)
@@ -418,12 +428,12 @@ class Sql(object):
             qtable_name] = effective_table_name
 
     def get_effective_sql(self,original_names=False):
-        if len(filter(lambda x: x is None, self.qtable_name_effective_table_names)) != 0:
+        if len(list(filter(lambda x: x is None, self.qtable_name_effective_table_names))) != 0:
             raise Exception('There are qtables without effective tables')
 
         effective_sql = [x for x in self.sql_parts]
 
-        for qtable_name, positions in self.qtable_name_positions.iteritems():
+        for qtable_name, positions in six.iteritems(self.qtable_name_positions):
             for pos in positions:
                 if not original_names:
                     effective_sql[pos] = self.qtable_name_effective_table_names[
@@ -525,8 +535,8 @@ class TableColumnInferer(object):
             return type_list[0]
         else:
             # check for the number of types without nulls,
-            type_list_without_nulls = filter(
-                lambda x: x is not None, type_list)
+            type_list_without_nulls = list(filter(
+                lambda x: x is not None, type_list))
             # If all the sample lines are of the same type,
             if len(set(type_list_without_nulls)) == 1:
                 # return it
@@ -579,7 +589,7 @@ class TableColumnInferer(object):
                     (v, "Column name must be UTF-8 Compatible"))
                 continue
             # We're checking for column duplication for each field in order to be able to still provide it along with other errors
-            if len(filter(lambda x: x == v,value_list)) > 1:
+            if len(list(filter(lambda x: x == v,value_list))) > 1:
                 entry = (v, "Column name is duplicated")
                 # Don't duplicate the error report itself
                 if entry not in column_name_errors:
@@ -611,7 +621,7 @@ class TableColumnInferer(object):
                     # in relaxed mode, add columns to fill the missing ones
                     self.header_row = self.header_row + \
                         ['c%s' % (x + len(self.header_row) + 1)
-                         for x in xrange(self.column_count - len(self.header_row))]
+                         for x in range(self.column_count - len(self.header_row))]
             elif len(self.header_row) > self.column_count:
                 if self.mode == 'strict':
                     raise ColumnCountMismatchException("Strict mode. Header row contains more columns than expected column count (%s vs %s)" % (
@@ -641,7 +651,7 @@ class TableColumnInferer(object):
         counts = {}
         for column_count in column_count_list:
             counts[column_count] = counts.get(column_count, 0) + 1
-        return ", ".join(["%s rows with %s columns" % (v, k) for k, v in counts.iteritems()])
+        return six.u(", ").join([six.u("{} rows with {} columns".format(v, k)) for k, v in six.iteritems(counts)])
 
     def _do_strict_analysis(self):
         column_count_list = [len(col_vals) for col_vals in self.rows]
@@ -661,7 +671,7 @@ class TableColumnInferer(object):
     def infer_column_types(self):
         self.column_types = []
         self.column_types2 = []
-        for column_number in xrange(self.column_count):
+        for column_number in range(self.column_count):
             column_value_list = [
                 row[column_number] if column_number < len(row) else None for row in self.rows]
             column_type = self.determine_type_of_value_list(column_value_list)
@@ -693,7 +703,27 @@ class TableColumnInferer(object):
         return self.column_types
 
 
-def encoded_csv_reader(encoding, f, dialect, **kwargs):
+def py3_encoded_csv_reader(encoding, f, dialect, is_stdin,**kwargs):
+    """Yield rows parsed from f by csv.reader, translating csv errors into q's own exceptions."""
+    try:
+        csv_reader = csv.reader(f, dialect, **kwargs)
+        for row in csv_reader:
+            yield row
+    except ValueError as e:
+        if str(e).startswith('could not convert string to'):  # py3 exceptions have no .message attribute
+            raise CouldNotConvertStringToNumericValueException(str(e))
+        else:
+            raise CouldNotParseInputException(str(e))
+    except Exception as e:
+        if str(e).startswith("field larger than field limit"):
+            raise ColumnMaxLengthLimitExceededException(str(e))
+        elif 'universal-newline' in str(e):
+            raise UniversalNewlinesExistException()
+        else:
+            raise
+
+
+def py2_encoded_csv_reader(encoding, f, dialect, is_stdin, **kwargs):
     try:
         csv_reader = csv.reader(f, dialect, **kwargs)
         if encoding is not None and encoding != 'none':
@@ -715,6 +745,11 @@ def encoded_csv_reader(encoding, f, dialect, **kwargs):
         else:
             raise
 
+if six.PY2:
+    encoded_csv_reader = py2_encoded_csv_reader
+else:
+    encoded_csv_reader = py3_encoded_csv_reader
+
 def normalized_filename(filename):
     if filename == '-':
         return 'stdin'
@@ -743,12 +778,16 @@ class MaterializedFileState(object):
         # multiple people.
         if self.encoding == 'utf-8-sig' and self.lines_read == 0 and not self.skipped_bom:
             try:
-                BOM = self.f.read(3)
-                if BOM != '\xef\xbb\xbf':
+                if six.PY2:
+                    BOM = self.f.read(3)
+                else:
+                    BOM = self.f.buffer.read(3)
+
+                if BOM != six.b('\xef\xbb\xbf'):
                     raise Exception('Value of BOM is not as expected - Value is "%s"' % str(BOM))
             except Exception as e:
                 raise Exception('Tried to skip BOM for "utf-8-sig" encoding and failed. Error message is ' + str(e))
-        csv_reader = encoded_csv_reader(self.encoding, self.f, dialect=self.dialect)
+        csv_reader = encoded_csv_reader(self.encoding, self.f, is_stdin=self.is_stdin,dialect=self.dialect)
         try:
             for col_vals in csv_reader:
                 self.lines_read += 1
@@ -838,13 +877,19 @@ class TableCreator(object):
                 raise CannotUnzipStdInException()
         else:
             if self.gzipped or filename.endswith('.gz'):
-                f = gzip.GzipFile(fileobj=file(filename,'rb'))
+                f = codecs.iterdecode(gzip.GzipFile(fileobj=io.open(filename,'rb')),encoding=self.encoding)
             else:
-                if self.with_universal_newlines:
-                    file_opening_mode = 'rbU'
+                if six.PY3:
+                    if self.with_universal_newlines:
+                        f = io.open(filename, 'rU',newline=None,encoding=self.encoding)
+                    else:
+                        f = io.open(filename, 'r', newline=None, encoding=self.encoding)
                 else:
-                    file_opening_mode = 'rb'
-                f = file(filename,file_opening_mode)
+                    if self.with_universal_newlines:
+                        file_opening_mode = 'rbU'
+                    else:
+                        file_opening_mode = 'rb'
+                    f = open(filename, file_opening_mode)
         return f
 
     def _pre_populate(self,dialect):
@@ -975,7 +1020,7 @@ class TableCreator(object):
 
         if actual_col_count < expected_col_count:
             col_vals = col_vals + \
-                [None for x in xrange(expected_col_count - actual_col_count)]
+                [None for x in range(expected_col_count - actual_col_count)]
 
         # in relaxed mode, we merge all extra columns to the last column value
         if self.mode == 'relaxed':
@@ -1066,10 +1111,11 @@ class TableCreator(object):
 def determine_max_col_lengths(m,output_field_quoting_func,output_delimiter):
     if len(m) == 0:
         return []
-    max_lengths = [0 for x in xrange(0, len(m[0]))]
-    for row_index in xrange(0, len(m)):
-        for col_index in xrange(0, len(m[0])):
-            new_len = len(unicode(output_field_quoting_func(output_delimiter,m[row_index][col_index])))
+    max_lengths = [0 for x in range(0, len(m[0]))]
+    for row_index in range(0, len(m)):
+        for col_index in range(0, len(m[0])):
+            # TODO Optimize this and make sure that py2 hack of float precision is applied here as well
+            new_len = len("{}".format(output_field_quoting_func(output_delimiter,m[row_index][col_index])))
             if new_len > max_lengths[col_index]:
                 max_lengths[col_index] = new_len
     return max_lengths
@@ -1272,7 +1318,7 @@ class QTextAsData(object):
         self._load_data(filename,input_params,stop_after_analysis=stop_after_analysis)
 
     def load_data_from_string(self,filename,str_data,input_params=QInputParams(),stop_after_analysis=False):
-        sf = cStringIO.StringIO(str_data)
+        sf = six.StringIO(str_data)
         try:
             self._load_data(filename,input_params,stdin_file=sf,stdin_filename=filename,stop_after_analysis=stop_after_analysis)
         finally:
@@ -1381,6 +1427,8 @@ class QTextAsData(object):
         except KeyboardInterrupt as e:
             warnings.append(QWarning(e,"Interrupted"))
         except Exception as e:
+            if DEBUG:
+                print(traceback.format_exc())
             error = QError(e,repr(e),199)
 
         return QOutput(warnings = warnings,error = error , metadata=QMetadata(table_structures=table_structures,data_loads = data_loads))
@@ -1390,7 +1438,7 @@ class QTextAsData(object):
 
     def unload(self):
 
-        for filename,table_creator in self.table_creators.iteritems():
+        for filename,table_creator in six.iteritems(self.table_creators):
             try:
                 table_creator.drop_table()
             except:
@@ -1401,13 +1449,13 @@ class QTextAsData(object):
     def _create_materialized_files(self,table_creator):
         d = table_creator.materialized_file_dict
         m = {}
-        for filename,mfs in d.iteritems():
+        for filename,mfs in six.iteritems(d):
             m[filename] = QMaterializedFile(filename,mfs.is_stdin)
         return m
 
     def _create_table_structures_list(self):
         table_structures = []
-        for filename,table_creator in self.table_creators.iteritems():
+        for filename,table_creator in six.iteritems(self.table_creators):
             column_names = table_creator.column_inferer.get_column_names()
             column_types = [self.db.type_names[table_creator.column_inferer.get_column_dict()[k]].lower() for k in column_names]
             materialized_files = self._create_materialized_files(table_creator)
@@ -1421,7 +1469,7 @@ class QTextAsData(object):
         return q_output
 
 def escape_double_quotes_if_needed(v):
-    x = v.replace('"','""')
+    x = v.replace(six.u('"'), six.u('""'))
     return x
 
 def quote_none_func(output_delimiter,v):
@@ -1431,22 +1479,22 @@ def quote_minimal_func(output_delimiter,v):
     if v is None:
         return v
     t = type(v)
-    if t == str or t == unicode and ((output_delimiter in v) or ('"' in v)):
-        return '"%s"' % (escape_double_quotes_if_needed(v))
-    return v;
+    if (t == str or t == unicode) and ((output_delimiter in v) or (six.u('"') in v)):
+        return six.u('"{}"').format(escape_double_quotes_if_needed(v))
+    return v
 
 def quote_nonnumeric_func(output_delimiter,v):
     if v is None:
         return v
     if type(v) == str or type(v) == unicode:
-        return '"%s"' % (escape_double_quotes_if_needed(v))
-    return v;
+        return six.u('"{}"').format(escape_double_quotes_if_needed(v))
+    return v
 
 def quote_all_func(output_delimiter,v):
     if type(v) == str or type(v) == unicode:
-        return '"%s"' % (escape_double_quotes_if_needed(v))
+        return six.u('"{}"').format(escape_double_quotes_if_needed(v))
     else:
-        return '"%s"' % v
+        return six.u('"{}"').format(v)
 
 class QOutputParams(object):
     def __init__(self,
@@ -1454,12 +1502,14 @@ class QOutputParams(object):
             beautify=False,
             output_quoting_mode='minimal',
             formatting=None,
-            output_header=False):
+            output_header=False,
+                 encoding=None):
         self.delimiter = delimiter
         self.beautify = beautify
         self.output_quoting_mode = output_quoting_mode
         self.formatting = formatting
         self.output_header = output_header
+        self.encoding = encoding
 
     def __str__(self):
         return "QOutputParams<%s>" % str(self.__dict__)
@@ -1539,7 +1589,7 @@ class QOutputPrinter(object):
             formatting_dict = dict(
                 [(x.split("=")[0], x.split("=")[1]) for x in self.output_params.formatting.split(",")])
         else:
-            formatting_dict = None
+            formatting_dict = {}
 
         try:
             if self.output_params.output_header and results.metadata.output_column_name_list is not None:
@@ -1548,24 +1598,31 @@ class QOutputPrinter(object):
                 row_str = []
                 skip_formatting = rownum == 0 and self.output_params.output_header
                 for i, col in enumerate(row):
-                    if formatting_dict is not None and str(i + 1) in formatting_dict.keys() and not skip_formatting:
+                    if str(i + 1) in formatting_dict.keys() and not skip_formatting:
                         fmt_str = formatting_dict[str(i + 1)]
                     else:
                         if self.output_params.beautify:
-                            fmt_str = "%%-%ss" % max_lengths[i]
+                            fmt_str = six.u("{{0:<{}}}").format(max_lengths[i])
                         else:
-                            fmt_str = "%s"
+                            fmt_str = six.u("{}")
 
                     if col is not None:
-                        row_str.append(fmt_str % self.output_field_quoting_func(self.output_params.delimiter,col))
+                        # Hack for python2 - The default rendering of a float to a string loses precision. This hack works around it by using repr()
+                        if six.PY2 and isinstance(col, float) and str(i+1) not in formatting_dict:
+                            col = repr(col)
+                        xx = self.output_field_quoting_func(self.output_params.delimiter,col)
+                        row_str.append(fmt_str.format(xx))
                     else:
-                        row_str.append(fmt_str % "")
+                        row_str.append(fmt_str.format(""))
+
 
-                f_out.write(self.output_params.delimiter.join(row_str) + "\n")
+                xxxx = six.u(self.output_params.delimiter).join(row_str) + six.u("\n")
+                f_out.write(xxxx)
         except (UnicodeEncodeError, UnicodeError) as e:
             print("Cannot encode data. Error:%s" % e, file=sys.stderr)
             sys.exit(3)
         except TypeError as e:
+            print(traceback.format_exc())
             print("Error while formatting output: %s" % e, file=sys.stderr)
             sys.exit(4)
         except IOError as e:
@@ -1585,7 +1642,7 @@ class QOutputPrinter(object):
             pass
 
 def run_standalone():
-    p = ConfigParser()
+    p = configparser.ConfigParser()
     p.read([os.path.expanduser('~/.qrc'), '.qrc'])
 
     def get_option_with_default(p, option_type, option, default):
@@ -1717,6 +1774,7 @@ def run_standalone():
         print_credentials()
         sys.exit(0)
 
+###
     if len(args) == 0 and options.query_filename is None:
         print_credentials()
         print("Must provide at least one query in the command line, or through a file with the -q parameter", file=sys.stderr)
@@ -1727,14 +1785,17 @@ def run_standalone():
             print("Can't provide both a query file and a query on the command line", file=sys.stderr)
             sys.exit(1)
         try:
-            f = file(options.query_filename)
+            f = open(options.query_filename,'rb')
             query_strs = [f.read()]
             f.close()
         except:
             print("Could not read query from file %s" % options.query_filename, file=sys.stderr)
             sys.exit(1)
     else:
-        query_strs = args
+        if sys.stdin.encoding is not None:
+            query_strs = [x.encode(sys.stdin.encoding) for x in args]
+        else:
+            query_strs = args
 
     if options.query_encoding is not None and options.query_encoding != 'none':
         try:
@@ -1748,6 +1809,7 @@ def run_standalone():
         except Exception as e:
             print("Could not decode query number %s using the provided query encoding (%s)" % (idx+1,options.query_encoding), file=sys.stderr)
             sys.exit(3)
+###
 
     if options.mode not in ['fluffy', 'relaxed', 'strict']:
         print("Parsing mode can be one of fluffy, relaxed or strict", file=sys.stderr)
@@ -1755,7 +1817,10 @@ def run_standalone():
 
     output_encoding = get_stdout_encoding(options.output_encoding)
     try:
-        STDOUT = codecs.getwriter(output_encoding)(sys.stdout)
+        if six.PY3:
+            STDOUT = codecs.getwriter(output_encoding)(sys.stdout.buffer)
+        else:
+            STDOUT = codecs.getwriter(output_encoding)(sys.stdout)
     except:
         print("Could not create output stream using output encoding %s" % (output_encoding), file=sys.stderr)
         sys.exit(200)
@@ -1773,11 +1838,11 @@ def run_standalone():
         print("Delimiter must be one character only", file=sys.stderr)
         sys.exit(5)
 
-    if options.input_quoting_mode not in QTextAsData.input_quoting_modes.keys():
-        print("Input quoting mode can only be one of %s. It cannot be set to '%s'" % (",".join(QTextAsData.input_quoting_modes.keys()),options.input_quoting_mode), file=sys.stderr)
+    if options.input_quoting_mode not in list(QTextAsData.input_quoting_modes.keys()):
+        print("Input quoting mode can only be one of %s. It cannot be set to '%s'" % (",".join(sorted(QTextAsData.input_quoting_modes.keys())),options.input_quoting_mode), file=sys.stderr)
         sys.exit(55)
 
-    if options.output_quoting_mode not in QOutputPrinter.output_quoting_modes.keys():
+    if options.output_quoting_mode not in list(QOutputPrinter.output_quoting_modes.keys()):
         print("Output quoting mode can only be one of %s. It cannot be set to '%s'" % (",".join(QOutputPrinter.output_quoting_modes.keys()),options.input_quoting_mode), file=sys.stderr)
         sys.exit(56)
 
@@ -1851,7 +1916,8 @@ def run_standalone():
         beautify=options.beautify,
         output_quoting_mode=options.output_quoting_mode,
         formatting=options.formatting,
-        output_header=options.output_header)
+        output_header=options.output_header,
+        encoding=output_encoding)
     q_output_printer = QOutputPrinter(output_params,show_tracebacks=options.verbose)
 
     for query_str in query_strs:
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..6c4193a
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,2 @@
+six==1.11.0
+flake8==3.6.0
diff --git a/test/test-all b/test/test-all
index 94d1f5a..d3dcb02 100755
--- a/test/test-all
+++ b/test/test-all
@@ -9,6 +9,6 @@ trap return_to_original_folder EXIT
 
 pushd $(dirname $0)/
 
-./test-suite
+./test-suite "$@"
 
 set +e
diff --git a/test/test-suite b/test/test-suite
index b5c0595..e17afcd 100755
--- a/test/test-suite
+++ b/test/test-suite
@@ -20,6 +20,9 @@ import time
 from tempfile import NamedTemporaryFile
 import locale
 import pprint
+import six
+from six.moves import range
+import codecs
 
 sys.path.append(os.path.join(os.path.abspath(os.path.dirname(sys.argv[0])),'..','bin'))
 from qtextasdata import QTextAsData,QOutput,QOutputPrinter,QInputParams
@@ -28,24 +31,37 @@ from qtextasdata import QTextAsData,QOutput,QOutputPrinter,QInputParams
 # make sure that the output is correctly encoded
 SYSTEM_ENCODING = locale.getpreferredencoding()
 
+
+DEBUG = False
+if len(sys.argv) > 2 and sys.argv[2] == '-v':
+    DEBUG = True
+
 def run_command(cmd_to_run):
+    global DEBUG
+    if DEBUG:
+        print("CMD: {}".format(cmd_to_run))
+
     p = Popen(cmd_to_run, stdout=PIPE, stderr=PIPE, shell=True)
     o, e = p.communicate()
     # remove last newline
     o = o.rstrip()
     e = e.strip()
     # split rows
-    if o != '':
-        o = o.split(os.linesep)
+    if o != six.b(''):
+        o = o.split(six.b(os.linesep))
     else:
         o = []
-    if e != '':
-        e = e.split(os.linesep)
+    if e != six.b(''):
+        e = e.split(six.b(os.linesep))
     else:
         e = []
-    return (p.returncode, o, e)
 
-uneven_ls_output = """drwxr-xr-x   2 root     root      4096 Jun 11  2012 /selinux
+    res = (p.returncode, o, e)
+    if DEBUG:
+        print("RESULT:{}".format(res))
+    return res
+
+uneven_ls_output = six.b("""drwxr-xr-x   2 root     root      4096 Jun 11  2012 /selinux
 drwxr-xr-x   2 root     root      4096 Apr 19  2013 /mnt
 drwxr-xr-x   2 root     root      4096 Apr 24  2013 /srv
 drwx------   2 root     root     16384 Jun 21  2013 /lost+found
@@ -54,9 +70,9 @@ drwxr-xr-x   2 root     root      4096 Jun 21  2013 /cdrom
 drwxr-xr-x   3 root     root      4096 Jun 21  2013 /home
 lrwxrwxrwx   1 root     root        29 Jun 21  2013 /vmlinuz -> boot/vmlinuz-3.8.0-19-generic
 lrwxrwxrwx   1 root     root        32 Jun 21  2013 /initrd.img -> boot/initrd.img-3.8.0-19-generic
-"""
+""")
 
-find_output = """8257537   32 drwxrwxrwt 218 root     root        28672 Mar  1 11:00 /tmp
+find_output = six.b("""8257537   32 drwxrwxrwt 218 root     root        28672 Mar  1 11:00 /tmp
 8299123    4 drwxrwxr-x   2 harel    harel        4096 Feb 27 10:06 /tmp/1628a3fd-b9fe-4dd1-bcdc-7eb869fe7461/supervisor/stormdist/testTopology3fad644a-54c0-4def-b19e-77ca97941595-1-1393513576
 8263229  964 -rw-rw-r--   1 mapred   mapred      984569 Feb 27 10:06 /tmp/1628a3fd-b9fe-4dd1-bcdc-7eb869fe7461/supervisor/stormdist/testTopology3fad644a-54c0-4def-b19e-77ca97941595-1-1393513576/stormcode.ser
 8263230    4 -rw-rw-r--   1 harel    harel        1223 Feb 27 10:06 /tmp/1628a3fd-b9fe-4dd1-bcdc-7eb869fe7461/supervisor/stormdist/testTopology3fad644a-54c0-4def-b19e-77ca97941595-1-1393513576/stormconf.ser
@@ -66,73 +82,74 @@ find_output = """8257537   32 drwxrwxrwt 218 root     root        28672 Mar  1 1
 8263607    0 -rw-rw-r--   1 harel    harel           0 Feb 27 10:16 /tmp/1628a3fd-b9fe-4dd1-bcdc-7eb869fe7461/supervisor/localstate/1393514169735.version
 8263533    0 -rw-rw-r--   1 harel    harel           0 Feb 27 10:16 /tmp/1628a3fd-b9fe-4dd1-bcdc-7eb869fe7461/supervisor/localstate/1393514172733.version
 8263604    0 -rw-rw-r--   1 harel    harel           0 Feb 27 10:16 /tmp/1628a3fd-b9fe-4dd1-bcdc-7eb869fe7461/supervisor/localstate/1393514175754.version
-"""
-
-header_row = 'name,value1,value2'
-sample_data_rows = ['a,1,0', 'b,2,0', 'c,,0']
-sample_data_rows_with_empty_string = ['a,aaa,0', 'b,bbb,0', 'c,,0']
-sample_data_no_header = "\n".join(sample_data_rows) + "\n"
-sample_data_with_empty_string_no_header = "\n".join(
-    sample_data_rows_with_empty_string) + "\n"
-sample_data_with_header = header_row + "\n" + sample_data_no_header
-sample_data_with_missing_header_names = "name,value1\n" + sample_data_no_header
-
-sample_quoted_data = '''non_quoted regular_double_quoted double_double_quoted escaped_double_quoted multiline_double_double_quoted multiline_escaped_double_quoted
+""")
+
+header_row = six.b('name,value1,value2')
+sample_data_rows = [six.b('a,1,0'), six.b('b,2,0'), six.b('c,,0')]
+sample_data_rows_with_empty_string = [six.b('a,aaa,0'), six.b('b,bbb,0'), six.b('c,,0')]
+sample_data_no_header = six.b("\n").join(sample_data_rows) + six.b("\n")
+sample_data_with_empty_string_no_header = six.b("\n").join(
+    sample_data_rows_with_empty_string) + six.b("\n")
+sample_data_with_header = header_row + six.b("\n") + sample_data_no_header
+sample_data_with_missing_header_names = six.b("name,value1\n") + sample_data_no_header
+
+sample_quoted_data = six.b('''non_quoted regular_double_quoted double_double_quoted escaped_double_quoted multiline_double_double_quoted multiline_escaped_double_quoted
 control-value-1 "control-value-2" control-value-3 "control-value-4" control-value-5 "control-value-6"
 non-quoted-value "this is a quoted value" "this is a ""double double"" quoted value" "this is an escaped \\"quoted value\\"" "this is a double double quoted ""multiline
   value""." "this is an escaped \\"multiline
   value\\"."
 control-value-1 "control-value-2" control-value-3 "control-value-4" control-value-5 "control-value-6"
-'''
+''')
 
-double_double_quoted_data = '''regular_double_quoted double_double_quoted 
+double_double_quoted_data = six.b('''regular_double_quoted double_double_quoted
 "this is a quoted value" "this is a quoted value with ""double double quotes"""
-'''
+''')
 
-escaped_double_quoted_data = '''regular_double_quoted escaped_double_quoted 
+escaped_double_quoted_data = six.b('''regular_double_quoted escaped_double_quoted
 "this is a quoted value" "this is a quoted value with \\"escaped double quotes\\""
-'''
+''')
 
-combined_quoted_data = '''regular_double_quoted double_double_quoted escaped_double_quoted
+combined_quoted_data = six.b('''regular_double_quoted double_double_quoted escaped_double_quoted
 "this is a quoted value" "this is a quoted value with ""double double quotes""" "this is a quoted value with \\"escaped double quotes\\""
-'''
+''')
 
-sample_quoted_data2 = '"quoted data" 23\nunquoted-data 54'
+sample_quoted_data2 = six.b('"quoted data" 23\nunquoted-data 54')
 
-one_column_data = '''data without commas 1
+one_column_data = six.b('''data without commas 1
 data without commas 2
-'''
+''')
 
 # Values with leading whitespace
-sample_data_rows_with_spaces = ['a,1,0', '   b,   2,0', 'c,,0']
-sample_data_with_spaces_no_header = "\n".join(
-    sample_data_rows_with_spaces) + "\n"
+sample_data_rows_with_spaces = [six.b('a,1,0'), six.b('   b,   2,0'), six.b('c,,0')]
+sample_data_with_spaces_no_header = six.b("\n").join(
+    sample_data_rows_with_spaces) + six.b("\n")
 
-header_row_with_spaces = 'name,value 1,value2'
+header_row_with_spaces = six.b('name,value 1,value2')
 sample_data_with_spaces_with_header = header_row_with_spaces + \
-    "\n" + sample_data_with_spaces_no_header
+    six.b("\n") + sample_data_with_spaces_no_header
 
 long_value1 = "23683289372328372328373"
 int_value = "2328372328373"
 sample_data_with_long_values = "%s\n%s\n%s" % (long_value1,int_value,int_value)
 
 def one_column_warning(e):
-    return e[0].startswith('Warning: column count is one')
+    return e[0].startswith(six.b('Warning: column count is one'))
 
 
 class AbstractQTestCase(unittest.TestCase):
 
-    def create_file_with_data(self, data, encoding='utf-8'):
+    def create_file_with_data(self, data, encoding=None):
+        if encoding is not None:
+            raise Exception('Deprecated: Encoding must be none')
         tmpfile = NamedTemporaryFile(delete=False)
-        if encoding != 'none' and encoding is not None:
-            tmpfile.write(data.encode(encoding))
-        else:
-            tmpfile.write(data)
+        tmpfile.write(data)
         tmpfile.close()
         return tmpfile
 
     def cleanup(self, tmpfile):
-        os.remove(tmpfile.name)
+        global DEBUG
+        if not DEBUG:
+            os.remove(tmpfile.name)
 
     def random_tmp_filename(self,prefix,postfix):
         # TODO Use more robust method for this
@@ -146,16 +163,16 @@ class SaveDbToDiskTests(AbstractQTestCase):
         self.assertFalse(os.path.exists(db_filename))
 
         retcode, o, e = run_command('seq 1 1000 | ../bin/q "select count(*) from -" -c 1 -S %s' % db_filename)
-        
+
         self.assertTrue(retcode == 0)
         self.assertTrue(len(o) == 0)
         self.assertTrue(len(e) == 5)
-        self.assertTrue(e[0].startswith('Going to save data'))
-        self.assertTrue(db_filename in e[0])
-        self.assertTrue(e[1].startswith('Data has been loaded in'))
-        self.assertTrue(e[2].startswith('Saving data to db file'))
-        self.assertTrue(e[3].startswith('Data has been saved into'))
-        self.assertTrue(e[4] == 'Query to run on the database: select count(*) from `-`;')
+        self.assertTrue(e[0].startswith(six.b('Going to save data')))
+        self.assertTrue(db_filename.encode(sys.stdout.encoding) in e[0])
+        self.assertTrue(e[1].startswith(six.b('Data has been loaded in')))
+        self.assertTrue(e[2].startswith(six.b('Saving data to db file')))
+        self.assertTrue(e[3].startswith(six.b('Data has been saved into')))
+        self.assertTrue(e[4] == six.b('Query to run on the database: select count(*) from `-`;'))
 
         self.assertTrue(os.path.exists(db_filename))
 
@@ -178,8 +195,8 @@ class SaveDbToDiskTests(AbstractQTestCase):
 
         retcode2, o2, e2 = run_command('seq 1 1000 | ../bin/q "select count(*) from -" -c 1 -S %s' % db_filename)
         self.assertTrue(retcode2 != 0)
-        self.assertTrue(e2[0].startswith('Going to save data into a disk database'))
-        self.assertTrue(e2[1] == 'Disk database file %s already exists.' % db_filename)
+        self.assertTrue(e2[0].startswith(six.b('Going to save data into a disk database')))
+        self.assertTrue(e2[1] == six.b('Disk database file {} already exists.'.format(db_filename)))
 
         os.remove(db_filename)
 
@@ -193,13 +210,13 @@ class BasicTests(AbstractQTestCase):
         self.assertTrue(len(o) == 1)
         self.assertTrue(len(e) == 1)
 
-        s = sum(xrange(1, 11))
-        self.assertTrue(o[0] == '%s %s' % (s, s / 10.0))
+        s = sum(range(1, 11))
+        self.assertTrue(o[0] == six.b('%s %s' % (s, s / 10.0)))
         self.assertTrue(one_column_warning(e))
 
     def test_gzipped_file(self):
         tmpfile = self.create_file_with_data(
-            '\x1f\x8b\x08\x08\xf2\x18\x12S\x00\x03xxxxxx\x003\xe42\xe22\xe62\xe12\xe52\xe32\xe7\xb2\xe0\xb2\xe424\xe0\x02\x00\xeb\xbf\x8a\x13\x15\x00\x00\x00', encoding='none')
+            six.b('\x1f\x8b\x08\x08\xf2\x18\x12S\x00\x03xxxxxx\x003\xe42\xe22\xe62\xe12\xe52\xe32\xe7\xb2\xe0\xb2\xe424\xe0\x02\x00\xeb\xbf\x8a\x13\x15\x00\x00\x00'))
 
         cmd = '../bin/q -z "select sum(c1),avg(c1) from %s"' % tmpfile.name
 
@@ -208,15 +225,15 @@ class BasicTests(AbstractQTestCase):
         self.assertTrue(len(o) == 1)
         self.assertTrue(len(e) == 1)
 
-        s = sum(xrange(1, 11))
-        self.assertTrue(o[0] == '%s %s' % (s, s / 10.0))
+        s = sum(range(1, 11))
+        self.assertTrue(o[0] == six.b('%s %s' % (s, s / 10.0)))
         self.assertTrue(one_column_warning(e))
 
         self.cleanup(tmpfile)
 
     def test_attempt_to_unzip_stdin(self):
         tmpfile = self.create_file_with_data(
-            '\x1f\x8b\x08\x08\xf2\x18\x12S\x00\x03xxxxxx\x003\xe42\xe22\xe62\xe12\xe52\xe32\xe7\xb2\xe0\xb2\xe424\xe0\x02\x00\xeb\xbf\x8a\x13\x15\x00\x00\x00', encoding='none')
+            six.b('\x1f\x8b\x08\x08\xf2\x18\x12S\x00\x03xxxxxx\x003\xe42\xe22\xe62\xe12\xe52\xe32\xe7\xb2\xe0\xb2\xe424\xe0\x02\x00\xeb\xbf\x8a\x13\x15\x00\x00\x00'))
 
         cmd = 'cat %s | ../bin/q -z "select sum(c1),avg(c1) from -"' % tmpfile.name
 
@@ -225,7 +242,7 @@ class BasicTests(AbstractQTestCase):
         self.assertTrue(len(o) == 0)
         self.assertTrue(len(e) == 1)
 
-        self.assertEquals(e[0],'Cannot decompress standard input. Pipe the input through zcat in order to decompress.')
+        self.assertEqual(e[0],six.b('Cannot decompress standard input. Pipe the input through zcat in order to decompress.'))
 
         self.cleanup(tmpfile)
 
@@ -235,14 +252,14 @@ class BasicTests(AbstractQTestCase):
         cmd = '../bin/q -d " " "select * from %s" -H' % tmpfile.name
         retcode, o, e = run_command(cmd)
 
-        self.assertNotEquals(retcode, 0)
-        self.assertEquals(len(o), 0)
-        self.assertEquals(len(e), 3)
+        self.assertNotEqual(retcode, 0)
+        self.assertEqual(len(o), 0)
+        self.assertEqual(len(e), 3)
 
         self.assertTrue(e[0].startswith(
-            "Warning: column count is one - did you provide the correct delimiter"))
-        self.assertTrue(e[1].startswith("Bad header row"))
-        self.assertTrue("Column name cannot contain commas" in e[2])
+            six.b("Warning: column count is one - did you provide the correct delimiter")))
+        self.assertTrue(e[1].startswith(six.b("Bad header row")))
+        self.assertTrue(six.b("Column name cannot contain commas") in e[2])
 
         self.cleanup(tmpfile)
 
@@ -252,11 +269,11 @@ class BasicTests(AbstractQTestCase):
         cmd = '../bin/q -d , "select c2 from %s where regexp(\'^1\',c2)"' % tmpfile.name
         retcode, o, e = run_command(cmd)
 
-        self.assertEquals(retcode, 0)
-        self.assertEquals(len(o), 1)
-        self.assertEquals(len(e), 0)
+        self.assertEqual(retcode, 0)
+        self.assertEqual(len(o), 1)
+        self.assertEqual(len(e), 0)
 
-        self.assertEquals(o[0],"1")
+        self.assertEqual(o[0],six.b("1"))
 
         self.cleanup(tmpfile)
 
@@ -266,11 +283,11 @@ class BasicTests(AbstractQTestCase):
         cmd = '../bin/q -d , "select count(*) from %s where regexp(\'^\',c2)"' % tmpfile.name
         retcode, o, e = run_command(cmd)
 
-        self.assertEquals(retcode, 0)
-        self.assertEquals(len(o), 1)
-        self.assertEquals(len(e), 0)
+        self.assertEqual(retcode, 0)
+        self.assertEqual(len(o), 1)
+        self.assertEqual(len(e), 0)
 
-        self.assertEquals(o[0],"2")
+        self.assertEqual(o[0],six.b("2"))
 
         self.cleanup(tmpfile)
 
@@ -280,41 +297,41 @@ class BasicTests(AbstractQTestCase):
         cmd = '../bin/q -d , "select c1 from %s"' % tmpfile.name
         retcode, o, e = run_command(cmd)
 
-        self.assertEquals(retcode, 0)
-        self.assertEquals(len(o), 3)
-        self.assertEquals(len(e), 0)
+        self.assertEqual(retcode, 0)
+        self.assertEqual(len(o), 3)
+        self.assertEqual(len(e), 0)
 
-        self.assertEquals(" ".join(o), 'a b c')
+        self.assertEqual(six.b(" ").join(o), six.b('a b c'))
 
         self.cleanup(tmpfile)
 
     def test_ta
author	Harel Ben-Attia <harelba@gmail.com>	2018-12-21 18:19:48 +0200
committer	GitHub <noreply@github.com>	2018-12-21 18:19:48 +0200
commit	28f776ed464c8c125995485cf40f037128354061 (patch)
tree	cb3d0b635d221eec00cc9b8a5a6d71b535bfd4e7
parent	aeb9dae530372b8c7498e5906dac15778f3cab9d (diff)
parent	746cb9f67657c9d484befbe15862d788b18be876 (diff)