summary | refs | log | tree | commit | diff | stats
path: root/bin
diff options
context:
space:
mode:
author Harel Ben-Attia <harelba@gmail.com> 2016-03-08 12:39:10 +0200
committer Harel Ben-Attia <harelba@gmail.com> 2016-03-08 12:39:10 +0200
commit 0aa96f216956391ec498dd4204a2f043ac1f2ba6 (patch)
tree f5d5b8f3a6b587bb9743a0aa80d3379121e4cca3 /bin
parent 5ab3dba91c875dde332c509d59d7e7226a1f73f8 (diff)
Fixed #122 - Empty data and missing header behavior
This is a breaking change: when a header line is expected but missing, q now raises an "expected but missing header" error instead of ignoring the situation or merely issuing a warning.
Diffstat (limited to 'bin')
-rwxr-xr-x bin/q 42
1 files changed, 33 insertions, 9 deletions
diff --git a/bin/q b/bin/q
index f9fdab4..c4f3c45 100755
--- a/bin/q
+++ b/bin/q
@@ -250,6 +250,11 @@ class EmptyDataException(Exception):
def __init__(self):
pass
+class MissingHeaderException(Exception):
+
+ def __init__(self,msg):
+ self.msg = msg
+
class FileNotFoundException(Exception):
@@ -471,8 +476,6 @@ class TableColumnInferer(object):
if self.column_count == 1 and self.expected_column_count != 1:
print >>sys.stderr, "Warning: column count is one - did you provide the correct delimiter?"
- if self.column_count == 0:
- raise Exception("Detected a column count of zero... Failing")
self.infer_column_types()
@@ -550,11 +553,14 @@ class TableColumnInferer(object):
def _do_relaxed_analysis(self):
column_count_list = [len(col_vals) for col_vals in self.rows]
- if self.expected_column_count is not None:
- self.column_count = self.expected_column_count
+ if len(self.rows) == 0:
+ self.column_count = 0
else:
- # If not specified, we'll take the largest row in the sample rows
- self.column_count = max(column_count_list)
+ if self.expected_column_count is not None:
+ self.column_count = self.expected_column_count
+ else:
+ # If not specified, we'll take the largest row in the sample rows
+ self.column_count = max(column_count_list)
def get_column_count_summary(self, column_count_list):
counts = {}
@@ -757,6 +763,8 @@ class TableCreator(object):
self.materialized_file_dict[filename] = mfs
def _populate(self,dialect,stop_after_analysis=False):
+ total_data_lines_read = 0
+
# For each match
for filename in self.materialized_file_list:
mfs = self.materialized_file_dict[filename]
@@ -767,8 +775,10 @@ class TableCreator(object):
self._insert_row(col_vals)
if stop_after_analysis and self.column_inferer.inferred:
return
- if mfs.lines_read == 0 or (mfs.lines_read == 1 and self.skip_header):
- raise EmptyDataException()
+ if mfs.lines_read == 0 and self.skip_header:
+ raise MissingHeaderException("Header line is expected but missing in file %s" % filename)
+
+ total_data_lines_read += mfs.lines_read - (1 if self.skip_header else 0)
except StrictModeColumnCountMismatchException,e:
raise ColumnCountMismatchException(
'Strict mode - Expected %s columns instead of %s columns in file %s row %s. Either use relaxed/fluffy modes or check your delimiter' % (
@@ -786,6 +796,10 @@ class TableCreator(object):
self.column_inferer.force_analysis()
self._do_create_table()
+
+ if total_data_lines_read == 0:
+ raise EmptyDataException()
+
def populate(self,dialect,stop_after_analysis=False):
if self.state == TableCreatorState.NEW:
self._pre_populate(dialect)
@@ -928,9 +942,17 @@ class TableCreator(object):
self.table_name = self.db.generate_temp_table_name()
# Get the column definition dict from the inferer
column_dict = self.column_inferer.get_column_dict()
+
+ # Guard against empty tables (instead of preventing the creation, just create with a dummy column)
+ if len(column_dict) == 0:
+ column_dict = { 'dummy_column_for_empty_tables' : str }
+ ordered_column_names = [ 'dummy_column_for_empty_tables' ]
+ else:
+ ordered_column_names = self.column_inferer.get_column_names()
+
# Create the CREATE TABLE statement
create_table_stmt = self.db.generate_create_table(
- self.table_name, self.column_inferer.get_column_names(), column_dict)
+ self.table_name, ordered_column_names, column_dict)
# And create the table itself
self.db.execute_and_fetch(create_table_stmt)
# Mark the table as created
@@ -1208,6 +1230,8 @@ class QTextAsData(object):
except EmptyDataException,e:
warnings.append(QWarning(e,"Warning - data is empty"))
+ except MissingHeaderException,e:
+ error = QError(e,e.msg,117)
except FileNotFoundException, e:
error = QError(e,e.msg,30)
except sqlite3.OperationalError, e: