summaryrefslogtreecommitdiffstats
path: root/bin
diff options
context:
space:
mode:
authorHarel Ben-Attia <harelba@gmail.com>2014-12-12 11:08:00 -0500
committerHarel Ben-Attia <harelba@gmail.com>2014-12-12 11:08:00 -0500
commit61cec5ee5819bcff05bd33c5f32c1509238181f8 (patch)
treef9eaee88e4c96f8f119d08edf976b765a626fbf3 /bin
parent705a9611a26c6e311d8d11889dbeb5982af1d5a8 (diff)
Fixed utf-8 with BOM issues and reinstated the matching test + docs stuff
Diffstat (limited to 'bin')
-rwxr-xr-xbin/q13
1 files changed, 12 insertions, 1 deletions
diff --git a/bin/q b/bin/q
index a77f576..cbb62b4 100755
--- a/bin/q
+++ b/bin/q
@@ -27,7 +27,7 @@
#
# Run with --help for command line details
#
-q_version = "1.5.0" # not released yet
+q_version = "1.5.0"
__all__ = [ 'QTextAsData' ]
@@ -617,8 +617,19 @@ class MaterializedFileState(object):
self.encoding = encoding
self.dialect = dialect
self.is_stdin = is_stdin
+ self.skipped_bom = False
def read_file_using_csv(self):
+ # This is a hack for the utf-8 with BOM encoding, in order to skip the BOM. Python's csv module
+ # has a bug which prevents handling the BOM by using the proper encoding, and this bug has been
+ # encountered by multiple people.
+ if self.encoding == 'utf-8-sig' and self.lines_read == 0 and not self.skipped_bom:
+ try:
+ BOM = self.f.read(3)
+ if BOM != '\xef\xbb\xbf':
+ raise Exception('Value of BOM is not as expected - Value is "%s"' % str(BOM))
+ except Exception,e:
+ raise Exception('Tried to skip BOM for "utf-8-sig" encoding and failed. Error message is ' + str(e))
csv_reader = encoded_csv_reader(self.encoding, self.f, dialect=self.dialect)
for col_vals in csv_reader:
self.lines_read += 1