diff options
author | Harel Ben-Attia <harelba@gmail.com> | 2014-12-12 11:08:00 -0500 |
---|---|---|
committer | Harel Ben-Attia <harelba@gmail.com> | 2014-12-12 11:08:00 -0500 |
commit | 61cec5ee5819bcff05bd33c5f32c1509238181f8 (patch) | |
tree | f9eaee88e4c96f8f119d08edf976b765a626fbf3 /bin | |
parent | 705a9611a26c6e311d8d11889dbeb5982af1d5a8 (diff) |
Fixed utf-8 with BOM issues and reinstated the matching test + docs stuff
Diffstat (limited to 'bin')
-rwxr-xr-x | bin/q | 13 |
1 files changed, 12 insertions, 1 deletions
```diff
@@ -27,7 +27,7 @@
 #
 # Run with --help for command line details
 #
-q_version = "1.5.0" # not released yet
+q_version = "1.5.0"
 
 __all__ = [ 'QTextAsData' ]
 
@@ -617,8 +617,19 @@ class MaterializedFileState(object):
         self.encoding = encoding
         self.dialect = dialect
         self.is_stdin = is_stdin
+        self.skipped_bom = False
 
     def read_file_using_csv(self):
+        # This is a hack for utf-8 with BOM encoding in order to skip the BOM. python's csv module
+        # has a bug which prevents fixing it using the proper encoding, and it has been encountered by
+        # multiple people.
+        if self.encoding == 'utf-8-sig' and self.lines_read == 0 and not self.skipped_bom:
+            try:
+                BOM = self.f.read(3)
+                if BOM != '\xef\xbb\xbf':
+                    raise Exception('Value of BOM is not as expected - Value is "%s"' % str(BOM))
+            except Exception,e:
+                raise Exception('Tried to skip BOM for "utf-8-sig" encoding and failed. Error message is ' + str(e))
         csv_reader = encoded_csv_reader(self.encoding, self.f, dialect=self.dialect)
         for col_vals in csv_reader:
             self.lines_read += 1
```