diff options
-rwxr-xr-x | bin/q | 13
-rw-r--r-- | doc/CHANGELOG.markdown | 7
-rwxr-xr-x | test/test-suite | 4
3 files changed, 17 insertions, 7 deletions
@@ -27,7 +27,7 @@ # # Run with --help for command line details # -q_version = "1.5.0" # not released yet +q_version = "1.5.0" __all__ = [ 'QTextAsData' ] @@ -617,8 +617,19 @@ class MaterializedFileState(object): self.encoding = encoding self.dialect = dialect self.is_stdin = is_stdin + self.skipped_bom = False def read_file_using_csv(self): + # This is a hack for utf-8 with BOM encoding in order to skip the BOM. python's csv module + # has a bug which prevents fixing it using the proper encoding, and it has been encountered by + # multiple people. + if self.encoding == 'utf-8-sig' and self.lines_read == 0 and not self.skipped_bom: + try: + BOM = self.f.read(3) + if BOM != '\xef\xbb\xbf': + raise Exception('Value of BOM is not as expected - Value is "%s"' % str(BOM)) + except Exception,e: + raise Exception('Tried to skip BOM for "utf-8-sig" encoding and failed. Error message is ' + str(e)) csv_reader = encoded_csv_reader(self.encoding, self.f, dialect=self.dialect) for col_vals in csv_reader: self.lines_read += 1 diff --git a/doc/CHANGELOG.markdown b/doc/CHANGELOG.markdown index 4921e93..7267f29 100644 --- a/doc/CHANGELOG.markdown +++ b/doc/CHANGELOG.markdown @@ -4,16 +4,19 @@ ## Change log **Fri Dec 12 2014 Harel Ben-Attia <harelba@gmail.com> 1.5.0-1** - Full input/output support for double-quoting fields with delimiters +- Multiple query support in one command line, reusing previously loaded data in subsequent queries +- Support literal SELECT statements (e.g. SELECT 5+12.5/3) - Full code restructuring (Internally working now using a full python API which will become public in the next version) - Added sha1 function - Solved the following bugs/pull-requests: - - [#63](../../../issues/63) - Unicode string support in regexp function + - [#10](../../../issues/10) - Reuse of previously loaded data when running multiple queries in one command line - [#64](../../../issues/64) - Full support for literal SELECT statements without a table (e.g. 
SELECT 5+12.5) - [#56](../../../issues/56),[#78](../../../issues/78) - Proper double quote handling, including multiline fields, for both input and output according to csv standards - - [#10](../../../issues/10) - Reuse of previously loaded data when running multiple queries in one command line - [#69](../../../issues/69) - Added warning suppression when the user provides a specific column count - [#40](../../../issues/40) - Code restructuring cleaning, creating a full python API - [#60](../../../issues/60) - Fixed RPM packaging + - [#68](../../../issues/68) - UTF-8 with BOM files cause column naming issues + - [#63](../../../issues/63) - Unicode string support in regexp function **Sat Jun 14 2014 Harel Ben-Attia <harelba@gmail.com> 1.4.0-1** - 2.5x Speed improvement due to better bulk loading - Output header support diff --git a/test/test-suite b/test/test-suite index ac54971..f506024 100755 --- a/test/test-suite +++ b/test/test-suite @@ -1043,10 +1043,6 @@ class BasicTests(AbstractQTestCase): self._internal_test_consistency_of_chaining_output_to_input(input_data,'all','all') def test_utf8_with_bom_encoding(self): - return - # DOES NOT PASS due to csv python module issue http://bugs.python.org/issue7185 . Need to find a workaround. - # Trying to wrap the file object passed to the csv module using a proper codec fails because of csv module internals. - utf_8_data_with_bom = '\xef\xbb\xbf"typeid","limit","apcost","date","checkpointId"\n"1","2","5","1,2,3,4,5,6,7","3000,3001,3002"\n"2","2","5","1,2,3,4,5,6,7","3003,3004,3005"\n' tmp_data_file = self.create_file_with_data(utf_8_data_with_bom,encoding=None) |