summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rwxr-xr-x bin/q 13
-rw-r--r-- doc/CHANGELOG.markdown 7
-rwxr-xr-x test/test-suite 4
3 files changed, 17 insertions, 7 deletions
diff --git a/bin/q b/bin/q
index a77f576..cbb62b4 100755
--- a/bin/q
+++ b/bin/q
@@ -27,7 +27,7 @@
#
# Run with --help for command line details
#
-q_version = "1.5.0" # not released yet
+q_version = "1.5.0"
__all__ = [ 'QTextAsData' ]
@@ -617,8 +617,19 @@ class MaterializedFileState(object):
self.encoding = encoding
self.dialect = dialect
self.is_stdin = is_stdin
+ self.skipped_bom = False
def read_file_using_csv(self):
+ # This is a hack for utf-8 with BOM encoding in order to skip the BOM. python's csv module
+ # has a bug which prevents fixing it using the proper encoding, and it has been encountered by
+ # multiple people.
+ if self.encoding == 'utf-8-sig' and self.lines_read == 0 and not self.skipped_bom:
+ try:
+ BOM = self.f.read(3)
+ if BOM != '\xef\xbb\xbf':
+ raise Exception('Value of BOM is not as expected - Value is "%s"' % str(BOM))
+ except Exception,e:
+ raise Exception('Tried to skip BOM for "utf-8-sig" encoding and failed. Error message is ' + str(e))
csv_reader = encoded_csv_reader(self.encoding, self.f, dialect=self.dialect)
for col_vals in csv_reader:
self.lines_read += 1
diff --git a/doc/CHANGELOG.markdown b/doc/CHANGELOG.markdown
index 4921e93..7267f29 100644
--- a/doc/CHANGELOG.markdown
+++ b/doc/CHANGELOG.markdown
@@ -4,16 +4,19 @@
## Change log
**Fri Dec 12 2014 Harel Ben-Attia <harelba@gmail.com> 1.5.0-1**
- Full input/output support for double-quoting fields with delimiters
+- Multiple query support in one command line, reusing previously loaded data in subsequent queries
+- Support literal SELECT statements (e.g. SELECT 5+12.5/3)
- Full code restructuring (Internally working now using a full python API which will become public in the next version)
- Added sha1 function
- Solved the following bugs/pull-requests:
- - [#63](../../../issues/63) - Unicode string support in regexp function
+ - [#10](../../../issues/10) - Reuse of previously loaded data when running multiple queries in one command line
- [#64](../../../issues/64) - Full support for literal SELECT statements without a table (e.g. SELECT 5+12.5)
- [#56](../../../issues/56),[#78](../../../issues/78) - Proper double quote handling, including multiline fields, for both input and output according to csv standards
- - [#10](../../../issues/10) - Reuse of previously loaded data when running multiple queries in one command line
- [#69](../../../issues/69) - Added warning suppression when the user provides a specific column count
- [#40](../../../issues/40) - Code restructuring cleaning, creating a full python API
- [#60](../../../issues/60) - Fixed RPM packaging
+ - [#68](../../../issues/68) - UTF-8 with BOM files cause column naming issues
+ - [#63](../../../issues/63) - Unicode string support in regexp function
**Sat Jun 14 2014 Harel Ben-Attia <harelba@gmail.com> 1.4.0-1**
- 2.5x Speed improvement due to better bulk loading
- Output header support
diff --git a/test/test-suite b/test/test-suite
index ac54971..f506024 100755
--- a/test/test-suite
+++ b/test/test-suite
@@ -1043,10 +1043,6 @@ class BasicTests(AbstractQTestCase):
self._internal_test_consistency_of_chaining_output_to_input(input_data,'all','all')
def test_utf8_with_bom_encoding(self):
- return
- # DOES NOT PASS due to csv python module issue http://bugs.python.org/issue7185 . Need to find a workaround.
- # Trying to wrap the file object passed to the csv module using a proper codec fails because of csv module internals.
-
utf_8_data_with_bom = '\xef\xbb\xbf"typeid","limit","apcost","date","checkpointId"\n"1","2","5","1,2,3,4,5,6,7","3000,3001,3002"\n"2","2","5","1,2,3,4,5,6,7","3003,3004,3005"\n'
tmp_data_file = self.create_file_with_data(utf_8_data_with_bom,encoding=None)