summaryrefslogtreecommitdiffstats
path: root/bin
diff options
context:
space:
mode:
author: Harel Ben-Attia <harelba@gmail.com> 2014-06-07 11:48:17 -0400
committer: Harel Ben-Attia <harelba@gmail.com> 2014-06-07 11:48:17 -0400
commit: 471371d8889aa234120c2b869ebc68bc24df68b4 (patch)
tree: 49a5e04a723814dca37639ac55478e57df9cc6dc /bin
parent: 6ccf713593dd7d9e9e5646987b631e2d59af2819 (diff)
Encoding related additions + Allow reading query from a file
* Allow reading queries from a file
* Allow selecting the encoding of the query itself
* Allow selecting the output encoding separately from the input encoding
* Better encoding error reporting
* Additional tests
Diffstat (limited to 'bin')
-rwxr-xr-x bin/q 72
1 files changed, 62 insertions, 10 deletions
diff --git a/bin/q b/bin/q
index fa5e5c3..edbd310 100755
--- a/bin/q
+++ b/bin/q
@@ -47,11 +47,14 @@ import csv
DEBUG = False
-# Encode stdout properly,
-if sys.stdout.isatty():
- STDOUT = codecs.getwriter(sys.stdout.encoding)(sys.stdout)
-else:
- STDOUT = codecs.getwriter(locale.getpreferredencoding())(sys.stdout)
+def get_stdout_encoding(encoding_override=None):
+    """Pick the encoding to use when writing results to stdout.
+
+    An explicit override wins, unless it is None or the literal string
+    'none'. Without a usable override, the terminal's own encoding is used
+    when stdout is a tty, and the locale's preferred encoding otherwise.
+    """
+    has_override = encoding_override is not None and encoding_override != 'none'
+    if not has_override:
+        if sys.stdout.isatty():
+            return sys.stdout.encoding
+        return locale.getpreferredencoding()
+
+    return encoding_override
SHOW_SQL = False
@@ -82,6 +85,8 @@ default_output_delimiter = get_option_with_default(
default_skip_header = get_option_with_default(p, 'int', 'skip_header', 0)
default_formatting = get_option_with_default(p, 'string', 'formatting', None)
default_encoding = get_option_with_default(p, 'string', 'encoding', 'UTF-8')
+# Output encoding is looked up under its own 'output_encoding' key. Reading the
+# input 'encoding' key here would make a configured input encoding silently
+# become the output encoding as well. None means "auto-detect" later on.
+default_output_encoding = get_option_with_default(p, 'string', 'output_encoding', None)
+# Encoding of the query text itself; defaults to the locale's preferred encoding.
+default_query_encoding = get_option_with_default(p, 'string', 'query_encoding', locale.getpreferredencoding())
parser = OptionParser(usage="""
q allows performing SQL-like statements on tabular text data.
@@ -116,7 +121,7 @@ parser.add_option("-b", "--beautify", dest="beautify", default=default_beautify,
parser.add_option("-z", "--gzipped", dest="gzipped", default=default_gzipped, action="store_true",
help="Data is gzipped. Useful for reading from stdin. For files, .gz means automatic gunzipping")
parser.add_option("-d", "--delimiter", dest="delimiter", default=default_delimiter,
- help="Field delimiter. If none specified, then space is used as the delimiter. If you need multi-character delimiters, run the tool with engine version 1 by adding '-E v1'. Using v1 will also revert to the old behavior where if no delimiter is provided, then any whitespace will be considered as a delimiter.")
+ help="Field delimiter. If none specified, then space is used as the delimiter.")
parser.add_option("-D", "--output-delimiter", dest="output_delimiter", default=default_output_delimiter,
help="Field delimiter for output. If none specified, then the -d delimiter is used if present, or space if no delimiter is specified")
parser.add_option("-t", "--tab-delimited", dest="tab_delimited", default=False, action="store_true",
@@ -129,6 +134,12 @@ parser.add_option("-f", "--formatting", dest="formatting", default=default_forma
help="Output-level formatting, in the format X=fmt,Y=fmt etc, where X,Y are output column numbers (e.g. 1 for first SELECT column etc.")
parser.add_option("-e", "--encoding", dest="encoding", default=default_encoding,
help="Input file encoding. Defaults to UTF-8. set to none for not setting any encoding - faster, but at your own risk...")
+parser.add_option("-E", "--output-encoding", dest="output_encoding", default=default_output_encoding,
+ help="Output encoding. Defaults to 'none', leading to selecting the system/terminal encoding")
+parser.add_option("-Q", "--query-encoding", dest="query_encoding", default=default_query_encoding,
+ help="query text encoding. Experimental. Please send your feedback on this")
+parser.add_option("-q", "--query-filename", dest="query_filename", default=None,
+ help="Read query from the provided filename instead of the command line, possibly using the provided query encoding (using -Q).")
parser.add_option("-v", "--version", dest="version", default=False, action="store_true",
help="Print version")
parser.add_option("-A", "--analyze-only", dest="analyze_only", action='store_true',
@@ -165,7 +176,7 @@ class Sqlite3DB(object):
def update_many(self, sql, params):
try:
if self.show_sql:
- print sql
+ print sql, " params: " + str(params)
self.cursor.executemany(sql, params)
finally:
pass # cursor.close()
@@ -842,19 +853,57 @@ if options.version:
print "q version %s" % q_version
sys.exit(0)
-if len(args) != 1:
- parser.print_help()
+# The query is either the single positional argument or comes from a file given
+# with -q/--query-filename (-f is the unrelated --formatting option).
+if len(args) > 1:
+    print >>sys.stderr,"Must provide query as one parameter, enclosed in quotes, or through a file with the -q parameter"
+    sys.exit(1)
+
+# Neither a command line query nor a query file was supplied.
+if len(args) == 0 and options.query_filename is None:
+    print >>sys.stderr,"Must provide a query in the command line, or through a file with the -q parameter"
+    sys.exit(1)
+
+# Obtain the raw query text: from the query file when one was given, otherwise
+# from the single command line argument. The two sources are mutually exclusive.
+if options.query_filename is not None:
+    if len(args) != 0:
+        print >>sys.stderr,"Can't provide both a query file and a query on the command line"
+        sys.exit(1)
+    try:
+        f = open(options.query_filename)
+        try:
+            query_str = f.read()
+        finally:
+            # Release the handle even when read() fails.
+            f.close()
+    except (IOError, OSError):
+        # Only I/O failures are reported here; a bare except would also
+        # swallow KeyboardInterrupt and hide unrelated programming errors.
+        print >>sys.stderr,"Could not read query from file %s" % options.query_filename
+        sys.exit(1)
+else:
+    query_str = args[0]
+
+# Decode the raw query text using the requested query encoding, unless decoding
+# was disabled by passing 'none'.
+if options.query_encoding is not None and options.query_encoding != 'none':
+    try:
+        query_str = query_str.decode(options.query_encoding)
+    except (UnicodeError, LookupError):
+        # UnicodeError: the bytes are not valid in that encoding.
+        # LookupError: the encoding name itself is unknown.
+        print >>sys.stderr,"Could not decode query using the provided query encoding (%s)" % options.query_encoding
+        sys.exit(3)
+
+query_str = query_str.strip()
+
+if len(query_str) == 0:
+ print >>sys.stderr,"Query cannot be empty"
sys.exit(1)
if options.mode not in ['fluffy', 'relaxed', 'strict']:
print >>sys.stderr, "Parsing mode can be one of fluffy, relaxed or strict"
sys.exit(13)
+# Wrap stdout in a writer that encodes output with the selected output encoding
+# (explicit -E value, or the terminal/locale encoding when none was given).
+output_encoding = get_stdout_encoding(options.output_encoding)
+try:
+    STDOUT = codecs.getwriter(output_encoding)(sys.stdout)
+except (LookupError, TypeError):
+    # LookupError: unknown encoding name. TypeError: no encoding could be
+    # determined at all (the resolved value is None).
+    print >>sys.stderr,"Could not create output stream using output encoding %s" % (output_encoding)
+    sys.exit(200)
+
# Create DB object
db = Sqlite3DB()
# Create SQL statment
-sql_object = Sql('%s' % args[0])
+sql_object = Sql('%s' % query_str)
# If the user flagged for a tab-delimited file then set the delimiter to tab
if options.tab_delimited:
@@ -991,6 +1040,9 @@ try:
row_str.append(fmt_str % "")
STDOUT.write(output_delimiter.join(row_str) + "\n")
+except (UnicodeEncodeError, UnicodeError), e:
+ print >>sys.stderr, "Cannot encode data. Error:%s" % e
+ sys.exit(3)
except IOError, e:
if e.errno == 32:
# broken pipe, that's ok