summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHarel Ben-Attia <harelba@gmail.com>2020-08-30 16:36:08 +0300
committerHarel Ben-Attia <harelba@gmail.com>2020-08-30 16:36:08 +0300
commit7abaab5656d524a8b3a8706d37d85009c58f842c (patch)
tree470f3feb7eb2be3cbcd4e81325ee86b00b9f73ad
parentb2f8a0ea431291fc5ba5bdc5306b31cf2263dc3b (diff)
-rw-r--r--.gitignore1
-rw-r--r--.travis.yml145
-rw-r--r--Makefile37
-rw-r--r--README.markdown13
-rwxr-xr-xbin/__version__.py7
-rwxr-xr-xbin/q.py (renamed from bin/q)55
-rwxr-xr-xdist/create-rpm4
-rwxr-xr-xdo-manual-release.sh29
-rw-r--r--mkdocs/docs/.DS_Storebin0 -> 6148 bytes
-rw-r--r--mkdocs/docs/about.md8
-rw-r--r--mkdocs/docs/fsg9b9b1.txt0
-rw-r--r--mkdocs/docs/google0efeb4ff0a886e81.html1
-rw-r--r--mkdocs/docs/img/bg_hr.pngbin0 -> 943 bytes
-rw-r--r--mkdocs/docs/img/blacktocat.pngbin0 -> 1428 bytes
-rw-r--r--mkdocs/docs/img/icon_download.pngbin0 -> 1162 bytes
-rw-r--r--mkdocs/docs/img/q-logo.pngbin0 -> 25042 bytes
-rw-r--r--mkdocs/docs/img/q-logo1.icobin0 -> 106665 bytes
-rw-r--r--mkdocs/docs/img/q-logo1.pngbin0 -> 19560 bytes
-rw-r--r--mkdocs/docs/img/sprite_download.pngbin0 -> 16799 bytes
-rw-r--r--mkdocs/docs/img/sprite_download3.pngbin0 -> 20826 bytes
-rw-r--r--mkdocs/docs/img/sprite_download4.pngbin0 -> 23052 bytes
-rw-r--r--mkdocs/docs/img/torii-favicon.icobin0 -> 370070 bytes
-rw-r--r--mkdocs/docs/index.md381
-rw-r--r--mkdocs/docs/js/google-analytics.js59
-rw-r--r--mkdocs/docs/stylesheets/extra.css38
-rwxr-xr-xmkdocs/generate-web-site.sh3
-rw-r--r--mkdocs/mkdocs.yml48
-rw-r--r--mkdocs/requirements.txt28
-rw-r--r--mkdocs/theme/main.html28
-rw-r--r--pytest.ini2
-rw-r--r--setup-pyenv.sh134
-rw-r--r--setup.py26
-rw-r--r--test-requirements.txt2
-rwxr-xr-xtest/test-suite317
34 files changed, 1204 insertions, 162 deletions
diff --git a/.gitignore b/.gitignore
index 27c87cc..e622440 100644
--- a/.gitignore
+++ b/.gitignore
@@ -13,3 +13,4 @@ packages
dist/windows/
_benchmark_data*
*.benchmark-results
+generated-site/
diff --git a/.travis.yml b/.travis.yml
index 07f1415..d7f8ec4 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,11 +1,136 @@
-language: python
-python:
- - "2.7"
- - "3.6"
-matrix:
+sudo: false
+
+stages:
+ - integration
+ - release
+
+env:
+ global:
+ - CACHE_NAME=${TRAVIS_JOB_NAME}
+
+
+_commands_provider:
+
+ _test: &_test make test
+
+ _lint: &_lint make lint
+
+ _release: &_release make local-release
+
+ _install_requirements: &_install_requirements make dep
+
+ # https://ttcshelbyville.wordpress.com/2012/12/19/disable-remote-differential-compression-form-the-command-line/
+ _disable_windows_compression: &_disable_windows_compression "powershell Disable-WindowsOptionalFeature -Online -FeatureName MSRDC-Infrastructure"
+
+ # https://travis-ci.community/t/yarn-network-troubles/333/7
+ _disable_windows_defender: &_disable_windows_defender "powershell Set-MpPreference -DisableRealtimeMonitoring \\$true"
+
+
+_steps_provider:
+
+ _test: &_step_test
+
+ install:
+ - *_install_requirements
+ before_script: *_lint
+ script: *_test
+
+ _release: &_step_release
+
+ install: *_install_requirements
+ script: *_release
+
+
+
+jobs:
include:
- - python: "3.7"
- dist: xenial # Need for python 3.7
-install: pip install -r requirements.txt
-before_script: flake8 ./bin/q ./test/test-suite --count --select=E901,E999,F821,F822,F823 --show-source --statistics
-script: PYTHONIOENCODING=UTF-8 test/test-all
+ - stage: integration
+ name: py27-macos
+ os: osx
+ language: generic
+ osx_image: xcode7.3
+ env:
+ - PYENV_VERSION=2.7.14
+ before_install: source setup-pyenv.sh
+ <<: *_step_test
+ cache:
+ directories:
+ - ${HOME}/.pyenv_cache
+
+ - stage: integration
+ name: py36-macos
+ os: osx
+ language: generic
+ osx_image: xcode7.3
+ env:
+ - PYENV_VERSION=3.6.4
+ before_install: source setup-pyenv.sh
+ <<: *_step_test
+ cache:
+ directories:
+ - ${HOME}/.pyenv_cache
+
+ - stage: integration
+ name: py37-macos
+ os: osx
+ language: generic
+ osx_image: xcode7.3
+ env:
+ - PYENV_VERSION=3.7.3
+ before_install: source setup-pyenv.sh
+ <<: *_step_test
+ cache:
+ directories:
+ - ${HOME}/.pyenv_cache
+
+ - stage: integration
+ name: py27-linux
+ language: python
+ python: "2.7"
+ <<: *_step_test
+
+ - stage: integration
+ name: py36-linux
+ language: python
+ python: "3.6"
+ <<: *_step_test
+
+ - stage: integration
+ name: py37-linux
+ language: python
+ dist: xenial
+ python: "3.7"
+ <<: *_step_test
+
+ - stage: release
+ name: macos
+ os: osx
+ language: generic
+ osx_image: xcode7.3
+ env:
+ - PYENV_VERSION=3.7.3
+ before_install: source setup-pyenv.sh
+ <<: *_step_release
+ cache:
+ directories:
+ - ${HOME}/.pyenv_cache
+
+ - stage: release
+ name: linux
+ language: python
+ dist: xenial
+ python: "3.7"
+ <<: *_step_release
+
+ - stage: release
+ name: windows
+ os: windows
+ language: shell
+ env:
+ - PATH=/c/Python37:/c/Python37/Scripts:$PATH
+ before_install:
+ - *_disable_windows_compression
+ - *_disable_windows_defender
+ - choco install make
+ - choco install python --version 3.7.3
+ <<: *_step_release
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..e612a54
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,37 @@
+SHELL := /bin/bash
+
+PROJECT_NAME=$(shell dirname "$0")
+ROOT_DIR:=$(shell dirname $(realpath $(lastword $(MAKEFILE_LIST))))
+
+.PHONY: test help
+.DEFAULT_GOAL := ci
+
+ci: lint test ## Equivalent to 'make lint test'
+
+help: ## Show this help message.
+
+ @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}'
+
+dep: ## Install the dependent libraries.
+
+ pip install -r test-requirements.txt
+ pip install -e .
+
+lint: dep ## Run lint validations.
+
+ flake8 q/ --count --select=E901,E999,F821,F822,F823 --show-source --statistics
+
+test: dep ## Run the unit tests.
+
+ test/test-all
+ ## TODO Bring back pytest
+ ## py.test -rs -c pytest.ini -s -v q/tests/suite.py --rootdir .
+
+release: ## Run release
+ pip install py-ci
+ pyci release --no-wheel-publish --wheel-universal
+
+local-release:
+ pip install py-ci
+ ./do-manual-release.sh
+
diff --git a/README.markdown b/README.markdown
index 9477de3..c8802d6 100644
--- a/README.markdown
+++ b/README.markdown
@@ -10,7 +10,7 @@ q's web site is [http://harelba.github.io/q/](http://harelba.github.io/q/). It c
## Installation.
Extremely simple.
-Instructions for all OSs are [here](http://harelba.github.io/q/install.html).
+Instructions for all OSs are [here](http://harelba.github.io/q/#installation).
## Examples
@@ -20,18 +20,19 @@ q "SELECT COUNT(*) FROM ./clicks_file.csv WHERE c3 > 32.3"
ps -ef | q -H "SELECT UID, COUNT(*) cnt FROM - GROUP BY UID ORDER BY cnt DESC LIMIT 3"
```
-Go [here](http://harelba.github.io/q/examples.html) for more examples.
+Go [here](http://harelba.github.io/q/#examples) for more examples.
## Python API
A development branch for exposing q's capabilities as a <strong>Python module</strong> can be viewed <a href="https://github.com/harelba/q/tree/generic-injected-streams/PYTHON-API.markdown">here</a>, along with examples of the alpha version of the API.<br/>Existing functionality as a command-line tool will not be affected by this. Your input will be most appreciated.
-## Change log
-Click [here](http://harelba.github.io/q/changelog.html) to see the change log.
-
## Contact
Any feedback/suggestions/complaints regarding this tool would be much appreciated. Contributions are most welcome as well, of course.
-Harel Ben-Attia, harelba@gmail.com, [@harelba](https://twitter.com/harelba) on Twitter
+Linkedin: [Harel Ben Attia](https://www.linkedin.com/in/harelba/)
+
+Twitter [@harelba](https://twitter.com/harelba)
+
+Email [harelba@gmail.com](mailto:harelba@gmail.com)
q on twitter: #qtextasdata
diff --git a/bin/__version__.py b/bin/__version__.py
new file mode 100755
index 0000000..f9aa4a0
--- /dev/null
+++ b/bin/__version__.py
@@ -0,0 +1,7 @@
+#!/usr/bin/env python
+
+q_version = '2.0.12'
+
+
+if __name__ == '__main__':
+ print(q_version)
diff --git a/bin/q b/bin/q.py
index fbd5879..11c76e5 100755
--- a/bin/q
+++ b/bin/q.py
@@ -30,8 +30,7 @@
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
-
-q_version = "2.0.6"
+from .__version__ import q_version
__all__ = [ 'QTextAsData' ]
@@ -476,16 +475,18 @@ class TableColumnInferer(object):
self.rows = []
self.skip_header = skip_header
self.header_row = None
+ self.header_row_filename = None
self.expected_column_count = expected_column_count
self.input_delimiter = input_delimiter
self.disable_column_type_detection = disable_column_type_detection
- def analyze(self, col_vals):
+ def analyze(self, filename, col_vals):
if self.inferred:
raise Exception("Already inferred columns")
if self.skip_header and self.header_row is None:
self.header_row = col_vals
+ self.header_row_filename = filename
else:
self.rows.append(col_vals)
@@ -905,17 +906,36 @@ class TableCreator(object):
mfs = MaterializedFileState(filename,f,self.encoding,dialect,is_stdin)
self.materialized_file_dict[filename] = mfs
+ def _should_skip_extra_headers(self, filenumber, filename, mfs, col_vals):
+ if not self.skip_header:
+ return False
+
+ if filenumber == 0:
+ return False
+
+ header_already_exists = self.column_inferer.header_row is not None
+
+ is_extra_header = self.skip_header and mfs.lines_read == 1 and header_already_exists
+
+ if is_extra_header:
+ if tuple(self.column_inferer.header_row) != tuple(col_vals):
+ raise BadHeaderException("Extra header {} in file {} mismatches original header {} from file {}. Table name is {}".format(",".join(col_vals),mfs.filename,",".join(self.column_inferer.header_row),self.column_inferer.header_row_filename,self.filenames_str))
+
+ return is_extra_header
+
def _populate(self,dialect,stop_after_analysis=False):
total_data_lines_read = 0
# For each match
- for filename in self.materialized_file_list:
+ for filenumber,filename in enumerate(self.materialized_file_list):
mfs = self.materialized_file_dict[filename]
try:
try:
for col_vals in mfs.read_file_using_csv():
- self._insert_row(col_vals)
+ if self._should_skip_extra_headers(filenumber,filename,mfs,col_vals):
+ continue
+ self._insert_row(filename, col_vals)
if stop_after_analysis and self.column_inferer.inferred:
return
if mfs.lines_read == 0 and self.skip_header:
@@ -937,7 +957,7 @@ class TableCreator(object):
if not self.table_created:
self.column_inferer.force_analysis()
- self._do_create_table()
+ self._do_create_table(filename)
if total_data_lines_read == 0:
@@ -960,20 +980,20 @@ class TableCreator(object):
self.state = TableCreatorState.FULLY_READ
return
- def _flush_pre_creation_rows(self):
+ def _flush_pre_creation_rows(self, filename):
for i, col_vals in enumerate(self.pre_creation_rows):
if self.skip_header and i == 0:
# skip header line
continue
- self._insert_row(col_vals)
+ self._insert_row(filename, col_vals)
self._flush_inserts()
self.pre_creation_rows = []
- def _insert_row(self, col_vals):
+ def _insert_row(self, filename, col_vals):
# If table has not been created yet
if not self.table_created:
# Try to create it along with another "example" line of data
- self.try_to_create_table(col_vals)
+ self.try_to_create_table(filename, col_vals)
# If the table is still not created, then we don't have enough data, just
# store the data and return
@@ -1069,19 +1089,19 @@ class TableCreator(object):
# print self.db.execute_and_fetch(self.db.generate_end_transaction())
self.buffered_inserts = []
- def try_to_create_table(self, col_vals):
+ def try_to_create_table(self, filename, col_vals):
if self.table_created:
raise Exception('Table is already created')
# Add that line to the column inferer
- result = self.column_inferer.analyze(col_vals)
+ result = self.column_inferer.analyze(filename, col_vals)
# If inferer succeeded,
if result:
- self._do_create_table()
+ self._do_create_table(filename)
else:
pass # We don't have enough information for creating the table yet
- def _do_create_table(self):
+ def _do_create_table(self,filename):
# Then generate a temp table name
self.table_name = self.db.generate_temp_table_name()
# Get the column definition dict from the inferer
@@ -1101,7 +1121,7 @@ class TableCreator(object):
self.db.execute_and_fetch(create_table_stmt)
# Mark the table as created
self.table_created = True
- self._flush_pre_creation_rows()
+ self._flush_pre_creation_rows(filename)
def drop_table(self):
if self.table_created:
@@ -1122,7 +1142,8 @@ def determine_max_col_lengths(m,output_field_quoting_func,output_delimiter):
def print_credentials():
print("q version %s" % q_version, file=sys.stderr)
- print("Copyright (C) 2012-2017 Harel Ben-Attia (harelba@gmail.com, @harelba on twitter)", file=sys.stderr)
+ print("Python: %s" % " // ".join([str(x).strip() for x in sys.version.split("\n")]), file=sys.stderr)
+ print("Copyright (C) 2012-2019 Harel Ben-Attia (harelba@gmail.com, @harelba on twitter)", file=sys.stderr)
print("http://harelba.github.io/q/", file=sys.stderr)
print(file=sys.stderr)
@@ -1403,7 +1424,7 @@ class QTextAsData(object):
msg = str(e)
error = QError(e,"query error: %s" % msg,1)
if "no such column" in msg and effective_input_params.skip_header:
- warnings.append(QWarning(e,'Warning - There seems to be a "no such column" error, and -H (header line) exists. Please make sure that you are using the column names from the header line and not the default (cXX) column names'))
+            warnings.append(QWarning(e,'Warning - There seems to be a "no such column" error, and -H (header line) exists. Please make sure that you are using the column names from the header line and not the default (cXX) column names. Another issue might be that the file contains a BOM. Files that are encoded with UTF8 and contain a BOM can be read by specifying `-e utf-8-sig` in the command line. Support for non-UTF8 encoding will be provided in the future.'))
except ColumnCountMismatchException as e:
error = QError(e,e.msg,2)
except (UnicodeDecodeError, UnicodeError) as e:
diff --git a/dist/create-rpm b/dist/create-rpm
index db1a255..8c247f9 100755
--- a/dist/create-rpm
+++ b/dist/create-rpm
@@ -40,12 +40,12 @@ then
exit 1
fi
-curl -o ${rpm_build_area}/SOURCES/q.tar.gz -L -R "https://github.com/harelba/q/tarball/$BASED_ON_TAG"
+curl -f -o ${rpm_build_area}/SOURCES/q.tar.gz -L -R "https://github.com/harelba/q/tarball/$BASED_ON_TAG"
mkdir -p ${rpm_build_area}/SOURCES
pushd ${rpm_build_area}/SOURCES >/dev/null
tar xvzf ./q.tar.gz --strip-components=1
rm -vf ./q.tar.gz
-curl -o ./bin/q -L -R "https://github.com/harelba/packages-for-q/raw/master/single-binary/x86_64/${VERSION}/q"
+curl -f -o ./bin/q -L -R "https://github.com/harelba/packages-for-q/raw/master/single-binary/x86_64/${VERSION}/q"
chmod +x ./bin/q
popd >/dev/null
find ${rpm_build_area}/ -ls
diff --git a/do-manual-release.sh b/do-manual-release.sh
new file mode 100755
index 0000000..33e68ad
--- /dev/null
+++ b/do-manual-release.sh
@@ -0,0 +1,29 @@
+#!/bin/bash -x
+
+set -e
+
+VERSION=$(bin/__version__.py)
+
+echo "Packing binary for $TRAVIS_OS_NAME"
+
+if [[ "$TRAVIS_OS_NAME" == "osx" || "$TRAVIS_OS_NAME" == "linux" ]]
+then
+ echo "Packing $TRAVIS_OS_NAME installer - packing binary"
+ pyci pack --repo harelba/q --sha $VERSION binary
+ echo "Packing $TRAVIS_OS_NAME installer - uploading"
+ pyci github upload-asset --asset q-$(uname -m)-$(uname -s) --release $VERSION
+else
+ echo "Packing windows installer - packing binary"
+ pyci pack --repo harelba/q --sha $VERSION binary
+ echo "Packing windows installer - listing files"
+ find `pwd` -ls | grep -v \.git/
+ echo "Packing windows installer - packing nsis"
+ BINARY_LOCATION="c:\\Users\\travis\\build\\harelba\\q\\q-AMD64-Windows.exe"
+ pyci pack nsis --program-files-dir q-TextAsData --binary-path $BINARY_LOCATION --version ${VERSION}.0
+ echo "Packing windows installer - uploading"
+ pyci github upload-asset --asset $BINARY_LOCATION --release $VERSION
+ SETUP_LOCATION="c:\\Users\\travis\\build\\harelba\\q\\q-AMD64-Windows-installer.exe"
+ pyci github upload-asset --asset $SETUP_LOCATION --release $VERSION
+fi
+
+echo "done"
diff --git a/mkdocs/docs/.DS_Store b/mkdocs/docs/.DS_Store
new file mode 100644
index 0000000..6f61d6d
--- /dev/null
+++ b/mkdocs/docs/.DS_Store
Binary files differ
diff --git a/mkdocs/docs/about.md b/mkdocs/docs/about.md
new file mode 100644
index 0000000..b0e09e4
--- /dev/null
+++ b/mkdocs/docs/about.md
@@ -0,0 +1,8 @@
+# About
+
+### Linkedin: [Harel Ben Attia](https://www.linkedin.com/in/harelba/)
+
+### Twitter [@harelba](https://twitter.com/harelba)
+
+### Email [harelba@gmail.com](mailto:harelba@gmail.com)
+
diff --git a/mkdocs/docs/fsg9b9b1.txt b/mkdocs/docs/fsg9b9b1.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/mkdocs/docs/fsg9b9b1.txt
diff --git a/mkdocs/docs/google0efeb4ff0a886e81.html b/mkdocs/docs/google0efeb4ff0a886e81.html
new file mode 100644
index 0000000..24947e8
--- /dev/null
+++ b/mkdocs/docs/google0efeb4ff0a886e81.html
@@ -0,0 +1 @@
+google-site-verification: google0efeb4ff0a886e81.html \ No newline at end of file
diff --git a/mkdocs/docs/img/bg_hr.png b/mkdocs/docs/img/bg_hr.png
new file mode 100644
index 0000000..7973bd6
--- /dev/null
+++ b/mkdocs/docs/img/bg_hr.png
Binary files differ
diff --git a/mkdocs/docs/img/blacktocat.png b/mkdocs/docs/img/blacktocat.png
new file mode 100644
index 0000000..6e264fe
--- /dev/null
+++ b/mkdocs/docs/img/blacktocat.png
Binary files differ
diff --git a/mkdocs/docs/img/icon_download.png b/mkdocs/docs/img/icon_download.png
new file mode 100644
index 0000000..a2a287f
--- /dev/null
+++ b/mkdocs/docs/img/icon_download.png
Binary files differ
diff --git a/mkdocs/docs/img/q-logo.png b/mkdocs/docs/img/q-logo.png
new file mode 100644
index 0000000..ac9599c
--- /dev/null
+++ b/mkdocs/docs/img/q-logo.png
Binary files differ
diff --git a/mkdocs/docs/img/q-logo1.ico b/mkdocs/docs/img/q-logo1.ico
new file mode 100644
index 0000000..427a0d9
--- /dev/null
+++ b/mkdocs/docs/img/q-logo1.ico
Binary files differ
diff --git a/mkdocs/docs/img/q-logo1.png b/mkdocs/docs/img/q-logo1.png
new file mode 100644
index 0000000..10380ec
--- /dev/null
+++ b/mkdocs/docs/img/q-logo1.png
Binary files differ
diff --git a/mkdocs/docs/img/sprite_download.png b/mkdocs/docs/img/sprite_download.png
new file mode 100644
index 0000000..f2babd5
--- /dev/null
+++ b/mkdocs/docs/img/sprite_download.png
Binary files differ
diff --git a/mkdocs/docs/img/sprite_download3.png b/mkdocs/docs/img/sprite_download3.png
new file mode 100644
index 0000000..9fd451a
--- /dev/null
+++ b/mkdocs/docs/img/sprite_download3.png
Binary files differ
diff --git a/mkdocs/docs/img/sprite_download4.png b/mkdocs/docs/img/sprite_download4.png
new file mode 100644
index 0000000..db6e518
--- /dev/null
+++ b/mkdocs/docs/img/sprite_download4.png
Binary files differ
diff --git a/mkdocs/docs/img/torii-favicon.ico b/mkdocs/docs/img/torii-favicon.ico
new file mode 100644
index 0000000..86da98b
--- /dev/null
+++ b/mkdocs/docs/img/torii-favicon.ico
Binary files differ
diff --git a/mkdocs/docs/index.md b/mkdocs/docs/index.md
new file mode 100644
index 0000000..777d4cc
--- /dev/null
+++ b/mkdocs/docs/index.md
@@ -0,0 +1,381 @@
+# q - Run SQL directly on CSV or TSV files
+
+[![GitHub Stars](https://img.shields.io/github/stars/harelba/q.svg?style=social&label=GitHub Stars&maxAge=600)](https://GitHub.com/harelba/q/stargazers/)
+[![GitHub forks](https://img.shields.io/github/forks/harelba/q.svg?style=social&label=GitHub Forks&maxAge=600)](https://GitHub.com/harelba/q/network/)
+
+
+## Overview
+q is a command line tool that allows direct execution of SQL-like queries on CSVs/TSVs (and any other tabular text files).
+
+q treats ordinary files as database tables, and supports all SQL constructs, such as WHERE, GROUP BY, JOINs etc. It supports automatic column name and column type detection, and provides full support for multiple encodings.
+
+``` bash
+q "SELECT COUNT(*) FROM ./clicks_file.csv WHERE c3 > 32.3"
+```
+
+``` bash
+ps -ef | q -H "SELECT UID,COUNT(*) cnt FROM - GROUP BY UID ORDER BY cnt DESC LIMIT 3"
+```
+
+Look at some examples [here](#examples), or just download the tool using the links in the [installation](#installation) below and play with it.
+
+| | |
+|:--------------------------------------:|:-----------------------------------------------:|
+| 完全支持所有的字符编码 | すべての文字エンコーディングを完全にサポート |
+| 모든 문자 인코딩이 완벽하게 지원됩니다 | все кодировки символов полностью поддерживаются |
+
+**Non-english users:** q fully supports all types of encoding. Use `-e data-encoding` to set the input data encoding, `-Q query-encoding` to set the query encoding, and use `-E output-encoding` to set the output encoding. Sensible defaults are in place for all three parameters. Please contact me if you encounter any issues and I'd be glad to help.
+
+**Files with BOM:** Files which contain a BOM ([Byte Order Mark](https://en.wikipedia.org/wiki/Byte_order_mark)) are not properly supported inside python's csv module. q contains a workaround that allows reading UTF8 files which contain a BOM - Use `-e utf-8-sig` for this. I plan to separate the BOM handling from the encoding itself, which would allow to support BOMs for all encodings.
+
+## Installation
+
+| Format | Instructions | Comments |
+:---|:---|:---|
+|[OSX](https://github.com/harelba/packages-for-q/raw/master/single-binary/Darwin/2.0.9/q)|Download the executable from the link on the left, make it executable, and use it.|`brew install q` currently downloads the older version `1.7.1`. I'll update it to install the new version soon|
+|[RPM Package](https://github.com/harelba/packages-for-q/raw/master/rpms/q-text-as-data-2.0.9-1.x86_64.rpm)| run `rpm -ivh <package-filename>` or `rpm -U <package-filename>` if you already have an older version of q.| A man page is available for this release. Just enter man q.|
+|[DEB Package](https://github.com/harelba/packages-for-q/raw/master/deb/q-text-as-data_2.0.9-2_amd64.deb)| Run `sudo dpkg -i <package-filename>`|A man page is available for this release. Just enter `man q`.|
+|[Windows Installer](https://github.com/harelba/packages-for-q/raw/master/windows/setup-q-2.0.9.exe)|Run the installer executable and hit next next next... q.exe will be added to the PATH so you can access it everywhere.|Windows doesn't update the PATH retroactively for open windows, so you'll need to open a new cmd window after the installation is done.|
+|[tar.gz](https://github.com/harelba/q/archive/2.0.9.tar.gz)|Full source file tree for latest stable version||
+|[zip](https://github.com/harelba/q/archive/2.0.9.zip)|Full source file tree for the latest stable version||
+
+**Older versions can be downloaded [here](https://github.com/harelba/packages-for-q). Please let me know if you plan on using an older version, and why - I know of no reason to use any of them.**
+
+## Requirements
+As of version `2.0.9`, there's no need for any external dependency. Python itself (3.7), and any needed libraries are self-contained inside the installation, isolated from the rest of your system.
+
+## Usage
+
+``` bash
+q <flags> "<query>"
+
+ Simplest execution is `q "SELECT * FROM myfile"` which prints the entire file.
+```
+
+q allows performing SQL-like statements on tabular text data. Its purpose is to bring SQL expressive power to the Linux command line and to provide easy access to text as actual data.
+
+Query should be an SQL-like query which contains *filenames instead of table names* (or - for stdin). The query itself should be provided as one parameter to the tool (i.e. enclosed in quotes). Multiple files can be used as one table by either writing them as `filename1+filename2+...` or by using shell wildcards (e.g. `my_files*.csv`).
+
+Use `-H` to signify that the input contains a header line. Column names will be detected automatically in that case, and can be used in the query. If this option is not provided, columns will be named cX, starting with 1 (e.g. `q "SELECT c3,c8 from ..."`).
+
+Use `-d` to specify the input delimiter.
+
+Column types are auto detected by the tool, no casting is needed. Note that there's a flag `--as-text` which forces all columns to be treated as text columns.
+
+Please note that column names that include spaces need to be used in the query with back-ticks, as per the sqlite standard.
+
+Query/Input/Output encodings are fully supported (and q tries to provide out-of-the-box usability in that area). Please use `-e`,`-E` and `-Q` to control encoding if needed.
+
+All sqlite3 SQL constructs are supported, including joins across files (use an alias for each table). Take a look at the [limitations](#limitations) section below for some rarely-used use cases which are not fully supported.
+
+### Query
+Each parameter that q gets is a full SQL query. All queries are executed one after another, outputting the results to standard output. Note that data loading is done only once, so when passing multiple queries on the same command-line, only the first one will take a long time. The rest will start running almost instantaneously, since all the data will already have been loaded. Remember to double-quote each of the queries - Each parameter is a full SQL query.
+
+Any standard SQL expression, condition (both WHERE and HAVING), GROUP BY, ORDER BY etc. are allowed.
+
+JOINs are supported and Subqueries are supported in the WHERE clause, but unfortunately not in the FROM clause for now. Use table aliases when performing JOINs.
+
+The SQL syntax itself is sqlite's syntax. For details look at http://www.sqlite.org/lang.html or search the net for examples.
+
+NOTE: Full type detection is implemented, so there is no need for any casting or anything.
+
+NOTE2: When using the `-O` output header option, use column name aliases if you want to control the output column names. For example, `q -O -H "select count(*) cnt,sum(*) as mysum from -"` would output `cnt` and `mysum` as the output header column names.
+
+### Flags
+
+``` bash
+Usage:
+ q allows performing SQL-like statements on tabular text data.
+
+ Its purpose is to bring SQL expressive power to manipulating text data using the Linux command line.
+
+ Basic usage is q "<sql-like query>" where table names are just regular file names (Use - to read from standard input)
+ When the input contains a header row, use -H, and column names will be set according to the header row content. If there isn't a header row, then columns will automatically be named c1..cN.
+
+ Column types are detected automatically. Use -A in order to see the column name/type analysis.
+
+ Delimiter can be set using the -d (or -t) option. Output delimiter can be set using -D
+
+ All sqlite3 SQL constructs are supported.
+
+ Examples:
+
+ Example 1: ls -ltrd * | q "select c1,count(1) from - group by c1"
+ This example would print a count of each unique permission string in the current folder.
+
+ Example 2: seq 1 1000 | q "select avg(c1),sum(c1) from -"
+ This example would provide the average and the sum of the numbers in the range 1 to 1000
+
+ Example 3: sudo find /tmp -ls | q "select c5,c6,sum(c7)/1024.0/1024 as total from - group by c5,c6 order by total desc"
+ This example will output the total size in MB per user+group in the /tmp subtree
+
+
+ See the help or https://github.com/harelba/q/ for more details.
+
+
+Options:
+ -h, --help show this help message and exit
+ -v, --version Print version
+ -V, --verbose Print debug info in case of problems
+ -S SAVE_DB_TO_DISK_FILENAME, --save-db-to-disk=SAVE_DB_TO_DISK_FILENAME
+ Save database to an sqlite database file
+ --save-db-to-disk-method=SAVE_DB_TO_DISK_METHOD
+ Method to use to save db to disk. 'standard' does not
+ require any deps, 'fast' currenty requires manually
+ running `pip install sqlitebck` on your python
+ installation. Once packing issues are solved, the fast
+ method will be the default.
+
+ Input Data Options:
+ -H, --skip-header Skip header row. This has been changed from earlier
+ version - Only one header row is supported, and the
+ header row is used for column naming
+ -d DELIMITER, --delimiter=DELIMITER
+ Field delimiter. If none specified, then space is used
+ as the delimiter.
+ -t, --tab-delimited
+ Same as -d <tab>. Just a shorthand for handling
+ standard tab delimited file You can use $'\t' if you
+ want (this is how Linux expects to provide tabs in the
+ command line
+ -e ENCODING, --encoding=ENCODING
+ Input file encoding. Defaults to UTF-8. set to none
+ for not setting any encoding - faster, but at your own
+ risk...
+ -z, --gzipped Data is gzipped. Useful for reading from stdin. For
+ files, .gz means automatic gunzipping
+ -A, --analyze-only Analyze sample input and provide information about
+ data types
+ -m MODE, --mode=MODE
+ Data parsing mode. fluffy, relaxed and strict. In
+ strict mode, the -c column-count parameter must be
+ supplied as well
+ -c COLUMN_COUNT, --column-count=COLUMN_COUNT
+ Specific column count when using relaxed or strict
+ mode
+ -k, --