Merge pull request #224 from dbcli/darikg/bugfix-compound-where

fix broken suggestions after compound where clause
author: Daniel Rocco <drocco@gmail.com> 2015-05-07 17:46:25 -0400
committer: Daniel Rocco <drocco@gmail.com> 2015-05-07 17:46:25 -0400
commit: 51be1f391af44531823da4c6e158a216f3de5457 (patch)
tree: 6552ca8c447c91449437e53d46d30b1ea440e812
parent: f0dcb3870881b1d6f76f67cc871fd6986c8684eb (diff)
parent: 48973863f98e4e66fe8a79cd1d7eadbcea026b24 (diff)
4 files changed, 146 insertions, 22 deletions
diff --git a/pgcli/packages/parseutils.py b/pgcli/packages/parseutils.py
index 80b29521..2b4ee58b 100644
--- a/pgcli/packages/parseutils.py
+++ b/pgcli/packages/parseutils.py
@@ -151,6 +151,7 @@ def extract_tables(sql):
     stream = extract_from_part(parsed[0], stop_at_punctuation=insert_stmt)
     return list(extract_table_identifiers(stream))
 
+
 def find_prev_keyword(sql):
     """ Find the last sql keyword in an SQL statement
 
@@ -161,10 +162,26 @@ def find_prev_keyword(sql):
         return None, ''
 
     parsed = sqlparse.parse(sql)[0]
-    for t in reversed(list(parsed.flatten())):
-        if t.is_keyword or t.value == '(':
-            idx = parsed.token_index(t)
-            text = ''.join(tok.value for tok in parsed.tokens[:idx+1])
+    flattened = list(parsed.flatten())
+
+    logical_operators = ('AND', 'OR', 'NOT', 'BETWEEN')
+
+    for t in reversed(flattened):
+        if t.value == '(' or (t.is_keyword and (
+                              t.value.upper() not in logical_operators)):
+            # Find the location of token t in the original parsed statement
+            # We can't use parsed.token_index(t) because t may be a child token
+            # inside a TokenList, in which case token_index thows an error
+            # Minimal example:
+            #   p = sqlparse.parse('select * from foo where bar')
+            #   t = list(p.flatten())[-3]  # The "Where" token
+            #   p.token_index(t)  # Throws ValueError: not in list
+            idx = flattened.index(t)
+
+            # Combine the string values of all tokens in the original list
+            # up to and including the target keyword token t, to produce a
+            # query string with everything after the keyword token removed
+            text = ''.join(tok.value for tok in flattened[:idx+1])
             return t.value, text
 
     return None, ''
diff --git a/pgcli/packages/sqlcompletion.py b/pgcli/packages/sqlcompletion.py
index 7dc7a552..1b2d1164 100644
--- a/pgcli/packages/sqlcompletion.py
+++ b/pgcli/packages/sqlcompletion.py
@@ -1,8 +1,7 @@
 from __future__ import print_function
 import sys
 import sqlparse
-from sqlparse.sql import Comparison, Identifier
-from sqlparse.tokens import Keyword
+from sqlparse.sql import Comparison, Identifier, Where
 from .parseutils import last_word, extract_tables, find_prev_keyword
 from .pgspecial import parse_special_command
 
@@ -135,23 +134,69 @@ def suggest_special(text):
 def suggest_based_on_last_token(token, text_before_cursor, full_text, identifier):
     if isinstance(token, string_types):
         token_v = token.lower()
-    else:
+    elif isinstance(token, Comparison):
         # If 'token' is a Comparison type such as
         # 'select * FROM abc a JOIN def d ON a.id = d.'. Then calling
         # token.value on the comparison type will only return the lhs of the
         # comparison. In this case a.id. So we need to do token.tokens to get
         # both sides of the comparison and pick the last token out of that
         # list.
-        if isinstance(token, Comparison):
-            token_v = token.tokens[-1].value.lower()
-        else:
-            token_v = token.value.lower()
+        token_v = token.tokens[-1].value.lower()
+    elif isinstance(token, Where):
+        # sqlparse groups all tokens from the where clause into a single token
+        # list. This means that token.value may be something like
+        # 'where foo > 5 and '. We need to look "inside" token.tokens to handle
+        # suggestions in complicated where clauses correctly
+        prev_keyword, text_before_cursor = find_prev_keyword(text_before_cursor)
+        return suggest_based_on_last_token(prev_keyword, text_before_cursor,
+                                           full_text, identifier)
+    else:
+        token_v = token.value.lower()
 
     if not token:
         return [{'type': 'keyword'}, {'type': 'special'}]
     elif token_v.endswith('('):
         p = sqlparse.parse(text_before_cursor)[0]
 
+        if p.tokens and isinstance(p.tokens[-1], Where):
+            # Four possibilities:
+            #  1 - Parenthesized clause like "WHERE foo AND ("
+            #        Suggest columns/functions
+            #  2 - Function call like "WHERE foo("
+            #        Suggest columns/functions
+            #  3 - Subquery expression like "WHERE EXISTS ("
+            #        Suggest keywords, in order to do a subquery
+            #  4 - Subquery OR array comparison like "WHERE foo = ANY("
+            #        Suggest columns/functions AND keywords. (If we wanted to be
+            #        really fancy, we could suggest only array-typed columns)
+
+            column_suggestions = suggest_based_on_last_token('where',
+                                    text_before_cursor, full_text, identifier)
+
+            # Check for a subquery expression (cases 3 & 4)
+            where = p.tokens[-1]
+            prev_tok = where.token_prev(len(where.tokens) - 1)
+
+            if isinstance(prev_tok, Comparison):
+                # e.g. "SELECT foo FROM bar WHERE foo = ANY("
+                prev_tok = prev_tok.tokens[-1]
+
+            prev_tok = prev_tok.value.lower()
+            if prev_tok == 'exists':
+                return [{'type': 'keyword'}]
+            elif prev_tok in ('any', 'some', 'all'):
+                return column_suggestions + [{'type': 'keyword'}]
+            elif prev_tok == 'in':
+                # Technically, we should suggest columns AND keywords, as
+                # per case 4. However, IN is different from ANY, SOME, ALL
+                # in that it can accept a *list* of columns, or a subquery.
+                # But suggesting keywords for , "SELECT * FROM foo WHERE bar IN
+                # (baz, qux, " would be overwhelming. So we special case 'IN'
+                # to not suggest keywords.
+                return column_suggestions
+            else:
+                return column_suggestions
+
         # Get the token before the parens
         prev_tok = p.token_prev(len(p.tokens) - 1)
         if prev_tok and prev_tok.value and prev_tok.value.lower() == 'using':
@@ -185,7 +230,7 @@ def suggest_based_on_last_token(token, text_before_cursor, full_text, identifier
         else:
             return [{'type': 'column', 'tables': extract_tables(full_text)},
                     {'type': 'function', 'schema': []}]
-    elif (token_v.endswith('join') and token.ttype in Keyword) or (token_v in
+    elif (token_v.endswith('join') and token.is_keyword) or (token_v in
             ('copy', 'from', 'update', 'into', 'describe', 'truncate')):
         schema = (identifier and identifier.get_parent_name()) or []
 
diff --git a/tests/test_parseutils.py b/tests/test_parseutils.py
index a93e1190..8182b3e3 100644
--- a/tests/test_parseutils.py
+++ b/tests/test_parseutils.py
@@ -88,3 +88,12 @@ def test_find_prev_keyword_using():
     q = 'select * from tbl1 inner join tbl2 using (col1, '
     kw, q2 = find_prev_keyword(q)
     assert kw == '(' and q2 == 'select * from tbl1 inner join tbl2 using ('
+
+@pytest.mark.parametrize('sql', [
+    'select * from foo where bar',
+    'select * from foo where bar = 1 and baz or ',
+    'select * from foo where bar = 1 and baz between qux and ',
+])
+def test_find_prev_keyword_where(sql):
+    kw, stripped = find_prev_keyword(sql)
+    assert kw == 'where' and stripped == 'select * from foo where'
diff --git a/tests/test_sqlcompletion.py b/tests/test_sqlcompletion.py
index 9469d44c..206c77b1 100644
--- a/tests/test_sqlcompletion.py
+++ b/tests/test_sqlcompletion.py
@@ -17,13 +17,43 @@ def test_select_suggests_cols_with_qualified_table_scope():
             {'type': 'column', 'tables': [('sch', 'tabl', None)]},
             {'type': 'function', 'schema': []}])
 
-def test_where_suggests_columns_functions():
-    suggestions = suggest_type('SELECT * FROM tabl WHERE ',
-            'SELECT * FROM tabl WHERE ')
+
+@pytest.mark.parametrize('expression', [
+    'SELECT * FROM tabl WHERE ',
+    'SELECT * FROM tabl WHERE (',
+    'SELECT * FROM tabl WHERE foo = ',
+    'SELECT * FROM tabl WHERE bar OR ',
+    'SELECT * FROM tabl WHERE foo = 1 AND ',
+    'SELECT * FROM tabl WHERE (bar > 10 AND ',
+    'SELECT * FROM tabl WHERE (bar AND (baz OR (qux AND (',
+    'SELECT * FROM tabl WHERE 10 < ',
+    'SELECT * FROM tabl WHERE foo BETWEEN ',
+    'SELECT * FROM tabl WHERE foo BETWEEN foo AND ',
+])
+def test_where_suggests_columns_functions(expression):
+    suggestions = suggest_type(expression, expression)
+    assert sorted_dicts(suggestions) == sorted_dicts([
+            {'type': 'column', 'tables': [(None, 'tabl', None)]},
+            {'type': 'function', 'schema': []}])
+
+@pytest.mark.parametrize('expression', [
+    'SELECT * FROM tabl WHERE foo IN (',
+    'SELECT * FROM tabl WHERE foo IN (bar, ',
+])
+def test_where_in_suggests_columns(expression):
+    suggestions = suggest_type(expression, expression)
     assert sorted_dicts(suggestions) == sorted_dicts([
             {'type': 'column', 'tables': [(None, 'tabl', None)]},
             {'type': 'function', 'schema': []}])
 
+def test_where_equals_any_suggests_columns_or_keywords():
+    text = 'SELECT * FROM tabl WHERE foo = ANY('
+    suggestions = suggest_type(text, text)
+    assert sorted_dicts(suggestions) == sorted_dicts([
+            {'type': 'column', 'tables': [(None, 'tabl', None)]},
+            {'type': 'function', 'schema': []},
+            {'type': 'keyword'}])
+
 def test_lparen_suggests_cols():
     suggestion = suggest_type('SELECT MAX( FROM tbl', 'SELECT MAX(')
     assert suggestion == [
@@ -145,17 +175,40 @@ def test_dot_col_comma_suggests_cols_or_schema_qualified_table():
         {'type': 'view', 'schema': 't2'},
         {'type': 'function', 'schema': 't2'}])
 
-def test_sub_select_suggests_keyword():
-    suggestion = suggest_type('SELECT * FROM (', 'SELECT * FROM (')
+@pytest.mark.parametrize('expression', [
+    'SELECT * FROM (',
+    'SELECT * FROM foo WHERE EXISTS (',
+    'SELECT * FROM foo WHERE bar AND NOT EXISTS (',
+])
+def test_sub_select_suggests_keyword(expression):
+    suggestion = suggest_type(expression, expression)
     assert suggestion == [{'type': 'keyword'}]
 
-def test_sub_select_partial_text_suggests_keyword():
-    suggestion = suggest_type('SELECT * FROM (S', 'SELECT * FROM (S')
+@pytest.mark.parametrize('expression', [
+    'SELECT * FROM (S',
+    'SELECT * FROM foo WHERE EXISTS (S',
+    'SELECT * FROM foo WHERE bar AND NOT EXISTS (S',
+])
+def test_sub_select_partial_text_suggests_keyword(expression):
+    suggestion = suggest_type(expression, expression)
     assert suggestion == [{'type': 'keyword'}]
 
-def test_sub_select_table_name_completion():
-    suggestion = suggest_type('SELECT * FROM (SELECT * FROM ',
-            'SELECT * FROM (SELECT * FROM ')
+def test_outer_table_reference_in_exists_subquery_suggests_columns():
+    q = 'SELECT * FROM foo f WHERE EXISTS (SELECT 1 FROM bar WHERE f.'
+    suggestions = suggest_type(q, q)
+    assert suggestions == [
+        {'type': 'column', 'tables': [(None, 'foo', 'f')]},
+        {'type': 'table', 'schema': 'f'},
+        {'type': 'view', 'schema': 'f'},
+        {'type': 'function', 'schema': 'f'}]
+
+@pytest.mark.parametrize('expression', [
+    'SELECT * FROM (SELECT * FROM ',
+    'SELECT * FROM foo WHERE EXISTS (SELECT * FROM ',
+    'SELECT * FROM foo WHERE bar AND NOT EXISTS (SELECT * FROM ',
+])
+def test_sub_select_table_name_completion(expression):
+    suggestion = suggest_type(expression, expression)
     assert sorted_dicts(suggestion) == sorted_dicts([
         {'type': 'table', 'schema': []},
         {'type': 'view', 'schema': []},
author	Daniel Rocco <drocco@gmail.com>	2015-05-07 17:46:25 -0400
committer	Daniel Rocco <drocco@gmail.com>	2015-05-07 17:46:25 -0400
commit	51be1f391af44531823da4c6e158a216f3de5457 (patch)
tree	6552ca8c447c91449437e53d46d30b1ea440e812
parent	f0dcb3870881b1d6f76f67cc871fd6986c8684eb (diff)
parent	48973863f98e4e66fe8a79cd1d7eadbcea026b24 (diff)