Fix the table extraction for nested select statement.

author: Amjith Ramanujam <amjith@newrelic.com> 2014-12-29 21:43:41 -0800
committer: Amjith Ramanujam <amjith@newrelic.com> 2014-12-29 21:43:41 -0800
commit: 65c7cdaafaedc933d40de313418ee3049515f8b6 (patch)
tree: 7fab944b3daad592a0fb4093b81dd3944ca4bc3c
parent: 0fba271143821500414f4563336fe8a4868a6e1e (diff)
2 files changed, 53 insertions, 10 deletions
diff --git a/pgcli/packages/parseutils.py b/pgcli/packages/parseutils.py
index bc0622d8..b8094076 100644
--- a/pgcli/packages/parseutils.py
+++ b/pgcli/packages/parseutils.py
@@ -67,14 +67,21 @@ def is_subselect(parsed):
             return True
     return False
 
-def extract_from_part(parsed):
+def extract_from_part(parsed, stop_at_punctuation=True):
     tbl_prefix_seen = False
     for item in parsed.tokens:
         if tbl_prefix_seen:
             if is_subselect(item):
-                for x in extract_from_part(item):
+                for x in extract_from_part(item, stop_at_punctuation):
                     yield x
-            elif item.ttype is Keyword or item.ttype is Punctuation:
+            # An incomplete nested select won't be recognized correctly as a
+            # sub-select. eg: 'SELECT * FROM (SELECT id FROM user'. This causes
+            # the second FROM to trigger this elif condition resulting in a
+            # StopIteration. So we need to ignore the keyword if the keyword
+            # FROM.
+            elif stop_at_punctuation and item.ttype is Punctuation:
+                raise StopIteration
+            elif item.ttype is Keyword and item.value.upper() != 'FROM':
                 raise StopIteration
             else:
                 yield item
@@ -94,21 +101,29 @@ def extract_table_identifiers(token_stream):
         if isinstance(item, IdentifierList):
             for identifier in item.get_identifiers():
                 real_name = identifier.get_real_name()
-                yield (real_name, identifier.get_alias() or real_name)
+                if real_name:
+                    yield (real_name, identifier.get_alias() or real_name)
         elif isinstance(item, Identifier):
             real_name = item.get_real_name()
-            yield (real_name, item.get_alias() or real_name)
+            if real_name:
+                yield (real_name, item.get_alias() or real_name)
         elif isinstance(item, Function):
             yield (item.get_name(), item.get_name())
         # It's a bug to check for Keyword here, but in the example
         # above some tables names are identified as keywords...
-        elif item.ttype is Keyword:
-            yield (item.value, item.value)
+        #elif item.ttype is Keyword:
+            #yield (item.value, item.value)
 
 def extract_tables(sql, include_alias=False):
-    if not sql:
+    parsed = sqlparse.parse(sql)
+    if not parsed:
         return []
-    stream = extract_from_part(sqlparse.parse(sql)[0])
+    # INSERT statements must stop looking for tables at the sign of first
+    # Punctuation. eg: INSERT INTO abc (col1, col2) VALUES (1, 2)
+    # abc is the table name, but if we don't stop at the first lparen, then
+    # we'll identify abc, col1 and col2 as table names.
+    insert_stmt = parsed[0].token_first().value.lower() == 'insert'
+    stream = extract_from_part(parsed[0], stop_at_punctuation=insert_stmt)
     if include_alias:
         return dict((alias, t) for t, alias in extract_table_identifiers(stream))
     else:
@@ -121,3 +136,7 @@ def find_prev_keyword(sql):
     for t in reversed(list(sqlparse.parse(sql)[0].flatten())):
         if t.is_keyword or t.value == '(':
             return t.value
+
+if __name__ == '__main__':
+    sql = 'select * from (select t. from tabl t'
+    print extract_tables(sql, True)
diff --git a/tests/test_sqlcompletion.py b/tests/test_sqlcompletion.py
index eb3e7ddd..7e876e70 100644
--- a/tests/test_sqlcompletion.py
+++ b/tests/test_sqlcompletion.py
@@ -48,7 +48,6 @@ def test_insert_into_lparen_partial_text_suggests_cols():
     assert suggestion == ('columns', ['abc'])
 
 def test_insert_into_lparen_comma_suggests_cols():
-    #import pdb; pdb.set_trace()
     suggestion = suggest_type('INSERT INTO abc (id,', 'INSERT INTO abc (id,')
     assert suggestion == ('columns', ['abc'])
 
@@ -71,3 +70,28 @@ def test_dot_col_comma_suggests_cols():
     suggestion = suggest_type('SELECT t1.a, t2. FROM tabl1 t1, tabl2 t2',
             'SELECT t1.a, t2.')
     assert suggestion == ('columns', ['tabl2'])
+
+#def test_sub_select_suggests_keyword():
+    #suggestion = suggest_type('SELECT * FROM (',
+            #'SELECT * FROM (')
+    #assert suggestion == ('keywords', [])
+
+def test_sub_select_table_name_completion():
+    suggestion = suggest_type('SELECT * FROM (SELECT * FROM ',
+            'SELECT * FROM (SELECT * FROM ')
+    assert suggestion == ('tables', [])
+
+#def test_sub_select_col_name_completion():
+    #suggestion = suggest_type('SELECT * FROM (SELECT  FROM abc',
+            #'SELECT * FROM (SELECT ')
+    #assert suggestion == ('columns', ['abc'])
+
+#def test_sub_select_multiple_col_name_completion():
+    #suggestion = suggest_type('SELECT * FROM (SELECT a, FROM abc',
+            #'SELECT * FROM (SELECT a, ')
+    #assert suggestion == ('columns', ['abc'])
+
+def test_sub_select_dot_col_name_completion():
+    suggestion = suggest_type('SELECT * FROM (SELECT t. FROM tabl t',
+            'SELECT * FROM (SELECT t.')
+    assert suggestion == ('columns', ['tabl'])
author	Amjith Ramanujam <amjith@newrelic.com>	2014-12-29 21:43:41 -0800
committer	Amjith Ramanujam <amjith@newrelic.com>	2014-12-29 21:43:41 -0800
commit	65c7cdaafaedc933d40de313418ee3049515f8b6 (patch)
tree	7fab944b3daad592a0fb4093b81dd3944ca4bc3c
parent	0fba271143821500414f4563336fe8a4868a6e1e (diff)