summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDarik Gamble <darik.gamble.spam@gmail.com>2016-07-22 07:09:17 -0400
committerDarik Gamble <darik.gamble.spam@gmail.com>2016-07-27 14:27:23 -0400
commit826016da292314e330c80e5a50f4fd3aa01abca6 (patch)
tree151dfc658f6a0800ee0f810109ee53eb513cd768
parent0a52204ae6b1034c5d9ee7dabb19fcd4e313e6f2 (diff)
Split table specific parseutils into separate subpackage
-rw-r--r--pgcli/packages/parseutils/tables.py166
-rw-r--r--pgcli/packages/parseutils/utils.py152
-rw-r--r--pgcli/packages/sqlcompletion.py5
-rw-r--r--pgcli/pgbuffer.py2
-rw-r--r--pgcli/pgcompleter.py3
5 files changed, 10 insertions, 318 deletions
diff --git a/pgcli/packages/parseutils/tables.py b/pgcli/packages/parseutils/tables.py
index 4a5b47c2..72cdcc6b 100644
--- a/pgcli/packages/parseutils/tables.py
+++ b/pgcli/packages/parseutils/tables.py
@@ -1,68 +1,8 @@
from __future__ import print_function
-import re
import sqlparse
from collections import namedtuple
from sqlparse.sql import IdentifierList, Identifier, Function
-from sqlparse.tokens import Keyword, DML, Punctuation, Token, Error
-
-cleanup_regex = {
- # This matches only alphanumerics and underscores.
- 'alphanum_underscore': re.compile(r'(\w+)$'),
- # This matches everything except spaces, parens, colon, and comma
- 'many_punctuations': re.compile(r'([^():,\s]+)$'),
- # This matches everything except spaces, parens, colon, comma, and period
- 'most_punctuations': re.compile(r'([^\.():,\s]+)$'),
- # This matches everything except a space.
- 'all_punctuations': re.compile('([^\s]+)$'),
- }
-
-def last_word(text, include='alphanum_underscore'):
- """
- Find the last word in a sentence.
-
- >>> last_word('abc')
- 'abc'
- >>> last_word(' abc')
- 'abc'
- >>> last_word('')
- ''
- >>> last_word(' ')
- ''
- >>> last_word('abc ')
- ''
- >>> last_word('abc def')
- 'def'
- >>> last_word('abc def ')
- ''
- >>> last_word('abc def;')
- ''
- >>> last_word('bac $def')
- 'def'
- >>> last_word('bac $def', include='most_punctuations')
- '$def'
- >>> last_word('bac \def', include='most_punctuations')
- '\\\\def'
- >>> last_word('bac \def;', include='most_punctuations')
- '\\\\def;'
- >>> last_word('bac::def', include='most_punctuations')
- 'def'
- >>> last_word('"foo*bar', include='most_punctuations')
- '"foo*bar'
- """
-
- if not text: # Empty string
- return ''
-
- if text[-1].isspace():
- return ''
- else:
- regex = cleanup_regex[include]
- matches = regex.search(text)
- if matches:
- return matches.group(0)
- else:
- return ''
-
+from sqlparse.tokens import Keyword, DML, Punctuation
TableReference = namedtuple('TableReference', ['schema', 'name', 'alias',
'is_function'])
@@ -86,6 +26,7 @@ def is_subselect(parsed):
def _identifier_is_function(identifier):
return any(isinstance(t, Function) for t in identifier.tokens)
+
def extract_from_part(parsed, stop_at_punctuation=True):
tbl_prefix_seen = False
for item in parsed.tokens:
@@ -204,106 +145,3 @@ def extract_tables(sql):
allow_functions=not insert_stmt)
# In the case 'sche.<cursor>', we get an empty TableReference; remove that
return tuple(i for i in identifiers if i.name)
-
-
-
-def find_prev_keyword(sql):
- """ Find the last sql keyword in an SQL statement
-
- Returns the value of the last keyword, and the text of the query with
- everything after the last keyword stripped
- """
- if not sql.strip():
- return None, ''
-
- parsed = sqlparse.parse(sql)[0]
- flattened = list(parsed.flatten())
-
- logical_operators = ('AND', 'OR', 'NOT', 'BETWEEN')
-
- for t in reversed(flattened):
- if t.value == '(' or (t.is_keyword and (
- t.value.upper() not in logical_operators)):
- # Find the location of token t in the original parsed statement
- # We can't use parsed.token_index(t) because t may be a child token
- # inside a TokenList, in which case token_index thows an error
- # Minimal example:
- # p = sqlparse.parse('select * from foo where bar')
- # t = list(p.flatten())[-3] # The "Where" token
- # p.token_index(t) # Throws ValueError: not in list
- idx = flattened.index(t)
-
- # Combine the string values of all tokens in the original list
- # up to and including the target keyword token t, to produce a
- # query string with everything after the keyword token removed
- text = ''.join(tok.value for tok in flattened[:idx+1])
- return t, text
-
- return None, ''
-
-
-# Postgresql dollar quote signs look like `$$` or `$tag$`
-dollar_quote_regex = re.compile(r'^\$[^$]*\$$')
-
-
-def is_open_quote(sql):
- """Returns true if the query contains an unclosed quote"""
-
- # parsed can contain one or more semi-colon separated commands
- parsed = sqlparse.parse(sql)
- return any(_parsed_is_open_quote(p) for p in parsed)
-
-
-def _parsed_is_open_quote(parsed):
- tokens = list(parsed.flatten())
-
- i = 0
- while i < len(tokens):
- tok = tokens[i]
- if tok.match(Token.Error, "'"):
- # An unmatched single quote
- return True
- elif (tok.ttype in Token.Name.Builtin
- and dollar_quote_regex.match(tok.value)):
- # Find the matching closing dollar quote sign
- for (j, tok2) in enumerate(tokens[i+1:], i+1):
- if tok2.match(Token.Name.Builtin, tok.value):
- # Found the matching closing quote - continue our scan for
- # open quotes thereafter
- i = j
- break
- else:
- # No matching dollar sign quote
- return True
-
- i += 1
-
- return False
-
-
-def parse_partial_identifier(word):
- """Attempt to parse a (partially typed) word as an identifier
-
- word may include a schema qualification, like `schema_name.partial_name`
- or `schema_name.` There may also be unclosed quotation marks, like
- `"schema`, or `schema."partial_name`
-
- :param word: string representing a (partially complete) identifier
- :return: sqlparse.sql.Identifier, or None
- """
-
- p = sqlparse.parse(word)[0]
- n_tok = len(p.tokens)
- if n_tok == 1 and isinstance(p.tokens[0], Identifier):
- return p.tokens[0]
- elif p.token_next_by(m=(Error, '"'))[1]:
- # An unmatched double quote, e.g. '"foo', 'foo."', or 'foo."bar'
- # Close the double quote, then reparse
- return parse_partial_identifier(word + '"')
- else:
- return None
-
-
-if __name__ == '__main__':
- sql = 'select * from (select t. from tabl t'
- print (extract_tables(sql))
diff --git a/pgcli/packages/parseutils/utils.py b/pgcli/packages/parseutils/utils.py
index 4a5b47c2..e5baa0b0 100644
--- a/pgcli/packages/parseutils/utils.py
+++ b/pgcli/packages/parseutils/utils.py
@@ -1,9 +1,8 @@
from __future__ import print_function
import re
import sqlparse
-from collections import namedtuple
-from sqlparse.sql import IdentifierList, Identifier, Function
-from sqlparse.tokens import Keyword, DML, Punctuation, Token, Error
+from sqlparse.sql import Identifier
+from sqlparse.tokens import Token, Error
cleanup_regex = {
# This matches only alphanumerics and underscores.
@@ -64,149 +63,6 @@ def last_word(text, include='alphanum_underscore'):
return ''
-TableReference = namedtuple('TableReference', ['schema', 'name', 'alias',
- 'is_function'])
-TableReference.ref = property(lambda self: self.alias or (
- self.name if self.name.islower() or self.name[0] == '"'
- else '"' + self.name + '"'))
-
-
-# This code is borrowed from sqlparse example script.
-# <url>
-def is_subselect(parsed):
- if not parsed.is_group():
- return False
- for item in parsed.tokens:
- if item.ttype is DML and item.value.upper() in ('SELECT', 'INSERT',
- 'UPDATE', 'CREATE', 'DELETE'):
- return True
- return False
-
-
-def _identifier_is_function(identifier):
- return any(isinstance(t, Function) for t in identifier.tokens)
-
-def extract_from_part(parsed, stop_at_punctuation=True):
- tbl_prefix_seen = False
- for item in parsed.tokens:
- if tbl_prefix_seen:
- if is_subselect(item):
- for x in extract_from_part(item, stop_at_punctuation):
- yield x
- elif stop_at_punctuation and item.ttype is Punctuation:
- raise StopIteration
- # An incomplete nested select won't be recognized correctly as a
- # sub-select. eg: 'SELECT * FROM (SELECT id FROM user'. This causes
- # the second FROM to trigger this elif condition resulting in a
- # StopIteration. So we need to ignore the keyword if the keyword
- # FROM.
- # Also 'SELECT * FROM abc JOIN def' will trigger this elif
- # condition. So we need to ignore the keyword JOIN and its variants
- # INNER JOIN, FULL OUTER JOIN, etc.
- elif item.ttype is Keyword and (
- not item.value.upper() == 'FROM') and (
- not item.value.upper().endswith('JOIN')):
- tbl_prefix_seen = False
- else:
- yield item
- elif item.ttype is Keyword or item.ttype is Keyword.DML:
- item_val = item.value.upper()
- if (item_val in ('COPY', 'FROM', 'INTO', 'UPDATE', 'TABLE') or
- item_val.endswith('JOIN')):
- tbl_prefix_seen = True
- # 'SELECT a, FROM abc' will detect FROM as part of the column list.
- # So this check here is necessary.
- elif isinstance(item, IdentifierList):
- for identifier in item.get_identifiers():
- if (identifier.ttype is Keyword and
- identifier.value.upper() == 'FROM'):
- tbl_prefix_seen = True
- break
-
-
-def extract_table_identifiers(token_stream, allow_functions=True):
- """yields tuples of TableReference namedtuples"""
-
- # We need to do some massaging of the names because postgres is case-
- # insensitive and '"Foo"' is not the same table as 'Foo' (while 'foo' is)
- def parse_identifier(item):
- name = item.get_real_name()
- schema_name = item.get_parent_name()
- alias = item.get_alias()
- if not name:
- schema_name = None
- name = item.get_name()
- alias = alias or name
- schema_quoted = schema_name and item.value[0] == '"'
- if schema_name and not schema_quoted:
- schema_name = schema_name.lower()
- quote_count = item.value.count('"')
- name_quoted = quote_count > 2 or (quote_count and not schema_quoted)
- alias_quoted = alias and item.value[-1] == '"'
- if alias_quoted or name_quoted and not alias and name.islower():
- alias = '"' + (alias or name) + '"'
- if name and not name_quoted and not name.islower():
- if not alias:
- alias = name
- name = name.lower()
- return schema_name, name, alias
-
-
- for item in token_stream:
- if isinstance(item, IdentifierList):
- for identifier in item.get_identifiers():
- # Sometimes Keywords (such as FROM ) are classified as
- # identifiers which don't have the get_real_name() method.
- try:
- schema_name = identifier.get_parent_name()
- real_name = identifier.get_real_name()
- is_function = (allow_functions and
- _identifier_is_function(identifier))
- except AttributeError:
- continue
- if real_name:
- yield TableReference(schema_name, real_name,
- identifier.get_alias(), is_function)
- elif isinstance(item, Identifier):
- schema_name, real_name, alias = parse_identifier(item)
- is_function = allow_functions and _identifier_is_function(item)
-
- yield TableReference(schema_name, real_name, alias, is_function)
- elif isinstance(item, Function):
- schema_name, real_name, alias = parse_identifier(item)
- yield TableReference(None, real_name, alias, allow_functions)
-
-
-# extract_tables is inspired from examples in the sqlparse lib.
-def extract_tables(sql):
- """Extract the table names from an SQL statment.
-
- Returns a list of TableReference namedtuples
-
- """
- parsed = sqlparse.parse(sql)
- if not parsed:
- return ()
-
- # INSERT statements must stop looking for tables at the sign of first
- # Punctuation. eg: INSERT INTO abc (col1, col2) VALUES (1, 2)
- # abc is the table name, but if we don't stop at the first lparen, then
- # we'll identify abc, col1 and col2 as table names.
- insert_stmt = parsed[0].token_first().value.lower() == 'insert'
- stream = extract_from_part(parsed[0], stop_at_punctuation=insert_stmt)
-
- # Kludge: sqlparse mistakenly identifies insert statements as
- # function calls due to the parenthesized column list, e.g. interprets
- # "insert into foo (bar, baz)" as a function call to foo with arguments
- # (bar, baz). So don't allow any identifiers in insert statements
- # to have is_function=True
- identifiers = extract_table_identifiers(stream,
- allow_functions=not insert_stmt)
- # In the case 'sche.<cursor>', we get an empty TableReference; remove that
- return tuple(i for i in identifiers if i.name)
-
-
-
def find_prev_keyword(sql):
""" Find the last sql keyword in an SQL statement
@@ -303,7 +159,3 @@ def parse_partial_identifier(word):
else:
return None
-
-if __name__ == '__main__':
- sql = 'select * from (select t. from tabl t'
- print (extract_tables(sql))
diff --git a/pgcli/packages/sqlcompletion.py b/pgcli/packages/sqlcompletion.py
index e339e2b4..2871ef96 100644
--- a/pgcli/packages/sqlcompletion.py
+++ b/pgcli/packages/sqlcompletion.py
@@ -4,8 +4,9 @@ import re
import sqlparse
from collections import namedtuple
from sqlparse.sql import Comparison, Identifier, Where
-from .parseutils import (
- last_word, extract_tables, find_prev_keyword, parse_partial_identifier)
+from .parseutils.utils import (
+ last_word, find_prev_keyword, parse_partial_identifier)
+from .parseutils.tables import extract_tables
from pgspecial.main import parse_special_command
PY2 = sys.version_info[0] == 2
diff --git a/pgcli/pgbuffer.py b/pgcli/pgbuffer.py
index df797cf8..b9e9b930 100644
--- a/pgcli/pgbuffer.py
+++ b/pgcli/pgbuffer.py
@@ -1,6 +1,6 @@
from prompt_toolkit.buffer import Buffer
from prompt_toolkit.filters import Condition
-from .packages.parseutils import is_open_quote
+from .packages.parseutils.utils import is_open_quote
class PGBuffer(Buffer):
diff --git a/pgcli/pgcompleter.py b/pgcli/pgcompleter.py
index 5bfbb691..5a18167e 100644
--- a/pgcli/pgcompleter.py
+++ b/pgcli/pgcompleter.py
@@ -12,7 +12,8 @@ from .packages.sqlcompletion import (FromClauseItem,
suggest_type, Special, Database, Schema, Table, Function, Column, View,
Keyword, NamedQuery, Datatype, Alias, Path, JoinCondition, Join)
from .packages.function_metadata import ColumnMetadata, ForeignKey
-from .packages.parseutils import last_word, TableReference
+from .packages.parseutils.utils import last_word
+from .packages.parseutils.tables import TableReference
from .packages.pgliterals.main import get_literals
from .packages.prioritization import PrevalenceCounter
from .config import load_config, config_location