#!/usr/bin/env python
# cardinal_pythonlib/sql/sql_grammar_mssql.py
"""
===============================================================================
Original code copyright (C) 2009-2022 Rudolf Cardinal (rudolf@pobox.com).
This file is part of cardinal_pythonlib.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
https://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
===============================================================================
**SQL grammar for Microsoft SQL Server.**
"""
import re
from pyparsing import (
alphanums,
Combine,
delimitedList,
Forward,
Group,
infixNotation,
Literal,
NotAny,
oneOf,
OneOrMore,
opAssoc,
Optional,
ParserElement,
QuotedString,
Regex,
ZeroOrMore,
Word,
)
from cardinal_pythonlib.logs import get_brace_style_log_with_null_handler
from cardinal_pythonlib.sql.sql_grammar import (
ALL,
AND,
ANSI92_RESERVED_WORD_LIST,
ansi_comment,
AS,
ASC,
AVG,
BETWEEN,
BY,
CASE,
COLLATE,
COMMA,
CROSS,
COUNT,
delim_list,
DESC,
DISTINCT,
ELSE,
END,
EXISTS,
FROM,
GROUP,
HAVING,
IN,
INNER,
integer,
INTERVAL,
IS,
JOIN,
LEFT,
LIKE,
literal_value,
LPAR,
make_regex_except_words,
make_words_regex,
MAX,
MIN,
NATURAL,
NOT,
ON,
OR,
ORDER,
OUTER,
RIGHT,
RPAR,
SELECT,
SUM,
sql_keyword,
SqlGrammar,
_test_fail,
_test_succeed,
THEN,
time_unit,
UNION,
USING,
WHEN,
WHERE,
WITH,
)
log = get_brace_style_log_with_null_handler(__name__)
# =============================================================================
# Constants
# =============================================================================
# Not in SQL Server (though in MySQL):
#
# don't think so: BINARY; http://gilfster.blogspot.co.uk/2005/08/case-sensitivity-in-mysql.html # noqa: E501
# DISTINCTROW: no; https://stackoverflow.com/questions/8562136/distinctrow-equivalent-in-sql-server # noqa: E501
# DIV/MOD: not in SQL Server; use / and % respectively; https://msdn.microsoft.com/en-us/library/ms190279.aspx # noqa: E501
# PARTITION: not in SELECT? - https://msdn.microsoft.com/en-us/library/ms187802.aspx # noqa: E501
# XOR: use ^ instead; https://stackoverflow.com/questions/5411619/t-sql-xor-operator # noqa: E501
# Definitely part of SQL Server:
CHECKSUM_AGG = sql_keyword("CHECKSUM_AGG")
COUNT_BIG = sql_keyword("COUNT_BIG")
GROUPING = sql_keyword("GROUPING")
GROUPING_ID = sql_keyword("GROUPING_ID")
ROLLUP = sql_keyword("ROLLUP")
SOUNDEX = sql_keyword("SOUNDEX")
STDEV = sql_keyword("STDEV")
STDEV_P = sql_keyword("STDEV_P")
TOP = sql_keyword("TOP")
VAR = sql_keyword("VAR")
VARP = sql_keyword("VARP")
# =============================================================================
# Microsoft SQL Server grammar in pyparsing
# =============================================================================
[docs]class SqlGrammarMSSQLServer(SqlGrammar):
"""
SQL grammar (subclass of :class:`.SqlGrammar`) implementing
Microsoft SQL Server syntax.
"""
# -------------------------------------------------------------------------
# Forward declarations
# -------------------------------------------------------------------------
expr = Forward()
select_statement = Forward()
# -------------------------------------------------------------------------
# Keywords
# -------------------------------------------------------------------------
# https://msdn.microsoft.com/en-us/library/ms189822.aspx
sql_server_reserved_words = """
ADD ALL ALTER AND ANY AS ASC AUTHORIZATION
BACKUP BEGIN BETWEEN BREAK BROWSE BULK BY
CASCADE CASE CHECK CHECKPOINT CLOSE CLUSTERED COALESCE COLLATE COLUMN COMMIT
COMPUTE CONSTRAINT CONTAINS CONTAINSTABLE CONTINUE CONVERT CREATE CROSS
CURRENT CURRENT_DATE CURRENT_TIME CURRENT_TIMESTAMP CURRENT_USER CURSOR
DATABASE DBCC DEALLOCATE DECLARE DEFAULT DELETE DENY DESC DISK DISTINCT
DISTRIBUTED DOUBLE DROP DUMP
ELSE END ERRLVL ESCAPE EXCEPT EXEC EXECUTE EXISTS EXIT EXTERNAL
FETCH FILE FILLFACTOR FOR FOREIGN FREETEXT FREETEXTTABLE FROM FULL FUNCTION
GOTO GRANT GROUP
HAVING HOLDLOCK
IDENTITY IDENTITY_INSERT IDENTITYCOL IF IN INDEX INNER INSERT INTERSECT INTO IS
JOIN
KEY KILL
LEFT LIKE LINENO LOAD
MERGE
NATIONAL NOCHECK NONCLUSTERED NOT NULL NULLIF
OF OFF OFFSETS ON OPEN OPENDATASOURCE OPENQUERY OPENROWSET OPENXML OPTION OR
ORDER OUTER OVER
PERCENT PIVOT PLAN PRECISION PRIMARY PRINT PROC PROCEDURE PUBLIC
RAISERROR READ READTEXT RECONFIGURE REFERENCES REPLICATION RESTORE RESTRICT
RETURN REVERT REVOKE RIGHT ROLLBACK ROWCOUNT ROWGUIDCOL RULE
SAVE SCHEMA SECURITYAUDIT SELECT SEMANTICKEYPHRASETABLE
SEMANTICSIMILARITYDETAILSTABLE SEMANTICSIMILARITYTABLE SESSION_USER SET
SETUSER SHUTDOWN SOME STATISTICS SYSTEM_USER
TABLE TABLESAMPLE TEXTSIZE THEN TO TOP TRAN TRANSACTION TRIGGER TRUNCATE
TRY_CONVERT TSEQUAL
UNION UNIQUE UNPIVOT UPDATE UPDATETEXT USE USER
VALUES VARYING VIEW
WAITFOR WHEN WHERE WHILE WITH WITHIN WRITETEXT
"""
# ... "WITHIN GROUP" is listed, not "WITHIN", but
odbc_reserved_words = """
ABSOLUTE ACTION ADA ADD ALL ALLOCATE ALTER AND ANY ARE AS ASC ASSERTION AT
AUTHORIZATION AVG BEGIN BETWEEN BIT BIT_LENGTH BOTH BY
CASCADE CASCADED CASE CAST CATALOG CHAR CHAR_LENGTH CHARACTER CHARACTER_LENGTH
CHECK CLOSE COALESCE COLLATE COLLATION COLUMN COMMIT CONNECT CONNECTION
CONSTRAINT CONSTRAINTS CONTINUE CONVERT CORRESPONDING COUNT CREATE CROSS
CURRENT CURRENT_DATE CURRENT_TIME CURRENT_TIMESTAMP CURRENT_USER CURSOR
DATE DAY DEALLOCATE DEC DECIMAL DECLARE DEFAULT DEFERRABLE DEFERRED DELETE DESC
DESCRIBE DESCRIPTOR DIAGNOSTICS DISCONNECT DISTINCT DOMAIN DOUBLE DROP
ELSE END END-EXEC ESCAPE EXCEPT EXCEPTION EXEC EXECUTE EXISTS EXTERNAL EXTRACT
FALSE FETCH FIRST FLOAT FOR FOREIGN FORTRAN FOUND FROM FULL
GET GLOBAL GO GOTO GRANT GROUP
HAVING HOUR
IDENTITY IMMEDIATE IN INCLUDE INDEX INDICATOR INITIALLY INNER INPUT INSENSITIVE
INSERT INT INTEGER INTERSECT INTERVAL INTO IS ISOLATION
JOIN
KEY
LANGUAGE LAST LEADING LEFT LEVEL LIKE LOCAL LOWER
MATCH MAX MIN MINUTE MODULE MONTH
NAMES NATIONAL NATURAL NCHAR NEXT NO NONE NOT NULL NULLIF NUMERIC
OCTET_LENGTH OF ON ONLY OPEN OPTION OR ORDER OUTER OUTPUT OVERLAPS
PAD PARTIAL PASCAL POSITION PRECISION PREPARE PRESERVE PRIMARY PRIOR PRIVILEGES
PROCEDURE PUBLIC
READ REAL REFERENCES RELATIVE RESTRICT REVOKE RIGHT ROLLBACK ROWS
SCHEMA SCROLL SECOND SECTION SELECT SESSION SESSION_USER SET SIZE SMALLINT SOME
SPACE SQL SQLCA SQLCODE SQLERROR SQLSTATE SQLWARNING SUBSTRING SUM
SYSTEM_USER
TABLE TEMPORARY THEN TIME TIMESTAMP TIMEZONE_HOUR TIMEZONE_MINUTE TO TRAILING
TRANSACTION TRANSLATE TRANSLATION TRIM TRUE
UNION UNIQUE UNKNOWN UPDATE UPPER USAGE USER USING
VALUE VALUES VARCHAR VARYING VIEW
WHEN WHENEVER WHERE WITH WORK WRITE
YEAR
ZONE
"""
# ... who thought "END-EXEC" was a good one?
# Then some more:
# - WITH ROLLUP: https://technet.microsoft.com/en-us/library/ms189305(v=sql.90).aspx # noqa: E501
# - SOUNDEX: https://msdn.microsoft.com/en-us/library/ms187384.aspx
rnc_extra_sql_server_keywords = """
ROLLUP
SOUNDEX
"""
sql_server_keywords = sorted(
list(
set(
sql_server_reserved_words.split()
+ odbc_reserved_words.split()
+ ANSI92_RESERVED_WORD_LIST.split()
)
)
)
# log.critical(sql_server_keywords)
keyword = make_words_regex(
sql_server_keywords, caseless=True, name="keyword"
)
# -------------------------------------------------------------------------
# Comments
# -------------------------------------------------------------------------
# https://msdn.microsoft.com/en-us/library/ff848807.aspx
comment = ansi_comment
# -----------------------------------------------------------------------------
# identifier
# -----------------------------------------------------------------------------
# http://dev.mysql.com/doc/refman/5.7/en/identifiers.html
bare_identifier_word = make_regex_except_words(
r"\b[a-zA-Z0-9$_]*\b",
ANSI92_RESERVED_WORD_LIST,
caseless=True,
name="bare_identifier_word",
)
identifier = (
bare_identifier_word
| QuotedString(quoteChar="[", endQuoteChar="]", unquoteResults=False)
).setName("identifier")
collation_name = identifier.copy()
column_name = identifier.copy()
column_alias = identifier.copy()
table_name = identifier.copy()
table_alias = identifier.copy()
schema_name = identifier.copy()
index_name = identifier.copy()
function_name = identifier.copy()
parameter_name = identifier.copy()
database_name = identifier.copy()
no_dot = NotAny(".")
table_spec = (
Combine(database_name + "." + schema_name + "." + table_name + no_dot)
| Combine(schema_name + "." + table_name + no_dot)
| table_name + no_dot
).setName("table_spec")
column_spec = (
Combine(
database_name
+ "."
+ schema_name
+ "."
+ table_name
+ "."
+ column_name
+ no_dot
)
| Combine(schema_name + "." + table_name + "." + column_name + no_dot)
| Combine(table_name + "." + column_name + no_dot)
| column_name + no_dot
).setName("column_spec")
# I'm unsure if SQL Server allows keywords in the parts after dots, like
# MySQL does.
# - https://stackoverflow.com/questions/285775/how-to-deal-with-sql-column-names-that-look-like-sql-keywords # noqa: E501
bind_parameter = Literal("?")
variable = Regex(r"@[a-zA-Z0-9\.$_]+").setName("variable")
argument_list = (
delimitedList(expr).setName("arglist").setParseAction(", ".join)
)
function_call = Combine(function_name + LPAR) + argument_list + RPAR
# Not supported: index hints
# ... https://stackoverflow.com/questions/11016935/how-can-i-force-a-query-to-not-use-a-index-on-a-given-table # noqa: E501
# -----------------------------------------------------------------------------
# CASE
# -----------------------------------------------------------------------------
case_expr = (
(
CASE
+ expr
+ OneOrMore(WHEN + expr + THEN + expr)
+ Optional(ELSE + expr)
+ END
)
| (
CASE
+ OneOrMore(WHEN + expr + THEN + expr)
+ Optional(ELSE + expr)
+ END
)
).setName("case_expr")
# -----------------------------------------------------------------------------
# Expressions
# -----------------------------------------------------------------------------
aggregate_function = (
# https://msdn.microsoft.com/en-us/library/ms173454.aspx
AVG
| CHECKSUM_AGG
| COUNT
| COUNT_BIG
| GROUPING
| GROUPING_ID
| MAX
| MIN
| STDEV
| STDEV_P
| SUM
| VAR
| VARP
)
expr_term = (
INTERVAL + expr + time_unit
| Optional(EXISTS) + LPAR + select_statement + RPAR
|
# ... e.g. mycol = EXISTS(SELECT ...)
# ... e.g. mycol IN (SELECT ...)
LPAR + delim_list(expr) + RPAR
|
# ... e.g. mycol IN (1, 2, 3)
case_expr
| bind_parameter
| variable
| function_call
| literal_value
| column_spec # not just identifier
)
UNARY_OP, BINARY_OP, TERNARY_OP = 1, 2, 3
expr << infixNotation(
expr_term,
[
# Having lots of operations in the list here SLOWS IT DOWN A LOT.
# Just combine them into an ordered list.
(COLLATE | oneOf("! - + ~"), UNARY_OP, opAssoc.RIGHT),
(
(
oneOf("^ * / %")
| oneOf("+ - << >> & | = <=> >= > <= < <> !=")
| (IS + Optional(NOT))
| LIKE
| (Optional(NOT) + IN)
| SOUNDEX # RNC; presumably at same level as LIKE
),
BINARY_OP,
opAssoc.LEFT,
),
((BETWEEN, AND), TERNARY_OP, opAssoc.LEFT),
# CASE handled above (hoping precedence is not too much of a
# problem)
(NOT, UNARY_OP, opAssoc.RIGHT),
(AND | "&&" | OR | "||" | ":=", BINARY_OP, opAssoc.LEFT),
],
lpar=LPAR,
rpar=RPAR,
)
# ignores LIKE [ESCAPE]
# -------------------------------------------------------------------------
# SELECT
# -------------------------------------------------------------------------
compound_operator = UNION + Optional(ALL | DISTINCT)
ordering_term = (
expr + Optional(COLLATE + collation_name) + Optional(ASC | DESC)
)
join_constraint = Optional(
Group((ON + expr) | (USING + LPAR + delim_list(column_name) + RPAR))
)
join_op = Group(
COMMA
| NATURAL + (Optional(LEFT | RIGHT) + Optional(OUTER)) + JOIN
| (INNER | CROSS) + JOIN
| Optional(LEFT | RIGHT) + Optional(OUTER) + JOIN
)
join_source = Forward()
single_source = (
table_spec.copy().setResultsName("from_tables", listAllMatches=True)
+ Optional(Optional(AS) + table_alias)
# Optional(index_hint_list) # not supported yet
) | (select_statement + Optional(AS) + table_alias) + (
LPAR + join_source + RPAR
)
join_source << Group(
single_source + ZeroOrMore(join_op + single_source + join_constraint)
)("join_source")
# ... must have a Group to append to it later, it seems
# ... but name it "join_source" here, or it gets enclosed in a further list
# when you name it later
result_base = (
# Aggregate functions: e.g. "MAX(" allowed, "MAX (" not allowed
Combine(COUNT + LPAR) + "*" + RPAR
| Combine(COUNT + LPAR) # special aggregate function
+ DISTINCT
+ expr
+ RPAR
| Combine(aggregate_function + LPAR) # special aggregate function
+ expr
+ RPAR
| expr
| "*"
| Combine(table_name + "." + "*")
| column_spec
| literal_value
)
result_column = (
result_base + Optional(Optional(AS) + column_alias)
).setResultsName("select_columns", listAllMatches=True)
# -------------------------------------------------------------------------
# SELECT
# -------------------------------------------------------------------------
where_expr = Group(expr).setResultsName("where_expr")
where_clause = Group(Optional(WHERE + where_expr)).setResultsName(
"where_clause"
)
select_core = (
SELECT
+ Optional(TOP + integer)
+ Group(Optional(ALL | DISTINCT))("select_specifier")
+ Group(delim_list(result_column))("select_expression")
+ Optional(
FROM
+ join_source
+ where_clause
+ Optional(
GROUP
+ BY
+ delim_list(ordering_term + Optional(ASC | DESC))(
"group_by_term"
)
+ Optional(WITH + ROLLUP)
)
+ Optional(HAVING + expr("having_expr"))
)
)
select_statement << (
select_core
+ ZeroOrMore(compound_operator + select_core)
+ Optional(
ORDER
+ BY
+ delim_list(ordering_term + Optional(ASC | DESC))(
"order_by_terms"
)
)
+
# PROCEDURE ignored
# rest ignored
Optional(";")
)
select_statement.ignore(comment)
# https://msdn.microsoft.com/en-us/library/ms175874.aspx
# ... approximately (and conservatively):
MSSQL_INVALID_FIRST_IF_UNQUOTED = re.compile(r"[^a-zA-Z_@#]")
MSSQL_INVALID_IF_UNQUOTED = re.compile(r"[^a-zA-Z0-9_@#$]")
def __init__(self):
super().__init__()
[docs] @classmethod
def quote_identifier(cls, identifier: str) -> str:
return f"[{identifier}]"
[docs] @classmethod
def is_quoted(cls, identifier: str) -> bool:
return identifier.startswith("[") and identifier.endswith("]")
[docs] @classmethod
def requires_quoting(cls, identifier: str) -> bool:
assert identifier, "Empty identifier"
if cls.MSSQL_INVALID_IF_UNQUOTED.search(identifier):
return True
firstchar = identifier[0]
if cls.MSSQL_INVALID_FIRST_IF_UNQUOTED.search(firstchar):
return True
if identifier.upper() in cls.sql_server_keywords:
return True
return False
[docs] @classmethod
def get_grammar(cls) -> ParserElement:
# Grammar (here, just SELECT)
return cls.select_statement
[docs] @classmethod
def get_column_spec(cls):
return cls.column_spec
[docs] @classmethod
def get_result_column(cls):
return cls.result_column
[docs] @classmethod
def get_join_op(cls):
return cls.join_op
[docs] @classmethod
def get_table_spec(cls):
return cls.table_spec
[docs] @classmethod
def get_join_constraint(cls):
return cls.join_constraint
[docs] @classmethod
def get_select_statement(cls):
return cls.select_statement
[docs] @classmethod
def get_expr(cls):
return cls.expr
[docs] @classmethod
def get_where_clause(cls):
return cls.where_clause
[docs] @classmethod
def get_where_expr(cls):
return cls.where_expr
[docs] @classmethod
def test_dialect_specific_2(cls):
log.info("Testing Microsoft SQL Server-specific aspects...")
log.info("Testing quoted identifiers")
_test_succeed(cls.identifier, "[FROM]")
_test_succeed(cls.identifier, "[SELECT FROM]")
log.info("Testing table_spec")
# SQL Server uses up to: db.schema.table.column
_test_succeed(cls.table_spec, "mytable")
_test_succeed(cls.table_spec, "mydb.mytable")
_test_succeed(cls.table_spec, "mydb.[my silly table]")
_test_succeed(cls.table_spec, "mydb.myschema.mytable")
_test_fail(cls.table_spec, "mydb . mytable")
_test_fail(cls.table_spec, "mydb.myschema.mytable.mycol")
log.info("Testing column_spec")
_test_succeed(cls.column_spec, "mycol")
_test_succeed(cls.column_spec, "forename")
_test_succeed(cls.column_spec, "mytable.mycol")
_test_succeed(cls.column_spec, "t1.a")
_test_succeed(cls.column_spec, "[my silly table].[my silly column]")
_test_succeed(cls.column_spec, "mydb.myschema.mytable.mycol")
_test_succeed(cls.column_spec, "myschema.mytable.mycol")
_test_fail(cls.column_spec, "myschema . mytable . mycol")
log.info("Testing variable")
_test_succeed(cls.variable, "@myvar")
log.info("Testing argument_list")
_test_succeed(cls.argument_list, "@myvar, 5")
log.info("Testing function_call")
_test_succeed(cls.function_call, "myfunc(@myvar, 5)")
# ---------------------------------------------------------------------
# Expressions
# ---------------------------------------------------------------------
log.info("Testing case_expr")
_test_succeed(
cls.case_expr,
"""
CASE v
WHEN 2 THEN x
WHEN 3 THEN y
ELSE -99
END
""",
)
# =============================================================================
# Tests
# =============================================================================
# noinspection PyUnboundLocalVariable
def pyparsing_bugtest_delimited_list_combine(fix_problem: bool = True) -> None:
if fix_problem:
# noinspection PyPep8Naming,PyShadowingNames
delimitedList = delim_list
word = Word(alphanums)
word_list_no_combine = delimitedList(word, combine=False)
word_list_combine = delimitedList(word, combine=True)
print(
word_list_no_combine.parseString("one, two", parseAll=True)
) # ['one', 'two']
print(
word_list_no_combine.parseString("one,two", parseAll=True)
) # ['one', 'two']
print(
word_list_combine.parseString("one, two", parseAll=True)
) # ['one']: ODD ONE OUT
print(
word_list_combine.parseString("one,two", parseAll=True)
) # ['one,two']