"""
|
|
Pygments MySQL lexer tests
|
|
~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
|
|
:copyright: Copyright 2006-2023 by the Pygments team, see AUTHORS.
|
|
:license: BSD, see LICENSE for details.
|
|
"""

import pytest

from pygments.lexers.sql import MySqlLexer
from pygments.token import Comment, Keyword, Literal, Name, Number, Operator, \
    Punctuation, String, Whitespace


@pytest.fixture(scope='module')
def lexer():
    yield MySqlLexer()
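

# A note on the assertions below: ``get_tokens`` yields ``(token_type, value)``
# pairs and, by default, Pygments ensures the output ends with a newline, so
# the final pair in every stream is a trailing-whitespace token. That is why
# many tests slice with ``[:-1]`` or inspect only the leading tokens. A minimal
# sketch (the exact type of the trailing token can vary between Pygments
# versions):
#
#     tokens = list(MySqlLexer().get_tokens('SELECT 1'))
#     assert tokens[-1][1] == '\n'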


@pytest.mark.parametrize('text', ('1', '22', '22 333', '22 a', '22+', '22)',
                                  '22\n333', '22\r\n333'))
def test_integer_literals_positive_match(lexer, text):
    """Validate that integer literals are tokenized as integers."""
    token = list(lexer.get_tokens(text))[0]
    assert token[0] == Number.Integer
    assert token[1] in {'1', '22'}


@pytest.mark.parametrize('text', ('1a', '1A', '1.', '1ひ', '1$', '1_',
                                  '1\u0080', '1\uffff'))
def test_integer_literals_negative_match(lexer, text):
    """Validate that non-integer texts are not matched as integers."""
    assert list(lexer.get_tokens(text))[0][0] != Number.Integer


@pytest.mark.parametrize(
    'text',
    (
        '.123', '1.23', '123.',
        '1e10', '1.0e10', '1.e-10', '.1e+10',
    ),
)
def test_float_literals(lexer, text):
    assert list(lexer.get_tokens(text))[0] == (Number.Float, text)


@pytest.mark.parametrize('text', ("X'0af019'", "x'0AF019'", "0xaf019"))
def test_hexadecimal_literals(lexer, text):
    assert list(lexer.get_tokens(text))[0] == (Number.Hex, text)


@pytest.mark.parametrize('text', ("B'010'", "b'010'", "0b010"))
def test_binary_literals(lexer, text):
    assert list(lexer.get_tokens(text))[0] == (Number.Bin, text)
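

# The next test covers ODBC-style temporal escapes such as {d '...'},
# {t '...'}, and {ts '...'}. The whole escape, braces included, is expected
# to come back as a single Literal.Date token, and the 2020^01@01 case shows
# the lexer does not validate the separators inside the quoted value.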


@pytest.mark.parametrize(
    'text',
    (
        "{d'2020-01-01'}", "{ d ' 2020^01@01 ' }",
        "{t'8 9:10:11'}", "{ t ' 09:10:11.12 ' }", "{ t ' 091011 ' }",
        '{ts"2020-01-01 09:10:11"}', "{ ts ' 2020@01/01 09:10:11 ' }",
    ),
)
def test_temporal_literals(lexer, text):
    assert list(lexer.get_tokens(text))[0] == (Literal.Date, text)
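

# MySQL strings escape a quote character by doubling it ('' or "") or with a
# backslash, so in cases like '''' and '\'' below the middle token should be
# String.Escape rather than a quote that ends the string.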


@pytest.mark.parametrize(
    'text, expected_types',
    (
        (r"'a'", (String.Single,) * 3),
        (r"""'""'""", (String.Single,) * 3),
        (r"''''", (String.Single, String.Escape, String.Single)),
        (r"'\''", (String.Single, String.Escape, String.Single)),
        (r'"a"', (String.Double,) * 3),
        (r'''"''"''', (String.Double,) * 3),
        (r'""""', (String.Double, String.Escape, String.Double)),
        (r'"\""', (String.Double, String.Escape, String.Double)),
    ),
)
def test_string_literals(lexer, text, expected_types):
    tokens = list(lexer.get_tokens(text))[:len(expected_types)]
    assert all(t[0] == e for t, e in zip(tokens, expected_types))
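

# The next test exercises user variables (@a, including quoted forms), system
# variables (@@timestamp and the @@session. / @@global. / @@persist. /
# @@persist_only. scope prefixes), and the ? placeholder, all of which should
# lex as Name.Variable.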


@pytest.mark.parametrize(
    'text',
    (
        "@a", "@1", "@._.$",
        "@'?'", """@'abc''def"`ghi'""",
        '@"#"', '''@"abc""def'`ghi"''',
        '@`^`', """@`abc``def'"ghi`""",
        "@@timestamp",
        "@@session.auto_increment_offset",
        "@@global.auto_increment_offset",
        "@@persist.auto_increment_offset",
        "@@persist_only.auto_increment_offset",
        '?',
    ),
)
def test_variables(lexer, text):
    tokens = list(lexer.get_tokens(text))
    assert all(t[0] == Name.Variable for t in tokens[:-1])
    assert ''.join([t[1] for t in tokens]).strip() == text.strip()


@pytest.mark.parametrize('text', ('true', 'false', 'null', 'unknown'))
def test_constants(lexer, text):
    assert list(lexer.get_tokens(text))[0] == (Name.Constant, text)


@pytest.mark.parametrize('text', ('-- abc', '--\tabc', '#abc'))
def test_comments_single_line(lexer, text):
    # Test the standalone comment.
    tokens = list(lexer.get_tokens(text))
    assert tokens[0] == (Comment.Single, text)

    # Test the comment with mixed tokens.
    tokens = list(lexer.get_tokens('select' + text + '\nselect'))
    assert tokens[0] == (Keyword, 'select')
    assert tokens[1] == (Comment.Single, text)
    assert tokens[-2] == (Keyword, 'select')


@pytest.mark.parametrize(
    'text',
    (
        '/**/a', '/*a*b/c*/a', '/*\nabc\n*/a',
        '/* /* */a',
    ),
)
def test_comments_multi_line(lexer, text):
    tokens = list(lexer.get_tokens(text))
    assert all(token[0] == Comment.Multiline for token in tokens[:-2])
    assert ''.join(token[1] for token in tokens).strip() == text.strip()

    # Validate nested comments are not supported.
    assert tokens[-2][0] != Comment.Multiline
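

# Optimizer hints live in /*+ ... */ comments. The next test checks that a
# known hint name lexes as Comment.Preproc only when it appears as a whole
# word inside a hint comment, not in a plain /* ... */ comment and not as a
# substring of a longer identifier.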


@pytest.mark.parametrize('text', ('BKA', 'SEMIJOIN'))
def test_optimizer_hints(lexer, text):
    good = '/*+ ' + text + '(), */'
    ignore = '/* ' + text + ' */'
    bad1 = '/*+ a' + text + '() */'
    bad2 = '/*+ ' + text + 'a */'
    assert (Comment.Preproc, text) in lexer.get_tokens(good)
    assert (Comment.Preproc, text) not in lexer.get_tokens(ignore)
    assert (Comment.Preproc, text) not in lexer.get_tokens(bad1)
    assert (Comment.Preproc, text) not in lexer.get_tokens(bad2)
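

# SET is ambiguous in MySQL: it is a keyword when it starts a statement or
# follows CHARACTER, but a data type when followed by a parenthesized list of
# values, as in SET("r", "g", "b"). The next test pins down both readings.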


@pytest.mark.parametrize(
    'text, expected_types',
    (
        # SET exceptions
        ('SET', (Keyword,)),
        ('SET abc = 1;', (Keyword,)),
        ('SET @abc = 1;', (Keyword,)),
        ('CHARACTER SET latin1', (Keyword, Whitespace, Keyword)),
        ('SET("r", "g", "b")', (Keyword.Type, Punctuation)),
        ('SET ("r", "g", "b")', (Keyword.Type, Whitespace, Punctuation)),
    ),
)
def test_exceptions(lexer, text, expected_types):
    tokens = list(lexer.get_tokens(text))[:len(expected_types)]
    assert all(t[0] == e for t, e in zip(tokens, expected_types))


@pytest.mark.parametrize(
    'text',
    (
        'SHOW', 'CREATE', 'ALTER', 'DROP',
        'SELECT', 'INSERT', 'UPDATE', 'DELETE',
        'WHERE', 'GROUP', 'ORDER', 'BY', 'AS',
        'DISTINCT', 'JOIN', 'WITH', 'RECURSIVE',
        'PARTITION', 'NTILE', 'MASTER_PASSWORD', 'XA',
        'REQUIRE_TABLE_PRIMARY_KEY_CHECK', 'STREAM',
    ),
)
def test_keywords(lexer, text):
    assert list(lexer.get_tokens(text))[0] == (Keyword, text)


@pytest.mark.parametrize(
    'text',
    (
        # Standard
        'INT(', 'VARCHAR(', 'ENUM(', 'DATETIME', 'GEOMETRY', 'POINT', 'JSON',
        # Aliases and compatibility
        'FIXED', 'MEDIUMINT', 'INT3', 'REAL', 'SERIAL',
        'LONG', 'NATIONAL', 'PRECISION', 'VARYING',
    ),
)
def test_data_types(lexer, text):
    assert list(lexer.get_tokens(text))[0] == (Keyword.Type, text.strip('('))


@pytest.mark.parametrize(
    'text',
    (
        # Common
        'CAST', 'CONCAT_WS', 'DAYNAME', 'IFNULL', 'NOW', 'SUBSTR',
        # Less common
        'CAN_ACCESS_COLUMN', 'JSON_CONTAINS_PATH', 'ST_GEOMFROMGEOJSON',
    ),
)
def test_functions(lexer, text):
    assert list(lexer.get_tokens(text + '('))[0] == (Name.Function, text)
    assert list(lexer.get_tokens(text + ' ('))[0] == (Name.Function, text)


@pytest.mark.parametrize(
    'text',
    (
        'abc_$123', '上市年限', 'ひらがな', '123_$abc', '123ひらがな',
    ),
)
def test_schema_object_names_unquoted(lexer, text):
    tokens = list(lexer.get_tokens(text))[:-1]
    assert all(token[0] == Name for token in tokens)
    assert ''.join(token[1] for token in tokens) == text


@pytest.mark.parametrize(
    'text',
    (
        '`a`', '`1`', '`上市年限`', '`ひらがな`', '`select`', '`concat(`',
        '`-- `', '`/*`', '`#`',
    ),
)
def test_schema_object_names_quoted(lexer, text):
    tokens = list(lexer.get_tokens(text))[:-1]
    assert tokens[0] == (Name.Quoted, '`')
    assert tokens[1] == (Name.Quoted, text[1:-1])
    assert tokens[2] == (Name.Quoted, '`')
    assert ''.join(token[1] for token in tokens) == text
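

# Inside a backtick-quoted identifier, a literal backtick is escaped by
# doubling it, so the ```` case below is a quoted identifier whose body is
# the two-character escape sequence ``, lexed as Name.Quoted.Escape.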


@pytest.mark.parametrize('text', ('````',))
def test_schema_object_names_quoted_escaped(lexer, text):
    """Test quoted schema object names with escape sequences."""
    tokens = list(lexer.get_tokens(text))[:-1]
    assert tokens[0] == (Name.Quoted, '`')
    assert tokens[1] == (Name.Quoted.Escape, text[1:-1])
    assert tokens[2] == (Name.Quoted, '`')
    assert ''.join(token[1] for token in tokens) == text


@pytest.mark.parametrize(
    'text',
    ('+', '*', '/', '%', '&&', ':=', '!', '<', '->>', '^', '|', '~'),
)
def test_operators(lexer, text):
    assert list(lexer.get_tokens(text))[0] == (Operator, text)


@pytest.mark.parametrize(
    'text, expected_types',
    (
        ('abc.efg', (Name, Punctuation, Name)),
        ('abc,efg', (Name, Punctuation, Name)),
        ('MAX(abc)', (Name.Function, Punctuation, Name, Punctuation)),
        ('efg;', (Name, Punctuation)),
    ),
)
def test_punctuation(lexer, text, expected_types):
    tokens = list(lexer.get_tokens(text))[:len(expected_types)]
    assert all(t[0] == e for t, e in zip(tokens, expected_types))