That way, we can set it to "" for old lexers, and check that it's present on new lexers. (In the future, we might also use it for better presentation in the documentation.)
262 lines
7.9 KiB
Python
262 lines
7.9 KiB
Python
"""
|
|
pygments.lexers.grammar_notation
|
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
|
|
Lexers for grammar notations like BNF.
|
|
|
|
:copyright: Copyright 2006-2023 by the Pygments team, see AUTHORS.
|
|
:license: BSD, see LICENSE for details.
|
|
"""
|
|
|
|
from pygments.lexer import RegexLexer, bygroups, include, this, using, words
|
|
from pygments.token import Comment, Keyword, Literal, Name, Number, \
|
|
Operator, Punctuation, String, Text, Whitespace
|
|
|
|
__all__ = ['BnfLexer', 'AbnfLexer', 'JsgfLexer', 'PegLexer']
|
|
|
|
|
|
class BnfLexer(RegexLexer):
    """
    Lexer for grammar notations that resemble the original BNF.

    To stay useful for as many BNF dialects as possible, the lexer is
    deliberately minimal:

    * Terminal symbols are not singled out.

    * Nonterminal symbols are assumed to always be wrapped in angle
      brackets.

    * Between the brackets, a nonterminal name may contain any printable
      ASCII character other than the angle brackets themselves.
      This leniency is for `RBNF <http://www.rfc-base.org/txt/rfc-5511.txt>`_.

    * The target notation is assumed to have no comment syntax.

    * `::=` is the only operator that is recognized; all other operators
      and punctuation are emitted as plain text.

    The resulting highlighting is sparse, which is the accepted price for
    staying dialect-neutral.
    """

    name = 'BNF'
    url = 'https://en.wikipedia.org/wiki/Backus%E2%80%93Naur_form'
    aliases = ['bnf']
    filenames = ['*.bnf']
    mimetypes = ['text/x-bnf']
    version_added = '2.1'

    tokens = {
        'root': [
            # <nonterminal>: the brackets are punctuation, the name between
            # them is reported as a class name
            (r'(<)([ -;=?-~]+)(>)',
             bygroups(Punctuation, Name.Class, Punctuation)),

            # the one and only operator
            (r'::=', Operator),

            # Everything else is plain text.  The first rule swallows a long
            # run in a single match (for performance); the second picks up
            # any stray '<', '>' or ':' one character at a time.
            (r'[^<>:]+', Text),
            (r'.', Text),
        ],
    }
|
|
|
|
|
|
class AbnfLexer(RegexLexer):
    """
    Lexer for ABNF grammars as described by IETF RFC 7405
    (which updates `RFC 5234 <http://www.ietf.org/rfc/rfc5234.txt>`_).
    """

    name = 'ABNF'
    aliases = ['abnf']
    filenames = ['*.abnf']
    mimetypes = ['text/x-abnf']
    url = 'http://www.ietf.org/rfc/rfc7405.txt'
    version_added = '2.1'

    # Names highlighted as keywords by the `words(...)` rule below.
    _core_rules = (
        'ALPHA', 'BIT', 'CHAR', 'CR', 'CRLF', 'CTL', 'DIGIT',
        'DQUOTE', 'HEXDIG', 'HTAB', 'LF', 'LWSP', 'OCTET',
        'SP', 'VCHAR', 'WSP',
    )

    tokens = {
        'root': [
            # ';' starts a comment that runs to the end of the line
            (r';.*$', Comment.Single),

            # Quoted string, optionally prefixed with %s or %i.  A double
            # quote can never occur inside one; it is spelled '%x22'.
            (r'(%[si])?"[^"]*"', Literal),

            # Numeric terminal values.  For each base the range form
            # ("lo-hi") is tried before the dot-separated concatenation.

            # base 2 (rarely seen in practice)
            (r'%b[01]+\-[01]+\b', Literal),
            (r'%b[01]+(\.[01]+)*\b', Literal),

            # base 10
            (r'%d[0-9]+\-[0-9]+\b', Literal),
            (r'%d[0-9]+(\.[0-9]+)*\b', Literal),

            # base 16
            (r'%x[0-9a-fA-F]+\-[0-9a-fA-F]+\b', Literal),
            (r'%x[0-9a-fA-F]+(\.[0-9a-fA-F]+)*\b', Literal),

            # Repetition prefixes (<a>*<b>element), including the plain
            # "nRule" form; the most specific pattern comes first.
            (r'\b[0-9]+\*[0-9]+', Operator),
            (r'\b[0-9]+\*', Operator),
            (r'\b[0-9]+', Operator),
            (r'\*', Operator),

            # The "Core Rules" are not keywords in the strict sense, but
            # they are highlighted as such.
            (words(_core_rules, suffix=r'\b'), Keyword),

            # Rule names: ALPHA *(ALPHA / DIGIT / "-")
            (r'[a-zA-Z][a-zA-Z0-9-]*\b', Name.Class),

            # definition, incremental alternative, alternation
            (r'(=/|=|/)', Operator),

            # grouping and optional-sequence brackets
            (r'[\[\]()]', Punctuation),

            # whitespace, then a catch-all for anything unrecognized
            (r'\s+', Whitespace),
            (r'.', Text),
        ],
    }
|
|
|
|
|
|
class JsgfLexer(RegexLexer):
    """
    Lexer for grammars written in the JSpeech Grammar Format.
    """

    name = 'JSGF'
    aliases = ['jsgf']
    filenames = ['*.jsgf']
    mimetypes = ['application/jsgf', 'application/x-jsgf', 'text/jsgf']
    url = 'https://www.w3.org/TR/jsgf/'
    version_added = '2.2'

    tokens = {
        # Comments are tried first, then everything else.
        'root': [
            include('comments'),
            include('non-comments'),
        ],

        'comments': [
            # '/**' (but not the empty comment '/**/') opens a
            # documentation comment, which has its own state
            (r'/\*\*(?!/)', Comment.Multiline, 'documentation comment'),
            # ordinary block comment, matched non-greedily up to '*/'
            (r'/\*[\w\W]*?\*/', Comment.Multiline),
            # line comment
            (r'//.*$', Comment.Single),
        ],

        # Kept separate from 'root' so that the 'example' state can reuse
        # these rules without also re-entering comment handling.
        'non-comments': [
            # '#JSGF ...' header, only at the very start of the input
            (r'\A#JSGF[^;]*', Comment.Preproc),
            (r'\s+', Whitespace),
            (r';', Punctuation),
            (r'[=|()\[\]*+]', Operator),
            # slash-delimited span, emitted as a float
            # (presumably JSGF weights such as /0.5/)
            (r'/[^/]+/', Number.Float),
            (r'"', String.Double, 'string'),
            (r'\{', String.Other, 'tag'),
            (words(('import', 'public'), suffix=r'\b'), Keyword.Reserved),
            (r'grammar\b', Keyword.Reserved, 'grammar name'),
            # the special rule names <NULL> and <VOID>
            (r'(<)(NULL|VOID)(>)',
             bygroups(Punctuation, Name.Builtin, Punctuation)),
            (r'<', Punctuation, 'rulename'),
            # a word, or a run of characters that no rule above starts with
            (r'\w+|[^\s;=|()\[\]*+/"{<\w]+', Text),
        ],

        # Inside "...": backslash escapes are highlighted separately.
        'string': [
            (r'"', String.Double, '#pop'),
            (r'\\.', String.Escape),
            (r'[^\\"]+', String.Double),
        ],

        # Inside {...}: same shape as 'string', closed by '}'.
        'tag': [
            (r'\}', String.Other, '#pop'),
            (r'\\.', String.Escape),
            (r'[^\\}]+', String.Other),
        ],

        # After the 'grammar' keyword: dotted name terminated by ';'.
        'grammar name': [
            (r';', Punctuation, '#pop'),
            (r'\s+', Whitespace),
            (r'\.', Punctuation),
            (r'[^;\s.]+', Name.Namespace),
        ],

        # Inside <...>: dotted components are namespaces, the final
        # component is the rule name itself.
        'rulename': [
            (r'>', Punctuation, '#pop'),
            (r'\*', Punctuation),
            (r'\s+', Whitespace),
            (r'([^.>]+)(\s*)(\.)',
             bygroups(Name.Namespace, Text, Punctuation)),
            (r'[^.>]+', Name.Constant),
        ],

        'documentation comment': [
            (r'\*/', Comment.Multiline, '#pop'),
            # '@example' / '@see' tags: the text up to the next tag line or
            # the closing '*/' is lexed again by this lexer, starting in
            # the 'example' state.
            (r'^(\s*)(\*?)(\s*)(@(?:example|see))(\s+)'
             r'([\w\W]*?(?=(?:^\s*\*?\s*@|\*/)))',
             bygroups(Whitespace, Comment.Multiline, Whitespace,
                      Comment.Special, Whitespace,
                      using(this, state='example'))),
            # any other '@tag' at the start of a comment line
            (r'(^\s*\*?\s*)(@\S*)',
             bygroups(Comment.Multiline, Comment.Special)),
            # remaining comment text
            (r'[^*\n@]+|\w|\W', Comment.Multiline),
        ],

        # Grammar snippets embedded in a documentation comment.
        'example': [
            # a '*' that leads a continuation line belongs to the comment
            (r'(\n\s*)(\*)', bygroups(Whitespace, Comment.Multiline)),
            include('non-comments'),
            (r'.', Comment.Multiline),
        ],
    }
|
|
|
|
|
|
class PegLexer(RegexLexer):
    """
    This lexer is for Parsing Expression Grammars (PEG).

    Various implementations of PEG have made different decisions
    regarding the syntax, so let's try to be accommodating:

    * `<-`, `←`, `:`, and `=` are all accepted as rule operators.

    * Both `|` and `/` are choice operators.

    * `^`, `↑`, and `~` are cut operators.

    * A single `a-z` character immediately before a string, or
      multiple `a-z` characters following a string, are part of the
      string (e.g., `r"..."` or `"..."ilmsuxa`).
    """

    name = 'PEG'
    url = 'https://bford.info/pub/lang/peg.pdf'
    aliases = ['peg']
    filenames = ['*.peg']
    mimetypes = ['text/x-peg']
    version_added = '2.6'

    tokens = {
        'root': [
            # Comments run from '#' to the end of the line
            (r'#.*$', Comment.Single),

            # All operators
            (r'<-|[←:=/|&!?*+^↑~]', Operator),

            # Other punctuation
            (r'[()]', Punctuation),

            # Keywords (the lone dot)
            (r'\.', Keyword),

            # Character classes.
            # The leading run must exclude the backslash as well as ']';
            # otherwise it swallows the backslash of an escape such as
            # '\]' and the class is cut short at the escaped bracket
            # (e.g. '[a\]b]' would end after '[a\]').  With the backslash
            # excluded, escapes are consumed by the '\\.' branch, the
            # same way the string rules below handle them.
            (r'(\[)([^\]\\]*(?:\\.[^\]\\]*)*)(\])',
             bygroups(Punctuation, String, Punctuation)),

            # Single and double quoted strings (with optional modifiers)
            (r'[a-z]?"[^"\\]*(?:\\.[^"\\]*)*"[a-z]*', String.Double),
            (r"[a-z]?'[^'\\]*(?:\\.[^'\\]*)*'[a-z]*", String.Single),

            # Nonterminals are not whitespace, operators, or punctuation
            (r'[^\s<←:=/|&!?*+\^↑~()\[\]"\'#]+', Name.Class),

            # Fallback
            (r'.', Text),
        ],
    }
|