That way, we can set it to "" for old lexers, and check that it's present on new lexers. (In the future, we might also use it for better presentation in the documentation.)
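As a rough illustration of that presence check (a hypothetical helper, not part of this PR or the existing test suite; it assumes a lexer that lacks the attribute simply doesn't define it on the class):

```python
# Hypothetical check: list lexers whose class defines no version_added at all.
# "" is acceptable (pre-existing lexer); only an absent attribute is flagged.
from pygments.lexers import get_all_lexers, find_lexer_class_by_name


def lexers_missing_version_added():
    missing = []
    for name, aliases, _filenames, _mimetypes in get_all_lexers():
        if not aliases:
            continue  # some lexers (e.g. RawTokenLexer) register no aliases
        cls = find_lexer_class_by_name(aliases[0])
        if getattr(cls, 'version_added', None) is None:
            missing.append(name)
    return missing
```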
pygments/lexers/special.py (121 lines, 3.5 KiB, Python):
"""
|
|
pygments.lexers.special
|
|
~~~~~~~~~~~~~~~~~~~~~~~
|
|
|
|
Special lexers.
|
|
|
|
:copyright: Copyright 2006-2023 by the Pygments team, see AUTHORS.
|
|
:license: BSD, see LICENSE for details.
|
|
"""
|
|
|
|
import ast
|
|
|
|
from pygments.lexer import Lexer, line_re
|
|
from pygments.token import Token, Error, Text, Generic
|
|
from pygments.util import get_choice_opt
|
|
|
|
|
|
__all__ = ['TextLexer', 'OutputLexer', 'RawTokenLexer']
|
|
|
|
|
|
class TextLexer(Lexer):
|
|
"""
|
|
"Null" lexer, doesn't highlight anything.
|
|
"""
|
|
name = 'Text only'
|
|
aliases = ['text']
|
|
filenames = ['*.txt']
|
|
mimetypes = ['text/plain']
|
|
url = ""
|
|
version_added = ''
|
|
|
|
priority = 0.01
|
|
|
|
def get_tokens_unprocessed(self, text):
|
|
yield 0, Text, text
|
|
|
|
def analyse_text(text):
|
|
return TextLexer.priority
|
|
|
|
|
|
class OutputLexer(Lexer):
|
|
"""
|
|
Simple lexer that highlights everything as ``Token.Generic.Output``.
|
|
"""
|
|
name = 'Text output'
|
|
aliases = ['output']
|
|
url = ""
|
|
version_added = '2.10'
|
|
|
|
def get_tokens_unprocessed(self, text):
|
|
yield 0, Generic.Output, text
|
|
|
|
|
|
_ttype_cache = {}
|
|
|
|
|
|
class RawTokenLexer(Lexer):
|
|
"""
|
|
Recreate a token stream formatted with the `RawTokenFormatter`.
|
|
|
|
Additional options accepted:
|
|
|
|
`compress`
|
|
If set to ``"gz"`` or ``"bz2"``, decompress the token stream with
|
|
the given compression algorithm before lexing (default: ``""``).
|
|
"""
|
|
name = 'Raw token data'
|
|
aliases = []
|
|
filenames = []
|
|
mimetypes = ['application/x-pygments-tokens']
|
|
url = 'https://pygments.org/docs/formatters/#RawTokenFormatter'
|
|
version_added = ''
|
|
|
|
def __init__(self, **options):
|
|
self.compress = get_choice_opt(options, 'compress',
|
|
['', 'none', 'gz', 'bz2'], '')
|
|
Lexer.__init__(self, **options)
|
|
|
|
def get_tokens(self, text):
|
|
if self.compress:
|
|
if isinstance(text, str):
|
|
text = text.encode('latin1')
|
|
try:
|
|
if self.compress == 'gz':
|
|
import gzip
|
|
text = gzip.decompress(text)
|
|
elif self.compress == 'bz2':
|
|
import bz2
|
|
text = bz2.decompress(text)
|
|
except OSError:
|
|
yield Error, text.decode('latin1')
|
|
if isinstance(text, bytes):
|
|
text = text.decode('latin1')
|
|
|
|
# do not call Lexer.get_tokens() because stripping is not optional.
|
|
text = text.strip('\n') + '\n'
|
|
for i, t, v in self.get_tokens_unprocessed(text):
|
|
yield t, v
|
|
|
|
def get_tokens_unprocessed(self, text):
|
|
length = 0
|
|
for match in line_re.finditer(text):
|
|
try:
|
|
ttypestr, val = match.group().rstrip().split('\t', 1)
|
|
ttype = _ttype_cache.get(ttypestr)
|
|
if not ttype:
|
|
ttype = Token
|
|
ttypes = ttypestr.split('.')[1:]
|
|
for ttype_ in ttypes:
|
|
if not ttype_ or not ttype_[0].isupper():
|
|
raise ValueError('malformed token name')
|
|
ttype = getattr(ttype, ttype_)
|
|
_ttype_cache[ttypestr] = ttype
|
|
val = ast.literal_eval(val)
|
|
if not isinstance(val, str):
|
|
raise ValueError('expected str')
|
|
except (SyntaxError, ValueError):
|
|
val = match.group()
|
|
ttype = Error
|
|
yield length, ttype, val
|
|
length += len(val)
|