This repository has been archived on 2024-06-20. You can view files and clone it, but you cannot make any changes to its state, such as pushing and creating new issues, pull requests or comments.
coffee.pygments/tests/test_raw_token.py
Anders Kaseorg c2cf688397
RawToken{Formatter,Lexer}: support Python 3 and handle exceptions (#1602)
In Python 3, RawTokenFormatter would output non-ASCII for non-ASCII
input, and RawTokenLexer would throw Unicode-related exceptions for
ASCII or non-ASCII input; fix them.  Also, handle all exceptions, so
that callers who find RawTokenLexer via get_lexer_by_name on user
input don’t unexpectedly get a lexer that throws exceptions.

Signed-off-by: Anders Kaseorg <andersk@mit.edu>
2021-02-14 09:01:47 +01:00

68 lines
2.2 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import bz2
import gzip
from pygments import highlight
from pygments.formatters import HtmlFormatter, RawTokenFormatter
from pygments.lexers import PythonLexer, RawTokenLexer
def test_raw_token():
    """Round-trip a snippet through RawTokenFormatter and RawTokenLexer.

    Checks that the raw token stream survives a round trip — uncompressed,
    gzip-compressed, and bz2-compressed — whether fed back as bytes or as a
    decoded str, and that re-rendering it to HTML matches rendering the
    original source directly.
    """
    # Non-ASCII input (α) exercises the Unicode handling that used to break
    # RawTokenFormatter/RawTokenLexer on Python 3.
    code = "2 + α"
    raw = highlight(code, PythonLexer(), RawTokenFormatter())
    html = highlight(code, PythonLexer(), HtmlFormatter())

    # Uncompressed round trip: bytes input and decoded-str input must both work.
    assert highlight(raw, RawTokenLexer(), RawTokenFormatter()) == raw
    assert highlight(raw, RawTokenLexer(), HtmlFormatter()) == html
    assert highlight(raw.decode(), RawTokenLexer(), HtmlFormatter()) == html

    # gzip round trip; latin1 decoding maps bytes 0-255 one-to-one, so the
    # compressed payload survives a bytes -> str -> bytes passage intact.
    raw_gz = highlight(code, PythonLexer(), RawTokenFormatter(compress="gz"))
    assert gzip.decompress(raw_gz) == raw
    assert highlight(raw_gz, RawTokenLexer(compress="gz"), RawTokenFormatter()) == raw
    assert (
        highlight(
            raw_gz.decode("latin1"), RawTokenLexer(compress="gz"), RawTokenFormatter()
        )
        == raw
    )

    # bz2 round trip, same pattern as gzip above.
    raw_bz2 = highlight(code, PythonLexer(), RawTokenFormatter(compress="bz2"))
    assert bz2.decompress(raw_bz2) == raw
    assert highlight(raw_bz2, RawTokenLexer(compress="bz2"), RawTokenFormatter()) == raw
    assert (
        highlight(
            raw_bz2.decode("latin1"), RawTokenLexer(compress="bz2"), RawTokenFormatter()
        )
        == raw
    )
def test_invalid_raw_token():
    """Malformed raw-token input must produce Error tokens, not exceptions.

    RawTokenLexer is reachable via get_lexer_by_name on user input, so it
    must degrade gracefully: unparseable lines become Token.Error whose value
    is the offending line, and recoverable oddities (unknown token names,
    u-prefixed or byte reprs) are normalized rather than raised on.
    """
    # A line with no tab separator at all: whole line becomes an Error token.
    assert (
        highlight("Tolkien", RawTokenLexer(), RawTokenFormatter())
        == b"Token.Error\t'Tolkien\\n'\n"
    )
    # "Tolkien" is not a valid token path; it falls back to the root Token.
    assert (
        highlight("Tolkien\t'x'", RawTokenLexer(), RawTokenFormatter())
        == b"Token\t'x'\n"
    )
    # The value field must be a string repr; a bare number is an error.
    assert (
        highlight("Token.Text\t42", RawTokenLexer(), RawTokenFormatter())
        == b"Token.Error\t'Token.Text\\t42\\n'\n"
    )
    # An unterminated string literal is an error.
    assert (
        highlight("Token.Text\t'", RawTokenLexer(), RawTokenFormatter())
        == b'Token.Error\t"Token.Text\\t\'\\n"\n'
    )
    # Non-ASCII values are re-emitted as ASCII-safe escapes.
    assert (
        highlight("Token.Text\t'α'", RawTokenLexer(), RawTokenFormatter())
        == b"Token.Text\t'\\u03b1'\n"
    )
    # Legacy Python 2 u'' reprs are accepted and normalized.
    assert (
        highlight("Token.Text\tu'α'", RawTokenLexer(), RawTokenFormatter())
        == b"Token.Text\t'\\u03b1'\n"
    )
    # Raw bytes input with a non-ASCII byte is handled without raising.
    assert (
        highlight(b"Token.Text\t'\xff'", RawTokenLexer(), RawTokenFormatter())
        == b"Token.Text\t'\\xff'\n"
    )