This repository has been archived on 2024-06-20. You can view files and clone it, but you cannot make any changes to it's state, such as pushing and creating new issues, pull requests or comments.
coffee.pygments/pygments/lexers/julia.py
Jean Abou Samra 25f230191f Move versionadded data to a lexer attribute
That way, we can set it to "" for old lexers, and check that it's
present on new lexers. (In the future, we might also use it for better
presentation in the documentation.)
2023-11-26 14:51:52 +01:00

293 lines
11 KiB
Python

"""
pygments.lexers.julia
~~~~~~~~~~~~~~~~~~~~~
Lexers for the Julia language.
:copyright: Copyright 2006-2023 by the Pygments team, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
from pygments.lexer import Lexer, RegexLexer, bygroups, do_insertions, \
words, include
from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
Number, Punctuation, Generic, Whitespace
from pygments.util import shebang_matches
from pygments.lexers._julia_builtins import OPERATORS_LIST, DOTTED_OPERATORS_LIST, \
KEYWORD_LIST, BUILTIN_LIST, LITERAL_LIST
__all__ = ['JuliaLexer', 'JuliaConsoleLexer']
# see https://docs.julialang.org/en/v1/manual/variables/#Allowed-Variable-Names
allowed_variable = \
'(?:[a-zA-Z_\u00A1-\U0010ffff][a-zA-Z_0-9!\u00A1-\U0010ffff]*)'
# see https://github.com/JuliaLang/julia/blob/master/src/flisp/julia_opsuffs.h
operator_suffixes = r'[²³¹ʰʲʳʷʸˡˢˣᴬᴮᴰᴱᴳᴴᴵᴶᴷᴸᴹᴺᴼᴾᴿᵀᵁᵂᵃᵇᵈᵉᵍᵏᵐᵒᵖᵗᵘᵛᵝᵞᵟᵠᵡᵢᵣᵤᵥᵦᵧᵨᵩᵪᶜᶠᶥᶦᶫᶰᶸᶻᶿ′″‴‵‶‷⁗⁰ⁱ⁴⁵⁶⁷⁸⁹⁺⁻⁼⁽⁾ⁿ₀₁₂₃₄₅₆₇₈₉₊₋₌₍₎ₐₑₒₓₕₖₗₘₙₚₛₜⱼⱽ]*'
class JuliaLexer(RegexLexer):
"""
For Julia source code.
"""
name = 'Julia'
url = 'https://julialang.org/'
aliases = ['julia', 'jl']
filenames = ['*.jl']
mimetypes = ['text/x-julia', 'application/x-julia']
version_added = '1.6'
tokens = {
'root': [
(r'\n', Whitespace),
(r'[^\S\n]+', Whitespace),
(r'#=', Comment.Multiline, "blockcomment"),
(r'#.*$', Comment),
(r'[\[\](),;]', Punctuation),
# symbols
# intercept range expressions first
(r'(' + allowed_variable + r')(\s*)(:)(' + allowed_variable + ')',
bygroups(Name, Whitespace, Operator, Name)),
# then match :name which does not follow closing brackets, digits, or the
# ::, <:, and :> operators
(r'(?<![\]):<>\d.])(:' + allowed_variable + ')', String.Symbol),
# type assertions - excludes expressions like ::typeof(sin) and ::avec[1]
(r'(?<=::)(\s*)(' + allowed_variable + r')\b(?![(\[])',
bygroups(Whitespace, Keyword.Type)),
# type comparisons
# - MyType <: A or MyType >: A
('(' + allowed_variable + r')(\s*)([<>]:)(\s*)(' + allowed_variable + r')\b(?![(\[])',
bygroups(Keyword.Type, Whitespace, Operator, Whitespace, Keyword.Type)),
# - <: B or >: B
(r'([<>]:)(\s*)(' + allowed_variable + r')\b(?![(\[])',
bygroups(Operator, Whitespace, Keyword.Type)),
# - A <: or A >:
(r'\b(' + allowed_variable + r')(\s*)([<>]:)',
bygroups(Keyword.Type, Whitespace, Operator)),
# operators
# Suffixes aren't actually allowed on all operators, but we'll ignore that
# since those cases are invalid Julia code.
(words([*OPERATORS_LIST, *DOTTED_OPERATORS_LIST],
suffix=operator_suffixes), Operator),
(words(['.' + o for o in DOTTED_OPERATORS_LIST],
suffix=operator_suffixes), Operator),
(words(['...', '..']), Operator),
# NOTE
# Patterns below work only for definition sites and thus hardly reliable.
#
# functions
# (r'(function)(\s+)(' + allowed_variable + ')',
# bygroups(Keyword, Text, Name.Function)),
# chars
(r"'(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,3}|\\u[a-fA-F0-9]{1,4}|"
r"\\U[a-fA-F0-9]{1,6}|[^\\\'\n])'", String.Char),
# try to match trailing transpose
(r'(?<=[.\w)\]])(\'' + operator_suffixes + ')+', Operator),
# raw strings
(r'(raw)(""")', bygroups(String.Affix, String), 'tqrawstring'),
(r'(raw)(")', bygroups(String.Affix, String), 'rawstring'),
# regular expressions
(r'(r)(""")', bygroups(String.Affix, String.Regex), 'tqregex'),
(r'(r)(")', bygroups(String.Affix, String.Regex), 'regex'),
# other strings
(r'(' + allowed_variable + ')?(""")',
bygroups(String.Affix, String), 'tqstring'),
(r'(' + allowed_variable + ')?(")',
bygroups(String.Affix, String), 'string'),
# backticks
(r'(' + allowed_variable + ')?(```)',
bygroups(String.Affix, String.Backtick), 'tqcommand'),
(r'(' + allowed_variable + ')?(`)',
bygroups(String.Affix, String.Backtick), 'command'),
# type names
# - names that begin a curly expression
('(' + allowed_variable + r')(\{)',
bygroups(Keyword.Type, Punctuation), 'curly'),
# - names as part of bare 'where'
(r'(where)(\s+)(' + allowed_variable + ')',
bygroups(Keyword, Whitespace, Keyword.Type)),
# - curly expressions in general
(r'(\{)', Punctuation, 'curly'),
# - names as part of type declaration
(r'(abstract|primitive)([ \t]+)(type\b)([\s()]+)(' +
allowed_variable + r')',
bygroups(Keyword, Whitespace, Keyword, Text, Keyword.Type)),
(r'(mutable(?=[ \t]))?([ \t]+)?(struct\b)([\s()]+)(' +
allowed_variable + r')',
bygroups(Keyword, Whitespace, Keyword, Text, Keyword.Type)),
# macros
(r'@' + allowed_variable, Name.Decorator),
(words([*OPERATORS_LIST, '..', '.', *DOTTED_OPERATORS_LIST],
prefix='@', suffix=operator_suffixes), Name.Decorator),
# keywords
(words(KEYWORD_LIST, suffix=r'\b'), Keyword),
# builtin types
(words(BUILTIN_LIST, suffix=r'\b'), Keyword.Type),
# builtin literals
(words(LITERAL_LIST, suffix=r'\b'), Name.Builtin),
# names
(allowed_variable, Name),
# numbers
(r'(\d+((_\d+)+)?\.(?!\.)(\d+((_\d+)+)?)?|\.\d+((_\d+)+)?)([eEf][+-]?[0-9]+)?', Number.Float),
(r'\d+((_\d+)+)?[eEf][+-]?[0-9]+', Number.Float),
(r'0x[a-fA-F0-9]+((_[a-fA-F0-9]+)+)?(\.([a-fA-F0-9]+((_[a-fA-F0-9]+)+)?)?)?p[+-]?\d+', Number.Float),
(r'0b[01]+((_[01]+)+)?', Number.Bin),
(r'0o[0-7]+((_[0-7]+)+)?', Number.Oct),
(r'0x[a-fA-F0-9]+((_[a-fA-F0-9]+)+)?', Number.Hex),
(r'\d+((_\d+)+)?', Number.Integer),
# single dot operator matched last to permit e.g. ".1" as a float
(words(['.']), Operator),
],
"blockcomment": [
(r'[^=#]', Comment.Multiline),
(r'#=', Comment.Multiline, '#push'),
(r'=#', Comment.Multiline, '#pop'),
(r'[=#]', Comment.Multiline),
],
'curly': [
(r'\{', Punctuation, '#push'),
(r'\}', Punctuation, '#pop'),
(allowed_variable, Keyword.Type),
include('root'),
],
'tqrawstring': [
(r'"""', String, '#pop'),
(r'([^"]|"[^"][^"])+', String),
],
'rawstring': [
(r'"', String, '#pop'),
(r'\\"', String.Escape),
(r'([^"\\]|\\[^"])+', String),
],
# Interpolation is defined as "$" followed by the shortest full
# expression, which is something we can't parse. Include the most
# common cases here: $word, and $(paren'd expr).
'interp': [
(r'\$' + allowed_variable, String.Interpol),
(r'(\$)(\()', bygroups(String.Interpol, Punctuation), 'in-intp'),
],
'in-intp': [
(r'\(', Punctuation, '#push'),
(r'\)', Punctuation, '#pop'),
include('root'),
],
'string': [
(r'(")(' + allowed_variable + r'|\d+)?',
bygroups(String, String.Affix), '#pop'),
# FIXME: This escape pattern is not perfect.
(r'\\([\\"\'$nrbtfav]|(x|u|U)[a-fA-F0-9]+|\d+)', String.Escape),
include('interp'),
# @printf and @sprintf formats
(r'%[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?[hlL]?[E-GXc-giorsux%]',
String.Interpol),
(r'[^"$%\\]+', String),
(r'.', String),
],
'tqstring': [
(r'(""")(' + allowed_variable + r'|\d+)?',
bygroups(String, String.Affix), '#pop'),
(r'\\([\\"\'$nrbtfav]|(x|u|U)[a-fA-F0-9]+|\d+)', String.Escape),
include('interp'),
(r'[^"$%\\]+', String),
(r'.', String),
],
'regex': [
(r'(")([imsxa]*)?', bygroups(String.Regex, String.Affix), '#pop'),
(r'\\"', String.Regex),
(r'[^\\"]+', String.Regex),
],
'tqregex': [
(r'(""")([imsxa]*)?', bygroups(String.Regex, String.Affix), '#pop'),
(r'[^"]+', String.Regex),
],
'command': [
(r'(`)(' + allowed_variable + r'|\d+)?',
bygroups(String.Backtick, String.Affix), '#pop'),
(r'\\[`$]', String.Escape),
include('interp'),
(r'[^\\`$]+', String.Backtick),
(r'.', String.Backtick),
],
'tqcommand': [
(r'(```)(' + allowed_variable + r'|\d+)?',
bygroups(String.Backtick, String.Affix), '#pop'),
(r'\\\$', String.Escape),
include('interp'),
(r'[^\\`$]+', String.Backtick),
(r'.', String.Backtick),
],
}
def analyse_text(text):
return shebang_matches(text, r'julia')
class JuliaConsoleLexer(Lexer):
"""
For Julia console sessions. Modeled after MatlabSessionLexer.
"""
name = 'Julia console'
aliases = ['jlcon', 'julia-repl']
url = 'https://julialang.org/'
version_added = '1.6'
def get_tokens_unprocessed(self, text):
jllexer = JuliaLexer(**self.options)
start = 0
curcode = ''
insertions = []
output = False
error = False
for line in text.splitlines(keepends=True):
if line.startswith('julia>'):
insertions.append((len(curcode), [(0, Generic.Prompt, line[:6])]))
curcode += line[6:]
output = False
error = False
elif line.startswith('help?>') or line.startswith('shell>'):
yield start, Generic.Prompt, line[:6]
yield start + 6, Text, line[6:]
output = False
error = False
elif line.startswith(' ') and not output:
insertions.append((len(curcode), [(0, Whitespace, line[:6])]))
curcode += line[6:]
else:
if curcode:
yield from do_insertions(
insertions, jllexer.get_tokens_unprocessed(curcode))
curcode = ''
insertions = []
if line.startswith('ERROR: ') or error:
yield start, Generic.Error, line
error = True
else:
yield start, Generic.Output, line
output = True
start += len(line)
if curcode:
yield from do_insertions(
insertions, jllexer.get_tokens_unprocessed(curcode))