That way, we can set it to "" for old lexers, and check that it's present on new lexers. (In the future, we might also use it for better presentation in the documentation.)
300 lines
10 KiB
Python
300 lines
10 KiB
Python
"""
|
|
pygments.lexers.esoteric
|
|
~~~~~~~~~~~~~~~~~~~~~~~~
|
|
|
|
Lexers for esoteric languages.
|
|
|
|
:copyright: Copyright 2006-2023 by the Pygments team, see AUTHORS.
|
|
:license: BSD, see LICENSE for details.
|
|
"""
|
|
|
|
from pygments.lexer import RegexLexer, include, words, bygroups
|
|
from pygments.token import Comment, Operator, Keyword, Name, String, Number, \
|
|
Punctuation, Error, Whitespace
|
|
|
|
# Lexer classes exported by this module.
__all__ = [
    'BrainfuckLexer',
    'BefungeLexer',
    'RedcodeLexer',
    'CAmkESLexer',
    'CapDLLexer',
    'AheuiLexer',
]
|
|
|
|
|
|
class BrainfuckLexer(RegexLexer):
    """
    Lexer for the esoteric BrainFuck language.
    """

    name = 'Brainfuck'
    url = 'http://www.muppetlabs.com/~breadbox/bf/'
    aliases = ['brainfuck', 'bf']
    filenames = ['*.bf', '*.b']
    mimetypes = ['application/x-brainfuck']
    version_added = ''

    tokens = {
        # Rules shared by 'root' and 'loop'.  Each instruction family gets
        # its own token type so that it is coloured differently.
        'common': [
            (r'[.,]+', Name.Tag),
            (r'[+-]+', Name.Builtin),
            (r'[<>]+', Name.Variable),
            # Everything that is not one of the eight instruction
            # characters is treated as a comment.
            (r'[^.,+\-<>\[\]]+', Comment),
        ],
        'root': [
            (r'\[', Keyword, 'loop'),
            # A closing bracket outside of any loop has no opening partner.
            (r'\]', Error),
            include('common'),
        ],
        'loop': [
            # Nested loops push another 'loop' state; ']' leaves one level.
            (r'\[', Keyword, '#push'),
            (r'\]', Keyword, '#pop'),
            include('common'),
        ]
    }

    def analyse_text(text):
        """Score how likely *text* is to be Brainfuck source.

        Returns 1.0 when the ``+``/``-`` characters, or the ``<``/``>``
        characters, number more than a quarter of the sample size; otherwise
        0.5 when the text contains ``[-]``; otherwise 0.
        """
        # The sample size is never smaller than 256, so inputs shorter than
        # that still need more than 64 matching characters to score 1.0.
        # NOTE(review): because of max(), the slice below always covers the
        # whole text -- confirm whether min() was intended.
        sample_size = max(256, len(text))
        threshold = 0.25 * sample_size
        sample = text[:sample_size]

        arithmetic_count = sample.count('+') + sample.count('-')
        if arithmetic_count > threshold:
            return 1.0

        movement_count = sample.count('<') + sample.count('>')
        if movement_count > threshold:
            return 1.0

        return 0.5 if '[-]' in text else 0
|
|
|
|
|
|
class BefungeLexer(RegexLexer):
    """
    Lexer for the esoteric Befunge language.
    """
    name = 'Befunge'
    url = 'http://en.wikipedia.org/wiki/Befunge'
    aliases = ['befunge']
    filenames = ['*.befunge']
    mimetypes = ['application/x-befunge']
    version_added = '0.7'

    # Every rule matches a single instruction character, except for string
    # literals and whitespace runs.
    tokens = {
        'root': [
            (r'[0-9a-f]', Number),
            # Traditional math
            (r'[+*/%!`-]', Operator),
            # Move, imperatives
            (r'[<>^v?\[\]rxjk]', Name.Variable),
            # Stack ops, imperatives
            (r'[:\\$.,n]', Name.Builtin),
            (r'[|_mw]', Keyword),
            # Befunge-98 stack ops
            (r'[{}]', Name.Tag),
            # Strings don't appear to allow escapes
            (r'".*?"', String.Double),
            # Single character
            (r'\'.', String.Single),
            # Trampoline... depends on direction hit
            (r'[#;]', Comment),
            # Misc
            (r'[pg&~=@iotsy]', Keyword),
            # Fingerprints
            (r'[()A-Z]', Comment),
            # Whitespace doesn't matter
            (r'\s+', Whitespace),
        ],
    }
|
|
|
|
|
|
class CAmkESLexer(RegexLexer):
    """
    Basic lexer for the input language for the CAmkES component platform.
    """
    name = 'CAmkES'
    url = 'https://sel4.systems/CAmkES/'
    aliases = ['camkes', 'idl4']
    filenames = ['*.camkes', '*.idl4']
    version_added = '2.1'

    # Rules are tried in order, so more specific patterns must come before
    # the more general ones that would otherwise shadow them.
    tokens = {
        'root': [
            # C pre-processor directive
            (r'^(\s*)(#.*)(\n)', bygroups(Whitespace, Comment.Preproc,
                                          Whitespace)),

            # Whitespace, comments
            (r'\s+', Whitespace),
            (r'/\*(.|\n)*?\*/', Comment),
            (r'//.*$', Comment),

            (r'[\[(){},.;\]]', Punctuation),
            (r'[~!%^&*+=|?:<>/-]', Operator),

            # Language keywords
            (words(('assembly', 'attribute', 'component', 'composition',
                    'configuration', 'connection', 'connector', 'consumes',
                    'control', 'dataport', 'Dataport', 'Dataports', 'emits',
                    'event', 'Event', 'Events', 'export', 'from', 'group',
                    'hardware', 'has', 'interface', 'Interface', 'maybe',
                    'procedure', 'Procedure', 'Procedures', 'provides',
                    'template', 'thread', 'threads', 'to', 'uses', 'with'),
                   suffix=r'\b'), Keyword),

            # Type names and parameter direction markers
            (words(('bool', 'boolean', 'Buf', 'char', 'character', 'double',
                    'float', 'in', 'inout', 'int', 'int16_t', 'int32_t',
                    'int64_t', 'int8_t', 'integer', 'mutex', 'out', 'real',
                    'refin', 'semaphore', 'signed', 'string', 'struct',
                    'uint16_t', 'uint32_t', 'uint64_t', 'uint8_t', 'uintptr_t',
                    'unsigned', 'void'),
                   suffix=r'\b'), Keyword.Type),

            # Recognised attributes
            (r'[a-zA-Z_]\w*_(priority|domain|buffer)', Keyword.Reserved),
            (words(('dma_pool', 'from_access', 'to_access'), suffix=r'\b'),
             Keyword.Reserved),

            # CAmkES-level include
            (r'(import)(\s+)((?:<[^>]*>|"[^"]*");)',
             bygroups(Comment.Preproc, Whitespace, Comment.Preproc)),

            # C-level include
            (r'(include)(\s+)((?:<[^>]*>|"[^"]*");)',
             bygroups(Comment.Preproc, Whitespace, Comment.Preproc)),

            # Literals
            (r'0[xX][\da-fA-F]+', Number.Hex),
            # The float rule has to precede the integer rule: otherwise the
            # integer rule consumes the digits before the '.' and the float
            # rule can never match.
            (r'-?[\d]+\.[\d]+', Number.Float),
            (r'-?[\d]+', Number),
            (r'"[^"]*"', String),
            (r'[Tt]rue|[Ff]alse', Name.Builtin),

            # Identifiers
            (r'[a-zA-Z_]\w*', Name),
        ],
    }
|
|
|
|
|
|
class CapDLLexer(RegexLexer):
    """
    Basic lexer for CapDL.

    Only a subset of the grammar is supported. In particular, an identifier
    may shadow a type name, and such identifiers are currently highlighted
    (incorrectly) as types; handling this would need a stateful lexer, which
    is considered unnecessarily complex for now.

    The source of the primary tool that reads such specifications is available
    at https://github.com/seL4/capdl/tree/master/capDL-tool.
    """
    name = 'CapDL'
    url = 'https://ssrg.nicta.com.au/publications/nictaabstracts/Kuz_KLW_10.abstract.pml'
    aliases = ['capdl']
    filenames = ['*.cdl']
    version_added = '2.2'

    tokens = {
        'root': [
            # Lines starting with '#' are C pre-processor directives.
            (
                r'^(\s*)(#.*)(\n)',
                bygroups(Whitespace, Comment.Preproc, Whitespace),
            ),

            # Whitespace, block comments, and '//' or '--' line comments.
            (r'\s+', Whitespace),
            (r'/\*(.|\n)*?\*/', Comment),
            (r'(//|--).*$', Comment),

            (r'[<>\[(){},:;=\]]', Punctuation),
            (r'\.\.', Punctuation),

            # Keywords.
            (
                words(
                    ('arch', 'arm11', 'caps', 'child_of', 'ia32', 'irq',
                     'maps', 'objects'),
                    suffix=r'\b'),
                Keyword,
            ),

            # Type names.
            (
                words(
                    ('aep', 'asid_pool', 'cnode', 'ep', 'frame', 'io_device',
                     'io_ports', 'io_pt', 'notification', 'pd', 'pt', 'tcb',
                     'ut', 'vcpu'),
                    suffix=r'\b'),
                Keyword.Type,
            ),

            # Properties.
            (
                words(
                    ('asid', 'addr', 'badge', 'cached', 'dom', 'domainID',
                     'elf', 'fault_ep', 'G', 'guard', 'guard_size', 'init',
                     'ip', 'prio', 'sp', 'R', 'RG', 'RX', 'RW', 'RWG', 'RWX',
                     'W', 'WG', 'WX', 'level', 'masked', 'master_reply',
                     'paddr', 'ports', 'reply', 'uncached'),
                    suffix=r'\b'),
                Keyword.Reserved,
            ),

            # Literals: hex numbers, decimals with an optional k/M suffix,
            # and a few words that are highlighted like numbers.
            (r'0[xX][\da-fA-F]+', Number.Hex),
            (r'\d+(\.\d+)?(k|M)?', Number),
            (words(('bits',), suffix=r'\b'), Number),
            (
                words(
                    ('cspace', 'vspace', 'reply_slot', 'caller_slot',
                     'ipc_buffer_slot'),
                    suffix=r'\b'),
                Number,
            ),

            # Identifiers; '-', '@' and '.' may follow the first character.
            (r'[a-zA-Z_][-@\.\w]*', Name),
        ],
    }
|
|
|
|
|
|
class RedcodeLexer(RegexLexer):
    """
    A simple Redcode lexer based on ICWS'94.
    Contributed by Adam Blinkinsop <blinks@acm.org>.
    """
    name = 'Redcode'
    aliases = ['redcode']
    filenames = ['*.cw']
    url = 'https://en.wikipedia.org/wiki/Core_War'
    version_added = '0.8'

    # Instruction mnemonics and pseudo-ops, highlighted as Name.Function.
    opcodes = ('DAT', 'MOV', 'ADD', 'SUB', 'MUL', 'DIV', 'MOD',
               'JMP', 'JMZ', 'JMN', 'DJN', 'CMP', 'SLT', 'SPL',
               'ORG', 'EQU', 'END')
    # Instruction modifiers, highlighted as Name.Decorator.
    modifiers = ('A', 'B', 'AB', 'BA', 'F', 'X', 'I')

    tokens = {
        'root': [
            # Whitespace:
            (r'\s+', Whitespace),
            (r';.*$', Comment.Single),
            # Lexemes:
            #  Identifiers
            (r'\b(%s)\b' % '|'.join(opcodes), Name.Function),
            (r'\b(%s)\b' % '|'.join(modifiers), Name.Decorator),
            # '\w*' rather than '\w+', so that one-character labels are
            # matched as names instead of falling through every rule and
            # being emitted as Error tokens.
            (r'[A-Za-z_]\w*', Name),
            #  Operators
            (r'[-+*/%]', Operator),
            (r'[#$@<>]', Operator),  # mode
            (r'[.,]', Punctuation),  # mode
            #  Numbers
            (r'[-+]?\d+', Number.Integer),
        ],
    }
|
|
|
|
|
|
class AheuiLexer(RegexLexer):
    """
    Lexer for Aheui, an esoteric language based on the Korean alphabet.
    """

    name = 'Aheui'
    url = 'http://aheui.github.io/'
    aliases = ['aheui']
    filenames = ['*.aheui']
    version_added = ''

    tokens = {
        'root': [
            # One character class assembled from adjacent string literals:
            # a single character that falls into any of the Hangul syllable
            # ranges below is tagged as Operator.
            # NOTE(review): each row appears to group syllables sharing the
            # same initial consonant -- confirm against the Aheui spec.
            ('['
             '나-낳냐-냫너-넣녀-녛노-놓뇨-눟뉴-닇'
             '다-닿댜-댷더-덯뎌-뎧도-돟됴-둫듀-딓'
             '따-땋땨-떃떠-떻뗘-뗳또-똫뚀-뚷뜌-띟'
             '라-랗랴-럏러-렇려-렿로-롷료-뤃류-릫'
             '마-맣먀-먛머-멓며-몋모-뫃묘-뭏뮤-믷'
             '바-밯뱌-뱧버-벟벼-볗보-봏뵤-붛뷰-빃'
             '빠-빻뺘-뺳뻐-뻫뼈-뼣뽀-뽛뾰-뿧쀼-삏'
             '사-샇샤-샿서-섷셔-셯소-솧쇼-숳슈-싛'
             '싸-쌓쌰-썋써-쎃쎠-쎻쏘-쏳쑈-쑿쓔-씧'
             '자-잫쟈-쟣저-젛져-졓조-좋죠-줗쥬-즿'
             '차-챃챠-챻처-첳쳐-쳫초-촣쵸-춯츄-칗'
             '카-캏캬-컇커-컿켜-켷코-콯쿄-쿻큐-킣'
             '타-탛탸-턓터-텋텨-톃토-톻툐-퉇튜-틯'
             '파-팧퍄-퍟퍼-펗펴-폏포-퐇표-풓퓨-픻'
             '하-핳햐-햫허-헣혀-혛호-홓효-훟휴-힇'
             ']', Operator),
            # Any other single character matched by '.' is a comment.
            ('.', Comment),
        ],
    }
|