This repository was archived on 2024-06-20. You can view files and clone it, but you cannot make any changes to its state, such as pushing and creating new issues, pull requests or comments.
coffee.pygments/pygments/lexers/esoteric.py
Jean Abou Samra 25f230191f Move versionadded data to a lexer attribute
That way, we can set it to "" for old lexers, and check that it's
present on new lexers. (In the future, we might also use it for better
presentation in the documentation.)
2023-11-26 14:51:52 +01:00

300 lines
10 KiB
Python

"""
pygments.lexers.esoteric
~~~~~~~~~~~~~~~~~~~~~~~~
Lexers for esoteric languages.
:copyright: Copyright 2006-2023 by the Pygments team, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
from pygments.lexer import RegexLexer, include, words, bygroups
from pygments.token import Comment, Operator, Keyword, Name, String, Number, \
Punctuation, Error, Whitespace
# Names exported by ``from pygments.lexers.esoteric import *``; one entry per
# lexer class defined in this module.
__all__ = ['BrainfuckLexer', 'BefungeLexer', 'RedcodeLexer', 'CAmkESLexer',
           'CapDLLexer', 'AheuiLexer']
class BrainfuckLexer(RegexLexer):
    """
    Lexer for the esoteric BrainFuck language.
    """

    name = 'Brainfuck'
    url = 'http://www.muppetlabs.com/~breadbox/bf/'
    aliases = ['brainfuck', 'bf']
    filenames = ['*.bf', '*.b']
    mimetypes = ['application/x-brainfuck']
    version_added = ''

    tokens = {
        # Rules shared by the 'root' and 'loop' states.
        'common': [
            # Each instruction family gets its own token type so the
            # families are coloured differently.
            (r'[.,]+', Name.Tag),
            (r'[+-]+', Name.Builtin),
            (r'[<>]+', Name.Variable),
            # Any run of characters that are neither instructions nor
            # brackets is treated as a comment.
            (r'[^.,+\-<>\[\]]+', Comment),
        ],
        'root': [
            (r'\[', Keyword, 'loop'),
            # A closing bracket at top level has no opening bracket to pair
            # with, so it is flagged as an error.
            (r'\]', Error),
            include('common'),
        ],
        'loop': [
            # Nested loops push another 'loop' state; the matching bracket
            # pops it again.
            (r'\[', Keyword, '#push'),
            (r'\]', Keyword, '#pop'),
            include('common'),
        ],
    }

    def analyse_text(text):
        """Estimate how likely it is that *text* is Brainfuck.

        Returns 1.0 when more than a quarter of the checked window consists
        of ``+``/``-`` characters, or of ``<``/``>`` characters.  Otherwise
        returns 0.5 if the substring ``[-]`` occurs in *text*, and 0 if not.
        """
        # Because max() is used, the slice below always covers the whole
        # text and the divisor is never smaller than 256.
        # NOTE(review): min() may have been intended here -- confirm before
        # changing, since it would alter the score of short inputs.
        window = max(256, len(text))
        sample = text[:window]
        threshold = 0.25 * window

        if sample.count('+') + sample.count('-') > threshold:
            return 1.0
        if sample.count('<') + sample.count('>') > threshold:
            return 1.0

        return 0.5 if '[-]' in text else 0
class BefungeLexer(RegexLexer):
    """
    Lexer for the esoteric Befunge language.
    """

    name = 'Befunge'
    url = 'http://en.wikipedia.org/wiki/Befunge'
    aliases = ['befunge']
    filenames = ['*.befunge']
    mimetypes = ['application/x-befunge']
    version_added = '0.7'

    # Single state: every rule consumes one instruction character (or one
    # string / whitespace run) and the order below is the matching priority.
    tokens = {
        'root': [
            (r'[0-9a-f]', Number),
            # Traditional math
            (r'[+*/%!`-]', Operator),
            # Move, imperatives
            (r'[<>^v?\[\]rxjk]', Name.Variable),
            # Stack ops, imperatives
            (r'[:\\$.,n]', Name.Builtin),
            (r'[|_mw]', Keyword),
            # Befunge-98 stack ops
            (r'[{}]', Name.Tag),
            # Strings don't appear to allow escapes
            (r'".*?"', String.Double),
            # Single character
            (r'\'.', String.Single),
            # Trampoline... depends on direction hit
            (r'[#;]', Comment),
            # Misc
            (r'[pg&~=@iotsy]', Keyword),
            # Fingerprints
            (r'[()A-Z]', Comment),
            # Whitespace doesn't matter
            (r'\s+', Whitespace),
        ],
    }
class CAmkESLexer(RegexLexer):
    """
    Basic lexer for the input language for the CAmkES component platform.
    """

    name = 'CAmkES'
    url = 'https://sel4.systems/CAmkES/'
    aliases = ['camkes', 'idl4']
    filenames = ['*.camkes', '*.idl4']
    version_added = '2.1'

    # Single state.  Rules are tried top to bottom, so more specific
    # patterns must come before the more general ones they overlap with.
    tokens = {
        'root': [
            # C pre-processor directive
            (r'^(\s*)(#.*)(\n)', bygroups(Whitespace, Comment.Preproc,
                                          Whitespace)),

            # Whitespace, comments
            (r'\s+', Whitespace),
            (r'/\*(.|\n)*?\*/', Comment),
            (r'//.*$', Comment),

            (r'[\[(){},.;\]]', Punctuation),
            (r'[~!%^&*+=|?:<>/-]', Operator),

            (words(('assembly', 'attribute', 'component', 'composition',
                    'configuration', 'connection', 'connector', 'consumes',
                    'control', 'dataport', 'Dataport', 'Dataports', 'emits',
                    'event', 'Event', 'Events', 'export', 'from', 'group',
                    'hardware', 'has', 'interface', 'Interface', 'maybe',
                    'procedure', 'Procedure', 'Procedures', 'provides',
                    'template', 'thread', 'threads', 'to', 'uses', 'with'),
                   suffix=r'\b'), Keyword),

            # 'int16_t' was previously misspelled 'int16_6', so the real
            # type name fell through to the generic identifier rule.
            (words(('bool', 'boolean', 'Buf', 'char', 'character', 'double',
                    'float', 'in', 'inout', 'int', 'int16_t', 'int32_t',
                    'int64_t', 'int8_t', 'integer', 'mutex', 'out', 'real',
                    'refin', 'semaphore', 'signed', 'string', 'struct',
                    'uint16_t', 'uint32_t', 'uint64_t', 'uint8_t', 'uintptr_t',
                    'unsigned', 'void'),
                   suffix=r'\b'), Keyword.Type),

            # Recognised attributes
            (r'[a-zA-Z_]\w*_(priority|domain|buffer)', Keyword.Reserved),
            (words(('dma_pool', 'from_access', 'to_access'), suffix=r'\b'),
             Keyword.Reserved),

            # CAmkES-level include
            (r'(import)(\s+)((?:<[^>]*>|"[^"]*");)',
             bygroups(Comment.Preproc, Whitespace, Comment.Preproc)),

            # C-level include
            (r'(include)(\s+)((?:<[^>]*>|"[^"]*");)',
             bygroups(Comment.Preproc, Whitespace, Comment.Preproc)),

            # Literals
            (r'0[xX][\da-fA-F]+', Number.Hex),
            # The float rule must precede the integer rule: otherwise the
            # integer rule consumes the digits before the '.' and the float
            # rule can never match.
            (r'-?[\d]+\.[\d]+', Number.Float),
            (r'-?[\d]+', Number),
            (r'"[^"]*"', String),
            (r'[Tt]rue|[Ff]alse', Name.Builtin),

            # Identifiers
            (r'[a-zA-Z_]\w*', Name),
        ],
    }
class CapDLLexer(RegexLexer):
    """
    Basic lexer for CapDL.

    The source of the primary tool that reads such specifications is available
    at https://github.com/seL4/capdl/tree/master/capDL-tool. Note that this
    lexer only supports a subset of the grammar. For example, identifiers can
    shadow type names, but these instances are currently incorrectly
    highlighted as types. Supporting this would need a stateful lexer that is
    considered unnecessarily complex for now.
    """

    name = 'CapDL'
    url = 'https://ssrg.nicta.com.au/publications/nictaabstracts/Kuz_KLW_10.abstract.pml'
    aliases = ['capdl']
    filenames = ['*.cdl']
    version_added = '2.2'

    # Word lists used by the rule table below, grouped by the token type
    # each list is highlighted with.
    _keywords = (
        'arch', 'arm11', 'caps', 'child_of', 'ia32', 'irq', 'maps',
        'objects',
    )
    _types = (
        'aep', 'asid_pool', 'cnode', 'ep', 'frame', 'io_device',
        'io_ports', 'io_pt', 'notification', 'pd', 'pt', 'tcb',
        'ut', 'vcpu',
    )
    _properties = (
        'asid', 'addr', 'badge', 'cached', 'dom', 'domainID', 'elf',
        'fault_ep', 'G', 'guard', 'guard_size', 'init', 'ip',
        'prio', 'sp', 'R', 'RG', 'RX', 'RW', 'RWG', 'RWX', 'W',
        'WG', 'WX', 'level', 'masked', 'master_reply', 'paddr',
        'ports', 'reply', 'uncached',
    )
    _named_slots = (
        'cspace', 'vspace', 'reply_slot', 'caller_slot',
        'ipc_buffer_slot',
    )

    tokens = {
        'root': [
            # C pre-processor directive
            (r'^(\s*)(#.*)(\n)',
             bygroups(Whitespace, Comment.Preproc, Whitespace)),

            # Whitespace, comments
            (r'\s+', Whitespace),
            (r'/\*(.|\n)*?\*/', Comment),
            (r'(//|--).*$', Comment),

            (r'[<>\[(){},:;=\]]', Punctuation),
            (r'\.\.', Punctuation),

            (words(_keywords, suffix=r'\b'), Keyword),
            (words(_types, suffix=r'\b'), Keyword.Type),

            # Properties
            (words(_properties, suffix=r'\b'), Keyword.Reserved),

            # Literals
            (r'0[xX][\da-fA-F]+', Number.Hex),
            (r'\d+(\.\d+)?(k|M)?', Number),
            (words(('bits',), suffix=r'\b'), Number),
            (words(_named_slots, suffix=r'\b'), Number),

            # Identifiers
            (r'[a-zA-Z_][-@\.\w]*', Name),
        ],
    }
class RedcodeLexer(RegexLexer):
    """
    A simple Redcode lexer based on ICWS'94.
    Contributed by Adam Blinkinsop <blinks@acm.org>.
    """

    name = 'Redcode'
    aliases = ['redcode']
    filenames = ['*.cw']
    url = 'https://en.wikipedia.org/wiki/Core_War'
    version_added = '0.8'

    # Instruction mnemonics and instruction modifiers; both tuples are
    # joined into alternations for the identifier rules below.
    opcodes = ('DAT', 'MOV', 'ADD', 'SUB', 'MUL', 'DIV', 'MOD',
               'JMP', 'JMZ', 'JMN', 'DJN', 'CMP', 'SLT', 'SPL',
               'ORG', 'EQU', 'END')
    modifiers = ('A', 'B', 'AB', 'BA', 'F', 'X', 'I')

    tokens = {
        'root': [
            # Whitespace:
            (r'\s+', Whitespace),
            (r';.*$', Comment.Single),
            # Lexemes:
            #  Identifiers
            (r'\b(%s)\b' % '|'.join(opcodes), Name.Function),
            (r'\b(%s)\b' % '|'.join(modifiers), Name.Decorator),
            # '\w*' (not '\w+') so that one-character names such as 'x'
            # are lexed as names instead of producing Error tokens.
            (r'[A-Za-z_]\w*', Name),
            #  Operators
            (r'[-+*/%]', Operator),
            (r'[#$@<>]', Operator),  # mode
            (r'[.,]', Punctuation),  # mode
            #  Numbers
            (r'[-+]?\d+', Number.Integer),
        ],
    }
class AheuiLexer(RegexLexer):
    """
    Aheui is esoteric language based on Korean alphabets.
    """

    name = 'Aheui'
    url = 'http://aheui.github.io/'
    aliases = ['aheui']
    filenames = ['*.aheui']
    version_added = ''

    tokens = {
        'root': [
            # One character class built from adjacent string literals: any
            # single Hangul syllable inside these ranges is an operator.
            (
                '['
                '나-낳냐-냫너-넣녀-녛노-놓뇨-눟뉴-닇'
                '다-닿댜-댷더-덯뎌-뎧도-돟됴-둫듀-딓'
                '따-땋땨-떃떠-떻뗘-뗳또-똫뚀-뚷뜌-띟'
                '라-랗랴-럏러-렇려-렿로-롷료-뤃류-릫'
                '마-맣먀-먛머-멓며-몋모-뫃묘-뭏뮤-믷'
                '바-밯뱌-뱧버-벟벼-볗보-봏뵤-붛뷰-빃'
                '빠-빻뺘-뺳뻐-뻫뼈-뼣뽀-뽛뾰-뿧쀼-삏'
                '사-샇샤-샿서-섷셔-셯소-솧쇼-숳슈-싛'
                '싸-쌓쌰-썋써-쎃쎠-쎻쏘-쏳쑈-쑿쓔-씧'
                '자-잫쟈-쟣저-젛져-졓조-좋죠-줗쥬-즿'
                '차-챃챠-챻처-첳쳐-쳫초-촣쵸-춯츄-칗'
                '카-캏캬-컇커-컿켜-켷코-콯쿄-쿻큐-킣'
                '타-탛탸-턓터-텋텨-톃토-톻툐-퉇튜-틯'
                '파-팧퍄-퍟퍼-펗펴-폏포-퐇표-풓퓨-픻'
                '하-핳햐-햫허-헣혀-혛호-홓효-훟휴-힇'
                ']',
                Operator,
            ),
            # Any other single character is highlighted as a comment.
            ('.', Comment),
        ],
    }