That way, we can set it to "" for old lexers, and check that it's present on new lexers. (In the future, we might also use it for better presentation in the documentation.)
644 lines
30 KiB
Python
644 lines
30 KiB
Python
"""
|
|
pygments.lexers.pascal
|
|
~~~~~~~~~~~~~~~~~~~~~~
|
|
|
|
Lexers for Pascal family languages.
|
|
|
|
:copyright: Copyright 2006-2023 by the Pygments team, see AUTHORS.
|
|
:license: BSD, see LICENSE for details.
|
|
"""
|
|
|
|
import re
|
|
|
|
from pygments.lexer import Lexer
|
|
from pygments.util import get_bool_opt, get_list_opt
|
|
from pygments.token import Comment, Operator, Keyword, Name, String, \
|
|
Number, Punctuation, Error, Whitespace
|
|
from pygments.scanner import Scanner
|
|
|
|
# compatibility import
|
|
from pygments.lexers.modula2 import Modula2Lexer # noqa: F401
|
|
|
|
# Names exported by ``from pygments.lexers.pascal import *``.  The
# Modula2Lexer compatibility import above is not part of this list.
__all__ = ['DelphiLexer', 'PortugolLexer']
|
|
|
|
|
|
class PortugolLexer(Lexer):
    """For Portugol, a Pascal dialect with keywords in Portuguese."""
    name = 'Portugol'
    aliases = ['portugol']
    filenames = ['*.alg', '*.portugol']
    mimetypes = []
    url = "https://www.apoioinformatica.inf.br/produtos/visualg/linguagem"
    version_added = ''

    def __init__(self, **options):
        """Create the lexer; all tokenizing is delegated to a `DelphiLexer`
        that is switched into Portugol mode.

        `options` are forwarded unchanged to both `Lexer` and the wrapped
        `DelphiLexer`, except that ``portugol`` is always forced to ``True``.
        """
        Lexer.__init__(self, **options)
        # Merge instead of passing ``portugol=True`` as an extra keyword:
        # a caller-supplied ``portugol`` option would otherwise raise
        # ``TypeError: got multiple values for keyword argument``.
        delegate_options = {**options, 'portugol': True}
        self.lexer = DelphiLexer(**delegate_options)

    def get_tokens_unprocessed(self, text):
        """Return the ``(index, tokentype, value)`` stream produced by the
        wrapped Portugol-mode `DelphiLexer` for `text`."""
        return self.lexer.get_tokens_unprocessed(text)
|
|
|
|
|
|
class DelphiLexer(Lexer):
    """
    For Delphi (Borland Object Pascal),
    Turbo Pascal and Free Pascal source code.

    Additional options accepted:

    `turbopascal`
        Highlight Turbo Pascal specific keywords (default: ``True``).
    `delphi`
        Highlight Borland Delphi specific keywords (default: ``True``).
    `freepascal`
        Highlight Free Pascal specific keywords (default: ``True``).
    `units`
        A list of units that should be considered builtin, supported are
        ``System``, ``SysUtils``, ``Classes`` and ``Math``.
        Default is to consider all of them builtin.
    """
    name = 'Delphi'
    aliases = ['delphi', 'pas', 'pascal', 'objectpascal']
    filenames = ['*.pas', '*.dpr']
    mimetypes = ['text/x-pascal']
    url = 'https://www.embarcadero.com/products/delphi'
    version_added = ''

    # NOTE: every name table below is matched against the *lower-cased*
    # identifier, so all entries must be written in lower case.

    TURBO_PASCAL_KEYWORDS = (
        'absolute', 'and', 'array', 'asm', 'begin', 'break', 'case',
        'const', 'constructor', 'continue', 'destructor', 'div', 'do',
        'downto', 'else', 'end', 'file', 'for', 'function', 'goto',
        'if', 'implementation', 'in', 'inherited', 'inline', 'interface',
        'label', 'mod', 'nil', 'not', 'object', 'of', 'on', 'operator',
        'or', 'packed', 'procedure', 'program', 'record', 'reintroduce',
        'repeat', 'self', 'set', 'shl', 'shr', 'string', 'then', 'to',
        'type', 'unit', 'until', 'uses', 'var', 'while', 'with', 'xor'
    )

    DELPHI_KEYWORDS = (
        'as', 'class', 'except', 'exports', 'finalization', 'finally',
        'initialization', 'is', 'library', 'on', 'property', 'raise',
        'threadvar', 'try'
    )

    FREE_PASCAL_KEYWORDS = (
        'dispose', 'exit', 'false', 'new', 'true'
    )

    # Keywords that terminate a function/property "modifier context".
    BLOCK_KEYWORDS = {
        'begin', 'class', 'const', 'constructor', 'destructor', 'end',
        'finalization', 'function', 'implementation', 'initialization',
        'label', 'library', 'operator', 'procedure', 'program', 'property',
        'record', 'threadvar', 'type', 'unit', 'uses', 'var'
    }

    FUNCTION_MODIFIERS = {
        'alias', 'cdecl', 'export', 'inline', 'interrupt', 'nostackframe',
        'pascal', 'register', 'safecall', 'softfloat', 'stdcall',
        'varargs', 'name', 'dynamic', 'near', 'virtual', 'external',
        'override', 'assembler'
    }

    # XXX: those aren't global. but currently we know no way for defining
    #      them just for the type context.
    DIRECTIVES = {
        'absolute', 'abstract', 'assembler', 'cppdecl', 'default', 'far',
        'far16', 'forward', 'index', 'oldfpccall', 'private', 'protected',
        'published', 'public'
    }

    BUILTIN_TYPES = {
        'ansichar', 'ansistring', 'bool', 'boolean', 'byte', 'bytebool',
        'cardinal', 'char', 'comp', 'currency', 'double', 'dword',
        'extended', 'int64', 'integer', 'iunknown', 'longbool', 'longint',
        'longword', 'pansichar', 'pansistring', 'pbool', 'pboolean',
        'pbyte', 'pbytearray', 'pcardinal', 'pchar', 'pcomp', 'pcurrency',
        'pdate', 'pdatetime', 'pdouble', 'pdword', 'pextended', 'phandle',
        'pint64', 'pinteger', 'plongint', 'plongword', 'pointer',
        'ppointer', 'pshortint', 'pshortstring', 'psingle', 'psmallint',
        'pstring', 'pvariant', 'pwidechar', 'pwidestring', 'pword',
        'pwordarray', 'pwordbool', 'real', 'real48', 'shortint',
        'shortstring', 'single', 'smallint', 'string', 'tclass', 'tdate',
        'tdatetime', 'textfile', 'thandle', 'tobject', 'ttime', 'variant',
        'widechar', 'widestring', 'word', 'wordbool'
    }

    # Routines treated as builtins, keyed by the unit name accepted in the
    # `units` option.
    BUILTIN_UNITS = {
        'System': (
            'abs', 'acquireexceptionobject', 'addr', 'ansitoutf8',
            'append', 'arctan', 'assert', 'assigned', 'assignfile',
            'beginthread', 'blockread', 'blockwrite', 'break', 'chdir',
            'chr', 'close', 'closefile', 'comptocurrency', 'comptodouble',
            'concat', 'continue', 'copy', 'cos', 'dec', 'delete',
            'dispose', 'doubletocomp', 'endthread', 'enummodules',
            'enumresourcemodules', 'eof', 'eoln', 'erase', 'exceptaddr',
            'exceptobject', 'exclude', 'exit', 'exp', 'filepos', 'filesize',
            'fillchar', 'finalize', 'findclasshinstance', 'findhinstance',
            'findresourcehinstance', 'flush', 'frac', 'freemem',
            'get8087cw', 'getdir', 'getlasterror', 'getmem',
            'getmemorymanager', 'getmodulefilename', 'getvariantmanager',
            'halt', 'hi', 'high', 'inc', 'include', 'initialize', 'insert',
            'int', 'ioresult', 'ismemorymanagerset', 'isvariantmanagerset',
            'length', 'ln', 'lo', 'low', 'mkdir', 'move', 'new', 'odd',
            'olestrtostring', 'olestrtostrvar', 'ord', 'paramcount',
            'paramstr', 'pi', 'pos', 'pred', 'ptr', 'pucs4chars', 'random',
            'randomize', 'read', 'readln', 'reallocmem',
            'releaseexceptionobject', 'rename', 'reset', 'rewrite', 'rmdir',
            'round', 'runerror', 'seek', 'seekeof', 'seekeoln',
            'set8087cw', 'setlength', 'setlinebreakstyle',
            'setmemorymanager', 'setstring', 'settextbuf',
            'setvariantmanager', 'sin', 'sizeof', 'slice', 'sqr', 'sqrt',
            'str', 'stringofchar', 'stringtoolestr', 'stringtowidechar',
            'succ', 'swap', 'trunc', 'truncate', 'typeinfo',
            'ucs4stringtowidestring', 'unicodetoutf8', 'uniquestring',
            'upcase', 'utf8decode', 'utf8encode', 'utf8toansi',
            'utf8tounicode', 'val', 'vararrayredim', 'varclear',
            'widecharlentostring', 'widecharlentostrvar',
            'widechartostring', 'widechartostrvar',
            'widestringtoucs4string', 'write', 'writeln'
        ),
        'SysUtils': (
            'abort', 'addexitproc', 'addterminateproc', 'adjustlinebreaks',
            'allocmem', 'ansicomparefilename', 'ansicomparestr',
            'ansicomparetext', 'ansidequotedstr', 'ansiextractquotedstr',
            'ansilastchar', 'ansilowercase', 'ansilowercasefilename',
            'ansipos', 'ansiquotedstr', 'ansisamestr', 'ansisametext',
            'ansistrcomp', 'ansistricomp', 'ansistrlastchar', 'ansistrlcomp',
            'ansistrlicomp', 'ansistrlower', 'ansistrpos', 'ansistrrscan',
            'ansistrscan', 'ansistrupper', 'ansiuppercase',
            'ansiuppercasefilename', 'appendstr', 'assignstr', 'beep',
            'booltostr', 'bytetocharindex', 'bytetocharlen', 'bytetype',
            'callterminateprocs', 'changefileext', 'charlength',
            'chartobyteindex', 'chartobytelen', 'comparemem', 'comparestr',
            'comparetext', 'createdir', 'createguid', 'currentyear',
            'currtostr', 'currtostrf', 'date', 'datetimetofiledate',
            'datetimetostr', 'datetimetostring', 'datetimetosystemtime',
            'datetimetotimestamp', 'datetostr', 'dayofweek', 'decodedate',
            'decodedatefully', 'decodetime', 'deletefile', 'directoryexists',
            'diskfree', 'disksize', 'disposestr', 'encodedate', 'encodetime',
            'exceptionerrormessage', 'excludetrailingbackslash',
            'excludetrailingpathdelimiter', 'expandfilename',
            'expandfilenamecase', 'expanduncfilename', 'extractfiledir',
            'extractfiledrive', 'extractfileext', 'extractfilename',
            'extractfilepath', 'extractrelativepath', 'extractshortpathname',
            'fileage', 'fileclose', 'filecreate', 'filedatetodatetime',
            'fileexists', 'filegetattr', 'filegetdate', 'fileisreadonly',
            'fileopen', 'fileread', 'filesearch', 'fileseek', 'filesetattr',
            'filesetdate', 'filesetreadonly', 'filewrite', 'finalizepackage',
            'findclose', 'findcmdlineswitch', 'findfirst', 'findnext',
            'floattocurr', 'floattodatetime', 'floattodecimal', 'floattostr',
            'floattostrf', 'floattotext', 'floattotextfmt', 'fmtloadstr',
            'fmtstr', 'forcedirectories', 'format', 'formatbuf', 'formatcurr',
            'formatdatetime', 'formatfloat', 'freeandnil', 'getcurrentdir',
            'getenvironmentvariable', 'getfileversion', 'getformatsettings',
            'getlocaleformatsettings', 'getmodulename', 'getpackagedescription',
            'getpackageinfo', 'gettime', 'guidtostring', 'incamonth',
            'includetrailingbackslash', 'includetrailingpathdelimiter',
            'incmonth', 'initializepackage', 'interlockeddecrement',
            'interlockedexchange', 'interlockedexchangeadd',
            'interlockedincrement', 'inttohex', 'inttostr', 'isdelimiter',
            'isequalguid', 'isleapyear', 'ispathdelimiter', 'isvalidident',
            'languages', 'lastdelimiter', 'loadpackage', 'loadstr',
            'lowercase', 'msecstotimestamp', 'newstr', 'nextcharindex', 'now',
            'outofmemoryerror', 'quotedstr', 'raiselastoserror',
            'raiselastwin32error', 'removedir', 'renamefile', 'replacedate',
            'replacetime', 'safeloadlibrary', 'samefilename', 'sametext',
            'setcurrentdir', 'showexception', 'sleep', 'stralloc', 'strbufsize',
            'strbytetype', 'strcat', 'strcharlength', 'strcomp', 'strcopy',
            'strdispose', 'strecopy', 'strend', 'strfmt', 'stricomp',
            'stringreplace', 'stringtoguid', 'strlcat', 'strlcomp', 'strlcopy',
            'strlen', 'strlfmt', 'strlicomp', 'strlower', 'strmove', 'strnew',
            'strnextchar', 'strpas', 'strpcopy', 'strplcopy', 'strpos',
            'strrscan', 'strscan', 'strtobool', 'strtobooldef', 'strtocurr',
            'strtocurrdef', 'strtodate', 'strtodatedef', 'strtodatetime',
            'strtodatetimedef', 'strtofloat', 'strtofloatdef', 'strtoint',
            'strtoint64', 'strtoint64def', 'strtointdef', 'strtotime',
            'strtotimedef', 'strupper', 'supports', 'syserrormessage',
            'systemtimetodatetime', 'texttofloat', 'time', 'timestamptodatetime',
            'timestamptomsecs', 'timetostr', 'trim', 'trimleft', 'trimright',
            'tryencodedate', 'tryencodetime', 'tryfloattocurr', 'tryfloattodatetime',
            'trystrtobool', 'trystrtocurr', 'trystrtodate', 'trystrtodatetime',
            'trystrtofloat', 'trystrtoint', 'trystrtoint64', 'trystrtotime',
            'unloadpackage', 'uppercase', 'widecomparestr', 'widecomparetext',
            'widefmtstr', 'wideformat', 'wideformatbuf', 'widelowercase',
            'widesamestr', 'widesametext', 'wideuppercase', 'win32check',
            'wraptext'
        ),
        'Classes': (
            'activateclassgroup', 'allocatehwnd', 'bintohex', 'checksynchronize',
            'collectionsequal', 'countgenerations', 'deallocatehwnd', 'equalrect',
            'extractstrings', 'findclass', 'findglobalcomponent', 'getclass',
            'groupdescendantswith', 'hextobin', 'identtoint',
            'initinheritedcomponent', 'inttoident', 'invalidpoint',
            'isuniqueglobalcomponentname', 'linestart', 'objectbinarytotext',
            'objectresourcetotext', 'objecttexttobinary', 'objecttexttoresource',
            'pointsequal', 'readcomponentres', 'readcomponentresex',
            'readcomponentresfile', 'rect', 'registerclass', 'registerclassalias',
            'registerclasses', 'registercomponents', 'registerintegerconsts',
            'registernoicon', 'registernonactivex', 'smallpoint', 'startclassgroup',
            'teststreamformat', 'unregisterclass', 'unregisterclasses',
            'unregisterintegerconsts', 'unregistermoduleclasses',
            'writecomponentresfile'
        ),
        'Math': (
            'arccos', 'arccosh', 'arccot', 'arccoth', 'arccsc', 'arccsch', 'arcsec',
            'arcsech', 'arcsin', 'arcsinh', 'arctan2', 'arctanh', 'ceil',
            'comparevalue', 'cosecant', 'cosh', 'cot', 'cotan', 'coth', 'csc',
            'csch', 'cycletodeg', 'cycletograd', 'cycletorad', 'degtocycle',
            'degtograd', 'degtorad', 'divmod', 'doubledecliningbalance',
            'ensurerange', 'floor', 'frexp', 'futurevalue', 'getexceptionmask',
            'getprecisionmode', 'getroundmode', 'gradtocycle', 'gradtodeg',
            'gradtorad', 'hypot', 'inrange', 'interestpayment', 'interestrate',
            'internalrateofreturn', 'intpower', 'isinfinite', 'isnan', 'iszero',
            'ldexp', 'lnxp1', 'log10', 'log2', 'logn', 'max', 'maxintvalue',
            'maxvalue', 'mean', 'meanandstddev', 'min', 'minintvalue', 'minvalue',
            'momentskewkurtosis', 'netpresentvalue', 'norm', 'numberofperiods',
            'payment', 'periodpayment', 'poly', 'popnstddev', 'popnvariance',
            'power', 'presentvalue', 'radtocycle', 'radtodeg', 'radtograd',
            'randg', 'randomrange', 'roundto', 'samevalue', 'sec', 'secant',
            'sech', 'setexceptionmask', 'setprecisionmode', 'setroundmode',
            'sign', 'simpleroundto', 'sincos', 'sinh', 'slndepreciation', 'stddev',
            'sum', 'sumint', 'sumofsquares', 'sumsandsquares', 'syddepreciation',
            'tan', 'tanh', 'totalvariance', 'variance'
        )
    }

    ASM_REGISTERS = {
        'ah', 'al', 'ax', 'bh', 'bl', 'bp', 'bx', 'ch', 'cl', 'cr0',
        'cr1', 'cr2', 'cr3', 'cr4', 'cs', 'cx', 'dh', 'di', 'dl', 'dr0',
        'dr1', 'dr2', 'dr3', 'dr4', 'dr5', 'dr6', 'dr7', 'ds', 'dx',
        'eax', 'ebp', 'ebx', 'ecx', 'edi', 'edx', 'es', 'esi', 'esp',
        'fs', 'gs', 'mm0', 'mm1', 'mm2', 'mm3', 'mm4', 'mm5', 'mm6',
        'mm7', 'si', 'sp', 'ss', 'st0', 'st1', 'st2', 'st3', 'st4', 'st5',
        'st6', 'st7', 'xmm0', 'xmm1', 'xmm2', 'xmm3', 'xmm4', 'xmm5',
        'xmm6', 'xmm7'
    }

    ASM_INSTRUCTIONS = {
        'aaa', 'aad', 'aam', 'aas', 'adc', 'add', 'and', 'arpl', 'bound',
        'bsf', 'bsr', 'bswap', 'bt', 'btc', 'btr', 'bts', 'call', 'cbw',
        'cdq', 'clc', 'cld', 'cli', 'clts', 'cmc', 'cmova', 'cmovae',
        'cmovb', 'cmovbe', 'cmovc', 'cmovcxz', 'cmove', 'cmovg',
        'cmovge', 'cmovl', 'cmovle', 'cmovna', 'cmovnae', 'cmovnb',
        'cmovnbe', 'cmovnc', 'cmovne', 'cmovng', 'cmovnge', 'cmovnl',
        'cmovnle', 'cmovno', 'cmovnp', 'cmovns', 'cmovnz', 'cmovo',
        'cmovp', 'cmovpe', 'cmovpo', 'cmovs', 'cmovz', 'cmp', 'cmpsb',
        'cmpsd', 'cmpsw', 'cmpxchg', 'cmpxchg486', 'cmpxchg8b', 'cpuid',
        'cwd', 'cwde', 'daa', 'das', 'dec', 'div', 'emms', 'enter', 'hlt',
        'ibts', 'icebp', 'idiv', 'imul', 'in', 'inc', 'insb', 'insd',
        'insw', 'int', 'int01', 'int03', 'int1', 'int3', 'into', 'invd',
        'invlpg', 'iret', 'iretd', 'iretw', 'ja', 'jae', 'jb', 'jbe',
        'jc', 'jcxz', 'je', 'jecxz', 'jg', 'jge', 'jl', 'jle',
        'jmp', 'jna', 'jnae', 'jnb', 'jnbe', 'jnc', 'jne', 'jng', 'jnge',
        'jnl', 'jnle', 'jno', 'jnp', 'jns', 'jnz', 'jo', 'jp', 'jpe',
        'jpo', 'js', 'jz', 'lahf', 'lar', 'lcall', 'lds', 'lea', 'leave',
        'les', 'lfs', 'lgdt', 'lgs', 'lidt', 'ljmp', 'lldt', 'lmsw',
        'loadall', 'loadall286', 'lock', 'lodsb', 'lodsd', 'lodsw',
        'loop', 'loope', 'loopne', 'loopnz', 'loopz', 'lsl', 'lss', 'ltr',
        'mov', 'movd', 'movq', 'movsb', 'movsd', 'movsw', 'movsx',
        'movzx', 'mul', 'neg', 'nop', 'not', 'or', 'out', 'outsb', 'outsd',
        'outsw', 'pop', 'popa', 'popad', 'popaw', 'popf', 'popfd', 'popfw',
        'push', 'pusha', 'pushad', 'pushaw', 'pushf', 'pushfd', 'pushfw',
        'rcl', 'rcr', 'rdmsr', 'rdpmc', 'rdshr', 'rdtsc', 'rep', 'repe',
        'repne', 'repnz', 'repz', 'ret', 'retf', 'retn', 'rol', 'ror',
        'rsdc', 'rsldt', 'rsm', 'sahf', 'sal', 'salc', 'sar', 'sbb',
        'scasb', 'scasd', 'scasw', 'seta', 'setae', 'setb', 'setbe',
        'setc', 'setcxz', 'sete', 'setg', 'setge', 'setl', 'setle',
        'setna', 'setnae', 'setnb', 'setnbe', 'setnc', 'setne', 'setng',
        'setnge', 'setnl', 'setnle', 'setno', 'setnp', 'setns', 'setnz',
        'seto', 'setp', 'setpe', 'setpo', 'sets', 'setz', 'sgdt', 'shl',
        'shld', 'shr', 'shrd', 'sidt', 'sldt', 'smi', 'smint', 'smintold',
        'smsw', 'stc', 'std', 'sti', 'stosb', 'stosd', 'stosw', 'str',
        'sub', 'svdc', 'svldt', 'svts', 'syscall', 'sysenter', 'sysexit',
        'sysret', 'test', 'ud1', 'ud2', 'umov', 'verr', 'verw', 'wait',
        'wbinvd', 'wrmsr', 'wrshr', 'xadd', 'xbts', 'xchg', 'xlat',
        'xlatb', 'xor'
    }

    # 'exp' and 'tan' used to be spelled 'Exp' / 'Tan'; since identifiers
    # are lower-cased before the lookup those two entries could never match.
    PORTUGOL_KEYWORDS = (
        'aleatorio',
        'algoritmo',
        'arquivo',
        'ate',
        'caso',
        'cronometro',
        'debug',
        'e',
        'eco',
        'enquanto',
        'entao',
        'escolha',
        'escreva',
        'escreval',
        'faca',
        'falso',
        'fimalgoritmo',
        'fimenquanto',
        'fimescolha',
        'fimfuncao',
        'fimpara',
        'fimprocedimento',
        'fimrepita',
        'fimse',
        'funcao',
        'inicio',
        'int',
        'interrompa',
        'leia',
        'limpatela',
        'mod',
        'nao',
        'ou',
        'outrocaso',
        'para',
        'passo',
        'pausa',
        'procedimento',
        'repita',
        'retorne',
        'se',
        'senao',
        'timer',
        'var',
        'vetor',
        'verdadeiro',
        'xou',
        'div',
        'mod',
        'abs',
        'arccos',
        'arcsen',
        'arctan',
        'cos',
        'cotan',
        'exp',
        'grauprad',
        'int',
        'log',
        'logn',
        'pi',
        'quad',
        'radpgrau',
        'raizq',
        'rand',
        'randi',
        'sen',
        'tan',
        'asc',
        'carac',
        'caracpnum',
        'compr',
        'copia',
        'maiusc',
        'minusc',
        'numpcarac',
        'pos',
    )

    PORTUGOL_BUILTIN_TYPES = {
        'inteiro', 'real', 'caractere', 'logico'
    }

    def __init__(self, **options):
        """Build the keyword and builtin sets selected by `options`.

        Besides the options listed in the class docstring, a boolean
        ``portugol`` option (default ``False``) switches the lexer to the
        Portugol keyword/type tables; `PortugolLexer` sets it.  In that mode
        the Pascal keyword and unit options are not consulted.

        An unknown name in the `units` option raises `KeyError`.
        """
        Lexer.__init__(self, **options)
        self.keywords = set()
        self.builtins = set()
        if get_bool_opt(options, 'portugol', False):
            self.keywords.update(self.PORTUGOL_KEYWORDS)
            self.builtins.update(self.PORTUGOL_BUILTIN_TYPES)
            self.is_portugol = True
        else:
            self.is_portugol = False

            if get_bool_opt(options, 'turbopascal', True):
                self.keywords.update(self.TURBO_PASCAL_KEYWORDS)
            if get_bool_opt(options, 'delphi', True):
                self.keywords.update(self.DELPHI_KEYWORDS)
            if get_bool_opt(options, 'freepascal', True):
                self.keywords.update(self.FREE_PASCAL_KEYWORDS)
            for unit in get_list_opt(options, 'units', list(self.BUILTIN_UNITS)):
                self.builtins.update(self.BUILTIN_UNITS[unit])

    @staticmethod
    def _comment_token(comment):
        """Classify a matched ``{...}`` / ``(*...*)`` comment.

        A comment whose first character after the opening delimiter is ``$``
        is a compiler directive and is reported as `Comment.Preproc`;
        everything else is `Comment.Multiline`.
        """
        # The matched text still contains its delimiter, so the directive
        # marker has to be looked for *after* ``{`` or ``(*``.
        if comment.startswith(('{$', '(*$')):
            return Comment.Preproc
        return Comment.Multiline

    def get_tokens_unprocessed(self, text):
        """Tokenize `text` with a hand-written scanner.

        Yields ``(index, tokentype, value)`` tuples.  The scanner runs a
        small state stack (``'initial'``, ``'string'``, ``'asm'``) and keeps
        a few context flags so that function names, property names, labels
        and calling-convention modifiers can be told apart from plain names.
        Text that no rule matches is emitted one character at a time as
        `Error`.
        """
        scanner = Scanner(text, re.DOTALL | re.MULTILINE | re.IGNORECASE)
        stack = ['initial']
        in_function_block = False
        in_property_block = False
        was_dot = False
        next_token_is_function = False
        next_token_is_property = False
        collect_labels = False
        block_labels = set()
        # [parenthesis depth, square-bracket depth] inside the current
        # function/property header
        brace_balance = [0, 0]

        while not scanner.eos:
            token = Error

            if stack[-1] == 'initial':
                if scanner.scan(r'\s+'):
                    token = Whitespace
                elif not self.is_portugol and scanner.scan(r'\{.*?\}|\(\*.*?\*\)'):
                    token = self._comment_token(scanner.match)
                elif scanner.scan(r'//.*?$'):
                    token = Comment.Single
                elif self.is_portugol and scanner.scan(r'(<\-)|(>=)|(<=)|%|<|>|-|\+|\*|\=|(<>)|\/|\.|:|,'):
                    token = Operator
                elif not self.is_portugol and scanner.scan(r'[-+*\/=<>:;,.@\^]'):
                    token = Operator
                    # stop label highlighting on next ";"
                    if collect_labels and scanner.match == ';':
                        collect_labels = False
                elif scanner.scan(r'[\(\)\[\]]+'):
                    token = Punctuation
                    # abort function naming ``foo = Function(...)``
                    next_token_is_function = False
                    # if we are in a function block we count the open
                    # braces because otherwise it's impossible to
                    # determine the end of the modifier context
                    if in_function_block or in_property_block:
                        # The pattern matches a whole *run* of brackets
                        # (e.g. ``())``), so every character in the run has
                        # to be accounted for, not just single-char matches.
                        brackets = scanner.match
                        brace_balance[0] += (brackets.count('(')
                                             - brackets.count(')'))
                        brace_balance[1] += (brackets.count('[')
                                             - brackets.count(']'))
                elif scanner.scan(r'[A-Za-z_][A-Za-z_0-9]*'):
                    lowercase_name = scanner.match.lower()
                    if lowercase_name == 'result':
                        token = Name.Builtin.Pseudo
                    elif lowercase_name in self.keywords:
                        token = Keyword
                        if self.is_portugol:
                            if lowercase_name in ('funcao', 'procedimento'):
                                in_function_block = True
                                next_token_is_function = True
                        else:
                            # if we are in a special block and a
                            # block ending keyword occurs (and the parenthesis
                            # is balanced) we end the current block context
                            if ((in_function_block or in_property_block)
                                    and lowercase_name in self.BLOCK_KEYWORDS
                                    and brace_balance[0] <= 0
                                    and brace_balance[1] <= 0):
                                in_function_block = False
                                in_property_block = False
                                brace_balance = [0, 0]
                                block_labels = set()
                            if lowercase_name in ('label', 'goto'):
                                collect_labels = True
                            elif lowercase_name == 'asm':
                                stack.append('asm')
                            elif lowercase_name == 'property':
                                in_property_block = True
                                next_token_is_property = True
                            elif lowercase_name in ('procedure', 'operator',
                                                    'function', 'constructor',
                                                    'destructor'):
                                in_function_block = True
                                next_token_is_function = True
                    # we are in a function block and the current name
                    # is in the set of registered modifiers. highlight
                    # it as pseudo keyword
                    elif (not self.is_portugol and in_function_block
                            and lowercase_name in self.FUNCTION_MODIFIERS):
                        token = Keyword.Pseudo
                    # if we are in a property highlight some more
                    # modifiers
                    elif (not self.is_portugol and in_property_block
                            and lowercase_name in ('read', 'write')):
                        token = Keyword.Pseudo
                        next_token_is_function = True
                    # if the last iteration set next_token_is_function
                    # to true we now want this name highlighted as
                    # function. so do that and reset the state
                    elif next_token_is_function:
                        # Look if the next token is a dot. If yes it's
                        # not a function, but a class name and the
                        # part after the dot a function name
                        if not self.is_portugol and scanner.test(r'\s*\.\s*'):
                            token = Name.Class
                        # it's not a dot, our job is done
                        else:
                            token = Name.Function
                            next_token_is_function = False

                            if self.is_portugol:
                                # remember the routine name so later uses
                                # hit the ``block_labels`` branch below
                                block_labels.add(scanner.match.lower())

                    # same for properties
                    elif not self.is_portugol and next_token_is_property:
                        token = Name.Property
                        next_token_is_property = False
                    # Highlight this token as label and add it
                    # to the list of known labels
                    elif not self.is_portugol and collect_labels:
                        token = Name.Label
                        block_labels.add(scanner.match.lower())
                    # name is in list of known labels
                    elif lowercase_name in block_labels:
                        token = Name.Label
                    elif self.is_portugol and lowercase_name in self.PORTUGOL_BUILTIN_TYPES:
                        token = Keyword.Type
                    elif not self.is_portugol and lowercase_name in self.BUILTIN_TYPES:
                        token = Keyword.Type
                    elif not self.is_portugol and lowercase_name in self.DIRECTIVES:
                        token = Keyword.Pseudo
                    # builtins are just builtins if the token
                    # before isn't a dot
                    elif not self.is_portugol and not was_dot and lowercase_name in self.builtins:
                        token = Name.Builtin
                    else:
                        token = Name
                elif self.is_portugol and scanner.scan(r"\""):
                    token = String
                    stack.append('string')
                elif not self.is_portugol and scanner.scan(r"'"):
                    token = String
                    stack.append('string')
                elif not self.is_portugol and scanner.scan(r'\#(\d+|\$[0-9A-Fa-f]+)'):
                    token = String.Char
                elif not self.is_portugol and scanner.scan(r'\$[0-9A-Fa-f]+'):
                    token = Number.Hex
                elif scanner.scan(r'\d+(?![eE]|\.[^.])'):
                    token = Number.Integer
                elif scanner.scan(r'\d+(\.\d+([eE][+-]?\d+)?|[eE][+-]?\d+)'):
                    token = Number.Float
                else:
                    # if the stack depth is deeper than once, pop
                    if len(stack) > 1:
                        stack.pop()
                    # consume one character; it is yielded as Error
                    scanner.get_char()

            elif stack[-1] == 'string':
                if self.is_portugol:
                    if scanner.scan(r"''"):
                        token = String.Escape
                    elif scanner.scan(r"\""):
                        token = String
                        stack.pop()
                    elif scanner.scan(r"[^\"]*"):
                        token = String
                    else:
                        scanner.get_char()
                        stack.pop()
                else:
                    if scanner.scan(r"''"):
                        token = String.Escape
                    elif scanner.scan(r"'"):
                        token = String
                        stack.pop()
                    elif scanner.scan(r"[^']*"):
                        token = String
                    else:
                        scanner.get_char()
                        stack.pop()
            elif not self.is_portugol and stack[-1] == 'asm':
                if scanner.scan(r'\s+'):
                    token = Whitespace
                # the lookahead keeps identifiers that merely start with
                # "end" (e.g. ``endloop``) from closing the asm block
                elif scanner.scan(r'end(?![A-Za-z_0-9])'):
                    token = Keyword
                    stack.pop()
                elif scanner.scan(r'\{.*?\}|\(\*.*?\*\)'):
                    token = self._comment_token(scanner.match)
                elif scanner.scan(r'//.*?$'):
                    token = Comment.Single
                elif scanner.scan(r"'"):
                    token = String
                    stack.append('string')
                elif scanner.scan(r'@@[A-Za-z_][A-Za-z_0-9]*'):
                    token = Name.Label
                elif scanner.scan(r'[A-Za-z_][A-Za-z_0-9]*'):
                    lowercase_name = scanner.match.lower()
                    if lowercase_name in self.ASM_INSTRUCTIONS:
                        token = Keyword
                    elif lowercase_name in self.ASM_REGISTERS:
                        token = Name.Builtin
                    else:
                        token = Name
                elif scanner.scan(r'[-+*\/=<>:;,.@\^]+'):
                    token = Operator
                elif scanner.scan(r'[\(\)\[\]]+'):
                    token = Punctuation
                elif scanner.scan(r'\$[0-9A-Fa-f]+'):
                    token = Number.Hex
                elif scanner.scan(r'\d+(?![eE]|\.[^.])'):
                    token = Number.Integer
                elif scanner.scan(r'\d+(\.\d+([eE][+-]?\d+)?|[eE][+-]?\d+)'):
                    token = Number.Float
                else:
                    scanner.get_char()
                    stack.pop()

            # remember whether the last non-whitespace token was a dot, so
            # that ``obj.write`` is not highlighted as the builtin ``write``
            if not self.is_portugol and scanner.match.strip():
                was_dot = scanner.match == '.'

            yield scanner.start_pos, token, scanner.match or ''
|