asm.py 41 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051
  1. """
  2. pygments.lexers.asm
  3. ~~~~~~~~~~~~~~~~~~~
  4. Lexers for assembly languages.
  5. :copyright: Copyright 2006-2025 by the Pygments team, see AUTHORS.
  6. :license: BSD, see LICENSE for details.
  7. """
  8. import re
  9. from pygments.lexer import RegexLexer, include, bygroups, using, words, \
  10. DelegatingLexer, default
  11. from pygments.lexers.c_cpp import CppLexer, CLexer
  12. from pygments.lexers.d import DLexer
  13. from pygments.token import Text, Name, Number, String, Comment, Punctuation, \
  14. Other, Keyword, Operator, Whitespace
  15. __all__ = ['GasLexer', 'ObjdumpLexer', 'DObjdumpLexer', 'CppObjdumpLexer',
  16. 'CObjdumpLexer', 'HsailLexer', 'LlvmLexer', 'LlvmMirBodyLexer',
  17. 'LlvmMirLexer', 'NasmLexer', 'NasmObjdumpLexer', 'TasmLexer',
  18. 'Ca65Lexer', 'Dasm16Lexer']
  19. class GasLexer(RegexLexer):
  20. """
  21. For Gas (AT&T) assembly code.
  22. """
  23. name = 'GAS'
  24. aliases = ['gas', 'asm']
  25. filenames = ['*.s', '*.S']
  26. mimetypes = ['text/x-gas']
  27. url = 'https://www.gnu.org/software/binutils'
  28. version_added = ''
  29. #: optional Comment or Whitespace
  30. string = r'"(\\"|[^"])*"'
  31. char = r'[\w$.@-]'
  32. identifier = r'(?:[a-zA-Z$_]' + char + r'*|\.' + char + '+)'
  33. number = r'(?:0[xX][a-fA-F0-9]+|#?-?\d+)'
  34. register = '%' + identifier + r'\b'
  35. tokens = {
  36. 'root': [
  37. include('whitespace'),
  38. (identifier + ':', Name.Label),
  39. (r'\.' + identifier, Name.Attribute, 'directive-args'),
  40. (r'lock|rep(n?z)?|data\d+', Name.Attribute),
  41. (identifier, Name.Function, 'instruction-args'),
  42. (r'[\r\n]+', Text)
  43. ],
  44. 'directive-args': [
  45. (identifier, Name.Constant),
  46. (string, String),
  47. ('@' + identifier, Name.Attribute),
  48. (number, Number.Integer),
  49. (register, Name.Variable),
  50. (r'[\r\n]+', Whitespace, '#pop'),
  51. (r'([;#]|//).*?\n', Comment.Single, '#pop'),
  52. (r'/[*].*?[*]/', Comment.Multiline),
  53. (r'/[*].*?\n[\w\W]*?[*]/', Comment.Multiline, '#pop'),
  54. include('punctuation'),
  55. include('whitespace')
  56. ],
  57. 'instruction-args': [
  58. # For objdump-disassembled code, shouldn't occur in
  59. # actual assembler input
  60. ('([a-z0-9]+)( )(<)('+identifier+')(>)',
  61. bygroups(Number.Hex, Text, Punctuation, Name.Constant,
  62. Punctuation)),
  63. ('([a-z0-9]+)( )(<)('+identifier+')([-+])('+number+')(>)',
  64. bygroups(Number.Hex, Text, Punctuation, Name.Constant,
  65. Punctuation, Number.Integer, Punctuation)),
  66. # Address constants
  67. (identifier, Name.Constant),
  68. (number, Number.Integer),
  69. # Registers
  70. (register, Name.Variable),
  71. # Numeric constants
  72. ('$'+number, Number.Integer),
  73. (r"$'(.|\\')'", String.Char),
  74. (r'[\r\n]+', Whitespace, '#pop'),
  75. (r'([;#]|//).*?\n', Comment.Single, '#pop'),
  76. (r'/[*].*?[*]/', Comment.Multiline),
  77. (r'/[*].*?\n[\w\W]*?[*]/', Comment.Multiline, '#pop'),
  78. include('punctuation'),
  79. include('whitespace')
  80. ],
  81. 'whitespace': [
  82. (r'\n', Whitespace),
  83. (r'\s+', Whitespace),
  84. (r'([;#]|//).*?\n', Comment.Single),
  85. (r'/[*][\w\W]*?[*]/', Comment.Multiline)
  86. ],
  87. 'punctuation': [
  88. (r'[-*,.()\[\]!:{}]+', Punctuation)
  89. ]
  90. }
  91. def analyse_text(text):
  92. if re.search(r'^\.(text|data|section)', text, re.M):
  93. return True
  94. elif re.search(r'^\.\w+', text, re.M):
  95. return 0.1
  96. def _objdump_lexer_tokens(asm_lexer):
  97. """
  98. Common objdump lexer tokens to wrap an ASM lexer.
  99. """
  100. hex_re = r'[0-9A-Za-z]'
  101. return {
  102. 'root': [
  103. # File name & format:
  104. ('(.*?)(:)( +file format )(.*?)$',
  105. bygroups(Name.Label, Punctuation, Text, String)),
  106. # Section header
  107. ('(Disassembly of section )(.*?)(:)$',
  108. bygroups(Text, Name.Label, Punctuation)),
  109. # Function labels
  110. # (With offset)
  111. ('('+hex_re+'+)( )(<)(.*?)([-+])(0[xX][A-Za-z0-9]+)(>:)$',
  112. bygroups(Number.Hex, Whitespace, Punctuation, Name.Function,
  113. Punctuation, Number.Hex, Punctuation)),
  114. # (Without offset)
  115. ('('+hex_re+'+)( )(<)(.*?)(>:)$',
  116. bygroups(Number.Hex, Whitespace, Punctuation, Name.Function,
  117. Punctuation)),
  118. # Code line with disassembled instructions
  119. ('( *)('+hex_re+r'+:)(\t)((?:'+hex_re+hex_re+' )+)( *\t)([a-zA-Z].*?)$',
  120. bygroups(Whitespace, Name.Label, Whitespace, Number.Hex, Whitespace,
  121. using(asm_lexer))),
  122. # Code line without raw instructions (objdump --no-show-raw-insn)
  123. ('( *)('+hex_re+r'+:)( *\t)([a-zA-Z].*?)$',
  124. bygroups(Whitespace, Name.Label, Whitespace,
  125. using(asm_lexer))),
  126. # Code line with ascii
  127. ('( *)('+hex_re+r'+:)(\t)((?:'+hex_re+hex_re+' )+)( *)(.*?)$',
  128. bygroups(Whitespace, Name.Label, Whitespace, Number.Hex, Whitespace, String)),
  129. # Continued code line, only raw opcodes without disassembled
  130. # instruction
  131. ('( *)('+hex_re+r'+:)(\t)((?:'+hex_re+hex_re+' )+)$',
  132. bygroups(Whitespace, Name.Label, Whitespace, Number.Hex)),
  133. # Skipped a few bytes
  134. (r'\t\.\.\.$', Text),
  135. # Relocation line
  136. # (With offset)
  137. (r'(\t\t\t)('+hex_re+r'+:)( )([^\t]+)(\t)(.*?)([-+])(0x'+hex_re+'+)$',
  138. bygroups(Whitespace, Name.Label, Whitespace, Name.Property, Whitespace,
  139. Name.Constant, Punctuation, Number.Hex)),
  140. # (Without offset)
  141. (r'(\t\t\t)('+hex_re+r'+:)( )([^\t]+)(\t)(.*?)$',
  142. bygroups(Whitespace, Name.Label, Whitespace, Name.Property, Whitespace,
  143. Name.Constant)),
  144. (r'[^\n]+\n', Other)
  145. ]
  146. }
  147. class ObjdumpLexer(RegexLexer):
  148. """
  149. For the output of ``objdump -dr``.
  150. """
  151. name = 'objdump'
  152. aliases = ['objdump']
  153. filenames = ['*.objdump']
  154. mimetypes = ['text/x-objdump']
  155. url = 'https://www.gnu.org/software/binutils'
  156. version_added = ''
  157. tokens = _objdump_lexer_tokens(GasLexer)
  158. class DObjdumpLexer(DelegatingLexer):
  159. """
  160. For the output of ``objdump -Sr`` on compiled D files.
  161. """
  162. name = 'd-objdump'
  163. aliases = ['d-objdump']
  164. filenames = ['*.d-objdump']
  165. mimetypes = ['text/x-d-objdump']
  166. url = 'https://www.gnu.org/software/binutils'
  167. version_added = ''
  168. def __init__(self, **options):
  169. super().__init__(DLexer, ObjdumpLexer, **options)
  170. class CppObjdumpLexer(DelegatingLexer):
  171. """
  172. For the output of ``objdump -Sr`` on compiled C++ files.
  173. """
  174. name = 'cpp-objdump'
  175. aliases = ['cpp-objdump', 'c++-objdumb', 'cxx-objdump']
  176. filenames = ['*.cpp-objdump', '*.c++-objdump', '*.cxx-objdump']
  177. mimetypes = ['text/x-cpp-objdump']
  178. url = 'https://www.gnu.org/software/binutils'
  179. version_added = ''
  180. def __init__(self, **options):
  181. super().__init__(CppLexer, ObjdumpLexer, **options)
  182. class CObjdumpLexer(DelegatingLexer):
  183. """
  184. For the output of ``objdump -Sr`` on compiled C files.
  185. """
  186. name = 'c-objdump'
  187. aliases = ['c-objdump']
  188. filenames = ['*.c-objdump']
  189. mimetypes = ['text/x-c-objdump']
  190. url = 'https://www.gnu.org/software/binutils'
  191. version_added = ''
  192. def __init__(self, **options):
  193. super().__init__(CLexer, ObjdumpLexer, **options)
  194. class HsailLexer(RegexLexer):
  195. """
  196. For HSAIL assembly code.
  197. """
  198. name = 'HSAIL'
  199. aliases = ['hsail', 'hsa']
  200. filenames = ['*.hsail']
  201. mimetypes = ['text/x-hsail']
  202. url = 'https://en.wikipedia.org/wiki/Heterogeneous_System_Architecture#HSA_Intermediate_Layer'
  203. version_added = '2.2'
  204. string = r'"[^"]*?"'
  205. identifier = r'[a-zA-Z_][\w.]*'
  206. # Registers
  207. register_number = r'[0-9]+'
  208. register = r'(\$(c|s|d|q)' + register_number + r')\b'
  209. # Qualifiers
  210. alignQual = r'(align\(\d+\))'
  211. widthQual = r'(width\((\d+|all)\))'
  212. allocQual = r'(alloc\(agent\))'
  213. # Instruction Modifiers
  214. roundingMod = (r'((_ftz)?(_up|_down|_zero|_near))')
  215. datatypeMod = (r'_('
  216. # packedTypes
  217. r'u8x4|s8x4|u16x2|s16x2|u8x8|s8x8|u16x4|s16x4|u32x2|s32x2|'
  218. r'u8x16|s8x16|u16x8|s16x8|u32x4|s32x4|u64x2|s64x2|'
  219. r'f16x2|f16x4|f16x8|f32x2|f32x4|f64x2|'
  220. # baseTypes
  221. r'u8|s8|u16|s16|u32|s32|u64|s64|'
  222. r'b128|b8|b16|b32|b64|b1|'
  223. r'f16|f32|f64|'
  224. # opaqueType
  225. r'roimg|woimg|rwimg|samp|sig32|sig64)')
  226. # Numeric Constant
  227. float = r'((\d+\.)|(\d*\.\d+))[eE][+-]?\d+'
  228. hexfloat = r'0[xX](([0-9a-fA-F]+\.[0-9a-fA-F]*)|([0-9a-fA-F]*\.[0-9a-fA-F]+))[pP][+-]?\d+'
  229. ieeefloat = r'0((h|H)[0-9a-fA-F]{4}|(f|F)[0-9a-fA-F]{8}|(d|D)[0-9a-fA-F]{16})'
  230. tokens = {
  231. 'root': [
  232. include('whitespace'),
  233. include('comments'),
  234. (string, String),
  235. (r'@' + identifier + ':?', Name.Label),
  236. (register, Name.Variable.Anonymous),
  237. include('keyword'),
  238. (r'&' + identifier, Name.Variable.Global),
  239. (r'%' + identifier, Name.Variable),
  240. (hexfloat, Number.Hex),
  241. (r'0[xX][a-fA-F0-9]+', Number.Hex),
  242. (ieeefloat, Number.Float),
  243. (float, Number.Float),
  244. (r'\d+', Number.Integer),
  245. (r'[=<>{}\[\]()*.,:;!]|x\b', Punctuation)
  246. ],
  247. 'whitespace': [
  248. (r'(\n|\s)+', Whitespace),
  249. ],
  250. 'comments': [
  251. (r'/\*.*?\*/', Comment.Multiline),
  252. (r'//.*?\n', Comment.Single),
  253. ],
  254. 'keyword': [
  255. # Types
  256. (r'kernarg' + datatypeMod, Keyword.Type),
  257. # Regular keywords
  258. (r'\$(full|base|small|large|default|zero|near)', Keyword),
  259. (words((
  260. 'module', 'extension', 'pragma', 'prog', 'indirect', 'signature',
  261. 'decl', 'kernel', 'function', 'enablebreakexceptions',
  262. 'enabledetectexceptions', 'maxdynamicgroupsize', 'maxflatgridsize',
  263. 'maxflatworkgroupsize', 'requireddim', 'requiredgridsize',
  264. 'requiredworkgroupsize', 'requirenopartialworkgroups'),
  265. suffix=r'\b'), Keyword),
  266. # instructions
  267. (roundingMod, Keyword),
  268. (datatypeMod, Keyword),
  269. (r'_(' + alignQual + '|' + widthQual + ')', Keyword),
  270. (r'_kernarg', Keyword),
  271. (r'(nop|imagefence)\b', Keyword),
  272. (words((
  273. 'cleardetectexcept', 'clock', 'cuid', 'debugtrap', 'dim',
  274. 'getdetectexcept', 'groupbaseptr', 'kernargbaseptr', 'laneid',
  275. 'maxcuid', 'maxwaveid', 'packetid', 'setdetectexcept', 'waveid',
  276. 'workitemflatabsid', 'workitemflatid', 'nullptr', 'abs', 'bitrev',
  277. 'currentworkgroupsize', 'currentworkitemflatid', 'fract', 'ncos',
  278. 'neg', 'nexp2', 'nlog2', 'nrcp', 'nrsqrt', 'nsin', 'nsqrt',
  279. 'gridgroups', 'gridsize', 'not', 'sqrt', 'workgroupid',
  280. 'workgroupsize', 'workitemabsid', 'workitemid', 'ceil', 'floor',
  281. 'rint', 'trunc', 'add', 'bitmask', 'borrow', 'carry', 'copysign',
  282. 'div', 'rem', 'sub', 'shl', 'shr', 'and', 'or', 'xor', 'unpackhi',
  283. 'unpacklo', 'max', 'min', 'fma', 'mad', 'bitextract', 'bitselect',
  284. 'shuffle', 'cmov', 'bitalign', 'bytealign', 'lerp', 'nfma', 'mul',
  285. 'mulhi', 'mul24hi', 'mul24', 'mad24', 'mad24hi', 'bitinsert',
  286. 'combine', 'expand', 'lda', 'mov', 'pack', 'unpack', 'packcvt',
  287. 'unpackcvt', 'sad', 'sementp', 'ftos', 'stof', 'cmp', 'ld', 'st',
  288. '_eq', '_ne', '_lt', '_le', '_gt', '_ge', '_equ', '_neu', '_ltu',
  289. '_leu', '_gtu', '_geu', '_num', '_nan', '_seq', '_sne', '_slt',
  290. '_sle', '_sgt', '_sge', '_snum', '_snan', '_sequ', '_sneu', '_sltu',
  291. '_sleu', '_sgtu', '_sgeu', 'atomic', '_ld', '_st', '_cas', '_add',
  292. '_and', '_exch', '_max', '_min', '_or', '_sub', '_wrapdec',
  293. '_wrapinc', '_xor', 'ret', 'cvt', '_readonly', '_kernarg', '_global',
  294. 'br', 'cbr', 'sbr', '_scacq', '_screl', '_scar', '_rlx', '_wave',
  295. '_wg', '_agent', '_system', 'ldimage', 'stimage', '_v2', '_v3', '_v4',
  296. '_1d', '_2d', '_3d', '_1da', '_2da', '_1db', '_2ddepth', '_2dadepth',
  297. '_width', '_height', '_depth', '_array', '_channelorder',
  298. '_channeltype', 'querysampler', '_coord', '_filter', '_addressing',
  299. 'barrier', 'wavebarrier', 'initfbar', 'joinfbar', 'waitfbar',
  300. 'arrivefbar', 'leavefbar', 'releasefbar', 'ldf', 'activelaneid',
  301. 'activelanecount', 'activelanemask', 'activelanepermute', 'call',
  302. 'scall', 'icall', 'alloca', 'packetcompletionsig',
  303. 'addqueuewriteindex', 'casqueuewriteindex', 'ldqueuereadindex',
  304. 'stqueuereadindex', 'readonly', 'global', 'private', 'group',
  305. 'spill', 'arg', '_upi', '_downi', '_zeroi', '_neari', '_upi_sat',
  306. '_downi_sat', '_zeroi_sat', '_neari_sat', '_supi', '_sdowni',
  307. '_szeroi', '_sneari', '_supi_sat', '_sdowni_sat', '_szeroi_sat',
  308. '_sneari_sat', '_pp', '_ps', '_sp', '_ss', '_s', '_p', '_pp_sat',
  309. '_ps_sat', '_sp_sat', '_ss_sat', '_s_sat', '_p_sat')), Keyword),
  310. # Integer types
  311. (r'i[1-9]\d*', Keyword)
  312. ]
  313. }
class LlvmLexer(RegexLexer):
    """
    For LLVM assembly code.

    Rule order in ``root`` matters: block labels are tried before keywords,
    and named ``%``/``@``/``!`` references before their numeric forms.
    """
    name = 'LLVM'
    url = 'https://llvm.org/docs/LangRef.html'
    aliases = ['llvm']
    filenames = ['*.ll']
    mimetypes = ['text/x-llvm']
    version_added = ''

    #: optional Comment or Whitespace
    string = r'"[^"]*?"'
    # An identifier is either a bare name or a quoted string.
    identifier = r'([-a-zA-Z$._][\w\-$.]*|' + string + ')'
    # A block label is an identifier or a plain decimal number.
    block_label = r'(' + identifier + r'|(\d+))'

    tokens = {
        'root': [
            include('whitespace'),

            # Before keywords, because keywords are valid label names :(...
            (block_label + r'\s*:', Name.Label),

            include('keyword'),

            # Named and numbered local, global, attribute-group and
            # metadata references.
            (r'%' + identifier, Name.Variable),
            (r'@' + identifier, Name.Variable.Global),
            (r'%\d+', Name.Variable.Anonymous),
            (r'@\d+', Name.Variable.Global),
            (r'#\d+', Name.Variable.Global),
            (r'!' + identifier, Name.Variable),
            (r'!\d+', Name.Variable.Anonymous),
            # Plain strings and c"..." strings
            (r'c?' + string, String),

            (r'0[xX][KLMHR]?[a-fA-F0-9]+', Number),
            (r'-?\d+(?:[.]\d+)?(?:[eE][-+]?\d+(?:[.]\d+)?)?', Number),

            (r'[=<>{}\[\]()*.,!]|x\b', Punctuation)
        ],
        'whitespace': [
            (r'(\n|\s+)+', Whitespace),
            # ';' starts a comment that runs to the end of the line
            (r';.*?\n', Comment)
        ],
        'keyword': [
            # Regular keywords
            (words((
                'aarch64_sve_vector_pcs', 'aarch64_vector_pcs', 'acq_rel',
                'acquire', 'add', 'addrspace', 'addrspacecast', 'afn', 'alias',
                'aliasee', 'align', 'alignLog2', 'alignstack', 'alloca',
                'allocsize', 'allOnes', 'alwaysinline', 'alwaysInline',
                'amdgpu_cs', 'amdgpu_es', 'amdgpu_gfx', 'amdgpu_gs',
                'amdgpu_hs', 'amdgpu_kernel', 'amdgpu_ls', 'amdgpu_ps',
                'amdgpu_vs', 'and', 'any', 'anyregcc', 'appending', 'arcp',
                'argmemonly', 'args', 'arm_aapcs_vfpcc', 'arm_aapcscc',
                'arm_apcscc', 'ashr', 'asm', 'atomic', 'atomicrmw',
                'attributes', 'available_externally', 'avr_intrcc',
                'avr_signalcc', 'bit', 'bitcast', 'bitMask', 'blockaddress',
                'blockcount', 'br', 'branchFunnel', 'builtin', 'byArg',
                'byref', 'byte', 'byteArray', 'byval', 'c', 'call', 'callbr',
                'callee', 'caller', 'calls', 'canAutoHide', 'catch',
                'catchpad', 'catchret', 'catchswitch', 'cc', 'ccc',
                'cfguard_checkcc', 'cleanup', 'cleanuppad', 'cleanupret',
                'cmpxchg', 'cold', 'coldcc', 'comdat', 'common', 'constant',
                'contract', 'convergent', 'critical', 'cxx_fast_tlscc',
                'datalayout', 'declare', 'default', 'define', 'deplibs',
                'dereferenceable', 'dereferenceable_or_null', 'distinct',
                'dllexport', 'dllimport', 'dso_local', 'dso_local_equivalent',
                'dso_preemptable', 'dsoLocal', 'eq', 'exact', 'exactmatch',
                'extern_weak', 'external', 'externally_initialized',
                'extractelement', 'extractvalue', 'fadd', 'false', 'fast',
                'fastcc', 'fcmp', 'fdiv', 'fence', 'filter', 'flags', 'fmul',
                'fneg', 'fpext', 'fptosi', 'fptoui', 'fptrunc', 'freeze',
                'frem', 'from', 'fsub', 'funcFlags', 'function', 'gc',
                'getelementptr', 'ghccc', 'global', 'guid', 'gv', 'hash',
                'hhvm_ccc', 'hhvmcc', 'hidden', 'hot', 'hotness', 'icmp',
                'ifunc', 'inaccessiblemem_or_argmemonly',
                'inaccessiblememonly', 'inalloca', 'inbounds', 'indir',
                'indirectbr', 'info', 'initialexec', 'inline', 'inlineBits',
                'inlinehint', 'inrange', 'inreg', 'insertelement',
                'insertvalue', 'insts', 'intel_ocl_bicc', 'inteldialect',
                'internal', 'inttoptr', 'invoke', 'jumptable', 'kind',
                'landingpad', 'largest', 'linkage', 'linkonce', 'linkonce_odr',
                'live', 'load', 'local_unnamed_addr', 'localdynamic',
                'localexec', 'lshr', 'max', 'metadata', 'min', 'minsize',
                'module', 'monotonic', 'msp430_intrcc', 'mul', 'mustprogress',
                'musttail', 'naked', 'name', 'nand', 'ne', 'nest', 'ninf',
                'nnan', 'noalias', 'nobuiltin', 'nocallback', 'nocapture',
                'nocf_check', 'noduplicate', 'noduplicates', 'nofree',
                'noimplicitfloat', 'noinline', 'noInline', 'nomerge', 'none',
                'nonlazybind', 'nonnull', 'noprofile', 'norecurse',
                'noRecurse', 'noredzone', 'noreturn', 'nosync', 'notail',
                'notEligibleToImport', 'noundef', 'nounwind', 'nsw',
                'nsz', 'null', 'null_pointer_is_valid', 'nuw', 'oeq', 'offset',
                'oge', 'ogt', 'ole', 'olt', 'one', 'opaque', 'optforfuzzing',
                'optnone', 'optsize', 'or', 'ord', 'param', 'params',
                'partition', 'path', 'personality', 'phi', 'poison',
                'preallocated', 'prefix', 'preserve_allcc', 'preserve_mostcc',
                'private', 'prologue', 'protected', 'ptrtoint', 'ptx_device',
                'ptx_kernel', 'readnone', 'readNone', 'readonly', 'readOnly',
                'reassoc', 'refs', 'relbf', 'release', 'resByArg', 'resume',
                'ret', 'returnDoesNotAlias', 'returned', 'returns_twice',
                'safestack', 'samesize', 'sanitize_address',
                'sanitize_hwaddress', 'sanitize_memory', 'sanitize_memtag',
                'sanitize_thread', 'sdiv', 'section', 'select', 'seq_cst',
                'sext', 'sge', 'sgt', 'shadowcallstack', 'shl',
                'shufflevector', 'sideeffect', 'signext', 'single',
                'singleImpl', 'singleImplName', 'sitofp', 'sizeM1',
                'sizeM1BitWidth', 'sle', 'slt', 'source_filename',
                'speculatable', 'speculative_load_hardening', 'spir_func',
                'spir_kernel', 'splat', 'srem', 'sret', 'ssp', 'sspreq',
                'sspstrong', 'store', 'strictfp', 'sub', 'summaries',
                'summary', 'swiftcc', 'swifterror', 'swiftself', 'switch',
                'syncscope', 'tail', 'tailcc', 'target', 'thread_local', 'to',
                'token', 'triple', 'true', 'trunc', 'type',
                'typeCheckedLoadConstVCalls', 'typeCheckedLoadVCalls',
                'typeid', 'typeidCompatibleVTable', 'typeIdInfo',
                'typeTestAssumeConstVCalls', 'typeTestAssumeVCalls',
                'typeTestRes', 'typeTests', 'udiv', 'ueq', 'uge', 'ugt',
                'uitofp', 'ule', 'ult', 'umax', 'umin', 'undef', 'une',
                'uniformRetVal', 'uniqueRetVal', 'unknown', 'unnamed_addr',
                'uno', 'unordered', 'unreachable', 'unsat', 'unwind', 'urem',
                'uselistorder', 'uselistorder_bb', 'uwtable', 'va_arg',
                'varFlags', 'variable', 'vcall_visibility', 'vFuncId',
                'virtFunc', 'virtualConstProp', 'void', 'volatile', 'vscale',
                'vTableFuncs', 'weak', 'weak_odr', 'webkit_jscc', 'win64cc',
                'within', 'wpdRes', 'wpdResolutions', 'writeonly', 'x',
                'x86_64_sysvcc', 'x86_fastcallcc', 'x86_intrcc', 'x86_mmx',
                'x86_regcallcc', 'x86_stdcallcc', 'x86_thiscallcc',
                'x86_vectorcallcc', 'xchg', 'xor', 'zeroext',
                'zeroinitializer', 'zext', 'immarg', 'willreturn'),
                suffix=r'\b'), Keyword),

            # Types
            # NOTE(review): 'void', 'metadata', 'x86_mmx' and 'token' also
            # appear in the keyword list above, which is tried first.
            (words(('void', 'half', 'bfloat', 'float', 'double', 'fp128',
                    'x86_fp80', 'ppc_fp128', 'label', 'metadata', 'x86_mmx',
                    'x86_amx', 'token', 'ptr')),
             Keyword.Type),

            # Integer types
            (r'i[1-9]\d*', Keyword.Type)
        ]
    }
class LlvmMirBodyLexer(RegexLexer):
    """
    For LLVM MIR examples without the YAML wrapper.

    Besides ``root``, the table has small helper states for the value that
    follows a constant type (``constantint``, ``constantfloat``), for the
    bank/class and type after a virtual register (``vreg``,
    ``vreg_bank_or_class``, ``vreg_type``) and for memory operands
    (``mmo``).
    """
    name = 'LLVM-MIR Body'
    url = 'https://llvm.org/docs/MIRLangRef.html'
    aliases = ['llvm-mir-body']
    filenames = []
    mimetypes = []
    version_added = '2.6'

    tokens = {
        'root': [
            # Attributes on basic blocks
            (words(('liveins', 'successors'), suffix=':'), Keyword),
            # Basic Block Labels
            (r'bb\.[0-9]+(\.[a-zA-Z0-9_.-]+)?( \(address-taken\))?:', Name.Label),
            (r'bb\.[0-9]+ \(%[a-zA-Z0-9_.-]+\)( \(address-taken\))?:', Name.Label),
            (r'%bb\.[0-9]+(\.\w+)?', Name.Label),
            # Stack references
            (r'%stack\.[0-9]+(\.\w+\.addr)?', Name),
            # Subreg indices
            (r'%subreg\.\w+', Name),
            # Virtual registers
            (r'%[a-zA-Z0-9_]+ *', Name.Variable, 'vreg'),
            # Reference to LLVM-IR global
            include('global'),
            # Reference to Intrinsic
            (r'intrinsic\(\@[a-zA-Z0-9_.]+\)', Name.Variable.Global),
            # Comparison predicates
            (words(('eq', 'ne', 'sgt', 'sge', 'slt', 'sle', 'ugt', 'uge', 'ult',
                    'ule'), prefix=r'intpred\(', suffix=r'\)'), Name.Builtin),
            (words(('oeq', 'one', 'ogt', 'oge', 'olt', 'ole', 'ugt', 'uge',
                    'ult', 'ule'), prefix=r'floatpred\(', suffix=r'\)'),
             Name.Builtin),
            # Physical registers
            (r'\$\w+', String.Single),
            # Assignment operator
            (r'=', Operator),
            # gMIR Opcodes
            (r'(G_ANYEXT|G_[SZ]EXT|G_SEXT_INREG|G_TRUNC|G_IMPLICIT_DEF|G_PHI|'
             r'G_FRAME_INDEX|G_GLOBAL_VALUE|G_INTTOPTR|G_PTRTOINT|G_BITCAST|'
             r'G_CONSTANT|G_FCONSTANT|G_VASTART|G_VAARG|G_CTLZ|G_CTLZ_ZERO_UNDEF|'
             r'G_CTTZ|G_CTTZ_ZERO_UNDEF|G_CTPOP|G_BSWAP|G_BITREVERSE|'
             r'G_ADDRSPACE_CAST|G_BLOCK_ADDR|G_JUMP_TABLE|G_DYN_STACKALLOC|'
             r'G_ADD|G_SUB|G_MUL|G_[SU]DIV|G_[SU]REM|G_AND|G_OR|G_XOR|G_SHL|'
             r'G_[LA]SHR|G_[IF]CMP|G_SELECT|G_GEP|G_PTR_MASK|G_SMIN|G_SMAX|'
             r'G_UMIN|G_UMAX|G_[US]ADDO|G_[US]ADDE|G_[US]SUBO|G_[US]SUBE|'
             r'G_[US]MULO|G_[US]MULH|G_FNEG|G_FPEXT|G_FPTRUNC|G_FPTO[US]I|'
             r'G_[US]ITOFP|G_FABS|G_FCOPYSIGN|G_FCANONICALIZE|G_FMINNUM|'
             r'G_FMAXNUM|G_FMINNUM_IEEE|G_FMAXNUM_IEEE|G_FMINIMUM|G_FMAXIMUM|'
             r'G_FADD|G_FSUB|G_FMUL|G_FMA|G_FMAD|G_FDIV|G_FREM|G_FPOW|G_FEXP|'
             r'G_FEXP2|G_FLOG|G_FLOG2|G_FLOG10|G_FCEIL|G_FCOS|G_FSIN|G_FSQRT|'
             r'G_FFLOOR|G_FRINT|G_FNEARBYINT|G_INTRINSIC_TRUNC|'
             r'G_INTRINSIC_ROUND|G_LOAD|G_[ZS]EXTLOAD|G_INDEXED_LOAD|'
             r'G_INDEXED_[ZS]EXTLOAD|G_STORE|G_INDEXED_STORE|'
             r'G_ATOMIC_CMPXCHG_WITH_SUCCESS|G_ATOMIC_CMPXCHG|'
             r'G_ATOMICRMW_(XCHG|ADD|SUB|AND|NAND|OR|XOR|MAX|MIN|UMAX|UMIN|FADD|'
             r'FSUB)'
             r'|G_FENCE|G_EXTRACT|G_UNMERGE_VALUES|G_INSERT|G_MERGE_VALUES|'
             r'G_BUILD_VECTOR|G_BUILD_VECTOR_TRUNC|G_CONCAT_VECTORS|'
             r'G_INTRINSIC|G_INTRINSIC_W_SIDE_EFFECTS|G_BR|G_BRCOND|'
             r'G_BRINDIRECT|G_BRJT|G_INSERT_VECTOR_ELT|G_EXTRACT_VECTOR_ELT|'
             r'G_SHUFFLE_VECTOR)\b',
             Name.Builtin),
            # Target independent opcodes
            (r'(COPY|PHI|INSERT_SUBREG|EXTRACT_SUBREG|REG_SEQUENCE)\b',
             Name.Builtin),
            # Flags
            (words(('killed', 'implicit')), Keyword),
            # ConstantInt values
            (r'(i[0-9]+)( +)', bygroups(Keyword.Type, Whitespace), 'constantint'),
            # ConstantFloat values
            (r'(half|float|double) +', Keyword.Type, 'constantfloat'),
            # Bare immediates
            include('integer'),
            # MMO's
            (r'(::)( *)', bygroups(Operator, Whitespace), 'mmo'),
            # MIR Comments
            (r';.*', Comment),
            # If we get here, assume it's a target instruction
            (r'[a-zA-Z0-9_]+', Name),
            # Everything else that isn't highlighted
            (r'[(), \n]+', Text),
        ],
        # The integer constant from a ConstantInt value
        'constantint': [
            include('integer'),
            # Zero-width match: leave the state without consuming anything
            (r'(?=.)', Text, '#pop'),
        ],
        # The floating point constant from a ConstantFloat value
        'constantfloat': [
            include('float'),
            # Zero-width match: leave the state without consuming anything
            (r'(?=.)', Text, '#pop'),
        ],
        # What may follow a virtual register name
        'vreg': [
            # The bank or class if there is one
            # (a single ':'; the lookahead rejects '::')
            (r'( *)(:(?!:))', bygroups(Whitespace, Keyword), ('#pop', 'vreg_bank_or_class')),
            # The LLT if there is one
            (r'( *)(\()', bygroups(Whitespace, Text), 'vreg_type'),
            (r'(?=.)', Text, '#pop'),
        ],
        'vreg_bank_or_class': [
            # The unassigned bank/class
            (r'( *)(_)', bygroups(Whitespace, Name.Variable.Magic)),
            (r'( *)([a-zA-Z0-9_]+)', bygroups(Whitespace, Name.Variable)),
            # The LLT if there is one
            (r'( *)(\()', bygroups(Whitespace, Text), 'vreg_type'),
            (r'(?=.)', Text, '#pop'),
        ],
        'vreg_type': [
            # Scalar and pointer types
            (r'( *)([sp][0-9]+)', bygroups(Whitespace, Keyword.Type)),
            # Types of the form <N x sM> / <N x pM>
            (r'( *)(<[0-9]+ *x *[sp][0-9]+>)', bygroups(Whitespace, Keyword.Type)),
            (r'\)', Text, '#pop'),
            (r'(?=.)', Text, '#pop'),
        ],
        # Contents of a memory operand, entered after '::'
        'mmo': [
            (r'\(', Text),
            (r' +', Whitespace),
            (words(('load', 'store', 'on', 'into', 'from', 'align', 'monotonic',
                    'acquire', 'release', 'acq_rel', 'seq_cst')),
             Keyword),
            # IR references
            (r'%ir\.[a-zA-Z0-9_.-]+', Name),
            (r'%ir-block\.[a-zA-Z0-9_.-]+', Name),
            (r'[-+]', Operator),
            include('integer'),
            include('global'),
            (r',', Punctuation),
            # '), (' is consumed without leaving the state;
            # a lone ')' ends it
            (r'\), \(', Text),
            (r'\)', Text, '#pop'),
        ],
        # Leaf rule sets included by the states above
        'integer': [(r'-?[0-9]+', Number.Integer),],
        'float': [(r'-?[0-9]+\.[0-9]+(e[+-][0-9]+)?', Number.Float)],
        'global': [(r'\@[a-zA-Z0-9_.]+', Name.Variable.Global)],
    }
class LlvmMirLexer(RegexLexer):
    """
    Lexer for the overall LLVM MIR document format.

    MIR is a human readable serialization format that's used to represent LLVM's
    machine specific intermediate representation. It allows LLVM's developers to
    see the state of the compilation process at various points, as well as test
    individual pieces of the compiler.

    Documents introduced by ``--- |`` are delegated to :class:`LlvmLexer`;
    the ``body: |`` block of other documents is delegated to
    :class:`LlvmMirBodyLexer`.
    """
    name = 'LLVM-MIR'
    url = 'https://llvm.org/docs/MIRLangRef.html'
    aliases = ['llvm-mir']
    filenames = ['*.mir']
    version_added = '2.6'

    tokens = {
        'root': [
            # Comments are hashes at the YAML level
            (r'#.*', Comment),
            # Documents starting with | are LLVM-IR
            (r'--- \|$', Keyword, 'llvm_ir'),
            # Other documents are MIR
            (r'---', Keyword, 'llvm_mir'),
            # Consume everything else in one token for efficiency
            (r'[^-#]+|.', Text),
        ],
        'llvm_ir': [
            # Documents end with '...' or '---'
            # ('---' is only looked ahead at, so it is left unconsumed)
            (r'(\.\.\.|(?=---))', Keyword, '#pop'),
            # Delegate to the LlvmLexer
            (r'((?:.|\n)+?)(?=(\.\.\.|---))', bygroups(using(LlvmLexer))),
        ],
        'llvm_mir': [
            # Comments are hashes at the YAML level
            (r'#.*', Comment),
            # Documents end with '...' or '---'
            (r'(\.\.\.|(?=---))', Keyword, '#pop'),
            # Handle the simple attributes
            # (each pushes a one-shot state that lexes the value)
            (r'name:', Keyword, 'name'),
            (words(('alignment', ),
                   suffix=':'), Keyword, 'number'),
            (words(('legalized', 'regBankSelected', 'tracksRegLiveness',
                    'selected', 'exposesReturnsTwice'),
                   suffix=':'), Keyword, 'boolean'),
            # Handle the attributes don't highlight inside
            (words(('registers', 'stack', 'fixedStack', 'liveins', 'frameInfo',
                    'machineFunctionInfo'),
                   suffix=':'), Keyword),
            # Delegate the body block to the LlvmMirBodyLexer
            (r'body: *\|', Keyword, 'llvm_mir_body'),
            # Consume everything else
            (r'.+', Text),
            (r'\n', Whitespace),
        ],
        # Value states: lex one value if present, then pop unconditionally.
        'name': [
            (r'[^\n]+', Name),
            default('#pop'),
        ],
        'boolean': [
            (r' *(true|false)', Name.Builtin),
            default('#pop'),
        ],
        'number': [
            (r' *[0-9]+', Number),
            default('#pop'),
        ],
        'llvm_mir_body': [
            # Documents end with '...' or '---'.
            # We have to pop llvm_mir_body and llvm_mir
            (r'(\.\.\.|(?=---))', Keyword, '#pop:2'),
            # Delegate the body block to the LlvmMirBodyLexer
            (r'((?:.|\n)+?)(?=\.\.\.|---)', bygroups(using(LlvmMirBodyLexer))),
            # The '...' is optional. If we didn't already find it then it isn't
            # there. There might be a '---' instead though.
            (r'(?!\.\.\.|---)((?:.|\n)+)', bygroups(using(LlvmMirBodyLexer))),
        ],
    }
  658. class NasmLexer(RegexLexer):
  659. """
  660. For Nasm (Intel) assembly code.
  661. """
  662. name = 'NASM'
  663. aliases = ['nasm']
  664. filenames = ['*.asm', '*.ASM', '*.nasm']
  665. mimetypes = ['text/x-nasm']
  666. url = 'https://nasm.us'
  667. version_added = ''
  668. # Tasm uses the same file endings, but TASM is not as common as NASM, so
  669. # we prioritize NASM higher by default
  670. priority = 1.0
  671. identifier = r'[a-z$._?][\w$.?#@~]*'
  672. hexn = r'(?:0x[0-9a-f]+|$0[0-9a-f]*|[0-9]+[0-9a-f]*h)'
  673. octn = r'[0-7]+q'
  674. binn = r'[01]+b'
  675. decn = r'[0-9]+'
  676. floatn = decn + r'\.e?' + decn
  677. string = r'"(\\"|[^"\n])*"|' + r"'(\\'|[^'\n])*'|" + r"`(\\`|[^`\n])*`"
  678. declkw = r'(?:res|d)[bwdqt]|times'
  679. register = (r'(r[0-9][0-5]?[bwd]?|'
  680. r'[a-d][lh]|[er]?[a-d]x|[er]?[sb]p|[er]?[sd]i|[c-gs]s|st[0-7]|'
  681. r'mm[0-7]|cr[0-4]|dr[0-367]|tr[3-7]|k[0-7]|'
  682. r'[xyz]mm(?:[12][0-9]?|3[01]?|[04-9]))\b')
  683. wordop = r'seg|wrt|strict|rel|abs'
  684. type = r'byte|[dq]?word'
  685. # Directives must be followed by whitespace, otherwise CPU will match
  686. # cpuid for instance.
  687. directives = (r'(?:BITS|USE16|USE32|SECTION|SEGMENT|ABSOLUTE|EXTERN|GLOBAL|'
  688. r'ORG|ALIGN|STRUC|ENDSTRUC|COMMON|CPU|GROUP|UPPERCASE|IMPORT|'
  689. r'EXPORT|LIBRARY|MODULE)(?=\s)')
  690. flags = re.IGNORECASE | re.MULTILINE
  691. tokens = {
  692. 'root': [
  693. (r'^\s*%', Comment.Preproc, 'preproc'),
  694. include('whitespace'),
  695. (identifier + ':', Name.Label),
  696. (rf'({identifier})(\s+)(equ)',
  697. bygroups(Name.Constant, Whitespace, Keyword.Declaration),
  698. 'instruction-args'),
  699. (directives, Keyword, 'instruction-args'),
  700. (declkw, Keyword.Declaration, 'instruction-args'),
  701. (identifier, Name.Function, 'instruction-args'),
  702. (r'[\r\n]+', Whitespace)
  703. ],
  704. 'instruction-args': [
  705. (string, String),
  706. (hexn, Number.Hex),
  707. (octn, Number.Oct),
  708. (binn, Number.Bin),
  709. (floatn, Number.Float),
  710. (decn, Number.Integer),
  711. include('punctuation'),
  712. (register, Name.Builtin),
  713. (identifier, Name.Variable),
  714. (r'[\r\n]+', Whitespace, '#pop'),
  715. include('whitespace')
  716. ],
  717. 'preproc': [
  718. (r'[^;\n]+', Comment.Preproc),
  719. (r';.*?\n', Comment.Single, '#pop'),
  720. (r'\n', Comment.Preproc, '#pop'),
  721. ],
  722. 'whitespace': [
  723. (r'\n', Whitespace),
  724. (r'[ \t]+', Whitespace),
  725. (r';.*', Comment.Single),
  726. (r'#.*', Comment.Single)
  727. ],
  728. 'punctuation': [
  729. (r'[,{}():\[\]]+', Punctuation),
  730. (r'[&|^<>+*/%~-]+', Operator),
  731. (r'[$]+', Keyword.Constant),
  732. (wordop, Operator.Word),
  733. (type, Keyword.Type)
  734. ],
  735. }
  736. def analyse_text(text):
  737. # Probably TASM
  738. if re.match(r'PROC', text, re.IGNORECASE):
  739. return False
  740. class NasmObjdumpLexer(ObjdumpLexer):
  741. """
  742. For the output of ``objdump -d -M intel``.
  743. """
  744. name = 'objdump-nasm'
  745. aliases = ['objdump-nasm']
  746. filenames = ['*.objdump-intel']
  747. mimetypes = ['text/x-nasm-objdump']
  748. url = 'https://www.gnu.org/software/binutils'
  749. version_added = '2.0'
  750. tokens = _objdump_lexer_tokens(NasmLexer)
  751. class TasmLexer(RegexLexer):
  752. """
  753. For Tasm (Turbo Assembler) assembly code.
  754. """
  755. name = 'TASM'
  756. aliases = ['tasm']
  757. filenames = ['*.asm', '*.ASM', '*.tasm']
  758. mimetypes = ['text/x-tasm']
  759. url = 'https://en.wikipedia.org/wiki/Turbo_Assembler'
  760. version_added = ''
  761. identifier = r'[@a-z$._?][\w$.?#@~]*'
  762. hexn = r'(?:0x[0-9a-f]+|$0[0-9a-f]*|[0-9]+[0-9a-f]*h)'
  763. octn = r'[0-7]+q'
  764. binn = r'[01]+b'
  765. decn = r'[0-9]+'
  766. floatn = decn + r'\.e?' + decn
  767. string = r'"(\\"|[^"\n])*"|' + r"'(\\'|[^'\n])*'|" + r"`(\\`|[^`\n])*`"
  768. declkw = r'(?:res|d)[bwdqt]|times'
  769. register = (r'(r[0-9][0-5]?[bwd]|'
  770. r'[a-d][lh]|[er]?[a-d]x|[er]?[sb]p|[er]?[sd]i|[c-gs]s|st[0-7]|'
  771. r'mm[0-7]|cr[0-4]|dr[0-367]|tr[3-7])\b')
  772. wordop = r'seg|wrt|strict'
  773. type = r'byte|[dq]?word'
  774. directives = (r'BITS|USE16|USE32|SECTION|SEGMENT|ABSOLUTE|EXTERN|GLOBAL|'
  775. r'ORG|ALIGN|STRUC|ENDSTRUC|ENDS|COMMON|CPU|GROUP|UPPERCASE|INCLUDE|'
  776. r'EXPORT|LIBRARY|MODULE|PROC|ENDP|USES|ARG|DATASEG|UDATASEG|END|IDEAL|'
  777. r'P386|MODEL|ASSUME|CODESEG|SIZE')
  778. # T[A-Z][a-z] is more of a convention. Lexer should filter out STRUC definitions
  779. # and then 'add' them to datatype somehow.
  780. datatype = (r'db|dd|dw|T[A-Z][a-z]+')
  781. flags = re.IGNORECASE | re.MULTILINE
  782. tokens = {
  783. 'root': [
  784. (r'^\s*%', Comment.Preproc, 'preproc'),
  785. include('whitespace'),
  786. (identifier + ':', Name.Label),
  787. (directives, Keyword, 'instruction-args'),
  788. (rf'({identifier})(\s+)({datatype})',
  789. bygroups(Name.Constant, Whitespace, Keyword.Declaration),
  790. 'instruction-args'),
  791. (declkw, Keyword.Declaration, 'instruction-args'),
  792. (identifier, Name.Function, 'instruction-args'),
  793. (r'[\r\n]+', Whitespace)
  794. ],
  795. 'instruction-args': [
  796. (string, String),
  797. (hexn, Number.Hex),
  798. (octn, Number.Oct),
  799. (binn, Number.Bin),
  800. (floatn, Number.Float),
  801. (decn, Number.Integer),
  802. include('punctuation'),
  803. (register, Name.Builtin),
  804. (identifier, Name.Variable),
  805. # Do not match newline when it's preceded by a backslash
  806. (r'(\\)(\s*)(;.*)([\r\n])',
  807. bygroups(Text, Whitespace, Comment.Single, Whitespace)),
  808. (r'[\r\n]+', Whitespace, '#pop'),
  809. include('whitespace')
  810. ],
  811. 'preproc': [
  812. (r'[^;\n]+', Comment.Preproc),
  813. (r';.*?\n', Comment.Single, '#pop'),
  814. (r'\n', Comment.Preproc, '#pop'),
  815. ],
  816. 'whitespace': [
  817. (r'[\n\r]', Whitespace),
  818. (r'(\\)([\n\r])', bygroups(Text, Whitespace)),
  819. (r'[ \t]+', Whitespace),
  820. (r';.*', Comment.Single)
  821. ],
  822. 'punctuation': [
  823. (r'[,():\[\]]+', Punctuation),
  824. (r'[&|^<>+*=/%~-]+', Operator),
  825. (r'[$]+', Keyword.Constant),
  826. (wordop, Operator.Word),
  827. (type, Keyword.Type)
  828. ],
  829. }
  830. def analyse_text(text):
  831. # See above
  832. if re.match(r'PROC', text, re.I):
  833. return True
  834. class Ca65Lexer(RegexLexer):
  835. """
  836. For ca65 assembler sources.
  837. """
  838. name = 'ca65 assembler'
  839. aliases = ['ca65']
  840. filenames = ['*.s']
  841. url = 'https://cc65.github.io'
  842. version_added = '1.6'
  843. flags = re.IGNORECASE
  844. tokens = {
  845. 'root': [
  846. (r';.*', Comment.Single),
  847. (r'\s+', Whitespace),
  848. (r'[a-z_.@$][\w.@$]*:', Name.Label),
  849. (r'((ld|st)[axy]|(in|de)[cxy]|asl|lsr|ro[lr]|adc|sbc|cmp|cp[xy]'
  850. r'|cl[cvdi]|se[cdi]|jmp|jsr|bne|beq|bpl|bmi|bvc|bvs|bcc|bcs'
  851. r'|p[lh][ap]|rt[is]|brk|nop|ta[xy]|t[xy]a|txs|tsx|and|ora|eor'
  852. r'|bit)\b', Keyword),
  853. (r'\.\w+', Keyword.Pseudo),
  854. (r'[-+~*/^&|!<>=]', Operator),
  855. (r'"[^"\n]*.', String),
  856. (r"'[^'\n]*.", String.Char),
  857. (r'\$[0-9a-f]+|[0-9a-f]+h\b', Number.Hex),
  858. (r'\d+', Number.Integer),
  859. (r'%[01]+', Number.Bin),
  860. (r'[#,.:()=\[\]]', Punctuation),
  861. (r'[a-z_.@$][\w.@$]*', Name),
  862. ]
  863. }
  864. def analyse_text(self, text):
  865. # comments in GAS start with "#"
  866. if re.search(r'^\s*;', text, re.MULTILINE):
  867. return 0.9
  868. class Dasm16Lexer(RegexLexer):
  869. """
  870. For DCPU-16 Assembly.
  871. """
  872. name = 'DASM16'
  873. url = 'http://0x10c.com/doc/dcpu-16.txt'
  874. aliases = ['dasm16']
  875. filenames = ['*.dasm16', '*.dasm']
  876. mimetypes = ['text/x-dasm16']
  877. version_added = '2.4'
  878. INSTRUCTIONS = [
  879. 'SET',
  880. 'ADD', 'SUB',
  881. 'MUL', 'MLI',
  882. 'DIV', 'DVI',
  883. 'MOD', 'MDI',
  884. 'AND', 'BOR', 'XOR',
  885. 'SHR', 'ASR', 'SHL',
  886. 'IFB', 'IFC', 'IFE', 'IFN', 'IFG', 'IFA', 'IFL', 'IFU',
  887. 'ADX', 'SBX',
  888. 'STI', 'STD',
  889. 'JSR',
  890. 'INT', 'IAG', 'IAS', 'RFI', 'IAQ', 'HWN', 'HWQ', 'HWI',
  891. ]
  892. REGISTERS = [
  893. 'A', 'B', 'C',
  894. 'X', 'Y', 'Z',
  895. 'I', 'J',
  896. 'SP', 'PC', 'EX',
  897. 'POP', 'PEEK', 'PUSH'
  898. ]
  899. # Regexes yo
  900. char = r'[a-zA-Z0-9_$@.]'
  901. identifier = r'(?:[a-zA-Z$_]' + char + r'*|\.' + char + '+)'
  902. number = r'[+-]?(?:0[xX][a-zA-Z0-9]+|\d+)'
  903. binary_number = r'0b[01_]+'
  904. instruction = r'(?i)(' + '|'.join(INSTRUCTIONS) + ')'
  905. single_char = r"'\\?" + char + "'"
  906. string = r'"(\\"|[^"])*"'
  907. def guess_identifier(lexer, match):
  908. ident = match.group(0)
  909. klass = Name.Variable if ident.upper() in lexer.REGISTERS else Name.Label
  910. yield match.start(), klass, ident
  911. tokens = {
  912. 'root': [
  913. include('whitespace'),
  914. (':' + identifier, Name.Label),
  915. (identifier + ':', Name.Label),
  916. (instruction, Name.Function, 'instruction-args'),
  917. (r'\.' + identifier, Name.Function, 'data-args'),
  918. (r'[\r\n]+', Whitespace)
  919. ],
  920. 'numeric' : [
  921. (binary_number, Number.Integer),
  922. (number, Number.Integer),
  923. (single_char, String),
  924. ],
  925. 'arg' : [
  926. (identifier, guess_identifier),
  927. include('numeric')
  928. ],
  929. 'deref' : [
  930. (r'\+', Punctuation),
  931. (r'\]', Punctuation, '#pop'),
  932. include('arg'),
  933. include('whitespace')
  934. ],
  935. 'instruction-line' : [
  936. (r'[\r\n]+', Whitespace, '#pop'),
  937. (r';.*?$', Comment, '#pop'),
  938. include('whitespace')
  939. ],
  940. 'instruction-args': [
  941. (r',', Punctuation),
  942. (r'\[', Punctuation, 'deref'),
  943. include('arg'),
  944. include('instruction-line')
  945. ],
  946. 'data-args' : [
  947. (r',', Punctuation),
  948. include('numeric'),
  949. (string, String),
  950. include('instruction-line')
  951. ],
  952. 'whitespace': [
  953. (r'\n', Whitespace),
  954. (r'\s+', Whitespace),
  955. (r';.*?\n', Comment)
  956. ],
  957. }