sql.py 48 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203
  1. """
  2. pygments.lexers.sql
  3. ~~~~~~~~~~~~~~~~~~~
  4. Lexers for various SQL dialects and related interactive sessions.
  5. Postgres specific lexers:
  6. `PostgresLexer`
  7. A SQL lexer for the PostgreSQL dialect. Differences w.r.t. the SQL
  8. lexer are:
  9. - keywords and data types list parsed from the PG docs (run the
  10. `_postgres_builtins` module to update them);
  11. - Content of $-strings parsed using a specific lexer, e.g. the content
  12. of a PL/Python function is parsed using the Python lexer;
  13. - parse PG specific constructs: E-strings, $-strings, U&-strings,
  14. different operators and punctuation.
  15. `PlPgsqlLexer`
  16. A lexer for the PL/pgSQL language. Adds a few specific construct on
  17. top of the PG SQL lexer (such as <<label>>).
  18. `PostgresConsoleLexer`
  19. A lexer to highlight an interactive psql session:
  20. - identifies the prompt and does its best to detect the end of command
  21. in multiline statement where not all the lines are prefixed by a
  22. prompt, telling them apart from the output;
  23. - highlights errors in the output and notification levels;
  24. - handles psql backslash commands.
  25. `PostgresExplainLexer`
  26. A lexer to highlight Postgres execution plan.
  27. The ``tests/examplefiles`` contains a few test files with data to be
  28. parsed by these lexers.
  29. :copyright: Copyright 2006-2025 by the Pygments team, see AUTHORS.
  30. :license: BSD, see LICENSE for details.
  31. """
  32. import collections
  33. import re
  34. from pygments.lexer import Lexer, RegexLexer, do_insertions, bygroups, words
  35. from pygments.token import Punctuation, Whitespace, Text, Comment, Operator, \
  36. Keyword, Name, String, Number, Generic, Literal
  37. from pygments.lexers import get_lexer_by_name, ClassNotFound
  38. from pygments.lexers._postgres_builtins import KEYWORDS, DATATYPES, \
  39. PSEUDO_TYPES, PLPGSQL_KEYWORDS, EXPLAIN_KEYWORDS
  40. from pygments.lexers._mysql_builtins import \
  41. MYSQL_CONSTANTS, \
  42. MYSQL_DATATYPES, \
  43. MYSQL_FUNCTIONS, \
  44. MYSQL_KEYWORDS, \
  45. MYSQL_OPTIMIZER_HINTS
  46. from pygments.lexers import _googlesql_builtins
  47. from pygments.lexers import _tsql_builtins
# Public lexer names exported by this module.
__all__ = ['GoogleSqlLexer', 'PostgresLexer', 'PlPgsqlLexer',
           'PostgresConsoleLexer', 'PostgresExplainLexer', 'SqlLexer',
           'TransactSqlLexer', 'MySqlLexer', 'SqliteConsoleLexer', 'RqlLexer']

# Matches one line at a time, including its trailing newline.
line_re = re.compile('.*?\n')
# Primary and continuation prompts of the sqlite3 interactive shell.
sqlite_prompt_re = re.compile(r'^(?:sqlite| ...)>(?= )')

# LANGUAGE clause: used to pick a sub-lexer for $-quoted function bodies.
language_re = re.compile(r"\s+LANGUAGE\s+'?(\w+)'?", re.IGNORECASE)

# A DO statement implies plpgsql when no LANGUAGE clause is found nearby.
do_re = re.compile(r'\bDO\b', re.IGNORECASE)

# Regular expressions for analyse_text()
name_between_bracket_re = re.compile(r'\[[a-zA-Z_]\w*\]')
name_between_backtick_re = re.compile(r'`[a-zA-Z_]\w*`')
tsql_go_re = re.compile(r'\bgo\b', re.IGNORECASE)
tsql_declare_re = re.compile(r'\bdeclare\s+@', re.IGNORECASE)
tsql_variable_re = re.compile(r'@[a-zA-Z_]\w*\b')

# Identifiers for analyse_text()
googlesql_identifiers = (
    _googlesql_builtins.functionnames
    + _googlesql_builtins.keywords
    + _googlesql_builtins.types)
def language_callback(lexer, match):
    """Parse the content of a $-string using a lexer

    The lexer is chosen looking for a nearby LANGUAGE or assumed as
    plpgsql if inside a DO statement and no LANGUAGE has been found.

    `lexer` must expose ``text`` (the whole input, set by
    ``PostgresBase.get_tokens_unprocessed``) and ``_get_lexer``.
    """
    lx = None
    # Look for a LANGUAGE clause shortly *after* the $-string ...
    m = language_re.match(lexer.text[match.end():match.end()+100])
    if m is not None:
        lx = lexer._get_lexer(m.group(1))
    else:
        # ... otherwise take the last LANGUAGE shortly *before* it ...
        m = list(language_re.finditer(
            lexer.text[max(0, match.start()-100):match.start()]))
        if m:
            lx = lexer._get_lexer(m[-1].group(1))
        else:
            # ... else fall back to plpgsql if a DO statement precedes it.
            m = list(do_re.finditer(
                lexer.text[max(0, match.start()-25):match.start()]))
            if m:
                lx = lexer._get_lexer('plpgsql')

    # 1 = $, 2 = delimiter, 3 = $
    yield (match.start(1), String, match.group(1))
    yield (match.start(2), String.Delimiter, match.group(2))
    yield (match.start(3), String, match.group(3))
    # 4 = string contents
    if lx:
        # Delegate the body to the language-specific lexer.
        yield from lx.get_tokens_unprocessed(match.group(4))
    else:
        yield (match.start(4), String, match.group(4))
    # 5 = $, 6 = delimiter, 7 = $
    yield (match.start(5), String, match.group(5))
    yield (match.start(6), String.Delimiter, match.group(6))
    yield (match.start(7), String, match.group(7))
  98. class PostgresBase:
  99. """Base class for Postgres-related lexers.
  100. This is implemented as a mixin to avoid the Lexer metaclass kicking in.
  101. this way the different lexer don't have a common Lexer ancestor. If they
  102. had, _tokens could be created on this ancestor and not updated for the
  103. other classes, resulting e.g. in PL/pgSQL parsed as SQL. This shortcoming
  104. seem to suggest that regexp lexers are not really subclassable.
  105. """
  106. def get_tokens_unprocessed(self, text, *args):
  107. # Have a copy of the entire text to be used by `language_callback`.
  108. self.text = text
  109. yield from super().get_tokens_unprocessed(text, *args)
  110. def _get_lexer(self, lang):
  111. if lang.lower() == 'sql':
  112. return get_lexer_by_name('postgresql', **self.options)
  113. tries = [lang]
  114. if lang.startswith('pl'):
  115. tries.append(lang[2:])
  116. if lang.endswith('u'):
  117. tries.append(lang[:-1])
  118. if lang.startswith('pl') and lang.endswith('u'):
  119. tries.append(lang[2:-1])
  120. for lx in tries:
  121. try:
  122. return get_lexer_by_name(lx, **self.options)
  123. except ClassNotFound:
  124. pass
  125. else:
  126. # TODO: better logging
  127. # print >>sys.stderr, "language not found:", lang
  128. return None
class PostgresLexer(PostgresBase, RegexLexer):
    """
    Lexer for the PostgreSQL dialect of SQL.
    """

    name = 'PostgreSQL SQL dialect'
    aliases = ['postgresql', 'postgres']
    mimetypes = ['text/x-postgresql']
    url = 'https://www.postgresql.org'
    version_added = '1.5'

    flags = re.IGNORECASE
    tokens = {
        'root': [
            (r'\s+', Whitespace),
            (r'--.*\n?', Comment.Single),
            (r'/\*', Comment.Multiline, 'multiline-comments'),
            # Multi-word types (e.g. "double precision") may be separated
            # by any run of whitespace.
            (r'(' + '|'.join(s.replace(" ", r"\s+")
                             for s in DATATYPES + PSEUDO_TYPES) + r')\b',
             Name.Builtin),
            (words(KEYWORDS, suffix=r'\b'), Keyword),
            (r'[+*/<>=~!@#%^&|`?-]+', Operator),
            (r'::', Operator),  # cast
            # Positional function parameter ($1, $2, ...)
            (r'\$\d+', Name.Variable),
            (r'([0-9]*\.[0-9]*|[0-9]+)(e[+-]?[0-9]+)?', Number.Float),
            (r'[0-9]+', Number.Integer),
            # E'...' escape strings and U&'...' Unicode strings
            (r"((?:E|U&)?)(')", bygroups(String.Affix, String.Single), 'string'),
            # quoted identifier
            (r'((?:U&)?)(")', bygroups(String.Affix, String.Name), 'quoted-ident'),
            # $tag$ ... $tag$ strings; the body is lexed by language_callback
            (r'(?s)(\$)([^$]*)(\$)(.*?)(\$)(\2)(\$)', language_callback),
            (r'[a-z_]\w*', Name),

            # psql variable in SQL
            (r""":(['"]?)[a-z]\w*\b\1""", Name.Variable),

            (r'[;:()\[\]{},.]', Punctuation),
        ],
        'multiline-comments': [
            # /* ... */ comments nest in PostgreSQL
            (r'/\*', Comment.Multiline, 'multiline-comments'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[^/*]+', Comment.Multiline),
            (r'[/*]', Comment.Multiline)
        ],
        'string': [
            (r"[^']+", String.Single),
            (r"''", String.Single),  # doubled quote escapes a quote
            (r"'", String.Single, '#pop'),
        ],
        'quoted-ident': [
            (r'[^"]+', String.Name),
            (r'""', String.Name),  # doubled double-quote escapes it
            (r'"', String.Name, '#pop'),
        ],
    }
class PlPgsqlLexer(PostgresBase, RegexLexer):
    """
    Handle the extra syntax in Pl/pgSQL language.
    """

    name = 'PL/pgSQL'
    aliases = ['plpgsql']
    mimetypes = ['text/x-plpgsql']
    url = 'https://www.postgresql.org/docs/current/plpgsql.html'
    version_added = '1.5'

    flags = re.IGNORECASE
    # FIXME: use inheritance
    # Shallow-copy the PostgreSQL token table so the edits below don't
    # leak back into PostgresLexer.
    tokens = {name: state[:] for (name, state) in PostgresLexer.tokens.items()}

    # extend the keywords list: replace the plain SQL Keyword rule with
    # one that also knows the PL/pgSQL keywords
    for i, pattern in enumerate(tokens['root']):
        if pattern[1] == Keyword:
            tokens['root'][i] = (
                words(KEYWORDS + PLPGSQL_KEYWORDS, suffix=r'\b'),
                Keyword)
            del i
            break
    else:
        assert 0, "SQL keywords not found"

    # Add specific PL/pgSQL rules (before the SQL ones)
    tokens['root'][:0] = [
        (r'\%[a-z]\w*\b', Name.Builtin),  # actually, a datatype
        (r':=', Operator),
        (r'\<\<[a-z]\w*\>\>', Name.Label),
        (r'\#[a-z]\w*\b', Keyword.Pseudo),  # #variable_conflict
    ]
class PsqlRegexLexer(PostgresBase, RegexLexer):
    """
    Extend the PostgresLexer adding support specific for psql commands.

    This is not a complete psql lexer yet as it lacks prompt support
    and output rendering.
    """

    name = 'PostgreSQL console - regexp based lexer'
    aliases = []    # not public

    flags = re.IGNORECASE
    # Shallow-copy the PostgreSQL token table so the additions below
    # don't leak back into PostgresLexer.
    tokens = {name: state[:] for (name, state) in PostgresLexer.tokens.items()}

    # A backslash command switches to the dedicated psql-command state.
    tokens['root'].append(
        (r'\\[^\s]+', Keyword.Pseudo, 'psql-command'))
    tokens['psql-command'] = [
        (r'\n', Text, 'root'),  # end of the command: back to SQL
        (r'\s+', Whitespace),
        (r'\\[^\s]+', Keyword.Pseudo),
        # psql variable, optionally quoted (:var, :'var', :"var")
        (r""":(['"]?)[a-z]\w*\b\1""", Name.Variable),
        (r"'(''|[^'])*'", String.Single),
        (r"`([^`])*`", String.Backtick),
        (r"[^\s]+", String.Symbol),
    ]
  229. re_prompt = re.compile(r'^(\S.*?)??[=\-\(\$\'\"][#>]')
  230. re_psql_command = re.compile(r'\s*\\')
  231. re_end_command = re.compile(r';\s*(--.*?)?$')
  232. re_psql_command = re.compile(r'(\s*)(\\.+?)(\s+)$')
  233. re_error = re.compile(r'(ERROR|FATAL):')
  234. re_message = re.compile(
  235. r'((?:DEBUG|INFO|NOTICE|WARNING|ERROR|'
  236. r'FATAL|HINT|DETAIL|CONTEXT|LINE [0-9]+):)(.*?\n)')
  237. class lookahead:
  238. """Wrap an iterator and allow pushing back an item."""
  239. def __init__(self, x):
  240. self.iter = iter(x)
  241. self._nextitem = None
  242. def __iter__(self):
  243. return self
  244. def send(self, i):
  245. self._nextitem = i
  246. return i
  247. def __next__(self):
  248. if self._nextitem is not None:
  249. ni = self._nextitem
  250. self._nextitem = None
  251. return ni
  252. return next(self.iter)
  253. next = __next__
class PostgresConsoleLexer(Lexer):
    """
    Lexer for psql sessions.
    """

    name = 'PostgreSQL console (psql)'
    aliases = ['psql', 'postgresql-console', 'postgres-console']
    mimetypes = ['text/x-postgresql-psql']
    url = 'https://www.postgresql.org'
    version_added = '1.5'
    _example = "psql/psql_session.txt"

    def get_tokens_unprocessed(self, data):
        # SQL fragments are delegated to the regexp-based psql lexer.
        sql = PsqlRegexLexer(**self.options)

        # lookahead lets us push a prompt line back when it marks the
        # start of the next command while we are still emitting output.
        lines = lookahead(line_re.findall(data))

        # prompt-output cycle
        while 1:

            # consume the lines of the command: start with an optional prompt
            # and continue until the end of command is detected
            curcode = ''
            insertions = []
            for line in lines:
                # Identify a shell prompt in case of psql commandline example
                if line.startswith('$') and not curcode:
                    lexer = get_lexer_by_name('console', **self.options)
                    yield from lexer.get_tokens_unprocessed(line)
                    break

                # Identify a psql prompt
                mprompt = re_prompt.match(line)
                if mprompt is not None:
                    # record the prompt so it can be interleaved with the
                    # SQL tokens by do_insertions() below
                    insertions.append((len(curcode),
                                       [(0, Generic.Prompt, mprompt.group())]))
                    curcode += line[len(mprompt.group()):]
                else:
                    # continuation line without a prompt
                    curcode += line

                # Check if this is the end of the command
                # TODO: better handle multiline comments at the end with
                # a lexer with an external state?
                if re_psql_command.match(curcode) \
                   or re_end_command.search(curcode):
                    break

            # Emit the combined stream of command and prompt(s)
            yield from do_insertions(insertions,
                                     sql.get_tokens_unprocessed(curcode))

            # Emit the output lines
            out_token = Generic.Output
            for line in lines:
                mprompt = re_prompt.match(line)
                if mprompt is not None:
                    # push the line back to have it processed by the prompt
                    lines.send(line)
                    break

                mmsg = re_message.match(line)
                if mmsg is not None:
                    # an ERROR/FATAL message switches the rest of the
                    # output block to Generic.Error
                    if mmsg.group(1).startswith("ERROR") \
                       or mmsg.group(1).startswith("FATAL"):
                        out_token = Generic.Error
                    yield (mmsg.start(1), Generic.Strong, mmsg.group(1))
                    yield (mmsg.start(2), out_token, mmsg.group(2))
                else:
                    yield (0, out_token, line)
            else:
                # input exhausted without finding another prompt: done
                return
  315. class PostgresExplainLexer(RegexLexer):
  316. """
  317. Handle PostgreSQL EXPLAIN output
  318. """
  319. name = 'PostgreSQL EXPLAIN dialect'
  320. aliases = ['postgres-explain']
  321. filenames = ['*.explain']
  322. mimetypes = ['text/x-postgresql-explain']
  323. url = 'https://www.postgresql.org/docs/current/using-explain.html'
  324. version_added = '2.15'
  325. tokens = {
  326. 'root': [
  327. (r'(:|\(|\)|ms|kB|->|\.\.|\,|\/)', Punctuation),
  328. (r'(\s+)', Whitespace),
  329. # This match estimated cost and effectively measured counters with ANALYZE
  330. # Then, we move to instrumentation state
  331. (r'(cost)(=?)', bygroups(Name.Class, Punctuation), 'instrumentation'),
  332. (r'(actual)( )(=?)', bygroups(Name.Class, Whitespace, Punctuation), 'instrumentation'),
  333. # Misc keywords
  334. (words(('actual', 'Memory Usage', 'Disk Usage', 'Memory', 'Buckets', 'Batches',
  335. 'originally', 'row', 'rows', 'Hits', 'Misses',
  336. 'Evictions', 'Overflows', 'Planned Partitions'), suffix=r'\b'),
  337. Comment.Single),
  338. (r'(hit|read|dirtied|written|write|time|calls)(=)', bygroups(Comment.Single, Operator)),
  339. (r'(shared|temp|local)', Keyword.Pseudo),
  340. # We move to sort state in order to emphasize specific keywords (especially disk access)
  341. (r'(Sort Method)(: )', bygroups(Comment.Preproc, Punctuation), 'sort'),
  342. # These keywords can be followed by an object, like a table
  343. (r'(Sort Key|Group Key|Presorted Key|Hash Key)(:)( )',
  344. bygroups(Comment.Preproc, Punctuation, Whitespace), 'object_name'),
  345. (r'(Cache Key|Cache Mode)(:)( )', bygroups(Comment, Punctuation, Whitespace), 'object_name'),
  346. # These keywords can be followed by a predicate
  347. (words(('Join Filter', 'Subplans Removed', 'Filter', 'Merge Cond',
  348. 'Hash Cond', 'Index Cond', 'Recheck Cond', 'Heap Blocks',
  349. 'TID Cond', 'Run Condition', 'Order By', 'Function Call',
  350. 'Table Function Call', 'Inner Unique', 'Params Evaluated',
  351. 'Single Copy', 'Sampling', 'One-Time Filter', 'Output',
  352. 'Relations', 'Remote SQL'), suffix=r'\b'),
  353. Comment.Preproc, 'predicate'),
  354. # Special keyword to handle ON CONFLICT
  355. (r'Conflict ', Comment.Preproc, 'conflict'),
  356. # Special keyword for InitPlan or SubPlan
  357. (r'(InitPlan|SubPlan)( )(\d+)( )',
  358. bygroups(Keyword, Whitespace, Number.Integer, Whitespace),
  359. 'init_plan'),
  360. (words(('Sort Method', 'Join Filter', 'Planning time',
  361. 'Planning Time', 'Execution time', 'Execution Time',
  362. 'Workers Planned', 'Workers Launched', 'Buffers',
  363. 'Planning', 'Worker', 'Query Identifier', 'Time',
  364. 'Full-sort Groups', 'Pre-sorted Groups'), suffix=r'\b'), Comment.Preproc),
  365. # Emphasize these keywords
  366. (words(('Rows Removed by Join Filter', 'Rows Removed by Filter',
  367. 'Rows Removed by Index Recheck',
  368. 'Heap Fetches', 'never executed'),
  369. suffix=r'\b'), Name.Exception),
  370. (r'(I/O Timings)(:)( )', bygroups(Name.Exception, Punctuation, Whitespace)),
  371. (words(EXPLAIN_KEYWORDS, suffix=r'\b'), Keyword),
  372. # join keywords
  373. (r'((Right|Left|Full|Semi|Anti) Join)', Keyword.Type),
  374. (r'(Parallel |Async |Finalize |Partial )', Comment.Preproc),
  375. (r'Backward', Comment.Preproc),
  376. (r'(Intersect|Except|Hash)', Comment.Preproc),
  377. (r'(CTE)( )(\w*)?', bygroups(Comment, Whitespace, Name.Variable)),
  378. # Treat "on" and "using" as a punctuation
  379. (r'(on|using)', Punctuation, 'object_name'),
  380. # strings
  381. (r"'(''|[^'])*'", String.Single),
  382. # numbers
  383. (r'-?\d+\.\d+', Number.Float),
  384. (r'(-?\d+)', Number.Integer),
  385. # boolean
  386. (r'(true|false)', Name.Constant),
  387. # explain header
  388. (r'\s*QUERY PLAN\s*\n\s*-+', Comment.Single),
  389. # Settings
  390. (r'(Settings)(:)( )', bygroups(Comment.Preproc, Punctuation, Whitespace), 'setting'),
  391. # Handle JIT counters
  392. (r'(JIT|Functions|Options|Timing)(:)', bygroups(Comment.Preproc, Punctuation)),
  393. (r'(Inlining|Optimization|Expressions|Deforming|Generation|Emission|Total)', Keyword.Pseudo),
  394. # Handle Triggers counters
  395. (r'(Trigger)( )(\S*)(:)( )',
  396. bygroups(Comment.Preproc, Whitespace, Name.Variable, Punctuation, Whitespace)),
  397. ],
  398. 'expression': [
  399. # matches any kind of parenthesized expression
  400. # the first opening paren is matched by the 'caller'
  401. (r'\(', Punctuation, '#push'),
  402. (r'\)', Punctuation, '#pop'),
  403. (r'(never executed)', Name.Exception),
  404. (r'[^)(]+', Comment),
  405. ],
  406. 'object_name': [
  407. # This is a cost or analyze measure
  408. (r'(\(cost)(=?)', bygroups(Name.Class, Punctuation), 'instrumentation'),
  409. (r'(\(actual)( )(=?)', bygroups(Name.Class, Whitespace, Punctuation), 'instrumentation'),
  410. # if object_name is parenthesized, mark opening paren as
  411. # punctuation, call 'expression', and exit state
  412. (r'\(', Punctuation, 'expression'),
  413. (r'(on)', Punctuation),
  414. # matches possibly schema-qualified table and column names
  415. (r'\w+(\.\w+)*( USING \S+| \w+ USING \S+)', Name.Variable),
  416. (r'\"?\w+\"?(?:\.\"?\w+\"?)?', Name.Variable),
  417. (r'\'\S*\'', Name.Variable),
  418. # if we encounter a comma, another object is listed
  419. (r',\n', Punctuation, 'object_name'),
  420. (r',', Punctuation, 'object_name'),
  421. # special case: "*SELECT*"
  422. (r'"\*SELECT\*( \d+)?"(.\w+)?', Name.Variable),
  423. (r'"\*VALUES\*(_\d+)?"(.\w+)?', Name.Variable),
  424. (r'"ANY_subquery"', Name.Variable),
  425. # Variable $1 ...
  426. (r'\$\d+', Name.Variable),
  427. # cast
  428. (r'::\w+', Name.Variable),
  429. (r' +', Whitespace),
  430. (r'"', Punctuation),
  431. (r'\[\.\.\.\]', Punctuation),
  432. (r'\)', Punctuation, '#pop'),
  433. ],
  434. 'predicate': [
  435. # if predicate is parenthesized, mark paren as punctuation
  436. (r'(\()([^\n]*)(\))', bygroups(Punctuation, Name.Variable, Punctuation), '#pop'),
  437. # otherwise color until newline
  438. (r'[^\n]*', Name.Variable, '#pop'),
  439. ],
  440. 'instrumentation': [
  441. (r'=|\.\.', Punctuation),
  442. (r' +', Whitespace),
  443. (r'(rows|width|time|loops)', Name.Class),
  444. (r'\d+\.\d+', Number.Float),
  445. (r'(\d+)', Number.Integer),
  446. (r'\)', Punctuation, '#pop'),
  447. ],
  448. 'conflict': [
  449. (r'(Resolution: )(\w+)', bygroups(Comment.Preproc, Name.Variable)),
  450. (r'(Arbiter \w+:)', Comment.Preproc, 'object_name'),
  451. (r'(Filter: )', Comment.Preproc, 'predicate'),
  452. ],
  453. 'setting': [
  454. (r'([a-z_]*?)(\s*)(=)(\s*)(\'.*?\')', bygroups(Name.Attribute, Whitespace, Operator, Whitespace, String)),
  455. (r'\, ', Punctuation),
  456. ],
  457. 'init_plan': [
  458. (r'\(', Punctuation),
  459. (r'returns \$\d+(,\$\d+)?', Name.Variable),
  460. (r'\)', Punctuation, '#pop'),
  461. ],
  462. 'sort': [
  463. (r':|kB', Punctuation),
  464. (r'(quicksort|top-N|heapsort|Average|Memory|Peak)', Comment.Prepoc),
  465. (r'(external|merge|Disk|sort)', Name.Exception),
  466. (r'(\d+)', Number.Integer),
  467. (r' +', Whitespace),
  468. ],
  469. }
  470. class SqlLexer(RegexLexer):
  471. """
  472. Lexer for Structured Query Language. Currently, this lexer does
  473. not recognize any special syntax except ANSI SQL.
  474. """
  475. name = 'SQL'
  476. aliases = ['sql']
  477. filenames = ['*.sql']
  478. mimetypes = ['text/x-sql']
  479. url = 'https://en.wikipedia.org/wiki/SQL'
  480. version_added = ''
  481. flags = re.IGNORECASE
  482. tokens = {
  483. 'root': [
  484. (r'\s+', Whitespace),
  485. (r'--.*\n?', Comment.Single),
  486. (r'/\*', Comment.Multiline, 'multiline-comments'),
  487. (words((
  488. 'ABORT', 'ABS', 'ABSOLUTE', 'ACCESS', 'ADA', 'ADD', 'ADMIN', 'AFTER',
  489. 'AGGREGATE', 'ALIAS', 'ALL', 'ALLOCATE', 'ALTER', 'ANALYSE', 'ANALYZE',
  490. 'AND', 'ANY', 'ARE', 'AS', 'ASC', 'ASENSITIVE', 'ASSERTION', 'ASSIGNMENT',
  491. 'ASYMMETRIC', 'AT', 'ATOMIC', 'AUTHORIZATION', 'AVG', 'BACKWARD',
  492. 'BEFORE', 'BEGIN', 'BETWEEN', 'BITVAR', 'BIT_LENGTH', 'BOTH', 'BREADTH',
  493. 'BY', 'C', 'CACHE', 'CALL', 'CALLED', 'CARDINALITY', 'CASCADE',
  494. 'CASCADED', 'CASE', 'CAST', 'CATALOG', 'CATALOG_NAME', 'CHAIN',
  495. 'CHARACTERISTICS', 'CHARACTER_LENGTH', 'CHARACTER_SET_CATALOG',
  496. 'CHARACTER_SET_NAME', 'CHARACTER_SET_SCHEMA', 'CHAR_LENGTH', 'CHECK',
  497. 'CHECKED', 'CHECKPOINT', 'CLASS', 'CLASS_ORIGIN', 'CLOB', 'CLOSE',
  498. 'CLUSTER', 'COALESCE', 'COBOL', 'COLLATE', 'COLLATION',
  499. 'COLLATION_CATALOG', 'COLLATION_NAME', 'COLLATION_SCHEMA', 'COLUMN',
  500. 'COLUMN_NAME', 'COMMAND_FUNCTION', 'COMMAND_FUNCTION_CODE', 'COMMENT',
  501. 'COMMIT', 'COMMITTED', 'COMPLETION', 'CONDITION_NUMBER', 'CONNECT',
  502. 'CONNECTION', 'CONNECTION_NAME', 'CONSTRAINT', 'CONSTRAINTS',
  503. 'CONSTRAINT_CATALOG', 'CONSTRAINT_NAME', 'CONSTRAINT_SCHEMA',
  504. 'CONSTRUCTOR', 'CONTAINS', 'CONTINUE', 'CONVERSION', 'CONVERT',
  505. 'COPY', 'CORRESPONDING', 'COUNT', 'CREATE', 'CREATEDB', 'CREATEUSER',
  506. 'CROSS', 'CUBE', 'CURRENT', 'CURRENT_DATE', 'CURRENT_PATH',
  507. 'CURRENT_ROLE', 'CURRENT_TIME', 'CURRENT_TIMESTAMP', 'CURRENT_USER',
  508. 'CURSOR', 'CURSOR_NAME', 'CYCLE', 'DATA', 'DATABASE',
  509. 'DATETIME_INTERVAL_CODE', 'DATETIME_INTERVAL_PRECISION', 'DAY',
  510. 'DEALLOCATE', 'DECLARE', 'DEFAULT', 'DEFAULTS', 'DEFERRABLE',
  511. 'DEFERRED', 'DEFINED', 'DEFINER', 'DELETE', 'DELIMITER', 'DELIMITERS',
  512. 'DEREF', 'DESC', 'DESCRIBE', 'DESCRIPTOR', 'DESTROY', 'DESTRUCTOR',
  513. 'DETERMINISTIC', 'DIAGNOSTICS', 'DICTIONARY', 'DISCONNECT', 'DISPATCH',
  514. 'DISTINCT', 'DO', 'DOMAIN', 'DROP', 'DYNAMIC', 'DYNAMIC_FUNCTION',
  515. 'DYNAMIC_FUNCTION_CODE', 'EACH', 'ELSE', 'ELSIF', 'ENCODING',
  516. 'ENCRYPTED', 'END', 'END-EXEC', 'EQUALS', 'ESCAPE', 'EVERY', 'EXCEPTION',
  517. 'EXCEPT', 'EXCLUDING', 'EXCLUSIVE', 'EXEC', 'EXECUTE', 'EXISTING',
  518. 'EXISTS', 'EXPLAIN', 'EXTERNAL', 'EXTRACT', 'FALSE', 'FETCH', 'FINAL',
  519. 'FIRST', 'FOR', 'FORCE', 'FOREIGN', 'FORTRAN', 'FORWARD', 'FOUND', 'FREE',
  520. 'FREEZE', 'FROM', 'FULL', 'FUNCTION', 'G', 'GENERAL', 'GENERATED', 'GET',
  521. 'GLOBAL', 'GO', 'GOTO', 'GRANT', 'GRANTED', 'GROUP', 'GROUPING',
  522. 'HANDLER', 'HAVING', 'HIERARCHY', 'HOLD', 'HOST', 'IDENTITY', 'IF',
  523. 'IGNORE', 'ILIKE', 'IMMEDIATE', 'IMMEDIATELY', 'IMMUTABLE', 'IMPLEMENTATION', 'IMPLICIT',
  524. 'IN', 'INCLUDING', 'INCREMENT', 'INDEX', 'INDITCATOR', 'INFIX',
  525. 'INHERITS', 'INITIALIZE', 'INITIALLY', 'INNER', 'INOUT', 'INPUT',
  526. 'INSENSITIVE', 'INSERT', 'INSTANTIABLE', 'INSTEAD', 'INTERSECT', 'INTO',
  527. 'INVOKER', 'IS', 'ISNULL', 'ISOLATION', 'ITERATE', 'JOIN', 'KEY',
  528. 'KEY_MEMBER', 'KEY_TYPE', 'LANCOMPILER', 'LANGUAGE', 'LARGE', 'LAST',
  529. 'LATERAL', 'LEADING', 'LEFT', 'LENGTH', 'LESS', 'LEVEL', 'LIKE', 'LIMIT',
  530. 'LISTEN', 'LOAD', 'LOCAL', 'LOCALTIME', 'LOCALTIMESTAMP', 'LOCATION',
  531. 'LOCATOR', 'LOCK', 'LOWER', 'MAP', 'MATCH', 'MAX', 'MAXVALUE',
  532. 'MESSAGE_LENGTH', 'MESSAGE_OCTET_LENGTH', 'MESSAGE_TEXT', 'METHOD', 'MIN',
  533. 'MINUTE', 'MINVALUE', 'MOD', 'MODE', 'MODIFIES', 'MODIFY', 'MONTH',
  534. 'MORE', 'MOVE', 'MUMPS', 'NAMES', 'NATIONAL', 'NATURAL', 'NCHAR', 'NCLOB',
  535. 'NEW', 'NEXT', 'NO', 'NOCREATEDB', 'NOCREATEUSER', 'NONE', 'NOT',
  536. 'NOTHING', 'NOTIFY', 'NOTNULL', 'NULL', 'NULLABLE', 'NULLIF', 'OBJECT',
  537. 'OCTET_LENGTH', 'OF', 'OFF', 'OFFSET', 'OIDS', 'OLD', 'ON', 'ONLY',
  538. 'OPEN', 'OPERATION', 'OPERATOR', 'OPTION', 'OPTIONS', 'OR', 'ORDER',
  539. 'ORDINALITY', 'OUT', 'OUTER', 'OUTPUT', 'OVERLAPS', 'OVERLAY',
  540. 'OVERRIDING', 'OWNER', 'PAD', 'PARAMETER', 'PARAMETERS', 'PARAMETER_MODE',
  541. 'PARAMETER_NAME', 'PARAMETER_ORDINAL_POSITION',
  542. 'PARAMETER_SPECIFIC_CATALOG', 'PARAMETER_SPECIFIC_NAME',
  543. 'PARAMETER_SPECIFIC_SCHEMA', 'PARTIAL', 'PASCAL', 'PENDANT', 'PERIOD', 'PLACING',
  544. 'PLI', 'POSITION', 'POSTFIX', 'PRECEEDS', 'PRECISION', 'PREFIX', 'PREORDER',
  545. 'PREPARE', 'PRESERVE', 'PRIMARY', 'PRIOR', 'PRIVILEGES', 'PROCEDURAL',
  546. 'PROCEDURE', 'PUBLIC', 'READ', 'READS', 'RECHECK', 'RECURSIVE', 'REF',
  547. 'REFERENCES', 'REFERENCING', 'REINDEX', 'RELATIVE', 'RENAME',
  548. 'REPEATABLE', 'REPLACE', 'RESET', 'RESTART', 'RESTRICT', 'RESULT',
  549. 'RETURN', 'RETURNED_LENGTH', 'RETURNED_OCTET_LENGTH', 'RETURNED_SQLSTATE',
  550. 'RETURNS', 'REVOKE', 'RIGHT', 'ROLE', 'ROLLBACK', 'ROLLUP', 'ROUTINE',
  551. 'ROUTINE_CATALOG', 'ROUTINE_NAME', 'ROUTINE_SCHEMA', 'ROW', 'ROWS',
  552. 'ROW_COUNT', 'RULE', 'SAVE_POINT', 'SCALE', 'SCHEMA', 'SCHEMA_NAME',
  553. 'SCOPE', 'SCROLL', 'SEARCH', 'SECOND', 'SECURITY', 'SELECT', 'SELF',
  554. 'SENSITIVE', 'SERIALIZABLE', 'SERVER_NAME', 'SESSION', 'SESSION_USER',
  555. 'SET', 'SETOF', 'SETS', 'SHARE', 'SHOW', 'SIMILAR', 'SIMPLE', 'SIZE',
  556. 'SOME', 'SOURCE', 'SPACE', 'SPECIFIC', 'SPECIFICTYPE', 'SPECIFIC_NAME',
  557. 'SQL', 'SQLCODE', 'SQLERROR', 'SQLEXCEPTION', 'SQLSTATE', 'SQLWARNINIG',
  558. 'STABLE', 'START', 'STATE', 'STATEMENT', 'STATIC', 'STATISTICS', 'STDIN',
  559. 'STDOUT', 'STORAGE', 'STRICT', 'STRUCTURE', 'STYPE', 'SUBCLASS_ORIGIN',
  560. 'SUBLIST', 'SUBSTRING', 'SUCCEEDS', 'SUM', 'SYMMETRIC', 'SYSID', 'SYSTEM',
  561. 'SYSTEM_USER', 'TABLE', 'TABLE_NAME', ' TEMP', 'TEMPLATE', 'TEMPORARY',
  562. 'TERMINATE', 'THAN', 'THEN', 'TIME', 'TIMESTAMP', 'TIMEZONE_HOUR',
  563. 'TIMEZONE_MINUTE', 'TO', 'TOAST', 'TRAILING', 'TRANSACTION',
  564. 'TRANSACTIONS_COMMITTED', 'TRANSACTIONS_ROLLED_BACK', 'TRANSACTION_ACTIVE',
  565. 'TRANSFORM', 'TRANSFORMS', 'TRANSLATE', 'TRANSLATION', 'TREAT', 'TRIGGER',
  566. 'TRIGGER_CATALOG', 'TRIGGER_NAME', 'TRIGGER_SCHEMA', 'TRIM', 'TRUE',
  567. 'TRUNCATE', 'TRUSTED', 'TYPE', 'UNCOMMITTED', 'UNDER', 'UNENCRYPTED',
  568. 'UNION', 'UNIQUE', 'UNKNOWN', 'UNLISTEN', 'UNNAMED', 'UNNEST', 'UNTIL',
  569. 'UPDATE', 'UPPER', 'USAGE', 'USER', 'USER_DEFINED_TYPE_CATALOG',
  570. 'USER_DEFINED_TYPE_NAME', 'USER_DEFINED_TYPE_SCHEMA', 'USING', 'VACUUM',
  571. 'VALID', 'VALIDATOR', 'VALUES', 'VARIABLE', 'VERBOSE',
  572. 'VERSION', 'VERSIONS', 'VERSIONING', 'VIEW',
  573. 'VOLATILE', 'WHEN', 'WHENEVER', 'WHERE', 'WITH', 'WITHOUT', 'WORK',
  574. 'WRITE', 'YEAR', 'ZONE'), suffix=r'\b'),
  575. Keyword),
  576. (words((
  577. 'ARRAY', 'BIGINT', 'BINARY', 'BIT', 'BLOB', 'BOOLEAN', 'CHAR',
  578. 'CHARACTER', 'DATE', 'DEC', 'DECIMAL', 'FLOAT', 'INT', 'INTEGER',
  579. 'INTERVAL', 'NUMBER', 'NUMERIC', 'REAL', 'SERIAL', 'SMALLINT',
  580. 'VARCHAR', 'VARYING', 'INT8', 'SERIAL8', 'TEXT'), suffix=r'\b'),
  581. Name.Builtin),
  582. (r'[+*/<>=~!@#%^&|`?-]', Operator),
  583. (r'[0-9]+', Number.Integer),
  584. # TODO: Backslash escapes?
  585. (r"'(''|[^'])*'", String.Single),
  586. (r'"(""|[^"])*"', String.Symbol), # not a real string literal in ANSI SQL
  587. (r'[a-z_][\w$]*', Name), # allow $s in strings for Oracle
  588. (r'[;:()\[\],.]', Punctuation)
  589. ],
  590. 'multiline-comments': [
  591. (r'/\*', Comment.Multiline, 'multiline-comments'),
  592. (r'\*/', Comment.Multiline, '#pop'),
  593. (r'[^/*]+', Comment.Multiline),
  594. (r'[/*]', Comment.Multiline)
  595. ]
  596. }
  597. def analyse_text(self, text):
  598. return
  599. class TransactSqlLexer(RegexLexer):
  600. """
  601. Transact-SQL (T-SQL) is Microsoft's and Sybase's proprietary extension to
  602. SQL.
  603. The list of keywords includes ODBC and keywords reserved for future use..
  604. """
  605. name = 'Transact-SQL'
  606. aliases = ['tsql', 't-sql']
  607. filenames = ['*.sql']
  608. mimetypes = ['text/x-tsql']
  609. url = 'https://www.tsql.info'
  610. version_added = ''
  611. flags = re.IGNORECASE
  612. tokens = {
  613. 'root': [
  614. (r'\s+', Whitespace),
  615. (r'--.*[$|\n]?', Comment.Single),
  616. (r'/\*', Comment.Multiline, 'multiline-comments'),
  617. (words(_tsql_builtins.OPERATORS), Operator),
  618. (words(_tsql_builtins.OPERATOR_WORDS, suffix=r'\b'), Operator.Word),
  619. (words(_tsql_builtins.TYPES, suffix=r'\b'), Name.Class),
  620. (words(_tsql_builtins.FUNCTIONS, suffix=r'\b'), Name.Function),
  621. (r'(goto)(\s+)(\w+\b)', bygroups(Keyword, Whitespace, Name.Label)),
  622. (words(_tsql_builtins.KEYWORDS, suffix=r'\b'), Keyword),
  623. (r'(\[)([^]]+)(\])', bygroups(Operator, Name, Operator)),
  624. (r'0x[0-9a-f]+', Number.Hex),
  625. # Float variant 1, for example: 1., 1.e2, 1.2e3
  626. (r'[0-9]+\.[0-9]*(e[+-]?[0-9]+)?', Number.Float),
  627. # Float variant 2, for example: .1, .1e2
  628. (r'\.[0-9]+(e[+-]?[0-9]+)?', Number.Float),
  629. # Float variant 3, for example: 123e45
  630. (r'[0-9]+e[+-]?[0-9]+', Number.Float),
  631. (r'[0-9]+', Number.Integer),
  632. (r"'(''|[^'])*'", String.Single),
  633. (r'"(""|[^"])*"', String.Symbol),
  634. (r'[;(),.]', Punctuation),
  635. # Below we use \w even for the first "real" character because
  636. # tokens starting with a digit have already been recognized
  637. # as Number above.
  638. (r'@@\w+', Name.Builtin),
  639. (r'@\w+', Name.Variable),
  640. (r'(\w+)(:)', bygroups(Name.Label, Punctuation)),
  641. (r'#?#?\w+', Name), # names for temp tables and anything else
  642. (r'\?', Name.Variable.Magic), # parameter for prepared statements
  643. ],
  644. 'multiline-comments': [
  645. (r'/\*', Comment.Multiline, 'multiline-comments'),
  646. (r'\*/', Comment.Multiline, '#pop'),
  647. (r'[^/*]+', Comment.Multiline),
  648. (r'[/*]', Comment.Multiline)
  649. ]
  650. }
  651. def analyse_text(text):
  652. rating = 0
  653. if tsql_declare_re.search(text):
  654. # Found T-SQL variable declaration.
  655. rating = 1.0
  656. else:
  657. name_between_backtick_count = len(
  658. name_between_backtick_re.findall(text))
  659. name_between_bracket_count = len(
  660. name_between_bracket_re.findall(text))
  661. # We need to check if there are any names using
  662. # backticks or brackets, as otherwise both are 0
  663. # and 0 >= 2 * 0, so we would always assume it's true
  664. dialect_name_count = name_between_backtick_count + name_between_bracket_count
  665. if dialect_name_count >= 1 and \
  666. name_between_bracket_count >= 2 * name_between_backtick_count:
  667. # Found at least twice as many [name] as `name`.
  668. rating += 0.5
  669. elif name_between_bracket_count > name_between_backtick_count:
  670. rating += 0.2
  671. elif name_between_bracket_count > 0:
  672. rating += 0.1
  673. if tsql_variable_re.search(text) is not None:
  674. rating += 0.1
  675. if tsql_go_re.search(text) is not None:
  676. rating += 0.1
  677. return rating
class MySqlLexer(RegexLexer):
    """The Oracle MySQL lexer.

    This lexer does not attempt to maintain strict compatibility with
    MariaDB syntax or keywords. Although MySQL and MariaDB's common code
    history suggests there may be significant overlap between the two,
    compatibility between the two is not a target for this lexer.
    """

    name = 'MySQL'
    aliases = ['mysql']
    mimetypes = ['text/x-mysql']
    url = 'https://www.mysql.com'
    version_added = ''

    flags = re.IGNORECASE

    # NOTE: rule order within each state is significant -- literals,
    # comments and variables must be matched before the generic
    # schema-object-name rule near the end of 'root'.
    tokens = {
        'root': [
            (r'\s+', Whitespace),

            # Comments
            (r'(?:#|--\s+).*', Comment.Single),
            # "/*+ ... */" opens an optimizer-hint comment, lexed in its
            # own state so hint names can be highlighted individually.
            (r'/\*\+', Comment.Special, 'optimizer-hints'),
            (r'/\*', Comment.Multiline, 'multiline-comment'),

            # Hexadecimal literals
            (r"x'([0-9a-f]{2})+'", Number.Hex),  # MySQL requires paired hex characters in this form.
            (r'0x[0-9a-f]+', Number.Hex),

            # Binary literals
            (r"b'[01]+'", Number.Bin),
            (r'0b[01]+', Number.Bin),

            # Numeric literals
            (r'[0-9]+\.[0-9]*(e[+-]?[0-9]+)?', Number.Float),  # Mandatory integer, optional fraction and exponent
            (r'[0-9]*\.[0-9]+(e[+-]?[0-9]+)?', Number.Float),  # Mandatory fraction, optional integer and exponent
            (r'[0-9]+e[+-]?[0-9]+', Number.Float),  # Exponents with integer significands are still floats
            (r'[0-9]+(?=[^0-9a-z$_\u0080-\uffff])', Number.Integer),  # Integers that are not in a schema object name

            # Date literals
            (r"\{\s*d\s*(?P<quote>['\"])\s*\d{2}(\d{2})?.?\d{2}.?\d{2}\s*(?P=quote)\s*\}",
             Literal.Date),

            # Time literals
            (r"\{\s*t\s*(?P<quote>['\"])\s*(?:\d+\s+)?\d{1,2}.?\d{1,2}.?\d{1,2}(\.\d*)?\s*(?P=quote)\s*\}",
             Literal.Date),

            # Timestamp literals
            (
                r"\{\s*ts\s*(?P<quote>['\"])\s*"
                r"\d{2}(?:\d{2})?.?\d{2}.?\d{2}"  # Date part
                r"\s+"  # Whitespace between date and time
                r"\d{1,2}.?\d{1,2}.?\d{1,2}(\.\d*)?"  # Time part
                r"\s*(?P=quote)\s*\}",
                Literal.Date
            ),

            # String literals
            (r"'", String.Single, 'single-quoted-string'),
            (r'"', String.Double, 'double-quoted-string'),

            # Variables
            (r'@@(?:global\.|persist\.|persist_only\.|session\.)?[a-z_]+', Name.Variable),
            (r'@[a-z0-9_$.]+', Name.Variable),
            (r"@'", Name.Variable, 'single-quoted-variable'),
            (r'@"', Name.Variable, 'double-quoted-variable'),
            (r"@`", Name.Variable, 'backtick-quoted-variable'),
            (r'\?', Name.Variable),  # For demonstrating prepared statements

            # Operators
            (r'[!%&*+/:<=>^|~-]+', Operator),

            # Exceptions; these words tokenize differently in different contexts.
            (r'\b(set)(?!\s*\()', Keyword),
            (r'\b(character)(\s+)(set)\b', bygroups(Keyword, Whitespace, Keyword)),
            # In all other known cases, "SET" is tokenized by MYSQL_DATATYPES.

            (words(MYSQL_CONSTANTS, prefix=r'\b', suffix=r'\b'), Name.Constant),
            (words(MYSQL_DATATYPES, prefix=r'\b', suffix=r'\b'), Keyword.Type),
            (words(MYSQL_KEYWORDS, prefix=r'\b', suffix=r'\b'), Keyword),
            (words(MYSQL_FUNCTIONS, prefix=r'\b', suffix=r'\b(\s*)(\()'),
             bygroups(Name.Function, Whitespace, Punctuation)),

            # Schema object names
            #
            # Note: Although the first regex supports unquoted all-numeric
            # identifiers, this will not be a problem in practice because
            # numeric literals have already been handled above.
            #
            ('[0-9a-z$_\u0080-\uffff]+', Name),
            (r'`', Name.Quoted, 'schema-object-name'),

            # Punctuation
            (r'[(),.;]', Punctuation),
        ],

        # Multiline comment substates
        # ---------------------------

        'optimizer-hints': [
            (r'[^*a-z]+', Comment.Special),
            (r'\*/', Comment.Special, '#pop'),
            # Known hint names get their own token type; anything else in
            # the hint block stays Comment.Special.
            (words(MYSQL_OPTIMIZER_HINTS, suffix=r'\b'), Comment.Preproc),
            ('[a-z]+', Comment.Special),
            (r'\*', Comment.Special),
        ],

        'multiline-comment': [
            (r'[^*]+', Comment.Multiline),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'\*', Comment.Multiline),
        ],

        # String substates
        # ----------------

        'single-quoted-string': [
            (r"[^'\\]+", String.Single),
            (r"''", String.Escape),  # A doubled quote is an escaped quote.
            (r"""\\[0'"bnrtZ\\%_]""", String.Escape),
            (r"'", String.Single, '#pop'),
        ],

        'double-quoted-string': [
            (r'[^"\\]+', String.Double),
            (r'""', String.Escape),
            (r"""\\[0'"bnrtZ\\%_]""", String.Escape),
            (r'"', String.Double, '#pop'),
        ],

        # Variable substates
        # ------------------

        'single-quoted-variable': [
            (r"[^']+", Name.Variable),
            (r"''", Name.Variable),
            (r"'", Name.Variable, '#pop'),
        ],

        'double-quoted-variable': [
            (r'[^"]+', Name.Variable),
            (r'""', Name.Variable),
            (r'"', Name.Variable, '#pop'),
        ],

        'backtick-quoted-variable': [
            (r'[^`]+', Name.Variable),
            (r'``', Name.Variable),
            (r'`', Name.Variable, '#pop'),
        ],

        # Schema object name substates
        # ----------------------------
        #
        # "Name.Quoted" and "Name.Quoted.Escape" are non-standard but
        # formatters will style them as "Name" by default but add
        # additional styles based on the token name. This gives users
        # flexibility to add custom styles as desired.
        #
        'schema-object-name': [
            (r'[^`]+', Name.Quoted),
            (r'``', Name.Quoted.Escape),
            (r'`', Name.Quoted, '#pop'),
        ],
    }

    def analyse_text(text):
        # Heuristic: MySQL quotes identifiers with backticks while T-SQL
        # uses square brackets, so rate by their relative frequency.
        rating = 0
        name_between_backtick_count = len(
            name_between_backtick_re.findall(text))
        name_between_bracket_count = len(
            name_between_bracket_re.findall(text))
        # Same logic as above in the TSQL analysis
        dialect_name_count = name_between_backtick_count + name_between_bracket_count
        if dialect_name_count >= 1 and \
           name_between_backtick_count >= 2 * name_between_bracket_count:
            # Found at least twice as many `name` as [name].
            rating += 0.5
        elif name_between_backtick_count > name_between_bracket_count:
            rating += 0.2
        elif name_between_backtick_count > 0:
            rating += 0.1
        return rating
class GoogleSqlLexer(RegexLexer):
    """
    GoogleSQL is Google's standard SQL dialect, formerly known as ZetaSQL.

    The list of keywords includes reserved words for future use.
    """

    name = 'GoogleSQL'
    aliases = ['googlesql', 'zetasql']
    filenames = ['*.googlesql', '*.googlesql.sql']
    mimetypes = ['text/x-google-sql', 'text/x-google-sql-aux']
    url = 'https://cloud.google.com/bigquery/googlesql'
    version_added = '2.19'

    flags = re.IGNORECASE

    # NOTE: rule order within each state is significant -- literals,
    # comments and variables must be matched before the generic
    # schema-object-name rule near the end of 'root'.
    tokens = {
        'root': [
            (r'\s+', Whitespace),

            # Comments
            (r'(?:#|--\s+).*', Comment.Single),
            (r'/\*', Comment.Multiline, 'multiline-comment'),

            # Hexadecimal literals
            (r"x'([0-9a-f]{2})+'", Number.Hex),
            (r'0x[0-9a-f]+', Number.Hex),

            # Binary literals
            (r"b'[01]+'", Number.Bin),
            (r'0b[01]+', Number.Bin),

            # Numeric literals
            (r'[0-9]+\.[0-9]*(e[+-]?[0-9]+)?', Number.Float),  # Mandatory integer, optional fraction and exponent
            (r'[0-9]*\.[0-9]+(e[+-]?[0-9]+)?', Number.Float),  # Mandatory fraction, optional integer and exponent
            (r'[0-9]+e[+-]?[0-9]+', Number.Float),  # Exponents with integer significands are still floats
            (r'[0-9]+(?=[^0-9a-z$_\u0080-\uffff])', Number.Integer),  # Integers that are not in a schema object name

            # Date literals
            (r"\{\s*d\s*(?P<quote>['\"])\s*\d{2}(\d{2})?.?\d{2}.?\d{2}\s*(?P=quote)\s*\}",
             Literal.Date),

            # Time literals
            (r"\{\s*t\s*(?P<quote>['\"])\s*(?:\d+\s+)?\d{1,2}.?\d{1,2}.?\d{1,2}(\.\d*)?\s*(?P=quote)\s*\}",
             Literal.Date),

            # Timestamp literals
            (
                r"\{\s*ts\s*(?P<quote>['\"])\s*"
                r"\d{2}(?:\d{2})?.?\d{2}.?\d{2}"  # Date part
                r"\s+"  # Whitespace between date and time
                r"\d{1,2}.?\d{1,2}.?\d{1,2}(\.\d*)?"  # Time part
                r"\s*(?P=quote)\s*\}",
                Literal.Date
            ),

            # String literals
            (r"'", String.Single, 'single-quoted-string'),
            (r'"', String.Double, 'double-quoted-string'),

            # Variables
            (r'@@(?:global\.|persist\.|persist_only\.|session\.)?[a-z_]+', Name.Variable),
            (r'@[a-z0-9_$.]+', Name.Variable),
            (r"@'", Name.Variable, 'single-quoted-variable'),
            (r'@"', Name.Variable, 'double-quoted-variable'),
            (r"@`", Name.Variable, 'backtick-quoted-variable'),
            (r'\?', Name.Variable),  # For demonstrating prepared statements

            # Exceptions; these words tokenize differently in different contexts.
            (r'\b(set)(?!\s*\()', Keyword),
            (r'\b(character)(\s+)(set)\b', bygroups(Keyword, Whitespace, Keyword)),

            # Constants, types, keywords, functions, operators
            (words(_googlesql_builtins.constants, prefix=r'\b', suffix=r'\b'), Name.Constant),
            (words(_googlesql_builtins.types, prefix=r'\b', suffix=r'\b'), Keyword.Type),
            (words(_googlesql_builtins.keywords, prefix=r'\b', suffix=r'\b'), Keyword),
            (words(_googlesql_builtins.functionnames, prefix=r'\b', suffix=r'\b(\s*)(\()'),
             bygroups(Name.Function, Whitespace, Punctuation)),
            (words(_googlesql_builtins.operators, prefix=r'\b', suffix=r'\b'), Operator),

            # Schema object names
            #
            # Note: Although the first regex supports unquoted all-numeric
            # identifiers, this will not be a problem in practice because
            # numeric literals have already been handled above.
            #
            ('[0-9a-z$_\u0080-\uffff]+', Name),
            (r'`', Name.Quoted, 'schema-object-name'),

            # Punctuation
            (r'[(),.;]', Punctuation),
        ],

        # Multiline comment substates
        # ---------------------------

        'multiline-comment': [
            (r'[^*]+', Comment.Multiline),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'\*', Comment.Multiline),
        ],

        # String substates
        # ----------------

        'single-quoted-string': [
            (r"[^'\\]+", String.Single),
            (r"''", String.Escape),  # A doubled quote is an escaped quote.
            (r"""\\[0'"bnrtZ\\%_]""", String.Escape),
            (r"'", String.Single, '#pop'),
        ],

        'double-quoted-string': [
            (r'[^"\\]+', String.Double),
            (r'""', String.Escape),
            (r"""\\[0'"bnrtZ\\%_]""", String.Escape),
            (r'"', String.Double, '#pop'),
        ],

        # Variable substates
        # ------------------

        'single-quoted-variable': [
            (r"[^']+", Name.Variable),
            (r"''", Name.Variable),
            (r"'", Name.Variable, '#pop'),
        ],

        'double-quoted-variable': [
            (r'[^"]+', Name.Variable),
            (r'""', Name.Variable),
            (r'"', Name.Variable, '#pop'),
        ],

        'backtick-quoted-variable': [
            (r'[^`]+', Name.Variable),
            (r'``', Name.Variable),
            (r'`', Name.Variable, '#pop'),
        ],

        # Schema object name substates
        # ----------------------------
        #
        # "Name.Quoted" and "Name.Quoted.Escape" are non-standard but
        # formatters will style them as "Name" by default but add
        # additional styles based on the token name. This gives users
        # flexibility to add custom styles as desired.
        #
        'schema-object-name': [
            (r'[^`]+', Name.Quoted),
            (r'``', Name.Quoted.Escape),
            (r'`', Name.Quoted, '#pop'),
        ],
    }

    def analyse_text(text):
        # Rate by how many whitespace-separated tokens in the text are
        # known GoogleSQL identifiers; each hit adds a small increment.
        tokens = collections.Counter(text.split())
        return 0.001 * sum(count for t, count in tokens.items()
                           if t in googlesql_identifiers)
  963. class SqliteConsoleLexer(Lexer):
  964. """
  965. Lexer for example sessions using sqlite3.
  966. """
  967. name = 'sqlite3con'
  968. aliases = ['sqlite3']
  969. filenames = ['*.sqlite3-console']
  970. mimetypes = ['text/x-sqlite3-console']
  971. url = 'https://www.sqlite.org'
  972. version_added = '0.11'
  973. _example = "sqlite3/sqlite3.sqlite3-console"
  974. def get_tokens_unprocessed(self, data):
  975. sql = SqlLexer(**self.options)
  976. curcode = ''
  977. insertions = []
  978. for match in line_re.finditer(data):
  979. line = match.group()
  980. prompt_match = sqlite_prompt_re.match(line)
  981. if prompt_match is not None:
  982. insertions.append((len(curcode),
  983. [(0, Generic.Prompt, line[:7])]))
  984. insertions.append((len(curcode),
  985. [(7, Whitespace, ' ')]))
  986. curcode += line[8:]
  987. else:
  988. if curcode:
  989. yield from do_insertions(insertions,
  990. sql.get_tokens_unprocessed(curcode))
  991. curcode = ''
  992. insertions = []
  993. if line.startswith('SQL error: '):
  994. yield (match.start(), Generic.Traceback, line)
  995. else:
  996. yield (match.start(), Generic.Output, line)
  997. if curcode:
  998. yield from do_insertions(insertions,
  999. sql.get_tokens_unprocessed(curcode))
class RqlLexer(RegexLexer):
    """
    Lexer for Relation Query Language.
    """

    name = 'RQL'
    url = 'http://www.logilab.org/project/rql'
    aliases = ['rql']
    filenames = ['*.rql']
    mimetypes = ['text/x-rql']
    version_added = '2.0'

    flags = re.IGNORECASE
    tokens = {
        'root': [
            (r'\s+', Whitespace),
            # RQL statement and clause keywords.
            (r'(DELETE|SET|INSERT|UNION|DISTINCT|WITH|WHERE|BEING|OR'
             r'|AND|NOT|GROUPBY|HAVING|ORDERBY|ASC|DESC|LIMIT|OFFSET'
             r'|TODAY|NOW|TRUE|FALSE|NULL|EXISTS)\b', Keyword),
            (r'[+*/<>=%-]', Operator),
            # Built-in names specific to RQL / CubicWeb schemas.
            (r'(Any|is|instance_of|CWEType|CWRelation)\b', Name.Builtin),
            (r'[0-9]+', Number.Integer),
            # Identifiers, optionally suffixed with '?' -- note re.IGNORECASE
            # makes [A-Z_] match lowercase letters as well.
            (r'[A-Z_]\w*\??', Name),
            (r"'(''|[^'])*'", String.Single),  # '' escapes a quote
            (r'"(""|[^"])*"', String.Single),
            (r'[;:()\[\],.]', Punctuation)
        ],
    }