reflection.py 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568
# dialects/mysql/reflection.py
# Copyright (C) 2005-2024 the SQLAlchemy authors and contributors
# <see AUTHORS file>
#
# This module is part of SQLAlchemy and is released under
# the MIT License: https://www.opensource.org/licenses/mit-license.php
  7. import re
  8. from .enumerated import ENUM
  9. from .enumerated import SET
  10. from .types import DATETIME
  11. from .types import TIME
  12. from .types import TIMESTAMP
  13. from ... import log
  14. from ... import types as sqltypes
  15. from ... import util
  16. class ReflectedState(object):
  17. """Stores raw information about a SHOW CREATE TABLE statement."""
  18. def __init__(self):
  19. self.columns = []
  20. self.table_options = {}
  21. self.table_name = None
  22. self.keys = []
  23. self.fk_constraints = []
  24. self.ck_constraints = []
  25. @log.class_logger
  26. class MySQLTableDefinitionParser(object):
  27. """Parses the results of a SHOW CREATE TABLE statement."""
  28. def __init__(self, dialect, preparer):
  29. self.dialect = dialect
  30. self.preparer = preparer
  31. self._prep_regexes()
  32. def parse(self, show_create, charset):
  33. state = ReflectedState()
  34. state.charset = charset
  35. for line in re.split(r"\r?\n", show_create):
  36. if line.startswith(" " + self.preparer.initial_quote):
  37. self._parse_column(line, state)
  38. # a regular table options line
  39. elif line.startswith(") "):
  40. self._parse_table_options(line, state)
  41. # an ANSI-mode table options line
  42. elif line == ")":
  43. pass
  44. elif line.startswith("CREATE "):
  45. self._parse_table_name(line, state)
  46. # Not present in real reflection, but may be if
  47. # loading from a file.
  48. elif not line:
  49. pass
  50. else:
  51. type_, spec = self._parse_constraints(line)
  52. if type_ is None:
  53. util.warn("Unknown schema content: %r" % line)
  54. elif type_ == "key":
  55. state.keys.append(spec)
  56. elif type_ == "fk_constraint":
  57. state.fk_constraints.append(spec)
  58. elif type_ == "ck_constraint":
  59. state.ck_constraints.append(spec)
  60. else:
  61. pass
  62. return state
  63. def _check_view(self, sql):
  64. return bool(self._re_is_view.match(sql))
  65. def _parse_constraints(self, line):
  66. """Parse a KEY or CONSTRAINT line.
  67. :param line: A line of SHOW CREATE TABLE output
  68. """
  69. # KEY
  70. m = self._re_key.match(line)
  71. if m:
  72. spec = m.groupdict()
  73. # convert columns into name, length pairs
  74. # NOTE: we may want to consider SHOW INDEX as the
  75. # format of indexes in MySQL becomes more complex
  76. spec["columns"] = self._parse_keyexprs(spec["columns"])
  77. if spec["version_sql"]:
  78. m2 = self._re_key_version_sql.match(spec["version_sql"])
  79. if m2 and m2.groupdict()["parser"]:
  80. spec["parser"] = m2.groupdict()["parser"]
  81. if spec["parser"]:
  82. spec["parser"] = self.preparer.unformat_identifiers(
  83. spec["parser"]
  84. )[0]
  85. return "key", spec
  86. # FOREIGN KEY CONSTRAINT
  87. m = self._re_fk_constraint.match(line)
  88. if m:
  89. spec = m.groupdict()
  90. spec["table"] = self.preparer.unformat_identifiers(spec["table"])
  91. spec["local"] = [c[0] for c in self._parse_keyexprs(spec["local"])]
  92. spec["foreign"] = [
  93. c[0] for c in self._parse_keyexprs(spec["foreign"])
  94. ]
  95. return "fk_constraint", spec
  96. # CHECK constraint
  97. m = self._re_ck_constraint.match(line)
  98. if m:
  99. spec = m.groupdict()
  100. return "ck_constraint", spec
  101. # PARTITION and SUBPARTITION
  102. m = self._re_partition.match(line)
  103. if m:
  104. # Punt!
  105. return "partition", line
  106. # No match.
  107. return (None, line)
  108. def _parse_table_name(self, line, state):
  109. """Extract the table name.
  110. :param line: The first line of SHOW CREATE TABLE
  111. """
  112. regex, cleanup = self._pr_name
  113. m = regex.match(line)
  114. if m:
  115. state.table_name = cleanup(m.group("name"))
  116. def _parse_table_options(self, line, state):
  117. """Build a dictionary of all reflected table-level options.
  118. :param line: The final line of SHOW CREATE TABLE output.
  119. """
  120. options = {}
  121. if not line or line == ")":
  122. pass
  123. else:
  124. rest_of_line = line[:]
  125. for regex, cleanup in self._pr_options:
  126. m = regex.search(rest_of_line)
  127. if not m:
  128. continue
  129. directive, value = m.group("directive"), m.group("val")
  130. if cleanup:
  131. value = cleanup(value)
  132. options[directive.lower()] = value
  133. rest_of_line = regex.sub("", rest_of_line)
  134. for nope in ("auto_increment", "data directory", "index directory"):
  135. options.pop(nope, None)
  136. for opt, val in options.items():
  137. state.table_options["%s_%s" % (self.dialect.name, opt)] = val
  138. def _parse_column(self, line, state):
  139. """Extract column details.
  140. Falls back to a 'minimal support' variant if full parse fails.
  141. :param line: Any column-bearing line from SHOW CREATE TABLE
  142. """
  143. spec = None
  144. m = self._re_column.match(line)
  145. if m:
  146. spec = m.groupdict()
  147. spec["full"] = True
  148. else:
  149. m = self._re_column_loose.match(line)
  150. if m:
  151. spec = m.groupdict()
  152. spec["full"] = False
  153. if not spec:
  154. util.warn("Unknown column definition %r" % line)
  155. return
  156. if not spec["full"]:
  157. util.warn("Incomplete reflection of column definition %r" % line)
  158. name, type_, args = spec["name"], spec["coltype"], spec["arg"]
  159. try:
  160. col_type = self.dialect.ischema_names[type_]
  161. except KeyError:
  162. util.warn(
  163. "Did not recognize type '%s' of column '%s'" % (type_, name)
  164. )
  165. col_type = sqltypes.NullType
  166. # Column type positional arguments eg. varchar(32)
  167. if args is None or args == "":
  168. type_args = []
  169. elif args[0] == "'" and args[-1] == "'":
  170. type_args = self._re_csv_str.findall(args)
  171. else:
  172. type_args = [int(v) for v in self._re_csv_int.findall(args)]
  173. # Column type keyword options
  174. type_kw = {}
  175. if issubclass(col_type, (DATETIME, TIME, TIMESTAMP)):
  176. if type_args:
  177. type_kw["fsp"] = type_args.pop(0)
  178. for kw in ("unsigned", "zerofill"):
  179. if spec.get(kw, False):
  180. type_kw[kw] = True
  181. for kw in ("charset", "collate"):
  182. if spec.get(kw, False):
  183. type_kw[kw] = spec[kw]
  184. if issubclass(col_type, (ENUM, SET)):
  185. type_args = _strip_values(type_args)
  186. if issubclass(col_type, SET) and "" in type_args:
  187. type_kw["retrieve_as_bitwise"] = True
  188. type_instance = col_type(*type_args, **type_kw)
  189. col_kw = {}
  190. # NOT NULL
  191. col_kw["nullable"] = True
  192. # this can be "NULL" in the case of TIMESTAMP
  193. if spec.get("notnull", False) == "NOT NULL":
  194. col_kw["nullable"] = False
  195. # AUTO_INCREMENT
  196. if spec.get("autoincr", False):
  197. col_kw["autoincrement"] = True
  198. elif issubclass(col_type, sqltypes.Integer):
  199. col_kw["autoincrement"] = False
  200. # DEFAULT
  201. default = spec.get("default", None)
  202. if default == "NULL":
  203. # eliminates the need to deal with this later.
  204. default = None
  205. comment = spec.get("comment", None)
  206. if comment is not None:
  207. comment = comment.replace("\\\\", "\\").replace("''", "'")
  208. sqltext = spec.get("generated")
  209. if sqltext is not None:
  210. computed = dict(sqltext=sqltext)
  211. persisted = spec.get("persistence")
  212. if persisted is not None:
  213. computed["persisted"] = persisted == "STORED"
  214. col_kw["computed"] = computed
  215. col_d = dict(
  216. name=name, type=type_instance, default=default, comment=comment
  217. )
  218. col_d.update(col_kw)
  219. state.columns.append(col_d)
  220. def _describe_to_create(self, table_name, columns):
  221. """Re-format DESCRIBE output as a SHOW CREATE TABLE string.
  222. DESCRIBE is a much simpler reflection and is sufficient for
  223. reflecting views for runtime use. This method formats DDL
  224. for columns only- keys are omitted.
  225. :param columns: A sequence of DESCRIBE or SHOW COLUMNS 6-tuples.
  226. SHOW FULL COLUMNS FROM rows must be rearranged for use with
  227. this function.
  228. """
  229. buffer = []
  230. for row in columns:
  231. (name, col_type, nullable, default, extra) = [
  232. row[i] for i in (0, 1, 2, 4, 5)
  233. ]
  234. line = [" "]
  235. line.append(self.preparer.quote_identifier(name))
  236. line.append(col_type)
  237. if not nullable:
  238. line.append("NOT NULL")
  239. if default:
  240. if "auto_increment" in default:
  241. pass
  242. elif col_type.startswith("timestamp") and default.startswith(
  243. "C"
  244. ):
  245. line.append("DEFAULT")
  246. line.append(default)
  247. elif default == "NULL":
  248. line.append("DEFAULT")
  249. line.append(default)
  250. else:
  251. line.append("DEFAULT")
  252. line.append("'%s'" % default.replace("'", "''"))
  253. if extra:
  254. line.append(extra)
  255. buffer.append(" ".join(line))
  256. return "".join(
  257. [
  258. (
  259. "CREATE TABLE %s (\n"
  260. % self.preparer.quote_identifier(table_name)
  261. ),
  262. ",\n".join(buffer),
  263. "\n) ",
  264. ]
  265. )
  266. def _parse_keyexprs(self, identifiers):
  267. """Unpack '"col"(2),"col" ASC'-ish strings into components."""
  268. return [
  269. (colname, int(length) if length else None, modifiers)
  270. for colname, length, modifiers in self._re_keyexprs.findall(
  271. identifiers
  272. )
  273. ]
  274. def _prep_regexes(self):
  275. """Pre-compile regular expressions."""
  276. self._re_columns = []
  277. self._pr_options = []
  278. _final = self.preparer.final_quote
  279. quotes = dict(
  280. zip(
  281. ("iq", "fq", "esc_fq"),
  282. [
  283. re.escape(s)
  284. for s in (
  285. self.preparer.initial_quote,
  286. _final,
  287. self.preparer._escape_identifier(_final),
  288. )
  289. ],
  290. )
  291. )
  292. self._pr_name = _pr_compile(
  293. r"^CREATE (?:\w+ +)?TABLE +"
  294. r"%(iq)s(?P<name>(?:%(esc_fq)s|[^%(fq)s])+)%(fq)s +\($" % quotes,
  295. self.preparer._unescape_identifier,
  296. )
  297. self._re_is_view = _re_compile(r"^CREATE(?! TABLE)(\s.*)?\sVIEW")
  298. # `col`,`col2`(32),`col3`(15) DESC
  299. #
  300. self._re_keyexprs = _re_compile(
  301. r"(?:"
  302. r"(?:%(iq)s((?:%(esc_fq)s|[^%(fq)s])+)%(fq)s)"
  303. r"(?:\((\d+)\))?(?: +(ASC|DESC))?(?=\,|$))+" % quotes
  304. )
  305. # 'foo' or 'foo','bar' or 'fo,o','ba''a''r'
  306. self._re_csv_str = _re_compile(r"\x27(?:\x27\x27|[^\x27])*\x27")
  307. # 123 or 123,456
  308. self._re_csv_int = _re_compile(r"\d+")
  309. # `colname` <type> [type opts]
  310. # (NOT NULL | NULL)
  311. # DEFAULT ('value' | CURRENT_TIMESTAMP...)
  312. # COMMENT 'comment'
  313. # COLUMN_FORMAT (FIXED|DYNAMIC|DEFAULT)
  314. # STORAGE (DISK|MEMORY)
  315. self._re_column = _re_compile(
  316. r" "
  317. r"%(iq)s(?P<name>(?:%(esc_fq)s|[^%(fq)s])+)%(fq)s +"
  318. r"(?P<coltype>\w+)"
  319. r"(?:\((?P<arg>(?:\d+|\d+,\d+|"
  320. r"(?:'(?:''|[^'])*',?)+))\))?"
  321. r"(?: +(?P<unsigned>UNSIGNED))?"
  322. r"(?: +(?P<zerofill>ZEROFILL))?"
  323. r"(?: +CHARACTER SET +(?P<charset>[\w_]+))?"
  324. r"(?: +COLLATE +(?P<collate>[\w_]+))?"
  325. r"(?: +(?P<notnull>(?:NOT )?NULL))?"
  326. r"(?: +DEFAULT +(?P<default>"
  327. r"(?:NULL|'(?:''|[^'])*'|[\-\w\.\(\)]+"
  328. r"(?: +ON UPDATE [\-\w\.\(\)]+)?)"
  329. r"))?"
  330. r"(?: +(?:GENERATED ALWAYS)? ?AS +(?P<generated>\("
  331. r".*\))? ?(?P<persistence>VIRTUAL|STORED)?)?"
  332. r"(?: +(?P<autoincr>AUTO_INCREMENT))?"
  333. r"(?: +COMMENT +'(?P<comment>(?:''|[^'])*)')?"
  334. r"(?: +COLUMN_FORMAT +(?P<colfmt>\w+))?"
  335. r"(?: +STORAGE +(?P<storage>\w+))?"
  336. r"(?: +(?P<extra>.*))?"
  337. r",?$" % quotes
  338. )
  339. # Fallback, try to parse as little as possible
  340. self._re_column_loose = _re_compile(
  341. r" "
  342. r"%(iq)s(?P<name>(?:%(esc_fq)s|[^%(fq)s])+)%(fq)s +"
  343. r"(?P<coltype>\w+)"
  344. r"(?:\((?P<arg>(?:\d+|\d+,\d+|\x27(?:\x27\x27|[^\x27])+\x27))\))?"
  345. r".*?(?P<notnull>(?:NOT )NULL)?" % quotes
  346. )
  347. # (PRIMARY|UNIQUE|FULLTEXT|SPATIAL) INDEX `name` (USING (BTREE|HASH))?
  348. # (`col` (ASC|DESC)?, `col` (ASC|DESC)?)
  349. # KEY_BLOCK_SIZE size | WITH PARSER name /*!50100 WITH PARSER name */
  350. self._re_key = _re_compile(
  351. r" "
  352. r"(?:(?P<type>\S+) )?KEY"
  353. r"(?: +%(iq)s(?P<name>(?:%(esc_fq)s|[^%(fq)s])+)%(fq)s)?"
  354. r"(?: +USING +(?P<using_pre>\S+))?"
  355. r" +\((?P<columns>.+?)\)"
  356. r"(?: +USING +(?P<using_post>\S+))?"
  357. r"(?: +KEY_BLOCK_SIZE *[ =]? *(?P<keyblock>\S+))?"
  358. r"(?: +WITH PARSER +(?P<parser>\S+))?"
  359. r"(?: +COMMENT +(?P<comment>(\x27\x27|\x27([^\x27])*?\x27)+))?"
  360. r"(?: +/\*(?P<version_sql>.+)\*/ *)?"
  361. r",?$" % quotes
  362. )
  363. # https://forums.mysql.com/read.php?20,567102,567111#msg-567111
  364. # It means if the MySQL version >= \d+, execute what's in the comment
  365. self._re_key_version_sql = _re_compile(
  366. r"\!\d+ " r"(?: *WITH PARSER +(?P<parser>\S+) *)?"
  367. )
  368. # CONSTRAINT `name` FOREIGN KEY (`local_col`)
  369. # REFERENCES `remote` (`remote_col`)
  370. # MATCH FULL | MATCH PARTIAL | MATCH SIMPLE
  371. # ON DELETE CASCADE ON UPDATE RESTRICT
  372. #
  373. # unique constraints come back as KEYs
  374. kw = quotes.copy()
  375. kw["on"] = "RESTRICT|CASCADE|SET NULL|NO ACTION"
  376. self._re_fk_constraint = _re_compile(
  377. r" "
  378. r"CONSTRAINT +"
  379. r"%(iq)s(?P<name>(?:%(esc_fq)s|[^%(fq)s])+)%(fq)s +"
  380. r"FOREIGN KEY +"
  381. r"\((?P<local>[^\)]+?)\) REFERENCES +"
  382. r"(?P<table>%(iq)s[^%(fq)s]+%(fq)s"
  383. r"(?:\.%(iq)s[^%(fq)s]+%(fq)s)?) +"
  384. r"\((?P<foreign>[^\)]+?)\)"
  385. r"(?: +(?P<match>MATCH \w+))?"
  386. r"(?: +ON DELETE (?P<ondelete>%(on)s))?"
  387. r"(?: +ON UPDATE (?P<onupdate>%(on)s))?" % kw
  388. )
  389. # CONSTRAINT `CONSTRAINT_1` CHECK (`x` > 5)'
  390. # testing on MariaDB 10.2 shows that the CHECK constraint
  391. # is returned on a line by itself, so to match without worrying
  392. # about parenthesis in the expression we go to the end of the line
  393. self._re_ck_constraint = _re_compile(
  394. r" "
  395. r"CONSTRAINT +"
  396. r"%(iq)s(?P<name>(?:%(esc_fq)s|[^%(fq)s])+)%(fq)s +"
  397. r"CHECK +"
  398. r"\((?P<sqltext>.+)\),?" % kw
  399. )
  400. # PARTITION
  401. #
  402. # punt!
  403. self._re_partition = _re_compile(r"(?:.*)(?:SUB)?PARTITION(?:.*)")
  404. # Table-level options (COLLATE, ENGINE, etc.)
  405. # Do the string options first, since they have quoted
  406. # strings we need to get rid of.
  407. for option in _options_of_type_string:
  408. self._add_option_string(option)
  409. for option in (
  410. "ENGINE",
  411. "TYPE",
  412. "AUTO_INCREMENT",
  413. "AVG_ROW_LENGTH",
  414. "CHARACTER SET",
  415. "DEFAULT CHARSET",
  416. "CHECKSUM",
  417. "COLLATE",
  418. "DELAY_KEY_WRITE",
  419. "INSERT_METHOD",
  420. "MAX_ROWS",
  421. "MIN_ROWS",
  422. "PACK_KEYS",
  423. "ROW_FORMAT",
  424. "KEY_BLOCK_SIZE",
  425. ):
  426. self._add_option_word(option)
  427. self._add_option_regex("UNION", r"\([^\)]+\)")
  428. self._add_option_regex("TABLESPACE", r".*? STORAGE DISK")
  429. self._add_option_regex(
  430. "RAID_TYPE",
  431. r"\w+\s+RAID_CHUNKS\s*\=\s*\w+RAID_CHUNKSIZE\s*=\s*\w+",
  432. )
  433. _optional_equals = r"(?:\s*(?:=\s*)|\s+)"
  434. def _add_option_string(self, directive):
  435. regex = r"(?P<directive>%s)%s" r"'(?P<val>(?:[^']|'')*?)'(?!')" % (
  436. re.escape(directive),
  437. self._optional_equals,
  438. )
  439. self._pr_options.append(
  440. _pr_compile(
  441. regex, lambda v: v.replace("\\\\", "\\").replace("''", "'")
  442. )
  443. )
  444. def _add_option_word(self, directive):
  445. regex = r"(?P<directive>%s)%s" r"(?P<val>\w+)" % (
  446. re.escape(directive),
  447. self._optional_equals,
  448. )
  449. self._pr_options.append(_pr_compile(regex))
  450. def _add_option_regex(self, directive, regex):
  451. regex = r"(?P<directive>%s)%s" r"(?P<val>%s)" % (
  452. re.escape(directive),
  453. self._optional_equals,
  454. regex,
  455. )
  456. self._pr_options.append(_pr_compile(regex))
# Table-level options whose values are quoted strings.  Each name is
# registered through ``_add_option_string`` when the parser prepares its
# regexes, ahead of the word-valued options.
_options_of_type_string = (
    "COMMENT",
    "DATA DIRECTORY",
    "INDEX DIRECTORY",
    "PASSWORD",
    "CONNECTION",
)
  464. def _pr_compile(regex, cleanup=None):
  465. """Prepare a 2-tuple of compiled regex and callable."""
  466. return (_re_compile(regex), cleanup)
  467. def _re_compile(regex):
  468. """Compile a string to regex, I and UNICODE."""
  469. return re.compile(regex, re.I | re.UNICODE)
  470. def _strip_values(values):
  471. "Strip reflected values quotes"
  472. strip_values = []
  473. for a in values:
  474. if a[0:1] == '"' or a[0:1] == "'":
  475. # strip enclosing quotes and unquote interior
  476. a = a[1:-1].replace(a[0] * 2, a[0])
  477. strip_values.append(a)
  478. return strip_values