__init__.py 93 KB


  1. """Pretty-print tabular data."""
  2. from collections import namedtuple
  3. from collections.abc import Iterable, Sized
  4. from html import escape as htmlescape
  5. from itertools import chain, zip_longest as izip_longest
  6. from functools import reduce, partial
  7. import io
  8. import re
  9. import math
  10. import textwrap
  11. import dataclasses
  12. try:
  13. import wcwidth # optional wide-character (CJK) support
  14. except ImportError:
  15. wcwidth = None
  16. def _is_file(f):
  17. return isinstance(f, io.IOBase)
  18. __all__ = ["tabulate", "tabulate_formats", "simple_separated_format"]
  19. try:
  20. from .version import version as __version__ # noqa: F401
  21. except ImportError:
  22. pass # running __init__.py as a script, AppVeyor pytests
  23. # minimum extra space in headers
  24. MIN_PADDING = 2
  25. # Whether or not to preserve leading/trailing whitespace in data.
  26. PRESERVE_WHITESPACE = False
  27. _DEFAULT_FLOATFMT = "g"
  28. _DEFAULT_INTFMT = ""
  29. _DEFAULT_MISSINGVAL = ""
  30. # default align will be overwritten by "left", "center" or "decimal"
  31. # depending on the formatter
  32. _DEFAULT_ALIGN = "default"
  33. # if True, enable wide-character (CJK) support
  34. WIDE_CHARS_MODE = wcwidth is not None
  35. # Constant that can be used as part of passed rows to generate a separating line
  36. # It is purposely an unprintable character, very unlikely to be used in a table
  37. SEPARATING_LINE = "\001"
  38. Line = namedtuple("Line", ["begin", "hline", "sep", "end"])
  39. DataRow = namedtuple("DataRow", ["begin", "sep", "end"])
  40. # A table structure is supposed to be:
  41. #
  42. # --- lineabove ---------
  43. # headerrow
  44. # --- linebelowheader ---
  45. # datarow
  46. # --- linebetweenrows ---
  47. # ... (more datarows) ...
  48. # --- linebetweenrows ---
  49. # last datarow
  50. # --- linebelow ---------
  51. #
  52. # TableFormat's line* elements can be
  53. #
  54. # - either None, if the element is not used,
  55. # - or a Line tuple,
  56. # - or a function: [col_widths], [col_alignments] -> string.
  57. #
  58. # TableFormat's *row elements can be
  59. #
  60. # - either None, if the element is not used,
  61. # - or a DataRow tuple,
  62. # - or a function: [cell_values], [col_widths], [col_alignments] -> string.
  63. #
  64. # padding (an integer) is the amount of white space around data values.
  65. #
  66. # with_header_hide:
  67. #
  68. # - either None, to display all table elements unconditionally,
  69. # - or a list of elements not to be displayed if the table has column headers.
  70. #
  71. TableFormat = namedtuple(
  72. "TableFormat",
  73. [
  74. "lineabove",
  75. "linebelowheader",
  76. "linebetweenrows",
  77. "linebelow",
  78. "headerrow",
  79. "datarow",
  80. "padding",
  81. "with_header_hide",
  82. ],
  83. )
  84. def _is_separating_line(row):
  85. row_type = type(row)
  86. is_sl = (row_type == list or row_type == str) and (
  87. (len(row) >= 1 and row[0] == SEPARATING_LINE)
  88. or (len(row) >= 2 and row[1] == SEPARATING_LINE)
  89. )
  90. return is_sl
  91. def _pipe_segment_with_colons(align, colwidth):
  92. """Return a segment of a horizontal line with optional colons which
  93. indicate column's alignment (as in `pipe` output format)."""
  94. w = colwidth
  95. if align in ["right", "decimal"]:
  96. return ("-" * (w - 1)) + ":"
  97. elif align == "center":
  98. return ":" + ("-" * (w - 2)) + ":"
  99. elif align == "left":
  100. return ":" + ("-" * (w - 1))
  101. else:
  102. return "-" * w
  103. def _pipe_line_with_colons(colwidths, colaligns):
  104. """Return a horizontal line with optional colons to indicate column's
  105. alignment (as in `pipe` output format)."""
  106. if not colaligns: # e.g. printing an empty data frame (github issue #15)
  107. colaligns = [""] * len(colwidths)
  108. segments = [_pipe_segment_with_colons(a, w) for a, w in zip(colaligns, colwidths)]
  109. return "|" + "|".join(segments) + "|"
  110. def _mediawiki_row_with_attrs(separator, cell_values, colwidths, colaligns):
  111. alignment = {
  112. "left": "",
  113. "right": 'align="right"| ',
  114. "center": 'align="center"| ',
  115. "decimal": 'align="right"| ',
  116. }
  117. # hard-coded padding _around_ align attribute and value together
  118. # rather than padding parameter which affects only the value
  119. values_with_attrs = [
  120. " " + alignment.get(a, "") + c + " " for c, a in zip(cell_values, colaligns)
  121. ]
  122. colsep = separator * 2
  123. return (separator + colsep.join(values_with_attrs)).rstrip()
  124. def _textile_row_with_attrs(cell_values, colwidths, colaligns):
  125. cell_values[0] += " "
  126. alignment = {"left": "<.", "right": ">.", "center": "=.", "decimal": ">."}
  127. values = (alignment.get(a, "") + v for a, v in zip(colaligns, cell_values))
  128. return "|" + "|".join(values) + "|"
  129. def _html_begin_table_without_header(colwidths_ignore, colaligns_ignore):
  130. # this table header will be suppressed if there is a header row
  131. return "<table>\n<tbody>"
  132. def _html_row_with_attrs(celltag, unsafe, cell_values, colwidths, colaligns):
  133. alignment = {
  134. "left": "",
  135. "right": ' style="text-align: right;"',
  136. "center": ' style="text-align: center;"',
  137. "decimal": ' style="text-align: right;"',
  138. }
  139. if unsafe:
  140. values_with_attrs = [
  141. "<{0}{1}>{2}</{0}>".format(celltag, alignment.get(a, ""), c)
  142. for c, a in zip(cell_values, colaligns)
  143. ]
  144. else:
  145. values_with_attrs = [
  146. "<{0}{1}>{2}</{0}>".format(celltag, alignment.get(a, ""), htmlescape(c))
  147. for c, a in zip(cell_values, colaligns)
  148. ]
  149. rowhtml = "<tr>{}</tr>".format("".join(values_with_attrs).rstrip())
  150. if celltag == "th": # it's a header row, create a new table header
  151. rowhtml = f"<table>\n<thead>\n{rowhtml}\n</thead>\n<tbody>"
  152. return rowhtml
  153. def _moin_row_with_attrs(celltag, cell_values, colwidths, colaligns, header=""):
  154. alignment = {
  155. "left": "",
  156. "right": '<style="text-align: right;">',
  157. "center": '<style="text-align: center;">',
  158. "decimal": '<style="text-align: right;">',
  159. }
  160. values_with_attrs = [
  161. "{}{} {} ".format(celltag, alignment.get(a, ""), header + c + header)
  162. for c, a in zip(cell_values, colaligns)
  163. ]
  164. return "".join(values_with_attrs) + "||"
  165. def _latex_line_begin_tabular(colwidths, colaligns, booktabs=False, longtable=False):
  166. alignment = {"left": "l", "right": "r", "center": "c", "decimal": "r"}
  167. tabular_columns_fmt = "".join([alignment.get(a, "l") for a in colaligns])
  168. return "\n".join(
  169. [
  170. ("\\begin{tabular}{" if not longtable else "\\begin{longtable}{")
  171. + tabular_columns_fmt
  172. + "}",
  173. "\\toprule" if booktabs else "\\hline",
  174. ]
  175. )
  176. def _asciidoc_row(is_header, *args):
  177. """handle header and data rows for asciidoc format"""
  178. def make_header_line(is_header, colwidths, colaligns):
  179. # generate the column specifiers
  180. alignment = {"left": "<", "right": ">", "center": "^", "decimal": ">"}
  181. # use the column widths generated by tabulate for the asciidoc column width specifiers
  182. asciidoc_alignments = zip(
  183. colwidths, [alignment[colalign] for colalign in colaligns]
  184. )
  185. asciidoc_column_specifiers = [
  186. "{:d}{}".format(width, align) for width, align in asciidoc_alignments
  187. ]
  188. header_list = ['cols="' + (",".join(asciidoc_column_specifiers)) + '"']
  189. # generate the list of options (currently only "header")
  190. options_list = []
  191. if is_header:
  192. options_list.append("header")
  193. if options_list:
  194. header_list += ['options="' + ",".join(options_list) + '"']
  195. # generate the list of entries in the table header field
  196. return "[{}]\n|====".format(",".join(header_list))
  197. if len(args) == 2:
  198. # two arguments are passed if called in the context of aboveline
  199. # print the table header with column widths and optional header tag
  200. return make_header_line(False, *args)
  201. elif len(args) == 3:
  202. # three arguments are passed if called in the context of dataline or headerline
  203. # print the table line and make the aboveline if it is a header
  204. cell_values, colwidths, colaligns = args
  205. data_line = "|" + "|".join(cell_values)
  206. if is_header:
  207. return make_header_line(True, colwidths, colaligns) + "\n" + data_line
  208. else:
  209. return data_line
  210. else:
  211. raise ValueError(
  212. " _asciidoc_row() requires two (colwidths, colaligns) "
  213. + "or three (cell_values, colwidths, colaligns) arguments) "
  214. )
  215. LATEX_ESCAPE_RULES = {
  216. r"&": r"\&",
  217. r"%": r"\%",
  218. r"$": r"\$",
  219. r"#": r"\#",
  220. r"_": r"\_",
  221. r"^": r"\^{}",
  222. r"{": r"\{",
  223. r"}": r"\}",
  224. r"~": r"\textasciitilde{}",
  225. "\\": r"\textbackslash{}",
  226. r"<": r"\ensuremath{<}",
  227. r">": r"\ensuremath{>}",
  228. }
  229. def _latex_row(cell_values, colwidths, colaligns, escrules=LATEX_ESCAPE_RULES):
  230. def escape_char(c):
  231. return escrules.get(c, c)
  232. escaped_values = ["".join(map(escape_char, cell)) for cell in cell_values]
  233. rowfmt = DataRow("", "&", "\\\\")
  234. return _build_simple_row(escaped_values, rowfmt)
  235. def _rst_escape_first_column(rows, headers):
  236. def escape_empty(val):
  237. if isinstance(val, (str, bytes)) and not val.strip():
  238. return ".."
  239. else:
  240. return val
  241. new_headers = list(headers)
  242. new_rows = []
  243. if headers:
  244. new_headers[0] = escape_empty(headers[0])
  245. for row in rows:
  246. new_row = list(row)
  247. if new_row:
  248. new_row[0] = escape_empty(row[0])
  249. new_rows.append(new_row)
  250. return new_rows, new_headers
  251. _table_formats = {
  252. "simple": TableFormat(
  253. lineabove=Line("", "-", " ", ""),
  254. linebelowheader=Line("", "-", " ", ""),
  255. linebetweenrows=None,
  256. linebelow=Line("", "-", " ", ""),
  257. headerrow=DataRow("", " ", ""),
  258. datarow=DataRow("", " ", ""),
  259. padding=0,
  260. with_header_hide=["lineabove", "linebelow"],
  261. ),
  262. "plain": TableFormat(
  263. lineabove=None,
  264. linebelowheader=None,
  265. linebetweenrows=None,
  266. linebelow=None,
  267. headerrow=DataRow("", " ", ""),
  268. datarow=DataRow("", " ", ""),
  269. padding=0,
  270. with_header_hide=None,
  271. ),
  272. "grid": TableFormat(
  273. lineabove=Line("+", "-", "+", "+"),
  274. linebelowheader=Line("+", "=", "+", "+"),
  275. linebetweenrows=Line("+", "-", "+", "+"),
  276. linebelow=Line("+", "-", "+", "+"),
  277. headerrow=DataRow("|", "|", "|"),
  278. datarow=DataRow("|", "|", "|"),
  279. padding=1,
  280. with_header_hide=None,
  281. ),
  282. "simple_grid": TableFormat(
  283. lineabove=Line("┌", "─", "┬", "┐"),
  284. linebelowheader=Line("├", "─", "┼", "┤"),
  285. linebetweenrows=Line("├", "─", "┼", "┤"),
  286. linebelow=Line("└", "─", "┴", "┘"),
  287. headerrow=DataRow("│", "│", "│"),
  288. datarow=DataRow("│", "│", "│"),
  289. padding=1,
  290. with_header_hide=None,
  291. ),
  292. "rounded_grid": TableFormat(
  293. lineabove=Line("╭", "─", "┬", "╮"),
  294. linebelowheader=Line("├", "─", "┼", "┤"),
  295. linebetweenrows=Line("├", "─", "┼", "┤"),
  296. linebelow=Line("╰", "─", "┴", "╯"),
  297. headerrow=DataRow("│", "│", "│"),
  298. datarow=DataRow("│", "│", "│"),
  299. padding=1,
  300. with_header_hide=None,
  301. ),
  302. "heavy_grid": TableFormat(
  303. lineabove=Line("┏", "━", "┳", "┓"),
  304. linebelowheader=Line("┣", "━", "╋", "┫"),
  305. linebetweenrows=Line("┣", "━", "╋", "┫"),
  306. linebelow=Line("┗", "━", "┻", "┛"),
  307. headerrow=DataRow("┃", "┃", "┃"),
  308. datarow=DataRow("┃", "┃", "┃"),
  309. padding=1,
  310. with_header_hide=None,
  311. ),
  312. "mixed_grid": TableFormat(
  313. lineabove=Line("┍", "━", "┯", "┑"),
  314. linebelowheader=Line("┝", "━", "┿", "┥"),
  315. linebetweenrows=Line("├", "─", "┼", "┤"),
  316. linebelow=Line("┕", "━", "┷", "┙"),
  317. headerrow=DataRow("│", "│", "│"),
  318. datarow=DataRow("│", "│", "│"),
  319. padding=1,
  320. with_header_hide=None,
  321. ),
  322. "double_grid": TableFormat(
  323. lineabove=Line("╔", "═", "╦", "╗"),
  324. linebelowheader=Line("╠", "═", "╬", "╣"),
  325. linebetweenrows=Line("╠", "═", "╬", "╣"),
  326. linebelow=Line("╚", "═", "╩", "╝"),
  327. headerrow=DataRow("║", "║", "║"),
  328. datarow=DataRow("║", "║", "║"),
  329. padding=1,
  330. with_header_hide=None,
  331. ),
  332. "fancy_grid": TableFormat(
  333. lineabove=Line("╒", "═", "╤", "╕"),
  334. linebelowheader=Line("╞", "═", "╪", "╡"),
  335. linebetweenrows=Line("├", "─", "┼", "┤"),
  336. linebelow=Line("╘", "═", "╧", "╛"),
  337. headerrow=DataRow("│", "│", "│"),
  338. datarow=DataRow("│", "│", "│"),
  339. padding=1,
  340. with_header_hide=None,
  341. ),
  342. "outline": TableFormat(
  343. lineabove=Line("+", "-", "+", "+"),
  344. linebelowheader=Line("+", "=", "+", "+"),
  345. linebetweenrows=None,
  346. linebelow=Line("+", "-", "+", "+"),
  347. headerrow=DataRow("|", "|", "|"),
  348. datarow=DataRow("|", "|", "|"),
  349. padding=1,
  350. with_header_hide=None,
  351. ),
  352. "simple_outline": TableFormat(
  353. lineabove=Line("┌", "─", "┬", "┐"),
  354. linebelowheader=Line("├", "─", "┼", "┤"),
  355. linebetweenrows=None,
  356. linebelow=Line("└", "─", "┴", "┘"),
  357. headerrow=DataRow("│", "│", "│"),
  358. datarow=DataRow("│", "│", "│"),
  359. padding=1,
  360. with_header_hide=None,
  361. ),
  362. "rounded_outline": TableFormat(
  363. lineabove=Line("╭", "─", "┬", "╮"),
  364. linebelowheader=Line("├", "─", "┼", "┤"),
  365. linebetweenrows=None,
  366. linebelow=Line("╰", "─", "┴", "╯"),
  367. headerrow=DataRow("│", "│", "│"),
  368. datarow=DataRow("│", "│", "│"),
  369. padding=1,
  370. with_header_hide=None,
  371. ),
  372. "heavy_outline": TableFormat(
  373. lineabove=Line("┏", "━", "┳", "┓"),
  374. linebelowheader=Line("┣", "━", "╋", "┫"),
  375. linebetweenrows=None,
  376. linebelow=Line("┗", "━", "┻", "┛"),
  377. headerrow=DataRow("┃", "┃", "┃"),
  378. datarow=DataRow("┃", "┃", "┃"),
  379. padding=1,
  380. with_header_hide=None,
  381. ),
  382. "mixed_outline": TableFormat(
  383. lineabove=Line("┍", "━", "┯", "┑"),
  384. linebelowheader=Line("┝", "━", "┿", "┥"),
  385. linebetweenrows=None,
  386. linebelow=Line("┕", "━", "┷", "┙"),
  387. headerrow=DataRow("│", "│", "│"),
  388. datarow=DataRow("│", "│", "│"),
  389. padding=1,
  390. with_header_hide=None,
  391. ),
  392. "double_outline": TableFormat(
  393. lineabove=Line("╔", "═", "╦", "╗"),
  394. linebelowheader=Line("╠", "═", "╬", "╣"),
  395. linebetweenrows=None,
  396. linebelow=Line("╚", "═", "╩", "╝"),
  397. headerrow=DataRow("║", "║", "║"),
  398. datarow=DataRow("║", "║", "║"),
  399. padding=1,
  400. with_header_hide=None,
  401. ),
  402. "fancy_outline": TableFormat(
  403. lineabove=Line("╒", "═", "╤", "╕"),
  404. linebelowheader=Line("╞", "═", "╪", "╡"),
  405. linebetweenrows=None,
  406. linebelow=Line("╘", "═", "╧", "╛"),
  407. headerrow=DataRow("│", "│", "│"),
  408. datarow=DataRow("│", "│", "│"),
  409. padding=1,
  410. with_header_hide=None,
  411. ),
  412. "github": TableFormat(
  413. lineabove=Line("|", "-", "|", "|"),
  414. linebelowheader=Line("|", "-", "|", "|"),
  415. linebetweenrows=None,
  416. linebelow=None,
  417. headerrow=DataRow("|", "|", "|"),
  418. datarow=DataRow("|", "|", "|"),
  419. padding=1,
  420. with_header_hide=["lineabove"],
  421. ),
  422. "pipe": TableFormat(
  423. lineabove=_pipe_line_with_colons,
  424. linebelowheader=_pipe_line_with_colons,
  425. linebetweenrows=None,
  426. linebelow=None,
  427. headerrow=DataRow("|", "|", "|"),
  428. datarow=DataRow("|", "|", "|"),
  429. padding=1,
  430. with_header_hide=["lineabove"],
  431. ),
  432. "orgtbl": TableFormat(
  433. lineabove=None,
  434. linebelowheader=Line("|", "-", "+", "|"),
  435. linebetweenrows=None,
  436. linebelow=None,
  437. headerrow=DataRow("|", "|", "|"),
  438. datarow=DataRow("|", "|", "|"),
  439. padding=1,
  440. with_header_hide=None,
  441. ),
  442. "jira": TableFormat(
  443. lineabove=None,
  444. linebelowheader=None,
  445. linebetweenrows=None,
  446. linebelow=None,
  447. headerrow=DataRow("||", "||", "||"),
  448. datarow=DataRow("|", "|", "|"),
  449. padding=1,
  450. with_header_hide=None,
  451. ),
  452. "presto": TableFormat(
  453. lineabove=None,
  454. linebelowheader=Line("", "-", "+", ""),
  455. linebetweenrows=None,
  456. linebelow=None,
  457. headerrow=DataRow("", "|", ""),
  458. datarow=DataRow("", "|", ""),
  459. padding=1,
  460. with_header_hide=None,
  461. ),
  462. "pretty": TableFormat(
  463. lineabove=Line("+", "-", "+", "+"),
  464. linebelowheader=Line("+", "-", "+", "+"),
  465. linebetweenrows=None,
  466. linebelow=Line("+", "-", "+", "+"),
  467. headerrow=DataRow("|", "|", "|"),
  468. datarow=DataRow("|", "|", "|"),
  469. padding=1,
  470. with_header_hide=None,
  471. ),
  472. "psql": TableFormat(
  473. lineabove=Line("+", "-", "+", "+"),
  474. linebelowheader=Line("|", "-", "+", "|"),
  475. linebetweenrows=None,
  476. linebelow=Line("+", "-", "+", "+"),
  477. headerrow=DataRow("|", "|", "|"),
  478. datarow=DataRow("|", "|", "|"),
  479. padding=1,
  480. with_header_hide=None,
  481. ),
  482. "rst": TableFormat(
  483. lineabove=Line("", "=", " ", ""),
  484. linebelowheader=Line("", "=", " ", ""),
  485. linebetweenrows=None,
  486. linebelow=Line("", "=", " ", ""),
  487. headerrow=DataRow("", " ", ""),
  488. datarow=DataRow("", " ", ""),
  489. padding=0,
  490. with_header_hide=None,
  491. ),
  492. "mediawiki": TableFormat(
  493. lineabove=Line(
  494. '{| class="wikitable" style="text-align: left;"',
  495. "",
  496. "",
  497. "\n|+ <!-- caption -->\n|-",
  498. ),
  499. linebelowheader=Line("|-", "", "", ""),
  500. linebetweenrows=Line("|-", "", "", ""),
  501. linebelow=Line("|}", "", "", ""),
  502. headerrow=partial(_mediawiki_row_with_attrs, "!"),
  503. datarow=partial(_mediawiki_row_with_attrs, "|"),
  504. padding=0,
  505. with_header_hide=None,
  506. ),
  507. "moinmoin": TableFormat(
  508. lineabove=None,
  509. linebelowheader=None,
  510. linebetweenrows=None,
  511. linebelow=None,
  512. headerrow=partial(_moin_row_with_attrs, "||", header="'''"),
  513. datarow=partial(_moin_row_with_attrs, "||"),
  514. padding=1,
  515. with_header_hide=None,
  516. ),
  517. "youtrack": TableFormat(
  518. lineabove=None,
  519. linebelowheader=None,
  520. linebetweenrows=None,
  521. linebelow=None,
  522. headerrow=DataRow("|| ", " || ", " || "),
  523. datarow=DataRow("| ", " | ", " |"),
  524. padding=1,
  525. with_header_hide=None,
  526. ),
  527. "html": TableFormat(
  528. lineabove=_html_begin_table_without_header,
  529. linebelowheader="",
  530. linebetweenrows=None,
  531. linebelow=Line("</tbody>\n</table>", "", "", ""),
  532. headerrow=partial(_html_row_with_attrs, "th", False),
  533. datarow=partial(_html_row_with_attrs, "td", False),
  534. padding=0,
  535. with_header_hide=["lineabove"],
  536. ),
  537. "unsafehtml": TableFormat(
  538. lineabove=_html_begin_table_without_header,
  539. linebelowheader="",
  540. linebetweenrows=None,
  541. linebelow=Line("</tbody>\n</table>", "", "", ""),
  542. headerrow=partial(_html_row_with_attrs, "th", True),
  543. datarow=partial(_html_row_with_attrs, "td", True),
  544. padding=0,
  545. with_header_hide=["lineabove"],
  546. ),
  547. "latex": TableFormat(
  548. lineabove=_latex_line_begin_tabular,
  549. linebelowheader=Line("\\hline", "", "", ""),
  550. linebetweenrows=None,
  551. linebelow=Line("\\hline\n\\end{tabular}", "", "", ""),
  552. headerrow=_latex_row,
  553. datarow=_latex_row,
  554. padding=1,
  555. with_header_hide=None,
  556. ),
  557. "latex_raw": TableFormat(
  558. lineabove=_latex_line_begin_tabular,
  559. linebelowheader=Line("\\hline", "", "", ""),
  560. linebetweenrows=None,
  561. linebelow=Line("\\hline\n\\end{tabular}", "", "", ""),
  562. headerrow=partial(_latex_row, escrules={}),
  563. datarow=partial(_latex_row, escrules={}),
  564. padding=1,
  565. with_header_hide=None,
  566. ),
  567. "latex_booktabs": TableFormat(
  568. lineabove=partial(_latex_line_begin_tabular, booktabs=True),
  569. linebelowheader=Line("\\midrule", "", "", ""),
  570. linebetweenrows=None,
  571. linebelow=Line("\\bottomrule\n\\end{tabular}", "", "", ""),
  572. headerrow=_latex_row,
  573. datarow=_latex_row,
  574. padding=1,
  575. with_header_hide=None,
  576. ),
  577. "latex_longtable": TableFormat(
  578. lineabove=partial(_latex_line_begin_tabular, longtable=True),
  579. linebelowheader=Line("\\hline\n\\endhead", "", "", ""),
  580. linebetweenrows=None,
  581. linebelow=Line("\\hline\n\\end{longtable}", "", "", ""),
  582. headerrow=_latex_row,
  583. datarow=_latex_row,
  584. padding=1,
  585. with_header_hide=None,
  586. ),
  587. "tsv": TableFormat(
  588. lineabove=None,
  589. linebelowheader=None,
  590. linebetweenrows=None,
  591. linebelow=None,
  592. headerrow=DataRow("", "\t", ""),
  593. datarow=DataRow("", "\t", ""),
  594. padding=0,
  595. with_header_hide=None,
  596. ),
  597. "textile": TableFormat(
  598. lineabove=None,
  599. linebelowheader=None,
  600. linebetweenrows=None,
  601. linebelow=None,
  602. headerrow=DataRow("|_. ", "|_.", "|"),
  603. datarow=_textile_row_with_attrs,
  604. padding=1,
  605. with_header_hide=None,
  606. ),
  607. "asciidoc": TableFormat(
  608. lineabove=partial(_asciidoc_row, False),
  609. linebelowheader=None,
  610. linebetweenrows=None,
  611. linebelow=Line("|====", "", "", ""),
  612. headerrow=partial(_asciidoc_row, True),
  613. datarow=partial(_asciidoc_row, False),
  614. padding=1,
  615. with_header_hide=["lineabove"],
  616. ),
  617. }
  618. tabulate_formats = list(sorted(_table_formats.keys()))
  619. # The table formats for which multiline cells will be folded into subsequent
  620. # table rows. The key is the original format specified at the API. The value is
  621. # the format that will be used to represent the original format.
  622. multiline_formats = {
  623. "plain": "plain",
  624. "simple": "simple",
  625. "grid": "grid",
  626. "simple_grid": "simple_grid",
  627. "rounded_grid": "rounded_grid",
  628. "heavy_grid": "heavy_grid",
  629. "mixed_grid": "mixed_grid",
  630. "double_grid": "double_grid",
  631. "fancy_grid": "fancy_grid",
  632. "pipe": "pipe",
  633. "orgtbl": "orgtbl",
  634. "jira": "jira",
  635. "presto": "presto",
  636. "pretty": "pretty",
  637. "psql": "psql",
  638. "rst": "rst",
  639. }
  640. # TODO: Add multiline support for the remaining table formats:
  641. # - mediawiki: Replace \n with <br>
  642. # - moinmoin: TBD
  643. # - youtrack: TBD
  644. # - html: Replace \n with <br>
  645. # - latex*: Use "makecell" package: In header, replace X\nY with
  646. # \thead{X\\Y} and in data row, replace X\nY with \makecell{X\\Y}
  647. # - tsv: TBD
  648. # - textile: Replace \n with <br/> (must be well-formed XML)
  649. _multiline_codes = re.compile(r"\r|\n|\r\n")
  650. _multiline_codes_bytes = re.compile(b"\r|\n|\r\n")
  651. # Handle ANSI escape sequences for both control sequence introducer (CSI) and
  652. # operating system command (OSC). Both of these begin with 0x1b (or octal 033),
  653. # which will be shown below as ESC.
  654. #
  655. # CSI ANSI escape codes have the following format, defined in section 5.4 of ECMA-48:
  656. #
  657. # CSI: ESC followed by the '[' character (0x5b)
  658. # Parameter Bytes: 0..n bytes in the range 0x30-0x3f
  659. # Intermediate Bytes: 0..n bytes in the range 0x20-0x2f
  660. # Final Byte: a single byte in the range 0x40-0x7e
  661. #
  662. # Also include the terminal hyperlink sequences as described here:
  663. # https://gist.github.com/egmontkob/eb114294efbcd5adb1944c9f3cb5feda
  664. #
  665. # OSC 8 ; params ; uri ST display_text OSC 8 ;; ST
  666. #
  667. # Example: \x1b]8;;https://example.com\x1b\x5ctext to show\x1b]8;;\x1b\x5c
  668. #
  669. # Where:
  670. # OSC: ESC followed by the ']' character (0x5d)
  671. # params: 0..n optional key value pairs separated by ':' (e.g. foo=bar:baz=qux:abc=123)
  672. # URI: the actual URI with protocol scheme (e.g. https://, file://, ftp://)
  673. # ST: ESC followed by the '\' character (0x5c)
  674. _esc = r"\x1b"
  675. _csi = rf"{_esc}\["
  676. _osc = rf"{_esc}\]"
  677. _st = rf"{_esc}\\"
  678. _ansi_escape_pat = rf"""
  679. (
  680. # terminal colors, etc
  681. {_csi} # CSI
  682. [\x30-\x3f]* # parameter bytes
  683. [\x20-\x2f]* # intermediate bytes
  684. [\x40-\x7e] # final byte
  685. |
  686. # terminal hyperlinks
  687. {_osc}8; # OSC opening
  688. (\w+=\w+:?)* # key=value params list (submatch 2)
  689. ; # delimiter
  690. ([^{_esc}]+) # URI - anything but ESC (submatch 3)
  691. {_st} # ST
  692. ([^{_esc}]+) # link text - anything but ESC (submatch 4)
  693. {_osc}8;;{_st} # "closing" OSC sequence
  694. )
  695. """
  696. _ansi_codes = re.compile(_ansi_escape_pat, re.VERBOSE)
  697. _ansi_codes_bytes = re.compile(_ansi_escape_pat.encode("utf8"), re.VERBOSE)
  698. _ansi_color_reset_code = "\033[0m"
  699. _float_with_thousands_separators = re.compile(
  700. r"^(([+-]?[0-9]{1,3})(?:,([0-9]{3}))*)?(?(1)\.[0-9]*|\.[0-9]+)?$"
  701. )
  702. def simple_separated_format(separator):
  703. """Construct a simple TableFormat with columns separated by a separator.
  704. >>> tsv = simple_separated_format("\\t") ; \
  705. tabulate([["foo", 1], ["spam", 23]], tablefmt=tsv) == 'foo \\t 1\\nspam\\t23'
  706. True
  707. """
  708. return TableFormat(
  709. None,
  710. None,
  711. None,
  712. None,
  713. headerrow=DataRow("", separator, ""),
  714. datarow=DataRow("", separator, ""),
  715. padding=0,
  716. with_header_hide=None,
  717. )
  718. def _isnumber_with_thousands_separator(string):
  719. """
  720. >>> _isnumber_with_thousands_separator(".")
  721. False
  722. >>> _isnumber_with_thousands_separator("1")
  723. True
  724. >>> _isnumber_with_thousands_separator("1.")
  725. True
  726. >>> _isnumber_with_thousands_separator(".1")
  727. True
  728. >>> _isnumber_with_thousands_separator("1000")
  729. False
  730. >>> _isnumber_with_thousands_separator("1,000")
  731. True
  732. >>> _isnumber_with_thousands_separator("1,0000")
  733. False
  734. >>> _isnumber_with_thousands_separator("1,000.1234")
  735. True
  736. >>> _isnumber_with_thousands_separator(b"1,000.1234")
  737. True
  738. >>> _isnumber_with_thousands_separator("+1,000.1234")
  739. True
  740. >>> _isnumber_with_thousands_separator("-1,000.1234")
  741. True
  742. """
  743. try:
  744. string = string.decode()
  745. except (UnicodeDecodeError, AttributeError):
  746. pass
  747. return bool(re.match(_float_with_thousands_separators, string))
  748. def _isconvertible(conv, string):
  749. try:
  750. conv(string)
  751. return True
  752. except (ValueError, TypeError):
  753. return False
  754. def _isnumber(string):
  755. """
  756. >>> _isnumber("123.45")
  757. True
  758. >>> _isnumber("123")
  759. True
  760. >>> _isnumber("spam")
  761. False
  762. >>> _isnumber("123e45678")
  763. False
  764. >>> _isnumber("inf")
  765. True
  766. """
  767. if not _isconvertible(float, string):
  768. return False
  769. elif isinstance(string, (str, bytes)) and (
  770. math.isinf(float(string)) or math.isnan(float(string))
  771. ):
  772. return string.lower() in ["inf", "-inf", "nan"]
  773. return True
  774. def _isint(string, inttype=int):
  775. """
  776. >>> _isint("123")
  777. True
  778. >>> _isint("123.45")
  779. False
  780. """
  781. return (
  782. type(string) is inttype
  783. or isinstance(string, (bytes, str))
  784. and _isconvertible(inttype, string)
  785. )
  786. def _isbool(string):
  787. """
  788. >>> _isbool(True)
  789. True
  790. >>> _isbool("False")
  791. True
  792. >>> _isbool(1)
  793. False
  794. """
  795. return type(string) is bool or (
  796. isinstance(string, (bytes, str)) and string in ("True", "False")
  797. )
  798. def _type(string, has_invisible=True, numparse=True):
  799. """The least generic type (type(None), int, float, str, unicode).
  800. >>> _type(None) is type(None)
  801. True
  802. >>> _type("foo") is type("")
  803. True
  804. >>> _type("1") is type(1)
  805. True
  806. >>> _type('\x1b[31m42\x1b[0m') is type(42)
  807. True
  808. >>> _type('\x1b[31m42\x1b[0m') is type(42)
  809. True
  810. """
  811. if has_invisible and isinstance(string, (str, bytes)):
  812. string = _strip_ansi(string)
  813. if string is None:
  814. return type(None)
  815. elif hasattr(string, "isoformat"): # datetime.datetime, date, and time
  816. return str
  817. elif _isbool(string):
  818. return bool
  819. elif _isint(string) and numparse:
  820. return int
  821. elif _isnumber(string) and numparse:
  822. return float
  823. elif isinstance(string, bytes):
  824. return bytes
  825. else:
  826. return str
  827. def _afterpoint(string):
  828. """Symbols after a decimal point, -1 if the string lacks the decimal point.
  829. >>> _afterpoint("123.45")
  830. 2
  831. >>> _afterpoint("1001")
  832. -1
  833. >>> _afterpoint("eggs")
  834. -1
  835. >>> _afterpoint("123e45")
  836. 2
  837. >>> _afterpoint("123,456.78")
  838. 2
  839. """
  840. if _isnumber(string) or _isnumber_with_thousands_separator(string):
  841. if _isint(string):
  842. return -1
  843. else:
  844. pos = string.rfind(".")
  845. pos = string.lower().rfind("e") if pos < 0 else pos
  846. if pos >= 0:
  847. return len(string) - pos - 1
  848. else:
  849. return -1 # no point
  850. else:
  851. return -1 # not a number
  852. def _padleft(width, s):
  853. """Flush right.
  854. >>> _padleft(6, '\u044f\u0439\u0446\u0430') == ' \u044f\u0439\u0446\u0430'
  855. True
  856. """
  857. fmt = "{0:>%ds}" % width
  858. return fmt.format(s)
  859. def _padright(width, s):
  860. """Flush left.
  861. >>> _padright(6, '\u044f\u0439\u0446\u0430') == '\u044f\u0439\u0446\u0430 '
  862. True
  863. """
  864. fmt = "{0:<%ds}" % width
  865. return fmt.format(s)
  866. def _padboth(width, s):
  867. """Center string.
  868. >>> _padboth(6, '\u044f\u0439\u0446\u0430') == ' \u044f\u0439\u0446\u0430 '
  869. True
  870. """
  871. fmt = "{0:^%ds}" % width
  872. return fmt.format(s)
  873. def _padnone(ignore_width, s):
  874. return s
  875. def _strip_ansi(s):
  876. r"""Remove ANSI escape sequences, both CSI (color codes, etc) and OSC hyperlinks.
  877. CSI sequences are simply removed from the output, while OSC hyperlinks are replaced
  878. with the link text. Note: it may be desirable to show the URI instead but this is not
  879. supported.
  880. >>> repr(_strip_ansi('\x1B]8;;https://example.com\x1B\\This is a link\x1B]8;;\x1B\\'))
  881. "'This is a link'"
  882. >>> repr(_strip_ansi('\x1b[31mred\x1b[0m text'))
  883. "'red text'"
  884. """
  885. if isinstance(s, str):
  886. return _ansi_codes.sub(r"\4", s)
  887. else: # a bytestring
  888. return _ansi_codes_bytes.sub(r"\4", s)
  889. def _visible_width(s):
  890. """Visible width of a printed string. ANSI color codes are removed.
  891. >>> _visible_width('\x1b[31mhello\x1b[0m'), _visible_width("world")
  892. (5, 5)
  893. """
  894. # optional wide-character support
  895. if wcwidth is not None and WIDE_CHARS_MODE:
  896. len_fn = wcwidth.wcswidth
  897. else:
  898. len_fn = len
  899. if isinstance(s, (str, bytes)):
  900. return len_fn(_strip_ansi(s))
  901. else:
  902. return len_fn(str(s))
  903. def _is_multiline(s):
  904. if isinstance(s, str):
  905. return bool(re.search(_multiline_codes, s))
  906. else: # a bytestring
  907. return bool(re.search(_multiline_codes_bytes, s))
  908. def _multiline_width(multiline_s, line_width_fn=len):
  909. """Visible width of a potentially multiline content."""
  910. return max(map(line_width_fn, re.split("[\r\n]", multiline_s)))
  911. def _choose_width_fn(has_invisible, enable_widechars, is_multiline):
  912. """Return a function to calculate visible cell width."""
  913. if has_invisible:
  914. line_width_fn = _visible_width
  915. elif enable_widechars: # optional wide-character support if available
  916. line_width_fn = wcwidth.wcswidth
  917. else:
  918. line_width_fn = len
  919. if is_multiline:
  920. width_fn = lambda s: _multiline_width(s, line_width_fn) # noqa
  921. else:
  922. width_fn = line_width_fn
  923. return width_fn
  924. def _align_column_choose_padfn(strings, alignment, has_invisible):
  925. if alignment == "right":
  926. if not PRESERVE_WHITESPACE:
  927. strings = [s.strip() for s in strings]
  928. padfn = _padleft
  929. elif alignment == "center":
  930. if not PRESERVE_WHITESPACE:
  931. strings = [s.strip() for s in strings]
  932. padfn = _padboth
  933. elif alignment == "decimal":
  934. if has_invisible:
  935. decimals = [_afterpoint(_strip_ansi(s)) for s in strings]
  936. else:
  937. decimals = [_afterpoint(s) for s in strings]
  938. maxdecimals = max(decimals)
  939. strings = [s + (maxdecimals - decs) * " " for s, decs in zip(strings, decimals)]
  940. padfn = _padleft
  941. elif not alignment:
  942. padfn = _padnone
  943. else:
  944. if not PRESERVE_WHITESPACE:
  945. strings = [s.strip() for s in strings]
  946. padfn = _padright
  947. return strings, padfn
  948. def _align_column_choose_width_fn(has_invisible, enable_widechars, is_multiline):
  949. if has_invisible:
  950. line_width_fn = _visible_width
  951. elif enable_widechars: # optional wide-character support if available
  952. line_width_fn = wcwidth.wcswidth
  953. else:
  954. line_width_fn = len
  955. if is_multiline:
  956. width_fn = lambda s: _align_column_multiline_width(s, line_width_fn) # noqa
  957. else:
  958. width_fn = line_width_fn
  959. return width_fn
  960. def _align_column_multiline_width(multiline_s, line_width_fn=len):
  961. """Visible width of a potentially multiline content."""
  962. return list(map(line_width_fn, re.split("[\r\n]", multiline_s)))
  963. def _flat_list(nested_list):
  964. ret = []
  965. for item in nested_list:
  966. if isinstance(item, list):
  967. for subitem in item:
  968. ret.append(subitem)
  969. else:
  970. ret.append(item)
  971. return ret
def _align_column(
    strings,
    alignment,
    minwidth=0,
    has_invisible=True,
    enable_widechars=False,
    is_multiline=False,
):
    """[string] -> [padded_string]

    Pad every cell of one column to a common width.

    ``alignment`` selects the padding function (and any pre-processing of the
    strings) through ``_align_column_choose_padfn``. The column width is the
    widest measured cell line, but never less than ``minwidth``.
    ``has_invisible``, ``enable_widechars`` and ``is_multiline`` select how
    cells are measured and whether each line of a cell is padded separately.

    Returns a new list of padded strings, one per input cell.
    """
    strings, padfn = _align_column_choose_padfn(strings, alignment, has_invisible)
    width_fn = _align_column_choose_width_fn(
        has_invisible, enable_widechars, is_multiline
    )

    # in multiline mode width_fn returns a list of widths per cell, hence the
    # flattening before taking the maximum
    s_widths = list(map(width_fn, strings))
    maxwidth = max(max(_flat_list(s_widths)), minwidth)
    # TODO: refactor column alignment in single-line and multiline modes
    if is_multiline:
        if not enable_widechars and not has_invisible:
            # len() already equals the visible width: pad each line directly
            padded_strings = [
                "\n".join([padfn(maxwidth, s) for s in ms.splitlines()])
                for ms in strings
            ]
        else:
            # enable wide-character width corrections
            s_lens = [[len(s) for s in re.split("[\r\n]", ms)] for ms in strings]
            # w is the measured width and l the len() of a line; their
            # difference is subtracted so that padding by len() still yields
            # maxwidth measured columns
            visible_widths = [
                [maxwidth - (w - l) for w, l in zip(mw, ml)]
                for mw, ml in zip(s_widths, s_lens)
            ]
            # wcswidth and _visible_width don't count invisible characters;
            # padfn doesn't need to apply another correction
            padded_strings = [
                "\n".join([padfn(w, s) for s, w in zip((ms.splitlines() or ms), mw)])
                for ms, mw in zip(strings, visible_widths)
            ]
    else:  # single-line cell values
        if not enable_widechars and not has_invisible:
            padded_strings = [padfn(maxwidth, s) for s in strings]
        else:
            # enable wide-character width corrections
            s_lens = list(map(len, strings))
            # same correction as in the multiline branch, one width per cell
            visible_widths = [maxwidth - (w - l) for w, l in zip(s_widths, s_lens)]
            # wcswidth and _visible_width don't count invisible characters;
            # padfn doesn't need to apply another correction
            padded_strings = [padfn(w, s) for s, w in zip(strings, visible_widths)]
    return padded_strings
  1018. def _more_generic(type1, type2):
  1019. types = {
  1020. type(None): 0,
  1021. bool: 1,
  1022. int: 2,
  1023. float: 3,
  1024. bytes: 4,
  1025. str: 5,
  1026. }
  1027. invtypes = {
  1028. 5: str,
  1029. 4: bytes,
  1030. 3: float,
  1031. 2: int,
  1032. 1: bool,
  1033. 0: type(None),
  1034. }
  1035. moregeneric = max(types.get(type1, 5), types.get(type2, 5))
  1036. return invtypes[moregeneric]
  1037. def _column_type(strings, has_invisible=True, numparse=True):
  1038. """The least generic type all column values are convertible to.
  1039. >>> _column_type([True, False]) is bool
  1040. True
  1041. >>> _column_type(["1", "2"]) is int
  1042. True
  1043. >>> _column_type(["1", "2.3"]) is float
  1044. True
  1045. >>> _column_type(["1", "2.3", "four"]) is str
  1046. True
  1047. >>> _column_type(["four", '\u043f\u044f\u0442\u044c']) is str
  1048. True
  1049. >>> _column_type([None, "brux"]) is str
  1050. True
  1051. >>> _column_type([1, 2, None]) is int
  1052. True
  1053. >>> import datetime as dt
  1054. >>> _column_type([dt.datetime(1991,2,19), dt.time(17,35)]) is str
  1055. True
  1056. """
  1057. types = [_type(s, has_invisible, numparse) for s in strings]
  1058. return reduce(_more_generic, types, bool)
  1059. def _format(val, valtype, floatfmt, intfmt, missingval="", has_invisible=True):
  1060. """Format a value according to its type.
  1061. Unicode is supported:
  1062. >>> hrow = ['\u0431\u0443\u043a\u0432\u0430', '\u0446\u0438\u0444\u0440\u0430'] ; \
  1063. tbl = [['\u0430\u0437', 2], ['\u0431\u0443\u043a\u0438', 4]] ; \
  1064. good_result = '\\u0431\\u0443\\u043a\\u0432\\u0430 \\u0446\\u0438\\u0444\\u0440\\u0430\\n------- -------\\n\\u0430\\u0437 2\\n\\u0431\\u0443\\u043a\\u0438 4' ; \
  1065. tabulate(tbl, headers=hrow) == good_result
  1066. True
  1067. """ # noqa
  1068. if val is None:
  1069. return missingval
  1070. if valtype is str:
  1071. return f"{val}"
  1072. elif valtype is int:
  1073. return format(val, intfmt)
  1074. elif valtype is bytes:
  1075. try:
  1076. return str(val, "ascii")
  1077. except (TypeError, UnicodeDecodeError):
  1078. return str(val)
  1079. elif valtype is float:
  1080. is_a_colored_number = has_invisible and isinstance(val, (str, bytes))
  1081. if is_a_colored_number:
  1082. raw_val = _strip_ansi(val)
  1083. formatted_val = format(float(raw_val), floatfmt)
  1084. return val.replace(raw_val, formatted_val)
  1085. else:
  1086. return format(float(val), floatfmt)
  1087. else:
  1088. return f"{val}"
  1089. def _align_header(
  1090. header, alignment, width, visible_width, is_multiline=False, width_fn=None
  1091. ):
  1092. "Pad string header to width chars given known visible_width of the header."
  1093. if is_multiline:
  1094. header_lines = re.split(_multiline_codes, header)
  1095. padded_lines = [
  1096. _align_header(h, alignment, width, width_fn(h)) for h in header_lines
  1097. ]
  1098. return "\n".join(padded_lines)
  1099. # else: not multiline
  1100. ninvisible = len(header) - visible_width
  1101. width += ninvisible
  1102. if alignment == "left":
  1103. return _padright(width, header)
  1104. elif alignment == "center":
  1105. return _padboth(width, header)
  1106. elif not alignment:
  1107. return f"{header}"
  1108. else:
  1109. return _padleft(width, header)
  1110. def _remove_separating_lines(rows):
  1111. if type(rows) == list:
  1112. separating_lines = []
  1113. sans_rows = []
  1114. for index, row in enumerate(rows):
  1115. if _is_separating_line(row):
  1116. separating_lines.append(index)
  1117. else:
  1118. sans_rows.append(row)
  1119. return sans_rows, separating_lines
  1120. else:
  1121. return rows, None
  1122. def _reinsert_separating_lines(rows, separating_lines):
  1123. if separating_lines:
  1124. for index in separating_lines:
  1125. rows.insert(index, SEPARATING_LINE)
  1126. def _prepend_row_index(rows, index):
  1127. """Add a left-most index column."""
  1128. if index is None or index is False:
  1129. return rows
  1130. if isinstance(index, Sized) and len(index) != len(rows):
  1131. raise ValueError(
  1132. "index must be as long as the number of data rows: "
  1133. + "len(index)={} len(rows)={}".format(len(index), len(rows))
  1134. )
  1135. sans_rows, separating_lines = _remove_separating_lines(rows)
  1136. new_rows = []
  1137. index_iter = iter(index)
  1138. for row in sans_rows:
  1139. index_v = next(index_iter)
  1140. new_rows.append([index_v] + list(row))
  1141. rows = new_rows
  1142. _reinsert_separating_lines(rows, separating_lines)
  1143. return rows
  1144. def _bool(val):
  1145. "A wrapper around standard bool() which doesn't throw on NumPy arrays"
  1146. try:
  1147. return bool(val)
  1148. except ValueError: # val is likely to be a numpy array with many elements
  1149. return False
def _normalize_tabular_data(tabular_data, headers, showindex="default"):
    """Transform a supported data type to a list of lists, and a list of headers.

    Supported tabular data types:

    * list-of-lists or another iterable of iterables

    * list of named tuples (usually used with headers="keys")

    * list of dicts (usually used with headers="keys")

    * list of OrderedDicts (usually used with headers="keys")

    * list of dataclasses (Python 3.7+ only, usually used with headers="keys")

    * 2D NumPy arrays

    * NumPy record arrays (usually used with headers="keys")

    * dict of iterables (usually used with headers="keys")

    * pandas.DataFrame (usually used with headers="keys")

    The first row can be used as headers if headers="firstrow",
    column indices can be used as headers if headers="keys".

    If showindex="default", show row indices of the pandas.DataFrame.
    If showindex="always", show row indices for all types of data.
    If showindex="never", don't show row indices for all types of data.
    If showindex is an iterable, show its values as row indices.

    Returns a ``(rows, headers)`` pair: ``rows`` is a list in which every data
    row is a list (separating lines are kept as they are), ``headers`` is a
    list of strings, left-padded with empty strings when there are fewer
    headers than columns in the first row.

    Raises ``ValueError`` if the data has ``keys`` and ``values`` attributes
    but looks like neither a dict nor a DataFrame, or if the rows are
    dict-like and ``headers`` is a non-empty value that is neither a dict nor
    one of the keywords.
    """

    try:
        bool(headers)
        is_headers2bool_broken = False  # noqa
    except ValueError:  # numpy.ndarray, pandas.core.index.Index, ...
        # NOTE: is_headers2bool_broken is assigned but never read in this
        # function; only the conversion of headers to a list has an effect
        is_headers2bool_broken = True  # noqa
        headers = list(headers)

    # row labels; only the DataFrame branch below fills this in
    index = None
    if hasattr(tabular_data, "keys") and hasattr(tabular_data, "values"):
        # dict-like and pandas.DataFrame?
        if hasattr(tabular_data.values, "__call__"):
            # likely a conventional dict
            keys = tabular_data.keys()
            # shorter columns are filled with None by izip_longest
            rows = list(
                izip_longest(*tabular_data.values())
            )  # columns have to be transposed
        elif hasattr(tabular_data, "index"):
            # values is a property, has .index => it's likely a pandas.DataFrame (pandas 0.11.0)
            keys = list(tabular_data)
            if (
                showindex in ["default", "always", True]
                and tabular_data.index.name is not None
            ):
                # the index column will be shown: put its name(s) in front of
                # the column names so they line up with the data
                if isinstance(tabular_data.index.name, list):
                    keys[:0] = tabular_data.index.name
                else:
                    keys[:0] = [tabular_data.index.name]
            vals = tabular_data.values  # values matrix doesn't need to be transposed
            # for DataFrames add an index per default
            index = list(tabular_data.index)
            rows = [list(row) for row in vals]
        else:
            raise ValueError("tabular data doesn't appear to be a dict or a DataFrame")

        if headers == "keys":
            headers = list(map(str, keys))  # headers should be strings

    else:  # it's a usual iterable of iterables, or a NumPy array, or an iterable of dataclasses
        rows = list(tabular_data)

        # the branches below are tried in order; the first one matching the
        # shape of the data (and the headers keyword) wins
        if headers == "keys" and not rows:
            # an empty table (issue #81)
            headers = []
        elif (
            headers == "keys"
            and hasattr(tabular_data, "dtype")
            and getattr(tabular_data.dtype, "names")
        ):
            # numpy record array
            headers = tabular_data.dtype.names
        elif (
            headers == "keys"
            and len(rows) > 0
            and isinstance(rows[0], tuple)
            and hasattr(rows[0], "_fields")
        ):
            # namedtuple
            headers = list(map(str, rows[0]._fields))
        elif len(rows) > 0 and hasattr(rows[0], "keys") and hasattr(rows[0], "values"):
            # dict-like object
            uniq_keys = set()  # implements hashed lookup
            keys = []  # storage for set
            if headers == "firstrow":
                # the first dict supplies the header texts and is removed
                # from the data rows
                firstdict = rows[0] if len(rows) > 0 else {}
                keys.extend(firstdict.keys())
                uniq_keys.update(keys)
                rows = rows[1:]
            for row in rows:
                for k in row.keys():
                    # Save unique items in input order
                    if k not in uniq_keys:
                        keys.append(k)
                        uniq_keys.add(k)
            if headers == "keys":
                headers = keys
            elif isinstance(headers, dict):
                # a dict of headers for a list of dicts
                headers = [headers.get(k, k) for k in keys]
                headers = list(map(str, headers))
            elif headers == "firstrow":
                # rows no longer contains the header dict at this point, so
                # headers are produced only if data rows remain
                if len(rows) > 0:
                    headers = [firstdict.get(k, k) for k in keys]
                    headers = list(map(str, headers))
                else:
                    headers = []
            elif headers:
                raise ValueError(
                    "headers for a list of dicts is not a dict or a keyword"
                )
            # one list per row, ordered by keys; missing keys become None
            rows = [[row.get(k) for k in keys] for row in rows]

        elif (
            headers == "keys"
            and hasattr(tabular_data, "description")
            and hasattr(tabular_data, "fetchone")
            and hasattr(tabular_data, "rowcount")
        ):
            # Python Database API cursor object (PEP 0249)
            # print tabulate(cursor, headers='keys')
            headers = [column[0] for column in tabular_data.description]

        elif (
            dataclasses is not None
            and len(rows) > 0
            and dataclasses.is_dataclass(rows[0])
        ):
            # Python 3.7+'s dataclass
            # the fields of the first row define the columns of every row
            field_names = [field.name for field in dataclasses.fields(rows[0])]
            if headers == "keys":
                headers = field_names
            rows = [[getattr(row, f) for f in field_names] for row in rows]

        elif headers == "keys" and len(rows) > 0:
            # keys are column indices
            headers = list(map(str, range(len(rows[0]))))

    # take headers from the first row if necessary
    if headers == "firstrow" and len(rows) > 0:
        if index is not None:
            # the first index label belongs to the header row
            headers = [index[0]] + list(rows[0])
            index = index[1:]
        else:
            headers = rows[0]
        headers = list(map(str, headers))  # headers should be strings
        rows = rows[1:]
    elif headers == "firstrow":
        headers = []

    headers = list(map(str, headers))
    #    rows = list(map(list, rows))
    # separating lines are left as they are; every other row becomes a list
    rows = list(map(lambda r: r if _is_separating_line(r) else list(r), rows))

    # add or remove an index column
    showindex_is_a_str = type(showindex) in [str, bytes]
    if showindex == "default" and index is not None:
        rows = _prepend_row_index(rows, index)
    elif isinstance(showindex, Sized) and not showindex_is_a_str:
        rows = _prepend_row_index(rows, list(showindex))
    elif isinstance(showindex, Iterable) and not showindex_is_a_str:
        rows = _prepend_row_index(rows, showindex)
    elif showindex == "always" or (_bool(showindex) and not showindex_is_a_str):
        if index is None:
            index = list(range(len(rows)))
        rows = _prepend_row_index(rows, index)
    elif showindex == "never" or (not _bool(showindex) and not showindex_is_a_str):
        pass

    # pad with empty headers for initial columns if necessary
    if headers and len(rows) > 0:
        nhs = len(headers)
        ncols = len(rows[0])
        if nhs < ncols:
            headers = [""] * (ncols - nhs) + headers

    return rows, headers
  1312. def _wrap_text_to_colwidths(list_of_lists, colwidths, numparses=True):
  1313. numparses = _expand_iterable(numparses, len(list_of_lists[0]), True)
  1314. result = []
  1315. for row in list_of_lists:
  1316. new_row = []
  1317. for cell, width, numparse in zip(row, colwidths, numparses):
  1318. if _isnumber(cell) and numparse:
  1319. new_row.append(cell)
  1320. continue
  1321. if width is not None:
  1322. wrapper = _CustomTextWrap(width=width)
  1323. # Cast based on our internal type handling
  1324. # Any future custom formatting of types (such as datetimes)
  1325. # may need to be more explicit than just `str` of the object
  1326. casted_cell = (
  1327. str(cell) if _isnumber(cell) else _type(cell, numparse)(cell)
  1328. )
  1329. wrapped = wrapper.wrap(casted_cell)
  1330. new_row.append("\n".join(wrapped))
  1331. else:
  1332. new_row.append(cell)
  1333. result.append(new_row)
  1334. return result
  1335. def _to_str(s, encoding="utf8", errors="ignore"):
  1336. """
  1337. A type safe wrapper for converting a bytestring to str. This is essentially just
  1338. a wrapper around .decode() intended for use with things like map(), but with some
  1339. specific behavior:
  1340. 1. if the given parameter is not a bytestring, it is returned unmodified
  1341. 2. decode() is called for the given parameter and assumes utf8 encoding, but the
  1342. default error behavior is changed from 'strict' to 'ignore'
  1343. >>> repr(_to_str(b'foo'))
  1344. "'foo'"
  1345. >>> repr(_to_str('foo'))
  1346. "'foo'"
  1347. >>> repr(_to_str(42))
  1348. "'42'"
  1349. """
  1350. if isinstance(s, bytes):
  1351. return s.decode(encoding=encoding, errors=errors)
  1352. return str(s)
  1353. def tabulate(
  1354. tabular_data,
  1355. headers=(),
  1356. tablefmt="simple",
  1357. floatfmt=_DEFAULT_FLOATFMT,
  1358. intfmt=_DEFAULT_INTFMT,
  1359. numalign=_DEFAULT_ALIGN,
  1360. stralign=_DEFAULT_ALIGN,
  1361. missingval=_DEFAULT_MISSINGVAL,
  1362. showindex="default",
  1363. disable_numparse=False,
  1364. colalign=None,
  1365. maxcolwidths=None,
  1366. rowalign=None,
  1367. maxheadercolwidths=None,
  1368. ):
  1369. """Format a fixed width table for pretty printing.
  1370. >>> print(tabulate([[1, 2.34], [-56, "8.999"], ["2", "10001"]]))
  1371. --- ---------
  1372. 1 2.34
  1373. -56 8.999
  1374. 2 10001
  1375. --- ---------
  1376. The first required argument (`tabular_data`) can be a
  1377. list-of-lists (or another iterable of iterables), a list of named
  1378. tuples, a dictionary of iterables, an iterable of dictionaries,
  1379. an iterable of dataclasses (Python 3.7+), a two-dimensional NumPy array,
  1380. NumPy record array, or a Pandas' dataframe.
  1381. Table headers
  1382. -------------
  1383. To print nice column headers, supply the second argument (`headers`):
  1384. - `headers` can be an explicit list of column headers
  1385. - if `headers="firstrow"`, then the first row of data is used
  1386. - if `headers="keys"`, then dictionary keys or column indices are used
  1387. Otherwise a headerless table is produced.
  1388. If the number of headers is less than the number of columns, they
  1389. are supposed to be names of the last columns. This is consistent
  1390. with the plain-text format of R and Pandas' dataframes.
  1391. >>> print(tabulate([["sex","age"],["Alice","F",24],["Bob","M",19]],
  1392. ... headers="firstrow"))
  1393. sex age
  1394. ----- ----- -----
  1395. Alice F 24
  1396. Bob M 19
  1397. By default, pandas.DataFrame data have an additional column called
  1398. row index. To add a similar column to all other types of data,
  1399. use `showindex="always"` or `showindex=True`. To suppress row indices
  1400. for all types of data, pass `showindex="never" or `showindex=False`.
  1401. To add a custom row index column, pass `showindex=some_iterable`.
  1402. >>> print(tabulate([["F",24],["M",19]], showindex="always"))
  1403. - - --
  1404. 0 F 24
  1405. 1 M 19
  1406. - - --
  1407. Column alignment
  1408. ----------------
  1409. `tabulate` tries to detect column types automatically, and aligns
  1410. the values properly. By default it aligns decimal points of the
  1411. numbers (or flushes integer numbers to the right), and flushes
  1412. everything else to the left. Possible column alignments
  1413. (`numalign`, `stralign`) are: "right", "center", "left", "decimal"
  1414. (only for `numalign`), and None (to disable alignment).
  1415. Table formats
  1416. -------------
  1417. `intfmt` is a format specification used for columns which
  1418. contain numeric data without a decimal point. This can also be
  1419. a list or tuple of format strings, one per column.
  1420. `floatfmt` is a format specification used for columns which
  1421. contain numeric data with a decimal point. This can also be
  1422. a list or tuple of format strings, one per column.
  1423. `None` values are replaced with a `missingval` string (like
  1424. `floatfmt`, this can also be a list of values for different
  1425. columns):
  1426. >>> print(tabulate([["spam", 1, None],
  1427. ... ["eggs", 42, 3.14],
  1428. ... ["other", None, 2.7]], missingval="?"))
  1429. ----- -- ----
  1430. spam 1 ?
  1431. eggs 42 3.14
  1432. other ? 2.7
  1433. ----- -- ----
  1434. Various plain-text table formats (`tablefmt`) are supported:
  1435. 'plain', 'simple', 'grid', 'pipe', 'orgtbl', 'rst', 'mediawiki',
  1436. 'latex', 'latex_raw', 'latex_booktabs', 'latex_longtable' and tsv.
  1437. Variable `tabulate_formats`contains the list of currently supported formats.
  1438. "plain" format doesn't use any pseudographics to draw tables,
  1439. it separates columns with a double space:
  1440. >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
  1441. ... ["strings", "numbers"], "plain"))
  1442. strings numbers
  1443. spam 41.9999
  1444. eggs 451
  1445. >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], tablefmt="plain"))
  1446. spam 41.9999
  1447. eggs 451
  1448. "simple" format is like Pandoc simple_tables:
  1449. >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
  1450. ... ["strings", "numbers"], "simple"))
  1451. strings numbers
  1452. --------- ---------
  1453. spam 41.9999
  1454. eggs 451
  1455. >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], tablefmt="simple"))
  1456. ---- --------
  1457. spam 41.9999
  1458. eggs 451
  1459. ---- --------
  1460. "grid" is similar to tables produced by Emacs table.el package or
  1461. Pandoc grid_tables:
  1462. >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
  1463. ... ["strings", "numbers"], "grid"))
  1464. +-----------+-----------+
  1465. | strings | numbers |
  1466. +===========+===========+
  1467. | spam | 41.9999 |
  1468. +-----------+-----------+
  1469. | eggs | 451 |
  1470. +-----------+-----------+
  1471. >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], tablefmt="grid"))
  1472. +------+----------+
  1473. | spam | 41.9999 |
  1474. +------+----------+
  1475. | eggs | 451 |
  1476. +------+----------+
  1477. "simple_grid" draws a grid using single-line box-drawing
  1478. characters:
  1479. >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
  1480. ... ["strings", "numbers"], "simple_grid"))
  1481. ┌───────────┬───────────┐
  1482. │ strings │ numbers │
  1483. ├───────────┼───────────┤
  1484. │ spam │ 41.9999 │
  1485. ├───────────┼───────────┤
  1486. │ eggs │ 451 │
  1487. └───────────┴───────────┘
  1488. "rounded_grid" draws a grid using single-line box-drawing
  1489. characters with rounded corners:
  1490. >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
  1491. ... ["strings", "numbers"], "rounded_grid"))
  1492. ╭───────────┬───────────╮
  1493. │ strings │ numbers │
  1494. ├───────────┼───────────┤
  1495. │ spam │ 41.9999 │
  1496. ├───────────┼───────────┤
  1497. │ eggs │ 451 │
  1498. ╰───────────┴───────────╯
  1499. "heavy_grid" draws a grid using bold (thick) single-line box-drawing
  1500. characters:
  1501. >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
  1502. ... ["strings", "numbers"], "heavy_grid"))
  1503. ┏━━━━━━━━━━━┳━━━━━━━━━━━┓
  1504. ┃ strings ┃ numbers ┃
  1505. ┣━━━━━━━━━━━╋━━━━━━━━━━━┫
  1506. ┃ spam ┃ 41.9999 ┃
  1507. ┣━━━━━━━━━━━╋━━━━━━━━━━━┫
  1508. ┃ eggs ┃ 451 ┃
  1509. ┗━━━━━━━━━━━┻━━━━━━━━━━━┛
  1510. "mixed_grid" draws a grid using a mix of light (thin) and heavy (thick) lines
  1511. box-drawing characters:
  1512. >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
  1513. ... ["strings", "numbers"], "mixed_grid"))
  1514. ┍━━━━━━━━━━━┯━━━━━━━━━━━┑
  1515. │ strings │ numbers │
  1516. ┝━━━━━━━━━━━┿━━━━━━━━━━━┥
  1517. │ spam │ 41.9999 │
  1518. ├───────────┼───────────┤
  1519. │ eggs │ 451 │
  1520. ┕━━━━━━━━━━━┷━━━━━━━━━━━┙
  1521. "double_grid" draws a grid using double-line box-drawing
  1522. characters:
  1523. >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
  1524. ... ["strings", "numbers"], "double_grid"))
  1525. ╔═══════════╦═══════════╗
  1526. ║ strings ║ numbers ║
  1527. ╠═══════════╬═══════════╣
  1528. ║ spam ║ 41.9999 ║
  1529. ╠═══════════╬═══════════╣
  1530. ║ eggs ║ 451 ║
  1531. ╚═══════════╩═══════════╝
  1532. "fancy_grid" draws a grid using a mix of single and
  1533. double-line box-drawing characters:
  1534. >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
  1535. ... ["strings", "numbers"], "fancy_grid"))
  1536. ╒═══════════╤═══════════╕
  1537. │ strings │ numbers │
  1538. ╞═══════════╪═══════════╡
  1539. │ spam │ 41.9999 │
  1540. ├───────────┼───────────┤
  1541. │ eggs │ 451 │
  1542. ╘═══════════╧═══════════╛
  1543. "outline" is the same as the "grid" format but doesn't draw lines between rows:
  1544. >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
  1545. ... ["strings", "numbers"], "outline"))
  1546. +-----------+-----------+
  1547. | strings | numbers |
  1548. +===========+===========+
  1549. | spam | 41.9999 |
  1550. | eggs | 451 |
  1551. +-----------+-----------+
  1552. >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], tablefmt="outline"))
  1553. +------+----------+
  1554. | spam | 41.9999 |
  1555. | eggs | 451 |
  1556. +------+----------+
  1557. "simple_outline" is the same as the "simple_grid" format but doesn't draw lines between rows:
  1558. >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
  1559. ... ["strings", "numbers"], "simple_outline"))
  1560. ┌───────────┬───────────┐
  1561. │ strings │ numbers │
  1562. ├───────────┼───────────┤
  1563. │ spam │ 41.9999 │
  1564. │ eggs │ 451 │
  1565. └───────────┴───────────┘
  1566. "rounded_outline" is the same as the "rounded_grid" format but doesn't draw lines between rows:
  1567. >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
  1568. ... ["strings", "numbers"], "rounded_outline"))
  1569. ╭───────────┬───────────╮
  1570. │ strings │ numbers │
  1571. ├───────────┼───────────┤
  1572. │ spam │ 41.9999 │
  1573. │ eggs │ 451 │
  1574. ╰───────────┴───────────╯
  1575. "heavy_outline" is the same as the "heavy_grid" format but doesn't draw lines between rows:
  1576. >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
  1577. ... ["strings", "numbers"], "heavy_outline"))
  1578. ┏━━━━━━━━━━━┳━━━━━━━━━━━┓
  1579. ┃ strings ┃ numbers ┃
  1580. ┣━━━━━━━━━━━╋━━━━━━━━━━━┫
  1581. ┃ spam ┃ 41.9999 ┃
  1582. ┃ eggs ┃ 451 ┃
  1583. ┗━━━━━━━━━━━┻━━━━━━━━━━━┛
  1584. "mixed_outline" is the same as the "mixed_grid" format but doesn't draw lines between rows:
  1585. >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
  1586. ... ["strings", "numbers"], "mixed_outline"))
  1587. ┍━━━━━━━━━━━┯━━━━━━━━━━━┑
  1588. │ strings │ numbers │
  1589. ┝━━━━━━━━━━━┿━━━━━━━━━━━┥
  1590. │ spam │ 41.9999 │
  1591. │ eggs │ 451 │
  1592. ┕━━━━━━━━━━━┷━━━━━━━━━━━┙
  1593. "double_outline" is the same as the "double_grid" format but doesn't draw lines between rows:
  1594. >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
  1595. ... ["strings", "numbers"], "double_outline"))
  1596. ╔═══════════╦═══════════╗
  1597. ║ strings ║ numbers ║
  1598. ╠═══════════╬═══════════╣
  1599. ║ spam ║ 41.9999 ║
  1600. ║ eggs ║ 451 ║
  1601. ╚═══════════╩═══════════╝
  1602. "fancy_outline" is the same as the "fancy_grid" format but doesn't draw lines between rows:
  1603. >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
  1604. ... ["strings", "numbers"], "fancy_outline"))
  1605. ╒═══════════╤═══════════╕
  1606. │ strings │ numbers │
  1607. ╞═══════════╪═══════════╡
  1608. │ spam │ 41.9999 │
  1609. │ eggs │ 451 │
  1610. ╘═══════════╧═══════════╛
  1611. "pipe" is like tables in PHP Markdown Extra extension or Pandoc
  1612. pipe_tables:
  1613. >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
  1614. ... ["strings", "numbers"], "pipe"))
  1615. | strings | numbers |
  1616. |:----------|----------:|
  1617. | spam | 41.9999 |
  1618. | eggs | 451 |
  1619. "presto" is like tables produce by the Presto CLI:
  1620. >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
  1621. ... ["strings", "numbers"], "presto"))
  1622. strings | numbers
  1623. -----------+-----------
  1624. spam | 41.9999
  1625. eggs | 451
  1626. >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], tablefmt="pipe"))
  1627. |:-----|---------:|
  1628. | spam | 41.9999 |
  1629. | eggs | 451 |
  1630. "orgtbl" is like tables in Emacs org-mode and orgtbl-mode. They
  1631. are slightly different from "pipe" format by not using colons to
  1632. define column alignment, and using a "+" sign to indicate line
  1633. intersections:
  1634. >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
  1635. ... ["strings", "numbers"], "orgtbl"))
  1636. | strings | numbers |
  1637. |-----------+-----------|
  1638. | spam | 41.9999 |
  1639. | eggs | 451 |
  1640. >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], tablefmt="orgtbl"))
  1641. | spam | 41.9999 |
  1642. | eggs | 451 |
  1643. "rst" is like a simple table format from reStructuredText; please
  1644. note that reStructuredText accepts also "grid" tables:
  1645. >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
  1646. ... ["strings", "numbers"], "rst"))
  1647. ========= =========
  1648. strings numbers
  1649. ========= =========
  1650. spam 41.9999
  1651. eggs 451
  1652. ========= =========
  1653. >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], tablefmt="rst"))
  1654. ==== ========
  1655. spam 41.9999
  1656. eggs 451
  1657. ==== ========
  1658. "mediawiki" produces a table markup used in Wikipedia and on other
  1659. MediaWiki-based sites:
  1660. >>> print(tabulate([["strings", "numbers"], ["spam", 41.9999], ["eggs", "451.0"]],
  1661. ... headers="firstrow", tablefmt="mediawiki"))
  1662. {| class="wikitable" style="text-align: left;"
  1663. |+ <!-- caption -->
  1664. |-
  1665. ! strings !! align="right"| numbers
  1666. |-
  1667. | spam || align="right"| 41.9999
  1668. |-
  1669. | eggs || align="right"| 451
  1670. |}
  1671. "html" produces HTML markup as an html.escape'd str
  1672. with a ._repr_html_ method so that Jupyter Lab and Notebook display the HTML
  1673. and a .str property so that the raw HTML remains accessible
  1674. the unsafehtml table format can be used if an unescaped HTML format is required:
  1675. >>> print(tabulate([["strings", "numbers"], ["spam", 41.9999], ["eggs", "451.0"]],
  1676. ... headers="firstrow", tablefmt="html"))
  1677. <table>
  1678. <thead>
  1679. <tr><th>strings </th><th style="text-align: right;"> numbers</th></tr>
  1680. </thead>
  1681. <tbody>
  1682. <tr><td>spam </td><td style="text-align: right;"> 41.9999</td></tr>
  1683. <tr><td>eggs </td><td style="text-align: right;"> 451 </td></tr>
  1684. </tbody>
  1685. </table>
  1686. "latex" produces a tabular environment of LaTeX document markup:
  1687. >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], tablefmt="latex"))
  1688. \\begin{tabular}{lr}
  1689. \\hline
  1690. spam & 41.9999 \\\\
  1691. eggs & 451 \\\\
  1692. \\hline
  1693. \\end{tabular}
  1694. "latex_raw" is similar to "latex", but doesn't escape special characters,
  1695. such as backslash and underscore, so LaTeX commands may be embedded into
  1696. cells' values:
  1697. >>> print(tabulate([["spam$_9$", 41.9999], ["\\\\emph{eggs}", "451.0"]], tablefmt="latex_raw"))
  1698. \\begin{tabular}{lr}
  1699. \\hline
  1700. spam$_9$ & 41.9999 \\\\
  1701. \\emph{eggs} & 451 \\\\
  1702. \\hline
  1703. \\end{tabular}
  1704. "latex_booktabs" produces a tabular environment of LaTeX document markup
  1705. using the booktabs.sty package:
  1706. >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], tablefmt="latex_booktabs"))
  1707. \\begin{tabular}{lr}
  1708. \\toprule
  1709. spam & 41.9999 \\\\
  1710. eggs & 451 \\\\
  1711. \\bottomrule
  1712. \\end{tabular}
  1713. "latex_longtable" produces a tabular environment that can stretch along
  1714. multiple pages, using the longtable package for LaTeX.
  1715. >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], tablefmt="latex_longtable"))
  1716. \\begin{longtable}{lr}
  1717. \\hline
  1718. spam & 41.9999 \\\\
  1719. eggs & 451 \\\\
  1720. \\hline
  1721. \\end{longtable}
  1722. Number parsing
  1723. --------------
  1724. By default, anything which can be parsed as a number is a number.
  1725. This ensures numbers represented as strings are aligned properly.
  1726. This can lead to weird results for particular strings such as
  1727. specific git SHAs e.g. "42992e1" will be parsed into the number
  1728. 429920 and aligned as such.
  1729. To completely disable number parsing (and alignment), use
  1730. `disable_numparse=True`. For more fine grained control, a list of column
  1731. indices can be used to disable number parsing only on those columns
  1732. e.g. `disable_numparse=[0, 2]` would disable number parsing only on the
  1733. first and third columns.
  1734. Column Widths and Auto Line Wrapping
  1735. ------------------------------------
  1736. Tabulate will, by default, set the width of each column to the length of the
  1737. longest element in that column. However, in situations where fields are expected
  1738. to reasonably be too long to look good as a single line, tabulate can help automate
  1739. word wrapping long fields for you. Use the parameter `maxcolwidths` to provide a
  1740. list of maximal column widths:
  1741. >>> print(tabulate( \
  1742. [('1', 'John Smith', \
  1743. 'This is a rather long description that might look better if it is wrapped a bit')], \
  1744. headers=("Issue Id", "Author", "Description"), \
  1745. maxcolwidths=[None, None, 30], \
  1746. tablefmt="grid" \
  1747. ))
  1748. +------------+------------+-------------------------------+
  1749. | Issue Id | Author | Description |
  1750. +============+============+===============================+
  1751. | 1 | John Smith | This is a rather long |
  1752. | | | description that might look |
  1753. | | | better if it is wrapped a bit |
  1754. +------------+------------+-------------------------------+
  1755. Header column width can be specified in a similar way using `maxheadercolwidths`
  1756. """
  1757. if tabular_data is None:
  1758. tabular_data = []
  1759. list_of_lists, headers = _normalize_tabular_data(
  1760. tabular_data, headers, showindex=showindex
  1761. )
  1762. list_of_lists, separating_lines = _remove_separating_lines(list_of_lists)
  1763. if maxcolwidths is not None:
  1764. num_cols = len(list_of_lists[0])
  1765. if isinstance(maxcolwidths, int): # Expand scalar for all columns
  1766. maxcolwidths = _expand_iterable(maxcolwidths, num_cols, maxcolwidths)
  1767. else: # Ignore col width for any 'trailing' columns
  1768. maxcolwidths = _expand_iterable(maxcolwidths, num_cols, None)
  1769. numparses = _expand_numparse(disable_numparse, num_cols)
  1770. list_of_lists = _wrap_text_to_colwidths(
  1771. list_of_lists, maxcolwidths, numparses=numparses
  1772. )
  1773. if maxheadercolwidths is not None:
  1774. num_cols = len(list_of_lists[0])
  1775. if isinstance(maxheadercolwidths, int): # Expand scalar for all columns
  1776. maxheadercolwidths = _expand_iterable(
  1777. maxheadercolwidths, num_cols, maxheadercolwidths
  1778. )
  1779. else: # Ignore col width for any 'trailing' columns
  1780. maxheadercolwidths = _expand_iterable(maxheadercolwidths, num_cols, None)
  1781. numparses = _expand_numparse(disable_numparse, num_cols)
  1782. headers = _wrap_text_to_colwidths(
  1783. [headers], maxheadercolwidths, numparses=numparses
  1784. )[0]
  1785. # empty values in the first column of RST tables should be escaped (issue #82)
  1786. # "" should be escaped as "\\ " or ".."
  1787. if tablefmt == "rst":
  1788. list_of_lists, headers = _rst_escape_first_column(list_of_lists, headers)
  1789. # PrettyTable formatting does not use any extra padding.
  1790. # Numbers are not parsed and are treated the same as strings for alignment.
  1791. # Check if pretty is the format being used and override the defaults so it
  1792. # does not impact other formats.
  1793. min_padding = MIN_PADDING
  1794. if tablefmt == "pretty":
  1795. min_padding = 0
  1796. disable_numparse = True
  1797. numalign = "center" if numalign == _DEFAULT_ALIGN else numalign
  1798. stralign = "center" if stralign == _DEFAULT_ALIGN else stralign
  1799. else:
  1800. numalign = "decimal" if numalign == _DEFAULT_ALIGN else numalign
  1801. stralign = "left" if stralign == _DEFAULT_ALIGN else stralign
  1802. # optimization: look for ANSI control codes once,
  1803. # enable smart width functions only if a control code is found
  1804. #
  1805. # convert the headers and rows into a single, tab-delimited string ensuring
  1806. # that any bytestrings are decoded safely (i.e. errors ignored)
  1807. plain_text = "\t".join(
  1808. chain(
  1809. # headers
  1810. map(_to_str, headers),
  1811. # rows: chain the rows together into a single iterable after mapping
  1812. # the bytestring conversion to each cell value
  1813. chain.from_iterable(map(_to_str, row) for row in list_of_lists),
  1814. )
  1815. )
  1816. has_invisible = _ansi_codes.search(plain_text) is not None
  1817. enable_widechars = wcwidth is not None and WIDE_CHARS_MODE
  1818. if (
  1819. not isinstance(tablefmt, TableFormat)
  1820. and tablefmt in multiline_formats
  1821. and _is_multiline(plain_text)
  1822. ):
  1823. tablefmt = multiline_formats.get(tablefmt, tablefmt)
  1824. is_multiline = True
  1825. else:
  1826. is_multiline = False
  1827. width_fn = _choose_width_fn(has_invisible, enable_widechars, is_multiline)
  1828. # format rows and columns, convert numeric values to strings
  1829. cols = list(izip_longest(*list_of_lists))
  1830. numparses = _expand_numparse(disable_numparse, len(cols))
  1831. coltypes = [_column_type(col, numparse=np) for col, np in zip(cols, numparses)]
  1832. if isinstance(floatfmt, str): # old version
  1833. float_formats = len(cols) * [
  1834. floatfmt
  1835. ] # just duplicate the string to use in each column
  1836. else: # if floatfmt is list, tuple etc we have one per column
  1837. float_formats = list(floatfmt)
  1838. if len(float_formats) < len(cols):
  1839. float_formats.extend((len(cols) - len(float_formats)) * [_DEFAULT_FLOATFMT])
  1840. if isinstance(intfmt, str): # old version
  1841. int_formats = len(cols) * [
  1842. intfmt
  1843. ] # just duplicate the string to use in each column
  1844. else: # if intfmt is list, tuple etc we have one per column
  1845. int_formats = list(intfmt)
  1846. if len(int_formats) < len(cols):
  1847. int_formats.extend((len(cols) - len(int_formats)) * [_DEFAULT_INTFMT])
  1848. if isinstance(missingval, str):
  1849. missing_vals = len(cols) * [missingval]
  1850. else:
  1851. missing_vals = list(missingval)
  1852. if len(missing_vals) < len(cols):
  1853. missing_vals.extend((len(cols) - len(missing_vals)) * [_DEFAULT_MISSINGVAL])
  1854. cols = [
  1855. [_format(v, ct, fl_fmt, int_fmt, miss_v, has_invisible) for v in c]
  1856. for c, ct, fl_fmt, int_fmt, miss_v in zip(
  1857. cols, coltypes, float_formats, int_formats, missing_vals
  1858. )
  1859. ]
  1860. # align columns
  1861. aligns = [numalign if ct in [int, float] else stralign for ct in coltypes]
  1862. if colalign is not None:
  1863. assert isinstance(colalign, Iterable)
  1864. for idx, align in enumerate(colalign):
  1865. aligns[idx] = align
  1866. minwidths = (
  1867. [width_fn(h) + min_padding for h in headers] if headers else [0] * len(cols)
  1868. )
  1869. cols = [
  1870. _align_column(c, a, minw, has_invisible, enable_widechars, is_multiline)
  1871. for c, a, minw in zip(cols, aligns, minwidths)
  1872. ]
  1873. if headers:
  1874. # align headers and add headers
  1875. t_cols = cols or [[""]] * len(headers)
  1876. t_aligns = aligns or [stralign] * len(headers)
  1877. minwidths = [
  1878. max(minw, max(width_fn(cl) for cl in c))
  1879. for minw, c in zip(minwidths, t_cols)
  1880. ]
  1881. headers = [
  1882. _align_header(h, a, minw, width_fn(h), is_multiline, width_fn)
  1883. for h, a, minw in zip(headers, t_aligns, minwidths)
  1884. ]
  1885. rows = list(zip(*cols))
  1886. else:
  1887. minwidths = [max(width_fn(cl) for cl in c) for c in cols]
  1888. rows = list(zip(*cols))
  1889. if not isinstance(tablefmt, TableFormat):
  1890. tablefmt = _table_formats.get(tablefmt, _table_formats["simple"])
  1891. ra_default = rowalign if isinstance(rowalign, str) else None
  1892. rowaligns = _expand_iterable(rowalign, len(rows), ra_default)
  1893. _reinsert_separating_lines(rows, separating_lines)
  1894. return _format_table(
  1895. tablefmt, headers, rows, minwidths, aligns, is_multiline, rowaligns=rowaligns
  1896. )
  1897. def _expand_numparse(disable_numparse, column_count):
  1898. """
  1899. Return a list of bools of length `column_count` which indicates whether
  1900. number parsing should be used on each column.
  1901. If `disable_numparse` is a list of indices, each of those indices are False,
  1902. and everything else is True.
  1903. If `disable_numparse` is a bool, then the returned list is all the same.
  1904. """
  1905. if isinstance(disable_numparse, Iterable):
  1906. numparses = [True] * column_count
  1907. for index in disable_numparse:
  1908. numparses[index] = False
  1909. return numparses
  1910. else:
  1911. return [not disable_numparse] * column_count
  1912. def _expand_iterable(original, num_desired, default):
  1913. """
  1914. Expands the `original` argument to return a return a list of
  1915. length `num_desired`. If `original` is shorter than `num_desired`, it will
  1916. be padded with the value in `default`.
  1917. If `original` is not a list to begin with (i.e. scalar value) a list of
  1918. length `num_desired` completely populated with `default will be returned
  1919. """
  1920. if isinstance(original, Iterable) and not isinstance(original, str):
  1921. return original + [default] * (num_desired - len(original))
  1922. else:
  1923. return [default] * num_desired
  1924. def _pad_row(cells, padding):
  1925. if cells:
  1926. pad = " " * padding
  1927. padded_cells = [pad + cell + pad for cell in cells]
  1928. return padded_cells
  1929. else:
  1930. return cells
  1931. def _build_simple_row(padded_cells, rowfmt):
  1932. "Format row according to DataRow format without padding."
  1933. begin, sep, end = rowfmt
  1934. return (begin + sep.join(padded_cells) + end).rstrip()
  1935. def _build_row(padded_cells, colwidths, colaligns, rowfmt):
  1936. "Return a string which represents a row of data cells."
  1937. if not rowfmt:
  1938. return None
  1939. if hasattr(rowfmt, "__call__"):
  1940. return rowfmt(padded_cells, colwidths, colaligns)
  1941. else:
  1942. return _build_simple_row(padded_cells, rowfmt)
  1943. def _append_basic_row(lines, padded_cells, colwidths, colaligns, rowfmt, rowalign=None):
  1944. # NOTE: rowalign is ignored and exists for api compatibility with _append_multiline_row
  1945. lines.append(_build_row(padded_cells, colwidths, colaligns, rowfmt))
  1946. return lines
  1947. def _align_cell_veritically(text_lines, num_lines, column_width, row_alignment):
  1948. delta_lines = num_lines - len(text_lines)
  1949. blank = [" " * column_width]
  1950. if row_alignment == "bottom":
  1951. return blank * delta_lines + text_lines
  1952. elif row_alignment == "center":
  1953. top_delta = delta_lines // 2
  1954. bottom_delta = delta_lines - top_delta
  1955. return top_delta * blank + text_lines + bottom_delta * blank
  1956. else:
  1957. return text_lines + blank * delta_lines
  1958. def _append_multiline_row(
  1959. lines, padded_multiline_cells, padded_widths, colaligns, rowfmt, pad, rowalign=None
  1960. ):
  1961. colwidths = [w - 2 * pad for w in padded_widths]
  1962. cells_lines = [c.splitlines() for c in padded_multiline_cells]
  1963. nlines = max(map(len, cells_lines)) # number of lines in the row
  1964. # vertically pad cells where some lines are missing
  1965. # cells_lines = [
  1966. # (cl + [" " * w] * (nlines - len(cl))) for cl, w in zip(cells_lines, colwidths)
  1967. # ]
  1968. cells_lines = [
  1969. _align_cell_veritically(cl, nlines, w, rowalign)
  1970. for cl, w in zip(cells_lines, colwidths)
  1971. ]
  1972. lines_cells = [[cl[i] for cl in cells_lines] for i in range(nlines)]
  1973. for ln in lines_cells:
  1974. padded_ln = _pad_row(ln, pad)
  1975. _append_basic_row(lines, padded_ln, colwidths, colaligns, rowfmt)
  1976. return lines
  1977. def _build_line(colwidths, colaligns, linefmt):
  1978. "Return a string which represents a horizontal line."
  1979. if not linefmt:
  1980. return None
  1981. if hasattr(linefmt, "__call__"):
  1982. return linefmt(colwidths, colaligns)
  1983. else:
  1984. begin, fill, sep, end = linefmt
  1985. cells = [fill * w for w in colwidths]
  1986. return _build_simple_row(cells, (begin, sep, end))
  1987. def _append_line(lines, colwidths, colaligns, linefmt):
  1988. lines.append(_build_line(colwidths, colaligns, linefmt))
  1989. return lines
  1990. class JupyterHTMLStr(str):
  1991. """Wrap the string with a _repr_html_ method so that Jupyter
  1992. displays the HTML table"""
  1993. def _repr_html_(self):
  1994. return self
  1995. @property
  1996. def str(self):
  1997. """add a .str property so that the raw string is still accessible"""
  1998. return self
def _format_table(fmt, headers, rows, colwidths, colaligns, is_multiline, rowaligns):
    """Produce a plain-text representation of the table.

    `fmt` supplies the line/row templates and the cell padding that are read
    below (`lineabove`, `linebelowheader`, `linebetweenrows`, `linebelow`,
    `headerrow`, `datarow`, `padding`, `with_header_hide`). `headers` and
    `rows` hold the cell strings; `colwidths` are the column widths without
    padding.

    Returns the lines joined with newlines, wrapped in `JupyterHTMLStr` when
    `fmt.lineabove` is `_html_begin_table_without_header`, or "" when there
    are neither headers nor rows.

    NOTE(review): `rowaligns` is only consulted in the branch that draws
    `linebetweenrows`; the other branch calls `append_row` without `rowalign`.
    """
    lines = []
    # Line elements listed in with_header_hide are suppressed, but only when
    # the table actually has headers.
    hidden = fmt.with_header_hide if (headers and fmt.with_header_hide) else []
    pad = fmt.padding
    headerrow = fmt.headerrow

    padded_widths = [(w + 2 * pad) for w in colwidths]
    if is_multiline:
        pad_row = lambda row, _: row  # noqa do it later, in _append_multiline_row
        append_row = partial(_append_multiline_row, pad=pad)
    else:
        pad_row = _pad_row
        append_row = _append_basic_row

    padded_headers = pad_row(headers, pad)
    padded_rows = [pad_row(row, pad) for row in rows]

    if fmt.lineabove and "lineabove" not in hidden:
        _append_line(lines, padded_widths, colaligns, fmt.lineabove)

    if padded_headers:
        append_row(lines, padded_headers, padded_widths, colaligns, headerrow)
        if fmt.linebelowheader and "linebelowheader" not in hidden:
            _append_line(lines, padded_widths, colaligns, fmt.linebelowheader)

    if padded_rows and fmt.linebetweenrows and "linebetweenrows" not in hidden:
        # initial rows with a line below
        for row, ralign in zip(padded_rows[:-1], rowaligns):
            append_row(
                lines, row, padded_widths, colaligns, fmt.datarow, rowalign=ralign
            )
            _append_line(lines, padded_widths, colaligns, fmt.linebetweenrows)
        # the last row without a line below
        append_row(
            lines,
            padded_rows[-1],
            padded_widths,
            colaligns,
            fmt.datarow,
            rowalign=rowaligns[-1],
        )
    else:
        # Line drawn in place of a separating-line row: the first of these
        # format elements that is set, else an empty Line.
        separating_line = (
            fmt.linebetweenrows
            or fmt.linebelowheader
            or fmt.linebelow
            or fmt.lineabove
            or Line("", "", "", "")
        )
        for row in padded_rows:
            # test to see if either the 1st column or the 2nd column (account for showindex) has
            # the SEPARATING_LINE flag
            if _is_separating_line(row):
                _append_line(lines, padded_widths, colaligns, separating_line)
            else:
                append_row(lines, row, padded_widths, colaligns, fmt.datarow)

    if fmt.linebelow and "linebelow" not in hidden:
        _append_line(lines, padded_widths, colaligns, fmt.linebelow)

    if headers or rows:
        output = "\n".join(lines)
        # This lineabove marks the format whose output gets the Jupyter
        # HTML wrapper.
        if fmt.lineabove == _html_begin_table_without_header:
            return JupyterHTMLStr(output)
        else:
            return output
    else:  # a completely empty table
        return ""
  2061. class _CustomTextWrap(textwrap.TextWrapper):
  2062. """A custom implementation of CPython's textwrap.TextWrapper. This supports
  2063. both wide characters (Korea, Japanese, Chinese) - including mixed string.
  2064. For the most part, the `_handle_long_word` and `_wrap_chunks` functions were
  2065. copy pasted out of the CPython baseline, and updated with our custom length
  2066. and line appending logic.
  2067. """
  2068. def __init__(self, *args, **kwargs):
  2069. self._active_codes = []
  2070. self.max_lines = None # For python2 compatibility
  2071. textwrap.TextWrapper.__init__(self, *args, **kwargs)
  2072. @staticmethod
  2073. def _len(item):
  2074. """Custom len that gets console column width for wide
  2075. and non-wide characters as well as ignores color codes"""
  2076. stripped = _strip_ansi(item)
  2077. if wcwidth:
  2078. return wcwidth.wcswidth(stripped)
  2079. else:
  2080. return len(stripped)
  2081. def _update_lines(self, lines, new_line):
  2082. """Adds a new line to the list of lines the text is being wrapped into
  2083. This function will also track any ANSI color codes in this string as well
  2084. as add any colors from previous lines order to preserve the same formatting
  2085. as a single unwrapped string.
  2086. """
  2087. code_matches = [x for x in _ansi_codes.finditer(new_line)]
  2088. color_codes = [
  2089. code.string[code.span()[0] : code.span()[1]] for code in code_matches
  2090. ]
  2091. # Add color codes from earlier in the unwrapped line, and then track any new ones we add.
  2092. new_line = "".join(self._active_codes) + new_line
  2093. for code in color_codes:
  2094. if code != _ansi_color_reset_code:
  2095. self._active_codes.append(code)
  2096. else: # A single reset code resets everything
  2097. self._active_codes = []
  2098. # Always ensure each line is color terminted if any colors are
  2099. # still active, otherwise colors will bleed into other cells on the console
  2100. if len(self._active_codes) > 0:
  2101. new_line = new_line + _ansi_color_reset_code
  2102. lines.append(new_line)
  2103. def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
  2104. """_handle_long_word(chunks : [string],
  2105. cur_line : [string],
  2106. cur_len : int, width : int)
  2107. Handle a chunk of text (most likely a word, not whitespace) that
  2108. is too long to fit in any line.
  2109. """
  2110. # Figure out when indent is larger than the specified width, and make
  2111. # sure at least one character is stripped off on every pass
  2112. if width < 1:
  2113. space_left = 1
  2114. else:
  2115. space_left = width - cur_len
  2116. # If we're allowed to break long words, then do so: put as much
  2117. # of the next chunk onto the current line as will fit.
  2118. if self.break_long_words:
  2119. # Tabulate Custom: Build the string up piece-by-piece in order to
  2120. # take each charcter's width into account
  2121. chunk = reversed_chunks[-1]
  2122. i = 1
  2123. while self._len(chunk[:i]) <= space_left:
  2124. i = i + 1
  2125. cur_line.append(chunk[: i - 1])
  2126. reversed_chunks[-1] = chunk[i - 1 :]
  2127. # Otherwise, we have to preserve the long word intact. Only add
  2128. # it to the current line if there's nothing already there --
  2129. # that minimizes how much we violate the width constraint.
  2130. elif not cur_line:
  2131. cur_line.append(reversed_chunks.pop())
  2132. # If we're not allowed to break long words, and there's already
  2133. # text on the current line, do nothing. Next time through the
  2134. # main loop of _wrap_chunks(), we'll wind up here again, but
  2135. # cur_len will be zero, so the next line will be entirely
  2136. # devoted to the long word that we can't handle right now.
  2137. def _wrap_chunks(self, chunks):
  2138. """_wrap_chunks(chunks : [string]) -> [string]
  2139. Wrap a sequence of text chunks and return a list of lines of
  2140. length 'self.width' or less. (If 'break_long_words' is false,
  2141. some lines may be longer than this.) Chunks correspond roughly
  2142. to words and the whitespace between them: each chunk is
  2143. indivisible (modulo 'break_long_words'), but a line break can
  2144. come between any two chunks. Chunks should not have internal
  2145. whitespace; ie. a chunk is either all whitespace or a "word".
  2146. Whitespace chunks will be removed from the beginning and end of
  2147. lines, but apart from that whitespace is preserved.
  2148. """
  2149. lines = []
  2150. if self.width <= 0:
  2151. raise ValueError("invalid width %r (must be > 0)" % self.width)
  2152. if self.max_lines is not None:
  2153. if self.max_lines > 1:
  2154. indent = self.subsequent_indent
  2155. else:
  2156. indent = self.initial_indent
  2157. if self._len(indent) + self._len(self.placeholder.lstrip()) > self.width:
  2158. raise ValueError("placeholder too large for max width")
  2159. # Arrange in reverse order so items can be efficiently popped
  2160. # from a stack of chucks.
  2161. chunks.reverse()
  2162. while chunks:
  2163. # Start the list of chunks that will make up the current line.
  2164. # cur_len is just the length of all the chunks in cur_line.
  2165. cur_line = []
  2166. cur_len = 0
  2167. # Figure out which static string will prefix this line.
  2168. if lines:
  2169. indent = self.subsequent_indent
  2170. else:
  2171. indent = self.initial_indent
  2172. # Maximum width for this line.
  2173. width = self.width - self._len(indent)
  2174. # First chunk on line is whitespace -- drop it, unless this
  2175. # is the very beginning of the text (ie. no lines started yet).
  2176. if self.drop_whitespace and chunks[-1].strip() == "" and lines:
  2177. del chunks[-1]
  2178. while chunks:
  2179. chunk_len = self._len(chunks[-1])
  2180. # Can at least squeeze this chunk onto the current line.
  2181. if cur_len + chunk_len <= width:
  2182. cur_line.append(chunks.pop())
  2183. cur_len += chunk_len
  2184. # Nope, this line is full.
  2185. else:
  2186. break
  2187. # The current line is full, and the next chunk is too big to
  2188. # fit on *any* line (not just this one).
  2189. if chunks and self._len(chunks[-1]) > width:
  2190. self._handle_long_word(chunks, cur_line, cur_len, width)
  2191. cur_len = sum(map(self._len, cur_line))
  2192. # If the last chunk on this line is all whitespace, drop it.
  2193. if self.drop_whitespace and cur_line and cur_line[-1].strip() == "":
  2194. cur_len -= self._len(cur_line[-1])
  2195. del cur_line[-1]
  2196. if cur_line:
  2197. if (
  2198. self.max_lines is None
  2199. or len(lines) + 1 < self.max_lines
  2200. or (
  2201. not chunks
  2202. or self.drop_whitespace
  2203. and len(chunks) == 1
  2204. and not chunks[0].strip()
  2205. )
  2206. and cur_len <= width
  2207. ):
  2208. # Convert current line back to a string and store it in
  2209. # list of all lines (return value).
  2210. self._update_lines(lines, indent + "".join(cur_line))
  2211. else:
  2212. while cur_line:
  2213. if (
  2214. cur_line[-1].strip()
  2215. and cur_len + self._len(self.placeholder) <= width
  2216. ):
  2217. cur_line.append(self.placeholder)
  2218. self._update_lines(lines, indent + "".join(cur_line))
  2219. break
  2220. cur_len -= self._len(cur_line[-1])
  2221. del cur_line[-1]
  2222. else:
  2223. if lines:
  2224. prev_line = lines[-1].rstrip()
  2225. if (
  2226. self._len(prev_line) + self._len(self.placeholder)
  2227. <= self.width
  2228. ):
  2229. lines[-1] = prev_line + self.placeholder
  2230. break
  2231. self._update_lines(lines, indent + self.placeholder.lstrip())
  2232. break
  2233. return lines
  2234. def _main():
  2235. """\
  2236. Usage: tabulate [options] [FILE ...]
  2237. Pretty-print tabular data.
  2238. See also https://github.com/astanin/python-tabulate
  2239. FILE a filename of the file with tabular data;
  2240. if "-" or missing, read data from stdin.
  2241. Options:
  2242. -h, --help show this message
  2243. -1, --header use the first row of data as a table header
  2244. -o FILE, --output FILE print table to FILE (default: stdout)
  2245. -s REGEXP, --sep REGEXP use a custom column separator (default: whitespace)
  2246. -F FPFMT, --float FPFMT floating point number format (default: g)
  2247. -I INTFMT, --int INTFMT integer point number format (default: "")
  2248. -f FMT, --format FMT set output table format; supported formats:
  2249. plain, simple, grid, fancy_grid, pipe, orgtbl,
  2250. rst, mediawiki, html, latex, latex_raw,
  2251. latex_booktabs, latex_longtable, tsv
  2252. (default: simple)
  2253. """
  2254. import getopt
  2255. import sys
  2256. import textwrap
  2257. usage = textwrap.dedent(_main.__doc__)
  2258. try:
  2259. opts, args = getopt.getopt(
  2260. sys.argv[1:],
  2261. "h1o:s:F:A:f:",
  2262. ["help", "header", "output", "sep=", "float=", "int=", "align=", "format="],
  2263. )
  2264. except getopt.GetoptError as e:
  2265. print(e)
  2266. print(usage)
  2267. sys.exit(2)
  2268. headers = []
  2269. floatfmt = _DEFAULT_FLOATFMT
  2270. intfmt = _DEFAULT_INTFMT
  2271. colalign = None
  2272. tablefmt = "simple"
  2273. sep = r"\s+"
  2274. outfile = "-"
  2275. for opt, value in opts:
  2276. if opt in ["-1", "--header"]:
  2277. headers = "firstrow"
  2278. elif opt in ["-o", "--output"]:
  2279. outfile = value
  2280. elif opt in ["-F", "--float"]:
  2281. floatfmt = value
  2282. elif opt in ["-I", "--int"]:
  2283. intfmt = value
  2284. elif opt in ["-C", "--colalign"]:
  2285. colalign = value.split()
  2286. elif opt in ["-f", "--format"]:
  2287. if value not in tabulate_formats:
  2288. print("%s is not a supported table format" % value)
  2289. print(usage)
  2290. sys.exit(3)
  2291. tablefmt = value
  2292. elif opt in ["-s", "--sep"]:
  2293. sep = value
  2294. elif opt in ["-h", "--help"]:
  2295. print(usage)
  2296. sys.exit(0)
  2297. files = [sys.stdin] if not args else args
  2298. with (sys.stdout if outfile == "-" else open(outfile, "w")) as out:
  2299. for f in files:
  2300. if f == "-":
  2301. f = sys.stdin
  2302. if _is_file(f):
  2303. _pprint_file(
  2304. f,
  2305. headers=headers,
  2306. tablefmt=tablefmt,
  2307. sep=sep,
  2308. floatfmt=floatfmt,
  2309. intfmt=intfmt,
  2310. file=out,
  2311. colalign=colalign,
  2312. )
  2313. else:
  2314. with open(f) as fobj:
  2315. _pprint_file(
  2316. fobj,
  2317. headers=headers,
  2318. tablefmt=tablefmt,
  2319. sep=sep,
  2320. floatfmt=floatfmt,
  2321. intfmt=intfmt,
  2322. file=out,
  2323. colalign=colalign,
  2324. )
  2325. def _pprint_file(fobject, headers, tablefmt, sep, floatfmt, intfmt, file, colalign):
  2326. rows = fobject.readlines()
  2327. table = [re.split(sep, r.rstrip()) for r in rows if r.strip()]
  2328. print(
  2329. tabulate(
  2330. table,
  2331. headers,
  2332. tablefmt,
  2333. floatfmt=floatfmt,
  2334. intfmt=intfmt,
  2335. colalign=colalign,
  2336. ),
  2337. file=file,
  2338. )
# Run the command-line interface when this file is executed as a script.
if __name__ == "__main__":
    _main()