inflection.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426
  1. # -*- coding: utf-8 -*-
  2. """
  3. inflection
  4. ~~~~~~~~~~~~
  5. A port of Ruby on Rails' inflector to Python.
  6. :copyright: (c) 2012-2020 by Janne Vanhala
  7. :license: MIT, see LICENSE for more details.
  8. """
  9. import re
  10. import unicodedata
  11. __version__ = '0.5.0'
  12. PLURALS = [
  13. (r"(?i)(quiz)$", r'\1zes'),
  14. (r"(?i)^(oxen)$", r'\1'),
  15. (r"(?i)^(ox)$", r'\1en'),
  16. (r"(?i)(m|l)ice$", r'\1ice'),
  17. (r"(?i)(m|l)ouse$", r'\1ice'),
  18. (r"(?i)(passer)s?by$", r'\1sby'),
  19. (r"(?i)(matr|vert|ind)(?:ix|ex)$", r'\1ices'),
  20. (r"(?i)(x|ch|ss|sh)$", r'\1es'),
  21. (r"(?i)([^aeiouy]|qu)y$", r'\1ies'),
  22. (r"(?i)(hive)$", r'\1s'),
  23. (r"(?i)([lr])f$", r'\1ves'),
  24. (r"(?i)([^f])fe$", r'\1ves'),
  25. (r"(?i)sis$", 'ses'),
  26. (r"(?i)([ti])a$", r'\1a'),
  27. (r"(?i)([ti])um$", r'\1a'),
  28. (r"(?i)(buffal|potat|tomat)o$", r'\1oes'),
  29. (r"(?i)(bu)s$", r'\1ses'),
  30. (r"(?i)(alias|status)$", r'\1es'),
  31. (r"(?i)(octop|vir)i$", r'\1i'),
  32. (r"(?i)(octop|vir)us$", r'\1i'),
  33. (r"(?i)^(ax|test)is$", r'\1es'),
  34. (r"(?i)s$", 's'),
  35. (r"$", 's'),
  36. ]
  37. SINGULARS = [
  38. (r"(?i)(database)s$", r'\1'),
  39. (r"(?i)(quiz)zes$", r'\1'),
  40. (r"(?i)(matr)ices$", r'\1ix'),
  41. (r"(?i)(vert|ind)ices$", r'\1ex'),
  42. (r"(?i)(passer)sby$", r'\1by'),
  43. (r"(?i)^(ox)en", r'\1'),
  44. (r"(?i)(alias|status)(es)?$", r'\1'),
  45. (r"(?i)(octop|vir)(us|i)$", r'\1us'),
  46. (r"(?i)^(a)x[ie]s$", r'\1xis'),
  47. (r"(?i)(cris|test)(is|es)$", r'\1is'),
  48. (r"(?i)(shoe)s$", r'\1'),
  49. (r"(?i)(o)es$", r'\1'),
  50. (r"(?i)(bus)(es)?$", r'\1'),
  51. (r"(?i)(m|l)ice$", r'\1ouse'),
  52. (r"(?i)(x|ch|ss|sh)es$", r'\1'),
  53. (r"(?i)(m)ovies$", r'\1ovie'),
  54. (r"(?i)(s)eries$", r'\1eries'),
  55. (r"(?i)([^aeiouy]|qu)ies$", r'\1y'),
  56. (r"(?i)([lr])ves$", r'\1f'),
  57. (r"(?i)(tive)s$", r'\1'),
  58. (r"(?i)(hive)s$", r'\1'),
  59. (r"(?i)([^f])ves$", r'\1fe'),
  60. (r"(?i)(t)he(sis|ses)$", r"\1hesis"),
  61. (r"(?i)(s)ynop(sis|ses)$", r"\1ynopsis"),
  62. (r"(?i)(p)rogno(sis|ses)$", r"\1rognosis"),
  63. (r"(?i)(p)arenthe(sis|ses)$", r"\1arenthesis"),
  64. (r"(?i)(d)iagno(sis|ses)$", r"\1iagnosis"),
  65. (r"(?i)(b)a(sis|ses)$", r"\1asis"),
  66. (r"(?i)(a)naly(sis|ses)$", r"\1nalysis"),
  67. (r"(?i)([ti])a$", r'\1um'),
  68. (r"(?i)(n)ews$", r'\1ews'),
  69. (r"(?i)(ss)$", r'\1'),
  70. (r"(?i)s$", ''),
  71. ]
  72. UNCOUNTABLES = {
  73. 'equipment',
  74. 'fish',
  75. 'information',
  76. 'jeans',
  77. 'money',
  78. 'rice',
  79. 'series',
  80. 'sheep',
  81. 'species'}
  82. def _irregular(singular: str, plural: str) -> None:
  83. """
  84. A convenience function to add appropriate rules to plurals and singular
  85. for irregular words.
  86. :param singular: irregular word in singular form
  87. :param plural: irregular word in plural form
  88. """
  89. def caseinsensitive(string: str) -> str:
  90. return ''.join('[' + char + char.upper() + ']' for char in string)
  91. if singular[0].upper() == plural[0].upper():
  92. PLURALS.insert(0, (
  93. r"(?i)({}){}$".format(singular[0], singular[1:]),
  94. r'\1' + plural[1:]
  95. ))
  96. PLURALS.insert(0, (
  97. r"(?i)({}){}$".format(plural[0], plural[1:]),
  98. r'\1' + plural[1:]
  99. ))
  100. SINGULARS.insert(0, (
  101. r"(?i)({}){}$".format(plural[0], plural[1:]),
  102. r'\1' + singular[1:]
  103. ))
  104. else:
  105. PLURALS.insert(0, (
  106. r"{}{}$".format(singular[0].upper(),
  107. caseinsensitive(singular[1:])),
  108. plural[0].upper() + plural[1:]
  109. ))
  110. PLURALS.insert(0, (
  111. r"{}{}$".format(singular[0].lower(),
  112. caseinsensitive(singular[1:])),
  113. plural[0].lower() + plural[1:]
  114. ))
  115. PLURALS.insert(0, (
  116. r"{}{}$".format(plural[0].upper(), caseinsensitive(plural[1:])),
  117. plural[0].upper() + plural[1:]
  118. ))
  119. PLURALS.insert(0, (
  120. r"{}{}$".format(plural[0].lower(), caseinsensitive(plural[1:])),
  121. plural[0].lower() + plural[1:]
  122. ))
  123. SINGULARS.insert(0, (
  124. r"{}{}$".format(plural[0].upper(), caseinsensitive(plural[1:])),
  125. singular[0].upper() + singular[1:]
  126. ))
  127. SINGULARS.insert(0, (
  128. r"{}{}$".format(plural[0].lower(), caseinsensitive(plural[1:])),
  129. singular[0].lower() + singular[1:]
  130. ))
  131. def camelize(string: str, uppercase_first_letter: bool = True) -> str:
  132. """
  133. Convert strings to CamelCase.
  134. Examples::
  135. >>> camelize("device_type")
  136. 'DeviceType'
  137. >>> camelize("device_type", False)
  138. 'deviceType'
  139. :func:`camelize` can be thought of as a inverse of :func:`underscore`,
  140. although there are some cases where that does not hold::
  141. >>> camelize(underscore("IOError"))
  142. 'IoError'
  143. :param uppercase_first_letter: if set to `True` :func:`camelize` converts
  144. strings to UpperCamelCase. If set to `False` :func:`camelize` produces
  145. lowerCamelCase. Defaults to `True`.
  146. """
  147. if uppercase_first_letter:
  148. return re.sub(r"(?:^|_)(.)", lambda m: m.group(1).upper(), string)
  149. else:
  150. return string[0].lower() + camelize(string)[1:]
  151. def dasherize(word: str) -> str:
  152. """Replace underscores with dashes in the string.
  153. Example::
  154. >>> dasherize("puni_puni")
  155. 'puni-puni'
  156. """
  157. return word.replace('_', '-')
  158. def humanize(word: str) -> str:
  159. """
  160. Capitalize the first word and turn underscores into spaces and strip a
  161. trailing ``"_id"``, if any. Like :func:`titleize`, this is meant for
  162. creating pretty output.
  163. Examples::
  164. >>> humanize("employee_salary")
  165. 'Employee salary'
  166. >>> humanize("author_id")
  167. 'Author'
  168. """
  169. word = re.sub(r"_id$", "", word)
  170. word = word.replace('_', ' ')
  171. word = re.sub(r"(?i)([a-z\d]*)", lambda m: m.group(1).lower(), word)
  172. word = re.sub(r"^\w", lambda m: m.group(0).upper(), word)
  173. return word
  174. def ordinal(number: int) -> str:
  175. """
  176. Return the suffix that should be added to a number to denote the position
  177. in an ordered sequence such as 1st, 2nd, 3rd, 4th.
  178. Examples::
  179. >>> ordinal(1)
  180. 'st'
  181. >>> ordinal(2)
  182. 'nd'
  183. >>> ordinal(1002)
  184. 'nd'
  185. >>> ordinal(1003)
  186. 'rd'
  187. >>> ordinal(-11)
  188. 'th'
  189. >>> ordinal(-1021)
  190. 'st'
  191. """
  192. number = abs(int(number))
  193. if number % 100 in (11, 12, 13):
  194. return "th"
  195. else:
  196. return {
  197. 1: "st",
  198. 2: "nd",
  199. 3: "rd",
  200. }.get(number % 10, "th")
  201. def ordinalize(number: int) -> str:
  202. """
  203. Turn a number into an ordinal string used to denote the position in an
  204. ordered sequence such as 1st, 2nd, 3rd, 4th.
  205. Examples::
  206. >>> ordinalize(1)
  207. '1st'
  208. >>> ordinalize(2)
  209. '2nd'
  210. >>> ordinalize(1002)
  211. '1002nd'
  212. >>> ordinalize(1003)
  213. '1003rd'
  214. >>> ordinalize(-11)
  215. '-11th'
  216. >>> ordinalize(-1021)
  217. '-1021st'
  218. """
  219. return "{}{}".format(number, ordinal(number))
  220. def parameterize(string: str, separator: str = '-') -> str:
  221. """
  222. Replace special characters in a string so that it may be used as part of a
  223. 'pretty' URL.
  224. Example::
  225. >>> parameterize(u"Donald E. Knuth")
  226. 'donald-e-knuth'
  227. """
  228. string = transliterate(string)
  229. # Turn unwanted chars into the separator
  230. string = re.sub(r"(?i)[^a-z0-9\-_]+", separator, string)
  231. if separator:
  232. re_sep = re.escape(separator)
  233. # No more than one of the separator in a row.
  234. string = re.sub(r'%s{2,}' % re_sep, separator, string)
  235. # Remove leading/trailing separator.
  236. string = re.sub(r"(?i)^{sep}|{sep}$".format(sep=re_sep), '', string)
  237. return string.lower()
  238. def pluralize(word: str) -> str:
  239. """
  240. Return the plural form of a word.
  241. Examples::
  242. >>> pluralize("posts")
  243. 'posts'
  244. >>> pluralize("octopus")
  245. 'octopi'
  246. >>> pluralize("sheep")
  247. 'sheep'
  248. >>> pluralize("CamelOctopus")
  249. 'CamelOctopi'
  250. """
  251. if not word or word.lower() in UNCOUNTABLES:
  252. return word
  253. else:
  254. for rule, replacement in PLURALS:
  255. if re.search(rule, word):
  256. return re.sub(rule, replacement, word)
  257. return word
  258. def singularize(word: str) -> str:
  259. """
  260. Return the singular form of a word, the reverse of :func:`pluralize`.
  261. Examples::
  262. >>> singularize("posts")
  263. 'post'
  264. >>> singularize("octopi")
  265. 'octopus'
  266. >>> singularize("sheep")
  267. 'sheep'
  268. >>> singularize("word")
  269. 'word'
  270. >>> singularize("CamelOctopi")
  271. 'CamelOctopus'
  272. """
  273. for inflection in UNCOUNTABLES:
  274. if re.search(r'(?i)\b(%s)\Z' % inflection, word):
  275. return word
  276. for rule, replacement in SINGULARS:
  277. if re.search(rule, word):
  278. return re.sub(rule, replacement, word)
  279. return word
  280. def tableize(word: str) -> str:
  281. """
  282. Create the name of a table like Rails does for models to table names. This
  283. method uses the :func:`pluralize` method on the last word in the string.
  284. Examples::
  285. >>> tableize('RawScaledScorer')
  286. 'raw_scaled_scorers'
  287. >>> tableize('egg_and_ham')
  288. 'egg_and_hams'
  289. >>> tableize('fancyCategory')
  290. 'fancy_categories'
  291. """
  292. return pluralize(underscore(word))
  293. def titleize(word: str) -> str:
  294. """
  295. Capitalize all the words and replace some characters in the string to
  296. create a nicer looking title. :func:`titleize` is meant for creating pretty
  297. output.
  298. Examples::
  299. >>> titleize("man from the boondocks")
  300. 'Man From The Boondocks'
  301. >>> titleize("x-men: the last stand")
  302. 'X Men: The Last Stand'
  303. >>> titleize("TheManWithoutAPast")
  304. 'The Man Without A Past'
  305. >>> titleize("raiders_of_the_lost_ark")
  306. 'Raiders Of The Lost Ark'
  307. """
  308. return re.sub(
  309. r"\b('?\w)",
  310. lambda match: match.group(1).capitalize(),
  311. humanize(underscore(word)).title()
  312. )
  313. def transliterate(string: str) -> str:
  314. """
  315. Replace non-ASCII characters with an ASCII approximation. If no
  316. approximation exists, the non-ASCII character is ignored. The string must
  317. be ``unicode``.
  318. Examples::
  319. >>> transliterate('älämölö')
  320. 'alamolo'
  321. >>> transliterate('Ærøskøbing')
  322. 'rskbing'
  323. """
  324. normalized = unicodedata.normalize('NFKD', string)
  325. return normalized.encode('ascii', 'ignore').decode('ascii')
  326. def underscore(word: str) -> str:
  327. """
  328. Make an underscored, lowercase form from the expression in the string.
  329. Example::
  330. >>> underscore("DeviceType")
  331. 'device_type'
  332. As a rule of thumb you can think of :func:`underscore` as the inverse of
  333. :func:`camelize`, though there are cases where that does not hold::
  334. >>> camelize(underscore("IOError"))
  335. 'IoError'
  336. """
  337. word = re.sub(r"([A-Z]+)([A-Z][a-z])", r'\1_\2', word)
  338. word = re.sub(r"([a-z\d])([A-Z])", r'\1_\2', word)
  339. word = word.replace("-", "_")
  340. return word.lower()
  341. _irregular('person', 'people')
  342. _irregular('man', 'men')
  343. _irregular('human', 'humans')
  344. _irregular('child', 'children')
  345. _irregular('sex', 'sexes')
  346. _irregular('move', 'moves')
  347. _irregular('cow', 'kine')
  348. _irregular('zombie', 'zombies')