_shlex.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313
# This copy of shlex.py from Python 3.6 is distributed with argcomplete.
# It contains only the shlex class, with modifications as noted.
"""A lexical analyzer class for simple shell-like syntaxes."""
# Module and documentation by Eric S. Raymond, 21 Dec 1998
# Input stacking and error message cleanup added by ESR, March 2000
# push_source() and pop_source() made explicit by ESR, January 2001.
# Posix compliance, split(), string arguments, and
# iterator interface by Gustavo Niemeyer, April 2003.
# changes to tokenize more like Posix shells by Vinay Sajip, July 2016.
  10. import os
  11. import sys
  12. from collections import deque
  13. from io import StringIO
  14. from typing import Optional
  15. class shlex:
  16. "A lexical analyzer class for simple shell-like syntaxes."
  17. def __init__(self, instream=None, infile=None, posix=False, punctuation_chars=False):
  18. # Modified by argcomplete: 2/3 compatibility
  19. if isinstance(instream, str):
  20. instream = StringIO(instream)
  21. if instream is not None:
  22. self.instream = instream
  23. self.infile = infile
  24. else:
  25. self.instream = sys.stdin
  26. self.infile = None
  27. self.posix = posix
  28. if posix:
  29. self.eof = None
  30. else:
  31. self.eof = ''
  32. self.commenters = '#'
  33. self.wordchars = 'abcdfeghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_'
  34. # Modified by argcomplete: 2/3 compatibility
  35. # if self.posix:
  36. # self.wordchars += ('ßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ'
  37. # 'ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞ')
  38. self.whitespace = ' \t\r\n'
  39. self.whitespace_split = False
  40. self.quotes = '\'"'
  41. self.escape = '\\'
  42. self.escapedquotes = '"'
  43. self.state: Optional[str] = ' '
  44. self.pushback: deque = deque()
  45. self.lineno = 1
  46. self.debug = 0
  47. self.token = ''
  48. self.filestack: deque = deque()
  49. self.source = None
  50. if not punctuation_chars:
  51. punctuation_chars = ''
  52. elif punctuation_chars is True:
  53. punctuation_chars = '();<>|&'
  54. self.punctuation_chars = punctuation_chars
  55. if punctuation_chars:
  56. # _pushback_chars is a push back queue used by lookahead logic
  57. self._pushback_chars: deque = deque()
  58. # these chars added because allowed in file names, args, wildcards
  59. self.wordchars += '~-./*?='
  60. # remove any punctuation chars from wordchars
  61. t = self.wordchars.maketrans(dict.fromkeys(punctuation_chars))
  62. self.wordchars = self.wordchars.translate(t)
  63. # Modified by argcomplete: Record last wordbreak position
  64. self.last_wordbreak_pos = None
  65. self.wordbreaks = ''
  66. def push_token(self, tok):
  67. "Push a token onto the stack popped by the get_token method"
  68. if self.debug >= 1:
  69. print("shlex: pushing token " + repr(tok))
  70. self.pushback.appendleft(tok)
  71. def push_source(self, newstream, newfile=None):
  72. "Push an input source onto the lexer's input source stack."
  73. # Modified by argcomplete: 2/3 compatibility
  74. if isinstance(newstream, str):
  75. newstream = StringIO(newstream)
  76. self.filestack.appendleft((self.infile, self.instream, self.lineno))
  77. self.infile = newfile
  78. self.instream = newstream
  79. self.lineno = 1
  80. if self.debug:
  81. if newfile is not None:
  82. print('shlex: pushing to file %s' % (self.infile,))
  83. else:
  84. print('shlex: pushing to stream %s' % (self.instream,))
  85. def pop_source(self):
  86. "Pop the input source stack."
  87. self.instream.close()
  88. (self.infile, self.instream, self.lineno) = self.filestack.popleft()
  89. if self.debug:
  90. print('shlex: popping to %s, line %d' % (self.instream, self.lineno))
  91. self.state = ' '
  92. def get_token(self):
  93. "Get a token from the input stream (or from stack if it's nonempty)"
  94. if self.pushback:
  95. tok = self.pushback.popleft()
  96. if self.debug >= 1:
  97. print("shlex: popping token " + repr(tok))
  98. return tok
  99. # No pushback. Get a token.
  100. raw = self.read_token()
  101. # Handle inclusions
  102. if self.source is not None:
  103. while raw == self.source:
  104. spec = self.sourcehook(self.read_token())
  105. if spec:
  106. (newfile, newstream) = spec
  107. self.push_source(newstream, newfile)
  108. raw = self.get_token()
  109. # Maybe we got EOF instead?
  110. while raw == self.eof:
  111. if not self.filestack:
  112. return self.eof
  113. else:
  114. self.pop_source()
  115. raw = self.get_token()
  116. # Neither inclusion nor EOF
  117. if self.debug >= 1:
  118. if raw != self.eof:
  119. print("shlex: token=" + repr(raw))
  120. else:
  121. print("shlex: token=EOF")
  122. return raw
  123. def read_token(self):
  124. quoted = False
  125. escapedstate = ' '
  126. while True:
  127. if self.punctuation_chars and self._pushback_chars:
  128. nextchar = self._pushback_chars.pop()
  129. else:
  130. nextchar = self.instream.read(1)
  131. if nextchar == '\n':
  132. self.lineno += 1
  133. if self.debug >= 3:
  134. print("shlex: in state %r I see character: %r" % (self.state, nextchar))
  135. if self.state is None:
  136. self.token = '' # past end of file
  137. break
  138. elif self.state == ' ':
  139. if not nextchar:
  140. self.state = None # end of file
  141. break
  142. elif nextchar in self.whitespace:
  143. if self.debug >= 2:
  144. print("shlex: I see whitespace in whitespace state")
  145. if self.token or (self.posix and quoted):
  146. break # emit current token
  147. else:
  148. continue
  149. elif nextchar in self.commenters:
  150. self.instream.readline()
  151. self.lineno += 1
  152. elif self.posix and nextchar in self.escape:
  153. escapedstate = 'a'
  154. self.state = nextchar
  155. elif nextchar in self.wordchars:
  156. self.token = nextchar
  157. self.state = 'a'
  158. elif nextchar in self.punctuation_chars:
  159. self.token = nextchar
  160. self.state = 'c'
  161. elif nextchar in self.quotes:
  162. if not self.posix:
  163. self.token = nextchar
  164. self.state = nextchar
  165. elif self.whitespace_split:
  166. self.token = nextchar
  167. self.state = 'a'
  168. # Modified by argcomplete: Record last wordbreak position
  169. if nextchar in self.wordbreaks:
  170. self.last_wordbreak_pos = len(self.token) - 1
  171. else:
  172. self.token = nextchar
  173. if self.token or (self.posix and quoted):
  174. break # emit current token
  175. else:
  176. continue
  177. elif self.state in self.quotes:
  178. quoted = True
  179. if not nextchar: # end of file
  180. if self.debug >= 2:
  181. print("shlex: I see EOF in quotes state")
  182. # XXX what error should be raised here?
  183. raise ValueError("No closing quotation")
  184. if nextchar == self.state:
  185. if not self.posix:
  186. self.token += nextchar
  187. self.state = ' '
  188. break
  189. else:
  190. self.state = 'a'
  191. elif self.posix and nextchar in self.escape and self.state in self.escapedquotes:
  192. escapedstate = self.state
  193. self.state = nextchar
  194. else:
  195. self.token += nextchar
  196. elif self.state in self.escape:
  197. if not nextchar: # end of file
  198. if self.debug >= 2:
  199. print("shlex: I see EOF in escape state")
  200. # XXX what error should be raised here?
  201. raise ValueError("No escaped character")
  202. # In posix shells, only the quote itself or the escape
  203. # character may be escaped within quotes.
  204. if escapedstate in self.quotes and nextchar != self.state and nextchar != escapedstate:
  205. self.token += self.state
  206. self.token += nextchar
  207. self.state = escapedstate
  208. elif self.state in ('a', 'c'):
  209. if not nextchar:
  210. self.state = None # end of file
  211. break
  212. elif nextchar in self.whitespace:
  213. if self.debug >= 2:
  214. print("shlex: I see whitespace in word state")
  215. self.state = ' '
  216. if self.token or (self.posix and quoted):
  217. break # emit current token
  218. else:
  219. continue
  220. elif nextchar in self.commenters:
  221. self.instream.readline()
  222. self.lineno += 1
  223. if self.posix:
  224. self.state = ' '
  225. if self.token or (self.posix and quoted):
  226. break # emit current token
  227. else:
  228. continue
  229. elif self.posix and nextchar in self.quotes:
  230. self.state = nextchar
  231. elif self.posix and nextchar in self.escape:
  232. escapedstate = 'a'
  233. self.state = nextchar
  234. elif self.state == 'c':
  235. if nextchar in self.punctuation_chars:
  236. self.token += nextchar
  237. else:
  238. if nextchar not in self.whitespace:
  239. self._pushback_chars.append(nextchar)
  240. self.state = ' '
  241. break
  242. elif nextchar in self.wordchars or nextchar in self.quotes or self.whitespace_split:
  243. self.token += nextchar
  244. # Modified by argcomplete: Record last wordbreak position
  245. if nextchar in self.wordbreaks:
  246. self.last_wordbreak_pos = len(self.token) - 1
  247. else:
  248. if self.punctuation_chars:
  249. self._pushback_chars.append(nextchar)
  250. else:
  251. self.pushback.appendleft(nextchar)
  252. if self.debug >= 2:
  253. print("shlex: I see punctuation in word state")
  254. self.state = ' '
  255. if self.token or (self.posix and quoted):
  256. break # emit current token
  257. else:
  258. continue
  259. result: Optional[str] = self.token
  260. self.token = ''
  261. if self.posix and not quoted and result == '':
  262. result = None
  263. if self.debug > 1:
  264. if result:
  265. print("shlex: raw token=" + repr(result))
  266. else:
  267. print("shlex: raw token=EOF")
  268. # Modified by argcomplete: Record last wordbreak position
  269. if self.state == ' ':
  270. self.last_wordbreak_pos = None
  271. return result
  272. def sourcehook(self, newfile):
  273. "Hook called on a filename to be sourced."
  274. if newfile[0] == '"':
  275. newfile = newfile[1:-1]
  276. # This implements cpp-like semantics for relative-path inclusion.
  277. # Modified by argcomplete: 2/3 compatibility
  278. if isinstance(self.infile, str) and not os.path.isabs(newfile):
  279. newfile = os.path.join(os.path.dirname(self.infile), newfile)
  280. return (newfile, open(newfile, "r"))
  281. def error_leader(self, infile=None, lineno=None):
  282. "Emit a C-compiler-like, Emacs-friendly error-message leader."
  283. if infile is None:
  284. infile = self.infile
  285. if lineno is None:
  286. lineno = self.lineno
  287. return "\"%s\", line %d: " % (infile, lineno)
  288. def __iter__(self):
  289. return self
  290. def __next__(self):
  291. token = self.get_token()
  292. if token == self.eof:
  293. raise StopIteration
  294. return token
  295. # Modified by argcomplete: 2/3 compatibility
  296. next = __next__