_http_parser.pyx 28 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837
  1. #cython: language_level=3
  2. #
  3. # Based on https://github.com/MagicStack/httptools
  4. #
  5. from cpython cimport (
  6. Py_buffer,
  7. PyBUF_SIMPLE,
  8. PyBuffer_Release,
  9. PyBytes_AsString,
  10. PyBytes_AsStringAndSize,
  11. PyObject_GetBuffer,
  12. )
  13. from cpython.mem cimport PyMem_Free, PyMem_Malloc
  14. from libc.limits cimport ULLONG_MAX
  15. from libc.string cimport memcpy
  16. from multidict import CIMultiDict as _CIMultiDict, CIMultiDictProxy as _CIMultiDictProxy
  17. from yarl import URL as _URL
  18. from aiohttp import hdrs
  19. from aiohttp.helpers import DEBUG, set_exception
  20. from .http_exceptions import (
  21. BadHttpMessage,
  22. BadHttpMethod,
  23. BadStatusLine,
  24. ContentLengthError,
  25. InvalidHeader,
  26. InvalidURLError,
  27. LineTooLong,
  28. PayloadEncodingError,
  29. TransferEncodingError,
  30. )
  31. from .http_parser import DeflateBuffer as _DeflateBuffer
  32. from .http_writer import (
  33. HttpVersion as _HttpVersion,
  34. HttpVersion10 as _HttpVersion10,
  35. HttpVersion11 as _HttpVersion11,
  36. )
  37. from .streams import EMPTY_PAYLOAD as _EMPTY_PAYLOAD, StreamReader as _StreamReader
  38. cimport cython
  39. from aiohttp cimport _cparser as cparser
  40. include "_headers.pxi"
  41. from aiohttp cimport _find_header
  42. ALLOWED_UPGRADES = frozenset({"websocket"})
  43. DEF DEFAULT_FREELIST_SIZE = 250
  44. cdef extern from "Python.h":
  45. int PyByteArray_Resize(object, Py_ssize_t) except -1
  46. Py_ssize_t PyByteArray_Size(object) except -1
  47. char* PyByteArray_AsString(object)
  48. __all__ = ('HttpRequestParser', 'HttpResponseParser',
  49. 'RawRequestMessage', 'RawResponseMessage')
  50. cdef object URL = _URL
  51. cdef object URL_build = URL.build
  52. cdef object CIMultiDict = _CIMultiDict
  53. cdef object CIMultiDictProxy = _CIMultiDictProxy
  54. cdef object HttpVersion = _HttpVersion
  55. cdef object HttpVersion10 = _HttpVersion10
  56. cdef object HttpVersion11 = _HttpVersion11
  57. cdef object SEC_WEBSOCKET_KEY1 = hdrs.SEC_WEBSOCKET_KEY1
  58. cdef object CONTENT_ENCODING = hdrs.CONTENT_ENCODING
  59. cdef object EMPTY_PAYLOAD = _EMPTY_PAYLOAD
  60. cdef object StreamReader = _StreamReader
  61. cdef object DeflateBuffer = _DeflateBuffer
  62. cdef bytes EMPTY_BYTES = b""
  63. cdef inline object extend(object buf, const char* at, size_t length):
  64. cdef Py_ssize_t s
  65. cdef char* ptr
  66. s = PyByteArray_Size(buf)
  67. PyByteArray_Resize(buf, s + length)
  68. ptr = PyByteArray_AsString(buf)
  69. memcpy(ptr + s, at, length)
  70. DEF METHODS_COUNT = 46;
  71. cdef list _http_method = []
  72. for i in range(METHODS_COUNT):
  73. _http_method.append(
  74. cparser.llhttp_method_name(<cparser.llhttp_method_t> i).decode('ascii'))
  75. cdef inline str http_method_str(int i):
  76. if i < METHODS_COUNT:
  77. return <str>_http_method[i]
  78. else:
  79. return "<unknown>"
  80. cdef inline object find_header(bytes raw_header):
  81. cdef Py_ssize_t size
  82. cdef char *buf
  83. cdef int idx
  84. PyBytes_AsStringAndSize(raw_header, &buf, &size)
  85. idx = _find_header.find_header(buf, size)
  86. if idx == -1:
  87. return raw_header.decode('utf-8', 'surrogateescape')
  88. return headers[idx]
  89. @cython.freelist(DEFAULT_FREELIST_SIZE)
  90. cdef class RawRequestMessage:
  91. cdef readonly str method
  92. cdef readonly str path
  93. cdef readonly object version # HttpVersion
  94. cdef readonly object headers # CIMultiDict
  95. cdef readonly object raw_headers # tuple
  96. cdef readonly object should_close
  97. cdef readonly object compression
  98. cdef readonly object upgrade
  99. cdef readonly object chunked
  100. cdef readonly object url # yarl.URL
  101. def __init__(self, method, path, version, headers, raw_headers,
  102. should_close, compression, upgrade, chunked, url):
  103. self.method = method
  104. self.path = path
  105. self.version = version
  106. self.headers = headers
  107. self.raw_headers = raw_headers
  108. self.should_close = should_close
  109. self.compression = compression
  110. self.upgrade = upgrade
  111. self.chunked = chunked
  112. self.url = url
  113. def __repr__(self):
  114. info = []
  115. info.append(("method", self.method))
  116. info.append(("path", self.path))
  117. info.append(("version", self.version))
  118. info.append(("headers", self.headers))
  119. info.append(("raw_headers", self.raw_headers))
  120. info.append(("should_close", self.should_close))
  121. info.append(("compression", self.compression))
  122. info.append(("upgrade", self.upgrade))
  123. info.append(("chunked", self.chunked))
  124. info.append(("url", self.url))
  125. sinfo = ', '.join(name + '=' + repr(val) for name, val in info)
  126. return '<RawRequestMessage(' + sinfo + ')>'
  127. def _replace(self, **dct):
  128. cdef RawRequestMessage ret
  129. ret = _new_request_message(self.method,
  130. self.path,
  131. self.version,
  132. self.headers,
  133. self.raw_headers,
  134. self.should_close,
  135. self.compression,
  136. self.upgrade,
  137. self.chunked,
  138. self.url)
  139. if "method" in dct:
  140. ret.method = dct["method"]
  141. if "path" in dct:
  142. ret.path = dct["path"]
  143. if "version" in dct:
  144. ret.version = dct["version"]
  145. if "headers" in dct:
  146. ret.headers = dct["headers"]
  147. if "raw_headers" in dct:
  148. ret.raw_headers = dct["raw_headers"]
  149. if "should_close" in dct:
  150. ret.should_close = dct["should_close"]
  151. if "compression" in dct:
  152. ret.compression = dct["compression"]
  153. if "upgrade" in dct:
  154. ret.upgrade = dct["upgrade"]
  155. if "chunked" in dct:
  156. ret.chunked = dct["chunked"]
  157. if "url" in dct:
  158. ret.url = dct["url"]
  159. return ret
  160. cdef _new_request_message(str method,
  161. str path,
  162. object version,
  163. object headers,
  164. object raw_headers,
  165. bint should_close,
  166. object compression,
  167. bint upgrade,
  168. bint chunked,
  169. object url):
  170. cdef RawRequestMessage ret
  171. ret = RawRequestMessage.__new__(RawRequestMessage)
  172. ret.method = method
  173. ret.path = path
  174. ret.version = version
  175. ret.headers = headers
  176. ret.raw_headers = raw_headers
  177. ret.should_close = should_close
  178. ret.compression = compression
  179. ret.upgrade = upgrade
  180. ret.chunked = chunked
  181. ret.url = url
  182. return ret
  183. @cython.freelist(DEFAULT_FREELIST_SIZE)
  184. cdef class RawResponseMessage:
  185. cdef readonly object version # HttpVersion
  186. cdef readonly int code
  187. cdef readonly str reason
  188. cdef readonly object headers # CIMultiDict
  189. cdef readonly object raw_headers # tuple
  190. cdef readonly object should_close
  191. cdef readonly object compression
  192. cdef readonly object upgrade
  193. cdef readonly object chunked
  194. def __init__(self, version, code, reason, headers, raw_headers,
  195. should_close, compression, upgrade, chunked):
  196. self.version = version
  197. self.code = code
  198. self.reason = reason
  199. self.headers = headers
  200. self.raw_headers = raw_headers
  201. self.should_close = should_close
  202. self.compression = compression
  203. self.upgrade = upgrade
  204. self.chunked = chunked
  205. def __repr__(self):
  206. info = []
  207. info.append(("version", self.version))
  208. info.append(("code", self.code))
  209. info.append(("reason", self.reason))
  210. info.append(("headers", self.headers))
  211. info.append(("raw_headers", self.raw_headers))
  212. info.append(("should_close", self.should_close))
  213. info.append(("compression", self.compression))
  214. info.append(("upgrade", self.upgrade))
  215. info.append(("chunked", self.chunked))
  216. sinfo = ', '.join(name + '=' + repr(val) for name, val in info)
  217. return '<RawResponseMessage(' + sinfo + ')>'
  218. cdef _new_response_message(object version,
  219. int code,
  220. str reason,
  221. object headers,
  222. object raw_headers,
  223. bint should_close,
  224. object compression,
  225. bint upgrade,
  226. bint chunked):
  227. cdef RawResponseMessage ret
  228. ret = RawResponseMessage.__new__(RawResponseMessage)
  229. ret.version = version
  230. ret.code = code
  231. ret.reason = reason
  232. ret.headers = headers
  233. ret.raw_headers = raw_headers
  234. ret.should_close = should_close
  235. ret.compression = compression
  236. ret.upgrade = upgrade
  237. ret.chunked = chunked
  238. return ret
  239. @cython.internal
  240. cdef class HttpParser:
  241. cdef:
  242. cparser.llhttp_t* _cparser
  243. cparser.llhttp_settings_t* _csettings
  244. bytes _raw_name
  245. object _name
  246. bytes _raw_value
  247. bint _has_value
  248. object _protocol
  249. object _loop
  250. object _timer
  251. size_t _max_line_size
  252. size_t _max_field_size
  253. size_t _max_headers
  254. bint _response_with_body
  255. bint _read_until_eof
  256. bint _started
  257. object _url
  258. bytearray _buf
  259. str _path
  260. str _reason
  261. list _headers
  262. list _raw_headers
  263. bint _upgraded
  264. list _messages
  265. object _payload
  266. bint _payload_error
  267. object _payload_exception
  268. object _last_error
  269. bint _auto_decompress
  270. int _limit
  271. str _content_encoding
  272. Py_buffer py_buf
  273. def __cinit__(self):
  274. self._cparser = <cparser.llhttp_t*> \
  275. PyMem_Malloc(sizeof(cparser.llhttp_t))
  276. if self._cparser is NULL:
  277. raise MemoryError()
  278. self._csettings = <cparser.llhttp_settings_t*> \
  279. PyMem_Malloc(sizeof(cparser.llhttp_settings_t))
  280. if self._csettings is NULL:
  281. raise MemoryError()
  282. def __dealloc__(self):
  283. PyMem_Free(self._cparser)
  284. PyMem_Free(self._csettings)
  285. cdef _init(
  286. self, cparser.llhttp_type mode,
  287. object protocol, object loop, int limit,
  288. object timer=None,
  289. size_t max_line_size=8190, size_t max_headers=32768,
  290. size_t max_field_size=8190, payload_exception=None,
  291. bint response_with_body=True, bint read_until_eof=False,
  292. bint auto_decompress=True,
  293. ):
  294. cparser.llhttp_settings_init(self._csettings)
  295. cparser.llhttp_init(self._cparser, mode, self._csettings)
  296. self._cparser.data = <void*>self
  297. self._cparser.content_length = 0
  298. self._protocol = protocol
  299. self._loop = loop
  300. self._timer = timer
  301. self._buf = bytearray()
  302. self._payload = None
  303. self._payload_error = 0
  304. self._payload_exception = payload_exception
  305. self._messages = []
  306. self._raw_name = EMPTY_BYTES
  307. self._raw_value = EMPTY_BYTES
  308. self._has_value = False
  309. self._max_line_size = max_line_size
  310. self._max_headers = max_headers
  311. self._max_field_size = max_field_size
  312. self._response_with_body = response_with_body
  313. self._read_until_eof = read_until_eof
  314. self._upgraded = False
  315. self._auto_decompress = auto_decompress
  316. self._content_encoding = None
  317. self._csettings.on_url = cb_on_url
  318. self._csettings.on_status = cb_on_status
  319. self._csettings.on_header_field = cb_on_header_field
  320. self._csettings.on_header_value = cb_on_header_value
  321. self._csettings.on_headers_complete = cb_on_headers_complete
  322. self._csettings.on_body = cb_on_body
  323. self._csettings.on_message_begin = cb_on_message_begin
  324. self._csettings.on_message_complete = cb_on_message_complete
  325. self._csettings.on_chunk_header = cb_on_chunk_header
  326. self._csettings.on_chunk_complete = cb_on_chunk_complete
  327. self._last_error = None
  328. self._limit = limit
  329. cdef _process_header(self):
  330. cdef str value
  331. if self._raw_name is not EMPTY_BYTES:
  332. name = find_header(self._raw_name)
  333. value = self._raw_value.decode('utf-8', 'surrogateescape')
  334. self._headers.append((name, value))
  335. if name is CONTENT_ENCODING:
  336. self._content_encoding = value
  337. self._has_value = False
  338. self._raw_headers.append((self._raw_name, self._raw_value))
  339. self._raw_name = EMPTY_BYTES
  340. self._raw_value = EMPTY_BYTES
  341. cdef _on_header_field(self, char* at, size_t length):
  342. if self._has_value:
  343. self._process_header()
  344. if self._raw_name is EMPTY_BYTES:
  345. self._raw_name = at[:length]
  346. else:
  347. self._raw_name += at[:length]
  348. cdef _on_header_value(self, char* at, size_t length):
  349. if self._raw_value is EMPTY_BYTES:
  350. self._raw_value = at[:length]
  351. else:
  352. self._raw_value += at[:length]
  353. self._has_value = True
  354. cdef _on_headers_complete(self):
  355. self._process_header()
  356. should_close = not cparser.llhttp_should_keep_alive(self._cparser)
  357. upgrade = self._cparser.upgrade
  358. chunked = self._cparser.flags & cparser.F_CHUNKED
  359. raw_headers = tuple(self._raw_headers)
  360. headers = CIMultiDictProxy(CIMultiDict(self._headers))
  361. if self._cparser.type == cparser.HTTP_REQUEST:
  362. allowed = upgrade and headers.get("upgrade", "").lower() in ALLOWED_UPGRADES
  363. if allowed or self._cparser.method == cparser.HTTP_CONNECT:
  364. self._upgraded = True
  365. else:
  366. if upgrade and self._cparser.status_code == 101:
  367. self._upgraded = True
  368. # do not support old websocket spec
  369. if SEC_WEBSOCKET_KEY1 in headers:
  370. raise InvalidHeader(SEC_WEBSOCKET_KEY1)
  371. encoding = None
  372. enc = self._content_encoding
  373. if enc is not None:
  374. self._content_encoding = None
  375. enc = enc.lower()
  376. if enc in ('gzip', 'deflate', 'br'):
  377. encoding = enc
  378. if self._cparser.type == cparser.HTTP_REQUEST:
  379. method = http_method_str(self._cparser.method)
  380. msg = _new_request_message(
  381. method, self._path,
  382. self.http_version(), headers, raw_headers,
  383. should_close, encoding, upgrade, chunked, self._url)
  384. else:
  385. msg = _new_response_message(
  386. self.http_version(), self._cparser.status_code, self._reason,
  387. headers, raw_headers, should_close, encoding,
  388. upgrade, chunked)
  389. if (
  390. ULLONG_MAX > self._cparser.content_length > 0 or chunked or
  391. self._cparser.method == cparser.HTTP_CONNECT or
  392. (self._cparser.status_code >= 199 and
  393. self._cparser.content_length == 0 and
  394. self._read_until_eof)
  395. ):
  396. payload = StreamReader(
  397. self._protocol, timer=self._timer, loop=self._loop,
  398. limit=self._limit)
  399. else:
  400. payload = EMPTY_PAYLOAD
  401. self._payload = payload
  402. if encoding is not None and self._auto_decompress:
  403. self._payload = DeflateBuffer(payload, encoding)
  404. if not self._response_with_body:
  405. payload = EMPTY_PAYLOAD
  406. self._messages.append((msg, payload))
  407. cdef _on_message_complete(self):
  408. self._payload.feed_eof()
  409. self._payload = None
  410. cdef _on_chunk_header(self):
  411. self._payload.begin_http_chunk_receiving()
  412. cdef _on_chunk_complete(self):
  413. self._payload.end_http_chunk_receiving()
  414. cdef object _on_status_complete(self):
  415. pass
  416. cdef inline http_version(self):
  417. cdef cparser.llhttp_t* parser = self._cparser
  418. if parser.http_major == 1:
  419. if parser.http_minor == 0:
  420. return HttpVersion10
  421. elif parser.http_minor == 1:
  422. return HttpVersion11
  423. return HttpVersion(parser.http_major, parser.http_minor)
  424. ### Public API ###
  425. def feed_eof(self):
  426. cdef bytes desc
  427. if self._payload is not None:
  428. if self._cparser.flags & cparser.F_CHUNKED:
  429. raise TransferEncodingError(
  430. "Not enough data for satisfy transfer length header.")
  431. elif self._cparser.flags & cparser.F_CONTENT_LENGTH:
  432. raise ContentLengthError(
  433. "Not enough data for satisfy content length header.")
  434. elif cparser.llhttp_get_errno(self._cparser) != cparser.HPE_OK:
  435. desc = cparser.llhttp_get_error_reason(self._cparser)
  436. raise PayloadEncodingError(desc.decode('latin-1'))
  437. else:
  438. self._payload.feed_eof()
  439. elif self._started:
  440. self._on_headers_complete()
  441. if self._messages:
  442. return self._messages[-1][0]
  443. def feed_data(self, data):
  444. cdef:
  445. size_t data_len
  446. size_t nb
  447. cdef cparser.llhttp_errno_t errno
  448. PyObject_GetBuffer(data, &self.py_buf, PyBUF_SIMPLE)
  449. data_len = <size_t>self.py_buf.len
  450. errno = cparser.llhttp_execute(
  451. self._cparser,
  452. <char*>self.py_buf.buf,
  453. data_len)
  454. if errno is cparser.HPE_PAUSED_UPGRADE:
  455. cparser.llhttp_resume_after_upgrade(self._cparser)
  456. nb = cparser.llhttp_get_error_pos(self._cparser) - <char*>self.py_buf.buf
  457. PyBuffer_Release(&self.py_buf)
  458. if errno not in (cparser.HPE_OK, cparser.HPE_PAUSED_UPGRADE):
  459. if self._payload_error == 0:
  460. if self._last_error is not None:
  461. ex = self._last_error
  462. self._last_error = None
  463. else:
  464. after = cparser.llhttp_get_error_pos(self._cparser)
  465. before = data[:after - <char*>self.py_buf.buf]
  466. after_b = after.split(b"\r\n", 1)[0]
  467. before = before.rsplit(b"\r\n", 1)[-1]
  468. data = before + after_b
  469. pointer = " " * (len(repr(before))-1) + "^"
  470. ex = parser_error_from_errno(self._cparser, data, pointer)
  471. self._payload = None
  472. raise ex
  473. if self._messages:
  474. messages = self._messages
  475. self._messages = []
  476. else:
  477. messages = ()
  478. if self._upgraded:
  479. return messages, True, data[nb:]
  480. else:
  481. return messages, False, b""
  482. def set_upgraded(self, val):
  483. self._upgraded = val
  484. cdef class HttpRequestParser(HttpParser):
  485. def __init__(
  486. self, protocol, loop, int limit, timer=None,
  487. size_t max_line_size=8190, size_t max_headers=32768,
  488. size_t max_field_size=8190, payload_exception=None,
  489. bint response_with_body=True, bint read_until_eof=False,
  490. bint auto_decompress=True,
  491. ):
  492. self._init(cparser.HTTP_REQUEST, protocol, loop, limit, timer,
  493. max_line_size, max_headers, max_field_size,
  494. payload_exception, response_with_body, read_until_eof,
  495. auto_decompress)
  496. cdef object _on_status_complete(self):
  497. cdef int idx1, idx2
  498. if not self._buf:
  499. return
  500. self._path = self._buf.decode('utf-8', 'surrogateescape')
  501. try:
  502. idx3 = len(self._path)
  503. if self._cparser.method == cparser.HTTP_CONNECT:
  504. # authority-form,
  505. # https://datatracker.ietf.org/doc/html/rfc7230#section-5.3.3
  506. self._url = URL.build(authority=self._path, encoded=True)
  507. elif idx3 > 1 and self._path[0] == '/':
  508. # origin-form,
  509. # https://datatracker.ietf.org/doc/html/rfc7230#section-5.3.1
  510. idx1 = self._path.find("?")
  511. if idx1 == -1:
  512. query = ""
  513. idx2 = self._path.find("#")
  514. if idx2 == -1:
  515. path = self._path
  516. fragment = ""
  517. else:
  518. path = self._path[0: idx2]
  519. fragment = self._path[idx2+1:]
  520. else:
  521. path = self._path[0:idx1]
  522. idx1 += 1
  523. idx2 = self._path.find("#", idx1+1)
  524. if idx2 == -1:
  525. query = self._path[idx1:]
  526. fragment = ""
  527. else:
  528. query = self._path[idx1: idx2]
  529. fragment = self._path[idx2+1:]
  530. self._url = URL.build(
  531. path=path,
  532. query_string=query,
  533. fragment=fragment,
  534. encoded=True,
  535. )
  536. else:
  537. # absolute-form for proxy maybe,
  538. # https://datatracker.ietf.org/doc/html/rfc7230#section-5.3.2
  539. self._url = URL(self._path, encoded=True)
  540. finally:
  541. PyByteArray_Resize(self._buf, 0)
  542. cdef class HttpResponseParser(HttpParser):
  543. def __init__(
  544. self, protocol, loop, int limit, timer=None,
  545. size_t max_line_size=8190, size_t max_headers=32768,
  546. size_t max_field_size=8190, payload_exception=None,
  547. bint response_with_body=True, bint read_until_eof=False,
  548. bint auto_decompress=True
  549. ):
  550. self._init(cparser.HTTP_RESPONSE, protocol, loop, limit, timer,
  551. max_line_size, max_headers, max_field_size,
  552. payload_exception, response_with_body, read_until_eof,
  553. auto_decompress)
  554. # Use strict parsing on dev mode, so users are warned about broken servers.
  555. if not DEBUG:
  556. cparser.llhttp_set_lenient_headers(self._cparser, 1)
  557. cparser.llhttp_set_lenient_optional_cr_before_lf(self._cparser, 1)
  558. cparser.llhttp_set_lenient_spaces_after_chunk_size(self._cparser, 1)
  559. cdef object _on_status_complete(self):
  560. if self._buf:
  561. self._reason = self._buf.decode('utf-8', 'surrogateescape')
  562. PyByteArray_Resize(self._buf, 0)
  563. else:
  564. self._reason = self._reason or ''
  565. cdef int cb_on_message_begin(cparser.llhttp_t* parser) except -1:
  566. cdef HttpParser pyparser = <HttpParser>parser.data
  567. pyparser._started = True
  568. pyparser._headers = []
  569. pyparser._raw_headers = []
  570. PyByteArray_Resize(pyparser._buf, 0)
  571. pyparser._path = None
  572. pyparser._reason = None
  573. return 0
  574. cdef int cb_on_url(cparser.llhttp_t* parser,
  575. const char *at, size_t length) except -1:
  576. cdef HttpParser pyparser = <HttpParser>parser.data
  577. try:
  578. if length > pyparser._max_line_size:
  579. raise LineTooLong(
  580. 'Status line is too long', pyparser._max_line_size, length)
  581. extend(pyparser._buf, at, length)
  582. except BaseException as ex:
  583. pyparser._last_error = ex
  584. return -1
  585. else:
  586. return 0
  587. cdef int cb_on_status(cparser.llhttp_t* parser,
  588. const char *at, size_t length) except -1:
  589. cdef HttpParser pyparser = <HttpParser>parser.data
  590. cdef str reason
  591. try:
  592. if length > pyparser._max_line_size:
  593. raise LineTooLong(
  594. 'Status line is too long', pyparser._max_line_size, length)
  595. extend(pyparser._buf, at, length)
  596. except BaseException as ex:
  597. pyparser._last_error = ex
  598. return -1
  599. else:
  600. return 0
  601. cdef int cb_on_header_field(cparser.llhttp_t* parser,
  602. const char *at, size_t length) except -1:
  603. cdef HttpParser pyparser = <HttpParser>parser.data
  604. cdef Py_ssize_t size
  605. try:
  606. pyparser._on_status_complete()
  607. size = len(pyparser._raw_name) + length
  608. if size > pyparser._max_field_size:
  609. raise LineTooLong(
  610. 'Header name is too long', pyparser._max_field_size, size)
  611. pyparser._on_header_field(at, length)
  612. except BaseException as ex:
  613. pyparser._last_error = ex
  614. return -1
  615. else:
  616. return 0
  617. cdef int cb_on_header_value(cparser.llhttp_t* parser,
  618. const char *at, size_t length) except -1:
  619. cdef HttpParser pyparser = <HttpParser>parser.data
  620. cdef Py_ssize_t size
  621. try:
  622. size = len(pyparser._raw_value) + length
  623. if size > pyparser._max_field_size:
  624. raise LineTooLong(
  625. 'Header value is too long', pyparser._max_field_size, size)
  626. pyparser._on_header_value(at, length)
  627. except BaseException as ex:
  628. pyparser._last_error = ex
  629. return -1
  630. else:
  631. return 0
  632. cdef int cb_on_headers_complete(cparser.llhttp_t* parser) except -1:
  633. cdef HttpParser pyparser = <HttpParser>parser.data
  634. try:
  635. pyparser._on_status_complete()
  636. pyparser._on_headers_complete()
  637. except BaseException as exc:
  638. pyparser._last_error = exc
  639. return -1
  640. else:
  641. if pyparser._upgraded or pyparser._cparser.method == cparser.HTTP_CONNECT:
  642. return 2
  643. else:
  644. return 0
  645. cdef int cb_on_body(cparser.llhttp_t* parser,
  646. const char *at, size_t length) except -1:
  647. cdef HttpParser pyparser = <HttpParser>parser.data
  648. cdef bytes body = at[:length]
  649. try:
  650. pyparser._payload.feed_data(body, length)
  651. except BaseException as underlying_exc:
  652. reraised_exc = underlying_exc
  653. if pyparser._payload_exception is not None:
  654. reraised_exc = pyparser._payload_exception(str(underlying_exc))
  655. set_exception(pyparser._payload, reraised_exc, underlying_exc)
  656. pyparser._payload_error = 1
  657. return -1
  658. else:
  659. return 0
  660. cdef int cb_on_message_complete(cparser.llhttp_t* parser) except -1:
  661. cdef HttpParser pyparser = <HttpParser>parser.data
  662. try:
  663. pyparser._started = False
  664. pyparser._on_message_complete()
  665. except BaseException as exc:
  666. pyparser._last_error = exc
  667. return -1
  668. else:
  669. return 0
  670. cdef int cb_on_chunk_header(cparser.llhttp_t* parser) except -1:
  671. cdef HttpParser pyparser = <HttpParser>parser.data
  672. try:
  673. pyparser._on_chunk_header()
  674. except BaseException as exc:
  675. pyparser._last_error = exc
  676. return -1
  677. else:
  678. return 0
  679. cdef int cb_on_chunk_complete(cparser.llhttp_t* parser) except -1:
  680. cdef HttpParser pyparser = <HttpParser>parser.data
  681. try:
  682. pyparser._on_chunk_complete()
  683. except BaseException as exc:
  684. pyparser._last_error = exc
  685. return -1
  686. else:
  687. return 0
  688. cdef parser_error_from_errno(cparser.llhttp_t* parser, data, pointer):
  689. cdef cparser.llhttp_errno_t errno = cparser.llhttp_get_errno(parser)
  690. cdef bytes desc = cparser.llhttp_get_error_reason(parser)
  691. err_msg = "{}:\n\n {!r}\n {}".format(desc.decode("latin-1"), data, pointer)
  692. if errno in {cparser.HPE_CB_MESSAGE_BEGIN,
  693. cparser.HPE_CB_HEADERS_COMPLETE,
  694. cparser.HPE_CB_MESSAGE_COMPLETE,
  695. cparser.HPE_CB_CHUNK_HEADER,
  696. cparser.HPE_CB_CHUNK_COMPLETE,
  697. cparser.HPE_INVALID_CONSTANT,
  698. cparser.HPE_INVALID_HEADER_TOKEN,
  699. cparser.HPE_INVALID_CONTENT_LENGTH,
  700. cparser.HPE_INVALID_CHUNK_SIZE,
  701. cparser.HPE_INVALID_EOF_STATE,
  702. cparser.HPE_INVALID_TRANSFER_ENCODING}:
  703. return BadHttpMessage(err_msg)
  704. elif errno == cparser.HPE_INVALID_METHOD:
  705. return BadHttpMethod(error=err_msg)
  706. elif errno in {cparser.HPE_INVALID_STATUS,
  707. cparser.HPE_INVALID_VERSION}:
  708. return BadStatusLine(error=err_msg)
  709. elif errno == cparser.HPE_INVALID_URL:
  710. return InvalidURLError(err_msg)
  711. return BadHttpMessage(err_msg)