# _elffile.py (3.2 KB)
"""
ELF file parser.

This provides a class ``ELFFile`` that parses an ELF executable, with an
interface similar to ``ZipFile``. Only the read interface is implemented.

Based on: https://gist.github.com/lyssdod/f51579ae8d93c8657a5564aefc2ffbca
ELF header: https://refspecs.linuxfoundation.org/elf/gabi4+/ch4.eheader.html
"""
  8. from __future__ import annotations
  9. import enum
  10. import os
  11. import struct
  12. from typing import IO
  13. class ELFInvalid(ValueError):
  14. pass
  15. class EIClass(enum.IntEnum):
  16. C32 = 1
  17. C64 = 2
  18. class EIData(enum.IntEnum):
  19. Lsb = 1
  20. Msb = 2
  21. class EMachine(enum.IntEnum):
  22. I386 = 3
  23. S390 = 22
  24. Arm = 40
  25. X8664 = 62
  26. AArc64 = 183
  27. class ELFFile:
  28. """
  29. Representation of an ELF executable.
  30. """
  31. def __init__(self, f: IO[bytes]) -> None:
  32. self._f = f
  33. try:
  34. ident = self._read("16B")
  35. except struct.error as e:
  36. raise ELFInvalid("unable to parse identification") from e
  37. magic = bytes(ident[:4])
  38. if magic != b"\x7fELF":
  39. raise ELFInvalid(f"invalid magic: {magic!r}")
  40. self.capacity = ident[4] # Format for program header (bitness).
  41. self.encoding = ident[5] # Data structure encoding (endianness).
  42. try:
  43. # e_fmt: Format for program header.
  44. # p_fmt: Format for section header.
  45. # p_idx: Indexes to find p_type, p_offset, and p_filesz.
  46. e_fmt, self._p_fmt, self._p_idx = {
  47. (1, 1): ("<HHIIIIIHHH", "<IIIIIIII", (0, 1, 4)), # 32-bit LSB.
  48. (1, 2): (">HHIIIIIHHH", ">IIIIIIII", (0, 1, 4)), # 32-bit MSB.
  49. (2, 1): ("<HHIQQQIHHH", "<IIQQQQQQ", (0, 2, 5)), # 64-bit LSB.
  50. (2, 2): (">HHIQQQIHHH", ">IIQQQQQQ", (0, 2, 5)), # 64-bit MSB.
  51. }[(self.capacity, self.encoding)]
  52. except KeyError as e:
  53. raise ELFInvalid(
  54. f"unrecognized capacity ({self.capacity}) or "
  55. f"encoding ({self.encoding})"
  56. ) from e
  57. try:
  58. (
  59. _,
  60. self.machine, # Architecture type.
  61. _,
  62. _,
  63. self._e_phoff, # Offset of program header.
  64. _,
  65. self.flags, # Processor-specific flags.
  66. _,
  67. self._e_phentsize, # Size of section.
  68. self._e_phnum, # Number of sections.
  69. ) = self._read(e_fmt)
  70. except struct.error as e:
  71. raise ELFInvalid("unable to parse machine and section information") from e
  72. def _read(self, fmt: str) -> tuple[int, ...]:
  73. return struct.unpack(fmt, self._f.read(struct.calcsize(fmt)))
  74. @property
  75. def interpreter(self) -> str | None:
  76. """
  77. The path recorded in the ``PT_INTERP`` section header.
  78. """
  79. for index in range(self._e_phnum):
  80. self._f.seek(self._e_phoff + self._e_phentsize * index)
  81. try:
  82. data = self._read(self._p_fmt)
  83. except struct.error:
  84. continue
  85. if data[self._p_idx[0]] != 3: # Not PT_INTERP.
  86. continue
  87. self._f.seek(data[self._p_idx[1]])
  88. return os.fsdecode(self._f.read(data[self._p_idx[2]])).strip("\0")
  89. return None