(root)/
glibc-2.38/
scripts/
glibcelf.py
       1  #!/usr/bin/python3
       2  # ELF support functionality for Python.
       3  # Copyright (C) 2022-2023 Free Software Foundation, Inc.
       4  # This file is part of the GNU C Library.
       5  #
       6  # The GNU C Library is free software; you can redistribute it and/or
       7  # modify it under the terms of the GNU Lesser General Public
       8  # License as published by the Free Software Foundation; either
       9  # version 2.1 of the License, or (at your option) any later version.
      10  #
      11  # The GNU C Library is distributed in the hope that it will be useful,
      12  # but WITHOUT ANY WARRANTY; without even the implied warranty of
      13  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      14  # Lesser General Public License for more details.
      15  #
      16  # You should have received a copy of the GNU Lesser General Public
      17  # License along with the GNU C Library; if not, see
      18  # <https://www.gnu.org/licenses/>.
      19  
      20  """Basic ELF parser.
      21  
      22  Use Image.readfile(path) to read an ELF file into memory and begin
      23  parsing it.
      24  
      25  """
      26  
      27  import collections
      28  import functools
      29  import os
      30  import struct
      31  
      32  import glibcpp
      33  
      34  class ESC[4;38;5;81m_MetaNamedValue(ESC[4;38;5;149mtype):
      35      """Used to set up _NamedValue subclasses."""
      36  
      37      @classmethod
      38      def __prepare__(metacls, cls, bases, **kwds):
      39          # Indicates an int-based class.  Needed for types like Shn.
      40          int_based = False
      41          for base in bases:
      42              if issubclass(base, int):
      43                  int_based = int
      44                  break
      45          return dict(by_value={},
      46                      by_name={},
      47                      prefix=None,
      48                      _int_based=int_based)
      49  
      50      def __contains__(self, other):
      51          return other in self.by_value
      52  
      53  class ESC[4;38;5;81m_NamedValue(metaclass=ESC[4;38;5;149m_MetaNamedValue):
      54      """Typed, named integer constants.
      55  
      56      Constants have the following instance attributes:
      57  
      58      name: The full name of the constant (e.g., "PT_NULL").
      59      short_name: The name with of the constant without the prefix ("NULL").
      60      value: The integer value of the constant.
      61  
      62      The following class attributes are available:
      63  
      64      by_value: A dict mapping integers to constants.
      65      by_name: A dict mapping strings to constants.
      66      prefix: A string that is removed from the start of short names, or None.
      67  
      68      """
      69  
      70      def __new__(cls, arg0, arg1=None):
      71          """Instance creation.
      72  
      73          For the one-argument form, the argument must be a string, an
      74          int, or an instance of this class.  Strings are looked up via
      75          by_name.  Values are looked up via by_value; if value lookup
      76          fails, a new unnamed instance is returned.  Instances of this
      77          class a re returned as-is.
      78  
      79          The two-argument form expects the name (a string) and the
      80          value (an integer).  A new instance is created in this case.
      81          The instance is not registered in the by_value/by_name
      82          dictionaries (but the caller can do that).
      83  
      84          """
      85  
      86          typ0 = type(arg0)
      87          if arg1 is None:
      88              if isinstance(typ0, cls):
      89                  # Re-use the existing object.
      90                  return arg0
      91              if typ0 is int:
      92                  by_value = cls.by_value
      93                  try:
      94                      return by_value[arg0]
      95                  except KeyError:
      96                      # Create a new object of the requested value.
      97                      if cls._int_based:
      98                          result = int.__new__(cls, arg0)
      99                      else:
     100                          result = object.__new__(cls)
     101                      result.value = arg0
     102                      result.name = None
     103                      return result
     104              if typ0 is str:
     105                  by_name = cls.by_name
     106                  try:
     107                      return by_name[arg0]
     108                  except KeyError:
     109                      raise ValueError('unknown {} constant: {!r}'.format(
     110                          cls.__name__, arg0))
     111          else:
     112              # Types for the two-argument form are rigid.
     113              if typ0 is not str and typ0 is not None:
     114                  raise ValueError('type {} of name {!r} should be str'.format(
     115                      typ0.__name__, arg0))
     116              if type(arg1) is not int:
     117                  raise ValueError('type {} of value {!r} should be int'.format(
     118                      type(arg1).__name__, arg1))
     119              # Create a new named constants.
     120              if cls._int_based:
     121                  result = int.__new__(cls, arg1)
     122              else:
     123                  result = object.__new__(cls)
     124              result.value = arg1
     125              result.name = arg0
     126              # Set up the short_name attribute.
     127              prefix = cls.prefix
     128              if prefix and arg0.startswith(prefix):
     129                  result.short_name = arg0[len(prefix):]
     130              else:
     131                  result.short_name = arg0
     132              return result
     133  
     134      def __str__(self):
     135          name = self.name
     136          if name:
     137              return name
     138          else:
     139              return str(self.value)
     140  
     141      def __repr__(self):
     142          name = self.name
     143          if name:
     144              return name
     145          else:
     146              return '{}({})'.format(self.__class__.__name__, self.value)
     147  
     148      def __setattr__(self, name, value):
     149          # Prevent modification of the critical attributes once they
     150          # have been set.
     151          if name in ('name', 'value', 'short_name') and hasattr(self, name):
     152              raise AttributeError('can\'t set attribute {}'.format(name))
     153          object.__setattr__(self, name, value)
     154  
     155  @functools.total_ordering
     156  class ESC[4;38;5;81m_TypedConstant(ESC[4;38;5;149m_NamedValue):
     157      """Base class for integer-valued optionally named constants.
     158  
     159      This type is not an integer type.
     160  
     161      """
     162  
     163      def __eq__(self, other):
     164          return isinstance(other, self.__class__) and self.value == other.value
     165  
     166      def __lt__(self, other):
     167          return isinstance(other, self.__class__) and self.value <= other.value
     168  
     169      def __hash__(self):
     170          return hash(self.value)
     171  
     172  class ESC[4;38;5;81m_IntConstant(ESC[4;38;5;149m_NamedValue, ESC[4;38;5;149mint):
     173      """Base class for integer-like optionally named constants.
     174  
     175      Instances compare equal to the integer of the same value, and can
     176      be used in integer arithmetic.
     177  
     178      """
     179  
     180      pass
     181  
     182  class ESC[4;38;5;81m_FlagConstant(ESC[4;38;5;149m_TypedConstant, ESC[4;38;5;149mint):
     183      pass
     184  
     185  def _parse_elf_h():
     186      """Read ../elf/elf.h and return a dict with the constants in it."""
     187  
     188      path = os.path.join(os.path.dirname(os.path.realpath(__file__)),
     189                          '..', 'elf', 'elf.h')
     190      class ESC[4;38;5;81mTokenizerReporter:
     191          """Report tokenizer errors to standard output."""
     192  
     193          def __init__(self):
     194              self.errors = 0
     195  
     196          def error(self, token, message):
     197              self.errors += 1
     198              print('{}:{}:{}: error: {}'.format(
     199                  path, token.line, token.column, message))
     200  
     201      reporter = TokenizerReporter()
     202      with open(path) as inp:
     203          tokens = glibcpp.tokenize_c(inp.read(), reporter)
     204      if reporter.errors:
     205          raise IOError('parse error in elf.h')
     206  
     207      class ESC[4;38;5;81mMacroReporter:
     208          """Report macro errors to standard output."""
     209  
     210          def __init__(self):
     211              self.errors = 0
     212  
     213          def error(self, line, message):
     214              self.errors += 1
     215              print('{}:{}: error: {}'.format(path, line, message))
     216  
     217          def note(self, line, message):
     218              print('{}:{}: note: {}'.format(path, line, message))
     219  
     220      reporter = MacroReporter()
     221      result = glibcpp.macro_eval(glibcpp.macro_definitions(tokens), reporter)
     222      if reporter.errors:
     223          raise IOError('parse error in elf.h')
     224  
     225      return result
# Parse elf.h once while the module is loaded.  The result is consumed
# by the _register_elf_h calls below; the parser itself is not needed
# afterwards and is removed from the module namespace.
_elf_h = _parse_elf_h()
del _parse_elf_h
# Names of elf.h constants that have already been assigned to a class.
# _register_elf_h skips them, so a later call with a more general
# prefix does not register the same constant again.
_elf_h_processed = set()
     229  
     230  def _register_elf_h(cls, prefix=None, skip=(), ranges=False, parent=None):
     231      prefix = prefix or cls.prefix
     232      if not prefix:
     233          raise ValueError('missing prefix for {}'.format(cls.__name__))
     234      by_value = cls.by_value
     235      by_name = cls.by_name
     236      processed = _elf_h_processed
     237  
     238      skip = set(skip)
     239      skip.add(prefix + 'NUM')
     240      if ranges:
     241          skip.add(prefix + 'LOOS')
     242          skip.add(prefix + 'HIOS')
     243          skip.add(prefix + 'LOPROC')
     244          skip.add(prefix + 'HIPROC')
     245          cls.os_range = (_elf_h[prefix + 'LOOS'], _elf_h[prefix + 'HIOS'])
     246          cls.proc_range = (_elf_h[prefix + 'LOPROC'], _elf_h[prefix + 'HIPROC'])
     247  
     248      # Inherit the prefix from the parent if not set.
     249      if parent and cls.prefix is None and parent.prefix is not None:
     250          cls.prefix = parent.prefix
     251  
     252      processed_len_start = len(processed)
     253      for name, value in _elf_h.items():
     254          if name in skip or name in processed:
     255              continue
     256          if name.startswith(prefix):
     257              processed.add(name)
     258              if value in by_value:
     259                  raise ValueError('duplicate value {}: {}, {}'.format(
     260                      value, name, by_value[value]))
     261              obj = cls(name, value)
     262              by_value[value] = obj
     263              by_name[name] = obj
     264              setattr(cls, name, obj)
     265              if parent:
     266                  # Make the symbolic name available through the parent as well.
     267                  parent.by_name[name] = obj
     268                  setattr(parent, name, obj)
     269  
     270      if len(processed) == processed_len_start:
     271          raise ValueError('nothing matched prefix {!r}'.format(prefix))
     272  
class ElfClass(_TypedConstant):
    """ELF word size.  Type of EI_CLASS values."""
# The class has no prefix attribute, so the prefix is passed explicitly.
# _register_elf_h always leaves out prefix + 'NUM' (here ELFCLASSNUM).
_register_elf_h(ElfClass, prefix='ELFCLASS')
     276  
class ElfData(_TypedConstant):
    """ELF endianness.  Type of EI_DATA values."""
# The class has no prefix attribute, so the prefix is passed explicitly.
# _register_elf_h always leaves out prefix + 'NUM' (here ELFDATANUM).
_register_elf_h(ElfData, prefix='ELFDATA')
     280  
class Machine(_TypedConstant):
    """ELF machine type.  Type of values in Ehdr.e_machine field."""
    prefix = 'EM_'
# EM_ARC_A5 is excluded explicitly.  NOTE(review): presumably its value
# duplicates another EM_* constant, which _register_elf_h would reject
# with ValueError -- confirm against elf.h.
_register_elf_h(Machine, skip=('EM_ARC_A5',))
     285  
class Et(_TypedConstant):
    """ELF file type.  Type of ET_* values and the Ehdr.e_type field."""
    prefix = 'ET_'
# With ranges=True, ET_LOOS/ET_HIOS and ET_LOPROC/ET_HIPROC are not
# registered as constants; their values become Et.os_range and
# Et.proc_range.
_register_elf_h(Et, ranges=True)
     290  
class Shn(_IntConstant):
    """ELF reserved section indices."""
    prefix = 'SHN_'
class ShnMIPS(Shn):
    """Supplemental SHN_* constants for EM_MIPS."""
class ShnPARISC(Shn):
    """Supplemental SHN_* constants for EM_PARISC."""
# The machine-specific prefixes are registered first.  _register_elf_h
# ignores names that an earlier call has processed, so the final call
# for Shn only picks up the remaining SHN_* names.  Through parent=Shn
# the supplemental constants are also reachable as Shn attributes and
# through Shn.by_name.
_register_elf_h(ShnMIPS, prefix='SHN_MIPS_', parent=Shn)
_register_elf_h(ShnPARISC, prefix='SHN_PARISC_', parent=Shn)
_register_elf_h(Shn, skip='SHN_LORESERVE SHN_HIRESERVE'.split(), ranges=True)
     301  
class Sht(_TypedConstant):
    """ELF section types.  Type of SHT_* values."""
    prefix = 'SHT_'
class ShtALPHA(Sht):
    """Supplemental SHT_* constants for EM_ALPHA."""
class ShtARC(Sht):
    """Supplemental SHT_* constants for EM_ARC."""
class ShtARM(Sht):
    """Supplemental SHT_* constants for EM_ARM."""
class ShtCSKY(Sht):
    """Supplemental SHT_* constants for EM_CSKY."""
class ShtIA_64(Sht):
    """Supplemental SHT_* constants for EM_IA_64."""
class ShtMIPS(Sht):
    """Supplemental SHT_* constants for EM_MIPS."""
class ShtPARISC(Sht):
    """Supplemental SHT_* constants for EM_PARISC."""
class ShtRISCV(Sht):
    """Supplemental SHT_* constants for EM_RISCV."""
# The machine-specific prefixes are registered first.  _register_elf_h
# ignores names that an earlier call has processed, so the final call
# for Sht only picks up the remaining SHT_* names.
_register_elf_h(ShtALPHA, prefix='SHT_ALPHA_', parent=Sht)
_register_elf_h(ShtARC, prefix='SHT_ARC_', parent=Sht)
_register_elf_h(ShtARM, prefix='SHT_ARM_', parent=Sht)
_register_elf_h(ShtCSKY, prefix='SHT_CSKY_', parent=Sht)
_register_elf_h(ShtIA_64, prefix='SHT_IA_64_', parent=Sht)
_register_elf_h(ShtMIPS, prefix='SHT_MIPS_', parent=Sht)
_register_elf_h(ShtPARISC, prefix='SHT_PARISC_', parent=Sht)
_register_elf_h(ShtRISCV, prefix='SHT_RISCV_', parent=Sht)
_register_elf_h(Sht, ranges=True,
                skip='SHT_LOSUNW SHT_HISUNW SHT_LOUSER SHT_HIUSER'.split())
     331  
class Pf(_FlagConstant):
    """Program header flags.  Type of Phdr.p_flags values."""
    prefix = 'PF_'
class PfARM(Pf):
    """Supplemental PF_* flags for EM_ARM."""
class PfHP(Pf):
    """Supplemental PF_* flags for HP-UX."""
class PfIA_64(Pf):
    """Supplemental PF_* flags for EM_IA_64."""
class PfMIPS(Pf):
    """Supplemental PF_* flags for EM_MIPS."""
class PfPARISC(Pf):
    """Supplemental PF_* flags for EM_PARISC."""
# The more specific prefixes are registered first.  _register_elf_h
# ignores names that an earlier call has processed, so the final call
# for Pf only picks up the remaining PF_* names (minus the two mask
# names in skip).
_register_elf_h(PfARM, prefix='PF_ARM_', parent=Pf)
_register_elf_h(PfHP, prefix='PF_HP_', parent=Pf)
_register_elf_h(PfIA_64, prefix='PF_IA_64_', parent=Pf)
_register_elf_h(PfMIPS, prefix='PF_MIPS_', parent=Pf)
_register_elf_h(PfPARISC, prefix='PF_PARISC_', parent=Pf)
_register_elf_h(Pf, skip='PF_MASKOS PF_MASKPROC'.split())
     351  
class Shf(_FlagConstant):
    """Section flags.  Type of Shdr.sh_flags values."""
    prefix = 'SHF_'
class ShfALPHA(Shf):
    """Supplemental SHF_* constants for EM_ALPHA."""
class ShfARM(Shf):
    """Supplemental SHF_* constants for EM_ARM."""
class ShfIA_64(Shf):
    """Supplemental SHF_* constants for EM_IA_64."""
class ShfMIPS(Shf):
    """Supplemental SHF_* constants for EM_MIPS."""
class ShfPARISC(Shf):
    """Supplemental SHF_* constants for EM_PARISC."""
# The machine-specific prefixes are registered first.  _register_elf_h
# ignores names that an earlier call has processed, so the final call
# for Shf only picks up the remaining SHF_* names (minus the two mask
# names in skip).
_register_elf_h(ShfALPHA, prefix='SHF_ALPHA_', parent=Shf)
_register_elf_h(ShfARM, prefix='SHF_ARM_', parent=Shf)
_register_elf_h(ShfIA_64, prefix='SHF_IA_64_', parent=Shf)
_register_elf_h(ShfMIPS, prefix='SHF_MIPS_', parent=Shf)
_register_elf_h(ShfPARISC, prefix='SHF_PARISC_', parent=Shf)
_register_elf_h(Shf, skip='SHF_MASKOS SHF_MASKPROC'.split())
     371  
class Stb(_TypedConstant):
    """ELF symbol binding type."""
    prefix = 'STB_'
# With ranges=True, STB_LOOS/STB_HIOS and STB_LOPROC/STB_HIPROC are not
# registered as constants; their values become Stb.os_range and
# Stb.proc_range.
_register_elf_h(Stb, ranges=True)
     376  
     377  class ESC[4;38;5;81mStt(ESC[4;38;5;149m_TypedConstant):
     378      """ELF symbol type."""
     379      prefix = 'STT_'
     380  class ESC[4;38;5;81mSttARM(ESC[4;38;5;149mSht):
     381      """Supplemental STT_* constants for EM_ARM."""
     382  class ESC[4;38;5;81mSttPARISC(ESC[4;38;5;149mSht):
     383      """Supplemental STT_* constants for EM_PARISC."""
     384  class ESC[4;38;5;81mSttSPARC(ESC[4;38;5;149mSht):
     385      """Supplemental STT_* constants for EM_SPARC."""
     386      STT_SPARC_REGISTER = 13
     387  class ESC[4;38;5;81mSttX86_64(ESC[4;38;5;149mSht):
     388      """Supplemental STT_* constants for EM_X86_64."""
     389  _register_elf_h(SttARM, prefix='STT_ARM_', parent=Stt)
     390  _register_elf_h(SttPARISC, prefix='STT_PARISC_', parent=Stt)
     391  _register_elf_h(SttSPARC, prefix='STT_SPARC_', parent=Stt)
     392  _register_elf_h(Stt, ranges=True)
     393  
     394  
class Pt(_TypedConstant):
    """ELF program header types.  Type of Phdr.p_type."""
    prefix = 'PT_'
class PtAARCH64(Pt):
    """Supplemental PT_* constants for EM_AARCH64."""
class PtARM(Pt):
    """Supplemental PT_* constants for EM_ARM."""
class PtHP(Pt):
    """Supplemental PT_* constants for HP-UX."""
class PtIA_64(Pt):
    """Supplemental PT_* constants for EM_IA_64."""
class PtMIPS(Pt):
    """Supplemental PT_* constants for EM_MIPS."""
class PtPARISC(Pt):
    """Supplemental PT_* constants for EM_PARISC."""
class PtRISCV(Pt):
    """Supplemental PT_* constants for EM_RISCV."""
# The more specific prefixes are registered first.  _register_elf_h
# ignores names that an earlier call has processed, so the final call
# for Pt only picks up the remaining PT_* names.
_register_elf_h(PtAARCH64, prefix='PT_AARCH64_', parent=Pt)
_register_elf_h(PtARM, prefix='PT_ARM_', parent=Pt)
_register_elf_h(PtHP, prefix='PT_HP_', parent=Pt)
_register_elf_h(PtIA_64, prefix='PT_IA_64_', parent=Pt)
_register_elf_h(PtMIPS, prefix='PT_MIPS_', parent=Pt)
_register_elf_h(PtPARISC, prefix='PT_PARISC_', parent=Pt)
_register_elf_h(PtRISCV, prefix='PT_RISCV_', parent=Pt)
_register_elf_h(Pt, skip='PT_LOSUNW PT_HISUNW'.split(), ranges=True)
     420  
class Dt(_TypedConstant):
    """ELF dynamic segment tags.  Type of Dyn.d_tag."""
    prefix = 'DT_'
class DtAARCH64(Dt):
    """Supplemental DT_* constants for EM_AARCH64."""
class DtALPHA(Dt):
    """Supplemental DT_* constants for EM_ALPHA."""
class DtALTERA_NIOS2(Dt):
    """Supplemental DT_* constants for EM_ALTERA_NIOS2."""
class DtIA_64(Dt):
    """Supplemental DT_* constants for EM_IA_64."""
class DtMIPS(Dt):
    """Supplemental DT_* constants for EM_MIPS."""
class DtPPC(Dt):
    """Supplemental DT_* constants for EM_PPC."""
class DtPPC64(Dt):
    """Supplemental DT_* constants for EM_PPC64."""
class DtRISCV(Dt):
    """Supplemental DT_* constants for EM_RISCV."""
class DtSPARC(Dt):
    """Supplemental DT_* constants for EM_SPARC."""
# Names that none of the registration calls below may turn into
# constants.  The same list is passed to every call.
_dt_skip = '''
DT_ENCODING DT_PROCNUM
DT_ADDRRNGLO DT_ADDRRNGHI DT_ADDRNUM
DT_VALRNGLO DT_VALRNGHI DT_VALNUM
DT_VERSIONTAGNUM DT_EXTRANUM
DT_AARCH64_NUM
DT_ALPHA_NUM
DT_IA_64_NUM
DT_MIPS_NUM
DT_PPC_NUM
DT_PPC64_NUM
DT_SPARC_NUM
'''.strip().split()
# The machine-specific prefixes are registered first.  _register_elf_h
# ignores names that an earlier call has processed, so the final call
# for Dt only picks up the remaining DT_* names.
_register_elf_h(DtAARCH64, prefix='DT_AARCH64_', skip=_dt_skip, parent=Dt)
_register_elf_h(DtALPHA, prefix='DT_ALPHA_', skip=_dt_skip, parent=Dt)
_register_elf_h(DtALTERA_NIOS2, prefix='DT_NIOS2_', skip=_dt_skip, parent=Dt)
_register_elf_h(DtIA_64, prefix='DT_IA_64_', skip=_dt_skip, parent=Dt)
_register_elf_h(DtMIPS, prefix='DT_MIPS_', skip=_dt_skip, parent=Dt)
_register_elf_h(DtPPC, prefix='DT_PPC_', skip=_dt_skip, parent=Dt)
_register_elf_h(DtPPC64, prefix='DT_PPC64_', skip=_dt_skip, parent=Dt)
_register_elf_h(DtRISCV, prefix='DT_RISCV_', skip=_dt_skip, parent=Dt)
_register_elf_h(DtSPARC, prefix='DT_SPARC_', skip=_dt_skip, parent=Dt)
_register_elf_h(Dt, skip=_dt_skip, ranges=True)
del _dt_skip
     466  
# Constant extraction is complete.  The registration helper and the
# parsed elf.h mapping are removed from the module namespace.
del _register_elf_h
del _elf_h
     470  
     471  class ESC[4;38;5;81mStInfo:
     472      """ELF symbol binding and type.  Type of the Sym.st_info field."""
     473      def __init__(self, arg0, arg1=None):
     474          if isinstance(arg0, int) and arg1 is None:
     475              self.bind = Stb(arg0 >> 4)
     476              self.type = Stt(arg0 & 15)
     477          else:
     478              self.bind = Stb(arg0)
     479              self.type = Stt(arg1)
     480  
     481      def value(self):
     482          """Returns the raw value for the bind/type combination."""
     483          return (self.bind.value() << 4) | (self.type.value())
     484  
     485  # Type in an ELF file.  Used for deserialization.
     486  _Layout = collections.namedtuple('_Layout', 'unpack size')
     487  
     488  def _define_layouts(baseclass: type, layout32: str, layout64: str,
     489                      types=None, fields32=None):
     490      """Assign variants dict to baseclass.
     491  
     492      The variants dict is indexed by (ElfClass, ElfData) pairs, and its
     493      values are _Layout instances.
     494  
     495      """
     496      struct32 = struct.Struct(layout32)
     497      struct64 = struct.Struct(layout64)
     498  
     499      # Check that the struct formats yield the right number of components.
     500      for s in (struct32, struct64):
     501          example = s.unpack(b' ' * s.size)
     502          if len(example) != len(baseclass._fields):
     503              raise ValueError('{!r} yields wrong field count: {} != {}'.format(
     504                  s.format, len(example),  len(baseclass._fields)))
     505  
     506      # Check that field names in types are correct.
     507      if types is None:
     508          types = ()
     509      for n in types:
     510          if n not in baseclass._fields:
     511              raise ValueError('{} does not have field {!r}'.format(
     512                  baseclass.__name__, n))
     513  
     514      if fields32 is not None \
     515         and set(fields32) != set(baseclass._fields):
     516          raise ValueError('{!r} is not a permutation of the fields {!r}'.format(
     517              fields32, baseclass._fields))
     518  
     519      def unique_name(name, used_names = (set((baseclass.__name__,))
     520                                          | set(baseclass._fields)
     521                                          | {n.__name__
     522                                             for n in (types or {}).values()})):
     523          """Find a name that is not used for a class or field name."""
     524          candidate = name
     525          n = 0
     526          while candidate in used_names:
     527              n += 1
     528              candidate = '{}{}'.format(name, n)
     529          used_names.add(candidate)
     530          return candidate
     531  
     532      blob_name = unique_name('blob')
     533      struct_unpack_name = unique_name('struct_unpack')
     534      comps_name = unique_name('comps')
     535  
     536      layouts = {}
     537      for (bits, elfclass, layout, fields) in (
     538              (32, ElfClass.ELFCLASS32, layout32, fields32),
     539              (64, ElfClass.ELFCLASS64, layout64, None),
     540      ):
     541          for (elfdata, structprefix, funcsuffix) in (
     542                  (ElfData.ELFDATA2LSB, '<', 'LE'),
     543                  (ElfData.ELFDATA2MSB, '>', 'BE'),
     544          ):
     545              env = {
     546                  baseclass.__name__: baseclass,
     547                  struct_unpack_name: struct.unpack,
     548              }
     549  
     550              # Add the type converters.
     551              if types:
     552                  for cls in types.values():
     553                      env[cls.__name__] = cls
     554  
     555              funcname = ''.join(
     556                  ('unpack_', baseclass.__name__, str(bits), funcsuffix))
     557  
     558              code = '''
     559  def {funcname}({blob_name}):
     560  '''.format(funcname=funcname, blob_name=blob_name)
     561  
     562              indent = ' ' * 4
     563              unpack_call = '{}({!r}, {})'.format(
     564                  struct_unpack_name, structprefix + layout, blob_name)
     565              field_names = ', '.join(baseclass._fields)
     566              if types is None and fields is None:
     567                  code += '{}return {}({})\n'.format(
     568                      indent, baseclass.__name__, unpack_call)
     569              else:
     570                  # Destructuring tuple assignment.
     571                  if fields is None:
     572                      code += '{}{} = {}\n'.format(
     573                          indent, field_names, unpack_call)
     574                  else:
     575                      # Use custom field order.
     576                      code += '{}{} = {}\n'.format(
     577                          indent, ', '.join(fields), unpack_call)
     578  
     579                  # Perform the type conversions.
     580                  for n in baseclass._fields:
     581                      if n in types:
     582                          code += '{}{} = {}({})\n'.format(
     583                              indent, n, types[n].__name__, n)
     584                  # Create the named tuple.
     585                  code += '{}return {}({})\n'.format(
     586                      indent, baseclass.__name__, field_names)
     587  
     588              exec(code, env)
     589              layouts[(elfclass, elfdata)] = _Layout(
     590                  env[funcname], struct.calcsize(layout))
     591      baseclass.layouts = layouts
     592  
     593  
     594  # Corresponds to EI_* indices into Elf*_Ehdr.e_indent.
     595  class ESC[4;38;5;81mIdent(ESC[4;38;5;149mcollectionsESC[4;38;5;149m.ESC[4;38;5;149mnamedtuple('Ident',
     596      'ei_mag ei_class ei_data ei_version ei_osabi ei_abiversion ei_pad')):
     597  
     598      def __new__(cls, *args):
     599          """Construct an object from a blob or its constituent fields."""
     600          if len(args) == 1:
     601              return cls.unpack(args[0])
     602          return cls.__base__.__new__(cls, *args)
     603  
     604      @staticmethod
     605      def unpack(blob: memoryview) -> 'Ident':
     606          """Parse raws data into a tuple."""
     607          ei_mag, ei_class, ei_data, ei_version, ei_osabi, ei_abiversion, \
     608              ei_pad = struct.unpack('4s5B7s', blob)
     609          return Ident(ei_mag, ElfClass(ei_class), ElfData(ei_data),
     610                       ei_version, ei_osabi, ei_abiversion, ei_pad)
     611      size = 16
     612  
# Corresponds to Elf32_Ehdr and Elf64_Ehdr.
Ehdr = collections.namedtuple('Ehdr',
   'e_ident e_type e_machine e_version e_entry e_phoff e_shoff e_flags'
    + ' e_ehsize e_phentsize e_phnum e_shentsize e_shnum e_shstrndx')
# Both layouts use the field order above.  After unpacking, e_ident,
# e_machine, e_type and e_shstrndx are converted with Ident, Machine,
# Et and Shn.
_define_layouts(Ehdr,
                layout32='16s2H5I6H',
                layout64='16s2HI3QI6H',
                types=dict(e_ident=Ident,
                           e_machine=Machine,
                           e_type=Et,
                           e_shstrndx=Shn))
     624  
# Corresponds to Elf32_Phdr and Elf64_Phdr.  Order follows the latter.
Phdr = collections.namedtuple('Phdr',
    'p_type p_flags p_offset p_vaddr p_paddr p_filesz p_memsz p_align')
# fields32 gives the order in which the 32-bit layout stores the
# fields (p_flags comes after p_memsz there).  p_type and p_flags are
# converted with Pt and Pf.
_define_layouts(Phdr,
                layout32='8I',
                fields32=('p_type', 'p_offset', 'p_vaddr', 'p_paddr',
                          'p_filesz', 'p_memsz', 'p_flags', 'p_align'),
                layout64='2I6Q',
            types=dict(p_type=Pt, p_flags=Pf))
     634  
     635  
     636  # Corresponds to Elf32_Shdr and Elf64_Shdr.
     637  class ESC[4;38;5;81mShdr(ESC[4;38;5;149mcollectionsESC[4;38;5;149m.ESC[4;38;5;149mnamedtuple('Shdr',
     638      'sh_name sh_type sh_flags sh_addr sh_offset sh_size sh_link sh_info'
     639      + ' sh_addralign sh_entsize')):
     640      def resolve(self, strtab: 'StringTable') -> 'Shdr':
     641          """Resolve sh_name using a string table."""
     642          return self.__class__(strtab.get(self[0]), *self[1:])
# Both layouts use the field order of the Shdr tuple.  sh_type,
# sh_flags and sh_link are converted with Sht, Shf and Shn.
_define_layouts(Shdr,
                layout32='10I',
                layout64='2I4Q2I2Q',
                types=dict(sh_type=Sht,
                           sh_flags=Shf,
                           sh_link=Shn))
     649  
# Corresponds to Elf32_Dyn and Elf64_Dyn.  The nesting through the
# d_un union is skipped, and d_ptr is missing (its representation in
# Python would be identical to d_val).
Dyn = collections.namedtuple('Dyn', 'd_tag d_val')
# Both fields are unpacked as signed integers of the word size; d_tag
# is then converted with Dt.
_define_layouts(Dyn,
                layout32='2i',
                layout64='2q',
                types=dict(d_tag=Dt))
     658  
     659  # Corresponds to Elf32_Sym and Elf64_Sym.
     660  class ESC[4;38;5;81mSym(ESC[4;38;5;149mcollectionsESC[4;38;5;149m.ESC[4;38;5;149mnamedtuple('Sym',
     661      'st_name st_info st_other st_shndx st_value st_size')):
     662      def resolve(self, strtab: 'StringTable') -> 'Sym':
     663          """Resolve st_name using a string table."""
     664          return self.__class__(strtab.get(self[0]), *self[1:])
     665  _define_layouts(Sym,
     666                  layout32='3I2BH',
     667                  layout64='I2BH2Q',
     668                  fields32=('st_name', 'st_value', 'st_size', 'st_info',
     669                            'st_other', 'st_shndx'),
     670                  types=dict(st_shndx=Shn,
     671                             st_info=StInfo))
     672  
     673  # Corresponds to Elf32_Rel and Elf64_Rel.
     674  Rel = collections.namedtuple('Rel', 'r_offset r_info')
     675  _define_layouts(Rel,
     676                  layout32='2I',
     677                  layout64='2Q')
     678  
     679  # Corresponds to Elf32_Rel and Elf64_Rel.
     680  Rela = collections.namedtuple('Rela', 'r_offset r_info r_addend')
     681  _define_layouts(Rela,
     682                  layout32='3I',
     683                  layout64='3Q')
     684  
     685  class ESC[4;38;5;81mStringTable:
     686      """ELF string table."""
     687      def __init__(self, blob):
     688          """Create a new string table backed by the data in the blob.
     689  
     690          blob: a memoryview-like object
     691  
     692          """
     693          self.blob = blob
     694  
     695      def get(self, index) -> bytes:
     696          """Returns the null-terminated byte string at the index."""
     697          blob = self.blob
     698          endindex = index
     699          while True:
     700              if blob[endindex] == 0:
     701                  return bytes(blob[index:endindex])
     702              endindex += 1
     703  
     704  class ESC[4;38;5;81mImage:
     705      """ELF image parser."""
     706      def __init__(self, image):
     707          """Create an ELF image from binary image data.
     708  
     709          image: a memoryview-like object that supports efficient range
     710          subscripting.
     711  
     712          """
     713          self.image = image
     714          ident = self.read(Ident, 0)
     715          classdata = (ident.ei_class, ident.ei_data)
     716          # Set self.Ehdr etc. to the subtypes with the right parsers.
     717          for typ in (Ehdr, Phdr, Shdr, Dyn, Sym, Rel, Rela):
     718              setattr(self, typ.__name__, typ.layouts.get(classdata, None))
     719  
     720          if self.Ehdr is not None:
     721              self.ehdr = self.read(self.Ehdr, 0)
     722              self._shdr_num = self._compute_shdr_num()
     723          else:
     724              self.ehdr = None
     725              self._shdr_num = 0
     726  
     727          self._section = {}
     728          self._stringtab = {}
     729  
     730          if self._shdr_num > 0:
     731              self._shdr_strtab = self._find_shdr_strtab()
     732          else:
     733              self._shdr_strtab = None
     734  
     735      @staticmethod
     736      def readfile(path: str) -> 'Image':
     737          """Reads the ELF file at the specified path."""
     738          with open(path, 'rb') as inp:
     739              return Image(memoryview(inp.read()))
     740  
     741      def _compute_shdr_num(self) -> int:
     742          """Computes the actual number of section headers."""
     743          shnum = self.ehdr.e_shnum
     744          if shnum == 0:
     745              if self.ehdr.e_shoff == 0 or self.ehdr.e_shentsize == 0:
     746                  # No section headers.
     747                  return 0
     748              # Otherwise the extension mechanism is used (which may be
     749              # needed because e_shnum is just 16 bits).
     750              return self.read(self.Shdr, self.ehdr.e_shoff).sh_size
     751          return shnum
     752  
     753      def _find_shdr_strtab(self) -> StringTable:
     754          """Finds the section header string table (maybe via extensions)."""
     755          shstrndx = self.ehdr.e_shstrndx
     756          if shstrndx == Shn.SHN_XINDEX:
     757              shstrndx = self.read(self.Shdr, self.ehdr.e_shoff).sh_link
     758          return self._find_stringtab(shstrndx)
     759  
     760      def read(self, typ: type, offset:int ):
     761          """Reads an object at a specific offset.
     762  
     763          The type must have been enhanced using _define_variants.
     764  
     765          """
     766          return typ.unpack(self.image[offset: offset + typ.size])
     767  
     768      def phdrs(self) -> Phdr:
     769          """Generator iterating over the program headers."""
     770          if self.ehdr is None:
     771              return
     772          size = self.ehdr.e_phentsize
     773          if size != self.Phdr.size:
     774              raise ValueError('Unexpected Phdr size in ELF header: {} != {}'
     775                               .format(size, self.Phdr.size))
     776  
     777          offset = self.ehdr.e_phoff
     778          for _ in range(self.ehdr.e_phnum):
     779              yield self.read(self.Phdr, offset)
     780              offset += size
     781  
     782      def shdrs(self, resolve: bool=True) -> Shdr:
     783          """Generator iterating over the section headers.
     784  
     785          If resolve, section names are automatically translated
     786          using the section header string table.
     787  
     788          """
     789          if self._shdr_num == 0:
     790              return
     791  
     792          size = self.ehdr.e_shentsize
     793          if size != self.Shdr.size:
     794              raise ValueError('Unexpected Shdr size in ELF header: {} != {}'
     795                               .format(size, self.Shdr.size))
     796  
     797          offset = self.ehdr.e_shoff
     798          for _ in range(self._shdr_num):
     799              shdr = self.read(self.Shdr, offset)
     800              if resolve:
     801                  shdr = shdr.resolve(self._shdr_strtab)
     802              yield shdr
     803              offset += size
     804  
     805      def dynamic(self) -> Dyn:
     806          """Generator iterating over the dynamic segment."""
     807          for phdr in self.phdrs():
     808              if phdr.p_type == Pt.PT_DYNAMIC:
     809                  # Pick the first dynamic segment, like the loader.
     810                  if phdr.p_filesz == 0:
     811                      # Probably separated debuginfo.
     812                      return
     813                  offset = phdr.p_offset
     814                  end = offset + phdr.p_memsz
     815                  size = self.Dyn.size
     816                  while True:
     817                      next_offset = offset + size
     818                      if next_offset > end:
     819                          raise ValueError(
     820                              'Dynamic segment size {} is not a multiple of Dyn size {}'.format(
     821                                  phdr.p_memsz, size))
     822                      yield self.read(self.Dyn, offset)
     823                      if next_offset == end:
     824                          return
     825                      offset = next_offset
     826  
     827      def syms(self, shdr: Shdr, resolve: bool=True) -> Sym:
     828          """A generator iterating over a symbol table.
     829  
     830          If resolve, symbol names are automatically translated using
     831          the string table for the symbol table.
     832  
     833          """
     834          assert shdr.sh_type == Sht.SHT_SYMTAB
     835          size = shdr.sh_entsize
     836          if size != self.Sym.size:
     837              raise ValueError('Invalid symbol table entry size {}'.format(size))
     838          offset = shdr.sh_offset
     839          end = shdr.sh_offset + shdr.sh_size
     840          if resolve:
     841              strtab = self._find_stringtab(shdr.sh_link)
     842          while offset < end:
     843              sym = self.read(self.Sym, offset)
     844              if resolve:
     845                  sym = sym.resolve(strtab)
     846              yield sym
     847              offset += size
     848          if offset != end:
     849              raise ValueError('Symbol table is not a multiple of entry size')
     850  
     851      def lookup_string(self, strtab_index: int, strtab_offset: int) -> bytes:
     852          """Looks up a string in a string table identified by its link index."""
     853          try:
     854              strtab = self._stringtab[strtab_index]
     855          except KeyError:
     856              strtab = self._find_stringtab(strtab_index)
     857          return strtab.get(strtab_offset)
     858  
     859      def find_section(self, shndx: Shn) -> Shdr:
     860          """Returns the section header for the indexed section.
     861  
     862          The section name is not resolved.
     863          """
     864          try:
     865              return self._section[shndx]
     866          except KeyError:
     867              pass
     868          if shndx in Shn:
     869              raise ValueError('Reserved section index {}'.format(shndx))
     870          idx = shndx.value
     871          if idx < 0 or idx > self._shdr_num:
     872              raise ValueError('Section index {} out of range [0, {})'.format(
     873                  idx, self._shdr_num))
     874          shdr = self.read(
     875              self.Shdr, self.ehdr.e_shoff + idx * self.Shdr.size)
     876          self._section[shndx] = shdr
     877          return shdr
     878  
     879      def _find_stringtab(self, sh_link: int) -> StringTable:
     880          if sh_link in self._stringtab:
     881              return self._stringtab
     882          if sh_link < 0 or sh_link >= self._shdr_num:
     883              raise ValueError('Section index {} out of range [0, {})'.format(
     884                  sh_link, self._shdr_num))
     885          shdr = self.read(
     886              self.Shdr, self.ehdr.e_shoff + sh_link * self.Shdr.size)
     887          if shdr.sh_type != Sht.SHT_STRTAB:
     888              raise ValueError(
     889                  'Section {} is not a string table: {}'.format(
     890                      sh_link, shdr.sh_type))
     891          strtab = StringTable(
     892              self.image[shdr.sh_offset:shdr.sh_offset + shdr.sh_size])
     893          # This could retrain essentially arbitrary amounts of data,
     894          # but caching string tables seems important for performance.
     895          self._stringtab[sh_link] = strtab
     896          return strtab
     897  
     898  def elf_hash(s):
     899      """Computes the ELF hash of the string."""
     900      acc = 0
     901      for ch in s:
     902          if type(ch) is not int:
     903              ch = ord(ch)
     904          acc = ((acc << 4) + ch) & 0xffffffff
     905          top = acc & 0xf0000000
     906          acc = (acc ^ (top >> 24)) & ~top
     907      return acc
     908  
     909  def gnu_hash(s):
     910      """Computes the GNU hash of the string."""
     911      h = 5381
     912      for ch in s:
     913          if type(ch) is not int:
     914              ch = ord(ch)
     915          h = (h * 33 + ch) & 0xffffffff
     916      return h
     917  
     918  __all__ = [name for name in dir() if name[0].isupper()]