1  import os
       2  import re
       3  import shlex
       4  import shutil
       5  import subprocess
       6  import sys
       7  import sysconfig
       8  import unittest
       9  from test import support
      10  
      11  
# Full path of the gdb executable when shutil.which() finds one; otherwise
# fall back to the bare name 'gdb'.
GDB_PROGRAM = shutil.which('gdb') or 'gdb'

# Location of custom hooks file in a repository checkout: python-gdb.py in
# the same directory as the running interpreter (sys.executable).
CHECKOUT_HOOK_PATH = os.path.join(os.path.dirname(sys.executable),
                                  'python-gdb.py')

# gdb_sample.py located in the same directory as this module.
SAMPLE_SCRIPT = os.path.join(os.path.dirname(__file__), 'gdb_sample.py')
# Default breakpoint location used by DebuggerTests.get_stack_trace().
BREAKPOINT_FN = 'builtin_id'

# Passed to run_gdb() as the PYTHONHASHSEED environment variable by
# DebuggerTests.get_stack_trace().
PYTHONHASHSEED = '123'
      22  
      23  
      24  def clean_environment():
      25      # Remove PYTHON* environment variables such as PYTHONHOME
      26      return {name: value for name, value in os.environ.items()
      27              if not name.startswith('PYTHON')}
      28  
      29  
# Temporary value until it's initialized by get_gdb_version() below.
# run_gdb() compares GDB_VERSION against (7, 4) to decide whether to pass
# --init-eval-command, so with this placeholder the initial '--version'
# probe is run without that option.
GDB_VERSION = (0, 0)
      32  
      33  def run_gdb(*args, exitcode=0, check=True, **env_vars):
      34      """Runs gdb in --batch mode with the additional arguments given by *args.
      35  
      36      Returns its (stdout, stderr) decoded from utf-8 using the replace handler.
      37      """
      38      env = clean_environment()
      39      if env_vars:
      40          env.update(env_vars)
      41  
      42      cmd = [GDB_PROGRAM,
      43             # Batch mode: Exit after processing all the command files
      44             # specified with -x/--command
      45             '--batch',
      46              # -nx: Do not execute commands from any .gdbinit initialization
      47              # files (gh-66384)
      48             '-nx']
      49      if GDB_VERSION >= (7, 4):
      50          cmd.extend(('--init-eval-command',
      51                      f'add-auto-load-safe-path {CHECKOUT_HOOK_PATH}'))
      52      cmd.extend(args)
      53  
      54      proc = subprocess.run(
      55          cmd,
      56          # Redirect stdin to prevent gdb from messing with the terminal settings
      57          stdin=subprocess.PIPE,
      58          stdout=subprocess.PIPE,
      59          stderr=subprocess.PIPE,
      60          encoding="utf8", errors="backslashreplace",
      61          env=env)
      62  
      63      stdout = proc.stdout
      64      stderr = proc.stderr
      65      if check and proc.returncode != exitcode:
      66          cmd_text = shlex.join(cmd)
      67          raise Exception(f"{cmd_text} failed with exit code {proc.returncode}, "
      68                          f"expected exit code {exitcode}:\n"
      69                          f"stdout={stdout!r}\n"
      70                          f"stderr={stderr!r}")
      71  
      72      return (stdout, stderr)
      73  
      74  
      75  def get_gdb_version():
      76      try:
      77          stdout, stderr = run_gdb('--version')
      78      except OSError as exc:
      79          # This is what "no gdb" looks like.  There may, however, be other
      80          # errors that manifest this way too.
      81          raise unittest.SkipTest(f"Couldn't find gdb program on the path: {exc}")
      82  
      83      # Regex to parse:
      84      # 'GNU gdb (GDB; SUSE Linux Enterprise 12) 7.7\n' -> 7.7
      85      # 'GNU gdb (GDB) Fedora 7.9.1-17.fc22\n' -> 7.9
      86      # 'GNU gdb 6.1.1 [FreeBSD]\n' -> 6.1
      87      # 'GNU gdb (GDB) Fedora (7.5.1-37.fc18)\n' -> 7.5
      88      # 'HP gdb 6.7 for HP Itanium (32 or 64 bit) and target HP-UX 11iv2 and 11iv3.\n' -> 6.7
      89      match = re.search(r"^(?:GNU|HP) gdb.*?\b(\d+)\.(\d+)", stdout)
      90      if match is None:
      91          raise Exception("unable to parse gdb version: %r" % stdout)
      92      version_text = stdout
      93      major = int(match.group(1))
      94      minor = int(match.group(2))
      95      version = (major, minor)
      96      return (version_text, version)
      97  
# Probe gdb once while this module is being imported.  Rebinding GDB_VERSION
# here replaces the (0, 0) placeholder, so later run_gdb() calls see the
# detected version.
GDB_VERSION_TEXT, GDB_VERSION = get_gdb_version()
if GDB_VERSION < (7, 0):
    # Raised at module level, i.e. during import of this module.
    raise unittest.SkipTest(
        f"gdb versions before 7.0 didn't support python embedding. "
        f"Saw gdb version {GDB_VERSION[0]}.{GDB_VERSION[1]}:\n"
        f"{GDB_VERSION_TEXT}")
     104  
     105  
     106  def check_usable_gdb():
     107      # Verify that "gdb" was built with the embedded Python support enabled and
     108      # verify that "gdb" can load our custom hooks, as OS security settings may
     109      # disallow this without a customized .gdbinit.
     110      stdout, stderr = run_gdb(
     111          '--eval-command=python import sys; print(sys.version_info)',
     112          '--args', sys.executable,
     113          check=False)
     114  
     115      if "auto-loading has been declined" in stderr:
     116          raise unittest.SkipTest(
     117              f"gdb security settings prevent use of custom hooks; "
     118              f"stderr: {stderr!r}")
     119  
     120      if not stdout:
     121          raise unittest.SkipTest(
     122              f"gdb not built with embedded python support; "
     123              f"stderr: {stderr!r}")
     124  
     125      if "major=2" in stdout:
     126          raise unittest.SkipTest("gdb built with Python 2")
     127  
# Run the probe while this module is being imported; it raises
# unittest.SkipTest when gdb is not usable.
check_usable_gdb()
     129  
     130  
     131  # Control-flow enforcement technology
     132  def cet_protection():
     133      cflags = sysconfig.get_config_var('CFLAGS')
     134      if not cflags:
     135          return False
     136      flags = cflags.split()
     137      # True if "-mcet -fcf-protection" options are found, but false
     138      # if "-fcf-protection=none" or "-fcf-protection=return" is found.
     139      return (('-mcet' in flags)
     140              and any((flag.startswith('-fcf-protection')
     141                       and not flag.endswith(("=none", "=return")))
     142                      for flag in flags))
# Evaluated once at import time.  DebuggerTests.get_stack_trace() consults it
# to decide whether to insert a 'next' command before caller-supplied gdb
# commands.
CET_PROTECTION = cet_protection()
     144  
     145  
     146  def setup_module():
     147      if support.verbose:
     148          print(f"gdb version {GDB_VERSION[0]}.{GDB_VERSION[1]}:")
     149          for line in GDB_VERSION_TEXT.splitlines():
     150              print(" " * 4 + line)
     151          print(f"    path: {GDB_PROGRAM}")
     152          print()
     153  
     154  
     155  class ESC[4;38;5;81mDebuggerTests(ESC[4;38;5;149munittestESC[4;38;5;149m.ESC[4;38;5;149mTestCase):
     156  
     157      """Test that the debugger can debug Python."""
     158  
     159      def get_stack_trace(self, source=None, script=None,
     160                          breakpoint=BREAKPOINT_FN,
     161                          cmds_after_breakpoint=None,
     162                          import_site=False,
     163                          ignore_stderr=False):
     164          '''
     165          Run 'python -c SOURCE' under gdb with a breakpoint.
     166  
     167          Support injecting commands after the breakpoint is reached
     168  
     169          Returns the stdout from gdb
     170  
     171          cmds_after_breakpoint: if provided, a list of strings: gdb commands
     172          '''
     173          # We use "set breakpoint pending yes" to avoid blocking with a:
     174          #   Function "foo" not defined.
     175          #   Make breakpoint pending on future shared library load? (y or [n])
     176          # error, which typically happens python is dynamically linked (the
     177          # breakpoints of interest are to be found in the shared library)
     178          # When this happens, we still get:
     179          #   Function "textiowrapper_write" not defined.
     180          # emitted to stderr each time, alas.
     181  
     182          # Initially I had "--eval-command=continue" here, but removed it to
     183          # avoid repeated print breakpoints when traversing hierarchical data
     184          # structures
     185  
     186          # Generate a list of commands in gdb's language:
     187          commands = [
     188              'set breakpoint pending yes',
     189              'break %s' % breakpoint,
     190  
     191              # The tests assume that the first frame of printed
     192              #  backtrace will not contain program counter,
     193              #  that is however not guaranteed by gdb
     194              #  therefore we need to use 'set print address off' to
     195              #  make sure the counter is not there. For example:
     196              # #0 in PyObject_Print ...
     197              #  is assumed, but sometimes this can be e.g.
     198              # #0 0x00003fffb7dd1798 in PyObject_Print ...
     199              'set print address off',
     200  
     201              'run',
     202          ]
     203  
     204          # GDB as of 7.4 onwards can distinguish between the
     205          # value of a variable at entry vs current value:
     206          #   http://sourceware.org/gdb/onlinedocs/gdb/Variables.html
     207          # which leads to the selftests failing with errors like this:
     208          #   AssertionError: 'v@entry=()' != '()'
     209          # Disable this:
     210          if GDB_VERSION >= (7, 4):
     211              commands += ['set print entry-values no']
     212  
     213          if cmds_after_breakpoint:
     214              if CET_PROTECTION:
     215                  # bpo-32962: When Python is compiled with -mcet
     216                  # -fcf-protection, function arguments are unusable before
     217                  # running the first instruction of the function entry point.
     218                  # The 'next' command makes the required first step.
     219                  commands += ['next']
     220              commands += cmds_after_breakpoint
     221          else:
     222              commands += ['backtrace']
     223  
     224          # print commands
     225  
     226          # Use "commands" to generate the arguments with which to invoke "gdb":
     227          args = ['--eval-command=%s' % cmd for cmd in commands]
     228          args += ["--args",
     229                   sys.executable]
     230          args.extend(subprocess._args_from_interpreter_flags())
     231  
     232          if not import_site:
     233              # -S suppresses the default 'import site'
     234              args += ["-S"]
     235  
     236          if source:
     237              args += ["-c", source]
     238          elif script:
     239              args += [script]
     240  
     241          # Use "args" to invoke gdb, capturing stdout, stderr:
     242          out, err = run_gdb(*args, PYTHONHASHSEED=PYTHONHASHSEED)
     243  
     244          if not ignore_stderr:
     245              for line in err.splitlines():
     246                  print(line, file=sys.stderr)
     247  
     248          # bpo-34007: Sometimes some versions of the shared libraries that
     249          # are part of the traceback are compiled in optimised mode and the
     250          # Program Counter (PC) is not present, not allowing gdb to walk the
     251          # frames back. When this happens, the Python bindings of gdb raise
     252          # an exception, making the test impossible to succeed.
     253          if "PC not saved" in err:
     254              raise unittest.SkipTest("gdb cannot walk the frame object"
     255                                      " because the Program Counter is"
     256                                      " not present")
     257  
     258          # bpo-40019: Skip the test if gdb failed to read debug information
     259          # because the Python binary is optimized.
     260          for pattern in (
     261              '(frame information optimized out)',
     262              'Unable to read information on python frame',
     263  
     264              # gh-91960: On Python built with "clang -Og", gdb gets
     265              # "frame=<optimized out>" for _PyEval_EvalFrameDefault() parameter
     266              '(unable to read python frame information)',
     267  
     268              # gh-104736: On Python built with "clang -Og" on ppc64le,
     269              # "py-bt" displays a truncated or not traceback, but "where"
     270              # logs this error message:
     271              'Backtrace stopped: frame did not save the PC',
     272  
     273              # gh-104736: When "bt" command displays something like:
     274              # "#1  0x0000000000000000 in ?? ()", the traceback is likely
     275              # truncated or wrong.
     276              ' ?? ()',
     277          ):
     278              if pattern in out:
     279                  raise unittest.SkipTest(f"{pattern!r} found in gdb output")
     280  
     281          return out
     282  
     283      def assertEndsWith(self, actual, exp_end):
     284          '''Ensure that the given "actual" string ends with "exp_end"'''
     285          self.assertTrue(actual.endswith(exp_end),
     286                          msg='%r did not end with %r' % (actual, exp_end))
     287  
     288      def assertMultilineMatches(self, actual, pattern):
     289          m = re.match(pattern, actual, re.DOTALL)
     290          if not m:
     291              self.fail(msg='%r did not match %r' % (actual, pattern))