(root)/
glibc-2.38/
conform/
linknamespace.py
       1  #!/usr/bin/python3
       2  # Check that use of symbols declared in a given header does not result
       3  # in any symbols being brought in that are not reserved with external
       4  # linkage for the given standard.
       5  # Copyright (C) 2014-2023 Free Software Foundation, Inc.
       6  # This file is part of the GNU C Library.
       7  #
       8  # The GNU C Library is free software; you can redistribute it and/or
       9  # modify it under the terms of the GNU Lesser General Public
      10  # License as published by the Free Software Foundation; either
      11  # version 2.1 of the License, or (at your option) any later version.
      12  #
      13  # The GNU C Library is distributed in the hope that it will be useful,
      14  # but WITHOUT ANY WARRANTY; without even the implied warranty of
      15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      16  # Lesser General Public License for more details.
      17  #
      18  # You should have received a copy of the GNU Lesser General Public
      19  # License along with the GNU C Library; if not, see
      20  # <https://www.gnu.org/licenses/>.
      21  
      22  import argparse
      23  from collections import defaultdict
      24  import os.path
      25  import re
      26  import subprocess
      27  import sys
      28  import tempfile
      29  
      30  import glibcconform
      31  
# The following whitelisted symbols are also allowed for now; main()
# merges them into the set of standard symbols read from --stdsyms.
#
# * Bug 17576: stdin, stdout, stderr only reserved with external
# linkage when stdio.h included (and possibly not then), not
# generally.
#
# * Bug 18442: re_syntax_options wrongly brought in by regcomp and
# used by re_comp.
#
WHITELIST = {'stdin', 'stdout', 'stderr', 're_syntax_options'}
      42  
      43  
      44  def list_syms(filename):
      45      """Return information about GLOBAL and WEAK symbols listed in readelf
      46      -s output."""
      47      ret = []
      48      cur_file = filename
      49      with open(filename, 'r') as syms_file:
      50          for line in syms_file:
      51              line = line.rstrip()
      52              if line.startswith('File: '):
      53                  cur_file = line[len('File: '):]
      54                  cur_file = cur_file.split('/')[-1]
      55                  continue
      56              # Architecture-specific st_other bits appear inside [] and
      57              # disrupt the format of readelf output.
      58              line = re.sub(r'\[.*?\]', '', line)
      59              fields = line.split()
      60              if len(fields) < 8:
      61                  continue
      62              bind = fields[4]
      63              ndx = fields[6]
      64              sym = fields[7]
      65              if bind not in ('GLOBAL', 'WEAK'):
      66                  continue
      67              if not re.fullmatch('[A-Za-z0-9_]+', sym):
      68                  continue
      69              ret.append((cur_file, sym, bind, ndx != 'UND'))
      70      return ret
      71  
      72  
def main():
    """The main entry point.

    Parse the command line, compile a test file that takes the address
    of every function glibcconform reports for --header under
    --standard, and compute which library symbols that object brings
    in, using the symbol information in --libsyms.  For every such
    symbol that neither starts with an underscore nor appears in
    --stdsyms (plus WHITELIST), print the chain of references that
    brought it in.  Exit with status 1 if anything was printed and 0
    otherwise.
    """
    parser = argparse.ArgumentParser(description='Check link-time namespace.')
    parser.add_argument('--header', metavar='HEADER',
                        help='name of header')
    parser.add_argument('--standard', metavar='STD',
                        help='standard to use when processing header')
    parser.add_argument('--cc', metavar='CC',
                        help='C compiler to use')
    parser.add_argument('--flags', metavar='CFLAGS',
                        help='Compiler flags to use with CC')
    parser.add_argument('--stdsyms', metavar='FILE',
                        help='File with list of standard symbols')
    parser.add_argument('--libsyms', metavar='FILE',
                        help='File with symbol information from libraries')
    parser.add_argument('--readelf', metavar='READELF',
                        help='readelf program to use')
    args = parser.parse_args()

    # Load the list of symbols that are OK (one name per line), then
    # add the temporarily tolerated names from WHITELIST.
    stdsyms = set()
    with open(args.stdsyms, 'r') as stdsyms_file:
        for line in stdsyms_file:
            stdsyms.add(line.rstrip())
    stdsyms |= WHITELIST

    # Load information about GLOBAL and WEAK symbols defined or used
    # in the standard libraries.
    # Symbols from a given object, except for weak defined symbols.
    seen_syms = defaultdict(list)
    # Strong undefined symbols from a given object.
    strong_undef_syms = defaultdict(list)
    # Objects defining a given symbol (strongly or weakly).
    sym_objs = defaultdict(list)
    for file, name, bind, defined in list_syms(args.libsyms):
        if defined:
            sym_objs[name].append(file)
        if bind == 'GLOBAL' or not defined:
            seen_syms[file].append(name)
        if bind == 'GLOBAL' and not defined:
            strong_undef_syms[file].append(name)

    # Determine what ELF-level symbols are brought in by use of C-level
    # symbols declared in the given header.
    #
    # The rules followed are heuristic and so may produce false
    # positives and false negatives.
    #
    # * All undefined symbols are considered of significance, but it is
    # possible that (a) any standard library definition is weak, so
    # can be overridden by the user's definition, and (b) the symbol
    # is only used conditionally and not if the program is limited to
    # standard functionality.
    #
    # * If a symbol reference is only brought in by the user using a
    # data symbol rather than a function from the standard library,
    # this will not be detected.
    #
    # * If a symbol reference is only brought in by crt*.o or libgcc,
    # this will not be detected.
    #
    # * If a symbol reference is only brought in through __builtin_foo
    # in a standard macro being compiled to call foo, this will not be
    # detected.
    #
    # * Header inclusions should be compiled several times with
    # different options such as -O2, -D_FORTIFY_SOURCE and
    # -D_FILE_OFFSET_BITS=64 to find out what symbols are undefined
    # from such a compilation; this is not yet implemented.
    #
    # * This script finds symbols referenced through use of macros on
    # the basis that if a macro calls an internal function, that
    # function must also be declared in the header.  However, the
    # header might also declare implementation-namespace functions
    # that are not called by any standard macro in the header,
    # resulting in false positives for any symbols brought in only
    # through use of those implementation-namespace functions.
    #
    # * Namespace issues can apply for dynamic linking as well as
    # static linking, when a call is from one shared library to
    # another or uses a PLT entry for a call within a shared library;
    # such issues are only detected by this script if the same
    # namespace issue applies for static linking.

    # Symbol name -> text describing the chain of references through
    # which the symbol was first brought in.
    seen_where = {}
    # Library objects whose symbols have already been processed.
    files_seen = set()
    # Every strong undefined symbol found so far -> its chain text.
    all_undef = {}
    # Undefined symbols still to be resolved in the next pass of the
    # loop below -> their chain text.
    current_undef = {}
    compiler = '%s %s' % (args.cc, args.flags)
    c_syms = glibcconform.list_exported_functions(compiler, args.standard,
                                                  args.header)
    with tempfile.TemporaryDirectory() as temp_dir:
        cincfile_name = os.path.join(temp_dir, 'undef.c')
        cincfile_o_name = os.path.join(temp_dir, 'undef.o')
        cincfile_sym_name = os.path.join(temp_dir, 'undef.sym')
        # The test source includes the header and stores the address of
        # each listed function in a variable, so that the object file
        # has an undefined reference to each of them.
        cincfile_text = ('#include <%s>\n%s\n'
                         % (args.header,
                            '\n'.join('void *__glibc_test_%s = (void *) &%s;'
                                      % (sym, sym) for sym in sorted(c_syms))))
        with open(cincfile_name, 'w') as cincfile:
            cincfile.write(cincfile_text)
        # Compile only (-c); the command is a single string run through
        # the shell, so CC and CFLAGS may each contain several words.
        cmd = ('%s %s -D_ISOMAC %s -c %s -o %s'
               % (args.cc, args.flags, glibcconform.CFLAGS[args.standard],
                  cincfile_name, cincfile_o_name))
        subprocess.check_call(cmd, shell=True)
        # Dump the object's symbol table in the C locale to a file in
        # the format list_syms parses.
        cmd = ('LC_ALL=C %s -W -s %s > %s'
               % (args.readelf, cincfile_o_name, cincfile_sym_name))
        subprocess.check_call(cmd, shell=True)
        # The strong undefined symbols of the test object are the
        # starting points of the search.
        for file, name, bind, defined in list_syms(cincfile_sym_name):
            if bind == 'GLOBAL' and not defined:
                sym_text = '[initial] %s' % name
                seen_where[name] = sym_text
                all_undef[name] = sym_text
                current_undef[name] = sym_text

    # Repeatedly pull in the objects that define the currently
    # undefined symbols until no new strong undefined symbols appear.
    while current_undef:
        new_undef = {}
        for sym, cu_sym in sorted(current_undef.items()):
            for file in sym_objs[sym]:
                # Each object is processed at most once.
                if file in files_seen:
                    continue
                files_seen.add(file)
                # Record every symbol this object contributes, keeping
                # the first chain found for each symbol.
                for ssym in seen_syms[file]:
                    if ssym not in seen_where:
                        seen_where[ssym] = ('%s -> [%s] %s'
                                            % (cu_sym, file, ssym))
                # Strong undefined references of this object that have
                # not been seen before are queued for the next pass.
                for usym in strong_undef_syms[file]:
                    if usym not in all_undef:
                        usym_text = '%s -> [%s] %s' % (cu_sym, file, usym)
                        all_undef[usym] = usym_text
                        new_undef[usym] = usym_text
        current_undef = new_undef

    # Report every symbol brought in that is neither in the
    # underscore-prefixed namespace nor in the allowed set.
    ret = 0
    for sym in sorted(seen_where):
        if sym.startswith('_'):
            continue
        if sym in stdsyms:
            continue
        print(seen_where[sym])
        ret = 1
    sys.exit(ret)
     214  
     215  
# Run the check only when executed as a script, not when imported.
if __name__ == '__main__':
    main()