(root)/
gcc-13.2.0/
contrib/
mklog.py
       1  #!/usr/bin/env python3
       2  
       3  # Copyright (C) 2020-2023 Free Software Foundation, Inc.
       4  #
       5  # This file is part of GCC.
       6  #
       7  # GCC is free software; you can redistribute it and/or modify
       8  # it under the terms of the GNU General Public License as published by
       9  # the Free Software Foundation; either version 3, or (at your option)
      10  # any later version.
      11  #
      12  # GCC is distributed in the hope that it will be useful,
      13  # but WITHOUT ANY WARRANTY; without even the implied warranty of
      14  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      15  # GNU General Public License for more details.
      16  #
      17  # You should have received a copy of the GNU General Public License
      18  # along with GCC; see the file COPYING.  If not, write to
      19  # the Free Software Foundation, 51 Franklin Street, Fifth Floor,
      20  # Boston, MA 02110-1301, USA.
      21  
# This script parses a .diff file generated with 'diff -up' or 'diff -cp'
# and generates a skeleton ChangeLog entry for it.  It does not try to be
# too smart when parsing function names, but it produces a reasonable
# approximation.
      26  #
      27  # Author: Martin Liska <mliska@suse.cz>
      28  
      29  import argparse
      30  import datetime
      31  import json
      32  import os
      33  import re
      34  import subprocess
      35  import sys
      36  from itertools import takewhile
      37  
      38  import requests
      39  
      40  from unidiff import PatchSet
      41  
# Maximum width of a generated ChangeLog line, measured with every tab
# counted as TAB_WIDTH columns (see append_changelog_line).
LINE_LIMIT = 100
TAB_WIDTH = 8
# Lower-case prefix of commit-message trailer lines; compared against
# the lower-cased line in skip_line_in_changelog.
CO_AUTHORED_BY_PREFIX = 'co-authored-by: '

# A comment leader ('//', '/*', 'C', 'c', '*' or '!') followed by
# whitespace and 'PR component/number'.
pr_regex = re.compile(r'(\/(\/|\*)|[Cc*!])\s+(?P<pr>PR [a-z+-]+\/[0-9]+)')
# Splits 'PR component/number' into the 'comp' and 'num' groups.
prnum_regex = re.compile(r'PR (?P<comp>[a-z+-]+)/(?P<num>[0-9]+)')
# Same comment leaders as pr_regex, followed by 'DR number'.
dr_regex = re.compile(r'(\/(\/|\*)|[Cc*!])\s+(?P<dr>DR [0-9]+)')
# '{ dg-error' / '{ dg-warning' directive; ends the scan of a new
# test's leading lines in generate_changelog.
dg_regex = re.compile(r'{\s+dg-(error|warning)')
# 'pr'/'PR' plus at least four digits, at the start of a file name or
# after a non-word character or an underscore.
pr_filename_regex = re.compile(r'(^|[\W_])[Pp][Rr](?P<pr>\d{4,})')
# A line whose first character is alphanumeric, '_' or '#'; group 1 is
# the whole line.
identifier_regex = re.compile(r'^([a-zA-Z0-9_#].*)')
# A line starting with '/*'.
comment_regex = re.compile(r'^\/\*')
# 'class', 'struct', 'union' or 'enum', an optional 'GTY(...)' marker
# and the type name (group 3).
struct_regex = re.compile(r'^(class|struct|union|enum)\s+'
                          r'(GTY\(.*\)\s+)?([a-zA-Z0-9_]+)')
# '#define NAME' / '#undef NAME'; group 2 is NAME.
macro_regex = re.compile(r'#\s*(define|undef)\s+([a-zA-Z0-9_]+)')
# A line starting with 'DEF<UPPERCASE> (NAME'; group 1 is NAME.
super_macro_regex = re.compile(r'^DEF[A-Z0-9_]+\s*\(([a-zA-Z0-9_]+)')
# A name followed by '(' and any character other than '*'; group 1 is
# the name.
fn_regex = re.compile(r'([a-zA-Z_][^()\s]*)\s*\([^*]')
# A '<...>' group that contains no further angle brackets.
template_and_param_regex = re.compile(r'<[^<>]*>')
# '(define... "name"'; group 1 is the quoted name.
md_def_regex = re.compile(r'\(define.*\s+"(.*)"')
# Bugzilla REST query; '%s' is replaced by the bug id.
bugzilla_url = 'https://gcc.gnu.org/bugzilla/rest.cgi/bug?id=%s&' \
               'include_fields=summary,component'

# File extensions for which function names are extracted from hunks.
function_extensions = {'.c', '.cpp', '.C', '.cc', '.h', '.inc', '.def', '.md'}

# NB: Makefile.in isn't listed as it's not always generated.
generated_files = {'aclocal.m4', 'config.h.in', 'configure'}

# Used as the argparse description of the command-line interface.
help_message = """\
Generate ChangeLog template for PATCH.
PATCH must be generated using diff(1)'s -up or -cp options
(or their equivalent in git).
"""

# NOTE: despite its name, script_folder is the resolved path of this
# script file itself, not of its directory.
script_folder = os.path.realpath(__file__)
# Two directory levels above the script file, i.e. the parent of the
# directory holding this script.  The command-line entry point replaces
# it when --directory is given.
root = os.path.dirname(os.path.dirname(script_folder))
      76  
      77  
      78  def find_changelog(path):
      79      folder = os.path.split(path)[0]
      80      while True:
      81          if os.path.exists(os.path.join(root, folder, 'ChangeLog')):
      82              return folder
      83          folder = os.path.dirname(folder)
      84          if folder == '':
      85              return folder
      86      raise AssertionError()
      87  
      88  
      89  def extract_function_name(line):
      90      if comment_regex.match(line):
      91          return None
      92      m = struct_regex.search(line)
      93      if m:
      94          # Struct declaration
      95          return m.group(1) + ' ' + m.group(3)
      96      m = macro_regex.search(line)
      97      if m:
      98          # Macro definition
      99          return m.group(2)
     100      m = super_macro_regex.search(line)
     101      if m:
     102          # Supermacro
     103          return m.group(1)
     104      m = fn_regex.search(line)
     105      if m:
     106          # Discard template and function parameters.
     107          fn = m.group(1)
     108          fn = re.sub(template_and_param_regex, '', fn)
     109          return fn.rstrip()
     110      return None
     111  
     112  
     113  def try_add_function(functions, line):
     114      fn = extract_function_name(line)
     115      if fn and fn not in functions:
     116          functions.append(fn)
     117      return bool(fn)
     118  
     119  
     120  def sort_changelog_files(changed_file):
     121      return (changed_file.is_added_file, changed_file.is_removed_file)
     122  
     123  
     124  def get_pr_titles(prs):
     125      output = []
     126      for idx, pr in enumerate(prs):
     127          pr_id = pr.split('/')[-1]
     128          r = requests.get(bugzilla_url % pr_id)
     129          bugs = r.json()['bugs']
     130          if len(bugs) == 1:
     131              prs[idx] = 'PR %s/%s' % (bugs[0]['component'], pr_id)
     132              out = '%s - %s\n' % (prs[idx], bugs[0]['summary'])
     133              if out not in output:
     134                  output.append(out)
     135      if output:
     136          output.append('')
     137      return '\n'.join(output)
     138  
     139  
     140  def append_changelog_line(out, relative_path, text):
     141      line = f'\t* {relative_path}:'
     142      if len(line.replace('\t', ' ' * TAB_WIDTH) + ' ' + text) <= LINE_LIMIT:
     143          out += f'{line} {text}\n'
     144      else:
     145          out += f'{line}\n'
     146          out += f'\t{text}\n'
     147      return out
     148  
     149  
     150  def get_rel_path_if_prefixed(path, folder):
     151      if path.startswith(folder):
     152          return path[len(folder):].lstrip('/')
     153      else:
     154          return path
     155  
     156  
     157  def generate_changelog(data, no_functions=False, fill_pr_titles=False,
     158                         additional_prs=None):
     159      global prs
     160      prs = []
     161  
     162      changelogs = {}
     163      changelog_list = []
     164      out = ''
     165      diff = PatchSet(data)
     166  
     167      if additional_prs:
     168          for apr in additional_prs:
     169              if not apr.startswith('PR ') and '/' in apr:
     170                  apr = 'PR ' + apr
     171              if apr not in prs:
     172                  prs.append(apr)
     173      for file in diff:
     174          # skip files that can't be parsed
     175          if file.path == '/dev/null':
     176              continue
     177          changelog = find_changelog(file.path)
     178          if changelog not in changelogs:
     179              changelogs[changelog] = []
     180              changelog_list.append(changelog)
     181          changelogs[changelog].append(file)
     182  
     183          # Extract PR entries from newly added tests
     184          if 'testsuite' in file.path and file.is_added_file:
     185              # Only search first ten lines as later lines may
     186              # contains commented code which a note that it
     187              # has not been tested due to a certain PR or DR.
     188              this_file_prs = []
     189              hunks = list(file)
     190              if hunks:
     191                  for line in hunks[0][0:10]:
     192                      m = pr_regex.search(line.value)
     193                      if m:
     194                          pr = m.group('pr')
     195                          if pr not in prs:
     196                              prs.append(pr)
     197                              this_file_prs.append(pr.split('/')[-1])
     198                      else:
     199                          m = dr_regex.search(line.value)
     200                          if m:
     201                              dr = m.group('dr')
     202                              if dr not in prs:
     203                                  prs.append(dr)
     204                                  this_file_prs.append(dr.split('/')[-1])
     205                          elif dg_regex.search(line.value):
     206                              # Found dg-warning/dg-error line
     207                              break
     208  
     209              # PR number in the file name
     210              fname = os.path.basename(file.path)
     211              m = pr_filename_regex.search(fname)
     212              if m:
     213                  pr = m.group('pr')
     214                  pr2 = 'PR ' + pr
     215                  if pr not in this_file_prs and pr2 not in prs:
     216                      prs.append(pr2)
     217  
     218      if fill_pr_titles:
     219          out += get_pr_titles(prs)
     220  
     221      # print list of PR entries before ChangeLog entries
     222      if prs:
     223          if not out:
     224              out += '\n'
     225          for pr in prs:
     226              out += '\t%s\n' % pr
     227          out += '\n'
     228  
     229      # sort ChangeLog so that 'testsuite' is at the end
     230      for changelog in sorted(changelog_list, key=lambda x: 'testsuite' in x):
     231          files = changelogs[changelog]
     232          out += '%s:\n' % os.path.join(changelog, 'ChangeLog')
     233          out += '\n'
     234          # new and deleted files should be at the end
     235          for file in sorted(files, key=sort_changelog_files):
     236              assert file.path.startswith(changelog)
     237              in_tests = 'testsuite' in changelog or 'testsuite' in file.path
     238              relative_path = get_rel_path_if_prefixed(file.path, changelog)
     239              functions = []
     240              if file.is_added_file:
     241                  msg = 'New test.' if in_tests else 'New file.'
     242                  out = append_changelog_line(out, relative_path, msg)
     243              elif file.is_removed_file:
     244                  out = append_changelog_line(out, relative_path, 'Removed.')
     245              elif hasattr(file, 'is_rename') and file.is_rename:
     246                  # A file can be theoretically moved to a location that
     247                  # belongs to a different ChangeLog.  Let user fix it.
     248                  #
     249                  # Since unidiff 0.7.0, path.file == path.target_file[2:],
     250                  # it used to be path.source_file[2:]
     251                  relative_path = get_rel_path_if_prefixed(file.source_file[2:],
     252                                                           changelog)
     253                  out = append_changelog_line(out, relative_path, 'Moved to...')
     254                  new_path = get_rel_path_if_prefixed(file.target_file[2:],
     255                                                      changelog)
     256                  out += f'\t* {new_path}: ...here.\n'
     257              elif os.path.basename(file.path) in generated_files:
     258                  out += '\t* %s: Regenerate.\n' % (relative_path)
     259                  append_changelog_line(out, relative_path, 'Regenerate.')
     260              else:
     261                  if not no_functions:
     262                      for hunk in file:
     263                          # Do not add function names for testsuite files
     264                          extension = os.path.splitext(relative_path)[1]
     265                          if not in_tests and extension in function_extensions:
     266                              last_fn = None
     267                              modified_visited = False
     268                              success = False
     269                              for line in hunk:
     270                                  m = identifier_regex.match(line.value)
     271                                  if line.is_added or line.is_removed:
     272                                      # special-case definition in .md files
     273                                      m2 = md_def_regex.match(line.value)
     274                                      if extension == '.md' and m2:
     275                                          fn = m2.group(1)
     276                                          if fn not in functions:
     277                                              functions.append(fn)
     278                                              last_fn = None
     279                                              success = True
     280  
     281                                      if not line.value.strip():
     282                                          continue
     283                                      modified_visited = True
     284                                      if m and try_add_function(functions,
     285                                                                m.group(1)):
     286                                          last_fn = None
     287                                          success = True
     288                                  elif line.is_context:
     289                                      if last_fn and modified_visited:
     290                                          try_add_function(functions, last_fn)
     291                                          last_fn = None
     292                                          modified_visited = False
     293                                          success = True
     294                                      elif m:
     295                                          last_fn = m.group(1)
     296                                          modified_visited = False
     297                              if not success:
     298                                  try_add_function(functions,
     299                                                   hunk.section_header)
     300                  if functions:
     301                      out += '\t* %s (%s):\n' % (relative_path, functions[0])
     302                      for fn in functions[1:]:
     303                          out += '\t(%s):\n' % fn
     304                  else:
     305                      out += '\t* %s:\n' % relative_path
     306          out += '\n'
     307      return out
     308  
     309  
     310  def update_copyright(data):
     311      current_timestamp = datetime.datetime.now().strftime('%Y-%m-%d')
     312      username = subprocess.check_output('git config user.name', shell=True,
     313                                         encoding='utf8').strip()
     314      email = subprocess.check_output('git config user.email', shell=True,
     315                                      encoding='utf8').strip()
     316  
     317      changelogs = set()
     318      diff = PatchSet(data)
     319  
     320      for file in diff:
     321          changelog = os.path.join(find_changelog(file.path), 'ChangeLog')
     322          if changelog not in changelogs:
     323              changelogs.add(changelog)
     324              with open(changelog) as f:
     325                  content = f.read()
     326              with open(changelog, 'w+') as f:
     327                  f.write(f'{current_timestamp}  {username}  <{email}>\n\n')
     328                  f.write('\tUpdate copyright years.\n\n')
     329                  f.write(content)
     330  
     331  
     332  def skip_line_in_changelog(line):
     333      if line.lower().startswith(CO_AUTHORED_BY_PREFIX) or line.startswith('#'):
     334          return False
     335      return True
     336  
     337  
     338  if __name__ == '__main__':
     339      extra_args = os.getenv('GCC_MKLOG_ARGS')
     340      if extra_args:
     341          sys.argv += json.loads(extra_args)
     342  
     343      parser = argparse.ArgumentParser(description=help_message)
     344      parser.add_argument('input', nargs='?',
     345                          help='Patch file (or missing, read standard input)')
     346      parser.add_argument('-b', '--pr-numbers', action='store',
     347                          type=lambda arg: arg.split(','), nargs='?',
     348                          help='Add the specified PRs (comma separated)')
     349      parser.add_argument('-s', '--no-functions', action='store_true',
     350                          help='Do not generate function names in ChangeLogs')
     351      parser.add_argument('-p', '--fill-up-bug-titles', action='store_true',
     352                          help='Download title of mentioned PRs')
     353      parser.add_argument('-d', '--directory',
     354                          help='Root directory where to search for ChangeLog '
     355                          'files')
     356      parser.add_argument('-c', '--changelog',
     357                          help='Append the ChangeLog to a git commit message '
     358                               'file')
     359      parser.add_argument('--update-copyright', action='store_true',
     360                          help='Update copyright in ChangeLog files')
     361      args = parser.parse_args()
     362      if args.input == '-':
     363          args.input = None
     364      if args.directory:
     365          root = args.directory
     366  
     367      data = open(args.input, newline='\n') if args.input else sys.stdin
     368      if args.update_copyright:
     369          update_copyright(data)
     370      else:
     371          output = generate_changelog(data, args.no_functions,
     372                                      args.fill_up_bug_titles, args.pr_numbers)
     373          if args.changelog:
     374              lines = open(args.changelog).read().split('\n')
     375              start = list(takewhile(skip_line_in_changelog, lines))
     376              end = lines[len(start):]
     377              with open(args.changelog, 'w') as f:
     378                  if not start or not start[0]:
     379                      if len(prs) == 1:
     380                          # initial commit subject line 'component: [PRnnnnn]'
     381                          m = prnum_regex.match(prs[0])
     382                          if m:
     383                              title = f'{m.group("comp")}: [PR{m.group("num")}]'
     384                              start.insert(0, title)
     385                  if start:
     386                      # append empty line
     387                      if start[-1] != '':
     388                          start.append('')
     389                  else:
     390                      # append 2 empty lines
     391                      start = 2 * ['']
     392                  f.write('\n'.join(start))
     393                  f.write('\n')
     394                  f.write(output)
     395                  f.write('\n'.join(end))
     396          else:
     397              print(output, end='')