(root)/
gcc-13.2.0/
contrib/
gcc-changelog/
git_commit.py
       1  #!/usr/bin/env python3
       2  
       3  # Copyright (C) 2020-2023 Free Software Foundation, Inc.
       4  #
       5  # This file is part of GCC.
       6  #
       7  # GCC is free software; you can redistribute it and/or modify it under
       8  # the terms of the GNU General Public License as published by the Free
       9  # Software Foundation; either version 3, or (at your option) any later
      10  # version.
      11  #
      12  # GCC is distributed in the hope that it will be useful, but WITHOUT ANY
      13  # WARRANTY; without even the implied warranty of MERCHANTABILITY or
      14  # FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      15  # for more details.
      16  #
      17  # You should have received a copy of the GNU General Public License
      18  # along with GCC; see the file COPYING3.  If not see
      19  # <http://www.gnu.org/licenses/>.  */
      20  
      21  import difflib
      22  import os
      23  import re
      24  import sys
      25  from collections import defaultdict
      26  
# Directories that may own a ChangeLog file.  GitCommit copies this set in
# init_changelog_locations() and matches ChangeLog locations named in a
# commit message against that copy.
default_changelog_locations = {
    'c++tools',
    'config',
    'contrib',
    'contrib/header-tools',
    'contrib/reghunt',
    'contrib/regression',
    'fixincludes',
    'gcc/ada',
    'gcc/analyzer',
    'gcc/brig',
    'gcc/c',
    'gcc/c-family',
    'gcc',
    'gcc/cp',
    'gcc/d',
    'gcc/fortran',
    'gcc/go',
    'gcc/jit',
    'gcc/lto',
    'gcc/m2',
    'gcc/objc',
    'gcc/objcp',
    'gcc/po',
    'gcc/rust',
    'gcc/testsuite',
    'gnattools',
    'gotools',
    'include',
    'intl',
    'libada',
    'libatomic',
    'libbacktrace',
    'libcc1',
    'libcody',
    'libcpp',
    'libcpp/po',
    'libdecnumber',
    'libffi',
    'libgcc',
    'libgcc/config/avr/libf7',
    'libgcc/config/libbid',
    'libgfortran',
    'libgm2',
    'libgomp',
    'libhsail-rt',
    'libiberty',
    'libitm',
    'libobjc',
    'libphobos',
    'libquadmath',
    'libsanitizer',
    'libssp',
    'libstdc++-v3',
    'libvtv',
    'lto-plugin',
    'maintainer-scripts',
    'zlib'}
      85  
# Component names accepted in "PR component/NNNN" references, both in the
# subject line and in the ChangeLog part of a commit message; any other
# component is reported as an error.
bug_components = {
    'ada',
    'analyzer',
    'boehm-gc',
    'bootstrap',
    'c',
    'c++',
    'd',
    'debug',
    'demangler',
    'driver',
    'fastjar',
    'fortran',
    'gcov-profile',
    'go',
    'hsa',
    'inline-asm',
    'ipa',
    'java',
    'jit',
    'libbacktrace',
    'libf2c',
    'libffi',
    'libfortran',
    'libgcc',
    'libgcj',
    'libgomp',
    'libitm',
    'libobjc',
    'libquadmath',
    'libstdc++',
    'lto',
    'middle-end',
    'modula2',
    'objc',
    'objc++',
    'other',
    'pch',
    'pending',
    'plugins',
    'preprocessor',
    'regression',
    'rtl-optimization',
    'rust',
    'sanitizer',
    'spam',
    'target',
    'testsuite',
    'translation',
    'tree-optimization',
    'web'}
     137  
# Path prefixes for which GitCommit.in_ignored_location() returns True.
ignored_prefixes = {
    'gcc/d/dmd/',
    'gcc/go/gofrontend/',
    'gcc/testsuite/gdc.test/',
    'gcc/testsuite/go.test/test/',
    'libffi/',
    'libgo/',
    'libphobos/libdruntime/',
    'libphobos/src/',
    'libsanitizer/',
    }

# Path prefixes consulted by GitCommit.check_file_patterns() when a ChangeLog
# entry uses a trailing-asterisk wildcard.
wildcard_prefixes = {
    'gcc/testsuite/',
    'libstdc++-v3/doc/html/',
    'libstdc++-v3/testsuite/'
    }

# Files that GitCommit.__init__ treats like ChangeLog files: they have to be
# committed separately from normal changes.
misc_files = {
    'gcc/DATESTAMP',
    'gcc/BASE-VER',
    'gcc/DEV-PHASE'
    }
     161  
# "YYYY-MM-DD", exactly two spaces, then "Name  <email>" (two spaces before
# the opening angle bracket).
author_line_regex = \
        re.compile(r'^(?P<datetime>\d{4}-\d{2}-\d{2})\ {2}(?P<name>.*  <.*>)')
# A tab, optional spaces (captured so that callers can check their number)
# and "Name  <email>".
additional_author_regex = re.compile(r'^\t(?P<spaces>\ *)?(?P<name>.*  <.*>)')
# "[for ]<path>ChangeLog[:]" header; group 1 is the path in front of
# "ChangeLog".  NOTE: inside the character class "+-/" is a range that
# covers the characters "+", ",", "-", "." and "/".
changelog_regex = re.compile(r'^(?:[fF]or +)?([a-z0-9+-/]*)ChangeLog:?')
# "PR component/NNNN" anywhere in a line (4 to 7 digits).
subject_pr_regex = re.compile(r'(^|\W)PR\s+(?P<component>[a-zA-Z0-9+-]+)/(?P<pr>\d{4,7})')
# "[PRNNNN]" or "(PRNNNN)", with optional whitespace after "PR".
subject_pr2_regex = re.compile(r'[(\[]PR\s*(?P<pr>\d{4,7})[)\]]')
# ChangeLog PR line: a tab, "PR ", an optional "component/" and the number
# up to the end of the line.
pr_regex = re.compile(r'\tPR (?P<component>[a-z0-9+-]+\/)?(?P<pr>[0-9]+)$')
# ChangeLog DR line: a tab, "DR " and a number up to the end of the line.
dr_regex = re.compile(r'\tDR ([0-9]+)$')
# A tab and an asterisk; the spaces that follow and the rest of the line are
# captured separately.
star_prefix_regex = re.compile(r'\t\*(?P<spaces>\ *)(?P<content>.*)')
# First character that ends the file list of an item: "[", "<", "(" or ":".
end_of_location_regex = re.compile(r'[\[<(:]')
# An item line that ends right after "(name):", i.e. has no description.
item_empty_regex = re.compile(r'\t(\* \S+ )?\(\S+\):\s*$')
# A line that starts a new item: a tab followed by "*" or by "(name):".
item_parenthesis_regex = re.compile(r'\t(\*|\(\S+\):)')
# The line added by "git revert"; has to match the whole line (fullmatch is
# used by the caller).
revert_regex = re.compile(r'This reverts commit (?P<hash>[0-9a-f]+)\.$')
# The line added by "git cherry-pick -x".
cherry_pick_regex = re.compile(r'cherry picked from commit (?P<hash>\w+)')

# Maximum length of a ChangeLog line, measured after every tab has been
# replaced by TAB_WIDTH spaces.
LINE_LIMIT = 100
TAB_WIDTH = 8
# Trailer prefixes; they are compared against the lower-cased line.
CO_AUTHORED_BY_PREFIX = 'co-authored-by: '

# Lines starting with one of these trailers are skipped by the parser.
REVIEW_PREFIXES = ('reviewed-by: ', 'reviewed-on: ', 'signed-off-by: ',
                   'acked-by: ', 'tested-by: ', 'reported-by: ',
                   'suggested-by: ')
DATE_FORMAT = '%Y-%m-%d'
     185  
     186  
     187  def decode_path(path):
     188      # When core.quotepath is true (default value), utf8 chars are encoded like:
     189      # "b/ko\304\215ka.txt"
     190      #
     191      # The upstream bug is fixed:
     192      # https://github.com/gitpython-developers/GitPython/issues/1099
     193      #
     194      # but we still need a workaround for older versions of the library.
     195      # Please take a look at the explanation of the transformation:
     196      # https://stackoverflow.com/questions/990169/how-do-convert-unicode-escape-sequences-to-unicode-characters-in-a-python-string
     197  
     198      if path.startswith('"') and path.endswith('"'):
     199          return (path.strip('"').encode('utf8').decode('unicode-escape')
     200                  .encode('latin-1').decode('utf8'))
     201      else:
     202          return path
     203  
     204  
     205  class ESC[4;38;5;81mError:
     206      def __init__(self, message, line=None, details=None):
     207          self.message = message
     208          self.line = line
     209          self.details = details
     210  
     211      def __repr__(self):
     212          s = self.message
     213          if self.line:
     214              s += ': "%s"' % self.line
     215          return s
     216  
     217  
     218  class ESC[4;38;5;81mChangeLogEntry:
     219      def __init__(self, folder, authors, prs):
     220          self.folder = folder
     221          # The 'list.copy()' function is not available before Python 3.3
     222          self.author_lines = list(authors)
     223          self.initial_prs = list(prs)
     224          self.prs = list(prs)
     225          self.lines = []
     226          self.files = []
     227          self.file_patterns = []
     228          self.parentheses_stack = []
     229  
     230      def parse_file_names(self):
     231          # Whether the content currently processed is between a star prefix the
     232          # end of the file list: a colon or an open paren.
     233          in_location = False
     234  
     235          for line in self.lines:
     236              # If this line matches the star prefix, start the location
     237              # processing on the information that follows the star.
     238              # Note that we need to skip macro names that can be in form of:
     239              #
     240              # * config/i386/i386.md (*fix_trunc<mode>_i387_1,
     241              # *add<mode>3_ne, *add<mode>3_eq_0, *add<mode>3_ne_0,
     242              # *fist<mode>2_<rounding>_1, *<code><mode>3_1):
     243              #
     244              m = star_prefix_regex.match(line)
     245              if m and len(m.group('spaces')) == 1:
     246                  in_location = True
     247                  line = m.group('content')
     248  
     249              if in_location:
     250                  # Strip everything that is not a filename in "line":
     251                  # entities "(NAME)", cases "<PATTERN>", conditions
     252                  # "[COND]", entry text (the colon, if present, and
     253                  # anything that follows it).
     254                  m = end_of_location_regex.search(line)
     255                  if m:
     256                      line = line[:m.start()]
     257                      in_location = False
     258  
     259                  # At this point, all that's left is a list of filenames
     260                  # separated by commas and whitespaces.
     261                  for file in line.split(','):
     262                      file = file.strip()
     263                      if file:
     264                          if file.endswith('*'):
     265                              self.file_patterns.append(file[:-1])
     266                          else:
     267                              self.files.append(file)
     268  
     269      @property
     270      def datetime(self):
     271          for author in self.author_lines:
     272              if author[1]:
     273                  return author[1]
     274          return None
     275  
     276      @property
     277      def authors(self):
     278          return [author_line[0] for author_line in self.author_lines]
     279  
     280      @property
     281      def is_empty(self):
     282          return not self.lines and self.prs == self.initial_prs
     283  
     284      def contains_author(self, author):
     285          for author_lines in self.author_lines:
     286              if author_lines[0] == author:
     287                  return True
     288          return False
     289  
     290  
     291  class ESC[4;38;5;81mGitInfo:
     292      def __init__(self, hexsha, date, author, lines, modified_files):
     293          self.hexsha = hexsha
     294          self.date = date
     295          self.author = author
     296          self.lines = lines
     297          self.modified_files = modified_files
     298  
     299  
     300  class ESC[4;38;5;81mGitCommit:
     301      def __init__(self, info, commit_to_info_hook=None, ref_name=None):
     302          self.original_info = info
     303          self.info = info
     304          self.message = None
     305          self.changes = None
     306          self.changelog_entries = []
     307          self.errors = []
     308          self.warnings = []
     309          self.top_level_authors = []
     310          self.co_authors = []
     311          self.top_level_prs = []
     312          self.subject_prs = set()
     313          self.cherry_pick_commit = None
     314          self.revert_commit = None
     315          self.commit_to_info_hook = commit_to_info_hook
     316          self.init_changelog_locations(ref_name)
     317  
     318          # Skip Update copyright years commits
     319          if self.info.lines and self.info.lines[0] == 'Update copyright years.':
     320              return
     321  
     322          if self.info.lines and len(self.info.lines) > 1 and self.info.lines[1]:
     323              self.errors.append(Error('Expected empty second line in commit message', info.lines[0]))
     324  
     325          # Identify first if the commit is a Revert commit
     326          for line in self.info.lines:
     327              m = revert_regex.fullmatch(line)
     328              if m:
     329                  self.revert_commit = m.group('hash')
     330                  break
     331          if self.revert_commit:
     332              self.info = self.commit_to_info_hook(self.revert_commit)
     333  
     334          # The following happens for get_email.py:
     335          if not self.info:
     336              return
     337  
     338          self.check_commit_email()
     339  
     340          # Extract PR numbers form the subject line
     341          # Match either [PRnnnn] / (PRnnnn) or PR component/nnnn
     342          if self.info.lines and not self.revert_commit:
     343              self.subject_prs = {m.group('pr') for m in subject_pr2_regex.finditer(info.lines[0])}
     344              for m in subject_pr_regex.finditer(info.lines[0]):
     345                  if not m.group('component') in bug_components:
     346                      self.errors.append(Error('invalid PR component in subject', info.lines[0]))
     347                  self.subject_prs.add(m.group('pr'))
     348  
     349          # Allow complete deletion of ChangeLog files in a commit
     350          project_files = [f for f in self.info.modified_files
     351                           if (self.is_changelog_filename(f[0], allow_suffix=True) and f[1] != 'D')
     352                           or f[0] in misc_files]
     353          ignored_files = [f for f in self.info.modified_files
     354                           if self.in_ignored_location(f[0])]
     355          if len(project_files) == len(self.info.modified_files):
     356              # All modified files are only MISC files
     357              return
     358          elif project_files:
     359              err = 'ChangeLog, DATESTAMP, BASE-VER and DEV-PHASE updates ' \
     360                    'should be done separately from normal commits\n' \
     361                    '(note: ChangeLog entries will be automatically ' \
     362                    'added by a cron job)'
     363              self.errors.append(Error(err))
     364              return
     365  
     366          all_are_ignored = (len(project_files) + len(ignored_files)
     367                             == len(self.info.modified_files))
     368          self.parse_lines(all_are_ignored)
     369          if self.changes:
     370              self.parse_changelog()
     371              self.parse_file_names()
     372              self.check_for_empty_description()
     373              self.check_for_broken_parentheses()
     374              self.deduce_changelog_locations()
     375              self.check_file_patterns()
     376              self.check_line_start()
     377              if not self.errors:
     378                  self.check_mentioned_files()
     379                  self.check_for_correct_changelog()
     380          if self.subject_prs:
     381              self.errors.append(Error('PR %s in subject but not in changelog' %
     382                                       ', '.join(self.subject_prs), self.info.lines[0]))
     383  
     384      @property
     385      def success(self):
     386          return not self.errors
     387  
     388      @property
     389      def new_files(self):
     390          return [x[0] for x in self.info.modified_files if x[1] == 'A']
     391  
     392      @classmethod
     393      def is_changelog_filename(cls, path, allow_suffix=False):
     394          basename = os.path.basename(path)
     395          if basename == 'ChangeLog':
     396              return True
     397          elif allow_suffix and basename.startswith('ChangeLog'):
     398              return True
     399          else:
     400              return False
     401  
     402      def find_changelog_location(self, name):
     403          if name.startswith('\t'):
     404              name = name[1:]
     405          if name.endswith(':'):
     406              name = name[:-1]
     407          if name.endswith('/'):
     408              name = name[:-1]
     409          return name if name in self.changelog_locations else None
     410  
     411      @classmethod
     412      def format_git_author(cls, author):
     413          assert '<' in author
     414          return author.replace('<', ' <')
     415  
     416      @classmethod
     417      def parse_git_name_status(cls, string):
     418          modified_files = []
     419          for entry in string.split('\n'):
     420              parts = entry.split('\t')
     421              t = parts[0]
     422              if t == 'A' or t == 'D' or t == 'M':
     423                  modified_files.append((parts[1], t))
     424              elif t.startswith('R'):
     425                  modified_files.append((parts[1], 'D'))
     426                  modified_files.append((parts[2], 'A'))
     427          return modified_files
     428  
     429      def init_changelog_locations(self, ref_name):
     430          self.changelog_locations = list(default_changelog_locations)
     431          if ref_name:
     432              version = sys.maxsize
     433              if 'releases/gcc-' in ref_name:
     434                  version = int(ref_name.split('-')[-1])
     435              if version >= 12:
     436                  # HSA and BRIG were removed in GCC 12
     437                  self.changelog_locations.remove('gcc/brig')
     438                  self.changelog_locations.remove('libhsail-rt')
     439  
     440      def parse_lines(self, all_are_ignored):
     441          body = self.info.lines
     442  
     443          for i, b in enumerate(body):
     444              if not b:
     445                  continue
     446              if (changelog_regex.match(b) or self.find_changelog_location(b)
     447                      or star_prefix_regex.match(b) or pr_regex.match(b)
     448                      or dr_regex.match(b) or author_line_regex.match(b)
     449                      or b.lower().startswith(CO_AUTHORED_BY_PREFIX)):
     450                  self.changes = body[i:]
     451                  return
     452          if not all_are_ignored:
     453              self.errors.append(Error('cannot find a ChangeLog location in '
     454                                       'message'))
     455  
    def parse_changelog(self):
        """Split self.changes into ChangeLog entries.

        The lines are processed one by one.  New ChangeLogEntry objects are
        appended to self.changelog_entries, authors and PRs are attached
        either to the current entry or to the top-level lists, co-authors
        and the cherry-pick hash are recorded, and formatting problems are
        appended to self.errors.
        """
        # Entry that currently receives lines; None before the first entry
        # and after a deduced entry was closed by an empty line.
        last_entry = None
        # Set once an entry without an explicit ChangeLog location is seen.
        will_deduce = False
        for line in self.changes:
            if not line:
                # An empty line closes the current entry, but only once an
                # entry with a deduced location has been seen.
                if last_entry and will_deduce:
                    last_entry = None
                continue
            if line != line.rstrip():
                self.errors.append(Error('trailing whitespace', line))
            if len(line.replace('\t', ' ' * TAB_WIDTH)) > LINE_LIMIT:
                # support long filenames
                if not line.startswith('\t* ') or not line.endswith(':') or ' ' in line[3:-1]:
                    self.errors.append(Error('line exceeds %d character limit'
                                             % LINE_LIMIT, line))
            m = changelog_regex.match(line)
            if m:
                # Explicit "path/ChangeLog:" header.
                last_entry = ChangeLogEntry(m.group(1).rstrip('/'),
                                            self.top_level_authors,
                                            self.top_level_prs)
                self.changelog_entries.append(last_entry)
            elif self.find_changelog_location(line):
                # The line is just the name of a known ChangeLog location.
                last_entry = ChangeLogEntry(self.find_changelog_location(line),
                                            self.top_level_authors,
                                            self.top_level_prs)
                self.changelog_entries.append(last_entry)
            else:
                # Classify the line: author, additional author, PR or DR.
                author_tuple = None
                pr_line = None
                if author_line_regex.match(line):
                    m = author_line_regex.match(line)
                    author_tuple = (m.group('name'), m.group('datetime'))
                elif additional_author_regex.match(line):
                    m = additional_author_regex.match(line)
                    if len(m.group('spaces')) != 4:
                        msg = 'additional author must be indented with '\
                              'one tab and four spaces'
                        self.errors.append(Error(msg, line))
                    else:
                        author_tuple = (m.group('name'), None)
                elif pr_regex.match(line):
                    m = pr_regex.match(line)
                    component = m.group('component')
                    pr = m.group('pr')
                    if not component:
                        self.errors.append(Error('missing PR component', line))
                        continue
                    elif not component[:-1] in bug_components:
                        # component carries the trailing '/' of the match.
                        self.errors.append(Error('invalid PR component', line))
                        continue
                    else:
                        pr_line = line.lstrip()
                    # The PR is covered by the ChangeLog part, so it is no
                    # longer a subject-only PR.
                    if pr in self.subject_prs:
                        self.subject_prs.remove(pr)
                elif dr_regex.match(line):
                    pr_line = line.lstrip()

                # Trailers: co-authors are recorded, review trailers are
                # skipped and a cherry-pick line sets cherry_pick_commit.
                lowered_line = line.lower()
                if lowered_line.startswith(CO_AUTHORED_BY_PREFIX):
                    name = line[len(CO_AUTHORED_BY_PREFIX):]
                    author = self.format_git_author(name)
                    self.co_authors.append(author)
                    continue
                elif lowered_line.startswith(REVIEW_PREFIXES):
                    continue
                else:
                    m = cherry_pick_regex.search(line)
                    if m:
                        commit = m.group('hash')
                        if self.cherry_pick_commit:
                            msg = 'multiple cherry pick lines'
                            self.errors.append(Error(msg, line))
                        else:
                            self.cherry_pick_commit = commit
                        continue

                # ChangeLog name will be deduced later
                if not last_entry:
                    if author_tuple:
                        self.top_level_authors.append(author_tuple)
                        continue
                    elif pr_line:
                        # append to top_level_prs only when we haven't met
                        # a ChangeLog entry
                        if (pr_line not in self.top_level_prs
                                and not self.changelog_entries):
                            self.top_level_prs.append(pr_line)
                        continue
                    else:
                        last_entry = ChangeLogEntry(None,
                                                    self.top_level_authors,
                                                    self.top_level_prs)
                        self.changelog_entries.append(last_entry)
                        will_deduce = True
                elif author_tuple:
                    # Author line inside an entry: add it unless the entry
                    # already has an author with the same name.
                    if not last_entry.contains_author(author_tuple[0]):
                        last_entry.author_lines.append(author_tuple)
                    continue

                # From here on the line belongs to last_entry.
                if not line.startswith('\t'):
                    err = Error('line should start with a tab', line)
                    self.errors.append(err)
                elif pr_line:
                    last_entry.prs.append(pr_line)
                else:
                    m = star_prefix_regex.match(line)
                    if m:
                        # A different number of spaces after the asterisk is
                        # only accepted while a parenthesis is still open.
                        if (len(m.group('spaces')) != 1 and
                                not last_entry.parentheses_stack):
                            msg = 'one space should follow asterisk'
                            self.errors.append(Error(msg, line))
                        else:
                            content = m.group('content')
                            parts = content.split(':')
                            if len(parts) > 1:
                                # Look for empty groups in the text before
                                # the first colon.
                                for needle in ('()', '[]', '<>'):
                                    if ' ' + needle in parts[0]:
                                        msg = f'empty group "{needle}" found'
                                        self.errors.append(Error(msg, line))
                            last_entry.lines.append(line)
                            self.process_parentheses(last_entry, line)
                    else:
                        # Continuation line; it must not be the first line
                        # of an entry.
                        if last_entry.is_empty:
                            msg = 'first line should start with a tab, ' \
                                  'an asterisk and a space'
                            self.errors.append(Error(msg, line))
                        else:
                            last_entry.lines.append(line)
                            self.process_parentheses(last_entry, line)
     585  
     586      def process_parentheses(self, last_entry, line):
     587          for c in line:
     588              if c == '(':
     589                  last_entry.parentheses_stack.append(line)
     590              elif c == ')':
     591                  if not last_entry.parentheses_stack:
     592                      msg = 'bad wrapping of parenthesis'
     593                      self.errors.append(Error(msg, line))
     594                  else:
     595                      del last_entry.parentheses_stack[-1]
     596  
     597      def parse_file_names(self):
     598          for entry in self.changelog_entries:
     599              entry.parse_file_names()
     600  
     601      def check_file_patterns(self):
     602          for entry in self.changelog_entries:
     603              for pattern in entry.file_patterns:
     604                  name = os.path.join(entry.folder, pattern)
     605                  if not [name.startswith(pr) for pr in wildcard_prefixes]:
     606                      msg = 'unsupported wildcard prefix'
     607                      self.errors.append(Error(msg, name))
     608  
     609      def check_for_empty_description(self):
     610          for entry in self.changelog_entries:
     611              for i, line in enumerate(entry.lines):
     612                  if (item_empty_regex.match(line) and
     613                      (i == len(entry.lines) - 1
     614                       or not entry.lines[i+1].strip()
     615                       or item_parenthesis_regex.match(entry.lines[i+1]))):
     616                      msg = 'missing description of a change'
     617                      self.errors.append(Error(msg, line))
     618  
     619      def check_for_broken_parentheses(self):
     620          for entry in self.changelog_entries:
     621              if entry.parentheses_stack:
     622                  msg = 'bad parentheses wrapping'
     623                  self.errors.append(Error(msg, entry.parentheses_stack[-1]))
     624  
     625      def check_line_start(self):
     626          for entry in self.changelog_entries:
     627              for line in entry.lines:
     628                  if line.startswith('\t '):
     629                      msg = 'extra space after tab'
     630                      self.errors.append(Error(msg, line))
     631  
     632      def get_file_changelog_location(self, changelog_file):
     633          for file in self.info.modified_files:
     634              if file[0] == changelog_file:
     635                  # root ChangeLog file
     636                  return ''
     637              index = file[0].find('/' + changelog_file)
     638              if index != -1:
     639                  return file[0][:index]
     640          return None
     641  
     642      def deduce_changelog_locations(self):
     643          for entry in self.changelog_entries:
     644              if entry.folder is None:
     645                  changelog = None
     646                  for file in entry.files:
     647                      location = self.get_file_changelog_location(file)
     648                      if (location == ''
     649                         or (location and location in self.changelog_locations)):
     650                          if changelog and changelog != location:
     651                              msg = 'could not deduce ChangeLog file, ' \
     652                                    'not unique location'
     653                              self.errors.append(Error(msg))
     654                              return
     655                          changelog = location
     656                  if changelog is not None:
     657                      entry.folder = changelog
     658                  else:
     659                      msg = 'could not deduce ChangeLog file'
     660                      self.errors.append(Error(msg))
     661  
     662      @classmethod
     663      def in_ignored_location(cls, path):
     664          for ignored in ignored_prefixes:
     665              if path.startswith(ignored):
     666                  return True
     667          return False
     668  
     669      def get_changelog_by_path(self, path):
     670          components = path.split('/')
     671          while components:
     672              if '/'.join(components) in self.changelog_locations:
     673                  break
     674              components = components[:-1]
     675          return '/'.join(components)
     676  
    def check_mentioned_files(self):
        """Cross-check files named in ChangeLog entries against changed files.

        Problems are appended to self.errors:
          - an entry that names neither files nor file patterns,
          - a file named more than once,
          - a named file that the commit did not change,
          - a changed file that is neither named nor covered by a pattern,
          - a pattern that was not used by any changed file,
          - an unmentioned new file whose ChangeLog folder is the top level.

        Unmentioned new files outside the top-level folder are not errors:
        a '* <file>: New file.' line is added to the entry of the matching
        ChangeLog folder (the entry is created and appended to
        self.changelog_entries when missing) and a note is appended to
        self.warnings.
        """
        # Building the list reads .folder from every entry, so an entry
        # without that attribute fails here; the equality itself holds
        # trivially.
        folder_count = len([x.folder for x in self.changelog_entries])
        assert folder_count == len(self.changelog_entries)

        mentioned_files = set()
        mentioned_patterns = []
        used_patterns = set()
        # Pass 1: collect everything the ChangeLog entries mention, as paths
        # that include the entry folder.
        for entry in self.changelog_entries:
            if not entry.files and not entry.file_patterns:
                msg = 'no files mentioned for ChangeLog in directory'
                self.errors.append(Error(msg, entry.folder))
            assert not entry.folder.endswith('/')
            for file in entry.files:
                # Names accepted by is_changelog_filename() are left out of
                # the comparison.
                if not self.is_changelog_filename(file):
                    item = os.path.join(entry.folder, file)
                    if item in mentioned_files:
                        msg = 'same file specified multiple times'
                        self.errors.append(Error(msg, file))
                    else:
                        mentioned_files.add(item)
            for pattern in entry.file_patterns:
                mentioned_patterns.append(os.path.join(entry.folder, pattern))

        # First element of each modified_files item is used as the path
        # (presumably a (path, status) pair -- confirm against the producer).
        cand = [x[0] for x in self.info.modified_files
                if not self.is_changelog_filename(x[0])]
        changed_files = set(cand)
        # Pass 2: mentioned but not changed.  Offer the closest changed path
        # as a hint, together with a character-level diff.
        for file in sorted(mentioned_files - changed_files):
            msg = 'unchanged file mentioned in a ChangeLog'
            candidates = difflib.get_close_matches(file, changed_files, 1)
            details = None
            if candidates:
                msg += f' (did you mean "{candidates[0]}"?)'
                details = '\n'.join(difflib.Differ().compare([file], [candidates[0]])).rstrip()
            self.errors.append(Error(msg, file, details))
        # Maps ChangeLog folder -> list of auto-added file names.
        auto_add_warnings = defaultdict(list)
        # Pass 3: changed but not mentioned by name.
        for file in sorted(changed_files - mentioned_files):
            if not self.in_ignored_location(file):
                # New files take the auto-add branch; mentioned_patterns are
                # only consulted for files that are not new.
                if file in self.new_files:
                    changelog_location = self.get_changelog_by_path(file)
                    # Python2: we cannot use next(filter(...))
                    entries = filter(lambda x: x.folder == changelog_location,
                                     self.changelog_entries)
                    entries = list(entries)
                    # Reuse the first existing entry for that folder, if any.
                    entry = entries[0] if entries else None
                    if not entry:
                        prs = self.top_level_prs
                        if not prs:
                            # if all ChangeLog entries have identical PRs
                            # then use them
                            if self.changelog_entries:
                                prs = self.changelog_entries[0].prs
                                # NOTE: this loop rebinds 'entry'; it is
                                # reassigned right after the loop.
                                for entry in self.changelog_entries:
                                    if entry.prs != prs:
                                        prs = []
                                        break
                        entry = ChangeLogEntry(changelog_location,
                                               self.top_level_authors,
                                               prs)
                        self.changelog_entries.append(entry)
                    # strip prefix of the file
                    assert file.startswith(entry.folder)
                    # do not allow auto-addition of New files
                    # for the top-level folder
                    if entry.folder:
                        file = file[len(entry.folder):].lstrip('/')
                        entry.lines.append('\t* %s: New file.' % file)
                        entry.files.append(file)
                        auto_add_warnings[entry.folder].append(file)
                    else:
                        msg = 'new file in the top-level folder not mentioned in a ChangeLog'
                        self.errors.append(Error(msg, file))
                else:
                    # A pattern covers a file when the file path starts with
                    # it; only the first matching pattern is marked as used.
                    used_pattern = [p for p in mentioned_patterns
                                    if file.startswith(p)]
                    used_pattern = used_pattern[0] if used_pattern else None
                    if used_pattern:
                        used_patterns.add(used_pattern)
                    else:
                        msg = 'changed file not mentioned in a ChangeLog'
                        self.errors.append(Error(msg, file))

        # Every mentioned pattern must have covered at least one changed file.
        for pattern in mentioned_patterns:
            if pattern not in used_patterns:
                error = "pattern doesn't match any changed files"
                self.errors.append(Error(error, pattern))
        # One warning per folder: name the file when there is exactly one,
        # otherwise report only the count.
        for entry, val in auto_add_warnings.items():
            if len(val) == 1:
                self.warnings.append(f"Auto-added new file '{entry}/{val[0]}'")
            else:
                self.warnings.append(f"Auto-added {len(val)} new files in '{entry}'")
     767  
     768      def check_for_correct_changelog(self):
     769          for entry in self.changelog_entries:
     770              for file in entry.files:
     771                  full_path = os.path.join(entry.folder, file)
     772                  changelog_location = self.get_changelog_by_path(full_path)
     773                  if changelog_location != entry.folder:
     774                      msg = 'wrong ChangeLog location "%s", should be "%s"'
     775                      err = Error(msg % (entry.folder, changelog_location), file)
     776                      self.errors.append(err)
     777  
     778      @classmethod
     779      def format_authors_in_changelog(cls, authors, timestamp, prefix=''):
     780          output = ''
     781          for i, author in enumerate(authors):
     782              if i == 0:
     783                  output += '%s%s  %s\n' % (prefix, timestamp, author)
     784              else:
     785                  output += '%s\t    %s\n' % (prefix, author)
     786          output += '\n'
     787          return output
     788  
     789      def to_changelog_entries(self, use_commit_ts=False):
     790          current_timestamp = self.info.date.strftime(DATE_FORMAT)
     791          for entry in self.changelog_entries:
     792              output = ''
     793              timestamp = entry.datetime
     794              if self.revert_commit:
     795                  timestamp = current_timestamp
     796                  orig_date = self.original_info.date
     797                  current_timestamp = orig_date.strftime(DATE_FORMAT)
     798              elif self.cherry_pick_commit:
     799                  info = self.commit_to_info_hook(self.cherry_pick_commit)
     800                  # it can happen that it is a cherry-pick for a different
     801                  # repository
     802                  if info:
     803                      timestamp = info.date.strftime(DATE_FORMAT)
     804                  else:
     805                      timestamp = current_timestamp
     806              elif not timestamp or use_commit_ts:
     807                  timestamp = current_timestamp
     808              authors = entry.authors if entry.authors else [self.info.author]
     809              # add Co-Authored-By authors to all ChangeLog entries
     810              for author in self.co_authors:
     811                  if author not in authors:
     812                      authors.append(author)
     813  
     814              if self.cherry_pick_commit or self.revert_commit:
     815                  original_author = self.original_info.author
     816                  output += self.format_authors_in_changelog([original_author],
     817                                                             current_timestamp)
     818                  if self.revert_commit:
     819                      output += '\tRevert:\n'
     820                  else:
     821                      output += '\tBackported from master:\n'
     822                  output += self.format_authors_in_changelog(authors,
     823                                                             timestamp, '\t')
     824              else:
     825                  output += self.format_authors_in_changelog(authors, timestamp)
     826              for pr in entry.prs:
     827                  output += '\t%s\n' % pr
     828              for line in entry.lines:
     829                  output += line + '\n'
     830              yield (entry.folder, output.rstrip())
     831  
     832      def print_output(self):
     833          for entry, output in self.to_changelog_entries():
     834              print('------ %s/ChangeLog ------ ' % entry)
     835              print(output)
     836  
     837      def print_errors(self):
     838          print('Errors:')
     839          for error in self.errors:
     840              print(error)
     841  
     842      def print_warnings(self):
     843          if self.warnings:
     844              print('Warnings:')
     845              for warning in self.warnings:
     846                  print(warning)
     847  
     848      def check_commit_email(self):
     849          # Parse 'Martin Liska  <mliska@suse.cz>'
     850          email = self.info.author.split(' ')[-1].strip('<>')
     851  
     852          # Verify that all characters are ASCII
     853          # TODO: Python 3.7 provides a nicer function: isascii
     854          if len(email) != len(email.encode()):
     855              self.errors.append(Error(f'non-ASCII characters in git commit email address ({email})'))