(root)/
Python-3.12.0/
Tools/
build/
generate_levenshtein_examples.py
       1  """Generate 10,000 unique examples for the Levenshtein short-circuit tests."""
       2  
       3  import argparse
       4  from functools import lru_cache
       5  import json
       6  import os.path
       7  from random import choices, randrange
       8  
       9  
      10  # This should be in sync with Lib/traceback.py.  It's not importing those values
      11  # because this script is being executed by PYTHON_FOR_REGEN and not by the in-tree
      12  # build of Python.
      13  _MOVE_COST = 2
      14  _CASE_COST = 1
      15  
      16  
      17  def _substitution_cost(ch_a, ch_b):
      18      if ch_a == ch_b:
      19          return 0
      20      if ch_a.lower() == ch_b.lower():
      21          return _CASE_COST
      22      return _MOVE_COST
      23  
      24  
      25  @lru_cache(None)
      26  def levenshtein(a, b):
      27      if not a or not b:
      28          return (len(a) + len(b)) * _MOVE_COST
      29      option1 = levenshtein(a[:-1], b[:-1]) + _substitution_cost(a[-1], b[-1])
      30      option2 = levenshtein(a[:-1], b) + _MOVE_COST
      31      option3 = levenshtein(a, b[:-1]) + _MOVE_COST
      32      return min(option1, option2, option3)
      33  
      34  
      35  def main():
      36      parser = argparse.ArgumentParser(description=__doc__)
      37      parser.add_argument('output_path', metavar='FILE', type=str)
      38      parser.add_argument('--overwrite', dest='overwrite', action='store_const',
      39                          const=True, default=False,
      40                          help='overwrite an existing test file')
      41  
      42      args = parser.parse_args()
      43      output_path = os.path.realpath(args.output_path)
      44      if not args.overwrite and os.path.isfile(output_path):
      45          print(f"{output_path} already exists, skipping regeneration.")
      46          print(
      47              "To force, add --overwrite to the invocation of this tool or"
      48              " delete the existing file."
      49          )
      50          return
      51  
      52      examples = set()
      53      # Create a lot of non-empty examples, which should end up with a Gauss-like
      54      # distribution for even costs (moves) and odd costs (case substitutions).
      55      while len(examples) < 9990:
      56          a = ''.join(choices("abcABC", k=randrange(1, 10)))
      57          b = ''.join(choices("abcABC", k=randrange(1, 10)))
      58          expected = levenshtein(a, b)
      59          examples.add((a, b, expected))
      60      # Create one empty case each for strings between 0 and 9 in length.
      61      for i in range(10):
      62          b = ''.join(choices("abcABC", k=i))
      63          expected = levenshtein("", b)
      64          examples.add(("", b, expected))
      65      with open(output_path, "w") as f:
      66          json.dump(sorted(examples), f, indent=2)
      67  
      68  
      69  if __name__ == "__main__":
      70      main()