1 #!/usr/bin/env python3
2 # Copyright (C) 1998, 1999 Tom Tromey
3 # Copyright (C) 2001 Red Hat Software
4 #
5 # SPDX-License-Identifier: GPL-2.0-or-later
6 #
7 # This program is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 2, or (at your option)
10 # any later version.
11 #
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
16 #
17 # You should have received a copy of the GNU General Public License
18 # along with this program; if not, see <http://www.gnu.org/licenses/>.
19
20 """
21 gen-casefold-txt.py - Generate test cases for casefolding from Unicode data.
22 See http://www.unicode.org/Public/UNIDATA/UnicodeCharacterDatabase.html
Usage: gen-casefold-txt.py UNICODE-VERSION CaseFolding.txt

I consider the output of this program to be unrestricted.
Use it as you will.
26 """
27
28 import sys
29 import argparse
30
31
def main(argv):
    """Print casefolding test cases built from a CaseFolding.txt file.

    A fixed header (mentioning the Unicode version) and one hand-crafted
    test are written to standard output first.  Then, for every entry of
    the input file that is not a simple ("S") or Turkic ("T") mapping, one
    line is written: the source character, a tab, and the characters it
    maps to.

    Args:
        argv: Full argument vector: program name, the Unicode version
            string, and the path to CaseFolding.txt.

    Returns:
        None.

    Raises:
        SystemExit: If the command line is invalid (raised by argparse) or
            an entry has fewer than three ``;``-separated fields.
    """
    parser = argparse.ArgumentParser(
        description="Generate test cases for casefolding from Unicode data"
    )
    parser.add_argument("UNICODE-VERSION")
    parser.add_argument("CaseFolding.txt")
    args = parser.parse_args(argv[1:])
    # The positional names are not valid identifiers, so they can only be
    # read back through getattr().
    version = getattr(args, "UNICODE-VERSION")
    filename = getattr(args, "CaseFolding.txt")

    print(
        """\
# Test cases generated from Unicode {} data
# by gen-casefold-txt.py. Do not edit.
#
# Some special hand crafted tests
#
AaBbCc@@\taabbcc@@
#
# Now the automatic tests
#""".format(
            version
        )
    )

    # Names of fields in the CaseFolding table
    CODE, STATUS, MAPPING = range(3)

    # Status values whose entries are left out of the generated tests:
    # simple ("S") and Turkic ("T") mappings.
    skipped_statuses = ("S", "T")

    with open(filename, encoding="utf-8") as fileobj:
        for line in fileobj:
            # strip comments and skip empty lines
            line = line.split("#", 1)[0].strip()
            if not line:
                continue

            # Only the first three fields matter; anything after the third
            # ";" (normally empty once the comment is gone) is discarded.
            fields = [f.strip() for f in line.split(";", 3)[:3]]
            if len(fields) != 3:
                raise SystemExit(
                    "Entry for %s has wrong number of fields (%d)"
                    % (fields[CODE], len(fields))
                )

            # Compare against the exact status tokens.  A substring test
            # such as `status in "ST"` would also match an empty status
            # (and the string "ST") and silently drop the entry.
            if fields[STATUS] in skipped_statuses:
                continue

            # Both columns hold hexadecimal code points; the mapping column
            # may hold several, separated by whitespace.
            code = chr(int(fields[CODE], 16))
            values = "".join(chr(int(v, 16)) for v in fields[MAPPING].split())
            print("{}\t{}".format(code, values))
82
83
if __name__ == "__main__":
    # Run only when executed as a script; whatever main() returns becomes
    # the process exit status.
    exit_status = main(sys.argv)
    sys.exit(exit_status)