1 #
2 # genmap_korean.py: Korean Codecs Map Generator
3 #
4 # Original Author: Hye-Shik Chang <perky@FreeBSD.org>
5 # Modified Author: Dong-hee Na <donghee.na92@gmail.com>
6 #
7 import os
8
9 from genmap_support import *
10
11
# (first, last) byte bounds handed to DecodeMapWriter.update_decode_map().
# *_C1 is paired with the outer (first-byte) key of the decode maps built in
# main(), *_C2 with the inner (second-byte) key.
# NOTE(review): bounds look inclusive -- confirm against genmap_support.

# KS X 1001 keys are stored with the high bit stripped (see main()).
KSX1001_C1 = KSX1001_C2 = (0x21, 0x7E)

# UHC (CP949 extension) areas: everything outside the KS X 1001 region.
UHCL1_C1, UHCL1_C2 = (0x81, 0xA0), (0x41, 0xFE)
UHCL2_C1, UHCL2_C2 = (0xA1, 0xFE), (0x41, 0xA0)

# Passed to open_mapping_file() alongside the local path of the table.
MAPPINGS_CP949 = 'http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP949.TXT'
19
20
def main():
    """Build ``mappings_kr.h`` from the CP949 mapping table.

    The table is opened with ``open_mapping_file`` and parsed by ``loadmap``
    into a nested ``{first_byte: {second_byte: code}}`` mapping.  Every
    entry is then sorted into one of two decode maps and mirrored into a
    single encode map keyed by the high and low byte of ``code``:

    * both bytes >= 0xa1 -> KS X 1001; bytes are stored with the high bit
      stripped (``& 0x7f``) in both the decode keys and the encode value;
    * anything else      -> UHC extension; bytes are stored unchanged.

    Finally the three tables ("ksx1001", "cp949ext", "cp949") are written
    to ``mappings_kr.h`` in the current working directory.
    """
    source = open_mapping_file('python-mappings/CP949.TXT', MAPPINGS_CP949)
    print("Loading Mapping File...")
    table = loadmap(source)

    ksx1001decmap = {}
    uhcdecmap = {}
    cp949encmap = {}
    for lead, trail_map in table.items():
        for trail, ucs in trail_map.items():
            if lead >= 0xa1 and trail >= 0xa1:
                # KS X 1001 region: keep only the low 7 bits of each byte.
                ksx1001decmap.setdefault(lead & 0x7f, {})[trail & 0x7f] = ucs
                cp949encmap.setdefault(ucs >> 8, {})[ucs & 0xFF] = (
                    (lead << 8 | trail) & 0x7f7f
                )
            else:
                # UHC region: bytes are kept as-is, so the MSB stays set.
                uhcdecmap.setdefault(lead, {})[trail] = ucs
                cp949encmap.setdefault(ucs >> 8, {})[ucs & 0xFF] = (
                    lead << 8 | trail
                )

    with open('mappings_kr.h', 'w') as fp:
        print_autogen(fp, os.path.basename(__file__))

        print("Generating KS X 1001 decode map...")
        emitter = DecodeMapWriter(fp, "ksx1001", ksx1001decmap)
        emitter.update_decode_map(KSX1001_C1, KSX1001_C2)
        emitter.generate()

        print("Generating UHC decode map...")
        emitter = DecodeMapWriter(fp, "cp949ext", uhcdecmap)
        # The UHC area is registered as two separate byte-range rectangles.
        emitter.update_decode_map(UHCL1_C1, UHCL1_C2)
        emitter.update_decode_map(UHCL2_C1, UHCL2_C2)
        emitter.generate()

        print("Generating CP949 (includes KS X 1001) encode map...")
        emitter = EncodeMapWriter(fp, "cp949", cp949encmap)
        emitter.generate()

    print("Done!")
59
60
# Run the generator only when executed as a script, not when imported.
if __name__ == "__main__":
    main()