1 #
2 # genmap_support.py: Multibyte Codec Map Generator
3 #
4 # Original Author: Hye-Shik Chang <perky@FreeBSD.org>
5 # Modified Author: Dong-hee Na <donghee.na92@gmail.com>
6 #
7
8
class BufferedFiller:
    """Pack short string tokens into output lines of bounded width.

    Tokens handed to write() are appended to the line under construction
    until the next token would make that line longer than ``column``
    characters; the line is then moved to the list of finished lines.
    len() of a filler is the number of write() calls made so far (not the
    number of tokens or lines).
    """

    def __init__(self, column=78):
        self.column = column    # maximum length of one output line
        self.buffered = []      # finished lines waiting for printout()
        self.cline = []         # tokens of the line being assembled
        self.clen = 0           # combined length of the tokens in cline
        self.count = 0          # number of completed write() calls

    def __len__(self):
        return self.count

    def write(self, *data):
        """Queue the tokens in *data* and count them as one written item.

        Raises ValueError if a single token is longer than the column
        width; the call is not counted in that case.
        """
        for token in data:
            size = len(token)
            if size > self.column:
                raise ValueError("token is too long")
            # Start a new line when this token would overflow the current one.
            if self.clen + size > self.column:
                self.flush()
            self.cline.append(token)
            self.clen += size
        self.count += 1

    def flush(self):
        """Move the line under construction, if any, to the finished lines."""
        if self.cline:
            self.buffered.append(''.join(self.cline))
            self.clen = 0
            self.cline.clear()

    def printout(self, fp):
        """Write every pending line to *fp*, newline-terminated, and forget them.

        The write counter reported by len() is left untouched.
        """
        self.flush()
        for text in self.buffered:
            fp.write(f'{text}\n')
        self.buffered.clear()
42
43
class DecodeMapWriter:
    """Write the C decode tables for one codec map.

    ``decode_map`` is a two-level mapping: first value -> {second value:
    mapped value}.  update_decode_map() queues table elements in the
    filler and records bookkeeping entries (the prefix itself, 'min',
    'max' and 'midx') inside the second-level dictionaries; generate()
    then writes the element array followed by a 256-entry index array
    to ``fp``.
    """

    filler_class = BufferedFiller

    def __init__(self, fp, prefix, decode_map):
        self.fp = fp
        self.prefix = prefix
        self.decode_map = decode_map
        self.filler = self.filler_class()

    def update_decode_map(self, c1range, c2range, onlymask=(), wide=0):
        """Queue the elements for every first value in *c1range*.

        c1range and c2range are (first, last) pairs, both ends inclusive.
        When *onlymask* is non-empty, first values not contained in it are
        skipped.  *wide* is accepted but not used by this method.
        """
        second_values = range(c2range[0], c2range[1] + 1)

        for lead in range(c1range[0], c1range[1] + 1):
            if lead not in self.decode_map:
                continue
            if onlymask and lead not in onlymask:
                continue

            row = self.decode_map[lead]
            present = [t for t in second_values if t in row]
            if not present:
                continue
            lowest, highest = present[0], present[-1]

            # Bookkeeping lives in the same dict as the mapping itself;
            # 'midx' is the offset of this row's first element.
            row[self.prefix] = True
            row['min'] = lowest
            row['max'] = highest
            row['midx'] = len(self.filler)

            # Emit the whole min..max span, with 'U,' filling the gaps.
            for trail in range(lowest, highest + 1):
                token = '%d,' % row[trail] if trail in row else 'U,'
                self.filler.write(token)

    def generate(self, wide=False):
        """Write the element array and the 256-entry index array to fp.

        With *wide* true the element type is Py_UCS4 and the index struct
        is widedbcs_index; otherwise they are ucs2_t and dbcs_index.
        """
        if wide:
            self.fp.write(f"static const Py_UCS4 __{self.prefix}_decmap[{len(self.filler)}] = {{\n")
        else:
            self.fp.write(f"static const ucs2_t __{self.prefix}_decmap[{len(self.filler)}] = {{\n")
        self.filler.printout(self.fp)
        self.fp.write("};\n\n")

        if wide:
            self.fp.write(f"static const struct widedbcs_index {self.prefix}_decmap[256] = {{\n")
        else:
            self.fp.write(f"static const struct dbcs_index {self.prefix}_decmap[256] = {{\n")

        for lead in range(256):
            # Rows never tagged with this prefix get an all-zero entry.
            if lead not in self.decode_map or self.prefix not in self.decode_map[lead]:
                self.filler.write("{", "0,", "0,", "0", "},")
                continue

            row = self.decode_map[lead]
            self.filler.write(
                "{", "__%s_decmap" % self.prefix, "+", "%d" % row['midx'],
                ",", "%d," % row['min'], "%d" % row['max'], "},")
        self.filler.printout(self.fp)
        self.fp.write("};\n\n")
101
102
class EncodeMapWriter:
    """Write the C encode tables for one codec map.

    ``encode_map`` is a two-level mapping: first value -> {second value:
    target}, where a target is either an int or a tuple.  buildmap()
    queues the table elements in the filler and records bookkeeping
    entries (the prefix itself, 'min', 'max' and 'midx') inside the
    second-level dictionaries; printmap() writes the element array and a
    256-entry index array to ``fp``.

    Subclasses can change the emitted C types through ``elemtype`` and
    ``indextype`` and the emitted tokens by overriding write_nochar(),
    write_multic() and write_char().
    """

    filler_class = BufferedFiller
    elemtype = 'DBCHAR'
    indextype = 'struct unim_index'

    def __init__(self, fp, prefix, encode_map):
        self.fp = fp
        self.prefix = prefix
        self.encode_map = encode_map
        self.filler = self.filler_class()

    def generate(self):
        """Build the element table, then write both arrays to fp."""
        self.buildmap()
        self.printmap()

    def buildmap(self):
        """Queue one element per second value in each row's min..max span.

        Raises:
            ValueError: if a mapped target is neither an int nor a tuple.
        """
        for c1 in range(256):
            if c1 not in self.encode_map:
                continue
            c2map = self.encode_map[c1]
            # Only integer keys are real second values.  The dict may also
            # hold the str bookkeeping keys stored below (by an earlier
            # pass over the same map); sorting those together with ints
            # would raise TypeError.
            rc2values = sorted(k for k in c2map if isinstance(k, int))
            if not rc2values:
                continue

            c2map[self.prefix] = True
            c2map['min'] = rc2values[0]
            c2map['max'] = rc2values[-1]
            # Offset of this row's first element in the element array.
            c2map['midx'] = len(self.filler)

            for v in range(rc2values[0], rc2values[-1] + 1):
                if v not in c2map:
                    self.write_nochar()
                    continue
                target = c2map[v]
                if isinstance(target, int):
                    self.write_char(target)
                elif isinstance(target, tuple):
                    self.write_multic(target)
                else:
                    raise ValueError(
                        f"unsupported mapping target at [{c1!r}][{v!r}]: {target!r}")

    def write_nochar(self):
        """Queue the placeholder token for a second value with no mapping."""
        self.filler.write('N,')

    def write_multic(self, point):
        """Queue the placeholder token for a tuple target (*point* is unused)."""
        self.filler.write('M,')

    def write_char(self, point):
        """Queue the decimal representation of an int target."""
        self.filler.write(str(point) + ',')

    def printmap(self):
        """Write the element array and the 256-entry index array to fp."""
        self.fp.write(f"static const {self.elemtype} __{self.prefix}_encmap[{len(self.filler)}] = {{\n")
        self.filler.printout(self.fp)
        self.fp.write("};\n\n")
        self.fp.write(f"static const {self.indextype} {self.prefix}_encmap[256] = {{\n")

        for i in range(256):
            if i in self.encode_map and self.prefix in self.encode_map[i]:
                self.filler.write("{", "__%s_encmap" % self.prefix, "+",
                                  "%d" % self.encode_map[i]['midx'], ",",
                                  "%d," % self.encode_map[i]['min'],
                                  "%d" % self.encode_map[i]['max'], "},")
            else:
                # Rows never tagged with this prefix get an all-zero entry.
                self.filler.write("{", "0,", "0,", "0", "},")
        self.filler.printout(self.fp)
        self.fp.write("};\n\n")
169
170
def open_mapping_file(path, source):
    """Open the mapping file at *path* for reading and return the file object.

    The file is opened in text mode with the platform default encoding;
    the caller is responsible for closing it.

    Raises:
        SystemExit: with the message "<source> is needed" when the file
            cannot be opened.  The underlying OSError is attached as
            ``__cause__`` so the real reason is not lost.
    """
    try:
        return open(path)
    except OSError as exc:
        raise SystemExit(f'{source} is needed') from exc
177
178
def print_autogen(fo, source):
    """Write the one-line "auto-generated, do not edit" banner naming *source* to *fo*."""
    banner = f'// AUTO-GENERATED FILE FROM {source}: DO NOT EDIT\n'
    fo.write(banner)
181
182
def loadmap(fo, natcol=0, unicol=1, sbcs=0):
    """Parse a whitespace-separated mapping file into a two-level dict.

    The file object *fo* is rewound and read line by line.  Everything
    after the first '#' on a line is ignored, and lines with fewer than
    two remaining fields are skipped.  Column *natcol* supplies the key
    and column *unicol* the mapped value.  The result is
    ``{key >> 8: {key & 0xff: value}}``.  Keys below 0x100 are kept only
    when *sbcs* is true.
    """
    print("Loading from", fo)
    fo.seek(0, 0)
    table = {}
    for raw in fo:
        # Drop the trailing comment; split() also discards surrounding blanks.
        fields = raw.partition('#')[0].split()
        if len(fields) < 2:
            continue

        # NOTE(review): every field is passed to eval(); this is only
        # acceptable for trusted mapping files.
        values = [eval(tok) for tok in fields]
        native, mapped = values[natcol], values[unicol]
        if native < 0x100 and not sbcs:
            continue
        table.setdefault(native >> 8, {})[native & 0xff] = mapped

    return table