1 #!/usr/bin/env python3
2
3 """usage: ./gen-indic-table.py IndicSyllabicCategory.txt IndicPositionalCategory.txt Blocks.txt
4
5 Input files:
6 * https://unicode.org/Public/UCD/latest/ucd/IndicSyllabicCategory.txt
7 * https://unicode.org/Public/UCD/latest/ucd/IndicPositionalCategory.txt
8 * https://unicode.org/Public/UCD/latest/ucd/Blocks.txt
9 """
10
11 import sys
12
# Exactly three input files are required; otherwise print the usage text
# (the module docstring) and exit.
if len(sys.argv[1:]) != 3:
    sys.exit(__doc__)
15
# Code points that survive the block filter regardless of their block name:
# U+00A0 NO-BREAK SPACE and U+25CC DOTTED CIRCLE.  They are later moved out
# of the main table and handled as special cases.
ALLOWED_SINGLES = [0x00A0, 0x25CC]
# Block names (compared against the values read from the third input file,
# Blocks.txt) whose code points are kept when the input data is merged.
ALLOWED_BLOCKS = [
    'Basic Latin',
    'Latin-1 Supplement',
    'Devanagari',
    'Bengali',
    'Gurmukhi',
    'Gujarati',
    'Oriya',
    'Tamil',
    'Telugu',
    'Kannada',
    'Malayalam',
    'Myanmar',
    'Khmer',
    'Vedic Extensions',
    'General Punctuation',
    'Superscripts and Subscripts',
    'Devanagari Extended',
    'Myanmar Extended-B',
    'Myanmar Extended-A',
]
38
def _parse_ucd_file(f):
    """Parse one open UCD data file and return ``(header, data)``.

    header -- the first two lines of the file, kept verbatim so they can be
              echoed into the generated output.
    data   -- dict mapping every code point named in the remaining lines to
              the (stripped) second ';'-separated field of its line.
    """
    header = [f.readline() for _ in range(2)]
    data = {}
    for line in f:
        # Everything from the first '#' on is a comment.
        line = line.split('#', 1)[0]

        fields = [x.strip() for x in line.split(';')]
        if len(fields) == 1:
            # No ';' at all: blank or comment-only line.
            continue

        # First field is either a single code point "XXXX" or an inclusive
        # range "XXXX..YYYY", both in hexadecimal.
        uu = fields[0].split('..')
        start = int(uu[0], 16)
        end = start if len(uu) == 1 else int(uu[1], 16)

        t = fields[1]
        for u in range(start, end + 1):
            data[u] = t
    return header, data


# headers[i] and unicode_data[i] belong to the i-th file on the command line.
# Each file is opened in a `with` block so it is closed as soon as it has
# been read, instead of staying open for the life of the script.
headers = []
unicode_data = []
for path in sys.argv[1:]:
    with open(path, encoding='utf-8') as f:
        header, data = _parse_ucd_file(f)
    headers.append(header)
    unicode_data.append(data)
66
# Merge the per-file tables into one dict: code point -> [value from file 0,
# value from file 1, value from file 2], starting from the defaults below.
defaults = ('Other', 'Not_Applicable', 'No_Block')
combined = {}
for column, table in enumerate(unicode_data):
    # The third table only annotates code points the first two already
    # introduced; it never adds new entries.
    may_add_entries = column != 2
    for codepoint, value in table.items():
        if codepoint not in combined:
            if not may_add_entries:
                continue
            combined[codepoint] = list(defaults)
        combined[codepoint][column] = value

# Keep only the explicitly allowed code points and blocks.
combined = {
    codepoint: entry
    for codepoint, entry in combined.items()
    if codepoint in ALLOWED_SINGLES or entry[2] in ALLOWED_BLOCKS
}
78
79
# Convert category and position types
81
# Category names per shaper, in the order they are emitted.
#
# For every key the script prints `#include "hb-ot-shaper-<key>-machine.hh"`.
# For every listed name it then prints `#define OT_<name> <K>_Cat(<name>)`,
# where <K> is the upper-cased first letter of the key.  If an earlier shaper
# already defined the same name, it prints a static_assert that the existing
# definition matches instead.
categories = {
    'indic' : [
        'X',
        'C',
        'V',
        'N',
        'H',
        'ZWNJ',
        'ZWJ',
        'M',
        'SM',
        'A',
        'VD',
        'PLACEHOLDER',
        'DOTTEDCIRCLE',
        'RS',
        'MPst',
        'Repha',
        'Ra',
        'CM',
        'Symbol',
        'CS',
    ],
    'khmer' : [
        'VAbv',
        'VBlw',
        'VPre',
        'VPst',

        'Robatic',
        'Xgroup',
        'Ygroup',
    ],
    'myanmar' : [
        'VAbv',
        'VBlw',
        'VPre',
        'VPst',

        'IV',
        'As',
        'DB',
        'GB',
        'MH',
        'MR',
        'MW',
        'MY',
        'PT',
        'VS',
        'ML',
    ],
}
134
# Maps each value read from the first input file (IndicSyllabicCategory.txt
# according to the usage text) to the category name used in the generated
# table.  'Other' is the value assumed for code points missing from that file.
category_map = {
    'Other' : 'X',
    'Avagraha' : 'Symbol',
    'Bindu' : 'SM',
    'Brahmi_Joining_Number' : 'PLACEHOLDER', # Don't care.
    'Cantillation_Mark' : 'A',
    'Consonant' : 'C',
    'Consonant_Dead' : 'C',
    'Consonant_Final' : 'CM',
    'Consonant_Head_Letter' : 'C',
    'Consonant_Initial_Postfixed' : 'C', # TODO
    'Consonant_Killer' : 'M', # U+17CD only.
    'Consonant_Medial' : 'CM',
    'Consonant_Placeholder' : 'PLACEHOLDER',
    'Consonant_Preceding_Repha' : 'Repha',
    'Consonant_Prefixed' : 'X', # Don't care.
    'Consonant_Subjoined' : 'CM',
    'Consonant_Succeeding_Repha' : 'CM',
    'Consonant_With_Stacker' : 'CS',
    'Gemination_Mark' : 'SM', # https://github.com/harfbuzz/harfbuzz/issues/552
    'Invisible_Stacker' : 'H',
    'Joiner' : 'ZWJ',
    'Modifying_Letter' : 'X',
    'Non_Joiner' : 'ZWNJ',
    'Nukta' : 'N',
    'Number' : 'PLACEHOLDER',
    'Number_Joiner' : 'PLACEHOLDER', # Don't care.
    'Pure_Killer' : 'M', # Is like a vowel matra.
    'Register_Shifter' : 'RS',
    'Syllable_Modifier' : 'SM',
    'Tone_Letter' : 'X',
    'Tone_Mark' : 'N',
    'Virama' : 'H',
    'Visarga' : 'SM',
    'Vowel' : 'V',
    'Vowel_Dependent' : 'M',
    'Vowel_Independent' : 'V',
}
# Maps each value read from the second input file (IndicPositionalCategory.txt
# according to the usage text) to the position name used in the generated
# table.  'Not_Applicable' is the value assumed for code points missing from
# that file.
position_map = {
    'Not_Applicable' : 'END',

    'Left' : 'PRE_C',
    'Top' : 'ABOVE_C',
    'Bottom' : 'BELOW_C',
    'Right' : 'POST_C',

    # These should resolve to the position of the last part of the split sequence.
    'Bottom_And_Right' : 'POST_C',
    'Left_And_Right' : 'POST_C',
    'Top_And_Bottom' : 'BELOW_C',
    'Top_And_Bottom_And_Left' : 'BELOW_C',
    'Top_And_Bottom_And_Right' : 'POST_C',
    'Top_And_Left' : 'ABOVE_C',
    'Top_And_Left_And_Right' : 'POST_C',
    'Top_And_Right' : 'POST_C',

    'Overstruck' : 'AFTER_MAIN',
    # Spelled as in the data file; the earlier all-lowercase
    # 'Visual_order_left' could never match a value read from it.
    'Visual_Order_Left' : 'PRE_M',
}
194
# Per-code-point category overrides, applied after the category_map
# translation.  Each entry replaces the category of that code point (creating
# the entry from the defaults if the code point was not present); the
# position is left as it was.
category_overrides = {

    # These are the variation-selectors. They only appear in the Myanmar grammar
    # but are not Myanmar-specific
    0xFE00: 'VS',
    0xFE01: 'VS',
    0xFE02: 'VS',
    0xFE03: 'VS',
    0xFE04: 'VS',
    0xFE05: 'VS',
    0xFE06: 'VS',
    0xFE07: 'VS',
    0xFE08: 'VS',
    0xFE09: 'VS',
    0xFE0A: 'VS',
    0xFE0B: 'VS',
    0xFE0C: 'VS',
    0xFE0D: 'VS',
    0xFE0E: 'VS',
    0xFE0F: 'VS',

    # These appear in the OT Myanmar spec, but are not Myanmar-specific
    0x2015: 'PLACEHOLDER',
    0x2022: 'PLACEHOLDER',
    0x25FB: 'PLACEHOLDER',
    0x25FC: 'PLACEHOLDER',
    0x25FD: 'PLACEHOLDER',
    0x25FE: 'PLACEHOLDER',


    # Indic

    0x0930: 'Ra', # Devanagari
    0x09B0: 'Ra', # Bengali
    0x09F0: 'Ra', # Bengali
    0x0A30: 'Ra', # Gurmukhi No Reph
    0x0AB0: 'Ra', # Gujarati
    0x0B30: 'Ra', # Oriya
    0x0BB0: 'Ra', # Tamil No Reph
    0x0C30: 'Ra', # Telugu Reph formed only with ZWJ
    0x0CB0: 'Ra', # Kannada
    0x0D30: 'Ra', # Malayalam No Reph, Logical Repha

    # The following act more like the Bindus.
    0x0953: 'SM',
    0x0954: 'SM',

    # U+0A40 GURMUKHI VOWEL SIGN II may be preceded by U+0A02 GURMUKHI SIGN BINDI.
    0x0A40: 'MPst',

    # The following act like consonants.
    0x0A72: 'C',
    0x0A73: 'C',
    0x1CF5: 'C',
    0x1CF6: 'C',

    # TODO: The following should only be allowed after a Visarga.
    # For now, just treat them like regular tone marks.
    0x1CE2: 'A',
    0x1CE3: 'A',
    0x1CE4: 'A',
    0x1CE5: 'A',
    0x1CE6: 'A',
    0x1CE7: 'A',
    0x1CE8: 'A',

    # TODO: The following should only be allowed after some of
    # the nasalization marks, maybe only for U+1CE9..U+1CF1.
    # For now, just treat them like tone marks.
    0x1CED: 'A',

    # The following take marks in standalone clusters, similar to Avagraha.
    0xA8F2: 'Symbol',
    0xA8F3: 'Symbol',
    0xA8F4: 'Symbol',
    0xA8F5: 'Symbol',
    0xA8F6: 'Symbol',
    0xA8F7: 'Symbol',
    0x1CE9: 'Symbol',
    0x1CEA: 'Symbol',
    0x1CEB: 'Symbol',
    0x1CEC: 'Symbol',
    0x1CEE: 'Symbol',
    0x1CEF: 'Symbol',
    0x1CF0: 'Symbol',
    0x1CF1: 'Symbol',

    0x0A51: 'M', # https://github.com/harfbuzz/harfbuzz/issues/524

    # According to ScriptExtensions.txt, these Grantha marks may also be used in Tamil,
    # so the Indic shaper needs to know their categories.
    0x11301: 'SM',
    0x11302: 'SM',
    0x11303: 'SM',
    0x1133B: 'N',
    0x1133C: 'N',

    0x0AFB: 'N', # https://github.com/harfbuzz/harfbuzz/issues/552
    0x0B55: 'N', # https://github.com/harfbuzz/harfbuzz/issues/2849

    0x09FC: 'PLACEHOLDER', # https://github.com/harfbuzz/harfbuzz/pull/1613
    0x0C80: 'PLACEHOLDER', # https://github.com/harfbuzz/harfbuzz/pull/623
    0x0D04: 'PLACEHOLDER', # https://github.com/harfbuzz/harfbuzz/pull/3511

    0x25CC: 'DOTTEDCIRCLE',


    # Khmer

    0x179A: 'Ra',

    0x17CC: 'Robatic',
    0x17C9: 'Robatic',
    0x17CA: 'Robatic',

    0x17C6: 'Xgroup',
    0x17CB: 'Xgroup',
    0x17CD: 'Xgroup',
    0x17CE: 'Xgroup',
    0x17CF: 'Xgroup',
    0x17D0: 'Xgroup',
    0x17D1: 'Xgroup',

    0x17C7: 'Ygroup',
    0x17C8: 'Ygroup',
    0x17DD: 'Ygroup',
    0x17D3: 'Ygroup', # Just guessing. Uniscribe doesn't categorize it.

    0x17D9: 'PLACEHOLDER', # https://github.com/harfbuzz/harfbuzz/issues/2384


    # Myanmar

    # https://docs.microsoft.com/en-us/typography/script-development/myanmar#analyze

    0x104E: 'C', # The spec says C, IndicSyllabicCategory says Consonant_Placeholder

    0x1004: 'Ra',
    0x101B: 'Ra',
    0x105A: 'Ra',

    0x1032: 'A',
    0x1036: 'A',

    0x103A: 'As',

    #0x1040: 'D0', # XXX The spec says D0, but Uniscribe doesn't seem to do.

    0x103E: 'MH',
    0x1060: 'ML',
    0x103C: 'MR',
    0x103D: 'MW',
    0x1082: 'MW',
    0x103B: 'MY',
    0x105E: 'MY',
    0x105F: 'MY',

    0x1063: 'PT',
    0x1064: 'PT',
    0x1069: 'PT',
    0x106A: 'PT',
    0x106B: 'PT',
    0x106C: 'PT',
    0x106D: 'PT',
    0xAA7B: 'PT',

    0x1038: 'SM',
    0x1087: 'SM',
    0x1088: 'SM',
    0x1089: 'SM',
    0x108A: 'SM',
    0x108B: 'SM',
    0x108C: 'SM',
    0x108D: 'SM',
    0x108F: 'SM',
    0x109A: 'SM',
    0x109B: 'SM',
    0x109C: 'SM',

    0x104A: 'PLACEHOLDER',
}
# Per-code-point position overrides.  These are applied last, after all other
# position adjustments, and replace only the position of the entry.
position_overrides = {

    0x0A51: 'BELOW_C', # https://github.com/harfbuzz/harfbuzz/issues/524

    0x0B01: 'BEFORE_SUB', # Oriya Bindu is BeforeSub in the spec.
}
382
def matra_pos_left(u, block):
    """Return the position of a left-side (PRE_C) matra.

    The answer does not depend on the code point or the block; both
    parameters exist only so all matra_pos_* helpers share one signature.
    """
    return 'PRE_M'
def matra_pos_right(u, block):
    """Return the position of a right-side (POST_C) matra.

    u     -- code point of the matra; only consulted for Telugu and Kannada.
    block -- name of the block the code point belongs to.
    """
    # Telugu and Kannada split their right-side matras by code point.
    if block == 'Telugu':
        return 'BEFORE_SUB' if u <= 0x0C42 else 'AFTER_SUB'
    if block == 'Kannada':
        return 'AFTER_SUB' if 0x0CC3 <= u <= 0x0CD6 else 'BEFORE_SUB'

    after_post_blocks = (
        'Bengali',
        'Gurmukhi',
        'Gujarati',
        'Oriya',
        'Tamil',
        'Malayalam',
    )
    if block in after_post_blocks:
        return 'AFTER_POST'
    # Devanagari and every block not named above.
    return 'AFTER_SUB'
def matra_pos_top(u, block):
    """Return the position of a top (ABOVE_C) matra for the given block.

    u is accepted for signature parity with the other matra_pos_* helpers
    and is not used.
    """
    # BENG and MLYM don't have top matras.
    exceptions = {
        'Gurmukhi': 'AFTER_POST',  # Deviate from spec
        'Oriya': 'AFTER_MAIN',
        'Telugu': 'BEFORE_SUB',
        'Kannada': 'BEFORE_SUB',
    }
    # Devanagari, Gujarati, Tamil and all remaining blocks share the default.
    return exceptions.get(block, 'AFTER_SUB')
def matra_pos_bottom(u, block):
    """Return the position of a bottom (BELOW_C) matra for the given block.

    u is accepted for signature parity with the other matra_pos_* helpers
    and is not used.
    """
    if block in ('Telugu', 'Kannada'):
        return 'BEFORE_SUB'
    if block in ('Gurmukhi', 'Gujarati', 'Tamil', 'Malayalam'):
        return 'AFTER_POST'
    # Devanagari, Bengali, Oriya and every block not named above.
    return 'AFTER_SUB'
def indic_matra_position(u, pos, block):
    """Reposition a matra: translate its generic position into a final one.

    u     -- code point of the matra.
    pos   -- one of 'PRE_C', 'POST_C', 'ABOVE_C' or 'BELOW_C'.
    block -- name of the block the code point belongs to.

    Returns the position string chosen by the matching matra_pos_* helper.
    Raises AssertionError for any other value of pos.  The error is raised
    explicitly rather than through an `assert` statement so that it still
    fires when Python runs with -O, where the function would otherwise
    silently return None.
    """
    if pos == 'PRE_C':
        return matra_pos_left(u, block)
    if pos == 'POST_C':
        return matra_pos_right(u, block)
    if pos == 'ABOVE_C':
        return matra_pos_top(u, block)
    if pos == 'BELOW_C':
        return matra_pos_bottom(u, block)
    raise AssertionError(
        'unexpected matra position %r for U+%04X (%s)' % (pos, u, block))
423
def position_to_category(pos):
    """Return the vowel category name that corresponds to a matra position.

    pos -- one of 'PRE_C', 'ABOVE_C', 'BELOW_C' or 'POST_C'.

    Raises AssertionError for any other value.  The error is raised
    explicitly rather than through an `assert` statement so that it still
    fires when Python runs with -O, where the function would otherwise
    silently return None.
    """
    if pos == 'PRE_C':
        return 'VPre'
    if pos == 'ABOVE_C':
        return 'VAbv'
    if pos == 'BELOW_C':
        return 'VBlw'
    if pos == 'POST_C':
        return 'VPst'
    raise AssertionError('unexpected matra position %r' % (pos,))
430
431
# Translate the default triple the same way as the real data below.
defaults = (category_map[defaults[0]], position_map[defaults[1]], defaults[2])

# code point -> (category, position, block name)
indic_data = {
    codepoint: (category_map[raw_cat], position_map[raw_pos], block_name)
    for codepoint, (raw_cat, raw_pos, block_name) in combined.items()
}

# Apply the category overrides.  The position is carried over (or taken from
# the defaults for a new code point); the block name is always looked up
# afresh in the third input table.
for codepoint, forced_cat in category_overrides.items():
    kept_pos = indic_data.get(codepoint, defaults)[1]
    indic_data[codepoint] = (forced_cat, kept_pos, unicode_data[2][codepoint])
443
# We only expect position for certain types; everything else is reset to END.
positioned_categories = ('CM', 'SM', 'RS', 'H', 'M', 'MPst')
for codepoint, entry in indic_data.items():
    if entry[0] in positioned_categories:
        continue
    # Only the value of an existing key is replaced, so updating the dict
    # while iterating over it is safe.
    indic_data[codepoint] = (entry[0], 'END', entry[2])
450
# Position overrides are more complicated

# Keep in sync with CONSONANT_FLAGS in the shaper
consonant_categories = ('C', 'CS', 'Ra','CM', 'V', 'PLACEHOLDER', 'DOTTEDCIRCLE')
matra_categories = ('M', 'MPst')
smvd_categories = ('SM', 'VD', 'A', 'Symbol')
for codepoint, entry in indic_data.items():
    cat, pos, block = entry
    if cat in consonant_categories:
        pos = 'BASE_C'
    elif cat in matra_categories:
        if block.startswith(('Khmer', 'Myanmar')):
            # Khmer and Myanmar encode the matra side in the category itself.
            cat = position_to_category(pos)
        else:
            pos = indic_matra_position(codepoint, pos, block)
    elif cat in smvd_categories:
        pos = 'SMVD'
    indic_data[codepoint] = (cat, pos, block)

# Explicit per-code-point positions win over everything computed above.
for codepoint, forced_pos in position_overrides.items():
    kept_cat = indic_data.get(codepoint, defaults)[0]
    indic_data[codepoint] = (kept_cat, forced_pos, unicode_data[2][codepoint])
472
473
# values[i] counts how often each distinct value occurs in column i of
# indic_data (0: category, 1: position, 2: block).  Every counter is seeded
# with the column's default value at a count of one.
values = [{default_value: 1} for default_value in defaults]
for entry in indic_data.values():
    for column, value in enumerate(entry):
        counter = values[column]
        counter[value] = counter.get(value, 0) + 1


# Move the outliers NO-BREAK SPACE and DOTTED CIRCLE out
singles = {}
for codepoint in ALLOWED_SINGLES:
    singles[codepoint] = indic_data.pop(codepoint)
487
# Banner comment (including the first two lines of every input file),
# followed by the includes and the diagnostic pragmas.
preamble = [
    "/* == Start of generated table == */",
    "/*",
    " * The following table is generated by running:",
    " *",
    " * ./gen-indic-table.py IndicSyllabicCategory.txt IndicPositionalCategory.txt Blocks.txt",
    " *",
    " * on files with these headers:",
    " *",
]
for file_header in headers:
    for header_line in file_header:
        preamble.append(" * %s" % (header_line.strip()))
preamble += [
    " */",
    "",
    '#include "hb.hh"',
    "",
    '#ifndef HB_NO_OT_SHAPE',
    "",
    '#include "hb-ot-shaper-indic.hh"',
    "",
    '#pragma GCC diagnostic push',
    '#pragma GCC diagnostic ignored "-Wunused-macros"',
    "",
]
for out_line in preamble:
    print(out_line)
510
# Print categories: one machine header per shaper, then the OT_* macros.
for shaper in categories:
    print('#include "hb-ot-shaper-%s-machine.hh"' % shaper)
print()

# The first shaper to list a category defines its OT_ macro; later shapers
# that list the same name only assert that their value agrees.
done = {}
for shaper, shaper_cats in categories.items():
    print('/* %s */' % shaper)
    for cat in shaper_cats:
        v = shaper[0].upper()
        if cat in done:
            print('static_assert (OT_%s == %s_Cat(%s), "");' % (cat, v, cat))
            continue
        print("#define OT_%s %s_Cat(%s)" % (cat, v, cat))
        done[cat] = v
    print()
526
# Shorten values: short[0] holds aliases for category names, short[1] for
# position names.  Names without an entry get an alias derived automatically
# later on.
short = [
    {
        "Repha": 'Rf',
        "PLACEHOLDER": 'GB',
        "DOTTEDCIRCLE": 'DC',
        "VPst": 'VR',
        "VPre": 'VL',
        "Robatic": 'Rt',
        "Xgroup": 'Xg',
        "Ygroup": 'Yg',
        "As": 'As',
    },
    {
        "END": 'X',
        "BASE_C": 'C',
        "ABOVE_C": 'T',
        "BELOW_C": 'B',
        "POST_C": 'R',
        "PRE_C": 'L',
        "PRE_M": 'LM',
        "AFTER_MAIN": 'A',
        "AFTER_SUB": 'AS',
        "BEFORE_SUB": 'BS',
        "AFTER_POST": 'AP',
        "SMVD": 'SM',
    },
]

# Add some of the values, to make them more readable, and to avoid duplicates.
# all_shorts is the reverse mapping (alias -> full name) for each table.
all_shorts = [
    {alias: name for name, alias in table.items()}
    for table in short
]
559
what = ["OT", "POS"]
what_short = ["_OT", "_POS"]
# One tuple per macro to emit:
#   (short macro name, long macro name, occurrence count as text, raw value)
cat_defs = []
for column, (long_prefix, short_prefix) in enumerate(zip(what, what_short)):
    aliases = short[column]
    taken = all_shorts[column]
    for name in sorted(values[column]):
        if name in aliases:
            alias = aliases[name]
        else:
            # No hand-picked alias: use the ASCII capital letters of the
            # name (with '_And_' collapsed to '_') and insist it is unique.
            squeezed = name.replace('_And_', '_')
            alias = ''.join(ch for ch in squeezed if 'A' <= ch <= 'Z')
            if alias in taken:
                raise Exception ("Duplicate short value alias", name, taken[alias])
            taken[alias] = name
            aliases[name] = alias
        # Only the position names are upper-cased in the long macro name.
        long_name = name.upper() if column else name
        cat_defs.append((
            short_prefix + '_' + alias,
            long_prefix + '_' + long_name,
            str(values[column][name]),
            name,
        ))
576
# Column widths, so that the emitted #define lines are aligned.
maxlen_s = max(len(c[0]) for c in cat_defs)
maxlen_l = max(len(c[1]) for c in cat_defs)
maxlen_n = max(len(c[2]) for c in cat_defs)

# One group of #defines per prefix, each preceded by a blank line.  Every
# short macro name is "<prefix>_<alias>", so group by that prefix rather than
# by substring: a substring test would also pick up a macro from the other
# group whose alias happens to contain the prefix text.
for s in what_short:
    print()
    group_prefix = s + '_'
    for c in cat_defs:
        if not c[0].startswith(group_prefix):
            continue
        print("#define %s %s /* %s chars; %s */" %
              (c[0].ljust(maxlen_s), c[1].ljust(maxlen_l), c[2].rjust(maxlen_n), c[3]))
print()
print('#pragma GCC diagnostic pop')
print()
print("#define INDIC_COMBINE_CATEGORIES(S,M) ((S) | ((M) << 8))")
print()
# The _(S,M) helper pastes the short prefixes onto its two arguments.
print("#define _(S,M) INDIC_COMBINE_CATEGORIES (%s_##S, %s_##M)" % tuple(what_short))
print()
print()
593
# Running totals maintained by print_block(): number of table slots emitted,
# number of those slots backed by real data, and the last block name that
# was announced with a comment.
total = 0
used = 0
last_block = None
def print_block(block, start, end, data):
    """Print the table entries for code points start..end (inclusive).

    block -- block name to announce in a comment, or a false value for
             filler rows that belong to no named block.
    start -- first code point; must be a multiple of 8.
    end   -- last code point; end + 1 must be a multiple of 8.
    data  -- dict from code point to (category, position, block); code
             points missing from it are printed using the global defaults.

    Updates the module-level counters total, used and last_block.
    """
    global total, used, last_block
    if block and block != last_block:
        # Two blank lines, then the block name.
        print("\n\n /* %s */" % block)
    assert start % 8 == 0
    assert (end+1) % 8 == 0
    present = 0
    for codepoint in range(start, end + 1):
        if codepoint % 8 == 0:
            # Start a new row of eight entries.
            print("\n /* %04X */" % codepoint, end="")
        if codepoint in data:
            present += 1
        entry = data.get(codepoint, defaults)
        cell = "_(%s,%s)," % (short[0][entry[0]], short[1][entry[1]])
        print("%9s" % cell, end="")

    total += end - start + 1
    used += present
    if block:
        last_block = block
619
# Emit the table body.  Code points are visited in ascending order and
# grouped into runs aligned to multiples of 8.  Nearby runs are joined with
# filler rows; distant ones start a new range with its own offset macro.
uu = sorted (indic_data)

last = -100000   # last code point already printed
num = 0          # not used below; kept so the module's globals are unchanged
offset = 0       # table index at which the current range starts
starts = []      # first code point of each emitted range
ends = []        # one past the last code point of each emitted range
print ("static const uint16_t indic_table[] = {")
for u in uu:
    if u <= last:
        # Already covered by the run printed for an earlier code point.
        continue
    block = indic_data[u][2]

    start = u//8*8
    end = start+1
    # Extend the run while consecutive code points exist in the same block.
    # indic_data has exactly the keys listed in uu, so testing the dict gives
    # the same answer as scanning the list, without the linear search.
    while end in indic_data and block == indic_data[end][2]:
        end += 1
    end = (end-1)//8*8 + 7

    if start != last + 1:
        if start - last <= 1+16*2:
            # Small gap: pad with filler rows instead of starting a new range.
            print_block (None, last+1, start-1, indic_data)
        else:
            # Large gap: close the previous range (if any) and open a new one.
            if last >= 0:
                ends.append (last + 1)
                offset += ends[-1] - starts[-1]
            print ()
            print ()
            print ("#define indic_offset_0x%04xu %d" % (start, offset))
            starts.append (start)

    print_block (block, start, end, indic_data)
    last = end
# Close the final range.
ends.append (last + 1)
offset += ends[-1] - starts[-1]
print ()
print ()
# Close the table and emit hb_indic_get_categories(), which switches on the
# high bits of the code point and then checks the singles and ranges that
# fall on that page.
occupancy = used * 100. / total
page_bits = 12
print("}; /* Table items: %d; occupancy: %d%% */" % (offset, occupancy))
print()
print("uint16_t")
print("hb_indic_get_categories (hb_codepoint_t u)")
print("{")
print(" switch (u >> %d)" % page_bits)
print(" {")
pages = {cp >> page_bits for cp in starts + ends + list(singles.keys())}
for p in sorted(pages):
    print(" case 0x%0Xu:" % p)
    for single, d in singles.items():
        if single >> page_bits == p:
            print(" if (unlikely (u == 0x%04Xu)) return _(%s,%s);" % (single, short[0][d[0]], short[1][d[1]]))
    for range_start, range_end in zip(starts, ends):
        # A range is listed under the page of either of its boundaries.
        if p == range_start >> page_bits or p == range_end >> page_bits:
            offset_macro = "indic_offset_0x%04xu" % range_start
            print(" if (hb_in_range<hb_codepoint_t> (u, 0x%04Xu, 0x%04Xu)) return indic_table[u - 0x%04Xu + %s];" % (range_start, range_end-1, range_start, offset_macro))
    print(" break;")
    print("")
print(" default:")
print(" break;")
print(" }")
print(" return _(X,X);")
print("}")
print()
# Undefine every helper macro emitted earlier and close the file.
print("#undef _")
print("#undef INDIC_COMBINE_CATEGORIES")
for prefix, aliases, counts in zip(what_short, short, values):
    print()
    for name in sorted(counts):
        print("#undef %s_%s" % (prefix, aliases[name]))
print()
print('#endif')
print()
print("/* == End of generated table == */")
696
# Maintain at least 50% occupancy in the table.  The check runs after all
# output has been printed.
minimum_occupancy = 50
if occupancy < minimum_occupancy:
    raise Exception ("Table too sparse, please investigate: ", occupancy)