(root)/
Python-3.11.7/
Tools/
unicode/
genmap_japanese.py
       1  #
# genmap_japanese.py: Japanese Codecs Map Generator
       3  #
       4  # Original Author:  Hye-Shik Chang <perky@FreeBSD.org>
       5  # Modified Author:  Dong-hee Na <donghee.na92@gmail.com>
       6  #
       7  import os
       8  
       9  from genmap_support import *
      10  
      11  JISX0208_C1 = (0x21, 0x74)
      12  JISX0208_C2 = (0x21, 0x7e)
      13  JISX0212_C1 = (0x22, 0x6d)
      14  JISX0212_C2 = (0x21, 0x7e)
      15  JISX0213_C1 = (0x21, 0x7e)
      16  JISX0213_C2 = (0x21, 0x7e)
      17  CP932P0_C1  = (0x81, 0x81) # patches between shift-jis and cp932
      18  CP932P0_C2  = (0x5f, 0xca)
      19  CP932P1_C1  = (0x87, 0x87) # CP932 P1
      20  CP932P1_C2  = (0x40, 0x9c)
      21  CP932P2_C1  = (0xed, 0xfc) # CP932 P2
      22  CP932P2_C2  = (0x40, 0xfc)
      23  
      24  MAPPINGS_JIS0208 = 'http://www.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/JIS/JIS0208.TXT'
      25  MAPPINGS_JIS0212 = 'http://www.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/JIS/JIS0212.TXT'
      26  MAPPINGS_CP932 = 'http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP932.TXT'
      27  MAPPINGS_JISX0213_2004 = 'http://wakaba-web.hp.infoseek.co.jp/table/jisx0213-2004-std.txt'
      28  
      29  
      30  def loadmap_jisx0213(fo):
      31      decmap3, decmap4 = {}, {} # maps to BMP for level 3 and 4
      32      decmap3_2, decmap4_2 = {}, {} # maps to U+2xxxx for level 3 and 4
      33      decmap3_pair = {} # maps to BMP-pair for level 3
      34      for line in fo:
      35          line = line.split('#', 1)[0].strip()
      36          if not line or len(line.split()) < 2:
      37              continue
      38  
      39          row = line.split()
      40          loc = eval('0x' + row[0][2:])
      41          level = eval(row[0][0])
      42          m = None
      43          if len(row[1].split('+')) == 2: # single unicode
      44              uni = eval('0x' + row[1][2:])
      45              if level == 3:
      46                  if uni < 0x10000:
      47                      m = decmap3
      48                  elif 0x20000 <= uni < 0x30000:
      49                      uni -= 0x20000
      50                      m = decmap3_2
      51              elif level == 4:
      52                  if uni < 0x10000:
      53                      m = decmap4
      54                  elif 0x20000 <= uni < 0x30000:
      55                      uni -= 0x20000
      56                      m = decmap4_2
      57              m.setdefault((loc >> 8), {})
      58              m[(loc >> 8)][(loc & 0xff)] = uni
      59          else: # pair
      60              uniprefix = eval('0x' + row[1][2:6]) # body
      61              uni = eval('0x' + row[1][7:11]) # modifier
      62              if level != 3:
      63                  raise ValueError("invalid map")
      64              decmap3_pair.setdefault(uniprefix, {})
      65              m = decmap3_pair[uniprefix]
      66  
      67          if m is None:
      68              raise ValueError("invalid map")
      69          m.setdefault((loc >> 8), {})
      70          m[(loc >> 8)][(loc & 0xff)] = uni
      71  
      72      return decmap3, decmap4, decmap3_2, decmap4_2, decmap3_pair
      73  
      74  
      75  def main():
      76      jisx0208file = open_mapping_file('python-mappings/JIS0208.TXT', MAPPINGS_JIS0208)
      77      jisx0212file = open_mapping_file('python-mappings/JIS0212.TXT', MAPPINGS_JIS0212)
      78      cp932file = open_mapping_file('python-mappings/CP932.TXT', MAPPINGS_CP932)
      79      jisx0213file = open_mapping_file('python-mappings/jisx0213-2004-std.txt', MAPPINGS_JISX0213_2004)
      80  
      81      print("Loading Mapping File...")
      82  
      83      sjisdecmap = loadmap(jisx0208file, natcol=0, unicol=2)
      84      jisx0208decmap = loadmap(jisx0208file, natcol=1, unicol=2)
      85      jisx0212decmap = loadmap(jisx0212file)
      86      cp932decmap = loadmap(cp932file)
      87      jis3decmap, jis4decmap, jis3_2_decmap, jis4_2_decmap, jis3_pairdecmap = loadmap_jisx0213(jisx0213file)
      88  
      89      if jis3decmap[0x21][0x24] != 0xff0c:
      90          raise SystemExit('Please adjust your JIS X 0213 map using jisx0213-2000-std.txt.diff')
      91  
      92      sjisencmap, cp932encmap = {}, {}
      93      jisx0208_0212encmap = {}
      94      for c1, m in sjisdecmap.items():
      95          for c2, code in m.items():
      96              sjisencmap.setdefault(code >> 8, {})
      97              sjisencmap[code >> 8][code & 0xff] = c1 << 8 | c2
      98      for c1, m in cp932decmap.items():
      99          for c2, code in m.items():
     100              cp932encmap.setdefault(code >> 8, {})
     101              if (code & 0xff) not in cp932encmap[code >> 8]:
     102                  cp932encmap[code >> 8][code & 0xff] = c1 << 8 | c2
     103      for c1, m in cp932encmap.copy().items():
     104          for c2, code in m.copy().items():
     105              if c1 in sjisencmap and c2 in sjisencmap[c1] and sjisencmap[c1][c2] == code:
     106                  del cp932encmap[c1][c2]
     107                  if not cp932encmap[c1]:
     108                      del cp932encmap[c1]
     109  
     110      jisx0213pairdecmap = {}
     111      jisx0213pairencmap = []
     112      for unibody, m1 in jis3_pairdecmap.items():
     113          for c1, m2 in m1.items():
     114              for c2, modifier in m2.items():
     115                  jisx0213pairencmap.append((unibody, modifier, c1 << 8 | c2))
     116                  jisx0213pairdecmap.setdefault(c1, {})
     117                  jisx0213pairdecmap[c1][c2] = unibody << 16 | modifier
     118  
     119      # Twinmap for both of JIS X 0208 (MSB unset) and JIS X 0212 (MSB set)
     120      for c1, m in jisx0208decmap.items():
     121          for c2, code in m.items():
     122              jisx0208_0212encmap.setdefault(code >> 8, {})
     123              jisx0208_0212encmap[code >> 8][code & 0xff] = c1 << 8 | c2
     124  
     125      for c1, m in jisx0212decmap.items():
     126          for c2, code in m.items():
     127              jisx0208_0212encmap.setdefault(code >> 8, {})
     128              if (code & 0xff) in jisx0208_0212encmap[code >> 8]:
     129                  print("OOPS!!!", (code))
     130              jisx0208_0212encmap[code >> 8][code & 0xff] = 0x8000 | c1 << 8 | c2
     131  
     132      jisx0213bmpencmap = {}
     133      for c1, m in jis3decmap.copy().items():
     134          for c2, code in m.copy().items():
     135              if c1 in jisx0208decmap and c2 in jisx0208decmap[c1]:
     136                  if code in jis3_pairdecmap:
     137                      jisx0213bmpencmap[code >> 8][code & 0xff] = (0,) # pair
     138                      jisx0213pairencmap.append((code, 0, c1 << 8 | c2))
     139                  elif jisx0208decmap[c1][c2] == code:
     140                      del jis3decmap[c1][c2]
     141                      if not jis3decmap[c1]:
     142                          del jis3decmap[c1]
     143                  else:
     144                      raise ValueError("Difference between JIS X 0208 and JIS X 0213 Plane 1 is found.")
     145              else:
     146                  jisx0213bmpencmap.setdefault(code >> 8, {})
     147                  if code not in jis3_pairdecmap:
     148                      jisx0213bmpencmap[code >> 8][code & 0xff] = c1 << 8 | c2
     149                  else:
     150                      jisx0213bmpencmap[code >> 8][code & 0xff] = (0,) # pair
     151                      jisx0213pairencmap.append((code, 0, c1 << 8 | c2))
     152  
     153      for c1, m in jis4decmap.items():
     154          for c2, code in m.items():
     155              jisx0213bmpencmap.setdefault(code >> 8, {})
     156              jisx0213bmpencmap[code >> 8][code & 0xff] = 0x8000 | c1 << 8 | c2
     157  
     158      jisx0213empencmap = {}
     159      for c1, m in jis3_2_decmap.items():
     160          for c2, code in m.items():
     161              jisx0213empencmap.setdefault(code >> 8, {})
     162              jisx0213empencmap[code >> 8][code & 0xff] = c1 << 8 | c2
     163      for c1, m in jis4_2_decmap.items():
     164          for c2, code in m.items():
     165              jisx0213empencmap.setdefault(code >> 8, {})
     166              jisx0213empencmap[code >> 8][code & 0xff] = 0x8000 | c1 << 8 | c2
     167  
     168      with open("mappings_jp.h", "w") as fp:
     169          print_autogen(fp, os.path.basename(__file__))
     170          print("Generating JIS X 0208 decode map...")
     171          writer = DecodeMapWriter(fp, "jisx0208", jisx0208decmap)
     172          writer.update_decode_map(JISX0208_C1, JISX0208_C2)
     173          writer.generate()
     174  
     175          print("Generating JIS X 0212 decode map...")
     176          writer = DecodeMapWriter(fp, "jisx0212", jisx0212decmap)
     177          writer.update_decode_map(JISX0212_C1, JISX0212_C2)
     178          writer.generate()
     179  
     180          print("Generating JIS X 0208 && JIS X 0212 encode map...")
     181          writer = EncodeMapWriter(fp, "jisxcommon", jisx0208_0212encmap)
     182          writer.generate()
     183  
     184          print("Generating CP932 Extension decode map...")
     185          writer = DecodeMapWriter(fp, "cp932ext", cp932decmap)
     186          writer.update_decode_map(CP932P0_C1, CP932P0_C2)
     187          writer.update_decode_map(CP932P1_C1, CP932P1_C2)
     188          writer.update_decode_map(CP932P2_C1, CP932P2_C2)
     189          writer.generate()
     190  
     191          print("Generating CP932 Extension encode map...")
     192          writer = EncodeMapWriter(fp, "cp932ext", cp932encmap)
     193          writer.generate()
     194  
     195          print("Generating JIS X 0213 Plane 1 BMP decode map...")
     196          writer = DecodeMapWriter(fp, "jisx0213_1_bmp", jis3decmap)
     197          writer.update_decode_map(JISX0213_C1, JISX0213_C2)
     198          writer.generate()
     199  
     200          print("Generating JIS X 0213 Plane 2 BMP decode map...")
     201          writer = DecodeMapWriter(fp, "jisx0213_2_bmp", jis4decmap)
     202          writer.update_decode_map(JISX0213_C1, JISX0213_C2)
     203          writer.generate()
     204  
     205          print("Generating JIS X 0213 BMP encode map...")
     206          writer = EncodeMapWriter(fp, "jisx0213_bmp", jisx0213bmpencmap)
     207          writer.generate()
     208  
     209          print("Generating JIS X 0213 Plane 1 EMP decode map...")
     210          writer = DecodeMapWriter(fp, "jisx0213_1_emp", jis3_2_decmap)
     211          writer.update_decode_map(JISX0213_C1, JISX0213_C2)
     212          writer.generate()
     213  
     214          print("Generating JIS X 0213 Plane 2 EMP decode map...")
     215          writer = DecodeMapWriter(fp, "jisx0213_2_emp", jis4_2_decmap)
     216          writer.update_decode_map(JISX0213_C1, JISX0213_C2)
     217          writer.generate()
     218  
     219          print("Generating JIS X 0213 EMP encode map...")
     220          writer = EncodeMapWriter(fp, "jisx0213_emp", jisx0213empencmap)
     221          writer.generate()
     222  
     223      with open('mappings_jisx0213_pair.h', 'w') as fp:
     224          print_autogen(fp, os.path.basename(__file__))
     225          fp.write(f"#define JISX0213_ENCPAIRS {len(jisx0213pairencmap)}\n")
     226          fp.write("""\
     227  #ifdef EXTERN_JISX0213_PAIR
     228  static const struct widedbcs_index *jisx0213_pair_decmap;
     229  static const struct pair_encodemap *jisx0213_pair_encmap;
     230  #else
     231  """)
     232  
     233          print("Generating JIS X 0213 unicode-pair decode map...")
     234          writer = DecodeMapWriter(fp, "jisx0213_pair", jisx0213pairdecmap)
     235          writer.update_decode_map(JISX0213_C1, JISX0213_C2)
     236          writer.generate(wide=True)
     237  
     238          print("Generating JIS X 0213 unicode-pair encode map...")
     239          jisx0213pairencmap.sort()
     240          fp.write("static const struct pair_encodemap jisx0213_pair_encmap[JISX0213_ENCPAIRS] = {\n")
     241          filler = BufferedFiller()
     242          for body, modifier, jis in jisx0213pairencmap:
     243              filler.write('{', '0x%04x%04x,' % (body, modifier), '0x%04x' % jis, '},')
     244          filler.printout(fp)
     245          fp.write("};\n")
     246          fp.write("#endif\n")
     247  
     248      print("Done!")
     249  
# Run the generator only when this file is executed as a script.
if __name__ == '__main__':
    main()