(root)/
Python-3.11.7/
Tools/
unicode/
genmap_support.py
       1  #
       2  # genmap_support.py: Multibyte Codec Map Generator
       3  #
       4  # Original Author:  Hye-Shik Chang <perky@FreeBSD.org>
       5  # Modified Author:  Dong-hee Na <donghee.na92@gmail.com>
       6  #
       7  
       8  
       9  class ESC[4;38;5;81mBufferedFiller:
      10      def __init__(self, column=78):
      11          self.column = column
      12          self.buffered = []
      13          self.cline = []
      14          self.clen = 0
      15          self.count = 0
      16  
      17      def write(self, *data):
      18          for s in data:
      19              if len(s) > self.column:
      20                  raise ValueError("token is too long")
      21              if len(s) + self.clen > self.column:
      22                  self.flush()
      23              self.clen += len(s)
      24              self.cline.append(s)
      25              self.count += 1
      26  
      27      def flush(self):
      28          if not self.cline:
      29              return
      30          self.buffered.append(''.join(self.cline))
      31          self.clen = 0
      32          del self.cline[:]
      33  
      34      def printout(self, fp):
      35          self.flush()
      36          for l in self.buffered:
      37              fp.write(f'{l}\n')
      38          del self.buffered[:]
      39  
      40      def __len__(self):
      41          return self.count
      42  
      43  
      44  class ESC[4;38;5;81mDecodeMapWriter:
      45      filler_class = BufferedFiller
      46  
      47      def __init__(self, fp, prefix, decode_map):
      48          self.fp = fp
      49          self.prefix = prefix
      50          self.decode_map = decode_map
      51          self.filler = self.filler_class()
      52  
      53      def update_decode_map(self, c1range, c2range, onlymask=(), wide=0):
      54          c2values = range(c2range[0], c2range[1] + 1)
      55  
      56          for c1 in range(c1range[0], c1range[1] + 1):
      57              if c1 not in self.decode_map or (onlymask and c1 not in onlymask):
      58                  continue
      59              c2map = self.decode_map[c1]
      60              rc2values = [n for n in c2values if n in c2map]
      61              if not rc2values:
      62                  continue
      63  
      64              c2map[self.prefix] = True
      65              c2map['min'] = rc2values[0]
      66              c2map['max'] = rc2values[-1]
      67              c2map['midx'] = len(self.filler)
      68  
      69              for v in range(rc2values[0], rc2values[-1] + 1):
      70                  if v in c2map:
      71                      self.filler.write('%d,' % c2map[v])
      72                  else:
      73                      self.filler.write('U,')
      74  
      75      def generate(self, wide=False):
      76          if not wide:
      77              self.fp.write(f"static const ucs2_t __{self.prefix}_decmap[{len(self.filler)}] = {{\n")
      78          else:
      79              self.fp.write(f"static const Py_UCS4 __{self.prefix}_decmap[{len(self.filler)}] = {{\n")
      80  
      81          self.filler.printout(self.fp)
      82          self.fp.write("};\n\n")
      83  
      84          if not wide:
      85              self.fp.write(f"static const struct dbcs_index {self.prefix}_decmap[256] = {{\n")
      86          else:
      87              self.fp.write(f"static const struct widedbcs_index {self.prefix}_decmap[256] = {{\n")
      88  
      89          for i in range(256):
      90              if i in self.decode_map and self.prefix in self.decode_map[i]:
      91                  m = self.decode_map
      92                  prefix = self.prefix
      93              else:
      94                  self.filler.write("{", "0,", "0,", "0", "},")
      95                  continue
      96  
      97              self.filler.write("{", "__%s_decmap" % prefix, "+", "%d" % m[i]['midx'],
      98                                ",", "%d," % m[i]['min'], "%d" % m[i]['max'], "},")
      99          self.filler.printout(self.fp)
     100          self.fp.write("};\n\n")
     101  
     102  
     103  class ESC[4;38;5;81mEncodeMapWriter:
     104      filler_class = BufferedFiller
     105      elemtype = 'DBCHAR'
     106      indextype = 'struct unim_index'
     107  
     108      def __init__(self, fp, prefix, encode_map):
     109          self.fp = fp
     110          self.prefix = prefix
     111          self.encode_map = encode_map
     112          self.filler = self.filler_class()
     113  
     114      def generate(self):
     115          self.buildmap()
     116          self.printmap()
     117  
     118      def buildmap(self):
     119          for c1 in range(0, 256):
     120              if c1 not in self.encode_map:
     121                  continue
     122              c2map = self.encode_map[c1]
     123              rc2values = [k for k in c2map.keys()]
     124              rc2values.sort()
     125              if not rc2values:
     126                  continue
     127  
     128              c2map[self.prefix] = True
     129              c2map['min'] = rc2values[0]
     130              c2map['max'] = rc2values[-1]
     131              c2map['midx'] = len(self.filler)
     132  
     133              for v in range(rc2values[0], rc2values[-1] + 1):
     134                  if v not in c2map:
     135                      self.write_nochar()
     136                  elif isinstance(c2map[v], int):
     137                      self.write_char(c2map[v])
     138                  elif isinstance(c2map[v], tuple):
     139                      self.write_multic(c2map[v])
     140                  else:
     141                      raise ValueError
     142  
     143      def write_nochar(self):
     144          self.filler.write('N,')
     145  
     146      def write_multic(self, point):
     147          self.filler.write('M,')
     148  
     149      def write_char(self, point):
     150          self.filler.write(str(point) + ',')
     151  
     152      def printmap(self):
     153          self.fp.write(f"static const {self.elemtype} __{self.prefix}_encmap[{len(self.filler)}] = {{\n")
     154          self.filler.printout(self.fp)
     155          self.fp.write("};\n\n")
     156          self.fp.write(f"static const {self.indextype} {self.prefix}_encmap[256] = {{\n")
     157  
     158          for i in range(256):
     159              if i in self.encode_map and self.prefix in self.encode_map[i]:
     160                  self.filler.write("{", "__%s_encmap" % self.prefix, "+",
     161                                    "%d" % self.encode_map[i]['midx'], ",",
     162                                    "%d," % self.encode_map[i]['min'],
     163                                    "%d" % self.encode_map[i]['max'], "},")
     164              else:
     165                  self.filler.write("{", "0,", "0,", "0", "},")
     166                  continue
     167          self.filler.printout(self.fp)
     168          self.fp.write("};\n\n")
     169  
     170  
     171  def open_mapping_file(path, source):
     172      try:
     173          f = open(path)
     174      except IOError:
     175          raise SystemExit(f'{source} is needed')
     176      return f
     177  
     178  
     179  def print_autogen(fo, source):
     180      fo.write(f'// AUTO-GENERATED FILE FROM {source}: DO NOT EDIT\n')
     181  
     182  
     183  def loadmap(fo, natcol=0, unicol=1, sbcs=0):
     184      print("Loading from", fo)
     185      fo.seek(0, 0)
     186      decmap = {}
     187      for line in fo:
     188          line = line.split('#', 1)[0].strip()
     189          if not line or len(line.split()) < 2:
     190              continue
     191  
     192          row = [eval(e) for e in line.split()]
     193          loc, uni = row[natcol], row[unicol]
     194          if loc >= 0x100 or sbcs:
     195              decmap.setdefault((loc >> 8), {})
     196              decmap[(loc >> 8)][(loc & 0xff)] = uni
     197  
     198      return decmap