python (3.11.7)

(root)/
lib/
python3.11/
encodings/
punycode.py
       1  """ Codec for the Punycode encoding, as specified in RFC 3492
       2  
       3  Written by Martin v. Löwis.
       4  """
       5  
       6  import codecs
       7  
       8  ##################### Encoding #####################################
       9  
      10  def segregate(str):
      11      """3.1 Basic code point segregation"""
      12      base = bytearray()
      13      extended = set()
      14      for c in str:
      15          if ord(c) < 128:
      16              base.append(ord(c))
      17          else:
      18              extended.add(c)
      19      extended = sorted(extended)
      20      return bytes(base), extended
      21  
      22  def selective_len(str, max):
      23      """Return the length of str, considering only characters below max."""
      24      res = 0
      25      for c in str:
      26          if ord(c) < max:
      27              res += 1
      28      return res
      29  
      30  def selective_find(str, char, index, pos):
      31      """Return a pair (index, pos), indicating the next occurrence of
      32      char in str. index is the position of the character considering
      33      only ordinals up to and including char, and pos is the position in
      34      the full string. index/pos is the starting position in the full
      35      string."""
      36  
      37      l = len(str)
      38      while 1:
      39          pos += 1
      40          if pos == l:
      41              return (-1, -1)
      42          c = str[pos]
      43          if c == char:
      44              return index+1, pos
      45          elif c < char:
      46              index += 1
      47  
      48  def insertion_unsort(str, extended):
      49      """3.2 Insertion unsort coding"""
      50      oldchar = 0x80
      51      result = []
      52      oldindex = -1
      53      for c in extended:
      54          index = pos = -1
      55          char = ord(c)
      56          curlen = selective_len(str, char)
      57          delta = (curlen+1) * (char - oldchar)
      58          while 1:
      59              index,pos = selective_find(str,c,index,pos)
      60              if index == -1:
      61                  break
      62              delta += index - oldindex
      63              result.append(delta-1)
      64              oldindex = index
      65              delta = 0
      66          oldchar = char
      67  
      68      return result
      69  
      70  def T(j, bias):
      71      # Punycode parameters: tmin = 1, tmax = 26, base = 36
      72      res = 36 * (j + 1) - bias
      73      if res < 1: return 1
      74      if res > 26: return 26
      75      return res
      76  
      77  digits = b"abcdefghijklmnopqrstuvwxyz0123456789"
      78  def generate_generalized_integer(N, bias):
      79      """3.3 Generalized variable-length integers"""
      80      result = bytearray()
      81      j = 0
      82      while 1:
      83          t = T(j, bias)
      84          if N < t:
      85              result.append(digits[N])
      86              return bytes(result)
      87          result.append(digits[t + ((N - t) % (36 - t))])
      88          N = (N - t) // (36 - t)
      89          j += 1
      90  
      91  def adapt(delta, first, numchars):
      92      if first:
      93          delta //= 700
      94      else:
      95          delta //= 2
      96      delta += delta // numchars
      97      # ((base - tmin) * tmax) // 2 == 455
      98      divisions = 0
      99      while delta > 455:
     100          delta = delta // 35 # base - tmin
     101          divisions += 36
     102      bias = divisions + (36 * delta // (delta + 38))
     103      return bias
     104  
     105  
     106  def generate_integers(baselen, deltas):
     107      """3.4 Bias adaptation"""
     108      # Punycode parameters: initial bias = 72, damp = 700, skew = 38
     109      result = bytearray()
     110      bias = 72
     111      for points, delta in enumerate(deltas):
     112          s = generate_generalized_integer(delta, bias)
     113          result.extend(s)
     114          bias = adapt(delta, points==0, baselen+points+1)
     115      return bytes(result)
     116  
     117  def punycode_encode(text):
     118      base, extended = segregate(text)
     119      deltas = insertion_unsort(text, extended)
     120      extended = generate_integers(len(base), deltas)
     121      if base:
     122          return base + b"-" + extended
     123      return extended
     124  
     125  ##################### Decoding #####################################
     126  
     127  def decode_generalized_number(extended, extpos, bias, errors):
     128      """3.3 Generalized variable-length integers"""
     129      result = 0
     130      w = 1
     131      j = 0
     132      while 1:
     133          try:
     134              char = ord(extended[extpos])
     135          except IndexError:
     136              if errors == "strict":
     137                  raise UnicodeError("incomplete punicode string")
     138              return extpos + 1, None
     139          extpos += 1
     140          if 0x41 <= char <= 0x5A: # A-Z
     141              digit = char - 0x41
     142          elif 0x30 <= char <= 0x39:
     143              digit = char - 22 # 0x30-26
     144          elif errors == "strict":
     145              raise UnicodeError("Invalid extended code point '%s'"
     146                                 % extended[extpos-1])
     147          else:
     148              return extpos, None
     149          t = T(j, bias)
     150          result += digit * w
     151          if digit < t:
     152              return extpos, result
     153          w = w * (36 - t)
     154          j += 1
     155  
     156  
     157  def insertion_sort(base, extended, errors):
     158      """3.2 Insertion unsort coding"""
     159      char = 0x80
     160      pos = -1
     161      bias = 72
     162      extpos = 0
     163      while extpos < len(extended):
     164          newpos, delta = decode_generalized_number(extended, extpos,
     165                                                    bias, errors)
     166          if delta is None:
     167              # There was an error in decoding. We can't continue because
     168              # synchronization is lost.
     169              return base
     170          pos += delta+1
     171          char += pos // (len(base) + 1)
     172          if char > 0x10FFFF:
     173              if errors == "strict":
     174                  raise UnicodeError("Invalid character U+%x" % char)
     175              char = ord('?')
     176          pos = pos % (len(base) + 1)
     177          base = base[:pos] + chr(char) + base[pos:]
     178          bias = adapt(delta, (extpos == 0), len(base))
     179          extpos = newpos
     180      return base
     181  
     182  def punycode_decode(text, errors):
     183      if isinstance(text, str):
     184          text = text.encode("ascii")
     185      if isinstance(text, memoryview):
     186          text = bytes(text)
     187      pos = text.rfind(b"-")
     188      if pos == -1:
     189          base = ""
     190          extended = str(text, "ascii").upper()
     191      else:
     192          base = str(text[:pos], "ascii", errors)
     193          extended = str(text[pos+1:], "ascii").upper()
     194      return insertion_sort(base, extended, errors)
     195  
     196  ### Codec APIs
     197  
     198  class ESC[4;38;5;81mCodec(ESC[4;38;5;149mcodecsESC[4;38;5;149m.ESC[4;38;5;149mCodec):
     199  
     200      def encode(self, input, errors='strict'):
     201          res = punycode_encode(input)
     202          return res, len(input)
     203  
     204      def decode(self, input, errors='strict'):
     205          if errors not in ('strict', 'replace', 'ignore'):
     206              raise UnicodeError("Unsupported error handling "+errors)
     207          res = punycode_decode(input, errors)
     208          return res, len(input)
     209  
     210  class ESC[4;38;5;81mIncrementalEncoder(ESC[4;38;5;149mcodecsESC[4;38;5;149m.ESC[4;38;5;149mIncrementalEncoder):
     211      def encode(self, input, final=False):
     212          return punycode_encode(input)
     213  
     214  class ESC[4;38;5;81mIncrementalDecoder(ESC[4;38;5;149mcodecsESC[4;38;5;149m.ESC[4;38;5;149mIncrementalDecoder):
     215      def decode(self, input, final=False):
     216          if self.errors not in ('strict', 'replace', 'ignore'):
     217              raise UnicodeError("Unsupported error handling "+self.errors)
     218          return punycode_decode(input, self.errors)
     219  
     220  class ESC[4;38;5;81mStreamWriter(ESC[4;38;5;149mCodec,ESC[4;38;5;149mcodecsESC[4;38;5;149m.ESC[4;38;5;149mStreamWriter):
     221      pass
     222  
     223  class ESC[4;38;5;81mStreamReader(ESC[4;38;5;149mCodec,ESC[4;38;5;149mcodecsESC[4;38;5;149m.ESC[4;38;5;149mStreamReader):
     224      pass
     225  
     226  ### encodings module API
     227  
     228  def getregentry():
     229      return codecs.CodecInfo(
     230          name='punycode',
     231          encode=Codec().encode,
     232          decode=Codec().decode,
     233          incrementalencoder=IncrementalEncoder,
     234          incrementaldecoder=IncrementalDecoder,
     235          streamwriter=StreamWriter,
     236          streamreader=StreamReader,
     237      )