1  #! /usr/bin/env python3
       2  
       3  """Base16, Base32, Base64 (RFC 3548), Base85 and Ascii85 data encodings"""
       4  
       5  # Modified 04-Oct-1995 by Jack Jansen to use binascii module
       6  # Modified 30-Dec-2003 by Barry Warsaw to add full RFC 3548 support
       7  # Modified 22-May-2007 by Guido van Rossum to use bytes everywhere
       8  
       9  import re
      10  import struct
      11  import binascii
      12  
      13  
# Public names exported by a star-import of this module.
__all__ = [
    # Legacy interface exports traditional RFC 2045 Base64 encodings
    'encode', 'decode', 'encodebytes', 'decodebytes',
    # Generalized interface for other encodings
    'b64encode', 'b64decode', 'b32encode', 'b32decode',
    'b32hexencode', 'b32hexdecode', 'b16encode', 'b16decode',
    # Base85 and Ascii85 encodings
    'b85encode', 'b85decode', 'a85encode', 'a85decode',
    # Standard Base64 encoding
    'standard_b64encode', 'standard_b64decode',
    # Some common Base64 alternatives.  As referenced by RFC 3548, see thread
    # starting at:
    #
    # http://zgp.org/pipermail/p2p-hackers/2001-September/000316.html
    'urlsafe_b64encode', 'urlsafe_b64decode',
    ]
      30  
      31  
      32  bytes_types = (bytes, bytearray)  # Types acceptable as binary data
      33  
      34  def _bytes_from_decode_data(s):
      35      if isinstance(s, str):
      36          try:
      37              return s.encode('ascii')
      38          except UnicodeEncodeError:
      39              raise ValueError('string argument should contain only ASCII characters')
      40      if isinstance(s, bytes_types):
      41          return s
      42      try:
      43          return memoryview(s).tobytes()
      44      except TypeError:
      45          raise TypeError("argument should be a bytes-like object or ASCII "
      46                          "string, not %r" % s.__class__.__name__) from None
      47  
      48  
      49  # Base64 encoding/decoding uses binascii
      50  
      51  def b64encode(s, altchars=None):
      52      """Encode the bytes-like object s using Base64 and return a bytes object.
      53  
      54      Optional altchars should be a byte string of length 2 which specifies an
      55      alternative alphabet for the '+' and '/' characters.  This allows an
      56      application to e.g. generate url or filesystem safe Base64 strings.
      57      """
      58      encoded = binascii.b2a_base64(s, newline=False)
      59      if altchars is not None:
      60          assert len(altchars) == 2, repr(altchars)
      61          return encoded.translate(bytes.maketrans(b'+/', altchars))
      62      return encoded
      63  
      64  
      65  def b64decode(s, altchars=None, validate=False):
      66      """Decode the Base64 encoded bytes-like object or ASCII string s.
      67  
      68      Optional altchars must be a bytes-like object or ASCII string of length 2
      69      which specifies the alternative alphabet used instead of the '+' and '/'
      70      characters.
      71  
      72      The result is returned as a bytes object.  A binascii.Error is raised if
      73      s is incorrectly padded.
      74  
      75      If validate is False (the default), characters that are neither in the
      76      normal base-64 alphabet nor the alternative alphabet are discarded prior
      77      to the padding check.  If validate is True, these non-alphabet characters
      78      in the input result in a binascii.Error.
      79      For more information about the strict base64 check, see:
      80  
      81      https://docs.python.org/3.11/library/binascii.html#binascii.a2b_base64
      82      """
      83      s = _bytes_from_decode_data(s)
      84      if altchars is not None:
      85          altchars = _bytes_from_decode_data(altchars)
      86          assert len(altchars) == 2, repr(altchars)
      87          s = s.translate(bytes.maketrans(altchars, b'+/'))
      88      return binascii.a2b_base64(s, strict_mode=validate)
      89  
      90  
      91  def standard_b64encode(s):
      92      """Encode bytes-like object s using the standard Base64 alphabet.
      93  
      94      The result is returned as a bytes object.
      95      """
      96      return b64encode(s)
      97  
      98  def standard_b64decode(s):
      99      """Decode bytes encoded with the standard Base64 alphabet.
     100  
     101      Argument s is a bytes-like object or ASCII string to decode.  The result
     102      is returned as a bytes object.  A binascii.Error is raised if the input
     103      is incorrectly padded.  Characters that are not in the standard alphabet
     104      are discarded prior to the padding check.
     105      """
     106      return b64decode(s)
     107  
     108  
# Byte translation tables between the standard Base64 characters '+' and '/'
# and their URL-safe replacements '-' and '_' (one table per direction).
_urlsafe_encode_translation = bytes.maketrans(b'+/', b'-_')
_urlsafe_decode_translation = bytes.maketrans(b'-_', b'+/')
     111  
     112  def urlsafe_b64encode(s):
     113      """Encode bytes using the URL- and filesystem-safe Base64 alphabet.
     114  
     115      Argument s is a bytes-like object to encode.  The result is returned as a
     116      bytes object.  The alphabet uses '-' instead of '+' and '_' instead of
     117      '/'.
     118      """
     119      return b64encode(s).translate(_urlsafe_encode_translation)
     120  
     121  def urlsafe_b64decode(s):
     122      """Decode bytes using the URL- and filesystem-safe Base64 alphabet.
     123  
     124      Argument s is a bytes-like object or ASCII string to decode.  The result
     125      is returned as a bytes object.  A binascii.Error is raised if the input
     126      is incorrectly padded.  Characters that are not in the URL-safe base-64
     127      alphabet, and are not a plus '+' or slash '/', are discarded prior to the
     128      padding check.
     129  
     130      The alphabet uses '-' instead of '+' and '_' instead of '/'.
     131      """
     132      s = _bytes_from_decode_data(s)
     133      s = s.translate(_urlsafe_decode_translation)
     134      return b64decode(s)
     135  
     136  
     137  
     138  # Base32 encoding/decoding must be done in Python
     139  _B32_ENCODE_DOCSTRING = '''
     140  Encode the bytes-like objects using {encoding} and return a bytes object.
     141  '''
     142  _B32_DECODE_DOCSTRING = '''
     143  Decode the {encoding} encoded bytes-like object or ASCII string s.
     144  
     145  Optional casefold is a flag specifying whether a lowercase alphabet is
     146  acceptable as input.  For security purposes, the default is False.
     147  {extra_args}
     148  The result is returned as a bytes object.  A binascii.Error is raised if
     149  the input is incorrectly padded or if there are non-alphabet
     150  characters present in the input.
     151  '''
     152  _B32_DECODE_MAP01_DOCSTRING = '''
     153  RFC 3548 allows for optional mapping of the digit 0 (zero) to the
     154  letter O (oh), and for optional mapping of the digit 1 (one) to
     155  either the letter I (eye) or letter L (el).  The optional argument
     156  map01 when not None, specifies which letter the digit 1 should be
     157  mapped to (when map01 is not None, the digit 0 is always mapped to
     158  the letter O).  For security purposes the default is None, so that
     159  0 and 1 are not allowed in the input.
     160  '''
# The 32 symbols of each alphabet; a symbol's index is its 5-bit value.
_b32alphabet = b'ABCDEFGHIJKLMNOPQRSTUVWXYZ234567'
_b32hexalphabet = b'0123456789ABCDEFGHIJKLMNOPQRSTUV'
# Lazily filled caches keyed by alphabet: _b32tab2 holds the table of
# two-symbol encodings built by _b32encode, _b32rev the symbol -> value
# mapping built by _b32decode.
_b32tab2 = {}
_b32rev = {}
     165  
     166  def _b32encode(alphabet, s):
     167      global _b32tab2
     168      # Delay the initialization of the table to not waste memory
     169      # if the function is never called
     170      if alphabet not in _b32tab2:
     171          b32tab = [bytes((i,)) for i in alphabet]
     172          _b32tab2[alphabet] = [a + b for a in b32tab for b in b32tab]
     173          b32tab = None
     174  
     175      if not isinstance(s, bytes_types):
     176          s = memoryview(s).tobytes()
     177      leftover = len(s) % 5
     178      # Pad the last quantum with zero bits if necessary
     179      if leftover:
     180          s = s + b'\0' * (5 - leftover)  # Don't use += !
     181      encoded = bytearray()
     182      from_bytes = int.from_bytes
     183      b32tab2 = _b32tab2[alphabet]
     184      for i in range(0, len(s), 5):
     185          c = from_bytes(s[i: i + 5])              # big endian
     186          encoded += (b32tab2[c >> 30] +           # bits 1 - 10
     187                      b32tab2[(c >> 20) & 0x3ff] + # bits 11 - 20
     188                      b32tab2[(c >> 10) & 0x3ff] + # bits 21 - 30
     189                      b32tab2[c & 0x3ff]           # bits 31 - 40
     190                     )
     191      # Adjust for any leftover partial quanta
     192      if leftover == 1:
     193          encoded[-6:] = b'======'
     194      elif leftover == 2:
     195          encoded[-4:] = b'===='
     196      elif leftover == 3:
     197          encoded[-3:] = b'==='
     198      elif leftover == 4:
     199          encoded[-1:] = b'='
     200      return bytes(encoded)
     201  
     202  def _b32decode(alphabet, s, casefold=False, map01=None):
     203      global _b32rev
     204      # Delay the initialization of the table to not waste memory
     205      # if the function is never called
     206      if alphabet not in _b32rev:
     207          _b32rev[alphabet] = {v: k for k, v in enumerate(alphabet)}
     208      s = _bytes_from_decode_data(s)
     209      if len(s) % 8:
     210          raise binascii.Error('Incorrect padding')
     211      # Handle section 2.4 zero and one mapping.  The flag map01 will be either
     212      # False, or the character to map the digit 1 (one) to.  It should be
     213      # either L (el) or I (eye).
     214      if map01 is not None:
     215          map01 = _bytes_from_decode_data(map01)
     216          assert len(map01) == 1, repr(map01)
     217          s = s.translate(bytes.maketrans(b'01', b'O' + map01))
     218      if casefold:
     219          s = s.upper()
     220      # Strip off pad characters from the right.  We need to count the pad
     221      # characters because this will tell us how many null bytes to remove from
     222      # the end of the decoded string.
     223      l = len(s)
     224      s = s.rstrip(b'=')
     225      padchars = l - len(s)
     226      # Now decode the full quanta
     227      decoded = bytearray()
     228      b32rev = _b32rev[alphabet]
     229      for i in range(0, len(s), 8):
     230          quanta = s[i: i + 8]
     231          acc = 0
     232          try:
     233              for c in quanta:
     234                  acc = (acc << 5) + b32rev[c]
     235          except KeyError:
     236              raise binascii.Error('Non-base32 digit found') from None
     237          decoded += acc.to_bytes(5)  # big endian
     238      # Process the last, partial quanta
     239      if l % 8 or padchars not in {0, 1, 3, 4, 6}:
     240          raise binascii.Error('Incorrect padding')
     241      if padchars and decoded:
     242          acc <<= 5 * padchars
     243          last = acc.to_bytes(5)  # big endian
     244          leftover = (43 - 5 * padchars) // 8  # 1: 4, 3: 3, 4: 2, 6: 1
     245          decoded[-5:] = last[:leftover]
     246      return bytes(decoded)
     247  
     248  
     249  def b32encode(s):
     250      return _b32encode(_b32alphabet, s)
     251  b32encode.__doc__ = _B32_ENCODE_DOCSTRING.format(encoding='base32')
     252  
     253  def b32decode(s, casefold=False, map01=None):
     254      return _b32decode(_b32alphabet, s, casefold, map01)
     255  b32decode.__doc__ = _B32_DECODE_DOCSTRING.format(encoding='base32',
     256                                          extra_args=_B32_DECODE_MAP01_DOCSTRING)
     257  
     258  def b32hexencode(s):
     259      return _b32encode(_b32hexalphabet, s)
     260  b32hexencode.__doc__ = _B32_ENCODE_DOCSTRING.format(encoding='base32hex')
     261  
     262  def b32hexdecode(s, casefold=False):
     263      # base32hex does not have the 01 mapping
     264      return _b32decode(_b32hexalphabet, s, casefold)
     265  b32hexdecode.__doc__ = _B32_DECODE_DOCSTRING.format(encoding='base32hex',
     266                                                      extra_args='')
     267  
     268  
     269  # RFC 3548, Base 16 Alphabet specifies uppercase, but hexlify() returns
     270  # lowercase.  The RFC also recommends against accepting input case
     271  # insensitively.
     272  def b16encode(s):
     273      """Encode the bytes-like object s using Base16 and return a bytes object.
     274      """
     275      return binascii.hexlify(s).upper()
     276  
     277  
     278  def b16decode(s, casefold=False):
     279      """Decode the Base16 encoded bytes-like object or ASCII string s.
     280  
     281      Optional casefold is a flag specifying whether a lowercase alphabet is
     282      acceptable as input.  For security purposes, the default is False.
     283  
     284      The result is returned as a bytes object.  A binascii.Error is raised if
     285      s is incorrectly padded or if there are non-alphabet characters present
     286      in the input.
     287      """
     288      s = _bytes_from_decode_data(s)
     289      if casefold:
     290          s = s.upper()
     291      if re.search(b'[^0-9A-F]', s):
     292          raise binascii.Error('Non-base16 digit found')
     293      return binascii.unhexlify(s)
     294  
     295  #
     296  # Ascii85 encoding/decoding
     297  #
     298  
# Encoding tables for Ascii85; they stay None until a85encode() builds them
# on first use.
_a85chars = None
_a85chars2 = None
# Markers that frame the data in the Adobe variant of Ascii85.
_A85START = b"<~"
_A85END = b"~>"
     303  
     304  def _85encode(b, chars, chars2, pad=False, foldnuls=False, foldspaces=False):
     305      # Helper function for a85encode and b85encode
     306      if not isinstance(b, bytes_types):
     307          b = memoryview(b).tobytes()
     308  
     309      padding = (-len(b)) % 4
     310      if padding:
     311          b = b + b'\0' * padding
     312      words = struct.Struct('!%dI' % (len(b) // 4)).unpack(b)
     313  
     314      chunks = [b'z' if foldnuls and not word else
     315                b'y' if foldspaces and word == 0x20202020 else
     316                (chars2[word // 614125] +
     317                 chars2[word // 85 % 7225] +
     318                 chars[word % 85])
     319                for word in words]
     320  
     321      if padding and not pad:
     322          if chunks[-1] == b'z':
     323              chunks[-1] = chars[0] * 5
     324          chunks[-1] = chunks[-1][:-padding]
     325  
     326      return b''.join(chunks)
     327  
     328  def a85encode(b, *, foldspaces=False, wrapcol=0, pad=False, adobe=False):
     329      """Encode bytes-like object b using Ascii85 and return a bytes object.
     330  
     331      foldspaces is an optional flag that uses the special short sequence 'y'
     332      instead of 4 consecutive spaces (ASCII 0x20) as supported by 'btoa'. This
     333      feature is not supported by the "standard" Adobe encoding.
     334  
     335      wrapcol controls whether the output should have newline (b'\\n') characters
     336      added to it. If this is non-zero, each output line will be at most this
     337      many characters long.
     338  
     339      pad controls whether the input is padded to a multiple of 4 before
     340      encoding. Note that the btoa implementation always pads.
     341  
     342      adobe controls whether the encoded byte sequence is framed with <~ and ~>,
     343      which is used by the Adobe implementation.
     344      """
     345      global _a85chars, _a85chars2
     346      # Delay the initialization of tables to not waste memory
     347      # if the function is never called
     348      if _a85chars2 is None:
     349          _a85chars = [bytes((i,)) for i in range(33, 118)]
     350          _a85chars2 = [(a + b) for a in _a85chars for b in _a85chars]
     351  
     352      result = _85encode(b, _a85chars, _a85chars2, pad, True, foldspaces)
     353  
     354      if adobe:
     355          result = _A85START + result
     356      if wrapcol:
     357          wrapcol = max(2 if adobe else 1, wrapcol)
     358          chunks = [result[i: i + wrapcol]
     359                    for i in range(0, len(result), wrapcol)]
     360          if adobe:
     361              if len(chunks[-1]) + 2 > wrapcol:
     362                  chunks.append(b'')
     363          result = b'\n'.join(chunks)
     364      if adobe:
     365          result += _A85END
     366  
     367      return result
     368  
     369  def a85decode(b, *, foldspaces=False, adobe=False, ignorechars=b' \t\n\r\v'):
     370      """Decode the Ascii85 encoded bytes-like object or ASCII string b.
     371  
     372      foldspaces is a flag that specifies whether the 'y' short sequence should be
     373      accepted as shorthand for 4 consecutive spaces (ASCII 0x20). This feature is
     374      not supported by the "standard" Adobe encoding.
     375  
     376      adobe controls whether the input sequence is in Adobe Ascii85 format (i.e.
     377      is framed with <~ and ~>).
     378  
     379      ignorechars should be a byte string containing characters to ignore from the
     380      input. This should only contain whitespace characters, and by default
     381      contains all whitespace characters in ASCII.
     382  
     383      The result is returned as a bytes object.
     384      """
     385      b = _bytes_from_decode_data(b)
     386      if adobe:
     387          if not b.endswith(_A85END):
     388              raise ValueError(
     389                  "Ascii85 encoded byte sequences must end "
     390                  "with {!r}".format(_A85END)
     391                  )
     392          if b.startswith(_A85START):
     393              b = b[2:-2]  # Strip off start/end markers
     394          else:
     395              b = b[:-2]
     396      #
     397      # We have to go through this stepwise, so as to ignore spaces and handle
     398      # special short sequences
     399      #
     400      packI = struct.Struct('!I').pack
     401      decoded = []
     402      decoded_append = decoded.append
     403      curr = []
     404      curr_append = curr.append
     405      curr_clear = curr.clear
     406      for x in b + b'u' * 4:
     407          if b'!'[0] <= x <= b'u'[0]:
     408              curr_append(x)
     409              if len(curr) == 5:
     410                  acc = 0
     411                  for x in curr:
     412                      acc = 85 * acc + (x - 33)
     413                  try:
     414                      decoded_append(packI(acc))
     415                  except struct.error:
     416                      raise ValueError('Ascii85 overflow') from None
     417                  curr_clear()
     418          elif x == b'z'[0]:
     419              if curr:
     420                  raise ValueError('z inside Ascii85 5-tuple')
     421              decoded_append(b'\0\0\0\0')
     422          elif foldspaces and x == b'y'[0]:
     423              if curr:
     424                  raise ValueError('y inside Ascii85 5-tuple')
     425              decoded_append(b'\x20\x20\x20\x20')
     426          elif x in ignorechars:
     427              # Skip whitespace
     428              continue
     429          else:
     430              raise ValueError('Non-Ascii85 digit found: %c' % x)
     431  
     432      result = b''.join(decoded)
     433      padding = 4 - len(curr)
     434      if padding:
     435          # Throw away the extra padding
     436          result = result[:-padding]
     437      return result
     438  
     439  # The following code is originally taken (with permission) from Mercurial
     440  
# The 85 symbols of the base85 alphabet; a symbol's index is its value.
_b85alphabet = (b"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
                b"abcdefghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{|}~")
# Encoding tables (built by b85encode) and decoding table (built by
# b85decode); each stays None until first use.
_b85chars = None
_b85chars2 = None
_b85dec = None
     446  
     447  def b85encode(b, pad=False):
     448      """Encode bytes-like object b in base85 format and return a bytes object.
     449  
     450      If pad is true, the input is padded with b'\\0' so its length is a multiple of
     451      4 bytes before encoding.
     452      """
     453      global _b85chars, _b85chars2
     454      # Delay the initialization of tables to not waste memory
     455      # if the function is never called
     456      if _b85chars2 is None:
     457          _b85chars = [bytes((i,)) for i in _b85alphabet]
     458          _b85chars2 = [(a + b) for a in _b85chars for b in _b85chars]
     459      return _85encode(b, _b85chars, _b85chars2, pad)
     460  
     461  def b85decode(b):
     462      """Decode the base85-encoded bytes-like object or ASCII string b
     463  
     464      The result is returned as a bytes object.
     465      """
     466      global _b85dec
     467      # Delay the initialization of tables to not waste memory
     468      # if the function is never called
     469      if _b85dec is None:
     470          _b85dec = [None] * 256
     471          for i, c in enumerate(_b85alphabet):
     472              _b85dec[c] = i
     473  
     474      b = _bytes_from_decode_data(b)
     475      padding = (-len(b)) % 5
     476      b = b + b'~' * padding
     477      out = []
     478      packI = struct.Struct('!I').pack
     479      for i in range(0, len(b), 5):
     480          chunk = b[i:i + 5]
     481          acc = 0
     482          try:
     483              for c in chunk:
     484                  acc = acc * 85 + _b85dec[c]
     485          except TypeError:
     486              for j, c in enumerate(chunk):
     487                  if _b85dec[c] is None:
     488                      raise ValueError('bad base85 character at position %d'
     489                                      % (i + j)) from None
     490              raise
     491          try:
     492              out.append(packI(acc))
     493          except struct.error:
     494              raise ValueError('base85 overflow in hunk starting at byte %d'
     495                               % i) from None
     496  
     497      result = b''.join(out)
     498      if padding:
     499          result = result[:-padding]
     500      return result
     501  
     502  # Legacy interface.  This code could be cleaned up since I don't believe
     503  # binascii has any line length limitations.  It just doesn't seem worth it
     504  # though.  The files should be opened in binary mode.
     505  
     506  MAXLINESIZE = 76 # Excluding the CRLF
     507  MAXBINSIZE = (MAXLINESIZE//4)*3
     508  
     509  def encode(input, output):
     510      """Encode a file; input and output are binary files."""
     511      while True:
     512          s = input.read(MAXBINSIZE)
     513          if not s:
     514              break
     515          while len(s) < MAXBINSIZE:
     516              ns = input.read(MAXBINSIZE-len(s))
     517              if not ns:
     518                  break
     519              s += ns
     520          line = binascii.b2a_base64(s)
     521          output.write(line)
     522  
     523  
     524  def decode(input, output):
     525      """Decode a file; input and output are binary files."""
     526      while True:
     527          line = input.readline()
     528          if not line:
     529              break
     530          s = binascii.a2b_base64(line)
     531          output.write(s)
     532  
     533  def _input_type_check(s):
     534      try:
     535          m = memoryview(s)
     536      except TypeError as err:
     537          msg = "expected bytes-like object, not %s" % s.__class__.__name__
     538          raise TypeError(msg) from err
     539      if m.format not in ('c', 'b', 'B'):
     540          msg = ("expected single byte elements, not %r from %s" %
     541                                            (m.format, s.__class__.__name__))
     542          raise TypeError(msg)
     543      if m.ndim != 1:
     544          msg = ("expected 1-D data, not %d-D data from %s" %
     545                                            (m.ndim, s.__class__.__name__))
     546          raise TypeError(msg)
     547  
     548  
     549  def encodebytes(s):
     550      """Encode a bytestring into a bytes object containing multiple lines
     551      of base-64 data."""
     552      _input_type_check(s)
     553      pieces = []
     554      for i in range(0, len(s), MAXBINSIZE):
     555          chunk = s[i : i + MAXBINSIZE]
     556          pieces.append(binascii.b2a_base64(chunk))
     557      return b"".join(pieces)
     558  
     559  
     560  def decodebytes(s):
     561      """Decode a bytestring of base-64 data into a bytes object."""
     562      _input_type_check(s)
     563      return binascii.a2b_base64(s)
     564  
     565  
     566  # Usable as a script...
     567  def main():
     568      """Small main program"""
     569      import sys, getopt
     570      usage = """usage: %s [-h|-d|-e|-u|-t] [file|-]
     571          -h: print this help message and exit
     572          -d, -u: decode
     573          -e: encode (default)
     574          -t: encode and decode string 'Aladdin:open sesame'"""%sys.argv[0]
     575      try:
     576          opts, args = getopt.getopt(sys.argv[1:], 'hdeut')
     577      except getopt.error as msg:
     578          sys.stdout = sys.stderr
     579          print(msg)
     580          print(usage)
     581          sys.exit(2)
     582      func = encode
     583      for o, a in opts:
     584          if o == '-e': func = encode
     585          if o == '-d': func = decode
     586          if o == '-u': func = decode
     587          if o == '-t': test(); return
     588          if o == '-h': print(usage); return
     589      if args and args[0] != '-':
     590          with open(args[0], 'rb') as f:
     591              func(f, sys.stdout.buffer)
     592      else:
     593          func(sys.stdin.buffer, sys.stdout.buffer)
     594  
     595  
     596  def test():
     597      s0 = b"Aladdin:open sesame"
     598      print(repr(s0))
     599      s1 = encodebytes(s0)
     600      print(repr(s1))
     601      s2 = decodebytes(s1)
     602      print(repr(s2))
     603      assert s0 == s2
     604  
     605  
# Run the command-line interface only when executed as a script, not when
# imported as a module.
if __name__ == '__main__':
    main()