(root)/
Python-3.12.0/
Lib/
encodings/
__init__.py
       1  """ Standard "encodings" Package
       2  
       3      Standard Python encoding modules are stored in this package
       4      directory.
       5  
       6      Codec modules must have names corresponding to normalized encoding
       7      names as defined in the normalize_encoding() function below, e.g.
       8      'utf-8' must be implemented by the module 'utf_8.py'.
       9  
      10      Each codec module must export the following interface:
      11  
      12      * getregentry() -> codecs.CodecInfo object
      13      The getregentry() API must return a CodecInfo object with encoder, decoder,
      14      incrementalencoder, incrementaldecoder, streamwriter and streamreader
      15      attributes which adhere to the Python Codec Interface Standard.
      16  
      17      In addition, a module may optionally also define the following
      18      APIs which are then used by the package's codec search function:
      19  
      20      * getaliases() -> sequence of encoding name strings to use as aliases
      21  
      22      Alias names returned by getaliases() must be normalized encoding
      23      names as defined by normalize_encoding().
      24  
      25  Written by Marc-Andre Lemburg (mal@lemburg.com).
      26  
      27  (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
      28  
      29  """#"
      30  
      31  import codecs
      32  import sys
      33  from . import aliases
      34  
# Results of search_function(), keyed by the encoding name exactly as it was
# passed in.  Failed lookups are stored as None.
_cache = {}
# Default for the cache lookup in search_function(); lets it tell "never
# looked up" apart from a cached None.
_unknown = '--unknown--'
# Passed as the fromlist argument of __import__() in search_function().
_import_tail = ['*']
# Alias table from the sibling "aliases" module.  search_function() reads it
# and also adds the aliases reported by a codec module's getaliases().
_aliases = aliases.aliases
      39  
      40  class ESC[4;38;5;81mCodecRegistryError(ESC[4;38;5;149mLookupError, ESC[4;38;5;149mSystemError):
      41      pass
      42  
      43  def normalize_encoding(encoding):
      44  
      45      """ Normalize an encoding name.
      46  
      47          Normalization works as follows: all non-alphanumeric
      48          characters except the dot used for Python package names are
      49          collapsed and replaced with a single underscore, e.g. '  -;#'
      50          becomes '_'. Leading and trailing underscores are removed.
      51  
      52          Note that encoding names should be ASCII only.
      53  
      54      """
      55      if isinstance(encoding, bytes):
      56          encoding = str(encoding, "ascii")
      57  
      58      chars = []
      59      punct = False
      60      for c in encoding:
      61          if c.isalnum() or c == '.':
      62              if punct and chars:
      63                  chars.append('_')
      64              if c.isascii():
      65                  chars.append(c)
      66              punct = False
      67          else:
      68              punct = True
      69      return ''.join(chars)
      70  
      71  def search_function(encoding):
      72  
      73      # Cache lookup
      74      entry = _cache.get(encoding, _unknown)
      75      if entry is not _unknown:
      76          return entry
      77  
      78      # Import the module:
      79      #
      80      # First try to find an alias for the normalized encoding
      81      # name and lookup the module using the aliased name, then try to
      82      # lookup the module using the standard import scheme, i.e. first
      83      # try in the encodings package, then at top-level.
      84      #
      85      norm_encoding = normalize_encoding(encoding)
      86      aliased_encoding = _aliases.get(norm_encoding) or \
      87                         _aliases.get(norm_encoding.replace('.', '_'))
      88      if aliased_encoding is not None:
      89          modnames = [aliased_encoding,
      90                      norm_encoding]
      91      else:
      92          modnames = [norm_encoding]
      93      for modname in modnames:
      94          if not modname or '.' in modname:
      95              continue
      96          try:
      97              # Import is absolute to prevent the possibly malicious import of a
      98              # module with side-effects that is not in the 'encodings' package.
      99              mod = __import__('encodings.' + modname, fromlist=_import_tail,
     100                               level=0)
     101          except ImportError:
     102              # ImportError may occur because 'encodings.(modname)' does not exist,
     103              # or because it imports a name that does not exist (see mbcs and oem)
     104              pass
     105          else:
     106              break
     107      else:
     108          mod = None
     109  
     110      try:
     111          getregentry = mod.getregentry
     112      except AttributeError:
     113          # Not a codec module
     114          mod = None
     115  
     116      if mod is None:
     117          # Cache misses
     118          _cache[encoding] = None
     119          return None
     120  
     121      # Now ask the module for the registry entry
     122      entry = getregentry()
     123      if not isinstance(entry, codecs.CodecInfo):
     124          if not 4 <= len(entry) <= 7:
     125              raise CodecRegistryError('module "%s" (%s) failed to register'
     126                                       % (mod.__name__, mod.__file__))
     127          if not callable(entry[0]) or not callable(entry[1]) or \
     128             (entry[2] is not None and not callable(entry[2])) or \
     129             (entry[3] is not None and not callable(entry[3])) or \
     130             (len(entry) > 4 and entry[4] is not None and not callable(entry[4])) or \
     131             (len(entry) > 5 and entry[5] is not None and not callable(entry[5])):
     132              raise CodecRegistryError('incompatible codecs in module "%s" (%s)'
     133                                       % (mod.__name__, mod.__file__))
     134          if len(entry)<7 or entry[6] is None:
     135              entry += (None,)*(6-len(entry)) + (mod.__name__.split(".", 1)[1],)
     136          entry = codecs.CodecInfo(*entry)
     137  
     138      # Cache the codec registry entry
     139      _cache[encoding] = entry
     140  
     141      # Register its aliases (without overwriting previously registered
     142      # aliases)
     143      try:
     144          codecaliases = mod.getaliases()
     145      except AttributeError:
     146          pass
     147      else:
     148          for alias in codecaliases:
     149              if alias not in _aliases:
     150                  _aliases[alias] = modname
     151  
     152      # Return the registry entry
     153      return entry
     154  
# Register search_function in the Python codec registry.  This is a
# module-level statement, so it runs when this package is imported.
codecs.register(search_function)
     157  
     158  if sys.platform == 'win32':
     159      # bpo-671666, bpo-46668: If Python does not implement a codec for current
     160      # Windows ANSI code page, use the "mbcs" codec instead:
     161      # WideCharToMultiByte() and MultiByteToWideChar() functions with CP_ACP.
     162      # Python does not support custom code pages.
     163      def _alias_mbcs(encoding):
     164          try:
     165              import _winapi
     166              ansi_code_page = "cp%s" % _winapi.GetACP()
     167              if encoding == ansi_code_page:
     168                  import encodings.mbcs
     169                  return encodings.mbcs.getregentry()
     170          except ImportError:
     171              # Imports may fail while we are shutting down
     172              pass
     173  
     174      codecs.register(_alias_mbcs)