(root)/
Python-3.11.7/
Lib/
mimetypes.py
       1  """Guess the MIME type of a file.
       2  
       3  This module defines two useful functions:
       4  
       5  guess_type(url, strict=True) -- guess the MIME type and encoding of a URL.
       6  
       7  guess_extension(type, strict=True) -- guess the extension for a given MIME type.
       8  
       9  It also contains the following, for tuning the behavior:
      10  
      11  Data:
      12  
      13  knownfiles -- list of files to parse
      14  inited -- flag set when init() has been called
      15  suffix_map -- dictionary mapping suffixes to suffixes
      16  encodings_map -- dictionary mapping suffixes to encodings
      17  types_map -- dictionary mapping suffixes to types
      18  
      19  Functions:
      20  
      21  init([files]) -- parse a list of files, default knownfiles (on Windows, the
      22    default values are taken from the registry)
      23  read_mime_types(file) -- parse one file, return a dictionary or None
      24  """
      25  
      26  import os
      27  import sys
      28  import posixpath
      29  import urllib.parse
      30  
      31  try:
      32      from _winapi import _mimetypes_read_windows_registry
      33  except ImportError:
      34      _mimetypes_read_windows_registry = None
      35  
      36  try:
      37      import winreg as _winreg
      38  except ImportError:
      39      _winreg = None
      40  
# Public names exported by ``from mimetypes import *``.
__all__ = [
    "knownfiles", "inited", "MimeTypes",
    "guess_type", "guess_all_extensions", "guess_extension",
    "add_type", "init", "read_mime_types",
    "suffix_map", "encodings_map", "types_map", "common_types"
]

# Candidate mime.types files.  init() walks this list in order and reads
# every entry that exists as a regular file.
# NOTE(review): "/usr/local/etc/httpd/conf/mime.types" is listed twice below,
# so init() will parse that file twice when it exists.
knownfiles = [
    "/etc/mime.types",
    "/etc/httpd/mime.types",                    # Mac OS X
    "/etc/httpd/conf/mime.types",               # Apache
    "/etc/apache/mime.types",                   # Apache 1
    "/etc/apache2/mime.types",                  # Apache 2
    "/usr/local/etc/httpd/conf/mime.types",
    "/usr/local/lib/netscape/mime.types",
    "/usr/local/etc/httpd/conf/mime.types",     # Apache 1.2
    "/usr/local/etc/mime.types",                # Apache 1.3
    ]

# Set to True by init(); MimeTypes.__init__() checks it to avoid re-entering
# init().
inited = False
# Shared MimeTypes instance behind the module-level functions; assigned at
# the end of init().
_db = None
      62  
      63  
      64  class ESC[4;38;5;81mMimeTypes:
      65      """MIME-types datastore.
      66  
      67      This datastore can handle information from mime.types-style files
      68      and supports basic determination of MIME type from a filename or
      69      URL, and can guess a reasonable extension given a MIME type.
      70      """
      71  
      72      def __init__(self, filenames=(), strict=True):
      73          if not inited:
      74              init()
      75          self.encodings_map = _encodings_map_default.copy()
      76          self.suffix_map = _suffix_map_default.copy()
      77          self.types_map = ({}, {}) # dict for (non-strict, strict)
      78          self.types_map_inv = ({}, {})
      79          for (ext, type) in _types_map_default.items():
      80              self.add_type(type, ext, True)
      81          for (ext, type) in _common_types_default.items():
      82              self.add_type(type, ext, False)
      83          for name in filenames:
      84              self.read(name, strict)
      85  
      86      def add_type(self, type, ext, strict=True):
      87          """Add a mapping between a type and an extension.
      88  
      89          When the extension is already known, the new
      90          type will replace the old one. When the type
      91          is already known the extension will be added
      92          to the list of known extensions.
      93  
      94          If strict is true, information will be added to
      95          list of standard types, else to the list of non-standard
      96          types.
      97          """
      98          self.types_map[strict][ext] = type
      99          exts = self.types_map_inv[strict].setdefault(type, [])
     100          if ext not in exts:
     101              exts.append(ext)
     102  
     103      def guess_type(self, url, strict=True):
     104          """Guess the type of a file which is either a URL or a path-like object.
     105  
     106          Return value is a tuple (type, encoding) where type is None if
     107          the type can't be guessed (no or unknown suffix) or a string
     108          of the form type/subtype, usable for a MIME Content-type
     109          header; and encoding is None for no encoding or the name of
     110          the program used to encode (e.g. compress or gzip).  The
     111          mappings are table driven.  Encoding suffixes are case
     112          sensitive; type suffixes are first tried case sensitive, then
     113          case insensitive.
     114  
     115          The suffixes .tgz, .taz and .tz (case sensitive!) are all
     116          mapped to '.tar.gz'.  (This is table-driven too, using the
     117          dictionary suffix_map.)
     118  
     119          Optional `strict' argument when False adds a bunch of commonly found,
     120          but non-standard types.
     121          """
     122          url = os.fspath(url)
     123          scheme, url = urllib.parse._splittype(url)
     124          if scheme == 'data':
     125              # syntax of data URLs:
     126              # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
     127              # mediatype := [ type "/" subtype ] *( ";" parameter )
     128              # data      := *urlchar
     129              # parameter := attribute "=" value
     130              # type/subtype defaults to "text/plain"
     131              comma = url.find(',')
     132              if comma < 0:
     133                  # bad data URL
     134                  return None, None
     135              semi = url.find(';', 0, comma)
     136              if semi >= 0:
     137                  type = url[:semi]
     138              else:
     139                  type = url[:comma]
     140              if '=' in type or '/' not in type:
     141                  type = 'text/plain'
     142              return type, None           # never compressed, so encoding is None
     143          base, ext = posixpath.splitext(url)
     144          while (ext_lower := ext.lower()) in self.suffix_map:
     145              base, ext = posixpath.splitext(base + self.suffix_map[ext_lower])
     146          # encodings_map is case sensitive
     147          if ext in self.encodings_map:
     148              encoding = self.encodings_map[ext]
     149              base, ext = posixpath.splitext(base)
     150          else:
     151              encoding = None
     152          ext = ext.lower()
     153          types_map = self.types_map[True]
     154          if ext in types_map:
     155              return types_map[ext], encoding
     156          elif strict:
     157              return None, encoding
     158          types_map = self.types_map[False]
     159          if ext in types_map:
     160              return types_map[ext], encoding
     161          else:
     162              return None, encoding
     163  
     164      def guess_all_extensions(self, type, strict=True):
     165          """Guess the extensions for a file based on its MIME type.
     166  
     167          Return value is a list of strings giving the possible filename
     168          extensions, including the leading dot ('.').  The extension is not
     169          guaranteed to have been associated with any particular data stream,
     170          but would be mapped to the MIME type `type' by guess_type().
     171  
     172          Optional `strict' argument when false adds a bunch of commonly found,
     173          but non-standard types.
     174          """
     175          type = type.lower()
     176          extensions = list(self.types_map_inv[True].get(type, []))
     177          if not strict:
     178              for ext in self.types_map_inv[False].get(type, []):
     179                  if ext not in extensions:
     180                      extensions.append(ext)
     181          return extensions
     182  
     183      def guess_extension(self, type, strict=True):
     184          """Guess the extension for a file based on its MIME type.
     185  
     186          Return value is a string giving a filename extension,
     187          including the leading dot ('.').  The extension is not
     188          guaranteed to have been associated with any particular data
     189          stream, but would be mapped to the MIME type `type' by
     190          guess_type().  If no extension can be guessed for `type', None
     191          is returned.
     192  
     193          Optional `strict' argument when false adds a bunch of commonly found,
     194          but non-standard types.
     195          """
     196          extensions = self.guess_all_extensions(type, strict)
     197          if not extensions:
     198              return None
     199          return extensions[0]
     200  
     201      def read(self, filename, strict=True):
     202          """
     203          Read a single mime.types-format file, specified by pathname.
     204  
     205          If strict is true, information will be added to
     206          list of standard types, else to the list of non-standard
     207          types.
     208          """
     209          with open(filename, encoding='utf-8') as fp:
     210              self.readfp(fp, strict)
     211  
     212      def readfp(self, fp, strict=True):
     213          """
     214          Read a single mime.types-format file.
     215  
     216          If strict is true, information will be added to
     217          list of standard types, else to the list of non-standard
     218          types.
     219          """
     220          while 1:
     221              line = fp.readline()
     222              if not line:
     223                  break
     224              words = line.split()
     225              for i in range(len(words)):
     226                  if words[i][0] == '#':
     227                      del words[i:]
     228                      break
     229              if not words:
     230                  continue
     231              type, suffixes = words[0], words[1:]
     232              for suff in suffixes:
     233                  self.add_type(type, '.' + suff, strict)
     234  
     235      def read_windows_registry(self, strict=True):
     236          """
     237          Load the MIME types database from Windows registry.
     238  
     239          If strict is true, information will be added to
     240          list of standard types, else to the list of non-standard
     241          types.
     242          """
     243  
     244          if not _mimetypes_read_windows_registry and not _winreg:
     245              return
     246  
     247          add_type = self.add_type
     248          if strict:
     249              add_type = lambda type, ext: self.add_type(type, ext, True)
     250  
     251          # Accelerated function if it is available
     252          if _mimetypes_read_windows_registry:
     253              _mimetypes_read_windows_registry(add_type)
     254          elif _winreg:
     255              self._read_windows_registry(add_type)
     256  
     257      @classmethod
     258      def _read_windows_registry(cls, add_type):
     259          def enum_types(mimedb):
     260              i = 0
     261              while True:
     262                  try:
     263                      ctype = _winreg.EnumKey(mimedb, i)
     264                  except OSError:
     265                      break
     266                  else:
     267                      if '\0' not in ctype:
     268                          yield ctype
     269                  i += 1
     270  
     271          with _winreg.OpenKey(_winreg.HKEY_CLASSES_ROOT, '') as hkcr:
     272              for subkeyname in enum_types(hkcr):
     273                  try:
     274                      with _winreg.OpenKey(hkcr, subkeyname) as subkey:
     275                          # Only check file extensions
     276                          if not subkeyname.startswith("."):
     277                              continue
     278                          # raises OSError if no 'Content Type' value
     279                          mimetype, datatype = _winreg.QueryValueEx(
     280                              subkey, 'Content Type')
     281                          if datatype != _winreg.REG_SZ:
     282                              continue
     283                          add_type(mimetype, subkeyname)
     284                  except OSError:
     285                      continue
     286  
     287  def guess_type(url, strict=True):
     288      """Guess the type of a file based on its URL.
     289  
     290      Return value is a tuple (type, encoding) where type is None if the
     291      type can't be guessed (no or unknown suffix) or a string of the
     292      form type/subtype, usable for a MIME Content-type header; and
     293      encoding is None for no encoding or the name of the program used
     294      to encode (e.g. compress or gzip).  The mappings are table
     295      driven.  Encoding suffixes are case sensitive; type suffixes are
     296      first tried case sensitive, then case insensitive.
     297  
     298      The suffixes .tgz, .taz and .tz (case sensitive!) are all mapped
     299      to ".tar.gz".  (This is table-driven too, using the dictionary
     300      suffix_map).
     301  
     302      Optional `strict' argument when false adds a bunch of commonly found, but
     303      non-standard types.
     304      """
     305      if _db is None:
     306          init()
     307      return _db.guess_type(url, strict)
     308  
     309  
     310  def guess_all_extensions(type, strict=True):
     311      """Guess the extensions for a file based on its MIME type.
     312  
     313      Return value is a list of strings giving the possible filename
     314      extensions, including the leading dot ('.').  The extension is not
     315      guaranteed to have been associated with any particular data
     316      stream, but would be mapped to the MIME type `type' by
     317      guess_type().  If no extension can be guessed for `type', None
     318      is returned.
     319  
     320      Optional `strict' argument when false adds a bunch of commonly found,
     321      but non-standard types.
     322      """
     323      if _db is None:
     324          init()
     325      return _db.guess_all_extensions(type, strict)
     326  
     327  def guess_extension(type, strict=True):
     328      """Guess the extension for a file based on its MIME type.
     329  
     330      Return value is a string giving a filename extension, including the
     331      leading dot ('.').  The extension is not guaranteed to have been
     332      associated with any particular data stream, but would be mapped to the
     333      MIME type `type' by guess_type().  If no extension can be guessed for
     334      `type', None is returned.
     335  
     336      Optional `strict' argument when false adds a bunch of commonly found,
     337      but non-standard types.
     338      """
     339      if _db is None:
     340          init()
     341      return _db.guess_extension(type, strict)
     342  
     343  def add_type(type, ext, strict=True):
     344      """Add a mapping between a type and an extension.
     345  
     346      When the extension is already known, the new
     347      type will replace the old one. When the type
     348      is already known the extension will be added
     349      to the list of known extensions.
     350  
     351      If strict is true, information will be added to
     352      list of standard types, else to the list of non-standard
     353      types.
     354      """
     355      if _db is None:
     356          init()
     357      return _db.add_type(type, ext, strict)
     358  
     359  
     360  def init(files=None):
     361      global suffix_map, types_map, encodings_map, common_types
     362      global inited, _db
     363      inited = True    # so that MimeTypes.__init__() doesn't call us again
     364  
     365      if files is None or _db is None:
     366          db = MimeTypes()
     367          # Quick return if not supported
     368          db.read_windows_registry()
     369  
     370          if files is None:
     371              files = knownfiles
     372          else:
     373              files = knownfiles + list(files)
     374      else:
     375          db = _db
     376  
     377      for file in files:
     378          if os.path.isfile(file):
     379              db.read(file)
     380      encodings_map = db.encodings_map
     381      suffix_map = db.suffix_map
     382      types_map = db.types_map[True]
     383      common_types = db.types_map[False]
     384      # Make the DB a global variable now that it is fully initialized
     385      _db = db
     386  
     387  
     388  def read_mime_types(file):
     389      try:
     390          f = open(file, encoding='utf-8')
     391      except OSError:
     392          return None
     393      with f:
     394          db = MimeTypes()
     395          db.readfp(f, True)
     396          return db.types_map[True]
     397  
     398  
def _default_mime_types():
    """Create the built-in default tables and bind them to module globals.

    Each table is bound under two names at once: the public global
    (suffix_map, encodings_map, types_map, common_types) and its
    private `_*_default' counterpart, so both initially refer to the
    same dict object.
    """
    global suffix_map, _suffix_map_default
    global encodings_map, _encodings_map_default
    global types_map, _types_map_default
    global common_types, _common_types_default

    # Shorthand suffixes and the compound suffix each one stands for.
    suffix_map = _suffix_map_default = {
        '.svgz': '.svg.gz',
        '.tgz': '.tar.gz',
        '.taz': '.tar.gz',
        '.tz': '.tar.gz',
        '.tbz2': '.tar.bz2',
        '.txz': '.tar.xz',
        }

    # Suffixes that denote an encoding (reported separately from the type).
    encodings_map = _encodings_map_default = {
        '.gz': 'gzip',
        '.Z': 'compress',
        '.bz2': 'bzip2',
        '.xz': 'xz',
        '.br': 'br',
        }

    # Before adding new types, make sure they are either registered with IANA,
    # at http://www.iana.org/assignments/media-types
    # or extensions, i.e. using the x- prefix

    # If you add to these, please keep them sorted by mime type.
    # Make sure the entry with the preferred file extension for a particular mime type
    # appears before any others of the same mimetype.
    types_map = _types_map_default = {
        '.js'     : 'application/javascript',
        '.mjs'    : 'application/javascript',
        '.json'   : 'application/json',
        '.webmanifest': 'application/manifest+json',
        '.doc'    : 'application/msword',
        '.dot'    : 'application/msword',
        '.wiz'    : 'application/msword',
        '.nq'     : 'application/n-quads',
        '.nt'     : 'application/n-triples',
        '.bin'    : 'application/octet-stream',
        '.a'      : 'application/octet-stream',
        '.dll'    : 'application/octet-stream',
        '.exe'    : 'application/octet-stream',
        '.o'      : 'application/octet-stream',
        '.obj'    : 'application/octet-stream',
        '.so'     : 'application/octet-stream',
        '.oda'    : 'application/oda',
        '.pdf'    : 'application/pdf',
        '.p7c'    : 'application/pkcs7-mime',
        '.ps'     : 'application/postscript',
        '.ai'     : 'application/postscript',
        '.eps'    : 'application/postscript',
        '.trig'   : 'application/trig',
        '.m3u'    : 'application/vnd.apple.mpegurl',
        '.m3u8'   : 'application/vnd.apple.mpegurl',
        '.xls'    : 'application/vnd.ms-excel',
        '.xlb'    : 'application/vnd.ms-excel',
        '.ppt'    : 'application/vnd.ms-powerpoint',
        '.pot'    : 'application/vnd.ms-powerpoint',
        '.ppa'    : 'application/vnd.ms-powerpoint',
        '.pps'    : 'application/vnd.ms-powerpoint',
        '.pwz'    : 'application/vnd.ms-powerpoint',
        '.wasm'   : 'application/wasm',
        '.bcpio'  : 'application/x-bcpio',
        '.cpio'   : 'application/x-cpio',
        '.csh'    : 'application/x-csh',
        '.dvi'    : 'application/x-dvi',
        '.gtar'   : 'application/x-gtar',
        '.hdf'    : 'application/x-hdf',
        '.h5'     : 'application/x-hdf5',
        '.latex'  : 'application/x-latex',
        '.mif'    : 'application/x-mif',
        '.cdf'    : 'application/x-netcdf',
        '.nc'     : 'application/x-netcdf',
        '.p12'    : 'application/x-pkcs12',
        '.pfx'    : 'application/x-pkcs12',
        '.ram'    : 'application/x-pn-realaudio',
        '.pyc'    : 'application/x-python-code',
        '.pyo'    : 'application/x-python-code',
        '.sh'     : 'application/x-sh',
        '.shar'   : 'application/x-shar',
        '.swf'    : 'application/x-shockwave-flash',
        '.sv4cpio': 'application/x-sv4cpio',
        '.sv4crc' : 'application/x-sv4crc',
        '.tar'    : 'application/x-tar',
        '.tcl'    : 'application/x-tcl',
        '.tex'    : 'application/x-tex',
        '.texi'   : 'application/x-texinfo',
        '.texinfo': 'application/x-texinfo',
        '.roff'   : 'application/x-troff',
        '.t'      : 'application/x-troff',
        '.tr'     : 'application/x-troff',
        '.man'    : 'application/x-troff-man',
        '.me'     : 'application/x-troff-me',
        '.ms'     : 'application/x-troff-ms',
        '.ustar'  : 'application/x-ustar',
        '.src'    : 'application/x-wais-source',
        '.xsl'    : 'application/xml',
        '.rdf'    : 'application/xml',
        '.wsdl'   : 'application/xml',
        '.xpdl'   : 'application/xml',
        '.zip'    : 'application/zip',
        '.3gp'    : 'audio/3gpp',
        '.3gpp'   : 'audio/3gpp',
        '.3g2'    : 'audio/3gpp2',
        '.3gpp2'  : 'audio/3gpp2',
        '.aac'    : 'audio/aac',
        '.adts'   : 'audio/aac',
        '.loas'   : 'audio/aac',
        '.ass'    : 'audio/aac',
        '.au'     : 'audio/basic',
        '.snd'    : 'audio/basic',
        '.mp3'    : 'audio/mpeg',
        '.mp2'    : 'audio/mpeg',
        '.opus'   : 'audio/opus',
        '.aif'    : 'audio/x-aiff',
        '.aifc'   : 'audio/x-aiff',
        '.aiff'   : 'audio/x-aiff',
        '.ra'     : 'audio/x-pn-realaudio',
        '.wav'    : 'audio/x-wav',
        '.avif'   : 'image/avif',
        '.bmp'    : 'image/bmp',
        '.gif'    : 'image/gif',
        '.ief'    : 'image/ief',
        '.jpg'    : 'image/jpeg',
        '.jpe'    : 'image/jpeg',
        '.jpeg'   : 'image/jpeg',
        '.heic'   : 'image/heic',
        '.heif'   : 'image/heif',
        '.png'    : 'image/png',
        '.svg'    : 'image/svg+xml',
        '.tiff'   : 'image/tiff',
        '.tif'    : 'image/tiff',
        '.ico'    : 'image/vnd.microsoft.icon',
        '.ras'    : 'image/x-cmu-raster',
        '.pnm'    : 'image/x-portable-anymap',
        '.pbm'    : 'image/x-portable-bitmap',
        '.pgm'    : 'image/x-portable-graymap',
        '.ppm'    : 'image/x-portable-pixmap',
        '.rgb'    : 'image/x-rgb',
        '.xbm'    : 'image/x-xbitmap',
        '.xpm'    : 'image/x-xpixmap',
        '.xwd'    : 'image/x-xwindowdump',
        '.eml'    : 'message/rfc822',
        '.mht'    : 'message/rfc822',
        '.mhtml'  : 'message/rfc822',
        '.nws'    : 'message/rfc822',
        '.css'    : 'text/css',
        '.csv'    : 'text/csv',
        '.html'   : 'text/html',
        '.htm'    : 'text/html',
        '.n3'     : 'text/n3',
        '.txt'    : 'text/plain',
        '.bat'    : 'text/plain',
        '.c'      : 'text/plain',
        '.h'      : 'text/plain',
        '.ksh'    : 'text/plain',
        '.pl'     : 'text/plain',
        '.srt'    : 'text/plain',
        '.rtx'    : 'text/richtext',
        '.tsv'    : 'text/tab-separated-values',
        '.vtt'    : 'text/vtt',
        '.py'     : 'text/x-python',
        '.etx'    : 'text/x-setext',
        '.sgm'    : 'text/x-sgml',
        '.sgml'   : 'text/x-sgml',
        '.vcf'    : 'text/x-vcard',
        '.xml'    : 'text/xml',
        '.mp4'    : 'video/mp4',
        '.mpeg'   : 'video/mpeg',
        '.m1v'    : 'video/mpeg',
        '.mpa'    : 'video/mpeg',
        '.mpe'    : 'video/mpeg',
        '.mpg'    : 'video/mpeg',
        '.mov'    : 'video/quicktime',
        '.qt'     : 'video/quicktime',
        '.webm'   : 'video/webm',
        '.avi'    : 'video/x-msvideo',
        '.movie'  : 'video/x-sgi-movie',
        }

    # These are non-standard types, commonly found in the wild.  They will
    # only match if strict=0 flag is given to the API methods.

    # Please sort these too
    common_types = _common_types_default = {
        '.rtf' : 'application/rtf',
        '.midi': 'audio/midi',
        '.mid' : 'audio/midi',
        '.jpg' : 'image/jpg',
        '.pict': 'image/pict',
        '.pct' : 'image/pict',
        '.pic' : 'image/pict',
        '.webp': 'image/webp',
        '.xul' : 'text/xul',
        }


# Populate the default tables at import time; MimeTypes.__init__() copies
# from the `_*_default' globals defined here.
_default_mime_types()
     599  
     600  
     601  def _main():
     602      import getopt
     603  
     604      USAGE = """\
     605  Usage: mimetypes.py [options] type
     606  
     607  Options:
     608      --help / -h       -- print this message and exit
     609      --lenient / -l    -- additionally search of some common, but non-standard
     610                           types.
     611      --extension / -e  -- guess extension instead of type
     612  
     613  More than one type argument may be given.
     614  """
     615  
     616      def usage(code, msg=''):
     617          print(USAGE)
     618          if msg: print(msg)
     619          sys.exit(code)
     620  
     621      try:
     622          opts, args = getopt.getopt(sys.argv[1:], 'hle',
     623                                     ['help', 'lenient', 'extension'])
     624      except getopt.error as msg:
     625          usage(1, msg)
     626  
     627      strict = 1
     628      extension = 0
     629      for opt, arg in opts:
     630          if opt in ('-h', '--help'):
     631              usage(0)
     632          elif opt in ('-l', '--lenient'):
     633              strict = 0
     634          elif opt in ('-e', '--extension'):
     635              extension = 1
     636      for gtype in args:
     637          if extension:
     638              guess = guess_extension(gtype, strict)
     639              if not guess: print("I don't know anything about type", gtype)
     640              else: print(guess)
     641          else:
     642              guess, encoding = guess_type(gtype, strict)
     643              if not guess: print("I don't know anything about type", gtype)
     644              else: print('type:', guess, 'encoding:', encoding)
     645  
     646  
     647  if __name__ == '__main__':
     648      _main()