(root)/
Python-3.12.0/
Lib/
mimetypes.py
       1  """Guess the MIME type of a file.
       2  
       3  This module defines two useful functions:
       4  
       5  guess_type(url, strict=True) -- guess the MIME type and encoding of a URL.
       6  
       7  guess_extension(type, strict=True) -- guess the extension for a given MIME type.
       8  
       9  It also contains the following, for tuning the behavior:
      10  
      11  Data:
      12  
      13  knownfiles -- list of files to parse
      14  inited -- flag set when init() has been called
      15  suffix_map -- dictionary mapping suffixes to suffixes
      16  encodings_map -- dictionary mapping suffixes to encodings
      17  types_map -- dictionary mapping suffixes to types
      18  
      19  Functions:
      20  
      21  init([files]) -- parse a list of files, default knownfiles (on Windows, the
      22    default values are taken from the registry)
      23  read_mime_types(file) -- parse one file, return a dictionary or None
      24  """
      25  
      26  import os
      27  import sys
      28  import posixpath
      29  import urllib.parse
      30  
      31  try:
      32      from _winapi import _mimetypes_read_windows_registry
      33  except ImportError:
      34      _mimetypes_read_windows_registry = None
      35  
      36  try:
      37      import winreg as _winreg
      38  except ImportError:
      39      _winreg = None
      40  
# Names exported by a star-import of this module.
__all__ = [
    "knownfiles", "inited", "MimeTypes",
    "guess_type", "guess_all_extensions", "guess_extension",
    "add_type", "init", "read_mime_types",
    "suffix_map", "encodings_map", "types_map", "common_types"
]
      47  
# Candidate mime.types files.  init() walks this list in order and reads
# every entry that exists as a regular file; missing files are skipped.
# NOTE(review): "/usr/local/etc/httpd/conf/mime.types" is listed twice, so
# that file is parsed twice when present -- confirm whether the duplicate
# is intentional before removing it (it affects which file wins on
# conflicting entries).
knownfiles = [
    "/etc/mime.types",
    "/etc/httpd/mime.types",                    # Mac OS X
    "/etc/httpd/conf/mime.types",               # Apache
    "/etc/apache/mime.types",                   # Apache 1
    "/etc/apache2/mime.types",                  # Apache 2
    "/usr/local/etc/httpd/conf/mime.types",
    "/usr/local/lib/netscape/mime.types",
    "/usr/local/etc/httpd/conf/mime.types",     # Apache 1.2
    "/usr/local/etc/mime.types",                # Apache 1.3
    ]
      59  
# Set to True at the start of init(); MimeTypes.__init__() checks it so
# that constructing a MimeTypes from inside init() does not recurse.
inited = False
# Shared MimeTypes instance behind the module-level helper functions.
# Stays None until init() has finished building it.
_db = None
      62  
      63  
      64  class ESC[4;38;5;81mMimeTypes:
      65      """MIME-types datastore.
      66  
      67      This datastore can handle information from mime.types-style files
      68      and supports basic determination of MIME type from a filename or
      69      URL, and can guess a reasonable extension given a MIME type.
      70      """
      71  
      72      def __init__(self, filenames=(), strict=True):
      73          if not inited:
      74              init()
      75          self.encodings_map = _encodings_map_default.copy()
      76          self.suffix_map = _suffix_map_default.copy()
      77          self.types_map = ({}, {}) # dict for (non-strict, strict)
      78          self.types_map_inv = ({}, {})
      79          for (ext, type) in _types_map_default.items():
      80              self.add_type(type, ext, True)
      81          for (ext, type) in _common_types_default.items():
      82              self.add_type(type, ext, False)
      83          for name in filenames:
      84              self.read(name, strict)
      85  
      86      def add_type(self, type, ext, strict=True):
      87          """Add a mapping between a type and an extension.
      88  
      89          When the extension is already known, the new
      90          type will replace the old one. When the type
      91          is already known the extension will be added
      92          to the list of known extensions.
      93  
      94          If strict is true, information will be added to
      95          list of standard types, else to the list of non-standard
      96          types.
      97          """
      98          self.types_map[strict][ext] = type
      99          exts = self.types_map_inv[strict].setdefault(type, [])
     100          if ext not in exts:
     101              exts.append(ext)
     102  
     103      def guess_type(self, url, strict=True):
     104          """Guess the type of a file which is either a URL or a path-like object.
     105  
     106          Return value is a tuple (type, encoding) where type is None if
     107          the type can't be guessed (no or unknown suffix) or a string
     108          of the form type/subtype, usable for a MIME Content-type
     109          header; and encoding is None for no encoding or the name of
     110          the program used to encode (e.g. compress or gzip).  The
     111          mappings are table driven.  Encoding suffixes are case
     112          sensitive; type suffixes are first tried case sensitive, then
     113          case insensitive.
     114  
     115          The suffixes .tgz, .taz and .tz (case sensitive!) are all
     116          mapped to '.tar.gz'.  (This is table-driven too, using the
     117          dictionary suffix_map.)
     118  
     119          Optional `strict' argument when False adds a bunch of commonly found,
     120          but non-standard types.
     121          """
     122          url = os.fspath(url)
     123          scheme, url = urllib.parse._splittype(url)
     124          if scheme == 'data':
     125              # syntax of data URLs:
     126              # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
     127              # mediatype := [ type "/" subtype ] *( ";" parameter )
     128              # data      := *urlchar
     129              # parameter := attribute "=" value
     130              # type/subtype defaults to "text/plain"
     131              comma = url.find(',')
     132              if comma < 0:
     133                  # bad data URL
     134                  return None, None
     135              semi = url.find(';', 0, comma)
     136              if semi >= 0:
     137                  type = url[:semi]
     138              else:
     139                  type = url[:comma]
     140              if '=' in type or '/' not in type:
     141                  type = 'text/plain'
     142              return type, None           # never compressed, so encoding is None
     143          base, ext = posixpath.splitext(url)
     144          while (ext_lower := ext.lower()) in self.suffix_map:
     145              base, ext = posixpath.splitext(base + self.suffix_map[ext_lower])
     146          # encodings_map is case sensitive
     147          if ext in self.encodings_map:
     148              encoding = self.encodings_map[ext]
     149              base, ext = posixpath.splitext(base)
     150          else:
     151              encoding = None
     152          ext = ext.lower()
     153          types_map = self.types_map[True]
     154          if ext in types_map:
     155              return types_map[ext], encoding
     156          elif strict:
     157              return None, encoding
     158          types_map = self.types_map[False]
     159          if ext in types_map:
     160              return types_map[ext], encoding
     161          else:
     162              return None, encoding
     163  
     164      def guess_all_extensions(self, type, strict=True):
     165          """Guess the extensions for a file based on its MIME type.
     166  
     167          Return value is a list of strings giving the possible filename
     168          extensions, including the leading dot ('.').  The extension is not
     169          guaranteed to have been associated with any particular data stream,
     170          but would be mapped to the MIME type `type' by guess_type().
     171  
     172          Optional `strict' argument when false adds a bunch of commonly found,
     173          but non-standard types.
     174          """
     175          type = type.lower()
     176          extensions = list(self.types_map_inv[True].get(type, []))
     177          if not strict:
     178              for ext in self.types_map_inv[False].get(type, []):
     179                  if ext not in extensions:
     180                      extensions.append(ext)
     181          return extensions
     182  
     183      def guess_extension(self, type, strict=True):
     184          """Guess the extension for a file based on its MIME type.
     185  
     186          Return value is a string giving a filename extension,
     187          including the leading dot ('.').  The extension is not
     188          guaranteed to have been associated with any particular data
     189          stream, but would be mapped to the MIME type `type' by
     190          guess_type().  If no extension can be guessed for `type', None
     191          is returned.
     192  
     193          Optional `strict' argument when false adds a bunch of commonly found,
     194          but non-standard types.
     195          """
     196          extensions = self.guess_all_extensions(type, strict)
     197          if not extensions:
     198              return None
     199          return extensions[0]
     200  
     201      def read(self, filename, strict=True):
     202          """
     203          Read a single mime.types-format file, specified by pathname.
     204  
     205          If strict is true, information will be added to
     206          list of standard types, else to the list of non-standard
     207          types.
     208          """
     209          with open(filename, encoding='utf-8') as fp:
     210              self.readfp(fp, strict)
     211  
     212      def readfp(self, fp, strict=True):
     213          """
     214          Read a single mime.types-format file.
     215  
     216          If strict is true, information will be added to
     217          list of standard types, else to the list of non-standard
     218          types.
     219          """
     220          while line := fp.readline():
     221              words = line.split()
     222              for i in range(len(words)):
     223                  if words[i][0] == '#':
     224                      del words[i:]
     225                      break
     226              if not words:
     227                  continue
     228              type, suffixes = words[0], words[1:]
     229              for suff in suffixes:
     230                  self.add_type(type, '.' + suff, strict)
     231  
     232      def read_windows_registry(self, strict=True):
     233          """
     234          Load the MIME types database from Windows registry.
     235  
     236          If strict is true, information will be added to
     237          list of standard types, else to the list of non-standard
     238          types.
     239          """
     240  
     241          if not _mimetypes_read_windows_registry and not _winreg:
     242              return
     243  
     244          add_type = self.add_type
     245          if strict:
     246              add_type = lambda type, ext: self.add_type(type, ext, True)
     247  
     248          # Accelerated function if it is available
     249          if _mimetypes_read_windows_registry:
     250              _mimetypes_read_windows_registry(add_type)
     251          elif _winreg:
     252              self._read_windows_registry(add_type)
     253  
     254      @classmethod
     255      def _read_windows_registry(cls, add_type):
     256          def enum_types(mimedb):
     257              i = 0
     258              while True:
     259                  try:
     260                      ctype = _winreg.EnumKey(mimedb, i)
     261                  except OSError:
     262                      break
     263                  else:
     264                      if '\0' not in ctype:
     265                          yield ctype
     266                  i += 1
     267  
     268          with _winreg.OpenKey(_winreg.HKEY_CLASSES_ROOT, '') as hkcr:
     269              for subkeyname in enum_types(hkcr):
     270                  try:
     271                      with _winreg.OpenKey(hkcr, subkeyname) as subkey:
     272                          # Only check file extensions
     273                          if not subkeyname.startswith("."):
     274                              continue
     275                          # raises OSError if no 'Content Type' value
     276                          mimetype, datatype = _winreg.QueryValueEx(
     277                              subkey, 'Content Type')
     278                          if datatype != _winreg.REG_SZ:
     279                              continue
     280                          add_type(mimetype, subkeyname)
     281                  except OSError:
     282                      continue
     283  
     284  def guess_type(url, strict=True):
     285      """Guess the type of a file based on its URL.
     286  
     287      Return value is a tuple (type, encoding) where type is None if the
     288      type can't be guessed (no or unknown suffix) or a string of the
     289      form type/subtype, usable for a MIME Content-type header; and
     290      encoding is None for no encoding or the name of the program used
     291      to encode (e.g. compress or gzip).  The mappings are table
     292      driven.  Encoding suffixes are case sensitive; type suffixes are
     293      first tried case sensitive, then case insensitive.
     294  
     295      The suffixes .tgz, .taz and .tz (case sensitive!) are all mapped
     296      to ".tar.gz".  (This is table-driven too, using the dictionary
     297      suffix_map).
     298  
     299      Optional `strict' argument when false adds a bunch of commonly found, but
     300      non-standard types.
     301      """
     302      if _db is None:
     303          init()
     304      return _db.guess_type(url, strict)
     305  
     306  
     307  def guess_all_extensions(type, strict=True):
     308      """Guess the extensions for a file based on its MIME type.
     309  
     310      Return value is a list of strings giving the possible filename
     311      extensions, including the leading dot ('.').  The extension is not
     312      guaranteed to have been associated with any particular data
     313      stream, but would be mapped to the MIME type `type' by
     314      guess_type().  If no extension can be guessed for `type', None
     315      is returned.
     316  
     317      Optional `strict' argument when false adds a bunch of commonly found,
     318      but non-standard types.
     319      """
     320      if _db is None:
     321          init()
     322      return _db.guess_all_extensions(type, strict)
     323  
     324  def guess_extension(type, strict=True):
     325      """Guess the extension for a file based on its MIME type.
     326  
     327      Return value is a string giving a filename extension, including the
     328      leading dot ('.').  The extension is not guaranteed to have been
     329      associated with any particular data stream, but would be mapped to the
     330      MIME type `type' by guess_type().  If no extension can be guessed for
     331      `type', None is returned.
     332  
     333      Optional `strict' argument when false adds a bunch of commonly found,
     334      but non-standard types.
     335      """
     336      if _db is None:
     337          init()
     338      return _db.guess_extension(type, strict)
     339  
     340  def add_type(type, ext, strict=True):
     341      """Add a mapping between a type and an extension.
     342  
     343      When the extension is already known, the new
     344      type will replace the old one. When the type
     345      is already known the extension will be added
     346      to the list of known extensions.
     347  
     348      If strict is true, information will be added to
     349      list of standard types, else to the list of non-standard
     350      types.
     351      """
     352      if _db is None:
     353          init()
     354      return _db.add_type(type, ext, strict)
     355  
     356  
     357  def init(files=None):
     358      global suffix_map, types_map, encodings_map, common_types
     359      global inited, _db
     360      inited = True    # so that MimeTypes.__init__() doesn't call us again
     361  
     362      if files is None or _db is None:
     363          db = MimeTypes()
     364          # Quick return if not supported
     365          db.read_windows_registry()
     366  
     367          if files is None:
     368              files = knownfiles
     369          else:
     370              files = knownfiles + list(files)
     371      else:
     372          db = _db
     373  
     374      for file in files:
     375          if os.path.isfile(file):
     376              db.read(file)
     377      encodings_map = db.encodings_map
     378      suffix_map = db.suffix_map
     379      types_map = db.types_map[True]
     380      common_types = db.types_map[False]
     381      # Make the DB a global variable now that it is fully initialized
     382      _db = db
     383  
     384  
     385  def read_mime_types(file):
     386      try:
     387          f = open(file, encoding='utf-8')
     388      except OSError:
     389          return None
     390      with f:
     391          db = MimeTypes()
     392          db.readfp(f, True)
     393          return db.types_map[True]
     394  
     395  
def _default_mime_types():
    """Install the built-in default tables as module globals.

    Each public table (suffix_map, encodings_map, types_map,
    common_types) and its private `_..._default` twin are bound to the
    same dictionary object.  MimeTypes.__init__() seeds every new
    instance from the `_..._default` names.
    """
    global suffix_map, _suffix_map_default
    global encodings_map, _encodings_map_default
    global types_map, _types_map_default
    global common_types, _common_types_default

    # Alias suffixes and what they expand to before any other lookup.
    suffix_map = _suffix_map_default = {
        '.svgz': '.svg.gz',
        '.tgz': '.tar.gz',
        '.taz': '.tar.gz',
        '.tz': '.tar.gz',
        '.tbz2': '.tar.bz2',
        '.txz': '.tar.xz',
        }

    # Suffixes that denote an encoding rather than a type.
    encodings_map = _encodings_map_default = {
        '.gz': 'gzip',
        '.Z': 'compress',
        '.bz2': 'bzip2',
        '.xz': 'xz',
        '.br': 'br',
        }

    # Before adding new types, make sure they are either registered with IANA,
    # at http://www.iana.org/assignments/media-types
    # or extensions, i.e. using the x- prefix

    # If you add to these, please keep them sorted by mime type.
    # Make sure the entry with the preferred file extension for a particular mime type
    # appears before any others of the same mimetype.
    # (Insertion order matters: MimeTypes.guess_extension() returns the
    # first extension that was registered for a type.)
    types_map = _types_map_default = {
        '.js'     : 'text/javascript',
        '.mjs'    : 'text/javascript',
        '.json'   : 'application/json',
        '.webmanifest': 'application/manifest+json',
        '.doc'    : 'application/msword',
        '.dot'    : 'application/msword',
        '.wiz'    : 'application/msword',
        '.nq'     : 'application/n-quads',
        '.nt'     : 'application/n-triples',
        '.bin'    : 'application/octet-stream',
        '.a'      : 'application/octet-stream',
        '.dll'    : 'application/octet-stream',
        '.exe'    : 'application/octet-stream',
        '.o'      : 'application/octet-stream',
        '.obj'    : 'application/octet-stream',
        '.so'     : 'application/octet-stream',
        '.oda'    : 'application/oda',
        '.pdf'    : 'application/pdf',
        '.p7c'    : 'application/pkcs7-mime',
        '.ps'     : 'application/postscript',
        '.ai'     : 'application/postscript',
        '.eps'    : 'application/postscript',
        '.trig'   : 'application/trig',
        '.m3u'    : 'application/vnd.apple.mpegurl',
        '.m3u8'   : 'application/vnd.apple.mpegurl',
        '.xls'    : 'application/vnd.ms-excel',
        '.xlb'    : 'application/vnd.ms-excel',
        '.ppt'    : 'application/vnd.ms-powerpoint',
        '.pot'    : 'application/vnd.ms-powerpoint',
        '.ppa'    : 'application/vnd.ms-powerpoint',
        '.pps'    : 'application/vnd.ms-powerpoint',
        '.pwz'    : 'application/vnd.ms-powerpoint',
        '.wasm'   : 'application/wasm',
        '.bcpio'  : 'application/x-bcpio',
        '.cpio'   : 'application/x-cpio',
        '.csh'    : 'application/x-csh',
        '.dvi'    : 'application/x-dvi',
        '.gtar'   : 'application/x-gtar',
        '.hdf'    : 'application/x-hdf',
        '.h5'     : 'application/x-hdf5',
        '.latex'  : 'application/x-latex',
        '.mif'    : 'application/x-mif',
        '.cdf'    : 'application/x-netcdf',
        '.nc'     : 'application/x-netcdf',
        '.p12'    : 'application/x-pkcs12',
        '.pfx'    : 'application/x-pkcs12',
        '.ram'    : 'application/x-pn-realaudio',
        '.pyc'    : 'application/x-python-code',
        '.pyo'    : 'application/x-python-code',
        '.sh'     : 'application/x-sh',
        '.shar'   : 'application/x-shar',
        '.swf'    : 'application/x-shockwave-flash',
        '.sv4cpio': 'application/x-sv4cpio',
        '.sv4crc' : 'application/x-sv4crc',
        '.tar'    : 'application/x-tar',
        '.tcl'    : 'application/x-tcl',
        '.tex'    : 'application/x-tex',
        '.texi'   : 'application/x-texinfo',
        '.texinfo': 'application/x-texinfo',
        '.roff'   : 'application/x-troff',
        '.t'      : 'application/x-troff',
        '.tr'     : 'application/x-troff',
        '.man'    : 'application/x-troff-man',
        '.me'     : 'application/x-troff-me',
        '.ms'     : 'application/x-troff-ms',
        '.ustar'  : 'application/x-ustar',
        '.src'    : 'application/x-wais-source',
        '.xsl'    : 'application/xml',
        '.rdf'    : 'application/xml',
        '.wsdl'   : 'application/xml',
        '.xpdl'   : 'application/xml',
        '.zip'    : 'application/zip',
        '.3gp'    : 'audio/3gpp',
        '.3gpp'   : 'audio/3gpp',
        '.3g2'    : 'audio/3gpp2',
        '.3gpp2'  : 'audio/3gpp2',
        '.aac'    : 'audio/aac',
        '.adts'   : 'audio/aac',
        '.loas'   : 'audio/aac',
        '.ass'    : 'audio/aac',
        '.au'     : 'audio/basic',
        '.snd'    : 'audio/basic',
        '.mp3'    : 'audio/mpeg',
        '.mp2'    : 'audio/mpeg',
        '.opus'   : 'audio/opus',
        '.aif'    : 'audio/x-aiff',
        '.aifc'   : 'audio/x-aiff',
        '.aiff'   : 'audio/x-aiff',
        '.ra'     : 'audio/x-pn-realaudio',
        '.wav'    : 'audio/x-wav',
        '.avif'   : 'image/avif',
        '.bmp'    : 'image/bmp',
        '.gif'    : 'image/gif',
        '.ief'    : 'image/ief',
        '.jpg'    : 'image/jpeg',
        '.jpe'    : 'image/jpeg',
        '.jpeg'   : 'image/jpeg',
        '.heic'   : 'image/heic',
        '.heif'   : 'image/heif',
        '.png'    : 'image/png',
        '.svg'    : 'image/svg+xml',
        '.tiff'   : 'image/tiff',
        '.tif'    : 'image/tiff',
        '.ico'    : 'image/vnd.microsoft.icon',
        '.ras'    : 'image/x-cmu-raster',
        '.pnm'    : 'image/x-portable-anymap',
        '.pbm'    : 'image/x-portable-bitmap',
        '.pgm'    : 'image/x-portable-graymap',
        '.ppm'    : 'image/x-portable-pixmap',
        '.rgb'    : 'image/x-rgb',
        '.xbm'    : 'image/x-xbitmap',
        '.xpm'    : 'image/x-xpixmap',
        '.xwd'    : 'image/x-xwindowdump',
        '.eml'    : 'message/rfc822',
        '.mht'    : 'message/rfc822',
        '.mhtml'  : 'message/rfc822',
        '.nws'    : 'message/rfc822',
        '.css'    : 'text/css',
        '.csv'    : 'text/csv',
        '.html'   : 'text/html',
        '.htm'    : 'text/html',
        '.n3'     : 'text/n3',
        '.txt'    : 'text/plain',
        '.bat'    : 'text/plain',
        '.c'      : 'text/plain',
        '.h'      : 'text/plain',
        '.ksh'    : 'text/plain',
        '.pl'     : 'text/plain',
        '.srt'    : 'text/plain',
        '.rtx'    : 'text/richtext',
        '.tsv'    : 'text/tab-separated-values',
        '.vtt'    : 'text/vtt',
        '.py'     : 'text/x-python',
        '.etx'    : 'text/x-setext',
        '.sgm'    : 'text/x-sgml',
        '.sgml'   : 'text/x-sgml',
        '.vcf'    : 'text/x-vcard',
        '.xml'    : 'text/xml',
        '.mp4'    : 'video/mp4',
        '.mpeg'   : 'video/mpeg',
        '.m1v'    : 'video/mpeg',
        '.mpa'    : 'video/mpeg',
        '.mpe'    : 'video/mpeg',
        '.mpg'    : 'video/mpeg',
        '.mov'    : 'video/quicktime',
        '.qt'     : 'video/quicktime',
        '.webm'   : 'video/webm',
        '.avi'    : 'video/x-msvideo',
        '.movie'  : 'video/x-sgi-movie',
        }

    # These are non-standard types, commonly found in the wild.  They will
    # only match if a false `strict' argument (e.g. strict=False) is given
    # to the API methods.

    # Please sort these too
    common_types = _common_types_default = {
        '.rtf' : 'application/rtf',
        '.midi': 'audio/midi',
        '.mid' : 'audio/midi',
        '.jpg' : 'image/jpg',
        '.pict': 'image/pict',
        '.pct' : 'image/pict',
        '.pic' : 'image/pict',
        '.webp': 'image/webp',
        '.xul' : 'text/xul',
        }
     593  
     594  
# Populate the module-level default tables at import time, so the
# `_..._default` names exist before any MimeTypes instance is created.
_default_mime_types()
     596  
     597  
     598  def _main():
     599      import getopt
     600  
     601      USAGE = """\
     602  Usage: mimetypes.py [options] type
     603  
     604  Options:
     605      --help / -h       -- print this message and exit
     606      --lenient / -l    -- additionally search of some common, but non-standard
     607                           types.
     608      --extension / -e  -- guess extension instead of type
     609  
     610  More than one type argument may be given.
     611  """
     612  
     613      def usage(code, msg=''):
     614          print(USAGE)
     615          if msg: print(msg)
     616          sys.exit(code)
     617  
     618      try:
     619          opts, args = getopt.getopt(sys.argv[1:], 'hle',
     620                                     ['help', 'lenient', 'extension'])
     621      except getopt.error as msg:
     622          usage(1, msg)
     623  
     624      strict = 1
     625      extension = 0
     626      for opt, arg in opts:
     627          if opt in ('-h', '--help'):
     628              usage(0)
     629          elif opt in ('-l', '--lenient'):
     630              strict = 0
     631          elif opt in ('-e', '--extension'):
     632              extension = 1
     633      for gtype in args:
     634          if extension:
     635              guess = guess_extension(gtype, strict)
     636              if not guess: print("I don't know anything about type", gtype)
     637              else: print(guess)
     638          else:
     639              guess, encoding = guess_type(gtype, strict)
     640              if not guess: print("I don't know anything about type", gtype)
     641              else: print('type:', guess, 'encoding:', encoding)
     642  
     643  
     644  if __name__ == '__main__':
     645      _main()