(root)/
Python-3.12.0/
Lib/
dbm/
dumb.py
       1  """A dumb and slow but simple dbm clone.
       2  
       3  For database spam, spam.dir contains the index (a text file),
       4  spam.bak *may* contain a backup of the index (also a text file),
       5  while spam.dat contains the data (a binary file).
       6  
       7  XXX TO DO:
       8  
       9  - seems to contain a bug when updating...
      10  
      11  - reclaim free space (currently, space once occupied by deleted or expanded
      12  items is never reused)
      13  
      14  - support concurrent access (currently, if two processes take turns making
      15  updates, they can mess up the index)
      16  
      17  - support efficient access to large databases (currently, the whole index
      18  is read when the database is opened, and some updates rewrite the whole index)
      19  
      20  - support opening for read-only (flag = 'm')
      21  
      22  """
      23  
      24  import ast as _ast
      25  import io as _io
      26  import os as _os
      27  import collections.abc
      28  
# Names exported by a star-import of this module.
__all__ = ["error", "open"]

# Alignment unit of the data file: every stored value starts at a byte
# offset that is a multiple of this number (see _Database._addval()).
_BLOCKSIZE = 512

# Exception type raised by this module for database-level failures, such as
# using a closed database or writing to one opened read-only.
error = OSError
      34  
      35  class ESC[4;38;5;81m_Database(ESC[4;38;5;149mcollectionsESC[4;38;5;149m.ESC[4;38;5;149mabcESC[4;38;5;149m.ESC[4;38;5;149mMutableMapping):
      36  
      37      # The on-disk directory and data files can remain in mutually
      38      # inconsistent states for an arbitrarily long time (see comments
      39      # at the end of __setitem__).  This is only repaired when _commit()
      40      # gets called.  One place _commit() gets called is from __del__(),
      41      # and if that occurs at program shutdown time, module globals may
      42      # already have gotten rebound to None.  Since it's crucial that
      43      # _commit() finish successfully, we can't ignore shutdown races
      44      # here, and _commit() must not reference any globals.
      45      _os = _os       # for _commit()
      46      _io = _io       # for _commit()
      47  
      48      def __init__(self, filebasename, mode, flag='c'):
      49          filebasename = self._os.fsencode(filebasename)
      50          self._mode = mode
      51          self._readonly = (flag == 'r')
      52  
      53          # The directory file is a text file.  Each line looks like
      54          #    "%r, (%d, %d)\n" % (key, pos, siz)
      55          # where key is the string key, pos is the offset into the dat
      56          # file of the associated value's first byte, and siz is the number
      57          # of bytes in the associated value.
      58          self._dirfile = filebasename + b'.dir'
      59  
      60          # The data file is a binary file pointed into by the directory
      61          # file, and holds the values associated with keys.  Each value
      62          # begins at a _BLOCKSIZE-aligned byte offset, and is a raw
      63          # binary 8-bit string value.
      64          self._datfile = filebasename + b'.dat'
      65          self._bakfile = filebasename + b'.bak'
      66  
      67          # The index is an in-memory dict, mirroring the directory file.
      68          self._index = None  # maps keys to (pos, siz) pairs
      69  
      70          # Handle the creation
      71          self._create(flag)
      72          self._update(flag)
      73  
      74      def _create(self, flag):
      75          if flag == 'n':
      76              for filename in (self._datfile, self._bakfile, self._dirfile):
      77                  try:
      78                      _os.remove(filename)
      79                  except OSError:
      80                      pass
      81          # Mod by Jack: create data file if needed
      82          try:
      83              f = _io.open(self._datfile, 'r', encoding="Latin-1")
      84          except OSError:
      85              if flag not in ('c', 'n'):
      86                  raise
      87              with _io.open(self._datfile, 'w', encoding="Latin-1") as f:
      88                  self._chmod(self._datfile)
      89          else:
      90              f.close()
      91  
      92      # Read directory file into the in-memory index dict.
      93      def _update(self, flag):
      94          self._modified = False
      95          self._index = {}
      96          try:
      97              f = _io.open(self._dirfile, 'r', encoding="Latin-1")
      98          except OSError:
      99              if flag not in ('c', 'n'):
     100                  raise
     101              self._modified = True
     102          else:
     103              with f:
     104                  for line in f:
     105                      line = line.rstrip()
     106                      key, pos_and_siz_pair = _ast.literal_eval(line)
     107                      key = key.encode('Latin-1')
     108                      self._index[key] = pos_and_siz_pair
     109  
     110      # Write the index dict to the directory file.  The original directory
     111      # file (if any) is renamed with a .bak extension first.  If a .bak
     112      # file currently exists, it's deleted.
     113      def _commit(self):
     114          # CAUTION:  It's vital that _commit() succeed, and _commit() can
     115          # be called from __del__().  Therefore we must never reference a
     116          # global in this routine.
     117          if self._index is None or not self._modified:
     118              return  # nothing to do
     119  
     120          try:
     121              self._os.unlink(self._bakfile)
     122          except OSError:
     123              pass
     124  
     125          try:
     126              self._os.rename(self._dirfile, self._bakfile)
     127          except OSError:
     128              pass
     129  
     130          with self._io.open(self._dirfile, 'w', encoding="Latin-1") as f:
     131              self._chmod(self._dirfile)
     132              for key, pos_and_siz_pair in self._index.items():
     133                  # Use Latin-1 since it has no qualms with any value in any
     134                  # position; UTF-8, though, does care sometimes.
     135                  entry = "%r, %r\n" % (key.decode('Latin-1'), pos_and_siz_pair)
     136                  f.write(entry)
     137  
     138      sync = _commit
     139  
     140      def _verify_open(self):
     141          if self._index is None:
     142              raise error('DBM object has already been closed')
     143  
     144      def __getitem__(self, key):
     145          if isinstance(key, str):
     146              key = key.encode('utf-8')
     147          self._verify_open()
     148          pos, siz = self._index[key]     # may raise KeyError
     149          with _io.open(self._datfile, 'rb') as f:
     150              f.seek(pos)
     151              dat = f.read(siz)
     152          return dat
     153  
     154      # Append val to the data file, starting at a _BLOCKSIZE-aligned
     155      # offset.  The data file is first padded with NUL bytes (if needed)
     156      # to get to an aligned offset.  Return pair
     157      #     (starting offset of val, len(val))
     158      def _addval(self, val):
     159          with _io.open(self._datfile, 'rb+') as f:
     160              f.seek(0, 2)
     161              pos = int(f.tell())
     162              npos = ((pos + _BLOCKSIZE - 1) // _BLOCKSIZE) * _BLOCKSIZE
     163              f.write(b'\0'*(npos-pos))
     164              pos = npos
     165              f.write(val)
     166          return (pos, len(val))
     167  
     168      # Write val to the data file, starting at offset pos.  The caller
     169      # is responsible for ensuring that there's enough room starting at
     170      # pos to hold val, without overwriting some other value.  Return
     171      # pair (pos, len(val)).
     172      def _setval(self, pos, val):
     173          with _io.open(self._datfile, 'rb+') as f:
     174              f.seek(pos)
     175              f.write(val)
     176          return (pos, len(val))
     177  
     178      # key is a new key whose associated value starts in the data file
     179      # at offset pos and with length siz.  Add an index record to
     180      # the in-memory index dict, and append one to the directory file.
     181      def _addkey(self, key, pos_and_siz_pair):
     182          self._index[key] = pos_and_siz_pair
     183          with _io.open(self._dirfile, 'a', encoding="Latin-1") as f:
     184              self._chmod(self._dirfile)
     185              f.write("%r, %r\n" % (key.decode("Latin-1"), pos_and_siz_pair))
     186  
     187      def __setitem__(self, key, val):
     188          if self._readonly:
     189              raise error('The database is opened for reading only')
     190          if isinstance(key, str):
     191              key = key.encode('utf-8')
     192          elif not isinstance(key, (bytes, bytearray)):
     193              raise TypeError("keys must be bytes or strings")
     194          if isinstance(val, str):
     195              val = val.encode('utf-8')
     196          elif not isinstance(val, (bytes, bytearray)):
     197              raise TypeError("values must be bytes or strings")
     198          self._verify_open()
     199          self._modified = True
     200          if key not in self._index:
     201              self._addkey(key, self._addval(val))
     202          else:
     203              # See whether the new value is small enough to fit in the
     204              # (padded) space currently occupied by the old value.
     205              pos, siz = self._index[key]
     206              oldblocks = (siz + _BLOCKSIZE - 1) // _BLOCKSIZE
     207              newblocks = (len(val) + _BLOCKSIZE - 1) // _BLOCKSIZE
     208              if newblocks <= oldblocks:
     209                  self._index[key] = self._setval(pos, val)
     210              else:
     211                  # The new value doesn't fit in the (padded) space used
     212                  # by the old value.  The blocks used by the old value are
     213                  # forever lost.
     214                  self._index[key] = self._addval(val)
     215  
     216              # Note that _index may be out of synch with the directory
     217              # file now:  _setval() and _addval() don't update the directory
     218              # file.  This also means that the on-disk directory and data
     219              # files are in a mutually inconsistent state, and they'll
     220              # remain that way until _commit() is called.  Note that this
     221              # is a disaster (for the database) if the program crashes
     222              # (so that _commit() never gets called).
     223  
     224      def __delitem__(self, key):
     225          if self._readonly:
     226              raise error('The database is opened for reading only')
     227          if isinstance(key, str):
     228              key = key.encode('utf-8')
     229          self._verify_open()
     230          self._modified = True
     231          # The blocks used by the associated value are lost.
     232          del self._index[key]
     233          # XXX It's unclear why we do a _commit() here (the code always
     234          # XXX has, so I'm not changing it).  __setitem__ doesn't try to
     235          # XXX keep the directory file in synch.  Why should we?  Or
     236          # XXX why shouldn't __setitem__?
     237          self._commit()
     238  
     239      def keys(self):
     240          try:
     241              return list(self._index)
     242          except TypeError:
     243              raise error('DBM object has already been closed') from None
     244  
     245      def items(self):
     246          self._verify_open()
     247          return [(key, self[key]) for key in self._index.keys()]
     248  
     249      def __contains__(self, key):
     250          if isinstance(key, str):
     251              key = key.encode('utf-8')
     252          try:
     253              return key in self._index
     254          except TypeError:
     255              if self._index is None:
     256                  raise error('DBM object has already been closed') from None
     257              else:
     258                  raise
     259  
     260      def iterkeys(self):
     261          try:
     262              return iter(self._index)
     263          except TypeError:
     264              raise error('DBM object has already been closed') from None
     265      __iter__ = iterkeys
     266  
     267      def __len__(self):
     268          try:
     269              return len(self._index)
     270          except TypeError:
     271              raise error('DBM object has already been closed') from None
     272  
     273      def close(self):
     274          try:
     275              self._commit()
     276          finally:
     277              self._index = self._datfile = self._dirfile = self._bakfile = None
     278  
     279      __del__ = close
     280  
     281      def _chmod(self, file):
     282          self._os.chmod(file, self._mode)
     283  
     284      def __enter__(self):
     285          return self
     286  
     287      def __exit__(self, *args):
     288          self.close()
     289  
     290  
     291  def open(file, flag='c', mode=0o666):
     292      """Open the database file, filename, and return corresponding object.
     293  
     294      The flag argument, used to control how the database is opened in the
     295      other DBM implementations, supports only the semantics of 'c' and 'n'
     296      values.  Other values will default to the semantics of 'c' value:
     297      the database will always opened for update and will be created if it
     298      does not exist.
     299  
     300      The optional mode argument is the UNIX mode of the file, used only when
     301      the database has to be created.  It defaults to octal code 0o666 (and
     302      will be modified by the prevailing umask).
     303  
     304      """
     305  
     306      # Modify mode depending on the umask
     307      try:
     308          um = _os.umask(0)
     309          _os.umask(um)
     310      except AttributeError:
     311          pass
     312      else:
     313          # Turn off any bits that are set in the umask
     314          mode = mode & (~um)
     315      if flag not in ('r', 'w', 'c', 'n'):
     316          raise ValueError("Flag must be one of 'r', 'w', 'c', or 'n'")
     317      return _Database(file, mode, flag=flag)