1 """A dumb and slow but simple dbm clone.
2
3 For database spam, spam.dir contains the index (a text file),
4 spam.bak *may* contain a backup of the index (also a text file),
5 while spam.dat contains the data (a binary file).
6
7 XXX TO DO:
8
9 - seems to contain a bug when updating...
10
11 - reclaim free space (currently, space once occupied by deleted or expanded
12 items is never reused)
13
14 - support concurrent access (currently, if two processes take turns making
15 updates, they can mess up the index)
16
17 - support efficient access to large databases (currently, the whole index
18 is read when the database is opened, and some updates rewrite the whole index)
19
20 - support opening for read-only (flag = 'm')
21
22 """
23
24 import ast as _ast
25 import io as _io
26 import os as _os
27 import collections.abc
28
# Names exported by a star-import of this module: the exception alias
# ``error`` and the ``open()`` factory function defined below.
__all__ = ["error", "open"]
30
# Every value in the data file starts at a multiple of this byte offset.
_BLOCKSIZE = 512

# Exception type raised for database-level failures (closed or read-only db).
error = OSError


class _Database(collections.abc.MutableMapping):
    """Mutable mapping of bytes keys to bytes values, stored in plain files.

    Three files share the base name passed to the constructor: ``.dir``
    (text index), ``.bak`` (previous copy of the index) and ``.dat``
    (binary values).  ``str`` keys and values are encoded as UTF-8;
    ``bytearray`` keys are converted to ``bytes`` so they can be looked
    up in the in-memory index dict.
    """

    # The on-disk directory and data files can remain in mutually
    # inconsistent states for an arbitrarily long time (see comments
    # at the end of __setitem__).  This is only repaired when _commit()
    # gets called.  One place _commit() gets called is from __del__(),
    # and if that occurs at program shutdown time, module globals may
    # already have gotten rebound to None.  Since it's crucial that
    # _commit() finish successfully, we can't ignore shutdown races
    # here, and _commit() must not reference any globals.
    _os = _os       # for _commit()
    _io = _io       # for _commit()

    # Class-level defaults: close() (and therefore __del__()) must be safe
    # even when __init__() raised before assigning these attributes, e.g.
    # because os.fsencode() rejected the file name.
    _index = None
    _modified = False

    def __init__(self, filebasename, mode, flag='c'):
        """Open (and possibly create) the database files.

        filebasename -- path without extension; encoded with os.fsencode().
        mode         -- permission bits applied with os.chmod() to files
                        this object writes.
        flag         -- 'r' makes the mapping read-only; 'c' and 'n' allow
                        missing files to be created; 'n' additionally
                        removes existing files first.

        Raises OSError if a required file is missing and flag is neither
        'c' nor 'n'.
        """
        filebasename = self._os.fsencode(filebasename)
        self._mode = mode
        self._readonly = (flag == 'r')

        # The directory file is a text file.  Each line looks like
        #    "%r, (%d, %d)\n" % (key, pos, siz)
        # where key is the string key, pos is the offset into the dat
        # file of the associated value's first byte, and siz is the number
        # of bytes in the associated value.
        self._dirfile = filebasename + b'.dir'

        # The data file is a binary file pointed into by the directory
        # file, and holds the values associated with keys.  Each value
        # begins at a _BLOCKSIZE-aligned byte offset, and is a raw
        # binary 8-bit string value.
        self._datfile = filebasename + b'.dat'
        self._bakfile = filebasename + b'.bak'

        # The index is an in-memory dict, mirroring the directory file.
        self._index = None  # maps keys to (pos, siz) pairs

        # Handle the creation
        self._create(flag)
        self._update(flag)

    @staticmethod
    def _normalize_key(key):
        """Return key in the form used by the index dict.

        str is encoded as UTF-8 and bytearray (unhashable) is copied to
        bytes; any other object is returned unchanged.
        """
        if isinstance(key, str):
            return key.encode('utf-8')
        if isinstance(key, bytearray):
            return bytes(key)
        return key

    def _create(self, flag):
        """Make sure the data file exists, honouring the open flag.

        With flag 'n' any existing data, backup and directory files are
        removed first (removal errors are ignored).  A missing data file
        is created for flags 'c' and 'n'; for other flags the OSError
        from the failed open propagates.
        """
        if flag == 'n':
            for filename in (self._datfile, self._bakfile, self._dirfile):
                try:
                    _os.remove(filename)
                except OSError:
                    pass
        # Mod by Jack: create data file if needed
        try:
            f = _io.open(self._datfile, 'r', encoding="Latin-1")
        except OSError:
            if flag not in ('c', 'n'):
                raise
            with _io.open(self._datfile, 'w', encoding="Latin-1") as f:
                self._chmod(self._datfile)
        else:
            f.close()

    # Read directory file into the in-memory index dict.
    def _update(self, flag):
        """Load the directory file into self._index.

        A missing directory file is tolerated for flags 'c' and 'n' (the
        database is then marked modified so the file gets written on
        commit); otherwise the OSError propagates.
        """
        self._modified = False
        self._index = {}
        try:
            f = _io.open(self._dirfile, 'r', encoding="Latin-1")
        except OSError:
            if flag not in ('c', 'n'):
                raise
            self._modified = True
        else:
            with f:
                for line in f:
                    line = line.rstrip()
                    key, pos_and_siz_pair = _ast.literal_eval(line)
                    key = key.encode('Latin-1')
                    self._index[key] = pos_and_siz_pair

    # Write the index dict to the directory file.  The original directory
    # file (if any) is renamed with a .bak extension first.  If a .bak
    # file currently exists, it's deleted.
    def _commit(self):
        """Rewrite the directory file from the in-memory index, if needed."""
        # CAUTION:  It's vital that _commit() succeed, and _commit() can
        # be called from __del__().  Therefore we must never reference a
        # global in this routine.
        if self._index is None or not self._modified:
            return  # nothing to do

        try:
            self._os.unlink(self._bakfile)
        except OSError:
            pass

        try:
            self._os.rename(self._dirfile, self._bakfile)
        except OSError:
            pass

        with self._io.open(self._dirfile, 'w', encoding="Latin-1") as f:
            self._chmod(self._dirfile)
            for key, pos_and_siz_pair in self._index.items():
                # Use Latin-1 since it has no qualms with any value in any
                # position; UTF-8, though, does care sometimes.
                entry = "%r, %r\n" % (key.decode('Latin-1'), pos_and_siz_pair)
                f.write(entry)

    sync = _commit

    def _verify_open(self):
        """Raise error if the database has been closed."""
        if self._index is None:
            raise error('DBM object has already been closed')

    def __getitem__(self, key):
        """Return the stored bytes for key; raise KeyError if absent."""
        key = self._normalize_key(key)
        self._verify_open()
        pos, siz = self._index[key]     # may raise KeyError
        with _io.open(self._datfile, 'rb') as f:
            f.seek(pos)
            dat = f.read(siz)
        return dat

    # Append val to the data file, starting at a _BLOCKSIZE-aligned
    # offset.  The data file is first padded with NUL bytes (if needed)
    # to get to an aligned offset.  Return pair
    #     (starting offset of val, len(val))
    def _addval(self, val):
        with _io.open(self._datfile, 'rb+') as f:
            f.seek(0, 2)
            pos = int(f.tell())
            npos = ((pos + _BLOCKSIZE - 1) // _BLOCKSIZE) * _BLOCKSIZE
            f.write(b'\0'*(npos-pos))
            pos = npos
            f.write(val)
        return (pos, len(val))

    # Write val to the data file, starting at offset pos.  The caller
    # is responsible for ensuring that there's enough room starting at
    # pos to hold val, without overwriting some other value.  Return
    # pair (pos, len(val)).
    def _setval(self, pos, val):
        with _io.open(self._datfile, 'rb+') as f:
            f.seek(pos)
            f.write(val)
        return (pos, len(val))

    # key is a new key whose associated value starts in the data file
    # at offset pos and with length siz.  Add an index record to
    # the in-memory index dict, and append one to the directory file.
    def _addkey(self, key, pos_and_siz_pair):
        self._index[key] = pos_and_siz_pair
        with _io.open(self._dirfile, 'a', encoding="Latin-1") as f:
            self._chmod(self._dirfile)
            f.write("%r, %r\n" % (key.decode("Latin-1"), pos_and_siz_pair))

    def __setitem__(self, key, val):
        """Store val under key.

        Raises error if the database is read-only or closed, and
        TypeError if key or val is not str, bytes or bytearray.
        """
        if self._readonly:
            raise error('The database is opened for reading only')
        if not isinstance(key, (str, bytes, bytearray)):
            raise TypeError("keys must be bytes or strings")
        # bytearray is unhashable, so it must become bytes before it can
        # be used with the index dict.
        key = self._normalize_key(key)
        if isinstance(val, str):
            val = val.encode('utf-8')
        elif not isinstance(val, (bytes, bytearray)):
            raise TypeError("values must be bytes or strings")
        self._verify_open()
        self._modified = True
        if key not in self._index:
            self._addkey(key, self._addval(val))
        else:
            # See whether the new value is small enough to fit in the
            # (padded) space currently occupied by the old value.
            pos, siz = self._index[key]
            oldblocks = (siz + _BLOCKSIZE - 1) // _BLOCKSIZE
            newblocks = (len(val) + _BLOCKSIZE - 1) // _BLOCKSIZE
            if newblocks <= oldblocks:
                self._index[key] = self._setval(pos, val)
            else:
                # The new value doesn't fit in the (padded) space used
                # by the old value.  The blocks used by the old value are
                # forever lost.
                self._index[key] = self._addval(val)

            # Note that _index may be out of synch with the directory
            # file now:  _setval() and _addval() don't update the directory
            # file.  This also means that the on-disk directory and data
            # files are in a mutually inconsistent state, and they'll
            # remain that way until _commit() is called.  Note that this
            # is a disaster (for the database) if the program crashes
            # (so that _commit() never gets called).

    def __delitem__(self, key):
        """Remove key and rewrite the directory file.

        Raises error if the database is read-only or closed, and KeyError
        if key is absent (in which case nothing is marked as modified).
        """
        if self._readonly:
            raise error('The database is opened for reading only')
        key = self._normalize_key(key)
        self._verify_open()
        # The blocks used by the associated value are lost.
        del self._index[key]
        # Only flag the database dirty once the deletion really happened.
        self._modified = True
        # XXX It's unclear why we do a _commit() here (the code always
        # XXX has, so I'm not changing it).  __setitem__ doesn't try to
        # XXX keep the directory file in synch.  Why should we?  Or
        # XXX why shouldn't __setitem__?
        self._commit()

    def keys(self):
        """Return a list of all keys; raise error if closed."""
        try:
            return list(self._index)
        except TypeError:
            raise error('DBM object has already been closed') from None

    def items(self):
        """Return a list of (key, value) pairs; raise error if closed."""
        self._verify_open()
        return [(key, self[key]) for key in self._index.keys()]

    def __contains__(self, key):
        """Return True if key is stored; raise error if closed."""
        key = self._normalize_key(key)
        try:
            return key in self._index
        except TypeError:
            if self._index is None:
                raise error('DBM object has already been closed') from None
            else:
                raise

    def iterkeys(self):
        """Return an iterator over the keys; raise error if closed."""
        try:
            return iter(self._index)
        except TypeError:
            raise error('DBM object has already been closed') from None
    __iter__ = iterkeys

    def __len__(self):
        """Return the number of stored keys; raise error if closed."""
        try:
            return len(self._index)
        except TypeError:
            raise error('DBM object has already been closed') from None

    def close(self):
        """Commit pending index changes and mark the object closed."""
        try:
            self._commit()
        finally:
            self._index = self._datfile = self._dirfile = self._bakfile = None

    __del__ = close

    def _chmod(self, file):
        """Apply the permission bits given at construction to file."""
        self._os.chmod(file, self._mode)

    def __enter__(self):
        return self

    def __exit__(self, *args):
        self.close()
289
290
def open(file, flag='c', mode=0o666):
    """Open the database named by file and return the corresponding object.

    file is the base name of the database; the '.dir', '.dat' and '.bak'
    extensions are appended to it.

    The flag argument selects how the database is opened:

        'r' -- open an existing database for reading only
        'w' -- open an existing database for reading and writing
        'c' -- open for reading and writing, creating the database if it
               does not exist (the default)
        'n' -- always create a new, empty database, removing existing files

    Any other value raises ValueError.

    The optional mode argument is the UNIX mode applied to the database
    files.  It defaults to octal code 0o666 and is modified by the
    prevailing umask before being used.

    """

    # Validate first so that an invalid call never touches the
    # process-wide umask below.
    if flag not in ('r', 'w', 'c', 'n'):
        raise ValueError("Flag must be one of 'r', 'w', 'c', or 'n'")

    # Modify mode depending on the umask.  os.umask() can only be read by
    # setting it, so set it to 0 and immediately restore the old value.
    try:
        um = _os.umask(0)
        _os.umask(um)
    except AttributeError:
        # Platform without os.umask(): use mode unchanged.
        pass
    else:
        # Turn off any bits that are set in the umask
        mode = mode & (~um)
    return _Database(file, mode, flag=flag)