1 #. Copyright (C) 2005-2010 Gregory P. Smith (greg@krypto.org)
2 # Licensed to PSF under a Contributor Agreement.
3 #
4
5 __doc__ = """hashlib module - A common interface to many hash functions.
6
7 new(name, data=b'', **kwargs) - returns a new hash object implementing the
8 given hash function; initializing the hash
9 using the given binary data.
10
11 Named constructor functions are also available, these are faster
12 than using new(name):
13
md5(), sha1(), sha224(), sha256(), sha384(), sha512(), blake2b(), blake2s(),
sha3_224(), sha3_256(), sha3_384(), sha3_512(), shake_128(), and shake_256().
16
17 More algorithms may be available on your platform but the above are guaranteed
18 to exist. See the algorithms_guaranteed and algorithms_available attributes
19 to find out what algorithm names can be passed to new().
20
21 NOTE: If you want the adler32 or crc32 hash functions they are available in
22 the zlib module.
23
24 Choose your hash function wisely. Some have known collision weaknesses.
25 sha384 and sha512 will be slow on 32 bit platforms.
26
27 Hash objects have these methods:
28 - update(data): Update the hash object with the bytes in data. Repeated calls
29 are equivalent to a single call with the concatenation of all
30 the arguments.
31 - digest(): Return the digest of the bytes passed to the update() method
32 so far as a bytes object.
33 - hexdigest(): Like digest() except the digest is returned as a string
34 of double length, containing only hexadecimal digits.
 - copy():       Return a copy (clone) of the hash object. This can be used to
                 efficiently compute the digests of data sharing a common
                 initial substring.
38
39 For example, to obtain the digest of the byte string 'Nobody inspects the
40 spammish repetition':
41
42 >>> import hashlib
43 >>> m = hashlib.md5()
44 >>> m.update(b"Nobody inspects")
45 >>> m.update(b" the spammish repetition")
46 >>> m.digest()
47 b'\\xbbd\\x9c\\x83\\xdd\\x1e\\xa5\\xc9\\xd9\\xde\\xc9\\xa1\\x8d\\xf0\\xff\\xe9'
48
49 More condensed:
50
51 >>> hashlib.sha224(b"Nobody inspects the spammish repetition").hexdigest()
52 'a4337bc45a8fc544c03f52dc550cd6e1e87021bc896588bd79e901e2'
53
54 """
55
# Names of the hash algorithms that get a named module-level constructor.
# This tuple and __get_builtin_constructor() must be modified if a new
# always available algorithm is added.
__always_supported = ('md5', 'sha1', 'sha224', 'sha256', 'sha384', 'sha512',
                      'blake2b', 'blake2s',
                      'sha3_224', 'sha3_256', 'sha3_384', 'sha3_512',
                      'shake_128', 'shake_256')


# Both sets start out as the always-supported names.  algorithms_available
# is rebound further down to also include the OpenSSL-provided names when
# the _hashlib extension can be imported.
algorithms_guaranteed = set(__always_supported)
algorithms_available = set(__always_supported)

# Public names: every always-supported constructor plus the helpers below.
__all__ = __always_supported + ('new', 'algorithms_guaranteed',
                                'algorithms_available', 'pbkdf2_hmac', 'file_digest')
69
70
# Maps an algorithm name to its builtin constructor.  Filled lazily by
# __get_builtin_constructor() the first time a name is requested.
__builtin_constructor_cache = {}

# Prefer our blake2 implementation
# OpenSSL 1.1.0 comes with a limited implementation of blake2b/s. The OpenSSL
# implementations neither support keyed blake2 (blake2 MAC) nor advanced
# features like salt, personalization, or tree hashing. OpenSSL hash-only
# variants are available as 'blake2b512' and 'blake2s256', though.
# Names listed here always bypass OpenSSL in __get_openssl_constructor() and
# __hash_new().
__block_openssl_constructor = {
    'blake2b', 'blake2s',
}
81
def __get_builtin_constructor(name):
    """Return the builtin (non-OpenSSL) constructor for the hash *name*.

    Constructors are memoised in __builtin_constructor_cache.  On a cache
    miss the extension module implementing *name* is imported and every
    constructor of that family is registered, so sibling algorithms are
    cached together.

    Raises ValueError if *name* is unknown or its extension module cannot
    be imported.
    """
    registry = __builtin_constructor_cache
    known = registry.get(name)
    if known is not None:
        return known

    try:
        if name in {'SHA1', 'sha1'}:
            import _sha1
            for alias in ('SHA1', 'sha1'):
                registry[alias] = _sha1.sha1
        elif name in {'MD5', 'md5'}:
            import _md5
            for alias in ('MD5', 'md5'):
                registry[alias] = _md5.md5
        elif name in {'SHA256', 'sha256', 'SHA224', 'sha224'}:
            import _sha256
            for alias in ('SHA224', 'sha224'):
                registry[alias] = _sha256.sha224
            for alias in ('SHA256', 'sha256'):
                registry[alias] = _sha256.sha256
        elif name in {'SHA512', 'sha512', 'SHA384', 'sha384'}:
            import _sha512
            for alias in ('SHA384', 'sha384'):
                registry[alias] = _sha512.sha384
            for alias in ('SHA512', 'sha512'):
                registry[alias] = _sha512.sha512
        elif name in {'blake2b', 'blake2s'}:
            import _blake2
            for algo in ('blake2b', 'blake2s'):
                registry[algo] = getattr(_blake2, algo)
        elif name in {'sha3_224', 'sha3_256', 'sha3_384', 'sha3_512'}:
            import _sha3
            for algo in ('sha3_224', 'sha3_256', 'sha3_384', 'sha3_512'):
                registry[algo] = getattr(_sha3, algo)
        elif name in {'shake_128', 'shake_256'}:
            import _sha3
            for algo in ('shake_128', 'shake_256'):
                registry[algo] = getattr(_sha3, algo)
    except ImportError:
        # No extension module: the hash stays unregistered and is reported
        # as unsupported below.
        pass

    resolved = registry.get(name)
    if resolved is None:
        raise ValueError('unsupported hash type ' + name)
    return resolved
124
125
def __get_openssl_constructor(name):
    """Return the preferred constructor for the hash *name*.

    Names in __block_openssl_constructor always resolve to the builtin
    implementation.  For any other name the `openssl_<name>` function of
    _hashlib is returned, provided it exists and a probe call succeeds;
    otherwise the builtin constructor is used (which raises ValueError
    when the hash is unsupported there as well).
    """
    if name in __block_openssl_constructor:
        # Prefer our builtin blake2 implementation.
        return __get_builtin_constructor(name)

    try:
        # MD5, SHA1, and SHA2 are in all supported OpenSSL versions
        # SHA3/shake are available in OpenSSL 1.1.1+
        openssl_ctor = getattr(_hashlib, 'openssl_' + name)
        # The function can be defined while the hash itself is not actually
        # available, in which case calling it raises ValueError.  The probe
        # passes usedforsecurity=False (see bpo-40695, which concerns
        # digests blocked by the current security policy).
        openssl_ctor(usedforsecurity=False)
    except (AttributeError, ValueError):
        return __get_builtin_constructor(name)

    # Use the C function directly (very fast)
    return openssl_ctor
142
143
def __py_new(name, data=b'', **kwargs):
    """new(name, data=b'', **kwargs) - Return a new hashing object using the
    named algorithm; optionally initialized with data (which must be
    a bytes-like object).
    """
    # Resolve the builtin constructor first, then build the hash object with
    # the caller's data and keyword arguments.
    make_hash = __get_builtin_constructor(name)
    return make_hash(data, **kwargs)
150
151
def __hash_new(name, data=b'', **kwargs):
    """new(name, data=b'', **kwargs) - Return a new hashing object using the
    named algorithm; optionally initialized with data (which must be
    a bytes-like object).

    Names in __block_openssl_constructor are always served by the builtin
    implementation.  Every other name is tried through _hashlib (OpenSSL)
    first and falls back to the builtin implementation when _hashlib raises
    ValueError.  Keyword arguments are forwarded unchanged on every path.

    Raises ValueError if neither implementation supports *name*.
    """
    if name in __block_openssl_constructor:
        # Prefer our builtin blake2 implementation.
        return __get_builtin_constructor(name)(data, **kwargs)
    try:
        return _hashlib.new(name, data, **kwargs)
    except ValueError:
        # If the _hashlib module (OpenSSL) doesn't support the named
        # hash, try using our builtin implementations.
        # This allows for SHA224/256 and SHA384/512 support even though
        # the OpenSSL library prior to 0.9.8 doesn't provide them.
        # Forward **kwargs here too, so options such as usedforsecurity are
        # not silently discarded on the fallback path.
        return __get_builtin_constructor(name)(data, **kwargs)
167
168
# Select the backend used by new() and by the named constructors.  With the
# _hashlib (OpenSSL) extension available, OpenSSL is tried first and its
# algorithm names are added to algorithms_available; without it, everything
# is served by the builtin extension modules.
try:
    import _hashlib
    new = __hash_new
    __get_hash = __get_openssl_constructor
    algorithms_available = algorithms_available.union(
            _hashlib.openssl_md_meth_names)
except ImportError:
    # Keep the name defined so later references to _hashlib do not fail.
    _hashlib = None
    new = __py_new
    __get_hash = __get_builtin_constructor
179
try:
    # OpenSSL's PKCS5_PBKDF2_HMAC requires OpenSSL 1.0+ with HMAC and SHA
    from _hashlib import pbkdf2_hmac
except ImportError:
    from warnings import warn as _warn
    # Translation tables that XOR every byte value with the HMAC outer
    # (0x5C) and inner (0x36) padding constants.
    _trans_5C = bytes(x ^ 0x5C for x in range(256))
    _trans_36 = bytes(x ^ 0x36 for x in range(256))

    def pbkdf2_hmac(hash_name, password, salt, iterations, dklen=None):
        """Password based key derivation function 2 (PKCS #5 v2.0)

        Pure-Python fallback used when _hashlib does not provide
        pbkdf2_hmac.  It inlines the HMAC computation instead of going
        through the hmac module; the original authors describe it as about
        as fast as OpenSSL's PKCS5_PBKDF2_HMAC for short passwords and much
        faster for long passwords.

        hash_name must be a str naming a hash accepted by new(); password
        and salt are bytes-like objects.  dklen defaults to the digest size
        of the hash.  Emits a DeprecationWarning on every call.

        Raises TypeError if hash_name is not a str and ValueError if
        iterations or dklen is smaller than 1.
        """
        _warn(
            "Python implementation of pbkdf2_hmac() is deprecated.",
            category=DeprecationWarning,
            stacklevel=2
        )
        if not isinstance(hash_name, str):
            raise TypeError(hash_name)

        # Anything that is not already bytes/bytearray is copied out of its
        # buffer so that len(), concatenation and translate() work below.
        if not isinstance(password, (bytes, bytearray)):
            password = bytes(memoryview(password))
        if not isinstance(salt, (bytes, bytearray)):
            salt = bytes(memoryview(salt))

        # Fast inline HMAC implementation: prepare the inner and outer hash
        # states once, keyed with the password.
        inner_hash = new(hash_name)
        outer_hash = new(hash_name)
        block_len = getattr(inner_hash, 'block_size', 64)
        if len(password) > block_len:
            # Over-long keys are replaced by their digest.
            password = new(hash_name, password).digest()
        padded_key = password.ljust(block_len, b'\x00')
        inner_hash.update(padded_key.translate(_trans_36))
        outer_hash.update(padded_key.translate(_trans_5C))

        def keyed_mac(message):
            # PBKDF2_HMAC uses the password as key, so the prepared states
            # are reused: only copies are updated, skipping initialization.
            inner_state = inner_hash.copy()
            inner_state.update(message)
            outer_state = outer_hash.copy()
            outer_state.update(inner_state.digest())
            return outer_state.digest()

        if iterations < 1:
            raise ValueError(iterations)
        if dklen is None:
            dklen = outer_hash.digest_size
        if dklen < 1:
            raise ValueError(dklen)

        blocks = []
        produced = 0
        block_index = 1
        while produced < dklen:
            # First round: MAC of the salt followed by the 4-byte counter.
            round_out = keyed_mac(salt + block_index.to_bytes(4))
            # XOR is done on ints; endianness doesn't matter here as long as
            # to_bytes / from_bytes use the same.
            folded = int.from_bytes(round_out)
            for _ in range(iterations - 1):
                round_out = keyed_mac(round_out)
                folded ^= int.from_bytes(round_out)
            block = folded.to_bytes(inner_hash.digest_size)
            blocks.append(block)
            produced += len(block)
            block_index += 1

        return b''.join(blocks)[:dklen]
249
try:
    # OpenSSL's scrypt requires OpenSSL 1.1+
    from _hashlib import scrypt
except ImportError:
    # scrypt is optional: when _hashlib cannot supply it, this module simply
    # does not define the name.
    pass
255
256
def file_digest(fileobj, digest, /, *, _bufsize=2**18):
    """Hash the contents of a file-like object. Returns a digest object.

    *fileobj* must be a file-like object opened for reading in binary mode.
    It accepts file objects from open(), io.BytesIO(), and SocketIO objects.
    The function may bypass Python's I/O and use the file descriptor *fileno*
    directly.

    *digest* must either be a hash algorithm name as a *str*, a hash
    constructor, or a callable that returns a hash object.

    Raises ValueError if *fileobj* is not readable in binary mode, and
    BlockingIOError if *fileobj* is non-blocking and has no data available
    (its readinto() returned None).
    """
    # On Linux we could use AF_ALG sockets and sendfile() to achieve zero-copy
    # hashing with hardware acceleration.
    if isinstance(digest, str):
        digestobj = new(digest)
    else:
        digestobj = digest()

    if hasattr(fileobj, "getbuffer"):
        # io.BytesIO object, use zero-copy buffer
        digestobj.update(fileobj.getbuffer())
        return digestobj

    # Only binary files implement readinto().
    if not (
        hasattr(fileobj, "readinto")
        and hasattr(fileobj, "readable")
        and fileobj.readable()
    ):
        raise ValueError(
            f"'{fileobj!r}' is not a file-like object in binary reading mode."
        )

    # binary file, socket.SocketIO object
    # Note: socket I/O uses different syscalls than file I/O.
    buf = bytearray(_bufsize)  # Reusable buffer to reduce allocations.
    view = memoryview(buf)
    while True:
        size = fileobj.readinto(buf)
        if size is None:
            # A non-blocking stream with no data ready reports None.  Slicing
            # with view[:None] would hash the whole stale buffer and the loop
            # would never terminate, so fail loudly instead.
            raise BlockingIOError("I/O operation would block.")
        if size == 0:
            break  # EOF
        digestobj.update(view[:size])

    return digestobj
301
302
# Publish one module-level constructor per always-supported algorithm
# (hashlib.md5, hashlib.sha256, ...), resolved through the backend selected
# above (__get_hash).
for __func_name in __always_supported:
    # try them all, some may not work due to the OpenSSL
    # version not supporting that algorithm.
    try:
        globals()[__func_name] = __get_hash(__func_name)
    except ValueError:
        # A missing algorithm is logged rather than raised, so the loop
        # continues with the remaining names.
        import logging
        logging.exception('code for hash %s was not found.', __func_name)


# Cleanup locals()
# Remove the private helper names from the module namespace now that the
# public constructors and new() have been bound.
del __always_supported, __func_name, __get_hash
del __py_new, __hash_new, __get_openssl_constructor