from test import support, seq_tests
import unittest

import gc
import pickle

# For tuple hashes, we normally only run a test to ensure that we get
# the same results across platforms in a handful of cases. If that's
# so, there's no real point to running more. Set RUN_ALL_HASH_TESTS to
# run more anyway. That's usually of real interest only when analyzing,
# or changing, the hash algorithm, in which case it's usually also
# most useful to set JUST_SHOW_HASH_RESULTS, to see all the results
# instead of wrestling with test "failures". See the bottom of the
# file for extensive notes on what we're testing here and why.
RUN_ALL_HASH_TESTS = False
JUST_SHOW_HASH_RESULTS = False  # if RUN_ALL_HASH_TESTS, just display

class TupleTest(seq_tests.CommonTest):
    type2test = tuple

    def test_getitem_error(self):
        t = ()
        msg = "tuple indices must be integers or slices"
        with self.assertRaisesRegex(TypeError, msg):
            t['a']

    def test_constructors(self):
        super().test_constructors()
        # calling built-in types without an argument must return empty
        self.assertEqual(tuple(), ())
        t0_3 = (0, 1, 2, 3)
        t0_3_bis = tuple(t0_3)
        self.assertTrue(t0_3 is t0_3_bis)
        self.assertEqual(tuple([]), ())
        self.assertEqual(tuple([0, 1, 2, 3]), (0, 1, 2, 3))
        self.assertEqual(tuple(''), ())
        self.assertEqual(tuple('spam'), ('s', 'p', 'a', 'm'))
        self.assertEqual(tuple(x for x in range(10) if x % 2),
                         (1, 3, 5, 7, 9))

    def test_keyword_args(self):
        with self.assertRaisesRegex(TypeError, 'keyword argument'):
            tuple(sequence=())

    def test_keywords_in_subclass(self):
        class subclass(tuple):
            pass
        u = subclass([1, 2])
        self.assertIs(type(u), subclass)
        self.assertEqual(list(u), [1, 2])
        with self.assertRaises(TypeError):
            subclass(sequence=())

        class subclass_with_init(tuple):
            def __init__(self, arg, newarg=None):
                self.newarg = newarg
        u = subclass_with_init([1, 2], newarg=3)
        self.assertIs(type(u), subclass_with_init)
        self.assertEqual(list(u), [1, 2])
        self.assertEqual(u.newarg, 3)

        class subclass_with_new(tuple):
            def __new__(cls, arg, newarg=None):
                self = super().__new__(cls, arg)
                self.newarg = newarg
                return self
        u = subclass_with_new([1, 2], newarg=3)
        self.assertIs(type(u), subclass_with_new)
        self.assertEqual(list(u), [1, 2])
        self.assertEqual(u.newarg, 3)

    def test_truth(self):
        super().test_truth()
        self.assertTrue(not ())
        self.assertTrue((42, ))

    def test_len(self):
        super().test_len()
        self.assertEqual(len(()), 0)
        self.assertEqual(len((0,)), 1)
        self.assertEqual(len((0, 1, 2)), 3)

    def test_iadd(self):
        super().test_iadd()
        u = (0, 1)
        u2 = u
        u += (2, 3)
        self.assertTrue(u is not u2)

    def test_imul(self):
        super().test_imul()
        u = (0, 1)
        u2 = u
        u *= 3
        self.assertTrue(u is not u2)

    def test_tupleresizebug(self):
        # Check that a specific bug in _PyTuple_Resize() is squashed.
        def f():
            for i in range(1000):
                yield i
        self.assertEqual(list(tuple(f())), list(range(1000)))

    # We expect tuples whose base components have deterministic hashes to
    # have deterministic hashes too - and, indeed, the same hashes across
    # platforms with hash codes of the same bit width.
    def test_hash_exact(self):
        def check_one_exact(t, e32, e64):
            got = hash(t)
            expected = e32 if support.NHASHBITS == 32 else e64
            if got != expected:
                msg = f"FAIL hash({t!r}) == {got} != {expected}"
                self.fail(msg)

        check_one_exact((), 750394483, 5740354900026072187)
        check_one_exact((0,), 1214856301, -8753497827991233192)
        check_one_exact((0, 0), -168982784, -8458139203682520985)
        check_one_exact((0.5,), 2077348973, -408149959306781352)
        check_one_exact((0.5, (), (-2, 3, (4, 6))), 714642271,
                        -1845940830829704396)

    # Various tests for hashing of tuples to check that we get few collisions.
    # Does something only if RUN_ALL_HASH_TESTS is true.
    #
    # Earlier versions of the tuple hash algorithm had massive collisions
    # reported at:
    # - https://bugs.python.org/issue942952
    # - https://bugs.python.org/issue34751
    def test_hash_optional(self):
        from itertools import product

        if not RUN_ALL_HASH_TESTS:
            return

        # If specified, `expected` is a 2-tuple of expected
        # (number_of_collisions, pileup) values, and the test fails if
        # those aren't the values we get. Also if specified, the test
        # fails if z > `zlimit`.
        def tryone_inner(tag, nbins, hashes, expected=None, zlimit=None):
            from collections import Counter

            nballs = len(hashes)
            mean, sdev = support.collision_stats(nbins, nballs)
            c = Counter(hashes)
            collisions = nballs - len(c)
            z = (collisions - mean) / sdev
            pileup = max(c.values()) - 1
            del c
            got = (collisions, pileup)
            failed = False
            prefix = ""
            if zlimit is not None and z > zlimit:
                failed = True
                prefix = f"FAIL z > {zlimit}; "
            if expected is not None and got != expected:
                failed = True
                prefix += f"FAIL {got} != {expected}; "
            if failed or JUST_SHOW_HASH_RESULTS:
                msg = f"{prefix}{tag}; pileup {pileup:,} mean {mean:.1f} "
                msg += f"coll {collisions:,} z {z:+.1f}"
                if JUST_SHOW_HASH_RESULTS:
                    import sys
                    print(msg, file=sys.__stdout__)
                else:
                    self.fail(msg)

        def tryone(tag, xs,
                   native32=None, native64=None, hi32=None, lo32=None,
                   zlimit=None):
            NHASHBITS = support.NHASHBITS
            hashes = list(map(hash, xs))
            tryone_inner(tag + f"; {NHASHBITS}-bit hash codes",
                         1 << NHASHBITS,
                         hashes,
                         native32 if NHASHBITS == 32 else native64,
                         zlimit)

            if NHASHBITS > 32:
                shift = NHASHBITS - 32
                tryone_inner(tag + "; 32-bit upper hash codes",
                             1 << 32,
                             [h >> shift for h in hashes],
                             hi32,
                             zlimit)

                mask = (1 << 32) - 1
                tryone_inner(tag + "; 32-bit lower hash codes",
                             1 << 32,
                             [h & mask for h in hashes],
                             lo32,
                             zlimit)

        # Tuples of smallish positive integers are common - nice if we
        # get "better than random" for these.
        tryone("range(100) by 3", list(product(range(100), repeat=3)),
               (0, 0), (0, 0), (4, 1), (0, 0))

        # A previous hash had systematic problems when mixing integers of
        # similar magnitude but opposite sign, obscurely related to the
        # fact that j ^ -2 == -j when j is odd.
        cands = list(range(-10, -1)) + list(range(9))

        # Note: -1 is omitted because hash(-1) == hash(-2) == -2, and
        # there's nothing the tuple hash can do to avoid collisions
        # inherited from collisions in the tuple components' hashes.
        tryone("-10 .. 8 by 4", list(product(cands, repeat=4)),
               (0, 0), (0, 0), (0, 0), (0, 0))
        del cands

        # The hashes here are a weird mix of values where all the
        # variation is in the lowest bits and across a single high-order
        # bit - the middle bits are all zeroes. A decent hash has to
        # both propagate low bits to the left and high bits to the
        # right. This is also complicated a bit in that there are
        # collisions among the hashes of the integers in L alone.
        L = [n << 60 for n in range(100)]
        tryone("0..99 << 60 by 3", list(product(L, repeat=3)),
               (0, 0), (0, 0), (0, 0), (324, 1))
        del L

        # Used to suffer a massive number of collisions.
        tryone("[-3, 3] by 18", list(product([-3, 3], repeat=18)),
               (7, 1), (0, 0), (7, 1), (6, 1))

        # And even worse. hash(0.5) has only a single bit set, at the
        # high end. A decent hash needs to propagate high bits right.
        tryone("[0, 0.5] by 18", list(product([0, 0.5], repeat=18)),
               (5, 1), (0, 0), (9, 1), (12, 1))

        # Hashes of ints and floats are the same across platforms.
        # String hashes vary even on a single platform across runs, due
        # to hash randomization for strings. So we can't say exactly
        # what this should do. Instead we insist that the # of
        # collisions is no more than 4 sdevs above the theoretically
        # random mean. Even if the tuple hash can't achieve that on its
        # own, the string hash is trying to be decently pseudo-random
        # (in all bit positions) on _its_ own. We can at least test
        # that the tuple hash doesn't systematically ruin that.
        tryone("4-char tuples",
               list(product("abcdefghijklmnopqrstuvwxyz", repeat=4)),
               zlimit=4.0)

        # The "old tuple test". See https://bugs.python.org/issue942952.
        # Ensures, for example, that the hash:
        #     is non-commutative
        #     spreads closely spaced values
        #     doesn't exhibit cancellation in tuples like (x,(x,y))
        N = 50
        base = list(range(N))
        xp = list(product(base, repeat=2))
        inps = base + list(product(base, xp)) + \
               list(product(xp, base)) + xp + list(zip(base))
        tryone("old tuple test", inps,
               (2, 1), (0, 0), (52, 49), (7, 1))
        del base, xp, inps

        # The "new tuple test". See https://bugs.python.org/issue34751.
        # Even more tortured nesting, and a mix of signed ints of very
        # small magnitude.
        n = 5
        A = [x for x in range(-n, n+1) if x != -1]
        B = A + [(a,) for a in A]
        L2 = list(product(A, repeat=2))
        L3 = L2 + list(product(A, repeat=3))
        L4 = L3 + list(product(A, repeat=4))
        # T = list of testcases. These consist of all (possibly nested
        # at most 2 levels deep) tuples containing at most 4 items from
        # the set A.
        T = A
        T += [(a,) for a in B + L4]
        T += product(L3, B)
        T += product(L2, repeat=2)
        T += product(B, L3)
        T += product(B, B, L2)
        T += product(B, L2, B)
        T += product(L2, B, B)
        T += product(B, repeat=4)
        assert len(T) == 345130
        tryone("new tuple test", T,
               (9, 1), (0, 0), (21, 5), (6, 1))

    def test_repr(self):
        l0 = tuple()
        l2 = (0, 1, 2)
        a0 = self.type2test(l0)
        a2 = self.type2test(l2)

        self.assertEqual(str(a0), repr(l0))
        self.assertEqual(str(a2), repr(l2))
        self.assertEqual(repr(a0), "()")
        self.assertEqual(repr(a2), "(0, 1, 2)")

    def _not_tracked(self, t):
        # Nested tuples can take several collections to untrack
        gc.collect()
        gc.collect()
        self.assertFalse(gc.is_tracked(t), t)

    def _tracked(self, t):
        self.assertTrue(gc.is_tracked(t), t)
        gc.collect()
        gc.collect()
        self.assertTrue(gc.is_tracked(t), t)

    @support.cpython_only
    def test_track_literals(self):
        # Test GC-optimization of tuple literals
        x, y, z = 1.5, "a", []

        self._not_tracked(())
        self._not_tracked((1,))
        self._not_tracked((1, 2))
        self._not_tracked((1, 2, "a"))
        self._not_tracked((1, 2, (None, True, False, ()), int))
        self._not_tracked((object(),))
        self._not_tracked(((1, x), y, (2, 3)))

        # Tuples with mutable elements are always tracked, even if those
        # elements are not tracked right now.
        self._tracked(([],))
        self._tracked(([1],))
        self._tracked(({},))
        self._tracked((set(),))
        self._tracked((x, y, z))

    def check_track_dynamic(self, tp, always_track):
        x, y, z = 1.5, "a", []

        check = self._tracked if always_track else self._not_tracked
        check(tp())
        check(tp([]))
        check(tp(set()))
        check(tp([1, x, y]))
        check(tp(obj for obj in [1, x, y]))
        check(tp(set([1, x, y])))
        check(tp(tuple([obj]) for obj in [1, x, y]))
        check(tuple(tp([obj]) for obj in [1, x, y]))

        self._tracked(tp([z]))
        self._tracked(tp([[x, y]]))
        self._tracked(tp([{x: y}]))
        self._tracked(tp(obj for obj in [x, y, z]))
        self._tracked(tp(tuple([obj]) for obj in [x, y, z]))
        self._tracked(tuple(tp([obj]) for obj in [x, y, z]))

    @support.cpython_only
    def test_track_dynamic(self):
        # Test GC-optimization of dynamically constructed tuples.
        self.check_track_dynamic(tuple, False)

    @support.cpython_only
    def test_track_subtypes(self):
        # Tuple subtypes must always be tracked
        class MyTuple(tuple):
            pass
        self.check_track_dynamic(MyTuple, True)

    @support.cpython_only
    def test_bug7466(self):
        # Trying to untrack an unfinished tuple could crash Python
        self._not_tracked(tuple(gc.collect() for i in range(101)))

    def test_repr_large(self):
        # Check the repr of large tuple objects
        def check(n):
            l = (0,) * n
            s = repr(l)
            self.assertEqual(s,
                             '(' + ', '.join(['0'] * n) + ')')
        check(10)       # check our checking code
        check(1000000)

    def test_iterator_pickle(self):
        # UserList iterators don't support pickling yet since
        # they are based on generators.
        data = self.type2test([4, 5, 6, 7])
        for proto in range(pickle.HIGHEST_PROTOCOL + 1):
            itorg = iter(data)
            d = pickle.dumps(itorg, proto)
            it = pickle.loads(d)
            self.assertEqual(type(itorg), type(it))
            self.assertEqual(self.type2test(it), self.type2test(data))

            it = pickle.loads(d)
            next(it)
            d = pickle.dumps(it, proto)
            self.assertEqual(self.type2test(it), self.type2test(data)[1:])

    def test_reversed_pickle(self):
        data = self.type2test([4, 5, 6, 7])
        for proto in range(pickle.HIGHEST_PROTOCOL + 1):
            itorg = reversed(data)
            d = pickle.dumps(itorg, proto)
            it = pickle.loads(d)
            self.assertEqual(type(itorg), type(it))
            self.assertEqual(self.type2test(it), self.type2test(reversed(data)))

            it = pickle.loads(d)
            next(it)
            d = pickle.dumps(it, proto)
            self.assertEqual(self.type2test(it), self.type2test(reversed(data))[1:])

    def test_no_comdat_folding(self):
        # Issue 8847: In the PGO build, the MSVC linker's COMDAT folding
        # optimization causes failures in code that relies on distinct
        # function addresses.
        class T(tuple): pass
        with self.assertRaises(TypeError):
            [3,] + T((1,2))

    def test_lexicographic_ordering(self):
        # Issue 21100
        a = self.type2test([1, 2])
        b = self.type2test([1, 2, 0])
        c = self.type2test([1, 3])
        self.assertLess(a, b)
        self.assertLess(b, c)

# Notes on testing hash codes. The primary thing is that Python doesn't
# care about "random" hash codes. To the contrary, we like them to be
# very regular when possible, so that the low-order bits are as evenly
# distributed as possible. For integers this is easy: hash(i) == i for
# all not-huge i except i==-1.
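# (Concretely, hash(1) == 1 and hash(-5) == -5, but hash(-1) == -2, because
# -1 is reserved as the error return from hash slots at the C level.)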
#
# For tuples of mixed type there's really no hope of that, so we want
# "randomish" here instead. But getting close to pseudo-random in all
# bit positions is more expensive than we've been willing to pay for.
#
# We can tolerate large deviations from random - what we don't want is
# catastrophic pileups on a relative handful of hash codes. The dict
# and set lookup routines remain effective provided that full-width hash
# codes for not-equal objects are distinct.
#
# So we compute various statistics here based on what a "truly random"
# hash would do, but don't automate "pass or fail" based on those
# results. Instead those are viewed as inputs to human judgment, and the
# automated tests merely ensure we get the _same_ results across
# platforms. In fact, we normally don't bother to run them at all -
# set RUN_ALL_HASH_TESTS to force it.
#
# When global JUST_SHOW_HASH_RESULTS is True, the tuple hash statistics
# are just displayed to stdout. A typical output line looks like:
#
# old tuple test; 32-bit upper hash codes; \
#     pileup 49 mean 7.4 coll 52 z +16.4
#
# "old tuple test" is just a string name for the test being run.
#
# "32-bit upper hash codes" means this was run under a 64-bit build and
# we've shifted away the lower 32 bits of the hash codes.
#
# "pileup" is 0 if there were no collisions across those hash codes.
# It's 1 less than the maximum number of times any single hash code was
# seen. So in this case, there was (at least) one hash code that was
# seen 50 times: that hash code "piled up" 49 more times than ideal.
#
# "mean" is the number of collisions a perfectly random hash function
# would have yielded, on average.
#
# "coll" is the number of collisions actually seen.
#
# "z" is "coll - mean" divided by the standard deviation of the number
# of collisions a perfectly random hash function would suffer. A
# positive value is "worse than random", and a negative value "better than
# random". Anything of magnitude greater than 3 would be highly suspect
# for a hash function that claimed to be random. It's essentially
# impossible that a truly random function would deliver a result 16.4
# sdevs "worse than random".
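#
# As a sketch of how those numbers are derived (mirroring tryone_inner()
# above), given a list `hashes` and a table size `nbins`:
#
#     from collections import Counter
#     c = Counter(hashes)
#     coll = len(hashes) - len(c)       # hashes that repeated an earlier code
#     pileup = max(c.values()) - 1      # worst pileup on a single hash code
#     mean, sdev = support.collision_stats(nbins, len(hashes))
#     z = (coll - mean) / sdev          # sdevs away from the random mean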
#
# But we don't care here! That's why the test isn't coded to fail.
# Knowing something about how the high-order hash code bits behave
# provides insight, but is irrelevant to how the dict and set lookup
# code performs. The low-order bits are much more important to that,
# and on the same test those did "just like random":
#
# old tuple test; 32-bit lower hash codes; \
#     pileup 1 mean 7.4 coll 7 z -0.2
#
# So there are always tradeoffs to consider. For another:
#
# 0..99 << 60 by 3; 32-bit hash codes; \
#     pileup 0 mean 116.4 coll 0 z -10.8
#
# That was run under a 32-bit build, and is spectacularly "better than
# random". On a 64-bit build the wider hash codes are fine too:
#
# 0..99 << 60 by 3; 64-bit hash codes; \
#     pileup 0 mean 0.0 coll 0 z -0.0
#
# but their lower 32 bits are poor:
#
# 0..99 << 60 by 3; 32-bit lower hash codes; \
#     pileup 1 mean 116.4 coll 324 z +19.2
#
# In a statistical sense that's waaaaay too many collisions, but (a) 324
# collisions out of a million hash codes isn't anywhere near being a
# real problem; and, (b) the worst pileup on a single hash code is a measly
# 1 extra. It's a relatively poor case for the tuple hash, but still
# fine for practical use.
#
# This one isn't, though - it's what Python 3.7.1 produced for the hashes of
# itertools.product([0, 0.5], repeat=18). Even with a fat 64-bit
# hashcode, the highest pileup was over 16,000 - making a dict/set
# lookup on one of the colliding values thousands of times slower (on
# average) than we expect.
#
# [0, 0.5] by 18; 64-bit hash codes; \
#     pileup 16,383 mean 0.0 coll 262,128 z +6073641856.9
# [0, 0.5] by 18; 32-bit lower hash codes; \
#     pileup 262,143 mean 8.0 coll 262,143 z +92683.6

if __name__ == "__main__":
    unittest.main()