(root)/
Python-3.12.0/
Lib/
test/
test_hash.py
       1  # test the invariant that
       2  #   iff a==b then hash(a)==hash(b)
       3  #
       4  # Also test that hash implementations are inherited as expected
       5  
       6  import datetime
       7  import os
       8  import sys
       9  import unittest
      10  from test.support.script_helper import assert_python_ok
      11  from collections.abc import Hashable
      12  
      13  IS_64BIT = sys.maxsize > 2**32
      14  
      15  def lcg(x, length=16):
      16      """Linear congruential generator"""
      17      if x == 0:
      18          return bytes(length)
      19      out = bytearray(length)
      20      for i in range(length):
      21          x = (214013 * x + 2531011) & 0x7fffffff
      22          out[i] = (x >> 16) & 0xff
      23      return bytes(out)
      24  
      25  def pysiphash(uint64):
      26      """Convert SipHash24 output to Py_hash_t
      27      """
      28      assert 0 <= uint64 < (1 << 64)
      29      # simple unsigned to signed int64
      30      if uint64 > (1 << 63) - 1:
      31          int64 = uint64 - (1 << 64)
      32      else:
      33          int64 = uint64
      34      # mangle uint64 to uint32
      35      uint32 = (uint64 ^ uint64 >> 32) & 0xffffffff
      36      # simple unsigned to signed int32
      37      if uint32 > (1 << 31) - 1:
      38          int32 = uint32 - (1 << 32)
      39      else:
      40          int32 = uint32
      41      return int32, int64
      42  
      43  def skip_unless_internalhash(test):
      44      """Skip decorator for tests that depend on SipHash24 or FNV"""
      45      ok = sys.hash_info.algorithm in {"fnv", "siphash13", "siphash24"}
      46      msg = "Requires SipHash13, SipHash24 or FNV"
      47      return test if ok else unittest.skip(msg)(test)
      48  
      49  
      50  class ESC[4;38;5;81mHashEqualityTestCase(ESC[4;38;5;149munittestESC[4;38;5;149m.ESC[4;38;5;149mTestCase):
      51  
      52      def same_hash(self, *objlist):
      53          # Hash each object given and fail if
      54          # the hash values are not all the same.
      55          hashed = list(map(hash, objlist))
      56          for h in hashed[1:]:
      57              if h != hashed[0]:
      58                  self.fail("hashed values differ: %r" % (objlist,))
      59  
      60      def test_numeric_literals(self):
      61          self.same_hash(1, 1, 1.0, 1.0+0.0j)
      62          self.same_hash(0, 0.0, 0.0+0.0j)
      63          self.same_hash(-1, -1.0, -1.0+0.0j)
      64          self.same_hash(-2, -2.0, -2.0+0.0j)
      65  
      66      def test_coerced_integers(self):
      67          self.same_hash(int(1), int(1), float(1), complex(1),
      68                         int('1'), float('1.0'))
      69          self.same_hash(int(-2**31), float(-2**31))
      70          self.same_hash(int(1-2**31), float(1-2**31))
      71          self.same_hash(int(2**31-1), float(2**31-1))
      72          # for 64-bit platforms
      73          self.same_hash(int(2**31), float(2**31))
      74          self.same_hash(int(-2**63), float(-2**63))
      75          self.same_hash(int(2**63), float(2**63))
      76  
      77      def test_coerced_floats(self):
      78          self.same_hash(int(1.23e300), float(1.23e300))
      79          self.same_hash(float(0.5), complex(0.5, 0.0))
      80  
      81      def test_unaligned_buffers(self):
      82          # The hash function for bytes-like objects shouldn't have
      83          # alignment-dependent results (example in issue #16427).
      84          b = b"123456789abcdefghijklmnopqrstuvwxyz" * 128
      85          for i in range(16):
      86              for j in range(16):
      87                  aligned = b[i:128+j]
      88                  unaligned = memoryview(b)[i:128+j]
      89                  self.assertEqual(hash(aligned), hash(unaligned))
      90  
      91  
      92  _default_hash = object.__hash__
      93  class ESC[4;38;5;81mDefaultHash(ESC[4;38;5;149mobject): pass
      94  
      95  _FIXED_HASH_VALUE = 42
      96  class ESC[4;38;5;81mFixedHash(ESC[4;38;5;149mobject):
      97      def __hash__(self):
      98          return _FIXED_HASH_VALUE
      99  
     100  class ESC[4;38;5;81mOnlyEquality(ESC[4;38;5;149mobject):
     101      def __eq__(self, other):
     102          return self is other
     103  
     104  class ESC[4;38;5;81mOnlyInequality(ESC[4;38;5;149mobject):
     105      def __ne__(self, other):
     106          return self is not other
     107  
     108  class ESC[4;38;5;81mInheritedHashWithEquality(ESC[4;38;5;149mFixedHash, ESC[4;38;5;149mOnlyEquality): pass
     109  class ESC[4;38;5;81mInheritedHashWithInequality(ESC[4;38;5;149mFixedHash, ESC[4;38;5;149mOnlyInequality): pass
     110  
     111  class ESC[4;38;5;81mNoHash(ESC[4;38;5;149mobject):
     112      __hash__ = None
     113  
     114  class ESC[4;38;5;81mHashInheritanceTestCase(ESC[4;38;5;149munittestESC[4;38;5;149m.ESC[4;38;5;149mTestCase):
     115      default_expected = [object(),
     116                          DefaultHash(),
     117                          OnlyInequality(),
     118                         ]
     119      fixed_expected = [FixedHash(),
     120                        InheritedHashWithEquality(),
     121                        InheritedHashWithInequality(),
     122                        ]
     123      error_expected = [NoHash(),
     124                        OnlyEquality(),
     125                        ]
     126  
     127      def test_default_hash(self):
     128          for obj in self.default_expected:
     129              self.assertEqual(hash(obj), _default_hash(obj))
     130  
     131      def test_fixed_hash(self):
     132          for obj in self.fixed_expected:
     133              self.assertEqual(hash(obj), _FIXED_HASH_VALUE)
     134  
     135      def test_error_hash(self):
     136          for obj in self.error_expected:
     137              self.assertRaises(TypeError, hash, obj)
     138  
     139      def test_hashable(self):
     140          objects = (self.default_expected +
     141                     self.fixed_expected)
     142          for obj in objects:
     143              self.assertIsInstance(obj, Hashable)
     144  
     145      def test_not_hashable(self):
     146          for obj in self.error_expected:
     147              self.assertNotIsInstance(obj, Hashable)
     148  
     149  
     150  # Issue #4701: Check that some builtin types are correctly hashable
     151  class ESC[4;38;5;81mDefaultIterSeq(ESC[4;38;5;149mobject):
     152      seq = range(10)
     153      def __len__(self):
     154          return len(self.seq)
     155      def __getitem__(self, index):
     156          return self.seq[index]
     157  
     158  class ESC[4;38;5;81mHashBuiltinsTestCase(ESC[4;38;5;149munittestESC[4;38;5;149m.ESC[4;38;5;149mTestCase):
     159      hashes_to_check = [enumerate(range(10)),
     160                         iter(DefaultIterSeq()),
     161                         iter(lambda: 0, 0),
     162                        ]
     163  
     164      def test_hashes(self):
     165          _default_hash = object.__hash__
     166          for obj in self.hashes_to_check:
     167              self.assertEqual(hash(obj), _default_hash(obj))
     168  
     169  class ESC[4;38;5;81mHashRandomizationTests:
     170  
     171      # Each subclass should define a field "repr_", containing the repr() of
     172      # an object to be tested
     173  
     174      def get_hash_command(self, repr_):
     175          return 'print(hash(eval(%a)))' % repr_
     176  
     177      def get_hash(self, repr_, seed=None):
     178          env = os.environ.copy()
     179          env['__cleanenv'] = True  # signal to assert_python not to do a copy
     180                                    # of os.environ on its own
     181          if seed is not None:
     182              env['PYTHONHASHSEED'] = str(seed)
     183          else:
     184              env.pop('PYTHONHASHSEED', None)
     185          out = assert_python_ok(
     186              '-c', self.get_hash_command(repr_),
     187              **env)
     188          stdout = out[1].strip()
     189          return int(stdout)
     190  
     191      def test_randomized_hash(self):
     192          # two runs should return different hashes
     193          run1 = self.get_hash(self.repr_, seed='random')
     194          run2 = self.get_hash(self.repr_, seed='random')
     195          self.assertNotEqual(run1, run2)
     196  
     197  class ESC[4;38;5;81mStringlikeHashRandomizationTests(ESC[4;38;5;149mHashRandomizationTests):
     198      repr_ = None
     199      repr_long = None
     200  
     201      # 32bit little, 64bit little, 32bit big, 64bit big
     202      known_hashes = {
     203          'djba33x': [ # only used for small strings
     204              # seed 0, 'abc'
     205              [193485960, 193485960,  193485960, 193485960],
     206              # seed 42, 'abc'
     207              [-678966196, 573763426263223372, -820489388, -4282905804826039665],
     208              ],
     209          'siphash13': [
     210              # NOTE: PyUCS2 layout depends on endianness
     211              # seed 0, 'abc'
     212              [69611762, -4594863902769663758, 69611762, -4594863902769663758],
     213              # seed 42, 'abc'
     214              [-975800855, 3869580338025362921, -975800855, 3869580338025362921],
     215              # seed 42, 'abcdefghijk'
     216              [-595844228, 7764564197781545852, -595844228, 7764564197781545852],
     217              # seed 0, 'äú∑ℇ'
     218              [-1093288643, -2810468059467891395, -1041341092, 4925090034378237276],
     219              # seed 42, 'äú∑ℇ'
     220              [-585999602, -2845126246016066802, -817336969, -2219421378907968137],
     221          ],
     222          'siphash24': [
     223              # NOTE: PyUCS2 layout depends on endianness
     224              # seed 0, 'abc'
     225              [1198583518, 4596069200710135518, 1198583518, 4596069200710135518],
     226              # seed 42, 'abc'
     227              [273876886, -4501618152524544106, 273876886, -4501618152524544106],
     228              # seed 42, 'abcdefghijk'
     229              [-1745215313, 4436719588892876975, -1745215313, 4436719588892876975],
     230              # seed 0, 'äú∑ℇ'
     231              [493570806, 5749986484189612790, -1006381564, -5915111450199468540],
     232              # seed 42, 'äú∑ℇ'
     233              [-1677110816, -2947981342227738144, -1860207793, -4296699217652516017],
     234          ],
     235          'fnv': [
     236              # seed 0, 'abc'
     237              [-1600925533, 1453079729188098211, -1600925533,
     238               1453079729188098211],
     239              # seed 42, 'abc'
     240              [-206076799, -4410911502303878509, -1024014457,
     241               -3570150969479994130],
     242              # seed 42, 'abcdefghijk'
     243              [811136751, -5046230049376118746, -77208053 ,
     244               -4779029615281019666],
     245              # seed 0, 'äú∑ℇ'
     246              [44402817, 8998297579845987431, -1956240331,
     247               -782697888614047887],
     248              # seed 42, 'äú∑ℇ'
     249              [-283066365, -4576729883824601543, -271871407,
     250               -3927695501187247084],
     251          ]
     252      }
     253  
     254      def get_expected_hash(self, position, length):
     255          if length < sys.hash_info.cutoff:
     256              algorithm = "djba33x"
     257          else:
     258              algorithm = sys.hash_info.algorithm
     259          if sys.byteorder == 'little':
     260              platform = 1 if IS_64BIT else 0
     261          else:
     262              assert(sys.byteorder == 'big')
     263              platform = 3 if IS_64BIT else 2
     264          return self.known_hashes[algorithm][position][platform]
     265  
     266      def test_null_hash(self):
     267          # PYTHONHASHSEED=0 disables the randomized hash
     268          known_hash_of_obj = self.get_expected_hash(0, 3)
     269  
     270          # Randomization is enabled by default:
     271          self.assertNotEqual(self.get_hash(self.repr_), known_hash_of_obj)
     272  
     273          # It can also be disabled by setting the seed to 0:
     274          self.assertEqual(self.get_hash(self.repr_, seed=0), known_hash_of_obj)
     275  
     276      @skip_unless_internalhash
     277      def test_fixed_hash(self):
     278          # test a fixed seed for the randomized hash
     279          # Note that all types share the same values:
     280          h = self.get_expected_hash(1, 3)
     281          self.assertEqual(self.get_hash(self.repr_, seed=42), h)
     282  
     283      @skip_unless_internalhash
     284      def test_long_fixed_hash(self):
     285          if self.repr_long is None:
     286              return
     287          h = self.get_expected_hash(2, 11)
     288          self.assertEqual(self.get_hash(self.repr_long, seed=42), h)
     289  
     290  
     291  class ESC[4;38;5;81mStrHashRandomizationTests(ESC[4;38;5;149mStringlikeHashRandomizationTests,
     292                                  ESC[4;38;5;149munittestESC[4;38;5;149m.ESC[4;38;5;149mTestCase):
     293      repr_ = repr('abc')
     294      repr_long = repr('abcdefghijk')
     295      repr_ucs2 = repr('äú∑ℇ')
     296  
     297      @skip_unless_internalhash
     298      def test_empty_string(self):
     299          self.assertEqual(hash(""), 0)
     300  
     301      @skip_unless_internalhash
     302      def test_ucs2_string(self):
     303          h = self.get_expected_hash(3, 6)
     304          self.assertEqual(self.get_hash(self.repr_ucs2, seed=0), h)
     305          h = self.get_expected_hash(4, 6)
     306          self.assertEqual(self.get_hash(self.repr_ucs2, seed=42), h)
     307  
     308  class ESC[4;38;5;81mBytesHashRandomizationTests(ESC[4;38;5;149mStringlikeHashRandomizationTests,
     309                                    ESC[4;38;5;149munittestESC[4;38;5;149m.ESC[4;38;5;149mTestCase):
     310      repr_ = repr(b'abc')
     311      repr_long = repr(b'abcdefghijk')
     312  
     313      @skip_unless_internalhash
     314      def test_empty_string(self):
     315          self.assertEqual(hash(b""), 0)
     316  
     317  class ESC[4;38;5;81mMemoryviewHashRandomizationTests(ESC[4;38;5;149mStringlikeHashRandomizationTests,
     318                                         ESC[4;38;5;149munittestESC[4;38;5;149m.ESC[4;38;5;149mTestCase):
     319      repr_ = "memoryview(b'abc')"
     320      repr_long = "memoryview(b'abcdefghijk')"
     321  
     322      @skip_unless_internalhash
     323      def test_empty_string(self):
     324          self.assertEqual(hash(memoryview(b"")), 0)
     325  
     326  class ESC[4;38;5;81mDatetimeTests(ESC[4;38;5;149mHashRandomizationTests):
     327      def get_hash_command(self, repr_):
     328          return 'import datetime; print(hash(%s))' % repr_
     329  
     330  class ESC[4;38;5;81mDatetimeDateTests(ESC[4;38;5;149mDatetimeTests, ESC[4;38;5;149munittestESC[4;38;5;149m.ESC[4;38;5;149mTestCase):
     331      repr_ = repr(datetime.date(1066, 10, 14))
     332  
     333  class ESC[4;38;5;81mDatetimeDatetimeTests(ESC[4;38;5;149mDatetimeTests, ESC[4;38;5;149munittestESC[4;38;5;149m.ESC[4;38;5;149mTestCase):
     334      repr_ = repr(datetime.datetime(1, 2, 3, 4, 5, 6, 7))
     335  
     336  class ESC[4;38;5;81mDatetimeTimeTests(ESC[4;38;5;149mDatetimeTests, ESC[4;38;5;149munittestESC[4;38;5;149m.ESC[4;38;5;149mTestCase):
     337      repr_ = repr(datetime.time(0))
     338  
     339  
     340  class ESC[4;38;5;81mHashDistributionTestCase(ESC[4;38;5;149munittestESC[4;38;5;149m.ESC[4;38;5;149mTestCase):
     341  
     342      def test_hash_distribution(self):
     343          # check for hash collision
     344          base = "abcdefghabcdefg"
     345          for i in range(1, len(base)):
     346              prefix = base[:i]
     347              with self.subTest(prefix=prefix):
     348                  s15 = set()
     349                  s255 = set()
     350                  for c in range(256):
     351                      h = hash(prefix + chr(c))
     352                      s15.add(h & 0xf)
     353                      s255.add(h & 0xff)
     354                  # SipHash24 distribution depends on key, usually > 60%
     355                  self.assertGreater(len(s15), 8, prefix)
     356                  self.assertGreater(len(s255), 128, prefix)
     357  
     358  if __name__ == "__main__":
     359      unittest.main()