1
2 /* set object implementation
3
4 Written and maintained by Raymond D. Hettinger <python@rcn.com>
5 Derived from Lib/sets.py and Objects/dictobject.c.
6
7 The basic lookup function used by all operations.
8 This is based on Algorithm D from Knuth Vol. 3, Sec. 6.4.
9
10 The initial probe index is computed as hash mod the table size.
11 Subsequent probe indices are computed as explained in Objects/dictobject.c.
12
13 To improve cache locality, each probe inspects a series of consecutive
14 nearby entries before moving on to probes elsewhere in memory. This leaves
15 us with a hybrid of linear probing and randomized probing. The linear probing
16 reduces the cost of hash collisions because consecutive memory accesses
17 tend to be much cheaper than scattered probes. After LINEAR_PROBES steps,
18 we then use more of the upper bits from the hash value and apply a simple
19 linear congruential random number generator. This helps break-up long
20 chains of collisions.
21
22 All arithmetic on hash should ignore overflow.
23
24 Unlike the dictionary implementation, the lookkey function can return
25 NULL if the rich comparison returns an error.
26
27 Use cases for sets differ considerably from dictionaries where looked-up
28 keys are more likely to be present. In contrast, sets are primarily
29 about membership testing where the presence of an element is not known in
30 advance. Accordingly, the set implementation needs to optimize for both
31 the found and not-found case.
32 */
33
34 #include "Python.h"
35 #include "pycore_object.h" // _PyObject_GC_UNTRACK()
36 #include <stddef.h> // offsetof()
37
38 /* Object used as dummy key to fill deleted entries */
39 static PyObject _dummy_struct;
40
41 #define dummy (&_dummy_struct)
42
43
44 /* ======================================================================== */
45 /* ======= Begin logic for probing the hash table ========================= */
46
47 /* Set this to zero to turn-off linear probing */
48 #ifndef LINEAR_PROBES
49 #define LINEAR_PROBES 9
50 #endif
51
52 /* This must be >= 1 */
53 #define PERTURB_SHIFT 5
54
55 static setentry *
56 set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash)
57 {
58 setentry *table;
59 setentry *entry;
60 size_t perturb = hash;
61 size_t mask = so->mask;
62 size_t i = (size_t)hash & mask; /* Unsigned for defined overflow behavior */
63 int probes;
64 int cmp;
65
66 while (1) {
67 entry = &so->table[i];
68 probes = (i + LINEAR_PROBES <= mask) ? LINEAR_PROBES: 0;
69 do {
70 if (entry->hash == 0 && entry->key == NULL)
71 return entry;
72 if (entry->hash == hash) {
73 PyObject *startkey = entry->key;
74 assert(startkey != dummy);
75 if (startkey == key)
76 return entry;
77 if (PyUnicode_CheckExact(startkey)
78 && PyUnicode_CheckExact(key)
79 && _PyUnicode_EQ(startkey, key))
80 return entry;
81 table = so->table;
82 Py_INCREF(startkey);
83 cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
84 Py_DECREF(startkey);
85 if (cmp < 0)
86 return NULL;
87 if (table != so->table || entry->key != startkey)
88 return set_lookkey(so, key, hash);
89 if (cmp > 0)
90 return entry;
91 mask = so->mask;
92 }
93 entry++;
94 } while (probes--);
95 perturb >>= PERTURB_SHIFT;
96 i = (i * 5 + 1 + perturb) & mask;
97 }
98 }
99
100 static int set_table_resize(PySetObject *, Py_ssize_t);
101
102 static int
103 set_add_entry(PySetObject *so, PyObject *key, Py_hash_t hash)
104 {
105 setentry *table;
106 setentry *freeslot;
107 setentry *entry;
108 size_t perturb;
109 size_t mask;
110 size_t i; /* Unsigned for defined overflow behavior */
111 int probes;
112 int cmp;
113
114 /* Pre-increment is necessary to prevent arbitrary code in the rich
115 comparison from deallocating the key just before the insertion. */
116 Py_INCREF(key);
117
118 restart:
119
120 mask = so->mask;
121 i = (size_t)hash & mask;
122 freeslot = NULL;
123 perturb = hash;
124
125 while (1) {
126 entry = &so->table[i];
127 probes = (i + LINEAR_PROBES <= mask) ? LINEAR_PROBES: 0;
128 do {
129 if (entry->hash == 0 && entry->key == NULL)
130 goto found_unused_or_dummy;
131 if (entry->hash == hash) {
132 PyObject *startkey = entry->key;
133 assert(startkey != dummy);
134 if (startkey == key)
135 goto found_active;
136 if (PyUnicode_CheckExact(startkey)
137 && PyUnicode_CheckExact(key)
138 && _PyUnicode_EQ(startkey, key))
139 goto found_active;
140 table = so->table;
141 Py_INCREF(startkey);
142 cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
143 Py_DECREF(startkey);
144 if (cmp > 0)
145 goto found_active;
146 if (cmp < 0)
147 goto comparison_error;
148 if (table != so->table || entry->key != startkey)
149 goto restart;
150 mask = so->mask;
151 }
152 else if (entry->hash == -1) {
153 assert (entry->key == dummy);
154 freeslot = entry;
155 }
156 entry++;
157 } while (probes--);
158 perturb >>= PERTURB_SHIFT;
159 i = (i * 5 + 1 + perturb) & mask;
160 }
161
162 found_unused_or_dummy:
163 if (freeslot == NULL)
164 goto found_unused;
165 so->used++;
166 freeslot->key = key;
167 freeslot->hash = hash;
168 return 0;
169
170 found_unused:
171 so->fill++;
172 so->used++;
173 entry->key = key;
174 entry->hash = hash;
175 if ((size_t)so->fill*5 < mask*3)
176 return 0;
177 return set_table_resize(so, so->used>50000 ? so->used*2 : so->used*4);
178
179 found_active:
180 Py_DECREF(key);
181 return 0;
182
183 comparison_error:
184 Py_DECREF(key);
185 return -1;
186 }
187
188 /*
189 Internal routine used by set_table_resize() to insert an item which is
190 known to be absent from the set. Besides the performance benefit,
191 there is also safety benefit since using set_add_entry() risks making
192 a callback in the middle of a set_table_resize(), see issue 1456209.
193 The caller is responsible for updating the key's reference count and
194 the setobject's fill and used fields.
195 */
196 static void
197 set_insert_clean(setentry *table, size_t mask, PyObject *key, Py_hash_t hash)
198 {
199 setentry *entry;
200 size_t perturb = hash;
201 size_t i = (size_t)hash & mask;
202 size_t j;
203
204 while (1) {
205 entry = &table[i];
206 if (entry->key == NULL)
207 goto found_null;
208 if (i + LINEAR_PROBES <= mask) {
209 for (j = 0; j < LINEAR_PROBES; j++) {
210 entry++;
211 if (entry->key == NULL)
212 goto found_null;
213 }
214 }
215 perturb >>= PERTURB_SHIFT;
216 i = (i * 5 + 1 + perturb) & mask;
217 }
218 found_null:
219 entry->key = key;
220 entry->hash = hash;
221 }
222
223 /* ======== End logic for probing the hash table ========================== */
224 /* ======================================================================== */
225
226 /*
227 Restructure the table by allocating a new table and reinserting all
228 keys again. When entries have been deleted, the new table may
229 actually be smaller than the old one.
230 */
231 static int
232 set_table_resize(PySetObject *so, Py_ssize_t minused)
233 {
234 setentry *oldtable, *newtable, *entry;
235 Py_ssize_t oldmask = so->mask;
236 size_t newmask;
237 int is_oldtable_malloced;
238 setentry small_copy[PySet_MINSIZE];
239
240 assert(minused >= 0);
241
242 /* Find the smallest table size > minused. */
243 /* XXX speed-up with intrinsics */
244 size_t newsize = PySet_MINSIZE;
245 while (newsize <= (size_t)minused) {
246 newsize <<= 1; // The largest possible value is PY_SSIZE_T_MAX + 1.
247 }
248
249 /* Get space for a new table. */
250 oldtable = so->table;
251 assert(oldtable != NULL);
252 is_oldtable_malloced = oldtable != so->smalltable;
253
254 if (newsize == PySet_MINSIZE) {
255 /* A large table is shrinking, or we can't get any smaller. */
256 newtable = so->smalltable;
257 if (newtable == oldtable) {
258 if (so->fill == so->used) {
259 /* No dummies, so no point doing anything. */
260 return 0;
261 }
262 /* We're not going to resize it, but rebuild the
263 table anyway to purge old dummy entries.
264 Subtle: This is *necessary* if fill==size,
265 as set_lookkey needs at least one virgin slot to
266 terminate failing searches. If fill < size, it's
267 merely desirable, as dummies slow searches. */
268 assert(so->fill > so->used);
269 memcpy(small_copy, oldtable, sizeof(small_copy));
270 oldtable = small_copy;
271 }
272 }
273 else {
274 newtable = PyMem_NEW(setentry, newsize);
275 if (newtable == NULL) {
276 PyErr_NoMemory();
277 return -1;
278 }
279 }
280
281 /* Make the set empty, using the new table. */
282 assert(newtable != oldtable);
283 memset(newtable, 0, sizeof(setentry) * newsize);
284 so->mask = newsize - 1;
285 so->table = newtable;
286
287 /* Copy the data over; this is refcount-neutral for active entries;
288 dummy entries aren't copied over, of course */
289 newmask = (size_t)so->mask;
290 if (so->fill == so->used) {
291 for (entry = oldtable; entry <= oldtable + oldmask; entry++) {
292 if (entry->key != NULL) {
293 set_insert_clean(newtable, newmask, entry->key, entry->hash);
294 }
295 }
296 } else {
297 so->fill = so->used;
298 for (entry = oldtable; entry <= oldtable + oldmask; entry++) {
299 if (entry->key != NULL && entry->key != dummy) {
300 set_insert_clean(newtable, newmask, entry->key, entry->hash);
301 }
302 }
303 }
304
305 if (is_oldtable_malloced)
306 PyMem_Free(oldtable);
307 return 0;
308 }
309
310 static int
311 set_contains_entry(PySetObject *so, PyObject *key, Py_hash_t hash)
312 {
313 setentry *entry;
314
315 entry = set_lookkey(so, key, hash);
316 if (entry != NULL)
317 return entry->key != NULL;
318 return -1;
319 }
320
321 #define DISCARD_NOTFOUND 0
322 #define DISCARD_FOUND 1
323
324 static int
325 set_discard_entry(PySetObject *so, PyObject *key, Py_hash_t hash)
326 {
327 setentry *entry;
328 PyObject *old_key;
329
330 entry = set_lookkey(so, key, hash);
331 if (entry == NULL)
332 return -1;
333 if (entry->key == NULL)
334 return DISCARD_NOTFOUND;
335 old_key = entry->key;
336 entry->key = dummy;
337 entry->hash = -1;
338 so->used--;
339 Py_DECREF(old_key);
340 return DISCARD_FOUND;
341 }
342
343 static int
344 set_add_key(PySetObject *so, PyObject *key)
345 {
346 Py_hash_t hash;
347
348 if (!PyUnicode_CheckExact(key) ||
349 (hash = _PyASCIIObject_CAST(key)->hash) == -1) {
350 hash = PyObject_Hash(key);
351 if (hash == -1)
352 return -1;
353 }
354 return set_add_entry(so, key, hash);
355 }
356
357 static int
358 set_contains_key(PySetObject *so, PyObject *key)
359 {
360 Py_hash_t hash;
361
362 if (!PyUnicode_CheckExact(key) ||
363 (hash = _PyASCIIObject_CAST(key)->hash) == -1) {
364 hash = PyObject_Hash(key);
365 if (hash == -1)
366 return -1;
367 }
368 return set_contains_entry(so, key, hash);
369 }
370
371 static int
372 set_discard_key(PySetObject *so, PyObject *key)
373 {
374 Py_hash_t hash;
375
376 if (!PyUnicode_CheckExact(key) ||
377 (hash = _PyASCIIObject_CAST(key)->hash) == -1) {
378 hash = PyObject_Hash(key);
379 if (hash == -1)
380 return -1;
381 }
382 return set_discard_entry(so, key, hash);
383 }
384
385 static void
386 set_empty_to_minsize(PySetObject *so)
387 {
388 memset(so->smalltable, 0, sizeof(so->smalltable));
389 so->fill = 0;
390 so->used = 0;
391 so->mask = PySet_MINSIZE - 1;
392 so->table = so->smalltable;
393 so->hash = -1;
394 }
395
396 static int
397 set_clear_internal(PySetObject *so)
398 {
399 setentry *entry;
400 setentry *table = so->table;
401 Py_ssize_t fill = so->fill;
402 Py_ssize_t used = so->used;
403 int table_is_malloced = table != so->smalltable;
404 setentry small_copy[PySet_MINSIZE];
405
406 assert (PyAnySet_Check(so));
407 assert(table != NULL);
408
409 /* This is delicate. During the process of clearing the set,
410 * decrefs can cause the set to mutate. To avoid fatal confusion
411 * (voice of experience), we have to make the set empty before
412 * clearing the slots, and never refer to anything via so->ref while
413 * clearing.
414 */
415 if (table_is_malloced)
416 set_empty_to_minsize(so);
417
418 else if (fill > 0) {
419 /* It's a small table with something that needs to be cleared.
420 * Afraid the only safe way is to copy the set entries into
421 * another small table first.
422 */
423 memcpy(small_copy, table, sizeof(small_copy));
424 table = small_copy;
425 set_empty_to_minsize(so);
426 }
427 /* else it's a small table that's already empty */
428
429 /* Now we can finally clear things. If C had refcounts, we could
430 * assert that the refcount on table is 1 now, i.e. that this function
431 * has unique access to it, so decref side-effects can't alter it.
432 */
433 for (entry = table; used > 0; entry++) {
434 if (entry->key && entry->key != dummy) {
435 used--;
436 Py_DECREF(entry->key);
437 }
438 }
439
440 if (table_is_malloced)
441 PyMem_Free(table);
442 return 0;
443 }
444
445 /*
446 * Iterate over a set table. Use like so:
447 *
448 * Py_ssize_t pos;
449 * setentry *entry;
450 * pos = 0; # important! pos should not otherwise be changed by you
451 * while (set_next(yourset, &pos, &entry)) {
452 * Refer to borrowed reference in entry->key.
453 * }
454 *
455 * CAUTION: In general, it isn't safe to use set_next in a loop that
456 * mutates the table.
457 */
458 static int
459 set_next(PySetObject *so, Py_ssize_t *pos_ptr, setentry **entry_ptr)
460 {
461 Py_ssize_t i;
462 Py_ssize_t mask;
463 setentry *entry;
464
465 assert (PyAnySet_Check(so));
466 i = *pos_ptr;
467 assert(i >= 0);
468 mask = so->mask;
469 entry = &so->table[i];
470 while (i <= mask && (entry->key == NULL || entry->key == dummy)) {
471 i++;
472 entry++;
473 }
474 *pos_ptr = i+1;
475 if (i > mask)
476 return 0;
477 assert(entry != NULL);
478 *entry_ptr = entry;
479 return 1;
480 }
481
482 static void
483 set_dealloc(PySetObject *so)
484 {
485 setentry *entry;
486 Py_ssize_t used = so->used;
487
488 /* bpo-31095: UnTrack is needed before calling any callbacks */
489 PyObject_GC_UnTrack(so);
490 Py_TRASHCAN_BEGIN(so, set_dealloc)
491 if (so->weakreflist != NULL)
492 PyObject_ClearWeakRefs((PyObject *) so);
493
494 for (entry = so->table; used > 0; entry++) {
495 if (entry->key && entry->key != dummy) {
496 used--;
497 Py_DECREF(entry->key);
498 }
499 }
500 if (so->table != so->smalltable)
501 PyMem_Free(so->table);
502 Py_TYPE(so)->tp_free(so);
503 Py_TRASHCAN_END
504 }
505
506 static PyObject *
507 set_repr(PySetObject *so)
508 {
509 PyObject *result=NULL, *keys, *listrepr, *tmp;
510 int status = Py_ReprEnter((PyObject*)so);
511
512 if (status != 0) {
513 if (status < 0)
514 return NULL;
515 return PyUnicode_FromFormat("%s(...)", Py_TYPE(so)->tp_name);
516 }
517
518 /* shortcut for the empty set */
519 if (!so->used) {
520 Py_ReprLeave((PyObject*)so);
521 return PyUnicode_FromFormat("%s()", Py_TYPE(so)->tp_name);
522 }
523
524 keys = PySequence_List((PyObject *)so);
525 if (keys == NULL)
526 goto done;
527
528 /* repr(keys)[1:-1] */
529 listrepr = PyObject_Repr(keys);
530 Py_DECREF(keys);
531 if (listrepr == NULL)
532 goto done;
533 tmp = PyUnicode_Substring(listrepr, 1, PyUnicode_GET_LENGTH(listrepr)-1);
534 Py_DECREF(listrepr);
535 if (tmp == NULL)
536 goto done;
537 listrepr = tmp;
538
539 if (!PySet_CheckExact(so))
540 result = PyUnicode_FromFormat("%s({%U})",
541 Py_TYPE(so)->tp_name,
542 listrepr);
543 else
544 result = PyUnicode_FromFormat("{%U}", listrepr);
545 Py_DECREF(listrepr);
546 done:
547 Py_ReprLeave((PyObject*)so);
548 return result;
549 }
550
551 static Py_ssize_t
552 set_len(PyObject *so)
553 {
554 return ((PySetObject *)so)->used;
555 }
556
557 static int
558 set_merge(PySetObject *so, PyObject *otherset)
559 {
560 PySetObject *other;
561 PyObject *key;
562 Py_ssize_t i;
563 setentry *so_entry;
564 setentry *other_entry;
565
566 assert (PyAnySet_Check(so));
567 assert (PyAnySet_Check(otherset));
568
569 other = (PySetObject*)otherset;
570 if (other == so || other->used == 0)
571 /* a.update(a) or a.update(set()); nothing to do */
572 return 0;
573 /* Do one big resize at the start, rather than
574 * incrementally resizing as we insert new keys. Expect
575 * that there will be no (or few) overlapping keys.
576 */
577 if ((so->fill + other->used)*5 >= so->mask*3) {
578 if (set_table_resize(so, (so->used + other->used)*2) != 0)
579 return -1;
580 }
581 so_entry = so->table;
582 other_entry = other->table;
583
584 /* If our table is empty, and both tables have the same size, and
585 there are no dummies to eliminate, then just copy the pointers. */
586 if (so->fill == 0 && so->mask == other->mask && other->fill == other->used) {
587 for (i = 0; i <= other->mask; i++, so_entry++, other_entry++) {
588 key = other_entry->key;
589 if (key != NULL) {
590 assert(so_entry->key == NULL);
591 so_entry->key = Py_NewRef(key);
592 so_entry->hash = other_entry->hash;
593 }
594 }
595 so->fill = other->fill;
596 so->used = other->used;
597 return 0;
598 }
599
600 /* If our table is empty, we can use set_insert_clean() */
601 if (so->fill == 0) {
602 setentry *newtable = so->table;
603 size_t newmask = (size_t)so->mask;
604 so->fill = other->used;
605 so->used = other->used;
606 for (i = other->mask + 1; i > 0 ; i--, other_entry++) {
607 key = other_entry->key;
608 if (key != NULL && key != dummy) {
609 set_insert_clean(newtable, newmask, Py_NewRef(key),
610 other_entry->hash);
611 }
612 }
613 return 0;
614 }
615
616 /* We can't assure there are no duplicates, so do normal insertions */
617 for (i = 0; i <= other->mask; i++) {
618 other_entry = &other->table[i];
619 key = other_entry->key;
620 if (key != NULL && key != dummy) {
621 if (set_add_entry(so, key, other_entry->hash))
622 return -1;
623 }
624 }
625 return 0;
626 }
627
628 static PyObject *
629 set_pop(PySetObject *so, PyObject *Py_UNUSED(ignored))
630 {
631 /* Make sure the search finger is in bounds */
632 setentry *entry = so->table + (so->finger & so->mask);
633 setentry *limit = so->table + so->mask;
634 PyObject *key;
635
636 if (so->used == 0) {
637 PyErr_SetString(PyExc_KeyError, "pop from an empty set");
638 return NULL;
639 }
640 while (entry->key == NULL || entry->key==dummy) {
641 entry++;
642 if (entry > limit)
643 entry = so->table;
644 }
645 key = entry->key;
646 entry->key = dummy;
647 entry->hash = -1;
648 so->used--;
649 so->finger = entry - so->table + 1; /* next place to start */
650 return key;
651 }
652
653 PyDoc_STRVAR(pop_doc, "Remove and return an arbitrary set element.\n\
654 Raises KeyError if the set is empty.");
655
656 static int
657 set_traverse(PySetObject *so, visitproc visit, void *arg)
658 {
659 Py_ssize_t pos = 0;
660 setentry *entry;
661
662 while (set_next(so, &pos, &entry))
663 Py_VISIT(entry->key);
664 return 0;
665 }
666
667 /* Work to increase the bit dispersion for closely spaced hash values.
668 This is important because some use cases have many combinations of a
669 small number of elements with nearby hashes so that many distinct
670 combinations collapse to only a handful of distinct hash values. */
671
672 static Py_uhash_t
673 _shuffle_bits(Py_uhash_t h)
674 {
675 return ((h ^ 89869747UL) ^ (h << 16)) * 3644798167UL;
676 }
677
678 /* Most of the constants in this hash algorithm are randomly chosen
679 large primes with "interesting bit patterns" and that passed tests
680 for good collision statistics on a variety of problematic datasets
681 including powersets and graph structures (such as David Eppstein's
682 graph recipes in Lib/test/test_set.py) */
683
684 static Py_hash_t
685 frozenset_hash(PyObject *self)
686 {
687 PySetObject *so = (PySetObject *)self;
688 Py_uhash_t hash = 0;
689 setentry *entry;
690
691 if (so->hash != -1)
692 return so->hash;
693
694 /* Xor-in shuffled bits from every entry's hash field because xor is
695 commutative and a frozenset hash should be independent of order.
696
697 For speed, include null entries and dummy entries and then
698 subtract out their effect afterwards so that the final hash
699 depends only on active entries. This allows the code to be
700 vectorized by the compiler and it saves the unpredictable
701 branches that would arise when trying to exclude null and dummy
702 entries on every iteration. */
703
704 for (entry = so->table; entry <= &so->table[so->mask]; entry++)
705 hash ^= _shuffle_bits(entry->hash);
706
707 /* Remove the effect of an odd number of NULL entries */
708 if ((so->mask + 1 - so->fill) & 1)
709 hash ^= _shuffle_bits(0);
710
711 /* Remove the effect of an odd number of dummy entries */
712 if ((so->fill - so->used) & 1)
713 hash ^= _shuffle_bits(-1);
714
715 /* Factor in the number of active entries */
716 hash ^= ((Py_uhash_t)PySet_GET_SIZE(self) + 1) * 1927868237UL;
717
718 /* Disperse patterns arising in nested frozensets */
719 hash ^= (hash >> 11) ^ (hash >> 25);
720 hash = hash * 69069U + 907133923UL;
721
722 /* -1 is reserved as an error code */
723 if (hash == (Py_uhash_t)-1)
724 hash = 590923713UL;
725
726 so->hash = hash;
727 return hash;
728 }
729
730 /***** Set iterator type ***********************************************/
731
732 typedef struct {
733 PyObject_HEAD
734 PySetObject *si_set; /* Set to NULL when iterator is exhausted */
735 Py_ssize_t si_used;
736 Py_ssize_t si_pos;
737 Py_ssize_t len;
738 } setiterobject;
739
740 static void
741 setiter_dealloc(setiterobject *si)
742 {
743 /* bpo-31095: UnTrack is needed before calling any callbacks */
744 _PyObject_GC_UNTRACK(si);
745 Py_XDECREF(si->si_set);
746 PyObject_GC_Del(si);
747 }
748
749 static int
750 setiter_traverse(setiterobject *si, visitproc visit, void *arg)
751 {
752 Py_VISIT(si->si_set);
753 return 0;
754 }
755
756 static PyObject *
757 setiter_len(setiterobject *si, PyObject *Py_UNUSED(ignored))
758 {
759 Py_ssize_t len = 0;
760 if (si->si_set != NULL && si->si_used == si->si_set->used)
761 len = si->len;
762 return PyLong_FromSsize_t(len);
763 }
764
765 PyDoc_STRVAR(length_hint_doc, "Private method returning an estimate of len(list(it)).");
766
767 static PyObject *setiter_iternext(setiterobject *si);
768
769 static PyObject *
770 setiter_reduce(setiterobject *si, PyObject *Py_UNUSED(ignored))
771 {
772 /* copy the iterator state */
773 setiterobject tmp = *si;
774 Py_XINCREF(tmp.si_set);
775
776 /* iterate the temporary into a list */
777 PyObject *list = PySequence_List((PyObject*)&tmp);
778 Py_XDECREF(tmp.si_set);
779 if (list == NULL) {
780 return NULL;
781 }
782 return Py_BuildValue("N(N)", _PyEval_GetBuiltin(&_Py_ID(iter)), list);
783 }
784
785 PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
786
787 static PyMethodDef setiter_methods[] = {
788 {"__length_hint__", (PyCFunction)setiter_len, METH_NOARGS, length_hint_doc},
789 {"__reduce__", (PyCFunction)setiter_reduce, METH_NOARGS, reduce_doc},
790 {NULL, NULL} /* sentinel */
791 };
792
793 static PyObject *setiter_iternext(setiterobject *si)
794 {
795 PyObject *key;
796 Py_ssize_t i, mask;
797 setentry *entry;
798 PySetObject *so = si->si_set;
799
800 if (so == NULL)
801 return NULL;
802 assert (PyAnySet_Check(so));
803
804 if (si->si_used != so->used) {
805 PyErr_SetString(PyExc_RuntimeError,
806 "Set changed size during iteration");
807 si->si_used = -1; /* Make this state sticky */
808 return NULL;
809 }
810
811 i = si->si_pos;
812 assert(i>=0);
813 entry = so->table;
814 mask = so->mask;
815 while (i <= mask && (entry[i].key == NULL || entry[i].key == dummy))
816 i++;
817 si->si_pos = i+1;
818 if (i > mask)
819 goto fail;
820 si->len--;
821 key = entry[i].key;
822 return Py_NewRef(key);
823
824 fail:
825 si->si_set = NULL;
826 Py_DECREF(so);
827 return NULL;
828 }
829
830 PyTypeObject PySetIter_Type = {
831 PyVarObject_HEAD_INIT(&PyType_Type, 0)
832 "set_iterator", /* tp_name */
833 sizeof(setiterobject), /* tp_basicsize */
834 0, /* tp_itemsize */
835 /* methods */
836 (destructor)setiter_dealloc, /* tp_dealloc */
837 0, /* tp_vectorcall_offset */
838 0, /* tp_getattr */
839 0, /* tp_setattr */
840 0, /* tp_as_async */
841 0, /* tp_repr */
842 0, /* tp_as_number */
843 0, /* tp_as_sequence */
844 0, /* tp_as_mapping */
845 0, /* tp_hash */
846 0, /* tp_call */
847 0, /* tp_str */
848 PyObject_GenericGetAttr, /* tp_getattro */
849 0, /* tp_setattro */
850 0, /* tp_as_buffer */
851 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
852 0, /* tp_doc */
853 (traverseproc)setiter_traverse, /* tp_traverse */
854 0, /* tp_clear */
855 0, /* tp_richcompare */
856 0, /* tp_weaklistoffset */
857 PyObject_SelfIter, /* tp_iter */
858 (iternextfunc)setiter_iternext, /* tp_iternext */
859 setiter_methods, /* tp_methods */
860 0,
861 };
862
863 static PyObject *
864 set_iter(PySetObject *so)
865 {
866 setiterobject *si = PyObject_GC_New(setiterobject, &PySetIter_Type);
867 if (si == NULL)
868 return NULL;
869 si->si_set = (PySetObject*)Py_NewRef(so);
870 si->si_used = so->used;
871 si->si_pos = 0;
872 si->len = so->used;
873 _PyObject_GC_TRACK(si);
874 return (PyObject *)si;
875 }
876
877 static int
878 set_update_internal(PySetObject *so, PyObject *other)
879 {
880 PyObject *key, *it;
881
882 if (PyAnySet_Check(other))
883 return set_merge(so, other);
884
885 if (PyDict_CheckExact(other)) {
886 PyObject *value;
887 Py_ssize_t pos = 0;
888 Py_hash_t hash;
889 Py_ssize_t dictsize = PyDict_GET_SIZE(other);
890
891 /* Do one big resize at the start, rather than
892 * incrementally resizing as we insert new keys. Expect
893 * that there will be no (or few) overlapping keys.
894 */
895 if (dictsize < 0)
896 return -1;
897 if ((so->fill + dictsize)*5 >= so->mask*3) {
898 if (set_table_resize(so, (so->used + dictsize)*2) != 0)
899 return -1;
900 }
901 while (_PyDict_Next(other, &pos, &key, &value, &hash)) {
902 if (set_add_entry(so, key, hash))
903 return -1;
904 }
905 return 0;
906 }
907
908 it = PyObject_GetIter(other);
909 if (it == NULL)
910 return -1;
911
912 while ((key = PyIter_Next(it)) != NULL) {
913 if (set_add_key(so, key)) {
914 Py_DECREF(it);
915 Py_DECREF(key);
916 return -1;
917 }
918 Py_DECREF(key);
919 }
920 Py_DECREF(it);
921 if (PyErr_Occurred())
922 return -1;
923 return 0;
924 }
925
926 static PyObject *
927 set_update(PySetObject *so, PyObject *args)
928 {
929 Py_ssize_t i;
930
931 for (i=0 ; i<PyTuple_GET_SIZE(args) ; i++) {
932 PyObject *other = PyTuple_GET_ITEM(args, i);
933 if (set_update_internal(so, other))
934 return NULL;
935 }
936 Py_RETURN_NONE;
937 }
938
939 PyDoc_STRVAR(update_doc,
940 "Update a set with the union of itself and others.");
941
942 /* XXX Todo:
943 If aligned memory allocations become available, make the
944 set object 64 byte aligned so that most of the fields
945 can be retrieved or updated in a single cache line.
946 */
947
948 static PyObject *
949 make_new_set(PyTypeObject *type, PyObject *iterable)
950 {
951 assert(PyType_Check(type));
952 PySetObject *so;
953
954 so = (PySetObject *)type->tp_alloc(type, 0);
955 if (so == NULL)
956 return NULL;
957
958 so->fill = 0;
959 so->used = 0;
960 so->mask = PySet_MINSIZE - 1;
961 so->table = so->smalltable;
962 so->hash = -1;
963 so->finger = 0;
964 so->weakreflist = NULL;
965
966 if (iterable != NULL) {
967 if (set_update_internal(so, iterable)) {
968 Py_DECREF(so);
969 return NULL;
970 }
971 }
972
973 return (PyObject *)so;
974 }
975
976 static PyObject *
977 make_new_set_basetype(PyTypeObject *type, PyObject *iterable)
978 {
979 if (type != &PySet_Type && type != &PyFrozenSet_Type) {
980 if (PyType_IsSubtype(type, &PySet_Type))
981 type = &PySet_Type;
982 else
983 type = &PyFrozenSet_Type;
984 }
985 return make_new_set(type, iterable);
986 }
987
988 static PyObject *
989 make_new_frozenset(PyTypeObject *type, PyObject *iterable)
990 {
991 if (type != &PyFrozenSet_Type) {
992 return make_new_set(type, iterable);
993 }
994
995 if (iterable != NULL && PyFrozenSet_CheckExact(iterable)) {
996 /* frozenset(f) is idempotent */
997 return Py_NewRef(iterable);
998 }
999 return make_new_set(type, iterable);
1000 }
1001
1002 static PyObject *
1003 frozenset_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1004 {
1005 PyObject *iterable = NULL;
1006
1007 if ((type == &PyFrozenSet_Type ||
1008 type->tp_init == PyFrozenSet_Type.tp_init) &&
1009 !_PyArg_NoKeywords("frozenset", kwds)) {
1010 return NULL;
1011 }
1012
1013 if (!PyArg_UnpackTuple(args, type->tp_name, 0, 1, &iterable)) {
1014 return NULL;
1015 }
1016
1017 return make_new_frozenset(type, iterable);
1018 }
1019
1020 static PyObject *
1021 frozenset_vectorcall(PyObject *type, PyObject * const*args,
1022 size_t nargsf, PyObject *kwnames)
1023 {
1024 if (!_PyArg_NoKwnames("frozenset", kwnames)) {
1025 return NULL;
1026 }
1027
1028 Py_ssize_t nargs = PyVectorcall_NARGS(nargsf);
1029 if (!_PyArg_CheckPositional("frozenset", nargs, 0, 1)) {
1030 return NULL;
1031 }
1032
1033 PyObject *iterable = (nargs ? args[0] : NULL);
1034 return make_new_frozenset(_PyType_CAST(type), iterable);
1035 }
1036
1037 static PyObject *
1038 set_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1039 {
1040 return make_new_set(type, NULL);
1041 }
1042
1043 /* set_swap_bodies() switches the contents of any two sets by moving their
1044 internal data pointers and, if needed, copying the internal smalltables.
1045 Semantically equivalent to:
1046
1047 t=set(a); a.clear(); a.update(b); b.clear(); b.update(t); del t
1048
1049 The function always succeeds and it leaves both objects in a stable state.
1050 Useful for operations that update in-place (by allowing an intermediate
1051 result to be swapped into one of the original inputs).
1052 */
1053
1054 static void
1055 set_swap_bodies(PySetObject *a, PySetObject *b)
1056 {
1057 Py_ssize_t t;
1058 setentry *u;
1059 setentry tab[PySet_MINSIZE];
1060 Py_hash_t h;
1061
1062 t = a->fill; a->fill = b->fill; b->fill = t;
1063 t = a->used; a->used = b->used; b->used = t;
1064 t = a->mask; a->mask = b->mask; b->mask = t;
1065
1066 u = a->table;
1067 if (a->table == a->smalltable)
1068 u = b->smalltable;
1069 a->table = b->table;
1070 if (b->table == b->smalltable)
1071 a->table = a->smalltable;
1072 b->table = u;
1073
1074 if (a->table == a->smalltable || b->table == b->smalltable) {
1075 memcpy(tab, a->smalltable, sizeof(tab));
1076 memcpy(a->smalltable, b->smalltable, sizeof(tab));
1077 memcpy(b->smalltable, tab, sizeof(tab));
1078 }
1079
1080 if (PyType_IsSubtype(Py_TYPE(a), &PyFrozenSet_Type) &&
1081 PyType_IsSubtype(Py_TYPE(b), &PyFrozenSet_Type)) {
1082 h = a->hash; a->hash = b->hash; b->hash = h;
1083 } else {
1084 a->hash = -1;
1085 b->hash = -1;
1086 }
1087 }
1088
1089 static PyObject *
1090 set_copy(PySetObject *so, PyObject *Py_UNUSED(ignored))
1091 {
1092 return make_new_set_basetype(Py_TYPE(so), (PyObject *)so);
1093 }
1094
1095 static PyObject *
1096 frozenset_copy(PySetObject *so, PyObject *Py_UNUSED(ignored))
1097 {
1098 if (PyFrozenSet_CheckExact(so)) {
1099 return Py_NewRef(so);
1100 }
1101 return set_copy(so, NULL);
1102 }
1103
1104 PyDoc_STRVAR(copy_doc, "Return a shallow copy of a set.");
1105
1106 static PyObject *
1107 set_clear(PySetObject *so, PyObject *Py_UNUSED(ignored))
1108 {
1109 set_clear_internal(so);
1110 Py_RETURN_NONE;
1111 }
1112
1113 PyDoc_STRVAR(clear_doc, "Remove all elements from this set.");
1114
1115 static PyObject *
1116 set_union(PySetObject *so, PyObject *args)
1117 {
1118 PySetObject *result;
1119 PyObject *other;
1120 Py_ssize_t i;
1121
1122 result = (PySetObject *)set_copy(so, NULL);
1123 if (result == NULL)
1124 return NULL;
1125
1126 for (i=0 ; i<PyTuple_GET_SIZE(args) ; i++) {
1127 other = PyTuple_GET_ITEM(args, i);
1128 if ((PyObject *)so == other)
1129 continue;
1130 if (set_update_internal(result, other)) {
1131 Py_DECREF(result);
1132 return NULL;
1133 }
1134 }
1135 return (PyObject *)result;
1136 }
1137
1138 PyDoc_STRVAR(union_doc,
1139 "Return the union of sets as a new set.\n\
1140 \n\
1141 (i.e. all elements that are in either set.)");
1142
1143 static PyObject *
1144 set_or(PySetObject *so, PyObject *other)
1145 {
1146 PySetObject *result;
1147
1148 if (!PyAnySet_Check(so) || !PyAnySet_Check(other))
1149 Py_RETURN_NOTIMPLEMENTED;
1150
1151 result = (PySetObject *)set_copy(so, NULL);
1152 if (result == NULL)
1153 return NULL;
1154 if ((PyObject *)so == other)
1155 return (PyObject *)result;
1156 if (set_update_internal(result, other)) {
1157 Py_DECREF(result);
1158 return NULL;
1159 }
1160 return (PyObject *)result;
1161 }
1162
1163 static PyObject *
1164 set_ior(PySetObject *so, PyObject *other)
1165 {
1166 if (!PyAnySet_Check(other))
1167 Py_RETURN_NOTIMPLEMENTED;
1168
1169 if (set_update_internal(so, other))
1170 return NULL;
1171 return Py_NewRef(so);
1172 }
1173
1174 static PyObject *
1175 set_intersection(PySetObject *so, PyObject *other)
1176 {
1177 PySetObject *result;
1178 PyObject *key, *it, *tmp;
1179 Py_hash_t hash;
1180 int rv;
1181
1182 if ((PyObject *)so == other)
1183 return set_copy(so, NULL);
1184
1185 result = (PySetObject *)make_new_set_basetype(Py_TYPE(so), NULL);
1186 if (result == NULL)
1187 return NULL;
1188
1189 if (PyAnySet_Check(other)) {
1190 Py_ssize_t pos = 0;
1191 setentry *entry;
1192
1193 if (PySet_GET_SIZE(other) > PySet_GET_SIZE(so)) {
1194 tmp = (PyObject *)so;
1195 so = (PySetObject *)other;
1196 other = tmp;
1197 }
1198
1199 while (set_next((PySetObject *)other, &pos, &entry)) {
1200 key = entry->key;
1201 hash = entry->hash;
1202 Py_INCREF(key);
1203 rv = set_contains_entry(so, key, hash);
1204 if (rv < 0) {
1205 Py_DECREF(result);
1206 Py_DECREF(key);
1207 return NULL;
1208 }
1209 if (rv) {
1210 if (set_add_entry(result, key, hash)) {
1211 Py_DECREF(result);
1212 Py_DECREF(key);
1213 return NULL;
1214 }
1215 }
1216 Py_DECREF(key);
1217 }
1218 return (PyObject *)result;
1219 }
1220
1221 it = PyObject_GetIter(other);
1222 if (it == NULL) {
1223 Py_DECREF(result);
1224 return NULL;
1225 }
1226
1227 while ((key = PyIter_Next(it)) != NULL) {
1228 hash = PyObject_Hash(key);
1229 if (hash == -1)
1230 goto error;
1231 rv = set_contains_entry(so, key, hash);
1232 if (rv < 0)
1233 goto error;
1234 if (rv) {
1235 if (set_add_entry(result, key, hash))
1236 goto error;
1237 if (PySet_GET_SIZE(result) >= PySet_GET_SIZE(so)) {
1238 Py_DECREF(key);
1239 break;
1240 }
1241 }
1242 Py_DECREF(key);
1243 }
1244 Py_DECREF(it);
1245 if (PyErr_Occurred()) {
1246 Py_DECREF(result);
1247 return NULL;
1248 }
1249 return (PyObject *)result;
1250 error:
1251 Py_DECREF(it);
1252 Py_DECREF(result);
1253 Py_DECREF(key);
1254 return NULL;
1255 }
1256
1257 static PyObject *
1258 set_intersection_multi(PySetObject *so, PyObject *args)
1259 {
1260 Py_ssize_t i;
1261
1262 if (PyTuple_GET_SIZE(args) == 0)
1263 return set_copy(so, NULL);
1264
1265 PyObject *result = Py_NewRef(so);
1266 for (i=0 ; i<PyTuple_GET_SIZE(args) ; i++) {
1267 PyObject *other = PyTuple_GET_ITEM(args, i);
1268 PyObject *newresult = set_intersection((PySetObject *)result, other);
1269 if (newresult == NULL) {
1270 Py_DECREF(result);
1271 return NULL;
1272 }
1273 Py_SETREF(result, newresult);
1274 }
1275 return result;
1276 }
1277
1278 PyDoc_STRVAR(intersection_doc,
1279 "Return the intersection of two sets as a new set.\n\
1280 \n\
1281 (i.e. all elements that are in both sets.)");
1282
1283 static PyObject *
1284 set_intersection_update(PySetObject *so, PyObject *other)
1285 {
1286 PyObject *tmp;
1287
1288 tmp = set_intersection(so, other);
1289 if (tmp == NULL)
1290 return NULL;
1291 set_swap_bodies(so, (PySetObject *)tmp);
1292 Py_DECREF(tmp);
1293 Py_RETURN_NONE;
1294 }
1295
1296 static PyObject *
1297 set_intersection_update_multi(PySetObject *so, PyObject *args)
1298 {
1299 PyObject *tmp;
1300
1301 tmp = set_intersection_multi(so, args);
1302 if (tmp == NULL)
1303 return NULL;
1304 set_swap_bodies(so, (PySetObject *)tmp);
1305 Py_DECREF(tmp);
1306 Py_RETURN_NONE;
1307 }
1308
1309 PyDoc_STRVAR(intersection_update_doc,
1310 "Update a set with the intersection of itself and another.");
1311
1312 static PyObject *
1313 set_and(PySetObject *so, PyObject *other)
1314 {
1315 if (!PyAnySet_Check(so) || !PyAnySet_Check(other))
1316 Py_RETURN_NOTIMPLEMENTED;
1317 return set_intersection(so, other);
1318 }
1319
1320 static PyObject *
1321 set_iand(PySetObject *so, PyObject *other)
1322 {
1323 PyObject *result;
1324
1325 if (!PyAnySet_Check(other))
1326 Py_RETURN_NOTIMPLEMENTED;
1327 result = set_intersection_update(so, other);
1328 if (result == NULL)
1329 return NULL;
1330 Py_DECREF(result);
1331 return Py_NewRef(so);
1332 }
1333
1334 static PyObject *
1335 set_isdisjoint(PySetObject *so, PyObject *other)
1336 {
1337 PyObject *key, *it, *tmp;
1338 int rv;
1339
1340 if ((PyObject *)so == other) {
1341 if (PySet_GET_SIZE(so) == 0)
1342 Py_RETURN_TRUE;
1343 else
1344 Py_RETURN_FALSE;
1345 }
1346
1347 if (PyAnySet_CheckExact(other)) {
1348 Py_ssize_t pos = 0;
1349 setentry *entry;
1350
1351 if (PySet_GET_SIZE(other) > PySet_GET_SIZE(so)) {
1352 tmp = (PyObject *)so;
1353 so = (PySetObject *)other;
1354 other = tmp;
1355 }
1356 while (set_next((PySetObject *)other, &pos, &entry)) {
1357 PyObject *key = entry->key;
1358 Py_INCREF(key);
1359 rv = set_contains_entry(so, key, entry->hash);
1360 Py_DECREF(key);
1361 if (rv < 0) {
1362 return NULL;
1363 }
1364 if (rv) {
1365 Py_RETURN_FALSE;
1366 }
1367 }
1368 Py_RETURN_TRUE;
1369 }
1370
1371 it = PyObject_GetIter(other);
1372 if (it == NULL)
1373 return NULL;
1374
1375 while ((key = PyIter_Next(it)) != NULL) {
1376 rv = set_contains_key(so, key);
1377 Py_DECREF(key);
1378 if (rv < 0) {
1379 Py_DECREF(it);
1380 return NULL;
1381 }
1382 if (rv) {
1383 Py_DECREF(it);
1384 Py_RETURN_FALSE;
1385 }
1386 }
1387 Py_DECREF(it);
1388 if (PyErr_Occurred())
1389 return NULL;
1390 Py_RETURN_TRUE;
1391 }
1392
1393 PyDoc_STRVAR(isdisjoint_doc,
1394 "Return True if two sets have a null intersection.");
1395
1396 static int
1397 set_difference_update_internal(PySetObject *so, PyObject *other)
1398 {
1399 if ((PyObject *)so == other)
1400 return set_clear_internal(so);
1401
1402 if (PyAnySet_Check(other)) {
1403 setentry *entry;
1404 Py_ssize_t pos = 0;
1405
1406 /* Optimization: When the other set is more than 8 times
1407 larger than the base set, replace the other set with
1408 intersection of the two sets.
1409 */
1410 if ((PySet_GET_SIZE(other) >> 3) > PySet_GET_SIZE(so)) {
1411 other = set_intersection(so, other);
1412 if (other == NULL)
1413 return -1;
1414 } else {
1415 Py_INCREF(other);
1416 }
1417
1418 while (set_next((PySetObject *)other, &pos, &entry)) {
1419 PyObject *key = entry->key;
1420 Py_INCREF(key);
1421 if (set_discard_entry(so, key, entry->hash) < 0) {
1422 Py_DECREF(other);
1423 Py_DECREF(key);
1424 return -1;
1425 }
1426 Py_DECREF(key);
1427 }
1428
1429 Py_DECREF(other);
1430 } else {
1431 PyObject *key, *it;
1432 it = PyObject_GetIter(other);
1433 if (it == NULL)
1434 return -1;
1435
1436 while ((key = PyIter_Next(it)) != NULL) {
1437 if (set_discard_key(so, key) < 0) {
1438 Py_DECREF(it);
1439 Py_DECREF(key);
1440 return -1;
1441 }
1442 Py_DECREF(key);
1443 }
1444 Py_DECREF(it);
1445 if (PyErr_Occurred())
1446 return -1;
1447 }
1448 /* If more than 1/4th are dummies, then resize them away. */
1449 if ((size_t)(so->fill - so->used) <= (size_t)so->mask / 4)
1450 return 0;
1451 return set_table_resize(so, so->used>50000 ? so->used*2 : so->used*4);
1452 }
1453
1454 static PyObject *
1455 set_difference_update(PySetObject *so, PyObject *args)
1456 {
1457 Py_ssize_t i;
1458
1459 for (i=0 ; i<PyTuple_GET_SIZE(args) ; i++) {
1460 PyObject *other = PyTuple_GET_ITEM(args, i);
1461 if (set_difference_update_internal(so, other))
1462 return NULL;
1463 }
1464 Py_RETURN_NONE;
1465 }
1466
1467 PyDoc_STRVAR(difference_update_doc,
1468 "Remove all elements of another set from this set.");
1469
1470 static PyObject *
1471 set_copy_and_difference(PySetObject *so, PyObject *other)
1472 {
1473 PyObject *result;
1474
1475 result = set_copy(so, NULL);
1476 if (result == NULL)
1477 return NULL;
1478 if (set_difference_update_internal((PySetObject *) result, other) == 0)
1479 return result;
1480 Py_DECREF(result);
1481 return NULL;
1482 }
1483
1484 static PyObject *
1485 set_difference(PySetObject *so, PyObject *other)
1486 {
1487 PyObject *result;
1488 PyObject *key;
1489 Py_hash_t hash;
1490 setentry *entry;
1491 Py_ssize_t pos = 0, other_size;
1492 int rv;
1493
1494 if (PyAnySet_Check(other)) {
1495 other_size = PySet_GET_SIZE(other);
1496 }
1497 else if (PyDict_CheckExact(other)) {
1498 other_size = PyDict_GET_SIZE(other);
1499 }
1500 else {
1501 return set_copy_and_difference(so, other);
1502 }
1503
1504 /* If len(so) much more than len(other), it's more efficient to simply copy
1505 * so and then iterate other looking for common elements. */
1506 if ((PySet_GET_SIZE(so) >> 2) > other_size) {
1507 return set_copy_and_difference(so, other);
1508 }
1509
1510 result = make_new_set_basetype(Py_TYPE(so), NULL);
1511 if (result == NULL)
1512 return NULL;
1513
1514 if (PyDict_CheckExact(other)) {
1515 while (set_next(so, &pos, &entry)) {
1516 key = entry->key;
1517 hash = entry->hash;
1518 Py_INCREF(key);
1519 rv = _PyDict_Contains_KnownHash(other, key, hash);
1520 if (rv < 0) {
1521 Py_DECREF(result);
1522 Py_DECREF(key);
1523 return NULL;
1524 }
1525 if (!rv) {
1526 if (set_add_entry((PySetObject *)result, key, hash)) {
1527 Py_DECREF(result);
1528 Py_DECREF(key);
1529 return NULL;
1530 }
1531 }
1532 Py_DECREF(key);
1533 }
1534 return result;
1535 }
1536
1537 /* Iterate over so, checking for common elements in other. */
1538 while (set_next(so, &pos, &entry)) {
1539 key = entry->key;
1540 hash = entry->hash;
1541 Py_INCREF(key);
1542 rv = set_contains_entry((PySetObject *)other, key, hash);
1543 if (rv < 0) {
1544 Py_DECREF(result);
1545 Py_DECREF(key);
1546 return NULL;
1547 }
1548 if (!rv) {
1549 if (set_add_entry((PySetObject *)result, key, hash)) {
1550 Py_DECREF(result);
1551 Py_DECREF(key);
1552 return NULL;
1553 }
1554 }
1555 Py_DECREF(key);
1556 }
1557 return result;
1558 }
1559
1560 static PyObject *
1561 set_difference_multi(PySetObject *so, PyObject *args)
1562 {
1563 Py_ssize_t i;
1564 PyObject *result, *other;
1565
1566 if (PyTuple_GET_SIZE(args) == 0)
1567 return set_copy(so, NULL);
1568
1569 other = PyTuple_GET_ITEM(args, 0);
1570 result = set_difference(so, other);
1571 if (result == NULL)
1572 return NULL;
1573
1574 for (i=1 ; i<PyTuple_GET_SIZE(args) ; i++) {
1575 other = PyTuple_GET_ITEM(args, i);
1576 if (set_difference_update_internal((PySetObject *)result, other)) {
1577 Py_DECREF(result);
1578 return NULL;
1579 }
1580 }
1581 return result;
1582 }
1583
1584 PyDoc_STRVAR(difference_doc,
1585 "Return the difference of two or more sets as a new set.\n\
1586 \n\
1587 (i.e. all elements that are in this set but not the others.)");
1588 static PyObject *
1589 set_sub(PySetObject *so, PyObject *other)
1590 {
1591 if (!PyAnySet_Check(so) || !PyAnySet_Check(other))
1592 Py_RETURN_NOTIMPLEMENTED;
1593 return set_difference(so, other);
1594 }
1595
1596 static PyObject *
1597 set_isub(PySetObject *so, PyObject *other)
1598 {
1599 if (!PyAnySet_Check(other))
1600 Py_RETURN_NOTIMPLEMENTED;
1601 if (set_difference_update_internal(so, other))
1602 return NULL;
1603 return Py_NewRef(so);
1604 }
1605
1606 static PyObject *
1607 set_symmetric_difference_update(PySetObject *so, PyObject *other)
1608 {
1609 PySetObject *otherset;
1610 PyObject *key;
1611 Py_ssize_t pos = 0;
1612 Py_hash_t hash;
1613 setentry *entry;
1614 int rv;
1615
1616 if ((PyObject *)so == other)
1617 return set_clear(so, NULL);
1618
1619 if (PyDict_CheckExact(other)) {
1620 PyObject *value;
1621 while (_PyDict_Next(other, &pos, &key, &value, &hash)) {
1622 Py_INCREF(key);
1623 rv = set_discard_entry(so, key, hash);
1624 if (rv < 0) {
1625 Py_DECREF(key);
1626 return NULL;
1627 }
1628 if (rv == DISCARD_NOTFOUND) {
1629 if (set_add_entry(so, key, hash)) {
1630 Py_DECREF(key);
1631 return NULL;
1632 }
1633 }
1634 Py_DECREF(key);
1635 }
1636 Py_RETURN_NONE;
1637 }
1638
1639 if (PyAnySet_Check(other)) {
1640 otherset = (PySetObject *)Py_NewRef(other);
1641 } else {
1642 otherset = (PySetObject *)make_new_set_basetype(Py_TYPE(so), other);
1643 if (otherset == NULL)
1644 return NULL;
1645 }
1646
1647 while (set_next(otherset, &pos, &entry)) {
1648 key = entry->key;
1649 hash = entry->hash;
1650 Py_INCREF(key);
1651 rv = set_discard_entry(so, key, hash);
1652 if (rv < 0) {
1653 Py_DECREF(otherset);
1654 Py_DECREF(key);
1655 return NULL;
1656 }
1657 if (rv == DISCARD_NOTFOUND) {
1658 if (set_add_entry(so, key, hash)) {
1659 Py_DECREF(otherset);
1660 Py_DECREF(key);
1661 return NULL;
1662 }
1663 }
1664 Py_DECREF(key);
1665 }
1666 Py_DECREF(otherset);
1667 Py_RETURN_NONE;
1668 }
1669
1670 PyDoc_STRVAR(symmetric_difference_update_doc,
1671 "Update a set with the symmetric difference of itself and another.");
1672
1673 static PyObject *
1674 set_symmetric_difference(PySetObject *so, PyObject *other)
1675 {
1676 PyObject *rv;
1677 PySetObject *otherset;
1678
1679 otherset = (PySetObject *)make_new_set_basetype(Py_TYPE(so), other);
1680 if (otherset == NULL)
1681 return NULL;
1682 rv = set_symmetric_difference_update(otherset, (PyObject *)so);
1683 if (rv == NULL) {
1684 Py_DECREF(otherset);
1685 return NULL;
1686 }
1687 Py_DECREF(rv);
1688 return (PyObject *)otherset;
1689 }
1690
1691 PyDoc_STRVAR(symmetric_difference_doc,
1692 "Return the symmetric difference of two sets as a new set.\n\
1693 \n\
1694 (i.e. all elements that are in exactly one of the sets.)");
1695
1696 static PyObject *
1697 set_xor(PySetObject *so, PyObject *other)
1698 {
1699 if (!PyAnySet_Check(so) || !PyAnySet_Check(other))
1700 Py_RETURN_NOTIMPLEMENTED;
1701 return set_symmetric_difference(so, other);
1702 }
1703
1704 static PyObject *
1705 set_ixor(PySetObject *so, PyObject *other)
1706 {
1707 PyObject *result;
1708
1709 if (!PyAnySet_Check(other))
1710 Py_RETURN_NOTIMPLEMENTED;
1711 result = set_symmetric_difference_update(so, other);
1712 if (result == NULL)
1713 return NULL;
1714 Py_DECREF(result);
1715 return Py_NewRef(so);
1716 }
1717
1718 static PyObject *
1719 set_issubset(PySetObject *so, PyObject *other)
1720 {
1721 setentry *entry;
1722 Py_ssize_t pos = 0;
1723 int rv;
1724
1725 if (!PyAnySet_Check(other)) {
1726 PyObject *tmp = set_intersection(so, other);
1727 if (tmp == NULL) {
1728 return NULL;
1729 }
1730 int result = (PySet_GET_SIZE(tmp) == PySet_GET_SIZE(so));
1731 Py_DECREF(tmp);
1732 return PyBool_FromLong(result);
1733 }
1734 if (PySet_GET_SIZE(so) > PySet_GET_SIZE(other))
1735 Py_RETURN_FALSE;
1736
1737 while (set_next(so, &pos, &entry)) {
1738 PyObject *key = entry->key;
1739 Py_INCREF(key);
1740 rv = set_contains_entry((PySetObject *)other, key, entry->hash);
1741 Py_DECREF(key);
1742 if (rv < 0) {
1743 return NULL;
1744 }
1745 if (!rv) {
1746 Py_RETURN_FALSE;
1747 }
1748 }
1749 Py_RETURN_TRUE;
1750 }
1751
1752 PyDoc_STRVAR(issubset_doc, "Report whether another set contains this set.");
1753
1754 static PyObject *
1755 set_issuperset(PySetObject *so, PyObject *other)
1756 {
1757 if (PyAnySet_Check(other)) {
1758 return set_issubset((PySetObject *)other, (PyObject *)so);
1759 }
1760
1761 PyObject *key, *it = PyObject_GetIter(other);
1762 if (it == NULL) {
1763 return NULL;
1764 }
1765 while ((key = PyIter_Next(it)) != NULL) {
1766 int rv = set_contains_key(so, key);
1767 Py_DECREF(key);
1768 if (rv < 0) {
1769 Py_DECREF(it);
1770 return NULL;
1771 }
1772 if (!rv) {
1773 Py_DECREF(it);
1774 Py_RETURN_FALSE;
1775 }
1776 }
1777 Py_DECREF(it);
1778 if (PyErr_Occurred()) {
1779 return NULL;
1780 }
1781 Py_RETURN_TRUE;
1782 }
1783
1784 PyDoc_STRVAR(issuperset_doc, "Report whether this set contains another set.");
1785
1786 static PyObject *
1787 set_richcompare(PySetObject *v, PyObject *w, int op)
1788 {
1789 PyObject *r1;
1790 int r2;
1791
1792 if(!PyAnySet_Check(w))
1793 Py_RETURN_NOTIMPLEMENTED;
1794
1795 switch (op) {
1796 case Py_EQ:
1797 if (PySet_GET_SIZE(v) != PySet_GET_SIZE(w))
1798 Py_RETURN_FALSE;
1799 if (v->hash != -1 &&
1800 ((PySetObject *)w)->hash != -1 &&
1801 v->hash != ((PySetObject *)w)->hash)
1802 Py_RETURN_FALSE;
1803 return set_issubset(v, w);
1804 case Py_NE:
1805 r1 = set_richcompare(v, w, Py_EQ);
1806 if (r1 == NULL)
1807 return NULL;
1808 r2 = PyObject_IsTrue(r1);
1809 Py_DECREF(r1);
1810 if (r2 < 0)
1811 return NULL;
1812 return PyBool_FromLong(!r2);
1813 case Py_LE:
1814 return set_issubset(v, w);
1815 case Py_GE:
1816 return set_issuperset(v, w);
1817 case Py_LT:
1818 if (PySet_GET_SIZE(v) >= PySet_GET_SIZE(w))
1819 Py_RETURN_FALSE;
1820 return set_issubset(v, w);
1821 case Py_GT:
1822 if (PySet_GET_SIZE(v) <= PySet_GET_SIZE(w))
1823 Py_RETURN_FALSE;
1824 return set_issuperset(v, w);
1825 }
1826 Py_RETURN_NOTIMPLEMENTED;
1827 }
1828
1829 static PyObject *
1830 set_add(PySetObject *so, PyObject *key)
1831 {
1832 if (set_add_key(so, key))
1833 return NULL;
1834 Py_RETURN_NONE;
1835 }
1836
1837 PyDoc_STRVAR(add_doc,
1838 "Add an element to a set.\n\
1839 \n\
1840 This has no effect if the element is already present.");
1841
1842 static int
1843 set_contains(PySetObject *so, PyObject *key)
1844 {
1845 PyObject *tmpkey;
1846 int rv;
1847
1848 rv = set_contains_key(so, key);
1849 if (rv < 0) {
1850 if (!PySet_Check(key) || !PyErr_ExceptionMatches(PyExc_TypeError))
1851 return -1;
1852 PyErr_Clear();
1853 tmpkey = make_new_set(&PyFrozenSet_Type, key);
1854 if (tmpkey == NULL)
1855 return -1;
1856 rv = set_contains_key(so, tmpkey);
1857 Py_DECREF(tmpkey);
1858 }
1859 return rv;
1860 }
1861
1862 static PyObject *
1863 set_direct_contains(PySetObject *so, PyObject *key)
1864 {
1865 long result;
1866
1867 result = set_contains(so, key);
1868 if (result < 0)
1869 return NULL;
1870 return PyBool_FromLong(result);
1871 }
1872
1873 PyDoc_STRVAR(contains_doc, "x.__contains__(y) <==> y in x.");
1874
1875 static PyObject *
1876 set_remove(PySetObject *so, PyObject *key)
1877 {
1878 PyObject *tmpkey;
1879 int rv;
1880
1881 rv = set_discard_key(so, key);
1882 if (rv < 0) {
1883 if (!PySet_Check(key) || !PyErr_ExceptionMatches(PyExc_TypeError))
1884 return NULL;
1885 PyErr_Clear();
1886 tmpkey = make_new_set(&PyFrozenSet_Type, key);
1887 if (tmpkey == NULL)
1888 return NULL;
1889 rv = set_discard_key(so, tmpkey);
1890 Py_DECREF(tmpkey);
1891 if (rv < 0)
1892 return NULL;
1893 }
1894
1895 if (rv == DISCARD_NOTFOUND) {
1896 _PyErr_SetKeyError(key);
1897 return NULL;
1898 }
1899 Py_RETURN_NONE;
1900 }
1901
1902 PyDoc_STRVAR(remove_doc,
1903 "Remove an element from a set; it must be a member.\n\
1904 \n\
1905 If the element is not a member, raise a KeyError.");
1906
1907 static PyObject *
1908 set_discard(PySetObject *so, PyObject *key)
1909 {
1910 PyObject *tmpkey;
1911 int rv;
1912
1913 rv = set_discard_key(so, key);
1914 if (rv < 0) {
1915 if (!PySet_Check(key) || !PyErr_ExceptionMatches(PyExc_TypeError))
1916 return NULL;
1917 PyErr_Clear();
1918 tmpkey = make_new_set(&PyFrozenSet_Type, key);
1919 if (tmpkey == NULL)
1920 return NULL;
1921 rv = set_discard_key(so, tmpkey);
1922 Py_DECREF(tmpkey);
1923 if (rv < 0)
1924 return NULL;
1925 }
1926 Py_RETURN_NONE;
1927 }
1928
1929 PyDoc_STRVAR(discard_doc,
1930 "Remove an element from a set if it is a member.\n\
1931 \n\
1932 Unlike set.remove(), the discard() method does not raise\n\
1933 an exception when an element is missing from the set.");
1934
1935 static PyObject *
1936 set_reduce(PySetObject *so, PyObject *Py_UNUSED(ignored))
1937 {
1938 PyObject *keys=NULL, *args=NULL, *result=NULL, *state=NULL;
1939
1940 keys = PySequence_List((PyObject *)so);
1941 if (keys == NULL)
1942 goto done;
1943 args = PyTuple_Pack(1, keys);
1944 if (args == NULL)
1945 goto done;
1946 state = _PyObject_GetState((PyObject *)so);
1947 if (state == NULL)
1948 goto done;
1949 result = PyTuple_Pack(3, Py_TYPE(so), args, state);
1950 done:
1951 Py_XDECREF(args);
1952 Py_XDECREF(keys);
1953 Py_XDECREF(state);
1954 return result;
1955 }
1956
1957 static PyObject *
1958 set_sizeof(PySetObject *so, PyObject *Py_UNUSED(ignored))
1959 {
1960 size_t res = _PyObject_SIZE(Py_TYPE(so));
1961 if (so->table != so->smalltable) {
1962 res += ((size_t)so->mask + 1) * sizeof(setentry);
1963 }
1964 return PyLong_FromSize_t(res);
1965 }
1966
1967 PyDoc_STRVAR(sizeof_doc, "S.__sizeof__() -> size of S in memory, in bytes");
1968 static int
1969 set_init(PySetObject *self, PyObject *args, PyObject *kwds)
1970 {
1971 PyObject *iterable = NULL;
1972
1973 if (!_PyArg_NoKeywords("set", kwds))
1974 return -1;
1975 if (!PyArg_UnpackTuple(args, Py_TYPE(self)->tp_name, 0, 1, &iterable))
1976 return -1;
1977 if (self->fill)
1978 set_clear_internal(self);
1979 self->hash = -1;
1980 if (iterable == NULL)
1981 return 0;
1982 return set_update_internal(self, iterable);
1983 }
1984
1985 static PyObject*
1986 set_vectorcall(PyObject *type, PyObject * const*args,
1987 size_t nargsf, PyObject *kwnames)
1988 {
1989 assert(PyType_Check(type));
1990
1991 if (!_PyArg_NoKwnames("set", kwnames)) {
1992 return NULL;
1993 }
1994
1995 Py_ssize_t nargs = PyVectorcall_NARGS(nargsf);
1996 if (!_PyArg_CheckPositional("set", nargs, 0, 1)) {
1997 return NULL;
1998 }
1999
2000 if (nargs) {
2001 return make_new_set(_PyType_CAST(type), args[0]);
2002 }
2003
2004 return make_new_set(_PyType_CAST(type), NULL);
2005 }
2006
2007 static PySequenceMethods set_as_sequence = {
2008 set_len, /* sq_length */
2009 0, /* sq_concat */
2010 0, /* sq_repeat */
2011 0, /* sq_item */
2012 0, /* sq_slice */
2013 0, /* sq_ass_item */
2014 0, /* sq_ass_slice */
2015 (objobjproc)set_contains, /* sq_contains */
2016 };
2017
2018 /* set object ********************************************************/
2019
2020 #ifdef Py_DEBUG
2021 static PyObject *test_c_api(PySetObject *so, PyObject *Py_UNUSED(ignored));
2022
2023 PyDoc_STRVAR(test_c_api_doc, "Exercises C API. Returns True.\n\
2024 All is well if assertions don't fail.");
2025 #endif
2026
2027 static PyMethodDef set_methods[] = {
2028 {"add", (PyCFunction)set_add, METH_O,
2029 add_doc},
2030 {"clear", (PyCFunction)set_clear, METH_NOARGS,
2031 clear_doc},
2032 {"__contains__",(PyCFunction)set_direct_contains, METH_O | METH_COEXIST,
2033 contains_doc},
2034 {"copy", (PyCFunction)set_copy, METH_NOARGS,
2035 copy_doc},
2036 {"discard", (PyCFunction)set_discard, METH_O,
2037 discard_doc},
2038 {"difference", (PyCFunction)set_difference_multi, METH_VARARGS,
2039 difference_doc},
2040 {"difference_update", (PyCFunction)set_difference_update, METH_VARARGS,
2041 difference_update_doc},
2042 {"intersection",(PyCFunction)set_intersection_multi, METH_VARARGS,
2043 intersection_doc},
2044 {"intersection_update",(PyCFunction)set_intersection_update_multi, METH_VARARGS,
2045 intersection_update_doc},
2046 {"isdisjoint", (PyCFunction)set_isdisjoint, METH_O,
2047 isdisjoint_doc},
2048 {"issubset", (PyCFunction)set_issubset, METH_O,
2049 issubset_doc},
2050 {"issuperset", (PyCFunction)set_issuperset, METH_O,
2051 issuperset_doc},
2052 {"pop", (PyCFunction)set_pop, METH_NOARGS,
2053 pop_doc},
2054 {"__reduce__", (PyCFunction)set_reduce, METH_NOARGS,
2055 reduce_doc},
2056 {"remove", (PyCFunction)set_remove, METH_O,
2057 remove_doc},
2058 {"__sizeof__", (PyCFunction)set_sizeof, METH_NOARGS,
2059 sizeof_doc},
2060 {"symmetric_difference",(PyCFunction)set_symmetric_difference, METH_O,
2061 symmetric_difference_doc},
2062 {"symmetric_difference_update",(PyCFunction)set_symmetric_difference_update, METH_O,
2063 symmetric_difference_update_doc},
2064 #ifdef Py_DEBUG
2065 {"test_c_api", (PyCFunction)test_c_api, METH_NOARGS,
2066 test_c_api_doc},
2067 #endif
2068 {"union", (PyCFunction)set_union, METH_VARARGS,
2069 union_doc},
2070 {"update", (PyCFunction)set_update, METH_VARARGS,
2071 update_doc},
2072 {"__class_getitem__", Py_GenericAlias, METH_O|METH_CLASS, PyDoc_STR("See PEP 585")},
2073 {NULL, NULL} /* sentinel */
2074 };
2075
2076 static PyNumberMethods set_as_number = {
2077 0, /*nb_add*/
2078 (binaryfunc)set_sub, /*nb_subtract*/
2079 0, /*nb_multiply*/
2080 0, /*nb_remainder*/
2081 0, /*nb_divmod*/
2082 0, /*nb_power*/
2083 0, /*nb_negative*/
2084 0, /*nb_positive*/
2085 0, /*nb_absolute*/
2086 0, /*nb_bool*/
2087 0, /*nb_invert*/
2088 0, /*nb_lshift*/
2089 0, /*nb_rshift*/
2090 (binaryfunc)set_and, /*nb_and*/
2091 (binaryfunc)set_xor, /*nb_xor*/
2092 (binaryfunc)set_or, /*nb_or*/
2093 0, /*nb_int*/
2094 0, /*nb_reserved*/
2095 0, /*nb_float*/
2096 0, /*nb_inplace_add*/
2097 (binaryfunc)set_isub, /*nb_inplace_subtract*/
2098 0, /*nb_inplace_multiply*/
2099 0, /*nb_inplace_remainder*/
2100 0, /*nb_inplace_power*/
2101 0, /*nb_inplace_lshift*/
2102 0, /*nb_inplace_rshift*/
2103 (binaryfunc)set_iand, /*nb_inplace_and*/
2104 (binaryfunc)set_ixor, /*nb_inplace_xor*/
2105 (binaryfunc)set_ior, /*nb_inplace_or*/
2106 };
2107
2108 PyDoc_STRVAR(set_doc,
2109 "set() -> new empty set object\n\
2110 set(iterable) -> new set object\n\
2111 \n\
2112 Build an unordered collection of unique elements.");
2113
2114 PyTypeObject PySet_Type = {
2115 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2116 "set", /* tp_name */
2117 sizeof(PySetObject), /* tp_basicsize */
2118 0, /* tp_itemsize */
2119 /* methods */
2120 (destructor)set_dealloc, /* tp_dealloc */
2121 0, /* tp_vectorcall_offset */
2122 0, /* tp_getattr */
2123 0, /* tp_setattr */
2124 0, /* tp_as_async */
2125 (reprfunc)set_repr, /* tp_repr */
2126 &set_as_number, /* tp_as_number */
2127 &set_as_sequence, /* tp_as_sequence */
2128 0, /* tp_as_mapping */
2129 PyObject_HashNotImplemented, /* tp_hash */
2130 0, /* tp_call */
2131 0, /* tp_str */
2132 PyObject_GenericGetAttr, /* tp_getattro */
2133 0, /* tp_setattro */
2134 0, /* tp_as_buffer */
2135 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
2136 Py_TPFLAGS_BASETYPE |
2137 _Py_TPFLAGS_MATCH_SELF, /* tp_flags */
2138 set_doc, /* tp_doc */
2139 (traverseproc)set_traverse, /* tp_traverse */
2140 (inquiry)set_clear_internal, /* tp_clear */
2141 (richcmpfunc)set_richcompare, /* tp_richcompare */
2142 offsetof(PySetObject, weakreflist), /* tp_weaklistoffset */
2143 (getiterfunc)set_iter, /* tp_iter */
2144 0, /* tp_iternext */
2145 set_methods, /* tp_methods */
2146 0, /* tp_members */
2147 0, /* tp_getset */
2148 0, /* tp_base */
2149 0, /* tp_dict */
2150 0, /* tp_descr_get */
2151 0, /* tp_descr_set */
2152 0, /* tp_dictoffset */
2153 (initproc)set_init, /* tp_init */
2154 PyType_GenericAlloc, /* tp_alloc */
2155 set_new, /* tp_new */
2156 PyObject_GC_Del, /* tp_free */
2157 .tp_vectorcall = set_vectorcall,
2158 };
2159
2160 /* frozenset object ********************************************************/
2161
2162
2163 static PyMethodDef frozenset_methods[] = {
2164 {"__contains__",(PyCFunction)set_direct_contains, METH_O | METH_COEXIST,
2165 contains_doc},
2166 {"copy", (PyCFunction)frozenset_copy, METH_NOARGS,
2167 copy_doc},
2168 {"difference", (PyCFunction)set_difference_multi, METH_VARARGS,
2169 difference_doc},
2170 {"intersection", (PyCFunction)set_intersection_multi, METH_VARARGS,
2171 intersection_doc},
2172 {"isdisjoint", (PyCFunction)set_isdisjoint, METH_O,
2173 isdisjoint_doc},
2174 {"issubset", (PyCFunction)set_issubset, METH_O,
2175 issubset_doc},
2176 {"issuperset", (PyCFunction)set_issuperset, METH_O,
2177 issuperset_doc},
2178 {"__reduce__", (PyCFunction)set_reduce, METH_NOARGS,
2179 reduce_doc},
2180 {"__sizeof__", (PyCFunction)set_sizeof, METH_NOARGS,
2181 sizeof_doc},
2182 {"symmetric_difference",(PyCFunction)set_symmetric_difference, METH_O,
2183 symmetric_difference_doc},
2184 {"union", (PyCFunction)set_union, METH_VARARGS,
2185 union_doc},
2186 {"__class_getitem__", Py_GenericAlias, METH_O|METH_CLASS, PyDoc_STR("See PEP 585")},
2187 {NULL, NULL} /* sentinel */
2188 };
2189
2190 static PyNumberMethods frozenset_as_number = {
2191 0, /*nb_add*/
2192 (binaryfunc)set_sub, /*nb_subtract*/
2193 0, /*nb_multiply*/
2194 0, /*nb_remainder*/
2195 0, /*nb_divmod*/
2196 0, /*nb_power*/
2197 0, /*nb_negative*/
2198 0, /*nb_positive*/
2199 0, /*nb_absolute*/
2200 0, /*nb_bool*/
2201 0, /*nb_invert*/
2202 0, /*nb_lshift*/
2203 0, /*nb_rshift*/
2204 (binaryfunc)set_and, /*nb_and*/
2205 (binaryfunc)set_xor, /*nb_xor*/
2206 (binaryfunc)set_or, /*nb_or*/
2207 };
2208
2209 PyDoc_STRVAR(frozenset_doc,
2210 "frozenset() -> empty frozenset object\n\
2211 frozenset(iterable) -> frozenset object\n\
2212 \n\
2213 Build an immutable unordered collection of unique elements.");
2214
2215 PyTypeObject PyFrozenSet_Type = {
2216 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2217 "frozenset", /* tp_name */
2218 sizeof(PySetObject), /* tp_basicsize */
2219 0, /* tp_itemsize */
2220 /* methods */
2221 (destructor)set_dealloc, /* tp_dealloc */
2222 0, /* tp_vectorcall_offset */
2223 0, /* tp_getattr */
2224 0, /* tp_setattr */
2225 0, /* tp_as_async */
2226 (reprfunc)set_repr, /* tp_repr */
2227 &frozenset_as_number, /* tp_as_number */
2228 &set_as_sequence, /* tp_as_sequence */
2229 0, /* tp_as_mapping */
2230 frozenset_hash, /* tp_hash */
2231 0, /* tp_call */
2232 0, /* tp_str */
2233 PyObject_GenericGetAttr, /* tp_getattro */
2234 0, /* tp_setattro */
2235 0, /* tp_as_buffer */
2236 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
2237 Py_TPFLAGS_BASETYPE |
2238 _Py_TPFLAGS_MATCH_SELF, /* tp_flags */
2239 frozenset_doc, /* tp_doc */
2240 (traverseproc)set_traverse, /* tp_traverse */
2241 (inquiry)set_clear_internal, /* tp_clear */
2242 (richcmpfunc)set_richcompare, /* tp_richcompare */
2243 offsetof(PySetObject, weakreflist), /* tp_weaklistoffset */
2244 (getiterfunc)set_iter, /* tp_iter */
2245 0, /* tp_iternext */
2246 frozenset_methods, /* tp_methods */
2247 0, /* tp_members */
2248 0, /* tp_getset */
2249 0, /* tp_base */
2250 0, /* tp_dict */
2251 0, /* tp_descr_get */
2252 0, /* tp_descr_set */
2253 0, /* tp_dictoffset */
2254 0, /* tp_init */
2255 PyType_GenericAlloc, /* tp_alloc */
2256 frozenset_new, /* tp_new */
2257 PyObject_GC_Del, /* tp_free */
2258 .tp_vectorcall = frozenset_vectorcall,
2259 };
2260
2261
2262 /***** C API functions *************************************************/
2263
2264 PyObject *
2265 PySet_New(PyObject *iterable)
2266 {
2267 return make_new_set(&PySet_Type, iterable);
2268 }
2269
2270 PyObject *
2271 PyFrozenSet_New(PyObject *iterable)
2272 {
2273 return make_new_set(&PyFrozenSet_Type, iterable);
2274 }
2275
2276 Py_ssize_t
2277 PySet_Size(PyObject *anyset)
2278 {
2279 if (!PyAnySet_Check(anyset)) {
2280 PyErr_BadInternalCall();
2281 return -1;
2282 }
2283 return PySet_GET_SIZE(anyset);
2284 }
2285
2286 int
2287 PySet_Clear(PyObject *set)
2288 {
2289 if (!PySet_Check(set)) {
2290 PyErr_BadInternalCall();
2291 return -1;
2292 }
2293 return set_clear_internal((PySetObject *)set);
2294 }
2295
2296 int
2297 PySet_Contains(PyObject *anyset, PyObject *key)
2298 {
2299 if (!PyAnySet_Check(anyset)) {
2300 PyErr_BadInternalCall();
2301 return -1;
2302 }
2303 return set_contains_key((PySetObject *)anyset, key);
2304 }
2305
2306 int
2307 PySet_Discard(PyObject *set, PyObject *key)
2308 {
2309 if (!PySet_Check(set)) {
2310 PyErr_BadInternalCall();
2311 return -1;
2312 }
2313 return set_discard_key((PySetObject *)set, key);
2314 }
2315
2316 int
2317 PySet_Add(PyObject *anyset, PyObject *key)
2318 {
2319 if (!PySet_Check(anyset) &&
2320 (!PyFrozenSet_Check(anyset) || Py_REFCNT(anyset) != 1)) {
2321 PyErr_BadInternalCall();
2322 return -1;
2323 }
2324 return set_add_key((PySetObject *)anyset, key);
2325 }
2326
2327 int
2328 _PySet_NextEntry(PyObject *set, Py_ssize_t *pos, PyObject **key, Py_hash_t *hash)
2329 {
2330 setentry *entry;
2331
2332 if (!PyAnySet_Check(set)) {
2333 PyErr_BadInternalCall();
2334 return -1;
2335 }
2336 if (set_next((PySetObject *)set, pos, &entry) == 0)
2337 return 0;
2338 *key = entry->key;
2339 *hash = entry->hash;
2340 return 1;
2341 }
2342
2343 PyObject *
2344 PySet_Pop(PyObject *set)
2345 {
2346 if (!PySet_Check(set)) {
2347 PyErr_BadInternalCall();
2348 return NULL;
2349 }
2350 return set_pop((PySetObject *)set, NULL);
2351 }
2352
2353 int
2354 _PySet_Update(PyObject *set, PyObject *iterable)
2355 {
2356 if (!PySet_Check(set)) {
2357 PyErr_BadInternalCall();
2358 return -1;
2359 }
2360 return set_update_internal((PySetObject *)set, iterable);
2361 }
2362
2363 /* Exported for the gdb plugin's benefit. */
2364 PyObject *_PySet_Dummy = dummy;
2365
2366 #ifdef Py_DEBUG
2367
2368 /* Test code to be called with any three element set.
2369 Returns True and original set is restored. */
2370
2371 #define assertRaises(call_return_value, exception) \
2372 do { \
2373 assert(call_return_value); \
2374 assert(PyErr_ExceptionMatches(exception)); \
2375 PyErr_Clear(); \
2376 } while(0)
2377
2378 static PyObject *
2379 test_c_api(PySetObject *so, PyObject *Py_UNUSED(ignored))
2380 {
2381 Py_ssize_t count;
2382 const char *s;
2383 Py_ssize_t i;
2384 PyObject *elem=NULL, *dup=NULL, *t, *f, *dup2, *x=NULL;
2385 PyObject *ob = (PyObject *)so;
2386 Py_hash_t hash;
2387 PyObject *str;
2388
2389 /* Verify preconditions */
2390 assert(PyAnySet_Check(ob));
2391 assert(PyAnySet_CheckExact(ob));
2392 assert(!PyFrozenSet_CheckExact(ob));
2393
2394 /* so.clear(); so |= set("abc"); */
2395 str = PyUnicode_FromString("abc");
2396 if (str == NULL)
2397 return NULL;
2398 set_clear_internal(so);
2399 if (set_update_internal(so, str)) {
2400 Py_DECREF(str);
2401 return NULL;
2402 }
2403 Py_DECREF(str);
2404
2405 /* Exercise type/size checks */
2406 assert(PySet_Size(ob) == 3);
2407 assert(PySet_GET_SIZE(ob) == 3);
2408
2409 /* Raise TypeError for non-iterable constructor arguments */
2410 assertRaises(PySet_New(Py_None) == NULL, PyExc_TypeError);
2411 assertRaises(PyFrozenSet_New(Py_None) == NULL, PyExc_TypeError);
2412
2413 /* Raise TypeError for unhashable key */
2414 dup = PySet_New(ob);
2415 assertRaises(PySet_Discard(ob, dup) == -1, PyExc_TypeError);
2416 assertRaises(PySet_Contains(ob, dup) == -1, PyExc_TypeError);
2417 assertRaises(PySet_Add(ob, dup) == -1, PyExc_TypeError);
2418
2419 /* Exercise successful pop, contains, add, and discard */
2420 elem = PySet_Pop(ob);
2421 assert(PySet_Contains(ob, elem) == 0);
2422 assert(PySet_GET_SIZE(ob) == 2);
2423 assert(PySet_Add(ob, elem) == 0);
2424 assert(PySet_Contains(ob, elem) == 1);
2425 assert(PySet_GET_SIZE(ob) == 3);
2426 assert(PySet_Discard(ob, elem) == 1);
2427 assert(PySet_GET_SIZE(ob) == 2);
2428 assert(PySet_Discard(ob, elem) == 0);
2429 assert(PySet_GET_SIZE(ob) == 2);
2430
2431 /* Exercise clear */
2432 dup2 = PySet_New(dup);
2433 assert(PySet_Clear(dup2) == 0);
2434 assert(PySet_Size(dup2) == 0);
2435 Py_DECREF(dup2);
2436
2437 /* Raise SystemError on clear or update of frozen set */
2438 f = PyFrozenSet_New(dup);
2439 assertRaises(PySet_Clear(f) == -1, PyExc_SystemError);
2440 assertRaises(_PySet_Update(f, dup) == -1, PyExc_SystemError);
2441 assert(PySet_Add(f, elem) == 0);
2442 Py_INCREF(f);
2443 assertRaises(PySet_Add(f, elem) == -1, PyExc_SystemError);
2444 Py_DECREF(f);
2445 Py_DECREF(f);
2446
2447 /* Exercise direct iteration */
2448 i = 0, count = 0;
2449 while (_PySet_NextEntry((PyObject *)dup, &i, &x, &hash)) {
2450 s = PyUnicode_AsUTF8(x);
2451 assert(s && (s[0] == 'a' || s[0] == 'b' || s[0] == 'c'));
2452 count++;
2453 }
2454 assert(count == 3);
2455
2456 /* Exercise updates */
2457 dup2 = PySet_New(NULL);
2458 assert(_PySet_Update(dup2, dup) == 0);
2459 assert(PySet_Size(dup2) == 3);
2460 assert(_PySet_Update(dup2, dup) == 0);
2461 assert(PySet_Size(dup2) == 3);
2462 Py_DECREF(dup2);
2463
2464 /* Raise SystemError when self argument is not a set or frozenset. */
2465 t = PyTuple_New(0);
2466 assertRaises(PySet_Size(t) == -1, PyExc_SystemError);
2467 assertRaises(PySet_Contains(t, elem) == -1, PyExc_SystemError);
2468 Py_DECREF(t);
2469
2470 /* Raise SystemError when self argument is not a set. */
2471 f = PyFrozenSet_New(dup);
2472 assert(PySet_Size(f) == 3);
2473 assert(PyFrozenSet_CheckExact(f));
2474 assertRaises(PySet_Discard(f, elem) == -1, PyExc_SystemError);
2475 assertRaises(PySet_Pop(f) == NULL, PyExc_SystemError);
2476 Py_DECREF(f);
2477
2478 /* Raise KeyError when popping from an empty set */
2479 assert(PyNumber_InPlaceSubtract(ob, ob) == ob);
2480 Py_DECREF(ob);
2481 assert(PySet_GET_SIZE(ob) == 0);
2482 assertRaises(PySet_Pop(ob) == NULL, PyExc_KeyError);
2483
2484 /* Restore the set from the copy using the PyNumber API */
2485 assert(PyNumber_InPlaceOr(ob, dup) == ob);
2486 Py_DECREF(ob);
2487
2488 /* Verify constructors accept NULL arguments */
2489 f = PySet_New(NULL);
2490 assert(f != NULL);
2491 assert(PySet_GET_SIZE(f) == 0);
2492 Py_DECREF(f);
2493 f = PyFrozenSet_New(NULL);
2494 assert(f != NULL);
2495 assert(PyFrozenSet_CheckExact(f));
2496 assert(PySet_GET_SIZE(f) == 0);
2497 Py_DECREF(f);
2498
2499 Py_DECREF(elem);
2500 Py_DECREF(dup);
2501 Py_RETURN_TRUE;
2502 }
2503
2504 #undef assertRaises
2505
2506 #endif
2507
2508 /***** Dummy Struct *************************************************/
2509
2510 static PyObject *
2511 dummy_repr(PyObject *op)
2512 {
2513 return PyUnicode_FromString("<dummy key>");
2514 }
2515
2516 static void _Py_NO_RETURN
2517 dummy_dealloc(PyObject* ignore)
2518 {
2519 Py_FatalError("deallocating <dummy key>");
2520 }
2521
2522 static PyTypeObject _PySetDummy_Type = {
2523 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2524 "<dummy key> type",
2525 0,
2526 0,
2527 dummy_dealloc, /*tp_dealloc*/ /*never called*/
2528 0, /*tp_vectorcall_offset*/
2529 0, /*tp_getattr*/
2530 0, /*tp_setattr*/
2531 0, /*tp_as_async*/
2532 dummy_repr, /*tp_repr*/
2533 0, /*tp_as_number*/
2534 0, /*tp_as_sequence*/
2535 0, /*tp_as_mapping*/
2536 0, /*tp_hash */
2537 0, /*tp_call */
2538 0, /*tp_str */
2539 0, /*tp_getattro */
2540 0, /*tp_setattro */
2541 0, /*tp_as_buffer */
2542 Py_TPFLAGS_DEFAULT, /*tp_flags */
2543 };
2544
2545 static PyObject _dummy_struct = {
2546 _PyObject_EXTRA_INIT
2547 { _Py_IMMORTAL_REFCNT },
2548 &_PySetDummy_Type
2549 };