1 /*
2
3 Reference Cycle Garbage Collection
4 ==================================
5
6 Neil Schemenauer <nas@arctrix.com>
7
8 Based on a post on the python-dev list. Ideas from Guido van Rossum,
9 Eric Tiedemann, and various others.
10
11 http://www.arctrix.com/nas/python/gc/
12
13 The following mailing list threads provide a historical perspective on
14 the design of this module. Note that a fair amount of refinement has
15 occurred since those discussions.
16
17 http://mail.python.org/pipermail/python-dev/2000-March/002385.html
18 http://mail.python.org/pipermail/python-dev/2000-March/002434.html
19 http://mail.python.org/pipermail/python-dev/2000-March/002497.html
20
For a high-level view of the collection process, read the
gc_collect_main() function.
23
24 */
25
26 #include "Python.h"
27 #include "pycore_context.h"
28 #include "pycore_initconfig.h"
29 #include "pycore_interp.h" // PyInterpreterState.gc
30 #include "pycore_object.h"
31 #include "pycore_pyerrors.h"
32 #include "pycore_pystate.h" // _PyThreadState_GET()
33 #include "pydtrace.h"
34
35 typedef struct _gc_runtime_state GCState;
36
37 /*[clinic input]
38 module gc
39 [clinic start generated code]*/
40 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=b5c9690ecc842d79]*/
41
42
43 #ifdef Py_DEBUG
44 # define GC_DEBUG
45 #endif
46
47 #define GC_NEXT _PyGCHead_NEXT
48 #define GC_PREV _PyGCHead_PREV
49
// update_refs() sets this bit for all objects in the generation being
// collected.  subtract_refs() and move_unreachable() use it to tell whether
// a visited object belongs to the generation being collected.
//
// move_unreachable() removes this flag from reachable objects.
// Only unreachable objects have this flag.
//
// No object in the interpreter has this flag after GC ends.
58 #define PREV_MASK_COLLECTING _PyGC_PREV_MASK_COLLECTING
59
// The lowest bit of _gc_next is used for the UNREACHABLE flag.
//
// This flag indicates that the object is in the "unreachable" list built by
// move_unreachable().
//
// Although the flag is only needed by move_unreachable(), that function does
// not clear it, to avoid an extra pass over the list; move_legacy_finalizers()
// removes it instead.  Between those two calls the unreachable list is not a
// normal list and most gc_list_* functions cannot be used on it.
69 #define NEXT_MASK_UNREACHABLE (1)
70
71 /* Get an object's GC head */
72 #define AS_GC(o) ((PyGC_Head *)(((char *)(o))-sizeof(PyGC_Head)))
73
74 /* Get the object given the GC head */
75 #define FROM_GC(g) ((PyObject *)(((char *)(g))+sizeof(PyGC_Head)))
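
/* Illustrative memory layout (a sketch for orientation, not an API
 * guarantee): the PyGC_Head is allocated immediately before the object it
 * tracks, in the same memory block, so the two macros above are just pointer
 * arithmetic in opposite directions:
 *
 *     +---------------+--------------------+
 *     |   PyGC_Head   |   PyObject ...     |
 *     +---------------+--------------------+
 *     ^ AS_GC(op)     ^ op == FROM_GC(gc)
 *
 * and AS_GC(FROM_GC(g)) == g, FROM_GC(AS_GC(o)) == o.
 */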
76
77 static inline int
78 gc_is_collecting(PyGC_Head *g)
79 {
80 return (g->_gc_prev & PREV_MASK_COLLECTING) != 0;
81 }
82
83 static inline void
84 gc_clear_collecting(PyGC_Head *g)
85 {
86 g->_gc_prev &= ~PREV_MASK_COLLECTING;
87 }
88
89 static inline Py_ssize_t
90 gc_get_refs(PyGC_Head *g)
91 {
92 return (Py_ssize_t)(g->_gc_prev >> _PyGC_PREV_SHIFT);
93 }
94
95 static inline void
96 gc_set_refs(PyGC_Head *g, Py_ssize_t refs)
97 {
98 g->_gc_prev = (g->_gc_prev & ~_PyGC_PREV_MASK)
99 | ((uintptr_t)(refs) << _PyGC_PREV_SHIFT);
100 }
101
102 static inline void
103 gc_reset_refs(PyGC_Head *g, Py_ssize_t refs)
104 {
105 g->_gc_prev = (g->_gc_prev & _PyGC_PREV_MASK_FINALIZED)
106 | PREV_MASK_COLLECTING
107 | ((uintptr_t)(refs) << _PyGC_PREV_SHIFT);
108 }
109
110 static inline void
111 gc_decref(PyGC_Head *g)
112 {
113 _PyObject_ASSERT_WITH_MSG(FROM_GC(g),
114 gc_get_refs(g) > 0,
115 "refcount is too small");
116 g->_gc_prev -= 1 << _PyGC_PREV_SHIFT;
117 }
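
/* Worked example of the packing used above (assuming the flag/shift values
 * in pycore_gc.h at the time of writing: FINALIZED == bit 0,
 * COLLECTING == bit 1, _PyGC_PREV_SHIFT == 2).  For an unfinalized object
 * with a refcount of 3:
 *
 *     gc_reset_refs(g, 3)  ->  _gc_prev == (3 << 2) | COLLECTING == 0b1110
 *     gc_decref(g)         ->  _gc_prev == 0b1010  (refs == 2, still COLLECTING)
 *     gc_get_refs(g)       ->  0b1010 >> 2 == 2
 *
 * The flag bits in the low positions are untouched by the refcount math.
 */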
118
/* Bit flags controlling which debugging information gets printed */
120 #define DEBUG_STATS (1<<0) /* print collection statistics */
121 #define DEBUG_COLLECTABLE (1<<1) /* print collectable objects */
122 #define DEBUG_UNCOLLECTABLE (1<<2) /* print uncollectable objects */
123 #define DEBUG_SAVEALL (1<<5) /* save all garbage in gc.garbage */
#define DEBUG_LEAK (DEBUG_COLLECTABLE | \
                    DEBUG_UNCOLLECTABLE | \
                    DEBUG_SAVEALL)
127
128 #define GEN_HEAD(gcstate, n) (&(gcstate)->generations[n].head)
129
130
131 static GCState *
132 get_gc_state(void)
133 {
134 PyInterpreterState *interp = _PyInterpreterState_GET();
135 return &interp->gc;
136 }
137
138
139 void
140 _PyGC_InitState(GCState *gcstate)
141 {
142 #define INIT_HEAD(GEN) \
143 do { \
144 GEN.head._gc_next = (uintptr_t)&GEN.head; \
145 GEN.head._gc_prev = (uintptr_t)&GEN.head; \
146 } while (0)
147
148 for (int i = 0; i < NUM_GENERATIONS; i++) {
149 assert(gcstate->generations[i].count == 0);
150 INIT_HEAD(gcstate->generations[i]);
    }
152 gcstate->generation0 = GEN_HEAD(gcstate, 0);
153 INIT_HEAD(gcstate->permanent_generation);
154
155 #undef INIT_HEAD
156 }
157
158
159 PyStatus
160 _PyGC_Init(PyInterpreterState *interp)
161 {
162 GCState *gcstate = &interp->gc;
163
164 gcstate->garbage = PyList_New(0);
165 if (gcstate->garbage == NULL) {
166 return _PyStatus_NO_MEMORY();
167 }
168
169 gcstate->callbacks = PyList_New(0);
170 if (gcstate->callbacks == NULL) {
171 return _PyStatus_NO_MEMORY();
172 }
173
174 return _PyStatus_OK();
175 }
176
177
178 /*
179 _gc_prev values
180 ---------------
181
Between collections, _gc_prev holds the previous-node pointer of the doubly
linked list.

The lowest two bits of _gc_prev are used for flags.
PREV_MASK_COLLECTING is used only while collecting; it is cleared before GC
ends or when _PyObject_GC_UNTRACK() is called.

During a collection, _gc_prev is temporarily used to hold gc_refs, and the gc
list is singly linked until _gc_prev is restored.
190
191 gc_refs
192 At the start of a collection, update_refs() copies the true refcount
193 to gc_refs, for each object in the generation being collected.
194 subtract_refs() then adjusts gc_refs so that it equals the number of
195 times an object is referenced directly from outside the generation
196 being collected.
197
198 PREV_MASK_COLLECTING
    Objects in the generation being collected are marked with
    PREV_MASK_COLLECTING in update_refs().
201
202
203 _gc_next values
204 ---------------
205
206 _gc_next takes these values:
207
208 0
209 The object is not tracked
210
211 != 0
    Pointer to the next object in the GC list.
    Additionally, the lowest bit is temporarily used for the
    NEXT_MASK_UNREACHABLE flag described below.
215
216 NEXT_MASK_UNREACHABLE
    move_unreachable() moves objects that are not reachable (whether directly
    or indirectly) from outside the generation into an "unreachable" set and
    sets this flag on them.

    Objects that are found to be reachable have gc_refs set to 1.
    If this flag is set on an object that turns out to be reachable, the
    object is still in the "unreachable" set; the flag is then cleared and
    the object is moved back to the "reachable" set.

    move_legacy_finalizers() removes this flag from objects remaining in the
    "unreachable" set.
227 */
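
/* A sketch of the transient layout described above (again assuming the
 * pycore_gc.h constants: bit 0 = FINALIZED, bit 1 = COLLECTING,
 * _PyGC_PREV_SHIFT = 2):
 *
 *     between collections:  _gc_prev = [ prev pointer ......... | 0 | F ]
 *     during a collection:  _gc_prev = [ gc_refs .............. | 1 | F ]
 *     _gc_next:             [ next pointer .................... | U ]
 *
 * where U (NEXT_MASK_UNREACHABLE) is only ever set while the object sits on
 * the "unreachable" list built by move_unreachable().
 */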
228
229 /*** list functions ***/
230
231 static inline void
232 gc_list_init(PyGC_Head *list)
233 {
    // The list header must not have flags set,
    // so we can assign the pointer with a simple cast.
236 list->_gc_prev = (uintptr_t)list;
237 list->_gc_next = (uintptr_t)list;
238 }
239
240 static inline int
241 gc_list_is_empty(PyGC_Head *list)
242 {
243 return (list->_gc_next == (uintptr_t)list);
244 }
245
246 /* Append `node` to `list`. */
247 static inline void
248 gc_list_append(PyGC_Head *node, PyGC_Head *list)
249 {
250 PyGC_Head *last = (PyGC_Head *)list->_gc_prev;
251
252 // last <-> node
253 _PyGCHead_SET_PREV(node, last);
254 _PyGCHead_SET_NEXT(last, node);
255
256 // node <-> list
257 _PyGCHead_SET_NEXT(node, list);
258 list->_gc_prev = (uintptr_t)node;
259 }
260
261 /* Remove `node` from the gc list it's currently in. */
262 static inline void
263 gc_list_remove(PyGC_Head *node)
264 {
265 PyGC_Head *prev = GC_PREV(node);
266 PyGC_Head *next = GC_NEXT(node);
267
268 _PyGCHead_SET_NEXT(prev, next);
269 _PyGCHead_SET_PREV(next, prev);
270
271 node->_gc_next = 0; /* object is not currently tracked */
272 }
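
/* Note: zeroing _gc_next above is what makes the object report as untracked;
 * at the time of writing, _PyObject_GC_IS_TRACKED() in pycore_object.h is
 * simply a test of AS_GC(op)->_gc_next != 0.
 */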
273
274 /* Move `node` from the gc list it's currently in (which is not explicitly
275 * named here) to the end of `list`. This is semantically the same as
276 * gc_list_remove(node) followed by gc_list_append(node, list).
277 */
278 static void
279 gc_list_move(PyGC_Head *node, PyGC_Head *list)
280 {
281 /* Unlink from current list. */
282 PyGC_Head *from_prev = GC_PREV(node);
283 PyGC_Head *from_next = GC_NEXT(node);
284 _PyGCHead_SET_NEXT(from_prev, from_next);
285 _PyGCHead_SET_PREV(from_next, from_prev);
286
287 /* Relink at end of new list. */
    // The list head must not have flags set, so we can skip the macros.
289 PyGC_Head *to_prev = (PyGC_Head*)list->_gc_prev;
290 _PyGCHead_SET_PREV(node, to_prev);
291 _PyGCHead_SET_NEXT(to_prev, node);
292 list->_gc_prev = (uintptr_t)node;
293 _PyGCHead_SET_NEXT(node, list);
294 }
295
296 /* append list `from` onto list `to`; `from` becomes an empty list */
297 static void
298 gc_list_merge(PyGC_Head *from, PyGC_Head *to)
299 {
300 assert(from != to);
301 if (!gc_list_is_empty(from)) {
302 PyGC_Head *to_tail = GC_PREV(to);
303 PyGC_Head *from_head = GC_NEXT(from);
304 PyGC_Head *from_tail = GC_PREV(from);
305 assert(from_head != from);
306 assert(from_tail != from);
307
308 _PyGCHead_SET_NEXT(to_tail, from_head);
309 _PyGCHead_SET_PREV(from_head, to_tail);
310
311 _PyGCHead_SET_NEXT(from_tail, to);
312 _PyGCHead_SET_PREV(to, from_tail);
313 }
314 gc_list_init(from);
315 }
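
/* Sketch of the constant-time splice performed by gc_list_merge() when
 * `from` is non-empty:
 *
 *     before:  to:   to <-> ... <-> to_tail <-> to
 *              from: from <-> from_head <-> ... <-> from_tail <-> from
 *
 *     after:   to:   to <-> ... <-> to_tail <-> from_head <-> ... <-> from_tail <-> to
 *              from: empty (points back at itself after gc_list_init)
 */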
316
317 static Py_ssize_t
318 gc_list_size(PyGC_Head *list)
319 {
320 PyGC_Head *gc;
321 Py_ssize_t n = 0;
322 for (gc = GC_NEXT(list); gc != list; gc = GC_NEXT(gc)) {
323 n++;
324 }
325 return n;
326 }
327
328 /* Walk the list and mark all objects as non-collecting */
329 static inline void
330 gc_list_clear_collecting(PyGC_Head *collectable)
331 {
332 PyGC_Head *gc;
333 for (gc = GC_NEXT(collectable); gc != collectable; gc = GC_NEXT(gc)) {
334 gc_clear_collecting(gc);
335 }
336 }
337
338 /* Append objects in a GC list to a Python list.
339 * Return 0 if all OK, < 0 if error (out of memory for list)
340 */
341 static int
342 append_objects(PyObject *py_list, PyGC_Head *gc_list)
343 {
344 PyGC_Head *gc;
345 for (gc = GC_NEXT(gc_list); gc != gc_list; gc = GC_NEXT(gc)) {
346 PyObject *op = FROM_GC(gc);
347 if (op != py_list) {
348 if (PyList_Append(py_list, op)) {
349 return -1; /* exception */
350 }
351 }
352 }
353 return 0;
354 }
355
356 // Constants for validate_list's flags argument.
357 enum flagstates {collecting_clear_unreachable_clear,
358 collecting_clear_unreachable_set,
359 collecting_set_unreachable_clear,
360 collecting_set_unreachable_set};
361
362 #ifdef GC_DEBUG
// validate_list checks list consistency, and also serves as documentation of
// when flags are expected to be set / unset.
// `head` must be a doubly-linked gc list, although it's fine (expected!) if
// the prev and next pointers are "polluted" with flags.
// What's checked:
// - The `head` pointers are not polluted.
// - The objects' PREV_MASK_COLLECTING and NEXT_MASK_UNREACHABLE flags are all
//   set or clear, as specified by the 'flags' argument.
371 // - The prev and next pointers are mutually consistent.
372 static void
373 validate_list(PyGC_Head *head, enum flagstates flags)
374 {
375 assert((head->_gc_prev & PREV_MASK_COLLECTING) == 0);
376 assert((head->_gc_next & NEXT_MASK_UNREACHABLE) == 0);
377 uintptr_t prev_value = 0, next_value = 0;
378 switch (flags) {
379 case collecting_clear_unreachable_clear:
380 break;
381 case collecting_set_unreachable_clear:
382 prev_value = PREV_MASK_COLLECTING;
383 break;
384 case collecting_clear_unreachable_set:
385 next_value = NEXT_MASK_UNREACHABLE;
386 break;
387 case collecting_set_unreachable_set:
388 prev_value = PREV_MASK_COLLECTING;
389 next_value = NEXT_MASK_UNREACHABLE;
390 break;
391 default:
392 assert(! "bad internal flags argument");
393 }
394 PyGC_Head *prev = head;
395 PyGC_Head *gc = GC_NEXT(head);
396 while (gc != head) {
397 PyGC_Head *trueprev = GC_PREV(gc);
398 PyGC_Head *truenext = (PyGC_Head *)(gc->_gc_next & ~NEXT_MASK_UNREACHABLE);
399 assert(truenext != NULL);
400 assert(trueprev == prev);
401 assert((gc->_gc_prev & PREV_MASK_COLLECTING) == prev_value);
402 assert((gc->_gc_next & NEXT_MASK_UNREACHABLE) == next_value);
403 prev = gc;
404 gc = truenext;
405 }
406 assert(prev == GC_PREV(head));
407 }
408 #else
409 #define validate_list(x, y) do{}while(0)
410 #endif
411
412 /*** end of list stuff ***/
413
414
/* Set all gc_refs = ob_refcnt.  After this, gc_refs is > 0 and the
 * PREV_MASK_COLLECTING bit is set for all objects in containers.
417 */
418 static void
419 update_refs(PyGC_Head *containers)
420 {
421 PyGC_Head *next;
422 PyGC_Head *gc = GC_NEXT(containers);
423
424 while (gc != containers) {
425 next = GC_NEXT(gc);
        /* Move any object that might have become immortal to the
         * permanent generation, as its reference count no longer accurately
         * reflects the number of live references to the object.
429 */
430 if (_Py_IsImmortal(FROM_GC(gc))) {
431 gc_list_move(gc, &get_gc_state()->permanent_generation.head);
432 gc = next;
433 continue;
434 }
435 gc_reset_refs(gc, Py_REFCNT(FROM_GC(gc)));
436 /* Python's cyclic gc should never see an incoming refcount
437 * of 0: if something decref'ed to 0, it should have been
438 * deallocated immediately at that time.
439 * Possible cause (if the assert triggers): a tp_dealloc
440 * routine left a gc-aware object tracked during its teardown
441 * phase, and did something-- or allowed something to happen --
442 * that called back into Python. gc can trigger then, and may
443 * see the still-tracked dying object. Before this assert
444 * was added, such mistakes went on to allow gc to try to
445 * delete the object again. In a debug build, that caused
446 * a mysterious segfault, when _Py_ForgetReference tried
447 * to remove the object from the doubly-linked list of all
448 * objects a second time. In a release build, an actual
449 * double deallocation occurred, which leads to corruption
450 * of the allocator's internal bookkeeping pointers. That's
451 * so serious that maybe this should be a release-build
452 * check instead of an assert?
453 */
454 _PyObject_ASSERT(FROM_GC(gc), gc_get_refs(gc) != 0);
455 gc = next;
456 }
457 }
458
459 /* A traversal callback for subtract_refs. */
460 static int
461 visit_decref(PyObject *op, void *parent)
462 {
463 _PyObject_ASSERT(_PyObject_CAST(parent), !_PyObject_IsFreed(op));
464
465 if (_PyObject_IS_GC(op)) {
466 PyGC_Head *gc = AS_GC(op);
467 /* We're only interested in gc_refs for objects in the
468 * generation being collected, which can be recognized
469 * because only they have positive gc_refs.
470 */
471 if (gc_is_collecting(gc)) {
472 gc_decref(gc);
473 }
474 }
475 return 0;
476 }
477
/* Subtract internal references from gc_refs.  After this, gc_refs is >= 0
 * for all objects in containers.  The ones with gc_refs > 0 are directly
 * reachable from outside containers, and so can't be collected.
482 */
483 static void
484 subtract_refs(PyGC_Head *containers)
485 {
486 traverseproc traverse;
487 PyGC_Head *gc = GC_NEXT(containers);
488 for (; gc != containers; gc = GC_NEXT(gc)) {
489 PyObject *op = FROM_GC(gc);
490 traverse = Py_TYPE(op)->tp_traverse;
491 (void) traverse(op,
492 (visitproc)visit_decref,
493 op);
494 }
495 }
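
/* Worked example for update_refs() + subtract_refs(): suppose A, B and C form
 * the cycle A -> B -> C -> A and only A also has one reference from outside
 * the generation.  update_refs() copies the refcounts, giving gc_refs of
 * A=2, B=1, C=1; subtract_refs() then removes one for each internal edge
 * (C->A, A->B, B->C), leaving A=1, B=0, C=0.  Only A, which is directly
 * reachable from outside, keeps a positive gc_refs.
 */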
496
497 /* A traversal callback for move_unreachable. */
498 static int
499 visit_reachable(PyObject *op, PyGC_Head *reachable)
500 {
501 if (!_PyObject_IS_GC(op)) {
502 return 0;
503 }
504
505 PyGC_Head *gc = AS_GC(op);
506 const Py_ssize_t gc_refs = gc_get_refs(gc);
507
    // Ignore objects in other generations.
509 // This also skips objects "to the left" of the current position in
510 // move_unreachable's scan of the 'young' list - they've already been
511 // traversed, and no longer have the PREV_MASK_COLLECTING flag.
512 if (! gc_is_collecting(gc)) {
513 return 0;
514 }
515 // It would be a logic error elsewhere if the collecting flag were set on
516 // an untracked object.
517 assert(gc->_gc_next != 0);
518
519 if (gc->_gc_next & NEXT_MASK_UNREACHABLE) {
520 /* This had gc_refs = 0 when move_unreachable got
521 * to it, but turns out it's reachable after all.
522 * Move it back to move_unreachable's 'young' list,
523 * and move_unreachable will eventually get to it
524 * again.
525 */
526 // Manually unlink gc from unreachable list because the list functions
527 // don't work right in the presence of NEXT_MASK_UNREACHABLE flags.
528 PyGC_Head *prev = GC_PREV(gc);
529 PyGC_Head *next = (PyGC_Head*)(gc->_gc_next & ~NEXT_MASK_UNREACHABLE);
530 _PyObject_ASSERT(FROM_GC(prev),
531 prev->_gc_next & NEXT_MASK_UNREACHABLE);
532 _PyObject_ASSERT(FROM_GC(next),
533 next->_gc_next & NEXT_MASK_UNREACHABLE);
534 prev->_gc_next = gc->_gc_next; // copy NEXT_MASK_UNREACHABLE
535 _PyGCHead_SET_PREV(next, prev);
536
537 gc_list_append(gc, reachable);
538 gc_set_refs(gc, 1);
539 }
540 else if (gc_refs == 0) {
541 /* This is in move_unreachable's 'young' list, but
542 * the traversal hasn't yet gotten to it. All
543 * we need to do is tell move_unreachable that it's
544 * reachable.
545 */
546 gc_set_refs(gc, 1);
547 }
548 /* Else there's nothing to do.
549 * If gc_refs > 0, it must be in move_unreachable's 'young'
550 * list, and move_unreachable will eventually get to it.
551 */
552 else {
553 _PyObject_ASSERT_WITH_MSG(op, gc_refs > 0, "refcount is too small");
554 }
555 return 0;
556 }
557
/* Move the unreachable objects from young to unreachable.  After this,
 * no object in young has the PREV_MASK_COLLECTING flag and every object
 * in unreachable does.
 * All objects remaining in young are directly or indirectly reachable from
 * outside the original young, and all objects in unreachable are not.
 *
 * This function restores the _gc_prev pointers; young and unreachable are
 * doubly linked lists after it returns.
 * However, _gc_next in the unreachable list still carries the
 * NEXT_MASK_UNREACHABLE flag, so gc_list_* functions cannot be used on
 * unreachable until the flag is removed.
569 */
570 static void
571 move_unreachable(PyGC_Head *young, PyGC_Head *unreachable)
572 {
    // previous element in the young list, used to restore _gc_prev.
574 PyGC_Head *prev = young;
575 PyGC_Head *gc = GC_NEXT(young);
576
577 /* Invariants: all objects "to the left" of us in young are reachable
578 * (directly or indirectly) from outside the young list as it was at entry.
579 *
580 * All other objects from the original young "to the left" of us are in
581 * unreachable now, and have NEXT_MASK_UNREACHABLE. All objects to the
582 * left of us in 'young' now have been scanned, and no objects here
583 * or to the right have been scanned yet.
584 */
585
586 while (gc != young) {
587 if (gc_get_refs(gc)) {
588 /* gc is definitely reachable from outside the
589 * original 'young'. Mark it as such, and traverse
590 * its pointers to find any other objects that may
591 * be directly reachable from it. Note that the
592 * call to tp_traverse may append objects to young,
593 * so we have to wait until it returns to determine
594 * the next object to visit.
595 */
596 PyObject *op = FROM_GC(gc);
597 traverseproc traverse = Py_TYPE(op)->tp_traverse;
598 _PyObject_ASSERT_WITH_MSG(op, gc_get_refs(gc) > 0,
599 "refcount is too small");
600 // NOTE: visit_reachable may change gc->_gc_next when
601 // young->_gc_prev == gc. Don't do gc = GC_NEXT(gc) before!
602 (void) traverse(op,
603 (visitproc)visit_reachable,
604 (void *)young);
605 // relink gc_prev to prev element.
606 _PyGCHead_SET_PREV(gc, prev);
607 // gc is not COLLECTING state after here.
608 gc_clear_collecting(gc);
609 prev = gc;
610 }
611 else {
612 /* This *may* be unreachable. To make progress,
613 * assume it is. gc isn't directly reachable from
614 * any object we've already traversed, but may be
615 * reachable from an object we haven't gotten to yet.
616 * visit_reachable will eventually move gc back into
617 * young if that's so, and we'll see it again.
618 */
619 // Move gc to unreachable.
            // No need to set gc->next->prev = prev because the list is
            // singly linked at this point.
621 prev->_gc_next = gc->_gc_next;
622
            // We can't use gc_list_append() here because we need to set
            // the NEXT_MASK_UNREACHABLE flag.
            PyGC_Head *last = GC_PREV(unreachable);
            // NOTE: Since all objects in the unreachable set have the
            // NEXT_MASK_UNREACHABLE flag, we set it unconditionally.
628 // But this may pollute the unreachable list head's 'next' pointer
629 // too. That's semantically senseless but expedient here - the
630 // damage is repaired when this function ends.
631 last->_gc_next = (NEXT_MASK_UNREACHABLE | (uintptr_t)gc);
632 _PyGCHead_SET_PREV(gc, last);
633 gc->_gc_next = (NEXT_MASK_UNREACHABLE | (uintptr_t)unreachable);
634 unreachable->_gc_prev = (uintptr_t)gc;
635 }
636 gc = (PyGC_Head*)prev->_gc_next;
637 }
    // young->_gc_prev must be the last element remaining in the list.
639 young->_gc_prev = (uintptr_t)prev;
640 // don't let the pollution of the list head's next pointer leak
641 unreachable->_gc_next &= ~NEXT_MASK_UNREACHABLE;
642 }
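
/* Shape of the lists while the scan above is in progress (a sketch):
 *
 *     young, already-scanned part:  doubly linked again; _gc_prev restored
 *                                   and PREV_MASK_COLLECTING cleared.
 *     young, unscanned part:        reached only through prev->_gc_next;
 *                                   _gc_prev still holds gc_refs.
 *     unreachable:                  every node's _gc_next carries
 *                                   NEXT_MASK_UNREACHABLE, and the head's
 *                                   _gc_next may too, until the final
 *                                   `&= ~NEXT_MASK_UNREACHABLE` above.
 */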
643
644 static void
645 untrack_tuples(PyGC_Head *head)
646 {
647 PyGC_Head *next, *gc = GC_NEXT(head);
648 while (gc != head) {
649 PyObject *op = FROM_GC(gc);
650 next = GC_NEXT(gc);
651 if (PyTuple_CheckExact(op)) {
652 _PyTuple_MaybeUntrack(op);
653 }
654 gc = next;
655 }
656 }
657
658 /* Try to untrack all currently tracked dictionaries */
659 static void
660 untrack_dicts(PyGC_Head *head)
661 {
662 PyGC_Head *next, *gc = GC_NEXT(head);
663 while (gc != head) {
664 PyObject *op = FROM_GC(gc);
665 next = GC_NEXT(gc);
666 if (PyDict_CheckExact(op)) {
667 _PyDict_MaybeUntrack(op);
668 }
669 gc = next;
670 }
671 }
672
673 /* Return true if object has a pre-PEP 442 finalization method. */
674 static int
675 has_legacy_finalizer(PyObject *op)
676 {
677 return Py_TYPE(op)->tp_del != NULL;
678 }
679
680 /* Move the objects in unreachable with tp_del slots into `finalizers`.
681 *
 * This function also removes the NEXT_MASK_UNREACHABLE flag from the
 * _gc_next field of objects in unreachable.
684 */
685 static void
686 move_legacy_finalizers(PyGC_Head *unreachable, PyGC_Head *finalizers)
687 {
688 PyGC_Head *gc, *next;
689 assert((unreachable->_gc_next & NEXT_MASK_UNREACHABLE) == 0);
690
691 /* March over unreachable. Move objects with finalizers into
692 * `finalizers`.
693 */
694 for (gc = GC_NEXT(unreachable); gc != unreachable; gc = next) {
695 PyObject *op = FROM_GC(gc);
696
697 _PyObject_ASSERT(op, gc->_gc_next & NEXT_MASK_UNREACHABLE);
698 gc->_gc_next &= ~NEXT_MASK_UNREACHABLE;
699 next = (PyGC_Head*)gc->_gc_next;
700
701 if (has_legacy_finalizer(op)) {
702 gc_clear_collecting(gc);
703 gc_list_move(gc, finalizers);
704 }
705 }
706 }
707
708 static inline void
709 clear_unreachable_mask(PyGC_Head *unreachable)
710 {
711 /* Check that the list head does not have the unreachable bit set */
712 assert(((uintptr_t)unreachable & NEXT_MASK_UNREACHABLE) == 0);
713
714 PyGC_Head *gc, *next;
715 assert((unreachable->_gc_next & NEXT_MASK_UNREACHABLE) == 0);
716 for (gc = GC_NEXT(unreachable); gc != unreachable; gc = next) {
717 _PyObject_ASSERT((PyObject*)FROM_GC(gc), gc->_gc_next & NEXT_MASK_UNREACHABLE);
718 gc->_gc_next &= ~NEXT_MASK_UNREACHABLE;
719 next = (PyGC_Head*)gc->_gc_next;
720 }
721 validate_list(unreachable, collecting_set_unreachable_clear);
722 }
723
724 /* A traversal callback for move_legacy_finalizer_reachable. */
725 static int
726 visit_move(PyObject *op, PyGC_Head *tolist)
727 {
728 if (_PyObject_IS_GC(op)) {
729 PyGC_Head *gc = AS_GC(op);
730 if (gc_is_collecting(gc)) {
731 gc_list_move(gc, tolist);
732 gc_clear_collecting(gc);
733 }
734 }
735 return 0;
736 }
737
738 /* Move objects that are reachable from finalizers, from the unreachable set
739 * into finalizers set.
740 */
741 static void
742 move_legacy_finalizer_reachable(PyGC_Head *finalizers)
743 {
744 traverseproc traverse;
745 PyGC_Head *gc = GC_NEXT(finalizers);
746 for (; gc != finalizers; gc = GC_NEXT(gc)) {
747 /* Note that the finalizers list may grow during this. */
748 traverse = Py_TYPE(FROM_GC(gc))->tp_traverse;
749 (void) traverse(FROM_GC(gc),
750 (visitproc)visit_move,
751 (void *)finalizers);
752 }
753 }
754
755 /* Clear all weakrefs to unreachable objects, and if such a weakref has a
756 * callback, invoke it if necessary. Note that it's possible for such
757 * weakrefs to be outside the unreachable set -- indeed, those are precisely
758 * the weakrefs whose callbacks must be invoked. See gc_weakref.txt for
759 * overview & some details. Some weakrefs with callbacks may be reclaimed
 * directly by this routine; the number reclaimed is the return value.  Other
 * weakrefs with callbacks may be moved into the `old` generation.  When this
 * returns, no object in `unreachable` is weakly referenced anymore.
765 */
766 static int
767 handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old)
768 {
769 PyGC_Head *gc;
770 PyObject *op; /* generally FROM_GC(gc) */
771 PyWeakReference *wr; /* generally a cast of op */
772 PyGC_Head wrcb_to_call; /* weakrefs with callbacks to call */
773 PyGC_Head *next;
774 int num_freed = 0;
775
776 gc_list_init(&wrcb_to_call);
777
778 /* Clear all weakrefs to the objects in unreachable. If such a weakref
779 * also has a callback, move it into `wrcb_to_call` if the callback
780 * needs to be invoked. Note that we cannot invoke any callbacks until
781 * all weakrefs to unreachable objects are cleared, lest the callback
782 * resurrect an unreachable object via a still-active weakref. We
783 * make another pass over wrcb_to_call, invoking callbacks, after this
784 * pass completes.
785 */
786 for (gc = GC_NEXT(unreachable); gc != unreachable; gc = next) {
787 PyWeakReference **wrlist;
788
789 op = FROM_GC(gc);
790 next = GC_NEXT(gc);
791
792 if (PyWeakref_Check(op)) {
793 /* A weakref inside the unreachable set must be cleared. If we
794 * allow its callback to execute inside delete_garbage(), it
795 * could expose objects that have tp_clear already called on
796 * them. Or, it could resurrect unreachable objects. One way
797 * this can happen is if some container objects do not implement
798 * tp_traverse. Then, wr_object can be outside the unreachable
799 * set but can be deallocated as a result of breaking the
800 * reference cycle. If we don't clear the weakref, the callback
801 * will run and potentially cause a crash. See bpo-38006 for
802 * one example.
803 */
804 _PyWeakref_ClearRef((PyWeakReference *)op);
805 }
806
807 if (! _PyType_SUPPORTS_WEAKREFS(Py_TYPE(op)))
808 continue;
809
810 /* It supports weakrefs. Does it have any?
811 *
812 * This is never triggered for static types so we can avoid the
813 * (slightly) more costly _PyObject_GET_WEAKREFS_LISTPTR().
814 */
815 wrlist = _PyObject_GET_WEAKREFS_LISTPTR_FROM_OFFSET(op);
816
817 /* `op` may have some weakrefs. March over the list, clear
818 * all the weakrefs, and move the weakrefs with callbacks
819 * that must be called into wrcb_to_call.
820 */
821 for (wr = *wrlist; wr != NULL; wr = *wrlist) {
822 PyGC_Head *wrasgc; /* AS_GC(wr) */
823
824 /* _PyWeakref_ClearRef clears the weakref but leaves
825 * the callback pointer intact. Obscure: it also
826 * changes *wrlist.
827 */
828 _PyObject_ASSERT((PyObject *)wr, wr->wr_object == op);
829 _PyWeakref_ClearRef(wr);
830 _PyObject_ASSERT((PyObject *)wr, wr->wr_object == Py_None);
831 if (wr->wr_callback == NULL) {
832 /* no callback */
833 continue;
834 }
835
836 /* Headache time. `op` is going away, and is weakly referenced by
837 * `wr`, which has a callback. Should the callback be invoked? If wr
838 * is also trash, no:
839 *
840 * 1. There's no need to call it. The object and the weakref are
841 * both going away, so it's legitimate to pretend the weakref is
842 * going away first. The user has to ensure a weakref outlives its
843 * referent if they want a guarantee that the wr callback will get
844 * invoked.
845 *
846 * 2. It may be catastrophic to call it. If the callback is also in
847 * cyclic trash (CT), then although the CT is unreachable from
848 * outside the current generation, CT may be reachable from the
849 * callback. Then the callback could resurrect insane objects.
850 *
851 * Since the callback is never needed and may be unsafe in this case,
852 * wr is simply left in the unreachable set. Note that because we
853 * already called _PyWeakref_ClearRef(wr), its callback will never
854 * trigger.
855 *
856 * OTOH, if wr isn't part of CT, we should invoke the callback: the
857 * weakref outlived the trash. Note that since wr isn't CT in this
858 * case, its callback can't be CT either -- wr acted as an external
859 * root to this generation, and therefore its callback did too. So
860 * nothing in CT is reachable from the callback either, so it's hard
861 * to imagine how calling it later could create a problem for us. wr
862 * is moved to wrcb_to_call in this case.
863 */
864 if (gc_is_collecting(AS_GC(wr))) {
865 /* it should already have been cleared above */
866 assert(wr->wr_object == Py_None);
867 continue;
868 }
869
870 /* Create a new reference so that wr can't go away
871 * before we can process it again.
872 */
873 Py_INCREF(wr);
874
875 /* Move wr to wrcb_to_call, for the next pass. */
876 wrasgc = AS_GC(wr);
877 assert(wrasgc != next); /* wrasgc is reachable, but
878 next isn't, so they can't
879 be the same */
880 gc_list_move(wrasgc, &wrcb_to_call);
881 }
882 }
883
884 /* Invoke the callbacks we decided to honor. It's safe to invoke them
885 * because they can't reference unreachable objects.
886 */
887 while (! gc_list_is_empty(&wrcb_to_call)) {
888 PyObject *temp;
889 PyObject *callback;
890
891 gc = (PyGC_Head*)wrcb_to_call._gc_next;
892 op = FROM_GC(gc);
893 _PyObject_ASSERT(op, PyWeakref_Check(op));
894 wr = (PyWeakReference *)op;
895 callback = wr->wr_callback;
896 _PyObject_ASSERT(op, callback != NULL);
897
898 /* copy-paste of weakrefobject.c's handle_callback() */
899 temp = PyObject_CallOneArg(callback, (PyObject *)wr);
900 if (temp == NULL)
901 PyErr_WriteUnraisable(callback);
902 else
903 Py_DECREF(temp);
904
905 /* Give up the reference we created in the first pass. When
906 * op's refcount hits 0 (which it may or may not do right now),
907 * op's tp_dealloc will decref op->wr_callback too. Note
908 * that the refcount probably will hit 0 now, and because this
909 * weakref was reachable to begin with, gc didn't already
910 * add it to its count of freed objects. Example: a reachable
911 * weak value dict maps some key to this reachable weakref.
912 * The callback removes this key->weakref mapping from the
913 * dict, leaving no other references to the weakref (excepting
914 * ours).
915 */
916 Py_DECREF(op);
917 if (wrcb_to_call._gc_next == (uintptr_t)gc) {
918 /* object is still alive -- move it */
919 gc_list_move(gc, old);
920 }
921 else {
922 ++num_freed;
923 }
924 }
925
926 return num_freed;
927 }
928
929 static void
930 debug_cycle(const char *msg, PyObject *op)
931 {
932 PySys_FormatStderr("gc: %s <%s %p>\n",
933 msg, Py_TYPE(op)->tp_name, op);
934 }
935
936 /* Handle uncollectable garbage (cycles with tp_del slots, and stuff reachable
937 * only from such cycles).
938 * If DEBUG_SAVEALL, all objects in finalizers are appended to the module
939 * garbage list (a Python list), else only the objects in finalizers with
940 * __del__ methods are appended to garbage. All objects in finalizers are
941 * merged into the old list regardless.
942 */
943 static void
944 handle_legacy_finalizers(PyThreadState *tstate,
945 GCState *gcstate,
946 PyGC_Head *finalizers, PyGC_Head *old)
947 {
948 assert(!_PyErr_Occurred(tstate));
949 assert(gcstate->garbage != NULL);
950
951 PyGC_Head *gc = GC_NEXT(finalizers);
952 for (; gc != finalizers; gc = GC_NEXT(gc)) {
953 PyObject *op = FROM_GC(gc);
954
955 if ((gcstate->debug & DEBUG_SAVEALL) || has_legacy_finalizer(op)) {
956 if (PyList_Append(gcstate->garbage, op) < 0) {
957 _PyErr_Clear(tstate);
958 break;
959 }
960 }
961 }
962
963 gc_list_merge(finalizers, old);
964 }
965
966 /* Run first-time finalizers (if any) on all the objects in collectable.
967 * Note that this may remove some (or even all) of the objects from the
968 * list, due to refcounts falling to 0.
969 */
970 static void
971 finalize_garbage(PyThreadState *tstate, PyGC_Head *collectable)
972 {
973 destructor finalize;
974 PyGC_Head seen;
975
976 /* While we're going through the loop, `finalize(op)` may cause op, or
977 * other objects, to be reclaimed via refcounts falling to zero. So
978 * there's little we can rely on about the structure of the input
979 * `collectable` list across iterations. For safety, we always take the
980 * first object in that list and move it to a temporary `seen` list.
981 * If objects vanish from the `collectable` and `seen` lists we don't
982 * care.
983 */
984 gc_list_init(&seen);
985
986 while (!gc_list_is_empty(collectable)) {
987 PyGC_Head *gc = GC_NEXT(collectable);
988 PyObject *op = FROM_GC(gc);
989 gc_list_move(gc, &seen);
990 if (!_PyGCHead_FINALIZED(gc) &&
991 (finalize = Py_TYPE(op)->tp_finalize) != NULL) {
992 _PyGCHead_SET_FINALIZED(gc);
993 Py_INCREF(op);
994 finalize(op);
995 assert(!_PyErr_Occurred(tstate));
996 Py_DECREF(op);
997 }
998 }
999 gc_list_merge(&seen, collectable);
1000 }
1001
1002 /* Break reference cycles by clearing the containers involved. This is
1003 * tricky business as the lists can be changing and we don't know which
1004 * objects may be freed. It is possible I screwed something up here.
1005 */
1006 static void
1007 delete_garbage(PyThreadState *tstate, GCState *gcstate,
1008 PyGC_Head *collectable, PyGC_Head *old)
1009 {
1010 assert(!_PyErr_Occurred(tstate));
1011
1012 while (!gc_list_is_empty(collectable)) {
1013 PyGC_Head *gc = GC_NEXT(collectable);
1014 PyObject *op = FROM_GC(gc);
1015
1016 _PyObject_ASSERT_WITH_MSG(op, Py_REFCNT(op) > 0,
1017 "refcount is too small");
1018
1019 if (gcstate->debug & DEBUG_SAVEALL) {
1020 assert(gcstate->garbage != NULL);
1021 if (PyList_Append(gcstate->garbage, op) < 0) {
1022 _PyErr_Clear(tstate);
1023 }
1024 }
1025 else {
1026 inquiry clear;
1027 if ((clear = Py_TYPE(op)->tp_clear) != NULL) {
1028 Py_INCREF(op);
1029 (void) clear(op);
1030 if (_PyErr_Occurred(tstate)) {
1031 _PyErr_WriteUnraisableMsg("in tp_clear of",
1032 (PyObject*)Py_TYPE(op));
1033 }
1034 Py_DECREF(op);
1035 }
1036 }
1037 if (GC_NEXT(collectable) == gc) {
1038 /* object is still alive, move it, it may die later */
1039 gc_clear_collecting(gc);
1040 gc_list_move(gc, old);
1041 }
1042 }
1043 }
1044
1045 /* Clear all free lists
1046 * All free lists are cleared during the collection of the highest generation.
1047 * Allocated items in the free list may keep a pymalloc arena occupied.
1048 * Clearing the free lists may give back memory to the OS earlier.
1049 */
1050 static void
1051 clear_freelists(PyInterpreterState *interp)
1052 {
1053 _PyTuple_ClearFreeList(interp);
1054 _PyFloat_ClearFreeList(interp);
1055 _PyList_ClearFreeList(interp);
1056 _PyDict_ClearFreeList(interp);
1057 _PyAsyncGen_ClearFreeLists(interp);
1058 _PyContext_ClearFreeList(interp);
1059 }
1060
// Show stats for objects in each generation
1062 static void
1063 show_stats_each_generations(GCState *gcstate)
1064 {
1065 char buf[100];
1066 size_t pos = 0;
1067
1068 for (int i = 0; i < NUM_GENERATIONS && pos < sizeof(buf); i++) {
1069 pos += PyOS_snprintf(buf+pos, sizeof(buf)-pos,
1070 " %zd",
1071 gc_list_size(GEN_HEAD(gcstate, i)));
1072 }
1073
1074 PySys_FormatStderr(
1075 "gc: objects in each generation:%s\n"
1076 "gc: objects in permanent generation: %zd\n",
1077 buf, gc_list_size(&gcstate->permanent_generation.head));
1078 }
1079
/* Deduce which objects among "base" are unreachable from outside the list
   and move them to 'unreachable'.  The process consists of the following steps:

1. Copy all reference counts to a different field (gc_prev is used to hold
   this copy to save memory).
2. Traverse all objects in "base" and visit all referenced objects using
   "tp_traverse", and for every visited object subtract 1 from the reference
   count (the one that we copied in the previous step).  After this step, all
   objects that are referenced directly from outside must have a strictly
   positive reference count, while all unreachable objects must have a count
   of exactly 0.
3. Identify all unreachable objects (the ones with a 0 reference count) and
   move them to the "unreachable" list.  This step also needs to move back to
   "base" all objects that were initially marked as unreachable but are
   referenced transitively by the reachable objects (the ones with a strictly
   positive reference count).
1094
1095 Contracts:
1096
1097 * The "base" has to be a valid list with no mask set.
1098
1099 * The "unreachable" list must be uninitialized (this function calls
1100 gc_list_init over 'unreachable').
1101
IMPORTANT: This function leaves the objects in 'unreachable' with the
NEXT_MASK_UNREACHABLE flag set; the flag is deliberately not cleared here, to
avoid an extra pass over the list.  Until the flag is cleared (for example by
the 'clear_unreachable_mask' function or by a call to 'move_legacy_finalizers'),
the 'unreachable' list is not a normal list and most gc_list_* functions cannot
be used on it. */
1107 static inline void
1108 deduce_unreachable(PyGC_Head *base, PyGC_Head *unreachable) {
1109 validate_list(base, collecting_clear_unreachable_clear);
1110 /* Using ob_refcnt and gc_refs, calculate which objects in the
1111 * container set are reachable from outside the set (i.e., have a
1112 * refcount greater than 0 when all the references within the
1113 * set are taken into account).
1114 */
1115 update_refs(base); // gc_prev is used for gc_refs
1116 subtract_refs(base);
1117
1118 /* Leave everything reachable from outside base in base, and move
1119 * everything else (in base) to unreachable.
1120 *
1121 * NOTE: This used to move the reachable objects into a reachable
1122 * set instead. But most things usually turn out to be reachable,
1123 * so it's more efficient to move the unreachable things. It "sounds slick"
1124 * to move the unreachable objects, until you think about it - the reason it
1125 * pays isn't actually obvious.
1126 *
1127 * Suppose we create objects A, B, C in that order. They appear in the young
1128 * generation in the same order. If B points to A, and C to B, and C is
1129 * reachable from outside, then the adjusted refcounts will be 0, 0, and 1
1130 * respectively.
1131 *
1132 * When move_unreachable finds A, A is moved to the unreachable list. The
1133 * same for B when it's first encountered. Then C is traversed, B is moved
1134 * _back_ to the reachable list. B is eventually traversed, and then A is
1135 * moved back to the reachable list.
1136 *
1137 * So instead of not moving at all, the reachable objects B and A are moved
1138 * twice each. Why is this a win? A straightforward algorithm to move the
1139 * reachable objects instead would move A, B, and C once each.
1140 *
1141 * The key is that this dance leaves the objects in order C, B, A - it's
1142 * reversed from the original order. On all _subsequent_ scans, none of
1143 * them will move. Since most objects aren't in cycles, this can save an
1144 * unbounded number of moves across an unbounded number of later collections.
1145 * It can cost more only the first time the chain is scanned.
1146 *
1147 * Drawback: move_unreachable is also used to find out what's still trash
1148 * after finalizers may resurrect objects. In _that_ case most unreachable
1149 * objects will remain unreachable, so it would be more efficient to move
1150 * the reachable objects instead. But this is a one-time cost, probably not
1151 * worth complicating the code to speed just a little.
1152 */
1153 gc_list_init(unreachable);
1154 move_unreachable(base, unreachable); // gc_prev is pointer again
1155 validate_list(base, collecting_clear_unreachable_clear);
1156 validate_list(unreachable, collecting_set_unreachable_set);
1157 }
1158
/* Handle objects that may have been resurrected by a call to 'finalize_garbage',
   moving them to 'old_generation' and placing the rest on 'still_unreachable'.

   Contracts:
       * After this function 'unreachable' must not be used anymore, and
         'still_unreachable' will contain the objects that did not resurrect.

       * The "still_unreachable" list must be uninitialized (this function calls
         gc_list_init over 'still_unreachable').

IMPORTANT: After a call to this function, the objects in the 'still_unreachable'
set will still have the PREV_MASK_COLLECTING flag set, but since they are about
to be removed we skip the expense of clearing the flag with an extra pass. */
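
/* For orientation: an object is "resurrected" when its tp_finalize (for
 * example a Python __del__ method) stores a new reference to it somewhere
 * reachable, such as appending self to a module-level list.  The second
 * deduce_unreachable() pass below then finds it reachable again and it is
 * merged into 'old_generation' instead of being cleared.
 */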
1172 static inline void
1173 handle_resurrected_objects(PyGC_Head *unreachable, PyGC_Head* still_unreachable,
1174 PyGC_Head *old_generation)
1175 {
1176 // Remove the PREV_MASK_COLLECTING from unreachable
1177 // to prepare it for a new call to 'deduce_unreachable'
1178 gc_list_clear_collecting(unreachable);
1179
    // After the call to deduce_unreachable, the 'still_unreachable' set will
    // have the PREV_MASK_COLLECTING flag set, but the objects are going to be
    // removed, so we can skip the expense of clearing the flag.
1183 PyGC_Head* resurrected = unreachable;
1184 deduce_unreachable(resurrected, still_unreachable);
1185 clear_unreachable_mask(still_unreachable);
1186
1187 // Move the resurrected objects to the old generation for future collection.
1188 gc_list_merge(resurrected, old_generation);
1189 }
1190
1191 /* This is the main function. Read this to understand how the
1192 * collection process works. */
1193 static Py_ssize_t
1194 gc_collect_main(PyThreadState *tstate, int generation,
1195 Py_ssize_t *n_collected, Py_ssize_t *n_uncollectable,
1196 int nofail)
1197 {
1198 int i;
1199 Py_ssize_t m = 0; /* # objects collected */
1200 Py_ssize_t n = 0; /* # unreachable objects that couldn't be collected */
1201 PyGC_Head *young; /* the generation we are examining */
1202 PyGC_Head *old; /* next older generation */
1203 PyGC_Head unreachable; /* non-problematic unreachable trash */
1204 PyGC_Head finalizers; /* objects with, & reachable from, __del__ */
1205 PyGC_Head *gc;
1206 _PyTime_t t1 = 0; /* initialize to prevent a compiler warning */
1207 GCState *gcstate = &tstate->interp->gc;
1208
1209 // gc_collect_main() must not be called before _PyGC_Init
1210 // or after _PyGC_Fini()
1211 assert(gcstate->garbage != NULL);
1212 assert(!_PyErr_Occurred(tstate));
1213
1214 if (gcstate->debug & DEBUG_STATS) {
1215 PySys_WriteStderr("gc: collecting generation %d...\n", generation);
1216 show_stats_each_generations(gcstate);
1217 t1 = _PyTime_GetPerfCounter();
1218 }
1219
1220 if (PyDTrace_GC_START_ENABLED())
1221 PyDTrace_GC_START(generation);
1222
1223 /* update collection and allocation counters */
1224 if (generation+1 < NUM_GENERATIONS)
1225 gcstate->generations[generation+1].count += 1;
1226 for (i = 0; i <= generation; i++)
1227 gcstate->generations[i].count = 0;
1228
1229 /* merge younger generations with one we are currently collecting */
1230 for (i = 0; i < generation; i++) {
1231 gc_list_merge(GEN_HEAD(gcstate, i), GEN_HEAD(gcstate, generation));
1232 }
1233
1234 /* handy references */
1235 young = GEN_HEAD(gcstate, generation);
1236 if (generation < NUM_GENERATIONS-1)
1237 old = GEN_HEAD(gcstate, generation+1);
1238 else
1239 old = young;
1240 validate_list(old, collecting_clear_unreachable_clear);
1241
1242 deduce_unreachable(young, &unreachable);
1243
1244 untrack_tuples(young);
1245 /* Move reachable objects to next generation. */
1246 if (young != old) {
1247 if (generation == NUM_GENERATIONS - 2) {
1248 gcstate->long_lived_pending += gc_list_size(young);
1249 }
1250 gc_list_merge(young, old);
1251 }
1252 else {
1253 /* We only un-track dicts in full collections, to avoid quadratic
1254 dict build-up. See issue #14775. */
1255 untrack_dicts(young);
1256 gcstate->long_lived_pending = 0;
1257 gcstate->long_lived_total = gc_list_size(young);
1258 }
1259
1260 /* All objects in unreachable are trash, but objects reachable from
1261 * legacy finalizers (e.g. tp_del) can't safely be deleted.
1262 */
1263 gc_list_init(&finalizers);
1264 // NEXT_MASK_UNREACHABLE is cleared here.
    // After move_legacy_finalizers(), unreachable is a normal list again.
1266 move_legacy_finalizers(&unreachable, &finalizers);
1267 /* finalizers contains the unreachable objects with a legacy finalizer;
1268 * unreachable objects reachable *from* those are also uncollectable,
1269 * and we move those into the finalizers list too.
1270 */
1271 move_legacy_finalizer_reachable(&finalizers);
1272
1273 validate_list(&finalizers, collecting_clear_unreachable_clear);
1274 validate_list(&unreachable, collecting_set_unreachable_clear);
1275
1276 /* Print debugging information. */
1277 if (gcstate->debug & DEBUG_COLLECTABLE) {
1278 for (gc = GC_NEXT(&unreachable); gc != &unreachable; gc = GC_NEXT(gc)) {
1279 debug_cycle("collectable", FROM_GC(gc));
1280 }
1281 }
1282
1283 /* Clear weakrefs and invoke callbacks as necessary. */
1284 m += handle_weakrefs(&unreachable, old);
1285
1286 validate_list(old, collecting_clear_unreachable_clear);
1287 validate_list(&unreachable, collecting_set_unreachable_clear);
1288
1289 /* Call tp_finalize on objects which have one. */
1290 finalize_garbage(tstate, &unreachable);
1291
1292 /* Handle any objects that may have resurrected after the call
1293 * to 'finalize_garbage' and continue the collection with the
1294 * objects that are still unreachable */
1295 PyGC_Head final_unreachable;
1296 handle_resurrected_objects(&unreachable, &final_unreachable, old);
1297
1298 /* Call tp_clear on objects in the final_unreachable set. This will cause
1299 * the reference cycles to be broken. It may also cause some objects
1300 * in finalizers to be freed.
1301 */
1302 m += gc_list_size(&final_unreachable);
1303 delete_garbage(tstate, gcstate, &final_unreachable, old);
1304
1305 /* Collect statistics on uncollectable objects found and print
1306 * debugging information. */
1307 for (gc = GC_NEXT(&finalizers); gc != &finalizers; gc = GC_NEXT(gc)) {
1308 n++;
1309 if (gcstate->debug & DEBUG_UNCOLLECTABLE)
1310 debug_cycle("uncollectable", FROM_GC(gc));
1311 }
1312 if (gcstate->debug & DEBUG_STATS) {
1313 double d = _PyTime_AsSecondsDouble(_PyTime_GetPerfCounter() - t1);
1314 PySys_WriteStderr(
1315 "gc: done, %zd unreachable, %zd uncollectable, %.4fs elapsed\n",
1316 n+m, n, d);
1317 }
1318
1319 /* Append instances in the uncollectable set to a Python
1320 * reachable list of garbage. The programmer has to deal with
1321 * this if they insist on creating this type of structure.
1322 */
1323 handle_legacy_finalizers(tstate, gcstate, &finalizers, old);
1324 validate_list(old, collecting_clear_unreachable_clear);
1325
    /* Clear free lists only during the collection of the highest
     * generation */
1328 if (generation == NUM_GENERATIONS-1) {
1329 clear_freelists(tstate->interp);
1330 }
1331
1332 if (_PyErr_Occurred(tstate)) {
1333 if (nofail) {
1334 _PyErr_Clear(tstate);
1335 }
1336 else {
1337 _PyErr_WriteUnraisableMsg("in garbage collection", NULL);
1338 }
1339 }
1340
1341 /* Update stats */
1342 if (n_collected) {
1343 *n_collected = m;
1344 }
1345 if (n_uncollectable) {
1346 *n_uncollectable = n;
1347 }
1348
1349 struct gc_generation_stats *stats = &gcstate->generation_stats[generation];
1350 stats->collections++;
1351 stats->collected += m;
1352 stats->uncollectable += n;
1353
1354 if (PyDTrace_GC_DONE_ENABLED()) {
1355 PyDTrace_GC_DONE(n + m);
1356 }
1357
1358 assert(!_PyErr_Occurred(tstate));
1359 return n + m;
1360 }
1361
1362 /* Invoke progress callbacks to notify clients that garbage collection
1363 * is starting or stopping
1364 */
1365 static void
1366 invoke_gc_callback(PyThreadState *tstate, const char *phase,
1367 int generation, Py_ssize_t collected,
1368 Py_ssize_t uncollectable)
1369 {
1370 assert(!_PyErr_Occurred(tstate));
1371
1372 /* we may get called very early */
1373 GCState *gcstate = &tstate->interp->gc;
1374 if (gcstate->callbacks == NULL) {
1375 return;
1376 }
1377
1378 /* The local variable cannot be rebound, check it for sanity */
1379 assert(PyList_CheckExact(gcstate->callbacks));
1380 PyObject *info = NULL;
1381 if (PyList_GET_SIZE(gcstate->callbacks) != 0) {
1382 info = Py_BuildValue("{sisnsn}",
1383 "generation", generation,
1384 "collected", collected,
1385 "uncollectable", uncollectable);
1386 if (info == NULL) {
1387 PyErr_WriteUnraisable(NULL);
1388 return;
1389 }
1390 }
1391
1392 PyObject *phase_obj = PyUnicode_FromString(phase);
1393 if (phase_obj == NULL) {
1394 Py_XDECREF(info);
1395 PyErr_WriteUnraisable(NULL);
1396 return;
1397 }
1398
1399 PyObject *stack[] = {phase_obj, info};
1400 for (Py_ssize_t i=0; i<PyList_GET_SIZE(gcstate->callbacks); i++) {
1401 PyObject *r, *cb = PyList_GET_ITEM(gcstate->callbacks, i);
1402 Py_INCREF(cb); /* make sure cb doesn't go away */
1403 r = PyObject_Vectorcall(cb, stack, 2, NULL);
1404 if (r == NULL) {
1405 PyErr_WriteUnraisable(cb);
1406 }
1407 else {
1408 Py_DECREF(r);
1409 }
1410 Py_DECREF(cb);
1411 }
1412 Py_DECREF(phase_obj);
1413 Py_XDECREF(info);
1414 assert(!_PyErr_Occurred(tstate));
1415 }
1416
1417 /* Perform garbage collection of a generation and invoke
1418 * progress callbacks.
1419 */
1420 static Py_ssize_t
1421 gc_collect_with_callback(PyThreadState *tstate, int generation)
1422 {
1423 assert(!_PyErr_Occurred(tstate));
1424 Py_ssize_t result, collected, uncollectable;
1425 invoke_gc_callback(tstate, "start", generation, 0, 0);
1426 result = gc_collect_main(tstate, generation, &collected, &uncollectable, 0);
1427 invoke_gc_callback(tstate, "stop", generation, collected, uncollectable);
1428 assert(!_PyErr_Occurred(tstate));
1429 return result;
1430 }
1431
1432 static Py_ssize_t
1433 gc_collect_generations(PyThreadState *tstate)
1434 {
1435 GCState *gcstate = &tstate->interp->gc;
    /* Find the oldest generation (highest numbered) where the count
     * exceeds the threshold.  Objects in that generation and
     * generations younger than it will be collected. */
1439 Py_ssize_t n = 0;
1440 for (int i = NUM_GENERATIONS-1; i >= 0; i--) {
1441 if (gcstate->generations[i].count > gcstate->generations[i].threshold) {
            /* Avoid quadratic performance degradation in the number
               of tracked objects (see also issue #4074):

               To limit the cost of garbage collection, there are two strategies:
                 - make each collection faster, e.g. by scanning fewer objects
                 - do fewer collections
               This heuristic is about the latter strategy.
1449
1450 In addition to the various configurable thresholds, we only trigger a
1451 full collection if the ratio
1452
1453 long_lived_pending / long_lived_total
1454
1455 is above a given value (hardwired to 25%).
1456
1457 The reason is that, while "non-full" collections (i.e., collections of
1458 the young and middle generations) will always examine roughly the same
1459 number of objects -- determined by the aforementioned thresholds --,
1460 the cost of a full collection is proportional to the total number of
1461 long-lived objects, which is virtually unbounded.
1462
1463 Indeed, it has been remarked that doing a full collection every
1464 <constant number> of object creations entails a dramatic performance
               degradation in workloads that consist of creating and storing lots of
1466 long-lived objects (e.g. building a large list of GC-tracked objects would
1467 show quadratic performance, instead of linear as expected: see issue #4074).
1468
1469 Using the above ratio, instead, yields amortized linear performance in
1470 the total number of objects (the effect of which can be summarized
1471 thusly: "each full garbage collection is more and more costly as the
1472 number of objects grows, but we do fewer and fewer of them").
1473
1474 This heuristic was suggested by Martin von Löwis on python-dev in
1475 June 2008. His original analysis and proposal can be found at:
1476 http://mail.python.org/pipermail/python-dev/2008-June/080579.html
1477 */
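            /* Illustrative arithmetic (made-up numbers): with
             * long_lived_total == 100000 and long_lived_pending == 20000,
             * 20000 < 100000 / 4 == 25000, so the full collection is skipped
             * even though generation 2's count exceeded its threshold; once
             * long_lived_pending reaches 25000 or more, the full collection
             * runs.
             */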
1478 if (i == NUM_GENERATIONS - 1
1479 && gcstate->long_lived_pending < gcstate->long_lived_total / 4)
1480 continue;
1481 n = gc_collect_with_callback(tstate, i);
1482 break;
1483 }
1484 }
1485 return n;
1486 }
1487
1488 #include "clinic/gcmodule.c.h"
1489
1490 /*[clinic input]
1491 gc.enable
1492
1493 Enable automatic garbage collection.
1494 [clinic start generated code]*/
1495
1496 static PyObject *
1497 gc_enable_impl(PyObject *module)
1498 /*[clinic end generated code: output=45a427e9dce9155c input=81ac4940ca579707]*/
1499 {
1500 PyGC_Enable();
1501 Py_RETURN_NONE;
1502 }
1503
1504 /*[clinic input]
1505 gc.disable
1506
1507 Disable automatic garbage collection.
1508 [clinic start generated code]*/
1509
1510 static PyObject *
1511 gc_disable_impl(PyObject *module)
1512 /*[clinic end generated code: output=97d1030f7aa9d279 input=8c2e5a14e800d83b]*/
1513 {
1514 PyGC_Disable();
1515 Py_RETURN_NONE;
1516 }
1517
1518 /*[clinic input]
1519 gc.isenabled -> bool
1520
1521 Returns true if automatic garbage collection is enabled.
1522 [clinic start generated code]*/
1523
1524 static int
1525 gc_isenabled_impl(PyObject *module)
1526 /*[clinic end generated code: output=1874298331c49130 input=30005e0422373b31]*/
1527 {
1528 return PyGC_IsEnabled();
1529 }
1530
1531 /*[clinic input]
1532 gc.collect -> Py_ssize_t
1533
1534 generation: int(c_default="NUM_GENERATIONS - 1") = 2
1535
1536 Run the garbage collector.
1537
1538 With no arguments, run a full collection. The optional argument
1539 may be an integer specifying which generation to collect. A ValueError
1540 is raised if the generation number is invalid.
1541
1542 The number of unreachable objects is returned.
1543 [clinic start generated code]*/
1544
1545 static Py_ssize_t
1546 gc_collect_impl(PyObject *module, int generation)
1547 /*[clinic end generated code: output=b697e633043233c7 input=40720128b682d879]*/
1548 {
1549 PyThreadState *tstate = _PyThreadState_GET();
1550
1551 if (generation < 0 || generation >= NUM_GENERATIONS) {
1552 _PyErr_SetString(tstate, PyExc_ValueError, "invalid generation");
1553 return -1;
1554 }
1555
1556 GCState *gcstate = &tstate->interp->gc;
1557 Py_ssize_t n;
1558 if (gcstate->collecting) {
1559 /* already collecting, don't do anything */
1560 n = 0;
1561 }
1562 else {
1563 gcstate->collecting = 1;
1564 n = gc_collect_with_callback(tstate, generation);
1565 gcstate->collecting = 0;
1566 }
1567 return n;
1568 }
1569
1570 /*[clinic input]
1571 gc.set_debug
1572
1573 flags: int
1574 An integer that can have the following bits turned on:
1575 DEBUG_STATS - Print statistics during collection.
1576 DEBUG_COLLECTABLE - Print collectable objects found.
1577 DEBUG_UNCOLLECTABLE - Print unreachable but uncollectable objects
1578 found.
1579 DEBUG_SAVEALL - Save objects to gc.garbage rather than freeing them.
1580 DEBUG_LEAK - Debug leaking programs (everything but STATS).
1581 /
1582
1583 Set the garbage collection debugging flags.
1584
1585 Debugging information is written to sys.stderr.
1586 [clinic start generated code]*/
1587
1588 static PyObject *
1589 gc_set_debug_impl(PyObject *module, int flags)
1590 /*[clinic end generated code: output=7c8366575486b228 input=5e5ce15e84fbed15]*/
1591 {
1592 GCState *gcstate = get_gc_state();
1593 gcstate->debug = flags;
1594 Py_RETURN_NONE;
1595 }
1596
1597 /*[clinic input]
1598 gc.get_debug -> int
1599
1600 Get the garbage collection debugging flags.
1601 [clinic start generated code]*/
1602
1603 static int
1604 gc_get_debug_impl(PyObject *module)
1605 /*[clinic end generated code: output=91242f3506cd1e50 input=91a101e1c3b98366]*/
1606 {
1607 GCState *gcstate = get_gc_state();
1608 return gcstate->debug;
1609 }
1610
1611 PyDoc_STRVAR(gc_set_thresh__doc__,
1612 "set_threshold(threshold0, [threshold1, threshold2]) -> None\n"
1613 "\n"
1614 "Sets the collection thresholds. Setting threshold0 to zero disables\n"
1615 "collection.\n");
1616
1617 static PyObject *
1618 gc_set_threshold(PyObject *self, PyObject *args)
1619 {
1620 GCState *gcstate = get_gc_state();
1621 if (!PyArg_ParseTuple(args, "i|ii:set_threshold",
1622 &gcstate->generations[0].threshold,
1623 &gcstate->generations[1].threshold,
1624 &gcstate->generations[2].threshold))
1625 return NULL;
1626 for (int i = 3; i < NUM_GENERATIONS; i++) {
1627 /* generations higher than 2 get the same threshold */
1628 gcstate->generations[i].threshold = gcstate->generations[2].threshold;
1629 }
1630 Py_RETURN_NONE;
1631 }
1632
1633 /*[clinic input]
1634 gc.get_threshold
1635
1636 Return the current collection thresholds.
1637 [clinic start generated code]*/
1638
1639 static PyObject *
1640 gc_get_threshold_impl(PyObject *module)
1641 /*[clinic end generated code: output=7902bc9f41ecbbd8 input=286d79918034d6e6]*/
1642 {
1643 GCState *gcstate = get_gc_state();
1644 return Py_BuildValue("(iii)",
1645 gcstate->generations[0].threshold,
1646 gcstate->generations[1].threshold,
1647 gcstate->generations[2].threshold);
1648 }
1649
1650 /*[clinic input]
1651 gc.get_count
1652
1653 Return a three-tuple of the current collection counts.
1654 [clinic start generated code]*/
1655
1656 static PyObject *
1657 gc_get_count_impl(PyObject *module)
1658 /*[clinic end generated code: output=354012e67b16398f input=a392794a08251751]*/
1659 {
1660 GCState *gcstate = get_gc_state();
1661 return Py_BuildValue("(iii)",
1662 gcstate->generations[0].count,
1663 gcstate->generations[1].count,
1664 gcstate->generations[2].count);
1665 }
1666
1667 static int
1668 referrersvisit(PyObject* obj, PyObject *objs)
1669 {
1670 Py_ssize_t i;
1671 for (i = 0; i < PyTuple_GET_SIZE(objs); i++)
1672 if (PyTuple_GET_ITEM(objs, i) == obj)
1673 return 1;
1674 return 0;
1675 }
1676
1677 static int
1678 gc_referrers_for(PyObject *objs, PyGC_Head *list, PyObject *resultlist)
1679 {
1680 PyGC_Head *gc;
1681 PyObject *obj;
1682 traverseproc traverse;
1683 for (gc = GC_NEXT(list); gc != list; gc = GC_NEXT(gc)) {
1684 obj = FROM_GC(gc);
1685 traverse = Py_TYPE(obj)->tp_traverse;
1686 if (obj == objs || obj == resultlist)
1687 continue;
1688 if (traverse(obj, (visitproc)referrersvisit, objs)) {
1689 if (PyList_Append(resultlist, obj) < 0)
1690 return 0; /* error */
1691 }
1692 }
1693 return 1; /* no error */
1694 }
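
/*
   Illustrative sketch (hypothetical extension type, not defined in this
   module): get_referrers() and get_referents() both lean on the tp_traverse
   protocol, where a type calls the supplied visitproc once for every object
   it directly references and propagates any non-zero return value, exactly
   as referrersvisit() above relies on.

       typedef struct {
           PyObject_HEAD
           PyObject *first;     // owned reference, may be NULL
           PyObject *second;    // owned reference, may be NULL
       } pairobject;

       static int
       pair_traverse(pairobject *self, visitproc visit, void *arg)
       {
           Py_VISIT(self->first);    // returns the visitor's result if non-zero
           Py_VISIT(self->second);
           return 0;
       }
*/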
1695
1696 PyDoc_STRVAR(gc_get_referrers__doc__,
1697 "get_referrers(*objs) -> list\n\
1698 Return the list of objects that directly refer to any of objs.");
1699
1700 static PyObject *
1701 gc_get_referrers(PyObject *self, PyObject *args)
1702 {
1703 if (PySys_Audit("gc.get_referrers", "(O)", args) < 0) {
1704 return NULL;
1705 }
1706
1707 PyObject *result = PyList_New(0);
1708 if (!result) {
1709 return NULL;
1710 }
1711
1712 GCState *gcstate = get_gc_state();
1713 for (int i = 0; i < NUM_GENERATIONS; i++) {
1714 if (!(gc_referrers_for(args, GEN_HEAD(gcstate, i), result))) {
1715 Py_DECREF(result);
1716 return NULL;
1717 }
1718 }
1719 return result;
1720 }
1721
1722 /* Append obj to list; return true if error (out of memory), false if OK. */
1723 static int
1724 referentsvisit(PyObject *obj, PyObject *list)
1725 {
1726 return PyList_Append(list, obj) < 0;
1727 }
1728
1729 PyDoc_STRVAR(gc_get_referents__doc__,
1730 "get_referents(*objs) -> list\n\
1731 Return the list of objects that are directly referred to by objs.");
1732
1733 static PyObject *
1734 gc_get_referents(PyObject *self, PyObject *args)
1735 {
1736 Py_ssize_t i;
1737 if (PySys_Audit("gc.get_referents", "(O)", args) < 0) {
1738 return NULL;
1739 }
1740 PyObject *result = PyList_New(0);
1741
1742 if (result == NULL)
1743 return NULL;
1744
1745 for (i = 0; i < PyTuple_GET_SIZE(args); i++) {
1746 traverseproc traverse;
1747 PyObject *obj = PyTuple_GET_ITEM(args, i);
1748
1749 if (!_PyObject_IS_GC(obj))
1750 continue;
1751 traverse = Py_TYPE(obj)->tp_traverse;
1752 if (! traverse)
1753 continue;
1754 if (traverse(obj, (visitproc)referentsvisit, result)) {
1755 Py_DECREF(result);
1756 return NULL;
1757 }
1758 }
1759 return result;
1760 }
1761
1762 /*[clinic input]
1763 gc.get_objects
1764 generation: Py_ssize_t(accept={int, NoneType}, c_default="-1") = None
1765 Generation to extract the objects from.
1766
1767 Return a list of objects tracked by the collector (excluding the list returned).
1768
1769 If generation is not None, return only the objects tracked by the collector
1770 that are in that generation.
1771 [clinic start generated code]*/
1772
1773 static PyObject *
1774 gc_get_objects_impl(PyObject *module, Py_ssize_t generation)
1775 /*[clinic end generated code: output=48b35fea4ba6cb0e input=ef7da9df9806754c]*/
1776 {
1777 PyThreadState *tstate = _PyThreadState_GET();
1778 int i;
1779 PyObject* result;
1780 GCState *gcstate = &tstate->interp->gc;
1781
1782 if (PySys_Audit("gc.get_objects", "n", generation) < 0) {
1783 return NULL;
1784 }
1785
1786 result = PyList_New(0);
1787 if (result == NULL) {
1788 return NULL;
1789 }
1790
1791 /* If generation is passed, we extract only that generation */
1792 if (generation != -1) {
1793 if (generation >= NUM_GENERATIONS) {
1794 _PyErr_Format(tstate, PyExc_ValueError,
1795 "generation parameter must be less than the number of "
1796 "available generations (%i)",
1797 NUM_GENERATIONS);
1798 goto error;
1799 }
1800
1801 if (generation < 0) {
1802 _PyErr_SetString(tstate, PyExc_ValueError,
1803 "generation parameter cannot be negative");
1804 goto error;
1805 }
1806
1807 if (append_objects(result, GEN_HEAD(gcstate, generation))) {
1808 goto error;
1809 }
1810
1811 return result;
1812 }
1813
1814 /* If generation is not passed or None, get all objects from all generations */
1815 for (i = 0; i < NUM_GENERATIONS; i++) {
1816 if (append_objects(result, GEN_HEAD(gcstate, i))) {
1817 goto error;
1818 }
1819 }
1820 return result;
1821
1822 error:
1823 Py_DECREF(result);
1824 return NULL;
1825 }
1826
1827 /*[clinic input]
1828 gc.get_stats
1829
1830 Return a list of dictionaries containing per-generation statistics.
1831 [clinic start generated code]*/
1832
1833 static PyObject *
1834 gc_get_stats_impl(PyObject *module)
1835 /*[clinic end generated code: output=a8ab1d8a5d26f3ab input=1ef4ed9d17b1a470]*/
1836 {
1837 int i;
1838 struct gc_generation_stats stats[NUM_GENERATIONS], *st;
1839
1840 /* To get consistent values despite allocations while constructing
1841 the result list, we use a snapshot of the running stats. */
1842 GCState *gcstate = get_gc_state();
1843 for (i = 0; i < NUM_GENERATIONS; i++) {
1844 stats[i] = gcstate->generation_stats[i];
1845 }
1846
1847 PyObject *result = PyList_New(0);
1848 if (result == NULL)
1849 return NULL;
1850
1851 for (i = 0; i < NUM_GENERATIONS; i++) {
1852 PyObject *dict;
1853 st = &stats[i];
1854 dict = Py_BuildValue("{snsnsn}",
1855 "collections", st->collections,
1856 "collected", st->collected,
1857 "uncollectable", st->uncollectable
1858 );
1859 if (dict == NULL)
1860 goto error;
1861 if (PyList_Append(result, dict)) {
1862 Py_DECREF(dict);
1863 goto error;
1864 }
1865 Py_DECREF(dict);
1866 }
1867 return result;
1868
1869 error:
1870 Py_XDECREF(result);
1871 return NULL;
1872 }
1873
1874
1875 /*[clinic input]
1876 gc.is_tracked
1877
1878 obj: object
1879 /
1880
1881 Returns true if the object is tracked by the garbage collector.
1882
1883 Simple atomic objects will return false.
1884 [clinic start generated code]*/
1885
1886 static PyObject *
1887 gc_is_tracked(PyObject *module, PyObject *obj)
1888 /*[clinic end generated code: output=14f0103423b28e31 input=d83057f170ea2723]*/
1889 {
1890 PyObject *result;
1891
1892 if (_PyObject_IS_GC(obj) && _PyObject_GC_IS_TRACKED(obj))
1893 result = Py_True;
1894 else
1895 result = Py_False;
1896 return Py_NewRef(result);
1897 }
1898
1899 /*[clinic input]
1900 gc.is_finalized
1901
1902 obj: object
1903 /
1904
1905 Returns true if the object has already been finalized by the GC.
1906 [clinic start generated code]*/
1907
1908 static PyObject *
1909 gc_is_finalized(PyObject *module, PyObject *obj)
1910 /*[clinic end generated code: output=e1516ac119a918ed input=201d0c58f69ae390]*/
1911 {
1912 if (_PyObject_IS_GC(obj) && _PyGCHead_FINALIZED(AS_GC(obj))) {
1913 Py_RETURN_TRUE;
1914 }
1915 Py_RETURN_FALSE;
1916 }
1917
1918 /*[clinic input]
1919 gc.freeze
1920
1921 Freeze all currently tracked objects and ignore them for future collections.
1922
1923 This can be used before a POSIX fork() call to make the gc copy-on-write friendly.
1924 Note: a collection run just before a POSIX fork() call may free pages that are
1925 later reused for new allocations; writing to them after the fork triggers copy-on-write.
1926 [clinic start generated code]*/
1927
1928 static PyObject *
1929 gc_freeze_impl(PyObject *module)
1930 /*[clinic end generated code: output=502159d9cdc4c139 input=b602b16ac5febbe5]*/
1931 {
1932 GCState *gcstate = get_gc_state();
1933 for (int i = 0; i < NUM_GENERATIONS; ++i) {
1934 gc_list_merge(GEN_HEAD(gcstate, i), &gcstate->permanent_generation.head);
1935 gcstate->generations[i].count = 0;
1936 }
1937 Py_RETURN_NONE;
1938 }
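
/*
   Illustrative sketch (an embedding application on a POSIX system; nothing
   here is used by the module itself): the copy-on-write friendly pre-fork
   pattern described in the docstring above.  PyOS_BeforeFork() and
   PyOS_AfterFork_Parent()/PyOS_AfterFork_Child() bookkeeping, plus error
   handling, are omitted.

       #include <unistd.h>    // fork()

       static void
       freeze_then_fork(void)
       {
           PyGC_Disable();    // avoid a collection between freeze() and fork()
           PyObject *gc_module = PyImport_ImportModule("gc");
           if (gc_module != NULL) {
               PyObject *r = PyObject_CallMethod(gc_module, "freeze", NULL);
               Py_XDECREF(r);
               Py_DECREF(gc_module);
           }
           if (fork() == 0) {
               PyGC_Enable();    // the child resumes normal collection
           }
       }
*/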
1939
1940 /*[clinic input]
1941 gc.unfreeze
1942
1943 Unfreeze all objects in the permanent generation.
1944
1945 Put all objects in the permanent generation back into the oldest generation.
1946 [clinic start generated code]*/
1947
1948 static PyObject *
1949 gc_unfreeze_impl(PyObject *module)
1950 /*[clinic end generated code: output=1c15f2043b25e169 input=2dd52b170f4cef6c]*/
1951 {
1952 GCState *gcstate = get_gc_state();
1953 gc_list_merge(&gcstate->permanent_generation.head,
1954 GEN_HEAD(gcstate, NUM_GENERATIONS-1));
1955 Py_RETURN_NONE;
1956 }
1957
1958 /*[clinic input]
1959 gc.get_freeze_count -> Py_ssize_t
1960
1961 Return the number of objects in the permanent generation.
1962 [clinic start generated code]*/
1963
1964 static Py_ssize_t
1965 gc_get_freeze_count_impl(PyObject *module)
1966 /*[clinic end generated code: output=61cbd9f43aa032e1 input=45ffbc65cfe2a6ed]*/
1967 {
1968 GCState *gcstate = get_gc_state();
1969 return gc_list_size(&gcstate->permanent_generation.head);
1970 }
1971
1972
1973 PyDoc_STRVAR(gc__doc__,
1974 "This module provides access to the garbage collector for reference cycles.\n"
1975 "\n"
1976 "enable() -- Enable automatic garbage collection.\n"
1977 "disable() -- Disable automatic garbage collection.\n"
1978 "isenabled() -- Returns true if automatic collection is enabled.\n"
1979 "collect() -- Do a full collection right now.\n"
1980 "get_count() -- Return the current collection counts.\n"
1981 "get_stats() -- Return list of dictionaries containing per-generation stats.\n"
1982 "set_debug() -- Set debugging flags.\n"
1983 "get_debug() -- Get debugging flags.\n"
1984 "set_threshold() -- Set the collection thresholds.\n"
1985 "get_threshold() -- Return the current the collection thresholds.\n"
1986 "get_objects() -- Return a list of all objects tracked by the collector.\n"
1987 "is_tracked() -- Returns true if a given object is tracked.\n"
1988 "is_finalized() -- Returns true if a given object has been already finalized.\n"
1989 "get_referrers() -- Return the list of objects that refer to an object.\n"
1990 "get_referents() -- Return the list of objects that an object refers to.\n"
1991 "freeze() -- Freeze all tracked objects and ignore them for future collections.\n"
1992 "unfreeze() -- Unfreeze all objects in the permanent generation.\n"
1993 "get_freeze_count() -- Return the number of objects in the permanent generation.\n");
1994
1995 static PyMethodDef GcMethods[] = {
1996 GC_ENABLE_METHODDEF
1997 GC_DISABLE_METHODDEF
1998 GC_ISENABLED_METHODDEF
1999 GC_SET_DEBUG_METHODDEF
2000 GC_GET_DEBUG_METHODDEF
2001 GC_GET_COUNT_METHODDEF
2002 {"set_threshold", gc_set_threshold, METH_VARARGS, gc_set_thresh__doc__},
2003 GC_GET_THRESHOLD_METHODDEF
2004 GC_COLLECT_METHODDEF
2005 GC_GET_OBJECTS_METHODDEF
2006 GC_GET_STATS_METHODDEF
2007 GC_IS_TRACKED_METHODDEF
2008 GC_IS_FINALIZED_METHODDEF
2009 {"get_referrers", gc_get_referrers, METH_VARARGS,
2010 gc_get_referrers__doc__},
2011 {"get_referents", gc_get_referents, METH_VARARGS,
2012 gc_get_referents__doc__},
2013 GC_FREEZE_METHODDEF
2014 GC_UNFREEZE_METHODDEF
2015 GC_GET_FREEZE_COUNT_METHODDEF
2016 {NULL, NULL} /* Sentinel */
2017 };
2018
2019 static int
2020 gcmodule_exec(PyObject *module)
2021 {
2022 GCState *gcstate = get_gc_state();
2023
2024 /* garbage and callbacks are initialized by _PyGC_Init() early in
2025 * the interpreter lifecycle. */
2026 assert(gcstate->garbage != NULL);
2027 if (PyModule_AddObjectRef(module, "garbage", gcstate->garbage) < 0) {
2028 return -1;
2029 }
2030 assert(gcstate->callbacks != NULL);
2031 if (PyModule_AddObjectRef(module, "callbacks", gcstate->callbacks) < 0) {
2032 return -1;
2033 }
2034
2035 #define ADD_INT(NAME) if (PyModule_AddIntConstant(module, #NAME, NAME) < 0) { return -1; }
2036 ADD_INT(DEBUG_STATS);
2037 ADD_INT(DEBUG_COLLECTABLE);
2038 ADD_INT(DEBUG_UNCOLLECTABLE);
2039 ADD_INT(DEBUG_SAVEALL);
2040 ADD_INT(DEBUG_LEAK);
2041 #undef ADD_INT
2042 return 0;
2043 }
2044
2045 static PyModuleDef_Slot gcmodule_slots[] = {
2046 {Py_mod_exec, gcmodule_exec},
2047 {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
2048 {0, NULL}
2049 };
2050
2051 static struct PyModuleDef gcmodule = {
2052 PyModuleDef_HEAD_INIT,
2053 .m_name = "gc",
2054 .m_doc = gc__doc__,
2055 .m_size = 0, // per interpreter state, see: get_gc_state()
2056 .m_methods = GcMethods,
2057 .m_slots = gcmodule_slots
2058 };
2059
2060 PyMODINIT_FUNC
2061 PyInit_gc(void)
2062 {
2063 return PyModuleDef_Init(&gcmodule);
2064 }
2065
2066 /* C API for controlling the state of the garbage collector */
2067 int
2068 PyGC_Enable(void)
2069 {
2070 GCState *gcstate = get_gc_state();
2071 int old_state = gcstate->enabled;
2072 gcstate->enabled = 1;
2073 return old_state;
2074 }
2075
2076 int
2077 PyGC_Disable(void)
2078 {
2079 GCState *gcstate = get_gc_state();
2080 int old_state = gcstate->enabled;
2081 gcstate->enabled = 0;
2082 return old_state;
2083 }
2084
2085 int
2086 PyGC_IsEnabled(void)
2087 {
2088 GCState *gcstate = get_gc_state();
2089 return gcstate->enabled;
2090 }
2091
2092 /* Public API to invoke gc.collect() from C */
2093 Py_ssize_t
2094 PyGC_Collect(void)
2095 {
2096 PyThreadState *tstate = _PyThreadState_GET();
2097 GCState *gcstate = &tstate->interp->gc;
2098
2099 if (!gcstate->enabled) {
2100 return 0;
2101 }
2102
2103 Py_ssize_t n;
2104 if (gcstate->collecting) {
2105 /* already collecting, don't do anything */
2106 n = 0;
2107 }
2108 else {
2109 gcstate->collecting = 1;
2110 PyObject *exc = _PyErr_GetRaisedException(tstate);
2111 n = gc_collect_with_callback(tstate, NUM_GENERATIONS - 1);
2112 _PyErr_SetRaisedException(tstate, exc);
2113 gcstate->collecting = 0;
2114 }
2115
2116 return n;
2117 }
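
/*
   Illustrative sketch, not part of this module: an embedding application can
   use PyGC_Collect() to break reference cycles left behind by a script, for
   example before taking a memory measurement.

       Py_Initialize();
       PyRun_SimpleString(
           "class Node:\n"
           "    pass\n"
           "a = Node(); b = Node()\n"
           "a.other = b; b.other = a\n"    // a reference cycle
           "del a, b\n");
       Py_ssize_t collected = PyGC_Collect();
       // 'collected' counts the unreachable objects found: here at least the
       // two Node instances and their instance dictionaries.
       Py_FinalizeEx();
*/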
2118
2119 Py_ssize_t
2120 _PyGC_CollectNoFail(PyThreadState *tstate)
2121 {
2122 /* Ideally, this function is only called on interpreter shutdown,
2123 and therefore not recursively. Unfortunately, when there are daemon
2124 threads, a daemon thread can start a cyclic garbage collection
2125 during interpreter shutdown (and then never finish it).
2126 See http://bugs.python.org/issue8713#msg195178 for an example.
2127 */
2128 GCState *gcstate = &tstate->interp->gc;
2129 if (gcstate->collecting) {
2130 return 0;
2131 }
2132
2133 Py_ssize_t n;
2134 gcstate->collecting = 1;
2135 n = gc_collect_main(tstate, NUM_GENERATIONS - 1, NULL, NULL, 1);
2136 gcstate->collecting = 0;
2137 return n;
2138 }
2139
2140 void
2141 _PyGC_DumpShutdownStats(PyInterpreterState *interp)
2142 {
2143 GCState *gcstate = &interp->gc;
2144 if (!(gcstate->debug & DEBUG_SAVEALL)
2145 && gcstate->garbage != NULL && PyList_GET_SIZE(gcstate->garbage) > 0) {
2146 const char *message;
2147 if (gcstate->debug & DEBUG_UNCOLLECTABLE)
2148 message = "gc: %zd uncollectable objects at " \
2149 "shutdown";
2150 else
2151 message = "gc: %zd uncollectable objects at " \
2152 "shutdown; use gc.set_debug(gc.DEBUG_UNCOLLECTABLE) to list them";
2153 /* PyErr_WarnFormat does too many things, and we are at shutdown;
2154 the warnings module's dependencies (e.g. linecache) may be gone
2155 already. */
2156 if (PyErr_WarnExplicitFormat(PyExc_ResourceWarning, "gc", 0,
2157 "gc", NULL, message,
2158 PyList_GET_SIZE(gcstate->garbage)))
2159 PyErr_WriteUnraisable(NULL);
2160 if (gcstate->debug & DEBUG_UNCOLLECTABLE) {
2161 PyObject *repr = NULL, *bytes = NULL;
2162 repr = PyObject_Repr(gcstate->garbage);
2163 if (!repr || !(bytes = PyUnicode_EncodeFSDefault(repr)))
2164 PyErr_WriteUnraisable(gcstate->garbage);
2165 else {
2166 PySys_WriteStderr(
2167 " %s\n",
2168 PyBytes_AS_STRING(bytes)
2169 );
2170 }
2171 Py_XDECREF(repr);
2172 Py_XDECREF(bytes);
2173 }
2174 }
2175 }
2176
2177
2178 void
2179 _PyGC_Fini(PyInterpreterState *interp)
2180 {
2181 GCState *gcstate = &interp->gc;
2182 Py_CLEAR(gcstate->garbage);
2183 Py_CLEAR(gcstate->callbacks);
2184
2185 /* We expect that none of this interpreter's objects are shared
2186 with other interpreters.
2187 See https://github.com/python/cpython/issues/90228. */
2188 }
2189
2190 /* for debugging */
2191 void
2192 _PyGC_Dump(PyGC_Head *g)
2193 {
2194 _PyObject_Dump(FROM_GC(g));
2195 }
2196
2197
2198 #ifdef Py_DEBUG
2199 static int
2200 visit_validate(PyObject *op, void *parent_raw)
2201 {
2202 PyObject *parent = _PyObject_CAST(parent_raw);
2203 if (_PyObject_IsFreed(op)) {
2204 _PyObject_ASSERT_FAILED_MSG(parent,
2205 "PyObject_GC_Track() object is not valid");
2206 }
2207 return 0;
2208 }
2209 #endif
2210
2211
2212 /* extension modules might be compiled with GC support, so these
2213 functions must always be available */
2214
2215 void
2216 PyObject_GC_Track(void *op_raw)
2217 {
2218 PyObject *op = _PyObject_CAST(op_raw);
2219 if (_PyObject_GC_IS_TRACKED(op)) {
2220 _PyObject_ASSERT_FAILED_MSG(op,
2221 "object already tracked "
2222 "by the garbage collector");
2223 }
2224 _PyObject_GC_TRACK(op);
2225
2226 #ifdef Py_DEBUG
2227 /* Check that the object is valid: validate objects traversed
2228 by tp_traverse() */
2229 traverseproc traverse = Py_TYPE(op)->tp_traverse;
2230 (void)traverse(op, visit_validate, op);
2231 #endif
2232 }
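
/*
   Illustrative sketch (continuing the hypothetical pairobject type from the
   tp_traverse example earlier in this file): allocate with PyObject_GC_New(),
   fully initialize every PyObject* field, and only then call
   PyObject_GC_Track(), so that a collection triggered in between never
   traverses uninitialized memory.

       static PyObject *
       pair_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
       {
           pairobject *self = PyObject_GC_New(pairobject, type);
           if (self == NULL) {
               return NULL;
           }
           self->first = NULL;     // initialize before tracking
           self->second = NULL;
           PyObject_GC_Track((PyObject *)self);
           return (PyObject *)self;
       }
*/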
2233
2234 void
2235 PyObject_GC_UnTrack(void *op_raw)
2236 {
2237 PyObject *op = _PyObject_CAST(op_raw);
2238 /* Obscure: the Py_TRASHCAN mechanism requires that we be able to
2239 * call PyObject_GC_UnTrack twice on an object.
2240 */
2241 if (_PyObject_GC_IS_TRACKED(op)) {
2242 _PyObject_GC_UNTRACK(op);
2243 }
2244 }
2245
2246 int
2247 PyObject_IS_GC(PyObject *obj)
2248 {
2249 return _PyObject_IS_GC(obj);
2250 }
2251
2252 void
2253 _Py_ScheduleGC(PyInterpreterState *interp)
2254 {
2255 GCState *gcstate = &interp->gc;
2256 if (gcstate->collecting == 1) {
2257 return;
2258 }
2259 struct _ceval_state *ceval = &interp->ceval;
2260 if (!_Py_atomic_load_relaxed(&ceval->gc_scheduled)) {
2261 _Py_atomic_store_relaxed(&ceval->gc_scheduled, 1);
2262 _Py_atomic_store_relaxed(&ceval->eval_breaker, 1);
2263 }
2264 }
2265
2266 void
2267 _PyObject_GC_Link(PyObject *op)
2268 {
2269 PyGC_Head *g = AS_GC(op);
2270 assert(((uintptr_t)g & (sizeof(uintptr_t)-1)) == 0); // g must be correctly aligned
2271
2272 PyThreadState *tstate = _PyThreadState_GET();
2273 GCState *gcstate = &tstate->interp->gc;
2274 g->_gc_next = 0;
2275 g->_gc_prev = 0;
2276 gcstate->generations[0].count++; /* number of allocated GC objects */
2277 if (gcstate->generations[0].count > gcstate->generations[0].threshold &&
2278 gcstate->enabled &&
2279 gcstate->generations[0].threshold &&
2280 !gcstate->collecting &&
2281 !_PyErr_Occurred(tstate))
2282 {
2283 _Py_ScheduleGC(tstate->interp);
2284 }
2285 }
2286
2287 void
2288 _Py_RunGC(PyThreadState *tstate)
2289 {
2290 GCState *gcstate = &tstate->interp->gc;
2291 gcstate->collecting = 1;
2292 gc_collect_generations(tstate);
2293 gcstate->collecting = 0;
2294 }
2295
2296 static PyObject *
2297 gc_alloc(size_t basicsize, size_t presize)
2298 {
2299 PyThreadState *tstate = _PyThreadState_GET();
2300 if (basicsize > PY_SSIZE_T_MAX - presize) {
2301 return _PyErr_NoMemory(tstate);
2302 }
2303 size_t size = presize + basicsize;
2304 char *mem = PyObject_Malloc(size);
2305 if (mem == NULL) {
2306 return _PyErr_NoMemory(tstate);
2307 }
2308 ((PyObject **)mem)[0] = NULL;
2309 ((PyObject **)mem)[1] = NULL;
2310 PyObject *op = (PyObject *)(mem + presize);
2311 _PyObject_GC_Link(op);
2312 return op;
2313 }
2314
2315 PyObject *
2316 _PyObject_GC_New(PyTypeObject *tp)
2317 {
2318 size_t presize = _PyType_PreHeaderSize(tp);
2319 PyObject *op = gc_alloc(_PyObject_SIZE(tp), presize);
2320 if (op == NULL) {
2321 return NULL;
2322 }
2323 _PyObject_Init(op, tp);
2324 return op;
2325 }
2326
2327 PyVarObject *
2328 _PyObject_GC_NewVar(PyTypeObject *tp, Py_ssize_t nitems)
2329 {
2330 PyVarObject *op;
2331
2332 if (nitems < 0) {
2333 PyErr_BadInternalCall();
2334 return NULL;
2335 }
2336 size_t presize = _PyType_PreHeaderSize(tp);
2337 size_t size = _PyObject_VAR_SIZE(tp, nitems);
2338 op = (PyVarObject *)gc_alloc(size, presize);
2339 if (op == NULL) {
2340 return NULL;
2341 }
2342 _PyObject_InitVar(op, tp, nitems);
2343 return op;
2344 }
2345
2346 PyObject *
2347 PyUnstable_Object_GC_NewWithExtraData(PyTypeObject *tp, size_t extra_size)
2348 {
2349 size_t presize = _PyType_PreHeaderSize(tp);
2350 PyObject *op = gc_alloc(_PyObject_SIZE(tp) + extra_size, presize);
2351 if (op == NULL) {
2352 return NULL;
2353 }
2354 memset(op, 0, _PyObject_SIZE(tp) + extra_size);
2355 _PyObject_Init(op, tp);
2356 return op;
2357 }
2358
2359 PyVarObject *
2360 _PyObject_GC_Resize(PyVarObject *op, Py_ssize_t nitems)
2361 {
2362 const size_t basicsize = _PyObject_VAR_SIZE(Py_TYPE(op), nitems);
2363 const size_t presize = _PyType_PreHeaderSize(((PyObject *)op)->ob_type);
2364 _PyObject_ASSERT((PyObject *)op, !_PyObject_GC_IS_TRACKED(op));
2365 if (basicsize > (size_t)PY_SSIZE_T_MAX - presize) {
2366 return (PyVarObject *)PyErr_NoMemory();
2367 }
2368 char *mem = (char *)op - presize;
2369 mem = (char *)PyObject_Realloc(mem, presize + basicsize);
2370 if (mem == NULL) {
2371 return (PyVarObject *)PyErr_NoMemory();
2372 }
2373 op = (PyVarObject *) (mem + presize);
2374 Py_SET_SIZE(op, nitems);
2375 return op;
2376 }
2377
2378 void
2379 PyObject_GC_Del(void *op)
2380 {
2381 size_t presize = _PyType_PreHeaderSize(((PyObject *)op)->ob_type);
2382 PyGC_Head *g = AS_GC(op);
2383 if (_PyObject_GC_IS_TRACKED(op)) {
2384 #ifdef Py_DEBUG
2385 if (PyErr_WarnExplicitFormat(PyExc_ResourceWarning, "gc", 0,
2386 "gc", NULL, "Object of type %s is not untracked before destruction",
2387 ((PyObject*)op)->ob_type->tp_name)) {
2388 PyErr_WriteUnraisable(NULL);
2389 }
2390 #endif
2391 gc_list_remove(g);
2392 }
2393 GCState *gcstate = get_gc_state();
2394 if (gcstate->generations[0].count > 0) {
2395 gcstate->generations[0].count--;
2396 }
2397 PyObject_Free(((char *)op)-presize);
2398 }
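
/*
   Illustrative sketch (the matching deallocator for the hypothetical
   pairobject sketched above): untrack first, then drop owned references,
   then release the memory with PyObject_GC_Del().  Untracking before
   clearing the fields is what keeps the debug warning above from firing.

       static void
       pair_dealloc(pairobject *self)
       {
           PyObject_GC_UnTrack(self);    // must precede clearing the fields
           Py_CLEAR(self->first);
           Py_CLEAR(self->second);
           PyObject_GC_Del(self);
       }
*/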
2399
2400 int
2401 PyObject_GC_IsTracked(PyObject* obj)
2402 {
2403 if (_PyObject_IS_GC(obj) && _PyObject_GC_IS_TRACKED(obj)) {
2404 return 1;
2405 }
2406 return 0;
2407 }
2408
2409 int
2410 PyObject_GC_IsFinalized(PyObject *obj)
2411 {
2412 if (_PyObject_IS_GC(obj) && _PyGCHead_FINALIZED(AS_GC(obj))) {
2413 return 1;
2414 }
2415 return 0;
2416 }
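
/*
   Illustrative sketch, not part of this module: the two predicates above are
   the C counterparts of gc.is_tracked() and gc.is_finalized().  Containers
   are tracked because they can participate in cycles; atomic objects are not.

       PyObject *list = PyList_New(0);          // tracked from creation
       PyObject *number = PyLong_FromLong(42);  // atomic, never tracked
       if (list != NULL && number != NULL) {
           assert(PyObject_GC_IsTracked(list) == 1);
           assert(PyObject_GC_IsTracked(number) == 0);
           assert(PyObject_GC_IsFinalized(list) == 0);   // no finalizer has run
       }
       Py_XDECREF(list);
       Py_XDECREF(number);
*/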
2417
2418 void
2419 PyUnstable_GC_VisitObjects(gcvisitobjects_t callback, void *arg)
2420 {
2421 size_t i;
2422 GCState *gcstate = get_gc_state();
2423 int origenstate = gcstate->enabled;
2424 gcstate->enabled = 0;
2425 for (i = 0; i < NUM_GENERATIONS; i++) {
2426 PyGC_Head *gc_list, *gc;
2427 gc_list = GEN_HEAD(gcstate, i);
2428 for (gc = GC_NEXT(gc_list); gc != gc_list; gc = GC_NEXT(gc)) {
2429 PyObject *op = FROM_GC(gc);
2430 Py_INCREF(op);
2431 int res = callback(op, arg);
2432 Py_DECREF(op);
2433 if (!res) {
2434 goto done;
2435 }
2436 }
2437 }
2438 done:
2439 gcstate->enabled = origenstate;
2440 }
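
/*
   Illustrative sketch (hypothetical callback, not part of this module):
   counting every tracked object with PyUnstable_GC_VisitObjects().  As the
   loop above shows, a zero return from the callback stops the walk, so the
   callback returns non-zero to keep going.

       static int
       count_visited(PyObject *op, void *arg)
       {
           Py_ssize_t *count = (Py_ssize_t *)arg;
           (*count)++;
           return 1;    // keep visiting
       }

       // Usage:
       Py_ssize_t n_tracked = 0;
       PyUnstable_GC_VisitObjects(count_visited, &n_tracked);
*/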