1 /* Python's malloc wrappers (see pymem.h) */
2
3 #include "Python.h"
4 #include "pycore_code.h" // stats
5 #include "pycore_pystate.h" // _PyInterpreterState_GET
6
7 #include "pycore_obmalloc.h"
8 #include "pycore_pymem.h"
9
10 #include <stdlib.h> // malloc()
11 #include <stdbool.h>
12
13 #undef uint
14 #define uint pymem_uint
15
16
17 /* Defined in tracemalloc.c */
18 extern void _PyMem_DumpTraceback(int fd, const void *ptr);
19
20 static void _PyObject_DebugDumpAddress(const void *p);
21 static void _PyMem_DebugCheckAddress(const char *func, char api_id, const void *p);
22
23
24 static void set_up_debug_hooks_domain_unlocked(PyMemAllocatorDomain domain);
25 static void set_up_debug_hooks_unlocked(void);
26 static void get_allocator_unlocked(PyMemAllocatorDomain, PyMemAllocatorEx *);
27 static void set_allocator_unlocked(PyMemAllocatorDomain, PyMemAllocatorEx *);
28
29
30 /***************************************/
31 /* low-level allocator implementations */
32 /***************************************/
33
34 /* the default raw allocator (wraps malloc) */
35
36 void *
37 _PyMem_RawMalloc(void *Py_UNUSED(ctx), size_t size)
38 {
39 /* PyMem_RawMalloc(0) means malloc(1). Some systems would return NULL
40 for malloc(0), which would be treated as an error. Some platforms would
41 return a pointer with no memory behind it, which would break pymalloc.
42 To solve these problems, allocate an extra byte. */
43 if (size == 0)
44 size = 1;
45 return malloc(size);
46 }
47
48 void *
49 _PyMem_RawCalloc(void *Py_UNUSED(ctx), size_t nelem, size_t elsize)
50 {
51 /* PyMem_RawCalloc(0, 0) means calloc(1, 1). Some systems would return NULL
52 for calloc(0, 0), which would be treated as an error. Some platforms
53 would return a pointer with no memory behind it, which would break
54 pymalloc. To solve these problems, allocate an extra byte. */
55 if (nelem == 0 || elsize == 0) {
56 nelem = 1;
57 elsize = 1;
58 }
59 return calloc(nelem, elsize);
60 }
61
62 void *
63 _PyMem_RawRealloc(void *Py_UNUSED(ctx), void *ptr, size_t size)
64 {
65 if (size == 0)
66 size = 1;
67 return realloc(ptr, size);
68 }
69
70 void
71 _PyMem_RawFree(void *Py_UNUSED(ctx), void *ptr)
72 {
73 free(ptr);
74 }
75
76 #define MALLOC_ALLOC {NULL, _PyMem_RawMalloc, _PyMem_RawCalloc, _PyMem_RawRealloc, _PyMem_RawFree}
77 #define PYRAW_ALLOC MALLOC_ALLOC
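/* Illustrative sketch (not part of the build): the initializer above fills a
   PyMemAllocatorEx in field order {ctx, malloc, calloc, realloc, free}.  All
   call sites go through such a table and pass the stored ctx back as the
   first argument, which is what makes the allocators swappable at runtime.
   The helper name is hypothetical. */
#if 0
static int
example_raw_alloc_via_table(size_t n)
{
    PyMemAllocatorEx a = MALLOC_ALLOC;   /* {ctx, malloc, calloc, realloc, free} */
    void *p = a.malloc(a.ctx, n);        /* same effect as _PyMem_RawMalloc(NULL, n) */
    if (p == NULL) {
        return -1;
    }
    a.free(a.ctx, p);
    return 0;
}
#endif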
78
79 /* the default object allocator */
80
81 // The actual implementation is further down.
82
83 #ifdef WITH_PYMALLOC
84 void* _PyObject_Malloc(void *ctx, size_t size);
85 void* _PyObject_Calloc(void *ctx, size_t nelem, size_t elsize);
86 void _PyObject_Free(void *ctx, void *p);
87 void* _PyObject_Realloc(void *ctx, void *ptr, size_t size);
88 # define PYMALLOC_ALLOC {NULL, _PyObject_Malloc, _PyObject_Calloc, _PyObject_Realloc, _PyObject_Free}
89 # define PYOBJ_ALLOC PYMALLOC_ALLOC
90 #else
91 # define PYOBJ_ALLOC MALLOC_ALLOC
92 #endif // WITH_PYMALLOC
93
94 #define PYMEM_ALLOC PYOBJ_ALLOC
95
96 /* the default debug allocators */
97
98 // The actual implementation is further down.
99
100 void* _PyMem_DebugRawMalloc(void *ctx, size_t size);
101 void* _PyMem_DebugRawCalloc(void *ctx, size_t nelem, size_t elsize);
102 void* _PyMem_DebugRawRealloc(void *ctx, void *ptr, size_t size);
103 void _PyMem_DebugRawFree(void *ctx, void *ptr);
104
105 void* _PyMem_DebugMalloc(void *ctx, size_t size);
106 void* _PyMem_DebugCalloc(void *ctx, size_t nelem, size_t elsize);
107 void* _PyMem_DebugRealloc(void *ctx, void *ptr, size_t size);
108 void _PyMem_DebugFree(void *ctx, void *p);
109
110 #define PYDBGRAW_ALLOC \
111 {&_PyRuntime.allocators.debug.raw, _PyMem_DebugRawMalloc, _PyMem_DebugRawCalloc, _PyMem_DebugRawRealloc, _PyMem_DebugRawFree}
112 #define PYDBGMEM_ALLOC \
113 {&_PyRuntime.allocators.debug.mem, _PyMem_DebugMalloc, _PyMem_DebugCalloc, _PyMem_DebugRealloc, _PyMem_DebugFree}
114 #define PYDBGOBJ_ALLOC \
115 {&_PyRuntime.allocators.debug.obj, _PyMem_DebugMalloc, _PyMem_DebugCalloc, _PyMem_DebugRealloc, _PyMem_DebugFree}
116
117 /* the low-level virtual memory allocator */
118
119 #ifdef WITH_PYMALLOC
120 # ifdef MS_WINDOWS
121 # include <windows.h>
122 # elif defined(HAVE_MMAP)
123 # include <sys/mman.h>
124 # ifdef MAP_ANONYMOUS
125 # define ARENAS_USE_MMAP
126 # endif
127 # endif
128 #endif
129
130 void *
131 _PyMem_ArenaAlloc(void *Py_UNUSED(ctx), size_t size)
132 {
133 #ifdef MS_WINDOWS
134 return VirtualAlloc(NULL, size,
135 MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
136 #elif defined(ARENAS_USE_MMAP)
137 void *ptr;
138 ptr = mmap(NULL, size, PROT_READ|PROT_WRITE,
139 MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
140 if (ptr == MAP_FAILED)
141 return NULL;
142 assert(ptr != NULL);
143 return ptr;
144 #else
145 return malloc(size);
146 #endif
147 }
148
149 void
150 _PyMem_ArenaFree(void *Py_UNUSED(ctx), void *ptr,
151 #if defined(ARENAS_USE_MMAP)
152 size_t size
153 #else
154 size_t Py_UNUSED(size)
155 #endif
156 )
157 {
158 #ifdef MS_WINDOWS
159 VirtualFree(ptr, 0, MEM_RELEASE);
160 #elif defined(ARENAS_USE_MMAP)
161 munmap(ptr, size);
162 #else
163 free(ptr);
164 #endif
165 }
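/* Illustrative sketch (not part of the build): the two arena hooks pair up,
   and the caller must hand the original mapping size back to
   _PyMem_ArenaFree() because munmap(), unlike free(), needs it.  ARENA_SIZE
   comes from pycore_obmalloc.h; the helper name is hypothetical. */
#if 0
static int
example_arena_roundtrip(void)
{
    void *arena = _PyMem_ArenaAlloc(NULL, ARENA_SIZE);
    if (arena == NULL) {
        return -1;
    }
    _PyMem_ArenaFree(NULL, arena, ARENA_SIZE);
    return 0;
}
#endif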
166
167 /*******************************************/
168 /* end low-level allocator implementations */
169 /*******************************************/
170
171
172 #if defined(__has_feature) /* Clang */
173 # if __has_feature(address_sanitizer) /* is ASAN enabled? */
174 # define _Py_NO_SANITIZE_ADDRESS \
175 __attribute__((no_sanitize("address")))
176 # endif
177 # if __has_feature(thread_sanitizer) /* is TSAN enabled? */
178 # define _Py_NO_SANITIZE_THREAD __attribute__((no_sanitize_thread))
179 # endif
180 # if __has_feature(memory_sanitizer) /* is MSAN enabled? */
181 # define _Py_NO_SANITIZE_MEMORY __attribute__((no_sanitize_memory))
182 # endif
183 #elif defined(__GNUC__)
184 # if defined(__SANITIZE_ADDRESS__) /* GCC 4.8+, is ASAN enabled? */
185 # define _Py_NO_SANITIZE_ADDRESS \
186 __attribute__((no_sanitize_address))
187 # endif
188 // TSAN is supported since GCC 5.1, but __SANITIZE_THREAD__ macro
189 // is provided only since GCC 7.
190 # if __GNUC__ > 5 || (__GNUC__ == 5 && __GNUC_MINOR__ >= 1)
191 # define _Py_NO_SANITIZE_THREAD __attribute__((no_sanitize_thread))
192 # endif
193 #endif
194
195 #ifndef _Py_NO_SANITIZE_ADDRESS
196 # define _Py_NO_SANITIZE_ADDRESS
197 #endif
198 #ifndef _Py_NO_SANITIZE_THREAD
199 # define _Py_NO_SANITIZE_THREAD
200 #endif
201 #ifndef _Py_NO_SANITIZE_MEMORY
202 # define _Py_NO_SANITIZE_MEMORY
203 #endif
204
205
206 #define ALLOCATORS_MUTEX (_PyRuntime.allocators.mutex)
207 #define _PyMem_Raw (_PyRuntime.allocators.standard.raw)
208 #define _PyMem (_PyRuntime.allocators.standard.mem)
209 #define _PyObject (_PyRuntime.allocators.standard.obj)
210 #define _PyMem_Debug (_PyRuntime.allocators.debug)
211 #define _PyObject_Arena (_PyRuntime.allocators.obj_arena)
212
213
214 /***************************/
215 /* managing the allocators */
216 /***************************/
217
218 static int
219 set_default_allocator_unlocked(PyMemAllocatorDomain domain, int debug,
220 PyMemAllocatorEx *old_alloc)
221 {
222 if (old_alloc != NULL) {
223 get_allocator_unlocked(domain, old_alloc);
224 }
225
226
227 PyMemAllocatorEx new_alloc;
228 switch(domain)
229 {
230 case PYMEM_DOMAIN_RAW:
231 new_alloc = (PyMemAllocatorEx)PYRAW_ALLOC;
232 break;
233 case PYMEM_DOMAIN_MEM:
234 new_alloc = (PyMemAllocatorEx)PYMEM_ALLOC;
235 break;
236 case PYMEM_DOMAIN_OBJ:
237 new_alloc = (PyMemAllocatorEx)PYOBJ_ALLOC;
238 break;
239 default:
240 /* unknown domain */
241 return -1;
242 }
243 set_allocator_unlocked(domain, &new_alloc);
244 if (debug) {
245 set_up_debug_hooks_domain_unlocked(domain);
246 }
247 return 0;
248 }
249
250
251 #ifdef Py_DEBUG
252 static const int pydebug = 1;
253 #else
254 static const int pydebug = 0;
255 #endif
256
257 int
258 _PyMem_SetDefaultAllocator(PyMemAllocatorDomain domain,
259 PyMemAllocatorEx *old_alloc)
260 {
261 if (ALLOCATORS_MUTEX == NULL) {
262 /* The runtime must be initializing. */
263 return set_default_allocator_unlocked(domain, pydebug, old_alloc);
264 }
265 PyThread_acquire_lock(ALLOCATORS_MUTEX, WAIT_LOCK);
266 int res = set_default_allocator_unlocked(domain, pydebug, old_alloc);
267 PyThread_release_lock(ALLOCATORS_MUTEX);
268 return res;
269 }
270
271
272 int
273 _PyMem_GetAllocatorName(const char *name, PyMemAllocatorName *allocator)
274 {
275 if (name == NULL || *name == '\0') {
276 /* PYTHONMALLOC is empty or is not set or ignored (-E/-I command line
277 options): use default memory allocators */
278 *allocator = PYMEM_ALLOCATOR_DEFAULT;
279 }
280 else if (strcmp(name, "default") == 0) {
281 *allocator = PYMEM_ALLOCATOR_DEFAULT;
282 }
283 else if (strcmp(name, "debug") == 0) {
284 *allocator = PYMEM_ALLOCATOR_DEBUG;
285 }
286 #ifdef WITH_PYMALLOC
287 else if (strcmp(name, "pymalloc") == 0) {
288 *allocator = PYMEM_ALLOCATOR_PYMALLOC;
289 }
290 else if (strcmp(name, "pymalloc_debug") == 0) {
291 *allocator = PYMEM_ALLOCATOR_PYMALLOC_DEBUG;
292 }
293 #endif
294 else if (strcmp(name, "malloc") == 0) {
295 *allocator = PYMEM_ALLOCATOR_MALLOC;
296 }
297 else if (strcmp(name, "malloc_debug") == 0) {
298 *allocator = PYMEM_ALLOCATOR_MALLOC_DEBUG;
299 }
300 else {
301 /* unknown allocator */
302 return -1;
303 }
304 return 0;
305 }
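/* Illustrative sketch (not part of the build): this is roughly how a
   PYTHONMALLOC-style string is turned into a PyMemAllocatorName and then fed
   to _PyMem_SetupAllocators() below.  The helper name is hypothetical. */
#if 0
static int
example_select_allocator(const char *pythonmalloc)
{
    PyMemAllocatorName name;
    if (_PyMem_GetAllocatorName(pythonmalloc, &name) < 0) {
        return -1;                          /* unknown allocator string */
    }
    return _PyMem_SetupAllocators(name);    /* install it for all domains */
}
#endif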
306
307
308 static int
309 set_up_allocators_unlocked(PyMemAllocatorName allocator)
310 {
311 switch (allocator) {
312 case PYMEM_ALLOCATOR_NOT_SET:
313 /* do nothing */
314 break;
315
316 case PYMEM_ALLOCATOR_DEFAULT:
317 (void)set_default_allocator_unlocked(PYMEM_DOMAIN_RAW, pydebug, NULL);
318 (void)set_default_allocator_unlocked(PYMEM_DOMAIN_MEM, pydebug, NULL);
319 (void)set_default_allocator_unlocked(PYMEM_DOMAIN_OBJ, pydebug, NULL);
320 break;
321
322 case PYMEM_ALLOCATOR_DEBUG:
323 (void)set_default_allocator_unlocked(PYMEM_DOMAIN_RAW, 1, NULL);
324 (void)set_default_allocator_unlocked(PYMEM_DOMAIN_MEM, 1, NULL);
325 (void)set_default_allocator_unlocked(PYMEM_DOMAIN_OBJ, 1, NULL);
326 break;
327
328 #ifdef WITH_PYMALLOC
329 case PYMEM_ALLOCATOR_PYMALLOC:
330 case PYMEM_ALLOCATOR_PYMALLOC_DEBUG:
331 {
332 PyMemAllocatorEx malloc_alloc = MALLOC_ALLOC;
333 set_allocator_unlocked(PYMEM_DOMAIN_RAW, &malloc_alloc);
334
335 PyMemAllocatorEx pymalloc = PYMALLOC_ALLOC;
336 set_allocator_unlocked(PYMEM_DOMAIN_MEM, &pymalloc);
337 set_allocator_unlocked(PYMEM_DOMAIN_OBJ, &pymalloc);
338
339 if (allocator == PYMEM_ALLOCATOR_PYMALLOC_DEBUG) {
340 set_up_debug_hooks_unlocked();
341 }
342 break;
343 }
344 #endif
345
346 case PYMEM_ALLOCATOR_MALLOC:
347 case PYMEM_ALLOCATOR_MALLOC_DEBUG:
348 {
349 PyMemAllocatorEx malloc_alloc = MALLOC_ALLOC;
350 set_allocator_unlocked(PYMEM_DOMAIN_RAW, &malloc_alloc);
351 set_allocator_unlocked(PYMEM_DOMAIN_MEM, &malloc_alloc);
352 set_allocator_unlocked(PYMEM_DOMAIN_OBJ, &malloc_alloc);
353
354 if (allocator == PYMEM_ALLOCATOR_MALLOC_DEBUG) {
355 set_up_debug_hooks_unlocked();
356 }
357 break;
358 }
359
360 default:
361 /* unknown allocator */
362 return -1;
363 }
364
365 return 0;
366 }
367
368 int
369 _PyMem_SetupAllocators(PyMemAllocatorName allocator)
370 {
371 PyThread_acquire_lock(ALLOCATORS_MUTEX, WAIT_LOCK);
372 int res = set_up_allocators_unlocked(allocator);
373 PyThread_release_lock(ALLOCATORS_MUTEX);
374 return res;
375 }
376
377
378 static int
379 pymemallocator_eq(PyMemAllocatorEx *a, PyMemAllocatorEx *b)
380 {
381 return (memcmp(a, b, sizeof(PyMemAllocatorEx)) == 0);
382 }
383
384
385 static const char*
386 get_current_allocator_name_unlocked(void)
387 {
388 PyMemAllocatorEx malloc_alloc = MALLOC_ALLOC;
389 #ifdef WITH_PYMALLOC
390 PyMemAllocatorEx pymalloc = PYMALLOC_ALLOC;
391 #endif
392
393 if (pymemallocator_eq(&_PyMem_Raw, &malloc_alloc) &&
394 pymemallocator_eq(&_PyMem, &malloc_alloc) &&
395 pymemallocator_eq(&_PyObject, &malloc_alloc))
396 {
397 return "malloc";
398 }
399 #ifdef WITH_PYMALLOC
400 if (pymemallocator_eq(&_PyMem_Raw, &malloc_alloc) &&
401 pymemallocator_eq(&_PyMem, &pymalloc) &&
402 pymemallocator_eq(&_PyObject, &pymalloc))
403 {
404 return "pymalloc";
405 }
406 #endif
407
408 PyMemAllocatorEx dbg_raw = PYDBGRAW_ALLOC;
409 PyMemAllocatorEx dbg_mem = PYDBGMEM_ALLOC;
410 PyMemAllocatorEx dbg_obj = PYDBGOBJ_ALLOC;
411
412 if (pymemallocator_eq(&_PyMem_Raw, &dbg_raw) &&
413 pymemallocator_eq(&_PyMem, &dbg_mem) &&
414 pymemallocator_eq(&_PyObject, &dbg_obj))
415 {
416 /* Debug hooks installed */
417 if (pymemallocator_eq(&_PyMem_Debug.raw.alloc, &malloc_alloc) &&
418 pymemallocator_eq(&_PyMem_Debug.mem.alloc, &malloc_alloc) &&
419 pymemallocator_eq(&_PyMem_Debug.obj.alloc, &malloc_alloc))
420 {
421 return "malloc_debug";
422 }
423 #ifdef WITH_PYMALLOC
424 if (pymemallocator_eq(&_PyMem_Debug.raw.alloc, &malloc_alloc) &&
425 pymemallocator_eq(&_PyMem_Debug.mem.alloc, &pymalloc) &&
426 pymemallocator_eq(&_PyMem_Debug.obj.alloc, &pymalloc))
427 {
428 return "pymalloc_debug";
429 }
430 #endif
431 }
432 return NULL;
433 }
434
435 const char*
436 _PyMem_GetCurrentAllocatorName(void)
437 {
438 PyThread_acquire_lock(ALLOCATORS_MUTEX, WAIT_LOCK);
439 const char *name = get_current_allocator_name_unlocked();
440 PyThread_release_lock(ALLOCATORS_MUTEX);
441 return name;
442 }
443
444
445 #ifdef WITH_PYMALLOC
446 static int
447 _PyMem_DebugEnabled(void)
448 {
449 return (_PyObject.malloc == _PyMem_DebugMalloc);
450 }
451
452 static int
453 _PyMem_PymallocEnabled(void)
454 {
455 if (_PyMem_DebugEnabled()) {
456 return (_PyMem_Debug.obj.alloc.malloc == _PyObject_Malloc);
457 }
458 else {
459 return (_PyObject.malloc == _PyObject_Malloc);
460 }
461 }
462 #endif
463
464
465 static void
466 set_up_debug_hooks_domain_unlocked(PyMemAllocatorDomain domain)
467 {
468 PyMemAllocatorEx alloc;
469
470 if (domain == PYMEM_DOMAIN_RAW) {
471 if (_PyMem_Raw.malloc == _PyMem_DebugRawMalloc) {
472 return;
473 }
474
475 get_allocator_unlocked(domain, &_PyMem_Debug.raw.alloc);
476 alloc.ctx = &_PyMem_Debug.raw;
477 alloc.malloc = _PyMem_DebugRawMalloc;
478 alloc.calloc = _PyMem_DebugRawCalloc;
479 alloc.realloc = _PyMem_DebugRawRealloc;
480 alloc.free = _PyMem_DebugRawFree;
481 set_allocator_unlocked(domain, &alloc);
482 }
483 else if (domain == PYMEM_DOMAIN_MEM) {
484 if (_PyMem.malloc == _PyMem_DebugMalloc) {
485 return;
486 }
487
488 get_allocator_unlocked(domain, &_PyMem_Debug.mem.alloc);
489 alloc.ctx = &_PyMem_Debug.mem;
490 alloc.malloc = _PyMem_DebugMalloc;
491 alloc.calloc = _PyMem_DebugCalloc;
492 alloc.realloc = _PyMem_DebugRealloc;
493 alloc.free = _PyMem_DebugFree;
494 set_allocator_unlocked(domain, &alloc);
495 }
496 else if (domain == PYMEM_DOMAIN_OBJ) {
497 if (_PyObject.malloc == _PyMem_DebugMalloc) {
498 return;
499 }
500
501 get_allocator_unlocked(domain, &_PyMem_Debug.obj.alloc);
502 alloc.ctx = &_PyMem_Debug.obj;
503 alloc.malloc = _PyMem_DebugMalloc;
504 alloc.calloc = _PyMem_DebugCalloc;
505 alloc.realloc = _PyMem_DebugRealloc;
506 alloc.free = _PyMem_DebugFree;
507 set_allocator_unlocked(domain, &alloc);
508 }
509 }
510
511
512 static void
513 set_up_debug_hooks_unlocked(void)
514 {
515 set_up_debug_hooks_domain_unlocked(PYMEM_DOMAIN_RAW);
516 set_up_debug_hooks_domain_unlocked(PYMEM_DOMAIN_MEM);
517 set_up_debug_hooks_domain_unlocked(PYMEM_DOMAIN_OBJ);
518 }
519
520 void
521 PyMem_SetupDebugHooks(void)
522 {
523 if (ALLOCATORS_MUTEX == NULL) {
524 /* The runtime must not be completely initialized yet. */
525 set_up_debug_hooks_unlocked();
526 return;
527 }
528 PyThread_acquire_lock(ALLOCATORS_MUTEX, WAIT_LOCK);
529 set_up_debug_hooks_unlocked();
530 PyThread_release_lock(ALLOCATORS_MUTEX);
531 }
532
533 static void
534 get_allocator_unlocked(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
535 {
536 switch(domain)
537 {
538 case PYMEM_DOMAIN_RAW: *allocator = _PyMem_Raw; break;
539 case PYMEM_DOMAIN_MEM: *allocator = _PyMem; break;
540 case PYMEM_DOMAIN_OBJ: *allocator = _PyObject; break;
541 default:
542 /* unknown domain: set all attributes to NULL */
543 allocator->ctx = NULL;
544 allocator->malloc = NULL;
545 allocator->calloc = NULL;
546 allocator->realloc = NULL;
547 allocator->free = NULL;
548 }
549 }
550
551 static void
552 set_allocator_unlocked(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
553 {
554 switch(domain)
555 {
556 case PYMEM_DOMAIN_RAW: _PyMem_Raw = *allocator; break;
557 case PYMEM_DOMAIN_MEM: _PyMem = *allocator; break;
558 case PYMEM_DOMAIN_OBJ: _PyObject = *allocator; break;
559 /* ignore unknown domain */
560 }
561 }
562
563 void
564 PyMem_GetAllocator(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
565 {
566 if (ALLOCATORS_MUTEX == NULL) {
567 /* The runtime must not be completely initialized yet. */
568 get_allocator_unlocked(domain, allocator);
569 return;
570 }
571 PyThread_acquire_lock(ALLOCATORS_MUTEX, WAIT_LOCK);
572 get_allocator_unlocked(domain, allocator);
573 PyThread_release_lock(ALLOCATORS_MUTEX);
574 }
575
576 void
577 PyMem_SetAllocator(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
578 {
579 if (ALLOCATORS_MUTEX == NULL) {
580 /* The runtime must not be completely initialized yet. */
581 set_allocator_unlocked(domain, allocator);
582 return;
583 }
584 PyThread_acquire_lock(ALLOCATORS_MUTEX, WAIT_LOCK);
585 set_allocator_unlocked(domain, allocator);
586 PyThread_release_lock(ALLOCATORS_MUTEX);
587 }
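/* Illustrative sketch (not part of the build) of the intended use of
   PyMem_GetAllocator()/PyMem_SetAllocator(): wrap the current "mem" domain
   allocator with a hook that counts allocations.  Per the documented
   contract, an allocator swapped in after interpreter startup should wrap
   the previously installed one (as done here) and must be thread-safe.
   All names below are hypothetical. */
#if 0
typedef struct {
    PyMemAllocatorEx orig;  /* the allocator being wrapped */
    size_t nallocs;
} example_hook_ctx;

static example_hook_ctx example_ctx;

static void *
example_hook_malloc(void *ctx, size_t size)
{
    example_hook_ctx *c = (example_hook_ctx *)ctx;
    c->nallocs++;
    return c->orig.malloc(c->orig.ctx, size);
}

static void *
example_hook_calloc(void *ctx, size_t nelem, size_t elsize)
{
    example_hook_ctx *c = (example_hook_ctx *)ctx;
    c->nallocs++;
    return c->orig.calloc(c->orig.ctx, nelem, elsize);
}

static void *
example_hook_realloc(void *ctx, void *ptr, size_t size)
{
    example_hook_ctx *c = (example_hook_ctx *)ctx;
    return c->orig.realloc(c->orig.ctx, ptr, size);
}

static void
example_hook_free(void *ctx, void *ptr)
{
    example_hook_ctx *c = (example_hook_ctx *)ctx;
    c->orig.free(c->orig.ctx, ptr);
}

static void
example_install_hook(void)
{
    PyMem_GetAllocator(PYMEM_DOMAIN_MEM, &example_ctx.orig);
    PyMemAllocatorEx hook = {&example_ctx, example_hook_malloc,
                             example_hook_calloc, example_hook_realloc,
                             example_hook_free};
    PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &hook);
}
#endif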
588
589 void
590 PyObject_GetArenaAllocator(PyObjectArenaAllocator *allocator)
591 {
592 if (ALLOCATORS_MUTEX == NULL) {
593 /* The runtime must not be completely initialized yet. */
594 *allocator = _PyObject_Arena;
595 return;
596 }
597 PyThread_acquire_lock(ALLOCATORS_MUTEX, WAIT_LOCK);
598 *allocator = _PyObject_Arena;
599 PyThread_release_lock(ALLOCATORS_MUTEX);
600 }
601
602 void
603 PyObject_SetArenaAllocator(PyObjectArenaAllocator *allocator)
604 {
605 if (ALLOCATORS_MUTEX == NULL) {
606 /* The runtime must not be completely initialized yet. */
607 _PyObject_Arena = *allocator;
608 return;
609 }
610 PyThread_acquire_lock(ALLOCATORS_MUTEX, WAIT_LOCK);
611 _PyObject_Arena = *allocator;
612 PyThread_release_lock(ALLOCATORS_MUTEX);
613 }
614
615
616 /* Note that there is a possible, but very unlikely, race in any place
617 * below where we call one of the allocator functions. We access two
618 * fields in each case: "malloc", etc. and "ctx".
619 *
620 * It is unlikely that the allocator will be changed while one of those
621 * calls is happening, much less in that very narrow window.
622 * Furthermore, the likelihood of a race is drastically reduced by the
623 * fact that the allocator may not be changed after runtime init
624 * (except with a wrapper).
625 *
626 * With the above in mind, we currently don't worry about locking
627 * around these uses of the runtime-global allocators state. */
628
629
630 /*************************/
631 /* the "arena" allocator */
632 /*************************/
633
634 void *
635 _PyObject_VirtualAlloc(size_t size)
636 {
637 return _PyObject_Arena.alloc(_PyObject_Arena.ctx, size);
638 }
639
640 void
641 _PyObject_VirtualFree(void *obj, size_t size)
642 {
643 _PyObject_Arena.free(_PyObject_Arena.ctx, obj, size);
644 }
645
646
647 /***********************/
648 /* the "raw" allocator */
649 /***********************/
650
651 void *
652 PyMem_RawMalloc(size_t size)
653 {
654 /*
655 * Limit ourselves to PY_SSIZE_T_MAX bytes to prevent security holes.
656 * Most python internals blindly use a signed Py_ssize_t to track
657 * things without checking for overflows or negatives.
658 * As size_t is unsigned, checking for size < 0 is not required.
659 */
660 if (size > (size_t)PY_SSIZE_T_MAX)
661 return NULL;
662 return _PyMem_Raw.malloc(_PyMem_Raw.ctx, size);
663 }
664
665 void *
666 PyMem_RawCalloc(size_t nelem, size_t elsize)
667 {
668 /* see PyMem_RawMalloc() */
669 if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize)
670 return NULL;
671 return _PyMem_Raw.calloc(_PyMem_Raw.ctx, nelem, elsize);
672 }
673
674 void*
675 PyMem_RawRealloc(void *ptr, size_t new_size)
676 {
677 /* see PyMem_RawMalloc() */
678 if (new_size > (size_t)PY_SSIZE_T_MAX)
679 return NULL;
680 return _PyMem_Raw.realloc(_PyMem_Raw.ctx, ptr, new_size);
681 }
682
683 void PyMem_RawFree(void *ptr)
684 {
685 _PyMem_Raw.free(_PyMem_Raw.ctx, ptr);
686 }
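/* Usage sketch (not part of the build): unlike the "mem" and "object"
   domains, the PyMem_Raw* family is documented as safe to call without
   holding the GIL, so it is the right choice for buffers touched by
   non-Python threads.  The helper name is hypothetical. */
#if 0
static char *
example_copy_without_gil(const char *src, size_t len)
{
    char *buf = PyMem_RawMalloc(len + 1);   /* no GIL required */
    if (buf == NULL) {
        return NULL;
    }
    memcpy(buf, src, len);
    buf[len] = '\0';
    return buf;                             /* release later with PyMem_RawFree() */
}
#endif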
687
688
689 /***********************/
690 /* the "mem" allocator */
691 /***********************/
692
693 void *
694 PyMem_Malloc(size_t size)
695 {
696 /* see PyMem_RawMalloc() */
697 if (size > (size_t)PY_SSIZE_T_MAX)
698 return NULL;
699 OBJECT_STAT_INC_COND(allocations512, size < 512);
700 OBJECT_STAT_INC_COND(allocations4k, size >= 512 && size < 4094);
701 OBJECT_STAT_INC_COND(allocations_big, size >= 4094);
702 OBJECT_STAT_INC(allocations);
703 return _PyMem.malloc(_PyMem.ctx, size);
704 }
705
706 void *
707 PyMem_Calloc(size_t nelem, size_t elsize)
708 {
709 /* see PyMem_RawMalloc() */
710 if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize)
711 return NULL;
712 OBJECT_STAT_INC_COND(allocations512, elsize < 512);
713 OBJECT_STAT_INC_COND(allocations4k, elsize >= 512 && elsize < 4094);
714 OBJECT_STAT_INC_COND(allocations_big, elsize >= 4094);
715 OBJECT_STAT_INC(allocations);
716 return _PyMem.calloc(_PyMem.ctx, nelem, elsize);
717 }
718
719 void *
720 PyMem_Realloc(void *ptr, size_t new_size)
721 {
722 /* see PyMem_RawMalloc() */
723 if (new_size > (size_t)PY_SSIZE_T_MAX)
724 return NULL;
725 return _PyMem.realloc(_PyMem.ctx, ptr, new_size);
726 }
727
728 void
729 PyMem_Free(void *ptr)
730 {
731 OBJECT_STAT_INC(frees);
732 _PyMem.free(_PyMem.ctx, ptr);
733 }
734
735
736 /***************************/
737 /* pymem utility functions */
738 /***************************/
739
740 wchar_t*
741 _PyMem_RawWcsdup(const wchar_t *str)
742 {
743 assert(str != NULL);
744
745 size_t len = wcslen(str);
746 if (len > (size_t)PY_SSIZE_T_MAX / sizeof(wchar_t) - 1) {
747 return NULL;
748 }
749
750 size_t size = (len + 1) * sizeof(wchar_t);
751 wchar_t *str2 = PyMem_RawMalloc(size);
752 if (str2 == NULL) {
753 return NULL;
754 }
755
756 memcpy(str2, str, size);
757 return str2;
758 }
759
760 char *
761 _PyMem_RawStrdup(const char *str)
762 {
763 assert(str != NULL);
764 size_t size = strlen(str) + 1;
765 char *copy = PyMem_RawMalloc(size);
766 if (copy == NULL) {
767 return NULL;
768 }
769 memcpy(copy, str, size);
770 return copy;
771 }
772
773 char *
774 _PyMem_Strdup(const char *str)
775 {
776 assert(str != NULL);
777 size_t size = strlen(str) + 1;
778 char *copy = PyMem_Malloc(size);
779 if (copy == NULL) {
780 return NULL;
781 }
782 memcpy(copy, str, size);
783 return copy;
784 }
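/* Usage sketch (not part of the build): each dup helper above must be paired
   with the free function of the domain it allocates from.  The helper name
   is hypothetical. */
#if 0
static void
example_strdup_roundtrip(const char *s)
{
    char *raw_copy = _PyMem_RawStrdup(s);   /* uses PyMem_RawMalloc() */
    char *mem_copy = _PyMem_Strdup(s);      /* uses PyMem_Malloc() */
    PyMem_RawFree(raw_copy);                /* raw domain -> PyMem_RawFree() */
    PyMem_Free(mem_copy);                   /* "mem" domain -> PyMem_Free() */
}
#endif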
785
786
787 /**************************/
788 /* the "object" allocator */
789 /**************************/
790
791 void *
792 PyObject_Malloc(size_t size)
793 {
794 /* see PyMem_RawMalloc() */
795 if (size > (size_t)PY_SSIZE_T_MAX)
796 return NULL;
797 OBJECT_STAT_INC_COND(allocations512, size < 512);
798 OBJECT_STAT_INC_COND(allocations4k, size >= 512 && size < 4094);
799 OBJECT_STAT_INC_COND(allocations_big, size >= 4094);
800 OBJECT_STAT_INC(allocations);
801 return _PyObject.malloc(_PyObject.ctx, size);
802 }
803
804 void *
805 PyObject_Calloc(size_t nelem, size_t elsize)
806 {
807 /* see PyMem_RawMalloc() */
808 if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize)
809 return NULL;
810 OBJECT_STAT_INC_COND(allocations512, elsize < 512);
811 OBJECT_STAT_INC_COND(allocations4k, elsize >= 512 && elsize < 4094);
812 OBJECT_STAT_INC_COND(allocations_big, elsize >= 4094);
813 OBJECT_STAT_INC(allocations);
814 return _PyObject.calloc(_PyObject.ctx, nelem, elsize);
815 }
816
817 void *
818 PyObject_Realloc(void *ptr, size_t new_size)
819 {
820 /* see PyMem_RawMalloc() */
821 if (new_size > (size_t)PY_SSIZE_T_MAX)
822 return NULL;
823 return _PyObject.realloc(_PyObject.ctx, ptr, new_size);
824 }
825
826 void
827 PyObject_Free(void *ptr)
828 {
829 OBJECT_STAT_INC(frees);
830 _PyObject.free(_PyObject.ctx, ptr);
831 }
832
833
834 /* If we're using GCC, use __builtin_expect() to reduce overhead of
835 the valgrind checks */
836 #if defined(__GNUC__) && (__GNUC__ > 2) && defined(__OPTIMIZE__)
837 # define UNLIKELY(value) __builtin_expect((value), 0)
838 # define LIKELY(value) __builtin_expect((value), 1)
839 #else
840 # define UNLIKELY(value) (value)
841 # define LIKELY(value) (value)
842 #endif
843
844 #ifdef WITH_PYMALLOC
845
846 #ifdef WITH_VALGRIND
847 #include <valgrind/valgrind.h>
848
849 /* -1 indicates that we haven't checked that we're running on valgrind yet. */
850 static int running_on_valgrind = -1;
851 #endif
852
853 typedef struct _obmalloc_state OMState;
854
855 static inline int
856 has_own_state(PyInterpreterState *interp)
857 {
858 return (_Py_IsMainInterpreter(interp) ||
859 !(interp->feature_flags & Py_RTFLAGS_USE_MAIN_OBMALLOC) ||
860 _Py_IsMainInterpreterFinalizing(interp));
861 }
862
863 static inline OMState *
864 get_state(void)
865 {
866 PyInterpreterState *interp = _PyInterpreterState_GET();
867 if (!has_own_state(interp)) {
868 interp = _PyInterpreterState_Main();
869 }
870 return &interp->obmalloc;
871 }
872
873 // These macros all rely on a local "state" variable.
874 #define usedpools (state->pools.used)
875 #define allarenas (state->mgmt.arenas)
876 #define maxarenas (state->mgmt.maxarenas)
877 #define unused_arena_objects (state->mgmt.unused_arena_objects)
878 #define usable_arenas (state->mgmt.usable_arenas)
879 #define nfp2lasta (state->mgmt.nfp2lasta)
880 #define narenas_currently_allocated (state->mgmt.narenas_currently_allocated)
881 #define ntimes_arena_allocated (state->mgmt.ntimes_arena_allocated)
882 #define narenas_highwater (state->mgmt.narenas_highwater)
883 #define raw_allocated_blocks (state->mgmt.raw_allocated_blocks)
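/* Minimal sketch (not part of the build) of the convention used throughout
   the rest of this file: declare a local "state" first, then the macros
   above expand into that interpreter's obmalloc state.  The helper name is
   hypothetical. */
#if 0
static size_t
example_count_arenas(void)
{
    OMState *state = get_state();        /* the macros above expect this name */
    return narenas_currently_allocated;  /* expands to state->mgmt.narenas_currently_allocated */
}
#endif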
884
885 Py_ssize_t
886 _PyInterpreterState_GetAllocatedBlocks(PyInterpreterState *interp)
887 {
888 #ifdef Py_DEBUG
889 assert(has_own_state(interp));
890 #else
891 if (!has_own_state(interp)) {
892 _Py_FatalErrorFunc(__func__,
893 "the interpreter doesn't have its own allocator");
894 }
895 #endif
896 OMState *state = &interp->obmalloc;
897
898 Py_ssize_t n = raw_allocated_blocks;
899 /* add up allocated blocks for used pools */
900 for (uint i = 0; i < maxarenas; ++i) {
901 /* Skip arenas which are not allocated. */
902 if (allarenas[i].address == 0) {
903 continue;
904 }
905
906 uintptr_t base = (uintptr_t)_Py_ALIGN_UP(allarenas[i].address, POOL_SIZE);
907
908 /* visit every pool in the arena */
909 assert(base <= (uintptr_t) allarenas[i].pool_address);
910 for (; base < (uintptr_t) allarenas[i].pool_address; base += POOL_SIZE) {
911 poolp p = (poolp)base;
912 n += p->ref.count;
913 }
914 }
915 return n;
916 }
917
918 void
919 _PyInterpreterState_FinalizeAllocatedBlocks(PyInterpreterState *interp)
920 {
921 if (has_own_state(interp)) {
922 Py_ssize_t leaked = _PyInterpreterState_GetAllocatedBlocks(interp);
923 assert(has_own_state(interp) || leaked == 0);
924 interp->runtime->obmalloc.interpreter_leaks += leaked;
925 }
926 }
927
928 static Py_ssize_t get_num_global_allocated_blocks(_PyRuntimeState *);
929
930 /* We preserve the number of blocks leaked during runtime finalization,
931 so they can be reported if the runtime is initialized again. */
932 // XXX We don't lose any information by dropping this,
933 // so we should consider doing so.
934 static Py_ssize_t last_final_leaks = 0;
935
936 void
937 _Py_FinalizeAllocatedBlocks(_PyRuntimeState *runtime)
938 {
939 last_final_leaks = get_num_global_allocated_blocks(runtime);
940 runtime->obmalloc.interpreter_leaks = 0;
941 }
942
943 static Py_ssize_t
944 get_num_global_allocated_blocks(_PyRuntimeState *runtime)
945 {
946 Py_ssize_t total = 0;
947 if (_PyRuntimeState_GetFinalizing(runtime) != NULL) {
948 PyInterpreterState *interp = _PyInterpreterState_Main();
949 if (interp == NULL) {
950 /* We are at the very end of runtime finalization.
951 We can't rely on finalizing->interp since that thread
952 state is probably already freed, so we don't worry
953 about it. */
954 assert(PyInterpreterState_Head() == NULL);
955 }
956 else {
957 assert(interp != NULL);
958 /* It is probably the last interpreter but not necessarily. */
959 assert(PyInterpreterState_Next(interp) == NULL);
960 total += _PyInterpreterState_GetAllocatedBlocks(interp);
961 }
962 }
963 else {
964 HEAD_LOCK(runtime);
965 PyInterpreterState *interp = PyInterpreterState_Head();
966 assert(interp != NULL);
967 #ifdef Py_DEBUG
968 int got_main = 0;
969 #endif
970 for (; interp != NULL; interp = PyInterpreterState_Next(interp)) {
971 #ifdef Py_DEBUG
972 if (_Py_IsMainInterpreter(interp)) {
973 assert(!got_main);
974 got_main = 1;
975 assert(has_own_state(interp));
976 }
977 #endif
978 if (has_own_state(interp)) {
979 total += _PyInterpreterState_GetAllocatedBlocks(interp);
980 }
981 }
982 HEAD_UNLOCK(runtime);
983 #ifdef Py_DEBUG
984 assert(got_main);
985 #endif
986 }
987 total += runtime->obmalloc.interpreter_leaks;
988 total += last_final_leaks;
989 return total;
990 }
991
992 Py_ssize_t
993 _Py_GetGlobalAllocatedBlocks(void)
994 {
995 return get_num_global_allocated_blocks(&_PyRuntime);
996 }
997
998 #if WITH_PYMALLOC_RADIX_TREE
999 /*==========================================================================*/
1000 /* radix tree for tracking arena usage. */
1001
1002 #define arena_map_root (state->usage.arena_map_root)
1003 #ifdef USE_INTERIOR_NODES
1004 #define arena_map_mid_count (state->usage.arena_map_mid_count)
1005 #define arena_map_bot_count (state->usage.arena_map_bot_count)
1006 #endif
1007
1008 /* Return a pointer to a bottom tree node, or NULL if it doesn't exist or
1009 * cannot be created */
1010 static inline Py_ALWAYS_INLINE arena_map_bot_t *
1011 arena_map_get(OMState *state, pymem_block *p, int create)
1012 {
1013 #ifdef USE_INTERIOR_NODES
1014 /* sanity check that IGNORE_BITS is correct */
1015 assert(HIGH_BITS(p) == HIGH_BITS(&arena_map_root));
1016 int i1 = MAP_TOP_INDEX(p);
1017 if (arena_map_root.ptrs[i1] == NULL) {
1018 if (!create) {
1019 return NULL;
1020 }
1021 arena_map_mid_t *n = PyMem_RawCalloc(1, sizeof(arena_map_mid_t));
1022 if (n == NULL) {
1023 return NULL;
1024 }
1025 arena_map_root.ptrs[i1] = n;
1026 arena_map_mid_count++;
1027 }
1028 int i2 = MAP_MID_INDEX(p);
1029 if (arena_map_root.ptrs[i1]->ptrs[i2] == NULL) {
1030 if (!create) {
1031 return NULL;
1032 }
1033 arena_map_bot_t *n = PyMem_RawCalloc(1, sizeof(arena_map_bot_t));
1034 if (n == NULL) {
1035 return NULL;
1036 }
1037 arena_map_root.ptrs[i1]->ptrs[i2] = n;
1038 arena_map_bot_count++;
1039 }
1040 return arena_map_root.ptrs[i1]->ptrs[i2];
1041 #else
1042 return &arena_map_root;
1043 #endif
1044 }
1045
1046
1047 /* The radix tree only tracks arenas. So, for 16 MiB arenas, we throw
1048 * away 24 bits of the address. That reduces the space requirement of
1049 * the tree compared to similar radix tree page-map schemes. In
1050 * exchange for slashing the space requirement, it needs more
1051 * computation to check an address.
1052 *
1053 * Tracking coverage is done by "ideal" arena address. It is easier to
1054 * explain in decimal so let's say that the arena size is 100 bytes.
1055 * Then, ideal addresses are 100, 200, 300, etc. For checking if a
1056 * pointer address is inside an actual arena, we have to check two ideal
1057 * arena addresses. E.g. if pointer is 357, we need to check 300 and
1058 * 400. In the rare case that an arena is aligned in the ideal way
1059 * (e.g. base address of arena is 200) then we only have to check one
1060 * ideal address.
1061 *
1062 * The tree nodes for 300 and 400 both store the address of arena.
1063 * There are two cases: the arena starts at a lower ideal arena and
1064 * extends to this one, or the arena starts in this arena and extends to
1065 * the next ideal arena. The tail_lo and tail_hi members correspond to
1066 * these two cases.
1067 */
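/* Worked example (added for illustration, continuing the decimal analogy
 * above): an arena based at 310 covers [310, 410).  Marking it stores
 * tail_hi = 10 in the node for ideal address 300 (the arena occupies that
 * region from offset 10 upward) and tail_lo = 10 in the node for ideal
 * address 400 (the arena covers its first 10 bytes).  Checking pointer 357
 * consults the node for 300: the pointer's in-region offset is 57, and
 * 57 >= tail_hi with tail_hi != 0, so 357 is reported as inside an arena.
 * Pointer 305 (offset 5) fails both tests in that node and is reported as
 * outside.  An ideally aligned arena (base exactly 300) is instead recorded
 * as tail_hi = -1 in the node for 300, which makes every offset in that
 * region pass the "tail >= hi" test. */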
1068
1069
1070 /* mark or unmark addresses covered by arena */
1071 static int
1072 arena_map_mark_used(OMState *state, uintptr_t arena_base, int is_used)
1073 {
1074 /* sanity check that IGNORE_BITS is correct */
1075 assert(HIGH_BITS(arena_base) == HIGH_BITS(&arena_map_root));
1076 arena_map_bot_t *n_hi = arena_map_get(
1077 state, (pymem_block *)arena_base, is_used);
1078 if (n_hi == NULL) {
1079 assert(is_used); /* otherwise node should already exist */
1080 return 0; /* failed to allocate space for node */
1081 }
1082 int i3 = MAP_BOT_INDEX((pymem_block *)arena_base);
1083 int32_t tail = (int32_t)(arena_base & ARENA_SIZE_MASK);
1084 if (tail == 0) {
1085 /* is ideal arena address */
1086 n_hi->arenas[i3].tail_hi = is_used ? -1 : 0;
1087 }
1088 else {
1089 /* arena_base address is not ideal (i.e. not aligned to the arena size) and
1090 * so it potentially covers two MAP_BOT nodes. Get the MAP_BOT node
1091 * for the next arena. Note that it might be in different MAP_TOP
1092 * and MAP_MID nodes as well so we need to call arena_map_get()
1093 * again (do the full tree traversal).
1094 */
1095 n_hi->arenas[i3].tail_hi = is_used ? tail : 0;
1096 uintptr_t arena_base_next = arena_base + ARENA_SIZE;
1097 /* If arena_base is a legit arena address, so is arena_base_next - 1
1098 * (last address in arena). If arena_base_next overflows then it
1099 * must overflow to 0. However, that would mean arena_base was
1100 * "ideal" and we should not be in this case. */
1101 assert(arena_base < arena_base_next);
1102 arena_map_bot_t *n_lo = arena_map_get(
1103 state, (pymem_block *)arena_base_next, is_used);
1104 if (n_lo == NULL) {
1105 assert(is_used); /* otherwise should already exist */
1106 n_hi->arenas[i3].tail_hi = 0;
1107 return 0; /* failed to allocate space for node */
1108 }
1109 int i3_next = MAP_BOT_INDEX(arena_base_next);
1110 n_lo->arenas[i3_next].tail_lo = is_used ? tail : 0;
1111 }
1112 return 1;
1113 }
1114
1115 /* Return true if 'p' is a pointer inside an obmalloc arena.
1116 * _PyObject_Free() calls this so it needs to be very fast. */
1117 static int
1118 arena_map_is_used(OMState *state, pymem_block *p)
1119 {
1120 arena_map_bot_t *n = arena_map_get(state, p, 0);
1121 if (n == NULL) {
1122 return 0;
1123 }
1124 int i3 = MAP_BOT_INDEX(p);
1125 /* ARENA_BITS must be < 32 so that the tail is a non-negative int32_t. */
1126 int32_t hi = n->arenas[i3].tail_hi;
1127 int32_t lo = n->arenas[i3].tail_lo;
1128 int32_t tail = (int32_t)(AS_UINT(p) & ARENA_SIZE_MASK);
1129 return (tail < lo) || (tail >= hi && hi != 0);
1130 }
1131
1132 /* end of radix tree logic */
1133 /*==========================================================================*/
1134 #endif /* WITH_PYMALLOC_RADIX_TREE */
1135
1136
1137 /* Allocate a new arena. If we run out of memory, return NULL. Else
1138 * allocate a new arena, and return the address of an arena_object
1139 * describing the new arena. It's expected that the caller will set
1140 * `usable_arenas` to the return value.
1141 */
1142 static struct arena_object*
1143 new_arena(OMState *state)
1144 {
1145 struct arena_object* arenaobj;
1146 uint excess; /* number of bytes above pool alignment */
1147 void *address;
1148
1149 int debug_stats = _PyRuntime.obmalloc.dump_debug_stats;
1150 if (debug_stats == -1) {
1151 const char *opt = Py_GETENV("PYTHONMALLOCSTATS");
1152 debug_stats = (opt != NULL && *opt != '\0');
1153 _PyRuntime.obmalloc.dump_debug_stats = debug_stats;
1154 }
1155 if (debug_stats) {
1156 _PyObject_DebugMallocStats(stderr);
1157 }
1158
1159 if (unused_arena_objects == NULL) {
1160 uint i;
1161 uint numarenas;
1162 size_t nbytes;
1163
1164 /* Double the number of arena objects on each allocation.
1165 * Note that it's possible for `numarenas` to overflow.
1166 */
1167 numarenas = maxarenas ? maxarenas << 1 : INITIAL_ARENA_OBJECTS;
1168 if (numarenas <= maxarenas)
1169 return NULL; /* overflow */
1170 #if SIZEOF_SIZE_T <= SIZEOF_INT
1171 if (numarenas > SIZE_MAX / sizeof(*allarenas))
1172 return NULL; /* overflow */
1173 #endif
1174 nbytes = numarenas * sizeof(*allarenas);
1175 arenaobj = (struct arena_object *)PyMem_RawRealloc(allarenas, nbytes);
1176 if (arenaobj == NULL)
1177 return NULL;
1178 allarenas = arenaobj;
1179
1180 /* We might need to fix pointers that were copied. However,
1181 * new_arena only gets called when all the pages in the
1182 * previous arenas are full. Thus, there are *no* pointers
1183 * into the old array. Thus, we don't have to worry about
1184 * invalid pointers. Just to be sure, some asserts:
1185 */
1186 assert(usable_arenas == NULL);
1187 assert(unused_arena_objects == NULL);
1188
1189 /* Put the new arenas on the unused_arena_objects list. */
1190 for (i = maxarenas; i < numarenas; ++i) {
1191 allarenas[i].address = 0; /* mark as unassociated */
1192 allarenas[i].nextarena = i < numarenas - 1 ?
1193 &allarenas[i+1] : NULL;
1194 }
1195
1196 /* Update globals. */
1197 unused_arena_objects = &allarenas[maxarenas];
1198 maxarenas = numarenas;
1199 }
1200
1201 /* Take the next available arena object off the head of the list. */
1202 assert(unused_arena_objects != NULL);
1203 arenaobj = unused_arena_objects;
1204 unused_arena_objects = arenaobj->nextarena;
1205 assert(arenaobj->address == 0);
1206 address = _PyObject_Arena.alloc(_PyObject_Arena.ctx, ARENA_SIZE);
1207 #if WITH_PYMALLOC_RADIX_TREE
1208 if (address != NULL) {
1209 if (!arena_map_mark_used(state, (uintptr_t)address, 1)) {
1210 /* marking arena in radix tree failed, abort */
1211 _PyObject_Arena.free(_PyObject_Arena.ctx, address, ARENA_SIZE);
1212 address = NULL;
1213 }
1214 }
1215 #endif
1216 if (address == NULL) {
1217 /* The allocation failed: return NULL after putting the
1218 * arenaobj back.
1219 */
1220 arenaobj->nextarena = unused_arena_objects;
1221 unused_arena_objects = arenaobj;
1222 return NULL;
1223 }
1224 arenaobj->address = (uintptr_t)address;
1225
1226 ++narenas_currently_allocated;
1227 ++ntimes_arena_allocated;
1228 if (narenas_currently_allocated > narenas_highwater)
1229 narenas_highwater = narenas_currently_allocated;
1230 arenaobj->freepools = NULL;
1231 /* pool_address <- first pool-aligned address in the arena
1232 nfreepools <- number of whole pools that fit after alignment */
1233 arenaobj->pool_address = (pymem_block*)arenaobj->address;
1234 arenaobj->nfreepools = MAX_POOLS_IN_ARENA;
1235 excess = (uint)(arenaobj->address & POOL_SIZE_MASK);
1236 if (excess != 0) {
1237 --arenaobj->nfreepools;
1238 arenaobj->pool_address += POOL_SIZE - excess;
1239 }
1240 arenaobj->ntotalpools = arenaobj->nfreepools;
1241
1242 return arenaobj;
1243 }
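/* Worked example (added for illustration, assuming a 4 KiB pool size): if
 * the arena comes back at address 0x10001800, then
 * excess = 0x10001800 & 0xFFF = 0x800, so one pool is given up
 * (nfreepools = MAX_POOLS_IN_ARENA - 1) and the first pool starts at
 * 0x10001800 + (0x1000 - 0x800) = 0x10002000, the next pool-aligned
 * address.  If the arena happens to be pool-aligned already, excess is 0 and
 * all MAX_POOLS_IN_ARENA pools are usable. */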
1244
1245
1246
1247 #if WITH_PYMALLOC_RADIX_TREE
1248 /* Return true if and only if P is an address that was allocated by
1249 pymalloc. When the radix tree is used, 'poolp' is unused.
1250 */
1251 static bool
1252 address_in_range(OMState *state, void *p, poolp Py_UNUSED(pool))
1253 {
1254 return arena_map_is_used(state, p);
1255 }
1256 #else
1257 /*
1258 address_in_range(P, POOL)
1259
1260 Return true if and only if P is an address that was allocated by pymalloc.
1261 POOL must be the pool address associated with P, i.e., POOL = POOL_ADDR(P)
1262 (the caller is asked to compute this because the macro expands POOL more than
1263 once, and for efficiency it's best for the caller to assign POOL_ADDR(P) to a
1264 variable and pass the latter to the macro; because address_in_range is
1265 called on every alloc/realloc/free, micro-efficiency is important here).
1266
1267 Tricky: Let B be the arena base address associated with the pool, B =
1268 arenas[(POOL)->arenaindex].address. Then P belongs to the arena if and only if
1269
1270 B <= P < B + ARENA_SIZE
1271
1272 Subtracting B throughout, this is true iff
1273
1274 0 <= P-B < ARENA_SIZE
1275
1276 By using unsigned arithmetic, the "0 <=" half of the test can be skipped.
1277
1278 Obscure: A PyMem "free memory" function can call the pymalloc free or realloc
1279 before the first arena has been allocated. `arenas` is still NULL in that
1280 case. We're relying on the fact that maxarenas is also 0 in that case, so that
1281 (POOL)->arenaindex < maxarenas must be false, saving us from trying to index
1282 into a NULL arenas.
1283
1284 Details: given P and POOL, the arena_object corresponding to P is AO =
1285 arenas[(POOL)->arenaindex]. Suppose obmalloc controls P. Then (barring wild
1286 stores, etc), POOL is the correct address of P's pool, AO.address is the
1287 correct base address of the pool's arena, and P must be within ARENA_SIZE of
1288 AO.address. In addition, AO.address is not 0 (no arena can start at address 0
1289 (NULL)). Therefore address_in_range correctly reports that obmalloc
1290 controls P.
1291
1292 Now suppose obmalloc does not control P (e.g., P was obtained via a direct
1293 call to the system malloc() or realloc()). (POOL)->arenaindex may be anything
1294 in this case -- it may even be uninitialized trash. If the trash arenaindex
1295 is >= maxarenas, the macro correctly concludes at once that obmalloc doesn't
1296 control P.
1297
1298 Else arenaindex is < maxarenas, and AO is read up. If AO corresponds to an
1299 allocated arena, obmalloc controls all the memory in slice AO.address :
1300 AO.address+ARENA_SIZE. By case assumption, P is not controlled by obmalloc,
1301 so P doesn't lie in that slice, so the macro correctly reports that P is not
1302 controlled by obmalloc.
1303
1304 Finally, if P is not controlled by obmalloc and AO corresponds to an unused
1305 arena_object (one not currently associated with an allocated arena),
1306 AO.address is 0, and the second test in the macro reduces to:
1307
1308 P < ARENA_SIZE
1309
1310 If P >= ARENA_SIZE (extremely likely), the macro again correctly concludes
1311 that P is not controlled by obmalloc. However, if P < ARENA_SIZE, this part
1312 of the test still passes, and the third clause (AO.address != 0) is necessary
1313 to get the correct result: AO.address is 0 in this case, so the macro
1314 correctly reports that P is not controlled by obmalloc (despite that P lies in
1315 slice AO.address : AO.address + ARENA_SIZE).
1316
1317 Note: The third (AO.address != 0) clause was added in Python 2.5. Before
1318 2.5, arenas were never free()'ed, and an arenaindex < maxarena always
1319 corresponded to a currently-allocated arena, so the "P is not controlled by
1320 obmalloc, AO corresponds to an unused arena_object, and P < ARENA_SIZE" case
1321 was impossible.
1322
1323 Note that the logic is excruciating, and reading up possibly uninitialized
1324 memory when P is not controlled by obmalloc (to get at (POOL)->arenaindex)
1325 creates problems for some memory debuggers. The overwhelming advantage is
1326 that this test determines whether an arbitrary address is controlled by
1327 obmalloc in a small constant time, independent of the number of arenas
1328 obmalloc controls. Since this test is needed at every entry point, it's
1329 extremely desirable that it be this fast.
1330 */
1331
1332 static bool _Py_NO_SANITIZE_ADDRESS
1333 _Py_NO_SANITIZE_THREAD
1334 _Py_NO_SANITIZE_MEMORY
1335 address_in_range(OMState *state, void *p, poolp pool)
1336 {
1337 // Since address_in_range may be reading from memory which was not allocated
1338 // by Python, it is important that pool->arenaindex is read only once, as
1339 // another thread may be concurrently modifying the value without holding
1340 // the GIL. The following dance forces the compiler to read pool->arenaindex
1341 // only once.
1342 uint arenaindex = *((volatile uint *)&pool->arenaindex);
1343 return arenaindex < maxarenas &&
1344 (uintptr_t)p - allarenas[arenaindex].address < ARENA_SIZE &&
1345 allarenas[arenaindex].address != 0;
1346 }
1347
1348 #endif /* !WITH_PYMALLOC_RADIX_TREE */
1349
1350 /*==========================================================================*/
1351
1352 // Called when freelist is exhausted. Extend the freelist if there is
1353 // space for a block. Otherwise, remove this pool from usedpools.
1354 static void
1355 pymalloc_pool_extend(poolp pool, uint size)
1356 {
1357 if (UNLIKELY(pool->nextoffset <= pool->maxnextoffset)) {
1358 /* There is room for another block. */
1359 pool->freeblock = (pymem_block*)pool + pool->nextoffset;
1360 pool->nextoffset += INDEX2SIZE(size);
1361 *(pymem_block **)(pool->freeblock) = NULL;
1362 return;
1363 }
1364
1365 /* Pool is full, unlink from used pools. */
1366 poolp next;
1367 next = pool->nextpool;
1368 pool = pool->prevpool;
1369 next->prevpool = pool;
1370 pool->nextpool = next;
1371 }
1372
1373 /* Called when pymalloc_alloc() cannot allocate a block from the used pools.
1374 * This function takes a new pool and allocates a block from it.
1375 */
1376 static void*
1377 allocate_from_new_pool(OMState *state, uint size)
1378 {
1379 /* There isn't a pool of the right size class immediately
1380 * available: use a free pool.
1381 */
1382 if (UNLIKELY(usable_arenas == NULL)) {
1383 /* No arena has a free pool: allocate a new arena. */
1384 #ifdef WITH_MEMORY_LIMITS
1385 if (narenas_currently_allocated >= MAX_ARENAS) {
1386 return NULL;
1387 }
1388 #endif
1389 usable_arenas = new_arena(state);
1390 if (usable_arenas == NULL) {
1391 return NULL;
1392 }
1393 usable_arenas->nextarena = usable_arenas->prevarena = NULL;
1394 assert(nfp2lasta[usable_arenas->nfreepools] == NULL);
1395 nfp2lasta[usable_arenas->nfreepools] = usable_arenas;
1396 }
1397 assert(usable_arenas->address != 0);
1398
1399 /* This arena already had the smallest nfreepools value, so decreasing
1400 * nfreepools doesn't change that, and we don't need to rearrange the
1401 * usable_arenas list. However, if the arena becomes wholly allocated,
1402 * we need to remove its arena_object from usable_arenas.
1403 */
1404 assert(usable_arenas->nfreepools > 0);
1405 if (nfp2lasta[usable_arenas->nfreepools] == usable_arenas) {
1406 /* It's the last of this size, so there won't be any. */
1407 nfp2lasta[usable_arenas->nfreepools] = NULL;
1408 }
1409 /* If any free pools will remain, it will be the new smallest. */
1410 if (usable_arenas->nfreepools > 1) {
1411 assert(nfp2lasta[usable_arenas->nfreepools - 1] == NULL);
1412 nfp2lasta[usable_arenas->nfreepools - 1] = usable_arenas;
1413 }
1414
1415 /* Try to get a cached free pool. */
1416 poolp pool = usable_arenas->freepools;
1417 if (LIKELY(pool != NULL)) {
1418 /* Unlink from cached pools. */
1419 usable_arenas->freepools = pool->nextpool;
1420 usable_arenas->nfreepools--;
1421 if (UNLIKELY(usable_arenas->nfreepools == 0)) {
1422 /* Wholly allocated: remove. */
1423 assert(usable_arenas->freepools == NULL);
1424 assert(usable_arenas->nextarena == NULL ||
1425 usable_arenas->nextarena->prevarena ==
1426 usable_arenas);
1427 usable_arenas = usable_arenas->nextarena;
1428 if (usable_arenas != NULL) {
1429 usable_arenas->prevarena = NULL;
1430 assert(usable_arenas->address != 0);
1431 }
1432 }
1433 else {
1434 /* nfreepools > 0: it must be that freepools
1435 * isn't NULL, or that we haven't yet carved
1436 * off all the arena's pools for the first
1437 * time.
1438 */
1439 assert(usable_arenas->freepools != NULL ||
1440 usable_arenas->pool_address <=
1441 (pymem_block*)usable_arenas->address +
1442 ARENA_SIZE - POOL_SIZE);
1443 }
1444 }
1445 else {
1446 /* Carve off a new pool. */
1447 assert(usable_arenas->nfreepools > 0);
1448 assert(usable_arenas->freepools == NULL);
1449 pool = (poolp)usable_arenas->pool_address;
1450 assert((pymem_block*)pool <= (pymem_block*)usable_arenas->address +
1451 ARENA_SIZE - POOL_SIZE);
1452 pool->arenaindex = (uint)(usable_arenas - allarenas);
1453 assert(&allarenas[pool->arenaindex] == usable_arenas);
1454 pool->szidx = DUMMY_SIZE_IDX;
1455 usable_arenas->pool_address += POOL_SIZE;
1456 --usable_arenas->nfreepools;
1457
1458 if (usable_arenas->nfreepools == 0) {
1459 assert(usable_arenas->nextarena == NULL ||
1460 usable_arenas->nextarena->prevarena ==
1461 usable_arenas);
1462 /* Unlink the arena: it is completely allocated. */
1463 usable_arenas = usable_arenas->nextarena;
1464 if (usable_arenas != NULL) {
1465 usable_arenas->prevarena = NULL;
1466 assert(usable_arenas->address != 0);
1467 }
1468 }
1469 }
1470
1471 /* Frontlink to used pools. */
1472 pymem_block *bp;
1473 poolp next = usedpools[size + size]; /* == prev */
1474 pool->nextpool = next;
1475 pool->prevpool = next;
1476 next->nextpool = pool;
1477 next->prevpool = pool;
1478 pool->ref.count = 1;
1479 if (pool->szidx == size) {
1480 /* Luckily, this pool last contained blocks
1481 * of the same size class, so its header
1482 * and free list are already initialized.
1483 */
1484 bp = pool->freeblock;
1485 assert(bp != NULL);
1486 pool->freeblock = *(pymem_block **)bp;
1487 return bp;
1488 }
1489 /*
1490 * Initialize the pool header, set up the free list to
1491 * contain just the second block, and return the first
1492 * block.
1493 */
1494 pool->szidx = size;
1495 size = INDEX2SIZE(size);
1496 bp = (pymem_block *)pool + POOL_OVERHEAD;
1497 pool->nextoffset = POOL_OVERHEAD + (size << 1);
1498 pool->maxnextoffset = POOL_SIZE - size;
1499 pool->freeblock = bp + size;
1500 *(pymem_block **)(pool->freeblock) = NULL;
1501 return bp;
1502 }
1503
1504 /* pymalloc allocator
1505
1506 Return a pointer to newly allocated memory if pymalloc allocated memory.
1507
1508 Return NULL if pymalloc failed to allocate the memory block: on bigger
1509 requests, on error in the code below (as a last chance to serve the request)
1510 or when the max memory limit has been reached.
1511 */
1512 static inline void*
1513 pymalloc_alloc(OMState *state, void *Py_UNUSED(ctx), size_t nbytes)
1514 {
1515 #ifdef WITH_VALGRIND
1516 if (UNLIKELY(running_on_valgrind == -1)) {
1517 running_on_valgrind = RUNNING_ON_VALGRIND;
1518 }
1519 if (UNLIKELY(running_on_valgrind)) {
1520 return NULL;
1521 }
1522 #endif
1523
1524 if (UNLIKELY(nbytes == 0)) {
1525 return NULL;
1526 }
1527 if (UNLIKELY(nbytes > SMALL_REQUEST_THRESHOLD)) {
1528 return NULL;
1529 }
1530
1531 uint size = (uint)(nbytes - 1) >> ALIGNMENT_SHIFT;
1532 poolp pool = usedpools[size + size];
1533 pymem_block *bp;
1534
1535 if (LIKELY(pool != pool->nextpool)) {
1536 /*
1537 * There is a used pool for this size class.
1538 * Pick up the head block of its free list.
1539 */
1540 ++pool->ref.count;
1541 bp = pool->freeblock;
1542 assert(bp != NULL);
1543
1544 if (UNLIKELY((pool->freeblock = *(pymem_block **)bp) == NULL)) {
1545 // Reached the end of the free list, try to extend it.
1546 pymalloc_pool_extend(pool, size);
1547 }
1548 }
1549 else {
1550 /* There isn't a pool of the right size class immediately
1551 * available: use a free pool.
1552 */
1553 bp = allocate_from_new_pool(state, size);
1554 }
1555
1556 return (void *)bp;
1557 }
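/* Worked example (added for illustration, assuming a 16-byte ALIGNMENT,
 * i.e. ALIGNMENT_SHIFT == 4): a 25-byte request maps to size class index
 * (25 - 1) >> 4 == 1, whose blocks are INDEX2SIZE(1) == 32 bytes, and the
 * lookup above starts at the usedpools[2] list head for that class.  A
 * request larger than SMALL_REQUEST_THRESHOLD bytes never reaches the pools
 * and falls back to the raw allocator in _PyObject_Malloc(). */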
1558
1559
1560 void *
1561 _PyObject_Malloc(void *ctx, size_t nbytes)
1562 {
1563 OMState *state = get_state();
1564 void* ptr = pymalloc_alloc(state, ctx, nbytes);
1565 if (LIKELY(ptr != NULL)) {
1566 return ptr;
1567 }
1568
1569 ptr = PyMem_RawMalloc(nbytes);
1570 if (ptr != NULL) {
1571 raw_allocated_blocks++;
1572 }
1573 return ptr;
1574 }
1575
1576
1577 void *
1578 _PyObject_Calloc(void *ctx, size_t nelem, size_t elsize)
1579 {
1580 assert(elsize == 0 || nelem <= (size_t)PY_SSIZE_T_MAX / elsize);
1581 size_t nbytes = nelem * elsize;
1582
1583 OMState *state = get_state();
1584 void* ptr = pymalloc_alloc(state, ctx, nbytes);
1585 if (LIKELY(ptr != NULL)) {
1586 memset(ptr, 0, nbytes);
1587 return ptr;
1588 }
1589
1590 ptr = PyMem_RawCalloc(nelem, elsize);
1591 if (ptr != NULL) {
1592 raw_allocated_blocks++;
1593 }
1594 return ptr;
1595 }
1596
1597
1598 static void
1599 insert_to_usedpool(OMState *state, poolp pool)
1600 {
1601 assert(pool->ref.count > 0); /* else the pool is empty */
1602
1603 uint size = pool->szidx;
1604 poolp next = usedpools[size + size];
1605 poolp prev = next->prevpool;
1606
1607 /* insert pool before next: prev <-> pool <-> next */
1608 pool->nextpool = next;
1609 pool->prevpool = prev;
1610 next->prevpool = pool;
1611 prev->nextpool = pool;
1612 }
1613
1614 static void
1615 insert_to_freepool(OMState *state, poolp pool)
1616 {
1617 poolp next = pool->nextpool;
1618 poolp prev = pool->prevpool;
1619 next->prevpool = prev;
1620 prev->nextpool = next;
1621
1622 /* Link the pool to freepools. This is a singly-linked
1623 * list, and pool->prevpool isn't used there.
1624 */
1625 struct arena_object *ao = &allarenas[pool->arenaindex];
1626 pool->nextpool = ao->freepools;
1627 ao->freepools = pool;
1628 uint nf = ao->nfreepools;
1629 /* If this is the rightmost arena with this number of free pools,
1630 * nfp2lasta[nf] needs to change. Caution: if nf is 0, there
1631 * are no arenas in usable_arenas with that value.
1632 */
1633 struct arena_object* lastnf = nfp2lasta[nf];
1634 assert((nf == 0 && lastnf == NULL) ||
1635 (nf > 0 &&
1636 lastnf != NULL &&
1637 lastnf->nfreepools == nf &&
1638 (lastnf->nextarena == NULL ||
1639 nf < lastnf->nextarena->nfreepools)));
1640 if (lastnf == ao) { /* it is the rightmost */
1641 struct arena_object* p = ao->prevarena;
1642 nfp2lasta[nf] = (p != NULL && p->nfreepools == nf) ? p : NULL;
1643 }
1644 ao->nfreepools = ++nf;
1645
1646 /* All the rest is arena management. We just freed
1647 * a pool, and there are 4 cases for arena mgmt:
1648 * 1. If all the pools are free, return the arena to
1649 * the system free(). Except if this is the last
1650 * arena in the list, keep it to avoid thrashing:
1651 * keeping one wholly free arena in the list avoids
1652 * pathological cases where a simple loop would
1653 * otherwise provoke needing to allocate and free an
1654 * arena on every iteration. See bpo-37257.
1655 * 2. If this is the only free pool in the arena,
1656 * add the arena back to the `usable_arenas` list.
1657 * 3. If the "next" arena has a smaller count of free
1658 * pools, we have to "slide this arena right" to
1659 * restore that usable_arenas is sorted in order of
1660 * nfreepools.
1661 * 4. Else there's nothing more to do.
1662 */
1663 if (nf == ao->ntotalpools && ao->nextarena != NULL) {
1664 /* Case 1. First unlink ao from usable_arenas.
1665 */
1666 assert(ao->prevarena == NULL ||
1667 ao->prevarena->address != 0);
1668 assert(ao ->nextarena == NULL ||
1669 ao->nextarena->address != 0);
1670
1671 /* Fix the pointer in the prevarena, or the
1672 * usable_arenas pointer.
1673 */
1674 if (ao->prevarena == NULL) {
1675 usable_arenas = ao->nextarena;
1676 assert(usable_arenas == NULL ||
1677 usable_arenas->address != 0);
1678 }
1679 else {
1680 assert(ao->prevarena->nextarena == ao);
1681 ao->prevarena->nextarena =
1682 ao->nextarena;
1683 }
1684 /* Fix the pointer in the nextarena. */
1685 if (ao->nextarena != NULL) {
1686 assert(ao->nextarena->prevarena == ao);
1687 ao->nextarena->prevarena =
1688 ao->prevarena;
1689 }
1690 /* Record that this arena_object slot is
1691 * available to be reused.
1692 */
1693 ao->nextarena = unused_arena_objects;
1694 unused_arena_objects = ao;
1695
1696 #if WITH_PYMALLOC_RADIX_TREE
1697 /* mark arena region as not under control of obmalloc */
1698 arena_map_mark_used(state, ao->address, 0);
1699 #endif
1700
1701 /* Free the entire arena. */
1702 _PyObject_Arena.free(_PyObject_Arena.ctx,
1703 (void *)ao->address, ARENA_SIZE);
1704 ao->address = 0; /* mark unassociated */
1705 --narenas_currently_allocated;
1706
1707 return;
1708 }
1709
1710 if (nf == 1) {
1711 /* Case 2. Put ao at the head of
1712 * usable_arenas. Note that because
1713 * ao->nfreepools was 0 before, ao isn't
1714 * currently on the usable_arenas list.
1715 */
1716 ao->nextarena = usable_arenas;
1717 ao->prevarena = NULL;
1718 if (usable_arenas)
1719 usable_arenas->prevarena = ao;
1720 usable_arenas = ao;
1721 assert(usable_arenas->address != 0);
1722 if (nfp2lasta[1] == NULL) {
1723 nfp2lasta[1] = ao;
1724 }
1725
1726 return;
1727 }
1728
1729 /* If this arena is now out of order, we need to keep
1730 * the list sorted. The list is kept sorted so that
1731 * the "most full" arenas are used first, which allows
1732 * the nearly empty arenas to be completely freed. In
1733 * a few un-scientific tests, it seems like this
1734 * approach allowed a lot more memory to be freed.
1735 */
1736 /* If this is the only arena with nf, record that. */
1737 if (nfp2lasta[nf] == NULL) {
1738 nfp2lasta[nf] = ao;
1739 } /* else the rightmost with nf doesn't change */
1740 /* If this was the rightmost of the old size, it remains in place. */
1741 if (ao == lastnf) {
1742 /* Case 4. Nothing to do. */
1743 return;
1744 }
1745 /* If ao were the only arena in the list, the last block would have
1746 * gotten us out.
1747 */
1748 assert(ao->nextarena != NULL);
1749
1750 /* Case 3: We have to move the arena towards the end of the list,
1751 * because it has more free pools than the arena to its right. It needs
1752 * to move to follow lastnf.
1753 * First unlink ao from usable_arenas.
1754 */
1755 if (ao->prevarena != NULL) {
1756 /* ao isn't at the head of the list */
1757 assert(ao->prevarena->nextarena == ao);
1758 ao->prevarena->nextarena = ao->nextarena;
1759 }
1760 else {
1761 /* ao is at the head of the list */
1762 assert(usable_arenas == ao);
1763 usable_arenas = ao->nextarena;
1764 }
1765 ao->nextarena->prevarena = ao->prevarena;
1766 /* And insert after lastnf. */
1767 ao->prevarena = lastnf;
1768 ao->nextarena = lastnf->nextarena;
1769 if (ao->nextarena != NULL) {
1770 ao->nextarena->prevarena = ao;
1771 }
1772 lastnf->nextarena = ao;
1773 /* Verify that the swaps worked. */
1774 assert(ao->nextarena == NULL || nf <= ao->nextarena->nfreepools);
1775 assert(ao->prevarena == NULL || nf > ao->prevarena->nfreepools);
1776 assert(ao->nextarena == NULL || ao->nextarena->prevarena == ao);
1777 assert((usable_arenas == ao && ao->prevarena == NULL)
1778 || ao->prevarena->nextarena == ao);
1779 }
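/* A worked example of the ordering maintained above (illustrative numbers
 * only).  Suppose usable_arenas is sorted by nfreepools:
 *
 *     usable_arenas -> A(nf=1) <-> B(nf=3) <-> C(nf=3) <-> D(nf=7)
 *     nfp2lasta[1] == A, nfp2lasta[3] == C, nfp2lasta[7] == D
 *
 * Freeing a pool in B raises its count to 4, which exceeds C's count, so
 * Case 3 unlinks B and re-inserts it after lastnf == C:
 *
 *     usable_arenas -> A(nf=1) <-> C(nf=3) <-> B(nf=4) <-> D(nf=7)
 *     nfp2lasta[3] == C, nfp2lasta[4] == B
 *
 * The "most full" arenas (fewest free pools) stay at the front, so they are
 * preferred for new allocations and the nearly empty arenas at the back can
 * eventually be freed entirely via Case 1.
 */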
1780
1781 /* Free a memory block allocated by pymalloc_alloc().
1782 Return 1 if it was freed.
1783 Return 0 if the block was not allocated by pymalloc_alloc(). */
1784 static inline int
1785 pymalloc_free(OMState *state, void *Py_UNUSED(ctx), void *p)
1786 {
1787 assert(p != NULL);
1788
1789 #ifdef WITH_VALGRIND
1790 if (UNLIKELY(running_on_valgrind > 0)) {
1791 return 0;
1792 }
1793 #endif
1794
1795 poolp pool = POOL_ADDR(p);
1796 if (UNLIKELY(!address_in_range(state, p, pool))) {
1797 return 0;
1798 }
1799 /* We allocated this address. */
1800
1801 /* Link p to the start of the pool's freeblock list. Since
1802 * the pool had at least the p block outstanding, the pool
1803 * wasn't empty (so it's already in a usedpools[] list, or
1804 * was full and is in no list -- it's not in the freeblocks
1805 * list in any case).
1806 */
1807 assert(pool->ref.count > 0); /* else it was empty */
1808 pymem_block *lastfree = pool->freeblock;
1809 *(pymem_block **)p = lastfree;
1810 pool->freeblock = (pymem_block *)p;
1811 pool->ref.count--;
1812
1813 if (UNLIKELY(lastfree == NULL)) {
1814 /* Pool was full, so doesn't currently live in any list:
1815 * link it to the front of the appropriate usedpools[] list.
1816 * This mimics LRU pool usage for new allocations and
1817 * targets optimal filling when several pools contain
1818 * blocks of the same size class.
1819 */
1820 insert_to_usedpool(state, pool);
1821 return 1;
1822 }
1823
1824 /* freeblock wasn't NULL, so the pool wasn't full,
1825 * and the pool is in a usedpools[] list.
1826 */
1827 if (LIKELY(pool->ref.count != 0)) {
1828 /* pool isn't empty: leave it in usedpools */
1829 return 1;
1830 }
1831
1832 /* Pool is now empty: unlink from usedpools, and
1833 * link to the front of freepools. This ensures that
1834 * previously freed pools will be allocated later
1835 * (since they are no longer referenced, they may have been paged out).
1836 */
1837 insert_to_freepool(state, pool);
1838 return 1;
1839 }
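/* The block just freed becomes the new head of the pool's freeblock list;
 * the link pointer is stored inside the freed block itself, so the list
 * costs no extra memory.  A minimal standalone sketch of the same intrusive
 * free-list technique (hypothetical "demo_*" names, kept out of the build;
 * malloc() comes from <stdlib.h>, already included above):
 */
#if 0
typedef struct demo_block { struct demo_block *next; } demo_block;

static demo_block *demo_freelist = NULL;

static void
demo_release(void *p)
{
    /* Push p: reuse its first word as the link, like pool->freeblock above. */
    ((demo_block *)p)->next = demo_freelist;
    demo_freelist = (demo_block *)p;
}

static void *
demo_acquire(size_t size)
{
    if (demo_freelist != NULL) {
        demo_block *b = demo_freelist;      /* pop the most recently freed block */
        demo_freelist = b->next;
        return b;
    }
    return malloc(size < sizeof(demo_block) ? sizeof(demo_block) : size);
}
#endif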
1840
1841
1842 void
1843 _PyObject_Free(void *ctx, void *p)
1844 {
1845 /* PyObject_Free(NULL) has no effect */
1846 if (p == NULL) {
1847 return;
1848 }
1849
1850 OMState *state = get_state();
1851 if (UNLIKELY(!pymalloc_free(state, ctx, p))) {
1852 /* pymalloc didn't allocate this address */
1853 PyMem_RawFree(p);
1854 raw_allocated_blocks--;
1855 }
1856 }
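/* A minimal usage sketch of the public entry points that funnel into
 * _PyObject_Free() (assumes an initialized interpreter and the GIL held;
 * the demo_* function name is hypothetical).  The caller does not need to
 * know which allocator served a request: pymalloc_free() recognizes its own
 * addresses and everything else falls back to PyMem_RawFree().
 */
#if 0
static void
demo_object_alloc_roundtrip(void)
{
    void *small = PyObject_Malloc(64);           /* small request: served by pymalloc */
    void *large = PyObject_Malloc(1024 * 1024);  /* > SMALL_REQUEST_THRESHOLD: raw malloc */
    PyObject_Free(small);                        /* handled by pymalloc_free() */
    PyObject_Free(large);                        /* falls through to PyMem_RawFree() */
}
#endif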
1857
1858
1859 /* pymalloc realloc.
1860
1861 If nbytes==0, then as the Python docs promise, we do not treat this like
1862 free(p), and return a non-NULL result.
1863
1864 Return 1 if pymalloc reallocated memory and wrote the new pointer into
1865 newptr_p.
1866
1867 Return 0 if pymalloc didn't allocate p. */
1868 static int
1869 pymalloc_realloc(OMState *state, void *ctx,
1870 void **newptr_p, void *p, size_t nbytes)
1871 {
1872 void *bp;
1873 poolp pool;
1874 size_t size;
1875
1876 assert(p != NULL);
1877
1878 #ifdef WITH_VALGRIND
1879 /* Treat running_on_valgrind == -1 the same as 0 */
1880 if (UNLIKELY(running_on_valgrind > 0)) {
1881 return 0;
1882 }
1883 #endif
1884
1885 pool = POOL_ADDR(p);
1886 if (!address_in_range(state, p, pool)) {
1887 /* pymalloc is not managing this block.
1888
1889 If nbytes <= SMALL_REQUEST_THRESHOLD, it's tempting to try to take
1890 over this block. However, if we do, we need to copy the valid data
1891 from the C-managed block to one of our blocks, and there's no
1892 portable way to know how much of the memory space starting at p is
1893 valid.
1894
1895 As bug 1185883 pointed out the hard way, it's possible that the
1896 C-managed block is "at the end" of allocated VM space, so that a
1897 memory fault can occur if we try to copy nbytes bytes starting at p.
1898 Instead we punt: let C continue to manage this block. */
1899 return 0;
1900 }
1901
1902 /* pymalloc is in charge of this block */
1903 size = INDEX2SIZE(pool->szidx);
1904 if (nbytes <= size) {
1905 /* The block is staying the same or shrinking.
1906
1907 If it's shrinking, there's a tradeoff: it costs cycles to copy the
1908 block to a smaller size class, but it wastes memory not to copy it.
1909
1910 The compromise here is to copy on shrink only if at least 25% of
1911 size can be shaved off. */
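        /* Worked example (illustrative numbers): for a block in the 64-byte
           size class, a new request of 50 bytes gives 4*50 == 200 > 3*64 == 192,
           so the block is reused in place; a request of 40 bytes gives
           4*40 == 160 <= 192, so the data is copied into a smaller size class
           and the old block is freed. */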
1912 if (4 * nbytes > 3 * size) {
1913 /* It's the same, or shrinking and new/old > 3/4. */
1914 *newptr_p = p;
1915 return 1;
1916 }
1917 size = nbytes;
1918 }
1919
1920 bp = _PyObject_Malloc(ctx, nbytes);
1921 if (bp != NULL) {
1922 memcpy(bp, p, size);
1923 _PyObject_Free(ctx, p);
1924 }
1925 *newptr_p = bp;
1926 return 1;
1927 }
1928
1929
1930 void *
1931 _PyObject_Realloc(void *ctx, void *ptr, size_t nbytes)
1932 {
1933 void *ptr2;
1934
1935 if (ptr == NULL) {
1936 return _PyObject_Malloc(ctx, nbytes);
1937 }
1938
1939 OMState *state = get_state();
1940 if (pymalloc_realloc(state, ctx, &ptr2, ptr, nbytes)) {
1941 return ptr2;
1942 }
1943
1944 return PyMem_RawRealloc(ptr, nbytes);
1945 }
1946
1947 #else /* ! WITH_PYMALLOC */
1948
1949 /*==========================================================================*/
1950 /* pymalloc not enabled: Redirect the entry points to malloc. These will
1951 * only be used by extensions that are compiled with pymalloc enabled. */
1952
1953 Py_ssize_t
1954 _PyInterpreterState_GetAllocatedBlocks(PyInterpreterState *Py_UNUSED(interp))
1955 {
1956 return 0;
1957 }
1958
1959 Py_ssize_t
1960 _Py_GetGlobalAllocatedBlocks(void)
1961 {
1962 return 0;
1963 }
1964
1965 void
1966 _PyInterpreterState_FinalizeAllocatedBlocks(PyInterpreterState *Py_UNUSED(interp))
1967 {
1968 return;
1969 }
1970
1971 void
1972 _Py_FinalizeAllocatedBlocks(_PyRuntimeState *Py_UNUSED(runtime))
1973 {
1974 return;
1975 }
1976
1977 #endif /* WITH_PYMALLOC */
1978
1979
1980 /*==========================================================================*/
1981 /* A cross-platform debugging allocator. This doesn't manage memory directly;
1982 * it wraps a real allocator, adding extra debugging info to the memory blocks.
1983 */
1984
1985 /* Uncomment this define to add the "serialno" field */
1986 /* #define PYMEM_DEBUG_SERIALNO */
1987
1988 #ifdef PYMEM_DEBUG_SERIALNO
1989 static size_t serialno = 0; /* incremented on each debug {m,re}alloc */
1990
1991 /* serialno is always incremented via calling this routine. The point is
1992 * to supply a single place to set a breakpoint.
1993 */
1994 static void
1995 bumpserialno(void)
1996 {
1997 ++serialno;
1998 }
1999 #endif
2000
2001 #define SST SIZEOF_SIZE_T
2002
2003 #ifdef PYMEM_DEBUG_SERIALNO
2004 # define PYMEM_DEBUG_EXTRA_BYTES (4 * SST)
2005 #else
2006 # define PYMEM_DEBUG_EXTRA_BYTES (3 * SST)
2007 #endif
2008
2009 /* Read sizeof(size_t) bytes at p as a big-endian size_t. */
2010 static size_t
2011 read_size_t(const void *p)
2012 {
2013 const uint8_t *q = (const uint8_t *)p;
2014 size_t result = *q++;
2015 int i;
2016
2017 for (i = SST; --i > 0; ++q)
2018 result = (result << 8) | *q;
2019 return result;
2020 }
2021
2022 /* Write n as a big-endian size_t, MSB at address p, LSB at
2023 * p + sizeof(size_t) - 1.
2024 */
2025 static void
2026 write_size_t(void *p, size_t n)
2027 {
2028 uint8_t *q = (uint8_t *)p + SST - 1;
2029 int i;
2030
2031 for (i = SST; --i >= 0; --q) {
2032 *q = (uint8_t)(n & 0xff);
2033 n >>= 8;
2034 }
2035 }
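/* Worked example (assuming SST == 8): write_size_t(p, 0x0123456789ABCDEF)
 * stores the bytes 01 23 45 67 89 AB CD EF at p[0]..p[7] (MSB first), and
 * read_size_t(p) reassembles the same value.  The big-endian layout is used
 * purely so the byte count is easy to read in a hex memory dump.
 */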
2036
2037 /* Let S = sizeof(size_t). The debug malloc asks for 4 * S extra bytes and
2038 fills them with useful stuff, here calling the underlying malloc's result p:
2039
2040 p[0: S]
2041 Number of bytes originally asked for. This is a size_t, big-endian (easier
2042 to read in a memory dump).
2043 p[S]
2044 API ID. See PEP 445. A single character identifying the domain: 'r' (raw), 'm' (mem) or 'o' (object).
2045 p[S+1: 2*S]
2046 Copies of PYMEM_FORBIDDENBYTE. Used to catch underwrites and reads.
2047 p[2*S: 2*S+n]
2048 The requested memory, filled with copies of PYMEM_CLEANBYTE.
2049 Used to catch reference to uninitialized memory.
2050 &p[2*S] is returned. Note that this is 8-byte aligned if pymalloc
2051 handled the request itself.
2052 p[2*S+n: 2*S+n+S]
2053 Copies of PYMEM_FORBIDDENBYTE. Used to catch overwrites and reads.
2054 p[2*S+n+S: 2*S+n+2*S]
2055 A serial number, incremented by 1 on each call to _PyMem_DebugMalloc
2056 and _PyMem_DebugRealloc.
2057 This is a big-endian size_t.
2058 If "bad memory" is detected later, the serial number gives an
2059 excellent way to set a breakpoint on the next run, to capture the
2060 instant at which this block was passed out.
2061
2062 If PYMEM_DEBUG_SERIALNO is not defined (default), the debug malloc only asks
2063 for 3 * S extra bytes, and omits the last serialno field.
2064 */
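/* Concrete example (assuming SST == 8, PYMEM_DEBUG_SERIALNO defined, and the
 * usual byte values PYMEM_FORBIDDENBYTE == 0xFD, PYMEM_CLEANBYTE == 0xCD):
 * a debug malloc of n == 4 bytes through the 'm' (PyMem) domain lays out the
 * pad block p as
 *
 *     p[0:8]    00 00 00 00 00 00 00 04   requested size, big-endian
 *     p[8]      'm'                       API ID
 *     p[9:16]   FD FD FD FD FD FD FD      leading forbidden bytes
 *     p[16:20]  CD CD CD CD               the caller's 4 bytes
 *     p[20:28]  FD FD FD FD FD FD FD FD   trailing forbidden bytes
 *     p[28:36]  serial number, big-endian
 *
 * and &p[16] is the pointer handed back to the caller.
 */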
2065
2066 static void *
2067 _PyMem_DebugRawAlloc(int use_calloc, void *ctx, size_t nbytes)
2068 {
2069 debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
2070 uint8_t *p; /* base address of malloc'ed pad block */
2071 uint8_t *data; /* p + 2*SST == pointer to data bytes */
2072 uint8_t *tail; /* data + nbytes == pointer to tail pad bytes */
2073 size_t total; /* nbytes + PYMEM_DEBUG_EXTRA_BYTES */
2074
2075 if (nbytes > (size_t)PY_SSIZE_T_MAX - PYMEM_DEBUG_EXTRA_BYTES) {
2076 /* integer overflow: can't represent total as a Py_ssize_t */
2077 return NULL;
2078 }
2079 total = nbytes + PYMEM_DEBUG_EXTRA_BYTES;
2080
2081 /* Layout: [SSSS IFFF CCCC...CCCC FFFF NNNN]
2082 ^--- p ^--- data ^--- tail
2083 S: nbytes stored as size_t
2084 I: API identifier (1 byte)
2085 F: Forbidden bytes (size_t - 1 bytes before, size_t bytes after)
2086 C: Clean bytes used later to store actual data
2087 N: Serial number stored as size_t
2088
2089 If PYMEM_DEBUG_SERIALNO is not defined (default), the last NNNN field
2090 is omitted. */
2091
2092 if (use_calloc) {
2093 p = (uint8_t *)api->alloc.calloc(api->alloc.ctx, 1, total);
2094 }
2095 else {
2096 p = (uint8_t *)api->alloc.malloc(api->alloc.ctx, total);
2097 }
2098 if (p == NULL) {
2099 return NULL;
2100 }
2101 data = p + 2*SST;
2102
2103 #ifdef PYMEM_DEBUG_SERIALNO
2104 bumpserialno();
2105 #endif
2106
2107 /* at p, write size (SST bytes), id (1 byte), pad (SST-1 bytes) */
2108 write_size_t(p, nbytes);
2109 p[SST] = (uint8_t)api->api_id;
2110 memset(p + SST + 1, PYMEM_FORBIDDENBYTE, SST-1);
2111
2112 if (nbytes > 0 && !use_calloc) {
2113 memset(data, PYMEM_CLEANBYTE, nbytes);
2114 }
2115
2116 /* at tail, write pad (SST bytes) and serialno (SST bytes) */
2117 tail = data + nbytes;
2118 memset(tail, PYMEM_FORBIDDENBYTE, SST);
2119 #ifdef PYMEM_DEBUG_SERIALNO
2120 write_size_t(tail + SST, serialno);
2121 #endif
2122
2123 return data;
2124 }
2125
2126 void *
2127 _PyMem_DebugRawMalloc(void *ctx, size_t nbytes)
2128 {
2129 return _PyMem_DebugRawAlloc(0, ctx, nbytes);
2130 }
2131
2132 void *
2133 _PyMem_DebugRawCalloc(void *ctx, size_t nelem, size_t elsize)
2134 {
2135 size_t nbytes;
2136 assert(elsize == 0 || nelem <= (size_t)PY_SSIZE_T_MAX / elsize);
2137 nbytes = nelem * elsize;
2138 return _PyMem_DebugRawAlloc(1, ctx, nbytes);
2139 }
2140
2141
2142 /* The debug free first checks the 2*SST bytes on each end for sanity (in
2143 particular, that the FORBIDDENBYTEs with the api ID are still intact).
2144 Then fills the entire pad block (header, data and trailer) with PYMEM_DEADBYTE.
2145 Then calls the underlying free.
2146 */
2147 void
2148 _PyMem_DebugRawFree(void *ctx, void *p)
2149 {
2150 /* PyMem_Free(NULL) has no effect */
2151 if (p == NULL) {
2152 return;
2153 }
2154
2155 debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
2156 uint8_t *q = (uint8_t *)p - 2*SST; /* address returned from malloc */
2157 size_t nbytes;
2158
2159 _PyMem_DebugCheckAddress(__func__, api->api_id, p);
2160 nbytes = read_size_t(q);
2161 nbytes += PYMEM_DEBUG_EXTRA_BYTES;
2162 memset(q, PYMEM_DEADBYTE, nbytes);
2163 api->alloc.free(api->alloc.ctx, q);
2164 }
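/* For example, the block is filled with PYMEM_DEADBYTE (0xDD with the usual
 * value) just before the underlying free, so a dangling pointer dereferenced
 * afterwards typically reads 0xdd bytes, which is how a use-after-free shows
 * up in _PyObject_DebugDumpAddress() reports.
 */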
2165
2166
2167 void *
2168 _PyMem_DebugRawRealloc(void *ctx, void *p, size_t nbytes)
2169 {
2170 if (p == NULL) {
2171 return _PyMem_DebugRawAlloc(0, ctx, nbytes);
2172 }
2173
2174 debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
2175 uint8_t *head; /* base address of malloc'ed pad block */
2176 uint8_t *data; /* pointer to data bytes */
2177 uint8_t *r;
2178 uint8_t *tail; /* data + nbytes == pointer to tail pad bytes */
2179 size_t total; /* nbytes + PYMEM_DEBUG_EXTRA_BYTES */
2180 size_t original_nbytes;
2181 #define ERASED_SIZE 64
2182 uint8_t save[2*ERASED_SIZE]; /* A copy of erased bytes. */
2183
2184 _PyMem_DebugCheckAddress(__func__, api->api_id, p);
2185
2186 data = (uint8_t *)p;
2187 head = data - 2*SST;
2188 original_nbytes = read_size_t(head);
2189 if (nbytes > (size_t)PY_SSIZE_T_MAX - PYMEM_DEBUG_EXTRA_BYTES) {
2190 /* integer overflow: can't represent total as a Py_ssize_t */
2191 return NULL;
2192 }
2193 total = nbytes + PYMEM_DEBUG_EXTRA_BYTES;
2194
2195 tail = data + original_nbytes;
2196 #ifdef PYMEM_DEBUG_SERIALNO
2197 size_t block_serialno = read_size_t(tail + SST);
2198 #endif
2199 /* Mark the header, the trailer, ERASED_SIZE bytes at the beginning and
2200 ERASED_SIZE bytes at the end as dead, and save a copy of the erased bytes.
2201 */
2202 if (original_nbytes <= sizeof(save)) {
2203 memcpy(save, data, original_nbytes);
2204 memset(data - 2 * SST, PYMEM_DEADBYTE,
2205 original_nbytes + PYMEM_DEBUG_EXTRA_BYTES);
2206 }
2207 else {
2208 memcpy(save, data, ERASED_SIZE);
2209 memset(head, PYMEM_DEADBYTE, ERASED_SIZE + 2 * SST);
2210 memcpy(&save[ERASED_SIZE], tail - ERASED_SIZE, ERASED_SIZE);
2211 memset(tail - ERASED_SIZE, PYMEM_DEADBYTE,
2212 ERASED_SIZE + PYMEM_DEBUG_EXTRA_BYTES - 2 * SST);
2213 }
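    /* Worked example (illustrative numbers, SST == 8): for original_nbytes ==
       200 (> sizeof(save) == 128), the first 64 and the last 64 data bytes are
       saved, then the header plus the first 64 bytes, and the last 64 bytes
       plus the trailer, are overwritten with PYMEM_DEADBYTE; the middle 72
       data bytes are neither saved nor erased, and realloc() itself preserves
       them (up to the new size). */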
2214
2215 /* Resize and add decorations. */
2216 r = (uint8_t *)api->alloc.realloc(api->alloc.ctx, head, total);
2217 if (r == NULL) {
2218 /* if realloc() failed: rewrite header and footer which have
2219 just been erased */
2220 nbytes = original_nbytes;
2221 }
2222 else {
2223 head = r;
2224 #ifdef PYMEM_DEBUG_SERIALNO
2225 bumpserialno();
2226 block_serialno = serialno;
2227 #endif
2228 }
2229 data = head + 2*SST;
2230
2231 write_size_t(head, nbytes);
2232 head[SST] = (uint8_t)api->api_id;
2233 memset(head + SST + 1, PYMEM_FORBIDDENBYTE, SST-1);
2234
2235 tail = data + nbytes;
2236 memset(tail, PYMEM_FORBIDDENBYTE, SST);
2237 #ifdef PYMEM_DEBUG_SERIALNO
2238 write_size_t(tail + SST, block_serialno);
2239 #endif
2240
2241 /* Restore saved bytes. */
2242 if (original_nbytes <= sizeof(save)) {
2243 memcpy(data, save, Py_MIN(nbytes, original_nbytes));
2244 }
2245 else {
2246 size_t i = original_nbytes - ERASED_SIZE;
2247 memcpy(data, save, Py_MIN(nbytes, ERASED_SIZE));
2248 if (nbytes > i) {
2249 memcpy(data + i, &save[ERASED_SIZE],
2250 Py_MIN(nbytes - i, ERASED_SIZE));
2251 }
2252 }
2253
2254 if (r == NULL) {
2255 return NULL;
2256 }
2257
2258 if (nbytes > original_nbytes) {
2259 /* growing: mark new extra memory clean */
2260 memset(data + original_nbytes, PYMEM_CLEANBYTE,
2261 nbytes - original_nbytes);
2262 }
2263
2264 return data;
2265 }
2266
2267 static inline void
2268 _PyMem_DebugCheckGIL(const char *func)
2269 {
2270 if (!PyGILState_Check()) {
2271 _Py_FatalErrorFunc(func,
2272 "Python memory allocator called "
2273 "without holding the GIL");
2274 }
2275 }
2276
2277 void *
2278 _PyMem_DebugMalloc(void *ctx, size_t nbytes)
2279 {
2280 _PyMem_DebugCheckGIL(__func__);
2281 return _PyMem_DebugRawMalloc(ctx, nbytes);
2282 }
2283
2284 void *
2285 _PyMem_DebugCalloc(void *ctx, size_t nelem, size_t elsize)
2286 {
2287 _PyMem_DebugCheckGIL(__func__);
2288 return _PyMem_DebugRawCalloc(ctx, nelem, elsize);
2289 }
2290
2291
2292 void
2293 _PyMem_DebugFree(void *ctx, void *ptr)
2294 {
2295 _PyMem_DebugCheckGIL(__func__);
2296 _PyMem_DebugRawFree(ctx, ptr);
2297 }
2298
2299
2300 void *
2301 _PyMem_DebugRealloc(void *ctx, void *ptr, size_t nbytes)
2302 {
2303 _PyMem_DebugCheckGIL(__func__);
2304 return _PyMem_DebugRawRealloc(ctx, ptr, nbytes);
2305 }
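/* The wrappers above are what PyMem_SetupDebugHooks() installs on top of the
 * current allocators.  A minimal sketch of turning them on at runtime
 * (hypothetical demo_* name; normally this happens automatically in debug
 * builds or when PYTHONMALLOC=debug is set):
 */
#if 0
static void
demo_enable_debug_hooks(void)
{
    PyMem_SetupDebugHooks();        /* wrap the current allocators with _PyMem_Debug* */

    /* PyMem_Malloc()/PyMem_Free() must be called with the GIL held, which is
       exactly what _PyMem_DebugCheckGIL() above enforces. */
    void *p = PyMem_Malloc(16);
    PyMem_Free(p);
}
#endif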
2306
2307 /* Check the forbidden bytes on both ends of the memory allocated for p.
2308 * If anything is wrong, print info to stderr via _PyObject_DebugDumpAddress,
2309 * and call Py_FatalError to kill the program.
2310 * The API id is also checked.
2311 */
2312 static void
2313 _PyMem_DebugCheckAddress(const char *func, char api, const void *p)
2314 {
2315 assert(p != NULL);
2316
2317 const uint8_t *q = (const uint8_t *)p;
2318 size_t nbytes;
2319 const uint8_t *tail;
2320 int i;
2321 char id;
2322
2323 /* Check the API id */
2324 id = (char)q[-SST];
2325 if (id != api) {
2326 _PyObject_DebugDumpAddress(p);
2327 _Py_FatalErrorFormat(func,
2328 "bad ID: Allocated using API '%c', "
2329 "verified using API '%c'",
2330 id, api);
2331 }
2332
2333 /* Check the stuff at the start of p first: if there's underwrite
2334 * corruption, the number-of-bytes field may be nuts, and checking
2335 * the tail could then lead to a segfault.
2336 */
2337 for (i = SST-1; i >= 1; --i) {
2338 if (*(q-i) != PYMEM_FORBIDDENBYTE) {
2339 _PyObject_DebugDumpAddress(p);
2340 _Py_FatalErrorFunc(func, "bad leading pad byte");
2341 }
2342 }
2343
2344 nbytes = read_size_t(q - 2*SST);
2345 tail = q + nbytes;
2346 for (i = 0; i < SST; ++i) {
2347 if (tail[i] != PYMEM_FORBIDDENBYTE) {
2348 _PyObject_DebugDumpAddress(p);
2349 _Py_FatalErrorFunc(func, "bad trailing pad byte");
2350 }
2351 }
2352 }
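/* An intentionally buggy sketch of the kind of error this check catches
 * (hypothetical demo_* name, kept out of the build; assumes the debug hooks
 * are installed): writing one byte before the returned pointer clobbers a
 * leading FORBIDDENBYTE, so the next PyMem_Free() dumps the block and aborts
 * with "bad leading pad byte".
 */
#if 0
static void
demo_underwrite(void)
{
    char *buf = (char *)PyMem_Malloc(16);
    buf[-1] = 0;          /* buffer underwrite: corrupts the debug pad */
    PyMem_Free(buf);      /* _PyMem_DebugCheckAddress() fatal-errors here */
}
#endif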
2353
2354 /* Display info to stderr about the memory block at p. */
2355 static void
2356 _PyObject_DebugDumpAddress(const void *p)
2357 {
2358 const uint8_t *q = (const uint8_t *)p;
2359 const uint8_t *tail;
2360 size_t nbytes;
2361 int i;
2362 int ok;
2363 char id;
2364
2365 fprintf(stderr, "Debug memory block at address p=%p:", p);
2366 if (p == NULL) {
2367 fprintf(stderr, "\n");
2368 return;
2369 }
2370 id = (char)q[-SST];
2371 fprintf(stderr, " API '%c'\n", id);
2372
2373 nbytes = read_size_t(q - 2*SST);
2374 fprintf(stderr, " %zu bytes originally requested\n", nbytes);
2375
2376 /* In case this is nuts, check the leading pad bytes first. */
2377 fprintf(stderr, " The %d pad bytes at p-%d are ", SST-1, SST-1);
2378 ok = 1;
2379 for (i = 1; i <= SST-1; ++i) {
2380 if (*(q-i) != PYMEM_FORBIDDENBYTE) {
2381 ok = 0;
2382 break;
2383 }
2384 }
2385 if (ok)
2386 fputs("FORBIDDENBYTE, as expected.\n", stderr);
2387 else {
2388 fprintf(stderr, "not all FORBIDDENBYTE (0x%02x):\n",
2389 PYMEM_FORBIDDENBYTE);
2390 for (i = SST-1; i >= 1; --i) {
2391 const uint8_t byte = *(q-i);
2392 fprintf(stderr, " at p-%d: 0x%02x", i, byte);
2393 if (byte != PYMEM_FORBIDDENBYTE)
2394 fputs(" *** OUCH", stderr);
2395 fputc('\n', stderr);
2396 }
2397
2398 fputs(" Because memory is corrupted at the start, the "
2399 "count of bytes requested\n"
2400 " may be bogus, and checking the trailing pad "
2401 "bytes may segfault.\n", stderr);
2402 }
2403
2404 tail = q + nbytes;
2405 fprintf(stderr, " The %d pad bytes at tail=%p are ", SST, (void *)tail);
2406 ok = 1;
2407 for (i = 0; i < SST; ++i) {
2408 if (tail[i] != PYMEM_FORBIDDENBYTE) {
2409 ok = 0;
2410 break;
2411 }
2412 }
2413 if (ok)
2414 fputs("FORBIDDENBYTE, as expected.\n", stderr);
2415 else {
2416 fprintf(stderr, "not all FORBIDDENBYTE (0x%02x):\n",
2417 PYMEM_FORBIDDENBYTE);
2418 for (i = 0; i < SST; ++i) {
2419 const uint8_t byte = tail[i];
2420 fprintf(stderr, " at tail+%d: 0x%02x",
2421 i, byte);
2422 if (byte != PYMEM_FORBIDDENBYTE)
2423 fputs(" *** OUCH", stderr);
2424 fputc('\n', stderr);
2425 }
2426 }
2427
2428 #ifdef PYMEM_DEBUG_SERIALNO
2429 size_t serial = read_size_t(tail + SST);
2430 fprintf(stderr,
2431 " The block was made by call #%zu to debug malloc/realloc.\n",
2432 serial);
2433 #endif
2434
2435 if (nbytes > 0) {
2436 i = 0;
2437 fputs(" Data at p:", stderr);
2438 /* print up to 8 bytes at the start */
2439 while (q < tail && i < 8) {
2440 fprintf(stderr, " %02x", *q);
2441 ++i;
2442 ++q;
2443 }
2444 /* and up to 8 at the end */
2445 if (q < tail) {
2446 if (tail - q > 8) {
2447 fputs(" ...", stderr);
2448 q = tail - 8;
2449 }
2450 while (q < tail) {
2451 fprintf(stderr, " %02x", *q);
2452 ++q;
2453 }
2454 }
2455 fputc('\n', stderr);
2456 }
2457 fputc('\n', stderr);
2458
2459 fflush(stderr);
2460 _PyMem_DumpTraceback(fileno(stderr), p);
2461 }
2462
2463
2464 static size_t
2465 printone(FILE *out, const char* msg, size_t value)
2466 {
2467 int i, k;
2468 char buf[100];
2469 size_t origvalue = value;
2470
2471 fputs(msg, out);
2472 for (i = (int)strlen(msg); i < 35; ++i)
2473 fputc(' ', out);
2474 fputc('=', out);
2475
2476 /* Write the value with commas. */
2477 i = 22;
2478 buf[i--] = '\0';
2479 buf[i--] = '\n';
2480 k = 3;
2481 do {
2482 size_t nextvalue = value / 10;
2483 unsigned int digit = (unsigned int)(value - nextvalue * 10);
2484 value = nextvalue;
2485 buf[i--] = (char)(digit + '0');
2486 --k;
2487 if (k == 0 && value && i >= 0) {
2488 k = 3;
2489 buf[i--] = ',';
2490 }
2491 } while (value && i >= 0);
2492
2493 while (i >= 0)
2494 buf[i--] = ' ';
2495 fputs(buf, out);
2496
2497 return origvalue;
2498 }
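/* For example, printone(out, "# arenas allocated current", 1234567) writes
 * the message padded with spaces to 35 columns, an '=', and then "1,234,567"
 * right-aligned in the remaining 21-character field; this is how the aligned
 * tables produced by _PyObject_DebugMallocStats() below are formatted.
 */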
2499
2500 void
2501 _PyDebugAllocatorStats(FILE *out,
2502 const char *block_name, int num_blocks, size_t sizeof_block)
2503 {
2504 char buf1[128];
2505 char buf2[128];
2506 PyOS_snprintf(buf1, sizeof(buf1),
2507 "%d %ss * %zd bytes each",
2508 num_blocks, block_name, sizeof_block);
2509 PyOS_snprintf(buf2, sizeof(buf2),
2510 "%48s ", buf1);
2511 (void)printone(out, buf2, num_blocks * sizeof_block);
2512 }
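/* This helper is used by the various _Py*_DebugMallocStats() routines so that
 * per-type free-list numbers line up with the table printed by
 * _PyObject_DebugMallocStats().  Illustrative call (the count is made up,
 * roughly what the dict free list reports):
 *
 *     _PyDebugAllocatorStats(out, "free PyDictObject", 80,
 *                            sizeof(PyDictObject));
 *
 * which prints "<count> free PyDictObjects * <size> bytes each" followed by
 * the comma-formatted product from printone().
 */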
2513
2514
2515 #ifdef WITH_PYMALLOC
2516
2517 #ifdef Py_DEBUG
2518 /* Is target in the list? The list is traversed via the nextpool pointers.
2519 * The list may be NULL-terminated, or circular. Return 1 if target is in
2520 * list, else 0.
2521 */
2522 static int
2523 pool_is_in_list(const poolp target, poolp list)
2524 {
2525 poolp origlist = list;
2526 assert(target != NULL);
2527 if (list == NULL)
2528 return 0;
2529 do {
2530 if (target == list)
2531 return 1;
2532 list = list->nextpool;
2533 } while (list != NULL && list != origlist);
2534 return 0;
2535 }
2536 #endif
2537
2538 /* Print summary info to "out" about the state of pymalloc's structures.
2539 * In Py_DEBUG mode, also perform some expensive internal consistency
2540 * checks.
2541 *
2542 * Return 0 if the memory debug hooks are not installed or no statistics were
2543 * written to out; return 1 otherwise.
2544 */
2545 int
2546 _PyObject_DebugMallocStats(FILE *out)
2547 {
2548 if (!_PyMem_PymallocEnabled()) {
2549 return 0;
2550 }
2551 OMState *state = get_state();
2552
2553 uint i;
2554 const uint numclasses = SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT;
2555 /* # of pools, allocated blocks, and free blocks per class index */
2556 size_t numpools[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
2557 size_t numblocks[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
2558 size_t numfreeblocks[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
2559 /* total # of allocated bytes in used and full pools */
2560 size_t allocated_bytes = 0;
2561 /* total # of available bytes in used pools */
2562 size_t available_bytes = 0;
2563 /* # of free pools + pools not yet carved out of current arena */
2564 uint numfreepools = 0;
2565 /* # of bytes for arena alignment padding */
2566 size_t arena_alignment = 0;
2567 /* # of bytes in used and full pools used for pool_headers */
2568 size_t pool_header_bytes = 0;
2569 /* # of bytes in used and full pools wasted due to quantization,
2570 * i.e. the necessarily leftover space at the ends of used and
2571 * full pools.
2572 */
2573 size_t quantization = 0;
2574 /* # of arenas actually allocated. */
2575 size_t narenas = 0;
2576 /* running total -- should equal narenas * ARENA_SIZE */
2577 size_t total;
2578 char buf[128];
2579
2580 fprintf(out, "Small block threshold = %d, in %u size classes.\n",
2581 SMALL_REQUEST_THRESHOLD, numclasses);
2582
2583 for (i = 0; i < numclasses; ++i)
2584 numpools[i] = numblocks[i] = numfreeblocks[i] = 0;
2585
2586 /* Because full pools aren't linked to from anything, it's easiest
2587 * to march over all the arenas. If we're lucky, most of the memory
2588 * will be living in full pools -- would be a shame to miss them.
2589 */
2590 for (i = 0; i < maxarenas; ++i) {
2591 uintptr_t base = allarenas[i].address;
2592
2593 /* Skip arenas which are not allocated. */
2594 if (allarenas[i].address == (uintptr_t)NULL)
2595 continue;
2596 narenas += 1;
2597
2598 numfreepools += allarenas[i].nfreepools;
2599
2600 /* round up to pool alignment */
2601 if (base & (uintptr_t)POOL_SIZE_MASK) {
2602 arena_alignment += POOL_SIZE;
2603 base &= ~(uintptr_t)POOL_SIZE_MASK;
2604 base += POOL_SIZE;
2605 }
2606
2607 /* visit every pool in the arena */
2608 assert(base <= (uintptr_t) allarenas[i].pool_address);
2609 for (; base < (uintptr_t) allarenas[i].pool_address; base += POOL_SIZE) {
2610 poolp p = (poolp)base;
2611 const uint sz = p->szidx;
2612 uint freeblocks;
2613
2614 if (p->ref.count == 0) {
2615 /* currently unused */
2616 #ifdef Py_DEBUG
2617 assert(pool_is_in_list(p, allarenas[i].freepools));
2618 #endif
2619 continue;
2620 }
2621 ++numpools[sz];
2622 numblocks[sz] += p->ref.count;
2623 freeblocks = NUMBLOCKS(sz) - p->ref.count;
2624 numfreeblocks[sz] += freeblocks;
2625 #ifdef Py_DEBUG
2626 if (freeblocks > 0)
2627 assert(pool_is_in_list(p, usedpools[sz + sz]));
2628 #endif
2629 }
2630 }
2631 assert(narenas == narenas_currently_allocated);
2632
2633 fputc('\n', out);
2634 fputs("class size num pools blocks in use avail blocks\n"
2635 "----- ---- --------- ------------- ------------\n",
2636 out);
2637
2638 for (i = 0; i < numclasses; ++i) {
2639 size_t p = numpools[i];
2640 size_t b = numblocks[i];
2641 size_t f = numfreeblocks[i];
2642 uint size = INDEX2SIZE(i);
2643 if (p == 0) {
2644 assert(b == 0 && f == 0);
2645 continue;
2646 }
2647 fprintf(out, "%5u %6u %11zu %15zu %13zu\n",
2648 i, size, p, b, f);
2649 allocated_bytes += b * size;
2650 available_bytes += f * size;
2651 pool_header_bytes += p * POOL_OVERHEAD;
2652 quantization += p * ((POOL_SIZE - POOL_OVERHEAD) % size);
2653 }
2654 fputc('\n', out);
2655 #ifdef PYMEM_DEBUG_SERIALNO
2656 if (_PyMem_DebugEnabled()) {
2657 (void)printone(out, "# times object malloc called", serialno);
2658 }
2659 #endif
2660 (void)printone(out, "# arenas allocated total", ntimes_arena_allocated);
2661 (void)printone(out, "# arenas reclaimed", ntimes_arena_allocated - narenas);
2662 (void)printone(out, "# arenas highwater mark", narenas_highwater);
2663 (void)printone(out, "# arenas allocated current", narenas);
2664
2665 PyOS_snprintf(buf, sizeof(buf),
2666 "%zu arenas * %d bytes/arena",
2667 narenas, ARENA_SIZE);
2668 (void)printone(out, buf, narenas * ARENA_SIZE);
2669
2670 fputc('\n', out);
2671
2672 /* Account for what all of those arena bytes are being used for. */
2673 total = printone(out, "# bytes in allocated blocks", allocated_bytes);
2674 total += printone(out, "# bytes in available blocks", available_bytes);
2675
2676 PyOS_snprintf(buf, sizeof(buf),
2677 "%u unused pools * %d bytes", numfreepools, POOL_SIZE);
2678 total += printone(out, buf, (size_t)numfreepools * POOL_SIZE);
2679
2680 total += printone(out, "# bytes lost to pool headers", pool_header_bytes);
2681 total += printone(out, "# bytes lost to quantization", quantization);
2682 total += printone(out, "# bytes lost to arena alignment", arena_alignment);
2683 (void)printone(out, "Total", total);
2684 assert(narenas * ARENA_SIZE == total);
2685
2686 #if WITH_PYMALLOC_RADIX_TREE
2687 fputs("\narena map counts\n", out);
2688 #ifdef USE_INTERIOR_NODES
2689 (void)printone(out, "# arena map mid nodes", arena_map_mid_count);
2690 (void)printone(out, "# arena map bot nodes", arena_map_bot_count);
2691 fputc('\n', out);
2692 #endif
2693 total = printone(out, "# bytes lost to arena map root", sizeof(arena_map_root));
2694 #ifdef USE_INTERIOR_NODES
2695 total += printone(out, "# bytes lost to arena map mid",
2696 sizeof(arena_map_mid_t) * arena_map_mid_count);
2697 total += printone(out, "# bytes lost to arena map bot",
2698 sizeof(arena_map_bot_t) * arena_map_bot_count);
2699 (void)printone(out, "Total", total);
2700 #endif
2701 #endif
2702
2703 return 1;
2704 }
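/* This report is what sys._debugmallocstats() prints to stderr, e.g.
 *
 *     >>> import sys
 *     >>> sys._debugmallocstats()
 *
 * which is the convenient way to inspect the table above from Python code.
 */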
2705
2706 #endif /* #ifdef WITH_PYMALLOC */