Line data Source code
1 : #include "Python.h"
2 :
3 : #if defined(__has_feature) /* Clang */
4 : #if __has_feature(address_sanitizer) /* is ASAN enabled? */
5 : #define ATTRIBUTE_NO_ADDRESS_SAFETY_ANALYSIS \
6 : __attribute__((no_address_safety_analysis)) \
7 : __attribute__ ((noinline))
8 : #else
9 : #define ATTRIBUTE_NO_ADDRESS_SAFETY_ANALYSIS
10 : #endif
11 : #else
12 : #if defined(__SANITIZE_ADDRESS__) /* GCC 4.8.x, is ASAN enabled? */
13 : #define ATTRIBUTE_NO_ADDRESS_SAFETY_ANALYSIS \
14 : __attribute__((no_address_safety_analysis)) \
15 : __attribute__ ((noinline))
16 : #else
17 : #define ATTRIBUTE_NO_ADDRESS_SAFETY_ANALYSIS
18 : #endif
19 : #endif
20 :
21 : #ifdef WITH_PYMALLOC
22 :
23 : #ifdef HAVE_MMAP
24 : #include <sys/mman.h>
25 : #ifdef MAP_ANONYMOUS
26 : #define ARENAS_USE_MMAP
27 : #endif
28 : #endif
29 :
30 : #ifdef WITH_VALGRIND
31 : #include <valgrind/valgrind.h>
32 :
33 : /* If we're using GCC, use __builtin_expect() to reduce overhead of
34 : the valgrind checks */
35 : #if defined(__GNUC__) && (__GNUC__ > 2) && defined(__OPTIMIZE__)
36 : # define UNLIKELY(value) __builtin_expect((value), 0)
37 : #else
38 : # define UNLIKELY(value) (value)
39 : #endif
40 :
41 : /* -1 indicates that we haven't checked whether we're running on valgrind yet. */
42 : static int running_on_valgrind = -1;
43 : #endif
44 :
45 : /* An object allocator for Python.
46 :
47 : Here is an introduction to the layers of the Python memory architecture,
48 : showing where the object allocator is actually used (layer +2). It is
49 : called for every object allocation and deallocation (PyObject_New/Del),
50 : unless the object-specific allocators implement a proprietary allocation
51 : scheme (ex.: ints use a simple free list). This is also the place where
52 : the cyclic garbage collector operates selectively on container objects.
53 :
54 :
55 : Object-specific allocators
56 : _____ ______ ______ ________
57 : [ int ] [ dict ] [ list ] ... [ string ] Python core |
58 : +3 | <----- Object-specific memory -----> | <-- Non-object memory --> |
59 : _______________________________ | |
60 : [ Python's object allocator ] | |
61 : +2 | ####### Object memory ####### | <------ Internal buffers ------> |
62 : ______________________________________________________________ |
63 : [ Python's raw memory allocator (PyMem_ API) ] |
64 : +1 | <----- Python memory (under PyMem manager's control) ------> | |
65 : __________________________________________________________________
66 : [ Underlying general-purpose allocator (ex: C library malloc) ]
67 : 0 | <------ Virtual memory allocated for the python process -------> |
68 :
69 : =========================================================================
70 : _______________________________________________________________________
71 : [ OS-specific Virtual Memory Manager (VMM) ]
72 : -1 | <--- Kernel dynamic storage allocation & management (page-based) ---> |
73 : __________________________________ __________________________________
74 : [ ] [ ]
75 : -2 | <-- Physical memory: ROM/RAM --> | | <-- Secondary storage (swap) --> |
76 :
77 : */
78 : /*==========================================================================*/
79 :
80 : /* A fast, special-purpose memory allocator for small blocks, to be used
81 : on top of a general-purpose malloc -- heavily based on previous art. */
82 :
83 : /* Vladimir Marangozov -- August 2000 */
84 :
85 : /*
86 : * "Memory management is where the rubber meets the road -- if we do the wrong
87 : * thing at any level, the results will not be good. And if we don't make the
88 : * levels work well together, we are in serious trouble." (1)
89 : *
90 : * (1) Paul R. Wilson, Mark S. Johnstone, Michael Neely, and David Boles,
91 : * "Dynamic Storage Allocation: A Survey and Critical Review",
92 : * in Proc. 1995 Int'l. Workshop on Memory Management, September 1995.
93 : */
94 :
95 : /* #undef WITH_MEMORY_LIMITS */ /* disable mem limit checks */
96 :
97 : /*==========================================================================*/
98 :
99 : /*
100 : * Allocation strategy abstract:
101 : *
102 : * For small requests, the allocator sub-allocates <Big> blocks of memory.
103 : * Requests greater than SMALL_REQUEST_THRESHOLD bytes are routed to the
104 : * system's allocator.
105 : *
106 : * Small requests are grouped in size classes spaced 8 bytes apart, due
107 : * to the required valid alignment of the returned address. Requests of
108 : * a particular size are serviced from memory pools of 4K (one VMM page).
109 : * Pools are fragmented on demand and contain free lists of blocks of one
110 : * particular size class. In other words, there is a fixed-size allocator
111 : * for each size class. Free pools are shared by the different allocators
112 : * thus minimizing the space reserved for a particular size class.
113 : *
114 : * This allocation strategy is a variant of what is known as "simple
115 : * segregated storage based on array of free lists". The main drawback of
116 : * simple segregated storage is that we might end up with a lot of reserved
117 : * memory for the different free lists, which degenerate over time. To avoid
118 : * this, we partition each free list in pools and we share dynamically the
119 : * reserved space between all free lists. This technique is quite efficient
120 : * for memory intensive programs which allocate mainly small-sized blocks.
121 : *
122 : * For small requests we have the following table:
123 : *
124 : * Request in bytes Size of allocated block Size class idx
125 : * ----------------------------------------------------------------
126 : * 1-8 8 0
127 : * 9-16 16 1
128 : * 17-24 24 2
129 : * 25-32 32 3
130 : * 33-40 40 4
131 : * 41-48 48 5
132 : * 49-56 56 6
133 : * 57-64 64 7
134 : * 65-72 72 8
135 : * ... ... ...
136 : * 497-504 504 62
137 : * 505-512 512 63
138 : *
139 : * 0, SMALL_REQUEST_THRESHOLD + 1 and up: routed to the underlying
140 : * allocator.
141 : */
142 :
143 : /*==========================================================================*/
144 :
145 : /*
146 : * -- Main tunable settings section --
147 : */
148 :
149 : /*
150 : * Alignment of addresses returned to the user. 8-byte alignment works
151 : * on most current architectures (with 32-bit or 64-bit address buses).
152 : * The alignment value is also used for grouping small requests in size
153 : * classes spaced ALIGNMENT bytes apart.
154 : *
155 : * You shouldn't change this unless you know what you are doing.
156 : */
157 : #define ALIGNMENT 8 /* must be 2^N */
158 : #define ALIGNMENT_SHIFT 3
159 : #define ALIGNMENT_MASK (ALIGNMENT - 1)
160 :
161 : /* Return the number of bytes in size class I, as a uint. */
162 : #define INDEX2SIZE(I) (((uint)(I) + 1) << ALIGNMENT_SHIFT)
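/*
 * Illustrative sketch (ours, not part of the allocator): the arithmetic
 * behind the size class table in the strategy comment above.  The fast
 * path of PyObject_Malloc below uses exactly this shift; the helper name
 * is hypothetical.
 */
static unsigned int
example_size_class(size_t nbytes)
{
    /* 1..8 -> 0, 9..16 -> 1, ..., 505..512 -> 63.  E.g. a 20-byte request
     * lands in class 2, and INDEX2SIZE(2) == 24 bytes are handed out. */
    return (unsigned int)((nbytes - 1) >> ALIGNMENT_SHIFT);
}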
163 :
164 : /*
165 : * Max size threshold below which malloc requests are considered to be
166 : * small enough in order to use preallocated memory pools. You can tune
167 : * this value according to your application behaviour and memory needs.
168 : *
169 : * The following invariants must hold:
170 : * 1) ALIGNMENT <= SMALL_REQUEST_THRESHOLD <= 512
171 : * 2) SMALL_REQUEST_THRESHOLD is evenly divisible by ALIGNMENT
172 : *
173 : * Note: a size threshold of 512 guarantees that newly created dictionaries
174 : * will be allocated from preallocated memory pools on 64-bit.
175 : *
176 : * Although not required, for better performance and space efficiency,
177 : * it is recommended that SMALL_REQUEST_THRESHOLD is set to a power of 2.
178 : */
179 : #define SMALL_REQUEST_THRESHOLD 512
180 : #define NB_SMALL_SIZE_CLASSES (SMALL_REQUEST_THRESHOLD / ALIGNMENT)
181 :
182 : /*
183 : * The system's VMM page size can be obtained on most unices with a
184 : * getpagesize() call or deduced from various header files. To make
185 : * things simpler, we assume that it is 4K, which is OK for most systems.
186 : * It is probably better if this is the native page size, but it doesn't
187 : * have to be. In theory, if SYSTEM_PAGE_SIZE is larger than the native page
188 : * size, then `POOL_ADDR(p)->arenaindex' could rarely cause a segmentation
189 : * violation fault. 4K is apparently OK for all the platforms that python
190 : * currently targets.
191 : */
192 : #define SYSTEM_PAGE_SIZE (4 * 1024)
193 : #define SYSTEM_PAGE_SIZE_MASK (SYSTEM_PAGE_SIZE - 1)
194 :
195 : /*
196 : * Maximum amount of memory managed by the allocator for small requests.
197 : */
198 : #ifdef WITH_MEMORY_LIMITS
199 : #ifndef SMALL_MEMORY_LIMIT
200 : #define SMALL_MEMORY_LIMIT (64 * 1024 * 1024) /* 64 MB -- more? */
201 : #endif
202 : #endif
203 :
204 : /*
205 : * The allocator sub-allocates <Big> blocks of memory (called arenas) aligned
206 : * on a page boundary. This is a reserved virtual address space for the
207 : * current process (obtained through a malloc()/mmap() call). In no way does
208 : * this mean that the memory arenas will be used entirely. A malloc(<Big>) is
209 : * usually just an address range reservation for <Big> bytes; physical pages
210 : * are committed only when they are actually referenced. So malloc'ing big
211 : * blocks and not using them does not mean "wasting memory"; it only wastes
212 : * addressable range.
213 : *
214 : * Arenas are allocated with mmap() on systems supporting anonymous memory
215 : * mappings to reduce heap fragmentation.
216 : */
217 : #define ARENA_SIZE (256 << 10) /* 256KB */
218 :
219 : #ifdef WITH_MEMORY_LIMITS
220 : #define MAX_ARENAS (SMALL_MEMORY_LIMIT / ARENA_SIZE)
221 : #endif
222 :
223 : /*
224 : * Size of the pools used for small blocks. Should be a power of 2,
225 : * between 1K and SYSTEM_PAGE_SIZE, that is: 1k, 2k, 4k.
226 : */
227 : #define POOL_SIZE SYSTEM_PAGE_SIZE /* must be 2^N */
228 : #define POOL_SIZE_MASK SYSTEM_PAGE_SIZE_MASK
229 :
230 : /*
231 : * -- End of tunable settings section --
232 : */
233 :
234 : /*==========================================================================*/
235 :
236 : /*
237 : * Locking
238 : *
239 : * To reduce lock contention, it would probably be better to refine the
240 : * crude function locking with per-size-class locking. I'm not positive,
241 : * however, whether it's worth switching to such a locking policy because
242 : * of the performance penalty it might introduce.
243 : *
244 : * The following macros describe the simplest (should also be the fastest)
245 : * lock object on a particular platform and the init/fini/lock/unlock
246 : * operations on it. The locks defined here are not expected to be recursive
247 : * because it is assumed that they will always be called in the order:
248 : * INIT, [LOCK, UNLOCK]*, FINI.
249 : */
250 :
251 : /*
252 : * Python's threads are serialized, so object malloc locking is disabled.
253 : */
254 : #define SIMPLELOCK_DECL(lock) /* simple lock declaration */
255 : #define SIMPLELOCK_INIT(lock) /* allocate (if needed) and initialize */
256 : #define SIMPLELOCK_FINI(lock) /* free/destroy an existing lock */
257 : #define SIMPLELOCK_LOCK(lock) /* acquire released lock */
258 : #define SIMPLELOCK_UNLOCK(lock) /* release acquired lock */
259 :
260 : /*
261 : * Basic types
262 : * I don't care if these are defined in <sys/types.h> or elsewhere. Axiom.
263 : */
264 : #undef uchar
265 : #define uchar unsigned char /* assuming == 8 bits */
266 :
267 : #undef uint
268 : #define uint unsigned int /* assuming >= 16 bits */
269 :
270 : #undef ulong
271 : #define ulong unsigned long /* assuming >= 32 bits */
272 :
273 : #undef uptr
274 : #define uptr Py_uintptr_t
275 :
276 : /* When you say memory, my mind reasons in terms of (pointers to) blocks */
277 : typedef uchar block;
278 :
279 : /* Pool for small blocks. */
280 : struct pool_header {
281 : union { block *_padding;
282 : uint count; } ref; /* number of allocated blocks */
283 : block *freeblock; /* pool's free list head */
284 : struct pool_header *nextpool; /* next pool of this size class */
285 : struct pool_header *prevpool; /* previous pool "" */
286 : uint arenaindex; /* index into arenas of base adr */
287 : uint szidx; /* block size class index */
288 : uint nextoffset; /* bytes to virgin block */
289 : uint maxnextoffset; /* largest valid nextoffset */
290 : };
291 :
292 : typedef struct pool_header *poolp;
293 :
294 : /* Record keeping for arenas. */
295 : struct arena_object {
296 : /* The address of the arena, as returned by malloc. Note that 0
297 : * will never be returned by a successful malloc, and is used
298 : * here to mark an arena_object that doesn't correspond to an
299 : * allocated arena.
300 : */
301 : uptr address;
302 :
303 : /* Pool-aligned pointer to the next pool to be carved off. */
304 : block* pool_address;
305 :
306 : /* The number of available pools in the arena: free pools + never-
307 : * allocated pools.
308 : */
309 : uint nfreepools;
310 :
311 : /* The total number of pools in the arena, whether or not available. */
312 : uint ntotalpools;
313 :
314 : /* Singly-linked list of available pools. */
315 : struct pool_header* freepools;
316 :
317 : /* Whenever this arena_object is not associated with an allocated
318 : * arena, the nextarena member is used to link all unassociated
319 : * arena_objects in the singly-linked `unused_arena_objects` list.
320 : * The prevarena member is unused in this case.
321 : *
322 : * When this arena_object is associated with an allocated arena
323 : * with at least one available pool, both members are used in the
324 : * doubly-linked `usable_arenas` list, which is maintained in
325 : * increasing order of `nfreepools` values.
326 : *
327 : * Else this arena_object is associated with an allocated arena
328 : * all of whose pools are in use. `nextarena` and `prevarena`
329 : * are both meaningless in this case.
330 : */
331 : struct arena_object* nextarena;
332 : struct arena_object* prevarena;
333 : };
334 :
335 : #undef ROUNDUP
336 : #define ROUNDUP(x) (((x) + ALIGNMENT_MASK) & ~ALIGNMENT_MASK)
337 : #define POOL_OVERHEAD ROUNDUP(sizeof(struct pool_header))
338 :
339 : #define DUMMY_SIZE_IDX 0xffff /* size class of newly cached pools */
340 :
341 : /* Round pointer P down to the closest pool-aligned address <= P, as a poolp */
342 : #define POOL_ADDR(P) ((poolp)((uptr)(P) & ~(uptr)POOL_SIZE_MASK))
343 :
344 : /* Return total number of blocks in pool of size index I, as a uint. */
345 : #define NUMBLOCKS(I) ((uint)(POOL_SIZE - POOL_OVERHEAD) / INDEX2SIZE(I))
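/*
 * Illustrative sketch (ours, not part of the allocator): POOL_ADDR is the
 * masking that PyObject_Free and PyObject_Realloc apply to every incoming
 * pointer to find its pool header, and NUMBLOCKS gives a pool's capacity.
 * Assuming a 64-bit build where POOL_OVERHEAD rounds to 48 bytes,
 * NUMBLOCKS(0) == (4096 - 48) / 8 == 506 eight-byte blocks per pool.
 */
static poolp
example_pool_of(void *p)        /* hypothetical helper */
{
    return POOL_ADDR(p);        /* round p down to its 4K pool boundary */
}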
346 :
347 : /*==========================================================================*/
348 :
349 : /*
350 : * This malloc lock
351 : */
352 : SIMPLELOCK_DECL(_malloc_lock)
353 : #define LOCK() SIMPLELOCK_LOCK(_malloc_lock)
354 : #define UNLOCK() SIMPLELOCK_UNLOCK(_malloc_lock)
355 : #define LOCK_INIT() SIMPLELOCK_INIT(_malloc_lock)
356 : #define LOCK_FINI() SIMPLELOCK_FINI(_malloc_lock)
357 :
358 : /*
359 : * Pool table -- headed, circular, doubly-linked lists of partially used pools.
360 :
361 : This is involved. For an index i, usedpools[i+i] is the header for a list of
362 : all partially used pools holding small blocks with "size class idx" i. So
363 : usedpools[0] corresponds to blocks of size 8, usedpools[2] to blocks of size
364 : 16, and so on: index 2*i <-> blocks of size (i+1)<<ALIGNMENT_SHIFT.
365 :
366 : Pools are carved off an arena's highwater mark (an arena_object's pool_address
367 : member) as needed. Once carved off, a pool is in one of three states forever
368 : after:
369 :
370 : used == partially used, neither empty nor full
371 : At least one block in the pool is currently allocated, and at least one
372 : block in the pool is not currently allocated (note this implies a pool
373 : has room for at least two blocks).
374 : This is a pool's initial state, as a pool is created only when malloc
375 : needs space.
376 : The pool holds blocks of a fixed size, and is in the circular list headed
377 : at usedpools[i] (see above). It's linked to the other used pools of the
378 : same size class via the pool_header's nextpool and prevpool members.
379 : If all but one block is currently allocated, a malloc can cause a
380 : transition to the full state. If all but one block is not currently
381 : allocated, a free can cause a transition to the empty state.
382 :
383 : full == all the pool's blocks are currently allocated
384 : On transition to full, a pool is unlinked from its usedpools[] list.
385 : It's not linked to from anything then anymore, and its nextpool and
386 : prevpool members are meaningless until it transitions back to used.
387 : A free of a block in a full pool puts the pool back in the used state.
388 : Then it's linked in at the front of the appropriate usedpools[] list, so
389 : that the next allocation for its size class will reuse the freed block.
390 :
391 : empty == all the pool's blocks are currently available for allocation
392 : On transition to empty, a pool is unlinked from its usedpools[] list,
393 : and linked to the front of its arena_object's singly-linked freepools list,
394 : via its nextpool member. The prevpool member has no meaning in this case.
395 : Empty pools have no inherent size class: the next time a malloc finds
396 : an empty list in usedpools[], it takes the first pool off of freepools.
397 : If the size class needed happens to be the same as the size class the pool
398 : last had, some pool initialization can be skipped.
399 :
400 :
401 : Block Management
402 :
403 : Blocks within pools are again carved out as needed. pool->freeblock points to
404 : the start of a singly-linked list of free blocks within the pool. When a
405 : block is freed, it's inserted at the front of its pool's freeblock list. Note
406 : that the available blocks in a pool are *not* linked all together when a pool
407 : is initialized. Instead only "the first two" (lowest addresses) blocks are
408 : set up, returning the first such block, and setting pool->freeblock to a
409 : one-block list holding the second such block. This is consistent with the way
410 : pymalloc strives at all levels (arena, pool, and block) never to touch a piece
411 : of memory until it's actually needed.
412 :
413 : So long as a pool is in the used state, we're certain there *is* a block
414 : available for allocating, and pool->freeblock is not NULL. If pool->freeblock
415 : points to the end of the free list before we've carved the entire pool into
416 : blocks, that means we simply haven't yet gotten to one of the higher-address
417 : blocks. The offset from the pool_header to the start of "the next" virgin
418 : block is stored in the pool_header nextoffset member, and the largest value
419 : of nextoffset that makes sense is stored in the maxnextoffset member when a
420 : pool is initialized. All the blocks in a pool have been passed out at least
421 : once when and only when nextoffset > maxnextoffset.
422 :
423 :
424 : Major obscurity: While the usedpools vector is declared to have poolp
425 : entries, it doesn't really. It really contains two pointers per (conceptual)
426 : poolp entry, the nextpool and prevpool members of a pool_header. The
427 : excruciating initialization code below fools C so that
428 :
429 : usedpool[i+i]
430 :
431 : "acts like" a genuine poolp, but only so long as you only reference its
432 : nextpool and prevpool members. The "- 2*sizeof(block *)" gibberish is
433 : compensating for the fact that a pool_header's nextpool and prevpool members
434 : immediately follow a pool_header's first two members:
435 :
436 : union { block *_padding;
437 : uint count; } ref;
438 : block *freeblock;
439 :
440 : each of which consumes sizeof(block *) bytes. So what usedpools[i+i] really
441 : contains is a fudged-up pointer p such that *if* C believes it's a poolp
442 : pointer, then p->nextpool and p->prevpool are both p (meaning that the headed
443 : circular list is empty).
444 :
445 : It's unclear why the usedpools setup is so convoluted. It could be to
446 : minimize the amount of cache required to hold this heavily-referenced table
447 : (which only *needs* the two interpool pointer members of a pool_header). OTOH,
448 : referencing code has to remember to "double the index" and doing so isn't
449 : free, usedpools[0] isn't a strictly legal pointer, and we're crucially relying
450 : on the fact that C doesn't insert any padding anywhere in a pool_header at or before
451 : the prevpool member.
452 : **************************************************************************** */
453 :
454 : #define PTA(x) ((poolp )((uchar *)&(usedpools[2*(x)]) - 2*sizeof(block *)))
455 : #define PT(x) PTA(x), PTA(x)
456 :
457 : static poolp usedpools[2 * ((NB_SMALL_SIZE_CLASSES + 7) / 8) * 8] = {
458 : PT(0), PT(1), PT(2), PT(3), PT(4), PT(5), PT(6), PT(7)
459 : #if NB_SMALL_SIZE_CLASSES > 8
460 : , PT(8), PT(9), PT(10), PT(11), PT(12), PT(13), PT(14), PT(15)
461 : #if NB_SMALL_SIZE_CLASSES > 16
462 : , PT(16), PT(17), PT(18), PT(19), PT(20), PT(21), PT(22), PT(23)
463 : #if NB_SMALL_SIZE_CLASSES > 24
464 : , PT(24), PT(25), PT(26), PT(27), PT(28), PT(29), PT(30), PT(31)
465 : #if NB_SMALL_SIZE_CLASSES > 32
466 : , PT(32), PT(33), PT(34), PT(35), PT(36), PT(37), PT(38), PT(39)
467 : #if NB_SMALL_SIZE_CLASSES > 40
468 : , PT(40), PT(41), PT(42), PT(43), PT(44), PT(45), PT(46), PT(47)
469 : #if NB_SMALL_SIZE_CLASSES > 48
470 : , PT(48), PT(49), PT(50), PT(51), PT(52), PT(53), PT(54), PT(55)
471 : #if NB_SMALL_SIZE_CLASSES > 56
472 : , PT(56), PT(57), PT(58), PT(59), PT(60), PT(61), PT(62), PT(63)
473 : #if NB_SMALL_SIZE_CLASSES > 64
474 : #error "NB_SMALL_SIZE_CLASSES should not be more than 64"
475 : #endif /* NB_SMALL_SIZE_CLASSES > 64 */
476 : #endif /* NB_SMALL_SIZE_CLASSES > 56 */
477 : #endif /* NB_SMALL_SIZE_CLASSES > 48 */
478 : #endif /* NB_SMALL_SIZE_CLASSES > 40 */
479 : #endif /* NB_SMALL_SIZE_CLASSES > 32 */
480 : #endif /* NB_SMALL_SIZE_CLASSES > 24 */
481 : #endif /* NB_SMALL_SIZE_CLASSES > 16 */
482 : #endif /* NB_SMALL_SIZE_CLASSES > 8 */
483 : };
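/*
 * Illustrative sketch (ours): what the PTA() fudging buys.  Each pair of
 * usedpools[] slots acts as the header of an initially empty circular
 * list: usedpools[i + i]->nextpool and ->prevpool both point back at the
 * fudged header itself.  The fast path in PyObject_Malloc can therefore
 * test "does size class i have a partially used pool?" in one comparison.
 */
static int
example_class_has_used_pool(uint size)      /* hypothetical helper */
{
    poolp pool = usedpools[size + size];
    return pool != pool->nextpool;          /* non-empty list? */
}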
484 :
485 : /*==========================================================================
486 : Arena management.
487 :
488 : `arenas` is a vector of arena_objects. It contains maxarenas entries, some of
489 : which may not be currently used (== they're arena_objects that aren't
490 : currently associated with an allocated arena). Note that arenas proper are
491 : separately malloc'ed.
492 :
493 : Prior to Python 2.5, arenas were never free()'ed. Starting with Python 2.5,
494 : we do try to free() arenas, and use some mild heuristic strategies to increase
495 : the likelihood that arenas eventually can be freed.
496 :
497 : unused_arena_objects
498 :
499 : This is a singly-linked list of the arena_objects that are currently not
500 : being used (no arena is associated with them). Objects are taken off the
501 : head of the list in new_arena(), and are pushed on the head of the list in
502 : PyObject_Free() when the arena is empty. Key invariant: an arena_object
503 : is on this list if and only if its .address member is 0.
504 :
505 : usable_arenas
506 :
507 : This is a doubly-linked list of the arena_objects associated with arenas
508 : that have pools available. These pools are either waiting to be reused,
509 : or have not been used before. The list is sorted to have the most-
510 : allocated arenas first (ascending order based on the nfreepools member).
511 : This means that the next allocation will come from a heavily used arena,
512 : which gives the nearly empty arenas a chance to be returned to the system.
513 : In my unscientific tests this dramatically improved the number of arenas
514 : that could be freed.
515 :
516 : Note that an arena_object associated with an arena all of whose pools are
517 : currently in use isn't on either list.
518 : */
519 :
520 : /* Array of objects used to track chunks of memory (arenas). */
521 : static struct arena_object* arenas = NULL;
522 : /* Number of slots currently allocated in the `arenas` vector. */
523 : static uint maxarenas = 0;
524 :
525 : /* The head of the singly-linked, NULL-terminated list of available
526 : * arena_objects.
527 : */
528 : static struct arena_object* unused_arena_objects = NULL;
529 :
530 : /* The head of the doubly-linked, NULL-terminated at each end, list of
531 : * arena_objects associated with arenas that have pools available.
532 : */
533 : static struct arena_object* usable_arenas = NULL;
534 :
535 : /* How many arena_objects do we initially allocate?
536 : * 16 = can allocate 16 arenas = 16 * ARENA_SIZE = 4MB before growing the
537 : * `arenas` vector.
538 : */
539 : #define INITIAL_ARENA_OBJECTS 16
540 :
541 : /* Number of arenas allocated that haven't been free()'d. */
542 : static size_t narenas_currently_allocated = 0;
543 :
544 : #ifdef PYMALLOC_DEBUG
545 : /* Total number of times malloc() called to allocate an arena. */
546 : static size_t ntimes_arena_allocated = 0;
547 : /* High water mark (max value ever seen) for narenas_currently_allocated. */
548 : static size_t narenas_highwater = 0;
549 : #endif
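/*
 * Illustrative sketch (ours, not in the allocator): the key invariants of
 * the two arena lists described above, written as a debug-style check.
 * An arena_object sits on unused_arena_objects iff its address is 0, and
 * usable_arenas is kept sorted by ascending nfreepools.
 */
static void
example_check_arena_lists(void)             /* hypothetical helper */
{
    struct arena_object *ao;
    for (ao = unused_arena_objects; ao != NULL; ao = ao->nextarena)
        assert(ao->address == 0);
    for (ao = usable_arenas; ao != NULL; ao = ao->nextarena) {
        assert(ao->address != 0);
        assert(ao->nextarena == NULL ||
               ao->nfreepools <= ao->nextarena->nfreepools);
    }
}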
550 :
551 : /* Allocate a new arena. Return NULL if we run out of memory, else return
552 : * the address of an arena_object describing the newly allocated arena.
553 : * It's expected that the caller will set
554 : * `usable_arenas` to the return value.
555 : */
556 : static struct arena_object*
557 90 : new_arena(void)
558 : {
559 : struct arena_object* arenaobj;
560 : uint excess; /* number of bytes above pool alignment */
561 : void *address;
562 : int err;
563 :
564 : #ifdef PYMALLOC_DEBUG
565 : if (Py_GETENV("PYTHONMALLOCSTATS"))
566 : _PyObject_DebugMallocStats();
567 : #endif
568 90 : if (unused_arena_objects == NULL) {
569 : uint i;
570 : uint numarenas;
571 : size_t nbytes;
572 :
573 : /* Double the number of arena objects on each allocation.
574 : * Note that it's possible for `numarenas` to overflow.
575 : */
576 4 : numarenas = maxarenas ? maxarenas << 1 : INITIAL_ARENA_OBJECTS;
577 4 : if (numarenas <= maxarenas)
578 0 : return NULL; /* overflow */
579 : #if SIZEOF_SIZE_T <= SIZEOF_INT
580 : if (numarenas > PY_SIZE_MAX / sizeof(*arenas))
581 : return NULL; /* overflow */
582 : #endif
583 4 : nbytes = numarenas * sizeof(*arenas);
584 4 : arenaobj = (struct arena_object *)realloc(arenas, nbytes);
585 4 : if (arenaobj == NULL)
586 0 : return NULL;
587 4 : arenas = arenaobj;
588 :
589 : /* We might need to fix pointers that were copied. However,
590 : * new_arena only gets called when all the pages in the
591 : * previous arenas are full. Thus, there are *no* pointers
592 : * into the old array. Thus, we don't have to worry about
593 : * invalid pointers. Just to be sure, some asserts:
594 : */
595 : assert(usable_arenas == NULL);
596 : assert(unused_arena_objects == NULL);
597 :
598 : /* Put the new arenas on the unused_arena_objects list. */
599 68 : for (i = maxarenas; i < numarenas; ++i) {
600 64 : arenas[i].address = 0; /* mark as unassociated */
601 128 : arenas[i].nextarena = i < numarenas - 1 ?
602 64 : &arenas[i+1] : NULL;
603 : }
604 :
605 : /* Update globals. */
606 4 : unused_arena_objects = &arenas[maxarenas];
607 4 : maxarenas = numarenas;
608 : }
609 :
610 : /* Take the next available arena object off the head of the list. */
611 : assert(unused_arena_objects != NULL);
612 90 : arenaobj = unused_arena_objects;
613 90 : unused_arena_objects = arenaobj->nextarena;
614 : assert(arenaobj->address == 0);
615 : #ifdef ARENAS_USE_MMAP
616 90 : address = mmap(NULL, ARENA_SIZE, PROT_READ|PROT_WRITE,
617 : MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
618 90 : err = (address == MAP_FAILED);
619 : #else
620 : address = malloc(ARENA_SIZE);
621 : err = (address == 0);
622 : #endif
623 90 : if (err) {
624 : /* The allocation failed: return NULL after putting the
625 : * arenaobj back.
626 : */
627 0 : arenaobj->nextarena = unused_arena_objects;
628 0 : unused_arena_objects = arenaobj;
629 0 : return NULL;
630 : }
631 90 : arenaobj->address = (uptr)address;
632 :
633 90 : ++narenas_currently_allocated;
634 : #ifdef PYMALLOC_DEBUG
635 : ++ntimes_arena_allocated;
636 : if (narenas_currently_allocated > narenas_highwater)
637 : narenas_highwater = narenas_currently_allocated;
638 : #endif
639 90 : arenaobj->freepools = NULL;
640 : /* pool_address <- first pool-aligned address in the arena
641 : nfreepools <- number of whole pools that fit after alignment */
642 90 : arenaobj->pool_address = (block*)arenaobj->address;
643 90 : arenaobj->nfreepools = ARENA_SIZE / POOL_SIZE;
644 : assert(POOL_SIZE * arenaobj->nfreepools == ARENA_SIZE);
645 90 : excess = (uint)(arenaobj->address & POOL_SIZE_MASK);
646 90 : if (excess != 0) {
647 0 : --arenaobj->nfreepools;
648 0 : arenaobj->pool_address += POOL_SIZE - excess;
649 : }
650 90 : arenaobj->ntotalpools = arenaobj->nfreepools;
651 :
652 90 : return arenaobj;
653 : }
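/*
 * Worked example (ours) of the alignment fix-up at the end of new_arena():
 * suppose the malloc() path returned an arena at address 0x100234 with
 * POOL_SIZE == 4096 (0x1000).  Then excess == 0x234, the first usable pool
 * starts at 0x101000 (0x100234 + 0x1000 - 0x234), and one of the
 * ARENA_SIZE / POOL_SIZE == 64 pools is sacrificed to alignment.  With
 * mmap(), arenas are page-aligned, so excess is normally 0 and all 64
 * pools are usable.
 */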
654 :
655 : /*
656 : Py_ADDRESS_IN_RANGE(P, POOL)
657 :
658 : Return true if and only if P is an address that was allocated by pymalloc.
659 : POOL must be the pool address associated with P, i.e., POOL = POOL_ADDR(P)
660 : (the caller is asked to compute this because the macro expands POOL more than
661 : once, and for efficiency it's best for the caller to assign POOL_ADDR(P) to a
662 : variable and pass the latter to the macro; because Py_ADDRESS_IN_RANGE is
663 : called on every alloc/realloc/free, micro-efficiency is important here).
664 :
665 : Tricky: Let B be the arena base address associated with the pool, B =
666 : arenas[(POOL)->arenaindex].address. Then P belongs to the arena if and only if
667 :
668 : B <= P < B + ARENA_SIZE
669 :
670 : Subtracting B throughout, this is true iff
671 :
672 : 0 <= P-B < ARENA_SIZE
673 :
674 : By using unsigned arithmetic, the "0 <=" half of the test can be skipped.
675 :
676 : Obscure: A PyMem "free memory" function can call the pymalloc free or realloc
677 : before the first arena has been allocated. `arenas` is still NULL in that
678 : case. We're relying on the fact that maxarenas is also 0 in that case, so that
679 : (POOL)->arenaindex < maxarenas must be false, saving us from trying to index
680 : into a NULL arenas.
681 :
682 : Details: given P and POOL, the arena_object corresponding to P is AO =
683 : arenas[(POOL)->arenaindex]. Suppose obmalloc controls P. Then (barring wild
684 : stores, etc), POOL is the correct address of P's pool, AO.address is the
685 : correct base address of the pool's arena, and P must be within ARENA_SIZE of
686 : AO.address. In addition, AO.address is not 0 (no arena can start at address 0
687 : (NULL)). Therefore Py_ADDRESS_IN_RANGE correctly reports that obmalloc
688 : controls P.
689 :
690 : Now suppose obmalloc does not control P (e.g., P was obtained via a direct
691 : call to the system malloc() or realloc()). (POOL)->arenaindex may be anything
692 : in this case -- it may even be uninitialized trash. If the trash arenaindex
693 : is >= maxarenas, the macro correctly concludes at once that obmalloc doesn't
694 : control P.
695 :
696 : Else arenaindex is < maxarenas, and AO is read up. If AO corresponds to an
697 : allocated arena, obmalloc controls all the memory in slice AO.address :
698 : AO.address+ARENA_SIZE. By case assumption, P is not controlled by obmalloc,
699 : so P doesn't lie in that slice, so the macro correctly reports that P is not
700 : controlled by obmalloc.
701 :
702 : Finally, if P is not controlled by obmalloc and AO corresponds to an unused
703 : arena_object (one not currently associated with an allocated arena),
704 : AO.address is 0, and the second test in the macro reduces to:
705 :
706 : P < ARENA_SIZE
707 :
708 : If P >= ARENA_SIZE (extremely likely), the macro again correctly concludes
709 : that P is not controlled by obmalloc. However, if P < ARENA_SIZE, this part
710 : of the test still passes, and the third clause (AO.address != 0) is necessary
711 : to get the correct result: AO.address is 0 in this case, so the macro
712 : correctly reports that P is not controlled by obmalloc (despite the fact that P lies in
713 : slice AO.address : AO.address + ARENA_SIZE).
714 :
715 : Note: The third (AO.address != 0) clause was added in Python 2.5. Before
716 : 2.5, arenas were never free()'ed, and an arenaindex < maxarenas always
717 : corresponded to a currently-allocated arena, so the "P is not controlled by
718 : obmalloc, AO corresponds to an unused arena_object, and P < ARENA_SIZE" case
719 : was impossible.
720 :
721 : Note that the logic is excruciating, and reading up possibly uninitialized
722 : memory when P is not controlled by obmalloc (to get at (POOL)->arenaindex)
723 : creates problems for some memory debuggers. The overwhelming advantage is
724 : that this test determines whether an arbitrary address is controlled by
725 : obmalloc in a small constant time, independent of the number of arenas
726 : obmalloc controls. Since this test is needed at every entry point, it's
727 : extremely desirable that it be this fast.
728 :
729 : Since Py_ADDRESS_IN_RANGE may be reading from memory which was not allocated
730 : by Python, it is important that (POOL)->arenaindex is read only once, as
731 : another thread may be concurrently modifying the value without holding the
732 : GIL. To accomplish this, the arenaindex_temp variable is used to store
733 : (POOL)->arenaindex for the duration of the Py_ADDRESS_IN_RANGE macro's
734 : execution. The caller of the macro is responsible for declaring this
735 : variable.
736 : */
737 : #define Py_ADDRESS_IN_RANGE(P, POOL) \
738 : ((arenaindex_temp = (POOL)->arenaindex) < maxarenas && \
739 : (uptr)(P) - arenas[arenaindex_temp].address < (uptr)ARENA_SIZE && \
740 : arenas[arenaindex_temp].address != 0)
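/*
 * Illustrative sketch (ours): the unsigned-arithmetic trick in the macro
 * above.  Because uptr is unsigned, (uptr)(P) - B wraps around to a huge
 * value whenever P < B, so a single comparison covers both halves of the
 * test B <= P && P < B + ARENA_SIZE.
 */
static int
example_in_arena(void *p, uptr base)        /* hypothetical helper */
{
    return (uptr)p - base < (uptr)ARENA_SIZE;
}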
741 :
742 :
743 : /* This is only useful when running memory debuggers such as
744 : * Purify or Valgrind. Uncomment to use.
745 : *
746 : #define Py_USING_MEMORY_DEBUGGER
747 : */
748 :
749 : #ifdef Py_USING_MEMORY_DEBUGGER
750 :
751 : /* Py_ADDRESS_IN_RANGE may access uninitialized memory by design.
752 : * This leads to thousands of spurious warnings when using
753 : * Purify or Valgrind. By making it a function, we can easily
754 : * suppress the uninitialized memory reads in this one function,
755 : * so we don't hide real errors elsewhere.
756 : *
757 : * Disable the macro and use a function.
758 : */
759 :
760 : #undef Py_ADDRESS_IN_RANGE
761 :
762 : #if defined(__GNUC__) && ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) || \
763 : (__GNUC__ >= 4))
764 : #define Py_NO_INLINE __attribute__((__noinline__))
765 : #else
766 : #define Py_NO_INLINE
767 : #endif
768 :
769 : /* Don't make static, to try to ensure this isn't inlined. */
770 : int Py_ADDRESS_IN_RANGE(void *P, poolp pool) Py_NO_INLINE;
771 : #undef Py_NO_INLINE
772 : #endif
773 :
774 : /*==========================================================================*/
775 :
776 : /* malloc. Note that nbytes==0 tries to return a non-NULL pointer, distinct
777 : * from all other currently live pointers. This may not be possible.
778 : */
779 :
780 : /*
781 : * The basic blocks are ordered by decreasing execution frequency,
782 : * which minimizes the number of jumps in the most common cases,
783 : * improves branch prediction and instruction scheduling (small
784 : * block allocations typically result in a couple of instructions).
785 : * Unless the optimizer reorders everything, being too smart...
786 : */
787 :
788 : #undef PyObject_Malloc
789 : void *
790 733055 : PyObject_Malloc(size_t nbytes)
791 : {
792 : block *bp;
793 : poolp pool;
794 : poolp next;
795 : uint size;
796 :
797 : #ifdef WITH_VALGRIND
798 : if (UNLIKELY(running_on_valgrind == -1))
799 : running_on_valgrind = RUNNING_ON_VALGRIND;
800 : if (UNLIKELY(running_on_valgrind))
801 : goto redirect;
802 : #endif
803 :
804 : /*
805 : * Limit ourselves to PY_SSIZE_T_MAX bytes to prevent security holes.
806 : * Most python internals blindly use a signed Py_ssize_t to track
807 : * things without checking for overflows or negatives.
808 : * As size_t is unsigned, checking for nbytes < 0 is not required.
809 : */
810 733055 : if (nbytes > PY_SSIZE_T_MAX)
811 0 : return NULL;
812 :
813 : /*
814 : * This implicitly redirects malloc(0).
815 : */
816 733055 : if ((nbytes - 1) < SMALL_REQUEST_THRESHOLD) {
817 : LOCK();
818 : /*
819 : * Most frequent paths first
820 : */
821 726187 : size = (uint)(nbytes - 1) >> ALIGNMENT_SHIFT;
822 726187 : pool = usedpools[size + size];
823 726187 : if (pool != pool->nextpool) {
824 : /*
825 : * There is a used pool for this size class.
826 : * Pick up the head block of its free list.
827 : */
828 717484 : ++pool->ref.count;
829 717484 : bp = pool->freeblock;
830 : assert(bp != NULL);
831 717484 : if ((pool->freeblock = *(block **)bp) != NULL) {
832 : UNLOCK();
833 349190 : return (void *)bp;
834 : }
835 : /*
836 : * Reached the end of the free list, try to extend it.
837 : */
838 368294 : if (pool->nextoffset <= pool->maxnextoffset) {
839 : /* There is room for another block. */
840 345173 : pool->freeblock = (block*)pool +
841 345173 : pool->nextoffset;
842 345173 : pool->nextoffset += INDEX2SIZE(size);
843 345173 : *(block **)(pool->freeblock) = NULL;
844 : UNLOCK();
845 345173 : return (void *)bp;
846 : }
847 : /* Pool is full, unlink from used pools. */
848 23121 : next = pool->nextpool;
849 23121 : pool = pool->prevpool;
850 23121 : next->prevpool = pool;
851 23121 : pool->nextpool = next;
852 : UNLOCK();
853 23121 : return (void *)bp;
854 : }
855 :
856 : /* There isn't a pool of the right size class immediately
857 : * available: use a free pool.
858 : */
859 8703 : if (usable_arenas == NULL) {
860 : /* No arena has a free pool: allocate a new arena. */
861 : #ifdef WITH_MEMORY_LIMITS
862 : if (narenas_currently_allocated >= MAX_ARENAS) {
863 : UNLOCK();
864 : goto redirect;
865 : }
866 : #endif
867 90 : usable_arenas = new_arena();
868 90 : if (usable_arenas == NULL) {
869 : UNLOCK();
870 0 : goto redirect;
871 : }
872 180 : usable_arenas->nextarena =
873 90 : usable_arenas->prevarena = NULL;
874 : }
875 : assert(usable_arenas->address != 0);
876 :
877 : /* Try to get a cached free pool. */
878 8703 : pool = usable_arenas->freepools;
879 8703 : if (pool != NULL) {
880 : /* Unlink from cached pools. */
881 4867 : usable_arenas->freepools = pool->nextpool;
882 :
883 : /* This arena already had the smallest nfreepools
884 : * value, so decreasing nfreepools doesn't change
885 : * that, and we don't need to rearrange the
886 : * usable_arenas list. However, if the arena has
887 : * become wholly allocated, we need to remove its
888 : * arena_object from usable_arenas.
889 : */
890 4867 : --usable_arenas->nfreepools;
891 4867 : if (usable_arenas->nfreepools == 0) {
892 : /* Wholly allocated: remove. */
893 : assert(usable_arenas->freepools == NULL);
894 : assert(usable_arenas->nextarena == NULL ||
895 : usable_arenas->nextarena->prevarena ==
896 : usable_arenas);
897 :
898 95 : usable_arenas = usable_arenas->nextarena;
899 95 : if (usable_arenas != NULL) {
900 74 : usable_arenas->prevarena = NULL;
901 : assert(usable_arenas->address != 0);
902 : }
903 : }
904 : else {
905 : /* nfreepools > 0: it must be that freepools
906 : * isn't NULL, or that we haven't yet carved
907 : * off all the arena's pools for the first
908 : * time.
909 : */
910 : assert(usable_arenas->freepools != NULL ||
911 : usable_arenas->pool_address <=
912 : (block*)usable_arenas->address +
913 : ARENA_SIZE - POOL_SIZE);
914 : }
915 : init_pool:
916 : /* Frontlink to used pools. */
917 8703 : next = usedpools[size + size]; /* == prev */
918 8703 : pool->nextpool = next;
919 8703 : pool->prevpool = next;
920 8703 : next->nextpool = pool;
921 8703 : next->prevpool = pool;
922 8703 : pool->ref.count = 1;
923 8703 : if (pool->szidx == size) {
924 : /* Luckily, this pool last contained blocks
925 : * of the same size class, so its header
926 : * and free list are already initialized.
927 : */
928 2385 : bp = pool->freeblock;
929 2385 : pool->freeblock = *(block **)bp;
930 : UNLOCK();
931 2385 : return (void *)bp;
932 : }
933 : /*
934 : * Initialize the pool header, set up the free list to
935 : * contain just the second block, and return the first
936 : * block.
937 : */
938 6318 : pool->szidx = size;
939 6318 : size = INDEX2SIZE(size);
940 6318 : bp = (block *)pool + POOL_OVERHEAD;
941 6318 : pool->nextoffset = POOL_OVERHEAD + (size << 1);
942 6318 : pool->maxnextoffset = POOL_SIZE - size;
943 6318 : pool->freeblock = bp + size;
944 6318 : *(block **)(pool->freeblock) = NULL;
945 : UNLOCK();
946 6318 : return (void *)bp;
947 : }
948 :
949 : /* Carve off a new pool. */
950 : assert(usable_arenas->nfreepools > 0);
951 : assert(usable_arenas->freepools == NULL);
952 3836 : pool = (poolp)usable_arenas->pool_address;
953 : assert((block*)pool <= (block*)usable_arenas->address +
954 : ARENA_SIZE - POOL_SIZE);
955 3836 : pool->arenaindex = usable_arenas - arenas;
956 : assert(&arenas[pool->arenaindex] == usable_arenas);
957 3836 : pool->szidx = DUMMY_SIZE_IDX;
958 3836 : usable_arenas->pool_address += POOL_SIZE;
959 3836 : --usable_arenas->nfreepools;
960 :
961 3836 : if (usable_arenas->nfreepools == 0) {
962 : assert(usable_arenas->nextarena == NULL ||
963 : usable_arenas->nextarena->prevarena ==
964 : usable_arenas);
965 : /* Unlink the arena: it is completely allocated. */
966 59 : usable_arenas = usable_arenas->nextarena;
967 59 : if (usable_arenas != NULL) {
968 5 : usable_arenas->prevarena = NULL;
969 : assert(usable_arenas->address != 0);
970 : }
971 : }
972 :
973 3836 : goto init_pool;
974 : }
975 :
976 : /* The small block allocator ends here. */
977 :
978 : redirect:
979 : /* Redirect the original request to the underlying (libc) allocator.
980 : * We jump here on bigger requests, on error in the code above (as a
981 : * last chance to serve the request) or when the max memory limit
982 : * has been reached.
983 : */
984 6868 : if (nbytes == 0)
985 0 : nbytes = 1;
986 6868 : return (void *)malloc(nbytes);
987 : }
988 :
989 : /* free */
990 :
991 : #undef PyObject_Free
992 : ATTRIBUTE_NO_ADDRESS_SAFETY_ANALYSIS
993 : void
994 682464 : PyObject_Free(void *p)
995 : {
996 : poolp pool;
997 : block *lastfree;
998 : poolp next, prev;
999 : uint size;
1000 : #ifndef Py_USING_MEMORY_DEBUGGER
1001 : uint arenaindex_temp;
1002 : #endif
1003 :
1004 682464 : if (p == NULL) /* free(NULL) has no effect */
1005 0 : return;
1006 :
1007 : #ifdef WITH_VALGRIND
1008 : if (UNLIKELY(running_on_valgrind > 0))
1009 : goto redirect;
1010 : #endif
1011 :
1012 682464 : pool = POOL_ADDR(p);
1013 682464 : if (Py_ADDRESS_IN_RANGE(p, pool)) {
1014 : /* We allocated this address. */
1015 : LOCK();
1016 : /* Link p to the start of the pool's freeblock list. Since
1017 : * the pool had at least the p block outstanding, the pool
1018 : * wasn't empty (so it's already in a usedpools[] list, or
1019 : * was full and is in no list -- it's not in the freeblocks
1020 : * list in any case).
1021 : */
1022 : assert(pool->ref.count > 0); /* else it was empty */
1023 676664 : *(block **)p = lastfree = pool->freeblock;
1024 676664 : pool->freeblock = (block *)p;
1025 676664 : if (lastfree) {
1026 : struct arena_object* ao;
1027 : uint nf; /* ao->nfreepools */
1028 :
1029 : /* freeblock wasn't NULL, so the pool wasn't full,
1030 : * and the pool is in a usedpools[] list.
1031 : */
1032 653905 : if (--pool->ref.count != 0) {
1033 : /* pool isn't empty: leave it in usedpools */
1034 : UNLOCK();
1035 647633 : return;
1036 : }
1037 : /* Pool is now empty: unlink from usedpools, and
1038 : * link to the front of freepools. This ensures that
1039 : * previously freed pools will be allocated later
1040 : * (being not referenced, they are perhaps paged out).
1041 : */
1042 6272 : next = pool->nextpool;
1043 6272 : prev = pool->prevpool;
1044 6272 : next->prevpool = prev;
1045 6272 : prev->nextpool = next;
1046 :
1047 : /* Link the pool to freepools. This is a singly-linked
1048 : * list, and pool->prevpool isn't used there.
1049 : */
1050 6272 : ao = &arenas[pool->arenaindex];
1051 6272 : pool->nextpool = ao->freepools;
1052 6272 : ao->freepools = pool;
1053 6272 : nf = ++ao->nfreepools;
1054 :
1055 : /* All the rest is arena management. We just freed
1056 : * a pool, and there are 4 cases for arena mgmt:
1057 : * 1. If all the pools are free, return the arena to
1058 : * the system free().
1059 : * 2. If this is the only free pool in the arena,
1060 : * add the arena back to the `usable_arenas` list.
1061 : * 3. If the "next" arena has a smaller count of free
1062 : * pools, we have to "slide this arena right" to
1063 : * restore the invariant that usable_arenas is sorted in order of
1064 : * nfreepools.
1065 : * 4. Else there's nothing more to do.
1066 : */
1067 6272 : if (nf == ao->ntotalpools) {
1068 : /* Case 1. First unlink ao from usable_arenas.
1069 : */
1070 : assert(ao->prevarena == NULL ||
1071 : ao->prevarena->address != 0);
1072 : assert(ao->nextarena == NULL ||
1073 : ao->nextarena->address != 0);
1074 :
1075 : /* Fix the pointer in the prevarena, or the
1076 : * usable_arenas pointer.
1077 : */
1078 39 : if (ao->prevarena == NULL) {
1079 23 : usable_arenas = ao->nextarena;
1080 : assert(usable_arenas == NULL ||
1081 : usable_arenas->address != 0);
1082 : }
1083 : else {
1084 : assert(ao->prevarena->nextarena == ao);
1085 32 : ao->prevarena->nextarena =
1086 16 : ao->nextarena;
1087 : }
1088 : /* Fix the pointer in the nextarena. */
1089 39 : if (ao->nextarena != NULL) {
1090 : assert(ao->nextarena->prevarena == ao);
1091 6 : ao->nextarena->prevarena =
1092 3 : ao->prevarena;
1093 : }
1094 : /* Record that this arena_object slot is
1095 : * available to be reused.
1096 : */
1097 39 : ao->nextarena = unused_arena_objects;
1098 39 : unused_arena_objects = ao;
1099 :
1100 : /* Free the entire arena. */
1101 : #ifdef ARENAS_USE_MMAP
1102 39 : munmap((void *)ao->address, ARENA_SIZE);
1103 : #else
1104 : free((void *)ao->address);
1105 : #endif
1106 39 : ao->address = 0; /* mark unassociated */
1107 39 : --narenas_currently_allocated;
1108 :
1109 : UNLOCK();
1110 39 : return;
1111 : }
1112 6233 : if (nf == 1) {
1113 : /* Case 2. Put ao at the head of
1114 : * usable_arenas. Note that because
1115 : * ao->nfreepools was 0 before, ao isn't
1116 : * currently on the usable_arenas list.
1117 : */
1118 151 : ao->nextarena = usable_arenas;
1119 151 : ao->prevarena = NULL;
1120 151 : if (usable_arenas)
1121 141 : usable_arenas->prevarena = ao;
1122 151 : usable_arenas = ao;
1123 : assert(usable_arenas->address != 0);
1124 :
1125 : UNLOCK();
1126 151 : return;
1127 : }
1128 : /* If this arena is now out of order, we need to keep
1129 : * the list sorted. The list is kept sorted so that
1130 : * the "most full" arenas are used first, which allows
1131 : * the nearly empty arenas to be completely freed. In
1132 : * a few un-scientific tests, it seems like this
1133 : * approach allowed a lot more memory to be freed.
1134 : */
1135 9909 : if (ao->nextarena == NULL ||
1136 3827 : nf <= ao->nextarena->nfreepools) {
1137 : /* Case 4. Nothing to do. */
1138 : UNLOCK();
1139 5943 : return;
1140 : }
1141 : /* Case 3: We have to move the arena towards the end
1142 : * of the list, because it has more free pools than
1143 : * the arena to its right.
1144 : * First unlink ao from usable_arenas.
1145 : */
1146 139 : if (ao->prevarena != NULL) {
1147 : /* ao isn't at the head of the list */
1148 : assert(ao->prevarena->nextarena == ao);
1149 101 : ao->prevarena->nextarena = ao->nextarena;
1150 : }
1151 : else {
1152 : /* ao is at the head of the list */
1153 : assert(usable_arenas == ao);
1154 38 : usable_arenas = ao->nextarena;
1155 : }
1156 139 : ao->nextarena->prevarena = ao->prevarena;
1157 :
1158 : /* Locate the new insertion point by iterating over
1159 : * the list, using our nextarena pointer.
1160 : */
1161 771 : while (ao->nextarena != NULL &&
1162 308 : nf > ao->nextarena->nfreepools) {
1163 185 : ao->prevarena = ao->nextarena;
1164 185 : ao->nextarena = ao->nextarena->nextarena;
1165 : }
1166 :
1167 : /* Insert ao at this point. */
1168 : assert(ao->nextarena == NULL ||
1169 : ao->prevarena == ao->nextarena->prevarena);
1170 : assert(ao->prevarena->nextarena == ao->nextarena);
1171 :
1172 139 : ao->prevarena->nextarena = ao;
1173 139 : if (ao->nextarena != NULL)
1174 123 : ao->nextarena->prevarena = ao;
1175 :
1176 : /* Verify that the swaps worked. */
1177 : assert(ao->nextarena == NULL ||
1178 : nf <= ao->nextarena->nfreepools);
1179 : assert(ao->prevarena == NULL ||
1180 : nf > ao->prevarena->nfreepools);
1181 : assert(ao->nextarena == NULL ||
1182 : ao->nextarena->prevarena == ao);
1183 : assert((usable_arenas == ao &&
1184 : ao->prevarena == NULL) ||
1185 : ao->prevarena->nextarena == ao);
1186 :
1187 : UNLOCK();
1188 139 : return;
1189 : }
1190 : /* Pool was full, so doesn't currently live in any list:
1191 : * link it to the front of the appropriate usedpools[] list.
1192 : * This mimics LRU pool usage for new allocations and
1193 : * targets optimal filling when several pools contain
1194 : * blocks of the same size class.
1195 : */
1196 22759 : --pool->ref.count;
1197 : assert(pool->ref.count > 0); /* else the pool is empty */
1198 22759 : size = pool->szidx;
1199 22759 : next = usedpools[size + size];
1200 22759 : prev = next->prevpool;
1201 : /* insert pool before next: prev <-> pool <-> next */
1202 22759 : pool->nextpool = next;
1203 22759 : pool->prevpool = prev;
1204 22759 : next->prevpool = pool;
1205 22759 : prev->nextpool = pool;
1206 : UNLOCK();
1207 22759 : return;
1208 : }
1209 :
1210 : #ifdef WITH_VALGRIND
1211 : redirect:
1212 : #endif
1213 : /* We didn't allocate this address. */
1214 5800 : free(p);
1215 : }
1216 :
1217 : /* realloc. If p is NULL, this acts like malloc(nbytes). Else if nbytes==0,
1218 : * then as the Python docs promise, we do not treat this like free(p), and
1219 : * return a non-NULL result.
1220 : */
1221 :
1222 : #undef PyObject_Realloc
1223 : ATTRIBUTE_NO_ADDRESS_SAFETY_ANALYSIS
1224 : void *
1225 363321 : PyObject_Realloc(void *p, size_t nbytes)
1226 : {
1227 : void *bp;
1228 : poolp pool;
1229 : size_t size;
1230 : #ifndef Py_USING_MEMORY_DEBUGGER
1231 : uint arenaindex_temp;
1232 : #endif
1233 :
1234 363321 : if (p == NULL)
1235 308032 : return PyObject_Malloc(nbytes);
1236 :
1237 : /*
1238 : * Limit ourselves to PY_SSIZE_T_MAX bytes to prevent security holes.
1239 : * Most python internals blindly use a signed Py_ssize_t to track
1240 : * things without checking for overflows or negatives.
1241 : * As size_t is unsigned, checking for nbytes < 0 is not required.
1242 : */
1243 55289 : if (nbytes > PY_SSIZE_T_MAX)
1244 0 : return NULL;
1245 :
1246 : #ifdef WITH_VALGRIND
1247 : /* Treat running_on_valgrind == -1 the same as 0 */
1248 : if (UNLIKELY(running_on_valgrind > 0))
1249 : goto redirect;
1250 : #endif
1251 :
1252 55289 : pool = POOL_ADDR(p);
1253 55289 : if (Py_ADDRESS_IN_RANGE(p, pool)) {
1254 : /* We're in charge of this block */
1255 54278 : size = INDEX2SIZE(pool->szidx);
1256 54278 : if (nbytes <= size) {
1257 : /* The block is staying the same or shrinking. If
1258 : * it's shrinking, there's a tradeoff: it costs
1259 : * cycles to copy the block to a smaller size class,
1260 : * but it wastes memory not to copy it. The
1261 : * compromise here is to copy on shrink only if at
1262 : * least 25% of size can be shaved off.
1263 : */
1264 10192 : if (4 * nbytes > 3 * size) {
1265 : /* It's the same,
1266 : * or shrinking and new/old > 3/4.
1267 : */
1268 6598 : return p;
1269 : }
1270 3594 : size = nbytes;
1271 : }
1272 47680 : bp = PyObject_Malloc(nbytes);
1273 47680 : if (bp != NULL) {
1274 47680 : memcpy(bp, p, size);
1275 47680 : PyObject_Free(p);
1276 : }
1277 47680 : return bp;
1278 : }
1279 : #ifdef WITH_VALGRIND
1280 : redirect:
1281 : #endif
1282 : /* We're not managing this block. If nbytes <=
1283 : * SMALL_REQUEST_THRESHOLD, it's tempting to try to take over this
1284 : * block. However, if we do, we need to copy the valid data from
1285 : * the C-managed block to one of our blocks, and there's no portable
1286 : * way to know how much of the memory space starting at p is valid.
1287 : * As bug 1185883 pointed out the hard way, it's possible that the
1288 : * C-managed block is "at the end" of allocated VM space, so that
1289 : * a memory fault can occur if we try to copy nbytes bytes starting
1290 : * at p. Instead we punt: let C continue to manage this block.
1291 : */
1292 1011 : if (nbytes)
1293 1011 : return realloc(p, nbytes);
1294 : /* C doesn't define the result of realloc(p, 0) (it may or may not
1295 : * return NULL then), but Python's docs promise that nbytes==0 never
1296 : * returns NULL. We don't pass 0 to realloc(), to avoid that endcase
1297 : * to begin with. Even then, we can't be sure that realloc() won't
1298 : * return NULL.
1299 : */
1300 0 : bp = realloc(p, 1);
1301 0 : return bp ? bp : p;
1302 : }
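/*
 * Usage sketch (ours, not part of this file): how the three entry points
 * above compose from a caller's point of view.  Requests of at most
 * SMALL_REQUEST_THRESHOLD bytes are served from pools; larger requests,
 * and blocks grown past that limit by realloc, are handed to the libc
 * allocator transparently.
 */
static int
example_usage(void)                         /* hypothetical helper */
{
    char *buf = (char *)PyObject_Malloc(64);     /* pool-backed */
    char *grown;
    if (buf == NULL)
        return -1;
    grown = (char *)PyObject_Realloc(buf, 4096); /* moves to libc malloc */
    if (grown == NULL) {
        PyObject_Free(buf);
        return -1;
    }
    PyObject_Free(grown);                   /* Free handles both cases */
    return 0;
}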
1303 :
1304 : #else /* ! WITH_PYMALLOC */
1305 :
1306 : /*==========================================================================*/
1307 : /* pymalloc not enabled: Redirect the entry points to malloc. These will
1308 : * only be used by extensions that are compiled with pymalloc enabled. */
1309 :
1310 : void *
1311 : PyObject_Malloc(size_t n)
1312 : {
1313 : return PyMem_MALLOC(n);
1314 : }
1315 :
1316 : void *
1317 : PyObject_Realloc(void *p, size_t n)
1318 : {
1319 : return PyMem_REALLOC(p, n);
1320 : }
1321 :
1322 : void
1323 : PyObject_Free(void *p)
1324 : {
1325 : PyMem_FREE(p);
1326 : }
1327 : #endif /* WITH_PYMALLOC */
1328 :
1329 : #ifdef PYMALLOC_DEBUG
1330 : /*==========================================================================*/
1331 : /* A cross-platform debugging allocator. This doesn't manage memory directly,
1332 : * it wraps a real allocator, adding extra debugging info to the memory blocks.
1333 : */
1334 :
1335 : /* Special bytes broadcast into debug memory blocks at appropriate times.
1336 : * Strings of these are unlikely to be valid addresses, floats, ints or
1337 : * 7-bit ASCII.
1338 : */
1339 : #undef CLEANBYTE
1340 : #undef DEADBYTE
1341 : #undef FORBIDDENBYTE
1342 : #define CLEANBYTE 0xCB /* clean (newly allocated) memory */
1343 : #define DEADBYTE 0xDB /* dead (newly freed) memory */
1344 : #define FORBIDDENBYTE 0xFB /* untouchable bytes at each end of a block */
1345 :
1346 : /* We tag each block with an API ID in order to detect API violations */
1347 : #define _PYMALLOC_MEM_ID 'm' /* the PyMem_Malloc() API */
1348 : #define _PYMALLOC_OBJ_ID 'o' /* The PyObject_Malloc() API */
1349 :
1350 : static size_t serialno = 0; /* incremented on each debug {m,re}alloc */
1351 :
1352 : /* serialno is always incremented via calling this routine. The point is
1353 : * to supply a single place to set a breakpoint.
1354 : */
1355 : static void
1356 : bumpserialno(void)
1357 : {
1358 : ++serialno;
1359 : }
1360 :
1361 : #define SST SIZEOF_SIZE_T
1362 :
1363 : /* Read sizeof(size_t) bytes at p as a big-endian size_t. */
1364 : static size_t
1365 : read_size_t(const void *p)
1366 : {
1367 : const uchar *q = (const uchar *)p;
1368 : size_t result = *q++;
1369 : int i;
1370 :
1371 : for (i = SST; --i > 0; ++q)
1372 : result = (result << 8) | *q;
1373 : return result;
1374 : }
1375 :
1376 : /* Write n as a big-endian size_t, MSB at address p, LSB at
1377 : * p + sizeof(size_t) - 1.
1378 : */
1379 : static void
1380 : write_size_t(void *p, size_t n)
1381 : {
1382 : uchar *q = (uchar *)p + SST - 1;
1383 : int i;
1384 :
1385 : for (i = SST; --i >= 0; --q) {
1386 : *q = (uchar)(n & 0xff);
1387 : n >>= 8;
1388 : }
1389 : }
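/*
 * Illustrative sketch (ours): the two helpers above round-trip a size_t
 * through a big-endian byte string regardless of the host's endianness.
 */
static void
example_size_t_roundtrip(void)              /* hypothetical helper */
{
    uchar buf[SST];
    write_size_t(buf, (size_t)0x1234);
    assert(read_size_t(buf) == (size_t)0x1234);
    /* buf now ends with ..., 0x12, 0x34 -- most significant byte first. */
}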
1390 :
1391 : #ifdef Py_DEBUG
1392 : /* Is target in the list? The list is traversed via the nextpool pointers.
1393 : * The list may be NULL-terminated, or circular. Return 1 if target is in
1394 : * list, else 0.
1395 : */
1396 : static int
1397 : pool_is_in_list(const poolp target, poolp list)
1398 : {
1399 : poolp origlist = list;
1400 : assert(target != NULL);
1401 : if (list == NULL)
1402 : return 0;
1403 : do {
1404 : if (target == list)
1405 : return 1;
1406 : list = list->nextpool;
1407 : } while (list != NULL && list != origlist);
1408 : return 0;
1409 : }
1410 :
1411 : #else
1412 : #define pool_is_in_list(X, Y) 1
1413 :
1414 : #endif /* Py_DEBUG */
1415 :
1416 : /* Let S = sizeof(size_t). The debug malloc asks for 4*S extra bytes and
1417 : fills them with useful stuff, here calling the underlying malloc's result p:
1418 :
1419 : p[0: S]
1420 : Number of bytes originally asked for. This is a size_t, big-endian (easier
1421 : to read in a memory dump).
1422 : p[S: 2*S]
1423 : Copies of FORBIDDENBYTE. Used to catch under-writes and reads.
1424 : p[2*S: 2*S+n]
1425 : The requested memory, filled with copies of CLEANBYTE.
1426 : Used to catch references to uninitialized memory.
1427 : &p[2*S] is returned. Note that this is 8-byte aligned if pymalloc
1428 : handled the request itself.
1429 : p[2*S+n: 2*S+n+S]
1430 : Copies of FORBIDDENBYTE. Used to catch over-writes and reads.
1431 : p[2*S+n+S: 2*S+n+2*S]
1432 : A serial number, incremented by 1 on each call to _PyObject_DebugMalloc
1433 : and _PyObject_DebugRealloc.
1434 : This is a big-endian size_t.
1435 : If "bad memory" is detected later, the serial number gives an
1436 : excellent way to set a breakpoint on the next run, to capture the
1437 : instant at which this block was passed out.
1438 : */
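/* Worked example (an addition, not part of the original file; assumes a
 * 64-bit build where S == 8): a request for n == 4 bytes makes the debug
 * malloc ask the underlying allocator for 4 + 4*8 == 36 bytes, laid out as
 *
 *     p[0:8]    00 00 00 00 00 00 00 04   big-endian request size
 *     p[8]      'm' or 'o'                API id
 *     p[9:16]   fb fb fb fb fb fb fb      leading pad
 *     p[16:20]  cb cb cb cb               the caller's 4 bytes
 *     p[20:28]  fb fb fb fb fb fb fb fb   trailing pad
 *     p[28:36]  big-endian serial number
 *
 * and &p[16] is handed back to the caller.  The hypothetical helper below
 * (not part of the real API) shows how the hidden fields can be recovered
 * from a pointer returned by the debug allocator.
 */
#if 0   /* sketch only -- never compiled */
static void
debug_block_fields_example(const void *user_ptr, size_t user_nbytes)
{
    const uchar *q = (const uchar *)user_ptr;
    size_t requested = read_size_t(q - 2*SST);              /* p[0: S] */
    char api_id      = (char)q[-SST];                       /* p[S] */
    size_t serial    = read_size_t(q + user_nbytes + SST);  /* after tail pad */

    assert(requested == user_nbytes);
    assert(api_id == _PYMALLOC_MEM_ID || api_id == _PYMALLOC_OBJ_ID);
    (void)serial;   /* matches the count printed by a later debug dump */
}
#endif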
1439 :
1440 : /* debug replacements for the PyMem_* memory API */
1441 : void *
1442 : _PyMem_DebugMalloc(size_t nbytes)
1443 : {
1444 : return _PyObject_DebugMallocApi(_PYMALLOC_MEM_ID, nbytes);
1445 : }
1446 : void *
1447 : _PyMem_DebugRealloc(void *p, size_t nbytes)
1448 : {
1449 : return _PyObject_DebugReallocApi(_PYMALLOC_MEM_ID, p, nbytes);
1450 : }
1451 : void
1452 : _PyMem_DebugFree(void *p)
1453 : {
1454 : _PyObject_DebugFreeApi(_PYMALLOC_MEM_ID, p);
1455 : }
1456 :
1457 : /* debug replacements for the PyObject_* memory API */
1458 : void *
1459 : _PyObject_DebugMalloc(size_t nbytes)
1460 : {
1461 : return _PyObject_DebugMallocApi(_PYMALLOC_OBJ_ID, nbytes);
1462 : }
1463 : void *
1464 : _PyObject_DebugRealloc(void *p, size_t nbytes)
1465 : {
1466 : return _PyObject_DebugReallocApi(_PYMALLOC_OBJ_ID, p, nbytes);
1467 : }
1468 : void
1469 : _PyObject_DebugFree(void *p)
1470 : {
1471 : _PyObject_DebugFreeApi(_PYMALLOC_OBJ_ID, p);
1472 : }
1473 : void
1474 : _PyObject_DebugCheckAddress(const void *p)
1475 : {
1476 : _PyObject_DebugCheckAddressApi(_PYMALLOC_OBJ_ID, p);
1477 : }
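/* Illustrative sketch (an addition, not part of the original file): the
 * per-API id is what lets the debug allocator catch a block obtained from
 * one family of functions but released through the other.  The fragment
 * below would hit the "bad ID" fatal error in _PyObject_DebugCheckAddressApi,
 * because the block is tagged 'o' but is checked against 'm'.
 */
#if 0   /* sketch only -- never compiled */
static void
api_mismatch_example(void)
{
    void *p = _PyObject_DebugMalloc(16);    /* tagged with _PYMALLOC_OBJ_ID */
    _PyMem_DebugFree(p);                    /* expects _PYMALLOC_MEM_ID -> Py_FatalError */
}
#endif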
1478 :
1479 :
1480 : /* generic debug memory api, with an "id" to identify the API in use */
1481 : void *
1482 : _PyObject_DebugMallocApi(char id, size_t nbytes)
1483 : {
1484 : uchar *p; /* base address of malloc'ed block */
1485 : uchar *tail; /* p + 2*SST + nbytes == pointer to tail pad bytes */
1486 : size_t total; /* nbytes + 4*SST */
1487 :
1488 : bumpserialno();
1489 : total = nbytes + 4*SST;
1490 : if (total < nbytes)
1491 : /* overflow: can't represent total as a size_t */
1492 : return NULL;
1493 :
1494 : p = (uchar *)PyObject_Malloc(total);
1495 : if (p == NULL)
1496 : return NULL;
1497 :
1498 : /* at p, write size (SST bytes), id (1 byte), pad (SST-1 bytes) */
1499 : write_size_t(p, nbytes);
1500 : p[SST] = (uchar)id;
1501 : memset(p + SST + 1 , FORBIDDENBYTE, SST-1);
1502 :
1503 : if (nbytes > 0)
1504 : memset(p + 2*SST, CLEANBYTE, nbytes);
1505 :
1506 : /* at tail, write pad (SST bytes) and serialno (SST bytes) */
1507 : tail = p + 2*SST + nbytes;
1508 : memset(tail, FORBIDDENBYTE, SST);
1509 : write_size_t(tail + SST, serialno);
1510 :
1511 : return p + 2*SST;
1512 : }
1513 :
1514 : /* The debug free first checks the 2*SST bytes on each end for sanity (in
1515 : particular, that the FORBIDDENBYTEs with the api ID are still intact).
1516 : Then it fills the original bytes with DEADBYTE.
1517 : Finally it calls the underlying free.
1518 : */
1519 : void
1520 : _PyObject_DebugFreeApi(char api, void *p)
1521 : {
1522 : uchar *q = (uchar *)p - 2*SST; /* address returned from malloc */
1523 : size_t nbytes;
1524 :
1525 : if (p == NULL)
1526 : return;
1527 : _PyObject_DebugCheckAddressApi(api, p);
1528 : nbytes = read_size_t(q);
1529 : nbytes += 4*SST;
1530 : if (nbytes > 0)
1531 : memset(q, DEADBYTE, nbytes);
1532 : PyObject_Free(q);
1533 : }
1534 :
1535 : void *
1536 : _PyObject_DebugReallocApi(char api, void *p, size_t nbytes)
1537 : {
1538 : uchar *q = (uchar *)p;
1539 : uchar *tail;
1540 : size_t total; /* nbytes + 4*SST */
1541 : size_t original_nbytes;
1542 : int i;
1543 :
1544 : if (p == NULL)
1545 : return _PyObject_DebugMallocApi(api, nbytes);
1546 :
1547 : _PyObject_DebugCheckAddressApi(api, p);
1548 : bumpserialno();
1549 : original_nbytes = read_size_t(q - 2*SST);
1550 : total = nbytes + 4*SST;
1551 : if (total < nbytes)
1552 : /* overflow: can't represent total as a size_t */
1553 : return NULL;
1554 :
1555 : if (nbytes < original_nbytes) {
1556 : /* shrinking: mark old extra memory dead */
1557 : memset(q + nbytes, DEADBYTE, original_nbytes - nbytes + 2*SST);
1558 : }
1559 :
1560 : /* Resize and add decorations. We may get a new pointer here, in which
1561 : * case we didn't get the chance to mark the old memory with DEADBYTE,
1562 : * but we live with that.
1563 : */
1564 : q = (uchar *)PyObject_Realloc(q - 2*SST, total);
1565 : if (q == NULL)
1566 : return NULL;
1567 :
1568 : write_size_t(q, nbytes);
1569 : assert(q[SST] == (uchar)api);
1570 : for (i = 1; i < SST; ++i)
1571 : assert(q[SST + i] == FORBIDDENBYTE);
1572 : q += 2*SST;
1573 : tail = q + nbytes;
1574 : memset(tail, FORBIDDENBYTE, SST);
1575 : write_size_t(tail + SST, serialno);
1576 :
1577 : if (nbytes > original_nbytes) {
1578 : /* growing: mark new extra memory clean */
1579 : memset(q + original_nbytes, CLEANBYTE,
1580 : nbytes - original_nbytes);
1581 : }
1582 :
1583 : return q;
1584 : }
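/* Illustrative sketch (an addition, not part of the original file): what the
 * debug realloc leaves behind.  Shrinking first paints the abandoned tail of
 * the old block (plus the old trailing pad) with DEADBYTE; growing paints
 * only the newly exposed bytes with CLEANBYTE, so the caller's data survives.
 */
#if 0   /* sketch only -- never compiled */
static void
debug_realloc_example(void)
{
    uchar *p = (uchar *)_PyObject_DebugMalloc(32);
    memset(p, 0xAA, 32);                            /* caller's data */

    p = (uchar *)_PyObject_DebugRealloc(p, 48);     /* grow */
    assert(p[0] == 0xAA);                           /* old data preserved */
    assert(p[32] == CLEANBYTE);                     /* new bytes marked clean */

    p = (uchar *)_PyObject_DebugRealloc(p, 8);      /* shrink */
    assert(p[0] == 0xAA);
    _PyObject_DebugFree(p);
}
#endif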
1585 :
1586 : /* Check the forbidden bytes on both ends of the memory allocated for p.
1587 : * If anything is wrong, print info to stderr via _PyObject_DebugDumpAddress,
1588 : * and call Py_FatalError to kill the program.
1589 : * The API id is also checked.
1590 : */
1591 : void
1592 : _PyObject_DebugCheckAddressApi(char api, const void *p)
1593 : {
1594 : const uchar *q = (const uchar *)p;
1595 : char msgbuf[64];
1596 : char *msg;
1597 : size_t nbytes;
1598 : const uchar *tail;
1599 : int i;
1600 : char id;
1601 :
1602 : if (p == NULL) {
1603 : msg = "didn't expect a NULL pointer";
1604 : goto error;
1605 : }
1606 :
1607 : /* Check the API id */
1608 : id = (char)q[-SST];
1609 : if (id != api) {
1610 : msg = msgbuf;
1611 : snprintf(msg, sizeof(msgbuf), "bad ID: Allocated using API '%c', verified using API '%c'", id, api);
1612 : msgbuf[sizeof(msgbuf)-1] = 0;
1613 : goto error;
1614 : }
1615 :
1616 : /* Check the stuff at the start of p first: if there's underwrite
1617 : * corruption, the number-of-bytes field may be nuts, and checking
1618 : * the tail could lead to a segfault then.
1619 : */
1620 : for (i = SST-1; i >= 1; --i) {
1621 : if (*(q-i) != FORBIDDENBYTE) {
1622 : msg = "bad leading pad byte";
1623 : goto error;
1624 : }
1625 : }
1626 :
1627 : nbytes = read_size_t(q - 2*SST);
1628 : tail = q + nbytes;
1629 : for (i = 0; i < SST; ++i) {
1630 : if (tail[i] != FORBIDDENBYTE) {
1631 : msg = "bad trailing pad byte";
1632 : goto error;
1633 : }
1634 : }
1635 :
1636 : return;
1637 :
1638 : error:
1639 : _PyObject_DebugDumpAddress(p);
1640 : Py_FatalError(msg);
1641 : }
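/* Illustrative sketch (an addition, not part of the original file): a
 * one-byte overrun lands in the trailing FORBIDDENBYTE pad, so the next
 * check -- forced explicitly here, but normally performed by the debug
 * free/realloc -- dumps the block and aborts with "bad trailing pad byte".
 */
#if 0   /* sketch only -- never compiled */
static void
overrun_detection_example(void)
{
    uchar *p = (uchar *)_PyObject_DebugMalloc(10);
    p[10] = 0;                          /* off-by-one write clobbers the tail pad */
    _PyObject_DebugCheckAddress(p);     /* -> _PyObject_DebugDumpAddress + Py_FatalError */
}
#endif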
1642 :
1643 : /* Display info to stderr about the memory block at p. */
1644 : void
1645 : _PyObject_DebugDumpAddress(const void *p)
1646 : {
1647 : const uchar *q = (const uchar *)p;
1648 : const uchar *tail;
1649 : size_t nbytes, serial;
1650 : int i;
1651 : int ok;
1652 : char id;
1653 :
1654 : fprintf(stderr, "Debug memory block at address p=%p:", p);
1655 : if (p == NULL) {
1656 : fprintf(stderr, "\n");
1657 : return;
1658 : }
1659 : id = (char)q[-SST];
1660 : fprintf(stderr, " API '%c'\n", id);
1661 :
1662 : nbytes = read_size_t(q - 2*SST);
1663 : fprintf(stderr, " %" PY_FORMAT_SIZE_T "u bytes originally "
1664 : "requested\n", nbytes);
1665 :
1666 : /* In case this is nuts, check the leading pad bytes first. */
1667 : fprintf(stderr, " The %d pad bytes at p-%d are ", SST-1, SST-1);
1668 : ok = 1;
1669 : for (i = 1; i <= SST-1; ++i) {
1670 : if (*(q-i) != FORBIDDENBYTE) {
1671 : ok = 0;
1672 : break;
1673 : }
1674 : }
1675 : if (ok)
1676 : fputs("FORBIDDENBYTE, as expected.\n", stderr);
1677 : else {
1678 : fprintf(stderr, "not all FORBIDDENBYTE (0x%02x):\n",
1679 : FORBIDDENBYTE);
1680 : for (i = SST-1; i >= 1; --i) {
1681 : const uchar byte = *(q-i);
1682 : fprintf(stderr, " at p-%d: 0x%02x", i, byte);
1683 : if (byte != FORBIDDENBYTE)
1684 : fputs(" *** OUCH", stderr);
1685 : fputc('\n', stderr);
1686 : }
1687 :
1688 : fputs(" Because memory is corrupted at the start, the "
1689 : "count of bytes requested\n"
1690 : " may be bogus, and checking the trailing pad "
1691 : "bytes may segfault.\n", stderr);
1692 : }
1693 :
1694 : tail = q + nbytes;
1695 : fprintf(stderr, " The %d pad bytes at tail=%p are ", SST, tail);
1696 : ok = 1;
1697 : for (i = 0; i < SST; ++i) {
1698 : if (tail[i] != FORBIDDENBYTE) {
1699 : ok = 0;
1700 : break;
1701 : }
1702 : }
1703 : if (ok)
1704 : fputs("FORBIDDENBYTE, as expected.\n", stderr);
1705 : else {
1706 : fprintf(stderr, "not all FORBIDDENBYTE (0x%02x):\n",
1707 : FORBIDDENBYTE);
1708 : for (i = 0; i < SST; ++i) {
1709 : const uchar byte = tail[i];
1710 : fprintf(stderr, " at tail+%d: 0x%02x",
1711 : i, byte);
1712 : if (byte != FORBIDDENBYTE)
1713 : fputs(" *** OUCH", stderr);
1714 : fputc('\n', stderr);
1715 : }
1716 : }
1717 :
1718 : serial = read_size_t(tail + SST);
1719 : fprintf(stderr, " The block was made by call #%" PY_FORMAT_SIZE_T
1720 : "u to debug malloc/realloc.\n", serial);
1721 :
1722 : if (nbytes > 0) {
1723 : i = 0;
1724 : fputs(" Data at p:", stderr);
1725 : /* print up to 8 bytes at the start */
1726 : while (q < tail && i < 8) {
1727 : fprintf(stderr, " %02x", *q);
1728 : ++i;
1729 : ++q;
1730 : }
1731 : /* and up to 8 at the end */
1732 : if (q < tail) {
1733 : if (tail - q > 8) {
1734 : fputs(" ...", stderr);
1735 : q = tail - 8;
1736 : }
1737 : while (q < tail) {
1738 : fprintf(stderr, " %02x", *q);
1739 : ++q;
1740 : }
1741 : }
1742 : fputc('\n', stderr);
1743 : }
1744 : }
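/* For reference (an addition, not part of the original file), the dump
 * produced above has roughly this shape; the values are made up and assume
 * SST == 8, so this is a sketch of the format rather than captured output:
 *
 *     Debug memory block at address p=0x95f7d8: API 'o'
 *         10 bytes originally requested
 *         The 7 pad bytes at p-7 are FORBIDDENBYTE, as expected.
 *         The 8 pad bytes at tail=0x95f7e2 are not all FORBIDDENBYTE (0xfb):
 *             at tail+0: 0x00 *** OUCH
 *             at tail+1: 0xfb
 *             (remaining tail bytes listed the same way)
 *         The block was made by call #1234 to debug malloc/realloc.
 *         Data at p: cb cb cb cb cb cb cb cb cb cb
 */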
1745 :
1746 : static size_t
1747 : printone(const char* msg, size_t value)
1748 : {
1749 : int i, k;
1750 : char buf[100];
1751 : size_t origvalue = value;
1752 :
1753 : fputs(msg, stderr);
1754 : for (i = (int)strlen(msg); i < 35; ++i)
1755 : fputc(' ', stderr);
1756 : fputc('=', stderr);
1757 :
1758 : /* Write the value with commas. */
1759 : i = 22;
1760 : buf[i--] = '\0';
1761 : buf[i--] = '\n';
1762 : k = 3;
1763 : do {
1764 : size_t nextvalue = value / 10;
1765 : unsigned int digit = (unsigned int)(value - nextvalue * 10);
1766 : value = nextvalue;
1767 : buf[i--] = (char)(digit + '0');
1768 : --k;
1769 : if (k == 0 && value && i >= 0) {
1770 : k = 3;
1771 : buf[i--] = ',';
1772 : }
1773 : } while (value && i >= 0);
1774 :
1775 : while (i >= 0)
1776 : buf[i--] = ' ';
1777 : fputs(buf, stderr);
1778 :
1779 : return origvalue;
1780 : }
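/* Illustrative note (an addition, not part of the original file): printone()
 * pads the label out to 35 columns, then right-aligns the comma-grouped value
 * in a 21-character field, so a call such as
 *
 *     printone("# bytes in allocated blocks", 12345678);
 *
 * prints a line of the form
 *
 *     # bytes in allocated blocks        =            12,345,678
 *
 * and returns the value unchanged so that callers can accumulate totals.
 */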
1781 :
1782 : /* Print summary info to stderr about the state of pymalloc's structures.
1783 : * In Py_DEBUG mode, also perform some expensive internal consistency
1784 : * checks.
1785 : */
1786 : void
1787 : _PyObject_DebugMallocStats(void)
1788 : {
1789 : uint i;
1790 : const uint numclasses = SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT;
1791 : /* # of pools, allocated blocks, and free blocks per class index */
1792 : size_t numpools[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
1793 : size_t numblocks[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
1794 : size_t numfreeblocks[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
1795 : /* total # of allocated bytes in used and full pools */
1796 : size_t allocated_bytes = 0;
1797 : /* total # of available bytes in used pools */
1798 : size_t available_bytes = 0;
1799 : /* # of free pools + pools not yet carved out of current arena */
1800 : uint numfreepools = 0;
1801 : /* # of bytes for arena alignment padding */
1802 : size_t arena_alignment = 0;
1803 : /* # of bytes in used and full pools used for pool_headers */
1804 : size_t pool_header_bytes = 0;
1805 : /* # of bytes in used and full pools wasted due to quantization,
1806 : * i.e. the necessarily leftover space at the ends of used and
1807 : * full pools.
1808 : */
1809 : size_t quantization = 0;
1810 : /* # of arenas actually allocated. */
1811 : size_t narenas = 0;
1812 : /* running total -- should equal narenas * ARENA_SIZE */
1813 : size_t total;
1814 : char buf[128];
1815 :
1816 : fprintf(stderr, "Small block threshold = %d, in %u size classes.\n",
1817 : SMALL_REQUEST_THRESHOLD, numclasses);
1818 :
1819 : for (i = 0; i < numclasses; ++i)
1820 : numpools[i] = numblocks[i] = numfreeblocks[i] = 0;
1821 :
1822 : /* Because full pools aren't linked to from anything, it's easiest
1823 : * to march over all the arenas. If we're lucky, most of the memory
1824 : * will be living in full pools -- would be a shame to miss them.
1825 : */
1826 : for (i = 0; i < maxarenas; ++i) {
1827 : uint j;
1828 : uptr base = arenas[i].address;
1829 :
1830 : /* Skip arenas which are not allocated. */
1831 : if (arenas[i].address == (uptr)NULL)
1832 : continue;
1833 : narenas += 1;
1834 :
1835 : numfreepools += arenas[i].nfreepools;
1836 :
1837 : /* round up to pool alignment */
1838 : if (base & (uptr)POOL_SIZE_MASK) {
1839 : arena_alignment += POOL_SIZE;
1840 : base &= ~(uptr)POOL_SIZE_MASK;
1841 : base += POOL_SIZE;
1842 : }
1843 :
1844 : /* visit every pool in the arena */
1845 : assert(base <= (uptr) arenas[i].pool_address);
1846 : for (j = 0;
1847 : base < (uptr) arenas[i].pool_address;
1848 : ++j, base += POOL_SIZE) {
1849 : poolp p = (poolp)base;
1850 : const uint sz = p->szidx;
1851 : uint freeblocks;
1852 :
1853 : if (p->ref.count == 0) {
1854 : /* currently unused */
1855 : assert(pool_is_in_list(p, arenas[i].freepools));
1856 : continue;
1857 : }
1858 : ++numpools[sz];
1859 : numblocks[sz] += p->ref.count;
1860 : freeblocks = NUMBLOCKS(sz) - p->ref.count;
1861 : numfreeblocks[sz] += freeblocks;
1862 : #ifdef Py_DEBUG
1863 : if (freeblocks > 0)
1864 : assert(pool_is_in_list(p, usedpools[sz + sz]));
1865 : #endif
1866 : }
1867 : }
1868 : assert(narenas == narenas_currently_allocated);
1869 :
1870 : fputc('\n', stderr);
1871 : fputs("class size num pools blocks in use avail blocks\n"
1872 : "----- ---- --------- ------------- ------------\n",
1873 : stderr);
1874 :
1875 : for (i = 0; i < numclasses; ++i) {
1876 : size_t p = numpools[i];
1877 : size_t b = numblocks[i];
1878 : size_t f = numfreeblocks[i];
1879 : uint size = INDEX2SIZE(i);
1880 : if (p == 0) {
1881 : assert(b == 0 && f == 0);
1882 : continue;
1883 : }
1884 : fprintf(stderr, "%5u %6u "
1885 : "%11" PY_FORMAT_SIZE_T "u "
1886 : "%15" PY_FORMAT_SIZE_T "u "
1887 : "%13" PY_FORMAT_SIZE_T "u\n",
1888 : i, size, p, b, f);
1889 : allocated_bytes += b * size;
1890 : available_bytes += f * size;
1891 : pool_header_bytes += p * POOL_OVERHEAD;
1892 : quantization += p * ((POOL_SIZE - POOL_OVERHEAD) % size);
1893 : }
1894 : fputc('\n', stderr);
1895 : (void)printone("# times object malloc called", serialno);
1896 :
1897 : (void)printone("# arenas allocated total", ntimes_arena_allocated);
1898 : (void)printone("# arenas reclaimed", ntimes_arena_allocated - narenas);
1899 : (void)printone("# arenas highwater mark", narenas_highwater);
1900 : (void)printone("# arenas allocated current", narenas);
1901 :
1902 : PyOS_snprintf(buf, sizeof(buf),
1903 : "%" PY_FORMAT_SIZE_T "u arenas * %d bytes/arena",
1904 : narenas, ARENA_SIZE);
1905 : (void)printone(buf, narenas * ARENA_SIZE);
1906 :
1907 : fputc('\n', stderr);
1908 :
1909 : total = printone("# bytes in allocated blocks", allocated_bytes);
1910 : total += printone("# bytes in available blocks", available_bytes);
1911 :
1912 : PyOS_snprintf(buf, sizeof(buf),
1913 : "%u unused pools * %d bytes", numfreepools, POOL_SIZE);
1914 : total += printone(buf, (size_t)numfreepools * POOL_SIZE);
1915 :
1916 : total += printone("# bytes lost to pool headers", pool_header_bytes);
1917 : total += printone("# bytes lost to quantization", quantization);
1918 : total += printone("# bytes lost to arena alignment", arena_alignment);
1919 : (void)printone("Total", total);
1920 : }
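/* Usage note (an addition, not part of the original file): in a
 * PYMALLOC_DEBUG build this report is normally written to stderr at
 * interpreter shutdown when the PYTHONMALLOCSTATS environment variable is
 * set, e.g.
 *
 *     PYTHONMALLOCSTATS=1 ./python -c "pass"
 *
 * and code linked against such a build can also call
 * _PyObject_DebugMallocStats() directly at any point of interest.
 */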
1921 :
1922 : #endif /* PYMALLOC_DEBUG */
1923 :
1924 : #ifdef Py_USING_MEMORY_DEBUGGER
1925 : /* Keep this function last in the file so that gcc won't inline it, since
1926 : * its definition comes after the references to it.
1927 : */
1928 : int
1929 : Py_ADDRESS_IN_RANGE(void *P, poolp pool)
1930 : {
1931 : uint arenaindex_temp = pool->arenaindex;
1932 :
1933 : return arenaindex_temp < maxarenas &&
1934 : (uptr)P - arenas[arenaindex_temp].address < (uptr)ARENA_SIZE &&
1935 : arenas[arenaindex_temp].address != 0;
1936 : }
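/* Note (an addition, not part of the original file): the subtraction above is
 * done in unsigned (uptr) arithmetic on purpose -- if P lies below the arena
 * base, (uptr)P - address wraps around to a huge value and the "< ARENA_SIZE"
 * test fails, so no separate lower-bound comparison is needed.
 */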
1937 : #endif
|