GCC Code Coverage Report
Directory: ./
                                              Exec    Total    Coverage
File: src/rpmalloc.c              Lines:       662     1017      65.1 %
Date: 2020-12-10 21:44:00         Branches:    210      494      42.5 %

Line Branch Exec Source
1
/* rpmalloc.c  -  Memory allocator  -  Public Domain  -  2016-2020 Mattias Jansson
2
 *
3
 * This library provides a cross-platform lock-free thread-caching malloc implementation in C11.
4
 * The latest source code is always available at
5
 *
6
 * https://github.com/mjansson/rpmalloc
7
 *
8
 * This library is put in the public domain; you can redistribute it and/or modify it without any restrictions.
9
 *
10
 */
11
12
#include "rpmalloc.h"
13
14
////////////
15
///
16
/// Build time configurable limits
17
///
18
//////
19
20
#if defined(__clang__)
21
#pragma clang diagnostic ignored "-Wunused-macros"
22
#pragma clang diagnostic ignored "-Wunused-function"
23
#elif defined(__GNUC__)
24
#pragma GCC diagnostic ignored "-Wunused-macros"
25
#pragma GCC diagnostic ignored "-Wunused-function"
26
#endif
27
28
#ifndef HEAP_ARRAY_SIZE
29
//! Size of heap hashmap
30
#define HEAP_ARRAY_SIZE           47
31
#endif
32
#ifndef ENABLE_THREAD_CACHE
33
//! Enable per-thread cache
34
#define ENABLE_THREAD_CACHE       1
35
#endif
36
#ifndef ENABLE_GLOBAL_CACHE
37
//! Enable global cache shared between all threads, requires thread cache
38
#define ENABLE_GLOBAL_CACHE       1
39
#endif
40
#ifndef ENABLE_VALIDATE_ARGS
41
//! Enable validation of args to public entry points
42
#define ENABLE_VALIDATE_ARGS      0
43
#endif
44
#ifndef ENABLE_STATISTICS
45
//! Enable statistics collection
46
#define ENABLE_STATISTICS         0
47
#endif
48
#ifndef ENABLE_ASSERTS
49
//! Enable asserts
50
#define ENABLE_ASSERTS            0
51
#endif
52
#ifndef ENABLE_OVERRIDE
53
//! Override standard library malloc/free and new/delete entry points
54
#define ENABLE_OVERRIDE           0
55
#endif
56
#ifndef ENABLE_PRELOAD
57
//! Support preloading
58
#define ENABLE_PRELOAD            0
59
#endif
60
#ifndef DISABLE_UNMAP
61
//! Disable unmapping memory pages (also enables unlimited cache)
62
#define DISABLE_UNMAP             0
63
#endif
64
#ifndef ENABLE_UNLIMITED_CACHE
65
//! Enable unlimited global cache (no unmapping until finalization)
66
#define ENABLE_UNLIMITED_CACHE    0
67
#endif
68
#ifndef ENABLE_ADAPTIVE_THREAD_CACHE
69
//! Enable adaptive thread cache size based on use heuristics
70
#define ENABLE_ADAPTIVE_THREAD_CACHE 0
71
#endif
72
#ifndef DEFAULT_SPAN_MAP_COUNT
73
//! Default number of spans to map in a call to map more virtual memory (default values yield 4MiB here)
74
#define DEFAULT_SPAN_MAP_COUNT    64
75
#endif
76
#ifndef GLOBAL_CACHE_MULTIPLIER
77
//! Multiplier for global cache
78
#define GLOBAL_CACHE_MULTIPLIER   8
79
#endif
80
81
#if DISABLE_UNMAP && !ENABLE_GLOBAL_CACHE
82
#error Must use global cache if unmap is disabled
83
#endif
84
85
#if DISABLE_UNMAP
86
#undef ENABLE_UNLIMITED_CACHE
87
#define ENABLE_UNLIMITED_CACHE 1
88
#endif
89
90
#if !ENABLE_GLOBAL_CACHE
91
#undef ENABLE_UNLIMITED_CACHE
92
#define ENABLE_UNLIMITED_CACHE 0
93
#endif
94
95
#if !ENABLE_THREAD_CACHE
96
#undef ENABLE_ADAPTIVE_THREAD_CACHE
97
#define ENABLE_ADAPTIVE_THREAD_CACHE 0
98
#endif
99
100
#if defined(_WIN32) || defined(__WIN32__) || defined(_WIN64)
101
#  define PLATFORM_WINDOWS 1
102
#  define PLATFORM_POSIX 0
103
#else
104
#  define PLATFORM_WINDOWS 0
105
#  define PLATFORM_POSIX 1
106
#endif
107
108
/// Platform and arch specifics
109
#if defined(_MSC_VER) && !defined(__clang__)
110
#  ifndef FORCEINLINE
111
#    define FORCEINLINE inline __forceinline
112
#  endif
113
#  define _Static_assert static_assert
114
#else
115
#  ifndef FORCEINLINE
116
#    define FORCEINLINE inline __attribute__((__always_inline__))
117
#  endif
118
#endif
119
#if PLATFORM_WINDOWS
120
#  ifndef WIN32_LEAN_AND_MEAN
121
#    define WIN32_LEAN_AND_MEAN
122
#  endif
123
#  include <Windows.h>
124
#  if ENABLE_VALIDATE_ARGS
125
#    include <Intsafe.h>
126
#  endif
127
#else
128
#  include <unistd.h>
129
#  include <stdio.h>
130
#  include <stdlib.h>
131
#  if defined(__APPLE__)
132
#    include <TargetConditionals.h>
133
#    if !TARGET_OS_IPHONE && !TARGET_OS_SIMULATOR
134
#    include <mach/mach_vm.h>
135
#    include <mach/vm_statistics.h>
136
#    endif
137
#    include <pthread.h>
138
#  endif
139
#  if defined(__HAIKU__)
140
#    include <OS.h>
141
#    include <pthread.h>
142
#  endif
143
#endif
144
145
#include <stdint.h>
146
#include <string.h>
147
#include <errno.h>
148
149
#if defined(_WIN32) && (!defined(BUILD_DYNAMIC_LINK) || !BUILD_DYNAMIC_LINK)
150
#include <fibersapi.h>
151
static DWORD fls_key;
152
static void NTAPI
153
_rpmalloc_thread_destructor(void* value) {
154
	if (value)
155
		rpmalloc_thread_finalize();
156
}
157
#endif
158
159
#if PLATFORM_POSIX
160
#  include <sys/mman.h>
161
#  include <sched.h>
162
#  ifdef __FreeBSD__
163
#    include <sys/sysctl.h>
164
#    define MAP_HUGETLB MAP_ALIGNED_SUPER
165
#  endif
166
#  ifndef MAP_UNINITIALIZED
167
#    define MAP_UNINITIALIZED 0
168
#  endif
169
#endif
170
#include <errno.h>
171
172
#if ENABLE_ASSERTS
173
#  undef NDEBUG
174
#  if defined(_MSC_VER) && !defined(_DEBUG)
175
#    define _DEBUG
176
#  endif
177
#  include <assert.h>
178
#else
179
#  undef  assert
180
#  define assert(x) do {} while(0)
181
#endif
182
#if ENABLE_STATISTICS
183
#  include <stdio.h>
184
#endif
185
186
//////
187
///
188
/// Atomic access abstraction (since MSVC does not do C11 yet)
189
///
190
//////
191
192
#if defined(_MSC_VER) && !defined(__clang__)
193
194
typedef volatile long      atomic32_t;
195
typedef volatile long long atomic64_t;
196
typedef volatile void*     atomicptr_t;
197
198
static FORCEINLINE int32_t atomic_load32(atomic32_t* src) { return *src; }
199
static FORCEINLINE void    atomic_store32(atomic32_t* dst, int32_t val) { *dst = val; }
200
static FORCEINLINE int32_t atomic_incr32(atomic32_t* val) { return (int32_t)InterlockedIncrement(val); }
201
static FORCEINLINE int32_t atomic_decr32(atomic32_t* val) { return (int32_t)InterlockedDecrement(val); }
202
static FORCEINLINE int32_t atomic_add32(atomic32_t* val, int32_t add) { return (int32_t)InterlockedExchangeAdd(val, add) + add; }
203
static FORCEINLINE int     atomic_cas32_acquire(atomic32_t* dst, int32_t val, int32_t ref) { return (InterlockedCompareExchange(dst, val, ref) == ref) ? 1 : 0; }
204
static FORCEINLINE void    atomic_store32_release(atomic32_t* dst, int32_t val) { *dst = val; }
205
static FORCEINLINE int64_t atomic_load64(atomic64_t* src) { return *src; }
206
static FORCEINLINE int64_t atomic_add64(atomic64_t* val, int64_t add) { return (int64_t)InterlockedExchangeAdd64(val, add) + add; }
207
static FORCEINLINE void*   atomic_load_ptr(atomicptr_t* src) { return (void*)*src; }
208
static FORCEINLINE void    atomic_store_ptr(atomicptr_t* dst, void* val) { *dst = val; }
209
static FORCEINLINE void    atomic_store_ptr_release(atomicptr_t* dst, void* val) { *dst = val; }
210
static FORCEINLINE void*   atomic_exchange_ptr_acquire(atomicptr_t* dst, void* val) { return (void*)InterlockedExchangePointer((void* volatile*)dst, val); }
211
static FORCEINLINE int     atomic_cas_ptr(atomicptr_t* dst, void* val, void* ref) { return (InterlockedCompareExchangePointer((void* volatile*)dst, val, ref) == ref) ? 1 : 0; }
212
213
#define EXPECTED(x) (x)
214
#define UNEXPECTED(x) (x)
215
216
#else
217
218
#include <stdatomic.h>
219
220
typedef volatile _Atomic(int32_t) atomic32_t;
221
typedef volatile _Atomic(int64_t) atomic64_t;
222
typedef volatile _Atomic(void*) atomicptr_t;
223
224
85
static FORCEINLINE int32_t atomic_load32(atomic32_t* src) { return atomic_load_explicit(src, memory_order_relaxed); }
225
303
static FORCEINLINE void    atomic_store32(atomic32_t* dst, int32_t val) { atomic_store_explicit(dst, val, memory_order_relaxed); }
226
163
static FORCEINLINE int32_t atomic_incr32(atomic32_t* val) { return atomic_fetch_add_explicit(val, 1, memory_order_relaxed) + 1; }
227
static FORCEINLINE int32_t atomic_decr32(atomic32_t* val) { return atomic_fetch_add_explicit(val, -1, memory_order_relaxed) - 1; }
228
1278
static FORCEINLINE int32_t atomic_add32(atomic32_t* val, int32_t add) { return atomic_fetch_add_explicit(val, add, memory_order_relaxed) + add; }
229
10606
static FORCEINLINE int     atomic_cas32_acquire(atomic32_t* dst, int32_t val, int32_t ref) { return atomic_compare_exchange_weak_explicit(dst, &ref, val, memory_order_acquire, memory_order_relaxed); }
230
10646
static FORCEINLINE void    atomic_store32_release(atomic32_t* dst, int32_t val) { atomic_store_explicit(dst, val, memory_order_release); }
231
static FORCEINLINE int64_t atomic_load64(atomic64_t* val) { return atomic_load_explicit(val, memory_order_relaxed); }
232
static FORCEINLINE int64_t atomic_add64(atomic64_t* val, int64_t add) { return atomic_fetch_add_explicit(val, add, memory_order_relaxed) + add; }
233
1546
static FORCEINLINE void*   atomic_load_ptr(atomicptr_t* src) { return atomic_load_explicit(src, memory_order_relaxed); }
234
static FORCEINLINE void    atomic_store_ptr(atomicptr_t* dst, void* val) { atomic_store_explicit(dst, val, memory_order_relaxed); }
235
1220
static FORCEINLINE void    atomic_store_ptr_release(atomicptr_t* dst, void* val) { atomic_store_explicit(dst, val, memory_order_release); }
236
static FORCEINLINE void*   atomic_exchange_ptr_acquire(atomicptr_t* dst, void* val) { return atomic_exchange_explicit(dst, val, memory_order_acquire); }
237
static FORCEINLINE int     atomic_cas_ptr(atomicptr_t* dst, void* val, void* ref) { return atomic_compare_exchange_weak_explicit(dst, &ref, val, memory_order_relaxed, memory_order_relaxed); }
238
239
#define EXPECTED(x) __builtin_expect((x), 1)
240
#define UNEXPECTED(x) __builtin_expect((x), 0)
241
242
#endif
243
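Both branches above expose the same tiny interface. The acquire CAS paired with a release store is what the global cache further down uses as a minimal spinlock; the following standalone sketch (an editorial illustration written against the C11 branch, not part of the measured source; names such as cas32_acquire and critical_section are made up here) shows that usage pattern.

#include <stdatomic.h>
#include <stdint.h>

typedef volatile _Atomic(int32_t) atomic32_t;

static inline int
cas32_acquire(atomic32_t* dst, int32_t val, int32_t ref) {
	return atomic_compare_exchange_weak_explicit(dst, &ref, val, memory_order_acquire, memory_order_relaxed);
}

static inline void
store32_release(atomic32_t* dst, int32_t val) {
	atomic_store_explicit(dst, val, memory_order_release);
}

static atomic32_t lock;

void
critical_section(void (*work)(void)) {
	// Spin until the lock word transitions 0 -> 1 (acquire on success)
	while (!cas32_acquire(&lock, 1, 0))
		;
	work();                      // protected region
	store32_release(&lock, 0);   // release store publishes changes and unlocks
}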
244
////////////
245
///
246
/// Statistics related functions (evaluate to nothing when statistics not enabled)
247
///
248
//////
249
250
#if ENABLE_STATISTICS
251
#  define _rpmalloc_stat_inc(counter) atomic_incr32(counter)
252
#  define _rpmalloc_stat_dec(counter) atomic_decr32(counter)
253
#  define _rpmalloc_stat_add(counter, value) atomic_add32(counter, (int32_t)(value))
254
#  define _rpmalloc_stat_add64(counter, value) atomic_add64(counter, (int64_t)(value))
255
#  define _rpmalloc_stat_add_peak(counter, value, peak) do { int32_t _cur_count = atomic_add32(counter, (int32_t)(value)); if (_cur_count > (peak)) peak = _cur_count; } while (0)
256
#  define _rpmalloc_stat_sub(counter, value) atomic_add32(counter, -(int32_t)(value))
257
#  define _rpmalloc_stat_inc_alloc(heap, class_idx) do { \
258
	int32_t alloc_current = atomic_incr32(&heap->size_class_use[class_idx].alloc_current); \
259
	if (alloc_current > heap->size_class_use[class_idx].alloc_peak) \
260
		heap->size_class_use[class_idx].alloc_peak = alloc_current; \
261
	atomic_incr32(&heap->size_class_use[class_idx].alloc_total); \
262
} while(0)
263
#  define _rpmalloc_stat_inc_free(heap, class_idx) do { \
264
	atomic_decr32(&heap->size_class_use[class_idx].alloc_current); \
265
	atomic_incr32(&heap->size_class_use[class_idx].free_total); \
266
} while(0)
267
#else
268
#  define _rpmalloc_stat_inc(counter) do {} while(0)
269
#  define _rpmalloc_stat_dec(counter) do {} while(0)
270
#  define _rpmalloc_stat_add(counter, value) do {} while(0)
271
#  define _rpmalloc_stat_add64(counter, value) do {} while(0)
272
#  define _rpmalloc_stat_add_peak(counter, value, peak) do {} while (0)
273
#  define _rpmalloc_stat_sub(counter, value) do {} while(0)
274
#  define _rpmalloc_stat_inc_alloc(heap, class_idx) do {} while(0)
275
#  define _rpmalloc_stat_inc_free(heap, class_idx) do {} while(0)
276
#endif
277
278
279
///
280
/// Preconfigured limits and sizes
281
///
282
283
//! Granularity of a small allocation block (must be power of two)
284
#define SMALL_GRANULARITY         16
285
//! Small granularity shift count
286
#define SMALL_GRANULARITY_SHIFT   4
287
//! Number of small block size classes
288
#define SMALL_CLASS_COUNT         65
289
//! Maximum size of a small block
290
#define SMALL_SIZE_LIMIT          (SMALL_GRANULARITY * (SMALL_CLASS_COUNT - 1))
291
//! Granularity of a medium allocation block
292
#define MEDIUM_GRANULARITY        512
293
//! Medium granularity shift count
294
#define MEDIUM_GRANULARITY_SHIFT  9
295
//! Number of medium block size classes
296
#define MEDIUM_CLASS_COUNT        61
297
//! Total number of small + medium size classes
298
#define SIZE_CLASS_COUNT          (SMALL_CLASS_COUNT + MEDIUM_CLASS_COUNT)
299
//! Number of large block size classes
300
#define LARGE_CLASS_COUNT         63
301
//! Maximum size of a medium block
302
#define MEDIUM_SIZE_LIMIT         (SMALL_SIZE_LIMIT + (MEDIUM_GRANULARITY * MEDIUM_CLASS_COUNT))
303
//! Maximum size of a large block
304
#define LARGE_SIZE_LIMIT          ((LARGE_CLASS_COUNT * _memory_span_size) - SPAN_HEADER_SIZE)
305
//! Size of a span header (must be a multiple of SMALL_GRANULARITY and a power of two)
306
#define SPAN_HEADER_SIZE          128
307
//! Number of spans in thread cache
308
#define MAX_THREAD_SPAN_CACHE     256
309
//! Number of spans to transfer between thread and global cache
310
#define THREAD_SPAN_CACHE_TRANSFER 64
311
//! Number of spans in thread cache for large spans (must be greater than LARGE_CLASS_COUNT / 2)
312
#define MAX_THREAD_SPAN_LARGE_CACHE 64
313
//! Number of spans to transfer between thread and global cache for large spans
314
#define THREAD_SPAN_LARGE_CACHE_TRANSFER 6
315
316
_Static_assert((SMALL_GRANULARITY & (SMALL_GRANULARITY - 1)) == 0, "Small granularity must be power of two");
317
_Static_assert((SPAN_HEADER_SIZE & (SPAN_HEADER_SIZE - 1)) == 0, "Span header size must be power of two");
318
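For reference, these constants imply the following allocation limits under the default configuration, assuming the 64KiB default span size defined further down (a standalone editorial sketch, not part of the measured source):

#include <assert.h>
#include <stdio.h>

// Default constants copied from the listing above
#define SMALL_GRANULARITY   16
#define SMALL_CLASS_COUNT   65
#define MEDIUM_GRANULARITY  512
#define MEDIUM_CLASS_COUNT  61
#define LARGE_CLASS_COUNT   63
#define SPAN_HEADER_SIZE    128
#define SPAN_SIZE           (64 * 1024)  // default span size

int
main(void) {
	size_t small_limit  = SMALL_GRANULARITY * (SMALL_CLASS_COUNT - 1);                // 1024 bytes
	size_t medium_limit = small_limit + (MEDIUM_GRANULARITY * MEDIUM_CLASS_COUNT);    // 32256 bytes
	size_t large_limit  = ((size_t)LARGE_CLASS_COUNT * SPAN_SIZE) - SPAN_HEADER_SIZE; // 4128640 bytes
	assert(small_limit == 1024);
	assert(medium_limit == 32256);
	printf("small <= %zu, medium <= %zu, large <= %zu bytes\n", small_limit, medium_limit, large_limit);
	return 0;
}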
319
#if ENABLE_VALIDATE_ARGS
320
//! Maximum allocation size to avoid integer overflow
321
#undef  MAX_ALLOC_SIZE
322
#define MAX_ALLOC_SIZE            (((size_t)-1) - _memory_span_size)
323
#endif
324
325
#define pointer_offset(ptr, ofs) (void*)((char*)(ptr) + (ptrdiff_t)(ofs))
326
#define pointer_diff(first, second) (ptrdiff_t)((const char*)(first) - (const char*)(second))
327
328
#define INVALID_POINTER ((void*)((uintptr_t)-1))
329
330
#define SIZE_CLASS_LARGE SIZE_CLASS_COUNT
331
#define SIZE_CLASS_HUGE ((uint32_t)-1)
332
333
////////////
334
///
335
/// Data types
336
///
337
//////
338
339
//! A memory heap, per thread
340
typedef struct heap_t heap_t;
341
//! Span of memory pages
342
typedef struct span_t span_t;
343
//! Span list
344
typedef struct span_list_t span_list_t;
345
//! Span active data
346
typedef struct span_active_t span_active_t;
347
//! Size class definition
348
typedef struct size_class_t size_class_t;
349
//! Global cache
350
typedef struct global_cache_t global_cache_t;
351
352
//! Flag indicating span is the first (master) span of a split superspan
353
#define SPAN_FLAG_MASTER 1U
354
//! Flag indicating span is a secondary (sub) span of a split superspan
355
#define SPAN_FLAG_SUBSPAN 2U
356
//! Flag indicating span has blocks with increased alignment
357
#define SPAN_FLAG_ALIGNED_BLOCKS 4U
358
359
#if ENABLE_ADAPTIVE_THREAD_CACHE || ENABLE_STATISTICS
360
struct span_use_t {
361
	//! Current number of spans used (actually used, not in cache)
362
	atomic32_t current;
363
	//! High water mark of spans used
364
	atomic32_t high;
365
#if ENABLE_STATISTICS
366
	//! Number of spans transitioned to global cache
367
	atomic32_t spans_to_global;
368
	//! Number of spans transitioned from global cache
369
	atomic32_t spans_from_global;
370
	//! Number of spans transitioned to thread cache
371
	atomic32_t spans_to_cache;
372
	//! Number of spans transitioned from thread cache
373
	atomic32_t spans_from_cache;
374
	//! Number of spans transitioned to reserved state
375
	atomic32_t spans_to_reserved;
376
	//! Number of spans transitioned from reserved state
377
	atomic32_t spans_from_reserved;
378
	//! Number of raw memory map calls
379
	atomic32_t spans_map_calls;
380
#endif
381
};
382
typedef struct span_use_t span_use_t;
383
#endif
384
385
#if ENABLE_STATISTICS
386
struct size_class_use_t {
387
	//! Current number of allocations
388
	atomic32_t alloc_current;
389
	//! Peak number of allocations
390
	int32_t alloc_peak;
391
	//! Total number of allocations
392
	atomic32_t alloc_total;
393
	//! Total number of frees
394
	atomic32_t free_total;
395
	//! Number of spans in use
396
	atomic32_t spans_current;
397
	//! Number of spans transitioned to cache
398
	int32_t spans_peak;
399
	//! Number of spans transitioned to cache
400
	atomic32_t spans_to_cache;
401
	//! Number of spans transitioned from cache
402
	atomic32_t spans_from_cache;
403
	//! Number of spans transitioned from reserved state
404
	atomic32_t spans_from_reserved;
405
	//! Number of spans mapped
406
	atomic32_t spans_map_calls;
407
	int32_t unused;
408
};
409
typedef struct size_class_use_t size_class_use_t;
410
#endif
411
412
// A span can either represent a single span of memory pages with size declared by span_map_count configuration variable,
413
// or a set of spans in a contiguous region, a super span. Any reference to the term "span" usually refers to either a single
414
// span or a super span. A super span can further be divided into multiple spans (or, in turn, smaller super spans), where the first
415
// (super)span is the master and subsequent (super)spans are subspans. The master span keeps track of how many subspans
416
// are still alive and mapped in virtual memory, and once all subspans and the master have been unmapped, the entire
417
// superspan region is released and unmapped (on Windows, for example, the entire superspan range has to be released
418
// in a single call when releasing the virtual memory range, but individual subranges can be decommitted individually
419
// to reduce physical memory use).
420
struct span_t {
421
	//! Free list
422
	void*       free_list;
423
	//! Total block count of size class
424
	uint32_t    block_count;
425
	//! Size class
426
	uint32_t    size_class;
427
	//! Index of last block initialized in free list
428
	uint32_t    free_list_limit;
429
	//! Number of used blocks remaining when in partial state
430
	uint32_t    used_count;
431
	//! Deferred free list
432
	atomicptr_t free_list_deferred;
433
	//! Size of deferred free list, or list of spans when part of a cache list
434
	uint32_t    list_size;
435
	//! Size of a block
436
	uint32_t    block_size;
437
	//! Flags and counters
438
	uint32_t    flags;
439
	//! Number of spans
440
	uint32_t    span_count;
441
	//! Total span counter for master spans
442
	uint32_t    total_spans;
443
	//! Offset from master span for subspans
444
	uint32_t    offset_from_master;
445
	//! Remaining span counter, for master spans
446
	atomic32_t  remaining_spans;
447
	//! Alignment offset
448
	uint32_t    align_offset;
449
	//! Owning heap
450
	heap_t*     heap;
451
	//! Next span
452
	span_t*     next;
453
	//! Previous span
454
	span_t*     prev;
455
};
456
_Static_assert(sizeof(span_t) <= SPAN_HEADER_SIZE, "span size mismatch");
457
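To make the master/subspan bookkeeping concrete, here is a standalone sketch (editorial, not part of the measured source; the struct and names such as mini_span_t are reduced for illustration) of how a subspan locates its master from offset_from_master, mirroring the pointer arithmetic used later in _rpmalloc_span_unmap.

#include <assert.h>
#include <stdint.h>
#include <stdlib.h>

#define SPAN_SIZE         (64 * 1024)  // default span size
#define SPAN_FLAG_MASTER  1U
#define SPAN_FLAG_SUBSPAN 2U

// Reduced span header: only the fields needed for the master lookup
typedef struct {
	uint32_t flags;
	uint32_t offset_from_master;  // distance from master, in spans
} mini_span_t;

// Same arithmetic as _rpmalloc_span_unmap: step back offset_from_master spans
static mini_span_t*
span_master(mini_span_t* span) {
	if (span->flags & SPAN_FLAG_MASTER)
		return span;
	return (mini_span_t*)((char*)span - (size_t)span->offset_from_master * SPAN_SIZE);
}

int
main(void) {
	// Pretend a 4-span superspan was mapped starting at base
	char* base = malloc(4 * SPAN_SIZE);
	mini_span_t* master  = (mini_span_t*)base;
	mini_span_t* subspan = (mini_span_t*)(base + 3 * SPAN_SIZE);
	master->flags = SPAN_FLAG_MASTER;
	subspan->flags = SPAN_FLAG_SUBSPAN;
	subspan->offset_from_master = 3;  // three spans past the master
	assert(span_master(subspan) == master);
	free(base);
	return 0;
}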
458
struct span_cache_t {
459
	size_t       count;
460
	span_t*      span[MAX_THREAD_SPAN_CACHE];
461
};
462
typedef struct span_cache_t span_cache_t;
463
464
struct span_large_cache_t {
465
	size_t       count;
466
	span_t*      span[MAX_THREAD_SPAN_LARGE_CACHE];
467
};
468
typedef struct span_large_cache_t span_large_cache_t;
469
470
struct heap_size_class_t {
471
	//! Free list of active span
472
	void*        free_list;
473
	//! Double linked list of partially used spans with free blocks.
474
	//  Previous span pointer in head points to tail span of list.
475
	span_t*      partial_span;
476
	//! Early level cache of fully free spans
477
	span_t*      cache;
478
};
479
typedef struct heap_size_class_t heap_size_class_t;
480
481
// Control structure for a heap, either a thread heap or a first class heap if enabled
482
struct heap_t {
483
	//! Owning thread ID
484
	uintptr_t    owner_thread;
485
	//! Free lists for each size class
486
	heap_size_class_t size_class[SIZE_CLASS_COUNT];
487
#if ENABLE_THREAD_CACHE
488
	//! Arrays of fully freed spans, single span
489
	span_cache_t span_cache;
490
#endif
491
	//! List of deferred free spans (single linked list)
492
	atomicptr_t  span_free_deferred;
493
	//! Number of full spans
494
	size_t       full_span_count;
495
	//! Mapped but unused spans
496
	span_t*      span_reserve;
497
	//! Master span for mapped but unused spans
498
	span_t*      span_reserve_master;
499
	//! Number of mapped but unused spans
500
	uint32_t     spans_reserved;
501
	//! Child count
502
	atomic32_t   child_count;
503
	//! Next heap in id list
504
	heap_t*      next_heap;
505
	//! Next heap in orphan list
506
	heap_t*      next_orphan;
507
	//! Memory pages alignment offset
508
	size_t       align_offset;
509
	//! Heap ID
510
	int32_t      id;
511
	//! Finalization state flag
512
	int          finalize;
513
	//! Master heap owning the memory pages
514
	heap_t*      master_heap;
515
#if ENABLE_THREAD_CACHE
516
	//! Arrays of fully freed spans, large spans with > 1 span count
517
	span_large_cache_t span_large_cache[LARGE_CLASS_COUNT - 1];
518
#endif
519
#if RPMALLOC_FIRST_CLASS_HEAPS
520
	//! Double linked list of fully utilized spans for each size class.
521
	//  Previous span pointer in head points to tail span of list.
522
	span_t*      full_span[SIZE_CLASS_COUNT];
523
	//! Double linked list of large and huge spans allocated by this heap
524
	span_t*      large_huge_span;
525
#endif
526
#if ENABLE_ADAPTIVE_THREAD_CACHE || ENABLE_STATISTICS
527
	//! Current and high water mark of spans used per span count
528
	span_use_t   span_use[LARGE_CLASS_COUNT];
529
#endif
530
#if ENABLE_STATISTICS
531
	//! Allocation stats per size class
532
	size_class_use_t size_class_use[SIZE_CLASS_COUNT + 1];
533
	//! Number of bytes transitioned thread -> global
534
	atomic64_t   thread_to_global;
535
	//! Number of bytes transitioned global -> thread
536
	atomic64_t   global_to_thread;
537
#endif
538
};
539
540
// Size class for defining a block size bucket
541
struct size_class_t {
542
	//! Size of blocks in this class
543
	uint32_t block_size;
544
	//! Number of blocks in each chunk
545
	uint16_t block_count;
546
	//! Class index this class is merged with
547
	uint16_t class_idx;
548
};
549
_Static_assert(sizeof(size_class_t) == 8, "Size class size mismatch");
550
551
struct global_cache_t {
552
	//! Cache lock
553
	atomic32_t lock;
554
	//! Cache count
555
	uint32_t count;
556
	//! Cached spans
557
	span_t* span[GLOBAL_CACHE_MULTIPLIER * MAX_THREAD_SPAN_CACHE];
558
#if ENABLE_UNLIMITED_CACHE
559
	//! Unlimited cache overflow
560
	span_t* overflow;
561
#endif
562
};
563
564
////////////
565
///
566
/// Global data
567
///
568
//////
569
570
//! Default span size (64KiB)
571
#define _memory_default_span_size (64 * 1024)
572
#define _memory_default_span_size_shift 16
573
#define _memory_default_span_mask (~((uintptr_t)(_memory_span_size - 1)))
574
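Because every span starts on a span-size boundary, masking any block address with the span mask recovers the owning span header; this is the lookup used throughout the allocator (for example in _rpmalloc_span_finalize further down). A standalone illustration (editorial, not part of the measured source):

#include <assert.h>
#include <stdint.h>

#define SPAN_SIZE (64 * 1024)
#define SPAN_MASK (~((uintptr_t)(SPAN_SIZE - 1)))  // same shape as _memory_default_span_mask

int
main(void) {
	uintptr_t span_start = 16 * (uintptr_t)SPAN_SIZE;  // a hypothetical 64KiB-aligned span
	uintptr_t block      = span_start + 5000;          // some block inside that span
	assert((block & SPAN_MASK) == span_start);         // mask any block address back to the span
	return 0;
}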
575
//! Initialized flag
576
static int _rpmalloc_initialized;
577
//! Configuration
578
static rpmalloc_config_t _memory_config;
579
//! Memory page size
580
static size_t _memory_page_size;
581
//! Shift to divide by page size
582
static size_t _memory_page_size_shift;
583
//! Granularity at which memory pages are mapped by OS
584
static size_t _memory_map_granularity;
585
#if RPMALLOC_CONFIGURABLE
586
//! Size of a span of memory pages
587
static size_t _memory_span_size;
588
//! Shift to divide by span size
589
static size_t _memory_span_size_shift;
590
//! Mask to get to start of a memory span
591
static uintptr_t _memory_span_mask;
592
#else
593
//! Hardwired span size
594
#define _memory_span_size _memory_default_span_size
595
#define _memory_span_size_shift _memory_default_span_size_shift
596
#define _memory_span_mask _memory_default_span_mask
597
#endif
598
//! Number of spans to map in each map call
599
static size_t _memory_span_map_count;
600
//! Number of spans to release from thread cache to global cache (single spans)
601
static size_t _memory_span_release_count;
602
//! Number of spans to release from thread cache to global cache (large multiple spans)
603
static size_t _memory_span_release_count_large;
604
//! Global size classes
605
static size_class_t _memory_size_class[SIZE_CLASS_COUNT];
606
//! Run-time size limit of medium blocks
607
static size_t _memory_medium_size_limit;
608
//! Heap ID counter
609
static atomic32_t _memory_heap_id;
610
//! Huge page support
611
static int _memory_huge_pages;
612
#if ENABLE_GLOBAL_CACHE
613
//! Global span cache
614
static global_cache_t _memory_span_cache[LARGE_CLASS_COUNT];
615
#endif
616
//! All heaps
617
static heap_t* _memory_heaps[HEAP_ARRAY_SIZE];
618
//! Orphan lock
619
static atomic32_t _memory_orphan_lock;
620
//! Orphaned heaps
621
static heap_t* _memory_orphan_heaps;
622
#if RPMALLOC_FIRST_CLASS_HEAPS
623
//! Orphaned heaps (first class heaps)
624
static heap_t* _memory_first_class_orphan_heaps;
625
#endif
626
#if ENABLE_STATISTICS
627
//! Active heap count
628
static atomic32_t _memory_active_heaps;
629
//! Number of currently mapped memory pages
630
static atomic32_t _mapped_pages;
631
//! Peak number of concurrently mapped memory pages
632
static int32_t _mapped_pages_peak;
633
//! Number of mapped master spans
634
static atomic32_t _master_spans;
635
//! Number of currently unused spans
636
static atomic32_t _reserved_spans;
637
//! Running counter of total number of mapped memory pages since start
638
static atomic32_t _mapped_total;
639
//! Running counter of total number of unmapped memory pages since start
640
static atomic32_t _unmapped_total;
641
//! Number of currently mapped memory pages in OS calls
642
static atomic32_t _mapped_pages_os;
643
//! Number of currently allocated pages in huge allocations
644
static atomic32_t _huge_pages_current;
645
//! Peak number of currently allocated pages in huge allocations
646
static int32_t _huge_pages_peak;
647
#endif
648
649
////////////
650
///
651
/// Thread local heap and ID
652
///
653
//////
654
655
//! Current thread heap
656
#if (defined(__APPLE__) || defined(__HAIKU__)) && ENABLE_PRELOAD
657
static pthread_key_t _memory_thread_heap;
658
#else
659
#  ifdef _MSC_VER
660
#    define _Thread_local __declspec(thread)
661
#    define TLS_MODEL
662
#  else
663
#    define TLS_MODEL __attribute__((tls_model("initial-exec")))
664
#    if !defined(__clang__) && defined(__GNUC__)
665
#      define _Thread_local __thread
666
#    endif
667
#  endif
668
static _Thread_local heap_t* _memory_thread_heap TLS_MODEL;
669
#endif
670
671
static inline heap_t*
672
9629
get_thread_heap_raw(void) {
673
#if (defined(__APPLE__) || defined(__HAIKU__)) && ENABLE_PRELOAD
674
	return pthread_getspecific(_memory_thread_heap);
675
#else
676
9629
	return _memory_thread_heap;
677
#endif
678
}
679
680
//! Get the current thread heap
681
static inline heap_t*
682
9140
get_thread_heap(void) {
683
9140
	heap_t* heap = get_thread_heap_raw();
684
#if ENABLE_PRELOAD
685
	if (EXPECTED(heap != 0))
686
		return heap;
687
	rpmalloc_initialize();
688
	return get_thread_heap_raw();
689
#else
690
9140
	return heap;
691
#endif
692
}
693
694
//! Fast thread ID
695
static inline uintptr_t
696
9101
get_thread_id(void) {
697
#if defined(_WIN32)
698
	return (uintptr_t)((void*)NtCurrentTeb());
699
#elif defined(__GNUC__) || defined(__clang__)
700
9101
	uintptr_t tid;
701
#  if defined(__i386__)
702
	__asm__("movl %%gs:0, %0" : "=r" (tid) : : );
703
#  elif defined(__MACH__) && !TARGET_OS_IPHONE && !TARGET_OS_SIMULATOR
704
	__asm__("movq %%gs:0, %0" : "=r" (tid) : : );
705
#  elif defined(__x86_64__)
706
9101
	__asm__("movq %%fs:0, %0" : "=r" (tid) : : );
707
#  elif defined(__arm__)
708
	__asm__ volatile ("mrc p15, 0, %0, c13, c0, 3" : "=r" (tid));
709
#  elif defined(__aarch64__)
710
	__asm__ volatile ("mrs %0, tpidr_el0" : "=r" (tid));
711
#  else
712
	tid = (uintptr_t)((void*)get_thread_heap_raw());
713
#  endif
714
9101
	return tid;
715
#else
716
	return (uintptr_t)((void*)get_thread_heap_raw());
717
#endif
718
}
719
720
//! Set the current thread heap
721
static void
722
489
set_thread_heap(heap_t* heap) {
723
#if (defined(__APPLE__) || defined(__HAIKU__)) && ENABLE_PRELOAD
724
	pthread_setspecific(_memory_thread_heap, heap);
725
#else
726
489
	_memory_thread_heap = heap;
727
#endif
728
489
	if (heap)
729
163
		heap->owner_thread = get_thread_id();
730
489
}
731
732
////////////
733
///
734
/// Low level memory map/unmap
735
///
736
//////
737
738
//! Map more virtual memory
739
//  size is number of bytes to map
740
//  offset receives the offset in bytes from start of mapped region
741
//  returns address to start of mapped region to use
742
static void*
743
304
_rpmalloc_mmap(size_t size, size_t* offset) {
744
304
	assert(!(size % _memory_page_size));
745
304
	assert(size >= _memory_page_size);
746
304
	_rpmalloc_stat_add_peak(&_mapped_pages, (size >> _memory_page_size_shift), _mapped_pages_peak);
747
304
	_rpmalloc_stat_add(&_mapped_total, (size >> _memory_page_size_shift));
748
304
	return _memory_config.memory_map(size, offset);
749
}
750
751
//! Unmap virtual memory
752
//  address is the memory address to unmap, as returned from _rpmalloc_mmap
753
//  size is the number of bytes to unmap, which might be less than full region for a partial unmap
754
//  offset is the offset in bytes to the actual mapped region, as set by _rpmalloc_mmap
755
//  release is set to 0 for partial unmap, or size of entire range for a full unmap
756
static void
757
1289
_rpmalloc_unmap(void* address, size_t size, size_t offset, size_t release) {
758
1289
	assert(!release || (release >= size));
759
1289
	assert(!release || (release >= _memory_page_size));
760
1289
	if (release) {
761
1289
		assert(!(release % _memory_page_size));
762
1289
		_rpmalloc_stat_sub(&_mapped_pages, (release >> _memory_page_size_shift));
763
1289
		_rpmalloc_stat_add(&_unmapped_total, (release >> _memory_page_size_shift));
764
	}
765
1289
	_memory_config.memory_unmap(address, size, offset, release);
766
1289
}
767
768
//! Default implementation to map new pages to virtual memory
769
static void*
770
304
_rpmalloc_mmap_os(size_t size, size_t* offset) {
771
	//Either size is a heap (a single page) or one or more spans - we only need to align spans, and only if larger than map granularity
772

304
	size_t padding = ((size >= _memory_span_size) && (_memory_span_size > _memory_map_granularity)) ? _memory_span_size : 0;
773
304
	assert(size >= _memory_page_size);
774
#if PLATFORM_WINDOWS
775
	//Ok to MEM_COMMIT - according to MSDN, "actual physical pages are not allocated unless/until the virtual addresses are actually accessed"
776
	void* ptr = VirtualAlloc(0, size + padding, (_memory_huge_pages ? MEM_LARGE_PAGES : 0) | MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE);
777
	if (!ptr) {
778
		assert(ptr && "Failed to map virtual memory block");
779
		return 0;
780
	}
781
#else
782
304
	int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_UNINITIALIZED;
783
#  if defined(__APPLE__) && !TARGET_OS_IPHONE && !TARGET_OS_SIMULATOR
784
	int fd = (int)VM_MAKE_TAG(240U);
785
	if (_memory_huge_pages)
786
		fd |= VM_FLAGS_SUPERPAGE_SIZE_2MB;
787
	void* ptr = mmap(0, size + padding, PROT_READ | PROT_WRITE, flags, fd, 0);
788
#  elif defined(MAP_HUGETLB)
789
608
	void* ptr = mmap(0, size + padding, PROT_READ | PROT_WRITE, (_memory_huge_pages ? MAP_HUGETLB : 0) | flags, -1, 0);
790
#  elif defined(MAP_ALIGN)
791
	caddr_t base = (_memory_huge_pages ? (caddr_t)(4 << 20) : 0);
792
	void* ptr = mmap(base, size + padding, PROT_READ | PROT_WRITE, (_memory_huge_pages ? MAP_ALIGN : 0) | flags, -1, 0);
793
#  else
794
	void* ptr = mmap(0, size + padding, PROT_READ | PROT_WRITE, flags, -1, 0);
795
#  endif
796
304
	if ((ptr == MAP_FAILED) || !ptr) {
797
		assert("Failed to map virtual memory block" == 0);
798
		return 0;
799
	}
800
#endif
801
304
	_rpmalloc_stat_add(&_mapped_pages_os, (int32_t)((size + padding) >> _memory_page_size_shift));
802
304
	if (padding) {
803
141
		size_t final_padding = padding - ((uintptr_t)ptr & ~_memory_span_mask);
804
141
		assert(final_padding <= _memory_span_size);
805
141
		assert(final_padding <= padding);
806
141
		assert(!(final_padding % 8));
807
141
		ptr = pointer_offset(ptr, final_padding);
808
141
		*offset = final_padding >> 3;
809
	}
810
	assert((size < _memory_span_size) || !((uintptr_t)ptr & ~_memory_span_mask));
811
	return ptr;
812
}
813
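The padding logic above over-maps by one span, advances the returned pointer to the next span boundary, and records the skipped bytes (divided by 8) in *offset so the unmap path can step back to the original base. A standalone sketch of just that arithmetic, assuming the 64KiB default span size (editorial, not part of the measured source):

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

#define SPAN_SIZE (64 * 1024)
#define SPAN_MASK (~((uintptr_t)(SPAN_SIZE - 1)))

int
main(void) {
	// Suppose the OS returned an address 0x3000 bytes past a span boundary
	uintptr_t raw = (uintptr_t)SPAN_SIZE * 10 + 0x3000;
	size_t padding = SPAN_SIZE;                           // always over-map by one span
	size_t final_padding = padding - (raw & ~SPAN_MASK);  // bytes to skip to the next boundary
	uintptr_t aligned = raw + final_padding;
	assert(!(aligned & ~SPAN_MASK));                      // now span-aligned
	assert(final_padding <= padding);
	size_t offset = final_padding >> 3;                   // stored in 8-byte units
	assert(aligned - (offset << 3) == raw);               // unmap path: offset << 3 restores the base
	return 0;
}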
814
//! Default implementation to unmap pages from virtual memory
815
static void
816
1289
_rpmalloc_unmap_os(void* address, size_t size, size_t offset, size_t release) {
817
1289
	assert(release || (offset == 0));
818
1289
	assert(!release || (release >= _memory_page_size));
819
1289
	assert(size >= _memory_page_size);
820
1289
	if (release && offset) {
821
63
		offset <<= 3;
822
63
		address = pointer_offset(address, -(int32_t)offset);
823

63
		if ((release >= _memory_span_size) && (_memory_span_size > _memory_map_granularity)) {
824
			//Padding is always one span size
825
63
			release += _memory_span_size;
826
		}
827
	}
828
#if !DISABLE_UNMAP
829
#if PLATFORM_WINDOWS
830
	if (!VirtualFree(address, release ? 0 : size, release ? MEM_RELEASE : MEM_DECOMMIT)) {
831
		assert(address && "Failed to unmap virtual memory block");
832
	}
833
#else
834
1289
	if (release) {
835
148
		if (munmap(address, release)) {
836
148
			assert("Failed to unmap virtual memory block" == 0);
837
		}
838
	}
839
	else {
840
#if defined(POSIX_MADV_FREE)
841
		if (posix_madvise(address, size, POSIX_MADV_FREE))
842
#endif
843
1141
		if (posix_madvise(address, size, POSIX_MADV_DONTNEED)) {
844
1289
			assert("Failed to madvise virtual memory block as free" == 0);
845
		}
846
	}
847
#endif
848
#endif
849
1289
	if (release)
850
1289
		_rpmalloc_stat_sub(&_mapped_pages_os, release >> _memory_page_size_shift);
851
1289
}
852
853
854
////////////
855
///
856
/// Span linked list management
857
///
858
//////
859
860
//! Add a span to double linked list at the head
861
static void
862
1220
_rpmalloc_span_double_link_list_add(span_t** head, span_t* span) {
863
1220
	if (*head) {
864
		span->next = *head;
865
		(*head)->prev = span;
866
	} else {
867
1220
		span->next = 0;
868
	}
869
1220
	*head = span;
870
1220
}
871
872
//! Pop head span from double linked list
873
static void
874
_rpmalloc_span_double_link_list_pop_head(span_t** head, span_t* span) {
875
	assert(*head == span);
876
	span = *head;
877
	*head = span->next;
878
}
879
880
//! Remove a span from double linked list
881
static void
882
1125
_rpmalloc_span_double_link_list_remove(span_t** head, span_t* span) {
883
1125
	assert(*head);
884
1125
	if (*head == span) {
885
1125
		*head = span->next;
886
	} else {
887
		span_t* next_span = span->next;
888
		span_t* prev_span = span->prev;
889
		prev_span->next = next_span;
890
		if (EXPECTED(next_span != 0)) {
891
			next_span->prev = prev_span;
892
		}
893
	}
894
1125
}
895
896
897
////////////
898
///
899
/// Span control
900
///
901
//////
902
903
static void
904
_rpmalloc_heap_cache_insert(heap_t* heap, span_t* span);
905
906
static void
907
_rpmalloc_heap_finalize(heap_t* heap);
908
909
static void
910
_rpmalloc_heap_set_reserved_spans(heap_t* heap, span_t* master, span_t* reserve, size_t reserve_span_count);
911
912
//! Declare the span to be a subspan and store distance from master span and span count
913
static void
914
1233
_rpmalloc_span_mark_as_subspan_unless_master(span_t* master, span_t* subspan, size_t span_count) {
915
1233
	assert((subspan != master) || (subspan->flags & SPAN_FLAG_MASTER));
916
1233
	if (subspan != master) {
917
1233
		subspan->flags = SPAN_FLAG_SUBSPAN;
918
1233
		subspan->offset_from_master = (uint32_t)((uintptr_t)pointer_diff(subspan, master) >> _memory_span_size_shift);
919
1233
		subspan->align_offset = 0;
920
	}
921
1233
	subspan->span_count = (uint32_t)span_count;
922
1233
}
923
924
//! Use reserved spans to fulfill a memory map request (reserve size must be checked by caller)
925
static span_t*
926
1233
_rpmalloc_span_map_from_reserve(heap_t* heap, size_t span_count) {
927
	//Update the heap span reserve
928
1233
	span_t* span = heap->span_reserve;
929
1233
	heap->span_reserve = (span_t*)pointer_offset(span, span_count * _memory_span_size);
930
1233
	heap->spans_reserved -= (uint32_t)span_count;
931
932
1233
	_rpmalloc_span_mark_as_subspan_unless_master(heap->span_reserve_master, span, span_count);
933
1233
	if (span_count <= LARGE_CLASS_COUNT)
934
1233
		_rpmalloc_stat_inc(&heap->span_use[span_count - 1].spans_from_reserved);
935
936
1233
	return span;
937
}
938
939
//! Get the aligned number of spans to map in, based on wanted count, configured mapping granularity and the page size
940
static size_t
941
140
_rpmalloc_span_align_count(size_t span_count) {
942
140
	size_t request_count = (span_count > _memory_span_map_count) ? span_count : _memory_span_map_count;
943

140
	if ((_memory_page_size > _memory_span_size) && ((request_count * _memory_span_size) % _memory_page_size))
944
		request_count += _memory_span_map_count - (request_count % _memory_span_map_count);
945
140
	return request_count;
946
}
947
948
//! Setup a newly mapped span
949
static void
950
140
_rpmalloc_span_initialize(span_t* span, size_t total_span_count, size_t span_count, size_t align_offset) {
951
140
	span->total_spans = (uint32_t)total_span_count;
952
140
	span->span_count = (uint32_t)span_count;
953
140
	span->align_offset = (uint32_t)align_offset;
954
140
	span->flags = SPAN_FLAG_MASTER;
955
140
	atomic_store32(&span->remaining_spans, (int32_t)total_span_count);
956
140
}
957
958
//! Map an aligned set of spans, taking configured mapping granularity and the page size into account
959
static span_t*
960
140
_rpmalloc_span_map_aligned_count(heap_t* heap, size_t span_count) {
961
	//If we already have some, but not enough, reserved spans, release those to heap cache and map a new
962
	//full set of spans. Otherwise we would waste memory if page size > span size (huge pages)
963
140
	size_t aligned_span_count = _rpmalloc_span_align_count(span_count);
964
140
	size_t align_offset = 0;
965
140
	span_t* span = (span_t*)_rpmalloc_mmap(aligned_span_count * _memory_span_size, &align_offset);
966
140
	if (!span)
967
		return 0;
968
140
	_rpmalloc_span_initialize(span, aligned_span_count, span_count, align_offset);
969
140
	_rpmalloc_stat_add(&_reserved_spans, aligned_span_count);
970
140
	_rpmalloc_stat_inc(&_master_spans);
971
140
	if (span_count <= LARGE_CLASS_COUNT)
972
140
		_rpmalloc_stat_inc(&heap->span_use[span_count - 1].spans_map_calls);
973
140
	if (aligned_span_count > span_count) {
974
140
		span_t* reserved_spans = (span_t*)pointer_offset(span, span_count * _memory_span_size);
975
140
		size_t reserved_count = aligned_span_count - span_count;
976
140
		if (heap->spans_reserved) {
977
			_rpmalloc_span_mark_as_subspan_unless_master(heap->span_reserve_master, heap->span_reserve, heap->spans_reserved);
978
			_rpmalloc_heap_cache_insert(heap, heap->span_reserve);
979
		}
980
140
		_rpmalloc_heap_set_reserved_spans(heap, span, reserved_spans, reserved_count);
981
	}
982
	return span;
983
}
984
985
//! Map in memory pages for the given number of spans (or use previously reserved pages)
986
static span_t*
987
1373
_rpmalloc_span_map(heap_t* heap, size_t span_count) {
988
1373
	if (span_count <= heap->spans_reserved)
989
1233
		return _rpmalloc_span_map_from_reserve(heap, span_count);
990
140
	return _rpmalloc_span_map_aligned_count(heap, span_count);
991
}
992
993
//! Unmap memory pages for the given number of spans (or mark as unused if no partial unmappings)
994
static void
995
1278
_rpmalloc_span_unmap(span_t* span) {
996
1278
	assert((span->flags & SPAN_FLAG_MASTER) || (span->flags & SPAN_FLAG_SUBSPAN));
997
1278
	assert(!(span->flags & SPAN_FLAG_MASTER) || !(span->flags & SPAN_FLAG_SUBSPAN));
998
999
1278
	int is_master = !!(span->flags & SPAN_FLAG_MASTER);
1000
1278
	span_t* master = is_master ? span : ((span_t*)pointer_offset(span, -(intptr_t)((uintptr_t)span->offset_from_master * _memory_span_size)));
1001
1278
	assert(is_master || (span->flags & SPAN_FLAG_SUBSPAN));
1002
1278
	assert(master->flags & SPAN_FLAG_MASTER);
1003
1004
1278
	size_t span_count = span->span_count;
1005
1278
	if (!is_master) {
1006
		//Directly unmap subspans (unless huge pages, in which case we defer and unmap entire page range with master)
1007
1141
		assert(span->align_offset == 0);
1008
1141
		if (_memory_span_size >= _memory_page_size) {
1009
1141
			_rpmalloc_unmap(span, span_count * _memory_span_size, 0, 0);
1010
			_rpmalloc_stat_sub(&_reserved_spans, span_count);
1011
		}
1012
	} else {
1013
		//Special double flag to denote an unmapped master
1014
		//It must be kept in memory since the span header is still needed for the remaining span count
1015
137
		span->flags |= SPAN_FLAG_MASTER | SPAN_FLAG_SUBSPAN;
1016
	}
1017
1018
1278
	if (atomic_add32(&master->remaining_spans, -(int32_t)span_count) <= 0) {
1019
		//Everything unmapped, unmap the master span with release flag to unmap the entire range of the super span
1020
62
		assert(!!(master->flags & SPAN_FLAG_MASTER) && !!(master->flags & SPAN_FLAG_SUBSPAN));
1021
62
		size_t unmap_count = master->span_count;
1022
62
		if (_memory_span_size < _memory_page_size)
1023
			unmap_count = master->total_spans;
1024
62
		_rpmalloc_stat_sub(&_reserved_spans, unmap_count);
1025
62
		_rpmalloc_stat_sub(&_master_spans, 1);
1026
62
		_rpmalloc_unmap(master, unmap_count * _memory_span_size, master->align_offset, (size_t)master->total_spans * _memory_span_size);
1027
	}
1028
1278
}
1029
1030
//! Move the span (used for small or medium allocations) to the heap thread cache
1031
static void
1032
_rpmalloc_span_release_to_cache(heap_t* heap, span_t* span) {
1033
	assert(heap == span->heap);
1034
	assert(span->size_class < SIZE_CLASS_COUNT);
1035
#if ENABLE_ADAPTIVE_THREAD_CACHE || ENABLE_STATISTICS
1036
	atomic_decr32(&heap->span_use[0].current);
1037
#endif
1038
	_rpmalloc_stat_dec(&heap->size_class_use[span->size_class].spans_current);
1039
	if (!heap->finalize) {
1040
		_rpmalloc_stat_inc(&heap->span_use[0].spans_to_cache);
1041
		_rpmalloc_stat_inc(&heap->size_class_use[span->size_class].spans_to_cache);
1042
		if (heap->size_class[span->size_class].cache)
1043
			_rpmalloc_heap_cache_insert(heap, heap->size_class[span->size_class].cache);
1044
		heap->size_class[span->size_class].cache = span;
1045
	} else {
1046
		_rpmalloc_span_unmap(span);
1047
	}
1048
}
1049
1050
//! Initialize a (partial) free list up to next system memory page, while reserving the first block
1051
//! as allocated, returning number of blocks in list
1052
static uint32_t
1053
1220
free_list_partial_init(void** list, void** first_block, void* page_start, void* block_start,
1054
                       uint32_t block_count, uint32_t block_size) {
1055
1220
	assert(block_count);
1056
1220
	*first_block = block_start;
1057
1220
	if (block_count > 1) {
1058
1220
		void* free_block = pointer_offset(block_start, block_size);
1059
1220
		void* block_end = pointer_offset(block_start, (size_t)block_size * block_count);
1060
		//If block size is less than half a memory page, bound init to next memory page boundary
1061
1220
		if (block_size < (_memory_page_size >> 1)) {
1062
1140
			void* page_end = pointer_offset(page_start, _memory_page_size);
1063
1140
			if (page_end < block_end)
1064
1140
				block_end = page_end;
1065
		}
1066
1220
		*list = free_block;
1067
1220
		block_count = 2;
1068
1220
		void* next_block = pointer_offset(free_block, block_size);
1069
71857
		while (next_block < block_end) {
1070
70637
			*((void**)free_block) = next_block;
1071
70637
			free_block = next_block;
1072
70637
			++block_count;
1073
70637
			next_block = pointer_offset(next_block, block_size);
1074
		}
1075
1220
		*((void**)free_block) = 0;
1076
	} else {
1077
		*list = 0;
1078
	}
1079
1220
	return block_count;
1080
}
1081
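For blocks smaller than half a page, the loop above only threads together the blocks that fit in the first system page, deferring the rest of the span to later refills. A small standalone computation for a typical 16-byte class, assuming 4KiB pages and the 128-byte span header (editorial, not part of the measured source):

#include <assert.h>
#include <stddef.h>

#define PAGE_SIZE        4096
#define SPAN_HEADER_SIZE 128

int
main(void) {
	size_t block_size = 16;  // a small size class
	// Blocks start right after the span header; only the first page is initialized up front
	size_t first_page_blocks = (PAGE_SIZE - SPAN_HEADER_SIZE) / block_size;
	assert(first_page_blocks == 248);
	// free_list_partial_init would return 248: one block handed to the caller,
	// 247 linked into the free list, the remaining blocks left uninitialized for now
	return 0;
}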
1082
//! Initialize an unused span (from cache or mapped) to be the new active span, putting the initial free list in the heap size class free list
1083
static void*
1084
1220
_rpmalloc_span_initialize_new(heap_t* heap, span_t* span, uint32_t class_idx) {
1085
1220
	assert(span->span_count == 1);
1086
1220
	size_class_t* size_class = _memory_size_class + class_idx;
1087
1220
	span->size_class = class_idx;
1088
1220
	span->heap = heap;
1089
1220
	span->flags &= ~SPAN_FLAG_ALIGNED_BLOCKS;
1090
1220
	span->block_size = size_class->block_size;
1091
1220
	span->block_count = size_class->block_count;
1092
1220
	span->free_list = 0;
1093
1220
	span->list_size = 0;
1094
1220
	atomic_store_ptr_release(&span->free_list_deferred, 0);
1095
1096
	//Setup free list. Only initialize one system page worth of free blocks in list
1097
1220
	void* block;
1098
2440
	span->free_list_limit = free_list_partial_init(&heap->size_class[class_idx].free_list, &block,
1099
1220
		span, pointer_offset(span, SPAN_HEADER_SIZE), size_class->block_count, size_class->block_size);
1100
	//Link span as partial if there remains blocks to be initialized as free list, or full if fully initialized
1101
1220
	if (span->free_list_limit < span->block_count) {
1102
1140
		_rpmalloc_span_double_link_list_add(&heap->size_class[class_idx].partial_span, span);
1103
1140
		span->used_count = span->free_list_limit;
1104
	} else {
1105
#if RPMALLOC_FIRST_CLASS_HEAPS
1106
		_rpmalloc_span_double_link_list_add(&heap->full_span[class_idx], span);
1107
#endif
1108
80
		++heap->full_span_count;
1109
80
		span->used_count = span->block_count;
1110
	}
1111
1220
	return block;
1112
}
1113
1114
static void
1115
_rpmalloc_span_extract_free_list_deferred(span_t* span) {
1116
	// We need acquire semantics on the CAS operation since we are interested in the list size
1117
	// Refer to _rpmalloc_deallocate_defer_small_or_medium for further comments on this dependency
1118
	do {
1119
		span->free_list = atomic_exchange_ptr_acquire(&span->free_list_deferred, INVALID_POINTER);
1120
	} while (span->free_list == INVALID_POINTER);
1121
	span->used_count -= span->list_size;
1122
	span->list_size = 0;
1123
	atomic_store_ptr_release(&span->free_list_deferred, 0);
1124
}
1125
1126
static int
1127
8924
_rpmalloc_span_is_fully_utilized(span_t* span) {
1128
8924
	assert(span->free_list_limit <= span->block_count);
1129

8924
	return !span->free_list && (span->free_list_limit >= span->block_count);
1130
}
1131
1132
static int
1133
1220
_rpmalloc_span_finalize(heap_t* heap, size_t iclass, span_t* span, span_t** list_head) {
1134
1220
	void* free_list = heap->size_class[iclass].free_list;
1135
1220
	span_t* class_span = (span_t*)((uintptr_t)free_list & _memory_span_mask);
1136
1220
	if (span == class_span) {
1137
		// Adopt the heap class free list back into the span free list
1138
1220
		void* block = span->free_list;
1139
1220
		void* last_block = 0;
1140
10144
		while (block) {
1141
8924
			last_block = block;
1142
8924
			block = *((void**)block);
1143
		}
1144
		uint32_t free_count = 0;
1145
		block = free_list;
1146
65238
		while (block) {
1147
64018
			++free_count;
1148
64018
			block = *((void**)block);
1149
		}
1150
1220
		if (last_block) {
1151
1210
			*((void**)last_block) = free_list;
1152
		} else {
1153
10
			span->free_list = free_list;
1154
		}
1155
1220
		heap->size_class[iclass].free_list = 0;
1156
1220
		span->used_count -= free_count;
1157
	}
1158
	//If this assert triggers you have memory leaks
1159
1220
	assert(span->list_size == span->used_count);
1160
1220
	if (span->list_size == span->used_count) {
1161
1125
		_rpmalloc_stat_dec(&heap->span_use[0].current);
1162
1125
		_rpmalloc_stat_dec(&heap->size_class_use[iclass].spans_current);
1163
		// This function only used for spans in double linked lists
1164
1125
		if (list_head)
1165
1125
			_rpmalloc_span_double_link_list_remove(list_head, span);
1166
1125
		_rpmalloc_span_unmap(span);
1167
1125
		return 1;
1168
	}
1169
	return 0;
1170
}
1171
1172
1173
////////////
1174
///
1175
/// Global cache
1176
///
1177
//////
1178
1179
#if ENABLE_GLOBAL_CACHE
1180
1181
//! Finalize a global cache
1182
static void
1183
10017
_rpmalloc_global_cache_finalize(global_cache_t* cache) {
1184
20034
	while (!atomic_cas32_acquire(&cache->lock, 1, 0))
1185
10017
		/* Spin */;
1186
1187
10030
	for (size_t ispan = 0; ispan < cache->count; ++ispan)
1188
13
		_rpmalloc_span_unmap(cache->span[ispan]);
1189
10017
	cache->count = 0;
1190
1191
#if ENABLE_UNLIMITED_CACHE
1192
	while (cache->overflow) {
1193
		span_t* span = cache->overflow;
1194
		cache->overflow = span->next;
1195
		_rpmalloc_span_unmap(span);
1196
	}
1197
#endif
1198
1199
10017
	atomic_store32_release(&cache->lock, 0);
1200
10017
}
1201
1202
static void
1203
4
_rpmalloc_global_cache_insert_spans(span_t** span, size_t span_count, size_t count) {
1204
8
	const size_t cache_limit = (span_count == 1) ?
1205
4
		GLOBAL_CACHE_MULTIPLIER * MAX_THREAD_SPAN_CACHE :
1206
4
		GLOBAL_CACHE_MULTIPLIER * (MAX_THREAD_SPAN_LARGE_CACHE - (span_count >> 1));
1207
1208
4
	global_cache_t* cache = &_memory_span_cache[span_count - 1];
1209
1210
4
	size_t insert_count = count;
1211
8
	while (!atomic_cas32_acquire(&cache->lock, 1, 0))
1212
4
		/* Spin */;
1213
1214
4
	if ((cache->count + insert_count) > cache_limit)
1215
		insert_count = cache_limit - cache->count;
1216
1217
4
	memcpy(cache->span + cache->count, span, sizeof(span_t*) * insert_count);
1218
4
	cache->count += (uint32_t)insert_count;
1219
1220
#if ENABLE_UNLIMITED_CACHE
1221
	while (insert_count < count) {
1222
		span_t* current_span = span[insert_count++];
1223
		current_span->next = cache->overflow;
1224
		cache->overflow = current_span;
1225
	}
1226
	atomic_store32_release(&cache->lock, 0);
1227
#else
1228
4
	atomic_store32_release(&cache->lock, 0);
1229
4
	for (size_t ispan = insert_count; ispan < count; ++ispan)
1230
		_rpmalloc_span_unmap(span[ispan]);
1231
#endif
1232
4
}
1233
1234
static size_t
1235
140
_rpmalloc_global_cache_extract_spans(span_t** span, size_t span_count, size_t count) {
1236
140
	global_cache_t* cache = &_memory_span_cache[span_count - 1];
1237
1238
140
	size_t extract_count = count;
1239
280
	while (!atomic_cas32_acquire(&cache->lock, 1, 0))
1240
140
		/* Spin */;
1241
1242
140
	if (extract_count > cache->count)
1243
140
		extract_count = cache->count;
1244
1245
140
	memcpy(span, cache->span + (cache->count - extract_count), sizeof(span_t*) * extract_count);
1246
140
	cache->count -= (uint32_t)extract_count;
1247
#if ENABLE_UNLIMITED_CACHE
1248
	while ((extract_count < count) && cache->overflow) {
1249
		span_t* current_span = cache->overflow;
1250
		span[extract_count++] = current_span;
1251
		cache->overflow = current_span->next;
1252
	}
1253
#endif
1254
140
	atomic_store32_release(&cache->lock, 0);
1255
1256
140
	return extract_count;
1257
}
1258
1259
#endif
1260
1261
////////////
1262
///
1263
/// Heap control
1264
///
1265
//////
1266
1267
static void _rpmalloc_deallocate_huge(span_t*);
1268
1269
//! Store the given spans as reserve in the given heap
1270
static void
1271
140
_rpmalloc_heap_set_reserved_spans(heap_t* heap, span_t* master, span_t* reserve, size_t reserve_span_count) {
1272
140
	heap->span_reserve_master = master;
1273
140
	heap->span_reserve = reserve;
1274
140
	heap->spans_reserved = (uint32_t)reserve_span_count;
1275
140
}
1276
1277
//! Adopt the deferred span cache list, optionally extracting the first single span for immediate re-use
1278
static void
1279
1546
_rpmalloc_heap_cache_adopt_deferred(heap_t* heap, span_t** single_span) {
1280
1546
	span_t* span = (span_t*)atomic_load_ptr(&heap->span_free_deferred);
1281
1546
	if (!span)
1282
		return;
1283
	while (!atomic_cas_ptr(&heap->span_free_deferred, 0, span))
1284
		span = (span_t*)atomic_load_ptr(&heap->span_free_deferred);
1285
	while (span) {
1286
		span_t* next_span = (span_t*)span->free_list;
1287
		assert(span->heap == heap);
1288
		if (EXPECTED(span->size_class < SIZE_CLASS_COUNT)) {
1289
			assert(heap->full_span_count);
1290
			--heap->full_span_count;
1291
#if RPMALLOC_FIRST_CLASS_HEAPS
1292
			_rpmalloc_span_double_link_list_remove(&heap->full_span[span->size_class], span);
1293
#endif
1294
			if (single_span && !*single_span) {
1295
				*single_span = span;
1296
			} else {
1297
				_rpmalloc_stat_dec(&heap->span_use[0].current);
1298
				_rpmalloc_stat_dec(&heap->size_class_use[span->size_class].spans_current);
1299
				_rpmalloc_heap_cache_insert(heap, span);
1300
			}
1301
		} else {
1302
			if (span->size_class == SIZE_CLASS_HUGE) {
1303
				_rpmalloc_deallocate_huge(span);
1304
			} else {
1305
				assert(span->size_class == SIZE_CLASS_LARGE);
1306
				assert(heap->full_span_count);
1307
				--heap->full_span_count;
1308
#if RPMALLOC_FIRST_CLASS_HEAPS
1309
				_rpmalloc_span_double_link_list_remove(&heap->large_huge_span, span);
1310
#endif
1311
				uint32_t idx = span->span_count - 1;
1312
				if (!idx && single_span && !*single_span) {
1313
					*single_span = span;
1314
				} else {
1315
					_rpmalloc_stat_dec(&heap->span_use[idx].current);
1316
					_rpmalloc_heap_cache_insert(heap, span);
1317
				}
1318
			}
1319
		}
1320
		span = next_span;
1321
	}
1322
}
1323
1324
static void
1325
85
_rpmalloc_heap_unmap(heap_t* heap) {
1326
85
	if (!heap->master_heap) {
1327

85
		if ((heap->finalize > 1) && !atomic_load32(&heap->child_count)) {
1328
85
			size_t heap_size = sizeof(heap_t);
1329
85
			size_t block_size = _memory_page_size * ((heap_size + _memory_page_size - 1) >> _memory_page_size_shift);
1330
85
			_rpmalloc_unmap(heap, block_size, heap->align_offset, block_size);
1331
		}
1332
	} else {
1333
		if (atomic_decr32(&heap->master_heap->child_count) == 0) {
1334
			_rpmalloc_heap_unmap(heap->master_heap);
1335
		}
1336
	}
1337
85
}
1338
1339
static void
1340
163
_rpmalloc_heap_global_finalize(heap_t* heap) {
1341
163
	if (heap->finalize++ > 1) {
1342
		--heap->finalize;
1343
		return;
1344
	}
1345
1346
163
	_rpmalloc_heap_finalize(heap);
1347
1348
#if ENABLE_THREAD_CACHE
1349
10432
	for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
1350
10269
		span_cache_t* span_cache;
1351
10269
		if (!iclass)
1352
163
			span_cache = &heap->span_cache;
1353
		else
1354
10106
			span_cache = (span_cache_t*)(heap->span_large_cache + (iclass - 1));
1355
10269
		for (size_t ispan = 0; ispan < span_cache->count; ++ispan)
1356
			_rpmalloc_span_unmap(span_cache->span[ispan]);
1357
10269
		span_cache->count = 0;
1358
	}
1359
#endif
1360
1361
163
	if (heap->full_span_count) {
1362
		--heap->finalize;
1363
		return;
1364
	}
1365
1366
10964
	for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) {
1367

10879
		if (heap->size_class[iclass].free_list || heap->size_class[iclass].partial_span) {
1368
78
			--heap->finalize;
1369
78
			return;
1370
		}
1371
	}
1372
	//Heap is now completely free, unmap and remove from heap list
1373
85
	size_t list_idx = heap->id % HEAP_ARRAY_SIZE;
1374
85
	heap_t* list_heap = _memory_heaps[list_idx];
1375
85
	if (list_heap == heap) {
1376
85
		_memory_heaps[list_idx] = heap->next_heap;
1377
	} else {
1378
		while (list_heap->next_heap != heap)
1379
			list_heap = list_heap->next_heap;
1380
		list_heap->next_heap = heap->next_heap;
1381
	}
1382
1383
85
	_rpmalloc_heap_unmap(heap);
1384
}
1385
1386
//! Insert a single span into thread heap cache, releasing to global cache if overflow
1387
static void
1388
13
_rpmalloc_heap_cache_insert(heap_t* heap, span_t* span) {
1389
13
	if (UNEXPECTED(heap->finalize != 0)) {
1390
		_rpmalloc_span_unmap(span);
1391
		_rpmalloc_heap_global_finalize(heap);
1392
		return;
1393
	}
1394
#if ENABLE_THREAD_CACHE
1395
13
	size_t span_count = span->span_count;
1396
13
	_rpmalloc_stat_inc(&heap->span_use[span_count - 1].spans_to_cache);
1397
13
	if (span_count == 1) {
1398
		span_cache_t* span_cache = &heap->span_cache;
1399
		span_cache->span[span_cache->count++] = span;
1400
		if (span_cache->count == MAX_THREAD_SPAN_CACHE) {
1401
			const size_t remain_count = MAX_THREAD_SPAN_CACHE - THREAD_SPAN_CACHE_TRANSFER;
1402
#if ENABLE_GLOBAL_CACHE
1403
			_rpmalloc_stat_add64(&heap->thread_to_global, THREAD_SPAN_CACHE_TRANSFER * _memory_span_size);
1404
			_rpmalloc_stat_add(&heap->span_use[span_count - 1].spans_to_global, THREAD_SPAN_CACHE_TRANSFER);
1405
			_rpmalloc_global_cache_insert_spans(span_cache->span + remain_count, span_count, THREAD_SPAN_CACHE_TRANSFER);
1406
#else
1407
			for (size_t ispan = 0; ispan < THREAD_SPAN_CACHE_TRANSFER; ++ispan)
1408
				_rpmalloc_span_unmap(span_cache->span[remain_count + ispan]);
1409
#endif
1410
			span_cache->count = remain_count;
1411
		}
1412
	} else {
1413
13
		size_t cache_idx = span_count - 2;
1414
13
		span_large_cache_t* span_cache = heap->span_large_cache + cache_idx;
1415
13
		span_cache->span[span_cache->count++] = span;
1416
13
		const size_t cache_limit = (MAX_THREAD_SPAN_LARGE_CACHE - (span_count >> 1));
1417
13
		if (span_cache->count == cache_limit) {
1418
			const size_t transfer_limit = 2 + (cache_limit >> 2);
1419
			const size_t transfer_count = (THREAD_SPAN_LARGE_CACHE_TRANSFER <= transfer_limit ? THREAD_SPAN_LARGE_CACHE_TRANSFER : transfer_limit);
1420
			const size_t remain_count = cache_limit - transfer_count;
1421
#if ENABLE_GLOBAL_CACHE
1422
			_rpmalloc_stat_add64(&heap->thread_to_global, transfer_count * span_count * _memory_span_size);
1423
			_rpmalloc_stat_add(&heap->span_use[span_count - 1].spans_to_global, transfer_count);
1424
			_rpmalloc_global_cache_insert_spans(span_cache->span + remain_count, span_count, transfer_count);
1425
#else
1426
			for (size_t ispan = 0; ispan < transfer_count; ++ispan)
1427
				_rpmalloc_span_unmap(span_cache->span[remain_count + ispan]);
1428
#endif
1429
			span_cache->count = remain_count;
1430
		}
1431
	}
1432
#else
1433
	(void)sizeof(heap);
1434
	_rpmalloc_span_unmap(span);
1435
#endif
1436
}
1437
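
The insert path above appends to a fixed-size per-thread array and, once the array fills, hands the newest batch of entries to the global cache in a single call instead of one at a time. A minimal standalone sketch of that batching policy, with hypothetical names (thread_cache_t, global_release) and deliberately tiny illustrative limits that are not rpmalloc's actual cache sizes:

#include <stdio.h>
#include <stdlib.h>

#define CACHE_CAPACITY 8   /* illustrative only; rpmalloc uses much larger limits */
#define CACHE_TRANSFER 4   /* batch handed to the global cache on overflow */

typedef struct thread_cache_t {
	size_t count;
	void*  item[CACHE_CAPACITY];
} thread_cache_t;

/* Stand-in for the global cache: here the batch is simply released. */
static void
global_release(void** items, size_t count) {
	for (size_t i = 0; i < count; ++i)
		free(items[i]);
}

static void
thread_cache_insert(thread_cache_t* cache, void* item) {
	cache->item[cache->count++] = item;
	if (cache->count == CACHE_CAPACITY) {
		/* Keep the older entries, move the newest batch out in one call */
		size_t remain = CACHE_CAPACITY - CACHE_TRANSFER;
		global_release(cache->item + remain, CACHE_TRANSFER);
		cache->count = remain;
	}
}

int
main(void) {
	thread_cache_t cache = {0};
	for (int i = 0; i < 20; ++i)
		thread_cache_insert(&cache, malloc(64));
	printf("entries still cached: %zu\n", cache.count);
	global_release(cache.item, cache.count);   /* drain the rest */
	return 0;
}
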
1438
//! Extract the given number of spans from the different cache levels
1439
static span_t*
1440
1233
_rpmalloc_heap_thread_cache_extract(heap_t* heap, size_t span_count) {
1441
1233
	span_t* span = 0;
1442
1233
	if (span_count == 1) {
1443
1220
		_rpmalloc_heap_cache_adopt_deferred(heap, &span);
1444
1220
		if (span)
1445
			return span;
1446
	}
1447
#if ENABLE_THREAD_CACHE
1448
1233
	span_cache_t* span_cache;
1449
1233
	if (span_count == 1)
1450
1220
		span_cache = &heap->span_cache;
1451
	else
1452
13
		span_cache = (span_cache_t*)(heap->span_large_cache + (span_count - 2));
1453
1233
	if (span_cache->count) {
1454
		_rpmalloc_stat_inc(&heap->span_use[span_count - 1].spans_from_cache);
1455
		return span_cache->span[--span_cache->count];
1456
	}
1457
#endif
1458
1233
	return span;
1459
}
1460
1461
static span_t*
1462
1233
_rpmalloc_heap_reserved_extract(heap_t* heap, size_t span_count) {
1463
1233
	if (heap->spans_reserved >= span_count)
1464
1093
		return _rpmalloc_span_map(heap, span_count);
1465
	return 0;
1466
}
1467
1468
//! Extract a span from the global cache
1469
static span_t*
1470
140
_rpmalloc_heap_global_cache_extract(heap_t* heap, size_t span_count) {
1471
#if ENABLE_GLOBAL_CACHE
1472
#if ENABLE_THREAD_CACHE
1473
140
	span_cache_t* span_cache;
1474
140
	size_t wanted_count;
1475
140
	if (span_count == 1) {
1476
138
		span_cache = &heap->span_cache;
1477
138
		wanted_count = THREAD_SPAN_CACHE_TRANSFER;
1478
	} else {
1479
2
		span_cache = (span_cache_t*)(heap->span_large_cache + (span_count - 2));
1480
2
		wanted_count = THREAD_SPAN_LARGE_CACHE_TRANSFER;
1481
	}
1482
140
	span_cache->count = _rpmalloc_global_cache_extract_spans(span_cache->span, span_count, wanted_count);
1483
140
	if (span_cache->count) {
1484
		_rpmalloc_stat_add64(&heap->global_to_thread, span_count * span_cache->count * _memory_span_size);
1485
		_rpmalloc_stat_add(&heap->span_use[span_count - 1].spans_from_global, span_cache->count);
1486
		return span_cache->span[--span_cache->count];
1487
	}
1488
#else
1489
	span_t* span = 0;
1490
	size_t count = _rpmalloc_global_cache_extract_spans(&span, span_count, 1);
1491
	if (count) {
1492
		_rpmalloc_stat_add64(&heap->global_to_thread, span_count * count * _memory_span_size);
1493
		_rpmalloc_stat_add(&heap->span_use[span_count - 1].spans_from_global, count);
1494
		return span;
1495
	}
1496
#endif
1497
#endif
1498
	(void)sizeof(heap);
1499
	(void)sizeof(span_count);
1500
	return 0;
1501
}
1502
1503
//! Get a span from one of the cache levels (thread cache, reserved, global cache) or fallback to mapping more memory
1504
static span_t*
1505
1233
_rpmalloc_heap_extract_new_span(heap_t* heap, size_t span_count, uint32_t class_idx) {
1506
1233
	span_t* span;
1507
#if ENABLE_ADAPTIVE_THREAD_CACHE || ENABLE_STATISTICS
1508
	uint32_t idx = (uint32_t)span_count - 1;
1509
	uint32_t current_count = (uint32_t)atomic_incr32(&heap->span_use[idx].current);
1510
	if (current_count > (uint32_t)atomic_load32(&heap->span_use[idx].high))
1511
		atomic_store32(&heap->span_use[idx].high, (int32_t)current_count);
1512
	_rpmalloc_stat_add_peak(&heap->size_class_use[class_idx].spans_current, 1, heap->size_class_use[class_idx].spans_peak);
1513
#endif
1514
#if ENABLE_THREAD_CACHE
1515
1233
	if (class_idx < SIZE_CLASS_COUNT) {
1516
1220
		if (heap->size_class[class_idx].cache) {
1517
			span = heap->size_class[class_idx].cache;
1518
			span_t* new_cache = 0;
1519
			if (heap->span_cache.count)
1520
				new_cache = heap->span_cache.span[--heap->span_cache.count];
1521
			heap->size_class[class_idx].cache = new_cache;
1522
			return span;
1523
		}
1524
	}
1525
#else
1526
	(void)sizeof(class_idx);
1527
#endif
1528
1233
	span = _rpmalloc_heap_thread_cache_extract(heap, span_count);
1529
1233
	if (EXPECTED(span != 0)) {
1530
		_rpmalloc_stat_inc(&heap->size_class_use[class_idx].spans_from_cache);
1531
		return span;
1532
	}
1533
1233
	span = _rpmalloc_heap_reserved_extract(heap, span_count);
1534
1233
	if (EXPECTED(span != 0)) {
1535
		_rpmalloc_stat_inc(&heap->size_class_use[class_idx].spans_from_reserved);
1536
		return span;
1537
	}
1538
140
	span = _rpmalloc_heap_global_cache_extract(heap, span_count);
1539
140
	if (EXPECTED(span != 0)) {
1540
		_rpmalloc_stat_inc(&heap->size_class_use[class_idx].spans_from_cache);
1541
		return span;
1542
	}
1543
	//Final fallback, map in more virtual memory
1544
140
	span = _rpmalloc_span_map(heap, span_count);
1545
140
	_rpmalloc_stat_inc(&heap->size_class_use[class_idx].spans_map_calls);
1546
140
	return span;
1547
}
1548
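
_rpmalloc_heap_extract_new_span above tries each source in a fixed cheapest-first order and returns on the first hit: per-class cache, thread span cache, reserved spans, global cache, and finally a fresh mapping. A compressed sketch of that fallback pattern, with hypothetical stub sources standing in for the real cache lookups:

#include <stdio.h>
#include <stdlib.h>

/* Each level returns NULL on a miss; the array order encodes "cheapest first". */
typedef void* (*source_fn)(void);

static void* from_thread_cache(void) { return NULL; }           /* miss */
static void* from_reserved(void)     { return NULL; }           /* miss */
static void* from_global_cache(void) { return NULL; }           /* miss */
static void* from_map(void)          { return malloc(4096); }   /* final fallback */

static void*
acquire_span(void) {
	source_fn sources[] = { from_thread_cache, from_reserved, from_global_cache, from_map };
	for (size_t i = 0; i < sizeof(sources) / sizeof(sources[0]); ++i) {
		void* span = sources[i]();
		if (span)
			return span;   /* first hit wins */
	}
	return NULL;
}

int
main(void) {
	void* span = acquire_span();
	printf("span acquired via final fallback: %p\n", span);
	free(span);
	return 0;
}
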
1549
static void
1550
163
_rpmalloc_heap_initialize(heap_t* heap) {
1551
	//Get a new heap ID
1552
163
	heap->id = 1 + atomic_incr32(&_memory_heap_id);
1553
1554
	//Link in heap in heap ID map
1555
163
	size_t list_idx = heap->id % HEAP_ARRAY_SIZE;
1556
163
	heap->next_heap = _memory_heaps[list_idx];
1557
163
	_memory_heaps[list_idx] = heap;
1558
163
}
1559
1560
static void
1561
163
_rpmalloc_heap_orphan(heap_t* heap, int first_class) {
1562
163
	heap->owner_thread = (uintptr_t)-1;
1563
#if RPMALLOC_FIRST_CLASS_HEAPS
1564
	heap_t** heap_list = (first_class ? &_memory_first_class_orphan_heaps : &_memory_orphan_heaps);
1565
#else
1566
163
	(void)sizeof(first_class);
1567
163
	heap_t** heap_list = &_memory_orphan_heaps;
1568
#endif
1569
163
	heap->next_orphan = *heap_list;
1570
163
	*heap_list = heap;
1571
163
}
1572
1573
//! Allocate a new heap from newly mapped memory pages
1574
static heap_t*
1575
163
_rpmalloc_heap_allocate_new(void) {
1576
	//Map in pages for a new heap
1577
163
	size_t align_offset = 0;
1578
163
	size_t heap_size = sizeof(heap_t);
1579
163
	size_t aligned_heap_size = 64 * ((heap_size + 63) / 64);
1580
163
	size_t block_size = _memory_page_size * ((aligned_heap_size + _memory_page_size - 1) >> _memory_page_size_shift);
1581
163
	heap_t* heap = (heap_t*)_rpmalloc_mmap(block_size, &align_offset);
1582
163
	if (!heap)
1583
		return heap;
1584
1585
163
	_rpmalloc_heap_initialize(heap);
1586
163
	heap->align_offset = align_offset;
1587
1588
	//Put extra heaps as orphans, aligning to make sure ABA protection bits fit in pointer low bits
1589
163
	size_t num_heaps = block_size / aligned_heap_size;
1590
163
	atomic_store32(&heap->child_count, (int32_t)num_heaps - 1);
1591
163
	heap_t* extra_heap = (heap_t*)pointer_offset(heap, aligned_heap_size);
1592
163
	while (num_heaps > 1) {
1593
		_rpmalloc_heap_initialize(extra_heap);
1594
		extra_heap->master_heap = heap;
1595
		_rpmalloc_heap_orphan(extra_heap, 1);
1596
		extra_heap = (heap_t*)pointer_offset(extra_heap, aligned_heap_size);
1597
		--num_heaps;
1598
	}
1599
	return heap;
1600
}
1601
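
_rpmalloc_heap_allocate_new maps one page-rounded block and carves it into 64-byte-aligned heap slots, keeping the first and orphaning the rest. A rough sketch of the same carving arithmetic, with malloc standing in for the page mapping and an assumed 4096-byte page:

#include <stdio.h>
#include <stdlib.h>

/* Round size up to the next multiple of align (align must be a power of two). */
static size_t
round_up(size_t size, size_t align) {
	return (size + (align - 1)) & ~(align - 1);
}

int
main(void) {
	const size_t page_size    = 4096;                              /* assumed page size */
	const size_t record_size  = 200;                               /* stand-in for sizeof(heap_t) */
	const size_t aligned_size = round_up(record_size, 64);         /* 64-byte slots */
	const size_t block_size   = round_up(aligned_size, page_size); /* whole pages mapped */
	const size_t num_records  = block_size / aligned_size;

	unsigned char* block = malloc(block_size);                     /* stand-in for the page mapping */
	if (!block)
		return 1;

	/* The first slot is returned to the caller; the rest are queued for later reuse. */
	for (size_t i = 1; i < num_records; ++i) {
		unsigned char* extra = block + i * aligned_size;
		(void)extra;   /* in rpmalloc these extras become orphaned heaps */
	}
	printf("%zu bytes mapped -> %zu records of %zu bytes each\n",
	       block_size, num_records, aligned_size);
	free(block);
	return 0;
}
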
1602
static heap_t*
1603
163
_rpmalloc_heap_extract_orphan(heap_t** heap_list) {
1604
163
	heap_t* heap = *heap_list;
1605
163
	*heap_list = (heap ? heap->next_orphan : 0);
1606
163
	return heap;
1607
}
1608
1609
//! Allocate a new heap, potentially reusing a previously orphaned heap
1610
static heap_t*
1611
163
_rpmalloc_heap_allocate(int first_class) {
1612
163
	heap_t* heap = 0;
1613
326
	while (!atomic_cas32_acquire(&_memory_orphan_lock, 1, 0))
1614
163
		/* Spin */;
1615
163
	if (first_class == 0)
1616
163
		heap = _rpmalloc_heap_extract_orphan(&_memory_orphan_heaps);
1617
#if RPMALLOC_FIRST_CLASS_HEAPS
1618
	if (!heap)
1619
		heap = _rpmalloc_heap_extract_orphan(&_memory_first_class_orphan_heaps);
1620
#endif
1621
163
	if (!heap)
1622
163
		heap = _rpmalloc_heap_allocate_new();
1623
163
	atomic_store32_release(&_memory_orphan_lock, 0);
1624
163
	return heap;
1625
}
1626
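
The orphan list above is guarded by spinning on an atomic flag, with acquire semantics when taking the lock and release semantics when dropping it. A minimal sketch of that lock using standard C11 <stdatomic.h> rather than rpmalloc's own atomic wrappers:

#include <stdatomic.h>
#include <stdio.h>

static atomic_int orphan_lock = 0;

static void
lock_acquire(void) {
	int expected = 0;
	/* Spin until we atomically flip 0 -> 1; acquire orders the critical section after the lock. */
	while (!atomic_compare_exchange_weak_explicit(&orphan_lock, &expected, 1,
	                                              memory_order_acquire, memory_order_relaxed)) {
		expected = 0;   /* a failed CAS overwrites 'expected'; reset and retry */
	}
}

static void
lock_release(void) {
	/* Release ordering publishes everything written inside the critical section. */
	atomic_store_explicit(&orphan_lock, 0, memory_order_release);
}

int
main(void) {
	lock_acquire();
	printf("critical section: pop or push an orphaned heap here\n");
	lock_release();
	return 0;
}
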
1627
static void
1628
163
_rpmalloc_heap_release(void* heapptr, int first_class) {
1629
163
	heap_t* heap = (heap_t*)heapptr;
1630
163
	if (!heap)
1631
		return;
1632
	//Release thread cache spans back to global cache
1633
163
	_rpmalloc_heap_cache_adopt_deferred(heap, 0);
1634
#if ENABLE_THREAD_CACHE
1635
10432
	for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
1636
10269
		span_cache_t* span_cache;
1637
10269
		if (!iclass)
1638
163
			span_cache = &heap->span_cache;
1639
		else
1640
10106
			span_cache = (span_cache_t*)(heap->span_large_cache + (iclass - 1));
1641
10269
		if (!span_cache->count)
1642
10265
			continue;
1643
4
		if (heap->finalize) {
1644
			for (size_t ispan = 0; ispan < span_cache->count; ++ispan)
1645
				_rpmalloc_span_unmap(span_cache->span[ispan]);
1646
		} else {
1647
#if ENABLE_GLOBAL_CACHE
1648
4
			_rpmalloc_stat_add64(&heap->thread_to_global, span_cache->count * (iclass + 1) * _memory_span_size);
1649
4
			_rpmalloc_stat_add(&heap->span_use[iclass].spans_to_global, span_cache->count);
1650
4
			_rpmalloc_global_cache_insert_spans(span_cache->span, iclass + 1, span_cache->count);
1651
#else
1652
			for (size_t ispan = 0; ispan < span_cache->count; ++ispan)
1653
				_rpmalloc_span_unmap(span_cache->span[ispan]);
1654
#endif
1655
		}
1656
4
		span_cache->count = 0;
1657
	}
1658
#endif
1659
1660
163
	if (get_thread_heap_raw() == heap)
1661
163
		set_thread_heap(0);
1662
1663
#if ENABLE_STATISTICS
1664
	atomic_decr32(&_memory_active_heaps);
1665
	assert(atomic_load32(&_memory_active_heaps) >= 0);
1666
#endif
1667
1668
282
	while (!atomic_cas32_acquire(&_memory_orphan_lock, 1, 0))
1669
282
		/* Spin */;
1670
163
	_rpmalloc_heap_orphan(heap, first_class);
1671
163
	atomic_store32_release(&_memory_orphan_lock, 0);
1672
}
1673
1674
static void
1675
163
_rpmalloc_heap_release_raw(void* heapptr) {
1676
163
	_rpmalloc_heap_release(heapptr, 0);
1677
163
}
1678
1679
static void
1680
163
_rpmalloc_heap_finalize(heap_t* heap) {
1681
163
	if (heap->spans_reserved) {
1682
140
		span_t* span = _rpmalloc_span_map(heap, heap->spans_reserved);
1683
140
		_rpmalloc_span_unmap(span);
1684
140
		heap->spans_reserved = 0;
1685
	}
1686
1687
163
	_rpmalloc_heap_cache_adopt_deferred(heap, 0);
1688
1689
20701
	for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) {
1690
20538
		if (heap->size_class[iclass].cache)
1691
			_rpmalloc_span_unmap(heap->size_class[iclass].cache);
1692
20538
		heap->size_class[iclass].cache = 0;
1693
20538
		span_t* span = heap->size_class[iclass].partial_span;
1694
21758
		while (span) {
1695
1220
			span_t* next = span->next;
1696
1220
			_rpmalloc_span_finalize(heap, iclass, span, &heap->size_class[iclass].partial_span);
1697
1220
			span = next;
1698
		}
1699
		// If class still has a free list it must be a full span
1700
20538
		if (heap->size_class[iclass].free_list) {
1701
			span_t* class_span = (span_t*)((uintptr_t)heap->size_class[iclass].free_list & _memory_span_mask);
1702
			span_t** list = 0;
1703
#if RPMALLOC_FIRST_CLASS_HEAPS
1704
			list = &heap->full_span[iclass];
1705
#endif
1706
			--heap->full_span_count;
1707
			if (!_rpmalloc_span_finalize(heap, iclass, class_span, list)) {
1708
				if (list)
1709
					_rpmalloc_span_double_link_list_remove(list, class_span);
1710
				_rpmalloc_span_double_link_list_add(&heap->size_class[iclass].partial_span, class_span);
1711
			}
1712
		}
1713
	}
1714
1715
#if ENABLE_THREAD_CACHE
1716
10432
	for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
1717
10269
		span_cache_t* span_cache;
1718
10269
		if (!iclass)
1719
163
			span_cache = &heap->span_cache;
1720
		else
1721
10106
			span_cache = (span_cache_t*)(heap->span_large_cache + (iclass - 1));
1722
10269
		for (size_t ispan = 0; ispan < span_cache->count; ++ispan)
1723
			_rpmalloc_span_unmap(span_cache->span[ispan]);
1724
10269
		span_cache->count = 0;
1725
	}
1726
#endif
1727
163
	assert(!atomic_load_ptr(&heap->span_free_deferred));
1728
163
}
1729
1730
1731
////////////
1732
///
1733
/// Allocation entry points
1734
///
1735
//////
1736
1737
//! Pop first block from a free list
1738
static void*
1739
7839
free_list_pop(void** list) {
1740
7839
	void* block = *list;
1741
7839
	*list = *((void**)block);
1742
7839
	return block;
1743
}
1744
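
free_list_pop stores the next pointer in the first bytes of the free block itself, so the free list costs no extra memory. A standalone sketch with a matching push, plus a NULL check that the allocator's hot path can omit because callers test the list before popping:

#include <stdio.h>
#include <stdlib.h>

/* The first sizeof(void*) bytes of each free block hold the next pointer. */
static void
free_list_push(void** list, void* block) {
	*((void**)block) = *list;
	*list = block;
}

static void*
free_list_pop_checked(void** list) {
	void* block = *list;
	if (block)
		*list = *((void**)block);
	return block;
}

int
main(void) {
	void* list = NULL;
	for (int i = 0; i < 4; ++i)
		free_list_push(&list, malloc(64));   /* blocks must be at least sizeof(void*) */
	void* block;
	while ((block = free_list_pop_checked(&list)))
		free(block);
	return 0;
}
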
1745
//! Allocate a small/medium sized memory block from the given heap
1746
static void*
1747
1220
_rpmalloc_allocate_from_heap_fallback(heap_t* heap, uint32_t class_idx) {
1748
1220
	span_t* span = heap->size_class[class_idx].partial_span;
1749
1220
	if (EXPECTED(span != 0)) {
1750
		assert(span->block_count == _memory_size_class[span->size_class].block_count);
1751
		assert(!_rpmalloc_span_is_fully_utilized(span));
1752
		void* block;
1753
		if (span->free_list) {
1754
			//Swap in free list if not empty
1755
			heap->size_class[class_idx].free_list = span->free_list;
1756
			span->free_list = 0;
1757
			block = free_list_pop(&heap->size_class[class_idx].free_list);
1758
		} else {
1759
			//If the span did not fully initialize its free list, link up another page worth of blocks
1760
			void* block_start = pointer_offset(span, SPAN_HEADER_SIZE + ((size_t)span->free_list_limit * span->block_size));
1761
			span->free_list_limit += free_list_partial_init(&heap->size_class[class_idx].free_list, &block,
1762
				(void*)((uintptr_t)block_start & ~(_memory_page_size - 1)), block_start,
1763
				span->block_count - span->free_list_limit, span->block_size);
1764
		}
1765
		assert(span->free_list_limit <= span->block_count);
1766
		span->used_count = span->free_list_limit;
1767
1768
		//Swap in deferred free list if present
1769
		if (atomic_load_ptr(&span->free_list_deferred))
1770
			_rpmalloc_span_extract_free_list_deferred(span);
1771
1772
		//If span is still not fully utilized keep it in partial list and early return block
1773
		if (!_rpmalloc_span_is_fully_utilized(span))
1774
			return block;
1775
1776
		//The span is fully utilized, unlink from partial list and add to fully utilized list
1777
		_rpmalloc_span_double_link_list_pop_head(&heap->size_class[class_idx].partial_span, span);
1778
#if RPMALLOC_FIRST_CLASS_HEAPS
1779
		_rpmalloc_span_double_link_list_add(&heap->full_span[class_idx], span);
1780
#endif
1781
		++heap->full_span_count;
1782
		return block;
1783
	}
1784
1785
	//Find a span in one of the cache levels
1786
1220
	span = _rpmalloc_heap_extract_new_span(heap, 1, class_idx);
1787
1220
	if (EXPECTED(span != 0)) {
1788
		//Mark span as owned by this heap and set base data, return first block
1789
1220
		return _rpmalloc_span_initialize_new(heap, span, class_idx);
1790
	}
1791
1792
	return 0;
1793
}
1794
1795
//! Allocate a small sized memory block from the given heap
1796
static void*
1797
8974
_rpmalloc_allocate_small(heap_t* heap, size_t size) {
1798
8974
	assert(heap);
1799
	//Small sizes have unique size classes
1800
8974
	const uint32_t class_idx = (uint32_t)((size + (SMALL_GRANULARITY - 1)) >> SMALL_GRANULARITY_SHIFT);
1801
8974
	_rpmalloc_stat_inc_alloc(heap, class_idx);
1802
8974
	if (EXPECTED(heap->size_class[class_idx].free_list != 0))
1803
7837
		return free_list_pop(&heap->size_class[class_idx].free_list);
1804
1137
	return _rpmalloc_allocate_from_heap_fallback(heap, class_idx);
1805
}
1806
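
The small path maps a request size to its class index by rounding up to the small granularity and shifting. A worked sketch assuming a 16-byte granularity for illustration:

#include <stdio.h>

#define SMALL_GRANULARITY       16   /* assumed block granularity for this sketch */
#define SMALL_GRANULARITY_SHIFT 4    /* log2(SMALL_GRANULARITY) */

/* Sizes 1..16 map to class 1, 17..32 to class 2, and so on. */
static unsigned
small_class_index(size_t size) {
	return (unsigned)((size + (SMALL_GRANULARITY - 1)) >> SMALL_GRANULARITY_SHIFT);
}

int
main(void) {
	size_t sizes[] = { 1, 16, 17, 32, 100 };
	for (size_t i = 0; i < sizeof(sizes) / sizeof(sizes[0]); ++i)
		printf("size %3zu -> class %u (block size %u)\n",
		       sizes[i], small_class_index(sizes[i]),
		       small_class_index(sizes[i]) * SMALL_GRANULARITY);
	return 0;
}
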
1807
//! Allocate a medium sized memory block from the given heap
1808
static void*
1809
85
_rpmalloc_allocate_medium(heap_t* heap, size_t size) {
1810
85
	assert(heap);
1811
	//Calculate the size class index and do a dependent lookup of the final class index (in case of merged classes)
1812
85
	const uint32_t base_idx = (uint32_t)(SMALL_CLASS_COUNT + ((size - (SMALL_SIZE_LIMIT + 1)) >> MEDIUM_GRANULARITY_SHIFT));
1813
85
	const uint32_t class_idx = _memory_size_class[base_idx].class_idx;
1814
85
	_rpmalloc_stat_inc_alloc(heap, class_idx);
1815
85
	if (EXPECTED(heap->size_class[class_idx].free_list != 0))
1816
2
		return free_list_pop(&heap->size_class[class_idx].free_list);
1817
83
	return _rpmalloc_allocate_from_heap_fallback(heap, class_idx);
1818
}
1819
1820
//! Allocate a large sized memory block from the given heap
1821
static void*
1822
13
_rpmalloc_allocate_large(heap_t* heap, size_t size) {
1823
13
	assert(heap);
1824
	//Calculate number of needed max sized spans (including header)
1825
	//Since this function is never called if size > LARGE_SIZE_LIMIT
1826
	//the span_count is guaranteed to be <= LARGE_CLASS_COUNT
1827
13
	size += SPAN_HEADER_SIZE;
1828
13
	size_t span_count = size >> _memory_span_size_shift;
1829
13
	if (size & (_memory_span_size - 1))
1830
13
		++span_count;
1831
1832
	//Find a span in one of the cache levels
1833
13
	span_t* span = _rpmalloc_heap_extract_new_span(heap, span_count, SIZE_CLASS_LARGE);
1834
13
	if (!span)
1835
		return span;
1836
1837
	//Mark span as owned by this heap and set base data
1838
13
	assert(span->span_count == span_count);
1839
13
	span->size_class = SIZE_CLASS_LARGE;
1840
13
	span->heap = heap;
1841
1842
#if RPMALLOC_FIRST_CLASS_HEAPS
1843
	_rpmalloc_span_double_link_list_add(&heap->large_huge_span, span);
1844
#endif
1845
13
	++heap->full_span_count;
1846
1847
13
	return pointer_offset(span, SPAN_HEADER_SIZE);
1848
}
1849
1850
//! Allocate a huge block by mapping memory pages directly
1851
static void*
1852
1
_rpmalloc_allocate_huge(heap_t* heap, size_t size) {
1853
1
	assert(heap);
1854
1
	size += SPAN_HEADER_SIZE;
1855
1
	size_t num_pages = size >> _memory_page_size_shift;
1856
1
	if (size & (_memory_page_size - 1))
1857
1
		++num_pages;
1858
1
	size_t align_offset = 0;
1859
1
	span_t* span = (span_t*)_rpmalloc_mmap(num_pages * _memory_page_size, &align_offset);
1860
1
	if (!span)
1861
		return span;
1862
1863
	//Store page count in span_count
1864
1
	span->size_class = SIZE_CLASS_HUGE;
1865
1
	span->span_count = (uint32_t)num_pages;
1866
1
	span->align_offset = (uint32_t)align_offset;
1867
1
	span->heap = heap;
1868
1
	_rpmalloc_stat_add_peak(&_huge_pages_current, num_pages, _huge_pages_peak);
1869
1870
#if RPMALLOC_FIRST_CLASS_HEAPS
1871
	_rpmalloc_span_double_link_list_add(&heap->large_huge_span, span);
1872
#endif
1873
1
	++heap->full_span_count;
1874
1875
1
	return pointer_offset(span, SPAN_HEADER_SIZE);
1876
}
1877
1878
//! Allocate a block of the given size
1879
static void*
1880
9073
_rpmalloc_allocate(heap_t* heap, size_t size) {
1881
9073
	if (EXPECTED(size <= SMALL_SIZE_LIMIT))
1882
8974
		return _rpmalloc_allocate_small(heap, size);
1883
99
	else if (size <= _memory_medium_size_limit)
1884
85
		return _rpmalloc_allocate_medium(heap, size);
1885
14
	else if (size <= LARGE_SIZE_LIMIT)
1886
13
		return _rpmalloc_allocate_large(heap, size);
1887
1
	return _rpmalloc_allocate_huge(heap, size);
1888
}
1889
1890
static void*
1891
_rpmalloc_aligned_allocate(heap_t* heap, size_t alignment, size_t size) {
1892
	if (alignment <= SMALL_GRANULARITY)
1893
		return _rpmalloc_allocate(heap, size);
1894
1895
#if ENABLE_VALIDATE_ARGS
1896
	if ((size + alignment) < size) {
1897
		errno = EINVAL;
1898
		return 0;
1899
	}
1900
	if (alignment & (alignment - 1)) {
1901
		errno = EINVAL;
1902
		return 0;
1903
	}
1904
#endif
1905
1906
	if ((alignment <= SPAN_HEADER_SIZE) && (size < _memory_medium_size_limit)) {
1907
		// If the alignment is less than or equal to the span header size (which is a power of two),
1908
		// and the size rounded up to a multiple of the span header size is at most size + alignment,
1909
		// then the natural alignment of blocks provides the requested alignment
1910
		size_t multiple_size = size ? (size + (SPAN_HEADER_SIZE - 1)) & ~(uintptr_t)(SPAN_HEADER_SIZE - 1) : SPAN_HEADER_SIZE;
1911
		assert(!(multiple_size % SPAN_HEADER_SIZE));
1912
		if (multiple_size <= (size + alignment))
1913
			return _rpmalloc_allocate(heap, multiple_size);
1914
	}
1915
1916
	void* ptr = 0;
1917
	size_t align_mask = alignment - 1;
1918
	if (alignment <= _memory_page_size) {
1919
		ptr = _rpmalloc_allocate(heap, size + alignment);
1920
		if ((uintptr_t)ptr & align_mask) {
1921
			ptr = (void*)(((uintptr_t)ptr & ~(uintptr_t)align_mask) + alignment);
1922
			//Mark as having aligned blocks
1923
			span_t* span = (span_t*)((uintptr_t)ptr & _memory_span_mask);
1924
			span->flags |= SPAN_FLAG_ALIGNED_BLOCKS;
1925
		}
1926
		return ptr;
1927
	}
1928
1929
	// Fallback to mapping new pages for this request. Since pointers passed
1930
	// to rpfree must be able to reach the start of the span by bitmasking of
1931
	// the address with the span size, the returned aligned pointer from this
1932
	// function must be within one span size of the start of the mapped area.
1933
	// In worst case this requires us to loop and map pages until we get a
1934
	// suitable memory address. It also means we can never align to span size
1935
	// or greater, since the span header will push alignment more than one
1936
	// span size away from span start (thus causing pointer mask to give us
1937
	// an invalid span start on free)
1938
	if (alignment & align_mask) {
1939
		errno = EINVAL;
1940
		return 0;
1941
	}
1942
	if (alignment >= _memory_span_size) {
1943
		errno = EINVAL;
1944
		return 0;
1945
	}
1946
1947
	size_t extra_pages = alignment / _memory_page_size;
1948
1949
	// Since each span has a header, we will at least need one extra memory page
1950
	size_t num_pages = 1 + (size / _memory_page_size);
1951
	if (size & (_memory_page_size - 1))
1952
		++num_pages;
1953
1954
	if (extra_pages > num_pages)
1955
		num_pages = 1 + extra_pages;
1956
1957
	size_t original_pages = num_pages;
1958
	size_t limit_pages = (_memory_span_size / _memory_page_size) * 2;
1959
	if (limit_pages < (original_pages * 2))
1960
		limit_pages = original_pages * 2;
1961
1962
	size_t mapped_size, align_offset;
1963
	span_t* span;
1964
1965
retry:
1966
	align_offset = 0;
1967
	mapped_size = num_pages * _memory_page_size;
1968
1969
	span = (span_t*)_rpmalloc_mmap(mapped_size, &align_offset);
1970
	if (!span) {
1971
		errno = ENOMEM;
1972
		return 0;
1973
	}
1974
	ptr = pointer_offset(span, SPAN_HEADER_SIZE);
1975
1976
	if ((uintptr_t)ptr & align_mask)
1977
		ptr = (void*)(((uintptr_t)ptr & ~(uintptr_t)align_mask) + alignment);
1978
1979
	if (((size_t)pointer_diff(ptr, span) >= _memory_span_size) ||
1980
	    (pointer_offset(ptr, size) > pointer_offset(span, mapped_size)) ||
1981
	    (((uintptr_t)ptr & _memory_span_mask) != (uintptr_t)span)) {
1982
		_rpmalloc_unmap(span, mapped_size, align_offset, mapped_size);
1983
		++num_pages;
1984
		if (num_pages > limit_pages) {
1985
			errno = EINVAL;
1986
			return 0;
1987
		}
1988
		goto retry;
1989
	}
1990
1991
	//Store page count in span_count
1992
	span->size_class = SIZE_CLASS_HUGE;
1993
	span->span_count = (uint32_t)num_pages;
1994
	span->align_offset = (uint32_t)align_offset;
1995
	span->heap = heap;
1996
	_rpmalloc_stat_add_peak(&_huge_pages_current, num_pages, _huge_pages_peak);
1997
1998
#if RPMALLOC_FIRST_CLASS_HEAPS
1999
	_rpmalloc_span_double_link_list_add(&heap->large_huge_span, span);
2000
#endif
2001
	++heap->full_span_count;
2002
2003
	return ptr;
2004
}
2005
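
For alignments up to a page, _rpmalloc_aligned_allocate over-allocates by the alignment and rounds the returned pointer up to the next boundary, relying on the aligned-blocks span flag to recover the block start on free. A reduced sketch of just the pointer-rounding arithmetic (the span flagging is omitted, so this sketch has to remember the raw pointer itself):

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* Allocate size + alignment and round the pointer up to an 'alignment' boundary.
 * 'alignment' must be a power of two. The caller keeps the raw pointer for free();
 * rpmalloc instead recovers the block from the span header on deallocation. */
static void*
aligned_alloc_sketch(void** raw_out, size_t alignment, size_t size) {
	uintptr_t align_mask = (uintptr_t)alignment - 1;
	void* raw = malloc(size + alignment);
	if (!raw)
		return NULL;
	*raw_out = raw;
	uintptr_t ptr = (uintptr_t)raw;
	if (ptr & align_mask)
		ptr = (ptr & ~align_mask) + alignment;
	return (void*)ptr;
}

int
main(void) {
	void* raw;
	void* p = aligned_alloc_sketch(&raw, 256, 1000);
	printf("aligned pointer %p (raw %p), misalignment %zu\n",
	       p, raw, (size_t)((uintptr_t)p & 255));
	free(raw);
	return 0;
}
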
2006
2007
////////////
2008
///
2009
/// Deallocation entry points
2010
///
2011
//////
2012
2013
//! Deallocate the given small/medium memory block in the current thread local heap
2014
static void
2015
8924
_rpmalloc_deallocate_direct_small_or_medium(span_t* span, void* block) {
2016
8924
	heap_t* heap = span->heap;
2017
8924
	assert(heap->owner_thread == get_thread_id() || !heap->owner_thread || heap->finalize);
2018
	//Add block to free list
2019
8924
	if (UNEXPECTED(_rpmalloc_span_is_fully_utilized(span))) {
2020
80
		span->used_count = span->block_count;
2021
#if RPMALLOC_FIRST_CLASS_HEAPS
2022
		_rpmalloc_span_double_link_list_remove(&heap->full_span[span->size_class], span);
2023
#endif
2024
80
		_rpmalloc_span_double_link_list_add(&heap->size_class[span->size_class].partial_span, span);
2025
80
		--heap->full_span_count;
2026
	}
2027
8924
	--span->used_count;
2028
8924
	*((void**)block) = span->free_list;
2029
8924
	span->free_list = block;
2030
8924
	if (UNEXPECTED(span->used_count == span->list_size)) {
2031
		_rpmalloc_span_double_link_list_remove(&heap->size_class[span->size_class].partial_span, span);
2032
		_rpmalloc_span_release_to_cache(heap, span);
2033
	}
2034
8924
}
2035
2036
static void
2037
_rpmalloc_deallocate_defer_free_span(heap_t* heap, span_t* span) {
2038
	//This list does not need ABA protection, no mutable side state
2039
	do {
2040
		span->free_list = atomic_load_ptr(&heap->span_free_deferred);
2041
	} while (!atomic_cas_ptr(&heap->span_free_deferred, span, span->free_list));
2042
}
2043
2044
//! Put the block in the deferred free list of the owning span
2045
static void
2046
_rpmalloc_deallocate_defer_small_or_medium(span_t* span, void* block) {
2047
	// The memory ordering here is a bit tricky, to avoid having to ABA protect
2048
	// the deferred free list to avoid desynchronization of list and list size
2049
	// we need to have acquire semantics on successful CAS of the pointer to
2050
	// guarantee the list_size variable validity + release semantics on pointer store
2051
	void* free_list;
2052
	do {
2053
		free_list = atomic_exchange_ptr_acquire(&span->free_list_deferred, INVALID_POINTER);
2054
	} while (free_list == INVALID_POINTER);
2055
	*((void**)block) = free_list;
2056
	uint32_t free_count = ++span->list_size;
2057
	atomic_store_ptr_release(&span->free_list_deferred, block);
2058
	if (free_count == span->block_count) {
2059
		// Span was completely freed by this block. Due to the INVALID_POINTER spin lock
2060
		// no other thread can reach this state simultaneously on this span.
2061
		// Safe to move to owner heap deferred cache
2062
		_rpmalloc_deallocate_defer_free_span(span->heap, span);
2063
	}
2064
}
2065
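
The deferred path above briefly parks the list head at a sentinel (INVALID_POINTER) so that the head pointer and list_size stay consistent without full ABA protection. A reduced C11 sketch of the same exchange-with-sentinel pattern on a single list, using standard atomics and a hypothetical deferred_list_t:

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define LIST_LOCKED ((void*)(uintptr_t)1)   /* sentinel, never a valid block address */

typedef struct deferred_list_t {
	_Atomic(void*) head;
	uint32_t size;          /* protected by holding the sentinel */
} deferred_list_t;

static void
deferred_push(deferred_list_t* list, void* block) {
	void* old_head;
	/* Take exclusive ownership of the head by swapping in the sentinel. */
	do {
		old_head = atomic_exchange_explicit(&list->head, LIST_LOCKED, memory_order_acquire);
	} while (old_head == LIST_LOCKED);
	*((void**)block) = old_head;   /* link block in front of the previous head */
	++list->size;                  /* safe: no other thread can pass the sentinel */
	atomic_store_explicit(&list->head, block, memory_order_release);
}

int
main(void) {
	deferred_list_t list = { NULL, 0 };
	for (int i = 0; i < 3; ++i)
		deferred_push(&list, malloc(64));
	printf("deferred blocks: %u\n", list.size);
	void* block = atomic_load(&list.head);   /* drain */
	while (block && block != LIST_LOCKED) {
		void* next = *((void**)block);
		free(block);
		block = next;
	}
	return 0;
}
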
2066
static void
2067
8924
_rpmalloc_deallocate_small_or_medium(span_t* span, void* p) {
2068
8924
	_rpmalloc_stat_inc_free(span->heap, span->size_class);
2069
8924
	if (span->flags & SPAN_FLAG_ALIGNED_BLOCKS) {
2070
		//Realign pointer to block start
2071
		void* blocks_start = pointer_offset(span, SPAN_HEADER_SIZE);
2072
		uint32_t block_offset = (uint32_t)pointer_diff(p, blocks_start);
2073
		p = pointer_offset(p, -(int32_t)(block_offset % span->block_size));
2074
	}
2075
	//Check if block belongs to this heap or if deallocation should be deferred
2076
#if RPMALLOC_FIRST_CLASS_HEAPS
2077
	int defer = (span->heap->owner_thread && (span->heap->owner_thread != get_thread_id()) && !span->heap->finalize);
2078
#else
2079

8924
	int defer = ((span->heap->owner_thread != get_thread_id()) && !span->heap->finalize);
2080
#endif
2081
8924
	if (!defer)
2082
8924
		_rpmalloc_deallocate_direct_small_or_medium(span, p);
2083
	else
2084
		_rpmalloc_deallocate_defer_small_or_medium(span, p);
2085
8924
}
2086
2087
//! Deallocate the given large memory block to the current heap
2088
static void
2089
13
_rpmalloc_deallocate_large(span_t* span) {
2090
13
	assert(span->size_class == SIZE_CLASS_LARGE);
2091
13
	assert(!(span->flags & SPAN_FLAG_MASTER) || !(span->flags & SPAN_FLAG_SUBSPAN));
2092
13
	assert((span->flags & SPAN_FLAG_MASTER) || (span->flags & SPAN_FLAG_SUBSPAN));
2093
	//We must always defer (unless finalizing) if from another heap since we cannot touch the list or counters of another heap
2094
#if RPMALLOC_FIRST_CLASS_HEAPS
2095
	int defer = (span->heap->owner_thread && (span->heap->owner_thread != get_thread_id()) && !span->heap->finalize);
2096
#else
2097

13
	int defer = ((span->heap->owner_thread != get_thread_id()) && !span->heap->finalize);
2098
#endif
2099
13
	if (defer) {
2100
		_rpmalloc_deallocate_defer_free_span(span->heap, span);
2101
		return;
2102
	}
2103
13
	assert(span->heap->full_span_count);
2104
13
	--span->heap->full_span_count;
2105
#if RPMALLOC_FIRST_CLASS_HEAPS
2106
	_rpmalloc_span_double_link_list_remove(&span->heap->large_huge_span, span);
2107
#endif
2108
#if ENABLE_ADAPTIVE_THREAD_CACHE || ENABLE_STATISTICS
2109
	//Decrease counter
2110
	size_t idx = span->span_count - 1;
2111
	atomic_decr32(&span->heap->span_use[idx].current);
2112
#endif
2113
13
	heap_t* heap = get_thread_heap();
2114
13
	assert(heap);
2115
13
	span->heap = heap;
2116

13
	if ((span->span_count > 1) && !heap->finalize && !heap->spans_reserved) {
2117
		heap->span_reserve = span;
2118
		heap->spans_reserved = span->span_count;
2119
		if (span->flags & SPAN_FLAG_MASTER) {
2120
			heap->span_reserve_master = span;
2121
		} else { //SPAN_FLAG_SUBSPAN
2122
			span_t* master = (span_t*)pointer_offset(span, -(intptr_t)((size_t)span->offset_from_master * _memory_span_size));
2123
			heap->span_reserve_master = master;
2124
			assert(master->flags & SPAN_FLAG_MASTER);
2125
			assert(atomic_load32(&master->remaining_spans) >= (int32_t)span->span_count);
2126
		}
2127
		_rpmalloc_stat_inc(&heap->span_use[idx].spans_to_reserved);
2128
	} else {
2129
		//Insert into cache list
2130
13
		_rpmalloc_heap_cache_insert(heap, span);
2131
	}
2132
}
2133
2134
//! Deallocate the given huge span
2135
static void
2136
1
_rpmalloc_deallocate_huge(span_t* span) {
2137
1
	assert(span->heap);
2138
#if RPMALLOC_FIRST_CLASS_HEAPS
2139
	int defer = (span->heap->owner_thread && (span->heap->owner_thread != get_thread_id()) && !span->heap->finalize);
2140
#else
2141

1
	int defer = ((span->heap->owner_thread != get_thread_id()) && !span->heap->finalize);
2142
#endif
2143
1
	if (defer) {
2144
		_rpmalloc_deallocate_defer_free_span(span->heap, span);
2145
		return;
2146
	}
2147
1
	assert(span->heap->full_span_count);
2148
1
	--span->heap->full_span_count;
2149
#if RPMALLOC_FIRST_CLASS_HEAPS
2150
	_rpmalloc_span_double_link_list_remove(&span->heap->large_huge_span, span);
2151
#endif
2152
2153
	//Oversized allocation, page count is stored in span_count
2154
1
	size_t num_pages = span->span_count;
2155
1
	_rpmalloc_unmap(span, num_pages * _memory_page_size, span->align_offset, num_pages * _memory_page_size);
2156
1
	_rpmalloc_stat_sub(&_huge_pages_current, num_pages);
2157
}
2158
2159
//! Deallocate the given block
2160
static void
2161
8938
_rpmalloc_deallocate(void* p) {
2162
	//Grab the span (always at start of span, using span alignment)
2163
8938
	span_t* span = (span_t*)((uintptr_t)p & _memory_span_mask);
2164
8938
	if (UNEXPECTED(!span))
2165
		return;
2166
8938
	if (EXPECTED(span->size_class < SIZE_CLASS_COUNT))
2167
8924
		_rpmalloc_deallocate_small_or_medium(span, p);
2168
14
	else if (span->size_class == SIZE_CLASS_LARGE)
2169
13
		_rpmalloc_deallocate_large(span);
2170
	else
2171
1
		_rpmalloc_deallocate_huge(span);
2172
}
2173
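
Deallocation recovers the span header by masking the block address with the span mask, which works because spans are always mapped with span-size alignment. A tiny arithmetic sketch assuming an illustrative 64 KiB span size:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define SPAN_SIZE ((uintptr_t)65536)     /* illustrative 64 KiB span */
#define SPAN_MASK (~(SPAN_SIZE - 1))     /* clears the low 16 bits */

int
main(void) {
	/* Pretend a span was mapped at a 64 KiB aligned address... */
	uintptr_t span_start = (uintptr_t)0x7f1234560000u;
	/* ...and a block lives somewhere inside it. */
	uintptr_t block = span_start + 0x2a40;

	uintptr_t recovered = block & SPAN_MASK;   /* same trick as _rpmalloc_deallocate */
	printf("block %#" PRIxPTR " -> span %#" PRIxPTR " (match: %s)\n",
	       block, recovered, recovered == span_start ? "yes" : "no");
	return 0;
}
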
2174
2175
////////////
2176
///
2177
/// Reallocation entry points
2178
///
2179
//////
2180
2181
static size_t
2182
_rpmalloc_usable_size(void* p);
2183
2184
//! Reallocate the given block to the given size
2185
static void*
2186
148
_rpmalloc_reallocate(heap_t* heap, void* p, size_t size, size_t oldsize, unsigned int flags) {
2187
148
	if (p) {
2188
		//Grab the span using guaranteed span alignment
2189
140
		span_t* span = (span_t*)((uintptr_t)p & _memory_span_mask);
2190
140
		if (EXPECTED(span->size_class < SIZE_CLASS_COUNT)) {
2191
			//Small/medium sized block
2192
140
			assert(span->span_count == 1);
2193
140
			void* blocks_start = pointer_offset(span, SPAN_HEADER_SIZE);
2194
140
			uint32_t block_offset = (uint32_t)pointer_diff(p, blocks_start);
2195
140
			uint32_t block_idx = block_offset / span->block_size;
2196
140
			void* block = pointer_offset(blocks_start, (size_t)block_idx * span->block_size);
2197
140
			if (!oldsize)
2198
140
				oldsize = (size_t)((ptrdiff_t)span->block_size - pointer_diff(p, block));
2199
140
			if ((size_t)span->block_size >= size) {
2200
				//Still fits in block, never mind trying to save memory, but preserve data if alignment changed
2201

54
				if ((p != block) && !(flags & RPMALLOC_NO_PRESERVE))
2202
					memmove(block, p, oldsize);
2203
54
				return block;
2204
			}
2205
		} else if (span->size_class == SIZE_CLASS_LARGE) {
2206
			//Large block
2207
			size_t total_size = size + SPAN_HEADER_SIZE;
2208
			size_t num_spans = total_size >> _memory_span_size_shift;
2209
			if (total_size & (_memory_span_size - 1))
2210
				++num_spans;
2211
			size_t current_spans = span->span_count;
2212
			void* block = pointer_offset(span, SPAN_HEADER_SIZE);
2213
			if (!oldsize)
2214
				oldsize = (current_spans * _memory_span_size) - (size_t)pointer_diff(p, block) - SPAN_HEADER_SIZE;
2215
			if ((current_spans >= num_spans) && (total_size >= (oldsize / 2))) {
2216
				//Still fits in block, never mind trying to save memory, but preserve data if alignment changed
2217
				if ((p != block) && !(flags & RPMALLOC_NO_PRESERVE))
2218
					memmove(block, p, oldsize);
2219
				return block;
2220
			}
2221
		} else {
2222
			//Oversized block
2223
			size_t total_size = size + SPAN_HEADER_SIZE;
2224
			size_t num_pages = total_size >> _memory_page_size_shift;
2225
			if (total_size & (_memory_page_size - 1))
2226
				++num_pages;
2227
			//Page count is stored in span_count
2228
			size_t current_pages = span->span_count;
2229
			void* block = pointer_offset(span, SPAN_HEADER_SIZE);
2230
			if (!oldsize)
2231
				oldsize = (current_pages * _memory_page_size) - (size_t)pointer_diff(p, block) - SPAN_HEADER_SIZE;
2232
			if ((current_pages >= num_pages) && (num_pages >= (current_pages / 2))) {
2233
				//Still fits in block, never mind trying to save memory, but preserve data if alignment changed
2234
				if ((p != block) && !(flags & RPMALLOC_NO_PRESERVE))
2235
					memmove(block, p, oldsize);
2236
				return block;
2237
			}
2238
		}
2239
	} else {
2240
		oldsize = 0;
2241
	}
2242
2243
94
	if (!!(flags & RPMALLOC_GROW_OR_FAIL))
2244
		return 0;
2245
2246
	//Size is greater than block size, need to allocate a new block and deallocate the old
2247
	//Avoid hysteresis by overallocating if increase is small (below 37%)
2248
94
	size_t lower_bound = oldsize + (oldsize >> 2) + (oldsize >> 3);
2249

94
	size_t new_size = (size > lower_bound) ? size : ((size > oldsize) ? lower_bound : size);
2250
94
	void* block = _rpmalloc_allocate(heap, new_size);
2251
94
	if (p && block) {
2252
86
		if (!(flags & RPMALLOC_NO_PRESERVE))
2253
86
			memcpy(block, p, oldsize < new_size ? oldsize : new_size);
2254
86
		_rpmalloc_deallocate(p);
2255
	}
2256
2257
	return block;
2258
}
2259
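
When the requested increase is small, the reallocation path above grows to at least oldsize + oldsize/4 + oldsize/8 (roughly +37.5%) so that slowly growing buffers are not copied on every call. A small sketch of that size decision:

#include <stdio.h>

/* Grow to the larger of the requested size and oldsize * 1.375, but never
 * inflate a genuine shrink; mirrors the hysteresis rule in the code above. */
static size_t
grow_size(size_t oldsize, size_t requested) {
	size_t lower_bound = oldsize + (oldsize >> 2) + (oldsize >> 3);
	if (requested > lower_bound)
		return requested;
	return (requested > oldsize) ? lower_bound : requested;
}

int
main(void) {
	printf("%zu\n", grow_size(1000, 1010));   /* small bump -> 1375 */
	printf("%zu\n", grow_size(1000, 5000));   /* big bump   -> 5000 */
	printf("%zu\n", grow_size(1000, 500));    /* shrink     -> 500  */
	return 0;
}
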
2260
static void*
2261
_rpmalloc_aligned_reallocate(heap_t* heap, void* ptr, size_t alignment, size_t size, size_t oldsize,
2262
                           unsigned int flags) {
2263
	if (alignment <= SMALL_GRANULARITY)
2264
		return _rpmalloc_reallocate(heap, ptr, size, oldsize, flags);
2265
2266
	int no_alloc = !!(flags & RPMALLOC_GROW_OR_FAIL);
2267
	size_t usablesize = (ptr ? _rpmalloc_usable_size(ptr) : 0);
2268
	if ((usablesize >= size) && !((uintptr_t)ptr & (alignment - 1))) {
2269
		if (no_alloc || (size >= (usablesize / 2)))
2270
			return ptr;
2271
	}
2272
	// Aligned alloc marks span as having aligned blocks
2273
	void* block = (!no_alloc ? _rpmalloc_aligned_allocate(heap, alignment, size) : 0);
2274
	if (EXPECTED(block != 0)) {
2275
		if (!(flags & RPMALLOC_NO_PRESERVE) && ptr) {
2276
			if (!oldsize)
2277
				oldsize = usablesize;
2278
			memcpy(block, ptr, oldsize < size ? oldsize : size);
2279
		}
2280
		_rpmalloc_deallocate(ptr);
2281
	}
2282
	return block;
2283
}
2284
2285
2286
////////////
2287
///
2288
/// Initialization, finalization and utility
2289
///
2290
//////
2291
2292
//! Get the usable size of the given block
2293
static size_t
2294
_rpmalloc_usable_size(void* p) {
2295
	//Grab the span using guaranteed span alignment
2296
	span_t* span = (span_t*)((uintptr_t)p & _memory_span_mask);
2297
	if (span->size_class < SIZE_CLASS_COUNT) {
2298
		//Small/medium block
2299
		void* blocks_start = pointer_offset(span, SPAN_HEADER_SIZE);
2300
		return span->block_size - ((size_t)pointer_diff(p, blocks_start) % span->block_size);
2301
	}
2302
	if (span->size_class == SIZE_CLASS_LARGE) {
2303
		//Large block
2304
		size_t current_spans = span->span_count;
2305
		return (current_spans * _memory_span_size) - (size_t)pointer_diff(p, span);
2306
	}
2307
	//Oversized block, page count is stored in span_count
2308
	size_t current_pages = span->span_count;
2309
	return (current_pages * _memory_page_size) - (size_t)pointer_diff(p, span);
2310
}
2311
2312
//! Adjust and optimize the size class properties for the given class
2313
static void
2314
20034
_rpmalloc_adjust_size_class(size_t iclass) {
2315
20034
	size_t block_size = _memory_size_class[iclass].block_size;
2316
20034
	size_t block_count = (_memory_span_size - SPAN_HEADER_SIZE) / block_size;
2317
2318
20034
	_memory_size_class[iclass].block_count = (uint16_t)block_count;
2319
20034
	_memory_size_class[iclass].class_idx = (uint16_t)iclass;
2320
2321
	//Check if previous size classes can be merged
2322
20034
	if (iclass >= SMALL_CLASS_COUNT) {
2323
		size_t prevclass = iclass;
2324
56286
		while (prevclass > 0) {
2325
56286
			--prevclass;
2326
			//A previous class can be merged into this one if both yield the same number of blocks per span
2327
56286
			if (_memory_size_class[prevclass].block_count == _memory_size_class[iclass].block_count)
2328
56286
				memcpy(_memory_size_class + prevclass, _memory_size_class + iclass, sizeof(_memory_size_class[iclass]));
2329
			else
2330
				break;
2331
		}
2332
	}
2333
20034
}
2334
2335
//! Initialize the allocator and setup global data
2336
extern inline int
2337
159
rpmalloc_initialize(void) {
2338
159
	if (_rpmalloc_initialized) {
2339
		rpmalloc_thread_initialize();
2340
		return 0;
2341
	}
2342
159
	return rpmalloc_initialize_config(0);
2343
}
2344
2345
int
2346
159
rpmalloc_initialize_config(const rpmalloc_config_t* config) {
2347
159
	if (_rpmalloc_initialized) {
2348
		rpmalloc_thread_initialize();
2349
		return 0;
2350
	}
2351
159
	_rpmalloc_initialized = 1;
2352
2353
159
	if (config)
2354
		memcpy(&_memory_config, config, sizeof(rpmalloc_config_t));
2355
	else
2356
159
		memset(&_memory_config, 0, sizeof(rpmalloc_config_t));
2357
2358

159
	if (!_memory_config.memory_map || !_memory_config.memory_unmap) {
2359
159
		_memory_config.memory_map = _rpmalloc_mmap_os;
2360
159
		_memory_config.memory_unmap = _rpmalloc_unmap_os;
2361
	}
2362
2363
#if RPMALLOC_CONFIGURABLE
2364
	_memory_page_size = _memory_config.page_size;
2365
#else
2366
159
	_memory_page_size = 0;
2367
#endif
2368
159
	_memory_huge_pages = 0;
2369
159
	_memory_map_granularity = _memory_page_size;
2370
159
	if (!_memory_page_size) {
2371
#if PLATFORM_WINDOWS
2372
		SYSTEM_INFO system_info;
2373
		memset(&system_info, 0, sizeof(system_info));
2374
		GetSystemInfo(&system_info);
2375
		_memory_page_size = system_info.dwPageSize;
2376
		_memory_map_granularity = system_info.dwAllocationGranularity;
2377
#else
2378
159
		_memory_page_size = (size_t)sysconf(_SC_PAGESIZE);
2379
159
		_memory_map_granularity = _memory_page_size;
2380
159
		if (_memory_config.enable_huge_pages) {
2381
#if defined(__linux__)
2382
			size_t huge_page_size = 0;
2383
			FILE* meminfo = fopen("/proc/meminfo", "r");
2384
			if (meminfo) {
2385
				char line[128];
2386
				while (!huge_page_size && fgets(line, sizeof(line) - 1, meminfo)) {
2387
					line[sizeof(line) - 1] = 0;
2388
					if (strstr(line, "Hugepagesize:"))
2389
						huge_page_size = (size_t)strtol(line + 13, 0, 10) * 1024;
2390
				}
2391
				fclose(meminfo);
2392
			}
2393
			if (huge_page_size) {
2394
				_memory_huge_pages = 1;
2395
				_memory_page_size = huge_page_size;
2396
				_memory_map_granularity = huge_page_size;
2397
			}
2398
#elif defined(__FreeBSD__)
2399
			int rc;
2400
			size_t sz = sizeof(rc);
2401
2402
			if (sysctlbyname("vm.pmap.pg_ps_enabled", &rc, &sz, NULL, 0) == 0 && rc == 1) {
2403
				_memory_huge_pages = 1;
2404
				_memory_page_size = 2 * 1024 * 1024;
2405
				_memory_map_granularity = _memory_page_size;
2406
			}
2407
#elif defined(__APPLE__)
2408
			_memory_huge_pages = 1;
2409
			_memory_page_size = 2 * 1024 * 1024;
2410
			_memory_map_granularity = _memory_page_size;
2411
#endif
2412
		}
2413
#endif
2414
	} else {
2415
		if (_memory_config.enable_huge_pages)
2416
			_memory_huge_pages = 1;
2417
	}
2418
2419
#if PLATFORM_WINDOWS
2420
	if (_memory_config.enable_huge_pages) {
2421
		HANDLE token = 0;
2422
		size_t large_page_minimum = GetLargePageMinimum();
2423
		if (large_page_minimum)
2424
			OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &token);
2425
		if (token) {
2426
			LUID luid;
2427
			if (LookupPrivilegeValue(0, SE_LOCK_MEMORY_NAME, &luid)) {
2428
				TOKEN_PRIVILEGES token_privileges;
2429
				memset(&token_privileges, 0, sizeof(token_privileges));
2430
				token_privileges.PrivilegeCount = 1;
2431
				token_privileges.Privileges[0].Luid = luid;
2432
				token_privileges.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED;
2433
				if (AdjustTokenPrivileges(token, FALSE, &token_privileges, 0, 0, 0)) {
2434
					DWORD err = GetLastError();
2435
					if (err == ERROR_SUCCESS) {
2436
						_memory_huge_pages = 1;
2437
						if (large_page_minimum > _memory_page_size)
2438
							_memory_page_size = large_page_minimum;
2439
						if (large_page_minimum > _memory_map_granularity)
2440
							_memory_map_granularity = large_page_minimum;
2441
					}
2442
				}
2443
			}
2444
			CloseHandle(token);
2445
		}
2446
	}
2447
#endif
2448
2449
159
	size_t min_span_size = 256;
2450
159
	size_t max_page_size;
2451
#if UINTPTR_MAX > 0xFFFFFFFF
2452
159
	max_page_size = 4096ULL * 1024ULL * 1024ULL;
2453
#else
2454
	max_page_size = 4 * 1024 * 1024;
2455
#endif
2456
159
	if (_memory_page_size < min_span_size)
2457
		_memory_page_size = min_span_size;
2458
159
	if (_memory_page_size > max_page_size)
2459
		_memory_page_size = max_page_size;
2460
159
	_memory_page_size_shift = 0;
2461
159
	size_t page_size_bit = _memory_page_size;
2462
2067
	while (page_size_bit != 1) {
2463
1908
		++_memory_page_size_shift;
2464
1908
		page_size_bit >>= 1;
2465
	}
2466
159
	_memory_page_size = ((size_t)1 << _memory_page_size_shift);
2467
2468
#if RPMALLOC_CONFIGURABLE
2469
	if (!_memory_config.span_size) {
2470
		_memory_span_size = _memory_default_span_size;
2471
		_memory_span_size_shift = _memory_default_span_size_shift;
2472
		_memory_span_mask = _memory_default_span_mask;
2473
	} else {
2474
		size_t span_size = _memory_config.span_size;
2475
		if (span_size > (256 * 1024))
2476
			span_size = (256 * 1024);
2477
		_memory_span_size = 4096;
2478
		_memory_span_size_shift = 12;
2479
		while (_memory_span_size < span_size) {
2480
			_memory_span_size <<= 1;
2481
			++_memory_span_size_shift;
2482
		}
2483
		_memory_span_mask = ~(uintptr_t)(_memory_span_size - 1);
2484
	}
2485
#endif
2486
2487
159
	_memory_span_map_count = ( _memory_config.span_map_count ? _memory_config.span_map_count : DEFAULT_SPAN_MAP_COUNT);
2488
159
	if ((_memory_span_size * _memory_span_map_count) < _memory_page_size)
2489
		_memory_span_map_count = (_memory_page_size / _memory_span_size);
2490

159
	if ((_memory_page_size >= _memory_span_size) && ((_memory_span_map_count * _memory_span_size) % _memory_page_size))
2491
		_memory_span_map_count = (_memory_page_size / _memory_span_size);
2492
2493
159
	_memory_config.page_size = _memory_page_size;
2494
159
	_memory_config.span_size = _memory_span_size;
2495
159
	_memory_config.span_map_count = _memory_span_map_count;
2496
159
	_memory_config.enable_huge_pages = _memory_huge_pages;
2497
2498
159
	_memory_span_release_count = (_memory_span_map_count > 4 ? ((_memory_span_map_count < 64) ? _memory_span_map_count : 64) : 4);
2499
159
	_memory_span_release_count_large = (_memory_span_release_count > 8 ? (_memory_span_release_count / 4) : 2);
2500
2501
#if (defined(__APPLE__) || defined(__HAIKU__)) && ENABLE_PRELOAD
2502
	if (pthread_key_create(&_memory_thread_heap, _rpmalloc_heap_release_raw))
2503
		return -1;
2504
#endif
2505
#if defined(_WIN32) && (!defined(BUILD_DYNAMIC_LINK) || !BUILD_DYNAMIC_LINK)
2506
	fls_key = FlsAlloc(&_rpmalloc_thread_destructor);
2507
#endif
2508
2509
	//Setup all small and medium size classes
2510
159
	size_t iclass = 0;
2511
159
	_memory_size_class[iclass].block_size = SMALL_GRANULARITY;
2512
159
	_rpmalloc_adjust_size_class(iclass);
2513
10494
	for (iclass = 1; iclass < SMALL_CLASS_COUNT; ++iclass) {
2514
10176
		size_t size = iclass * SMALL_GRANULARITY;
2515
10176
		_memory_size_class[iclass].block_size = (uint32_t)size;
2516
10176
		_rpmalloc_adjust_size_class(iclass);
2517
	}
2518
	//At least two blocks per span, then fall back to large allocations
2519
159
	_memory_medium_size_limit = (_memory_span_size - SPAN_HEADER_SIZE) >> 1;
2520
159
	if (_memory_medium_size_limit > MEDIUM_SIZE_LIMIT)
2521
159
		_memory_medium_size_limit = MEDIUM_SIZE_LIMIT;
2522
9858
	for (iclass = 0; iclass < MEDIUM_CLASS_COUNT; ++iclass) {
2523
9699
		size_t size = SMALL_SIZE_LIMIT + ((iclass + 1) * MEDIUM_GRANULARITY);
2524
9699
		if (size > _memory_medium_size_limit)
2525
			break;
2526
9699
		_memory_size_class[SMALL_CLASS_COUNT + iclass].block_size = (uint32_t)size;
2527
9699
		_rpmalloc_adjust_size_class(SMALL_CLASS_COUNT + iclass);
2528
	}
2529
2530
159
	_memory_orphan_heaps = 0;
2531
#if RPMALLOC_FIRST_CLASS_HEAPS
2532
	_memory_first_class_orphan_heaps = 0;
2533
#endif
2534
159
	memset(_memory_heaps, 0, sizeof(_memory_heaps));
2535
159
	atomic_store32_release(&_memory_orphan_lock, 0);
2536
2537
	//Initialize this thread
2538
159
	rpmalloc_thread_initialize();
2539
159
	return 0;
2540
}
2541
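
rpmalloc_initialize_config derives _memory_page_size_shift by shifting the page size down to 1 and then rebuilds the page size as 1 << shift, which truncates any odd value to the largest power of two not above it. A short sketch of that computation with an assumed 4096-byte input:

#include <stdio.h>

int
main(void) {
	size_t page_size = 4096;   /* e.g. the value reported by sysconf(_SC_PAGESIZE) */
	size_t shift = 0;
	size_t bit = page_size;
	while (bit != 1) {         /* count log2; truncates non-power-of-two inputs */
		++shift;
		bit >>= 1;
	}
	size_t rounded = (size_t)1 << shift;
	printf("page size %zu -> shift %zu, rounded %zu\n", page_size, shift, rounded);
	return 0;
}
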
2542
//! Finalize the allocator
2543
void
2544
159
rpmalloc_finalize(void) {
2545
159
	rpmalloc_thread_finalize();
2546
	//rpmalloc_dump_statistics(stdout);
2547
2548
	//Free all thread caches and fully free spans
2549
7632
	for (size_t list_idx = 0; list_idx < HEAP_ARRAY_SIZE; ++list_idx) {
2550
7473
		heap_t* heap = _memory_heaps[list_idx];
2551
7636
		while (heap) {
2552
163
			heap_t* next_heap = heap->next_heap;
2553
163
			heap->finalize = 1;
2554
163
			_rpmalloc_heap_global_finalize(heap);
2555
163
			heap = next_heap;
2556
		}
2557
	}
2558
2559
#if ENABLE_GLOBAL_CACHE
2560
	//Free global caches
2561
10176
	for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass)
2562
10017
		_rpmalloc_global_cache_finalize(&_memory_span_cache[iclass]);
2563
#endif
2564
2565
#if (defined(__APPLE__) || defined(__HAIKU__)) && ENABLE_PRELOAD
2566
	pthread_key_delete(_memory_thread_heap);
2567
#endif
2568
#if defined(_WIN32) && (!defined(BUILD_DYNAMIC_LINK) || !BUILD_DYNAMIC_LINK)
2569
	FlsFree(fls_key);
2570
	fls_key = 0;
2571
#endif
2572
#if ENABLE_STATISTICS
2573
	//If you hit these asserts you probably have memory leaks (perhaps global scope data doing dynamic allocations) or double frees in your code
2574
	assert(!atomic_load32(&_mapped_pages));
2575
	assert(!atomic_load32(&_reserved_spans));
2576
	assert(!atomic_load32(&_mapped_pages_os));
2577
#endif
2578
2579
159
	_rpmalloc_initialized = 0;
2580
159
}
2581
2582
//! Initialize thread, assign heap
2583
extern inline void
2584
163
rpmalloc_thread_initialize(void) {
2585
163
	if (!get_thread_heap_raw()) {
2586
163
		heap_t* heap = _rpmalloc_heap_allocate(0);
2587
163
		if (heap) {
2588
163
			_rpmalloc_stat_inc(&_memory_active_heaps);
2589
163
			set_thread_heap(heap);
2590
#if defined(_WIN32) && (!defined(BUILD_DYNAMIC_LINK) || !BUILD_DYNAMIC_LINK)
2591
			FlsSetValue(fls_key, heap);
2592
#endif
2593
		}
2594
	}
2595
163
}
2596
2597
//! Finalize thread, orphan heap
2598
void
2599
163
rpmalloc_thread_finalize(void) {
2600
163
	heap_t* heap = get_thread_heap_raw();
2601
163
	if (heap)
2602
163
		_rpmalloc_heap_release_raw(heap);
2603
163
	set_thread_heap(0);
2604
#if defined(_WIN32) && (!defined(BUILD_DYNAMIC_LINK) || !BUILD_DYNAMIC_LINK)
2605
	FlsSetValue(fls_key, 0);
2606
#endif
2607
163
}
2608
2609
int
2610
rpmalloc_is_thread_initialized(void) {
2611
	return (get_thread_heap_raw() != 0) ? 1 : 0;
2612
}
2613
2614
const rpmalloc_config_t*
2615
rpmalloc_config(void) {
2616
	return &_memory_config;
2617
}
2618
2619
// Extern interface
2620
2621
extern inline RPMALLOC_ALLOCATOR void*
2622
3988
rpmalloc(size_t size) {
2623
#if ENABLE_VALIDATE_ARGS
2624
	if (size >= MAX_ALLOC_SIZE) {
2625
		errno = EINVAL;
2626
		return 0;
2627
	}
2628
#endif
2629
3988
	heap_t* heap = get_thread_heap();
2630
3988
	return _rpmalloc_allocate(heap, size);
2631
}
2632
2633
extern inline void
2634
8852
rpfree(void* ptr) {
2635
8852
	_rpmalloc_deallocate(ptr);
2636
8852
}
2637
2638
extern inline RPMALLOC_ALLOCATOR void*
2639
4991
rpcalloc(size_t num, size_t size) {
2640
4991
	size_t total;
2641
#if ENABLE_VALIDATE_ARGS
2642
#if PLATFORM_WINDOWS
2643
	int err = SizeTMult(num, size, &total);
2644
	if ((err != S_OK) || (total >= MAX_ALLOC_SIZE)) {
2645
		errno = EINVAL;
2646
		return 0;
2647
	}
2648
#else
2649
	int err = __builtin_umull_overflow(num, size, &total);
2650
	if (err || (total >= MAX_ALLOC_SIZE)) {
2651
		errno = EINVAL;
2652
		return 0;
2653
	}
2654
#endif
2655
#else
2656
4991
	total = num * size;
2657
#endif
2658
4991
	heap_t* heap = get_thread_heap();
2659
4991
	void* block = _rpmalloc_allocate(heap, total);
2660
4991
	if (block)
2661
4991
		memset(block, 0, total);
2662
4991
	return block;
2663
}
2664
2665
extern inline RPMALLOC_ALLOCATOR void*
2666
148
rprealloc(void* ptr, size_t size) {
2667
#if ENABLE_VALIDATE_ARGS
2668
	if (size >= MAX_ALLOC_SIZE) {
2669
		errno = EINVAL;
2670
		return ptr;
2671
	}
2672
#endif
2673
148
	heap_t* heap = get_thread_heap();
2674
148
	return _rpmalloc_reallocate(heap, ptr, size, 0, 0);
2675
}
2676
2677
extern RPMALLOC_ALLOCATOR void*
2678
rpaligned_realloc(void* ptr, size_t alignment, size_t size, size_t oldsize,
2679
                  unsigned int flags) {
2680
#if ENABLE_VALIDATE_ARGS
2681
	if ((size + alignment < size) || (alignment > _memory_page_size)) {
2682
		errno = EINVAL;
2683
		return 0;
2684
	}
2685
#endif
2686
	heap_t* heap = get_thread_heap();
2687
	return _rpmalloc_aligned_reallocate(heap, ptr, alignment, size, oldsize, flags);
2688
}
2689
2690
extern RPMALLOC_ALLOCATOR void*
2691
rpaligned_alloc(size_t alignment, size_t size) {
2692
	heap_t* heap = get_thread_heap();
2693
	return _rpmalloc_aligned_allocate(heap, alignment, size);
2694
}
2695
2696
extern inline RPMALLOC_ALLOCATOR void*
2697
rpaligned_calloc(size_t alignment, size_t num, size_t size) {
2698
	size_t total;
2699
#if ENABLE_VALIDATE_ARGS
2700
#if PLATFORM_WINDOWS
2701
	int err = SizeTMult(num, size, &total);
2702
	if ((err != S_OK) || (total >= MAX_ALLOC_SIZE)) {
2703
		errno = EINVAL;
2704
		return 0;
2705
	}
2706
#else
2707
	int err = __builtin_umull_overflow(num, size, &total);
2708
	if (err || (total >= MAX_ALLOC_SIZE)) {
2709
		errno = EINVAL;
2710
		return 0;
2711
	}
2712
#endif
2713
#else
2714
	total = num * size;
2715
#endif
2716
	void* block = rpaligned_alloc(alignment, total);
2717
	if (block)
2718
		memset(block, 0, total);
2719
	return block;
2720
}
2721
2722
extern inline RPMALLOC_ALLOCATOR void*
2723
rpmemalign(size_t alignment, size_t size) {
2724
	return rpaligned_alloc(alignment, size);
2725
}
2726
2727
extern inline int
2728
rpposix_memalign(void **memptr, size_t alignment, size_t size) {
2729
	if (memptr)
2730
		*memptr = rpaligned_alloc(alignment, size);
2731
	else
2732
		return EINVAL;
2733
	return *memptr ? 0 : ENOMEM;
2734
}
2735
2736
extern inline size_t
2737
rpmalloc_usable_size(void* ptr) {
2738
	return (ptr ? _rpmalloc_usable_size(ptr) : 0);
2739
}
2740
2741
extern inline void
2742
rpmalloc_thread_collect(void) {
2743
}
2744
2745
void
2746
rpmalloc_thread_statistics(rpmalloc_thread_statistics_t* stats) {
2747
	memset(stats, 0, sizeof(rpmalloc_thread_statistics_t));
2748
	heap_t* heap = get_thread_heap_raw();
2749
	if (!heap)
2750
		return;
2751
2752
	for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) {
2753
		size_class_t* size_class = _memory_size_class + iclass;
2754
		span_t* span = heap->size_class[iclass].partial_span;
2755
		while (span) {
2756
			size_t free_count = span->list_size;
2757
			size_t block_count = size_class->block_count;
2758
			if (span->free_list_limit < block_count)
2759
				block_count = span->free_list_limit;
2760
			free_count += (block_count - span->used_count);
2761
			stats->sizecache = free_count * size_class->block_size;
2762
			span = span->next;
2763
		}
2764
	}
2765
2766
#if ENABLE_THREAD_CACHE
2767
	for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
2768
		span_cache_t* span_cache;
2769
		if (!iclass)
2770
			span_cache = &heap->span_cache;
2771
		else
2772
			span_cache = (span_cache_t*)(heap->span_large_cache + (iclass - 1));
2773
		stats->spancache = span_cache->count * (iclass + 1) * _memory_span_size;
2774
	}
2775
#endif
2776
2777
	span_t* deferred = (span_t*)atomic_load_ptr(&heap->span_free_deferred);
2778
	while (deferred) {
2779
		if (deferred->size_class != SIZE_CLASS_HUGE)
2780
			stats->spancache = (size_t)deferred->span_count * _memory_span_size;
2781
		deferred = (span_t*)deferred->free_list;
2782
	}
2783
2784
#if ENABLE_STATISTICS
2785
	stats->thread_to_global = (size_t)atomic_load64(&heap->thread_to_global);
2786
	stats->global_to_thread = (size_t)atomic_load64(&heap->global_to_thread);
2787
2788
	for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
2789
		stats->span_use[iclass].current = (size_t)atomic_load32(&heap->span_use[iclass].current);
2790
		stats->span_use[iclass].peak = (size_t)atomic_load32(&heap->span_use[iclass].high);
2791
		stats->span_use[iclass].to_global = (size_t)atomic_load32(&heap->span_use[iclass].spans_to_global);
2792
		stats->span_use[iclass].from_global = (size_t)atomic_load32(&heap->span_use[iclass].spans_from_global);
2793
		stats->span_use[iclass].to_cache = (size_t)atomic_load32(&heap->span_use[iclass].spans_to_cache);
2794
		stats->span_use[iclass].from_cache = (size_t)atomic_load32(&heap->span_use[iclass].spans_from_cache);
2795
		stats->span_use[iclass].to_reserved = (size_t)atomic_load32(&heap->span_use[iclass].spans_to_reserved);
2796
		stats->span_use[iclass].from_reserved = (size_t)atomic_load32(&heap->span_use[iclass].spans_from_reserved);
2797
		stats->span_use[iclass].map_calls = (size_t)atomic_load32(&heap->span_use[iclass].spans_map_calls);
2798
	}
2799
	for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) {
2800
		stats->size_use[iclass].alloc_current = (size_t)atomic_load32(&heap->size_class_use[iclass].alloc_current);
2801
		stats->size_use[iclass].alloc_peak = (size_t)heap->size_class_use[iclass].alloc_peak;
2802
		stats->size_use[iclass].alloc_total = (size_t)atomic_load32(&heap->size_class_use[iclass].alloc_total);
2803
		stats->size_use[iclass].free_total = (size_t)atomic_load32(&heap->size_class_use[iclass].free_total);
2804
		stats->size_use[iclass].spans_to_cache = (size_t)atomic_load32(&heap->size_class_use[iclass].spans_to_cache);
2805
		stats->size_use[iclass].spans_from_cache = (size_t)atomic_load32(&heap->size_class_use[iclass].spans_from_cache);
2806
		stats->size_use[iclass].spans_from_reserved = (size_t)atomic_load32(&heap->size_class_use[iclass].spans_from_reserved);
2807
		stats->size_use[iclass].map_calls = (size_t)atomic_load32(&heap->size_class_use[iclass].spans_map_calls);
2808
	}
2809
#endif
2810
}
2811
2812
void
2813
rpmalloc_global_statistics(rpmalloc_global_statistics_t* stats) {
2814
	memset(stats, 0, sizeof(rpmalloc_global_statistics_t));
2815
#if ENABLE_STATISTICS
2816
	stats->mapped = (size_t)atomic_load32(&_mapped_pages) * _memory_page_size;
2817
	stats->mapped_peak = (size_t)_mapped_pages_peak * _memory_page_size;
2818
	stats->mapped_total = (size_t)atomic_load32(&_mapped_total) * _memory_page_size;
2819
	stats->unmapped_total = (size_t)atomic_load32(&_unmapped_total) * _memory_page_size;
2820
	stats->huge_alloc = (size_t)atomic_load32(&_huge_pages_current) * _memory_page_size;
2821
	stats->huge_alloc_peak = (size_t)_huge_pages_peak * _memory_page_size;
2822
#endif
2823
#if ENABLE_GLOBAL_CACHE
2824
	for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass)
2825
		stats->cached += _memory_span_cache[iclass].count * (iclass + 1) * _memory_span_size;
2826
#endif
2827
}
2828
2829
#if ENABLE_STATISTICS

static void
_memory_heap_dump_statistics(heap_t* heap, void* file) {
	fprintf(file, "Heap %d stats:\n", heap->id);
	fprintf(file, "Class   CurAlloc  PeakAlloc   TotAlloc    TotFree  BlkSize BlkCount SpansCur SpansPeak  PeakAllocMiB  ToCacheMiB FromCacheMiB FromReserveMiB MmapCalls\n");
	for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) {
		if (!atomic_load32(&heap->size_class_use[iclass].alloc_total))
			continue;
		fprintf(file, "%3u:  %10u %10u %10u %10u %8u %8u %8d %9d %13zu %11zu %12zu %14zu %9u\n", (uint32_t)iclass,
			atomic_load32(&heap->size_class_use[iclass].alloc_current),
			heap->size_class_use[iclass].alloc_peak,
			atomic_load32(&heap->size_class_use[iclass].alloc_total),
			atomic_load32(&heap->size_class_use[iclass].free_total),
			_memory_size_class[iclass].block_size,
			_memory_size_class[iclass].block_count,
			atomic_load32(&heap->size_class_use[iclass].spans_current),
			heap->size_class_use[iclass].spans_peak,
			((size_t)heap->size_class_use[iclass].alloc_peak * (size_t)_memory_size_class[iclass].block_size) / (size_t)(1024 * 1024),
			((size_t)atomic_load32(&heap->size_class_use[iclass].spans_to_cache) * _memory_span_size) / (size_t)(1024 * 1024),
			((size_t)atomic_load32(&heap->size_class_use[iclass].spans_from_cache) * _memory_span_size) / (size_t)(1024 * 1024),
			((size_t)atomic_load32(&heap->size_class_use[iclass].spans_from_reserved) * _memory_span_size) / (size_t)(1024 * 1024),
			atomic_load32(&heap->size_class_use[iclass].spans_map_calls));
	}
	fprintf(file, "Spans  Current     Peak  PeakMiB  Cached  ToCacheMiB FromCacheMiB ToReserveMiB FromReserveMiB ToGlobalMiB FromGlobalMiB  MmapCalls\n");
	for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
		if (!atomic_load32(&heap->span_use[iclass].high) && !atomic_load32(&heap->span_use[iclass].spans_map_calls))
			continue;
		fprintf(file, "%4u: %8d %8u %8zu %7u %11zu %12zu %12zu %14zu %11zu %13zu %10u\n", (uint32_t)(iclass + 1),
			atomic_load32(&heap->span_use[iclass].current),
			atomic_load32(&heap->span_use[iclass].high),
			((size_t)atomic_load32(&heap->span_use[iclass].high) * (size_t)_memory_span_size * (iclass + 1)) / (size_t)(1024 * 1024),
#if ENABLE_THREAD_CACHE
			(unsigned int)(!iclass ? heap->span_cache.count : heap->span_large_cache[iclass - 1].count),
			((size_t)atomic_load32(&heap->span_use[iclass].spans_to_cache) * (iclass + 1) * _memory_span_size) / (size_t)(1024 * 1024),
			((size_t)atomic_load32(&heap->span_use[iclass].spans_from_cache) * (iclass + 1) * _memory_span_size) / (size_t)(1024 * 1024),
#else
			0, (size_t)0, (size_t)0,
#endif
			((size_t)atomic_load32(&heap->span_use[iclass].spans_to_reserved) * (iclass + 1) * _memory_span_size) / (size_t)(1024 * 1024),
			((size_t)atomic_load32(&heap->span_use[iclass].spans_from_reserved) * (iclass + 1) * _memory_span_size) / (size_t)(1024 * 1024),
			((size_t)atomic_load32(&heap->span_use[iclass].spans_to_global) * (size_t)_memory_span_size * (iclass + 1)) / (size_t)(1024 * 1024),
			((size_t)atomic_load32(&heap->span_use[iclass].spans_from_global) * (size_t)_memory_span_size * (iclass + 1)) / (size_t)(1024 * 1024),
			atomic_load32(&heap->span_use[iclass].spans_map_calls));
	}
	fprintf(file, "ThreadToGlobalMiB GlobalToThreadMiB\n");
	fprintf(file, "%17zu %17zu\n", (size_t)atomic_load64(&heap->thread_to_global) / (size_t)(1024 * 1024), (size_t)atomic_load64(&heap->global_to_thread) / (size_t)(1024 * 1024));
}

#endif

void
rpmalloc_dump_statistics(void* file) {
#if ENABLE_STATISTICS
	// If you hit this assert, you still have active threads or forgot to finalize some thread(s)
	assert(atomic_load32(&_memory_active_heaps) == 0);
	for (size_t list_idx = 0; list_idx < HEAP_ARRAY_SIZE; ++list_idx) {
		heap_t* heap = _memory_heaps[list_idx];
		while (heap) {
			int need_dump = 0;
			for (size_t iclass = 0; !need_dump && (iclass < SIZE_CLASS_COUNT); ++iclass) {
				if (!atomic_load32(&heap->size_class_use[iclass].alloc_total)) {
					assert(!atomic_load32(&heap->size_class_use[iclass].free_total));
					assert(!atomic_load32(&heap->size_class_use[iclass].spans_map_calls));
					continue;
				}
				need_dump = 1;
			}
			for (size_t iclass = 0; !need_dump && (iclass < LARGE_CLASS_COUNT); ++iclass) {
				if (!atomic_load32(&heap->span_use[iclass].high) && !atomic_load32(&heap->span_use[iclass].spans_map_calls))
					continue;
				need_dump = 1;
			}
			if (need_dump)
				_memory_heap_dump_statistics(heap, file);
			heap = heap->next_heap;
		}
	}
	fprintf(file, "Global stats:\n");
	size_t huge_current = (size_t)atomic_load32(&_huge_pages_current) * _memory_page_size;
	size_t huge_peak = (size_t)_huge_pages_peak * _memory_page_size;
	fprintf(file, "HugeCurrentMiB HugePeakMiB\n");
	fprintf(file, "%14zu %11zu\n", huge_current / (size_t)(1024 * 1024), huge_peak / (size_t)(1024 * 1024));

	size_t mapped = (size_t)atomic_load32(&_mapped_pages) * _memory_page_size;
	size_t mapped_os = (size_t)atomic_load32(&_mapped_pages_os) * _memory_page_size;
	size_t mapped_peak = (size_t)_mapped_pages_peak * _memory_page_size;
	size_t mapped_total = (size_t)atomic_load32(&_mapped_total) * _memory_page_size;
	size_t unmapped_total = (size_t)atomic_load32(&_unmapped_total) * _memory_page_size;
	size_t reserved_total = (size_t)atomic_load32(&_reserved_spans) * _memory_span_size;
	fprintf(file, "MappedMiB MappedOSMiB MappedPeakMiB MappedTotalMiB UnmappedTotalMiB ReservedTotalMiB\n");
	fprintf(file, "%9zu %11zu %13zu %14zu %16zu %16zu\n",
		mapped / (size_t)(1024 * 1024),
		mapped_os / (size_t)(1024 * 1024),
		mapped_peak / (size_t)(1024 * 1024),
		mapped_total / (size_t)(1024 * 1024),
		unmapped_total / (size_t)(1024 * 1024),
		reserved_total / (size_t)(1024 * 1024));

	fprintf(file, "\n");
#else
	(void)sizeof(file);
#endif
}

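/* Illustrative usage sketch (editor's addition, not part of rpmalloc): dumping the tables
 * produced above at process shutdown. The assert in rpmalloc_dump_statistics() requires
 * that every thread has already called rpmalloc_thread_finalize(), so the dump is placed
 * just before rpmalloc_finalize(). Assumes <stdio.h> for stdout. */
static void
example_dump_statistics_at_exit(void) {
	// Precondition: all threads have finalized their heaps.
	rpmalloc_dump_statistics(stdout);
	rpmalloc_finalize();
}
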
#if RPMALLOC_FIRST_CLASS_HEAPS

extern inline rpmalloc_heap_t*
rpmalloc_heap_acquire(void) {
	// Must be a pristine heap from newly mapped memory pages, or else memory blocks
	// could already be allocated from the heap, which would (wrongly) be released when
	// the heap is cleared with rpmalloc_heap_free_all(). Heaps guaranteed to be pristine
	// from the dedicated orphan list can also be used.
	heap_t* heap = _rpmalloc_heap_allocate(1);
	heap->owner_thread = 0;
	_rpmalloc_stat_inc(&_memory_active_heaps);
	return heap;
}

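/* Illustrative usage sketch (editor's addition, not part of rpmalloc): the typical
 * lifecycle of a first class heap, using only the functions defined in this section.
 * Assumes the allocator has already been initialized with rpmalloc_initialize(). */
static void
example_heap_lifecycle(void) {
	rpmalloc_heap_t* heap = rpmalloc_heap_acquire();
	void* block = rpmalloc_heap_alloc(heap, 128);
	void* array = rpmalloc_heap_calloc(heap, 16, sizeof(double));
	// Individual blocks can be freed through the heap API...
	rpmalloc_heap_free(heap, block);
	rpmalloc_heap_free(heap, array);
	// ...and the heap itself is handed back when done.
	rpmalloc_heap_release(heap);
}
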
extern inline void
rpmalloc_heap_release(rpmalloc_heap_t* heap) {
	if (heap)
		_rpmalloc_heap_release(heap, 1);
}

extern inline RPMALLOC_ALLOCATOR void*
rpmalloc_heap_alloc(rpmalloc_heap_t* heap, size_t size) {
#if ENABLE_VALIDATE_ARGS
	if (size >= MAX_ALLOC_SIZE) {
		errno = EINVAL;
		return 0;
	}
#endif
	return _rpmalloc_allocate(heap, size);
}

extern inline RPMALLOC_ALLOCATOR void*
rpmalloc_heap_aligned_alloc(rpmalloc_heap_t* heap, size_t alignment, size_t size) {
#if ENABLE_VALIDATE_ARGS
	if (size >= MAX_ALLOC_SIZE) {
		errno = EINVAL;
		return 0;
	}
#endif
	return _rpmalloc_aligned_allocate(heap, alignment, size);
}

extern inline RPMALLOC_ALLOCATOR void*
rpmalloc_heap_calloc(rpmalloc_heap_t* heap, size_t num, size_t size) {
	return rpmalloc_heap_aligned_calloc(heap, 0, num, size);
}

extern inline RPMALLOC_ALLOCATOR void*
rpmalloc_heap_aligned_calloc(rpmalloc_heap_t* heap, size_t alignment, size_t num, size_t size) {
	size_t total;
#if ENABLE_VALIDATE_ARGS
#if PLATFORM_WINDOWS
	int err = SizeTMult(num, size, &total);
	if ((err != S_OK) || (total >= MAX_ALLOC_SIZE)) {
		errno = EINVAL;
		return 0;
	}
#else
	int err = __builtin_umull_overflow(num, size, &total);
	if (err || (total >= MAX_ALLOC_SIZE)) {
		errno = EINVAL;
		return 0;
	}
#endif
#else
	total = num * size;
#endif
	void* block = _rpmalloc_aligned_allocate(heap, alignment, total);
	if (block)
		memset(block, 0, total);
	return block;
}

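/* Illustrative sketch (editor's addition, not part of rpmalloc): the validate-args path
 * above detects overflow in num * size with SizeTMult() or __builtin_umull_overflow().
 * A portable equivalent is a division check; this hypothetical helper only spells out the
 * same intent and assumes <stdint.h> for SIZE_MAX. */
static int
example_size_mul_overflows(size_t num, size_t size, size_t* total) {
	if (num && (size > (SIZE_MAX / num)))
		return 1;   // num * size would not fit in a size_t
	*total = num * size;
	return 0;
}
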
extern inline RPMALLOC_ALLOCATOR void*
rpmalloc_heap_realloc(rpmalloc_heap_t* heap, void* ptr, size_t size, unsigned int flags) {
#if ENABLE_VALIDATE_ARGS
	if (size >= MAX_ALLOC_SIZE) {
		errno = EINVAL;
		return ptr;
	}
#endif
	return _rpmalloc_reallocate(heap, ptr, size, 0, flags);
}

extern inline RPMALLOC_ALLOCATOR void*
rpmalloc_heap_aligned_realloc(rpmalloc_heap_t* heap, void* ptr, size_t alignment, size_t size, unsigned int flags) {
#if ENABLE_VALIDATE_ARGS
	if ((size + alignment < size) || (alignment > _memory_page_size)) {
		errno = EINVAL;
		return 0;
	}
#endif
	return _rpmalloc_aligned_reallocate(heap, ptr, alignment, size, 0, flags);
}

extern inline void
rpmalloc_heap_free(rpmalloc_heap_t* heap, void* ptr) {
	(void)sizeof(heap);
	_rpmalloc_deallocate(ptr);
}

extern inline void
rpmalloc_heap_free_all(rpmalloc_heap_t* heap) {
	span_t* span;
	span_t* next_span;

	_rpmalloc_heap_cache_adopt_deferred(heap, 0);

	for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) {
		span = heap->size_class[iclass].partial_span;
		while (span) {
			next_span = span->next;
			_rpmalloc_heap_cache_insert(heap, span);
			span = next_span;
		}
		heap->size_class[iclass].partial_span = 0;
		span = heap->full_span[iclass];
		while (span) {
			next_span = span->next;
			_rpmalloc_heap_cache_insert(heap, span);
			span = next_span;
		}
	}
	memset(heap->size_class, 0, sizeof(heap->size_class));
	memset(heap->full_span, 0, sizeof(heap->full_span));

	span = heap->large_huge_span;
	while (span) {
		next_span = span->next;
		if (UNEXPECTED(span->size_class == SIZE_CLASS_HUGE))
			_rpmalloc_deallocate_huge(span);
		else
			_rpmalloc_heap_cache_insert(heap, span);
		span = next_span;
	}
	heap->large_huge_span = 0;
	heap->full_span_count = 0;

#if ENABLE_THREAD_CACHE
	for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
		span_cache_t* span_cache;
		if (!iclass)
			span_cache = &heap->span_cache;
		else
			span_cache = (span_cache_t*)(heap->span_large_cache + (iclass - 1));
		if (!span_cache->count)
			continue;
#if ENABLE_GLOBAL_CACHE
		_rpmalloc_stat_add64(&heap->thread_to_global, span_cache->count * (iclass + 1) * _memory_span_size);
		_rpmalloc_stat_add(&heap->span_use[iclass].spans_to_global, span_cache->count);
		_rpmalloc_global_cache_insert_spans(span_cache->span, iclass + 1, span_cache->count);
#else
		for (size_t ispan = 0; ispan < span_cache->count; ++ispan)
			_rpmalloc_span_unmap(span_cache->span[ispan]);
#endif
		span_cache->count = 0;
	}
#endif

#if ENABLE_STATISTICS
	for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) {
		atomic_store32(&heap->size_class_use[iclass].alloc_current, 0);
		atomic_store32(&heap->size_class_use[iclass].spans_current, 0);
	}
	for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
		atomic_store32(&heap->span_use[iclass].current, 0);
	}
#endif
}

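/* Illustrative usage sketch (editor's addition, not part of rpmalloc): using
 * rpmalloc_heap_free_all() above as an arena-style reset. Everything allocated from the
 * heap is recycled into its caches (or unmapped) in one call, so per-block
 * rpmalloc_heap_free() is unnecessary for this pattern. The loop bound is arbitrary. */
static void
example_heap_as_arena(rpmalloc_heap_t* heap) {
	for (int i = 0; i < 1000; ++i) {
		void* scratch = rpmalloc_heap_alloc(heap, 256);
		(void)scratch;   // ...build some transient data structure...
	}
	// Drop everything allocated from this heap at once.
	rpmalloc_heap_free_all(heap);
}
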
extern inline void
rpmalloc_heap_thread_set_current(rpmalloc_heap_t* heap) {
	heap_t* prev_heap = get_thread_heap_raw();
	if (prev_heap != heap) {
		set_thread_heap(heap);
		if (prev_heap)
			rpmalloc_heap_release(prev_heap);
	}
}

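/* Illustrative usage sketch (editor's addition, not part of rpmalloc): binding a first
 * class heap to the calling thread with rpmalloc_heap_thread_set_current() above, so that
 * subsequent implicit-heap calls such as rpmalloc()/rpfree() are served from it. As the
 * code shows, any previously bound heap is released in the process. */
static void
example_bind_heap_to_thread(rpmalloc_heap_t* heap) {
	rpmalloc_heap_thread_set_current(heap);
	void* block = rpmalloc(64);   // now allocated from the bound heap
	rpfree(block);
}
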
#endif

#if ENABLE_PRELOAD || ENABLE_OVERRIDE

#include "malloc.c"

#endif