2 * Copyright (c) 2005 Hewlett-Packard Development Company, L.P.
3 * Original Author: Hans Boehm
5 * This file may be redistributed and/or modified under the
6 * terms of the GNU General Public License as published by the Free Software
7 * Foundation; either version 2, or (at your option) any later version.
9 * It is distributed in the hope that it will be useful, but WITHOUT ANY
10 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11 * FOR A PARTICULAR PURPOSE. See the GNU General Public License in the
12 * file doc/COPYING for more details.
15 #if defined(HAVE_CONFIG_H)
19 #define AO_REQUIRE_CAS
20 #include "atomic_ops_stack.h"
21 #include <string.h> /* for ffs, which is assumed reentrant. */
23 #ifdef AO_TRACE_MALLOC
29 * We round up each allocation request to the next power of two
31 * We keep one stack of free objects for each size. Each object
32 * has an initial word (offset -sizeof(AO_t) from the visible pointer)
33 * which contains either
34 * The binary log of the object size in bytes (small objects)
35 * The object size (a multiple of CHUNK_SIZE) for large objects.
36 * The second case only arises if mmap-based allocation is supported.
37 * We align the user-visible part of each object on a GRANULARITY
38 * byte boundary. That means that the actual (hidden) start of
39 * the object lies one word before this boundary.
43 # define LOG_MAX_SIZE 16
44 /* We assume that 2**LOG_MAX_SIZE is a multiple of page size. */
49 /* Assumed to be at least sizeof(AO_t). */
52 #define CHUNK_SIZE (1 << LOG_MAX_SIZE)
/* Statically allocated initial heap, carved into CHUNK_SIZE chunks    */
/* before any mmap-based allocation is attempted.  initial_heap_ptr is */
/* the current allocation cursor (advanced with CAS elsewhere in this  */
/* file); initial_heap_lim is the exclusive end of the static heap.    */
54 #ifndef AO_INITIAL_HEAP_SIZE
55 # define AO_INITIAL_HEAP_SIZE (2*(LOG_MAX_SIZE+1)*CHUNK_SIZE)
58 char AO_initial_heap[AO_INITIAL_HEAP_SIZE];
60 static volatile AO_t initial_heap_ptr = (AO_t)AO_initial_heap;
61 static volatile char *initial_heap_lim = AO_initial_heap + AO_INITIAL_HEAP_SIZE;
63 #if defined(HAVE_MMAP)
65 #include <sys/types.h>
70 #if defined(MAP_ANONYMOUS) || defined(MAP_ANON)
71 # define USE_MMAP_ANON
75 # define GC_MMAP_FLAGS MAP_FIXED | MAP_PRIVATE
76 /* Seems to yield better performance on Solaris 2, but can */
77 /* be unreliable if something is already mapped at the address. */
79 # define GC_MMAP_FLAGS MAP_PRIVATE
84 # define OPT_MAP_ANON MAP_ANONYMOUS
86 # define OPT_MAP_ANON MAP_ANON
89 # define OPT_MAP_ANON 0
92 static volatile AO_t mmap_enabled = 0;
/* Enable mmap-based allocation of objects larger than CHUNK_SIZE.     */
/* NOTE(review): only fragments of this function are visible here; the */
/* enclosing braces and the #if that selects between the two stores    */
/* below are not shown.  Presumably AO_store_release is the normal     */
/* path and the plain AO_store is the Sun CC workaround -- confirm     */
/* against the full upstream source.                                   */
95 AO_malloc_enable_mmap(void)
98 AO_store_release(&mmap_enabled, 1);
99 /* Workaround for Sun CC */
101 AO_store(&mmap_enabled, 1);
/* Map sz bytes of zero-filled memory, where sz must be a multiple of  */
/* CHUNK_SIZE.  Uses anonymous mapping where available, otherwise maps */
/* /dev/zero.  NOTE(review): several lines of this function (variable  */
/* declarations, the mmap_enabled check, fd close, #else/#endif arms,  */
/* and the return) are missing from this excerpt.                      */
105 static char *get_mmaped(size_t sz)
108 # ifdef USE_MMAP_ANON
114 assert(!(sz & (CHUNK_SIZE - 1)));
118 # ifndef USE_MMAP_ANON
/* Without anonymous mapping support, map /dev/zero for zero-filled    */
/* pages.  Presumably zero_fd is closed after the mmap call -- the     */
/* close is in the elided lines; confirm upstream.                     */
119 zero_fd = open("/dev/zero", O_RDONLY);
123 result = mmap(0, sz, PROT_READ | PROT_WRITE,
124 GC_MMAP_FLAGS | OPT_MAP_ANON, zero_fd, 0/* offset */);
125 # ifndef USE_MMAP_ANON
/* mmap reports failure via MAP_FAILED, not NULL.                      */
128 if (result == MAP_FAILED)
133 /* Allocate an object whose total size (incl. header) exceeds CHUNK_SIZE. */
134 /* sz includes space for an AO_t-sized header.                            */
136 AO_malloc_large(size_t sz)
139 /* The header will force us to waste ALIGNMENT bytes, incl. header. */
141 /* Round to multiple of CHUNK_SIZE. */
142 sz = (sz + CHUNK_SIZE - 1) & ~(CHUNK_SIZE - 1);
143 result = get_mmaped(sz);
144 if (result == 0) return 0;
/* Record the rounded total size in the hidden header word just before */
/* the user-visible pointer; AO_free_large reads it back via [-1].     */
/* NOTE(review): the adjustment of `result` past the header/ALIGNMENT  */
/* and the return statement are in lines elided from this excerpt.     */
146 ((AO_t *)result)[-1] = (AO_t)sz;
/* Free a large (mmap-allocated) object p.  The mapped size was stored */
/* in the header word at p[-1] by AO_malloc_large; the mapping itself  */
/* starts ALIGNMENT bytes before the user-visible pointer.             */
151 AO_free_large(char * p)
153 AO_t sz = ((AO_t *)p)[-1];
154 if (munmap(p - ALIGNMENT, (size_t)sz) != 0)
155 abort(); /* Programmer error. Not really async-signal-safe, but ... */
/* Fallback definitions for builds without HAVE_MMAP: enabling mmap is */
/* a no-op, large allocation cannot succeed, and freeing a large       */
/* object is a programmer error.  NOTE(review): the stub bodies are    */
/* largely elided from this excerpt; only the signatures and the       */
/* abort() in the AO_free_large stub are visible.                      */
162 AO_malloc_enable_mmap(void)
166 static char *get_mmaped(size_t sz)
172 AO_malloc_large(size_t sz)
178 AO_free_large(char * p)
180 abort(); /* Programmer error. Not really async-signal-safe, but ... */
/* Body fragment of the chunk allocator (the signature -- presumably   */
/* get_chunk(), judging by the caller in AO_malloc -- is elided from   */
/* this excerpt).  It lock-freely carves a CHUNK_SIZE chunk out of the */
/* static initial heap, falling back to get_mmaped once the static     */
/* heap is exhausted.                                                  */
/* Snapshot the current cursor, then compute the next ALIGNMENT-       */
/* aligned position after it.                                          */
193 initial_ptr = (char *)AO_load(&initial_heap_ptr);
194 my_chunk_ptr = (char *)(((AO_t)initial_ptr + (ALIGNMENT - 1))
196 if (initial_ptr != my_chunk_ptr)
198 /* Align correctly. If this fails, someone else did it for us. */
199 AO_compare_and_swap_acquire(&initial_heap_ptr, (AO_t)initial_ptr,
202 my_lim = my_chunk_ptr + CHUNK_SIZE;
203 if (my_lim <= initial_heap_lim)
/* Claim [my_chunk_ptr, my_lim) by advancing the cursor with CAS.      */
/* NOTE(review): the retry path on CAS failure is in elided lines.     */
205 if (!AO_compare_and_swap(&initial_heap_ptr, (AO_t)my_chunk_ptr,
210 /* We failed. The initial heap is used up. */
211 my_chunk_ptr = get_mmaped(CHUNK_SIZE);
212 assert (!((AO_t)my_chunk_ptr & (ALIGNMENT-1)));
216 /* Object free lists. Ith entry corresponds to objects */
217 /* of total size 2**i bytes.                           */
/* Each entry is an AO_stack_t (see atomic_ops_stack.h), so pushes and */
/* pops of free objects need no lock.                                  */
218 AO_stack_t AO_free_list[LOG_MAX_SIZE+1];
220 /* Chunk free list, linked through first word in chunks. */
221 /* All entries of size CHUNK_SIZE.                        */
222 AO_stack_t AO_chunk_free_list;
224 /* Break up the chunk, and add it to the object free list for */
225 /* the given size. Sz must be a power of two. */
226 /* We have exclusive access to chunk. */
228 add_chunk_as(void * chunk, size_t sz, unsigned log_sz)
/* First object starts so that its user-visible part (one AO_t header  */
/* later) is ALIGNMENT-aligned; limit is the last position at which a  */
/* whole sz-byte object still fits in the chunk.                       */
230 char *first = (char *)chunk + ALIGNMENT - sizeof(AO_t);
231 char *limit = (char *)chunk + CHUNK_SIZE - sz;
/* NOTE(review): the computation of `next` (presumably p + sz) is in   */
/* lines elided from this excerpt.                                     */
234 for (p = first; p <= limit; p = next) {
236 AO_stack_push(AO_free_list+log_sz, (AO_t *)p);
/* Lookup table: msbs[i] is the 1-based position of the most           */
/* significant set bit of i, for 0 <= i < 16 (msbs[0] == 0).           */
240 static int msbs[16] = {0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4};
242 /* Return the position of the most significant set bit in the */
244 /* We follow the conventions of ffs(), i.e. the least */
245 /* significant bit is number one. */
/* NOTE(review): the msb() signature and the byte-narrowing steps that */
/* accumulate into the result are in lines elided from this excerpt.   */
250 if ((s & 0xff) != s) {
251 /* The following shift often generates warnings on 32-bit arch's */
252 /* That's OK, because it will never be executed there. */
253 /* Doing the shift only in a conditional expression suppresses the */
254 /* warning with the modern compilers. */
255 if (sizeof(size_t) > 4 && (v = s >> 32) != 0)
/* Body fragment of AO_malloc (signature elided from this excerpt).    */
/* Small requests are rounded up to a power of two (incl. the AO_t     */
/* header) and served from the matching lock-free free list; requests  */
/* that don't fit a chunk are forwarded to AO_malloc_large.            */
284 size_t adj_sz = sz + sizeof(AO_t);
/* NOTE(review): the size test guarding this call is elided; it        */
/* presumably compares adj_sz against CHUNK_SIZE.                      */
287 return AO_malloc_large(sz);
/* msb(adj_sz-1) gives the log of the smallest power of two >= adj_sz. */
288 log_sz = msb(adj_sz-1);
289 result = AO_stack_pop(AO_free_list+log_sz);
/* Free list empty: fetch a fresh chunk, carve it into 2**log_sz-byte  */
/* objects, and retry the pop (another thread may drain it first).     */
290 while (0 == result) {
291 void * chunk = get_chunk();
292 if (0 == chunk) return 0;
293 adj_sz = 1 << log_sz;
294 add_chunk_as(chunk, adj_sz, log_sz);
295 result = AO_stack_pop(AO_free_list+log_sz);
/* NOTE(review): storing log_sz into the header and advancing result   */
/* past it happen in lines elided from this excerpt.                   */
298 # ifdef AO_TRACE_MALLOC
299 fprintf(stderr, "%x: AO_malloc(%lu) = %p\n",
300 (int)pthread_self(), (unsigned long)sz, result+1);
/* Body fragment of AO_free (signature and NULL check elided from this */
/* excerpt).  The header word before p holds either the binary log of  */
/* the object size (small object) or a value > LOG_MAX_SIZE (large     */
/* object), which selects between the free list and AO_free_large.     */
308 char *base = (char *)p - sizeof(AO_t);
312 log_sz = *(AO_t *)base;
313 # ifdef AO_TRACE_MALLOC
314 fprintf(stderr, "%x: AO_free(%p sz:%lu)\n", (int)pthread_self(), p,
315 (unsigned long)(log_sz > LOG_MAX_SIZE? log_sz : (1 << log_sz)));
/* Large objects store their byte size (always > LOG_MAX_SIZE) in the  */
/* header, so this test distinguishes the two cases.                   */
317 if (log_sz > LOG_MAX_SIZE)
320 AO_stack_push(AO_free_list+log_sz, (AO_t *)base);