1 #define ALIGNMENT 3 /* 64 bit */
3 /* Sorry, I couldn't use a while loop, because my compiler (gcc) on an
4 Alpha inserts useless "nop" and "unop" instructions into the inner
5 loop. Additionally it reorders basic blocks such that a block from
6 within the loop is branched to and branched back from (adding to
9 Could anyone explain me why?
11 Apparently the later problem is related to using "continue"; if I use
12 explicit "goto"s, I'm left with _only_ the unnecessary "nop" and
13 "unop"; that isn't acceptable either... */
15 /* So why did I do this? Well, this can be easily explained... using the
16 data from one of our test runs we found:
18 >> LOG: 9301464 bytes for 196724 objects allocated.
19 >> LOG: 15 garbage collections performed.
20 >> LOG: 6568440 heapblocks visited, 469249 objects marked
21 >> LOG: 1064447 visits to objects already marked.
22 >> LOG: 988270 potential references not aligned.
23 >> LOG: 4049446 out of heap.
24 >> LOG: 5236 not an object.
26 So how much did we save? Eliminating the inner loop inefficiencies saves
27 at least 6568440 * 2 instruction (1 nop, 1 unop), folding the test for
28 non-objects and marked objects into one saves about 1000000 array accesses,
29 shifts, compares and untaken branches. The pseudo-recursion saves around
30 15 instructions (mostly load/stores for saved registers) for every one of
31 the 469249 marked objects. Adding this up we would save about 25000000
32 instructions (and that's a careful estimate), most of them loads or stores.
35 /* gc_mark_object_at now contains assembler code: This became necessary
36 to get rid of the register saving overhead (callee-saved registers)
37 involved in calling the recursive functions. -- phil. */
39 /* Important note (1):
40 I use the low bit of the "addr" stored on the stack to indicate whether
41 this is the outermost loop (the actual function and not a pseudo-recursion):
42 this bit is clear, if this is a recursion, but set when it is the outermost
43 function. In the latter case the value retrieved is not used, so I don't
44 need to align it. -- phil. */
47 Weird... apparently the alpha compiler backend scatters rather useless
48 instructions throughout the code...
50 "pattern |= (0x3 << offset);" yields:
52 54: 01 74 e0 47 mov 0x3,t0
53 58: 21 07 22 48 sll t0,t1,t0
54 5c: 01 00 3f 40 addl t0,zero,t0
55 60: 04 04 81 44 or t3,t0,t3
57 That's rather depressing... either I am simply dumb, or the "addl"
58 instruction shouldn't be there. Sigh. -- phil. */
60 void gc_mark_object_at(void** addr, unsigned long* bitmap, void* heap_base_reg, void* heap_size_reg)
62 register long pattern;
64 register long* pattern_addr;
65 register void* deref_addr;
68 register long constant_one;
70 if ((unsigned long)addr & ((1 << ALIGNMENT) - 1))
73 if (((unsigned long)addr - (unsigned long)heap_base_reg) >= (unsigned long)heap_size_reg)
76 pattern_addr = &bitmap[(unsigned long)addr >> 8];
77 pattern = *pattern_addr;
78 offset = ((unsigned long)addr >> 2) & ((1 << 5) - 2);
86 /* grow stack & mark as outermost loop */
87 asm("subq $30, %0, $30" : : "i"(sizeof(void*)) : "$30");
88 asm("stq %0, %1 ($30)" : : "r"(constant_one), "i"(0) : "memory");
92 *pattern_addr = pattern | (constant_one << offset);
95 end = addr + *(long*)addr; /* FIXME: insert the real code necessary */
104 if ((unsigned long)(deref_addr) & ((1 << ALIGNMENT) - 1))
107 if (((unsigned long)addr - (unsigned long)heap_base_reg) >= (unsigned long)heap_size_reg)
110 pattern_addr = &bitmap[(unsigned long)(deref_addr) >> 8];
111 pattern = *pattern_addr;
112 offset = ((unsigned long)addr >> 2) & ((1 << 5) - 2);
119 asm("subq $30, %0, $30" : : "i"(2 * sizeof(void*)) : "$30");
121 /* store the current context on the stack: addr, end */
122 asm("stq %0, %1 ($30)" : : "r"(addr), "i"(0) : "memory" );
123 asm("stq %0, %1 ($30)" : : "r"(end), "i"(sizeof(void*)) : "memory" );
130 /* load the addr field of the previous context */
131 asm("ldq %0, %1 ($30)" : "=r"(addr) : "i"(0) : "memory");
133 /* check whether we are returning from the outermost pseudo-recursion */
137 /* load the end field of the previous context */
138 asm("ldq %0, %1 ($30)" : "=r"(end) : "i"(sizeof(void*)) : "memory" );
140 /* shrink the stack */
141 asm("addq $30, %0, $30" : : "i"(2 * sizeof(void*)) : "$30");
143 /* continue the loop in this context */
147 /* pop the no_recursion flag */
148 asm("addq $30, %0, $30" : : "i"(sizeof(void*)) : "$30");
153 * These are local overrides for various environment variables in Emacs.
154 * Please do not remove this and leave it at the end of the file, where
155 * Emacs will automagically detect them.
156 * ---------------------------------------------------------------------
159 * indent-tabs-mode: t