1 ;*****************************************************************************
2 ; AMD Generic Encapsulated Software Architecture
4 ; Workfile: cpcarmac.inc $Revision:: 50472 $ $Date:: 2011-04-11 01:57:56 -0600 (Mon, 11 Apr 2011) $
6 ; Description: Code to setup and break down cache-as-stack
8 ;*****************************************************************************
10 ; Copyright (C) 2012 Advanced Micro Devices, Inc.
11 ; All rights reserved.
13 ; Redistribution and use in source and binary forms, with or without
14 ; modification, are permitted provided that the following conditions are met:
15 ; * Redistributions of source code must retain the above copyright
16 ; notice, this list of conditions and the following disclaimer.
17 ; * Redistributions in binary form must reproduce the above copyright
18 ; notice, this list of conditions and the following disclaimer in the
19 ; documentation and/or other materials provided with the distribution.
20 ; * Neither the name of Advanced Micro Devices, Inc. nor the names of
21 ; its contributors may be used to endorse or promote products derived
22 ; from this software without specific prior written permission.
24 ; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
25 ; ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
26 ; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
27 ; DISCLAIMED. IN NO EVENT SHALL ADVANCED MICRO DEVICES, INC. BE LIABLE FOR ANY
28 ; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29 ; (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
30 ; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
31 ; ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
32 ; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33 ; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35 ;*****************************************************************************
43 ;======================================================================
44 ; AMD_ENABLE_STACK: Setup a stack
47 ; EBX = Return address (preserved)
50 ; SS:ESP - Our new private stack location
53 ; EDX = Return status code if EAX contains a return code of higher
54 ; severity than AGESA_SUCCESS
55 ; ECX = Stack size in bytes
58 ; * This routine presently is limited to a max of 64 processor cores
62 ; eax, ecx, edx, edi, esi, ds, es, ss, esp
66 ; Fixed MTRR address allocation to cores:
67 ; The BSP gets 64K of stack, Core0 of each node gets 16K of stack, all other cores get 4K.
68 ; There is a max of 1 BSP, 7 core0s and 56 other cores.
69 ;  Although each core has its own cache storage, they share the address space. Each core must
70 ; be assigned a private and unique address space for its stack. To support legacy systems,
71 ; the stack needs to be within the legacy address space (1st 1Meg). Room must also be reserved
72 ; for the other legacy elements (Interrupt vectors, BIOS ROM, video buffer, etc.)
74 ; 80000h 40000h 00000h
75 ; +----------+----------+----------+----------+----------+----------+----------+----------+
76 ; 64K | | | | | | | | | 64K ea
77 ; ea +----------+----------+----------+----------+----------+----------+----------+----------+
78 ; | MTRR 0000_0250 MTRRfix64K_00000 |
79 ; +----------+----------+----------+----------+----------+----------+----------+----------+
80 ; | 7 , 6 | 5 , 4 | 3 , 2 | 1 , 0 | 0 | | | | <-node
81 ; |7..1,7..1 |7..1,7..1 |7..1,7..1 |7..1,7..1 | 0 | | | | <-core
82 ; +----------+----------+----------+----------+----------+----------+----------+----------+
84 ; C0000h B0000h A0000h 90000h 80000h
85 ; +------+------+------+------+------+------+------+------+------+------+------+------+------+------+------+------+
86 ;16K | | | | | | | | | | | | | | | | |
87 ; ea +------+------+------+------+------+------+------+------+------+------+------+------+------+------+------+------+
88 ; | MTRR 0259 MTRRfix16K_A0000 | MTRR 0258 MTRRfix16K_80000 |
89 ; +------+------+------+------+------+------+------+------+------+------+------+------+------+------+------+------+
90 ; | > Dis|play B|uffer | < | | | | | 7 | 6 | 5 | 4 | 3 | 2 | 1 | | <-node
91 ; | > T| e m |p o r |a r y | B u |f f e |r A |r e a<| 0 | 0 | 0 | 0 | 0 | 0 | 0 | | <-core
92 ; +------+------+------+------+------+------+------+------+------+------+------+------+------+------+------+------+
94 ; E0000h D0000h C0000h
95 ; +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
96 ; 4K | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | 4K ea
97 ; ea +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
98 ; | 026B MTRRfix4K_D8000 | 026A MTRRfix4K_D0000 | 0269 MTRRfix4K_C8000 | 0268 MTRRfix4K_C0000 |
99 ; +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
100 ; | | | | | | | | | | | | | | | | | >| V| I| D| E| O| |B |I |O |S | |A |r |e |a<|
101 ; +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
103 ; 100000h F0000h E0000h
104 ; +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
105 ; | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | 4K ea
106 ; +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
107 ; | 026F MTRRfix4K_F8000 | 026E MTRRfix4K_F0000 | 026D MTRRfix4K_E8000 | 026C MTRRfix4K_E0000 |
108 ; +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
109 ; | >|MA|IN| B|IO|S |RA|NG|E | | | | | | |< | >|EX|TE|ND|ED| B|IO|S |ZO|NE| | | | | |< |
110 ; +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
111 ;======================================================================
112 AMD_ENABLE_STACK MACRO
113     local   AmdEnableStackExit
115 ; Note that SS:ESP will be default stack. Note that this stack
116 ; routine will not be used after memory has been initialized. Because
117 ; of its limited lifetime, it will not conflict with typical PCI devices.
118     movd    mm0, ebx                    ; Put return address in a safe place (MMX reg survives; no RAM stack yet)
119     movd    mm1, ebp                    ; Save some other user registers
121     ; get node id and core id of current executing core
122     GET_NODE_ID_CORE_ID                 ; Sets ESI[15,8]= Node#; ESI[7,0]= core# (relative to node)
123     ; Note: ESI[31:24] are used for flags:  Unrecognized Family, Is_Primary core,  Stack already established
125     ; If we detected an unknown processor family or core combination, return AGESA_FATAL.
126     .if (esi & (1 SHL FLAG_UNKNOWN_FAMILY))
127         mov     edx, CPU_EVENT_UNKNOWN_PROCESSOR_FAMILY
129         jmp     AmdEnableStackExit
130     .elseif (esi & (1 SHL FLAG_CORE_NOT_IDENTIFIED))
131         mov     edx, CPU_EVENT_CORE_NOT_IDENTIFIED
133         jmp     AmdEnableStackExit
136     ; determine if stack is already enabled. We are using the DefType MSR for this determination.
137     ; It is =0 after reset; CAR setup sets it to enable the MTRRs
138     mov     eax, cr0                    ; Is cache enabled? (CD or NW bit set)
139     CR0_MASK TEXTEQU %((1 SHL CR0_CD) OR (1 SHL CR0_NW))
140     .if (!(eax & CR0_MASK))
141         mov     ecx, AMD_MTRR_DEFTYPE   ; MSR:0000_02FF
142         _RDMSR                          ; Are either of the default types enabled? (MTRR_DEF_TYPE_EN + MTRR_DEF_TYPE_FIX_EN)
143         MSR_MASK TEXTEQU %((1 SHL MTRR_DEF_TYPE_EN)+(1 SHL MTRR_DEF_TYPE_FIX_EN))
145             bts     esi, FLAG_STACK_REENTRY ; indicate stack has already been initialized
149     ; Set node to map the first 16MB to node 0; 0000_0000 to 00FF_FFFF as DRAM
150     mov     ebx, esi                    ; Get my Node/Core info
152     shl     bh, 3                       ; Isolate my node#, match alignment for PCI Dev#
153     mov     eax, 8000C144h              ; D18F1x44:DRAM Base/Limit; N is Base, N+4 is Limit
155     mov     ebx, eax                    ; Save PCI address for Base/Limit pair
160     xor     eax, eax                    ; Least Significant bit is AD24 so 0 sets mask of 00FF_FFFF (16MB)
161     out     dx, eax                     ; DRAM Limit = node0, no interleave
164     sub     eax, 4                      ; Now point to the Base register
168     mov     eax, 00000003h              ; Set the read and write enable bits
169     out     dx, eax                     ; DRAM Base = 0x0000, R/W
171     AMD_ENABLE_STACK_FAMILY_HOOK        ; family-specific CAR setup; defined elsewhere, may modify regs -- see family hook implementation
173     ; Init CPU MSRs for our init routines
174     mov     ecx, MTRR_SYS_CFG           ; SYS_CFG (MSR:C001_0010)
176     bts     eax, MTRR_FIX_DRAM_MOD_EN   ; Turn on modification enable bit (opens fixed-MTRR extended fields for writing)
180     bt      eax, FLAG_STACK_REENTRY     ; Is this a 2nd entry?
181     .if (!carry?)                       ;   On a re-entry, do not clear MTRRs or reset TOM; just reset the stack SS:ESP
182         bt      eax, FLAG_IS_PRIMARY    ;   Is this core the primary in a compute unit?
183         .if (carry?)                    ;     Families using shared groups do not need to clear the MTRRs since that is done at power-on reset
184             ; Note: Relying on MSRs to be cleared to 0's at reset for families w/shared cores
185             ; Clear all variable and Fixed MTRRs for non-shared cores
186             mov     ecx, AMD_MTRR_VARIABLE_BASE0
189             .while (cl != 10h)          ; Variable MTRRphysBase[n] and MTRRphysMask[n]
193             mov     cx, AMD_MTRR_FIX64k_00000   ; MSR:0000_0250
195             mov     cx, AMD_MTRR_FIX16k_80000   ; MSR:0000_0258
197             mov     cx, AMD_MTRR_FIX16k_A0000   ; MSR:0000_0259
199             mov     cx, AMD_MTRR_FIX4k_C0000    ; Fixed 4Ks: MTRRfix4K_C0000 to MTRRfix4K_F8000
205             ; Set TOP_MEM (C001_001A) for non-shared cores to 16M. This will be increased at heap init.
206             ;  - not strictly needed since the FixedMTRRs take precedence.
207             mov     eax, (16 * 1024 * 1024)
208             mov     ecx, TOP_MEM        ; MSR:C001_001A
210         .endif                          ; End Is_Primary
211     .endif                              ; End Stack_ReEntry
213     ; Clear IORRs (C001_0016-19) and TOM2(C001_001D) for all cores
216     mov     ecx, IORR_BASE              ; MSR:C001_0016 - 0019
221     mov     ecx, TOP_MEM2               ; MSR:C001_001D
224     ; setup MTRRs for stacks
225     ; A speculative read can be generated by a speculative fetch mis-aligned in a code zone
226     ;  or due to a data zone being interpreted as code. When a speculative read occurs outside a
227     ;  controlled region (intentionally used by software), it could cause an unwanted cache eviction.
228     ; To prevent speculative reads from causing an eviction, the unused cache ranges are set
229     ;  to UC type. Only the actively used regions (stack, heap) are reflected in the MTRRs.
230     ; Note: some core stack regions will share an MTRR since the control granularity is much
231     ;  larger than the allocated stack zone. The allocation algorithm must account for this 'extra'
232     ;  space covered by the MTRR when parceling out cache space for the various uses. In some cases
233     ;  this could reduce the amount of EXE cache available to a core. see cpuCacheInit.c
235     ; Outcome of this block is that: (Note the MTRR map at the top of the file)
236     ;   ebp - start address of stack block
237     ;   ebx - [31:16] - MTRR MSR address
238     ;       - [15:8]  - slot# in MTRR register
239     ;       - [7:0]   - block size in #4K blocks
240     ; review: ESI[31:24]=Flags; SI[15,8]= Node#; SI[7,0]= core# (relative to node)
243     mov     eax, esi                    ; Load Flags, node, core
244     .if (al == 0)                       ; Is a core 0?
245         .if (ah == 0)                   ; Is Node 0? (BSP)
246             ; Is BSP, assign a 64K stack; for F10/F12, force to a 32K stack
247             mov     ebx, ((AMD_MTRR_FIX64k_00000 SHL 16) + (3 SHL 8) + (BSP_STACK_SIZE_64K / 1000h))
248             bt      eax, FLAG_FORCE_32K_STACK
250                 mov     ebx, ((AMD_MTRR_FIX64k_00000 SHL 16) + (3 SHL 8) + (BSP_STACK_SIZE_32K / 1000h))
252             mov     ebp, BSP_STACK_BASE_ADDR
253         .else                           ; node 1 to 7, core0
254             ; Is a Core0 of secondary node, assign 16K stacks
255             mov     bx, AMD_MTRR_FIX16k_80000
257             mov     bh, ah              ; Node# is used as slot#
258             mov     bl, (CORE0_STACK_SIZE / 1000h)
259             mov     al, ah              ; Base = (Node# * Size);
262             shl     eax, 12             ; Expand back to full byte count (* 4K)
263             add     eax, CORE0_STACK_BASE_ADDR
266     .else                               ;core 1 thru core 7
267         ; Is core 1-7 of any node, assign 4K stacks
268         mov     al, 8                   ; CoreIndex = ( (Node# * 8) ...
271         add     al, bl                  ;             ... + Core#);
273         mov     bx, AMD_MTRR_FIX64k_00000
275         mov     bh, al                  ; Slot# = (CoreIndex / 16) + 4;
278         mov     bl, (CORE1_STACK_SIZE / 1000h)
280         mul     bl                      ; Base = ( (CoreIndex * Size) ...
282         shl     eax, 12                 ; Expand back to full byte count (* 4K)
283         add     eax, CORE1_STACK_BASE_ADDR ;     ... + Base_Addr);
287     ; Now set the MTRR. Add this to already existing settings (don't clear any MTRR)
288     mov     edi, WB_DRAM_TYPE           ; Load Cache type in 1st slot
289     mov     cl, bh                      ; ShiftCount =  ((slot#  ...
290     and     cl, 03h                     ;   ...  % 4)   ...
291     shl     cl, 3                       ;   ...  * 8);  8 bits per fixed-MTRR type field
292     shl     edi, cl                     ; Cache type is now in correct position
293     ror     ebx, 16                     ; Get the MTRR address
295     rol     ebx, 16                     ; Put slot# & size back in BX
296     _RDMSR                              ; Read-modify-write the MSR
297     .if (bh < 4)                        ; Is value in lower or upper half of MSR? (slots 0-3 in EAX, 4-7 in EDX)
304     ; Enable MTRR defaults as UC type
305     mov     ecx, AMD_MTRR_DEFTYPE       ; MSR:0000_02FF
306     _RDMSR                              ; Read-modify-write the MSR
307     bts     eax, MTRR_DEF_TYPE_EN       ; MtrrDefTypeEn
308     bts     eax, MTRR_DEF_TYPE_FIX_EN   ; MtrrDefTypeFixEn
311     ; Close the modification window on the Fixed MTRRs
312     mov     ecx, MTRR_SYS_CFG           ; MSR:0C001_0010
314     bts     eax, MTRR_FIX_DRAM_EN       ; MtrrFixDramEn
315     bts     eax, MTRR_VAR_DRAM_EN       ; variable MTRR enable bit
316     btr     eax, MTRR_FIX_DRAM_MOD_EN   ; Turn off modification enable bit
319     ; Enable caching in CR0
320     mov     eax, CR0                    ; Enable WT/WB cache
321     btr     eax, CR0_PG                 ; Make sure paging is disabled
322     btr     eax, CR0_CD                 ; Clear CR0 NW and CD
326     ; Use the Stack Base & size to calculate SS and ESP values
328     ;  esi[31:24]=Flags; esi[15,8]= Node#; esi[7,0]= core# (relative to node)
329     ;  ebp - start address of stack block
330     ;  ebx - [31:16] - MTRR MSR address
331     ;      - [15:8]  - slot# in MTRR register
332     ;      - [7:0]   - block size in #4K blocks
334     mov     esp, ebp                    ; Initialize the stack pointer
335     mov     edi, esp                    ; Copy the stack start to edi
337     movzx   ebx, bx                     ; Clear upper ebx, don't need MSR addr anymore
338     shl     ebx, 12                     ; Make size full byte count (* 4K)
339     add     esp, ebx                    ; Set the Stack Pointer as full linear address
343     ;  esi[31:24]=Flags; esi[15,8]= Node#; esi[7,0]= core# (relative to node)
344     ;  edi - 32b start address of stack block
345     ;  ebx - size of stack block
346     ;  esp - 32b linear stack pointer
349     ; Determine mode for SS base;
350     mov     ecx, CR0                    ; Check for 32-bit protect mode
352     .if (!carry?)                       ; PE=0 means real mode
354         .if (cx >= 0D000h)              ; If CS >= D000, it's a real mode segment. PM selector would be 08-> 1000
355             ; alter SS:ESP for 16b Real Mode:
357             shr     eax, 4              ; Create a Real Mode segment for ss, ds, es
362             sub     edi, eax            ; Adjust the clearing pointer for Seg:Offset mode
363             mov     esp, ebx            ; Make SP an offset from SS
367     ; Default is to use Protected 32b Mode
371     ; Now that we have set the location and the MTRRs, initialize the cache by
372     ; reading then writing to zero all of the stack area.
375     ; esp - stack pointer
376     ; ebx - size of stack block
377     ; esi[31:24]=Flags; esi[15,8]= Node#; esi[7,0]= core# (relative to node)
378     ; edi - address of start of stack block
381     mov     cx, bx                      ; set cx for size count of DWORDS
382     ; Check our flags - Don't clear an existing stack
383     .if ( !(esi & (1 SHL FLAG_STACK_REENTRY)) )
386         ; NOTE(review): rep lods/stos assume DF=0 (forward direction) -- confirm cld is done in the elided setup lines
386         rep     lods    DWORD PTR [esi] ; Pre-load the range
389         mov     esi, edi                ; Preserve base for push on stack
390         rep     stos    DWORD PTR [edi] ; Clear the range
391         mov     DWORD PTR [esp], 0ABCDDCBAh ; Put marker in top stack dword
392         shl     ebx, 2                  ; Put stack size and base
393         push    ebx                     ;   in top of stack
396         mov     ecx, ebx                ; Return size of stack in bytes
397         mov     eax, AGESA_SUCCESS      ; eax = AGESA_SUCCESS : no error return code
400         shl     ecx, 2                  ; Return size of stack, in bytes
401         mov     edx, CPU_EVENT_STACK_REENTRY
402         mov     eax, AGESA_WARNING      ; eax = AGESA_WARNING (Stack has already been set up)
406     movd    ebx, mm0                    ; Restore return address (saved at macro entry)
410 ;======================================================================
411 ; AMD_DISABLE_STACK: Destroy the stack inside the cache. This routine
412 ; should only be executed on the BSP
418 ; EAX = AGESA_SUCCESS
423 ; eax, ecx, edx, esp, mmx5
424 ;======================================================================
425 AMD_DISABLE_STACK MACRO
427 ;   Tear down cache-as-stack: re-map the low 640K fixed MTRRs as WB DRAM and
427 ;   let the family hook restore normal cache operation. EBX (return address)
427 ;   is parked in ESP while MSR work clobbers other registers.
427     mov     esp, ebx                    ; Save return address
429     ; get node/core/flags of current executing core
430     GET_NODE_ID_CORE_ID                 ; Sets ESI[15,8]= Node#; ESI[7,0]= core# (relative to node)
432     ; Turn on modification enable bit
433     mov     ecx, MTRR_SYS_CFG           ; MSR:C001_0010
435     bts     eax, MTRR_FIX_DRAM_MOD_EN   ; Enable modifications
438     ; Set lower 640K MTRRs for Write-Back memory caching
439     mov     ecx, AMD_MTRR_FIX64k_00000
442     _WRMSR                              ; 0 - 512K = WB Mem
443     mov     ecx, AMD_MTRR_FIX16k_80000
444     _WRMSR                              ; 512K - 640K = WB Mem
446     ; Turn off modification enable bit
447     mov     ecx, MTRR_SYS_CFG           ; MSR:C001_0010
449     btr     eax, MTRR_FIX_DRAM_MOD_EN   ; Disable modification
452     AMD_DISABLE_STACK_FAMILY_HOOK       ; Re-Enable 'normal' cache operations
454     mov     ebx, esp                    ; restore return address (ebx)