;*****************************************************************************
; AMD Generic Encapsulated Software Architecture
;
; Workfile: cpcarmac.inc   $Revision:: 50472  $   $Date:: 2011-04-11 01:57:56 -0600 (Mon, 11 Apr 2011) $
;
; Description: Code to setup and break down cache-as-stack
;
;*****************************************************************************
;
; Copyright (C) 2012 Advanced Micro Devices, Inc.
; All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions are met:
;     * Redistributions of source code must retain the above copyright
;       notice, this list of conditions and the following disclaimer.
;     * Redistributions in binary form must reproduce the above copyright
;       notice, this list of conditions and the following disclaimer in the
;       documentation and/or other materials provided with the distribution.
;     * Neither the name of Advanced Micro Devices, Inc. nor the names of
;       its contributors may be used to endorse or promote products derived
;       from this software without specific prior written permission.
;
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
; ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
; DISCLAIMED. IN NO EVENT SHALL ADVANCED MICRO DEVICES, INC. BE LIABLE FOR ANY
; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
; (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
; ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;
;*****************************************************************************
36
37 .XLIST
38 INCLUDE cpcar.inc
39 .LIST
40 .586P
41 .mmx
42
;======================================================================
; AMD_ENABLE_STACK:  Setup a stack
;
;   In:
;       EBX  = Return address (preserved)
;
;   Out:
;       SS:ESP - Our new private stack location
;
;       EAX = AGESA_STATUS
;       EDX = Return status code if EAX contains a return code of higher
;             severity than AGESA_SUCCESS
;       ECX = Stack size in bytes
;
;   Requirements:
;       * This routine presently is limited to a max of 64 processor cores
;   Preserved:
;       ebx ebp
;   Destroyed:
;       eax, ecx, edx, edi, esi, ds, es, ss, esp
;       mmx0, mmx1, mmx5
;
;   Description:
; Fixed MTRR address allocation to cores:
; The BSP gets 64K of stack, Core0 of each node gets 16K of stack, all other cores get 4K.
; There is a max of 1 BSP, 7 core0s and 56 other cores.
; Although each core has its own cache storage, they share the address space. Each core must
; be assigned a private and unique address space for its stack. To support legacy systems,
; the stack needs to be within the legacy address space (1st 1Meg). Room must also be reserved
; for the other legacy elements (Interrupt vectors, BIOS ROM, video buffer, etc.)
;
; 80000h                                        40000h                                        00000h
;     +----------+----------+----------+----------+----------+----------+----------+----------+
; 64K |          |          |          |          |          |          |          |          | 64K ea
;  ea +----------+----------+----------+----------+----------+----------+----------+----------+
;     |   MTRR 0000_0250 MTRRfix64K_00000                                                     |
;     +----------+----------+----------+----------+----------+----------+----------+----------+
;     |  7 ,  6  |  5 ,  4  |  3 ,  2  |  1 ,  0  |     0    |          |          |          | <-node
;     |7..1,7..1 |7..1,7..1 |7..1,7..1 |7..1,7..1 |     0    |          |          |          | <-core
;     +----------+----------+----------+----------+----------+----------+----------+----------+
;
; C0000h                      B0000h                      A0000h                      90000h                      80000h
;     +------+------+------+------+------+------+------+------+------+------+------+------+------+------+------+------+
;16K  |      |      |      |      |      |      |      |      |      |      |      |      |      |      |      |      |
;  ea +------+------+------+------+------+------+------+------+------+------+------+------+------+------+------+------+
;     |   MTRR 0259 MTRRfix16K_A0000                          |   MTRR 0258 MTRRfix16K_80000                          |
;     +------+------+------+------+------+------+------+------+------+------+------+------+------+------+------+------+
;     | > Dis|play B|uffer |   <  |      |      |      |      |   7  |   6  |   5  |   4  |   3  |   2  |   1  |      | <-node
;     | >  T | e m  |p o r |a r y | B u  |f f e |r  A  |r e a<|   0  |   0  |   0  |   0  |   0  |   0  |   0  |      | <-core
;     +------+------+------+------+------+------+------+------+------+------+------+------+------+------+------+------+
;
; E0000h                          D0000h                          C0000h
;     +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
; 4K  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  | 4K ea
;  ea +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
;     | 026B MTRRfix4K_D8000  | 026A MTRRfix4K_D0000  | 0269 MTRRfix4K_C8000  | 0268 MTRRfix4K_C0000  |
;     +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
;     |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  | >| V| I| D| E| O|  |B |I |O |S |  |A |r |e |a<|
;     +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
;
; 100000h                         F0000h                          E0000h
;     +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
;     |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  | 4K ea
;     +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
;     | 026F MTRRfix4K_F8000  | 026E MTRRfix4K_F0000  | 026D MTRRfix4K_E8000  | 026C MTRRfix4K_E0000  |
;     +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
;     | >|MA|IN| B|IO|S |RA|NG|E |  |  |  |  |  |  |< | >|EX|TE|ND|ED| B|IO|S |ZO|NE|  |  |  |  |  |< |
;     +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
;======================================================================
AMD_ENABLE_STACK MACRO
    local   AmdEnableStackExit

; Note that SS:ESP will be default stack.  Note that this stack
; routine will not be used after memory has been initialized.  Because
; of its limited lifetime, it will not conflict with typical PCI devices.
    movd    mm0, ebx                    ; Put return address in a safe place
    movd    mm1, ebp                    ; Save some other user registers

    ; get node id and core id of current executing core
    GET_NODE_ID_CORE_ID                 ; Sets ESI[15,8]= Node#; ESI[7,0]= core# (relative to node)
    ; Note: ESI[31:24] are used for flags:  Unrecognized Family, Is_Primary core, Stack already established

    ; If we detected an unknown processor family or core combination, return AGESA_FATAL.
    .if (esi & (1 SHL FLAG_UNKNOWN_FAMILY))
        mov     edx, CPU_EVENT_UNKNOWN_PROCESSOR_FAMILY
        mov     eax, AGESA_FATAL
        jmp     AmdEnableStackExit
    .elseif (esi & (1 SHL FLAG_CORE_NOT_IDENTIFIED))
        mov     edx, CPU_EVENT_CORE_NOT_IDENTIFIED
        mov     eax, AGESA_FATAL
        jmp     AmdEnableStackExit
    .endif

    ; determine if stack is already enabled. We are using the DefType MSR for this determination.
    ; It is =0 after reset; CAR setup sets it to enable the MTRRs
    mov     eax, cr0                    ; Is cache enabled? (CD or NW bit set)
    CR0_MASK TEXTEQU %((1 SHL CR0_CD) OR (1 SHL CR0_NW))
    .if (!(eax & CR0_MASK))
        mov     ecx, AMD_MTRR_DEFTYPE   ; MSR:0000_02FF
        _RDMSR                          ; Are either of the default types enabled? (MTRR_DEF_TYPE_EN + MTRR_DEF_TYPE_FIX_EN)
        MSR_MASK TEXTEQU %((1 SHL MTRR_DEF_TYPE_EN)+(1 SHL MTRR_DEF_TYPE_FIX_EN))
        .if (eax & MSR_MASK)
            bts     esi, FLAG_STACK_REENTRY ; indicate stack has already been initialized
        .endif
    .endif

    ; Set node to map the first 16MB to node 0; 0000_0000 to 00FF_FFFF as DRAM
    mov     ebx, esi                    ; Get my Node/Core info
    xor     bl, bl                      ; Drop core#; keep node# in BH
    shl     bh, 3                       ; Isolate my node#, match alignment for PCI Dev#
    mov     eax, 8000C144h              ; D18F1x44:DRAM Base/Limit; N is Base, N+4 is Limit
    add     ah, bh                      ; Select the config space of this core's node (Dev 18h + node#)
    mov     ebx, eax                    ; Save PCI address for Base/Limit pair

    mov     dx, 0CF8h                   ; PCI config address port
    out     dx, eax
    add     dx, 4                       ; PCI config data port (0CFCh)
    xor     eax, eax                    ; Least Significant bit is AD24 so 0 sets mask of 00FF_FFFF (16MB)
    out     dx, eax                     ; DRAM Limit = node0, no interleave

    mov     eax, ebx
    sub     eax, 4                      ; Now point to the Base register (D18F1x40)
    mov     dx, 0CF8h
    out     dx, eax
    add     dx, 4
    mov     eax, 00000003h              ; Set the read and write enable bits
    out     dx, eax                     ; DRAM Base = 0x0000, R/W

    AMD_ENABLE_STACK_FAMILY_HOOK        ; Family-specific CAR setup (defined per-family elsewhere)

    ; Init CPU MSRs for our init routines
    mov     ecx, MTRR_SYS_CFG           ; SYS_CFG MSR:C001_0010
    _RDMSR
    bts     eax, MTRR_FIX_DRAM_MOD_EN   ; Turn on modification enable bit
    _WRMSR

    mov     eax, esi
    bt      eax, FLAG_STACK_REENTRY     ; Is this a 2nd entry?
    .if (!carry?)                       ; On a re-entry, do not clear MTRRs or reset TOM; just reset the stack SS:ESP
        bt      eax, FLAG_IS_PRIMARY    ; Is this core the primary in a compute unit?
        .if (carry?)                    ; Families using shared groups do not need to clear the MTRRs since that is done at power-on reset
            ;  Note: Relying on MSRs to be cleared to 0's at reset for families w/shared cores
            ;  Clear all variable and Fixed MTRRs for non-shared cores
            mov     ecx, AMD_MTRR_VARIABLE_BASE0
            xor     eax, eax
            xor     edx, edx
            .while (cl != 10h)          ; Variable MTRRphysBase[n] and MTRRphysMask[n] (MSR 200h..20Fh)
                _WRMSR
                inc     cl
            .endw
            mov     cx, AMD_MTRR_FIX64k_00000   ; MSR:0000_0250
            _WRMSR
            mov     cx, AMD_MTRR_FIX16k_80000   ; MSR:0000_0258
            _WRMSR
            mov     cx, AMD_MTRR_FIX16k_A0000   ; MSR:0000_0259
            _WRMSR
            mov     cx, AMD_MTRR_FIX4k_C0000    ; Fixed 4Ks: MTRRfix4K_C0000 to MTRRfix4K_F8000 (268h..26Fh)
            .while (cl != 70h)
                _WRMSR
                inc     cl
            .endw

            ; Set TOP_MEM (C001_001A) for non-shared cores to 16M. This will be increased at heap init.
            ;  - not strictly needed since the FixedMTRRs take precedence.
            mov     eax, (16 * 1024 * 1024)
            mov     ecx, TOP_MEM        ; MSR:C001_001A
            _WRMSR
        .endif                          ; End Is_Primary
    .endif                              ; End Stack_ReEntry

    ; Clear IORRs (C001_0016-19) and TOM2 (C001_001D) for all cores
    xor     eax, eax
    xor     edx, edx
    mov     ecx, IORR_BASE              ; MSR:C001_0016 - 0019
    .while (cl != 1Ah)
        _WRMSR
        inc     cl
    .endw
    mov     ecx, TOP_MEM2               ; MSR:C001_001D
    _WRMSR

    ; setup MTRRs for stacks
    ;   A speculative read can be generated by a speculative fetch mis-aligned in a code zone
    ;    or due to a data zone being interpreted as code. When a speculative read occurs outside a
    ;    controlled region (intentionally used by software), it could cause an unwanted cache eviction.
    ;   To prevent speculative reads from causing an eviction, the unused cache ranges are set
    ;    to UC type. Only the actively used regions (stack, heap) are reflected in the MTRRs.
    ;    Note: some core stack regions will share an MTRR since the control granularity is much
    ;    larger than the allocated stack zone. The allocation algorithm must account for this 'extra'
    ;    space covered by the MTRR when parceling out cache space for the various uses. In some cases
    ;    this could reduce the amount of EXE cache available to a core. see cpuCacheInit.c
    ;
    ; Outcome of this block is that:   (Note the MTRR map at the top of the file)
    ;   ebp - start address of stack block
    ;   ebx - [31:16] - MTRR MSR address
    ;       - [15:8]  - slot# in MTRR register
    ;       - [7:0]   - block size in #4K blocks
    ; review: ESI[31:24]=Flags; SI[15,8]= Node#; SI[7,0]= core# (relative to node)
    ;

    mov     eax, esi                    ; Load Flags, node, core
    .if (al == 0)                       ; Is a core 0?
        .if (ah == 0)                   ; Is Node 0? (BSP)
            ; Is BSP, assign a 64K stack; for F10/F12, force to a 32K stack
            mov     ebx, ((AMD_MTRR_FIX64k_00000 SHL 16) + (3 SHL 8) + (BSP_STACK_SIZE_64K / 1000h))
            bt      eax, FLAG_FORCE_32K_STACK
            .if (carry?)
                mov     ebx, ((AMD_MTRR_FIX64k_00000 SHL 16) + (3 SHL 8) + (BSP_STACK_SIZE_32K / 1000h))
            .endif
            mov     ebp, BSP_STACK_BASE_ADDR
        .else                           ; node 1 to 7, core0
            ; Is a Core0 of secondary node, assign 16K stacks
            mov     bx, AMD_MTRR_FIX16k_80000
            shl     ebx, 16             ;
            mov     bh, ah              ; Node# is used as slot#
            mov     bl, (CORE0_STACK_SIZE / 1000h)
            mov     al, ah              ; Base = (Node# * Size);
            mul     bl                  ;
            movzx   eax, ax             ;
            shl     eax, 12             ; Expand back to full byte count (* 4K)
            add     eax, CORE0_STACK_BASE_ADDR
            mov     ebp, eax
        .endif
    .else                               ; core 1 thru core 7
        ; Is core 1-7 of any node, assign 4K stacks
        mov     al, 8                   ; CoreIndex = ( (Node# * 8) ...
        mul     ah                      ;
        mov     bx, si                  ;
        add     al, bl                  ; ...  + Core#);

        mov     bx, AMD_MTRR_FIX64k_00000
        shl     ebx, 16                 ;
        mov     bh, al                  ; Slot# = (CoreIndex / 16) + 4;
        shr     bh, 4                   ;
        add     bh, 4                   ;
        mov     bl, (CORE1_STACK_SIZE / 1000h)

        mul     bl                      ; Base = ( (CoreIndex * Size) ...
        movzx   eax, ax                 ;
        shl     eax, 12                 ; Expand back to full byte count (* 4K)
        add     eax, CORE1_STACK_BASE_ADDR ; ... + Base_Addr);
        mov     ebp, eax
    .endif

    ; Now set the MTRR. Add this to already existing settings (don't clear any MTRR)
    mov     edi, WB_DRAM_TYPE           ; Load Cache type in 1st slot
    mov     cl, bh                      ; ShiftCount =  ((slot#  ...
    and     cl, 03h                     ;   ...  % 4)  ...
    shl     cl, 3                       ;   ...  * 8);
    shl     edi, cl                     ; Cache type is now in correct position
    ror     ebx, 16                     ; Get the MTRR address
    movzx   ecx, bx                     ;
    rol     ebx, 16                     ; Put slot# & size back in BX
    _RDMSR                              ; Read-modify-write the MSR
    .if (bh < 4)                        ; Is value in lower or upper half of MSR?
        or      eax, edi                ;
    .else                               ;
        or      edx, edi                ;
    .endif                              ;
    _WRMSR                              ;

    ; Enable MTRR defaults as UC type
    mov     ecx, AMD_MTRR_DEFTYPE       ; MSR:0000_02FF
    _RDMSR                              ; Read-modify-write the MSR
    bts     eax, MTRR_DEF_TYPE_EN       ; MtrrDefTypeEn
    bts     eax, MTRR_DEF_TYPE_FIX_EN   ; MtrrDefTypeFixEn
    _WRMSR

    ; Close the modification window on the Fixed MTRRs
    mov     ecx, MTRR_SYS_CFG           ; MSR:C001_0010
    _RDMSR
    bts     eax, MTRR_FIX_DRAM_EN       ; MtrrFixDramEn
    bts     eax, MTRR_VAR_DRAM_EN       ; variable MTRR enable bit
    btr     eax, MTRR_FIX_DRAM_MOD_EN   ; Turn off modification enable bit
    _WRMSR

    ; Enable caching in CR0
    mov     eax, CR0                    ; Enable WT/WB cache
    btr     eax, CR0_PG                 ; Make sure paging is disabled
    btr     eax, CR0_CD                 ; Clear CR0 NW and CD
    btr     eax, CR0_NW
    mov     CR0, eax

    ; Use the Stack Base & size to calculate SS and ESP values
    ; review:
    ;   esi[31:24]=Flags; esi[15,8]= Node#; esi[7,0]= core# (relative to node)
    ;   ebp - start address of stack block
    ;   ebx - [31:16] - MTRR MSR address
    ;       - [15:8]  - slot# in MTRR register
    ;       - [7:0]   - block size in #4K blocks
    ;
    mov     esp, ebp                    ; Initialize the stack pointer
    mov     edi, esp                    ; Copy the stack start to edi
    movzx   bx, bl                      ; Isolate block-size field
    movzx   ebx, bx                     ; Clear upper ebx, don't need MSR addr anymore
    shl     ebx, 12                     ; Make size full byte count (* 4K)
    add     esp, ebx                    ; Set the Stack Pointer as full linear address
    sub     esp, 4                      ; Point at the last dword of the block
    ;
    ; review:
    ;   esi[31:24]=Flags; esi[15,8]= Node#; esi[7,0]= core# (relative to node)
    ;   edi - 32b start address of stack block
    ;   ebx - size of stack block
    ;   esp - 32b linear stack pointer
    ;

    ; Determine mode for SS base;
    mov     ecx, CR0                    ; Check for 32-bit protect mode
    bt      ecx, CR0_PE                 ;
    .if (!carry?)                       ; PE=0 means real mode
        mov     cx, cs                  ;
        .if (cx >= 0D000h)              ; If CS >= D000, it's a real mode segment. PM selector would be 08-> 1000
            ; alter SS:ESP for 16b Real Mode:
            mov     eax, edi            ;
            shr     eax, 4              ; Create a Real Mode segment for ss, ds, es
            mov     ss, ax              ;
            mov     ds, ax              ;
            mov     es, ax              ;
            shl     eax, 4              ;
            sub     edi, eax            ; Adjust the clearing pointer for Seg:Offset mode
            mov     esp, ebx            ; Make SP an offset from SS
            sub     esp, 4              ;
        .endif                          ; endif
        ; else
        ;   Default is to use Protected 32b Mode
    .endif
    ;
    ; Clear The Stack
    ;   Now that we have set the location and the MTRRs, initialize the cache by
    ;   reading then writing to zero all of the stack area.
    ; review:
    ;   ss  - Stack base
    ;   esp - stack pointer
    ;   ebx - size of stack block
    ;   esi[31:24]=Flags; esi[15,8]= Node#; esi[7,0]= core# (relative to node)
    ;   edi -  address of start of stack block
    ;
    shr     ebx, 2                      ; Convert byte count to dword count
    mov     cx, bx                      ; set cx for size count of DWORDS
    ; NOTE(review): only CX is loaded here; the rep below uses full ECX. This relies on
    ;   ECX[31:16] being zero from the CR0 read above (CD/NW/PG already cleared) - confirm.
    ; Check our flags - Don't clear an existing stack
    .if ( !(esi & (1 SHL FLAG_STACK_REENTRY)) )
        cld
        mov     esi, edi
        rep     lods DWORD PTR [esi]    ; Pre-load the range (allocate lines in cache)
        xor     eax, eax
        mov     cx, bx
        mov     esi, edi                ; Preserve base for push on stack
        rep     stos DWORD PTR [edi]    ; Clear the range
        mov     DWORD PTR [esp], 0ABCDDCBAh ; Put marker in top stack dword
        shl     ebx, 2                  ; Put stack size and base
        push    ebx                     ;  in top of stack
        push    esi

        mov     ecx, ebx                ; Return size of stack in bytes
        mov     eax, AGESA_SUCCESS      ; eax = AGESA_SUCCESS : no error return code
    .else
        movzx   ecx, cx
        shl     ecx, 2                  ; Return size of stack, in bytes
        mov     edx, CPU_EVENT_STACK_REENTRY
        mov     eax, AGESA_WARNING      ; eax = AGESA_WARNING (Stack has already been set up)
    .endif

AmdEnableStackExit:
    movd    ebx, mm0                    ; Restore return address
    movd    ebp, mm1                    ; Restore ebp
ENDM

;======================================================================
; AMD_DISABLE_STACK:  Destroy the stack inside the cache. This routine
;                     should only be executed on the BSP
;
;   In:
;       none
;
;   Out:
;       EAX = AGESA_SUCCESS
;
;   Preserved:
;       ebx
;   Destroyed:
;       eax, ecx, edx, esp, mmx5
;======================================================================
AMD_DISABLE_STACK MACRO

    mov     esp, ebx                    ; Save return address (esp is about to become invalid anyway)

    ; get node/core/flags of current executing core
    GET_NODE_ID_CORE_ID                 ; Sets ESI[15,8]= Node#; ESI[7,0]= core# (relative to node)

    ; Turn on modification enable bit
    mov     ecx, MTRR_SYS_CFG           ; MSR:C001_0010
    _RDMSR
    bts     eax, MTRR_FIX_DRAM_MOD_EN   ; Enable modifications
    _WRMSR

    ; Set lower 640K MTRRs for Write-Back memory caching
    mov     ecx, AMD_MTRR_FIX64k_00000
    mov     eax, 1E1E1E1Eh              ; Each byte = one 64K range, type 1Eh (WB, RdMem+WrMem to DRAM)
    mov     edx, eax
    _WRMSR                              ; 0 - 512K = WB Mem
    mov     ecx, AMD_MTRR_FIX16k_80000
    _WRMSR                              ; 512K - 640K = WB Mem

    ; Turn off modification enable bit
    mov     ecx, MTRR_SYS_CFG           ; MSR:C001_0010
    _RDMSR
    btr     eax, MTRR_FIX_DRAM_MOD_EN   ; Disable modification
    _WRMSR

    AMD_DISABLE_STACK_FAMILY_HOOK       ; Re-Enable 'normal' cache operations

    mov     ebx, esp                    ; restore return address (ebx)
    xor     eax, eax                    ; eax = AGESA_SUCCESS

ENDM
457ENDM