/* SPDX-License-Identifier: GPL-2.0-only */
/* Early initialization code for aarch64 (a.k.a. armv8) */

#include <arch/asm.h>
#include <soc/addressmap.h>

.arch armv8-a+fp


ENTRY(_start)

	.org 0
	/**
	 * According to the reference manual, the first instruction is fetched
	 * from offset 0x100, but a branch instruction is always placed at
	 * offset 0. Support two entry points for now.
	 * To save memory, put the Cavium-specific init code between those two
	 * entry points.
	 */
	ic	ialluis
	fmov	d30, x0		/* Save X0 in FPR for use later */
	/**
	 * The BDK stores X1 for later use, but it turns out that we don't need
	 * this "feature". The idea is to hide the devicetree somewhere in
	 * flash where only the ROM will find it and point to it using X1.
	 */
	adr	x1, _start	/* x1 = _start location based on PC */
	fmov	d29, x1		/* Save PC in FPR for use later */

#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
	/* Change the core to big endian mode for EL3 */
	mrs	x0, SCTLR_EL3
	mov	x1, 1 << 25	/* Set SCTLR_EL3[EE]=1 */
	orr	x0, x0, x1
	msr	SCTLR_EL3, x0
	#define ENDIAN_CONVERT64(reg) rev reg, reg
	#define ENDIAN_CONVERT32(reg) rev reg, reg
	#define ENDIAN_CONVERT16(reg) rev16 reg, reg
#elif __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
	/* Nothing needed, default is little endian */
	#define ENDIAN_CONVERT64(reg)
	#define ENDIAN_CONVERT32(reg)
	#define ENDIAN_CONVERT16(reg)
#else
	#error Unknown endianness
#endif

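	/*
	 * Form the 64-bit LMC0 BAR0 base address in x0. The constant is built
	 * in two halves, presumably because the full 64-bit value cannot be
	 * encoded as a single mov immediate.
	 */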
	mov	x0, (LMC0_PF_BAR0 >> 32)
	lsl	x0, x0, 32
	mov	x1, (LMC0_PF_BAR0 & 0xffffffff)
	orr	x0, x0, x1

	/* Test if DRAM PLL is running */
	ldr	x1, [x0, LMC0_DDR_PLL_CTL0]

	tst	x1, 0x80

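	/*
	 * If the DDR PLL is already up, DRAM has presumably been initialized
	 * by an earlier stage, so the cache-as-RAM setup can be skipped.
	 */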
	b.ne	cache_setup_done

	bl	_setup_car

cache_setup_done:

	/* Check that we're running on the node we're linked for */
	mrs	x0, MPIDR_EL1
	ubfx	x0, x0, 16, 8	/* Bits 23:16 are the physical node ID */
	mov	x1, 0x0
	cmp	x0, x1

	b.ne	_wfi

node_check_done:
	/* Get code position */
	mov	x1, 0x020000
	mov	x0, BOOTROM_OFFSET
	add	x1, x0, x1

	adr	x0, _start

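	/*
	 * x1 now holds the expected load address (BOOTROM_OFFSET + 0x20000),
	 * x0 the address we are actually executing from.
	 */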
	/**
	 * Check if the IROM has loaded the code to BOOTROM_OFFSET.
	 * In case the offset is wrong, try to relocate.
	 * Ideally the following code is never executed.
	 * FIXME: Add region overlap check.
	 */
	cmp	x0, x1
	b.eq	after_relocate

relocate:
	/* Get bootblock length (_eprogram - _program) */
	ldr	x2, =_program
	ldr	x3, =_eprogram
	sub	x2, x3, x2
	b	copy_code

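	/* .align 7 places the copy loop on a 128-byte (2^7) boundary. */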
.align 7
copy_code:
	ldp	q0, q1, [x1], 32	/* Load 32 bytes */
	subs	w2, w2, 32		/* Subtract 32 from length, setting flags */
	stp	q0, q1, [x0], 32	/* Store 32 bytes */
	b.gt	copy_code		/* Repeat if length is still positive */
	dmb	sy

	/* Load the actual location we're supposed to be at */
	adr	x0, after_relocate	/* Relative address */
	adr	x1, _start		/* Relative address */
	sub	x0, x0, x1		/* This only works if _start is supposed to be at zero */
	mov	x1, BOOTROM_OFFSET
	add	x0, x0, x1
	br	x0			/* Branch to relocated code */

	ic	ialluis			/* Clear the icache now that all code is correct */

after_relocate:
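	/*
	 * s3_0_c11_c0_4 is a Cavium implementation-defined control register
	 * (assumed to be CVMCTL_EL1 on ThunderX parts).
	 */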
	/* Allow unaligned memory access as long as MMU is disabled */
	mrs	x22, s3_0_c11_c0_4
	orr	x22, x22, #(1 << 37)	/* Set DCVA47 */
	msr	s3_0_c11_c0_4, x22

	bl	start

	/* Real entry point */
	.org 0x100
	b	_start
ENDPROC(_start)


ENTRY(_setup_car)
	mrs	x0, MIDR_EL1
	ubfx	x0, x0, 4, 12	/* Bits 15:4 are the part number */
	cmp	x0, 0xb0
	b.ge	_wfi

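	/* Part numbers of 0xb0 and above are not handled by this code path. */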
thunder1_cache_setup:
	/**
	 * Set up the L2 cache to allow secure access to all of the address
	 * space.
	 * thunder1 compatibility list:
	 * - CN81XX
	 * - CN83XX
	 * - CN88XX
	 */
	#define REGIONX_START	0x1000
	#define REGIONX_END	0x1008
	#define REGIONX_ATTR	0x1010
	mov	x0, L2C_PF_BAR0 >> 32
	lsl	x0, x0, 32
	mov	x1, (L2C_PF_BAR0 & 0xffffffff)
	orr	x0, x0, x1
	str	xzr, [x0, REGIONX_START]	/* Region start address = 0 */
	mov	x1, 0x3fffff00000		/* Region end = top of the address space */
	ENDIAN_CONVERT64(x1)
	str	x1, [x0, REGIONX_END]
	mov	x1, 2				/* Secure only access */
	ENDIAN_CONVERT64(x1)
	str	x1, [x0, REGIONX_ATTR]
	/* Update way partition to allow core 0 to write to L2 */
	#define L2C_WPAR_PP0_OFFSET 0x40000
	mov	x1, L2C_WPAR_PP0_OFFSET
	str	xzr, [x0, x1]
	ldr	xzr, [x0, x1]	/* Read back to make sure the write completed */
	#undef REGIONX_START
	#undef REGIONX_END
	#undef REGIONX_ATTR
	#undef L2C_WPAR_PP0_OFFSET

	/**
	 * At this point the whole CAR is readable and writable, but if we
	 * touch too many cache lines our code might get flushed out.
	 * We have to lock all cache lines that are to be used as RAM, which
	 * are the ones marked as SRAM in memlayout.
	 */
	mrs	x0, CTR_EL0	/* Get cache-line size */
	/* [19:16] - Log2(number of words in a cache line) */
	ubfx	x0, x0, 16, 4
	mov	x1, 4		/* Bytes in a word (32-bit) */
	lsl	x0, x1, x0	/* Number of bytes in x0 */

	sub	x1, x0, 1
	mvn	x1, x1		/* Place cache-line alignment mask in x1 */

	ldr	x3, =_sram
	and	x3, x3, x1	/* Align addresses with cache lines */
	ldr	x4, =_esram
	add	x4, x4, x0
	sub	x4, x4, 1
	and	x4, x4, x1	/* Align addresses with cache lines */
	sub	x2, x4, x3	/* Store SRAM length in x2 */

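	/*
	 * The SYS instruction below (op1=0, CRn=c11, CRm=c1, op2=4, VA in x3)
	 * is a Cavium implementation-defined operation, presumably the L2
	 * cache-line lock-down used to pin each SRAM line in the cache.
	 */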
lock_cache_lines:
	sys	#0, c11, c1, #4, x3
	add	x3, x3, x0	/* Increment address by cache-line bytes */
	subs	w2, w2, w0	/* Subtract cache-line bytes from length */
	b.gt	lock_cache_lines	/* Repeat if length is still positive */

	/**
	 * The locked region isn't considered dirty by the L2. Do a read/write
	 * of each cache line to force each one to be dirty. This is needed
	 * across the whole line to make sure the L2 dirty bits are all up to
	 * date.
	 * NOTE: If we had relocated, we could simply memset the whole memory!
	 */
	ldr	x3, =_sram
	and	x3, x3, x1	/* Align addresses with cache lines */
	ldr	x4, =_esram
	add	x4, x4, x0
	sub	x4, x4, 1
	and	x4, x4, x1	/* Align addresses with cache lines */
	sub	x2, x4, x3	/* Store SRAM length in x2 */
	mov	x4, x3
	b	dirty_cache_line

.align 7
dirty_cache_line:
	ldp	q0, q1, [x3], 32	/* Load 32 bytes */
	subs	w2, w2, 32		/* Subtract 32 from length, setting flags */
	stp	q0, q1, [x4], 32	/* Store 32 bytes */
	b.gt	dirty_cache_line	/* Repeat if length is still positive */
	dmb	sy

clear_interrupts:
	/**
	 * Since we accessed the DRAM's address space while the memory
	 * controller isn't running, some interrupt flags have been set.
	 * Tidy up our mess now (valid for CN81XX only).
	 */
	mov	x0, (L2C_TAD0_INT_W1C >> 32)
	lsl	x0, x0, 32
	mov	x1, (L2C_TAD0_INT_W1C & 0xffffffff)
	orr	x0, x0, x1

	ldr	x1, [x0]
	orr	x1, x1, 0x1c00	/* Clear WRDISLMC, RDDISLMC, RDNXM */
	str	x1, [x0]

	ret
ENDPROC(_setup_car)

ENTRY(_wfi)
	wfi
ENDPROC(_wfi)

ENTRY(start)
	bl	arm64_init_cpu

	fmov	x0, d30	/* The original X0, info from previous image */
	fmov	x1, d29	/* The original PC we were loaded at */

	/* Call C entry */
	bl	bootblock_main

ENDPROC(start)