blob: 2576f6e0d97d53bf1f835cd9aef089f108566879 [file] [log] [blame]
/* SPDX-License-Identifier: GPL-2.0-only */
Martin Roth5c354b92019-04-22 14:55:16 -06002
Felix Held0ced2e82021-04-22 00:01:55 +02003#include <amdblocks/msr_zen.h>
Felix Helda5cdf752021-03-10 15:47:00 +01004#include <amdblocks/reset.h>
Martin Roth5c354b92019-04-22 14:55:16 -06005#include <cpu/x86/msr.h>
Furquan Shaikh76cedd22020-05-02 10:24:23 -07006#include <acpi/acpi.h>
Martin Roth5c354b92019-04-22 14:55:16 -06007#include <soc/cpu.h>
Martin Roth5c354b92019-04-22 14:55:16 -06008#include <console/console.h>
9#include <arch/bert_storage.h>
10#include <cper.h>
11
/*
 * Number of consecutive registers captured per MCAX bank. On Picasso, MISC4 is
 * the last register in use, so the count is its offset plus one.
 */
#define MCAX_USED_REGISTERS_PER_BANK (MCAX_MISC4_OFFSET + 1)
14
Martin Roth5c354b92019-04-22 14:55:16 -060015struct mca_bank {
16 int bank;
17 msr_t ctl;
18 msr_t sts;
19 msr_t addr;
20 msr_t misc;
21 msr_t cmask;
22};
23
24static inline size_t mca_report_size_reqd(void)
25{
26 size_t size;
27
28 size = sizeof(acpi_generic_error_status_t);
29
30 size += sizeof(acpi_hest_generic_data_v300_t);
31 size += sizeof(cper_proc_generic_error_section_t);
32
33 size += sizeof(acpi_hest_generic_data_v300_t);
34 size += sizeof(cper_ia32x64_proc_error_section_t);
35
36 /* Check Error */
37 size += cper_ia32x64_check_sz();
38
39 /* Context of MCG_CAP, MCG_STAT, MCG_CTL */
40 size += cper_ia32x64_ctx_sz_bytype(CPER_IA32X64_CTX_MSR, 3);
41
Felix Held43d8eca2021-04-22 23:25:28 +020042 /* Context of CTL, STATUS, ADDR, MISC0, CONFIG, IPID, SYND, RESERVED, DESTAT, DEADDR,
43 MISC1, MISC2, MISC3, MISC4 */
44 size += cper_ia32x64_ctx_sz_bytype(CPER_IA32X64_CTX_MSR, MCAX_USED_REGISTERS_PER_BANK);
Martin Roth5c354b92019-04-22 14:55:16 -060045
46 /* Context of CTL_MASK */
47 size += cper_ia32x64_ctx_sz_bytype(CPER_IA32X64_CTX_MSR, 1);
48
49 return size;
50}
51
52static enum cper_x86_check_type error_to_chktype(struct mca_bank *mci)
53{
54 int error = mca_err_type(mci->sts);
55
56 if (error == MCA_ERRTYPE_BUS)
57 return X86_PROCESSOR_BUS_CHK;
58 if (error == MCA_ERRTYPE_INT)
59 return X86_PROCESSOR_MS_CHK;
60 if (error == MCA_ERRTYPE_MEM)
61 return X86_PROCESSOR_CACHE_CHK;
62 if (error == MCA_ERRTYPE_TLB)
63 return X86_PROCESSOR_TLB_CHK;
64
65 return X86_PROCESSOR_MS_CHK; /* unrecognized */
66}
67
68/* Fill additional information in the Generic Processor Error Section. */
69static void fill_generic_section(cper_proc_generic_error_section_t *sec,
70 struct mca_bank *mci)
71{
72 int type = mca_err_type(mci->sts);
73
74 if (type == MCA_ERRTYPE_BUS) /* try to map MCA errors to CPER types */
75 sec->error_type = GENPROC_ERRTYPE_BUS;
76 else if (type == MCA_ERRTYPE_INT)
77 sec->error_type = GENPROC_ERRTYPE_UARCH;
78 else if (type == MCA_ERRTYPE_MEM)
79 sec->error_type = GENPROC_ERRTYPE_CACHE;
80 else if (type == MCA_ERRTYPE_TLB)
81 sec->error_type = GENPROC_ERRTYPE_TLB;
82 else
83 sec->error_type = GENPROC_ERRTYPE_UNKNOWN;
84 sec->validation |= GENPROC_VALID_PROC_ERR_TYPE;
85}
86
87/* Convert an error reported by an MCA bank into BERT information to be reported
88 * by the OS. The ACPI driver doesn't recognize/parse the IA32/X64 structure,
89 * which is the best method to report MSR context. As a result, add two
90 * structures: A "processor generic error" that is parsed, and an IA32/X64 one
91 * to capture complete information.
Martin Roth5c354b92019-04-22 14:55:16 -060092 */
93static void build_bert_mca_error(struct mca_bank *mci)
94{
95 acpi_generic_error_status_t *status;
96 acpi_hest_generic_data_v300_t *gen_entry;
97 acpi_hest_generic_data_v300_t *x86_entry;
98 cper_proc_generic_error_section_t *gen_sec;
99 cper_ia32x64_proc_error_section_t *x86_sec;
100 cper_ia32x64_proc_error_info_t *chk;
101 cper_ia32x64_context_t *ctx;
102
103 if (mca_report_size_reqd() > bert_storage_remaining())
104 goto failed;
105
106 status = bert_new_event(&CPER_SEC_PROC_GENERIC_GUID);
107 if (!status)
108 goto failed;
109
110 gen_entry = acpi_hest_generic_data3(status);
111 gen_sec = section_of_acpientry(gen_sec, gen_entry);
112
113 fill_generic_section(gen_sec, mci);
114
115 x86_entry = bert_append_ia32x64(status);
116 x86_sec = section_of_acpientry(x86_sec, x86_entry);
117
118 chk = new_cper_ia32x64_check(status, x86_sec, error_to_chktype(mci));
119 if (!chk)
120 goto failed;
121
122 ctx = cper_new_ia32x64_context_msr(status, x86_sec, IA32_MCG_CAP, 3);
123 if (!ctx)
124 goto failed;
Felix Held43d8eca2021-04-22 23:25:28 +0200125 ctx = cper_new_ia32x64_context_msr(status, x86_sec, MCAX_CTL_MSR(mci->bank),
126 MCAX_USED_REGISTERS_PER_BANK);
Martin Roth5c354b92019-04-22 14:55:16 -0600127 if (!ctx)
128 goto failed;
Felix Held0ced2e82021-04-22 00:01:55 +0200129 ctx = cper_new_ia32x64_context_msr(status, x86_sec, MCA_CTL_MASK_MSR(mci->bank), 1);
Martin Roth5c354b92019-04-22 14:55:16 -0600130 if (!ctx)
131 goto failed;
132
133 return;
134
135failed:
136 /* We're here because of a hardware error, don't break something else */
137 printk(BIOS_ERR, "Error: Not enough room in BERT region for Machine Check error\n");
138}
139
/* Human-readable unit name per MCA bank index; bank 4 has no name. */
static const char *const mca_bank_name[] = {
	[0] = "Load-store unit",
	[1] = "Instruction fetch unit",
	[2] = "L2 cache unit",
	[3] = "Decode unit",
	[4] = "",
	[5] = "Execution unit",
	[6] = "Floating point unit",
	[7] = "L3 cache unit",
};
150
Felix Held43d8eca2021-04-22 23:25:28 +0200151/* Check the Machine Check Architecture Extension registers */
Martin Roth5c354b92019-04-22 14:55:16 -0600152void check_mca(void)
153{
154 int i;
155 msr_t cap;
156 struct mca_bank mci;
157 int num_banks;
158
159 cap = rdmsr(IA32_MCG_CAP);
160 num_banks = cap.lo & MCA_BANKS_MASK;
161
Felix Heldaea59402021-05-28 19:10:13 +0200162 for (i = 0 ; i < num_banks ; i++) {
163 mci.sts = rdmsr(MCAX_STATUS_MSR(i));
164 if (mci.sts.hi || mci.sts.lo) {
165 int core = cpuid_ebx(1) >> 24;
Martin Roth5c354b92019-04-22 14:55:16 -0600166
Felix Heldaea59402021-05-28 19:10:13 +0200167 printk(BIOS_WARNING, "#MC Error: core %d, bank %d %s\n",
168 core, i,
169 i < ARRAY_SIZE(mca_bank_name) ? mca_bank_name[i] : "");
Martin Roth5c354b92019-04-22 14:55:16 -0600170
Felix Heldaea59402021-05-28 19:10:13 +0200171 printk(BIOS_WARNING, " MC%d_STATUS = %08x_%08x\n",
172 i, mci.sts.hi, mci.sts.lo);
173 mci.addr = rdmsr(MCAX_ADDR_MSR(i));
174 printk(BIOS_WARNING, " MC%d_ADDR = %08x_%08x\n",
175 i, mci.addr.hi, mci.addr.lo);
176 mci.misc = rdmsr(MCAX_MISC0_MSR(i));
177 printk(BIOS_WARNING, " MC%d_MISC = %08x_%08x\n",
178 i, mci.misc.hi, mci.misc.lo);
179 mci.ctl = rdmsr(MCAX_CTL_MSR(i));
180 printk(BIOS_WARNING, " MC%d_CTL = %08x_%08x\n",
181 i, mci.ctl.hi, mci.ctl.lo);
182 mci.cmask = rdmsr(MCA_CTL_MASK_MSR(i));
183 printk(BIOS_WARNING, " MC%d_CTL_MASK = %08x_%08x\n",
184 i, mci.cmask.hi, mci.cmask.lo);
Martin Roth5c354b92019-04-22 14:55:16 -0600185
Felix Heldaea59402021-05-28 19:10:13 +0200186 mci.bank = i;
187 if (CONFIG(ACPI_BERT)
188 && mca_valid(mci.sts))
189 build_bert_mca_error(&mci);
Martin Roth5c354b92019-04-22 14:55:16 -0600190 }
191 }
192
193 /* zero the machine check error status registers */
194 mci.sts.lo = 0;
195 mci.sts.hi = 0;
196 for (i = 0 ; i < num_banks ; i++)
Felix Held43d8eca2021-04-22 23:25:28 +0200197 wrmsr(MCAX_STATUS_MSR(i), mci.sts);
Martin Roth5c354b92019-04-22 14:55:16 -0600198}