blob: eddaba495192d2fda2b55435e9b3f1d8aa8cc0ea [file] [log] [blame]
/*
 * This file is part of the coreboot project.
 *
 * Copyright (C) 2018 Advanced Micro Devices, Inc.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; version 2 of the License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 */
15
16#include <cpu/x86/msr.h>
17#include <arch/acpi.h>
Marshall Dawson0b4a1e22018-09-04 13:11:42 -060018#include <soc/cpu.h>
19#include <soc/northbridge.h>
20#include <console/console.h>
Marshall Dawson64e1fca2018-09-04 13:18:57 -060021#include <arch/bert_storage.h>
22#include <cper.h>
Marshall Dawson0b4a1e22018-09-04 13:11:42 -060023
Marshall Dawsone1bd38b2018-09-04 13:15:11 -060024struct mca_bank {
Marshall Dawson64e1fca2018-09-04 13:18:57 -060025 int bank;
Marshall Dawsone1bd38b2018-09-04 13:15:11 -060026 msr_t ctl;
27 msr_t sts;
28 msr_t addr;
29 msr_t misc;
30 msr_t cmask;
31};
32
Marshall Dawson64e1fca2018-09-04 13:18:57 -060033static inline size_t mca_report_size_reqd(void)
34{
35 size_t size;
36
37 size = sizeof(acpi_generic_error_status_t);
38
39 size += sizeof(acpi_hest_generic_data_v300_t);
40 size += sizeof(cper_proc_generic_error_section_t);
41
42 size += sizeof(acpi_hest_generic_data_v300_t);
43 size += sizeof(cper_ia32x64_proc_error_section_t);
44
45 /* Check Error */
46 size += cper_ia32x64_check_sz();
47
48 /* Context of MCG_CAP, MCG_STAT, MCG_CTL */
49 size += cper_ia32x64_ctx_sz_bytype(CPER_IA32X64_CTX_MSR, 3);
50
51 /* Context of MCi_CTL, MCi_STATUS, MCi_ADDR, MCi_MISC */
52 size += cper_ia32x64_ctx_sz_bytype(CPER_IA32X64_CTX_MSR, 4);
53
54 /* Context of CTL_MASK */
55 size += cper_ia32x64_ctx_sz_bytype(CPER_IA32X64_CTX_MSR, 1);
56
57 return size;
58}
59
60static enum cper_x86_check_type error_to_chktype(struct mca_bank *mci)
61{
62 int error = mca_err_type(mci->sts);
63
64 if (error == MCA_ERRTYPE_BUS)
65 return X86_PROCESSOR_BUS_CHK;
66 if (error == MCA_ERRTYPE_INT)
67 return X86_PROCESSOR_MS_CHK;
68 if (error == MCA_ERRTYPE_MEM)
69 return X86_PROCESSOR_CACHE_CHK;
70 if (error == MCA_ERRTYPE_TLB)
71 return X86_PROCESSOR_TLB_CHK;
72
73 return X86_PROCESSOR_MS_CHK; /* unrecognized */
74}
75
76/* Fill additional information in the Generic Processor Error Section. */
77static void fill_generic_section(cper_proc_generic_error_section_t *sec,
78 struct mca_bank *mci)
79{
80 int type = mca_err_type(mci->sts);
81
82 if (type == MCA_ERRTYPE_BUS) /* try to map MCA errors to CPER types */
83 sec->error_type = GENPROC_ERRTYPE_BUS;
84 else if (type == MCA_ERRTYPE_INT)
85 sec->error_type = GENPROC_ERRTYPE_UARCH;
86 else if (type == MCA_ERRTYPE_MEM)
87 sec->error_type = GENPROC_ERRTYPE_CACHE;
88 else if (type == MCA_ERRTYPE_TLB)
89 sec->error_type = GENPROC_ERRTYPE_TLB;
90 else
91 sec->error_type = GENPROC_ERRTYPE_UNKNOWN;
92 sec->validation |= GENPROC_VALID_PROC_ERR_TYPE;
93}
94
95/* Convert an error reported by an MCA bank into BERT information to be reported
96 * by the OS. The ACPI driver doesn't recognize/parse the IA32/X64 structure,
97 * which is the best method to report MSR context. As a result, add two
98 * structures: A "processor generic error" that is parsed, and an IA32/X64 one
99 * to capture complete information.
100 *
101 * Future work may attempt to interpret the specific Family 15h error symptoms
102 * found in the MCA registers. This data could enhance the reporting of the
103 * Processor Generic section and the failing error/check added to the
104 * IA32/X64 section.
105 */
106static void build_bert_mca_error(struct mca_bank *mci)
107{
108 acpi_generic_error_status_t *status;
109 acpi_hest_generic_data_v300_t *gen_entry;
110 acpi_hest_generic_data_v300_t *x86_entry;
111 cper_proc_generic_error_section_t *gen_sec;
112 cper_ia32x64_proc_error_section_t *x86_sec;
113 cper_ia32x64_proc_error_info_t *chk;
114 cper_ia32x64_context_t *ctx;
115
116 if (mca_report_size_reqd() > bert_storage_remaining())
117 goto failed;
118
119 status = bert_new_event(&CPER_SEC_PROC_GENERIC_GUID);
120 if (!status)
121 goto failed;
122
123 gen_entry = acpi_hest_generic_data3(status);
124 gen_sec = section_of_acpientry(gen_sec, gen_entry);
125
126 fill_generic_section(gen_sec, mci);
127
128 x86_entry = bert_append_ia32x64(status);
129 x86_sec = section_of_acpientry(x86_sec, x86_entry);
130
131 chk = new_cper_ia32x64_check(status, x86_sec, error_to_chktype(mci));
132 if (!chk)
133 goto failed;
134
Elyes HAOUAS400ce552018-10-12 10:54:30 +0200135 ctx = cper_new_ia32x64_context_msr(status, x86_sec, IA32_MCG_CAP, 3);
Marshall Dawson64e1fca2018-09-04 13:18:57 -0600136 if (!ctx)
137 goto failed;
138 ctx = cper_new_ia32x64_context_msr(status, x86_sec,
Elyes HAOUAS400ce552018-10-12 10:54:30 +0200139 IA32_MC0_CTL + (mci->bank * 4), 4);
Marshall Dawson64e1fca2018-09-04 13:18:57 -0600140 if (!ctx)
141 goto failed;
142 ctx = cper_new_ia32x64_context_msr(status, x86_sec,
143 MC0_CTL_MASK + mci->bank, 1);
144 if (!ctx)
145 goto failed;
146
147 return;
148
149failed:
150 /* We're here because of a hardware error, don't break something else */
151 printk(BIOS_ERR, "Error: Not enough room in BERT region for Machine Check error\n");
152}
153
/* Human-readable names of the MCA banks, indexed by bank number. */
static const char *const mca_bank_name[] = {
	[0] = "Load-store unit",
	[1] = "Instruction fetch unit",
	[2] = "Combined unit",
	[3] = "Reserved",
	[4] = "Northbridge",
	[5] = "Execution unit",
	[6] = "Floating point unit"
};
163
164void check_mca(void)
165{
166 int i;
Marshall Dawsone1bd38b2018-09-04 13:15:11 -0600167 msr_t cap;
168 struct mca_bank mci;
Marshall Dawson0b4a1e22018-09-04 13:11:42 -0600169 int num_banks;
170
Elyes HAOUAS400ce552018-10-12 10:54:30 +0200171 cap = rdmsr(IA32_MCG_CAP);
Marshall Dawsone1bd38b2018-09-04 13:15:11 -0600172 num_banks = cap.lo & MCA_BANKS_MASK;
Marshall Dawson0b4a1e22018-09-04 13:11:42 -0600173
174 if (is_warm_reset()) {
175 for (i = 0 ; i < num_banks ; i++) {
176 if (i == 3) /* Reserved in Family 15h */
177 continue;
178
Elyes HAOUAS400ce552018-10-12 10:54:30 +0200179 mci.sts = rdmsr(IA32_MC0_STATUS + (i * 4));
Marshall Dawsone1bd38b2018-09-04 13:15:11 -0600180 if (mci.sts.hi || mci.sts.lo) {
Marshall Dawson0b4a1e22018-09-04 13:11:42 -0600181 int core = cpuid_ebx(1) >> 24;
182
183 printk(BIOS_WARNING, "#MC Error: core %d, bank %d %s\n",
184 core, i, mca_bank_name[i]);
185
186 printk(BIOS_WARNING, " MC%d_STATUS = %08x_%08x\n",
Marshall Dawsone1bd38b2018-09-04 13:15:11 -0600187 i, mci.sts.hi, mci.sts.lo);
188 mci.addr = rdmsr(MC0_ADDR + (i * 4));
Marshall Dawson0b4a1e22018-09-04 13:11:42 -0600189 printk(BIOS_WARNING, " MC%d_ADDR = %08x_%08x\n",
Marshall Dawsone1bd38b2018-09-04 13:15:11 -0600190 i, mci.addr.hi, mci.addr.lo);
191 mci.misc = rdmsr(MC0_MISC + (i * 4));
Marshall Dawson0b4a1e22018-09-04 13:11:42 -0600192 printk(BIOS_WARNING, " MC%d_MISC = %08x_%08x\n",
Marshall Dawsone1bd38b2018-09-04 13:15:11 -0600193 i, mci.misc.hi, mci.misc.lo);
Elyes HAOUAS400ce552018-10-12 10:54:30 +0200194 mci.ctl = rdmsr(IA32_MC0_CTL + (i * 4));
Marshall Dawson0b4a1e22018-09-04 13:11:42 -0600195 printk(BIOS_WARNING, " MC%d_CTL = %08x_%08x\n",
Marshall Dawsone1bd38b2018-09-04 13:15:11 -0600196 i, mci.ctl.hi, mci.ctl.lo);
197 mci.cmask = rdmsr(MC0_CTL_MASK + i);
Marshall Dawson0b4a1e22018-09-04 13:11:42 -0600198 printk(BIOS_WARNING, " MC%d_CTL_MASK = %08x_%08x\n",
Marshall Dawsone1bd38b2018-09-04 13:15:11 -0600199 i, mci.cmask.hi, mci.cmask.lo);
Marshall Dawson64e1fca2018-09-04 13:18:57 -0600200
201 mci.bank = i;
202 if (IS_ENABLED(CONFIG_ACPI_BERT)
203 && mca_valid(mci.sts))
204 build_bert_mca_error(&mci);
Marshall Dawson0b4a1e22018-09-04 13:11:42 -0600205 }
206 }
207 }
208
209 /* zero the machine check error status registers */
Marshall Dawsone1bd38b2018-09-04 13:15:11 -0600210 mci.sts.lo = 0;
211 mci.sts.hi = 0;
Marshall Dawson0b4a1e22018-09-04 13:11:42 -0600212 for (i = 0 ; i < num_banks ; i++)
Elyes HAOUAS400ce552018-10-12 10:54:30 +0200213 wrmsr(IA32_MC0_STATUS + (i * 4), mci.sts);
Marshall Dawson0b4a1e22018-09-04 13:11:42 -0600214}