Annotation of sys/arch/amd64/amd64/amd64errata.c, Revision 1.1.1.1
1.1 nbrk 1: /* $OpenBSD: amd64errata.c,v 1.1 2007/02/17 17:35:43 tom Exp $ */
2: /* $NetBSD: errata.c,v 1.6 2007/02/05 21:05:45 ad Exp $ */
3:
4: /*-
5: * Copyright (c) 2007 The NetBSD Foundation, Inc.
6: * All rights reserved.
7: *
8: * This code is derived from software contributed to The NetBSD Foundation
9: * by Andrew Doran.
10: *
11: * Redistribution and use in source and binary forms, with or without
12: * modification, are permitted provided that the following conditions
13: * are met:
14: * 1. Redistributions of source code must retain the above copyright
15: * notice, this list of conditions and the following disclaimer.
16: * 2. Redistributions in binary form must reproduce the above copyright
17: * notice, this list of conditions and the following disclaimer in the
18: * documentation and/or other materials provided with the distribution.
19: * 3. All advertising materials mentioning features or use of this software
20: * must display the following acknowledgement:
21: * This product includes software developed by the NetBSD
22: * Foundation, Inc. and its contributors.
23: * 4. Neither the name of The NetBSD Foundation nor the names of its
24: * contributors may be used to endorse or promote products derived
25: * from this software without specific prior written permission.
26: *
27: * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
28: * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
29: * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30: * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
31: * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32: * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33: * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34: * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35: * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36: * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37: * POSSIBILITY OF SUCH DAMAGE.
38: */
39:
40: /*
41: * Detect, report on, and work around known errata with AMD's amd64 CPUs.
42: *
43: * This is generalised because there are quite a few problems that the
44: * BIOS can patch via MSR, but it is not known if the OS can patch these
45: * yet. The list is expected to grow over time.
46: *
47: * The data here are from: Revision Guide for AMD Athlon 64 and
48: * AMD Opteron Processors, Publication #25759, Revision: 3.69,
49: * Issue Date: September 2006
50: */
51:
52: #include <sys/param.h>
53:
54: #include <sys/types.h>
55: #include <sys/systm.h>
56:
57: #include <machine/cpu.h>
58: #include <machine/cpufunc.h>
59: #include <machine/specialreg.h>
60:
61:
62: typedef struct errata {
63: u_short e_num;
64: u_short e_reported;
65: u_int e_data1;
66: const uint8_t *e_set;
67: int (*e_act)(struct cpu_info *, struct errata *);
68: uint64_t e_data2;
69: } errata_t;
70:
/*
 * Symbolic CPU revision names.  The values are small enough to be stored
 * in the uint8_t revision sets; OINK is not a revision but the terminator
 * used by cpurevs[] and by every revision set.
 */
typedef enum cpurev {
	BH_E4,
	CH_CG, CH_D0,
	DH_CG, DH_D0, DH_E3, DH_E6,
	JH_E1, JH_E6,
	SH_B0, SH_B3, SH_C0, SH_CG, SH_D0, SH_E4, SH_E5,
	OINK
} cpurev_t;
76:
77: static const u_int cpurevs[] = {
78: BH_E4, 0x0020fb1, CH_CG, 0x0000f82, CH_CG, 0x0000fb2,
79: CH_D0, 0x0010f80, CH_D0, 0x0010fb0, DH_CG, 0x0000fc0,
80: DH_CG, 0x0000fe0, DH_CG, 0x0000ff0, DH_D0, 0x0010fc0,
81: DH_D0, 0x0010ff0, DH_E3, 0x0020fc0, DH_E3, 0x0020ff0,
82: DH_E6, 0x0020fc2, DH_E6, 0x0020ff2, JH_E1, 0x0020f10,
83: JH_E6, 0x0020f12, JH_E6, 0x0020f32, SH_B0, 0x0000f40,
84: SH_B3, 0x0000f51, SH_C0, 0x0000f48, SH_C0, 0x0000f58,
85: SH_CG, 0x0000f4a, SH_CG, 0x0000f5a, SH_CG, 0x0000f7a,
86: SH_D0, 0x0010f40, SH_D0, 0x0010f50, SH_D0, 0x0010f70,
87: SH_E4, 0x0020f51, SH_E4, 0x0020f71, SH_E5, 0x0020f42,
88: OINK
89: };
90:
91: static const uint8_t amd64_errata_set1[] = {
92: SH_B3, SH_C0, SH_CG, DH_CG, CH_CG, OINK
93: };
94:
95: static const uint8_t amd64_errata_set2[] = {
96: SH_B3, SH_C0, SH_CG, DH_CG, CH_CG, SH_D0, DH_D0, CH_D0, OINK
97: };
98:
99: static const uint8_t amd64_errata_set3[] = {
100: JH_E1, DH_E3, OINK
101: };
102:
103: static const uint8_t amd64_errata_set4[] = {
104: SH_C0, SH_CG, DH_CG, CH_CG, SH_D0, DH_D0, CH_D0, JH_E1,
105: DH_E3, SH_E4, BH_E4, SH_E5, DH_E6, JH_E6, OINK
106: };
107:
108: static const uint8_t amd64_errata_set5[] = {
109: SH_B3, OINK
110: };
111:
112: static const uint8_t amd64_errata_set6[] = {
113: SH_C0, SH_CG, DH_CG, CH_CG, OINK
114: };
115:
116: static const uint8_t amd64_errata_set7[] = {
117: SH_C0, SH_CG, DH_CG, CH_CG, SH_D0, DH_D0, CH_D0, OINK
118: };
119:
120: static const uint8_t amd64_errata_set8[] = {
121: BH_E4, CH_CG, CH_CG, CH_D0, CH_D0, DH_CG, DH_CG, DH_CG,
122: DH_D0, DH_D0, DH_E3, DH_E3, DH_E6, DH_E6, JH_E1, JH_E6,
123: JH_E6, SH_B0, SH_B3, SH_C0, SH_C0, SH_CG, SH_CG, SH_CG,
124: SH_D0, SH_D0, SH_D0, SH_E4, SH_E4, SH_E5, OINK
125: };
126:
127: static int amd64_errata_setmsr(struct cpu_info *, errata_t *);
128: static int amd64_errata_testmsr(struct cpu_info *, errata_t *);
129:
130: static errata_t errata[] = {
131: /*
132: * 81: Cache Coherency Problem with Hardware Prefetching
133: * and Streaming Stores
134: */
135: {
136: 81, 0, MSR_DC_CFG, amd64_errata_set5,
137: amd64_errata_testmsr, DC_CFG_DIS_SMC_CHK_BUF
138: },
139: /*
140: * 86: DRAM Data Masking Feature Can Cause ECC Failures
141: */
142: {
143: 86, 0, MSR_NB_CFG, amd64_errata_set1,
144: amd64_errata_testmsr, NB_CFG_DISDATMSK
145: },
146: /*
147: * 89: Potential Deadlock With Locked Transactions
148: */
149: {
150: 89, 0, MSR_NB_CFG, amd64_errata_set8,
151: amd64_errata_testmsr, NB_CFG_DISIOREQLOCK
152: },
153: /*
154: * 94: Sequential Prefetch Feature May Cause Incorrect
155: * Processor Operation
156: */
157: {
158: 94, 0, MSR_IC_CFG, amd64_errata_set1,
159: amd64_errata_testmsr, IC_CFG_DIS_SEQ_PREFETCH
160: },
161: /*
162: * 97: 128-Bit Streaming Stores May Cause Coherency
163: * Failure
164: *
165: * XXX "This workaround must not be applied to processors
166: * prior to revision C0." We don't apply it, but if it
167: * can't be applied, it shouldn't be reported.
168: */
169: {
170: 97, 0, MSR_DC_CFG, amd64_errata_set6,
171: amd64_errata_testmsr, DC_CFG_DIS_CNV_WC_SSO
172: },
173: /*
174: * 104: DRAM Data Masking Feature Causes ChipKill ECC
175: * Failures When Enabled With x8/x16 DRAM Devices
176: */
177: {
178: 104, 0, MSR_NB_CFG, amd64_errata_set7,
179: amd64_errata_testmsr, NB_CFG_DISDATMSK
180: },
181: /*
182: * 113: Enhanced Write-Combining Feature Causes System Hang
183: */
184: {
185: 113, 0, MSR_BU_CFG, amd64_errata_set3,
186: amd64_errata_setmsr, BU_CFG_WBENHWSBDIS
187: },
188: #ifdef MULTIPROCESSOR
189: /*
190: * 69: Multiprocessor Coherency Problem with Hardware
191: * Prefetch Mechanism
192: */
193: {
194: 69, 0, MSR_BU_CFG, amd64_errata_set5,
195: amd64_errata_setmsr, BU_CFG_WBPFSMCCHKDIS
196: },
197: /*
198: * 101: DRAM Scrubber May Cause Data Corruption When Using
199: * Node-Interleaved Memory
200: */
201: {
202: 101, 0, 0, amd64_errata_set2,
203: NULL, 0
204: },
205: /*
206: * 106: Potential Deadlock with Tightly Coupled Semaphores
207: * in an MP System
208: */
209: {
210: 106, 0, MSR_LS_CFG, amd64_errata_set2,
211: amd64_errata_testmsr, LS_CFG_DIS_LS2_SQUISH
212: },
213: /*
214: * 107: Possible Multiprocessor Coherency Problem with
215: * Setting Page Table A/D Bits
216: */
217: {
218: 107, 0, MSR_BU_CFG, amd64_errata_set2,
219: amd64_errata_testmsr, BU_CFG_THRL2IDXCMPDIS
220: },
221: #if 0
222: /*
223: * 122: TLB Flush Filter May Cause Coherency Problem in
224: * Multiprocessor Systems
225: */
226: {
227: 122, 0, MSR_HWCR, amd64_errata_set4,
228: amd64_errata_setmsr, HWCR_FFDIS
229: },
230: #endif
231: #endif /* MULTIPROCESSOR */
232: };
233:
234: static int
235: amd64_errata_testmsr(struct cpu_info *ci, errata_t *e)
236: {
237: uint64_t val;
238:
239: (void)ci;
240:
241: val = rdmsr_locked(e->e_data1, OPTERON_MSR_PASSCODE);
242: if ((val & e->e_data2) != 0)
243: return 0; /* not found */
244:
245: e->e_reported = 1;
246: return 1; /* found */
247: }
248:
249: static int
250: amd64_errata_setmsr(struct cpu_info *ci, errata_t *e)
251: {
252: uint64_t val;
253:
254: (void)ci;
255:
256: val = rdmsr_locked(e->e_data1, OPTERON_MSR_PASSCODE);
257: if ((val & e->e_data2) != 0)
258: return 0; /* not found */
259:
260: wrmsr_locked(e->e_data1, OPTERON_MSR_PASSCODE, val | e->e_data2);
261:
262: #ifdef ERRATA_DEBUG
263: printf("ERRATA: writing a fix\n");
264: val = rdmsr_locked(e->e_data1, OPTERON_MSR_PASSCODE);
265: if ((val & e->e_data2) != 0)
266: printf("ERRATA: fix seems to have worked!\n");
267: #endif
268:
269: e->e_reported = 1;
270: return 2; /* found and fixed */
271: }
272:
273: void
274: amd64_errata(struct cpu_info *ci)
275: {
276: u_int32_t code, dummy;
277: errata_t *e, *ex;
278: cpurev_t rev;
279: int i, j;
280: int rc;
281: int found = 0;
282: int corrected = 0;
283:
284: CPUID(0x80000001, code, dummy, dummy, dummy);
285:
286: for (i = 0; ; i += 2) {
287: if ((rev = cpurevs[i]) == OINK) {
288: #ifdef ERRATA_DEBUG
289: printf("ERRATA: this CPU ok\n");
290: #endif
291: return;
292: }
293: if (cpurevs[i + 1] == code) {
294: #ifdef ERRATA_DEBUG
295: printf("ERRATA: this CPU has errata\n");
296: #endif
297: break;
298: }
299: }
300:
301: ex = errata + sizeof(errata) / sizeof(errata[0]);
302:
303: /* Reset e_reporteds (for multiple CPUs) */
304: for (e = errata; e < ex; e++)
305: e->e_reported = 0;
306:
307: for (e = errata; e < ex; e++) {
308: if (e->e_reported)
309: continue;
310: if (e->e_set != NULL) {
311: for (j = 0; e->e_set[j] != OINK; j++)
312: if (e->e_set[j] == rev)
313: break;
314: if (e->e_set[j] == OINK)
315: continue;
316: }
317:
318: #ifdef ERRATA_DEBUG
319: printf("%s: testing for erratum %d\n",
320: ci->ci_dev->dv_xname, e->e_num);
321: #endif
322:
323: /*
324: * If we have an action routine, call it, otherwise
325: * the default is that this erratum is present.
326: */
327: rc = (e->e_act == NULL) ? 1 : (*e->e_act)(ci, e);
328:
329: if (rc == 0) /* not found */
330: continue;
331: if (rc == 1)
332: found++;
333: if (rc == 2)
334: corrected++;
335:
336: e->e_reported = rc;
337:
338: #ifdef ERRATA_DEBUG
339: printf("%s: erratum %d present%s\n",
340: ci->ci_dev->dv_xname, e->e_num,
341: (rc == 2) ? " and patched" : "");
342: #endif
343: }
344:
345: #define ERRATA_VERBOSE
346: #ifdef ERRATA_VERBOSE
347: if (corrected) {
348: int first = 1;
349:
350: /* Print out found and corrected */
351: printf("%s: AMD %s", ci->ci_dev->dv_xname,
352: (corrected == 1) ? "erratum" : "errata");
353: for (e = errata; e < ex; e++) {
354: if (e->e_reported == 2) {
355: if (! first)
356: printf(",");
357: printf(" %d", e->e_num);
358: first = 0;
359: }
360: }
361: printf(" detected and fixed\n");
362: }
363: #endif
364:
365: if (found) {
366: int first = 1;
367:
368: /* Print out found but not corrected */
369: printf("%s: AMD %s", ci->ci_dev->dv_xname,
370: (found == 1) ? "erratum" : "errata");
371: for (e = errata; e < ex; e++) {
372: if (e->e_reported == 1) {
373: if (! first)
374: printf(",");
375: printf(" %d", e->e_num);
376: first = 0;
377: }
378: }
379: printf(" present, BIOS upgrade may be required\n");
380: }
381: }
CVSweb