Annotation of sys/arch/i386/isa/npx.c, Revision 1.1.1.1
1.1 nbrk 1: /* $OpenBSD: npx.c,v 1.42 2006/10/18 19:48:32 tom Exp $ */
2: /* $NetBSD: npx.c,v 1.57 1996/05/12 23:12:24 mycroft Exp $ */
3:
/*
 * Debug tracing hook.  With the "#if 0" below, IPRINTF(x) expands to nothing.
 * Flip the condition to 1 to turn every IPRINTF((fmt, ...)) into a printf
 * call; callers pass the whole argument list wrapped in an extra pair of
 * parentheses so it can be pasted after "printf".
 */
4: #if 0
5: #define IPRINTF(x) printf x
6: #else
7: #define IPRINTF(x)
8: #endif
9:
10: /*-
11: * Copyright (c) 1994, 1995 Charles M. Hannum. All rights reserved.
12: * Copyright (c) 1990 William Jolitz.
13: * Copyright (c) 1991 The Regents of the University of California.
14: * All rights reserved.
15: *
16: * Redistribution and use in source and binary forms, with or without
17: * modification, are permitted provided that the following conditions
18: * are met:
19: * 1. Redistributions of source code must retain the above copyright
20: * notice, this list of conditions and the following disclaimer.
21: * 2. Redistributions in binary form must reproduce the above copyright
22: * notice, this list of conditions and the following disclaimer in the
23: * documentation and/or other materials provided with the distribution.
24: * 3. Neither the name of the University nor the names of its contributors
25: * may be used to endorse or promote products derived from this software
26: * without specific prior written permission.
27: *
28: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
29: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
32: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
33: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
34: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
35: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
37: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38: * SUCH DAMAGE.
39: *
40: * @(#)npx.c 7.2 (Berkeley) 5/12/91
41: */
42:
43: #include <sys/param.h>
44: #include <sys/systm.h>
45: #include <sys/conf.h>
46: #include <sys/file.h>
47: #include <sys/proc.h>
48: #include <sys/signalvar.h>
49: #include <sys/user.h>
50: #include <sys/ioctl.h>
51: #include <sys/device.h>
52:
53: #include <uvm/uvm_extern.h>
54:
55: #include <machine/cpu.h>
56: #include <machine/intr.h>
57: #include <machine/npx.h>
58: #include <machine/pio.h>
59: #include <machine/cpufunc.h>
60: #include <machine/pcb.h>
61: #include <machine/trap.h>
62: #include <machine/specialreg.h>
63: #include <machine/i8259.h>
64:
65: #include <dev/isa/isareg.h>
66: #include <dev/isa/isavar.h>
67:
68: /*
69: * 387 and 287 Numeric Coprocessor Extension (NPX) Driver.
70: *
71: * We do lazy initialization and switching using the TS bit in cr0 and the
72: * MDP_USEDFPU bit in mdproc.
73: *
74: * DNA exceptions are handled like this:
75: *
76: * 1) If there is no NPX, return and go to the emulator.
77: * 2) If someone else has used the NPX, save its state into that process's PCB.
78: * 3a) If MDP_USEDFPU is not set, set it and initialize the NPX.
79: * 3b) Otherwise, reload the process's previous NPX state.
80: *
81: * When a process is created or exec()s, its saved cr0 image has the TS bit
82: * set and the MDP_USEDFPU bit clear. The MDP_USEDFPU bit is set when the
83: * process first gets a DNA and the NPX is initialized. The TS bit is turned
84: * off when the NPX is used, and turned on again later when the process's NPX
85: * state is saved.
86: */
87:
/*
 * One-instruction inline-assembly wrappers.  Each macro emits the x87 (or
 * CR0-related) instruction it is named after; the ones taking `addr' pass
 * *addr as a memory operand (input for fldcw/frstor, output for
 * fnsave/fnstcw/fnstsw).
 *
 * fp_divide_by_0() pushes 0.0 and 1.0, divides, and then executes fwait;
 * npxprobe1() uses it to provoke a divide-by-zero error on purpose.
 * stts() sets CR0_TS by read-modify-write of cr0; clts() clears it.
 */
88: #define fldcw(addr) __asm("fldcw %0" : : "m" (*addr))
89: #define fnclex() __asm("fnclex")
90: #define fninit() __asm("fninit")
91: #define fnsave(addr) __asm("fnsave %0" : "=m" (*addr))
92: #define fnstcw(addr) __asm("fnstcw %0" : "=m" (*addr))
93: #define fnstsw(addr) __asm("fnstsw %0" : "=m" (*addr))
94: #define fp_divide_by_0() __asm("fldz; fld1; fdiv %st,%st(1); fwait")
95: #define frstor(addr) __asm("frstor %0" : : "m" (*addr))
96: #define fwait() __asm("fwait")
97: #define clts() __asm("clts")
98: #define stts() lcr0(rcr0() | CR0_TS)
99:
/*
 * Forward declarations: npxintr() is the handler npxattach() registers with
 * isa_intr_establish(); the two static functions are helpers local to this
 * file.
 */
100: int npxintr(void *);
101: static int npxprobe1(struct isa_attach_args *);
102: static int x86fpflags_to_siginfo(u_int32_t);
103:
104:
/*
 * Per-device state for the npx driver.
 *   sc_dev - generic device header (its dv_xname is used as the interrupt name)
 *   sc_ih  - handle returned by isa_intr_establish(); only assigned in the
 *            NPX_INTERRUPT case of npxattach()
 */
105: struct npx_softc {
106: struct device sc_dev;
107: void *sc_ih;
108: };
109:
/* Probe and attach entry points referenced by npx_ca below. */
110: int npxprobe(struct device *, void *, void *);
111: void npxattach(struct device *, struct device *, void *);
112:
/* Attachment record: softc size plus the probe and attach routines. */
113: struct cfattach npx_ca = {
114: sizeof(struct npx_softc), npxprobe, npxattach
115: };
116:
/*
 * Driver record: driver name "npx", device class DV_DULL.
 * NOTE(review): the leading NULL is presumably the device-instance array
 * filled in by autoconf -- confirm against struct cfdriver.
 */
117: struct cfdriver npx_cd = {
118: NULL, "npx", DV_DULL
119: };
120:
/*
 * Outcome of probing, as recorded by npxprobe()/npxprobe1() and consumed by
 * npxattach().
 */
121: enum npx_type {
122: NPX_NONE = 0, /* probe failed, or attach rejected a broken unit */
123: NPX_INTERRUPT, /* errors were seen only as the probed IRQ */
124: NPX_EXCEPTION, /* errors were seen as exception 16 */
125: NPX_BROKEN, /* FPU found, but neither trap nor IRQ was seen */
126: NPX_CPUID, /* CPUID_FPU set; npxattach() turns this into NPX_EXCEPTION */
127: };
128:
/* Probe result; written by npxprobe()/npxprobe1() and adjusted by npxattach(). */
129: static enum npx_type npx_type;
/*
 * Event counters bumped by the probeintr/probetrap assembly stubs while
 * npxprobe1() runs its divide-by-zero test; volatile because they change
 * behind the compiler's back.
 */
130: static volatile u_int npx_intrs_while_probing;
131: static volatile u_int npx_traps_while_probing;
132:
/*
 * Flags defined elsewhere and set from this file: i386_fpu_present in
 * npxattach(), i386_fpu_exception when exception 16 reporting is chosen,
 * i386_fpu_fdivbug in npxinit().
 */
133: extern int i386_fpu_present;
134: extern int i386_fpu_exception;
135: extern int i386_fpu_fdivbug;
136:
/*
 * FXSAVE/FXRSTOR wrappers, only compiled for I686_CPU kernels.  fxsave()
 * writes to *addr, fxrstor() reads from *addr.
 */
137: #ifdef I686_CPU
138: #define fxsave(addr) __asm("fxsave %0" : "=m" (*addr))
139: #define fxrstor(addr) __asm("fxrstor %0" : : "m" (*addr))
140: #endif /* I686_CPU */
141:
142: static __inline void
143: fpu_save(union savefpu *addr)
144: {
145:
146: #ifdef I686_CPU
147: if (i386_use_fxsave) {
148: fxsave(&addr->sv_xmm);
149: /* FXSAVE doesn't FNINIT like FNSAVE does -- so do it here. */
150: fninit();
151: } else
152: #endif /* I686_CPU */
153: fnsave(&addr->sv_87);
154: }
155:
/*
 * Placeholder DNA handler: the initial value of npxdna_func.  Panics if a
 * DNA trap is dispatched before npxattach() has installed a real handler.
 * ci is unused.
 */
156: static int
157: npxdna_notset(struct cpu_info *ci)
158: {
159: panic("npxdna vector not initialized");
160: }
161:
/*
 * Device-not-available handler vector.  Starts out pointing at the panic
 * stub; npxattach() replaces it with npxdna_xmm (I686_CPU kernels with
 * i386_use_fxsave set) or npxdna_s87.
 */
162: int (*npxdna_func)(struct cpu_info *) = npxdna_notset;
163: int npxdna_s87(struct cpu_info *);
164: #ifdef I686_CPU
165: int npxdna_xmm(struct cpu_info *);
166: #endif /* I686_CPU */
167: void npxexit(void);
168:
169: /*
170: * Special interrupt handlers. Someday intr0-intr15 will be used to count
171: * interrupts. We'll still need a special exception 16 handler. The busy
172: * latch stuff in probeintr() can be moved to npxprobe().
173: */
/*
 * Temporary interrupt handler installed by npxprobe() on the probed IRQ's
 * IDT slot.  It increments npx_intrs_while_probing (through an %ss segment
 * override), writes 0x20 (EOI) to ports 0xa0 and 0x20, writes 0 to port
 * 0xf0 to clear the BUSY# latch, and returns with iret.  Only %eax is
 * used, and it is saved and restored around the port writes.
 */
174: void probeintr(void);
175: asm (".text\n\t"
176: "probeintr:\n\t"
177: "ss\n\t"
178: "incl npx_intrs_while_probing\n\t"
179: "pushl %eax\n\t"
180: "movb $0x20,%al # EOI (asm in strings loses cpp features)\n\t"
181: "outb %al,$0xa0 # IO_ICU2\n\t"
182: "outb %al,$0x20 # IO_ICU1\n\t"
183: "movb $0,%al\n\t"
184: "outb %al,$0xf0 # clear BUSY# latch\n\t"
185: "popl %eax\n\t"
186: "iret\n\t");
187:
/*
 * Temporary exception 16 handler installed by npxprobe().  It increments
 * npx_traps_while_probing (through an %ss segment override), clears the
 * pending FPU exception with fnclex, and returns with iret.
 */
188: void probetrap(void);
189: asm (".text\n\t"
190: "probetrap:\n\t"
191: "ss\n\t"
192: "incl npx_traps_while_probing\n\t"
193: "fnclex\n\t"
194: "iret\n\t");
195:
196: static inline int
197: npxprobe1(struct isa_attach_args *ia)
198: {
199: int control;
200: int status;
201:
202: ia->ia_iosize = 16;
203: ia->ia_msize = 0;
204:
205: /*
206: * Finish resetting the coprocessor, if any. If there is an error
207: * pending, then we may get a bogus IRQ13, but probeintr() will handle
208: * it OK. Bogus halts have never been observed, but we enabled
209: * IRQ13 and cleared the BUSY# latch early to handle them anyway.
210: */
211: fninit();
212: delay(1000); /* wait for any IRQ13 (fwait might hang) */
213:
214: /*
215: * Check for a status of mostly zero.
216: */
217: status = 0x5a5a;
218: fnstsw(&status);
219: if ((status & 0xb8ff) == 0) {
220: /*
221: * Good, now check for a proper control word.
222: */
223: control = 0x5a5a;
224: fnstcw(&control);
225: if ((control & 0x1f3f) == 0x033f) {
226: /*
227: * We have an npx, now divide by 0 to see if exception
228: * 16 works.
229: */
230: control &= ~(1 << 2); /* enable divide by 0 trap */
231: fldcw(&control);
232: npx_traps_while_probing = npx_intrs_while_probing = 0;
233: fp_divide_by_0();
234: delay(1);
235: if (npx_traps_while_probing != 0) {
236: /*
237: * Good, exception 16 works.
238: */
239: npx_type = NPX_EXCEPTION;
240: ia->ia_irq = IRQUNK; /* zap the interrupt */
241: i386_fpu_exception = 1;
242: } else if (npx_intrs_while_probing != 0) {
243: /*
244: * Bad, we are stuck with IRQ13.
245: */
246: npx_type = NPX_INTERRUPT;
247: } else {
248: /*
249: * Worse, even IRQ13 is broken.
250: */
251: npx_type = NPX_BROKEN;
252: ia->ia_irq = IRQUNK;
253: }
254: return 1;
255: }
256: }
257:
258: /*
259: * Probe failed. There is no usable FPU.
260: */
261: npx_type = NPX_NONE;
262: return 0;
263: }
264:
265: /*
266: * Probe routine. Initialize cr0 to give correct behaviour for [f]wait
267: * whether the device exists or not (XXX should be elsewhere). Set flags
268: * to tell npxattach() what to do. Modify device struct if npx doesn't
269: * need to use interrupts. Return 1 if device exists.
270: */
271: int
272: npxprobe(struct device *parent, void *match, void *aux)
273: {
274: struct isa_attach_args *ia = aux;
275: int irq;
276: int result;
277: u_long save_eflags;
278: unsigned save_imen;
279: struct gate_descriptor save_idt_npxintr;
280: struct gate_descriptor save_idt_npxtrap;
281:
282: if (cpu_feature & CPUID_FPU) {
283: npx_type = NPX_CPUID;
284: i386_fpu_exception = 1;
285: ia->ia_irq = IRQUNK; /* Don't want the interrupt vector */
286: ia->ia_iosize = 16;
287: ia->ia_msize = 0;
288: return 1;
289: }
290:
291: /*
292: * This routine is now just a wrapper for npxprobe1(), to install
293: * special npx interrupt and trap handlers, to enable npx interrupts
294: * and to disable other interrupts. Someday isa_configure() will
295: * install suitable handlers and run with interrupts enabled so we
296: * won't need to do so much here.
297: */
298: irq = NRSVIDT + ia->ia_irq;
299: save_eflags = read_eflags();
300: disable_intr();
301: save_idt_npxintr = idt[irq];
302: save_idt_npxtrap = idt[16];
303: setgate(&idt[irq], probeintr, 0, SDT_SYS386IGT, SEL_KPL, GICODE_SEL);
304: setgate(&idt[16], probetrap, 0, SDT_SYS386TGT, SEL_KPL, GCODE_SEL);
305: save_imen = imen;
306: imen = ~((1 << IRQ_SLAVE) | (1 << ia->ia_irq));
307: SET_ICUS();
308:
309: /*
310: * Partially reset the coprocessor, if any. Some BIOS's don't reset
311: * it after a warm boot.
312: */
313: outb(0xf1, 0); /* full reset on some systems, NOP on others */
314: delay(1000);
315: outb(0xf0, 0); /* clear BUSY# latch */
316:
317: /*
318: * We set CR0 in locore to trap all ESC and WAIT instructions.
319: * We have to turn off the CR0_EM bit temporarily while probing.
320: */
321: lcr0(rcr0() & ~(CR0_EM|CR0_TS));
322: enable_intr();
323: result = npxprobe1(ia);
324: disable_intr();
325: lcr0(rcr0() | (CR0_EM|CR0_TS));
326:
327: imen = save_imen;
328: SET_ICUS();
329: idt[irq] = save_idt_npxintr;
330: idt[16] = save_idt_npxtrap;
331: write_eflags(save_eflags);
332: return (result);
333: }
334:
/*
 * int npx586bug1(int x, int y)
 *
 * Loads both integer arguments from the stack onto the FPU stack, computes
 * x - (x/y)*y in floating point, converts the result to an integer through
 * a scratch stack slot, and returns it in %eax.  npxinit() treats a
 * non-zero result for its chosen operands as evidence of the Pentium FDIV
 * bug.
 */
335: int npx586bug1(int, int);
336: asm (".text\n\t"
337: "npx586bug1:\n\t"
338: "fildl 4(%esp) # x\n\t"
339: "fildl 8(%esp) # y\n\t"
340: "fld %st(1)\n\t"
341: "fdiv %st(1),%st # x/y\n\t"
342: "fmulp %st,%st(1) # (x/y)*y\n\t"
343: "fsubrp %st,%st(1) # x-(x/y)*y\n\t"
344: "pushl $0\n\t"
345: "fistpl (%esp)\n\t"
346: "popl %eax\n\t"
347: "ret\n\t");
348:
349: void
350: npxinit(struct cpu_info *ci)
351: {
352: lcr0(rcr0() & ~(CR0_EM|CR0_TS));
353: fninit();
354: if (npx586bug1(4195835, 3145727) != 0) {
355: i386_fpu_fdivbug = 1;
356: printf("%s: WARNING: Pentium FDIV bug detected!\n",
357: ci->ci_dev.dv_xname);
358: }
359: lcr0(rcr0() | (CR0_TS));
360: }
361:
362: /*
363: * Attach routine - announce which it is, and wire into system
364: */
365: void
366: npxattach(struct device *parent, struct device *self, void *aux)
367: {
368: struct npx_softc *sc = (void *)self;
369: struct isa_attach_args *ia = aux;
370:
371: switch (npx_type) {
372: case NPX_INTERRUPT:
373: printf("\n");
374: lcr0(rcr0() & ~CR0_NE);
375: sc->sc_ih = isa_intr_establish(ia->ia_ic, ia->ia_irq,
376: IST_EDGE, IPL_NONE, npxintr, 0, sc->sc_dev.dv_xname);
377: break;
378: case NPX_EXCEPTION:
379: printf(": using exception 16\n");
380: break;
381: case NPX_CPUID:
382: printf(": reported by CPUID; using exception 16\n");
383: npx_type = NPX_EXCEPTION;
384: break;
385: case NPX_BROKEN:
386: printf(": error reporting broken; not using\n");
387: npx_type = NPX_NONE;
388: return;
389: case NPX_NONE:
390: return;
391: }
392:
393: npxinit(&cpu_info_primary);
394: i386_fpu_present = 1;
395:
396: #ifdef I686_CPU
397: if (i386_use_fxsave)
398: npxdna_func = npxdna_xmm;
399: else
400: #endif /* I686_CPU */
401: npxdna_func = npxdna_s87;
402: }
403:
404: /*
405: * Record the FPU state and reinitialize it all except for the control word.
406: * Then generate a SIGFPE.
407: *
408: * Reinitializing the state allows naive SIGFPE handlers to longjmp without
409: * doing any fixups.
410: *
411: * XXX there is currently no way to pass the full error state to signal
412: * handlers, and if this is a nested interrupt there is no way to pass even
413: * a status code! So there is no way to have a non-naive SIGFPE handler. At
414: * best a handler could do an fninit followed by an fldcw of a static value.
415: * fnclex would be of little use because it would leave junk on the FPU stack.
416: * Returning from the handler would be even less safe than usual because
417: * IRQ13 exception handling makes exceptions even less precise than usual.
418: */
419: int
420: npxintr(void *arg)
421: {
422: struct cpu_info *ci = curcpu();
423: struct proc *p = ci->ci_fpcurproc;
424: union savefpu *addr;
425: struct intrframe *frame = arg;
426: int code;
427: union sigval sv;
428:
429: uvmexp.traps++;
430: IPRINTF(("%s: fp intr\n", ci->ci_dev.dv_xname));
431:
432: if (p == NULL || npx_type == NPX_NONE) {
433: /* XXX no %p in stand/printf.c. Cast to quiet gcc -Wall. */
434: printf("npxintr: p = %lx, curproc = %lx, npx_type = %d\n",
435: (u_long) p, (u_long) curproc, npx_type);
436: panic("npxintr from nowhere");
437: }
438: /*
439: * Clear the interrupt latch.
440: */
441: outb(0xf0, 0);
442: /*
443: * If we're saving, ignore the interrupt. The FPU will happily
444: * generate another one when we restore the state later.
445: */
446: if (ci->ci_fpsaving)
447: return (1);
448:
449: #ifdef DIAGNOSTIC
450: /*
451: * At this point, fpcurproc should be curproc. If it wasn't, the TS
452: * bit should be set, and we should have gotten a DNA exception.
453: */
454: if (p != curproc)
455: panic("npxintr: wrong process");
456: #endif
457:
458: /*
459: * Find the address of fpcurproc's saved FPU state. (Given the
460: * invariant above, this is always the one in curpcb.)
461: */
462: addr = &p->p_addr->u_pcb.pcb_savefpu;
463: /*
464: * Save state. This does an implied fninit. It had better not halt
465: * the cpu or we'll hang.
466: */
467: fpu_save(addr);
468: fwait();
469: /*
470: * Restore control word (was clobbered by fpu_save).
471: */
472: if (i386_use_fxsave) {
473: fldcw(&addr->sv_xmm.sv_env.en_cw);
474: /*
475: * FNINIT doesn't affect MXCSR or the XMM registers;
476: * no need to re-load MXCSR here.
477: */
478: } else
479: fldcw(&addr->sv_87.sv_env.en_cw);
480: fwait();
481: /*
482: * Remember the exception status word and tag word. The current
483: * (almost fninit'ed) fpu state is in the fpu and the exception
484: * state just saved will soon be junk. However, the implied fninit
485: * doesn't change the error pointers or register contents, and we
486: * preserved the control word and will copy the status and tag
487: * words, so the complete exception state can be recovered.
488: */
489: if (i386_use_fxsave) {
490: addr->sv_xmm.sv_ex_sw = addr->sv_xmm.sv_env.en_sw;
491: addr->sv_xmm.sv_ex_tw = addr->sv_xmm.sv_env.en_tw;
492: } else {
493: addr->sv_87.sv_ex_sw = addr->sv_87.sv_env.en_sw;
494: addr->sv_87.sv_ex_tw = addr->sv_87.sv_env.en_tw;
495: }
496:
497: /*
498: * Pass exception to process. If it's the current process, try to do
499: * it immediately.
500: */
501: if (p == curproc && USERMODE(frame->if_cs, frame->if_eflags)) {
502: /*
503: * Interrupt is essentially a trap, so we can afford to call
504: * the SIGFPE handler (if any) as soon as the interrupt
505: * returns.
506: *
507: * XXX little or nothing is gained from this, and plenty is
508: * lost - the interrupt frame has to contain the trap frame
509: * (this is otherwise only necessary for the rescheduling trap
510: * in doreti, and the frame for that could easily be set up
511: * just before it is used).
512: */
513: p->p_md.md_regs = (struct trapframe *)&frame->if_fs;
514:
515: /*
516: * Encode the appropriate code for detailed information on
517: * this exception.
518: */
519: if (i386_use_fxsave)
520: code = x86fpflags_to_siginfo(addr->sv_xmm.sv_ex_sw);
521: else
522: code = x86fpflags_to_siginfo(addr->sv_87.sv_ex_sw);
523: sv.sival_int = frame->if_eip;
524: trapsignal(p, SIGFPE, T_ARITHTRAP, code, sv);
525: } else {
526: /*
527: * Nested interrupt. These losers occur when:
528: * o an IRQ13 is bogusly generated at a bogus time, e.g.:
529: * o immediately after an fnsave or frstor of an
530: * error state.
531: * o a couple of 386 instructions after
532: * "fstpl _memvar" causes a stack overflow.
533: * These are especially nasty when combined with a
534: * trace trap.
535: * o an IRQ13 occurs at the same time as another higher-
536: * priority interrupt.
537: *
538: * Treat them like a true async interrupt.
539: */
540: psignal(p, SIGFPE);
541: }
542:
543: return (1);
544: }
545:
546: static int
547: x86fpflags_to_siginfo(u_int32_t flags)
548: {
549: int i;
550: static int x86fp_siginfo_table[] = {
551: FPE_FLTINV, /* bit 0 - invalid operation */
552: FPE_FLTRES, /* bit 1 - denormal operand */
553: FPE_FLTDIV, /* bit 2 - divide by zero */
554: FPE_FLTOVF, /* bit 3 - fp overflow */
555: FPE_FLTUND, /* bit 4 - fp underflow */
556: FPE_FLTRES, /* bit 5 - fp precision */
557: FPE_FLTINV, /* bit 6 - stack fault */
558: };
559:
560: for (i=0;i < sizeof(x86fp_siginfo_table)/sizeof(int); i++) {
561: if (flags & (1 << i))
562: return (x86fp_siginfo_table[i]);
563: }
564: /* punt if flags not set */
565: return (FPE_FLTINV);
566: }
567:
568: /*
569: * Implement device not available (DNA) exception
570: *
571: * If we were the last process to use the FPU, we can simply return.
572: * Otherwise, we save the previous state, if necessary, and restore our last
573: * saved state.
574: */
575:
576: /*
577: * XXX It is unclear if the code below is correct in the multiprocessor
578: * XXX case. Check the NetBSD sources once again to be sure.
579: */
580: #ifdef I686_CPU
581: int
582: npxdna_xmm(struct cpu_info *ci)
583: {
584: struct proc *p;
585: int s;
586:
587: if (ci->ci_fpsaving) {
588: printf("recursive npx trap; cr0=%x\n", rcr0());
589: return (0);
590: }
591:
592: s = splipi(); /* lock out IPI's while we clean house.. */
593:
594: #ifdef MULTIPROCESSOR
595: p = ci->ci_curproc;
596: #else
597: p = curproc;
598: #endif
599:
600: IPRINTF(("%s: dna for %lx%s\n", ci->ci_dev.dv_xname, (u_long)p,
601: (p->p_md.md_flags & MDP_USEDFPU) ? " (used fpu)" : ""));
602:
603: /*
604: * XXX should have a fast-path here when no save/restore is necessary
605: */
606: /*
607: * Initialize the FPU state to clear any exceptions. If someone else
608: * was using the FPU, save their state (which does an implicit
609: * initialization).
610: */
611: if (ci->ci_fpcurproc != NULL) {
612: IPRINTF(("%s: fp save %lx\n", ci->ci_dev.dv_xname,
613: (u_long)ci->ci_fpcurproc));
614: npxsave_cpu(ci, 1);
615: } else {
616: clts();
617: IPRINTF(("%s: fp init\n", ci->ci_dev.dv_xname));
618: fninit();
619: fwait();
620: stts();
621: }
622: splx(s);
623:
624: IPRINTF(("%s: done saving\n", ci->ci_dev.dv_xname));
625: KDASSERT(ci->ci_fpcurproc == NULL);
626: #ifndef MULTIPROCESSOR
627: KDASSERT(p->p_addr->u_pcb.pcb_fpcpu == NULL);
628: #else
629: if (p->p_addr->u_pcb.pcb_fpcpu != NULL)
630: npxsave_proc(p, 1);
631: #endif
632: p->p_addr->u_pcb.pcb_cr0 &= ~CR0_TS;
633: clts();
634: s = splipi();
635: ci->ci_fpcurproc = p;
636: p->p_addr->u_pcb.pcb_fpcpu = ci;
637: splx(s);
638: uvmexp.fpswtch++;
639:
640: if ((p->p_md.md_flags & MDP_USEDFPU) == 0) {
641: fldcw(&p->p_addr->u_pcb.pcb_savefpu.sv_xmm.sv_env.en_cw);
642: p->p_md.md_flags |= MDP_USEDFPU;
643: } else {
644: static double zero = 0.0;
645:
646: /*
647: * amd fpu does not restore fip, fdp, fop on fxrstor
648: * thus leaking other process's execution history.
649: */
650: fnclex();
651: __asm __volatile("ffree %%st(7)\n\tfld %0" : : "m" (zero));
652: fxrstor(&p->p_addr->u_pcb.pcb_savefpu.sv_xmm);
653: }
654:
655: return (1);
656: }
657: #endif /* I686_CPU */
658:
659: int
660: npxdna_s87(struct cpu_info *ci)
661: {
662: struct proc *p;
663: int s;
664:
665: KDASSERT(i386_use_fxsave == 0);
666:
667: if (ci->ci_fpsaving) {
668: printf("recursive npx trap; cr0=%x\n", rcr0());
669: return (0);
670: }
671:
672: s = splipi(); /* lock out IPI's while we clean house.. */
673: #ifdef MULTIPROCESSOR
674: p = ci->ci_curproc;
675: #else
676: p = curproc;
677: #endif
678:
679: IPRINTF(("%s: dna for %lx%s\n", ci->ci_dev.dv_xname, (u_long)p,
680: (p->p_md.md_flags & MDP_USEDFPU) ? " (used fpu)" : ""));
681:
682: /*
683: * If someone else was using our FPU, save their state (which does an
684: * implicit initialization); otherwise, initialize the FPU state to
685: * clear any exceptions.
686: */
687: if (ci->ci_fpcurproc != NULL) {
688: IPRINTF(("%s: fp save %lx\n", ci->ci_dev.dv_xname,
689: (u_long)ci->ci_fpcurproc));
690: npxsave_cpu(ci, 1);
691: } else {
692: clts();
693: IPRINTF(("%s: fp init\n", ci->ci_dev.dv_xname));
694: fninit();
695: fwait();
696: stts();
697: }
698: splx(s);
699:
700: IPRINTF(("%s: done saving\n", ci->ci_dev.dv_xname));
701: KDASSERT(ci->ci_fpcurproc == NULL);
702: #ifndef MULTIPROCESSOR
703: KDASSERT(p->p_addr->u_pcb.pcb_fpcpu == NULL);
704: #else
705: if (p->p_addr->u_pcb.pcb_fpcpu != NULL)
706: npxsave_proc(p, 1);
707: #endif
708: p->p_addr->u_pcb.pcb_cr0 &= ~CR0_TS;
709: clts();
710: s = splipi();
711: ci->ci_fpcurproc = p;
712: p->p_addr->u_pcb.pcb_fpcpu = ci;
713: splx(s);
714: uvmexp.fpswtch++;
715:
716: if ((p->p_md.md_flags & MDP_USEDFPU) == 0) {
717: fldcw(&p->p_addr->u_pcb.pcb_savefpu.sv_87.sv_env.en_cw);
718: p->p_md.md_flags |= MDP_USEDFPU;
719: } else {
720: /*
721: * The following frstor may cause an IRQ13 when the state being
722: * restored has a pending error. The error will appear to have
723: * been triggered by the current (npx) user instruction even
724: * when that instruction is a no-wait instruction that should
725: * not trigger an error (e.g., fnclex). On at least one 486
726: * system all of the no-wait instructions are broken the same
727: * as frstor, so our treatment does not amplify the breakage.
728: * On at least one 386/Cyrix 387 system, fnclex works correctly
729: * while frstor and fnsave are broken, so our treatment breaks
730: * fnclex if it is the first FPU instruction after a context
731: * switch.
732: */
733: frstor(&p->p_addr->u_pcb.pcb_savefpu.sv_87);
734: }
735:
736: return (1);
737: }
738:
739: /*
740: * The FNSAVE instruction clears the FPU state. Rather than reloading the FPU
741: * immediately, we clear fpcurproc and turn on CR0_TS to force a DNA and a
742: * reload of the FPU state the next time we try to use it. This routine
743: * is only called when forking, core dumping, or debugging, or swapping,
744: * so the lazy reload at worst forces us to trap once per fork(), and at best
745: * saves us a reload once per fork().
746: */
747: void
748: npxsave_cpu(struct cpu_info *ci, int save)
749: {
750: struct proc *p;
751: int s;
752:
753: KDASSERT(ci == curcpu());
754:
755: p = ci->ci_fpcurproc;
756: if (p == NULL)
757: return;
758:
759: IPRINTF(("%s: fp cpu %s %lx\n", ci->ci_dev.dv_xname,
760: save ? "save" : "flush", (u_long)p));
761:
762: if (save) {
763: #ifdef DIAGNOSTIC
764: if (ci->ci_fpsaving != 0)
765: panic("npxsave_cpu: recursive save!");
766: #endif
767: /*
768: * Set ci->ci_fpsaving, so that any pending exception will be
769: * thrown away. (It will be caught again if/when the FPU
770: * state is restored.)
771: *
772: * XXX on i386 and earlier, this routine should always be
773: * called at spl0; if it might called with the NPX interrupt
774: * masked, it would be necessary to forcibly unmask the NPX
775: * interrupt so that it could succeed.
776: * XXX this is irrelevant on 486 and above (systems
777: * which report FP failures via traps rather than irq13).
778: * XXX punting for now..
779: */
780: clts();
781: ci->ci_fpsaving = 1;
782: fpu_save(&p->p_addr->u_pcb.pcb_savefpu);
783: ci->ci_fpsaving = 0;
784: /* It is unclear if this is needed. */
785: fwait();
786: }
787:
788: /*
789: * We set the TS bit in the saved CR0 for this process, so that it
790: * will get a DNA exception on any FPU instruction and force a reload.
791: */
792: stts();
793: p->p_addr->u_pcb.pcb_cr0 |= CR0_TS;
794:
795: s = splipi();
796: p->p_addr->u_pcb.pcb_fpcpu = NULL;
797: ci->ci_fpcurproc = NULL;
798: splx(s);
799: }
800:
801: /*
802: * Save p's FPU state, which may be on this processor or another processor.
803: */
804: void
805: npxsave_proc(struct proc *p, int save)
806: {
807: struct cpu_info *ci = curcpu();
808: struct cpu_info *oci;
809:
810: KDASSERT(p->p_addr != NULL);
811:
812: oci = p->p_addr->u_pcb.pcb_fpcpu;
813: if (oci == NULL)
814: return;
815:
816: IPRINTF(("%s: fp proc %s %lx\n", ci->ci_dev.dv_xname,
817: save ? "save" : "flush", (u_long)p));
818:
819: #if defined(MULTIPROCESSOR)
820: if (oci == ci) {
821: int s = splipi();
822: npxsave_cpu(ci, save);
823: splx(s);
824: } else {
825: #ifdef DIAGNOSTIC
826: int spincount;
827: #endif
828:
829: IPRINTF(("%s: fp ipi to %s %s %lx\n", ci->ci_dev.dv_xname,
830: oci->ci_dev.dv_xname, save ? "save" : "flush", (u_long)p));
831:
832: i386_send_ipi(oci,
833: save ? I386_IPI_SYNCH_FPU : I386_IPI_FLUSH_FPU);
834:
835: #ifdef DIAGNOSTIC
836: spincount = 0;
837: #endif
838: while (p->p_addr->u_pcb.pcb_fpcpu != NULL) {
839: SPINLOCK_SPIN_HOOK;
840: #ifdef DIAGNOSTIC
841: if (spincount++ > 100000000)
842: panic("%s: fp_save ipi didn't (%s)",
843: ci->ci_dev.dv_xname, oci->ci_dev.dv_xname);
844: #endif
845: }
846: }
847: #else
848: KASSERT(ci->ci_fpcurproc == p);
849: npxsave_cpu(ci, save);
850: #endif
851: }
CVSweb