Annotation of sys/arch/i386/i386/trap.c, Revision 1.1.1.1
1.1 nbrk 1: /* $OpenBSD: trap.c,v 1.85 2007/06/26 13:39:02 tom Exp $ */
2: /* $NetBSD: trap.c,v 1.95 1996/05/05 06:50:02 mycroft Exp $ */
3:
4: /*-
5: * Copyright (c) 1995 Charles M. Hannum. All rights reserved.
6: * Copyright (c) 1990 The Regents of the University of California.
7: * All rights reserved.
8: *
9: * This code is derived from software contributed to Berkeley by
10: * the University of Utah, and William Jolitz.
11: *
12: * Redistribution and use in source and binary forms, with or without
13: * modification, are permitted provided that the following conditions
14: * are met:
15: * 1. Redistributions of source code must retain the above copyright
16: * notice, this list of conditions and the following disclaimer.
17: * 2. Redistributions in binary form must reproduce the above copyright
18: * notice, this list of conditions and the following disclaimer in the
19: * documentation and/or other materials provided with the distribution.
20: * 3. Neither the name of the University nor the names of its contributors
21: * may be used to endorse or promote products derived from this software
22: * without specific prior written permission.
23: *
24: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34: * SUCH DAMAGE.
35: *
36: * @(#)trap.c 7.4 (Berkeley) 5/13/91
37: */
38:
39: /*
40: * 386 Trap and System call handling
41: */
42:
43: #include <sys/param.h>
44: #include <sys/systm.h>
45: #include <sys/proc.h>
46: #include <sys/signalvar.h>
47: #include <sys/user.h>
48: #include <sys/acct.h>
49: #include <sys/kernel.h>
50: #include <sys/signal.h>
51: #ifdef KTRACE
52: #include <sys/ktrace.h>
53: #endif
54: #include <sys/syscall.h>
55:
56: #include "systrace.h"
57: #include <dev/systrace.h>
58:
59: #include <uvm/uvm_extern.h>
60:
61: #include <machine/cpu.h>
62: #include <machine/cpufunc.h>
63: #include <machine/psl.h>
64: #include <machine/reg.h>
65: #include <machine/trap.h>
66: #ifdef DDB
67: #include <machine/db_machdep.h>
68: #endif
69:
70: #ifdef KGDB
71: #include <sys/kgdb.h>
72: #endif
73:
74: #ifdef COMPAT_IBCS2
75: #include <compat/ibcs2/ibcs2_errno.h>
76: #include <compat/ibcs2/ibcs2_exec.h>
77: extern struct emul emul_ibcs2;
78: #endif
79: #include <sys/exec.h>
80: #ifdef COMPAT_LINUX
81: #include <compat/linux/linux_syscall.h>
82: extern struct emul emul_linux_aout, emul_linux_elf;
83: #endif
84: #ifdef COMPAT_FREEBSD
85: extern struct emul emul_freebsd_aout, emul_freebsd_elf;
86: #endif
87: #ifdef COMPAT_BSDOS
88: extern struct emul emul_bsdos;
89: #endif
90: #ifdef COMPAT_AOUT
91: extern struct emul emul_aout;
92: #endif
93: #ifdef KVM86
94: #include <machine/kvm86.h>
95: #define KVM86MODE (kvm86_incall)
96: #endif
97:
98: #include "npx.h"
99:
100: static __inline void userret(struct proc *);
101: void trap(struct trapframe);
102: void syscall(struct trapframe);
103:
104: /*
105: * Define the code needed before returning to user mode, for
106: * trap and syscall.
107: */
108: static __inline void
109: userret(struct proc *p)
110: {
111: int sig;
112:
113: /* take pending signals */
114: while ((sig = CURSIG(p)) != 0)
115: postsig(sig);
116:
117: p->p_cpu->ci_schedstate.spc_curpriority = p->p_priority = p->p_usrpri;
118: }
119:
/*
 * Printable names for the trap numbers.  trap() indexes this table with
 * the trap number from the frame; the slot number and the constant it is
 * meant to correspond to are noted in front of each string.
 */
char *trap_type[] = {
	/*  0 T_PRIVINFLT */		"privileged instruction fault",
	/*  1 T_BPTFLT */		"breakpoint trap",
	/*  2 T_ARITHTRAP */		"arithmetic trap",
	/*  3 T_ASTFLT */		"asynchronous system trap",
	/*  4 T_PROTFLT */		"protection fault",
	/*  5 T_TRCTRAP */		"trace trap",
	/*  6 T_PAGEFLT */		"page fault",
	/*  7 T_ALIGNFLT */		"alignment fault",
	/*  8 T_DIVIDE */		"integer divide fault",
	/*  9 T_NMI */			"non-maskable interrupt",
	/* 10 T_OFLOW */		"overflow trap",
	/* 11 T_BOUND */		"bounds check fault",
	/* 12 T_DNA */			"FPU not available fault",
	/* 13 T_DOUBLEFLT */		"double fault",
	/* 14 T_FPOPFLT (![P]Pro) */	"FPU operand fetch fault",
	/* 15 T_TSSFLT */		"invalid TSS fault",
	/* 16 T_SEGNPFLT */		"segment not present fault",
	/* 17 T_STKFLT */		"stack fault",
	/* 18 T_MACHK ([P]Pro) */	"machine check",
	/* 19 T_XFTRAP */		"SIMD FP fault",
	/* 20 T_RESERVED */		"reserved trap",
};

/* Number of entries in trap_type[]; used to bounds-check lookups. */
int trap_types = sizeof(trap_type) / sizeof(trap_type[0]);

#ifdef DEBUG
/* When non-zero, trap() prints a one-line summary of each trap it sees. */
int trapdebug = 0;
#endif
148:
149: /*
150: * trap(frame):
151: * Exception, fault, and trap interface to BSD kernel. This
152: * common code is called from assembly language IDT gate entry
153: * routines that prepare a suitable stack frame, and restore this
154: * frame after the exception has been processed. Note that the
155: * effect is as if the arguments were passed call by reference.
156: */
157: /*ARGSUSED*/
/*
 * Common C-level handler for processor exceptions, faults and traps.
 * Dispatches on frame.tf_trapno, with T_USER or'ed in when the trap did
 * not come from kernel mode.  Kernel-mode cases either return directly
 * once handled or end up at we_re_toast, which panics unless a debugger
 * claims the trap.  User-mode cases mostly post a signal with
 * trapsignal() and leave through userret().
 */
158: void
159: trap(struct trapframe frame)
160: {
161: struct proc *p = curproc;
162: int type = frame.tf_trapno;
163: struct pcb *pcb = NULL;
164: extern char resume_iret[], resume_pop_ds[], resume_pop_es[],
165: resume_pop_fs[], resume_pop_gs[];
166: struct trapframe *vframe;
167: int resume;
168: vm_prot_t vftype, ftype;
169: union sigval sv;
170: caddr_t onfault;
171: uint32_t cr2;
172:
173: uvmexp.traps++;
174:
175: /* SIGSEGV and SIGBUS need this */
/*
 * vftype is the access type later handed to trapsignal(); ftype is the
 * protection later requested from uvm_fault() (a write fault asks for
 * read and write together).
 */
176: if (frame.tf_err & PGEX_W) {
177: vftype = VM_PROT_WRITE;
178: ftype = VM_PROT_READ | VM_PROT_WRITE;
179: } else
180: ftype = vftype = VM_PROT_READ;
181:
182: #ifdef DEBUG
183: if (trapdebug) {
184: printf("trap %d code %x eip %x cs %x eflags %x cr2 %x cpl %x\n",
185: frame.tf_trapno, frame.tf_err, frame.tf_eip, frame.tf_cs,
186: frame.tf_eflags, rcr2(), lapic_tpr);
187: printf("curproc %p\n", curproc);
188: }
189: #endif
190:
/*
 * A trap that did not come from kernel mode gets T_USER or'ed into its
 * type, and the process's md_regs is pointed at this frame.
 */
191: if (!KERNELMODE(frame.tf_cs, frame.tf_eflags)) {
192: type |= T_USER;
193: p->p_md.md_regs = &frame;
194: }
195:
196: switch (type) {
197:
198: /* trace trap */
199: case T_TRCTRAP: {
200: #if defined(DDB) || defined(KGDB)
201: /* Make sure nobody is single stepping into kernel land.
202: * The syscall has to turn off the trace bit itself. The
203: * easiest way, is to simply not call the debugger, until
204: * we are through the problematic "osyscall" stub. This
205: * is a hack, but it does seem to work.
206: */
207: extern int Xosyscall, Xosyscall_end;
208:
209: if (frame.tf_eip >= (int)&Xosyscall &&
210: frame.tf_eip <= (int)&Xosyscall_end)
211: return;
212: #else
213: return; /* Just return if no DDB */
214: #endif
215: }
216: /* FALLTHROUGH */
217:
218: default:
/*
 * Unhandled or unrecoverable trap.  Also the target of every
 * "goto we_re_toast" below.  The configured debuggers get the first
 * chance to claim it; otherwise the trap is reported and we panic.
 */
219: we_re_toast:
220: #ifdef KGDB
221: if (kgdb_trap(type, &frame))
222: return;
223: else {
224: /*
225: * If this is a breakpoint, don't panic
226: * if we're not connected.
227: */
228: if (type == T_BPTFLT) {
229: printf("kgdb: ignored %s\n", trap_type[type]);
230: return;
231: }
232: }
233: #endif
234:
235: #ifdef DDB
236: if (kdb_trap(type, 0, &frame))
237: return;
238: #endif
239: if (frame.tf_trapno < trap_types)
240: printf("fatal %s (%d)", trap_type[frame.tf_trapno],
241: frame.tf_trapno);
242: else
243: printf("unknown trap %d", frame.tf_trapno);
244: printf(" in %s mode\n", (type & T_USER) ? "user" : "supervisor");
245: printf("trap type %d code %x eip %x cs %x eflags %x cr2 %x cpl %x\n",
246: type, frame.tf_err, frame.tf_eip, frame.tf_cs, frame.tf_eflags, rcr2(), lapic_tpr);
247:
248: panic("trap type %d, code=%x, pc=%x",
249: type, frame.tf_err, frame.tf_eip);
250: /*NOTREACHED*/
251:
252: case T_PROTFLT:
253: #ifdef KVM86
254: if (KVM86MODE) {
255: kvm86_gpfault(&frame);
256: return;
257: }
258: #endif
/* FALLTHROUGH: kernel-mode protection faults share the checks below. */
259: case T_SEGNPFLT:
260: case T_ALIGNFLT:
261: /* Check for copyin/copyout fault. */
262: if (p && p->p_addr) {
263: pcb = &p->p_addr->u_pcb;
264: if (pcb->pcb_onfault != 0) {
/*
 * A fault handler is registered in the pcb: continue execution there.
 * The kernel page-fault path below also jumps here when uvm_fault()
 * fails and pcb_onfault is set.
 */
265: copyfault:
266: frame.tf_eip = (int)pcb->pcb_onfault;
267: return;
268: }
269: }
270:
271: /*
272: * Check for failure during return to user mode.
273: *
274: * We do this by looking at the instruction we faulted on. The
275: * specific instructions we recognize only happen when
276: * returning from a trap, syscall, or interrupt.
277: *
278: * XXX
279: * The heuristic used here will currently fail for the case of
280: * one of the 2 pop instructions faulting when returning from a
281: * a fast interrupt. This should not be possible. It can be
282: * fixed by rearranging the trap frame so that the stack format
283: * at this point is the same as on exit from a `slow'
284: * interrupt.
285: */
/*
 * For each recognized instruction, vframe is positioned so that the
 * named trapframe field coincides with &frame.tf_esp, and resume is the
 * address at which execution will continue.
 * NOTE(review): presumably the words at &frame.tf_esp are the not yet
 * popped remainder of the frame that was being restored -- confirm
 * against the return path in locore.
 */
286: switch (*(u_char *)frame.tf_eip) {
287: case 0xcf: /* iret */
288: vframe = (void *)((int)&frame.tf_esp -
289: offsetof(struct trapframe, tf_eip));
290: resume = (int)resume_iret;
291: break;
292: case 0x1f: /* popl %ds */
293: vframe = (void *)((int)&frame.tf_esp -
294: offsetof(struct trapframe, tf_ds));
295: resume = (int)resume_pop_ds;
296: break;
297: case 0x07: /* popl %es */
298: vframe = (void *)((int)&frame.tf_esp -
299: offsetof(struct trapframe, tf_es));
300: resume = (int)resume_pop_es;
301: break;
302: case 0x0f: /* 0x0f prefix */
303: switch (*(u_char *)(frame.tf_eip+1)) {
304: case 0xa1: /* popl %fs */
305: vframe = (void *)((int)&frame.tf_esp -
306: offsetof(struct trapframe, tf_fs));
307: resume = (int)resume_pop_fs;
308: break;
309: case 0xa9: /* popl %gs */
310: vframe = (void *)((int)&frame.tf_esp -
311: offsetof(struct trapframe, tf_gs));
312: resume = (int)resume_pop_gs;
313: break;
314: default:
315: goto we_re_toast;
316: }
317: break;
318: default:
319: goto we_re_toast;
320: }
/*
 * Only a frame that was headed for user mode is recoverable; if the
 * reconstructed frame describes kernel mode the fault is fatal.
 */
321: if (KERNELMODE(vframe->tf_cs, vframe->tf_eflags))
322: goto we_re_toast;
323:
/* Continue at the resume_* address selected above. */
324: frame.tf_eip = resume;
325: return;
326:
327: case T_PROTFLT|T_USER: /* protection fault */
328: KERNEL_PROC_LOCK(p);
329: #ifdef VM86
330: if (frame.tf_eflags & PSL_VM) {
331: vm86_gpfault(p, type & ~T_USER);
332: KERNEL_PROC_UNLOCK(p);
333: goto out;
334: }
335: #endif
336: /* If pmap_exec_fixup does something, let's retry the trap. */
337: if (pmap_exec_fixup(&p->p_vmspace->vm_map, &frame,
338: &p->p_addr->u_pcb)) {
339: KERNEL_PROC_UNLOCK(p);
340: goto out;
341: }
342:
343: sv.sival_int = frame.tf_eip;
344: trapsignal(p, SIGSEGV, vftype, SEGV_MAPERR, sv);
345: KERNEL_PROC_UNLOCK(p);
346: goto out;
347:
/*
 * The user-mode cases that follow all have the same shape: record the
 * faulting eip in sv, then post the signal under the kernel proc lock.
 */
348: case T_TSSFLT|T_USER:
349: sv.sival_int = frame.tf_eip;
350: KERNEL_PROC_LOCK(p);
351: trapsignal(p, SIGBUS, vftype, BUS_OBJERR, sv);
352: KERNEL_PROC_UNLOCK(p);
353: goto out;
354:
355: case T_SEGNPFLT|T_USER:
356: case T_STKFLT|T_USER:
357: sv.sival_int = frame.tf_eip;
358: KERNEL_PROC_LOCK(p);
359: trapsignal(p, SIGSEGV, vftype, SEGV_MAPERR, sv);
360: KERNEL_PROC_UNLOCK(p);
361: goto out;
362:
363: case T_ALIGNFLT|T_USER:
364: sv.sival_int = frame.tf_eip;
365: KERNEL_PROC_LOCK(p);
366: trapsignal(p, SIGBUS, vftype, BUS_ADRALN, sv);
367: KERNEL_PROC_UNLOCK(p);
368: goto out;
369:
370: case T_PRIVINFLT|T_USER: /* privileged instruction fault */
371: sv.sival_int = frame.tf_eip;
372: KERNEL_PROC_LOCK(p);
373: trapsignal(p, SIGILL, type &~ T_USER, ILL_PRVOPC, sv);
374: KERNEL_PROC_UNLOCK(p);
375: goto out;
376:
377: case T_FPOPFLT|T_USER: /* coprocessor operand fault */
378: sv.sival_int = frame.tf_eip;
379: KERNEL_PROC_LOCK(p);
380: trapsignal(p, SIGILL, type &~ T_USER, ILL_COPROC, sv);
381: KERNEL_PROC_UNLOCK(p);
382: goto out;
383:
384: case T_ASTFLT|T_USER: /* Allow process switch */
385: uvmexp.softs++;
386: if (p->p_flag & P_OWEUPC) {
387: KERNEL_PROC_LOCK(p);
388: ADDUPROF(p);
389: KERNEL_PROC_UNLOCK(p);
390: }
391: if (want_resched)
392: preempt(NULL);
393: goto out;
394:
395: case T_DNA|T_USER: {
396: printf("pid %d killed due to lack of floating point\n",
397: p->p_pid);
398: sv.sival_int = frame.tf_eip;
399: KERNEL_PROC_LOCK(p);
400: trapsignal(p, SIGKILL, type &~ T_USER, FPE_FLTINV, sv);
401: KERNEL_PROC_UNLOCK(p);
402: goto out;
403: }
404:
405: case T_BOUND|T_USER:
406: sv.sival_int = frame.tf_eip;
407: KERNEL_PROC_LOCK(p);
408: trapsignal(p, SIGFPE, type &~ T_USER, FPE_FLTSUB, sv);
409: KERNEL_PROC_UNLOCK(p);
410: goto out;
411: case T_OFLOW|T_USER:
412: sv.sival_int = frame.tf_eip;
413: KERNEL_PROC_LOCK(p);
414: trapsignal(p, SIGFPE, type &~ T_USER, FPE_INTOVF, sv);
415: KERNEL_PROC_UNLOCK(p);
416: goto out;
417: case T_DIVIDE|T_USER:
418: sv.sival_int = frame.tf_eip;
419: KERNEL_PROC_LOCK(p);
420: trapsignal(p, SIGFPE, type &~ T_USER, FPE_INTDIV, sv);
421: KERNEL_PROC_UNLOCK(p);
422: goto out;
423:
/* Unlike the cases above, this one passes tf_err as the trap code. */
424: case T_ARITHTRAP|T_USER:
425: sv.sival_int = frame.tf_eip;
426: KERNEL_PROC_LOCK(p);
427: trapsignal(p, SIGFPE, frame.tf_err, FPE_INTOVF, sv);
428: KERNEL_PROC_UNLOCK(p);
429: goto out;
430:
431: case T_PAGEFLT: /* allow page faults in kernel mode */
432: if (p == 0 || p->p_addr == 0)
433: goto we_re_toast;
434: #ifdef LOCKDEBUG
435: /* If we page-fault while in scheduler, we're doomed. */
436: #ifdef notyet
437: if (simple_lock_held(&sched_lock))
438: #else
439: if (__mp_lock_held(&sched_lock))
440: #endif
441: goto we_re_toast;
442: #endif
443:
444: pcb = &p->p_addr->u_pcb;
445: #if 0
446: /* XXX - check only applies to 386's and 486's with WP off */
447: if (frame.tf_err & PGEX_P)
448: goto we_re_toast;
449: #endif
/*
 * Kernel-mode faults take KERNEL_LOCK() and then join the user-mode
 * path at faultcommon (which takes KERNEL_PROC_LOCK(p) instead); the
 * unlock calls further down are chosen by type to match.
 */
450: cr2 = rcr2();
451: KERNEL_LOCK();
452: goto faultcommon;
453:
454: case T_PAGEFLT|T_USER: { /* page fault */
455: vaddr_t va, fa;
456: struct vmspace *vm;
457: struct vm_map *map;
458: int rv;
459:
460: cr2 = rcr2();
461: KERNEL_PROC_LOCK(p);
462: faultcommon:
463: vm = p->p_vmspace;
464: if (vm == NULL)
465: goto we_re_toast;
/* fa is the exact faulting address from %cr2, va its page base. */
466: fa = (vaddr_t)cr2;
467: va = trunc_page(fa);
468: /*
469: * It is only a kernel address space fault iff:
470: * 1. (type & T_USER) == 0 and
471: * 2. pcb_onfault not set or
472: * 3. pcb_onfault set but supervisor space fault
473: * The last can occur during an exec() copyin where the
474: * argument space is lazy-allocated.
475: */
476: if (type == T_PAGEFLT && va >= KERNBASE)
477: map = kernel_map;
478: else
479: map = &vm->vm_map;
480:
481: #ifdef DIAGNOSTIC
482: if (map == kernel_map && va == 0) {
483: printf("trap: bad kernel access at %lx\n", va);
484: goto we_re_toast;
485: }
486: #endif
487:
/*
 * pcb_onfault is cleared for the duration of uvm_fault() and put back
 * afterwards.
 * NOTE(review): presumably so that a fault taken inside uvm_fault()
 * itself is not mistaken for a copyin/copyout fault -- confirm.
 */
488: onfault = p->p_addr->u_pcb.pcb_onfault;
489: p->p_addr->u_pcb.pcb_onfault = NULL;
490: rv = uvm_fault(map, va, 0, ftype);
491: p->p_addr->u_pcb.pcb_onfault = onfault;
492:
/*
 * Fault resolved.  For non-kernel maps uvm_grow() is called with the
 * faulting page; kernel-mode faults then return straight to the
 * interrupted code, user-mode faults leave through userret().
 */
493: if (rv == 0) {
494: if (map != kernel_map)
495: uvm_grow(p, va);
496: if (type == T_PAGEFLT) {
497: KERNEL_UNLOCK();
498: return;
499: }
500: KERNEL_PROC_UNLOCK(p);
501: goto out;
502: }
503:
/*
 * Unresolved kernel-mode fault: resume at the registered onfault
 * handler if there is one, otherwise report the failure and panic.
 */
504: if (type == T_PAGEFLT) {
505: if (pcb->pcb_onfault != 0) {
506: KERNEL_UNLOCK();
507: goto copyfault;
508: }
509: printf("uvm_fault(%p, 0x%lx, 0, %d) -> %x\n",
510: map, va, ftype, rv);
511: goto we_re_toast;
512: }
/* Unresolved user-mode fault: SIGSEGV carrying the faulting address. */
513: sv.sival_int = fa;
514: trapsignal(p, SIGSEGV, vftype, SEGV_MAPERR, sv);
515: KERNEL_PROC_UNLOCK(p);
516: break;
517: }
518:
519: #if 0 /* Should this be left out? */
520: #if !defined(DDB) && !defined(KGDB)
521: /* XXX need to deal with this when DDB is present, too */
522: case T_TRCTRAP: /* kernel trace trap; someone single stepping lcall's */
523: /* syscall has to turn off the trace bit itself */
524: return;
525: #endif
526: #endif
527:
528: case T_BPTFLT|T_USER: /* bpt instruction fault */
529: sv.sival_int = rcr2();
530: KERNEL_PROC_LOCK(p);
531: trapsignal(p, SIGTRAP, type &~ T_USER, TRAP_BRKPT, sv);
532: KERNEL_PROC_UNLOCK(p);
533: break;
534: case T_TRCTRAP|T_USER: /* trace trap */
535: sv.sival_int = rcr2();
536: KERNEL_PROC_LOCK(p);
537: trapsignal(p, SIGTRAP, type &~ T_USER, TRAP_TRACE, sv);
538: KERNEL_PROC_UNLOCK(p);
539: break;
540:
541: #if NISA > 0
542: case T_NMI:
543: case T_NMI|T_USER:
544: #if defined(DDB) || defined(KGDB)
545: /* NMI can be hooked up to a pushbutton for debugging */
546: printf ("NMI ... going to debugger\n");
547: #ifdef KGDB
548: if (kgdb_trap(type, &frame))
549: return;
550: #endif
551: #ifdef DDB
552: if (kdb_trap(type, 0, &frame))
553: return;
554: #endif
555: return;
556: #endif /* DDB || KGDB */
/*
 * Note: when DDB or KGDB is configured, the unconditional return above
 * makes the isa_nmi() check below unreachable.
 */
557: /* machine/parity/power fail/"kitchen sink" faults */
558: if (isa_nmi() == 0)
559: return;
560: else
561: goto we_re_toast;
562: #endif
563: }
564:
/*
 * Reached by the cases that "break" out of the switch: kernel-mode
 * traps are finished here, user-mode traps finish through userret().
 */
565: if ((type & T_USER) == 0)
566: return;
567: out:
568: userret(p);
569: }
570:
571: /*
572: * syscall(frame):
573: * System call request from POSIX system call gate interface to kernel.
574: * Like trap(), argument is call by reference.
575: */
576: /*ARGSUSED*/
577: void
578: syscall(struct trapframe frame)
579: {
580: caddr_t params;
581: struct sysent *callp;
582: struct proc *p;
583: int orig_error, error, opc, nsys;
584: size_t argsize;
585: register_t code, args[8], rval[2];
586: #ifdef DIAGNOSTIC
587: int ocpl = lapic_tpr;
588: #endif
589:
590: uvmexp.syscalls++;
591: #ifdef DIAGNOSTIC
592: if (!USERMODE(frame.tf_cs, frame.tf_eflags))
593: panic("syscall");
594: #endif
595: p = curproc;
596: p->p_md.md_regs = &frame;
597: opc = frame.tf_eip;
598: code = frame.tf_eax;
599:
600: nsys = p->p_emul->e_nsysent;
601: callp = p->p_emul->e_sysent;
602:
603: #ifdef COMPAT_IBCS2
604: if (p->p_emul == &emul_ibcs2)
605: if (IBCS2_HIGH_SYSCALL(code))
606: code = IBCS2_CVT_HIGH_SYSCALL(code);
607: #endif
608: params = (caddr_t)frame.tf_esp + sizeof(int);
609:
610: #ifdef VM86
611: /*
612: * VM86 mode application found our syscall trap gate by accident; let
613: * it get a SIGSYS and have the VM86 handler in the process take care
614: * of it.
615: */
616: if (frame.tf_eflags & PSL_VM)
617: code = -1;
618: else
619: #endif
620:
621: switch (code) {
622: case SYS_syscall:
623: #ifdef COMPAT_LINUX
624: /* Linux has a special system setup call as number 0 */
625: if (p->p_emul == &emul_linux_aout ||
626: p->p_emul == &emul_linux_elf)
627: break;
628: #endif
629: /*
630: * Code is first argument, followed by actual args.
631: */
632: copyin(params, &code, sizeof(int));
633: params += sizeof(int);
634: break;
635: case SYS___syscall:
636: /*
637: * Like syscall, but code is a quad, so as to maintain
638: * quad alignment for the rest of the arguments.
639: */
640: if (callp != sysent
641: #ifdef COMPAT_FREEBSD
642: && p->p_emul != &emul_freebsd_aout
643: && p->p_emul != &emul_freebsd_elf
644: #endif
645: #ifdef COMPAT_AOUT
646: && p->p_emul != &emul_aout
647: #endif
648: #ifdef COMPAT_BSDOS
649: && p->p_emul != &emul_bsdos
650: #endif
651: )
652: break;
653: copyin(params + _QUAD_LOWWORD * sizeof(int), &code, sizeof(int));
654: params += sizeof(quad_t);
655: break;
656: default:
657: break;
658: }
659: if (code < 0 || code >= nsys)
660: callp += p->p_emul->e_nosys; /* illegal */
661: else
662: callp += code;
663: argsize = callp->sy_argsize;
664: #ifdef COMPAT_LINUX
665: /* XXX extra if() for every emul type.. */
666: if (p->p_emul == &emul_linux_aout || p->p_emul == &emul_linux_elf) {
667: /*
668: * Linux passes the args in ebx, ecx, edx, esi, edi, ebp, in
669: * increasing order.
670: */
671: switch (argsize) {
672: case 24:
673: args[5] = frame.tf_ebp;
674: case 20:
675: args[4] = frame.tf_edi;
676: case 16:
677: args[3] = frame.tf_esi;
678: case 12:
679: args[2] = frame.tf_edx;
680: case 8:
681: args[1] = frame.tf_ecx;
682: case 4:
683: args[0] = frame.tf_ebx;
684: case 0:
685: break;
686: default:
687: panic("linux syscall with weird argument size %d",
688: argsize);
689: break;
690: }
691: error = 0;
692: }
693: else
694: #endif
695: if (argsize)
696: error = copyin(params, (caddr_t)args, argsize);
697: else
698: error = 0;
699: orig_error = error;
700: KERNEL_PROC_LOCK(p);
701: #ifdef SYSCALL_DEBUG
702: scdebug_call(p, code, args);
703: #endif
704: #ifdef KTRACE
705: if (KTRPOINT(p, KTR_SYSCALL))
706: ktrsyscall(p, code, argsize, args);
707: #endif
708: if (error) {
709: KERNEL_PROC_UNLOCK(p);
710: goto bad;
711: }
712: rval[0] = 0;
713: rval[1] = frame.tf_edx;
714: #if NSYSTRACE > 0
715: if (ISSET(p->p_flag, P_SYSTRACE))
716: orig_error = error = systrace_redirect(code, p, args, rval);
717: else
718: #endif
719: orig_error = error = (*callp->sy_call)(p, args, rval);
720: KERNEL_PROC_UNLOCK(p);
721: switch (error) {
722: case 0:
723: frame.tf_eax = rval[0];
724: frame.tf_edx = rval[1];
725: frame.tf_eflags &= ~PSL_C; /* carry bit */
726: break;
727: case ERESTART:
728: /*
729: * The offset to adjust the PC by depends on whether we entered
730: * the kernel through the trap or call gate. We pushed the
731: * size of the instruction into tf_err on entry.
732: */
733: frame.tf_eip = opc - frame.tf_err;
734: break;
735: case EJUSTRETURN:
736: /* nothing to do */
737: break;
738: default:
739: bad:
740: if (p->p_emul->e_errno)
741: error = p->p_emul->e_errno[error];
742: frame.tf_eax = error;
743: frame.tf_eflags |= PSL_C; /* carry bit */
744: break;
745: }
746:
747: #ifdef SYSCALL_DEBUG
748: KERNEL_PROC_LOCK(p);
749: scdebug_ret(p, code, orig_error, rval);
750: KERNEL_PROC_UNLOCK(p);
751: #endif
752: userret(p);
753: #ifdef KTRACE
754: if (KTRPOINT(p, KTR_SYSRET)) {
755: KERNEL_PROC_LOCK(p);
756: ktrsysret(p, code, orig_error, rval[0]);
757: KERNEL_PROC_UNLOCK(p);
758: }
759: #endif
760: #ifdef DIAGNOSTIC
761: if (lapic_tpr != ocpl) {
762: printf("WARNING: SPL (0x%x) NOT LOWERED ON "
763: "syscall(0x%x, 0x%x, 0x%x, 0x%x...) EXIT, PID %d\n",
764: lapic_tpr, code, args[0], args[1], args[2], p->p_pid);
765: lapic_tpr = ocpl;
766: }
767: #endif
768: }
769:
770: void
771: child_return(void *arg)
772: {
773: struct proc *p = (struct proc *)arg;
774: struct trapframe *tf = p->p_md.md_regs;
775:
776: tf->tf_eax = 0;
777: tf->tf_eflags &= ~PSL_C;
778:
779: KERNEL_PROC_UNLOCK(p);
780:
781: userret(p);
782: #ifdef KTRACE
783: if (KTRPOINT(p, KTR_SYSRET)) {
784: KERNEL_PROC_LOCK(p);
785: ktrsysret(p,
786: (p->p_flag & P_PPWAIT) ? SYS_vfork : SYS_fork, 0, 0);
787: KERNEL_PROC_UNLOCK(p);
788: }
789: #endif
790: }
CVSweb