Annotation of sys/arch/i386/i386/trap.c, Revision 1.1
1.1 ! nbrk 1: /* $OpenBSD: trap.c,v 1.85 2007/06/26 13:39:02 tom Exp $ */
! 2: /* $NetBSD: trap.c,v 1.95 1996/05/05 06:50:02 mycroft Exp $ */
! 3:
! 4: /*-
! 5: * Copyright (c) 1995 Charles M. Hannum. All rights reserved.
! 6: * Copyright (c) 1990 The Regents of the University of California.
! 7: * All rights reserved.
! 8: *
! 9: * This code is derived from software contributed to Berkeley by
! 10: * the University of Utah, and William Jolitz.
! 11: *
! 12: * Redistribution and use in source and binary forms, with or without
! 13: * modification, are permitted provided that the following conditions
! 14: * are met:
! 15: * 1. Redistributions of source code must retain the above copyright
! 16: * notice, this list of conditions and the following disclaimer.
! 17: * 2. Redistributions in binary form must reproduce the above copyright
! 18: * notice, this list of conditions and the following disclaimer in the
! 19: * documentation and/or other materials provided with the distribution.
! 20: * 3. Neither the name of the University nor the names of its contributors
! 21: * may be used to endorse or promote products derived from this software
! 22: * without specific prior written permission.
! 23: *
! 24: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
! 25: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
! 26: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
! 27: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
! 28: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
! 29: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
! 30: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
! 31: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
! 32: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
! 33: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
! 34: * SUCH DAMAGE.
! 35: *
! 36: * @(#)trap.c 7.4 (Berkeley) 5/13/91
! 37: */
! 38:
! 39: /*
! 40: * 386 Trap and System call handling
! 41: */
! 42:
! 43: #include <sys/param.h>
! 44: #include <sys/systm.h>
! 45: #include <sys/proc.h>
! 46: #include <sys/signalvar.h>
! 47: #include <sys/user.h>
! 48: #include <sys/acct.h>
! 49: #include <sys/kernel.h>
! 50: #include <sys/signal.h>
! 51: #ifdef KTRACE
! 52: #include <sys/ktrace.h>
! 53: #endif
! 54: #include <sys/syscall.h>
! 55:
! 56: #include "systrace.h"
! 57: #include <dev/systrace.h>
! 58:
! 59: #include <uvm/uvm_extern.h>
! 60:
! 61: #include <machine/cpu.h>
! 62: #include <machine/cpufunc.h>
! 63: #include <machine/psl.h>
! 64: #include <machine/reg.h>
! 65: #include <machine/trap.h>
! 66: #ifdef DDB
! 67: #include <machine/db_machdep.h>
! 68: #endif
! 69:
! 70: #ifdef KGDB
! 71: #include <sys/kgdb.h>
! 72: #endif
! 73:
! 74: #ifdef COMPAT_IBCS2
! 75: #include <compat/ibcs2/ibcs2_errno.h>
! 76: #include <compat/ibcs2/ibcs2_exec.h>
! 77: extern struct emul emul_ibcs2;
! 78: #endif
! 79: #include <sys/exec.h>
! 80: #ifdef COMPAT_LINUX
! 81: #include <compat/linux/linux_syscall.h>
! 82: extern struct emul emul_linux_aout, emul_linux_elf;
! 83: #endif
! 84: #ifdef COMPAT_FREEBSD
! 85: extern struct emul emul_freebsd_aout, emul_freebsd_elf;
! 86: #endif
! 87: #ifdef COMPAT_BSDOS
! 88: extern struct emul emul_bsdos;
! 89: #endif
! 90: #ifdef COMPAT_AOUT
! 91: extern struct emul emul_aout;
! 92: #endif
! 93: #ifdef KVM86
! 94: #include <machine/kvm86.h>
! 95: #define KVM86MODE (kvm86_incall)
! 96: #endif
! 97:
! 98: #include "npx.h"
! 99:
/*
 * Forward declarations for the routines defined later in this file:
 * the return-to-user helper and the two entry points that take a
 * trapframe by value.
 */
! 100: static __inline void userret(struct proc *);
! 101: void trap(struct trapframe);
! 102: void syscall(struct trapframe);
! 103:
! 104: /*
! 105: * Define the code needed before returning to user mode, for
! 106: * trap and syscall.
! 107: */
! 108: static __inline void
! 109: userret(struct proc *p)
! 110: {
! 111: int sig;
! 112:
! 113: /* take pending signals */
! 114: while ((sig = CURSIG(p)) != 0)
! 115: postsig(sig);
! 116:
! 117: p->p_cpu->ci_schedstate.spc_curpriority = p->p_priority = p->p_usrpri;
! 118: }
! 119:
/*
 * Printable names for the trap numbers.  trap() indexes this table with
 * frame.tf_trapno when it reports a fatal trap; the number and T_* name
 * of each slot are given in the leading comment.
 */
char *trap_type[] = {
	/*  0 T_PRIVINFLT */		"privileged instruction fault",
	/*  1 T_BPTFLT */		"breakpoint trap",
	/*  2 T_ARITHTRAP */		"arithmetic trap",
	/*  3 T_ASTFLT */		"asynchronous system trap",
	/*  4 T_PROTFLT */		"protection fault",
	/*  5 T_TRCTRAP */		"trace trap",
	/*  6 T_PAGEFLT */		"page fault",
	/*  7 T_ALIGNFLT */		"alignment fault",
	/*  8 T_DIVIDE */		"integer divide fault",
	/*  9 T_NMI */			"non-maskable interrupt",
	/* 10 T_OFLOW */		"overflow trap",
	/* 11 T_BOUND */		"bounds check fault",
	/* 12 T_DNA */			"FPU not available fault",
	/* 13 T_DOUBLEFLT */		"double fault",
	/* 14 T_FPOPFLT (![P]Pro) */	"FPU operand fetch fault",
	/* 15 T_TSSFLT */		"invalid TSS fault",
	/* 16 T_SEGNPFLT */		"segment not present fault",
	/* 17 T_STKFLT */		"stack fault",
	/* 18 T_MACHK ([P]Pro) */	"machine check",
	/* 19 T_XFTRAP */		"SIMD FP fault",
	/* 20 T_RESERVED */		"reserved trap",
};

/* Number of named entries above; larger trap numbers print as unknown. */
int trap_types = sizeof(trap_type) / sizeof(trap_type[0]);
! 144:
/*
 * DEBUG kernels only: when nonzero, trap() prints the trap number,
 * error code, eip, cs, eflags, cr2 and lapic_tpr plus curproc for
 * every trap it handles.  Defaults to off.
 */
! 145: #ifdef DEBUG
! 146: int trapdebug = 0;
! 147: #endif
! 148:
/*
 * Overview of trap() as implemented below:
 *  - The access type of the fault is derived from PGEX_W in tf_err.
 *    vftype is the value handed to trapsignal(); ftype is the value
 *    handed to uvm_fault().
 *  - A trap that did not come from kernel mode gets T_USER or-ed into
 *    type and records &frame in p->p_md.md_regs.
 *  - User-mode cases finish through userret(p), either via goto out or
 *    by breaking out of the switch; kernel-mode cases return directly.
 *  - Anything that cannot be handled reaches we_re_toast, which offers
 *    the trap to KGDB and DDB (when configured) and otherwise prints
 *    the frame and panics.
 */
! 149: /*
! 150: * trap(frame):
! 151: * Exception, fault, and trap interface to BSD kernel. This
! 152: * common code is called from assembly language IDT gate entry
! 153: * routines that prepare a suitable stack frame, and restore this
! 154: * frame after the exception has been processed. Note that the
! 155: * effect is as if the arguments were passed call by reference.
! 156: */
! 157: /*ARGSUSED*/
! 158: void
! 159: trap(struct trapframe frame)
! 160: {
! 161: struct proc *p = curproc;
! 162: int type = frame.tf_trapno;
! 163: struct pcb *pcb = NULL;
! 164: extern char resume_iret[], resume_pop_ds[], resume_pop_es[],
! 165: resume_pop_fs[], resume_pop_gs[];
! 166: struct trapframe *vframe;
! 167: int resume;
! 168: vm_prot_t vftype, ftype;
! 169: union sigval sv;
! 170: caddr_t onfault;
! 171: uint32_t cr2;
! 172:
! 173: uvmexp.traps++;
! 174:
/*
 * A write fault is reported to signal handlers as VM_PROT_WRITE but is
 * resolved by uvm_fault() with read and write access; anything else is
 * treated as a read for both purposes.
 */
! 175: /* SIGSEGV and SIGBUS need this */
! 176: if (frame.tf_err & PGEX_W) {
! 177: vftype = VM_PROT_WRITE;
! 178: ftype = VM_PROT_READ | VM_PROT_WRITE;
! 179: } else
! 180: ftype = vftype = VM_PROT_READ;
! 181:
! 182: #ifdef DEBUG
! 183: if (trapdebug) {
! 184: printf("trap %d code %x eip %x cs %x eflags %x cr2 %x cpl %x\n",
! 185: frame.tf_trapno, frame.tf_err, frame.tf_eip, frame.tf_cs,
! 186: frame.tf_eflags, rcr2(), lapic_tpr);
! 187: printf("curproc %p\n", curproc);
! 188: }
! 189: #endif
! 190:
/*
 * Tag traps that did not originate in kernel mode with T_USER and
 * remember where this process has its saved registers.
 */
! 191: if (!KERNELMODE(frame.tf_cs, frame.tf_eflags)) {
! 192: type |= T_USER;
! 193: p->p_md.md_regs = &frame;
! 194: }
! 195:
! 196: switch (type) {
! 197:
! 198: /* trace trap */
! 199: case T_TRCTRAP: {
! 200: #if defined(DDB) || defined(KGDB)
! 201: /* Make sure nobody is single stepping into kernel land.
! 202: * The syscall has to turn off the trace bit itself. The
! 203: * easiest way, is to simply not call the debugger, until
! 204: * we are through the problematic "osyscall" stub. This
! 205: * is a hack, but it does seem to work.
! 206: */
! 207: extern int Xosyscall, Xosyscall_end;
! 208:
! 209: if (frame.tf_eip >= (int)&Xosyscall &&
! 210: frame.tf_eip <= (int)&Xosyscall_end)
! 211: return;
! 212: #else
! 213: return; /* Just return if no DDB */
! 214: #endif
! 215: }
! 216: /* FALLTHROUGH */
! 217:
/*
 * Fatal path, also the target of every goto we_re_toast below: let the
 * configured debuggers claim the trap, otherwise describe it and panic.
 */
! 218: default:
! 219: we_re_toast:
! 220: #ifdef KGDB
! 221: if (kgdb_trap(type, &frame))
! 222: return;
! 223: else {
! 224: /*
! 225: * If this is a breakpoint, don't panic
! 226: * if we're not connected.
! 227: */
! 228: if (type == T_BPTFLT) {
! 229: printf("kgdb: ignored %s\n", trap_type[type]);
! 230: return;
! 231: }
! 232: }
! 233: #endif
! 234:
! 235: #ifdef DDB
! 236: if (kdb_trap(type, 0, &frame))
! 237: return;
! 238: #endif
/* Only index trap_type[] when the trap number is inside the table. */
! 239: if (frame.tf_trapno < trap_types)
! 240: printf("fatal %s (%d)", trap_type[frame.tf_trapno],
! 241: frame.tf_trapno);
! 242: else
! 243: printf("unknown trap %d", frame.tf_trapno);
! 244: printf(" in %s mode\n", (type & T_USER) ? "user" : "supervisor");
! 245: printf("trap type %d code %x eip %x cs %x eflags %x cr2 %x cpl %x\n",
! 246: type, frame.tf_err, frame.tf_eip, frame.tf_cs, frame.tf_eflags, rcr2(), lapic_tpr);
! 247:
! 248: panic("trap type %d, code=%x, pc=%x",
! 249: type, frame.tf_err, frame.tf_eip);
! 250: /*NOTREACHED*/
! 251:
/*
 * Kernel-mode protection fault.  With KVM86 configured and KVM86MODE
 * true the fault is handed to kvm86_gpfault(); otherwise it shares the
 * handling of the two cases that follow.
 */
! 252: case T_PROTFLT:
! 253: #ifdef KVM86
! 254: if (KVM86MODE) {
! 255: kvm86_gpfault(&frame);
! 256: return;
! 257: }
! 258: #endif
/* FALLTHROUGH */
! 259: case T_SEGNPFLT:
! 260: case T_ALIGNFLT:
! 261: /* Check for copyin/copyout fault. */
! 262: if (p && p->p_addr) {
! 263: pcb = &p->p_addr->u_pcb;
! 264: if (pcb->pcb_onfault != 0) {
/*
 * copyfault is also reached from the kernel page-fault path below:
 * redirect the saved eip to pcb_onfault and return.
 */
! 265: copyfault:
! 266: frame.tf_eip = (int)pcb->pcb_onfault;
! 267: return;
! 268: }
! 269: }
! 270:
! 271: /*
! 272: * Check for failure during return to user mode.
! 273: *
! 274: * We do this by looking at the instruction we faulted on. The
! 275: * specific instructions we recognize only happen when
! 276: * returning from a trap, syscall, or interrupt.
! 277: *
! 278: * XXX
! 279: * The heuristic used here will currently fail for the case of
! 280: * one of the 2 pop instructions faulting when returning from a
! 281: * fast interrupt. This should not be possible. It can be
! 282: * fixed by rearranging the trap frame so that the stack format
! 283: * at this point is the same as on exit from a `slow'
! 284: * interrupt.
! 285: */
/*
 * In each recognized case vframe is computed so that the named field
 * of *vframe (tf_eip, tf_ds, tf_es, tf_fs or tf_gs) lands exactly at
 * &frame.tf_esp, and resume is set to the matching resume_* label.
 */
! 286: switch (*(u_char *)frame.tf_eip) {
! 287: case 0xcf: /* iret */
! 288: vframe = (void *)((int)&frame.tf_esp -
! 289: offsetof(struct trapframe, tf_eip));
! 290: resume = (int)resume_iret;
! 291: break;
! 292: case 0x1f: /* popl %ds */
! 293: vframe = (void *)((int)&frame.tf_esp -
! 294: offsetof(struct trapframe, tf_ds));
! 295: resume = (int)resume_pop_ds;
! 296: break;
! 297: case 0x07: /* popl %es */
! 298: vframe = (void *)((int)&frame.tf_esp -
! 299: offsetof(struct trapframe, tf_es));
! 300: resume = (int)resume_pop_es;
! 301: break;
! 302: case 0x0f: /* 0x0f prefix */
! 303: switch (*(u_char *)(frame.tf_eip+1)) {
! 304: case 0xa1: /* popl %fs */
! 305: vframe = (void *)((int)&frame.tf_esp -
! 306: offsetof(struct trapframe, tf_fs));
! 307: resume = (int)resume_pop_fs;
! 308: break;
! 309: case 0xa9: /* popl %gs */
! 310: vframe = (void *)((int)&frame.tf_esp -
! 311: offsetof(struct trapframe, tf_gs));
! 312: resume = (int)resume_pop_gs;
! 313: break;
! 314: default:
! 315: goto we_re_toast;
! 316: }
! 317: break;
! 318: default:
! 319: goto we_re_toast;
! 320: }
/* Recovery is refused when the frame being restored is a kernel-mode one. */
! 321: if (KERNELMODE(vframe->tf_cs, vframe->tf_eflags))
! 322: goto we_re_toast;
! 323:
! 324: frame.tf_eip = resume;
! 325: return;
! 326:
/*
 * User-mode protection fault: VM86 tasks go to vm86_gpfault(); if
 * pmap_exec_fixup() reports that it did something the trap is simply
 * retried; otherwise the process gets SIGSEGV with the faulting eip.
 */
! 327: case T_PROTFLT|T_USER: /* protection fault */
! 328: KERNEL_PROC_LOCK(p);
! 329: #ifdef VM86
! 330: if (frame.tf_eflags & PSL_VM) {
! 331: vm86_gpfault(p, type & ~T_USER);
! 332: KERNEL_PROC_UNLOCK(p);
! 333: goto out;
! 334: }
! 335: #endif
! 336: /* If pmap_exec_fixup does something, let's retry the trap. */
! 337: if (pmap_exec_fixup(&p->p_vmspace->vm_map, &frame,
! 338: &p->p_addr->u_pcb)) {
! 339: KERNEL_PROC_UNLOCK(p);
! 340: goto out;
! 341: }
! 342:
! 343: sv.sival_int = frame.tf_eip;
! 344: trapsignal(p, SIGSEGV, vftype, SEGV_MAPERR, sv);
! 345: KERNEL_PROC_UNLOCK(p);
! 346: goto out;
! 347:
/*
 * The user-mode cases from here to T_ARITHTRAP all follow one pattern:
 * store the faulting eip in sv, post a signal with trapsignal() while
 * holding KERNEL_PROC_LOCK, and leave through out.
 */
! 348: case T_TSSFLT|T_USER:
! 349: sv.sival_int = frame.tf_eip;
! 350: KERNEL_PROC_LOCK(p);
! 351: trapsignal(p, SIGBUS, vftype, BUS_OBJERR, sv);
! 352: KERNEL_PROC_UNLOCK(p);
! 353: goto out;
! 354:
! 355: case T_SEGNPFLT|T_USER:
! 356: case T_STKFLT|T_USER:
! 357: sv.sival_int = frame.tf_eip;
! 358: KERNEL_PROC_LOCK(p);
! 359: trapsignal(p, SIGSEGV, vftype, SEGV_MAPERR, sv);
! 360: KERNEL_PROC_UNLOCK(p);
! 361: goto out;
! 362:
! 363: case T_ALIGNFLT|T_USER:
! 364: sv.sival_int = frame.tf_eip;
! 365: KERNEL_PROC_LOCK(p);
! 366: trapsignal(p, SIGBUS, vftype, BUS_ADRALN, sv);
! 367: KERNEL_PROC_UNLOCK(p);
! 368: goto out;
! 369:
! 370: case T_PRIVINFLT|T_USER: /* privileged instruction fault */
! 371: sv.sival_int = frame.tf_eip;
! 372: KERNEL_PROC_LOCK(p);
! 373: trapsignal(p, SIGILL, type &~ T_USER, ILL_PRVOPC, sv);
! 374: KERNEL_PROC_UNLOCK(p);
! 375: goto out;
! 376:
! 377: case T_FPOPFLT|T_USER: /* coprocessor operand fault */
! 378: sv.sival_int = frame.tf_eip;
! 379: KERNEL_PROC_LOCK(p);
! 380: trapsignal(p, SIGILL, type &~ T_USER, ILL_COPROC, sv);
! 381: KERNEL_PROC_UNLOCK(p);
! 382: goto out;
! 383:
/*
 * AST: count it, run ADDUPROF() under the lock when P_OWEUPC is set,
 * and call preempt() when want_resched is set.  No signal is posted.
 */
! 384: case T_ASTFLT|T_USER: /* Allow process switch */
! 385: uvmexp.softs++;
! 386: if (p->p_flag & P_OWEUPC) {
! 387: KERNEL_PROC_LOCK(p);
! 388: ADDUPROF(p);
! 389: KERNEL_PROC_UNLOCK(p);
! 390: }
! 391: if (want_resched)
! 392: preempt(NULL);
! 393: goto out;
! 394:
/* Device-not-available from user mode: log the pid and post SIGKILL. */
! 395: case T_DNA|T_USER: {
! 396: printf("pid %d killed due to lack of floating point\n",
! 397: p->p_pid);
! 398: sv.sival_int = frame.tf_eip;
! 399: KERNEL_PROC_LOCK(p);
! 400: trapsignal(p, SIGKILL, type &~ T_USER, FPE_FLTINV, sv);
! 401: KERNEL_PROC_UNLOCK(p);
! 402: goto out;
! 403: }
! 404:
! 405: case T_BOUND|T_USER:
! 406: sv.sival_int = frame.tf_eip;
! 407: KERNEL_PROC_LOCK(p);
! 408: trapsignal(p, SIGFPE, type &~ T_USER, FPE_FLTSUB, sv);
! 409: KERNEL_PROC_UNLOCK(p);
! 410: goto out;
! 411: case T_OFLOW|T_USER:
! 412: sv.sival_int = frame.tf_eip;
! 413: KERNEL_PROC_LOCK(p);
! 414: trapsignal(p, SIGFPE, type &~ T_USER, FPE_INTOVF, sv);
! 415: KERNEL_PROC_UNLOCK(p);
! 416: goto out;
! 417: case T_DIVIDE|T_USER:
! 418: sv.sival_int = frame.tf_eip;
! 419: KERNEL_PROC_LOCK(p);
! 420: trapsignal(p, SIGFPE, type &~ T_USER, FPE_INTDIV, sv);
! 421: KERNEL_PROC_UNLOCK(p);
! 422: goto out;
! 423:
/* Unlike its neighbours this case passes frame.tf_err, not the trap type. */
! 424: case T_ARITHTRAP|T_USER:
! 425: sv.sival_int = frame.tf_eip;
! 426: KERNEL_PROC_LOCK(p);
! 427: trapsignal(p, SIGFPE, frame.tf_err, FPE_INTOVF, sv);
! 428: KERNEL_PROC_UNLOCK(p);
! 429: goto out;
! 430:
/*
 * Kernel-mode page fault.  Without a process or u-area it is fatal.
 * Otherwise pcb is set, cr2 is sampled before KERNEL_LOCK() is taken,
 * and control jumps into the user page-fault block at faultcommon.
 */
! 431: case T_PAGEFLT: /* allow page faults in kernel mode */
! 432: if (p == 0 || p->p_addr == 0)
! 433: goto we_re_toast;
! 434: #ifdef LOCKDEBUG
! 435: /* If we page-fault while in scheduler, we're doomed. */
! 436: #ifdef notyet
! 437: if (simple_lock_held(&sched_lock))
! 438: #else
! 439: if (__mp_lock_held(&sched_lock))
! 440: #endif
! 441: goto we_re_toast;
! 442: #endif
! 443:
! 444: pcb = &p->p_addr->u_pcb;
! 445: #if 0
! 446: /* XXX - check only applies to 386's and 486's with WP off */
! 447: if (frame.tf_err & PGEX_P)
! 448: goto we_re_toast;
! 449: #endif
! 450: cr2 = rcr2();
! 451: KERNEL_LOCK();
! 452: goto faultcommon;
! 453:
! 454: case T_PAGEFLT|T_USER: { /* page fault */
! 455: vaddr_t va, fa;
! 456: struct vmspace *vm;
! 457: struct vm_map *map;
! 458: int rv;
! 459:
! 460: cr2 = rcr2();
! 461: KERNEL_PROC_LOCK(p);
/*
 * Shared by both page-fault cases.  The kernel case arrives holding
 * KERNEL_LOCK, the user case holding KERNEL_PROC_LOCK, which is why
 * the unlock calls below are chosen by testing type == T_PAGEFLT.
 */
! 462: faultcommon:
! 463: vm = p->p_vmspace;
! 464: if (vm == NULL)
! 465: goto we_re_toast;
/* fa is the exact faulting address, va the start of its page. */
! 466: fa = (vaddr_t)cr2;
! 467: va = trunc_page(fa);
! 468: /*
! 469: * It is only a kernel address space fault iff:
! 470: * 1. (type & T_USER) == 0 and
! 471: * 2. pcb_onfault not set or
! 472: * 3. pcb_onfault set but supervisor space fault
! 473: * The last can occur during an exec() copyin where the
! 474: * argument space is lazy-allocated.
! 475: */
! 476: if (type == T_PAGEFLT && va >= KERNBASE)
! 477: map = kernel_map;
! 478: else
! 479: map = &vm->vm_map;
! 480:
! 481: #ifdef DIAGNOSTIC
! 482: if (map == kernel_map && va == 0) {
! 483: printf("trap: bad kernel access at %lx\n", va);
! 484: goto we_re_toast;
! 485: }
! 486: #endif
! 487:
/*
 * pcb_onfault is cleared for the duration of uvm_fault() and put back
 * afterwards, so the saved handler is not visible while the fault is
 * being resolved.
 */
! 488: onfault = p->p_addr->u_pcb.pcb_onfault;
! 489: p->p_addr->u_pcb.pcb_onfault = NULL;
! 490: rv = uvm_fault(map, va, 0, ftype);
! 491: p->p_addr->u_pcb.pcb_onfault = onfault;
! 492:
/*
 * Fault resolved.  uvm_grow() is called for faults that were not in
 * kernel_map; the kernel case then returns and the user case goes on
 * to userret() via out.
 */
! 493: if (rv == 0) {
! 494: if (map != kernel_map)
! 495: uvm_grow(p, va);
! 496: if (type == T_PAGEFLT) {
! 497: KERNEL_UNLOCK();
! 498: return;
! 499: }
! 500: KERNEL_PROC_UNLOCK(p);
! 501: goto out;
! 502: }
! 503:
/*
 * Unresolved kernel-mode fault: recover through copyfault when a
 * pcb_onfault handler is set (pcb was assigned in the T_PAGEFLT case
 * above), otherwise report the uvm_fault() result and die.
 */
! 504: if (type == T_PAGEFLT) {
! 505: if (pcb->pcb_onfault != 0) {
! 506: KERNEL_UNLOCK();
! 507: goto copyfault;
! 508: }
! 509: printf("uvm_fault(%p, 0x%lx, 0, %d) -> %x\n",
! 510: map, va, ftype, rv);
! 511: goto we_re_toast;
! 512: }
/*
 * Unresolved user-mode fault: SIGSEGV carrying the faulting address.
 * The break leaves the switch; type has T_USER set here, so the code
 * after the switch falls into userret().
 */
! 513: sv.sival_int = fa;
! 514: trapsignal(p, SIGSEGV, vftype, SEGV_MAPERR, sv);
! 515: KERNEL_PROC_UNLOCK(p);
! 516: break;
! 517: }
! 518:
! 519: #if 0 /* Should this be left out? */
! 520: #if !defined(DDB) && !defined(KGDB)
! 521: /* XXX need to deal with this when DDB is present, too */
! 522: case T_TRCTRAP: /* kernel trace trap; someone single stepping lcall's */
! 523: /* syscall has to turn off the trace bit itself */
! 524: return;
! 525: #endif
! 526: #endif
! 527:
/*
 * NOTE(review): the two SIGTRAP cases below take the signal value from
 * rcr2(), whereas the other user-mode cases above use frame.tf_eip.
 * Confirm this is intended.
 */
! 528: case T_BPTFLT|T_USER: /* bpt instruction fault */
! 529: sv.sival_int = rcr2();
! 530: KERNEL_PROC_LOCK(p);
! 531: trapsignal(p, SIGTRAP, type &~ T_USER, TRAP_BRKPT, sv);
! 532: KERNEL_PROC_UNLOCK(p);
! 533: break;
! 534: case T_TRCTRAP|T_USER: /* trace trap */
! 535: sv.sival_int = rcr2();
! 536: KERNEL_PROC_LOCK(p);
! 537: trapsignal(p, SIGTRAP, type &~ T_USER, TRAP_TRACE, sv);
! 538: KERNEL_PROC_UNLOCK(p);
! 539: break;
! 540:
/*
 * NMI, compiled only when NISA > 0.  With DDB or KGDB configured this
 * case always returns after offering the trap to the debuggers, which
 * leaves the isa_nmi() check below unreachable in such kernels.
 */
! 541: #if NISA > 0
! 542: case T_NMI:
! 543: case T_NMI|T_USER:
! 544: #if defined(DDB) || defined(KGDB)
! 545: /* NMI can be hooked up to a pushbutton for debugging */
! 546: printf ("NMI ... going to debugger\n");
! 547: #ifdef KGDB
! 548: if (kgdb_trap(type, &frame))
! 549: return;
! 550: #endif
! 551: #ifdef DDB
! 552: if (kdb_trap(type, 0, &frame))
! 553: return;
! 554: #endif
! 555: return;
! 556: #endif /* DDB || KGDB */
! 557: /* machine/parity/power fail/"kitchen sink" faults */
! 558: if (isa_nmi() == 0)
! 559: return;
! 560: else
! 561: goto we_re_toast;
! 562: #endif
! 563: }
! 564:
/*
 * Reached by the cases that break out of the switch.  Only traps that
 * carry T_USER run the return-to-user epilogue.
 */
! 565: if ((type & T_USER) == 0)
! 566: return;
! 567: out:
! 568: userret(p);
! 569: }
! 570:
/*
 * Flow of syscall() as implemented below:
 *  1. The syscall number is taken from tf_eax.  For SYS_syscall and
 *     SYS___syscall the real number is fetched with copyin() from the
 *     argument area, which starts at tf_esp + sizeof(int).
 *  2. A number outside 0 .. e_nsysent-1 selects the e_nosys entry of
 *     the emulation.
 *  3. Arguments are fetched with copyin(), or from registers for the
 *     Linux emulations.
 *  4. The handler runs with KERNEL_PROC_LOCK held.  On success its
 *     results go to tf_eax/tf_edx with PSL_C cleared; on error the
 *     error number goes to tf_eax with PSL_C set.
 *  5. userret() runs, followed by optional ktrace and SPL diagnostics.
 */
! 571: /*
! 572: * syscall(frame):
! 573: * System call request from POSIX system call gate interface to kernel.
! 574: * Like trap(), argument is call by reference.
! 575: */
! 576: /*ARGSUSED*/
! 577: void
! 578: syscall(struct trapframe frame)
! 579: {
! 580: caddr_t params;
! 581: struct sysent *callp;
! 582: struct proc *p;
! 583: int orig_error, error, opc, nsys;
! 584: size_t argsize;
! 585: register_t code, args[8], rval[2];
/* ocpl samples lapic_tpr at entry for the DIAGNOSTIC check at the end. */
! 586: #ifdef DIAGNOSTIC
! 587: int ocpl = lapic_tpr;
! 588: #endif
! 589:
! 590: uvmexp.syscalls++;
! 591: #ifdef DIAGNOSTIC
! 592: if (!USERMODE(frame.tf_cs, frame.tf_eflags))
! 593: panic("syscall");
! 594: #endif
/* opc keeps the entry eip so that ERESTART can back the pc up later. */
! 595: p = curproc;
! 596: p->p_md.md_regs = &frame;
! 597: opc = frame.tf_eip;
! 598: code = frame.tf_eax;
! 599:
! 600: nsys = p->p_emul->e_nsysent;
! 601: callp = p->p_emul->e_sysent;
! 602:
! 603: #ifdef COMPAT_IBCS2
! 604: if (p->p_emul == &emul_ibcs2)
! 605: if (IBCS2_HIGH_SYSCALL(code))
! 606: code = IBCS2_CVT_HIGH_SYSCALL(code);
! 607: #endif
/* The argument area starts one int above tf_esp. */
! 608: params = (caddr_t)frame.tf_esp + sizeof(int);
! 609:
! 610: #ifdef VM86
! 611: /*
! 612: * VM86 mode application found our syscall trap gate by accident; let
! 613: * it get a SIGSYS and have the VM86 handler in the process take care
! 614: * of it.
! 615: */
! 616: if (frame.tf_eflags & PSL_VM)
! 617: code = -1;
! 618: else
! 619: #endif
! 620:
/*
 * With VM86 configured this switch is the else branch of the PSL_VM
 * test above, so it is skipped for VM86 tasks.
 */
! 621: switch (code) {
! 622: case SYS_syscall:
! 623: #ifdef COMPAT_LINUX
! 624: /* Linux has a special system setup call as number 0 */
! 625: if (p->p_emul == &emul_linux_aout ||
! 626: p->p_emul == &emul_linux_elf)
! 627: break;
! 628: #endif
! 629: /*
! 630: * Code is first argument, followed by actual args.
! 631: */
/*
 * NOTE(review): the copyin() result is ignored here and in the
 * SYS___syscall case below; presumably code keeps its previous value
 * when the copy fails.  Confirm that is acceptable.
 */
! 632: copyin(params, &code, sizeof(int));
! 633: params += sizeof(int);
! 634: break;
! 635: case SYS___syscall:
! 636: /*
! 637: * Like syscall, but code is a quad, so as to maintain
! 638: * quad alignment for the rest of the arguments.
! 639: */
/*
 * Indirection is only honoured for the native table or for the
 * emulations listed below; any other emulation leaves code untouched.
 */
! 640: if (callp != sysent
! 641: #ifdef COMPAT_FREEBSD
! 642: && p->p_emul != &emul_freebsd_aout
! 643: && p->p_emul != &emul_freebsd_elf
! 644: #endif
! 645: #ifdef COMPAT_AOUT
! 646: && p->p_emul != &emul_aout
! 647: #endif
! 648: #ifdef COMPAT_BSDOS
! 649: && p->p_emul != &emul_bsdos
! 650: #endif
! 651: )
! 652: break;
! 653: copyin(params + _QUAD_LOWWORD * sizeof(int), &code, sizeof(int));
! 654: params += sizeof(quad_t);
! 655: break;
! 656: default:
! 657: break;
! 658: }
/* Advance callp to the selected entry, or to e_nosys when out of range. */
! 659: if (code < 0 || code >= nsys)
! 660: callp += p->p_emul->e_nosys; /* illegal */
! 661: else
! 662: callp += code;
! 663: argsize = callp->sy_argsize;
! 664: #ifdef COMPAT_LINUX
! 665: /* XXX extra if() for every emul type.. */
! 666: if (p->p_emul == &emul_linux_aout || p->p_emul == &emul_linux_elf) {
! 667: /*
! 668: * Linux passes the args in ebx, ecx, edx, esi, edi, ebp, in
! 669: * increasing order.
! 670: */
/*
 * The cases fall through on purpose: an argsize of N bytes fills
 * args[N/4 - 1] down to args[0].
 * NOTE(review): argsize is a size_t but the panic format uses %d.
 */
! 671: switch (argsize) {
! 672: case 24:
! 673: args[5] = frame.tf_ebp;
! 674: case 20:
! 675: args[4] = frame.tf_edi;
! 676: case 16:
! 677: args[3] = frame.tf_esi;
! 678: case 12:
! 679: args[2] = frame.tf_edx;
! 680: case 8:
! 681: args[1] = frame.tf_ecx;
! 682: case 4:
! 683: args[0] = frame.tf_ebx;
! 684: case 0:
! 685: break;
! 686: default:
! 687: panic("linux syscall with weird argument size %d",
! 688: argsize);
! 689: break;
! 690: }
! 691: error = 0;
! 692: }
! 693: else
! 694: #endif
/* All other emulations fetch argsize bytes of arguments from params. */
! 695: if (argsize)
! 696: error = copyin(params, (caddr_t)args, argsize);
! 697: else
! 698: error = 0;
/*
 * orig_error keeps the untranslated error for scdebug_ret() and
 * ktrsysret(); error itself may be remapped through e_errno at bad.
 */
! 699: orig_error = error;
! 700: KERNEL_PROC_LOCK(p);
! 701: #ifdef SYSCALL_DEBUG
! 702: scdebug_call(p, code, args);
! 703: #endif
! 704: #ifdef KTRACE
! 705: if (KTRPOINT(p, KTR_SYSCALL))
! 706: ktrsyscall(p, code, argsize, args);
! 707: #endif
/* An argument copyin failure is reported only after the tracing hooks ran. */
! 708: if (error) {
! 709: KERNEL_PROC_UNLOCK(p);
! 710: goto bad;
! 711: }
/*
 * rval[1] starts out as the current edx, so a handler that leaves
 * rval[1] alone ends up writing the same value back to tf_edx.
 */
! 712: rval[0] = 0;
! 713: rval[1] = frame.tf_edx;
! 714: #if NSYSTRACE > 0
! 715: if (ISSET(p->p_flag, P_SYSTRACE))
! 716: orig_error = error = systrace_redirect(code, p, args, rval);
! 717: else
! 718: #endif
! 719: orig_error = error = (*callp->sy_call)(p, args, rval);
! 720: KERNEL_PROC_UNLOCK(p);
/* Translate the handler result into the register state seen by the caller. */
! 721: switch (error) {
! 722: case 0:
! 723: frame.tf_eax = rval[0];
! 724: frame.tf_edx = rval[1];
! 725: frame.tf_eflags &= ~PSL_C; /* carry bit */
! 726: break;
! 727: case ERESTART:
! 728: /*
! 729: * The offset to adjust the PC by depends on whether we entered
! 730: * the kernel through the trap or call gate. We pushed the
! 731: * size of the instruction into tf_err on entry.
! 732: */
! 733: frame.tf_eip = opc - frame.tf_err;
! 734: break;
! 735: case EJUSTRETURN:
! 736: /* nothing to do */
! 737: break;
! 738: default:
/*
 * bad is also the target of the argument copyin failure above.  When
 * the emulation provides an e_errno table the error is mapped through
 * it before being returned in eax with the carry flag set.
 */
! 739: bad:
! 740: if (p->p_emul->e_errno)
! 741: error = p->p_emul->e_errno[error];
! 742: frame.tf_eax = error;
! 743: frame.tf_eflags |= PSL_C; /* carry bit */
! 744: break;
! 745: }
! 746:
! 747: #ifdef SYSCALL_DEBUG
! 748: KERNEL_PROC_LOCK(p);
! 749: scdebug_ret(p, code, orig_error, rval);
! 750: KERNEL_PROC_UNLOCK(p);
! 751: #endif
! 752: userret(p);
! 753: #ifdef KTRACE
! 754: if (KTRPOINT(p, KTR_SYSRET)) {
! 755: KERNEL_PROC_LOCK(p);
! 756: ktrsysret(p, code, orig_error, rval[0]);
! 757: KERNEL_PROC_UNLOCK(p);
! 758: }
! 759: #endif
/*
 * DIAGNOSTIC: warn when lapic_tpr differs from the value sampled at
 * entry, then put the entry value back.
 */
! 760: #ifdef DIAGNOSTIC
! 761: if (lapic_tpr != ocpl) {
! 762: printf("WARNING: SPL (0x%x) NOT LOWERED ON "
! 763: "syscall(0x%x, 0x%x, 0x%x, 0x%x...) EXIT, PID %d\n",
! 764: lapic_tpr, code, args[0], args[1], args[2], p->p_pid);
! 765: lapic_tpr = ocpl;
! 766: }
! 767: #endif
! 768: }
! 769:
! 770: void
! 771: child_return(void *arg)
! 772: {
! 773: struct proc *p = (struct proc *)arg;
! 774: struct trapframe *tf = p->p_md.md_regs;
! 775:
! 776: tf->tf_eax = 0;
! 777: tf->tf_eflags &= ~PSL_C;
! 778:
! 779: KERNEL_PROC_UNLOCK(p);
! 780:
! 781: userret(p);
! 782: #ifdef KTRACE
! 783: if (KTRPOINT(p, KTR_SYSRET)) {
! 784: KERNEL_PROC_LOCK(p);
! 785: ktrsysret(p,
! 786: (p->p_flag & P_PPWAIT) ? SYS_vfork : SYS_fork, 0, 0);
! 787: KERNEL_PROC_UNLOCK(p);
! 788: }
! 789: #endif
! 790: }
CVSweb