Annotation of sys/arch/i386/i386/linux_machdep.c, Revision 1.1.1.1
1.1 nbrk 1: /* $OpenBSD: linux_machdep.c,v 1.34 2007/02/14 00:53:47 jsg Exp $ */
2: /* $NetBSD: linux_machdep.c,v 1.29 1996/05/03 19:42:11 christos Exp $ */
3:
4: /*
5: * Copyright (c) 1995 Frank van der Linden
6: * All rights reserved.
7: *
8: * Redistribution and use in source and binary forms, with or without
9: * modification, are permitted provided that the following conditions
10: * are met:
11: * 1. Redistributions of source code must retain the above copyright
12: * notice, this list of conditions and the following disclaimer.
13: * 2. Redistributions in binary form must reproduce the above copyright
14: * notice, this list of conditions and the following disclaimer in the
15: * documentation and/or other materials provided with the distribution.
16: * 3. All advertising materials mentioning features or use of this software
17: * must display the following acknowledgement:
18: * This product includes software developed for the NetBSD Project
19: * by Frank van der Linden
20: * 4. The name of the author may not be used to endorse or promote products
21: * derived from this software without specific prior written permission
22: *
23: * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
24: * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
25: * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
26: * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
27: * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
28: * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29: * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30: * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31: * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
32: * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33: */
34:
35: #include <sys/param.h>
36: #include <sys/systm.h>
37: #include <sys/signalvar.h>
38: #include <sys/kernel.h>
39: #include <sys/proc.h>
40: #include <sys/user.h>
41: #include <sys/buf.h>
42: #include <sys/reboot.h>
43: #include <sys/conf.h>
44: #include <sys/file.h>
45: #include <sys/malloc.h>
46: #include <sys/mbuf.h>
47: #include <sys/msgbuf.h>
48: #include <sys/mount.h>
49: #include <sys/vnode.h>
50: #include <sys/device.h>
51: #include <sys/sysctl.h>
52: #include <sys/syscallargs.h>
53: #include <sys/filedesc.h>
54:
55: #include <compat/linux/linux_types.h>
56: #include <compat/linux/linux_signal.h>
57: #include <compat/linux/linux_syscallargs.h>
58: #include <compat/linux/linux_util.h>
59: #include <compat/linux/linux_ioctl.h>
60:
61: #include <machine/cpu.h>
62: #include <machine/cpufunc.h>
63: #include <machine/psl.h>
64: #include <machine/reg.h>
65: #include <machine/segments.h>
66: #include <machine/specialreg.h>
67: #include <machine/sysarch.h>
68: #include <machine/vm86.h>
69: #include <machine/linux_machdep.h>
70:
71: /*
72: * To see whether wsdisplay is configured (for virtual console ioctl calls).
73: */
74: #include "wsdisplay.h"
75: #include <sys/ioctl.h>
76: #if NWSDISPLAY > 0 && defined(WSDISPLAY_COMPAT_USL)
77: #include <dev/wscons/wsconsio.h>
78: #include <dev/wscons/wsdisplay_usl_io.h>
79: #endif
80:
81: #ifdef USER_LDT
82: #include <machine/cpu.h>
83: int linux_read_ldt(struct proc *, struct linux_sys_modify_ldt_args *,
84: register_t *);
85: int linux_write_ldt(struct proc *, struct linux_sys_modify_ldt_args *,
86: register_t *);
87: #endif
88:
89: /*
90: * Deal with some i386-specific things in the Linux emulation code.
91: * This means just signals for now, will include stuff like
92: * I/O map permissions and V86 mode sometime.
93: */
94:
95: /*
96: * Send an interrupt to process.
97: *
98: * Stack is set up to allow sigcode stored
99: * in u. to call routine, followed by kcall
100: * to sigreturn routine below. After sigreturn
101: * resets the signal mask, the stack, and the
102: * frame pointer, it returns to the user
103: * specified pc, psl.
104: */
105:
106: void
107: linux_sendsig(sig_t catcher, int sig, int mask, u_long code, int type,
108: union sigval val)
109: {
110: struct proc *p = curproc;
111: struct trapframe *tf;
112: struct linux_sigframe *fp, frame;
113: struct sigacts *psp = p->p_sigacts;
114: int oonstack;
115:
116: tf = p->p_md.md_regs;
117: oonstack = psp->ps_sigstk.ss_flags & SS_ONSTACK;
118:
119: /*
120: * Allocate space for the signal handler context.
121: */
122: if ((psp->ps_flags & SAS_ALTSTACK) && !oonstack &&
123: (psp->ps_sigonstack & sigmask(sig))) {
124: fp = (struct linux_sigframe *)((char *)psp->ps_sigstk.ss_sp +
125: psp->ps_sigstk.ss_size - sizeof(struct linux_sigframe));
126: psp->ps_sigstk.ss_flags |= SS_ONSTACK;
127: } else {
128: fp = (struct linux_sigframe *)tf->tf_esp - 1;
129: }
130:
131: frame.sf_handler = catcher;
132: frame.sf_sig = bsd_to_linux_sig[sig];
133:
134: /*
135: * Build the signal context to be used by sigreturn.
136: */
137: frame.sf_sc.sc_mask = mask;
138: #ifdef VM86
139: if (tf->tf_eflags & PSL_VM) {
140: frame.sf_sc.sc_gs = tf->tf_vm86_gs;
141: frame.sf_sc.sc_fs = tf->tf_vm86_fs;
142: frame.sf_sc.sc_es = tf->tf_vm86_es;
143: frame.sf_sc.sc_ds = tf->tf_vm86_ds;
144: frame.sf_sc.sc_eflags = get_vflags(p);
145: } else
146: #endif
147: {
148: frame.sf_sc.sc_fs = tf->tf_fs;
149: frame.sf_sc.sc_gs = tf->tf_gs;
150: frame.sf_sc.sc_es = tf->tf_es;
151: frame.sf_sc.sc_ds = tf->tf_ds;
152: frame.sf_sc.sc_eflags = tf->tf_eflags;
153: }
154: frame.sf_sc.sc_edi = tf->tf_edi;
155: frame.sf_sc.sc_esi = tf->tf_esi;
156: frame.sf_sc.sc_ebp = tf->tf_ebp;
157: frame.sf_sc.sc_ebx = tf->tf_ebx;
158: frame.sf_sc.sc_edx = tf->tf_edx;
159: frame.sf_sc.sc_ecx = tf->tf_ecx;
160: frame.sf_sc.sc_eax = tf->tf_eax;
161: frame.sf_sc.sc_eip = tf->tf_eip;
162: frame.sf_sc.sc_cs = tf->tf_cs;
163: frame.sf_sc.sc_esp_at_signal = tf->tf_esp;
164: frame.sf_sc.sc_ss = tf->tf_ss;
165: frame.sf_sc.sc_err = tf->tf_err;
166: frame.sf_sc.sc_trapno = tf->tf_trapno;
167:
168: if (copyout(&frame, fp, sizeof(frame)) != 0) {
169: /*
170: * Process has trashed its stack; give it an illegal
171: * instruction to halt it in its tracks.
172: */
173: sigexit(p, SIGILL);
174: /* NOTREACHED */
175: }
176:
177: /*
178: * Build context to run handler in.
179: */
180: tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
181: tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
182: tf->tf_eip = p->p_sigcode;
183: tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
184: tf->tf_eflags &= ~(PSL_T|PSL_VM|PSL_AC);
185: tf->tf_esp = (int)fp;
186: tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
187: }
188:
189: /*
190: * System call to cleanup state after a signal
191: * has been taken. Reset signal mask and
192: * stack state from context left by sendsig (above).
193: * Return to previous pc and psl as specified by
194: * context left by sendsig. Check carefully to
195: * make sure that the user has not modified the
196: * psl to gain improper privileges or to cause
197: * a machine fault.
198: */
199: int
200: linux_sys_sigreturn(struct proc *p, void *v, register_t *retval)
201: {
202: struct linux_sys_sigreturn_args /* {
203: syscallarg(struct linux_sigcontext *) scp;
204: } */ *uap = v;
205: struct linux_sigcontext *scp, context;
206: struct trapframe *tf;
207:
208: tf = p->p_md.md_regs;
209:
210: /*
211: * The trampoline code hands us the context.
212: * It is unsafe to keep track of it ourselves, in the event that a
213: * program jumps out of a signal handler.
214: */
215: scp = SCARG(uap, scp);
216: if (copyin((caddr_t)scp, &context, sizeof(*scp)) != 0)
217: return (EFAULT);
218:
219: /*
220: * Restore signal context.
221: */
222: #ifdef VM86
223: if (context.sc_eflags & PSL_VM) {
224: tf->tf_vm86_gs = context.sc_gs;
225: tf->tf_vm86_fs = context.sc_fs;
226: tf->tf_vm86_es = context.sc_es;
227: tf->tf_vm86_ds = context.sc_ds;
228: set_vflags(p, context.sc_eflags);
229: } else
230: #endif
231: {
232: /*
233: * Check for security violations. If we're returning to
234: * protected mode, the CPU will validate the segment registers
235: * automatically and generate a trap on violations. We handle
236: * the trap, rather than doing all of the checking here.
237: */
238: if (((context.sc_eflags ^ tf->tf_eflags) & PSL_USERSTATIC) != 0 ||
239: !USERMODE(context.sc_cs, context.sc_eflags))
240: return (EINVAL);
241:
242: tf->tf_fs = context.sc_fs;
243: tf->tf_gs = context.sc_gs;
244: tf->tf_es = context.sc_es;
245: tf->tf_ds = context.sc_ds;
246: tf->tf_eflags = context.sc_eflags;
247: }
248: tf->tf_edi = context.sc_edi;
249: tf->tf_esi = context.sc_esi;
250: tf->tf_ebp = context.sc_ebp;
251: tf->tf_ebx = context.sc_ebx;
252: tf->tf_edx = context.sc_edx;
253: tf->tf_ecx = context.sc_ecx;
254: tf->tf_eax = context.sc_eax;
255: tf->tf_eip = context.sc_eip;
256: tf->tf_cs = context.sc_cs;
257: tf->tf_esp = context.sc_esp_at_signal;
258: tf->tf_ss = context.sc_ss;
259:
260: p->p_sigacts->ps_sigstk.ss_flags &= ~SS_ONSTACK;
261: p->p_sigmask = context.sc_mask & ~sigcantmask;
262:
263: return (EJUSTRETURN);
264: }
265:
266: int
267: linux_sys_rt_sigreturn(struct proc *p, void *v, register_t *retval)
268: {
269: return(ENOSYS);
270: }
271:
272: #ifdef USER_LDT
273:
274: int
275: linux_read_ldt(struct proc *p, struct linux_sys_modify_ldt_args *uap,
276: register_t *retval)
277: {
278: struct i386_get_ldt_args gl;
279: int error;
280: caddr_t sg;
281: char *parms;
282:
283: if (user_ldt_enable == 0)
284: return (ENOSYS);
285:
286: sg = stackgap_init(p->p_emul);
287:
288: gl.start = 0;
289: gl.desc = SCARG(uap, ptr);
290: gl.num = SCARG(uap, bytecount) / sizeof(union descriptor);
291:
292: parms = stackgap_alloc(&sg, sizeof(gl));
293:
294: if ((error = copyout(&gl, parms, sizeof(gl))) != 0)
295: return (error);
296:
297: if ((error = i386_get_ldt(p, parms, retval)) != 0)
298: return (error);
299:
300: *retval *= sizeof(union descriptor);
301: return (0);
302: }
303:
304: struct linux_ldt_info {
305: u_int entry_number;
306: u_long base_addr;
307: u_int limit;
308: u_int seg_32bit:1;
309: u_int contents:2;
310: u_int read_exec_only:1;
311: u_int limit_in_pages:1;
312: u_int seg_not_present:1;
313: };
314:
315: int
316: linux_write_ldt(struct proc *p, struct linux_sys_modify_ldt_args *uap,
317: register_t *retval)
318: {
319: struct linux_ldt_info ldt_info;
320: struct segment_descriptor sd;
321: struct i386_set_ldt_args sl;
322: int error;
323: caddr_t sg;
324: char *parms;
325:
326: if (user_ldt_enable == 0)
327: return (ENOSYS);
328:
329: if (SCARG(uap, bytecount) != sizeof(ldt_info))
330: return (EINVAL);
331: if ((error = copyin(SCARG(uap, ptr), &ldt_info, sizeof(ldt_info))) != 0)
332: return error;
333: if (ldt_info.contents == 3)
334: return (EINVAL);
335:
336: sg = stackgap_init(p->p_emul);
337:
338: sd.sd_lobase = ldt_info.base_addr & 0xffffff;
339: sd.sd_hibase = (ldt_info.base_addr >> 24) & 0xff;
340: sd.sd_lolimit = ldt_info.limit & 0xffff;
341: sd.sd_hilimit = (ldt_info.limit >> 16) & 0xf;
342: sd.sd_type =
343: 16 | (ldt_info.contents << 2) | (!ldt_info.read_exec_only << 1);
344: sd.sd_dpl = SEL_UPL;
345: sd.sd_p = !ldt_info.seg_not_present;
346: sd.sd_def32 = ldt_info.seg_32bit;
347: sd.sd_gran = ldt_info.limit_in_pages;
348:
349: sl.start = ldt_info.entry_number;
350: sl.desc = stackgap_alloc(&sg, sizeof(sd));
351: sl.num = 1;
352:
353: #if 0
354: printf("linux_write_ldt: idx=%d, base=%x, limit=%x\n",
355: ldt_info.entry_number, ldt_info.base_addr, ldt_info.limit);
356: #endif
357:
358: parms = stackgap_alloc(&sg, sizeof(sl));
359:
360: if ((error = copyout(&sd, sl.desc, sizeof(sd))) != 0)
361: return (error);
362: if ((error = copyout(&sl, parms, sizeof(sl))) != 0)
363: return (error);
364:
365: if ((error = i386_set_ldt(p, parms, retval)) != 0)
366: return (error);
367:
368: *retval = 0;
369: return (0);
370: }
371:
372: #endif /* USER_LDT */
373:
374: int
375: linux_sys_modify_ldt(struct proc *p, void *v, register_t *retval)
376: {
377: struct linux_sys_modify_ldt_args /* {
378: syscallarg(int) func;
379: syscallarg(void *) ptr;
380: syscallarg(size_t) bytecount;
381: } */ *uap = v;
382:
383: switch (SCARG(uap, func)) {
384: #ifdef USER_LDT
385: case 0:
386: return (linux_read_ldt(p, uap, retval));
387:
388: case 1:
389: return (linux_write_ldt(p, uap, retval));
390: #endif /* USER_LDT */
391:
392: default:
393: return (ENOSYS);
394: }
395: }
396:
397: /*
398: * XXX Pathetic hack to make svgalib work. This will fake the major
399: * device number of an opened VT so that svgalib likes it. grmbl.
400: * Should probably do it 'wrong the right way' and use a mapping
401: * array for all major device numbers, and map linux_mknod too.
402: */
403: dev_t
404: linux_fakedev(dev_t dev)
405: {
406:
407: if (major(dev) == NATIVE_CONS_MAJOR)
408: return makedev(LINUX_CONS_MAJOR, (minor(dev) + 1));
409: return dev;
410: }
411:
412: /*
413: * We come here in a last attempt to satisfy a Linux ioctl() call
414: */
415: int
416: linux_machdepioctl(struct proc *p, void *v, register_t *retval)
417: {
418: struct linux_sys_ioctl_args /* {
419: syscallarg(int) fd;
420: syscallarg(u_long) com;
421: syscallarg(caddr_t) data;
422: } */ *uap = v;
423: struct sys_ioctl_args bia;
424: u_long com;
425: int error;
426: #if (NWSDISPLAY > 0 && defined(WSDISPLAY_COMPAT_USL))
427: struct vt_mode lvt;
428: caddr_t bvtp, sg;
429: #endif
430: struct filedesc *fdp;
431: struct file *fp;
432: int fd;
433: int (*ioctlf)(struct file *, u_long, caddr_t, struct proc *);
434: struct ioctl_pt pt;
435:
436: fd = SCARG(uap, fd);
437: SCARG(&bia, fd) = SCARG(uap, fd);
438: SCARG(&bia, data) = SCARG(uap, data);
439: com = SCARG(uap, com);
440:
441: fdp = p->p_fd;
442: if ((fp = fd_getfile(fdp, fd)) == NULL)
443: return (EBADF);
444:
445: switch (com) {
446: #if (NWSDISPLAY > 0 && defined(WSDISPLAY_COMPAT_USL))
447: case LINUX_KDGKBMODE:
448: com = KDGKBMODE;
449: break;
450: case LINUX_KDSKBMODE:
451: com = KDSKBMODE;
452: if ((unsigned)SCARG(uap, data) == LINUX_K_MEDIUMRAW)
453: SCARG(&bia, data) = (caddr_t)K_RAW;
454: break;
455: case LINUX_KIOCSOUND:
456: SCARG(&bia, data) =
457: (caddr_t)(((unsigned long)SCARG(&bia, data)) & 0xffff);
458: /* FALLTHROUGH */
459: case LINUX_KDMKTONE:
460: com = KDMKTONE;
461: break;
462: case LINUX_KDSETMODE:
463: com = KDSETMODE;
464: break;
465: case LINUX_KDGETMODE:
466: #if NWSDISPLAY > 0 && defined(WSDISPLAY_COMPAT_USL)
467: com = WSDISPLAYIO_GMODE;
468: #else
469: com = KDGETMODE;
470: #endif
471: break;
472: case LINUX_KDENABIO:
473: com = KDENABIO;
474: break;
475: case LINUX_KDDISABIO:
476: com = KDDISABIO;
477: break;
478: case LINUX_KDGETLED:
479: com = KDGETLED;
480: break;
481: case LINUX_KDSETLED:
482: com = KDSETLED;
483: break;
484: case LINUX_VT_OPENQRY:
485: com = VT_OPENQRY;
486: break;
487: case LINUX_VT_GETMODE: {
488: int sig;
489:
490: SCARG(&bia, com) = VT_GETMODE;
491: if ((error = sys_ioctl(p, &bia, retval)))
492: return error;
493: if ((error = copyin(SCARG(uap, data), (caddr_t)&lvt,
494: sizeof (struct vt_mode))))
495: return error;
496: /* We need to bounds check here in case there
497: is a race with another thread */
498: if ((error = bsd_to_linux_signal(lvt.relsig, &sig)))
499: return error;
500: lvt.relsig = sig;
501:
502: if ((error = bsd_to_linux_signal(lvt.acqsig, &sig)))
503: return error;
504: lvt.acqsig = sig;
505:
506: if ((error = bsd_to_linux_signal(lvt.frsig, &sig)))
507: return error;
508: lvt.frsig = sig;
509:
510: return copyout((caddr_t)&lvt, SCARG(uap, data),
511: sizeof (struct vt_mode));
512: }
513: case LINUX_VT_SETMODE: {
514: int sig;
515:
516: com = VT_SETMODE;
517: if ((error = copyin(SCARG(uap, data), (caddr_t)&lvt,
518: sizeof (struct vt_mode))))
519: return error;
520: if ((error = linux_to_bsd_signal(lvt.relsig, &sig)))
521: return error;
522: lvt.relsig = sig;
523:
524: if ((error = linux_to_bsd_signal(lvt.acqsig, &sig)))
525: return error;
526: lvt.acqsig = sig;
527:
528: if ((error = linux_to_bsd_signal(lvt.frsig, &sig)))
529: return error;
530: lvt.frsig = sig;
531:
532: sg = stackgap_init(p->p_emul);
533: bvtp = stackgap_alloc(&sg, sizeof (struct vt_mode));
534: if ((error = copyout(&lvt, bvtp, sizeof (struct vt_mode))))
535: return error;
536: SCARG(&bia, data) = bvtp;
537: break;
538: }
539: case LINUX_VT_DISALLOCATE:
540: /* XXX should use WSDISPLAYIO_DELSCREEN */
541: return 0;
542: case LINUX_VT_RELDISP:
543: com = VT_RELDISP;
544: break;
545: case LINUX_VT_ACTIVATE:
546: com = VT_ACTIVATE;
547: break;
548: case LINUX_VT_WAITACTIVE:
549: com = VT_WAITACTIVE;
550: break;
551: case LINUX_VT_GETSTATE:
552: com = VT_GETSTATE;
553: break;
554: case LINUX_KDGKBTYPE:
555: {
556: char tmp = KB_101;
557:
558: /* This is what Linux does */
559: return copyout(&tmp, SCARG(uap, data), sizeof(char));
560: }
561: #endif
562: default:
563: /*
564: * Unknown to us. If it's on a device, just pass it through
565: * using PTIOCLINUX, the device itself might be able to
566: * make some sense of it.
567: * XXX hack: if the function returns EJUSTRETURN,
568: * it has stuffed a sysctl return value in pt.data.
569: */
570: FREF(fp);
571: ioctlf = fp->f_ops->fo_ioctl;
572: pt.com = SCARG(uap, com);
573: pt.data = SCARG(uap, data);
574: error = ioctlf(fp, PTIOCLINUX, (caddr_t)&pt, p);
575: FRELE(fp);
576: if (error == EJUSTRETURN) {
577: retval[0] = (register_t)pt.data;
578: error = 0;
579: }
580:
581: if (error == ENOTTY)
582: printf("linux_machdepioctl: invalid ioctl %08lx\n",
583: com);
584: return (error);
585: }
586: SCARG(&bia, com) = com;
587: return sys_ioctl(p, &bia, retval);
588: }
589:
590: /*
591: * Set I/O permissions for a process. Just set the maximum level
592: * right away (ignoring the argument), otherwise we would have
593: * to rely on I/O permission maps, which are not implemented.
594: */
595: int
596: linux_sys_iopl(struct proc *p, void *v, register_t *retval)
597: {
598: #if 0
599: struct linux_sys_iopl_args /* {
600: syscallarg(int) level;
601: } */ *uap = v;
602: #endif
603: struct trapframe *fp = p->p_md.md_regs;
604:
605: if (suser(p, 0) != 0)
606: return EPERM;
607: if (securelevel > 0)
608: return EPERM;
609: fp->tf_eflags |= PSL_IOPL;
610: *retval = 0;
611: return 0;
612: }
613:
614: /*
615: * See above. If a root process tries to set access to an I/O port,
616: * just let it have the whole range.
617: */
618: int
619: linux_sys_ioperm(struct proc *p, void *v, register_t *retval)
620: {
621: struct linux_sys_ioperm_args /* {
622: syscallarg(unsigned int) lo;
623: syscallarg(unsigned int) hi;
624: syscallarg(int) val;
625: } */ *uap = v;
626: struct trapframe *fp = p->p_md.md_regs;
627:
628: if (suser(p, 0) != 0)
629: return EPERM;
630: if (securelevel > 0)
631: return EPERM;
632: if (SCARG(uap, val))
633: fp->tf_eflags |= PSL_IOPL;
634: *retval = 0;
635: return 0;
636: }
CVSweb