Annotation of sys/arch/amd64/amd64/machdep.c, Revision 1.1.1.1
1.1 nbrk 1: /* $OpenBSD: machdep.c,v 1.60 2007/08/02 16:40:27 deraadt Exp $ */
2: /* $NetBSD: machdep.c,v 1.3 2003/05/07 22:58:18 fvdl Exp $ */
3:
4: /*-
5: * Copyright (c) 1996, 1997, 1998, 2000 The NetBSD Foundation, Inc.
6: * All rights reserved.
7: *
8: * This code is derived from software contributed to The NetBSD Foundation
9: * by Charles M. Hannum and by Jason R. Thorpe of the Numerical Aerospace
10: * Simulation Facility, NASA Ames Research Center.
11: *
12: * Redistribution and use in source and binary forms, with or without
13: * modification, are permitted provided that the following conditions
14: * are met:
15: * 1. Redistributions of source code must retain the above copyright
16: * notice, this list of conditions and the following disclaimer.
17: * 2. Redistributions in binary form must reproduce the above copyright
18: * notice, this list of conditions and the following disclaimer in the
19: * documentation and/or other materials provided with the distribution.
20: * 3. All advertising materials mentioning features or use of this software
21: * must display the following acknowledgement:
22: * This product includes software developed by the NetBSD
23: * Foundation, Inc. and its contributors.
24: * 4. Neither the name of The NetBSD Foundation nor the names of its
25: * contributors may be used to endorse or promote products derived
26: * from this software without specific prior written permission.
27: *
28: * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
29: * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
30: * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
31: * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
32: * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
33: * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
34: * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
35: * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
36: * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37: * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38: * POSSIBILITY OF SUCH DAMAGE.
39: */
40:
41: /*-
42: * Copyright (c) 1982, 1987, 1990 The Regents of the University of California.
43: * All rights reserved.
44: *
45: * This code is derived from software contributed to Berkeley by
46: * William Jolitz.
47: *
48: * Redistribution and use in source and binary forms, with or without
49: * modification, are permitted provided that the following conditions
50: * are met:
51: * 1. Redistributions of source code must retain the above copyright
52: * notice, this list of conditions and the following disclaimer.
53: * 2. Redistributions in binary form must reproduce the above copyright
54: * notice, this list of conditions and the following disclaimer in the
55: * documentation and/or other materials provided with the distribution.
56: * 3. Neither the name of the University nor the names of its contributors
57: * may be used to endorse or promote products derived from this software
58: * without specific prior written permission.
59: *
60: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
61: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
62: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
63: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
64: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
65: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
66: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
67: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
68: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
69: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
70: * SUCH DAMAGE.
71: *
72: * @(#)machdep.c 7.4 (Berkeley) 6/3/91
73: */
74:
75: #include <sys/param.h>
76: #include <sys/systm.h>
77: #include <sys/signal.h>
78: #include <sys/signalvar.h>
79: #include <sys/kernel.h>
80: #include <sys/proc.h>
81: #include <sys/user.h>
82: #include <sys/exec.h>
83: #include <sys/buf.h>
84: #include <sys/reboot.h>
85: #include <sys/conf.h>
86: #include <sys/file.h>
87: #include <sys/malloc.h>
88: #include <sys/mbuf.h>
89: #include <sys/msgbuf.h>
90: #include <sys/mount.h>
91: #include <sys/vnode.h>
92: #include <sys/extent.h>
93: #include <sys/core.h>
94: #include <sys/kcore.h>
95: #include <sys/syscallargs.h>
96:
97: #ifdef SYSVMSG
98: #include <sys/msg.h>
99: #endif
100:
101: #ifdef KGDB
102: #include <sys/kgdb.h>
103: #endif
104:
105: #include <dev/cons.h>
106: #include <stand/boot/bootarg.h>
107:
108: #include <uvm/uvm_extern.h>
109: #include <uvm/uvm_page.h>
110:
111: #include <sys/sysctl.h>
112:
113: #include <machine/cpu.h>
114: #include <machine/cpufunc.h>
115: #include <machine/gdt.h>
116: #include <machine/pio.h>
117: #include <machine/psl.h>
118: #include <machine/reg.h>
119: #include <machine/specialreg.h>
120: #include <machine/fpu.h>
121: #include <machine/mtrr.h>
122: #include <machine/biosvar.h>
123: #include <machine/mpbiosvar.h>
124: #include <machine/reg.h>
125: #include <machine/kcore.h>
126:
127: #include <dev/isa/isareg.h>
128: #include <machine/isa_machdep.h>
129: #include <dev/ic/i8042reg.h>
130: #include <amd64/isa/nvram.h>
131:
132: #ifdef DDB
133: #include <machine/db_machdep.h>
134: #include <ddb/db_extern.h>
135: #endif
136:
137: #include "isa.h"
138: #include "isadma.h"
139: #include "ksyms.h"
140:
141: #include "acpi.h"
142: #if NACPI > 0
143: #include <dev/acpi/acpivar.h>
144: #endif
145:
146:
/* the following is used externally (sysctl_hw) */
char machine[] = MACHINE;

/* the following is used externally for concurrent handlers */
int setperf_prio = 0;

/* how long to wait (ms) before cpu_reset() actually resets at reboot */
#ifdef CPURESET_DELAY
int cpureset_delay = CPURESET_DELAY;
#else
int cpureset_delay = 2000; /* default to 2s */
#endif

int physmem;		/* physical memory size, in pages */
u_int64_t dumpmem_low;
u_int64_t dumpmem_high;
extern int boothowto;
int cpu_class;

char *ssym = NULL;	/* kernel symbol table start, if passed by /boot */
vaddr_t kern_end;

/* kernel message buffer: physical address set early, mapped in cpu_startup() */
vaddr_t msgbuf_vaddr;
paddr_t msgbuf_paddr;

/* interrupt descriptor table location */
vaddr_t idt_vaddr;
paddr_t idt_paddr;

/* low (below 4GB) mapping; NOTE(review): consumer is elsewhere -- confirm */
vaddr_t lo32_vaddr;
paddr_t lo32_paddr;

int kbd_reset;		/* sysctl machdep.kbdreset knob, see cpu_sysctl() */

struct vm_map *exec_map = NULL;	/* submap for exec arguments */
struct vm_map *phys_map = NULL;	/* submap for physio */

#ifndef BUFCACHEPERCENT
#define BUFCACHEPERCENT 10
#endif

/* buffer cache size in pages; 0 means "derive from bufcachepercent" */
#ifdef BUFPAGES
int bufpages = BUFPAGES;
#else
int bufpages = 0;
#endif
int bufcachepercent = BUFCACHEPERCENT;

#ifdef DEBUG
/* signal delivery tracing knobs, used by sendsig()/sys_sigreturn() */
int sigdebug = 0;
pid_t sigpid = 0;
#define SDB_FOLLOW 0x01
#endif

extern paddr_t avail_start, avail_end;

/* timing hooks; re-pointed when better clock hardware attaches */
void (*delay_func)(int) = i8254_delay;
void (*initclock_func)(void) = i8254_initclocks;

struct mtrr_funcs *mtrr_funcs;

/*
 * Format of boot information passed to us by 32-bit /boot
 */
typedef struct _boot_args32 {
	int ba_type;
	int ba_size;
	int ba_nextX;	/* a ptr in 32-bit world, but not here */
	char ba_arg[1];
} bootarg32_t;

#define BOOTARGC_MAX NBPG	/* one page */

#ifdef NFSCLIENT
bios_bootmac_t *bios_bootmac;
#endif

/* locore copies the arguments from /boot to here for us */
char bootinfo[BOOTARGC_MAX];
int bootinfo_size = BOOTARGC_MAX;

void getbootinfo(char *, int);

/* Data passed to us by /boot, filled in by getbootinfo() */
#if NAPM > 0 || defined(DEBUG)
bios_apminfo_t *apm;
#endif
#if NPCI > 0
bios_pciinfo_t *bios_pciinfo;
#endif
bios_diskinfo_t *bios_diskinfo;
bios_memmap_t *bios_memmap;
u_int32_t bios_cksumlen;

/*
 * Size of memory segments, before any memory is stolen.
 */
phys_ram_seg_t mem_clusters[VM_PHYSSEG_MAX];
int mem_cluster_cnt;

/* forward declarations, this file */
vaddr_t allocsys(vaddr_t);
void setup_buffers(void);
int cpu_dump(void);
int cpu_dumpsize(void);
u_long cpu_dump_mempagecnt(void);
void dumpsys(void);
void init_x86_64(paddr_t);

/* remote kernel debugger: serial console parameters */
#ifdef KGDB
#ifndef KGDB_DEVNAME
#define KGDB_DEVNAME "com"
#endif /* KGDB_DEVNAME */
char kgdb_devname[] = KGDB_DEVNAME;
#if NCOM > 0
#ifndef KGDBADDR
#define KGDBADDR 0x3f8
#endif /* KGDBADDR */
int comkgdbaddr = KGDBADDR;
#ifndef KGDBRATE
#define KGDBRATE TTYDEF_SPEED
#endif /* KGDBRATE */
int comkgdbrate = KGDBRATE;
#ifndef KGDBMODE
#define KGDBMODE ((TTYDEF_CFLAG & ~(CSIZE | CSTOPB | PARENB)) | CS8)
#endif /* KGDBMODE */
int comkgdbmode = KGDBMODE;
#endif /* NCOM */
void kgdb_port_init(void);
#endif /* KGDB */

/* allow X servers to map the graphics aperture (machdep.allowaperture) */
#ifdef APERTURE
#ifdef INSECURE
int allowaperture = 1;
#else
int allowaperture = 0;
#endif
#endif
283: /*
284: * Machine-dependent startup code
285: */
286: void
287: cpu_startup(void)
288: {
289: vaddr_t v;
290: vsize_t sz;
291: vaddr_t minaddr, maxaddr;
292:
293: msgbuf_vaddr = PMAP_DIRECT_MAP(msgbuf_paddr);
294: initmsgbuf((caddr_t)msgbuf_vaddr, round_page(MSGBUFSIZE));
295:
296: printf("%s", version);
297:
298: printf("real mem = %u (%uMB)\n", ctob(physmem),
299: ctob(physmem)/1024/1024);
300:
301: if (physmem >= btoc(1ULL << 32)) {
302: extern int amdgart_enable;
303:
304: amdgart_enable = 1;
305: }
306:
307: /*
308: * Find out how much space we need, allocate it,
309: * and then give everything true virtual addresses.
310: */
311: sz = allocsys(0);
312: if ((v = uvm_km_zalloc(kernel_map, round_page(sz))) == 0)
313: panic("startup: no room for tables");
314: if (allocsys(v) - v != sz)
315: panic("startup: table size inconsistency");
316:
317: setup_buffers();
318:
319: /*
320: * Allocate a submap for exec arguments. This map effectively
321: * limits the number of processes exec'ing at any time.
322: */
323: minaddr = vm_map_min(kernel_map);
324: exec_map = uvm_km_suballoc(kernel_map, &minaddr, &maxaddr,
325: 16*NCARGS, VM_MAP_PAGEABLE, FALSE, NULL);
326:
327: /*
328: * Allocate a submap for physio
329: */
330: minaddr = vm_map_min(kernel_map);
331: phys_map = uvm_km_suballoc(kernel_map, &minaddr, &maxaddr,
332: VM_PHYS_SIZE, 0, FALSE, NULL);
333:
334: printf("avail mem = %lu (%luMB)\n", ptoa(uvmexp.free),
335: ptoa(uvmexp.free)/1024/1024);
336:
337: bufinit();
338:
339: if (boothowto & RB_CONFIG) {
340: #ifdef BOOT_CONFIG
341: user_config();
342: #else
343: printf("kernel does not support - c; continuing..\n");
344: #endif
345: }
346:
347: /* Safe for i/o port / memory space allocation to use malloc now. */
348: x86_bus_space_mallocok();
349: }
350:
/*
 * Allocate space for system data structures.  We are given
 * a starting virtual address and we return a final virtual
 * address; along the way we set each data structure pointer.
 *
 * We call allocsys() with 0 to find out how much space we want,
 * allocate that much and fill it with zeroes, and then call
 * allocsys() again with the correct base virtual address.
 */
vaddr_t
allocsys(vaddr_t v)
{

/* Place `num' objects of `type' at v, then advance v past them. */
#define valloc(name, type, num) \
	v = (vaddr_t)(((name) = (type *)v) + (num))

#ifdef SYSVMSG
	/* System V message queue pools, sized by the msginfo tunables. */
	valloc(msgpool, char, msginfo.msgmax);
	valloc(msgmaps, struct msgmap, msginfo.msgseg);
	valloc(msghdrs, struct msg, msginfo.msgtql);
	valloc(msqids, struct msqid_ds, msginfo.msgmni);
#endif

	return v;
}
376:
377: void
378: setup_buffers()
379: {
380: /*
381: * Determine how many buffers to allocate.
382: * We allocate bufcachepercent% of memory for buffer space.
383: */
384: if (bufpages == 0)
385: bufpages = physmem * bufcachepercent / 100;
386:
387: /* Restrict to at most 25% filled kvm */
388: if (bufpages >
389: (VM_MAX_KERNEL_ADDRESS-VM_MIN_KERNEL_ADDRESS) / PAGE_SIZE / 4)
390: bufpages = (VM_MAX_KERNEL_ADDRESS-VM_MIN_KERNEL_ADDRESS) /
391: PAGE_SIZE / 4;
392: }
393:
/*
 * Set up proc0's TSS and LDT and load them on the boot CPU.
 */
void
x86_64_proc0_tss_ldt_init(void)
{
	struct pcb *pcb;
	int x;

	gdt_init();

	/* proc0's PCB lives in its u-area. */
	cpu_info_primary.ci_curpcb = pcb = &proc0.p_addr->u_pcb;

	pcb->pcb_flags = 0;
	/* I/O bitmap offset, relative to the start of the TSS. */
	pcb->pcb_tss.tss_iobase =
	    (u_int16_t)((caddr_t)pcb->pcb_iomap - (caddr_t)&pcb->pcb_tss);
	/* All bits set: every user-mode I/O port access traps. */
	for (x = 0; x < sizeof(pcb->pcb_iomap) / 4; x++)
		pcb->pcb_iomap[x] = 0xffffffff;

	pcb->pcb_ldt_sel = pmap_kernel()->pm_ldt_sel =
	    GSYSSEL(GLDT_SEL, SEL_KPL);
	pcb->pcb_cr0 = rcr0();
	/* Kernel stack top for ring transitions; IST slot 0 stack too. */
	pcb->pcb_tss.tss_rsp0 = (u_int64_t)proc0.p_addr + USPACE - 16;
	pcb->pcb_tss.tss_ist[0] = (u_int64_t)proc0.p_addr + PAGE_SIZE;
	/* The trapframe sits just below the kernel stack top. */
	proc0.p_md.md_regs = (struct trapframe *)pcb->pcb_tss.tss_rsp0 - 1;
	proc0.p_md.md_tss_sel = tss_alloc(pcb);

	/* Finally, load the task register and LDT selector. */
	ltr(proc0.p_md.md_tss_sel);
	lldt(pcb->pcb_ldt_sel);
}
424:
/*
 * Set up TSS and LDT for a new PCB (a secondary CPU's idle PCB).
 * Mirrors x86_64_proc0_tss_ldt_init() but does not load the
 * registers; the target CPU does that itself.
 */

#ifdef MULTIPROCESSOR
void
x86_64_init_pcb_tss_ldt(struct cpu_info *ci)
{
	int x;
	struct pcb *pcb = ci->ci_idle_pcb;

	/* I/O bitmap offset, relative to the start of the TSS. */
	pcb->pcb_tss.tss_iobase =
	    (u_int16_t)((caddr_t)pcb->pcb_iomap - (caddr_t)&pcb->pcb_tss);
	/* All bits set: every user-mode I/O port access traps. */
	for (x = 0; x < sizeof(pcb->pcb_iomap) / 4; x++)
		pcb->pcb_iomap[x] = 0xffffffff;

	/* XXXfvdl pmap_kernel not needed */
	pcb->pcb_ldt_sel = pmap_kernel()->pm_ldt_sel =
	    GSYSSEL(GLDT_SEL, SEL_KPL);
	pcb->pcb_cr0 = rcr0();

	ci->ci_idle_tss_sel = tss_alloc(pcb);
}
#endif /* MULTIPROCESSOR */
449:
450: bios_diskinfo_t *
451: bios_getdiskinfo(dev_t dev)
452: {
453: bios_diskinfo_t *pdi;
454:
455: if (bios_diskinfo == NULL)
456: return NULL;
457:
458: for (pdi = bios_diskinfo; pdi->bios_number != -1; pdi++) {
459: if ((dev & B_MAGICMASK) == B_DEVMAGIC) { /* search by bootdev */
460: if (pdi->bsd_dev == dev)
461: break;
462: } else {
463: if (pdi->bios_number == dev)
464: break;
465: }
466: }
467:
468: if (pdi->bios_number == -1)
469: return NULL;
470: else
471: return pdi;
472: }
473:
/*
 * Handle the machdep.bios sysctl subtree: the BIOS number of the
 * boot device, per-disk BIOS information records, and the checksum
 * length handed to us by /boot.
 */
int
bios_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
    size_t newlen, struct proc *p)
{
	bios_diskinfo_t *pdi;
	extern dev_t bootdev;
	int biosdev;

	/* all sysctl names at this level except diskinfo are terminal */
	if (namelen != 1 && name[0] != BIOS_DISKINFO)
		return (ENOTDIR);	/* overloaded */

	/* Without the vectored boot-args API there is no BIOS data. */
	if (!(bootapiver & BAPIV_VECTOR))
		return EOPNOTSUPP;

	switch (name[0]) {
	case BIOS_DEV:
		/* BIOS drive number of the device we booted from. */
		if ((pdi = bios_getdiskinfo(bootdev)) == NULL)
			return ENXIO;
		biosdev = pdi->bios_number;
		return sysctl_rdint(oldp, oldlenp, newp, biosdev);
	case BIOS_DISKINFO:
		/* Second name component selects the BIOS drive. */
		if (namelen != 2)
			return ENOTDIR;
		if ((pdi = bios_getdiskinfo(name[1])) == NULL)
			return ENXIO;
		return sysctl_rdstruct(oldp, oldlenp, newp, pdi, sizeof(*pdi));
	case BIOS_CKSUMLEN:
		return sysctl_rdint(oldp, oldlenp, newp, bios_cksumlen);
	default:
		return EOPNOTSUPP;
	}
	/* NOTREACHED */
}
508:
/*
 * machine dependent system variables (the machdep.* sysctl tree).
 */
int
cpu_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
    size_t newlen, struct proc *p)
{
	dev_t consdev;
	dev_t dev;

	switch (name[0]) {
	case CPU_CONSDEV:
		/* Device number of the current console, or NODEV. */
		if (namelen != 1)
			return (ENOTDIR);	/* overloaded */
		if (cn_tab != NULL)
			consdev = cn_tab->cn_dev;
		else
			consdev = NODEV;
		return (sysctl_rdstruct(oldp, oldlenp, newp, &consdev,
		    sizeof consdev));
	case CPU_CHR2BLK:
		/* Map a character device number to its block device. */
		if (namelen != 2)
			return (ENOTDIR);	/* overloaded */
		dev = chrtoblk((dev_t)name[1]);
		return sysctl_rdstruct(oldp, oldlenp, newp, &dev, sizeof(dev));
	case CPU_BIOS:
		/* Whole subtree handled by bios_sysctl(). */
		return bios_sysctl(name + 1, namelen - 1, oldp, oldlenp,
		    newp, newlen, p);
	case CPU_CPUVENDOR:
		return (sysctl_rdstring(oldp, oldlenp, newp, cpu_vendor));
	case CPU_CPUFEATURE:
		return (sysctl_rdint(oldp, oldlenp, newp, cpu_feature));
	case CPU_KBDRESET:
		/* Read-only once securelevel has been raised. */
		if (securelevel > 0)
			return (sysctl_rdint(oldp, oldlenp, newp,
			    kbd_reset));
		else
			return (sysctl_int(oldp, oldlenp, newp, newlen,
			    &kbd_reset));
	case CPU_ALLOWAPERTURE:
		if (namelen != 1)
			return (ENOTDIR);	/* overloaded */
#ifdef APERTURE
		/* At raised securelevel the value may only be lowered. */
		if (securelevel > 0)
			return (sysctl_int_lower(oldp, oldlenp, newp, newlen,
			    &allowaperture));
		else
			return (sysctl_int(oldp, oldlenp, newp, newlen,
			    &allowaperture));
#else
		return (sysctl_rdint(oldp, oldlenp, newp, 0));
#endif
	default:
		return (EOPNOTSUPP);
	}
	/* NOTREACHED */
}
566:
/*
 * Send an interrupt to process.
 *
 * Build a signal context (and, for SA_SIGINFO handlers, a siginfo)
 * on the user stack, then rewrite the trapframe so that userland
 * resumes in the signal trampoline (p_sigcode), which invokes the
 * handler and finally sigreturn(2) to restore the saved context.
 */
void
sendsig(sig_t catcher, int sig, int mask, u_long code, int type,
    union sigval val)
{
	struct proc *p = curproc;
	struct trapframe *tf = p->p_md.md_regs;
	struct sigacts * psp = p->p_sigacts;
	struct sigcontext ksc;
	siginfo_t ksi;
	register_t sp, scp, sip;
	u_long sss;

#ifdef DEBUG
	if ((sigdebug & SDB_FOLLOW) && (!sigpid || p->p_pid == sigpid))
		printf("sendsig: %s[%d] sig %d catcher %p\n",
		    p->p_comm, p->p_pid, sig, catcher);
#endif

	/*
	 * The sigcontext starts with a copy of the trapframe
	 * registers; NOTE(review): relies on struct sigcontext's
	 * leading fields matching struct trapframe's layout.
	 */
	bcopy(tf, &ksc, sizeof(*tf));
	ksc.sc_onstack = psp->ps_sigstk.ss_flags & SS_ONSTACK;
	ksc.sc_mask = mask;
	ksc.sc_fpstate = NULL;

	/* Allocate space for the signal handler context. */
	if ((psp->ps_flags & SAS_ALTSTACK) && !ksc.sc_onstack &&
	    (psp->ps_sigonstack & sigmask(sig))) {
		sp = (register_t)psp->ps_sigstk.ss_sp + psp->ps_sigstk.ss_size;
		psp->ps_sigstk.ss_flags |= SS_ONSTACK;
	} else
		sp = tf->tf_rsp - 128;	/* skip the 128-byte ABI red zone */

	sp &= ~15ULL;	/* just in case */
	sss = (sizeof(ksc) + 15) & ~15;

	/* If the process used the FPU, push its state to the stack. */
	if (p->p_md.md_flags & MDP_USEDFPU) {
		fpusave_proc(p, 1);
		sp -= sizeof(struct fxsave64);
		ksc.sc_fpstate = (struct fxsave64 *)sp;
		if (copyout(&p->p_addr->u_pcb.pcb_savefpu.fp_fxsave,
		    (void *)sp, sizeof(struct fxsave64)))
			sigexit(p, SIGILL);
	}

	/* Push siginfo only if the handler asked for it. */
	sip = 0;
	if (psp->ps_siginfo & sigmask(sig)) {
		sip = sp - ((sizeof(ksi) + 15) & ~15);
		sss += (sizeof(ksi) + 15) & ~15;

		initsiginfo(&ksi, sig, code, type, val);
		if (copyout(&ksi, (void *)sip, sizeof(ksi)))
			sigexit(p, SIGILL);
	}
	scp = sp - sss;

	/* A fault writing the context is unrecoverable: kill the process. */
	if (copyout(&ksc, (void *)scp, sizeof(ksc)))
		sigexit(p, SIGILL);

	/*
	 * Build context to run handler in.  The handler address
	 * travels in %rax for the trampoline; sig/sip/scp are the
	 * handler's arguments per the C calling convention.
	 */
	tf->tf_ds = LSEL(LUDATA_SEL, SEL_UPL);
	tf->tf_es = LSEL(LUDATA_SEL, SEL_UPL);
	tf->tf_fs = LSEL(LUDATA_SEL, SEL_UPL);
	tf->tf_gs = LSEL(LUDATA_SEL, SEL_UPL);

	tf->tf_rax = (u_int64_t)catcher;
	tf->tf_rdi = sig;
	tf->tf_rsi = sip;
	tf->tf_rdx = scp;

	tf->tf_rip = (u_int64_t)p->p_sigcode;
	tf->tf_cs = LSEL(LUCODE_SEL, SEL_UPL);
	tf->tf_rflags &= ~(PSL_T|PSL_VM|PSL_AC);
	tf->tf_rsp = scp;
	tf->tf_ss = LSEL(LUDATA_SEL, SEL_UPL);

#ifdef DEBUG
	if ((sigdebug & SDB_FOLLOW) && (!sigpid || p->p_pid == sigpid))
		printf("sendsig(%d): pc 0x%x, catcher 0x%x\n", p->p_pid,
		    tf->tf_rip, tf->tf_rax);
#endif
}
659:
/*
 * System call to cleanup state after a signal
 * has been taken.  Reset signal mask and
 * stack state from context left by sendsig (above).
 * Return to previous pc and psl as specified by
 * context left by sendsig.  Check carefully to
 * make sure that the user has not modified the
 * psl to gain improper privileges or to cause
 * a machine fault.
 */
int
sys_sigreturn(struct proc *p, void *v, register_t *retval)
{
	struct sys_sigreturn_args /* {
		syscallarg(struct sigcontext *) sigcntxp;
	} */ *uap = v;
	struct sigcontext *scp, ksc;
	struct trapframe *tf = p->p_md.md_regs;
	int error;

	scp = SCARG(uap, sigcntxp);
#ifdef DEBUG
	if ((sigdebug & SDB_FOLLOW) && (!sigpid || p->p_pid == sigpid))
		printf("sigreturn: pid %d, scp %p\n", p->p_pid, scp);
#endif
	if ((error = copyin((caddr_t)scp, &ksc, sizeof ksc)))
		return (error);

	/*
	 * Reject contexts whose static flag bits differ from the
	 * live ones, or that would not return to user mode.
	 */
	if (((ksc.sc_rflags ^ tf->tf_rflags) & PSL_USERSTATIC) != 0 ||
	    !USERMODE(ksc.sc_cs, ksc.sc_eflags))
		return (EINVAL);

	/* Discard live FPU state; it is reloaded from the context. */
	if (p->p_md.md_flags & MDP_USEDFPU)
		fpusave_proc(p, 0);

	if (ksc.sc_fpstate && (error = copyin(ksc.sc_fpstate,
	    &p->p_addr->u_pcb.pcb_savefpu.fp_fxsave, sizeof (struct fxsave64))))
		return (error);

	/* Preserve kernel-maintained trap fields across the copy. */
	ksc.sc_trapno = tf->tf_trapno;
	ksc.sc_err = tf->tf_err;
	bcopy(&ksc, tf, sizeof(*tf));

	/* Restore signal stack. */
	if (ksc.sc_onstack)
		p->p_sigacts->ps_sigstk.ss_flags |= SS_ONSTACK;
	else
		p->p_sigacts->ps_sigstk.ss_flags &= ~SS_ONSTACK;
	p->p_sigmask = ksc.sc_mask & ~sigcantmask;

	return (EJUSTRETURN);
}
712:
/*
 * Notify the current process (p) that it has a signal pending,
 * process as soon as possible.
 */
void
signotify(struct proc *p)
{
	aston(p);	/* request an AST on return to user mode */
#ifdef MULTIPROCESSOR
	/* Kick the CPU the process is running on so it notices. */
	if (p->p_cpu != curcpu() && p->p_cpu != NULL)
		x86_send_ipi(p->p_cpu, X86_IPI_NOP);
#endif
}
726:
int waittime = -1;		/* set non-negative once the sync has run */
struct pcb dumppcb;		/* register state saved for a crash dump */

/*
 * Halt or reboot the machine as directed by the RB_* bits in
 * howto, syncing disks and optionally dumping core first.
 */
void
boot(int howto)
{

	if (cold) {
		/*
		 * If the system is cold, just halt, unless the user
		 * explicitly asked for reboot.
		 */
		if ((howto & RB_USERREQ) == 0)
			howto |= RB_HALT;
		goto haltsys;
	}

	boothowto = howto;
	/* Sync filesystems unless told not to, and only once. */
	if ((howto & RB_NOSYNC) == 0 && waittime < 0) {
		waittime = 0;

		if (curproc == NULL)
			curproc = &proc0;	/* XXX */
		vfs_shutdown();
		/*
		 * If we've been adjusting the clock, the todr
		 * will be out of synch; adjust it now.
		 */
		if ((howto & RB_TIMEBAD) == 0) {
			resettodr();
		} else {
			printf("WARNING: not updating battery clock\n");
		}
	}

	/* Disable interrupts. */
	splhigh();

	/* Do a dump if requested. */
	if (howto & RB_DUMP)
		dumpsys();

haltsys:
	doshutdownhooks();

#ifdef MULTIPROCESSOR
	/* Stop the other CPUs. */
	x86_broadcast_ipi(X86_IPI_HALT);
#endif

	if (howto & RB_HALT) {
#if NACPI > 0 && !defined(SMALL_KERNEL)
		extern int acpi_s5, acpi_enabled;

		/* Power down via ACPI when requested and available. */
		if (acpi_enabled) {
			delay(500000);
			if (howto & RB_POWERDOWN || acpi_s5)
				acpi_powerdown();
		}
#endif
		printf("\n");
		printf("The operating system has halted.\n");
		printf("Please press any key to reboot.\n\n");
		cnpollc(1);	/* for proper keyboard command handling */
		cngetc();
		cnpollc(0);
	}

	printf("rebooting...\n");
	if (cpureset_delay > 0)
		delay(cpureset_delay * 1000);
	cpu_reset();
	for(;;) ;
	/*NOTREACHED*/
}
801:
802: /*
803: * XXXfvdl share dumpcode.
804: */
805:
/*
 * These variables are needed by /sbin/savecore
 */
u_int32_t dumpmag = 0x8fca0101;	/* magic number */
int dumpsize = 0;		/* pages */
long dumplo = 0; 		/* blocks */

/*
 * cpu_dump: dump the machine-dependent kernel core dump headers.
 *
 * Writes one disk block at dumplo holding, back to back: the
 * kcore segment header, the MD header (page directory address and
 * segment count), and the physical memory segment descriptors.
 */
int
cpu_dump(void)
{
	int (*dump)(dev_t, daddr64_t, caddr_t, size_t);
	char buf[dbtob(1)];
	kcore_seg_t *segp;
	cpu_kcore_hdr_t *cpuhdrp;
	phys_ram_seg_t *memsegp;
	int i;

	dump = bdevsw[major(dumpdev)].d_dump;

	/* Lay the three headers out contiguously in one block. */
	memset(buf, 0, sizeof buf);
	segp = (kcore_seg_t *)buf;
	cpuhdrp = (cpu_kcore_hdr_t *)&buf[ALIGN(sizeof(*segp))];
	memsegp = (phys_ram_seg_t *)&buf[ALIGN(sizeof(*segp)) +
	    ALIGN(sizeof(*cpuhdrp))];

	/*
	 * Generate a segment header.
	 */
	CORE_SETMAGIC(*segp, KCORE_MAGIC, MID_MACHINE, CORE_CPU);
	segp->c_size = dbtob(1) - ALIGN(sizeof(*segp));

	/*
	 * Add the machine-dependent header info.
	 */
	cpuhdrp->ptdpaddr = PTDpaddr;
	cpuhdrp->nmemsegs = mem_cluster_cnt;

	/*
	 * Fill in the memory segment descriptors.
	 */
	for (i = 0; i < mem_cluster_cnt; i++) {
		memsegp[i].start = mem_clusters[i].start;
		memsegp[i].size = mem_clusters[i].size & ~PAGE_MASK;
	}

	return (dump(dumpdev, dumplo, (caddr_t)buf, dbtob(1)));
}
856:
857: /*
858: * This is called by main to set dumplo and dumpsize.
859: * Dumps always skip the first PAGE_SIZE of disk space
860: * in case there might be a disk label stored there.
861: * If there is extra space, put dump at the end to
862: * reduce the chance that swapping trashes it.
863: */
864: void
865: dumpconf(void)
866: {
867: int nblks, dumpblks; /* size of dump area */
868:
869: if (dumpdev == NODEV ||
870: (nblks = (bdevsw[major(dumpdev)].d_psize)(dumpdev)) == 0)
871: return;
872: if (nblks <= ctod(1))
873: return;
874:
875: dumpblks = cpu_dumpsize();
876: if (dumpblks < 0)
877: return;
878: dumpblks += ctod(cpu_dump_mempagecnt());
879:
880: /* If dump won't fit (incl. room for possible label), punt. */
881: if (dumpblks > (nblks - ctod(1)))
882: return;
883:
884: /* Put dump at end of partition */
885: dumplo = nblks - dumpblks;
886:
887: /* dumpsize is in page units, and doesn't include headers. */
888: dumpsize = cpu_dump_mempagecnt();
889: }
890:
/*
 * Doadump comes here after turning off memory management and
 * getting on the dump stack, either when called above, or by
 * the auto-restart code.
 */
#define BYTES_PER_DUMP  PAGE_SIZE /* must be a multiple of pagesize XXX small */
static vaddr_t dumpspace;	/* scratch KVA used to map pages being dumped */
898:
899: vaddr_t
900: reserve_dumppages(vaddr_t p)
901: {
902:
903: dumpspace = p;
904: return (p + BYTES_PER_DUMP);
905: }
906:
/*
 * Write all physical memory segments to the dump device, preceded
 * by the headers produced by cpu_dump().  Progress is reported on
 * the console in MB remaining; errors are decoded at the end.
 */
void
dumpsys(void)
{
	u_long totalbytesleft, bytes, i, n, memseg;
	u_long maddr;
	daddr64_t blkno;
	int (*dump)(dev_t, daddr64_t, caddr_t, size_t);
	int error;

	/* Save registers. */
	savectx(&dumppcb);

	if (dumpdev == NODEV)
		return;

	/*
	 * For dumps during autoconfiguration,
	 * if dump device has already configured...
	 */
	if (dumpsize == 0)
		dumpconf();
	if (dumplo <= 0 || dumpsize == 0) {
		printf("\ndump to dev %u,%u not possible\n", major(dumpdev),
		    minor(dumpdev));
		return;
	}
	printf("\ndumping to dev %u,%u offset %ld\n", major(dumpdev),
	    minor(dumpdev), dumplo);

	/* Re-check that the device is still present and sized. */
	error = (*bdevsw[major(dumpdev)].d_psize)(dumpdev);
	printf("dump ");
	if (error == -1) {
		printf("area unavailable\n");
		return;
	}

	/* Headers go out first; the memory image follows them. */
	if ((error = cpu_dump()) != 0)
		goto err;

	totalbytesleft = ptoa(cpu_dump_mempagecnt());
	blkno = dumplo + cpu_dumpsize();
	dump = bdevsw[major(dumpdev)].d_dump;
	error = 0;

	for (memseg = 0; memseg < mem_cluster_cnt; memseg++) {
		maddr = mem_clusters[memseg].start;
		bytes = mem_clusters[memseg].size;

		for (i = 0; i < bytes; i += n, totalbytesleft -= n) {
			/* Print out how many MBs we have left to go. */
			if ((totalbytesleft % (1024*1024)) == 0)
				printf("%ld ", totalbytesleft / (1024 * 1024));

			/* Limit size for next transfer. */
			n = bytes - i;
			if (n > BYTES_PER_DUMP)
				n = BYTES_PER_DUMP;

			/* Map the chunk into the dump window and write it. */
			(void) pmap_map(dumpspace, maddr, maddr + n,
			    VM_PROT_READ);

			error = (*dump)(dumpdev, blkno, (caddr_t)dumpspace, n);
			if (error)
				goto err;
			maddr += n;
			blkno += btodb(n);		/* XXX? */

#if 0	/* XXX this doesn't work.  grr. */
			/* operator aborting dump? */
			if (sget() != NULL) {
				error = EINTR;
				break;
			}
#endif
		}
	}

err:
	switch (error) {

	case ENXIO:
		printf("device bad\n");
		break;

	case EFAULT:
		printf("device not ready\n");
		break;

	case EINVAL:
		printf("area improper\n");
		break;

	case EIO:
		printf("i/o error\n");
		break;

	case EINTR:
		printf("aborted from console\n");
		break;

	case 0:
		printf("succeeded\n");
		break;

	default:
		printf("error %d\n", error);
		break;
	}
	printf("\n\n");
	delay(5000000);		/* 5 seconds */
}
1018:
/*
 * Clear registers on exec: reset FPU state to architectural
 * defaults and build a fresh user trapframe so the new image
 * starts at its entry point with an empty register set.
 */
void
setregs(struct proc *p, struct exec_package *pack, u_long stack,
    register_t *retval)
{
	struct pcb *pcb = &p->p_addr->u_pcb;
	struct trapframe *tf;

	/* If we were using the FPU, forget about it. */
	if (p->p_addr->u_pcb.pcb_fpcpu != NULL)
		fpusave_proc(p, 0);

#ifdef USER_LDT
	pmap_ldt_cleanup(p);
#endif

	/* Saved FPU image starts from the initial control words. */
	p->p_md.md_flags &= ~MDP_USEDFPU;
	pcb->pcb_flags = 0;
	pcb->pcb_savefpu.fp_fxsave.fx_fcw = __INITIAL_NPXCW__;
	pcb->pcb_savefpu.fp_fxsave.fx_mxcsr = __INITIAL_MXCSR__;
	pcb->pcb_savefpu.fp_fxsave.fx_mxcsr_mask = __INITIAL_MXCSR_MASK__;

	/* User segment selectors, zeroed GPRs, entry point and stack. */
	tf = p->p_md.md_regs;
	tf->tf_ds = LSEL(LUDATA_SEL, SEL_UPL);
	tf->tf_es = LSEL(LUDATA_SEL, SEL_UPL);
	tf->tf_fs = LSEL(LUDATA_SEL, SEL_UPL);
	tf->tf_gs = LSEL(LUDATA_SEL, SEL_UPL);
	tf->tf_rdi = 0;
	tf->tf_rsi = 0;
	tf->tf_rbp = 0;
	tf->tf_rbx = 0;
	tf->tf_rdx = 0;
	tf->tf_rcx = 0;
	tf->tf_rax = 0;
	tf->tf_rip = pack->ep_entry;
	tf->tf_cs = LSEL(LUCODE_SEL, SEL_UPL);
	tf->tf_rflags = PSL_USERSET;
	tf->tf_rsp = stack;
	tf->tf_ss = LSEL(LUDATA_SEL, SEL_UPL);

	retval[1] = 0;
}
1063:
1064: /*
1065: * Initialize segments and descriptor tables
1066: */
1067:
struct gate_descriptor *idt;	/* interrupt descriptor table */
char idt_allocmap[NIDT];	/* which IDT slots are in use */
struct simplelock idt_lock;	/* NOTE(review): presumably guards idt_allocmap -- confirm */
char *ldtstore;			/* local descriptor table storage */
char *gdtstore;			/* global descriptor table storage */
extern struct user *proc0paddr;
1074:
1075: void
1076: setgate(struct gate_descriptor *gd, void *func, int ist, int type, int dpl,
1077: int sel)
1078: {
1079: gd->gd_looffset = (u_int64_t)func & 0xffff;
1080: gd->gd_selector = sel;
1081: gd->gd_ist = ist;
1082: gd->gd_type = type;
1083: gd->gd_dpl = dpl;
1084: gd->gd_p = 1;
1085: gd->gd_hioffset = (u_int64_t)func >> 16;
1086: gd->gd_zero = 0;
1087: gd->gd_xx1 = 0;
1088: gd->gd_xx2 = 0;
1089: gd->gd_xx3 = 0;
1090: }
1091:
1092: void
1093: unsetgate(struct gate_descriptor *gd)
1094: {
1095: memset(gd, 0, sizeof (*gd));
1096: }
1097:
1098: void
1099: setregion(struct region_descriptor *rd, void *base, u_int16_t limit)
1100: {
1101: rd->rd_limit = limit;
1102: rd->rd_base = (u_int64_t)base;
1103: }
1104:
1105: /*
1106: * Note that the base and limit fields are ignored in long mode.
1107: */
1108: void
1109: set_mem_segment(struct mem_segment_descriptor *sd, void *base, size_t limit,
1110: int type, int dpl, int gran, int def32, int is64)
1111: {
1112: sd->sd_lolimit = (unsigned)limit;
1113: sd->sd_lobase = (unsigned long)base;
1114: sd->sd_type = type;
1115: sd->sd_dpl = dpl;
1116: sd->sd_p = 1;
1117: sd->sd_hilimit = (unsigned)limit >> 16;
1118: sd->sd_avl = 0;
1119: sd->sd_long = is64;
1120: sd->sd_def32 = def32;
1121: sd->sd_gran = gran;
1122: sd->sd_hibase = (unsigned long)base >> 24;
1123: }
1124:
1125: void
1126: set_sys_segment(struct sys_segment_descriptor *sd, void *base, size_t limit,
1127: int type, int dpl, int gran)
1128: {
1129: memset(sd, 0, sizeof *sd);
1130: sd->sd_lolimit = (unsigned)limit;
1131: sd->sd_lobase = (u_int64_t)base;
1132: sd->sd_type = type;
1133: sd->sd_dpl = dpl;
1134: sd->sd_p = 1;
1135: sd->sd_hilimit = (unsigned)limit >> 16;
1136: sd->sd_gran = gran;
1137: sd->sd_hibase = (u_int64_t)base >> 24;
1138: }
1139:
1140: void cpu_init_idt(void)
1141: {
1142: struct region_descriptor region;
1143:
1144: setregion(®ion, idt, NIDT * sizeof(idt[0]) - 1);
1145: lidt(®ion);
1146: }
1147:
1148:
#define IDTVEC(name) __CONCAT(X, name)	/* assembly stubs are named with an 'X' prefix */
typedef void (vector)(void);
extern vector IDTVEC(syscall);		/* 64-bit syscall entry */
extern vector IDTVEC(syscall32);	/* 32-bit compat syscall entry */
extern vector IDTVEC(osyscall);		/* int$0x80 style entry (gate set below) */
extern vector IDTVEC(oosyscall);	/* LDT call-gate entry (LSYS5CALLS_SEL) */
extern vector *IDTVEC(exceptions)[];	/* table of the 32 CPU exception stubs */

#define KBTOB(x) ((size_t)(x) * 1024UL)	/* kilobytes to bytes */
1158:
/*
 * init_x86_64: machine-dependent bootstrap for the boot processor.
 * Parses the /boot argument list, bootstraps the pmap, loads physical
 * memory into UVM, steals the message buffer from the top of RAM,
 * builds the GDT/LDT/IDT, and finally enables interrupts.
 *
 * first_avail: first physical address not used by the kernel image.
 *
 * NOTE(review): the two "&region" address-of expressions near the end
 * were mojibake in the CVSweb rendering (HTML &reg; entity) and have
 * been restored here.
 */
void
init_x86_64(paddr_t first_avail)
{
	extern void consinit(void);
	extern struct extent *iomem_ex;
	struct region_descriptor region;
	struct mem_segment_descriptor *ldt_segp;
	int x, first16q, ist;
	u_int64_t seg_start, seg_end;
	u_int64_t seg_start1, seg_end1;

	/* Per-CPU MSR setup for the boot processor (see cpu_init_msrs()). */
	cpu_init_msrs(&cpu_info_primary);

	proc0.p_addr = proc0paddr;
	cpu_info_primary.ci_curpcb = &proc0.p_addr->u_pcb;

	x86_bus_space_init();

	consinit(); /* XXX SHOULD NOT BE DONE HERE */

	/*
	 * Initialize PAGE_SIZE-dependent variables.
	 */
	uvm_setpagesize();

#if 0
	uvmexp.ncolors = 2;
#endif

	/*
	 * Boot arguments are in a single page specified by /boot.
	 *
	 * We require the "new" vector form, as well as memory ranges
	 * to be given in bytes rather than KB.
	 *
	 * locore copies the data into bootinfo[] for us.
	 */
	if ((bootapiver & (BAPIV_VECTOR | BAPIV_BMEMMAP)) ==
	    (BAPIV_VECTOR | BAPIV_BMEMMAP)) {
		if (bootinfo_size >= sizeof(bootinfo))
			panic("boot args too big");

		getbootinfo(bootinfo, bootinfo_size);
	} else
		panic("invalid /boot");

	avail_start = PAGE_SIZE; /* BIOS leaves data in low memory */
				 /* and VM system doesn't work with phys 0 */
#ifdef MULTIPROCESSOR
	/* Keep the AP bootstrap trampoline page out of the free pool. */
	if (avail_start < MP_TRAMPOLINE + PAGE_SIZE)
		avail_start = MP_TRAMPOLINE + PAGE_SIZE;
#endif

	/*
	 * Call pmap initialization to make new kernel address space.
	 * We must do this before loading pages into the VM system.
	 */
	pmap_bootstrap(VM_MIN_KERNEL_ADDRESS,
	    IOM_END + trunc_page(KBTOB(biosextmem)));

	if (avail_start != PAGE_SIZE)
		pmap_prealloc_lowmem_ptps();

	if (mem_cluster_cnt == 0) {
		/*
		 * No memory map from /boot: fall back to the BIOS
		 * base/extended memory sizes (two clusters, below and
		 * above the ISA hole).
		 *
		 * Allocate the physical addresses used by RAM from the iomem
		 * extent map.  This is done before the addresses are
		 * page rounded just to make sure we get them all.
		 */
		if (extent_alloc_region(iomem_ex, 0, KBTOB(biosbasemem),
		    EX_NOWAIT)) {
			/* XXX What should we do? */
			printf("WARNING: CAN'T ALLOCATE BASE MEMORY FROM "
			    "IOMEM EXTENT MAP!\n");
		}
		mem_clusters[0].start = 0;
		mem_clusters[0].size = trunc_page(KBTOB(biosbasemem));
		physmem += atop(mem_clusters[0].size);
		if (extent_alloc_region(iomem_ex, IOM_END, KBTOB(biosextmem),
		    EX_NOWAIT)) {
			/* XXX What should we do? */
			printf("WARNING: CAN'T ALLOCATE EXTENDED MEMORY FROM "
			    "IOMEM EXTENT MAP!\n");
		}
#if 0
#if NISADMA > 0
		/*
		 * Some motherboards/BIOSes remap the 384K of RAM that would
		 * normally be covered by the ISA hole to the end of memory
		 * so that it can be used.  However, on a 16M system, this
		 * would cause bounce buffers to be allocated and used.
		 * This is not desirable behaviour, as more than 384K of
		 * bounce buffers might be allocated.  As a work-around,
		 * we round memory down to the nearest 1M boundary if
		 * we're using any isadma devices and the remapped memory
		 * is what puts us over 16M.
		 */
		if (biosextmem > (15*1024) && biosextmem < (16*1024)) {
			char pbuf[9];

			format_bytes(pbuf, sizeof(pbuf),
			    biosextmem - (15*1024));
			printf("Warning: ignoring %s of remapped memory\n",
			    pbuf);
			biosextmem = (15*1024);
		}
#endif
#endif
		mem_clusters[1].start = IOM_END;
		mem_clusters[1].size = trunc_page(KBTOB(biosextmem));
		physmem += atop(mem_clusters[1].size);

		mem_cluster_cnt = 2;

		avail_end = IOM_END + trunc_page(KBTOB(biosextmem));
	}

	/*
	 * If we have 16M of RAM or less, just put it all on
	 * the default free list.  Otherwise, put the first
	 * 16M of RAM on a lower priority free list (so that
	 * all of the ISA DMA'able memory won't be eaten up
	 * first-off).
	 */
	if (avail_end <= (16 * 1024 * 1024))
		first16q = VM_FREELIST_DEFAULT;
	else
		first16q = VM_FREELIST_FIRST16;

	/* Make sure the end of the space used by the kernel is rounded. */
	first_avail = round_page(first_avail);
	kern_end = KERNBASE + first_avail;

	/*
	 * Now, load the memory clusters (which have already been
	 * rounded and truncated) into the VM system.
	 *
	 * NOTE: WE ASSUME THAT MEMORY STARTS AT 0 AND THAT THE KERNEL
	 * IS LOADED AT IOM_END (1M).
	 */
	for (x = 0; x < mem_cluster_cnt; x++) {
		seg_start = mem_clusters[x].start;
		seg_end = mem_clusters[x].start + mem_clusters[x].size;
		seg_start1 = 0;
		seg_end1 = 0;

		/* Memory above 4GB is not used at this point; clip to it. */
		if (seg_start > 0xffffffffULL) {
			printf("skipping %lld bytes of memory above 4GB\n",
			    seg_end - seg_start);
			continue;
		}
		if (seg_end > 0x100000000ULL) {
			printf("skipping %lld bytes of memory above 4GB\n",
			    seg_end - 0x100000000ULL);
			seg_end = 0x100000000ULL;
		}

		/*
		 * Skip memory before our available starting point.
		 */
		if (seg_end <= avail_start)
			continue;

		if (avail_start >= seg_start && avail_start < seg_end) {
			if (seg_start != 0)
				panic("init_x86_64: memory doesn't start at 0");
			seg_start = avail_start;
			if (seg_start == seg_end)
				continue;
		}

		/*
		 * If this segment contains the kernel, split it
		 * in two, around the kernel.
		 */
		if (seg_start <= IOM_END && first_avail <= seg_end) {
			seg_start1 = first_avail;
			seg_end1 = seg_end;
			seg_end = IOM_END;
		}

		/* First hunk */
		if (seg_start != seg_end) {
			if (seg_start <= (16 * 1024 * 1024) &&
			    first16q != VM_FREELIST_DEFAULT) {
				u_int64_t tmp;

				if (seg_end > (16 * 1024 * 1024))
					tmp = (16 * 1024 * 1024);
				else
					tmp = seg_end;
#if DEBUG_MEMLOAD
				printf("loading 0x%qx-0x%qx (0x%lx-0x%lx)\n",
				    (unsigned long long)seg_start,
				    (unsigned long long)tmp,
				    atop(seg_start), atop(tmp));
#endif
				uvm_page_physload(atop(seg_start),
				    atop(tmp), atop(seg_start),
				    atop(tmp), first16q);
				seg_start = tmp;
			}

			if (seg_start != seg_end) {
#if DEBUG_MEMLOAD
				printf("loading 0x%qx-0x%qx (0x%lx-0x%lx)\n",
				    (unsigned long long)seg_start,
				    (unsigned long long)seg_end,
				    atop(seg_start), atop(seg_end));
#endif
				uvm_page_physload(atop(seg_start),
				    atop(seg_end), atop(seg_start),
				    atop(seg_end), VM_FREELIST_DEFAULT);
			}
		}

		/* Second hunk */
		if (seg_start1 != seg_end1) {
			if (seg_start1 <= (16 * 1024 * 1024) &&
			    first16q != VM_FREELIST_DEFAULT) {
				u_int64_t tmp;

				if (seg_end1 > (16 * 1024 * 1024))
					tmp = (16 * 1024 * 1024);
				else
					tmp = seg_end1;
#if DEBUG_MEMLOAD
				printf("loading 0x%qx-0x%qx (0x%lx-0x%lx)\n",
				    (unsigned long long)seg_start1,
				    (unsigned long long)tmp,
				    atop(seg_start1), atop(tmp));
#endif
				uvm_page_physload(atop(seg_start1),
				    atop(tmp), atop(seg_start1),
				    atop(tmp), first16q);
				seg_start1 = tmp;
			}

			if (seg_start1 != seg_end1) {
#if DEBUG_MEMLOAD
				printf("loading 0x%qx-0x%qx (0x%lx-0x%lx)\n",
				    (unsigned long long)seg_start1,
				    (unsigned long long)seg_end1,
				    atop(seg_start1), atop(seg_end1));
#endif
				uvm_page_physload(atop(seg_start1),
				    atop(seg_end1), atop(seg_start1),
				    atop(seg_end1), VM_FREELIST_DEFAULT);
			}
		}
	}

	/*
	 * Steal memory for the message buffer (at end of core).
	 */
	{
		struct vm_physseg *vps = NULL;
		psize_t sz = round_page(MSGBUFSIZE);
		psize_t reqsz = sz;

		/* Find the physseg that ends at avail_end. */
		for (x = 0; x < vm_nphysseg; x++) {
			vps = &vm_physmem[x];
			if (ptoa(vps->avail_end) == avail_end)
				break;
		}
		if (x == vm_nphysseg)
			panic("init_x86_64: can't find end of memory");

		/* Shrink so it'll fit in the last segment. */
		if ((vps->avail_end - vps->avail_start) < atop(sz))
			sz = ptoa(vps->avail_end - vps->avail_start);

		vps->avail_end -= atop(sz);
		vps->end -= atop(sz);
		msgbuf_paddr = ptoa(vps->avail_end);

		/* Remove the last segment if it now has no pages. */
		if (vps->start == vps->end) {
			for (vm_nphysseg--; x < vm_nphysseg; x++)
				vm_physmem[x] = vm_physmem[x + 1];
		}

		/* Now find where the new avail_end is. */
		for (avail_end = 0, x = 0; x < vm_nphysseg; x++)
			if (vm_physmem[x].avail_end > avail_end)
				avail_end = vm_physmem[x].avail_end;
		avail_end = ptoa(avail_end);

		/* Warn if the message buffer had to be shrunk. */
		if (sz != reqsz)
			printf("WARNING: %ld bytes not available for msgbuf "
			    "in last cluster (%ld used)\n", reqsz, sz);
	}

	/*
	 * XXXfvdl todo: acpi wakeup code.
	 */

	pmap_growkernel(VM_MIN_KERNEL_ADDRESS + 32 * 1024 * 1024);

	/* Map the two pages holding the IDT + GDT/LDT backing store. */
	pmap_kenter_pa(idt_vaddr, idt_paddr, VM_PROT_READ|VM_PROT_WRITE);
	pmap_kenter_pa(idt_vaddr + PAGE_SIZE, idt_paddr + PAGE_SIZE,
	    VM_PROT_READ|VM_PROT_WRITE);

	pmap_kenter_pa(lo32_vaddr, lo32_paddr, VM_PROT_READ|VM_PROT_WRITE);

	/* The GDT follows the IDT in the same mapping; the LDT follows it. */
	idt = (struct gate_descriptor *)idt_vaddr;
	gdtstore = (char *)(idt + NIDT);
	ldtstore = gdtstore + DYNSEL_START;

	/* make gdt gates and memory segments */
	set_mem_segment(GDT_ADDR_MEM(gdtstore, GCODE_SEL), 0, 0xfffff, SDT_MEMERA,
	    SEL_KPL, 1, 0, 1);

	set_mem_segment(GDT_ADDR_MEM(gdtstore, GDATA_SEL), 0, 0xfffff, SDT_MEMRWA,
	    SEL_KPL, 1, 0, 1);

	set_sys_segment(GDT_ADDR_SYS(gdtstore, GLDT_SEL), ldtstore, LDT_SIZE - 1,
	    SDT_SYSLDT, SEL_KPL, 0);

	set_mem_segment(GDT_ADDR_MEM(gdtstore, GUCODE_SEL), 0,
	    atop(VM_MAXUSER_ADDRESS) - 1, SDT_MEMERA, SEL_UPL, 1, 0, 1);

	set_mem_segment(GDT_ADDR_MEM(gdtstore, GUDATA_SEL), 0,
	    atop(VM_MAXUSER_ADDRESS) - 1, SDT_MEMRWA, SEL_UPL, 1, 0, 1);

	/* make ldt gates and memory segments */
	setgate((struct gate_descriptor *)(ldtstore + LSYS5CALLS_SEL),
	    &IDTVEC(oosyscall), 0, SDT_SYS386CGT, SEL_UPL,
	    GSEL(GCODE_SEL, SEL_KPL));

	*(struct mem_segment_descriptor *)(ldtstore + LUCODE_SEL) =
	    *GDT_ADDR_MEM(gdtstore, GUCODE_SEL);
	*(struct mem_segment_descriptor *)(ldtstore + LUDATA_SEL) =
	    *GDT_ADDR_MEM(gdtstore, GUDATA_SEL);

	/*
	 * 32 bit GDT entries.
	 */

	set_mem_segment(GDT_ADDR_MEM(gdtstore, GUCODE32_SEL), 0,
	    atop(VM_MAXUSER_ADDRESS) - 1, SDT_MEMERA, SEL_UPL, 1, 1, 0);

	set_mem_segment(GDT_ADDR_MEM(gdtstore, GUDATA32_SEL), 0,
	    atop(VM_MAXUSER_ADDRESS) - 1, SDT_MEMRWA, SEL_UPL, 1, 1, 0);

	/*
	 * 32 bit LDT entries.
	 */
	ldt_segp = (struct mem_segment_descriptor *)(ldtstore + LUCODE32_SEL);
	set_mem_segment(ldt_segp, 0, atop(VM_MAXUSER_ADDRESS32) - 1,
	    SDT_MEMERA, SEL_UPL, 1, 1, 0);
	ldt_segp = (struct mem_segment_descriptor *)(ldtstore + LUDATA32_SEL);
	set_mem_segment(ldt_segp, 0, atop(VM_MAXUSER_ADDRESS32) - 1,
	    SDT_MEMRWA, SEL_UPL, 1, 1, 0);

	/*
	 * Other entries.
	 */
	memcpy((struct gate_descriptor *)(ldtstore + LSOL26CALLS_SEL),
	    (struct gate_descriptor *)(ldtstore + LSYS5CALLS_SEL),
	    sizeof (struct gate_descriptor));
	memcpy((struct gate_descriptor *)(ldtstore + LBSDICALLS_SEL),
	    (struct gate_descriptor *)(ldtstore + LSYS5CALLS_SEL),
	    sizeof (struct gate_descriptor));

	/* exceptions */
	for (x = 0; x < 32; x++) {
		/* trap 8 (#DF, double fault) runs on its own IST stack */
		ist = (x == 8) ? 1 : 0;
		/* traps 3 and 4 (#BP, #OF) must be reachable from user mode */
		setgate(&idt[x], IDTVEC(exceptions)[x], ist, SDT_SYS386IGT,
		    (x == 3 || x == 4) ? SEL_UPL : SEL_KPL,
		    GSEL(GCODE_SEL, SEL_KPL));
		idt_allocmap[x] = 1;
	}

	/* new-style interrupt gate for syscalls */
	setgate(&idt[128], &IDTVEC(osyscall), 0, SDT_SYS386IGT, SEL_UPL,
	    GSEL(GCODE_SEL, SEL_KPL));
	idt_allocmap[128] = 1;

	setregion(&region, gdtstore, DYNSEL_START - 1);
	lgdt(&region);

	cpu_init_idt();

#ifdef DDB
	db_machine_init();
	ddb_init();
	if (boothowto & RB_KDB)
		Debugger();
#endif
#ifdef KGDB
	kgdb_port_init();
	if (boothowto & RB_KDB) {
		kgdb_debug_init = 1;
		kgdb_connect(1);
	}
#endif

	intr_default_setup();

	softintr_init();
	splraise(IPL_IPI);
	enable_intr();

	/* Make sure maxproc is sane */
	if (maxproc > cpu_maxproc())
		maxproc = cpu_maxproc();
}
1568:
#ifdef KGDB
/*
 * Attach the kernel gdb stub to its configured console device.  Only
 * a "com" (serial) device is supported here.
 */
void
kgdb_port_init(void)
{
#if NCOM > 0
	if (strcmp(kgdb_devname, "com") == 0) {
		bus_space_tag_t tag = X86_BUS_SPACE_IO;

		com_kgdb_attach(tag, comkgdbaddr, comkgdbrate, COM_FREQ,
		    comkgdbmode);
	}
#endif
}
#endif /* KGDB */
1582:
/*
 * Reset the machine, trying progressively cruder mechanisms; does not
 * return (spins forever if everything fails).
 */
void
cpu_reset(void)
{

	disable_intr();

	/*
	 * The keyboard controller has 4 random output pins, one of which is
	 * connected to the RESET pin on the CPU in many PCs. We tell the
	 * keyboard controller to pulse this line a couple of times.
	 */
	outb(IO_KBD + KBCMDP, KBC_PULSE0);
	delay(100000);
	outb(IO_KBD + KBCMDP, KBC_PULSE0);
	delay(100000);

	/*
	 * Try to cause a triple fault and watchdog reset by making the IDT
	 * invalid and causing a fault.
	 */
	/* The divide-by-zero below cannot be delivered through the zeroed IDT. */
	memset((caddr_t)idt, 0, NIDT * sizeof(idt[0]));
	__asm __volatile("divl %0,%1" : : "q" (0), "a" (0));

#if 0
	/*
	 * Try to cause a triple fault and watchdog reset by unmapping the
	 * entire address space and doing a TLB flush.
	 */
	memset((caddr_t)PTD, 0, PAGE_SIZE);
	tlbflush();
#endif

	for (;;);
}
1617:
1618: /*
1619: * cpu_dumpsize: calculate size of machine-dependent kernel core dump headers.
1620: */
1621: int
1622: cpu_dumpsize(void)
1623: {
1624: int size;
1625:
1626: size = ALIGN(sizeof(kcore_seg_t)) +
1627: ALIGN(mem_cluster_cnt * sizeof(phys_ram_seg_t));
1628: if (roundup(size, dbtob(1)) != dbtob(1))
1629: return (-1);
1630:
1631: return (1);
1632: }
1633:
1634: /*
1635: * cpu_dump_mempagecnt: calculate the size of RAM (in pages) to be dumped.
1636: */
1637: u_long
1638: cpu_dump_mempagecnt(void)
1639: {
1640: u_long i, n;
1641:
1642: n = 0;
1643: for (i = 0; i < mem_cluster_cnt; i++)
1644: n += atop(mem_clusters[i].size);
1645: return (n);
1646: }
1647:
1648: void
1649: cpu_initclocks(void)
1650: {
1651: (*initclock_func)();
1652:
1653: if (initclock_func == i8254_initclocks)
1654: i8254_inittimecounter();
1655: else
1656: i8254_inittimecounter_simple();
1657: }
1658:
1659: void
1660: need_resched(struct cpu_info *ci)
1661: {
1662: ci->ci_want_resched = 1;
1663: if ((ci)->ci_curproc != NULL)
1664: aston((ci)->ci_curproc);
1665: }
1666:
1667: /*
1668: * Allocate an IDT vector slot within the given range.
1669: * XXX needs locking to avoid MP allocation races.
1670: * XXXfvdl share idt code
1671: */
1672:
1673: int
1674: idt_vec_alloc(int low, int high)
1675: {
1676: int vec;
1677:
1678: simple_lock(&idt_lock);
1679: for (vec = low; vec <= high; vec++) {
1680: if (idt_allocmap[vec] == 0) {
1681: idt_allocmap[vec] = 1;
1682: simple_unlock(&idt_lock);
1683: return vec;
1684: }
1685: }
1686: simple_unlock(&idt_lock);
1687: return 0;
1688: }
1689:
/*
 * Point an already-allocated IDT vector at the given handler
 * (kernel-privilege interrupt gate, no IST).
 */
void
idt_vec_set(int vec, void (*function)(void))
{
	/*
	 * Vector should be allocated, so no locking needed.
	 */
	KASSERT(idt_allocmap[vec] == 1);
	setgate(&idt[vec], function, 0, SDT_SYS386IGT, SEL_KPL,
	    GSEL(GCODE_SEL, SEL_KPL));
}
1700:
/*
 * Release an IDT vector: clear its gate and mark the slot free again.
 */
void
idt_vec_free(int vec)
{
	simple_lock(&idt_lock);
	unsetgate(&idt[vec]);
	idt_allocmap[vec] = 0;
	simple_unlock(&idt_lock);
}
1709:
1710: /*
1711: * Number of processes is limited by number of available GDT slots.
1712: */
1713: int
1714: cpu_maxproc(void)
1715: {
1716: #ifdef USER_LDT
1717: return ((MAXGDTSIZ - DYNSEL_START) / 32);
1718: #else
1719: return (MAXGDTSIZ - DYNSEL_START) / 16;
1720: #endif
1721: }
1722:
#ifdef DIAGNOSTIC
/*
 * Verify that the current interrupt priority level is at least
 * `wantipl'; report a violation via splassert_fail().
 */
void
splassert_check(int wantipl, const char *func)
{
	int cpl = curcpu()->ci_ilevel;

	if (cpl < wantipl)
		splassert_fail(wantipl, cpl, func);
}
#endif
1734:
/*
 * getbootinfo: walk the boot argument list handed over by /boot.  The
 * list is a sequence of { ba_type, ba_size } records terminated by
 * BOOTARG_END; recognized records stash pointers/values into the
 * corresponding bios_* globals.
 *
 * NOTE(review): ba_arg pointers reference the boot argument page —
 * presumably kept mapped for the kernel's lifetime; verify callers.
 */
void
getbootinfo(char *bootinfo, int bootinfo_size)
{
	bootarg32_t *q;

#undef BOOTINFO_DEBUG
#ifdef BOOTINFO_DEBUG
	printf("bootargv:");
#endif

	/* Advance record by record; stop at BOOTARG_END or end of buffer. */
	for (q = (bootarg32_t *)bootinfo;
	    (q->ba_type != BOOTARG_END) &&
	    ((((char *)q) - bootinfo) < bootinfo_size);
	    q = (bootarg32_t *)(((char *)q) + q->ba_size)) {

		switch (q->ba_type) {
		case BOOTARG_MEMMAP:
			bios_memmap = (bios_memmap_t *)q->ba_arg;
#ifdef BOOTINFO_DEBUG
			printf(" memmap %p", bios_memmap);
#endif
			break;
		case BOOTARG_DISKINFO:
			bios_diskinfo = (bios_diskinfo_t *)q->ba_arg;
#ifdef BOOTINFO_DEBUG
			printf(" diskinfo %p", bios_diskinfo);
#endif
			break;
#if 0
#if NAPM > 0 || defined(DEBUG)
		case BOOTARG_APMINFO:
#ifdef BOOTINFO_DEBUG
			printf(" apminfo %p", q->ba_arg);
#endif
			apm = (bios_apminfo_t *)q->ba_arg;
			break;
#endif
#endif
		case BOOTARG_CKSUMLEN:
			bios_cksumlen = *(u_int32_t *)q->ba_arg;
#ifdef BOOTINFO_DEBUG
			printf(" cksumlen %d", bios_cksumlen);
#endif
			break;
#if 0
#if NPCI > 0
		case BOOTARG_PCIINFO:
			bios_pciinfo = (bios_pciinfo_t *)q->ba_arg;
#ifdef BOOTINFO_DEBUG
			printf(" pciinfo %p", bios_pciinfo);
#endif
			break;
#endif
#endif
		case BOOTARG_CONSDEV:
			/* guard against a short record before dereferencing */
			if (q->ba_size >= sizeof(bios_consdev_t))
			{
				bios_consdev_t *cdp =
				    (bios_consdev_t*)q->ba_arg;
#include "com.h"
#if NCOM > 0
				extern int comdefaultrate; /* ic/com.c */
				comdefaultrate = cdp->conspeed;
#endif
#ifdef BOOTINFO_DEBUG
				printf(" console 0x%x:%d",
				    cdp->consdev, cdp->conspeed);
#endif
				cnset(cdp->consdev);
			}
			break;
#ifdef NFSCLIENT
		case BOOTARG_BOOTMAC:
			bios_bootmac = (bios_bootmac_t *)q->ba_arg;
			break;
#endif

		default:
#ifdef BOOTINFO_DEBUG
			printf(" unsupported arg (%d) %p", q->ba_type,
			    q->ba_arg);
#endif
			break;
		}
	}
#ifdef BOOTINFO_DEBUG
	printf("\n");
#endif
}
1824:
1825: int
1826: check_context(const struct reg *regs, struct trapframe *tf)
1827: {
1828: uint16_t sel;
1829:
1830: if (((regs->r_rflags ^ tf->tf_rflags) & PSL_USERSTATIC) != 0)
1831: return EINVAL;
1832:
1833: sel = regs->r_es & 0xffff;
1834: if (sel != 0 && !VALID_USER_DSEL(sel))
1835: return EINVAL;
1836:
1837: sel = regs->r_fs & 0xffff;
1838: if (sel != 0 && !VALID_USER_DSEL(sel))
1839: return EINVAL;
1840:
1841: sel = regs->r_gs & 0xffff;
1842: if (sel != 0 && !VALID_USER_DSEL(sel))
1843: return EINVAL;
1844:
1845: sel = regs->r_ds & 0xffff;
1846: if (!VALID_USER_DSEL(sel))
1847: return EINVAL;
1848:
1849: sel = regs->r_ss & 0xffff;
1850: if (!VALID_USER_DSEL(sel))
1851: return EINVAL;
1852:
1853: sel = regs->r_cs & 0xffff;
1854: if (!VALID_USER_CSEL(sel))
1855: return EINVAL;
1856:
1857: if (regs->r_rip >= VM_MAXUSER_ADDRESS)
1858: return EINVAL;
1859:
1860: return 0;
1861: }
/* (stray "CVSweb" page footer from the web viewer — not part of machdep.c) */