Annotation of sys/arch/i386/i386/pmap.c, Revision 1.1.1.1
1.1 nbrk 1: /* $OpenBSD: pmap.c,v 1.119 2007/06/27 16:16:53 art Exp $ */
2: /* $NetBSD: pmap.c,v 1.91 2000/06/02 17:46:37 thorpej Exp $ */
3:
4: /*
5: *
6: * Copyright (c) 1997 Charles D. Cranor and Washington University.
7: * All rights reserved.
8: *
9: * Redistribution and use in source and binary forms, with or without
10: * modification, are permitted provided that the following conditions
11: * are met:
12: * 1. Redistributions of source code must retain the above copyright
13: * notice, this list of conditions and the following disclaimer.
14: * 2. Redistributions in binary form must reproduce the above copyright
15: * notice, this list of conditions and the following disclaimer in the
16: * documentation and/or other materials provided with the distribution.
17: * 3. All advertising materials mentioning features or use of this software
18: * must display the following acknowledgement:
19: * This product includes software developed by Charles D. Cranor and
20: * Washington University.
21: * 4. The name of the author may not be used to endorse or promote products
22: * derived from this software without specific prior written permission.
23: *
24: * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
25: * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26: * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27: * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
28: * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
29: * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
30: * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
31: * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
32: * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
33: * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34: */
35:
36: /*
37: * pmap.c: i386 pmap module rewrite
38: * Chuck Cranor <chuck@ccrc.wustl.edu>
39: * 11-Aug-97
40: *
41: * history of this pmap module: in addition to my own input, i used
42: * the following references for this rewrite of the i386 pmap:
43: *
44: * [1] the NetBSD i386 pmap. this pmap appears to be based on the
45: * BSD hp300 pmap done by Mike Hibler at University of Utah.
46: * it was then ported to the i386 by William Jolitz of UUNET
47: * Technologies, Inc. Then Charles M. Hannum of the NetBSD
48: * project fixed some bugs and provided some speed ups.
49: *
50: * [2] the FreeBSD i386 pmap. this pmap seems to be the
51: * Hibler/Jolitz pmap, as modified for FreeBSD by John S. Dyson
52: * and David Greenman.
53: *
54: * [3] the Mach pmap. this pmap, from CMU, seems to have migrated
55: * between several processors. the VAX version was done by
56: * Avadis Tevanian, Jr., and Michael Wayne Young. the i386
57: * version was done by Lance Berc, Mike Kupfer, Bob Baron,
58: * David Golub, and Richard Draves. the alpha version was
59: * done by Alessandro Forin (CMU/Mach) and Chris Demetriou
60: * (NetBSD/alpha).
61: */
62:
63: #include <sys/param.h>
64: #include <sys/systm.h>
65: #include <sys/proc.h>
66: #include <sys/malloc.h>
67: #include <sys/pool.h>
68: #include <sys/user.h>
69: #include <sys/kernel.h>
70: #include <sys/mutex.h>
71:
72: #include <uvm/uvm.h>
73:
74: #include <machine/atomic.h>
75: #include <machine/cpu.h>
76: #include <machine/specialreg.h>
77: #include <machine/gdt.h>
78:
79: #include <dev/isa/isareg.h>
80: #include <sys/msgbuf.h>
81: #include <stand/boot/bootarg.h>
82:
83: /*
84: * general info:
85: *
86: * - for an explanation of how the i386 MMU hardware works see
87: * the comments in <machine/pte.h>.
88: *
89: * - for an explanation of the general memory structure used by
90: * this pmap (including the recursive mapping), see the comments
91: * in <machine/pmap.h>.
92: *
93: * this file contains the code for the "pmap module." the module's
94: * job is to manage the hardware's virtual to physical address mappings.
95: * note that there are two levels of mapping in the VM system:
96: *
97: * [1] the upper layer of the VM system uses vm_map's and vm_map_entry's
98: * to map ranges of virtual address space to objects/files. for
99: * example, the vm_map may say: "map VA 0x1000 to 0x22000 read-only
100: * to the file /bin/ls starting at offset zero." note that
101: * the upper layer mapping is not concerned with how individual
102: * vm_pages are mapped.
103: *
104: * [2] the lower layer of the VM system (the pmap) maintains the mappings
105: * from virtual addresses to physical pages. it is concerned with which vm_page is
106: * mapped where. for example, when you run /bin/ls and start
107: * at page 0x1000 the fault routine may look up the correct page
108: * of the /bin/ls file and then ask the pmap layer to establish
109: * a mapping for it.
110: *
111: * note that information in the lower layer of the VM system can be
112: * thrown away since it can easily be reconstructed from the info
113: * in the upper layer.
114: *
115: * data structures we use include:
116: *
117: * - struct pmap: describes the address space of one thread
118: * - struct pv_entry: describes one <PMAP,VA> mapping of a PA
119: * - struct pv_head: there is one pv_head per managed page of
120: * physical memory. the pv_head points to a list of pv_entry
121: * structures which describe all the <PMAP,VA> pairs that this
122: * page is mapped in. this is critical for page based operations
123: * such as pmap_page_protect() [change protection on _all_ mappings
124: * of a page]
125: * - pv_page/pv_page_info: pv_entry's are allocated out of pv_page's.
126: * if we run out of pv_entry's we allocate a new pv_page and free
127: * its pv_entrys.
128: */
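/*
 * illustrative sketch (not from the original source): a page-based
 * operation such as pmap_page_protect() visits every <PMAP,VA> pair of
 * a physical page by walking its pv list. locking is omitted and
 * "visit" is a hypothetical callback:
 */
#if 0
static void
pv_walk_sketch(struct vm_page *pg, void (*visit)(struct pmap *, vaddr_t))
{
	struct pv_entry *pve;

	for (pve = pg->mdpage.pv_list; pve != NULL; pve = pve->pv_next)
		visit(pve->pv_pmap, pve->pv_va);	/* one mapping of pg */
}
#endif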
129: /*
130: * memory allocation
131: *
132: * - there are three data structures that we must dynamically allocate:
133: *
134: * [A] new process' page directory page (PDP)
135: * - plan 1: done at pmap_create() time: we use
136: * uvm_km_alloc(kernel_map, PAGE_SIZE) [fka kmem_alloc] to do this
137: * allocation.
138: *
139: * if we are low in free physical memory then we sleep in
140: * uvm_km_alloc -- in this case this is ok since we are creating
141: * a new pmap and should not be holding any locks.
142: *
143: * if the kernel is totally out of virtual space
144: * (i.e. uvm_km_alloc returns NULL), then we panic.
145: *
146: * XXX: the fork code currently has no way to return an "out of
147: * memory, try again" error code since uvm_fork [fka vm_fork]
148: * is a void function.
149: *
150: * [B] new page table pages (PTP)
151: * call uvm_pagealloc()
152: * => success: zero page, add to pm_pdir
153: * => failure: we are out of free vm_pages, let pmap_enter()
154: * tell UVM about it.
155: *
156: * note: for kernel PTPs, we start with NKPTP of them. as we map
157: * kernel memory (at uvm_map time) we check to see if we've grown
158: * the kernel pmap. if so, we call the optional function
159: * pmap_growkernel() to grow the kernel PTPs in advance.
160: *
161: * [C] pv_entry structures
162: * - plan 1: try to allocate one off the free list
163: * => success: done!
164: * => failure: no more free pv_entrys on the list
165: * - plan 2: try to allocate a new pv_page to add a chunk of
166: * pv_entrys to the free list
167: * [a] obtain a free, unmapped VA in kmem_map. either
168: * we have one saved from a previous call, or we allocate
169: * one now using a "vm_map_lock_try" in uvm_map
170: * => success: we have an unmapped VA, continue to [b]
171: * => failure: unable to lock kmem_map or out of VA in it.
172: * move on to plan 3.
173: * [b] allocate a page for the VA
174: * => success: map it in, free the pv_entry's, DONE!
175: * => failure: no free vm_pages, etc.
176: * save VA for later call to [a], go to plan 3.
177: * - plan 3: give up; we simply let pmap_enter() tell UVM about it.
178: */
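/*
 * condensed sketch of the pv_entry plans above (not from the original
 * source; pv_freelist_get() and pv_page_get() are hypothetical helpers,
 * the real logic is in pmap_alloc_pv()/pmap_alloc_pvpage() below):
 */
#if 0
static struct pv_entry *
pv_alloc_sketch(struct pmap *pmap)
{
	struct pv_entry *pv;
	struct pv_page *pvp;

	/* plan 1: take a pv_entry off the free list */
	if ((pv = pv_freelist_get()) != NULL)
		return (pv);

	/* plan 2: map a new pv_page and carve it into pv_entrys */
	if ((pvp = pv_page_get()) != NULL)
		return (pmap_add_pvpage(pvp, TRUE));

	/* plan 3: give up; pmap_enter() will tell UVM */
	return (NULL);
}
#endif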
179: /*
180: * locking
181: *
182: * we have the following locks that we must contend with:
183: *
184: * "simple" locks:
185: *
186: * - pmap lock (per pmap, part of uvm_object)
187: * this lock protects the fields in the pmap structure including
188: * the non-kernel PDEs in the PDP, and the PTEs. it also locks
189: * in the alternate PTE space (since that is determined by the
190: * entry in the PDP).
191: *
192: * - pvalloc_lock
193: * this lock protects the data structures which are used to manage
194: * the free list of pv_entry structures.
195: *
196: * - pmaps_lock
197: * this lock protects the list of active pmaps (headed by "pmaps").
198: * we lock it when adding or removing pmaps from this list.
199: *
200: */
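/*
 * the lock ordering used when two pmaps must be held at once (this is
 * what pmap_map_ptes() below does): always take the lower-addressed
 * pmap first, so that concurrent lockers cannot deadlock. a minimal
 * sketch:
 */
#if 0
static void
pmap_lock_pair_sketch(struct pmap *a, struct pmap *b)
{
	if ((unsigned) a < (unsigned) b) {
		simple_lock(&a->pm_obj.vmobjlock);
		simple_lock(&b->pm_obj.vmobjlock);
	} else {
		simple_lock(&b->pm_obj.vmobjlock);
		simple_lock(&a->pm_obj.vmobjlock);
	}
}
#endif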
201:
202: /*
203: * locking data structures
204: */
205:
206: struct simplelock pvalloc_lock;
207: struct simplelock pmaps_lock;
208:
209: #define PMAP_MAP_TO_HEAD_LOCK() /* null */
210: #define PMAP_MAP_TO_HEAD_UNLOCK() /* null */
211:
212: #define PMAP_HEAD_TO_MAP_LOCK() /* null */
213: #define PMAP_HEAD_TO_MAP_UNLOCK() /* null */
214:
215: /*
216: * global data structures
217: */
218:
219: struct pmap kernel_pmap_store; /* the kernel's pmap (proc0) */
220:
221: /*
222: * nkpde is the number of kernel PTPs allocated for the kernel at
223: * boot time (NKPTP is a compile time override). this number can
224: * grow dynamically as needed (but once allocated, we never free
225: * kernel PTPs).
226: */
227:
228: int nkpde = NKPTP;
229: #ifdef NKPDE
230: #error "obsolete NKPDE: use NKPTP"
231: #endif
232:
233: /*
234: * pmap_pg_g: if our processor supports PG_G in the PTE then we
235: * set pmap_pg_g to PG_G (otherwise it is zero).
236: */
237:
238: int pmap_pg_g = 0;
239:
240: /*
241: * i386 physical memory comes in a big contig chunk with a small
242: * hole toward the front of it... the following paddr_t's
243: * (shared with machdep.c) describe the physical address space
244: * of this machine.
245: */
246: paddr_t avail_start; /* PA of first available physical page */
247: paddr_t hole_start; /* PA of start of "hole" */
248: paddr_t hole_end; /* PA of end of "hole" */
249:
250: /*
251: * other data structures
252: */
253:
254: static pt_entry_t protection_codes[8]; /* maps MI prot to i386 prot code */
255: static boolean_t pmap_initialized = FALSE; /* pmap_init done yet? */
256:
257: /*
258: * the following two vaddr_t's are used during system startup
259: * to keep track of how much of the kernel's VM space we have used.
260: * once the system is started, the management of the remaining kernel
261: * VM space is turned over to the kernel_map vm_map.
262: */
263:
264: static vaddr_t virtual_avail; /* VA of first free KVA */
265: static vaddr_t virtual_end; /* VA of last free KVA */
266:
267: /*
268: * pv_page management structures: locked by pvalloc_lock
269: */
270:
271: TAILQ_HEAD(pv_pagelist, pv_page);
272: static struct pv_pagelist pv_freepages; /* list of pv_pages with free entries */
273: static struct pv_pagelist pv_unusedpgs; /* list of unused pv_pages */
274: static int pv_nfpvents; /* # of free pv entries */
275: static struct pv_page *pv_initpage; /* bootstrap page from kernel_map */
276: static vaddr_t pv_cachedva; /* cached VA for later use */
277:
278: #define PVE_LOWAT (PVE_PER_PVPAGE / 2) /* free pv_entry low water mark */
279: #define PVE_HIWAT (PVE_LOWAT + (PVE_PER_PVPAGE * 2))
280: /* high water mark */
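/*
 * policy (see pmap_alloc_pv() and pmap_free_pv()/pmap_free_pvs() below):
 * when pv_nfpvents drops below PVE_LOWAT we try to grow the pool by a
 * pv_page; once it rises above PVE_HIWAT we start giving whole unused
 * pv_pages back with pmap_free_pvpage().
 */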
281:
282: /*
283: * linked list of all non-kernel pmaps
284: */
285:
286: struct pmap_head pmaps;
287:
288: /*
289: * pool that pmap structures are allocated from
290: */
291:
292: struct pool pmap_pmap_pool;
293:
294: /*
295: * MULTIPROCESSOR: special VA's/PTE's are actually allocated inside an
296: * I386_MAXPROCS*NPTECL array of PTE's, to avoid cache line thrashing
297: * due to false sharing.
298: */
299:
300: #ifdef MULTIPROCESSOR
301: #define PTESLEW(pte, id) ((pte)+(id)*NPTECL)
302: #define VASLEW(va,id) ((va)+(id)*NPTECL*NBPG)
303: #else
304: #define PTESLEW(pte, id) (pte)
305: #define VASLEW(va,id) (va)
306: #endif
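/*
 * worked example (assuming NPTECL is 8, i.e. one 32-byte cache line of
 * 4-byte PTEs, as the comment in pmap_bootstrap() suggests): on an MP
 * kernel, CPU 2's private "zero" slot is zero_pte + 16 and its VA is
 * zerop + 16 * NBPG; on a uniprocessor kernel both macros collapse to
 * the single shared slot.
 */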
307:
308: /*
309: * special VAs and the PTEs that map them
310: */
311:
312: static pt_entry_t *csrc_pte, *cdst_pte, *zero_pte, *ptp_pte;
313: static caddr_t csrcp, cdstp, zerop, ptpp;
314: caddr_t vmmap; /* XXX: used by mem.c... it should really uvm_map_reserve it */
315:
316: #if defined(I586_CPU)
317: /* stuff to fix the pentium f00f bug */
318: extern vaddr_t pentium_idt_vaddr;
319: #endif
320:
321:
322: /*
323: * local prototypes
324: */
325:
326: struct pv_entry *pmap_add_pvpage(struct pv_page *, boolean_t);
327: struct vm_page *pmap_alloc_ptp(struct pmap *, int, boolean_t, pt_entry_t);
328: struct pv_entry *pmap_alloc_pv(struct pmap *, int); /* see codes below */
329: #define ALLOCPV_NEED 0 /* need PV now */
330: #define ALLOCPV_TRY 1 /* just try to allocate */
331: #define ALLOCPV_NONEED 2 /* don't need PV, just growing cache */
332: struct pv_entry *pmap_alloc_pvpage(struct pmap *, int);
333: void pmap_enter_pv(struct vm_page *, struct pv_entry *,
334: struct pmap *, vaddr_t, struct vm_page *);
335: void pmap_free_pv(struct pmap *, struct pv_entry *);
336: void pmap_free_pvs(struct pmap *, struct pv_entry *);
337: void pmap_free_pv_doit(struct pv_entry *);
338: void pmap_free_pvpage(void);
339: struct vm_page *pmap_get_ptp(struct pmap *, int, boolean_t);
340: boolean_t pmap_is_curpmap(struct pmap *);
341: boolean_t pmap_is_active(struct pmap *, int);
342: void pmap_sync_flags_pte(struct vm_page *, u_long);
343: pt_entry_t *pmap_map_ptes(struct pmap *);
344: struct pv_entry *pmap_remove_pv(struct vm_page *, struct pmap *, vaddr_t);
345: void pmap_do_remove(struct pmap *, vaddr_t, vaddr_t, int);
346: boolean_t pmap_remove_pte(struct pmap *, struct vm_page *, pt_entry_t *,
347: vaddr_t, int);
348: void pmap_remove_ptes(struct pmap *, struct vm_page *, vaddr_t,
349: vaddr_t, vaddr_t, int);
350:
351: #define PMAP_REMOVE_ALL 0
352: #define PMAP_REMOVE_SKIPWIRED 1
353:
354: vaddr_t pmap_tmpmap_pa(paddr_t);
355: pt_entry_t *pmap_tmpmap_pvepte(struct pv_entry *);
356: void pmap_tmpunmap_pa(void);
357: void pmap_tmpunmap_pvepte(struct pv_entry *);
358: void pmap_apte_flush(struct pmap *);
359: void pmap_unmap_ptes(struct pmap *);
360: void pmap_exec_account(struct pmap *, vaddr_t, pt_entry_t,
361: pt_entry_t);
362:
363: void pmap_pinit(pmap_t);
364: void pmap_release(pmap_t);
365:
366: void pmap_zero_phys(paddr_t);
367:
368: void setcslimit(struct pmap *, struct trapframe *, struct pcb *, vaddr_t);
369:
370: /*
371: * p m a p i n l i n e h e l p e r f u n c t i o n s
372: */
373:
374: /*
375: * pmap_is_curpmap: is this pmap the one currently loaded [in %cr3]?
376: * of course the kernel is always loaded
377: */
378:
379: boolean_t
380: pmap_is_curpmap(struct pmap *pmap)
381: {
383: return((pmap == pmap_kernel()) ||
384: (pmap->pm_pdirpa == (paddr_t) rcr3()));
385: }
386:
387: /*
388: * pmap_is_active: is this pmap loaded into the specified processor's %cr3?
389: */
390:
391: boolean_t
392: pmap_is_active(struct pmap *pmap, int cpu_id)
393: {
397: return (pmap == pmap_kernel() ||
398: (pmap->pm_cpus & (1U << cpu_id)) != 0);
399: }
400:
401: static __inline u_int
402: pmap_pte2flags(u_long pte)
403: {
404: return (((pte & PG_U) ? PG_PMAP_REF : 0) |
405: ((pte & PG_M) ? PG_PMAP_MOD : 0));
406: }
407:
408: static __inline u_int
409: pmap_flags2pte(u_long pg_flags)
410: {
411: return (((pg_flags & PG_PMAP_REF) ? PG_U : 0) |
412: ((pg_flags & PG_PMAP_MOD) ? PG_M : 0));
413: }
414:
415: void
416: pmap_sync_flags_pte(struct vm_page *pg, u_long pte)
417: {
418: if (pte & (PG_U|PG_M)) {
419: atomic_setbits_int(&pg->pg_flags, pmap_pte2flags(pte));
420: }
421: }
422:
423: /*
424: * pmap_tmpmap_pa: map a page in for tmp usage
425: */
426:
427: vaddr_t
428: pmap_tmpmap_pa(paddr_t pa)
429: {
430: #ifdef MULTIPROCESSOR
431: int id = cpu_number();
432: #endif
433: pt_entry_t *ptpte = PTESLEW(ptp_pte, id);
434: caddr_t ptpva = VASLEW(ptpp, id);
435: #if defined(DIAGNOSTIC)
436: if (*ptpte)
437: panic("pmap_tmpmap_pa: ptp_pte in use?");
438: #endif
439: *ptpte = PG_V | PG_RW | pa; /* always a new mapping */
440: return((vaddr_t)ptpva);
441: }
442:
443: /*
444: * pmap_tmpunmap_pa: unmap a tmp use page (undoes pmap_tmpmap_pa)
445: */
446:
447: void
448: pmap_tmpunmap_pa(void)
449: {
450: #ifdef MULTIPROCESSOR
451: int id = cpu_number();
452: #endif
453: pt_entry_t *ptpte = PTESLEW(ptp_pte, id);
454: caddr_t ptpva = VASLEW(ptpp, id);
455: #if defined(DIAGNOSTIC)
456: if (!pmap_valid_entry(*ptpte))
457: panic("pmap_tmpunmap_pa: our pte invalid?");
458: #endif
459: *ptpte = 0; /* zap! */
460: pmap_update_pg((vaddr_t)ptpva);
461: #ifdef MULTIPROCESSOR
462: /*
463: * No need for tlb shootdown here, since ptp_pte is per-CPU.
464: */
465: #endif
466: }
467:
468: /*
469: * pmap_tmpmap_pvepte: get a quick mapping of a PTE for a pv_entry
470: *
471: * => do NOT use this on kernel mappings [why? because pv_ptp may be NULL]
472: */
473:
474: pt_entry_t *
475: pmap_tmpmap_pvepte(struct pv_entry *pve)
476: {
477: #ifdef DIAGNOSTIC
478: if (pve->pv_pmap == pmap_kernel())
479: panic("pmap_tmpmap_pvepte: attempt to map kernel");
480: #endif
481:
482: /* is it current pmap? use direct mapping... */
483: if (pmap_is_curpmap(pve->pv_pmap))
484: return(vtopte(pve->pv_va));
485:
486: return(((pt_entry_t *)pmap_tmpmap_pa(VM_PAGE_TO_PHYS(pve->pv_ptp)))
487: + ptei((unsigned)pve->pv_va));
488: }
489:
490: /*
491: * pmap_tmpunmap_pvepte: release a mapping obtained with pmap_tmpmap_pvepte
492: */
493:
494: void
495: pmap_tmpunmap_pvepte(struct pv_entry *pve)
496: {
497: /* was it current pmap? if so, return */
498: if (pmap_is_curpmap(pve->pv_pmap))
499: return;
500:
501: pmap_tmpunmap_pa();
502: }
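/*
 * illustrative sketch (not from the original source): typical use of the
 * tmpmap pair above to read a pv_entry's PTE. remember: never for
 * kernel mappings, and the slot must be released before the next tmpmap
 * on this CPU.
 */
#if 0
static pt_entry_t
pve_read_pte_sketch(struct pv_entry *pve)
{
	pt_entry_t *pte, opte;

	pte = pmap_tmpmap_pvepte(pve);	/* map the PTE in */
	opte = *pte;			/* inspect (or modify) it */
	pmap_tmpunmap_pvepte(pve);	/* release the per-CPU slot */
	return (opte);
}
#endif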
503:
504: void
505: pmap_apte_flush(struct pmap *pmap)
506: {
507: pmap_tlb_shoottlb();
508: pmap_tlb_shootwait();
509: }
510:
511: /*
512: * pmap_map_ptes: map a pmap's PTEs into KVM and lock them in
513: *
514: * => we lock enough pmaps to keep things locked in
515: * => must be undone with pmap_unmap_ptes before returning
516: */
517:
518: pt_entry_t *
519: pmap_map_ptes(struct pmap *pmap)
520: {
521: pd_entry_t opde;
522:
523: /* the kernel's pmap is always accessible */
524: if (pmap == pmap_kernel()) {
525: return(PTE_BASE);
526: }
527:
528: /* if curpmap then we are always mapped */
529: if (pmap_is_curpmap(pmap)) {
530: simple_lock(&pmap->pm_obj.vmobjlock);
531: return(PTE_BASE);
532: }
533:
534: /* need to lock both curpmap and pmap: use ordered locking */
535: if ((unsigned) pmap < (unsigned) curpcb->pcb_pmap) {
536: simple_lock(&pmap->pm_obj.vmobjlock);
537: simple_lock(&curpcb->pcb_pmap->pm_obj.vmobjlock);
538: } else {
539: simple_lock(&curpcb->pcb_pmap->pm_obj.vmobjlock);
540: simple_lock(&pmap->pm_obj.vmobjlock);
541: }
542:
543: /* need to load a new alternate pt space into curpmap? */
544: opde = *APDP_PDE;
545: if (!pmap_valid_entry(opde) || (opde & PG_FRAME) != pmap->pm_pdirpa) {
546: *APDP_PDE = (pd_entry_t) (pmap->pm_pdirpa | PG_RW | PG_V |
547: PG_U | PG_M);
548: if (pmap_valid_entry(opde))
549: pmap_apte_flush(curpcb->pcb_pmap);
550: }
551: return(APTE_BASE);
552: }
553:
554: /*
555: * pmap_unmap_ptes: unlock the PTE mapping of "pmap"
556: */
557:
558: void
559: pmap_unmap_ptes(struct pmap *pmap)
560: {
561: if (pmap == pmap_kernel())
562: return;
563:
564: if (pmap_is_curpmap(pmap)) {
565: simple_unlock(&pmap->pm_obj.vmobjlock);
566: } else {
567: #if defined(MULTIPROCESSOR)
568: *APDP_PDE = 0;
569: pmap_apte_flush(curpcb->pcb_pmap);
570: #endif
571: simple_unlock(&pmap->pm_obj.vmobjlock);
572: simple_unlock(&curpcb->pcb_pmap->pm_obj.vmobjlock);
573: }
574: }
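/*
 * the usual calling pattern for the pair above (pmap_extract() below is
 * a real instance): ptes = pmap_map_ptes(pmap); ... use ptes[atop(va)]
 * ...; pmap_unmap_ptes(pmap);
 */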
575:
576: void
577: pmap_exec_account(struct pmap *pm, vaddr_t va,
578: pt_entry_t opte, pt_entry_t npte)
579: {
580: if (pm == pmap_kernel())
581: return;
582:
583: if (curproc == NULL || curproc->p_vmspace == NULL ||
584: pm != vm_map_pmap(&curproc->p_vmspace->vm_map))
585: return;
586:
587: if ((opte ^ npte) & PG_X)
588: pmap_tlb_shootpage(pm, va);
589:
590: /*
591: * Executability was removed on the last executable change.
592: * Reset the code segment to something conservative and
593: * let the trap handler deal with setting the right limit.
594: * We can't compute the right limit here because of locking constraints on the vm map.
595: *
596: * XXX - floating cs - set this _really_ low.
597: */
598: if ((opte & PG_X) && (npte & PG_X) == 0 && va == pm->pm_hiexec) {
599: struct trapframe *tf = curproc->p_md.md_regs;
600: struct pcb *pcb = &curproc->p_addr->u_pcb;
601:
602: pm->pm_hiexec = I386_MAX_EXE_ADDR;
603: setcslimit(pm, tf, pcb, I386_MAX_EXE_ADDR);
604: }
605: }
606:
607: /*
608: * Fix up the code segment to cover all potential executable mappings.
609: * Called by kernel SEGV trap handler.
610: * returns 0 if no changes to the code segment were made.
611: */
612: int
613: pmap_exec_fixup(struct vm_map *map, struct trapframe *tf, struct pcb *pcb)
614: {
615: struct vm_map_entry *ent;
616: struct pmap *pm = vm_map_pmap(map);
617: vaddr_t va = 0;
618:
619: vm_map_lock(map);
620: for (ent = (&map->header)->next; ent != &map->header; ent = ent->next) {
621: /*
622: * This entry has greater va than the entries before.
623: * We need to make it point to the last page, not past it.
624: */
625: if (ent->protection & VM_PROT_EXECUTE)
626: va = trunc_page(ent->end - 1);
627: }
628: vm_map_unlock(map);
629:
630: if (va <= pm->pm_hiexec) {
631: return (0);
632: }
633:
634: pm->pm_hiexec = va;
635:
636: /*
637: * We have a new 'highest executable' va, so we need to update
638: * the value for the code segment limit, which is stored in the
639: * PCB.
640: */
641: setcslimit(pm, tf, pcb, va);
642:
643: return (1);
644: }
645:
646: void
647: setcslimit(struct pmap *pm, struct trapframe *tf, struct pcb *pcb,
648: vaddr_t limit)
649: {
650: /*
651: * Called when we have a new 'highest executable' va, so we need
652: * to update the value for the code segment limit, which is stored
653: * in the PCB.
654: *
655: * There are no caching issues to be concerned with: the
656: * processor reads the whole descriptor from the GDT when the
657: * appropriate selector is loaded into a segment register, and
658: * this only happens on the return to userland.
659: *
660: * This also works in the MP case, since whichever CPU gets to
661: * run the process will pick up the right descriptor value from
662: * the PCB.
663: */
664: limit = min(limit, VM_MAXUSER_ADDRESS - 1);
665:
666: setsegment(&pm->pm_codeseg, 0, atop(limit),
667: SDT_MEMERA, SEL_UPL, 1, 1);
668:
669: /* And update the GDT and LDT since we may be called by the
670: * trap handler (cpu_switch won't get a chance).
671: */
672: curcpu()->ci_gdt[GUCODE_SEL].sd = pcb->pcb_ldt[LUCODE_SEL].sd =
673: pm->pm_codeseg;
674:
675: pcb->pcb_cs = tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
676: }
677:
678: /*
679: * p m a p k e n t e r f u n c t i o n s
680: *
681: * functions to quickly enter/remove pages from the kernel address
682: * space. pmap_kremove is exported to MI kernel. we make use of
683: * the recursive PTE mappings.
684: */
685:
686: /*
687: * pmap_kenter_pa: enter a kernel mapping without R/M (pv_entry) tracking
688: *
689: * => no need to lock anything, assume va is already allocated
690: * => should be faster than normal pmap enter function
691: */
692:
693: void
694: pmap_kenter_pa(vaddr_t va, paddr_t pa, vm_prot_t prot)
695: {
696: pt_entry_t *pte, opte, npte;
697:
698: pte = vtopte(va);
699: npte = pa | ((prot & VM_PROT_WRITE)? PG_RW : PG_RO) | PG_V |
700: pmap_pg_g | PG_U | PG_M;
701: opte = i386_atomic_testset_ul(pte, npte); /* zap! */
702: if (pmap_valid_entry(opte)) {
703: /* NB. - this should not happen. */
704: pmap_tlb_shootpage(pmap_kernel(), va);
705: pmap_tlb_shootwait();
706: }
707: }
708:
709: /*
710: * pmap_kremove: remove kernel mapping(s) without R/M (pv_entry) tracking
711: *
712: * => no need to lock anything
713: * => caller must dispose of any vm_page mapped in the va range
714: * => note: not an inline function
715: * => we assume the va is page aligned and the len is a multiple of PAGE_SIZE
716: */
717:
718: void
719: pmap_kremove(vaddr_t sva, vsize_t len)
720: {
721: pt_entry_t *pte, opte;
722: vaddr_t va, eva;
723:
724: eva = sva + len;
725:
726: for (va = sva; va != eva; va += PAGE_SIZE) {
727: pte = kvtopte(va);
728: opte = i386_atomic_testset_ul(pte, 0);
729: #ifdef DIAGNOSTIC
730: if (opte & PG_PVLIST)
731: panic("pmap_kremove: PG_PVLIST mapping for 0x%lx", va);
732: #endif
733: }
734: pmap_tlb_shootrange(pmap_kernel(), sva, eva);
735: pmap_tlb_shootwait();
736: }
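/*
 * illustrative sketch (not from the original source): the usual pairing
 * of the two functions above, assuming the caller already owns "va"
 * (compare the pv_page mapping in pmap_alloc_pvpage() below):
 */
#if 0
static void
kmap_one_page_sketch(vaddr_t va, struct vm_page *pg)
{
	pmap_kenter_pa(va, VM_PAGE_TO_PHYS(pg),
	    VM_PROT_READ | VM_PROT_WRITE);
	/* ... use the mapping at va ... */
	pmap_kremove(va, PAGE_SIZE);
}
#endif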
737:
738: /*
739: * p m a p i n i t f u n c t i o n s
740: *
741: * pmap_bootstrap and pmap_init are called during system startup
742: * to init the pmap module. pmap_bootstrap() does a low level
743: * init just to get things rolling. pmap_init() finishes the job.
744: */
745:
746: /*
747: * pmap_bootstrap: get the system in a state where it can run with VM
748: * properly enabled (called before main()). the VM system is
749: * fully init'd later...
750: *
751: * => on i386, locore.s has already enabled the MMU by allocating
752: * a PDP for the kernel, and nkpde PTP's for the kernel.
753: * => kva_start is the first free virtual address in kernel space
754: */
755:
756: void
757: pmap_bootstrap(vaddr_t kva_start)
758: {
759: extern paddr_t avail_end;
760: struct pmap *kpm;
761: vaddr_t kva;
762: pt_entry_t *pte;
763:
764: /*
765: * set the page size (default value is 4K which is ok)
766: */
767:
768: uvm_setpagesize();
769:
770: /*
771: * a quick sanity check
772: */
773:
774: if (PAGE_SIZE != NBPG)
775: panic("pmap_bootstrap: PAGE_SIZE != NBPG");
776:
777: /*
778: * use the very last page of physical memory for the message buffer
779: */
780:
781: avail_end -= round_page(MSGBUFSIZE);
782: /*
783: * The arguments passed in from /boot need space too.
784: */
785: avail_end -= round_page(bootargc);
786:
787: /*
788: * set up our local static global vars that keep track of the
789: * usage of KVM before kernel_map is set up
790: */
791:
792: virtual_avail = kva_start; /* first free KVA */
793: virtual_end = VM_MAX_KERNEL_ADDRESS; /* last KVA */
794:
795: /*
796: * set up protection_codes: we need to be able to convert from
797: * a MI protection code (some combo of VM_PROT...) to something
798: * we can jam into an i386 PTE.
799: */
800:
801: protection_codes[UVM_PROT_NONE] = 0; /* --- */
802: protection_codes[UVM_PROT_EXEC] = PG_X; /* --x */
803: protection_codes[UVM_PROT_READ] = PG_RO; /* -r- */
804: protection_codes[UVM_PROT_RX] = PG_X; /* -rx */
805: protection_codes[UVM_PROT_WRITE] = PG_RW; /* w-- */
806: protection_codes[UVM_PROT_WX] = PG_RW|PG_X; /* w-x */
807: protection_codes[UVM_PROT_RW] = PG_RW; /* wr- */
808: protection_codes[UVM_PROT_RWX] = PG_RW|PG_X; /* wrx */
809:
810: /*
811: * now we init the kernel's pmap
812: *
813: * the kernel pmap's pm_obj is not used for much. however, in
814: * user pmaps the pm_obj contains the list of active PTPs.
815: * the pm_obj currently does not have a pager. it might be possible
816: * to add a pager that would allow a process to read-only mmap its
817: * own page tables (fast user level vtophys?). this may or may not
818: * be useful.
819: */
820:
821: kpm = pmap_kernel();
822: simple_lock_init(&kpm->pm_obj.vmobjlock);
823: kpm->pm_obj.pgops = NULL;
824: TAILQ_INIT(&kpm->pm_obj.memq);
825: kpm->pm_obj.uo_npages = 0;
826: kpm->pm_obj.uo_refs = 1;
827: bzero(&kpm->pm_list, sizeof(kpm->pm_list)); /* pm_list not used */
828: kpm->pm_pdir = (pd_entry_t *)(proc0.p_addr->u_pcb.pcb_cr3 + KERNBASE);
829: kpm->pm_pdirpa = (u_int32_t) proc0.p_addr->u_pcb.pcb_cr3;
830: kpm->pm_stats.wired_count = kpm->pm_stats.resident_count =
831: atop(kva_start - VM_MIN_KERNEL_ADDRESS);
832:
833: /*
834: * the above is just a rough estimate and not critical to the proper
835: * operation of the system.
836: */
837:
838: /*
839: * enable global TLB entries if they are supported
840: */
841:
842: if (cpu_feature & CPUID_PGE) {
843: lcr4(rcr4() | CR4_PGE); /* enable hardware (via %cr4) */
844: pmap_pg_g = PG_G; /* enable software */
845:
846: /* add PG_G attribute to already mapped kernel pages */
847: for (kva = VM_MIN_KERNEL_ADDRESS ; kva < virtual_avail ;
848: kva += PAGE_SIZE)
849: if (pmap_valid_entry(PTE_BASE[atop(kva)]))
850: PTE_BASE[atop(kva)] |= PG_G;
851: }
852:
853: /*
854: * now we allocate the "special" VAs which are used for tmp mappings
855: * by the pmap (and other modules). we allocate the VAs by advancing
856: * virtual_avail (note that there are no pages mapped at these VAs).
857: * we find the PTE that maps the allocated VA via the linear PTE
858: * mapping.
859: */
860:
861: pte = PTE_BASE + atop(virtual_avail);
862:
863: #ifdef MULTIPROCESSOR
864: /*
865: * Waste some VA space to avoid false sharing of cache lines
866: * for page table pages: Give each possible CPU a cache line
867: * of PTE's (8) to play with, though we only need 4. We could
868: * recycle some of this waste by putting the idle stacks here
869: * as well; we could waste less space if we knew the largest
870: * CPU ID beforehand.
871: */
872: csrcp = (caddr_t) virtual_avail; csrc_pte = pte;
873:
874: cdstp = (caddr_t) virtual_avail+PAGE_SIZE; cdst_pte = pte+1;
875:
876: zerop = (caddr_t) virtual_avail+PAGE_SIZE*2; zero_pte = pte+2;
877:
878: ptpp = (caddr_t) virtual_avail+PAGE_SIZE*3; ptp_pte = pte+3;
879:
880: virtual_avail += PAGE_SIZE * I386_MAXPROCS * NPTECL;
881: pte += I386_MAXPROCS * NPTECL;
882: #else
883: csrcp = (caddr_t) virtual_avail; csrc_pte = pte; /* allocate */
884: virtual_avail += PAGE_SIZE; pte++; /* advance */
885:
886: cdstp = (caddr_t) virtual_avail; cdst_pte = pte;
887: virtual_avail += PAGE_SIZE; pte++;
888:
889: zerop = (caddr_t) virtual_avail; zero_pte = pte;
890: virtual_avail += PAGE_SIZE; pte++;
891:
892: ptpp = (caddr_t) virtual_avail; ptp_pte = pte;
893: virtual_avail += PAGE_SIZE; pte++;
894: #endif
895:
896: /* XXX: vmmap used by mem.c... should be uvm_map_reserve */
897: vmmap = (char *)virtual_avail; /* don't need pte */
898: virtual_avail += PAGE_SIZE;
899:
900: msgbufp = (struct msgbuf *)virtual_avail; /* don't need pte */
901: virtual_avail += round_page(MSGBUFSIZE); pte++;
902:
903: bootargp = (bootarg_t *)virtual_avail;
904: virtual_avail += round_page(bootargc); pte++;
905:
906: /*
907: * now we reserve some VM for mapping pages when doing a crash dump
908: */
909:
910: virtual_avail = reserve_dumppages(virtual_avail);
911:
912: /*
913: * init the static-global locks and global lists.
914: */
915:
916: simple_lock_init(&pvalloc_lock);
917: simple_lock_init(&pmaps_lock);
918: LIST_INIT(&pmaps);
919: TAILQ_INIT(&pv_freepages);
920: TAILQ_INIT(&pv_unusedpgs);
921:
922: /*
923: * initialize the pmap pool.
924: */
925:
926: pool_init(&pmap_pmap_pool, sizeof(struct pmap), 0, 0, 0, "pmappl",
927: &pool_allocator_nointr);
928:
929: /*
930: * ensure the TLB is sync'd with reality by flushing it...
931: */
932:
933: tlbflush();
934: }
935:
936: /*
937: * pmap_init: called from uvm_init, our job is to get the pmap
938: * system ready to manage mappings... this mainly means initing
939: * the pv_entry stuff.
940: */
941:
942: void
943: pmap_init(void)
944: {
945: /*
946: * now we need to stock the free list with enough pv_entry structures to let us get
947: * the kmem_map allocated and inited (done after this function is
948: * finished). to do this we allocate one bootstrap page out of
949: * kernel_map and use it to provide an initial pool of pv_entry
950: * structures. we never free this page.
951: */
952:
953: pv_initpage = (struct pv_page *) uvm_km_alloc(kernel_map, PAGE_SIZE);
954: if (pv_initpage == NULL)
955: panic("pmap_init: pv_initpage");
956: pv_cachedva = 0; /* a VA we have allocated but not used yet */
957: pv_nfpvents = 0;
958: (void) pmap_add_pvpage(pv_initpage, FALSE);
959:
960: /*
961: * done: pmap module is up (and ready for business)
962: */
963:
964: pmap_initialized = TRUE;
965: }
966:
967: /*
968: * p v _ e n t r y f u n c t i o n s
969: */
970:
971: /*
972: * pv_entry allocation functions:
973: * the main pv_entry allocation functions are:
974: * pmap_alloc_pv: allocate a pv_entry structure
975: * pmap_free_pv: free one pv_entry
976: * pmap_free_pvs: free a list of pv_entrys
977: *
978: * the rest are helper functions
979: */
980:
981: /*
982: * pmap_alloc_pv: inline function to allocate a pv_entry structure
983: * => we lock pvalloc_lock
984: * => if we fail, we call out to pmap_alloc_pvpage
985: * => 3 modes:
986: * ALLOCPV_NEED = we really need a pv_entry
987: * ALLOCPV_TRY = we want a pv_entry
988: * ALLOCPV_NONEED = we are trying to grow our free list, don't really need
989: * one now
990: *
991: * "try" is for optional functions like pmap_copy().
992: */
993:
994: struct pv_entry *
995: pmap_alloc_pv(struct pmap *pmap, int mode)
996: {
997: struct pv_page *pvpage;
998: struct pv_entry *pv;
999:
1000: simple_lock(&pvalloc_lock);
1001:
1002: if (!TAILQ_EMPTY(&pv_freepages)) {
1003: pvpage = TAILQ_FIRST(&pv_freepages);
1004: pvpage->pvinfo.pvpi_nfree--;
1005: if (pvpage->pvinfo.pvpi_nfree == 0) {
1006: /* nothing left in this one? */
1007: TAILQ_REMOVE(&pv_freepages, pvpage, pvinfo.pvpi_list);
1008: }
1009: pv = pvpage->pvinfo.pvpi_pvfree;
1010: #ifdef DIAGNOSTIC
1011: if (pv == NULL)
1012: panic("pmap_alloc_pv: pvpi_nfree off");
1013: #endif
1014: pvpage->pvinfo.pvpi_pvfree = pv->pv_next;
1015: pv_nfpvents--; /* took one from pool */
1016: } else {
1017: pv = NULL; /* need more of them */
1018: }
1019:
1020: /*
1021: * if below low water mark or we didn't get a pv_entry we try and
1022: * create more pv_entrys ...
1023: */
1024:
1025: if (pv_nfpvents < PVE_LOWAT || pv == NULL) {
1026: if (pv == NULL)
1027: pv = pmap_alloc_pvpage(pmap, (mode == ALLOCPV_TRY) ?
1028: mode : ALLOCPV_NEED);
1029: else
1030: (void) pmap_alloc_pvpage(pmap, ALLOCPV_NONEED);
1031: }
1032:
1033: simple_unlock(&pvalloc_lock);
1034: return(pv);
1035: }
1036:
1037: /*
1038: * pmap_alloc_pvpage: maybe allocate a new pvpage
1039: *
1040: * if need_entry is false: try and allocate a new pv_page
1041: * if need_entry is true: try and allocate a new pv_page and return a
1042: * new pv_entry from it.
1043: *
1044: * => we assume that the caller holds pvalloc_lock
1045: */
1046:
1047: struct pv_entry *
1048: pmap_alloc_pvpage(struct pmap *pmap, int mode)
1049: {
1050: struct vm_page *pg;
1051: struct pv_page *pvpage;
1052: struct pv_entry *pv;
1053: int s;
1054:
1055: /*
1056: * if we need_entry and we've got unused pv_pages, allocate from there
1057: */
1058:
1059: if (mode != ALLOCPV_NONEED && !TAILQ_EMPTY(&pv_unusedpgs)) {
1060:
1061: /* move it to pv_freepages list */
1062: pvpage = TAILQ_FIRST(&pv_unusedpgs);
1063: TAILQ_REMOVE(&pv_unusedpgs, pvpage, pvinfo.pvpi_list);
1064: TAILQ_INSERT_HEAD(&pv_freepages, pvpage, pvinfo.pvpi_list);
1065:
1066: /* allocate a pv_entry */
1067: pvpage->pvinfo.pvpi_nfree--; /* can't go to zero */
1068: pv = pvpage->pvinfo.pvpi_pvfree;
1069: #ifdef DIAGNOSTIC
1070: if (pv == NULL)
1071: panic("pmap_alloc_pvpage: pvpi_nfree off");
1072: #endif
1073: pvpage->pvinfo.pvpi_pvfree = pv->pv_next;
1074:
1075: pv_nfpvents--; /* took one from pool */
1076: return(pv);
1077: }
1078:
1079: /*
1080: * see if we've got a cached unmapped VA that we can map a page in.
1081: * if not, try to allocate one.
1082: */
1083:
1084: s = splvm(); /* must protect kmem_map with splvm! */
1085: if (pv_cachedva == 0) {
1086: pv_cachedva = uvm_km_kmemalloc(kmem_map, NULL,
1087: NBPG, UVM_KMF_TRYLOCK|UVM_KMF_VALLOC);
1088: }
1089: splx(s);
1090: if (pv_cachedva == 0)
1091: return (NULL);
1092:
1093: pg = uvm_pagealloc(NULL, 0, NULL, UVM_PGA_USERESERVE);
1094: if (pg == NULL)
1095: return (NULL);
1096:
1097: atomic_clearbits_int(&pg->pg_flags, PG_BUSY);
1098:
1099: /*
1100: * add a mapping for our new pv_page and free its entries (save one!)
1101: *
1102: * NOTE: If we are allocating a PV page for the kernel pmap, the
1103: * pmap is already locked! (...but entering the mapping is safe...)
1104: */
1105:
1106: pmap_kenter_pa(pv_cachedva, VM_PAGE_TO_PHYS(pg),
1107: VM_PROT_READ|VM_PROT_WRITE);
1108: pvpage = (struct pv_page *) pv_cachedva;
1109: pv_cachedva = 0;
1110: return (pmap_add_pvpage(pvpage, mode != ALLOCPV_NONEED));
1111: }
1112:
1113: /*
1114: * pmap_add_pvpage: add a pv_page's pv_entrys to the free list
1115: *
1116: * => caller must hold pvalloc_lock
1117: * => if need_entry is true, we allocate and return one pv_entry
1118: */
1119:
1120: struct pv_entry *
1121: pmap_add_pvpage(struct pv_page *pvp, boolean_t need_entry)
1122: {
1123: int tofree, lcv;
1124:
1125: /* do we need to return one? */
1126: tofree = (need_entry) ? PVE_PER_PVPAGE - 1 : PVE_PER_PVPAGE;
1127:
1128: pvp->pvinfo.pvpi_pvfree = NULL;
1129: pvp->pvinfo.pvpi_nfree = tofree;
1130: for (lcv = 0 ; lcv < tofree ; lcv++) {
1131: pvp->pvents[lcv].pv_next = pvp->pvinfo.pvpi_pvfree;
1132: pvp->pvinfo.pvpi_pvfree = &pvp->pvents[lcv];
1133: }
1134: if (need_entry)
1135: TAILQ_INSERT_TAIL(&pv_freepages, pvp, pvinfo.pvpi_list);
1136: else
1137: TAILQ_INSERT_TAIL(&pv_unusedpgs, pvp, pvinfo.pvpi_list);
1138: pv_nfpvents += tofree;
1139: return((need_entry) ? &pvp->pvents[lcv] : NULL);
1140: }
1141:
1142: /*
1143: * pmap_free_pv_doit: actually free a pv_entry
1144: *
1145: * => do not call this directly! instead use either
1146: * 1. pmap_free_pv ==> free a single pv_entry
1147: * 2. pmap_free_pvs => free a list of pv_entrys
1148: * => we must be holding pvalloc_lock
1149: */
1150:
1151: void
1152: pmap_free_pv_doit(struct pv_entry *pv)
1153: {
1154: struct pv_page *pvp;
1155:
1156: pvp = (struct pv_page*)trunc_page((vaddr_t)pv);
1157: pv_nfpvents++;
1158: pvp->pvinfo.pvpi_nfree++;
1159:
1160: /* nfree == 1 => fully allocated page just became partly allocated */
1161: if (pvp->pvinfo.pvpi_nfree == 1) {
1162: TAILQ_INSERT_HEAD(&pv_freepages, pvp, pvinfo.pvpi_list);
1163: }
1164:
1165: /* free it */
1166: pv->pv_next = pvp->pvinfo.pvpi_pvfree;
1167: pvp->pvinfo.pvpi_pvfree = pv;
1168:
1169: /*
1170: * are all pv_page's pv_entry's free? move it to unused queue.
1171: */
1172:
1173: if (pvp->pvinfo.pvpi_nfree == PVE_PER_PVPAGE) {
1174: TAILQ_REMOVE(&pv_freepages, pvp, pvinfo.pvpi_list);
1175: TAILQ_INSERT_HEAD(&pv_unusedpgs, pvp, pvinfo.pvpi_list);
1176: }
1177: }
1178:
1179: /*
1180: * pmap_free_pv: free a single pv_entry
1181: *
1182: * => we gain the pvalloc_lock
1183: */
1184:
1185: void
1186: pmap_free_pv(struct pmap *pmap, struct pv_entry *pv)
1187: {
1188: simple_lock(&pvalloc_lock);
1189: pmap_free_pv_doit(pv);
1190:
1191: /*
1192: * Can't free the PV page if the PV entries were associated with
1193: * the kernel pmap; the pmap is already locked.
1194: */
1195: if (pv_nfpvents > PVE_HIWAT && TAILQ_FIRST(&pv_unusedpgs) != NULL &&
1196: pmap != pmap_kernel())
1197: pmap_free_pvpage();
1198:
1199: simple_unlock(&pvalloc_lock);
1200: }
1201:
1202: /*
1203: * pmap_free_pvs: free a list of pv_entrys
1204: *
1205: * => we gain the pvalloc_lock
1206: */
1207:
1208: void
1209: pmap_free_pvs(struct pmap *pmap, struct pv_entry *pvs)
1210: {
1211: struct pv_entry *nextpv;
1212:
1213: simple_lock(&pvalloc_lock);
1214:
1215: for ( /* null */ ; pvs != NULL ; pvs = nextpv) {
1216: nextpv = pvs->pv_next;
1217: pmap_free_pv_doit(pvs);
1218: }
1219:
1220: /*
1221: * Can't free the PV page if the PV entries were associated with
1222: * the kernel pmap; the pmap is already locked.
1223: */
1224: if (pv_nfpvents > PVE_HIWAT && TAILQ_FIRST(&pv_unusedpgs) != NULL &&
1225: pmap != pmap_kernel())
1226: pmap_free_pvpage();
1227:
1228: simple_unlock(&pvalloc_lock);
1229: }
1230:
1231:
1232: /*
1233: * pmap_free_pvpage: try and free an unused pv_page structure
1234: *
1235: * => assume caller is holding the pvalloc_lock and that
1236: * there is a page on the pv_unusedpgs list
1237: * => if we can't get a lock on the kmem_map we try again later
1238: */
1239:
1240: void
1241: pmap_free_pvpage(void)
1242: {
1243: int s;
1244: struct vm_map *map;
1245: struct vm_map_entry *dead_entries;
1246: struct pv_page *pvp;
1247:
1248: s = splvm(); /* protect kmem_map */
1249: pvp = TAILQ_FIRST(&pv_unusedpgs);
1250:
1251: /*
1252: * note: watch out for pv_initpage which is allocated out of
1253: * kernel_map rather than kmem_map.
1254: */
1255:
1256: if (pvp == pv_initpage)
1257: map = kernel_map;
1258: else
1259: map = kmem_map;
1260: if (vm_map_lock_try(map)) {
1261:
1262: /* remove pvp from pv_unusedpgs */
1263: TAILQ_REMOVE(&pv_unusedpgs, pvp, pvinfo.pvpi_list);
1264:
1265: /* unmap the page */
1266: dead_entries = NULL;
1267: uvm_unmap_remove(map, (vaddr_t)pvp, ((vaddr_t)pvp) + PAGE_SIZE,
1268: &dead_entries, NULL);
1269: vm_map_unlock(map);
1270:
1271: if (dead_entries != NULL)
1272: uvm_unmap_detach(dead_entries, 0);
1273:
1274: pv_nfpvents -= PVE_PER_PVPAGE; /* update free count */
1275: 
1276: if (pvp == pv_initpage)
1277: /* no more initpage, we've freed it */
1278: pv_initpage = NULL;
1279: }
1280: 
1281: splx(s);
1282: }
1283:
1284: /*
1285: * main pv_entry manipulation functions:
1286: * pmap_enter_pv: enter a mapping onto a pv list
1287: * pmap_remove_pv: remove a mapping from a pv list
1288: */
1289:
1290: /*
1291: * pmap_enter_pv: enter a mapping onto a pv list
1292: *
1293: * => caller should have pmap locked
1294: * => we will gain the lock on the pv and allocate the new pv_entry
1295: * => caller should adjust ptp's wire_count before calling
1296: *
1297: * pve: preallocated pve for us to use
1298: * ptp: PTP in pmap that maps this VA
1299: */
1300:
1301: void
1302: pmap_enter_pv(struct vm_page *pg, struct pv_entry *pve, struct pmap *pmap,
1303: vaddr_t va, struct vm_page *ptp)
1304: {
1305: pve->pv_pmap = pmap;
1306: pve->pv_va = va;
1307: pve->pv_ptp = ptp; /* NULL for kernel pmap */
1308: pve->pv_next = pg->mdpage.pv_list; /* add to ... */
1309: pg->mdpage.pv_list = pve; /* ... locked list */
1310: }
1311:
1312: /*
1313: * pmap_remove_pv: try to remove a mapping from a pv_list
1314: *
1315: * => pmap should be locked
1316: * => caller should hold lock on pv [so that attrs can be adjusted]
1317: * => caller should adjust ptp's wire_count and free PTP if needed
1318: * => we return the removed pve
1319: */
1320:
1321: struct pv_entry *
1322: pmap_remove_pv(struct vm_page *pg, struct pmap *pmap, vaddr_t va)
1323: {
1324: struct pv_entry *pve, **prevptr;
1325:
1326: prevptr = &pg->mdpage.pv_list; /* previous pv_entry pointer */
1327: while ((pve = *prevptr) != NULL) {
1328: if (pve->pv_pmap == pmap && pve->pv_va == va) { /* match? */
1329: *prevptr = pve->pv_next; /* remove it! */
1330: break;
1331: }
1332: prevptr = &pve->pv_next; /* previous pointer */
1333: }
1334: return(pve); /* return removed pve */
1335: }
1336:
1337: /*
1338: * p t p f u n c t i o n s
1339: */
1340:
1341: /*
1342: * pmap_alloc_ptp: allocate a PTP for a PMAP
1343: *
1344: * => pmap should already be locked by caller
1345: * => we use the ptp's wire_count to count the number of active mappings
1346: * in the PTP (we start it at one to prevent any chance this PTP
1347: * will ever leak onto the active/inactive queues)
1348: * => we may need to lock pv lists if we have to steal a PTP
1349: * => just_try: true if we want a PTP, but not enough to steal one
1350: * from another pmap (e.g. during optional functions like pmap_copy)
1351: */
1352:
1353: struct vm_page *
1354: pmap_alloc_ptp(struct pmap *pmap, int pde_index, boolean_t just_try,
1355: pt_entry_t pde_flags)
1356: {
1357: struct vm_page *ptp;
1358:
1359: ptp = uvm_pagealloc(&pmap->pm_obj, ptp_i2o(pde_index), NULL,
1360: UVM_PGA_USERESERVE|UVM_PGA_ZERO);
1361: if (ptp == NULL)
1362: return (NULL);
1363:
1364: /* got one! */
1365: atomic_clearbits_int(&ptp->pg_flags, PG_BUSY);
1366: ptp->wire_count = 1; /* no mappings yet */
1367: pmap->pm_pdir[pde_index] = (pd_entry_t)(VM_PAGE_TO_PHYS(ptp) |
1368: PG_RW | PG_V | PG_M | PG_U | pde_flags);
1369: pmap->pm_stats.resident_count++; /* count PTP as resident */
1370: pmap->pm_ptphint = ptp;
1371: return(ptp);
1372: }
1373:
1374: /*
1375: * pmap_get_ptp: get a PTP (if there isn't one, allocate a new one)
1376: *
1377: * => pmap should NOT be pmap_kernel()
1378: * => pmap should be locked
1379: */
1380:
1381: struct vm_page *
1382: pmap_get_ptp(struct pmap *pmap, int pde_index, boolean_t just_try)
1383: {
1384: struct vm_page *ptp;
1385:
1386: if (pmap_valid_entry(pmap->pm_pdir[pde_index])) {
1387:
1388: /* valid... check hint (saves us a PA->PG lookup) */
1389: if (pmap->pm_ptphint &&
1390: (pmap->pm_pdir[pde_index] & PG_FRAME) ==
1391: VM_PAGE_TO_PHYS(pmap->pm_ptphint))
1392: return(pmap->pm_ptphint);
1393:
1394: ptp = uvm_pagelookup(&pmap->pm_obj, ptp_i2o(pde_index));
1395: #ifdef DIAGNOSTIC
1396: if (ptp == NULL)
1397: panic("pmap_get_ptp: unmanaged user PTP");
1398: #endif
1399: pmap->pm_ptphint = ptp;
1400: return(ptp);
1401: }
1402:
1403: /* allocate a new PTP (updates ptphint) */
1404: return (pmap_alloc_ptp(pmap, pde_index, just_try, PG_u));
1405: }
1406:
1407: /*
1408: * p m a p l i f e c y c l e f u n c t i o n s
1409: */
1410:
1411: /*
1412: * pmap_create: create a pmap
1413: *
1414: * => note: the old pmap interface took a "size" arg which allowed for
1415: * the creation of "software only" pmaps (not in bsd).
1416: */
1417:
1418: struct pmap *
1419: pmap_create(void)
1420: {
1421: struct pmap *pmap;
1422:
1423: pmap = pool_get(&pmap_pmap_pool, PR_WAITOK);
1424: pmap_pinit(pmap);
1425: return(pmap);
1426: }
1427:
1428: /*
1429: * pmap_pinit: given a zero'd pmap structure, init it.
1430: */
1431:
1432: void
1433: pmap_pinit(struct pmap *pmap)
1434: {
1435: /* init uvm_object */
1436: simple_lock_init(&pmap->pm_obj.vmobjlock);
1437: pmap->pm_obj.pgops = NULL; /* currently not a mappable object */
1438: TAILQ_INIT(&pmap->pm_obj.memq);
1439: pmap->pm_obj.uo_npages = 0;
1440: pmap->pm_obj.uo_refs = 1;
1441: pmap->pm_stats.wired_count = 0;
1442: pmap->pm_stats.resident_count = 1; /* count the PDP allocd below */
1443: pmap->pm_ptphint = NULL;
1444: pmap->pm_hiexec = 0;
1445: pmap->pm_flags = 0;
1446: pmap->pm_cpus = 0;
1447:
1448: setsegment(&pmap->pm_codeseg, 0, atop(I386_MAX_EXE_ADDR) - 1,
1449: SDT_MEMERA, SEL_UPL, 1, 1);
1450:
1451: /* allocate PDP */
1452: pmap->pm_pdir = (pd_entry_t *) uvm_km_alloc(kernel_map, NBPG);
1453: if (pmap->pm_pdir == NULL)
1454: panic("pmap_pinit: kernel_map out of virtual space!");
1455: (void) pmap_extract(pmap_kernel(), (vaddr_t)pmap->pm_pdir,
1456: (paddr_t *)&pmap->pm_pdirpa);
1457:
1458: /* init PDP */
1459: /* zero init area */
1460: bzero(pmap->pm_pdir, PDSLOT_PTE * sizeof(pd_entry_t));
1461: /* put in recursive PDE to map the PTEs */
1462: pmap->pm_pdir[PDSLOT_PTE] = pmap->pm_pdirpa | PG_V | PG_KW | PG_U |
1463: PG_M;
1464:
1465: /* init the LDT */
1466: pmap->pm_ldt = NULL;
1467: pmap->pm_ldt_len = 0;
1468: pmap->pm_ldt_sel = GSEL(GLDT_SEL, SEL_KPL);
1469:
1470: /*
1471: * we need to lock pmaps_lock to prevent nkpde from changing on
1472: * us. note that there is no need to splvm to protect us from
1473: * malloc since malloc allocates out of a submap and we should have
1474: * already allocated kernel PTPs to cover the range...
1475: */
1476: simple_lock(&pmaps_lock);
1477: /* put in kernel VM PDEs */
1478: bcopy(&PDP_BASE[PDSLOT_KERN], &pmap->pm_pdir[PDSLOT_KERN],
1479: nkpde * sizeof(pd_entry_t));
1480: /* zero the rest */
1481: bzero(&pmap->pm_pdir[PDSLOT_KERN + nkpde],
1482: NBPG - ((PDSLOT_KERN + nkpde) * sizeof(pd_entry_t)));
1483: LIST_INSERT_HEAD(&pmaps, pmap, pm_list);
1484: simple_unlock(&pmaps_lock);
1485: }
1486:
1487: /*
1488: * pmap_destroy: drop reference count on pmap. free pmap if
1489: * reference count goes to zero.
1490: */
1491:
1492: void
1493: pmap_destroy(struct pmap *pmap)
1494: {
1495: int refs;
1496:
1497: /*
1498: * drop reference count
1499: */
1500:
1501: simple_lock(&pmap->pm_obj.vmobjlock);
1502: refs = --pmap->pm_obj.uo_refs;
1503: simple_unlock(&pmap->pm_obj.vmobjlock);
1504: if (refs > 0)
1505: return;
1506:
1507: /*
1508: * reference count is zero, free pmap resources and then free pmap.
1509: */
1510:
1511: pmap_release(pmap);
1512: pool_put(&pmap_pmap_pool, pmap);
1513: }
1514:
1515: /*
1516: * pmap_release: release all resources held by a pmap
1517: *
1518: * => if pmap is still referenced it should be locked
1519: * => XXX: we currently don't expect any busy PTPs because we don't
1520: * allow anything to map them (except for the kernel's private
1521: * recursive mapping) or make them busy.
1522: */
1523:
1524: void
1525: pmap_release(struct pmap *pmap)
1526: {
1527: struct vm_page *pg;
1528:
1529: /*
1530: * remove it from global list of pmaps
1531: */
1532:
1533: simple_lock(&pmaps_lock);
1534: LIST_REMOVE(pmap, pm_list);
1535: simple_unlock(&pmaps_lock);
1536:
1537: /*
1538: * Before we free the pmap just make sure it's not cached anywhere.
1539: */
1540: tlbflushg();
1541:
1542: /*
1543: * free any remaining PTPs
1544: */
1545:
1546: while (!TAILQ_EMPTY(&pmap->pm_obj.memq)) {
1547: pg = TAILQ_FIRST(&pmap->pm_obj.memq);
1548: #ifdef DIAGNOSTIC
1549: if (pg->pg_flags & PG_BUSY)
1550: panic("pmap_release: busy page table page");
1551: #endif
1552: /* pmap_page_protect? currently no need for it. */
1553:
1554: pg->wire_count = 0;
1555: uvm_pagefree(pg);
1556: }
1557:
1558: /*
1559: * MULTIPROCESSOR -- no need to flush out of other processors'
1560: * APTE space because we do that in pmap_unmap_ptes().
1561: */
1562: uvm_km_free(kernel_map, (vaddr_t)pmap->pm_pdir, NBPG);
1563:
1564: #ifdef USER_LDT
1565: if (pmap->pm_flags & PMF_USER_LDT) {
1566: /*
1567: * no need to switch the LDT; this address space is gone,
1568: * nothing is using it.
1569: *
1570: * No need to lock the pmap for ldt_free (or anything else),
1571: * we're the last one to use it.
1572: */
1573: ldt_free(pmap);
1574: uvm_km_free(kernel_map, (vaddr_t)pmap->pm_ldt,
1575: pmap->pm_ldt_len * sizeof(union descriptor));
1576: }
1577: #endif
1578: }
1579:
1580: /*
1581: * Add a reference to the specified pmap.
1582: */
1583:
1584: void
1585: pmap_reference(struct pmap *pmap)
1586: {
1587: simple_lock(&pmap->pm_obj.vmobjlock);
1588: pmap->pm_obj.uo_refs++;
1589: simple_unlock(&pmap->pm_obj.vmobjlock);
1590: }
1591:
1592: #if defined(PMAP_FORK)
1593: /*
1594: * pmap_fork: perform any necessary data structure manipulation when
1595: * a VM space is forked.
1596: */
1597:
1598: void
1599: pmap_fork(struct pmap *pmap1, struct pmap *pmap2)
1600: {
1601: simple_lock(&pmap1->pm_obj.vmobjlock);
1602: simple_lock(&pmap2->pm_obj.vmobjlock);
1603:
1604: #ifdef USER_LDT
1605: /* Copy the LDT, if necessary. */
1606: if (pmap1->pm_flags & PMF_USER_LDT) {
1607: union descriptor *new_ldt;
1608: size_t len;
1609:
1610: len = pmap1->pm_ldt_len * sizeof(union descriptor);
1611: new_ldt = (union descriptor *)uvm_km_alloc(kernel_map, len);
1612: bcopy(pmap1->pm_ldt, new_ldt, len);
1613: pmap2->pm_ldt = new_ldt;
1614: pmap2->pm_ldt_len = pmap1->pm_ldt_len;
1615: pmap2->pm_flags |= PMF_USER_LDT;
1616: ldt_alloc(pmap2, new_ldt, len);
1617: }
1618: #endif /* USER_LDT */
1619:
1620: simple_unlock(&pmap2->pm_obj.vmobjlock);
1621: simple_unlock(&pmap1->pm_obj.vmobjlock);
1622: }
1623: #endif /* PMAP_FORK */
1624:
1625: #ifdef USER_LDT
1626: /*
1627: * pmap_ldt_cleanup: if the pmap has a local LDT, deallocate it, and
1628: * restore the default.
1629: */
1630:
1631: void
1632: pmap_ldt_cleanup(struct proc *p)
1633: {
1634: struct pcb *pcb = &p->p_addr->u_pcb;
1635: pmap_t pmap = p->p_vmspace->vm_map.pmap;
1636: union descriptor *old_ldt = NULL;
1637: size_t len = 0;
1638:
1639: simple_lock(&pmap->pm_obj.vmobjlock);
1640:
1641: if (pmap->pm_flags & PMF_USER_LDT) {
1642: ldt_free(pmap);
1643: pmap->pm_ldt_sel = GSEL(GLDT_SEL, SEL_KPL);
1644: pcb->pcb_ldt_sel = pmap->pm_ldt_sel;
1645: /* Reset the cached address of the LDT that this process uses */
1646: #ifdef MULTIPROCESSOR
1647: pcb->pcb_ldt = curcpu()->ci_ldt;
1648: #else
1649: pcb->pcb_ldt = ldt;
1650: #endif
1651: if (pcb == curpcb)
1652: lldt(pcb->pcb_ldt_sel);
1653: old_ldt = pmap->pm_ldt;
1654: len = pmap->pm_ldt_len * sizeof(union descriptor);
1655: pmap->pm_ldt = NULL;
1656: pmap->pm_ldt_len = 0;
1657: pmap->pm_flags &= ~PMF_USER_LDT;
1658: }
1659:
1660: simple_unlock(&pmap->pm_obj.vmobjlock);
1661:
1662: if (old_ldt != NULL)
1663: uvm_km_free(kernel_map, (vaddr_t)old_ldt, len);
1664: }
1665: #endif /* USER_LDT */
1666:
1667: /*
1668: * pmap_activate: activate a process' pmap (fill in %cr3 and LDT info)
1669: *
1670: * => called from cpu_switch()
1671: * => if proc is the curproc, then load it into the MMU
1672: */
1673:
1674: void
1675: pmap_activate(struct proc *p)
1676: {
1677: struct pcb *pcb = &p->p_addr->u_pcb;
1678: struct pmap *pmap = p->p_vmspace->vm_map.pmap;
1679: struct cpu_info *self = curcpu();
1680:
1681: pcb->pcb_pmap = pmap;
1682: /* Get the LDT that this process will actually use */
1683: #ifdef MULTIPROCESSOR
1684: pcb->pcb_ldt = pmap->pm_ldt == NULL ? self->ci_ldt : pmap->pm_ldt;
1685: #else
1686: pcb->pcb_ldt = pmap->pm_ldt == NULL ? ldt : pmap->pm_ldt;
1687: #endif
1688: pcb->pcb_ldt_sel = pmap->pm_ldt_sel;
1689: pcb->pcb_cr3 = pmap->pm_pdirpa;
1690: if (p == curproc) {
1691: /*
1692: * Set the correct descriptor value (i.e. with the
1693: * correct code segment X limit) in the GDT and the LDT.
1694: */
1695: self->ci_gdt[GUCODE_SEL].sd = pcb->pcb_ldt[LUCODE_SEL].sd =
1696: pmap->pm_codeseg;
1697:
1698: lcr3(pcb->pcb_cr3);
1699: lldt(pcb->pcb_ldt_sel);
1700:
1701: /*
1702: * mark the pmap in use by this processor.
1703: */
1704: i386_atomic_setbits_l(&pmap->pm_cpus, (1U << cpu_number()));
1705: }
1706: }
1707:
1708: /*
1709: * pmap_deactivate: deactivate a process' pmap
1710: */
1711:
1712: void
1713: pmap_deactivate(struct proc *p)
1714: {
1715: struct pmap *pmap = p->p_vmspace->vm_map.pmap;
1716:
1717: /*
1718: * mark the pmap no longer in use by this processor.
1719: */
1720: i386_atomic_clearbits_l(&pmap->pm_cpus, (1U << cpu_number()));
1721: }
1722:
1723: /*
1724: * end of lifecycle functions
1725: */
1726:
1727: /*
1728: * some misc. functions
1729: */
1730:
1731: /*
1732: * pmap_extract: extract a PA for the given VA
1733: */
1734:
1735: boolean_t
1736: pmap_extract(struct pmap *pmap, vaddr_t va, paddr_t *pap)
1737: {
1738: pt_entry_t *ptes, pte;
1739:
1740: if (pmap_valid_entry(pmap->pm_pdir[pdei(va)])) {
1741: ptes = pmap_map_ptes(pmap);
1742: pte = ptes[atop(va)];
1743: pmap_unmap_ptes(pmap);
1744: if (!pmap_valid_entry(pte))
1745: return (FALSE);
1746: if (pap != NULL)
1747: *pap = (pte & PG_FRAME) | (va & ~PG_FRAME);
1748: return (TRUE);
1749: }
1750: return (FALSE);
1751: }
1752:
1753: /*
1754: * pmap_virtual_space: used during bootup [pmap_steal_memory] to
1755: * determine the bounds of the kernel virtual address space.
1756: */
1757:
1758: void
1759: pmap_virtual_space(vaddr_t *startp, vaddr_t *endp)
1760: {
1761: *startp = virtual_avail;
1762: *endp = virtual_end;
1763: }
1764:
1765: /*
1766: * pmap_zero_page: zero a page
1767: */
1768: void (*pagezero)(void *, size_t) = bzero;
1769:
1770: void
1771: pmap_zero_page(struct vm_page *pg)
1772: {
1773: pmap_zero_phys(VM_PAGE_TO_PHYS(pg));
1774: }
1775:
1776: /*
1777: * pmap_zero_phys: same as pmap_zero_page, but for use before vm_pages are
1778: * initialized.
1779: */
1780: void
1781: pmap_zero_phys(paddr_t pa)
1782: {
1783: #ifdef MULTIPROCESSOR
1784: int id = cpu_number();
1785: #endif
1786: pt_entry_t *zpte = PTESLEW(zero_pte, id);
1787: caddr_t zerova = VASLEW(zerop, id);
1788:
1789: #ifdef DIAGNOSTIC
1790: if (*zpte)
1791: panic("pmap_zero_phys: lock botch");
1792: #endif
1793:
1794: *zpte = (pa & PG_FRAME) | PG_V | PG_RW; /* map in */
1795: pmap_update_pg((vaddr_t)zerova); /* flush TLB */
1796: pagezero(zerova, PAGE_SIZE); /* zero */
1797: *zpte = 0; /* zap! */
1798: }
1799:
1800: /*
1801: * pmap_zero_page_uncached: the same, except uncached.
1802: */
1803:
1804: boolean_t
1805: pmap_zero_page_uncached(paddr_t pa)
1806: {
1807: #ifdef MULTIPROCESSOR
1808: int id = cpu_number();
1809: #endif
1810: pt_entry_t *zpte = PTESLEW(zero_pte, id);
1811: caddr_t zerova = VASLEW(zerop, id);
1812:
1813: #ifdef DIAGNOSTIC
1814: if (*zpte)
1815: panic("pmap_zero_page_uncached: lock botch");
1816: #endif
1817:
1818: *zpte = (pa & PG_FRAME) | PG_V | PG_RW | PG_N; /* map in */
1819: pmap_update_pg((vaddr_t)zerova); /* flush TLB */
1820: pagezero(zerova, PAGE_SIZE); /* zero */
1821: *zpte = 0; /* zap! */
1822:
1823: return (TRUE);
1824: }
1825:
1826: /*
1827: * pmap_copy_page: copy a page
1828: */
1829:
1830: void
1831: pmap_copy_page(struct vm_page *srcpg, struct vm_page *dstpg)
1832: {
1833: paddr_t srcpa = VM_PAGE_TO_PHYS(srcpg);
1834: paddr_t dstpa = VM_PAGE_TO_PHYS(dstpg);
1835: #ifdef MULTIPROCESSOR
1836: int id = cpu_number();
1837: #endif
1838: pt_entry_t *spte = PTESLEW(csrc_pte, id);
1839: pt_entry_t *dpte = PTESLEW(cdst_pte, id);
1840: caddr_t csrcva = VASLEW(csrcp, id);
1841: caddr_t cdstva = VASLEW(cdstp, id);
1842:
1843: #ifdef DIAGNOSTIC
1844: if (*spte || *dpte)
1845: panic("pmap_copy_page: lock botch");
1846: #endif
1847:
1848: *spte = (srcpa & PG_FRAME) | PG_V | PG_RW;
1849: *dpte = (dstpa & PG_FRAME) | PG_V | PG_RW;
1850: pmap_update_2pg((vaddr_t)csrcva, (vaddr_t)cdstva);
1851: bcopy(csrcva, cdstva, PAGE_SIZE);
1852: *spte = *dpte = 0; /* zap! */
1853: pmap_update_2pg((vaddr_t)csrcva, (vaddr_t)cdstva);
1854: }
1855:
1856: /*
1857: * p m a p r e m o v e f u n c t i o n s
1858: *
1859: * functions that remove mappings
1860: */
1861:
1862: /*
1863: * pmap_remove_ptes: remove PTEs from a PTP
1864: *
1865: * => must have proper locking on pmap_master_lock
1866: * => caller must hold pmap's lock
1867: * => PTP must be mapped into KVA
1868: * => PTP should be null if pmap == pmap_kernel()
1869: */
1870:
1871: void
1872: pmap_remove_ptes(struct pmap *pmap, struct vm_page *ptp, vaddr_t ptpva,
1873: vaddr_t startva, vaddr_t endva, int flags)
1874: {
1875: struct pv_entry *pv_tofree = NULL; /* list of pv_entrys to free */
1876: struct pv_entry *pve;
1877: pt_entry_t *pte = (pt_entry_t *) ptpva;
1878: struct vm_page *pg;
1879: pt_entry_t opte;
1880:
1881: /*
1882: * note that ptpva points to the PTE that maps startva. this may
1883: * or may not be the first PTE in the PTP.
1884: *
1885: * we loop through the PTP while there are still PTEs to look at
1886: * and the wire_count is greater than 1 (because we use the wire_count
1887: * to keep track of the number of real PTEs in the PTP).
1888: */
1889:
1890: for (/*null*/; startva < endva && (ptp == NULL || ptp->wire_count > 1)
1891: ; pte++, startva += NBPG) {
1892: if (!pmap_valid_entry(*pte))
1893: continue; /* VA not mapped */
1894:
1895: if ((flags & PMAP_REMOVE_SKIPWIRED) && (*pte & PG_W))
1896: continue;
1897:
1898: /* atomically save the old PTE and zap! it */
1899: opte = i386_atomic_testset_ul(pte, 0);
1900:
1901: if (opte & PG_W)
1902: pmap->pm_stats.wired_count--;
1903: pmap->pm_stats.resident_count--;
1904:
1905: if (ptp)
1906: ptp->wire_count--; /* dropping a PTE */
1907:
1908: /*
1909: * Unnecessary work if not PG_PVLIST.
1910: */
1911: pg = PHYS_TO_VM_PAGE(opte & PG_FRAME);
1912:
1913: /*
1914: * if we are not on a pv list we are done.
1915: */
1916: if ((opte & PG_PVLIST) == 0) {
1917: #ifdef DIAGNOSTIC
1918: if (pg != NULL)
1919: panic("pmap_remove_ptes: managed page without "
1920: "PG_PVLIST for 0x%lx", startva);
1921: #endif
1922: continue;
1923: }
1924:
1925: #ifdef DIAGNOSTIC
1926: if (pg == NULL)
1927: panic("pmap_remove_ptes: unmanaged page marked "
1928: "PG_PVLIST, va = 0x%lx, pa = 0x%lx",
1929: startva, (u_long)(opte & PG_FRAME));
1930: #endif
1931:
1932: /* sync R/M bits */
1933: pmap_sync_flags_pte(pg, opte);
1934: pve = pmap_remove_pv(pg, pmap, startva);
1935: if (pve) {
1936: pve->pv_next = pv_tofree;
1937: pv_tofree = pve;
1938: }
1939:
1940: /* end of "for" loop: time for next pte */
1941: }
1942: if (pv_tofree)
1943: pmap_free_pvs(pmap, pv_tofree);
1944: }
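/*
 * A reading aid (not original text) on the wire_count trick: a PTP's
 * wire_count is one for the page itself plus one per valid PTE it
 * holds, so "wire_count > 1" in the loop above means "still has
 * mappings". The invariant, in sketch form:
 *
 *	on PTP allocation:	ptp->wire_count = 1	(pmap_alloc_ptp)
 *	on PTE install:		ptp->wire_count++	(pmap_enter)
 *	on PTE removal:		ptp->wire_count--	(this function)
 *	empty-PTP test:		ptp->wire_count <= 1	(pmap_do_remove)
 */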
1945:
1946:
1947: /*
1948: * pmap_remove_pte: remove a single PTE from a PTP
1949: *
1950: * => must have proper locking on pmap_master_lock
1951: * => caller must hold pmap's lock
1952: * => PTP must be mapped into KVA
1953: * => PTP should be null if pmap == pmap_kernel()
1954: * => returns true if we removed a mapping
1955: */
1956:
1957: boolean_t
1958: pmap_remove_pte(struct pmap *pmap, struct vm_page *ptp, pt_entry_t *pte,
1959: vaddr_t va, int flags)
1960: {
1961: struct pv_entry *pve;
1962: struct vm_page *pg;
1963: pt_entry_t opte;
1964:
1965: if (!pmap_valid_entry(*pte))
1966: return (FALSE); /* VA not mapped */
1967:
1968: if ((flags & PMAP_REMOVE_SKIPWIRED) && (*pte & PG_W))
1969: return (FALSE);
1970:
1971: opte = *pte; /* save the old PTE */
1972: *pte = 0; /* zap! */
1973:
1974: pmap_exec_account(pmap, va, opte, 0);
1975:
1976: if (opte & PG_W)
1977: pmap->pm_stats.wired_count--;
1978: pmap->pm_stats.resident_count--;
1979:
1980: if (ptp)
1981: ptp->wire_count--; /* dropping a PTE */
1982:
1983: pg = PHYS_TO_VM_PAGE(opte & PG_FRAME);
1984:
1985: /*
1986: * if we are not on a pv list we are done.
1987: */
1988: if ((opte & PG_PVLIST) == 0) {
1989: #ifdef DIAGNOSTIC
1990: if (pg != NULL)
1991: panic("pmap_remove_pte: managed page without "
1992: "PG_PVLIST for 0x%lx", va);
1993: #endif
1994: return(TRUE);
1995: }
1996:
1997: #ifdef DIAGNOSTIC
1998: if (pg == NULL)
1999: panic("pmap_remove_pte: unmanaged page marked "
2000: "PG_PVLIST, va = 0x%lx, pa = 0x%lx", va,
2001: (u_long)(opte & PG_FRAME));
2002: #endif
2003:
2004: pmap_sync_flags_pte(pg, opte);
2005: pve = pmap_remove_pv(pg, pmap, va);
2006: if (pve)
2007: pmap_free_pv(pmap, pve);
2008: return(TRUE);
2009: }
2010:
2011: /*
2012: * pmap_remove: top level mapping removal function
2013: *
2014: * => caller should not be holding any pmap locks
2015: */
2016:
2017: void
2018: pmap_remove(struct pmap *pmap, vaddr_t sva, vaddr_t eva)
2019: {
2020: pmap_do_remove(pmap, sva, eva, PMAP_REMOVE_ALL);
2021: }
2022:
2023: void
2024: pmap_do_remove(struct pmap *pmap, vaddr_t sva, vaddr_t eva, int flags)
2025: {
2026: pt_entry_t *ptes, opte;
2027: boolean_t result;
2028: paddr_t ptppa;
2029: vaddr_t blkendva;
2030: struct vm_page *ptp;
2031: TAILQ_HEAD(, vm_page) empty_ptps;
2032: int shootall;
2033: vaddr_t va;
2034:
2035: TAILQ_INIT(&empty_ptps);
2036:
2037: PMAP_MAP_TO_HEAD_LOCK();
2038: ptes = pmap_map_ptes(pmap); /* locks pmap */
2039:
2040: /*
2041: * removing one page? take shortcut function.
2042: */
2043:
2044: if (sva + PAGE_SIZE == eva) {
2045:
2046: if (pmap_valid_entry(pmap->pm_pdir[pdei(sva)])) {
2047:
2048: /* PA of the PTP */
2049: ptppa = pmap->pm_pdir[pdei(sva)] & PG_FRAME;
2050:
2051: /* get PTP if non-kernel mapping */
2052:
2053: if (pmap == pmap_kernel()) {
2054: /* we never free kernel PTPs */
2055: ptp = NULL;
2056: } else {
2057: if (pmap->pm_ptphint &&
2058: VM_PAGE_TO_PHYS(pmap->pm_ptphint) ==
2059: ptppa) {
2060: ptp = pmap->pm_ptphint;
2061: } else {
2062: ptp = PHYS_TO_VM_PAGE(ptppa);
2063: #ifdef DIAGNOSTIC
2064: if (ptp == NULL)
2065: panic("pmap_remove: unmanaged "
2066: "PTP detected");
2067: #endif
2068: }
2069: }
2070:
2071: /* do it! */
2072: result = pmap_remove_pte(pmap, ptp, &ptes[atop(sva)],
2073: sva, flags);
2074:
2075: /*
2076: * if mapping removed and the PTP is no longer
2077: * being used, free it!
2078: */
2079:
2080: if (result && ptp && ptp->wire_count <= 1) {
2081: opte = i386_atomic_testset_ul(
2082: &pmap->pm_pdir[pdei(sva)], 0);
2083: #ifdef MULTIPROCESSOR
2084: /*
2085: * XXXthorpej Redundant shootdown can happen
2086: * here if we're using APTE space.
2087: */
2088: #endif
2089: pmap_tlb_shootpage(curpcb->pcb_pmap,
2090: ((vaddr_t)ptes) + ptp->offset);
2091: #ifdef MULTIPROCESSOR
2092: /*
2093: * Always shoot down the pmap's self-mapping
2094: * of the PTP.
2095: * XXXthorpej Redundant shootdown can happen
2096: * here if pmap == curpcb->pcb_pmap (not APTE
2097: * space).
2098: */
2099: pmap_tlb_shootpage(pmap,
2100: ((vaddr_t)PTE_BASE) + ptp->offset);
2101: #endif
2102: pmap->pm_stats.resident_count--;
2103: if (pmap->pm_ptphint == ptp)
2104: pmap->pm_ptphint =
2105: TAILQ_FIRST(&pmap->pm_obj.memq);
2106: ptp->wire_count = 0;
2107: /* Postpone free to after shootdown. */
2108: uvm_pagerealloc(ptp, NULL, 0);
2109: TAILQ_INSERT_TAIL(&empty_ptps, ptp, listq);
2110: }
2111: /*
2112: * Shoot the tlb after any updates to the PDE.
2113: */
2114: pmap_tlb_shootpage(pmap, sva);
2115: }
2116: pmap_tlb_shootwait();
2117: pmap_unmap_ptes(pmap); /* unlock pmap */
2118: PMAP_MAP_TO_HEAD_UNLOCK();
2119: while ((ptp = TAILQ_FIRST(&empty_ptps)) != NULL) {
2120: TAILQ_REMOVE(&empty_ptps, ptp, listq);
2121: uvm_pagefree(ptp);
2122: }
2123: return;
2124: }
2125:
2126: /*
2127: * Decide if we want to shoot the whole tlb or just the range.
2128: * Right now, we simply shoot everything when we remove more
2129: * than 32 pages, but never in the kernel pmap. XXX - tune.
2130: */
2131: if ((eva - sva > 32 * PAGE_SIZE) && pmap != pmap_kernel())
2132: shootall = 1;
2133: else
2134: shootall = 0;
2135:
2136: for (va = sva ; va < eva ; va = blkendva) {
2137: /* determine range of block */
2138: blkendva = i386_round_pdr(va + 1);
2139: if (blkendva > eva)
2140: blkendva = eva;
2141:
2142: /*
2143: * XXXCDC: our PTE mappings should never be removed
2144: * with pmap_remove! if we allow this (and why would
2145: * we?) then we end up freeing the pmap's page
2146: * directory page (PDP) before we are finished using
2147: * it when we hit it in the recursive mapping. this
2148: * is BAD.
2149: *
2150: * long term solution is to move the PTEs out of user
2151: * address space and into kernel address space (up
2152: * with APTE). then we can set VM_MAXUSER_ADDRESS to
2153: * be VM_MAX_ADDRESS.
2154: */
2155:
2156: if (pdei(va) == PDSLOT_PTE)
2157: /* XXXCDC: ugly hack to avoid freeing PDP here */
2158: continue;
2159:
2160: if (!pmap_valid_entry(pmap->pm_pdir[pdei(va)]))
2161: /* valid block? */
2162: continue;
2163:
2164: /* PA of the PTP */
2165: ptppa = (pmap->pm_pdir[pdei(va)] & PG_FRAME);
2166:
2167: /* get PTP if non-kernel mapping */
2168: if (pmap == pmap_kernel()) {
2169: /* we never free kernel PTPs */
2170: ptp = NULL;
2171: } else {
2172: if (pmap->pm_ptphint &&
2173: VM_PAGE_TO_PHYS(pmap->pm_ptphint) == ptppa) {
2174: ptp = pmap->pm_ptphint;
2175: } else {
2176: ptp = PHYS_TO_VM_PAGE(ptppa);
2177: #ifdef DIAGNOSTIC
2178: if (ptp == NULL)
2179: panic("pmap_remove: unmanaged PTP "
2180: "detected");
2181: #endif
2182: }
2183: }
2184: pmap_remove_ptes(pmap, ptp, (vaddr_t)&ptes[atop(va)],
2185: va, blkendva, flags);
2186:
2187: /* if PTP is no longer being used, free it! */
2188: if (ptp && ptp->wire_count <= 1) {
2189: opte = i386_atomic_testset_ul(
2190: &pmap->pm_pdir[pdei(va)], 0);
2191: #if defined(MULTIPROCESSOR)
2192: /*
2193: * XXXthorpej Redundant shootdown can happen here
2194: * if we're using APTE space.
2195: */
2196: #endif
2197: pmap_tlb_shootpage(curpcb->pcb_pmap,
2198: ((vaddr_t)ptes) + ptp->offset);
2199: #if defined(MULTIPROCESSOR)
2200: /*
2201: * Always shoot down the pmap's self-mapping
2202: * of the PTP.
2203: * XXXthorpej Redundant shootdown can happen here
2204: * if pmap == curpcb->pcb_pmap (not APTE space).
2205: */
2206: pmap_tlb_shootpage(pmap,
2207: ((vaddr_t)PTE_BASE) + ptp->offset);
2208: #endif
2209: pmap->pm_stats.resident_count--;
2210: if (pmap->pm_ptphint == ptp) /* update hint? */
2211: pmap->pm_ptphint =
2212: TAILQ_FIRST(&pmap->pm_obj.memq);
2213: ptp->wire_count = 0;
2214: /* Postpone free to after shootdown. */
2215: uvm_pagerealloc(ptp, NULL, 0);
2216: TAILQ_INSERT_TAIL(&empty_ptps, ptp, listq);
2217: }
2218: }
2219: if (!shootall)
2220: pmap_tlb_shootrange(pmap, sva, eva);
2221: else
2222: pmap_tlb_shoottlb();
2223:
2224: pmap_tlb_shootwait();
2225: pmap_unmap_ptes(pmap);
2226: PMAP_MAP_TO_HEAD_UNLOCK();
2227: while ((ptp = TAILQ_FIRST(&empty_ptps)) != NULL) {
2228: TAILQ_REMOVE(&empty_ptps, ptp, listq);
2229: uvm_pagefree(ptp);
2230: }
2231: }
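/*
 * A worked example of the shootdown heuristic above (illustrative
 * numbers, assuming 4KB pages): removing a 64-page (256KB) range from
 * a user pmap exceeds the 32-page threshold, so a single
 * pmap_tlb_shoottlb() replaces 64 per-page shootdowns. The kernel
 * pmap is always shot page-by-page, likely because kernel PTEs carry
 * the global bit (pmap_pg_g, see pmap_enter below) and would survive
 * the plain CR3 reload that tlbflush() performs.
 */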
2232:
2233: /*
2234: * pmap_page_remove: remove a managed vm_page from all pmaps that map it
2235: *
2236: * => R/M bits are sync'd back to attrs
2237: */
2238:
2239: void
2240: pmap_page_remove(struct vm_page *pg)
2241: {
2242: struct pv_entry *pve;
2243: pt_entry_t *ptes, opte;
2244: TAILQ_HEAD(, vm_page) empty_ptps;
2245: struct vm_page *ptp;
2246:
2247: if (pg->mdpage.pv_list == NULL)
2248: return;
2249:
2250: TAILQ_INIT(&empty_ptps);
2251:
2252: PMAP_HEAD_TO_MAP_LOCK();
2253:
2254: for (pve = pg->mdpage.pv_list ; pve != NULL ; pve = pve->pv_next) {
2255: ptes = pmap_map_ptes(pve->pv_pmap); /* locks pmap */
2256:
2257: #ifdef DIAGNOSTIC
2258: if (pve->pv_va >= uvm.pager_sva && pve->pv_va < uvm.pager_eva)
2259: printf("pmap_page_remove: found pager VA on pv_list\n");
2260: if (pve->pv_ptp && (pve->pv_pmap->pm_pdir[pdei(pve->pv_va)] &
2261: PG_FRAME)
2262: != VM_PAGE_TO_PHYS(pve->pv_ptp)) {
2263: printf("pmap_page_remove: pg=%p: va=%lx, pv_ptp=%p\n",
2264: pg, pve->pv_va, pve->pv_ptp);
2265: printf("pmap_page_remove: PTP's phys addr: "
2266: "actual=%x, recorded=%lx\n",
2267: (pve->pv_pmap->pm_pdir[pdei(pve->pv_va)] &
2268: PG_FRAME), VM_PAGE_TO_PHYS(pve->pv_ptp));
2269: panic("pmap_page_remove: mapped managed page has "
2270: "invalid pv_ptp field");
2271: }
2272: #endif
2273:
2274: opte = i386_atomic_testset_ul(&ptes[atop(pve->pv_va)], 0);
2275:
2276: if (opte & PG_W)
2277: pve->pv_pmap->pm_stats.wired_count--;
2278: pve->pv_pmap->pm_stats.resident_count--;
2279:
2280: /* sync R/M bits */
2281: pmap_sync_flags_pte(pg, opte);
2282:
2283: /* update the PTP reference count. free if last reference. */
2284: if (pve->pv_ptp) {
2285: pve->pv_ptp->wire_count--;
2286: if (pve->pv_ptp->wire_count <= 1) {
2287: opte = i386_atomic_testset_ul(
2288: &pve->pv_pmap->pm_pdir[pdei(pve->pv_va)],
2289: 0);
2290: pmap_tlb_shootpage(curpcb->pcb_pmap,
2291: ((vaddr_t)ptes) + pve->pv_ptp->offset);
2292: #if defined(MULTIPROCESSOR)
2293: /*
2294: * Always shoot down the other pmap's
2295: * self-mapping of the PTP.
2296: */
2297: pmap_tlb_shootpage(pve->pv_pmap,
2298: ((vaddr_t)PTE_BASE) + pve->pv_ptp->offset);
2299: #endif
2300: pve->pv_pmap->pm_stats.resident_count--;
2301: /* update hint? */
2302: if (pve->pv_pmap->pm_ptphint == pve->pv_ptp)
2303: pve->pv_pmap->pm_ptphint =
2304: TAILQ_FIRST(&pve->pv_pmap->pm_obj.memq);
2305: pve->pv_ptp->wire_count = 0;
2306: /* Postpone free to after shootdown. */
2307: uvm_pagerealloc(pve->pv_ptp, NULL, 0);
2308: TAILQ_INSERT_TAIL(&empty_ptps, pve->pv_ptp,
2309: listq);
2310: }
2311: }
2312:
2313: pmap_tlb_shootpage(pve->pv_pmap, pve->pv_va);
2314:
2315: pmap_unmap_ptes(pve->pv_pmap); /* unlocks pmap */
2316: }
2317: pmap_free_pvs(NULL, pg->mdpage.pv_list);
2318: pg->mdpage.pv_list = NULL;
2319: PMAP_HEAD_TO_MAP_UNLOCK();
2320: pmap_tlb_shootwait();
2321:
2322: while ((ptp = TAILQ_FIRST(&empty_ptps)) != NULL) {
2323: TAILQ_REMOVE(&empty_ptps, ptp, listq);
2324: uvm_pagefree(ptp);
2325: }
2326: }
2327:
2328: /*
2329: * p m a p a t t r i b u t e f u n c t i o n s
2330: * functions that test/change managed page's attributes
2331: * since a page can be mapped multiple times we must check each PTE that
2332: * maps it by going down the pv lists.
2333: */
2334:
2335: /*
2336: * pmap_test_attrs: test a page's attributes
2337: */
2338:
2339: boolean_t
2340: pmap_test_attrs(struct vm_page *pg, int testbits)
2341: {
2342: struct pv_entry *pve;
2343: pt_entry_t *ptes, pte;
2344: u_long mybits, testflags;
2345:
2346: testflags = pmap_pte2flags(testbits);
2347:
2348: if (pg->pg_flags & testflags)
2349: return (TRUE);
2350:
2351: PMAP_HEAD_TO_MAP_LOCK();
2352: mybits = 0;
2353: for (pve = pg->mdpage.pv_list; pve != NULL && mybits == 0;
2354: pve = pve->pv_next) {
2355: ptes = pmap_map_ptes(pve->pv_pmap);
2356: pte = ptes[atop(pve->pv_va)];
2357: pmap_unmap_ptes(pve->pv_pmap);
2358: mybits |= (pte & testbits);
2359: }
2360: PMAP_HEAD_TO_MAP_UNLOCK();
2361:
2362: if (mybits == 0)
2363: return (FALSE);
2364:
2365: atomic_setbits_int(&pg->pg_flags, pmap_pte2flags(mybits));
2366:
2367: return (TRUE);
2368: }
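/*
 * An illustrative sketch (hypothetical caller, not in the original
 * source): asking whether a managed page has been written to since
 * its modified attribute was last cleared.
 */
#if 0	/* illustrative only */
static boolean_t
example_page_is_modified(struct vm_page *pg)
{
	/* cheap path first: cached pg_flags, then each mapping's PTE */
	return (pmap_test_attrs(pg, PG_M));
}
#endif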
2369:
2370: /*
2371: * pmap_clear_attrs: change a page's attributes
2372: *
2373: * => we return TRUE if we cleared one of the bits we were asked to
2374: */
2375:
2376: boolean_t
2377: pmap_clear_attrs(struct vm_page *pg, int clearbits)
2378: {
2379: struct pv_entry *pve;
2380: pt_entry_t *ptes, npte, opte;
2381: u_long clearflags;
2382: int result;
2383:
2384: clearflags = pmap_pte2flags(clearbits);
2385:
2386: PMAP_HEAD_TO_MAP_LOCK();
2387:
2388: result = pg->pg_flags & clearflags;
2389: if (result)
2390: atomic_clearbits_int(&pg->pg_flags, clearflags);
2391:
2392: for (pve = pg->mdpage.pv_list; pve != NULL; pve = pve->pv_next) {
2393: #ifdef DIAGNOSTIC
2394: if (!pmap_valid_entry(pve->pv_pmap->pm_pdir[pdei(pve->pv_va)]))
2395: panic("pmap_change_attrs: mapping without PTP "
2396: "detected");
2397: #endif
2398:
2399: ptes = pmap_map_ptes(pve->pv_pmap); /* locks pmap */
2400: npte = ptes[atop(pve->pv_va)];
2401: if (npte & clearbits) {
2402: result = TRUE;
2403: npte &= ~clearbits;
2404: opte = i386_atomic_testset_ul(
2405: &ptes[atop(pve->pv_va)], npte);
2406: pmap_tlb_shootpage(pve->pv_pmap, pve->pv_va);
2407: }
2408: pmap_unmap_ptes(pve->pv_pmap); /* unlocks pmap */
2409: }
2410:
2411: PMAP_HEAD_TO_MAP_UNLOCK();
2412: pmap_tlb_shootwait();
2413:
2414: return (result != 0);
2415: }
2416:
2417: /*
2418: * p m a p p r o t e c t i o n f u n c t i o n s
2419: */
2420:
2421: /*
2422: * pmap_page_protect: change the protection of all recorded mappings
2423: * of a managed page
2424: *
2425: * => NOTE: this is an inline function in pmap.h
2426: */
2427:
2428: /* see pmap.h */
2429:
2430: /*
2431: * pmap_protect: set the protection of the pages in a pmap
2432: *
2433: * => NOTE: this is an inline function in pmap.h
2434: */
2435:
2436: /* see pmap.h */
2437:
2438: /*
2439: * pmap_write_protect: write-protect pages in a pmap
2440: */
2441:
2442: void
2443: pmap_write_protect(struct pmap *pmap, vaddr_t sva, vaddr_t eva,
2444: vm_prot_t prot)
2445: {
2446: pt_entry_t *ptes, *spte, *epte, npte;
2447: vaddr_t blockend;
2448: u_int32_t md_prot;
2449: vaddr_t va;
2450: int shootall = 0;
2451:
2452: ptes = pmap_map_ptes(pmap); /* locks pmap */
2453:
2454: /* should be ok, but just in case ... */
2455: sva &= PG_FRAME;
2456: eva &= PG_FRAME;
2457:
2458: if ((eva - sva > 32 * PAGE_SIZE) && pmap != pmap_kernel())
2459: shootall = 1;
2460:
2461: for (va = sva; va < eva; va = blockend) {
2462: blockend = (va & PD_MASK) + NBPD;
2463: if (blockend > eva)
2464: blockend = eva;
2465:
2466: /*
2467: * XXXCDC: our PTE mappings should never be write-protected!
2468: *
2469: * long term solution is to move the PTEs out of user
2470: * address space and into kernel address space (up
2471: * with APTE). then we can set VM_MAXUSER_ADDRESS to
2472: * be VM_MAX_ADDRESS.
2473: */
2474:
2475: /* XXXCDC: ugly hack to avoid freeing PDP here */
2476: if (pdei(va) == PDSLOT_PTE)
2477: continue;
2478:
2479: /* empty block? */
2480: if (!pmap_valid_entry(pmap->pm_pdir[pdei(va)]))
2481: continue;
2482:
2483: md_prot = protection_codes[prot];
2484: if (va < VM_MAXUSER_ADDRESS)
2485: md_prot |= PG_u;
2486: else if (va < VM_MAX_ADDRESS)
2487: /* XXX: write-prot our PTES? never! */
2488: md_prot |= (PG_u | PG_RW);
2489:
2490: spte = &ptes[atop(va)];
2491: epte = &ptes[atop(blockend)];
2492:
2493: for (/*null */; spte < epte ; spte++, va += PAGE_SIZE) {
2494:
2495: if (!pmap_valid_entry(*spte)) /* no mapping? */
2496: continue;
2497:
2498: npte = (*spte & ~PG_PROT) | md_prot;
2499:
2500: if (npte != *spte) {
2501: pmap_exec_account(pmap, va, *spte, npte);
2502: i386_atomic_testset_ul(spte, npte);
2503: }
2504: }
2505: }
2506: if (shootall)
2507: pmap_tlb_shoottlb();
2508: else
2509: pmap_tlb_shootrange(pmap, sva, eva);
2510:
2511: pmap_tlb_shootwait();
2512: pmap_unmap_ptes(pmap); /* unlocks pmap */
2513: }
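/*
 * A worked bit example for the loop above (assuming PG_PROT masks at
 * least the PG_RW and PG_u protection bits, and that
 * protection_codes[VM_PROT_READ] adds no write bit): for a read-only
 * user mapping, md_prot = PG_u, so a PTE of (pa | PG_u | PG_RW | PG_V)
 * becomes
 *
 *	npte = (pte & ~PG_PROT) | PG_u = pa | PG_u | PG_V
 *
 * i.e. PG_RW is stripped while the frame and valid bits survive,
 * which is exactly what write-protecting the page requires.
 */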
2514:
2515: /*
2516: * end of protection functions
2517: */
2518:
2519: /*
2520: * pmap_unwire: clear the wired bit in the PTE
2521: *
2522: * => mapping should already be in map
2523: */
2524:
2525: void
2526: pmap_unwire(struct pmap *pmap, vaddr_t va)
2527: {
2528: pt_entry_t *ptes;
2529:
2530: if (pmap_valid_entry(pmap->pm_pdir[pdei(va)])) {
2531: ptes = pmap_map_ptes(pmap); /* locks pmap */
2532:
2533: #ifdef DIAGNOSTIC
2534: if (!pmap_valid_entry(ptes[atop(va)]))
2535: panic("pmap_unwire: invalid (unmapped) va 0x%lx", va);
2536: #endif
2537: if ((ptes[atop(va)] & PG_W) != 0) {
2538: ptes[atop(va)] &= ~PG_W;
2539: pmap->pm_stats.wired_count--;
2540: }
2541: #ifdef DIAGNOSTIC
2542: else {
2543: printf("pmap_unwire: wiring for pmap %p va 0x%lx "
2544: "didn't change!\n", pmap, va);
2545: }
2546: #endif
2547: pmap_unmap_ptes(pmap); /* unlocks map */
2548: }
2549: #ifdef DIAGNOSTIC
2550: else {
2551: panic("pmap_unwire: invalid PDE");
2552: }
2553: #endif
2554: }
2555:
2556: /*
2557: * pmap_collect: free resources held by a pmap
2558: *
2559: * => optional function.
2560: * => called when a process is swapped out to free memory.
2561: */
2562:
2563: void
2564: pmap_collect(struct pmap *pmap)
2565: {
2566: /*
2567: * free all of the pt pages by removing the physical mappings
2568: * for its entire address space.
2569: */
2570:
2571: pmap_do_remove(pmap, VM_MIN_ADDRESS, VM_MAX_ADDRESS,
2572: PMAP_REMOVE_SKIPWIRED);
2573: }
2574:
2575: /*
2576: * pmap_copy: copy mappings from one pmap to another
2577: *
2578: * => optional function
2579: * void pmap_copy(dst_pmap, src_pmap, dst_addr, len, src_addr)
2580: */
2581:
2582: /*
2583: * defined as macro in pmap.h
2584: */
2585:
2586: /*
2587: * pmap_enter: enter a mapping into a pmap
2588: *
2589: * => must be done "now" ... no lazy-evaluation
2590: */
2591:
2592: int
2593: pmap_enter(struct pmap *pmap, vaddr_t va, paddr_t pa,
2594: vm_prot_t prot, int flags)
2595: {
2596: pt_entry_t *ptes, opte, npte;
2597: struct vm_page *ptp;
2598: struct pv_entry *pve = NULL;
2599: boolean_t wired = (flags & PMAP_WIRED) != 0;
2600: struct vm_page *pg = NULL;
2601: int error;
2602:
2603: #ifdef DIAGNOSTIC
2604: /* sanity check: totally out of range? */
2605: if (va >= VM_MAX_KERNEL_ADDRESS)
2606: panic("pmap_enter: too big");
2607:
2608: if (va == (vaddr_t) PDP_BASE || va == (vaddr_t) APDP_BASE)
2609: panic("pmap_enter: trying to map over PDP/APDP!");
2610:
2611: /* sanity check: kernel PTPs should already have been pre-allocated */
2612: if (va >= VM_MIN_KERNEL_ADDRESS &&
2613: !pmap_valid_entry(pmap->pm_pdir[pdei(va)]))
2614: panic("pmap_enter: missing kernel PTP!");
2615: #endif
2616:
2617: /* get lock */
2618: PMAP_MAP_TO_HEAD_LOCK();
2619:
2620: /*
2621: * map in ptes and get a pointer to our PTP (unless we are the kernel)
2622: */
2623:
2624: ptes = pmap_map_ptes(pmap); /* locks pmap */
2625: if (pmap == pmap_kernel()) {
2626: ptp = NULL;
2627: } else {
2628: ptp = pmap_get_ptp(pmap, pdei(va), FALSE);
2629: if (ptp == NULL) {
2630: if (flags & PMAP_CANFAIL) {
2631: error = ENOMEM;
2632: goto out;
2633: }
2634: panic("pmap_enter: get ptp failed");
2635: }
2636: }
2637: opte = ptes[atop(va)]; /* old PTE */
2638:
2639: /*
2640: * is there currently a valid mapping at our VA?
2641: */
2642:
2643: if (pmap_valid_entry(opte)) {
2644:
2645: /*
2646: * first, update pm_stats. resident count will not
2647: * change since we are replacing/changing a valid
2648: * mapping. wired count might change...
2649: */
2650:
2651: if (wired && (opte & PG_W) == 0)
2652: pmap->pm_stats.wired_count++;
2653: else if (!wired && (opte & PG_W) != 0)
2654: pmap->pm_stats.wired_count--;
2655:
2656: /*
2657: * is the currently mapped PA the same as the one we
2658: * want to map?
2659: */
2660:
2661: if ((opte & PG_FRAME) == pa) {
2662:
2663: /* if this is on the PVLIST, sync R/M bit */
2664: if (opte & PG_PVLIST) {
2665: pg = PHYS_TO_VM_PAGE(pa);
2666: #ifdef DIAGNOSTIC
2667: if (pg == NULL)
2668: panic("pmap_enter: same pa PG_PVLIST "
2669: "mapping with unmanaged page "
2670: "pa = 0x%lx (0x%lx)", pa,
2671: atop(pa));
2672: #endif
2673: pmap_sync_flags_pte(pg, opte);
2674: }
2675: goto enter_now;
2676: }
2677:
2678: /*
2679: * changing PAs: we must remove the old one first
2680: */
2681:
2682: /*
2683: * if current mapping is on a pvlist,
2684: * remove it (sync R/M bits)
2685: */
2686:
2687: if (opte & PG_PVLIST) {
2688: pg = PHYS_TO_VM_PAGE(opte & PG_FRAME);
2689: #ifdef DIAGNOSTIC
2690: if (pg == NULL)
2691: panic("pmap_enter: PG_PVLIST mapping with "
2692: "unmanaged page "
2693: "pa = 0x%lx (0x%lx)", pa, atop(pa));
2694: #endif
2695: pmap_sync_flags_pte(pg, opte);
2696: pve = pmap_remove_pv(pg, pmap, va);
2697: pg = NULL; /* This is not the page we are looking for */
2698: }
2699: } else { /* opte not valid */
2700: pmap->pm_stats.resident_count++;
2701: if (wired)
2702: pmap->pm_stats.wired_count++;
2703: if (ptp)
2704: ptp->wire_count++; /* count # of valid entries */
2705: }
2706:
2707: /*
2708: * at this point pm_stats has been updated. pve is either NULL
2709: * or points to a now-free pv_entry structure (the latter case is
2710: * if we called pmap_remove_pv above).
2711: *
2712: * if this entry is to be on a pvlist, enter it now.
2713: */
2714:
2715: if (pmap_initialized && pg == NULL)
2716: pg = PHYS_TO_VM_PAGE(pa);
2717:
2718: if (pg != NULL) {
2719: if (pve == NULL) {
2720: pve = pmap_alloc_pv(pmap, ALLOCPV_NEED);
2721: if (pve == NULL) {
2722: if (flags & PMAP_CANFAIL) {
2723: /*
2724: * XXX - Back out stats changes!
2725: */
2726: error = ENOMEM;
2727: goto out;
2728: }
2729: panic("pmap_enter: no pv entries available");
2730: }
2731: }
2732: /* lock pvh when adding */
2733: pmap_enter_pv(pg, pve, pmap, va, ptp);
2734: } else {
2735:
2736: /* new mapping is not PG_PVLIST. free pve if we've got one */
2737: if (pve)
2738: pmap_free_pv(pmap, pve);
2739: }
2740:
2741: enter_now:
2742: /*
2743: * at this point pg is != NULL if we want the PG_PVLIST bit set
2744: */
2745:
2746: npte = pa | protection_codes[prot] | PG_V;
2747: pmap_exec_account(pmap, va, opte, npte);
2748: if (wired)
2749: npte |= PG_W;
2750: if (va < VM_MAXUSER_ADDRESS)
2751: npte |= PG_u;
2752: else if (va < VM_MAX_ADDRESS)
2753: npte |= (PG_u | PG_RW); /* XXXCDC: no longer needed? */
2754: if (pmap == pmap_kernel())
2755: npte |= pmap_pg_g;
2756: if (flags & VM_PROT_READ)
2757: npte |= PG_U;
2758: if (flags & VM_PROT_WRITE)
2759: npte |= PG_M;
2760: if (pg) {
2761: npte |= PG_PVLIST;
2762: pmap_sync_flags_pte(pg, npte);
2763: }
2764:
2765: opte = i386_atomic_testset_ul(&ptes[atop(va)], npte);
2766:
2767: if (opte & PG_V) {
2768: pmap_tlb_shootpage(pmap, va);
2769: pmap_tlb_shootwait();
2770: }
2771:
2772: error = 0;
2773:
2774: out:
2775: pmap_unmap_ptes(pmap);
2776: PMAP_MAP_TO_HEAD_UNLOCK();
2777:
2778: return error;
2779: }
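/*
 * An illustrative sketch (hypothetical caller, not in the original
 * source): entering a wired mapping while honoring the PMAP_CANFAIL
 * contract instead of panicking on resource shortage. As in the
 * function above, the flags argument carries both the access type
 * and the PMAP_* bits.
 */
#if 0	/* illustrative only */
static int
example_enter_wired(struct pmap *pm, vaddr_t va, paddr_t pa)
{
	int error;

	error = pmap_enter(pm, va, pa, VM_PROT_READ | VM_PROT_WRITE,
	    VM_PROT_READ | VM_PROT_WRITE | PMAP_WIRED | PMAP_CANFAIL);
	if (error)
		return (error);	/* ENOMEM: caller may free memory, retry */
	return (0);
}
#endif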
2780:
2781: /*
2782: * pmap_growkernel: increase usage of KVM space
2783: *
2784: * => we allocate new PTPs for the kernel and install them in all
2785: * the pmaps on the system.
2786: */
2787:
2788: vaddr_t
2789: pmap_growkernel(vaddr_t maxkvaddr)
2790: {
2791: struct pmap *kpm = pmap_kernel(), *pm;
2792: int needed_kpde; /* needed number of kernel PTPs */
2793: int s;
2794: paddr_t ptaddr;
2795:
2796: needed_kpde = (int)(maxkvaddr - VM_MIN_KERNEL_ADDRESS + (NBPD-1))
2797: / NBPD;
2798: if (needed_kpde <= nkpde)
2799: goto out; /* we are OK */
2800:
2801: /*
2802: * whoops! we need to add kernel PTPs
2803: */
2804:
2805: s = splhigh(); /* to be safe */
2806: simple_lock(&kpm->pm_obj.vmobjlock);
2807:
2808: for (/*null*/ ; nkpde < needed_kpde ; nkpde++) {
2809:
2810: if (uvm.page_init_done == FALSE) {
2811:
2812: /*
2813: * we're growing the kernel pmap early (from
2814: * uvm_pageboot_alloc()). this case must be
2815: * handled a little differently.
2816: */
2817:
2818: if (uvm_page_physget(&ptaddr) == FALSE)
2819: panic("pmap_growkernel: out of memory");
2820: pmap_zero_phys(ptaddr);
2821:
2822: kpm->pm_pdir[PDSLOT_KERN + nkpde] =
2823: ptaddr | PG_RW | PG_V | PG_U | PG_M;
2824:
2825: /* count PTP as resident */
2826: kpm->pm_stats.resident_count++;
2827: continue;
2828: }
2829:
2830: /*
2831: * THIS *MUST* BE CODED SO AS TO WORK IN THE
2832: * pmap_initialized == FALSE CASE! WE MAY BE
2833: * INVOKED WHILE pmap_init() IS RUNNING!
2834: */
2835:
2836: while (!pmap_alloc_ptp(kpm, PDSLOT_KERN + nkpde, FALSE, 0))
2837: uvm_wait("pmap_growkernel");
2838:
2839: /* distribute new kernel PTP to all active pmaps */
2840: simple_lock(&pmaps_lock);
2841: LIST_FOREACH(pm, &pmaps, pm_list) {
2842: pm->pm_pdir[PDSLOT_KERN + nkpde] =
2843: kpm->pm_pdir[PDSLOT_KERN + nkpde];
2844: }
2845: simple_unlock(&pmaps_lock);
2846: }
2847:
2848: simple_unlock(&kpm->pm_obj.vmobjlock);
2849: splx(s);
2850:
2851: out:
2852: return (VM_MIN_KERNEL_ADDRESS + (nkpde * NBPD));
2853: }
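/*
 * A worked example of the PTP arithmetic above (illustrative
 * numbers): with NBPD = 4MB, growing to maxkvaddr =
 * VM_MIN_KERNEL_ADDRESS + 10MB gives
 *
 *	needed_kpde = (10MB + (4MB - 1)) / 4MB = 3
 *
 * i.e. the round-up ensures a partially covered 4MB block still gets
 * its kernel PTP, and the loop copies each new PDE into every pmap on
 * the pmaps list so all page directories stay in sync.
 */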
2854:
2855: #ifdef DEBUG
2856: void pmap_dump(struct pmap *, vaddr_t, vaddr_t);
2857:
2858: /*
2859: * pmap_dump: dump all the mappings from a pmap
2860: *
2861: * => caller should not be holding any pmap locks
2862: */
2863:
2864: void
2865: pmap_dump(struct pmap *pmap, vaddr_t sva, vaddr_t eva)
2866: {
2867: pt_entry_t *ptes, *pte;
2868: vaddr_t blkendva;
2869:
2870: /*
2871: * if end is out of range truncate.
2872: * if (end <= start) update to max.
2873: */
2874:
2875: if (eva > VM_MAXUSER_ADDRESS || eva <= sva)
2876: eva = VM_MAXUSER_ADDRESS;
2877:
2878: PMAP_MAP_TO_HEAD_LOCK();
2879: ptes = pmap_map_ptes(pmap); /* locks pmap */
2880:
2881: /*
2882: * dumping a range of pages: we dump in PTP sized blocks (4MB)
2883: */
2884:
2885: for (/* null */ ; sva < eva ; sva = blkendva) {
2886:
2887: /* determine range of block */
2888: blkendva = i386_round_pdr(sva+1);
2889: if (blkendva > eva)
2890: blkendva = eva;
2891:
2892: /* valid block? */
2893: if (!pmap_valid_entry(pmap->pm_pdir[pdei(sva)]))
2894: continue;
2895:
2896: pte = &ptes[atop(sva)];
2897: for (/* null */; sva < blkendva ; sva += NBPG, pte++) {
2898: if (!pmap_valid_entry(*pte))
2899: continue;
2900: printf("va %#lx -> pa %#x (pte=%#x)\n",
2901: sva, *pte & PG_FRAME, *pte);
2902: }
2903: }
2904: pmap_unmap_ptes(pmap);
2905: PMAP_MAP_TO_HEAD_UNLOCK();
2906: }
2907: #endif
2908:
2909: #ifdef MULTIPROCESSOR
2910: /*
2911: * Locking for tlb shootdown.
2912: *
2913: * We lock by setting tlb_shoot_wait to the number of cpus that will
2914: * receive our tlb shootdown. After sending the IPIs, we don't need to
2915: * worry about locking order or interrupts spinning for the lock because
2916: * the call that grabs the "lock" isn't the one that releases it. And
2917: * there is nothing that can block the IPI that releases the lock.
2918: *
2919: * The functions are organized so that we first count the number of
2920: * cpus we need to send the IPI to, then we grab the counter, then
2921: * we send the IPIs, then we finally do our own shootdown.
2922: *
2923: * Our shootdown is last to make it parallel with the other cpus
2924: * to shorten the spin time.
2925: *
2926: * Notice that we depend on failures to send IPIs only being able to
2927: * happen during boot. If they happen later, the above assumption
2928: * doesn't hold since we can end up in situations where no one will
2929: * release the lock if we get an interrupt in a bad moment.
2930: */
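/*
 * A condensed sketch of the protocol described above (a reading aid,
 * not original text); the IPI handlers that decrement tlb_shoot_wait
 * presumably live with the other interrupt vectors, outside this
 * file:
 *
 *	wait = count of other running CPUs that must flush;
 *	while (atomic cas of tlb_shoot_wait from 0 to wait fails)
 *		spin;				grab the counter-lock
 *	tlb_shoot_addr1 = va;			publish the target
 *	send an IPI to each counted CPU;	each handler flushes,
 *						then decrements the counter
 *	flush locally;				overlaps with remote flushes
 *	pmap_tlb_shootwait();			callers drain the counter
 */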
2931:
2932: volatile int tlb_shoot_wait;
2933:
2934: volatile vaddr_t tlb_shoot_addr1;
2935: volatile vaddr_t tlb_shoot_addr2;
2936:
2937: void
2938: pmap_tlb_shootpage(struct pmap *pm, vaddr_t va)
2939: {
2940: struct cpu_info *ci, *self = curcpu();
2941: CPU_INFO_ITERATOR cii;
2942: int wait = 0;
2943: int mask = 0;
2944:
2945: CPU_INFO_FOREACH(cii, ci) {
2946: if (ci == self || !pmap_is_active(pm, ci->ci_cpuid) ||
2947: !(ci->ci_flags & CPUF_RUNNING))
2948: continue;
2949: mask |= 1 << ci->ci_cpuid;
2950: wait++;
2951: }
2952:
2953: if (wait > 0) {
2954: int s = splvm();
2955:
2956: while (i486_atomic_cas_int(&tlb_shoot_wait, 0, wait) != 0) {
2957: while (tlb_shoot_wait != 0)
2958: SPINLOCK_SPIN_HOOK;
2959: }
2960: tlb_shoot_addr1 = va;
2961: CPU_INFO_FOREACH(cii, ci) {
2962: if ((mask & 1 << ci->ci_cpuid) == 0)
2963: continue;
2964: if (i386_fast_ipi(ci, LAPIC_IPI_INVLPG) != 0)
2965: panic("pmap_tlb_shootpage: ipi failed");
2966: }
2967: splx(s);
2968: }
2969:
2970: if (pmap_is_curpmap(pm))
2971: pmap_update_pg(va);
2972: }
2973:
2974: void
2975: pmap_tlb_shootrange(struct pmap *pm, vaddr_t sva, vaddr_t eva)
2976: {
2977: struct cpu_info *ci, *self = curcpu();
2978: CPU_INFO_ITERATOR cii;
2979: int wait = 0;
2980: int mask = 0;
2981: vaddr_t va;
2982:
2983: CPU_INFO_FOREACH(cii, ci) {
2984: if (ci == self || !pmap_is_active(pm, ci->ci_cpuid) ||
2985: !(ci->ci_flags & CPUF_RUNNING))
2986: continue;
2987: mask |= 1 << ci->ci_cpuid;
2988: wait++;
2989: }
2990:
2991: if (wait > 0) {
2992: int s = splvm();
2993:
2994: while (i486_atomic_cas_int(&tlb_shoot_wait, 0, wait) != 0) {
2995: while (tlb_shoot_wait != 0)
2996: SPINLOCK_SPIN_HOOK;
2997: }
2998: tlb_shoot_addr1 = sva;
2999: tlb_shoot_addr2 = eva;
3000: CPU_INFO_FOREACH(cii, ci) {
3001: if ((mask & 1 << ci->ci_cpuid) == 0)
3002: continue;
3003: if (i386_fast_ipi(ci, LAPIC_IPI_INVLRANGE) != 0)
3004: panic("pmap_tlb_shootrange: ipi failed");
3005: }
3006: splx(s);
3007: }
3008:
3009: if (pmap_is_curpmap(pm))
3010: for (va = sva; va < eva; va += PAGE_SIZE)
3011: pmap_update_pg(va);
3012: }
3013:
3014: void
3015: pmap_tlb_shoottlb(void)
3016: {
3017: struct cpu_info *ci, *self = curcpu();
3018: CPU_INFO_ITERATOR cii;
3019: int wait = 0;
3020: int mask = 0;
3021:
3022: CPU_INFO_FOREACH(cii, ci) {
3023: if (ci == self || !(ci->ci_flags & CPUF_RUNNING))
3024: continue;
3025: mask |= 1 << ci->ci_cpuid;
3026: wait++;
3027: }
3028:
3029: if (wait) {
3030: int s = splvm();
3031:
3032: while (i486_atomic_cas_int(&tlb_shoot_wait, 0, wait) != 0) {
3033: while (tlb_shoot_wait != 0)
3034: SPINLOCK_SPIN_HOOK;
3035: }
3036:
3037: CPU_INFO_FOREACH(cii, ci) {
3038: if ((mask & 1 << ci->ci_cpuid) == 0)
3039: continue;
3040: if (i386_fast_ipi(ci, LAPIC_IPI_INVLTLB) != 0)
3041: panic("pmap_tlb_shoottlb: ipi failed");
3042: }
3043: splx(s);
3044: }
3045:
3046: tlbflush();
3047: }
3048:
3049: void
3050: pmap_tlb_shootwait(void)
3051: {
3052: while (tlb_shoot_wait != 0)
3053: SPINLOCK_SPIN_HOOK;
3054: }
3055:
3056: #else
3057:
3058: void
3059: pmap_tlb_shootpage(struct pmap *pm, vaddr_t va)
3060: {
3061: if (pmap_is_curpmap(pm))
3062: pmap_update_pg(va);
3063:
3064: }
3065:
3066: void
3067: pmap_tlb_shootrange(struct pmap *pm, vaddr_t sva, vaddr_t eva)
3068: {
3069: vaddr_t va;
3070:
3071: for (va = sva; va < eva; va += PAGE_SIZE)
3072: pmap_update_pg(va);
3073: }
3074:
3075: void
3076: pmap_tlb_shoottlb(void)
3077: {
3078: tlbflush();
3079: }
3080: #endif /* MULTIPROCESSOR */