/* $OpenBSD: pmapae.c,v 1.13 2007/07/20 19:48:15 mk Exp $ */
/*
* Copyright (c) 2006 Michael Shalayeff
* All rights reserved.
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF MIND, USE, DATA OR PROFITS, WHETHER IN
* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
* OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
/*
*
* Copyright (c) 1997 Charles D. Cranor and Washington University.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Charles D. Cranor and
* Washington University.
* 4. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* from OpenBSD: pmap.c,v 1.85 2005/11/18 17:05:04 brad Exp
*/
/*
* pmap.c: i386 pmap module rewrite
* Chuck Cranor <chuck@ccrc.wustl.edu>
* 11-Aug-97
*
* history of this pmap module: in addition to my own input, i used
* the following references for this rewrite of the i386 pmap:
*
* [1] the NetBSD i386 pmap. this pmap appears to be based on the
* BSD hp300 pmap done by Mike Hibler at University of Utah.
* it was then ported to the i386 by William Jolitz of UUNET
* Technologies, Inc. Then Charles M. Hannum of the NetBSD
* project fixed some bugs and provided some speed ups.
*
* [2] the FreeBSD i386 pmap. this pmap seems to be the
* Hibler/Jolitz pmap, as modified for FreeBSD by John S. Dyson
* and David Greenman.
*
* [3] the Mach pmap. this pmap, from CMU, seems to have migrated
* between several processors. the VAX version was done by
* Avadis Tevanian, Jr., and Michael Wayne Young. the i386
* version was done by Lance Berc, Mike Kupfer, Bob Baron,
* David Golub, and Richard Draves. the alpha version was
* done by Alessandro Forin (CMU/Mach) and Chris Demetriou
* (NetBSD/alpha).
*/
/*
* PAE support
* Michael Shalayeff <mickey@lucifier.net>
*
* This module implements PAE mode for i386.
*
*/
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/pool.h>
#include <sys/user.h>
#include <sys/kernel.h>
#include <sys/mutex.h>
#include <uvm/uvm.h>
#include <machine/atomic.h>
#include <machine/cpu.h>
#include <machine/specialreg.h>
#include <machine/gdt.h>
#include <dev/isa/isareg.h>
#include <sys/msgbuf.h>
#include <stand/boot/bootarg.h>
/*
* this file contains the code for the "pmap module." the module's
* job is to manage the hardware's virtual to physical address mappings.
* note that there are two levels of mapping in the VM system:
*
* [1] the upper layer of the VM system uses vm_map's and vm_map_entry's
* to map ranges of virtual address space to objects/files. for
* example, the vm_map may say: "map VA 0x1000 to 0x22000 read-only
* to the file /bin/ls starting at offset zero." note that
* the upper layer mapping is not concerned with how individual
* vm_pages are mapped.
*
 * [2] the lower layer of the VM system (the pmap) maintains the mappings
 * from virtual addresses to physical pages. it is concerned with which
 * vm_page is mapped where. for example, when you run /bin/ls and start
* at page 0x1000 the fault routine may lookup the correct page
* of the /bin/ls file and then ask the pmap layer to establish
* a mapping for it.
*
* note that information in the lower layer of the VM system can be
* thrown away since it can easily be reconstructed from the info
* in the upper layer.
*
* data structures we use include:
*
* - struct pmap: describes the address space of one thread
* - struct pv_entry: describes one <PMAP,VA> mapping of a PA
* - struct pv_head: there is one pv_head per managed page of
* physical memory. the pv_head points to a list of pv_entry
* structures which describe all the <PMAP,VA> pairs that this
* page is mapped in. this is critical for page based operations
* such as pmap_page_protect() [change protection on _all_ mappings
* of a page]
 * - pv_page/pv_page_info: pv_entry's are allocated out of pv_page's.
 * if we run out of pv_entry's we allocate a new pv_page and add
 * its pv_entry's to the free list.
*/
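/*
 * an illustrative sketch (not compiled) of the approximate shape of the
 * pv structures described above. field names follow their use later in
 * this file; the authoritative definitions live in the pmap headers.
 */
#if 0
struct pv_entry {			/* one <PMAP,VA> mapping of a PA */
	struct pv_entry	*pv_next;	/* next entry on the pv_head list */
	struct pmap	*pv_pmap;	/* pmap containing the mapping */
	vaddr_t		 pv_va;		/* VA of the mapping */
	struct vm_page	*pv_ptp;	/* PTP holding the mapping's PTE */
};

struct pv_head {
	struct simplelock pvh_lock;	/* protects pvh_list */
	struct pv_entry	*pvh_list;	/* all <PMAP,VA> pairs for this PA */
};
#endif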
/*
* i386 PAE hardware Page Tables structure:
*
* the i386 PAE Page Table is a three-level PT which maps 4GB of VA.
* the pagesize is 4K (4096 [0x1000] bytes) or 2MB.
*
 * the first level table is called the "page directory pointer table"
 * (PDPT) and consists of 4 page directory pointer entries (PDPTEs),
 * each 64 bits in size.
 *
 * the second level table is called a "page directory" and it contains
 * 512 page directory entries (PDEs). each PDE is
 * 8 bytes (a long long), so a PD fits in a single 4K page. this page is
 * the page directory page (PDP). each PDE in a PDP maps 2MB of space,
 * so the 512 PDEs of a PDP together map 1GB. a PDE contains the
 * physical address of the next level table: the page table. or, if
 * 2MB pages are being used, then the PDE contains the PA of the 2MB
 * page being mapped.
*
* a page table consists of 512 page table entries (PTEs). each PTE is
* 8 bytes (a long long), so a page table also fits in a single 4K page.
* a 4K page being used as a page table is called a page table page (PTP).
* each PTE in a PTP maps one 4K page (512 * 4K = 2MB). a PTE contains
* the physical address of the page it maps and some flag bits (described
* below).
*
 * the processor has a special register, "cr3", which points to the
 * PDPT which is currently controlling the mappings of the virtual
 * address space.
*
* the following picture shows the translation process for a 4K page:
*
* %cr3 register [PA of PDPT]
* |
* | bits <31-30> of VA
 * | index the PDPTE (0-3)
* | |
* v v
* +-----------+
* | PDP Ptr |
* | 4 entries |
* +-----------+
* |
* PA of PDP
* |
* |
* | bits <29-21> of VA bits <20-12> of VA bits <11-0>
 * | index the PDP (0-511) index the PTP are the page offset
* | | | |
* | v | |
* +-->+---------+ | |
* | PD Page | PA of v |
* | |-----PTP----->+------------+ |
* | 512 PDE | | page table |--PTE--+ |
* | entries | | (aka PTP) | | |
* +---------+ | 512 PTE | | |
* | entries | | |
* +------------+ | |
* | |
* bits <35-12> bits <11-0>
* p h y s i c a l a d d r
*
 * the i386 caches PTEs in a TLB. it is important to flush out old
 * TLB mappings when making a change to a mapping. writing to the
 * %cr3 will flush the entire TLB. newer processors also have an
 * instruction that will invalidate the mapping of a single page (which
 * is useful if you are changing a single mapping because it preserves
 * all the other cached TLB entries).
*
 * as shown above, bits <35-12> of the PTE contain the PA of the page
 * being mapped.
* the rest of the PTE is defined as follows:
* bit# name use
 * 63 NX no-execute bit (1=page not executable), optional
* 11 n/a available for OS use, hardware ignores it
* 10 n/a available for OS use, hardware ignores it
* 9 n/a available for OS use, hardware ignores it
* 8 G global bit (see discussion below)
 * 7 PS page size [for PDEs] (0=4k, 1=2M <if supported>)
* 6 D dirty (modified) page
* 5 A accessed (referenced) page
* 4 PCD cache disable
 * 3 PWT page write-through (cache), 1=write-through
* 2 U/S user/supervisor bit (0=supervisor only, 1=both u&s)
* 1 R/W read/write bit (0=read only, 1=read-write)
* 0 P present (valid)
*
* notes:
 * - on the i386 the R/W bit is ignored if the processor is in supervisor
 * state (bug!); 486+ CPUs can enforce it by setting CR0.WP
* - PS is only supported on newer processors
* - PTEs with the G bit are global in the sense that they are not
* flushed from the TLB when %cr3 is written (to flush, use the
* "flush single page" instruction). this is only supported on
* newer processors. this bit can be used to keep the kernel's
* TLB entries around while context switching. since the kernel
* is mapped into all processes at the same place it does not make
* sense to flush these entries when switching from one process'
* pmap to another.
*/
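/*
 * an illustrative sketch (not compiled): pulling the three table indices
 * and the page offset out of a VA by hand, per the picture above. note
 * that the pdei() macro below instead folds the PDPT and PD indices into
 * one flat 0-2047 index, treating the four PDs as a single array.
 */
#if 0
void
pae_va_split(vaddr_t va)
{
	u_int pdpti = (va >> 30) & 0x3;		/* bits <31-30>: PDPT index */
	u_int pdi = (va >> 21) & 0x1ff;		/* bits <29-21>: PD index */
	u_int pti = (va >> 12) & 0x1ff;		/* bits <20-12>: PT index */
	u_int pgoff = va & 0xfff;		/* bits <11-0>: page offset */

	printf("va %#lx: PDPT[%u] PD[%u] PT[%u] + %#x\n",
	    va, pdpti, pdi, pti, pgoff);
}
#endif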
/*
* A pmap describes a process' 4GB virtual address space. This
* virtual address space can be broken up into 2048 2MB regions which
* are described by PDEs in the PDP. The PDEs are defined as follows:
*
* Ranges are inclusive -> exclusive, just like vm_map_entry start/end.
* The following assumes that KERNBASE is 0xd0000000.
*
* PDE#s VA range Usage
* 0->1660 0x0 -> 0xcf800000 user address space, note that the
 * max user address is 0xcf7fe000
* the final two pages in the last 4MB
* used to be reserved for the UAREA
* but now are no longer used.
 * 1660->1664 0xcf800000-> recursive mapping of PDP (used for
* 0xd0000000 linear mapping of PTPs).
* 1664->2044 0xd0000000-> kernel address space (constant
* 0xff800000 across all pmaps/processes).
 * 2044->2048 0xff800000-> "alternate" recursive PDP mapping
* <end> (for other pmaps).
*
*
 * Note: A recursive PDP mapping provides a way to map all the PTEs for
 * a 4GB address space into a linear chunk of virtual memory. In other
 * words, the PTE for page 0 is the first 8-byte entry mapped into the 8MB
 * recursive area. The PTE for page 1 is the second entry. The very last
 * entry in the 8MB range is the PTE that maps VA 0xffffe000 (the last
 * page in a 4GB address space).
*
 * All pmaps' PDs must have the same values in the kernel slots
 * (1664->2044 above) so that the kernel is always mapped in every
 * process. These values are loaded into the PD at pmap creation time.
*
* At any one time only one pmap can be active on a processor. This is
* the pmap whose PDP is pointed to by processor register %cr3. This pmap
* will have all its PTEs mapped into memory at the recursive mapping
 * point (slots #1660-1663 as shown above). When the pmap code wants to find
 * the
* PTE for a virtual address, all it has to do is the following:
*
* Address of PTE = (1660 * 2MB) + (VA / NBPG) * sizeof(pt_entry_t)
* = 0xcf800000 + (VA / 4096) * 8
*
* What happens if the pmap layer is asked to perform an operation
* on a pmap that is not the one which is currently active? In that
 * case we take the PA of the PDP of the non-active pmap and put it in
 * slots 2044-2047 of the active pmap. This causes the non-active pmap's
 * PTEs to get mapped in the final 8MB of the 4GB address space
 * (i.e. starting at 0xff800000).
*
* The following figure shows the effects of the recursive PDP mapping:
*
* PDP (%cr3->PDPTP)
* +----+
* | 0| -> PTP#0 that maps VA 0x0 -> 0x200000
* | |
* | |
* |1660| -> points back to PDP (%cr3) mapping VA 0xcf800000 -> 0xd0000000
* |1661| (PDP is 4 pages)
* |1662|
* |1663|
 * |1664| -> first kernel PTP (maps 0xd0000000 -> 0xd0200000)
* | |
* |2044| -> points to alternate pmap's PDP (maps 0xff800000 -> end)
* |2045|
* |2046|
* |2047|
* +----+
*
 * Note that the PDE#1660 VA (0xcf800000) is defined as "PTE_BASE".
 * Note that the PDE#2044 VA (0xff800000) is defined as "APTE_BASE".
*
 * Starting at VA 0xcf800000 the currently active PDP (%cr3) acts as the
 * PTP for that range and references four consecutively mapped pages:
*
 * PTP#1660-1663 == PDP(%cr3) => maps VA 0xcf800000 -> 0xd0000000
* +----+
* | 0| -> maps the contents of PTP#0 at VA 0xcf800000->0xcf801000
* | |
* | |
* |1660| -> maps the contents of PTP#1660 (the PDP) at VA 0xcfe7c000
* |1661|
* |1662|
* |1663|
* |1664| -> maps the contents of first kernel PTP
* | |
* |2047|
* +----+
*
* Note that mapping of the PDP at PTP#1660's VA (0xcfe7c000) is
* defined as "PDP_BASE".... within that mapping there are two
* defines:
* "PDP_PDE" (0xcfe7f3e0) is the VA of the PDE in the PDP
* which points back to itself.
* "APDP_PDE" (0xfff02fe0) is the VA of the PDE in the PDP which
* establishes the recursive mapping of the alternate pmap.
* To set the alternate PDP, one just has to put the correct
* PA info in *APDP_PDE.
*
* Note that in the APTE_BASE space, the APDP appears at VA
* "APDP_BASE" (0xffffc000).
*
 * unfortunately we cannot map the PDPT itself recursively through the
 * page tables because, in their infinite wisdom, Intel defined %cr3 as
 * a 32-bit register!
*
*/
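/*
 * worked example (illustrative, not compiled): the PTE-address formula
 * above, applied to VA 0xd0000000 (KERNBASE):
 *
 *	0xcf800000 + (0xd0000000 / 4096) * 8 == 0xcfe80000
 *
 * which is exactly what the vtopte() macro below computes.
 */
#if 0
pt_entry_t *
example_vtopte(vaddr_t va)
{
	return ((pt_entry_t *)0xcf800000UL + (va >> 12));
}
#endif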
/*
* memory allocation
*
* - there are three data structures that we must dynamically allocate:
*
* [A] new process' page directory page (PDP)
 * - plan 1: done at pmap_create() time: we use
 * uvm_km_alloc(kernel_map, 4 * NBPG) [fka kmem_alloc] to do this
 * allocation.
*
* if we are low in free physical memory then we sleep in
* uvm_km_alloc -- in this case this is ok since we are creating
* a new pmap and should not be holding any locks.
*
* if the kernel is totally out of virtual space
* (i.e. uvm_km_alloc returns NULL), then we panic.
*
* XXX: the fork code currently has no way to return an "out of
* memory, try again" error code since uvm_fork [fka vm_fork]
* is a void function.
*
 * [B] new page table pages (PTPs)
* call pae_pagealloc()
* => success: zero page, add to pm_pdir
* => failure: we are out of free vm_pages, let pmap_enter()
* tell UVM about it.
*
* note: for kernel PTPs, we start with NKPTP of them. as we map
* kernel memory (at uvm_map time) we check to see if we've grown
* the kernel pmap. if so, we call the optional function
* pmap_growkernel() to grow the kernel PTPs in advance.
*
* [C] pv_entry structures
* - plan 1: try to allocate one off the free list
* => success: done!
* => failure: no more free pv_entrys on the list
* - plan 2: try to allocate a new pv_page to add a chunk of
* pv_entrys to the free list
* [a] obtain a free, unmapped, VA in kmem_map. either
* we have one saved from a previous call, or we allocate
* one now using a "vm_map_lock_try" in uvm_map
* => success: we have an unmapped VA, continue to [b]
* => failure: unable to lock kmem_map or out of VA in it.
* move on to plan 3.
* [b] allocate a page in kmem_object for the VA
* => success: map it in, free the pv_entry's, DONE!
* => failure: kmem_object locked, no free vm_pages, etc.
* save VA for later call to [a], go to plan 3.
 * - plan 3: give up: we simply let pmap_enter() tell UVM about it.
*/
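/*
 * an illustrative sketch (not compiled, helper names hypothetical) of
 * the pv_entry fallback chain described above; the real allocator is
 * pmap_alloc_pv() in pmap.c.
 */
#if 0
struct pv_entry *
example_alloc_pv(void)
{
	struct pv_entry *pve;

	if ((pve = example_free_list_get()) != NULL)	/* plan 1 */
		return (pve);
	if (example_add_pv_page() == 0)			/* plan 2 */
		return (example_free_list_get());
	return (NULL);		/* plan 3: pmap_enter() tells UVM */
}
#endif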
/*
* locking
*
* we have the following locks that we must contend with:
*
* "simple" locks:
*
* - pmap lock (per pmap, part of uvm_object)
* this lock protects the fields in the pmap structure including
* the non-kernel PDEs in the PDP, and the PTEs. it also locks
* in the alternate PTE space (since that is determined by the
* entry in the PDP).
*
* - pvh_lock (per pv_head)
* this lock protects the pv_entry list which is chained off the
* pv_head structure for a specific managed PA. it is locked
* when traversing the list (e.g. adding/removing mappings,
* syncing R/M bits, etc.)
*
* - pvalloc_lock
* this lock protects the data structures which are used to manage
* the free list of pv_entry structures.
*
* - pmaps_lock
* this lock protects the list of active pmaps (headed by "pmaps").
* we lock it when adding or removing pmaps from this list.
*
*/
/*
* locking data structures
*/
#define PMAP_MAP_TO_HEAD_LOCK() /* null */
#define PMAP_MAP_TO_HEAD_UNLOCK() /* null */
#define PMAP_HEAD_TO_MAP_LOCK() /* null */
#define PMAP_HEAD_TO_MAP_UNLOCK() /* null */
#define PG_FRAME 0xffffff000ULL /* page frame mask */
#define PG_LGFRAME 0xfffe00000ULL /* large (2M) page frame mask */
/*
* Redefine the PDSHIFT, NBPD
*/
#undef PDSHIFT
#define PD_MASK 0xffe00000 /* page directory address bits */
#define PDSHIFT 21 /* page directory address shift */
#define PT_MASK 0x001ff000 /* page table address bits */
#undef NBPD
#define NBPD (1U << PDSHIFT) /* # bytes mapped by PD (2MB) */
/*
 * redefine the PD slots for the recursive, kernel and alternate mappings
 */
#undef PDSLOT_PTE
#define PDSLOT_PTE (1660U) /* 1660: for recursive PDP map */
#undef PDSLOT_KERN
#define PDSLOT_KERN (1664U) /* 1664: start of kernel space */
#undef PDSLOT_APTE
#define PDSLOT_APTE (2044U) /* 2044: alternate recursive slot */
/*
* The following defines give the virtual addresses of various MMU
* data structures:
* PTE_BASE and APTE_BASE: the base VA of the linear PTE mappings
 * PDP_BASE and APDP_BASE: the base VA of the recursive mapping of the PDP
* PDP_PDE and APDP_PDE: the VA of the PDE that points back to the PDP/APDP
*/
#define PTE_BASE ((pt_entry_t *) (PDSLOT_PTE * NBPD) )
#define APTE_BASE ((pt_entry_t *) (PDSLOT_APTE * NBPD) )
#define PDP_BASE ((pd_entry_t *)(((char *)PTE_BASE) + (PDSLOT_PTE * NBPG)))
#define APDP_BASE ((pd_entry_t *)(((char *)APTE_BASE) + (PDSLOT_APTE * NBPG)))
#define PDP_PDE (PDP_BASE + PDSLOT_PTE)
#define APDP_PDE (PDP_BASE + PDSLOT_APTE)
#define PTES_PER_PTP (NBPG / sizeof(pt_entry_t)) /* # of PTEs in a PTP */
/*
* various address macros
*
* vtopte: return a pointer to the PTE mapping a VA
*
*/
#define vtopte(VA) (PTE_BASE + atop((vaddr_t)VA))
/*
* pdei/ptei: generate index into PDP/PTP from a VA
*/
#define pdei(VA) (((VA) & PD_MASK) >> PDSHIFT)
#define ptei(VA) (((VA) & PT_MASK) >> PGSHIFT)
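/*
 * worked example: with KERNBASE == 0xd0000000,
 *	pdei(0xd0000000) == 0xd0000000 >> 21 == 1664 == PDSLOT_KERN
 *	ptei(0xd0003000) == (0x3000 & PT_MASK) >> 12 == 3
 * i.e. pdei() yields a flat 0-2047 index spanning all four PDP pages.
 */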
/*
* Mach derived conversion macros
*/
#define i386_round_pdr(x) ((((unsigned)(x)) + ~PD_MASK) & PD_MASK)
/*
* PTP macros:
* A PTP's index is the PD index of the PDE that points to it.
* A PTP's offset is the byte-offset in the PTE space that this PTP is at.
* A PTP's VA is the first VA mapped by that PTP.
*
 * Note that NBPG == number of bytes in a PTP (4096 bytes == 512 entries)
 * NBPD == number of bytes a PTP can map (2MB)
*/
#define ptp_i2o(I) ((I) * NBPG) /* index => offset */
#define ptp_o2i(O) ((O) / NBPG) /* offset => index */
#define ptp_i2v(I) ((I) * NBPD) /* index => VA */
#define ptp_v2i(V) ((V) / NBPD) /* VA => index (same as pdei) */
/*
* Access PD and PT
*/
#define PDE(pm,i) (((pd_entry_t *)(pm)->pm_pdir)[(i)])
/*
* here we define the data types for PDEs and PTEs
*/
typedef u_int64_t pd_entry_t; /* PDE */
typedef u_int64_t pt_entry_t; /* PTE */
/*
 * Number of PTEs per cache line: 8-byte PTE, 32-byte cache line.
* Used to avoid false sharing of cache lines.
*/
#define NPTECL 4
/*
* other data structures
*/
extern u_int32_t protection_codes[]; /* maps MI prot to i386 prot code */
extern boolean_t pmap_initialized; /* pmap_init done yet? */
/*
 * MULTIPROCESSOR: special VAs/PTEs are actually allocated inside an
 * I386_MAXPROCS*NPTECL array of PTEs, to avoid cache line thrashing
* due to false sharing.
*/
#ifdef MULTIPROCESSOR
#define PTESLEW(pte, id) ((pte)+(id)*NPTECL)
#define VASLEW(va,id) ((va)+(id)*NPTECL*NBPG)
#else
#define PTESLEW(pte, id) (pte)
#define VASLEW(va,id) (va)
#endif
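/*
 * an illustrative sketch (not compiled): with NPTECL == 4 and 8-byte
 * PTEs, each CPU's slewed PTE starts on its own 32-byte cache line and
 * the matching VA advances by NPTECL pages per CPU.
 */
#if 0
void
example_slew(pt_entry_t *pte, caddr_t va, int id)
{
	pt_entry_t *mypte = PTESLEW(pte, id);	/* id * 32 bytes apart */
	caddr_t myva = VASLEW(va, id);		/* id * 4 pages apart */

	*mypte = 0;		/* each CPU touches only its own line */
	pmap_update_pg((vaddr_t)myva);
}
#endif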
/*
* special VAs and the PTEs that map them
*/
static pt_entry_t *csrc_pte, *cdst_pte, *zero_pte, *ptp_pte;
extern caddr_t pmap_csrcp, pmap_cdstp, pmap_zerop, pmap_ptpp;
extern int pmap_pg_g;
extern struct pmap_head pmaps;
/*
 * a version of uvm_pagealloc() that prefers pages above 4GB first
*/
#define pae_pagealloc(obj, off, anon, flags) \
uvm_pagealloc_strat((obj), (off), (anon), (flags), \
UVM_PGA_STRAT_FALLBACK, VM_FREELIST_ABOVE4G)
/*
* local prototypes
*/
struct vm_page *pmap_alloc_ptp_pae(struct pmap *, int, boolean_t);
#define ALLOCPV_NEED 0 /* need PV now */
#define ALLOCPV_TRY 1 /* just try to allocate, don't steal */
#define ALLOCPV_NONEED 2 /* don't need PV, just growing cache */
struct vm_page *pmap_get_ptp_pae(struct pmap *, int, boolean_t);
pt_entry_t *pmap_map_ptes_pae(struct pmap *);
void pmap_remove_ptes_pae(struct pmap *, struct vm_page *,
vaddr_t, vaddr_t, vaddr_t, int32_t *);
boolean_t pmap_remove_pte_pae(struct pmap *, struct vm_page *,
pt_entry_t *, vaddr_t, int32_t *);
void pmap_unmap_ptes_pae(struct pmap *);
vaddr_t pmap_tmpmap_pa_pae(paddr_t);
pt_entry_t *pmap_tmpmap_pvepte_pae(struct pv_entry *);
void pmap_tmpunmap_pa_pae(void);
void pmap_tmpunmap_pvepte_pae(struct pv_entry *);
/*
* pmap_tmpmap_pa: map a page in for tmp usage
*/
vaddr_t
pmap_tmpmap_pa_pae(paddr_t pa)
{
#ifdef MULTIPROCESSOR
int id = cpu_number();
#endif
pt_entry_t *ptpte = PTESLEW(ptp_pte, id);
caddr_t ptpva = VASLEW(pmap_ptpp, id);
#if defined(DIAGNOSTIC)
if (*ptpte)
panic("pmap_tmpmap_pa: ptp_pte in use?");
#endif
*ptpte = PG_V | PG_RW | pa; /* always a new mapping */
return((vaddr_t)ptpva);
}
/*
* pmap_tmpunmap_pa: unmap a tmp use page (undoes pmap_tmpmap_pa)
*/
void
pmap_tmpunmap_pa_pae(void)
{
#ifdef MULTIPROCESSOR
int id = cpu_number();
#endif
pt_entry_t *ptpte = PTESLEW(ptp_pte, id);
caddr_t ptpva = VASLEW(pmap_ptpp, id);
#if defined(DIAGNOSTIC)
if (!pmap_valid_entry(*ptpte))
panic("pmap_tmpunmap_pa: our pte invalid?");
#endif
*ptpte = 0; /* zap! */
pmap_update_pg((vaddr_t)ptpva);
#ifdef MULTIPROCESSOR
/*
* No need for tlb shootdown here, since ptp_pte is per-CPU.
*/
#endif
}
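/*
 * illustrative usage (not compiled): pmap_tmpmap_pa_pae() and
 * pmap_tmpunmap_pa_pae() must be strictly paired, e.g. to peek at one
 * word of an arbitrary physical page:
 */
#if 0
u_int32_t
example_peek_phys(paddr_t pa)
{
	u_int32_t *p = (u_int32_t *)pmap_tmpmap_pa_pae(trunc_page(pa));
	u_int32_t v = *p;

	pmap_tmpunmap_pa_pae();
	return (v);
}
#endif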
/*
* pmap_tmpmap_pvepte: get a quick mapping of a PTE for a pv_entry
*
* => do NOT use this on kernel mappings [why? because pv_ptp may be NULL]
*/
pt_entry_t *
pmap_tmpmap_pvepte_pae(struct pv_entry *pve)
{
#ifdef DIAGNOSTIC
if (pve->pv_pmap == pmap_kernel())
panic("pmap_tmpmap_pvepte: attempt to map kernel");
#endif
/* is it current pmap? use direct mapping... */
if (pmap_is_curpmap(pve->pv_pmap))
return(vtopte(pve->pv_va));
return(((pt_entry_t *)pmap_tmpmap_pa_pae(VM_PAGE_TO_PHYS(pve->pv_ptp)))
+ ptei((unsigned)pve->pv_va));
}
/*
* pmap_tmpunmap_pvepte: release a mapping obtained with pmap_tmpmap_pvepte
*/
void
pmap_tmpunmap_pvepte_pae(struct pv_entry *pve)
{
/* was it current pmap? if so, return */
if (pmap_is_curpmap(pve->pv_pmap))
return;
pmap_tmpunmap_pa_pae();
}
/*
* pmap_map_ptes: map a pmap's PTEs into KVM and lock them in
*
* => we lock enough pmaps to keep things locked in
* => must be undone with pmap_unmap_ptes before returning
*/
pt_entry_t *
pmap_map_ptes_pae(struct pmap *pmap)
{
pd_entry_t opde;
/* the kernel's pmap is always accessible */
if (pmap == pmap_kernel()) {
return(PTE_BASE);
}
/* if curpmap then we are always mapped */
if (pmap_is_curpmap(pmap)) {
simple_lock(&pmap->pm_obj.vmobjlock);
return(PTE_BASE);
}
/* need to lock both curpmap and pmap: use ordered locking */
if ((unsigned) pmap < (unsigned) curpcb->pcb_pmap) {
simple_lock(&pmap->pm_obj.vmobjlock);
simple_lock(&curpcb->pcb_pmap->pm_obj.vmobjlock);
} else {
simple_lock(&curpcb->pcb_pmap->pm_obj.vmobjlock);
simple_lock(&pmap->pm_obj.vmobjlock);
}
/* need to load a new alternate pt space into curpmap? */
opde = *APDP_PDE;
	/* compare frame bits only: pm_pdidx[] entries also carry PG_V */
	if (!pmap_valid_entry(opde) ||
	    (opde & PG_FRAME) != (pmap->pm_pdidx[0] & PG_FRAME)) {
APDP_PDE[0] = pmap->pm_pdidx[0] | PG_RW | PG_V;
APDP_PDE[1] = pmap->pm_pdidx[1] | PG_RW | PG_V;
APDP_PDE[2] = pmap->pm_pdidx[2] | PG_RW | PG_V;
APDP_PDE[3] = pmap->pm_pdidx[3] | PG_RW | PG_V;
if (pmap_valid_entry(opde))
pmap_apte_flush(curpcb->pcb_pmap);
}
return(APTE_BASE);
}
/*
* pmap_unmap_ptes: unlock the PTE mapping of "pmap"
*/
void
pmap_unmap_ptes_pae(struct pmap *pmap)
{
if (pmap == pmap_kernel())
return;
if (pmap_is_curpmap(pmap)) {
simple_unlock(&pmap->pm_obj.vmobjlock);
} else {
#if defined(MULTIPROCESSOR)
APDP_PDE[0] = 0;
APDP_PDE[1] = 0;
APDP_PDE[2] = 0;
APDP_PDE[3] = 0;
pmap_apte_flush(curpcb->pcb_pmap);
#endif
simple_unlock(&pmap->pm_obj.vmobjlock);
simple_unlock(&curpcb->pcb_pmap->pm_obj.vmobjlock);
}
}
u_int32_t
pmap_pte_set_pae(vaddr_t va, paddr_t pa, u_int32_t bits)
{
pt_entry_t pte, *ptep = vtopte(va);
pte = i386_atomic_testset_uq(ptep, pa | bits);
return (pte & ~PG_FRAME);
}
u_int32_t
pmap_pte_setbits_pae(vaddr_t va, u_int32_t set, u_int32_t clr)
{
pt_entry_t *ptep = vtopte(va);
pt_entry_t pte = *ptep;
i386_atomic_testset_uq(ptep, (pte | set) & ~(pt_entry_t)clr);
return (pte & ~PG_FRAME);
}
u_int32_t
pmap_pte_bits_pae(vaddr_t va)
{
pt_entry_t *ptep = vtopte(va);
return (*ptep & ~PG_FRAME);
}
paddr_t
pmap_pte_paddr_pae(vaddr_t va)
{
pt_entry_t *ptep = vtopte(va);
return (*ptep & PG_FRAME);
}
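/*
 * illustrative usage (not compiled): the accessors above combine to
 * inspect or adjust a live kernel mapping; e.g. write-protecting a
 * kernel VA amounts to clearing PG_RW:
 */
#if 0
void
example_write_protect(vaddr_t va)
{
	if (pmap_pte_bits_pae(va) & PG_V)
		pmap_pte_setbits_pae(va, 0, PG_RW);	/* clear R/W bit */
}
#endif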
/*
* Switch over to PAE page tables
*/
void
pmap_bootstrap_pae(void)
{
extern paddr_t avail_end, avail_end2;
extern int cpu_pae, nkpde;
struct pmap *kpm = pmap_kernel();
struct vm_page *ptp;
paddr_t ptaddr;
u_int32_t bits;
vaddr_t va, eva;
int i, pn, pe;
if (!cpu_pae || avail_end >= avail_end2 || !(cpu_feature & CPUID_PAE)){
avail_end2 = avail_end;
return;
}
va = (vaddr_t)kpm->pm_pdir;
kpm->pm_pdidx[0] = (va + 0*NBPG - KERNBASE) | PG_V;
kpm->pm_pdidx[1] = (va + 1*NBPG - KERNBASE) | PG_V;
kpm->pm_pdidx[2] = (va + 2*NBPG - KERNBASE) | PG_V;
kpm->pm_pdidx[3] = (va + 3*NBPG - KERNBASE) | PG_V;
/* map pde recursively into itself */
PDE(kpm, PDSLOT_PTE+0) = kpm->pm_pdidx[0] | PG_KW;
PDE(kpm, PDSLOT_PTE+1) = kpm->pm_pdidx[1] | PG_KW;
PDE(kpm, PDSLOT_PTE+2) = kpm->pm_pdidx[2] | PG_KW;
PDE(kpm, PDSLOT_PTE+3) = kpm->pm_pdidx[3] | PG_KW;
/* transfer all kernel mappings over into pae tables */
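	/* note: nkpde still counts 4MB non-PAE PDEs here, hence << 22 */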
for (va = KERNBASE, eva = va + (nkpde << 22);
va < eva; va += PAGE_SIZE) {
if (!pmap_valid_entry(PDE(kpm, pdei(va)))) {
ptp = pae_pagealloc(&kpm->pm_obj, va, NULL,
UVM_PGA_ZERO);
ptaddr = VM_PAGE_TO_PHYS(ptp);
PDE(kpm, pdei(va)) = ptaddr | PG_KW | PG_V;
pmap_pte_set_86((vaddr_t)vtopte(va),
ptaddr, PG_KW | PG_V);
/* count PTP as resident */
kpm->pm_stats.resident_count++;
}
bits = pmap_pte_bits_86(va) | pmap_pg_g;
if (pmap_valid_entry(bits))
pmap_pte_set_pae(va, pmap_pte_paddr_86(va), bits);
}
if (!cpu_paenable(&kpm->pm_pdidx[0])) {
extern struct user *proc0paddr;
proc0paddr->u_pcb.pcb_cr3 = kpm->pm_pdirpa =
(vaddr_t)kpm - KERNBASE;
kpm->pm_pdirsize = 4 * NBPG;
csrc_pte = vtopte(pmap_csrcp);
cdst_pte = vtopte(pmap_cdstp);
zero_pte = vtopte(pmap_zerop);
ptp_pte = vtopte(pmap_ptpp);
nkpde *= 2;
nkptp_max = 2048 - PDSLOT_KERN - 4;
vm_max_address = (PDSLOT_PTE << PDSHIFT) +
(PDSLOT_PTE << PGSHIFT);
pmap_pte_set_p = pmap_pte_set_pae;
pmap_pte_setbits_p = pmap_pte_setbits_pae;
pmap_pte_bits_p = pmap_pte_bits_pae;
pmap_pte_paddr_p = pmap_pte_paddr_pae;
pmap_change_attrs_p = pmap_change_attrs_pae;
pmap_enter_p = pmap_enter_pae;
pmap_extract_p = pmap_extract_pae;
pmap_growkernel_p = pmap_growkernel_pae;
pmap_page_remove_p = pmap_page_remove_pae;
pmap_remove_p = pmap_remove_pae;
pmap_test_attrs_p = pmap_test_attrs_pae;
pmap_unwire_p = pmap_unwire_pae;
pmap_write_protect_p = pmap_write_protect_pae;
pmap_pinit_pd_p = pmap_pinit_pd_pae;
pmap_zero_phys_p = pmap_zero_phys_pae;
pmap_zero_page_uncached_p = pmap_zero_page_uncached_pae;
pmap_copy_page_p = pmap_copy_page_pae;
pmap_try_steal_pv_p = pmap_try_steal_pv_pae;
bzero((void *)kpm->pm_pdir + 8, (PDSLOT_PTE-1) * 8);
/* TODO also reclaim old PDPs */
for (i = 0; i < vm_nphysseg; i++)
if (vm_physmem[i].start > atop(0xfffff000)) {
vm_physmem[i].avail_end = vm_physmem[i].end;
/* free vm_pages (uvm had already zeroed 'em) */
for (pn = 0, pe = vm_physmem[i].end -
vm_physmem[i].start; pn < pe ; pn++) {
uvmexp.npages++;
/* add page to free pool */
uvm_pagefree(&vm_physmem[i].pgs[pn]);
}
}
uvm_page_rehash();
}
}
/*
* p v _ e n t r y f u n c t i o n s
*/
/*
* pv_entry allocation functions:
* the main pv_entry allocation functions are:
* pmap_alloc_pv: allocate a pv_entry structure
* pmap_free_pv: free one pv_entry
* pmap_free_pvs: free a list of pv_entrys
*
* the rest are helper functions
*/
/*
* pmap_try_steal_pv: try and steal a pv_entry from a pmap
*
* => return true if we did it!
*/
boolean_t
pmap_try_steal_pv_pae(struct pv_head *pvh, struct pv_entry *cpv,
struct pv_entry *prevpv)
{
pt_entry_t *ptep, opte;
#ifdef MULTIPROCESSOR
int32_t cpumask = 0;
#endif
/*
* we never steal kernel mappings or mappings from pmaps we can't lock
*/
if (cpv->pv_pmap == pmap_kernel() ||
!simple_lock_try(&cpv->pv_pmap->pm_obj.vmobjlock))
return(FALSE);
/*
* yes, we can try and steal it. first we need to remove the
* mapping from the pmap.
*/
ptep = pmap_tmpmap_pvepte_pae(cpv);
if (*ptep & PG_W) {
ptep = NULL; /* wired page, avoid stealing this one */
} else {
opte = i386_atomic_testset_uq(ptep, 0); /* zap! */
#ifdef MULTIPROCESSOR
pmap_tlb_shootdown(cpv->pv_pmap, cpv->pv_va, opte, &cpumask);
pmap_tlb_shootnow(cpumask);
#else
/* Don't bother deferring in the single CPU case. */
if (pmap_is_curpmap(cpv->pv_pmap))
pmap_update_pg(cpv->pv_va);
#endif
pmap_tmpunmap_pvepte_pae(cpv);
}
if (ptep == NULL) {
simple_unlock(&cpv->pv_pmap->pm_obj.vmobjlock);
return(FALSE); /* wired page, abort! */
}
cpv->pv_pmap->pm_stats.resident_count--;
if (cpv->pv_ptp && cpv->pv_ptp->wire_count)
/* drop PTP's wired count */
cpv->pv_ptp->wire_count--;
/*
* XXX: if wire_count goes to one the PTP could be freed, however,
* we'd have to lock the page queues (etc.) to do that and it could
* cause deadlock headaches. besides, the pmap we just stole from
* may want the mapping back anyway, so leave the PTP around.
*/
/*
* now we need to remove the entry from the pvlist
*/
if (cpv == pvh->pvh_list)
pvh->pvh_list = cpv->pv_next;
else
prevpv->pv_next = cpv->pv_next;
return(TRUE);
}
/*
* p t p f u n c t i o n s
*/
/*
* pmap_alloc_ptp: allocate a PTP for a PMAP
*
* => pmap should already be locked by caller
* => we use the ptp's wire_count to count the number of active mappings
* in the PTP (we start it at one to prevent any chance this PTP
* will ever leak onto the active/inactive queues)
* => we should not be holding any pv_head locks (in case we are forced
* to call pmap_steal_ptp())
* => we may need to lock pv_head's if we have to steal a PTP
* => just_try: true if we want a PTP, but not enough to steal one
* from another pmap (e.g. during optional functions like pmap_copy)
*/
struct vm_page *
pmap_alloc_ptp_pae(struct pmap *pmap, int pde_index, boolean_t just_try)
{
struct vm_page *ptp;
ptp = pae_pagealloc(&pmap->pm_obj, ptp_i2o(pde_index), NULL,
UVM_PGA_USERESERVE|UVM_PGA_ZERO);
if (ptp == NULL)
return(NULL);
/* got one! */
atomic_clearbits_int(&ptp->pg_flags, PG_BUSY);
ptp->wire_count = 1; /* no mappings yet */
PDE(pmap, pde_index) =
(pd_entry_t)(VM_PAGE_TO_PHYS(ptp) | PG_u | PG_RW | PG_V);
pmap->pm_stats.resident_count++; /* count PTP as resident */
pmap->pm_ptphint = ptp;
return(ptp);
}
/*
* pmap_get_ptp: get a PTP (if there isn't one, allocate a new one)
*
* => pmap should NOT be pmap_kernel()
* => pmap should be locked
*/
struct vm_page *
pmap_get_ptp_pae(struct pmap *pmap, int pde_index, boolean_t just_try)
{
struct vm_page *ptp;
if (pmap_valid_entry(PDE(pmap, pde_index))) {
/* valid... check hint (saves us a PA->PG lookup) */
if (pmap->pm_ptphint &&
(PDE(pmap, pde_index) & PG_FRAME) ==
VM_PAGE_TO_PHYS(pmap->pm_ptphint))
return(pmap->pm_ptphint);
ptp = uvm_pagelookup(&pmap->pm_obj, ptp_i2o(pde_index));
#ifdef DIAGNOSTIC
if (ptp == NULL)
panic("pmap_get_ptp: unmanaged user PTP");
#endif
pmap->pm_ptphint = ptp;
return(ptp);
}
/* allocate a new PTP (updates ptphint) */
return (pmap_alloc_ptp_pae(pmap, pde_index, just_try));
}
/*
* pmap_pinit_pd: given a freshly allocated pmap structure, give it a PD
*/
void
pmap_pinit_pd_pae(struct pmap *pmap)
{
extern int nkpde;
vaddr_t va;
/* allocate PDP */
pmap->pm_pdir = uvm_km_alloc(kernel_map, 4 * NBPG);
if (pmap->pm_pdir == NULL)
panic("pmap_pinit: kernel_map out of virtual space!");
	/* the PDPT (pm_pdidx) sits at the start of the pmap structure */
pmap_extract(pmap_kernel(), (vaddr_t)pmap, &pmap->pm_pdirpa);
/* fill out the PDPT entries */
va = (vaddr_t)pmap->pm_pdir;
pmap_extract(pmap_kernel(), va + 0*NBPG, &pmap->pm_pdidx[0]);
pmap_extract(pmap_kernel(), va + 1*NBPG, &pmap->pm_pdidx[1]);
pmap_extract(pmap_kernel(), va + 2*NBPG, &pmap->pm_pdidx[2]);
pmap_extract(pmap_kernel(), va + 3*NBPG, &pmap->pm_pdidx[3]);
pmap->pm_pdidx[0] |= PG_V;
pmap->pm_pdidx[1] |= PG_V;
pmap->pm_pdidx[2] |= PG_V;
pmap->pm_pdidx[3] |= PG_V;
pmap->pm_pdirsize = 4 * NBPG;
/* init PDP */
/* zero init area */
bzero((void *)pmap->pm_pdir, PDSLOT_PTE * sizeof(pd_entry_t));
/* put in recursive PDE to map the PTEs */
PDE(pmap, PDSLOT_PTE+0) = pmap->pm_pdidx[0] | PG_KW;
PDE(pmap, PDSLOT_PTE+1) = pmap->pm_pdidx[1] | PG_KW;
PDE(pmap, PDSLOT_PTE+2) = pmap->pm_pdidx[2] | PG_KW;
PDE(pmap, PDSLOT_PTE+3) = pmap->pm_pdidx[3] | PG_KW;
/*
* we need to lock pmaps_lock to prevent nkpde from changing on
* us. note that there is no need to splvm to protect us from
* malloc since malloc allocates out of a submap and we should have
* already allocated kernel PTPs to cover the range...
*/
simple_lock(&pmaps_lock);
/* put in kernel VM PDEs */
bcopy(&PDP_BASE[PDSLOT_KERN], &PDE(pmap, PDSLOT_KERN),
nkpde * sizeof(pd_entry_t));
/* zero the rest */
bzero(&PDE(pmap, PDSLOT_KERN + nkpde), pmap->pm_pdirsize -
((PDSLOT_KERN + nkpde) * sizeof(pd_entry_t)));
LIST_INSERT_HEAD(&pmaps, pmap, pm_list);
simple_unlock(&pmaps_lock);
}
/*
* some misc. functions
*/
/*
* pmap_extract: extract a PA for the given VA
*/
boolean_t
pmap_extract_pae(struct pmap *pmap, vaddr_t va, paddr_t *pap)
{
paddr_t retval;
pt_entry_t *ptes;
if (PDE(pmap, pdei(va))) {
ptes = pmap_map_ptes_pae(pmap);
retval = (paddr_t)(ptes[atop(va)] & PG_FRAME);
pmap_unmap_ptes_pae(pmap);
if (pap != NULL)
*pap = retval | (va & ~PG_FRAME);
return (TRUE);
}
return (FALSE);
}
extern void (*pagezero)(void *, size_t);
/*
* pmap_zero_phys: same as pmap_zero_page, but for use before vm_pages are
* initialized.
*/
void
pmap_zero_phys_pae(paddr_t pa)
{
#ifdef MULTIPROCESSOR
int id = cpu_number();
#endif
pt_entry_t *zpte = PTESLEW(zero_pte, id);
caddr_t zerova = VASLEW(pmap_zerop, id);
#ifdef DIAGNOSTIC
if (*zpte)
panic("pmap_zero_phys: lock botch");
#endif
*zpte = (pa & PG_FRAME) | PG_V | PG_RW; /* map in */
pmap_update_pg((vaddr_t)zerova); /* flush TLB */
pagezero(zerova, PAGE_SIZE); /* zero */
*zpte = 0; /* zap! */
}
/*
* pmap_zero_page_uncached: the same, except uncached.
*/
boolean_t
pmap_zero_page_uncached_pae(paddr_t pa)
{
#ifdef MULTIPROCESSOR
int id = cpu_number();
#endif
pt_entry_t *zpte = PTESLEW(zero_pte, id);
caddr_t zerova = VASLEW(pmap_zerop, id);
#ifdef DIAGNOSTIC
if (*zpte)
panic("pmap_zero_page_uncached: lock botch");
#endif
	*zpte = (pa & PG_FRAME) | PG_V | PG_RW | PG_N;	/* map in */
pmap_update_pg((vaddr_t)zerova); /* flush TLB */
pagezero(zerova, PAGE_SIZE); /* zero */
*zpte = 0; /* zap! */
return (TRUE);
}
/*
* pmap_copy_page: copy a page
*/
void
pmap_copy_page_pae(struct vm_page *srcpg, struct vm_page *dstpg)
{
paddr_t srcpa = VM_PAGE_TO_PHYS(srcpg);
paddr_t dstpa = VM_PAGE_TO_PHYS(dstpg);
#ifdef MULTIPROCESSOR
int id = cpu_number();
#endif
pt_entry_t *spte = PTESLEW(csrc_pte,id);
pt_entry_t *dpte = PTESLEW(cdst_pte,id);
caddr_t csrcva = VASLEW(pmap_csrcp, id);
caddr_t cdstva = VASLEW(pmap_cdstp, id);
#ifdef DIAGNOSTIC
if (*spte || *dpte)
panic("pmap_copy_page: lock botch");
#endif
*spte = (srcpa & PG_FRAME) | PG_V | PG_RW;
*dpte = (dstpa & PG_FRAME) | PG_V | PG_RW;
pmap_update_2pg((vaddr_t)csrcva, (vaddr_t)cdstva);
bcopy(csrcva, cdstva, PAGE_SIZE);
*spte = *dpte = 0; /* zap! */
pmap_update_2pg((vaddr_t)csrcva, (vaddr_t)cdstva);
#ifdef MULTIPROCESSOR
/* Using per-cpu VA; no shootdown required here. */
#endif
}
/*
* p m a p r e m o v e f u n c t i o n s
*
* functions that remove mappings
*/
/*
* pmap_remove_ptes: remove PTEs from a PTP
*
* => must have proper locking on pmap_master_lock
* => caller must hold pmap's lock
* => PTP must be mapped into KVA
* => PTP should be null if pmap == pmap_kernel()
*/
void
pmap_remove_ptes_pae(struct pmap *pmap, struct vm_page *ptp, vaddr_t ptpva,
vaddr_t startva, vaddr_t endva, int32_t *cpumaskp)
{
struct pv_entry *pv_tofree = NULL; /* list of pv_entrys to free */
struct pv_entry *pve;
pt_entry_t *pte = (pt_entry_t *) ptpva;
pt_entry_t opte;
int bank, off;
/*
* note that ptpva points to the PTE that maps startva. this may
* or may not be the first PTE in the PTP.
*
* we loop through the PTP while there are still PTEs to look at
* and the wire_count is greater than 1 (because we use the wire_count
* to keep track of the number of real PTEs in the PTP).
*/
for (/*null*/; startva < endva && (ptp == NULL || ptp->wire_count > 1)
; pte++, startva += NBPG) {
if (!pmap_valid_entry(*pte))
continue; /* VA not mapped */
opte = i386_atomic_testset_uq(pte, 0); /* zap! */
if (opte & PG_W)
pmap->pm_stats.wired_count--;
pmap->pm_stats.resident_count--;
if (opte & PG_U)
pmap_tlb_shootdown(pmap, startva, opte, cpumaskp);
if (ptp) {
ptp->wire_count--; /* dropping a PTE */
/* Make sure that the PDE is flushed */
if ((ptp->wire_count <= 1) && !(opte & PG_U))
pmap_tlb_shootdown(pmap, startva, opte,
cpumaskp);
}
/*
* if we are not on a pv_head list we are done.
*/
if ((opte & PG_PVLIST) == 0) {
#ifdef DIAGNOSTIC
if (vm_physseg_find(atop(opte & PG_FRAME), &off)
!= -1)
panic("pmap_remove_ptes: managed page without "
"PG_PVLIST for 0x%lx", startva);
#endif
continue;
}
bank = vm_physseg_find(atop(opte & PG_FRAME), &off);
#ifdef DIAGNOSTIC
if (bank == -1)
panic("pmap_remove_ptes: unmanaged page marked "
"PG_PVLIST, va = 0x%lx, pa = 0x%lx",
startva, (u_long)(opte & PG_FRAME));
#endif
/* sync R/M bits */
simple_lock(&vm_physmem[bank].pmseg.pvhead[off].pvh_lock);
vm_physmem[bank].pmseg.attrs[off] |= (opte & (PG_U|PG_M));
pve = pmap_remove_pv(&vm_physmem[bank].pmseg.pvhead[off], pmap,
startva);
simple_unlock(&vm_physmem[bank].pmseg.pvhead[off].pvh_lock);
if (pve) {
pve->pv_next = pv_tofree;
pv_tofree = pve;
}
/* end of "for" loop: time for next pte */
}
if (pv_tofree)
pmap_free_pvs(pmap, pv_tofree);
}
/*
* pmap_remove_pte: remove a single PTE from a PTP
*
* => must have proper locking on pmap_master_lock
* => caller must hold pmap's lock
* => PTP must be mapped into KVA
* => PTP should be null if pmap == pmap_kernel()
* => returns true if we removed a mapping
*/
boolean_t
pmap_remove_pte_pae(struct pmap *pmap, struct vm_page *ptp, pt_entry_t *pte,
vaddr_t va, int32_t *cpumaskp)
{
pt_entry_t opte;
int bank, off;
struct pv_entry *pve;
if (!pmap_valid_entry(*pte))
return(FALSE); /* VA not mapped */
opte = *pte; /* save the old PTE */
*pte = 0; /* zap! */
pmap_exec_account(pmap, va, opte, 0);
if (opte & PG_W)
pmap->pm_stats.wired_count--;
pmap->pm_stats.resident_count--;
if (opte & PG_U)
pmap_tlb_shootdown(pmap, va, opte, cpumaskp);
if (ptp) {
ptp->wire_count--; /* dropping a PTE */
/* Make sure that the PDE is flushed */
if ((ptp->wire_count <= 1) && !(opte & PG_U))
pmap_tlb_shootdown(pmap, va, opte, cpumaskp);
}
/*
* if we are not on a pv_head list we are done.
*/
if ((opte & PG_PVLIST) == 0) {
#ifdef DIAGNOSTIC
if (vm_physseg_find(atop(opte & PG_FRAME), &off) != -1)
panic("pmap_remove_pte: managed page without "
"PG_PVLIST for 0x%lx", va);
#endif
return(TRUE);
}
bank = vm_physseg_find(atop(opte & PG_FRAME), &off);
#ifdef DIAGNOSTIC
if (bank == -1)
panic("pmap_remove_pte: unmanaged page marked "
"PG_PVLIST, va = 0x%lx, pa = 0x%lx", va,
(u_long)(opte & PG_FRAME));
#endif
/* sync R/M bits */
simple_lock(&vm_physmem[bank].pmseg.pvhead[off].pvh_lock);
vm_physmem[bank].pmseg.attrs[off] |= (opte & (PG_U|PG_M));
pve = pmap_remove_pv(&vm_physmem[bank].pmseg.pvhead[off], pmap, va);
simple_unlock(&vm_physmem[bank].pmseg.pvhead[off].pvh_lock);
if (pve)
pmap_free_pv(pmap, pve);
return(TRUE);
}
/*
* pmap_remove: top level mapping removal function
*
* => caller should not be holding any pmap locks
*/
void
pmap_remove_pae(struct pmap *pmap, vaddr_t sva, vaddr_t eva)
{
pt_entry_t *ptes, opte;
boolean_t result;
paddr_t ptppa;
vaddr_t blkendva;
struct vm_page *ptp;
int32_t cpumask = 0;
TAILQ_HEAD(, vm_page) empty_ptps;
/*
* we lock in the pmap => pv_head direction
*/
TAILQ_INIT(&empty_ptps);
PMAP_MAP_TO_HEAD_LOCK();
ptes = pmap_map_ptes_pae(pmap); /* locks pmap */
/*
* removing one page? take shortcut function.
*/
if (sva + PAGE_SIZE == eva) {
if (pmap_valid_entry(PDE(pmap, pdei(sva)))) {
/* PA of the PTP */
ptppa = PDE(pmap, pdei(sva)) & PG_FRAME;
/* get PTP if non-kernel mapping */
if (pmap == pmap_kernel()) {
/* we never free kernel PTPs */
ptp = NULL;
} else {
if (pmap->pm_ptphint &&
VM_PAGE_TO_PHYS(pmap->pm_ptphint) ==
ptppa) {
ptp = pmap->pm_ptphint;
} else {
ptp = PHYS_TO_VM_PAGE(ptppa);
#ifdef DIAGNOSTIC
if (ptp == NULL)
panic("pmap_remove: unmanaged "
"PTP detected");
#endif
}
}
/* do it! */
result = pmap_remove_pte_pae(pmap, ptp,
&ptes[atop(sva)], sva, &cpumask);
/*
* if mapping removed and the PTP is no longer
* being used, free it!
*/
if (result && ptp && ptp->wire_count <= 1) {
opte = i386_atomic_testset_uq(&PDE(pmap,
pdei(sva)), 0); /* zap! */
#ifdef MULTIPROCESSOR
/*
* XXXthorpej Redundant shootdown can happen
* here if we're using APTE space.
*/
#endif
pmap_tlb_shootdown(curpcb->pcb_pmap,
((vaddr_t)ptes) + ptp->offset, opte,
&cpumask);
#ifdef MULTIPROCESSOR
/*
* Always shoot down the pmap's self-mapping
* of the PTP.
* XXXthorpej Redundant shootdown can happen
* here if pmap == curpcb->pcb_pmap (not APTE
* space).
*/
pmap_tlb_shootdown(pmap,
((vaddr_t)PTE_BASE) + ptp->offset, opte,
&cpumask);
#endif
pmap->pm_stats.resident_count--;
if (pmap->pm_ptphint == ptp)
pmap->pm_ptphint =
TAILQ_FIRST(&pmap->pm_obj.memq);
ptp->wire_count = 0;
/* Postpone free to after shootdown. */
uvm_pagerealloc(ptp, NULL, 0);
TAILQ_INSERT_TAIL(&empty_ptps, ptp, listq);
}
}
pmap_tlb_shootnow(cpumask);
pmap_unmap_ptes_pae(pmap); /* unlock pmap */
PMAP_MAP_TO_HEAD_UNLOCK();
while ((ptp = TAILQ_FIRST(&empty_ptps)) != NULL) {
TAILQ_REMOVE(&empty_ptps, ptp, listq);
uvm_pagefree(ptp);
}
return;
}
for (/* null */ ; sva < eva ; sva = blkendva) {
/* determine range of block */
blkendva = i386_round_pdr(sva+1);
if (blkendva > eva)
blkendva = eva;
/*
* XXXCDC: our PTE mappings should never be removed
* with pmap_remove! if we allow this (and why would
* we?) then we end up freeing the pmap's page
* directory page (PDP) before we are finished using
		 * it when we hit it in the recursive mapping. this
* is BAD.
*
* long term solution is to move the PTEs out of user
* address space. and into kernel address space (up
* with APTE). then we can set VM_MAXUSER_ADDRESS to
* be VM_MAX_ADDRESS.
*/
if (pdei(sva) == PDSLOT_PTE)
/* XXXCDC: ugly hack to avoid freeing PDP here */
continue;
if (!pmap_valid_entry(PDE(pmap, pdei(sva))))
/* valid block? */
continue;
/* PA of the PTP */
ptppa = PDE(pmap, pdei(sva)) & PG_FRAME;
/* get PTP if non-kernel mapping */
if (pmap == pmap_kernel()) {
/* we never free kernel PTPs */
ptp = NULL;
} else {
if (pmap->pm_ptphint &&
VM_PAGE_TO_PHYS(pmap->pm_ptphint) == ptppa) {
ptp = pmap->pm_ptphint;
} else {
ptp = PHYS_TO_VM_PAGE(ptppa);
#ifdef DIAGNOSTIC
if (ptp == NULL)
panic("pmap_remove: unmanaged PTP "
"detected");
#endif
}
}
pmap_remove_ptes_pae(pmap, ptp, (vaddr_t)&ptes[atop(sva)],
sva, blkendva, &cpumask);
/* if PTP is no longer being used, free it! */
if (ptp && ptp->wire_count <= 1) {
opte = i386_atomic_testset_uq(&PDE(pmap, pdei(sva)),0);
#if defined(MULTIPROCESSOR)
/*
* XXXthorpej Redundant shootdown can happen here
* if we're using APTE space.
*/
#endif
pmap_tlb_shootdown(curpcb->pcb_pmap,
((vaddr_t)ptes) + ptp->offset, opte, &cpumask);
#if defined(MULTIPROCESSOR)
/*
* Always shoot down the pmap's self-mapping
* of the PTP.
* XXXthorpej Redundant shootdown can happen here
* if pmap == curpcb->pcb_pmap (not APTE space).
*/
pmap_tlb_shootdown(pmap,
((vaddr_t)PTE_BASE) + ptp->offset, opte, &cpumask);
#endif
pmap->pm_stats.resident_count--;
if (pmap->pm_ptphint == ptp) /* update hint? */
pmap->pm_ptphint =
TAILQ_FIRST(&pmap->pm_obj.memq);
ptp->wire_count = 0;
/* Postpone free to after shootdown. */
uvm_pagerealloc(ptp, NULL, 0);
TAILQ_INSERT_TAIL(&empty_ptps, ptp, listq);
}
}
pmap_tlb_shootnow(cpumask);
pmap_unmap_ptes_pae(pmap);
PMAP_MAP_TO_HEAD_UNLOCK();
while ((ptp = TAILQ_FIRST(&empty_ptps)) != NULL) {
TAILQ_REMOVE(&empty_ptps, ptp, listq);
uvm_pagefree(ptp);
}
}
/*
* pmap_page_remove: remove a managed vm_page from all pmaps that map it
*
* => we set pv_head => pmap locking
* => R/M bits are sync'd back to attrs
*/
void
pmap_page_remove_pae(struct vm_page *pg)
{
int bank, off;
struct pv_head *pvh;
struct pv_entry *pve;
pt_entry_t *ptes, opte;
int32_t cpumask = 0;
TAILQ_HEAD(, vm_page) empty_ptps;
struct vm_page *ptp;
/* XXX: vm_page should either contain pv_head or have a pointer to it */
bank = vm_physseg_find(atop(VM_PAGE_TO_PHYS(pg)), &off);
if (bank == -1) {
printf("pmap_page_remove: unmanaged page?\n");
return;
}
pvh = &vm_physmem[bank].pmseg.pvhead[off];
if (pvh->pvh_list == NULL) {
return;
}
TAILQ_INIT(&empty_ptps);
/* set pv_head => pmap locking */
PMAP_HEAD_TO_MAP_LOCK();
/* XXX: needed if we hold head->map lock? */
simple_lock(&pvh->pvh_lock);
for (pve = pvh->pvh_list ; pve != NULL ; pve = pve->pv_next) {
ptes = pmap_map_ptes_pae(pve->pv_pmap); /* locks pmap */
#ifdef DIAGNOSTIC
if (pve->pv_va >= uvm.pager_sva && pve->pv_va < uvm.pager_eva)
printf("pmap_page_remove: found pager VA on pv_list\n");
if (pve->pv_ptp && (PDE(pve->pv_pmap,
pdei(pve->pv_va)) & PG_FRAME) !=
VM_PAGE_TO_PHYS(pve->pv_ptp)) {
printf("pmap_page_remove: pg=%p: va=%lx, pv_ptp=%p\n",
pg, pve->pv_va, pve->pv_ptp);
printf("pmap_page_remove: PTP's phys addr: "
"actual=%llx, recorded=%llx\n",
(PDE(pve->pv_pmap, pdei(pve->pv_va)) &
PG_FRAME), VM_PAGE_TO_PHYS(pve->pv_ptp));
panic("pmap_page_remove: mapped managed page has "
"invalid pv_ptp field");
}
#endif
opte = ptes[atop(pve->pv_va)];
ptes[atop(pve->pv_va)] = 0; /* zap! */
if (opte & PG_W)
pve->pv_pmap->pm_stats.wired_count--;
pve->pv_pmap->pm_stats.resident_count--;
/* Shootdown only if referenced */
if (opte & PG_U)
pmap_tlb_shootdown(pve->pv_pmap, pve->pv_va, opte,
&cpumask);
/* sync R/M bits */
vm_physmem[bank].pmseg.attrs[off] |= (opte & (PG_U|PG_M));
/* update the PTP reference count. free if last reference. */
if (pve->pv_ptp) {
pve->pv_ptp->wire_count--;
if (pve->pv_ptp->wire_count <= 1) {
/*
* Do we have to shootdown the page just to
* get the pte out of the TLB ?
*/
				if (!(opte & PG_U))
pmap_tlb_shootdown(pve->pv_pmap,
pve->pv_va, opte, &cpumask);
opte = i386_atomic_testset_uq(&PDE(pve->pv_pmap,
pdei(pve->pv_va)), 0);
pmap_tlb_shootdown(curpcb->pcb_pmap,
((vaddr_t)ptes) + pve->pv_ptp->offset,
opte, &cpumask);
#if defined(MULTIPROCESSOR)
/*
* Always shoot down the other pmap's
* self-mapping of the PTP.
*/
pmap_tlb_shootdown(pve->pv_pmap,
((vaddr_t)PTE_BASE) + pve->pv_ptp->offset,
opte, &cpumask);
#endif
pve->pv_pmap->pm_stats.resident_count--;
/* update hint? */
if (pve->pv_pmap->pm_ptphint == pve->pv_ptp)
pve->pv_pmap->pm_ptphint =
TAILQ_FIRST(&pve->pv_pmap->pm_obj.memq);
pve->pv_ptp->wire_count = 0;
/* Postpone free to after shootdown. */
uvm_pagerealloc(pve->pv_ptp, NULL, 0);
TAILQ_INSERT_TAIL(&empty_ptps, pve->pv_ptp,
listq);
}
}
pmap_unmap_ptes_pae(pve->pv_pmap); /* unlocks pmap */
}
pmap_free_pvs(NULL, pvh->pvh_list);
pvh->pvh_list = NULL;
simple_unlock(&pvh->pvh_lock);
PMAP_HEAD_TO_MAP_UNLOCK();
pmap_tlb_shootnow(cpumask);
while ((ptp = TAILQ_FIRST(&empty_ptps)) != NULL) {
TAILQ_REMOVE(&empty_ptps, ptp, listq);
uvm_pagefree(ptp);
}
}
/*
* p m a p a t t r i b u t e f u n c t i o n s
* functions that test/change managed page's attributes
* since a page can be mapped multiple times we must check each PTE that
* maps it by going down the pv lists.
*/
/*
* pmap_test_attrs: test a page's attributes
*
* => we set pv_head => pmap locking
*/
boolean_t
pmap_test_attrs_pae(struct vm_page *pg, int testbits)
{
int bank, off;
char *myattrs;
struct pv_head *pvh;
struct pv_entry *pve;
pt_entry_t *ptes, pte;
/* XXX: vm_page should either contain pv_head or have a pointer to it */
bank = vm_physseg_find(atop(VM_PAGE_TO_PHYS(pg)), &off);
if (bank == -1) {
printf("pmap_test_attrs: unmanaged page?\n");
return(FALSE);
}
/*
* before locking: see if attributes are already set and if so,
* return!
*/
myattrs = &vm_physmem[bank].pmseg.attrs[off];
if (*myattrs & testbits)
return(TRUE);
/* test to see if there is a list before bothering to lock */
pvh = &vm_physmem[bank].pmseg.pvhead[off];
if (pvh->pvh_list == NULL) {
return(FALSE);
}
/* nope, gonna have to do it the hard way */
PMAP_HEAD_TO_MAP_LOCK();
/* XXX: needed if we hold head->map lock? */
simple_lock(&pvh->pvh_lock);
for (pve = pvh->pvh_list; pve != NULL && (*myattrs & testbits) == 0;
pve = pve->pv_next) {
ptes = pmap_map_ptes_pae(pve->pv_pmap);
pte = ptes[atop(pve->pv_va)];
pmap_unmap_ptes_pae(pve->pv_pmap);
*myattrs |= pte;
}
/*
* note that we will exit the for loop with a non-null pve if
* we have found the bits we are testing for.
*/
simple_unlock(&pvh->pvh_lock);
PMAP_HEAD_TO_MAP_UNLOCK();
return((*myattrs & testbits) != 0);
}
/*
* pmap_change_attrs: change a page's attributes
*
* => we set pv_head => pmap locking
* => we return TRUE if we cleared one of the bits we were asked to
*/
boolean_t
pmap_change_attrs_pae(struct vm_page *pg, int setbits, int clearbits)
{
u_int32_t result;
int bank, off;
struct pv_head *pvh;
struct pv_entry *pve;
pt_entry_t *ptes, npte, opte;
char *myattrs;
int32_t cpumask = 0;
/* XXX: vm_page should either contain pv_head or have a pointer to it */
bank = vm_physseg_find(atop(VM_PAGE_TO_PHYS(pg)), &off);
if (bank == -1) {
printf("pmap_change_attrs: unmanaged page?\n");
return(FALSE);
}
PMAP_HEAD_TO_MAP_LOCK();
pvh = &vm_physmem[bank].pmseg.pvhead[off];
/* XXX: needed if we hold head->map lock? */
simple_lock(&pvh->pvh_lock);
myattrs = &vm_physmem[bank].pmseg.attrs[off];
result = *myattrs & clearbits;
*myattrs = (*myattrs | setbits) & ~clearbits;
for (pve = pvh->pvh_list; pve != NULL; pve = pve->pv_next) {
#ifdef DIAGNOSTIC
if (!pmap_valid_entry(PDE(pve->pv_pmap, pdei(pve->pv_va))))
panic("pmap_change_attrs: mapping without PTP "
"detected");
#endif
ptes = pmap_map_ptes_pae(pve->pv_pmap); /* locks pmap */
npte = ptes[atop(pve->pv_va)];
result |= (npte & clearbits);
npte = (npte | setbits) & ~(pt_entry_t)clearbits;
if (ptes[atop(pve->pv_va)] != npte) {
opte = i386_atomic_testset_uq(&ptes[atop(pve->pv_va)],
npte);
			pmap_tlb_shootdown(pve->pv_pmap, pve->pv_va,
			    opte, &cpumask);
}
pmap_unmap_ptes_pae(pve->pv_pmap); /* unlocks pmap */
}
simple_unlock(&pvh->pvh_lock);
PMAP_HEAD_TO_MAP_UNLOCK();
pmap_tlb_shootnow(cpumask);
return(result != 0);
}
/*
* p m a p p r o t e c t i o n f u n c t i o n s
*/
/*
* pmap_page_protect: change the protection of all recorded mappings
* of a managed page
*
* => NOTE: this is an inline function in pmap.h
*/
/* see pmap.h */
/*
* pmap_protect: set the protection in of the pages in a pmap
*
* => NOTE: this is an inline function in pmap.h
*/
/* see pmap.h */
/*
* pmap_write_protect: write-protect pages in a pmap
*/
void
pmap_write_protect_pae(struct pmap *pmap, vaddr_t sva, vaddr_t eva,
vm_prot_t prot)
{
pt_entry_t *ptes, *spte, *epte, opte, npte;
vaddr_t blockend;
u_int32_t md_prot;
int32_t cpumask = 0;
ptes = pmap_map_ptes_pae(pmap); /* locks pmap */
/* should be ok, but just in case ... */
sva &= PG_FRAME;
eva &= PG_FRAME;
for (/* null */ ; sva < eva ; sva = blockend) {
blockend = (sva & PD_MASK) + NBPD;
if (blockend > eva)
blockend = eva;
/*
* XXXCDC: our PTE mappings should never be write-protected!
*
* long term solution is to move the PTEs out of user
* address space. and into kernel address space (up
* with APTE). then we can set VM_MAXUSER_ADDRESS to
* be VM_MAX_ADDRESS.
*/
/* XXXCDC: ugly hack to avoid freeing PDP here */
if (pdei(sva) == PDSLOT_PTE)
continue;
/* empty block? */
if (!pmap_valid_entry(PDE(pmap, pdei(sva))))
continue;
md_prot = protection_codes[prot];
if (sva < VM_MAXUSER_ADDRESS)
md_prot |= PG_u;
else if (sva < VM_MAX_ADDRESS)
/* XXX: write-prot our PTES? never! */
md_prot |= (PG_u | PG_RW);
spte = &ptes[atop(sva)];
epte = &ptes[atop(blockend)];
for (/*null */; spte < epte ; spte++, sva += PAGE_SIZE) {
if (!pmap_valid_entry(*spte)) /* no mapping? */
continue;
npte = (*spte & ~(pt_entry_t)PG_PROT) | md_prot;
if (npte != *spte) {
pmap_exec_account(pmap, sva, *spte, npte);
opte = *spte;
*spte = npte;
pmap_tlb_shootdown(pmap, sva, opte, &cpumask);
}
}
}
pmap_tlb_shootnow(cpumask);
pmap_unmap_ptes_pae(pmap); /* unlocks pmap */
}
/*
* end of protection functions
*/
/*
* pmap_unwire: clear the wired bit in the PTE
*
* => mapping should already be in map
*/
void
pmap_unwire_pae(struct pmap *pmap, vaddr_t va)
{
pt_entry_t *ptes;
if (pmap_valid_entry(PDE(pmap, pdei(va)))) {
ptes = pmap_map_ptes_pae(pmap); /* locks pmap */
#ifdef DIAGNOSTIC
if (!pmap_valid_entry(ptes[atop(va)]))
panic("pmap_unwire: invalid (unmapped) va 0x%lx", va);
#endif
if ((ptes[atop(va)] & PG_W) != 0) {
ptes[atop(va)] &= ~PG_W;
pmap->pm_stats.wired_count--;
}
#ifdef DIAGNOSTIC
else {
printf("pmap_unwire: wiring for pmap %p va 0x%lx "
"didn't change!\n", pmap, va);
}
#endif
pmap_unmap_ptes_pae(pmap); /* unlocks map */
}
#ifdef DIAGNOSTIC
else {
panic("pmap_unwire: invalid PDE");
}
#endif
}
/*
* pmap_copy: copy mappings from one pmap to another
*
* => optional function
* void pmap_copy(dst_pmap, src_pmap, dst_addr, len, src_addr)
*/
/*
* defined as macro in pmap.h
*/
/*
* pmap_enter: enter a mapping into a pmap
*
* => must be done "now" ... no lazy-evaluation
* => we set pmap => pv_head locking
*/
int
pmap_enter_pae(struct pmap *pmap, vaddr_t va, paddr_t pa, vm_prot_t prot,
int flags)
{
pt_entry_t *ptes, opte, npte;
struct vm_page *ptp;
struct pv_head *pvh;
struct pv_entry *pve;
int bank, off, error;
boolean_t wired = (flags & PMAP_WIRED) != 0;
#ifdef DIAGNOSTIC
/* sanity check: totally out of range? */
if (va >= VM_MAX_KERNEL_ADDRESS)
panic("pmap_enter: too big");
if (va == (vaddr_t) PDP_BASE || va == (vaddr_t) APDP_BASE)
panic("pmap_enter: trying to map over PDP/APDP!");
/* sanity check: kernel PTPs should already have been pre-allocated */
if (va >= VM_MIN_KERNEL_ADDRESS &&
!pmap_valid_entry(PDE(pmap, pdei(va))))
panic("pmap_enter: missing kernel PTP!");
#endif
/* get lock */
PMAP_MAP_TO_HEAD_LOCK();
/*
* map in ptes and get a pointer to our PTP (unless we are the kernel)
*/
ptes = pmap_map_ptes_pae(pmap); /* locks pmap */
if (pmap == pmap_kernel()) {
ptp = NULL;
} else {
ptp = pmap_get_ptp_pae(pmap, pdei(va), FALSE);
if (ptp == NULL) {
if (flags & PMAP_CANFAIL) {
error = ENOMEM;
goto out;
}
panic("pmap_enter: get ptp failed");
}
}
opte = ptes[atop(va)]; /* old PTE */
/*
* is there currently a valid mapping at our VA?
*/
if (pmap_valid_entry(opte)) {
/*
* first, update pm_stats. resident count will not
* change since we are replacing/changing a valid
* mapping. wired count might change...
*/
if (wired && (opte & PG_W) == 0)
pmap->pm_stats.wired_count++;
else if (!wired && (opte & PG_W) != 0)
pmap->pm_stats.wired_count--;
/*
* is the currently mapped PA the same as the one we
* want to map?
*/
if ((opte & PG_FRAME) == pa) {
/* if this is on the PVLIST, sync R/M bit */
if (opte & PG_PVLIST) {
bank = vm_physseg_find(atop(pa), &off);
#ifdef DIAGNOSTIC
if (bank == -1)
panic("pmap_enter: same pa PG_PVLIST "
"mapping with unmanaged page "
"pa = 0x%lx (0x%lx)", pa,
atop(pa));
#endif
pvh = &vm_physmem[bank].pmseg.pvhead[off];
simple_lock(&pvh->pvh_lock);
vm_physmem[bank].pmseg.attrs[off] |= opte;
simple_unlock(&pvh->pvh_lock);
} else {
pvh = NULL; /* ensure !PG_PVLIST */
}
goto enter_now;
}
/*
* changing PAs: we must remove the old one first
*/
/*
* if current mapping is on a pvlist,
* remove it (sync R/M bits)
*/
if (opte & PG_PVLIST) {
bank = vm_physseg_find(atop(opte & PG_FRAME), &off);
#ifdef DIAGNOSTIC
if (bank == -1)
panic("pmap_enter: PG_PVLIST mapping with "
"unmanaged page "
"pa = 0x%lx (0x%lx)", pa, atop(pa));
#endif
pvh = &vm_physmem[bank].pmseg.pvhead[off];
simple_lock(&pvh->pvh_lock);
pve = pmap_remove_pv(pvh, pmap, va);
vm_physmem[bank].pmseg.attrs[off] |= opte;
simple_unlock(&pvh->pvh_lock);
} else {
pve = NULL;
}
} else { /* opte not valid */
pve = NULL;
pmap->pm_stats.resident_count++;
if (wired)
pmap->pm_stats.wired_count++;
if (ptp)
ptp->wire_count++; /* count # of valid entrys */
}
/*
* at this point pm_stats has been updated. pve is either NULL
* or points to a now-free pv_entry structure (the latter case is
* if we called pmap_remove_pv above).
*
* if this entry is to be on a pvlist, enter it now.
*/
bank = vm_physseg_find(atop(pa), &off);
if (pmap_initialized && bank != -1) {
pvh = &vm_physmem[bank].pmseg.pvhead[off];
if (pve == NULL) {
pve = pmap_alloc_pv(pmap, ALLOCPV_NEED);
if (pve == NULL) {
if (flags & PMAP_CANFAIL) {
error = ENOMEM;
goto out;
}
panic("pmap_enter: no pv entries available");
}
}
/* lock pvh when adding */
pmap_enter_pv(pvh, pve, pmap, va, ptp);
} else {
/* new mapping is not PG_PVLIST. free pve if we've got one */
pvh = NULL; /* ensure !PG_PVLIST */
if (pve)
pmap_free_pv(pmap, pve);
}
enter_now:
/*
* at this point pvh is !NULL if we want the PG_PVLIST bit set
*/
npte = pa | protection_codes[prot] | PG_V;
pmap_exec_account(pmap, va, opte, npte);
if (pvh)
npte |= PG_PVLIST;
if (wired)
npte |= PG_W;
if (va < VM_MAXUSER_ADDRESS)
npte |= PG_u;
else if (va < VM_MAX_ADDRESS)
npte |= (PG_u | PG_RW); /* XXXCDC: no longer needed? */
if (pmap == pmap_kernel())
npte |= pmap_pg_g;
ptes[atop(va)] = npte; /* zap! */
if ((opte & ~(pt_entry_t)(PG_M|PG_U)) != npte) {
#ifdef MULTIPROCESSOR
int32_t cpumask = 0;
pmap_tlb_shootdown(pmap, va, opte, &cpumask);
pmap_tlb_shootnow(cpumask);
#else
/* Don't bother deferring in the single CPU case. */
if (pmap_is_curpmap(pmap))
pmap_update_pg(va);
#endif
}
error = 0;
out:
pmap_unmap_ptes_pae(pmap);
PMAP_MAP_TO_HEAD_UNLOCK();
return error;
}
/*
* pmap_growkernel: increase usage of KVM space
*
* => we allocate new PTPs for the kernel and install them in all
* the pmaps on the system.
*/
vaddr_t
pmap_growkernel_pae(vaddr_t maxkvaddr)
{
extern int nkpde;
struct pmap *kpm = pmap_kernel(), *pm;
int needed_kpde; /* needed number of kernel PTPs */
int s;
paddr_t ptaddr;
needed_kpde = (int)(maxkvaddr - VM_MIN_KERNEL_ADDRESS + (NBPD-1))
/ NBPD;
if (needed_kpde <= nkpde)
goto out; /* we are OK */
/*
* whoops! we need to add kernel PTPs
*/
s = splhigh(); /* to be safe */
simple_lock(&kpm->pm_obj.vmobjlock);
for (/*null*/ ; nkpde < needed_kpde ; nkpde++) {
if (uvm.page_init_done == FALSE) {
/*
* we're growing the kernel pmap early (from
* uvm_pageboot_alloc()). this case must be
* handled a little differently.
*/
if (uvm_page_physget(&ptaddr) == FALSE)
panic("pmap_growkernel: out of memory");
pmap_zero_phys(ptaddr);
PDE(kpm, PDSLOT_KERN + nkpde) = ptaddr | PG_RW | PG_V;
/* count PTP as resident */
kpm->pm_stats.resident_count++;
continue;
}
/*
* THIS *MUST* BE CODED SO AS TO WORK IN THE
* pmap_initialized == FALSE CASE! WE MAY BE
* INVOKED WHILE pmap_init() IS RUNNING!
*/
while (!pmap_alloc_ptp_pae(kpm, PDSLOT_KERN + nkpde, FALSE))
uvm_wait("pmap_growkernel");
/* PG_u not for kernel */
PDE(kpm, PDSLOT_KERN + nkpde) &= ~PG_u;
/* distribute new kernel PTP to all active pmaps */
simple_lock(&pmaps_lock);
LIST_FOREACH(pm, &pmaps, pm_list) {
PDE(pm, PDSLOT_KERN + nkpde) =
PDE(kpm, PDSLOT_KERN + nkpde);
}
simple_unlock(&pmaps_lock);
}
simple_unlock(&kpm->pm_obj.vmobjlock);
splx(s);
out:
return (VM_MIN_KERNEL_ADDRESS + (nkpde * NBPD));
}
#ifdef DEBUG
void pmap_dump_pae(struct pmap *, vaddr_t, vaddr_t);
/*
* pmap_dump: dump all the mappings from a pmap
*
* => caller should not be holding any pmap locks
*/
void
pmap_dump_pae(struct pmap *pmap, vaddr_t sva, vaddr_t eva)
{
pt_entry_t *ptes, *pte;
vaddr_t blkendva;
/*
* if end is out of range truncate.
* if (end == start) update to max.
*/
if (eva > VM_MAXUSER_ADDRESS || eva <= sva)
eva = VM_MAXUSER_ADDRESS;
/*
* we lock in the pmap => pv_head direction
*/
PMAP_MAP_TO_HEAD_LOCK();
ptes = pmap_map_ptes_pae(pmap); /* locks pmap */
/*
	 * dumping a range of pages: we dump in PTP sized blocks (2MB)
*/
for (/* null */ ; sva < eva ; sva = blkendva) {
/* determine range of block */
blkendva = i386_round_pdr(sva+1);
if (blkendva > eva)
blkendva = eva;
/* valid block? */
if (!pmap_valid_entry(PDE(pmap, pdei(sva))))
continue;
pte = &ptes[atop(sva)];
for (/* null */; sva < blkendva ; sva += NBPG, pte++) {
if (!pmap_valid_entry(*pte))
continue;
printf("va %#lx -> pa %#x (pte=%#x)\n",
sva, *pte, *pte & PG_FRAME);
}
}
pmap_unmap_ptes_pae(pmap);
PMAP_MAP_TO_HEAD_UNLOCK();
}
#endif