Annotation of sys/uvm/uvm_mmap.c, Revision 1.1
1.1 ! nbrk 1: /* $OpenBSD: uvm_mmap.c,v 1.69 2007/06/18 21:51:15 pedro Exp $ */
! 2: /* $NetBSD: uvm_mmap.c,v 1.49 2001/02/18 21:19:08 chs Exp $ */
! 3:
! 4: /*
! 5: * Copyright (c) 1997 Charles D. Cranor and Washington University.
! 6: * Copyright (c) 1991, 1993 The Regents of the University of California.
! 7: * Copyright (c) 1988 University of Utah.
! 8: *
! 9: * All rights reserved.
! 10: *
! 11: * This code is derived from software contributed to Berkeley by
! 12: * the Systems Programming Group of the University of Utah Computer
! 13: * Science Department.
! 14: *
! 15: * Redistribution and use in source and binary forms, with or without
! 16: * modification, are permitted provided that the following conditions
! 17: * are met:
! 18: * 1. Redistributions of source code must retain the above copyright
! 19: * notice, this list of conditions and the following disclaimer.
! 20: * 2. Redistributions in binary form must reproduce the above copyright
! 21: * notice, this list of conditions and the following disclaimer in the
! 22: * documentation and/or other materials provided with the distribution.
! 23: * 3. All advertising materials mentioning features or use of this software
! 24: * must display the following acknowledgement:
! 25: * This product includes software developed by the Charles D. Cranor,
! 26: * Washington University, University of California, Berkeley and
! 27: * its contributors.
! 28: * 4. Neither the name of the University nor the names of its contributors
! 29: * may be used to endorse or promote products derived from this software
! 30: * without specific prior written permission.
! 31: *
! 32: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
! 33: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
! 34: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
! 35: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
! 36: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
! 37: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
! 38: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
! 39: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
! 40: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
! 41: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
! 42: * SUCH DAMAGE.
! 43: *
! 44: * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
! 45: * @(#)vm_mmap.c 8.5 (Berkeley) 5/19/94
! 46: * from: Id: uvm_mmap.c,v 1.1.2.14 1998/01/05 21:04:26 chuck Exp
! 47: */
! 48:
! 49: /*
! 50: * uvm_mmap.c: system call interface into VM system, plus kernel vm_mmap
! 51: * function.
! 52: */
! 53: #include <sys/param.h>
! 54: #include <sys/systm.h>
! 55: #include <sys/file.h>
! 56: #include <sys/filedesc.h>
! 57: #include <sys/resourcevar.h>
! 58: #include <sys/mman.h>
! 59: #include <sys/mount.h>
! 60: #include <sys/proc.h>
! 61: #include <sys/malloc.h>
! 62: #include <sys/vnode.h>
! 63: #include <sys/conf.h>
! 64: #include <sys/stat.h>
! 65:
! 66: #include <machine/exec.h> /* for __LDPGSZ */
! 67:
! 68: #include <miscfs/specfs/specdev.h>
! 69:
! 70: #include <sys/syscallargs.h>
! 71:
! 72: #include <uvm/uvm.h>
! 73: #include <uvm/uvm_device.h>
! 74: #include <uvm/uvm_vnode.h>
! 75:
/*
 * ALIGN_ADDR(addr, size, pageoff): page-align "addr" downward, growing
 * "size" by the same amount so the range still covers the same bytes,
 * then round "size" up to a whole number of pages.  "pageoff" receives
 * the offset of the original addr within its page.
 *
 * Expands to a `return (EINVAL)' in the enclosing function if either
 * adjustment would wrap around the address space.
 */
#define ALIGN_ADDR(addr, size, pageoff)	do {				\
	pageoff = (addr & PAGE_MASK);					\
	if (pageoff != 0) {						\
		if (size > SIZE_MAX - pageoff)				\
			return (EINVAL);	/* wraparound */	\
		addr -= pageoff;					\
		size += pageoff;					\
	}								\
	if (size != 0) {						\
		size = (vsize_t)round_page(size);			\
		if (size == 0)						\
			return (EINVAL);	/* wraparound */	\
	}								\
} while (0)
! 93:
! 94: /*
! 95: * unimplemented VM system calls:
! 96: */
! 97:
! 98: /*
! 99: * sys_sbrk: sbrk system call.
! 100: */
! 101:
! 102: /* ARGSUSED */
! 103: int
! 104: sys_sbrk(p, v, retval)
! 105: struct proc *p;
! 106: void *v;
! 107: register_t *retval;
! 108: {
! 109: #if 0
! 110: struct sys_sbrk_args /* {
! 111: syscallarg(int) incr;
! 112: } */ *uap = v;
! 113: #endif
! 114:
! 115: return (ENOSYS);
! 116: }
! 117:
! 118: /*
! 119: * sys_sstk: sstk system call.
! 120: */
! 121:
! 122: /* ARGSUSED */
! 123: int
! 124: sys_sstk(p, v, retval)
! 125: struct proc *p;
! 126: void *v;
! 127: register_t *retval;
! 128: {
! 129: #if 0
! 130: struct sys_sstk_args /* {
! 131: syscallarg(int) incr;
! 132: } */ *uap = v;
! 133: #endif
! 134:
! 135: return (ENOSYS);
! 136: }
! 137:
/*
 * sys_mquery: provide mapping hints to applications that do fixed mappings
 *
 * flags: 0 or MAP_FIXED (MAP_FIXED - means that we insist on this addr and
 *	don't care about PMAP_PREFER or such)
 * addr: hint where we'd like to place the mapping.
 * size: size of the mapping
 * fd: fd of the file we want to map
 * off: offset within the file
 *
 * On success *retval holds a suitable address; the map is NOT modified.
 */

int
sys_mquery(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	struct sys_mquery_args /* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
		syscallarg(int) prot;
		syscallarg(int) flags;
		syscallarg(int) fd;
		syscallarg(long) pad;
		syscallarg(off_t) pos;
	} */ *uap = v;
	struct file *fp;
	struct uvm_object *uobj;
	voff_t uoff;
	int error;
	vaddr_t vaddr;
	int flags = 0;
	vsize_t size;
	vm_prot_t prot;
	int fd;

	vaddr = (vaddr_t) SCARG(uap, addr);
	prot = SCARG(uap, prot);
	size = (vsize_t) SCARG(uap, len);
	fd = SCARG(uap, fd);

	/* reject protection bits outside VM_PROT_ALL */
	if ((prot & VM_PROT_ALL) != prot)
		return (EINVAL);

	if (SCARG(uap, flags) & MAP_FIXED)
		flags |= UVM_FLAG_FIXED;

	/*
	 * fd >= 0 means "where could this file be mapped": look up the
	 * backing vnode's uvm_object.  fd < 0 queries anonymous space.
	 * getvnode() takes a reference on fp; released at "done".
	 */
	if (fd >= 0) {
		if ((error = getvnode(p->p_fd, fd, &fp)) != 0)
			return (error);
		uobj = &((struct vnode *)fp->f_data)->v_uvm.u_obj;
		uoff = SCARG(uap, pos);
	} else {
		fp = NULL;
		uobj = NULL;
		uoff = 0;
	}

	if (vaddr == 0)
		vaddr = uvm_map_hint(p, prot);

	/* prevent a user requested address from falling in heap space */
	if ((vaddr + size > (vaddr_t)p->p_vmspace->vm_daddr) &&
	    (vaddr < (vaddr_t)p->p_vmspace->vm_daddr + MAXDSIZ)) {
		if (flags & UVM_FLAG_FIXED) {
			error = EINVAL;
			goto done;
		}
		/* movable hint: push it past the largest possible heap */
		vaddr = round_page((vaddr_t)p->p_vmspace->vm_daddr + MAXDSIZ);
	}
again:

	if (uvm_map_findspace(&p->p_vmspace->vm_map, vaddr, size,
	    &vaddr, uobj, uoff, 0, flags) == NULL) {
		if (flags & UVM_FLAG_FIXED)
			error = EINVAL;
		else
			error = ENOMEM;
	} else {
		/* prevent a returned address from falling in heap space */
		if ((vaddr + size > (vaddr_t)p->p_vmspace->vm_daddr)
		    && (vaddr < (vaddr_t)p->p_vmspace->vm_daddr + MAXDSIZ)) {
			vaddr = round_page((vaddr_t)p->p_vmspace->vm_daddr +
			    MAXDSIZ);
			/* retry the search above the heap region */
			goto again;
		}
		error = 0;
		*retval = (register_t)(vaddr);
	}
done:
	if (fp != NULL)
		FRELE(fp);
	return (error);
}
! 232:
/*
 * sys_mincore: determine if pages are in core or not.
 *
 * Writes one status byte per page into the user's "vec" buffer:
 * 1 if the page is resident, 0 otherwise.  The range must be fully
 * mapped (no holes) or ENOMEM is returned.
 */

/* ARGSUSED */
int
sys_mincore(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	struct sys_mincore_args /* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
		syscallarg(char *) vec;
	} */ *uap = v;
	vm_page_t m;
	char *vec, pgi;
	struct uvm_object *uobj;
	struct vm_amap *amap;
	struct vm_anon *anon;
	vm_map_entry_t entry;
	vaddr_t start, end, lim;
	vm_map_t map;
	vsize_t len, npgs;
	int error = 0;

	map = &p->p_vmspace->vm_map;

	start = (vaddr_t)SCARG(uap, addr);
	len = SCARG(uap, len);
	vec = SCARG(uap, vec);

	/* start must be page aligned; len is rounded up to whole pages */
	if (start & PAGE_MASK)
		return (EINVAL);
	len = round_page(len);
	end = start + len;
	if (end <= start)
		return (EINVAL);

	npgs = len >> PAGE_SHIFT;

	/*
	 * Lock down vec, so our returned status isn't outdated by
	 * storing the status byte for a page.
	 */
	if ((error = uvm_vslock(p, vec, npgs, VM_PROT_WRITE)) != 0)
		return (error);

	vm_map_lock_read(map);

	if (uvm_map_lookup_entry(map, start, &entry) == FALSE) {
		error = ENOMEM;
		goto out;
	}

	/* walk the map entries covering [start, end) */
	for (/* nothing */;
	     entry != &map->header && entry->start < end;
	     entry = entry->next) {
		KASSERT(!UVM_ET_ISSUBMAP(entry));
		KASSERT(start >= entry->start);

		/* Make sure there are no holes. */
		if (entry->end < end &&
		     (entry->next == &map->header ||
		      entry->next->start > entry->end)) {
			error = ENOMEM;
			goto out;
		}

		/* end of this entry's portion of the requested range */
		lim = end < entry->end ? end : entry->end;

		/*
		 * Special case for objects with no "real" pages.  Those
		 * are always considered resident (mapped devices).
		 */
		if (UVM_ET_ISOBJ(entry)) {
			KASSERT(!UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj));
			if (entry->object.uvm_obj->pgops->pgo_releasepg
			    == NULL) {
				pgi = 1;
				for (/* nothing */; start < lim;
				     start += PAGE_SIZE, vec++)
					copyout(&pgi, vec, sizeof(char));
				continue;
			}
		}

		amap = entry->aref.ar_amap;	/* top layer */
		uobj = entry->object.uvm_obj;	/* bottom layer */

		if (uobj != NULL)
			simple_lock(&uobj->vmobjlock);

		for (/* nothing */; start < lim; start += PAGE_SIZE, vec++) {
			pgi = 0;
			if (amap != NULL) {
				/* Check the top layer first. */
				anon = amap_lookup(&entry->aref,
				    start - entry->start);
				/* Don't need to lock anon here. */
				if (anon != NULL && anon->an_page != NULL) {
					/*
					 * Anon has the page for this entry
					 * offset.
					 */
					pgi = 1;
				}
			}

			if (uobj != NULL && pgi == 0) {
				/* Check the bottom layer. */
				m = uvm_pagelookup(uobj,
				    entry->offset + (start - entry->start));
				if (m != NULL) {
					/*
					 * Object has the page for this entry
					 * offset.
					 */
					pgi = 1;
				}
			}

			/* vec was wired above, so this copyout cannot fault */
			copyout(&pgi, vec, sizeof(char));
		}

		if (uobj != NULL)
			simple_unlock(&uobj->vmobjlock);
	}

 out:
	vm_map_unlock_read(map);
	uvm_vsunlock(p, SCARG(uap, vec), npgs);
	return (error);
}
! 368:
/*
 * sys_mmap: mmap system call.
 *
 * => file offset and address may not be page aligned
 *    - if MAP_FIXED, offset and address must have remainder mod PAGE_SIZE
 *    - if address isn't page aligned the mapping starts at trunc_page(addr)
 *      and the return value is adjusted up by the page offset.
 */

int
sys_mmap(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	struct sys_mmap_args /* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
		syscallarg(int) prot;
		syscallarg(int) flags;
		syscallarg(int) fd;
		syscallarg(long) pad;
		syscallarg(off_t) pos;
	} */ *uap = v;
	vaddr_t addr;
	struct vattr va;
	off_t pos;
	vsize_t size, pageoff;
	vm_prot_t prot, maxprot;
	int flags, fd;
	vaddr_t vm_min_address = VM_MIN_ADDRESS;
	struct filedesc *fdp = p->p_fd;
	struct file *fp = NULL;		/* non-NULL only for file mappings */
	struct vnode *vp;
	caddr_t handle;
	int error;

	/*
	 * first, extract syscall args from the uap.
	 */

	addr = (vaddr_t) SCARG(uap, addr);
	size = (vsize_t) SCARG(uap, len);
	prot = SCARG(uap, prot);
	flags = SCARG(uap, flags);
	fd = SCARG(uap, fd);
	pos = SCARG(uap, pos);

	/*
	 * Fixup the old deprecated MAP_COPY into MAP_PRIVATE, and
	 * validate the flags.
	 */
	if ((prot & VM_PROT_ALL) != prot)
		return (EINVAL);
	if ((flags & MAP_FLAGMASK) != flags)
		return (EINVAL);
	if (flags & MAP_COPY)
		flags = (flags & ~MAP_COPY) | MAP_PRIVATE;
	if ((flags & (MAP_SHARED|MAP_PRIVATE)) == (MAP_SHARED|MAP_PRIVATE))
		return (EINVAL);

	/*
	 * align file position and save offset.  adjust size.
	 */
	ALIGN_ADDR(pos, size, pageoff);

	/*
	 * now check (MAP_FIXED) or get (!MAP_FIXED) the "addr"
	 */

	if (flags & MAP_FIXED) {

		/* adjust address by the same amount as we did the offset */
		addr -= pageoff;
		if (addr & PAGE_MASK)
			return (EINVAL);	/* not page aligned */

		if (addr > SIZE_MAX - size)
			return (EINVAL);	/* no wrapping! */
		if (VM_MAXUSER_ADDRESS > 0 &&
		    (addr + size) > VM_MAXUSER_ADDRESS)
			return (EINVAL);
		if (vm_min_address > 0 && addr < vm_min_address)
			return (EINVAL);

	} else {

		/*
		 * not fixed: make sure we skip over the largest possible heap.
		 * we will refine our guess later (e.g. to account for VAC, etc)
		 */
		if (addr == 0)
			addr = uvm_map_hint(p, prot);
		else if (!(flags & MAP_TRYFIXED) &&
		    addr < (vaddr_t)p->p_vmspace->vm_daddr)
			addr = uvm_map_hint(p, prot);
	}

	/*
	 * check for file mappings (i.e. not anonymous) and verify file.
	 */
	if ((flags & MAP_ANON) == 0) {

		if ((fp = fd_getfile(fdp, fd)) == NULL)
			return (EBADF);

		/* hold fp across the checks below; dropped at "out" */
		FREF(fp);

		if (fp->f_type != DTYPE_VNODE) {
			error = ENODEV;		/* only mmap vnodes! */
			goto out;
		}
		vp = (struct vnode *)fp->f_data;	/* convert to vnode */

		if (vp->v_type != VREG && vp->v_type != VCHR &&
		    vp->v_type != VBLK) {
			error = ENODEV; /* only REG/CHR/BLK support mmap */
			goto out;
		}

		if (vp->v_type == VREG && (pos + size) < pos) {
			error = EINVAL;		/* no offset wrapping */
			goto out;
		}

		/* special case: catch SunOS style /dev/zero */
		if (vp->v_type == VCHR && iszerodev(vp->v_rdev)) {
			flags |= MAP_ANON;
			FRELE(fp);
			fp = NULL;
			goto is_anon;
		}

		/*
		 * Old programs may not select a specific sharing type, so
		 * default to an appropriate one.
		 *
		 * XXX: how does MAP_ANON fit in the picture?
		 */
		if ((flags & (MAP_SHARED|MAP_PRIVATE)) == 0) {
#if defined(DEBUG)
			printf("WARNING: defaulted mmap() share type to "
			    "%s (pid %d comm %s)\n", vp->v_type == VCHR ?
			    "MAP_SHARED" : "MAP_PRIVATE", p->p_pid,
			    p->p_comm);
#endif
			if (vp->v_type == VCHR)
				flags |= MAP_SHARED;	/* for a device */
			else
				flags |= MAP_PRIVATE;	/* for a file */
		}

		/*
		 * MAP_PRIVATE device mappings don't make sense (and aren't
		 * supported anyway).  However, some programs rely on this,
		 * so just change it to MAP_SHARED.
		 */
		if (vp->v_type == VCHR && (flags & MAP_PRIVATE) != 0) {
			flags = (flags & ~MAP_PRIVATE) | MAP_SHARED;
		}

		/*
		 * now check protection
		 */

		maxprot = VM_PROT_EXECUTE;

		/* check read access */
		if (fp->f_flag & FREAD)
			maxprot |= VM_PROT_READ;
		else if (prot & PROT_READ) {
			error = EACCES;
			goto out;
		}

		/* check write access, shared case first */
		if (flags & MAP_SHARED) {
			/*
			 * if the file is writable, only add PROT_WRITE to
			 * maxprot if the file is not immutable, append-only.
			 * otherwise, if we have asked for PROT_WRITE, return
			 * EPERM.
			 */
			if (fp->f_flag & FWRITE) {
				if ((error =
				    VOP_GETATTR(vp, &va, p->p_ucred, p)))
					goto out;
				if ((va.va_flags & (IMMUTABLE|APPEND)) == 0)
					maxprot |= VM_PROT_WRITE;
				else if (prot & PROT_WRITE) {
					error = EPERM;
					goto out;
				}
			} else if (prot & PROT_WRITE) {
				error = EACCES;
				goto out;
			}
		} else {
			/* MAP_PRIVATE mappings can always write to */
			maxprot |= VM_PROT_WRITE;
		}

		/*
		 * set handle to vnode
		 */

		handle = (caddr_t)vp;

	} else {		/* MAP_ANON case */
		/*
		 * XXX What do we do about (MAP_SHARED|MAP_PRIVATE) == 0?
		 */
		if (fd != -1) {
			error = EINVAL;
			goto out;
		}

 is_anon:		/* label for SunOS style /dev/zero */
		handle = NULL;
		maxprot = VM_PROT_ALL;
		pos = 0;
	}

	/*
	 * Mappings that may dirty anonymous memory are charged against
	 * the data-size resource limit.
	 */
	if ((flags & MAP_ANON) != 0 ||
	    ((flags & MAP_PRIVATE) != 0 && (prot & PROT_WRITE) != 0)) {
		if (size >
		    (p->p_rlimit[RLIMIT_DATA].rlim_cur - ctob(p->p_vmspace->vm_dused))) {
			error = ENOMEM;
			goto out;
		}
	}

	/*
	 * now let kernel internal function uvm_mmap do the work.
	 */

	error = uvm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, maxprot,
	    flags, handle, pos, p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur, p);

	if (error == 0)
		/* remember to add offset */
		*retval = (register_t)(addr + pageoff);

out:
	if (fp)
		FRELE(fp);
	return (error);
}
! 617:
/*
 * sys_msync: the msync system call (a front-end for flush)
 */

int
sys_msync(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	struct sys_msync_args /* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
		syscallarg(int) flags;
	} */ *uap = v;
	vaddr_t addr;
	vsize_t size, pageoff;
	vm_map_t map;
	int rv, flags, uvmflags;

	/*
	 * extract syscall args from the uap
	 */

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);
	flags = SCARG(uap, flags);

	/* sanity check flags: known bits only, MS_ASYNC and MS_SYNC are
	 * mutually exclusive, and at least one known bit must be set */
	if ((flags & ~(MS_ASYNC | MS_SYNC | MS_INVALIDATE)) != 0 ||
	    (flags & (MS_ASYNC | MS_SYNC | MS_INVALIDATE)) == 0 ||
	    (flags & (MS_ASYNC | MS_SYNC)) == (MS_ASYNC | MS_SYNC))
		return (EINVAL);
	if ((flags & (MS_ASYNC | MS_SYNC)) == 0)
		flags |= MS_SYNC;

	/*
	 * align the address to a page boundary, and adjust the size accordingly
	 */
	ALIGN_ADDR(addr, size, pageoff);
	if (addr > SIZE_MAX - size)
		return (EINVAL);		/* disallow wrap-around. */

	/*
	 * get map
	 */

	map = &p->p_vmspace->vm_map;

	/*
	 * XXXCDC: do we really need this semantic?
	 *
	 * XXX Gak!  If size is zero we are supposed to sync "all modified
	 * pages with the region containing addr".  Unfortunately, we
	 * don't really keep track of individual mmaps so we approximate
	 * by flushing the range of the map entry containing addr.
	 * This can be incorrect if the region splits or is coalesced
	 * with a neighbor.
	 */
	if (size == 0) {
		vm_map_entry_t entry;

		vm_map_lock_read(map);
		rv = uvm_map_lookup_entry(map, addr, &entry);
		if (rv == TRUE) {
			addr = entry->start;
			size = entry->end - entry->start;
		}
		vm_map_unlock_read(map);
		if (rv == FALSE)
			return (EINVAL);
	}

	/*
	 * translate MS_ flags into PGO_ flags
	 */
	uvmflags = PGO_CLEANIT;
	if (flags & MS_INVALIDATE)
		uvmflags |= PGO_FREE;
	if (flags & MS_SYNC)
		uvmflags |= PGO_SYNCIO;
	else
		uvmflags |= PGO_SYNCIO;  /* XXXCDC: force sync for now! */

	return (uvm_map_clean(map, addr, addr+size, uvmflags));
}
! 704:
/*
 * sys_munmap: unmap a users memory
 */

int
sys_munmap(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	struct sys_munmap_args /* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
	} */ *uap = v;
	vaddr_t addr;
	vsize_t size, pageoff;
	vm_map_t map;
	vaddr_t vm_min_address = VM_MIN_ADDRESS;
	struct vm_map_entry *dead_entries;

	/*
	 * get syscall args...
	 */

	addr = (vaddr_t) SCARG(uap, addr);
	size = (vsize_t) SCARG(uap, len);

	/*
	 * align the address to a page boundary, and adjust the size accordingly
	 */
	ALIGN_ADDR(addr, size, pageoff);

	/*
	 * Check for illegal addresses.  Watch out for address wrap...
	 * Note that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (addr > SIZE_MAX - size)
		return (EINVAL);
	if (VM_MAXUSER_ADDRESS > 0 && addr + size > VM_MAXUSER_ADDRESS)
		return (EINVAL);
	if (vm_min_address > 0 && addr < vm_min_address)
		return (EINVAL);
	map = &p->p_vmspace->vm_map;


	vm_map_lock(map);	/* lock map so we can checkprot */

	/*
	 * interesting system call semantic: make sure entire range is
	 * allocated before allowing an unmap.
	 */

	if (!uvm_map_checkprot(map, addr, addr + size, VM_PROT_NONE)) {
		vm_map_unlock(map);
		return (EINVAL);
	}

	/*
	 * doit!  dead entries are collected and detached after the
	 * map lock is dropped, to avoid holding it over the teardown.
	 */
	uvm_unmap_remove(map, addr, addr + size, &dead_entries, p);

	vm_map_unlock(map);	/* and unlock */

	if (dead_entries != NULL)
		uvm_unmap_detach(dead_entries, 0);

	return (0);
}
! 774:
! 775: /*
! 776: * sys_mprotect: the mprotect system call
! 777: */
! 778:
! 779: int
! 780: sys_mprotect(p, v, retval)
! 781: struct proc *p;
! 782: void *v;
! 783: register_t *retval;
! 784: {
! 785: struct sys_mprotect_args /* {
! 786: syscallarg(void *) addr;
! 787: syscallarg(size_t) len;
! 788: syscallarg(int) prot;
! 789: } */ *uap = v;
! 790: vaddr_t addr;
! 791: vsize_t size, pageoff;
! 792: vm_prot_t prot;
! 793:
! 794: /*
! 795: * extract syscall args from uap
! 796: */
! 797:
! 798: addr = (vaddr_t)SCARG(uap, addr);
! 799: size = (vsize_t)SCARG(uap, len);
! 800: prot = SCARG(uap, prot);
! 801:
! 802: if ((prot & VM_PROT_ALL) != prot)
! 803: return (EINVAL);
! 804:
! 805: /*
! 806: * align the address to a page boundary, and adjust the size accordingly
! 807: */
! 808: ALIGN_ADDR(addr, size, pageoff);
! 809: if (addr > SIZE_MAX - size)
! 810: return (EINVAL); /* disallow wrap-around. */
! 811:
! 812: return (uvm_map_protect(&p->p_vmspace->vm_map, addr, addr+size,
! 813: prot, FALSE));
! 814: }
! 815:
! 816: /*
! 817: * sys_minherit: the minherit system call
! 818: */
! 819:
! 820: int
! 821: sys_minherit(p, v, retval)
! 822: struct proc *p;
! 823: void *v;
! 824: register_t *retval;
! 825: {
! 826: struct sys_minherit_args /* {
! 827: syscallarg(void *) addr;
! 828: syscallarg(size_t) len;
! 829: syscallarg(int) inherit;
! 830: } */ *uap = v;
! 831: vaddr_t addr;
! 832: vsize_t size, pageoff;
! 833: vm_inherit_t inherit;
! 834:
! 835: addr = (vaddr_t)SCARG(uap, addr);
! 836: size = (vsize_t)SCARG(uap, len);
! 837: inherit = SCARG(uap, inherit);
! 838:
! 839: /*
! 840: * align the address to a page boundary, and adjust the size accordingly
! 841: */
! 842: ALIGN_ADDR(addr, size, pageoff);
! 843: if (addr > SIZE_MAX - size)
! 844: return (EINVAL); /* disallow wrap-around. */
! 845:
! 846: return (uvm_map_inherit(&p->p_vmspace->vm_map, addr, addr+size,
! 847: inherit));
! 848: }
! 849:
/*
 * sys_madvise: give advice about memory usage.
 */

/* ARGSUSED */
int
sys_madvise(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	struct sys_madvise_args /* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
		syscallarg(int) behav;
	} */ *uap = v;
	vaddr_t addr;
	vsize_t size, pageoff;
	int advice, error;

	addr = (vaddr_t)SCARG(uap, addr);
	size = (vsize_t)SCARG(uap, len);
	advice = SCARG(uap, behav);

	/*
	 * align the address to a page boundary, and adjust the size accordingly
	 */
	ALIGN_ADDR(addr, size, pageoff);
	if (addr > SIZE_MAX - size)
		return (EINVAL);		/* disallow wrap-around. */

	switch (advice) {
	case MADV_NORMAL:
	case MADV_RANDOM:
	case MADV_SEQUENTIAL:
		/* access-pattern hints: recorded on the map entries */
		error = uvm_map_advice(&p->p_vmspace->vm_map, addr,
		    addr + size, advice);
		break;

	case MADV_WILLNEED:
		/*
		 * Activate all these pages, pre-faulting them in if
		 * necessary.
		 */
		/*
		 * XXX IMPLEMENT ME.
		 * Should invent a "weak" mode for uvm_fault()
		 * which would only do the PGO_LOCKED pgo_get().
		 */
		return (0);

	case MADV_DONTNEED:
		/*
		 * Deactivate all these pages.  We don't need them
		 * any more.  We don't, however, toss the data in
		 * the pages.
		 */
		error = uvm_map_clean(&p->p_vmspace->vm_map, addr, addr + size,
		    PGO_DEACTIVATE);
		break;

	case MADV_FREE:
		/*
		 * These pages contain no valid data, and may be
		 * garbage-collected.  Toss all resources, including
		 * any swap space in use.
		 */
		error = uvm_map_clean(&p->p_vmspace->vm_map, addr, addr + size,
		    PGO_FREE);
		break;

	case MADV_SPACEAVAIL:
		/*
		 * XXXMRG What is this?  I think it's:
		 *
		 *	Ensure that we have allocated backing-store
		 *	for these pages.
		 *
		 * This is going to require changes to the page daemon,
		 * as it will free swap space allocated to pages in core.
		 * There's also what to do for device/file/anonymous memory.
		 */
		return (EINVAL);

	default:
		return (EINVAL);
	}

	return (error);
}
! 940:
! 941: /*
! 942: * sys_mlock: memory lock
! 943: */
! 944:
! 945: int
! 946: sys_mlock(p, v, retval)
! 947: struct proc *p;
! 948: void *v;
! 949: register_t *retval;
! 950: {
! 951: struct sys_mlock_args /* {
! 952: syscallarg(const void *) addr;
! 953: syscallarg(size_t) len;
! 954: } */ *uap = v;
! 955: vaddr_t addr;
! 956: vsize_t size, pageoff;
! 957: int error;
! 958:
! 959: /*
! 960: * extract syscall args from uap
! 961: */
! 962: addr = (vaddr_t)SCARG(uap, addr);
! 963: size = (vsize_t)SCARG(uap, len);
! 964:
! 965: /*
! 966: * align the address to a page boundary and adjust the size accordingly
! 967: */
! 968: ALIGN_ADDR(addr, size, pageoff);
! 969: if (addr > SIZE_MAX - size)
! 970: return (EINVAL); /* disallow wrap-around. */
! 971:
! 972: if (atop(size) + uvmexp.wired > uvmexp.wiredmax)
! 973: return (EAGAIN);
! 974:
! 975: #ifdef pmap_wired_count
! 976: if (size + ptoa(pmap_wired_count(vm_map_pmap(&p->p_vmspace->vm_map))) >
! 977: p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur)
! 978: return (EAGAIN);
! 979: #else
! 980: if ((error = suser(p, 0)) != 0)
! 981: return (error);
! 982: #endif
! 983:
! 984: error = uvm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, FALSE,
! 985: 0);
! 986: return (error == 0 ? 0 : ENOMEM);
! 987: }
! 988:
! 989: /*
! 990: * sys_munlock: unlock wired pages
! 991: */
! 992:
! 993: int
! 994: sys_munlock(p, v, retval)
! 995: struct proc *p;
! 996: void *v;
! 997: register_t *retval;
! 998: {
! 999: struct sys_munlock_args /* {
! 1000: syscallarg(const void *) addr;
! 1001: syscallarg(size_t) len;
! 1002: } */ *uap = v;
! 1003: vaddr_t addr;
! 1004: vsize_t size, pageoff;
! 1005: int error;
! 1006:
! 1007: /*
! 1008: * extract syscall args from uap
! 1009: */
! 1010:
! 1011: addr = (vaddr_t)SCARG(uap, addr);
! 1012: size = (vsize_t)SCARG(uap, len);
! 1013:
! 1014: /*
! 1015: * align the address to a page boundary, and adjust the size accordingly
! 1016: */
! 1017: ALIGN_ADDR(addr, size, pageoff);
! 1018: if (addr > SIZE_MAX - size)
! 1019: return (EINVAL); /* disallow wrap-around. */
! 1020:
! 1021: #ifndef pmap_wired_count
! 1022: if ((error = suser(p, 0)) != 0)
! 1023: return (error);
! 1024: #endif
! 1025:
! 1026: error = uvm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, TRUE,
! 1027: 0);
! 1028: return (error == 0 ? 0 : ENOMEM);
! 1029: }
! 1030:
! 1031: /*
! 1032: * sys_mlockall: lock all pages mapped into an address space.
! 1033: */
! 1034:
! 1035: int
! 1036: sys_mlockall(p, v, retval)
! 1037: struct proc *p;
! 1038: void *v;
! 1039: register_t *retval;
! 1040: {
! 1041: struct sys_mlockall_args /* {
! 1042: syscallarg(int) flags;
! 1043: } */ *uap = v;
! 1044: int error, flags;
! 1045:
! 1046: flags = SCARG(uap, flags);
! 1047:
! 1048: if (flags == 0 ||
! 1049: (flags & ~(MCL_CURRENT|MCL_FUTURE)) != 0)
! 1050: return (EINVAL);
! 1051:
! 1052: #ifndef pmap_wired_count
! 1053: if ((error = suser(p, 0)) != 0)
! 1054: return (error);
! 1055: #endif
! 1056:
! 1057: error = uvm_map_pageable_all(&p->p_vmspace->vm_map, flags,
! 1058: p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);
! 1059: if (error != 0 && error != ENOMEM)
! 1060: return (EAGAIN);
! 1061: return (error);
! 1062: }
! 1063:
! 1064: /*
! 1065: * sys_munlockall: unlock all pages mapped into an address space.
! 1066: */
! 1067:
! 1068: int
! 1069: sys_munlockall(p, v, retval)
! 1070: struct proc *p;
! 1071: void *v;
! 1072: register_t *retval;
! 1073: {
! 1074:
! 1075: (void) uvm_map_pageable_all(&p->p_vmspace->vm_map, 0, 0);
! 1076: return (0);
! 1077: }
! 1078:
! 1079: /*
! 1080: * uvm_mmap: internal version of mmap
! 1081: *
! 1082: * - used by sys_mmap, exec, and sysv shm
! 1083: * - handle is a vnode pointer or NULL for MAP_ANON (XXX: not true,
! 1084: * sysv shm uses "named anonymous memory")
! 1085: * - caller must page-align the file offset
! 1086: */
! 1087:
! 1088: int
! 1089: uvm_mmap(map, addr, size, prot, maxprot, flags, handle, foff, locklimit, p)
! 1090: vm_map_t map;
! 1091: vaddr_t *addr;
! 1092: vsize_t size;
! 1093: vm_prot_t prot, maxprot;
! 1094: int flags;
! 1095: caddr_t handle; /* XXX: VNODE? */
! 1096: voff_t foff;
! 1097: vsize_t locklimit;
! 1098: struct proc *p;
! 1099: {
! 1100: struct uvm_object *uobj;
! 1101: struct vnode *vp;
! 1102: int error;
! 1103: int advice = UVM_ADV_NORMAL;
! 1104: uvm_flag_t uvmflag = 0;
! 1105: vsize_t align = 0; /* userland page size */
! 1106:
! 1107: /*
! 1108: * check params
! 1109: */
! 1110:
! 1111: if (size == 0)
! 1112: return(0);
! 1113: if (foff & PAGE_MASK)
! 1114: return(EINVAL);
! 1115: if ((prot & maxprot) != prot)
! 1116: return(EINVAL);
! 1117:
! 1118: /*
! 1119: * for non-fixed mappings, round off the suggested address.
! 1120: * for fixed mappings, check alignment and zap old mappings.
! 1121: */
! 1122:
! 1123: if ((flags & MAP_FIXED) == 0) {
! 1124: *addr = round_page(*addr); /* round */
! 1125: } else {
! 1126: if (*addr & PAGE_MASK)
! 1127: return(EINVAL);
! 1128: uvmflag |= UVM_FLAG_FIXED;
! 1129: uvm_unmap_p(map, *addr, *addr + size, p); /* zap! */
! 1130: }
! 1131:
! 1132: /*
! 1133: * handle anon vs. non-anon mappings. for non-anon mappings attach
! 1134: * to underlying vm object.
! 1135: */
! 1136:
! 1137: if (flags & MAP_ANON) {
! 1138: if ((flags & MAP_FIXED) == 0 && size >= __LDPGSZ)
! 1139: align = __LDPGSZ;
! 1140: foff = UVM_UNKNOWN_OFFSET;
! 1141: uobj = NULL;
! 1142: if ((flags & MAP_SHARED) == 0)
! 1143: /* XXX: defer amap create */
! 1144: uvmflag |= UVM_FLAG_COPYONW;
! 1145: else
! 1146: /* shared: create amap now */
! 1147: uvmflag |= UVM_FLAG_OVERLAY;
! 1148:
! 1149: } else {
! 1150:
! 1151: vp = (struct vnode *) handle; /* get vnode */
! 1152: if (vp->v_type != VCHR) {
! 1153: uobj = uvn_attach((void *) vp, (flags & MAP_SHARED) ?
! 1154: maxprot : (maxprot & ~VM_PROT_WRITE));
! 1155:
! 1156: #ifndef UBC
! 1157: /*
! 1158: * XXXCDC: hack from old code
! 1159: * don't allow vnodes which have been mapped
! 1160: * shared-writeable to persist [forces them to be
! 1161: * flushed out when last reference goes].
! 1162: * XXXCDC: interesting side effect: avoids a bug.
! 1163: * note that in WRITE [ufs_readwrite.c] that we
! 1164: * allocate buffer, uncache, and then do the write.
! 1165: * the problem with this is that if the uncache causes
! 1166: * VM data to be flushed to the same area of the file
! 1167: * we are writing to... in that case we've got the
! 1168: * buffer locked and our process goes to sleep forever.
! 1169: *
! 1170: * XXXCDC: checking maxprot protects us from the
! 1171: * "persistbug" program but this is not a long term
! 1172: * solution.
! 1173: *
! 1174: * XXXCDC: we don't bother calling uncache with the vp
! 1175: * VOP_LOCKed since we know that we are already
! 1176: * holding a valid reference to the uvn (from the
! 1177: * uvn_attach above), and thus it is impossible for
! 1178: * the uncache to kill the uvn and trigger I/O.
! 1179: */
! 1180: if (flags & MAP_SHARED) {
! 1181: if ((prot & VM_PROT_WRITE) ||
! 1182: (maxprot & VM_PROT_WRITE)) {
! 1183: uvm_vnp_uncache(vp);
! 1184: }
! 1185: }
! 1186: #else
! 1187: /* XXX for now, attach doesn't gain a ref */
! 1188: VREF(vp);
! 1189: #endif
! 1190: } else {
! 1191: uobj = udv_attach((void *) &vp->v_rdev,
! 1192: (flags & MAP_SHARED) ? maxprot :
! 1193: (maxprot & ~VM_PROT_WRITE), foff, size);
! 1194: /*
! 1195: * XXX Some devices don't like to be mapped with
! 1196: * XXX PROT_EXEC, but we don't really have a
! 1197: * XXX better way of handling this, right now
! 1198: */
! 1199: if (uobj == NULL && (prot & PROT_EXEC) == 0) {
! 1200: maxprot &= ~VM_PROT_EXECUTE;
! 1201: uobj = udv_attach((void *) &vp->v_rdev,
! 1202: (flags & MAP_SHARED) ? maxprot :
! 1203: (maxprot & ~VM_PROT_WRITE), foff, size);
! 1204: }
! 1205: advice = UVM_ADV_RANDOM;
! 1206: }
! 1207:
! 1208: if (uobj == NULL)
! 1209: return((vp->v_type == VREG) ? ENOMEM : EINVAL);
! 1210:
! 1211: if ((flags & MAP_SHARED) == 0)
! 1212: uvmflag |= UVM_FLAG_COPYONW;
! 1213: }
! 1214:
! 1215: /*
! 1216: * set up mapping flags
! 1217: */
! 1218:
! 1219: uvmflag = UVM_MAPFLAG(prot, maxprot,
! 1220: (flags & MAP_SHARED) ? UVM_INH_SHARE : UVM_INH_COPY,
! 1221: advice, uvmflag);
! 1222:
! 1223: error = uvm_map_p(map, addr, size, uobj, foff, align, uvmflag, p);
! 1224:
! 1225: if (error == 0) {
! 1226: /*
! 1227: * POSIX 1003.1b -- if our address space was configured
! 1228: * to lock all future mappings, wire the one we just made.
! 1229: */
! 1230: if (prot == VM_PROT_NONE) {
! 1231: /*
! 1232: * No more work to do in this case.
! 1233: */
! 1234: return (0);
! 1235: }
! 1236:
! 1237: vm_map_lock(map);
! 1238:
! 1239: if (map->flags & VM_MAP_WIREFUTURE) {
! 1240: if ((atop(size) + uvmexp.wired) > uvmexp.wiredmax
! 1241: #ifdef pmap_wired_count
! 1242: || (locklimit != 0 && (size +
! 1243: ptoa(pmap_wired_count(vm_map_pmap(map)))) >
! 1244: locklimit)
! 1245: #endif
! 1246: ) {
! 1247: error = ENOMEM;
! 1248: vm_map_unlock(map);
! 1249: /* unmap the region! */
! 1250: uvm_unmap(map, *addr, *addr + size);
! 1251: goto bad;
! 1252: }
! 1253: /*
! 1254: * uvm_map_pageable() always returns the map
! 1255: * unlocked.
! 1256: */
! 1257: error = uvm_map_pageable(map, *addr, *addr + size,
! 1258: FALSE, UVM_LK_ENTER);
! 1259: if (error != 0) {
! 1260: /* unmap the region! */
! 1261: uvm_unmap(map, *addr, *addr + size);
! 1262: goto bad;
! 1263: }
! 1264: return (0);
! 1265: }
! 1266:
! 1267: vm_map_unlock(map);
! 1268:
! 1269: return (0);
! 1270: }
! 1271:
! 1272: /*
! 1273: * errors: first detach from the uobj, if any.
! 1274: */
! 1275:
! 1276: if (uobj)
! 1277: uobj->pgops->pgo_detach(uobj);
! 1278:
! 1279: bad:
! 1280: return (error);
! 1281: }
CVSweb