Annotation of sys/uvm/uvm_pdaemon.c, Revision 1.1
1.1 ! nbrk 1: /* $OpenBSD: uvm_pdaemon.c,v 1.33 2007/06/18 21:51:15 pedro Exp $ */
! 2: /* $NetBSD: uvm_pdaemon.c,v 1.23 2000/08/20 10:24:14 bjh21 Exp $ */
! 3:
! 4: /*
! 5: * Copyright (c) 1997 Charles D. Cranor and Washington University.
! 6: * Copyright (c) 1991, 1993, The Regents of the University of California.
! 7: *
! 8: * All rights reserved.
! 9: *
! 10: * This code is derived from software contributed to Berkeley by
! 11: * The Mach Operating System project at Carnegie-Mellon University.
! 12: *
! 13: * Redistribution and use in source and binary forms, with or without
! 14: * modification, are permitted provided that the following conditions
! 15: * are met:
! 16: * 1. Redistributions of source code must retain the above copyright
! 17: * notice, this list of conditions and the following disclaimer.
! 18: * 2. Redistributions in binary form must reproduce the above copyright
! 19: * notice, this list of conditions and the following disclaimer in the
! 20: * documentation and/or other materials provided with the distribution.
! 21: * 3. All advertising materials mentioning features or use of this software
! 22: * must display the following acknowledgement:
! 23: * This product includes software developed by Charles D. Cranor,
! 24: * Washington University, the University of California, Berkeley and
! 25: * its contributors.
! 26: * 4. Neither the name of the University nor the names of its contributors
! 27: * may be used to endorse or promote products derived from this software
! 28: * without specific prior written permission.
! 29: *
! 30: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
! 31: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
! 32: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
! 33: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
! 34: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
! 35: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
! 36: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
! 37: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
! 38: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
! 39: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
! 40: * SUCH DAMAGE.
! 41: *
! 42: * @(#)vm_pageout.c 8.5 (Berkeley) 2/14/94
! 43: * from: Id: uvm_pdaemon.c,v 1.1.2.32 1998/02/06 05:26:30 chs Exp
! 44: *
! 45: *
! 46: * Copyright (c) 1987, 1990 Carnegie-Mellon University.
! 47: * All rights reserved.
! 48: *
! 49: * Permission to use, copy, modify and distribute this software and
! 50: * its documentation is hereby granted, provided that both the copyright
! 51: * notice and this permission notice appear in all copies of the
! 52: * software, derivative works or modified versions, and any portions
! 53: * thereof, and that both notices appear in supporting documentation.
! 54: *
! 55: * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
! 56: * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
! 57: * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
! 58: *
! 59: * Carnegie Mellon requests users of this software to return to
! 60: *
! 61: * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
! 62: * School of Computer Science
! 63: * Carnegie Mellon University
! 64: * Pittsburgh PA 15213-3890
! 65: *
! 66: * any improvements or extensions that they make and grant Carnegie the
! 67: * rights to redistribute these changes.
! 68: */
! 69:
! 70: /*
! 71: * uvm_pdaemon.c: the page daemon
! 72: */
! 73:
! 74: #include <sys/param.h>
! 75: #include <sys/proc.h>
! 76: #include <sys/systm.h>
! 77: #include <sys/kernel.h>
! 78: #include <sys/pool.h>
! 79: #include <sys/buf.h>
! 80: #include <sys/vnode.h>
! 81:
! 82: #include <uvm/uvm.h>
! 83:
! 84: /*
! 85: * UVMPD_NUMDIRTYREACTS is how many dirty pages the pagedaemon will reactivate
! 86: * in a pass thru the inactive list when swap is full. the value should be
! 87: * "small"... if it's too large we'll cycle the active pages thru the inactive
! 88: * queue too quickly to for them to be referenced and avoid being freed.
! 89: */
! 90:
! 91: #define UVMPD_NUMDIRTYREACTS 16
! 92:
! 93:
! 94: /*
! 95: * local prototypes
! 96: */
! 97:
! 98: static void uvmpd_scan(void);
! 99: static boolean_t uvmpd_scan_inactive(struct pglist *);
! 100: static void uvmpd_tune(void);
! 101:
! 102: /*
! 103: * uvm_wait: wait (sleep) for the page daemon to free some pages
! 104: *
! 105: * => should be called with all locks released
! 106: * => should _not_ be called by the page daemon (to avoid deadlock)
! 107: */
! 108:
! 109: void
! 110: uvm_wait(wmsg)
! 111: const char *wmsg;
! 112: {
! 113: int timo = 0;
! 114: int s = splbio();
! 115:
! 116: /*
! 117: * check for page daemon going to sleep (waiting for itself)
! 118: */
! 119:
! 120: if (curproc == uvm.pagedaemon_proc) {
! 121: /*
! 122: * now we have a problem: the pagedaemon wants to go to
! 123: * sleep until it frees more memory. but how can it
! 124: * free more memory if it is asleep? that is a deadlock.
! 125: * we have two options:
! 126: * [1] panic now
! 127: * [2] put a timeout on the sleep, thus causing the
! 128: * pagedaemon to only pause (rather than sleep forever)
! 129: *
! 130: * note that option [2] will only help us if we get lucky
! 131: * and some other process on the system breaks the deadlock
! 132: * by exiting or freeing memory (thus allowing the pagedaemon
! 133: * to continue). for now we panic if DEBUG is defined,
! 134: * otherwise we hope for the best with option [2] (better
! 135: * yet, this should never happen in the first place!).
! 136: */
! 137:
! 138: printf("pagedaemon: deadlock detected!\n");
! 139: timo = hz >> 3; /* set timeout */
! 140: #if defined(DEBUG)
! 141: /* DEBUG: panic so we can debug it */
! 142: panic("pagedaemon deadlock");
! 143: #endif
! 144: }
! 145:
! 146: simple_lock(&uvm.pagedaemon_lock);
! 147: wakeup(&uvm.pagedaemon); /* wake the daemon! */
! 148: UVM_UNLOCK_AND_WAIT(&uvmexp.free, &uvm.pagedaemon_lock, FALSE, wmsg,
! 149: timo);
! 150:
! 151: splx(s);
! 152: }
! 153:
! 154:
! 155: /*
! 156: * uvmpd_tune: tune paging parameters
! 157: *
! 158: * => called when ever memory is added (or removed?) to the system
! 159: * => caller must call with page queues locked
! 160: */
! 161:
! 162: static void
! 163: uvmpd_tune()
! 164: {
! 165: UVMHIST_FUNC("uvmpd_tune"); UVMHIST_CALLED(pdhist);
! 166:
! 167: uvmexp.freemin = uvmexp.npages / 30;
! 168:
! 169: /* between 16k and 512k */
! 170: /* XXX: what are these values good for? */
! 171: uvmexp.freemin = max(uvmexp.freemin, (16*1024) >> PAGE_SHIFT);
! 172: uvmexp.freemin = min(uvmexp.freemin, (512*1024) >> PAGE_SHIFT);
! 173:
! 174: /* Make sure there's always a user page free. */
! 175: if (uvmexp.freemin < uvmexp.reserve_kernel + 1)
! 176: uvmexp.freemin = uvmexp.reserve_kernel + 1;
! 177:
! 178: uvmexp.freetarg = (uvmexp.freemin * 4) / 3;
! 179: if (uvmexp.freetarg <= uvmexp.freemin)
! 180: uvmexp.freetarg = uvmexp.freemin + 1;
! 181:
! 182: /* uvmexp.inactarg: computed in main daemon loop */
! 183:
! 184: uvmexp.wiredmax = uvmexp.npages / 3;
! 185: UVMHIST_LOG(pdhist, "<- done, freemin=%ld, freetarg=%ld, wiredmax=%ld",
! 186: uvmexp.freemin, uvmexp.freetarg, uvmexp.wiredmax, 0);
! 187: }
! 188:
! 189: /*
! 190: * uvm_pageout: the main loop for the pagedaemon
! 191: */
! 192:
! 193: void
! 194: uvm_pageout(void *arg)
! 195: {
! 196: int npages = 0;
! 197: UVMHIST_FUNC("uvm_pageout"); UVMHIST_CALLED(pdhist);
! 198:
! 199: UVMHIST_LOG(pdhist,"<starting uvm pagedaemon>", 0, 0, 0, 0);
! 200:
! 201: /*
! 202: * ensure correct priority and set paging parameters...
! 203: */
! 204:
! 205: uvm.pagedaemon_proc = curproc;
! 206: (void) spl0();
! 207: uvm_lock_pageq();
! 208: npages = uvmexp.npages;
! 209: uvmpd_tune();
! 210: uvm_unlock_pageq();
! 211:
! 212: /*
! 213: * main loop
! 214: */
! 215:
! 216: for (;;) {
! 217: simple_lock(&uvm.pagedaemon_lock);
! 218:
! 219: UVMHIST_LOG(pdhist," <<SLEEPING>>",0,0,0,0);
! 220: UVM_UNLOCK_AND_WAIT(&uvm.pagedaemon,
! 221: &uvm.pagedaemon_lock, FALSE, "pgdaemon", 0);
! 222: uvmexp.pdwoke++;
! 223: UVMHIST_LOG(pdhist," <<WOKE UP>>",0,0,0,0);
! 224:
! 225: /*
! 226: * now lock page queues and recompute inactive count
! 227: */
! 228:
! 229: uvm_lock_pageq();
! 230: if (npages != uvmexp.npages) { /* check for new pages? */
! 231: npages = uvmexp.npages;
! 232: uvmpd_tune();
! 233: }
! 234:
! 235: uvmexp.inactarg = (uvmexp.active + uvmexp.inactive) / 3;
! 236: if (uvmexp.inactarg <= uvmexp.freetarg) {
! 237: uvmexp.inactarg = uvmexp.freetarg + 1;
! 238: }
! 239:
! 240: UVMHIST_LOG(pdhist," free/ftarg=%ld/%ld, inact/itarg=%ld/%ld",
! 241: uvmexp.free, uvmexp.freetarg, uvmexp.inactive,
! 242: uvmexp.inactarg);
! 243:
! 244: /*
! 245: * scan if needed
! 246: */
! 247:
! 248: #ifdef UBC
! 249: if (uvmexp.free + uvmexp.paging < uvmexp.freetarg ||
! 250: uvmexp.inactive < uvmexp.inactarg ||
! 251: uvm_pgcnt_vnode >
! 252: (uvmexp.active + uvmexp.inactive + uvmexp.wired +
! 253: uvmexp.free) * 13 / 16) {
! 254: #else
! 255: if (uvmexp.free < uvmexp.freetarg ||
! 256: uvmexp.inactive < uvmexp.inactarg) {
! 257: #endif
! 258: uvmpd_scan();
! 259: }
! 260:
! 261: /*
! 262: * if there's any free memory to be had,
! 263: * wake up any waiters.
! 264: */
! 265:
! 266: if (uvmexp.free > uvmexp.reserve_kernel ||
! 267: uvmexp.paging == 0) {
! 268: wakeup(&uvmexp.free);
! 269: }
! 270:
! 271: /*
! 272: * scan done. unlock page queues (the only lock we are holding)
! 273: */
! 274:
! 275: uvm_unlock_pageq();
! 276: }
! 277: /*NOTREACHED*/
! 278: }
! 279:
! 280:
! 281: /*
! 282: * uvm_aiodone_daemon: main loop for the aiodone daemon.
! 283: */
! 284:
! 285: void
! 286: uvm_aiodone_daemon(void *arg)
! 287: {
! 288: int s, free;
! 289: struct buf *bp, *nbp;
! 290: UVMHIST_FUNC("uvm_aiodoned"); UVMHIST_CALLED(pdhist);
! 291:
! 292: for (;;) {
! 293:
! 294: /*
! 295: * carefully attempt to go to sleep (without losing "wakeups"!).
! 296: * we need splbio because we want to make sure the aio_done list
! 297: * is totally empty before we go to sleep.
! 298: */
! 299:
! 300: s = splbio();
! 301: simple_lock(&uvm.aiodoned_lock);
! 302: if (TAILQ_FIRST(&uvm.aio_done) == NULL) {
! 303: UVMHIST_LOG(pdhist," <<SLEEPING>>",0,0,0,0);
! 304: UVM_UNLOCK_AND_WAIT(&uvm.aiodoned,
! 305: &uvm.aiodoned_lock, FALSE, "aiodoned", 0);
! 306: UVMHIST_LOG(pdhist," <<WOKE UP>>",0,0,0,0);
! 307:
! 308: /* relock aiodoned_lock, still at splbio */
! 309: simple_lock(&uvm.aiodoned_lock);
! 310: }
! 311:
! 312: /*
! 313: * check for done aio structures
! 314: */
! 315:
! 316: bp = TAILQ_FIRST(&uvm.aio_done);
! 317: if (bp) {
! 318: TAILQ_INIT(&uvm.aio_done);
! 319: }
! 320:
! 321: simple_unlock(&uvm.aiodoned_lock);
! 322: splx(s);
! 323:
! 324: /*
! 325: * process each i/o that's done.
! 326: */
! 327:
! 328: free = uvmexp.free;
! 329: while (bp != NULL) {
! 330: if (bp->b_flags & B_PDAEMON) {
! 331: uvmexp.paging -= bp->b_bufsize >> PAGE_SHIFT;
! 332: }
! 333: nbp = TAILQ_NEXT(bp, b_freelist);
! 334: s = splbio(); /* b_iodone must by called at splbio */
! 335: (*bp->b_iodone)(bp);
! 336: splx(s);
! 337: bp = nbp;
! 338: }
! 339: if (free <= uvmexp.reserve_kernel) {
! 340: s = uvm_lock_fpageq();
! 341: wakeup(&uvm.pagedaemon);
! 342: uvm_unlock_fpageq(s);
! 343: } else {
! 344: simple_lock(&uvm.pagedaemon_lock);
! 345: wakeup(&uvmexp.free);
! 346: simple_unlock(&uvm.pagedaemon_lock);
! 347: }
! 348: }
! 349: }
! 350:
! 351:
! 352:
! 353: /*
! 354: * uvmpd_scan_inactive: scan an inactive list for pages to clean or free.
! 355: *
! 356: * => called with page queues locked
! 357: * => we work on meeting our free target by converting inactive pages
! 358: * into free pages.
! 359: * => we handle the building of swap-backed clusters
! 360: * => we return TRUE if we are exiting because we met our target
! 361: */
! 362:
! 363: static boolean_t
! 364: uvmpd_scan_inactive(pglst)
! 365: struct pglist *pglst;
! 366: {
! 367: boolean_t retval = FALSE; /* assume we haven't hit target */
! 368: int s, free, result;
! 369: struct vm_page *p, *nextpg;
! 370: struct uvm_object *uobj;
! 371: struct vm_page *pps[MAXBSIZE >> PAGE_SHIFT], **ppsp;
! 372: int npages;
! 373: struct vm_page *swpps[MAXBSIZE >> PAGE_SHIFT]; /* XXX: see below */
! 374: int swnpages, swcpages; /* XXX: see below */
! 375: int swslot;
! 376: struct vm_anon *anon;
! 377: boolean_t swap_backed;
! 378: vaddr_t start;
! 379: int dirtyreacts;
! 380: UVMHIST_FUNC("uvmpd_scan_inactive"); UVMHIST_CALLED(pdhist);
! 381:
! 382: /*
! 383: * note: we currently keep swap-backed pages on a separate inactive
! 384: * list from object-backed pages. however, merging the two lists
! 385: * back together again hasn't been ruled out. thus, we keep our
! 386: * swap cluster in "swpps" rather than in pps (allows us to mix
! 387: * clustering types in the event of a mixed inactive queue).
! 388: */
! 389:
! 390: /*
! 391: * swslot is non-zero if we are building a swap cluster. we want
! 392: * to stay in the loop while we have a page to scan or we have
! 393: * a swap-cluster to build.
! 394: */
! 395:
! 396: swslot = 0;
! 397: swnpages = swcpages = 0;
! 398: free = 0;
! 399: dirtyreacts = 0;
! 400:
! 401: for (p = TAILQ_FIRST(pglst); p != NULL || swslot != 0; p = nextpg) {
! 402:
! 403: /*
! 404: * note that p can be NULL iff we have traversed the whole
! 405: * list and need to do one final swap-backed clustered pageout.
! 406: */
! 407:
! 408: uobj = NULL;
! 409: anon = NULL;
! 410:
! 411: if (p) {
! 412:
! 413: /*
! 414: * update our copy of "free" and see if we've met
! 415: * our target
! 416: */
! 417:
! 418: s = uvm_lock_fpageq();
! 419: free = uvmexp.free;
! 420: uvm_unlock_fpageq(s);
! 421:
! 422: if (free + uvmexp.paging >= uvmexp.freetarg << 2 ||
! 423: dirtyreacts == UVMPD_NUMDIRTYREACTS) {
! 424: UVMHIST_LOG(pdhist," met free target: "
! 425: "exit loop", 0, 0, 0, 0);
! 426: retval = TRUE;
! 427:
! 428: if (swslot == 0) {
! 429: /* exit now if no swap-i/o pending */
! 430: break;
! 431: }
! 432:
! 433: /* set p to null to signal final swap i/o */
! 434: p = NULL;
! 435: }
! 436: }
! 437:
! 438: if (p) { /* if (we have a new page to consider) */
! 439:
! 440: /*
! 441: * we are below target and have a new page to consider.
! 442: */
! 443: uvmexp.pdscans++;
! 444: nextpg = TAILQ_NEXT(p, pageq);
! 445:
! 446: /*
! 447: * move referenced pages back to active queue and
! 448: * skip to next page (unlikely to happen since
! 449: * inactive pages shouldn't have any valid mappings
! 450: * and we cleared reference before deactivating).
! 451: */
! 452:
! 453: if (pmap_is_referenced(p)) {
! 454: uvm_pageactivate(p);
! 455: uvmexp.pdreact++;
! 456: continue;
! 457: }
! 458:
! 459: /*
! 460: * first we attempt to lock the object that this page
! 461: * belongs to. if our attempt fails we skip on to
! 462: * the next page (no harm done). it is important to
! 463: * "try" locking the object as we are locking in the
! 464: * wrong order (pageq -> object) and we don't want to
! 465: * deadlock.
! 466: *
! 467: * the only time we expect to see an ownerless page
! 468: * (i.e. a page with no uobject and !PQ_ANON) is if an
! 469: * anon has loaned a page from a uvm_object and the
! 470: * uvm_object has dropped the ownership. in that
! 471: * case, the anon can "take over" the loaned page
! 472: * and make it its own.
! 473: */
! 474:
! 475: /* is page part of an anon or ownerless ? */
! 476: if ((p->pg_flags & PQ_ANON) || p->uobject == NULL) {
! 477: anon = p->uanon;
! 478: KASSERT(anon != NULL);
! 479: if (!simple_lock_try(&anon->an_lock)) {
! 480: /* lock failed, skip this page */
! 481: continue;
! 482: }
! 483:
! 484: /*
! 485: * if the page is ownerless, claim it in the
! 486: * name of "anon"!
! 487: */
! 488:
! 489: if ((p->pg_flags & PQ_ANON) == 0) {
! 490: KASSERT(p->loan_count > 0);
! 491: p->loan_count--;
! 492: atomic_setbits_int(&p->pg_flags,
! 493: PQ_ANON);
! 494: /* anon now owns it */
! 495: }
! 496: if (p->pg_flags & PG_BUSY) {
! 497: simple_unlock(&anon->an_lock);
! 498: uvmexp.pdbusy++;
! 499: /* someone else owns page, skip it */
! 500: continue;
! 501: }
! 502: uvmexp.pdanscan++;
! 503: } else {
! 504: uobj = p->uobject;
! 505: KASSERT(uobj != NULL);
! 506: if (!simple_lock_try(&uobj->vmobjlock)) {
! 507: /* lock failed, skip this page */
! 508: continue;
! 509: }
! 510: if (p->pg_flags & PG_BUSY) {
! 511: simple_unlock(&uobj->vmobjlock);
! 512: uvmexp.pdbusy++;
! 513: /* someone else owns page, skip it */
! 514: continue;
! 515: }
! 516: uvmexp.pdobscan++;
! 517: }
! 518:
! 519: /*
! 520: * we now have the object and the page queues locked.
! 521: * the page is not busy. if the page is clean we
! 522: * can free it now and continue.
! 523: */
! 524:
! 525: if (p->pg_flags & PG_CLEAN) {
! 526: if (p->pg_flags & PQ_SWAPBACKED) {
! 527: /* this page now lives only in swap */
! 528: simple_lock(&uvm.swap_data_lock);
! 529: uvmexp.swpgonly++;
! 530: simple_unlock(&uvm.swap_data_lock);
! 531: }
! 532:
! 533: /* zap all mappings with pmap_page_protect... */
! 534: pmap_page_protect(p, VM_PROT_NONE);
! 535: uvm_pagefree(p);
! 536: uvmexp.pdfreed++;
! 537:
! 538: if (anon) {
! 539:
! 540: /*
! 541: * an anonymous page can only be clean
! 542: * if it has backing store assigned.
! 543: */
! 544:
! 545: KASSERT(anon->an_swslot != 0);
! 546:
! 547: /* remove from object */
! 548: anon->an_page = NULL;
! 549: simple_unlock(&anon->an_lock);
! 550: } else {
! 551: /* pagefree has already removed the
! 552: * page from the object */
! 553: simple_unlock(&uobj->vmobjlock);
! 554: }
! 555: continue;
! 556: }
! 557:
! 558: /*
! 559: * this page is dirty, skip it if we'll have met our
! 560: * free target when all the current pageouts complete.
! 561: */
! 562:
! 563: if (free + uvmexp.paging > uvmexp.freetarg << 2) {
! 564: if (anon) {
! 565: simple_unlock(&anon->an_lock);
! 566: } else {
! 567: simple_unlock(&uobj->vmobjlock);
! 568: }
! 569: continue;
! 570: }
! 571:
! 572: /*
! 573: * this page is dirty, but we can't page it out
! 574: * since all pages in swap are only in swap.
! 575: * reactivate it so that we eventually cycle
! 576: * all pages thru the inactive queue.
! 577: */
! 578:
! 579: KASSERT(uvmexp.swpgonly <= uvmexp.swpages);
! 580: if ((p->pg_flags & PQ_SWAPBACKED) &&
! 581: uvmexp.swpgonly == uvmexp.swpages) {
! 582: dirtyreacts++;
! 583: uvm_pageactivate(p);
! 584: if (anon) {
! 585: simple_unlock(&anon->an_lock);
! 586: } else {
! 587: simple_unlock(&uobj->vmobjlock);
! 588: }
! 589: continue;
! 590: }
! 591:
! 592: /*
! 593: * if the page is swap-backed and dirty and swap space
! 594: * is full, free any swap allocated to the page
! 595: * so that other pages can be paged out.
! 596: */
! 597:
! 598: KASSERT(uvmexp.swpginuse <= uvmexp.swpages);
! 599: if ((p->pg_flags & PQ_SWAPBACKED) &&
! 600: uvmexp.swpginuse == uvmexp.swpages) {
! 601:
! 602: if ((p->pg_flags & PQ_ANON) &&
! 603: p->uanon->an_swslot) {
! 604: uvm_swap_free(p->uanon->an_swslot, 1);
! 605: p->uanon->an_swslot = 0;
! 606: }
! 607: if (p->pg_flags & PQ_AOBJ) {
! 608: uao_dropswap(p->uobject,
! 609: p->offset >> PAGE_SHIFT);
! 610: }
! 611: }
! 612:
! 613: /*
! 614: * the page we are looking at is dirty. we must
! 615: * clean it before it can be freed. to do this we
! 616: * first mark the page busy so that no one else will
! 617: * touch the page. we write protect all the mappings
! 618: * of the page so that no one touches it while it is
! 619: * in I/O.
! 620: */
! 621:
! 622: swap_backed = ((p->pg_flags & PQ_SWAPBACKED) != 0);
! 623: atomic_setbits_int(&p->pg_flags, PG_BUSY);
! 624: UVM_PAGE_OWN(p, "scan_inactive");
! 625: pmap_page_protect(p, VM_PROT_READ);
! 626: uvmexp.pgswapout++;
! 627:
! 628: /*
! 629: * for swap-backed pages we need to (re)allocate
! 630: * swap space.
! 631: */
! 632:
! 633: if (swap_backed) {
! 634:
! 635: /*
! 636: * free old swap slot (if any)
! 637: */
! 638:
! 639: if (anon) {
! 640: if (anon->an_swslot) {
! 641: uvm_swap_free(anon->an_swslot,
! 642: 1);
! 643: anon->an_swslot = 0;
! 644: }
! 645: } else {
! 646: uao_dropswap(uobj,
! 647: p->offset >> PAGE_SHIFT);
! 648: }
! 649:
! 650: /*
! 651: * start new cluster (if necessary)
! 652: */
! 653:
! 654: if (swslot == 0) {
! 655: swnpages = MAXBSIZE >> PAGE_SHIFT;
! 656: swslot = uvm_swap_alloc(&swnpages,
! 657: TRUE);
! 658: if (swslot == 0) {
! 659: /* no swap? give up! */
! 660: atomic_clearbits_int(
! 661: &p->pg_flags,
! 662: PG_BUSY);
! 663: UVM_PAGE_OWN(p, NULL);
! 664: if (anon)
! 665: simple_unlock(
! 666: &anon->an_lock);
! 667: else
! 668: simple_unlock(
! 669: &uobj->vmobjlock);
! 670: continue;
! 671: }
! 672: swcpages = 0; /* cluster is empty */
! 673: }
! 674:
! 675: /*
! 676: * add block to cluster
! 677: */
! 678:
! 679: swpps[swcpages] = p;
! 680: if (anon)
! 681: anon->an_swslot = swslot + swcpages;
! 682: else
! 683: uao_set_swslot(uobj,
! 684: p->offset >> PAGE_SHIFT,
! 685: swslot + swcpages);
! 686: swcpages++;
! 687: }
! 688: } else {
! 689:
! 690: /* if p == NULL we must be doing a last swap i/o */
! 691: swap_backed = TRUE;
! 692: }
! 693:
! 694: /*
! 695: * now consider doing the pageout.
! 696: *
! 697: * for swap-backed pages, we do the pageout if we have either
! 698: * filled the cluster (in which case (swnpages == swcpages) or
! 699: * run out of pages (p == NULL).
! 700: *
! 701: * for object pages, we always do the pageout.
! 702: */
! 703:
! 704: if (swap_backed) {
! 705: if (p) { /* if we just added a page to cluster */
! 706: if (anon)
! 707: simple_unlock(&anon->an_lock);
! 708: else
! 709: simple_unlock(&uobj->vmobjlock);
! 710:
! 711: /* cluster not full yet? */
! 712: if (swcpages < swnpages)
! 713: continue;
! 714: }
! 715:
! 716: /* starting I/O now... set up for it */
! 717: npages = swcpages;
! 718: ppsp = swpps;
! 719: /* for swap-backed pages only */
! 720: start = (vaddr_t) swslot;
! 721:
! 722: /* if this is final pageout we could have a few
! 723: * extra swap blocks */
! 724: if (swcpages < swnpages) {
! 725: uvm_swap_free(swslot + swcpages,
! 726: (swnpages - swcpages));
! 727: }
! 728: } else {
! 729: /* normal object pageout */
! 730: ppsp = pps;
! 731: npages = sizeof(pps) / sizeof(struct vm_page *);
! 732: /* not looked at because PGO_ALLPAGES is set */
! 733: start = 0;
! 734: }
! 735:
! 736: /*
! 737: * now do the pageout.
! 738: *
! 739: * for swap_backed pages we have already built the cluster.
! 740: * for !swap_backed pages, uvm_pager_put will call the object's
! 741: * "make put cluster" function to build a cluster on our behalf.
! 742: *
! 743: * we pass the PGO_PDFREECLUST flag to uvm_pager_put to instruct
! 744: * it to free the cluster pages for us on a successful I/O (it
! 745: * always does this for un-successful I/O requests). this
! 746: * allows us to do clustered pageout without having to deal
! 747: * with cluster pages at this level.
! 748: *
! 749: * note locking semantics of uvm_pager_put with PGO_PDFREECLUST:
! 750: * IN: locked: uobj (if !swap_backed), page queues
! 751: * OUT: locked: uobj (if !swap_backed && result !=VM_PAGER_PEND)
! 752: * !locked: pageqs, uobj (if swap_backed || VM_PAGER_PEND)
! 753: *
! 754: * [the bit about VM_PAGER_PEND saves us one lock-unlock pair]
! 755: */
! 756:
! 757: /* locked: uobj (if !swap_backed), page queues */
! 758: uvmexp.pdpageouts++;
! 759: result = uvm_pager_put(swap_backed ? NULL : uobj, p,
! 760: &ppsp, &npages, PGO_ALLPAGES|PGO_PDFREECLUST, start, 0);
! 761: /* locked: uobj (if !swap_backed && result != PEND) */
! 762: /* unlocked: pageqs, object (if swap_backed ||result == PEND) */
! 763:
! 764: /*
! 765: * if we did i/o to swap, zero swslot to indicate that we are
! 766: * no longer building a swap-backed cluster.
! 767: */
! 768:
! 769: if (swap_backed)
! 770: swslot = 0; /* done with this cluster */
! 771:
! 772: /*
! 773: * first, we check for VM_PAGER_PEND which means that the
! 774: * async I/O is in progress and the async I/O done routine
! 775: * will clean up after us. in this case we move on to the
! 776: * next page.
! 777: *
! 778: * there is a very remote chance that the pending async i/o can
! 779: * finish _before_ we get here. if that happens, our page "p"
! 780: * may no longer be on the inactive queue. so we verify this
! 781: * when determining the next page (starting over at the head if
! 782: * we've lost our inactive page).
! 783: */
! 784:
! 785: if (result == VM_PAGER_PEND) {
! 786: uvmexp.paging += npages;
! 787: uvm_lock_pageq();
! 788: uvmexp.pdpending++;
! 789: if (p) {
! 790: if (p->pg_flags & PQ_INACTIVE)
! 791: nextpg = TAILQ_NEXT(p, pageq);
! 792: else
! 793: nextpg = TAILQ_FIRST(pglst);
! 794: } else {
! 795: nextpg = NULL;
! 796: }
! 797: continue;
! 798: }
! 799:
! 800: #ifdef UBC
! 801: if (result == VM_PAGER_ERROR &&
! 802: curproc == uvm.pagedaemon_proc) {
! 803: uvm_lock_pageq();
! 804: nextpg = TAILQ_NEXT(p, pageq);
! 805: uvm_pageactivate(p);
! 806: continue;
! 807: }
! 808: #endif
! 809:
! 810: /*
! 811: * clean up "p" if we have one
! 812: */
! 813:
! 814: if (p) {
! 815: /*
! 816: * the I/O request to "p" is done and uvm_pager_put
! 817: * has freed any cluster pages it may have allocated
! 818: * during I/O. all that is left for us to do is
! 819: * clean up page "p" (which is still PG_BUSY).
! 820: *
! 821: * our result could be one of the following:
! 822: * VM_PAGER_OK: successful pageout
! 823: *
! 824: * VM_PAGER_AGAIN: tmp resource shortage, we skip
! 825: * to next page
! 826: * VM_PAGER_{FAIL,ERROR,BAD}: an error. we
! 827: * "reactivate" page to get it out of the way (it
! 828: * will eventually drift back into the inactive
! 829: * queue for a retry).
! 830: * VM_PAGER_UNLOCK: should never see this as it is
! 831: * only valid for "get" operations
! 832: */
! 833:
! 834: /* relock p's object: page queues not lock yet, so
! 835: * no need for "try" */
! 836:
! 837: /* !swap_backed case: already locked... */
! 838: if (swap_backed) {
! 839: if (anon)
! 840: simple_lock(&anon->an_lock);
! 841: else
! 842: simple_lock(&uobj->vmobjlock);
! 843: }
! 844:
! 845: #ifdef DIAGNOSTIC
! 846: if (result == VM_PAGER_UNLOCK)
! 847: panic("pagedaemon: pageout returned "
! 848: "invalid 'unlock' code");
! 849: #endif
! 850:
! 851: /* handle PG_WANTED now */
! 852: if (p->pg_flags & PG_WANTED)
! 853: /* still holding object lock */
! 854: wakeup(p);
! 855:
! 856: atomic_clearbits_int(&p->pg_flags, PG_BUSY|PG_WANTED);
! 857: UVM_PAGE_OWN(p, NULL);
! 858:
! 859: /* released during I/O? */
! 860: if (p->pg_flags & PG_RELEASED) {
! 861: if (anon) {
! 862: /* remove page so we can get nextpg */
! 863: anon->an_page = NULL;
! 864:
! 865: simple_unlock(&anon->an_lock);
! 866: uvm_anfree(anon); /* kills anon */
! 867: pmap_page_protect(p, VM_PROT_NONE);
! 868: anon = NULL;
! 869: uvm_lock_pageq();
! 870: nextpg = TAILQ_NEXT(p, pageq);
! 871: /* free released page */
! 872: uvm_pagefree(p);
! 873:
! 874: } else {
! 875:
! 876: /*
! 877: * pgo_releasepg nukes the page and
! 878: * gets "nextpg" for us. it returns
! 879: * with the page queues locked (when
! 880: * given nextpg ptr).
! 881: */
! 882:
! 883: if (!uobj->pgops->pgo_releasepg(p,
! 884: &nextpg))
! 885: /* uobj died after release */
! 886: uobj = NULL;
! 887:
! 888: /*
! 889: * lock page queues here so that they're
! 890: * always locked at the end of the loop.
! 891: */
! 892:
! 893: uvm_lock_pageq();
! 894: }
! 895: } else { /* page was not released during I/O */
! 896: uvm_lock_pageq();
! 897: nextpg = TAILQ_NEXT(p, pageq);
! 898: if (result != VM_PAGER_OK) {
! 899: /* pageout was a failure... */
! 900: if (result != VM_PAGER_AGAIN)
! 901: uvm_pageactivate(p);
! 902: pmap_clear_reference(p);
! 903: /* XXXCDC: if (swap_backed) FREE p's
! 904: * swap block? */
! 905: } else {
! 906: /* pageout was a success... */
! 907: pmap_clear_reference(p);
! 908: pmap_clear_modify(p);
! 909: atomic_setbits_int(&p->pg_flags,
! 910: PG_CLEAN);
! 911: }
! 912: }
! 913:
! 914: /*
! 915: * drop object lock (if there is an object left). do
! 916: * a safety check of nextpg to make sure it is on the
! 917: * inactive queue (it should be since PG_BUSY pages on
! 918: * the inactive queue can't be re-queued [note: not
! 919: * true for active queue]).
! 920: */
! 921:
! 922: if (anon)
! 923: simple_unlock(&anon->an_lock);
! 924: else if (uobj)
! 925: simple_unlock(&uobj->vmobjlock);
! 926:
! 927: } else {
! 928:
! 929: /*
! 930: * if p is null in this loop, make sure it stays null
! 931: * in the next loop.
! 932: */
! 933:
! 934: nextpg = NULL;
! 935:
! 936: /*
! 937: * lock page queues here just so they're always locked
! 938: * at the end of the loop.
! 939: */
! 940:
! 941: uvm_lock_pageq();
! 942: }
! 943:
! 944: if (nextpg && (nextpg->pg_flags & PQ_INACTIVE) == 0) {
! 945: nextpg = TAILQ_FIRST(pglst); /* reload! */
! 946: }
! 947: }
! 948: return (retval);
! 949: }
! 950:
! 951: /*
! 952: * uvmpd_scan: scan the page queues and attempt to meet our targets.
! 953: *
! 954: * => called with pageq's locked
! 955: */
! 956:
! 957: void
! 958: uvmpd_scan()
! 959: {
! 960: int s, free, inactive_shortage, swap_shortage, pages_freed;
! 961: struct vm_page *p, *nextpg;
! 962: struct uvm_object *uobj;
! 963: boolean_t got_it;
! 964: UVMHIST_FUNC("uvmpd_scan"); UVMHIST_CALLED(pdhist);
! 965:
! 966: uvmexp.pdrevs++; /* counter */
! 967: uobj = NULL;
! 968:
! 969: /*
! 970: * get current "free" page count
! 971: */
! 972: s = uvm_lock_fpageq();
! 973: free = uvmexp.free;
! 974: uvm_unlock_fpageq(s);
! 975:
! 976: #ifndef __SWAP_BROKEN
! 977: /*
! 978: * swap out some processes if we are below our free target.
! 979: * we need to unlock the page queues for this.
! 980: */
! 981: if (free < uvmexp.freetarg) {
! 982: uvmexp.pdswout++;
! 983: UVMHIST_LOG(pdhist," free %ld < target %ld: swapout", free,
! 984: uvmexp.freetarg, 0, 0);
! 985: uvm_unlock_pageq();
! 986: uvm_swapout_threads();
! 987: uvm_lock_pageq();
! 988:
! 989: }
! 990: #endif
! 991:
! 992: /*
! 993: * now we want to work on meeting our targets. first we work on our
! 994: * free target by converting inactive pages into free pages. then
! 995: * we work on meeting our inactive target by converting active pages
! 996: * to inactive ones.
! 997: */
! 998:
! 999: UVMHIST_LOG(pdhist, " starting 'free' loop",0,0,0,0);
! 1000:
! 1001: /*
! 1002: * alternate starting queue between swap and object based on the
! 1003: * low bit of uvmexp.pdrevs (which we bump by one each call).
! 1004: */
! 1005:
! 1006: got_it = FALSE;
! 1007: pages_freed = uvmexp.pdfreed;
! 1008: if ((uvmexp.pdrevs & 1) != 0 && uvmexp.nswapdev != 0)
! 1009: got_it = uvmpd_scan_inactive(&uvm.page_inactive_swp);
! 1010: if (!got_it)
! 1011: got_it = uvmpd_scan_inactive(&uvm.page_inactive_obj);
! 1012: if (!got_it && (uvmexp.pdrevs & 1) == 0 && uvmexp.nswapdev != 0)
! 1013: (void) uvmpd_scan_inactive(&uvm.page_inactive_swp);
! 1014: pages_freed = uvmexp.pdfreed - pages_freed;
! 1015:
! 1016: /*
! 1017: * we have done the scan to get free pages. now we work on meeting
! 1018: * our inactive target.
! 1019: */
! 1020:
! 1021: inactive_shortage = uvmexp.inactarg - uvmexp.inactive;
! 1022:
! 1023: /*
! 1024: * detect if we're not going to be able to page anything out
! 1025: * until we free some swap resources from active pages.
! 1026: */
! 1027:
! 1028: swap_shortage = 0;
! 1029: if (uvmexp.free < uvmexp.freetarg &&
! 1030: uvmexp.swpginuse == uvmexp.swpages &&
! 1031: uvmexp.swpgonly < uvmexp.swpages &&
! 1032: pages_freed == 0) {
! 1033: swap_shortage = uvmexp.freetarg - uvmexp.free;
! 1034: }
! 1035:
! 1036: UVMHIST_LOG(pdhist, " loop 2: inactive_shortage=%ld swap_shortage=%ld",
! 1037: inactive_shortage, swap_shortage,0,0);
! 1038: for (p = TAILQ_FIRST(&uvm.page_active);
! 1039: p != NULL && (inactive_shortage > 0 || swap_shortage > 0);
! 1040: p = nextpg) {
! 1041: nextpg = TAILQ_NEXT(p, pageq);
! 1042: if (p->pg_flags & PG_BUSY)
! 1043: continue; /* quick check before trying to lock */
! 1044:
! 1045: /*
! 1046: * lock the page's owner.
! 1047: */
! 1048: /* is page anon owned or ownerless? */
! 1049: if ((p->pg_flags & PQ_ANON) || p->uobject == NULL) {
! 1050: KASSERT(p->uanon != NULL);
! 1051: if (!simple_lock_try(&p->uanon->an_lock))
! 1052: continue;
! 1053:
! 1054: /* take over the page? */
! 1055: if ((p->pg_flags & PQ_ANON) == 0) {
! 1056: KASSERT(p->loan_count > 0);
! 1057: p->loan_count--;
! 1058: atomic_setbits_int(&p->pg_flags, PQ_ANON);
! 1059: }
! 1060: } else {
! 1061: if (!simple_lock_try(&p->uobject->vmobjlock))
! 1062: continue;
! 1063: }
! 1064:
! 1065: /*
! 1066: * skip this page if it's busy.
! 1067: */
! 1068:
! 1069: if ((p->pg_flags & PG_BUSY) != 0) {
! 1070: if (p->pg_flags & PQ_ANON)
! 1071: simple_unlock(&p->uanon->an_lock);
! 1072: else
! 1073: simple_unlock(&p->uobject->vmobjlock);
! 1074: continue;
! 1075: }
! 1076:
! 1077: /*
! 1078: * if there's a shortage of swap, free any swap allocated
! 1079: * to this page so that other pages can be paged out.
! 1080: */
! 1081:
! 1082: if (swap_shortage > 0) {
! 1083: if ((p->pg_flags & PQ_ANON) && p->uanon->an_swslot) {
! 1084: uvm_swap_free(p->uanon->an_swslot, 1);
! 1085: p->uanon->an_swslot = 0;
! 1086: atomic_clearbits_int(&p->pg_flags, PG_CLEAN);
! 1087: swap_shortage--;
! 1088: }
! 1089: if (p->pg_flags & PQ_AOBJ) {
! 1090: int slot = uao_set_swslot(p->uobject,
! 1091: p->offset >> PAGE_SHIFT, 0);
! 1092: if (slot) {
! 1093: uvm_swap_free(slot, 1);
! 1094: atomic_clearbits_int(&p->pg_flags,
! 1095: PG_CLEAN);
! 1096: swap_shortage--;
! 1097: }
! 1098: }
! 1099: }
! 1100:
! 1101: /*
! 1102: * deactivate this page if there's a shortage of
! 1103: * inactive pages.
! 1104: */
! 1105:
! 1106: if (inactive_shortage > 0) {
! 1107: pmap_page_protect(p, VM_PROT_NONE);
! 1108: /* no need to check wire_count as pg is "active" */
! 1109: uvm_pagedeactivate(p);
! 1110: uvmexp.pddeact++;
! 1111: inactive_shortage--;
! 1112: }
! 1113: if (p->pg_flags & PQ_ANON)
! 1114: simple_unlock(&p->uanon->an_lock);
! 1115: else
! 1116: simple_unlock(&p->uobject->vmobjlock);
! 1117: }
! 1118: }
CVSweb