/*	$OpenBSD: uvm_pager.c,v 1.43 2007/06/06 17:15:14 deraadt Exp $	*/
/*	$NetBSD: uvm_pager.c,v 1.36 2000/11/27 18:26:41 chs Exp $	*/

/*
 *
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by Charles D. Cranor and
 *      Washington University.
 * 4. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * from: Id: uvm_pager.c,v 1.1.2.23 1998/02/02 20:38:06 chuck Exp
 */

/*
 * uvm_pager.c: generic functions used to assist the pagers.
 */

#define UVM_PAGER
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/pool.h>
#include <sys/vnode.h>
#include <sys/buf.h>

#include <uvm/uvm.h>

struct pool *uvm_aiobuf_pool;

struct uvm_pagerops *uvmpagerops[] = {
	&aobj_pager,
	&uvm_deviceops,
	&uvm_vnodeops,
};

/*
 * the pager map: provides KVA for I/O
 */

vm_map_t pager_map;		/* XXX */
simple_lock_data_t pager_map_wanted_lock;
boolean_t pager_map_wanted;	/* locked by pager map */
static vaddr_t emergva;
static boolean_t emerginuse;
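
/*
 * note (editorial, derived from the code below): "emergva" is a
 * MAXBSIZE-sized VA range reserved at boot so that the pagedaemon
 * always has somewhere to map a cluster even when pager_map is
 * exhausted (see uvm_pagermapin()); "emerginuse" serializes its use.
 */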

/*
 * uvm_pager_init: init pagers (at boot time)
 */

void
uvm_pager_init()
{
	int lcv;

	/*
	 * init pager map
	 */

	pager_map = uvm_km_suballoc(kernel_map, &uvm.pager_sva, &uvm.pager_eva,
				    PAGER_MAP_SIZE, 0, FALSE, NULL);
	simple_lock_init(&pager_map_wanted_lock);
	pager_map_wanted = FALSE;
	emergva = uvm_km_valloc(kernel_map, MAXBSIZE);
	emerginuse = FALSE;

	/*
	 * init ASYNC I/O queue
	 */

	TAILQ_INIT(&uvm.aio_done);

	/*
	 * call pager init functions
	 */
	for (lcv = 0 ; lcv < sizeof(uvmpagerops)/sizeof(struct uvm_pagerops *);
	    lcv++) {
		if (uvmpagerops[lcv]->pgo_init)
			uvmpagerops[lcv]->pgo_init();
	}
}

/*
 * uvm_pagermapin: map pages into KVA (pager_map) for I/O that needs mappings
 *
 * we basically just map in a blank map entry to reserve the space in the
 * map and then use pmap_enter() to put the mappings in by hand.
 */

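/*
 * usage sketch (editorial, not part of the original source): a pager
 * typically brackets its device I/O with these two calls,
 *
 *	kva = uvm_pagermapin(pps, npages, UVMPAGER_MAPIN_WAITOK);
 *	...point a buf at "kva" and start the I/O...
 *	uvm_pagermapout(kva, npages);
 *
 * with UVMPAGER_MAPIN_READ or'd into the flags when the transfer will
 * write into the pages (a pagein), since that needs VM_PROT_WRITE.
 */
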
vaddr_t
uvm_pagermapin(pps, npages, flags)
	struct vm_page **pps;
	int npages;
	int flags;
{
	vsize_t size;
	vaddr_t kva;
	vaddr_t cva;
	struct vm_page *pp;
	vm_prot_t prot;
	UVMHIST_FUNC("uvm_pagermapin"); UVMHIST_CALLED(maphist);

	UVMHIST_LOG(maphist,"(pps=%p, npages=%ld)", pps, npages,0,0);

	/*
	 * compute protection.  outgoing I/O only needs read
	 * access to the page, whereas incoming needs read/write.
	 */

	prot = VM_PROT_READ;
	if (flags & UVMPAGER_MAPIN_READ)
		prot |= VM_PROT_WRITE;

ReStart:
	size = npages << PAGE_SHIFT;
	kva = 0;			/* let system choose VA */

	if (uvm_map(pager_map, &kva, size, NULL,
	      UVM_UNKNOWN_OFFSET, 0, UVM_FLAG_NOMERGE) != 0) {
		if (curproc == uvm.pagedaemon_proc) {
			simple_lock(&pager_map_wanted_lock);
			if (emerginuse) {
				UVM_UNLOCK_AND_WAIT(&emergva,
				    &pager_map_wanted_lock, FALSE,
				    "emergva", 0);
				goto ReStart;
			}
			emerginuse = TRUE;
			simple_unlock(&pager_map_wanted_lock);
			kva = emergva;
			KASSERT(npages <= MAXBSIZE >> PAGE_SHIFT);
			goto enter;
		}
		if ((flags & UVMPAGER_MAPIN_WAITOK) == 0) {
			UVMHIST_LOG(maphist,"<- NOWAIT failed", 0,0,0,0);
			return(0);
		}
		simple_lock(&pager_map_wanted_lock);
		pager_map_wanted = TRUE;
		UVMHIST_LOG(maphist, "  SLEEPING on pager_map",0,0,0,0);
		UVM_UNLOCK_AND_WAIT(pager_map, &pager_map_wanted_lock, FALSE,
		    "pager_map", 0);
		goto ReStart;
	}

enter:
	/* got it */
	for (cva = kva ; size != 0 ; size -= PAGE_SIZE, cva += PAGE_SIZE) {
		pp = *pps++;
		KASSERT(pp);
		KASSERT(pp->pg_flags & PG_BUSY);
		pmap_enter(vm_map_pmap(pager_map), cva, VM_PAGE_TO_PHYS(pp),
		    prot, PMAP_WIRED | prot);
	}
	pmap_update(vm_map_pmap(pager_map));

	UVMHIST_LOG(maphist, "<- done (KVA=0x%lx)", kva,0,0,0);
	return(kva);
}

/*
 * uvm_pagermapout: remove pager_map mapping
 *
 * we remove our mappings by hand and then remove the map entry (waking
 * up anyone wanting space).
 */

void
uvm_pagermapout(kva, npages)
	vaddr_t kva;
	int npages;
{
	vsize_t size = npages << PAGE_SHIFT;
	vm_map_entry_t entries;
	UVMHIST_FUNC("uvm_pagermapout"); UVMHIST_CALLED(maphist);

	UVMHIST_LOG(maphist, " (kva=0x%lx, npages=%ld)", kva, npages,0,0);

	/*
	 * duplicate uvm_unmap, but add in pager_map_wanted handling.
	 */

	if (kva == emergva) {
		simple_lock(&pager_map_wanted_lock);
		emerginuse = FALSE;
		wakeup(&emergva);
		simple_unlock(&pager_map_wanted_lock);
		entries = NULL;
		goto remove;
	}

	vm_map_lock(pager_map);
	uvm_unmap_remove(pager_map, kva, kva + size, &entries, NULL);
	simple_lock(&pager_map_wanted_lock);
	if (pager_map_wanted) {
		pager_map_wanted = FALSE;
		wakeup(pager_map);
	}
	simple_unlock(&pager_map_wanted_lock);
	vm_map_unlock(pager_map);
remove:
	pmap_remove(pmap_kernel(), kva, kva + (npages << PAGE_SHIFT));
	if (entries)
		uvm_unmap_detach(entries, 0);

	pmap_update(pmap_kernel());
	UVMHIST_LOG(maphist,"<- done",0,0,0,0);
}

/*
 * uvm_mk_pcluster
 *
 * generic "make 'pager put' cluster" function.  a pager can either
 * [1] set pgo_mk_pcluster to NULL (never cluster), [2] set it to this
 * generic function, or [3] set it to a pager specific function.
 *
 * => caller must lock object _and_ pagequeues (since we need to look
 *    at active vs. inactive bits, etc.)
 * => caller must make center page busy and write-protect it
 * => we mark all cluster pages busy for the caller
 * => the caller must unbusy all pages (and check wanted/released
 *    status if it drops the object lock)
 * => flags:
 *      PGO_ALLPAGES:  all pages in object are valid targets
 *      !PGO_ALLPAGES: use "lo" and "hi" to limit range of cluster
 *      PGO_DOACTCLUST: include active pages in cluster.
 *        NOTE: the caller should clear PG_CLEANCHK bits if PGO_DOACTCLUST.
 *              PG_CLEANCHK is only a hint, but clearing will help reduce
 *              the number of calls we make to the pmap layer.
 */

struct vm_page **
uvm_mk_pcluster(uobj, pps, npages, center, flags, mlo, mhi)
	struct uvm_object *uobj;	/* IN */
	struct vm_page **pps, *center;	/* IN/OUT, IN */
	int *npages, flags;		/* IN/OUT, IN */
	voff_t mlo, mhi;		/* IN (if !PGO_ALLPAGES) */
{
	struct vm_page **ppsp, *pclust;
	voff_t lo, hi, curoff;
	int center_idx, forward, incr;
	UVMHIST_FUNC("uvm_mk_pcluster"); UVMHIST_CALLED(maphist);

	/*
	 * center page should already be busy and write protected.  XXX:
	 * suppose page is wired?  if we lock, then a process could
	 * fault/block on it.  if we don't lock, a process could write the
	 * pages in the middle of an I/O.  (consider an msync()).  let's
	 * lock it for now (better to delay than corrupt data?).
	 */

	/*
	 * get cluster boundaries, check sanity, and apply our limits as well.
	 */

	uobj->pgops->pgo_cluster(uobj, center->offset, &lo, &hi);
	if ((flags & PGO_ALLPAGES) == 0) {
		if (lo < mlo)
			lo = mlo;
		if (hi > mhi)
			hi = mhi;
	}
	if ((hi - lo) >> PAGE_SHIFT > *npages) { /* pps too small, bail out! */
		pps[0] = center;
		*npages = 1;
		return(pps);
	}

	/*
	 * now determine the center and attempt to cluster around the
	 * edges
	 */

	center_idx = (center->offset - lo) >> PAGE_SHIFT;
	pps[center_idx] = center;	/* plug in the center page */
	ppsp = &pps[center_idx];
	*npages = 1;
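
	/*
	 * worked example (editorial): with 4k pages, lo = 0x2000,
	 * hi = 0x8000 and center->offset = 0x4000, the window holds
	 * (hi - lo) >> PAGE_SHIFT = 6 pages and center_idx = 2, so
	 * the center lands in pps[2] and the loops below can grow
	 * the cluster 2 pages backward and 3 pages forward.
	 */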

	/*
	 * attempt to cluster around the left [backward], and then
	 * the right side [forward].
	 *
	 * note that for inactive pages (pages that have been deactivated)
	 * there are no valid mappings and PG_CLEAN should be up to date.
	 * [i.e. there is no need to query the pmap with pmap_is_modified
	 * since there are no mappings].
	 */

	for (forward  = 0 ; forward <= 1 ; forward++) {
		incr = forward ? PAGE_SIZE : -PAGE_SIZE;
		curoff = center->offset + incr;
		for ( ;(forward == 0 && curoff >= lo) ||
		       (forward && curoff < hi);
		      curoff += incr) {

			pclust = uvm_pagelookup(uobj, curoff); /* lookup page */
			if (pclust == NULL) {
				break;			/* no page */
			}
			/* handle active pages */
			/* NOTE: inactive pages don't have pmap mappings */
			if ((pclust->pg_flags & PQ_INACTIVE) == 0) {
				if ((flags & PGO_DOACTCLUST) == 0) {
					/* don't want mapped pages at all */
					break;
				}

				/* make sure "clean" bit is sync'd */
				if ((pclust->pg_flags & PG_CLEANCHK) == 0) {
					if ((pclust->pg_flags & (PG_CLEAN|PG_BUSY))
					   == PG_CLEAN &&
					   pmap_is_modified(pclust))
						atomic_clearbits_int(
						    &pclust->pg_flags,
						    PG_CLEAN);
					/* now checked */
					atomic_setbits_int(&pclust->pg_flags,
					    PG_CLEANCHK);
				}
			}

			/* is the page available for cleaning, and does it need it? */
			if ((pclust->pg_flags & (PG_CLEAN|PG_BUSY)) != 0) {
				break;	/* page is already clean or is busy */
			}

			/* yes!   enroll the page in our array */
			atomic_setbits_int(&pclust->pg_flags, PG_BUSY);
			UVM_PAGE_OWN(pclust, "uvm_mk_pcluster");

			/* XXX: protect wired page?   see above comment. */
			pmap_page_protect(pclust, VM_PROT_READ);
			if (!forward) {
				ppsp--;			/* back up one page */
				*ppsp = pclust;
			} else {
				/* move forward one page */
				ppsp[*npages] = pclust;
			}
			(*npages)++;
		}
	}

	/*
	 * done!  return the cluster array to the caller!!!
	 */

	UVMHIST_LOG(maphist, "<- done",0,0,0,0);
	return(ppsp);
}

/*
 * uvm_pager_put: high level pageout routine
 *
 * we want to pageout page "pg" to backing store, clustering if
 * possible.
 *
 * => page queues must be locked by caller
 * => if page is not swap-backed, then "uobj" points to the object
 *     backing it.   this object should be locked by the caller.
 * => if page is swap-backed, then "uobj" should be NULL.
 * => "pg" should be PG_BUSY (by caller), and !PG_CLEAN
 * => for swap-backed memory, "pg" can be NULL if there is no page
 *    of interest [sometimes the case for the pagedaemon]
 * => "ppsp_ptr" should point to an array of npages vm_page pointers
 *     for possible cluster building
 * => flags (first two for non-swap-backed pages)
 *	PGO_ALLPAGES: all pages in uobj are valid targets
 *	PGO_DOACTCLUST: include "PQ_ACTIVE" pages as valid targets
 *	PGO_SYNCIO: do SYNC I/O (no async)
 *	PGO_PDFREECLUST: pagedaemon: drop cluster on successful I/O
 * => start/stop: if (uobj && !PGO_ALLPAGES) limit targets to this range
 *		  if (!uobj) start is the (daddr64_t) of the starting swapblk
 * => return state:
 *	1. we return the VM_PAGER status code of the pageout
 *	2. we return with the page queues unlocked
 *	3. if (uobj != NULL) [!swap_backed] we return with
 *		uobj locked _only_ if PGO_PDFREECLUST is set
 *		AND result != VM_PAGER_PEND.   in all other cases
 *		we return with uobj unlocked.   [this is a hack
 *		that allows the pagedaemon to save one lock/unlock
 *		pair in the !swap_backed case since we have to
 *		lock the uobj to drop the cluster anyway]
 *	4. on errors we always drop the cluster.   thus, if we return
 *		!PEND, !OK, then the caller only has to worry about
 *		un-busying the main page (not the cluster pages).
 *	5. on success, if !PGO_PDFREECLUST, we return the cluster
 *		with all pages busy (caller must un-busy and check
 *		wanted/released flags).
 */

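/*
 * call sketch (editorial, hypothetical caller): a pagedaemon-style
 * pageout of a swap-backed cluster assembled beforehand might look like
 *
 *	result = uvm_pager_put(NULL, pg, &ppsp, &npages,
 *	    PGO_PDFREECLUST, swblk, 0);
 *
 * on VM_PAGER_OK the cluster pages have already been un-busied for it
 * (PGO_PDFREECLUST); on VM_PAGER_PEND the async iodone path finishes
 * up; on any other result only "pg" is left to clean up (item #4).
 */
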
int
uvm_pager_put(uobj, pg, ppsp_ptr, npages, flags, start, stop)
	struct uvm_object *uobj;	/* IN */
	struct vm_page *pg, ***ppsp_ptr;/* IN, IN/OUT */
	int *npages;			/* IN/OUT */
	int flags;			/* IN */
	voff_t start, stop;		/* IN, IN */
{
	int result;
	daddr64_t swblk;
	struct vm_page **ppsp = *ppsp_ptr;
	UVMHIST_FUNC("uvm_pager_put"); UVMHIST_CALLED(pdhist);

	/*
	 * note that uobj is null  if we are doing a swap-backed pageout.
	 * note that uobj is !null if we are doing normal object pageout.
	 * note that the page queues must be locked to cluster.
	 */

	if (uobj) {	/* if !swap-backed */

		/*
		 * attempt to build a cluster for pageout using its
		 * make-put-cluster function (if it has one).
		 */

		if (uobj->pgops->pgo_mk_pcluster) {
			ppsp = uobj->pgops->pgo_mk_pcluster(uobj, ppsp,
			    npages, pg, flags, start, stop);
			*ppsp_ptr = ppsp;  /* update caller's pointer */
		} else {
			ppsp[0] = pg;
			*npages = 1;
		}

		swblk = 0;		/* XXX: keep gcc happy */

	} else {

		/*
		 * for swap-backed pageout, the caller (the pagedaemon) has
		 * already built the cluster for us.   the starting swap
		 * block we are writing to has been passed in as "start."
		 * "pg" could be NULL if there is no page we are especially
		 * interested in (in which case the whole cluster gets dropped
		 * in the event of an error or a sync "done").
		 */
		swblk = (daddr64_t) start;
		/* ppsp and npages should be ok */
	}

	/* now that we've clustered we can unlock the page queues */
	uvm_unlock_pageq();

	/*
	 * now attempt the I/O.   if we have a failure and we are
	 * clustered, we will drop the cluster and try again.
	 */

ReTry:
	if (uobj) {
		/* object is locked */
		result = uobj->pgops->pgo_put(uobj, ppsp, *npages, flags);
		UVMHIST_LOG(pdhist, "put -> %ld", result, 0,0,0);
		/* object is now unlocked */
	} else {
		/* nothing locked */
		/* XXX daddr64_t -> int */
		result = uvm_swap_put(swblk, ppsp, *npages, flags);
		/* nothing locked */
	}

	/*
	 * we have attempted the I/O.
	 *
	 * if the I/O was a success then:
	 *	if !PGO_PDFREECLUST, we return the cluster to the
	 *		caller (who must un-busy all pages)
	 *	else we un-busy cluster pages for the pagedaemon
	 *
	 * if I/O is pending (async i/o) then we return the pending code.
	 * [in this case the async i/o done function must clean up when
	 *  i/o is done...]
	 */

	if (result == VM_PAGER_PEND || result == VM_PAGER_OK) {
		if (result == VM_PAGER_OK && (flags & PGO_PDFREECLUST)) {
			/*
			 * drop cluster and relock object (only if I/O is
			 * not pending)
			 */
			if (uobj)
				/* required for dropcluster */
				simple_lock(&uobj->vmobjlock);
			if (*npages > 1 || pg == NULL)
				uvm_pager_dropcluster(uobj, pg, ppsp, npages,
				    PGO_PDFREECLUST);
			/* if (uobj): object still locked, as per
			 * return-state item #3 */
		}
		return (result);
	}

	/*
	 * a pager error occurred (even after dropping the cluster, if there
	 * was one).  give up! the caller only has one page ("pg")
	 * to worry about.
	 */

	if (*npages > 1 || pg == NULL) {
		if (uobj) {
			simple_lock(&uobj->vmobjlock);
		}
		uvm_pager_dropcluster(uobj, pg, ppsp, npages, PGO_REALLOCSWAP);

		/*
		 * for failed swap-backed pageouts with a "pg",
		 * we need to reset pg's swslot to either:
		 * "swblk" (for transient errors, so we can retry),
		 * or 0 (for hard errors).
		 */

		if (uobj == NULL && pg != NULL) {
			/* XXX daddr64_t -> int */
			int nswblk = (result == VM_PAGER_AGAIN) ? swblk : 0;
			if (pg->pg_flags & PQ_ANON) {
				simple_lock(&pg->uanon->an_lock);
				pg->uanon->an_swslot = nswblk;
				simple_unlock(&pg->uanon->an_lock);
			} else {
				simple_lock(&pg->uobject->vmobjlock);
				uao_set_swslot(pg->uobject,
					       pg->offset >> PAGE_SHIFT,
					       nswblk);
				simple_unlock(&pg->uobject->vmobjlock);
			}
		}
		if (result == VM_PAGER_AGAIN) {

			/*
			 * for transient failures, free all the swslots that
			 * we're not going to retry with.
			 */

			if (uobj == NULL) {
				if (pg) {
					/* XXX daddr64_t -> int */
					uvm_swap_free(swblk + 1, *npages - 1);
				} else {
					/* XXX daddr64_t -> int */
					uvm_swap_free(swblk, *npages);
				}
			}
			if (pg) {
				ppsp[0] = pg;
				*npages = 1;
				goto ReTry;
			}
		} else if (uobj == NULL) {

			/*
			 * for hard errors on swap-backed pageouts,
			 * mark the swslots as bad.  note that we do not
			 * free swslots that we mark bad.
			 */

			/* XXX daddr64_t -> int */
			uvm_swap_markbad(swblk, *npages);
		}
	}

	/*
	 * the error has been handled; hand back the result.  if the
	 * caller expects the object locked on return (the PGO_PDFREECLUST
	 * case of return-state item #3), relock it first.
	 */

	if (uobj && (flags & PGO_PDFREECLUST) != 0)
		simple_lock(&uobj->vmobjlock);
	return(result);
}

/*
 * uvm_pager_dropcluster: drop a cluster we have built (because we
 * got an error, or, if PGO_PDFREECLUST we are un-busying the
 * cluster pages on behalf of the pagedaemon).
 *
 * => uobj, if non-null, is a non-swap-backed object that is
 *	locked by the caller.   we return with this object still
 *	locked.
 * => page queues are not locked
 * => pg is our page of interest (the one we clustered around, can be null)
 * => ppsp/npages is our current cluster
 * => flags: PGO_PDFREECLUST: pageout was a success: un-busy cluster
 *	pages on behalf of the pagedaemon.
 *           PGO_REALLOCSWAP: drop previously allocated swap slots for
 *		clustered swap-backed pages (except for "pg" if !NULL)
 *		"swblk" is the start of swap alloc (e.g. for ppsp[0])
 *		[only meaningful if swap-backed (uobj == NULL)]
 */

void
uvm_pager_dropcluster(uobj, pg, ppsp, npages, flags)
	struct uvm_object *uobj;	/* IN */
	struct vm_page *pg, **ppsp;	/* IN, IN/OUT */
	int *npages;			/* IN/OUT */
	int flags;
{
	int lcv;
	boolean_t obj_is_alive;
	struct uvm_object *saved_uobj;

	/*
	 * drop all pages but "pg"
	 */

	for (lcv = 0 ; lcv < *npages ; lcv++) {

		/* skip "pg" or empty slot */
		if (ppsp[lcv] == pg || ppsp[lcv] == NULL)
			continue;

		/*
		 * if swap-backed, gain lock on object that owns page.  note
		 * that PQ_ANON bit can't change as long as we are holding
		 * the PG_BUSY bit (so there is no need to lock the page
		 * queues to test it).
		 *
		 * once we have the lock, dispose of the pointer to swap, if
		 * requested
		 */
		if (!uobj) {
			if (ppsp[lcv]->pg_flags & PQ_ANON) {
				simple_lock(&ppsp[lcv]->uanon->an_lock);
				if (flags & PGO_REALLOCSWAP)
					/* zap swap block */
					ppsp[lcv]->uanon->an_swslot = 0;
			} else {
				simple_lock(&ppsp[lcv]->uobject->vmobjlock);
				if (flags & PGO_REALLOCSWAP)
					uao_set_swslot(ppsp[lcv]->uobject,
					    ppsp[lcv]->offset >> PAGE_SHIFT, 0);
			}
		}

		/* did someone want the page while we had it busy-locked? */
		if (ppsp[lcv]->pg_flags & PG_WANTED) {
			/* still holding obj lock */
			wakeup(ppsp[lcv]);
		}

		/* if page was released, release it.  otherwise un-busy it */
		if (ppsp[lcv]->pg_flags & PG_RELEASED) {

			if (ppsp[lcv]->pg_flags & PQ_ANON) {
				/* so that anfree will free */
				atomic_clearbits_int(&ppsp[lcv]->pg_flags,
				    PG_BUSY);
				UVM_PAGE_OWN(ppsp[lcv], NULL);

				pmap_page_protect(ppsp[lcv], VM_PROT_NONE);
				simple_unlock(&ppsp[lcv]->uanon->an_lock);
				/* kills anon and frees pg */
				uvm_anfree(ppsp[lcv]->uanon);

				continue;
			}

			/*
			 * pgo_releasepg will dump the page for us
			 */

			saved_uobj = ppsp[lcv]->uobject;
			obj_is_alive =
			    saved_uobj->pgops->pgo_releasepg(ppsp[lcv], NULL);

			/* for normal objects, "pg" is still PG_BUSY by us,
			 * so obj can't die */
			KASSERT(!uobj || obj_is_alive);

			/* only unlock the object if it is still alive...  */
			if (obj_is_alive && saved_uobj != uobj)
				simple_unlock(&saved_uobj->vmobjlock);

			/*
			 * XXXCDC: suppose uobj died in the pgo_releasepg?
			 * how do we pass that info up to the caller?  we
			 * are currently ignoring it...
			 */

			continue;		/* next page */
		} else {
			atomic_clearbits_int(&ppsp[lcv]->pg_flags,
			    PG_BUSY|PG_WANTED|PG_FAKE);
			UVM_PAGE_OWN(ppsp[lcv], NULL);
		}

		/*
		 * if we are operating on behalf of the pagedaemon and we
		 * had a successful pageout update the page!
		 */
		if (flags & PGO_PDFREECLUST) {
			pmap_clear_reference(ppsp[lcv]);
			pmap_clear_modify(ppsp[lcv]);
			atomic_setbits_int(&ppsp[lcv]->pg_flags, PG_CLEAN);
		}

		/* if anonymous cluster, unlock object and move on */
		if (!uobj) {
			if (ppsp[lcv]->pg_flags & PQ_ANON)
				simple_unlock(&ppsp[lcv]->uanon->an_lock);
			else
				simple_unlock(&ppsp[lcv]->uobject->vmobjlock);
		}
	}
}

#ifdef UBC
/*
 * interrupt-context iodone handler for nested i/o bufs.
 *
 * => must be at splbio().
 */

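/*
 * note (editorial, derived from the code below): when a large i/o is
 * split up, each child buf points back at its master buf through
 * b_private, and the master's b_resid is primed with the total byte
 * count, so it reaches zero exactly when the last child completes.
 */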
void
uvm_aio_biodone1(bp)
	struct buf *bp;
{
	struct buf *mbp = bp->b_private;

	splassert(IPL_BIO);

	KASSERT(mbp != bp);
	if (bp->b_flags & B_ERROR) {
		mbp->b_flags |= B_ERROR;
		mbp->b_error = bp->b_error;
	}
	mbp->b_resid -= bp->b_bcount;
	pool_put(&bufpool, bp);
	if (mbp->b_resid == 0) {
		biodone(mbp);
	}
}
#endif

/*
 * interrupt-context iodone handler for single-buf i/os
 * or the top-level buf of a nested-buf i/o.
 *
 * => must be at splbio().
 */

void
uvm_aio_biodone(bp)
	struct buf *bp;
{
	splassert(IPL_BIO);

	/* reset b_iodone for when this is a single-buf i/o. */
	bp->b_iodone = uvm_aio_aiodone;

	simple_lock(&uvm.aiodoned_lock);	/* locks uvm.aio_done */
	TAILQ_INSERT_TAIL(&uvm.aio_done, bp, b_freelist);
	wakeup(&uvm.aiodoned);
	simple_unlock(&uvm.aiodoned_lock);
}
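
/*
 * note (editorial): the buf is queued here rather than processed
 * because iodone runs in interrupt context; the aiodone daemon
 * sleeping on uvm.aiodoned dequeues it from uvm.aio_done and calls
 * its b_iodone (uvm_aio_aiodone below) from thread context, where
 * it is safe to take locks and free resources.
 */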

/*
 * uvm_aio_aiodone: do iodone processing for async i/os.
 * this should be called in thread context, not interrupt context.
 */

void
uvm_aio_aiodone(bp)
	struct buf *bp;
{
	int npages = bp->b_bufsize >> PAGE_SHIFT;
	struct vm_page *pg, *pgs[npages];
	struct uvm_object *uobj;
	int i, error;
	boolean_t write, swap;
	UVMHIST_FUNC("uvm_aio_aiodone"); UVMHIST_CALLED(pdhist);
	UVMHIST_LOG(pdhist, "bp %p", bp, 0,0,0);

	splassert(IPL_BIO);

	error = (bp->b_flags & B_ERROR) ? (bp->b_error ? bp->b_error : EIO) : 0;
	write = (bp->b_flags & B_READ) == 0;
#ifdef UBC
	/* XXXUBC B_NOCACHE is for swap pager, should be done differently */
	if (write && !(bp->b_flags & B_NOCACHE) && bioops.io_pageiodone) {
		(*bioops.io_pageiodone)(bp);
	}
#endif

	uobj = NULL;
	for (i = 0; i < npages; i++) {
		pgs[i] = uvm_pageratop((vaddr_t)bp->b_data + (i << PAGE_SHIFT));
		UVMHIST_LOG(pdhist, "pgs[%ld] = %p", i, pgs[i],0,0);
	}
	uvm_pagermapout((vaddr_t)bp->b_data, npages);
#ifdef UVM_SWAP_ENCRYPT
	/*
	 * XXX - assumes that we only get ASYNC writes. used to be above.
	 */
	if (pgs[0]->pg_flags & PQ_ENCRYPT) {
		uvm_swap_freepages(pgs, npages);
		goto freed;
	}
#endif /* UVM_SWAP_ENCRYPT */
	for (i = 0; i < npages; i++) {
		pg = pgs[i];

		if (i == 0) {
			swap = (pg->pg_flags & PQ_SWAPBACKED) != 0;
			if (!swap) {
				uobj = pg->uobject;
				simple_lock(&uobj->vmobjlock);
			}
		}
		KASSERT(swap || pg->uobject == uobj);
		if (swap) {
			if (pg->pg_flags & PQ_ANON) {
				simple_lock(&pg->uanon->an_lock);
			} else {
				simple_lock(&pg->uobject->vmobjlock);
			}
		}

		/*
		 * if this is a read and we got an error, mark the pages
		 * PG_RELEASED so that uvm_page_unbusy() will free them.
		 */
		if (!write && error) {
			atomic_setbits_int(&pg->pg_flags, PG_RELEASED);
			continue;
		}
		KASSERT(!write || (pgs[i]->pg_flags & PG_FAKE) == 0);

		/*
		 * if this is a read and the page is PG_FAKE,
		 * or this was a successful write,
		 * mark the page PG_CLEAN and not PG_FAKE.
		 */

		if ((pgs[i]->pg_flags & PG_FAKE) || (write && error != ENOMEM)) {
			pmap_clear_reference(pgs[i]);
			pmap_clear_modify(pgs[i]);
			atomic_setbits_int(&pgs[i]->pg_flags, PG_CLEAN);
			atomic_clearbits_int(&pgs[i]->pg_flags, PG_FAKE);
		}
		if (swap) {
			if (pg->pg_flags & PQ_ANON) {
				simple_unlock(&pg->uanon->an_lock);
			} else {
				simple_unlock(&pg->uobject->vmobjlock);
			}
		}
	}
	uvm_page_unbusy(pgs, npages);
	if (!swap) {
		simple_unlock(&uobj->vmobjlock);
	}

#ifdef UVM_SWAP_ENCRYPT
freed:
#endif
	if (write && (bp->b_flags & B_AGE) != 0 && bp->b_vp != NULL) {
		vwakeup(bp->b_vp);
	}
	pool_put(&bufpool, bp);
}

/*
 * translate unix errno values to VM_PAGER_*.
 */

int
uvm_errno2vmerror(errno)
	int errno;
{
	switch (errno) {
	case 0:
		return VM_PAGER_OK;
	case EINVAL:
		return VM_PAGER_BAD;
	case EINPROGRESS:
		return VM_PAGER_PEND;
	case EIO:
		return VM_PAGER_ERROR;
	case EAGAIN:
		return VM_PAGER_AGAIN;
	case EBUSY:
		return VM_PAGER_UNLOCK;
	default:
		return VM_PAGER_ERROR;
	}
}
