[BACK]Return to uvm_swap.c CVS log [TXT][DIR] Up to [local] / sys / uvm

Annotation of sys/uvm/uvm_swap.c, Revision 1.1

1.1     ! nbrk        1: /*     $OpenBSD: uvm_swap.c,v 1.72 2007/06/18 21:51:15 pedro Exp $     */
        !             2: /*     $NetBSD: uvm_swap.c,v 1.40 2000/11/17 11:39:39 mrg Exp $        */
        !             3:
        !             4: /*
        !             5:  * Copyright (c) 1995, 1996, 1997 Matthew R. Green
        !             6:  * All rights reserved.
        !             7:  *
        !             8:  * Redistribution and use in source and binary forms, with or without
        !             9:  * modification, are permitted provided that the following conditions
        !            10:  * are met:
        !            11:  * 1. Redistributions of source code must retain the above copyright
        !            12:  *    notice, this list of conditions and the following disclaimer.
        !            13:  * 2. Redistributions in binary form must reproduce the above copyright
        !            14:  *    notice, this list of conditions and the following disclaimer in the
        !            15:  *    documentation and/or other materials provided with the distribution.
        !            16:  * 3. The name of the author may not be used to endorse or promote products
        !            17:  *    derived from this software without specific prior written permission.
        !            18:  *
        !            19:  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
        !            20:  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
        !            21:  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
        !            22:  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
        !            23:  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
        !            24:  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
        !            25:  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
        !            26:  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
        !            27:  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
        !            28:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
        !            29:  * SUCH DAMAGE.
        !            30:  *
        !            31:  * from: NetBSD: vm_swap.c,v 1.52 1997/12/02 13:47:37 pk Exp
        !            32:  * from: Id: uvm_swap.c,v 1.1.2.42 1998/02/02 20:38:06 chuck Exp
        !            33:  */
        !            34:
        !            35: #include <sys/param.h>
        !            36: #include <sys/systm.h>
        !            37: #include <sys/buf.h>
        !            38: #include <sys/conf.h>
        !            39: #include <sys/proc.h>
        !            40: #include <sys/namei.h>
        !            41: #include <sys/disklabel.h>
        !            42: #include <sys/errno.h>
        !            43: #include <sys/kernel.h>
        !            44: #include <sys/malloc.h>
        !            45: #include <sys/vnode.h>
        !            46: #include <sys/file.h>
        !            47: #include <sys/extent.h>
        !            48: #include <sys/mount.h>
        !            49: #include <sys/pool.h>
        !            50: #include <sys/syscallargs.h>
        !            51: #include <sys/swap.h>
        !            52:
        !            53: #include <uvm/uvm.h>
        !            54: #ifdef UVM_SWAP_ENCRYPT
        !            55: #include <sys/syslog.h>
        !            56: #endif
        !            57:
        !            58: #include <miscfs/specfs/specdev.h>
        !            59:
        !            60: /*
        !            61:  * uvm_swap.c: manage configuration and i/o to swap space.
        !            62:  */
        !            63:
        !            64: /*
        !            65:  * swap space is managed in the following way:
        !            66:  *
        !            67:  * each swap partition or file is described by a "swapdev" structure.
        !            68:  * each "swapdev" structure contains a "swapent" structure which contains
        !            69:  * information that is passed up to the user (via system calls).
        !            70:  *
        !            71:  * each swap partition is assigned a "priority" (int) which controls
        !            72:  * swap partition usage.
        !            73:  *
        !            74:  * the system maintains a global data structure describing all swap
        !            75:  * partitions/files.   there is a sorted LIST of "swappri" structures
        !            76:  * which describe "swapdev"'s at that priority.   this LIST is headed
        !            77:  * by the "swap_priority" global var.    each "swappri" contains a
        !            78:  * CIRCLEQ of "swapdev" structures at that priority.
        !            79:  *
        !            80:  * locking:
        !            81:  *  - swap_syscall_lock (sleep lock): this lock serializes the swapctl
        !            82:  *    system call and prevents the swap priority list from changing
        !            83:  *    while we are in the middle of a system call (e.g. SWAP_STATS).
        !            84:  *  - uvm.swap_data_lock (simple_lock): this lock protects all swap data
        !            85:  *    structures including the priority list, the swapdev structures,
        !            86:  *    and the swapmap extent.
        !            87:  *
        !            88:  * each swap device has the following info:
        !            89:  *  - swap device in use (could be disabled, preventing future use)
        !            90:  *  - swap enabled (allows new allocations on swap)
        !            91:  *  - map info in /dev/drum
        !            92:  *  - vnode pointer
        !            93:  * for swap files only:
        !            94:  *  - block size
        !            95:  *  - max byte count in buffer
        !            96:  *  - buffer
        !            97:  *  - credentials to use when doing i/o to file
        !            98:  *
        !            99:  * userland controls and configures swap with the swapctl(2) system call.
        !           100:  * the sys_swapctl performs the following operations:
        !           101:  *  [1] SWAP_NSWAP: returns the number of swap devices currently configured
        !           102:  *  [2] SWAP_STATS: given a pointer to an array of swapent structures
        !           103:  *     (passed in via "arg") of a size passed in via "misc" ... we load
        !           104:  *     the current swap config into the array.
        !           105:  *  [3] SWAP_ON: given a pathname in arg (could be device or file) and a
        !           106:  *     priority in "misc", start swapping on it.
        !           107:  *  [4] SWAP_OFF: as SWAP_ON, but stops swapping to a device
        !           108:  *  [5] SWAP_CTL: changes the priority of a swap device (new priority in
        !           109:  *     "misc")
        !           110:  */
        !           111:
        !           112: /*
        !           113:  * swapdev: describes a single swap partition/file
        !           114:  *
        !           115:  * note the following should be true:
        !           116:  * swd_inuse <= swd_nblks  [number of blocks in use is <= total blocks]
        !           117:  * swd_nblks <= swd_mapsize [because mapsize includes miniroot+disklabel]
        !           118:  */
        !           119: struct swapdev {
        !           120:        struct swapent  swd_se;         /* info passed up to userland */
        !           121: #define        swd_dev         swd_se.se_dev           /* device id */
        !           122: #define        swd_flags       swd_se.se_flags         /* flags:inuse/enable/fake */
        !           123: #define        swd_priority    swd_se.se_priority      /* our priority */
        !           124: #define        swd_inuse       swd_se.se_inuse         /* blocks in use */
        !           125: #define        swd_nblks       swd_se.se_nblks         /* total blocks */
        !           126:        char                    *swd_path;      /* saved pathname of device */
        !           127:        int                     swd_pathlen;    /* length of pathname */
        !           128:        int                     swd_npages;     /* #pages we can use */
        !           129:        int                     swd_npginuse;   /* #pages in use */
        !           130:        int                     swd_npgbad;     /* #pages bad */
        !           131:        int                     swd_drumoffset; /* page0 offset in drum */
        !           132:        int                     swd_drumsize;   /* #pages in drum */
        !           133:        struct extent           *swd_ex;        /* extent for this swapdev */
        !           134:        char                    swd_exname[12]; /* name of extent above */
        !           135:        struct vnode            *swd_vp;        /* backing vnode */
        !           136:        CIRCLEQ_ENTRY(swapdev)  swd_next;       /* priority circleq */
        !           137:
        !           138:        int                     swd_bsize;      /* blocksize (bytes) */
        !           139:        int                     swd_maxactive;  /* max active i/o reqs */
        !           140:        struct buf              swd_tab;        /* buffer list */
        !           141:        struct ucred            *swd_cred;      /* cred for file access */
        !           142: #ifdef UVM_SWAP_ENCRYPT
        !           143: #define SWD_KEY_SHIFT          7               /* One key per 0.5 MByte */
        !           144: #define SWD_KEY(x,y)           &((x)->swd_keys[((y) - (x)->swd_drumoffset) >> SWD_KEY_SHIFT])
        !           145:
        !           146: #define SWD_DCRYPT_SHIFT       5
        !           147: #define SWD_DCRYPT_BITS                32
        !           148: #define SWD_DCRYPT_MASK                (SWD_DCRYPT_BITS - 1)
        !           149: #define SWD_DCRYPT_OFF(x)      ((x) >> SWD_DCRYPT_SHIFT)
        !           150: #define SWD_DCRYPT_BIT(x)      ((x) & SWD_DCRYPT_MASK)
        !           151: #define SWD_DCRYPT_SIZE(x)     (SWD_DCRYPT_OFF((x) + SWD_DCRYPT_MASK) * sizeof(u_int32_t))
        !           152:        u_int32_t               *swd_decrypt;   /* bitmap for decryption */
        !           153:        struct swap_key         *swd_keys;      /* keys for different parts */
        !           154:        int                     swd_nkeys;      /* active keys */
        !           155: #endif
        !           156: };
        !           157:
        !           158: /*
        !           159:  * swap device priority entry (one per priority); the list is kept sorted on `spi_priority'.
        !           160:  */
        !           161: struct swappri {
        !           162:        int                     spi_priority;     /* priority of the swapdevs queued below */
        !           163:        CIRCLEQ_HEAD(spi_swapdev, swapdev)      spi_swapdev;
        !           164:        /* circleq of swapdevs at this priority */
        !           165:        LIST_ENTRY(swappri)     spi_swappri;      /* global list of pri's */
        !           166: };
        !           167:
        !           168: /*
        !           169:  * The following two structures are used to keep track of data transfers
        !           170:  * on swap devices associated with regular files.
        !           171:  * NOTE: this code is more or less a copy of vnd.c; we use the same
        !           172:  * structure names here to ease porting.
        !           173:  */
        !           174: struct vndxfer {
        !           175:        struct buf      *vx_bp;         /* Pointer to parent buffer */
        !           176:        struct swapdev  *vx_sdp;        /* swapdev associated with this transfer */
        !           177:        int             vx_error;
        !           178:        int             vx_pending;     /* # of pending aux buffers */
        !           179:        int             vx_flags;       /* VX_* flags below */
        !           180: #define VX_BUSY                1
        !           181: #define VX_DEAD                2
        !           182: };
        !           183:
        !           184: struct vndbuf {
        !           185:        struct buf      vb_buf;         /* embedded buf; presumably one of the "aux buffers" -- confirm */
        !           186:        struct vndxfer  *vb_xfer;       /* vndxfer this buffer refers to; presumably its parent transfer -- confirm */
        !           187: };
        !           188:
        !           189:
        !           190: /*
        !           191:  * We keep pools of vndbuf and vndxfer structures.
        !           192:  */
        !           193: struct pool vndxfer_pool;
        !           194: struct pool vndbuf_pool;
        !           195:
        !           196: #define        getvndxfer(vnx) do {                                            \
        !           197:        int s = splbio();                                               \
        !           198:        vnx = pool_get(&vndxfer_pool, PR_WAITOK);                       \
        !           199:        splx(s);                                                        \
        !           200: } while (0)
        !           201:
        !           202: #define putvndxfer(vnx) {                                              \
        !           203:        pool_put(&vndxfer_pool, (void *)(vnx));                         \
        !           204: }
        !           205:
        !           206: #define        getvndbuf(vbp)  do {                                            \
        !           207:        int s = splbio();                                               \
        !           208:        vbp = pool_get(&vndbuf_pool, PR_WAITOK);                        \
        !           209:        splx(s);                                                        \
        !           210: } while (0)
        !           211:
        !           212: #define putvndbuf(vbp) {                                               \
        !           213:        pool_put(&vndbuf_pool, (void *)(vbp));                          \
        !           214: }
        !           215:
        !           216: /* /dev/drum */
        !           217: bdev_decl(sw);
        !           218: cdev_decl(sw);
        !           219:
        !           220: /*
        !           221:  * local variables
        !           222:  */
        !           223: static struct extent *swapmap;         /* controls the mapping of /dev/drum */
        !           224:
        !           225: /* list of all active swap devices [by priority] */
        !           226: LIST_HEAD(swap_priority, swappri);
        !           227: static struct swap_priority swap_priority;
        !           228:
        !           229: /* locks: swap_syscall_lock (sleep lock) serializes the swapctl system call */
        !           230: struct rwlock swap_syscall_lock = RWLOCK_INITIALIZER("swplk");
        !           231:
        !           232: /*
        !           233:  * prototypes
        !           234:  */
        !           235: static void             swapdrum_add(struct swapdev *, int);
        !           236: static struct swapdev  *swapdrum_getsdp(int);
        !           237:
        !           238: static struct swapdev  *swaplist_find(struct vnode *, int);
        !           239: static void             swaplist_insert(struct swapdev *,
        !           240:                                             struct swappri *, int);
        !           241: static void             swaplist_trim(void);
        !           242:
        !           243: static int swap_on(struct proc *, struct swapdev *);
        !           244: static int swap_off(struct proc *, struct swapdev *);
        !           245:
        !           246: static void sw_reg_strategy(struct swapdev *, struct buf *, int);
        !           247: static void sw_reg_iodone(struct buf *);
        !           248: static void sw_reg_start(struct swapdev *);
        !           249:
        !           250: static int uvm_swap_io(struct vm_page **, int, int, int);
        !           251:
        !           252: static void swapmount(void);
        !           253:
        !           254: #ifdef UVM_SWAP_ENCRYPT
        !           255: /* for swap encrypt */
        !           256: boolean_t uvm_swap_allocpages(struct vm_page **, int);
        !           257: void uvm_swap_markdecrypt(struct swapdev *, int, int, int);
        !           258: boolean_t uvm_swap_needdecrypt(struct swapdev *, int);
        !           259: void uvm_swap_initcrypt(struct swapdev *, int);
        !           260: #endif
        !           261:
        !           262: /*
        !           263:  * uvm_swap_init: init the swap system data structures and locks
        !           264:  *
        !           265:  * => called at boot time from init_main.c after the filesystems
        !           266:  *     are brought up (which happens after uvm_init())
        !           267:  */
        !           268: void
        !           269: uvm_swap_init()
        !           270: {
        !           271:        UVMHIST_FUNC("uvm_swap_init");
        !           272:
        !           273:        UVMHIST_CALLED(pdhist);
        !           274:        /*
        !           275:         * first, init the swap list, its counter, and its lock.
        !           276:         * then get a handle on the vnode for /dev/drum by using
        !           277:         * its dev_t number ("swapdev", from MD conf.c).
        !           278:         */
        !           279:
        !           280:        LIST_INIT(&swap_priority);
        !           281:        uvmexp.nswapdev = 0;
        !           282:        simple_lock_init(&uvm.swap_data_lock);
        !           283:
        !           284:        if (!swapdev_vp && bdevvp(swapdev, &swapdev_vp))        /* skip the lookup if swapdev_vp is already set */
        !           285:                panic("uvm_swap_init: can't get vnode for swap device");
        !           286:
        !           287:        /*
        !           288:         * create swap block resource map to map /dev/drum.   the range
        !           289:         * from 1 to INT_MAX allows 2 gigablocks of swap space.  note
        !           290:         * that block 0 is reserved (used to indicate an allocation
        !           291:         * failure, or no allocation).
        !           292:         */
        !           293:        swapmap = extent_create("swapmap", 1, INT_MAX,
        !           294:                                M_VMSWAP, 0, 0, EX_NOWAIT);
        !           295:        if (swapmap == 0)
        !           296:                panic("uvm_swap_init: extent_create failed");
        !           297:
        !           298:        /*
        !           299:         * allocate pools for structures used for swapping to files.
        !           300:         */
        !           301:
        !           302:
        !           303:        pool_init(&vndxfer_pool, sizeof(struct vndxfer), 0, 0, 0, "swp vnx",
        !           304:            NULL);
        !           305:
        !           306:        pool_init(&vndbuf_pool, sizeof(struct vndbuf), 0, 0, 0, "swp vnd",
        !           307:            NULL);
        !           308:
        !           309:        /*
        !           310:         * Setup the initial swap partition
        !           311:         */
        !           312:        swapmount();
        !           313:
        !           314:        /*
        !           315:         * done!
        !           316:         */
        !           317:        UVMHIST_LOG(pdhist, "<- done", 0, 0, 0, 0);
        !           318: }
        !           319:
        !           320: #ifdef UVM_SWAP_ENCRYPT
        !           321: void
        !           322: uvm_swap_initcrypt_all(void)
        !           323: {      /* set up crypt state on every swapdev that does not have it yet */
        !           324:        struct swapdev *sdp;
        !           325:        struct swappri *spp;
        !           326:
        !           327:        simple_lock(&uvm.swap_data_lock);       /* protects the priority list and the swapdevs */
        !           328:
        !           329:        LIST_FOREACH(spp, &swap_priority, spi_swappri) {
        !           330:                CIRCLEQ_FOREACH(sdp, &spp->spi_swapdev, swd_next)
        !           331:                        if (sdp->swd_decrypt == NULL)   /* decrypt bitmap not allocated yet */
        !           332:                                uvm_swap_initcrypt(sdp, sdp->swd_npages);
        !           333:        }
        !           334:        simple_unlock(&uvm.swap_data_lock);
        !           335: }
        !           336:
        !           337: void
        !           338: uvm_swap_initcrypt(struct swapdev *sdp, int npages)
        !           339: {
        !           340:        /* SWD_KEY() indexes swd_keys[] by page >> SWD_KEY_SHIFT: round up so a partial last group has a key */
        !           341:        size_t nkeys = ((size_t)npages + (1 << SWD_KEY_SHIFT) - 1) >> SWD_KEY_SHIFT;
        !           342:
        !           343:        /*
        !           344:         * Set up the crypt state of sdp for npages pages: a zeroed bitmap that
        !           345:         * records which pages need decryption when read back, and zeroed keys.
        !           346:         * We cannot chance a malloc later: with ASYNC puts we may not use M_WAITOK.
        !           347:         */
        !           348:        sdp->swd_decrypt = malloc(SWD_DCRYPT_SIZE(npages), M_VMSWAP, M_WAITOK);
        !           349:        memset(sdp->swd_decrypt, 0, SWD_DCRYPT_SIZE(npages));
        !           350:        sdp->swd_keys = malloc(nkeys * sizeof(struct swap_key),
        !           351:                               M_VMSWAP, M_WAITOK);
        !           352:        memset(sdp->swd_keys, 0, nkeys * sizeof(struct swap_key));
        !           353:        sdp->swd_nkeys = 0;
        !           354: }
        !           354:
        !           355: boolean_t
        !           356: uvm_swap_allocpages(struct vm_page **pps, int npages)
        !           357: {
        !           358:        int i, s;
        !           359:        boolean_t fail;
        !           360:
        !           361:        /*
        !           362:         * Fill pps[0..npages-1] with newly allocated pages.  Returns TRUE on
        !           363:         * success, FALSE (with no pages left allocated) otherwise.  First
        !           364:         * estimate if we will succeed: free pages must not drop below the kernel reserve.
        !           365:         */
        !           366:        s = uvm_lock_fpageq();
        !           367:
        !           368:        fail = uvmexp.free - npages < uvmexp.reserve_kernel;
        !           369:        uvm_unlock_fpageq(s);
        !           370:
        !           371:        if (fail)
        !           372:                return FALSE;
        !           373:
        !           374:        /* Get new pages */
        !           375:        for (i = 0; i < npages; i++) {
        !           376:                pps[i] = uvm_pagealloc(NULL, 0, NULL, 0);
        !           377:                if (pps[i] == NULL)
        !           378:                        break;
        !           379:        }
        !           380:
        !           381:        /* On failure free the i pages we did get and return */
        !           382:        if (i < npages) {
        !           383:                uvm_swap_freepages(pps, i);
        !           384:                return FALSE;
        !           385:        }
        !           386:
        !           387:        return TRUE;
        !           388: }
        !           389:
        !           390: void
        !           391: uvm_swap_freepages(struct vm_page **pps, int npages)
        !           392: {      /* free the first npages pages of pps[] */
        !           393:        int i;
        !           394:
        !           395:        uvm_lock_pageq();       /* uvm_pagefree() is called with the page queues locked */
        !           396:        for (i = 0; i < npages; i++)
        !           397:                uvm_pagefree(pps[i]);
        !           398:        uvm_unlock_pageq();
        !           399: }
        !           400:
        !           401: /*
        !           402:  * Mark npages pages of sdp, starting at drum slot startslot, as needing
        !           403:  * (decrypt != 0) or not needing decryption when read back; NULL sdp is a no-op.
        !           404:  */
        !           405: void
        !           406: uvm_swap_markdecrypt(struct swapdev *sdp, int startslot, int npages,
        !           407:                     int decrypt)
        !           408: {
        !           409:        int pagestart, i;
        !           410:        int off, bit;
        !           411:
        !           412:        if (!sdp)
        !           413:                return;
        !           414:
        !           415:        pagestart = startslot - sdp->swd_drumoffset;
        !           416:        for (i = 0; i < npages; i++, pagestart++) {
        !           417:                off = SWD_DCRYPT_OFF(pagestart);
        !           418:                bit = SWD_DCRYPT_BIT(pagestart);
        !           419:                if (decrypt)
        !           420:                        /* pages read need decryption (unsigned 1: bit may be 31) */
        !           421:                        sdp->swd_decrypt[off] |= (u_int32_t)1 << bit;
        !           422:                else
        !           423:                        /* pages read do not need decryption */
        !           424:                        sdp->swd_decrypt[off] &= ~((u_int32_t)1 << bit);
        !           425:        }
        !           426: }
        !           427:
        !           428: /*
        !           429:  * Check if the page that we got from disk needs to be decrypted: returns TRUE
        !           430:  * if the bitmap bit for drum slot off is set, FALSE if it is clear or sdp is NULL.
        !           431:  */
        !           432: boolean_t
        !           433: uvm_swap_needdecrypt(struct swapdev *sdp, int off)
        !           434: {
        !           435:        if (!sdp)
        !           436:                return FALSE;
        !           437:        /* test with an unsigned 1: bit 31 must not be shifted into the sign bit of an int */
        !           438:        off -= sdp->swd_drumoffset;
        !           439:        return sdp->swd_decrypt[SWD_DCRYPT_OFF(off)] & ((u_int32_t)1 << SWD_DCRYPT_BIT(off)) ?
        !           440:                TRUE : FALSE;
        !           441: }
        !           442: #endif /* UVM_SWAP_ENCRYPT */
        !           443: /*
        !           444:  * swaplist functions: functions that operate on the list of swap
        !           445:  * devices on the system.
        !           446:  */
        !           447:
        !           448: /*
        !           449:  * swaplist_insert: insert swap device "sdp" into the global list
        !           450:  *
        !           451:  * => caller must hold both swap_syscall_lock and uvm.swap_data_lock
        !           452:  * => caller must provide a newly malloc'd swappri structure (we will
        !           453:  *     FREE it if we don't need it... this is to prevent malloc blocking
        !           454:  *     here while adding swap)
        !           455:  */
        !           456: static void
        !           457: swaplist_insert(sdp, newspp, priority)
        !           458:        struct swapdev *sdp;
        !           459:        struct swappri *newspp;
        !           460:        int priority;
        !           461: {
        !           462:        struct swappri *spp, *pspp;
        !           463:        UVMHIST_FUNC("swaplist_insert"); UVMHIST_CALLED(pdhist);
        !           464:
        !           465:        /*
        !           466:         * find entry at or after which to insert the new device.
        !           467:         */
        !           468:        for (pspp = NULL, spp = LIST_FIRST(&swap_priority); spp != NULL;
        !           469:             spp = LIST_NEXT(spp, spi_swappri)) {
        !           470:                if (priority <= spp->spi_priority)      /* stop at first entry with priority >= ours */
        !           471:                        break;
        !           472:                pspp = spp;     /* last entry with a priority below ours */
        !           473:        }
        !           474:
        !           475:        /*
        !           476:         * new priority?
        !           477:         */
        !           478:        if (spp == NULL || spp->spi_priority != priority) {
        !           479:                spp = newspp;  /* use newspp! */
        !           480:                UVMHIST_LOG(pdhist, "created new swappri = %ld",
        !           481:                            priority, 0, 0, 0);
        !           482:
        !           483:                spp->spi_priority = priority;
        !           484:                CIRCLEQ_INIT(&spp->spi_swapdev);
        !           485:
        !           486:                if (pspp)
        !           487:                        LIST_INSERT_AFTER(pspp, spp, spi_swappri);
        !           488:                else
        !           489:                        LIST_INSERT_HEAD(&swap_priority, spp, spi_swappri);
        !           490:        } else {
        !           491:                /* we don't need a new priority structure, free it */
        !           492:                FREE(newspp, M_VMSWAP);
        !           493:        }
        !           494:
        !           495:        /*
        !           496:         * priority found (or created).   now insert on the priority's
        !           497:         * circleq list and bump the total number of swapdevs.
        !           498:         */
        !           499:        sdp->swd_priority = priority;
        !           500:        CIRCLEQ_INSERT_TAIL(&spp->spi_swapdev, sdp, swd_next);
        !           501:        uvmexp.nswapdev++;
        !           502: }
        !           503:
        !           504: /*
        !           505:  * swaplist_find: find and optionally remove a swap device from the
        !           506:  *     global list.
        !           507:  *
        !           508:  * => caller must hold both swap_syscall_lock and uvm.swap_data_lock
        !           509:  * => we return the swapdev whose vnode is vp (removed from its list only if "remove" is set), or NULL if none
        !           510:  */
        !           511: static struct swapdev *
        !           512: swaplist_find(vp, remove)
        !           513:        struct vnode *vp;
        !           514:        boolean_t remove;
        !           515: {
        !           516:        struct swapdev *sdp;
        !           517:        struct swappri *spp;
        !           518:
        !           519:        /*
        !           520:         * search the lists for the requested vp
        !           521:         */
        !           522:        for (spp = LIST_FIRST(&swap_priority); spp != NULL;
        !           523:             spp = LIST_NEXT(spp, spi_swappri)) {
        !           524:                for (sdp = CIRCLEQ_FIRST(&spp->spi_swapdev);
        !           525:                     sdp != (void *)&spp->spi_swapdev;  /* back at the queue head: end of circleq */
        !           526:                     sdp = CIRCLEQ_NEXT(sdp, swd_next))
        !           527:                        if (sdp->swd_vp == vp) {
        !           528:                                if (remove) {
        !           529:                                        CIRCLEQ_REMOVE(&spp->spi_swapdev,
        !           530:                                            sdp, swd_next);
        !           531:                                        uvmexp.nswapdev--;
        !           532:                                }
        !           533:                                return(sdp);
        !           534:                        }
        !           535:        }
        !           536:        return (NULL);
        !           537: }
        !           538:
        !           539:
        !           540: /*
        !           541:  * swaplist_trim: scan priority list for empty priority entries and kill
        !           542:  *     them.
        !           543:  *
        !           544:  * => caller must hold both swap_syscall_lock and uvm.swap_data_lock
        !           545:  */
        !           546: static void
        !           547: swaplist_trim()
        !           548: {
        !           549:        struct swappri *spp, *nextspp;
        !           550:
        !           551:        for (spp = LIST_FIRST(&swap_priority); spp != NULL; spp = nextspp) {
        !           552:                nextspp = LIST_NEXT(spp, spi_swappri);  /* fetch the successor before spp may be freed */
        !           553:                if (CIRCLEQ_FIRST(&spp->spi_swapdev) !=
        !           554:                    (void *)&spp->spi_swapdev)
        !           555:                        continue;       /* circleq not empty: keep this priority entry */
        !           556:                LIST_REMOVE(spp, spi_swappri);
        !           557:                free(spp, M_VMSWAP);
        !           558:        }
        !           559: }
        !           560:
        !           561: /*
        !           562:  * swapdrum_add: add a "swapdev"'s blocks into /dev/drum's area.
        !           563:  *
        !           564:  * => caller must hold swap_syscall_lock
        !           565:  * => uvm.swap_data_lock should be unlocked (we may sleep)
        !           566:  */
        !           567: static void
        !           568: swapdrum_add(sdp, npages)
        !           569:        struct swapdev *sdp;
        !           570:        int     npages;
        !           571: {
        !           572:        u_long result;
        !           573:
        !           574:        if (extent_alloc(swapmap, npages, EX_NOALIGN, 0, EX_NOBOUNDARY,
        !           575:            EX_WAITOK, &result))
        !           576:                panic("swapdrum_add");
        !           577:
        !           578:        sdp->swd_drumoffset = result;
        !           579:        sdp->swd_drumsize = npages;
        !           580: }
        !           581:
        !           582: /*
        !           583:  * swapdrum_getsdp: given a page offset in /dev/drum, convert it back
        !           584:  *     to the "swapdev" that maps that section of the drum.
        !           585:  *
        !           586:  * => each swapdev takes one big contig chunk of the drum
        !           587:  * => caller must hold uvm.swap_data_lock
        !           588:  */
        !           589: static struct swapdev *
        !           590: swapdrum_getsdp(pgno)
        !           591:        int pgno;
        !           592: {
        !           593:        struct swapdev *sdp;
        !           594:        struct swappri *spp;
        !           595:
        !           596:        for (spp = LIST_FIRST(&swap_priority); spp != NULL;
        !           597:             spp = LIST_NEXT(spp, spi_swappri))
        !           598:                for (sdp = CIRCLEQ_FIRST(&spp->spi_swapdev);
        !           599:                     sdp != (void *)&spp->spi_swapdev;
        !           600:                     sdp = CIRCLEQ_NEXT(sdp, swd_next))
        !           601:                        if (pgno >= sdp->swd_drumoffset &&
        !           602:                            pgno < (sdp->swd_drumoffset + sdp->swd_drumsize)) {
        !           603:                                return sdp;
        !           604:                        }
        !           605:        return NULL;
        !           606: }
        !           607:
        !           608:
        !           609: /*
        !           610:  * sys_swapctl: main entry point for swapctl(2) system call
        !           611:  *     [with two helper functions: swap_on and swap_off]
        !           612:  */
        !           613: int
        !           614: sys_swapctl(p, v, retval)
        !           615:        struct proc *p;
        !           616:        void *v;
        !           617:        register_t *retval;
        !           618: {
        !           619:        struct sys_swapctl_args /* {
        !           620:                syscallarg(int) cmd;
        !           621:                syscallarg(void *) arg;
        !           622:                syscallarg(int) misc;
        !           623:        } */ *uap = (struct sys_swapctl_args *)v;
        !           624:        struct vnode *vp;
        !           625:        struct nameidata nd;
        !           626:        struct swappri *spp;
        !           627:        struct swapdev *sdp;
        !           628:        struct swapent *sep;
        !           629:        char    userpath[MAXPATHLEN];
        !           630:        size_t  len;
        !           631:        int     count, error, misc;
        !           632:        int     priority;
        !           633:        UVMHIST_FUNC("sys_swapctl"); UVMHIST_CALLED(pdhist);
        !           634:
        !           635:        misc = SCARG(uap, misc);
        !           636:
        !           637:        /*
        !           638:         * ensure serialized syscall access by grabbing the swap_syscall_lock
        !           639:         */
        !           640:        rw_enter_write(&swap_syscall_lock);
        !           641:
        !           642:        /*
        !           643:         * we handle the non-priv NSWAP and STATS request first.
        !           644:         *
        !           645:         * SWAP_NSWAP: return number of config'd swap devices
        !           646:         * [can also be obtained with uvmexp sysctl]
        !           647:         */
        !           648:        if (SCARG(uap, cmd) == SWAP_NSWAP) {
        !           649:                UVMHIST_LOG(pdhist, "<- done SWAP_NSWAP=%ld", uvmexp.nswapdev,
        !           650:                    0, 0, 0);
        !           651:                *retval = uvmexp.nswapdev;
        !           652:                error = 0;
        !           653:                goto out;
        !           654:        }
        !           655:
        !           656:        /*
        !           657:         * SWAP_STATS: get stats on current # of configured swap devs
        !           658:         *
        !           659:         * note that the swap_priority list can't change as long
        !           660:         * as we are holding the swap_syscall_lock.  we don't want
        !           661:         * to grab the uvm.swap_data_lock because we may fault&sleep during
        !           662:         * copyout() and we don't want to be holding that lock then!
        !           663:         */
        !           664:        if (SCARG(uap, cmd) == SWAP_STATS
        !           665: #if defined(COMPAT_13)
        !           666:            || SCARG(uap, cmd) == SWAP_OSTATS
        !           667: #endif
        !           668:            ) {
        !           669:                sep = (struct swapent *)SCARG(uap, arg);
        !           670:                count = 0;
        !           671:
        !           672:                for (spp = LIST_FIRST(&swap_priority); spp != NULL;
        !           673:                    spp = LIST_NEXT(spp, spi_swappri)) {
        !           674:                        for (sdp = CIRCLEQ_FIRST(&spp->spi_swapdev);
        !           675:                             sdp != (void *)&spp->spi_swapdev && misc-- > 0;
        !           676:                             sdp = CIRCLEQ_NEXT(sdp, swd_next)) {
        !           677:                                sdp->swd_inuse =
        !           678:                                    btodb((u_int64_t)sdp->swd_npginuse <<
        !           679:                                    PAGE_SHIFT);
        !           680:                                error = copyout(&sdp->swd_se, sep,
        !           681:                                    sizeof(struct swapent));
        !           682:
        !           683:                                /* now copy out the path if necessary */
        !           684: #if defined(COMPAT_13)
        !           685:                                if (error == 0 && SCARG(uap, cmd) == SWAP_STATS)
        !           686: #else
        !           687:                                if (error == 0)
        !           688: #endif
        !           689:                                        error = copyout(sdp->swd_path,
        !           690:                                            &sep->se_path, sdp->swd_pathlen);
        !           691:
        !           692:                                if (error)
        !           693:                                        goto out;
        !           694:                                count++;
        !           695: #if defined(COMPAT_13)
        !           696:                                if (SCARG(uap, cmd) == SWAP_OSTATS)
        !           697:                                        ((struct oswapent *)sep)++;
        !           698:                                else
        !           699: #endif
        !           700:                                        sep++;
        !           701:                        }
        !           702:                }
        !           703:
        !           704:                UVMHIST_LOG(pdhist, "<- done SWAP_STATS", 0, 0, 0, 0);
        !           705:
        !           706:                *retval = count;
        !           707:                error = 0;
        !           708:                goto out;
        !           709:        }
        !           710:
        !           711:        /*
        !           712:         * all other requests require superuser privs.   verify.
        !           713:         */
        !           714:        if ((error = suser(p, 0)))
        !           715:                goto out;
        !           716:
        !           717:        /*
        !           718:         * at this point we expect a path name in arg.   we will
        !           719:         * use namei() to gain a vnode reference (vref), and lock
        !           720:         * the vnode (VOP_LOCK).
        !           721:         *
        !           722:         * XXX: a NULL arg means use the root vnode pointer (e.g. for
        !           723:         * miniroot)
        !           724:         */
        !           725:        if (SCARG(uap, arg) == NULL) {
        !           726:                vp = rootvp;            /* miniroot */
        !           727:                if (vget(vp, LK_EXCLUSIVE, p)) {
        !           728:                        error = EBUSY;
        !           729:                        goto out;
        !           730:                }
        !           731:                if (SCARG(uap, cmd) == SWAP_ON &&
        !           732:                    copystr("miniroot", userpath, sizeof userpath, &len))
        !           733:                        panic("swapctl: miniroot copy failed");
        !           734:        } else {
        !           735:                int     space;
        !           736:                char    *where;
        !           737:
        !           738:                if (SCARG(uap, cmd) == SWAP_ON) {
        !           739:                        if ((error = copyinstr(SCARG(uap, arg), userpath,
        !           740:                            sizeof userpath, &len)))
        !           741:                                goto out;
        !           742:                        space = UIO_SYSSPACE;
        !           743:                        where = userpath;
        !           744:                } else {
        !           745:                        space = UIO_USERSPACE;
        !           746:                        where = (char *)SCARG(uap, arg);
        !           747:                }
        !           748:                NDINIT(&nd, LOOKUP, FOLLOW|LOCKLEAF, space, where, p);
        !           749:                if ((error = namei(&nd)))
        !           750:                        goto out;
        !           751:                vp = nd.ni_vp;
        !           752:        }
        !           753:        /* note: "vp" is referenced and locked */
        !           754:
        !           755:        error = 0;              /* assume no error */
        !           756:        switch(SCARG(uap, cmd)) {
        !           757:
        !           758:        case SWAP_DUMPDEV:
        !           759:                if (vp->v_type != VBLK) {
        !           760:                        error = ENOTBLK;
        !           761:                        break;
        !           762:                }
        !           763:                dumpdev = vp->v_rdev;
        !           764:                break;
        !           765:
        !           766:        case SWAP_CTL:
        !           767:                /*
        !           768:                 * get new priority, remove old entry (if any) and then
        !           769:                 * reinsert it in the correct place.  finally, prune out
        !           770:                 * any empty priority structures.
        !           771:                 */
        !           772:                priority = SCARG(uap, misc);
        !           773:                spp = malloc(sizeof *spp, M_VMSWAP, M_WAITOK);
        !           774:                simple_lock(&uvm.swap_data_lock);
        !           775:                if ((sdp = swaplist_find(vp, 1)) == NULL) {
        !           776:                        error = ENOENT;
        !           777:                } else {
        !           778:                        swaplist_insert(sdp, spp, priority);
        !           779:                        swaplist_trim();
        !           780:                }
        !           781:                simple_unlock(&uvm.swap_data_lock);
        !           782:                if (error)
        !           783:                        free(spp, M_VMSWAP);
        !           784:                break;
        !           785:
        !           786:        case SWAP_ON:
        !           787:
        !           788:                /*
        !           789:                 * check for duplicates.   if none found, then insert a
        !           790:                 * dummy entry on the list to prevent someone else from
        !           791:                 * trying to enable this device while we are working on
        !           792:                 * it.
        !           793:                 */
        !           794:
        !           795:                priority = SCARG(uap, misc);
        !           796:                simple_lock(&uvm.swap_data_lock);
        !           797:                if ((sdp = swaplist_find(vp, 0)) != NULL) {
        !           798:                        error = EBUSY;
        !           799:                        simple_unlock(&uvm.swap_data_lock);
        !           800:                        break;
        !           801:                }
        !           802:                sdp = malloc(sizeof *sdp, M_VMSWAP, M_WAITOK);
        !           803:                spp = malloc(sizeof *spp, M_VMSWAP, M_WAITOK);
        !           804:                memset(sdp, 0, sizeof(*sdp));
        !           805:                sdp->swd_flags = SWF_FAKE;      /* placeholder only */
        !           806:                sdp->swd_vp = vp;
        !           807:                sdp->swd_dev = (vp->v_type == VBLK) ? vp->v_rdev : NODEV;
        !           808:
        !           809:                /*
        !           810:                 * XXX Is NFS elaboration necessary?
        !           811:                 */
        !           812:                if (vp->v_type == VREG) {
        !           813:                        sdp->swd_cred = crdup(p->p_ucred);
        !           814:                }
        !           815:
        !           816:                swaplist_insert(sdp, spp, priority);
        !           817:                simple_unlock(&uvm.swap_data_lock);
        !           818:
        !           819:                sdp->swd_pathlen = len;
        !           820:                sdp->swd_path = malloc(sdp->swd_pathlen, M_VMSWAP, M_WAITOK);
        !           821:                if (copystr(userpath, sdp->swd_path, sdp->swd_pathlen, 0) != 0)
        !           822:                        panic("swapctl: copystr");
        !           823:
        !           824:                /*
        !           825:                 * we've now got a FAKE placeholder in the swap list.
        !           826:                 * now attempt to enable swap on it.  if we fail, undo
        !           827:                 * what we've done and kill the fake entry we just inserted.
        !           828:                 * if swap_on is a success, it will clear the SWF_FAKE flag
        !           829:                 */
        !           830:
        !           831:                if ((error = swap_on(p, sdp)) != 0) {
        !           832:                        simple_lock(&uvm.swap_data_lock);
        !           833:                        (void) swaplist_find(vp, 1);  /* kill fake entry */
        !           834:                        swaplist_trim();
        !           835:                        simple_unlock(&uvm.swap_data_lock);
        !           836:                        if (vp->v_type == VREG) {
        !           837:                                crfree(sdp->swd_cred);
        !           838:                        }
        !           839:                        free(sdp->swd_path, M_VMSWAP);
        !           840:                        free(sdp, M_VMSWAP);
        !           841:                        break;
        !           842:                }
        !           843:                break;
        !           844:
        !           845:        case SWAP_OFF:
        !           846:                simple_lock(&uvm.swap_data_lock);
        !           847:                if ((sdp = swaplist_find(vp, 0)) == NULL) {
        !           848:                        simple_unlock(&uvm.swap_data_lock);
        !           849:                        error = ENXIO;
        !           850:                        break;
        !           851:                }
        !           852:
        !           853:                /*
        !           854:                 * If a device isn't in use or enabled, we
        !           855:                 * can't stop swapping from it (again).
        !           856:                 */
        !           857:                if ((sdp->swd_flags & (SWF_INUSE|SWF_ENABLE)) == 0) {
        !           858:                        simple_unlock(&uvm.swap_data_lock);
        !           859:                        error = EBUSY;
        !           860:                        break;
        !           861:                }
        !           862:
        !           863:                /*
        !           864:                 * do the real work.
        !           865:                 */
        !           866:                error = swap_off(p, sdp);
        !           867:                break;
        !           868:
        !           869:        default:
        !           870:                error = EINVAL;
        !           871:        }
        !           872:
        !           873:        /*
        !           874:         * done!  release the ref gained by namei() and unlock.
        !           875:         */
        !           876:        vput(vp);
        !           877:
        !           878: out:
        !           879:        rw_exit_write(&swap_syscall_lock);
        !           880:
        !           881:        UVMHIST_LOG(pdhist, "<- done!  error=%ld", error, 0, 0, 0);
        !           882:        return (error);
        !           883: }
        !           884:
        !           885: /*
        !           886:  * swap_on: attempt to enable a swapdev for swapping.   note that the
        !           887:  *     swapdev is already on the global list, but disabled (marked
        !           888:  *     SWF_FAKE).
        !           889:  *
        !           890:  * => we avoid the start of the disk (to protect disk labels)
        !           891:  * => we also avoid the miniroot, if we are swapping to root.
        !           892:  * => caller should leave uvm.swap_data_lock unlocked, we may lock it
        !           893:  *     if needed.
        !           894:  */
        !           895: static int
        !           896: swap_on(p, sdp)
        !           897:        struct proc *p;
        !           898:        struct swapdev *sdp;
        !           899: {
        !           900:        static int count = 0;   /* static */
        !           901:        struct vnode *vp;
        !           902:        int error, npages, nblocks, size;
        !           903:        long addr;
        !           904:        struct vattr va;
        !           905: #if defined(NFSCLIENT)
        !           906:        extern int (**nfsv2_vnodeop_p)(void *);
        !           907: #endif /* defined(NFSCLIENT) */
        !           908:        dev_t dev;
        !           909:        UVMHIST_FUNC("swap_on"); UVMHIST_CALLED(pdhist);
        !           910:
        !           911:        /*
        !           912:         * we want to enable swapping on sdp.   the swd_vp contains
        !           913:         * the vnode we want (locked and ref'd), and the swd_dev
        !           914:         * contains the dev_t of the file, if it a block device.
        !           915:         */
        !           916:
        !           917:        vp = sdp->swd_vp;
        !           918:        dev = sdp->swd_dev;
        !           919:
        !           920:        /*
        !           921:         * open the swap file (mostly useful for block device files to
        !           922:         * let device driver know what is up).
        !           923:         *
        !           924:         * we skip the open/close for root on swap because the root
        !           925:         * has already been opened when root was mounted (mountroot).
        !           926:         */
        !           927:        if (vp != rootvp) {
        !           928:                if ((error = VOP_OPEN(vp, FREAD|FWRITE, p->p_ucred, p)))
        !           929:                        return (error);
        !           930:        }
        !           931:
        !           932:        /* XXX this only works for block devices */
        !           933:        UVMHIST_LOG(pdhist, "  dev=%ld, major(dev)=%ld", dev, major(dev), 0,0);
        !           934:
        !           935:        /*
        !           936:         * we now need to determine the size of the swap area.   for
        !           937:         * block specials we can call the d_psize function.
        !           938:         * for normal files, we must stat [get attrs].
        !           939:         *
        !           940:         * we put the result in nblks.
        !           941:         * for normal files, we also want the filesystem block size
        !           942:         * (which we get with statfs).
        !           943:         */
        !           944:        switch (vp->v_type) {
        !           945:        case VBLK:
        !           946:                if (bdevsw[major(dev)].d_psize == 0 ||
        !           947:                    (nblocks = (*bdevsw[major(dev)].d_psize)(dev)) == -1) {
        !           948:                        error = ENXIO;
        !           949:                        goto bad;
        !           950:                }
        !           951:                break;
        !           952:
        !           953:        case VREG:
        !           954:                if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)))
        !           955:                        goto bad;
        !           956:                nblocks = (int)btodb(va.va_size);
        !           957:                if ((error =
        !           958:                     VFS_STATFS(vp->v_mount, &vp->v_mount->mnt_stat, p)) != 0)
        !           959:                        goto bad;
        !           960:
        !           961:                sdp->swd_bsize = vp->v_mount->mnt_stat.f_iosize;
        !           962:                /*
        !           963:                 * limit the max # of outstanding I/O requests we issue
        !           964:                 * at any one time.   take it easy on NFS servers.
        !           965:                 */
        !           966: #if defined(NFSCLIENT)
        !           967:                if (vp->v_op == nfsv2_vnodeop_p)
        !           968:                        sdp->swd_maxactive = 2; /* XXX */
        !           969:                else
        !           970: #endif /* defined(NFSCLIENT) */
        !           971:                        sdp->swd_maxactive = 8; /* XXX */
        !           972:                break;
        !           973:
        !           974:        default:
        !           975:                error = ENXIO;
        !           976:                goto bad;
        !           977:        }
        !           978:
        !           979:        /*
        !           980:         * save nblocks in a safe place and convert to pages.
        !           981:         */
        !           982:
        !           983:        sdp->swd_nblks = nblocks;
        !           984:        npages = dbtob((u_int64_t)nblocks) >> PAGE_SHIFT;
        !           985:
        !           986:        /*
        !           987:         * for block special files, we want to make sure that leave
        !           988:         * the disklabel and bootblocks alone, so we arrange to skip
        !           989:         * over them (arbitrarily choosing to skip PAGE_SIZE bytes).
        !           990:         * note that because of this the "size" can be less than the
        !           991:         * actual number of blocks on the device.
        !           992:         */
        !           993:        if (vp->v_type == VBLK) {
        !           994:                /* we use pages 1 to (size - 1) [inclusive] */
        !           995:                size = npages - 1;
        !           996:                addr = 1;
        !           997:        } else {
        !           998:                /* we use pages 0 to (size - 1) [inclusive] */
        !           999:                size = npages;
        !          1000:                addr = 0;
        !          1001:        }
        !          1002:
        !          1003:        /*
        !          1004:         * make sure we have enough blocks for a reasonable sized swap
        !          1005:         * area.   we want at least one page.
        !          1006:         */
        !          1007:
        !          1008:        if (size < 1) {
        !          1009:                UVMHIST_LOG(pdhist, "  size <= 1!!", 0, 0, 0, 0);
        !          1010:                error = EINVAL;
        !          1011:                goto bad;
        !          1012:        }
        !          1013:
        !          1014:        UVMHIST_LOG(pdhist, "  dev=%lx: size=%ld addr=0x%lx\n",
        !          1015:            dev, size, addr, 0);
        !          1016:
        !          1017:        /*
        !          1018:         * now we need to allocate an extent to manage this swap device
        !          1019:         */
        !          1020:        snprintf(sdp->swd_exname, sizeof(sdp->swd_exname), "swap0x%04x",
        !          1021:            count++);
        !          1022:
        !          1023:        /* note that extent_create's 3rd arg is inclusive, thus "- 1" */
        !          1024:        sdp->swd_ex = extent_create(sdp->swd_exname, 0, npages - 1, M_VMSWAP,
        !          1025:                                    0, 0, EX_WAITOK);
        !          1026:        /* allocate the `saved' region from the extent so it won't be used */
        !          1027:        if (addr) {
        !          1028:                if (extent_alloc_region(sdp->swd_ex, 0, addr, EX_WAITOK))
        !          1029:                        panic("disklabel region");
        !          1030:        }
        !          1031:
        !          1032:        /*
        !          1033:         * if the vnode we are swapping to is the root vnode
        !          1034:         * (i.e. we are swapping to the miniroot) then we want
        !          1035:         * to make sure we don't overwrite it.   do a statfs to
        !          1036:         * find its size and skip over it.
        !          1037:         */
        !          1038:        if (vp == rootvp) {
        !          1039:                struct mount *mp;
        !          1040:                struct statfs *sp;
        !          1041:                int rootblocks, rootpages;
        !          1042:
        !          1043:                mp = rootvnode->v_mount;
        !          1044:                sp = &mp->mnt_stat;
        !          1045:                rootblocks = sp->f_blocks * btodb(sp->f_bsize);
        !          1046:                rootpages = round_page(dbtob(rootblocks)) >> PAGE_SHIFT;
        !          1047:                if (rootpages > size)
        !          1048:                        panic("swap_on: miniroot larger than swap?");
        !          1049:
        !          1050:                if (extent_alloc_region(sdp->swd_ex, addr,
        !          1051:                                        rootpages, EX_WAITOK))
        !          1052:                        panic("swap_on: unable to preserve miniroot");
        !          1053:
        !          1054:                size -= rootpages;
        !          1055:                printf("Preserved %d pages of miniroot ", rootpages);
        !          1056:                printf("leaving %d pages of swap\n", size);
        !          1057:        }
        !          1058:
        !          1059:        /*
        !          1060:         * add a ref to vp to reflect usage as a swap device.
        !          1061:         */
        !          1062:        vref(vp);
        !          1063:
        !          1064: #ifdef UVM_SWAP_ENCRYPT
        !          1065:        if (uvm_doswapencrypt)
        !          1066:                uvm_swap_initcrypt(sdp, npages);
        !          1067: #endif
        !          1068:        /*
        !          1069:         * now add the new swapdev to the drum and enable.
        !          1070:         */
        !          1071:        simple_lock(&uvm.swap_data_lock);
        !          1072:        swapdrum_add(sdp, npages);
        !          1073:        sdp->swd_npages = size;
        !          1074:        sdp->swd_flags &= ~SWF_FAKE;    /* going live */
        !          1075:        sdp->swd_flags |= (SWF_INUSE|SWF_ENABLE);
        !          1076:        uvmexp.swpages += size;
        !          1077:        simple_unlock(&uvm.swap_data_lock);
        !          1078:        return (0);
        !          1079:
        !          1080: bad:
        !          1081:        /*
        !          1082:         * failure: close device if necessary and return error.
        !          1083:         */
        !          1084:        if (vp != rootvp)
        !          1085:                (void)VOP_CLOSE(vp, FREAD|FWRITE, p->p_ucred, p);
        !          1086:        return (error);
        !          1087: }
        !          1088:
        !          1089: /*
        !          1090:  * swap_off: stop swapping on swapdev
        !          1091:  *
        !          1092:  * => swap data should be locked, we will unlock.
        !          1093:  */
        !          1094: static int
        !          1095: swap_off(p, sdp)
        !          1096:        struct proc *p;
        !          1097:        struct swapdev *sdp;
        !          1098: {
        !          1099:        int error;
        !          1100:        UVMHIST_FUNC("swap_off"); UVMHIST_CALLED(pdhist);
        !          1101:        UVMHIST_LOG(pdhist, "  dev=%lx", sdp->swd_dev,0,0,0);
        !          1102:
        !          1103:        /* disable the swap area being removed */
        !          1104:        sdp->swd_flags &= ~SWF_ENABLE;
        !          1105:        simple_unlock(&uvm.swap_data_lock);
        !          1106:
        !          1107:        /*
        !          1108:         * the idea is to find all the pages that are paged out to this
        !          1109:         * device, and page them all in.  in uvm, swap-backed pageable
        !          1110:         * memory can take two forms: aobjs and anons.  call the
        !          1111:         * swapoff hook for each subsystem to bring in pages.
        !          1112:         */
        !          1113:
        !          1114:        if (uao_swap_off(sdp->swd_drumoffset,
        !          1115:                         sdp->swd_drumoffset + sdp->swd_drumsize) ||
        !          1116:            amap_swap_off(sdp->swd_drumoffset,
        !          1117:                          sdp->swd_drumoffset + sdp->swd_drumsize)) {
        !          1118:
        !          1119:                error = ENOMEM;
        !          1120:        } else if (sdp->swd_npginuse > sdp->swd_npgbad) {
        !          1121:                error = EBUSY;
        !          1122:        }
        !          1123:
        !          1124:        if (error) {
        !          1125:                simple_lock(&uvm.swap_data_lock);
        !          1126:                sdp->swd_flags |= SWF_ENABLE;
        !          1127:                simple_unlock(&uvm.swap_data_lock);
        !          1128:                return (error);
        !          1129:        }
        !          1130:
        !          1131:        /*
        !          1132:         * done with the vnode and saved creds.
        !          1133:         * drop our ref on the vnode before calling VOP_CLOSE()
        !          1134:         * so that spec_close() can tell if this is the last close.
        !          1135:         */
        !          1136:        if (sdp->swd_vp->v_type == VREG) {
        !          1137:                crfree(sdp->swd_cred);
        !          1138:        }
        !          1139:        vrele(sdp->swd_vp);
        !          1140:        if (sdp->swd_vp != rootvp) {
        !          1141:                (void) VOP_CLOSE(sdp->swd_vp, FREAD|FWRITE, p->p_ucred, p);
        !          1142:        }
        !          1143:
        !          1144:        simple_lock(&uvm.swap_data_lock);
        !          1145:        uvmexp.swpages -= sdp->swd_npages;
        !          1146:
        !          1147:        if (swaplist_find(sdp->swd_vp, 1) == NULL)
        !          1148:                panic("swap_off: swapdev not in list");
        !          1149:        swaplist_trim();
        !          1150:
        !          1151:        /*
        !          1152:         * free all resources!
        !          1153:         */
        !          1154:        extent_free(swapmap, sdp->swd_drumoffset, sdp->swd_drumsize,
        !          1155:                    EX_WAITOK);
        !          1156:        extent_destroy(sdp->swd_ex);
        !          1157:        free(sdp, M_VMSWAP);
        !          1158:        simple_unlock(&uvm.swap_data_lock);
        !          1159:        return (0);
        !          1160: }
        !          1161:
        !          1162: /*
        !          1163:  * /dev/drum interface and i/o functions
        !          1164:  */
        !          1165:
        !          1166: /*
        !          1167:  * swread: the read function for the drum (just a call to physio)
        !          1168:  */
        !          1169: /*ARGSUSED*/
        !          1170: int
        !          1171: swread(dev, uio, ioflag)
        !          1172:        dev_t dev;
        !          1173:        struct uio *uio;
        !          1174:        int ioflag;
        !          1175: {
        !          1176:        UVMHIST_FUNC("swread"); UVMHIST_CALLED(pdhist);
        !          1177:
        !          1178:        UVMHIST_LOG(pdhist, "  dev=%lx offset=%lx",
        !          1179:            dev, (u_long)uio->uio_offset, 0, 0);
        !          1180:        return (physio(swstrategy, NULL, dev, B_READ, minphys, uio));
        !          1181: }
        !          1182:
        !          1183: /*
        !          1184:  * swwrite: the write function for the drum (just a call to physio)
        !          1185:  */
        !          1186: /*ARGSUSED*/
        !          1187: int
        !          1188: swwrite(dev, uio, ioflag)
        !          1189:        dev_t dev;
        !          1190:        struct uio *uio;
        !          1191:        int ioflag;
        !          1192: {
        !          1193:        UVMHIST_FUNC("swwrite"); UVMHIST_CALLED(pdhist);
        !          1194:
        !          1195:        UVMHIST_LOG(pdhist, "  dev=%lx offset=%lx",
        !          1196:            dev, (u_long)uio->uio_offset, 0, 0);
        !          1197:        return (physio(swstrategy, NULL, dev, B_WRITE, minphys, uio));
        !          1198: }
        !          1199:
        !          1200: /*
        !          1201:  * swstrategy: perform I/O on the drum
        !          1202:  *
        !          1203:  * => we must map the i/o request from the drum to the correct swapdev.
        !          1204:  */
        !          1205: void
        !          1206: swstrategy(bp)
        !          1207:        struct buf *bp;
        !          1208: {
        !          1209:        struct swapdev *sdp;
        !          1210:        int s, pageno, bn;
        !          1211:        UVMHIST_FUNC("swstrategy"); UVMHIST_CALLED(pdhist);
        !          1212:
        !          1213:        /*
        !          1214:         * convert block number to swapdev.   note that swapdev can't
        !          1215:         * be yanked out from under us because we are holding resources
        !          1216:         * in it (i.e. the blocks we are doing I/O on).
        !          1217:         */
        !          1218:        pageno = dbtob((int64_t)bp->b_blkno) >> PAGE_SHIFT;
        !          1219:        simple_lock(&uvm.swap_data_lock);
        !          1220:        sdp = swapdrum_getsdp(pageno);
        !          1221:        simple_unlock(&uvm.swap_data_lock);
        !          1222:        if (sdp == NULL) {
        !          1223:                bp->b_error = EINVAL;
        !          1224:                bp->b_flags |= B_ERROR;
        !          1225:                s = splbio();
        !          1226:                biodone(bp);
        !          1227:                splx(s);
        !          1228:                UVMHIST_LOG(pdhist, "  failed to get swap device", 0, 0, 0, 0);
        !          1229:                return;
        !          1230:        }
        !          1231:
        !          1232:        /*
        !          1233:         * convert drum page number to block number on this swapdev.
        !          1234:         */
        !          1235:
        !          1236:        pageno -= sdp->swd_drumoffset;  /* page # on swapdev */
        !          1237:        bn = btodb((u_int64_t)pageno << PAGE_SHIFT); /* convert to diskblock */
        !          1238:
        !          1239:        UVMHIST_LOG(pdhist, "  %s: mapoff=%lx bn=0x%lx bcount=%ld",
        !          1240:                ((bp->b_flags & B_READ) == 0) ? "write" : "read",
        !          1241:                sdp->swd_drumoffset, bn, bp->b_bcount);
        !          1242:
        !          1243:        /*
        !          1244:         * for block devices we finish up here.
        !          1245:         * for regular files we have to do more work which we delegate
        !          1246:         * to sw_reg_strategy().
        !          1247:         */
        !          1248:
        !          1249:        switch (sdp->swd_vp->v_type) {
        !          1250:        default:
        !          1251:                panic("swstrategy: vnode type 0x%x", sdp->swd_vp->v_type);
        !          1252:
        !          1253:        case VBLK:
        !          1254:
        !          1255:                /*
        !          1256:                 * must convert "bp" from an I/O on /dev/drum to an I/O
        !          1257:                 * on the swapdev (sdp).
        !          1258:                 */
        !          1259:                s = splbio();
        !          1260:                buf_replacevnode(bp, sdp->swd_vp);
        !          1261:
        !          1262:                bp->b_blkno = bn;
        !          1263:                splx(s);
        !          1264:                VOP_STRATEGY(bp);
        !          1265:                return;
        !          1266:
        !          1267:        case VREG:
        !          1268:                /*
        !          1269:                 * delegate to sw_reg_strategy function.
        !          1270:                 */
        !          1271:                sw_reg_strategy(sdp, bp, bn);
        !          1272:                return;
        !          1273:        }
        !          1274:        /* NOTREACHED */
        !          1275: }
        !          1276:
/*
 * sw_reg_strategy: handle swap i/o to regular files
 *
 * => sdp is the swapdev backing the request, bp the request itself and
 *	bn the disk block number on the swapdev (it is converted to a
 *	byte offset with dbtob() below).
 * => the request is cut into pieces, each mapped through VOP_BMAP();
 *	every piece gets its own vndbuf which is queued on sdp->swd_tab
 *	and started through sw_reg_start().
 * => bp is completed with biodone() here only if no piece is pending
 *	when we are done issuing; otherwise completion is left to
 *	sw_reg_iodone(), which is installed as b_iodone on every piece.
 */
static void
sw_reg_strategy(sdp, bp, bn)
	struct swapdev	*sdp;
	struct buf	*bp;
	int		bn;
{
	struct vnode	*vp;
	struct vndxfer	*vnx;
	daddr64_t	nbn;
	caddr_t		addr;
	off_t		byteoff;
	int		s, off, nra, error, sz, resid;
	UVMHIST_FUNC("sw_reg_strategy"); UVMHIST_CALLED(pdhist);

	/*
	 * allocate a vndxfer head for this transfer and point it to
	 * our buffer.
	 *
	 * VX_BUSY stays set for as long as we are still issuing pieces;
	 * sw_reg_iodone() does not complete bp while it is set.
	 */
	getvndxfer(vnx);
	vnx->vx_flags = VX_BUSY;
	vnx->vx_error = 0;
	vnx->vx_pending = 0;
	vnx->vx_bp = bp;
	vnx->vx_sdp = sdp;

	/*
	 * setup for main loop where we read filesystem blocks into
	 * our buffer.
	 */
	error = 0;
	bp->b_resid = bp->b_bcount;	/* nothing transferred yet! */
	addr = bp->b_data;		/* current position in buffer */
	byteoff = dbtob((u_int64_t)bn);

	/* one iteration per piece; resid counts the bytes not yet issued */
	for (resid = bp->b_resid; resid; resid -= sz) {
		struct vndbuf	*nbp;

		/*
		 * translate byteoffset into block number.  return values:
		 *   vp = vnode of underlying device
		 *  nbn = new block number (on underlying vnode dev)
		 *  nra = num blocks we can read-ahead (excludes requested
		 *	block)
		 */
		nra = 0;
		error = VOP_BMAP(sdp->swd_vp, byteoff / sdp->swd_bsize,
				 	&vp, &nbn, &nra);

		if (error == 0 && nbn == (daddr64_t)-1) {
			/*
			 * this used to just set error, but that doesn't
			 * do the right thing.  Instead, it causes random
			 * memory errors.  The panic() should remain until
			 * this condition doesn't destabilize the system.
			 */
#if 1
			panic("sw_reg_strategy: swap to sparse file");
#else
			error = EIO;	/* failure */
#endif
		}

		/*
		 * punt if there was an error or a hole in the file.
		 * we must wait for any i/o ops we have already started
		 * to finish before returning.
		 *
		 * XXX we could deal with holes here but it would be
		 * a hassle (in the write case).
		 */
		if (error) {
			/* the code at "out" expects to be entered at splbio */
			s = splbio();
			vnx->vx_error = error;	/* pass error up */
			goto out;
		}

		/*
		 * compute the size ("sz") of this transfer (in bytes).
		 * it runs from our offset inside the mapped block to the
		 * end of the read-ahead run, capped at what is left.
		 */
		off = byteoff % sdp->swd_bsize;
		sz = (1 + nra) * sdp->swd_bsize - off;
		if (sz > resid)
			sz = resid;

		UVMHIST_LOG(pdhist, "sw_reg_strategy: "
			    "vp %p/%p offset 0x%lx/0x%llx",
			    sdp->swd_vp, vp, (u_long)byteoff, nbn);

		/*
		 * now get a buf structure.   note that the vb_buf is
		 * at the front of the nbp structure so that you can
		 * cast pointers between the two structure easily.
		 */
		getvndbuf(nbp);
		nbp->vb_buf.b_flags    = bp->b_flags | B_CALL;
		nbp->vb_buf.b_bcount   = sz;
		nbp->vb_buf.b_bufsize  = sz;
		nbp->vb_buf.b_error    = 0;
		nbp->vb_buf.b_data     = addr;
		nbp->vb_buf.b_blkno    = nbn + btodb(off);
		nbp->vb_buf.b_proc     = bp->b_proc;
		nbp->vb_buf.b_iodone   = sw_reg_iodone;
		nbp->vb_buf.b_vp       = NULLVP;
		nbp->vb_buf.b_vnbufs.le_next = NOLIST;
		LIST_INIT(&nbp->vb_buf.b_dep);

		/*
		 * set b_dirtyoff/end and b_validoff/end.   this is
		 * required by the NFS client code (otherwise it will
		 * just discard our I/O request).
		 *
		 * (bp->b_bcount - resid) is the number of bytes already
		 * issued, i.e. where this piece starts inside bp.
		 *
		 * NOTE(review): if max()/min() here are the unsigned
		 * kernel helpers rather than signed ones, a negative
		 * difference would not clamp to 0 -- confirm.
		 */
		if (bp->b_dirtyend == 0) {
			nbp->vb_buf.b_dirtyoff = 0;
			nbp->vb_buf.b_dirtyend = sz;
		} else {
			nbp->vb_buf.b_dirtyoff =
			    max(0, bp->b_dirtyoff - (bp->b_bcount-resid));
			nbp->vb_buf.b_dirtyend =
			    min(sz,
				max(0, bp->b_dirtyend - (bp->b_bcount-resid)));
		}
		if (bp->b_validend == 0) {
			nbp->vb_buf.b_validoff = 0;
			nbp->vb_buf.b_validend = sz;
		} else {
			nbp->vb_buf.b_validoff =
			    max(0, bp->b_validoff - (bp->b_bcount-resid));
			nbp->vb_buf.b_validend =
			    min(sz,
				max(0, bp->b_validend - (bp->b_bcount-resid)));
		}

		nbp->vb_xfer = vnx;	/* patch it back in to vnx */

		/*
		 * Just sort by block number
		 */
		nbp->vb_buf.b_cylinder = nbp->vb_buf.b_blkno;
		s = splbio();
		if (vnx->vx_error != 0) {
			/*
			 * a piece issued earlier has already reported an
			 * error (see sw_reg_iodone): drop this one and
			 * stop issuing.
			 */
			putvndbuf(nbp);
			goto out;
		}
		vnx->vx_pending++;

		/* assoc new buffer with underlying vnode */
		bgetvp(vp, &nbp->vb_buf);

		/* sort it in and start I/O if we are not over our limit */
		disksort(&sdp->swd_tab, &nbp->vb_buf);
		sw_reg_start(sdp);
		splx(s);

		/*
		 * advance to the next I/O
		 */
		byteoff += sz;
		addr += sz;
	}

	s = splbio();

out: /* Arrive here at splbio */
	/*
	 * we are done issuing.  if nothing is pending any more, complete
	 * bp right here; otherwise sw_reg_iodone() finishes it once the
	 * outstanding pieces have come back.
	 */
	vnx->vx_flags &= ~VX_BUSY;
	if (vnx->vx_pending == 0) {
		if (vnx->vx_error != 0) {
			bp->b_error = vnx->vx_error;
			bp->b_flags |= B_ERROR;
		}
		putvndxfer(vnx);
		biodone(bp);
	}
	splx(s);
}
        !          1454:
        !          1455: /*
        !          1456:  * sw_reg_start: start an I/O request on the requested swapdev
        !          1457:  *
        !          1458:  * => reqs are sorted by disksort (above)
        !          1459:  */
        !          1460: static void
        !          1461: sw_reg_start(sdp)
        !          1462:        struct swapdev  *sdp;
        !          1463: {
        !          1464:        struct buf      *bp;
        !          1465:        UVMHIST_FUNC("sw_reg_start"); UVMHIST_CALLED(pdhist);
        !          1466:
        !          1467:        /* recursion control */
        !          1468:        if ((sdp->swd_flags & SWF_BUSY) != 0)
        !          1469:                return;
        !          1470:
        !          1471:        sdp->swd_flags |= SWF_BUSY;
        !          1472:
        !          1473:        while (sdp->swd_tab.b_active < sdp->swd_maxactive) {
        !          1474:                bp = sdp->swd_tab.b_actf;
        !          1475:                if (bp == NULL)
        !          1476:                        break;
        !          1477:                sdp->swd_tab.b_actf = bp->b_actf;
        !          1478:                sdp->swd_tab.b_active++;
        !          1479:
        !          1480:                UVMHIST_LOG(pdhist,
        !          1481:                    "sw_reg_start:  bp %p vp %p blkno 0x%lx cnt 0x%lx",
        !          1482:                    bp, bp->b_vp, bp->b_blkno, bp->b_bcount);
        !          1483:                if ((bp->b_flags & B_READ) == 0)
        !          1484:                        bp->b_vp->v_numoutput++;
        !          1485:
        !          1486:                VOP_STRATEGY(bp);
        !          1487:        }
        !          1488:        sdp->swd_flags &= ~SWF_BUSY;
        !          1489: }
        !          1490:
        !          1491: /*
        !          1492:  * sw_reg_iodone: one of our i/o's has completed and needs post-i/o cleanup
        !          1493:  *
        !          1494:  * => note that we can recover the vndbuf struct by casting the buf ptr
        !          1495:  */
        !          1496: static void
        !          1497: sw_reg_iodone(bp)
        !          1498:        struct buf *bp;
        !          1499: {
        !          1500:        struct vndbuf *vbp = (struct vndbuf *) bp;
        !          1501:        struct vndxfer *vnx = vbp->vb_xfer;
        !          1502:        struct buf *pbp = vnx->vx_bp;           /* parent buffer */
        !          1503:        struct swapdev  *sdp = vnx->vx_sdp;
        !          1504:        int resid;
        !          1505:        UVMHIST_FUNC("sw_reg_iodone"); UVMHIST_CALLED(pdhist);
        !          1506:
        !          1507:        UVMHIST_LOG(pdhist, "  vbp=%p vp=%p blkno=0x%lx addr=%p",
        !          1508:            vbp, vbp->vb_buf.b_vp, vbp->vb_buf.b_blkno, vbp->vb_buf.b_data);
        !          1509:        UVMHIST_LOG(pdhist, "  cnt=%lx resid=%lx",
        !          1510:            vbp->vb_buf.b_bcount, vbp->vb_buf.b_resid, 0, 0);
        !          1511:
        !          1512:        splassert(IPL_BIO);
        !          1513:
        !          1514:        resid = vbp->vb_buf.b_bcount - vbp->vb_buf.b_resid;
        !          1515:        pbp->b_resid -= resid;
        !          1516:        vnx->vx_pending--;
        !          1517:
        !          1518:        if (vbp->vb_buf.b_error) {
        !          1519:                UVMHIST_LOG(pdhist, "  got error=%ld !",
        !          1520:                    vbp->vb_buf.b_error, 0, 0, 0);
        !          1521:
        !          1522:                /* pass error upward */
        !          1523:                vnx->vx_error = vbp->vb_buf.b_error;
        !          1524:        }
        !          1525:
        !          1526:        /*
        !          1527:         * disassociate this buffer from the vnode (if any).
        !          1528:         */
        !          1529:        if (vbp->vb_buf.b_vp != NULL) {
        !          1530:                brelvp(&vbp->vb_buf);
        !          1531:        }
        !          1532:
        !          1533:        /*
        !          1534:         * kill vbp structure
        !          1535:         */
        !          1536:        putvndbuf(vbp);
        !          1537:
        !          1538:        /*
        !          1539:         * wrap up this transaction if it has run to completion or, in
        !          1540:         * case of an error, when all auxiliary buffers have returned.
        !          1541:         */
        !          1542:        if (vnx->vx_error != 0) {
        !          1543:                /* pass error upward */
        !          1544:                pbp->b_flags |= B_ERROR;
        !          1545:                pbp->b_error = vnx->vx_error;
        !          1546:                if ((vnx->vx_flags & VX_BUSY) == 0 && vnx->vx_pending == 0) {
        !          1547:                        putvndxfer(vnx);
        !          1548:                        biodone(pbp);
        !          1549:                }
        !          1550:        } else if (pbp->b_resid == 0) {
        !          1551:                KASSERT(vnx->vx_pending == 0);
        !          1552:                if ((vnx->vx_flags & VX_BUSY) == 0) {
        !          1553:                        UVMHIST_LOG(pdhist, "  iodone error=%ld !",
        !          1554:                            pbp, vnx->vx_error, 0, 0);
        !          1555:                        putvndxfer(vnx);
        !          1556:                        biodone(pbp);
        !          1557:                }
        !          1558:        }
        !          1559:
        !          1560:        /*
        !          1561:         * done!   start next swapdev I/O if one is pending
        !          1562:         */
        !          1563:        sdp->swd_tab.b_active--;
        !          1564:        sw_reg_start(sdp);
        !          1565: }
        !          1566:
        !          1567:
        !          1568: /*
        !          1569:  * uvm_swap_alloc: allocate space on swap
        !          1570:  *
        !          1571:  * => allocation is done "round robin" down the priority list, as we
        !          1572:  *     allocate in a priority we "rotate" the circle queue.
        !          1573:  * => space can be freed with uvm_swap_free
        !          1574:  * => we return the page slot number in /dev/drum (0 == invalid slot)
        !          1575:  * => we lock uvm.swap_data_lock
        !          1576:  * => XXXMRG: "LESSOK" INTERFACE NEEDED TO EXTENT SYSTEM
        !          1577:  */
        !          1578: int
        !          1579: uvm_swap_alloc(nslots, lessok)
        !          1580:        int *nslots;    /* IN/OUT */
        !          1581:        boolean_t lessok;
        !          1582: {
        !          1583:        struct swapdev *sdp;
        !          1584:        struct swappri *spp;
        !          1585:        u_long  result;
        !          1586:        UVMHIST_FUNC("uvm_swap_alloc"); UVMHIST_CALLED(pdhist);
        !          1587:
        !          1588:        /*
        !          1589:         * no swap devices configured yet?   definite failure.
        !          1590:         */
        !          1591:        if (uvmexp.nswapdev < 1)
        !          1592:                return 0;
        !          1593:
        !          1594:        /*
        !          1595:         * lock data lock, convert slots into blocks, and enter loop
        !          1596:         */
        !          1597:        simple_lock(&uvm.swap_data_lock);
        !          1598:
        !          1599: ReTry: /* XXXMRG */
        !          1600:        for (spp = LIST_FIRST(&swap_priority); spp != NULL;
        !          1601:             spp = LIST_NEXT(spp, spi_swappri)) {
        !          1602:                for (sdp = CIRCLEQ_FIRST(&spp->spi_swapdev);
        !          1603:                     sdp != (void *)&spp->spi_swapdev;
        !          1604:                     sdp = CIRCLEQ_NEXT(sdp,swd_next)) {
        !          1605:                        /* if it's not enabled, then we can't swap from it */
        !          1606:                        if ((sdp->swd_flags & SWF_ENABLE) == 0)
        !          1607:                                continue;
        !          1608:                        if (sdp->swd_npginuse + *nslots > sdp->swd_npages)
        !          1609:                                continue;
        !          1610:                        if (extent_alloc(sdp->swd_ex, *nslots, EX_NOALIGN, 0,
        !          1611:                                         EX_NOBOUNDARY, EX_MALLOCOK|EX_NOWAIT,
        !          1612:                                         &result) != 0) {
        !          1613:                                continue;
        !          1614:                        }
        !          1615:
        !          1616:                        /*
        !          1617:                         * successful allocation!  now rotate the circleq.
        !          1618:                         */
        !          1619:                        CIRCLEQ_REMOVE(&spp->spi_swapdev, sdp, swd_next);
        !          1620:                        CIRCLEQ_INSERT_TAIL(&spp->spi_swapdev, sdp, swd_next);
        !          1621:                        sdp->swd_npginuse += *nslots;
        !          1622:                        uvmexp.swpginuse += *nslots;
        !          1623:                        simple_unlock(&uvm.swap_data_lock);
        !          1624:                        /* done!  return drum slot number */
        !          1625:                        UVMHIST_LOG(pdhist,
        !          1626:                            "success!  returning %ld slots starting at %ld",
        !          1627:                            *nslots, result + sdp->swd_drumoffset, 0, 0);
        !          1628:                        return(result + sdp->swd_drumoffset);
        !          1629:                }
        !          1630:        }
        !          1631:
        !          1632:        /* XXXMRG: BEGIN HACK */
        !          1633:        if (*nslots > 1 && lessok) {
        !          1634:                *nslots = 1;
        !          1635:                goto ReTry;     /* XXXMRG: ugh!  extent should support this for us */
        !          1636:        }
        !          1637:        /* XXXMRG: END HACK */
        !          1638:
        !          1639:        simple_unlock(&uvm.swap_data_lock);
        !          1640:        return 0;               /* failed */
        !          1641: }
        !          1642:
        !          1643: /*
        !          1644:  * uvm_swap_markbad: keep track of swap ranges where we've had i/o errors
        !          1645:  *
        !              :  * => startslot: first slot of the failed range; it is handed to
        !              :  *    swapdrum_getsdp() to find the swap device to charge
        !              :  * => nslots: number of slots added to that device's bad-page count
        !              :  * => if swapdrum_getsdp() returns NULL nothing is recorded
        !          1646:  * => we lock uvm.swap_data_lock
        !          1647:  */
        !          1648: void
        !          1649: uvm_swap_markbad(startslot, nslots)
        !          1650:        int startslot;
        !          1651:        int nslots;
        !          1652: {
        !          1653:        struct swapdev *sdp;
        !          1654:        UVMHIST_FUNC("uvm_swap_markbad"); UVMHIST_CALLED(pdhist);
        !          1655:
        !          1656:        simple_lock(&uvm.swap_data_lock);
        !          1657:        sdp = swapdrum_getsdp(startslot);
        !          1658:        if (sdp != NULL) {
        !          1659:                /*
        !          1660:                 * we just keep track of how many pages have been marked bad
        !          1661:                 * in this device, to make everything add up in swap_off().
        !          1662:                 * we assume here that the range of slots will all be within
        !          1663:                 * one swap device: only startslot is looked up, and the
        !              :                 * whole count is charged to the device it maps to.
        !          1664:                 */
        !          1665:                sdp->swd_npgbad += nslots;
        !          1666:                UVMHIST_LOG(pdhist, "now %ld bad", sdp->swd_npgbad, 0,0,0);
        !          1667:        }
        !          1668:        simple_unlock(&uvm.swap_data_lock);
        !          1669: }
        !          1670:
        !          1671: /*
        !          1672:  * uvm_swap_free: free swap slots
        !          1673:  *
        !              :  * => startslot: first drum slot to free; SWSLOT_BAD is silently ignored
        !              :  * => nslots: number of consecutive slots to free
        !          1674:  * => this can be all or part of an allocation made by uvm_swap_alloc
        !          1675:  * => we lock uvm.swap_data_lock
        !              :  * => if extent_free() fails we only print a warning; the in-use
        !              :  *    counters are decremented regardless
        !              :  * => with UVM_SWAP_ENCRYPT, once swap encryption is initialized, the
        !              :  *    key reference of every slot still marked for decryption is put
        !              :  *    back and the whole range is marked as not needing decryption
        !          1676:  */
        !          1677: void
        !          1678: uvm_swap_free(startslot, nslots)
        !          1679:        int startslot;
        !          1680:        int nslots;
        !          1681: {
        !          1682:        struct swapdev *sdp;
        !          1683:        UVMHIST_FUNC("uvm_swap_free"); UVMHIST_CALLED(pdhist);
        !          1684:
        !          1685:        UVMHIST_LOG(pdhist, "freeing %ld slots starting at %ld", nslots,
        !          1686:            startslot, 0, 0);
        !          1687:
        !          1688:        /*
        !          1689:         * ignore attempts to free the "bad" slot.
        !              :         * (checked before the lock is taken, so nothing to undo.)
        !          1690:         */
        !          1691:
        !          1692:        if (startslot == SWSLOT_BAD) {
        !          1693:                return;
        !          1694:        }
        !          1695:
        !          1696:        /*
        !          1697:         * convert drum slot offset back to sdp, free the blocks
        !          1698:         * in the extent, and return.   must hold pri lock to do
        !          1699:         * lookup and access the extent.
        !              :         * the extent is indexed relative to the device, hence the
        !              :         * subtraction of swd_drumoffset from the drum slot.
        !          1700:         */
        !          1701:
        !          1702:        simple_lock(&uvm.swap_data_lock);
        !          1703:        sdp = swapdrum_getsdp(startslot);
        !          1704:        KASSERT(uvmexp.nswapdev >= 1);
        !          1705:        KASSERT(sdp != NULL);
        !          1706:        KASSERT(sdp->swd_npginuse >= nslots);
        !          1707:        if (extent_free(sdp->swd_ex, startslot - sdp->swd_drumoffset, nslots,
        !          1708:                        EX_MALLOCOK|EX_NOWAIT) != 0) {
        !          1709:                printf("warning: resource shortage: %d pages of swap lost\n",
        !          1710:                        nslots);
        !          1711:        }
        !          1712:
        !          1713:        sdp->swd_npginuse -= nslots;    /* per-device count */
        !          1714:        uvmexp.swpginuse -= nslots;     /* global count */
        !          1715: #ifdef UVM_SWAP_ENCRYPT
        !          1716:        {
        !          1717:                int i;
        !          1718:                if (swap_encrypt_initialized) {
        !          1719:                        /* Dereference keys of slots that still need decryption */
        !          1720:                        for (i = 0; i < nslots; i++)
        !          1721:                                if (uvm_swap_needdecrypt(sdp, startslot + i))
        !          1722:                                        SWAP_KEY_PUT(sdp, SWD_KEY(sdp, startslot + i));
        !          1723:
        !          1724:                        /* Mark range as not decrypt */
        !          1725:                        uvm_swap_markdecrypt(sdp, startslot, nslots, 0);
        !          1726:                }
        !          1727:        }
        !          1728: #endif /* UVM_SWAP_ENCRYPT */
        !          1729:        simple_unlock(&uvm.swap_data_lock);
        !          1730: }
        !          1731:
        !          1732: /*
        !          1733:  * uvm_swap_put: put any number of pages into a contig place on swap
        !          1734:  *
        !              :  * => swslot: starting swap slot of the write
        !              :  * => ppsp, npages: the pages to write and how many there are
        !              :  * => flags: the write is async (B_ASYNC) unless PGO_SYNCIO is set
        !              :  * => returns whatever uvm_swap_io() returns, unchanged
        !          1735:  * => can be sync or async
        !          1736:  * => XXXMRG: consider making it an inline or macro
        !          1737:  */
        !          1738: int
        !          1739: uvm_swap_put(swslot, ppsp, npages, flags)
        !          1740:        int swslot;
        !          1741:        struct vm_page **ppsp;
        !          1742:        int     npages;
        !          1743:        int     flags;
        !          1744: {
        !          1745:        int     result;
        !          1746:
        !          1747:        result = uvm_swap_io(ppsp, swslot, npages, B_WRITE |
        !          1748:            ((flags & PGO_SYNCIO) ? 0 : B_ASYNC));
        !          1749:
        !          1750:        return (result);
        !          1751: }
        !          1752:
        !          1753: /*
        !          1754:  * uvm_swap_get: get a single page from swap
        !          1755:  *
        !              :  * => page: the page to read into
        !              :  * => swslot: the swap slot to read; SWSLOT_BAD yields VM_PAGER_ERROR
        !              :  *    without any i/o
        !              :  * => flags: must include PGO_SYNCIO (asserted)
        !              :  * => returns VM_PAGER_ERROR for SWSLOT_BAD, otherwise the result of
        !              :  *    uvm_swap_io()
        !              :  * => uvmexp.nswget is bumped on every call, even for SWSLOT_BAD
        !          1756:  * => usually a sync op (from fault)
        !          1757:  * => XXXMRG: consider making it an inline or macro
        !          1758:  */
        !          1759: int
        !          1760: uvm_swap_get(page, swslot, flags)
        !          1761:        struct vm_page *page;
        !          1762:        int swslot, flags;
        !          1763: {
        !          1764:        int     result;
        !          1765:
        !          1766:        uvmexp.nswget++;
        !          1767:        KASSERT(flags & PGO_SYNCIO);
        !          1768:        if (swslot == SWSLOT_BAD) {
        !          1769:                return VM_PAGER_ERROR;
        !          1770:        }
        !          1771:
        !          1772:        /*
        !          1773:         * this page is (about to be) no longer only in swap.
        !              :         * the counter is adjusted under uvm.swap_data_lock before the
        !              :         * read is issued and put back below if the read fails.
        !          1774:         */
        !          1775:        simple_lock(&uvm.swap_data_lock);
        !          1776:        uvmexp.swpgonly--;
        !          1777:        simple_unlock(&uvm.swap_data_lock);
        !          1778:
        !          1779:        result = uvm_swap_io(&page, swslot, 1, B_READ |
        !          1780:            ((flags & PGO_SYNCIO) ? 0 : B_ASYNC));
        !          1781:
        !          1782:        if (result != VM_PAGER_OK && result != VM_PAGER_PEND) {
        !          1783:                /*
        !          1784:                 * oops, the read failed so it really is still only in swap.
        !          1785:                 */
        !          1786:                simple_lock(&uvm.swap_data_lock);
        !          1787:                uvmexp.swpgonly++;
        !          1788:                simple_unlock(&uvm.swap_data_lock);
        !          1789:        }
        !          1790:
        !          1791:        return (result);
        !          1792: }
        !          1793:
        !          1794: /*
        !          1795:  * uvm_swap_io: do an i/o operation to swap
        !              :  *
        !              :  * => pps, npages: the pages to transfer and how many there are
        !              :  * => startslot: first drum slot of the transfer
        !              :  * => flags: B_READ selects a read (otherwise it is a write); B_ASYNC
        !              :  *    requests an async transfer (when UVM_SWAP_ENCRYPT is defined,
        !              :  *    B_ASYNC is cleared for reads)
        !              :  * => returns VM_PAGER_AGAIN if the pager mapping, the encryption
        !              :  *    bounce pages or the buf cannot be obtained; VM_PAGER_PEND once
        !              :  *    an async transfer has been handed to VOP_STRATEGY(); and
        !              :  *    VM_PAGER_OK or VM_PAGER_ERROR after a sync transfer
        !              :  * => every VM_PAGER_AGAIN return releases the pager mapping that
        !              :  *    was set up here
        !          1796:  */
        !          1797:
        !          1798: static int
        !          1799: uvm_swap_io(pps, startslot, npages, flags)
        !          1800:        struct vm_page **pps;
        !          1801:        int startslot, npages, flags;
        !          1802: {
        !          1803:        daddr64_t startblk;
        !          1804:        struct  buf *bp;
        !          1805:        vaddr_t kva;
        !          1806:        int     result, s, mapinflags, pflag;
        !          1807:        boolean_t write, async;
        !          1808: #ifdef UVM_SWAP_ENCRYPT
        !          1809:        vaddr_t dstkva;
        !          1810:        struct vm_page *tpps[MAXBSIZE >> PAGE_SHIFT];
        !          1811:        struct swapdev *sdp;
        !          1812:        int     encrypt = 0;
        !          1813: #endif
        !          1814:        UVMHIST_FUNC("uvm_swap_io"); UVMHIST_CALLED(pdhist);
        !          1815:
        !          1816:        UVMHIST_LOG(pdhist, "<- called, startslot=%ld, npages=%ld, flags=%ld",
        !          1817:            startslot, npages, flags, 0);
        !          1818:
        !          1819:        write = (flags & B_READ) == 0;
        !          1820:        async = (flags & B_ASYNC) != 0;
        !          1821:
        !          1822:        /*
        !          1823:         * convert starting drum slot to block number
        !          1824:         */
        !          1825:        startblk = btodb((u_int64_t)startslot << PAGE_SHIFT);
        !          1826:
        !          1827:        /*
        !          1828:         * first, map the pages into the kernel (XXX: currently required
        !          1829:         * by buffer system).
        !          1830:         */
        !          1831:        mapinflags = !write ? UVMPAGER_MAPIN_READ : UVMPAGER_MAPIN_WRITE;
        !          1832:        if (!async)
        !          1833:                mapinflags |= UVMPAGER_MAPIN_WAITOK;
        !          1834:        kva = uvm_pagermapin(pps, npages, mapinflags);
        !          1835:        if (kva == 0)
        !          1836:                return (VM_PAGER_AGAIN);
        !          1837:
        !          1838: #ifdef UVM_SWAP_ENCRYPT
        !          1839:        if (write) {
        !          1840:                /*
        !          1841:                 * Check if we need to do swap encryption on old pages.
        !          1842:                 * Later we need a different scheme, that swap encrypts
        !          1843:                 * all pages of a process that had at least one page swap
        !          1844:                 * encrypted.  Then we might not need to copy all pages
        !          1845:                 * in the cluster, and avoid the memory overhead in
        !          1846:                 * swapping.
        !          1847:                 */
        !          1848:                if (uvm_doswapencrypt)
        !          1849:                        encrypt = 1;
        !          1850:        }
        !          1851:
        !          1852:        if (swap_encrypt_initialized  || encrypt) {
        !          1853:                /*
        !          1854:                 * we need to know the swap device that we are swapping to/from
        !          1855:                 * to see if the pages need to be marked for decryption or
        !          1856:                 * actually need to be decrypted.
        !          1857:                 * XXX - does this information stay the same over the whole
        !          1858:                 * execution of this function?
        !          1859:                 */
        !          1860:                simple_lock(&uvm.swap_data_lock);
        !          1861:                sdp = swapdrum_getsdp(startslot);
        !          1862:                simple_unlock(&uvm.swap_data_lock);
        !          1863:        }
        !          1864:
        !          1865:        /*
        !          1866:         * encrypt to swap
        !          1867:         */
        !          1868:        if (write && encrypt) {
        !          1869:                int i, opages;
        !          1870:                caddr_t src, dst;
        !          1871:                struct swap_key *key;
        !          1872:                u_int64_t block;
        !          1873:                int swmapflags;
        !          1874:
        !          1875:                /* We always need write access. */
        !          1876:                swmapflags = UVMPAGER_MAPIN_READ;
        !          1877:                if (!async)
        !          1878:                        swmapflags |= UVMPAGER_MAPIN_WAITOK;
        !          1879:
        !          1880:                if (!uvm_swap_allocpages(tpps, npages)) {
        !          1881:                        uvm_pagermapout(kva, npages);
        !          1882:                        return (VM_PAGER_AGAIN);
        !          1883:                }
        !          1884:
        !          1885:                dstkva = uvm_pagermapin(tpps, npages, swmapflags);
        !          1886:                if (dstkva == 0) {
        !          1887:                        uvm_pagermapout(kva, npages);
        !          1888:                        uvm_swap_freepages(tpps, npages);
        !          1889:                        return (VM_PAGER_AGAIN);
        !          1890:                }
        !          1891:
        !          1892:                src = (caddr_t) kva;
        !          1893:                dst = (caddr_t) dstkva;
        !          1894:                block = startblk;
        !          1895:                for (i = 0; i < npages; i++) {
        !          1896:                        key = SWD_KEY(sdp, startslot + i);
        !          1897:                        SWAP_KEY_GET(sdp, key); /* add reference */
        !          1898:
        !          1899:                        /* mark for async writes */
        !          1900:                        atomic_setbits_int(&tpps[i]->pg_flags, PQ_ENCRYPT);
        !          1901:                        swap_encrypt(key, src, dst, block, 1 << PAGE_SHIFT);
        !          1902:                        src += 1 << PAGE_SHIFT;
        !          1903:                        dst += 1 << PAGE_SHIFT;
        !          1904:                        block += btodb(1 << PAGE_SHIFT);
        !          1905:                }
        !          1906:
        !          1907:                uvm_pagermapout(kva, npages);
        !          1908:
        !          1909:                /* dispose of pages we don't use anymore */
        !          1910:                opages = npages;
        !          1911:                uvm_pager_dropcluster(NULL, NULL, pps, &opages,
        !          1912:                                      PGO_PDFREECLUST);
        !          1913:
        !              :                /* from here on the i/o is done on the bounce mapping */
        !          1914:                kva = dstkva;
        !          1915:        }
        !          1916: #endif /* UVM_SWAP_ENCRYPT */
        !          1917:
        !          1918:        /*
        !          1919:         * now allocate a buf for the i/o.
        !          1920:         * [make sure we don't put the pagedaemon to sleep...]
        !          1921:         */
        !          1922:        s = splbio();
        !          1923:        pflag = (async || curproc == uvm.pagedaemon_proc) ? 0 : PR_WAITOK;
        !          1924:        bp = pool_get(&bufpool, pflag);
        !          1925:        splx(s);
        !          1926:
        !          1927:        /*
        !          1928:         * if we failed to get a swapbuf, return "try again"
        !          1929:         */
        !          1930:        if (bp == NULL) {
        !              :                /*
        !              :                 * kva is the mapping made in this function (the original
        !              :                 * one, or the bounce mapping when encrypting), so release
        !              :                 * it on every path.  it used to be released only in the
        !              :                 * encrypting case, leaking the mapping otherwise.
        !              :                 */
        !              :                uvm_pagermapout(kva, npages);
        !          1931: #ifdef UVM_SWAP_ENCRYPT
        !          1932:                if (write && encrypt) {
        !          1933:                        int i;
        !          1934:
        !          1935:                        /* swap encrypt needs cleanup */
        !          1936:                        for (i = 0; i < npages; i++)
        !          1937:                                SWAP_KEY_PUT(sdp, SWD_KEY(sdp, startslot + i));
        !          1938:
        !          1940:                        uvm_swap_freepages(tpps, npages);
        !          1941:                }
        !          1942: #endif
        !          1943:                return (VM_PAGER_AGAIN);
        !          1944:        }
        !          1945:
        !          1946: #ifdef UVM_SWAP_ENCRYPT
        !          1947:        /*
        !          1948:         * prevent ASYNC reads.
        !          1949:         * uvm_swap_io is only called from uvm_swap_get, uvm_swap_get
        !          1950:         * assumes that all gets are SYNCIO.  Just make sure here.
        !          1951:         * XXXARTUBC - might not be true anymore.
        !          1952:         */
        !          1953:        if (!write) {
        !          1954:                flags &= ~B_ASYNC;
        !          1955:                async = 0;
        !          1956:        }
        !          1957: #endif
        !          1958:        /*
        !          1959:         * fill in the bp.   we currently route our i/o through
        !          1960:         * /dev/drum's vnode [swapdev_vp].
        !          1961:         */
        !          1962:        bp->b_flags = B_BUSY | B_NOCACHE | (flags & (B_READ|B_ASYNC));
        !          1963:        bp->b_proc = &proc0;    /* XXX */
        !          1964:        bp->b_vnbufs.le_next = NOLIST;
        !          1965:        bp->b_data = (caddr_t)kva;
        !          1966:        bp->b_blkno = startblk;
        !          1967:        LIST_INIT(&bp->b_dep);
        !          1968:        s = splbio();
        !          1969:        bp->b_vp = NULL;
        !          1970:        buf_replacevnode(bp, swapdev_vp);
        !          1971:        splx(s);
        !          1972:        bp->b_bufsize = bp->b_bcount = npages << PAGE_SHIFT;
        !          1973:
        !          1974:        /*
        !          1975:         * for pageouts we must set "dirtyoff" [NFS client code needs it].
        !          1976:         * and we bump v_numoutput (counter of number of active outputs).
        !          1977:         */
        !          1978:        if (write) {
        !          1979:                bp->b_dirtyoff = 0;
        !          1980:                bp->b_dirtyend = npages << PAGE_SHIFT;
        !          1981: #ifdef UVM_SWAP_ENCRYPT
        !          1982:                /* mark the pages in the drum for decryption */
        !          1983:                if (swap_encrypt_initialized)
        !          1984:                        uvm_swap_markdecrypt(sdp, startslot, npages, encrypt);
        !          1985: #endif
        !          1986:                s = splbio();
        !          1987:                swapdev_vp->v_numoutput++;
        !          1988:                splx(s);
        !          1989:        }
        !          1990:
        !          1991:        /*
        !          1992:         * for async ops we must set up the iodone handler.
        !          1993:         */
        !          1994:        if (async) {
        !          1995:                bp->b_flags |= B_CALL | (curproc == uvm.pagedaemon_proc ?
        !          1996:                                         B_PDAEMON : 0);
        !          1997:                bp->b_iodone = uvm_aio_biodone;
        !          1998:                UVMHIST_LOG(pdhist, "doing async!", 0, 0, 0, 0);
        !          1999:        }
        !          2000:        UVMHIST_LOG(pdhist,
        !          2001:            "about to start io: data = %p blkno = 0x%lx, bcount = %ld",
        !          2002:            bp->b_data, bp->b_blkno, bp->b_bcount, 0);
        !          2003:
        !          2004:        /*
        !          2005:         * now we start the I/O, and if async, return.
        !          2006:         */
        !          2007:        VOP_STRATEGY(bp);
        !          2008:        if (async)
        !          2009:                return (VM_PAGER_PEND);
        !          2010:
        !          2011:        /*
        !          2012:         * must be sync i/o.   wait for it to finish
        !          2013:         */
        !          2014:        (void) biowait(bp);
        !          2015:        result = (bp->b_flags & B_ERROR) ? VM_PAGER_ERROR : VM_PAGER_OK;
        !          2016:
        !          2017: #ifdef UVM_SWAP_ENCRYPT
        !          2018:        /*
        !          2019:         * decrypt swap
        !          2020:         */
        !          2021:        if (swap_encrypt_initialized &&
        !          2022:            (bp->b_flags & B_READ) && !(bp->b_flags & B_ERROR)) {
        !          2023:                int i;
        !          2024:                caddr_t data = bp->b_data;
        !          2025:                u_int64_t block = startblk;
        !          2026:                struct swap_key *key = NULL;
        !          2027:
        !          2028:                for (i = 0; i < npages; i++) {
        !          2029:                        /* Check if we need to decrypt */
        !          2030:                        if (uvm_swap_needdecrypt(sdp, startslot + i)) {
        !          2031:                                key = SWD_KEY(sdp, startslot + i);
        !          2032:                                swap_decrypt(key, data, data, block,
        !          2033:                                             1 << PAGE_SHIFT);
        !          2034:                        }
        !          2035:                        data += 1 << PAGE_SHIFT;
        !          2036:                        block += btodb(1 << PAGE_SHIFT);
        !          2037:                }
        !          2038:        }
        !          2039: #endif
        !          2040:        /*
        !          2041:         * kill the pager mapping
        !          2042:         */
        !          2043:        uvm_pagermapout(kva, npages);
        !          2044:
        !          2045: #ifdef UVM_SWAP_ENCRYPT
        !          2046:        /*
        !          2047:         *  No longer needed, free after encryption
        !          2048:         */
        !          2049:        if ((bp->b_flags & B_READ) == 0 && encrypt)
        !          2050:                uvm_swap_freepages(tpps, npages);
        !          2051: #endif
        !          2052:        /*
        !          2053:         * now dispose of the buf
        !          2054:         */
        !          2055:        s = splbio();
        !          2056:        if (bp->b_vp)
        !          2057:                brelvp(bp);
        !          2058:
        !              :        /*
        !              :         * NOTE(review): if brelvp() clears bp->b_vp, the test below can
        !              :         * never be true after the call above -- confirm whether the
        !              :         * vwakeup() here is still intended to run.
        !              :         */
        !          2059:        if (write && bp->b_vp)
        !          2060:                vwakeup(bp->b_vp);
        !          2061:        pool_put(&bufpool, bp);
        !          2062:        splx(s);
        !          2063:
        !          2064:        /*
        !          2065:         * finally return.
        !          2066:         */
        !          2067:        UVMHIST_LOG(pdhist, "<- done (sync)  result=%ld", result, 0, 0, 0);
        !          2068:        return (result);
        !          2069: }
        !          2070: /*
        !              :  * swapmount: set up swdevt[0].sw_dev as a swap device
        !              :  *
        !              :  * => does nothing (beyond a printf) if that device is NODEV or no
        !              :  *    vnode can be obtained for it with bdevvp()
        !              :  * => builds a swapdev flagged SWF_FAKE with the path "swap_device",
        !              :  *    inserts it into the swap list and calls swap_on() on it
        !              :  * => if swap_on() fails the swapdev is taken back out of the list
        !              :  *    and everything allocated for it here is released
        !              :  * => takes no locks; see the comment in the body
        !              :  */
        !          2071: static void
        !          2072: swapmount()
        !          2073: {
        !          2074:        struct swapdev *sdp;
        !          2075:        struct swappri *spp;
        !          2076:        struct vnode *vp;
        !          2077:        dev_t swap_dev = swdevt[0].sw_dev;
        !          2078:
        !          2079:        /*
        !          2080:         * No locking here since we happen to know that we will just be called
        !          2081:         * once before any other process has forked.
        !          2082:         */
        !          2083:
        !          2084:        if (swap_dev == NODEV) {
        !          2085:                printf("swapmount: no device\n");
        !          2086:                return;
        !          2087:        }
        !          2088:
        !          2089:        if (bdevvp(swap_dev, &vp)) {
        !          2090:                printf("swapmount: no device 2\n");
        !          2091:                return;
        !          2092:        }
        !          2093:
        !          2094:        sdp = malloc(sizeof(*sdp), M_VMSWAP, M_WAITOK);
        !          2095:        spp = malloc(sizeof(*spp), M_VMSWAP, M_WAITOK);
        !          2096:        memset(sdp, 0, sizeof(*sdp));   /* only sdp is zeroed; spp goes to swaplist_insert() as is */
        !          2097:
        !          2098:        sdp->swd_flags = SWF_FAKE;
        !          2099:        sdp->swd_dev = swap_dev;
        !          2100:        sdp->swd_vp = vp;
        !          2101:        swaplist_insert(sdp, spp, 0);   /* NOTE(review): last arg presumably the priority -- confirm */
        !          2102:        sdp->swd_pathlen = strlen("swap_device") + 1;   /* +1 for the NUL */
        !          2103:        sdp->swd_path = malloc(sdp->swd_pathlen, M_VMSWAP, M_WAITOK);
        !          2104:        if (copystr("swap_device", sdp->swd_path, sdp->swd_pathlen, 0))
        !          2105:                panic("swapmount: copystr");
        !          2106:
        !          2107:        if (swap_on(curproc, sdp)) {
        !          2108:                swaplist_find(vp, 1);   /* NOTE(review): presumably removes the entry -- confirm */
        !          2109:                swaplist_trim();
        !          2110:                vput(sdp->swd_vp);
        !          2111:                free(sdp->swd_path, M_VMSWAP);
        !          2112:                free(sdp, M_VMSWAP);
        !          2113:                return;
        !          2114:        }
        !          2115:
        !          2116:        VOP_UNLOCK(vp, 0, curproc);     /* success: unlock but keep the vnode */
        !          2117: }

CVSweb