[BACK]Return to vnd.c CVS log [TXT][DIR] Up to [local] / sys / dev

Annotation of sys/dev/vnd.c, Revision 1.1.1.1

1.1       nbrk        1: /*     $OpenBSD: vnd.c,v 1.79 2007/06/20 18:15:46 deraadt Exp $        */
                      2: /*     $NetBSD: vnd.c,v 1.26 1996/03/30 23:06:11 christos Exp $        */
                      3:
                      4: /*
                      5:  * Copyright (c) 1988 University of Utah.
                      6:  * Copyright (c) 1990, 1993
                      7:  *     The Regents of the University of California.  All rights reserved.
                      8:  *
                      9:  * This code is derived from software contributed to Berkeley by
                     10:  * the Systems Programming Group of the University of Utah Computer
                     11:  * Science Department.
                     12:  *
                     13:  * Redistribution and use in source and binary forms, with or without
                     14:  * modification, are permitted provided that the following conditions
                     15:  * are met:
                     16:  * 1. Redistributions of source code must retain the above copyright
                     17:  *    notice, this list of conditions and the following disclaimer.
                     18:  * 2. Redistributions in binary form must reproduce the above copyright
                     19:  *    notice, this list of conditions and the following disclaimer in the
                     20:  *    documentation and/or other materials provided with the distribution.
                     21:  * 3. Neither the name of the University nor the names of its contributors
                     22:  *    may be used to endorse or promote products derived from this software
                     23:  *    without specific prior written permission.
                     24:  *
                     25:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
                     26:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     27:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     28:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
                     29:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
                     30:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
                     31:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                     32:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                     33:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                     34:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                     35:  * SUCH DAMAGE.
                     36:  *
                     37:  * from: Utah $Hdr: vn.c 1.13 94/04/02$
                     38:  *
                     39:  *     @(#)vn.c        8.6 (Berkeley) 4/1/94
                     40:  */
                     41:
                     42: /*
                     43:  * Vnode disk driver.
                     44:  *
                     45:  * Block/character interface to a vnode.  Allows one to treat a file
                     46:  * as a disk (e.g. build a filesystem in it, mount it, etc.).
                     47:  *
                     48:  * NOTE 1: This uses either the VOP_BMAP/VOP_STRATEGY interface to the
                     49:  * vnode or simple VOP_READ/VOP_WRITE.  The former is suitable for swapping
                     50:  * as it doesn't distort the local buffer cache.  The latter is good for
                     51:  * building disk images as it keeps the cache consistent after the block
                     52:  * device is closed.
                     53:  *
                     54:  * NOTE 2: There is a security issue involved with this driver.
                     55:  * Once mounted all access to the contents of the "mapped" file via
                     56:  * the special file is controlled by the permissions on the special
                     57:  * file, the protection of the mapped file is ignored (effectively,
                     58:  * by using root credentials in all transactions).
                     59:  *
                     60:  * NOTE 3: Doesn't interact with leases, should it?
                     61:  */
                     62:
                     63: #include <sys/param.h>
                     64: #include <sys/systm.h>
                     65: #include <sys/namei.h>
                     66: #include <sys/proc.h>
                     67: #include <sys/errno.h>
                     68: #include <sys/buf.h>
                     69: #include <sys/malloc.h>
                     70: #include <sys/pool.h>
                     71: #include <sys/ioctl.h>
                     72: #include <sys/disklabel.h>
                     73: #include <sys/device.h>
                     74: #include <sys/disk.h>
                     75: #include <sys/stat.h>
                     76: #include <sys/mount.h>
                     77: #include <sys/vnode.h>
                     78: #include <sys/file.h>
                     79: #include <sys/rwlock.h>
                     80: #include <sys/uio.h>
                     81: #include <sys/conf.h>
                     82:
                     83: #include <crypto/blf.h>
                     84:
                     85: #include <miscfs/specfs/specdev.h>
                     86:
                     87: #include <dev/vndioctl.h>
                     88:
                     /*
                      * Debug tracing support.
                      *
                      * With VNDDEBUG defined, DNPRINTF(flag, fmt, ...) calls printf() when any
                      * bit of `flag' is also set in vnddebug.  Without VNDDEBUG the macro
                      * expands to nothing and neither tunable below exists.
                      */
                     #ifndef VNDDEBUG
                     #define        DNPRINTF(f, p...)       /* nothing */
                     #else
                     /* when zero, vndstrategy() resets the nra value it got from VOP_BMAP to 0 */
                     int dovndcluster = 1;
                     /* mask of VDB_* categories that DNPRINTF() should print */
                     int vnddebug = 0x00;
                     #define        VDB_FOLLOW      0x01
                     #define        VDB_INIT        0x02
                     #define        VDB_IO          0x04
                     #define        DNPRINTF(f, p...)       do { if ((f) & vnddebug) printf(p); } while (0)
                     #endif /* VNDDEBUG */
                     99:
                    /*
                     * Minor-number helpers.
                     *
                     * Bit 0x800 of the minor is the vndsimple() property; vndstrategy() uses
                     * VOP_READ/VOP_WRITE on the backing vnode when it is set.  vndunit()
                     * rebuilds a dev_t whose minor has been masked with 0x7ff before handing
                     * it to DISKUNIT(), so that bit does not leak into the unit number.
                     */
                    #define        vndsimple(x)    (minor(x) & 0x800)
                    #define        vndunit(x)      DISKUNIT(makedev(major(x), minor(x) & 0x7ff))

                    /*
                     * Raw-partition dev_t for the unit of `dev': the equivalent of
                     * MAKEDISKDEV, but carrying over the vndsimple() bit of the original.
                     */
                    #define        VNDLABELDEV(dev)                                        \
                            makedev(major(dev),                                     \
                                DISKMINOR(vndunit(dev), RAW_PART) |                 \
                                (vndsimple(dev) ? 0x800 : 0))
                    111:
                    112: struct vndbuf {
                    113:        struct buf      vb_buf;
                    114:        struct buf      *vb_obp;
                    115: };
                    116:
                    117: /*
                    118:  * struct vndbuf allocator
                    119:  */
                    120: struct pool     vndbufpl;
                    121:
                    122: #define        getvndbuf()     pool_get(&vndbufpl, PR_WAITOK)
                    123: #define        putvndbuf(vbp)  pool_put(&vndbufpl, vbp);
                    124:
                    125: struct vnd_softc {
                    126:        struct device    sc_dev;
                    127:        struct disk      sc_dk;
                    128:
                    129:        char             sc_file[VNDNLEN];      /* file we're covering */
                    130:        int              sc_flags;              /* flags */
                    131:        size_t           sc_size;               /* size of vnd in blocks */
                    132:        struct vnode    *sc_vp;                 /* vnode */
                    133:        struct ucred    *sc_cred;               /* credentials */
                    134:        struct buf       sc_tab;                /* transfer queue */
                    135:        blf_ctx         *sc_keyctx;             /* key context */
                    136:        struct rwlock    sc_rwlock;
                    137: };
                    138:
                    /*
                     * Bits kept in vnd_softc.sc_flags.
                     *
                     * In the code visible here: vndopen() and vndstrategy() test VNF_INITED,
                     * vndopen() sets VNF_HAVELABEL just before reading the label, sets or
                     * clears VNF_SIMPLE on the first open according to vndsimple(dev), and
                     * refuses FWRITE opens with EROFS when VNF_READONLY is set.
                     */
                    #define        VNF_ALIVE       0x0001
                    #define        VNF_INITED      0x0002
                    #define        VNF_LABELLING   0x0100
                    #define        VNF_WLABEL      0x0200
                    #define        VNF_HAVELABEL   0x0400
                    #define        VNF_SIMPLE      0x1000
                    #define        VNF_READONLY    0x2000

                    /* Open mode for unit `v': FREAD alone if read-only, else FREAD|FWRITE. */
                    #define        VNDRW(v)        \
                            (((v)->sc_flags & VNF_READONLY) ? FREAD : (FREAD | FWRITE))
                    149:
                    150: struct vnd_softc *vnd_softc;
                    151: int numvnd = 0;
                    152:
                    153: struct dkdriver vnddkdriver = { vndstrategy };
                    154:
                    155: /* called by main() at boot time */
                    156: void   vndattach(int);
                    157:
                    158: void   vndclear(struct vnd_softc *);
                    159: void   vndstart(struct vnd_softc *);
                    160: int    vndsetcred(struct vnd_softc *, struct ucred *);
                    161: void   vndiodone(struct buf *);
                    162: void   vndshutdown(void);
                    163: void   vndgetdisklabel(dev_t, struct vnd_softc *);
                    164: void   vndencrypt(struct vnd_softc *, caddr_t, size_t, daddr64_t, int);
                    165:
                    /*
                     * Per-unit lock helpers.
                     *
                     * vndlock() evaluates to the return value of rw_enter(); because RW_INTR
                     * is passed, callers must check it (vndopen() and vndclose() return it
                     * as their error when non-zero).  vndunlock() releases the write lock.
                     * The argument is parenthesized so any pointer expression may be passed.
                     */
                    #define vndlock(sc)     rw_enter(&(sc)->sc_rwlock, RW_WRITE | RW_INTR)
                    #define vndunlock(sc)   rw_exit_write(&(sc)->sc_rwlock)
                    168:
                    169: void
                    170: vndencrypt(struct vnd_softc *vnd, caddr_t addr, size_t size, daddr64_t off,
                    171:     int encrypt)
                    172: {
                    173:        int i, bsize;
                    174:        u_char iv[8];
                    175:
                    176:        bsize = dbtob(1);
                    177:        for (i = 0; i < size/bsize; i++) {
                    178:                bzero(iv, sizeof(iv));
                    179:                bcopy((u_char *)&off, iv, sizeof(off));
                    180:                blf_ecb_encrypt(vnd->sc_keyctx, iv, sizeof(iv));
                    181:                if (encrypt)
                    182:                        blf_cbc_encrypt(vnd->sc_keyctx, iv, addr, bsize);
                    183:                else
                    184:                        blf_cbc_decrypt(vnd->sc_keyctx, iv, addr, bsize);
                    185:
                    186:                addr += bsize;
                    187:                off++;
                    188:        }
                    189: }
                    190:
                    191: void
                    192: vndattach(int num)
                    193: {
                    194:        char *mem;
                    195:        u_long size;
                    196:        int i;
                    197:
                    198:        if (num <= 0)
                    199:                return;
                    200:        size = num * sizeof(struct vnd_softc);
                    201:        mem = malloc(size, M_DEVBUF, M_NOWAIT);
                    202:        if (mem == NULL) {
                    203:                printf("WARNING: no memory for vnode disks\n");
                    204:                return;
                    205:        }
                    206:        bzero(mem, size);
                    207:        vnd_softc = (struct vnd_softc *)mem;
                    208:        for (i = 0; i < num; i++) {
                    209:                rw_init(&vnd_softc[i].sc_rwlock, "vndlock");
                    210:        }
                    211:        numvnd = num;
                    212:
                    213:        pool_init(&vndbufpl, sizeof(struct vndbuf), 0, 0, 0, "vndbufpl", NULL);
                    214:        pool_setlowat(&vndbufpl, 16);
                    215:        pool_sethiwat(&vndbufpl, 1024);
                    216: }
                    217:
                    218: int
                    219: vndopen(dev_t dev, int flags, int mode, struct proc *p)
                    220: {
                    221:        int unit = vndunit(dev);
                    222:        struct vnd_softc *sc;
                    223:        int error = 0, part, pmask;
                    224:
                    225:        DNPRINTF(VDB_FOLLOW, "vndopen(%x, %x, %x, %p)\n", dev, flags, mode, p);
                    226:
                    227:        if (unit >= numvnd)
                    228:                return (ENXIO);
                    229:        sc = &vnd_softc[unit];
                    230:
                    231:        if ((error = vndlock(sc)) != 0)
                    232:                return (error);
                    233:
                    234:        if ((flags & FWRITE) && (sc->sc_flags & VNF_READONLY)) {
                    235:                error = EROFS;
                    236:                goto bad;
                    237:        }
                    238:
                    239:        if ((sc->sc_flags & VNF_INITED) &&
                    240:            (sc->sc_flags & VNF_HAVELABEL) == 0) {
                    241:                sc->sc_flags |= VNF_HAVELABEL;
                    242:                vndgetdisklabel(dev, sc);
                    243:        }
                    244:
                    245:        part = DISKPART(dev);
                    246:        pmask = 1 << part;
                    247:
                    248:        /*
                    249:         * If any partition is open, all succeeding openings must be of the
                    250:         * same type or read-only.
                    251:         */
                    252:        if (sc->sc_dk.dk_openmask) {
                    253:                if (((sc->sc_flags & VNF_SIMPLE) != 0) !=
                    254:                    (vndsimple(dev) != 0) && (flags & FWRITE)) {
                    255:                        error = EBUSY;
                    256:                        goto bad;
                    257:                }
                    258:        } else if (vndsimple(dev))
                    259:                sc->sc_flags |= VNF_SIMPLE;
                    260:        else
                    261:                sc->sc_flags &= ~VNF_SIMPLE;
                    262:
                    263:        /* Check that the partition exists. */
                    264:        if (part != RAW_PART &&
                    265:            ((sc->sc_flags & VNF_HAVELABEL) == 0 ||
                    266:            part >= sc->sc_dk.dk_label->d_npartitions ||
                    267:            sc->sc_dk.dk_label->d_partitions[part].p_fstype == FS_UNUSED)) {
                    268:                error = ENXIO;
                    269:                goto bad;
                    270:        }
                    271:
                    272:        /* Prevent our unit from being unconfigured while open. */
                    273:        switch (mode) {
                    274:        case S_IFCHR:
                    275:                sc->sc_dk.dk_copenmask |= pmask;
                    276:                break;
                    277:
                    278:        case S_IFBLK:
                    279:                sc->sc_dk.dk_bopenmask |= pmask;
                    280:                break;
                    281:        }
                    282:        sc->sc_dk.dk_openmask =
                    283:            sc->sc_dk.dk_copenmask | sc->sc_dk.dk_bopenmask;
                    284:
                    285:        error = 0;
                    286: bad:
                    287:        vndunlock(sc);
                    288:        return (error);
                    289: }
                    290:
                    291: /*
                    292:  * Load the label information on the named device
                    293:  */
                    294: void
                    295: vndgetdisklabel(dev_t dev, struct vnd_softc *sc)
                    296: {
                    297:        struct disklabel *lp = sc->sc_dk.dk_label;
                    298:        char *errstring = NULL;
                    299:
                    300:        bzero(lp, sizeof(struct disklabel));
                    301:
                    302:        lp->d_secsize = 512;
                    303:        lp->d_ntracks = 1;
                    304:        lp->d_nsectors = 100;
                    305:        lp->d_ncylinders = sc->sc_size / 100;
                    306:        lp->d_secpercyl = 100;          /* lp->d_ntracks * lp->d_nsectors */
                    307:
                    308:        strncpy(lp->d_typename, "vnd device", sizeof(lp->d_typename));
                    309:        lp->d_type = DTYPE_VND;
                    310:        strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
                    311:        DL_SETDSIZE(lp, sc->sc_size);
                    312:        lp->d_rpm = 3600;
                    313:        lp->d_interleave = 1;
                    314:        lp->d_flags = 0;
                    315:        lp->d_version = 1;
                    316:
                    317:        lp->d_magic = DISKMAGIC;
                    318:        lp->d_magic2 = DISKMAGIC;
                    319:        lp->d_checksum = dkcksum(lp);
                    320:
                    321:        /* Call the generic disklabel extraction routine */
                    322:        errstring = readdisklabel(VNDLABELDEV(dev), vndstrategy, lp, 0);
                    323:        if (errstring) {
                    324:                DNPRINTF(VDB_IO, "%s: %s\n", sc->sc_dev.dv_xname,
                    325:                    errstring);
                    326:                return;
                    327:        }
                    328: }
                    329:
                    330: int
                    331: vndclose(dev_t dev, int flags, int mode, struct proc *p)
                    332: {
                    333:        int unit = vndunit(dev);
                    334:        struct vnd_softc *sc;
                    335:        int error = 0, part;
                    336:
                    337:        DNPRINTF(VDB_FOLLOW, "vndclose(%x, %x, %x, %p)\n", dev, flags, mode, p);
                    338:
                    339:        if (unit >= numvnd)
                    340:                return (ENXIO);
                    341:        sc = &vnd_softc[unit];
                    342:
                    343:        if ((error = vndlock(sc)) != 0)
                    344:                return (error);
                    345:
                    346:        part = DISKPART(dev);
                    347:
                    348:        /* ...that much closer to allowing unconfiguration... */
                    349:        switch (mode) {
                    350:        case S_IFCHR:
                    351:                sc->sc_dk.dk_copenmask &= ~(1 << part);
                    352:                break;
                    353:
                    354:        case S_IFBLK:
                    355:                sc->sc_dk.dk_bopenmask &= ~(1 << part);
                    356:                break;
                    357:        }
                    358:        sc->sc_dk.dk_openmask =
                    359:            sc->sc_dk.dk_copenmask | sc->sc_dk.dk_bopenmask;
                    360:
                    361:        vndunlock(sc);
                    362:        return (0);
                    363: }
                    364:
                    365: /*
                    366:  * Two methods are used, the traditional buffercache bypassing and the
                    367:  * newer, cache-coherent on unmount, one.
                    368:  *
                    369:  * Former method:
                    370:  * Break the request into bsize pieces and submit using VOP_BMAP/VOP_STRATEGY.
                    371:  * Note that this driver can only be used for swapping over NFS on the hp
                    372:  * since nfs_strategy on the vax cannot handle u-areas and page tables.
                    373:  *
                    374:  * Latter method:
                    375:  * Repack the buffer into an uio structure and use VOP_READ/VOP_WRITE to
                    376:  * access the underlying file.
                    377:  */
                    378: void
                    379: vndstrategy(struct buf *bp)
                    380: {
                    381:        int unit = vndunit(bp->b_dev);
                    382:        struct vnd_softc *vnd = &vnd_softc[unit];
                    383:        struct vndbuf *nbp;
                    384:        int bsize;
                    385:        off_t bn;
                    386:        caddr_t addr;
                    387:        size_t resid;
                    388:        int sz, flags, error, s;
                    389:        struct iovec aiov;
                    390:        struct uio auio;
                    391:        struct proc *p = curproc;
                    392:
                    393:        DNPRINTF(VDB_FOLLOW, "vndstrategy(%p): unit %d\n", bp, unit);
                    394:
                    395:        if ((vnd->sc_flags & VNF_INITED) == 0) {
                    396:                bp->b_error = ENXIO;
                    397:                bp->b_flags |= B_ERROR;
                    398:                s = splbio();
                    399:                biodone(bp);
                    400:                splx(s);
                    401:                return;
                    402:        }
                    403:
                    404:        bn = bp->b_blkno;
                    405:        bp->b_resid = bp->b_bcount;
                    406:
                    407:        if (bn < 0) {
                    408:                bp->b_error = EINVAL;
                    409:                bp->b_flags |= B_ERROR;
                    410:                s = splbio();
                    411:                biodone(bp);
                    412:                splx(s);
                    413:                return;
                    414:        }
                    415:
                    416:        /* If we have a label, do a boundary check. */
                    417:        if (vnd->sc_flags & VNF_HAVELABEL) {
                    418:                if (bounds_check_with_label(bp, vnd->sc_dk.dk_label, 1) <= 0) {
                    419:                        s = splbio();
                    420:                        biodone(bp);
                    421:                        splx(s);
                    422:                        return;
                    423:                }
                    424:
                    425:                /*
                    426:                 * bounds_check_with_label() changes bp->b_resid, reset it
                    427:                 */
                    428:                bp->b_resid = bp->b_bcount;
                    429:        }
                    430:
                    431:        sz = howmany(bp->b_bcount, DEV_BSIZE);
                    432:
                    433:        /* No bypassing of buffer cache?  */
                    434:        if (vndsimple(bp->b_dev)) {
                    435:                /* Loop until all queued requests are handled.  */
                    436:                for (;;) {
                    437:                        int part = DISKPART(bp->b_dev);
                    438:                        daddr64_t off = DL_GETPOFFSET(&vnd->sc_dk.dk_label->d_partitions[part]);
                    439:
                    440:                        aiov.iov_base = bp->b_data;
                    441:                        auio.uio_resid = aiov.iov_len = bp->b_bcount;
                    442:                        auio.uio_iov = &aiov;
                    443:                        auio.uio_iovcnt = 1;
                    444:                        auio.uio_offset = dbtob((off_t)(bp->b_blkno + off));
                    445:                        auio.uio_segflg = UIO_SYSSPACE;
                    446:                        auio.uio_procp = p;
                    447:
                    448:                        vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY, p);
                    449:                        if (bp->b_flags & B_READ) {
                    450:                                auio.uio_rw = UIO_READ;
                    451:                                bp->b_error = VOP_READ(vnd->sc_vp, &auio, 0,
                    452:                                    vnd->sc_cred);
                    453:                                if (vnd->sc_keyctx)
                    454:                                        vndencrypt(vnd, bp->b_data,
                    455:                                           bp->b_bcount, bp->b_blkno, 0);
                    456:                        } else {
                    457:                                if (vnd->sc_keyctx)
                    458:                                        vndencrypt(vnd, bp->b_data,
                    459:                                           bp->b_bcount, bp->b_blkno, 1);
                    460:                                auio.uio_rw = UIO_WRITE;
                    461:                                /*
                    462:                                 * Upper layer has already checked I/O for
                    463:                                 * limits, so there is no need to do it again.
                    464:                                 */
                    465:                                bp->b_error = VOP_WRITE(vnd->sc_vp, &auio,
                    466:                                    IO_NOLIMIT, vnd->sc_cred);
                    467:                                /* Data in buffer cache needs to be in clear */
                    468:                                if (vnd->sc_keyctx)
                    469:                                        vndencrypt(vnd, bp->b_data,
                    470:                                           bp->b_bcount, bp->b_blkno, 0);
                    471:                        }
                    472:                        VOP_UNLOCK(vnd->sc_vp, 0, p);
                    473:                        if (bp->b_error)
                    474:                                bp->b_flags |= B_ERROR;
                    475:                        bp->b_resid = auio.uio_resid;
                    476:                        s = splbio();
                    477:                        biodone(bp);
                    478:                        splx(s);
                    479:
                    480:                        /* If nothing more is queued, we are done.  */
                    481:                        if (!vnd->sc_tab.b_active)
                    482:                                return;
                    483:
                    484:                        /*
                    485:                         * Dequeue now since lower level strategy
                    486:                         * routine might queue using same links.
                    487:                         */
                    488:                        s = splbio();
                    489:                        bp = vnd->sc_tab.b_actf;
                    490:                        vnd->sc_tab.b_actf = bp->b_actf;
                    491:                        vnd->sc_tab.b_active--;
                    492:                        splx(s);
                    493:                }
                    494:        }
                    495:
                    496:        /* The old-style buffercache bypassing method.  */
                    497:        bn += DL_GETPOFFSET(&vnd->sc_dk.dk_label->d_partitions[DISKPART(bp->b_dev)]);
                    498:        bn = dbtob(bn);
                    499:        bsize = vnd->sc_vp->v_mount->mnt_stat.f_iosize;
                    500:        addr = bp->b_data;
                    501:        flags = bp->b_flags | B_CALL;
                    502:        for (resid = bp->b_resid; resid; resid -= sz) {
                    503:                struct vnode *vp;
                    504:                daddr64_t nbn;
                    505:                int off, s, nra;
                    506:
                    507:                nra = 0;
                    508:                vn_lock(vnd->sc_vp, LK_RETRY | LK_EXCLUSIVE, p);
                    509:                error = VOP_BMAP(vnd->sc_vp, bn / bsize, &vp, &nbn, &nra);
                    510:                VOP_UNLOCK(vnd->sc_vp, 0, p);
                    511:                if (error == 0 && (long)nbn == -1)
                    512:                        error = EIO;
                    513: #ifdef VNDDEBUG
                    514:                if (!dovndcluster)
                    515:                        nra = 0;
                    516: #endif
                    517:
                    518:                if ((off = bn % bsize) != 0)
                    519:                        sz = bsize - off;
                    520:                else
                    521:                        sz = (1 + nra) * bsize;
                    522:                if (resid < sz)
                    523:                        sz = resid;
                    524:
                    525:                DNPRINTF(VDB_IO, "vndstrategy: vp %p/%p bn %x/%x sz %x\n",
                    526:                    vnd->sc_vp, vp, bn, nbn, sz);
                    527:
                    528:                s = splbio();
                    529:                nbp = getvndbuf();
                    530:                splx(s);
                    531:                nbp->vb_buf.b_flags = flags;
                    532:                nbp->vb_buf.b_bcount = sz;
                    533:                nbp->vb_buf.b_bufsize = bp->b_bufsize;
                    534:                nbp->vb_buf.b_error = 0;
                    535:                if (vp->v_type == VBLK || vp->v_type == VCHR)
                    536:                        nbp->vb_buf.b_dev = vp->v_rdev;
                    537:                else
                    538:                        nbp->vb_buf.b_dev = NODEV;
                    539:                nbp->vb_buf.b_data = addr;
                    540:                nbp->vb_buf.b_blkno = nbn + btodb(off);
                    541:                nbp->vb_buf.b_proc = bp->b_proc;
                    542:                nbp->vb_buf.b_iodone = vndiodone;
                    543:                nbp->vb_buf.b_vp = vp;
                    544:                nbp->vb_buf.b_dirtyoff = bp->b_dirtyoff;
                    545:                nbp->vb_buf.b_dirtyend = bp->b_dirtyend;
                    546:                nbp->vb_buf.b_validoff = bp->b_validoff;
                    547:                nbp->vb_buf.b_validend = bp->b_validend;
                    548:                LIST_INIT(&nbp->vb_buf.b_dep);
                    549:
                    550:                /* save a reference to the old buffer */
                    551:                nbp->vb_obp = bp;
                    552:
                    553:                /*
                    554:                 * If there was an error or a hole in the file...punt.
                    555:                 * Note that we deal with this after the nbp allocation.
                    556:                 * This ensures that we properly clean up any operations
                    557:                 * that we have already fired off.
                    558:                 *
                    559:                 * XXX we could deal with holes here but it would be
                    560:                 * a hassle (in the write case).
                    561:                 * We must still however charge for the write even if there
                    562:                 * was an error.
                    563:                 */
                    564:                if (error) {
                    565:                        nbp->vb_buf.b_error = error;
                    566:                        nbp->vb_buf.b_flags |= B_ERROR;
                    567:                        bp->b_resid -= (resid - sz);
                    568:                        s = splbio();
                    569:                        /* charge for the write */
                    570:                        if ((nbp->vb_buf.b_flags & B_READ) == 0)
                    571:                                nbp->vb_buf.b_vp->v_numoutput++;
                    572:                        biodone(&nbp->vb_buf);
                    573:                        splx(s);
                    574:                        return;
                    575:                }
                    576:                /*
                    577:                 * Just sort by block number
                    578:                 */
                    579:                nbp->vb_buf.b_cylinder = nbp->vb_buf.b_blkno;
                    580:                s = splbio();
                    581:                disksort(&vnd->sc_tab, &nbp->vb_buf);
                    582:                vnd->sc_tab.b_active++;
                    583:                vndstart(vnd);
                    584:                splx(s);
                    585:                bn += sz;
                    586:                addr += sz;
                    587:        }
                    588: }
                    589:
                    590: /*
                    591:  * Feed requests sequentially.
                    592:  * We do it this way to keep from flooding NFS servers if we are connected
                    593:  * to an NFS file.  This places the burden on the client rather than the
                    594:  * server.
                    595:  */
                    596: void
                    597: vndstart(struct vnd_softc *vnd)
                    598: {
                    599:        struct buf *bp;
                    600:
                    601:        /*
                    602:         * Dequeue now since lower level strategy routine might
                    603:         * queue using same links
                    604:         */
                    605:        bp = vnd->sc_tab.b_actf;
                    606:        vnd->sc_tab.b_actf = bp->b_actf;
                    607:
                    608:        DNPRINTF(VDB_IO,
                    609:            "vndstart(%d): bp %p vp %p blkno %x addr %p cnt %lx\n",
                    610:            vnd-vnd_softc, bp, bp->b_vp, bp->b_blkno, bp->b_data,
                    611:            bp->b_bcount);
                    612:
                    613:        /* Instrumentation. */
                    614:        disk_busy(&vnd->sc_dk);
                    615:
                    616:        if ((bp->b_flags & B_READ) == 0)
                    617:                bp->b_vp->v_numoutput++;
                    618:        VOP_STRATEGY(bp);
                    619: }
                    620:
                    621: void
                    622: vndiodone(struct buf *bp)
                    623: {
                    624:        struct vndbuf *vbp = (struct vndbuf *) bp;
                    625:        struct buf *pbp = vbp->vb_obp;
                    626:        struct vnd_softc *vnd = &vnd_softc[vndunit(pbp->b_dev)];
                    627:
                    628:        splassert(IPL_BIO);
                    629:
                    630:        DNPRINTF(VDB_IO,
                    631:            "vndiodone(%d): vbp %p vp %p blkno %x addr %p cnt %lx\n",
                    632:            vnd-vnd_softc, vbp, vbp->vb_buf.b_vp, vbp->vb_buf.b_blkno,
                    633:            vbp->vb_buf.b_data, vbp->vb_buf.b_bcount);
                    634:
                    635:        if (vbp->vb_buf.b_error) {
                    636:                DNPRINTF(VDB_IO, "vndiodone: vbp %p error %d\n", vbp,
                    637:                    vbp->vb_buf.b_error);
                    638:
                    639:                pbp->b_flags |= B_ERROR;
                    640:                pbp->b_error = biowait(&vbp->vb_buf);
                    641:        }
                    642:        pbp->b_resid -= vbp->vb_buf.b_bcount;
                    643:        putvndbuf(vbp);
                    644:        if (vnd->sc_tab.b_active) {
                    645:                disk_unbusy(&vnd->sc_dk, (pbp->b_bcount - pbp->b_resid),
                    646:                    (pbp->b_flags & B_READ));
                    647:                if (!vnd->sc_tab.b_actf)
                    648:                        vnd->sc_tab.b_active--;
                    649:        }
                    650:        if (pbp->b_resid == 0) {
                    651:                DNPRINTF(VDB_IO, "vndiodone: pbp %p iodone\n", pbp);
                    652:                biodone(pbp);
                    653:        }
                    654:
                    655: }
                    656:
                    657: /* ARGSUSED */
                    658: int
                    659: vndread(dev_t dev, struct uio *uio, int flags)
                    660: {
                    661:        int unit = vndunit(dev);
                    662:        struct vnd_softc *sc;
                    663:
                    664:        DNPRINTF(VDB_FOLLOW, "vndread(%x, %p)\n", dev, uio);
                    665:
                    666:        if (unit >= numvnd)
                    667:                return (ENXIO);
                    668:        sc = &vnd_softc[unit];
                    669:
                    670:        if ((sc->sc_flags & VNF_INITED) == 0)
                    671:                return (ENXIO);
                    672:
                    673:        return (physio(vndstrategy, NULL, dev, B_READ, minphys, uio));
                    674: }
                    675:
                    676: /* ARGSUSED */
                    677: int
                    678: vndwrite(dev_t dev, struct uio *uio, int flags)
                    679: {
                    680:        int unit = vndunit(dev);
                    681:        struct vnd_softc *sc;
                    682:
                    683:        DNPRINTF(VDB_FOLLOW, "vndwrite(%x, %p)\n", dev, uio);
                    684:
                    685:        if (unit >= numvnd)
                    686:                return (ENXIO);
                    687:        sc = &vnd_softc[unit];
                    688:
                    689:        if ((sc->sc_flags & VNF_INITED) == 0)
                    690:                return (ENXIO);
                    691:
                    692:        return (physio(vndstrategy, NULL, dev, B_WRITE, minphys, uio));
                    693: }
                    694:
                    695: /* ARGSUSED */
                    696: int
                    697: vndioctl(dev_t dev, u_long cmd, caddr_t addr, int flag, struct proc *p)
                    698: {
                    699:        int unit = vndunit(dev);
                    700:        struct vnd_softc *vnd;
                    701:        struct vnd_ioctl *vio;
                    702:        struct vnd_user *vnu;
                    703:        struct vattr vattr;
                    704:        struct nameidata nd;
                    705:        int error, part, pmask, s;
                    706:
                    707:        DNPRINTF(VDB_FOLLOW, "vndioctl(%x, %lx, %p, %x, %p): unit %d\n",
                    708:            dev, cmd, addr, flag, p, unit);
                    709:
                    710:        error = suser(p, 0);
                    711:        if (error)
                    712:                return (error);
                    713:        if (unit >= numvnd)
                    714:                return (ENXIO);
                    715:
                    716:        vnd = &vnd_softc[unit];
                    717:        vio = (struct vnd_ioctl *)addr;
                    718:        switch (cmd) {
                    719:
                    720:        case VNDIOCSET:
                    721:                if (vnd->sc_flags & VNF_INITED)
                    722:                        return (EBUSY);
                    723:                if (!(vnd->sc_flags & VNF_SIMPLE) && vio->vnd_keylen)
                    724:                        return (EINVAL);
                    725:
                    726:                if ((error = vndlock(vnd)) != 0)
                    727:                        return (error);
                    728:
                    729:                if ((error = copyinstr(vio->vnd_file, vnd->sc_file,
                    730:                    sizeof(vnd->sc_file), NULL))) {
                    731:                        vndunlock(vnd);
                    732:                        return (error);
                    733:                }
                    734:
                    735:                bzero(vnd->sc_dev.dv_xname, sizeof(vnd->sc_dev.dv_xname));
                    736:                if (snprintf(vnd->sc_dev.dv_xname, sizeof(vnd->sc_dev.dv_xname),
                    737:                    "vnd%d", unit) >= sizeof(vnd->sc_dev.dv_xname)) {
                    738:                        printf("VNDIOCSET: device name too long\n");
                    739:                        vndunlock(vnd);
                    740:                        return(ENXIO);
                    741:                }
                    742:
                    743:                /*
                    744:                 * Open for read and write first. This lets vn_open() weed out
                    745:                 * directories, sockets, etc. so we don't have to worry about
                    746:                 * them.
                    747:                 */
                    748:                NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, vio->vnd_file, p);
                    749:                vnd->sc_flags &= ~VNF_READONLY;
                    750:                error = vn_open(&nd, FREAD|FWRITE, 0);
                    751:                if (error == EROFS) {
                    752:                        vnd->sc_flags |= VNF_READONLY;
                    753:                        error = vn_open(&nd, FREAD, 0);
                    754:                }
                    755:                if (error) {
                    756:                        vndunlock(vnd);
                    757:                        return (error);
                    758:                }
                    759:
                    760:                error = VOP_GETATTR(nd.ni_vp, &vattr, p->p_ucred, p);
                    761:                if (error) {
                    762:                        VOP_UNLOCK(nd.ni_vp, 0, p);
                    763:                        (void) vn_close(nd.ni_vp, VNDRW(vnd), p->p_ucred, p);
                    764:                        vndunlock(vnd);
                    765:                        return (error);
                    766:                }
                    767:                VOP_UNLOCK(nd.ni_vp, 0, p);
                    768:                vnd->sc_vp = nd.ni_vp;
                    769:                vnd->sc_size = btodb(vattr.va_size);    /* note truncation */
                    770:                if ((error = vndsetcred(vnd, p->p_ucred)) != 0) {
                    771:                        (void) vn_close(nd.ni_vp, VNDRW(vnd), p->p_ucred, p);
                    772:                        vndunlock(vnd);
                    773:                        return (error);
                    774:                }
                    775:
                    776:                if (vio->vnd_keylen > 0) {
                    777:                        char key[BLF_MAXUTILIZED];
                    778:
                    779:                        if (vio->vnd_keylen > sizeof(key))
                    780:                                vio->vnd_keylen = sizeof(key);
                    781:
                    782:                        if ((error = copyin(vio->vnd_key, key,
                    783:                            vio->vnd_keylen)) != 0) {
                    784:                                (void) vn_close(nd.ni_vp, VNDRW(vnd),
                    785:                                    p->p_ucred, p);
                    786:                                vndunlock(vnd);
                    787:                                return (error);
                    788:                        }
                    789:
                    790:                        vnd->sc_keyctx = malloc(sizeof(*vnd->sc_keyctx), M_DEVBUF,
                    791:                            M_WAITOK);
                    792:                        blf_key(vnd->sc_keyctx, key, vio->vnd_keylen);
                    793:                        bzero(key, vio->vnd_keylen);
                    794:                } else
                    795:                        vnd->sc_keyctx = NULL;
                    796:
                    797:                vio->vnd_size = dbtob((off_t)vnd->sc_size);
                    798:                vnd->sc_flags |= VNF_INITED;
                    799:
                    800:                DNPRINTF(VDB_INIT, "vndioctl: SET vp %p size %llx\n",
                    801:                    vnd->sc_vp, (unsigned long long)vnd->sc_size);
                    802:
                    803:                /* Attach the disk. */
                    804:                vnd->sc_dk.dk_driver = &vnddkdriver;
                    805:                vnd->sc_dk.dk_name = vnd->sc_dev.dv_xname;
                    806:                disk_attach(&vnd->sc_dk);
                    807:
                    808:                vndunlock(vnd);
                    809:
                    810:                break;
                    811:
                    812:        case VNDIOCCLR:
                    813:                if ((vnd->sc_flags & VNF_INITED) == 0)
                    814:                        return (ENXIO);
                    815:
                    816:                if ((error = vndlock(vnd)) != 0)
                    817:                        return (error);
                    818:
                    819:                /*
                    820:                 * Don't unconfigure if any other partitions are open
                    821:                 * or if both the character and block flavors of this
                    822:                 * partition are open.
                    823:                 */
                    824:                part = DISKPART(dev);
                    825:                pmask = (1 << part);
                    826:                if ((vnd->sc_dk.dk_openmask & ~pmask) ||
                    827:                    ((vnd->sc_dk.dk_bopenmask & pmask) &&
                    828:                    (vnd->sc_dk.dk_copenmask & pmask))) {
                    829:                        vndunlock(vnd);
                    830:                        return (EBUSY);
                    831:                }
                    832:
                    833:                vndclear(vnd);
                    834:                DNPRINTF(VDB_INIT, "vndioctl: CLRed\n");
                    835:
                    836:                /* Free crypto key */
                    837:                if (vnd->sc_keyctx) {
                    838:                        bzero(vnd->sc_keyctx, sizeof(*vnd->sc_keyctx));
                    839:                        free(vnd->sc_keyctx, M_DEVBUF);
                    840:                }
                    841:
                    842:                /* Detatch the disk. */
                    843:                disk_detach(&vnd->sc_dk);
                    844:
                    845:                /* This must be atomic. */
                    846:                s = splhigh();
                    847:                vndunlock(vnd);
                    848:                bzero(vnd, sizeof(struct vnd_softc));
                    849:                splx(s);
                    850:                break;
                    851:
                    852:        case VNDIOCGET:
                    853:                vnu = (struct vnd_user *)addr;
                    854:
                    855:                if (vnu->vnu_unit == -1)
                    856:                        vnu->vnu_unit = unit;
                    857:                if (vnu->vnu_unit >= numvnd)
                    858:                        return (ENXIO);
                    859:                if (vnu->vnu_unit < 0)
                    860:                        return (EINVAL);
                    861:
                    862:                vnd = &vnd_softc[vnu->vnu_unit];
                    863:
                    864:                if (vnd->sc_flags & VNF_INITED) {
                    865:                        error = VOP_GETATTR(vnd->sc_vp, &vattr, p->p_ucred, p);
                    866:                        if (error)
                    867:                                return (error);
                    868:
                    869:                        strlcpy(vnu->vnu_file, vnd->sc_file,
                    870:                            sizeof(vnu->vnu_file));
                    871:                        vnu->vnu_dev = vattr.va_fsid;
                    872:                        vnu->vnu_ino = vattr.va_fileid;
                    873:                } else {
                    874:                        vnu->vnu_dev = 0;
                    875:                        vnu->vnu_ino = 0;
                    876:                }
                    877:
                    878:                break;
                    879:
                    880:        case DIOCGDINFO:
                    881:                if ((vnd->sc_flags & VNF_HAVELABEL) == 0)
                    882:                        return (ENOTTY);
                    883:                *(struct disklabel *)addr = *(vnd->sc_dk.dk_label);
                    884:                return (0);
                    885:
                    886:        case DIOCGPART:
                    887:                if ((vnd->sc_flags & VNF_HAVELABEL) == 0)
                    888:                        return (ENOTTY);
                    889:                ((struct partinfo *)addr)->disklab = vnd->sc_dk.dk_label;
                    890:                ((struct partinfo *)addr)->part =
                    891:                    &vnd->sc_dk.dk_label->d_partitions[DISKPART(dev)];
                    892:                return (0);
                    893:
                    894:        case DIOCWDINFO:
                    895:        case DIOCSDINFO:
                    896:                if ((vnd->sc_flags & VNF_HAVELABEL) == 0)
                    897:                        return (ENOTTY);
                    898:                if ((flag & FWRITE) == 0)
                    899:                        return (EBADF);
                    900:
                    901:                if ((error = vndlock(vnd)) != 0)
                    902:                        return (error);
                    903:                vnd->sc_flags |= VNF_LABELLING;
                    904:
                    905:                error = setdisklabel(vnd->sc_dk.dk_label,
                    906:                    (struct disklabel *)addr, /*vnd->sc_dk.dk_openmask : */0);
                    907:                if (error == 0) {
                    908:                        if (cmd == DIOCWDINFO)
                    909:                                error = writedisklabel(VNDLABELDEV(dev),
                    910:                                    vndstrategy, vnd->sc_dk.dk_label);
                    911:                }
                    912:
                    913:                vnd->sc_flags &= ~VNF_LABELLING;
                    914:                vndunlock(vnd);
                    915:                return (error);
                    916:
                    917:        case DIOCWLABEL:
                    918:                if ((flag & FWRITE) == 0)
                    919:                        return (EBADF);
                    920:                if (*(int *)addr)
                    921:                        vnd->sc_flags |= VNF_WLABEL;
                    922:                else
                    923:                        vnd->sc_flags &= ~VNF_WLABEL;
                    924:                return (0);
                    925:
                    926:        default:
                    927:                return (ENOTTY);
                    928:        }
                    929:
                    930:        return (0);
                    931: }
                    932:
                    933: /*
                    934:  * Duplicate the current processes' credentials.  Since we are called only
                    935:  * as the result of a SET ioctl and only root can do that, any future access
                    936:  * to this "disk" is essentially as root.  Note that credentials may change
                    937:  * if some other uid can write directly to the mapped file (NFS).
                    938:  */
                    939: int
                    940: vndsetcred(struct vnd_softc *vnd, struct ucred *cred)
                    941: {
                    942:        struct uio auio;
                    943:        struct iovec aiov;
                    944:        char *tmpbuf;
                    945:        int error;
                    946:        struct proc *p = curproc;
                    947:
                    948:        vnd->sc_cred = crdup(cred);
                    949:        tmpbuf = malloc(DEV_BSIZE, M_TEMP, M_WAITOK);
                    950:
                    951:        /* XXX: Horrible kludge to establish credentials for NFS */
                    952:        aiov.iov_base = tmpbuf;
                    953:        aiov.iov_len = MIN(DEV_BSIZE, dbtob((off_t)vnd->sc_size));
                    954:        auio.uio_iov = &aiov;
                    955:        auio.uio_iovcnt = 1;
                    956:        auio.uio_offset = 0;
                    957:        auio.uio_rw = UIO_READ;
                    958:        auio.uio_segflg = UIO_SYSSPACE;
                    959:        auio.uio_resid = aiov.iov_len;
                    960:        vn_lock(vnd->sc_vp, LK_RETRY | LK_EXCLUSIVE, p);
                    961:        error = VOP_READ(vnd->sc_vp, &auio, 0, vnd->sc_cred);
                    962:        VOP_UNLOCK(vnd->sc_vp, 0, p);
                    963:
                    964:        free(tmpbuf, M_TEMP);
                    965:        return (error);
                    966: }
                    967:
                    968: void
                    969: vndshutdown(void)
                    970: {
                    971:        struct vnd_softc *vnd;
                    972:
                    973:        for (vnd = &vnd_softc[0]; vnd < &vnd_softc[numvnd]; vnd++)
                    974:                if (vnd->sc_flags & VNF_INITED)
                    975:                        vndclear(vnd);
                    976: }
                    977:
                    978: void
                    979: vndclear(struct vnd_softc *vnd)
                    980: {
                    981:        struct vnode *vp = vnd->sc_vp;
                    982:        struct proc *p = curproc;               /* XXX */
                    983:
                    984:        DNPRINTF(VDB_FOLLOW, "vndclear(%p): vp %p\n", vnd, vp);
                    985:
                    986:        vnd->sc_flags &= ~VNF_INITED;
                    987:        if (vp == NULL)
                    988:                panic("vndioctl: null vp");
                    989:        (void) vn_close(vp, VNDRW(vnd), vnd->sc_cred, p);
                    990:        crfree(vnd->sc_cred);
                    991:        vnd->sc_vp = NULL;
                    992:        vnd->sc_cred = NULL;
                    993:        vnd->sc_size = 0;
                    994: }
                    995:
                    996: daddr64_t
                    997: vndsize(dev_t dev)
                    998: {
                    999:        int unit = vndunit(dev);
                   1000:        struct vnd_softc *vnd = &vnd_softc[unit];
                   1001:
                   1002:        if (unit >= numvnd || (vnd->sc_flags & VNF_INITED) == 0)
                   1003:                return (-1);
                   1004:        return (vnd->sc_size);
                   1005: }
                   1006:
                   1007: int
                   1008: vnddump(dev_t dev, daddr64_t blkno, caddr_t va, size_t size)
                   1009: {
                   1010:
                   1011:        /* Not implemented. */
                   1012:        return (ENXIO);
                   1013: }

CVSweb