/*	$OpenBSD: nfs_bio.c,v 1.46 2007/06/01 23:47:57 deraadt Exp $	*/
/*	$NetBSD: nfs_bio.c,v 1.25.4.2 1996/07/08 20:47:04 jtc Exp $	*/

/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Rick Macklem at The University of Guelph.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)nfs_bio.c	8.9 (Berkeley) 3/30/95
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/kernel.h>
#include <sys/namei.h>

#include <uvm/uvm_extern.h>

#include <nfs/rpcv2.h>
#include <nfs/nfsproto.h>
#include <nfs/nfs.h>
#include <nfs/nfsmount.h>
#include <nfs/nfsnode.h>
#include <nfs/nfs_var.h>

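/*
 * Globals shared with the rest of the NFS code: the slots where idle
 * nfsiod threads sleep waiting for work, the number of nfsiods running,
 * the NFS statistics block, and the queue of buffers handed off for
 * asynchronous I/O.
 */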
extern struct proc *nfs_iodwant[NFS_MAXASYNCDAEMON];
extern int nfs_numasync;
extern struct nfsstats nfsstats;
struct nfs_bufqhead nfs_bufq;

/*
 * Vnode op for read using bio
 * Any similarity to readip() is purely coincidental
 */
int
nfs_bioread(vp, uio, ioflag, cred)
	struct vnode *vp;
	struct uio *uio;
	int ioflag;
	struct ucred *cred;
{
	struct nfsnode *np = VTONFS(vp);
	int biosize, diff;
	struct buf *bp = NULL, *rabp;
	struct vattr vattr;
	struct proc *p;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	daddr64_t lbn, bn, rabn;
	caddr_t baddr;
	int got_buf = 0, nra, error = 0, n = 0, on = 0, not_readin;
	off_t offdiff;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_READ)
		panic("nfs_read mode");
#endif
	if (uio->uio_resid == 0)
		return (0);
	if (uio->uio_offset < 0)
		return (EINVAL);
	p = uio->uio_procp;
	if ((nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_GOTFSINFO)) == NFSMNT_NFSV3)
		(void)nfs_fsinfo(nmp, vp, cred, p);
	biosize = nmp->nm_rsize;
	/*
	 * For nfs, cache consistency can only be maintained approximately.
	 * Although RFC1094 does not specify the criteria, the following is
	 * believed to be compatible with the reference port.
	 * For nfs:
	 * If the file's modify time on the server has changed since the
	 * last read rpc or you have written to the file,
	 * you may have lost data cache consistency with the
	 * server, so flush all of the file's data out of the cache.
	 * Then force a getattr rpc to ensure that you have up to date
	 * attributes.
	 * NB: This implies that cache data can be read when up to
	 * NFS_ATTRTIMEO seconds out of date. If you find that you need current
	 * attributes this could be forced by setting n_attrstamp to 0 before
	 * the VOP_GETATTR() call.
	 */
	if (np->n_flag & NMODIFIED) {
		np->n_attrstamp = 0;
		error = VOP_GETATTR(vp, &vattr, cred, p);
		if (error)
			return (error);
		np->n_mtime = vattr.va_mtime.tv_sec;
	} else {
		error = VOP_GETATTR(vp, &vattr, cred, p);
		if (error)
			return (error);
		if (np->n_mtime != vattr.va_mtime.tv_sec) {
			error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
			if (error)
				return (error);
			np->n_mtime = vattr.va_mtime.tv_sec;
		}
	}

	/*
	 * update the cache read creds for this vnode
	 */
	if (np->n_rcred)
		crfree(np->n_rcred);
	np->n_rcred = cred;
	crhold(cred);

	do {
		if ((vp->v_flag & VROOT) && vp->v_type == VLNK) {
			return (nfs_readlinkrpc(vp, uio, cred));
		}
		baddr = (caddr_t)0;
		switch (vp->v_type) {
		case VREG:
			nfsstats.biocache_reads++;
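			/*
			 * lbn is the logical block (of size biosize) that
			 * contains the offset, "on" is the offset within
			 * that block, and bn is the same block expressed
			 * in DEV_BSIZE units for the buffer cache.
			 */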
			lbn = uio->uio_offset / biosize;
			on = uio->uio_offset & (biosize - 1);
			bn = lbn * (biosize / DEV_BSIZE);
			not_readin = 1;

			/*
			 * Start the read ahead(s), as required.
			 */
			if (nfs_numasync > 0 && nmp->nm_readahead > 0) {
				for (nra = 0; nra < nmp->nm_readahead &&
				    (lbn + 1 + nra) * biosize < np->n_size; nra++) {
					rabn = (lbn + 1 + nra) * (biosize / DEV_BSIZE);
					if (!incore(vp, rabn)) {
						rabp = nfs_getcacheblk(vp, rabn, biosize, p);
						if (!rabp)
							return (EINTR);
						if ((rabp->b_flags & (B_DELWRI | B_DONE)) == 0) {
							rabp->b_flags |= (B_READ | B_ASYNC);
							if (nfs_asyncio(rabp)) {
								rabp->b_flags |= B_INVAL;
								brelse(rabp);
							}
						} else
							brelse(rabp);
					}
				}
			}

			/*
			 * If the block is in the cache and has the required data
			 * in a valid region, just copy it out.
			 * Otherwise, get the block and write back/read in,
			 * as required.
			 */
			if ((bp = incore(vp, bn)) &&
			    (bp->b_flags & (B_BUSY | B_WRITEINPROG)) ==
			    (B_BUSY | B_WRITEINPROG))
				got_buf = 0;
			else {
again:
				bp = nfs_getcacheblk(vp, bn, biosize, p);
				if (!bp)
					return (EINTR);
				got_buf = 1;
				if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0) {
					bp->b_flags |= B_READ;
					not_readin = 0;
					error = nfs_doio(bp, p);
					if (error) {
						brelse(bp);
						return (error);
					}
				}
			}
			n = min((unsigned)(biosize - on), uio->uio_resid);
			offdiff = np->n_size - uio->uio_offset;
			if (offdiff < (off_t)n)
				n = (int)offdiff;
			if (not_readin && n > 0) {
				if (on < bp->b_validoff || (on + n) > bp->b_validend) {
					if (!got_buf) {
						bp = nfs_getcacheblk(vp, bn, biosize, p);
						if (!bp)
							return (EINTR);
						got_buf = 1;
					}
					bp->b_flags |= B_INVAFTERWRITE;
					if (bp->b_dirtyend > 0) {
						if ((bp->b_flags & B_DELWRI) == 0)
							panic("nfsbioread");
						if (VOP_BWRITE(bp) == EINTR)
							return (EINTR);
					} else
						brelse(bp);
					goto again;
				}
			}
			diff = (on >= bp->b_validend) ? 0 : (bp->b_validend - on);
			if (diff < n)
				n = diff;
			break;
		case VLNK:
			nfsstats.biocache_readlinks++;
			bp = nfs_getcacheblk(vp, 0, NFS_MAXPATHLEN, p);
			if (!bp)
				return (EINTR);
			if ((bp->b_flags & B_DONE) == 0) {
				bp->b_flags |= B_READ;
				error = nfs_doio(bp, p);
				if (error) {
					brelse(bp);
					return (error);
				}
			}
			n = min(uio->uio_resid, NFS_MAXPATHLEN - bp->b_resid);
			got_buf = 1;
			on = 0;
			break;
		default:
			printf(" nfsbioread: type %x unexpected\n", vp->v_type);
			break;
		}

		if (n > 0) {
			if (!baddr)
				baddr = bp->b_data;
			error = uiomove(baddr + on, (int)n, uio);
		}
		switch (vp->v_type) {
		case VREG:
			break;
		case VLNK:
			n = 0;
			break;
		default:
			printf(" nfsbioread: type %x unexpected\n", vp->v_type);
		}
		if (got_buf)
			brelse(bp);
	} while (error == 0 && uio->uio_resid > 0 && n > 0);
	return (error);
}

/*
 * Vnode op for write using bio
 */
int
nfs_write(v)
	void *v;
{
	struct vop_write_args *ap = v;
	int biosize;
	struct uio *uio = ap->a_uio;
	struct proc *p = uio->uio_procp;
	struct vnode *vp = ap->a_vp;
	struct nfsnode *np = VTONFS(vp);
	struct ucred *cred = ap->a_cred;
	int ioflag = ap->a_ioflag;
	struct buf *bp;
	struct vattr vattr;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	daddr64_t lbn, bn;
	int n, on, error = 0, extended = 0, wrotedta = 0, truncated = 0;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_WRITE)
		panic("nfs_write mode");
	if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
		panic("nfs_write proc");
#endif
	if (vp->v_type != VREG)
		return (EIO);
	if (np->n_flag & NWRITEERR) {
		np->n_flag &= ~NWRITEERR;
		return (np->n_error);
	}
	if ((nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_GOTFSINFO)) == NFSMNT_NFSV3)
		(void)nfs_fsinfo(nmp, vp, cred, p);
	if (ioflag & (IO_APPEND | IO_SYNC)) {
		if (np->n_flag & NMODIFIED) {
			np->n_attrstamp = 0;
			error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
			if (error)
				return (error);
		}
		if (ioflag & IO_APPEND) {
			np->n_attrstamp = 0;
			error = VOP_GETATTR(vp, &vattr, cred, p);
			if (error)
				return (error);
			uio->uio_offset = np->n_size;
		}
	}
	if (uio->uio_offset < 0)
		return (EINVAL);
	if (uio->uio_resid == 0)
		return (0);
	/*
	 * Maybe this should be above the vnode op call, but so long as
	 * file servers have no limits, I don't think it matters.
	 */
	if (p && uio->uio_offset + uio->uio_resid >
	    p->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
		psignal(p, SIGXFSZ);
		return (EFBIG);
	}

	/*
	 * update the cache write creds for this node.
	 */
	if (np->n_wcred)
		crfree(np->n_wcred);
	np->n_wcred = cred;
	crhold(cred);

	/*
	 * I use nm_rsize, not nm_wsize so that all buffer cache blocks
	 * will be the same size within a filesystem. nfs_writerpc will
	 * still use nm_wsize when sizing the rpc's.
	 */
	biosize = nmp->nm_rsize;
	do {

		/*
		 * XXX make sure we aren't cached in the VM page cache
		 */
		uvm_vnp_uncache(vp);

		nfsstats.biocache_writes++;
		lbn = uio->uio_offset / biosize;
		on = uio->uio_offset & (biosize - 1);
		n = min((unsigned)(biosize - on), uio->uio_resid);
		bn = lbn * (biosize / DEV_BSIZE);
again:
		bp = nfs_getcacheblk(vp, bn, biosize, p);
		if (!bp)
			return (EINTR);
		np->n_flag |= NMODIFIED;
		if (uio->uio_offset + n > np->n_size) {
			np->n_size = uio->uio_offset + n;
			uvm_vnp_setsize(vp, (u_long)np->n_size);
			extended = 1;
		} else if (uio->uio_offset + n < np->n_size)
			truncated = 1;

		/*
		 * If the new write will leave a contiguous dirty
		 * area, just update the b_dirtyoff and b_dirtyend,
		 * otherwise force a write rpc of the old dirty area.
		 */
		if (bp->b_dirtyend > 0 &&
		    (on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) {
			bp->b_proc = p;
			if (VOP_BWRITE(bp) == EINTR)
				return (EINTR);
			goto again;
		}

		error = uiomove((char *)bp->b_data + on, n, uio);
		if (error) {
			bp->b_flags |= B_ERROR;
			brelse(bp);
			return (error);
		}
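		/*
		 * Merge the newly written range into the buffer's dirty
		 * region and make sure the valid region covers it.
		 */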
		if (bp->b_dirtyend > 0) {
			bp->b_dirtyoff = min(on, bp->b_dirtyoff);
			bp->b_dirtyend = max((on + n), bp->b_dirtyend);
		} else {
			bp->b_dirtyoff = on;
			bp->b_dirtyend = on + n;
		}
		if (bp->b_validend == 0 || bp->b_validend < bp->b_dirtyoff ||
		    bp->b_validoff > bp->b_dirtyend) {
			bp->b_validoff = bp->b_dirtyoff;
			bp->b_validend = bp->b_dirtyend;
		} else {
			bp->b_validoff = min(bp->b_validoff, bp->b_dirtyoff);
			bp->b_validend = max(bp->b_validend, bp->b_dirtyend);
		}

		wrotedta = 1;

		/*
		 * Since this block is being modified, it must be written
		 * again and not just committed.
		 */

		if (NFS_ISV3(vp)) {
			rw_enter_write(&np->n_commitlock);
			if (bp->b_flags & B_NEEDCOMMIT) {
				bp->b_flags &= ~B_NEEDCOMMIT;
				nfs_del_tobecommitted_range(vp, bp);
			}
			nfs_del_committed_range(vp, bp);
			rw_exit_write(&np->n_commitlock);
		} else
			bp->b_flags &= ~B_NEEDCOMMIT;

		/*
		 * If the lease is non-cachable or IO_SYNC do bwrite().
		 */
		if (ioflag & IO_SYNC) {
			bp->b_proc = p;
			error = VOP_BWRITE(bp);
			if (error)
				return (error);
		} else if ((n + on) == biosize) {
			bp->b_proc = (struct proc *)0;
			bp->b_flags |= B_ASYNC;
			(void)nfs_writebp(bp, 0);
		} else {
			bdwrite(bp);
		}
	} while (uio->uio_resid > 0 && n > 0);

	if (wrotedta)
		VN_KNOTE(vp, NOTE_WRITE | (extended ? NOTE_EXTEND : 0) |
		    (truncated ? NOTE_TRUNCATE : 0));

	return (0);
}

/*
 * Get an nfs cache block.
 * Allocate a new one if the block isn't currently in the cache
 * and return the block marked busy. If the calling process is
 * interrupted by a signal for an interruptible mount point, return
 * NULL.
 */
struct buf *
nfs_getcacheblk(vp, bn, size, p)
	struct vnode *vp;
	daddr64_t bn;
	int size;
	struct proc *p;
{
	struct buf *bp;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);

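	/*
	 * On an interruptible mount, sleep in getblk() with PCATCH and
	 * retry every couple of seconds so a pending signal can abort
	 * the wait instead of hanging on an unresponsive server.
	 */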
	if (nmp->nm_flag & NFSMNT_INT) {
		bp = getblk(vp, bn, size, PCATCH, 0);
		while (bp == (struct buf *)0) {
			if (nfs_sigintr(nmp, (struct nfsreq *)0, p))
				return ((struct buf *)0);
			bp = getblk(vp, bn, size, 0, 2 * hz);
		}
	} else
		bp = getblk(vp, bn, size, 0, 0);
	return (bp);
}

/*
 * Flush and invalidate all dirty buffers. If another process is already
 * doing the flush, just wait for completion.
 */
int
nfs_vinvalbuf(vp, flags, cred, p, intrflg)
	struct vnode *vp;
	int flags;
	struct ucred *cred;
	struct proc *p;
	int intrflg;
{
	struct nfsnode *np = VTONFS(vp);
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	int error = 0, slpflag, slptimeo;

	if ((nmp->nm_flag & NFSMNT_INT) == 0)
		intrflg = 0;
	if (intrflg) {
		slpflag = PCATCH;
		slptimeo = 2 * hz;
	} else {
		slpflag = 0;
		slptimeo = 0;
	}
	/*
	 * First wait for any other process doing a flush to complete.
	 */
	while (np->n_flag & NFLUSHINPROG) {
		np->n_flag |= NFLUSHWANT;
		error = tsleep((caddr_t)&np->n_flag, PRIBIO + 2, "nfsvinval",
		    slptimeo);
		if (error && intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p))
			return (EINTR);
	}

	/*
	 * Now, flush as required.
	 */
	np->n_flag |= NFLUSHINPROG;
	error = vinvalbuf(vp, flags, cred, p, slpflag, 0);
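	/*
	 * If the flush fails, keep retrying; on an interruptible mount a
	 * pending signal lets us give up with EINTR after waking any
	 * other processes waiting on the flush.
	 */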
	while (error) {
		if (intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p)) {
			np->n_flag &= ~NFLUSHINPROG;
			if (np->n_flag & NFLUSHWANT) {
				np->n_flag &= ~NFLUSHWANT;
				wakeup((caddr_t)&np->n_flag);
			}
			return (EINTR);
		}
		error = vinvalbuf(vp, flags, cred, p, 0, slptimeo);
	}
	np->n_flag &= ~(NMODIFIED | NFLUSHINPROG);
	if (np->n_flag & NFLUSHWANT) {
		np->n_flag &= ~NFLUSHWANT;
		wakeup((caddr_t)&np->n_flag);
	}
	return (0);
}

/*
 * Initiate asynchronous I/O. Return an error if no nfsiods are available.
 * This is mainly to avoid queueing async I/O requests when the nfsiods
 * are all hung on a dead server.
 */
int
nfs_asyncio(bp)
	struct buf *bp;
{
	int i, s;

	if (nfs_numasync == 0)
		return (EIO);
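	/*
	 * Hand the buffer to the first idle nfsiod, if there is one.
	 */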
	for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
		if (nfs_iodwant[i]) {
			if ((bp->b_flags & B_READ) == 0) {
				bp->b_flags |= B_WRITEINPROG;
			}

			TAILQ_INSERT_TAIL(&nfs_bufq, bp, b_freelist);
			nfs_iodwant[i] = (struct proc *)0;
			wakeup((caddr_t)&nfs_iodwant[i]);
			return (0);
		}

	/*
	 * If it is a read or a write already marked B_WRITEINPROG or B_NOCACHE
	 * return EIO so the process will call nfs_doio() and do it
	 * synchronously.
	 */
	if (bp->b_flags & (B_READ | B_WRITEINPROG | B_NOCACHE))
		return (EIO);

	/*
	 * Just turn the async write into a delayed write, instead of
	 * doing it synchronously. Hopefully, at least one of the nfsiods
	 * is currently doing a write for this file and will pick up the
	 * delayed writes before going back to sleep.
	 */
	s = splbio();
	buf_dirty(bp);
	biodone(bp);
	splx(s);
	return (0);
}

/*
 * Do an I/O operation to/from a cache block. This may be called
 * synchronously or from an nfsiod.
 */
int
nfs_doio(bp, p)
	struct buf *bp;
	struct proc *p;
{
	struct uio *uiop;
	struct vnode *vp;
	struct nfsnode *np;
	struct nfsmount *nmp;
	int s, error = 0, diff, len, iomode, must_commit = 0;
	struct uio uio;
	struct iovec io;

	vp = bp->b_vp;
	np = VTONFS(vp);
	nmp = VFSTONFS(vp->v_mount);
	uiop = &uio;
	uiop->uio_iov = &io;
	uiop->uio_iovcnt = 1;
	uiop->uio_segflg = UIO_SYSSPACE;
	uiop->uio_procp = p;

	/*
	 * Historically, paging was done with physio, but no more...
	 */
	if (bp->b_flags & B_PHYS) {
		/*
		 * ...though reading /dev/drum still gets us here.
		 */
		io.iov_len = uiop->uio_resid = bp->b_bcount;
		/* mapping was done by vmapbuf() */
		io.iov_base = bp->b_data;
		uiop->uio_offset = ((off_t)bp->b_blkno) << DEV_BSHIFT;
		if (bp->b_flags & B_READ) {
			uiop->uio_rw = UIO_READ;
			nfsstats.read_physios++;
			error = nfs_readrpc(vp, uiop);
		} else {
			iomode = NFSV3WRITE_DATASYNC;
			uiop->uio_rw = UIO_WRITE;
			nfsstats.write_physios++;
			error = nfs_writerpc(vp, uiop, &iomode, &must_commit);
		}
		if (error) {
			bp->b_flags |= B_ERROR;
			bp->b_error = error;
		}
	} else if (bp->b_flags & B_READ) {
		io.iov_len = uiop->uio_resid = bp->b_bcount;
		io.iov_base = bp->b_data;
		uiop->uio_rw = UIO_READ;
		switch (vp->v_type) {
		case VREG:
			uiop->uio_offset = ((off_t)bp->b_blkno) << DEV_BSHIFT;
			nfsstats.read_bios++;
			error = nfs_readrpc(vp, uiop);
			if (!error) {
				bp->b_validoff = 0;
				if (uiop->uio_resid) {
					/*
					 * If len > 0, there is a hole in the file and
					 * no writes after the hole have been pushed to
					 * the server yet.
					 * Just zero fill the rest of the valid area.
					 */
					diff = bp->b_bcount - uiop->uio_resid;
					len = np->n_size - ((((off_t)bp->b_blkno) << DEV_BSHIFT)
					    + diff);
					if (len > 0) {
						len = min(len, uiop->uio_resid);
						bzero((char *)bp->b_data + diff, len);
						bp->b_validend = diff + len;
					} else
						bp->b_validend = diff;
				} else
					bp->b_validend = bp->b_bcount;
			}
			if (p && (vp->v_flag & VTEXT) &&
			    (np->n_mtime != np->n_vattr.va_mtime.tv_sec)) {
				uprintf("Process killed due to text file modification\n");
				psignal(p, SIGKILL);
			}
			break;
		case VLNK:
			uiop->uio_offset = (off_t)0;
			nfsstats.readlink_bios++;
			error = nfs_readlinkrpc(vp, uiop, curproc->p_ucred);
			break;
		default:
			printf("nfs_doio: type %x unexpected\n", vp->v_type);
			break;
		}
		if (error) {
			bp->b_flags |= B_ERROR;
			bp->b_error = error;
		}
	} else {
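		/*
		 * This is a write: push the buffer's dirty region to the
		 * server with a write RPC.
		 */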
		io.iov_len = uiop->uio_resid = bp->b_dirtyend
		    - bp->b_dirtyoff;
		uiop->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE
		    + bp->b_dirtyoff;
		io.iov_base = (char *)bp->b_data + bp->b_dirtyoff;
		uiop->uio_rw = UIO_WRITE;
		nfsstats.write_bios++;
		if ((bp->b_flags & (B_ASYNC | B_NEEDCOMMIT | B_NOCACHE)) == B_ASYNC)
			iomode = NFSV3WRITE_UNSTABLE;
		else
			iomode = NFSV3WRITE_FILESYNC;
		bp->b_flags |= B_WRITEINPROG;
		error = nfs_writerpc(vp, uiop, &iomode, &must_commit);

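		/*
		 * For a successful NFSv3 unstable write, remember that the
		 * range still needs a commit RPC; otherwise drop it from
		 * the commit bookkeeping. n_commitlock protects the ranges.
		 */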
		rw_enter_write(&np->n_commitlock);
		if (!error && iomode == NFSV3WRITE_UNSTABLE) {
			bp->b_flags |= B_NEEDCOMMIT;
			nfs_add_tobecommitted_range(vp, bp);
		} else {
			bp->b_flags &= ~B_NEEDCOMMIT;
			nfs_del_committed_range(vp, bp);
		}
		rw_exit_write(&np->n_commitlock);

		bp->b_flags &= ~B_WRITEINPROG;

		/*
		 * For an interrupted write, the buffer is still valid and the
		 * write hasn't been pushed to the server yet, so we can't set
		 * B_ERROR and report the interruption by setting B_EINTR. For
		 * the B_ASYNC case, B_EINTR is not relevant, so the rpc attempt
		 * is essentially a noop.
		 * For the case of a V3 write rpc not being committed to stable
		 * storage, the block is still dirty and requires either a commit
		 * rpc or another write rpc with iomode == NFSV3WRITE_FILESYNC
		 * before the block is reused. This is indicated by setting the
		 * B_DELWRI and B_NEEDCOMMIT flags.
		 */
		if (error == EINTR || (!error && (bp->b_flags & B_NEEDCOMMIT))) {
			s = splbio();
			buf_dirty(bp);
			splx(s);

			if (!(bp->b_flags & B_ASYNC) && error)
				bp->b_flags |= B_EINTR;
		} else {
			if (error) {
				bp->b_flags |= B_ERROR;
				bp->b_error = np->n_error = error;
				np->n_flag |= NWRITEERR;
			}
			bp->b_dirtyoff = bp->b_dirtyend = 0;
		}
	}
	bp->b_resid = uiop->uio_resid;
	if (must_commit)
		nfs_clearcommit(vp->v_mount);
	s = splbio();
	biodone(bp);
	splx(s);
	return (error);
}