/*	$OpenBSD: nfs_bio.c,v 1.46 2007/06/01 23:47:57 deraadt Exp $	*/
/*	$NetBSD: nfs_bio.c,v 1.25.4.2 1996/07/08 20:47:04 jtc Exp $	*/

/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Rick Macklem at The University of Guelph.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)nfs_bio.c	8.9 (Berkeley) 3/30/95
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/kernel.h>
#include <sys/namei.h>

#include <uvm/uvm_extern.h>

#include <nfs/rpcv2.h>
#include <nfs/nfsproto.h>
#include <nfs/nfs.h>
#include <nfs/nfsmount.h>
#include <nfs/nfsnode.h>
#include <nfs/nfs_var.h>

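/*
 * Globals shared with the rest of the NFS code: the slots where idle
 * nfsiod threads sleep waiting for work, the number of nfsiods running,
 * the NFS statistics block, and the queue of buffers handed off for
 * asynchronous I/O.
 */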
extern struct proc *nfs_iodwant[NFS_MAXASYNCDAEMON];
extern int nfs_numasync;
extern struct nfsstats nfsstats;
struct nfs_bufqhead nfs_bufq;

/*
 * Vnode op for read using bio
 * Any similarity to readip() is purely coincidental
 */
int
nfs_bioread(vp, uio, ioflag, cred)
	struct vnode *vp;
	struct uio *uio;
	int ioflag;
	struct ucred *cred;
{
	struct nfsnode *np = VTONFS(vp);
	int biosize, diff;
	struct buf *bp = NULL, *rabp;
	struct vattr vattr;
	struct proc *p;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	daddr64_t lbn, bn, rabn;
	caddr_t baddr;
	int got_buf = 0, nra, error = 0, n = 0, on = 0, not_readin;
	off_t offdiff;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_READ)
		panic("nfs_read mode");
#endif
	if (uio->uio_resid == 0)
		return (0);
	if (uio->uio_offset < 0)
		return (EINVAL);
	p = uio->uio_procp;
	if ((nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_GOTFSINFO)) == NFSMNT_NFSV3)
		(void)nfs_fsinfo(nmp, vp, cred, p);
	biosize = nmp->nm_rsize;
	/*
	 * For nfs, cache consistency can only be maintained approximately.
	 * Although RFC1094 does not specify the criteria, the following is
	 * believed to be compatible with the reference port.
	 * For nfs:
	 * If the file's modify time on the server has changed since the
	 * last read rpc or you have written to the file,
	 * you may have lost data cache consistency with the
	 * server, so flush all of the file's data out of the cache.
	 * Then force a getattr rpc to ensure that you have up to date
	 * attributes.
	 * NB: This implies that cache data can be read when up to
	 * NFS_ATTRTIMEO seconds out of date. If you find that you need current
	 * attributes this could be forced by setting n_attrstamp to 0 before
	 * the VOP_GETATTR() call.
	 */
	if (np->n_flag & NMODIFIED) {
		np->n_attrstamp = 0;
		error = VOP_GETATTR(vp, &vattr, cred, p);
		if (error)
			return (error);
		np->n_mtime = vattr.va_mtime.tv_sec;
	} else {
		error = VOP_GETATTR(vp, &vattr, cred, p);
		if (error)
			return (error);
		if (np->n_mtime != vattr.va_mtime.tv_sec) {
			error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
			if (error)
				return (error);
			np->n_mtime = vattr.va_mtime.tv_sec;
		}
	}

	/*
	 * update the cache read creds for this vnode
	 */
	if (np->n_rcred)
		crfree(np->n_rcred);
	np->n_rcred = cred;
	crhold(cred);

	do {
		if ((vp->v_flag & VROOT) && vp->v_type == VLNK) {
			return (nfs_readlinkrpc(vp, uio, cred));
		}
		baddr = (caddr_t)0;
		switch (vp->v_type) {
		case VREG:
			nfsstats.biocache_reads++;
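			/*
			 * lbn is the logical block (of size biosize) that
			 * contains the offset, "on" is the offset within
			 * that block, and bn is the same block expressed
			 * in DEV_BSIZE units for the buffer cache.
			 */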
			lbn = uio->uio_offset / biosize;
			on = uio->uio_offset & (biosize - 1);
			bn = lbn * (biosize / DEV_BSIZE);
			not_readin = 1;

			/*
			 * Start the read ahead(s), as required.
			 */
			if (nfs_numasync > 0 && nmp->nm_readahead > 0) {
				for (nra = 0; nra < nmp->nm_readahead &&
				    (lbn + 1 + nra) * biosize < np->n_size; nra++) {
					rabn = (lbn + 1 + nra) * (biosize / DEV_BSIZE);
					if (!incore(vp, rabn)) {
						rabp = nfs_getcacheblk(vp, rabn, biosize, p);
						if (!rabp)
							return (EINTR);
						if ((rabp->b_flags & (B_DELWRI | B_DONE)) == 0) {
							rabp->b_flags |= (B_READ | B_ASYNC);
							if (nfs_asyncio(rabp)) {
								rabp->b_flags |= B_INVAL;
								brelse(rabp);
							}
						} else
							brelse(rabp);
					}
				}
			}

			/*
			 * If the block is in the cache and has the required data
			 * in a valid region, just copy it out.
			 * Otherwise, get the block and write back/read in,
			 * as required.
			 */
			if ((bp = incore(vp, bn)) &&
			    (bp->b_flags & (B_BUSY | B_WRITEINPROG)) ==
			    (B_BUSY | B_WRITEINPROG))
				got_buf = 0;
			else {
again:
				bp = nfs_getcacheblk(vp, bn, biosize, p);
				if (!bp)
					return (EINTR);
				got_buf = 1;
				if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0) {
					bp->b_flags |= B_READ;
					not_readin = 0;
					error = nfs_doio(bp, p);
					if (error) {
						brelse(bp);
						return (error);
					}
				}
			}
			n = min((unsigned)(biosize - on), uio->uio_resid);
			offdiff = np->n_size - uio->uio_offset;
			if (offdiff < (off_t)n)
				n = (int)offdiff;
			if (not_readin && n > 0) {
				if (on < bp->b_validoff || (on + n) > bp->b_validend) {
					if (!got_buf) {
						bp = nfs_getcacheblk(vp, bn, biosize, p);
						if (!bp)
							return (EINTR);
						got_buf = 1;
					}
					bp->b_flags |= B_INVAFTERWRITE;
					if (bp->b_dirtyend > 0) {
						if ((bp->b_flags & B_DELWRI) == 0)
							panic("nfsbioread");
						if (VOP_BWRITE(bp) == EINTR)
							return (EINTR);
					} else
						brelse(bp);
					goto again;
				}
			}
			diff = (on >= bp->b_validend) ? 0 : (bp->b_validend - on);
			if (diff < n)
				n = diff;
			break;
		case VLNK:
			nfsstats.biocache_readlinks++;
			bp = nfs_getcacheblk(vp, 0, NFS_MAXPATHLEN, p);
			if (!bp)
				return (EINTR);
			if ((bp->b_flags & B_DONE) == 0) {
				bp->b_flags |= B_READ;
				error = nfs_doio(bp, p);
				if (error) {
					brelse(bp);
					return (error);
				}
			}
			n = min(uio->uio_resid, NFS_MAXPATHLEN - bp->b_resid);
			got_buf = 1;
			on = 0;
			break;
		default:
			printf(" nfsbioread: type %x unexpected\n", vp->v_type);
			break;
		}

		if (n > 0) {
			if (!baddr)
				baddr = bp->b_data;
			error = uiomove(baddr + on, (int)n, uio);
		}
		switch (vp->v_type) {
		case VREG:
			break;
		case VLNK:
			n = 0;
			break;
		default:
			printf(" nfsbioread: type %x unexpected\n", vp->v_type);
		}
		if (got_buf)
			brelse(bp);
	} while (error == 0 && uio->uio_resid > 0 && n > 0);
	return (error);
}

/*
 * Vnode op for write using bio
 */
int
nfs_write(v)
	void *v;
{
	struct vop_write_args *ap = v;
	int biosize;
	struct uio *uio = ap->a_uio;
	struct proc *p = uio->uio_procp;
	struct vnode *vp = ap->a_vp;
	struct nfsnode *np = VTONFS(vp);
	struct ucred *cred = ap->a_cred;
	int ioflag = ap->a_ioflag;
	struct buf *bp;
	struct vattr vattr;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	daddr64_t lbn, bn;
	int n, on, error = 0, extended = 0, wrotedta = 0, truncated = 0;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_WRITE)
		panic("nfs_write mode");
	if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
		panic("nfs_write proc");
#endif
	if (vp->v_type != VREG)
		return (EIO);
	if (np->n_flag & NWRITEERR) {
		np->n_flag &= ~NWRITEERR;
		return (np->n_error);
	}
	if ((nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_GOTFSINFO)) == NFSMNT_NFSV3)
		(void)nfs_fsinfo(nmp, vp, cred, p);
	if (ioflag & (IO_APPEND | IO_SYNC)) {
		if (np->n_flag & NMODIFIED) {
			np->n_attrstamp = 0;
			error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
			if (error)
				return (error);
		}
		if (ioflag & IO_APPEND) {
			np->n_attrstamp = 0;
			error = VOP_GETATTR(vp, &vattr, cred, p);
			if (error)
				return (error);
			uio->uio_offset = np->n_size;
		}
	}
	if (uio->uio_offset < 0)
		return (EINVAL);
	if (uio->uio_resid == 0)
		return (0);
	/*
	 * Maybe this should be above the vnode op call, but so long as
	 * file servers have no limits, I don't think it matters.
	 */
	if (p && uio->uio_offset + uio->uio_resid >
	    p->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
		psignal(p, SIGXFSZ);
		return (EFBIG);
	}

	/*
	 * update the cache write creds for this node.
	 */
	if (np->n_wcred)
		crfree(np->n_wcred);
	np->n_wcred = cred;
	crhold(cred);

	/*
	 * I use nm_rsize, not nm_wsize so that all buffer cache blocks
	 * will be the same size within a filesystem. nfs_writerpc will
	 * still use nm_wsize when sizing the rpc's.
	 */
	biosize = nmp->nm_rsize;
	do {

		/*
		 * XXX make sure we aren't cached in the VM page cache
		 */
		uvm_vnp_uncache(vp);

		nfsstats.biocache_writes++;
		lbn = uio->uio_offset / biosize;
		on = uio->uio_offset & (biosize - 1);
		n = min((unsigned)(biosize - on), uio->uio_resid);
		bn = lbn * (biosize / DEV_BSIZE);
again:
		bp = nfs_getcacheblk(vp, bn, biosize, p);
		if (!bp)
			return (EINTR);
		np->n_flag |= NMODIFIED;
		if (uio->uio_offset + n > np->n_size) {
			np->n_size = uio->uio_offset + n;
			uvm_vnp_setsize(vp, (u_long)np->n_size);
			extended = 1;
		} else if (uio->uio_offset + n < np->n_size)
			truncated = 1;

		/*
		 * If the new write will leave a contiguous dirty
		 * area, just update the b_dirtyoff and b_dirtyend,
		 * otherwise force a write rpc of the old dirty area.
		 */
		if (bp->b_dirtyend > 0 &&
		    (on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) {
			bp->b_proc = p;
			if (VOP_BWRITE(bp) == EINTR)
				return (EINTR);
			goto again;
		}

		error = uiomove((char *)bp->b_data + on, n, uio);
		if (error) {
			bp->b_flags |= B_ERROR;
			brelse(bp);
			return (error);
		}
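		/*
		 * Merge the newly written range into the buffer's dirty
		 * region and make sure the valid region covers it.
		 */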
		if (bp->b_dirtyend > 0) {
			bp->b_dirtyoff = min(on, bp->b_dirtyoff);
			bp->b_dirtyend = max((on + n), bp->b_dirtyend);
		} else {
			bp->b_dirtyoff = on;
			bp->b_dirtyend = on + n;
		}
		if (bp->b_validend == 0 || bp->b_validend < bp->b_dirtyoff ||
		    bp->b_validoff > bp->b_dirtyend) {
			bp->b_validoff = bp->b_dirtyoff;
			bp->b_validend = bp->b_dirtyend;
		} else {
			bp->b_validoff = min(bp->b_validoff, bp->b_dirtyoff);
			bp->b_validend = max(bp->b_validend, bp->b_dirtyend);
		}

		wrotedta = 1;

		/*
		 * Since this block is being modified, it must be written
		 * again and not just committed.
		 */

		if (NFS_ISV3(vp)) {
			rw_enter_write(&np->n_commitlock);
			if (bp->b_flags & B_NEEDCOMMIT) {
				bp->b_flags &= ~B_NEEDCOMMIT;
				nfs_del_tobecommitted_range(vp, bp);
			}
			nfs_del_committed_range(vp, bp);
			rw_exit_write(&np->n_commitlock);
		} else
			bp->b_flags &= ~B_NEEDCOMMIT;

		/*
		 * If the lease is non-cachable or IO_SYNC do bwrite().
		 */
		if (ioflag & IO_SYNC) {
			bp->b_proc = p;
			error = VOP_BWRITE(bp);
			if (error)
				return (error);
		} else if ((n + on) == biosize) {
			bp->b_proc = (struct proc *)0;
			bp->b_flags |= B_ASYNC;
			(void)nfs_writebp(bp, 0);
		} else {
			bdwrite(bp);
		}
	} while (uio->uio_resid > 0 && n > 0);

	if (wrotedta)
		VN_KNOTE(vp, NOTE_WRITE | (extended ? NOTE_EXTEND : 0) |
		    (truncated ? NOTE_TRUNCATE : 0));

	return (0);
}

/*
 * Get an nfs cache block.
 * Allocate a new one if the block isn't currently in the cache
 * and return the block marked busy. If the calling process is
 * interrupted by a signal for an interruptible mount point, return
 * NULL.
 */
struct buf *
nfs_getcacheblk(vp, bn, size, p)
	struct vnode *vp;
	daddr64_t bn;
	int size;
	struct proc *p;
{
	struct buf *bp;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);

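	/*
	 * On an interruptible mount, sleep in getblk() with PCATCH and
	 * retry every couple of seconds so a pending signal can abort
	 * the wait instead of hanging on an unresponsive server.
	 */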
	if (nmp->nm_flag & NFSMNT_INT) {
		bp = getblk(vp, bn, size, PCATCH, 0);
		while (bp == (struct buf *)0) {
			if (nfs_sigintr(nmp, (struct nfsreq *)0, p))
				return ((struct buf *)0);
			bp = getblk(vp, bn, size, 0, 2 * hz);
		}
	} else
		bp = getblk(vp, bn, size, 0, 0);
	return (bp);
}

/*
 * Flush and invalidate all dirty buffers. If another process is already
 * doing the flush, just wait for completion.
 */
int
nfs_vinvalbuf(vp, flags, cred, p, intrflg)
	struct vnode *vp;
	int flags;
	struct ucred *cred;
	struct proc *p;
	int intrflg;
{
	struct nfsnode *np = VTONFS(vp);
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	int error = 0, slpflag, slptimeo;

	if ((nmp->nm_flag & NFSMNT_INT) == 0)
		intrflg = 0;
	if (intrflg) {
		slpflag = PCATCH;
		slptimeo = 2 * hz;
	} else {
		slpflag = 0;
		slptimeo = 0;
	}
	/*
	 * First wait for any other process doing a flush to complete.
	 */
	while (np->n_flag & NFLUSHINPROG) {
		np->n_flag |= NFLUSHWANT;
		error = tsleep((caddr_t)&np->n_flag, PRIBIO + 2, "nfsvinval",
		    slptimeo);
		if (error && intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p))
			return (EINTR);
	}

	/*
	 * Now, flush as required.
	 */
	np->n_flag |= NFLUSHINPROG;
	error = vinvalbuf(vp, flags, cred, p, slpflag, 0);
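	/*
	 * If the flush fails, keep retrying; on an interruptible mount a
	 * pending signal lets us give up with EINTR after waking any
	 * other processes waiting on the flush.
	 */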
	while (error) {
		if (intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p)) {
			np->n_flag &= ~NFLUSHINPROG;
			if (np->n_flag & NFLUSHWANT) {
				np->n_flag &= ~NFLUSHWANT;
				wakeup((caddr_t)&np->n_flag);
			}
			return (EINTR);
		}
		error = vinvalbuf(vp, flags, cred, p, 0, slptimeo);
	}
	np->n_flag &= ~(NMODIFIED | NFLUSHINPROG);
	if (np->n_flag & NFLUSHWANT) {
		np->n_flag &= ~NFLUSHWANT;
		wakeup((caddr_t)&np->n_flag);
	}
	return (0);
}

/*
 * Initiate asynchronous I/O. Return an error if no nfsiods are available.
 * This is mainly to avoid queueing async I/O requests when the nfsiods
 * are all hung on a dead server.
 */
int
nfs_asyncio(bp)
	struct buf *bp;
{
	int i, s;

	if (nfs_numasync == 0)
		return (EIO);
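	/*
	 * Hand the buffer to the first idle nfsiod, if there is one.
	 */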
	for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
		if (nfs_iodwant[i]) {
			if ((bp->b_flags & B_READ) == 0) {
				bp->b_flags |= B_WRITEINPROG;
			}

			TAILQ_INSERT_TAIL(&nfs_bufq, bp, b_freelist);
			nfs_iodwant[i] = (struct proc *)0;
			wakeup((caddr_t)&nfs_iodwant[i]);
			return (0);
		}

	/*
	 * If it is a read or a write already marked B_WRITEINPROG or B_NOCACHE
	 * return EIO so the process will call nfs_doio() and do it
	 * synchronously.
	 */
	if (bp->b_flags & (B_READ | B_WRITEINPROG | B_NOCACHE))
		return (EIO);

	/*
	 * Just turn the async write into a delayed write, instead of
	 * doing it synchronously. Hopefully, at least one of the nfsiods
	 * is currently doing a write for this file and will pick up the
	 * delayed writes before going back to sleep.
	 */
	s = splbio();
	buf_dirty(bp);
	biodone(bp);
	splx(s);
	return (0);
}

/*
 * Do an I/O operation to/from a cache block. This may be called
 * synchronously or from an nfsiod.
 */
int
nfs_doio(bp, p)
	struct buf *bp;
	struct proc *p;
{
	struct uio *uiop;
	struct vnode *vp;
	struct nfsnode *np;
	struct nfsmount *nmp;
	int s, error = 0, diff, len, iomode, must_commit = 0;
	struct uio uio;
	struct iovec io;

	vp = bp->b_vp;
	np = VTONFS(vp);
	nmp = VFSTONFS(vp->v_mount);
	uiop = &uio;
	uiop->uio_iov = &io;
	uiop->uio_iovcnt = 1;
	uiop->uio_segflg = UIO_SYSSPACE;
	uiop->uio_procp = p;

	/*
	 * Historically, paging was done with physio, but no more...
	 */
	if (bp->b_flags & B_PHYS) {
		/*
		 * ...though reading /dev/drum still gets us here.
		 */
		io.iov_len = uiop->uio_resid = bp->b_bcount;
		/* mapping was done by vmapbuf() */
		io.iov_base = bp->b_data;
		uiop->uio_offset = ((off_t)bp->b_blkno) << DEV_BSHIFT;
		if (bp->b_flags & B_READ) {
			uiop->uio_rw = UIO_READ;
			nfsstats.read_physios++;
			error = nfs_readrpc(vp, uiop);
		} else {
			iomode = NFSV3WRITE_DATASYNC;
			uiop->uio_rw = UIO_WRITE;
			nfsstats.write_physios++;
			error = nfs_writerpc(vp, uiop, &iomode, &must_commit);
		}
		if (error) {
			bp->b_flags |= B_ERROR;
			bp->b_error = error;
		}
	} else if (bp->b_flags & B_READ) {
		io.iov_len = uiop->uio_resid = bp->b_bcount;
		io.iov_base = bp->b_data;
		uiop->uio_rw = UIO_READ;
		switch (vp->v_type) {
		case VREG:
			uiop->uio_offset = ((off_t)bp->b_blkno) << DEV_BSHIFT;
			nfsstats.read_bios++;
			error = nfs_readrpc(vp, uiop);
			if (!error) {
				bp->b_validoff = 0;
				if (uiop->uio_resid) {
					/*
					 * If len > 0, there is a hole in the file and
					 * no writes after the hole have been pushed to
					 * the server yet.
					 * Just zero fill the rest of the valid area.
					 */
					diff = bp->b_bcount - uiop->uio_resid;
					len = np->n_size - ((((off_t)bp->b_blkno) << DEV_BSHIFT)
					    + diff);
					if (len > 0) {
						len = min(len, uiop->uio_resid);
						bzero((char *)bp->b_data + diff, len);
						bp->b_validend = diff + len;
					} else
						bp->b_validend = diff;
				} else
					bp->b_validend = bp->b_bcount;
			}
			if (p && (vp->v_flag & VTEXT) &&
			    (np->n_mtime != np->n_vattr.va_mtime.tv_sec)) {
				uprintf("Process killed due to text file modification\n");
				psignal(p, SIGKILL);
			}
			break;
		case VLNK:
			uiop->uio_offset = (off_t)0;
			nfsstats.readlink_bios++;
			error = nfs_readlinkrpc(vp, uiop, curproc->p_ucred);
			break;
		default:
			printf("nfs_doio: type %x unexpected\n", vp->v_type);
			break;
		}
		if (error) {
			bp->b_flags |= B_ERROR;
			bp->b_error = error;
		}
	} else {
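		/*
		 * This is a write: push the buffer's dirty region to the
		 * server with a write RPC.
		 */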
		io.iov_len = uiop->uio_resid = bp->b_dirtyend
		    - bp->b_dirtyoff;
		uiop->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE
		    + bp->b_dirtyoff;
		io.iov_base = (char *)bp->b_data + bp->b_dirtyoff;
		uiop->uio_rw = UIO_WRITE;
		nfsstats.write_bios++;
		if ((bp->b_flags & (B_ASYNC | B_NEEDCOMMIT | B_NOCACHE)) == B_ASYNC)
			iomode = NFSV3WRITE_UNSTABLE;
		else
			iomode = NFSV3WRITE_FILESYNC;
		bp->b_flags |= B_WRITEINPROG;
		error = nfs_writerpc(vp, uiop, &iomode, &must_commit);

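		/*
		 * For a successful NFSv3 unstable write, remember that the
		 * range still needs a commit RPC; otherwise drop it from
		 * the commit bookkeeping. n_commitlock protects the ranges.
		 */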
		rw_enter_write(&np->n_commitlock);
		if (!error && iomode == NFSV3WRITE_UNSTABLE) {
			bp->b_flags |= B_NEEDCOMMIT;
			nfs_add_tobecommitted_range(vp, bp);
		} else {
			bp->b_flags &= ~B_NEEDCOMMIT;
			nfs_del_committed_range(vp, bp);
		}
		rw_exit_write(&np->n_commitlock);

		bp->b_flags &= ~B_WRITEINPROG;

		/*
		 * For an interrupted write, the buffer is still valid and the
		 * write hasn't been pushed to the server yet, so we can't set
		 * B_ERROR and report the interruption by setting B_EINTR. For
		 * the B_ASYNC case, B_EINTR is not relevant, so the rpc attempt
		 * is essentially a noop.
		 * For the case of a V3 write rpc not being committed to stable
		 * storage, the block is still dirty and requires either a commit
		 * rpc or another write rpc with iomode == NFSV3WRITE_FILESYNC
		 * before the block is reused. This is indicated by setting the
		 * B_DELWRI and B_NEEDCOMMIT flags.
		 */
		if (error == EINTR || (!error && (bp->b_flags & B_NEEDCOMMIT))) {
			s = splbio();
			buf_dirty(bp);
			splx(s);

			if (!(bp->b_flags & B_ASYNC) && error)
				bp->b_flags |= B_EINTR;
		} else {
			if (error) {
				bp->b_flags |= B_ERROR;
				bp->b_error = np->n_error = error;
				np->n_flag |= NWRITEERR;
			}
			bp->b_dirtyoff = bp->b_dirtyend = 0;
		}
	}
	bp->b_resid = uiop->uio_resid;
	if (must_commit)
		nfs_clearcommit(vp->v_mount);
	s = splbio();
	biodone(bp);
	splx(s);
	return (error);
}