Annotation of sys/ufs/ffs/ffs_vnops.c, Revision 1.1.1.1
1.1 nbrk 1: /* $OpenBSD: ffs_vnops.c,v 1.45 2007/06/01 23:47:57 deraadt Exp $ */
2: /* $NetBSD: ffs_vnops.c,v 1.7 1996/05/11 18:27:24 mycroft Exp $ */
3:
4: /*
5: * Copyright (c) 1982, 1986, 1989, 1993
6: * The Regents of the University of California. All rights reserved.
7: *
8: * Redistribution and use in source and binary forms, with or without
9: * modification, are permitted provided that the following conditions
10: * are met:
11: * 1. Redistributions of source code must retain the above copyright
12: * notice, this list of conditions and the following disclaimer.
13: * 2. Redistributions in binary form must reproduce the above copyright
14: * notice, this list of conditions and the following disclaimer in the
15: * documentation and/or other materials provided with the distribution.
16: * 3. Neither the name of the University nor the names of its contributors
17: * may be used to endorse or promote products derived from this software
18: * without specific prior written permission.
19: *
20: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30: * SUCH DAMAGE.
31: *
32: * @(#)ffs_vnops.c 8.10 (Berkeley) 8/10/94
33: */
34:
35: #include <sys/param.h>
36: #include <sys/systm.h>
37: #include <sys/resourcevar.h>
38: #include <sys/kernel.h>
39: #include <sys/file.h>
40: #include <sys/stat.h>
41: #include <sys/buf.h>
42: #include <sys/proc.h>
43: #include <sys/conf.h>
44: #include <sys/mount.h>
45: #include <sys/vnode.h>
46: #include <sys/malloc.h>
47: #include <sys/signalvar.h>
48: #include <sys/pool.h>
49: #include <sys/event.h>
50:
51: #include <uvm/uvm_extern.h>
52:
53: #include <miscfs/specfs/specdev.h>
54: #include <miscfs/fifofs/fifo.h>
55:
56: #include <ufs/ufs/quota.h>
57: #include <ufs/ufs/inode.h>
58: #include <ufs/ufs/dir.h>
59: #include <ufs/ufs/ufs_extern.h>
60: #include <ufs/ufs/ufsmount.h>
61:
62: #include <ufs/ffs/fs.h>
63: #include <ufs/ffs/ffs_extern.h>
64:
/* Global vfs data structures for ufs. */

/*
 * Vnode-operation dispatch table for regular FFS vnodes.  Each entry
 * maps a VOP descriptor to its implementation; FFS overrides only the
 * operations that need filesystem-specific behavior (read, write,
 * fsync, reclaim, reallocblks) and delegates the rest to the shared
 * UFS layer.  Anything not listed falls back to vn_default_error via
 * vop_default_desc.  The table is registered through
 * ffs_vnodeop_opv_desc below; the VFS layer fills in ffs_vnodeop_p.
 */
int (**ffs_vnodeop_p)(void *);
struct vnodeopv_entry_desc ffs_vnodeop_entries[] = {
	{ &vop_default_desc, vn_default_error },
	{ &vop_lookup_desc, ufs_lookup },		/* lookup */
	{ &vop_create_desc, ufs_create },		/* create */
	{ &vop_mknod_desc, ufs_mknod },			/* mknod */
	{ &vop_open_desc, ufs_open },			/* open */
	{ &vop_close_desc, ufs_close },			/* close */
	{ &vop_access_desc, ufs_access },		/* access */
	{ &vop_getattr_desc, ufs_getattr },		/* getattr */
	{ &vop_setattr_desc, ufs_setattr },		/* setattr */
	{ &vop_read_desc, ffs_read },			/* read */
	{ &vop_write_desc, ffs_write },			/* write */
	{ &vop_ioctl_desc, ufs_ioctl },			/* ioctl */
	{ &vop_poll_desc, ufs_poll },			/* poll */
	{ &vop_kqfilter_desc, ufs_kqfilter },		/* kqfilter */
	{ &vop_revoke_desc, ufs_revoke },		/* revoke */
	{ &vop_fsync_desc, ffs_fsync },			/* fsync */
	{ &vop_remove_desc, ufs_remove },		/* remove */
	{ &vop_link_desc, ufs_link },			/* link */
	{ &vop_rename_desc, ufs_rename },		/* rename */
	{ &vop_mkdir_desc, ufs_mkdir },			/* mkdir */
	{ &vop_rmdir_desc, ufs_rmdir },			/* rmdir */
	{ &vop_symlink_desc, ufs_symlink },		/* symlink */
	{ &vop_readdir_desc, ufs_readdir },		/* readdir */
	{ &vop_readlink_desc, ufs_readlink },		/* readlink */
	{ &vop_abortop_desc, vop_generic_abortop },	/* abortop */
	{ &vop_inactive_desc, ufs_inactive },		/* inactive */
	{ &vop_reclaim_desc, ffs_reclaim },		/* reclaim */
	{ &vop_lock_desc, ufs_lock },			/* lock */
	{ &vop_unlock_desc, ufs_unlock },		/* unlock */
	{ &vop_bmap_desc, ufs_bmap },			/* bmap */
	{ &vop_strategy_desc, ufs_strategy },		/* strategy */
	{ &vop_print_desc, ufs_print },			/* print */
	{ &vop_islocked_desc, ufs_islocked },		/* islocked */
	{ &vop_pathconf_desc, ufs_pathconf },		/* pathconf */
	{ &vop_advlock_desc, ufs_advlock },		/* advlock */
	{ &vop_reallocblks_desc, ffs_reallocblks },	/* reallocblks */
	{ &vop_bwrite_desc, vop_generic_bwrite },
	{ NULL, NULL }
};

/* Binds the operation vector pointer to its entry table for VFS
 * registration. */
struct vnodeopv_desc ffs_vnodeop_opv_desc =
	{ &ffs_vnodeop_p, ffs_vnodeop_entries };
110:
/*
 * Vnode-operation dispatch table for FFS special-device vnodes
 * (character/block devices living on an FFS filesystem).  Device I/O
 * is routed to specfs via spec_vnoperate (the default entry), while
 * attribute handling and inode lifecycle (access/getattr/setattr,
 * fsync, inactive, reclaim, locking) remain with UFS/FFS so the
 * backing inode stays consistent.
 */
int (**ffs_specop_p)(void *);
struct vnodeopv_entry_desc ffs_specop_entries[] = {
	{ &vop_default_desc, spec_vnoperate },
	{ &vop_close_desc, ufsspec_close },		/* close */
	{ &vop_access_desc, ufs_access },		/* access */
	{ &vop_getattr_desc, ufs_getattr },		/* getattr */
	{ &vop_setattr_desc, ufs_setattr },		/* setattr */
	{ &vop_read_desc, ufsspec_read },		/* read */
	{ &vop_write_desc, ufsspec_write },		/* write */
	{ &vop_fsync_desc, ffs_fsync },			/* fsync */
	{ &vop_inactive_desc, ufs_inactive },		/* inactive */
	{ &vop_reclaim_desc, ffs_reclaim },		/* reclaim */
	{ &vop_lock_desc, ufs_lock },			/* lock */
	{ &vop_unlock_desc, ufs_unlock },		/* unlock */
	{ &vop_print_desc, ufs_print },			/* print */
	{ &vop_islocked_desc, ufs_islocked },		/* islocked */
	{ NULL, NULL }
};

/* Binds the special-device operation vector to its entry table. */
struct vnodeopv_desc ffs_specop_opv_desc =
	{ &ffs_specop_p, ffs_specop_entries };
132:
#ifdef FIFO
/*
 * Vnode-operation dispatch table for FIFO (named pipe) vnodes on an
 * FFS filesystem.  Pipe semantics come from fifofs via fifo_vnoperate
 * (the default entry); attribute handling and inode lifecycle stay
 * with UFS/FFS, and reclaim uses the combined ffsfifo_reclaim so both
 * the fifo state and the inode are torn down.
 */
int (**ffs_fifoop_p)(void *);
struct vnodeopv_entry_desc ffs_fifoop_entries[] = {
	{ &vop_default_desc, fifo_vnoperate },
	{ &vop_close_desc, ufsfifo_close },		/* close */
	{ &vop_access_desc, ufs_access },		/* access */
	{ &vop_getattr_desc, ufs_getattr },		/* getattr */
	{ &vop_setattr_desc, ufs_setattr },		/* setattr */
	{ &vop_read_desc, ufsfifo_read },		/* read */
	{ &vop_write_desc, ufsfifo_write },		/* write */
	{ &vop_fsync_desc, ffs_fsync },			/* fsync */
	{ &vop_inactive_desc, ufs_inactive },		/* inactive */
	{ &vop_reclaim_desc, ffsfifo_reclaim },		/* reclaim */
	{ &vop_lock_desc, ufs_lock },			/* lock */
	{ &vop_unlock_desc, ufs_unlock },		/* unlock */
	{ &vop_print_desc, ufs_print },			/* print */
	{ &vop_islocked_desc, ufs_islocked },		/* islocked */
	{ &vop_bwrite_desc, vop_generic_bwrite },
	{ NULL, NULL }
};

/* Binds the FIFO operation vector to its entry table. */
struct vnodeopv_desc ffs_fifoop_opv_desc =
	{ &ffs_fifoop_p, ffs_fifoop_entries };
#endif /* FIFO */
157:
/*
 * Enabling cluster read/write operations.
 *
 * Run-time knobs for clustered I/O.  In this file only doclusterwrite
 * is consulted (in ffs_write, to choose cluster_write over bawrite);
 * doclusterread is not referenced here — presumably it is read by
 * code outside this file (TODO: confirm against the rest of the tree).
 */
int doclusterread = 1;
int doclusterwrite = 1;
163:
/*
 * Vnode op for reading.
 *
 * Copies data from a regular file, directory, or (long) symlink to
 * the caller's uio, one filesystem block at a time through the buffer
 * cache.  Detects sequential access (via i_ci.ci_lastr) and uses
 * clustered reads for it.  Marks the inode IN_ACCESS before returning
 * so the access time is updated later.  Returns 0 or an errno.
 */
/* ARGSUSED */
int
ffs_read(void *v)
{
	struct vop_read_args *ap = v;
	struct vnode *vp;
	struct inode *ip;
	struct uio *uio;
	struct fs *fs;
	struct buf *bp;
	daddr64_t lbn, nextlbn;
	off_t bytesinfile;
	long size, xfersize, blkoffset;
	mode_t mode;
	int error;

	vp = ap->a_vp;
	ip = VTOI(vp);
	mode = DIP(ip, mode);
	uio = ap->a_uio;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_READ)
		panic("ffs_read: mode");

	if (vp->v_type == VLNK) {
		/*
		 * Short symlinks are stored inline and must never reach
		 * this block-read path.
		 */
		if ((int)DIP(ip, size) < vp->v_mount->mnt_maxsymlinklen ||
		    (vp->v_mount->mnt_maxsymlinklen == 0 &&
		     DIP(ip, blocks) == 0))
			panic("ffs_read: short symlink");
	} else if (vp->v_type != VREG && vp->v_type != VDIR)
		panic("ffs_read: type %d", vp->v_type);
#endif
	fs = ip->i_fs;
	/* Reads starting beyond the filesystem's maximum file size fail. */
	if ((u_int64_t)uio->uio_offset > fs->fs_maxfilesize)
		return (EFBIG);

	/* Zero-length read: nothing to do. */
	if (uio->uio_resid == 0)
		return (0);

	for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
		/* Stop at end of file. */
		if ((bytesinfile = DIP(ip, size) - uio->uio_offset) <= 0)
			break;
		lbn = lblkno(fs, uio->uio_offset);
		nextlbn = lbn + 1;
		size = fs->fs_bsize;	/* WAS blksize(fs, ip, lbn); */
		blkoffset = blkoff(fs, uio->uio_offset);
		xfersize = fs->fs_bsize - blkoffset;
		/* Clamp the transfer to the request and to EOF. */
		if (uio->uio_resid < xfersize)
			xfersize = uio->uio_resid;
		if (bytesinfile < xfersize)
			xfersize = bytesinfile;

		if (lblktosize(fs, nextlbn) >= DIP(ip, size))
			/* Last block of the file: plain single-block read. */
			error = bread(vp, lbn, size, NOCRED, &bp);
		else if (lbn - 1 == ip->i_ci.ci_lastr) {
			/* Sequential pattern detected: clustered read. */
			error = bread_cluster(vp, lbn, size, &bp);
		} else
			error = bread(vp, lbn, size, NOCRED, &bp);

		if (error)
			break;
		/* Remember this block to detect sequential access next time. */
		ip->i_ci.ci_lastr = lbn;

		/*
		 * We should only get non-zero b_resid when an I/O error
		 * has occurred, which should cause us to break above.
		 * However, if the short read did not cause an error,
		 * then we want to ensure that we do not uiomove bad
		 * or uninitialized data.
		 */
		size -= bp->b_resid;
		if (size < xfersize) {
			if (size == 0)
				break;
			xfersize = size;
		}
		error = uiomove((char *)bp->b_data + blkoffset, (int)xfersize,
		    uio);
		if (error)
			break;
		brelse(bp);
	}
	/* Release the buffer still held after a break out of the loop. */
	if (bp != NULL)
		brelse(bp);
	ip->i_flag |= IN_ACCESS;
	return (error);
}
255:
/*
 * Vnode op for writing.
 *
 * Copies data from the caller's uio into the file through the buffer
 * cache, allocating blocks and extending the file as needed.  Honors
 * IO_APPEND (write at EOF), IO_SYNC (synchronous block writes and a
 * final inode update), and IO_UNIT (roll the file back to its
 * original size on error).  Enforces RLIMIT_FSIZE for regular files
 * and clears setuid/setgid on successful non-root writes.  Returns 0
 * or an errno.
 */
int
ffs_write(void *v)
{
	struct vop_write_args *ap = v;
	struct vnode *vp;
	struct uio *uio;
	struct inode *ip;
	struct fs *fs;
	struct buf *bp;
	struct proc *p;
	daddr_t lbn;
	off_t osize;
	int blkoffset, error, extended, flags, ioflag, resid, size, xfersize;

	extended = 0;
	ioflag = ap->a_ioflag;
	uio = ap->a_uio;
	vp = ap->a_vp;
	ip = VTOI(vp);

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_WRITE)
		panic("ffs_write: mode");
#endif

	/*
	 * If writing 0 bytes, succeed and do not change
	 * update time or file offset (standards compliance)
	 */
	if (uio->uio_resid == 0)
		return (0);

	switch (vp->v_type) {
	case VREG:
		if (ioflag & IO_APPEND)
			uio->uio_offset = DIP(ip, size);
		/* Append-only files may only be written at their end. */
		if ((DIP(ip, flags) & APPEND) && uio->uio_offset != DIP(ip, size))
			return (EPERM);
		/* FALLTHROUGH */
	case VLNK:
		break;
	case VDIR:
		/* Directory writes must always be synchronous. */
		if ((ioflag & IO_SYNC) == 0)
			panic("ffs_write: nonsync dir write");
		break;
	default:
		panic("ffs_write: type");
	}

	fs = ip->i_fs;
	/* Refuse writes that would exceed the maximum file size. */
	if (uio->uio_offset < 0 ||
	    (u_int64_t)uio->uio_offset + uio->uio_resid > fs->fs_maxfilesize)
		return (EFBIG);
	/*
	 * Maybe this should be above the vnode op call, but so long as
	 * file servers have no limits, I don't think it matters.
	 */
	p = uio->uio_procp;
	if (vp->v_type == VREG && p && !(ioflag & IO_NOLIMIT) &&
	    uio->uio_offset + uio->uio_resid >
	    p->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
		/* Exceeding the process file-size limit raises SIGXFSZ. */
		psignal(p, SIGXFSZ);
		return (EFBIG);
	}

	/* Record the starting state so IO_UNIT can roll back on error. */
	resid = uio->uio_resid;
	osize = DIP(ip, size);
	flags = ioflag & IO_SYNC ? B_SYNC : 0;

	for (error = 0; uio->uio_resid > 0;) {
		lbn = lblkno(fs, uio->uio_offset);
		blkoffset = blkoff(fs, uio->uio_offset);
		xfersize = fs->fs_bsize - blkoffset;
		if (uio->uio_resid < xfersize)
			xfersize = uio->uio_resid;
		/*
		 * Partial-block writes need the rest of the buffer
		 * cleared so stale data is never exposed.
		 */
		if (fs->fs_bsize > xfersize)
			flags |= B_CLRBUF;
		else
			flags &= ~B_CLRBUF;

		if ((error = UFS_BUF_ALLOC(ip, uio->uio_offset, xfersize,
			 ap->a_cred, flags, &bp)) != 0)
			break;
		if (uio->uio_offset + xfersize > DIP(ip, size)) {
			/* Growing the file: update inode and UVM sizes. */
			DIP_ASSIGN(ip, size, uio->uio_offset + xfersize);
			uvm_vnp_setsize(vp, DIP(ip, size));
			extended = 1;
		}
		(void)uvm_vnp_uncache(vp);

		size = blksize(fs, ip, lbn) - bp->b_resid;
		if (size < xfersize)
			xfersize = size;

		error =
		    uiomove((char *)bp->b_data + blkoffset, xfersize, uio);

		/*
		 * On a copy-in fault, zero the affected range so no
		 * uninitialized data can reach the disk.
		 */
		if (error != 0)
			bzero((char *)bp->b_data + blkoffset, xfersize);

		if (ioflag & IO_SYNC)
			(void)bwrite(bp);
		else if (xfersize + blkoffset == fs->fs_bsize) {
			/* Filled a whole block: cluster if enabled. */
			if (doclusterwrite)
				cluster_write(bp, &ip->i_ci, DIP(ip, size));
			else
				bawrite(bp);
		} else
			bdwrite(bp);

		if (error || xfersize == 0)
			break;
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	/*
	 * If we successfully wrote any data, and we are not the superuser
	 * we clear the setuid and setgid bits as a precaution against
	 * tampering.
	 */
	if (resid > uio->uio_resid && ap->a_cred && ap->a_cred->cr_uid != 0)
		DIP(ip, mode) &= ~(ISUID | ISGID);
	if (resid > uio->uio_resid)
		VN_KNOTE(vp, NOTE_WRITE | (extended ? NOTE_EXTEND : 0));
	if (error) {
		/* IO_UNIT: undo the partial write and restore the uio. */
		if (ioflag & IO_UNIT) {
			(void)UFS_TRUNCATE(ip, osize,
			    ioflag & IO_SYNC, ap->a_cred);
			uio->uio_offset -= resid - uio->uio_resid;
			uio->uio_resid = resid;
		}
	} else if (resid > uio->uio_resid && (ioflag & IO_SYNC)) {
		/* Synchronous write: push the inode update to disk now. */
		error = UFS_UPDATE(ip, MNT_WAIT);
	}
	return (error);
}
394:
/*
 * Synch an open file.
 *
 * Flushes every dirty buffer attached to the vnode.  For MNT_WAIT the
 * first pass writes data blocks only (skipmeta), a second pass picks
 * up metadata, and up to NIADDR+1 further passes retry until the
 * dirty list drains; the final pass writes synchronously so write
 * errors are reported.  Finishes by updating the on-disk inode.
 * Returns 0 or an errno.
 */
/* ARGSUSED */
int
ffs_fsync(void *v)
{
	struct vop_fsync_args *ap = v;
	struct vnode *vp = ap->a_vp;
	struct buf *bp, *nbp;
	int s, error, passes, skipmeta;

	/* Softdep-mounted block devices get flushed by softdep first. */
	if (vp->v_type == VBLK &&
	    vp->v_specmountpoint != NULL &&
	    (vp->v_specmountpoint->mnt_flag & MNT_SOFTDEP))
		softdep_fsync_mountdev(vp, ap->a_waitfor);

	/*
	 * Flush all dirty buffers associated with a vnode.
	 */
	passes = NIADDR + 1;
	skipmeta = 0;
	if (ap->a_waitfor == MNT_WAIT)
		skipmeta = 1;		/* first pass: data blocks only */
	s = splbio();
loop:
	/* Clear B_SCANNED so each buffer is considered once per pass. */
	for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp;
	    bp = LIST_NEXT(bp, b_vnbufs))
		bp->b_flags &= ~B_SCANNED;
	for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		/*
		 * Reasons to skip this buffer: it has already been considered
		 * on this pass, this pass is the first time through on a
		 * synchronous flush request and the buffer being considered
		 * is metadata, the buffer has dependencies that will cause
		 * it to be redirtied and it has not already been deferred,
		 * or it is already being written.
		 */
		if (bp->b_flags & (B_BUSY | B_SCANNED))
			continue;
		if ((bp->b_flags & B_DELWRI) == 0)
			panic("ffs_fsync: not dirty");
		if (skipmeta && bp->b_lblkno < 0)
			continue;
		if (ap->a_waitfor != MNT_WAIT &&
		    LIST_FIRST(&bp->b_dep) != NULL &&
		    (bp->b_flags & B_DEFERRED) == 0 &&
		    buf_countdeps(bp, 0, 1)) {
			bp->b_flags |= B_DEFERRED;
			continue;
		}

		bremfree(bp);
		bp->b_flags |= B_BUSY | B_SCANNED;
		splx(s);
		/*
		 * On our final pass through, do all I/O synchronously
		 * so that we can find out if our flush is failing
		 * because of write errors.
		 */
		if (passes > 0 || ap->a_waitfor != MNT_WAIT)
			(void) bawrite(bp);
		else if ((error = bwrite(bp)) != 0)
			return (error);
		s = splbio();
		/*
		 * Since we may have slept during the I/O, we need
		 * to start from a known point.
		 */
		nbp = LIST_FIRST(&vp->v_dirtyblkhd);
	}
	/* Second pass picks up the metadata buffers skipped above. */
	if (skipmeta) {
		skipmeta = 0;
		goto loop;
	}
	if (ap->a_waitfor == MNT_WAIT) {
		/* Wait for all I/O issued above to drain. */
		vwaitforio(vp, 0, "ffs_fsync", 0);

		/*
		 * Ensure that any filesystem metadata associated
		 * with the vnode has been written.
		 */
		splx(s);
		if ((error = softdep_sync_metadata(ap)) != 0)
			return (error);
		s = splbio();
		if (!LIST_EMPTY(&vp->v_dirtyblkhd)) {
			/*
			 * Block devices associated with filesystems may
			 * have new I/O requests posted for them even if
			 * the vnode is locked, so no amount of trying will
			 * get them clean. Thus we give block devices a
			 * good effort, then just give up. For all other file
			 * types, go around and try again until it is clean.
			 */
			if (passes > 0) {
				passes -= 1;
				goto loop;
			}
#ifdef DIAGNOSTIC
			if (vp->v_type != VBLK)
				vprint("ffs_fsync: dirty", vp);
#endif
		}
	}
	splx(s);
	/* Finally push the inode itself; wait only for MNT_WAIT. */
	return (UFS_UPDATE(VTOI(vp), ap->a_waitfor == MNT_WAIT));
}
504:
/*
 * Reclaim an inode so that it can be used for other purposes.
 *
 * Detaches the inode from the vnode via the common UFS reclaim,
 * releases the dinode copy and the in-core inode back to their pools,
 * and clears v_data.  Returns 0 or the error from ufs_reclaim().
 */
int
ffs_reclaim(void *v)
{
	struct vop_reclaim_args *ap = v;
	struct vnode *vp = ap->a_vp;
	struct inode *ip = VTOI(vp);
	int error;

	/* Common UFS teardown first; bail out if it fails. */
	if ((error = ufs_reclaim(vp, ap->a_p)) != 0)
		return (error);

	/*
	 * Return the dinode to the pool matching the filesystem type.
	 * NOTE(review): the i_din1 NULL check guarding the FFS2 branch
	 * suggests i_din1/i_din2 share storage (a union) — confirm
	 * against ufs/inode.h.
	 */
	if (ip->i_din1 != NULL) {
#ifdef FFS2
		if (ip->i_ump->um_fstype == UM_UFS2)
			pool_put(&ffs_dinode2_pool, ip->i_din2);
		else
#endif
			pool_put(&ffs_dinode1_pool, ip->i_din1);
	}

	/* Release the in-core inode itself. */
	pool_put(&ffs_ino_pool, ip);

	/* The vnode no longer has filesystem-private data. */
	vp->v_data = NULL;

	return (0);
}
534:
#ifdef FIFO
/*
 * Reclaim a FIFO vnode: release the fifo-layer state first, then run
 * the common FFS inode reclaim and propagate its result.
 */
int
ffsfifo_reclaim(void *v)
{
	int error;

	fifo_reclaim(v);
	error = ffs_reclaim(v);
	return (error);
}
#endif
CVSweb