[BACK]Return to vfs_sync.c CVS log [TXT][DIR] Up to [local] / sys / kern

Annotation of sys/kern/vfs_sync.c, Revision 1.1

1.1     ! nbrk        1: /*       $OpenBSD: vfs_sync.c,v 1.43 2007/06/01 23:47:56 deraadt Exp $  */
        !             2:
        !             3: /*
        !             4:  *  Portions of this code are:
        !             5:  *
        !             6:  * Copyright (c) 1989, 1993
        !             7:  *     The Regents of the University of California.  All rights reserved.
        !             8:  * (c) UNIX System Laboratories, Inc.
        !             9:  * All or some portions of this file are derived from material licensed
        !            10:  * to the University of California by American Telephone and Telegraph
        !            11:  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
        !            12:  * the permission of UNIX System Laboratories, Inc.
        !            13:  *
        !            14:  * Redistribution and use in source and binary forms, with or without
        !            15:  * modification, are permitted provided that the following conditions
        !            16:  * are met:
        !            17:  * 1. Redistributions of source code must retain the above copyright
        !            18:  *    notice, this list of conditions and the following disclaimer.
        !            19:  * 2. Redistributions in binary form must reproduce the above copyright
        !            20:  *    notice, this list of conditions and the following disclaimer in the
        !            21:  *    documentation and/or other materials provided with the distribution.
        !            22:  * 3. Neither the name of the University nor the names of its contributors
        !            23:  *    may be used to endorse or promote products derived from this software
        !            24:  *    without specific prior written permission.
        !            25:  *
        !            26:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
        !            27:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
        !            28:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
        !            29:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
        !            30:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
        !            31:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
        !            32:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
        !            33:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
        !            34:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
        !            35:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
        !            36:  * SUCH DAMAGE.
        !            37:  */
        !            38:
        !            39: /*
        !            40:  * Syncer daemon
        !            41:  */
        !            42:
        !            43: #include <sys/queue.h>
        !            44: #include <sys/param.h>
        !            45: #include <sys/systm.h>
        !            46: #include <sys/proc.h>
        !            47: #include <sys/mount.h>
        !            48: #include <sys/vnode.h>
        !            49: #include <sys/buf.h>
        !            50: #include <sys/malloc.h>
        !            51:
        !            52: #include <sys/kernel.h>
        !            53: #include <sys/sched.h>
        !            54:
        !            55: #ifdef FFS_SOFTUPDATES
        !            56: int   softdep_process_worklist(struct mount *);
        !            57: #endif
        !            58:
        !            59: /*
        !            60:  * The workitem queue.
        !            61:  */
        !            62: #define SYNCER_MAXDELAY        32              /* maximum sync delay time */
        !            63: #define SYNCER_DEFAULT 30              /* default sync delay time */
        !            64: int syncer_maxdelay = SYNCER_MAXDELAY; /* maximum delay time */
        !            65: time_t syncdelay = SYNCER_DEFAULT;     /* time to delay syncing vnodes */
        !            66:
        !            67: int rushjob = 0;                       /* number of slots to run ASAP */
        !            68: int stat_rush_requests = 0;            /* number of rush requests */
        !            69:
        !            70: static int syncer_delayno = 0;
        !            71: static long syncer_mask;
        !            72: LIST_HEAD(synclist, vnode);
        !            73: static struct synclist *syncer_workitem_pending;
        !            74:
        !            75: struct proc *syncerproc;
        !            76:
        !            77: /*
        !            78:  * The workitem queue.
        !            79:  *
        !            80:  * It is useful to delay writes of file data and filesystem metadata
        !            81:  * for tens of seconds so that quickly created and deleted files need
        !            82:  * not waste disk bandwidth being created and removed. To realize this,
        !            83:  * we append vnodes to a "workitem" queue. When running with a soft
        !            84:  * updates implementation, most pending metadata dependencies should
        !            85:  * not wait for more than a few seconds. Thus, mounted block devices
        !            86:  * are delayed only about half the time that file data is delayed.
        !            87:  * Similarly, directory updates are more critical, so are only delayed
        !            88:  * about a third the time that file data is delayed. Thus, there are
        !            89:  * SYNCER_MAXDELAY queues that are processed round-robin at a rate of
        !            90:  * one each second (driven off the filesystem syncer process). The
        !            91:  * syncer_delayno variable indicates the next queue that is to be processed.
        !            92:  * Items that need to be processed soon are placed in this queue:
        !            93:  *
        !            94:  *     syncer_workitem_pending[syncer_delayno]
        !            95:  *
        !            96:  * A delay of fifteen seconds is done by placing the request fifteen
        !            97:  * entries later in the queue:
        !            98:  *
        !            99:  *     syncer_workitem_pending[(syncer_delayno + 15) & syncer_mask]
        !           100:  *
        !           101:  */
        !           102:
        !           103: void
        !           104: vn_initialize_syncerd(void)
        !           105: {
        !           106:        syncer_workitem_pending = hashinit(syncer_maxdelay, M_VNODE, M_WAITOK,
        !           107:            &syncer_mask);
        !           108:        syncer_maxdelay = syncer_mask + 1;
        !           109: }
        !           110:
        !           111: /*
        !           112:  * Add an item to the syncer work queue.
        !           113:  */
        !           114: void
        !           115: vn_syncer_add_to_worklist(struct vnode *vp, int delay)
        !           116: {
        !           117:        int s, slot;
        !           118:
        !           119:        if (delay > syncer_maxdelay - 2)
        !           120:                delay = syncer_maxdelay - 2;
        !           121:        slot = (syncer_delayno + delay) & syncer_mask;
        !           122:
        !           123:        s = splbio();
        !           124:        if (vp->v_bioflag & VBIOONSYNCLIST)
        !           125:                LIST_REMOVE(vp, v_synclist);
        !           126:
        !           127:        vp->v_bioflag |= VBIOONSYNCLIST;
        !           128:        LIST_INSERT_HEAD(&syncer_workitem_pending[slot], vp, v_synclist);
        !           129:        splx(s);
        !           130: }
        !           131:
        !           132: /*
        !           133:  * System filesystem synchronizer daemon.
        !           134:  */
        !           135: void
        !           136: sched_sync(struct proc *p)
        !           137: {
        !           138:        struct synclist *slp;
        !           139:        struct vnode *vp;
        !           140:        long starttime;
        !           141:        int s;
        !           142:
        !           143:        syncerproc = curproc;
        !           144:
        !           145:        for (;;) {
        !           146:                starttime = time_second;
        !           147:
        !           148:                /*
        !           149:                 * Push files whose dirty time has expired.
        !           150:                 */
        !           151:                s = splbio();
        !           152:                slp = &syncer_workitem_pending[syncer_delayno];
        !           153:
        !           154:                syncer_delayno += 1;
        !           155:                if (syncer_delayno == syncer_maxdelay)
        !           156:                        syncer_delayno = 0;
        !           157:
        !           158:                while ((vp = LIST_FIRST(slp)) != NULL) {
        !           159:                        if (vget(vp, LK_EXCLUSIVE | LK_NOWAIT, p)) {
        !           160:                                /*
        !           161:                                 * If we fail to get the lock, we move this
        !           162:                                 * vnode one second ahead in time.
        !           163:                                 * XXX - no good, but the best we can do.
        !           164:                                 */
        !           165:                                vn_syncer_add_to_worklist(vp, 1);
        !           166:                                continue;
        !           167:                        }
        !           168:                        splx(s);
        !           169:                        (void) VOP_FSYNC(vp, p->p_ucred, MNT_LAZY, p);
        !           170:                        vput(vp);
        !           171:                        s = splbio();
        !           172:                        if (LIST_FIRST(slp) == vp) {
        !           173:                                /*
        !           174:                                 * Note: disk vps can remain on the
        !           175:                                 * worklist too with no dirty blocks, but
        !           176:                                 * since sync_fsync() moves it to a different
        !           177:                                 * slot we are safe.
        !           178:                                 */
        !           179: #ifdef DIAGNOSTIC
        !           180:                                if (LIST_FIRST(&vp->v_dirtyblkhd) == NULL &&
        !           181:                                    vp->v_type != VBLK) {
        !           182:                                        vprint("fsync failed", vp);
        !           183:                                        if (vp->v_mount != NULL)
        !           184:                                                printf("mounted on: %s\n",
        !           185:                                                    vp->v_mount->mnt_stat.f_mntonname);
        !           186:                                        panic("sched_sync: fsync failed");
        !           187:                                }
        !           188: #endif /* DIAGNOSTIC */
        !           189:                                /*
        !           190:                                 * Put us back on the worklist.  The worklist
        !           191:                                 * routine will remove us from our current
        !           192:                                 * position and then add us back in at a later
        !           193:                                 * position.
        !           194:                                 */
        !           195:                                vn_syncer_add_to_worklist(vp, syncdelay);
        !           196:                        }
        !           197:                }
        !           198:
        !           199:                splx(s);
        !           200:
        !           201: #ifdef FFS_SOFTUPDATES
        !           202:                /*
        !           203:                 * Do soft update processing.
        !           204:                 */
        !           205:                softdep_process_worklist(NULL);
        !           206: #endif
        !           207:
        !           208:                /*
        !           209:                 * The variable rushjob allows the kernel to speed up the
        !           210:                 * processing of the filesystem syncer process. A rushjob
        !           211:                 * value of N tells the filesystem syncer to process the next
        !           212:                 * N seconds worth of work on its queue ASAP. Currently rushjob
        !           213:                 * is used by the soft update code to speed up the filesystem
        !           214:                 * syncer process when the incore state is getting so far
        !           215:                 * ahead of the disk that the kernel memory pool is being
        !           216:                 * threatened with exhaustion.
        !           217:                 */
        !           218:                if (rushjob > 0) {
        !           219:                        rushjob -= 1;
        !           220:                        continue;
        !           221:                }
        !           222:                /*
        !           223:                 * If it has taken us less than a second to process the
        !           224:                 * current work, then wait. Otherwise start right over
        !           225:                 * again. We can still lose time if any single round
        !           226:                 * takes more than two seconds, but it does not really
        !           227:                 * matter as we are just trying to generally pace the
        !           228:                 * filesystem activity.
        !           229:                 */
        !           230:                if (time_second == starttime)
        !           231:                        tsleep(&lbolt, PPAUSE, "syncer", 0);
        !           232:        }
        !           233: }
        !           234:
        !           235: /*
        !           236:  * Request the syncer daemon to speed up its work.
        !           237:  * We never push it to speed up more than half of its
        !           238:  * normal turn time, otherwise it could take over the cpu.
        !           239:  */
        !           240: int
        !           241: speedup_syncer(void)
        !           242: {
        !           243:        int s;
        !           244:
        !           245:        SCHED_LOCK(s);
        !           246:        if (syncerproc && syncerproc->p_wchan == &lbolt)
        !           247:                setrunnable(syncerproc);
        !           248:        SCHED_UNLOCK(s);
        !           249:        if (rushjob < syncdelay / 2) {
        !           250:                rushjob += 1;
        !           251:                stat_rush_requests += 1;
        !           252:                return 1;
        !           253:        }
        !           254:        return 0;
        !           255: }
        !           256:
        !           257: /*
        !           258:  * Routine to create and manage a filesystem syncer vnode.
        !           259:  */
        !           260: #define sync_close nullop
        !           261: int   sync_fsync(void *);
        !           262: int   sync_inactive(void *);
        !           263: #define sync_reclaim nullop
        !           264: #define sync_lock vop_generic_lock
        !           265: #define sync_unlock vop_generic_unlock
        !           266: int   sync_print(void *);
        !           267: #define sync_islocked vop_generic_islocked
        !           268:
        !           269: int (**sync_vnodeop_p)(void *);
        !           270: struct vnodeopv_entry_desc sync_vnodeop_entries[] = {
        !           271:       { &vop_default_desc, vn_default_error },
        !           272:       { &vop_close_desc, sync_close },                /* close */
        !           273:       { &vop_fsync_desc, sync_fsync },                /* fsync */
        !           274:       { &vop_inactive_desc, sync_inactive },          /* inactive */
        !           275:       { &vop_reclaim_desc, sync_reclaim },            /* reclaim */
        !           276:       { &vop_lock_desc, sync_lock },                  /* lock */
        !           277:       { &vop_unlock_desc, sync_unlock },              /* unlock */
        !           278:       { &vop_print_desc, sync_print },                /* print */
        !           279:       { &vop_islocked_desc, sync_islocked },          /* islocked */
        !           280:       { (struct vnodeop_desc*)NULL, (int(*)(void *))NULL }
        !           281: };
        !           282: struct vnodeopv_desc sync_vnodeop_opv_desc = {
        !           283:        &sync_vnodeop_p, sync_vnodeop_entries
        !           284: };
        !           285:
        !           286: /*
        !           287:  * Create a new filesystem syncer vnode for the specified mount point.
        !           288:  */
        !           289: int
        !           290: vfs_allocate_syncvnode(struct mount *mp)
        !           291: {
        !           292:        struct vnode *vp;
        !           293:        static long start, incr, next;
        !           294:        int error;
        !           295:
        !           296:        /* Allocate a new vnode */
        !           297:        if ((error = getnewvnode(VT_VFS, mp, sync_vnodeop_p, &vp)) != 0) {
        !           298:                mp->mnt_syncer = NULL;
        !           299:                return (error);
        !           300:        }
        !           301:        vp->v_writecount = 1;
        !           302:        vp->v_type = VNON;
        !           303:        /*
        !           304:         * Place the vnode onto the syncer worklist. We attempt to
        !           305:         * scatter them about on the list so that they will go off
        !           306:         * at evenly distributed times even if all the filesystems
        !           307:         * are mounted at once.
        !           308:         */
        !           309:        next += incr;
        !           310:        if (next == 0 || next > syncer_maxdelay) {
        !           311:                start /= 2;
        !           312:                incr /= 2;
        !           313:                if (start == 0) {
        !           314:                        start = syncer_maxdelay / 2;
        !           315:                        incr = syncer_maxdelay;
        !           316:                }
        !           317:                next = start;
        !           318:        }
        !           319:        vn_syncer_add_to_worklist(vp, next);
        !           320:        mp->mnt_syncer = vp;
        !           321:        return (0);
        !           322: }
        !           323:
        !           324: /*
        !           325:  * Do a lazy sync of the filesystem.
        !           326:  */
        !           327: int
        !           328: sync_fsync(void *v)
        !           329: {
        !           330:        struct vop_fsync_args *ap = v;
        !           331:        struct vnode *syncvp = ap->a_vp;
        !           332:        struct mount *mp = syncvp->v_mount;
        !           333:        int asyncflag;
        !           334:
        !           335:        /*
        !           336:         * We only need to do something if this is a lazy evaluation.
        !           337:         */
        !           338:        if (ap->a_waitfor != MNT_LAZY)
        !           339:                return (0);
        !           340:
        !           341:        /*
        !           342:         * Move ourselves to the back of the sync list.
        !           343:         */
        !           344:        vn_syncer_add_to_worklist(syncvp, syncdelay);
        !           345:
        !           346:        /*
        !           347:         * Walk the list of vnodes pushing all that are dirty and
        !           348:         * not already on the sync list.
        !           349:         */
        !           350:        if (vfs_busy(mp, VB_READ|VB_NOWAIT) == 0) {
        !           351:                asyncflag = mp->mnt_flag & MNT_ASYNC;
        !           352:                mp->mnt_flag &= ~MNT_ASYNC;
        !           353:                VFS_SYNC(mp, MNT_LAZY, ap->a_cred, ap->a_p);
        !           354:                if (asyncflag)
        !           355:                        mp->mnt_flag |= MNT_ASYNC;
        !           356:                vfs_unbusy(mp);
        !           357:        }
        !           358:
        !           359:        return (0);
        !           360: }
        !           361:
        !           362: /*
        !           363:  * The syncer vnode is no longer needed and is being decommissioned.
        !           364:  */
        !           365: int
        !           366: sync_inactive(void *v)
        !           367: {
        !           368:        struct vop_inactive_args *ap = v;
        !           369:
        !           370:        struct vnode *vp = ap->a_vp;
        !           371:        int s;
        !           372:
        !           373:        if (vp->v_usecount == 0) {
        !           374:                VOP_UNLOCK(vp, 0, ap->a_p);
        !           375:                return (0);
        !           376:        }
        !           377:
        !           378:        vp->v_mount->mnt_syncer = NULL;
        !           379:
        !           380:        s = splbio();
        !           381:
        !           382:        LIST_REMOVE(vp, v_synclist);
        !           383:        vp->v_bioflag &= ~VBIOONSYNCLIST;
        !           384:
        !           385:        splx(s);
        !           386:
        !           387:        vp->v_writecount = 0;
        !           388:        vput(vp);
        !           389:
        !           390:        return (0);
        !           391: }
        !           392:
        !           393: /*
        !           394:  * Print out a syncer vnode.
        !           395:  */
        !           396: int
        !           397: sync_print(void *v)
        !           398: {
        !           399:        printf("syncer vnode\n");
        !           400:
        !           401:        return (0);
        !           402: }

CVSweb