
Annotation of sys/kern/vfs_bio.c, Revision 1.1.1.1

1.1       nbrk        1: /*     $OpenBSD: vfs_bio.c,v 1.99 2007/08/07 04:32:45 beck Exp $       */
                      2: /*     $NetBSD: vfs_bio.c,v 1.44 1996/06/11 11:15:36 pk Exp $  */
                      3:
                      4: /*-
                      5:  * Copyright (c) 1994 Christopher G. Demetriou
                      6:  * Copyright (c) 1982, 1986, 1989, 1993
                      7:  *     The Regents of the University of California.  All rights reserved.
                      8:  * (c) UNIX System Laboratories, Inc.
                      9:  * All or some portions of this file are derived from material licensed
                     10:  * to the University of California by American Telephone and Telegraph
                     11:  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
                     12:  * the permission of UNIX System Laboratories, Inc.
                     13:  *
                     14:  * Redistribution and use in source and binary forms, with or without
                     15:  * modification, are permitted provided that the following conditions
                     16:  * are met:
                     17:  * 1. Redistributions of source code must retain the above copyright
                     18:  *    notice, this list of conditions and the following disclaimer.
                     19:  * 2. Redistributions in binary form must reproduce the above copyright
                     20:  *    notice, this list of conditions and the following disclaimer in the
                     21:  *    documentation and/or other materials provided with the distribution.
                     22:  * 3. Neither the name of the University nor the names of its contributors
                     23:  *    may be used to endorse or promote products derived from this software
                     24:  *    without specific prior written permission.
                     25:  *
                     26:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
                     27:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     28:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     29:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
                     30:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
                     31:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
                     32:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                     33:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                     34:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                     35:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                     36:  * SUCH DAMAGE.
                     37:  *
                     38:  *     @(#)vfs_bio.c   8.6 (Berkeley) 1/11/94
                     39:  */
                     40:
                     41: /*
                     42:  * Some references:
                     43:  *     Bach: The Design of the UNIX Operating System (Prentice Hall, 1986)
                     44:  *     Leffler, et al.: The Design and Implementation of the 4.3BSD
                      45:  *             UNIX Operating System (Addison Wesley, 1989)
                     46:  */
                     47:
                     48: #include <sys/param.h>
                     49: #include <sys/systm.h>
                     50: #include <sys/proc.h>
                     51: #include <sys/buf.h>
                     52: #include <sys/vnode.h>
                     53: #include <sys/mount.h>
                     54: #include <sys/malloc.h>
                     55: #include <sys/pool.h>
                     56: #include <sys/resourcevar.h>
                     57: #include <sys/conf.h>
                     58: #include <sys/kernel.h>
                     59:
                     60: #include <uvm/uvm_extern.h>
                     61:
                     62: #include <miscfs/specfs/specdev.h>
                     63:
                     64: /*
                     65:  * Definitions for the buffer hash lists.
                     66:  */
                     67: #define        BUFHASH(dvp, lbn)       \
                     68:        (&bufhashtbl[((long)(dvp) / sizeof(*(dvp)) + (int)(lbn)) & bufhash])
                     69: LIST_HEAD(bufhashhdr, buf) *bufhashtbl, invalhash;
                     70: u_long bufhash;
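
                          /*
                           * Illustrative use of BUFHASH (a sketch, not code from this
                           * file): the (vnode, logical block number) pair selects a hash
                           * chain, which is then walked with LIST_FOREACH, as incore()
                           * and getblk() do below.
                           *
                           *	struct bufhashhdr *bh = BUFHASH(vp, blkno);
                           *	struct buf *bp;
                           *
                           *	LIST_FOREACH(bp, bh, b_hash) {
                           *		if (bp->b_lblkno == blkno && bp->b_vp == vp)
                           *			break;		(found a cached buffer)
                           *	}
                           */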
                     71:
                     72: /*
                     73:  * Insq/Remq for the buffer hash lists.
                     74:  */
                     75: #define        binshash(bp, dp)        LIST_INSERT_HEAD(dp, bp, b_hash)
                     76: #define        bremhash(bp)            LIST_REMOVE(bp, b_hash)
                     77:
                     78: /*
                     79:  * Definitions for the buffer free lists.
                     80:  */
                     81: #define        BQUEUES         6               /* number of free buffer queues */
                     82:
                     83: #define        BQ_DIRTY        0               /* LRU queue with dirty buffers */
                      84:
                     86: TAILQ_HEAD(bqueues, buf) bufqueues[BQUEUES];
                     87: int bqpages[BQUEUES];          /* pages allocated, per queue */
                     88: int bqpagelow;
                     89: int needbuffer;
                     90: struct bio_ops bioops;
                     91:
                     92: /*
                     93:  * Buffer pool for I/O buffers.
                     94:  */
                     95: struct pool bufpool;
                     96: struct vm_map *buf_map;
                     97: struct bufhead bufhead = LIST_HEAD_INITIALIZER(bufhead);
                     98: struct buf *buf_get(size_t);
                     99: struct buf *buf_stub(struct vnode *, daddr64_t);
                    100: void buf_put(struct buf *);
                    101:
                    102: /*
                    103:  * Insq/Remq for the buffer free lists.
                    104:  */
                    105: #define        binsheadfree(bp, dp)    TAILQ_INSERT_HEAD(dp, bp, b_freelist)
                    106: #define        binstailfree(bp, dp)    TAILQ_INSERT_TAIL(dp, bp, b_freelist)
                    107:
                    108: struct buf *bio_doread(struct vnode *, daddr64_t, int, int);
                    109: struct buf *getnewbuf(size_t, int, int, int *);
                    110: void buf_init(struct buf *, int);
                    111: void bread_cluster_callback(struct buf *);
                    112:
                    113: /*
                     114:  * We keep a few counters to monitor the utilization of the buffer cache:
                     115:  *
                     116:  *  numbufpages   - total number of pages allocated.
                     117:  *  numdirtypages - number of pages on BQ_DIRTY queue.
                     118:  *  lodirtypages  - low water mark for buffer cleaning daemon.
                     119:  *  hidirtypages  - high water mark for buffer cleaning daemon.
                     120:  *  numfreepages  - number of pages on BQ_CLEAN and BQ_DIRTY queues (unused).
                     121:  *  numcleanpages - number of pages on BQ_CLEAN queue.
                     122:  *                 Used to track the need to speed up the cleaner and
                     123:  *                 as a reserve for special processes like the syncer.
                    124:  *  maxcleanpages - the highest page count on BQ_CLEAN.
                    125:  */
                    126: long numbufpages;
                    127: long numdirtypages;
                    128: long lodirtypages;
                    129: long hidirtypages;
                    130: long numfreepages;
                    131: long numcleanpages;
                    132: long locleanpages;
                    133: long hicleanpages;
                    134: long maxcleanpages;
                    135:
                    136: struct proc *cleanerproc;
                    137: int bd_req;                    /* Sleep point for cleaner daemon. */
                    138:
                    139: int size2cqueue(int *size);
                    140:
                    141: int
                    142: size2cqueue(int *size)
                    143: {
                    144:        int i = 0, q;
                    145:        int s = *size;
                    146:        s -= 1;
                    147:        while (s > 0) {
                    148:                s = s >> 1;
                    149:                i++;
                    150:        }
                    151:        if (i < PAGE_SHIFT) {
                    152:                i = PAGE_SHIFT; /* < 4096 -> 4096 */
                     153:        }
                    154:        *size = 1 << i;
                    155:        q = (i + 1 - PAGE_SHIFT); /* XXX 4096 is queue 1 */
                    156:        if (q >= BQUEUES)
                     157:                panic("queue %d >= BQUEUES %d", q, BQUEUES);
                    158:        if (q == 0)
                    159:                panic("can't return dirty q");
                     160:        return (q);
                    161: }
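
                          /*
                           * Worked examples for size2cqueue(), assuming PAGE_SHIFT == 12
                           * (4096-byte pages); with a different page size the queue
                           * numbers shift accordingly:
                           *
                           *	size in		size out	queue
                           *	  100		 4096		1	(rounded up to one page)
                           *	 4096		 4096		1
                           *	 6000		 8192		2	(next power of two)
                           *	16384		16384		3
                           *
                           * Queue 0 (BQ_DIRTY) is never returned here; it holds dirty
                           * buffers of all sizes.
                           */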
                    162:
                    163: void
                    164: bremfree(struct buf *bp)
                    165: {
                    166:        struct bqueues *dp = NULL;
                    167:        int queue;
                    168:
                    169:        /*
                    170:         * We only calculate the head of the freelist when removing
                    171:         * the last element of the list as that is the only time that
                    172:         * it is needed (e.g. to reset the tail pointer).
                    173:         *
                    174:         * NB: This makes an assumption about how tailq's are implemented.
                    175:         */
                    176:        if (TAILQ_NEXT(bp, b_freelist) == NULL) {
                    177:                for (dp = bufqueues; dp < &bufqueues[BQUEUES]; dp++)
                    178:                        if (dp->tqh_last == &TAILQ_NEXT(bp, b_freelist))
                    179:                                break;
                    180:                if (dp == &bufqueues[BQUEUES])
                    181:                        panic("bremfree: lost tail");
                    182:        }
                    183:        numfreepages -= btoc(bp->b_bufsize);
                    184:        if (!ISSET(bp->b_flags, B_DELWRI)) {
                    185:                int qs = bp->b_bufsize;
                    186:                queue = size2cqueue(&qs);
                    187:                numcleanpages -= btoc(bp->b_bufsize);
                    188:                bqpages[queue] -= btoc(bp->b_bufsize);
                    189:        } else
                    190:                numdirtypages -= btoc(bp->b_bufsize);
                    191:        TAILQ_REMOVE(dp, bp, b_freelist);
                    192: }
                    193:
                    194: void
                    195: buf_init(struct buf *bp, int size)
                    196: {
                    197:        int npages, queue;
                    198:
                    199:        splassert(IPL_BIO);
                    200:
                    201:        npages = btoc(size);
                    202:        bzero((char *)bp, sizeof *bp);
                    203:        bp->b_vnbufs.le_next = NOLIST;
                    204:        bp->b_freelist.tqe_next = NOLIST;
                    205:        bp->b_synctime = time_uptime + 300;
                    206:        bp->b_dev = NODEV;
                    207:        queue = size2cqueue(&size);
                    208:        LIST_INIT(&bp->b_dep);
                    209:        numbufpages += npages;
                    210:        numfreepages += npages;
                    211:        numcleanpages += npages;
                    212:        bqpages[queue] += npages;
                    213:        if (maxcleanpages < numcleanpages)
                    214:                maxcleanpages = numcleanpages;
                    215: }
                    216:
                    217: /*
                    218:  * This is a non-sleeping expanded equivalent of getblk() that allocates only
                    219:  * the buffer structure, and not its contents.
                    220:  */
                    221: struct buf *
                    222: buf_stub(struct vnode *vp, daddr64_t lblkno)
                    223: {
                    224:        struct buf *bp;
                    225:        int s;
                    226:
                    227:        s = splbio();
                    228:        bp = pool_get(&bufpool, PR_NOWAIT);
                    229:        splx(s);
                    230:
                    231:        if (bp == NULL)
                    232:                return (NULL);
                    233:
                    234:        bzero((char *)bp, sizeof *bp);
                    235:        bp->b_vnbufs.le_next = NOLIST;
                    236:        bp->b_freelist.tqe_next = NOLIST;
                    237:        bp->b_synctime = time_uptime + 300;
                    238:        bp->b_dev = NODEV;
                    239:        bp->b_bufsize = 0;
                    240:        bp->b_data = NULL;
                    241:        bp->b_flags = B_BUSY;
                    243:        bp->b_blkno = bp->b_lblkno = lblkno;
                    244:        bp->b_iodone = NULL;
                    245:        bp->b_error = 0;
                    246:        bp->b_resid = 0;
                    247:        bp->b_bcount = 0;
                    248:        bp->b_dirtyoff = bp->b_dirtyend = 0;
                    249:        bp->b_validoff = bp->b_validend = 0;
                    250:
                    251:        LIST_INIT(&bp->b_dep);
                    252:
                    253:        s = splbio();
                    254:        LIST_INSERT_HEAD(&bufhead, bp, b_list);
                    255:        bgetvp(vp, bp);
                    256:        splx(s);
                    257:
                    258:        return (bp);
                    259: }
                    260:
                    261: struct buf *
                    262: buf_get(size_t size)
                    263: {
                    264:        struct bqueues *dp;
                    265:        struct buf *bp;
                    266:        int npages;
                    267:        int queue, qs;
                    268:        void *data;
                    269:
                    270:        splassert(IPL_BIO);
                    271:
                    272:        KASSERT(size > 0);
                    273:
                    274:        size = round_page(size);
                    275:        qs = size;
                    276:        queue = size2cqueue(&qs);
                    277:        npages = btoc(qs);
                    278:
                    279:        if (numbufpages + npages > bufpages)
                    280:                return (NULL);
                    281:
                    282:        bp = pool_get(&bufpool, PR_WAITOK);
                    283:
                    284:        data = (void *)uvm_km_alloc(buf_map, qs);
                    285:        if (data == NULL) {
                    286:                pool_put(&bufpool, bp);
                    287:                return (NULL);
                    288:        }
                    289:        buf_init(bp, qs);
                    290:        bp->b_flags = B_INVAL;
                    291:        bp->b_bufsize = qs;
                    292:        bp->b_data = data;
                    293:        dp = &bufqueues[queue];
                    294:        binsheadfree(bp, dp);
                    295:        binshash(bp, &invalhash);
                    296:        LIST_INSERT_HEAD(&bufhead, bp, b_list);
                    297:
                    298:        return (bp);
                    299: }
                    300:
                    301: void
                    302: buf_put(struct buf *bp)
                    303: {
                    304:        splassert(IPL_BIO);
                    305: #ifdef DIAGNOSTIC
                    306:        if (bp->b_data != NULL)
                    307:                KASSERT(bp->b_bufsize > 0);
                    308: #endif
                    309: #ifdef QUEUE_MACRO_DEBUG
                    310:        if (bp->b_freelist.tqe_next != NOLIST &&
                    311:            bp->b_freelist.tqe_next != (void *)-1)
                    312:                panic("buf_put: still on the free list");
                    313:
                    314:        if (bp->b_vnbufs.le_next != NOLIST &&
                    315:            bp->b_vnbufs.le_next != (void *)-1)
                    316:                panic("buf_put: still on the vnode list");
                    317: #endif
                    318: #ifdef DIAGNOSTIC
                    319:        if (!LIST_EMPTY(&bp->b_dep))
                    320:                panic("buf_put: b_dep is not empty");
                    321: #endif
                    322:        LIST_REMOVE(bp, b_list);
                    323:
                    324:        if (bp->b_data != NULL) {
                    325:                bremhash(bp);
                    326:                numbufpages -= btoc(bp->b_bufsize);
                    327:                uvm_km_free(buf_map, (vaddr_t)bp->b_data, bp->b_bufsize);
                    328:        }
                    329:
                    330:        pool_put(&bufpool, bp);
                    331: }
                    332:
                    333: /*
                    334:  * Initialize buffers and hash links for buffers.
                    335:  */
                    336: void
                    337: bufinit(void)
                    338: {
                    339:        vaddr_t minaddr, maxaddr;
                    340:        struct bqueues *dp;
                    341:
                    342:        pool_init(&bufpool, sizeof(struct buf), 0, 0, 0, "bufpl", NULL);
                    343:        pool_setipl(&bufpool, IPL_BIO);
                    344:        for (dp = bufqueues; dp < &bufqueues[BQUEUES]; dp++)
                    345:                TAILQ_INIT(dp);
                    346:        minaddr = vm_map_min(kernel_map);
                    347:        buf_map = uvm_km_suballoc(kernel_map, &minaddr, &maxaddr,
                    348:            ptoa(bufpages), 0, FALSE, NULL);
                    349:
                    350:        /*
                    351:         * XXX don't starve any one queue below 5% of the total number
                    352:         * of buffer cache pages.
                    353:         */
                    354:        bqpagelow = bufpages / 20;
                    355:
                    356:        bufhashtbl = hashinit(bufpages / 4, M_CACHE, M_WAITOK, &bufhash);
                    357:        hidirtypages = (bufpages / 4) * 3;
                    358:        lodirtypages = bufpages / 2;
                    359:
                    360:        /*
                     361:         * Reserve 5% of bufpages for the syncer's needs,
                     362:         * but not more than 25% and, if possible, not
                     363:         * less than 2 * MAXBSIZE; the locleanpages value
                     364:         * must not be too small.
                    365:         */
                    366:        hicleanpages = bufpages / 2;
                    367:        locleanpages = hicleanpages / 2;
                    368:        if (locleanpages < btoc(2 * MAXBSIZE))
                    369:                locleanpages = btoc(2 * MAXBSIZE);
                    370:        if (locleanpages > bufpages / 4)
                    371:                locleanpages = bufpages / 4;
                    372:
                    373:        maxcleanpages = locleanpages;
                    374: }
                    375:
                    376: struct buf *
                    377: bio_doread(struct vnode *vp, daddr64_t blkno, int size, int async)
                    378: {
                    379:        struct buf *bp;
                    380:
                    381:        bp = getblk(vp, blkno, size, 0, 0);
                    382:
                    383:        /*
                    384:         * If buffer does not have valid data, start a read.
                    385:         * Note that if buffer is B_INVAL, getblk() won't return it.
                    386:         * Therefore, it's valid if its I/O has completed or been delayed.
                    387:         */
                    388:        if (!ISSET(bp->b_flags, (B_DONE | B_DELWRI))) {
                    389:                SET(bp->b_flags, B_READ | async);
                    390:                VOP_STRATEGY(bp);
                    391:
                    392:                /* Pay for the read. */
                    393:                curproc->p_stats->p_ru.ru_inblock++;            /* XXX */
                    394:        } else if (async) {
                    395:                brelse(bp);
                    396:        }
                    397:
                    398:        return (bp);
                    399: }
                    400:
                    401: /*
                    402:  * Read a disk block.
                    403:  * This algorithm described in Bach (p.54).
                    404:  */
                    405: int
                    406: bread(struct vnode *vp, daddr64_t blkno, int size, struct ucred *cred,
                    407:     struct buf **bpp)
                    408: {
                    409:        struct buf *bp;
                    410:
                    411:        /* Get buffer for block. */
                    412:        bp = *bpp = bio_doread(vp, blkno, size, 0);
                    413:
                    414:        /* Wait for the read to complete, and return result. */
                    415:        return (biowait(bp));
                    416: }
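
                          /*
                           * Typical caller pattern for bread(), a sketch only: vp, lbn
                           * and bsize are assumed to come from the calling filesystem.
                           *
                           *	struct buf *bp;
                           *	int error;
                           *
                           *	error = bread(vp, lbn, bsize, NOCRED, &bp);
                           *	if (error) {
                           *		brelse(bp);
                           *		return (error);
                           *	}
                           *	... use the data at bp->b_data ...
                           *	brelse(bp);
                           */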
                    417:
                    418: /*
                    419:  * Read-ahead multiple disk blocks. The first is sync, the rest async.
                    420:  * Trivial modification to the breada algorithm presented in Bach (p.55).
                    421:  */
                    422: int
                    423: breadn(struct vnode *vp, daddr64_t blkno, int size, daddr64_t rablks[],
                    424:     int rasizes[], int nrablks, struct ucred *cred, struct buf **bpp)
                    425: {
                    426:        struct buf *bp;
                    427:        int i;
                    428:
                    429:        bp = *bpp = bio_doread(vp, blkno, size, 0);
                    430:
                    431:        /*
                    432:         * For each of the read-ahead blocks, start a read, if necessary.
                    433:         */
                    434:        for (i = 0; i < nrablks; i++) {
                    435:                /* If it's in the cache, just go on to next one. */
                    436:                if (incore(vp, rablks[i]))
                    437:                        continue;
                    438:
                    439:                /* Get a buffer for the read-ahead block */
                    440:                (void) bio_doread(vp, rablks[i], rasizes[i], B_ASYNC);
                    441:        }
                    442:
                    443:        /* Otherwise, we had to start a read for it; wait until it's valid. */
                    444:        return (biowait(bp));
                    445: }
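
                          /*
                           * Sketch of a breadn() call that reads one block synchronously
                           * and starts asynchronous read-ahead on the two following
                           * blocks (the block numbers and sizes are illustrative):
                           *
                           *	daddr64_t rablks[2] = { lbn + 1, lbn + 2 };
                           *	int rasizes[2] = { bsize, bsize };
                           *	struct buf *bp;
                           *	int error;
                           *
                           *	error = breadn(vp, lbn, bsize, rablks, rasizes, 2,
                           *	    NOCRED, &bp);
                           *
                           * Only the first block is waited for; each read-ahead buffer
                           * is released by biodone() when its I/O completes.
                           */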
                    446:
                    447: /*
                    448:  * Called from interrupt context.
                    449:  */
                    450: void
                    451: bread_cluster_callback(struct buf *bp)
                    452: {
                    453:        int i;
                    454:        struct buf **xbpp;
                    455:
                    456:        xbpp = (struct buf **)bp->b_saveaddr;
                    457:
                    458:        for (i = 0; xbpp[i] != 0; i++) {
                    459:                if (ISSET(bp->b_flags, B_ERROR))
                    460:                        SET(xbpp[i]->b_flags, B_INVAL | B_ERROR);
                    461:                biodone(xbpp[i]);
                    462:        }
                    463:
                    464:        free(xbpp, M_TEMP);
                    465:        bp->b_data = NULL;
                    466:        buf_put(bp);
                    467: }
                    468:
                    469: int
                    470: bread_cluster(struct vnode *vp, daddr64_t blkno, int size, struct buf **rbpp)
                    471: {
                    472:        struct buf *bp, **xbpp;
                    473:        int howmany, i, maxra, inc;
                    474:        daddr64_t sblkno;
                    475:        size_t spill;
                    476:
                    477:        *rbpp = bio_doread(vp, blkno, size, 0);
                    478:
                    479:        if (size != round_page(size))
                    480:                return (biowait(*rbpp));
                    481:
                    482:        if (VOP_BMAP(vp, blkno + 1, NULL, &sblkno, &maxra))
                    483:                return (biowait(*rbpp));
                    484:
                    485:        maxra++;
                    486:        if (sblkno == -1 || maxra < 2)
                    487:                return (biowait(*rbpp));
                    488:
                    489:        howmany = MAXPHYS / size;
                    490:        if (howmany > maxra)
                    491:                howmany = maxra;
                    492:
                    493:        xbpp = malloc((howmany + 1) * sizeof(struct buf *), M_TEMP, M_NOWAIT);
                    494:        if (xbpp == NULL)
                    495:                return (biowait(*rbpp));
                    496:
                    497:        for (i = 0; i < howmany; i++) {
                    498:                if (incore(vp, blkno + i + 1)) {
                    499:                        for (--i; i >= 0; i--) {
                    500:                                SET(xbpp[i]->b_flags, B_INVAL);
                    501:                                brelse(xbpp[i]);
                    502:                        }
                    503:                        free(xbpp, M_TEMP);
                    504:                        return (biowait(*rbpp));
                    505:                }
                    506:                xbpp[i] = buf_stub(vp, blkno + i + 1);
                    507:                if (xbpp[i] == NULL) {
                    508:                        for (--i; i >= 0; i--) {
                    509:                                SET(xbpp[i]->b_flags, B_INVAL);
                    510:                                brelse(xbpp[i]);
                    511:                        }
                    512:                        free(xbpp, M_TEMP);
                    513:                        return (biowait(*rbpp));
                    514:                }
                    515:        }
                    516:
                    517:        xbpp[howmany] = 0;
                    518:
                    519:        bp = getnewbuf(howmany * size, 0, 0, NULL);
                    520:        if (bp == NULL) {
                    521:                for (i = 0; i < howmany; i++) {
                    522:                        SET(xbpp[i]->b_flags, B_INVAL);
                    523:                        brelse(xbpp[i]);
                    524:                }
                    525:                free(xbpp, M_TEMP);
                    526:                return (biowait(*rbpp));
                    527:        }
                    528:
                    529:        inc = btodb(size);
                    530:
                    531:        for (i = 0; i < howmany; i++) {
                    532:                SET(xbpp[i]->b_flags, B_READ | B_ASYNC);
                    533:                binshash(xbpp[i], BUFHASH(vp, xbpp[i]->b_lblkno));
                    534:                xbpp[i]->b_blkno = sblkno + (i * inc);
                    535:                xbpp[i]->b_bufsize = xbpp[i]->b_bcount = size;
                    536:                xbpp[i]->b_data = bp->b_data + (i * size);
                    537:        }
                    538:
                    539:        bp->b_blkno = sblkno;
                    540:        bp->b_lblkno = blkno + 1;
                    541:        SET(bp->b_flags, B_READ | B_ASYNC | B_CALL);
                    542:        bp->b_saveaddr = (void *)xbpp;
                    543:        bp->b_iodone = bread_cluster_callback;
                    544:        bp->b_vp = vp;
                    545:        spill = bp->b_bufsize - bp->b_bcount;
                    546:        if (spill) {
                    547:                uvm_km_free(buf_map, (vaddr_t) bp->b_data + bp->b_bcount,
                    548:                    spill);
                    549:                numbufpages -= atop(spill);
                    550:        }
                    551:        VOP_STRATEGY(bp);
                    552:        curproc->p_stats->p_ru.ru_inblock++;
                    553:
                    554:        return (biowait(*rbpp));
                    555: }
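
                          /*
                           * From the caller's side bread_cluster() is used exactly like
                           * bread() (a sketch; vp, blkno and bsize are assumed):
                           *
                           *	struct buf *bp;
                           *	int error;
                           *
                           *	error = bread_cluster(vp, blkno, bsize, &bp);
                           *
                           * All of the machinery above is read-ahead: one large physical
                           * read is split across the stub buffers so that each following
                           * block is entered in the cache individually.
                           */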
                    556:
                    557: /*
                    558:  * Block write.  Described in Bach (p.56)
                    559:  */
                    560: int
                    561: bwrite(struct buf *bp)
                    562: {
                    563:        int rv, async, wasdelayed, s;
                    564:        struct vnode *vp;
                    565:        struct mount *mp;
                    566:
                    567:        vp = bp->b_vp;
                    568:        if (vp != NULL)
                    569:                mp = vp->v_type == VBLK? vp->v_specmountpoint : vp->v_mount;
                    570:        else
                    571:                mp = NULL;
                    572:
                    573:        /*
                    574:         * Remember buffer type, to switch on it later.  If the write was
                    575:         * synchronous, but the file system was mounted with MNT_ASYNC,
                    576:         * convert it to a delayed write.
                    577:         * XXX note that this relies on delayed tape writes being converted
                    578:         * to async, not sync writes (which is safe, but ugly).
                    579:         */
                    580:        async = ISSET(bp->b_flags, B_ASYNC);
                    581:        if (!async && mp && ISSET(mp->mnt_flag, MNT_ASYNC)) {
                    582:                bdwrite(bp);
                    583:                return (0);
                    584:        }
                    585:
                    586:        /*
                    587:         * Collect statistics on synchronous and asynchronous writes.
                    588:         * Writes to block devices are charged to their associated
                    589:         * filesystem (if any).
                    590:         */
                    591:        if (mp != NULL) {
                    592:                if (async)
                    593:                        mp->mnt_stat.f_asyncwrites++;
                    594:                else
                    595:                        mp->mnt_stat.f_syncwrites++;
                    596:        }
                    597:
                    598:        wasdelayed = ISSET(bp->b_flags, B_DELWRI);
                    599:        CLR(bp->b_flags, (B_READ | B_DONE | B_ERROR | B_DELWRI));
                    600:
                    601:        s = splbio();
                    602:
                    603:        /*
                     604:         * If the write was delayed, move the buf to the correct
                     605:         * vnode queue now that it is no longer dirty; otherwise
                     606:         * charge the process for the write.  We have to do this
                     607:         * now, because if we don't, the vnode may not be properly
                     608:         * notified that its I/O has completed.
                    608:         */
                    609:        if (wasdelayed) {
                    610:                reassignbuf(bp);
                    611:        } else
                    612:                curproc->p_stats->p_ru.ru_oublock++;
                     613:
                    615:        /* Initiate disk write.  Make sure the appropriate party is charged. */
                    616:        bp->b_vp->v_numoutput++;
                    617:        splx(s);
                    618:        SET(bp->b_flags, B_WRITEINPROG);
                    619:        VOP_STRATEGY(bp);
                    620:
                    621:        if (async)
                    622:                return (0);
                    623:
                    624:        /*
                    625:         * If I/O was synchronous, wait for it to complete.
                    626:         */
                    627:        rv = biowait(bp);
                    628:
                    629:        /* Release the buffer. */
                    630:        brelse(bp);
                    631:
                    632:        return (rv);
                    633: }
                    634:
                    635:
                    636: /*
                    637:  * Delayed write.
                    638:  *
                    639:  * The buffer is marked dirty, but is not queued for I/O.
                    640:  * This routine should be used when the buffer is expected
                    641:  * to be modified again soon, typically a small write that
                    642:  * partially fills a buffer.
                    643:  *
                    644:  * NB: magnetic tapes cannot be delayed; they must be
                    645:  * written in the order that the writes are requested.
                    646:  *
                    647:  * Described in Leffler, et al. (pp. 208-213).
                    648:  */
                    649: void
                    650: bdwrite(struct buf *bp)
                    651: {
                    652:        int s;
                    653:
                    654:        /*
                    655:         * If the block hasn't been seen before:
                    656:         *      (1) Mark it as having been seen,
                    657:         *      (2) Charge for the write.
                    658:         *      (3) Make sure it's on its vnode's correct block list,
                    659:         *      (4) If a buffer is rewritten, move it to end of dirty list
                    660:         */
                    661:        if (!ISSET(bp->b_flags, B_DELWRI)) {
                    662:                SET(bp->b_flags, B_DELWRI);
                    663:                bp->b_synctime = time_uptime + 35;
                    664:                s = splbio();
                    665:                reassignbuf(bp);
                    666:                splx(s);
                    667:                curproc->p_stats->p_ru.ru_oublock++;    /* XXX */
                    668:        } else {
                    669:                /*
                    670:                 * see if this buffer has slacked through the syncer
                    671:                 * and enforce an async write upon it.
                    672:                 */
                    673:                if (bp->b_synctime < time_uptime) {
                    674:                        bawrite(bp);
                    675:                        return;
                    676:                }
                    677:        }
                    678:
                    679:        /* If this is a tape block, write the block now. */
                    680:        if (major(bp->b_dev) < nblkdev &&
                    681:            bdevsw[major(bp->b_dev)].d_type == D_TAPE) {
                    682:                bawrite(bp);
                    683:                return;
                    684:        }
                    685:
                    686:        /* Otherwise, the "write" is done, so mark and release the buffer. */
                    687:        CLR(bp->b_flags, B_NEEDCOMMIT);
                    688:        SET(bp->b_flags, B_DONE);
                    689:        brelse(bp);
                    690: }
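
                          /*
                           * How the three write paths compare (a sketch; bp is a busy
                           * buffer owned by the caller):
                           *
                           *	bwrite(bp);	synchronous: starts the I/O and sleeps
                           *			in biowait() until it completes.
                           *	bawrite(bp);	asynchronous: starts the I/O and
                           *			returns; the buffer is released when
                           *			biodone() runs.
                           *	bdwrite(bp);	delayed: no I/O yet; the buffer is
                           *			marked B_DELWRI and released, to be
                           *			flushed later by the cleaner, the
                           *			syncer, or a subsequent bwrite().
                           */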
                    691:
                    692: /*
                    693:  * Asynchronous block write; just an asynchronous bwrite().
                    694:  */
                    695: void
                    696: bawrite(struct buf *bp)
                    697: {
                    698:
                    699:        SET(bp->b_flags, B_ASYNC);
                    700:        VOP_BWRITE(bp);
                    701: }
                    702:
                    703: /*
                    704:  * Must be called at splbio()
                    705:  */
                    706: void
                    707: buf_dirty(struct buf *bp)
                    708: {
                    709:        splassert(IPL_BIO);
                    710:
                    711:        if (ISSET(bp->b_flags, B_DELWRI) == 0) {
                    712:                SET(bp->b_flags, B_DELWRI);
                    713:                bp->b_synctime = time_uptime + 35;
                    714:                reassignbuf(bp);
                    715:        }
                    716: }
                    717:
                    718: /*
                    719:  * Must be called at splbio()
                    720:  */
                    721: void
                    722: buf_undirty(struct buf *bp)
                    723: {
                    724:        splassert(IPL_BIO);
                    725:
                    726:        if (ISSET(bp->b_flags, B_DELWRI)) {
                    727:                CLR(bp->b_flags, B_DELWRI);
                    728:                reassignbuf(bp);
                    729:        }
                    730: }
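
                          /*
                           * Both buf_dirty() and buf_undirty() assume the caller has
                           * already blocked disk interrupts, e.g. (a sketch):
                           *
                           *	int s = splbio();
                           *	buf_dirty(bp);
                           *	splx(s);
                           */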
                    731:
                    732: /*
                    733:  * Release a buffer on to the free lists.
                    734:  * Described in Bach (p. 46).
                    735:  */
                    736: void
                    737: brelse(struct buf *bp)
                    738: {
                    739:        struct bqueues *bufq;
                    740:        int s;
                    741:
                    742:        /* Block disk interrupts. */
                    743:        s = splbio();
                    744:
                    745:        if (bp->b_data != NULL)
                    746:                KASSERT(bp->b_bufsize > 0);
                    747:
                    748:        /*
                    749:         * Determine which queue the buffer should be on, then put it there.
                    750:         */
                    751:
                    752:        /* If it's not cacheable, or an error, mark it invalid. */
                    753:        if (ISSET(bp->b_flags, (B_NOCACHE|B_ERROR)))
                    754:                SET(bp->b_flags, B_INVAL);
                    755:
                    756:        if (ISSET(bp->b_flags, B_INVAL)) {
                    757:                int queue, qs;
                    758:
                    759:                /*
                    760:                 * If the buffer is invalid, place it in the clean queue, so it
                    761:                 * can be reused.
                    762:                 */
                    763:                if (LIST_FIRST(&bp->b_dep) != NULL)
                    764:                        buf_deallocate(bp);
                    765:
                    766:                if (ISSET(bp->b_flags, B_DELWRI)) {
                    767:                        CLR(bp->b_flags, B_DELWRI);
                    768:                }
                    769:
                    770:                if (bp->b_vp)
                    771:                        brelvp(bp);
                    772:
                    773:                /*
                    774:                 * If the buffer has no associated data, place it back in the
                    775:                 * pool.
                    776:                 */
                    777:                if (bp->b_data == NULL) {
                    778:                        buf_put(bp);
                    779:                        splx(s);
                    780:                        return;
                    781:                }
                    782:
                    783:                qs = bp->b_bufsize;
                    784:                queue = size2cqueue(&qs);
                    785:                numcleanpages += btoc(bp->b_bufsize);
                    786:                bqpages[queue] += btoc(bp->b_bufsize);
                    787:                if (maxcleanpages < numcleanpages)
                    788:                        maxcleanpages = numcleanpages;
                    789:                binsheadfree(bp, &bufqueues[queue]);
                    790:        } else {
                    791:                /*
                    792:                 * It has valid data.  Put it on the end of the appropriate
                    793:                 * queue, so that it'll stick around for as long as possible.
                    794:                 */
                    795:                int queue, qs;
                    796:                numfreepages += btoc(bp->b_bufsize);
                    797:                qs = bp->b_bufsize;
                    798:                queue = size2cqueue(&qs);
                    799:
                    800:                if (!ISSET(bp->b_flags, B_DELWRI)) {
                    801:                        numcleanpages += btoc(bp->b_bufsize);
                    802:                        bqpages[queue] += btoc(bp->b_bufsize);
                    803:                        if (maxcleanpages < numcleanpages)
                    804:                                maxcleanpages = numcleanpages;
                    805:                        bufq = &bufqueues[queue];
                    806:                } else {
                    807:                        numdirtypages += btoc(bp->b_bufsize);
                    808:                        bufq = &bufqueues[BQ_DIRTY];
                    809:                }
                    810:                if (ISSET(bp->b_flags, B_AGE)) {
                    811:                        binsheadfree(bp, bufq);
                    812:                        bp->b_synctime = time_uptime + 30;
                    813:                } else {
                    814:                        binstailfree(bp, bufq);
                    815:                        bp->b_synctime = time_uptime + 300;
                    816:                }
                    817:        }
                    818:
                    819:        /* Unlock the buffer. */
                    820:        CLR(bp->b_flags, (B_AGE | B_ASYNC | B_BUSY | B_NOCACHE | B_DEFERRED));
                    821:
                    822:        /* Wake up any processes waiting for any buffer to become free. */
                    823:        if (needbuffer) {
                    824:                needbuffer--;
                    825:                wakeup_one(&needbuffer);
                    826:        }
                    827:
                    828:        /* Wake up any processes waiting for _this_ buffer to become free. */
                    829:        if (ISSET(bp->b_flags, B_WANTED)) {
                    830:                CLR(bp->b_flags, B_WANTED);
                    831:                wakeup(bp);
                    832:        }
                    833:
                    834:        splx(s);
                    835: }
                    836:
                    837: /*
                    838:  * Determine if a block is in the cache. Just look on what would be its hash
                    839:  * chain. If it's there, return a pointer to it, unless it's marked invalid.
                    840:  */
                    841: struct buf *
                    842: incore(struct vnode *vp, daddr64_t blkno)
                    843: {
                    844:        struct buf *bp;
                    845:
                    846:        /* Search hash chain */
                    847:        LIST_FOREACH(bp, BUFHASH(vp, blkno), b_hash) {
                    848:                if (bp->b_lblkno == blkno && bp->b_vp == vp &&
                    849:                    !ISSET(bp->b_flags, B_INVAL))
                    850:                        return (bp);
                    851:        }
                    852:
                    853:        return (NULL);
                    854: }
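
                          /*
                           * incore() only peeks: it neither locks the buffer nor marks
                           * it busy.  A sketch of the usual "skip work if cached" test,
                           * as in breadn() and bread_cluster() above:
                           *
                           *	if (incore(vp, blkno) != NULL)
                           *		return;		(block is already cached)
                           */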
                    855:
                    856: /*
                    857:  * Get a block of requested size that is associated with
                    858:  * a given vnode and block offset. If it is found in the
                    859:  * block cache, mark it as having been found, make it busy
                    860:  * and return it. Otherwise, return an empty block of the
                    861:  * correct size. It is up to the caller to ensure that the
                     862:  * cached blocks are of the correct size.
                    863:  */
                    864: struct buf *
                    865: getblk(struct vnode *vp, daddr64_t blkno, int size, int slpflag, int slptimeo)
                    866: {
                    867:        struct bufhashhdr *bh;
                    868:        struct buf *bp, *nb = NULL;
                    869:        int s, error;
                    870:
                    871:        /*
                    872:         * XXX
                    873:         * The following is an inlined version of 'incore()', but with
                    874:         * the 'invalid' test moved to after the 'busy' test.  It's
                    875:         * necessary because there are some cases in which the NFS
                    876:         * code sets B_INVAL prior to writing data to the server, but
                    877:         * in which the buffers actually contain valid data.  In this
                    878:         * case, we can't allow the system to allocate a new buffer for
                    879:         * the block until the write is finished.
                    880:         */
                    881:        bh = BUFHASH(vp, blkno);
                    882: start:
                    883:        LIST_FOREACH(bp, BUFHASH(vp, blkno), b_hash) {
                    884:                if (bp->b_lblkno != blkno || bp->b_vp != vp)
                    885:                        continue;
                    886:
                    887:                s = splbio();
                    888:                if (ISSET(bp->b_flags, B_BUSY)) {
                    889:                        if (nb != NULL) {
                    890:                                SET(nb->b_flags, B_INVAL);
                    891:                                binshash(nb, &invalhash);
                    892:                                brelse(nb);
                    893:                                nb = NULL;
                    894:                        }
                    895:                        SET(bp->b_flags, B_WANTED);
                    896:                        error = tsleep(bp, slpflag | (PRIBIO + 1), "getblk",
                    897:                            slptimeo);
                    898:                        splx(s);
                    899:                        if (error)
                    900:                                return (NULL);
                    901:                        goto start;
                    902:                }
                    903:
                    904:                if (!ISSET(bp->b_flags, B_INVAL)) {
                    905:                        SET(bp->b_flags, (B_BUSY | B_CACHE));
                    906:                        bremfree(bp);
                    907:                        splx(s);
                    908:                        break;
                    909:                }
                    910:                splx(s);
                    911:        }
                    912:        if (nb && bp) {
                    913:                SET(nb->b_flags, B_INVAL);
                    914:                binshash(nb, &invalhash);
                    915:                brelse(nb);
                    916:                nb = NULL;
                    917:        }
                    918:        if (bp == NULL && nb == NULL) {
                    919:                nb = getnewbuf(size, slpflag, slptimeo, &error);
                    920:                if (nb == NULL) {
                    921:                        if (error == ERESTART || error == EINTR)
                    922:                                return (NULL);
                    923:                }
                    924:                goto start;
                    925:        }
                    926:        if (nb) {
                    927:                bp = nb;
                    928:                binshash(bp, bh);
                    929:                bp->b_blkno = bp->b_lblkno = blkno;
                    930:                s = splbio();
                    931:                bgetvp(vp, bp);
                    932:                splx(s);
                    933:        }
                    934:        return (bp);
                    935: }
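
                          /*
                           * Sketch of the common "allocate and overwrite" use of
                           * getblk(), where the caller will write the whole block and
                           * therefore never needs to read it from disk (the names are
                           * assumptions, not code from this file):
                           *
                           *	struct buf *bp;
                           *
                           *	bp = getblk(vp, lbn, bsize, 0, 0);
                           *	bzero(bp->b_data, bsize);
                           *	... fill in bp->b_data ...
                           *	bwrite(bp);
                           */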
                    936:
                    937: /*
                    938:  * Get an empty, disassociated buffer of given size.
                    939:  */
                    940: struct buf *
                    941: geteblk(int size)
                    942: {
                    943:        struct buf *bp;
                    944:
                    945:        while ((bp = getnewbuf(size, 0, 0, NULL)) == NULL)
                    946:                ;
                    947:        SET(bp->b_flags, B_INVAL);
                    948:        binshash(bp, &invalhash);
                    949:
                    950:        return (bp);
                    951: }
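
                          /*
                           * geteblk() buffers are anonymous scratch space (a sketch):
                           *
                           *	struct buf *bp = geteblk(bsize);
                           *	... use bp->b_data as temporary storage ...
                           *	brelse(bp);
                           *
                           * Because the buffer is marked B_INVAL and hashed on
                           * invalhash, brelse() sends it straight back to a clean queue
                           * for reuse.
                           */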
                    952:
                    953: /*
                    954:  * Find a buffer which is available for use.
                    955:  */
                    956: struct buf *
                    957: getnewbuf(size_t size, int slpflag, int slptimeo, int *ep)
                    958: {
                    959:        struct buf *bp;
                    960:        int s, error, queue, qs;
                    961:
                    962: #if 0          /* we would really like this but sblock update kills it */
                    963:        KASSERT(curproc != syncerproc && curproc != cleanerproc);
                    964: #endif
                    965:
                    966:        s = splbio();
                    967:        /*
                    968:         * Wake up cleaner if we're getting low on pages.
                    969:         */
                    970:        if (numdirtypages >= hidirtypages || numcleanpages <= locleanpages)
                    971:                wakeup(&bd_req);
                    972:
                     973:        /* we just ask; it can say no. */
                    974: getsome:
                    975:        qs = size;
                    976:        queue = size2cqueue(&qs);
                     977:        bp = buf_get(qs); /* XXX pass qs so buf_get() need not recompute it? */
                    978:        if (bp == NULL) {
                    979:                /*
                    980:                 * No free ones, try to reuse a clean one of the same or
                    981:                 * larger size.
                    982:                 */
                    983:                do {
                    984:                        bp = TAILQ_FIRST(&bufqueues[queue]);
                    985:                        queue++;
                    986:                } while (bp == NULL && queue < BQUEUES);
                    987:        }
                    988:        if (bp == NULL) {
                    989:                /* we couldn't reuse a free one, nothing of the right size */
                     990:                /* XXX free 20 buffers per q - ugly hack; should really
                     991:                 * reuse big ones without truncating. Fix later.
                    992:                 */
                    993:                int q, gotsome = 0;
                    994:                int freemax = 20;
                    995:                for (q = 1; q < BQUEUES; q++) {
                    996:                        int i = freemax;
                    997:                        while (bqpages[q] > bqpagelow
                    998:                            && (bp = TAILQ_FIRST(&bufqueues[q]))
                    999:                            && i--) {
                   1000:                                gotsome++;
                   1001:                                bremfree(bp);
                   1002:                                if (LIST_FIRST(&bp->b_dep) != NULL)
                   1003:                                        buf_deallocate(bp);
                   1004:
                   1005:                                if (ISSET(bp->b_flags, B_DELWRI)) {
                   1006:                                        CLR(bp->b_flags, B_DELWRI);
                   1007:                                }
                   1008:
                   1009:                                if (bp->b_vp)
                   1010:                                        brelvp(bp);
                   1011:
                   1012:                                buf_put(bp);
                   1013:                        }
                   1014:                }
                   1015:                if (gotsome)
                   1016:                        goto getsome;
                   1017:        }
                   1018:        if (bp == NULL) {
                   1019:                /* wait for a free buffer of any kind */
                   1020:                needbuffer++;
                   1021:                error = tsleep(&needbuffer, slpflag | (PRIBIO + 1),
                   1022:                    "getnewbuf", slptimeo);
                   1023:                if (ep != NULL) {
                   1024:                        *ep = error;
                   1025:                        if (error) {
                   1026:                                splx(s);
                   1027:                                return (NULL);
                   1028:                        }
                   1029:                }
                   1030:                goto getsome;
                   1031:        }
                   1032:
                   1033:        bremfree(bp);
                   1034:        /* Buffer is no longer on free lists. */
                   1035:        SET(bp->b_flags, B_BUSY);
                   1036:
                   1037: #ifdef DIAGNOSTIC
                   1038:        if (ISSET(bp->b_flags, B_DELWRI))
                   1039:                panic("Dirty buffer on BQ_CLEAN");
                   1040: #endif
                   1041:
                   1042:        /* disassociate us from our vnode, if we had one... */
                   1043:        if (bp->b_vp)
                   1044:                brelvp(bp);
                   1045:
                   1046:        splx(s);
                   1047:
                   1048: #ifdef DIAGNOSTIC
                   1049:        /* CLEAN buffers must have no dependencies */
                   1050:        if (LIST_FIRST(&bp->b_dep) != NULL)
                   1051:                panic("BQ_CLEAN has buffer with dependencies");
                   1052: #endif
                   1053:
                   1054:        /* clear out various other fields */
                   1055:        bp->b_flags = B_BUSY;
                   1056:        bp->b_dev = NODEV;
                   1057:        bp->b_blkno = bp->b_lblkno = 0;
                   1058:        bp->b_iodone = NULL;
                   1059:        bp->b_error = 0;
                   1060:        bp->b_resid = 0;
                   1061:        bp->b_bcount = size;
                   1062:        bp->b_dirtyoff = bp->b_dirtyend = 0;
                   1063:        bp->b_validoff = bp->b_validend = 0;
                   1064:
                   1065:        bremhash(bp);
                   1066:        return (bp);
                   1067: }
                   1068:
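/*
 * Illustrative sketch, not part of this file: how a getblk()-style
 * caller is expected to drive getnewbuf() above.  The prototype is
 * inferred from the code (size, sleep flag, sleep timeout, error
 * pointer); example_alloc() itself is hypothetical.
 */
struct buf *
example_alloc(size_t size)
{
	struct buf *bp;
	int error = 0;

	/*
	 * getnewbuf() retries internally ("goto getsome") and returns
	 * NULL only when its tsleep() failed; *ep then holds the error
	 * (e.g. EINTR when PCATCH is passed as the sleep flag).
	 */
	bp = getnewbuf(size, PCATCH, 0, &error);
	if (bp == NULL)
		return (NULL);		/* sleep interrupted or timed out */

	/* bp is B_BUSY, off the free lists and off the hash chains. */
	return (bp);
}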
                   1069: /*
                   1070:  * Buffer cleaning daemon.
                   1071:  */
                   1072: void
                   1073: buf_daemon(struct proc *p)
                   1074: {
                   1075:        struct timeval starttime, timediff;
                   1076:        struct buf *bp;
                   1077:        int s;
                   1078:
                   1079:        cleanerproc = curproc;
                   1080:
                   1081:        s = splbio();
                   1082:        for (;;) {
                   1083:                if (!numdirtypages ||
                   1084:                    (numdirtypages < hidirtypages && !needbuffer))
                   1085:                        tsleep(&bd_req, PRIBIO - 7, "cleaner", 0);
                   1086:
                   1087:                getmicrouptime(&starttime);
                   1088:
                   1089:                while ((bp = TAILQ_FIRST(&bufqueues[BQ_DIRTY]))) {
                   1090:                        struct timeval tv;
                   1091:
                   1092:                        if (numdirtypages < lodirtypages && !needbuffer)
                   1093:                                break;
                   1094:
                   1095:                        bremfree(bp);
                   1096:                        SET(bp->b_flags, B_BUSY);
                   1097:                        splx(s);
                   1098:
                   1099:                        if (ISSET(bp->b_flags, B_INVAL)) {
                   1100:                                brelse(bp);
                   1101:                                s = splbio();
                   1102:                                continue;
                   1103:                        }
                   1104: #ifdef DIAGNOSTIC
                   1105:                        if (!ISSET(bp->b_flags, B_DELWRI))
                   1106:                                panic("Clean buffer on BQ_DIRTY");
                   1107: #endif
                   1108:                        if (LIST_FIRST(&bp->b_dep) != NULL &&
                   1109:                            !ISSET(bp->b_flags, B_DEFERRED) &&
                   1110:                            buf_countdeps(bp, 0, 0)) {
                   1111:                                SET(bp->b_flags, B_DEFERRED);
                   1112:                                s = splbio();
                   1113:                                numfreepages += btoc(bp->b_bufsize);
                   1114:                                numdirtypages += btoc(bp->b_bufsize);
                   1115:                                binstailfree(bp, &bufqueues[BQ_DIRTY]);
                   1116:                                CLR(bp->b_flags, B_BUSY);
                   1117:                                continue;
                   1118:                        }
                   1119:
                   1120:                        bawrite(bp);
                   1121:
                   1122:                        /* Never allow processing to run for more than 1 sec */
                   1123:                        getmicrouptime(&tv);
                   1124:                        timersub(&tv, &starttime, &timediff);
                   1125:                        if (timediff.tv_sec)
                   1126:                                break;
                   1127:
                   1128:                        s = splbio();
                   1129:                }
                   1130:        }
                   1131: }
                   1132:
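/*
 * Illustrative sketch, not part of this file: the producer side of
 * the handshake buf_daemon() sleeps in above.  Code that dirties
 * buffers is expected to kick the daemon once the dirty-page count
 * crosses the high watermark.  Only the &bd_req channel and the
 * watermark names are taken from the surrounding code; the helper
 * itself is hypothetical.
 */
void
example_kick_cleaner(void)
{
	if (numdirtypages >= hidirtypages)
		wakeup(&bd_req);	/* ends the tsleep() in buf_daemon() */
}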
                   1133: /*
                   1134:  * Wait for operations on the buffer to complete.
                   1135:  * When they do, extract and return the I/O's error value.
                   1136:  */
                   1137: int
                   1138: biowait(struct buf *bp)
                   1139: {
                   1140:        int s;
                   1141:
                   1142:        s = splbio();
                   1143:        while (!ISSET(bp->b_flags, B_DONE))
                   1144:                tsleep(bp, PRIBIO + 1, "biowait", 0);
                   1145:        splx(s);
                   1146:
                   1147:        /* check for interruption of I/O (e.g. via NFS), then errors. */
                   1148:        if (ISSET(bp->b_flags, B_EINTR)) {
                   1149:                CLR(bp->b_flags, B_EINTR);
                   1150:                return (EINTR);
                   1151:        }
                   1152:
                   1153:        if (ISSET(bp->b_flags, B_ERROR))
                   1154:                return (bp->b_error ? bp->b_error : EIO);
                   1155:        else
                   1156:                return (0);
                   1157: }
                   1158:
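/*
 * Illustrative sketch, not part of this file: the synchronous-read
 * pattern biowait() exists to serve, as followed by bread()-style
 * callers.  getblk(), VOP_STRATEGY() and the buffer flags follow
 * the standard kernel interfaces; the wrapper itself is hypothetical.
 */
int
example_sync_read(struct buf *bp)
{
	/* If the buffer missed the cache, start the read ourselves. */
	if (!ISSET(bp->b_flags, B_CACHE)) {
		SET(bp->b_flags, B_READ);
		VOP_STRATEGY(bp);	/* queue the read with the driver */
	}
	/* Sleep until biodone() sets B_DONE; returns 0, EINTR or EIO. */
	return (biowait(bp));
}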
                   1159: /*
                   1160:  * Mark I/O complete on a buffer.
                   1161:  *
                   1162:  * If a callback has been requested, e.g. the pageout
                   1163:  * daemon, do so. Otherwise, awaken waiting processes.
                   1164:  *
                    1165:  * [ Leffler, et al., say on p.247:
                   1166:  *     "This routine wakes up the blocked process, frees the buffer
                   1167:  *     for an asynchronous write, or, for a request by the pagedaemon
                   1168:  *     process, invokes a procedure specified in the buffer structure" ]
                   1169:  *
                    1170:  * In real life, the pagedaemon (or other system processes) wants
                    1171:  * to do async stuff too, and doesn't want the buffer brelse()'d.
                   1172:  * (for swap pager, that puts swap buffers on the free lists (!!!),
                   1173:  * for the vn device, that puts malloc'd buffers on the free lists!)
                   1174:  *
                   1175:  * Must be called at splbio().
                   1176:  */
                   1177: void
                   1178: biodone(struct buf *bp)
                   1179: {
                   1180:        splassert(IPL_BIO);
                   1181:
                   1182:        if (ISSET(bp->b_flags, B_DONE))
                   1183:                panic("biodone already");
                   1184:        SET(bp->b_flags, B_DONE);               /* note that it's done */
                   1185:
                   1186:        if (LIST_FIRST(&bp->b_dep) != NULL)
                   1187:                buf_complete(bp);
                   1188:
                   1189:        if (!ISSET(bp->b_flags, B_READ)) {
                   1190:                CLR(bp->b_flags, B_WRITEINPROG);
                   1191:                vwakeup(bp->b_vp);
                   1192:        }
                   1193:
                   1194:        if (ISSET(bp->b_flags, B_CALL)) {       /* if necessary, call out */
                   1195:                CLR(bp->b_flags, B_CALL);       /* but note callout done */
                   1196:                (*bp->b_iodone)(bp);
                   1197:        } else {
                   1198:                if (ISSET(bp->b_flags, B_ASYNC)) {/* if async, release it */
                   1199:                        brelse(bp);
                   1200:                } else {                        /* or just wakeup the buffer */
                   1201:                        CLR(bp->b_flags, B_WANTED);
                   1202:                        wakeup(bp);
                   1203:                }
                   1204:        }
                   1205: }
                   1206:
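/*
 * Illustrative sketch, not part of this file: how a block driver's
 * interrupt handler typically completes a transfer.  biodone() must
 * be called at splbio(), per the comment above; the B_ERROR/b_error
 * convention is the standard completion protocol.  The handler name
 * and error argument are hypothetical.
 */
void
example_disk_intr(struct buf *bp, int error)
{
	int s;

	s = splbio();
	if (error) {
		bp->b_error = error;
		SET(bp->b_flags, B_ERROR);
		bp->b_resid = bp->b_bcount;	/* nothing transferred */
	} else
		bp->b_resid = 0;		/* full transfer */
	biodone(bp);		/* wake biowait() sleeper or fire b_iodone */
	splx(s);
}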
                   1207: #if 1
                   1208: void
                    1209: vfs_bufstats(void)
                    1210: {
                    1211: }
                   1212: /* #ifdef DDB */
                   1213: #else
                   1214: /*
                   1215:  * Print out statistics on the current allocation of the buffer pool.
                   1216:  * Can be enabled to print out on every ``sync'' by setting "syncprt"
                   1217:  * in vfs_syscalls.c using sysctl.
                   1218:  */
                   1219: void
                   1220: vfs_bufstats(void)
                   1221: {
                   1222:        int s, i, j, count;
                   1223:        struct buf *bp;
                   1224:        struct bqueues *dp;
                   1225:        int counts[MAXBSIZE/PAGE_SIZE+1];
                   1226:        int totals[BQUEUES];
                   1227:        long ptotals[BQUEUES];
                   1228:        long pages;
                   1229:        static char *bname[BQUEUES] = { "CLEAN", "DIRTY", "EMPTY" };
                   1230:
                   1231:        s = splbio();
                   1232:        for (dp = bufqueues, i = 0; dp < &bufqueues[BQUEUES]; dp++, i++) {
                   1233:                count = 0;
                   1234:                pages = 0;
                   1235:                for (j = 0; j <= MAXBSIZE/PAGE_SIZE; j++)
                   1236:                        counts[j] = 0;
                   1237:                TAILQ_FOREACH(bp, dp, b_freelist) {
                   1238:                        counts[bp->b_bufsize/PAGE_SIZE]++;
                   1239:                        count++;
                   1240:                        pages += btoc(bp->b_bufsize);
                   1241:                }
                   1242:                totals[i] = count;
                   1243:                ptotals[i] = pages;
                    1244:                printf("%s: total-%d(%ld pages)", bname[i], count, pages);
                   1245:                for (j = 0; j <= MAXBSIZE/PAGE_SIZE; j++)
                   1246:                        if (counts[j] != 0)
                   1247:                                printf(", %d-%d", j * PAGE_SIZE, counts[j]);
                   1248:                printf("\n");
                   1249:        }
                   1250:        if ((ptotals[BQ_CLEAN] + ptotals[BQ_DIRTY]) != numfreepages)
                   1251:                printf("numfreepages counter wrong: %ld != %ld\n",
                   1252:                    numfreepages, ptotals[BQ_CLEAN] + ptotals[BQ_DIRTY]);
                   1253:        if (ptotals[BQ_CLEAN] != numcleanpages)
                   1254:                printf("numcleanpages counter wrong: %ld != %ld\n",
                    1255:                    numcleanpages, ptotals[BQ_CLEAN]);
                   1256:        else
                   1257:                printf("numcleanpages: %ld\n", numcleanpages);
                   1258:        if (numdirtypages != ptotals[BQ_DIRTY])
                   1259:                printf("numdirtypages counter wrong: %ld != %ld\n",
                   1260:                    numdirtypages, ptotals[BQ_DIRTY]);
                   1261:        else
                   1262:                printf("numdirtypages: %ld\n", numdirtypages);
                   1263:
                   1264:        printf("syncer eating up to %ld pages from %ld reserved\n",
                   1265:            maxcleanpages - hicleanpages, locleanpages);
                   1266:        splx(s);
                   1267: }
                    1268: #endif /* 1 */