[BACK]Return to ffs_alloc.c CVS log [TXT][DIR] Up to [local] / sys / ufs / ffs

Annotation of sys/ufs/ffs/ffs_alloc.c, Revision 1.1.1.1

1.1       nbrk        1: /*     $OpenBSD: ffs_alloc.c,v 1.78 2007/06/22 13:59:12 thib Exp $     */
                      2: /*     $NetBSD: ffs_alloc.c,v 1.11 1996/05/11 18:27:09 mycroft Exp $   */
                      3:
                      4: /*
                      5:  * Copyright (c) 2002 Networks Associates Technology, Inc.
                      6:  * All rights reserved.
                      7:  *
                      8:  * This software was developed for the FreeBSD Project by Marshall
                      9:  * Kirk McKusick and Network Associates Laboratories, the Security
                     10:  * Research Division of Network Associates, Inc. under DARPA/SPAWAR
                     11:  * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
                     12:  * research program.
                     13:  *
                     14:  * Copyright (c) 1982, 1986, 1989, 1993
                     15:  *     The Regents of the University of California.  All rights reserved.
                     16:  *
                     17:  * Redistribution and use in source and binary forms, with or without
                     18:  * modification, are permitted provided that the following conditions
                     19:  * are met:
                     20:  * 1. Redistributions of source code must retain the above copyright
                     21:  *    notice, this list of conditions and the following disclaimer.
                     22:  * 2. Redistributions in binary form must reproduce the above copyright
                     23:  *    notice, this list of conditions and the following disclaimer in the
                     24:  *    documentation and/or other materials provided with the distribution.
                     25:  * 3. Neither the name of the University nor the names of its contributors
                     26:  *    may be used to endorse or promote products derived from this software
                     27:  *    without specific prior written permission.
                     28:  *
                     29:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
                     30:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     31:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     32:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
                     33:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
                     34:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
                     35:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                     36:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                     37:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                     38:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                     39:  * SUCH DAMAGE.
                     40:  *
                     41:  *     @(#)ffs_alloc.c 8.11 (Berkeley) 10/27/94
                     42:  */
                     43:
                     44: #include <sys/param.h>
                     45: #include <sys/systm.h>
                     46: #include <sys/buf.h>
                     47: #include <sys/proc.h>
                     48: #include <sys/vnode.h>
                     49: #include <sys/mount.h>
                     50: #include <sys/kernel.h>
                     51: #include <sys/syslog.h>
                     52: #include <sys/stdint.h>
                     53:
                     54: #include <uvm/uvm_extern.h>
                     55:
                     56: #include <dev/rndvar.h>
                     57:
                     58: #include <ufs/ufs/quota.h>
                     59: #include <ufs/ufs/inode.h>
                     60: #include <ufs/ufs/ufsmount.h>
                     61: #include <ufs/ufs/ufs_extern.h>
                     62:
                     63: #include <ufs/ffs/fs.h>
                     64: #include <ufs/ffs/ffs_extern.h>
                     65:
                     66: #define ffs_fserr(fs, uid, cp) do {                            \
                     67:        log(LOG_ERR, "uid %u on %s: %s\n", (uid),               \
                     68:            (fs)->fs_fsmnt, (cp));                              \
                     69: } while (0)
                     70:
                     71: daddr_t        ffs_alloccg(struct inode *, int, daddr_t, int);
                     72: daddr_t        ffs_alloccgblk(struct inode *, struct buf *, daddr_t);
                     73: daddr_t        ffs_clusteralloc(struct inode *, int, daddr_t, int);
                     74: ino_t  ffs_dirpref(struct inode *);
                     75: daddr_t        ffs_fragextend(struct inode *, int, long, int, int);
                     76: u_long ffs_hashalloc(struct inode *, int, long, int,
                     77:     daddr_t (*)(struct inode *, int, daddr_t, int));
                     78: daddr_t        ffs_nodealloccg(struct inode *, int, daddr_t, int);
                     79: daddr_t        ffs_mapsearch(struct fs *, struct cg *, daddr_t, int);
                     80:
                     81: int ffs1_reallocblks(void *);
                     82: #ifdef FFS2
                     83: int ffs2_reallocblks(void *);
                     84: #endif
                     85:
                     86: #ifdef DIAGNOSTIC
                     87: int      ffs_checkblk(struct inode *, daddr_t, long);
                     88: #endif
                     89:
                     90: /*
                     91:  * Allocate a block in the file system.
                     92:  *
                     93:  * The size of the requested block is given, which must be some
                     94:  * multiple of fs_fsize and <= fs_bsize.
                     95:  * A preference may be optionally specified. If a preference is given
                     96:  * the following hierarchy is used to allocate a block:
                     97:  *   1) allocate the requested block.
                     98:  *   2) allocate a rotationally optimal block in the same cylinder.
                     99:  *   3) allocate a block in the same cylinder group.
                    100:  *   4) quadratically rehash into other cylinder groups, until an
                    101:  *      available block is located.
                    102:  * If no block preference is given the following hierarchy is used
                    103:  * to allocate a block:
                    104:  *   1) allocate a block in the cylinder group that contains the
                    105:  *      inode for the file.
                    106:  *   2) quadratically rehash into other cylinder groups, until an
                    107:  *      available block is located.
                    108:  */
                    109: int
                    110: ffs_alloc(struct inode *ip, daddr_t lbn, daddr_t bpref, int size,
                    111:     struct ucred *cred, daddr_t *bnp)
                    112: {
                    113:        struct fs *fs;
                    114:        daddr_t bno;
                    115:        int cg;
                    116:        int error;
                    117:
                    118:        *bnp = 0;
                    119:        fs = ip->i_fs;
                    120: #ifdef DIAGNOSTIC
                    121:        if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) {
                    122:                printf("dev = 0x%x, bsize = %d, size = %d, fs = %s\n",
                    123:                    ip->i_dev, fs->fs_bsize, size, fs->fs_fsmnt);
                    124:                panic("ffs_alloc: bad size");
                    125:        }
                    126:        if (cred == NOCRED)
                    127:                panic("ffs_alloc: missing credential");
                    128: #endif /* DIAGNOSTIC */
                    129:        if (size == fs->fs_bsize && fs->fs_cstotal.cs_nbfree == 0)
                    130:                goto nospace;
                    131:        if (cred->cr_uid != 0 && freespace(fs, fs->fs_minfree) <= 0)
                    132:                goto nospace;
                    133:
                    134:        if ((error = ufs_quota_alloc_blocks(ip, btodb(size), cred)) != 0)
                    135:                return (error);
                    136:
                    137:        /*
                    138:         * Start allocation in the preferred block's cylinder group or
                    139:         * the file's inode's cylinder group if no preferred block was
                    140:         * specified.
                    141:         */
                    142:        if (bpref >= fs->fs_size)
                    143:                bpref = 0;
                    144:        if (bpref == 0)
                    145:                cg = ino_to_cg(fs, ip->i_number);
                    146:        else
                    147:                cg = dtog(fs, bpref);
                    148:
                    149:        /* Try allocating a block. */
                    150:        bno = (daddr_t)ffs_hashalloc(ip, cg, (long)bpref, size, ffs_alloccg);
                    151:        if (bno > 0) {
                    152:                /* allocation successful, update inode data */
                    153:                DIP_ADD(ip, blocks, btodb(size));
                    154:                ip->i_flag |= IN_CHANGE | IN_UPDATE;
                    155:                *bnp = bno;
                    156:                return (0);
                    157:        }
                    158:
                    159:        /* Restore user's disk quota because allocation failed. */
                    160:        (void) ufs_quota_free_blocks(ip, btodb(size), cred);
                    161:
                    162: nospace:
                    163:        ffs_fserr(fs, cred->cr_uid, "file system full");
                    164:        uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt);
                    165:        return (ENOSPC);
                    166: }
                    167:
                    168: /*
                    169:  * Reallocate a fragment to a bigger size
                    170:  *
                    171:  * The number and size of the old block is given, and a preference
                    172:  * and new size is also specified. The allocator attempts to extend
                    173:  * the original block. Failing that, the regular block allocator is
                    174:  * invoked to get an appropriate block.
                    175:  */
                    176: int
                    177: ffs_realloccg(struct inode *ip, daddr_t lbprev, daddr_t bpref, int osize,
                    178:     int nsize, struct ucred *cred, struct buf **bpp, daddr_t *blknop)
                    179: {
                    180:        struct fs *fs;
                    181:        struct buf *bp = NULL;
                    182:        daddr_t quota_updated = 0;
                    183:        int cg, request, error;
                    184:        daddr_t bprev, bno;
                    185:
                    186:        if (bpp != NULL)
                    187:                *bpp = NULL;
                    188:        fs = ip->i_fs;
                    189: #ifdef DIAGNOSTIC
                    190:        if ((u_int)osize > fs->fs_bsize || fragoff(fs, osize) != 0 ||
                    191:            (u_int)nsize > fs->fs_bsize || fragoff(fs, nsize) != 0) {
                    192:                printf(
                    193:                    "dev = 0x%x, bsize = %d, osize = %d, nsize = %d, fs = %s\n",
                    194:                    ip->i_dev, fs->fs_bsize, osize, nsize, fs->fs_fsmnt);
                    195:                panic("ffs_realloccg: bad size");
                    196:        }
                    197:        if (cred == NOCRED)
                    198:                panic("ffs_realloccg: missing credential");
                    199: #endif /* DIAGNOSTIC */
                    200:        if (cred->cr_uid != 0 && freespace(fs, fs->fs_minfree) <= 0)
                    201:                goto nospace;
                    202:
                    203:        bprev = DIP(ip, db[lbprev]);
                    204:
                    205:        if (bprev == 0) {
                    206:                printf("dev = 0x%x, bsize = %d, bprev = %d, fs = %s\n",
                    207:                    ip->i_dev, fs->fs_bsize, bprev, fs->fs_fsmnt);
                    208:                panic("ffs_realloccg: bad bprev");
                    209:        }
                    210:
                    211:        /*
                    212:         * Allocate the extra space in the buffer.
                    213:         */
                    214:        if (bpp != NULL) {
                    215:                if ((error = bread(ITOV(ip), lbprev, fs->fs_bsize,
                    216:                    NOCRED, &bp)) != 0)
                    217:                        goto error;
                    218:                bp->b_bcount = osize;
                    219:        }
                    220:
                    221:        if ((error = ufs_quota_alloc_blocks(ip, btodb(nsize - osize), cred))
                    222:            != 0)
                    223:                goto error;
                    224:
                    225:        quota_updated = btodb(nsize - osize);
                    226:
                    227:        /*
                    228:         * Check for extension in the existing location.
                    229:         */
                    230:        cg = dtog(fs, bprev);
                    231:        if ((bno = ffs_fragextend(ip, cg, (long)bprev, osize, nsize)) != 0) {
                    232:                DIP_ADD(ip, blocks, btodb(nsize - osize));
                    233:                ip->i_flag |= IN_CHANGE | IN_UPDATE;
                    234:                if (bpp != NULL) {
                    235:                        if (bp->b_blkno != fsbtodb(fs, bno))
                    236:                                panic("ffs_realloccg: bad blockno");
                    237: #ifdef DIAGNOSTIC
                    238:                        if (nsize > bp->b_bufsize)
                    239:                                panic("ffs_realloccg: small buf");
                    240: #endif
                    241:                        bp->b_bcount = nsize;
                    242:                        bp->b_flags |= B_DONE;
                    243:                        bzero(bp->b_data + osize, (u_int)nsize - osize);
                    244:                        *bpp = bp;
                    245:                }
                    246:                if (blknop != NULL) {
                    247:                        *blknop = bno;
                    248:                }
                    249:                return (0);
                    250:        }
                    251:        /*
                    252:         * Allocate a new disk location.
                    253:         */
                    254:        if (bpref >= fs->fs_size)
                    255:                bpref = 0;
                    256:        switch (fs->fs_optim) {
                    257:        case FS_OPTSPACE:
                    258:                /*
                    259:                 * Allocate an exact sized fragment. Although this makes
                    260:                 * best use of space, we will waste time relocating it if
                    261:                 * the file continues to grow. If the fragmentation is
                    262:                 * less than half of the minimum free reserve, we choose
                    263:                 * to begin optimizing for time.
                    264:                 */
                    265:                request = nsize;
                    266:                if (fs->fs_minfree < 5 ||
                    267:                    fs->fs_cstotal.cs_nffree >
                    268:                    fs->fs_dsize * fs->fs_minfree / (2 * 100))
                    269:                        break;
                    270:                fs->fs_optim = FS_OPTTIME;
                    271:                break;
                    272:        case FS_OPTTIME:
                    273:                /*
                    274:                 * At this point we have discovered a file that is trying to
                    275:                 * grow a small fragment to a larger fragment. To save time,
                    276:                 * we allocate a full sized block, then free the unused portion.
                    277:                 * If the file continues to grow, the `ffs_fragextend' call
                    278:                 * above will be able to grow it in place without further
                    279:                 * copying. If aberrant programs cause disk fragmentation to
                    280:                 * grow within 2% of the free reserve, we choose to begin
                    281:                 * optimizing for space.
                    282:                 */
                    283:                request = fs->fs_bsize;
                    284:                if (fs->fs_cstotal.cs_nffree <
                    285:                    fs->fs_dsize * (fs->fs_minfree - 2) / 100)
                    286:                        break;
                    287:                fs->fs_optim = FS_OPTSPACE;
                    288:                break;
                    289:        default:
                    290:                printf("dev = 0x%x, optim = %d, fs = %s\n",
                    291:                    ip->i_dev, fs->fs_optim, fs->fs_fsmnt);
                    292:                panic("ffs_realloccg: bad optim");
                    293:                /* NOTREACHED */
                    294:        }
                    295:        bno = (daddr_t)ffs_hashalloc(ip, cg, (long)bpref, request,
                    296:                                     ffs_alloccg);
                    297:        if (bno <= 0)
                    298:                goto nospace;
                    299:
                    300:        (void) uvm_vnp_uncache(ITOV(ip));
                    301:        if (!DOINGSOFTDEP(ITOV(ip)))
                    302:                ffs_blkfree(ip, bprev, (long)osize);
                    303:        if (nsize < request)
                    304:                ffs_blkfree(ip, bno + numfrags(fs, nsize),
                    305:                    (long)(request - nsize));
                    306:        DIP_ADD(ip, blocks, btodb(nsize - osize));
                    307:        ip->i_flag |= IN_CHANGE | IN_UPDATE;
                    308:        if (bpp != NULL) {
                    309:                bp->b_blkno = fsbtodb(fs, bno);
                    310: #ifdef DIAGNOSTIC
                    311:                if (nsize > bp->b_bufsize)
                    312:                        panic("ffs_realloccg: small buf 2");
                    313: #endif
                    314:                bp->b_bcount = nsize;
                    315:                bp->b_flags |= B_DONE;
                    316:                bzero(bp->b_data + osize, (u_int)nsize - osize);
                    317:                *bpp = bp;
                    318:        }
                    319:        if (blknop != NULL) {
                    320:                *blknop = bno;
                    321:        }
                    322:        return (0);
                    323:
                    324: nospace:
                    325:        /*
                    326:         * no space available
                    327:         */
                    328:        ffs_fserr(fs, cred->cr_uid, "file system full");
                    329:        uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt);
                    330:        error = ENOSPC;
                    331:
                    332: error:
                    333:        if (bp != NULL) {
                    334:                brelse(bp);
                    335:                bp = NULL;
                    336:        }
                    337:
                    338:        /*
                    339:         * Restore user's disk quota because allocation failed.
                    340:         */
                    341:        if (quota_updated != 0)
                    342:                (void)ufs_quota_free_blocks(ip, quota_updated, cred);
                    343:
                    344:        return error;
                    345: }
                    346:
                    347: /*
                    348:  * Reallocate a sequence of blocks into a contiguous sequence of blocks.
                    349:  *
                    350:  * The vnode and an array of buffer pointers for a range of sequential
                    351:  * logical blocks to be made contiguous are given. The allocator attempts
                    352:  * to find a range of sequential blocks starting as close as possible to
                    353:  * an fs_rotdelay offset from the end of the allocation for the logical
                    354:  * block immediately preceding the current range. If successful, the
                    355:  * physical block numbers in the buffer pointers and in the inode are
                    356:  * changed to reflect the new allocation. If unsuccessful, the allocation
                    357:  * is left unchanged. The success in doing the reallocation is returned.
                    358:  * Note that the error return is not reflected back to the user. Rather
                    359:  * the previous block allocation will be used.
                    360:  */
                    361:
                    362: int doasyncfree = 1;
                    363: int doreallocblks = 1;
                    364: int prtrealloc = 0;
                    365:
                    366: int
                    367: ffs1_reallocblks(void *v)
                    368: {
                    369:        struct vop_reallocblks_args *ap = v;
                    370:        struct fs *fs;
                    371:        struct inode *ip;
                    372:        struct vnode *vp;
                    373:        struct buf *sbp, *ebp;
                    374:        daddr_t *bap, *sbap, *ebap = NULL;
                    375:        struct cluster_save *buflist;
                    376:        daddr_t start_lbn, end_lbn, soff, newblk, blkno;
                    377:        struct indir start_ap[NIADDR + 1], end_ap[NIADDR + 1], *idp;
                    378:        int i, len, start_lvl, end_lvl, pref, ssize;
                    379:
                    380:        vp = ap->a_vp;
                    381:        ip = VTOI(vp);
                    382:        fs = ip->i_fs;
                    383:        if (fs->fs_contigsumsize <= 0)
                    384:                return (ENOSPC);
                    385:        buflist = ap->a_buflist;
                    386:        len = buflist->bs_nchildren;
                    387:        start_lbn = buflist->bs_children[0]->b_lblkno;
                    388:        end_lbn = start_lbn + len - 1;
                    389:
                    390: #ifdef DIAGNOSTIC
                    391:        for (i = 0; i < len; i++)
                    392:                if (!ffs_checkblk(ip,
                    393:                   dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
                    394:                        panic("ffs1_reallocblks: unallocated block 1");
                    395:
                    396:        for (i = 1; i < len; i++)
                    397:                if (buflist->bs_children[i]->b_lblkno != start_lbn + i)
                    398:                        panic("ffs1_reallocblks: non-logical cluster");
                    399:
                    400:        blkno = buflist->bs_children[0]->b_blkno;
                    401:        ssize = fsbtodb(fs, fs->fs_frag);
                    402:        for (i = 1; i < len - 1; i++)
                    403:                if (buflist->bs_children[i]->b_blkno != blkno + (i * ssize))
                    404:                        panic("ffs1_reallocblks: non-physical cluster %d", i);
                    405: #endif
                    406:        /*
                    407:         * If the latest allocation is in a new cylinder group, assume that
                    408:         * the filesystem has decided to move and do not force it back to
                    409:         * the previous cylinder group.
                    410:         */
                    411:        if (dtog(fs, dbtofsb(fs, buflist->bs_children[0]->b_blkno)) !=
                    412:            dtog(fs, dbtofsb(fs, buflist->bs_children[len - 1]->b_blkno)))
                    413:                return (ENOSPC);
                    414:        if (ufs_getlbns(vp, start_lbn, start_ap, &start_lvl) ||
                    415:            ufs_getlbns(vp, end_lbn, end_ap, &end_lvl))
                    416:                return (ENOSPC);
                    417:        /*
                    418:         * Get the starting offset and block map for the first block.
                    419:         */
                    420:        if (start_lvl == 0) {
                    421:                sbap = &ip->i_ffs1_db[0];
                    422:                soff = start_lbn;
                    423:        } else {
                    424:                idp = &start_ap[start_lvl - 1];
                    425:                if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &sbp)) {
                    426:                        brelse(sbp);
                    427:                        return (ENOSPC);
                    428:                }
                    429:                sbap = (daddr_t *)sbp->b_data;
                    430:                soff = idp->in_off;
                    431:        }
                    432:        /*
                    433:         * Find the preferred location for the cluster.
                    434:         */
                    435:        pref = ffs1_blkpref(ip, start_lbn, soff, sbap);
                    436:        /*
                    437:         * If the block range spans two block maps, get the second map.
                    438:         */
                    439:        if (end_lvl == 0 || (idp = &end_ap[end_lvl - 1])->in_off + 1 >= len) {
                    440:                ssize = len;
                    441:        } else {
                    442: #ifdef DIAGNOSTIC
                    443:                if (start_lvl > 1 &&
                    444:                    start_ap[start_lvl-1].in_lbn == idp->in_lbn)
                    445:                        panic("ffs1_reallocblk: start == end");
                    446: #endif
                    447:                ssize = len - (idp->in_off + 1);
                    448:                if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &ebp))
                    449:                        goto fail;
                    450:                ebap = (daddr_t *)ebp->b_data;
                    451:        }
                    452:        /*
                    453:         * Search the block map looking for an allocation of the desired size.
                    454:         */
                    455:        if ((newblk = (daddr_t)ffs_hashalloc(ip, dtog(fs, pref), (long)pref,
                    456:            len, ffs_clusteralloc)) == 0)
                    457:                goto fail;
                    458:        /*
                    459:         * We have found a new contiguous block.
                    460:         *
                    461:         * First we have to replace the old block pointers with the new
                    462:         * block pointers in the inode and indirect blocks associated
                    463:         * with the file.
                    464:         */
                    465: #ifdef DEBUG
                    466:        if (prtrealloc)
                    467:                printf("realloc: ino %d, lbns %d-%d\n\told:", ip->i_number,
                    468:                    start_lbn, end_lbn);
                    469: #endif
                    470:        blkno = newblk;
                    471:        for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->fs_frag) {
                    472:                if (i == ssize) {
                    473:                        bap = ebap;
                    474:                        soff = -i;
                    475:                }
                    476: #ifdef DIAGNOSTIC
                    477:                if (!ffs_checkblk(ip,
                    478:                   dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
                    479:                        panic("ffs1_reallocblks: unallocated block 2");
                    480:                if (dbtofsb(fs, buflist->bs_children[i]->b_blkno) != *bap)
                    481:                        panic("ffs1_reallocblks: alloc mismatch");
                    482: #endif
                    483: #ifdef DEBUG
                    484:                if (prtrealloc)
                    485:                        printf(" %d,", *bap);
                    486: #endif
                    487:                if (DOINGSOFTDEP(vp)) {
                    488:                        if (sbap == &ip->i_ffs1_db[0] && i < ssize)
                    489:                                softdep_setup_allocdirect(ip, start_lbn + i,
                    490:                                    blkno, *bap, fs->fs_bsize, fs->fs_bsize,
                    491:                                    buflist->bs_children[i]);
                    492:                        else
                    493:                                softdep_setup_allocindir_page(ip, start_lbn + i,
                    494:                                    i < ssize ? sbp : ebp, soff + i, blkno,
                    495:                                    *bap, buflist->bs_children[i]);
                    496:                }
                    497:
                    498:                *bap++ = blkno;
                    499:        }
                    500:        /*
                    501:         * Next we must write out the modified inode and indirect blocks.
                    502:         * For strict correctness, the writes should be synchronous since
                    503:         * the old block values may have been written to disk. In practise
                    504:         * they are almost never written, but if we are concerned about
                    505:         * strict correctness, the `doasyncfree' flag should be set to zero.
                    506:         *
                    507:         * The test on `doasyncfree' should be changed to test a flag
                    508:         * that shows whether the associated buffers and inodes have
                    509:         * been written. The flag should be set when the cluster is
                    510:         * started and cleared whenever the buffer or inode is flushed.
                    511:         * We can then check below to see if it is set, and do the
                    512:         * synchronous write only when it has been cleared.
                    513:         */
                    514:        if (sbap != &ip->i_ffs1_db[0]) {
                    515:                if (doasyncfree)
                    516:                        bdwrite(sbp);
                    517:                else
                    518:                        bwrite(sbp);
                    519:        } else {
                    520:                ip->i_flag |= IN_CHANGE | IN_UPDATE;
                    521:                if (!doasyncfree) {
                    522:                        UFS_UPDATE(ip, MNT_WAIT);
                    523:                }
                    524:        }
                    525:        if (ssize < len) {
                    526:                if (doasyncfree)
                    527:                        bdwrite(ebp);
                    528:                else
                    529:                        bwrite(ebp);
                    530:        }
                    531:        /*
                    532:         * Last, free the old blocks and assign the new blocks to the buffers.
                    533:         */
                    534: #ifdef DEBUG
                    535:        if (prtrealloc)
                    536:                printf("\n\tnew:");
                    537: #endif
                    538:        for (blkno = newblk, i = 0; i < len; i++, blkno += fs->fs_frag) {
                    539:                if (!DOINGSOFTDEP(vp))
                    540:                        ffs_blkfree(ip,
                    541:                            dbtofsb(fs, buflist->bs_children[i]->b_blkno),
                    542:                            fs->fs_bsize);
                    543:                buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno);
                    544: #ifdef DIAGNOSTIC
                    545:                if (!ffs_checkblk(ip,
                    546:                   dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
                    547:                        panic("ffs1_reallocblks: unallocated block 3");
                    548:                if (prtrealloc)
                    549:                        printf(" %d,", blkno);
                    550: #endif
                    551:        }
                    552: #ifdef DEBUG
                    553:        if (prtrealloc) {
                    554:                prtrealloc--;
                    555:                printf("\n");
                    556:        }
                    557: #endif
                    558:        return (0);
                    559:
                    560: fail:
                    561:        if (ssize < len)
                    562:                brelse(ebp);
                    563:        if (sbap != &ip->i_ffs1_db[0])
                    564:                brelse(sbp);
                    565:        return (ENOSPC);
                    566: }
                    567:
                    568: #ifdef FFS2
                    569: int
                    570: ffs2_reallocblks(void *v)
                    571: {
                    572:        struct vop_reallocblks_args *ap = v;
                    573:        struct fs *fs;
                    574:        struct inode *ip;
                    575:        struct vnode *vp;
                    576:        struct buf *sbp, *ebp;
                    577:        daddr64_t *bap, *sbap, *ebap = 0;
                    578:        struct cluster_save *buflist;
                    579:        struct ufsmount *ump;
                    580:        daddr64_t start_lbn, end_lbn;
                    581:        daddr64_t soff, newblk, blkno, pref;
                    582:        struct indir start_ap[NIADDR + 1], end_ap[NIADDR + 1], *idp;
                    583:        int i, len, start_lvl, end_lvl, ssize;
                    584:
                    585:        vp = ap->a_vp;
                    586:        ip = VTOI(vp);
                    587:        fs = ip->i_fs;
                    588:        ump = ip->i_ump;
                    589:
                    590:        if (fs->fs_contigsumsize <= 0)
                    591:                return (ENOSPC);
                    592:
                    593:        buflist = ap->a_buflist;
                    594:        len = buflist->bs_nchildren;
                    595:        start_lbn = buflist->bs_children[0]->b_lblkno;
                    596:        end_lbn = start_lbn + len - 1;
                    597:
                    598: #ifdef DIAGNOSTIC
                    599:        for (i = 0; i < len; i++)
                    600:                if (!ffs_checkblk(ip,
                    601:                   dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
                    602:                        panic("ffs2_reallocblks: unallocated block 1");
                    603:
                    604:        for (i = 1; i < len; i++)
                    605:                if (buflist->bs_children[i]->b_lblkno != start_lbn + i)
                    606:                        panic("ffs2_reallocblks: non-logical cluster");
                    607:
                    608:        blkno = buflist->bs_children[0]->b_blkno;
                    609:        ssize = fsbtodb(fs, fs->fs_frag);
                    610:
                    611:        for (i = 1; i < len - 1; i++)
                    612:                if (buflist->bs_children[i]->b_blkno != blkno + (i * ssize))
                    613:                        panic("ffs2_reallocblks: non-physical cluster %d", i);
                    614: #endif
                    615:
                    616:        /*
                    617:         * If the latest allocation is in a new cylinder group, assume that
                    618:         * the filesystem has decided to move and do not force it back to
                    619:         * the previous cylinder group.
                    620:         */
                    621:        if (dtog(fs, dbtofsb(fs, buflist->bs_children[0]->b_blkno)) !=
                    622:            dtog(fs, dbtofsb(fs, buflist->bs_children[len - 1]->b_blkno)))
                    623:                return (ENOSPC);
                    624:        if (ufs_getlbns(vp, start_lbn, start_ap, &start_lvl) ||
                    625:            ufs_getlbns(vp, end_lbn, end_ap, &end_lvl))
                    626:                return (ENOSPC);
                    627:
                    628:        /*
                    629:         * Get the starting offset and block map for the first block.
                    630:         */
                    631:        if (start_lvl == 0) {
                    632:                sbap = &ip->i_din2->di_db[0];
                    633:                soff = start_lbn;
                    634:        } else {
                    635:                idp = &start_ap[start_lvl - 1];
                    636:                if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &sbp)) {
                    637:                        brelse(sbp);
                    638:                        return (ENOSPC);
                    639:                }
                    640:                sbap = (daddr64_t *)sbp->b_data;
                    641:                soff = idp->in_off;
                    642:        }
                    643:
                    644:        /*
                    645:         * If the block range spans two block maps, get the second map.
                    646:         */
                    647:        if (end_lvl == 0 || (idp = &end_ap[end_lvl - 1])->in_off + 1 >= len) {
                    648:                ssize = len;
                    649:        } else {
                    650: #ifdef DIAGNOSTIC
                    651:                if (start_ap[start_lvl-1].in_lbn == idp->in_lbn)
                    652:                        panic("ffs2_reallocblk: start == end");
                    653: #endif
                    654:                ssize = len - (idp->in_off + 1);
                    655:                if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &ebp))
                    656:                        goto fail;
                    657:                ebap = (daddr64_t *)ebp->b_data;
                    658:        }
                    659:
                    660:        /*
                    661:         * Find the preferred location for the cluster.
                    662:         */
                    663:        pref = ffs2_blkpref(ip, start_lbn, soff, sbap);
                    664:
                    665:        /*
                    666:         * Search the block map looking for an allocation of the desired size.
                    667:         */
                    668:        if ((newblk = ffs_hashalloc(ip, dtog(fs, pref), pref,
                    669:            len, ffs_clusteralloc)) == 0)
                    670:                goto fail;
                    671:
                    672:        /*
                    673:         * We have found a new contiguous block.
                    674:         *
                    675:         * First we have to replace the old block pointers with the new
                    676:         * block pointers in the inode and indirect blocks associated
                    677:         * with the file.
                    678:         */
                    679: #ifdef DEBUG
                    680:        if (prtrealloc)
                    681:                printf("realloc: ino %d, lbns %jd-%jd\n\told:", ip->i_number,
                    682:                    (intmax_t)start_lbn, (intmax_t)end_lbn);
                    683: #endif
                    684:
                    685:        blkno = newblk;
                    686:
                    687:        for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->fs_frag) {
                    688:                if (i == ssize) {
                    689:                        bap = ebap;
                    690:                        soff = -i;
                    691:                }
                    692: #ifdef DIAGNOSTIC
                    693:                if (!ffs_checkblk(ip,
                    694:                   dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
                    695:                        panic("ffs2_reallocblks: unallocated block 2");
                    696:                if (dbtofsb(fs, buflist->bs_children[i]->b_blkno) != *bap)
                    697:                        panic("ffs2_reallocblks: alloc mismatch");
                    698: #endif
                    699: #ifdef DEBUG
                    700:                if (prtrealloc)
                    701:                        printf(" %jd,", (intmax_t)*bap);
                    702: #endif
                    703:                if (DOINGSOFTDEP(vp)) {
                    704:                        if (sbap == &ip->i_din2->di_db[0] && i < ssize)
                    705:                                softdep_setup_allocdirect(ip, start_lbn + i,
                    706:                                    blkno, *bap, fs->fs_bsize, fs->fs_bsize,
                    707:                                    buflist->bs_children[i]);
                    708:                        else
                    709:                                softdep_setup_allocindir_page(ip, start_lbn + i,
                    710:                                    i < ssize ? sbp : ebp, soff + i, blkno,
                    711:                                    *bap, buflist->bs_children[i]);
                    712:                }
                    713:                *bap++ = blkno;
                    714:        }
                    715:
                    716:        /*
                    717:         * Next we must write out the modified inode and indirect blocks.
                    718:         * For strict correctness, the writes should be synchronous since
                    719:         * the old block values may have been written to disk. In practise
                    720:         * they are almost never written, but if we are concerned about
                    721:         * strict correctness, the `doasyncfree' flag should be set to zero.
                    722:         *
                    723:         * The test on `doasyncfree' should be changed to test a flag
                    724:         * that shows whether the associated buffers and inodes have
                    725:         * been written. The flag should be set when the cluster is
                    726:         * started and cleared whenever the buffer or inode is flushed.
                    727:         * We can then check below to see if it is set, and do the
                    728:         * synchronous write only when it has been cleared.
                    729:         */
                    730:        if (sbap != &ip->i_din2->di_db[0]) {
                    731:                if (doasyncfree)
                    732:                        bdwrite(sbp);
                    733:                else
                    734:                        bwrite(sbp);
                    735:        } else {
                    736:                ip->i_flag |= IN_CHANGE | IN_UPDATE;
                    737:                if (!doasyncfree)
                    738:                        ffs_update(ip, NULL, NULL, MNT_WAIT);
                    739:        }
                    740:
                    741:        if (ssize < len) {
                    742:                if (doasyncfree)
                    743:                        bdwrite(ebp);
                    744:                else
                    745:                        bwrite(ebp);
                    746:        }
                    747:
                    748:        /*
                    749:         * Last, free the old blocks and assign the new blocks to the buffers.
                    750:         */
                    751: #ifdef DEBUG
                    752:        if (prtrealloc)
                    753:                printf("\n\tnew:");
                    754: #endif
                    755:        for (blkno = newblk, i = 0; i < len; i++, blkno += fs->fs_frag) {
                    756:                if (!DOINGSOFTDEP(vp))
                    757:                        ffs_blkfree(ip, dbtofsb(fs,
                    758:                            buflist->bs_children[i]->b_blkno), fs->fs_bsize);
                    759:                buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno);
                    760: #ifdef DIAGNOSTIC
                    761:                if (!ffs_checkblk(ip,
                    762:                   dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
                    763:                        panic("ffs2_reallocblks: unallocated block 3");
                    764: #endif
                    765: #ifdef DEBUG
                    766:                if (prtrealloc)
                    767:                        printf(" %jd,", (intmax_t)blkno);
                    768: #endif
                    769:        }
                    770: #ifdef DEBUG
                    771:        if (prtrealloc) {
                    772:                prtrealloc--;
                    773:                printf("\n");
                    774:        }
                    775: #endif
                    776:
                    777:        return (0);
                    778:
                    779: fail:
                    780:        if (ssize < len)
                    781:                brelse(ebp);
                    782:
                    783:        if (sbap != &ip->i_din2->di_db[0])
                    784:                brelse(sbp);
                    785:
                    786:        return (ENOSPC);
                    787: }
                    788: #endif /* FFS2 */
                    789:
                    790: int
                    791: ffs_reallocblks(void *v)
                    792: {
                    793: #ifdef FFS2
                    794:        struct vop_reallocblks_args *ap = v;
                    795: #endif
                    796:
                    797:        if (!doreallocblks)
                    798:                return (ENOSPC);
                    799:
                    800: #ifdef FFS2
                    801:        if (VTOI(ap->a_vp)->i_ump->um_fstype == UM_UFS2)
                    802:                return (ffs2_reallocblks(v));
                    803: #endif
                    804:
                    805:        return (ffs1_reallocblks(v));
                    806: }
                    807:
                    808: /*
                    809:  * Allocate an inode in the file system.
                    810:  *
                    811:  * If allocating a directory, use ffs_dirpref to select the inode.
                    812:  * If allocating in a directory, the following hierarchy is followed:
                    813:  *   1) allocate the preferred inode.
                    814:  *   2) allocate an inode in the same cylinder group.
                    815:  *   3) quadratically rehash into other cylinder groups, until an
                    816:  *      available inode is located.
                    817:  * If no inode preference is given the following hierarchy is used
                    818:  * to allocate an inode:
                    819:  *   1) allocate an inode in cylinder group 0.
                    820:  *   2) quadratically rehash into other cylinder groups, until an
                    821:  *      available inode is located.
                    822:  */
                    823: int
                    824: ffs_inode_alloc(struct inode *pip, mode_t mode, struct ucred *cred,
                    825:     struct vnode **vpp)
                    826: {
                    827:        struct vnode *pvp = ITOV(pip);
                    828:        struct fs *fs;
                    829:        struct inode *ip;
                    830:        ino_t ino, ipref;
                    831:        int cg, error;
                    832:
                    833:        *vpp = NULL;
                    834:        fs = pip->i_fs;
                    835:        if (fs->fs_cstotal.cs_nifree == 0)
                    836:                goto noinodes;
                    837:
                    838:        if ((mode & IFMT) == IFDIR)
                    839:                ipref = ffs_dirpref(pip);
                    840:        else
                    841:                ipref = pip->i_number;
                    842:        if (ipref >= fs->fs_ncg * fs->fs_ipg)
                    843:                ipref = 0;
                    844:        cg = ino_to_cg(fs, ipref);
                    845:
                    846:        /*
                    847:         * Track number of dirs created one after another
                    848:         * in a same cg without intervening by files.
                    849:         */
                    850:        if ((mode & IFMT) == IFDIR) {
                    851:                if (fs->fs_contigdirs[cg] < 255)
                    852:                        fs->fs_contigdirs[cg]++;
                    853:        } else {
                    854:                if (fs->fs_contigdirs[cg] > 0)
                    855:                        fs->fs_contigdirs[cg]--;
                    856:        }
                    857:        ino = (ino_t)ffs_hashalloc(pip, cg, (long)ipref, mode, ffs_nodealloccg);
                    858:        if (ino == 0)
                    859:                goto noinodes;
                    860:        error = VFS_VGET(pvp->v_mount, ino, vpp);
                    861:        if (error) {
                    862:                ffs_inode_free(pip, ino, mode);
                    863:                return (error);
                    864:        }
                    865:
                    866:        ip = VTOI(*vpp);
                    867:
                    868:        if (DIP(ip, mode)) {
                    869:                printf("mode = 0%o, inum = %d, fs = %s\n",
                    870:                    DIP(ip, mode), ip->i_number, fs->fs_fsmnt);
                    871:                panic("ffs_valloc: dup alloc");
                    872:        }
                    873:
                    874:        if (DIP(ip, blocks)) {
                    875:                printf("free inode %s/%d had %d blocks\n",
                    876:                    fs->fs_fsmnt, ino, DIP(ip, blocks));
                    877:                DIP_ASSIGN(ip, blocks, 0);
                    878:        }
                    879:
                    880:        DIP_ASSIGN(ip, flags, 0);
                    881:
                    882:        /*
                    883:         * Set up a new generation number for this inode.
                    884:         * XXX - just increment for now, this is wrong! (millert)
                    885:         *       Need a way to preserve randomization.
                    886:         */
                    887:        if (DIP(ip, gen) == 0 || ++(DIP(ip, gen)) == 0)
                    888:                DIP_ASSIGN(ip, gen, arc4random() & INT_MAX);
                    889:
                    890:        if (DIP(ip, gen) == 0 || DIP(ip, gen) == -1)
                    891:                DIP_ASSIGN(ip, gen, 1); /* Shouldn't happen */
                    892:
                    893:        return (0);
                    894:
                    895: noinodes:
                    896:        ffs_fserr(fs, cred->cr_uid, "out of inodes");
                    897:        uprintf("\n%s: create/symlink failed, no inodes free\n", fs->fs_fsmnt);
                    898:        return (ENOSPC);
                    899: }
                    900:
                    901: /*
                    902:  * Find a cylinder group to place a directory.
                    903:  *
                    904:  * The policy implemented by this algorithm is to allocate a
                    905:  * directory inode in the same cylinder group as its parent
                    906:  * directory, but also to reserve space for its files inodes
                    907:  * and data. Restrict the number of directories which may be
                    908:  * allocated one after another in the same cylinder group
                    909:  * without intervening allocation of files.
                    910:  *
                    911:  * If we allocate a first level directory then force allocation
                    912:  * in another cylinder group.
                    913:  */
                    914: ino_t
                    915: ffs_dirpref(struct inode *pip)
                    916: {
                    917:        struct fs *fs;
                    918:        int     cg, prefcg, dirsize, cgsize;
                    919:        int     avgifree, avgbfree, avgndir, curdirsize;
                    920:        int     minifree, minbfree, maxndir;
                    921:        int     mincg, minndir;
                    922:        int     maxcontigdirs;
                    923:
                    924:        fs = pip->i_fs;
                    925:
                    926:        avgifree = fs->fs_cstotal.cs_nifree / fs->fs_ncg;
                    927:        avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
                    928:        avgndir = fs->fs_cstotal.cs_ndir / fs->fs_ncg;
                    929: #if 1
                    930:
                    931:        /*
                    932:         * Force allocation in another cg if creating a first level dir.
                    933:         */
                    934:        if (ITOV(pip)->v_flag & VROOT) {
                    935:                prefcg = (arc4random() & INT_MAX) % fs->fs_ncg;
                    936:                mincg = prefcg;
                    937:                minndir = fs->fs_ipg;
                    938:                for (cg = prefcg; cg < fs->fs_ncg; cg++)
                    939:                        if (fs->fs_cs(fs, cg).cs_ndir < minndir &&
                    940:                            fs->fs_cs(fs, cg).cs_nifree >= avgifree &&
                    941:                            fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
                    942:                                mincg = cg;
                    943:                                minndir = fs->fs_cs(fs, cg).cs_ndir;
                    944:                        }
                    945:                for (cg = 0; cg < prefcg; cg++)
                    946:                        if (fs->fs_cs(fs, cg).cs_ndir < minndir &&
                    947:                            fs->fs_cs(fs, cg).cs_nifree >= avgifree &&
                    948:                            fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
                    949:                                mincg = cg;
                    950:                                minndir = fs->fs_cs(fs, cg).cs_ndir;
                    951:                        }
                    952:                cg = mincg;
                    953:                goto end;
                    954:        } else
                    955:                prefcg = ino_to_cg(fs, pip->i_number);
                    956: #else
                    957:        prefcg = ino_to_cg(fs, pip->i_number);
                    958: #endif
                    959:
                    960:        /*
                    961:         * Count various limits which used for
                    962:         * optimal allocation of a directory inode.
                    963:         */
                    964: #if 1
                    965:        maxndir = min(avgndir + fs->fs_ipg / 16, fs->fs_ipg);
                    966:        minifree = avgifree - fs->fs_ipg / 4;
                    967:        if (minifree < 0)
                    968:                minifree = 0;
                    969:        minbfree = avgbfree - fs->fs_fpg / fs->fs_frag / 4;
                    970:        if (minbfree < 0)
                    971:                minbfree = 0;
                    972: #else
                    973:        maxndir = avgndir + (fs->fs_ipg - avgndir) / 16;
                    974:        minifree = avgifree * 3 / 4;
                    975:        minbfree = avgbfree * 3 / 4;
                    976: #endif
                    977:        cgsize = fs->fs_fsize * fs->fs_fpg;
                    978:        dirsize = fs->fs_avgfilesize * fs->fs_avgfpdir;
                    979:        curdirsize = avgndir ? (cgsize - avgbfree * fs->fs_bsize) / avgndir : 0;
                    980:        if (dirsize < curdirsize)
                    981:                dirsize = curdirsize;
                    982:        maxcontigdirs = min(cgsize / dirsize, 255);
                    983:        if (fs->fs_avgfpdir > 0)
                    984:                maxcontigdirs = min(maxcontigdirs,
                    985:                                    fs->fs_ipg / fs->fs_avgfpdir);
                    986:        if (maxcontigdirs == 0)
                    987:                maxcontigdirs = 1;
                    988:
                    989:        /*
                    990:         * Limit number of dirs in one cg and reserve space for
                    991:         * regular files, but only if we have no deficit in
                    992:         * inodes or space.
                    993:         */
                    994:        for (cg = prefcg; cg < fs->fs_ncg; cg++)
                    995:                if (fs->fs_cs(fs, cg).cs_ndir < maxndir &&
                    996:                    fs->fs_cs(fs, cg).cs_nifree >= minifree &&
                    997:                    fs->fs_cs(fs, cg).cs_nbfree >= minbfree) {
                    998:                        if (fs->fs_contigdirs[cg] < maxcontigdirs)
                    999:                                goto end;
                   1000:                }
                   1001:        for (cg = 0; cg < prefcg; cg++)
                   1002:                if (fs->fs_cs(fs, cg).cs_ndir < maxndir &&
                   1003:                    fs->fs_cs(fs, cg).cs_nifree >= minifree &&
                   1004:                    fs->fs_cs(fs, cg).cs_nbfree >= minbfree) {
                   1005:                        if (fs->fs_contigdirs[cg] < maxcontigdirs)
                   1006:                                goto end;
                   1007:                }
                   1008:        /*
                   1009:         * This is a backstop when we have deficit in space.
                   1010:         */
                   1011:        for (cg = prefcg; cg < fs->fs_ncg; cg++)
                   1012:                if (fs->fs_cs(fs, cg).cs_nifree >= avgifree)
                   1013:                        goto end;
                   1014:        for (cg = 0; cg < prefcg; cg++)
                   1015:                if (fs->fs_cs(fs, cg).cs_nifree >= avgifree)
                   1016:                        goto end;
                   1017: end:
                   1018:        return ((ino_t)(fs->fs_ipg * cg));
                   1019: }
                   1020:
                   1021: /*
                   1022:  * Select the desired position for the next block in a file.  The file is
                   1023:  * logically divided into sections. The first section is composed of the
                   1024:  * direct blocks. Each additional section contains fs_maxbpg blocks.
                   1025:  *
                   1026:  * If no blocks have been allocated in the first section, the policy is to
                   1027:  * request a block in the same cylinder group as the inode that describes
                   1028:  * the file. If no blocks have been allocated in any other section, the
                   1029:  * policy is to place the section in a cylinder group with a greater than
                   1030:  * average number of free blocks.  An appropriate cylinder group is found
                   1031:  * by using a rotor that sweeps the cylinder groups. When a new group of
                   1032:  * blocks is needed, the sweep begins in the cylinder group following the
                   1033:  * cylinder group from which the previous allocation was made. The sweep
                   1034:  * continues until a cylinder group with greater than the average number
                   1035:  * of free blocks is found. If the allocation is for the first block in an
                   1036:  * indirect block, the information on the previous allocation is unavailable;
                   1037:  * here a best guess is made based upon the logical block number being
                   1038:  * allocated.
                   1039:  */
                   1040: int32_t
                   1041: ffs1_blkpref(struct inode *ip, daddr_t lbn, int indx, int32_t *bap)
                   1042: {
                   1043:        struct fs *fs;
                   1044:        int cg, avgbfree, startcg;
                   1045:
                   1046:        fs = ip->i_fs;
                   1047:        if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) {
                   1048:                if (lbn < NDADDR + NINDIR(fs)) {
                   1049:                        cg = ino_to_cg(fs, ip->i_number);
                   1050:                        return (fs->fs_fpg * cg + fs->fs_frag);
                   1051:                }
                   1052:                /*
                   1053:                 * Find a cylinder with greater than average number of
                   1054:                 * unused data blocks.
                   1055:                 */
                   1056:                if (indx == 0 || bap[indx - 1] == 0)
                   1057:                        startcg =
                   1058:                            ino_to_cg(fs, ip->i_number) + lbn / fs->fs_maxbpg;
                   1059:                else
                   1060:                        startcg = dtog(fs, bap[indx - 1]) + 1;
                   1061:                startcg %= fs->fs_ncg;
                   1062:                avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
                   1063:                for (cg = startcg; cg < fs->fs_ncg; cg++)
                   1064:                        if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
                   1065:                                fs->fs_cgrotor = cg;
                   1066:                                return (fs->fs_fpg * cg + fs->fs_frag);
                   1067:                        }
                   1068:                for (cg = 0; cg <= startcg; cg++)
                   1069:                        if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
                   1070:                                fs->fs_cgrotor = cg;
                   1071:                                return (fs->fs_fpg * cg + fs->fs_frag);
                   1072:                        }
                   1073:                return (0);
                   1074:        }
                   1075:
                   1076:        return (bap[indx - 1] + fs->fs_frag);
                   1077: }
                   1078:
                   1079: /*
                   1080:  * Same as above, for UFS2.
                   1081:  */
                   1082: #ifdef FFS2
                   1083: int64_t
                   1084: ffs2_blkpref(struct inode *ip, daddr_t lbn, int indx, int64_t *bap)
                   1085: {
                   1086:        struct fs *fs;
                   1087:        int cg, avgbfree, startcg;
                   1088:
                   1089:        fs = ip->i_fs;
                   1090:
                   1091:        if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) {
                   1092:                if (lbn < NDADDR + NINDIR(fs)) {
                   1093:                        cg = ino_to_cg(fs, ip->i_number);
                   1094:                        return (fs->fs_fpg * cg + fs->fs_frag);
                   1095:                }
                   1096:
                   1097:                /*
                   1098:                 * Find a cylinder with greater than average number of
                   1099:                 * unused data blocks.
                   1100:                 */
                   1101:                if (indx == 0 || bap[indx - 1] == 0)
                   1102:                        startcg = ino_to_cg(fs, ip->i_number) +
                   1103:                            lbn / fs->fs_maxbpg;
                   1104:                else
                   1105:                        startcg = dtog(fs, bap[indx - 1] + 1);
                   1106:
                   1107:                startcg %= fs->fs_ncg;
                   1108:                avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
                   1109:
                   1110:                for (cg = startcg; cg < fs->fs_ncg; cg++)
                   1111:                        if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree)
                   1112:                                return (fs->fs_fpg * cg + fs->fs_frag);
                   1113:
                   1114:                for (cg = 0; cg < startcg; cg++)
                   1115:                        if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree)
                   1116:                                return (fs->fs_fpg * cg + fs->fs_frag);
                   1117:
                   1118:                return (0);
                   1119:        }
                   1120:
                   1121:        /*
                   1122:         * We always just try to lay things out contiguously.
                   1123:         */
                   1124:        return (bap[indx - 1] + fs->fs_frag);
                   1125: }
                   1126: #endif /* FFS2 */
                   1127:
                   1128: /*
                   1129:  * Implement the cylinder overflow algorithm.
                   1130:  *
                   1131:  * The policy implemented by this algorithm is:
                   1132:  *   1) allocate the block in its requested cylinder group.
                   1133:  *   2) quadratically rehash on the cylinder group number.
                   1134:  *   3) brute force search for a free block.
                   1135:  */
                   1136: /*VARARGS5*/
                   1137: u_long
                   1138: ffs_hashalloc(struct inode *ip, int cg, long pref, int size,
                   1139:     daddr_t (*allocator)(struct inode *, int, daddr_t, int))
                   1140: {
                   1141:        struct fs *fs;
                   1142:        long result;
                   1143:        int i, icg = cg;
                   1144:
                   1145:        fs = ip->i_fs;
                   1146:        /*
                   1147:         * 1: preferred cylinder group
                   1148:         */
                   1149:        result = (*allocator)(ip, cg, pref, size);
                   1150:        if (result)
                   1151:                return (result);
                   1152:        /*
                   1153:         * 2: quadratic rehash
                   1154:         */
                   1155:        for (i = 1; i < fs->fs_ncg; i *= 2) {
                   1156:                cg += i;
                   1157:                if (cg >= fs->fs_ncg)
                   1158:                        cg -= fs->fs_ncg;
                   1159:                result = (*allocator)(ip, cg, 0, size);
                   1160:                if (result)
                   1161:                        return (result);
                   1162:        }
                   1163:        /*
                   1164:         * 3: brute force search
                   1165:         * Note that we start at i == 2, since 0 was checked initially,
                   1166:         * and 1 is always checked in the quadratic rehash.
                   1167:         */
                   1168:        cg = (icg + 2) % fs->fs_ncg;
                   1169:        for (i = 2; i < fs->fs_ncg; i++) {
                   1170:                result = (*allocator)(ip, cg, 0, size);
                   1171:                if (result)
                   1172:                        return (result);
                   1173:                cg++;
                   1174:                if (cg == fs->fs_ncg)
                   1175:                        cg = 0;
                   1176:        }
                   1177:        return (0);
                   1178: }
                   1179:
                   1180: /*
                   1181:  * Determine whether a fragment can be extended.
                   1182:  *
                   1183:  * Check to see if the necessary fragments are available, and
                   1184:  * if they are, allocate them.
                   1185:  */
                   1186: daddr_t
                   1187: ffs_fragextend(struct inode *ip, int cg, long bprev, int osize, int nsize)
                   1188: {
                   1189:        struct fs *fs;
                   1190:        struct cg *cgp;
                   1191:        struct buf *bp;
                   1192:        long bno;
                   1193:        int frags, bbase;
                   1194:        int i, error;
                   1195:
                   1196:        fs = ip->i_fs;
                   1197:        if (fs->fs_cs(fs, cg).cs_nffree < numfrags(fs, nsize - osize))
                   1198:                return (0);
                   1199:        frags = numfrags(fs, nsize);
                   1200:        bbase = fragnum(fs, bprev);
                   1201:        if (bbase > fragnum(fs, (bprev + frags - 1))) {
                   1202:                /* cannot extend across a block boundary */
                   1203:                return (0);
                   1204:        }
                   1205:        error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
                   1206:                (int)fs->fs_cgsize, NOCRED, &bp);
                   1207:        if (error) {
                   1208:                brelse(bp);
                   1209:                return (0);
                   1210:        }
                   1211:        cgp = (struct cg *)bp->b_data;
                   1212:        if (!cg_chkmagic(cgp)) {
                   1213:                brelse(bp);
                   1214:                return (0);
                   1215:        }
                   1216:
                   1217:        cgp->cg_ffs2_time = cgp->cg_time = time_second;
                   1218:
                   1219:        bno = dtogd(fs, bprev);
                   1220:        for (i = numfrags(fs, osize); i < frags; i++)
                   1221:                if (isclr(cg_blksfree(cgp), bno + i)) {
                   1222:                        brelse(bp);
                   1223:                        return (0);
                   1224:                }
                   1225:        /*
                   1226:         * the current fragment can be extended
                   1227:         * deduct the count on fragment being extended into
                   1228:         * increase the count on the remaining fragment (if any)
                   1229:         * allocate the extended piece
                   1230:         */
                   1231:        for (i = frags; i < fs->fs_frag - bbase; i++)
                   1232:                if (isclr(cg_blksfree(cgp), bno + i))
                   1233:                        break;
                   1234:        cgp->cg_frsum[i - numfrags(fs, osize)]--;
                   1235:        if (i != frags)
                   1236:                cgp->cg_frsum[i - frags]++;
                   1237:        for (i = numfrags(fs, osize); i < frags; i++) {
                   1238:                clrbit(cg_blksfree(cgp), bno + i);
                   1239:                cgp->cg_cs.cs_nffree--;
                   1240:                fs->fs_cstotal.cs_nffree--;
                   1241:                fs->fs_cs(fs, cg).cs_nffree--;
                   1242:        }
                   1243:        fs->fs_fmod = 1;
                   1244:        if (DOINGSOFTDEP(ITOV(ip)))
                   1245:                softdep_setup_blkmapdep(bp, fs, bprev);
                   1246:
                   1247:        bdwrite(bp);
                   1248:        return (bprev);
                   1249: }
                   1250:
                   1251: /*
                   1252:  * Determine whether a block can be allocated.
                   1253:  *
                   1254:  * Check to see if a block of the appropriate size is available,
                   1255:  * and if it is, allocate it.
                   1256:  */
                   1257: daddr_t
                   1258: ffs_alloccg(struct inode *ip, int cg, daddr_t bpref, int size)
                   1259: {
                   1260:        struct fs *fs;
                   1261:        struct cg *cgp;
                   1262:        struct buf *bp;
                   1263:        daddr_t bno, blkno;
                   1264:        int error, i, frags, allocsiz;
                   1265:
                   1266:        fs = ip->i_fs;
                   1267:        if (fs->fs_cs(fs, cg).cs_nbfree == 0 && size == fs->fs_bsize)
                   1268:                return (0);
                   1269:        /* read cylinder group block */
                   1270:        error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
                   1271:                (int)fs->fs_cgsize, NOCRED, &bp);
                   1272:        if (error) {
                   1273:                brelse(bp);
                   1274:                return (0);
                   1275:        }
                   1276:        cgp = (struct cg *)bp->b_data;
                   1277:        if (!cg_chkmagic(cgp) ||
                   1278:            (cgp->cg_cs.cs_nbfree == 0 && size == fs->fs_bsize)) {
                   1279:                brelse(bp);
                   1280:                return (0);
                   1281:        }
                   1282:
                   1283:        cgp->cg_ffs2_time = cgp->cg_time = time_second;
                   1284:
                   1285:        if (size == fs->fs_bsize) {
                   1286:                /* allocate and return a complete data block */
                   1287:                bno = ffs_alloccgblk(ip, bp, bpref);
                   1288:                bdwrite(bp);
                   1289:                return (bno);
                   1290:        }
                   1291:        /*
                   1292:         * check to see if any fragments are already available
                   1293:         * allocsiz is the size which will be allocated, hacking
                   1294:         * it down to a smaller size if necessary
                   1295:         */
                   1296:        frags = numfrags(fs, size);
                   1297:        for (allocsiz = frags; allocsiz < fs->fs_frag; allocsiz++)
                   1298:                if (cgp->cg_frsum[allocsiz] != 0)
                   1299:                        break;
                   1300:        if (allocsiz == fs->fs_frag) {
                   1301:                /*
                   1302:                 * no fragments were available, so a block will be
                   1303:                 * allocated, and hacked up
                   1304:                 */
                   1305:                if (cgp->cg_cs.cs_nbfree == 0) {
                   1306:                        brelse(bp);
                   1307:                        return (0);
                   1308:                }
                   1309:                bno = ffs_alloccgblk(ip, bp, bpref);
                   1310:                bpref = dtogd(fs, bno);
                   1311:                for (i = frags; i < fs->fs_frag; i++)
                   1312:                        setbit(cg_blksfree(cgp), bpref + i);
                   1313:                i = fs->fs_frag - frags;
                   1314:                cgp->cg_cs.cs_nffree += i;
                   1315:                fs->fs_cstotal.cs_nffree += i;
                   1316:                fs->fs_cs(fs, cg).cs_nffree += i;
                   1317:                fs->fs_fmod = 1;
                   1318:                cgp->cg_frsum[i]++;
                   1319:                bdwrite(bp);
                   1320:                return (bno);
                   1321:        }
                   1322:        bno = ffs_mapsearch(fs, cgp, bpref, allocsiz);
                   1323:        if (bno < 0) {
                   1324:                brelse(bp);
                   1325:                return (0);
                   1326:        }
                   1327:
                   1328:        for (i = 0; i < frags; i++)
                   1329:                clrbit(cg_blksfree(cgp), bno + i);
                   1330:        cgp->cg_cs.cs_nffree -= frags;
                   1331:        fs->fs_cstotal.cs_nffree -= frags;
                   1332:        fs->fs_cs(fs, cg).cs_nffree -= frags;
                   1333:        fs->fs_fmod = 1;
                   1334:        cgp->cg_frsum[allocsiz]--;
                   1335:        if (frags != allocsiz)
                   1336:                cgp->cg_frsum[allocsiz - frags]++;
                   1337:
                   1338:        blkno = cg * fs->fs_fpg + bno;
                   1339:        if (DOINGSOFTDEP(ITOV(ip)))
                   1340:                softdep_setup_blkmapdep(bp, fs, blkno);
                   1341:        bdwrite(bp);
                   1342:        return ((u_long)blkno);
                   1343: }
                   1344:
                   1345: /*
                   1346:  * Allocate a block in a cylinder group.
                   1347:  * Note that this routine only allocates fs_bsize blocks; these
                   1348:  * blocks may be fragmented by the routine that allocates them.
                   1349:  */
                   1350: daddr_t
                   1351: ffs_alloccgblk(struct inode *ip, struct buf *bp, daddr_t bpref)
                   1352: {
                   1353:        struct fs *fs;
                   1354:        struct cg *cgp;
                   1355:        daddr_t bno, blkno;
                   1356:        u_int8_t *blksfree;
                   1357:        int cylno;
                   1358:
                   1359:        fs = ip->i_fs;
                   1360:        cgp = (struct cg *) bp->b_data;
                   1361:        blksfree = cg_blksfree(cgp);
                   1362:
                   1363:        if (bpref == 0 || dtog(fs, bpref) != cgp->cg_cgx)
                   1364:                bpref = cgp->cg_rotor;
                   1365:        else {
                   1366:                bpref = blknum(fs, bpref);
                   1367:                bno = dtogd(fs, bpref);
                   1368:                /*
                   1369:                 * If the requested block is available, use it.
                   1370:                 */
                   1371:                if (ffs_isblock(fs, blksfree, fragstoblks(fs, bno)))
                   1372:                        goto gotit;
                   1373:        }
                   1374:
                   1375:        /*
                   1376:         * Take the next available block in this cylinder group.
                   1377:         */
                   1378:        bno = ffs_mapsearch(fs, cgp, bpref, (int) fs->fs_frag);
                   1379:        if (bno < 0)
                   1380:                return (0);
                   1381:
                   1382:        cgp->cg_rotor = bno;
                   1383:
                   1384: gotit:
                   1385:        blkno = fragstoblks(fs, bno);
                   1386:        ffs_clrblock(fs, blksfree, (long) blkno);
                   1387:        ffs_clusteracct(fs, cgp, blkno, -1);
                   1388:        cgp->cg_cs.cs_nbfree--;
                   1389:        fs->fs_cstotal.cs_nbfree--;
                   1390:        fs->fs_cs(fs, cgp->cg_cgx).cs_nbfree--;
                   1391:
                   1392:        if (fs->fs_magic != FS_UFS2_MAGIC) {
                   1393:                cylno = cbtocylno(fs, bno);
                   1394:                cg_blks(fs, cgp, cylno)[cbtorpos(fs, bno)]--;
                   1395:                cg_blktot(cgp)[cylno]--;
                   1396:        }
                   1397:
                   1398:        fs->fs_fmod = 1;
                   1399:        blkno = cgp->cg_cgx * fs->fs_fpg + bno;
                   1400:
                   1401:        if (DOINGSOFTDEP(ITOV(ip)))
                   1402:                softdep_setup_blkmapdep(bp, fs, blkno);
                   1403:
                   1404:        return (blkno);
                   1405: }
                   1406:
                   1407: /*
                   1408:  * Determine whether a cluster can be allocated.
                   1409:  *
                   1410:  * We do not currently check for optimal rotational layout if there
                   1411:  * are multiple choices in the same cylinder group. Instead we just
                   1412:  * take the first one that we find following bpref.
                   1413:  */
                   1414: daddr_t
                   1415: ffs_clusteralloc(struct inode *ip, int cg, daddr_t bpref, int len)
                   1416: {
                   1417:        struct fs *fs;
                   1418:        struct cg *cgp;
                   1419:        struct buf *bp;
                   1420:        int i, got, run, bno, bit, map;
                   1421:        u_char *mapp;
                   1422:        int32_t *lp;
                   1423:
                   1424:        fs = ip->i_fs;
                   1425:        if (fs->fs_maxcluster[cg] < len)
                   1426:                return (0);
                   1427:        if (bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize,
                   1428:            NOCRED, &bp))
                   1429:                goto fail;
                   1430:        cgp = (struct cg *)bp->b_data;
                   1431:        if (!cg_chkmagic(cgp))
                   1432:                goto fail;
                   1433:        /*
                   1434:         * Check to see if a cluster of the needed size (or bigger) is
                   1435:         * available in this cylinder group.
                   1436:         */
                   1437:        lp = &cg_clustersum(cgp)[len];
                   1438:        for (i = len; i <= fs->fs_contigsumsize; i++)
                   1439:                if (*lp++ > 0)
                   1440:                        break;
                   1441:        if (i > fs->fs_contigsumsize) {
                   1442:                /*
                   1443:                 * This is the first time looking for a cluster in this
                   1444:                 * cylinder group. Update the cluster summary information
                   1445:                 * to reflect the true maximum sized cluster so that
                   1446:                 * future cluster allocation requests can avoid reading
                   1447:                 * the cylinder group map only to find no clusters.
                   1448:                 */
                   1449:                lp = &cg_clustersum(cgp)[len - 1];
                   1450:                for (i = len - 1; i > 0; i--)
                   1451:                        if (*lp-- > 0)
                   1452:                                break;
                   1453:                fs->fs_maxcluster[cg] = i;
                   1454:                goto fail;
                   1455:        }
                   1456:        /*
                   1457:         * Search the cluster map to find a big enough cluster.
                   1458:         * We take the first one that we find, even if it is larger
                   1459:         * than we need as we prefer to get one close to the previous
                   1460:         * block allocation. We do not search before the current
                   1461:         * preference point as we do not want to allocate a block
                   1462:         * that is allocated before the previous one (as we will
                   1463:         * then have to wait for another pass of the elevator
                   1464:         * algorithm before it will be read). We prefer to fail and
                   1465:         * be recalled to try an allocation in the next cylinder group.
                   1466:         */
                   1467:        if (dtog(fs, bpref) != cg)
                   1468:                bpref = 0;
                   1469:        else
                   1470:                bpref = fragstoblks(fs, dtogd(fs, blknum(fs, bpref)));
                   1471:        mapp = &cg_clustersfree(cgp)[bpref / NBBY];
                   1472:        map = *mapp++;
                   1473:        bit = 1 << (bpref % NBBY);
                   1474:        for (run = 0, got = bpref; got < cgp->cg_nclusterblks; got++) {
                   1475:                if ((map & bit) == 0) {
                   1476:                        run = 0;
                   1477:                } else {
                   1478:                        run++;
                   1479:                        if (run == len)
                   1480:                                break;
                   1481:                }
                   1482:                if ((got & (NBBY - 1)) != (NBBY - 1)) {
                   1483:                        bit <<= 1;
                   1484:                } else {
                   1485:                        map = *mapp++;
                   1486:                        bit = 1;
                   1487:                }
                   1488:        }
                   1489:        if (got >= cgp->cg_nclusterblks)
                   1490:                goto fail;
                   1491:        /*
                   1492:         * Allocate the cluster that we have found.
                   1493:         */
                   1494:        cgp->cg_ffs2_time = cgp->cg_time = time_second;
                   1495:
                   1496: #ifdef DIAGNOSTIC
                   1497:        for (i = 1; i <= len; i++)
                   1498:                if (!ffs_isblock(fs, cg_blksfree(cgp), got - run + i))
                   1499:                        panic("ffs_clusteralloc: map mismatch");
                   1500: #endif
                   1501:        bno = cg * fs->fs_fpg + blkstofrags(fs, got - run + 1);
                   1502: #ifdef DIAGNOSTIC
                   1503:        if (dtog(fs, bno) != cg)
                   1504:                panic("ffs_clusteralloc: allocated out of group");
                   1505: #endif
                   1506:
                   1507:        len = blkstofrags(fs, len);
                   1508:        for (i = 0; i < len; i += fs->fs_frag)
                   1509:                if (ffs_alloccgblk(ip, bp, bno + i) != bno + i)
                   1510:                        panic("ffs_clusteralloc: lost block");
                   1511:        bdwrite(bp);
                   1512:        return (bno);
                   1513:
                   1514: fail:
                   1515:        brelse(bp);
                   1516:        return (0);
                   1517: }
                   1518:
                   1519: /* inode allocation routine */
                   1520: daddr_t
                   1521: ffs_nodealloccg(struct inode *ip, int cg, daddr_t ipref, int mode)
                   1522: {
                   1523:        struct fs *fs;
                   1524:        struct cg *cgp;
                   1525:        struct buf *bp;
                   1526:        int error, start, len, loc, map, i;
                   1527: #ifdef FFS2
                   1528:        struct buf *ibp = NULL;
                   1529:        struct ufs2_dinode *dp2;
                   1530: #endif
                   1531:
                   1532:        /*
                   1533:         * For efficiency, before looking at the bitmaps for free inodes,
                   1534:         * check the counters kept in the superblock cylinder group summaries,
                   1535:         * and in the cylinder group itself.
                   1536:         */
                   1537:        fs = ip->i_fs;
                   1538:        if (fs->fs_cs(fs, cg).cs_nifree == 0)
                   1539:                return (0);
                   1540:
                   1541:        error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
                   1542:                (int)fs->fs_cgsize, NOCRED, &bp);
                   1543:        if (error) {
                   1544:                brelse(bp);
                   1545:                return (0);
                   1546:        }
                   1547:
                   1548:        cgp = (struct cg *)bp->b_data;
                   1549:        if (!cg_chkmagic(cgp) || cgp->cg_cs.cs_nifree == 0) {
                   1550:                brelse(bp);
                   1551:                return (0);
                   1552:        }
                   1553:
                   1554:        /*
                   1555:         * We are committed to the allocation from now on, so update the time
                   1556:         * on the cylinder group.
                   1557:         */
                   1558:        cgp->cg_ffs2_time = cgp->cg_time = time_second;
                   1559:
                   1560:        /*
                   1561:         * If there was a preferred location for the new inode, try to find it.
                   1562:         */
                   1563:        if (ipref) {
                   1564:                ipref %= fs->fs_ipg;
                   1565:                if (isclr(cg_inosused(cgp), ipref))
                   1566:                        goto gotit; /* inode is free, grab it. */
                   1567:        }
                   1568:
                   1569:        /*
                   1570:         * Otherwise, look for the next available inode, starting at cg_irotor
                   1571:         * (the position in the bitmap of the last used inode).
                   1572:         */
                   1573:        start = cgp->cg_irotor / NBBY;
                   1574:        len = howmany(fs->fs_ipg - cgp->cg_irotor, NBBY);
                   1575:        loc = skpc(0xff, len, &cg_inosused(cgp)[start]);
                   1576:        if (loc == 0) {
                   1577:                /*
                   1578:                 * If we didn't find a free inode in the upper part of the
                   1579:                 * bitmap (from cg_irotor to the end), then look at the bottom
                   1580:                 * part (from 0 to cg_irotor).
                   1581:                 */
                   1582:                len = start + 1;
                   1583:                start = 0;
                   1584:                loc = skpc(0xff, len, &cg_inosused(cgp)[0]);
                   1585:                if (loc == 0) {
                   1586:                        /*
                   1587:                         * If we failed again, then either the bitmap or the
                   1588:                         * counters kept for the cylinder group are wrong.
                   1589:                         */
                   1590:                        printf("cg = %d, irotor = %d, fs = %s\n",
                   1591:                            cg, cgp->cg_irotor, fs->fs_fsmnt);
                   1592:                        panic("ffs_nodealloccg: map corrupted");
                   1593:                        /* NOTREACHED */
                   1594:                }
                   1595:        }
                   1596:
                   1597:        /* skpc() returns the position relative to the end */
                   1598:        i = start + len - loc;
                   1599:
                   1600:        /*
                   1601:         * Okay, so now in 'i' we have the location in the bitmap of a byte
                   1602:         * holding a free inode. Find the corresponding bit and set it,
                   1603:         * updating cg_irotor as well, accordingly.
                   1604:         */
                   1605:        map = cg_inosused(cgp)[i];
                   1606:        ipref = i * NBBY;
                   1607:        for (i = 1; i < (1 << NBBY); i <<= 1, ipref++) {
                   1608:                if ((map & i) == 0) {
                   1609:                        cgp->cg_irotor = ipref;
                   1610:                        goto gotit;
                   1611:                }
                   1612:        }
                   1613:
                   1614:        printf("fs = %s\n", fs->fs_fsmnt);
                   1615:        panic("ffs_nodealloccg: block not in map");
                   1616:        /* NOTREACHED */
                   1617:
                   1618: gotit:
                   1619:
                   1620: #ifdef FFS2
                   1621:        /*
                   1622:         * For FFS2, check if all inodes in this cylinder group have been used
                   1623:         * at least once. If they haven't, and we are allocating an inode past
                   1624:         * the last allocated block of inodes, read in a block and initialize
                   1625:         * all inodes in it.
                   1626:         */
                   1627:        if (fs->fs_magic == FS_UFS2_MAGIC &&
                   1628:            /* Inode is beyond last initialized block of inodes? */
                   1629:            ipref + INOPB(fs) > cgp->cg_initediblk &&
                   1630:            /* Has any inode not been used at least once? */
                   1631:            cgp->cg_initediblk < cgp->cg_ffs2_niblk) {
                   1632:
                   1633:                 ibp = getblk(ip->i_devvp, fsbtodb(fs,
                   1634:                     ino_to_fsba(fs, cg * fs->fs_ipg + cgp->cg_initediblk)),
                   1635:                     (int)fs->fs_bsize, 0, 0);
                   1636:
                   1637:                 bzero(ibp->b_data, (int)fs->fs_bsize);
                   1638:                 dp2 = (struct ufs2_dinode *)(ibp->b_data);
                   1639:
                   1640:                /* Give each inode a positive generation number */
                   1641:                 for (i = 0; i < INOPB(fs); i++) {
                   1642:                         dp2->di_gen = (arc4random() & INT32_MAX) / 2 + 1;
                   1643:                         dp2++;
                   1644:                 }
                   1645:
                   1646:                /* Update the counter of initialized inodes */
                   1647:                 cgp->cg_initediblk += INOPB(fs);
                   1648:         }
                   1649: #endif /* FFS2 */
                   1650:
                   1651:        if (DOINGSOFTDEP(ITOV(ip)))
                   1652:                softdep_setup_inomapdep(bp, ip, cg * fs->fs_ipg + ipref);
                   1653:
                   1654:        setbit(cg_inosused(cgp), ipref);
                   1655:
                   1656:        /* Update the counters we keep on free inodes */
                   1657:        cgp->cg_cs.cs_nifree--;
                   1658:        fs->fs_cstotal.cs_nifree--;
                   1659:        fs->fs_cs(fs, cg).cs_nifree--;
                   1660:        fs->fs_fmod = 1; /* file system was modified */
                   1661:
                   1662:        /* Update the counters we keep on allocated directories */
                   1663:        if ((mode & IFMT) == IFDIR) {
                   1664:                cgp->cg_cs.cs_ndir++;
                   1665:                fs->fs_cstotal.cs_ndir++;
                   1666:                fs->fs_cs(fs, cg).cs_ndir++;
                   1667:        }
                   1668:
                   1669:        bdwrite(bp);
                   1670:
                   1671: #ifdef FFS2
                   1672:        if (ibp != NULL)
                   1673:                bawrite(ibp);
                   1674: #endif
                   1675:
                   1676:        /* Return the allocated inode number */
                   1677:        return (cg * fs->fs_ipg + ipref);
                   1678: }
                   1679:
                   1680: /*
                   1681:  * Free a block or fragment.
                   1682:  *
                   1683:  * The specified block or fragment is placed back in the
                   1684:  * free map. If a fragment is deallocated, a possible
                   1685:  * block reassembly is checked.
                   1686:  */
                   1687: void
                   1688: ffs_blkfree(struct inode *ip, daddr64_t bno, long size)
                   1689: {
                   1690:        struct fs *fs;
                   1691:        struct cg *cgp;
                   1692:        struct buf *bp;
                   1693:        daddr_t blkno;
                   1694:        int i, error, cg, blk, frags, bbase;
                   1695:
                   1696:        fs = ip->i_fs;
                   1697:        if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0 ||
                   1698:            fragnum(fs, bno) + numfrags(fs, size) > fs->fs_frag) {
                   1699:                printf("dev = 0x%x, bsize = %d, size = %ld, fs = %s\n",
                   1700:                    ip->i_dev, fs->fs_bsize, size, fs->fs_fsmnt);
                   1701:                panic("ffs_blkfree: bad size");
                   1702:        }
                   1703:        cg = dtog(fs, bno);
                   1704:        if ((u_int)bno >= fs->fs_size) {
                   1705:                printf("bad block %d, ino %u\n", bno, ip->i_number);
                   1706:                ffs_fserr(fs, DIP(ip, uid), "bad block");
                   1707:                return;
                   1708:        }
                   1709:        error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
                   1710:                (int)fs->fs_cgsize, NOCRED, &bp);
                   1711:        if (error) {
                   1712:                brelse(bp);
                   1713:                return;
                   1714:        }
                   1715:        cgp = (struct cg *)bp->b_data;
                   1716:        if (!cg_chkmagic(cgp)) {
                   1717:                brelse(bp);
                   1718:                return;
                   1719:        }
                   1720:
                   1721:        cgp->cg_ffs2_time = cgp->cg_time = time_second;
                   1722:
                   1723:        bno = dtogd(fs, bno);
                   1724:        if (size == fs->fs_bsize) {
                   1725:                blkno = fragstoblks(fs, bno);
                   1726:                if (!ffs_isfreeblock(fs, cg_blksfree(cgp), blkno)) {
                   1727:                        printf("dev = 0x%x, block = %d, fs = %s\n",
                   1728:                            ip->i_dev, bno, fs->fs_fsmnt);
                   1729:                        panic("ffs_blkfree: freeing free block");
                   1730:                }
                   1731:                ffs_setblock(fs, cg_blksfree(cgp), blkno);
                   1732:                ffs_clusteracct(fs, cgp, blkno, 1);
                   1733:                cgp->cg_cs.cs_nbfree++;
                   1734:                fs->fs_cstotal.cs_nbfree++;
                   1735:                fs->fs_cs(fs, cg).cs_nbfree++;
                   1736:
                   1737:                if (fs->fs_magic != FS_UFS2_MAGIC) {
                   1738:                        i = cbtocylno(fs, bno);
                   1739:                        cg_blks(fs, cgp, i)[cbtorpos(fs, bno)]++;
                   1740:                        cg_blktot(cgp)[i]++;
                   1741:                }
                   1742:
                   1743:        } else {
                   1744:                bbase = bno - fragnum(fs, bno);
                   1745:                /*
                   1746:                 * decrement the counts associated with the old frags
                   1747:                 */
                   1748:                blk = blkmap(fs, cg_blksfree(cgp), bbase);
                   1749:                ffs_fragacct(fs, blk, cgp->cg_frsum, -1);
                   1750:                /*
                   1751:                 * deallocate the fragment
                   1752:                 */
                   1753:                frags = numfrags(fs, size);
                   1754:                for (i = 0; i < frags; i++) {
                   1755:                        if (isset(cg_blksfree(cgp), bno + i)) {
                   1756:                                printf("dev = 0x%x, block = %d, fs = %s\n",
                   1757:                                    ip->i_dev, bno + i, fs->fs_fsmnt);
                   1758:                                panic("ffs_blkfree: freeing free frag");
                   1759:                        }
                   1760:                        setbit(cg_blksfree(cgp), bno + i);
                   1761:                }
                   1762:                cgp->cg_cs.cs_nffree += i;
                   1763:                fs->fs_cstotal.cs_nffree += i;
                   1764:                fs->fs_cs(fs, cg).cs_nffree += i;
                   1765:                /*
                   1766:                 * add back in counts associated with the new frags
                   1767:                 */
                   1768:                blk = blkmap(fs, cg_blksfree(cgp), bbase);
                   1769:                ffs_fragacct(fs, blk, cgp->cg_frsum, 1);
                   1770:                /*
                   1771:                 * if a complete block has been reassembled, account for it
                   1772:                 */
                   1773:                blkno = fragstoblks(fs, bbase);
                   1774:                if (ffs_isblock(fs, cg_blksfree(cgp), blkno)) {
                   1775:                        cgp->cg_cs.cs_nffree -= fs->fs_frag;
                   1776:                        fs->fs_cstotal.cs_nffree -= fs->fs_frag;
                   1777:                        fs->fs_cs(fs, cg).cs_nffree -= fs->fs_frag;
                   1778:                        ffs_clusteracct(fs, cgp, blkno, 1);
                   1779:                        cgp->cg_cs.cs_nbfree++;
                   1780:                        fs->fs_cstotal.cs_nbfree++;
                   1781:                        fs->fs_cs(fs, cg).cs_nbfree++;
                   1782:
                   1783:                        if (fs->fs_magic != FS_UFS2_MAGIC) {
                   1784:                                i = cbtocylno(fs, bbase);
                   1785:                                cg_blks(fs, cgp, i)[cbtorpos(fs, bbase)]++;
                   1786:                                cg_blktot(cgp)[i]++;
                   1787:                        }
                   1788:                }
                   1789:        }
                   1790:        fs->fs_fmod = 1;
                   1791:        bdwrite(bp);
                   1792: }
                   1793:
                   1794: int
                   1795: ffs_inode_free(struct inode *pip, ino_t ino, mode_t mode)
                   1796: {
                   1797:        struct vnode *pvp = ITOV(pip);
                   1798:
                   1799:        if (DOINGSOFTDEP(pvp)) {
                   1800:                softdep_freefile(pvp, ino, mode);
                   1801:                return (0);
                   1802:        }
                   1803:
                   1804:        return (ffs_freefile(pip, ino, mode));
                   1805: }
                   1806:
                   1807: /*
                   1808:  * Do the actual free operation.
                   1809:  * The specified inode is placed back in the free map.
                   1810:  */
                   1811: int
                   1812: ffs_freefile(struct inode *pip, ino_t ino, mode_t mode)
                   1813: {
                   1814:        struct fs *fs;
                   1815:        struct cg *cgp;
                   1816:        struct buf *bp;
                   1817:        int error, cg;
                   1818:
                   1819:        fs = pip->i_fs;
                   1820:        if ((u_int)ino >= fs->fs_ipg * fs->fs_ncg)
                   1821:                panic("ffs_freefile: range: dev = 0x%x, ino = %d, fs = %s",
                   1822:                    pip->i_dev, ino, fs->fs_fsmnt);
                   1823:        cg = ino_to_cg(fs, ino);
                   1824:        error = bread(pip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
                   1825:                (int)fs->fs_cgsize, NOCRED, &bp);
                   1826:        if (error) {
                   1827:                brelse(bp);
                   1828:                return (error);
                   1829:        }
                   1830:        cgp = (struct cg *)bp->b_data;
                   1831:        if (!cg_chkmagic(cgp)) {
                   1832:                brelse(bp);
                   1833:                return (0);
                   1834:        }
                   1835:
                   1836:        cgp->cg_ffs2_time = cgp->cg_time = time_second;
                   1837:
                   1838:        ino %= fs->fs_ipg;
                   1839:        if (isclr(cg_inosused(cgp), ino)) {
                   1840:                printf("dev = 0x%x, ino = %u, fs = %s\n",
                   1841:                    pip->i_dev, ino, fs->fs_fsmnt);
                   1842:                if (fs->fs_ronly == 0)
                   1843:                        panic("ffs_freefile: freeing free inode");
                   1844:        }
                   1845:        clrbit(cg_inosused(cgp), ino);
                   1846:        if (ino < cgp->cg_irotor)
                   1847:                cgp->cg_irotor = ino;
                   1848:        cgp->cg_cs.cs_nifree++;
                   1849:        fs->fs_cstotal.cs_nifree++;
                   1850:        fs->fs_cs(fs, cg).cs_nifree++;
                   1851:        if ((mode & IFMT) == IFDIR) {
                   1852:                cgp->cg_cs.cs_ndir--;
                   1853:                fs->fs_cstotal.cs_ndir--;
                   1854:                fs->fs_cs(fs, cg).cs_ndir--;
                   1855:        }
                   1856:        fs->fs_fmod = 1;
                   1857:        bdwrite(bp);
                   1858:        return (0);
                   1859: }
                   1860:
                   1861: #ifdef DIAGNOSTIC
                   1862: /*
                   1863:  * Verify allocation of a block or fragment. Returns true if block or
                   1864:  * fragment is allocated, false if it is free.
                   1865:  */
                   1866: int
                   1867: ffs_checkblk(struct inode *ip, daddr_t bno, long size)
                   1868: {
                   1869:        struct fs *fs;
                   1870:        struct cg *cgp;
                   1871:        struct buf *bp;
                   1872:        int i, error, frags, free;
                   1873:
                   1874:        fs = ip->i_fs;
                   1875:        if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) {
                   1876:                printf("bsize = %d, size = %ld, fs = %s\n",
                   1877:                    fs->fs_bsize, size, fs->fs_fsmnt);
                   1878:                panic("ffs_checkblk: bad size");
                   1879:        }
                   1880:        if ((u_int)bno >= fs->fs_size)
                   1881:                panic("ffs_checkblk: bad block %d", bno);
                   1882:        error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, dtog(fs, bno))),
                   1883:                (int)fs->fs_cgsize, NOCRED, &bp);
                   1884:        if (error) {
                   1885:                brelse(bp);
                   1886:                return (0);
                   1887:        }
                   1888:
                   1889:        cgp = (struct cg *)bp->b_data;
                   1890:        if (!cg_chkmagic(cgp)) {
                   1891:                brelse(bp);
                   1892:                return (0);
                   1893:        }
                   1894:
                   1895:        bno = dtogd(fs, bno);
                   1896:        if (size == fs->fs_bsize) {
                   1897:                free = ffs_isblock(fs, cg_blksfree(cgp), fragstoblks(fs, bno));
                   1898:        } else {
                   1899:                frags = numfrags(fs, size);
                   1900:                for (free = 0, i = 0; i < frags; i++)
                   1901:                        if (isset(cg_blksfree(cgp), bno + i))
                   1902:                                free++;
                   1903:                if (free != 0 && free != frags)
                   1904:                        panic("ffs_checkblk: partially free fragment");
                   1905:        }
                   1906:        brelse(bp);
                   1907:        return (!free);
                   1908: }
                   1909: #endif /* DIAGNOSTIC */
                   1910:
                   1911:
                   1912: /*
                   1913:  * Find a block of the specified size in the specified cylinder group.
                   1914:  *
                   1915:  * It is a panic if a request is made to find a block if none are
                   1916:  * available.
                   1917:  */
                   1918: daddr_t
                   1919: ffs_mapsearch(struct fs *fs, struct cg *cgp, daddr_t bpref, int allocsiz)
                   1920: {
                   1921:        daddr_t bno;
                   1922:        int start, len, loc, i;
                   1923:        int blk, field, subfield, pos;
                   1924:
                   1925:        /*
                   1926:         * find the fragment by searching through the free block
                   1927:         * map for an appropriate bit pattern
                   1928:         */
                   1929:        if (bpref)
                   1930:                start = dtogd(fs, bpref) / NBBY;
                   1931:        else
                   1932:                start = cgp->cg_frotor / NBBY;
                   1933:        len = howmany(fs->fs_fpg, NBBY) - start;
                   1934:        loc = scanc((u_int)len, (u_char *)&cg_blksfree(cgp)[start],
                   1935:                (u_char *)fragtbl[fs->fs_frag],
                   1936:                (u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY))));
                   1937:        if (loc == 0) {
                   1938:                len = start + 1;
                   1939:                start = 0;
                   1940:                loc = scanc((u_int)len, (u_char *)&cg_blksfree(cgp)[0],
                   1941:                        (u_char *)fragtbl[fs->fs_frag],
                   1942:                        (u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY))));
                   1943:                if (loc == 0) {
                   1944:                        printf("start = %d, len = %d, fs = %s\n",
                   1945:                            start, len, fs->fs_fsmnt);
                   1946:                        panic("ffs_alloccg: map corrupted");
                   1947:                        /* NOTREACHED */
                   1948:                }
                   1949:        }
                   1950:        bno = (start + len - loc) * NBBY;
                   1951:        cgp->cg_frotor = bno;
                   1952:        /*
                   1953:         * found the byte in the map
                   1954:         * sift through the bits to find the selected frag
                   1955:         */
                   1956:        for (i = bno + NBBY; bno < i; bno += fs->fs_frag) {
                   1957:                blk = blkmap(fs, cg_blksfree(cgp), bno);
                   1958:                blk <<= 1;
                   1959:                field = around[allocsiz];
                   1960:                subfield = inside[allocsiz];
                   1961:                for (pos = 0; pos <= fs->fs_frag - allocsiz; pos++) {
                   1962:                        if ((blk & field) == subfield)
                   1963:                                return (bno + pos);
                   1964:                        field <<= 1;
                   1965:                        subfield <<= 1;
                   1966:                }
                   1967:        }
                   1968:        printf("bno = %d, fs = %s\n", bno, fs->fs_fsmnt);
                   1969:        panic("ffs_alloccg: block not in map");
                   1970:        return (-1);
                   1971: }
                   1972:
                   1973: /*
                   1974:  * Update the cluster map because of an allocation or free.
                   1975:  *
                   1976:  * Cnt == 1 means free; cnt == -1 means allocating.
                   1977:  */
                   1978: void
                   1979: ffs_clusteracct(struct fs *fs, struct cg *cgp, daddr_t blkno, int cnt)
                   1980: {
                   1981:        int32_t *sump;
                   1982:        int32_t *lp;
                   1983:        u_char *freemapp, *mapp;
                   1984:        int i, start, end, forw, back, map, bit;
                   1985:
                   1986:        if (fs->fs_contigsumsize <= 0)
                   1987:                return;
                   1988:        freemapp = cg_clustersfree(cgp);
                   1989:        sump = cg_clustersum(cgp);
                   1990:        /*
                   1991:         * Allocate or clear the actual block.
                   1992:         */
                   1993:        if (cnt > 0)
                   1994:                setbit(freemapp, blkno);
                   1995:        else
                   1996:                clrbit(freemapp, blkno);
                   1997:        /*
                   1998:         * Find the size of the cluster going forward.
                   1999:         */
                   2000:        start = blkno + 1;
                   2001:        end = start + fs->fs_contigsumsize;
                   2002:        if (end >= cgp->cg_nclusterblks)
                   2003:                end = cgp->cg_nclusterblks;
                   2004:        mapp = &freemapp[start / NBBY];
                   2005:        map = *mapp++;
                   2006:        bit = 1 << (start % NBBY);
                   2007:        for (i = start; i < end; i++) {
                   2008:                if ((map & bit) == 0)
                   2009:                        break;
                   2010:                if ((i & (NBBY - 1)) != (NBBY - 1)) {
                   2011:                        bit <<= 1;
                   2012:                } else {
                   2013:                        map = *mapp++;
                   2014:                        bit = 1;
                   2015:                }
                   2016:        }
                   2017:        forw = i - start;
                   2018:        /*
                   2019:         * Find the size of the cluster going backward.
                   2020:         */
                   2021:        start = blkno - 1;
                   2022:        end = start - fs->fs_contigsumsize;
                   2023:        if (end < 0)
                   2024:                end = -1;
                   2025:        mapp = &freemapp[start / NBBY];
                   2026:        map = *mapp--;
                   2027:        bit = 1 << (start % NBBY);
                   2028:        for (i = start; i > end; i--) {
                   2029:                if ((map & bit) == 0)
                   2030:                        break;
                   2031:                if ((i & (NBBY - 1)) != 0) {
                   2032:                        bit >>= 1;
                   2033:                } else {
                   2034:                        map = *mapp--;
                   2035:                        bit = 1 << (NBBY - 1);
                   2036:                }
                   2037:        }
                   2038:        back = start - i;
                   2039:        /*
                   2040:         * Account for old cluster and the possibly new forward and
                   2041:         * back clusters.
                   2042:         */
                   2043:        i = back + forw + 1;
                   2044:        if (i > fs->fs_contigsumsize)
                   2045:                i = fs->fs_contigsumsize;
                   2046:        sump[i] += cnt;
                   2047:        if (back > 0)
                   2048:                sump[back] -= cnt;
                   2049:        if (forw > 0)
                   2050:                sump[forw] -= cnt;
                   2051:        /*
                   2052:         * Update cluster summary information.
                   2053:         */
                   2054:        lp = &sump[fs->fs_contigsumsize];
                   2055:        for (i = fs->fs_contigsumsize; i > 0; i--)
                   2056:                if (*lp-- > 0)
                   2057:                        break;
                   2058:        fs->fs_maxcluster[cgp->cg_cgx] = i;
                   2059: }

CVSweb