[BACK]Return to rf_decluster.c CVS log [TXT][DIR] Up to [local] / sys / dev / raidframe

Annotation of sys/dev/raidframe/rf_decluster.c, Revision 1.1

1.1     ! nbrk        1: /*     $OpenBSD: rf_decluster.c,v 1.5 2002/12/16 07:01:03 tdeval Exp $ */
        !             2: /*     $NetBSD: rf_decluster.c,v 1.5 2000/03/07 01:54:29 oster Exp $   */
        !             3:
        !             4: /*
        !             5:  * Copyright (c) 1995 Carnegie-Mellon University.
        !             6:  * All rights reserved.
        !             7:  *
        !             8:  * Author: Mark Holland
        !             9:  *
        !            10:  * Permission to use, copy, modify and distribute this software and
        !            11:  * its documentation is hereby granted, provided that both the copyright
        !            12:  * notice and this permission notice appear in all copies of the
        !            13:  * software, derivative works or modified versions, and any portions
        !            14:  * thereof, and that both notices appear in supporting documentation.
        !            15:  *
        !            16:  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
        !            17:  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
        !            18:  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
        !            19:  *
        !            20:  * Carnegie Mellon requests users of this software to return to
        !            21:  *
        !            22:  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
        !            23:  *  School of Computer Science
        !            24:  *  Carnegie Mellon University
        !            25:  *  Pittsburgh PA 15213-3890
        !            26:  *
        !            27:  * any improvements or extensions that they make and grant Carnegie the
        !            28:  * rights to redistribute these changes.
        !            29:  */
        !            30:
        !            31: /*****************************************************************************
        !            32:  *
        !            33:  * rf_decluster.c -- Code related to the declustered layout.
        !            34:  *
        !            35:  * Created 10-21-92 (MCH)
        !            36:  *
        !            37:  * Nov 93:     Adding support for distributed sparing. This code is a little
        !            38:  *             complex; the basic layout used is as follows:
        !            39:  *             Let F = (v-1)/GCD(r,v-1). The spare space for each set of
        !            40:  *             F consecutive fulltables is grouped together and placed after
        !            41:  *             that set of tables.
        !            42:  *                     +-------------------------------+
        !            43:  *                     |         F fulltables          |
        !            44:  *                     |         Spare Space           |
        !            45:  *                     |         F fulltables          |
        !            46:  *                     |         Spare Space           |
        !            47:  *                     |             ...               |
        !            48:  *                     +-------------------------------+
        !            49:  *
        !            50:  *****************************************************************************/
        !            51:
        !            52: #include "rf_types.h"
        !            53: #include "rf_raid.h"
        !            54: #include "rf_raidframe.h"
        !            55: #include "rf_configure.h"
        !            56: #include "rf_decluster.h"
        !            57: #include "rf_debugMem.h"
        !            58: #include "rf_utils.h"
        !            59: #include "rf_alloclist.h"
        !            60: #include "rf_general.h"
        !            61: #include "rf_shutdown.h"
        !            62:
        !            63: extern int rf_copyback_in_progress;    /* Debug only: read by the
        !                 * RF_ASSERT in rf_MapSectorDeclustered(). */
        !            64:
        !            65: /* Defined in rf_kintf.c; only prototyped here. */
        !            66: int  rf_GetSpareTableFromDaemon(RF_SparetWait_t *);
        !            67:
        !            68: /*
        !                 * Configuration code.
        !                 *
        !                 * rf_ConfigureDeclustered -- set up the declustered-parity layout.
        !                 *
        !                 * Allocates an RF_DeclusteredConfigInfo_t on raidPtr->cleanupList and
        !                 * stores it in layoutPtr->layoutSpecificInfo, reads the block design
        !                 * parameters (b, v, k, r, lambda, noRotate) and the b x k layout
        !                 * table from cfgPtr->layoutSpecific, derives the table, fulltable
        !                 * and spare-region geometry, recomputes
        !                 * layoutPtr->stripeUnitsPerDisk from that geometry, and builds the
        !                 * LayoutTable, OffsetTable and BlockTable lookup arrays.
        !                 *
        !                 * listp is not referenced by this function.
        !                 *
        !                 * Returns 0 on success, ENOMEM when an allocation fails, or EINVAL
        !                 * when v differs from raidPtr->numCol or the block design does not
        !                 * fit within layoutPtr->stripeUnitsPerDisk.
        !                 *
        !                 * NOTE(review): v - 1, k, k - 1 and values derived from b, k and v
        !                 * are used as divisors without any range check; presumably the
        !                 * caller only passes block designs with v >= 2 and k >= 2 --
        !                 * confirm.
        !                 */
        !            69:
        !            70: int
        !            71: rf_ConfigureDeclustered(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
        !            72:     RF_Config_t *cfgPtr)
        !            73: {
        !            74:        RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
        !            75:        int b, v, k, r, lambda; /* block design params */
        !            76:        int i, j;
        !            77:        RF_RowCol_t *first_avail_slot;
        !            78:        RF_StripeCount_t complete_FT_count, numCompleteFullTablesPerDisk;
        !            79:        RF_DeclusteredConfigInfo_t *info;
        !            80:        RF_StripeCount_t PUsPerDisk, spareRegionDepthInPUs,
        !            81:            numCompleteSpareRegionsPerDisk, extraPUsPerDisk;
        !            82:        RF_StripeCount_t totSparePUsPerDisk;
        !            83:        RF_SectorNum_t diskOffsetOfLastFullTableInSUs;
        !            84:        RF_SectorCount_t SpareSpaceInSUs;
        !            85:        char *cfgBuf = (char *) (cfgPtr->layoutSpecific);
        !            86:        RF_StripeNum_t l, SUID;
        !            87:
        !            88:        SUID = l = 0;
        !            89:        numCompleteSpareRegionsPerDisk = 0;
        !            90:
        !            91:        /* 1. Create layout specific structure. */
        !            92:        RF_MallocAndAdd(info, sizeof(RF_DeclusteredConfigInfo_t),
        !            93:            (RF_DeclusteredConfigInfo_t *), raidPtr->cleanupList);
        !            94:        if (info == NULL)
        !            95:                return (ENOMEM);
        !            96:        layoutPtr->layoutSpecificInfo = (void *) info;
        !            97:        info->SpareTable = NULL;
        !            98:
        !            99:        /*
        !                        * 2. Extract parameters from the config structure.
        !                        * The buffer starts with an RF_SPAREMAP_NAME_LEN-byte spare map
        !                        * name (copied only when distributed sparing is enabled, but
        !                        * always skipped), followed by six ints read in this order:
        !                        * b, v, k, r, lambda and the noRotate flag.
        !                        */
        !           100:        if (layoutPtr->map->flags & RF_DISTRIBUTE_SPARE) {
        !           101:                bcopy(cfgBuf, info->sparemap_fname, RF_SPAREMAP_NAME_LEN);
        !           102:        }
        !           103:        cfgBuf += RF_SPAREMAP_NAME_LEN;
        !           104:
        !           105:        b = *((int *) cfgBuf);
        !           106:        cfgBuf += sizeof(int);
        !           107:        v = *((int *) cfgBuf);
        !           108:        cfgBuf += sizeof(int);
        !           109:        k = *((int *) cfgBuf);
        !           110:        cfgBuf += sizeof(int);
        !           111:        r = *((int *) cfgBuf);
        !           112:        cfgBuf += sizeof(int);
        !           113:        lambda = *((int *) cfgBuf);
        !           114:        cfgBuf += sizeof(int);
        !           115:        raidPtr->noRotate = *((int *) cfgBuf);
        !           116:        cfgBuf += sizeof(int);
        !           117:
        !           118:        /*
        !           119:         * The sparemaps are generated assuming that parity is rotated, so we
        !           120:         * issue a warning if both distributed sparing and no-rotate are on at
        !           121:         * the same time.
        !           122:         */
        !           123:        if ((layoutPtr->map->flags & RF_DISTRIBUTE_SPARE) &&
        !           124:            raidPtr->noRotate) {
        !           125:                RF_ERRORMSG("Warning:  distributed sparing specified without"
        !           126:                    " parity rotation.\n");
        !           127:        }
        !           128:        if (raidPtr->numCol != v) {
        !           129:                RF_ERRORMSG2("RAID: config error: table element count (%d)"
        !           130:                    " not equal to no. of cols (%d).\n", v, raidPtr->numCol);
        !           131:                return (EINVAL);
        !           132:        }
        !           133:        /* 3. Set up the values used in the mapping code. */
        !           134:        info->BlocksPerTable = b;
        !           135:        info->Lambda = lambda;
        !           136:        info->NumParityReps = info->groupSize = k;
        !           137:        /* b blks, k-1 SUs each. */
        !           138:        info->SUsPerTable = b * (k - 1) * layoutPtr->SUsPerPU;
        !           139:        info->SUsPerFullTable = k * info->SUsPerTable;  /* rot k times */
        !           140:        info->PUsPerBlock = k - 1;
        !           141:        info->SUsPerBlock = info->PUsPerBlock * layoutPtr->SUsPerPU;
        !           142:        info->TableDepthInPUs = (b * k) / v;
        !           143:        /* k repetitions. */
        !           144:        info->FullTableDepthInPUs = info->TableDepthInPUs * k;
        !           145:
        !           146:        /* Used only in distributed sparing case. */
        !           147:        /* (v-1)/gcd fulltables. */
        !           148:        info->FullTablesPerSpareRegion = (v - 1) / rf_gcd(r, v - 1);
        !           149:        info->TablesPerSpareRegion = k * info->FullTablesPerSpareRegion;
        !           150:        info->SpareSpaceDepthPerRegionInSUs = (r * info->TablesPerSpareRegion /
        !           151:            (v - 1)) * layoutPtr->SUsPerPU;
        !           152:
        !           153:        /* Check to make sure the block design is sufficiently small. */
        !           154:        if ((raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE)) {
        !           155:                if (info->FullTableDepthInPUs * layoutPtr->SUsPerPU +
        !           156:                    info->SpareSpaceDepthPerRegionInSUs >
        !           157:                    layoutPtr->stripeUnitsPerDisk) {
        !           158:                        RF_ERRORMSG3("RAID: config error: Full Table depth"
        !           159:                            " (%d) + Spare Space (%d) larger than disk size"
        !           160:                            " (%d) (BD too big).\n",
        !           161:                            (int) info->FullTableDepthInPUs,
        !           162:                            (int) info->SpareSpaceDepthPerRegionInSUs,
        !           163:                            (int) layoutPtr->stripeUnitsPerDisk);
        !           164:                        return (EINVAL);
        !           165:                }
        !           166:        } else {
        !           167:                if (info->TableDepthInPUs * layoutPtr->SUsPerPU >
        !           168:                    layoutPtr->stripeUnitsPerDisk) {
        !           169:                        RF_ERRORMSG2("RAID: config error: Table depth (%d)"
        !           170:                            " larger than disk size (%d) (BD too big).\n",
        !           171:                            (int) (info->TableDepthInPUs * layoutPtr->SUsPerPU),
        !           172:                            (int) layoutPtr->stripeUnitsPerDisk);
        !           173:                        return (EINVAL);
        !           174:                }
        !           175:        }
        !           176:
        !           177:
        !           178:        /*
        !           179:         * Compute the size of each disk, and the number of tables in the last
        !           180:         * fulltable (which need not be complete).
        !           181:         */
        !           182:        if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) {
        !           183:
        !           184:                PUsPerDisk = layoutPtr->stripeUnitsPerDisk /
        !           185:                    layoutPtr->SUsPerPU;
        !           186:                spareRegionDepthInPUs =
        !           187:                    (info->TablesPerSpareRegion * info->TableDepthInPUs +
        !           188:                    (info->TablesPerSpareRegion * info->TableDepthInPUs) /
        !           189:                    (v - 1));
        !           190:                info->SpareRegionDepthInSUs =
        !           191:                    spareRegionDepthInPUs * layoutPtr->SUsPerPU;
        !           192:
        !           193:                numCompleteSpareRegionsPerDisk =
        !           194:                    PUsPerDisk / spareRegionDepthInPUs;
        !           195:                info->NumCompleteSRs = numCompleteSpareRegionsPerDisk;
        !           196:                extraPUsPerDisk = PUsPerDisk % spareRegionDepthInPUs;
        !           197:
        !           198:                /*
        !           199:                 * Assume conservatively that we need the full amount of spare
        !           200:                 * space in one region in order to provide spares for the
        !           201:                 * partial spare region at the end of the array. We set "i"
        !           202:                 * to the number of tables in the partial spare region. This
        !           203:                 * may actually include some fulltables.
        !           204:                 */
        !           205:                extraPUsPerDisk -= (info->SpareSpaceDepthPerRegionInSUs /
        !           206:                    layoutPtr->SUsPerPU);
        !           207:                if (extraPUsPerDisk <= 0)
        !           208:                        i = 0;
        !           209:                else
        !           210:                        i = extraPUsPerDisk / info->TableDepthInPUs;
        !           211:
        !           212:                complete_FT_count = raidPtr->numRow *
        !           213:                    (numCompleteSpareRegionsPerDisk *
        !           214:                    (info->TablesPerSpareRegion / k) + i / k);
        !           215:                info->FullTableLimitSUID =
        !           216:                    complete_FT_count * info->SUsPerFullTable;
        !           217:                info->ExtraTablesPerDisk = i % k;
        !           218:
        !           219:                /*
        !           220:                 * Note that in the last spare region, the spare space is
        !           221:                 * complete even though data/parity space is not.
        !           222:                 */
        !           223:                totSparePUsPerDisk = (numCompleteSpareRegionsPerDisk + 1) *
        !           224:                    (info->SpareSpaceDepthPerRegionInSUs / layoutPtr->SUsPerPU);
        !           225:                info->TotSparePUsPerDisk = totSparePUsPerDisk;
        !           226:
        !           227:                layoutPtr->stripeUnitsPerDisk =
        !           228:                    ((complete_FT_count / raidPtr->numRow) *
        !           229:                    info->FullTableDepthInPUs + /* data & parity space */
        !           230:                    info->ExtraTablesPerDisk * info->TableDepthInPUs +
        !           231:                    totSparePUsPerDisk          /* spare space */
        !           232:                    ) * layoutPtr->SUsPerPU;
        !           233:                layoutPtr->dataStripeUnitsPerDisk =
        !           234:                    (complete_FT_count * info->FullTableDepthInPUs +
        !           235:                    info->ExtraTablesPerDisk * info->TableDepthInPUs) *
        !           236:                    layoutPtr->SUsPerPU * (k - 1) / k;
        !           237:
        !           238:        } else {
        !           239:                /*
        !           240:                 * Non-dist spare case:  force each disk to contain an
        !           241:                 * integral number of tables.
        !           242:                 */
        !           243:                layoutPtr->stripeUnitsPerDisk /=
        !           244:                    (info->TableDepthInPUs * layoutPtr->SUsPerPU);
        !           245:                layoutPtr->stripeUnitsPerDisk *=
        !           246:                    (info->TableDepthInPUs * layoutPtr->SUsPerPU);
        !           247:
        !           248:                /*
        !           249:                 * Compute the number of tables in the last fulltable, which
        !           250:                 * need not be complete.
        !           251:                 */
        !           252:                complete_FT_count =
        !           253:                    ((layoutPtr->stripeUnitsPerDisk / layoutPtr->SUsPerPU) /
        !           254:                    info->FullTableDepthInPUs) * raidPtr->numRow;
        !           255:
        !           256:                info->FullTableLimitSUID =
        !           257:                    complete_FT_count * info->SUsPerFullTable;
        !           258:                info->ExtraTablesPerDisk =
        !           259:                    ((layoutPtr->stripeUnitsPerDisk / layoutPtr->SUsPerPU) /
        !           260:                    info->TableDepthInPUs) % k;
        !           261:        }
        !           262:
        !           263:        raidPtr->sectorsPerDisk = layoutPtr->stripeUnitsPerDisk *
        !           264:                    layoutPtr->sectorsPerStripeUnit;
        !           265:
        !           266:        /*
        !           267:         * Find the disk offset of the stripe unit where the last fulltable
        !           268:         * starts.
        !           269:         */
        !           270:        numCompleteFullTablesPerDisk = complete_FT_count / raidPtr->numRow;
        !           271:        diskOffsetOfLastFullTableInSUs = numCompleteFullTablesPerDisk *
        !           272:            info->FullTableDepthInPUs * layoutPtr->SUsPerPU;
        !           273:        if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) {
        !           274:                SpareSpaceInSUs = numCompleteSpareRegionsPerDisk *
        !           275:                    info->SpareSpaceDepthPerRegionInSUs;
        !           276:                diskOffsetOfLastFullTableInSUs += SpareSpaceInSUs;
        !           277:                info->DiskOffsetOfLastSpareSpaceChunkInSUs =
        !           278:                    diskOffsetOfLastFullTableInSUs + info->ExtraTablesPerDisk *
        !           279:                    info->TableDepthInPUs * layoutPtr->SUsPerPU;
        !           280:        }
        !           281:        info->DiskOffsetOfLastFullTableInSUs = diskOffsetOfLastFullTableInSUs;
        !           282:        info->numCompleteFullTablesPerDisk = numCompleteFullTablesPerDisk;
        !           283:
        !           284:        /*
        !                        * 4. Create and initialize the lookup tables.
        !                        * LayoutTable and OffsetTable are b x k; BlockTable has
        !                        * TableDepthInPUs * SUsPerPU rows and numCol columns.  All
        !                        * three are created against raidPtr->cleanupList.
        !                        * first_avail_slot is a v-entry scratch array created with a
        !                        * NULL list argument and released explicitly with
        !                        * rf_free_1d_array() once the tables are built.
        !                        */
        !           285:        info->LayoutTable = rf_make_2d_array(b, k, raidPtr->cleanupList);
        !           286:        if (info->LayoutTable == NULL)
        !           287:                return (ENOMEM);
        !           288:        info->OffsetTable = rf_make_2d_array(b, k, raidPtr->cleanupList);
        !           289:        if (info->OffsetTable == NULL)
        !           290:                return (ENOMEM);
        !           291:        info->BlockTable = rf_make_2d_array(info->TableDepthInPUs *
        !           292:            layoutPtr->SUsPerPU, raidPtr->numCol, raidPtr->cleanupList);
        !           293:        if (info->BlockTable == NULL)
        !           294:                return (ENOMEM);
        !           295:
        !           296:        first_avail_slot = rf_make_1d_array(v, NULL);
        !           297:        if (first_avail_slot == NULL)
        !           298:                return (ENOMEM);
        !           299:        /*
        !                        * The layout table entries follow the six ints in the config
        !                        * buffer, one char per entry, filled row by row.
        !                        */
        !           300:        for (i = 0; i < b; i++)
        !           301:                for (j = 0; j < k; j++)
        !           302:                        info->LayoutTable[i][j] = *cfgBuf++;
        !           303:
        !           304:        /*
        !                        * Initialize the offset table.  first_avail_slot[] is a
        !                        * running counter indexed by the LayoutTable entry (a column
        !                        * number): each (i, j) takes the current count as its offset
        !                        * and then bumps the counter.
        !                        * NOTE(review): assumes rf_make_1d_array() returns zeroed
        !                        * storage -- confirm.
        !                        */
        !           305:        for (i = 0; i < b; i++)
        !           306:                for (j = 0; j < k; j++) {
        !           307:                        info->OffsetTable[i][j] =
        !           308:                            first_avail_slot[info->LayoutTable[i][j]];
        !           309:                        first_avail_slot[info->LayoutTable[i][j]]++;
        !           310:                }
        !           311:
        !           312:        /*
        !                        * Initialize the block table.  For each SU index l within a
        !                        * PU, the cell at row (OffsetTable[i][j] * SUsPerPU + l),
        !                        * column LayoutTable[i][j] receives SUID.  SUID advances once
        !                        * per block i, so all k entries of a block share one value.
        !                        */
        !           313:        for (SUID = l = 0; l < layoutPtr->SUsPerPU; l++) {
        !           314:                for (i = 0; i < b; i++) {
        !           315:                        for (j = 0; j < k; j++) {
        !           316:                                info->BlockTable[(info->OffsetTable[i][j] *
        !           317:                                    layoutPtr->SUsPerPU) + l]
        !           318:                                    [info->LayoutTable[i][j]] = SUID;
        !           319:                        }
        !           320:                        SUID++;
        !           321:                }
        !           322:        }
        !           323:
        !           324:        rf_free_1d_array(first_avail_slot, v);
        !           325:
        !           326:        /* 5. Set up the remaining redundant-but-useful parameters. */
        !           327:
        !           328:        raidPtr->totalSectors = (k * complete_FT_count + raidPtr->numRow *
        !           329:            info->ExtraTablesPerDisk) * info->SUsPerTable *
        !           330:            layoutPtr->sectorsPerStripeUnit;
        !           331:        layoutPtr->numStripe = (raidPtr->totalSectors /
        !           332:            layoutPtr->sectorsPerStripeUnit) / (k - 1);
        !           333:
        !           334:        /*
        !           335:         * Strange evaluation order below to try and minimize overflow
        !           336:         * problems.
        !           337:         */
        !           338:
        !           339:        layoutPtr->dataSectorsPerStripe =
        !           340:            (k - 1) * layoutPtr->sectorsPerStripeUnit;
        !           341:        layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit <<
        !           342:            raidPtr->logBytesPerSector;
        !           343:        layoutPtr->numDataCol = k - 1;
        !           344:        layoutPtr->numParityCol = 1;
        !           345:
        !           346:        return (0);
        !           347: }
        !           348:
        !           349: /*
        !                 * Declustering with distributed sparing.
        !                 *
        !                 * rf_ShutdownDeclusteredDS -- shutdown hook that
        !                 * rf_ConfigureDeclusteredDS() registers with rf_ShutdownCreate().
        !                 * arg is cast to the RF_Raid_t pointer of the array being torn down.
        !                 * The spare table is released through rf_FreeSpareTable() only when
        !                 * info->SpareTable is non-NULL, so calling this on an array that
        !                 * never loaded a spare table does nothing.
        !                 */
        !           350: void rf_ShutdownDeclusteredDS(RF_ThreadArg_t);
        !           351: void
        !           352: rf_ShutdownDeclusteredDS(RF_ThreadArg_t arg)
        !           353: {
        !           354:        RF_DeclusteredConfigInfo_t *info;
        !           355:        RF_Raid_t *raidPtr;
        !           356:
        !           357:        raidPtr = (RF_Raid_t *) arg;
        !           358:        info =
        !           359:            (RF_DeclusteredConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo;
        !           360:        if (info->SpareTable)
        !           361:                rf_FreeSpareTable(raidPtr);
        !           362: }
        !           363: /*
        !                 * rf_ConfigureDeclusteredDS -- configure the declustered layout with
        !                 * distributed sparing.
        !                 *
        !                 * Runs rf_ConfigureDeclustered() with the same arguments, then
        !                 * registers rf_ShutdownDeclusteredDS() on listp with raidPtr as its
        !                 * argument.
        !                 *
        !                 * Returns 0 on success, otherwise the non-zero code from whichever
        !                 * of the two steps failed.  If the registration fails, the shutdown
        !                 * routine is called directly before returning.
        !                 */
        !           364: int
        !           365: rf_ConfigureDeclusteredDS(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
        !           366:     RF_Config_t *cfgPtr)
        !           367: {
        !           368:        int rc;
        !           369:
        !           370:        rc = rf_ConfigureDeclustered(listp, raidPtr, cfgPtr);
        !           371:        if (rc)
        !           372:                return (rc);
        !           373:
        !           374:        rc = rf_ShutdownCreate(listp, rf_ShutdownDeclusteredDS, raidPtr);
        !           375:        if (rc) {
        !           376:                RF_ERRORMSG1("Got %d adding shutdown event for"
        !           377:                    " DeclusteredDS.\n", rc);
        !           378:                rf_ShutdownDeclusteredDS(raidPtr);
        !           379:                return (rc);
        !           380:        }
        !           381:
        !           382:        return (0);
        !           383: }
        !           384: /*
        !                 * rf_MapSectorDeclustered -- map a RAID sector address to a disk
        !                 * location under the declustered layout.
        !                 *
        !                 * raidSector is divided by sectorsPerStripeUnit to obtain a stripe
        !                 * unit ID, which is then broken down into fulltable, table, block
        !                 * and offset-within-block indices.  Results are stored through row,
        !                 * col and diskSector.
        !                 *
        !                 * When remap is non-zero the final column and stripe unit are
        !                 * delegated to rf_remap_to_spare_space(), which receives col and
        !                 * &outSU; otherwise outSU is accumulated from base_suid, the
        !                 * fulltable offset, the reserved spare space, the table offset and
        !                 * the OffsetTable entry.
        !                 *
        !                 * rf_decluster_adjust_params() receives SUID, sus_per_fulltable,
        !                 * fulltable_depth and base_suid by address and may change them
        !                 * before the decomposition starts.
        !                 */
        !           385: void
        !           386: rf_MapSectorDeclustered(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector,
        !           387:     RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector, int remap)
        !           388: {
        !           389:        RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
        !           390:        RF_DeclusteredConfigInfo_t *info =
        !           391:            (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo;
        !           392:        RF_StripeNum_t SUID = raidSector / layoutPtr->sectorsPerStripeUnit;
        !           393:        RF_StripeNum_t FullTableID, FullTableOffset, TableID, TableOffset;
        !           394:        RF_StripeNum_t BlockID, BlockOffset, RepIndex;
        !           395:        RF_StripeCount_t sus_per_fulltable = info->SUsPerFullTable;
        !           396:        RF_StripeCount_t fulltable_depth =
        !           397:            info->FullTableDepthInPUs * layoutPtr->SUsPerPU;
        !           398:        RF_StripeNum_t base_suid = 0, outSU, SpareRegion = 0, SpareSpace = 0;
        !           399:
        !           400:        rf_decluster_adjust_params(layoutPtr, &SUID, &sus_per_fulltable,
        !           401:            &fulltable_depth, &base_suid);
        !           402:
        !           403:        /* Fulltable ID within array (across rows). */
        !           404:        FullTableID = SUID / sus_per_fulltable;
        !           405:        if (raidPtr->numRow == 1)
        !           406:                *row = 0;       /* Avoid a mod and a div in the common case. */
        !           407:        else {
        !           408:                *row = FullTableID % raidPtr->numRow;
        !           409:                /* Convert to fulltable ID on this disk. */
        !           410:                FullTableID /= raidPtr->numRow;
        !           411:        }
        !           412:        if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) {
        !           413:                SpareRegion = FullTableID / info->FullTablesPerSpareRegion;
        !           414:                SpareSpace = SpareRegion * info->SpareSpaceDepthPerRegionInSUs;
        !           415:        }
        !           416:        FullTableOffset = SUID % sus_per_fulltable;
        !           417:        TableID = FullTableOffset / info->SUsPerTable;
        !           418:        TableOffset = FullTableOffset - TableID * info->SUsPerTable;
        !           419:        BlockID = TableOffset / info->PUsPerBlock;
        !           420:        BlockOffset = TableOffset - BlockID * info->PUsPerBlock;
        !           421:        BlockID %= info->BlocksPerTable;
        !           422:        RepIndex = info->PUsPerBlock - TableID;
        !           423:        if (!raidPtr->noRotate)
        !           424:                BlockOffset += ((BlockOffset >= RepIndex) ? 1 : 0);
        !           425:        *col = info->LayoutTable[BlockID][BlockOffset];
        !           426:
        !           427:        /*
        !                        * Remap to distributed spare space if indicated.  The
        !                        * assertion only permits this when the target disk is
        !                        * reconstructing, already dist-spared, or optimal while a
        !                        * copyback is in progress.
        !                        */
        !           428:        if (remap) {
        !           429:                RF_ASSERT(raidPtr->Disks[*row][*col].status ==
        !           430:                    rf_ds_reconstructing ||
        !           431:                    raidPtr->Disks[*row][*col].status == rf_ds_dist_spared ||
        !           432:                    (rf_copyback_in_progress &&
        !           433:                    raidPtr->Disks[*row][*col].status == rf_ds_optimal));
        !           434:                rf_remap_to_spare_space(layoutPtr, info, *row, FullTableID,
        !           435:                    TableID, BlockID, (base_suid) ? 1 : 0, SpareRegion, col,
        !           436:                    &outSU);
        !           437:        } else {
        !           438:
        !           439:                outSU = base_suid;
        !           440:                outSU += FullTableID * fulltable_depth;
        !           441:                        /* Offset to start of FT. */
        !           442:                outSU += SpareSpace;
        !           443:                        /* Skip rsvd spare space. */
        !           444:                outSU += TableID * info->TableDepthInPUs * layoutPtr->SUsPerPU;
        !           445:                        /* Offset to start of table. */
        !           446:                outSU += info->OffsetTable[BlockID][BlockOffset] *
        !           447:                    layoutPtr->SUsPerPU;
        !           448:                        /* Offset to the PU. */
        !           449:        }
        !           450:        outSU += TableOffset / (info->BlocksPerTable * info->PUsPerBlock);
        !           451:                /* Offset to the SU within a PU; applied on both paths. */
        !           452:
        !           453:        /*
        !           454:         * Convert SUs to sectors, and, if not aligned to SU boundary, add in
        !           455:         * offset to sector.
        !           456:         */
        !           457:        *diskSector = outSU * layoutPtr->sectorsPerStripeUnit +
        !           458:            (raidSector % layoutPtr->sectorsPerStripeUnit);
        !           459:
        !           460:        RF_ASSERT(*col != -1);
        !           461: }
        !           462:
        !           463: /*
        !           464:  * Prototyping this inexplicably causes the compile of the layout table
        !           465:  * (rf_layout.c) to fail.
        !           466:  */
        !           467: void
        !           468: rf_MapParityDeclustered(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector,
        !           469:     RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector, int remap)
        !           470: {
        !           471:        RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
        !           472:        RF_DeclusteredConfigInfo_t *info =
        !           473:            (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo;
        !           474:        RF_StripeNum_t SUID = raidSector / layoutPtr->sectorsPerStripeUnit;
        !           475:        RF_StripeNum_t FullTableID, FullTableOffset, TableID, TableOffset;
        !           476:        RF_StripeNum_t BlockID, BlockOffset, RepIndex;
        !           477:        RF_StripeCount_t sus_per_fulltable = info->SUsPerFullTable;
        !           478:        RF_StripeCount_t fulltable_depth =
        !           479:            info->FullTableDepthInPUs * layoutPtr->SUsPerPU;
        !           480:        RF_StripeNum_t base_suid = 0, outSU, SpareRegion = 0, SpareSpace = 0;
        !           481:
        !           482:        rf_decluster_adjust_params(layoutPtr, &SUID, &sus_per_fulltable,
        !           483:            &fulltable_depth, &base_suid);
        !           484:
        !           485:        /* Compute row & (possibly) spare space exactly as before. */
        !           486:        FullTableID = SUID / sus_per_fulltable;
        !           487:        if (raidPtr->numRow == 1)
        !           488:                *row = 0;       /* Avoid a mod and a div in the common case. */
        !           489:        else {
        !           490:                *row = FullTableID % raidPtr->numRow;
        !           491:                /* Convert to fulltable ID on this disk. */
        !           492:                FullTableID /= raidPtr->numRow;
        !           493:        }
        !           494:        if ((raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE)) {
        !           495:                SpareRegion = FullTableID / info->FullTablesPerSpareRegion;
        !           496:                SpareSpace = SpareRegion * info->SpareSpaceDepthPerRegionInSUs;
        !           497:        }
        !           498:        /* Compute BlockID and RepIndex exactly as before. */
        !           499:        FullTableOffset = SUID % sus_per_fulltable;
        !           500:        TableID = FullTableOffset / info->SUsPerTable;
        !           501:        TableOffset = FullTableOffset - TableID * info->SUsPerTable;
        !           502:        /*TableOffset   = FullTableOffset % info->SUsPerTable;*/
        !           503:        /*BlockID       = (TableOffset / info->PUsPerBlock) %
        !           504:         *info->BlocksPerTable;*/
        !           505:        BlockID = TableOffset / info->PUsPerBlock;
        !           506:        /*BlockOffset   = TableOffset % info->PUsPerBlock;*/
        !           507:        BlockOffset = TableOffset - BlockID * info->PUsPerBlock;
        !           508:        BlockID %= info->BlocksPerTable;
        !           509:
        !           510:        /* The parity block is in the position indicated by RepIndex. */
        !           511:        RepIndex = (raidPtr->noRotate) ?
        !           512:            info->PUsPerBlock : info->PUsPerBlock - TableID;
        !           513:        *col = info->LayoutTable[BlockID][RepIndex];
        !           514:
        !           515:        if (remap) {
        !           516:                RF_ASSERT(raidPtr->Disks[*row][*col].status ==
        !           517:                    rf_ds_reconstructing ||
        !           518:                    raidPtr->Disks[*row][*col].status == rf_ds_dist_spared ||
        !           519:                    (rf_copyback_in_progress &&
        !           520:                    raidPtr->Disks[*row][*col].status == rf_ds_optimal));
        !           521:                rf_remap_to_spare_space(layoutPtr, info, *row, FullTableID,
        !           522:                    TableID, BlockID, (base_suid) ? 1 : 0, SpareRegion, col,
        !           523:                    &outSU);
        !           524:        } else {
        !           525:
        !           526:                /*
        !           527:                 * Compute sector as before, except use RepIndex instead of
        !           528:                 * BlockOffset.
        !           529:                 */
        !           530:                outSU = base_suid;
        !           531:                outSU += FullTableID * fulltable_depth;
        !           532:                outSU += SpareSpace;    /* skip rsvd spare space */
        !           533:                outSU += TableID * info->TableDepthInPUs * layoutPtr->SUsPerPU;
        !           534:                outSU += info->OffsetTable[BlockID][RepIndex] *
        !           535:                    layoutPtr->SUsPerPU;
        !           536:        }
        !           537:
        !           538:        outSU += TableOffset / (info->BlocksPerTable * info->PUsPerBlock);
        !           539:        *diskSector = outSU * layoutPtr->sectorsPerStripeUnit +
        !           540:            (raidSector % layoutPtr->sectorsPerStripeUnit);
        !           541:
        !           542:        RF_ASSERT(*col != -1);
        !           543: }
        !           544:
        !           545: /*
        !           546:  * Return an array of ints identifying the disks that comprise the stripe
        !           547:  * containing the indicated address.
        !           548:  * The caller must _never_ attempt to modify this array.
        !           549:  */
        !           550: void
        !           551: rf_IdentifyStripeDeclustered(RF_Raid_t *raidPtr, RF_RaidAddr_t addr,
        !           552:     RF_RowCol_t **diskids, RF_RowCol_t *outRow)
        !           553: {
        !           554:        RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
        !           555:        RF_DeclusteredConfigInfo_t *info =
        !           556:            (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo;
        !           557:        RF_StripeCount_t sus_per_fulltable = info->SUsPerFullTable;
        !           558:        RF_StripeCount_t fulltable_depth =
        !           559:            info->FullTableDepthInPUs * layoutPtr->SUsPerPU;
        !           560:        RF_StripeNum_t base_suid = 0;
        !           561:        RF_StripeNum_t SUID = rf_RaidAddressToStripeUnitID(layoutPtr, addr);
        !           562:        RF_StripeNum_t stripeID, FullTableID;
        !           563:        int tableOffset;
        !           564:
        !           565:        rf_decluster_adjust_params(layoutPtr, &SUID, &sus_per_fulltable,
        !           566:            &fulltable_depth, &base_suid);
        !           567:        /* Fulltable ID within array (across rows). */
        !           568:        FullTableID = SUID / sus_per_fulltable;
        !           569:        *outRow = FullTableID % raidPtr->numRow;
        !           570:        /* Find stripe offset into array. */
        !           571:        stripeID = rf_StripeUnitIDToStripeID(layoutPtr, SUID);
        !           572:        /* Find offset into block design table. */
        !           573:        tableOffset = (stripeID % info->BlocksPerTable);
        !           574:        *diskids = info->LayoutTable[tableOffset];
        !           575: }
        !           576:
        !           577: /*
        !           578:  * This returns the default head-separation limit, measured in
        !           579:  * "required units for reconstruction". Each time a disk fetches
        !           580:  * a unit, it bumps a counter. The head-sep code prohibits any disk
        !           581:  * from getting more than headSepLimit counter values ahead of any
        !           582:  * other.
        !           583:  *
        !           584:  * We assume here that the number of floating recon buffers is already
        !           585:  * set. There are r stripes to be reconstructed in each table, and so
        !           586:  * if we have a total of B buffers, we can have at most B/r tables
        !           587:  * under recon at any one time. In each table, lambda units are required
        !           588:  * from each disk, so given B buffers, the head sep limit has to be
        !           589:  * (lambda*B)/r units. We subtract one to avoid weird boundary cases.
        !           590:  *
        !           591:  * For example, suppose we are given 50 buffers, r=19, and lambda=4 as in
        !           592:  * the 20.5 design. There are 19 stripes/table to be reconstructed, so
        !           593:  * we can have 50/19 tables concurrently under reconstruction, which means
        !           594:  * we can allow the fastest disk to get 50/19 tables ahead of the slower
        !           595:  * disk. There are lambda "required units" for each disk, so the fastest
        !           596:  * disk can get 4*50/19 = 10 counter values ahead of the slowest.
        !           597:  *
        !           598:  * If numBufsToAccumulate is not 1, we need to limit the head sep further
        !           599:  * because multiple bufs will be required for each stripe under recon.
        !           600:  */
        !           601: RF_HeadSepLimit_t
        !           602: rf_GetDefaultHeadSepLimitDeclustered(RF_Raid_t *raidPtr)
        !           603: {
        !           604:        RF_DeclusteredConfigInfo_t *info =
        !           605:            (RF_DeclusteredConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo;
        !           606:
        !           607:        return (info->Lambda * raidPtr->numFloatingReconBufs /
        !           608:            info->TableDepthInPUs / rf_numBufsToAccumulate);
        !           609: }
        !           610:
        !           611: /*
        !           612:  * Return the default number of recon buffers to use. The value
        !           613:  * is somewhat arbitrary...  It's intended to be large enough to
        !           614:  * allow for a reasonably large head-sep limit, but small enough
        !           615:  * that you don't use up all your system memory with buffers.
        !           616:  */
        !           617: int
        !           618: rf_GetDefaultNumFloatingReconBuffersDeclustered(RF_Raid_t *raidPtr)
        !           619: {
        !           620:        return (100 * rf_numBufsToAccumulate);
        !           621: }
        !           622:
        !           623: /*
        !           624:  * Sectors in the last fulltable of the array need to be handled
        !           625:  * specially since this fulltable can be incomplete. This function
        !           626:  * changes the values of certain params to handle this.
        !           627:  *
        !           628:  * The idea here is that MapSector et. al. figure out which disk the
        !           629:  * addressed unit lives on by computing the modulos of the unit number
        !           630:  * with the number of units per fulltable, table, etc.  In the last
        !           631:  * fulltable, there are fewer units per fulltable, so we need to adjust
        !           632:  * the number of user data units per fulltable to reflect this.
        !           633:  *
        !           634:  * So, we (1) convert the fulltable size and depth parameters to
        !           635:  * the size of the partial fulltable at the end, (2) compute the
        !           636:  * disk sector offset where this fulltable starts, and (3) convert
        !           637:  * the users stripe unit number from an offset into the array to
        !           638:  * an offset into the last fulltable.
        !           639:  */
        !           640: void
        !           641: rf_decluster_adjust_params(RF_RaidLayout_t *layoutPtr, RF_StripeNum_t *SUID,
        !           642:     RF_StripeCount_t *sus_per_fulltable, RF_StripeCount_t *fulltable_depth,
        !           643:     RF_StripeNum_t *base_suid)
        !           644: {
        !           645:        RF_DeclusteredConfigInfo_t *info =
        !           646:            (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo;
        !           647:
        !           648:        if (*SUID >= info->FullTableLimitSUID) {
        !           649:                /* New full table size is size of last full table on disk. */
        !           650:                *sus_per_fulltable =
        !           651:                    info->ExtraTablesPerDisk * info->SUsPerTable;
        !           652:
        !           653:                /* New full table depth is corresponding depth. */
        !           654:                *fulltable_depth =
        !           655:                    info->ExtraTablesPerDisk * info->TableDepthInPUs *
        !           656:                    layoutPtr->SUsPerPU;
        !           657:
        !           658:                /* Set up the new base offset. */
        !           659:                *base_suid = info->DiskOffsetOfLastFullTableInSUs;
        !           660:
        !           661:                /*
        !           662:                 * Convert user's array address to an offset into the last
        !           663:                 * fulltable.
        !           664:                 */
        !           665:                *SUID -= info->FullTableLimitSUID;
        !           666:        }
        !           667: }
        !           668:
        !           669: /*
        !           670:  * Map a stripe ID to a parity stripe ID.
        !           671:  * See comment above RaidAddressToParityStripeID in layout.c.
        !           672:  */
        !           673: void
        !           674: rf_MapSIDToPSIDDeclustered(RF_RaidLayout_t *layoutPtr, RF_StripeNum_t stripeID,
        !           675:     RF_StripeNum_t *psID, RF_ReconUnitNum_t *which_ru)
        !           676: {
        !           677:        RF_DeclusteredConfigInfo_t *info;
        !           678:
        !           679:        info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo;
        !           680:
        !           681:        *psID = (stripeID / (layoutPtr->SUsPerPU * info->BlocksPerTable)) *
        !           682:            info->BlocksPerTable + (stripeID % info->BlocksPerTable);
        !           683:        *which_ru = (stripeID % (info->BlocksPerTable * layoutPtr->SUsPerPU)) /
        !           684:            info->BlocksPerTable;
        !           685:        RF_ASSERT((*which_ru) < layoutPtr->SUsPerPU / layoutPtr->SUsPerRU);
        !           686: }
        !           687:
        !           688: /*
        !           689:  * Called from MapSector and MapParity to retarget an access at the spare unit.
        !           690:  * Modifies the "col" and "outSU" parameters only.
        !           691:  */
        !           692: void
        !           693: rf_remap_to_spare_space(RF_RaidLayout_t *layoutPtr,
        !           694:     RF_DeclusteredConfigInfo_t *info, RF_RowCol_t row,
        !           695:     RF_StripeNum_t FullTableID, RF_StripeNum_t TableID, RF_SectorNum_t BlockID,
        !           696:     RF_StripeNum_t base_suid, RF_StripeNum_t SpareRegion, RF_RowCol_t *outCol,
        !           697:     RF_StripeNum_t *outSU)
        !           698: {
        !           699:        RF_StripeNum_t ftID, spareTableStartSU, TableInSpareRegion,
        !           700:            lastSROffset, which_ft;
        !           701:
        !           702:        /*
        !           703:         * Note that FullTableID and hence SpareRegion may have gotten
        !           704:         * tweaked by rf_decluster_adjust_params. We detect this by
        !           705:         * noticing that base_suid is not 0.
        !           706:         */
        !           707:        if (base_suid == 0) {
        !           708:                ftID = FullTableID;
        !           709:        } else {
        !           710:                /*
        !           711:                 * There may be > 1.0 full tables in the last (i.e. partial)
        !           712:                 * spare region. Find out which of these we are in.
        !           713:                 */
        !           714:                lastSROffset = info->NumCompleteSRs *
        !           715:                    info->SpareRegionDepthInSUs;
        !           716:                which_ft =
        !           717:                    (info->DiskOffsetOfLastFullTableInSUs - lastSROffset) /
        !           718:                    (info->FullTableDepthInPUs * layoutPtr->SUsPerPU);
        !           719:
        !           720:                /* Compute the actual full table ID. */
        !           721:                ftID = info->DiskOffsetOfLastFullTableInSUs /
        !           722:                    (info->FullTableDepthInPUs * layoutPtr->SUsPerPU) +
        !           723:                    which_ft;
        !           724:                SpareRegion = info->NumCompleteSRs;
        !           725:        }
        !           726:        TableInSpareRegion = (ftID * info->NumParityReps + TableID) %
        !           727:            info->TablesPerSpareRegion;
        !           728:
        !           729:        *outCol = info->SpareTable[TableInSpareRegion][BlockID].spareDisk;
        !           730:        RF_ASSERT(*outCol != -1);
        !           731:
        !           732:        spareTableStartSU = (SpareRegion == info->NumCompleteSRs) ?
        !           733:            info->DiskOffsetOfLastFullTableInSUs + info->ExtraTablesPerDisk *
        !           734:            info->TableDepthInPUs * layoutPtr->SUsPerPU :
        !           735:            (SpareRegion + 1) * info->SpareRegionDepthInSUs -
        !           736:            info->SpareSpaceDepthPerRegionInSUs;
        !           737:        *outSU = spareTableStartSU +
        !           738:            info->SpareTable[TableInSpareRegion][BlockID].spareBlockOffsetInSUs;
        !           739:        if (*outSU >= layoutPtr->stripeUnitsPerDisk) {
        !           740:                printf("rf_remap_to_spare_space: invalid remapped disk SU"
        !           741:                    " offset %ld.\n", (long) *outSU);
        !           742:        }
        !           743: }
        !           744:
        !           745: int
        !           746: rf_InstallSpareTable(RF_Raid_t *raidPtr, RF_RowCol_t frow, RF_RowCol_t fcol)
        !           747: {
        !           748:        RF_DeclusteredConfigInfo_t *info =
        !           749:            (RF_DeclusteredConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo;
        !           750:        RF_SparetWait_t *req;
        !           751:        int retcode;
        !           752:
        !           753:        RF_Malloc(req, sizeof(*req), (RF_SparetWait_t *));
        !           754:        req->C = raidPtr->numCol;
        !           755:        req->G = raidPtr->Layout.numDataCol + raidPtr->Layout.numParityCol;
        !           756:        req->fcol = fcol;
        !           757:        req->SUsPerPU = raidPtr->Layout.SUsPerPU;
        !           758:        req->TablesPerSpareRegion = info->TablesPerSpareRegion;
        !           759:        req->BlocksPerTable = info->BlocksPerTable;
        !           760:        req->TableDepthInPUs = info->TableDepthInPUs;
        !           761:        req->SpareSpaceDepthPerRegionInSUs =
        !           762:            info->SpareSpaceDepthPerRegionInSUs;
        !           763:
        !           764:        retcode = rf_GetSpareTableFromDaemon(req);
        !           765:        RF_ASSERT(!retcode);
        !           766:        /* XXX -- Fix this to recover gracefully. -- XXX */
        !           767:
        !           768:        return (retcode);
        !           769: }
        !           770:
        !           771: /*
        !           772:  * Invoked via ioctl to install a spare table in the kernel.
        !           773:  */
        !           774: int
        !           775: rf_SetSpareTable(RF_Raid_t *raidPtr, void *data)
        !           776: {
        !           777:        RF_DeclusteredConfigInfo_t *info =
        !           778:            (RF_DeclusteredConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo;
        !           779:        RF_SpareTableEntry_t **ptrs;
        !           780:        int i, retcode;
        !           781:
        !           782:        /*
        !           783:         * What we need to copyin is a 2-d array, so first copyin the user
        !           784:         * pointers to the rows in the table.
        !           785:         */
        !           786:        RF_Malloc(ptrs, info->TablesPerSpareRegion *
        !           787:            sizeof(RF_SpareTableEntry_t *), (RF_SpareTableEntry_t **));
        !           788:        retcode = copyin((caddr_t) data, (caddr_t) ptrs,
        !           789:            info->TablesPerSpareRegion * sizeof(RF_SpareTableEntry_t *));
        !           790:
        !           791:        if (retcode)
        !           792:                return (retcode);
        !           793:
        !           794:        /* Now allocate kernel space for the row pointers. */
        !           795:        RF_Malloc(info->SpareTable, info->TablesPerSpareRegion *
        !           796:            sizeof(RF_SpareTableEntry_t *), (RF_SpareTableEntry_t **));
        !           797:
        !           798:        /*
        !           799:         * Now allocate kernel space for each row in the table, and copy it in
        !           800:         * from user space. */
        !           801:        for (i = 0; i < info->TablesPerSpareRegion; i++) {
        !           802:                RF_Malloc(info->SpareTable[i], info->BlocksPerTable *
        !           803:                    sizeof(RF_SpareTableEntry_t), (RF_SpareTableEntry_t *));
        !           804:                retcode = copyin(ptrs[i], info->SpareTable[i],
        !           805:                    info->BlocksPerTable * sizeof(RF_SpareTableEntry_t));
        !           806:                if (retcode) {
        !           807:                        /* Blow off the memory we have allocated. */
        !           808:                        info->SpareTable = NULL;
        !           809:                        return (retcode);
        !           810:                }
        !           811:        }
        !           812:
        !           813:        /* Free up the temporary array we used. */
        !           814:        RF_Free(ptrs, info->TablesPerSpareRegion *
        !           815:            sizeof(RF_SpareTableEntry_t *));
        !           816:
        !           817:        return (0);
        !           818: }
        !           819:
        !           820: RF_ReconUnitCount_t
        !           821: rf_GetNumSpareRUsDeclustered(RF_Raid_t *raidPtr)
        !           822: {
        !           823:        RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
        !           824:
        !           825:        return (((RF_DeclusteredConfigInfo_t *)
        !           826:            layoutPtr->layoutSpecificInfo)->TotSparePUsPerDisk);
        !           827: }
        !           828:
        !           829:
        !           830: void
        !           831: rf_FreeSpareTable(RF_Raid_t *raidPtr)
        !           832: {
        !           833:        long i;
        !           834:        RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
        !           835:        RF_DeclusteredConfigInfo_t *info =
        !           836:            (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo;
        !           837:        RF_SpareTableEntry_t **table = info->SpareTable;
        !           838:
        !           839:        for (i = 0; i < info->TablesPerSpareRegion; i++) {
        !           840:                RF_Free(table[i], info->BlocksPerTable *
        !           841:                    sizeof(RF_SpareTableEntry_t));
        !           842:        }
        !           843:        RF_Free(table, info->TablesPerSpareRegion *
        !           844:            sizeof(RF_SpareTableEntry_t *));
        !           845:        info->SpareTable = (RF_SpareTableEntry_t **) NULL;
        !           846: }

CVSweb