[BACK]Return to rf_decluster.c CVS log [TXT][DIR] Up to [local] / sys / dev / raidframe

Annotation of sys/dev/raidframe/rf_decluster.c, Revision 1.1.1.1

1.1       nbrk        1: /*     $OpenBSD: rf_decluster.c,v 1.5 2002/12/16 07:01:03 tdeval Exp $ */
                      2: /*     $NetBSD: rf_decluster.c,v 1.5 2000/03/07 01:54:29 oster Exp $   */
                      3:
                      4: /*
                      5:  * Copyright (c) 1995 Carnegie-Mellon University.
                      6:  * All rights reserved.
                      7:  *
                      8:  * Author: Mark Holland
                      9:  *
                     10:  * Permission to use, copy, modify and distribute this software and
                     11:  * its documentation is hereby granted, provided that both the copyright
                     12:  * notice and this permission notice appear in all copies of the
                     13:  * software, derivative works or modified versions, and any portions
                     14:  * thereof, and that both notices appear in supporting documentation.
                     15:  *
                     16:  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
                     17:  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
                     18:  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
                     19:  *
                     20:  * Carnegie Mellon requests users of this software to return to
                     21:  *
                     22:  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
                     23:  *  School of Computer Science
                     24:  *  Carnegie Mellon University
                     25:  *  Pittsburgh PA 15213-3890
                     26:  *
                     27:  * any improvements or extensions that they make and grant Carnegie the
                     28:  * rights to redistribute these changes.
                     29:  */
                     30:
                     31: /*****************************************************************************
                     32:  *
                     33:  * rf_decluster.c -- Code related to the declustered layout.
                     34:  *
                     35:  * Created 10-21-92 (MCH)
                     36:  *
                     37:  * Nov 93:     Adding support for distributed sparing. This code is a little
                     38:  *             complex; the basic layout used is as follows:
                     39:  *             Let F = (v-1)/GCD(r,v-1). The spare space for each set of
                     40:  *             F consecutive fulltables is grouped together and placed after
                     41:  *             that set of tables.
                     42:  *                     +-------------------------------+
                     43:  *                     |         F fulltables          |
                     44:  *                     |         Spare Space           |
                     45:  *                     |         F fulltables          |
                     46:  *                     |         Spare Space           |
                     47:  *                     |             ...               |
                     48:  *                     +-------------------------------+
                     49:  *
                     50:  *****************************************************************************/
                     51:
                     52: #include "rf_types.h"
                     53: #include "rf_raid.h"
                     54: #include "rf_raidframe.h"
                     55: #include "rf_configure.h"
                     56: #include "rf_decluster.h"
                     57: #include "rf_debugMem.h"
                     58: #include "rf_utils.h"
                     59: #include "rf_alloclist.h"
                     60: #include "rf_general.h"
                     61: #include "rf_shutdown.h"
                     62:
                     63: extern int rf_copyback_in_progress;    /* Debug only. */
                     64:
                     65: /* Found in rf_kintf.c */
                     66: int  rf_GetSpareTableFromDaemon(RF_SparetWait_t *);
                     67:
                     68: /* Configuration code. */
                     69:
                         /*
                          * rf_ConfigureDeclustered -- set up the declustered layout.
                          *
                          * Reads the layout-specific configuration buffer in this order: the
                          * sparemap file name (RF_SPAREMAP_NAME_LEN bytes, copied only when
                          * RF_DISTRIBUTE_SPARE is set), the ints b, v, k, r, lambda and
                          * noRotate, then b * k layout-table entries of one char each.
                          * From these it fills in the RF_DeclusteredConfigInfo_t, recomputes
                          * stripeUnitsPerDisk, builds the layout, offset and block lookup
                          * tables, and sets the derived size fields in raidPtr and
                          * raidPtr->Layout.
                          *
                          * listp is not referenced in this function.
                          *
                          * Returns 0 on success, ENOMEM when an allocation fails, or EINVAL
                          * when v differs from raidPtr->numCol or the block design does not
                          * fit on a disk.
                          */
                     70: int
                     71: rf_ConfigureDeclustered(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
                     72:     RF_Config_t *cfgPtr)
                     73: {
                     74:        RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
                     75:        int b, v, k, r, lambda; /* block design params */
                     76:        int i, j;
                     77:        RF_RowCol_t *first_avail_slot;
                     78:        RF_StripeCount_t complete_FT_count, numCompleteFullTablesPerDisk;
                     79:        RF_DeclusteredConfigInfo_t *info;
                     80:        RF_StripeCount_t PUsPerDisk, spareRegionDepthInPUs,
                     81:            numCompleteSpareRegionsPerDisk, extraPUsPerDisk;
                     82:        RF_StripeCount_t totSparePUsPerDisk;
                     83:        RF_SectorNum_t diskOffsetOfLastFullTableInSUs;
                     84:        RF_SectorCount_t SpareSpaceInSUs;
                     85:        char *cfgBuf = (char *) (cfgPtr->layoutSpecific);
                     86:        RF_StripeNum_t l, SUID;
                     87:
                     88:        SUID = l = 0;
                     89:        numCompleteSpareRegionsPerDisk = 0;
                     90:
                     91:        /* 1. Create layout specific structure. */
                     92:        RF_MallocAndAdd(info, sizeof(RF_DeclusteredConfigInfo_t),
                     93:            (RF_DeclusteredConfigInfo_t *), raidPtr->cleanupList);
                     94:        if (info == NULL)
                     95:                return (ENOMEM);
                     96:        layoutPtr->layoutSpecificInfo = (void *) info;
                     97:        info->SpareTable = NULL;
                     98:
                     99:        /* 2. Extract parameters from the config structure. */
                    100:        if (layoutPtr->map->flags & RF_DISTRIBUTE_SPARE) {
                    101:                bcopy(cfgBuf, info->sparemap_fname, RF_SPAREMAP_NAME_LEN);
                    102:        }
                                /* The name field is skipped whether or not it was copied. */
                    103:        cfgBuf += RF_SPAREMAP_NAME_LEN;
                    104:
                                /*
                                 * NOTE(review): the values below are taken from the buffer
                                 * as-is; nothing in this function rejects zero or negative
                                 * b, v, k or r, yet v - 1, k and TableDepthInPUs are later
                                 * used as divisors.  Presumably the caller validates the
                                 * block design before getting here -- confirm.
                                 */
                    105:        b = *((int *) cfgBuf);
                    106:        cfgBuf += sizeof(int);
                    107:        v = *((int *) cfgBuf);
                    108:        cfgBuf += sizeof(int);
                    109:        k = *((int *) cfgBuf);
                    110:        cfgBuf += sizeof(int);
                    111:        r = *((int *) cfgBuf);
                    112:        cfgBuf += sizeof(int);
                    113:        lambda = *((int *) cfgBuf);
                    114:        cfgBuf += sizeof(int);
                    115:        raidPtr->noRotate = *((int *) cfgBuf);
                    116:        cfgBuf += sizeof(int);
                    117:
                    118:        /*
                    119:         * The sparemaps are generated assuming that parity is rotated, so we
                    120:         * issue a warning if both distributed sparing and no-rotate are on at
                    121:         * the same time.
                    122:         */
                    123:        if ((layoutPtr->map->flags & RF_DISTRIBUTE_SPARE) &&
                    124:            raidPtr->noRotate) {
                    125:                RF_ERRORMSG("Warning:  distributed sparing specified without"
                    126:                    " parity rotation.\n");
                    127:        }
                    128:        if (raidPtr->numCol != v) {
                    129:                RF_ERRORMSG2("RAID: config error: table element count (%d)"
                    130:                    " not equal to no. of cols (%d).\n", v, raidPtr->numCol);
                    131:                return (EINVAL);
                    132:        }
                    133:        /* 3. Set up the values used in the mapping code. */
                    134:        info->BlocksPerTable = b;
                    135:        info->Lambda = lambda;
                    136:        info->NumParityReps = info->groupSize = k;
                    137:        /* b blks, k-1 SUs each. */
                    138:        info->SUsPerTable = b * (k - 1) * layoutPtr->SUsPerPU;
                    139:        info->SUsPerFullTable = k * info->SUsPerTable;  /* rot k times */
                    140:        info->PUsPerBlock = k - 1;
                    141:        info->SUsPerBlock = info->PUsPerBlock * layoutPtr->SUsPerPU;
                    142:        info->TableDepthInPUs = (b * k) / v;
                    143:        /* k repetitions. */
                    144:        info->FullTableDepthInPUs = info->TableDepthInPUs * k;
                    145:
                    146:        /* Used only in distributed sparing case. */
                    147:        /* (v-1)/gcd fulltables. */
                    148:        info->FullTablesPerSpareRegion = (v - 1) / rf_gcd(r, v - 1);
                    149:        info->TablesPerSpareRegion = k * info->FullTablesPerSpareRegion;
                    150:        info->SpareSpaceDepthPerRegionInSUs = (r * info->TablesPerSpareRegion /
                    151:            (v - 1)) * layoutPtr->SUsPerPU;
                    152:
                    153:        /* Check to make sure the block design is sufficiently small. */
                    154:        if ((raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE)) {
                    155:                if (info->FullTableDepthInPUs * layoutPtr->SUsPerPU +
                    156:                    info->SpareSpaceDepthPerRegionInSUs >
                    157:                    layoutPtr->stripeUnitsPerDisk) {
                    158:                        RF_ERRORMSG3("RAID: config error: Full Table depth"
                    159:                            " (%d) + Spare Space (%d) larger than disk size"
                    160:                            " (%d) (BD too big).\n",
                    161:                            (int) info->FullTableDepthInPUs,
                    162:                            (int) info->SpareSpaceDepthPerRegionInSUs,
                    163:                            (int) layoutPtr->stripeUnitsPerDisk);
                    164:                        return (EINVAL);
                    165:                }
                    166:        } else {
                    167:                if (info->TableDepthInPUs * layoutPtr->SUsPerPU >
                    168:                    layoutPtr->stripeUnitsPerDisk) {
                    169:                        RF_ERRORMSG2("RAID: config error: Table depth (%d)"
                    170:                            " larger than disk size (%d) (BD too big).\n",
                    171:                            (int) (info->TableDepthInPUs * layoutPtr->SUsPerPU),
                    172:                            (int) layoutPtr->stripeUnitsPerDisk);
                    173:                        return (EINVAL);
                    174:                }
                    175:        }
                    176:
                    177:
                    178:        /*
                    179:         * Compute the size of each disk, and the number of tables in the last
                    180:         * fulltable (which need not be complete).
                    181:         */
                    182:        if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) {
                    183:
                    184:                PUsPerDisk = layoutPtr->stripeUnitsPerDisk /
                    185:                    layoutPtr->SUsPerPU;
                                        /*
                                         * Depth of one spare region: its data/parity
                                         * tables plus that amount divided by (v - 1).
                                         */
                    186:                spareRegionDepthInPUs =
                    187:                    (info->TablesPerSpareRegion * info->TableDepthInPUs +
                    188:                    (info->TablesPerSpareRegion * info->TableDepthInPUs) /
                    189:                    (v - 1));
                    190:                info->SpareRegionDepthInSUs =
                    191:                    spareRegionDepthInPUs * layoutPtr->SUsPerPU;
                    192:
                    193:                numCompleteSpareRegionsPerDisk =
                    194:                    PUsPerDisk / spareRegionDepthInPUs;
                    195:                info->NumCompleteSRs = numCompleteSpareRegionsPerDisk;
                    196:                extraPUsPerDisk = PUsPerDisk % spareRegionDepthInPUs;
                    197:
                    198:                /*
                    199:                 * Assume conservatively that we need the full amount of spare
                    200:                 * space in one region in order to provide spares for the
                    201:                 * partial spare region at the end of the array. We set "i"
                    202:                 * to the number of tables in the partial spare region. This
                    203:                 * may actually include some fulltables.
                    204:                 */
                    205:                extraPUsPerDisk -= (info->SpareSpaceDepthPerRegionInSUs /
                    206:                    layoutPtr->SUsPerPU);
                                        /*
                                         * NOTE(review): the <= 0 test only catches a
                                         * negative remainder if RF_StripeCount_t is a
                                         * signed type -- confirm in rf_types.h.
                                         */
                    207:                if (extraPUsPerDisk <= 0)
                    208:                        i = 0;
                    209:                else
                    210:                        i = extraPUsPerDisk / info->TableDepthInPUs;
                    211:
                                        /*
                                         * complete_FT_count is an array-wide total (it is
                                         * multiplied by numRow); it is divided by numRow
                                         * again wherever a per-disk count is needed.
                                         */
                    212:                complete_FT_count = raidPtr->numRow *
                    213:                    (numCompleteSpareRegionsPerDisk *
                    214:                    (info->TablesPerSpareRegion / k) + i / k);
                    215:                info->FullTableLimitSUID =
                    216:                    complete_FT_count * info->SUsPerFullTable;
                    217:                info->ExtraTablesPerDisk = i % k;
                    218:
                    219:                /*
                    220:                 * Note that in the last spare region, the spare space is
                    221:                 * complete even though data/parity space is not.
                    222:                 */
                    223:                totSparePUsPerDisk = (numCompleteSpareRegionsPerDisk + 1) *
                    224:                    (info->SpareSpaceDepthPerRegionInSUs / layoutPtr->SUsPerPU);
                    225:                info->TotSparePUsPerDisk = totSparePUsPerDisk;
                    226:
                    227:                layoutPtr->stripeUnitsPerDisk =
                    228:                    ((complete_FT_count / raidPtr->numRow) *
                    229:                    info->FullTableDepthInPUs + /* data & parity space */
                    230:                    info->ExtraTablesPerDisk * info->TableDepthInPUs +
                    231:                    totSparePUsPerDisk          /* spare space */
                    232:                    ) * layoutPtr->SUsPerPU;
                                        /*
                                         * NOTE(review): unlike stripeUnitsPerDisk just
                                         * above, this uses complete_FT_count without
                                         * dividing by numRow -- confirm that is intended
                                         * for arrays with more than one row.
                                         */
                    233:                layoutPtr->dataStripeUnitsPerDisk =
                    234:                    (complete_FT_count * info->FullTableDepthInPUs +
                    235:                    info->ExtraTablesPerDisk * info->TableDepthInPUs) *
                    236:                    layoutPtr->SUsPerPU * (k - 1) / k;
                    237:
                    238:        } else {
                    239:                /*
                    240:                 * Non-dist spare case:  force each disk to contain an
                    241:                 * integral number of tables.
                    242:                 */
                    243:                layoutPtr->stripeUnitsPerDisk /=
                    244:                    (info->TableDepthInPUs * layoutPtr->SUsPerPU);
                    245:                layoutPtr->stripeUnitsPerDisk *=
                    246:                    (info->TableDepthInPUs * layoutPtr->SUsPerPU);
                    247:
                    248:                /*
                    249:                 * Compute the number of tables in the last fulltable, which
                    250:                 * need not be complete.
                    251:                 */
                    252:                complete_FT_count =
                    253:                    ((layoutPtr->stripeUnitsPerDisk / layoutPtr->SUsPerPU) /
                    254:                    info->FullTableDepthInPUs) * raidPtr->numRow;
                    255:
                    256:                info->FullTableLimitSUID =
                    257:                    complete_FT_count * info->SUsPerFullTable;
                    258:                info->ExtraTablesPerDisk =
                    259:                    ((layoutPtr->stripeUnitsPerDisk / layoutPtr->SUsPerPU) /
                    260:                    info->TableDepthInPUs) % k;
                    261:        }
                    262:
                    263:        raidPtr->sectorsPerDisk = layoutPtr->stripeUnitsPerDisk *
                    264:                    layoutPtr->sectorsPerStripeUnit;
                    265:
                    266:        /*
                    267:         * Find the disk offset of the stripe unit where the last fulltable
                    268:         * starts.
                    269:         */
                    270:        numCompleteFullTablesPerDisk = complete_FT_count / raidPtr->numRow;
                    271:        diskOffsetOfLastFullTableInSUs = numCompleteFullTablesPerDisk *
                    272:            info->FullTableDepthInPUs * layoutPtr->SUsPerPU;
                    273:        if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) {
                                        /*
                                         * Skip the spare space of every complete spare
                                         * region; the last spare chunk then starts after
                                         * the extra (partial-fulltable) tables.
                                         */
                    274:                SpareSpaceInSUs = numCompleteSpareRegionsPerDisk *
                    275:                    info->SpareSpaceDepthPerRegionInSUs;
                    276:                diskOffsetOfLastFullTableInSUs += SpareSpaceInSUs;
                    277:                info->DiskOffsetOfLastSpareSpaceChunkInSUs =
                    278:                    diskOffsetOfLastFullTableInSUs + info->ExtraTablesPerDisk *
                    279:                    info->TableDepthInPUs * layoutPtr->SUsPerPU;
                    280:        }
                    281:        info->DiskOffsetOfLastFullTableInSUs = diskOffsetOfLastFullTableInSUs;
                    282:        info->numCompleteFullTablesPerDisk = numCompleteFullTablesPerDisk;
                    283:
                    284:        /* 4. Create and initialize the lookup tables. */
                    285:        info->LayoutTable = rf_make_2d_array(b, k, raidPtr->cleanupList);
                    286:        if (info->LayoutTable == NULL)
                    287:                return (ENOMEM);
                    288:        info->OffsetTable = rf_make_2d_array(b, k, raidPtr->cleanupList);
                    289:        if (info->OffsetTable == NULL)
                    290:                return (ENOMEM);
                    291:        info->BlockTable = rf_make_2d_array(info->TableDepthInPUs *
                    292:            layoutPtr->SUsPerPU, raidPtr->numCol, raidPtr->cleanupList);
                    293:        if (info->BlockTable == NULL)
                    294:                return (ENOMEM);
                    295:
                                /*
                                 * Scratch array with one entry per column.  It is created
                                 * with NULL where the tables above pass raidPtr->cleanupList,
                                 * and is released explicitly once the tables are built.
                                 * The offset-table loop reads entries before storing to
                                 * them, so this relies on rf_make_1d_array() returning
                                 * zeroed memory -- TODO confirm.
                                 */
                    296:        first_avail_slot = rf_make_1d_array(v, NULL);
                    297:        if (first_avail_slot == NULL)
                    298:                return (ENOMEM);
                    299:
                                /*
                                 * The layout table follows the six ints in the config
                                 * buffer, one char per entry, b rows of k entries.
                                 */
                    300:        for (i = 0; i < b; i++)
                    301:                for (j = 0; j < k; j++)
                    302:                        info->LayoutTable[i][j] = *cfgBuf++;
                    303:
                    304:        /* Initialize the offset table. */
                                /*
                                 * Each entry receives the current counter of the column
                                 * named by the matching layout entry, and that counter is
                                 * then incremented.
                                 */
                    305:        for (i = 0; i < b; i++)
                    306:                for (j = 0; j < k; j++) {
                    307:                        info->OffsetTable[i][j] =
                    308:                            first_avail_slot[info->LayoutTable[i][j]];
                    309:                        first_avail_slot[info->LayoutTable[i][j]]++;
                    310:                }
                    311:
                    312:        /* Initialize the block table. */
                                /*
                                 * Indexed [SU offset within a table][column].  SUID is
                                 * incremented once per block, after the inner loop, so all
                                 * k entries of a block receive the same value.
                                 */
                    313:        for (SUID = l = 0; l < layoutPtr->SUsPerPU; l++) {
                    314:                for (i = 0; i < b; i++) {
                    315:                        for (j = 0; j < k; j++) {
                    316:                                info->BlockTable[(info->OffsetTable[i][j] *
                    317:                                    layoutPtr->SUsPerPU) + l]
                    318:                                    [info->LayoutTable[i][j]] = SUID;
                    319:                        }
                    320:                        SUID++;
                    321:                }
                    322:        }
                    323:
                    324:        rf_free_1d_array(first_avail_slot, v);
                    325:
                    326:        /* 5. Set up the remaining redundant-but-useful parameters. */
                    327:
                    328:        raidPtr->totalSectors = (k * complete_FT_count + raidPtr->numRow *
                    329:            info->ExtraTablesPerDisk) * info->SUsPerTable *
                    330:            layoutPtr->sectorsPerStripeUnit;
                    331:        layoutPtr->numStripe = (raidPtr->totalSectors /
                    332:            layoutPtr->sectorsPerStripeUnit) / (k - 1);
                    333:
                    334:        /*
                    335:         * Strange evaluation order below to try and minimize overflow
                    336:         * problems.
                    337:         */
                    338:
                    339:        layoutPtr->dataSectorsPerStripe =
                    340:            (k - 1) * layoutPtr->sectorsPerStripeUnit;
                    341:        layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit <<
                    342:            raidPtr->logBytesPerSector;
                                /* Each stripe has k - 1 data units and one parity unit. */
                    343:        layoutPtr->numDataCol = k - 1;
                    344:        layoutPtr->numParityCol = 1;
                    345:
                    346:        return (0);
                    347: }
                    348:
                    349: /* Declustering with distributed sparing. */
                         /*
                          * rf_ShutdownDeclusteredDS -- shutdown routine for the distributed-
                          * sparing layout.
                          *
                          * arg is cast to the RF_Raid_t pointer of the array.  If the
                          * layout's SpareTable pointer is non-NULL, rf_FreeSpareTable() is
                          * called on the array; otherwise nothing is done.
                          */
                    350: void rf_ShutdownDeclusteredDS(RF_ThreadArg_t);
                    351: void
                    352: rf_ShutdownDeclusteredDS(RF_ThreadArg_t arg)
                    353: {
                    354:        RF_DeclusteredConfigInfo_t *info;
                    355:        RF_Raid_t *raidPtr;
                    356:
                    357:        raidPtr = (RF_Raid_t *) arg;
                    358:        info =
                    359:            (RF_DeclusteredConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo;
                                /* SpareTable is set to NULL at configuration time. */
                    360:        if (info->SpareTable)
                    361:                rf_FreeSpareTable(raidPtr);
                    362: }
                    363:
                         /*
                          * rf_ConfigureDeclusteredDS -- configure the declustered layout with
                          * distributed sparing.
                          *
                          * Runs rf_ConfigureDeclustered() and then registers
                          * rf_ShutdownDeclusteredDS() on listp with raidPtr as its argument.
                          *
                          * Returns 0 on success, otherwise the nonzero code returned by
                          * rf_ConfigureDeclustered() or rf_ShutdownCreate().
                          */
                    364: int
                    365: rf_ConfigureDeclusteredDS(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
                    366:     RF_Config_t *cfgPtr)
                    367: {
                    368:        int rc;
                    369:
                    370:        rc = rf_ConfigureDeclustered(listp, raidPtr, cfgPtr);
                    371:        if (rc)
                    372:                return (rc);
                    373:
                    374:        rc = rf_ShutdownCreate(listp, rf_ShutdownDeclusteredDS, raidPtr);
                    375:        if (rc) {
                    376:                RF_ERRORMSG1("Got %d adding shutdown event for"
                    377:                    " DeclusteredDS.\n", rc);
                                        /*
                                         * The hook could not be registered, so run the
                                         * shutdown routine by hand before failing.
                                         */
                    378:                rf_ShutdownDeclusteredDS(raidPtr);
                    379:                return (rc);
                    380:        }
                    381:
                    382:        return (0);
                    383: }
                    384:
                         /*
                          * rf_MapSectorDeclustered -- map a RAID address to a disk location.
                          *
                          * raidSector is split into a stripe-unit ID (SUID) and a sector
                          * offset within the stripe unit.  The SUID is decomposed into
                          * fulltable, table, block and position within the block;
                          * LayoutTable then gives the column and OffsetTable the offset
                          * of the unit within its table.
                          *
                          * Outputs are *row, *col and *diskSector.  When remap is nonzero,
                          * the column and the stripe-unit offset come from
                          * rf_remap_to_spare_space() instead of the normal computation.
                          */
                    385: void
                    386: rf_MapSectorDeclustered(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector,
                    387:     RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector, int remap)
                    388: {
                    389:        RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
                    390:        RF_DeclusteredConfigInfo_t *info =
                    391:            (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo;
                    392:        RF_StripeNum_t SUID = raidSector / layoutPtr->sectorsPerStripeUnit;
                    393:        RF_StripeNum_t FullTableID, FullTableOffset, TableID, TableOffset;
                    394:        RF_StripeNum_t BlockID, BlockOffset, RepIndex;
                    395:        RF_StripeCount_t sus_per_fulltable = info->SUsPerFullTable;
                    396:        RF_StripeCount_t fulltable_depth =
                    397:            info->FullTableDepthInPUs * layoutPtr->SUsPerPU;
                    398:        RF_StripeNum_t base_suid = 0, outSU, SpareRegion = 0, SpareSpace = 0;
                    399:
                                /*
                                 * NOTE(review): rf_decluster_adjust_params() is defined
                                 * elsewhere.  It receives pointers to SUID, sus_per_fulltable,
                                 * fulltable_depth and base_suid and may rewrite any of them;
                                 * presumably this handles addresses in the partial fulltable
                                 * at the end of the array -- confirm.
                                 */
                    400:        rf_decluster_adjust_params(layoutPtr, &SUID, &sus_per_fulltable,
                    401:            &fulltable_depth, &base_suid);
                    402:
                    403:        /* Fulltable ID within array (across rows). */
                    404:        FullTableID = SUID / sus_per_fulltable;
                    405:        if (raidPtr->numRow == 1)
                    406:                *row = 0;       /* Avoid a mod and a div in the common case. */
                    407:        else {
                    408:                *row = FullTableID % raidPtr->numRow;
                    409:                /* Convert to fulltable ID on this disk. */
                    410:                FullTableID /= raidPtr->numRow;
                    411:        }
                    412:        if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) {
                                        /*
                                         * Spare space belonging to every earlier spare
                                         * region sits in front of this fulltable on disk.
                                         */
                    413:                SpareRegion = FullTableID / info->FullTablesPerSpareRegion;
                    414:                SpareSpace = SpareRegion * info->SpareSpaceDepthPerRegionInSUs;
                    415:        }
                    416:        FullTableOffset = SUID % sus_per_fulltable;
                    417:        TableID = FullTableOffset / info->SUsPerTable;
                    418:        TableOffset = FullTableOffset - TableID * info->SUsPerTable;
                    419:        BlockID = TableOffset / info->PUsPerBlock;
                    420:        BlockOffset = TableOffset - BlockID * info->PUsPerBlock;
                                /* Wrap BlockID back into the range of LayoutTable rows. */
                    421:        BlockID %= info->BlocksPerTable;
                                /*
                                 * With rotation enabled, positions at or past RepIndex are
                                 * moved up by one, leaving one LayoutTable slot per block
                                 * unused by data (presumably the parity slot for this
                                 * table -- confirm against rf_MapParityDeclustered).
                                 */
                    422:        RepIndex = info->PUsPerBlock - TableID;
                    423:        if (!raidPtr->noRotate)
                    424:                BlockOffset += ((BlockOffset >= RepIndex) ? 1 : 0);
                    425:        *col = info->LayoutTable[BlockID][BlockOffset];
                    426:
                    427:        /* Remap to distributed spare space if indicated. */
                    428:        if (remap) {
                                        /*
                                         * Remapping is only expected for a disk that is
                                         * reconstructing or dist-spared, or for an optimal
                                         * disk while a copyback is in progress.
                                         */
                    429:                RF_ASSERT(raidPtr->Disks[*row][*col].status ==
                    430:                    rf_ds_reconstructing ||
                    431:                    raidPtr->Disks[*row][*col].status == rf_ds_dist_spared ||
                    432:                    (rf_copyback_in_progress &&
                    433:                    raidPtr->Disks[*row][*col].status == rf_ds_optimal));
                                        /* base_suid is passed on only as a 0/1 flag. */
                    434:                rf_remap_to_spare_space(layoutPtr, info, *row, FullTableID,
                    435:                    TableID, BlockID, (base_suid) ? 1 : 0, SpareRegion, col,
                    436:                    &outSU);
                    437:        } else {
                    438:
                    439:                outSU = base_suid;
                    440:                outSU += FullTableID * fulltable_depth;
                    441:                        /* Offset to start of FT. */
                    442:                outSU += SpareSpace;
                    443:                        /* Skip rsvd spare space. */
                    444:                outSU += TableID * info->TableDepthInPUs * layoutPtr->SUsPerPU;
                    445:                        /* Offset to start of table. */
                    446:                outSU += info->OffsetTable[BlockID][BlockOffset] *
                    447:                    layoutPtr->SUsPerPU;
                    448:                        /* Offset to the PU. */
                    449:        }
                                /* Applied on both the remapped and the normal path. */
                    450:        outSU += TableOffset / (info->BlocksPerTable * info->PUsPerBlock);
                    451:                /* offs to the SU within a PU */
                    452:
                    453:        /*
                    454:         * Convert SUs to sectors, and, if not aligned to SU boundary, add in
                    455:         * offset to sector.
                    456:         */
                    457:        *diskSector = outSU * layoutPtr->sectorsPerStripeUnit +
                    458:            (raidSector % layoutPtr->sectorsPerStripeUnit);
                    459:
                    460:        RF_ASSERT(*col != -1);
                    461: }
                    462:
                    463: /*
                    464:  * Prototyping this inexplicably causes the compile of the layout table
                    465:  * (rf_layout.c) to fail.
                    466:  */
                    467: void
                    468: rf_MapParityDeclustered(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector,
                    469:     RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector, int remap)
                    470: {
                    471:        RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
                    472:        RF_DeclusteredConfigInfo_t *info =
                    473:            (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo;
                    474:        RF_StripeNum_t SUID = raidSector / layoutPtr->sectorsPerStripeUnit;
                    475:        RF_StripeNum_t FullTableID, FullTableOffset, TableID, TableOffset;
                    476:        RF_StripeNum_t BlockID, BlockOffset, RepIndex;
                    477:        RF_StripeCount_t sus_per_fulltable = info->SUsPerFullTable;
                    478:        RF_StripeCount_t fulltable_depth =
                    479:            info->FullTableDepthInPUs * layoutPtr->SUsPerPU;
                    480:        RF_StripeNum_t base_suid = 0, outSU, SpareRegion = 0, SpareSpace = 0;
                    481:
                    482:        rf_decluster_adjust_params(layoutPtr, &SUID, &sus_per_fulltable,
                    483:            &fulltable_depth, &base_suid);
                    484:
                    485:        /* Compute row & (possibly) spare space exactly as before. */
                    486:        FullTableID = SUID / sus_per_fulltable;
                    487:        if (raidPtr->numRow == 1)
                    488:                *row = 0;       /* Avoid a mod and a div in the common case. */
                    489:        else {
                    490:                *row = FullTableID % raidPtr->numRow;
                    491:                /* Convert to fulltable ID on this disk. */
                    492:                FullTableID /= raidPtr->numRow;
                    493:        }
                    494:        if ((raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE)) {
                    495:                SpareRegion = FullTableID / info->FullTablesPerSpareRegion;
                    496:                SpareSpace = SpareRegion * info->SpareSpaceDepthPerRegionInSUs;
                    497:        }
                    498:        /* Compute BlockID and RepIndex exactly as before. */
                    499:        FullTableOffset = SUID % sus_per_fulltable;
                    500:        TableID = FullTableOffset / info->SUsPerTable;
                    501:        TableOffset = FullTableOffset - TableID * info->SUsPerTable;
                    502:        /*TableOffset   = FullTableOffset % info->SUsPerTable;*/
                    503:        /*BlockID       = (TableOffset / info->PUsPerBlock) %
                    504:         *info->BlocksPerTable;*/
                    505:        BlockID = TableOffset / info->PUsPerBlock;
                    506:        /*BlockOffset   = TableOffset % info->PUsPerBlock;*/
                    507:        BlockOffset = TableOffset - BlockID * info->PUsPerBlock;
                    508:        BlockID %= info->BlocksPerTable;
                    509:
                    510:        /* The parity block is in the position indicated by RepIndex. */
                    511:        RepIndex = (raidPtr->noRotate) ?
                    512:            info->PUsPerBlock : info->PUsPerBlock - TableID;
                    513:        *col = info->LayoutTable[BlockID][RepIndex];
                    514:
                    515:        if (remap) {
                    516:                RF_ASSERT(raidPtr->Disks[*row][*col].status ==
                    517:                    rf_ds_reconstructing ||
                    518:                    raidPtr->Disks[*row][*col].status == rf_ds_dist_spared ||
                    519:                    (rf_copyback_in_progress &&
                    520:                    raidPtr->Disks[*row][*col].status == rf_ds_optimal));
                    521:                rf_remap_to_spare_space(layoutPtr, info, *row, FullTableID,
                    522:                    TableID, BlockID, (base_suid) ? 1 : 0, SpareRegion, col,
                    523:                    &outSU);
                    524:        } else {
                    525:
                    526:                /*
                    527:                 * Compute sector as before, except use RepIndex instead of
                    528:                 * BlockOffset.
                    529:                 */
                    530:                outSU = base_suid;
                    531:                outSU += FullTableID * fulltable_depth;
                    532:                outSU += SpareSpace;    /* skip rsvd spare space */
                    533:                outSU += TableID * info->TableDepthInPUs * layoutPtr->SUsPerPU;
                    534:                outSU += info->OffsetTable[BlockID][RepIndex] *
                    535:                    layoutPtr->SUsPerPU;
                    536:        }
                    537:
                    538:        outSU += TableOffset / (info->BlocksPerTable * info->PUsPerBlock);
                    539:        *diskSector = outSU * layoutPtr->sectorsPerStripeUnit +
                    540:            (raidSector % layoutPtr->sectorsPerStripeUnit);
                    541:
                    542:        RF_ASSERT(*col != -1);
                    543: }
                    544:
                    545: /*
                    546:  * Return an array of ints identifying the disks that comprise the stripe
                    547:  * containing the indicated address.
                    548:  * The caller must _never_ attempt to modify this array.
                    549:  */
                    550: void
                    551: rf_IdentifyStripeDeclustered(RF_Raid_t *raidPtr, RF_RaidAddr_t addr,
                    552:     RF_RowCol_t **diskids, RF_RowCol_t *outRow)
                    553: {
                    554:        RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
                    555:        RF_DeclusteredConfigInfo_t *info =
                    556:            (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo;
                    557:        RF_StripeCount_t sus_per_fulltable = info->SUsPerFullTable;
                    558:        RF_StripeCount_t fulltable_depth =
                    559:            info->FullTableDepthInPUs * layoutPtr->SUsPerPU;
                    560:        RF_StripeNum_t base_suid = 0;
                    561:        RF_StripeNum_t SUID = rf_RaidAddressToStripeUnitID(layoutPtr, addr);
                    562:        RF_StripeNum_t stripeID, FullTableID;
                    563:        int tableOffset;
                    564:
                    565:        rf_decluster_adjust_params(layoutPtr, &SUID, &sus_per_fulltable,
                    566:            &fulltable_depth, &base_suid);
                    567:        /* Fulltable ID within array (across rows). */
                    568:        FullTableID = SUID / sus_per_fulltable;
                    569:        *outRow = FullTableID % raidPtr->numRow;
                    570:        /* Find stripe offset into array. */
                    571:        stripeID = rf_StripeUnitIDToStripeID(layoutPtr, SUID);
                    572:        /* Find offset into block design table. */
                    573:        tableOffset = (stripeID % info->BlocksPerTable);
                    574:        *diskids = info->LayoutTable[tableOffset];
                    575: }
                    576:
                    577: /*
                    578:  * This returns the default head-separation limit, measured in
                    579:  * "required units for reconstruction". Each time a disk fetches
                    580:  * a unit, it bumps a counter. The head-sep code prohibits any disk
                    581:  * from getting more than headSepLimit counter values ahead of any
                    582:  * other.
                    583:  *
                    584:  * We assume here that the number of floating recon buffers is already
                    585:  * set. There are r stripes to be reconstructed in each table, and so
                    586:  * if we have a total of B buffers, we can have at most B/r tables
                    587:  * under recon at any one time. In each table, lambda units are required
                    588:  * from each disk, so given B buffers, the head sep limit has to be
                    589:  * (lambda*B)/r units. We subtract one to avoid weird boundary cases.
                    590:  *
                    591:  * For example, suppose we are given 50 buffers, r=19, and lambda=4 as in
                    592:  * the 20.5 design. There are 19 stripes/table to be reconstructed, so
                    593:  * we can have 50/19 tables concurrently under reconstruction, which means
                    594:  * we can allow the fastest disk to get 50/19 tables ahead of the slower
                    595:  * disk. There are lambda "required units" for each disk, so the fastest
                    596:  * disk can get 4*50/19 = 10 counter values ahead of the slowest.
                    597:  *
                    598:  * If numBufsToAccumulate is not 1, we need to limit the head sep further
                    599:  * because multiple bufs will be required for each stripe under recon.
                    600:  */
                    601: RF_HeadSepLimit_t
                    602: rf_GetDefaultHeadSepLimitDeclustered(RF_Raid_t *raidPtr)
                    603: {
                    604:        RF_DeclusteredConfigInfo_t *info =
                    605:            (RF_DeclusteredConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo;
                    606:
                    607:        return (info->Lambda * raidPtr->numFloatingReconBufs /
                    608:            info->TableDepthInPUs / rf_numBufsToAccumulate);
                    609: }
                    610:
                    611: /*
                    612:  * Return the default number of recon buffers to use. The value
                    613:  * is somewhat arbitrary...  It's intended to be large enough to
                    614:  * allow for a reasonably large head-sep limit, but small enough
                    615:  * that you don't use up all your system memory with buffers.
                    616:  */
                    617: int
                    618: rf_GetDefaultNumFloatingReconBuffersDeclustered(RF_Raid_t *raidPtr)
                    619: {
                    620:        return (100 * rf_numBufsToAccumulate);
                    621: }
                    622:
                    623: /*
                    624:  * Sectors in the last fulltable of the array need to be handled
                    625:  * specially since this fulltable can be incomplete. This function
                    626:  * changes the values of certain params to handle this.
                    627:  *
                    628:  * The idea here is that MapSector et. al. figure out which disk the
                    629:  * addressed unit lives on by computing the modulos of the unit number
                    630:  * with the number of units per fulltable, table, etc.  In the last
                    631:  * fulltable, there are fewer units per fulltable, so we need to adjust
                    632:  * the number of user data units per fulltable to reflect this.
                    633:  *
                    634:  * So, we (1) convert the fulltable size and depth parameters to
                    635:  * the size of the partial fulltable at the end, (2) compute the
                    636:  * disk sector offset where this fulltable starts, and (3) convert
                    637:  * the users stripe unit number from an offset into the array to
                    638:  * an offset into the last fulltable.
                    639:  */
                    640: void
                    641: rf_decluster_adjust_params(RF_RaidLayout_t *layoutPtr, RF_StripeNum_t *SUID,
                    642:     RF_StripeCount_t *sus_per_fulltable, RF_StripeCount_t *fulltable_depth,
                    643:     RF_StripeNum_t *base_suid)
                    644: {
                    645:        RF_DeclusteredConfigInfo_t *info =
                    646:            (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo;
                    647:
                    648:        if (*SUID >= info->FullTableLimitSUID) {
                    649:                /* New full table size is size of last full table on disk. */
                    650:                *sus_per_fulltable =
                    651:                    info->ExtraTablesPerDisk * info->SUsPerTable;
                    652:
                    653:                /* New full table depth is corresponding depth. */
                    654:                *fulltable_depth =
                    655:                    info->ExtraTablesPerDisk * info->TableDepthInPUs *
                    656:                    layoutPtr->SUsPerPU;
                    657:
                    658:                /* Set up the new base offset. */
                    659:                *base_suid = info->DiskOffsetOfLastFullTableInSUs;
                    660:
                    661:                /*
                    662:                 * Convert user's array address to an offset into the last
                    663:                 * fulltable.
                    664:                 */
                    665:                *SUID -= info->FullTableLimitSUID;
                    666:        }
                    667: }
                    668:
                    669: /*
                    670:  * Map a stripe ID to a parity stripe ID.
                    671:  * See comment above RaidAddressToParityStripeID in layout.c.
                    672:  */
                    673: void
                    674: rf_MapSIDToPSIDDeclustered(RF_RaidLayout_t *layoutPtr, RF_StripeNum_t stripeID,
                    675:     RF_StripeNum_t *psID, RF_ReconUnitNum_t *which_ru)
                    676: {
                    677:        RF_DeclusteredConfigInfo_t *info;
                    678:
                    679:        info = (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo;
                    680:
                    681:        *psID = (stripeID / (layoutPtr->SUsPerPU * info->BlocksPerTable)) *
                    682:            info->BlocksPerTable + (stripeID % info->BlocksPerTable);
                    683:        *which_ru = (stripeID % (info->BlocksPerTable * layoutPtr->SUsPerPU)) /
                    684:            info->BlocksPerTable;
                    685:        RF_ASSERT((*which_ru) < layoutPtr->SUsPerPU / layoutPtr->SUsPerRU);
                    686: }
                    687:
                    688: /*
                    689:  * Called from MapSector and MapParity to retarget an access at the spare unit.
                    690:  * Modifies the "col" and "outSU" parameters only.
                    691:  */
                    692: void
                    693: rf_remap_to_spare_space(RF_RaidLayout_t *layoutPtr,
                    694:     RF_DeclusteredConfigInfo_t *info, RF_RowCol_t row,
                    695:     RF_StripeNum_t FullTableID, RF_StripeNum_t TableID, RF_SectorNum_t BlockID,
                    696:     RF_StripeNum_t base_suid, RF_StripeNum_t SpareRegion, RF_RowCol_t *outCol,
                    697:     RF_StripeNum_t *outSU)
                    698: {
                    699:        RF_StripeNum_t ftID, spareTableStartSU, TableInSpareRegion,
                    700:            lastSROffset, which_ft;
                    701:
                    702:        /*
                    703:         * Note that FullTableID and hence SpareRegion may have gotten
                    704:         * tweaked by rf_decluster_adjust_params. We detect this by
                    705:         * noticing that base_suid is not 0.
                    706:         */
                    707:        if (base_suid == 0) {
                    708:                ftID = FullTableID;
                    709:        } else {
                    710:                /*
                    711:                 * There may be > 1.0 full tables in the last (i.e. partial)
                    712:                 * spare region. Find out which of these we are in.
                    713:                 */
                    714:                lastSROffset = info->NumCompleteSRs *
                    715:                    info->SpareRegionDepthInSUs;
                    716:                which_ft =
                    717:                    (info->DiskOffsetOfLastFullTableInSUs - lastSROffset) /
                    718:                    (info->FullTableDepthInPUs * layoutPtr->SUsPerPU);
                    719:
                    720:                /* Compute the actual full table ID. */
                    721:                ftID = info->DiskOffsetOfLastFullTableInSUs /
                    722:                    (info->FullTableDepthInPUs * layoutPtr->SUsPerPU) +
                    723:                    which_ft;
                    724:                SpareRegion = info->NumCompleteSRs;
                    725:        }
                    726:        TableInSpareRegion = (ftID * info->NumParityReps + TableID) %
                    727:            info->TablesPerSpareRegion;
                    728:
                    729:        *outCol = info->SpareTable[TableInSpareRegion][BlockID].spareDisk;
                    730:        RF_ASSERT(*outCol != -1);
                    731:
                    732:        spareTableStartSU = (SpareRegion == info->NumCompleteSRs) ?
                    733:            info->DiskOffsetOfLastFullTableInSUs + info->ExtraTablesPerDisk *
                    734:            info->TableDepthInPUs * layoutPtr->SUsPerPU :
                    735:            (SpareRegion + 1) * info->SpareRegionDepthInSUs -
                    736:            info->SpareSpaceDepthPerRegionInSUs;
                    737:        *outSU = spareTableStartSU +
                    738:            info->SpareTable[TableInSpareRegion][BlockID].spareBlockOffsetInSUs;
                    739:        if (*outSU >= layoutPtr->stripeUnitsPerDisk) {
                    740:                printf("rf_remap_to_spare_space: invalid remapped disk SU"
                    741:                    " offset %ld.\n", (long) *outSU);
                    742:        }
                    743: }
                    744:
                    745: int
                    746: rf_InstallSpareTable(RF_Raid_t *raidPtr, RF_RowCol_t frow, RF_RowCol_t fcol)
                    747: {
                    748:        RF_DeclusteredConfigInfo_t *info =
                    749:            (RF_DeclusteredConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo;
                    750:        RF_SparetWait_t *req;
                    751:        int retcode;
                    752:
                    753:        RF_Malloc(req, sizeof(*req), (RF_SparetWait_t *));
                    754:        req->C = raidPtr->numCol;
                    755:        req->G = raidPtr->Layout.numDataCol + raidPtr->Layout.numParityCol;
                    756:        req->fcol = fcol;
                    757:        req->SUsPerPU = raidPtr->Layout.SUsPerPU;
                    758:        req->TablesPerSpareRegion = info->TablesPerSpareRegion;
                    759:        req->BlocksPerTable = info->BlocksPerTable;
                    760:        req->TableDepthInPUs = info->TableDepthInPUs;
                    761:        req->SpareSpaceDepthPerRegionInSUs =
                    762:            info->SpareSpaceDepthPerRegionInSUs;
                    763:
                    764:        retcode = rf_GetSpareTableFromDaemon(req);
                    765:        RF_ASSERT(!retcode);
                    766:        /* XXX -- Fix this to recover gracefully. -- XXX */
                    767:
                    768:        return (retcode);
                    769: }
                    770:
                    771: /*
                    772:  * Invoked via ioctl to install a spare table in the kernel.
                    773:  */
                    774: int
                    775: rf_SetSpareTable(RF_Raid_t *raidPtr, void *data)
                    776: {
                    777:        RF_DeclusteredConfigInfo_t *info =
                    778:            (RF_DeclusteredConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo;
                    779:        RF_SpareTableEntry_t **ptrs;
                    780:        int i, retcode;
                    781:
                    782:        /*
                    783:         * What we need to copyin is a 2-d array, so first copyin the user
                    784:         * pointers to the rows in the table.
                    785:         */
                    786:        RF_Malloc(ptrs, info->TablesPerSpareRegion *
                    787:            sizeof(RF_SpareTableEntry_t *), (RF_SpareTableEntry_t **));
                    788:        retcode = copyin((caddr_t) data, (caddr_t) ptrs,
                    789:            info->TablesPerSpareRegion * sizeof(RF_SpareTableEntry_t *));
                    790:
                    791:        if (retcode)
                    792:                return (retcode);
                    793:
                    794:        /* Now allocate kernel space for the row pointers. */
                    795:        RF_Malloc(info->SpareTable, info->TablesPerSpareRegion *
                    796:            sizeof(RF_SpareTableEntry_t *), (RF_SpareTableEntry_t **));
                    797:
                    798:        /*
                    799:         * Now allocate kernel space for each row in the table, and copy it in
                    800:         * from user space. */
                    801:        for (i = 0; i < info->TablesPerSpareRegion; i++) {
                    802:                RF_Malloc(info->SpareTable[i], info->BlocksPerTable *
                    803:                    sizeof(RF_SpareTableEntry_t), (RF_SpareTableEntry_t *));
                    804:                retcode = copyin(ptrs[i], info->SpareTable[i],
                    805:                    info->BlocksPerTable * sizeof(RF_SpareTableEntry_t));
                    806:                if (retcode) {
                    807:                        /* Blow off the memory we have allocated. */
                    808:                        info->SpareTable = NULL;
                    809:                        return (retcode);
                    810:                }
                    811:        }
                    812:
                    813:        /* Free up the temporary array we used. */
                    814:        RF_Free(ptrs, info->TablesPerSpareRegion *
                    815:            sizeof(RF_SpareTableEntry_t *));
                    816:
                    817:        return (0);
                    818: }
                    819:
                    820: RF_ReconUnitCount_t
                    821: rf_GetNumSpareRUsDeclustered(RF_Raid_t *raidPtr)
                    822: {
                    823:        RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
                    824:
                    825:        return (((RF_DeclusteredConfigInfo_t *)
                    826:            layoutPtr->layoutSpecificInfo)->TotSparePUsPerDisk);
                    827: }
                    828:
                    829:
                    830: void
                    831: rf_FreeSpareTable(RF_Raid_t *raidPtr)
                    832: {
                    833:        long i;
                    834:        RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
                    835:        RF_DeclusteredConfigInfo_t *info =
                    836:            (RF_DeclusteredConfigInfo_t *) layoutPtr->layoutSpecificInfo;
                    837:        RF_SpareTableEntry_t **table = info->SpareTable;
                    838:
                    839:        for (i = 0; i < info->TablesPerSpareRegion; i++) {
                    840:                RF_Free(table[i], info->BlocksPerTable *
                    841:                    sizeof(RF_SpareTableEntry_t));
                    842:        }
                    843:        RF_Free(table, info->TablesPerSpareRegion *
                    844:            sizeof(RF_SpareTableEntry_t *));
                    845:        info->SpareTable = (RF_SpareTableEntry_t **) NULL;
                    846: }

CVSweb