[BACK]Return to rf_pq.c CVS log [TXT][DIR] Up to [local] / sys / dev / raidframe

Annotation of sys/dev/raidframe/rf_pq.c, Revision 1.1.1.1

1.1       nbrk        1: /*     $OpenBSD: rf_pq.c,v 1.6 2002/12/16 07:01:04 tdeval Exp $        */
                      2: /*     $NetBSD: rf_pq.c,v 1.7 2000/01/07 03:41:02 oster Exp $  */
                      3:
                      4: /*
                      5:  * Copyright (c) 1995 Carnegie-Mellon University.
                      6:  * All rights reserved.
                      7:  *
                      8:  * Author: Daniel Stodolsky
                      9:  *
                     10:  * Permission to use, copy, modify and distribute this software and
                     11:  * its documentation is hereby granted, provided that both the copyright
                     12:  * notice and this permission notice appear in all copies of the
                     13:  * software, derivative works or modified versions, and any portions
                     14:  * thereof, and that both notices appear in supporting documentation.
                     15:  *
                     16:  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
                     17:  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
                     18:  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
                     19:  *
                     20:  * Carnegie Mellon requests users of this software to return to
                     21:  *
                     22:  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
                     23:  *  School of Computer Science
                     24:  *  Carnegie Mellon University
                     25:  *  Pittsburgh PA 15213-3890
                     26:  *
                     27:  * any improvements or extensions that they make and grant Carnegie the
                     28:  * rights to redistribute these changes.
                     29:  */
                     30:
                     31: /*
                     32:  * Code for RAID level 6 (P + Q) disk array architecture.
                     33:  */
                     34:
                     35: #include "rf_archs.h"
                     36: #include "rf_types.h"
                     37: #include "rf_raid.h"
                     38: #include "rf_dag.h"
                     39: #include "rf_dagffrd.h"
                     40: #include "rf_dagffwr.h"
                     41: #include "rf_dagdegrd.h"
                     42: #include "rf_dagdegwr.h"
                     43: #include "rf_dagutils.h"
                     44: #include "rf_dagfuncs.h"
                     45: #include "rf_etimer.h"
                     46: #include "rf_pqdeg.h"
                     47: #include "rf_general.h"
                     48: #include "rf_map.h"
                     49: #include "rf_pq.h"
                     50:
                     /*
                      * P redundancy function table: two function/name pairs, the regular
                      * old-new P function first and the simple old-new P function second.
                      * NOTE(review): the field layout of RF_RedFuncs_t is declared
                      * elsewhere -- confirm the slot meanings against its definition.
                      */
                     51: RF_RedFuncs_t rf_pFuncs = {
                     52:        rf_RegularONPFunc, "Regular Old-New P",
                     53:        rf_SimpleONPFunc, "Simple Old-New P"
                     54: };
                     /*
                      * P recovery function table: both function slots are filled with
                      * rf_RecoveryPFunc and carry the same display name.
                      */
                     55: RF_RedFuncs_t rf_pRecoveryFuncs = {
                     56:        rf_RecoveryPFunc, "Recovery P Func",
                     57:        rf_RecoveryPFunc, "Recovery P Func"
                     58: };
                     59:
                     /*
                      * Regular old-new P function: forwards the node to
                      * rf_RegularXorFunc() and returns that call's result unchanged.
                      */
                     60: int
                     61: rf_RegularONPFunc(RF_DagNode_t *node)
                     62: {
                     63:        return (rf_RegularXorFunc(node));
                     64: }
                     65:
                     66:
                     67: /*
                     68:  * Simple old-new P function. Same as rf_SimpleONQFunc(), but the
                          * coefficient is always 1; the node is simply forwarded to
                          * rf_SimpleXorFunc() and that call's result is returned.
                     69:  */
                     70:
                     71: int
                     72: rf_SimpleONPFunc(RF_DagNode_t *node)
                     73: {
                     74:        return (rf_SimpleXorFunc(node));
                     75: }
                     76:
                     /*
                      * P recovery function: forwards the node to rf_RecoveryXorFunc()
                      * and returns that call's result unchanged.
                      */
                     77: int
                     78: rf_RecoveryPFunc(RF_DagNode_t *node)
                     79: {
                     80:        return (rf_RecoveryXorFunc(node));
                     81: }
                     82:
                     /*
                      * Regular P function: forwards the node to rf_RegularXorFunc() and
                      * returns that call's result unchanged (same body as
                      * rf_RegularONPFunc()).
                      */
                     83: int
                     84: rf_RegularPFunc(RF_DagNode_t *node)
                     85: {
                     86:        return (rf_RegularXorFunc(node));
                     87: }
                     88:
                     89:
                     90: #if    (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
                     91:
                     /*
                      * Prototypes for Q helper routines whose bodies are not in this part
                      * of the file.
                      *
                      * Note that rf_QDelta() takes its coefficient as an unsigned char while
                      * rf_InvertQ() takes an unsigned; the callers below pass an unsigned
                      * long to rf_QDelta(), which is implicitly converted at the call.
                      */
                     92: void rf_QDelta(char *dest, char *obuf, char *nbuf, unsigned length,
                     93:        unsigned char coeff);
                     94: void rf_InvertQ(unsigned long *qbuf, unsigned long *abuf, unsigned length,
                     95:        unsigned coeff);
                     96:
                     /*
                      * Q redundancy function table: two function/name pairs, the regular
                      * old-new Q function first and the simple old-new Q function second.
                      */
                     97: RF_RedFuncs_t rf_qFuncs = {
                     98:        rf_RegularONQFunc, "Regular Old-New Q",
                     99:        rf_SimpleONQFunc, "Simple Old-New Q"
                    100: };
                    /*
                     * Q recovery function table: both function slots are filled with
                     * rf_RecoveryQFunc and carry the same display name.
                     */
                    101: RF_RedFuncs_t rf_qRecoveryFuncs = {
                    102:        rf_RecoveryQFunc, "Recovery Q Func",
                    103:        rf_RecoveryQFunc, "Recovery Q Func"
                    104: };
                    /*
                     * Combined P+Q recovery function table: both function slots are
                     * filled with rf_RecoveryPQFunc and carry the same display name.
                     */
                    105: RF_RedFuncs_t rf_pqRecoveryFuncs = {
                    106:        rf_RecoveryPQFunc, "Recovery PQ Func",
                    107:        rf_RecoveryPQFunc, "Recovery PQ Func"
                    108: };
                    109:
                    /*
                     * Choose the DAG creation routine for one access to a P+Q stripe.
                     *
                     *   raidPtr    - array descriptor; its Layout supplies numDataCol.
                     *   type       - I/O type; asserted to satisfy RF_IO_IS_R_OR_W().
                     *   asmap      - access stripe map; numDataFailed, numParityFailed,
                     *                failedPDAs[], numStripeUnitsAccessed, parityInfo and
                     *                qInfo are read.
                     *   createFunc - out: receives the selected routine, or NULL when
                     *                more than two units in the group have failed.
                     *
                     * Reads are dispatched on the number of failed data units; writes are
                     * dispatched on the total number of failed units (data + parity).
                     * NOTE(review): the digits in the rf_PQ_xyz_* names look like the
                     * failed data / P / Q counts -- confirm against rf_pqdeg.h.
                     */
                    110: void
                    111: rf_PQDagSelect(RF_Raid_t *raidPtr, RF_IoType_t type,
                    112:     RF_AccessStripeMap_t *asmap, RF_VoidFuncPtr *createFunc)
                    113: {
                    114:        RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
                    115:        unsigned ndfail = asmap->numDataFailed;
                    116:        unsigned npfail = asmap->numParityFailed;
                    117:        unsigned ntfail = npfail + ndfail;
                    118:
                    119:        RF_ASSERT(RF_IO_IS_R_OR_W(type));
                    120:        if (ntfail > 2) {
                    121:                RF_ERRORMSG("more than two disks failed in a single group !"
                    122:                            "  Aborting I/O operation.\n");
                    123:                 /* *infoFunc = */ *createFunc = NULL;
                    124:                return;
                    125:        }
                    126:        /* Ok, we can do this I/O. */
                    127:        if (type == RF_IO_TYPE_READ) {
                                    /*
                                     * ntfail <= 2 was established above, so ndfail is 0, 1
                                     * or 2 and the cases below cover every value.
                                     */
                    128:                switch (ndfail) {
                    129:                case 0:
                    130:                        /* Fault free read. */
                    131:                        *createFunc = (RF_VoidFuncPtr)
                    132:                            rf_CreateFaultFreeReadDAG;  /* Same as raid 5. */
                    133:                        break;
                    134:                case 1:
                    135:                        /* Lost a single data unit. */
                    136:                        /*
                    137:                         * Two cases:
                    138:                         * (1) Parity is not lost. Do a normal raid 5
                    139:                         *     reconstruct read.
                    140:                         * (2) Parity is lost. Do a reconstruct read using "q".
                    141:                         */
                    142:                        if (ntfail == 2) {      /* Also lost redundancy. */
                    143:                                if (asmap->failedPDAs[1]->type ==
                    144:                                    RF_PDA_TYPE_PARITY)
                    145:                                        *createFunc = (RF_VoidFuncPtr)
                    146:                                            rf_PQ_110_CreateReadDAG;
                    147:                                else
                    148:                                        *createFunc = (RF_VoidFuncPtr)
                    149:                                            rf_PQ_101_CreateReadDAG;
                    150:                        } else {
                    151:                                /*
                    152:                                 * P and Q are ok. But is there a failure in
                    153:                                 * some unaccessed data unit ?
                    154:                                 */
                    155:                                if (rf_NumFailedDataUnitsInStripe(raidPtr,
                    156:                                    asmap) == 2)
                    157:                                        *createFunc = (RF_VoidFuncPtr)
                    158:                                            rf_PQ_200_CreateReadDAG;
                    159:                                else
                    160:                                        *createFunc = (RF_VoidFuncPtr)
                    161:                                            rf_PQ_100_CreateReadDAG;
                    162:                        }
                    163:                        break;
                    164:                case 2:
                    165:                        /* Lost two data units. */
                    166:                        /* *infoFunc = rf_PQOneTwo; */
                    167:                        *createFunc = (RF_VoidFuncPtr) rf_PQ_200_CreateReadDAG;
                    168:                        break;
                    169:                }
                    170:                return;
                    171:        }
                    172:        /* A write. */
                    173:        switch (ntfail) {
                    174:        case 0:         /* Fault free. */
                                    /*
                                     * The small-write DAG is chosen when large writes are
                                     * suppressed, when at most half of the data columns are
                                     * accessed (and there is more than one data column), when
                                     * the P or Q info list has more than one entry, or when
                                     * rf_CheckStripeForFailures() reports a failure.
                                     */
                    175:                if (rf_suppressLocksAndLargeWrites ||
                    176:                    (((asmap->numStripeUnitsAccessed <=
                    177:                       (layoutPtr->numDataCol / 2)) &&
                    178:                      (layoutPtr->numDataCol != 1)) ||
                    179:                     (asmap->parityInfo->next != NULL) ||
                    180:                     (asmap->qInfo->next != NULL) ||
                    181:                     rf_CheckStripeForFailures(raidPtr, asmap))) {
                    182:
                    183:                        *createFunc = (RF_VoidFuncPtr) rf_PQCreateSmallWriteDAG;
                    184:                } else {
                    185:                        *createFunc = (RF_VoidFuncPtr) rf_PQCreateLargeWriteDAG;
                    186:                }
                    187:                break;
                    188:
                    189:        case 1:         /* Single disk fault. */
                    190:                if (npfail == 1) {
                    191:                        RF_ASSERT((asmap->failedPDAs[0]->type ==
                    192:                            RF_PDA_TYPE_PARITY) ||
                    193:                            (asmap->failedPDAs[0]->type == RF_PDA_TYPE_Q));
                    194:                        if (asmap->failedPDAs[0]->type == RF_PDA_TYPE_Q) {
                    195:                                /*
                    196:                                 * Q died, treat like normal mode raid5 write.
                    197:                                 */
                    198:                                if (((asmap->numStripeUnitsAccessed <=
                    199:                                      (layoutPtr->numDataCol / 2)) ||
                    200:                                     (asmap->numStripeUnitsAccessed == 1)) ||
                    201:                                    rf_NumFailedDataUnitsInStripe(raidPtr,
                    202:                                     asmap))
                    203:                                        *createFunc = (RF_VoidFuncPtr)
                    204:                                            rf_PQ_001_CreateSmallWriteDAG;
                    205:                                else
                    206:                                        *createFunc = (RF_VoidFuncPtr)
                    207:                                            rf_PQ_001_CreateLargeWriteDAG;
                    208:                        } else {/* Parity died, small write only updating Q. */
                    209:                                if (((asmap->numStripeUnitsAccessed <=
                    210:                                      (layoutPtr->numDataCol / 2)) ||
                    211:                                     (asmap->numStripeUnitsAccessed == 1)) ||
                    212:                                    rf_NumFailedDataUnitsInStripe(raidPtr,
                    213:                                     asmap))
                    214:                                        *createFunc = (RF_VoidFuncPtr)
                    215:                                            rf_PQ_010_CreateSmallWriteDAG;
                    216:                                else
                    217:                                        *createFunc = (RF_VoidFuncPtr)
                    218:                                            rf_PQ_010_CreateLargeWriteDAG;
                    219:                        }
                    220:                } else {        /*
                    221:                                 * Data missing. Do a P reconstruct write if
                    222:                                 * only a single data unit is lost in the
                    223:                                 * stripe, otherwise a PQ reconstruct write.
                    224:                                 */
                    225:                        if (rf_NumFailedDataUnitsInStripe(raidPtr, asmap) == 2)
                    226:                                *createFunc = (RF_VoidFuncPtr)
                    227:                                    rf_PQ_200_CreateWriteDAG;
                    228:                        else
                    229:                                *createFunc = (RF_VoidFuncPtr)
                    230:                                    rf_PQ_100_CreateWriteDAG;
                    231:                }
                    232:                break;
                    233:
                    234:        case 2:         /* Two disk faults. */
                    235:                switch (npfail) {
                    236:                case 2: /* Both p and q dead. */
                    237:                        *createFunc = (RF_VoidFuncPtr) rf_PQ_011_CreateWriteDAG;
                    238:                        break;
                    239:                case 1: /* Either p or q and dead data. */
                    240:                        RF_ASSERT(asmap->failedPDAs[0]->type ==
                    241:                                  RF_PDA_TYPE_DATA);
                    242:                        RF_ASSERT((asmap->failedPDAs[1]->type ==
                    243:                                   RF_PDA_TYPE_PARITY) ||
                    244:                                  (asmap->failedPDAs[1]->type ==
                    245:                                   RF_PDA_TYPE_Q));
                    246:                        if (asmap->failedPDAs[1]->type == RF_PDA_TYPE_Q)
                    247:                                *createFunc = (RF_VoidFuncPtr)
                    248:                                    rf_PQ_101_CreateWriteDAG;
                    249:                        else
                    250:                                *createFunc = (RF_VoidFuncPtr)
                    251:                                    rf_PQ_110_CreateWriteDAG;
                    252:                        break;
                    253:                case 0: /* Double data loss. */
                    254:                        *createFunc = (RF_VoidFuncPtr) rf_PQ_200_CreateWriteDAG;
                    255:                        break;
                    256:                }
                    257:                break;
                    258:
                            /*
                             * The ntfail > 2 case already returned near the top of the
                             * function, so this arm is a defensive backstop only.
                             */
                    259:        default:        /* More than 2 disk faults. */
                    260:                *createFunc = NULL;
                    261:                RF_PANIC();
                    262:        }
                    263:        return;
                    264: }
                    265:
                    266:
                    267: /*
                    268:  * Used as a stop gap info function.
                         *
                         * Both routines below are compiled out by the "#if 0" guard. Each one
                         * only stores fixed values through nSucc and nAnte (1 and 1 for
                         * rf_PQOne, 1 and 2 for rf_PQOneTwo); raidPtr and asmap are unused.
                    269:  */
                    270: #if 0
                    271: void
                    272: rf_PQOne(RF_Raid_t *raidPtr, int *nSucc, int *nAnte,
                    273:     RF_AccessStripeMap_t *asmap)
                    274: {
                    275:        *nSucc = *nAnte = 1;
                    276: }
                    277:
                    278: void
                    279: rf_PQOneTwo(RF_Raid_t *raidPtr, int *nSucc, int *nAnte,
                    280:     RF_AccessStripeMap_t *asmap)
                    281: {
                    282:        *nSucc = 1;
                    283:        *nAnte = 2;
                    284: }
                    285: #endif
                    286:
                    /*
                     * DAG creation routine for a P+Q large write. The parameter list comes
                     * from the RF_CREATE_DAG_FUNC_DECL() macro (defined elsewhere); the
                     * body forwards raidPtr, asmap, dag_h, bp, flags and allocList to
                     * rf_CommonCreateLargeWriteDAG() together with the constant 2,
                     * rf_RegularPQFunc as the redundancy function, and RF_FALSE.
                     * NOTE(review): the 2 is presumably the number of redundancy units
                     * (P and Q) -- confirm against rf_CommonCreateLargeWriteDAG().
                     */
                    287: RF_CREATE_DAG_FUNC_DECL(rf_PQCreateLargeWriteDAG)
                    288: {
                    289:        rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags,
                    290:            allocList, 2, rf_RegularPQFunc, RF_FALSE);
                    291: }
                    292:
                    /*
                     * Regular old-new Q update for a DAG node.
                     *
                     * The node carries np = 4d + 3 params (asserted below):
                     *   params[2i], params[2i + 1]              old data pda / buffer, i < d
                     *   params[2d + 1]                          Q buffer
                     *   params[2(d + 1 + i)], params[... + 1]   new data pda / buffer, i < d
                     *   params[np - 1]                          raidPtr
                     *
                     * For every old/new pair rf_QDelta() is applied to the Q buffer at the
                     * byte offset of the old pda's start sector within its stripe unit.
                     * The elapsed time is added to the trace record's q_us, the node is
                     * woken with rf_GenericWakeupFunc(node, 0), and 0 is returned.
                     */
                    293: int
                    294: rf_RegularONQFunc(RF_DagNode_t *node)
                    295: {
                    296:        int np = node->numParams;
                    297:        int d;
                    298:        RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
                    299:        int i;
                    300:        RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
                    301:        RF_Etimer_t timer;
                    302:        char *qbuf, *qpbuf;
                    303:        char *obuf, *nbuf;
                    304:        RF_PhysDiskAddr_t *old, *new;
                    305:        unsigned long coeff;
                    306:        unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit;
                    307:
                    308:        RF_ETIMER_START(timer);
                    309:
                    310:        d = (np - 3) / 4;
                    311:        RF_ASSERT(4 * d + 3 == np);
                    312:        qbuf = (char *) node->params[2 * d + 1].p;      /* Q buffer. */
                    313:        for (i = 0; i < d; i++) {
                    314:                old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
                    315:                obuf = (char *) node->params[2 * i + 1].p;
                    316:                new = (RF_PhysDiskAddr_t *) node->params[2 * (d + 1 + i)].p;
                    317:                nbuf = (char *) node->params[2 * (d + 1 + i) + 1].p;
                    318:                RF_ASSERT(new->numSector == old->numSector);
                    319:                RF_ASSERT(new->raidAddress == old->raidAddress);
                    320:                /*
                    321:                 * The stripe unit within the stripe tells us the coefficient
                    322:                 * to use for the multiply.
                    323:                 */
                    324:                coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout),
                    325:                    new->raidAddress);
                    326:                /*
                    327:                 * Reduce the stripe unit ID modulo the number of data columns.
                    328:                 * NOTE(review): this comment used to say "then add one", but
                                         * no increment is applied here -- confirm what rf_QDelta()
                                         * expects.
                    329:                 */
                    330:                coeff = (coeff % raidPtr->Layout.numDataCol);
                                    /*
                                     * Shift into the Q buffer by the old pda's sector offset
                                     * within its stripe unit.
                                     */
                    331:                qpbuf = qbuf + rf_RaidAddressToByte(raidPtr,
                    332:                    old->startSector % secPerSU);
                    333:                rf_QDelta(qpbuf, obuf, nbuf, rf_RaidAddressToByte(raidPtr,
                    334:                    old->numSector), coeff);
                    335:        }
                    336:
                    337:        RF_ETIMER_STOP(timer);
                    338:        RF_ETIMER_EVAL(timer);
                    339:        tracerec->q_us += RF_ETIMER_VAL_US(timer);
                    340:        rf_GenericWakeupFunc(node, 0);  /*
                    341:                                         * Call wake func explicitly since no
                    342:                                         * I/O in this node.
                    343:                                         */
                    344:        return (0);
                    345: }
                    346:
                    347:
                    348: /*
                    349:  * See the SimpleXORFunc for the difference between a simple and regular func.
                    350:  * These Q functions should be used for
                    351:  *     new q = Q(data, old data, old q)
                    352:  * style updates and not for
                    353:  *     q = (new data, new data, ...)
                    354:  * computations.
                    355:  *
                    356:  * The simple q takes 2(2d+1)+1 params, where d is the number
                    357:  * of stripes written. The order of params is
                    358:  *   [0] old data pda_0, old data buffer_0, old data pda_1, old data buffer_1, ...
                    359:  *   old data pda_(d-1), old data buffer_(d-1)
                    360:  *   [2d] old q pda_0, old q buffer
                    361:  *   [2d+2] new data pda_0, new data buffer_0, ...
                    362:  *   new data pda_(d-1), new data buffer_(d-1)
                    363:  *   [4d+2] raidPtr
                    364:  */
                    365:
                    /*
                     * Simple old-new Q update for a DAG node.
                     *
                     * Expects np = 4d + 3 params (asserted below): d old data pda/buffer
                     * pairs from params[0], the Q buffer at params[2d + 1], d new data
                     * pda/buffer pairs from params[2d + 2], and raidPtr at params[np - 1].
                     *
                     * For every old/new pair rf_QDelta() is applied to the Q buffer
                     * itself; unlike rf_RegularONQFunc() no per-pda offset is added to
                     * the Q buffer pointer. The elapsed time is added to the trace
                     * record's q_us, the node is woken with rf_GenericWakeupFunc(node, 0),
                     * and 0 is returned.
                     */
                    366: int
                    367: rf_SimpleONQFunc(RF_DagNode_t *node)
                    368: {
                    369:        int np = node->numParams;
                    370:        int d;
                    371:        RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
                    372:        int i;
                    373:        RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
                    374:        RF_Etimer_t timer;
                    375:        char *qbuf;
                    376:        char *obuf, *nbuf;
                    377:        RF_PhysDiskAddr_t *old, *new;
                    378:        unsigned long coeff;
                    379:
                    380:        RF_ETIMER_START(timer);
                    381:
                    382:        d = (np - 3) / 4;
                    383:        RF_ASSERT(4 * d + 3 == np);
                    384:        qbuf = (char *) node->params[2 * d + 1].p;      /* Q buffer. */
                    385:        for (i = 0; i < d; i++) {
                    386:                old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
                    387:                obuf = (char *) node->params[2 * i + 1].p;
                    388:                new = (RF_PhysDiskAddr_t *) node->params[2 * (d + 1 + i)].p;
                    389:                nbuf = (char *) node->params[2 * (d + 1 + i) + 1].p;
                    390:                RF_ASSERT(new->numSector == old->numSector);
                    391:                RF_ASSERT(new->raidAddress == old->raidAddress);
                    392:                /*
                    393:                 * The stripe unit within the stripe tells us the coefficient
                    394:                 * to use for the multiply.
                    395:                 */
                    396:                coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout),
                    397:                    new->raidAddress);
                    398:                /*
                    399:                 * Reduce the stripe unit ID modulo the number of data columns.
                    400:                 * NOTE(review): this comment used to say "then add one", but
                                         * no increment is applied here -- confirm what rf_QDelta()
                                         * expects.
                    401:                 */
                    402:                coeff = (coeff % raidPtr->Layout.numDataCol);
                    403:                rf_QDelta(qbuf, obuf, nbuf, rf_RaidAddressToByte(raidPtr,
                    404:                    old->numSector), coeff);
                    405:        }
                    406:
                    407:        RF_ETIMER_STOP(timer);
                    408:        RF_ETIMER_EVAL(timer);
                    409:        tracerec->q_us += RF_ETIMER_VAL_US(timer);
                    410:        rf_GenericWakeupFunc(node, 0);  /*
                    411:                                         * Call wake func explicitly since no
                    412:                                         * I/O in this node.
                    413:                                         */
                    414:        return (0);
                    415: }
                    416:
                    /*
                     * DAG creation routine for a P+Q small write. The parameter list comes
                     * from the RF_CREATE_DAG_FUNC_DECL() macro (defined elsewhere); the
                     * body forwards raidPtr, asmap, dag_h, bp, flags and allocList to
                     * rf_CommonCreateSmallWriteDAG() together with the P and Q redundancy
                     * function tables (rf_pFuncs and rf_qFuncs).
                     */
                    417: RF_CREATE_DAG_FUNC_DECL(rf_PQCreateSmallWriteDAG)
                    418: {
                    419:        rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags,
                    420:            allocList, &rf_pFuncs, &rf_qFuncs);
                    421: }
                    422:
                    423:
                    /*
                     * Fold every data buffer of a DAG node into the Q buffer qbuf.
                     *
                     * The node carries np = 2d + 1 params (asserted below): d pda/buffer
                     * pairs starting at params[0], followed by raidPtr at params[np - 1].
                     * Each buffer is passed to rf_IncQ() together with a destination inside
                     * qbuf that is shifted by the pda's sector offset within its stripe
                     * unit. The elapsed time is added to the trace record's q_us.
                     *
                     * No wakeup is issued here; the callers in this file do that
                     * themselves.
                     */
                    424: void rf_RegularQSubr(RF_DagNode_t *, char *);
                    425:
                    426: void
                    427: rf_RegularQSubr(RF_DagNode_t *node, char *qbuf)
                    428: {
                    429:        int np = node->numParams;
                    430:        int d;
                    431:        RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
                    432:        unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit;
                    433:        int i;
                    434:        RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
                    435:        RF_Etimer_t timer;
                    436:        char *obuf, *qpbuf;
                    437:        RF_PhysDiskAddr_t *old;
                    438:        unsigned long coeff;
                    439:
                    440:        RF_ETIMER_START(timer);
                    441:
                    442:        d = (np - 1) / 2;
                    443:        RF_ASSERT(2 * d + 1 == np);
                    444:        for (i = 0; i < d; i++) {
                    445:                old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
                    446:                obuf = (char *) node->params[2 * i + 1].p;
                    447:                coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout),
                    448:                    old->raidAddress);
                    449:                /*
                    450:                 * Reduce the stripe unit ID modulo the number of data columns.
                    451:                 * NOTE(review): this comment used to say "then add one", but
                                         * no increment is applied here -- confirm what rf_IncQ()
                                         * expects.
                    452:                 */
                    453:                coeff = (coeff % raidPtr->Layout.numDataCol);
                    454:                /*
                    455:                 * The input buffers may not all be aligned with the start of
                    456:                 * the stripe. So shift by their sector offset within the
                    457:                 * stripe unit.
                    458:                 */
                    459:                qpbuf = qbuf + rf_RaidAddressToByte(raidPtr,
                    460:                    old->startSector % secPerSU);
                    461:                rf_IncQ((unsigned long *) qpbuf, (unsigned long *) obuf,
                    462:                    rf_RaidAddressToByte(raidPtr, old->numSector), coeff);
                    463:        }
                    464:
                    465:        RF_ETIMER_STOP(timer);
                    466:        RF_ETIMER_EVAL(timer);
                    467:        tracerec->q_us += RF_ETIMER_VAL_US(timer);
                    468: }
                    469:
                    470:
                    471: /*
                    472:  * Used in degraded writes.
                         *
                         * Folds every data buffer of the node into the Q buffer taken from
                         * node->results[1].
                         *
                         * The node carries np = 2d + 2 params (asserted below): d pda/buffer
                         * pairs starting at params[0], one further pda at params[np - 2], and
                         * raidPtr at params[np - 1]. The pda at params[np - 2] fixes
                         * fail_start, the sector offset within the stripe unit that every
                         * destination offset is measured from. Each buffer is passed to
                         * rf_IncQ(), and the elapsed time is added to the trace record's q_us.
                         * No wakeup is issued here.
                    473:  */
                    474:
                    475: void rf_DegrQSubr(RF_DagNode_t *);
                    476:
                    477: void
                    478: rf_DegrQSubr(RF_DagNode_t *node)
                    479: {
                    480:        int np = node->numParams;
                    481:        int d;
                    482:        RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
                    483:        unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit;
                    484:        RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
                    485:        RF_Etimer_t timer;
                    486:        char *qbuf = node->results[1];
                    487:        char *obuf, *qpbuf;
                    488:        RF_PhysDiskAddr_t *old;
                    489:        unsigned long coeff;
                    490:        unsigned fail_start;
                    491:        int i, j;
                    492:
                            /* The pda just before raidPtr gives the reference offset. */
                    493:        old = (RF_PhysDiskAddr_t *) node->params[np - 2].p;
                    494:        fail_start = old->startSector % secPerSU;
                    495:
                    496:        RF_ETIMER_START(timer);
                    497:
                    498:        d = (np - 2) / 2;
                    499:        RF_ASSERT(2 * d + 2 == np);
                    500:        for (i = 0; i < d; i++) {
                    501:                old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
                    502:                obuf = (char *) node->params[2 * i + 1].p;
                    503:                coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout),
                    504:                    old->raidAddress);
                    505:                /*
                    506:                 * Reduce the stripe unit ID modulo the number of data columns.
                    507:                 * NOTE(review): this comment used to say "then add one", but
                                         * no increment is applied here -- confirm what rf_IncQ()
                                         * expects.
                    508:                 */
                    509:                coeff = (coeff % raidPtr->Layout.numDataCol);
                    510:                /*
                    511:                 * The input buffers may not all be aligned with the start of
                    512:                 * the stripe. So shift by their sector offset within the
                    513:                 * stripe unit.
                    514:                 */
                    515:                j = old->startSector % secPerSU;
                                    /*
                                     * j is an int and fail_start is unsigned, so both the
                                     * comparison and the subtraction below are carried out in
                                     * unsigned arithmetic.
                                     */
                    516:                RF_ASSERT(j >= fail_start);
                    517:                qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, j - fail_start);
                    518:                rf_IncQ((unsigned long *) qpbuf, (unsigned long *) obuf,
                    519:                    rf_RaidAddressToByte(raidPtr, old->numSector), coeff);
                    520:        }
                    521:
                    522:        RF_ETIMER_STOP(timer);
                    523:        RF_ETIMER_EVAL(timer);
                    524:        tracerec->q_us += RF_ETIMER_VAL_US(timer);
                    525: }
                    526:
                    527:
                    528: /*
                    529:  * Called by large write code to compute the new parity and the new q.
                    530:  *
                    531:  * Structure of the params:
                    532:  *
                    533:  *   pda_0, buffer_0, pda_1 , buffer_1, ..., pda_d, buffer_d (d = numDataCol)
                    534:  *   raidPtr
                    535:  *
                    536:  * For a total of 2d+1 arguments.
                    537:  * The result buffers results[0], results[1] are the buffers for the p and q,
                    538:  * respectively.
                    539:  *
                    540:  * We compute Q first, then compute P. The P calculation may try to reuse
                    541:  * one of the input buffers for its output, so if we computed P first, we would
                    542:  * corrupt the input for the q calculation.
                    543:  */
                    544:
                    545: int
                    546: rf_RegularPQFunc(RF_DagNode_t *node)
                    547: {
                    548:        rf_RegularQSubr(node, node->results[1]);
                    549:        return (rf_RegularXorFunc(node));       /* Does the wakeup. */
                    550: }
                    551:
                    552: int
                    553: rf_RegularQFunc(RF_DagNode_t *node)
                    554: {
                    555:        /* Almost ... adjust Qsubr args. */
                    556:        rf_RegularQSubr(node, node->results[0]);
                    557:        rf_GenericWakeupFunc(node, 0);  /*
                    558:                                         * Call wake func explicitly since no
                    559:                                         * I/O in this node.
                    560:                                         */
                    561:        return (0);
                    562: }
                    563:
                    564:
                    565: /*
                    566:  * Called by singly degraded write code to compute the new parity and
                    567:  * the new q.
                    568:  *
                    569:  * Structure of the params:
                    570:  *
                    571:  *   pda_0, buffer_0, pda_1 , buffer_1, ..., pda_d, buffer_d
                    572:  *   failedPDA raidPtr
                    573:  *
                    574:  * for a total of 2d+2 arguments.
                    575:  * The result buffers results[0], results[1] are the buffers for the parity
                    576:  * and q, respectively.
                    577:  *
                    578:  * We compute Q first, then compute parity. The parity calculation may try
                    579:  * to reuse one of the input buffers for its output, so if we computed parity
                    580:  * first, we would corrupt the input for the q calculation.
                    581:  *
                    582:  * We treat this identically to the regularPQ case, ignoring the failedPDA
                    583:  * extra argument.
                    584:  */
                    585:
                    586: void
                    587: rf_Degraded_100_PQFunc(RF_DagNode_t *node)
                    588: {
                    589:        int     np = node->numParams;
                    590:
                    591:        RF_ASSERT(np >= 2);
                    592:        rf_DegrQSubr(node);
                    593:        rf_RecoveryXorFunc(node);
                    594: }
                    595:
                    596:
                    597: /*
                    598:  * The two below are used when reading a stripe with a single lost data unit.
                    599:  * The parameters are
                    600:  *
                    601:  *  pda_0, buffer_0, ..., pda_n, buffer_n, P pda, P buffer, failedPDA, raidPtr
                    602:  *
                    603:  * and results[0] contains the data buffer, which is originally zero-filled.
                    604:  */
                    605:
                    606: /*
                    607:  * This Q func is used by the degraded-mode dag functions to recover lost data.
                    608:  * The second-to-last parameter is the PDA for the failed portion of the
                    609:  * access. The code here looks at this PDA and assumes that the xor target
                    610:  * buffer is equal in size to the number of sectors in the failed PDA. It then
                    611:  * uses the other PDAs in the parameter list to determine where within the
                    612:  * target buffer the corresponding data should be xored.
                    613:  *
                    614:  * Recall the basic equation is
                    615:  *
                    616:  *     Q = (data_1 + 2 * data_2 ... + k * data_k) mod 256
                    617:  *
                    618:  * so to recover data_j we need
                    619:  *
                    620:  *    J data_j = (Q - data_1 - 2 data_2 ... - k * data_k) mod 256
                    621:  *
                    622:  * So the coefficient for each buffer is (255 - data_col), and data_j
                    623:  * should be initialized by copying Q into it. Then we need to do a
                    624:  * table lookup to solve
                    625:  *   data_j /= J
                    626:  *
                    627:  */
                    628:
                    629: int
                    630: rf_RecoveryQFunc(RF_DagNode_t *node)
                    631: {
                    632:        RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
                    633:        RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
                    634:        RF_PhysDiskAddr_t *failedPDA =
                    635:            (RF_PhysDiskAddr_t *) node->params[node->numParams - 2].p;
                    636:        int i;
                    637:        RF_PhysDiskAddr_t *pda;
                    638:        RF_RaidAddr_t suoffset;
                    639:        RF_RaidAddr_t failedSUOffset =
                    640:            rf_StripeUnitOffset(layoutPtr, failedPDA->startSector);
                    641:        char *srcbuf, *destbuf;
                    642:        RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
                    643:        RF_Etimer_t timer;
                    644:        unsigned long coeff;
                    645:
                    646:        RF_ETIMER_START(timer);
                    647:        /* Start by copying Q into the buffer. */
                    648:        bcopy(node->params[node->numParams - 3].p, node->results[0],
                    649:            rf_RaidAddressToByte(raidPtr, failedPDA->numSector));
                    650:        for (i = 0; i < node->numParams - 4; i += 2) {
                    651:                RF_ASSERT(node->params[i + 1].p != node->results[0]);
                    652:                pda = (RF_PhysDiskAddr_t *) node->params[i].p;
                    653:                srcbuf = (char *) node->params[i + 1].p;
                    654:                suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
                    655:                destbuf = ((char *) node->results[0]) +
                    656:                    rf_RaidAddressToByte(raidPtr, suoffset - failedSUOffset);
                    657:                coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout),
                    658:                    pda->raidAddress);
                    659:                /* Compute the data unit offset within the column. */
                    660:                coeff = (coeff % raidPtr->Layout.numDataCol);
                    661:                rf_IncQ((unsigned long *) destbuf, (unsigned long *) srcbuf,
                    662:                    rf_RaidAddressToByte(raidPtr, pda->numSector), coeff);
                    663:        }
                    664:        /* Do the nasty inversion now. */
                    665:        coeff = (rf_RaidAddressToStripeUnitID(&(raidPtr->Layout),
                    666:            failedPDA->startSector) % raidPtr->Layout.numDataCol);
                    667:        rf_InvertQ(node->results[0], node->results[0],
                    668:            rf_RaidAddressToByte(raidPtr, pda->numSector), coeff);
                    669:        RF_ETIMER_STOP(timer);
                    670:        RF_ETIMER_EVAL(timer);
                    671:        tracerec->q_us += RF_ETIMER_VAL_US(timer);
                    672:        rf_GenericWakeupFunc(node, 0);
                    673:        return (0);
                    674: }
                    675:
                    676: int
                    677: rf_RecoveryPQFunc(RF_DagNode_t *node)
                    678: {
                    679:        RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
                    680:        printf("raid%d: Recovery from PQ not implemented.\n", raidPtr->raidid);
                    681:        return (1);
                    682: }
                    683:
                    684:
                    685: /*
                    686:  * Degraded write Q subroutine.
                    687:  * Used when P is dead.
                    688:  * Large-write style Q computation.
                    689:  * Parameters:
                    690:  *
                    691:  * (pda, buf), (pda, buf), ..., (failedPDA, bufPtr), failedPDA, raidPtr.
                    692:  *
                    693:  * We ignore failedPDA.
                    694:  *
                    695:  * This is a "simple style" recovery func.
                    696:  */
                    697:
                    698: void
                    699: rf_PQ_DegradedWriteQFunc(RF_DagNode_t *node)
                    700: {
                    701:        int np = node->numParams;
                    702:        int d;
                    703:        RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
                    704:        unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit;
                    705:        RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
                    706:        RF_Etimer_t timer;
                    707:        char *qbuf = node->results[0];
                    708:        char *obuf, *qpbuf;
                    709:        RF_PhysDiskAddr_t *old;
                    710:        unsigned long coeff;
                    711:        int fail_start, i, j;
                    712:
                    713:        old = (RF_PhysDiskAddr_t *) node->params[np - 2].p;
                    714:        fail_start = old->startSector % secPerSU;
                    715:
                    716:        RF_ETIMER_START(timer);
                    717:
                    718:        d = (np - 2) / 2;
                    719:        RF_ASSERT(2 * d + 2 == np);
                    720:
                    721:        for (i = 0; i < d; i++) {
                    722:                old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
                    723:                obuf = (char *) node->params[2 * i + 1].p;
                    724:                coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout),
                    725:                    old->raidAddress);
                    726:                /*
                    727:                 * Compute the data unit offset within the column, then add
                    728:                 * one.
                    729:                 */
                    730:                coeff = (coeff % raidPtr->Layout.numDataCol);
                    731:                j = old->startSector % secPerSU;
                    732:                RF_ASSERT(j >= fail_start);
                    733:                qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, j - fail_start);
                    734:                rf_IncQ((unsigned long *) qpbuf, (unsigned long *) obuf,
                    735:                    rf_RaidAddressToByte(raidPtr, old->numSector), coeff);
                    736:        }
                    737:
                    738:        RF_ETIMER_STOP(timer);
                    739:        RF_ETIMER_EVAL(timer);
                    740:        tracerec->q_us += RF_ETIMER_VAL_US(timer);
                    741:        rf_GenericWakeupFunc(node, 0);
                    742: }
                    743:
                    744:
                    745: /* Q computations. */
                    746:
                    747: /*
                    748:  * Coeff - column;
                    749:  *
                    750:  * Compute  dest ^= qfor[28-coeff][rn[coeff+1] a]
                    751:  *
                    752:  * On 5-bit basis;
                    753:  * Length in bytes;
                    754:  */
                    755:
                    756: void
                    757: rf_IncQ(unsigned long *dest, unsigned long *buf, unsigned length,
                    758:     unsigned coeff)
                    759: {
                    760:        unsigned long a, d, new;
                    761:        unsigned long a1, a2;
                    762:        unsigned int *q = &(rf_qfor[28 - coeff][0]);
                    763:        unsigned r = rf_rn[coeff + 1];
                    764:
                    765: #define        EXTRACT(a,i)    ((a >> (5L*i)) & 0x1f)
                    766: #define        INSERT(a,i)     (a << (5L*i))
                    767:
                    768:        length /= 8;
                    769:        /* 13 5 bit quants in a 64 bit word. */
                    770:        while (length) {
                    771:                a = *buf++;
                    772:                d = *dest;
                    773:                a1 = EXTRACT(a, 0) ^ r;
                    774:                a2 = EXTRACT(a, 1) ^ r;
                    775:                new = INSERT(a2, 1) | a1;
                    776:                a1 = EXTRACT(a, 2) ^ r;
                    777:                a2 = EXTRACT(a, 3) ^ r;
                    778:                a1 = q[a1];
                    779:                a2 = q[a2];
                    780:                new = new | INSERT(a1, 2) | INSERT(a2, 3);
                    781:                a1 = EXTRACT(a, 4) ^ r;
                    782:                a2 = EXTRACT(a, 5) ^ r;
                    783:                a1 = q[a1];
                    784:                a2 = q[a2];
                    785:                new = new | INSERT(a1, 4) | INSERT(a2, 5);
                    786:                a1 = EXTRACT(a, 5) ^ r;
                    787:                a2 = EXTRACT(a, 6) ^ r;
                    788:                a1 = q[a1];
                    789:                a2 = q[a2];
                    790:                new = new | INSERT(a1, 5) | INSERT(a2, 6);
                    791: #if    RF_LONGSHIFT > 2
                    792:                a1 = EXTRACT(a, 7) ^ r;
                    793:                a2 = EXTRACT(a, 8) ^ r;
                    794:                a1 = q[a1];
                    795:                a2 = q[a2];
                    796:                new = new | INSERT(a1, 7) | INSERT(a2, 8);
                    797:                a1 = EXTRACT(a, 9) ^ r;
                    798:                a2 = EXTRACT(a, 10) ^ r;
                    799:                a1 = q[a1];
                    800:                a2 = q[a2];
                    801:                new = new | INSERT(a1, 9) | INSERT(a2, 10);
                    802:                a1 = EXTRACT(a, 11) ^ r;
                    803:                a2 = EXTRACT(a, 12) ^ r;
                    804:                a1 = q[a1];
                    805:                a2 = q[a2];
                    806:                new = new | INSERT(a1, 11) | INSERT(a2, 12);
                    807: #endif /* RF_LONGSHIFT > 2 */
                    808:                d ^= new;
                    809:                *dest++ = d;
                    810:                length--;
                    811:        }
                    812: }
                    813:
                    814:
                    815: /*
                    816:  * Compute.
                    817:  *
                    818:  * dest ^= rf_qfor[28-coeff][rf_rn[coeff+1] (old^new)]
                    819:  *
                    820:  * On a five bit basis.
                    821:  * Optimization: compute old ^ new on 64 bit basis.
                    822:  *
                    823:  * Length in bytes.
                    824:  */
                    825:
                    826: void
                    827: rf_QDelta(char *dest, char *obuf, char *nbuf, unsigned length,
                    828:     unsigned char coeff)
                    829: {
                    830:        unsigned long a, d, new;
                    831:        unsigned long a1, a2;
                    832:        unsigned int *q = &(rf_qfor[28 - coeff][0]);
                    833:        unsigned int r = rf_rn[coeff + 1];
                    834:
                    835:        r = a1 = a2 = new = d = a = 0; /* XXX For now... */
                    836:        q = NULL; /* XXX For now */
                    837:
                    838: #ifdef _KERNEL
                    839:        /*
                    840:         * PQ in kernel currently not supported because the encoding/decoding
                    841:         * table is not present.
                    842:         */
                    843:        bzero(dest, length);
                    844: #else  /* _KERNEL */
                    845:        /* This code probably doesn't work and should be rewritten. -wvcii */
                    846:        /* 13 5 bit quants in a 64 bit word. */
                    847:        length /= 8;
                    848:        while (length) {
                    849:                a = *obuf++;    /*
                    850:                                 * XXX Need to reorg to avoid cache conflicts.
                    851:                                 */
                    852:                a ^= *nbuf++;
                    853:                d = *dest;
                    854:                a1 = EXTRACT(a, 0) ^ r;
                    855:                a2 = EXTRACT(a, 1) ^ r;
                    856:                a1 = q[a1];
                    857:                a2 = q[a2];
                    858:                new = INSERT(a2, 1) | a1;
                    859:                a1 = EXTRACT(a, 2) ^ r;
                    860:                a2 = EXTRACT(a, 3) ^ r;
                    861:                a1 = q[a1];
                    862:                a2 = q[a2];
                    863:                new = new | INSERT(a1, 2) | INSERT(a2, 3);
                    864:                a1 = EXTRACT(a, 4) ^ r;
                    865:                a2 = EXTRACT(a, 5) ^ r;
                    866:                a1 = q[a1];
                    867:                a2 = q[a2];
                    868:                new = new | INSERT(a1, 4) | INSERT(a2, 5);
                    869:                a1 = EXTRACT(a, 5) ^ r;
                    870:                a2 = EXTRACT(a, 6) ^ r;
                    871:                a1 = q[a1];
                    872:                a2 = q[a2];
                    873:                new = new | INSERT(a1, 5) | INSERT(a2, 6);
                    874: #if    RF_LONGSHIFT > 2
                    875:                a1 = EXTRACT(a, 7) ^ r;
                    876:                a2 = EXTRACT(a, 8) ^ r;
                    877:                a1 = q[a1];
                    878:                a2 = q[a2];
                    879:                new = new | INSERT(a1, 7) | INSERT(a2, 8);
                    880:                a1 = EXTRACT(a, 9) ^ r;
                    881:                a2 = EXTRACT(a, 10) ^ r;
                    882:                a1 = q[a1];
                    883:                a2 = q[a2];
                    884:                new = new | INSERT(a1, 9) | INSERT(a2, 10);
                    885:                a1 = EXTRACT(a, 11) ^ r;
                    886:                a2 = EXTRACT(a, 12) ^ r;
                    887:                a1 = q[a1];
                    888:                a2 = q[a2];
                    889:                new = new | INSERT(a1, 11) | INSERT(a2, 12);
                    890: #endif /* RF_LONGSHIFT > 2 */
                    891:                d ^= new;
                    892:                *dest++ = d;
                    893:                length--;
                    894:        }
                    895: #endif /* _KERNEL */
                    896: }
                    897:
                    898:
                    899: /*
                    900:  * Recover columns a and b from the given p and q into
                    901:  * bufs abuf and bbuf. All bufs are word aligned.
                    902:  * Length is in bytes.
                    903:  */
                    904:
                    905: /*
                    906:  * XXX
                    907:  *
                    908:  * Everything about this seems wrong.
                    909:  */
                    910:
                    911: void
                    912: rf_PQ_recover(unsigned long *pbuf, unsigned long *qbuf, unsigned long *abuf,
                    913:     unsigned long *bbuf, unsigned length, unsigned coeff_a, unsigned coeff_b)
                    914: {
                    915:        unsigned long p, q, a, a0, a1;
                    916:        int col = (29 * coeff_a) + coeff_b;
                    917:        unsigned char *q0 = &(rf_qinv[col][0]);
                    918:
                    919:        length /= 8;
                    920:        while (length) {
                    921:                p = *pbuf++;
                    922:                q = *qbuf++;
                    923:                a0 = EXTRACT(p, 0);
                    924:                a1 = EXTRACT(q, 0);
                    925:                a = q0[a0 << 5 | a1];
                    926:
                    927: #define        MF(i)                                                           \
                    928: do {                                                                   \
                    929:        a0 = EXTRACT(p, i);                                             \
                    930:        a1 = EXTRACT(q, i);                                             \
                    931:        a  = a | INSERT(q0[a0<<5 | a1], i);                             \
                    932: } while (0)
                    933:
                    934:                MF(1);
                    935:                MF(2);
                    936:                MF(3);
                    937:                MF(4);
                    938:                MF(5);
                    939:                MF(6);
                    940: #if 0
                    941:                MF(7);
                    942:                MF(8);
                    943:                MF(9);
                    944:                MF(10);
                    945:                MF(11);
                    946:                MF(12);
                    947: #endif /* 0 */
                    948:                *abuf++ = a;
                    949:                *bbuf++ = a ^ p;
                    950:                length--;
                    951:        }
                    952: }
                    953:
                    954:
                    955: /*
                    956:  * Lost parity and a data column. Recover that data column.
                    957:  * Assume col coeff is lost. Let q the contents of Q after
                    958:  * all surviving data columns have been q-xored out of it.
                    959:  * Then we have the equation
                    960:  *
                    961:  *   q[28-coeff][a_i ^ r_i+1] = q
                    962:  *
                    963:  * but q is cyclic with period 31.
                    964:  * So q[3+coeff][q[28-coeff][a_i ^ r_{i+1}]] =
                    965:  *    q[31][a_i ^ r_{i+1}] = a_i ^ r_{i+1} .
                    966:  *
                    967:  * so a_i = r_{coeff+1} ^ q[3+coeff][q]
                    968:  *
                    969:  * The routine is passed q buffer and the buffer
                    970:  * the data is to be recovered into. They can be the same.
                    971:  */
                    972:
                    973: void
                    974: rf_InvertQ(unsigned long *qbuf, unsigned long *abuf, unsigned length,
                    975:     unsigned coeff)
                    976: {
                    977:        unsigned long a, new;
                    978:        unsigned long a1, a2;
                    979:        unsigned int *q = &(rf_qfor[3 + coeff][0]);
                    980:        unsigned r = rf_rn[coeff + 1];
                    981:
                    982:        /* 13 5 bit quants in a 64 bit word. */
                    983:        length /= 8;
                    984:        while (length) {
                    985:                a = *qbuf++;
                    986:                a1 = EXTRACT(a, 0);
                    987:                a2 = EXTRACT(a, 1);
                    988:                a1 = r ^ q[a1];
                    989:                a2 = r ^ q[a2];
                    990:                new = INSERT(a2, 1) | a1;
                    991:
                    992: #define        M(i,j)                                                          \
                    993: do {                                                                   \
                    994:        a1 = EXTRACT(a, i);                                             \
                    995:        a2 = EXTRACT(a, j);                                             \
                    996:        a1 = r ^ q[a1];                                                 \
                    997:        a2 = r ^ q[a2];                                                 \
                    998:        new = new | INSERT(a1, i) | INSERT(a2, j);                      \
                    999: } while (0)
                   1000:
                   1001:                M(2, 3);
                   1002:                M(4, 5);
                   1003:                M(5, 6);
                   1004: #if    RF_LONGSHIFT > 2
                   1005:                M(7, 8);
                   1006:                M(9, 10);
                   1007:                M(11, 12);
                   1008: #endif /* RF_LONGSHIFT > 2 */
                   1009:                *abuf++ = new;
                   1010:                length--;
                   1011:        }
                   1012: }
                   1013: #endif /* (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) */

CVSweb