[BACK]Return to rf_evenodd_dagfuncs.c CVS log [TXT][DIR] Up to [local] / sys / dev / raidframe

Annotation of sys/dev/raidframe/rf_evenodd_dagfuncs.c, Revision 1.1.1.1

1.1       nbrk        1: /*     $OpenBSD: rf_evenodd_dagfuncs.c,v 1.7 2002/12/16 07:01:04 tdeval Exp $  */
                      2: /*     $NetBSD: rf_evenodd_dagfuncs.c,v 1.6 2000/03/30 12:45:40 augustss Exp $ */
                      3:
                      4: /*
                      5:  * Copyright (c) 1995 Carnegie-Mellon University.
                      6:  * All rights reserved.
                      7:  *
                      8:  * Author: ChangMing Wu
                      9:  *
                     10:  * Permission to use, copy, modify and distribute this software and
                     11:  * its documentation is hereby granted, provided that both the copyright
                     12:  * notice and this permission notice appear in all copies of the
                     13:  * software, derivative works or modified versions, and any portions
                     14:  * thereof, and that both notices appear in supporting documentation.
                     15:  *
                     16:  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
                     17:  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
                     18:  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
                     19:  *
                     20:  * Carnegie Mellon requests users of this software to return to
                     21:  *
                     22:  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
                     23:  *  School of Computer Science
                     24:  *  Carnegie Mellon University
                     25:  *  Pittsburgh PA 15213-3890
                     26:  *
                     27:  * any improvements or extensions that they make and grant Carnegie the
                     28:  * rights to redistribute these changes.
                     29:  */
                     30:
                     31: /*
                     32:  * Code for RAID-EVENODD architecture.
                     33:  */
                     34:
                     35: #include "rf_types.h"
                     36: #include "rf_raid.h"
                     37: #include "rf_dag.h"
                     38: #include "rf_dagffrd.h"
                     39: #include "rf_dagffwr.h"
                     40: #include "rf_dagdegrd.h"
                     41: #include "rf_dagdegwr.h"
                     42: #include "rf_dagutils.h"
                     43: #include "rf_dagfuncs.h"
                     44: #include "rf_etimer.h"
                     45: #include "rf_general.h"
                     46: #include "rf_configure.h"
                     47: #include "rf_parityscan.h"
                     48: #include "rf_evenodd.h"
                     49: #include "rf_evenodd_dagfuncs.h"
                     50:
                     51: /* These redundant functions are for small write. */
                     52: RF_RedFuncs_t rf_EOSmallWritePFuncs = {
                     53:        rf_RegularXorFunc, "Regular Old-New P",
                     54:        rf_SimpleXorFunc, "Simple Old-New P"
                     55: };
                     56: RF_RedFuncs_t rf_EOSmallWriteEFuncs = {
                     57:        rf_RegularONEFunc, "Regular Old-New E",
                     58:        rf_SimpleONEFunc, "Regular Old-New E"
                     59: };
                     60: /* These redundant functions are for degraded read. */
                     61: RF_RedFuncs_t rf_eoPRecoveryFuncs = {
                     62:        rf_RecoveryXorFunc, "Recovery Xr",
                     63:        rf_RecoveryXorFunc, "Recovery Xr"
                     64: };
                     65: RF_RedFuncs_t rf_eoERecoveryFuncs = {
                     66:        rf_RecoveryEFunc, "Recovery E Func",
                     67:        rf_RecoveryEFunc, "Recovery E Func"
                     68: };
                     69:
                     70:
                     71: /*****************************************************************************
                     72:  *   The following encoding node functions is used in
                     73:  *   EO_000_CreateLargeWriteDAG.
                     74:  *****************************************************************************/
                     75: int
                     76: rf_RegularPEFunc(RF_DagNode_t *node)
                     77: {
                     78:        rf_RegularESubroutine(node, node->results[1]);
                     79:        rf_RegularXorFunc(node);        /* Do the wakeup here ! */
                     80: #if 1
                     81:        return (0);             /* XXX This was missing... GO */
                     82: #endif
                     83: }
                     84:
                     85:
                     86: /*****************************************************************************
                     87:  *  For EO_001_CreateSmallWriteDAG, there are (i) RegularONEFunc() and
                     88:  *  (ii) SimpleONEFunc() to be used. The previous case is when write accesses
                     89:  *  at least sectors of full stripe unit.
                     90:  *  The later function is used when the write accesses two stripe units but
                     91:  *  with total sectors less than sectors per SU. In this case, the access of
                     92:  *  parity and 'E' are shown as disconnected areas in their stripe unit and
                     93:  *  parity write and 'E' write are both divided into two distinct writes
                     94:  *  (totally four). This simple old-new write and regular old-new write happen
                     95:  *  as in RAID-5.
                     96:  *****************************************************************************/
                     97:
                     98: /*
                     99:  * Algorithm:
                    100:  *   1. Store the difference of old data and new data in the Rod buffer.
                    101:  *   2. Then encode this buffer into the buffer that already have old 'E'
                    102:  *     information inside it, the result can be shown to be the new 'E'
                    103:  *     information.
                    104:  *   3. Xor the Wnd buffer into the difference buffer to recover the original
                    105:  *     old data.
                    106:  * Here we have another alternative: to allocate a temporary buffer for
                    107:  * storing the difference of old data and new data, then encode temp buf
                    108:  * into old 'E' buf to form new 'E', but this approach takes the same speed
                    109:  * as the previous, and needs more memory.
                    110:  */
                    111: int
                    112: rf_RegularONEFunc(RF_DagNode_t *node)
                    113: {
                    114:        RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
                    115:        RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
                    116:        int EpdaIndex = (node->numParams - 1) / 2 - 1;  /*
                    117:                                                         * The parameter of node
                    118:                                                         * where you can find
                    119:                                                         * e-pda.
                    120:                                                         */
                    121:        int i, k, retcode = 0;
                    122:        int suoffset, length;
                    123:        RF_RowCol_t scol;
                    124:        char *srcbuf, *destbuf;
                    125:        RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
                    126:        RF_Etimer_t timer;
                    127:        RF_PhysDiskAddr_t *pda, *EPDA = (RF_PhysDiskAddr_t *)
                    128:            node->params[EpdaIndex].p;
                    129:        /* Generally zero. */
                    130:        int ESUOffset = rf_StripeUnitOffset(layoutPtr, EPDA->startSector);
                    131:
                    132:        RF_ASSERT(EPDA->type == RF_PDA_TYPE_Q);
                    133:        RF_ASSERT(ESUOffset == 0);
                    134:
                    135:        RF_ETIMER_START(timer);
                    136:
                    137:        /*
                    138:         * Xor the Wnd buffer into Rod buffer. The difference of old data and
                    139:         * new data is stored in Rod buffer.
                    140:         */
                    141:        for (k = 0; k < EpdaIndex; k += 2) {
                    142:                length = rf_RaidAddressToByte(raidPtr,
                    143:                    ((RF_PhysDiskAddr_t *) node->params[k].p)->numSector);
                    144:                retcode = rf_bxor(node->params[k + EpdaIndex + 3].p,
                    145:                    node->params[k + 1].p, length, node->dagHdr->bp);
                    146:        }
                    147:        /*
                    148:         * Start to encode the buffer, storing the difference of old data and
                    149:         * new data into 'E' buffer.
                    150:         */
                    151:        for (i = 0; i < EpdaIndex; i += 2)
                    152:                if (node->params[i + 1].p != node->results[0]) {
                    153:                        /* results[0] is buf ptr of E. */
                    154:                        pda = (RF_PhysDiskAddr_t *) node->params[i].p;
                    155:                        srcbuf = (char *) node->params[i + 1].p;
                    156:                        scol = rf_EUCol(layoutPtr, pda->raidAddress);
                    157:                        suoffset = rf_StripeUnitOffset(layoutPtr,
                    158:                            pda->startSector);
                    159:                        destbuf = ((char *) node->results[0]) +
                    160:                            rf_RaidAddressToByte(raidPtr, suoffset);
                    161:                        rf_e_encToBuf(raidPtr, scol, srcbuf,
                    162:                            RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector);
                    163:                }
                    164:        /*
                    165:         * Recover the original old data to be used by parity encoding
                    166:         * function in XorNode.
                    167:         */
                    168:        for (k = 0; k < EpdaIndex; k += 2) {
                    169:                length = rf_RaidAddressToByte(raidPtr,
                    170:                    ((RF_PhysDiskAddr_t *) node->params[k].p)->numSector);
                    171:                retcode = rf_bxor(node->params[k + EpdaIndex + 3].p,
                    172:                    node->params[k + 1].p, length, node->dagHdr->bp);
                    173:        }
                    174:        RF_ETIMER_STOP(timer);
                    175:        RF_ETIMER_EVAL(timer);
                    176:        tracerec->q_us += RF_ETIMER_VAL_US(timer);
                    177:        rf_GenericWakeupFunc(node, 0);
                    178: #if 1
                    179:        return (0);             /* XXX This was missing... GO */
                    180: #endif
                    181: }
                    182:
                    183: int
                    184: rf_SimpleONEFunc(RF_DagNode_t *node)
                    185: {
                    186:        RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
                    187:        RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
                    188:        RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p;
                    189:        int retcode = 0;
                    190:        char *srcbuf, *destbuf;
                    191:        RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
                    192:        int length;
                    193:        RF_RowCol_t scol;
                    194:        RF_Etimer_t timer;
                    195:
                    196:        RF_ASSERT(((RF_PhysDiskAddr_t *) node->params[2].p)->type ==
                    197:            RF_PDA_TYPE_Q);
                    198:        if (node->dagHdr->status == rf_enable) {
                    199:                RF_ETIMER_START(timer);
                    200:                /* This is a pda of writeDataNodes. */
                    201:                length = rf_RaidAddressToByte(raidPtr,
                    202:                    ((RF_PhysDiskAddr_t *) node->params[4].p)->numSector);
                    203:                /* bxor to buffer of readDataNodes. */
                    204:                retcode = rf_bxor(node->params[5].p, node->params[1].p,
                    205:                    length, node->dagHdr->bp);
                    206:                /*
                    207:                 * Find out the corresponding column in encoding matrix for
                    208:                 * write column to be encoded into redundant disk 'E'.
                    209:                 */
                    210:                scol = rf_EUCol(layoutPtr, pda->raidAddress);
                    211:                srcbuf = node->params[1].p;
                    212:                destbuf = node->params[3].p;
                    213:                /* Start encoding process. */
                    214:                rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2,
                    215:                    destbuf, pda->numSector);
                    216:                rf_bxor(node->params[5].p, node->params[1].p, length,
                    217:                    node->dagHdr->bp);
                    218:                RF_ETIMER_STOP(timer);
                    219:                RF_ETIMER_EVAL(timer);
                    220:                tracerec->q_us += RF_ETIMER_VAL_US(timer);
                    221:
                    222:        }
                    223:        return (rf_GenericWakeupFunc(node, retcode));   /*
                    224:                                                         * Call wake func
                    225:                                                         * explicitly since no
                    226:                                                         * I/O in this node.
                    227:                                                         */
                    228: }
                    229:
                    230:
                    231: /*
                    232:  * Called by rf_RegularPEFunc(node) and rf_RegularEFunc(node)
                    233:  * in f.f. large write.
                    234:  */
                    235: void
                    236: rf_RegularESubroutine(RF_DagNode_t *node, char *ebuf)
                    237: {
                    238:        RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
                    239:        RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
                    240:        RF_PhysDiskAddr_t *pda;
                    241:        int i, suoffset;
                    242:        RF_RowCol_t scol;
                    243:        char *srcbuf, *destbuf;
                    244:        RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
                    245:        RF_Etimer_t timer;
                    246:
                    247:        RF_ETIMER_START(timer);
                    248:        for (i = 0; i < node->numParams - 2; i += 2) {
                    249:                RF_ASSERT(node->params[i + 1].p != ebuf);
                    250:                pda = (RF_PhysDiskAddr_t *) node->params[i].p;
                    251:                suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
                    252:                scol = rf_EUCol(layoutPtr, pda->raidAddress);
                    253:                srcbuf = (char *) node->params[i + 1].p;
                    254:                destbuf = ebuf + rf_RaidAddressToByte(raidPtr, suoffset);
                    255:                rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2,
                    256:                    destbuf, pda->numSector);
                    257:        }
                    258:        RF_ETIMER_STOP(timer);
                    259:        RF_ETIMER_EVAL(timer);
                    260:        tracerec->xor_us += RF_ETIMER_VAL_US(timer);
                    261: }
                    262:
                    263:
                    264: /*****************************************************************************
                    265:  *                      Used in  EO_001_CreateLargeWriteDAG.
                    266:  *****************************************************************************/
                    267: int
                    268: rf_RegularEFunc(RF_DagNode_t *node)
                    269: {
                    270:        rf_RegularESubroutine(node, node->results[0]);
                    271:        rf_GenericWakeupFunc(node, 0);
                    272: #if 1
                    273:        return (0);             /* XXX This was missing... GO */
                    274: #endif
                    275: }
                    276:
                    277:
                    278: /*****************************************************************************
                    279:  * This degraded function allow only two cases:
                    280:  *   1. When write accesses the full failed stripe unit, then the access can
                    281:  *     be more than one stripe unit.
                    282:  *   2. When write accesses only part of the failed SU, we assume accesses of
                    283:  *     more than one stripe unit are not allowed so that the write can be
                    284:  *     dealt with like a large write.
                    285:  * The following function is based on these assumptions. So except in the
                    286:  * second case, it looks the same as a large write encoding function. But
                    287:  * this is not exactly the normal way of doing a degraded write, since
                    288:  * RAIDframe has to break cases of accesses other than the above two into
                    289:  * smaller accesses. We may have to change DegrESubroutin in the future.
                    290:  *****************************************************************************/
                    291: void
                    292: rf_DegrESubroutine(RF_DagNode_t *node, char *ebuf)
                    293: {
                    294:        RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
                    295:        RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
                    296:        RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams - 2].p;
                    297:        RF_PhysDiskAddr_t *pda;
                    298:        int i, suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr, failedPDA->startSector);
                    299:        RF_RowCol_t scol;
                    300:        char *srcbuf, *destbuf;
                    301:        RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
                    302:        RF_Etimer_t timer;
                    303:
                    304:        RF_ETIMER_START(timer);
                    305:        for (i = 0; i < node->numParams - 2; i += 2) {
                    306:                RF_ASSERT(node->params[i + 1].p != ebuf);
                    307:                pda = (RF_PhysDiskAddr_t *) node->params[i].p;
                    308:                suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
                    309:                scol = rf_EUCol(layoutPtr, pda->raidAddress);
                    310:                srcbuf = (char *) node->params[i + 1].p;
                    311:                destbuf = ebuf + rf_RaidAddressToByte(raidPtr, suoffset - failedSUOffset);
                    312:                rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector);
                    313:        }
                    314:
                    315:        RF_ETIMER_STOP(timer);
                    316:        RF_ETIMER_EVAL(timer);
                    317:        tracerec->q_us += RF_ETIMER_VAL_US(timer);
                    318: }
                    319:
                    320:
                    321: /*****************************************************************************
                    322:  * This function is used in case where one data disk failed and both redundant
                    323:  * disks are alive. It is used in the EO_100_CreateWriteDAG. Note: if there is
                    324:  * another disk failed in the stripe but not accessed at this time, then we
                    325:  * should, instead, use the rf_EOWriteDoubleRecoveryFunc().
                    326:  *****************************************************************************/
                    327: int
                    328: rf_Degraded_100_EOFunc(RF_DagNode_t *node)
                    329: {
                    330:        rf_DegrESubroutine(node, node->results[1]);
                    331:        rf_RecoveryXorFunc(node);       /* Does the wakeup here ! */
                    332: #if 1
                    333:        return (0);             /* XXX This was missing... Should these be
                    334:                                 * void functions ??? GO */
                    335: #endif
                    336: }
                    337:
                    338:
                    339: /*****************************************************************************
                    340:  * This function is to encode one sector in one of the data disks to the E
                    341:  * disk. However, in evenodd this function can also be used as decoding
                    342:  * function to recover data from dead disk in the case of parity failure and
                    343:  * a single data failure.
                    344:  *****************************************************************************/
                    345: void
                    346: rf_e_EncOneSect(RF_RowCol_t srcLogicCol, char *srcSecbuf,
                    347:     RF_RowCol_t destLogicCol, char *destSecbuf, int bytesPerSector)
                    348: {
                    349:        int S_index;            /*
                    350:                                 * Index of the EU in the src col which need
                    351:                                 * be Xored into all EUs in a dest sector.
                    352:                                 */
                    353:        int numRowInEncMatrix = (RF_EO_MATRIX_DIM) - 1;
                    354:        RF_RowCol_t j, indexInDest;     /*
                    355:                                         * Row index of an encoding unit in
                    356:                                         * the destination column of encoding
                    357:                                         * matrix.
                    358:                                         */
                    359:        RF_RowCol_t indexInSrc; /*
                    360:                                 * Row index of an encoding unit in the source
                    361:                                 * column used for recovery.
                    362:                                 */
                    363:        int bytesPerEU = bytesPerSector / numRowInEncMatrix;
                    364:
                    365: #if    RF_EO_MATRIX_DIM > 17
                    366:        int shortsPerEU = bytesPerEU / sizeof(short);
                    367:        short  *destShortBuf, *srcShortBuf1, *srcShortBuf2;
                    368:        short temp1;
                    369: #elif  RF_EO_MATRIX_DIM == 17
                    370:        int longsPerEU = bytesPerEU / sizeof(long);
                    371:        long *destLongBuf, *srcLongBuf1, *srcLongBuf2;
                    372:        long temp1;
                    373: #endif
                    374:
                    375: #if    RF_EO_MATRIX_DIM > 17
                    376:        RF_ASSERT(sizeof(short) == 2 || sizeof(short) == 1);
                    377:        RF_ASSERT(bytesPerEU % sizeof(short) == 0);
                    378: #elif  RF_EO_MATRIX_DIM == 17
                    379:        RF_ASSERT(sizeof(long) == 8 || sizeof(long) == 4);
                    380:        RF_ASSERT(bytesPerEU % sizeof(long) == 0);
                    381: #endif
                    382:
                    383:        S_index = rf_EO_Mod((RF_EO_MATRIX_DIM - 1 + destLogicCol - srcLogicCol), RF_EO_MATRIX_DIM);
                    384: #if    RF_EO_MATRIX_DIM > 17
                    385:        srcShortBuf1 = (short *) (srcSecbuf + S_index * bytesPerEU);
                    386: #elif  RF_EO_MATRIX_DIM == 17
                    387:        srcLongBuf1 = (long *) (srcSecbuf + S_index * bytesPerEU);
                    388: #endif
                    389:
                    390:        for (indexInDest = 0; indexInDest < numRowInEncMatrix; indexInDest++) {
                    391:                indexInSrc = rf_EO_Mod((indexInDest + destLogicCol - srcLogicCol), RF_EO_MATRIX_DIM);
                    392:
                    393: #if    RF_EO_MATRIX_DIM > 17
                    394:                destShortBuf = (short *) (destSecbuf + indexInDest * bytesPerEU);
                    395:                srcShortBuf2 = (short *) (srcSecbuf + indexInSrc * bytesPerEU);
                    396:                for (j = 0; j < shortsPerEU; j++) {
                    397:                        temp1 = destShortBuf[j] ^ srcShortBuf1[j];
                    398:                        /* Note: S_index won't be at the end row for any src
                    399:                         * col ! */
                    400:                        if (indexInSrc != RF_EO_MATRIX_DIM - 1)
                    401:                                destShortBuf[j] = (srcShortBuf2[j]) ^ temp1;
                    402:                        /* if indexInSrc is at the end row, ie.
                    403:                         * RF_EO_MATRIX_DIM -1, then all elements are zero ! */
                    404:                        else
                    405:                                destShortBuf[j] = temp1;
                    406:                }
                    407:
                    408: #elif  RF_EO_MATRIX_DIM == 17
                    409:                destLongBuf = (long *) (destSecbuf + indexInDest * bytesPerEU);
                    410:                srcLongBuf2 = (long *) (srcSecbuf + indexInSrc * bytesPerEU);
                    411:                for (j = 0; j < longsPerEU; j++) {
                    412:                        temp1 = destLongBuf[j] ^ srcLongBuf1[j];
                    413:                        if (indexInSrc != RF_EO_MATRIX_DIM - 1)
                    414:                                destLongBuf[j] = (srcLongBuf2[j]) ^ temp1;
                    415:                        else
                    416:                                destLongBuf[j] = temp1;
                    417:                }
                    418: #endif
                    419:        }
                    420: }
                    421:
                    422: void
                    423: rf_e_encToBuf(RF_Raid_t *raidPtr, RF_RowCol_t srcLogicCol, char *srcbuf,
                    424:     RF_RowCol_t destLogicCol, char *destbuf, int numSector)
                    425: {
                    426:        int i, bytesPerSector = rf_RaidAddressToByte(raidPtr, 1);
                    427:
                    428:        for (i = 0; i < numSector; i++) {
                    429:                rf_e_EncOneSect(srcLogicCol, srcbuf, destLogicCol, destbuf, bytesPerSector);
                    430:                srcbuf += bytesPerSector;
                    431:                destbuf += bytesPerSector;
                    432:        }
                    433: }
                    434:
                    435:
                    436: /*****************************************************************************
                    437:  * when parity die and one data die, We use second redundant information, 'E',
                    438:  * to recover the data in dead disk. This function is used in the recovery node of
                    439:  * for EO_110_CreateReadDAG
                    440:  *****************************************************************************/
                    441: int
                    442: rf_RecoveryEFunc(RF_DagNode_t *node)
                    443: {
                    444:        RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
                    445:        RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
                    446:        RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams - 2].p;
                    447:        RF_RowCol_t scol;       /* source logical column */
                    448:        RF_RowCol_t fcol = rf_EUCol(layoutPtr, failedPDA->raidAddress); /* logical column of
                    449:                                                                         * failed SU */
                    450:        int i;
                    451:        RF_PhysDiskAddr_t *pda;
                    452:        int suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr, failedPDA->startSector);
                    453:        char *srcbuf, *destbuf;
                    454:        RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
                    455:        RF_Etimer_t timer;
                    456:
                    457:        bzero((char *) node->results[0], rf_RaidAddressToByte(raidPtr, failedPDA->numSector));
                    458:        if (node->dagHdr->status == rf_enable) {
                    459:                RF_ETIMER_START(timer);
                    460:                for (i = 0; i < node->numParams - 2; i += 2)
                    461:                        if (node->params[i + 1].p != node->results[0]) {
                    462:                                pda = (RF_PhysDiskAddr_t *) node->params[i].p;
                    463:                                if (i == node->numParams - 4)
                    464:                                        scol = RF_EO_MATRIX_DIM - 2;    /* the colume of
                    465:                                                                         * redundant E */
                    466:                                else
                    467:                                        scol = rf_EUCol(layoutPtr, pda->raidAddress);
                    468:                                srcbuf = (char *) node->params[i + 1].p;
                    469:                                suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
                    470:                                destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr, suoffset - failedSUOffset);
                    471:                                rf_e_encToBuf(raidPtr, scol, srcbuf, fcol, destbuf, pda->numSector);
                    472:                        }
                    473:                RF_ETIMER_STOP(timer);
                    474:                RF_ETIMER_EVAL(timer);
                    475:                tracerec->xor_us += RF_ETIMER_VAL_US(timer);
                    476:        }
                    477:        return (rf_GenericWakeupFunc(node, 0)); /* node execute successfully */
                    478: }
                    479:
                    480:
                    481: /*****************************************************************************
                    482:  * This function is used in the case where one data and the parity have failed.
                    483:  * (in EO_110_CreateWriteDAG)
                    484:  *****************************************************************************/
                    485: int
                    486: rf_EO_DegradedWriteEFunc(RF_DagNode_t *node)
                    487: {
                    488:        rf_DegrESubroutine(node, node->results[0]);
                    489:        rf_GenericWakeupFunc(node, 0);
                    490: #if 1
                    491:        return (0);             /* XXX Yet another one !!! GO */
                    492: #endif
                    493: }
                    494:
                    495:
                    496:
                    497: /*****************************************************************************
                    498:  *     THE FUNCTION IS FOR DOUBLE DEGRADED READ AND WRITE CASES.
                    499:  *****************************************************************************/
                    500:
                    501: void
                    502: rf_doubleEOdecode(RF_Raid_t *raidPtr, char **rrdbuf, char **dest,
                    503:     RF_RowCol_t *fcol, char *pbuf, char *ebuf)
                    504: {
                    505:        RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) &(raidPtr->Layout);
                    506:        int i, j, k, f1, f2, row;
                    507:        int rrdrow, erow, count = 0;
                    508:        int bytesPerSector = rf_RaidAddressToByte(raidPtr, 1);
                    509:        int numRowInEncMatrix = (RF_EO_MATRIX_DIM) - 1;
                    510: #if 0
                    511:        int pcol = (RF_EO_MATRIX_DIM) - 1;
                    512: #endif
                    513:        int ecol = (RF_EO_MATRIX_DIM) - 2;
                    514:        int bytesPerEU = bytesPerSector / numRowInEncMatrix;
                    515:        int numDataCol = layoutPtr->numDataCol;
                    516: #if    RF_EO_MATRIX_DIM > 17
                    517:        int shortsPerEU = bytesPerEU / sizeof(short);
                    518:        short *rrdbuf_current, *pbuf_current, *ebuf_current;
                    519:        short *dest_smaller, *dest_smaller_current;
                    520:        short *dest_larger, *dest_larger_current;
                    521:        short *temp;
                    522:        short *P;
                    523:
                    524:        RF_ASSERT(bytesPerEU % sizeof(short) == 0);
                    525:        RF_Malloc(P, bytesPerEU, (short *));
                    526:        RF_Malloc(temp, bytesPerEU, (short *));
                    527: #elif  RF_EO_MATRIX_DIM == 17
                    528:        int longsPerEU = bytesPerEU / sizeof(long);
                    529:        long *rrdbuf_current, *pbuf_current, *ebuf_current;
                    530:        long *dest_smaller, *dest_smaller_current;
                    531:        long *dest_larger, *dest_larger_current;
                    532:        long *temp;
                    533:        long *P;
                    534:
                    535:        RF_ASSERT(bytesPerEU % sizeof(long) == 0);
                    536:        RF_Malloc(P, bytesPerEU, (long *));
                    537:        RF_Malloc(temp, bytesPerEU, (long *));
                    538: #endif
                    539:        RF_ASSERT(*((long *) dest[0]) == 0);
                    540:        RF_ASSERT(*((long *) dest[1]) == 0);
                    541:        bzero((char *) P, bytesPerEU);
                    542:        bzero((char *) temp, bytesPerEU);
                    543:        RF_ASSERT(*P == 0);
                    544:        /*
                    545:         * Calculate the 'P' parameter, which, not parity, is the Xor of all
                    546:         * elements in the last two column, ie. 'E' and 'parity' columns, see
                    547:         * the Ref. paper by Blaum, et al 1993.
                    548:         */
                    549:        for (i = 0; i < numRowInEncMatrix; i++)
                    550:                for (k = 0; k < longsPerEU; k++) {
                    551: #if    RF_EO_MATRIX_DIM > 17
                    552:                        ebuf_current = ((short *) ebuf) + i * shortsPerEU + k;
                    553:                        pbuf_current = ((short *) pbuf) + i * shortsPerEU + k;
                    554: #elif  RF_EO_MATRIX_DIM == 17
                    555:                        ebuf_current = ((long *) ebuf) + i * longsPerEU + k;
                    556:                        pbuf_current = ((long *) pbuf) + i * longsPerEU + k;
                    557: #endif
                    558:                        P[k] ^= *ebuf_current;
                    559:                        P[k] ^= *pbuf_current;
                    560:                }
                    561:        RF_ASSERT(fcol[0] != fcol[1]);
                    562:        if (fcol[0] < fcol[1]) {
                    563: #if    RF_EO_MATRIX_DIM > 17
                    564:                dest_smaller = (short *) (dest[0]);
                    565:                dest_larger = (short *) (dest[1]);
                    566: #elif  RF_EO_MATRIX_DIM == 17
                    567:                dest_smaller = (long *) (dest[0]);
                    568:                dest_larger = (long *) (dest[1]);
                    569: #endif
                    570:                f1 = fcol[0];
                    571:                f2 = fcol[1];
                    572:        } else {
                    573: #if    RF_EO_MATRIX_DIM > 17
                    574:                dest_smaller = (short *) (dest[1]);
                    575:                dest_larger = (short *) (dest[0]);
                    576: #elif  RF_EO_MATRIX_DIM == 17
                    577:                dest_smaller = (long *) (dest[1]);
                    578:                dest_larger = (long *) (dest[0]);
                    579: #endif
                    580:                f1 = fcol[1];
                    581:                f2 = fcol[0];
                    582:        }
                    583:        row = (RF_EO_MATRIX_DIM) - 1;
                    584:        while ((row = rf_EO_Mod((row + f1 - f2), RF_EO_MATRIX_DIM)) !=
                    585:            ((RF_EO_MATRIX_DIM) - 1)) {
                    586: #if    RF_EO_MATRIX_DIM > 17
                    587:                dest_larger_current = dest_larger + row * shortsPerEU;
                    588:                dest_smaller_current = dest_smaller + row * shortsPerEU;
                    589: #elif  RF_EO_MATRIX_DIM == 17
                    590:                dest_larger_current = dest_larger + row * longsPerEU;
                    591:                dest_smaller_current = dest_smaller + row * longsPerEU;
                    592: #endif
                    593:                /*
                    594:                 * Do the diagonal recovery. Initially, temp[k] = (failed 1),
                    595:                 * which is the failed data in the column that has smaller
                    596:                 * col index.
                    597:                 */
                    598:                /* Step 1:  ^(SUM of nonfailed in-diagonal A(rrdrow,0..m-3)) */
                    599:                for (j = 0; j < numDataCol; j++) {
                    600:                        if (j == f1 || j == f2)
                    601:                                continue;
                    602:                        rrdrow = rf_EO_Mod((row + f2 - j), RF_EO_MATRIX_DIM);
                    603:                        if (rrdrow != (RF_EO_MATRIX_DIM) - 1) {
                    604: #if    RF_EO_MATRIX_DIM > 17
                    605:                                rrdbuf_current = (short *) (rrdbuf[j]) +
                    606:                                    rrdrow * shortsPerEU;
                    607:                                for (k = 0; k < shortsPerEU; k++)
                    608:                                        temp[k] ^= *(rrdbuf_current + k);
                    609: #elif  RF_EO_MATRIX_DIM == 17
                    610:                                rrdbuf_current = (long *) (rrdbuf[j]) +
                    611:                                    rrdrow * longsPerEU;
                    612:                                for (k = 0; k < longsPerEU; k++)
                    613:                                        temp[k] ^= *(rrdbuf_current + k);
                    614: #endif
                    615:                        }
                    616:                }
                    617:                /*
                    618:                 * Step 2:  ^E(erow,m-2), If erow is at the bottom row, don't
                    619:                 * Xor into it.  E(erow,m-2) = (principle diagonal) ^ (failed
                    620:                 * 1) ^ (failed 2) ^ (SUM of nonfailed in-diagonal
                    621:                 * A(rrdrow,0..m-3))
                    622:                 * After this step, temp[k] = (principle diagonal) ^ (failed 2).
                    623:                 */
                    624:
                    625:                erow = rf_EO_Mod((row + f2 - ecol), (RF_EO_MATRIX_DIM));
                    626:                if (erow != (RF_EO_MATRIX_DIM) - 1) {
                    627: #if    RF_EO_MATRIX_DIM > 17
                    628:                        ebuf_current = (short *) ebuf + shortsPerEU * erow;
                    629:                        for (k = 0; k < shortsPerEU; k++)
                    630:                                temp[k] ^= *(ebuf_current + k);
                    631: #elif  RF_EO_MATRIX_DIM == 17
                    632:                        ebuf_current = (long *) ebuf + longsPerEU * erow;
                    633:                        for (k = 0; k < longsPerEU; k++)
                    634:                                temp[k] ^= *(ebuf_current + k);
                    635: #endif
                    636:                }
                    637:                /*
                    638:                 * Step 3: ^P to obtain the failed data (failed 2). P can be
                    639:                 * proved to be actually (principal diagonal). After this
                    640:                 * step, temp[k] = (failed 2), the failed data to be recovered.
                    641:                 */
                    642: #if    RF_EO_MATRIX_DIM > 17
                    643:                for (k = 0; k < shortsPerEU; k++)
                    644:                        temp[k] ^= P[k];
                    645:                /* Put the data into the destination buffer. */
                    646:                for (k = 0; k < shortsPerEU; k++)
                    647:                        dest_larger_current[k] = temp[k];
                    648: #elif  RF_EO_MATRIX_DIM == 17
                    649:                for (k = 0; k < longsPerEU; k++)
                    650:                        temp[k] ^= P[k];
                    651:                /* Put the data into the destination buffer. */
                    652:                for (k = 0; k < longsPerEU; k++)
                    653:                        dest_larger_current[k] = temp[k];
                    654: #endif
                    655:
                    656:                /* THE FOLLOWING DO THE HORIZONTAL XOR. */
                    657:                /*
                    658:                 * Step 1:  ^(SUM of A(row,0..m-3)), ie. all nonfailed data
                    659:                 * columns.
                    660:                 */
                    661:                for (j = 0; j < numDataCol; j++) {
                    662:                        if (j == f1 || j == f2)
                    663:                                continue;
                    664: #if    RF_EO_MATRIX_DIM > 17
                    665:                        rrdbuf_current = (short *) (rrdbuf[j]) +
                    666:                            row * shortsPerEU;
                    667:                        for (k = 0; k < shortsPerEU; k++)
                    668:                                temp[k] ^= *(rrdbuf_current + k);
                    669: #elif  RF_EO_MATRIX_DIM == 17
                    670:                        rrdbuf_current = (long *) (rrdbuf[j]) +
                    671:                            row * longsPerEU;
                    672:                        for (k = 0; k < longsPerEU; k++)
                    673:                                temp[k] ^= *(rrdbuf_current + k);
                    674: #endif
                    675:                }
                    676:                /* Step 2: ^A(row,m-1) */
                    677:                /* Step 3: Put the data into the destination buffer. */
                    678: #if    RF_EO_MATRIX_DIM > 17
                    679:                pbuf_current = (short *) pbuf + shortsPerEU * row;
                    680:                for (k = 0; k < shortsPerEU; k++)
                    681:                        temp[k] ^= *(pbuf_current + k);
                    682:                for (k = 0; k < shortsPerEU; k++)
                    683:                        dest_smaller_current[k] = temp[k];
                    684: #elif  RF_EO_MATRIX_DIM == 17
                    685:                pbuf_current = (long *) pbuf + longsPerEU * row;
                    686:                for (k = 0; k < longsPerEU; k++)
                    687:                        temp[k] ^= *(pbuf_current + k);
                    688:                for (k = 0; k < longsPerEU; k++)
                    689:                        dest_smaller_current[k] = temp[k];
                    690: #endif
                    691:                count++;
                    692:        }
                    693:        /*
                    694:         * Check if all Encoding Unit in the data buffer have been decoded ?
                    695:         * According to EvenOdd theory, if "RF_EO_MATRIX_DIM" is a prime
                    696:         * number, this algorithm will covered all buffer.
                    697:         */
                    698:        RF_ASSERT(count == numRowInEncMatrix);
                    699:        RF_Free((char *) P, bytesPerEU);
                    700:        RF_Free((char *) temp, bytesPerEU);
                    701: }
                    702:
                    703:
                    704: /*****************************************************************************
                    705:  *     This function is called by double degraded read EO_200_CreateReadDAG.
                    706:  *****************************************************************************/
                    707: int
                    708: rf_EvenOddDoubleRecoveryFunc(RF_DagNode_t *node)
                    709: {
                    710:        int ndataParam = 0;
                    711:        int np = node->numParams;
                    712:        RF_AccessStripeMap_t *asmap = (RF_AccessStripeMap_t *)
                    713:            node->params[np - 1].p;
                    714:        RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 2].p;
                    715:        RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & (raidPtr->Layout);
                    716:        int i, prm, sector, nresults = node->numResults;
                    717:        RF_SectorCount_t secPerSU = layoutPtr->sectorsPerStripeUnit;
                    718:        unsigned sosAddr;
                    719:        int two = 0, mallc_one = 0, mallc_two = 0;      /*
                    720:                                                         * Flags to indicate if
                    721:                                                         * memory is allocated.
                    722:                                                         */
                    723:        int bytesPerSector = rf_RaidAddressToByte(raidPtr, 1);
                    724:        RF_PhysDiskAddr_t *ppda, *ppda2, *epda, *epda2, *pda, *pda0, *pda1,
                    725:            npda;
                    726:        RF_RowCol_t fcol[2], fsuoff[2], fsuend[2],
                    727:            numDataCol = layoutPtr->numDataCol;
                    728:        char **buf, *ebuf, *pbuf, *dest[2];
                    729:        long *suoff = NULL, *suend = NULL, *prmToCol = NULL, psuoff, esuoff;
                    730:        RF_SectorNum_t startSector, endSector;
                    731:        RF_Etimer_t timer;
                    732:        RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
                    733:
                    734:        RF_ETIMER_START(timer);
                    735:
                    736:        /*
                    737:         * Find out the number of parameters that are pdas for data
                    738:         * information.
                    739:         */
                    740:        for (i = 0; i <= np; i++)
                    741:                if (((RF_PhysDiskAddr_t *) node->params[i].p)->type !=
                    742:                    RF_PDA_TYPE_DATA) {
                    743:                        ndataParam = i;
                    744:                        break;
                    745:                }
                    746:        RF_Malloc(buf, numDataCol * sizeof(char *), (char **));
                    747:        if (ndataParam != 0) {
                    748:                RF_Malloc(suoff, ndataParam * sizeof(long), (long *));
                    749:                RF_Malloc(suend, ndataParam * sizeof(long), (long *));
                    750:                RF_Malloc(prmToCol, ndataParam * sizeof(long), (long *));
                    751:        }
                    752:        if (asmap->failedPDAs[1] &&
                    753:            (asmap->failedPDAs[1]->numSector +
                    754:             asmap->failedPDAs[0]->numSector) < secPerSU) {
                    755:                RF_ASSERT(0);   /* Currently, no support for this situation. */
                    756:                ppda = node->params[np - 6].p;
                    757:                ppda2 = node->params[np - 5].p;
                    758:                RF_ASSERT(ppda2->type == RF_PDA_TYPE_PARITY);
                    759:                epda = node->params[np - 4].p;
                    760:                epda2 = node->params[np - 3].p;
                    761:                RF_ASSERT(epda2->type == RF_PDA_TYPE_Q);
                    762:                two = 1;
                    763:        } else {
                    764:                ppda = node->params[np - 4].p;
                    765:                epda = node->params[np - 3].p;
                    766:                psuoff = rf_StripeUnitOffset(layoutPtr, ppda->startSector);
                    767:                esuoff = rf_StripeUnitOffset(layoutPtr, epda->startSector);
                    768:                RF_ASSERT(psuoff == esuoff);
                    769:        }
                    770:        /*
                    771:         * The followings have three goals:
                    772:         *   1. Determine the startSector to begin decoding and endSector
                    773:         *      to end decoding.
                    774:         *   2. Determine the column numbers of the two failed disks.
                    775:         *   3. Determine the offset and end offset of the access within
                    776:         *      each failed stripe unit.
                    777:         */
                    778:        if (nresults == 1) {
                    779:                /* Find the startSector to begin decoding. */
                    780:                pda = node->results[0];
                    781:                bzero(pda->bufPtr, bytesPerSector * pda->numSector);
                    782:                fsuoff[0] = rf_StripeUnitOffset(layoutPtr, pda->startSector);
                    783:                fsuend[0] = fsuoff[0] + pda->numSector;
                    784:                startSector = fsuoff[0];
                    785:                endSector = fsuend[0];
                    786:
                    787:                /* Find out the column of failed disk being accessed. */
                    788:                fcol[0] = rf_EUCol(layoutPtr, pda->raidAddress);
                    789:
                    790:                /* Find out the other failed column not accessed. */
                    791:                sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr,
                    792:                    asmap->raidAddress);
                    793:                for (i = 0; i < numDataCol; i++) {
                    794:                        npda.raidAddress = sosAddr + (i * secPerSU);
                    795:                        (raidPtr->Layout.map->MapSector) (raidPtr,
                    796:                            npda.raidAddress, &(npda.row), &(npda.col),
                    797:                            &(npda.startSector), 0);
                    798:                        /* Skip over dead disks. */
                    799:                        if (RF_DEAD_DISK(raidPtr
                    800:                            ->Disks[npda.row][npda.col].status))
                    801:                                if (i != fcol[0])
                    802:                                        break;
                    803:                }
                    804:                RF_ASSERT(i < numDataCol);
                    805:                fcol[1] = i;
                    806:        } else {
                    807:                RF_ASSERT(nresults == 2);
                    808:                pda0 = node->results[0];
                    809:                bzero(pda0->bufPtr, bytesPerSector * pda0->numSector);
                    810:                pda1 = node->results[1];
                    811:                bzero(pda1->bufPtr, bytesPerSector * pda1->numSector);
                    812:                /*
                    813:                 * Determine the failed column numbers of the two failed
                    814:                 * disks.
                    815:                 */
                    816:                fcol[0] = rf_EUCol(layoutPtr, pda0->raidAddress);
                    817:                fcol[1] = rf_EUCol(layoutPtr, pda1->raidAddress);
                    818:                /*
                    819:                 * Determine the offset and end offset of the access within
                    820:                 * each failed stripe unit.
                    821:                 */
                    822:                fsuoff[0] = rf_StripeUnitOffset(layoutPtr, pda0->startSector);
                    823:                fsuend[0] = fsuoff[0] + pda0->numSector;
                    824:                fsuoff[1] = rf_StripeUnitOffset(layoutPtr, pda1->startSector);
                    825:                fsuend[1] = fsuoff[1] + pda1->numSector;
                    826:                /* Determine the startSector to begin decoding. */
                    827:                startSector = RF_MIN(pda0->startSector, pda1->startSector);
                    828:                /* Determine the endSector to end decoding. */
                    829:                endSector = RF_MAX(fsuend[0], fsuend[1]);
                    830:        }
                    831:        /*
                    832:         * Assign the beginning sector and the end sector for each parameter.
                    833:         * Find out the corresponding column # for each parameter.
                    834:         */
                    835:        for (prm = 0; prm < ndataParam; prm++) {
                    836:                pda = node->params[prm].p;
                    837:                suoff[prm] = rf_StripeUnitOffset(layoutPtr, pda->startSector);
                    838:                suend[prm] = suoff[prm] + pda->numSector;
                    839:                prmToCol[prm] = rf_EUCol(layoutPtr, pda->raidAddress);
                    840:        }
                    841:        /*
                    842:         * 'sector' is the sector for the current decoding algorithm. For each
                    843:         * sector in the failed SU
                    844:         * 1. Find out the corresponding parameters that cover the current
                    845:         *    sector and that are needed for the decoding of this sector in
                    846:         *    failed SU.
                    847:         * 2. Find out if sector is in the shadow of any accessed failed SU.
                    848:         *    If not, malloc a temporary space of a sector in size.
                    849:         */
                    850:        for (sector = startSector; sector < endSector; sector++) {
                    851:                if (nresults == 2)
                    852:                        if (!(fsuoff[0] <= sector && sector < fsuend[0]) &&
                    853:                            !(fsuoff[1] <= sector && sector < fsuend[1]))
                    854:                                continue;
                    855:                for (prm = 0; prm < ndataParam; prm++)
                    856:                        if (suoff[prm] <= sector && sector < suend[prm])
                    857:                                buf[(prmToCol[prm])] = ((RF_PhysDiskAddr_t *)
                    858:                                    node->params[prm].p)->bufPtr +
                    859:                                    rf_RaidAddressToByte(raidPtr,
                    860:                                     sector - suoff[prm]);
                    861:                /*
                    862:                 * Find out if sector is in the shadow of any accessed failed
                    863:                 * SU. If yes, assign dest[0], dest[1] to point at suitable
                    864:                 * position of the buffer corresponding to failed SUs. If no,
                    865:                 * malloc a temporary space of a sector in size for
                    866:                 * destination of decoding.
                    867:                 */
                    868:                RF_ASSERT(nresults == 1 || nresults == 2);
                    869:                if (nresults == 1) {
                    870:                        dest[0] = ((RF_PhysDiskAddr_t *)
                    871:                            node->results[0])->bufPtr +
                    872:                            rf_RaidAddressToByte(raidPtr, sector - fsuoff[0]);
                    873:                        /* Always malloc temp buffer to dest[1]. */
                    874:                        RF_Malloc(dest[1], bytesPerSector, (char *));
                    875:                        bzero(dest[1], bytesPerSector);
                    876:                        mallc_two = 1;
                    877:                } else {
                    878:                        if (fsuoff[0] <= sector && sector < fsuend[0])
                    879:                                dest[0] = ((RF_PhysDiskAddr_t *)
                    880:                                    node->results[0])->bufPtr +
                    881:                                    rf_RaidAddressToByte(raidPtr,
                    882:                                     sector - fsuoff[0]);
                    883:                        else {
                    884:                                RF_Malloc(dest[0], bytesPerSector, (char *));
                    885:                                bzero(dest[0], bytesPerSector);
                    886:                                mallc_one = 1;
                    887:                        }
                    888:                        if (fsuoff[1] <= sector && sector < fsuend[1])
                    889:                                dest[1] = ((RF_PhysDiskAddr_t *)
                    890:                                    node->results[1])->bufPtr +
                    891:                                    rf_RaidAddressToByte(raidPtr,
                    892:                                     sector - fsuoff[1]);
                    893:                        else {
                    894:                                RF_Malloc(dest[1], bytesPerSector, (char *));
                    895:                                bzero(dest[1], bytesPerSector);
                    896:                                mallc_two = 1;
                    897:                        }
                    898:                        RF_ASSERT(mallc_one == 0 || mallc_two == 0);
                    899:                }
                    900:                pbuf = ppda->bufPtr + rf_RaidAddressToByte(raidPtr,
                    901:                    sector - psuoff);
                    902:                ebuf = epda->bufPtr + rf_RaidAddressToByte(raidPtr,
                    903:                    sector - esuoff);
                    904:                /*
                    905:                 * After finish finding all needed sectors, call doubleEOdecode
                    906:                 * function for decoding one sector to destination.
                    907:                 */
                    908:                rf_doubleEOdecode(raidPtr, buf, dest, fcol, pbuf, ebuf);
                    909:                /*
                    910:                 * Free all allocated memory, and mark flag to indicate no
                    911:                 * memory is being allocated.
                    912:                 */
                    913:                if (mallc_one == 1)
                    914:                        RF_Free(dest[0], bytesPerSector);
                    915:                if (mallc_two == 1)
                    916:                        RF_Free(dest[1], bytesPerSector);
                    917:                mallc_one = mallc_two = 0;
                    918:        }
                    919:        RF_Free(buf, numDataCol * sizeof(char *));
                    920:        if (ndataParam != 0) {
                    921:                RF_Free(suoff, ndataParam * sizeof(long));
                    922:                RF_Free(suend, ndataParam * sizeof(long));
                    923:                RF_Free(prmToCol, ndataParam * sizeof(long));
                    924:        }
                    925:        RF_ETIMER_STOP(timer);
                    926:        RF_ETIMER_EVAL(timer);
                    927:        if (tracerec) {
                    928:                tracerec->q_us += RF_ETIMER_VAL_US(timer);
                    929:        }
                    930:        rf_GenericWakeupFunc(node, 0);
                    931: #if 1
                    932:        return (0);             /* XXX Is this even close !!?!?!!? GO */
                    933: #endif
                    934: }
                    935:
                    936:
                    937: /*
                    938:  * Currently, only access of one of the two failed SU is allowed in this
                    939:  * function. Also, asmap->numStripeUnitsAccessed is limited to be one,
                    940:  * the RAIDframe will break large access into many accesses of single
                    941:  * stripe unit.
                    942:  */
                    943:
                    944: int
                    945: rf_EOWriteDoubleRecoveryFunc(RF_DagNode_t *node)
                    946: {
                    947:        int np = node->numParams;
                    948:        RF_AccessStripeMap_t *asmap =
                    949:            (RF_AccessStripeMap_t *) node->params[np - 1].p;
                    950:        RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 2].p;
                    951:        RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) &(raidPtr->Layout);
                    952:        RF_SectorNum_t sector;
                    953:        RF_RowCol_t col, scol;
                    954:        int prm, i, j;
                    955:        RF_SectorCount_t secPerSU = layoutPtr->sectorsPerStripeUnit;
                    956:        unsigned sosAddr;
                    957:        unsigned bytesPerSector = rf_RaidAddressToByte(raidPtr, 1);
                    958:        RF_int64 numbytes;
                    959:        RF_SectorNum_t startSector, endSector;
                    960:        RF_PhysDiskAddr_t *ppda, *epda, *pda, *fpda, npda;
                    961:        RF_RowCol_t fcol[2], numDataCol = layoutPtr->numDataCol;
                    962:        char **buf;             /*
                    963:                                 * buf[0], buf[1], buf[2], ... etc, point to
                    964:                                 * buffer storing data read from col0, col1,
                    965:                                 * col2.
                    966:                                 */
                    967:        char *ebuf, *pbuf, *dest[2], *olddata[2];
                    968:        RF_Etimer_t timer;
                    969:        RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
                    970:
                    971:        RF_ASSERT(asmap->numDataFailed == 1);   /*
                    972:                                                 * Currently only support this
                    973:                                                 * case, the other failed SU
                    974:                                                 * is not being accessed.
                    975:                                                 */
                    976:        RF_ETIMER_START(timer);
                    977:        RF_Malloc(buf, numDataCol * sizeof(char *), (char **));
                    978:
                    979:        ppda = node->results[0];        /*
                    980:                                         * Instead of being buffers,
                    981:                                         * node->results[0] and [1]
                    982:                                         * are Ppda and Epda.
                    983:                                         */
                    984:        epda = node->results[1];
                    985:        fpda = asmap->failedPDAs[0];
                    986:
                    987:        /* First, recovery the failed old SU using EvenOdd double decoding. */
                    988:        /* Determine the startSector and endSector for decoding. */
                    989:        startSector = rf_StripeUnitOffset(layoutPtr, fpda->startSector);
                    990:        endSector = startSector + fpda->numSector;
                    991:        /*
                    992:         * Assign buf[col] pointers to point to each non-failed column and
                    993:         * initialize the pbuf and ebuf to point at the beginning of each
                    994:         * source buffers and destination buffers. */
                    995:        for (prm = 0; prm < numDataCol - 2; prm++) {
                    996:                pda = (RF_PhysDiskAddr_t *) node->params[prm].p;
                    997:                col = rf_EUCol(layoutPtr, pda->raidAddress);
                    998:                buf[col] = pda->bufPtr;
                    999:        }
                   1000:        /*
                   1001:         * pbuf and ebuf: They will change values as double recovery decoding
                   1002:         * goes on.
                   1003:         */
                   1004:        pbuf = ppda->bufPtr;
                   1005:        ebuf = epda->bufPtr;
                   1006:        /*
                   1007:         * Find out the logical column numbers in the encoding matrix of the
                   1008:         * two failed columns.
                   1009:         */
                   1010:        fcol[0] = rf_EUCol(layoutPtr, fpda->raidAddress);
                   1011:
                   1012:        /* Find out the other failed column not accessed this time. */
                   1013:        sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr,
                   1014:            asmap->raidAddress);
                   1015:        for (i = 0; i < numDataCol; i++) {
                   1016:                npda.raidAddress = sosAddr + (i * secPerSU);
                   1017:                (raidPtr->Layout.map->MapSector) (raidPtr, npda.raidAddress,
                   1018:                    &(npda.row), &(npda.col), &(npda.startSector), 0);
                   1019:                /* Skip over dead disks. */
                   1020:                if (RF_DEAD_DISK(raidPtr->Disks[npda.row][npda.col].status))
                   1021:                        if (i != fcol[0])
                   1022:                                break;
                   1023:        }
                   1024:        RF_ASSERT(i < numDataCol);
                   1025:        fcol[1] = i;
                   1026:        /* Assign temporary space to put recovered failed SU. */
                   1027:        numbytes = fpda->numSector * bytesPerSector;
                   1028:        RF_Malloc(olddata[0], numbytes, (char *));
                   1029:        RF_Malloc(olddata[1], numbytes, (char *));
                   1030:        dest[0] = olddata[0];
                   1031:        dest[1] = olddata[1];
                   1032:        bzero(olddata[0], numbytes);
                   1033:        bzero(olddata[1], numbytes);
                   1034:        /*
                   1035:         * Begin the recovery decoding, initially buf[j], ebuf, pbuf, dest[j]
                   1036:         * have already pointed at the beginning of each source buffers and
                   1037:         * destination buffers.
                   1038:         */
                   1039:        for (sector = startSector, i = 0; sector < endSector; sector++, i++) {
                   1040:                rf_doubleEOdecode(raidPtr, buf, dest, fcol, pbuf, ebuf);
                   1041:                for (j = 0; j < numDataCol; j++)
                   1042:                        if ((j != fcol[0]) && (j != fcol[1]))
                   1043:                                buf[j] += bytesPerSector;
                   1044:                dest[0] += bytesPerSector;
                   1045:                dest[1] += bytesPerSector;
                   1046:                ebuf += bytesPerSector;
                   1047:                pbuf += bytesPerSector;
                   1048:        }
                   1049:        /*
                   1050:         * After recovery, the buffer pointed by olddata[0] is the old failed
                   1051:         * data. With new writing data and this old data, use small write to
                   1052:         * calculate the new redundant informations.
                   1053:         */
                   1054:        /*
                   1055:         * node->params[ 0, ... PDAPerDisk * (numDataCol - 2)-1 ] are Pdas of
                   1056:         * Rrd; params[ PDAPerDisk*(numDataCol - 2), ... PDAPerDisk*numDataCol
                   1057:         * -1 ] are Pdas of Rp, ( Rp2 ), Re, ( Re2 ) ; params[
                   1058:         * PDAPerDisk*numDataCol, ... PDAPerDisk*numDataCol
                   1059:         * +asmap->numStripeUnitsAccessed -asmap->numDataFailed-1] are Pdas of
                   1060:         * wudNodes; For current implementation, we assume the simplest case:
                   1061:         * asmap->numStripeUnitsAccessed == 1 and asmap->numDataFailed == 1
                   1062:         * ie. PDAPerDisk = 1 then node->params[numDataCol] must be the new
                   1063:         * data to be written to the failed disk. We first bxor the new data
                   1064:         * into the old recovered data, then do the same things as small
                   1065:         * write.
                   1066:         */
                   1067:
                   1068:        rf_bxor(((RF_PhysDiskAddr_t *) node->params[numDataCol].p)->bufPtr,
                   1069:            olddata[0], numbytes, node->dagHdr->bp);
                   1070:        /* Do new 'E' calculation. */
                   1071:        /*
                   1072:         * Find out the corresponding column in encoding matrix for write
                   1073:         * column to be encoded into redundant disk 'E'.
                   1074:         */
                   1075:        scol = rf_EUCol(layoutPtr, fpda->raidAddress);
                   1076:        /*
                   1077:         * olddata[0] now is source buffer pointer; epda->bufPtr is the dest
                   1078:         * buffer pointer.
                   1079:         */
                   1080:        rf_e_encToBuf(raidPtr, scol, olddata[0], RF_EO_MATRIX_DIM - 2,
                   1081:            epda->bufPtr, fpda->numSector);
                   1082:
                   1083:        /* Do new 'P' calculation. */
                   1084:        rf_bxor(olddata[0], ppda->bufPtr, numbytes, node->dagHdr->bp);
                   1085:        /* Free the allocated buffer. */
                   1086:        RF_Free(olddata[0], numbytes);
                   1087:        RF_Free(olddata[1], numbytes);
                   1088:        RF_Free(buf, numDataCol * sizeof(char *));
                   1089:
                   1090:        RF_ETIMER_STOP(timer);
                   1091:        RF_ETIMER_EVAL(timer);
                   1092:        if (tracerec) {
                   1093:                tracerec->q_us += RF_ETIMER_VAL_US(timer);
                   1094:        }
                   1095:        rf_GenericWakeupFunc(node, 0);
                   1096:        return (0);
                   1097: }

CVSweb