Annotation of sys/dev/raidframe/rf_evenodd_dagfuncs.c, Revision 1.1
1.1 ! nbrk 1: /* $OpenBSD: rf_evenodd_dagfuncs.c,v 1.7 2002/12/16 07:01:04 tdeval Exp $ */
! 2: /* $NetBSD: rf_evenodd_dagfuncs.c,v 1.6 2000/03/30 12:45:40 augustss Exp $ */
! 3:
! 4: /*
! 5: * Copyright (c) 1995 Carnegie-Mellon University.
! 6: * All rights reserved.
! 7: *
! 8: * Author: ChangMing Wu
! 9: *
! 10: * Permission to use, copy, modify and distribute this software and
! 11: * its documentation is hereby granted, provided that both the copyright
! 12: * notice and this permission notice appear in all copies of the
! 13: * software, derivative works or modified versions, and any portions
! 14: * thereof, and that both notices appear in supporting documentation.
! 15: *
! 16: * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
! 17: * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
! 18: * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
! 19: *
! 20: * Carnegie Mellon requests users of this software to return to
! 21: *
! 22: * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
! 23: * School of Computer Science
! 24: * Carnegie Mellon University
! 25: * Pittsburgh PA 15213-3890
! 26: *
! 27: * any improvements or extensions that they make and grant Carnegie the
! 28: * rights to redistribute these changes.
! 29: */
! 30:
! 31: /*
! 32: * Code for RAID-EVENODD architecture.
! 33: */
! 34:
! 35: #include "rf_types.h"
! 36: #include "rf_raid.h"
! 37: #include "rf_dag.h"
! 38: #include "rf_dagffrd.h"
! 39: #include "rf_dagffwr.h"
! 40: #include "rf_dagdegrd.h"
! 41: #include "rf_dagdegwr.h"
! 42: #include "rf_dagutils.h"
! 43: #include "rf_dagfuncs.h"
! 44: #include "rf_etimer.h"
! 45: #include "rf_general.h"
! 46: #include "rf_configure.h"
! 47: #include "rf_parityscan.h"
! 48: #include "rf_evenodd.h"
! 49: #include "rf_evenodd_dagfuncs.h"
! 50:
! 51: /* These redundant functions are for small write. */
! 52: RF_RedFuncs_t rf_EOSmallWritePFuncs = {
! 53: rf_RegularXorFunc, "Regular Old-New P",
! 54: rf_SimpleXorFunc, "Simple Old-New P"
! 55: };
! 56: RF_RedFuncs_t rf_EOSmallWriteEFuncs = {
! 57: rf_RegularONEFunc, "Regular Old-New E",
! 58: rf_SimpleONEFunc, "Regular Old-New E"
! 59: };
! 60: /* These redundant functions are for degraded read. */
! 61: RF_RedFuncs_t rf_eoPRecoveryFuncs = {
! 62: rf_RecoveryXorFunc, "Recovery Xr",
! 63: rf_RecoveryXorFunc, "Recovery Xr"
! 64: };
! 65: RF_RedFuncs_t rf_eoERecoveryFuncs = {
! 66: rf_RecoveryEFunc, "Recovery E Func",
! 67: rf_RecoveryEFunc, "Recovery E Func"
! 68: };
! 69:
! 70:
! 71: /*****************************************************************************
! 72: * The following encoding node function is used in
! 73: * EO_000_CreateLargeWriteDAG.
! 74: *****************************************************************************/
! 75: int
! 76: rf_RegularPEFunc(RF_DagNode_t *node)
! 77: {
! 78: rf_RegularESubroutine(node, node->results[1]);
! 79: rf_RegularXorFunc(node); /* Do the wakeup here ! */
! 80: #if 1
! 81: return (0); /* XXX This was missing... GO */
! 82: #endif
! 83: }
! 84:
! 85:
! 86: /*****************************************************************************
! 87: * For EO_001_CreateSmallWriteDAG, there are (i) RegularONEFunc() and
! 87: * (ii) SimpleONEFunc() to be used. The former is used when the write accesses
! 88: * at least a full stripe unit's worth of sectors.
! 89: * The latter function is used when the write accesses two stripe units but
! 91: * with total sectors less than sectors per SU. In this case, the access of
! 92: * parity and 'E' are shown as disconnected areas in their stripe unit and
! 93: * parity write and 'E' write are both divided into two distinct writes
! 94: * (totally four). This simple old-new write and regular old-new write happen
! 95: * as in RAID-5.
! 96: *****************************************************************************/
! 97:
! 98: /*
! 99: * Algorithm:
! 100: * 1. Store the difference of old data and new data in the Rod buffer.
! 101: * 2. Then encode this buffer into the buffer that already has old 'E'
! 102: * information inside it, the result can be shown to be the new 'E'
! 103: * information.
! 104: * 3. Xor the Wnd buffer into the difference buffer to recover the original
! 105: * old data.
! 106: * Here we have another alternative: to allocate a temporary buffer for
! 107: * storing the difference of old data and new data, then encode temp buf
! 108: * into old 'E' buf to form new 'E', but this approach takes the same speed
! 109: * as the previous, and needs more memory.
! 110: */
! 111: int
! 112: rf_RegularONEFunc(RF_DagNode_t *node)
! 113: {
! 114: RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
! 115: RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
! 116: int EpdaIndex = (node->numParams - 1) / 2 - 1; /*
! 117: * The parameter of node
! 118: * where you can find
! 119: * e-pda.
! 120: */
! 121: int i, k, retcode = 0;
! 122: int suoffset, length;
! 123: RF_RowCol_t scol;
! 124: char *srcbuf, *destbuf;
! 125: RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
! 126: RF_Etimer_t timer;
! 127: RF_PhysDiskAddr_t *pda, *EPDA = (RF_PhysDiskAddr_t *)
! 128: node->params[EpdaIndex].p;
! 129: /* Generally zero. */
! 130: int ESUOffset = rf_StripeUnitOffset(layoutPtr, EPDA->startSector);
! 131:
! 132: RF_ASSERT(EPDA->type == RF_PDA_TYPE_Q);
! 133: RF_ASSERT(ESUOffset == 0);
! 134:
! 135: RF_ETIMER_START(timer);
! 136:
! 137: /*
! 138: * Xor the Wnd buffer into Rod buffer. The difference of old data and
! 139: * new data is stored in Rod buffer.
! 140: */
! 141: for (k = 0; k < EpdaIndex; k += 2) {
! 142: length = rf_RaidAddressToByte(raidPtr,
! 143: ((RF_PhysDiskAddr_t *) node->params[k].p)->numSector);
! 144: retcode = rf_bxor(node->params[k + EpdaIndex + 3].p,
! 145: node->params[k + 1].p, length, node->dagHdr->bp);
! 146: }
! 147: /*
! 148: * Start to encode the buffer, storing the difference of old data and
! 149: * new data into 'E' buffer.
! 150: */
! 151: for (i = 0; i < EpdaIndex; i += 2)
! 152: if (node->params[i + 1].p != node->results[0]) {
! 153: /* results[0] is buf ptr of E. */
! 154: pda = (RF_PhysDiskAddr_t *) node->params[i].p;
! 155: srcbuf = (char *) node->params[i + 1].p;
! 156: scol = rf_EUCol(layoutPtr, pda->raidAddress);
! 157: suoffset = rf_StripeUnitOffset(layoutPtr,
! 158: pda->startSector);
! 159: destbuf = ((char *) node->results[0]) +
! 160: rf_RaidAddressToByte(raidPtr, suoffset);
! 161: rf_e_encToBuf(raidPtr, scol, srcbuf,
! 162: RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector);
! 163: }
! 164: /*
! 165: * Recover the original old data to be used by parity encoding
! 166: * function in XorNode.
! 167: */
! 168: for (k = 0; k < EpdaIndex; k += 2) {
! 169: length = rf_RaidAddressToByte(raidPtr,
! 170: ((RF_PhysDiskAddr_t *) node->params[k].p)->numSector);
! 171: retcode = rf_bxor(node->params[k + EpdaIndex + 3].p,
! 172: node->params[k + 1].p, length, node->dagHdr->bp);
! 173: }
! 174: RF_ETIMER_STOP(timer);
! 175: RF_ETIMER_EVAL(timer);
! 176: tracerec->q_us += RF_ETIMER_VAL_US(timer);
! 177: rf_GenericWakeupFunc(node, 0);
! 178: #if 1
! 179: return (0); /* XXX This was missing... GO */
! 180: #endif
! 181: }
! 182:
! 183: int
! 184: rf_SimpleONEFunc(RF_DagNode_t *node)
! 185: {
! 186: RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
! 187: RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
! 188: RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p;
! 189: int retcode = 0;
! 190: char *srcbuf, *destbuf;
! 191: RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
! 192: int length;
! 193: RF_RowCol_t scol;
! 194: RF_Etimer_t timer;
! 195:
! 196: RF_ASSERT(((RF_PhysDiskAddr_t *) node->params[2].p)->type ==
! 197: RF_PDA_TYPE_Q);
! 198: if (node->dagHdr->status == rf_enable) {
! 199: RF_ETIMER_START(timer);
! 200: /* This is a pda of writeDataNodes. */
! 201: length = rf_RaidAddressToByte(raidPtr,
! 202: ((RF_PhysDiskAddr_t *) node->params[4].p)->numSector);
! 203: /* bxor to buffer of readDataNodes. */
! 204: retcode = rf_bxor(node->params[5].p, node->params[1].p,
! 205: length, node->dagHdr->bp);
! 206: /*
! 207: * Find out the corresponding column in encoding matrix for
! 208: * write column to be encoded into redundant disk 'E'.
! 209: */
! 210: scol = rf_EUCol(layoutPtr, pda->raidAddress);
! 211: srcbuf = node->params[1].p;
! 212: destbuf = node->params[3].p;
! 213: /* Start encoding process. */
! 214: rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2,
! 215: destbuf, pda->numSector);
! 216: rf_bxor(node->params[5].p, node->params[1].p, length,
! 217: node->dagHdr->bp);
! 218: RF_ETIMER_STOP(timer);
! 219: RF_ETIMER_EVAL(timer);
! 220: tracerec->q_us += RF_ETIMER_VAL_US(timer);
! 221:
! 222: }
! 223: return (rf_GenericWakeupFunc(node, retcode)); /*
! 224: * Call wake func
! 225: * explicitly since no
! 226: * I/O in this node.
! 227: */
! 228: }
! 229:
! 230:
! 231: /*
! 232: * Called by rf_RegularPEFunc(node) and rf_RegularEFunc(node)
! 233: * in f.f. large write.
! 234: */
! 235: void
! 236: rf_RegularESubroutine(RF_DagNode_t *node, char *ebuf)
! 237: {
! 238: RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
! 239: RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
! 240: RF_PhysDiskAddr_t *pda;
! 241: int i, suoffset;
! 242: RF_RowCol_t scol;
! 243: char *srcbuf, *destbuf;
! 244: RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
! 245: RF_Etimer_t timer;
! 246:
! 247: RF_ETIMER_START(timer);
! 248: for (i = 0; i < node->numParams - 2; i += 2) {
! 249: RF_ASSERT(node->params[i + 1].p != ebuf);
! 250: pda = (RF_PhysDiskAddr_t *) node->params[i].p;
! 251: suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
! 252: scol = rf_EUCol(layoutPtr, pda->raidAddress);
! 253: srcbuf = (char *) node->params[i + 1].p;
! 254: destbuf = ebuf + rf_RaidAddressToByte(raidPtr, suoffset);
! 255: rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2,
! 256: destbuf, pda->numSector);
! 257: }
! 258: RF_ETIMER_STOP(timer);
! 259: RF_ETIMER_EVAL(timer);
! 260: tracerec->xor_us += RF_ETIMER_VAL_US(timer);
! 261: }
! 262:
! 263:
! 264: /*****************************************************************************
! 265: * Used in EO_001_CreateLargeWriteDAG.
! 266: *****************************************************************************/
! 267: int
! 268: rf_RegularEFunc(RF_DagNode_t *node)
! 269: {
! 270: rf_RegularESubroutine(node, node->results[0]);
! 271: rf_GenericWakeupFunc(node, 0);
! 272: #if 1
! 273: return (0); /* XXX This was missing... GO */
! 274: #endif
! 275: }
! 276:
! 277:
! 278: /*****************************************************************************
! 279: * This degraded function allows only two cases:
! 280: * 1. When write accesses the full failed stripe unit, then the access can
! 281: * be more than one stripe unit.
! 282: * 2. When write accesses only part of the failed SU, we assume accesses of
! 283: * more than one stripe unit are not allowed so that the write can be
! 284: * dealt with like a large write.
! 285: * The following function is based on these assumptions. So except in the
! 286: * second case, it looks the same as a large write encoding function. But
! 287: * this is not exactly the normal way of doing a degraded write, since
! 288: * RAIDframe has to break cases of accesses other than the above two into
! 289: * smaller accesses. We may have to change rf_DegrESubroutine in the future.
! 290: *****************************************************************************/
! 291: void
! 292: rf_DegrESubroutine(RF_DagNode_t *node, char *ebuf)
! 293: {
! 294: RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
! 295: RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
! 296: RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams - 2].p;
! 297: RF_PhysDiskAddr_t *pda;
! 298: int i, suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr, failedPDA->startSector);
! 299: RF_RowCol_t scol;
! 300: char *srcbuf, *destbuf;
! 301: RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
! 302: RF_Etimer_t timer;
! 303:
! 304: RF_ETIMER_START(timer);
! 305: for (i = 0; i < node->numParams - 2; i += 2) {
! 306: RF_ASSERT(node->params[i + 1].p != ebuf);
! 307: pda = (RF_PhysDiskAddr_t *) node->params[i].p;
! 308: suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
! 309: scol = rf_EUCol(layoutPtr, pda->raidAddress);
! 310: srcbuf = (char *) node->params[i + 1].p;
! 311: destbuf = ebuf + rf_RaidAddressToByte(raidPtr, suoffset - failedSUOffset);
! 312: rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector);
! 313: }
! 314:
! 315: RF_ETIMER_STOP(timer);
! 316: RF_ETIMER_EVAL(timer);
! 317: tracerec->q_us += RF_ETIMER_VAL_US(timer);
! 318: }
! 319:
! 320:
! 321: /*****************************************************************************
! 322: * This function is used in case where one data disk failed and both redundant
! 323: * disks are alive. It is used in the EO_100_CreateWriteDAG. Note: if there is
! 324: * another disk failed in the stripe but not accessed at this time, then we
! 325: * should, instead, use the rf_EOWriteDoubleRecoveryFunc().
! 326: *****************************************************************************/
! 327: int
! 328: rf_Degraded_100_EOFunc(RF_DagNode_t *node)
! 329: {
! 330: rf_DegrESubroutine(node, node->results[1]);
! 331: rf_RecoveryXorFunc(node); /* Does the wakeup here ! */
! 332: #if 1
! 333: return (0); /* XXX This was missing... Should these be
! 334: * void functions ??? GO */
! 335: #endif
! 336: }
! 337:
! 338:
! 339: /*****************************************************************************
! 340: * This function is to encode one sector in one of the data disks to the E
! 341: * disk. However, in evenodd this function can also be used as decoding
! 342: * function to recover data from dead disk in the case of parity failure and
! 343: * a single data failure.
! 344: *****************************************************************************/
! 345: void
! 346: rf_e_EncOneSect(RF_RowCol_t srcLogicCol, char *srcSecbuf,
! 347: RF_RowCol_t destLogicCol, char *destSecbuf, int bytesPerSector)
! 348: {
! 349: int S_index; /*
! 350: * Index of the EU in the src col which need
! 351: * be Xored into all EUs in a dest sector.
! 352: */
! 353: int numRowInEncMatrix = (RF_EO_MATRIX_DIM) - 1;
! 354: RF_RowCol_t j, indexInDest; /*
! 355: * Row index of an encoding unit in
! 356: * the destination column of encoding
! 357: * matrix.
! 358: */
! 359: RF_RowCol_t indexInSrc; /*
! 360: * Row index of an encoding unit in the source
! 361: * column used for recovery.
! 362: */
! 363: int bytesPerEU = bytesPerSector / numRowInEncMatrix;
! 364:
! 365: #if RF_EO_MATRIX_DIM > 17
! 366: int shortsPerEU = bytesPerEU / sizeof(short);
! 367: short *destShortBuf, *srcShortBuf1, *srcShortBuf2;
! 368: short temp1;
! 369: #elif RF_EO_MATRIX_DIM == 17
! 370: int longsPerEU = bytesPerEU / sizeof(long);
! 371: long *destLongBuf, *srcLongBuf1, *srcLongBuf2;
! 372: long temp1;
! 373: #endif
! 374:
! 375: #if RF_EO_MATRIX_DIM > 17
! 376: RF_ASSERT(sizeof(short) == 2 || sizeof(short) == 1);
! 377: RF_ASSERT(bytesPerEU % sizeof(short) == 0);
! 378: #elif RF_EO_MATRIX_DIM == 17
! 379: RF_ASSERT(sizeof(long) == 8 || sizeof(long) == 4);
! 380: RF_ASSERT(bytesPerEU % sizeof(long) == 0);
! 381: #endif
! 382:
! 383: S_index = rf_EO_Mod((RF_EO_MATRIX_DIM - 1 + destLogicCol - srcLogicCol), RF_EO_MATRIX_DIM);
! 384: #if RF_EO_MATRIX_DIM > 17
! 385: srcShortBuf1 = (short *) (srcSecbuf + S_index * bytesPerEU);
! 386: #elif RF_EO_MATRIX_DIM == 17
! 387: srcLongBuf1 = (long *) (srcSecbuf + S_index * bytesPerEU);
! 388: #endif
! 389:
! 390: for (indexInDest = 0; indexInDest < numRowInEncMatrix; indexInDest++) {
! 391: indexInSrc = rf_EO_Mod((indexInDest + destLogicCol - srcLogicCol), RF_EO_MATRIX_DIM);
! 392:
! 393: #if RF_EO_MATRIX_DIM > 17
! 394: destShortBuf = (short *) (destSecbuf + indexInDest * bytesPerEU);
! 395: srcShortBuf2 = (short *) (srcSecbuf + indexInSrc * bytesPerEU);
! 396: for (j = 0; j < shortsPerEU; j++) {
! 397: temp1 = destShortBuf[j] ^ srcShortBuf1[j];
! 398: /* Note: S_index won't be at the end row for any src
! 399: * col ! */
! 400: if (indexInSrc != RF_EO_MATRIX_DIM - 1)
! 401: destShortBuf[j] = (srcShortBuf2[j]) ^ temp1;
! 402: /* if indexInSrc is at the end row, ie.
! 403: * RF_EO_MATRIX_DIM -1, then all elements are zero ! */
! 404: else
! 405: destShortBuf[j] = temp1;
! 406: }
! 407:
! 408: #elif RF_EO_MATRIX_DIM == 17
! 409: destLongBuf = (long *) (destSecbuf + indexInDest * bytesPerEU);
! 410: srcLongBuf2 = (long *) (srcSecbuf + indexInSrc * bytesPerEU);
! 411: for (j = 0; j < longsPerEU; j++) {
! 412: temp1 = destLongBuf[j] ^ srcLongBuf1[j];
! 413: if (indexInSrc != RF_EO_MATRIX_DIM - 1)
! 414: destLongBuf[j] = (srcLongBuf2[j]) ^ temp1;
! 415: else
! 416: destLongBuf[j] = temp1;
! 417: }
! 418: #endif
! 419: }
! 420: }
! 421:
! 422: void
! 423: rf_e_encToBuf(RF_Raid_t *raidPtr, RF_RowCol_t srcLogicCol, char *srcbuf,
! 424: RF_RowCol_t destLogicCol, char *destbuf, int numSector)
! 425: {
! 426: int i, bytesPerSector = rf_RaidAddressToByte(raidPtr, 1);
! 427:
! 428: for (i = 0; i < numSector; i++) {
! 429: rf_e_EncOneSect(srcLogicCol, srcbuf, destLogicCol, destbuf, bytesPerSector);
! 430: srcbuf += bytesPerSector;
! 431: destbuf += bytesPerSector;
! 432: }
! 433: }
! 434:
! 435:
! 436: /*****************************************************************************
! 437: * When parity and one data disk have both died, we use the second redundant
! 438: * information, 'E', to recover the data of the dead disk. This function is used
! 439: * in the recovery node for EO_110_CreateReadDAG.
! 440: *****************************************************************************/
! 441: int
! 442: rf_RecoveryEFunc(RF_DagNode_t *node)
! 443: {
! 444: RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
! 445: RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
! 446: RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams - 2].p;
! 447: RF_RowCol_t scol; /* source logical column */
! 448: RF_RowCol_t fcol = rf_EUCol(layoutPtr, failedPDA->raidAddress); /* logical column of
! 449: * failed SU */
! 450: int i;
! 451: RF_PhysDiskAddr_t *pda;
! 452: int suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr, failedPDA->startSector);
! 453: char *srcbuf, *destbuf;
! 454: RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
! 455: RF_Etimer_t timer;
! 456:
! 457: bzero((char *) node->results[0], rf_RaidAddressToByte(raidPtr, failedPDA->numSector));
! 458: if (node->dagHdr->status == rf_enable) {
! 459: RF_ETIMER_START(timer);
! 460: for (i = 0; i < node->numParams - 2; i += 2)
! 461: if (node->params[i + 1].p != node->results[0]) {
! 462: pda = (RF_PhysDiskAddr_t *) node->params[i].p;
! 463: if (i == node->numParams - 4)
! 464: scol = RF_EO_MATRIX_DIM - 2; /* the colume of
! 465: * redundant E */
! 466: else
! 467: scol = rf_EUCol(layoutPtr, pda->raidAddress);
! 468: srcbuf = (char *) node->params[i + 1].p;
! 469: suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
! 470: destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr, suoffset - failedSUOffset);
! 471: rf_e_encToBuf(raidPtr, scol, srcbuf, fcol, destbuf, pda->numSector);
! 472: }
! 473: RF_ETIMER_STOP(timer);
! 474: RF_ETIMER_EVAL(timer);
! 475: tracerec->xor_us += RF_ETIMER_VAL_US(timer);
! 476: }
! 477: return (rf_GenericWakeupFunc(node, 0)); /* node execute successfully */
! 478: }
! 479:
! 480:
! 481: /*****************************************************************************
! 482: * This function is used in the case where one data and the parity have failed.
! 483: * (in EO_110_CreateWriteDAG)
! 484: *****************************************************************************/
! 485: int
! 486: rf_EO_DegradedWriteEFunc(RF_DagNode_t *node)
! 487: {
! 488: rf_DegrESubroutine(node, node->results[0]);
! 489: rf_GenericWakeupFunc(node, 0);
! 490: #if 1
! 491: return (0); /* XXX Yet another one !!! GO */
! 492: #endif
! 493: }
! 494:
! 495:
! 496:
! 497: /*****************************************************************************
! 498: * THE FUNCTION IS FOR DOUBLE DEGRADED READ AND WRITE CASES.
! 499: *****************************************************************************/
! 500:
! 501: void
! 502: rf_doubleEOdecode(RF_Raid_t *raidPtr, char **rrdbuf, char **dest,
! 503: RF_RowCol_t *fcol, char *pbuf, char *ebuf)
! 504: {
! 505: RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) &(raidPtr->Layout);
! 506: int i, j, k, f1, f2, row;
! 507: int rrdrow, erow, count = 0;
! 508: int bytesPerSector = rf_RaidAddressToByte(raidPtr, 1);
! 509: int numRowInEncMatrix = (RF_EO_MATRIX_DIM) - 1;
! 510: #if 0
! 511: int pcol = (RF_EO_MATRIX_DIM) - 1;
! 512: #endif
! 513: int ecol = (RF_EO_MATRIX_DIM) - 2;
! 514: int bytesPerEU = bytesPerSector / numRowInEncMatrix;
! 515: int numDataCol = layoutPtr->numDataCol;
! 516: #if RF_EO_MATRIX_DIM > 17
! 517: int shortsPerEU = bytesPerEU / sizeof(short);
! 518: short *rrdbuf_current, *pbuf_current, *ebuf_current;
! 519: short *dest_smaller, *dest_smaller_current;
! 520: short *dest_larger, *dest_larger_current;
! 521: short *temp;
! 522: short *P;
! 523:
! 524: RF_ASSERT(bytesPerEU % sizeof(short) == 0);
! 525: RF_Malloc(P, bytesPerEU, (short *));
! 526: RF_Malloc(temp, bytesPerEU, (short *));
! 527: #elif RF_EO_MATRIX_DIM == 17
! 528: int longsPerEU = bytesPerEU / sizeof(long);
! 529: long *rrdbuf_current, *pbuf_current, *ebuf_current;
! 530: long *dest_smaller, *dest_smaller_current;
! 531: long *dest_larger, *dest_larger_current;
! 532: long *temp;
! 533: long *P;
! 534:
! 535: RF_ASSERT(bytesPerEU % sizeof(long) == 0);
! 536: RF_Malloc(P, bytesPerEU, (long *));
! 537: RF_Malloc(temp, bytesPerEU, (long *));
! 538: #endif
! 539: RF_ASSERT(*((long *) dest[0]) == 0);
! 540: RF_ASSERT(*((long *) dest[1]) == 0);
! 541: bzero((char *) P, bytesPerEU);
! 542: bzero((char *) temp, bytesPerEU);
! 543: RF_ASSERT(*P == 0);
! 544: /*
! 545: * Calculate the 'P' parameter, which, not parity, is the Xor of all
! 546: * elements in the last two column, ie. 'E' and 'parity' columns, see
! 547: * the Ref. paper by Blaum, et al 1993.
! 548: */
! 549: for (i = 0; i < numRowInEncMatrix; i++)
! 550: for (k = 0; k < longsPerEU; k++) {
! 551: #if RF_EO_MATRIX_DIM > 17
! 552: ebuf_current = ((short *) ebuf) + i * shortsPerEU + k;
! 553: pbuf_current = ((short *) pbuf) + i * shortsPerEU + k;
! 554: #elif RF_EO_MATRIX_DIM == 17
! 555: ebuf_current = ((long *) ebuf) + i * longsPerEU + k;
! 556: pbuf_current = ((long *) pbuf) + i * longsPerEU + k;
! 557: #endif
! 558: P[k] ^= *ebuf_current;
! 559: P[k] ^= *pbuf_current;
! 560: }
! 561: RF_ASSERT(fcol[0] != fcol[1]);
! 562: if (fcol[0] < fcol[1]) {
! 563: #if RF_EO_MATRIX_DIM > 17
! 564: dest_smaller = (short *) (dest[0]);
! 565: dest_larger = (short *) (dest[1]);
! 566: #elif RF_EO_MATRIX_DIM == 17
! 567: dest_smaller = (long *) (dest[0]);
! 568: dest_larger = (long *) (dest[1]);
! 569: #endif
! 570: f1 = fcol[0];
! 571: f2 = fcol[1];
! 572: } else {
! 573: #if RF_EO_MATRIX_DIM > 17
! 574: dest_smaller = (short *) (dest[1]);
! 575: dest_larger = (short *) (dest[0]);
! 576: #elif RF_EO_MATRIX_DIM == 17
! 577: dest_smaller = (long *) (dest[1]);
! 578: dest_larger = (long *) (dest[0]);
! 579: #endif
! 580: f1 = fcol[1];
! 581: f2 = fcol[0];
! 582: }
! 583: row = (RF_EO_MATRIX_DIM) - 1;
! 584: while ((row = rf_EO_Mod((row + f1 - f2), RF_EO_MATRIX_DIM)) !=
! 585: ((RF_EO_MATRIX_DIM) - 1)) {
! 586: #if RF_EO_MATRIX_DIM > 17
! 587: dest_larger_current = dest_larger + row * shortsPerEU;
! 588: dest_smaller_current = dest_smaller + row * shortsPerEU;
! 589: #elif RF_EO_MATRIX_DIM == 17
! 590: dest_larger_current = dest_larger + row * longsPerEU;
! 591: dest_smaller_current = dest_smaller + row * longsPerEU;
! 592: #endif
! 593: /*
! 594: * Do the diagonal recovery. Initially, temp[k] = (failed 1),
! 595: * which is the failed data in the column that has smaller
! 596: * col index.
! 597: */
! 598: /* Step 1: ^(SUM of nonfailed in-diagonal A(rrdrow,0..m-3)) */
! 599: for (j = 0; j < numDataCol; j++) {
! 600: if (j == f1 || j == f2)
! 601: continue;
! 602: rrdrow = rf_EO_Mod((row + f2 - j), RF_EO_MATRIX_DIM);
! 603: if (rrdrow != (RF_EO_MATRIX_DIM) - 1) {
! 604: #if RF_EO_MATRIX_DIM > 17
! 605: rrdbuf_current = (short *) (rrdbuf[j]) +
! 606: rrdrow * shortsPerEU;
! 607: for (k = 0; k < shortsPerEU; k++)
! 608: temp[k] ^= *(rrdbuf_current + k);
! 609: #elif RF_EO_MATRIX_DIM == 17
! 610: rrdbuf_current = (long *) (rrdbuf[j]) +
! 611: rrdrow * longsPerEU;
! 612: for (k = 0; k < longsPerEU; k++)
! 613: temp[k] ^= *(rrdbuf_current + k);
! 614: #endif
! 615: }
! 616: }
! 617: /*
! 618: * Step 2: ^E(erow,m-2), If erow is at the bottom row, don't
! 619: * Xor into it. E(erow,m-2) = (principle diagonal) ^ (failed
! 620: * 1) ^ (failed 2) ^ (SUM of nonfailed in-diagonal
! 621: * A(rrdrow,0..m-3))
! 622: * After this step, temp[k] = (principle diagonal) ^ (failed 2).
! 623: */
! 624:
! 625: erow = rf_EO_Mod((row + f2 - ecol), (RF_EO_MATRIX_DIM));
! 626: if (erow != (RF_EO_MATRIX_DIM) - 1) {
! 627: #if RF_EO_MATRIX_DIM > 17
! 628: ebuf_current = (short *) ebuf + shortsPerEU * erow;
! 629: for (k = 0; k < shortsPerEU; k++)
! 630: temp[k] ^= *(ebuf_current + k);
! 631: #elif RF_EO_MATRIX_DIM == 17
! 632: ebuf_current = (long *) ebuf + longsPerEU * erow;
! 633: for (k = 0; k < longsPerEU; k++)
! 634: temp[k] ^= *(ebuf_current + k);
! 635: #endif
! 636: }
! 637: /*
! 638: * Step 3: ^P to obtain the failed data (failed 2). P can be
! 639: * proved to be actually (principal diagonal). After this
! 640: * step, temp[k] = (failed 2), the failed data to be recovered.
! 641: */
! 642: #if RF_EO_MATRIX_DIM > 17
! 643: for (k = 0; k < shortsPerEU; k++)
! 644: temp[k] ^= P[k];
! 645: /* Put the data into the destination buffer. */
! 646: for (k = 0; k < shortsPerEU; k++)
! 647: dest_larger_current[k] = temp[k];
! 648: #elif RF_EO_MATRIX_DIM == 17
! 649: for (k = 0; k < longsPerEU; k++)
! 650: temp[k] ^= P[k];
! 651: /* Put the data into the destination buffer. */
! 652: for (k = 0; k < longsPerEU; k++)
! 653: dest_larger_current[k] = temp[k];
! 654: #endif
! 655:
! 656: /* THE FOLLOWING DO THE HORIZONTAL XOR. */
! 657: /*
! 658: * Step 1: ^(SUM of A(row,0..m-3)), ie. all nonfailed data
! 659: * columns.
! 660: */
! 661: for (j = 0; j < numDataCol; j++) {
! 662: if (j == f1 || j == f2)
! 663: continue;
! 664: #if RF_EO_MATRIX_DIM > 17
! 665: rrdbuf_current = (short *) (rrdbuf[j]) +
! 666: row * shortsPerEU;
! 667: for (k = 0; k < shortsPerEU; k++)
! 668: temp[k] ^= *(rrdbuf_current + k);
! 669: #elif RF_EO_MATRIX_DIM == 17
! 670: rrdbuf_current = (long *) (rrdbuf[j]) +
! 671: row * longsPerEU;
! 672: for (k = 0; k < longsPerEU; k++)
! 673: temp[k] ^= *(rrdbuf_current + k);
! 674: #endif
! 675: }
! 676: /* Step 2: ^A(row,m-1) */
! 677: /* Step 3: Put the data into the destination buffer. */
! 678: #if RF_EO_MATRIX_DIM > 17
! 679: pbuf_current = (short *) pbuf + shortsPerEU * row;
! 680: for (k = 0; k < shortsPerEU; k++)
! 681: temp[k] ^= *(pbuf_current + k);
! 682: for (k = 0; k < shortsPerEU; k++)
! 683: dest_smaller_current[k] = temp[k];
! 684: #elif RF_EO_MATRIX_DIM == 17
! 685: pbuf_current = (long *) pbuf + longsPerEU * row;
! 686: for (k = 0; k < longsPerEU; k++)
! 687: temp[k] ^= *(pbuf_current + k);
! 688: for (k = 0; k < longsPerEU; k++)
! 689: dest_smaller_current[k] = temp[k];
! 690: #endif
! 691: count++;
! 692: }
! 693: /*
! 694: * Check if all Encoding Unit in the data buffer have been decoded ?
! 695: * According to EvenOdd theory, if "RF_EO_MATRIX_DIM" is a prime
! 696: * number, this algorithm will covered all buffer.
! 697: */
! 698: RF_ASSERT(count == numRowInEncMatrix);
! 699: RF_Free((char *) P, bytesPerEU);
! 700: RF_Free((char *) temp, bytesPerEU);
! 701: }
! 702:
! 703:
! 704: /*****************************************************************************
! 705: * This function is called by double degraded read EO_200_CreateReadDAG.
! 706: *****************************************************************************/
! 707: int
! 708: rf_EvenOddDoubleRecoveryFunc(RF_DagNode_t *node)
! 709: {
! 710: int ndataParam = 0;
! 711: int np = node->numParams;
! 712: RF_AccessStripeMap_t *asmap = (RF_AccessStripeMap_t *)
! 713: node->params[np - 1].p;
! 714: RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 2].p;
! 715: RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & (raidPtr->Layout);
! 716: int i, prm, sector, nresults = node->numResults;
! 717: RF_SectorCount_t secPerSU = layoutPtr->sectorsPerStripeUnit;
! 718: unsigned sosAddr;
! 719: int two = 0, mallc_one = 0, mallc_two = 0; /*
! 720: * Flags to indicate if
! 721: * memory is allocated.
! 722: */
! 723: int bytesPerSector = rf_RaidAddressToByte(raidPtr, 1);
! 724: RF_PhysDiskAddr_t *ppda, *ppda2, *epda, *epda2, *pda, *pda0, *pda1,
! 725: npda;
! 726: RF_RowCol_t fcol[2], fsuoff[2], fsuend[2],
! 727: numDataCol = layoutPtr->numDataCol;
! 728: char **buf, *ebuf, *pbuf, *dest[2];
! 729: long *suoff = NULL, *suend = NULL, *prmToCol = NULL, psuoff, esuoff;
! 730: RF_SectorNum_t startSector, endSector;
! 731: RF_Etimer_t timer;
! 732: RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
! 733:
! 734: RF_ETIMER_START(timer);
! 735:
! 736: /*
! 737: * Find out the number of parameters that are pdas for data
! 738: * information.
! 739: */
! 740: for (i = 0; i <= np; i++)
! 741: if (((RF_PhysDiskAddr_t *) node->params[i].p)->type !=
! 742: RF_PDA_TYPE_DATA) {
! 743: ndataParam = i;
! 744: break;
! 745: }
! 746: RF_Malloc(buf, numDataCol * sizeof(char *), (char **));
! 747: if (ndataParam != 0) {
! 748: RF_Malloc(suoff, ndataParam * sizeof(long), (long *));
! 749: RF_Malloc(suend, ndataParam * sizeof(long), (long *));
! 750: RF_Malloc(prmToCol, ndataParam * sizeof(long), (long *));
! 751: }
! 752: if (asmap->failedPDAs[1] &&
! 753: (asmap->failedPDAs[1]->numSector +
! 754: asmap->failedPDAs[0]->numSector) < secPerSU) {
! 755: RF_ASSERT(0); /* Currently, no support for this situation. */
! 756: ppda = node->params[np - 6].p;
! 757: ppda2 = node->params[np - 5].p;
! 758: RF_ASSERT(ppda2->type == RF_PDA_TYPE_PARITY);
! 759: epda = node->params[np - 4].p;
! 760: epda2 = node->params[np - 3].p;
! 761: RF_ASSERT(epda2->type == RF_PDA_TYPE_Q);
! 762: two = 1;
! 763: } else {
! 764: ppda = node->params[np - 4].p;
! 765: epda = node->params[np - 3].p;
! 766: psuoff = rf_StripeUnitOffset(layoutPtr, ppda->startSector);
! 767: esuoff = rf_StripeUnitOffset(layoutPtr, epda->startSector);
! 768: RF_ASSERT(psuoff == esuoff);
! 769: }
! 770: /*
! 771: * The followings have three goals:
! 772: * 1. Determine the startSector to begin decoding and endSector
! 773: * to end decoding.
! 774: * 2. Determine the column numbers of the two failed disks.
! 775: * 3. Determine the offset and end offset of the access within
! 776: * each failed stripe unit.
! 777: */
! 778: if (nresults == 1) {
! 779: /* Find the startSector to begin decoding. */
! 780: pda = node->results[0];
! 781: bzero(pda->bufPtr, bytesPerSector * pda->numSector);
! 782: fsuoff[0] = rf_StripeUnitOffset(layoutPtr, pda->startSector);
! 783: fsuend[0] = fsuoff[0] + pda->numSector;
! 784: startSector = fsuoff[0];
! 785: endSector = fsuend[0];
! 786:
! 787: /* Find out the column of failed disk being accessed. */
! 788: fcol[0] = rf_EUCol(layoutPtr, pda->raidAddress);
! 789:
! 790: /* Find out the other failed column not accessed. */
! 791: sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr,
! 792: asmap->raidAddress);
! 793: for (i = 0; i < numDataCol; i++) {
! 794: npda.raidAddress = sosAddr + (i * secPerSU);
! 795: (raidPtr->Layout.map->MapSector) (raidPtr,
! 796: npda.raidAddress, &(npda.row), &(npda.col),
! 797: &(npda.startSector), 0);
! 798: /* Skip over dead disks. */
! 799: if (RF_DEAD_DISK(raidPtr
! 800: ->Disks[npda.row][npda.col].status))
! 801: if (i != fcol[0])
! 802: break;
! 803: }
! 804: RF_ASSERT(i < numDataCol);
! 805: fcol[1] = i;
! 806: } else {
! 807: RF_ASSERT(nresults == 2);
! 808: pda0 = node->results[0];
! 809: bzero(pda0->bufPtr, bytesPerSector * pda0->numSector);
! 810: pda1 = node->results[1];
! 811: bzero(pda1->bufPtr, bytesPerSector * pda1->numSector);
! 812: /*
! 813: * Determine the failed column numbers of the two failed
! 814: * disks.
! 815: */
! 816: fcol[0] = rf_EUCol(layoutPtr, pda0->raidAddress);
! 817: fcol[1] = rf_EUCol(layoutPtr, pda1->raidAddress);
! 818: /*
! 819: * Determine the offset and end offset of the access within
! 820: * each failed stripe unit.
! 821: */
! 822: fsuoff[0] = rf_StripeUnitOffset(layoutPtr, pda0->startSector);
! 823: fsuend[0] = fsuoff[0] + pda0->numSector;
! 824: fsuoff[1] = rf_StripeUnitOffset(layoutPtr, pda1->startSector);
! 825: fsuend[1] = fsuoff[1] + pda1->numSector;
! 826: /* Determine the startSector to begin decoding. */
! 827: startSector = RF_MIN(pda0->startSector, pda1->startSector);
! 828: /* Determine the endSector to end decoding. */
! 829: endSector = RF_MAX(fsuend[0], fsuend[1]);
! 830: }
! 831: /*
! 832: * Assign the beginning sector and the end sector for each parameter.
! 833: * Find out the corresponding column # for each parameter.
! 834: */
! 835: for (prm = 0; prm < ndataParam; prm++) {
! 836: pda = node->params[prm].p;
! 837: suoff[prm] = rf_StripeUnitOffset(layoutPtr, pda->startSector);
! 838: suend[prm] = suoff[prm] + pda->numSector;
! 839: prmToCol[prm] = rf_EUCol(layoutPtr, pda->raidAddress);
! 840: }
! 841: /*
! 842: * 'sector' is the sector for the current decoding algorithm. For each
! 843: * sector in the failed SU
! 844: * 1. Find out the corresponding parameters that cover the current
! 845: * sector and that are needed for the decoding of this sector in
! 846: * failed SU.
! 847: * 2. Find out if sector is in the shadow of any accessed failed SU.
! 848: * If not, malloc a temporary space of a sector in size.
! 849: */
! 850: for (sector = startSector; sector < endSector; sector++) {
! 851: if (nresults == 2)
! 852: if (!(fsuoff[0] <= sector && sector < fsuend[0]) &&
! 853: !(fsuoff[1] <= sector && sector < fsuend[1]))
! 854: continue;
! 855: for (prm = 0; prm < ndataParam; prm++)
! 856: if (suoff[prm] <= sector && sector < suend[prm])
! 857: buf[(prmToCol[prm])] = ((RF_PhysDiskAddr_t *)
! 858: node->params[prm].p)->bufPtr +
! 859: rf_RaidAddressToByte(raidPtr,
! 860: sector - suoff[prm]);
! 861: /*
! 862: * Find out if sector is in the shadow of any accessed failed
! 863: * SU. If yes, assign dest[0], dest[1] to point at suitable
! 864: * position of the buffer corresponding to failed SUs. If no,
! 865: * malloc a temporary space of a sector in size for
! 866: * destination of decoding.
! 867: */
! 868: RF_ASSERT(nresults == 1 || nresults == 2);
! 869: if (nresults == 1) {
! 870: dest[0] = ((RF_PhysDiskAddr_t *)
! 871: node->results[0])->bufPtr +
! 872: rf_RaidAddressToByte(raidPtr, sector - fsuoff[0]);
! 873: /* Always malloc temp buffer to dest[1]. */
! 874: RF_Malloc(dest[1], bytesPerSector, (char *));
! 875: bzero(dest[1], bytesPerSector);
! 876: mallc_two = 1;
! 877: } else {
! 878: if (fsuoff[0] <= sector && sector < fsuend[0])
! 879: dest[0] = ((RF_PhysDiskAddr_t *)
! 880: node->results[0])->bufPtr +
! 881: rf_RaidAddressToByte(raidPtr,
! 882: sector - fsuoff[0]);
! 883: else {
! 884: RF_Malloc(dest[0], bytesPerSector, (char *));
! 885: bzero(dest[0], bytesPerSector);
! 886: mallc_one = 1;
! 887: }
! 888: if (fsuoff[1] <= sector && sector < fsuend[1])
! 889: dest[1] = ((RF_PhysDiskAddr_t *)
! 890: node->results[1])->bufPtr +
! 891: rf_RaidAddressToByte(raidPtr,
! 892: sector - fsuoff[1]);
! 893: else {
! 894: RF_Malloc(dest[1], bytesPerSector, (char *));
! 895: bzero(dest[1], bytesPerSector);
! 896: mallc_two = 1;
! 897: }
! 898: RF_ASSERT(mallc_one == 0 || mallc_two == 0);
! 899: }
! 900: pbuf = ppda->bufPtr + rf_RaidAddressToByte(raidPtr,
! 901: sector - psuoff);
! 902: ebuf = epda->bufPtr + rf_RaidAddressToByte(raidPtr,
! 903: sector - esuoff);
! 904: /*
! 905: * After finish finding all needed sectors, call doubleEOdecode
! 906: * function for decoding one sector to destination.
! 907: */
! 908: rf_doubleEOdecode(raidPtr, buf, dest, fcol, pbuf, ebuf);
! 909: /*
! 910: * Free all allocated memory, and mark flag to indicate no
! 911: * memory is being allocated.
! 912: */
! 913: if (mallc_one == 1)
! 914: RF_Free(dest[0], bytesPerSector);
! 915: if (mallc_two == 1)
! 916: RF_Free(dest[1], bytesPerSector);
! 917: mallc_one = mallc_two = 0;
! 918: }
! 919: RF_Free(buf, numDataCol * sizeof(char *));
! 920: if (ndataParam != 0) {
! 921: RF_Free(suoff, ndataParam * sizeof(long));
! 922: RF_Free(suend, ndataParam * sizeof(long));
! 923: RF_Free(prmToCol, ndataParam * sizeof(long));
! 924: }
! 925: RF_ETIMER_STOP(timer);
! 926: RF_ETIMER_EVAL(timer);
! 927: if (tracerec) {
! 928: tracerec->q_us += RF_ETIMER_VAL_US(timer);
! 929: }
! 930: rf_GenericWakeupFunc(node, 0);
! 931: #if 1
! 932: return (0); /* XXX Is this even close !!?!?!!? GO */
! 933: #endif
! 934: }
! 935:
! 936:
! 937: /*
! 938: * Currently, only access of one of the two failed SU is allowed in this
! 939: * function. Also, asmap->numStripeUnitsAccessed is limited to be one,
! 940: * the RAIDframe will break large access into many accesses of single
! 941: * stripe unit.
! 942: */
! 943:
! 944: int
! 945: rf_EOWriteDoubleRecoveryFunc(RF_DagNode_t *node)
! 946: {
! 947: int np = node->numParams;
! 948: RF_AccessStripeMap_t *asmap =
! 949: (RF_AccessStripeMap_t *) node->params[np - 1].p;
! 950: RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 2].p;
! 951: RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) &(raidPtr->Layout);
! 952: RF_SectorNum_t sector;
! 953: RF_RowCol_t col, scol;
! 954: int prm, i, j;
! 955: RF_SectorCount_t secPerSU = layoutPtr->sectorsPerStripeUnit;
! 956: unsigned sosAddr;
! 957: unsigned bytesPerSector = rf_RaidAddressToByte(raidPtr, 1);
! 958: RF_int64 numbytes;
! 959: RF_SectorNum_t startSector, endSector;
! 960: RF_PhysDiskAddr_t *ppda, *epda, *pda, *fpda, npda;
! 961: RF_RowCol_t fcol[2], numDataCol = layoutPtr->numDataCol;
! 962: char **buf; /*
! 963: * buf[0], buf[1], buf[2], ... etc, point to
! 964: * buffer storing data read from col0, col1,
! 965: * col2.
! 966: */
! 967: char *ebuf, *pbuf, *dest[2], *olddata[2];
! 968: RF_Etimer_t timer;
! 969: RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
! 970:
! 971: RF_ASSERT(asmap->numDataFailed == 1); /*
! 972: * Currently only support this
! 973: * case, the other failed SU
! 974: * is not being accessed.
! 975: */
! 976: RF_ETIMER_START(timer);
! 977: RF_Malloc(buf, numDataCol * sizeof(char *), (char **));
! 978:
! 979: ppda = node->results[0]; /*
! 980: * Instead of being buffers,
! 981: * node->results[0] and [1]
! 982: * are Ppda and Epda.
! 983: */
! 984: epda = node->results[1];
! 985: fpda = asmap->failedPDAs[0];
! 986:
! 987: /* First, recovery the failed old SU using EvenOdd double decoding. */
! 988: /* Determine the startSector and endSector for decoding. */
! 989: startSector = rf_StripeUnitOffset(layoutPtr, fpda->startSector);
! 990: endSector = startSector + fpda->numSector;
! 991: /*
! 992: * Assign buf[col] pointers to point to each non-failed column and
! 993: * initialize the pbuf and ebuf to point at the beginning of each
! 994: * source buffers and destination buffers. */
! 995: for (prm = 0; prm < numDataCol - 2; prm++) {
! 996: pda = (RF_PhysDiskAddr_t *) node->params[prm].p;
! 997: col = rf_EUCol(layoutPtr, pda->raidAddress);
! 998: buf[col] = pda->bufPtr;
! 999: }
! 1000: /*
! 1001: * pbuf and ebuf: They will change values as double recovery decoding
! 1002: * goes on.
! 1003: */
! 1004: pbuf = ppda->bufPtr;
! 1005: ebuf = epda->bufPtr;
! 1006: /*
! 1007: * Find out the logical column numbers in the encoding matrix of the
! 1008: * two failed columns.
! 1009: */
! 1010: fcol[0] = rf_EUCol(layoutPtr, fpda->raidAddress);
! 1011:
! 1012: /* Find out the other failed column not accessed this time. */
! 1013: sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr,
! 1014: asmap->raidAddress);
! 1015: for (i = 0; i < numDataCol; i++) {
! 1016: npda.raidAddress = sosAddr + (i * secPerSU);
! 1017: (raidPtr->Layout.map->MapSector) (raidPtr, npda.raidAddress,
! 1018: &(npda.row), &(npda.col), &(npda.startSector), 0);
! 1019: /* Skip over dead disks. */
! 1020: if (RF_DEAD_DISK(raidPtr->Disks[npda.row][npda.col].status))
! 1021: if (i != fcol[0])
! 1022: break;
! 1023: }
! 1024: RF_ASSERT(i < numDataCol);
! 1025: fcol[1] = i;
! 1026: /* Assign temporary space to put recovered failed SU. */
! 1027: numbytes = fpda->numSector * bytesPerSector;
! 1028: RF_Malloc(olddata[0], numbytes, (char *));
! 1029: RF_Malloc(olddata[1], numbytes, (char *));
! 1030: dest[0] = olddata[0];
! 1031: dest[1] = olddata[1];
! 1032: bzero(olddata[0], numbytes);
! 1033: bzero(olddata[1], numbytes);
! 1034: /*
! 1035: * Begin the recovery decoding, initially buf[j], ebuf, pbuf, dest[j]
! 1036: * have already pointed at the beginning of each source buffers and
! 1037: * destination buffers.
! 1038: */
! 1039: for (sector = startSector, i = 0; sector < endSector; sector++, i++) {
! 1040: rf_doubleEOdecode(raidPtr, buf, dest, fcol, pbuf, ebuf);
! 1041: for (j = 0; j < numDataCol; j++)
! 1042: if ((j != fcol[0]) && (j != fcol[1]))
! 1043: buf[j] += bytesPerSector;
! 1044: dest[0] += bytesPerSector;
! 1045: dest[1] += bytesPerSector;
! 1046: ebuf += bytesPerSector;
! 1047: pbuf += bytesPerSector;
! 1048: }
! 1049: /*
! 1050: * After recovery, the buffer pointed by olddata[0] is the old failed
! 1051: * data. With new writing data and this old data, use small write to
! 1052: * calculate the new redundant informations.
! 1053: */
! 1054: /*
! 1055: * node->params[ 0, ... PDAPerDisk * (numDataCol - 2)-1 ] are Pdas of
! 1056: * Rrd; params[ PDAPerDisk*(numDataCol - 2), ... PDAPerDisk*numDataCol
! 1057: * -1 ] are Pdas of Rp, ( Rp2 ), Re, ( Re2 ) ; params[
! 1058: * PDAPerDisk*numDataCol, ... PDAPerDisk*numDataCol
! 1059: * +asmap->numStripeUnitsAccessed -asmap->numDataFailed-1] are Pdas of
! 1060: * wudNodes; For current implementation, we assume the simplest case:
! 1061: * asmap->numStripeUnitsAccessed == 1 and asmap->numDataFailed == 1
! 1062: * ie. PDAPerDisk = 1 then node->params[numDataCol] must be the new
! 1063: * data to be written to the failed disk. We first bxor the new data
! 1064: * into the old recovered data, then do the same things as small
! 1065: * write.
! 1066: */
! 1067:
! 1068: rf_bxor(((RF_PhysDiskAddr_t *) node->params[numDataCol].p)->bufPtr,
! 1069: olddata[0], numbytes, node->dagHdr->bp);
! 1070: /* Do new 'E' calculation. */
! 1071: /*
! 1072: * Find out the corresponding column in encoding matrix for write
! 1073: * column to be encoded into redundant disk 'E'.
! 1074: */
! 1075: scol = rf_EUCol(layoutPtr, fpda->raidAddress);
! 1076: /*
! 1077: * olddata[0] now is source buffer pointer; epda->bufPtr is the dest
! 1078: * buffer pointer.
! 1079: */
! 1080: rf_e_encToBuf(raidPtr, scol, olddata[0], RF_EO_MATRIX_DIM - 2,
! 1081: epda->bufPtr, fpda->numSector);
! 1082:
! 1083: /* Do new 'P' calculation. */
! 1084: rf_bxor(olddata[0], ppda->bufPtr, numbytes, node->dagHdr->bp);
! 1085: /* Free the allocated buffer. */
! 1086: RF_Free(olddata[0], numbytes);
! 1087: RF_Free(olddata[1], numbytes);
! 1088: RF_Free(buf, numDataCol * sizeof(char *));
! 1089:
! 1090: RF_ETIMER_STOP(timer);
! 1091: RF_ETIMER_EVAL(timer);
! 1092: if (tracerec) {
! 1093: tracerec->q_us += RF_ETIMER_VAL_US(timer);
! 1094: }
! 1095: rf_GenericWakeupFunc(node, 0);
! 1096: return (0);
! 1097: }
CVSweb