Annotation of sys/dev/raidframe/rf_dagdegrd.c, Revision 1.1
1.1 ! nbrk 1: /* $OpenBSD: rf_dagdegrd.c,v 1.6 2006/07/09 22:10:05 mk Exp $ */
! 2: /* $NetBSD: rf_dagdegrd.c,v 1.5 2000/01/07 03:40:57 oster Exp $ */
! 3:
! 4: /*
! 5: * Copyright (c) 1995 Carnegie-Mellon University.
! 6: * All rights reserved.
! 7: *
! 8: * Author: Mark Holland, Daniel Stodolsky, William V. Courtright II
! 9: *
! 10: * Permission to use, copy, modify and distribute this software and
! 11: * its documentation is hereby granted, provided that both the copyright
! 12: * notice and this permission notice appear in all copies of the
! 13: * software, derivative works or modified versions, and any portions
! 14: * thereof, and that both notices appear in supporting documentation.
! 15: *
! 16: * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
! 17: * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
! 18: * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
! 19: *
! 20: * Carnegie Mellon requests users of this software to return to
! 21: *
! 22: * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
! 23: * School of Computer Science
! 24: * Carnegie Mellon University
! 25: * Pittsburgh PA 15213-3890
! 26: *
! 27: * any improvements or extensions that they make and grant Carnegie the
! 28: * rights to redistribute these changes.
! 29: */
! 30:
! 31: /*
! 32: * rf_dagdegrd.c
! 33: *
! 34: * Code for creating degraded read DAGs.
! 35: */
! 36:
! 37: #include "rf_types.h"
! 38: #include "rf_raid.h"
! 39: #include "rf_dag.h"
! 40: #include "rf_dagutils.h"
! 41: #include "rf_dagfuncs.h"
! 42: #include "rf_debugMem.h"
! 43: #include "rf_memchunk.h"
! 44: #include "rf_general.h"
! 45: #include "rf_dagdegrd.h"
! 46:
! 47:
! 48: /*****************************************************************************
! 49: *
! 50: * General comments on DAG creation:
! 51: *
! 52: * All DAGs in this file use roll-away error recovery. Each DAG has a single
! 53: * commit node, usually called "Cmt". If an error occurs before the Cmt node
! 54: * is reached, the execution engine will halt forward execution and work
! 55: * backward through the graph, executing the undo functions. Assuming that
! 56: * each node in the graph prior to the Cmt node is undoable and atomic - or -
! 57: * does not make changes to permanent state, the graph will fail atomically.
! 58: * If an error occurs after the Cmt node executes, the engine will roll-forward
! 59: * through the graph, blindly executing nodes until it reaches the end.
! 60: * If a graph reaches the end, it is assumed to have completed successfully.
! 61: *
! 62: * A graph has only 1 Cmt node.
! 63: *
! 64: *****************************************************************************/
! 65:
! 66:
! 67: /*****************************************************************************
! 68: *
! 69: * The following wrappers map the standard DAG creation interface to the
! 70: * DAG creation routines. Additionally, these wrappers enable experimentation
! 71: * with new DAG structures by providing an extra level of indirection, allowing
! 72: * the DAG creation routines to be replaced at this single point.
! 73: *
! 74: *****************************************************************************/
! 75:
! 76: void
! 77: rf_CreateRaidFiveDegradedReadDAG(
! 78: RF_Raid_t *raidPtr,
! 79: RF_AccessStripeMap_t *asmap,
! 80: RF_DagHeader_t *dag_h,
! 81: void *bp,
! 82: RF_RaidAccessFlags_t flags,
! 83: RF_AllocListElem_t *allocList)
! 84: {
! 85: rf_CreateDegradedReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList,
! 86: &rf_xorRecoveryFuncs);
! 87: }
! 88:
! 89:
! 90: /*****************************************************************************
! 91: *
! 92: * DAG creation code begins here.
! 93: *
! 94: *****************************************************************************/
! 95:
! 96:
! 97: /*****************************************************************************
! 98: * Create a degraded read DAG for RAID level 1.
! 99: *
! 100: * Hdr -> Nil -> R(p/s)d -> Commit -> Trm
! 101: *
! 102: * The "Rd" node reads data from the surviving disk in the mirror pair.
! 103: * Rpd - read of primary copy
! 104: * Rsd - read of secondary copy
! 105: *
! 106: * Parameters: raidPtr - description of the physical array
! 107: * asmap - logical & physical addresses for this access
! 108: * bp - buffer ptr (for holding write data)
! 109: * flags - general flags (e.g. disk locking)
! 110: * allocList - list of memory allocated in DAG creation
! 111: *****************************************************************************/
! 112:
! 113: void
! 114: rf_CreateRaidOneDegradedReadDAG(
! 115: RF_Raid_t *raidPtr,
! 116: RF_AccessStripeMap_t *asmap,
! 117: RF_DagHeader_t *dag_h,
! 118: void *bp,
! 119: RF_RaidAccessFlags_t flags,
! 120: RF_AllocListElem_t *allocList)
! 121: {
! 122: RF_DagNode_t *nodes, *rdNode, *blockNode, *commitNode, *termNode;
! 123: RF_StripeNum_t parityStripeID;
! 124: RF_ReconUnitNum_t which_ru;
! 125: RF_PhysDiskAddr_t *pda;
! 126: int useMirror, i;
! 127:
! 128: useMirror = 0;
! 129: parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout),
! 130: asmap->raidAddress, &which_ru);
! 131: if (rf_dagDebug) {
! 132: printf("[Creating RAID level 1 degraded read DAG]\n");
! 133: }
! 134: dag_h->creator = "RaidOneDegradedReadDAG";
! 135: /* Alloc the Wnd nodes and the Wmir node. */
! 136: if (asmap->numDataFailed == 0)
! 137: useMirror = RF_FALSE;
! 138: else
! 139: useMirror = RF_TRUE;
! 140:
! 141: /* Total number of nodes = 1 + (block + commit + terminator). */
! 142: RF_CallocAndAdd(nodes, 4, sizeof(RF_DagNode_t), (RF_DagNode_t *),
! 143: allocList);
! 144: i = 0;
! 145: rdNode = &nodes[i];
! 146: i++;
! 147: blockNode = &nodes[i];
! 148: i++;
! 149: commitNode = &nodes[i];
! 150: i++;
! 151: termNode = &nodes[i];
! 152: i++;
! 153:
! 154: /*
! 155: * This dag can not commit until the commit node is reached. Errors
! 156: * prior to the commit point imply the dag has failed and must be
! 157: * retried.
! 158: */
! 159: dag_h->numCommitNodes = 1;
! 160: dag_h->numCommits = 0;
! 161: dag_h->numSuccedents = 1;
! 162:
! 163: /* Initialize the block, commit, and terminator nodes. */
! 164: rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
! 165: rf_NullNodeUndoFunc, NULL, 1, 0, 0, 0, dag_h, "Nil", allocList);
! 166: rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc,
! 167: rf_NullNodeUndoFunc, NULL, 1, 1, 0, 0, dag_h, "Cmt", allocList);
! 168: rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc,
! 169: rf_TerminateUndoFunc, NULL, 0, 1, 0, 0, dag_h, "Trm", allocList);
! 170:
! 171: pda = asmap->physInfo;
! 172: RF_ASSERT(pda != NULL);
! 173: /* parityInfo must describe entire parity unit. */
! 174: RF_ASSERT(asmap->parityInfo->next == NULL);
! 175:
! 176: /* Initialize the data node. */
! 177: if (!useMirror) {
! 178: /* Read primary copy of data. */
! 179: rf_InitNode(rdNode, rf_wait, RF_FALSE, rf_DiskReadFunc,
! 180: rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0,
! 181: dag_h, "Rpd", allocList);
! 182: rdNode->params[0].p = pda;
! 183: rdNode->params[1].p = pda->bufPtr;
! 184: rdNode->params[2].v = parityStripeID;
! 185: rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
! 186: 0, 0, which_ru);
! 187: } else {
! 188: /* Read secondary copy of data. */
! 189: rf_InitNode(rdNode, rf_wait, RF_FALSE, rf_DiskReadFunc,
! 190: rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0,
! 191: dag_h, "Rsd", allocList);
! 192: rdNode->params[0].p = asmap->parityInfo;
! 193: rdNode->params[1].p = pda->bufPtr;
! 194: rdNode->params[2].v = parityStripeID;
! 195: rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
! 196: 0, 0, which_ru);
! 197: }
! 198:
! 199: /* Connect header to block node. */
! 200: RF_ASSERT(dag_h->numSuccedents == 1);
! 201: RF_ASSERT(blockNode->numAntecedents == 0);
! 202: dag_h->succedents[0] = blockNode;
! 203:
! 204: /* Connect block node to rdnode. */
! 205: RF_ASSERT(blockNode->numSuccedents == 1);
! 206: RF_ASSERT(rdNode->numAntecedents == 1);
! 207: blockNode->succedents[0] = rdNode;
! 208: rdNode->antecedents[0] = blockNode;
! 209: rdNode->antType[0] = rf_control;
! 210:
! 211: /* Connect rdnode to commit node. */
! 212: RF_ASSERT(rdNode->numSuccedents == 1);
! 213: RF_ASSERT(commitNode->numAntecedents == 1);
! 214: rdNode->succedents[0] = commitNode;
! 215: commitNode->antecedents[0] = rdNode;
! 216: commitNode->antType[0] = rf_control;
! 217:
! 218: /* Connect commit node to terminator. */
! 219: RF_ASSERT(commitNode->numSuccedents == 1);
! 220: RF_ASSERT(termNode->numAntecedents == 1);
! 221: RF_ASSERT(termNode->numSuccedents == 0);
! 222: commitNode->succedents[0] = termNode;
! 223: termNode->antecedents[0] = commitNode;
! 224: termNode->antType[0] = rf_control;
! 225: }
! 226:
! 227:
! 228: /*****************************************************************************
! 229: *
! 230: * Create a DAG to perform a degraded-mode read of data within one stripe.
! 231: * This DAG is as follows:
! 232: *
! 233: * Hdr -> Block -> Rud -> Xor -> Cmt -> T
! 234: * -> Rrd ->
! 235: * -> Rp -->
! 236: *
! 237: * Each R node is a successor of the Block node.
! 238: * Each R node has a single successor arc, which goes to X.
! 239: * There is one Rud for each chunk of surviving user data requested by the
! 240: * user, and one Rrd for each chunk of surviving user data _not_ being read by
! 241: * the user.
! 242: * R = read, ud = user data, rd = recovery (surviving) data, p = parity
! 243: * X = XOR, C = Commit, T = terminate
! 244: *
! 245: * The block node guarantees a single source node.
! 246: *
! 247: * Note: The target buffer for the XOR node is set to the actual user buffer
! 248: * where the failed data is supposed to end up. This buffer is zero'd by the
! 249: * code here. Thus, if you create a degraded read dag, use it, and then
! 250: * re-use, you have to be sure to zero the target buffer prior to the re-use.
! 251: *
! 252: * The recfunc argument at the end specifies the name and function used for
! 253: * the redundancy recovery function.
! 254: *
! 255: *****************************************************************************/
! 256:
! 257: void
! 258: rf_CreateDegradedReadDAG(
! 259: RF_Raid_t *raidPtr,
! 260: RF_AccessStripeMap_t *asmap,
! 261: RF_DagHeader_t *dag_h,
! 262: void *bp,
! 263: RF_RaidAccessFlags_t flags,
! 264: RF_AllocListElem_t *allocList,
! 265: RF_RedFuncs_t *recFunc)
! 266: {
! 267: RF_DagNode_t *nodes, *rudNodes, *rrdNodes, *xorNode, *blockNode;
! 268: RF_DagNode_t *commitNode, *rpNode, *termNode;
! 269: int nNodes, nRrdNodes, nRudNodes, nXorBufs, i;
! 270: int j, paramNum;
! 271: RF_SectorCount_t sectorsPerSU;
! 272: RF_ReconUnitNum_t which_ru;
! 273: char *overlappingPDAs; /* A temporary array of flags. */
! 274: RF_AccessStripeMapHeader_t *new_asm_h[2];
! 275: RF_PhysDiskAddr_t *pda, *parityPDA;
! 276: RF_StripeNum_t parityStripeID;
! 277: RF_PhysDiskAddr_t *failedPDA;
! 278: RF_RaidLayout_t *layoutPtr;
! 279: char *rpBuf;
! 280:
! 281: layoutPtr = &(raidPtr->Layout);
! 282: /*
! 283: * failedPDA points to the pda within the asm that targets
! 284: * the failed disk.
! 285: */
! 286: failedPDA = asmap->failedPDAs[0];
! 287: parityStripeID = rf_RaidAddressToParityStripeID(layoutPtr,
! 288: asmap->raidAddress, &which_ru);
! 289: sectorsPerSU = layoutPtr->sectorsPerStripeUnit;
! 290:
! 291: if (rf_dagDebug) {
! 292: printf("[Creating degraded read DAG]\n");
! 293: }
! 294: RF_ASSERT(asmap->numDataFailed == 1);
! 295: dag_h->creator = "DegradedReadDAG";
! 296:
! 297: /*
! 298: * Generate two ASMs identifying the surviving data we need
! 299: * in order to recover the lost data.
! 300: */
! 301:
! 302: /* overlappingPDAs array must be zero'd. */
! 303: RF_Calloc(overlappingPDAs, asmap->numStripeUnitsAccessed,
! 304: sizeof(char), (char *));
! 305: rf_GenerateFailedAccessASMs(raidPtr, asmap, failedPDA, dag_h,
! 306: new_asm_h, &nXorBufs, &rpBuf, overlappingPDAs, allocList);
! 307:
! 308: /*
! 309: * Create all the nodes at once.
! 310: *
! 311: * -1 because no access is generated for the failed pda.
! 312: */
! 313: nRudNodes = asmap->numStripeUnitsAccessed - 1;
! 314: nRrdNodes = ((new_asm_h[0]) ?
! 315: new_asm_h[0]->stripeMap->numStripeUnitsAccessed : 0) +
! 316: ((new_asm_h[1]) ?
! 317: new_asm_h[1]->stripeMap->numStripeUnitsAccessed : 0);
! 318: nNodes = 5 + nRudNodes + nRrdNodes; /*
! 319: * lock, unlock, xor, Rp,
! 320: * Rud, Rrd
! 321: */
! 322: RF_CallocAndAdd(nodes, nNodes, sizeof(RF_DagNode_t), (RF_DagNode_t *),
! 323: allocList);
! 324: i = 0;
! 325: blockNode = &nodes[i];
! 326: i++;
! 327: commitNode = &nodes[i];
! 328: i++;
! 329: xorNode = &nodes[i];
! 330: i++;
! 331: rpNode = &nodes[i];
! 332: i++;
! 333: termNode = &nodes[i];
! 334: i++;
! 335: rudNodes = &nodes[i];
! 336: i += nRudNodes;
! 337: rrdNodes = &nodes[i];
! 338: i += nRrdNodes;
! 339: RF_ASSERT(i == nNodes);
! 340:
! 341: /* Initialize nodes. */
! 342: dag_h->numCommitNodes = 1;
! 343: dag_h->numCommits = 0;
! 344: /*
! 345: * This dag can not commit until the commit node is reached.
! 346: * Errors prior to the commit point imply the dag has failed.
! 347: */
! 348: dag_h->numSuccedents = 1;
! 349:
! 350: rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
! 351: rf_NullNodeUndoFunc, NULL, nRudNodes + nRrdNodes + 1, 0, 0, 0,
! 352: dag_h, "Nil", allocList);
! 353: rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc,
! 354: rf_NullNodeUndoFunc, NULL, 1, 1, 0, 0, dag_h, "Cmt", allocList);
! 355: rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc,
! 356: rf_TerminateUndoFunc, NULL, 0, 1, 0, 0, dag_h, "Trm", allocList);
! 357: rf_InitNode(xorNode, rf_wait, RF_FALSE, recFunc->simple,
! 358: rf_NullNodeUndoFunc, NULL, 1, nRudNodes + nRrdNodes + 1,
! 359: 2 * nXorBufs + 2, 1, dag_h, recFunc->SimpleName, allocList);
! 360:
! 361: /* Fill in the Rud nodes. */
! 362: for (pda = asmap->physInfo, i = 0; i < nRudNodes;
! 363: i++, pda = pda->next) {
! 364: if (pda == failedPDA) {
! 365: i--;
! 366: continue;
! 367: }
! 368: rf_InitNode(&rudNodes[i], rf_wait, RF_FALSE, rf_DiskReadFunc,
! 369: rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0,
! 370: dag_h, "Rud", allocList);
! 371: RF_ASSERT(pda);
! 372: rudNodes[i].params[0].p = pda;
! 373: rudNodes[i].params[1].p = pda->bufPtr;
! 374: rudNodes[i].params[2].v = parityStripeID;
! 375: rudNodes[i].params[3].v =
! 376: RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
! 377: }
! 378:
! 379: /* Fill in the Rrd nodes. */
! 380: i = 0;
! 381: if (new_asm_h[0]) {
! 382: for (pda = new_asm_h[0]->stripeMap->physInfo;
! 383: i < new_asm_h[0]->stripeMap->numStripeUnitsAccessed;
! 384: i++, pda = pda->next) {
! 385: rf_InitNode(&rrdNodes[i], rf_wait, RF_FALSE,
! 386: rf_DiskReadFunc, rf_DiskReadUndoFunc,
! 387: rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h,
! 388: "Rrd", allocList);
! 389: RF_ASSERT(pda);
! 390: rrdNodes[i].params[0].p = pda;
! 391: rrdNodes[i].params[1].p = pda->bufPtr;
! 392: rrdNodes[i].params[2].v = parityStripeID;
! 393: rrdNodes[i].params[3].v =
! 394: RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0,
! 395: which_ru);
! 396: }
! 397: }
! 398: if (new_asm_h[1]) {
! 399: for (j = 0, pda = new_asm_h[1]->stripeMap->physInfo;
! 400: j < new_asm_h[1]->stripeMap->numStripeUnitsAccessed;
! 401: j++, pda = pda->next) {
! 402: rf_InitNode(&rrdNodes[i + j], rf_wait, RF_FALSE,
! 403: rf_DiskReadFunc, rf_DiskReadUndoFunc,
! 404: rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h,
! 405: "Rrd", allocList);
! 406: RF_ASSERT(pda);
! 407: rrdNodes[i + j].params[0].p = pda;
! 408: rrdNodes[i + j].params[1].p = pda->bufPtr;
! 409: rrdNodes[i + j].params[2].v = parityStripeID;
! 410: rrdNodes[i + j].params[3].v =
! 411: RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0,
! 412: which_ru);
! 413: }
! 414: }
! 415: /* Make a PDA for the parity unit. */
! 416: RF_MallocAndAdd(parityPDA, sizeof(RF_PhysDiskAddr_t),
! 417: (RF_PhysDiskAddr_t *), allocList);
! 418: parityPDA->row = asmap->parityInfo->row;
! 419: parityPDA->col = asmap->parityInfo->col;
! 420: parityPDA->startSector = ((asmap->parityInfo->startSector /
! 421: sectorsPerSU) * sectorsPerSU) +
! 422: (failedPDA->startSector % sectorsPerSU);
! 423: parityPDA->numSector = failedPDA->numSector;
! 424:
! 425: /* Initialize the Rp node. */
! 426: rf_InitNode(rpNode, rf_wait, RF_FALSE, rf_DiskReadFunc,
! 427: rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h,
! 428: "Rp ", allocList);
! 429: rpNode->params[0].p = parityPDA;
! 430: rpNode->params[1].p = rpBuf;
! 431: rpNode->params[2].v = parityStripeID;
! 432: rpNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0,
! 433: which_ru);
! 434:
! 435: /*
! 436: * The last and nastiest step is to assign all
! 437: * the parameters of the Xor node.
! 438: */
! 439: paramNum = 0;
! 440: for (i = 0; i < nRrdNodes; i++) {
! 441: /* All the Rrd nodes need to be xored together. */
! 442: xorNode->params[paramNum++] = rrdNodes[i].params[0];
! 443: xorNode->params[paramNum++] = rrdNodes[i].params[1];
! 444: }
! 445: for (i = 0; i < nRudNodes; i++) {
! 446: /* Any Rud nodes that overlap the failed access need to be
! 447: * xored in. */
! 448: if (overlappingPDAs[i]) {
! 449: RF_MallocAndAdd(pda, sizeof(RF_PhysDiskAddr_t),
! 450: (RF_PhysDiskAddr_t *), allocList);
! 451: bcopy((char *) rudNodes[i].params[0].p, (char *) pda,
! 452: sizeof(RF_PhysDiskAddr_t));
! 453: rf_RangeRestrictPDA(raidPtr, failedPDA, pda,
! 454: RF_RESTRICT_DOBUFFER, 0);
! 455: xorNode->params[paramNum++].p = pda;
! 456: xorNode->params[paramNum++].p = pda->bufPtr;
! 457: }
! 458: }
! 459: RF_Free(overlappingPDAs, asmap->numStripeUnitsAccessed * sizeof(char));
! 460:
! 461: /* Install parity pda as last set of params to be xor'd. */
! 462: xorNode->params[paramNum++].p = parityPDA;
! 463: xorNode->params[paramNum++].p = rpBuf;
! 464:
! 465: /*
! 466: * The last 2 params to the recovery xor node are
! 467: * the failed PDA and the raidPtr.
! 468: */
! 469: xorNode->params[paramNum++].p = failedPDA;
! 470: xorNode->params[paramNum++].p = raidPtr;
! 471: RF_ASSERT(paramNum == 2 * nXorBufs + 2);
! 472:
! 473: /*
! 474: * The xor node uses results[0] as the target buffer.
! 475: * Set pointer and zero the buffer. In the kernel, this
! 476: * may be a user buffer in which case we have to remap it.
! 477: */
! 478: xorNode->results[0] = failedPDA->bufPtr;
! 479: RF_BZERO(bp, failedPDA->bufPtr, rf_RaidAddressToByte(raidPtr,
! 480: failedPDA->numSector));
! 481:
! 482: /* Connect nodes to form graph. */
! 483: /* Connect the header to the block node. */
! 484: RF_ASSERT(dag_h->numSuccedents == 1);
! 485: RF_ASSERT(blockNode->numAntecedents == 0);
! 486: dag_h->succedents[0] = blockNode;
! 487:
! 488: /* Connect the block node to the read nodes. */
! 489: RF_ASSERT(blockNode->numSuccedents == (1 + nRrdNodes + nRudNodes));
! 490: RF_ASSERT(rpNode->numAntecedents == 1);
! 491: blockNode->succedents[0] = rpNode;
! 492: rpNode->antecedents[0] = blockNode;
! 493: rpNode->antType[0] = rf_control;
! 494: for (i = 0; i < nRrdNodes; i++) {
! 495: RF_ASSERT(rrdNodes[i].numSuccedents == 1);
! 496: blockNode->succedents[1 + i] = &rrdNodes[i];
! 497: rrdNodes[i].antecedents[0] = blockNode;
! 498: rrdNodes[i].antType[0] = rf_control;
! 499: }
! 500: for (i = 0; i < nRudNodes; i++) {
! 501: RF_ASSERT(rudNodes[i].numSuccedents == 1);
! 502: blockNode->succedents[1 + nRrdNodes + i] = &rudNodes[i];
! 503: rudNodes[i].antecedents[0] = blockNode;
! 504: rudNodes[i].antType[0] = rf_control;
! 505: }
! 506:
! 507: /* Connect the read nodes to the xor node. */
! 508: RF_ASSERT(xorNode->numAntecedents == (1 + nRrdNodes + nRudNodes));
! 509: RF_ASSERT(rpNode->numSuccedents == 1);
! 510: rpNode->succedents[0] = xorNode;
! 511: xorNode->antecedents[0] = rpNode;
! 512: xorNode->antType[0] = rf_trueData;
! 513: for (i = 0; i < nRrdNodes; i++) {
! 514: RF_ASSERT(rrdNodes[i].numSuccedents == 1);
! 515: rrdNodes[i].succedents[0] = xorNode;
! 516: xorNode->antecedents[1 + i] = &rrdNodes[i];
! 517: xorNode->antType[1 + i] = rf_trueData;
! 518: }
! 519: for (i = 0; i < nRudNodes; i++) {
! 520: RF_ASSERT(rudNodes[i].numSuccedents == 1);
! 521: rudNodes[i].succedents[0] = xorNode;
! 522: xorNode->antecedents[1 + nRrdNodes + i] = &rudNodes[i];
! 523: xorNode->antType[1 + nRrdNodes + i] = rf_trueData;
! 524: }
! 525:
! 526: /* Connect the xor node to the commit node. */
! 527: RF_ASSERT(xorNode->numSuccedents == 1);
! 528: RF_ASSERT(commitNode->numAntecedents == 1);
! 529: xorNode->succedents[0] = commitNode;
! 530: commitNode->antecedents[0] = xorNode;
! 531: commitNode->antType[0] = rf_control;
! 532:
! 533: /* Connect the termNode to the commit node. */
! 534: RF_ASSERT(commitNode->numSuccedents == 1);
! 535: RF_ASSERT(termNode->numAntecedents == 1);
! 536: RF_ASSERT(termNode->numSuccedents == 0);
! 537: commitNode->succedents[0] = termNode;
! 538: termNode->antType[0] = rf_control;
! 539: termNode->antecedents[0] = commitNode;
! 540: }
! 541:
! 542:
! 543: /*****************************************************************************
! 544: * Create a degraded read DAG for Chained Declustering.
! 545: *
! 546: * Hdr -> Nil -> R(p/s)d -> Cmt -> Trm
! 547: *
! 548: * The "Rd" node reads data from the surviving disk in the mirror pair
! 549: * Rpd - read of primary copy
! 550: * Rsd - read of secondary copy
! 551: *
! 552: * Parameters: raidPtr - description of the physical array
! 553: * asmap - logical & physical addresses for this access
! 554: * bp - buffer ptr (for holding write data)
! 555: * flags - general flags (e.g. disk locking)
! 556: * allocList - list of memory allocated in DAG creation
! 557: *****************************************************************************/
! 558:
! 559: void
! 560: rf_CreateRaidCDegradedReadDAG(
! 561: RF_Raid_t *raidPtr,
! 562: RF_AccessStripeMap_t *asmap,
! 563: RF_DagHeader_t *dag_h,
! 564: void *bp,
! 565: RF_RaidAccessFlags_t flags,
! 566: RF_AllocListElem_t *allocList
! 567: )
! 568: {
! 569: RF_DagNode_t *nodes, *rdNode, *blockNode, *commitNode, *termNode;
! 570: RF_StripeNum_t parityStripeID;
! 571: int useMirror, i, shiftable;
! 572: RF_ReconUnitNum_t which_ru;
! 573: RF_PhysDiskAddr_t *pda;
! 574:
! 575: if ((asmap->numDataFailed + asmap->numParityFailed) == 0) {
! 576: shiftable = RF_TRUE;
! 577: } else {
! 578: shiftable = RF_FALSE;
! 579: }
! 580: useMirror = 0;
! 581: parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout),
! 582: asmap->raidAddress, &which_ru);
! 583:
! 584: if (rf_dagDebug) {
! 585: printf("[Creating RAID C degraded read DAG]\n");
! 586: }
! 587: dag_h->creator = "RaidCDegradedReadDAG";
! 588: /* Alloc the Wnd nodes and the Wmir node. */
! 589: if (asmap->numDataFailed == 0)
! 590: useMirror = RF_FALSE;
! 591: else
! 592: useMirror = RF_TRUE;
! 593:
! 594: /* total number of nodes = 1 + (block + commit + terminator) */
! 595: RF_CallocAndAdd(nodes, 4, sizeof(RF_DagNode_t), (RF_DagNode_t *),
! 596: allocList);
! 597: i = 0;
! 598: rdNode = &nodes[i];
! 599: i++;
! 600: blockNode = &nodes[i];
! 601: i++;
! 602: commitNode = &nodes[i];
! 603: i++;
! 604: termNode = &nodes[i];
! 605: i++;
! 606:
! 607: /*
! 608: * This dag can not commit until the commit node is reached.
! 609: * Errors prior to the commit point imply the dag has failed
! 610: * and must be retried.
! 611: */
! 612: dag_h->numCommitNodes = 1;
! 613: dag_h->numCommits = 0;
! 614: dag_h->numSuccedents = 1;
! 615:
! 616: /* initialize the block, commit, and terminator nodes */
! 617: rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
! 618: rf_NullNodeUndoFunc, NULL, 1, 0, 0, 0, dag_h, "Nil", allocList);
! 619: rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc,
! 620: rf_NullNodeUndoFunc, NULL, 1, 1, 0, 0, dag_h, "Cmt", allocList);
! 621: rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc,
! 622: rf_TerminateUndoFunc, NULL, 0, 1, 0, 0, dag_h, "Trm", allocList);
! 623:
! 624: pda = asmap->physInfo;
! 625: RF_ASSERT(pda != NULL);
! 626: /* ParityInfo must describe entire parity unit. */
! 627: RF_ASSERT(asmap->parityInfo->next == NULL);
! 628:
! 629: /* Initialize the data node. */
! 630: if (!useMirror) {
! 631: rf_InitNode(rdNode, rf_wait, RF_FALSE, rf_DiskReadFunc,
! 632: rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0,
! 633: dag_h, "Rpd", allocList);
! 634: if (shiftable && rf_compute_workload_shift(raidPtr, pda)) {
! 635: /* Shift this read to the next disk in line. */
! 636: rdNode->params[0].p = asmap->parityInfo;
! 637: rdNode->params[1].p = pda->bufPtr;
! 638: rdNode->params[2].v = parityStripeID;
! 639: rdNode->params[3].v = RF_CREATE_PARAM3(
! 640: RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
! 641: } else {
! 642: /* Read primary copy. */
! 643: rdNode->params[0].p = pda;
! 644: rdNode->params[1].p = pda->bufPtr;
! 645: rdNode->params[2].v = parityStripeID;
! 646: rdNode->params[3].v = RF_CREATE_PARAM3(
! 647: RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
! 648: }
! 649: } else {
! 650: /* Read secondary copy of data. */
! 651: rf_InitNode(rdNode, rf_wait, RF_FALSE, rf_DiskReadFunc,
! 652: rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0,
! 653: dag_h, "Rsd", allocList);
! 654: rdNode->params[0].p = asmap->parityInfo;
! 655: rdNode->params[1].p = pda->bufPtr;
! 656: rdNode->params[2].v = parityStripeID;
! 657: rdNode->params[3].v =
! 658: RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);
! 659: }
! 660:
! 661: /* Connect header to block node. */
! 662: RF_ASSERT(dag_h->numSuccedents == 1);
! 663: RF_ASSERT(blockNode->numAntecedents == 0);
! 664: dag_h->succedents[0] = blockNode;
! 665:
! 666: /* Connect block node to rdnode. */
! 667: RF_ASSERT(blockNode->numSuccedents == 1);
! 668: RF_ASSERT(rdNode->numAntecedents == 1);
! 669: blockNode->succedents[0] = rdNode;
! 670: rdNode->antecedents[0] = blockNode;
! 671: rdNode->antType[0] = rf_control;
! 672:
! 673: /* Connect rdnode to commit node. */
! 674: RF_ASSERT(rdNode->numSuccedents == 1);
! 675: RF_ASSERT(commitNode->numAntecedents == 1);
! 676: rdNode->succedents[0] = commitNode;
! 677: commitNode->antecedents[0] = rdNode;
! 678: commitNode->antType[0] = rf_control;
! 679:
! 680: /* Connect commit node to terminator. */
! 681: RF_ASSERT(commitNode->numSuccedents == 1);
! 682: RF_ASSERT(termNode->numAntecedents == 1);
! 683: RF_ASSERT(termNode->numSuccedents == 0);
! 684: commitNode->succedents[0] = termNode;
! 685: termNode->antecedents[0] = commitNode;
! 686: termNode->antType[0] = rf_control;
! 687: }
! 688:
! 689: /*
! 690: * XXX move this elsewhere ?
! 691: */
! 692: void
! 693: rf_DD_GenerateFailedAccessASMs(
! 694: RF_Raid_t *raidPtr,
! 695: RF_AccessStripeMap_t *asmap,
! 696: RF_PhysDiskAddr_t **pdap,
! 697: int *nNodep,
! 698: RF_PhysDiskAddr_t **pqpdap,
! 699: int *nPQNodep,
! 700: RF_AllocListElem_t *allocList
! 701: )
! 702: {
! 703: RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
! 704: int PDAPerDisk, i;
! 705: RF_SectorCount_t secPerSU = layoutPtr->sectorsPerStripeUnit;
! 706: int numDataCol = layoutPtr->numDataCol;
! 707: int state;
! 708: RF_SectorNum_t suoff, suend;
! 709: unsigned firstDataCol, napdas, count;
! 710: RF_SectorNum_t fone_start, fone_end, ftwo_start = 0, ftwo_end = 0;
! 711: RF_PhysDiskAddr_t *fone = asmap->failedPDAs[0];
! 712: RF_PhysDiskAddr_t *ftwo = asmap->failedPDAs[1];
! 713: RF_PhysDiskAddr_t *pda_p;
! 714: RF_PhysDiskAddr_t *phys_p;
! 715: RF_RaidAddr_t sosAddr;
! 716:
! 717: /*
! 718: * Determine how many pda's we will have to generate per unaccessed
! 719: * stripe. If there is only one failed data unit, it is one; if two,
! 720: * possibly two, depending whether they overlap.
! 721: */
! 722:
! 723: fone_start = rf_StripeUnitOffset(layoutPtr, fone->startSector);
! 724: fone_end = fone_start + fone->numSector;
! 725:
! 726: #define CONS_PDA(if,start,num) do { \
! 727: pda_p->row = asmap->if->row; \
! 728: pda_p->col = asmap->if->col; \
! 729: pda_p->startSector = ((asmap->if->startSector / secPerSU) * \
! 730: secPerSU) + start; \
! 731: pda_p->numSector = num; \
! 732: pda_p->next = NULL; \
! 733: RF_MallocAndAdd(pda_p->bufPtr, \
! 734: rf_RaidAddressToByte(raidPtr,num),(char *), allocList); \
! 735: } while (0)
! 736:
! 737: if (asmap->numDataFailed == 1) {
! 738: PDAPerDisk = 1;
! 739: state = 1;
! 740: RF_MallocAndAdd(*pqpdap, 2 * sizeof(RF_PhysDiskAddr_t),
! 741: (RF_PhysDiskAddr_t *), allocList);
! 742: pda_p = *pqpdap;
! 743: /* Build p. */
! 744: CONS_PDA(parityInfo, fone_start, fone->numSector);
! 745: pda_p->type = RF_PDA_TYPE_PARITY;
! 746: pda_p++;
! 747: /* Build q. */
! 748: CONS_PDA(qInfo, fone_start, fone->numSector);
! 749: pda_p->type = RF_PDA_TYPE_Q;
! 750: } else {
! 751: ftwo_start = rf_StripeUnitOffset(layoutPtr, ftwo->startSector);
! 752: ftwo_end = ftwo_start + ftwo->numSector;
! 753: if (fone->numSector + ftwo->numSector > secPerSU) {
! 754: PDAPerDisk = 1;
! 755: state = 2;
! 756: RF_MallocAndAdd(*pqpdap, 2 * sizeof(RF_PhysDiskAddr_t),
! 757: (RF_PhysDiskAddr_t *), allocList);
! 758: pda_p = *pqpdap;
! 759: CONS_PDA(parityInfo, 0, secPerSU);
! 760: pda_p->type = RF_PDA_TYPE_PARITY;
! 761: pda_p++;
! 762: CONS_PDA(qInfo, 0, secPerSU);
! 763: pda_p->type = RF_PDA_TYPE_Q;
! 764: } else {
! 765: PDAPerDisk = 2;
! 766: state = 3;
! 767: /* Four of them, fone, then ftwo. */
! 768: RF_MallocAndAdd(*pqpdap, 4 * sizeof(RF_PhysDiskAddr_t),
! 769: (RF_PhysDiskAddr_t *), allocList);
! 770: pda_p = *pqpdap;
! 771: CONS_PDA(parityInfo, fone_start, fone->numSector);
! 772: pda_p->type = RF_PDA_TYPE_PARITY;
! 773: pda_p++;
! 774: CONS_PDA(qInfo, fone_start, fone->numSector);
! 775: pda_p->type = RF_PDA_TYPE_Q;
! 776: pda_p++;
! 777: CONS_PDA(parityInfo, ftwo_start, ftwo->numSector);
! 778: pda_p->type = RF_PDA_TYPE_PARITY;
! 779: pda_p++;
! 780: CONS_PDA(qInfo, ftwo_start, ftwo->numSector);
! 781: pda_p->type = RF_PDA_TYPE_Q;
! 782: }
! 783: }
! 784: /* Figure out number of nonaccessed pda. */
! 785: napdas = PDAPerDisk * (numDataCol - asmap->numStripeUnitsAccessed -
! 786: (ftwo == NULL ? 1 : 0));
! 787: *nPQNodep = PDAPerDisk;
! 788:
! 789: /*
! 790: * Sweep over the over accessed pda's, figuring out the number of
! 791: * additional pda's to generate. Of course, skip the failed ones.
! 792: */
! 793:
! 794: count = 0;
! 795: for (pda_p = asmap->physInfo; pda_p; pda_p = pda_p->next) {
! 796: if ((pda_p == fone) || (pda_p == ftwo))
! 797: continue;
! 798: suoff = rf_StripeUnitOffset(layoutPtr, pda_p->startSector);
! 799: suend = suoff + pda_p->numSector;
! 800: switch (state) {
! 801: case 1: /* One failed PDA to overlap. */
! 802: /*
! 803: * If a PDA doesn't contain the failed unit, it can
! 804: * only miss the start or end, not both.
! 805: */
! 806: if ((suoff > fone_start) || (suend < fone_end))
! 807: count++;
! 808: break;
! 809: case 2: /* Whole stripe. */
! 810: if (suoff) /* Leak at begining. */
! 811: count++;
! 812: if (suend < numDataCol) /* Leak at end. */
! 813: count++;
! 814: break;
! 815: case 3: /* Two disjoint units. */
! 816: if ((suoff > fone_start) || (suend < fone_end))
! 817: count++;
! 818: if ((suoff > ftwo_start) || (suend < ftwo_end))
! 819: count++;
! 820: break;
! 821: default:
! 822: RF_PANIC();
! 823: }
! 824: }
! 825:
! 826: napdas += count;
! 827: *nNodep = napdas;
! 828: if (napdas == 0)
! 829: return; /* short circuit */
! 830:
! 831: /* Allocate up our list of pda's. */
! 832:
! 833: RF_CallocAndAdd(pda_p, napdas, sizeof(RF_PhysDiskAddr_t),
! 834: (RF_PhysDiskAddr_t *), allocList);
! 835: *pdap = pda_p;
! 836:
! 837: /* Link them together. */
! 838: for (i = 0; i < (napdas - 1); i++)
! 839: pda_p[i].next = pda_p + (i + 1);
! 840:
! 841: /* March through the one's up to the first accessed disk. */
! 842: firstDataCol = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout),
! 843: asmap->physInfo->raidAddress) % numDataCol;
! 844: sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr,
! 845: asmap->raidAddress);
! 846: for (i = 0; i < firstDataCol; i++) {
! 847: if ((pda_p - (*pdap)) == napdas)
! 848: continue;
! 849: pda_p->type = RF_PDA_TYPE_DATA;
! 850: pda_p->raidAddress = sosAddr + (i * secPerSU);
! 851: (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress,
! 852: &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0);
! 853: /* Skip over dead disks. */
! 854: if (RF_DEAD_DISK(raidPtr->Disks[pda_p->row][pda_p->col].status))
! 855: continue;
! 856: switch (state) {
! 857: case 1: /* Fone. */
! 858: pda_p->numSector = fone->numSector;
! 859: pda_p->raidAddress += fone_start;
! 860: pda_p->startSector += fone_start;
! 861: RF_MallocAndAdd(pda_p->bufPtr,
! 862: rf_RaidAddressToByte(raidPtr, pda_p->numSector),
! 863: (char *), allocList);
! 864: break;
! 865: case 2: /* Full stripe. */
! 866: pda_p->numSector = secPerSU;
! 867: RF_MallocAndAdd(pda_p->bufPtr,
! 868: rf_RaidAddressToByte(raidPtr, secPerSU),
! 869: (char *), allocList);
! 870: break;
! 871: case 3: /* Two slabs. */
! 872: pda_p->numSector = fone->numSector;
! 873: pda_p->raidAddress += fone_start;
! 874: pda_p->startSector += fone_start;
! 875: RF_MallocAndAdd(pda_p->bufPtr,
! 876: rf_RaidAddressToByte(raidPtr, pda_p->numSector),
! 877: (char *), allocList);
! 878: pda_p++;
! 879: pda_p->type = RF_PDA_TYPE_DATA;
! 880: pda_p->raidAddress = sosAddr + (i * secPerSU);
! 881: (raidPtr->Layout.map->MapSector) (raidPtr,
! 882: pda_p->raidAddress, &(pda_p->row), &(pda_p->col),
! 883: &(pda_p->startSector), 0);
! 884: pda_p->numSector = ftwo->numSector;
! 885: pda_p->raidAddress += ftwo_start;
! 886: pda_p->startSector += ftwo_start;
! 887: RF_MallocAndAdd(pda_p->bufPtr,
! 888: rf_RaidAddressToByte(raidPtr, pda_p->numSector),
! 889: (char *), allocList);
! 890: break;
! 891: default:
! 892: RF_PANIC();
! 893: }
! 894: pda_p++;
! 895: }
! 896:
! 897: /* March through the touched stripe units. */
! 898: for (phys_p = asmap->physInfo; phys_p; phys_p = phys_p->next, i++) {
! 899: if ((phys_p == asmap->failedPDAs[0]) ||
! 900: (phys_p == asmap->failedPDAs[1]))
! 901: continue;
! 902: suoff = rf_StripeUnitOffset(layoutPtr, phys_p->startSector);
! 903: suend = suoff + phys_p->numSector;
! 904: switch (state) {
! 905: case 1: /* Single buffer. */
! 906: if (suoff > fone_start) {
! 907: RF_ASSERT(suend >= fone_end);
! 908: /*
! 909: * The data read starts after the mapped
! 910: * access, snip off the begining.
! 911: */
! 912: pda_p->numSector = suoff - fone_start;
! 913: pda_p->raidAddress = sosAddr + (i * secPerSU)
! 914: + fone_start;
! 915: (raidPtr->Layout.map->MapSector) (raidPtr,
! 916: pda_p->raidAddress, &(pda_p->row),
! 917: &(pda_p->col), &(pda_p->startSector), 0);
! 918: RF_MallocAndAdd(pda_p->bufPtr,
! 919: rf_RaidAddressToByte(raidPtr,
! 920: pda_p->numSector), (char *), allocList);
! 921: pda_p++;
! 922: }
! 923: if (suend < fone_end) {
! 924: RF_ASSERT(suoff <= fone_start);
! 925: /*
! 926: * The data read stops before the end of the
! 927: * failed access, extend.
! 928: */
! 929: pda_p->numSector = fone_end - suend;
! 930: pda_p->raidAddress = sosAddr + (i * secPerSU)
! 931: + suend; /* off by one? */
! 932: (raidPtr->Layout.map->MapSector) (raidPtr,
! 933: pda_p->raidAddress, &(pda_p->row),
! 934: &(pda_p->col), &(pda_p->startSector), 0);
! 935: RF_MallocAndAdd(pda_p->bufPtr,
! 936: rf_RaidAddressToByte(raidPtr,
! 937: pda_p->numSector), (char *), allocList);
! 938: pda_p++;
! 939: }
! 940: break;
! 941: case 2: /* Whole stripe unit. */
! 942: RF_ASSERT((suoff == 0) || (suend == secPerSU));
! 943: if (suend < secPerSU) {
! 944: /* Short read, snip from end on. */
! 945: pda_p->numSector = secPerSU - suend;
! 946: pda_p->raidAddress = sosAddr + (i * secPerSU)
! 947: + suend; /* off by one? */
! 948: (raidPtr->Layout.map->MapSector) (raidPtr,
! 949: pda_p->raidAddress, &(pda_p->row),
! 950: &(pda_p->col), &(pda_p->startSector), 0);
! 951: RF_MallocAndAdd(pda_p->bufPtr,
! 952: rf_RaidAddressToByte(raidPtr,
! 953: pda_p->numSector), (char *), allocList);
! 954: pda_p++;
! 955: } else
! 956: if (suoff > 0) {
! 957: /* Short at front. */
! 958: pda_p->numSector = suoff;
! 959: pda_p->raidAddress = sosAddr +
! 960: (i * secPerSU);
! 961: (raidPtr->Layout.map->MapSector)
! 962: (raidPtr, pda_p->raidAddress,
! 963: &(pda_p->row), &(pda_p->col),
! 964: &(pda_p->startSector), 0);
! 965: RF_MallocAndAdd(pda_p->bufPtr,
! 966: rf_RaidAddressToByte(raidPtr,
! 967: pda_p->numSector), (char *),
! 968: allocList);
! 969: pda_p++;
! 970: }
! 971: break;
! 972: case 3: /* Two nonoverlapping failures. */
! 973: if ((suoff > fone_start) || (suend < fone_end)) {
! 974: if (suoff > fone_start) {
! 975: RF_ASSERT(suend >= fone_end);
! 976: /*
! 977: * The data read starts after the
! 978: * mapped access, snip off the
! 979: * begining.
! 980: */
! 981: pda_p->numSector = suoff - fone_start;
! 982: pda_p->raidAddress = sosAddr +
! 983: (i * secPerSU) + fone_start;
! 984: (raidPtr->Layout.map->MapSector)
! 985: (raidPtr, pda_p->raidAddress,
! 986: &(pda_p->row), &(pda_p->col),
! 987: &(pda_p->startSector), 0);
! 988: RF_MallocAndAdd(pda_p->bufPtr,
! 989: rf_RaidAddressToByte(raidPtr,
! 990: pda_p->numSector), (char *),
! 991: allocList);
! 992: pda_p++;
! 993: }
! 994: if (suend < fone_end) {
! 995: RF_ASSERT(suoff <= fone_start);
! 996: /*
! 997: * The data read stops before the end
! 998: * of the failed access, extend.
! 999: */
! 1000: pda_p->numSector = fone_end - suend;
! 1001: pda_p->raidAddress = sosAddr +
! 1002: (i * secPerSU) +
! 1003: suend; /* Off by one ? */
! 1004: (raidPtr->Layout.map->MapSector)
! 1005: (raidPtr, pda_p->raidAddress,
! 1006: &(pda_p->row), &(pda_p->col),
! 1007: &(pda_p->startSector), 0);
! 1008: RF_MallocAndAdd(pda_p->bufPtr,
! 1009: rf_RaidAddressToByte(raidPtr,
! 1010: pda_p->numSector), (char *),
! 1011: allocList);
! 1012: pda_p++;
! 1013: }
! 1014: }
! 1015: if ((suoff > ftwo_start) || (suend < ftwo_end)) {
! 1016: if (suoff > ftwo_start) {
! 1017: RF_ASSERT(suend >= ftwo_end);
! 1018: /*
! 1019: * The data read starts after the
! 1020: * mapped access, snip off the
! 1021: * begining.
! 1022: */
! 1023: pda_p->numSector = suoff - ftwo_start;
! 1024: pda_p->raidAddress = sosAddr +
! 1025: (i * secPerSU) + ftwo_start;
! 1026: (raidPtr->Layout.map->MapSector)
! 1027: (raidPtr, pda_p->raidAddress,
! 1028: &(pda_p->row), &(pda_p->col),
! 1029: &(pda_p->startSector), 0);
! 1030: RF_MallocAndAdd(pda_p->bufPtr,
! 1031: rf_RaidAddressToByte(raidPtr,
! 1032: pda_p->numSector), (char *),
! 1033: allocList);
! 1034: pda_p++;
! 1035: }
! 1036: if (suend < ftwo_end) {
! 1037: RF_ASSERT(suoff <= ftwo_start);
! 1038: /*
! 1039: * The data read stops before the end
! 1040: * of the failed access, extend.
! 1041: */
! 1042: pda_p->numSector = ftwo_end - suend;
! 1043: pda_p->raidAddress = sosAddr +
! 1044: (i * secPerSU) +
! 1045: suend; /* Off by one ? */
! 1046: (raidPtr->Layout.map->MapSector)
! 1047: (raidPtr, pda_p->raidAddress,
! 1048: &(pda_p->row), &(pda_p->col),
! 1049: &(pda_p->startSector), 0);
! 1050: RF_MallocAndAdd(pda_p->bufPtr,
! 1051: rf_RaidAddressToByte(raidPtr,
! 1052: pda_p->numSector), (char *),
! 1053: allocList);
! 1054: pda_p++;
! 1055: }
! 1056: }
! 1057: break;
! 1058: default:
! 1059: RF_PANIC();
! 1060: }
! 1061: }
! 1062:
! 1063: /* After the last accessed disk. */
! 1064: for (; i < numDataCol; i++) {
! 1065: if ((pda_p - (*pdap)) == napdas)
! 1066: continue;
! 1067: pda_p->type = RF_PDA_TYPE_DATA;
! 1068: pda_p->raidAddress = sosAddr + (i * secPerSU);
! 1069: (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress,
! 1070: &(pda_p->row), &(pda_p->col), &(pda_p->startSector), 0);
! 1071: /* Skip over dead disks. */
! 1072: if (RF_DEAD_DISK(raidPtr->Disks[pda_p->row][pda_p->col].status))
! 1073: continue;
! 1074: switch (state) {
! 1075: case 1: /* Fone. */
! 1076: pda_p->numSector = fone->numSector;
! 1077: pda_p->raidAddress += fone_start;
! 1078: pda_p->startSector += fone_start;
! 1079: RF_MallocAndAdd(pda_p->bufPtr,
! 1080: rf_RaidAddressToByte(raidPtr, pda_p->numSector),
! 1081: (char *), allocList);
! 1082: break;
! 1083: case 2: /* Full stripe. */
! 1084: pda_p->numSector = secPerSU;
! 1085: RF_MallocAndAdd(pda_p->bufPtr,
! 1086: rf_RaidAddressToByte(raidPtr, secPerSU),
! 1087: (char *), allocList);
! 1088: break;
! 1089: case 3: /* Two slabs. */
! 1090: pda_p->numSector = fone->numSector;
! 1091: pda_p->raidAddress += fone_start;
! 1092: pda_p->startSector += fone_start;
! 1093: RF_MallocAndAdd(pda_p->bufPtr,
! 1094: rf_RaidAddressToByte(raidPtr, pda_p->numSector),
! 1095: (char *), allocList);
! 1096: pda_p++;
! 1097: pda_p->type = RF_PDA_TYPE_DATA;
! 1098: pda_p->raidAddress = sosAddr + (i * secPerSU);
! 1099: (raidPtr->Layout.map->MapSector) (raidPtr,
! 1100: pda_p->raidAddress, &(pda_p->row), &(pda_p->col),
! 1101: &(pda_p->startSector), 0);
! 1102: pda_p->numSector = ftwo->numSector;
! 1103: pda_p->raidAddress += ftwo_start;
! 1104: pda_p->startSector += ftwo_start;
! 1105: RF_MallocAndAdd(pda_p->bufPtr,
! 1106: rf_RaidAddressToByte(raidPtr, pda_p->numSector),
! 1107: (char *), allocList);
! 1108: break;
! 1109: default:
! 1110: RF_PANIC();
! 1111: }
! 1112: pda_p++;
! 1113: }
! 1114:
! 1115: RF_ASSERT(pda_p - *pdap == napdas);
! 1116: return;
! 1117: }
! 1118:
/*
 * INIT_DISK_NODE(dnode, label)
 *
 * Initialize the DAG node pointed to by `dnode' as a disk read node called
 * `label' (rf_DiskReadFunc / rf_DiskReadUndoFunc / rf_GenericWakeupFunc),
 * with 2 successors, 1 antecedent, 4 params and 0 results, and wire it
 * between the block node (control antecedent) and the unblock and recovery
 * nodes (successors).
 *
 * The macro picks up dag_h, allocList, blockNode, unblockNode and
 * recoveryNode from the scope in which it is expanded.  `dnode' is
 * evaluated several times, so it must be free of side effects.
 */
#define	INIT_DISK_NODE(dnode, label)					\
do {									\
	rf_InitNode(dnode, rf_wait, RF_FALSE,				\
	    rf_DiskReadFunc, rf_DiskReadUndoFunc,			\
	    rf_GenericWakeupFunc,					\
	    2, 1, 4, 0,							\
	    dag_h, label, allocList);					\
	(dnode)->succedents[0] = unblockNode;				\
	(dnode)->succedents[1] = recoveryNode;				\
	(dnode)->antecedents[0] = blockNode;				\
	(dnode)->antType[0] = rf_control;				\
} while (0)
! 1128:
/*
 * DISK_NODE_PARAMS(dnode, dpda)
 *
 * Fill in the four params of the disk node `dnode' (a node object, not a
 * pointer) from the physical disk address `dpda' (a pointer):
 *	params[0].p	the pda itself
 *	params[1].p	the pda's data buffer
 *	params[2].v	parityStripeID
 *	params[3].v	RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru)
 *
 * parityStripeID and which_ru are taken from the scope in which the macro
 * is expanded.  Both arguments are evaluated more than once, so they must
 * be free of side effects.
 */
#define	DISK_NODE_PARAMS(dnode, dpda)					\
do {									\
	(dnode).params[0].p = (dpda);					\
	(dnode).params[1].p = (dpda)->bufPtr;				\
	(dnode).params[2].v = parityStripeID;				\
	(dnode).params[3].v =						\
	    RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, which_ru);	\
} while (0)
! 1136:
! 1137: void
! 1138: rf_DoubleDegRead(
! 1139: RF_Raid_t *raidPtr,
! 1140: RF_AccessStripeMap_t *asmap,
! 1141: RF_DagHeader_t *dag_h,
! 1142: void *bp,
! 1143: RF_RaidAccessFlags_t flags,
! 1144: RF_AllocListElem_t *allocList,
! 1145: char *redundantReadNodeName,
! 1146: char *recoveryNodeName,
! 1147: int (*recovFunc) (RF_DagNode_t *)
! 1148: )
! 1149: {
! 1150: RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
! 1151: RF_DagNode_t *nodes, *rudNodes, *rrdNodes, *recoveryNode, *blockNode,
! 1152: *unblockNode, *rpNodes, *rqNodes, *termNode;
! 1153: RF_PhysDiskAddr_t *pda, *pqPDAs;
! 1154: RF_PhysDiskAddr_t *npdas;
! 1155: int nNodes, nRrdNodes, nRudNodes, i;
! 1156: RF_ReconUnitNum_t which_ru;
! 1157: int nReadNodes, nPQNodes;
! 1158: RF_PhysDiskAddr_t *failedPDA = asmap->failedPDAs[0];
! 1159: RF_PhysDiskAddr_t *failedPDAtwo = asmap->failedPDAs[1];
! 1160: RF_StripeNum_t parityStripeID = rf_RaidAddressToParityStripeID(
! 1161: layoutPtr, asmap->raidAddress, &which_ru);
! 1162:
! 1163: if (rf_dagDebug)
! 1164: printf("[Creating Double Degraded Read DAG]\n");
! 1165: rf_DD_GenerateFailedAccessASMs(raidPtr, asmap, &npdas, &nRrdNodes,
! 1166: &pqPDAs, &nPQNodes, allocList);
! 1167:
! 1168: nRudNodes = asmap->numStripeUnitsAccessed - (asmap->numDataFailed);
! 1169: nReadNodes = nRrdNodes + nRudNodes + 2 * nPQNodes;
! 1170: nNodes = 4 /* Block, unblock, recovery, term. */ + nReadNodes;
! 1171:
! 1172: RF_CallocAndAdd(nodes, nNodes, sizeof(RF_DagNode_t), (RF_DagNode_t *),
! 1173: allocList);
! 1174: i = 0;
! 1175: blockNode = &nodes[i];
! 1176: i += 1;
! 1177: unblockNode = &nodes[i];
! 1178: i += 1;
! 1179: recoveryNode = &nodes[i];
! 1180: i += 1;
! 1181: termNode = &nodes[i];
! 1182: i += 1;
! 1183: rudNodes = &nodes[i];
! 1184: i += nRudNodes;
! 1185: rrdNodes = &nodes[i];
! 1186: i += nRrdNodes;
! 1187: rpNodes = &nodes[i];
! 1188: i += nPQNodes;
! 1189: rqNodes = &nodes[i];
! 1190: i += nPQNodes;
! 1191: RF_ASSERT(i == nNodes);
! 1192:
! 1193: dag_h->numSuccedents = 1;
! 1194: dag_h->succedents[0] = blockNode;
! 1195: dag_h->creator = "DoubleDegRead";
! 1196: dag_h->numCommits = 0;
! 1197: dag_h->numCommitNodes = 1; /* Unblock. */
! 1198:
! 1199: rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc,
! 1200: rf_TerminateUndoFunc, NULL, 0, 2, 0, 0, dag_h, "Trm", allocList);
! 1201: termNode->antecedents[0] = unblockNode;
! 1202: termNode->antType[0] = rf_control;
! 1203: termNode->antecedents[1] = recoveryNode;
! 1204: termNode->antType[1] = rf_control;
! 1205:
! 1206: /*
! 1207: * Init the block and unblock nodes.
! 1208: * The block node has all nodes except itself, unblock and
! 1209: * recovery as successors.
! 1210: * Similarly for predecessors of the unblock.
! 1211: */
! 1212: rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
! 1213: rf_NullNodeUndoFunc, NULL, nReadNodes, 0, 0, 0, dag_h,
! 1214: "Nil", allocList);
! 1215: rf_InitNode(unblockNode, rf_wait, RF_TRUE, rf_NullNodeFunc,
! 1216: rf_NullNodeUndoFunc, NULL, 1, nReadNodes, 0, 0, dag_h,
! 1217: "Nil", allocList);
! 1218:
! 1219: for (i = 0; i < nReadNodes; i++) {
! 1220: blockNode->succedents[i] = rudNodes + i;
! 1221: unblockNode->antecedents[i] = rudNodes + i;
! 1222: unblockNode->antType[i] = rf_control;
! 1223: }
! 1224: unblockNode->succedents[0] = termNode;
! 1225:
! 1226: /*
! 1227: * The recovery node has all the reads as predecessors, and the term
! 1228: * node as successors. It gets a pda as a param from each of the read
! 1229: * nodes plus the raidPtr. For each failed unit is has a result pda.
! 1230: */
! 1231: rf_InitNode(recoveryNode, rf_wait, RF_FALSE, recovFunc,
! 1232: rf_NullNodeUndoFunc, NULL,
! 1233: 1, /* succesors */
! 1234: nReadNodes, /* preds */
! 1235: nReadNodes + 2, /* params */
! 1236: asmap->numDataFailed, /* results */
! 1237: dag_h, recoveryNodeName, allocList);
! 1238:
! 1239: recoveryNode->succedents[0] = termNode;
! 1240: for (i = 0; i < nReadNodes; i++) {
! 1241: recoveryNode->antecedents[i] = rudNodes + i;
! 1242: recoveryNode->antType[i] = rf_trueData;
! 1243: }
! 1244:
! 1245: /*
! 1246: * Build the read nodes, then come back and fill in recovery params
! 1247: * and results.
! 1248: */
! 1249: pda = asmap->physInfo;
! 1250: for (i = 0; i < nRudNodes; pda = pda->next) {
! 1251: if ((pda == failedPDA) || (pda == failedPDAtwo))
! 1252: continue;
! 1253: INIT_DISK_NODE(rudNodes + i, "Rud");
! 1254: RF_ASSERT(pda);
! 1255: DISK_NODE_PARAMS(rudNodes[i], pda);
! 1256: i++;
! 1257: }
! 1258:
! 1259: pda = npdas;
! 1260: for (i = 0; i < nRrdNodes; i++, pda = pda->next) {
! 1261: INIT_DISK_NODE(rrdNodes + i, "Rrd");
! 1262: RF_ASSERT(pda);
! 1263: DISK_NODE_PARAMS(rrdNodes[i], pda);
! 1264: }
! 1265:
! 1266: /* Redundancy pdas. */
! 1267: pda = pqPDAs;
! 1268: INIT_DISK_NODE(rpNodes, "Rp");
! 1269: RF_ASSERT(pda);
! 1270: DISK_NODE_PARAMS(rpNodes[0], pda);
! 1271: pda++;
! 1272: INIT_DISK_NODE(rqNodes, redundantReadNodeName);
! 1273: RF_ASSERT(pda);
! 1274: DISK_NODE_PARAMS(rqNodes[0], pda);
! 1275: if (nPQNodes == 2) {
! 1276: pda++;
! 1277: INIT_DISK_NODE(rpNodes + 1, "Rp");
! 1278: RF_ASSERT(pda);
! 1279: DISK_NODE_PARAMS(rpNodes[1], pda);
! 1280: pda++;
! 1281: INIT_DISK_NODE(rqNodes + 1, redundantReadNodeName);
! 1282: RF_ASSERT(pda);
! 1283: DISK_NODE_PARAMS(rqNodes[1], pda);
! 1284: }
! 1285: /* Fill in recovery node params. */
! 1286: for (i = 0; i < nReadNodes; i++)
! 1287: recoveryNode->params[i] = rudNodes[i].params[0]; /* pda */
! 1288: recoveryNode->params[i++].p = (void *) raidPtr;
! 1289: recoveryNode->params[i++].p = (void *) asmap;
! 1290: recoveryNode->results[0] = failedPDA;
! 1291: if (asmap->numDataFailed == 2)
! 1292: recoveryNode->results[1] = failedPDAtwo;
! 1293:
! 1294: /* Zero fill the target data buffers ? */
! 1295: }
CVSweb