Annotation of sys/dev/raidframe/rf_dagutils.c, Revision 1.1
1.1 ! nbrk 1: /* $OpenBSD: rf_dagutils.c,v 1.4 2002/12/16 07:01:03 tdeval Exp $ */
! 2: /* $NetBSD: rf_dagutils.c,v 1.6 1999/12/09 02:26:09 oster Exp $ */
! 3:
! 4: /*
! 5: * Copyright (c) 1995 Carnegie-Mellon University.
! 6: * All rights reserved.
! 7: *
! 8: * Authors: Mark Holland, William V. Courtright II, Jim Zelenka
! 9: *
! 10: * Permission to use, copy, modify and distribute this software and
! 11: * its documentation is hereby granted, provided that both the copyright
! 12: * notice and this permission notice appear in all copies of the
! 13: * software, derivative works or modified versions, and any portions
! 14: * thereof, and that both notices appear in supporting documentation.
! 15: *
! 16: * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
! 17: * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
! 18: * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
! 19: *
! 20: * Carnegie Mellon requests users of this software to return to
! 21: *
! 22: * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
! 23: * School of Computer Science
! 24: * Carnegie Mellon University
! 25: * Pittsburgh PA 15213-3890
! 26: *
! 27: * any improvements or extensions that they make and grant Carnegie the
! 28: * rights to redistribute these changes.
! 29: */
! 30:
! 31: /*****************************************************************************
! 32: *
! 33: * rf_dagutils.c -- Utility routines for manipulating dags.
! 34: *
! 35: *****************************************************************************/
! 36:
! 37: #include "rf_archs.h"
! 38: #include "rf_types.h"
! 39: #include "rf_threadstuff.h"
! 40: #include "rf_raid.h"
! 41: #include "rf_dag.h"
! 42: #include "rf_dagutils.h"
! 43: #include "rf_dagfuncs.h"
! 44: #include "rf_general.h"
! 45: #include "rf_freelist.h"
! 46: #include "rf_map.h"
! 47: #include "rf_shutdown.h"
! 48:
! 49: #define SNUM_DIFF(_a_,_b_) (((_a_)>(_b_))?((_a_)-(_b_)):((_b_)-(_a_)))
! 50:
! 51: RF_RedFuncs_t rf_xorFuncs = {
! 52: rf_RegularXorFunc, "Reg Xr", rf_SimpleXorFunc, "Simple Xr"
! 53: };
! 54:
! 55: RF_RedFuncs_t rf_xorRecoveryFuncs = {
! 56: rf_RecoveryXorFunc, "Recovery Xr", rf_RecoveryXorFunc, "Recovery Xr"
! 57: };
! 58:
! 59: void rf_RecurPrintDAG(RF_DagNode_t *, int, int);
! 60: void rf_PrintDAG(RF_DagHeader_t *);
! 61: int rf_ValidateBranch(RF_DagNode_t *, int *, int *, RF_DagNode_t **, int);
! 62: void rf_ValidateBranchVisitedBits(RF_DagNode_t *, int, int);
! 63: void rf_ValidateVisitedBits(RF_DagHeader_t *);
! 64:
! 65: /*****************************************************************************
! 66: *
! 67: * InitNode - Initialize a dag node.
! 68: *
! 69: * The size of the propList array is always the same as that of the
! 70: * successors array.
! 71: *
! 72: *****************************************************************************/
! 73: void
! 74: rf_InitNode(
! 75: RF_DagNode_t *node,
! 76: RF_NodeStatus_t initstatus,
! 77: int commit,
! 78: int (*doFunc) (RF_DagNode_t *),
! 79: int (*undoFunc) (RF_DagNode_t *node),
! 80: int (*wakeFunc) (RF_DagNode_t *node, int),
! 81: int nSucc,
! 82: int nAnte,
! 83: int nParam,
! 84: int nResult,
! 85: RF_DagHeader_t *hdr,
! 86: char *name,
! 87: RF_AllocListElem_t *alist
! 88: )
! 89: {
! 90: void **ptrs;
! 91: int nptrs;
! 92:
! 93: if (nAnte > RF_MAX_ANTECEDENTS)
! 94: RF_PANIC();
! 95: node->status = initstatus;
! 96: node->commitNode = commit;
! 97: node->doFunc = doFunc;
! 98: node->undoFunc = undoFunc;
! 99: node->wakeFunc = wakeFunc;
! 100: node->numParams = nParam;
! 101: node->numResults = nResult;
! 102: node->numAntecedents = nAnte;
! 103: node->numAntDone = 0;
! 104: node->next = NULL;
! 105: node->numSuccedents = nSucc;
! 106: node->name = name;
! 107: node->dagHdr = hdr;
! 108: node->visited = 0;
! 109:
! 110: /* Allocate all the pointers with one call to malloc. */
! 111: nptrs = nSucc + nAnte + nResult + nSucc;
! 112:
! 113: if (nptrs <= RF_DAG_PTRCACHESIZE) {
! 114: /*
! 115: * The dag_ptrs field of the node is basically some scribble
! 116: * space to be used here. We could get rid of it, and always
! 117: * allocate the range of pointers, but that's expensive. So,
! 118: * we pick a "common case" size for the pointer cache.
! 119: * Hopefully, we'll find that:
! 120: * (1) Generally, nptrs doesn't exceed RF_DAG_PTRCACHESIZE by
! 121: * only a little bit (least efficient case).
! 122: * (2) Generally, ntprs isn't a lot less than
! 123: * RF_DAG_PTRCACHESIZE (wasted memory).
! 124: */
! 125: ptrs = (void **) node->dag_ptrs;
! 126: } else {
! 127: RF_CallocAndAdd(ptrs, nptrs, sizeof(void *), (void **), alist);
! 128: }
! 129: node->succedents = (nSucc) ? (RF_DagNode_t **) ptrs : NULL;
! 130: node->antecedents = (nAnte) ? (RF_DagNode_t **) (ptrs + nSucc) : NULL;
! 131: node->results = (nResult) ? (void **) (ptrs + nSucc + nAnte) : NULL;
! 132: node->propList = (nSucc) ? (RF_PropHeader_t **)
! 133: (ptrs + nSucc + nAnte + nResult) : NULL;
! 134:
! 135: if (nParam) {
! 136: if (nParam <= RF_DAG_PARAMCACHESIZE) {
! 137: node->params = (RF_DagParam_t *) node->dag_params;
! 138: } else {
! 139: RF_CallocAndAdd(node->params, nParam,
! 140: sizeof(RF_DagParam_t), (RF_DagParam_t *), alist);
! 141: }
! 142: } else {
! 143: node->params = NULL;
! 144: }
! 145: }
! 146:
! 147:
! 148:
! 149: /*****************************************************************************
! 150: *
! 151: * Allocation and deallocation routines.
! 152: *
! 153: *****************************************************************************/
! 154:
! 155: void
! 156: rf_FreeDAG(RF_DagHeader_t *dag_h)
! 157: {
! 158: RF_AccessStripeMapHeader_t *asmap, *t_asmap;
! 159: RF_DagHeader_t *nextDag;
! 160: int i;
! 161:
! 162: while (dag_h) {
! 163: nextDag = dag_h->next;
! 164: for (i = 0; dag_h->memChunk[i] && i < RF_MAXCHUNKS; i++) {
! 165: /* Release mem chunks. */
! 166: rf_ReleaseMemChunk(dag_h->memChunk[i]);
! 167: dag_h->memChunk[i] = NULL;
! 168: }
! 169:
! 170: RF_ASSERT(i == dag_h->chunkIndex);
! 171: if (dag_h->xtraChunkCnt > 0) {
! 172: /* Free xtraMemChunks. */
! 173: for (i = 0; dag_h->xtraMemChunk[i] &&
! 174: i < dag_h->xtraChunkIndex; i++) {
! 175: rf_ReleaseMemChunk(dag_h->xtraMemChunk[i]);
! 176: dag_h->xtraMemChunk[i] = NULL;
! 177: }
! 178: RF_ASSERT(i == dag_h->xtraChunkIndex);
! 179: /* Free ptrs to xtraMemChunks. */
! 180: RF_Free(dag_h->xtraMemChunk, dag_h->xtraChunkCnt *
! 181: sizeof(RF_ChunkDesc_t *));
! 182: }
! 183: rf_FreeAllocList(dag_h->allocList);
! 184: for (asmap = dag_h->asmList; asmap;) {
! 185: t_asmap = asmap;
! 186: asmap = asmap->next;
! 187: rf_FreeAccessStripeMap(t_asmap);
! 188: }
! 189: rf_FreeDAGHeader(dag_h);
! 190: dag_h = nextDag;
! 191: }
! 192: }
! 193:
! 194: RF_PropHeader_t *
! 195: rf_MakePropListEntry(RF_DagHeader_t *dag_h, int resultNum, int paramNum,
! 196: RF_PropHeader_t *next, RF_AllocListElem_t *allocList)
! 197: {
! 198: RF_PropHeader_t *p;
! 199:
! 200: RF_CallocAndAdd(p, 1, sizeof(RF_PropHeader_t), (RF_PropHeader_t *),
! 201: allocList);
! 202: p->resultNum = resultNum;
! 203: p->paramNum = paramNum;
! 204: p->next = next;
! 205: return (p);
! 206: }
! 207:
! 208: static RF_FreeList_t *rf_dagh_freelist;
! 209:
! 210: #define RF_MAX_FREE_DAGH 128
! 211: #define RF_DAGH_INC 16
! 212: #define RF_DAGH_INITIAL 32
! 213:
! 214: void rf_ShutdownDAGs(void *);
! 215: void
! 216: rf_ShutdownDAGs(void *ignored)
! 217: {
! 218: RF_FREELIST_DESTROY(rf_dagh_freelist, next, (RF_DagHeader_t *));
! 219: }
! 220:
! 221: int
! 222: rf_ConfigureDAGs(RF_ShutdownList_t **listp)
! 223: {
! 224: int rc;
! 225:
! 226: RF_FREELIST_CREATE(rf_dagh_freelist, RF_MAX_FREE_DAGH, RF_DAGH_INC,
! 227: sizeof(RF_DagHeader_t));
! 228: if (rf_dagh_freelist == NULL)
! 229: return (ENOMEM);
! 230: rc = rf_ShutdownCreate(listp, rf_ShutdownDAGs, NULL);
! 231: if (rc) {
! 232: RF_ERRORMSG3("Unable to add to shutdown list file %s line"
! 233: " %d rc=%d\n", __FILE__, __LINE__, rc);
! 234: rf_ShutdownDAGs(NULL);
! 235: return (rc);
! 236: }
! 237: RF_FREELIST_PRIME(rf_dagh_freelist, RF_DAGH_INITIAL, next,
! 238: (RF_DagHeader_t *));
! 239: return (0);
! 240: }
! 241:
! 242: RF_DagHeader_t *
! 243: rf_AllocDAGHeader(void)
! 244: {
! 245: RF_DagHeader_t *dh;
! 246:
! 247: RF_FREELIST_GET(rf_dagh_freelist, dh, next, (RF_DagHeader_t *));
! 248: if (dh) {
! 249: bzero((char *) dh, sizeof(RF_DagHeader_t));
! 250: }
! 251: return (dh);
! 252: }
! 253:
! 254: void
! 255: rf_FreeDAGHeader(RF_DagHeader_t *dh)
! 256: {
! 257: RF_FREELIST_FREE(rf_dagh_freelist, dh, next);
! 258: }
! 259:
! 260: /* Allocate a buffer big enough to hold the data described by pda. */
! 261: void *
! 262: rf_AllocBuffer(RF_Raid_t *raidPtr, RF_DagHeader_t *dag_h,
! 263: RF_PhysDiskAddr_t *pda, RF_AllocListElem_t *allocList)
! 264: {
! 265: char *p;
! 266:
! 267: RF_MallocAndAdd(p, pda->numSector << raidPtr->logBytesPerSector,
! 268: (char *), allocList);
! 269: return ((void *) p);
! 270: }
! 271:
! 272:
! 273: /*****************************************************************************
! 274: *
! 275: * Debug routines.
! 276: *
! 277: *****************************************************************************/
! 278:
! 279: char *
! 280: rf_NodeStatusString(RF_DagNode_t *node)
! 281: {
! 282: switch (node->status) {
! 283: case rf_wait:
! 284: return ("wait");
! 285: case rf_fired:
! 286: return ("fired");
! 287: case rf_good:
! 288: return ("good");
! 289: case rf_bad:
! 290: return ("bad");
! 291: default:
! 292: return ("?");
! 293: }
! 294: }
! 295:
! 296: void
! 297: rf_PrintNodeInfoString(RF_DagNode_t *node)
! 298: {
! 299: RF_PhysDiskAddr_t *pda;
! 300: int (*df) (RF_DagNode_t *) = node->doFunc;
! 301: int i, lk, unlk;
! 302: void *bufPtr;
! 303:
! 304: if ((df == rf_DiskReadFunc) || (df == rf_DiskWriteFunc) ||
! 305: (df == rf_DiskReadMirrorIdleFunc) ||
! 306: (df == rf_DiskReadMirrorPartitionFunc)) {
! 307: pda = (RF_PhysDiskAddr_t *) node->params[0].p;
! 308: bufPtr = (void *) node->params[1].p;
! 309: lk = RF_EXTRACT_LOCK_FLAG(node->params[3].v);
! 310: unlk = RF_EXTRACT_UNLOCK_FLAG(node->params[3].v);
! 311: RF_ASSERT(!(lk && unlk));
! 312: printf("r %d c %d offs %ld nsect %d buf 0x%lx %s\n", pda->row,
! 313: pda->col, (long) pda->startSector, (int) pda->numSector,
! 314: (long) bufPtr, (lk) ? "LOCK" : ((unlk) ? "UNLK" : " "));
! 315: return;
! 316: }
! 317: if (df == rf_DiskUnlockFunc) {
! 318: pda = (RF_PhysDiskAddr_t *) node->params[0].p;
! 319: lk = RF_EXTRACT_LOCK_FLAG(node->params[3].v);
! 320: unlk = RF_EXTRACT_UNLOCK_FLAG(node->params[3].v);
! 321: RF_ASSERT(!(lk && unlk));
! 322: printf("r %d c %d %s\n", pda->row, pda->col,
! 323: (lk) ? "LOCK" : ((unlk) ? "UNLK" : "nop"));
! 324: return;
! 325: }
! 326: if ((df == rf_SimpleXorFunc) || (df == rf_RegularXorFunc)
! 327: || (df == rf_RecoveryXorFunc)) {
! 328: printf("result buf 0x%lx\n", (long) node->results[0]);
! 329: for (i = 0; i < node->numParams - 1; i += 2) {
! 330: pda = (RF_PhysDiskAddr_t *) node->params[i].p;
! 331: bufPtr = (RF_PhysDiskAddr_t *) node->params[i + 1].p;
! 332: printf(" buf 0x%lx r%d c%d offs %ld nsect %d\n",
! 333: (long) bufPtr, pda->row, pda->col,
! 334: (long) pda->startSector, (int) pda->numSector);
! 335: }
! 336: return;
! 337: }
! 338: #if RF_INCLUDE_PARITYLOGGING > 0
! 339: if (df == rf_ParityLogOverwriteFunc || df == rf_ParityLogUpdateFunc) {
! 340: for (i = 0; i < node->numParams - 1; i += 2) {
! 341: pda = (RF_PhysDiskAddr_t *) node->params[i].p;
! 342: bufPtr = (RF_PhysDiskAddr_t *) node->params[i + 1].p;
! 343: printf(" r%d c%d offs %ld nsect %d buf 0x%lx\n",
! 344: pda->row, pda->col, (long) pda->startSector,
! 345: (int) pda->numSector, (long) bufPtr);
! 346: }
! 347: return;
! 348: }
! 349: #endif /* RF_INCLUDE_PARITYLOGGING > 0 */
! 350:
! 351: if ((df == rf_TerminateFunc) || (df == rf_NullNodeFunc)) {
! 352: printf("\n");
! 353: return;
! 354: }
! 355: printf("?\n");
! 356: }
! 357:
! 358: void
! 359: rf_RecurPrintDAG(RF_DagNode_t *node, int depth, int unvisited)
! 360: {
! 361: char *anttype;
! 362: int i;
! 363:
! 364: node->visited = (unvisited) ? 0 : 1;
! 365: printf("(%d) %d C%d %s: %s,s%d %d/%d,a%d/%d,p%d,r%d S{", depth,
! 366: node->nodeNum, node->commitNode, node->name,
! 367: rf_NodeStatusString(node), node->numSuccedents,
! 368: node->numSuccFired, node->numSuccDone,
! 369: node->numAntecedents, node->numAntDone,
! 370: node->numParams, node->numResults);
! 371: for (i = 0; i < node->numSuccedents; i++) {
! 372: printf("%d%s", node->succedents[i]->nodeNum,
! 373: ((i == node->numSuccedents - 1) ? "\0" : " "));
! 374: }
! 375: printf("} A{");
! 376: for (i = 0; i < node->numAntecedents; i++) {
! 377: switch (node->antType[i]) {
! 378: case rf_trueData:
! 379: anttype = "T";
! 380: break;
! 381: case rf_antiData:
! 382: anttype = "A";
! 383: break;
! 384: case rf_outputData:
! 385: anttype = "O";
! 386: break;
! 387: case rf_control:
! 388: anttype = "C";
! 389: break;
! 390: default:
! 391: anttype = "?";
! 392: break;
! 393: }
! 394: printf("%d(%s)%s", node->antecedents[i]->nodeNum, anttype,
! 395: (i == node->numAntecedents - 1) ? "\0" : " ");
! 396: }
! 397: printf("}; ");
! 398: rf_PrintNodeInfoString(node);
! 399: for (i = 0; i < node->numSuccedents; i++) {
! 400: if (node->succedents[i]->visited == unvisited)
! 401: rf_RecurPrintDAG(node->succedents[i], depth + 1,
! 402: unvisited);
! 403: }
! 404: }
! 405:
! 406: void
! 407: rf_PrintDAG(RF_DagHeader_t *dag_h)
! 408: {
! 409: int unvisited, i;
! 410: char *status;
! 411:
! 412: /* Set dag status. */
! 413: switch (dag_h->status) {
! 414: case rf_enable:
! 415: status = "enable";
! 416: break;
! 417: case rf_rollForward:
! 418: status = "rollForward";
! 419: break;
! 420: case rf_rollBackward:
! 421: status = "rollBackward";
! 422: break;
! 423: default:
! 424: status = "illegal !";
! 425: break;
! 426: }
! 427: /* Find out if visited bits are currently set or cleared. */
! 428: unvisited = dag_h->succedents[0]->visited;
! 429:
! 430: printf("DAG type: %s\n", dag_h->creator);
! 431: printf("format is (depth) num commit type: status,nSucc nSuccFired/n"
! 432: "SuccDone,nAnte/nAnteDone,nParam,nResult S{x} A{x(type)}; info\n");
! 433: printf("(0) %d Hdr: %s, s%d, (commit %d/%d) S{", dag_h->nodeNum,
! 434: status, dag_h->numSuccedents, dag_h->numCommitNodes,
! 435: dag_h->numCommits);
! 436: for (i = 0; i < dag_h->numSuccedents; i++) {
! 437: printf("%d%s", dag_h->succedents[i]->nodeNum,
! 438: ((i == dag_h->numSuccedents - 1) ? "\0" : " "));
! 439: }
! 440: printf("};\n");
! 441: for (i = 0; i < dag_h->numSuccedents; i++) {
! 442: if (dag_h->succedents[i]->visited == unvisited)
! 443: rf_RecurPrintDAG(dag_h->succedents[i], 1, unvisited);
! 444: }
! 445: }
! 446:
! 447: /* Assign node numbers. */
! 448: int
! 449: rf_AssignNodeNums(RF_DagHeader_t *dag_h)
! 450: {
! 451: int unvisited, i, nnum;
! 452: RF_DagNode_t *node;
! 453:
! 454: nnum = 0;
! 455: unvisited = dag_h->succedents[0]->visited;
! 456:
! 457: dag_h->nodeNum = nnum++;
! 458: for (i = 0; i < dag_h->numSuccedents; i++) {
! 459: node = dag_h->succedents[i];
! 460: if (node->visited == unvisited) {
! 461: nnum = rf_RecurAssignNodeNums(dag_h->succedents[i],
! 462: nnum, unvisited);
! 463: }
! 464: }
! 465: return (nnum);
! 466: }
! 467:
! 468: int
! 469: rf_RecurAssignNodeNums(RF_DagNode_t *node, int num, int unvisited)
! 470: {
! 471: int i;
! 472:
! 473: node->visited = (unvisited) ? 0 : 1;
! 474:
! 475: node->nodeNum = num++;
! 476: for (i = 0; i < node->numSuccedents; i++) {
! 477: if (node->succedents[i]->visited == unvisited) {
! 478: num = rf_RecurAssignNodeNums(node->succedents[i],
! 479: num, unvisited);
! 480: }
! 481: }
! 482: return (num);
! 483: }
! 484:
! 485: /* Set the header pointers in each node to "newptr". */
! 486: void
! 487: rf_ResetDAGHeaderPointers(RF_DagHeader_t *dag_h, RF_DagHeader_t *newptr)
! 488: {
! 489: int i;
! 490:
! 491: for (i = 0; i < dag_h->numSuccedents; i++)
! 492: if (dag_h->succedents[i]->dagHdr != newptr)
! 493: rf_RecurResetDAGHeaderPointers(dag_h->succedents[i],
! 494: newptr);
! 495: }
! 496:
! 497: void
! 498: rf_RecurResetDAGHeaderPointers(RF_DagNode_t *node, RF_DagHeader_t *newptr)
! 499: {
! 500: int i;
! 501:
! 502: node->dagHdr = newptr;
! 503: for (i = 0; i < node->numSuccedents; i++)
! 504: if (node->succedents[i]->dagHdr != newptr)
! 505: rf_RecurResetDAGHeaderPointers(node->succedents[i],
! 506: newptr);
! 507: }
! 508:
! 509: void
! 510: rf_PrintDAGList(RF_DagHeader_t *dag_h)
! 511: {
! 512: int i = 0;
! 513:
! 514: for (; dag_h; dag_h = dag_h->next) {
! 515: rf_AssignNodeNums(dag_h);
! 516: printf("\n\nDAG %d IN LIST:\n", i++);
! 517: rf_PrintDAG(dag_h);
! 518: }
! 519: }
! 520:
! 521: int
! 522: rf_ValidateBranch(RF_DagNode_t *node, int *scount, int *acount,
! 523: RF_DagNode_t **nodes, int unvisited)
! 524: {
! 525: int i, retcode = 0;
! 526:
! 527: /* Construct an array of node pointers indexed by node num. */
! 528: node->visited = (unvisited) ? 0 : 1;
! 529: nodes[node->nodeNum] = node;
! 530:
! 531: if (node->next != NULL) {
! 532: printf("INVALID DAG: next pointer in node is not NULL.\n");
! 533: retcode = 1;
! 534: }
! 535: if (node->status != rf_wait) {
! 536: printf("INVALID DAG: Node status is not wait.\n");
! 537: retcode = 1;
! 538: }
! 539: if (node->numAntDone != 0) {
! 540: printf("INVALID DAG: numAntDone is not zero.\n");
! 541: retcode = 1;
! 542: }
! 543: if (node->doFunc == rf_TerminateFunc) {
! 544: if (node->numSuccedents != 0) {
! 545: printf("INVALID DAG: Terminator node has"
! 546: " succedents.\n");
! 547: retcode = 1;
! 548: }
! 549: } else {
! 550: if (node->numSuccedents == 0) {
! 551: printf("INVALID DAG: Non-terminator node has no"
! 552: " succedents\n");
! 553: retcode = 1;
! 554: }
! 555: }
! 556: for (i = 0; i < node->numSuccedents; i++) {
! 557: if (!node->succedents[i]) {
! 558: printf("INVALID DAG: succedent %d of node %s"
! 559: " is NULL.\n", i, node->name);
! 560: retcode = 1;
! 561: }
! 562: scount[node->succedents[i]->nodeNum]++;
! 563: }
! 564: for (i = 0; i < node->numAntecedents; i++) {
! 565: if (!node->antecedents[i]) {
! 566: printf("INVALID DAG: antecedent %d of node %s is"
! 567: " NULL.\n", i, node->name);
! 568: retcode = 1;
! 569: }
! 570: acount[node->antecedents[i]->nodeNum]++;
! 571: }
! 572: for (i = 0; i < node->numSuccedents; i++) {
! 573: if (node->succedents[i]->visited == unvisited) {
! 574: if (rf_ValidateBranch(node->succedents[i], scount,
! 575: acount, nodes, unvisited)) {
! 576: retcode = 1;
! 577: }
! 578: }
! 579: }
! 580: return (retcode);
! 581: }
! 582:
! 583: void
! 584: rf_ValidateBranchVisitedBits(RF_DagNode_t *node, int unvisited, int rl)
! 585: {
! 586: int i;
! 587:
! 588: RF_ASSERT(node->visited == unvisited);
! 589: for (i = 0; i < node->numSuccedents; i++) {
! 590: if (node->succedents[i] == NULL) {
! 591: printf("node=%lx node->succedents[%d] is NULL.\n",
! 592: (long) node, i);
! 593: RF_ASSERT(0);
! 594: }
! 595: rf_ValidateBranchVisitedBits(node->succedents[i],
! 596: unvisited, rl + 1);
! 597: }
! 598: }
! 599:
! 600: /*
! 601: * NOTE: Never call this on a big dag, because it is exponential
! 602: * in execution time.
! 603: */
! 604: void
! 605: rf_ValidateVisitedBits(RF_DagHeader_t *dag)
! 606: {
! 607: int i, unvisited;
! 608:
! 609: unvisited = dag->succedents[0]->visited;
! 610:
! 611: for (i = 0; i < dag->numSuccedents; i++) {
! 612: if (dag->succedents[i] == NULL) {
! 613: printf("dag=%lx dag->succedents[%d] is NULL.\n",
! 614: (long) dag, i);
! 615: RF_ASSERT(0);
! 616: }
! 617: rf_ValidateBranchVisitedBits(dag->succedents[i], unvisited, 0);
! 618: }
! 619: }
! 620:
! 621: /*
! 622: * Validate a DAG. _at entry_ verify that:
! 623: * -- numNodesCompleted is zero
! 624: * -- node queue is null
! 625: * -- dag status is rf_enable
! 626: * -- next pointer is null on every node
! 627: * -- all nodes have status wait
! 628: * -- numAntDone is zero in all nodes
! 629: * -- terminator node has zero successors
! 630: * -- no other node besides terminator has zero successors
! 631: * -- no successor or antecedent pointer in a node is NULL
! 632: * -- number of times that each node appears as a successor of another node
! 633: * is equal to the antecedent count on that node
! 634: * -- number of times that each node appears as an antecedent of another node
! 635: * is equal to the succedent count on that node
! 636: * -- what else ?
! 637: */
! 638: int
! 639: rf_ValidateDAG(RF_DagHeader_t *dag_h)
! 640: {
! 641: int i, nodecount;
! 642: int *scount, *acount; /* Per-node successor and antecedent counts. */
! 643: RF_DagNode_t **nodes; /* Array of ptrs to nodes in dag. */
! 644: int retcode = 0;
! 645: int unvisited;
! 646: int commitNodeCount = 0;
! 647:
! 648: if (rf_validateVisitedDebug)
! 649: rf_ValidateVisitedBits(dag_h);
! 650:
! 651: if (dag_h->numNodesCompleted != 0) {
! 652: printf("INVALID DAG: num nodes completed is %d, should be 0.\n",
! 653: dag_h->numNodesCompleted);
! 654: retcode = 1;
! 655: goto validate_dag_bad;
! 656: }
! 657: if (dag_h->status != rf_enable) {
! 658: printf("INVALID DAG: not enabled.\n");
! 659: retcode = 1;
! 660: goto validate_dag_bad;
! 661: }
! 662: if (dag_h->numCommits != 0) {
! 663: printf("INVALID DAG: numCommits != 0 (%d)\n",
! 664: dag_h->numCommits);
! 665: retcode = 1;
! 666: goto validate_dag_bad;
! 667: }
! 668: if (dag_h->numSuccedents != 1) {
! 669: /* Currently, all dags must have only one succedent. */
! 670: printf("INVALID DAG: numSuccedents != 1 (%d).\n",
! 671: dag_h->numSuccedents);
! 672: retcode = 1;
! 673: goto validate_dag_bad;
! 674: }
! 675: nodecount = rf_AssignNodeNums(dag_h);
! 676:
! 677: unvisited = dag_h->succedents[0]->visited;
! 678:
! 679: RF_Calloc(scount, nodecount, sizeof(int), (int *));
! 680: RF_Calloc(acount, nodecount, sizeof(int), (int *));
! 681: RF_Calloc(nodes, nodecount, sizeof(RF_DagNode_t *), (RF_DagNode_t **));
! 682: for (i = 0; i < dag_h->numSuccedents; i++) {
! 683: if ((dag_h->succedents[i]->visited == unvisited)
! 684: && rf_ValidateBranch(dag_h->succedents[i], scount,
! 685: acount, nodes, unvisited)) {
! 686: retcode = 1;
! 687: }
! 688: }
! 689: /* Start at 1 to skip the header node. */
! 690: for (i = 1; i < nodecount; i++) {
! 691: if (nodes[i]->commitNode)
! 692: commitNodeCount++;
! 693: if (nodes[i]->doFunc == NULL) {
! 694: printf("INVALID DAG: node %s has an undefined"
! 695: " doFunc.\n", nodes[i]->name);
! 696: retcode = 1;
! 697: goto validate_dag_out;
! 698: }
! 699: if (nodes[i]->undoFunc == NULL) {
! 700: printf("INVALID DAG: node %s has an undefined"
! 701: " doFunc.\n", nodes[i]->name);
! 702: retcode = 1;
! 703: goto validate_dag_out;
! 704: }
! 705: if (nodes[i]->numAntecedents != scount[nodes[i]->nodeNum]) {
! 706: printf("INVALID DAG: node %s has %d antecedents but"
! 707: " appears as a succedent %d times.\n",
! 708: nodes[i]->name, nodes[i]->numAntecedents,
! 709: scount[nodes[i]->nodeNum]);
! 710: retcode = 1;
! 711: goto validate_dag_out;
! 712: }
! 713: if (nodes[i]->numSuccedents != acount[nodes[i]->nodeNum]) {
! 714: printf("INVALID DAG: node %s has %d succedents but"
! 715: " appears as an antecedent %d times.\n",
! 716: nodes[i]->name, nodes[i]->numSuccedents,
! 717: acount[nodes[i]->nodeNum]);
! 718: retcode = 1;
! 719: goto validate_dag_out;
! 720: }
! 721: }
! 722:
! 723: if (dag_h->numCommitNodes != commitNodeCount) {
! 724: printf("INVALID DAG: incorrect commit node count. "
! 725: "hdr->numCommitNodes (%d) found (%d) commit nodes"
! 726: " in graph.\n",
! 727: dag_h->numCommitNodes, commitNodeCount);
! 728: retcode = 1;
! 729: goto validate_dag_out;
! 730: }
! 731:
! 732: validate_dag_out:
! 733: RF_Free(scount, nodecount * sizeof(int));
! 734: RF_Free(acount, nodecount * sizeof(int));
! 735: RF_Free(nodes, nodecount * sizeof(RF_DagNode_t *));
! 736: if (retcode)
! 737: rf_PrintDAGList(dag_h);
! 738:
! 739: if (rf_validateVisitedDebug)
! 740: rf_ValidateVisitedBits(dag_h);
! 741:
! 742: return (retcode);
! 743:
! 744: validate_dag_bad:
! 745: rf_PrintDAGList(dag_h);
! 746: return (retcode);
! 747: }
! 748:
! 749:
! 750: /*****************************************************************************
! 751: *
! 752: * Misc construction routines.
! 753: *
! 754: *****************************************************************************/
! 755:
! 756: void
! 757: rf_redirect_asm(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap)
! 758: {
! 759: int ds = (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) ? 1 : 0;
! 760: int row = asmap->physInfo->row;
! 761: int fcol = raidPtr->reconControl[row]->fcol;
! 762: int srow = raidPtr->reconControl[row]->spareRow;
! 763: int scol = raidPtr->reconControl[row]->spareCol;
! 764: RF_PhysDiskAddr_t *pda;
! 765:
! 766: RF_ASSERT(raidPtr->status[row] == rf_rs_reconstructing);
! 767: for (pda = asmap->physInfo; pda; pda = pda->next) {
! 768: if (pda->col == fcol) {
! 769: if (rf_dagDebug) {
! 770: if (!rf_CheckRUReconstructed(
! 771: raidPtr->reconControl[row]->reconMap,
! 772: pda->startSector)) {
! 773: RF_PANIC();
! 774: }
! 775: }
! 776: /*printf("Remapped data for large write\n");*/
! 777: if (ds) {
! 778: raidPtr->Layout.map->MapSector(raidPtr,
! 779: pda->raidAddress, &pda->row, &pda->col,
! 780: &pda->startSector, RF_REMAP);
! 781: } else {
! 782: pda->row = srow;
! 783: pda->col = scol;
! 784: }
! 785: }
! 786: }
! 787: for (pda = asmap->parityInfo; pda; pda = pda->next) {
! 788: if (pda->col == fcol) {
! 789: if (rf_dagDebug) {
! 790: if (!rf_CheckRUReconstructed(
! 791: raidPtr->reconControl[row]->reconMap,
! 792: pda->startSector)) {
! 793: RF_PANIC();
! 794: }
! 795: }
! 796: }
! 797: if (ds) {
! 798: (raidPtr->Layout.map->MapParity) (raidPtr,
! 799: pda->raidAddress, &pda->row, &pda->col,
! 800: &pda->startSector, RF_REMAP);
! 801: } else {
! 802: pda->row = srow;
! 803: pda->col = scol;
! 804: }
! 805: }
! 806: }
! 807:
! 808:
! 809: /*
! 810: * This routine allocates read buffers and generates stripe maps for the
! 811: * regions of the array from the start of the stripe to the start of the
! 812: * access, and from the end of the access to the end of the stripe. It also
! 813: * computes and returns the number of DAG nodes needed to read all this data.
! 814: * Note that this routine does the wrong thing if the access is fully
! 815: * contained within one stripe unit, so we RF_ASSERT against this case at the
! 816: * start.
! 817: */
! 818: void
! 819: rf_MapUnaccessedPortionOfStripe(
! 820: RF_Raid_t *raidPtr,
! 821: RF_RaidLayout_t *layoutPtr, /* in: layout information */
! 822: RF_AccessStripeMap_t *asmap, /* in: access stripe map */
! 823: RF_DagHeader_t *dag_h, /* in: header of the dag */
! 824: /* to create */
! 825: RF_AccessStripeMapHeader_t **new_asm_h, /* in: ptr to array of 2 */
! 826: /* headers, to be */
! 827: /* filled in */
! 828: int *nRodNodes, /* out: num nodes to be */
! 829: /* generated to read */
! 830: /* unaccessed data */
! 831: char **sosBuffer, /* out: pointers to newly */
! 832: /* allocated buffer */
! 833: char **eosBuffer,
! 834: RF_AllocListElem_t *allocList
! 835: )
! 836: {
! 837: RF_RaidAddr_t sosRaidAddress, eosRaidAddress;
! 838: RF_SectorNum_t sosNumSector, eosNumSector;
! 839:
! 840: RF_ASSERT(asmap->numStripeUnitsAccessed > (layoutPtr->numDataCol / 2));
! 841: /*
! 842: * Generate an access map for the region of the array from start of
! 843: * stripe to start of access.
! 844: */
! 845: new_asm_h[0] = new_asm_h[1] = NULL;
! 846: *nRodNodes = 0;
! 847: if (!rf_RaidAddressStripeAligned(layoutPtr, asmap->raidAddress)) {
! 848: sosRaidAddress = rf_RaidAddressOfPrevStripeBoundary(layoutPtr,
! 849: asmap->raidAddress);
! 850: sosNumSector = asmap->raidAddress - sosRaidAddress;
! 851: RF_MallocAndAdd(*sosBuffer, rf_RaidAddressToByte(raidPtr,
! 852: sosNumSector), (char *), allocList);
! 853: new_asm_h[0] = rf_MapAccess(raidPtr, sosRaidAddress,
! 854: sosNumSector, *sosBuffer, RF_DONT_REMAP);
! 855: new_asm_h[0]->next = dag_h->asmList;
! 856: dag_h->asmList = new_asm_h[0];
! 857: *nRodNodes += new_asm_h[0]->stripeMap->numStripeUnitsAccessed;
! 858:
! 859: RF_ASSERT(new_asm_h[0]->stripeMap->next == NULL);
! 860: /* We're totally within one stripe here. */
! 861: if (asmap->flags & RF_ASM_REDIR_LARGE_WRITE)
! 862: rf_redirect_asm(raidPtr, new_asm_h[0]->stripeMap);
! 863: }
! 864: /*
! 865: * Generate an access map for the region of the array from end of
! 866: * access to end of stripe.
! 867: */
! 868: if (!rf_RaidAddressStripeAligned(layoutPtr, asmap->endRaidAddress)) {
! 869: eosRaidAddress = asmap->endRaidAddress;
! 870: eosNumSector = rf_RaidAddressOfNextStripeBoundary(layoutPtr,
! 871: eosRaidAddress) - eosRaidAddress;
! 872: RF_MallocAndAdd(*eosBuffer, rf_RaidAddressToByte(raidPtr,
! 873: eosNumSector), (char *), allocList);
! 874: new_asm_h[1] = rf_MapAccess(raidPtr, eosRaidAddress,
! 875: eosNumSector, *eosBuffer, RF_DONT_REMAP);
! 876: new_asm_h[1]->next = dag_h->asmList;
! 877: dag_h->asmList = new_asm_h[1];
! 878: *nRodNodes += new_asm_h[1]->stripeMap->numStripeUnitsAccessed;
! 879:
! 880: RF_ASSERT(new_asm_h[1]->stripeMap->next == NULL);
! 881: /* We're totally within one stripe here. */
! 882: if (asmap->flags & RF_ASM_REDIR_LARGE_WRITE)
! 883: rf_redirect_asm(raidPtr, new_asm_h[1]->stripeMap);
! 884: }
! 885: }
! 886:
! 887:
! 888: /* Returns non-zero if the indicated ranges of stripe unit offsets overlap. */
! 889: int
! 890: rf_PDAOverlap(RF_RaidLayout_t *layoutPtr, RF_PhysDiskAddr_t *src,
! 891: RF_PhysDiskAddr_t *dest)
! 892: {
! 893: RF_SectorNum_t soffs =
! 894: rf_StripeUnitOffset(layoutPtr, src->startSector);
! 895: RF_SectorNum_t doffs =
! 896: rf_StripeUnitOffset(layoutPtr, dest->startSector);
! 897: /* Use -1 to be sure we stay within SU. */
! 898: RF_SectorNum_t send =
! 899: rf_StripeUnitOffset(layoutPtr, src->startSector +
! 900: src->numSector - 1);
! 901: RF_SectorNum_t dend =
! 902: rf_StripeUnitOffset(layoutPtr, dest->startSector +
! 903: dest->numSector - 1);
! 904:
! 905: return ((RF_MAX(soffs, doffs) <= RF_MIN(send, dend)) ? 1 : 0);
! 906: }
! 907:
! 908:
! 909: /*
! 910: * GenerateFailedAccessASMs
! 911: *
! 912: * This routine figures out what portion of the stripe needs to be read
! 913: * to effect the degraded read or write operation. It's primary function
! 914: * is to identify everything required to recover the data, and then
! 915: * eliminate anything that is already being accessed by the user.
! 916: *
! 917: * The main result is two new ASMs, one for the region from the start of the
! 918: * stripe to the start of the access, and one for the region from the end of
! 919: * the access to the end of the stripe. These ASMs describe everything that
! 920: * needs to be read to effect the degraded access. Other results are:
! 921: * nXorBufs -- The total number of buffers that need to be XORed together
! 922: * to recover the lost data,
! 923: * rpBufPtr -- Ptr to a newly-allocated buffer to hold the parity. If NULL
! 924: * at entry, not allocated.
! 925: * overlappingPDAs --
! 926: * Describes which of the non-failed PDAs, in the user access,
! 927: * overlap data that needs to be read to effect recovery.
! 928: * overlappingPDAs[i]==1 if and only if, neglecting the failed
! 929: * PDA, the i'th pda in the input asm overlaps data that needs
! 930: * to be read for recovery.
! 931: */
! 932: /* in: asmap - ASM for the actual access, one stripe only. */
! 933: /* in: faildPDA - Which component of the access has failed. */
! 934: /* in: dag_h - Header of the DAG we're going to create. */
! 935: /* out: new_asm_h - The two new ASMs. */
! 936: /* out: nXorBufs - The total number of xor bufs required. */
! 937: /* out: rpBufPtr - A buffer for the parity read. */
! 938: void
! 939: rf_GenerateFailedAccessASMs(
! 940: RF_Raid_t *raidPtr,
! 941: RF_AccessStripeMap_t *asmap,
! 942: RF_PhysDiskAddr_t *failedPDA,
! 943: RF_DagHeader_t *dag_h,
! 944: RF_AccessStripeMapHeader_t **new_asm_h,
! 945: int *nXorBufs,
! 946: char **rpBufPtr,
! 947: char *overlappingPDAs,
! 948: RF_AllocListElem_t *allocList
! 949: )
! 950: {
! 951: RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
! 952:
! 953: /* s=start, e=end, s=stripe, a=access, f=failed, su=stripe unit */
! 954: RF_RaidAddr_t sosAddr, sosEndAddr, eosStartAddr, eosAddr;
! 955:
! 956: RF_SectorCount_t numSect[2], numParitySect;
! 957: RF_PhysDiskAddr_t *pda;
! 958: char *rdBuf, *bufP;
! 959: int foundit, i;
! 960:
! 961: bufP = NULL;
! 962: foundit = 0;
! 963: /*
! 964: * First compute the following raid addresses:
! 965: * - Start of stripe
! 966: * - (sosAddr) MIN(start of access, start of failed SU)
! 967: * - (sosEndAddr) MAX(end of access, end of failed SU)
! 968: * - (eosStartAddr) end of stripe (i.e. start of next stripe)
! 969: * (eosAddr)
! 970: */
! 971: sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr,
! 972: asmap->raidAddress);
! 973: sosEndAddr = RF_MIN(asmap->raidAddress,
! 974: rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr,
! 975: failedPDA->raidAddress));
! 976: eosStartAddr = RF_MAX(asmap->endRaidAddress,
! 977: rf_RaidAddressOfNextStripeUnitBoundary(layoutPtr,
! 978: failedPDA->raidAddress));
! 979: eosAddr = rf_RaidAddressOfNextStripeBoundary(layoutPtr,
! 980: asmap->raidAddress);
! 981:
! 982: /*
! 983: * Now generate access stripe maps for each of the above regions of
! 984: * the stripe. Use a dummy (NULL) buf ptr for now.
! 985: */
! 986:
! 987: new_asm_h[0] = (sosAddr != sosEndAddr) ?
! 988: rf_MapAccess(raidPtr, sosAddr, sosEndAddr - sosAddr, NULL,
! 989: RF_DONT_REMAP) : NULL;
! 990: new_asm_h[1] = (eosStartAddr != eosAddr) ?
! 991: rf_MapAccess(raidPtr, eosStartAddr, eosAddr - eosStartAddr, NULL,
! 992: RF_DONT_REMAP) : NULL;
! 993:
! 994: /*
! 995: * Walk through the PDAs and range-restrict each SU to the region of
! 996: * the SU touched on the failed PDA. Also compute total data buffer
! 997: * space requirements in this step. Ignore the parity for now.
! 998: */
! 999:
! 1000: numSect[0] = numSect[1] = 0;
! 1001: if (new_asm_h[0]) {
! 1002: new_asm_h[0]->next = dag_h->asmList;
! 1003: dag_h->asmList = new_asm_h[0];
! 1004: for (pda = new_asm_h[0]->stripeMap->physInfo; pda;
! 1005: pda = pda->next) {
! 1006: rf_RangeRestrictPDA(raidPtr, failedPDA, pda,
! 1007: RF_RESTRICT_NOBUFFER, 0);
! 1008: numSect[0] += pda->numSector;
! 1009: }
! 1010: }
! 1011: if (new_asm_h[1]) {
! 1012: new_asm_h[1]->next = dag_h->asmList;
! 1013: dag_h->asmList = new_asm_h[1];
! 1014: for (pda = new_asm_h[1]->stripeMap->physInfo;
! 1015: pda; pda = pda->next) {
! 1016: rf_RangeRestrictPDA(raidPtr, failedPDA, pda,
! 1017: RF_RESTRICT_NOBUFFER, 0);
! 1018: numSect[1] += pda->numSector;
! 1019: }
! 1020: }
! 1021: numParitySect = failedPDA->numSector;
! 1022:
! 1023: /*
! 1024: * Allocate buffer space for the data & parity we have to read to
! 1025: * recover from the failure.
! 1026: */
! 1027:
! 1028: if (numSect[0] + numSect[1] + ((rpBufPtr) ? numParitySect : 0)) {
! 1029: /* Don't allocate parity buf if not needed. */
! 1030: RF_MallocAndAdd(rdBuf, rf_RaidAddressToByte(raidPtr,
! 1031: numSect[0] + numSect[1] + numParitySect), (char *),
! 1032: allocList);
! 1033: bufP = rdBuf;
! 1034: if (rf_degDagDebug)
! 1035: printf("Newly allocated buffer (%d bytes) is 0x%lx\n",
! 1036: (int) rf_RaidAddressToByte(raidPtr,
! 1037: numSect[0] + numSect[1] + numParitySect),
! 1038: (unsigned long) bufP);
! 1039: }
! 1040: /*
! 1041: * Now walk through the pdas one last time and assign buffer pointers
! 1042: * (ugh!). Again, ignore the parity. Also, count nodes to find out
! 1043: * how many bufs need to be xored together.
! 1044: */
! 1045: (*nXorBufs) = 1; /* In read case, 1 is for parity. */
! 1046: /* In write case, 1 is for failed data. */
! 1047: if (new_asm_h[0]) {
! 1048: for (pda = new_asm_h[0]->stripeMap->physInfo; pda;
! 1049: pda = pda->next) {
! 1050: pda->bufPtr = bufP;
! 1051: bufP += rf_RaidAddressToByte(raidPtr, pda->numSector);
! 1052: }
! 1053: *nXorBufs += new_asm_h[0]->stripeMap->numStripeUnitsAccessed;
! 1054: }
! 1055: if (new_asm_h[1]) {
! 1056: for (pda = new_asm_h[1]->stripeMap->physInfo; pda;
! 1057: pda = pda->next) {
! 1058: pda->bufPtr = bufP;
! 1059: bufP += rf_RaidAddressToByte(raidPtr, pda->numSector);
! 1060: }
! 1061: (*nXorBufs) += new_asm_h[1]->stripeMap->numStripeUnitsAccessed;
! 1062: }
! 1063: if (rpBufPtr)
! 1064: /* The rest of the buffer is for parity. */
! 1065: *rpBufPtr = bufP;
! 1066:
! 1067: /*
! 1068: * The last step is to figure out how many more distinct buffers need
! 1069: * to get xor'd to produce the missing unit. there's one for each
! 1070: * user-data read node that overlaps the portion of the failed unit
! 1071: * being accessed.
! 1072: */
! 1073:
! 1074: for (foundit = i = 0, pda = asmap->physInfo;
! 1075: pda; i++, pda = pda->next) {
! 1076: if (pda == failedPDA) {
! 1077: i--;
! 1078: foundit = 1;
! 1079: continue;
! 1080: }
! 1081: if (rf_PDAOverlap(layoutPtr, pda, failedPDA)) {
! 1082: overlappingPDAs[i] = 1;
! 1083: (*nXorBufs)++;
! 1084: }
! 1085: }
! 1086: if (!foundit) {
! 1087: RF_ERRORMSG("GenerateFailedAccessASMs: did not find failedPDA"
! 1088: " in asm list.\n");
! 1089: RF_ASSERT(0);
! 1090: }
! 1091: if (rf_degDagDebug) {
! 1092: if (new_asm_h[0]) {
! 1093: printf("First asm:\n");
! 1094: rf_PrintFullAccessStripeMap(new_asm_h[0], 1);
! 1095: }
! 1096: if (new_asm_h[1]) {
! 1097: printf("Second asm:\n");
! 1098: rf_PrintFullAccessStripeMap(new_asm_h[1], 1);
! 1099: }
! 1100: }
! 1101: }
! 1102:
! 1103:
! 1104: /*
! 1105: * Adjust the offset and number of sectors in the destination pda so that
! 1106: * it covers at most the region of the SU covered by the source PDA. This
! 1107: * is exclusively a restriction: the number of sectors indicated by the
! 1108: * target PDA can only shrink.
! 1109: *
! 1110: * For example: s = sectors within SU indicated by source PDA
! 1111: * d = sectors within SU indicated by dest PDA
! 1112: * r = results, stored in dest PDA
! 1113: *
! 1114: * |--------------- one stripe unit ---------------------|
! 1115: * | sssssssssssssssssssssssssssssssss |
! 1116: * | ddddddddddddddddddddddddddddddddddddddddddddd |
! 1117: * | rrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrr |
! 1118: *
! 1119: * Another example:
! 1120: *
! 1121: * |--------------- one stripe unit ---------------------|
! 1122: * | sssssssssssssssssssssssssssssssss |
! 1123: * | ddddddddddddddddddddddd |
! 1124: * | rrrrrrrrrrrrrrrr |
! 1125: *
! 1126: */
! 1127: void
! 1128: rf_RangeRestrictPDA(RF_Raid_t *raidPtr, RF_PhysDiskAddr_t *src,
! 1129: RF_PhysDiskAddr_t *dest, int dobuffer, int doraidaddr)
! 1130: {
! 1131: RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
! 1132: RF_SectorNum_t soffs =
! 1133: rf_StripeUnitOffset(layoutPtr, src->startSector);
! 1134: RF_SectorNum_t doffs =
! 1135: rf_StripeUnitOffset(layoutPtr, dest->startSector);
! 1136: RF_SectorNum_t send =
! 1137: rf_StripeUnitOffset(layoutPtr, src->startSector +
! 1138: src->numSector - 1); /* Use -1 to be sure we stay within SU. */
! 1139: RF_SectorNum_t dend =
! 1140: rf_StripeUnitOffset(layoutPtr, dest->startSector +
! 1141: dest->numSector - 1);
! 1142: RF_SectorNum_t subAddr =
! 1143: rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr,
! 1144: dest->startSector); /* Stripe unit boundary. */
! 1145:
! 1146: dest->startSector = subAddr + RF_MAX(soffs, doffs);
! 1147: dest->numSector = subAddr + RF_MIN(send, dend) + 1 - dest->startSector;
! 1148:
! 1149: if (dobuffer)
! 1150: dest->bufPtr += (soffs > doffs) ?
! 1151: rf_RaidAddressToByte(raidPtr, soffs - doffs) : 0;
! 1152: if (doraidaddr) {
! 1153: dest->raidAddress =
! 1154: rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr,
! 1155: dest->raidAddress) +
! 1156: rf_StripeUnitOffset(layoutPtr, dest->startSector);
! 1157: }
! 1158: }
! 1159:
! 1160: /*
! 1161: * Want the highest of these primes to be the largest one
! 1162: * less than the max expected number of columns (won't hurt
! 1163: * to be too small or too large, but won't be optimal, either)
! 1164: * --jimz
! 1165: */
! 1166: #define NLOWPRIMES 8
! 1167: static int lowprimes[NLOWPRIMES] = {2, 3, 5, 7, 11, 13, 17, 19};
! 1168:
! 1169: /*****************************************************************************
! 1170: * Compute the workload shift factor. (chained declustering)
! 1171: *
! 1172: * Return nonzero if access should shift to secondary, otherwise,
! 1173: * access is to primary.
! 1174: *****************************************************************************/
! 1175: int
! 1176: rf_compute_workload_shift(RF_Raid_t *raidPtr, RF_PhysDiskAddr_t *pda)
! 1177: {
! 1178: /*
! 1179: * Variables:
! 1180: * d = Column of disk containing primary.
! 1181: * f = Column of failed disk.
! 1182: * n = Number of disks in array.
! 1183: * sd = "shift distance"
! 1184: * (number of columns that d is to the right of f).
! 1185: * row = Row of array the access is in.
! 1186: * v = Numerator of redirection ratio.
! 1187: * k = Denominator of redirection ratio.
! 1188: */
! 1189: RF_RowCol_t d, f, sd, row, n;
! 1190: int k, v, ret, i;
! 1191:
! 1192: row = pda->row;
! 1193: n = raidPtr->numCol;
! 1194:
! 1195: /* Assign column of primary copy to d. */
! 1196: d = pda->col;
! 1197:
! 1198: /* Assign column of dead disk to f. */
! 1199: for (f = 0; ((!RF_DEAD_DISK(raidPtr->Disks[row][f].status)) &&
! 1200: (f < n)); f++);
! 1201:
! 1202: RF_ASSERT(f < n);
! 1203: RF_ASSERT(f != d);
! 1204:
! 1205: sd = (f > d) ? (n + d - f) : (d - f);
! 1206: RF_ASSERT(sd < n);
! 1207:
! 1208: /*
! 1209: * v of every k accesses should be redirected.
! 1210: *
! 1211: * v/k := (n-1-sd)/(n-1)
! 1212: */
! 1213: v = (n - 1 - sd);
! 1214: k = (n - 1);
! 1215:
! 1216: #if 1
! 1217: /*
! 1218: * XXX
! 1219: * Is this worth it ?
! 1220: *
! 1221: * Now reduce the fraction, by repeatedly factoring
! 1222: * out primes (just like they teach in elementary school !).
! 1223: */
! 1224: for (i = 0; i < NLOWPRIMES; i++) {
! 1225: if (lowprimes[i] > v)
! 1226: break;
! 1227: while (((v % lowprimes[i]) == 0) && ((k % lowprimes[i]) == 0)) {
! 1228: v /= lowprimes[i];
! 1229: k /= lowprimes[i];
! 1230: }
! 1231: }
! 1232: #endif
! 1233:
! 1234: raidPtr->hist_diskreq[row][d]++;
! 1235: if (raidPtr->hist_diskreq[row][d] > v) {
! 1236: ret = 0; /* Do not redirect. */
! 1237: } else {
! 1238: ret = 1; /* Redirect. */
! 1239: }
! 1240:
! 1241: #if 0
! 1242: printf("d=%d f=%d sd=%d v=%d k=%d ret=%d h=%d\n", d, f, sd, v, k, ret,
! 1243: raidPtr->hist_diskreq[row][d]);
! 1244: #endif
! 1245:
! 1246: if (raidPtr->hist_diskreq[row][d] >= k) {
! 1247: /* Reset counter. */
! 1248: raidPtr->hist_diskreq[row][d] = 0;
! 1249: }
! 1250: return (ret);
! 1251: }
! 1252:
! 1253: /*
! 1254: * Disk selection routines.
! 1255: */
! 1256:
! 1257: /*
! 1258: * Select the disk with the shortest queue from a mirror pair.
! 1259: * Both the disk I/Os queued in RAIDframe as well as those at the physical
! 1260: * disk are counted as members of the "queue".
! 1261: */
! 1262: void
! 1263: rf_SelectMirrorDiskIdle(RF_DagNode_t *node)
! 1264: {
! 1265: RF_Raid_t *raidPtr = (RF_Raid_t *) node->dagHdr->raidPtr;
! 1266: RF_RowCol_t rowData, colData, rowMirror, colMirror;
! 1267: int dataQueueLength, mirrorQueueLength, usemirror;
! 1268: RF_PhysDiskAddr_t *data_pda = (RF_PhysDiskAddr_t *) node->params[0].p;
! 1269: RF_PhysDiskAddr_t *mirror_pda = (RF_PhysDiskAddr_t *) node->params[4].p;
! 1270: RF_PhysDiskAddr_t *tmp_pda;
! 1271: RF_RaidDisk_t **disks = raidPtr->Disks;
! 1272: RF_DiskQueue_t **dqs = raidPtr->Queues, *dataQueue, *mirrorQueue;
! 1273:
! 1274: /* Return the [row col] of the disk with the shortest queue. */
! 1275: rowData = data_pda->row;
! 1276: colData = data_pda->col;
! 1277: rowMirror = mirror_pda->row;
! 1278: colMirror = mirror_pda->col;
! 1279: dataQueue = &(dqs[rowData][colData]);
! 1280: mirrorQueue = &(dqs[rowMirror][colMirror]);
! 1281:
! 1282: #ifdef RF_LOCK_QUEUES_TO_READ_LEN
! 1283: RF_LOCK_QUEUE_MUTEX(dataQueue, "SelectMirrorDiskIdle");
! 1284: #endif /* RF_LOCK_QUEUES_TO_READ_LEN */
! 1285: dataQueueLength = dataQueue->queueLength + dataQueue->numOutstanding;
! 1286: #ifdef RF_LOCK_QUEUES_TO_READ_LEN
! 1287: RF_UNLOCK_QUEUE_MUTEX(dataQueue, "SelectMirrorDiskIdle");
! 1288: RF_LOCK_QUEUE_MUTEX(mirrorQueue, "SelectMirrorDiskIdle");
! 1289: #endif /* RF_LOCK_QUEUES_TO_READ_LEN */
! 1290: mirrorQueueLength = mirrorQueue->queueLength +
! 1291: mirrorQueue->numOutstanding;
! 1292: #ifdef RF_LOCK_QUEUES_TO_READ_LEN
! 1293: RF_UNLOCK_QUEUE_MUTEX(mirrorQueue, "SelectMirrorDiskIdle");
! 1294: #endif /* RF_LOCK_QUEUES_TO_READ_LEN */
! 1295:
! 1296: usemirror = 0;
! 1297: if (RF_DEAD_DISK(disks[rowMirror][colMirror].status)) {
! 1298: usemirror = 0;
! 1299: } else
! 1300: if (RF_DEAD_DISK(disks[rowData][colData].status)) {
! 1301: usemirror = 1;
! 1302: } else
! 1303: if (raidPtr->parity_good == RF_RAID_DIRTY) {
! 1304: /* Trust only the main disk. */
! 1305: usemirror = 0;
! 1306: } else
! 1307: if (dataQueueLength < mirrorQueueLength) {
! 1308: usemirror = 0;
! 1309: } else
! 1310: if (mirrorQueueLength < dataQueueLength) {
! 1311: usemirror = 1;
! 1312: } else {
! 1313: /* Queues are equal length. */
! 1314: /* Attempt cleverness. */
! 1315: if (SNUM_DIFF(dataQueue
! 1316: ->last_deq_sector, data_pda
! 1317: ->startSector) <=
! 1318: SNUM_DIFF(mirrorQueue
! 1319: ->last_deq_sector, mirror_pda
! 1320: ->startSector)) {
! 1321: usemirror = 0;
! 1322: } else {
! 1323: usemirror = 1;
! 1324: }
! 1325: }
! 1326:
! 1327: if (usemirror) {
! 1328: /* Use mirror (parity) disk, swap params 0 & 4. */
! 1329: tmp_pda = data_pda;
! 1330: node->params[0].p = mirror_pda;
! 1331: node->params[4].p = tmp_pda;
! 1332: } else {
! 1333: /* Use data disk, leave param 0 unchanged. */
! 1334: }
! 1335: /*printf("dataQueueLength %d, mirrorQueueLength %d\n", dataQueueLength,
! 1336: mirrorQueueLength);*/
! 1337: }
! 1338:
! 1339: /*
! 1340: * Do simple partitioning. This assumes that
! 1341: * the data and parity disks are laid out identically.
! 1342: */
! 1343: void
! 1344: rf_SelectMirrorDiskPartition(RF_DagNode_t *node)
! 1345: {
! 1346: RF_Raid_t *raidPtr = (RF_Raid_t *) node->dagHdr->raidPtr;
! 1347: RF_RowCol_t rowData, colData, rowMirror, colMirror;
! 1348: RF_PhysDiskAddr_t *data_pda = (RF_PhysDiskAddr_t *) node->params[0].p;
! 1349: RF_PhysDiskAddr_t *mirror_pda = (RF_PhysDiskAddr_t *) node->params[4].p;
! 1350: RF_PhysDiskAddr_t *tmp_pda;
! 1351: RF_RaidDisk_t **disks = raidPtr->Disks;
! 1352: RF_DiskQueue_t **dqs = raidPtr->Queues, *dataQueue, *mirrorQueue;
! 1353: int usemirror;
! 1354:
! 1355: /* Return the [row col] of the disk with the shortest queue. */
! 1356: rowData = data_pda->row;
! 1357: colData = data_pda->col;
! 1358: rowMirror = mirror_pda->row;
! 1359: colMirror = mirror_pda->col;
! 1360: dataQueue = &(dqs[rowData][colData]);
! 1361: mirrorQueue = &(dqs[rowMirror][colMirror]);
! 1362:
! 1363: usemirror = 0;
! 1364: if (RF_DEAD_DISK(disks[rowMirror][colMirror].status)) {
! 1365: usemirror = 0;
! 1366: } else
! 1367: if (RF_DEAD_DISK(disks[rowData][colData].status)) {
! 1368: usemirror = 1;
! 1369: } else
! 1370: if (raidPtr->parity_good == RF_RAID_DIRTY) {
! 1371: /* Trust only the main disk. */
! 1372: usemirror = 0;
! 1373: } else
! 1374: if (data_pda->startSector <
! 1375: (disks[rowData][colData].numBlocks / 2)) {
! 1376: usemirror = 0;
! 1377: } else {
! 1378: usemirror = 1;
! 1379: }
! 1380:
! 1381: if (usemirror) {
! 1382: /* Use mirror (parity) disk, swap params 0 & 4. */
! 1383: tmp_pda = data_pda;
! 1384: node->params[0].p = mirror_pda;
! 1385: node->params[4].p = tmp_pda;
! 1386: } else {
! 1387: /* Use data disk, leave param 0 unchanged. */
! 1388: }
! 1389: }
CVSweb