Annotation of sys/dev/raidframe/rf_pq.c, Revision 1.1
1.1 ! nbrk 1: /* $OpenBSD: rf_pq.c,v 1.6 2002/12/16 07:01:04 tdeval Exp $ */
! 2: /* $NetBSD: rf_pq.c,v 1.7 2000/01/07 03:41:02 oster Exp $ */
! 3:
! 4: /*
! 5: * Copyright (c) 1995 Carnegie-Mellon University.
! 6: * All rights reserved.
! 7: *
! 8: * Author: Daniel Stodolsky
! 9: *
! 10: * Permission to use, copy, modify and distribute this software and
! 11: * its documentation is hereby granted, provided that both the copyright
! 12: * notice and this permission notice appear in all copies of the
! 13: * software, derivative works or modified versions, and any portions
! 14: * thereof, and that both notices appear in supporting documentation.
! 15: *
! 16: * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
! 17: * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
! 18: * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
! 19: *
! 20: * Carnegie Mellon requests users of this software to return to
! 21: *
! 22: * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
! 23: * School of Computer Science
! 24: * Carnegie Mellon University
! 25: * Pittsburgh PA 15213-3890
! 26: *
! 27: * any improvements or extensions that they make and grant Carnegie the
! 28: * rights to redistribute these changes.
! 29: */
! 30:
! 31: /*
! 32: * Code for RAID level 6 (P + Q) disk array architecture.
! 33: */
! 34:
! 35: #include "rf_archs.h"
! 36: #include "rf_types.h"
! 37: #include "rf_raid.h"
! 38: #include "rf_dag.h"
! 39: #include "rf_dagffrd.h"
! 40: #include "rf_dagffwr.h"
! 41: #include "rf_dagdegrd.h"
! 42: #include "rf_dagdegwr.h"
! 43: #include "rf_dagutils.h"
! 44: #include "rf_dagfuncs.h"
! 45: #include "rf_etimer.h"
! 46: #include "rf_pqdeg.h"
! 47: #include "rf_general.h"
! 48: #include "rf_map.h"
! 49: #include "rf_pq.h"
! 50:
! 51: RF_RedFuncs_t rf_pFuncs = {
! 52: rf_RegularONPFunc, "Regular Old-New P",
! 53: rf_SimpleONPFunc, "Simple Old-New P"
! 54: };
! 55: RF_RedFuncs_t rf_pRecoveryFuncs = {
! 56: rf_RecoveryPFunc, "Recovery P Func",
! 57: rf_RecoveryPFunc, "Recovery P Func"
! 58: };
! 59:
! 60: int
! 61: rf_RegularONPFunc(RF_DagNode_t *node)
! 62: {
! 63: return (rf_RegularXorFunc(node));
! 64: }
! 65:
! 66:
! 67: /*
! 68: * Same as simpleONQ func, but the coefficient is always 1.
! 69: */
! 70:
! 71: int
! 72: rf_SimpleONPFunc(RF_DagNode_t *node)
! 73: {
! 74: return (rf_SimpleXorFunc(node));
! 75: }
! 76:
! 77: int
! 78: rf_RecoveryPFunc(RF_DagNode_t *node)
! 79: {
! 80: return (rf_RecoveryXorFunc(node));
! 81: }
! 82:
! 83: int
! 84: rf_RegularPFunc(RF_DagNode_t *node)
! 85: {
! 86: return (rf_RegularXorFunc(node));
! 87: }
! 88:
! 89:
! 90: #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
! 91:
! 92: void rf_QDelta(char *dest, char *obuf, char *nbuf, unsigned length,
! 93: unsigned char coeff);
! 94: void rf_InvertQ(unsigned long *qbuf, unsigned long *abuf, unsigned length,
! 95: unsigned coeff);
! 96:
! 97: RF_RedFuncs_t rf_qFuncs = {
! 98: rf_RegularONQFunc, "Regular Old-New Q",
! 99: rf_SimpleONQFunc, "Simple Old-New Q"
! 100: };
! 101: RF_RedFuncs_t rf_qRecoveryFuncs = {
! 102: rf_RecoveryQFunc, "Recovery Q Func",
! 103: rf_RecoveryQFunc, "Recovery Q Func"
! 104: };
! 105: RF_RedFuncs_t rf_pqRecoveryFuncs = {
! 106: rf_RecoveryPQFunc, "Recovery PQ Func",
! 107: rf_RecoveryPQFunc, "Recovery PQ Func"
! 108: };
! 109:
! 110: void
! 111: rf_PQDagSelect(RF_Raid_t *raidPtr, RF_IoType_t type,
! 112: RF_AccessStripeMap_t *asmap, RF_VoidFuncPtr *createFunc)
! 113: {
! 114: RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
! 115: unsigned ndfail = asmap->numDataFailed;
! 116: unsigned npfail = asmap->numParityFailed;
! 117: unsigned ntfail = npfail + ndfail;
! 118:
! 119: RF_ASSERT(RF_IO_IS_R_OR_W(type));
! 120: if (ntfail > 2) {
! 121: RF_ERRORMSG("more than two disks failed in a single group !"
! 122: " Aborting I/O operation.\n");
! 123: /* *infoFunc = */ *createFunc = NULL;
! 124: return;
! 125: }
! 126: /* Ok, we can do this I/O. */
! 127: if (type == RF_IO_TYPE_READ) {
! 128: switch (ndfail) {
! 129: case 0:
! 130: /* Fault free read. */
! 131: *createFunc = (RF_VoidFuncPtr)
! 132: rf_CreateFaultFreeReadDAG; /* Same as raid 5. */
! 133: break;
! 134: case 1:
! 135: /* Lost a single data unit. */
! 136: /*
! 137: * Two cases:
! 138: * (1) Parity is not lost. Do a normal raid 5
! 139: * reconstruct read.
! 140: * (2) Parity is lost. Do a reconstruct read using "q".
! 141: */
! 142: if (ntfail == 2) { /* Also lost redundancy. */
! 143: if (asmap->failedPDAs[1]->type ==
! 144: RF_PDA_TYPE_PARITY)
! 145: *createFunc = (RF_VoidFuncPtr)
! 146: rf_PQ_110_CreateReadDAG;
! 147: else
! 148: *createFunc = (RF_VoidFuncPtr)
! 149: rf_PQ_101_CreateReadDAG;
! 150: } else {
! 151: /*
! 152: * P and Q are ok. But is there a failure in
! 153: * some unaccessed data unit ?
! 154: */
! 155: if (rf_NumFailedDataUnitsInStripe(raidPtr,
! 156: asmap) == 2)
! 157: *createFunc = (RF_VoidFuncPtr)
! 158: rf_PQ_200_CreateReadDAG;
! 159: else
! 160: *createFunc = (RF_VoidFuncPtr)
! 161: rf_PQ_100_CreateReadDAG;
! 162: }
! 163: break;
! 164: case 2:
! 165: /* Lost two data units. */
! 166: /* *infoFunc = rf_PQOneTwo; */
! 167: *createFunc = (RF_VoidFuncPtr) rf_PQ_200_CreateReadDAG;
! 168: break;
! 169: }
! 170: return;
! 171: }
! 172: /* A write. */
! 173: switch (ntfail) {
! 174: case 0: /* Fault free. */
! 175: if (rf_suppressLocksAndLargeWrites ||
! 176: (((asmap->numStripeUnitsAccessed <=
! 177: (layoutPtr->numDataCol / 2)) &&
! 178: (layoutPtr->numDataCol != 1)) ||
! 179: (asmap->parityInfo->next != NULL) ||
! 180: (asmap->qInfo->next != NULL) ||
! 181: rf_CheckStripeForFailures(raidPtr, asmap))) {
! 182:
! 183: *createFunc = (RF_VoidFuncPtr) rf_PQCreateSmallWriteDAG;
! 184: } else {
! 185: *createFunc = (RF_VoidFuncPtr) rf_PQCreateLargeWriteDAG;
! 186: }
! 187: break;
! 188:
! 189: case 1: /* Single disk fault. */
! 190: if (npfail == 1) {
! 191: RF_ASSERT((asmap->failedPDAs[0]->type ==
! 192: RF_PDA_TYPE_PARITY) ||
! 193: (asmap->failedPDAs[0]->type == RF_PDA_TYPE_Q));
! 194: if (asmap->failedPDAs[0]->type == RF_PDA_TYPE_Q) {
! 195: /*
! 196: * Q died, treat like normal mode raid5 write.
! 197: */
! 198: if (((asmap->numStripeUnitsAccessed <=
! 199: (layoutPtr->numDataCol / 2)) ||
! 200: (asmap->numStripeUnitsAccessed == 1)) ||
! 201: rf_NumFailedDataUnitsInStripe(raidPtr,
! 202: asmap))
! 203: *createFunc = (RF_VoidFuncPtr)
! 204: rf_PQ_001_CreateSmallWriteDAG;
! 205: else
! 206: *createFunc = (RF_VoidFuncPtr)
! 207: rf_PQ_001_CreateLargeWriteDAG;
! 208: } else {/* Parity died, small write only updating Q. */
! 209: if (((asmap->numStripeUnitsAccessed <=
! 210: (layoutPtr->numDataCol / 2)) ||
! 211: (asmap->numStripeUnitsAccessed == 1)) ||
! 212: rf_NumFailedDataUnitsInStripe(raidPtr,
! 213: asmap))
! 214: *createFunc = (RF_VoidFuncPtr)
! 215: rf_PQ_010_CreateSmallWriteDAG;
! 216: else
! 217: *createFunc = (RF_VoidFuncPtr)
! 218: rf_PQ_010_CreateLargeWriteDAG;
! 219: }
! 220: } else { /*
! 221: * Data missing. Do a P reconstruct write if
! 222: * only a single data unit is lost in the
! 223: * stripe, otherwise a PQ reconstruct write.
! 224: */
! 225: if (rf_NumFailedDataUnitsInStripe(raidPtr, asmap) == 2)
! 226: *createFunc = (RF_VoidFuncPtr)
! 227: rf_PQ_200_CreateWriteDAG;
! 228: else
! 229: *createFunc = (RF_VoidFuncPtr)
! 230: rf_PQ_100_CreateWriteDAG;
! 231: }
! 232: break;
! 233:
! 234: case 2: /* Two disk faults. */
! 235: switch (npfail) {
! 236: case 2: /* Both p and q dead. */
! 237: *createFunc = (RF_VoidFuncPtr) rf_PQ_011_CreateWriteDAG;
! 238: break;
! 239: case 1: /* Either p or q and dead data. */
! 240: RF_ASSERT(asmap->failedPDAs[0]->type ==
! 241: RF_PDA_TYPE_DATA);
! 242: RF_ASSERT((asmap->failedPDAs[1]->type ==
! 243: RF_PDA_TYPE_PARITY) ||
! 244: (asmap->failedPDAs[1]->type ==
! 245: RF_PDA_TYPE_Q));
! 246: if (asmap->failedPDAs[1]->type == RF_PDA_TYPE_Q)
! 247: *createFunc = (RF_VoidFuncPtr)
! 248: rf_PQ_101_CreateWriteDAG;
! 249: else
! 250: *createFunc = (RF_VoidFuncPtr)
! 251: rf_PQ_110_CreateWriteDAG;
! 252: break;
! 253: case 0: /* Double data loss. */
! 254: *createFunc = (RF_VoidFuncPtr) rf_PQ_200_CreateWriteDAG;
! 255: break;
! 256: }
! 257: break;
! 258:
! 259: default: /* More than 2 disk faults. */
! 260: *createFunc = NULL;
! 261: RF_PANIC();
! 262: }
! 263: return;
! 264: }
! 265:
! 266:
/*
 * Stop-gap "info" functions.  Compiled out.
 */
#if 0
void
rf_PQOne(RF_Raid_t *raidPtr, int *nSucc, int *nAnte,
    RF_AccessStripeMap_t *asmap)
{
	*nAnte = 1;
	*nSucc = 1;
}

void
rf_PQOneTwo(RF_Raid_t *raidPtr, int *nSucc, int *nAnte,
    RF_AccessStripeMap_t *asmap)
{
	*nSucc = 1;
	*nAnte = 2;
}
#endif
! 286:
! 287: RF_CREATE_DAG_FUNC_DECL(rf_PQCreateLargeWriteDAG)
! 288: {
! 289: rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags,
! 290: allocList, 2, rf_RegularPQFunc, RF_FALSE);
! 291: }
! 292:
! 293: int
! 294: rf_RegularONQFunc(RF_DagNode_t *node)
! 295: {
! 296: int np = node->numParams;
! 297: int d;
! 298: RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
! 299: int i;
! 300: RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
! 301: RF_Etimer_t timer;
! 302: char *qbuf, *qpbuf;
! 303: char *obuf, *nbuf;
! 304: RF_PhysDiskAddr_t *old, *new;
! 305: unsigned long coeff;
! 306: unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit;
! 307:
! 308: RF_ETIMER_START(timer);
! 309:
! 310: d = (np - 3) / 4;
! 311: RF_ASSERT(4 * d + 3 == np);
! 312: qbuf = (char *) node->params[2 * d + 1].p; /* Q buffer. */
! 313: for (i = 0; i < d; i++) {
! 314: old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
! 315: obuf = (char *) node->params[2 * i + 1].p;
! 316: new = (RF_PhysDiskAddr_t *) node->params[2 * (d + 1 + i)].p;
! 317: nbuf = (char *) node->params[2 * (d + 1 + i) + 1].p;
! 318: RF_ASSERT(new->numSector == old->numSector);
! 319: RF_ASSERT(new->raidAddress == old->raidAddress);
! 320: /*
! 321: * The stripe unit within the stripe tells us the coefficient
! 322: * to use for the multiply.
! 323: */
! 324: coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout),
! 325: new->raidAddress);
! 326: /*
! 327: * Compute the data unit offset within the column, then add
! 328: * one.
! 329: */
! 330: coeff = (coeff % raidPtr->Layout.numDataCol);
! 331: qpbuf = qbuf + rf_RaidAddressToByte(raidPtr,
! 332: old->startSector % secPerSU);
! 333: rf_QDelta(qpbuf, obuf, nbuf, rf_RaidAddressToByte(raidPtr,
! 334: old->numSector), coeff);
! 335: }
! 336:
! 337: RF_ETIMER_STOP(timer);
! 338: RF_ETIMER_EVAL(timer);
! 339: tracerec->q_us += RF_ETIMER_VAL_US(timer);
! 340: rf_GenericWakeupFunc(node, 0); /*
! 341: * Call wake func explicitly since no
! 342: * I/O in this node.
! 343: */
! 344: return (0);
! 345: }
! 346:
! 347:
! 348: /*
! 349: * See the SimpleXORFunc for the difference between a simple and regular func.
! 350: * These Q functions should be used for
! 351: * new q = Q(data, old data, old q)
! 352: * style updates and not for
! 353: * q = (new data, new data, ...)
! 354: * computations.
! 355: *
! 356: * The simple q takes 2(2d+1)+1 params, where d is the number
! 357: * of stripes written. The order of params is
! 358: * old data pda_0, old data buffer_0, old data pda_1, old data buffer_1, ...
! 359: * old data pda_d, old data buffer_d
! 360: * [2d] old q pda_0, old q buffer
! 361: * [2d_2] new data pda_0, new data buffer_0, ...
! 362: * new data pda_d, new data buffer_d
! 363: * raidPtr
! 364: */
! 365:
! 366: int
! 367: rf_SimpleONQFunc(RF_DagNode_t *node)
! 368: {
! 369: int np = node->numParams;
! 370: int d;
! 371: RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
! 372: int i;
! 373: RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
! 374: RF_Etimer_t timer;
! 375: char *qbuf;
! 376: char *obuf, *nbuf;
! 377: RF_PhysDiskAddr_t *old, *new;
! 378: unsigned long coeff;
! 379:
! 380: RF_ETIMER_START(timer);
! 381:
! 382: d = (np - 3) / 4;
! 383: RF_ASSERT(4 * d + 3 == np);
! 384: qbuf = (char *) node->params[2 * d + 1].p; /* Q buffer. */
! 385: for (i = 0; i < d; i++) {
! 386: old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
! 387: obuf = (char *) node->params[2 * i + 1].p;
! 388: new = (RF_PhysDiskAddr_t *) node->params[2 * (d + 1 + i)].p;
! 389: nbuf = (char *) node->params[2 * (d + 1 + i) + 1].p;
! 390: RF_ASSERT(new->numSector == old->numSector);
! 391: RF_ASSERT(new->raidAddress == old->raidAddress);
! 392: /*
! 393: * The stripe unit within the stripe tells us the coefficient
! 394: * to use for the multiply.
! 395: */
! 396: coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout),
! 397: new->raidAddress);
! 398: /*
! 399: * Compute the data unit offset within the column, then add
! 400: * one.
! 401: */
! 402: coeff = (coeff % raidPtr->Layout.numDataCol);
! 403: rf_QDelta(qbuf, obuf, nbuf, rf_RaidAddressToByte(raidPtr,
! 404: old->numSector), coeff);
! 405: }
! 406:
! 407: RF_ETIMER_STOP(timer);
! 408: RF_ETIMER_EVAL(timer);
! 409: tracerec->q_us += RF_ETIMER_VAL_US(timer);
! 410: rf_GenericWakeupFunc(node, 0); /*
! 411: * Call wake func explicitly since no
! 412: * I/O in this node.
! 413: */
! 414: return (0);
! 415: }
! 416:
! 417: RF_CREATE_DAG_FUNC_DECL(rf_PQCreateSmallWriteDAG)
! 418: {
! 419: rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags,
! 420: allocList, &rf_pFuncs, &rf_qFuncs);
! 421: }
! 422:
! 423:
! 424: void rf_RegularQSubr(RF_DagNode_t *, char *);
! 425:
! 426: void
! 427: rf_RegularQSubr(RF_DagNode_t *node, char *qbuf)
! 428: {
! 429: int np = node->numParams;
! 430: int d;
! 431: RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
! 432: unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit;
! 433: int i;
! 434: RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
! 435: RF_Etimer_t timer;
! 436: char *obuf, *qpbuf;
! 437: RF_PhysDiskAddr_t *old;
! 438: unsigned long coeff;
! 439:
! 440: RF_ETIMER_START(timer);
! 441:
! 442: d = (np - 1) / 2;
! 443: RF_ASSERT(2 * d + 1 == np);
! 444: for (i = 0; i < d; i++) {
! 445: old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
! 446: obuf = (char *) node->params[2 * i + 1].p;
! 447: coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout),
! 448: old->raidAddress);
! 449: /*
! 450: * Compute the data unit offset within the column, then add
! 451: * one.
! 452: */
! 453: coeff = (coeff % raidPtr->Layout.numDataCol);
! 454: /*
! 455: * The input buffers may not all be aligned with the start of
! 456: * the stripe. So shift by their sector offset within the
! 457: * stripe unit.
! 458: */
! 459: qpbuf = qbuf + rf_RaidAddressToByte(raidPtr,
! 460: old->startSector % secPerSU);
! 461: rf_IncQ((unsigned long *) qpbuf, (unsigned long *) obuf,
! 462: rf_RaidAddressToByte(raidPtr, old->numSector), coeff);
! 463: }
! 464:
! 465: RF_ETIMER_STOP(timer);
! 466: RF_ETIMER_EVAL(timer);
! 467: tracerec->q_us += RF_ETIMER_VAL_US(timer);
! 468: }
! 469:
! 470:
! 471: /*
! 472: * Used in degraded writes.
! 473: */
! 474:
! 475: void rf_DegrQSubr(RF_DagNode_t *);
! 476:
! 477: void
! 478: rf_DegrQSubr(RF_DagNode_t *node)
! 479: {
! 480: int np = node->numParams;
! 481: int d;
! 482: RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
! 483: unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit;
! 484: RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
! 485: RF_Etimer_t timer;
! 486: char *qbuf = node->results[1];
! 487: char *obuf, *qpbuf;
! 488: RF_PhysDiskAddr_t *old;
! 489: unsigned long coeff;
! 490: unsigned fail_start;
! 491: int i, j;
! 492:
! 493: old = (RF_PhysDiskAddr_t *) node->params[np - 2].p;
! 494: fail_start = old->startSector % secPerSU;
! 495:
! 496: RF_ETIMER_START(timer);
! 497:
! 498: d = (np - 2) / 2;
! 499: RF_ASSERT(2 * d + 2 == np);
! 500: for (i = 0; i < d; i++) {
! 501: old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
! 502: obuf = (char *) node->params[2 * i + 1].p;
! 503: coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout),
! 504: old->raidAddress);
! 505: /*
! 506: * Compute the data unit offset within the column, then add
! 507: * one.
! 508: */
! 509: coeff = (coeff % raidPtr->Layout.numDataCol);
! 510: /*
! 511: * The input buffers may not all be aligned with the start of
! 512: * the stripe. So shift by their sector offset within the
! 513: * stripe unit.
! 514: */
! 515: j = old->startSector % secPerSU;
! 516: RF_ASSERT(j >= fail_start);
! 517: qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, j - fail_start);
! 518: rf_IncQ((unsigned long *) qpbuf, (unsigned long *) obuf,
! 519: rf_RaidAddressToByte(raidPtr, old->numSector), coeff);
! 520: }
! 521:
! 522: RF_ETIMER_STOP(timer);
! 523: RF_ETIMER_EVAL(timer);
! 524: tracerec->q_us += RF_ETIMER_VAL_US(timer);
! 525: }
! 526:
! 527:
! 528: /*
! 529: * Called by large write code to compute the new parity and the new q.
! 530: *
! 531: * Structure of the params:
! 532: *
! 533: * pda_0, buffer_0, pda_1 , buffer_1, ..., pda_d, buffer_d (d = numDataCol)
! 534: * raidPtr
! 535: *
! 536: * For a total of 2d+1 arguments.
! 537: * The result buffers results[0], results[1] are the buffers for the p and q,
! 538: * respectively.
! 539: *
! 540: * We compute Q first, then compute P. The P calculation may try to reuse
! 541: * one of the input buffers for its output, so if we computed P first, we would
! 542: * corrupt the input for the q calculation.
! 543: */
! 544:
! 545: int
! 546: rf_RegularPQFunc(RF_DagNode_t *node)
! 547: {
! 548: rf_RegularQSubr(node, node->results[1]);
! 549: return (rf_RegularXorFunc(node)); /* Does the wakeup. */
! 550: }
! 551:
! 552: int
! 553: rf_RegularQFunc(RF_DagNode_t *node)
! 554: {
! 555: /* Almost ... adjust Qsubr args. */
! 556: rf_RegularQSubr(node, node->results[0]);
! 557: rf_GenericWakeupFunc(node, 0); /*
! 558: * Call wake func explicitly since no
! 559: * I/O in this node.
! 560: */
! 561: return (0);
! 562: }
! 563:
! 564:
! 565: /*
! 566: * Called by singly degraded write code to compute the new parity and
! 567: * the new q.
! 568: *
! 569: * Structure of the params:
! 570: *
! 571: * pda_0, buffer_0, pda_1 , buffer_1, ..., pda_d, buffer_d
! 572: * failedPDA raidPtr
! 573: *
! 574: * for a total of 2d+2 arguments.
! 575: * The result buffers results[0], results[1] are the buffers for the parity
! 576: * and q, respectively.
! 577: *
! 578: * We compute Q first, then compute parity. The parity calculation may try
! 579: * to reuse one of the input buffers for its output, so if we computed parity
! 580: * first, we would corrupt the input for the q calculation.
! 581: *
! 582: * We treat this identically to the regularPQ case, ignoring the failedPDA
! 583: * extra argument.
! 584: */
! 585:
! 586: void
! 587: rf_Degraded_100_PQFunc(RF_DagNode_t *node)
! 588: {
! 589: int np = node->numParams;
! 590:
! 591: RF_ASSERT(np >= 2);
! 592: rf_DegrQSubr(node);
! 593: rf_RecoveryXorFunc(node);
! 594: }
! 595:
! 596:
! 597: /*
! 598: * The two below are used when reading a stripe with a single lost data unit.
! 599: * The parameters are
! 600: *
! 601: * pda_0, buffer_0, ..., pda_n, buffer_n, P pda, P buffer, failedPDA, raidPtr
! 602: *
! 603: * and results[0] contains the data buffer, which is originally zero-filled.
! 604: */
! 605:
! 606: /*
! 607: * This Q func is used by the degraded-mode dag functions to recover lost data.
! 608: * The second-to-last parameter is the PDA for the failed portion of the
! 609: * access. The code here looks at this PDA and assumes that the xor target
! 610: * buffer is equal in size to the number of sectors in the failed PDA. It then
! 611: * uses the other PDAs in the parameter list to determine where within the
! 612: * target buffer the corresponding data should be xored.
! 613: *
! 614: * Recall the basic equation is
! 615: *
! 616: * Q = (data_1 + 2 * data_2 ... + k * data_k) mod 256
! 617: *
! 618: * so to recover data_j we need
! 619: *
! 620: * J data_j = (Q - data_1 - 2 data_2 ... - k * data_k) mod 256
! 621: *
! 622: * So the coefficient for each buffer is (255 - data_col), and j should be
! 623: * initialized by copying Q into it. Then we need to do a table lookup to
! 624: * convert to solve
! 625: * data_j /= J
! 626: *
! 627: */
! 628:
! 629: int
! 630: rf_RecoveryQFunc(RF_DagNode_t *node)
! 631: {
! 632: RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
! 633: RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
! 634: RF_PhysDiskAddr_t *failedPDA =
! 635: (RF_PhysDiskAddr_t *) node->params[node->numParams - 2].p;
! 636: int i;
! 637: RF_PhysDiskAddr_t *pda;
! 638: RF_RaidAddr_t suoffset;
! 639: RF_RaidAddr_t failedSUOffset =
! 640: rf_StripeUnitOffset(layoutPtr, failedPDA->startSector);
! 641: char *srcbuf, *destbuf;
! 642: RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
! 643: RF_Etimer_t timer;
! 644: unsigned long coeff;
! 645:
! 646: RF_ETIMER_START(timer);
! 647: /* Start by copying Q into the buffer. */
! 648: bcopy(node->params[node->numParams - 3].p, node->results[0],
! 649: rf_RaidAddressToByte(raidPtr, failedPDA->numSector));
! 650: for (i = 0; i < node->numParams - 4; i += 2) {
! 651: RF_ASSERT(node->params[i + 1].p != node->results[0]);
! 652: pda = (RF_PhysDiskAddr_t *) node->params[i].p;
! 653: srcbuf = (char *) node->params[i + 1].p;
! 654: suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
! 655: destbuf = ((char *) node->results[0]) +
! 656: rf_RaidAddressToByte(raidPtr, suoffset - failedSUOffset);
! 657: coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout),
! 658: pda->raidAddress);
! 659: /* Compute the data unit offset within the column. */
! 660: coeff = (coeff % raidPtr->Layout.numDataCol);
! 661: rf_IncQ((unsigned long *) destbuf, (unsigned long *) srcbuf,
! 662: rf_RaidAddressToByte(raidPtr, pda->numSector), coeff);
! 663: }
! 664: /* Do the nasty inversion now. */
! 665: coeff = (rf_RaidAddressToStripeUnitID(&(raidPtr->Layout),
! 666: failedPDA->startSector) % raidPtr->Layout.numDataCol);
! 667: rf_InvertQ(node->results[0], node->results[0],
! 668: rf_RaidAddressToByte(raidPtr, pda->numSector), coeff);
! 669: RF_ETIMER_STOP(timer);
! 670: RF_ETIMER_EVAL(timer);
! 671: tracerec->q_us += RF_ETIMER_VAL_US(timer);
! 672: rf_GenericWakeupFunc(node, 0);
! 673: return (0);
! 674: }
! 675:
! 676: int
! 677: rf_RecoveryPQFunc(RF_DagNode_t *node)
! 678: {
! 679: RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
! 680: printf("raid%d: Recovery from PQ not implemented.\n", raidPtr->raidid);
! 681: return (1);
! 682: }
! 683:
! 684:
! 685: /*
! 686: * Degraded write Q subroutine.
! 687: * Used when P is dead.
! 688: * Large-write style Q computation.
! 689: * Parameters:
! 690: *
! 691: * (pda, buf), (pda, buf), ..., (failedPDA, bufPtr), failedPDA, raidPtr.
! 692: *
! 693: * We ignore failedPDA.
! 694: *
! 695: * This is a "simple style" recovery func.
! 696: */
! 697:
! 698: void
! 699: rf_PQ_DegradedWriteQFunc(RF_DagNode_t *node)
! 700: {
! 701: int np = node->numParams;
! 702: int d;
! 703: RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
! 704: unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit;
! 705: RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
! 706: RF_Etimer_t timer;
! 707: char *qbuf = node->results[0];
! 708: char *obuf, *qpbuf;
! 709: RF_PhysDiskAddr_t *old;
! 710: unsigned long coeff;
! 711: int fail_start, i, j;
! 712:
! 713: old = (RF_PhysDiskAddr_t *) node->params[np - 2].p;
! 714: fail_start = old->startSector % secPerSU;
! 715:
! 716: RF_ETIMER_START(timer);
! 717:
! 718: d = (np - 2) / 2;
! 719: RF_ASSERT(2 * d + 2 == np);
! 720:
! 721: for (i = 0; i < d; i++) {
! 722: old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
! 723: obuf = (char *) node->params[2 * i + 1].p;
! 724: coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout),
! 725: old->raidAddress);
! 726: /*
! 727: * Compute the data unit offset within the column, then add
! 728: * one.
! 729: */
! 730: coeff = (coeff % raidPtr->Layout.numDataCol);
! 731: j = old->startSector % secPerSU;
! 732: RF_ASSERT(j >= fail_start);
! 733: qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, j - fail_start);
! 734: rf_IncQ((unsigned long *) qpbuf, (unsigned long *) obuf,
! 735: rf_RaidAddressToByte(raidPtr, old->numSector), coeff);
! 736: }
! 737:
! 738: RF_ETIMER_STOP(timer);
! 739: RF_ETIMER_EVAL(timer);
! 740: tracerec->q_us += RF_ETIMER_VAL_US(timer);
! 741: rf_GenericWakeupFunc(node, 0);
! 742: }
! 743:
! 744:
! 745: /* Q computations. */
! 746:
! 747: /*
! 748: * Coeff - colummn;
! 749: *
! 750: * Compute dest ^= qfor[28-coeff][rn[coeff+1] a]
! 751: *
! 752: * On 5-bit basis;
! 753: * Length in bytes;
! 754: */
! 755:
! 756: void
! 757: rf_IncQ(unsigned long *dest, unsigned long *buf, unsigned length,
! 758: unsigned coeff)
! 759: {
! 760: unsigned long a, d, new;
! 761: unsigned long a1, a2;
! 762: unsigned int *q = &(rf_qfor[28 - coeff][0]);
! 763: unsigned r = rf_rn[coeff + 1];
! 764:
! 765: #define EXTRACT(a,i) ((a >> (5L*i)) & 0x1f)
! 766: #define INSERT(a,i) (a << (5L*i))
! 767:
! 768: length /= 8;
! 769: /* 13 5 bit quants in a 64 bit word. */
! 770: while (length) {
! 771: a = *buf++;
! 772: d = *dest;
! 773: a1 = EXTRACT(a, 0) ^ r;
! 774: a2 = EXTRACT(a, 1) ^ r;
! 775: new = INSERT(a2, 1) | a1;
! 776: a1 = EXTRACT(a, 2) ^ r;
! 777: a2 = EXTRACT(a, 3) ^ r;
! 778: a1 = q[a1];
! 779: a2 = q[a2];
! 780: new = new | INSERT(a1, 2) | INSERT(a2, 3);
! 781: a1 = EXTRACT(a, 4) ^ r;
! 782: a2 = EXTRACT(a, 5) ^ r;
! 783: a1 = q[a1];
! 784: a2 = q[a2];
! 785: new = new | INSERT(a1, 4) | INSERT(a2, 5);
! 786: a1 = EXTRACT(a, 5) ^ r;
! 787: a2 = EXTRACT(a, 6) ^ r;
! 788: a1 = q[a1];
! 789: a2 = q[a2];
! 790: new = new | INSERT(a1, 5) | INSERT(a2, 6);
! 791: #if RF_LONGSHIFT > 2
! 792: a1 = EXTRACT(a, 7) ^ r;
! 793: a2 = EXTRACT(a, 8) ^ r;
! 794: a1 = q[a1];
! 795: a2 = q[a2];
! 796: new = new | INSERT(a1, 7) | INSERT(a2, 8);
! 797: a1 = EXTRACT(a, 9) ^ r;
! 798: a2 = EXTRACT(a, 10) ^ r;
! 799: a1 = q[a1];
! 800: a2 = q[a2];
! 801: new = new | INSERT(a1, 9) | INSERT(a2, 10);
! 802: a1 = EXTRACT(a, 11) ^ r;
! 803: a2 = EXTRACT(a, 12) ^ r;
! 804: a1 = q[a1];
! 805: a2 = q[a2];
! 806: new = new | INSERT(a1, 11) | INSERT(a2, 12);
! 807: #endif /* RF_LONGSHIFT > 2 */
! 808: d ^= new;
! 809: *dest++ = d;
! 810: length--;
! 811: }
! 812: }
! 813:
! 814:
! 815: /*
! 816: * Compute.
! 817: *
! 818: * dest ^= rf_qfor[28-coeff][rf_rn[coeff+1] (old^new)]
! 819: *
! 820: * On a five bit basis.
! 821: * Optimization: compute old ^ new on 64 bit basis.
! 822: *
! 823: * Length in bytes.
! 824: */
! 825:
! 826: void
! 827: rf_QDelta(char *dest, char *obuf, char *nbuf, unsigned length,
! 828: unsigned char coeff)
! 829: {
! 830: unsigned long a, d, new;
! 831: unsigned long a1, a2;
! 832: unsigned int *q = &(rf_qfor[28 - coeff][0]);
! 833: unsigned int r = rf_rn[coeff + 1];
! 834:
! 835: r = a1 = a2 = new = d = a = 0; /* XXX For now... */
! 836: q = NULL; /* XXX For now */
! 837:
! 838: #ifdef _KERNEL
! 839: /*
! 840: * PQ in kernel currently not supported because the encoding/decoding
! 841: * table is not present.
! 842: */
! 843: bzero(dest, length);
! 844: #else /* _KERNEL */
! 845: /* This code probably doesn't work and should be rewritten. -wvcii */
! 846: /* 13 5 bit quants in a 64 bit word. */
! 847: length /= 8;
! 848: while (length) {
! 849: a = *obuf++; /*
! 850: * XXX Need to reorg to avoid cache conflicts.
! 851: */
! 852: a ^= *nbuf++;
! 853: d = *dest;
! 854: a1 = EXTRACT(a, 0) ^ r;
! 855: a2 = EXTRACT(a, 1) ^ r;
! 856: a1 = q[a1];
! 857: a2 = q[a2];
! 858: new = INSERT(a2, 1) | a1;
! 859: a1 = EXTRACT(a, 2) ^ r;
! 860: a2 = EXTRACT(a, 3) ^ r;
! 861: a1 = q[a1];
! 862: a2 = q[a2];
! 863: new = new | INSERT(a1, 2) | INSERT(a2, 3);
! 864: a1 = EXTRACT(a, 4) ^ r;
! 865: a2 = EXTRACT(a, 5) ^ r;
! 866: a1 = q[a1];
! 867: a2 = q[a2];
! 868: new = new | INSERT(a1, 4) | INSERT(a2, 5);
! 869: a1 = EXTRACT(a, 5) ^ r;
! 870: a2 = EXTRACT(a, 6) ^ r;
! 871: a1 = q[a1];
! 872: a2 = q[a2];
! 873: new = new | INSERT(a1, 5) | INSERT(a2, 6);
! 874: #if RF_LONGSHIFT > 2
! 875: a1 = EXTRACT(a, 7) ^ r;
! 876: a2 = EXTRACT(a, 8) ^ r;
! 877: a1 = q[a1];
! 878: a2 = q[a2];
! 879: new = new | INSERT(a1, 7) | INSERT(a2, 8);
! 880: a1 = EXTRACT(a, 9) ^ r;
! 881: a2 = EXTRACT(a, 10) ^ r;
! 882: a1 = q[a1];
! 883: a2 = q[a2];
! 884: new = new | INSERT(a1, 9) | INSERT(a2, 10);
! 885: a1 = EXTRACT(a, 11) ^ r;
! 886: a2 = EXTRACT(a, 12) ^ r;
! 887: a1 = q[a1];
! 888: a2 = q[a2];
! 889: new = new | INSERT(a1, 11) | INSERT(a2, 12);
! 890: #endif /* RF_LONGSHIFT > 2 */
! 891: d ^= new;
! 892: *dest++ = d;
! 893: length--;
! 894: }
! 895: #endif /* _KERNEL */
! 896: }
! 897:
! 898:
! 899: /*
! 900: * Recover columns a and b from the given p and q into
! 901: * bufs abuf and bbuf. All bufs are word aligned.
! 902: * Length is in bytes.
! 903: */
! 904:
! 905: /*
! 906: * XXX
! 907: *
! 908: * Everything about this seems wrong.
! 909: */
! 910:
! 911: void
! 912: rf_PQ_recover(unsigned long *pbuf, unsigned long *qbuf, unsigned long *abuf,
! 913: unsigned long *bbuf, unsigned length, unsigned coeff_a, unsigned coeff_b)
! 914: {
! 915: unsigned long p, q, a, a0, a1;
! 916: int col = (29 * coeff_a) + coeff_b;
! 917: unsigned char *q0 = &(rf_qinv[col][0]);
! 918:
! 919: length /= 8;
! 920: while (length) {
! 921: p = *pbuf++;
! 922: q = *qbuf++;
! 923: a0 = EXTRACT(p, 0);
! 924: a1 = EXTRACT(q, 0);
! 925: a = q0[a0 << 5 | a1];
! 926:
! 927: #define MF(i) \
! 928: do { \
! 929: a0 = EXTRACT(p, i); \
! 930: a1 = EXTRACT(q, i); \
! 931: a = a | INSERT(q0[a0<<5 | a1], i); \
! 932: } while (0)
! 933:
! 934: MF(1);
! 935: MF(2);
! 936: MF(3);
! 937: MF(4);
! 938: MF(5);
! 939: MF(6);
! 940: #if 0
! 941: MF(7);
! 942: MF(8);
! 943: MF(9);
! 944: MF(10);
! 945: MF(11);
! 946: MF(12);
! 947: #endif /* 0 */
! 948: *abuf++ = a;
! 949: *bbuf++ = a ^ p;
! 950: length--;
! 951: }
! 952: }
! 953:
! 954:
! 955: /*
! 956: * Lost parity and a data column. Recover that data column.
! 957: * Assume col coeff is lost. Let q the contents of Q after
! 958: * all surviving data columns have been q-xored out of it.
! 959: * Then we have the equation
! 960: *
! 961: * q[28-coeff][a_i ^ r_i+1] = q
! 962: *
! 963: * but q is cyclic with period 31.
! 964: * So q[3+coeff][q[28-coeff][a_i ^ r_{i+1}]] =
! 965: * q[31][a_i ^ r_{i+1}] = a_i ^ r_{i+1} .
! 966: *
! 967: * so a_i = r_{coeff+1} ^ q[3+coeff][q]
! 968: *
! 969: * The routine is passed q buffer and the buffer
! 970: * the data is to be recoverd into. They can be the same.
! 971: */
! 972:
! 973: void
! 974: rf_InvertQ(unsigned long *qbuf, unsigned long *abuf, unsigned length,
! 975: unsigned coeff)
! 976: {
! 977: unsigned long a, new;
! 978: unsigned long a1, a2;
! 979: unsigned int *q = &(rf_qfor[3 + coeff][0]);
! 980: unsigned r = rf_rn[coeff + 1];
! 981:
! 982: /* 13 5 bit quants in a 64 bit word. */
! 983: length /= 8;
! 984: while (length) {
! 985: a = *qbuf++;
! 986: a1 = EXTRACT(a, 0);
! 987: a2 = EXTRACT(a, 1);
! 988: a1 = r ^ q[a1];
! 989: a2 = r ^ q[a2];
! 990: new = INSERT(a2, 1) | a1;
! 991:
! 992: #define M(i,j) \
! 993: do { \
! 994: a1 = EXTRACT(a, i); \
! 995: a2 = EXTRACT(a, j); \
! 996: a1 = r ^ q[a1]; \
! 997: a2 = r ^ q[a2]; \
! 998: new = new | INSERT(a1, i) | INSERT(a2, j); \
! 999: } while (0)
! 1000:
! 1001: M(2, 3);
! 1002: M(4, 5);
! 1003: M(5, 6);
! 1004: #if RF_LONGSHIFT > 2
! 1005: M(7, 8);
! 1006: M(9, 10);
! 1007: M(11, 12);
! 1008: #endif /* RF_LONGSHIFT > 2 */
! 1009: *abuf++ = new;
! 1010: length--;
! 1011: }
! 1012: }
! 1013: #endif /* (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) */
CVSweb