Annotation of sys/dev/raidframe/rf_dagfuncs.c, Revision 1.1
1.1 ! nbrk 1: /* $OpenBSD: rf_dagfuncs.c,v 1.7 2004/09/20 17:51:07 miod Exp $ */
! 2: /* $NetBSD: rf_dagfuncs.c,v 1.6 2000/03/30 12:45:40 augustss Exp $ */
! 3:
! 4: /*
! 5: * Copyright (c) 1995 Carnegie-Mellon University.
! 6: * All rights reserved.
! 7: *
! 8: * Author: Mark Holland, William V. Courtright II
! 9: *
! 10: * Permission to use, copy, modify and distribute this software and
! 11: * its documentation is hereby granted, provided that both the copyright
! 12: * notice and this permission notice appear in all copies of the
! 13: * software, derivative works or modified versions, and any portions
! 14: * thereof, and that both notices appear in supporting documentation.
! 15: *
! 16: * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
! 17: * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
! 18: * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
! 19: *
! 20: * Carnegie Mellon requests users of this software to return to
! 21: *
! 22: * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
! 23: * School of Computer Science
! 24: * Carnegie Mellon University
! 25: * Pittsburgh PA 15213-3890
! 26: *
! 27: * any improvements or extensions that they make and grant Carnegie the
! 28: * rights to redistribute these changes.
! 29: */
! 30:
! 31: /*
! 32: * dagfuncs.c -- DAG node execution routines.
! 33: *
! 34: * Rules:
! 35: * 1. Every DAG execution function must eventually cause node->status to
! 36: * get set to "good" or "bad", and "FinishNode" to be called. In the
! 37: * case of nodes that complete immediately (xor, NullNodeFunc, etc),
! 38: * the node execution function can do these two things directly. In
! 39: * the case of nodes that have to wait for some event (a disk read to
! 40: * complete, a lock to be released, etc) to occur before they can
! 41: * complete, this is typically achieved by having whatever module
! 42: * is doing the operation call GenericWakeupFunc upon completion.
! 43: * 2. DAG execution functions should check the status in the DAG header
! 44: * and NOP out their operations if the status is not "enable". However,
! 45: * execution functions that release resources must be sure to release
! 46: * them even when they NOP out the function that would use them.
! 47: * Functions that acquire resources should go ahead and acquire them
! 48: * even when they NOP, so that a downstream release node will not have
! 49: * to check to find out whether or not the acquire was suppressed.
! 50: */
! 51:
! 52: #include <sys/ioctl.h>
! 53: #include <sys/param.h>
! 54:
! 55: #include "rf_archs.h"
! 56: #include "rf_raid.h"
! 57: #include "rf_dag.h"
! 58: #include "rf_layout.h"
! 59: #include "rf_etimer.h"
! 60: #include "rf_acctrace.h"
! 61: #include "rf_diskqueue.h"
! 62: #include "rf_dagfuncs.h"
! 63: #include "rf_general.h"
! 64: #include "rf_engine.h"
! 65: #include "rf_dagutils.h"
! 66:
! 67: #include "rf_kintf.h"
! 68:
! 69: #if RF_INCLUDE_PARITYLOGGING > 0
! 70: #include "rf_paritylog.h"
! 71: #endif /* RF_INCLUDE_PARITYLOGGING > 0 */
! 72:
! 73: int (*rf_DiskReadFunc) (RF_DagNode_t *);
! 74: int (*rf_DiskWriteFunc) (RF_DagNode_t *);
! 75: int (*rf_DiskReadUndoFunc) (RF_DagNode_t *);
! 76: int (*rf_DiskWriteUndoFunc) (RF_DagNode_t *);
! 77: int (*rf_DiskUnlockFunc) (RF_DagNode_t *);
! 78: int (*rf_DiskUnlockUndoFunc) (RF_DagNode_t *);
! 79: int (*rf_RegularXorUndoFunc) (RF_DagNode_t *);
! 80: int (*rf_SimpleXorUndoFunc) (RF_DagNode_t *);
! 81: int (*rf_RecoveryXorUndoFunc) (RF_DagNode_t *);
! 82:
! 83: /*****************************************************************************
! 84: * Main (only) configuration routine for this module.
! 85: *****************************************************************************/
! 86: int
! 87: rf_ConfigureDAGFuncs(RF_ShutdownList_t **listp)
! 88: {
! 89: RF_ASSERT(((sizeof(long) == 8) && RF_LONGSHIFT == 3) ||
! 90: ((sizeof(long) == 4) && RF_LONGSHIFT == 2));
! 91: rf_DiskReadFunc = rf_DiskReadFuncForThreads;
! 92: rf_DiskReadUndoFunc = rf_DiskUndoFunc;
! 93: rf_DiskWriteFunc = rf_DiskWriteFuncForThreads;
! 94: rf_DiskWriteUndoFunc = rf_DiskUndoFunc;
! 95: rf_DiskUnlockFunc = rf_DiskUnlockFuncForThreads;
! 96: rf_DiskUnlockUndoFunc = rf_NullNodeUndoFunc;
! 97: rf_RegularXorUndoFunc = rf_NullNodeUndoFunc;
! 98: rf_SimpleXorUndoFunc = rf_NullNodeUndoFunc;
! 99: rf_RecoveryXorUndoFunc = rf_NullNodeUndoFunc;
! 100: return (0);
! 101: }
! 102:
! 103:
! 104: /*****************************************************************************
! 105: * The execution function associated with a terminate node.
! 106: *****************************************************************************/
! 107: int
! 108: rf_TerminateFunc(RF_DagNode_t *node)
! 109: {
! 110: RF_ASSERT(node->dagHdr->numCommits == node->dagHdr->numCommitNodes);
! 111: node->status = rf_good;
! 112: return (rf_FinishNode(node, RF_THREAD_CONTEXT));
! 113: }
! 114:
! 115: int
! 116: rf_TerminateUndoFunc(RF_DagNode_t *node)
! 117: {
! 118: return (0);
! 119: }
! 120:
! 121:
! 122: /*****************************************************************************
! 123: * Execution functions associated with a mirror node.
! 124: *
! 125: * parameters:
! 126: *
! 127: * 0 - Physical disk address of data.
! 128: * 1 - Buffer for holding read data.
! 129: * 2 - Parity stripe ID.
! 130: * 3 - Flags.
! 131: * 4 - Physical disk address of mirror (parity).
! 132: *
! 133: *****************************************************************************/
! 134:
! 135: int
! 136: rf_DiskReadMirrorIdleFunc(RF_DagNode_t *node)
! 137: {
! 138: /*
! 139: * Select the mirror copy with the shortest queue and fill in node
! 140: * parameters with physical disk address.
! 141: */
! 142:
! 143: rf_SelectMirrorDiskIdle(node);
! 144: return (rf_DiskReadFunc(node));
! 145: }
! 146:
! 147: int
! 148: rf_DiskReadMirrorPartitionFunc(RF_DagNode_t *node)
! 149: {
! 150: /*
! 151: * Select the mirror copy with the shortest queue and fill in node
! 152: * parameters with physical disk address.
! 153: */
! 154:
! 155: rf_SelectMirrorDiskPartition(node);
! 156: return (rf_DiskReadFunc(node));
! 157: }
! 158:
! 159: int
! 160: rf_DiskReadMirrorUndoFunc(RF_DagNode_t *node)
! 161: {
! 162: return (0);
! 163: }
! 164:
! 165:
! 166:
! 167: #if RF_INCLUDE_PARITYLOGGING > 0
! 168: /*****************************************************************************
! 169: * The execution function associated with a parity log update node.
! 170: *****************************************************************************/
! 171: int
! 172: rf_ParityLogUpdateFunc(RF_DagNode_t *node)
! 173: {
! 174: RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p;
! 175: caddr_t buf = (caddr_t) node->params[1].p;
! 176: RF_ParityLogData_t *logData;
! 177: RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
! 178: RF_Etimer_t timer;
! 179:
! 180: if (node->dagHdr->status == rf_enable) {
! 181: RF_ETIMER_START(timer);
! 182: logData = rf_CreateParityLogData(RF_UPDATE, pda, buf,
! 183: (RF_Raid_t *) (node->dagHdr->raidPtr),
! 184: node->wakeFunc, (void *) node,
! 185: node->dagHdr->tracerec, timer);
! 186: if (logData)
! 187: rf_ParityLogAppend(logData, RF_FALSE, NULL, RF_FALSE);
! 188: else {
! 189: RF_ETIMER_STOP(timer);
! 190: RF_ETIMER_EVAL(timer);
! 191: tracerec->plog_us += RF_ETIMER_VAL_US(timer);
! 192: (node->wakeFunc) (node, ENOMEM);
! 193: }
! 194: }
! 195: return (0);
! 196: }
! 197:
! 198:
! 199: /*****************************************************************************
! 200: * The execution function associated with a parity log overwrite node.
! 201: *****************************************************************************/
! 202: int
! 203: rf_ParityLogOverwriteFunc(RF_DagNode_t *node)
! 204: {
! 205: RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p;
! 206: caddr_t buf = (caddr_t) node->params[1].p;
! 207: RF_ParityLogData_t *logData;
! 208: RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
! 209: RF_Etimer_t timer;
! 210:
! 211: if (node->dagHdr->status == rf_enable) {
! 212: RF_ETIMER_START(timer);
! 213: logData = rf_CreateParityLogData(RF_OVERWRITE, pda, buf,
! 214: (RF_Raid_t *) (node->dagHdr->raidPtr), node->wakeFunc,
! 215: (void *) node, node->dagHdr->tracerec, timer);
! 216: if (logData)
! 217: rf_ParityLogAppend(logData, RF_FALSE, NULL, RF_FALSE);
! 218: else {
! 219: RF_ETIMER_STOP(timer);
! 220: RF_ETIMER_EVAL(timer);
! 221: tracerec->plog_us += RF_ETIMER_VAL_US(timer);
! 222: (node->wakeFunc) (node, ENOMEM);
! 223: }
! 224: }
! 225: return (0);
! 226: }
! 227: #else /* RF_INCLUDE_PARITYLOGGING > 0 */
! 228:
! 229: int
! 230: rf_ParityLogUpdateFunc(RF_DagNode_t *node)
! 231: {
! 232: return (0);
! 233: }
! 234:
! 235: int
! 236: rf_ParityLogOverwriteFunc(RF_DagNode_t *node)
! 237: {
! 238: return (0);
! 239: }
! 240: #endif /* RF_INCLUDE_PARITYLOGGING > 0 */
! 241:
! 242: int
! 243: rf_ParityLogUpdateUndoFunc(RF_DagNode_t *node)
! 244: {
! 245: return (0);
! 246: }
! 247:
! 248: int
! 249: rf_ParityLogOverwriteUndoFunc(RF_DagNode_t *node)
! 250: {
! 251: return (0);
! 252: }
! 253:
! 254: /*****************************************************************************
! 255: * The execution function associated with a NOP node.
! 256: *****************************************************************************/
! 257: int
! 258: rf_NullNodeFunc(RF_DagNode_t *node)
! 259: {
! 260: node->status = rf_good;
! 261: return (rf_FinishNode(node, RF_THREAD_CONTEXT));
! 262: }
! 263:
! 264: int
! 265: rf_NullNodeUndoFunc(RF_DagNode_t *node)
! 266: {
! 267: node->status = rf_undone;
! 268: return (rf_FinishNode(node, RF_THREAD_CONTEXT));
! 269: }
! 270:
! 271:
! 272: /*****************************************************************************
! 273: * The execution function associated with a disk-read node.
! 274: *****************************************************************************/
! 275: int
! 276: rf_DiskReadFuncForThreads(RF_DagNode_t *node)
! 277: {
! 278: RF_DiskQueueData_t *req;
! 279: RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p;
! 280: caddr_t buf = (caddr_t) node->params[1].p;
! 281: RF_StripeNum_t parityStripeID = (RF_StripeNum_t) node->params[2].v;
! 282: unsigned priority = RF_EXTRACT_PRIORITY(node->params[3].v);
! 283: unsigned lock = RF_EXTRACT_LOCK_FLAG(node->params[3].v);
! 284: unsigned unlock = RF_EXTRACT_UNLOCK_FLAG(node->params[3].v);
! 285: unsigned which_ru = RF_EXTRACT_RU(node->params[3].v);
! 286: RF_DiskQueueDataFlags_t flags = 0;
! 287: RF_IoType_t iotype = (node->dagHdr->status == rf_enable) ?
! 288: RF_IO_TYPE_READ : RF_IO_TYPE_NOP;
! 289: RF_DiskQueue_t **dqs = ((RF_Raid_t *) (node->dagHdr->raidPtr))->Queues;
! 290: void *b_proc = NULL;
! 291:
! 292: if (node->dagHdr->bp)
! 293: b_proc = (void *) ((struct buf *) node->dagHdr->bp)->b_proc;
! 294:
! 295: RF_ASSERT(!(lock && unlock));
! 296: flags |= (lock) ? RF_LOCK_DISK_QUEUE : 0;
! 297: flags |= (unlock) ? RF_UNLOCK_DISK_QUEUE : 0;
! 298:
! 299: req = rf_CreateDiskQueueData(iotype, pda->startSector, pda->numSector,
! 300: buf, parityStripeID, which_ru,
! 301: (int (*) (void *, int)) node->wakeFunc,
! 302: node, NULL, node->dagHdr->tracerec,
! 303: (void *) (node->dagHdr->raidPtr), flags, b_proc);
! 304: if (!req) {
! 305: (node->wakeFunc) (node, ENOMEM);
! 306: } else {
! 307: node->dagFuncData = (void *) req;
! 308: rf_DiskIOEnqueue(&(dqs[pda->row][pda->col]), req, priority);
! 309: }
! 310: return (0);
! 311: }
! 312:
! 313:
! 314: /*****************************************************************************
! 315: * the execution function associated with a disk-write node
! 316: *****************************************************************************/
! 317: int
! 318: rf_DiskWriteFuncForThreads(RF_DagNode_t *node)
! 319: {
! 320: RF_DiskQueueData_t *req;
! 321: RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p;
! 322: caddr_t buf = (caddr_t) node->params[1].p;
! 323: RF_StripeNum_t parityStripeID = (RF_StripeNum_t) node->params[2].v;
! 324: unsigned priority = RF_EXTRACT_PRIORITY(node->params[3].v);
! 325: unsigned lock = RF_EXTRACT_LOCK_FLAG(node->params[3].v);
! 326: unsigned unlock = RF_EXTRACT_UNLOCK_FLAG(node->params[3].v);
! 327: unsigned which_ru = RF_EXTRACT_RU(node->params[3].v);
! 328: RF_DiskQueueDataFlags_t flags = 0;
! 329: RF_IoType_t iotype = (node->dagHdr->status == rf_enable) ?
! 330: RF_IO_TYPE_WRITE : RF_IO_TYPE_NOP;
! 331: RF_DiskQueue_t **dqs = ((RF_Raid_t *) (node->dagHdr->raidPtr))->Queues;
! 332: void *b_proc = NULL;
! 333:
! 334: if (node->dagHdr->bp)
! 335: b_proc = (void *) ((struct buf *) node->dagHdr->bp)->b_proc;
! 336:
! 337: /* Normal processing (rollaway or forward recovery) begins here. */
! 338: RF_ASSERT(!(lock && unlock));
! 339: flags |= (lock) ? RF_LOCK_DISK_QUEUE : 0;
! 340: flags |= (unlock) ? RF_UNLOCK_DISK_QUEUE : 0;
! 341: req = rf_CreateDiskQueueData(iotype, pda->startSector, pda->numSector,
! 342: buf, parityStripeID, which_ru,
! 343: (int (*) (void *, int)) node->wakeFunc, (void *) node, NULL,
! 344: node->dagHdr->tracerec, (void *) (node->dagHdr->raidPtr),
! 345: flags, b_proc);
! 346:
! 347: if (!req) {
! 348: (node->wakeFunc) (node, ENOMEM);
! 349: } else {
! 350: node->dagFuncData = (void *) req;
! 351: rf_DiskIOEnqueue(&(dqs[pda->row][pda->col]), req, priority);
! 352: }
! 353:
! 354: return (0);
! 355: }
! 356: /*****************************************************************************
! 357: * The undo function for disk nodes.
! 358: * Note: This is not a proper undo of a write node, only locks are released.
! 359: * old data is not restored to disk !
! 360: *****************************************************************************/
! 361: int
! 362: rf_DiskUndoFunc(RF_DagNode_t *node)
! 363: {
! 364: RF_DiskQueueData_t *req;
! 365: RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p;
! 366: RF_DiskQueue_t **dqs = ((RF_Raid_t *) (node->dagHdr->raidPtr))->Queues;
! 367:
! 368: req = rf_CreateDiskQueueData(RF_IO_TYPE_NOP, 0L, 0, NULL, 0L, 0,
! 369: (int (*) (void *, int)) node->wakeFunc, (void *) node,
! 370: NULL, node->dagHdr->tracerec, (void *) (node->dagHdr->raidPtr),
! 371: RF_UNLOCK_DISK_QUEUE, NULL);
! 372: if (!req)
! 373: (node->wakeFunc) (node, ENOMEM);
! 374: else {
! 375: node->dagFuncData = (void *) req;
! 376: rf_DiskIOEnqueue(&(dqs[pda->row][pda->col]), req,
! 377: RF_IO_NORMAL_PRIORITY);
! 378: }
! 379:
! 380: return (0);
! 381: }
! 382:
! 383: /*****************************************************************************
! 384: * The execution function associated with an "unlock disk queue" node.
! 385: *****************************************************************************/
! 386: int
! 387: rf_DiskUnlockFuncForThreads(RF_DagNode_t *node)
! 388: {
! 389: RF_DiskQueueData_t *req;
! 390: RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p;
! 391: RF_DiskQueue_t **dqs = ((RF_Raid_t *) (node->dagHdr->raidPtr))->Queues;
! 392:
! 393: req = rf_CreateDiskQueueData(RF_IO_TYPE_NOP, 0L, 0, NULL, 0L, 0,
! 394: (int (*) (void *, int)) node->wakeFunc, (void *) node,
! 395: NULL, node->dagHdr->tracerec, (void *) (node->dagHdr->raidPtr),
! 396: RF_UNLOCK_DISK_QUEUE, NULL);
! 397: if (!req)
! 398: (node->wakeFunc) (node, ENOMEM);
! 399: else {
! 400: node->dagFuncData = (void *) req;
! 401: rf_DiskIOEnqueue(&(dqs[pda->row][pda->col]), req,
! 402: RF_IO_NORMAL_PRIORITY);
! 403: }
! 404:
! 405: return (0);
! 406: }
! 407:
! 408: /*****************************************************************************
! 409: * Callback routine for DiskRead and DiskWrite nodes. When the disk op
! 410: * completes, the routine is called to set the node status and inform
! 411: * the execution engine that the node has fired.
! 412: *****************************************************************************/
! 413: int
! 414: rf_GenericWakeupFunc(RF_DagNode_t *node, int status)
! 415: {
! 416: switch (node->status) {
! 417: case rf_bwd1:
! 418: node->status = rf_bwd2;
! 419: if (node->dagFuncData)
! 420: rf_FreeDiskQueueData((RF_DiskQueueData_t *)
! 421: node->dagFuncData);
! 422: return (rf_DiskWriteFuncForThreads(node));
! 423: break;
! 424: case rf_fired:
! 425: if (status)
! 426: node->status = rf_bad;
! 427: else
! 428: node->status = rf_good;
! 429: break;
! 430: case rf_recover:
! 431: /* Probably should never reach this case. */
! 432: if (status)
! 433: node->status = rf_panic;
! 434: else
! 435: node->status = rf_undone;
! 436: break;
! 437: default:
! 438: printf("rf_GenericWakeupFunc:");
! 439: printf("node->status is %d,", node->status);
! 440: printf("status is %d \n", status);
! 441: RF_PANIC();
! 442: break;
! 443: }
! 444: if (node->dagFuncData)
! 445: rf_FreeDiskQueueData((RF_DiskQueueData_t *) node->dagFuncData);
! 446: return (rf_FinishNode(node, RF_INTR_CONTEXT));
! 447: }
! 448:
! 449:
! 450: /*****************************************************************************
! 451: * There are three distinct types of xor nodes.
! 452: *
! 453: * A "regular xor" is used in the fault-free case where the access spans
! 454: * a complete stripe unit. It assumes that the result buffer is one full
! 455: * stripe unit in size, and uses the stripe-unit-offset values that it
! 456: * computes from the PDAs to determine where within the stripe unit to
! 457: * XOR each argument buffer.
! 458: *
! 459: * A "simple xor" is used in the fault-free case where the access touches
! 460: * only a portion of one (or two, in some cases) stripe unit(s). It assumes
! 461: * that all the argument buffers are of the same size and have the same
! 462: * stripe unit offset.
! 463: *
! 464: * A "recovery xor" is used in the degraded-mode case. It's similar to
! 465: * the regular xor function except that it takes the failed PDA as an
! 466: * additional parameter, and uses it to determine what portions of the
! 467: * argument buffers need to be xor'd into the result buffer, and where
! 468: * in the result buffer they should go.
! 469: *****************************************************************************/
! 470:
! 471: /*
! 472: * Xor the params together and store the result in the result field.
! 473: * Assume the result field points to a buffer that is the size of one SU,
! 474: * and use the pda params to determine where within the buffer to XOR
! 475: * the input buffers.
! 476: */
! 477: int
! 478: rf_RegularXorFunc(RF_DagNode_t *node)
! 479: {
! 480: RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
! 481: RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
! 482: RF_Etimer_t timer;
! 483: int i, retcode;
! 484:
! 485: retcode = 0;
! 486: if (node->dagHdr->status == rf_enable) {
! 487: /* Don't do the XOR if the input is the same as the output. */
! 488: RF_ETIMER_START(timer);
! 489: for (i = 0; i < node->numParams - 1; i += 2)
! 490: if (node->params[i + 1].p != node->results[0]) {
! 491: retcode = rf_XorIntoBuffer(raidPtr,
! 492: (RF_PhysDiskAddr_t *) node->params[i].p,
! 493: (char *) node->params[i + 1].p,
! 494: (char *) node->results[0],
! 495: node->dagHdr->bp);
! 496: }
! 497: RF_ETIMER_STOP(timer);
! 498: RF_ETIMER_EVAL(timer);
! 499: tracerec->xor_us += RF_ETIMER_VAL_US(timer);
! 500: }
! 501: /* Call wake func explicitly since no I/O in this node. */
! 502: return (rf_GenericWakeupFunc(node, retcode));
! 503: }
! 504:
! 505: /* Xor the inputs into the result buffer, ignoring placement issues. */
! 506: int
! 507: rf_SimpleXorFunc(RF_DagNode_t *node)
! 508: {
! 509: RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
! 510: int i, retcode = 0;
! 511: RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
! 512: RF_Etimer_t timer;
! 513:
! 514: if (node->dagHdr->status == rf_enable) {
! 515: RF_ETIMER_START(timer);
! 516: /* Don't do the XOR if the input is the same as the output. */
! 517: for (i = 0; i < node->numParams - 1; i += 2)
! 518: if (node->params[i + 1].p != node->results[0]) {
! 519: retcode = rf_bxor((char *)
! 520: node->params[i + 1].p,
! 521: (char *) node->results[0],
! 522: rf_RaidAddressToByte(raidPtr,
! 523: ((RF_PhysDiskAddr_t *)
! 524: node->params[i].p)->numSector),
! 525: (struct buf *) node->dagHdr->bp);
! 526: }
! 527: RF_ETIMER_STOP(timer);
! 528: RF_ETIMER_EVAL(timer);
! 529: tracerec->xor_us += RF_ETIMER_VAL_US(timer);
! 530: }
! 531: /* Call wake func explicitly since no I/O in this node. */
! 532: return (rf_GenericWakeupFunc(node, retcode));
! 533: }
! 534:
! 535: /*
! 536: * This xor is used by the degraded-mode dag functions to recover lost data.
! 537: * The second-to-last parameter is the PDA for the failed portion of the access.
! 538: * The code here looks at this PDA and assumes that the xor target buffer is
! 539: * equal in size to the number of sectors in the failed PDA. It then uses
! 540: * the other PDAs in the parameter list to determine where within the target
! 541: * buffer the corresponding data should be xored.
! 542: */
! 543: int
! 544: rf_RecoveryXorFunc(RF_DagNode_t *node)
! 545: {
! 546: RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
! 547: RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
! 548: RF_PhysDiskAddr_t *failedPDA =
! 549: (RF_PhysDiskAddr_t *) node->params[node->numParams - 2].p;
! 550: int i, retcode = 0;
! 551: RF_PhysDiskAddr_t *pda;
! 552: int suoffset, failedSUOffset =
! 553: rf_StripeUnitOffset(layoutPtr, failedPDA->startSector);
! 554: char *srcbuf, *destbuf;
! 555: RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
! 556: RF_Etimer_t timer;
! 557:
! 558: if (node->dagHdr->status == rf_enable) {
! 559: RF_ETIMER_START(timer);
! 560: for (i = 0; i < node->numParams - 2; i += 2)
! 561: if (node->params[i + 1].p != node->results[0]) {
! 562: pda = (RF_PhysDiskAddr_t *) node->params[i].p;
! 563: srcbuf = (char *) node->params[i + 1].p;
! 564: suoffset = rf_StripeUnitOffset(layoutPtr,
! 565: pda->startSector);
! 566: destbuf = ((char *) node->results[0]) +
! 567: rf_RaidAddressToByte(raidPtr,
! 568: suoffset - failedSUOffset);
! 569: retcode = rf_bxor(srcbuf, destbuf,
! 570: rf_RaidAddressToByte(raidPtr,
! 571: pda->numSector), node->dagHdr->bp);
! 572: }
! 573: RF_ETIMER_STOP(timer);
! 574: RF_ETIMER_EVAL(timer);
! 575: tracerec->xor_us += RF_ETIMER_VAL_US(timer);
! 576: }
! 577: return (rf_GenericWakeupFunc(node, retcode));
! 578: }
! 579:
! 580:
! 581: /*****************************************************************************
! 582: * The next three functions are utilities used by the above xor-execution
! 583: * functions.
! 584: *****************************************************************************/
! 585:
! 586: /*
! 587: * This is just a glorified buffer xor. Targbuf points to a buffer that is
! 588: * one full stripe unit in size. srcbuf points to a buffer that may be less
! 589: * than 1 SU, but never more. When the access described by pda is one SU in
! 590: * size (which by implication means it's SU-aligned), all that happens is
! 591: * (targbuf) <- (srcbuf ^ targbuf). When the access is less than one SU in
! 592: * size the XOR occurs on only the portion of targbuf identified in the pda.
! 593: */
! 594:
! 595: int
! 596: rf_XorIntoBuffer(RF_Raid_t *raidPtr, RF_PhysDiskAddr_t *pda, char *srcbuf,
! 597: char *targbuf, void *bp)
! 598: {
! 599: char *targptr;
! 600: int sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
! 601: int SUOffset = pda->startSector % sectPerSU;
! 602: int length, retcode = 0;
! 603:
! 604: RF_ASSERT(pda->numSector <= sectPerSU);
! 605:
! 606: targptr = targbuf + rf_RaidAddressToByte(raidPtr, SUOffset);
! 607: length = rf_RaidAddressToByte(raidPtr, pda->numSector);
! 608: retcode = rf_bxor(srcbuf, targptr, length, bp);
! 609: return (retcode);
! 610: }
! 611:
! 612: /*
! 613: * It really should be the case that the buffer pointers (returned by malloc)
! 614: * are aligned to the natural word size of the machine, so this is the only
! 615: * case we optimize for. The length should always be a multiple of the sector
! 616: * size, so there should be no problem with leftover bytes at the end.
! 617: */
! 618: int
! 619: rf_bxor(char *src, char *dest, int len, void *bp)
! 620: {
! 621: unsigned mask = sizeof(long) - 1, retcode = 0;
! 622:
! 623: if (!(((unsigned long) src) & mask) &&
! 624: !(((unsigned long) dest) & mask) && !(len & mask)) {
! 625: retcode = rf_longword_bxor((unsigned long *) src,
! 626: (unsigned long *) dest, len >> RF_LONGSHIFT, bp);
! 627: } else {
! 628: RF_ASSERT(0);
! 629: }
! 630: return (retcode);
! 631: }
! 632:
! 633: /* Map a user buffer into kernel space, if necessary. */
! 634: #define REMAP_VA(_bp,x,y) (y) = (x)
! 635:
! 636: /*
! 637: * When XORing in kernel mode, we need to map each user page to kernel
! 638: * space before we can access it.
! 639: * We don't want to assume anything about which input buffers are in
! 640: * kernel/user space, nor about their alignment, so in each loop we
! 641: * compute the maximum number of bytes that we can xor without crossing
! 642: * any page boundaries, and do only this many bytes before the next remap.
! 643: */
! 644: int
! 645: rf_longword_bxor(unsigned long *src, unsigned long *dest, int len, void *bp)
! 646: {
! 647: unsigned long *end = src + len; /* len in longwords. */
! 648: unsigned long d0, d1, d2, d3, s0, s1, s2, s3; /* temps */
! 649: unsigned long *pg_src, *pg_dest; /* Per-page source/dest pointers. */
! 650: int longs_this_time; /* # longwords to xor in the current iteration. */
! 651:
! 652: REMAP_VA(bp, src, pg_src);
! 653: REMAP_VA(bp, dest, pg_dest);
! 654: if (!pg_src || !pg_dest)
! 655: return (EFAULT);
! 656:
! 657: while (len >= 4) {
! 658: longs_this_time = RF_MIN(len, RF_MIN(RF_BLIP(pg_src),
! 659: RF_BLIP(pg_dest)) >> RF_LONGSHIFT);
! 660: src += longs_this_time;
! 661: dest += longs_this_time;
! 662: len -= longs_this_time;
! 663: while (longs_this_time >= 4) {
! 664: d0 = pg_dest[0];
! 665: d1 = pg_dest[1];
! 666: d2 = pg_dest[2];
! 667: d3 = pg_dest[3];
! 668: s0 = pg_src[0];
! 669: s1 = pg_src[1];
! 670: s2 = pg_src[2];
! 671: s3 = pg_src[3];
! 672: pg_dest[0] = d0 ^ s0;
! 673: pg_dest[1] = d1 ^ s1;
! 674: pg_dest[2] = d2 ^ s2;
! 675: pg_dest[3] = d3 ^ s3;
! 676: pg_src += 4;
! 677: pg_dest += 4;
! 678: longs_this_time -= 4;
! 679: }
! 680: while (longs_this_time > 0) {
! 681: /* Cannot cross any page boundaries here. */
! 682: *pg_dest++ ^= *pg_src++;
! 683: longs_this_time--;
! 684: }
! 685:
! 686: /*
! 687: * Either we're done, or we've reached a page boundary on one
! 688: * (or possibly both) of the pointers.
! 689: */
! 690: if (len) {
! 691: if (RF_PAGE_ALIGNED(src))
! 692: REMAP_VA(bp, src, pg_src);
! 693: if (RF_PAGE_ALIGNED(dest))
! 694: REMAP_VA(bp, dest, pg_dest);
! 695: if (!pg_src || !pg_dest)
! 696: return (EFAULT);
! 697: }
! 698: }
! 699: while (src < end) {
! 700: *pg_dest++ ^= *pg_src++;
! 701: src++;
! 702: dest++;
! 703: len--;
! 704: if (RF_PAGE_ALIGNED(src))
! 705: REMAP_VA(bp, src, pg_src);
! 706: if (RF_PAGE_ALIGNED(dest))
! 707: REMAP_VA(bp, dest, pg_dest);
! 708: }
! 709: RF_ASSERT(len == 0);
! 710: return (0);
! 711: }
! 712:
! 713:
! 714: /*
! 715: * dst = a ^ b ^ c;
! 716: * a may equal dst
! 717: * see comment above longword_bxor
! 718: */
! 719: int
! 720: rf_longword_bxor3(unsigned long *dst, unsigned long *a, unsigned long *b,
! 721: unsigned long *c, int len, void *bp)
! 722: {
! 723: unsigned long a0, a1, a2, a3, b0, b1, b2, b3;
! 724: /* Per-page source/dest pointers. */
! 725: unsigned long *pg_a, *pg_b, *pg_c, *pg_dst;
! 726: int longs_this_time; /* # longs to xor in the current iteration */
! 727: char dst_is_a = 0;
! 728:
! 729: /* Note: The length (len) is in longwords. */
! 730:
! 731: REMAP_VA(bp, a, pg_a);
! 732: REMAP_VA(bp, b, pg_b);
! 733: REMAP_VA(bp, c, pg_c);
! 734: if (a == dst) {
! 735: pg_dst = pg_a;
! 736: dst_is_a = 1;
! 737: } else {
! 738: REMAP_VA(bp, dst, pg_dst);
! 739: }
! 740:
! 741: /* Align dest to cache line. Can't cross a pg boundary on dst here. */
! 742: while ((((unsigned long) pg_dst) & 0x1f)) {
! 743: *pg_dst++ = *pg_a++ ^ *pg_b++ ^ *pg_c++;
! 744: dst++;
! 745: a++;
! 746: b++;
! 747: c++;
! 748: if (RF_PAGE_ALIGNED(a)) {
! 749: REMAP_VA(bp, a, pg_a);
! 750: if (!pg_a)
! 751: return (EFAULT);
! 752: }
! 753: if (RF_PAGE_ALIGNED(b)) {
! 754: REMAP_VA(bp, a, pg_b);
! 755: if (!pg_b)
! 756: return (EFAULT);
! 757: }
! 758: if (RF_PAGE_ALIGNED(c)) {
! 759: REMAP_VA(bp, a, pg_c);
! 760: if (!pg_c)
! 761: return (EFAULT);
! 762: }
! 763: len--;
! 764: }
! 765:
! 766: while (len > 4) {
! 767: longs_this_time = RF_MIN(len, RF_MIN(RF_BLIP(a),
! 768: RF_MIN(RF_BLIP(b), RF_MIN(RF_BLIP(c), RF_BLIP(dst)))) >>
! 769: RF_LONGSHIFT);
! 770: a += longs_this_time;
! 771: b += longs_this_time;
! 772: c += longs_this_time;
! 773: dst += longs_this_time;
! 774: len -= longs_this_time;
! 775: while (longs_this_time >= 4) {
! 776: a0 = pg_a[0];
! 777: longs_this_time -= 4;
! 778:
! 779: a1 = pg_a[1];
! 780: a2 = pg_a[2];
! 781:
! 782: a3 = pg_a[3];
! 783: pg_a += 4;
! 784:
! 785: b0 = pg_b[0];
! 786: b1 = pg_b[1];
! 787:
! 788: b2 = pg_b[2];
! 789: b3 = pg_b[3];
! 790: /* Start dual issue. */
! 791: a0 ^= b0;
! 792: b0 = pg_c[0];
! 793:
! 794: pg_b += 4;
! 795: a1 ^= b1;
! 796:
! 797: a2 ^= b2;
! 798: a3 ^= b3;
! 799:
! 800: b1 = pg_c[1];
! 801: a0 ^= b0;
! 802:
! 803: b2 = pg_c[2];
! 804: a1 ^= b1;
! 805:
! 806: b3 = pg_c[3];
! 807: a2 ^= b2;
! 808:
! 809: pg_dst[0] = a0;
! 810: a3 ^= b3;
! 811: pg_dst[1] = a1;
! 812: pg_c += 4;
! 813: pg_dst[2] = a2;
! 814: pg_dst[3] = a3;
! 815: pg_dst += 4;
! 816: }
! 817: while (longs_this_time > 0) {
! 818: /* Cannot cross any page boundaries here. */
! 819: *pg_dst++ = *pg_a++ ^ *pg_b++ ^ *pg_c++;
! 820: longs_this_time--;
! 821: }
! 822:
! 823: if (len) {
! 824: if (RF_PAGE_ALIGNED(a)) {
! 825: REMAP_VA(bp, a, pg_a);
! 826: if (!pg_a)
! 827: return (EFAULT);
! 828: if (dst_is_a)
! 829: pg_dst = pg_a;
! 830: }
! 831: if (RF_PAGE_ALIGNED(b)) {
! 832: REMAP_VA(bp, b, pg_b);
! 833: if (!pg_b)
! 834: return (EFAULT);
! 835: }
! 836: if (RF_PAGE_ALIGNED(c)) {
! 837: REMAP_VA(bp, c, pg_c);
! 838: if (!pg_c)
! 839: return (EFAULT);
! 840: }
! 841: if (!dst_is_a)
! 842: if (RF_PAGE_ALIGNED(dst)) {
! 843: REMAP_VA(bp, dst, pg_dst);
! 844: if (!pg_dst)
! 845: return (EFAULT);
! 846: }
! 847: }
! 848: }
! 849: while (len) {
! 850: *pg_dst++ = *pg_a++ ^ *pg_b++ ^ *pg_c++;
! 851: dst++;
! 852: a++;
! 853: b++;
! 854: c++;
! 855: if (RF_PAGE_ALIGNED(a)) {
! 856: REMAP_VA(bp, a, pg_a);
! 857: if (!pg_a)
! 858: return (EFAULT);
! 859: if (dst_is_a)
! 860: pg_dst = pg_a;
! 861: }
! 862: if (RF_PAGE_ALIGNED(b)) {
! 863: REMAP_VA(bp, b, pg_b);
! 864: if (!pg_b)
! 865: return (EFAULT);
! 866: }
! 867: if (RF_PAGE_ALIGNED(c)) {
! 868: REMAP_VA(bp, c, pg_c);
! 869: if (!pg_c)
! 870: return (EFAULT);
! 871: }
! 872: if (!dst_is_a)
! 873: if (RF_PAGE_ALIGNED(dst)) {
! 874: REMAP_VA(bp, dst, pg_dst);
! 875: if (!pg_dst)
! 876: return (EFAULT);
! 877: }
! 878: len--;
! 879: }
! 880: return (0);
! 881: }
! 882:
! 883: int
! 884: rf_bxor3(unsigned char *dst, unsigned char *a, unsigned char *b,
! 885: unsigned char *c, unsigned long len, void *bp)
! 886: {
! 887: RF_ASSERT(((RF_UL(dst) | RF_UL(a) | RF_UL(b) | RF_UL(c) | len) & 0x7)
! 888: == 0);
! 889:
! 890: return (rf_longword_bxor3((unsigned long *) dst, (unsigned long *) a,
! 891: (unsigned long *) b, (unsigned long *) c, len >> RF_LONGSHIFT,
! 892: bp));
! 893: }
CVSweb