[BACK]Return to rf_dagfuncs.c CVS log [TXT][DIR] Up to [local] / sys / dev / raidframe

Annotation of sys/dev/raidframe/rf_dagfuncs.c, Revision 1.1.1.1

1.1       nbrk        1: /*     $OpenBSD: rf_dagfuncs.c,v 1.7 2004/09/20 17:51:07 miod Exp $    */
                      2: /*     $NetBSD: rf_dagfuncs.c,v 1.6 2000/03/30 12:45:40 augustss Exp $ */
                      3:
                      4: /*
                      5:  * Copyright (c) 1995 Carnegie-Mellon University.
                      6:  * All rights reserved.
                      7:  *
                      8:  * Author: Mark Holland, William V. Courtright II
                      9:  *
                     10:  * Permission to use, copy, modify and distribute this software and
                     11:  * its documentation is hereby granted, provided that both the copyright
                     12:  * notice and this permission notice appear in all copies of the
                     13:  * software, derivative works or modified versions, and any portions
                     14:  * thereof, and that both notices appear in supporting documentation.
                     15:  *
                     16:  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
                     17:  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
                     18:  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
                     19:  *
                     20:  * Carnegie Mellon requests users of this software to return to
                     21:  *
                     22:  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
                     23:  *  School of Computer Science
                     24:  *  Carnegie Mellon University
                     25:  *  Pittsburgh PA 15213-3890
                     26:  *
                     27:  * any improvements or extensions that they make and grant Carnegie the
                     28:  * rights to redistribute these changes.
                     29:  */
                     30:
                     31: /*
                     32:  * dagfuncs.c -- DAG node execution routines.
                     33:  *
                     34:  * Rules:
                     35:  * 1. Every DAG execution function must eventually cause node->status to
                     36:  *    get set to "good" or "bad", and "FinishNode" to be called. In the
                     37:  *    case of nodes that complete immediately (xor, NullNodeFunc, etc),
                     38:  *    the node execution function can do these two things directly. In
                     39:  *    the case of nodes that have to wait for some event (a disk read to
                     40:  *    complete, a lock to be released, etc) to occur before they can
                     41:  *    complete, this is typically achieved by having whatever module
                     42:  *    is doing the operation call GenericWakeupFunc upon completion.
                     43:  * 2. DAG execution functions should check the status in the DAG header
                     44:  *    and NOP out their operations if the status is not "enable". However,
                     45:  *    execution functions that release resources must be sure to release
                     46:  *    them even when they NOP out the function that would use them.
                     47:  *    Functions that acquire resources should go ahead and acquire them
                     48:  *    even when they NOP, so that a downstream release node will not have
                     49:  *    to check to find out whether or not the acquire was suppressed.
                     50:  */
                     51:
                     52: #include <sys/ioctl.h>
                     53: #include <sys/param.h>
                     54:
                     55: #include "rf_archs.h"
                     56: #include "rf_raid.h"
                     57: #include "rf_dag.h"
                     58: #include "rf_layout.h"
                     59: #include "rf_etimer.h"
                     60: #include "rf_acctrace.h"
                     61: #include "rf_diskqueue.h"
                     62: #include "rf_dagfuncs.h"
                     63: #include "rf_general.h"
                     64: #include "rf_engine.h"
                     65: #include "rf_dagutils.h"
                     66:
                     67: #include "rf_kintf.h"
                     68:
                     69: #if    RF_INCLUDE_PARITYLOGGING > 0
                     70: #include "rf_paritylog.h"
                     71: #endif /* RF_INCLUDE_PARITYLOGGING > 0 */
                     72:
                     73: int    (*rf_DiskReadFunc) (RF_DagNode_t *);	/* Hook; rf_ConfigureDAGFuncs() sets it to rf_DiskReadFuncForThreads. */
                     74: int    (*rf_DiskWriteFunc) (RF_DagNode_t *);	/* Hook; set to rf_DiskWriteFuncForThreads. */
                     75: int    (*rf_DiskReadUndoFunc) (RF_DagNode_t *);	/* Hook; set to rf_DiskUndoFunc. */
                     76: int    (*rf_DiskWriteUndoFunc) (RF_DagNode_t *);	/* Hook; set to rf_DiskUndoFunc. */
                     77: int    (*rf_DiskUnlockFunc) (RF_DagNode_t *);	/* Hook; set to rf_DiskUnlockFuncForThreads. */
                     78: int    (*rf_DiskUnlockUndoFunc) (RF_DagNode_t *);	/* Hook; set to rf_NullNodeUndoFunc. */
                     79: int    (*rf_RegularXorUndoFunc) (RF_DagNode_t *);	/* Hook; set to rf_NullNodeUndoFunc. */
                     80: int    (*rf_SimpleXorUndoFunc) (RF_DagNode_t *);	/* Hook; set to rf_NullNodeUndoFunc. */
                     81: int    (*rf_RecoveryXorUndoFunc) (RF_DagNode_t *);	/* Hook; set to rf_NullNodeUndoFunc. */
                     82:
                     83: /*****************************************************************************
                     84:  * Main (only) configuration routine for this module.
                          *
                          * Binds every function-pointer hook declared at the top of this file to
                          * its implementation: the disk read/write/unlock hooks get the
                          * "ForThreads" routines, both disk undo hooks get rf_DiskUndoFunc, and
                          * the unlock-undo and all three xor-undo hooks get rf_NullNodeUndoFunc.
                          *
                          * listp is accepted but not referenced by this routine.
                          * Always returns 0.
                     85:  *****************************************************************************/
                     86: int
                     87: rf_ConfigureDAGFuncs(RF_ShutdownList_t **listp)
                     88: {
                                /* Sanity check: RF_LONGSHIFT must be log2(sizeof(long)). */
                     89:        RF_ASSERT(((sizeof(long) == 8) && RF_LONGSHIFT == 3) ||
                     90:            ((sizeof(long) == 4) && RF_LONGSHIFT == 2));
                     91:        rf_DiskReadFunc = rf_DiskReadFuncForThreads;
                     92:        rf_DiskReadUndoFunc = rf_DiskUndoFunc;
                     93:        rf_DiskWriteFunc = rf_DiskWriteFuncForThreads;
                     94:        rf_DiskWriteUndoFunc = rf_DiskUndoFunc;
                     95:        rf_DiskUnlockFunc = rf_DiskUnlockFuncForThreads;
                     96:        rf_DiskUnlockUndoFunc = rf_NullNodeUndoFunc;
                     97:        rf_RegularXorUndoFunc = rf_NullNodeUndoFunc;
                     98:        rf_SimpleXorUndoFunc = rf_NullNodeUndoFunc;
                     99:        rf_RecoveryXorUndoFunc = rf_NullNodeUndoFunc;
                    100:        return (0);
                    101: }
                    102:
                    103:
                    104: /*****************************************************************************
                    105:  * The execution function associated with a terminate node.
                          *
                          * Asserts that the DAG header's numCommits equals numCommitNodes, marks
                          * the node rf_good, and returns whatever rf_FinishNode() returns when
                          * called with RF_THREAD_CONTEXT.
                    106:  *****************************************************************************/
                    107: int
                    108: rf_TerminateFunc(RF_DagNode_t *node)
                    109: {
                    110:        RF_ASSERT(node->dagHdr->numCommits == node->dagHdr->numCommitNodes);
                    111:        node->status = rf_good;
                    112:        return (rf_FinishNode(node, RF_THREAD_CONTEXT));
                    113: }
                    114:
                    115: int
                    116: rf_TerminateUndoFunc(RF_DagNode_t *node)
                    117: {
                                /* Undo of a terminate node: node is left untouched; returns 0. */
                    118:        return (0);
                    119: }
                    120:
                    121:
                    122: /*****************************************************************************
                    123:  * Execution functions associated with a mirror node.
                    124:  *
                    125:  * parameters:
                    126:  *
                    127:  * 0 - Physical disk address of data.
                    128:  * 1 - Buffer for holding read data.
                    129:  * 2 - Parity stripe ID.
                    130:  * 3 - Flags.
                    131:  * 4 - Physical disk address of mirror (parity).
                    132:  *
                    133:  *****************************************************************************/
                    134:
                    135: int
                    136: rf_DiskReadMirrorIdleFunc(RF_DagNode_t *node)
                    137: {
                    138:        /*
                    139:         * Select the mirror copy with the shortest queue and fill in node
                    140:         * parameters with physical disk address.
                                 *
                                 * The selection itself is delegated to rf_SelectMirrorDiskIdle();
                                 * the read is then issued through the rf_DiskReadFunc hook and
                                 * that hook's return value is passed back to the caller.
                    141:         */
                    142:
                    143:        rf_SelectMirrorDiskIdle(node);
                    144:        return (rf_DiskReadFunc(node));
                    145: }
                    146:
                    147: int
                    148: rf_DiskReadMirrorPartitionFunc(RF_DagNode_t *node)
                    149: {
                    150:        /*
                    151:         * Select the mirror copy with the shortest queue and fill in node
                    152:         * parameters with physical disk address.
                                 *
                                 * NOTE(review): the description above is word-for-word the one
                                 * used in rf_DiskReadMirrorIdleFunc().  The policy applied by
                                 * rf_SelectMirrorDiskPartition() is not visible from here --
                                 * confirm that "shortest queue" is accurate for this variant.
                                 *
                                 * After selection, the read is issued through the rf_DiskReadFunc
                                 * hook and that hook's return value is passed back to the caller.
                    153:         */
                    154:
                    155:        rf_SelectMirrorDiskPartition(node);
                    156:        return (rf_DiskReadFunc(node));
                    157: }
                    158:
                    159: int
                    160: rf_DiskReadMirrorUndoFunc(RF_DagNode_t *node)
                    161: {
                                /* Undo of a mirror read: node is left untouched; returns 0. */
                    162:        return (0);
                    163: }
                    164:
                    165:
                    166:
                    167: #if    RF_INCLUDE_PARITYLOGGING > 0
                    168: /*****************************************************************************
                    169:  * The execution function associated with a parity log update node.
                          *
                          * Node parameters read here:
                          *   params[0].p - physical disk address (RF_PhysDiskAddr_t *).
                          *   params[1].p - data buffer.
                          *
                          * When the DAG header status is rf_enable, an RF_UPDATE parity-log
                          * record is created with node->wakeFunc / node as its callback and
                          * argument, and handed to rf_ParityLogAppend().  If the record cannot
                          * be created, the elapsed time is added to tracerec->plog_us and the
                          * wake function is called directly with ENOMEM.
                          *
                          * NOTE(review): when the header status is not rf_enable this routine
                          * does nothing at all -- in particular it does not call the wake
                          * function.  Confirm that this agrees with rule 1 in the comment at
                          * the top of this file.
                          *
                          * Always returns 0.
                    170:  *****************************************************************************/
                    171: int
                    172: rf_ParityLogUpdateFunc(RF_DagNode_t *node)
                    173: {
                    174:        RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p;
                    175:        caddr_t buf = (caddr_t) node->params[1].p;
                    176:        RF_ParityLogData_t *logData;
                    177:        RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
                    178:        RF_Etimer_t timer;
                    179:
                    180:        if (node->dagHdr->status == rf_enable) {
                    181:                RF_ETIMER_START(timer);
                    182:                logData = rf_CreateParityLogData(RF_UPDATE, pda, buf,
                    183:                    (RF_Raid_t *) (node->dagHdr->raidPtr),
                    184:                    node->wakeFunc, (void *) node,
                    185:                    node->dagHdr->tracerec, timer);
                    186:                if (logData)
                    187:                        rf_ParityLogAppend(logData, RF_FALSE, NULL, RF_FALSE);
                    188:                else {
                                                /* No log record: account the time and fail the node. */
                    189:                        RF_ETIMER_STOP(timer);
                    190:                        RF_ETIMER_EVAL(timer);
                    191:                        tracerec->plog_us += RF_ETIMER_VAL_US(timer);
                    192:                        (node->wakeFunc) (node, ENOMEM);
                    193:                }
                    194:        }
                    195:        return (0);
                    196: }
                    197:
                    198:
                    199: /*****************************************************************************
                    200:  * The execution function associated with a parity log overwrite node.
                          *
                          * Node parameters read here:
                          *   params[0].p - physical disk address (RF_PhysDiskAddr_t *).
                          *   params[1].p - data buffer.
                          *
                          * When the DAG header status is rf_enable, an RF_OVERWRITE parity-log
                          * record is created with node->wakeFunc / node as its callback and
                          * argument, and handed to rf_ParityLogAppend().  If the record cannot
                          * be created, the elapsed time is added to tracerec->plog_us and the
                          * wake function is called directly with ENOMEM.
                          *
                          * NOTE(review): when the header status is not rf_enable this routine
                          * does nothing at all -- in particular it does not call the wake
                          * function.  Confirm that this agrees with rule 1 in the comment at
                          * the top of this file.
                          *
                          * Always returns 0.
                    201:  *****************************************************************************/
                    202: int
                    203: rf_ParityLogOverwriteFunc(RF_DagNode_t *node)
                    204: {
                    205:        RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p;
                    206:        caddr_t buf = (caddr_t) node->params[1].p;
                    207:        RF_ParityLogData_t *logData;
                    208:        RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
                    209:        RF_Etimer_t timer;
                    210:
                    211:        if (node->dagHdr->status == rf_enable) {
                    212:                RF_ETIMER_START(timer);
                    213:                logData = rf_CreateParityLogData(RF_OVERWRITE, pda, buf,
                    214:                    (RF_Raid_t *) (node->dagHdr->raidPtr), node->wakeFunc,
                    215:                    (void *) node, node->dagHdr->tracerec, timer);
                    216:                if (logData)
                    217:                        rf_ParityLogAppend(logData, RF_FALSE, NULL, RF_FALSE);
                    218:                else {
                                                /* No log record: account the time and fail the node. */
                    219:                        RF_ETIMER_STOP(timer);
                    220:                        RF_ETIMER_EVAL(timer);
                    221:                        tracerec->plog_us += RF_ETIMER_VAL_US(timer);
                    222:                        (node->wakeFunc) (node, ENOMEM);
                    223:                }
                    224:        }
                    225:        return (0);
                    226: }
                    227: #else  /* RF_INCLUDE_PARITYLOGGING > 0 */
                    228:
                    229: int
                    230: rf_ParityLogUpdateFunc(RF_DagNode_t *node)
                    231: {
                                /* Stub used when RF_INCLUDE_PARITYLOGGING is not > 0: returns 0. */
                    232:        return (0);
                    233: }
                    234:
                    235: int
                    236: rf_ParityLogOverwriteFunc(RF_DagNode_t *node)
                    237: {
                                /* Stub used when RF_INCLUDE_PARITYLOGGING is not > 0: returns 0. */
                    238:        return (0);
                    239: }
                    240: #endif /* RF_INCLUDE_PARITYLOGGING > 0 */
                    241:
                    242: int
                    243: rf_ParityLogUpdateUndoFunc(RF_DagNode_t *node)
                    244: {
                                /* Undo of a parity log update: node is left untouched; returns 0. */
                    245:        return (0);
                    246: }
                    247:
                    248: int
                    249: rf_ParityLogOverwriteUndoFunc(RF_DagNode_t *node)
                    250: {
                                /* Undo of a parity log overwrite: node is left untouched; returns 0. */
                    251:        return (0);
                    252: }
                    253:
                    254: /*****************************************************************************
                    255:  * The execution function associated with a NOP node.
                          *
                          * Marks the node rf_good and returns the result of rf_FinishNode()
                          * called with RF_THREAD_CONTEXT.
                    256:  *****************************************************************************/
                    257: int
                    258: rf_NullNodeFunc(RF_DagNode_t *node)
                    259: {
                    260:        node->status = rf_good;
                    261:        return (rf_FinishNode(node, RF_THREAD_CONTEXT));
                    262: }
                    263:
                    264: int
                    265: rf_NullNodeUndoFunc(RF_DagNode_t *node)
                    266: {
                                /*
                                 * Undo counterpart of rf_NullNodeFunc(): marks the node rf_undone
                                 * and returns the result of rf_FinishNode() (RF_THREAD_CONTEXT).
                                 */
                    267:        node->status = rf_undone;
                    268:        return (rf_FinishNode(node, RF_THREAD_CONTEXT));
                    269: }
                    270:
                    271:
                    272: /*****************************************************************************
                    273:  * The execution function associated with a disk-read node.
                          *
                          * Node parameters read here:
                          *   params[0].p - physical disk address (RF_PhysDiskAddr_t *).
                          *   params[1].p - buffer for the data.
                          *   params[2].v - parity stripe ID.
                          *   params[3].v - packed word from which priority, lock flag, unlock
                          *                 flag and which_ru are extracted by RF_EXTRACT_*().
                          *
                          * A disk queue request is built and queued on Queues[pda->row][pda->col].
                          * The request type is RF_IO_TYPE_READ when the DAG header status is
                          * rf_enable and RF_IO_TYPE_NOP otherwise; it is queued in both cases.
                          * node->wakeFunc is registered as the completion callback.  If the
                          * request cannot be created, the wake function is called directly with
                          * ENOMEM.
                          *
                          * Always returns 0.
                    274:  *****************************************************************************/
                    275: int
                    276: rf_DiskReadFuncForThreads(RF_DagNode_t *node)
                    277: {
                    278:        RF_DiskQueueData_t *req;
                    279:        RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p;
                    280:        caddr_t buf = (caddr_t) node->params[1].p;
                    281:        RF_StripeNum_t parityStripeID = (RF_StripeNum_t) node->params[2].v;
                    282:        unsigned priority = RF_EXTRACT_PRIORITY(node->params[3].v);
                    283:        unsigned lock = RF_EXTRACT_LOCK_FLAG(node->params[3].v);
                    284:        unsigned unlock = RF_EXTRACT_UNLOCK_FLAG(node->params[3].v);
                    285:        unsigned which_ru = RF_EXTRACT_RU(node->params[3].v);
                    286:        RF_DiskQueueDataFlags_t flags = 0;
                    287:        RF_IoType_t iotype = (node->dagHdr->status == rf_enable) ?
                    288:            RF_IO_TYPE_READ : RF_IO_TYPE_NOP;
                    289:        RF_DiskQueue_t **dqs = ((RF_Raid_t *) (node->dagHdr->raidPtr))->Queues;
                    290:        void *b_proc = NULL;
                    291:
                                /* b_proc is taken from the originating buf, when there is one. */
                    292:        if (node->dagHdr->bp)
                    293:                b_proc = (void *) ((struct buf *) node->dagHdr->bp)->b_proc;
                    294:
                                /* A single request may lock or unlock the queue, never both. */
                    295:        RF_ASSERT(!(lock && unlock));
                    296:        flags |= (lock) ? RF_LOCK_DISK_QUEUE : 0;
                    297:        flags |= (unlock) ? RF_UNLOCK_DISK_QUEUE : 0;
                    298:
                    299:        req = rf_CreateDiskQueueData(iotype, pda->startSector, pda->numSector,
                    300:            buf, parityStripeID, which_ru,
                    301:            (int (*) (void *, int)) node->wakeFunc,
                    302:            node, NULL, node->dagHdr->tracerec,
                    303:            (void *) (node->dagHdr->raidPtr), flags, b_proc);
                    304:        if (!req) {
                    305:                (node->wakeFunc) (node, ENOMEM);
                    306:        } else {
                                        /* Remember the request; rf_GenericWakeupFunc() frees it. */
                    307:                node->dagFuncData = (void *) req;
                    308:                rf_DiskIOEnqueue(&(dqs[pda->row][pda->col]), req, priority);
                    309:        }
                    310:        return (0);
                    311: }
                    312:
                    313:
                    314: /*****************************************************************************
                    315:  * The execution function associated with a disk-write node.
                          *
                          * Node parameters read here:
                          *   params[0].p - physical disk address (RF_PhysDiskAddr_t *).
                          *   params[1].p - buffer holding the data.
                          *   params[2].v - parity stripe ID.
                          *   params[3].v - packed word from which priority, lock flag, unlock
                          *                 flag and which_ru are extracted by RF_EXTRACT_*().
                          *
                          * A disk queue request is built and queued on Queues[pda->row][pda->col].
                          * The request type is RF_IO_TYPE_WRITE when the DAG header status is
                          * rf_enable and RF_IO_TYPE_NOP otherwise; it is queued in both cases.
                          * node->wakeFunc is registered as the completion callback.  If the
                          * request cannot be created, the wake function is called directly with
                          * ENOMEM.
                          *
                          * Also called from rf_GenericWakeupFunc() for a node in state rf_bwd1.
                          *
                          * Always returns 0.
                    316:  *****************************************************************************/
                    317: int
                    318: rf_DiskWriteFuncForThreads(RF_DagNode_t *node)
                    319: {
                    320:        RF_DiskQueueData_t *req;
                    321:        RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p;
                    322:        caddr_t buf = (caddr_t) node->params[1].p;
                    323:        RF_StripeNum_t parityStripeID = (RF_StripeNum_t) node->params[2].v;
                    324:        unsigned priority = RF_EXTRACT_PRIORITY(node->params[3].v);
                    325:        unsigned lock = RF_EXTRACT_LOCK_FLAG(node->params[3].v);
                    326:        unsigned unlock = RF_EXTRACT_UNLOCK_FLAG(node->params[3].v);
                    327:        unsigned which_ru = RF_EXTRACT_RU(node->params[3].v);
                    328:        RF_DiskQueueDataFlags_t flags = 0;
                    329:        RF_IoType_t iotype = (node->dagHdr->status == rf_enable) ?
                    330:            RF_IO_TYPE_WRITE : RF_IO_TYPE_NOP;
                    331:        RF_DiskQueue_t **dqs = ((RF_Raid_t *) (node->dagHdr->raidPtr))->Queues;
                    332:        void *b_proc = NULL;
                    333:
                                /* b_proc is taken from the originating buf, when there is one. */
                    334:        if (node->dagHdr->bp)
                    335:                b_proc = (void *) ((struct buf *) node->dagHdr->bp)->b_proc;
                    336:
                    337:        /* Normal processing (rollaway or forward recovery) begins here. */
                                /* A single request may lock or unlock the queue, never both. */
                    338:        RF_ASSERT(!(lock && unlock));
                    339:        flags |= (lock) ? RF_LOCK_DISK_QUEUE : 0;
                    340:        flags |= (unlock) ? RF_UNLOCK_DISK_QUEUE : 0;
                    341:        req = rf_CreateDiskQueueData(iotype, pda->startSector, pda->numSector,
                    342:            buf, parityStripeID, which_ru,
                    343:            (int (*) (void *, int)) node->wakeFunc, (void *) node, NULL,
                    344:            node->dagHdr->tracerec, (void *) (node->dagHdr->raidPtr),
                    345:            flags, b_proc);
                    346:
                    347:        if (!req) {
                    348:                (node->wakeFunc) (node, ENOMEM);
                    349:        } else {
                                        /* Remember the request; rf_GenericWakeupFunc() frees it. */
                    350:                node->dagFuncData = (void *) req;
                    351:                rf_DiskIOEnqueue(&(dqs[pda->row][pda->col]), req, priority);
                    352:        }
                    353:
                    354:        return (0);
                    355: }
                    356: /*****************************************************************************
                    357:  * The undo function for disk nodes.
                    358:  * Note:  This is not a proper undo of a write node; only locks are released.
                    359:  *        Old data is not restored to disk!
                          *
                          * Implementation: queues an RF_IO_TYPE_NOP request carrying the
                          * RF_UNLOCK_DISK_QUEUE flag on Queues[pda->row][pda->col] at
                          * RF_IO_NORMAL_PRIORITY, where pda is params[0].p.  node->wakeFunc is
                          * registered as the completion callback; if the request cannot be
                          * created, the wake function is called directly with ENOMEM.
                          *
                          * Always returns 0.
                    360:  *****************************************************************************/
                    361: int
                    362: rf_DiskUndoFunc(RF_DagNode_t *node)
                    363: {
                    364:        RF_DiskQueueData_t *req;
                    365:        RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p;
                    366:        RF_DiskQueue_t **dqs = ((RF_Raid_t *) (node->dagHdr->raidPtr))->Queues;
                    367:
                    368:        req = rf_CreateDiskQueueData(RF_IO_TYPE_NOP, 0L, 0, NULL, 0L, 0,
                    369:            (int (*) (void *, int)) node->wakeFunc, (void *) node,
                    370:            NULL, node->dagHdr->tracerec, (void *) (node->dagHdr->raidPtr),
                    371:            RF_UNLOCK_DISK_QUEUE, NULL);
                    372:        if (!req)
                    373:                (node->wakeFunc) (node, ENOMEM);
                    374:        else {
                                        /* Remember the request; rf_GenericWakeupFunc() frees it. */
                    375:                node->dagFuncData = (void *) req;
                    376:                rf_DiskIOEnqueue(&(dqs[pda->row][pda->col]), req,
                    377:                    RF_IO_NORMAL_PRIORITY);
                    378:        }
                    379:
                    380:        return (0);
                    381: }
                    382:
                    383: /*****************************************************************************
                    384:  * The execution function associated with an "unlock disk queue" node.
                          *
                          * Queues an RF_IO_TYPE_NOP request carrying the RF_UNLOCK_DISK_QUEUE
                          * flag on Queues[pda->row][pda->col] at RF_IO_NORMAL_PRIORITY, where pda
                          * is params[0].p.  node->wakeFunc is registered as the completion
                          * callback; if the request cannot be created, the wake function is
                          * called directly with ENOMEM.
                          *
                          * The body is statement-for-statement the same as rf_DiskUndoFunc().
                          *
                          * Always returns 0.
                    385:  *****************************************************************************/
                    386: int
                    387: rf_DiskUnlockFuncForThreads(RF_DagNode_t *node)
                    388: {
                    389:        RF_DiskQueueData_t *req;
                    390:        RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p;
                    391:        RF_DiskQueue_t **dqs = ((RF_Raid_t *) (node->dagHdr->raidPtr))->Queues;
                    392:
                    393:        req = rf_CreateDiskQueueData(RF_IO_TYPE_NOP, 0L, 0, NULL, 0L, 0,
                    394:            (int (*) (void *, int)) node->wakeFunc, (void *) node,
                    395:            NULL, node->dagHdr->tracerec, (void *) (node->dagHdr->raidPtr),
                    396:            RF_UNLOCK_DISK_QUEUE, NULL);
                    397:        if (!req)
                    398:                (node->wakeFunc) (node, ENOMEM);
                    399:        else {
                                        /* Remember the request; rf_GenericWakeupFunc() frees it. */
                    400:                node->dagFuncData = (void *) req;
                    401:                rf_DiskIOEnqueue(&(dqs[pda->row][pda->col]), req,
                    402:                    RF_IO_NORMAL_PRIORITY);
                    403:        }
                    404:
                    405:        return (0);
                    406: }
                    407:
                    408: /*****************************************************************************
                    409:  * Callback routine for DiskRead and DiskWrite nodes. When the disk op
                    410:  * completes, the routine is called to set the node status and inform
                    411:  * the execution engine that the node has fired.
                          *
                          * The xor execution functions in this file also call it directly, since
                          * they perform no I/O.
                          *
                          * status is only tested for zero / non-zero.  Transitions, keyed on the
                          * node's current status:
                          *   rf_bwd1    -> rf_bwd2; any attached request is freed and the node is
                          *                 re-issued through rf_DiskWriteFuncForThreads(), whose
                          *                 result is returned (rf_FinishNode() is not called on
                          *                 this path).
                          *   rf_fired   -> rf_bad if status is non-zero, else rf_good.
                          *   rf_recover -> rf_panic if status is non-zero, else rf_undone.
                          *   other      -> diagnostic printf()s followed by RF_PANIC().
                          *
                          * On every path but rf_bwd1, any request stored in node->dagFuncData is
                          * freed and the result of rf_FinishNode(node, RF_INTR_CONTEXT) is
                          * returned.
                          *
                          * NOTE(review): node->dagFuncData is not reset after being freed; it
                          * keeps the stale pointer unless a later call overwrites it.
                    412:  *****************************************************************************/
                    413: int
                    414: rf_GenericWakeupFunc(RF_DagNode_t *node, int status)
                    415: {
                    416:        switch (node->status) {
                    417:        case rf_bwd1:
                    418:                node->status = rf_bwd2;
                    419:                if (node->dagFuncData)
                    420:                        rf_FreeDiskQueueData((RF_DiskQueueData_t *)
                    421:                            node->dagFuncData);
                    422:                return (rf_DiskWriteFuncForThreads(node));
                                        /* Not reached: the return above leaves the function. */
                    423:                break;
                    424:        case rf_fired:
                    425:                if (status)
                    426:                        node->status = rf_bad;
                    427:                else
                    428:                        node->status = rf_good;
                    429:                break;
                    430:        case rf_recover:
                    431:                /* Probably should never reach this case. */
                    432:                if (status)
                    433:                        node->status = rf_panic;
                    434:                else
                    435:                        node->status = rf_undone;
                    436:                break;
                    437:        default:
                    438:                printf("rf_GenericWakeupFunc:");
                    439:                printf("node->status is %d,", node->status);
                    440:                printf("status is %d \n", status);
                    441:                RF_PANIC();
                    442:                break;
                    443:        }
                    444:        if (node->dagFuncData)
                    445:                rf_FreeDiskQueueData((RF_DiskQueueData_t *) node->dagFuncData);
                    446:        return (rf_FinishNode(node, RF_INTR_CONTEXT));
                    447: }
                    448:
                    449:
                    450: /*****************************************************************************
                    451:  * There are three distinct types of xor nodes.
                    452:  *
                    453:  * A "regular xor" is used in the fault-free case where the access spans
                    454:  * a complete stripe unit. It assumes that the result buffer is one full
                    455:  * stripe unit in size, and uses the stripe-unit-offset values that it
                    456:  * computes from the PDAs to determine where within the stripe unit to
                    457:  * XOR each argument buffer.
                    458:  *
                    459:  * A "simple xor" is used in the fault-free case where the access touches
                    460:  * only a portion of one (or two, in some cases) stripe unit(s). It assumes
                    461:  * that all the argument buffers are of the same size and have the same
                    462:  * stripe unit offset.
                    463:  *
                    464:  * A "recovery xor" is used in the degraded-mode case. It's similar to
                    465:  * the regular xor function except that it takes the failed PDA as an
                    466:  * additional parameter, and uses it to determine what portions of the
                    467:  * argument buffers need to be xor'd into the result buffer, and where
                    468:  * in the result buffer they should go.
                    469:  *****************************************************************************/
                    470:
                    471: /*
                    472:  * Xor the params together and store the result in the result field.
                    473:  * Assume the result field points to a buffer that is the size of one SU,
                    474:  * and use the pda params to determine where within the buffer to XOR
                    475:  * the input buffers.
                          *
                          * Parameters are consumed as (PDA, buffer) pairs at params[i] and
                          * params[i + 1]; the last parameter is the RF_Raid_t pointer.  The target
                          * buffer is results[0].  Nothing is xor'd unless the DAG header status
                          * is rf_enable; the time spent is added to tracerec->xor_us.
                          *
                          * Error handling: the return codes of the individual rf_XorIntoBuffer()
                          * calls are OR-ed together, so a failure on any pair is reported even
                          * when a later pair succeeds (a plain assignment would let the last
                          * call overwrite an earlier error).  The combined value is only
                          * meaningful as zero / non-zero, which is all rf_GenericWakeupFunc()
                          * tests.
                          *
                          * Returns the result of rf_GenericWakeupFunc().
                    476:  */
                    477: int
                    478: rf_RegularXorFunc(RF_DagNode_t *node)
                    479: {
                    480:        RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
                    481:        RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
                    482:        RF_Etimer_t timer;
                    483:        int i, retcode;
                    484:
                    485:        retcode = 0;
                    486:        if (node->dagHdr->status == rf_enable) {
                    487:                /* Don't do the XOR if the input is the same as the output. */
                    488:                RF_ETIMER_START(timer);
                    489:                for (i = 0; i < node->numParams - 1; i += 2)
                    490:                        if (node->params[i + 1].p != node->results[0]) {
                                                        /* |= keeps any earlier failure. */
                    491:                                retcode |= rf_XorIntoBuffer(raidPtr,
                    492:                                    (RF_PhysDiskAddr_t *) node->params[i].p,
                    493:                                    (char *) node->params[i + 1].p,
                    494:                                    (char *) node->results[0],
                    495:                                    node->dagHdr->bp);
                    496:                        }
                    497:                RF_ETIMER_STOP(timer);
                    498:                RF_ETIMER_EVAL(timer);
                    499:                tracerec->xor_us += RF_ETIMER_VAL_US(timer);
                    500:        }
                    501:        /* Call wake func explicitly since no I/O in this node. */
                    502:        return (rf_GenericWakeupFunc(node, retcode));
                    503: }
                    504:
                    505: /* Xor the inputs into the result buffer, ignoring placement issues. */
                         /*
                          * Parameters are consumed as (PDA, buffer) pairs at params[i] and
                          * params[i + 1]; the last parameter is the RF_Raid_t pointer.  Each
                          * source buffer is xor'd into results[0] for the byte length obtained
                          * from the PDA's numSector via rf_RaidAddressToByte().  Nothing is xor'd
                          * unless the DAG header status is rf_enable; the time spent is added to
                          * tracerec->xor_us.
                          *
                          * Error handling: the return codes of the individual rf_bxor() calls are
                          * OR-ed together, so a failure on any pair is reported even when a later
                          * pair succeeds (a plain assignment would let the last call overwrite
                          * an earlier error).  The combined value is only meaningful as
                          * zero / non-zero, which is all rf_GenericWakeupFunc() tests.
                          *
                          * Returns the result of rf_GenericWakeupFunc().
                          */
                    506: int
                    507: rf_SimpleXorFunc(RF_DagNode_t *node)
                    508: {
                    509:        RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
                    510:        int i, retcode = 0;
                    511:        RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
                    512:        RF_Etimer_t timer;
                    513:
                    514:        if (node->dagHdr->status == rf_enable) {
                    515:                RF_ETIMER_START(timer);
                    516:                /* Don't do the XOR if the input is the same as the output. */
                    517:                for (i = 0; i < node->numParams - 1; i += 2)
                    518:                        if (node->params[i + 1].p != node->results[0]) {
                                                        /* |= keeps any earlier failure. */
                    519:                                retcode |= rf_bxor((char *)
                    520:                                    node->params[i + 1].p,
                    521:                                    (char *) node->results[0],
                    522:                                    rf_RaidAddressToByte(raidPtr,
                    523:                                    ((RF_PhysDiskAddr_t *)
                    524:                                    node->params[i].p)->numSector),
                    525:                                    (struct buf *) node->dagHdr->bp);
                    526:                        }
                    527:                RF_ETIMER_STOP(timer);
                    528:                RF_ETIMER_EVAL(timer);
                    529:                tracerec->xor_us += RF_ETIMER_VAL_US(timer);
                    530:        }
                    531:        /* Call wake func explicitly since no I/O in this node. */
                    532:        return (rf_GenericWakeupFunc(node, retcode));
                    533: }
                    534:
                    535: /*
                    536:  * This xor is used by the degraded-mode dag functions to recover lost data.
                    537:  * The second-to-last parameter is the PDA for the failed portion of the access.
                    538:  * The code here looks at this PDA and assumes that the xor target buffer is
                    539:  * equal in size to the number of sectors in the failed PDA. It then uses
                    540:  * the other PDAs in the parameter list to determine where within the target
                    541:  * buffer the corresponding data should be xored.
                    542:  */
                    543: int
                    544: rf_RecoveryXorFunc(RF_DagNode_t *node)
                    545: {
                    546:        RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
                    547:        RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
                    548:        RF_PhysDiskAddr_t *failedPDA =
                    549:            (RF_PhysDiskAddr_t *) node->params[node->numParams - 2].p;
                    550:        int i, retcode = 0;
                    551:        RF_PhysDiskAddr_t *pda;
                    552:        int suoffset, failedSUOffset =
                    553:            rf_StripeUnitOffset(layoutPtr, failedPDA->startSector);
                    554:        char *srcbuf, *destbuf;
                    555:        RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
                    556:        RF_Etimer_t timer;
                    557:
                    558:        if (node->dagHdr->status == rf_enable) {
                    559:                RF_ETIMER_START(timer);
                    560:                for (i = 0; i < node->numParams - 2; i += 2)
                    561:                        if (node->params[i + 1].p != node->results[0]) {
                    562:                                pda = (RF_PhysDiskAddr_t *) node->params[i].p;
                    563:                                srcbuf = (char *) node->params[i + 1].p;
                    564:                                suoffset = rf_StripeUnitOffset(layoutPtr,
                    565:                                    pda->startSector);
                    566:                                destbuf = ((char *) node->results[0]) +
                    567:                                    rf_RaidAddressToByte(raidPtr,
                    568:                                    suoffset - failedSUOffset);
                    569:                                retcode = rf_bxor(srcbuf, destbuf,
                    570:                                    rf_RaidAddressToByte(raidPtr,
                    571:                                    pda->numSector), node->dagHdr->bp);
                    572:                        }
                    573:                RF_ETIMER_STOP(timer);
                    574:                RF_ETIMER_EVAL(timer);
                    575:                tracerec->xor_us += RF_ETIMER_VAL_US(timer);
                    576:        }
                    577:        return (rf_GenericWakeupFunc(node, retcode));
                    578: }
                    579:
                    580:
                    581: /*****************************************************************************
                    582:  * The next three functions are utilities used by the above xor-execution
                    583:  * functions.
                    584:  *****************************************************************************/
                    585:
                    586: /*
                    587:  * This is just a glorified buffer xor. Targbuf points to a buffer that is
                    588:  * one full stripe unit in size. srcbuf points to a buffer that may be less
                    589:  * than 1 SU, but never more. When the access described by pda is one SU in
                    590:  * size (which by implication means it's SU-aligned), all that happens is
                    591:  * (targbuf) <- (srcbuf ^ targbuf). When the access is less than one SU in
                    592:  * size the XOR occurs on only the portion of targbuf identified in the pda.
                    593:  */
                    594:
                    595: int
                    596: rf_XorIntoBuffer(RF_Raid_t *raidPtr, RF_PhysDiskAddr_t *pda, char *srcbuf,
                    597:     char *targbuf, void *bp)
                    598: {
                    599:        char *targptr;
                    600:        int sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
                    601:        int SUOffset = pda->startSector % sectPerSU;
                    602:        int length, retcode = 0;
                    603:
                    604:        RF_ASSERT(pda->numSector <= sectPerSU);
                    605:
                    606:        targptr = targbuf + rf_RaidAddressToByte(raidPtr, SUOffset);
                    607:        length = rf_RaidAddressToByte(raidPtr, pda->numSector);
                    608:        retcode = rf_bxor(srcbuf, targptr, length, bp);
                    609:        return (retcode);
                    610: }
                    611:
                    612: /*
                    613:  * It really should be the case that the buffer pointers (returned by malloc)
                    614:  * are aligned to the natural word size of the machine, so this is the only
                    615:  * case we optimize for. The length should always be a multiple of the sector
                    616:  * size, so there should be no problem with leftover bytes at the end.
                    617:  */
                    618: int
                    619: rf_bxor(char *src, char *dest, int len, void *bp)
                    620: {
                    621:        unsigned mask = sizeof(long) - 1, retcode = 0;
                    622:
                    623:        if (!(((unsigned long) src) & mask) &&
                    624:            !(((unsigned long) dest) & mask) && !(len & mask)) {
                    625:                retcode = rf_longword_bxor((unsigned long *) src,
                    626:                    (unsigned long *) dest, len >> RF_LONGSHIFT, bp);
                    627:        } else {
                    628:                RF_ASSERT(0);
                    629:        }
                    630:        return (retcode);
                    631: }
                    632:
                    /*
                     * Map a user buffer into kernel space, if necessary.
                     * As defined here no mapping is performed: y is simply assigned x and
                     * _bp is not used.  The expansion is fully parenthesized so that the
                     * macro also behaves as expected inside a larger expression.
                     */
                    #define        REMAP_VA(_bp,x,y)       ((y) = (x))
                    635:
                    636: /*
                    637:  * When XORing in kernel mode, we need to map each user page to kernel
                    638:  * space before we can access it.
                    639:  * We don't want to assume anything about which input buffers are in
                    640:  * kernel/user space, nor about their alignment, so in each loop we
                    641:  * compute the maximum number of bytes that we can xor without crossing
                    642:  * any page boundaries, and do only this many bytes before the next remap.
                    643:  */
                    644: int
                    645: rf_longword_bxor(unsigned long *src, unsigned long *dest, int len, void *bp)
                    646: {
                    647:        unsigned long *end = src + len; /* len in longwords. */
                    648:        unsigned long d0, d1, d2, d3, s0, s1, s2, s3; /* temps */
                    649:        unsigned long *pg_src, *pg_dest; /* Per-page source/dest pointers. */
                    650:        int longs_this_time; /* # longwords to xor in the current iteration. */
                    651:
                    652:        REMAP_VA(bp, src, pg_src);
                    653:        REMAP_VA(bp, dest, pg_dest);
                    654:        if (!pg_src || !pg_dest)
                    655:                return (EFAULT);
                    656:
                    657:        while (len >= 4) {
                    658:                longs_this_time = RF_MIN(len, RF_MIN(RF_BLIP(pg_src),
                    659:                    RF_BLIP(pg_dest)) >> RF_LONGSHIFT);
                    660:                src += longs_this_time;
                    661:                dest += longs_this_time;
                    662:                len -= longs_this_time;
                    663:                while (longs_this_time >= 4) {
                    664:                        d0 = pg_dest[0];
                    665:                        d1 = pg_dest[1];
                    666:                        d2 = pg_dest[2];
                    667:                        d3 = pg_dest[3];
                    668:                        s0 = pg_src[0];
                    669:                        s1 = pg_src[1];
                    670:                        s2 = pg_src[2];
                    671:                        s3 = pg_src[3];
                    672:                        pg_dest[0] = d0 ^ s0;
                    673:                        pg_dest[1] = d1 ^ s1;
                    674:                        pg_dest[2] = d2 ^ s2;
                    675:                        pg_dest[3] = d3 ^ s3;
                    676:                        pg_src += 4;
                    677:                        pg_dest += 4;
                    678:                        longs_this_time -= 4;
                    679:                }
                    680:                while (longs_this_time > 0) {
                    681:                        /* Cannot cross any page boundaries here. */
                    682:                        *pg_dest++ ^= *pg_src++;
                    683:                        longs_this_time--;
                    684:                }
                    685:
                    686:                /*
                    687:                 * Either we're done, or we've reached a page boundary on one
                    688:                 * (or possibly both) of the pointers.
                    689:                 */
                    690:                if (len) {
                    691:                        if (RF_PAGE_ALIGNED(src))
                    692:                                REMAP_VA(bp, src, pg_src);
                    693:                        if (RF_PAGE_ALIGNED(dest))
                    694:                                REMAP_VA(bp, dest, pg_dest);
                    695:                        if (!pg_src || !pg_dest)
                    696:                                return (EFAULT);
                    697:                }
                    698:        }
                    699:        while (src < end) {
                    700:                *pg_dest++ ^= *pg_src++;
                    701:                src++;
                    702:                dest++;
                    703:                len--;
                    704:                if (RF_PAGE_ALIGNED(src))
                    705:                        REMAP_VA(bp, src, pg_src);
                    706:                if (RF_PAGE_ALIGNED(dest))
                    707:                        REMAP_VA(bp, dest, pg_dest);
                    708:        }
                    709:        RF_ASSERT(len == 0);
                    710:        return (0);
                    711: }
                    712:
                    713:
                    714: /*
                    715:  * dst = a ^ b ^ c;
                    716:  * a may equal dst
                    717:  * see comment above longword_bxor
                    718:  */
                    719: int
                    720: rf_longword_bxor3(unsigned long *dst, unsigned long *a, unsigned long *b,
                    721:     unsigned long *c, int len, void *bp)
                    722: {
                    723:        unsigned long a0, a1, a2, a3, b0, b1, b2, b3;
                    724:        /* Per-page source/dest pointers. */
                    725:        unsigned long *pg_a, *pg_b, *pg_c, *pg_dst;
                    726:        int longs_this_time;    /* # longs to xor in the current iteration */
                    727:        char dst_is_a = 0;
                    728:
                    729:        /* Note: The length (len) is in longwords. */
                    730:
                    731:        REMAP_VA(bp, a, pg_a);
                    732:        REMAP_VA(bp, b, pg_b);
                    733:        REMAP_VA(bp, c, pg_c);
                    734:        if (a == dst) {
                    735:                pg_dst = pg_a;
                    736:                dst_is_a = 1;
                    737:        } else {
                    738:                REMAP_VA(bp, dst, pg_dst);
                    739:        }
                    740:
                    741:        /* Align dest to cache line. Can't cross a pg boundary on dst here. */
                    742:        while ((((unsigned long) pg_dst) & 0x1f)) {
                    743:                *pg_dst++ = *pg_a++ ^ *pg_b++ ^ *pg_c++;
                    744:                dst++;
                    745:                a++;
                    746:                b++;
                    747:                c++;
                    748:                if (RF_PAGE_ALIGNED(a)) {
                    749:                        REMAP_VA(bp, a, pg_a);
                    750:                        if (!pg_a)
                    751:                                return (EFAULT);
                    752:                }
                    753:                if (RF_PAGE_ALIGNED(b)) {
                    754:                        REMAP_VA(bp, a, pg_b);
                    755:                        if (!pg_b)
                    756:                                return (EFAULT);
                    757:                }
                    758:                if (RF_PAGE_ALIGNED(c)) {
                    759:                        REMAP_VA(bp, a, pg_c);
                    760:                        if (!pg_c)
                    761:                                return (EFAULT);
                    762:                }
                    763:                len--;
                    764:        }
                    765:
                    766:        while (len > 4) {
                    767:                longs_this_time = RF_MIN(len, RF_MIN(RF_BLIP(a),
                    768:                    RF_MIN(RF_BLIP(b), RF_MIN(RF_BLIP(c), RF_BLIP(dst)))) >>
                    769:                    RF_LONGSHIFT);
                    770:                a += longs_this_time;
                    771:                b += longs_this_time;
                    772:                c += longs_this_time;
                    773:                dst += longs_this_time;
                    774:                len -= longs_this_time;
                    775:                while (longs_this_time >= 4) {
                    776:                        a0 = pg_a[0];
                    777:                        longs_this_time -= 4;
                    778:
                    779:                        a1 = pg_a[1];
                    780:                        a2 = pg_a[2];
                    781:
                    782:                        a3 = pg_a[3];
                    783:                        pg_a += 4;
                    784:
                    785:                        b0 = pg_b[0];
                    786:                        b1 = pg_b[1];
                    787:
                    788:                        b2 = pg_b[2];
                    789:                        b3 = pg_b[3];
                    790:                        /* Start dual issue. */
                    791:                        a0 ^= b0;
                    792:                        b0 = pg_c[0];
                    793:
                    794:                        pg_b += 4;
                    795:                        a1 ^= b1;
                    796:
                    797:                        a2 ^= b2;
                    798:                        a3 ^= b3;
                    799:
                    800:                        b1 = pg_c[1];
                    801:                        a0 ^= b0;
                    802:
                    803:                        b2 = pg_c[2];
                    804:                        a1 ^= b1;
                    805:
                    806:                        b3 = pg_c[3];
                    807:                        a2 ^= b2;
                    808:
                    809:                        pg_dst[0] = a0;
                    810:                        a3 ^= b3;
                    811:                        pg_dst[1] = a1;
                    812:                        pg_c += 4;
                    813:                        pg_dst[2] = a2;
                    814:                        pg_dst[3] = a3;
                    815:                        pg_dst += 4;
                    816:                }
                    817:                while (longs_this_time > 0) {
                    818:                        /* Cannot cross any page boundaries here. */
                    819:                        *pg_dst++ = *pg_a++ ^ *pg_b++ ^ *pg_c++;
                    820:                        longs_this_time--;
                    821:                }
                    822:
                    823:                if (len) {
                    824:                        if (RF_PAGE_ALIGNED(a)) {
                    825:                                REMAP_VA(bp, a, pg_a);
                    826:                                if (!pg_a)
                    827:                                        return (EFAULT);
                    828:                                if (dst_is_a)
                    829:                                        pg_dst = pg_a;
                    830:                        }
                    831:                        if (RF_PAGE_ALIGNED(b)) {
                    832:                                REMAP_VA(bp, b, pg_b);
                    833:                                if (!pg_b)
                    834:                                        return (EFAULT);
                    835:                        }
                    836:                        if (RF_PAGE_ALIGNED(c)) {
                    837:                                REMAP_VA(bp, c, pg_c);
                    838:                                if (!pg_c)
                    839:                                        return (EFAULT);
                    840:                        }
                    841:                        if (!dst_is_a)
                    842:                                if (RF_PAGE_ALIGNED(dst)) {
                    843:                                        REMAP_VA(bp, dst, pg_dst);
                    844:                                        if (!pg_dst)
                    845:                                                return (EFAULT);
                    846:                                }
                    847:                }
                    848:        }
                    849:        while (len) {
                    850:                *pg_dst++ = *pg_a++ ^ *pg_b++ ^ *pg_c++;
                    851:                dst++;
                    852:                a++;
                    853:                b++;
                    854:                c++;
                    855:                if (RF_PAGE_ALIGNED(a)) {
                    856:                        REMAP_VA(bp, a, pg_a);
                    857:                        if (!pg_a)
                    858:                                return (EFAULT);
                    859:                        if (dst_is_a)
                    860:                                pg_dst = pg_a;
                    861:                }
                    862:                if (RF_PAGE_ALIGNED(b)) {
                    863:                        REMAP_VA(bp, b, pg_b);
                    864:                        if (!pg_b)
                    865:                                return (EFAULT);
                    866:                }
                    867:                if (RF_PAGE_ALIGNED(c)) {
                    868:                        REMAP_VA(bp, c, pg_c);
                    869:                        if (!pg_c)
                    870:                                return (EFAULT);
                    871:                }
                    872:                if (!dst_is_a)
                    873:                        if (RF_PAGE_ALIGNED(dst)) {
                    874:                                REMAP_VA(bp, dst, pg_dst);
                    875:                                if (!pg_dst)
                    876:                                        return (EFAULT);
                    877:                        }
                    878:                len--;
                    879:        }
                    880:        return (0);
                    881: }
                    882:
                    883: int
                    884: rf_bxor3(unsigned char *dst, unsigned char *a, unsigned char *b,
                    885:     unsigned char *c, unsigned long len, void *bp)
                    886: {
                    887:        RF_ASSERT(((RF_UL(dst) | RF_UL(a) | RF_UL(b) | RF_UL(c) | len) & 0x7)
                    888:            == 0);
                    889:
                    890:        return (rf_longword_bxor3((unsigned long *) dst, (unsigned long *) a,
                    891:                (unsigned long *) b, (unsigned long *) c, len >> RF_LONGSHIFT,
                    892:                 bp));
                    893: }

CVSweb