[BACK]Return to rf_dagffrd.c CVS log [TXT][DIR] Up to [local] / sys / dev / raidframe

Annotation of sys/dev/raidframe/rf_dagffrd.c, Revision 1.1.1.1

1.1       nbrk        1: /*     $OpenBSD: rf_dagffrd.c,v 1.4 2002/12/16 07:01:03 tdeval Exp $   */
                      2: /*     $NetBSD: rf_dagffrd.c,v 1.4 2000/01/07 03:40:58 oster Exp $     */
                      3:
                      4: /*
                      5:  * Copyright (c) 1995 Carnegie-Mellon University.
                      6:  * All rights reserved.
                      7:  *
                      8:  * Author: Mark Holland, Daniel Stodolsky, William V. Courtright II
                      9:  *
                     10:  * Permission to use, copy, modify and distribute this software and
                     11:  * its documentation is hereby granted, provided that both the copyright
                     12:  * notice and this permission notice appear in all copies of the
                     13:  * software, derivative works or modified versions, and any portions
                     14:  * thereof, and that both notices appear in supporting documentation.
                     15:  *
                     16:  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
                     17:  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
                     18:  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
                     19:  *
                     20:  * Carnegie Mellon requests users of this software to return to
                     21:  *
                     22:  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
                     23:  *  School of Computer Science
                     24:  *  Carnegie Mellon University
                     25:  *  Pittsburgh PA 15213-3890
                     26:  *
                     27:  * any improvements or extensions that they make and grant Carnegie the
                     28:  * rights to redistribute these changes.
                     29:  */
                     30:
                     31: /*
                     32:  * rf_dagffrd.c
                     33:  *
                     34:  * Code for creating fault-free read DAGs.
                     35:  *
                     36:  */
                     37:
                     38: #include "rf_types.h"
                     39: #include "rf_raid.h"
                     40: #include "rf_dag.h"
                     41: #include "rf_dagutils.h"
                     42: #include "rf_dagfuncs.h"
                     43: #include "rf_debugMem.h"
                     44: #include "rf_memchunk.h"
                     45: #include "rf_general.h"
                     46: #include "rf_dagffrd.h"
                     47:
                     48: void rf_CreateMirrorReadDAG( RF_Raid_t *, RF_AccessStripeMap_t *,
                     49:        RF_DagHeader_t *, void *, RF_RaidAccessFlags_t, RF_AllocListElem_t *,
                     50:        int (*) (RF_DagNode_t *));
                     51:
                     52: /*****************************************************************************
                     53:  *
                     54:  * General comments on DAG creation:
                     55:  *
                     56:  * All DAGs in this file use roll-away error recovery.  Each DAG has a single
                     57:  * commit node, usually called "Cmt."  If an error occurs before the Cmt node
                     58:  * is reached, the execution engine will halt forward execution and work
                     59:  * backward through the graph, executing the undo functions.  Assuming that
                     60:  * each node in the graph prior to the Cmt node are undoable and atomic - or -
                     61:  * does not make changes to permanent state, the graph will fail atomically.
                     62:  * If an error occurs after the Cmt node executes, the engine will roll-forward
                     63:  * through the graph, blindly executing nodes until it reaches the end.
                     64:  * If a graph reaches the end, it is assumed to have completed successfully.
                     65:  *
                     66:  * A graph has only 1 Cmt node.
                     67:  *
                     68:  *****************************************************************************/
                     69:
                     70:
                     71: /*****************************************************************************
                     72:  *
                     73:  * The following wrappers map the standard DAG creation interface to the
                     74:  * DAG creation routines.  Additionally, these wrappers enable experimentation
                     75:  * with new DAG structures by providing an extra level of indirection, allowing
                     76:  * the DAG creation routines to be replaced at this single point.
                     77:  *
                     78:  *****************************************************************************/
                     79:
                     80: void
                     81: rf_CreateFaultFreeReadDAG(
                     82:        RF_Raid_t               *raidPtr,
                     83:        RF_AccessStripeMap_t    *asmap,
                     84:        RF_DagHeader_t          *dag_h,
                     85:        void                    *bp,
                     86:        RF_RaidAccessFlags_t     flags,
                     87:        RF_AllocListElem_t      *allocList
                     88: )
                     89: {
                     90:        rf_CreateNonredundantDAG(raidPtr, asmap, dag_h, bp, flags, allocList,
                     91:            RF_IO_TYPE_READ);
                     92: }
                     93:
                     94:
                     95: /*****************************************************************************
                     96:  *
                     97:  * DAG creation code begins here.
                     98:  *
                     99:  *****************************************************************************/
                    100:
                    101: /*****************************************************************************
                    102:  *
                    103:  * Creates a DAG to perform a nonredundant read or write of data within one
                    104:  * stripe.
                    105:  * For reads, this DAG is as follows:
                    106:  *
                    107:  *                   /---- read ----\
                    108:  *    Header -- Block ---- read ---- Commit -- Terminate
                    109:  *                   \---- read ----/
                    110:  *
                    111:  * For writes, this DAG is as follows:
                    112:  *
                    113:  *                    /---- write ----\
                    114:  *    Header -- Commit ---- write ---- Block -- Terminate
                    115:  *                    \---- write ----/
                    116:  *
                    117:  * There is one disk node per stripe unit accessed, and all disk nodes are in
                    118:  * parallel.
                    119:  *
                    120:  * Tricky point here:  The first disk node (read or write) is created
                    121:  * normally.  Subsequent disk nodes are created by copying the first one,
                    122:  * and modifying a few params.  The "succedents" and "antecedents" fields are
                    123:  * _not_ re-created in each node, but rather left pointing to the same array
                    124:  * that was malloc'd when the first node was created.  Thus, it's essential
                    125:  * that when this DAG is freed, the succedents and antecedents fields be freed
                    126:  * in ONLY ONE of the read nodes.  This does not apply to the "params" field
                    127:  * because it is recreated for each READ node.
                    128:  *
                    129:  * Note that normal-priority accesses do not need to be tagged with their
                    130:  * parity stripe ID, because they will never be promoted.  Hence, I've
                    131:  * commented-out the code to do this, and marked it with UNNEEDED.
                    132:  *
                    133:  *****************************************************************************/
                    134:
                    135: void
                    136: rf_CreateNonredundantDAG(
                    137:        RF_Raid_t               *raidPtr,
                    138:        RF_AccessStripeMap_t    *asmap,
                    139:        RF_DagHeader_t          *dag_h,
                    140:        void                    *bp,
                    141:        RF_RaidAccessFlags_t     flags,
                    142:        RF_AllocListElem_t      *allocList,
                    143:        RF_IoType_t              type
                    144: )
                    145: {
                    146:        RF_DagNode_t *nodes, *diskNodes, *blockNode, *commitNode, *termNode;
                    147:        RF_PhysDiskAddr_t *pda = asmap->physInfo;
                    148:        int (*doFunc) (RF_DagNode_t *), (*undoFunc) (RF_DagNode_t *);
                    149:        int i, n, totalNumNodes;
                    150:        char *name;
                    151:
                    152:        n = asmap->numStripeUnitsAccessed;
                    153:        dag_h->creator = "NonredundantDAG";
                    154:
                    155:        RF_ASSERT(RF_IO_IS_R_OR_W(type));
                    156:        switch (type) {
                    157:        case RF_IO_TYPE_READ:
                    158:                doFunc = rf_DiskReadFunc;
                    159:                undoFunc = rf_DiskReadUndoFunc;
                    160:                name = "R  ";
                    161:                if (rf_dagDebug)
                    162:                        printf("[Creating non-redundant read DAG]\n");
                    163:                break;
                    164:        case RF_IO_TYPE_WRITE:
                    165:                doFunc = rf_DiskWriteFunc;
                    166:                undoFunc = rf_DiskWriteUndoFunc;
                    167:                name = "W  ";
                    168:                if (rf_dagDebug)
                    169:                        printf("[Creating non-redundant write DAG]\n");
                    170:                break;
                    171:        default:
                    172:                RF_PANIC();
                    173:        }
                    174:
                    175:        /*
                    176:         * For reads, the dag can not commit until the block node is reached.
                    177:         * For writes, the dag commits immediately.
                    178:         */
                    179:        dag_h->numCommitNodes = 1;
                    180:        dag_h->numCommits = 0;
                    181:        dag_h->numSuccedents = 1;
                    182:
                    183:        /*
                    184:         * Node count:
                    185:         * 1 block node
                    186:         * n data reads (or writes)
                    187:         * 1 commit node
                    188:         * 1 terminator node
                    189:         */
                    190:        RF_ASSERT(n > 0);
                    191:        totalNumNodes = n + 3;
                    192:        RF_CallocAndAdd(nodes, totalNumNodes, sizeof(RF_DagNode_t),
                    193:            (RF_DagNode_t *), allocList);
                    194:        i = 0;
                    195:        diskNodes = &nodes[i];
                    196:        i += n;
                    197:        blockNode = &nodes[i];
                    198:        i += 1;
                    199:        commitNode = &nodes[i];
                    200:        i += 1;
                    201:        termNode = &nodes[i];
                    202:        i += 1;
                    203:        RF_ASSERT(i == totalNumNodes);
                    204:
                    205:        /* Initialize nodes. */
                    206:        switch (type) {
                    207:        case RF_IO_TYPE_READ:
                    208:                rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
                    209:                    rf_NullNodeUndoFunc, NULL, n, 0, 0, 0, dag_h, "Nil",
                    210:                    allocList);
                    211:                rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc,
                    212:                    rf_NullNodeUndoFunc, NULL, 1, n, 0, 0, dag_h, "Cmt",
                    213:                    allocList);
                    214:                rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc,
                    215:                    rf_TerminateUndoFunc, NULL, 0, 1, 0, 0, dag_h, "Trm",
                    216:                    allocList);
                    217:                break;
                    218:        case RF_IO_TYPE_WRITE:
                    219:                rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
                    220:                    rf_NullNodeUndoFunc, NULL, 1, 0, 0, 0, dag_h, "Nil",
                    221:                    allocList);
                    222:                rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc,
                    223:                    rf_NullNodeUndoFunc, NULL, n, 1, 0, 0, dag_h, "Cmt",
                    224:                    allocList);
                    225:                rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc,
                    226:                    rf_TerminateUndoFunc, NULL, 0, n, 0, 0, dag_h, "Trm",
                    227:                    allocList);
                    228:                break;
                    229:        default:
                    230:                RF_PANIC();
                    231:        }
                    232:
                    233:        for (i = 0; i < n; i++) {
                    234:                RF_ASSERT(pda != NULL);
                    235:                rf_InitNode(&diskNodes[i], rf_wait, RF_FALSE, doFunc, undoFunc,
                    236:                    rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, name, allocList);
                    237:                diskNodes[i].params[0].p = pda;
                    238:                diskNodes[i].params[1].p = pda->bufPtr;
                    239:                /* Parity stripe id is not necessary. */
                    240:                diskNodes[i].params[2].v = 0;
                    241:                diskNodes[i].params[3].v =
                    242:                    RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, 0);
                    243:                pda = pda->next;
                    244:        }
                    245:
                    246:        /*
                    247:         * Connect nodes.
                    248:         */
                    249:
                    250:        /* Connect hdr to block node. */
                    251:        RF_ASSERT(blockNode->numAntecedents == 0);
                    252:        dag_h->succedents[0] = blockNode;
                    253:
                    254:        if (type == RF_IO_TYPE_READ) {
                    255:                /* Connecting a nonredundant read DAG. */
                    256:                RF_ASSERT(blockNode->numSuccedents == n);
                    257:                RF_ASSERT(commitNode->numAntecedents == n);
                    258:                for (i = 0; i < n; i++) {
                    259:                        /* Connect block node to each read node. */
                    260:                        RF_ASSERT(diskNodes[i].numAntecedents == 1);
                    261:                        blockNode->succedents[i] = &diskNodes[i];
                    262:                        diskNodes[i].antecedents[0] = blockNode;
                    263:                        diskNodes[i].antType[0] = rf_control;
                    264:
                    265:                        /* Connect each read node to the commit node. */
                    266:                        RF_ASSERT(diskNodes[i].numSuccedents == 1);
                    267:                        diskNodes[i].succedents[0] = commitNode;
                    268:                        commitNode->antecedents[i] = &diskNodes[i];
                    269:                        commitNode->antType[i] = rf_control;
                    270:                }
                    271:                /* Connect the commit node to the term node. */
                    272:                RF_ASSERT(commitNode->numSuccedents == 1);
                    273:                RF_ASSERT(termNode->numAntecedents == 1);
                    274:                RF_ASSERT(termNode->numSuccedents == 0);
                    275:                commitNode->succedents[0] = termNode;
                    276:                termNode->antecedents[0] = commitNode;
                    277:                termNode->antType[0] = rf_control;
                    278:        } else {
                    279:                /* Connecting a nonredundant write DAG. */
                    280:                /* Connect the block node to the commit node. */
                    281:                RF_ASSERT(blockNode->numSuccedents == 1);
                    282:                RF_ASSERT(commitNode->numAntecedents == 1);
                    283:                blockNode->succedents[0] = commitNode;
                    284:                commitNode->antecedents[0] = blockNode;
                    285:                commitNode->antType[0] = rf_control;
                    286:
                    287:                RF_ASSERT(commitNode->numSuccedents == n);
                    288:                RF_ASSERT(termNode->numAntecedents == n);
                    289:                RF_ASSERT(termNode->numSuccedents == 0);
                    290:                for (i = 0; i < n; i++) {
                    291:                        /* Connect the commit node to each write node. */
                    292:                        RF_ASSERT(diskNodes[i].numAntecedents == 1);
                    293:                        commitNode->succedents[i] = &diskNodes[i];
                    294:                        diskNodes[i].antecedents[0] = commitNode;
                    295:                        diskNodes[i].antType[0] = rf_control;
                    296:
                    297:                        /* Connect each write node to the term node. */
                    298:                        RF_ASSERT(diskNodes[i].numSuccedents == 1);
                    299:                        diskNodes[i].succedents[0] = termNode;
                    300:                        termNode->antecedents[i] = &diskNodes[i];
                    301:                        termNode->antType[i] = rf_control;
                    302:                }
                    303:        }
                    304: }
                    305: /*****************************************************************************
                    306:  * Create a fault-free read DAG for RAID level 1.
                    307:  *
                    308:  * Hdr -> Nil -> Rmir -> Cmt -> Trm
                    309:  *
                    310:  * The "Rmir" node schedules a read from the disk in the mirror pair with the
                    311:  * shortest disk queue.  The proper queue is selected at Rmir execution.  This
                    312:  * deferred mapping is unlike other archs in RAIDframe which generally fix
                    313:  * mapping at DAG creation time.
                    314:  *
                    315:  * Parameters:  raidPtr          - description of the physical array
                    316:  *             asmap     - logical & physical addresses for this access
                    317:  *             bp        - buffer ptr (for holding read data)
                    318:  *             flags     - general flags (e.g. disk locking)
                    319:  *             allocList - list of memory allocated in DAG creation
                    320:  *****************************************************************************/
                    321:
                    322: void
                    323: rf_CreateMirrorReadDAG(
                    324:        RF_Raid_t                *raidPtr,
                    325:        RF_AccessStripeMap_t     *asmap,
                    326:        RF_DagHeader_t           *dag_h,
                    327:        void                     *bp,
                    328:        RF_RaidAccessFlags_t      flags,
                    329:        RF_AllocListElem_t       *allocList,
                    330:        int                     (*readfunc) (RF_DagNode_t *)
                    331: )
                    332: {
                    333:        RF_DagNode_t *readNodes, *nodes, *blockNode, *commitNode, *termNode;
                    334:        RF_PhysDiskAddr_t *data_pda = asmap->physInfo;
                    335:        RF_PhysDiskAddr_t *parity_pda = asmap->parityInfo;
                    336:        int i, n, totalNumNodes;
                    337:
                    338:        n = asmap->numStripeUnitsAccessed;
                    339:        dag_h->creator = "RaidOneReadDAG";
                    340:        if (rf_dagDebug) {
                    341:                printf("[Creating RAID level 1 read DAG]\n");
                    342:        }
                    343:        /*
                    344:         * This dag can not commit until the commit node is reached.
                    345:         * Errors prior to the commit point imply the dag has failed.
                    346:         */
                    347:        dag_h->numCommitNodes = 1;
                    348:        dag_h->numCommits = 0;
                    349:        dag_h->numSuccedents = 1;
                    350:
                    351:        /*
                    352:         * Node count:
                    353:         * n data reads
                    354:         * 1 block node
                    355:         * 1 commit node
                    356:         * 1 terminator node
                    357:         */
                    358:        RF_ASSERT(n > 0);
                    359:        totalNumNodes = n + 3;
                    360:        RF_CallocAndAdd(nodes, totalNumNodes, sizeof(RF_DagNode_t),
                    361:            (RF_DagNode_t *), allocList);
                    362:        i = 0;
                    363:        readNodes = &nodes[i];
                    364:        i += n;
                    365:        blockNode = &nodes[i];
                    366:        i += 1;
                    367:        commitNode = &nodes[i];
                    368:        i += 1;
                    369:        termNode = &nodes[i];
                    370:        i += 1;
                    371:        RF_ASSERT(i == totalNumNodes);
                    372:
                    373:        /* Initialize nodes. */
                    374:        rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
                    375:            rf_NullNodeUndoFunc, NULL, n, 0, 0, 0, dag_h, "Nil", allocList);
                    376:        rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc,
                    377:            rf_NullNodeUndoFunc, NULL, 1, n, 0, 0, dag_h, "Cmt", allocList);
                    378:        rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc,
                    379:            rf_TerminateUndoFunc, NULL, 0, 1, 0, 0, dag_h, "Trm", allocList);
                    380:
                    381:        for (i = 0; i < n; i++) {
                    382:                RF_ASSERT(data_pda != NULL);
                    383:                RF_ASSERT(parity_pda != NULL);
                    384:                rf_InitNode(&readNodes[i], rf_wait, RF_FALSE, readfunc,
                    385:                    rf_DiskReadMirrorUndoFunc, rf_GenericWakeupFunc, 1, 1, 5,
                    386:                    0, dag_h, "Rmir", allocList);
                    387:                readNodes[i].params[0].p = data_pda;
                    388:                readNodes[i].params[1].p = data_pda->bufPtr;
                    389:                /* Parity stripe id is not necessary. */
                    390:                readNodes[i].params[2].p = 0;
                    391:                readNodes[i].params[3].v =
                    392:                    RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, 0);
                    393:                readNodes[i].params[4].p = parity_pda;
                    394:                data_pda = data_pda->next;
                    395:                parity_pda = parity_pda->next;
                    396:        }
                    397:
                    398:        /*
                    399:         * Connect nodes.
                    400:         */
                    401:
                    402:        /* Connect hdr to block node. */
                    403:        RF_ASSERT(blockNode->numAntecedents == 0);
                    404:        dag_h->succedents[0] = blockNode;
                    405:
                    406:        /* Connect block node to read nodes. */
                    407:        RF_ASSERT(blockNode->numSuccedents == n);
                    408:        for (i = 0; i < n; i++) {
                    409:                RF_ASSERT(readNodes[i].numAntecedents == 1);
                    410:                blockNode->succedents[i] = &readNodes[i];
                    411:                readNodes[i].antecedents[0] = blockNode;
                    412:                readNodes[i].antType[0] = rf_control;
                    413:        }
                    414:
                    415:        /* Connect read nodes to commit node. */
                    416:        RF_ASSERT(commitNode->numAntecedents == n);
                    417:        for (i = 0; i < n; i++) {
                    418:                RF_ASSERT(readNodes[i].numSuccedents == 1);
                    419:                readNodes[i].succedents[0] = commitNode;
                    420:                commitNode->antecedents[i] = &readNodes[i];
                    421:                commitNode->antType[i] = rf_control;
                    422:        }
                    423:
                    424:        /* Connect commit node to term node. */
                    425:        RF_ASSERT(commitNode->numSuccedents == 1);
                    426:        RF_ASSERT(termNode->numAntecedents == 1);
                    427:        RF_ASSERT(termNode->numSuccedents == 0);
                    428:        commitNode->succedents[0] = termNode;
                    429:        termNode->antecedents[0] = commitNode;
                    430:        termNode->antType[0] = rf_control;
                    431: }
                    432:
                    433: void
                    434: rf_CreateMirrorIdleReadDAG(
                    435:        RF_Raid_t               *raidPtr,
                    436:        RF_AccessStripeMap_t    *asmap,
                    437:        RF_DagHeader_t          *dag_h,
                    438:        void                    *bp,
                    439:        RF_RaidAccessFlags_t     flags,
                    440:        RF_AllocListElem_t      *allocList
                    441: )
                    442: {
                    443:        rf_CreateMirrorReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList,
                    444:            rf_DiskReadMirrorIdleFunc);
                    445: }
                    446:
                    447: void
                    448: rf_CreateMirrorPartitionReadDAG(
                    449:        RF_Raid_t               *raidPtr,
                    450:        RF_AccessStripeMap_t    *asmap,
                    451:        RF_DagHeader_t          *dag_h,
                    452:        void                    *bp,
                    453:        RF_RaidAccessFlags_t     flags,
                    454:        RF_AllocListElem_t      *allocList
                    455: )
                    456: {
                    457:        rf_CreateMirrorReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList,
                    458:            rf_DiskReadMirrorPartitionFunc);
                    459: }

CVSweb