[BACK]Return to rf_dagffrd.c CVS log [TXT][DIR] Up to [local] / sys / dev / raidframe

Annotation of sys/dev/raidframe/rf_dagffrd.c, Revision 1.1

1.1     ! nbrk        1: /*     $OpenBSD: rf_dagffrd.c,v 1.4 2002/12/16 07:01:03 tdeval Exp $   */
        !             2: /*     $NetBSD: rf_dagffrd.c,v 1.4 2000/01/07 03:40:58 oster Exp $     */
        !             3:
        !             4: /*
        !             5:  * Copyright (c) 1995 Carnegie-Mellon University.
        !             6:  * All rights reserved.
        !             7:  *
        !             8:  * Author: Mark Holland, Daniel Stodolsky, William V. Courtright II
        !             9:  *
        !            10:  * Permission to use, copy, modify and distribute this software and
        !            11:  * its documentation is hereby granted, provided that both the copyright
        !            12:  * notice and this permission notice appear in all copies of the
        !            13:  * software, derivative works or modified versions, and any portions
        !            14:  * thereof, and that both notices appear in supporting documentation.
        !            15:  *
        !            16:  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
        !            17:  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
        !            18:  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
        !            19:  *
        !            20:  * Carnegie Mellon requests users of this software to return to
        !            21:  *
        !            22:  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
        !            23:  *  School of Computer Science
        !            24:  *  Carnegie Mellon University
        !            25:  *  Pittsburgh PA 15213-3890
        !            26:  *
        !            27:  * any improvements or extensions that they make and grant Carnegie the
        !            28:  * rights to redistribute these changes.
        !            29:  */
        !            30:
        !            31: /*
        !            32:  * rf_dagffrd.c
        !            33:  *
        !            34:  * Code for creating fault-free read DAGs.
        !            35:  *
        !            36:  */
        !            37:
        !            38: #include "rf_types.h"
        !            39: #include "rf_raid.h"
        !            40: #include "rf_dag.h"
        !            41: #include "rf_dagutils.h"
        !            42: #include "rf_dagfuncs.h"
        !            43: #include "rf_debugMem.h"
        !            44: #include "rf_memchunk.h"
        !            45: #include "rf_general.h"
        !            46: #include "rf_dagffrd.h"
        !            47:
        !            48: void rf_CreateMirrorReadDAG( RF_Raid_t *, RF_AccessStripeMap_t *,
        !            49:        RF_DagHeader_t *, void *, RF_RaidAccessFlags_t, RF_AllocListElem_t *,
        !            50:        int (*) (RF_DagNode_t *));
        !            51:
        !            52: /*****************************************************************************
        !            53:  *
        !            54:  * General comments on DAG creation:
        !            55:  *
        !            56:  * All DAGs in this file use roll-away error recovery.  Each DAG has a single
        !            57:  * commit node, usually called "Cmt."  If an error occurs before the Cmt node
        !            58:  * is reached, the execution engine will halt forward execution and work
        !            59:  * backward through the graph, executing the undo functions.  Assuming that
        !            60:  * each node in the graph prior to the Cmt node are undoable and atomic - or -
        !            61:  * does not make changes to permanent state, the graph will fail atomically.
        !            62:  * If an error occurs after the Cmt node executes, the engine will roll-forward
        !            63:  * through the graph, blindly executing nodes until it reaches the end.
        !            64:  * If a graph reaches the end, it is assumed to have completed successfully.
        !            65:  *
        !            66:  * A graph has only 1 Cmt node.
        !            67:  *
        !            68:  *****************************************************************************/
        !            69:
        !            70:
        !            71: /*****************************************************************************
        !            72:  *
        !            73:  * The following wrappers map the standard DAG creation interface to the
        !            74:  * DAG creation routines.  Additionally, these wrappers enable experimentation
        !            75:  * with new DAG structures by providing an extra level of indirection, allowing
        !            76:  * the DAG creation routines to be replaced at this single point.
        !            77:  *
        !            78:  *****************************************************************************/
        !            79:
        !            80: void
        !            81: rf_CreateFaultFreeReadDAG(
        !            82:        RF_Raid_t               *raidPtr,
        !            83:        RF_AccessStripeMap_t    *asmap,
        !            84:        RF_DagHeader_t          *dag_h,
        !            85:        void                    *bp,
        !            86:        RF_RaidAccessFlags_t     flags,
        !            87:        RF_AllocListElem_t      *allocList
        !            88: )
        !            89: {
        !            90:        rf_CreateNonredundantDAG(raidPtr, asmap, dag_h, bp, flags, allocList,
        !            91:            RF_IO_TYPE_READ);
        !            92: }
        !            93:
        !            94:
        !            95: /*****************************************************************************
        !            96:  *
        !            97:  * DAG creation code begins here.
        !            98:  *
        !            99:  *****************************************************************************/
        !           100:
        !           101: /*****************************************************************************
        !           102:  *
        !           103:  * Creates a DAG to perform a nonredundant read or write of data within one
        !           104:  * stripe.
        !           105:  * For reads, this DAG is as follows:
        !           106:  *
        !           107:  *                   /---- read ----\
        !           108:  *    Header -- Block ---- read ---- Commit -- Terminate
        !           109:  *                   \---- read ----/
        !           110:  *
        !           111:  * For writes, this DAG is as follows:
        !           112:  *
        !           113:  *                    /---- write ----\
        !           114:  *    Header -- Commit ---- write ---- Block -- Terminate
        !           115:  *                    \---- write ----/
        !           116:  *
        !           117:  * There is one disk node per stripe unit accessed, and all disk nodes are in
        !           118:  * parallel.
        !           119:  *
        !           120:  * Tricky point here:  The first disk node (read or write) is created
        !           121:  * normally.  Subsequent disk nodes are created by copying the first one,
        !           122:  * and modifying a few params.  The "succedents" and "antecedents" fields are
        !           123:  * _not_ re-created in each node, but rather left pointing to the same array
        !           124:  * that was malloc'd when the first node was created.  Thus, it's essential
        !           125:  * that when this DAG is freed, the succedents and antecedents fields be freed
        !           126:  * in ONLY ONE of the read nodes.  This does not apply to the "params" field
        !           127:  * because it is recreated for each READ node.
        !           128:  *
        !           129:  * Note that normal-priority accesses do not need to be tagged with their
        !           130:  * parity stripe ID, because they will never be promoted.  Hence, I've
        !           131:  * commented-out the code to do this, and marked it with UNNEEDED.
        !           132:  *
        !           133:  *****************************************************************************/
        !           134:
        !           135: void
        !           136: rf_CreateNonredundantDAG(
        !           137:        RF_Raid_t               *raidPtr,
        !           138:        RF_AccessStripeMap_t    *asmap,
        !           139:        RF_DagHeader_t          *dag_h,
        !           140:        void                    *bp,
        !           141:        RF_RaidAccessFlags_t     flags,
        !           142:        RF_AllocListElem_t      *allocList,
        !           143:        RF_IoType_t              type
        !           144: )
        !           145: {
        !           146:        RF_DagNode_t *nodes, *diskNodes, *blockNode, *commitNode, *termNode;
        !           147:        RF_PhysDiskAddr_t *pda = asmap->physInfo;
        !           148:        int (*doFunc) (RF_DagNode_t *), (*undoFunc) (RF_DagNode_t *);
        !           149:        int i, n, totalNumNodes;
        !           150:        char *name;
        !           151:
        !           152:        n = asmap->numStripeUnitsAccessed;
        !           153:        dag_h->creator = "NonredundantDAG";
        !           154:
        !           155:        RF_ASSERT(RF_IO_IS_R_OR_W(type));
        !           156:        switch (type) {
        !           157:        case RF_IO_TYPE_READ:
        !           158:                doFunc = rf_DiskReadFunc;
        !           159:                undoFunc = rf_DiskReadUndoFunc;
        !           160:                name = "R  ";
        !           161:                if (rf_dagDebug)
        !           162:                        printf("[Creating non-redundant read DAG]\n");
        !           163:                break;
        !           164:        case RF_IO_TYPE_WRITE:
        !           165:                doFunc = rf_DiskWriteFunc;
        !           166:                undoFunc = rf_DiskWriteUndoFunc;
        !           167:                name = "W  ";
        !           168:                if (rf_dagDebug)
        !           169:                        printf("[Creating non-redundant write DAG]\n");
        !           170:                break;
        !           171:        default:
        !           172:                RF_PANIC();
        !           173:        }
        !           174:
        !           175:        /*
        !           176:         * For reads, the dag can not commit until the block node is reached.
        !           177:         * For writes, the dag commits immediately.
        !           178:         */
        !           179:        dag_h->numCommitNodes = 1;
        !           180:        dag_h->numCommits = 0;
        !           181:        dag_h->numSuccedents = 1;
        !           182:
        !           183:        /*
        !           184:         * Node count:
        !           185:         * 1 block node
        !           186:         * n data reads (or writes)
        !           187:         * 1 commit node
        !           188:         * 1 terminator node
        !           189:         */
        !           190:        RF_ASSERT(n > 0);
        !           191:        totalNumNodes = n + 3;
        !           192:        RF_CallocAndAdd(nodes, totalNumNodes, sizeof(RF_DagNode_t),
        !           193:            (RF_DagNode_t *), allocList);
        !           194:        i = 0;
        !           195:        diskNodes = &nodes[i];
        !           196:        i += n;
        !           197:        blockNode = &nodes[i];
        !           198:        i += 1;
        !           199:        commitNode = &nodes[i];
        !           200:        i += 1;
        !           201:        termNode = &nodes[i];
        !           202:        i += 1;
        !           203:        RF_ASSERT(i == totalNumNodes);
        !           204:
        !           205:        /* Initialize nodes. */
        !           206:        switch (type) {
        !           207:        case RF_IO_TYPE_READ:
        !           208:                rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
        !           209:                    rf_NullNodeUndoFunc, NULL, n, 0, 0, 0, dag_h, "Nil",
        !           210:                    allocList);
        !           211:                rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc,
        !           212:                    rf_NullNodeUndoFunc, NULL, 1, n, 0, 0, dag_h, "Cmt",
        !           213:                    allocList);
        !           214:                rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc,
        !           215:                    rf_TerminateUndoFunc, NULL, 0, 1, 0, 0, dag_h, "Trm",
        !           216:                    allocList);
        !           217:                break;
        !           218:        case RF_IO_TYPE_WRITE:
        !           219:                rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
        !           220:                    rf_NullNodeUndoFunc, NULL, 1, 0, 0, 0, dag_h, "Nil",
        !           221:                    allocList);
        !           222:                rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc,
        !           223:                    rf_NullNodeUndoFunc, NULL, n, 1, 0, 0, dag_h, "Cmt",
        !           224:                    allocList);
        !           225:                rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc,
        !           226:                    rf_TerminateUndoFunc, NULL, 0, n, 0, 0, dag_h, "Trm",
        !           227:                    allocList);
        !           228:                break;
        !           229:        default:
        !           230:                RF_PANIC();
        !           231:        }
        !           232:
        !           233:        for (i = 0; i < n; i++) {
        !           234:                RF_ASSERT(pda != NULL);
        !           235:                rf_InitNode(&diskNodes[i], rf_wait, RF_FALSE, doFunc, undoFunc,
        !           236:                    rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, name, allocList);
        !           237:                diskNodes[i].params[0].p = pda;
        !           238:                diskNodes[i].params[1].p = pda->bufPtr;
        !           239:                /* Parity stripe id is not necessary. */
        !           240:                diskNodes[i].params[2].v = 0;
        !           241:                diskNodes[i].params[3].v =
        !           242:                    RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, 0);
        !           243:                pda = pda->next;
        !           244:        }
        !           245:
        !           246:        /*
        !           247:         * Connect nodes.
        !           248:         */
        !           249:
        !           250:        /* Connect hdr to block node. */
        !           251:        RF_ASSERT(blockNode->numAntecedents == 0);
        !           252:        dag_h->succedents[0] = blockNode;
        !           253:
        !           254:        if (type == RF_IO_TYPE_READ) {
        !           255:                /* Connecting a nonredundant read DAG. */
        !           256:                RF_ASSERT(blockNode->numSuccedents == n);
        !           257:                RF_ASSERT(commitNode->numAntecedents == n);
        !           258:                for (i = 0; i < n; i++) {
        !           259:                        /* Connect block node to each read node. */
        !           260:                        RF_ASSERT(diskNodes[i].numAntecedents == 1);
        !           261:                        blockNode->succedents[i] = &diskNodes[i];
        !           262:                        diskNodes[i].antecedents[0] = blockNode;
        !           263:                        diskNodes[i].antType[0] = rf_control;
        !           264:
        !           265:                        /* Connect each read node to the commit node. */
        !           266:                        RF_ASSERT(diskNodes[i].numSuccedents == 1);
        !           267:                        diskNodes[i].succedents[0] = commitNode;
        !           268:                        commitNode->antecedents[i] = &diskNodes[i];
        !           269:                        commitNode->antType[i] = rf_control;
        !           270:                }
        !           271:                /* Connect the commit node to the term node. */
        !           272:                RF_ASSERT(commitNode->numSuccedents == 1);
        !           273:                RF_ASSERT(termNode->numAntecedents == 1);
        !           274:                RF_ASSERT(termNode->numSuccedents == 0);
        !           275:                commitNode->succedents[0] = termNode;
        !           276:                termNode->antecedents[0] = commitNode;
        !           277:                termNode->antType[0] = rf_control;
        !           278:        } else {
        !           279:                /* Connecting a nonredundant write DAG. */
        !           280:                /* Connect the block node to the commit node. */
        !           281:                RF_ASSERT(blockNode->numSuccedents == 1);
        !           282:                RF_ASSERT(commitNode->numAntecedents == 1);
        !           283:                blockNode->succedents[0] = commitNode;
        !           284:                commitNode->antecedents[0] = blockNode;
        !           285:                commitNode->antType[0] = rf_control;
        !           286:
        !           287:                RF_ASSERT(commitNode->numSuccedents == n);
        !           288:                RF_ASSERT(termNode->numAntecedents == n);
        !           289:                RF_ASSERT(termNode->numSuccedents == 0);
        !           290:                for (i = 0; i < n; i++) {
        !           291:                        /* Connect the commit node to each write node. */
        !           292:                        RF_ASSERT(diskNodes[i].numAntecedents == 1);
        !           293:                        commitNode->succedents[i] = &diskNodes[i];
        !           294:                        diskNodes[i].antecedents[0] = commitNode;
        !           295:                        diskNodes[i].antType[0] = rf_control;
        !           296:
        !           297:                        /* Connect each write node to the term node. */
        !           298:                        RF_ASSERT(diskNodes[i].numSuccedents == 1);
        !           299:                        diskNodes[i].succedents[0] = termNode;
        !           300:                        termNode->antecedents[i] = &diskNodes[i];
        !           301:                        termNode->antType[i] = rf_control;
        !           302:                }
        !           303:        }
        !           304: }
        !           305: /*****************************************************************************
        !           306:  * Create a fault-free read DAG for RAID level 1.
        !           307:  *
        !           308:  * Hdr -> Nil -> Rmir -> Cmt -> Trm
        !           309:  *
        !           310:  * The "Rmir" node schedules a read from the disk in the mirror pair with the
        !           311:  * shortest disk queue.  The proper queue is selected at Rmir execution.  This
        !           312:  * deferred mapping is unlike other archs in RAIDframe which generally fix
        !           313:  * mapping at DAG creation time.
        !           314:  *
        !           315:  * Parameters:  raidPtr          - description of the physical array
        !           316:  *             asmap     - logical & physical addresses for this access
        !           317:  *             bp        - buffer ptr (for holding read data)
        !           318:  *             flags     - general flags (e.g. disk locking)
        !           319:  *             allocList - list of memory allocated in DAG creation
        !           320:  *****************************************************************************/
        !           321:
        !           322: void
        !           323: rf_CreateMirrorReadDAG(
        !           324:        RF_Raid_t                *raidPtr,
        !           325:        RF_AccessStripeMap_t     *asmap,
        !           326:        RF_DagHeader_t           *dag_h,
        !           327:        void                     *bp,
        !           328:        RF_RaidAccessFlags_t      flags,
        !           329:        RF_AllocListElem_t       *allocList,
        !           330:        int                     (*readfunc) (RF_DagNode_t *)
        !           331: )
        !           332: {
        !           333:        RF_DagNode_t *readNodes, *nodes, *blockNode, *commitNode, *termNode;
        !           334:        RF_PhysDiskAddr_t *data_pda = asmap->physInfo;
        !           335:        RF_PhysDiskAddr_t *parity_pda = asmap->parityInfo;
        !           336:        int i, n, totalNumNodes;
        !           337:
        !           338:        n = asmap->numStripeUnitsAccessed;
        !           339:        dag_h->creator = "RaidOneReadDAG";
        !           340:        if (rf_dagDebug) {
        !           341:                printf("[Creating RAID level 1 read DAG]\n");
        !           342:        }
        !           343:        /*
        !           344:         * This dag can not commit until the commit node is reached.
        !           345:         * Errors prior to the commit point imply the dag has failed.
        !           346:         */
        !           347:        dag_h->numCommitNodes = 1;
        !           348:        dag_h->numCommits = 0;
        !           349:        dag_h->numSuccedents = 1;
        !           350:
        !           351:        /*
        !           352:         * Node count:
        !           353:         * n data reads
        !           354:         * 1 block node
        !           355:         * 1 commit node
        !           356:         * 1 terminator node
        !           357:         */
        !           358:        RF_ASSERT(n > 0);
        !           359:        totalNumNodes = n + 3;
        !           360:        RF_CallocAndAdd(nodes, totalNumNodes, sizeof(RF_DagNode_t),
        !           361:            (RF_DagNode_t *), allocList);
        !           362:        i = 0;
        !           363:        readNodes = &nodes[i];
        !           364:        i += n;
        !           365:        blockNode = &nodes[i];
        !           366:        i += 1;
        !           367:        commitNode = &nodes[i];
        !           368:        i += 1;
        !           369:        termNode = &nodes[i];
        !           370:        i += 1;
        !           371:        RF_ASSERT(i == totalNumNodes);
        !           372:
        !           373:        /* Initialize nodes. */
        !           374:        rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
        !           375:            rf_NullNodeUndoFunc, NULL, n, 0, 0, 0, dag_h, "Nil", allocList);
        !           376:        rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc,
        !           377:            rf_NullNodeUndoFunc, NULL, 1, n, 0, 0, dag_h, "Cmt", allocList);
        !           378:        rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc,
        !           379:            rf_TerminateUndoFunc, NULL, 0, 1, 0, 0, dag_h, "Trm", allocList);
        !           380:
        !           381:        for (i = 0; i < n; i++) {
        !           382:                RF_ASSERT(data_pda != NULL);
        !           383:                RF_ASSERT(parity_pda != NULL);
        !           384:                rf_InitNode(&readNodes[i], rf_wait, RF_FALSE, readfunc,
        !           385:                    rf_DiskReadMirrorUndoFunc, rf_GenericWakeupFunc, 1, 1, 5,
        !           386:                    0, dag_h, "Rmir", allocList);
        !           387:                readNodes[i].params[0].p = data_pda;
        !           388:                readNodes[i].params[1].p = data_pda->bufPtr;
        !           389:                /* Parity stripe id is not necessary. */
        !           390:                readNodes[i].params[2].p = 0;
        !           391:                readNodes[i].params[3].v =
        !           392:                    RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, 0);
        !           393:                readNodes[i].params[4].p = parity_pda;
        !           394:                data_pda = data_pda->next;
        !           395:                parity_pda = parity_pda->next;
        !           396:        }
        !           397:
        !           398:        /*
        !           399:         * Connect nodes.
        !           400:         */
        !           401:
        !           402:        /* Connect hdr to block node. */
        !           403:        RF_ASSERT(blockNode->numAntecedents == 0);
        !           404:        dag_h->succedents[0] = blockNode;
        !           405:
        !           406:        /* Connect block node to read nodes. */
        !           407:        RF_ASSERT(blockNode->numSuccedents == n);
        !           408:        for (i = 0; i < n; i++) {
        !           409:                RF_ASSERT(readNodes[i].numAntecedents == 1);
        !           410:                blockNode->succedents[i] = &readNodes[i];
        !           411:                readNodes[i].antecedents[0] = blockNode;
        !           412:                readNodes[i].antType[0] = rf_control;
        !           413:        }
        !           414:
        !           415:        /* Connect read nodes to commit node. */
        !           416:        RF_ASSERT(commitNode->numAntecedents == n);
        !           417:        for (i = 0; i < n; i++) {
        !           418:                RF_ASSERT(readNodes[i].numSuccedents == 1);
        !           419:                readNodes[i].succedents[0] = commitNode;
        !           420:                commitNode->antecedents[i] = &readNodes[i];
        !           421:                commitNode->antType[i] = rf_control;
        !           422:        }
        !           423:
        !           424:        /* Connect commit node to term node. */
        !           425:        RF_ASSERT(commitNode->numSuccedents == 1);
        !           426:        RF_ASSERT(termNode->numAntecedents == 1);
        !           427:        RF_ASSERT(termNode->numSuccedents == 0);
        !           428:        commitNode->succedents[0] = termNode;
        !           429:        termNode->antecedents[0] = commitNode;
        !           430:        termNode->antType[0] = rf_control;
        !           431: }
        !           432:
        !           433: void
        !           434: rf_CreateMirrorIdleReadDAG(
        !           435:        RF_Raid_t               *raidPtr,
        !           436:        RF_AccessStripeMap_t    *asmap,
        !           437:        RF_DagHeader_t          *dag_h,
        !           438:        void                    *bp,
        !           439:        RF_RaidAccessFlags_t     flags,
        !           440:        RF_AllocListElem_t      *allocList
        !           441: )
        !           442: {
        !           443:        rf_CreateMirrorReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList,
        !           444:            rf_DiskReadMirrorIdleFunc);
        !           445: }
        !           446:
        !           447: void
        !           448: rf_CreateMirrorPartitionReadDAG(
        !           449:        RF_Raid_t               *raidPtr,
        !           450:        RF_AccessStripeMap_t    *asmap,
        !           451:        RF_DagHeader_t          *dag_h,
        !           452:        void                    *bp,
        !           453:        RF_RaidAccessFlags_t     flags,
        !           454:        RF_AllocListElem_t      *allocList
        !           455: )
        !           456: {
        !           457:        rf_CreateMirrorReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList,
        !           458:            rf_DiskReadMirrorPartitionFunc);
        !           459: }

CVSweb