sys/dev/raidframe/rf_reconstruct.c - annotate

Return to rf_reconstruct.c CVS log
Up to [local] / sys / dev / raidframe
Annotation of sys/dev/raidframe/rf_reconstruct.c, Revision 1.1

1.1     ! nbrk        1: /*     $OpenBSD: rf_reconstruct.c,v 1.16 2007/06/05 00:38:22 deraadt Exp $     */
        !             2: /*     $NetBSD: rf_reconstruct.c,v 1.26 2000/06/04 02:05:13 oster Exp $        */
        !             3:
        !             4: /*
        !             5:  * Copyright (c) 1995 Carnegie-Mellon University.
        !             6:  * All rights reserved.
        !             7:  *
        !             8:  * Author: Mark Holland
        !             9:  *
        !            10:  * Permission to use, copy, modify and distribute this software and
        !            11:  * its documentation is hereby granted, provided that both the copyright
        !            12:  * notice and this permission notice appear in all copies of the
        !            13:  * software, derivative works or modified versions, and any portions
        !            14:  * thereof, and that both notices appear in supporting documentation.
        !            15:  *
        !            16:  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
        !            17:  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
        !            18:  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
        !            19:  *
        !            20:  * Carnegie Mellon requests users of this software to return to
        !            21:  *
        !            22:  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
        !            23:  *  School of Computer Science
        !            24:  *  Carnegie Mellon University
        !            25:  *  Pittsburgh PA 15213-3890
        !            26:  *
        !            27:  * any improvements or extensions that they make and grant Carnegie the
        !            28:  * rights to redistribute these changes.
        !            29:  */
        !            30:
        !            31: /**************************************************************
        !            32:  *
        !            33:  * rf_reconstruct.c -- Code to perform on-line reconstruction.
        !            34:  *
        !            35:  **************************************************************/
        !            36:
        !            37: #include "rf_types.h"
        !            38: #include <sys/time.h>
        !            39: #include <sys/buf.h>
        !            40: #include <sys/errno.h>
        !            41:
        !            42: #include <sys/types.h>
        !            43: #include <sys/param.h>
        !            44: #include <sys/systm.h>
        !            45: #include <sys/proc.h>
        !            46: #include <sys/ioctl.h>
        !            47: #include <sys/fcntl.h>
        !            48: #if    __NETBSD__
        !            49: #include <sys/vnode.h>
        !            50: #endif
        !            51:
        !            52: #include "rf_raid.h"
        !            53: #include "rf_reconutil.h"
        !            54: #include "rf_revent.h"
        !            55: #include "rf_reconbuffer.h"
        !            56: #include "rf_acctrace.h"
        !            57: #include "rf_etimer.h"
        !            58: #include "rf_dag.h"
        !            59: #include "rf_desc.h"
        !            60: #include "rf_general.h"
        !            61: #include "rf_freelist.h"
        !            62: #include "rf_debugprint.h"
        !            63: #include "rf_driver.h"
        !            64: #include "rf_utils.h"
        !            65: #include "rf_shutdown.h"
        !            66:
        !            67: #include "rf_kintf.h"
        !            68:
        !            69: /*
        !            70:  * Setting these to -1 causes them to be set to their default values if not set
        !            71:  * by debug options.
        !            72:  */
        !            73:
                         /*
                          * Reconstruction debug tracing.
                          *
                          * DprintfN(s, a1 .. aN) calls rf_debug_printf() only when rf_reconDebug
                          * is non-zero.  rf_debug_printf() is always handed the format string
                          * followed by exactly eight argument slots: the N supplied values are
                          * cast through unsigned long to void *, and the unused slots are
                          * filled with NULL.
                          *
                          * NOTE(review): the macro arguments are not parenthesized inside the
                          * cast, so for an expression argument the (unsigned long) cast binds
                          * only to its first operand.  Parenthesize non-trivial arguments at
                          * the call site.
                          */
        !            74: #define        Dprintf(s)                                                      \
        !            75: do {                                                                   \
        !            76:        if (rf_reconDebug)                                              \
        !            77:                rf_debug_printf(s,                                      \
        !            78:                    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL);    \
        !            79: } while (0)
        !            80: #define        Dprintf1(s,a)                                                   \
        !            81: do {                                                                   \
        !            82:        if (rf_reconDebug)                                              \
        !            83:                rf_debug_printf(s,                                      \
        !            84:                    (void *)((unsigned long)a),                         \
        !            85:                    NULL, NULL, NULL, NULL, NULL, NULL, NULL);          \
        !            86: } while (0)
        !            87: #define        Dprintf2(s,a,b)                                                 \
        !            88: do {                                                                   \
        !            89:        if (rf_reconDebug)                                              \
        !            90:                rf_debug_printf(s,                                      \
        !            91:                    (void *)((unsigned long)a),                         \
        !            92:                    (void *)((unsigned long)b),                         \
        !            93:                    NULL, NULL, NULL, NULL, NULL, NULL);                \
        !            94: } while (0)
        !            95: #define        Dprintf3(s,a,b,c)                                               \
        !            96: do {                                                                   \
        !            97:        if (rf_reconDebug)                                              \
        !            98:                rf_debug_printf(s,                                      \
        !            99:                    (void *)((unsigned long)a),                         \
        !           100:                    (void *)((unsigned long)b),                         \
        !           101:                    (void *)((unsigned long)c),                         \
        !           102:                    NULL, NULL, NULL, NULL, NULL);                      \
        !           103: } while (0)
        !           104: #define        Dprintf4(s,a,b,c,d)                                             \
        !           105: do {                                                                   \
        !           106:        if (rf_reconDebug)                                              \
        !           107:                rf_debug_printf(s,                                      \
        !           108:                    (void *)((unsigned long)a),                         \
        !           109:                    (void *)((unsigned long)b),                         \
        !           110:                    (void *)((unsigned long)c),                         \
        !           111:                    (void *)((unsigned long)d),                         \
        !           112:                    NULL, NULL, NULL, NULL);                            \
        !           113: } while (0)
        !           114: #define        Dprintf5(s,a,b,c,d,e)                                           \
        !           115: do {                                                                   \
        !           116:        if (rf_reconDebug)                                              \
        !           117:                rf_debug_printf(s,                                      \
        !           118:                    (void *)((unsigned long)a),                         \
        !           119:                    (void *)((unsigned long)b),                         \
        !           120:                    (void *)((unsigned long)c),                         \
        !           121:                    (void *)((unsigned long)d),                         \
        !           122:                    (void *)((unsigned long)e),                         \
        !           123:                    NULL, NULL, NULL);                                  \
        !           124: } while (0)
        !           125: #define        Dprintf6(s,a,b,c,d,e,f)                                         \
        !           126: do {                                                                   \
        !           127:        if (rf_reconDebug)                                              \
        !           128:                rf_debug_printf(s,                                      \
        !           129:                    (void *)((unsigned long)a),                         \
        !           130:                    (void *)((unsigned long)b),                         \
        !           131:                    (void *)((unsigned long)c),                         \
        !           132:                    (void *)((unsigned long)d),                         \
        !           133:                    (void *)((unsigned long)e),                         \
        !           134:                    (void *)((unsigned long)f),                         \
        !           135:                    NULL, NULL);                                        \
        !           136: } while (0)
        !           137: #define        Dprintf7(s,a,b,c,d,e,f,g)                                       \
        !           138: do {                                                                   \
        !           139:        if (rf_reconDebug)                                              \
        !           140:                rf_debug_printf(s,                                      \
        !           141:                    (void *)((unsigned long)a),                         \
        !           142:                    (void *)((unsigned long)b),                         \
        !           143:                    (void *)((unsigned long)c),                         \
        !           144:                    (void *)((unsigned long)d),                         \
        !           145:                    (void *)((unsigned long)e),                         \
        !           146:                    (void *)((unsigned long)f),                         \
        !           147:                    (void *)((unsigned long)g),                         \
        !           148:                    NULL);                                              \
        !           149: } while (0)
        !           150:
                         /*
                          * DDprintf1 and DDprintf2 expand to exactly the same text as Dprintf1
                          * and Dprintf2: a call to rf_debug_printf() guarded by rf_reconDebug,
                          * with the supplied values cast to void * and the remaining argument
                          * slots set to NULL.
                          */
        !           151: #define        DDprintf1(s,a)                                                  \
        !           152: do {                                                                   \
        !           153:        if (rf_reconDebug)                                              \
        !           154:                rf_debug_printf(s,                                      \
        !           155:                    (void *)((unsigned long)a),                         \
        !           156:                    NULL, NULL, NULL, NULL, NULL, NULL, NULL);          \
        !           157: } while (0)
        !           158: #define        DDprintf2(s,a,b)                                                \
        !           159: do {                                                                   \
        !           160:        if (rf_reconDebug)                                              \
        !           161:                rf_debug_printf(s,                                      \
        !           162:                    (void *)((unsigned long)a),                         \
        !           163:                    (void *)((unsigned long)b),                         \
        !           164:                    NULL, NULL, NULL, NULL, NULL, NULL);                \
        !           165: } while (0)
        !           166:
                         /*
                          * Free list from which RF_RaidReconDesc_t descriptors are taken and to
                          * which they are returned.  The two constants below are the second and
                          * third arguments given to RF_FREELIST_CREATE for this list.
                          */
        !           167: static RF_FreeList_t *rf_recond_freelist;
        !           168: #define        RF_MAX_FREE_RECOND      4
        !           169: #define        RF_RECOND_INC           1
        !           170:
                         /* Prototypes for routines used within this file. */
        !           171: RF_RaidReconDesc_t *rf_AllocRaidReconDesc(RF_Raid_t *,
        !           172:        RF_RowCol_t, RF_RowCol_t, RF_RaidDisk_t *, int,
        !           173:        RF_RowCol_t, RF_RowCol_t);
        !           174: int  rf_ProcessReconEvent(RF_Raid_t *, RF_RowCol_t, RF_ReconEvent_t *);
        !           175: int  rf_IssueNextReadRequest(RF_Raid_t *, RF_RowCol_t, RF_RowCol_t);
        !           176: int  rf_TryToRead(RF_Raid_t *, RF_RowCol_t, RF_RowCol_t);
        !           177: int  rf_ComputePSDiskOffsets(RF_Raid_t *, RF_StripeNum_t,
        !           178:        RF_RowCol_t, RF_RowCol_t, RF_SectorNum_t *, RF_SectorNum_t *,
        !           179:        RF_RowCol_t *, RF_RowCol_t *, RF_SectorNum_t *);
        !           180: int  rf_ReconReadDoneProc(void *, int);
        !           181: int  rf_ReconWriteDoneProc(void *, int);
        !           182: void rf_CheckForNewMinHeadSep(RF_Raid_t *, RF_RowCol_t, RF_HeadSepLimit_t);
        !           183: int  rf_CheckHeadSeparation(RF_Raid_t *, RF_PerDiskReconCtrl_t *,
        !           184:        RF_RowCol_t, RF_RowCol_t, RF_HeadSepLimit_t, RF_ReconUnitNum_t);
        !           185: void rf_ForceReconReadDoneProc(void *, int);
        !           186: void rf_ShutdownReconstruction(void *);
        !           187:
        !           188: /*
        !           189:  * These functions are inlined on gcc. If they are used more than
        !           190:  * once, it is strongly advised to un-inline them.
        !           191:  */
        !           192: void rf_FreeReconDesc(RF_RaidReconDesc_t *);
        !           193: int  rf_IssueNextWriteRequest(RF_Raid_t *, RF_RowCol_t);
        !           194: int  rf_CheckForcedOrBlockedReconstruction(RF_Raid_t *,
        !           195:        RF_ReconParityStripeStatus_t *, RF_PerDiskReconCtrl_t *,
        !           196:        RF_RowCol_t, RF_RowCol_t, RF_StripeNum_t, RF_ReconUnitNum_t);
        !           197: void rf_SignalReconDone(RF_Raid_t *);
        !           198:
                         /*
                          * One registered "reconstruction done" callback.  Records are kept on
                          * the singly linked list raidPtr->recon_done_procs; rf_SignalReconDone()
                          * walks that list and calls proc(raidPtr, arg) for each record.
                          */
        !           199: struct RF_ReconDoneProc_s {
        !           200:        void                    (*proc) (RF_Raid_t *, void *);
        !           201:        void                     *arg;
        !           202:        RF_ReconDoneProc_t       *next;
        !           203: };
        !           204:
                         /*
                          * Free list for RF_ReconDoneProc_t records.  The two constants below
                          * are the second and third arguments given to RF_FREELIST_CREATE for
                          * this list.
                          */
        !           205: static RF_FreeList_t *rf_rdp_freelist;
        !           206: #define        RF_MAX_FREE_RDP         4
        !           207: #define        RF_RDP_INC              1
        !           208:
                         /*
                          * Call every callback registered on raidPtr->recon_done_procs, passing
                          * each one the array pointer and the argument stored with it.
                          *
                          * The callbacks run while recon_done_proc_mutex is held, the same mutex
                          * rf_RegisterReconDoneProc() takes to modify the list.
                          */
        !           209: void
        !           210: rf_SignalReconDone(RF_Raid_t *raidPtr)
        !           211: {
        !           212:        RF_ReconDoneProc_t *p;
        !           213:
        !           214:        RF_LOCK_MUTEX(raidPtr->recon_done_proc_mutex);
        !           215:        for (p = raidPtr->recon_done_procs; p; p = p->next) {
        !           216:                p->proc(raidPtr, p->arg);
        !           217:        }
        !           218:        RF_UNLOCK_MUTEX(raidPtr->recon_done_proc_mutex);
        !           219: }
        !           220:
                         /*
                          * Register a callback to be invoked by rf_SignalReconDone().
                          *
                          * raidPtr  array whose callback list is extended
                          * proc     function to call; it receives raidPtr and arg
                          * arg      opaque value stored and handed back to proc
                          * handlep  if non-NULL, receives a pointer to the new record
                          *
                          * Returns 0 on success, or ENOMEM if no record could be obtained from
                          * rf_rdp_freelist.  The new record is pushed on the head of the list,
                          * so callbacks are invoked most-recently-registered first.
                          */
        !           221: int
        !           222: rf_RegisterReconDoneProc(RF_Raid_t *raidPtr, void (*proc) (RF_Raid_t *, void *),
        !           223:     void *arg, RF_ReconDoneProc_t **handlep)
        !           224: {
        !           225:        RF_ReconDoneProc_t *p;
        !           226:
        !           227:        RF_FREELIST_GET(rf_rdp_freelist, p, next, (RF_ReconDoneProc_t *));
        !           228:        if (p == NULL)
        !           229:                return (ENOMEM);
                                /* Fill in the record before it becomes visible on the list. */
        !           230:        p->proc = proc;
        !           231:        p->arg = arg;
        !           232:        RF_LOCK_MUTEX(raidPtr->recon_done_proc_mutex);
        !           233:        p->next = raidPtr->recon_done_procs;
        !           234:        raidPtr->recon_done_procs = p;
        !           235:        RF_UNLOCK_MUTEX(raidPtr->recon_done_proc_mutex);
        !           236:        if (handlep)
        !           237:                *handlep = p;
        !           238:        return (0);
        !           239: }
        !           240:
        !           241: /*****************************************************************************
        !           242:  *
        !           243:  * Sets up the parameters that will be used by the reconstruction process.
        !           244:  * Currently there are none, except for those that the layout-specific
        !           245:  * configuration (e.g. rf_ConfigureDeclustered) routine sets up.
        !           246:  *
        !           247:  * In the kernel, we fire off the recon thread.
        !           248:  *
                          * NOTE(review): the paragraphs above read as a description of
                          * rf_ConfigureReconstruction(), which is defined after the function
                          * directly below; that function is the matching teardown.
                          *
                          * rf_ShutdownReconstruction() destroys the two free lists owned by this
                          * file (reconstruction descriptors and recon-done callback records).
                          * Its argument is unused.  rf_ConfigureReconstruction() registers it
                          * through rf_ShutdownCreate() and also calls it directly when that
                          * registration fails.
                          *
        !           249:  *****************************************************************************/
        !           250: void
        !           251: rf_ShutdownReconstruction(void *ignored)
        !           252: {
        !           253:        RF_FREELIST_DESTROY(rf_recond_freelist, next, (RF_RaidReconDesc_t *));
        !           254:        RF_FREELIST_DESTROY(rf_rdp_freelist, next, (RF_ReconDoneProc_t *));
        !           255: }
        !           256:
                         /*
                          * Create the free lists used by this file and arrange for them to be
                          * destroyed at shutdown.
                          *
                          * listp  shutdown list on which rf_ShutdownReconstruction() is queued
                          *
                          * Returns 0 on success, ENOMEM if either free list cannot be created,
                          * or the non-zero value returned by rf_ShutdownCreate().  On every
                          * failure path the free lists created so far are destroyed again.
                          */
        !           257: int
        !           258: rf_ConfigureReconstruction(RF_ShutdownList_t **listp)
        !           259: {
        !           260:        int rc;
        !           261:
        !           262:        RF_FREELIST_CREATE(rf_recond_freelist, RF_MAX_FREE_RECOND,
        !           263:            RF_RECOND_INC, sizeof(RF_RaidReconDesc_t));
        !           264:        if (rf_recond_freelist == NULL)
        !           265:                return (ENOMEM);
        !           266:        RF_FREELIST_CREATE(rf_rdp_freelist, RF_MAX_FREE_RDP,
        !           267:            RF_RDP_INC, sizeof(RF_ReconDoneProc_t));
        !           268:        if (rf_rdp_freelist == NULL) {
                                        /* Undo the first creation; nothing is registered yet. */
        !           269:                RF_FREELIST_DESTROY(rf_recond_freelist, next,
        !           270:                    (RF_RaidReconDesc_t *));
        !           271:                return (ENOMEM);
        !           272:        }
        !           273:        rc = rf_ShutdownCreate(listp, rf_ShutdownReconstruction, NULL);
        !           274:        if (rc) {
        !           275:                RF_ERRORMSG3("Unable to add to shutdown list file %s line %d"
        !           276:                    " rc=%d.\n", __FILE__, __LINE__, rc);
                                        /* The hook was not queued, so tear down by hand. */
        !           277:                rf_ShutdownReconstruction(NULL);
        !           278:                return (rc);
        !           279:        }
        !           280:        return (0);
        !           281: }
        !           282:
                         /*
                          * Take a reconstruction descriptor from rf_recond_freelist and fill in
                          * its identifying fields from the arguments.
                          *
                          * raidPtr       array being reconstructed
                          * row, col      stored in the descriptor's row and col fields
                          * spareDiskPtr  stored as the descriptor's spare disk pointer
                          * numDisksDone  stored as the descriptor's numDisksDone counter
                          * srow, scol    stored in the descriptor's srow and scol fields
                          *
                          * The state field is set to 0 and next to NULL.  Other descriptor
                          * fields are not initialized here.  Returns the descriptor.
                          *
                          * NOTE(review): the result of RF_FREELIST_GET is dereferenced without
                          * a NULL check, whereas rf_RegisterReconDoneProc() tests the result of
                          * the same macro for NULL.  Confirm whether the macro can yield NULL
                          * here.
                          */
        !           283: RF_RaidReconDesc_t *
        !           284: rf_AllocRaidReconDesc(RF_Raid_t *raidPtr, RF_RowCol_t row, RF_RowCol_t col,
        !           285:     RF_RaidDisk_t *spareDiskPtr, int numDisksDone, RF_RowCol_t srow,
        !           286:     RF_RowCol_t scol)
        !           287: {
        !           288:
        !           289:        RF_RaidReconDesc_t *reconDesc;
        !           290:
        !           291:        RF_FREELIST_GET(rf_recond_freelist, reconDesc, next,
        !           292:            (RF_RaidReconDesc_t *));
        !           293:
        !           294:        reconDesc->raidPtr = raidPtr;
        !           295:        reconDesc->row = row;
        !           296:        reconDesc->col = col;
        !           297:        reconDesc->spareDiskPtr = spareDiskPtr;
        !           298:        reconDesc->numDisksDone = numDisksDone;
        !           299:        reconDesc->srow = srow;
        !           300:        reconDesc->scol = scol;
        !           301:        reconDesc->state = 0;
        !           302:        reconDesc->next = NULL;
        !           303:
        !           304:        return (reconDesc);
        !           305: }
        !           306:
                         /*
                          * Print the statistics kept in a reconstruction descriptor and return
                          * the descriptor to rf_recond_freelist.
                          *
                          * The event-wait and delay counters are printed only when
                          * RF_RECON_STATS > 0; the maximum exec tick count is always printed.
                          * The descriptor must not be used after this call.
                          */
        !           307: void
        !           308: rf_FreeReconDesc(RF_RaidReconDesc_t *reconDesc)
        !           309: {
        !           310: #if    RF_RECON_STATS > 0
        !           311:        printf("RAIDframe: %qu recon event waits, %qu recon delays.\n",
        !           312:            reconDesc->numReconEventWaits, reconDesc->numReconExecDelays);
        !           313: #endif /* RF_RECON_STATS > 0 */
        !           314:
        !           315:        printf("RAIDframe: %qu max exec ticks.\n",
        !           316:            reconDesc->maxReconExecTicks);
        !           317:
        !           318: #if    (RF_RECON_STATS > 0) || defined(_KERNEL)
        !           319:        printf("\n");
        !           320: #endif /* (RF_RECON_STATS > 0) || _KERNEL */
        !           321:        RF_FREELIST_FREE(rf_recond_freelist, reconDesc, next);
        !           322: }
        !           323:
        !           324:
        !           325: /*****************************************************************************
        !           326:  *
        !           327:  * Primary routine to reconstruct a failed disk. This should be called from
        !           328:  * within its own thread. It won't return until reconstruction completes,
        !           329:  * fails, or is aborted.
        !           330:  *
                          * If the array's layout provides no SubmitReconBuffer routine, an error
                          * is logged and EIO is returned.  Otherwise the caller waits until no
                          * other reconstruction is in progress on this array, marks one as in
                          * progress, runs rf_ReconstructFailedDiskBasic() and returns its result.
                          *
                          * In both cases waiters on raidPtr->waitForReconCond are signalled
                          * before returning.
                          *
        !           331:  *****************************************************************************/
        !           332: int
        !           333: rf_ReconstructFailedDisk(RF_Raid_t *raidPtr, RF_RowCol_t row, RF_RowCol_t col)
        !           334: {
        !           335:        RF_LayoutSW_t *lp;
        !           336:        int rc;
        !           337:
        !           338:        lp = raidPtr->Layout.map;
        !           339:        if (lp->SubmitReconBuffer) {
        !           340:                /*
        !           341:                 * The current infrastructure only supports reconstructing one
        !           342:                 * disk at a time for each array.
        !           343:                 */
        !           344:                RF_LOCK_MUTEX(raidPtr->mutex);
        !           345:                while (raidPtr->reconInProgress) {
        !           346:                        RF_WAIT_COND(raidPtr->waitForReconCond, raidPtr->mutex);
        !           347:                }
        !           348:                raidPtr->reconInProgress++;
        !           349:                RF_UNLOCK_MUTEX(raidPtr->mutex);
                                        /* The mutex is not held while reconstruction runs. */
        !           350:                rc = rf_ReconstructFailedDiskBasic(raidPtr, row, col);
        !           351:                RF_LOCK_MUTEX(raidPtr->mutex);
        !           352:                raidPtr->reconInProgress--;
        !           353:                RF_UNLOCK_MUTEX(raidPtr->mutex);
        !           354:        } else {
        !           355:                RF_ERRORMSG1("RECON: no way to reconstruct failed disk for"
        !           356:                    " arch %c.\n", lp->parityConfig);
        !           357:                rc = EIO;
        !           358:        }
        !           359:        RF_SIGNAL_COND(raidPtr->waitForReconCond);
        !           360:        wakeup(&raidPtr->waitForReconCond);     /*
        !           361:                                                 * XXX Methinks this will be
        !           362:                                                 * needed at some point... GO
        !           363:                                                 */
        !           364:        return (rc);
        !           365: }
        !           366:
                         /*
                          * Pick the reconstruction target for the failed disk at (row, col),
                          * run the reconstruction, and on success rewrite the component label
                          * of the target.
                          *
                          * Returns EINVAL if the layout uses distributed sparing but the row is
                          * not degraded, ENOSPC if no disk with status rf_ds_spare is found,
                          * and otherwise the value returned by
                          * rf_ContinueReconstructFailedDisk().
                          *
                          * NOTE(review): in the distributed-sparing branch scol is set to -1,
                          * and on success it is used to index raidPtr->raid_cinfo[srow][scol]
                          * when the component label is updated.  Confirm that path cannot be
                          * reached, or guard the label update.
                          *
                          * NOTE(review): the chosen spare is marked rf_ds_used_spare before
                          * reconstruction starts and is not restored in this function when rc
                          * is non-zero; check whether the callee handles that.
                          */
        !           367: int
        !           368: rf_ReconstructFailedDiskBasic(RF_Raid_t *raidPtr, RF_RowCol_t row,
        !           369:     RF_RowCol_t col)
        !           370: {
        !           371:        RF_ComponentLabel_t c_label;
        !           372:        RF_RaidDisk_t *spareDiskPtr = NULL;
        !           373:        RF_RaidReconDesc_t *reconDesc;
        !           374:        RF_RowCol_t srow, scol;
        !           375:        int numDisksDone = 0, rc;
        !           376:
        !           377:        /* First look for a spare drive onto which to reconstruct the data. */
        !           378:        /*
        !           379:         * Spare disk descriptors are stored in row 0. This may have to
        !           380:         * change eventually.
        !           381:         */
        !           382:
        !           383:        RF_LOCK_MUTEX(raidPtr->mutex);
        !           384:        RF_ASSERT(raidPtr->Disks[row][col].status == rf_ds_failed);
        !           385:
        !           386:        if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) {
        !           387:                if (raidPtr->status[row] != rf_rs_degraded) {
        !           388:                        RF_ERRORMSG2("Unable to reconstruct disk at row %d"
        !           389:                            " col %d because status not degraded.\n", row, col);
        !           390:                        RF_UNLOCK_MUTEX(raidPtr->mutex);
        !           391:                        return (EINVAL);
        !           392:                }
                                        /* No dedicated spare disk: spareDiskPtr stays NULL. */
        !           393:                srow = row;
        !           394:                scol = (-1);
        !           395:        } else {
                                        /* Scan the columns past numCol in row 0 for a free spare. */
        !           396:                srow = 0;
        !           397:                for (scol = raidPtr->numCol;
        !           398:                     scol < raidPtr->numCol + raidPtr->numSpare; scol++) {
        !           399:                        if (raidPtr->Disks[srow][scol].status == rf_ds_spare) {
        !           400:                                spareDiskPtr = &raidPtr->Disks[srow][scol];
        !           401:                                spareDiskPtr->status = rf_ds_used_spare;
        !           402:                                break;
        !           403:                        }
        !           404:                }
        !           405:                if (!spareDiskPtr) {
        !           406:                        RF_ERRORMSG2("Unable to reconstruct disk at row %d"
        !           407:                            " col %d because no spares are available.\n",
        !           408:                            row, col);
        !           409:                        RF_UNLOCK_MUTEX(raidPtr->mutex);
        !           410:                        return (ENOSPC);
        !           411:                }
        !           412:                printf("RECON: initiating reconstruction on row %d col %d"
        !           413:                    " -> spare at row %d col %d.\n", row, col, srow, scol);
        !           414:        }
        !           415:        RF_UNLOCK_MUTEX(raidPtr->mutex);
        !           416:
                                /*
                                 * NOTE(review): reconDesc is used below without a NULL check;
                                 * see rf_AllocRaidReconDesc().
                                 */
        !           417:        reconDesc = rf_AllocRaidReconDesc((void *) raidPtr, row, col,
        !           418:            spareDiskPtr, numDisksDone, srow, scol);
        !           419:        raidPtr->reconDesc = (void *) reconDesc;
        !           420: #if    RF_RECON_STATS > 0
        !           421:        reconDesc->hsStallCount = 0;
        !           422:        reconDesc->numReconExecDelays = 0;
        !           423:        reconDesc->numReconEventWaits = 0;
        !           424: #endif /* RF_RECON_STATS > 0 */
        !           425:        reconDesc->reconExecTimerRunning = 0;
        !           426:        reconDesc->reconExecTicks = 0;
        !           427:        reconDesc->maxReconExecTicks = 0;
        !           428:        rc = rf_ContinueReconstructFailedDisk(reconDesc);
        !           429:
        !           430:        if (!rc) {
        !           431:                /* Fix up the component label. */
        !           432:                /* Don't actually need the read here... */
        !           433:                raidread_component_label(
        !           434:                    raidPtr->raid_cinfo[srow][scol].ci_dev,
        !           435:                    raidPtr->raid_cinfo[srow][scol].ci_vp,
        !           436:                    &c_label);
        !           437:
                                        /*
                                         * The label written to the target takes the position of
                                         * the disk it replaces (row, col), not (srow, scol).
                                         */
        !           438:                raid_init_component_label(raidPtr, &c_label);
        !           439:                c_label.row = row;
        !           440:                c_label.column = col;
        !           441:                c_label.clean = RF_RAID_DIRTY;
        !           442:                c_label.status = rf_ds_optimal;
        !           443:
        !           444:                /* XXXX MORE NEEDED HERE. */
        !           445:
        !           446:                raidwrite_component_label(
        !           447:                    raidPtr->raid_cinfo[srow][scol].ci_dev,
        !           448:                    raidPtr->raid_cinfo[srow][scol].ci_vp,
        !           449:                    &c_label);
        !           450:
        !           451:        }
        !           452:        return (rc);
        !           453: }
        !           454:
        !           455: /*
        !           456:  *
        !           457:  * Allow reconstructing a disk in-place -- i.e. component /dev/sd2e goes AWOL,
        !           458:  * and you don't get a spare until the next Monday. With this function
        !           459:  * (and hot-swappable drives) you can now put your new disk containing
        !           460:  * /dev/sd2e on the bus, scsictl it alive, and then use raidctl(8) to
        !           461:  * rebuild the data "on the spot".
        !           462:  *
        !           463:  */
        !           464:
        !           465: int
        !           466: rf_ReconstructInPlace(RF_Raid_t *raidPtr, RF_RowCol_t row, RF_RowCol_t col)
        !           467: {
        !           468:        RF_RaidDisk_t *spareDiskPtr = NULL;
        !           469:        RF_RaidReconDesc_t *reconDesc;
        !           470:        RF_LayoutSW_t *lp;
        !           471:        RF_RaidDisk_t *badDisk;
        !           472:        RF_ComponentLabel_t c_label;
        !           473:        int numDisksDone = 0, rc;
        !           474:        struct partinfo dpart;
        !           475:        struct vnode *vp;
        !           476:        struct vattr va;
        !           477:        struct proc *proc;
        !           478:        int retcode;
        !           479:        int ac;
        !           480:
        !           481:        lp = raidPtr->Layout.map;
        !           482:        if (lp->SubmitReconBuffer) {
        !           483:                /*
        !           484:                 * The current infrastructure only supports reconstructing one
        !           485:                 * disk at a time for each array.
        !           486:                 */
        !           487:                RF_LOCK_MUTEX(raidPtr->mutex);
        !           488:                if ((raidPtr->Disks[row][col].status == rf_ds_optimal) &&
        !           489:                    (raidPtr->numFailures > 0)) {
        !           490:                        /* XXX 0 above shouldn't be constant !!! */
        !           491:                        /*
        !           492:                         * Some component other than this has failed.
        !           493:                         * Let's not make things worse than they already
        !           494:                         * are...
        !           495:                         */
        !           496: #ifdef RAIDDEBUG
        !           497:                        printf("RAIDFRAME: Unable to reconstruct to disk at:\n"
        !           498:                            "      Row: %d Col: %d   Too many failures.\n",
        !           499:                            row, col);
        !           500: #endif /* RAIDDEBUG */
        !           501:                        RF_UNLOCK_MUTEX(raidPtr->mutex);
        !           502:                        return (EINVAL);
        !           503:                }
        !           504:                if (raidPtr->Disks[row][col].status == rf_ds_reconstructing) {
        !           505: #ifdef RAIDDEBUG
        !           506:                        printf("RAIDFRAME: Unable to reconstruct to disk at:\n"
        !           507:                            "      Row: %d Col: %d   Reconstruction already"
        !           508:                            " occurring !\n", row, col);
        !           509: #endif /* RAIDDEBUG */
        !           510:
        !           511:                        RF_UNLOCK_MUTEX(raidPtr->mutex);
        !           512:                        return (EINVAL);
        !           513:                }
        !           514:
        !           515:
        !           516:                if (raidPtr->Disks[row][col].status != rf_ds_failed) {
        !           517:                        /* "It's gone..." */
        !           518:                        raidPtr->numFailures++;
        !           519:                        raidPtr->Disks[row][col].status = rf_ds_failed;
        !           520:                        raidPtr->status[row] = rf_rs_degraded;
        !           521:                        rf_update_component_labels(raidPtr,
        !           522:                            RF_NORMAL_COMPONENT_UPDATE);
        !           523:                }
        !           524:
        !           525:                while (raidPtr->reconInProgress) {
        !           526:                        RF_WAIT_COND(raidPtr->waitForReconCond, raidPtr->mutex);
        !           527:                }
        !           528:
        !           529:                raidPtr->reconInProgress++;
        !           530:
        !           531:                /*
        !           532:                 * First look for a spare drive onto which to reconstruct
        !           533:                 * the data. Spare disk descriptors are stored in row 0.
        !           534:                 * This may have to change eventually.
        !           535:                 */
        !           536:
        !           537:                /*
        !           538:                 * Actually, we don't care if it's failed or not...
        !           539:                 * On a RAID set with correct parity, this function
        !           540:                 * should be callable on any component without ill effects.
        !           541:                 */
        !           542:                /*
        !           543:                 * RF_ASSERT(raidPtr->Disks[row][col].status == rf_ds_failed);
        !           544:                 */
        !           545:
        !           546:                if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) {
        !           547:                        RF_ERRORMSG2("Unable to reconstruct to disk at row %d"
        !           548:                            " col %d: operation not supported for"
        !           549:                            " RF_DISTRIBUTE_SPARE.\n", row, col);
        !           550:
        !           551:                        raidPtr->reconInProgress--;
        !           552:                        RF_UNLOCK_MUTEX(raidPtr->mutex);
        !           553:                        return (EINVAL);
        !           554:                }
        !           555:
        !           556:                /*
        !           557:                 * XXX Need goop here to see if the disk is alive,
        !           558:                 * and, if not, make it so...
        !           559:                 */
        !           560:
        !           561:                badDisk = &raidPtr->Disks[row][col];
        !           562:
        !           563:                proc = raidPtr->recon_thread;
        !           564:
        !           565:                /*
        !           566:                 * This device may have been opened successfully the
        !           567:                 * first time. Close it before trying to open it again...
        !           568:                 */
        !           569:
        !           570:                if (raidPtr->raid_cinfo[row][col].ci_vp != NULL) {
        !           571:                        printf("Closing the opened device: %s\n",
        !           572:                            raidPtr->Disks[row][col].devname);
        !           573:                        vp = raidPtr->raid_cinfo[row][col].ci_vp;
        !           574:                        ac = raidPtr->Disks[row][col].auto_configured;
        !           575:                        rf_close_component(raidPtr, vp, ac);
        !           576:                        raidPtr->raid_cinfo[row][col].ci_vp = NULL;
        !           577:                }
        !           578:                /*
        !           579:                 * Note that this disk was *not* auto_configured (any longer).
        !           580:                 */
        !           581:                raidPtr->Disks[row][col].auto_configured = 0;
        !           582:
        !           583:                printf("About to (re-)open the device for rebuilding: %s\n",
        !           584:                    raidPtr->Disks[row][col].devname);
        !           585:
        !           586:                retcode = raidlookup(raidPtr->Disks[row][col].devname,
        !           587:                    proc, &vp);
        !           588:
        !           589:                if (retcode) {
        !           590:                        printf("raid%d: rebuilding: raidlookup on device: %s"
        !           591:                            " failed: %d !\n", raidPtr->raidid,
        !           592:                            raidPtr->Disks[row][col].devname, retcode);
        !           593:
        !           594:                        /*
        !           595:                         * XXX the component isn't responding properly...
        !           596:                         * Must still be dead :-(
        !           597:                         */
        !           598:                        raidPtr->reconInProgress--;
        !           599:                        RF_UNLOCK_MUTEX(raidPtr->mutex);
        !           600:                        return(retcode);
        !           601:
        !           602:                } else {
        !           603:
        !           604:                        /*
        !           605:                         * Ok, so we can at least do a lookup...
        !           606:                         * How about actually getting a vp for it ?
        !           607:                         */
        !           608:
        !           609:                        if ((retcode =
        !           610:                             VOP_GETATTR(vp, &va, proc->p_ucred, proc)) != 0) {
        !           611:                                raidPtr->reconInProgress--;
        !           612:                                RF_UNLOCK_MUTEX(raidPtr->mutex);
        !           613:                                return(retcode);
        !           614:                        }
        !           615:                        retcode = VOP_IOCTL(vp, DIOCGPART, (caddr_t) & dpart,
        !           616:                            FREAD, proc->p_ucred, proc);
        !           617:                        if (retcode) {
        !           618:                                raidPtr->reconInProgress--;
        !           619:                                RF_UNLOCK_MUTEX(raidPtr->mutex);
        !           620:                                return(retcode);
        !           621:                        }
        !           622:                        raidPtr->Disks[row][col].blockSize =
        !           623:                            dpart.disklab->d_secsize;
        !           624:
        !           625:                        raidPtr->Disks[row][col].numBlocks =
        !           626:                            DL_GETPSIZE(dpart.part) - rf_protectedSectors;
        !           627:
        !           628:                        raidPtr->raid_cinfo[row][col].ci_vp = vp;
        !           629:                        raidPtr->raid_cinfo[row][col].ci_dev = va.va_rdev;
        !           630:
        !           631:                        raidPtr->Disks[row][col].dev = va.va_rdev;
        !           632:
        !           633:                        /*
        !           634:                         * We allow the user to specify that only a
        !           635:                         * fraction of the disks should be used this is
        !           636:                         * just for debug:  it speeds up the parity scan.
        !           637:                         */
        !           638:                        raidPtr->Disks[row][col].numBlocks =
        !           639:                            raidPtr->Disks[row][col].numBlocks *
        !           640:                            rf_sizePercentage / 100;
        !           641:                }
        !           642:
        !           643:                spareDiskPtr = &raidPtr->Disks[row][col];
        !           644:                spareDiskPtr->status = rf_ds_used_spare;
        !           645:
        !           646:                printf("RECON: Initiating in-place reconstruction on\n");
        !           647:                printf("       row %d col %d -> spare at row %d col %d.\n",
        !           648:                    row, col, row, col);
        !           649:
        !           650:                RF_UNLOCK_MUTEX(raidPtr->mutex);
        !           651:
        !           652:                reconDesc = rf_AllocRaidReconDesc((void *) raidPtr, row, col,
        !           653:                    spareDiskPtr, numDisksDone, row, col);
        !           654:                raidPtr->reconDesc = (void *) reconDesc;
        !           655: #if    RF_RECON_STATS > 0
        !           656:                reconDesc->hsStallCount = 0;
        !           657:                reconDesc->numReconExecDelays = 0;
        !           658:                reconDesc->numReconEventWaits = 0;
        !           659: #endif /* RF_RECON_STATS > 0 */
        !           660:                reconDesc->reconExecTimerRunning = 0;
        !           661:                reconDesc->reconExecTicks = 0;
        !           662:                reconDesc->maxReconExecTicks = 0;
        !           663:                rc = rf_ContinueReconstructFailedDisk(reconDesc);
        !           664:
        !           665:                RF_LOCK_MUTEX(raidPtr->mutex);
        !           666:                raidPtr->reconInProgress--;
        !           667:                RF_UNLOCK_MUTEX(raidPtr->mutex);
        !           668:
        !           669:        } else {
        !           670:                RF_ERRORMSG1("RECON: no way to reconstruct failed disk for"
        !           671:                    " arch %c.\n", lp->parityConfig);
        !           672:                rc = EIO;
        !           673:        }
        !           674:        RF_LOCK_MUTEX(raidPtr->mutex);
        !           675:
        !           676:        if (!rc) {
        !           677:                /*
        !           678:                 * Need to set these here, as at this point it'll be claiming
        !           679:                 * that the disk is in rf_ds_spared !  But we know better :-)
        !           680:                 */
        !           681:
        !           682:                raidPtr->Disks[row][col].status = rf_ds_optimal;
        !           683:                raidPtr->status[row] = rf_rs_optimal;
        !           684:
        !           685:                /* Fix up the component label. */
        !           686:                /* Don't actually need the read here... */
        !           687:                raidread_component_label(
        !           688:                    raidPtr->raid_cinfo[row][col].ci_dev,
        !           689:                    raidPtr->raid_cinfo[row][col].ci_vp,
        !           690:                    &c_label);
        !           691:
        !           692:                raid_init_component_label(raidPtr, &c_label);
        !           693:
        !           694:                c_label.row = row;
        !           695:                c_label.column = col;
        !           696:
        !           697:                raidwrite_component_label(raidPtr->raid_cinfo[row][col].ci_dev,
        !           698:                    raidPtr->raid_cinfo[row][col].ci_vp, &c_label);
        !           699:
        !           700:        }
        !           701:        RF_UNLOCK_MUTEX(raidPtr->mutex);
        !           702:        RF_SIGNAL_COND(raidPtr->waitForReconCond);
        !           703:        wakeup(&raidPtr->waitForReconCond);
        !           704:        return (rc);
        !           705: }
        !           706:
        !           707:
        !           708: int
        !           709: rf_ContinueReconstructFailedDisk(RF_RaidReconDesc_t *reconDesc)
        !           710: {
        !           711:        RF_Raid_t *raidPtr = reconDesc->raidPtr;
        !           712:        RF_RowCol_t row = reconDesc->row;
        !           713:        RF_RowCol_t col = reconDesc->col;
        !           714:        RF_RowCol_t srow = reconDesc->srow;
        !           715:        RF_RowCol_t scol = reconDesc->scol;
        !           716:        RF_ReconMap_t *mapPtr;
        !           717:
        !           718:        RF_ReconEvent_t *event;
        !           719:        struct timeval etime, elpsd;
        !           720:        unsigned long xor_s, xor_resid_us;
        !           721:        int retcode, i, ds;
        !           722:
        !           723:        switch (reconDesc->state) {
        !           724:        case 0:
        !           725:                raidPtr->accumXorTimeUs = 0;
        !           726:
        !           727:                /* Create one trace record per physical disk. */
        !           728:                RF_Malloc(raidPtr->recon_tracerecs, raidPtr->numCol *
        !           729:                    sizeof(RF_AccTraceEntry_t), (RF_AccTraceEntry_t *));
        !           730:
        !           731:                /*
        !           732:                 * Quiesce the array prior to starting recon. This is needed
        !           733:                 * to assure no nasty interactions with pending user writes.
        !           734:                 * We need to do this before we change the disk or row status.
        !           735:                 */
        !           736:                reconDesc->state = 1;
        !           737:
        !           738:                Dprintf("RECON: begin request suspend.\n");
        !           739:                retcode = rf_SuspendNewRequestsAndWait(raidPtr);
        !           740:                Dprintf("RECON: end request suspend.\n");
        !           741:                rf_StartUserStats(raidPtr);     /*
        !           742:                                                 * Zero out the stats kept on
        !           743:                                                 * user accs.
        !           744:                                                 */
        !           745:                /* Fall through to state 1. */
        !           746:        case 1:
        !           747:                RF_LOCK_MUTEX(raidPtr->mutex);
        !           748:
        !           749:                /*
        !           750:                 * Create the reconstruction control pointer and install it in
        !           751:                 * the right slot.
        !           752:                 */
        !           753:                raidPtr->reconControl[row] =
        !           754:                    rf_MakeReconControl(reconDesc, row, col, srow, scol);
        !           755:                mapPtr = raidPtr->reconControl[row]->reconMap;
        !           756:                raidPtr->status[row] = rf_rs_reconstructing;
        !           757:                raidPtr->Disks[row][col].status = rf_ds_reconstructing;
        !           758:                raidPtr->Disks[row][col].spareRow = srow;
        !           759:                raidPtr->Disks[row][col].spareCol = scol;
        !           760:
        !           761:                RF_UNLOCK_MUTEX(raidPtr->mutex);
        !           762:
        !           763:                RF_GETTIME(raidPtr->reconControl[row]->starttime);
        !           764:
        !           765:                /*
        !           766:                 * Now start up the actual reconstruction: issue a read for
        !           767:                 * each surviving disk.
        !           768:                 */
        !           769:
        !           770:                reconDesc->numDisksDone = 0;
        !           771:                for (i = 0; i < raidPtr->numCol; i++) {
        !           772:                        if (i != col) {
        !           773:                                /*
        !           774:                                 * Find and issue the next I/O on the
        !           775:                                 * indicated disk.
        !           776:                                 */
        !           777:                                if (rf_IssueNextReadRequest(raidPtr, row, i)) {
        !           778:                                        Dprintf2("RECON: done issuing for r%d"
        !           779:                                            " c%d.\n", row, i);
        !           780:                                        reconDesc->numDisksDone++;
        !           781:                                }
        !           782:                        }
        !           783:                }
        !           784:
        !           785:                reconDesc->state = 2;
        !           786:
        !           787:        case 2:
        !           788:                Dprintf("RECON: resume requests.\n");
        !           789:                rf_ResumeNewRequests(raidPtr);
        !           790:
        !           791:                reconDesc->state = 3;
        !           792:
        !           793:        case 3:
        !           794:
        !           795:                /*
        !           796:                 * Process reconstruction events until all disks report that
        !           797:                 * they've completed all work.
        !           798:                 */
        !           799:                mapPtr = raidPtr->reconControl[row]->reconMap;
        !           800:
        !           801:                while (reconDesc->numDisksDone < raidPtr->numCol - 1) {
        !           802:
        !           803:                        event = rf_GetNextReconEvent(reconDesc, row,
        !           804:                           (void (*) (void *)) rf_ContinueReconstructFailedDisk,
        !           805:                            reconDesc);
        !           806:                        RF_ASSERT(event);
        !           807:
        !           808:                        if (rf_ProcessReconEvent(raidPtr, row, event))
        !           809:                                reconDesc->numDisksDone++;
        !           810:                        raidPtr->reconControl[row]->numRUsTotal =
        !           811:                                mapPtr->totalRUs;
        !           812:                        raidPtr->reconControl[row]->numRUsComplete =
        !           813:                                mapPtr->totalRUs -
        !           814:                                rf_UnitsLeftToReconstruct(mapPtr);
        !           815:
        !           816:                        raidPtr->reconControl[row]->percentComplete =
        !           817:                            (raidPtr->reconControl[row]->numRUsComplete * 100 /
        !           818:                             raidPtr->reconControl[row]->numRUsTotal);
        !           819:                        if (rf_prReconSched) {
        !           820:                                rf_PrintReconSchedule(
        !           821:                                    raidPtr->reconControl[row]->reconMap,
        !           822:                                    &(raidPtr->reconControl[row]->starttime));
        !           823:                        }
        !           824:                }
        !           825:
        !           826:                reconDesc->state = 4;
        !           827:
        !           828:        case 4:
        !           829:                mapPtr = raidPtr->reconControl[row]->reconMap;
        !           830:                if (rf_reconDebug) {
        !           831:                        printf("RECON: all reads completed.\n");
        !           832:                }
        !           833:                /*
        !           834:                 * At this point all the reads have completed. We now wait
        !           835:                 * for any pending writes to complete, and then we're done.
        !           836:                 */
        !           837:
        !           838:                while (rf_UnitsLeftToReconstruct(
        !           839:                    raidPtr->reconControl[row]->reconMap) > 0) {
        !           840:
        !           841:                        event = rf_GetNextReconEvent(reconDesc, row,
        !           842:                           (void (*) (void *)) rf_ContinueReconstructFailedDisk,
        !           843:                            reconDesc);
        !           844:                        RF_ASSERT(event);
        !           845:
        !           846:                        /* Ignore return code. */
        !           847:                        (void) rf_ProcessReconEvent(raidPtr, row, event);
        !           848:                        raidPtr->reconControl[row]->percentComplete =
        !           849:                            100 - (rf_UnitsLeftToReconstruct(mapPtr) * 100 /
        !           850:                            mapPtr->totalRUs);
        !           851:                        if (rf_prReconSched) {
        !           852:                                rf_PrintReconSchedule(
        !           853:                                    raidPtr->reconControl[row]->reconMap,
        !           854:                                    &(raidPtr->reconControl[row]->starttime));
        !           855:                        }
        !           856:                }
        !           857:                reconDesc->state = 5;
        !           858:
        !           859:        case 5:
        !           860:                /*
        !           861:                 * Success:  mark the dead disk as reconstructed. We quiesce
        !           862:                 * the array here to assure no nasty interactions with pending
        !           863:                 * user accesses, when we free up the psstatus structure as
        !           864:                 * part of FreeReconControl().
        !           865:                 */
        !           866:
        !           867:                reconDesc->state = 6;
        !           868:
        !           869:                retcode = rf_SuspendNewRequestsAndWait(raidPtr);
        !           870:                rf_StopUserStats(raidPtr);
        !           871:                rf_PrintUserStats(raidPtr);     /*
        !           872:                                                 * Print out the stats on user
        !           873:                                                 * accs accumulated during
        !           874:                                                 * recon.
        !           875:                                                 */
        !           876:
        !           877:                /* Fall through to state 6. */
        !           878:        case 6:
        !           879:                RF_LOCK_MUTEX(raidPtr->mutex);
        !           880:                raidPtr->numFailures--;
        !           881:                ds = (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE);
        !           882:                raidPtr->Disks[row][col].status = (ds) ? rf_ds_dist_spared :
        !           883:                                                         rf_ds_spared;
        !           884:                raidPtr->status[row] = (ds) ? rf_rs_reconfigured :
        !           885:                                              rf_rs_optimal;
        !           886:                RF_UNLOCK_MUTEX(raidPtr->mutex);
        !           887:                RF_GETTIME(etime);
        !           888:                RF_TIMEVAL_DIFF(&(raidPtr->reconControl[row]->starttime),
        !           889:                    &etime, &elpsd);
        !           890:
        !           891:                /*
        !           892:                 * XXX -- Why is state 7 different from state 6 if there is no
        !           893:                 * return() here ? -- XXX Note that I set elpsd above & use it
        !           894:                 * below, so if you put a return here you'll have to fix this.
        !           895:                 * (also, FreeReconControl is called below).
        !           896:                 */
        !           897:
        !           898:        case 7:
        !           899:
        !           900:                rf_ResumeNewRequests(raidPtr);
        !           901:
        !           902:                printf("Reconstruction of disk at row %d col %d completed.\n",
        !           903:                    row, col);
        !           904:                xor_s = raidPtr->accumXorTimeUs / 1000000;
        !           905:                xor_resid_us = raidPtr->accumXorTimeUs % 1000000;
        !           906:                printf("Recon time was %d.%06d seconds, accumulated XOR time"
        !           907:                    " was %ld us (%ld.%06ld).\n", (int) elpsd.tv_sec,
        !           908:                    (int) elpsd.tv_usec, raidPtr->accumXorTimeUs, xor_s,
        !           909:                    xor_resid_us);
        !           910:                printf("  (start time %d sec %d usec, end time %d sec %d"
        !           911:                    " usec)\n",
        !           912:                    (int) raidPtr->reconControl[row]->starttime.tv_sec,
        !           913:                    (int) raidPtr->reconControl[row]->starttime.tv_usec,
        !           914:                    (int) etime.tv_sec, (int) etime.tv_usec);
        !           915:
        !           916: #if    RF_RECON_STATS > 0
        !           917:                printf("Total head-sep stall count was %d.\n",
        !           918:                    (int) reconDesc->hsStallCount);
        !           919: #endif /* RF_RECON_STATS > 0 */
        !           920:                rf_FreeReconControl(raidPtr, row);
        !           921:                RF_Free(raidPtr->recon_tracerecs, raidPtr->numCol *
        !           922:                    sizeof(RF_AccTraceEntry_t));
        !           923:                rf_FreeReconDesc(reconDesc);
        !           924:
        !           925:        }
        !           926:
        !           927:        rf_SignalReconDone(raidPtr);
        !           928:        return (0);
        !           929: }
        !           930:
        !           931:
        !           932: /*****************************************************************************
        !           933:  * Do the right thing upon each reconstruction event.
        !           934:  * Returns nonzero if and only if there is nothing left unread on the
        !           935:  * indicated disk.
        !           936:  *****************************************************************************/
        !           937: int
        !           938: rf_ProcessReconEvent(RF_Raid_t *raidPtr, RF_RowCol_t frow,
        !           939:     RF_ReconEvent_t *event)
        !           940: {
        !           941:        int retcode = 0, submitblocked;
        !           942:        RF_ReconBuffer_t *rbuf;
        !           943:        RF_SectorCount_t sectorsPerRU;
        !           944:
        !           945:        Dprintf1("RECON: rf_ProcessReconEvent type %d.\n", event->type);
        !           946:
        !           947:        switch (event->type) {
        !           948:
        !           949:                /* A read I/O has completed. */
        !           950:        case RF_REVENT_READDONE:
        !           951:                rbuf = raidPtr->reconControl[frow]
        !           952:                    ->perDiskInfo[event->col].rbuf;
        !           953:                Dprintf3("RECON: READDONE EVENT: row %d col %d psid %ld.\n",
        !           954:                    frow, event->col, rbuf->parityStripeID);
        !           955:                Dprintf7("RECON: done read  psid %ld buf %lx  %02x %02x %02x"
        !           956:                    " %02x %02x.\n", rbuf->parityStripeID, rbuf->buffer,
        !           957:                    rbuf->buffer[0] & 0xff, rbuf->buffer[1] & 0xff,
        !           958:                    rbuf->buffer[2] & 0xff, rbuf->buffer[3] & 0xff,
        !           959:                    rbuf->buffer[4] & 0xff);
        !           960:                rf_FreeDiskQueueData((RF_DiskQueueData_t *) rbuf->arg);
        !           961:                submitblocked = rf_SubmitReconBuffer(rbuf, 0, 0);
        !           962:                Dprintf1("RECON: submitblocked=%d.\n", submitblocked);
        !           963:                if (!submitblocked)
        !           964:                        retcode = rf_IssueNextReadRequest(raidPtr, frow,
        !           965:                            event->col);
        !           966:                break;
        !           967:
        !           968:                /* A write I/O has completed. */
        !           969:        case RF_REVENT_WRITEDONE:
        !           970:                if (rf_floatingRbufDebug) {
        !           971:                        rf_CheckFloatingRbufCount(raidPtr, 1);
        !           972:                }
        !           973:                sectorsPerRU = raidPtr->Layout.sectorsPerStripeUnit *
        !           974:                    raidPtr->Layout.SUsPerRU;
        !           975:                rbuf = (RF_ReconBuffer_t *) event->arg;
        !           976:                rf_FreeDiskQueueData((RF_DiskQueueData_t *) rbuf->arg);
        !           977:                Dprintf3("RECON: WRITEDONE EVENT: psid %d ru %d"
        !           978:                    " (%d %% complete).\n",
        !           979:                    rbuf->parityStripeID, rbuf->which_ru,
        !           980:                    raidPtr->reconControl[frow]->percentComplete);
        !           981:                rf_ReconMapUpdate(raidPtr, raidPtr->reconControl[frow]
        !           982:                    ->reconMap, rbuf->failedDiskSectorOffset,
        !           983:                    rbuf->failedDiskSectorOffset + sectorsPerRU - 1);
        !           984:                rf_RemoveFromActiveReconTable(raidPtr, frow,
        !           985:                    rbuf->parityStripeID, rbuf->which_ru);
        !           986:
        !           987:                if (rbuf->type == RF_RBUF_TYPE_FLOATING) {
        !           988:                        RF_LOCK_MUTEX(raidPtr->reconControl[frow]->rb_mutex);
        !           989:                        raidPtr->numFullReconBuffers--;
        !           990:                        rf_ReleaseFloatingReconBuffer(raidPtr, frow, rbuf);
        !           991:                        RF_UNLOCK_MUTEX(raidPtr->reconControl[frow]->rb_mutex);
        !           992:                } else
        !           993:                        if (rbuf->type == RF_RBUF_TYPE_FORCED)
        !           994:                                rf_FreeReconBuffer(rbuf);
        !           995:                        else
        !           996:                                RF_ASSERT(0);
        !           997:                break;
        !           998:
        !           999:                /* A buffer-stall condition has been cleared. */
        !          1000:        case RF_REVENT_BUFCLEAR:
        !          1001:                Dprintf2("RECON: BUFCLEAR EVENT: row %d col %d.\n", frow,
        !          1002:                    event->col);
        !          1003:                submitblocked = rf_SubmitReconBuffer(raidPtr
        !          1004:                    ->reconControl[frow]->perDiskInfo[event->col].rbuf, 0,
        !          1005:                    (int) (long) event->arg);
        !          1006:                RF_ASSERT(!submitblocked);      /*
        !          1007:                                                 * We wouldn't have gotten the
        !          1008:                                                 * BUFCLEAR event if we
        !          1009:                                                 * couldn't submit.
        !          1010:                                                 */
        !          1011:                retcode = rf_IssueNextReadRequest(raidPtr, frow, event->col);
        !          1012:                break;
        !          1013:
        !          1014:                /* A user-write reconstruction blockage has been cleared. */
        !          1015:        case RF_REVENT_BLOCKCLEAR:
        !          1016:                DDprintf2("RECON: BLOCKCLEAR EVENT: row %d col %d.\n",
        !          1017:                    frow, event->col);
        !          1018:                retcode = rf_TryToRead(raidPtr, frow, event->col);
        !          1019:                break;
        !          1020:
        !          1021:                /*
        !          1022:                 * A max-head-separation reconstruction blockage has been
        !          1023:                 * cleared.
        !          1024:                 */
        !          1025:        case RF_REVENT_HEADSEPCLEAR:
        !          1026:                Dprintf2("RECON: HEADSEPCLEAR EVENT: row %d col %d.\n",
        !          1027:                    frow, event->col);
        !          1028:                retcode = rf_TryToRead(raidPtr, frow, event->col);
        !          1029:                break;
        !          1030:
        !          1031:                /* A buffer has become ready to write. */
        !          1032:        case RF_REVENT_BUFREADY:
        !          1033:                Dprintf2("RECON: BUFREADY EVENT: row %d col %d.\n",
        !          1034:                    frow, event->col);
        !          1035:                retcode = rf_IssueNextWriteRequest(raidPtr, frow);
        !          1036:                if (rf_floatingRbufDebug) {
        !          1037:                        rf_CheckFloatingRbufCount(raidPtr, 1);
        !          1038:                }
        !          1039:                break;
        !          1040:
        !          1041:                /*
        !          1042:                 * We need to skip the current RU entirely because it got
        !          1043:                 * recon'd while we were waiting for something else to happen.
        !          1044:                 */
        !          1045:        case RF_REVENT_SKIP:
        !          1046:                DDprintf2("RECON: SKIP EVENT: row %d col %d.\n",
        !          1047:                    frow, event->col);
        !          1048:                retcode = rf_IssueNextReadRequest(raidPtr, frow, event->col);
        !          1049:                break;
        !          1050:
        !          1051:                /*
        !          1052:                 * A forced-reconstruction read access has completed. Just
        !          1053:                 * submit the buffer.
        !          1054:                 */
        !          1055:        case RF_REVENT_FORCEDREADDONE:
        !          1056:                rbuf = (RF_ReconBuffer_t *) event->arg;
        !          1057:                rf_FreeDiskQueueData((RF_DiskQueueData_t *) rbuf->arg);
        !          1058:                DDprintf2("RECON: FORCEDREADDONE EVENT: row %d col %d.\n",
        !          1059:                    frow, event->col);
        !          1060:                submitblocked = rf_SubmitReconBuffer(rbuf, 1, 0);
        !          1061:                RF_ASSERT(!submitblocked);
        !          1062:                break;
        !          1063:
        !          1064:        default:
        !          1065:                RF_PANIC();
        !          1066:        }
        !          1067:        rf_FreeReconEventDesc(event);
        !          1068:        return (retcode);
        !          1069: }
        !          1070:
        !          1071: /*****************************************************************************
        !          1072:  *
        !          1073:  * Find the next thing that's needed on the indicated disk, and issue
        !          1074:  * a read request for it. We assume that the reconstruction buffer
        !          1075:  * associated with this process is free to receive the data. If
        !          1076:  * reconstruction is blocked on the indicated RU, we issue a
        !          1077:  * blockage-release request instead of a physical disk read request.
        !          1078:  * If the current disk gets too far ahead of the others, we issue a
        !          1079:  * head-separation wait request and return.
        !          1080:  *
        !          1081:  * ctrl->{ru_count, curPSID, diskOffset} and
        !          1082:  * rbuf->failedDiskSectorOffset are maintained to point to the unit
        !          1083:  * we're currently accessing. Note that this deviates from the
        !          1084:  * standard C idiom of having counters point to the next thing to be
        !          1085:  * accessed. This allows us to easily retry when we're blocked by
        !          1086:  * head separation or reconstruction-blockage events.
        !          1087:  *
        !          1088:  * Returns nonzero if and only if there is nothing left unread on the
        !          1089:  * indicated disk.
                          *
                          * Concretely: returns 1 as soon as ctrl->curPSID reaches the row's
                          * lastPSID; otherwise returns whatever rf_TryToRead() returns for
                          * the reconstruction unit (RU) selected here.
                          *
                          * raidPtr  - RAID descriptor.
                          * row, col - Row and column of the disk to read from next.
        !          1090:  *
        !          1091:  *****************************************************************************/
        !          1092: int
        !          1093: rf_IssueNextReadRequest(RF_Raid_t *raidPtr, RF_RowCol_t row, RF_RowCol_t col)
        !          1094: {
        !          1095:        RF_PerDiskReconCtrl_t *ctrl =
        !          1096:            &raidPtr->reconControl[row]->perDiskInfo[col];
        !          1097:        RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
        !          1098:        RF_ReconBuffer_t *rbuf = ctrl->rbuf;
        !          1099:        RF_ReconUnitCount_t RUsPerPU =
        !          1100:            layoutPtr->SUsPerPU / layoutPtr->SUsPerRU;
        !          1101:        RF_SectorCount_t sectorsPerRU =
        !          1102:            layoutPtr->sectorsPerStripeUnit * layoutPtr->SUsPerRU;
        !          1103:        int do_new_check = 0, retcode = 0, status;
        !          1104:
        !          1105:        /*
        !          1106:         * If we are currently the slowest disk, mark that we have to do a new
        !          1107:         * check.
        !          1108:         */
        !          1109:        if (ctrl->headSepCounter <=
        !          1110:            raidPtr->reconControl[row]->minHeadSepCounter)
        !          1111:                do_new_check = 1;
        !          1112:
                                /*
                                 * Advance one RU at a time until we either run off the end of the
                                 * disk (return 1) or land on an RU that is not yet reconstructed
                                 * (break).
                                 */
        !          1113:        while (1) {
        !          1114:
        !          1115:                ctrl->ru_count++;
        !          1116:                if (ctrl->ru_count < RUsPerPU) {
                                                /* Still inside the current parity stripe. */
        !          1117:                        ctrl->diskOffset += sectorsPerRU;
        !          1118:                        rbuf->failedDiskSectorOffset += sectorsPerRU;
        !          1119:                } else {
                                                /* Rolled over: move on to the next parity stripe. */
        !          1120:                        ctrl->curPSID++;
        !          1121:                        ctrl->ru_count = 0;
        !          1122:                        /* code left over from when head-sep was based on
        !          1123:                         * parity stripe id */
        !          1124:                        if (ctrl->curPSID >=
        !          1125:                            raidPtr->reconControl[row]->lastPSID) {
                                                        /*
                                                         * Past the last parity stripe: bump this
                                                         * disk's head-separation counter one final
                                                         * time, hand the new value to
                                                         * rf_CheckForNewMinHeadSep(), and report
                                                         * that nothing is left to read.
                                                         */
        !          1126:                                rf_CheckForNewMinHeadSep(raidPtr, row,
        !          1127:                                    ++(ctrl->headSepCounter));
        !          1128:                                return (1);     /* Finito ! */
        !          1129:                        }
        !          1130:                        /*
        !          1131:                         * Find the disk offsets of the start of the parity
        !          1132:                         * stripe on both the current disk and the failed
        !          1133:                         * disk. Skip this entire parity stripe if either disk
        !          1134:                         * does not appear in the indicated PS.
        !          1135:                         */
        !          1136:                        status = rf_ComputePSDiskOffsets(raidPtr,
        !          1137:                            ctrl->curPSID, row, col, &ctrl->diskOffset,
        !          1138:                            &rbuf->failedDiskSectorOffset, &rbuf->spRow,
        !          1139:                            &rbuf->spCol, &rbuf->spOffset);
        !          1140:                        if (status) {
                                                        /*
                                                         * Pretend we are on the last RU of this
                                                         * stripe so that the increment at the top
                                                         * of the loop rolls straight over to the
                                                         * next parity stripe.
                                                         */
        !          1141:                                ctrl->ru_count = RUsPerPU - 1;
        !          1142:                                continue;
        !          1143:                        }
        !          1144:                }
        !          1145:                rbuf->which_ru = ctrl->ru_count;
        !          1146:
        !          1147:                /* Skip this RU if it's already been reconstructed. */
        !          1148:                if (rf_CheckRUReconstructed(raidPtr->reconControl[row]
        !          1149:                    ->reconMap, rbuf->failedDiskSectorOffset)) {
        !          1150:                        Dprintf2("Skipping psid %ld ru %d: already"
        !          1151:                            " reconstructed.\n", ctrl->curPSID, ctrl->ru_count);
        !          1152:                        continue;
        !          1153:                }
        !          1154:                break;
        !          1155:        }
                                /* One more RU has been selected on this disk. */
        !          1156:        ctrl->headSepCounter++;
        !          1157:        if (do_new_check)       /* Update min if needed. */
        !          1158:                rf_CheckForNewMinHeadSep(raidPtr, row, ctrl->headSepCounter);
        !          1159:
        !          1160:
        !          1161:        /*
        !          1162:         * At this point, we have definitely decided what to do, and we have
        !          1163:         * only to see if we can actually do it now.
        !          1164:         */
        !          1165:        rbuf->parityStripeID = ctrl->curPSID;
                                /*
                                 * Same value as stored inside the loop above; ru_count has not
                                 * changed since the loop exited.
                                 */
        !          1166:        rbuf->which_ru = ctrl->ru_count;
                                /*
                                 * Start a fresh trace record for this column and start its timer;
                                 * rf_TryToRead() stops the timer when it issues the read.
                                 */
        !          1167:        bzero((char *) &raidPtr->recon_tracerecs[col],
        !          1168:            sizeof(raidPtr->recon_tracerecs[col]));
        !          1169:        raidPtr->recon_tracerecs[col].reconacc = 1;
        !          1170:        RF_ETIMER_START(raidPtr->recon_tracerecs[col].recon_timer);
        !          1171:        retcode = rf_TryToRead(raidPtr, row, col);
        !          1172:        return (retcode);
        !          1173: }
        !          1174:
        !          1175: /*
        !          1176:  * Tries to issue the next read on the indicated disk. We may be
        !          1177:  * blocked by (a) the heads being too far apart, or (b) recon on the
        !          1178:  * indicated RU being blocked due to a write by a user thread. In
        !          1179:  * this case, we issue a head-sep or blockage wait request, which will
        !          1180:  * cause this same routine to be invoked again later when the blockage
        !          1181:  * has cleared.
                          *
                          * The unit to read is taken from the per-disk control structure
                          * (ctrl->curPSID, ctrl->ru_count, ctrl->diskOffset), so the caller must
                          * have positioned those fields beforehand.
                          *
                          * raidPtr  - RAID descriptor.
                          * row, col - Row and column of the disk to read from.
                          *
                          * Always returns 0, whether or not a read was actually issued.
        !          1182:  */
        !          1183:
        !          1184: int
        !          1185: rf_TryToRead(RF_Raid_t *raidPtr, RF_RowCol_t row, RF_RowCol_t col)
        !          1186: {
        !          1187:        RF_PerDiskReconCtrl_t *ctrl =
        !          1188:            &raidPtr->reconControl[row]->perDiskInfo[col];
        !          1189:        RF_SectorCount_t sectorsPerRU =
        !          1190:            raidPtr->Layout.sectorsPerStripeUnit * raidPtr->Layout.SUsPerRU;
        !          1191:        RF_StripeNum_t psid = ctrl->curPSID;
        !          1192:        RF_ReconUnitNum_t which_ru = ctrl->ru_count;
        !          1193:        RF_DiskQueueData_t *req;
        !          1194:        int status, created = 0;
        !          1195:        RF_ReconParityStripeStatus_t *pssPtr;
        !          1196:
        !          1197:        /*
        !          1198:         * If the current disk is too far ahead of the others, issue a
        !          1199:         * head-separation wait and return.
        !          1200:         */
                                /* The PSS mutex is not held yet, so a plain return is fine here. */
        !          1201:        if (rf_CheckHeadSeparation(raidPtr, ctrl, row, col,
        !          1202:            ctrl->headSepCounter, which_ru))
        !          1203:                return (0);
                                /*
                                 * From here to the "out" label the PSS mutex for psid is held; every
                                 * exit path below goes through "out" to release it.
                                 */
        !          1204:        RF_LOCK_PSS_MUTEX(raidPtr, row, psid);
                                /*
                                 * "created" is filled in by rf_LookupRUStatus() and consulted below
                                 * to decide whether the status entry has to be deleted again.
                                 * NOTE(review): presumably nonzero only when RF_PSS_CREATE actually
                                 * made a new entry -- confirm against rf_LookupRUStatus().
                                 */
        !          1205:        pssPtr = rf_LookupRUStatus(raidPtr, raidPtr->reconControl[row]
        !          1206:            ->pssTable, psid, which_ru, RF_PSS_CREATE, &created);
        !          1207:
        !          1208:        /*
        !          1209:         * If recon is blocked on the indicated parity stripe, issue a
        !          1210:         * block-wait request and return. This also must mark the indicated RU
        !          1211:         * in the stripe as under reconstruction if not blocked.
        !          1212:         */
        !          1213:        status = rf_CheckForcedOrBlockedReconstruction(raidPtr, pssPtr, ctrl,
        !          1214:            row, col, psid, which_ru);
        !          1215:        if (status == RF_PSS_RECON_BLOCKED) {
        !          1216:                Dprintf2("RECON: Stalling psid %ld ru %d: recon blocked.\n",
        !          1217:                    psid, which_ru);
        !          1218:                goto out;
        !          1219:        } else
        !          1220:                if (status == RF_PSS_FORCED_ON_WRITE) {
                                                /*
                                                 * Do not read this RU; post a SKIP event for this
                                                 * column instead.
                                                 */
        !          1221:                        rf_CauseReconEvent(raidPtr, row, col, NULL,
        !          1222:                            RF_REVENT_SKIP);
        !          1223:                        goto out;
        !          1224:                }
        !          1225:        /*
        !          1226:         * Make one last check to be sure that the indicated RU didn't get
        !          1227:         * reconstructed while we were waiting for something else to happen.
        !          1228:         * This is unfortunate in that it causes us to make this check twice
        !          1229:         * in the normal case. Might want to make some attempt to re-work
        !          1230:         * this so that we only do this check if we've definitely blocked on
        !          1231:         * one of the above checks. When this condition is detected, we may
        !          1232:         * have just created a bogus status entry, which we need to delete.
        !          1233:         */
        !          1234:        if (rf_CheckRUReconstructed(raidPtr->reconControl[row]->reconMap,
        !          1235:            ctrl->rbuf->failedDiskSectorOffset)) {
        !          1236:                Dprintf2("RECON: Skipping psid %ld ru %d: prior recon after"
        !          1237:                    " stall.\n", psid, which_ru);
        !          1238:                if (created)
        !          1239:                        rf_PSStatusDelete(raidPtr,
        !          1240:                            raidPtr->reconControl[row]->pssTable, pssPtr);
        !          1241:                rf_CauseReconEvent(raidPtr, row, col, NULL, RF_REVENT_SKIP);
        !          1242:                goto out;
        !          1243:        }
        !          1244:        /* Found something to read. Issue the I/O. */
        !          1245:        Dprintf5("RECON: Read for psid %ld on row %d col %d offset %ld"
        !          1246:            " buf %lx.\n", psid, row, col, ctrl->diskOffset,
        !          1247:            ctrl->rbuf->buffer);
                                /*
                                 * Record the time elapsed on this column's recon timer as
                                 * recon_start_to_fetch_us, then restart the timer.
                                 */
        !          1248:        RF_ETIMER_STOP(raidPtr->recon_tracerecs[col].recon_timer);
        !          1249:        RF_ETIMER_EVAL(raidPtr->recon_tracerecs[col].recon_timer);
        !          1250:        raidPtr->recon_tracerecs[col].specific.recon.recon_start_to_fetch_us =
        !          1251:            RF_ETIMER_VAL_US(raidPtr->recon_tracerecs[col].recon_timer);
        !          1252:        RF_ETIMER_START(raidPtr->recon_tracerecs[col].recon_timer);
        !          1253:
        !          1254:        /*
        !          1255:         * Should be ok to use a NULL proc pointer here, all the bufs we use
        !          1256:         * should be in kernel space.
        !          1257:         */
        !          1258:        req = rf_CreateDiskQueueData(RF_IO_TYPE_READ, ctrl->diskOffset,
        !          1259:            sectorsPerRU, ctrl->rbuf->buffer, psid, which_ru,
        !          1260:            rf_ReconReadDoneProc, (void *) ctrl, NULL,
        !          1261:            &raidPtr->recon_tracerecs[col], (void *) raidPtr, 0, NULL);
        !          1262:
        !          1263:        RF_ASSERT(req);         /* XXX -- Fix this. -- XXX */
        !          1264:
                                /*
                                 * Remember the request in the recon buffer; the READDONE handling in
                                 * rf_ProcessReconEvent() frees it through rbuf->arg.
                                 */
        !          1265:        ctrl->rbuf->arg = (void *) req;
        !          1266:        rf_DiskIOEnqueue(&raidPtr->Queues[row][col], req, RF_IO_RECON_PRIORITY);
                                /*
                                 * NOTE(review): presumably flags in the stripe status that the read
                                 * for this column has been issued -- confirm against the users of
                                 * pssPtr->issued[].
                                 */
        !          1267:        pssPtr->issued[col] = 1;
        !          1268:
        !          1269: out:
        !          1270:        RF_UNLOCK_PSS_MUTEX(raidPtr, row, psid);
        !          1271:        return (0);
        !          1272: }
        !          1273:
        !          1274:
        !          1275: /*
        !          1276:  * Given a parity stripe ID, we want to find out whether both the
        !          1277:  * current disk and the failed disk exist in that parity stripe. If
        !          1278:  * not, we want to skip this whole PS. If so, we want to find the
        !          1279:  * disk offset of the start of the PS on both the current disk and the
        !          1280:  * failed disk.
        !          1281:  *
        !          1282:  * This works by getting a list of disks comprising the indicated
        !          1283:  * parity stripe, and searching the list for the current and failed
        !          1284:  * disks. Once we've decided they both exist in the parity stripe, we
        !          1285:  * need to decide whether each is data or parity, so that we'll know
        !          1286:  * which mapping function to call to get the corresponding disk
        !          1287:  * offsets.
        !          1288:  *
        !          1289:  * This is kind of unpleasant, but doing it this way allows the
        !          1290:  * reconstruction code to use parity stripe IDs rather than physical
        !          1291:  * disk addresses to march through the failed disk, which greatly
        !          1292:  * simplifies a lot of code, as well as eliminating the need for a
        !          1293:  * reverse-mapping function. I also think it will execute faster,
        !          1294:  * since the calls to the mapping module are kept to a minimum.
        !          1295:  *
        !          1296:  * ASSUMES THAT THE STRIPE IDENTIFIER IDENTIFIES THE DISKS COMPRISING
        !          1297:  * THE STRIPE IN THE CORRECT ORDER.
                          *
                          * outDiskOffset receives the offset of the start of the parity stripe
                          * on the disk at (row, col); outFailedDiskSectorOffset receives the
                          * same for the failed disk (row, fcol).
                          *
                          * Returns 0 on success, with all five output parameters filled in.
                          * Returns 1 when the parity stripe should be skipped; in that case
                          * none of the output parameters has been written.
        !          1298:  */
        !          1299:
        !          1300: int
        !          1301: rf_ComputePSDiskOffsets(
        !          1302:     RF_Raid_t          *raidPtr,       /* RAID descriptor. */
        !          1303:     RF_StripeNum_t      psid,          /* Parity stripe identifier. */
        !          1304:     RF_RowCol_t                 row,           /*
        !          1305:                                         * Row and column of disk to find
        !          1306:                                         * the offsets for.
        !          1307:                                         */
        !          1308:     RF_RowCol_t                 col,
        !          1309:     RF_SectorNum_t     *outDiskOffset,
        !          1310:     RF_SectorNum_t     *outFailedDiskSectorOffset,
        !          1311:     RF_RowCol_t                *spRow,         /*
        !          1312:                                         * OUT: Row,col of spare unit for
        !          1313:                                         * failed unit.
        !          1314:                                         */
        !          1315:     RF_RowCol_t                *spCol,
        !          1316:     RF_SectorNum_t     *spOffset       /*
        !          1317:                                         * OUT: Offset into disk containing
        !          1318:                                         * spare unit.
        !          1319:                                         */
        !          1320: )
        !          1321: {
        !          1322:        RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
        !          1323:        RF_RowCol_t fcol = raidPtr->reconControl[row]->fcol;
        !          1324:        RF_RaidAddr_t sosRaidAddress;   /* start-of-stripe */
        !          1325:        RF_RowCol_t *diskids;
                                /*
                                 * i, j and k are the positions within diskids[] of the current
                                 * disk, the failed disk and the parity disk respectively.
                                 */
        !          1326:        u_int i, j, k, i_offset, j_offset;
        !          1327:        RF_RowCol_t prow, pcol;
                                /* testrow/testcol are only inspected by the assertions below. */
        !          1328:        int testcol, testrow;
        !          1329:        RF_RowCol_t stripe;
        !          1330:        RF_SectorNum_t poffset;
        !          1331:        char i_is_parity = 0, j_is_parity = 0;
        !          1332:        RF_RowCol_t stripeWidth =
        !          1333:            layoutPtr->numDataCol + layoutPtr->numParityCol;
        !          1334:
        !          1335:        /* Get a listing of the disks comprising that stripe. */
        !          1336:        sosRaidAddress = rf_ParityStripeIDToRaidAddress(layoutPtr, psid);
        !          1337:        (layoutPtr->map->IdentifyStripe) (raidPtr, sosRaidAddress, &diskids,
        !          1338:            &stripe);
        !          1339:        RF_ASSERT(diskids);
        !          1340:
        !          1341:        /*
        !          1342:         * Reject this entire parity stripe if it does not contain the
        !          1343:         * indicated disk or it does not contain the failed disk.
        !          1344:         */
        !          1345:        if (row != stripe)
        !          1346:                goto skipit;
        !          1347:        for (i = 0; i < stripeWidth; i++) {
        !          1348:                if (col == diskids[i])
        !          1349:                        break;
        !          1350:        }
        !          1351:        if (i == stripeWidth)
        !          1352:                goto skipit;
        !          1353:        for (j = 0; j < stripeWidth; j++) {
        !          1354:                if (fcol == diskids[j])
        !          1355:                        break;
        !          1356:        }
        !          1357:        if (j == stripeWidth) {
        !          1358:                goto skipit;
        !          1359:        }
        !          1360:        /* Find out which disk the parity is on. */
                                /* Only pcol is used afterwards; prow and poffset are ignored. */
        !          1361:        (layoutPtr->map->MapParity) (raidPtr, sosRaidAddress, &prow, &pcol,
        !          1362:            &poffset, RF_DONT_REMAP);
        !          1363:
        !          1364:        /* Find out if either the current RU or the failed RU is parity. */
        !          1365:        /*
        !          1366:         * Also, if the parity occurs in this stripe prior to the data and/or
        !          1367:         * failed col, we need to decrement i and/or j.
        !          1368:         */
        !          1369:        for (k = 0; k < stripeWidth; k++)
        !          1370:                if (diskids[k] == pcol)
        !          1371:                        break;
        !          1372:        RF_ASSERT(k < stripeWidth);
        !          1373:        i_offset = i;
        !          1374:        j_offset = j;
                                /*
                                 * k < i implies i >= 1, so the unsigned decrement cannot wrap; the
                                 * same holds for j below.
                                 */
        !          1375:        if (k < i)
        !          1376:                i_offset--;
        !          1377:        else
        !          1378:                if (k == i) {
        !          1379:                        i_is_parity = 1;
        !          1380:                        i_offset = 0;
        !          1381:                }               /*
        !          1382:                                 * Set offsets to zero to disable multiply
        !          1383:                                 * below.
        !          1384:                                 */
        !          1385:        if (k < j)
        !          1386:                j_offset--;
        !          1387:        else
        !          1388:                if (k == j) {
        !          1389:                        j_is_parity = 1;
        !          1390:                        j_offset = 0;
        !          1391:                }
        !          1392:        /*
        !          1393:         * At this point, [ij]_is_parity tells us whether the [current,failed]
        !          1394:         * disk is parity at the start of this RU, and, if data, "[ij]_offset"
        !          1395:         * tells us how far into the stripe the [current,failed] disk is.
        !          1396:         */
        !          1397:
        !          1398:        /*
        !          1399:         * Call the mapping routine to get the offset into the current disk,
        !          1400:         * repeat for failed disk.
        !          1401:         */
        !          1402:        if (i_is_parity)
        !          1403:                layoutPtr->map->MapParity(raidPtr, sosRaidAddress + i_offset *
        !          1404:                    layoutPtr->sectorsPerStripeUnit, &testrow, &testcol,
        !          1405:                    outDiskOffset, RF_DONT_REMAP);
        !          1406:        else
        !          1407:                layoutPtr->map->MapSector(raidPtr, sosRaidAddress + i_offset *
        !          1408:                    layoutPtr->sectorsPerStripeUnit, &testrow, &testcol,
        !          1409:                    outDiskOffset, RF_DONT_REMAP);
        !          1410:
                                /* The mapping must land on the disk we searched for. */
        !          1411:        RF_ASSERT(row == testrow && col == testcol);
        !          1412:
        !          1413:        if (j_is_parity)
        !          1414:                layoutPtr->map->MapParity(raidPtr, sosRaidAddress + j_offset *
        !          1415:                    layoutPtr->sectorsPerStripeUnit, &testrow, &testcol,
        !          1416:                    outFailedDiskSectorOffset, RF_DONT_REMAP);
        !          1417:        else
        !          1418:                layoutPtr->map->MapSector(raidPtr, sosRaidAddress + j_offset *
        !          1419:                    layoutPtr->sectorsPerStripeUnit, &testrow, &testcol,
        !          1420:                    outFailedDiskSectorOffset, RF_DONT_REMAP);
        !          1421:        RF_ASSERT(row == testrow && fcol == testcol);
        !          1422:
        !          1423:        /* Now locate the spare unit for the failed unit. */
                                /*
                                 * With distributed sparing the same address is mapped again with
                                 * RF_REMAP; otherwise the row's spareRow/spareCol are used and the
                                 * spare offset equals the failed-disk offset.
                                 */
        !          1424:        if (layoutPtr->map->flags & RF_DISTRIBUTE_SPARE) {
        !          1425:                if (j_is_parity)
        !          1426:                        layoutPtr->map->MapParity(raidPtr, sosRaidAddress +
        !          1427:                            j_offset * layoutPtr->sectorsPerStripeUnit, spRow,
        !          1428:                            spCol, spOffset, RF_REMAP);
        !          1429:                else
        !          1430:                        layoutPtr->map->MapSector(raidPtr, sosRaidAddress +
        !          1431:                            j_offset * layoutPtr->sectorsPerStripeUnit, spRow,
        !          1432:                            spCol, spOffset, RF_REMAP);
        !          1433:        } else {
        !          1434:                *spRow = raidPtr->reconControl[row]->spareRow;
        !          1435:                *spCol = raidPtr->reconControl[row]->spareCol;
        !          1436:                *spOffset = *outFailedDiskSectorOffset;
        !          1437:        }
        !          1438:
        !          1439:        return (0);
        !          1440:
                         /* Reached before any output parameter has been written. */
        !          1441: skipit:
        !          1442:        Dprintf3("RECON: Skipping psid %ld: nothing needed from r%d c%d.\n",
        !          1443:            psid, row, col);
        !          1444:        return (1);
        !          1445: }
        !          1446:
        !          1447:
        !          1448: /*
        !          1449:  * Called when a buffer has become ready to write to the replacement disk:
        !          1450:  * fetches the row's next full recon buffer and enqueues its write. Returns 0.
        !          1451:  */
        !          1452: int
        !          1453: rf_IssueNextWriteRequest(RF_Raid_t *raidPtr, RF_RowCol_t row)
        !          1454: {
        !          1455:        RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
        !          1456:        RF_SectorCount_t sectorsPerRU =
        !          1457:            layoutPtr->sectorsPerStripeUnit * layoutPtr->SUsPerRU;
        !          1458:        RF_RowCol_t fcol = raidPtr->reconControl[row]->fcol;
        !          1459:        RF_ReconBuffer_t *rbuf;
        !          1460:        RF_DiskQueueData_t *req;
        !          1461:
        !          1462:        rbuf = rf_GetFullReconBuffer(raidPtr->reconControl[row]);
        !          1463:        RF_ASSERT(rbuf);        /*
        !          1464:                                 * There must be one available, or we wouldn't
        !          1465:                                 * have gotten the event that sent us here.
        !          1466:                                 */
        !          1467:        RF_ASSERT(rbuf->pssPtr);
        !          1468:        /* Register rbuf as its pss's write buffer; drop the back pointer. */
        !          1469:        rbuf->pssPtr->writeRbuf = rbuf;
        !          1470:        rbuf->pssPtr = NULL;
        !          1471:
        !          1472:        Dprintf7("RECON: New write (r %d c %d offs %d) for psid %ld ru %d"
        !          1473:            " (failed disk offset %ld) buf %lx.\n",
        !          1474:            rbuf->spRow, rbuf->spCol, rbuf->spOffset, rbuf->parityStripeID,
        !          1475:            rbuf->which_ru, rbuf->failedDiskSectorOffset, rbuf->buffer);
        !          1476:        Dprintf6("RECON: new write psid %ld   %02x %02x %02x %02x %02x.\n",
        !          1477:            rbuf->parityStripeID, rbuf->buffer[0] & 0xff,
        !          1478:            rbuf->buffer[1] & 0xff, rbuf->buffer[2] & 0xff,
        !          1479:            rbuf->buffer[3] & 0xff, rbuf->buffer[4] & 0xff);
        !          1480:
        !          1481:        /*
        !          1482:         * Write one RU at the spare offset. Should be ok to use a NULL b_proc
        !          1483:         * here b/c all addrs should be in kernel space.
        !          1484:         */
        !          1485:        req = rf_CreateDiskQueueData(RF_IO_TYPE_WRITE, rbuf->spOffset,
        !          1486:            sectorsPerRU, rbuf->buffer, rbuf->parityStripeID, rbuf->which_ru,
        !          1487:            rf_ReconWriteDoneProc, (void *) rbuf, NULL,
        !          1488:            &raidPtr->recon_tracerecs[fcol], (void *) raidPtr, 0, NULL);
        !          1489:
        !          1490:        RF_ASSERT(req);         /* XXX -- Fix this. -- XXX */
        !          1491:        /* Keep the request reachable from the buffer, then queue the I/O. */
        !          1492:        rbuf->arg = (void *) req;
        !          1493:        rf_DiskIOEnqueue(&raidPtr->Queues[rbuf->spRow][rbuf->spCol], req,
        !          1494:            RF_IO_RECON_PRIORITY);
        !          1495:
        !          1496:        return (0);
        !          1497: }
        !          1498:
        !          1499: /*
        !          1500:  * Completion callback for a reconstruction read. The arg is a pointer to
        !          1501:  * the per-disk reconstruction control structure for the process that just
        !          1502:  * finished a read. A non-zero status hits RF_PANIC(); otherwise the
        !          1503:  * fetch-to-return time is recorded and RF_REVENT_READDONE is raised.
        !          1504:  * Always returns 0. Called at interrupt context in the kernel, so don't do
        !          1505:  * anything illegal here.
        !          1506:  */
        !          1507: int
        !          1508: rf_ReconReadDoneProc(void *arg, int status)
        !          1509: {
        !          1510:        RF_PerDiskReconCtrl_t *ctrl = (RF_PerDiskReconCtrl_t *) arg;
        !          1511:        RF_Raid_t *raidPtr = ctrl->reconCtrl->reconDesc->raidPtr;
        !          1512:
        !          1513:        if (status) {
        !          1514:                /*
        !          1515:                 * XXX No recovery: a failed recon read hits RF_PANIC().
        !          1516:                 */
        !          1517:                printf("Recon read failed !\n");
        !          1518:                RF_PANIC();
        !          1519:        }
        !          1520:        RF_ETIMER_STOP(raidPtr->recon_tracerecs[ctrl->col].recon_timer);
        !          1521:        RF_ETIMER_EVAL(raidPtr->recon_tracerecs[ctrl->col].recon_timer);
        !          1522:        raidPtr->recon_tracerecs[ctrl->col].specific.recon.
        !          1523:           recon_fetch_to_return_us =
        !          1524:             RF_ETIMER_VAL_US(raidPtr->recon_tracerecs[ctrl->col].recon_timer);
        !          1525:        RF_ETIMER_START(raidPtr->recon_tracerecs[ctrl->col].recon_timer);
        !          1526:        /* Timer restarted above; now report the completed read. */
        !          1527:        rf_CauseReconEvent(raidPtr, ctrl->row, ctrl->col, NULL,
        !          1528:            RF_REVENT_READDONE);
        !          1529:        return (0);
        !          1530: }
        !          1531:
        !          1532:
        !          1533: /*
        !          1534:  * This gets called upon the completion of a reconstruction write operation.
        !          1535:  * The arg is a pointer to the rbuf that was just written; it is passed on
        !          1536:  * with RF_REVENT_WRITEDONE. Non-zero status hits RF_PANIC(). Returns 0.
        !          1537:  * Called at interrupt context in the kernel, so don't do anything illegal here.
        !          1538:  */
        !          1539: int
        !          1540: rf_ReconWriteDoneProc(void *arg, int status)
        !          1541: {
        !          1542:        RF_ReconBuffer_t *rbuf = (RF_ReconBuffer_t *) arg;
        !          1543:        /* Note: the debug message below is emitted before status is checked. */
        !          1544:        Dprintf2("Reconstruction completed on psid %ld ru %d.\n",
        !          1545:            rbuf->parityStripeID, rbuf->which_ru);
        !          1546:        if (status) {
        !          1547:                /* fprintf(stderr, "Recon write failed !\n"); */
        !          1548:                printf("Recon write failed !\n");
        !          1549:                RF_PANIC();
        !          1550:        }
        !          1551:        rf_CauseReconEvent((RF_Raid_t *) rbuf->raidPtr, rbuf->row, rbuf->col,
        !          1552:            arg, RF_REVENT_WRITEDONE);
        !          1553:        return (0);
        !          1554: }
        !          1555:
        !          1556:
        !          1557: /*
        !          1558:  * Computes a new minimum head sep counter over every column of the row
        !          1559:  * except fcol, and wakes up anyone who needs to be woken as a result.
        !          1560:  */
        !          1561: void
        !          1562: rf_CheckForNewMinHeadSep(RF_Raid_t *raidPtr, RF_RowCol_t row,
        !          1563:     RF_HeadSepLimit_t hsCtr)
        !          1564: {
        !          1565:        RF_ReconCtrl_t *reconCtrlPtr = raidPtr->reconControl[row];
        !          1566:        RF_HeadSepLimit_t new_min;
        !          1567:        RF_RowCol_t i;
        !          1568:        RF_CallbackDesc_t *p;
        !          1569:        /* From the definition of a minimum. */
        !          1570:        RF_ASSERT(hsCtr >= reconCtrlPtr->minHeadSepCounter);
        !          1571:
        !          1572:        /* NOTE(review): the assert above reads minHeadSepCounter unlocked. */
        !          1573:        RF_LOCK_MUTEX(reconCtrlPtr->rb_mutex);
        !          1574:        /* Seed with LONG_MAX; 1UL keeps the shift well-defined (no sign bit). */
        !          1575:        new_min = ~(1UL << (8 * sizeof(long) - 1));     /* 0x7FFF....FFF */
        !          1576:        for (i = 0; i < raidPtr->numCol; i++)
        !          1577:                if (i != reconCtrlPtr->fcol) {
        !          1578:                        if (reconCtrlPtr->perDiskInfo[i].headSepCounter <
        !          1579:                            new_min)
        !          1580:                                new_min =
        !          1581:                                    reconCtrlPtr->perDiskInfo[i].headSepCounter;
        !          1582:                }
        !          1583:        /* Set the new minimum and wake up anyone who can now run again. */
        !          1584:        if (new_min != reconCtrlPtr->minHeadSepCounter) {
        !          1585:                reconCtrlPtr->minHeadSepCounter = new_min;
        !          1586:                Dprintf1("RECON:  new min head pos counter val is %ld.\n",
        !          1587:                    new_min);
        !          1588:                while (reconCtrlPtr->headSepCBList) {
        !          1589:                        if (reconCtrlPtr->headSepCBList->callbackArg.v >
        !          1590:                            new_min)
        !          1591:                                break;  /* Sorted list: rest is larger. */
        !          1592:                        p = reconCtrlPtr->headSepCBList;
        !          1593:                        reconCtrlPtr->headSepCBList = p->next;
        !          1594:                        p->next = NULL;
        !          1595:                        rf_CauseReconEvent(raidPtr, p->row, p->col, NULL,
        !          1596:                            RF_REVENT_HEADSEPCLEAR);
        !          1597:                        rf_FreeCallbackDesc(p);
        !          1598:                }
        !          1599:
        !          1600:        }
        !          1601:        RF_UNLOCK_MUTEX(reconCtrlPtr->rb_mutex);
        !          1602: }
        !          1603:
        !          1604: /*
        !          1605:  * Checks to see that the maximum head separation will not be violated
        !          1606:  * if we initiate a reconstruction I/O on the indicated disk.
        !          1607:  * Limiting the maximum head separation between two disks eliminates
        !          1608:  * the nasty buffer-stall conditions that occur when one disk races
        !          1609:  * ahead of the others and consumes all of the floating recon buffers.
        !          1610:  * This code is complex and unpleasant but it's necessary to avoid
        !          1611:  * some very nasty, albeit fairly rare, reconstruction behavior.
        !          1612:  * The check is skipped when raidPtr->headSepLimit is negative.
        !          1613:  * Returns non-zero (1) if and only if we have to stop working on the
        !          1614:  * indicated disk due to a head-separation delay; a wakeup is queued then.
        !          1615:  */
        !          1616: int
        !          1617: rf_CheckHeadSeparation(
        !          1618:     RF_Raid_t                  *raidPtr,
        !          1619:     RF_PerDiskReconCtrl_t      *ctrl,
        !          1620:     RF_RowCol_t                         row,
        !          1621:     RF_RowCol_t                         col,
        !          1622:     RF_HeadSepLimit_t           hsCtr,
        !          1623:     RF_ReconUnitNum_t           which_ru
        !          1624: )
        !          1625: {
        !          1626:        RF_ReconCtrl_t *reconCtrlPtr = raidPtr->reconControl[row];
        !          1627:        RF_CallbackDesc_t *cb, *p, *pt;
        !          1628:        int retval = 0;
        !          1629:
        !          1630:        /*
        !          1631:         * If we're too far ahead of the slowest disk, stop working on this
        !          1632:         * disk until the slower ones catch up. We do this by scheduling a
        !          1633:         * wakeup callback for the time when the slowest disk has caught up.
        !          1634:         * We define "caught up" with 20% hysteresis, i.e. the head separation
        !          1635:         * must have fallen to at most 80% of the max allowable head
        !          1636:         * separation before we'll wake up.
        !          1637:         */
        !          1638:        RF_LOCK_MUTEX(reconCtrlPtr->rb_mutex);
        !          1639:        if ((raidPtr->headSepLimit >= 0) &&
        !          1640:            ((ctrl->headSepCounter - reconCtrlPtr->minHeadSepCounter) >
        !          1641:             raidPtr->headSepLimit)) {
        !          1642:                Dprintf6("raid%d: RECON: head sep stall: row %d col %d hsCtr"
        !          1643:                    " %ld minHSCtr %ld limit %ld.\n",
        !          1644:                    raidPtr->raidid, row, col, ctrl->headSepCounter,
        !          1645:                    reconCtrlPtr->minHeadSepCounter, raidPtr->headSepLimit);
        !          1646:                cb = rf_AllocCallbackDesc();    /* Result is used unchecked. */
        !          1647:                /*
        !          1648:                 * The minHeadSepCounter value we have to get to before we'll
        !          1649:                 * wake up. Build in 20% hysteresis.
        !          1650:                 */
        !          1651:                cb->callbackArg.v = (ctrl->headSepCounter -
        !          1652:                    raidPtr->headSepLimit + raidPtr->headSepLimit / 5);
        !          1653:                cb->row = row;
        !          1654:                cb->col = col;
        !          1655:                cb->next = NULL;
        !          1656:
        !          1657:                /*
        !          1658:                 * Insert this callback descriptor into the sorted list of
        !          1659:                 * pending head-sep callbacks (ascending callbackArg.v).
        !          1660:                 */
        !          1661:                p = reconCtrlPtr->headSepCBList;
        !          1662:                if (!p)
        !          1663:                        reconCtrlPtr->headSepCBList = cb;
        !          1664:                else
        !          1665:                        if (cb->callbackArg.v < p->callbackArg.v) {
        !          1666:                                cb->next = reconCtrlPtr->headSepCBList;
        !          1667:                                reconCtrlPtr->headSepCBList = cb;
        !          1668:                        } else {
        !          1669:                                for (pt = p, p = p->next;
        !          1670:                                    p && (p->callbackArg.v < cb->callbackArg.v);
        !          1671:                                    pt = p, p = p->next); /* Empty body. */
        !          1672:                                cb->next = p;
        !          1673:                                pt->next = cb;
        !          1674:                        }
        !          1675:                retval = 1;
        !          1676: #if    RF_RECON_STATS > 0
        !          1677:                ctrl->reconCtrl->reconDesc->hsStallCount++;
        !          1678: #endif /* RF_RECON_STATS > 0 */
        !          1679:        }
        !          1680:        RF_UNLOCK_MUTEX(reconCtrlPtr->rb_mutex);
        !          1681:
        !          1682:        return (retval);
        !          1683: }
        !          1684:
        !          1685:
        !          1686:
        !          1687: /*
        !          1688:  * Checks to see if reconstruction has been either forced or blocked
        !          1689:  * by a user operation. If forced, we skip this RU entirely (returns
        !          1690:  * RF_PSS_FORCED_ON_WRITE). Else if blocked, put ourselves on the wait list
        !          1691:  * (returns RF_PSS_RECON_BLOCKED). Else mark the RU under recon, return 0.
        !          1692:  * ASSUMES THE PSS MUTEX IS LOCKED UPON ENTRY.
        !          1693:  */
        !          1694: int
        !          1695: rf_CheckForcedOrBlockedReconstruction(
        !          1696:     RF_Raid_t                   *raidPtr,
        !          1697:     RF_ReconParityStripeStatus_t *pssPtr,
        !          1698:     RF_PerDiskReconCtrl_t       *ctrl,
        !          1699:     RF_RowCol_t                          row,
        !          1700:     RF_RowCol_t                          col,
        !          1701:     RF_StripeNum_t               psid,
        !          1702:     RF_ReconUnitNum_t            which_ru
        !          1703: )
        !          1704: {
        !          1705:        RF_CallbackDesc_t *cb;
        !          1706:        int retcode = 0;
        !          1707:        /* Both forced flags map to the one RF_PSS_FORCED_ON_WRITE code. */
        !          1708:        if ((pssPtr->flags & RF_PSS_FORCED_ON_READ) ||
        !          1709:            (pssPtr->flags & RF_PSS_FORCED_ON_WRITE))
        !          1710:                retcode = RF_PSS_FORCED_ON_WRITE;
        !          1711:        else
        !          1712:                if (pssPtr->flags & RF_PSS_RECON_BLOCKED) {
        !          1713:                        Dprintf4("RECON: row %d col %d blocked at psid %ld"
        !          1714:                            " ru %d.\n", row, col, psid, which_ru);
        !          1715:                        cb = rf_AllocCallbackDesc();    /*
        !          1716:                                                         * Push ourselves onto
        !          1717:                                                         * the blockage-wait
        !          1718:                                                         * list (at its head).
        !          1719:                                                         */
        !          1720:                        cb->row = row;
        !          1721:                        cb->col = col;
        !          1722:                        cb->next = pssPtr->blockWaitList;
        !          1723:                        pssPtr->blockWaitList = cb;
        !          1724:                        retcode = RF_PSS_RECON_BLOCKED;
        !          1725:                }
        !          1726:        if (!retcode)
        !          1727:                pssPtr->flags |= RF_PSS_UNDER_RECON;    /*
        !          1728:                                                         * Mark this RU as under
        !          1729:                                                         * reconstruction.
        !          1730:                                                         */
        !          1731:
        !          1732:        return (retcode);
        !          1733: }
        !          1734:
        !          1735:
        !          1736: /*
        !          1737:  * If reconstruction is currently ongoing for the indicated stripeID,
        !          1738:  * reconstruction is forced to completion and we return non-zero (1) to
        !          1739:  * indicate that the caller must wait. If not, then reconstruction is
        !          1740:  * blocked on the indicated stripe and the routine returns zero. If
        !          1741:  * and only if we return non-zero, cbFunc/cbArg are queued on the stripe's
        !          1742:  * procWaitList, to be invoked when the reconstruction has completed.
        !          1743:  */
        !          1744: int
        !          1745: rf_ForceOrBlockRecon(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
        !          1746:        void (*cbFunc) (RF_Raid_t *, void *), void *cbArg)
        !          1747: {
        !          1748:        RF_RowCol_t row = asmap->physInfo->row; /*
        !          1749:                                                 * Which row of the array
        !          1750:                                                 * we're working on.
        !          1751:                                                 */
        !          1752:        RF_StripeNum_t stripeID = asmap->stripeID;      /*
        !          1753:                                                         * The stripe ID we're
        !          1754:                                                         * forcing recon on.
        !          1755:                                                         */
        !          1756:        RF_SectorCount_t sectorsPerRU = raidPtr->Layout.sectorsPerStripeUnit *
        !          1757:            raidPtr->Layout.SUsPerRU;           /* Num sects in one RU. */
        !          1758:        RF_ReconParityStripeStatus_t *pssPtr;   /*
        !          1759:                                                 * A pointer to the parity
        !          1760:                                                 * stripe status structure.
        !          1761:                                                 */
        !          1762:        RF_StripeNum_t psid;                    /* Parity stripe id. */
        !          1763:        RF_SectorNum_t offset, fd_offset;       /*
        !          1764:                                                 * Disk offset, failed-disk
        !          1765:                                                 * offset.
        !          1766:                                                 */
        !          1767:        RF_RowCol_t *diskids;
        !          1768:        RF_RowCol_t stripe;
        !          1769:        RF_ReconUnitNum_t which_ru;     /* RU within parity stripe. */
        !          1770:        RF_RowCol_t fcol, diskno, i;
        !          1771:        RF_ReconBuffer_t *new_rbuf;     /* Ptr to newly allocated rbufs. */
        !          1772:        RF_DiskQueueData_t *req;        /* Disk I/O req to be enqueued. */
        !          1773:        RF_CallbackDesc_t *cb;
        !          1774:        int created = 0, nPromoted;
        !          1775:
        !          1776:        psid = rf_MapStripeIDToParityStripeID(&raidPtr->Layout, stripeID,
        !          1777:            &which_ru);
        !          1778:
        !          1779:        RF_LOCK_PSS_MUTEX(raidPtr, row, psid);
        !          1780:
        !          1781:        pssPtr = rf_LookupRUStatus(raidPtr,
        !          1782:            raidPtr->reconControl[row]->pssTable, psid, which_ru,
        !          1783:            RF_PSS_CREATE | RF_PSS_RECON_BLOCKED, &created);
        !          1784:
        !          1785:        /* If recon is not ongoing on this PS, just return. */
        !          1786:        if (!(pssPtr->flags & RF_PSS_UNDER_RECON)) {
        !          1787:                RF_UNLOCK_PSS_MUTEX(raidPtr, row, psid);
        !          1788:                return (0);
        !          1789:        }
        !          1790:        /*
        !          1791:         * Otherwise, we have to wait for reconstruction to complete on this
        !          1792:         * RU.
        !          1793:         */
        !          1794:        /*
        !          1795:         * In order to avoid waiting for a potentially large number of
        !          1796:         * low-priority accesses to complete, we force a normal-priority (i.e.
        !          1797:         * not low-priority) reconstruction on this RU.
        !          1798:         */
        !          1799:        if (!(pssPtr->flags & RF_PSS_FORCED_ON_WRITE) &&
        !          1800:            !(pssPtr->flags & RF_PSS_FORCED_ON_READ)) {
        !          1801:                DDprintf1("Forcing recon on psid %ld.\n", psid);
        !          1802:                /* Mark this RU as under forced recon. */
        !          1803:                pssPtr->flags |= RF_PSS_FORCED_ON_WRITE;
        !          1804:                /* Clear the blockage that we just set. */
        !          1805:                pssPtr->flags &= ~RF_PSS_RECON_BLOCKED;
        !          1806:                fcol = raidPtr->reconControl[row]->fcol;
        !          1807:
        !          1808:                /*
        !          1809:                 * Get a listing of the disks comprising the indicated stripe.
        !          1810:                 */
        !          1811:                (raidPtr->Layout.map->IdentifyStripe) (raidPtr,
        !          1812:                    asmap->raidAddress, &diskids, &stripe);
        !          1813:                RF_ASSERT(row == stripe);
        !          1814:
        !          1815:                /*
        !          1816:                 * For previously issued reads, elevate them to normal
        !          1817:                 * priority. If the I/O has already completed, it won't be
        !          1818:                 * found in the queue, and hence this will be a no-op. For
        !          1819:                 * unissued reads, allocate buffers and issue new reads. The
        !          1820:                 * fact that we've set the FORCED bit means that the regular
        !          1821:                 * recon procs will not re-issue these reqs.
        !          1822:                 */
        !          1823:                for (i = 0; i < raidPtr->Layout.numDataCol +
        !          1824:                    raidPtr->Layout.numParityCol; i++)
        !          1825:                        if ((diskno = diskids[i]) != fcol) {
        !          1826:                                if (pssPtr->issued[diskno]) {
        !          1827:                                        nPromoted = rf_DiskIOPromote(&raidPtr
        !          1828:                                            ->Queues[row][diskno], psid,
        !          1829:                                            which_ru);
        !          1830:                                        if (rf_reconDebug && nPromoted)
        !          1831:                                                printf("raid%d: promoted read"
        !          1832:                                                    " from row %d col %d.\n",
        !          1833:                                                    raidPtr->raidid, row,
        !          1834:                                                    diskno);
        !          1835:                                } else {
        !          1836:                                        /* Create new buf. */
        !          1837:                                        new_rbuf = rf_MakeReconBuffer(raidPtr,
        !          1838:                                            row, diskno, RF_RBUF_TYPE_FORCED);
        !          1839:                                        /* Find offsets & spare location. */
        !          1840:                                        rf_ComputePSDiskOffsets(raidPtr, psid,
        !          1841:                                            row, diskno, &offset, &fd_offset,
        !          1842:                                            &new_rbuf->spRow, &new_rbuf->spCol,
        !          1843:                                            &new_rbuf->spOffset);
        !          1844:                                        new_rbuf->parityStripeID = psid;
        !          1845:                                        /* Fill in the buffer. */
        !          1846:                                        new_rbuf->which_ru = which_ru;
        !          1847:                                        new_rbuf->failedDiskSectorOffset =
        !          1848:                                            fd_offset;
        !          1849:                                        new_rbuf->priority =
        !          1850:                                            RF_IO_NORMAL_PRIORITY;
        !          1851:
        !          1852:                                        /*
        !          1853:                                         * Use NULL b_proc b/c all addrs
        !          1854:                                         * should be in kernel space.
        !          1855:                                         */
        !          1856:                                        req = rf_CreateDiskQueueData(
        !          1857:                                            RF_IO_TYPE_READ, offset +
        !          1858:                                            which_ru * sectorsPerRU,
        !          1859:                                            sectorsPerRU, new_rbuf->buffer,
        !          1860:                                            psid, which_ru, (int (*)
        !          1861:                                            (void *, int))
        !          1862:                                              rf_ForceReconReadDoneProc,
        !          1863:                                            (void *) new_rbuf, NULL,
        !          1864:                                            NULL, (void *) raidPtr, 0, NULL);
        !          1865:
        !          1866:                                        RF_ASSERT(req); /*
        !          1867:                                                         * XXX -- Fix this. --
        !          1868:                                                         * XXX
        !          1869:                                                         */
        !          1870:
        !          1871:                                        new_rbuf->arg = req;
        !          1872:                                        /* Enqueue the I/O. */
        !          1873:                                        rf_DiskIOEnqueue(&raidPtr
        !          1874:                                            ->Queues[row][diskno], req,
        !          1875:                                            RF_IO_NORMAL_PRIORITY);
        !          1876:                                        Dprintf3("raid%d: Issued new read req"
        !          1877:                                            " on row %d col %d.\n",
        !          1878:                                            raidPtr->raidid, row, diskno);
        !          1879:                                }
        !          1880:                        }
        !          1881:                /*
        !          1882:                 * If the write is sitting in the disk queue, elevate its
        !          1883:                 * priority. Like the read case, only report it when debugging.
        !          1884:                 */
        !          1885:                if (rf_DiskIOPromote(&raidPtr->Queues[row][fcol],
        !          1886:                    psid, which_ru) && rf_reconDebug)
        !          1887:                        printf("raid%d: promoted write to row %d col %d.\n",
        !          1888:                            raidPtr->raidid, row, fcol);
        !          1889:        }
        !          1890:        /*
        !          1891:         * Install a callback descriptor to be invoked when recon completes on
        !          1892:         * this parity stripe.
        !          1893:         */
        !          1894:        cb = rf_AllocCallbackDesc();
        !          1895:        /*
        !          1896:         * XXX The following is bogus... These functions don't really match !!!
        !          1897:         * cbFunc takes (RF_Raid_t *, void *), not a lone RF_CBParam_t. GO
        !          1898:         */
        !          1899:        cb->callbackFunc = (void (*) (RF_CBParam_t)) cbFunc;
        !          1900:        cb->callbackArg.p = (void *) cbArg;
        !          1901:        cb->next = pssPtr->procWaitList;
        !          1902:        pssPtr->procWaitList = cb;
        !          1903:        DDprintf2("raid%d: Waiting for forced recon on psid %ld.\n",
        !          1904:            raidPtr->raidid, psid);
        !          1905:
        !          1906:        RF_UNLOCK_PSS_MUTEX(raidPtr, row, psid);
        !          1907:        return (1);
        !          1908: }
        !          1909:
        !          1910:
        !          1911: /*
        !          1912:  * Called upon the completion of a forced reconstruction read; arg is the
        !          1913:  * rbuf. Schedules the FORCEDREADDONE event; non-zero status hits RF_PANIC().
        !          1914:  * Called at interrupt context in the kernel, so don't do anything illegal here.
        !          1915:  */
        !          1916: void
        !          1917: rf_ForceReconReadDoneProc(void *arg, int status)
        !          1918: {
        !          1919:        RF_ReconBuffer_t *rbuf = arg;
        !          1920:
        !          1921:        if (status) {
        !          1922:                /* fprintf(stderr, "Forced recon read failed !\n"); */
        !          1923:                printf("Forced recon read failed !\n");
        !          1924:                RF_PANIC();
        !          1925:        }
        !          1926:        rf_CauseReconEvent((RF_Raid_t *) rbuf->raidPtr, rbuf->row, rbuf->col,
        !          1927:            (void *) rbuf, RF_REVENT_FORCEDREADDONE);
        !          1928: }
        !          1929:
        !          1930:
        !          1931: /* Releases a block on the reconstruction of the indicated stripe. */
        !          1932: int
        !          1933: rf_UnblockRecon(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap)
        !          1934: {
        !          1935:        RF_RowCol_t row = asmap->origRow;
        !          1936:        RF_StripeNum_t stripeID = asmap->stripeID;
        !          1937:        RF_ReconParityStripeStatus_t *pssPtr;
        !          1938:        RF_ReconUnitNum_t which_ru;
        !          1939:        RF_StripeNum_t psid;
        !          1940:        int created = 0;
        !          1941:        RF_CallbackDesc_t *cb;
        !          1942:
        !          1943:        psid = rf_MapStripeIDToParityStripeID(&raidPtr->Layout, stripeID,
        !          1944:            &which_ru);
        !          1945:        RF_LOCK_PSS_MUTEX(raidPtr, row, psid);
        !          1946:        pssPtr = rf_LookupRUStatus(raidPtr, raidPtr->reconControl[row]
        !          1947:            ->pssTable, psid, which_ru, RF_PSS_NONE, &created);
        !          1948:
        !          1949:        /*
        !          1950:         * When recon is forced, the pss desc can get deleted before we get
        !          1951:         * back to unblock recon. But, this can _only_ happen when recon is
        !          1952:         * forced. It would be good to put some kind of sanity check here, but
        !          1953:         * how to decide if recon was just forced or not ?
        !          1954:         */
        !          1955:        if (!pssPtr) {
        !          1956:                /*
        !          1957:                 * printf("Warning: no pss descriptor upon unblock on psid %ld"
        !          1958:                 *     " RU %d.\n", psid, which_ru);
        !          1959:                 */
        !          1960:                if (rf_reconDebug || rf_pssDebug)
        !          1961:                        printf("Warning: no pss descriptor upon unblock on"
        !          1962:                            " psid %ld RU %d.\n", (long) psid, which_ru);
        !          1963:                goto out;
        !          1964:        }
        !          1965:        pssPtr->blockCount--;
        !          1966:        Dprintf3("raid%d: unblocking recon on psid %ld: blockcount is %d.\n",
        !          1967:            raidPtr->raidid, psid, pssPtr->blockCount);
        !          1968:        if (pssPtr->blockCount == 0) {
        !          1969:                /* If recon blockage has been released. */
        !          1970:
        !          1971:                /*
        !          1972:                 * Unblock recon before calling CauseReconEvent in case
        !          1973:                 * CauseReconEvent causes us to try to issue a new read before
        !          1974:                 * returning here.
        !          1975:                 */
        !          1976:                pssPtr->flags &= ~RF_PSS_RECON_BLOCKED;
        !          1977:
        !          1978:
        !          1979:                while (pssPtr->blockWaitList) {
        !          1980:                        /*
        !          1981:                         * Spin through the block-wait list and
        !          1982:                         * release all the waiters.
        !          1983:                         */
        !          1984:                        cb = pssPtr->blockWaitList;
        !          1985:                        pssPtr->blockWaitList = cb->next;
        !          1986:                        cb->next = NULL;
        !          1987:                        rf_CauseReconEvent(raidPtr, cb->row, cb->col, NULL,
        !          1988:                            RF_REVENT_BLOCKCLEAR);
        !          1989:                        rf_FreeCallbackDesc(cb);
        !          1990:                }
        !          1991:                if (!(pssPtr->flags & RF_PSS_UNDER_RECON)) {
        !          1992:                        /* If no recon was requested while recon was blocked. */
        !          1993:                        rf_PSStatusDelete(raidPtr, raidPtr->reconControl[row]
        !          1994:                            ->pssTable, pssPtr);
        !          1995:                }
        !          1996:        }
        !          1997: out:
        !          1998:        RF_UNLOCK_PSS_MUTEX(raidPtr, row, psid);
        !          1999:        return (0);
        !          2000: }
CVSweb