Annotation of sys/dev/raidframe/rf_reconstruct.c, Revision 1.1.1.1
1.1 nbrk 1: /* $OpenBSD: rf_reconstruct.c,v 1.16 2007/06/05 00:38:22 deraadt Exp $ */
2: /* $NetBSD: rf_reconstruct.c,v 1.26 2000/06/04 02:05:13 oster Exp $ */
3:
4: /*
5: * Copyright (c) 1995 Carnegie-Mellon University.
6: * All rights reserved.
7: *
8: * Author: Mark Holland
9: *
10: * Permission to use, copy, modify and distribute this software and
11: * its documentation is hereby granted, provided that both the copyright
12: * notice and this permission notice appear in all copies of the
13: * software, derivative works or modified versions, and any portions
14: * thereof, and that both notices appear in supporting documentation.
15: *
16: * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
17: * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
18: * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
19: *
20: * Carnegie Mellon requests users of this software to return to
21: *
22: * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
23: * School of Computer Science
24: * Carnegie Mellon University
25: * Pittsburgh PA 15213-3890
26: *
27: * any improvements or extensions that they make and grant Carnegie the
28: * rights to redistribute these changes.
29: */
30:
31: /**************************************************************
32: *
33: * rf_reconstruct.c -- Code to perform on-line reconstruction.
34: *
35: **************************************************************/
36:
37: #include "rf_types.h"
38: #include <sys/time.h>
39: #include <sys/buf.h>
40: #include <sys/errno.h>
41:
42: #include <sys/types.h>
43: #include <sys/param.h>
44: #include <sys/systm.h>
45: #include <sys/proc.h>
46: #include <sys/ioctl.h>
47: #include <sys/fcntl.h>
48: #if __NETBSD__
49: #include <sys/vnode.h>
50: #endif
51:
52: #include "rf_raid.h"
53: #include "rf_reconutil.h"
54: #include "rf_revent.h"
55: #include "rf_reconbuffer.h"
56: #include "rf_acctrace.h"
57: #include "rf_etimer.h"
58: #include "rf_dag.h"
59: #include "rf_desc.h"
60: #include "rf_general.h"
61: #include "rf_freelist.h"
62: #include "rf_debugprint.h"
63: #include "rf_driver.h"
64: #include "rf_utils.h"
65: #include "rf_shutdown.h"
66:
67: #include "rf_kintf.h"
68:
69: /*
70: * Setting these to -1 causes them to be set to their default values if not set
71: * by debug options.
72: */
73:
/*
 * Debug printf wrappers.  Each DprintfN forwards its N value arguments
 * to rf_debug_printf() (which always takes a format plus eight pointer
 * slots) when rf_reconDebug is enabled; DDprintfN is the same gate.
 * Values are cast through unsigned long so that integers survive the
 * trip through the void * varargs slots.
 *
 * Every macro argument is fully parenthesized so that an expression
 * argument (e.g. "x + y") is cast as a whole, not just its first
 * operand (the original cast "(unsigned long)a" bound only to the
 * first token of the expansion).
 */
#define Dprintf(s)							\
do {									\
	if (rf_reconDebug)						\
		rf_debug_printf(s,					\
		    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL);	\
} while (0)
#define Dprintf1(s,a)							\
do {									\
	if (rf_reconDebug)						\
		rf_debug_printf(s,					\
		    (void *)((unsigned long)(a)),			\
		    NULL, NULL, NULL, NULL, NULL, NULL, NULL);		\
} while (0)
#define Dprintf2(s,a,b)							\
do {									\
	if (rf_reconDebug)						\
		rf_debug_printf(s,					\
		    (void *)((unsigned long)(a)),			\
		    (void *)((unsigned long)(b)),			\
		    NULL, NULL, NULL, NULL, NULL, NULL);		\
} while (0)
#define Dprintf3(s,a,b,c)						\
do {									\
	if (rf_reconDebug)						\
		rf_debug_printf(s,					\
		    (void *)((unsigned long)(a)),			\
		    (void *)((unsigned long)(b)),			\
		    (void *)((unsigned long)(c)),			\
		    NULL, NULL, NULL, NULL, NULL);			\
} while (0)
#define Dprintf4(s,a,b,c,d)						\
do {									\
	if (rf_reconDebug)						\
		rf_debug_printf(s,					\
		    (void *)((unsigned long)(a)),			\
		    (void *)((unsigned long)(b)),			\
		    (void *)((unsigned long)(c)),			\
		    (void *)((unsigned long)(d)),			\
		    NULL, NULL, NULL, NULL);				\
} while (0)
#define Dprintf5(s,a,b,c,d,e)						\
do {									\
	if (rf_reconDebug)						\
		rf_debug_printf(s,					\
		    (void *)((unsigned long)(a)),			\
		    (void *)((unsigned long)(b)),			\
		    (void *)((unsigned long)(c)),			\
		    (void *)((unsigned long)(d)),			\
		    (void *)((unsigned long)(e)),			\
		    NULL, NULL, NULL);					\
} while (0)
#define Dprintf6(s,a,b,c,d,e,f)						\
do {									\
	if (rf_reconDebug)						\
		rf_debug_printf(s,					\
		    (void *)((unsigned long)(a)),			\
		    (void *)((unsigned long)(b)),			\
		    (void *)((unsigned long)(c)),			\
		    (void *)((unsigned long)(d)),			\
		    (void *)((unsigned long)(e)),			\
		    (void *)((unsigned long)(f)),			\
		    NULL, NULL);					\
} while (0)
#define Dprintf7(s,a,b,c,d,e,f,g)					\
do {									\
	if (rf_reconDebug)						\
		rf_debug_printf(s,					\
		    (void *)((unsigned long)(a)),			\
		    (void *)((unsigned long)(b)),			\
		    (void *)((unsigned long)(c)),			\
		    (void *)((unsigned long)(d)),			\
		    (void *)((unsigned long)(e)),			\
		    (void *)((unsigned long)(f)),			\
		    (void *)((unsigned long)(g)),			\
		    NULL);						\
} while (0)

#define DDprintf1(s,a)							\
do {									\
	if (rf_reconDebug)						\
		rf_debug_printf(s,					\
		    (void *)((unsigned long)(a)),			\
		    NULL, NULL, NULL, NULL, NULL, NULL, NULL);		\
} while (0)
#define DDprintf2(s,a,b)						\
do {									\
	if (rf_reconDebug)						\
		rf_debug_printf(s,					\
		    (void *)((unsigned long)(a)),			\
		    (void *)((unsigned long)(b)),			\
		    NULL, NULL, NULL, NULL, NULL, NULL);		\
} while (0)
166:
/* Freelist of reconstruction descriptors (rf_AllocRaidReconDesc). */
static RF_FreeList_t *rf_recond_freelist;
#define RF_MAX_FREE_RECOND 4
#define RF_RECOND_INC 1

RF_RaidReconDesc_t *rf_AllocRaidReconDesc(RF_Raid_t *,
	RF_RowCol_t, RF_RowCol_t, RF_RaidDisk_t *, int,
	RF_RowCol_t, RF_RowCol_t);
int  rf_ProcessReconEvent(RF_Raid_t *, RF_RowCol_t, RF_ReconEvent_t *);
int  rf_IssueNextReadRequest(RF_Raid_t *, RF_RowCol_t, RF_RowCol_t);
int  rf_TryToRead(RF_Raid_t *, RF_RowCol_t, RF_RowCol_t);
int  rf_ComputePSDiskOffsets(RF_Raid_t *, RF_StripeNum_t,
	RF_RowCol_t, RF_RowCol_t, RF_SectorNum_t *, RF_SectorNum_t *,
	RF_RowCol_t *, RF_RowCol_t *, RF_SectorNum_t *);
int  rf_ReconReadDoneProc(void *, int);
int  rf_ReconWriteDoneProc(void *, int);
void rf_CheckForNewMinHeadSep(RF_Raid_t *, RF_RowCol_t, RF_HeadSepLimit_t);
int  rf_CheckHeadSeparation(RF_Raid_t *, RF_PerDiskReconCtrl_t *,
	RF_RowCol_t, RF_RowCol_t, RF_HeadSepLimit_t, RF_ReconUnitNum_t);
void rf_ForceReconReadDoneProc(void *, int);
void rf_ShutdownReconstruction(void *);

/*
 * These functions are inlined on gcc.  If they are used more than
 * once, it is strongly advised to un-inline them.
 */
void rf_FreeReconDesc(RF_RaidReconDesc_t *);
int  rf_IssueNextWriteRequest(RF_Raid_t *, RF_RowCol_t);
int  rf_CheckForcedOrBlockedReconstruction(RF_Raid_t *,
	RF_ReconParityStripeStatus_t *, RF_PerDiskReconCtrl_t *,
	RF_RowCol_t, RF_RowCol_t, RF_StripeNum_t, RF_ReconUnitNum_t);
void rf_SignalReconDone(RF_Raid_t *);

/* One registered reconstruction-completion callback (singly linked). */
struct RF_ReconDoneProc_s {
	void (*proc) (RF_Raid_t *, void *);	/* callback to invoke */
	void *arg;				/* opaque callback argument */
	RF_ReconDoneProc_t *next;		/* next entry in list */
};

/* Freelist of recon-done callback entries (rf_RegisterReconDoneProc). */
static RF_FreeList_t *rf_rdp_freelist;
#define RF_MAX_FREE_RDP 4
#define RF_RDP_INC 1
208:
209: void
210: rf_SignalReconDone(RF_Raid_t *raidPtr)
211: {
212: RF_ReconDoneProc_t *p;
213:
214: RF_LOCK_MUTEX(raidPtr->recon_done_proc_mutex);
215: for (p = raidPtr->recon_done_procs; p; p = p->next) {
216: p->proc(raidPtr, p->arg);
217: }
218: RF_UNLOCK_MUTEX(raidPtr->recon_done_proc_mutex);
219: }
220:
221: int
222: rf_RegisterReconDoneProc(RF_Raid_t *raidPtr, void (*proc) (RF_Raid_t *, void *),
223: void *arg, RF_ReconDoneProc_t **handlep)
224: {
225: RF_ReconDoneProc_t *p;
226:
227: RF_FREELIST_GET(rf_rdp_freelist, p, next, (RF_ReconDoneProc_t *));
228: if (p == NULL)
229: return (ENOMEM);
230: p->proc = proc;
231: p->arg = arg;
232: RF_LOCK_MUTEX(raidPtr->recon_done_proc_mutex);
233: p->next = raidPtr->recon_done_procs;
234: raidPtr->recon_done_procs = p;
235: RF_UNLOCK_MUTEX(raidPtr->recon_done_proc_mutex);
236: if (handlep)
237: *handlep = p;
238: return (0);
239: }
240:
241: /*****************************************************************************
242: *
243: * Sets up the parameters that will be used by the reconstruction process.
244: * Currently there are none, except for those that the layout-specific
245: * configuration (e.g. rf_ConfigureDeclustered) routine sets up.
246: *
247: * In the kernel, we fire off the recon thread.
248: *
249: *****************************************************************************/
/*
 * Shutdown hook: release the two freelists allocated by
 * rf_ConfigureReconstruction().  The argument is unused (the
 * rf_ShutdownCreate() hook signature requires it).
 */
void
rf_ShutdownReconstruction(void *ignored)
{
	RF_FREELIST_DESTROY(rf_recond_freelist, next, (RF_RaidReconDesc_t *));
	RF_FREELIST_DESTROY(rf_rdp_freelist, next, (RF_ReconDoneProc_t *));
}
256:
257: int
258: rf_ConfigureReconstruction(RF_ShutdownList_t **listp)
259: {
260: int rc;
261:
262: RF_FREELIST_CREATE(rf_recond_freelist, RF_MAX_FREE_RECOND,
263: RF_RECOND_INC, sizeof(RF_RaidReconDesc_t));
264: if (rf_recond_freelist == NULL)
265: return (ENOMEM);
266: RF_FREELIST_CREATE(rf_rdp_freelist, RF_MAX_FREE_RDP,
267: RF_RDP_INC, sizeof(RF_ReconDoneProc_t));
268: if (rf_rdp_freelist == NULL) {
269: RF_FREELIST_DESTROY(rf_recond_freelist, next,
270: (RF_RaidReconDesc_t *));
271: return (ENOMEM);
272: }
273: rc = rf_ShutdownCreate(listp, rf_ShutdownReconstruction, NULL);
274: if (rc) {
275: RF_ERRORMSG3("Unable to add to shutdown list file %s line %d"
276: " rc=%d.\n", __FILE__, __LINE__, rc);
277: rf_ShutdownReconstruction(NULL);
278: return (rc);
279: }
280: return (0);
281: }
282:
283: RF_RaidReconDesc_t *
284: rf_AllocRaidReconDesc(RF_Raid_t *raidPtr, RF_RowCol_t row, RF_RowCol_t col,
285: RF_RaidDisk_t *spareDiskPtr, int numDisksDone, RF_RowCol_t srow,
286: RF_RowCol_t scol)
287: {
288:
289: RF_RaidReconDesc_t *reconDesc;
290:
291: RF_FREELIST_GET(rf_recond_freelist, reconDesc, next,
292: (RF_RaidReconDesc_t *));
293:
294: reconDesc->raidPtr = raidPtr;
295: reconDesc->row = row;
296: reconDesc->col = col;
297: reconDesc->spareDiskPtr = spareDiskPtr;
298: reconDesc->numDisksDone = numDisksDone;
299: reconDesc->srow = srow;
300: reconDesc->scol = scol;
301: reconDesc->state = 0;
302: reconDesc->next = NULL;
303:
304: return (reconDesc);
305: }
306:
/*
 * Print per-reconstruction statistics and return the descriptor to
 * the freelist.  Called once, after the reconstruction it describes
 * has finished (see rf_ContinueReconstructFailedDisk(), state 7).
 */
void
rf_FreeReconDesc(RF_RaidReconDesc_t *reconDesc)
{
#if RF_RECON_STATS > 0
	printf("RAIDframe: %qu recon event waits, %qu recon delays.\n",
	    reconDesc->numReconEventWaits, reconDesc->numReconExecDelays);
#endif	/* RF_RECON_STATS > 0 */

	printf("RAIDframe: %qu max exec ticks.\n",
	    reconDesc->maxReconExecTicks);

#if (RF_RECON_STATS > 0) || defined(_KERNEL)
	printf("\n");
#endif	/* (RF_RECON_STATS > 0) || _KERNEL */
	RF_FREELIST_FREE(rf_recond_freelist, reconDesc, next);
}
323:
324:
325: /*****************************************************************************
326: *
327: * Primary routine to reconstruct a failed disk. This should be called from
328: * within its own thread. It won't return until reconstruction completes,
329: * fails, or is aborted.
330: *
331: *****************************************************************************/
332: int
333: rf_ReconstructFailedDisk(RF_Raid_t *raidPtr, RF_RowCol_t row, RF_RowCol_t col)
334: {
335: RF_LayoutSW_t *lp;
336: int rc;
337:
338: lp = raidPtr->Layout.map;
339: if (lp->SubmitReconBuffer) {
340: /*
341: * The current infrastructure only supports reconstructing one
342: * disk at a time for each array.
343: */
344: RF_LOCK_MUTEX(raidPtr->mutex);
345: while (raidPtr->reconInProgress) {
346: RF_WAIT_COND(raidPtr->waitForReconCond, raidPtr->mutex);
347: }
348: raidPtr->reconInProgress++;
349: RF_UNLOCK_MUTEX(raidPtr->mutex);
350: rc = rf_ReconstructFailedDiskBasic(raidPtr, row, col);
351: RF_LOCK_MUTEX(raidPtr->mutex);
352: raidPtr->reconInProgress--;
353: RF_UNLOCK_MUTEX(raidPtr->mutex);
354: } else {
355: RF_ERRORMSG1("RECON: no way to reconstruct failed disk for"
356: " arch %c.\n", lp->parityConfig);
357: rc = EIO;
358: }
359: RF_SIGNAL_COND(raidPtr->waitForReconCond);
360: wakeup(&raidPtr->waitForReconCond); /*
361: * XXX Methinks this will be
362: * needed at some point... GO
363: */
364: return (rc);
365: }
366:
/*
 * Reconstruct the failed disk at [row][col] onto a spare.  With
 * distributed sparing the data goes to the distributed spare space of
 * the same row; otherwise the first available dedicated spare in row 0
 * is claimed.  On success the new component's label is rewritten.
 *
 * Returns 0 on success, EINVAL if the row is not degraded (distributed
 * case), ENOSPC if no spare is available, or the error from
 * rf_ContinueReconstructFailedDisk().
 *
 * Caller must hold the per-array reconstruction slot (reconInProgress),
 * as rf_ReconstructFailedDisk() does.
 */
int
rf_ReconstructFailedDiskBasic(RF_Raid_t *raidPtr, RF_RowCol_t row,
    RF_RowCol_t col)
{
	RF_ComponentLabel_t c_label;
	RF_RaidDisk_t *spareDiskPtr = NULL;
	RF_RaidReconDesc_t *reconDesc;
	RF_RowCol_t srow, scol;
	int numDisksDone = 0, rc;

	/* First look for a spare drive onto which to reconstruct the data. */
	/*
	 * Spare disk descriptors are stored in row 0. This may have to
	 * change eventually.
	 */

	RF_LOCK_MUTEX(raidPtr->mutex);
	RF_ASSERT(raidPtr->Disks[row][col].status == rf_ds_failed);

	if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) {
		/* Distributed sparing: scol == -1 marks "no physical spare". */
		if (raidPtr->status[row] != rf_rs_degraded) {
			RF_ERRORMSG2("Unable to reconstruct disk at row %d"
			    " col %d because status not degraded.\n", row, col);
			RF_UNLOCK_MUTEX(raidPtr->mutex);
			return (EINVAL);
		}
		srow = row;
		scol = (-1);
	} else {
		/* Dedicated spares live past numCol in row 0. */
		srow = 0;
		for (scol = raidPtr->numCol;
		    scol < raidPtr->numCol + raidPtr->numSpare; scol++) {
			if (raidPtr->Disks[srow][scol].status == rf_ds_spare) {
				spareDiskPtr = &raidPtr->Disks[srow][scol];
				spareDiskPtr->status = rf_ds_used_spare;
				break;
			}
		}
		if (!spareDiskPtr) {
			RF_ERRORMSG2("Unable to reconstruct disk at row %d"
			    " col %d because no spares are available.\n",
			    row, col);
			RF_UNLOCK_MUTEX(raidPtr->mutex);
			return (ENOSPC);
		}
		printf("RECON: initiating reconstruction on row %d col %d"
		    " -> spare at row %d col %d.\n", row, col, srow, scol);
	}
	RF_UNLOCK_MUTEX(raidPtr->mutex);

	/*
	 * NOTE(review): the allocation result is used unchecked here;
	 * an OOM would fault below — confirm whether that is acceptable
	 * kernel policy for this path.
	 */
	reconDesc = rf_AllocRaidReconDesc((void *) raidPtr, row, col,
	    spareDiskPtr, numDisksDone, srow, scol);
	raidPtr->reconDesc = (void *) reconDesc;
#if RF_RECON_STATS > 0
	reconDesc->hsStallCount = 0;
	reconDesc->numReconExecDelays = 0;
	reconDesc->numReconEventWaits = 0;
#endif	/* RF_RECON_STATS > 0 */
	reconDesc->reconExecTimerRunning = 0;
	reconDesc->reconExecTicks = 0;
	reconDesc->maxReconExecTicks = 0;
	rc = rf_ContinueReconstructFailedDisk(reconDesc);

	if (!rc) {
		/* Fix up the component label. */
		/* Don't actually need the read here... */
		raidread_component_label(
		    raidPtr->raid_cinfo[srow][scol].ci_dev,
		    raidPtr->raid_cinfo[srow][scol].ci_vp,
		    &c_label);

		raid_init_component_label(raidPtr, &c_label);
		c_label.row = row;
		c_label.column = col;
		c_label.clean = RF_RAID_DIRTY;
		c_label.status = rf_ds_optimal;

		/* XXXX MORE NEEDED HERE. */

		raidwrite_component_label(
		    raidPtr->raid_cinfo[srow][scol].ci_dev,
		    raidPtr->raid_cinfo[srow][scol].ci_vp,
		    &c_label);

	}
	return (rc);
}
454:
455: /*
456: *
457: * Allow reconstructing a disk in-place -- i.e. component /dev/sd2e goes AWOL,
458: * and you don't get a spare until the next Monday. With this function
459: * (and hot-swappable drives) you can now put your new disk containing
460: * /dev/sd2e on the bus, scsictl it alive, and then use raidctl(8) to
461: * rebuild the data "on the spot".
462: *
463: */
464:
/*
 * Reconstruct a component "in place": the replacement disk occupies
 * the same [row][col] slot as the failed one (no dedicated spare is
 * consumed).  The component device is closed, re-looked-up and
 * re-opened, its geometry re-read via DIOCGPART, and then the normal
 * reconstruction machinery is run with the slot acting as its own
 * spare.  On success the component is marked optimal and its label
 * rewritten.
 *
 * Returns 0 on success, EINVAL for disallowed states (another disk
 * failed, recon already running, distributed sparing), EIO when the
 * layout has no reconstruction support, or an error from the vnode
 * operations.
 */
int
rf_ReconstructInPlace(RF_Raid_t *raidPtr, RF_RowCol_t row, RF_RowCol_t col)
{
	RF_RaidDisk_t *spareDiskPtr = NULL;
	RF_RaidReconDesc_t *reconDesc;
	RF_LayoutSW_t *lp;
	RF_RaidDisk_t *badDisk;	/* NOTE(review): set below but never read. */
	RF_ComponentLabel_t c_label;
	int numDisksDone = 0, rc;
	struct partinfo dpart;
	struct vnode *vp;
	struct vattr va;
	struct proc *proc;
	int retcode;
	int ac;

	lp = raidPtr->Layout.map;
	if (lp->SubmitReconBuffer) {
		/*
		 * The current infrastructure only supports reconstructing one
		 * disk at a time for each array.
		 */
		RF_LOCK_MUTEX(raidPtr->mutex);
		if ((raidPtr->Disks[row][col].status == rf_ds_optimal) &&
		    (raidPtr->numFailures > 0)) {
			/* XXX 0 above shouldn't be constant !!! */
			/*
			 * Some component other than this has failed.
			 * Let's not make things worse than they already
			 * are...
			 */
#ifdef RAIDDEBUG
			printf("RAIDFRAME: Unable to reconstruct to disk at:\n"
			    " Row: %d Col: %d Too many failures.\n",
			    row, col);
#endif	/* RAIDDEBUG */
			RF_UNLOCK_MUTEX(raidPtr->mutex);
			return (EINVAL);
		}
		if (raidPtr->Disks[row][col].status == rf_ds_reconstructing) {
#ifdef RAIDDEBUG
			printf("RAIDFRAME: Unable to reconstruct to disk at:\n"
			    " Row: %d Col: %d Reconstruction already"
			    " occurring !\n", row, col);
#endif	/* RAIDDEBUG */

			RF_UNLOCK_MUTEX(raidPtr->mutex);
			return (EINVAL);
		}


		if (raidPtr->Disks[row][col].status != rf_ds_failed) {
			/* "It's gone..."  Mark it failed so the rebuild
			 * machinery treats the slot consistently. */
			raidPtr->numFailures++;
			raidPtr->Disks[row][col].status = rf_ds_failed;
			raidPtr->status[row] = rf_rs_degraded;
			rf_update_component_labels(raidPtr,
			    RF_NORMAL_COMPONENT_UPDATE);
		}

		/* Serialize: only one reconstruction per array. */
		while (raidPtr->reconInProgress) {
			RF_WAIT_COND(raidPtr->waitForReconCond, raidPtr->mutex);
		}

		raidPtr->reconInProgress++;

		/*
		 * First look for a spare drive onto which to reconstruct
		 * the data.  Spare disk descriptors are stored in row 0.
		 * This may have to change eventually.
		 */

		/*
		 * Actually, we don't care if it's failed or not...
		 * On a RAID set with correct parity, this function
		 * should be callable on any component without ill effects.
		 */
		/*
		 * RF_ASSERT(raidPtr->Disks[row][col].status == rf_ds_failed);
		 */

		if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) {
			RF_ERRORMSG2("Unable to reconstruct to disk at row %d"
			    " col %d: operation not supported for"
			    " RF_DISTRIBUTE_SPARE.\n", row, col);

			raidPtr->reconInProgress--;
			RF_UNLOCK_MUTEX(raidPtr->mutex);
			return (EINVAL);
		}

		/*
		 * XXX Need goop here to see if the disk is alive,
		 * and, if not, make it so...
		 */

		badDisk = &raidPtr->Disks[row][col];

		proc = raidPtr->recon_thread;

		/*
		 * This device may have been opened successfully the
		 * first time. Close it before trying to open it again...
		 */

		if (raidPtr->raid_cinfo[row][col].ci_vp != NULL) {
			printf("Closing the opened device: %s\n",
			    raidPtr->Disks[row][col].devname);
			vp = raidPtr->raid_cinfo[row][col].ci_vp;
			ac = raidPtr->Disks[row][col].auto_configured;
			rf_close_component(raidPtr, vp, ac);
			raidPtr->raid_cinfo[row][col].ci_vp = NULL;
		}
		/*
		 * Note that this disk was *not* auto_configured (any longer).
		 */
		raidPtr->Disks[row][col].auto_configured = 0;

		printf("About to (re-)open the device for rebuilding: %s\n",
		    raidPtr->Disks[row][col].devname);

		retcode = raidlookup(raidPtr->Disks[row][col].devname,
		    proc, &vp);

		if (retcode) {
			printf("raid%d: rebuilding: raidlookup on device: %s"
			    " failed: %d !\n", raidPtr->raidid,
			    raidPtr->Disks[row][col].devname, retcode);

			/*
			 * XXX the component isn't responding properly...
			 * Must still be dead :-(
			 */
			raidPtr->reconInProgress--;
			RF_UNLOCK_MUTEX(raidPtr->mutex);
			return(retcode);

		} else {

			/*
			 * Ok, so we can at least do a lookup...
			 * How about actually getting a vp for it ?
			 */

			if ((retcode =
			    VOP_GETATTR(vp, &va, proc->p_ucred, proc)) != 0) {
				raidPtr->reconInProgress--;
				RF_UNLOCK_MUTEX(raidPtr->mutex);
				return(retcode);
			}
			/* Ask the disk driver for the partition geometry. */
			retcode = VOP_IOCTL(vp, DIOCGPART, (caddr_t) & dpart,
			    FREAD, proc->p_ucred, proc);
			if (retcode) {
				raidPtr->reconInProgress--;
				RF_UNLOCK_MUTEX(raidPtr->mutex);
				return(retcode);
			}
			raidPtr->Disks[row][col].blockSize =
			    dpart.disklab->d_secsize;

			/* Reserve rf_protectedSectors for the label area. */
			raidPtr->Disks[row][col].numBlocks =
			    DL_GETPSIZE(dpart.part) - rf_protectedSectors;

			raidPtr->raid_cinfo[row][col].ci_vp = vp;
			raidPtr->raid_cinfo[row][col].ci_dev = va.va_rdev;

			raidPtr->Disks[row][col].dev = va.va_rdev;

			/*
			 * We allow the user to specify that only a
			 * fraction of the disks should be used this is
			 * just for debug: it speeds up the parity scan.
			 */
			raidPtr->Disks[row][col].numBlocks =
			    raidPtr->Disks[row][col].numBlocks *
			    rf_sizePercentage / 100;
		}

		/* The slot acts as its own spare for the rebuild. */
		spareDiskPtr = &raidPtr->Disks[row][col];
		spareDiskPtr->status = rf_ds_used_spare;

		printf("RECON: Initiating in-place reconstruction on\n");
		printf(" row %d col %d -> spare at row %d col %d.\n",
		    row, col, row, col);

		RF_UNLOCK_MUTEX(raidPtr->mutex);

		reconDesc = rf_AllocRaidReconDesc((void *) raidPtr, row, col,
		    spareDiskPtr, numDisksDone, row, col);
		raidPtr->reconDesc = (void *) reconDesc;
#if RF_RECON_STATS > 0
		reconDesc->hsStallCount = 0;
		reconDesc->numReconExecDelays = 0;
		reconDesc->numReconEventWaits = 0;
#endif	/* RF_RECON_STATS > 0 */
		reconDesc->reconExecTimerRunning = 0;
		reconDesc->reconExecTicks = 0;
		reconDesc->maxReconExecTicks = 0;
		rc = rf_ContinueReconstructFailedDisk(reconDesc);

		RF_LOCK_MUTEX(raidPtr->mutex);
		raidPtr->reconInProgress--;
		RF_UNLOCK_MUTEX(raidPtr->mutex);

	} else {
		RF_ERRORMSG1("RECON: no way to reconstruct failed disk for"
		    " arch %c.\n", lp->parityConfig);
		rc = EIO;
	}
	RF_LOCK_MUTEX(raidPtr->mutex);

	if (!rc) {
		/*
		 * Need to set these here, as at this point it'll be claiming
		 * that the disk is in rf_ds_spared ! But we know better :-)
		 */

		raidPtr->Disks[row][col].status = rf_ds_optimal;
		raidPtr->status[row] = rf_rs_optimal;

		/* Fix up the component label. */
		/* Don't actually need the read here... */
		raidread_component_label(
		    raidPtr->raid_cinfo[row][col].ci_dev,
		    raidPtr->raid_cinfo[row][col].ci_vp,
		    &c_label);

		raid_init_component_label(raidPtr, &c_label);

		c_label.row = row;
		c_label.column = col;

		raidwrite_component_label(raidPtr->raid_cinfo[row][col].ci_dev,
		    raidPtr->raid_cinfo[row][col].ci_vp, &c_label);

	}
	RF_UNLOCK_MUTEX(raidPtr->mutex);
	RF_SIGNAL_COND(raidPtr->waitForReconCond);
	wakeup(&raidPtr->waitForReconCond);
	return (rc);
}
706:
707:
/*
 * The reconstruction state machine.  Drives a rebuild through its
 * states (stored in reconDesc->state so the function can be re-entered
 * as an event continuation — see rf_GetNextReconEvent() below):
 *
 *   0  quiesce the array and allocate trace records
 *   1  install recon control, mark disk/row reconstructing, issue the
 *      initial read on every surviving disk
 *   2  resume user requests
 *   3  process events until all disks have been fully read
 *   4  process events until all pending writes have completed
 *   5  re-quiesce the array and dump user stats
 *   6  mark the disk spared/dist-spared and compute elapsed time
 *   7  resume requests, print stats, free recon state
 *
 * Cases deliberately fall through; states 0/5 set ->state before
 * blocking so a re-entry resumes in the right place.  Returns 0.
 */
int
rf_ContinueReconstructFailedDisk(RF_RaidReconDesc_t *reconDesc)
{
	RF_Raid_t *raidPtr = reconDesc->raidPtr;
	RF_RowCol_t row = reconDesc->row;
	RF_RowCol_t col = reconDesc->col;
	RF_RowCol_t srow = reconDesc->srow;
	RF_RowCol_t scol = reconDesc->scol;
	RF_ReconMap_t *mapPtr;

	RF_ReconEvent_t *event;
	struct timeval etime, elpsd;
	unsigned long xor_s, xor_resid_us;
	int retcode, i, ds;

	switch (reconDesc->state) {
	case 0:
		raidPtr->accumXorTimeUs = 0;

		/* Create one trace record per physical disk. */
		RF_Malloc(raidPtr->recon_tracerecs, raidPtr->numCol *
		    sizeof(RF_AccTraceEntry_t), (RF_AccTraceEntry_t *));

		/*
		 * Quiesce the array prior to starting recon. This is needed
		 * to assure no nasty interactions with pending user writes.
		 * We need to do this before we change the disk or row status.
		 */
		reconDesc->state = 1;

		Dprintf("RECON: begin request suspend.\n");
		/* NOTE(review): retcode is not checked here — confirm
		 * rf_SuspendNewRequestsAndWait() cannot fail on this path. */
		retcode = rf_SuspendNewRequestsAndWait(raidPtr);
		Dprintf("RECON: end request suspend.\n");
		rf_StartUserStats(raidPtr);	/*
						 * Zero out the stats kept on
						 * user accs.
						 */
		/* Fall through to state 1. */
	case 1:
		/* FALLTHROUGH */
		RF_LOCK_MUTEX(raidPtr->mutex);

		/*
		 * Create the reconstruction control pointer and install it in
		 * the right slot.
		 */
		raidPtr->reconControl[row] =
		    rf_MakeReconControl(reconDesc, row, col, srow, scol);
		mapPtr = raidPtr->reconControl[row]->reconMap;
		raidPtr->status[row] = rf_rs_reconstructing;
		raidPtr->Disks[row][col].status = rf_ds_reconstructing;
		raidPtr->Disks[row][col].spareRow = srow;
		raidPtr->Disks[row][col].spareCol = scol;

		RF_UNLOCK_MUTEX(raidPtr->mutex);

		RF_GETTIME(raidPtr->reconControl[row]->starttime);

		/*
		 * Now start up the actual reconstruction: issue a read for
		 * each surviving disk.
		 */

		reconDesc->numDisksDone = 0;
		for (i = 0; i < raidPtr->numCol; i++) {
			if (i != col) {
				/*
				 * Find and issue the next I/O on the
				 * indicated disk.  A nonzero return means
				 * that disk has nothing (more) to read.
				 */
				if (rf_IssueNextReadRequest(raidPtr, row, i)) {
					Dprintf2("RECON: done issuing for r%d"
					    " c%d.\n", row, i);
					reconDesc->numDisksDone++;
				}
			}
		}

		reconDesc->state = 2;
		/* FALLTHROUGH */

	case 2:
		Dprintf("RECON: resume requests.\n");
		rf_ResumeNewRequests(raidPtr);

		reconDesc->state = 3;
		/* FALLTHROUGH */

	case 3:

		/*
		 * Process reconstruction events until all disks report that
		 * they've completed all work.
		 */
		mapPtr = raidPtr->reconControl[row]->reconMap;

		while (reconDesc->numDisksDone < raidPtr->numCol - 1) {

			event = rf_GetNextReconEvent(reconDesc, row,
			    (void (*) (void *)) rf_ContinueReconstructFailedDisk,
			    reconDesc);
			RF_ASSERT(event);

			if (rf_ProcessReconEvent(raidPtr, row, event))
				reconDesc->numDisksDone++;
			/* Refresh progress counters for status reporting. */
			raidPtr->reconControl[row]->numRUsTotal =
			    mapPtr->totalRUs;
			raidPtr->reconControl[row]->numRUsComplete =
			    mapPtr->totalRUs -
			    rf_UnitsLeftToReconstruct(mapPtr);

			raidPtr->reconControl[row]->percentComplete =
			    (raidPtr->reconControl[row]->numRUsComplete * 100 /
			     raidPtr->reconControl[row]->numRUsTotal);
			if (rf_prReconSched) {
				rf_PrintReconSchedule(
				    raidPtr->reconControl[row]->reconMap,
				    &(raidPtr->reconControl[row]->starttime));
			}
		}

		reconDesc->state = 4;
		/* FALLTHROUGH */

	case 4:
		mapPtr = raidPtr->reconControl[row]->reconMap;
		if (rf_reconDebug) {
			printf("RECON: all reads completed.\n");
		}
		/*
		 * At this point all the reads have completed. We now wait
		 * for any pending writes to complete, and then we're done.
		 */

		while (rf_UnitsLeftToReconstruct(
		    raidPtr->reconControl[row]->reconMap) > 0) {

			event = rf_GetNextReconEvent(reconDesc, row,
			    (void (*) (void *)) rf_ContinueReconstructFailedDisk,
			    reconDesc);
			RF_ASSERT(event);

			/* Ignore return code. */
			(void) rf_ProcessReconEvent(raidPtr, row, event);
			raidPtr->reconControl[row]->percentComplete =
			    100 - (rf_UnitsLeftToReconstruct(mapPtr) * 100 /
			    mapPtr->totalRUs);
			if (rf_prReconSched) {
				rf_PrintReconSchedule(
				    raidPtr->reconControl[row]->reconMap,
				    &(raidPtr->reconControl[row]->starttime));
			}
		}
		reconDesc->state = 5;
		/* FALLTHROUGH */

	case 5:
		/*
		 * Success: mark the dead disk as reconstructed. We quiesce
		 * the array here to assure no nasty interactions with pending
		 * user accesses, when we free up the psstatus structure as
		 * part of FreeReconControl().
		 */

		reconDesc->state = 6;

		retcode = rf_SuspendNewRequestsAndWait(raidPtr);
		rf_StopUserStats(raidPtr);
		rf_PrintUserStats(raidPtr);	/*
						 * Print out the stats on user
						 * accs accumulated during
						 * recon.
						 */

		/* Fall through to state 6. */
	case 6:
		RF_LOCK_MUTEX(raidPtr->mutex);
		raidPtr->numFailures--;
		ds = (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE);
		raidPtr->Disks[row][col].status = (ds) ? rf_ds_dist_spared :
		    rf_ds_spared;
		raidPtr->status[row] = (ds) ? rf_rs_reconfigured :
		    rf_rs_optimal;
		RF_UNLOCK_MUTEX(raidPtr->mutex);
		RF_GETTIME(etime);
		RF_TIMEVAL_DIFF(&(raidPtr->reconControl[row]->starttime),
		    &etime, &elpsd);

		/*
		 * XXX -- Why is state 7 different from state 6 if there is no
		 * return() here ? -- XXX Note that I set elpsd above & use it
		 * below, so if you put a return here you'll have to fix this.
		 * (also, FreeReconControl is called below).
		 */

	case 7:
		/* FALLTHROUGH from state 6 (see note above). */

		rf_ResumeNewRequests(raidPtr);

		printf("Reconstruction of disk at row %d col %d completed.\n",
		    row, col);
		xor_s = raidPtr->accumXorTimeUs / 1000000;
		xor_resid_us = raidPtr->accumXorTimeUs % 1000000;
		printf("Recon time was %d.%06d seconds, accumulated XOR time"
		    " was %ld us (%ld.%06ld).\n", (int) elpsd.tv_sec,
		    (int) elpsd.tv_usec, raidPtr->accumXorTimeUs, xor_s,
		    xor_resid_us);
		printf(" (start time %d sec %d usec, end time %d sec %d"
		    " usec)\n",
		    (int) raidPtr->reconControl[row]->starttime.tv_sec,
		    (int) raidPtr->reconControl[row]->starttime.tv_usec,
		    (int) etime.tv_sec, (int) etime.tv_usec);

#if RF_RECON_STATS > 0
		printf("Total head-sep stall count was %d.\n",
		    (int) reconDesc->hsStallCount);
#endif	/* RF_RECON_STATS > 0 */
		rf_FreeReconControl(raidPtr, row);
		RF_Free(raidPtr->recon_tracerecs, raidPtr->numCol *
		    sizeof(RF_AccTraceEntry_t));
		rf_FreeReconDesc(reconDesc);

	}

	rf_SignalReconDone(raidPtr);
	return (0);
}
930:
931:
932: /*****************************************************************************
933: * Do the right thing upon each reconstruction event.
934: * Returns nonzero if and only if there is nothing left unread on the
935: * indicated disk.
936: *****************************************************************************/
937: int
938: rf_ProcessReconEvent(RF_Raid_t *raidPtr, RF_RowCol_t frow,
939: RF_ReconEvent_t *event)
940: {
941: int retcode = 0, submitblocked;
942: RF_ReconBuffer_t *rbuf;
943: RF_SectorCount_t sectorsPerRU;
944:
945: Dprintf1("RECON: rf_ProcessReconEvent type %d.\n", event->type);
946:
947: switch (event->type) {
948:
949: /* A read I/O has completed. */
950: case RF_REVENT_READDONE:
951: rbuf = raidPtr->reconControl[frow]
952: ->perDiskInfo[event->col].rbuf;
953: Dprintf3("RECON: READDONE EVENT: row %d col %d psid %ld.\n",
954: frow, event->col, rbuf->parityStripeID);
955: Dprintf7("RECON: done read psid %ld buf %lx %02x %02x %02x"
956: " %02x %02x.\n", rbuf->parityStripeID, rbuf->buffer,
957: rbuf->buffer[0] & 0xff, rbuf->buffer[1] & 0xff,
958: rbuf->buffer[2] & 0xff, rbuf->buffer[3] & 0xff,
959: rbuf->buffer[4] & 0xff);
960: rf_FreeDiskQueueData((RF_DiskQueueData_t *) rbuf->arg);
961: submitblocked = rf_SubmitReconBuffer(rbuf, 0, 0);
962: Dprintf1("RECON: submitblocked=%d.\n", submitblocked);
963: if (!submitblocked)
964: retcode = rf_IssueNextReadRequest(raidPtr, frow,
965: event->col);
966: break;
967:
968: /* A write I/O has completed. */
969: case RF_REVENT_WRITEDONE:
970: if (rf_floatingRbufDebug) {
971: rf_CheckFloatingRbufCount(raidPtr, 1);
972: }
973: sectorsPerRU = raidPtr->Layout.sectorsPerStripeUnit *
974: raidPtr->Layout.SUsPerRU;
975: rbuf = (RF_ReconBuffer_t *) event->arg;
976: rf_FreeDiskQueueData((RF_DiskQueueData_t *) rbuf->arg);
977: Dprintf3("RECON: WRITEDONE EVENT: psid %d ru %d"
978: " (%d %% complete).\n",
979: rbuf->parityStripeID, rbuf->which_ru,
980: raidPtr->reconControl[frow]->percentComplete);
981: rf_ReconMapUpdate(raidPtr, raidPtr->reconControl[frow]
982: ->reconMap, rbuf->failedDiskSectorOffset,
983: rbuf->failedDiskSectorOffset + sectorsPerRU - 1);
984: rf_RemoveFromActiveReconTable(raidPtr, frow,
985: rbuf->parityStripeID, rbuf->which_ru);
986:
987: if (rbuf->type == RF_RBUF_TYPE_FLOATING) {
988: RF_LOCK_MUTEX(raidPtr->reconControl[frow]->rb_mutex);
989: raidPtr->numFullReconBuffers--;
990: rf_ReleaseFloatingReconBuffer(raidPtr, frow, rbuf);
991: RF_UNLOCK_MUTEX(raidPtr->reconControl[frow]->rb_mutex);
992: } else
993: if (rbuf->type == RF_RBUF_TYPE_FORCED)
994: rf_FreeReconBuffer(rbuf);
995: else
996: RF_ASSERT(0);
997: break;
998:
999: /* A buffer-stall condition has been cleared. */
1000: case RF_REVENT_BUFCLEAR:
1001: Dprintf2("RECON: BUFCLEAR EVENT: row %d col %d.\n", frow,
1002: event->col);
1003: submitblocked = rf_SubmitReconBuffer(raidPtr
1004: ->reconControl[frow]->perDiskInfo[event->col].rbuf, 0,
1005: (int) (long) event->arg);
1006: RF_ASSERT(!submitblocked); /*
1007: * We wouldn't have gotten the
1008: * BUFCLEAR event if we
1009: * couldn't submit.
1010: */
1011: retcode = rf_IssueNextReadRequest(raidPtr, frow, event->col);
1012: break;
1013:
1014: /* A user-write reconstruction blockage has been cleared. */
1015: case RF_REVENT_BLOCKCLEAR:
1016: DDprintf2("RECON: BLOCKCLEAR EVENT: row %d col %d.\n",
1017: frow, event->col);
1018: retcode = rf_TryToRead(raidPtr, frow, event->col);
1019: break;
1020:
1021: /*
1022: * A max-head-separation reconstruction blockage has been
1023: * cleared.
1024: */
1025: case RF_REVENT_HEADSEPCLEAR:
1026: Dprintf2("RECON: HEADSEPCLEAR EVENT: row %d col %d.\n",
1027: frow, event->col);
1028: retcode = rf_TryToRead(raidPtr, frow, event->col);
1029: break;
1030:
1031: /* A buffer has become ready to write. */
1032: case RF_REVENT_BUFREADY:
1033: Dprintf2("RECON: BUFREADY EVENT: row %d col %d.\n",
1034: frow, event->col);
1035: retcode = rf_IssueNextWriteRequest(raidPtr, frow);
1036: if (rf_floatingRbufDebug) {
1037: rf_CheckFloatingRbufCount(raidPtr, 1);
1038: }
1039: break;
1040:
1041: /*
1042: * We need to skip the current RU entirely because it got
1043: * recon'd while we were waiting for something else to happen.
1044: */
1045: case RF_REVENT_SKIP:
1046: DDprintf2("RECON: SKIP EVENT: row %d col %d.\n",
1047: frow, event->col);
1048: retcode = rf_IssueNextReadRequest(raidPtr, frow, event->col);
1049: break;
1050:
1051: /*
1052: * A forced-reconstruction read access has completed. Just
1053: * submit the buffer.
1054: */
1055: case RF_REVENT_FORCEDREADDONE:
1056: rbuf = (RF_ReconBuffer_t *) event->arg;
1057: rf_FreeDiskQueueData((RF_DiskQueueData_t *) rbuf->arg);
1058: DDprintf2("RECON: FORCEDREADDONE EVENT: row %d col %d.\n",
1059: frow, event->col);
1060: submitblocked = rf_SubmitReconBuffer(rbuf, 1, 0);
1061: RF_ASSERT(!submitblocked);
1062: break;
1063:
1064: default:
1065: RF_PANIC();
1066: }
1067: rf_FreeReconEventDesc(event);
1068: return (retcode);
1069: }
1070:
1071: /*****************************************************************************
1072: *
1073: * Find the next thing that's needed on the indicated disk, and issue
1074: * a read request for it. We assume that the reconstruction buffer
1075: * associated with this process is free to receive the data. If
1076: * reconstruction is blocked on the indicated RU, we issue a
1077: * blockage-release request instead of a physical disk read request.
1078: * If the current disk gets too far ahead of the others, we issue a
1079: * head-separation wait request and return.
1080: *
1081: * ctrl->{ru_count, curPSID, diskOffset} and
1082: * rbuf->failedDiskSectorOffset are maintained to point to the unit
1083: * we're currently accessing. Note that this deviates from the
1084: * standard C idiom of having counters point to the next thing to be
1085: * accessed. This allows us to easily retry when we're blocked by
1086: * head separation or reconstruction-blockage events.
1087: *
1088: * Returns nonzero if and only if there is nothing left unread on the
1089: * indicated disk.
1090: *
1091: *****************************************************************************/
/*
 * Advance this disk's per-disk counters (ru_count/curPSID/diskOffset)
 * to the next reconstruction unit that still needs to be read, then
 * attempt to issue the read via rf_TryToRead().  Returns 1 if and only
 * if nothing is left unread on the indicated disk.
 */
int
rf_IssueNextReadRequest(RF_Raid_t *raidPtr, RF_RowCol_t row, RF_RowCol_t col)
{
	RF_PerDiskReconCtrl_t *ctrl =
	    &raidPtr->reconControl[row]->perDiskInfo[col];
	RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
	RF_ReconBuffer_t *rbuf = ctrl->rbuf;
	/* Number of reconstruction units per parity unit. */
	RF_ReconUnitCount_t RUsPerPU =
	    layoutPtr->SUsPerPU / layoutPtr->SUsPerRU;
	RF_SectorCount_t sectorsPerRU =
	    layoutPtr->sectorsPerStripeUnit * layoutPtr->SUsPerRU;
	int do_new_check = 0, retcode = 0, status;

	/*
	 * If we are currently the slowest disk, mark that we have to do a new
	 * check.
	 */
	if (ctrl->headSepCounter <=
	    raidPtr->reconControl[row]->minHeadSepCounter)
		do_new_check = 1;

	while (1) {

		ctrl->ru_count++;
		if (ctrl->ru_count < RUsPerPU) {
			/*
			 * Still inside the current parity stripe: step one
			 * RU forward on both the current and failed disks.
			 */
			ctrl->diskOffset += sectorsPerRU;
			rbuf->failedDiskSectorOffset += sectorsPerRU;
		} else {
			ctrl->curPSID++;
			ctrl->ru_count = 0;
			/* code left over from when head-sep was based on
			 * parity stripe id */
			if (ctrl->curPSID >=
			    raidPtr->reconControl[row]->lastPSID) {
				rf_CheckForNewMinHeadSep(raidPtr, row,
				    ++(ctrl->headSepCounter));
				return (1);	/* Finito ! */
			}
			/*
			 * Find the disk offsets of the start of the parity
			 * stripe on both the current disk and the failed
			 * disk. Skip this entire parity stripe if either disk
			 * does not appear in the indicated PS.
			 */
			status = rf_ComputePSDiskOffsets(raidPtr,
			    ctrl->curPSID, row, col, &ctrl->diskOffset,
			    &rbuf->failedDiskSectorOffset, &rbuf->spRow,
			    &rbuf->spCol, &rbuf->spOffset);
			if (status) {
				/*
				 * Force the next loop iteration to advance
				 * straight to the following parity stripe.
				 */
				ctrl->ru_count = RUsPerPU - 1;
				continue;
			}
		}
		rbuf->which_ru = ctrl->ru_count;

		/* Skip this RU if it's already been reconstructed. */
		if (rf_CheckRUReconstructed(raidPtr->reconControl[row]
		    ->reconMap, rbuf->failedDiskSectorOffset)) {
			Dprintf2("Skipping psid %ld ru %d: already"
			    " reconstructed.\n", ctrl->curPSID, ctrl->ru_count);
			continue;
		}
		break;
	}
	ctrl->headSepCounter++;
	if (do_new_check)	/* Update min if needed. */
		rf_CheckForNewMinHeadSep(raidPtr, row, ctrl->headSepCounter);


	/*
	 * At this point, we have definitely decided what to do, and we have
	 * only to see if we can actually do it now.
	 */
	rbuf->parityStripeID = ctrl->curPSID;
	rbuf->which_ru = ctrl->ru_count;
	/* Reset this column's trace record and start timing the access. */
	bzero((char *) &raidPtr->recon_tracerecs[col],
	    sizeof(raidPtr->recon_tracerecs[col]));
	raidPtr->recon_tracerecs[col].reconacc = 1;
	RF_ETIMER_START(raidPtr->recon_tracerecs[col].recon_timer);
	retcode = rf_TryToRead(raidPtr, row, col);
	return (retcode);
}
1174:
1175: /*
1176: * Tries to issue the next read on the indicated disk. We may be
1177: * blocked by (a) the heads being too far apart, or (b) recon on the
1178: * indicated RU being blocked due to a write by a user thread. In
1179: * this case, we issue a head-sep or blockage wait request, which will
1180: * cause this same routine to be invoked again later when the blockage
1181: * has cleared.
1182: */
1183:
/*
 * Attempt to issue the reconstruction read previously selected by
 * rf_IssueNextReadRequest() on disk (row, col).  Always returns 0:
 * when blocked (head separation, user write, or prior recon), a
 * wakeup event is scheduled instead and this routine will be invoked
 * again later.  Holds the PSS mutex for the stripe while it runs.
 */
int
rf_TryToRead(RF_Raid_t *raidPtr, RF_RowCol_t row, RF_RowCol_t col)
{
	RF_PerDiskReconCtrl_t *ctrl =
	    &raidPtr->reconControl[row]->perDiskInfo[col];
	RF_SectorCount_t sectorsPerRU =
	    raidPtr->Layout.sectorsPerStripeUnit * raidPtr->Layout.SUsPerRU;
	RF_StripeNum_t psid = ctrl->curPSID;
	RF_ReconUnitNum_t which_ru = ctrl->ru_count;
	RF_DiskQueueData_t *req;
	int status, created = 0;
	RF_ReconParityStripeStatus_t *pssPtr;

	/*
	 * If the current disk is too far ahead of the others, issue a
	 * head-separation wait and return.
	 */
	if (rf_CheckHeadSeparation(raidPtr, ctrl, row, col,
	    ctrl->headSepCounter, which_ru))
		return (0);
	RF_LOCK_PSS_MUTEX(raidPtr, row, psid);
	/* Look up (creating if absent) the status entry for this RU. */
	pssPtr = rf_LookupRUStatus(raidPtr, raidPtr->reconControl[row]
	    ->pssTable, psid, which_ru, RF_PSS_CREATE, &created);

	/*
	 * If recon is blocked on the indicated parity stripe, issue a
	 * block-wait request and return. This also must mark the indicated RU
	 * in the stripe as under reconstruction if not blocked.
	 */
	status = rf_CheckForcedOrBlockedReconstruction(raidPtr, pssPtr, ctrl,
	    row, col, psid, which_ru);
	if (status == RF_PSS_RECON_BLOCKED) {
		Dprintf2("RECON: Stalling psid %ld ru %d: recon blocked.\n",
		    psid, which_ru);
		goto out;
	} else
		if (status == RF_PSS_FORCED_ON_WRITE) {
			/* A forced recon already covers this RU; skip it. */
			rf_CauseReconEvent(raidPtr, row, col, NULL,
			    RF_REVENT_SKIP);
			goto out;
		}
	/*
	 * Make one last check to be sure that the indicated RU didn't get
	 * reconstructed while we were waiting for something else to happen.
	 * This is unfortunate in that it causes us to make this check twice
	 * in the normal case. Might want to make some attempt to re-work
	 * this so that we only do this check if we've definitely blocked on
	 * one of the above checks. When this condition is detected, we may
	 * have just created a bogus status entry, which we need to delete.
	 */
	if (rf_CheckRUReconstructed(raidPtr->reconControl[row]->reconMap,
	    ctrl->rbuf->failedDiskSectorOffset)) {
		Dprintf2("RECON: Skipping psid %ld ru %d: prior recon after"
		    " stall.\n", psid, which_ru);
		if (created)
			rf_PSStatusDelete(raidPtr,
			    raidPtr->reconControl[row]->pssTable, pssPtr);
		rf_CauseReconEvent(raidPtr, row, col, NULL, RF_REVENT_SKIP);
		goto out;
	}
	/* Found something to read. Issue the I/O. */
	Dprintf5("RECON: Read for psid %ld on row %d col %d offset %ld"
	    " buf %lx.\n", psid, row, col, ctrl->diskOffset,
	    ctrl->rbuf->buffer);
	/* Charge the elapsed time to the start-to-fetch bucket. */
	RF_ETIMER_STOP(raidPtr->recon_tracerecs[col].recon_timer);
	RF_ETIMER_EVAL(raidPtr->recon_tracerecs[col].recon_timer);
	raidPtr->recon_tracerecs[col].specific.recon.recon_start_to_fetch_us =
	    RF_ETIMER_VAL_US(raidPtr->recon_tracerecs[col].recon_timer);
	RF_ETIMER_START(raidPtr->recon_tracerecs[col].recon_timer);

	/*
	 * Should be ok to use a NULL proc pointer here, all the bufs we use
	 * should be in kernel space.
	 */
	req = rf_CreateDiskQueueData(RF_IO_TYPE_READ, ctrl->diskOffset,
	    sectorsPerRU, ctrl->rbuf->buffer, psid, which_ru,
	    rf_ReconReadDoneProc, (void *) ctrl, NULL,
	    &raidPtr->recon_tracerecs[col], (void *) raidPtr, 0, NULL);

	RF_ASSERT(req);	/* XXX -- Fix this. -- XXX */

	ctrl->rbuf->arg = (void *) req;
	rf_DiskIOEnqueue(&raidPtr->Queues[row][col], req, RF_IO_RECON_PRIORITY);
	/* Remember that the read for this column has been issued. */
	pssPtr->issued[col] = 1;

out:
	RF_UNLOCK_PSS_MUTEX(raidPtr, row, psid);
	return (0);
}
1273:
1274:
1275: /*
1276: * Given a parity stripe ID, we want to find out whether both the
1277: * current disk and the failed disk exist in that parity stripe. If
1278: * not, we want to skip this whole PS. If so, we want to find the
1279: * disk offset of the start of the PS on both the current disk and the
1280: * failed disk.
1281: *
1282: * This works by getting a list of disks comprising the indicated
1283: * parity stripe, and searching the list for the current and failed
1284: * disks. Once we've decided they both exist in the parity stripe, we
1285: * need to decide whether each is data or parity, so that we'll know
1286: * which mapping function to call to get the corresponding disk
1287: * offsets.
1288: *
1289: * This is kind of unpleasant, but doing it this way allows the
1290: * reconstruction code to use parity stripe IDs rather than physical
1291: * disks address to march through the failed disk, which greatly
1292: * simplifies a lot of code, as well as eliminating the need for a
1293: * reverse-mapping function. I also think it will execute faster,
1294: * since the calls to the mapping module are kept to a minimum.
1295: *
1296: * ASSUMES THAT THE STRIPE IDENTIFIER IDENTIFIES THE DISKS COMPRISING
1297: * THE STRIPE IN THE CORRECT ORDER.
1298: */
1299:
/*
 * Compute the disk offsets of the start of parity stripe `psid' on
 * both the indicated disk (row,col) and the failed disk, plus the
 * location of the spare unit for the failed unit.  Returns 1 (skip
 * this PS) when either disk does not appear in the stripe, else 0.
 */
int
rf_ComputePSDiskOffsets(
	RF_Raid_t *raidPtr,	/* RAID descriptor. */
	RF_StripeNum_t psid,	/* Parity stripe identifier. */
	RF_RowCol_t row,	/*
				 * Row and column of disk to find
				 * the offsets for.
				 */
	RF_RowCol_t col,
	RF_SectorNum_t *outDiskOffset,
	RF_SectorNum_t *outFailedDiskSectorOffset,
	RF_RowCol_t *spRow,	/*
				 * OUT: Row,col of spare unit for
				 * failed unit.
				 */
	RF_RowCol_t *spCol,
	RF_SectorNum_t *spOffset	/*
					 * OUT: Offset into disk containing
					 * spare unit.
					 */
)
{
	RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
	RF_RowCol_t fcol = raidPtr->reconControl[row]->fcol;
	RF_RaidAddr_t sosRaidAddress;	/* start-of-stripe */
	RF_RowCol_t *diskids;
	/*
	 * i: position of (row,col) in the stripe; j: position of the failed
	 * disk; k: position of the parity disk.
	 */
	u_int i, j, k, i_offset, j_offset;
	RF_RowCol_t prow, pcol;
	int testcol, testrow;
	RF_RowCol_t stripe;
	RF_SectorNum_t poffset;
	char i_is_parity = 0, j_is_parity = 0;
	RF_RowCol_t stripeWidth =
	    layoutPtr->numDataCol + layoutPtr->numParityCol;

	/* Get a listing of the disks comprising that stripe. */
	sosRaidAddress = rf_ParityStripeIDToRaidAddress(layoutPtr, psid);
	(layoutPtr->map->IdentifyStripe) (raidPtr, sosRaidAddress, &diskids,
	    &stripe);
	RF_ASSERT(diskids);

	/*
	 * Reject this entire parity stripe if it does not contain the
	 * indicated disk or it does not contain the failed disk.
	 */
	if (row != stripe)
		goto skipit;
	for (i = 0; i < stripeWidth; i++) {
		if (col == diskids[i])
			break;
	}
	if (i == stripeWidth)
		goto skipit;
	for (j = 0; j < stripeWidth; j++) {
		if (fcol == diskids[j])
			break;
	}
	if (j == stripeWidth) {
		goto skipit;
	}
	/* Find out which disk the parity is on. */
	(layoutPtr->map->MapParity) (raidPtr, sosRaidAddress, &prow, &pcol,
	    &poffset, RF_DONT_REMAP);

	/* Find out if either the current RU or the failed RU is parity. */
	/*
	 * Also, if the parity occurs in this stripe prior to the data and/or
	 * failed col, we need to decrement i and/or j.
	 */
	for (k = 0; k < stripeWidth; k++)
		if (diskids[k] == pcol)
			break;
	RF_ASSERT(k < stripeWidth);
	i_offset = i;
	j_offset = j;
	if (k < i)
		i_offset--;
	else
		if (k == i) {
			i_is_parity = 1;
			i_offset = 0;
		}	/*
			 * Set offsets to zero to disable multiply
			 * below.
			 */
	if (k < j)
		j_offset--;
	else
		if (k == j) {
			j_is_parity = 1;
			j_offset = 0;
		}
	/*
	 * At this point, [ij]_is_parity tells us whether the [current,failed]
	 * disk is parity at the start of this RU, and, if data, "[ij]_offset"
	 * tells us how far into the stripe the [current,failed] disk is.
	 */

	/*
	 * Call the mapping routine to get the offset into the current disk,
	 * repeat for failed disk.
	 */
	if (i_is_parity)
		layoutPtr->map->MapParity(raidPtr, sosRaidAddress + i_offset *
		    layoutPtr->sectorsPerStripeUnit, &testrow, &testcol,
		    outDiskOffset, RF_DONT_REMAP);
	else
		layoutPtr->map->MapSector(raidPtr, sosRaidAddress + i_offset *
		    layoutPtr->sectorsPerStripeUnit, &testrow, &testcol,
		    outDiskOffset, RF_DONT_REMAP);

	/* Sanity check: the mapping must land back on (row,col). */
	RF_ASSERT(row == testrow && col == testcol);

	if (j_is_parity)
		layoutPtr->map->MapParity(raidPtr, sosRaidAddress + j_offset *
		    layoutPtr->sectorsPerStripeUnit, &testrow, &testcol,
		    outFailedDiskSectorOffset, RF_DONT_REMAP);
	else
		layoutPtr->map->MapSector(raidPtr, sosRaidAddress + j_offset *
		    layoutPtr->sectorsPerStripeUnit, &testrow, &testcol,
		    outFailedDiskSectorOffset, RF_DONT_REMAP);
	RF_ASSERT(row == testrow && fcol == testcol);

	/* Now locate the spare unit for the failed unit. */
	if (layoutPtr->map->flags & RF_DISTRIBUTE_SPARE) {
		/* Distributed sparing: remap to the spare location. */
		if (j_is_parity)
			layoutPtr->map->MapParity(raidPtr, sosRaidAddress +
			    j_offset * layoutPtr->sectorsPerStripeUnit, spRow,
			    spCol, spOffset, RF_REMAP);
		else
			layoutPtr->map->MapSector(raidPtr, sosRaidAddress +
			    j_offset * layoutPtr->sectorsPerStripeUnit, spRow,
			    spCol, spOffset, RF_REMAP);
	} else {
		/* Dedicated spare: same offset on the spare disk. */
		*spRow = raidPtr->reconControl[row]->spareRow;
		*spCol = raidPtr->reconControl[row]->spareCol;
		*spOffset = *outFailedDiskSectorOffset;
	}

	return (0);

skipit:
	Dprintf3("RECON: Skipping psid %ld: nothing needed from r%d c%d.\n",
	    psid, row, col);
	return (1);
}
1446:
1447:
1448: /*
1449: * This is called when a buffer has become ready to write to the replacement
1450: * disk.
1451: */
/*
 * Pull a full reconstruction buffer off this row's list and enqueue a
 * write of its contents to the spare location of the failed unit.
 * Always returns 0.
 */
int
rf_IssueNextWriteRequest(RF_Raid_t *raidPtr, RF_RowCol_t row)
{
	RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
	RF_SectorCount_t sectorsPerRU =
	    layoutPtr->sectorsPerStripeUnit * layoutPtr->SUsPerRU;
	RF_RowCol_t fcol = raidPtr->reconControl[row]->fcol;
	RF_ReconBuffer_t *rbuf;
	RF_DiskQueueData_t *req;

	rbuf = rf_GetFullReconBuffer(raidPtr->reconControl[row]);
	RF_ASSERT(rbuf);	/*
				 * There must be one available, or we wouldn't
				 * have gotten the event that sent us here.
				 */
	RF_ASSERT(rbuf->pssPtr);

	/* Record which rbuf is being written, then detach it from the pss. */
	rbuf->pssPtr->writeRbuf = rbuf;
	rbuf->pssPtr = NULL;

	Dprintf7("RECON: New write (r %d c %d offs %d) for psid %ld ru %d"
	    " (failed disk offset %ld) buf %lx.\n",
	    rbuf->spRow, rbuf->spCol, rbuf->spOffset, rbuf->parityStripeID,
	    rbuf->which_ru, rbuf->failedDiskSectorOffset, rbuf->buffer);
	Dprintf6("RECON: new write psid %ld %02x %02x %02x %02x %02x.\n",
	    rbuf->parityStripeID, rbuf->buffer[0] & 0xff,
	    rbuf->buffer[1] & 0xff, rbuf->buffer[2] & 0xff,
	    rbuf->buffer[3] & 0xff, rbuf->buffer[4] & 0xff);

	/*
	 * Should be ok to use a NULL b_proc here b/c all addrs should be in
	 * kernel space.
	 */
	req = rf_CreateDiskQueueData(RF_IO_TYPE_WRITE, rbuf->spOffset,
	    sectorsPerRU, rbuf->buffer, rbuf->parityStripeID, rbuf->which_ru,
	    rf_ReconWriteDoneProc, (void *) rbuf, NULL,
	    &raidPtr->recon_tracerecs[fcol], (void *) raidPtr, 0, NULL);

	RF_ASSERT(req);	/* XXX -- Fix this. -- XXX */

	rbuf->arg = (void *) req;
	rf_DiskIOEnqueue(&raidPtr->Queues[rbuf->spRow][rbuf->spCol], req,
	    RF_IO_RECON_PRIORITY);

	return (0);
}
1498:
1499: /*
1500: * This gets called upon the completion of a reconstruction read
1501: * operation. The arg is a pointer to the per-disk reconstruction
1502: * control structure for the process that just finished a read.
1503: *
1504: * Called at interrupt context in the kernel, so don't do anything
1505: * illegal here.
1506: */
/*
 * Completion callback for a (non-forced) reconstruction read: record
 * fetch timing in the per-column trace record, then post a READDONE
 * event to the reconstruction thread.  The arg is the per-disk recon
 * control structure.  Runs at interrupt context; a failed read
 * currently panics (see XXX below).
 */
int
rf_ReconReadDoneProc(void *arg, int status)
{
	RF_PerDiskReconCtrl_t *ctrl = (RF_PerDiskReconCtrl_t *) arg;
	RF_Raid_t *raidPtr = ctrl->reconCtrl->reconDesc->raidPtr;

	if (status) {
		/*
		 * XXX
		 */
		printf("Recon read failed !\n");
		RF_PANIC();
	}
	/* Charge the elapsed time to the fetch-to-return bucket. */
	RF_ETIMER_STOP(raidPtr->recon_tracerecs[ctrl->col].recon_timer);
	RF_ETIMER_EVAL(raidPtr->recon_tracerecs[ctrl->col].recon_timer);
	raidPtr->recon_tracerecs[ctrl->col].specific.recon.
	    recon_fetch_to_return_us =
	    RF_ETIMER_VAL_US(raidPtr->recon_tracerecs[ctrl->col].recon_timer);
	RF_ETIMER_START(raidPtr->recon_tracerecs[ctrl->col].recon_timer);

	rf_CauseReconEvent(raidPtr, ctrl->row, ctrl->col, NULL,
	    RF_REVENT_READDONE);
	return (0);
}
1531:
1532:
1533: /*
1534: * This gets called upon the completion of a reconstruction write operation.
1535: * The arg is a pointer to the rbuf that was just written.
1536: *
1537: * Called at interrupt context in the kernel, so don't do anything illegal here.
1538: */
1539: int
1540: rf_ReconWriteDoneProc(void *arg, int status)
1541: {
1542: RF_ReconBuffer_t *rbuf = (RF_ReconBuffer_t *) arg;
1543:
1544: Dprintf2("Reconstruction completed on psid %ld ru %d.\n",
1545: rbuf->parityStripeID, rbuf->which_ru);
1546: if (status) {
1547: /* fprintf(stderr, "Recon write failed !\n"); */
1548: printf("Recon write failed !\n");
1549: RF_PANIC();
1550: }
1551: rf_CauseReconEvent((RF_Raid_t *) rbuf->raidPtr, rbuf->row, rbuf->col,
1552: arg, RF_REVENT_WRITEDONE);
1553: return (0);
1554: }
1555:
1556:
1557: /*
1558: * Computes a new minimum head sep, and wakes up anyone who needs to
1559: * be woken as a result.
1560: */
1561: void
1562: rf_CheckForNewMinHeadSep(RF_Raid_t *raidPtr, RF_RowCol_t row,
1563: RF_HeadSepLimit_t hsCtr)
1564: {
1565: RF_ReconCtrl_t *reconCtrlPtr = raidPtr->reconControl[row];
1566: RF_HeadSepLimit_t new_min;
1567: RF_RowCol_t i;
1568: RF_CallbackDesc_t *p;
1569: /* From the definition of a minimum. */
1570: RF_ASSERT(hsCtr >= reconCtrlPtr->minHeadSepCounter);
1571:
1572:
1573: RF_LOCK_MUTEX(reconCtrlPtr->rb_mutex);
1574:
1575: new_min = ~(1L << (8 * sizeof(long) - 1)); /* 0x7FFF....FFF */
1576: for (i = 0; i < raidPtr->numCol; i++)
1577: if (i != reconCtrlPtr->fcol) {
1578: if (reconCtrlPtr->perDiskInfo[i].headSepCounter <
1579: new_min)
1580: new_min =
1581: reconCtrlPtr->perDiskInfo[i].headSepCounter;
1582: }
1583: /* Set the new minimum and wake up anyone who can now run again. */
1584: if (new_min != reconCtrlPtr->minHeadSepCounter) {
1585: reconCtrlPtr->minHeadSepCounter = new_min;
1586: Dprintf1("RECON: new min head pos counter val is %ld.\n",
1587: new_min);
1588: while (reconCtrlPtr->headSepCBList) {
1589: if (reconCtrlPtr->headSepCBList->callbackArg.v >
1590: new_min)
1591: break;
1592: p = reconCtrlPtr->headSepCBList;
1593: reconCtrlPtr->headSepCBList = p->next;
1594: p->next = NULL;
1595: rf_CauseReconEvent(raidPtr, p->row, p->col, NULL,
1596: RF_REVENT_HEADSEPCLEAR);
1597: rf_FreeCallbackDesc(p);
1598: }
1599:
1600: }
1601: RF_UNLOCK_MUTEX(reconCtrlPtr->rb_mutex);
1602: }
1603:
1604: /*
1605: * Checks to see that the maximum head separation will not be violated
1606: * if we initiate a reconstruction I/O on the indicated disk.
1607: * Limiting the maximum head separation between two disks eliminates
1608: * the nasty buffer-stall conditions that occur when one disk races
1609: * ahead of the others and consumes all of the floating recon buffers.
1610: * This code is complex and unpleasant but it's necessary to avoid
1611: * some very nasty, albeit fairly rare, reconstruction behavior.
1612: *
1613: * Returns non-zero if and only if we have to stop working on the
1614: * indicated disk due to a head-separation delay.
1615: */
/*
 * Decide whether disk (row, col) must stall for head separation.
 * If so, allocate a callback descriptor, insert it (sorted by wakeup
 * value) into the head-sep callback list, and return 1; otherwise
 * return 0.  The 20% hysteresis is built into the wakeup value.
 */
int
rf_CheckHeadSeparation(
	RF_Raid_t *raidPtr,
	RF_PerDiskReconCtrl_t *ctrl,
	RF_RowCol_t row,
	RF_RowCol_t col,
	RF_HeadSepLimit_t hsCtr,
	RF_ReconUnitNum_t which_ru
)
{
	RF_ReconCtrl_t *reconCtrlPtr = raidPtr->reconControl[row];
	RF_CallbackDesc_t *cb, *p, *pt;
	int retval = 0;

	/*
	 * If we're too far ahead of the slowest disk, stop working on this
	 * disk until the slower ones catch up. We do this by scheduling a
	 * wakeup callback for the time when the slowest disk has caught up.
	 * We define "caught up" with 20% hysteresis, i.e. the head separation
	 * must have fallen to at most 80% of the max allowable head
	 * separation before we'll wake up.
	 */
	RF_LOCK_MUTEX(reconCtrlPtr->rb_mutex);
	if ((raidPtr->headSepLimit >= 0) &&
	    ((ctrl->headSepCounter - reconCtrlPtr->minHeadSepCounter) >
	     raidPtr->headSepLimit)) {
		Dprintf6("raid%d: RECON: head sep stall: row %d col %d hsCtr"
		    " %ld minHSCtr %ld limit %ld.\n",
		    raidPtr->raidid, row, col, ctrl->headSepCounter,
		    reconCtrlPtr->minHeadSepCounter, raidPtr->headSepLimit);
		cb = rf_AllocCallbackDesc();
		/*
		 * The minHeadSepCounter value we have to get to before we'll
		 * wake up. Build in 20% hysteresis.
		 */
		cb->callbackArg.v = (ctrl->headSepCounter -
		    raidPtr->headSepLimit + raidPtr->headSepLimit / 5);
		cb->row = row;
		cb->col = col;
		cb->next = NULL;

		/*
		 * Insert this callback descriptor into the sorted list of
		 * pending head-sep callbacks.
		 */
		p = reconCtrlPtr->headSepCBList;
		if (!p)
			/* Empty list: we become the whole list. */
			reconCtrlPtr->headSepCBList = cb;
		else
			if (cb->callbackArg.v < p->callbackArg.v) {
				/* Smaller than the head: prepend. */
				cb->next = reconCtrlPtr->headSepCBList;
				reconCtrlPtr->headSepCBList = cb;
			} else {
				/* Walk to the insertion point (pt trails p). */
				for (pt = p, p = p->next;
				    p && (p->callbackArg.v < cb->callbackArg.v);
				    pt = p, p = p->next);
				cb->next = p;
				pt->next = cb;
			}
		retval = 1;
#if RF_RECON_STATS > 0
		ctrl->reconCtrl->reconDesc->hsStallCount++;
#endif	/* RF_RECON_STATS > 0 */
	}
	RF_UNLOCK_MUTEX(reconCtrlPtr->rb_mutex);

	return (retval);
}
1684:
1685:
1686:
1687: /*
1688: * Checks to see if reconstruction has been either forced or blocked
1689: * by a user operation. If forced, we skip this RU entirely. Else if
1690: * blocked, put ourselves on the wait list. Else return 0.
1691: *
1692: * ASSUMES THE PSS MUTEX IS LOCKED UPON ENTRY.
1693: */
/*
 * Check whether reconstruction of this RU has been forced or blocked
 * by a user operation.  Returns RF_PSS_FORCED_ON_WRITE if forced
 * (note: a stripe forced on read also reports RF_PSS_FORCED_ON_WRITE
 * here; callers treat any forced state as "skip this RU"),
 * RF_PSS_RECON_BLOCKED if blocked (after queueing ourselves on the
 * blockage-wait list), or 0 after marking the RU as under recon.
 * ASSUMES THE PSS MUTEX IS LOCKED UPON ENTRY.
 */
int
rf_CheckForcedOrBlockedReconstruction(
	RF_Raid_t *raidPtr,
	RF_ReconParityStripeStatus_t *pssPtr,
	RF_PerDiskReconCtrl_t *ctrl,
	RF_RowCol_t row,
	RF_RowCol_t col,
	RF_StripeNum_t psid,
	RF_ReconUnitNum_t which_ru
)
{
	RF_CallbackDesc_t *cb;
	int retcode = 0;

	if ((pssPtr->flags & RF_PSS_FORCED_ON_READ) ||
	    (pssPtr->flags & RF_PSS_FORCED_ON_WRITE))
		retcode = RF_PSS_FORCED_ON_WRITE;
	else
		if (pssPtr->flags & RF_PSS_RECON_BLOCKED) {
			Dprintf4("RECON: row %d col %d blocked at psid %ld"
			    " ru %d.\n", row, col, psid, which_ru);
			cb = rf_AllocCallbackDesc();	/*
							 * Append ourselves to
							 * the blockage-wait
							 * list.
							 */
			cb->row = row;
			cb->col = col;
			cb->next = pssPtr->blockWaitList;
			pssPtr->blockWaitList = cb;
			retcode = RF_PSS_RECON_BLOCKED;
		}
	if (!retcode)
		pssPtr->flags |= RF_PSS_UNDER_RECON;	/*
							 * Mark this RU as under
							 * reconstruction.
							 */

	return (retcode);
}
1734:
1735:
1736: /*
1737: * If reconstruction is currently ongoing for the indicated stripeID,
1738: * reconstruction is forced to completion and we return non-zero to
1739: * indicate that the caller must wait. If not, then reconstruction is
1740: * blocked on the indicated stripe and the routine returns zero. If
1741: * and only if we return non-zero, we'll cause the cbFunc to get
1742: * invoked with the cbArg when the reconstruction has completed.
1743: */
/*
 * If reconstruction is ongoing on the stripe covering asmap, force it
 * to completion at normal priority (promoting already-queued recon
 * I/Os and issuing fresh normal-priority reads for the rest), install
 * (cbFunc, cbArg) to be invoked when recon on the stripe completes,
 * and return 1 (caller must wait).  Otherwise leave recon blocked on
 * the stripe and return 0.
 */
int
rf_ForceOrBlockRecon(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
    void (*cbFunc) (RF_Raid_t *, void *), void *cbArg)
{
	RF_RowCol_t row = asmap->physInfo->row;	/*
						 * Which row of the array
						 * we're working on.
						 */
	RF_StripeNum_t stripeID = asmap->stripeID;	/*
							 * The stripe ID we're
							 * forcing recon on.
							 */
	RF_SectorCount_t sectorsPerRU = raidPtr->Layout.sectorsPerStripeUnit *
	    raidPtr->Layout.SUsPerRU;	/* Num sects in one RU. */
	RF_ReconParityStripeStatus_t *pssPtr;	/*
						 * A pointer to the parity
						 * stripe status structure.
						 */
	RF_StripeNum_t psid;	/* Parity stripe id. */
	RF_SectorNum_t offset, fd_offset;	/*
						 * Disk offset, failed-disk
						 * offset.
						 */
	RF_RowCol_t *diskids;
	RF_RowCol_t stripe;
	RF_ReconUnitNum_t which_ru;	/* RU within parity stripe. */
	RF_RowCol_t fcol, diskno, i;
	RF_ReconBuffer_t *new_rbuf;	/* Ptr to newly allocated rbufs. */
	RF_DiskQueueData_t *req;	/* Disk I/O req to be enqueued. */
	RF_CallbackDesc_t *cb;
	int created = 0, nPromoted;

	psid = rf_MapStripeIDToParityStripeID(&raidPtr->Layout, stripeID,
	    &which_ru);

	RF_LOCK_PSS_MUTEX(raidPtr, row, psid);

	/*
	 * Look up the pss entry, creating it (already marked blocked) if it
	 * does not exist yet.
	 */
	pssPtr = rf_LookupRUStatus(raidPtr,
	    raidPtr->reconControl[row]->pssTable, psid, which_ru,
	    RF_PSS_CREATE | RF_PSS_RECON_BLOCKED, &created);

	/* If recon is not ongoing on this PS, just return. */
	if (!(pssPtr->flags & RF_PSS_UNDER_RECON)) {
		RF_UNLOCK_PSS_MUTEX(raidPtr, row, psid);
		return (0);
	}
	/*
	 * Otherwise, we have to wait for reconstruction to complete on this
	 * RU.
	 */
	/*
	 * In order to avoid waiting for a potentially large number of
	 * low-priority accesses to complete, we force a normal-priority (i.e.
	 * not low-priority) reconstruction on this RU.
	 */
	if (!(pssPtr->flags & RF_PSS_FORCED_ON_WRITE) &&
	    !(pssPtr->flags & RF_PSS_FORCED_ON_READ)) {
		DDprintf1("Forcing recon on psid %ld.\n", psid);
		/* Mark this RU as under forced recon. */
		pssPtr->flags |= RF_PSS_FORCED_ON_WRITE;
		/* Clear the blockage that we just set. */
		pssPtr->flags &= ~RF_PSS_RECON_BLOCKED;
		fcol = raidPtr->reconControl[row]->fcol;

		/*
		 * Get a listing of the disks comprising the indicated stripe.
		 */
		(raidPtr->Layout.map->IdentifyStripe) (raidPtr,
		    asmap->raidAddress, &diskids, &stripe);
		RF_ASSERT(row == stripe);

		/*
		 * For previously issued reads, elevate them to normal
		 * priority. If the I/O has already completed, it won't be
		 * found in the queue, and hence this will be a no-op. For
		 * unissued reads, allocate buffers and issue new reads. The
		 * fact that we've set the FORCED bit means that the regular
		 * recon procs will not re-issue these reqs.
		 */
		for (i = 0; i < raidPtr->Layout.numDataCol +
		    raidPtr->Layout.numParityCol; i++)
			if ((diskno = diskids[i]) != fcol) {
				if (pssPtr->issued[diskno]) {
					nPromoted = rf_DiskIOPromote(&raidPtr
					    ->Queues[row][diskno], psid,
					    which_ru);
					if (rf_reconDebug && nPromoted)
						printf("raid%d: promoted read"
						    " from row %d col %d.\n",
						    raidPtr->raidid, row,
						    diskno);
				} else {
					/* Create new buf. */
					new_rbuf = rf_MakeReconBuffer(raidPtr,
					    row, diskno, RF_RBUF_TYPE_FORCED);
					/* Find offsets & spare location. */
					rf_ComputePSDiskOffsets(raidPtr, psid,
					    row, diskno, &offset, &fd_offset,
					    &new_rbuf->spRow, &new_rbuf->spCol,
					    &new_rbuf->spOffset);
					new_rbuf->parityStripeID = psid;
					/* Fill in the buffer. */
					new_rbuf->which_ru = which_ru;
					new_rbuf->failedDiskSectorOffset =
					    fd_offset;
					new_rbuf->priority =
					    RF_IO_NORMAL_PRIORITY;

					/*
					 * Use NULL b_proc b/c all addrs
					 * should be in kernel space.
					 */
					req = rf_CreateDiskQueueData(
					    RF_IO_TYPE_READ, offset +
					    which_ru * sectorsPerRU,
					    sectorsPerRU, new_rbuf->buffer,
					    psid, which_ru, (int (*)
					    (void *, int))
					    rf_ForceReconReadDoneProc,
					    (void *) new_rbuf, NULL,
					    NULL, (void *) raidPtr, 0, NULL);

					RF_ASSERT(req);	/*
							 * XXX -- Fix this. --
							 * XXX
							 */

					new_rbuf->arg = req;
					/* Enqueue the I/O. */
					rf_DiskIOEnqueue(&raidPtr
					    ->Queues[row][diskno], req,
					    RF_IO_NORMAL_PRIORITY);
					Dprintf3("raid%d: Issued new read req"
					    " on row %d col %d.\n",
					    raidPtr->raidid, row, diskno);
				}
			}
		/*
		 * If the write is sitting in the disk queue, elevate its
		 * priority.
		 */
		if (rf_DiskIOPromote(&raidPtr->Queues[row][fcol],
		    psid, which_ru))
			printf("raid%d: promoted write to row %d col %d.\n",
			    raidPtr->raidid, row, fcol);
	}
	/*
	 * Install a callback descriptor to be invoked when recon completes on
	 * this parity stripe.
	 */
	cb = rf_AllocCallbackDesc();
	/*
	 * XXX The following is bogus... These functions don't really match !!!
	 * GO
	 */
	cb->callbackFunc = (void (*) (RF_CBParam_t)) cbFunc;
	cb->callbackArg.p = (void *) cbArg;
	cb->next = pssPtr->procWaitList;
	pssPtr->procWaitList = cb;
	DDprintf2("raid%d: Waiting for forced recon on psid %ld.\n",
	    raidPtr->raidid, psid);

	RF_UNLOCK_PSS_MUTEX(raidPtr, row, psid);
	return (1);
}
1909:
1910:
1911: /*
1912: * Called upon the completion of a forced reconstruction read.
1913: * All we do is schedule the FORCEDREADONE event.
1914: * Called at interrupt context in the kernel, so don't do anything illegal here.
1915: */
1916: void
1917: rf_ForceReconReadDoneProc(void *arg, int status)
1918: {
1919: RF_ReconBuffer_t *rbuf = arg;
1920:
1921: if (status) {
1922: /* fprintf(stderr, "Forced recon read failed !\n"); */
1923: printf("Forced recon read failed !\n");
1924: RF_PANIC();
1925: }
1926: rf_CauseReconEvent((RF_Raid_t *) rbuf->raidPtr, rbuf->row, rbuf->col,
1927: (void *) rbuf, RF_REVENT_FORCEDREADDONE);
1928: }
1929:
1930:
1931: /* Releases a block on the reconstruction of the indicated stripe. */
1932: int
1933: rf_UnblockRecon(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap)
1934: {
1935: RF_RowCol_t row = asmap->origRow;
1936: RF_StripeNum_t stripeID = asmap->stripeID;
1937: RF_ReconParityStripeStatus_t *pssPtr;
1938: RF_ReconUnitNum_t which_ru;
1939: RF_StripeNum_t psid;
1940: int created = 0;
1941: RF_CallbackDesc_t *cb;
1942:
1943: psid = rf_MapStripeIDToParityStripeID(&raidPtr->Layout, stripeID,
1944: &which_ru);
1945: RF_LOCK_PSS_MUTEX(raidPtr, row, psid);
1946: pssPtr = rf_LookupRUStatus(raidPtr, raidPtr->reconControl[row]
1947: ->pssTable, psid, which_ru, RF_PSS_NONE, &created);
1948:
1949: /*
1950: * When recon is forced, the pss desc can get deleted before we get
1951: * back to unblock recon. But, this can _only_ happen when recon is
1952: * forced. It would be good to put some kind of sanity check here, but
1953: * how to decide if recon was just forced or not ?
1954: */
1955: if (!pssPtr) {
1956: /*
1957: * printf("Warning: no pss descriptor upon unblock on psid %ld"
1958: * " RU %d.\n", psid, which_ru);
1959: */
1960: if (rf_reconDebug || rf_pssDebug)
1961: printf("Warning: no pss descriptor upon unblock on"
1962: " psid %ld RU %d.\n", (long) psid, which_ru);
1963: goto out;
1964: }
1965: pssPtr->blockCount--;
1966: Dprintf3("raid%d: unblocking recon on psid %ld: blockcount is %d.\n",
1967: raidPtr->raidid, psid, pssPtr->blockCount);
1968: if (pssPtr->blockCount == 0) {
1969: /* If recon blockage has been released. */
1970:
1971: /*
1972: * Unblock recon before calling CauseReconEvent in case
1973: * CauseReconEvent causes us to try to issue a new read before
1974: * returning here.
1975: */
1976: pssPtr->flags &= ~RF_PSS_RECON_BLOCKED;
1977:
1978:
1979: while (pssPtr->blockWaitList) {
1980: /*
1981: * Spin through the block-wait list and
1982: * release all the waiters.
1983: */
1984: cb = pssPtr->blockWaitList;
1985: pssPtr->blockWaitList = cb->next;
1986: cb->next = NULL;
1987: rf_CauseReconEvent(raidPtr, cb->row, cb->col, NULL,
1988: RF_REVENT_BLOCKCLEAR);
1989: rf_FreeCallbackDesc(cb);
1990: }
1991: if (!(pssPtr->flags & RF_PSS_UNDER_RECON)) {
1992: /* If no recon was requested while recon was blocked. */
1993: rf_PSStatusDelete(raidPtr, raidPtr->reconControl[row]
1994: ->pssTable, pssPtr);
1995: }
1996: }
1997: out:
1998: RF_UNLOCK_PSS_MUTEX(raidPtr, row, psid);
1999: return (0);
2000: }