[BACK]Return to rf_driver.c CVS log [TXT][DIR] Up to [local] / sys / dev / raidframe

Annotation of sys/dev/raidframe/rf_driver.c, Revision 1.1.1.1

1.1       nbrk        1: /*     $OpenBSD: rf_driver.c,v 1.11 2002/12/16 07:01:03 tdeval Exp $   */
                      2: /*     $NetBSD: rf_driver.c,v 1.37 2000/06/04 02:05:13 oster Exp $     */
                      3:
                      4: /*
                      5:  * Copyright (c) 1999 The NetBSD Foundation, Inc.
                      6:  * All rights reserved.
                      7:  *
                      8:  * This code is derived from software contributed to The NetBSD Foundation
                      9:  * by Greg Oster
                     10:  *
                     11:  * Redistribution and use in source and binary forms, with or without
                     12:  * modification, are permitted provided that the following conditions
                     13:  * are met:
                     14:  * 1. Redistributions of source code must retain the above copyright
                     15:  *    notice, this list of conditions and the following disclaimer.
                     16:  * 2. Redistributions in binary form must reproduce the above copyright
                     17:  *    notice, this list of conditions and the following disclaimer in the
                     18:  *    documentation and/or other materials provided with the distribution.
                     19:  * 3. All advertising materials mentioning features or use of this software
                     20:  *    must display the following acknowledgement:
                     21:  *     This product includes software developed by the NetBSD
                     22:  *     Foundation, Inc. and its contributors.
                     23:  * 4. Neither the name of The NetBSD Foundation nor the names of its
                     24:  *    contributors may be used to endorse or promote products derived
                     25:  *    from this software without specific prior written permission.
                     26:  *
                     27:  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
                     28:  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
                     29:  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
                     30:  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
                     31:  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
                     32:  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
                     33:  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
                     34:  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
                     35:  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
                     36:  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
                     37:  * POSSIBILITY OF SUCH DAMAGE.
                     38:  */
                     39:
                     40: /*
                     41:  * Copyright (c) 1995 Carnegie-Mellon University.
                     42:  * All rights reserved.
                     43:  *
                     44:  * Author:     Mark Holland, Khalil Amiri, Claudson Bornstein,
                     45:  *             William V. Courtright II, Robby Findler, Daniel Stodolsky,
                     46:  *             Rachad Youssef, Jim Zelenka
                     47:  *
                     48:  * Permission to use, copy, modify and distribute this software and
                     49:  * its documentation is hereby granted, provided that both the copyright
                     50:  * notice and this permission notice appear in all copies of the
                     51:  * software, derivative works or modified versions, and any portions
                     52:  * thereof, and that both notices appear in supporting documentation.
                     53:  *
                     54:  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
                     55:  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
                     56:  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
                     57:  *
                     58:  * Carnegie Mellon requests users of this software to return to
                     59:  *
                     60:  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
                     61:  *  School of Computer Science
                     62:  *  Carnegie Mellon University
                     63:  *  Pittsburgh PA 15213-3890
                     64:  *
                     65:  * any improvements or extensions that they make and grant Carnegie the
                     66:  * rights to redistribute these changes.
                     67:  */
                     68:
                     69: /*****************************************************************************
                     70:  *
                     71:  * rf_driver.c -- Main setup, teardown, and access routines for the RAID
                     72:  *               driver
                     73:  *
                     74:  * All routines are prefixed with rf_ (RAIDframe), to avoid conflicts.
                     75:  *
                     76:  *****************************************************************************/
                     77:
                     78: #include <sys/types.h>
                     79: #include <sys/param.h>
                     80: #include <sys/systm.h>
                     81: #include <sys/ioctl.h>
                     82: #include <sys/fcntl.h>
                     83: #ifdef __NetBSD__
                     84: #include <sys/vnode.h>
                     85: #endif
                     86:
                     87:
                     88: #include "rf_archs.h"
                     89: #include "rf_threadstuff.h"
                     90:
                     91:
                     92: #include <sys/errno.h>
                     93:
                     94: #include "rf_raid.h"
                     95: #include "rf_dag.h"
                     96: #include "rf_aselect.h"
                     97: #include "rf_diskqueue.h"
                     98: #include "rf_parityscan.h"
                     99: #include "rf_alloclist.h"
                    100: #include "rf_dagutils.h"
                    101: #include "rf_utils.h"
                    102: #include "rf_etimer.h"
                    103: #include "rf_acctrace.h"
                    104: #include "rf_configure.h"
                    105: #include "rf_general.h"
                    106: #include "rf_desc.h"
                    107: #include "rf_states.h"
                    108: #include "rf_freelist.h"
                    109: #include "rf_decluster.h"
                    110: #include "rf_map.h"
                    111: #include "rf_revent.h"
                    112: #include "rf_callback.h"
                    113: #include "rf_engine.h"
                    114: #include "rf_memchunk.h"
                    115: #include "rf_mcpair.h"
                    116: #include "rf_nwayxor.h"
                    117: #include "rf_debugprint.h"
                    118: #include "rf_copyback.h"
                    119: #include "rf_driver.h"
                    120: #include "rf_options.h"
                    121: #include "rf_shutdown.h"
                    122: #include "rf_kintf.h"
                    123:
                    124: #include <sys/buf.h>
                    125:
                    126: /* rad == RF_RaidAccessDesc_t */
                    127: static RF_FreeList_t *rf_rad_freelist;
                    128: #define        RF_MAX_FREE_RAD         128
                    129: #define        RF_RAD_INC               16
                    130: #define        RF_RAD_INITIAL           32
                    131:
                    132: /* Debug variables. */
                    133: char   rf_panicbuf[2048];      /*
                    134:                                 * A buffer to hold an error msg when we panic.
                    135:                                 */
                    136:
                    137: /* Main configuration routines. */
                    138: static int raidframe_booted = 0;
                    139:
                    140: void rf_ConfigureDebug(RF_Config_t *);
                    141: void rf_set_debug_option(char *, long);
                    142: void rf_UnconfigureArray(void);
                    143: int  rf_init_rad(RF_RaidAccessDesc_t *);
                    144: void rf_clean_rad(RF_RaidAccessDesc_t *);
                    145: void rf_ShutdownRDFreeList(void *);
                    146: int  rf_ConfigureRDFreeList(RF_ShutdownList_t **);
                    147:
                    148: RF_DECLARE_MUTEX(rf_printf_mutex);     /*
                    149:                                         * Debug only: Avoids interleaved
                    150:                                         * printfs by different stripes.
                    151:                                         */
                    152:
                    153: #define        SIGNAL_QUIESCENT_COND(_raid_)   wakeup(&((_raid_)->accesses_suspended))
                    154: #define        WAIT_FOR_QUIESCENCE(_raid_)                                     \
                    155:        tsleep(&((_raid_)->accesses_suspended), PRIBIO, "RAIDframe quiesce", 0);
                    156:
                    157: #define        IO_BUF_ERR(bp, err)                                             \
                    158: do {                                                                   \
                    159:        bp->b_flags |= B_ERROR;                                         \
                    160:        bp->b_resid = bp->b_bcount;                                     \
                    161:        bp->b_error = err;                                              \
                    162:        biodone(bp);                                                    \
                    163: } while (0)
                    164:
                    165: static int configureCount = 0; /* Number of active configurations. */
                    166: static int isconfigged = 0;    /*
                    167:                                 * Is basic RAIDframe (non per-array)
                    168:                                 * stuff configured ?
                    169:                                 */
                    170: RF_DECLARE_STATIC_MUTEX(configureMutex);       /*
                    171:                                                 * Used to lock the
                    172:                                                 * configuration stuff.
                    173:                                                 */
                    174: static RF_ShutdownList_t *globalShutdown;      /* Non array-specific stuff. */
                    175: int  rf_ConfigureRDFreeList(RF_ShutdownList_t **);
                    176:
                    177:
                    178: /* Called at system boot time. */
                    179: int
                    180: rf_BootRaidframe(void)
                    181: {
                    182:        int rc;
                    183:
                    184:        if (raidframe_booted)
                    185:                return (EBUSY);
                    186:        raidframe_booted = 1;
                    187:
                    188:        rc = rf_mutex_init(&configureMutex);
                    189:        if (rc) {
                    190:                RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d.\n",
                    191:                    __FILE__, __LINE__, rc);
                    192:                RF_PANIC();
                    193:        }
                    194:        configureCount = 0;
                    195:        isconfigged = 0;
                    196:        globalShutdown = NULL;
                    197:        return (0);
                    198: }
                    199:
                    200:
                    201: /*
                    202:  * This function is really just for debugging user-level stuff: It
                    203:  * frees up all memory, other RAIDframe resources that might otherwise
                    204:  * be kept around. This is used with systems like "sentinel" to detect
                    205:  * memory leaks.
                    206:  */
                    207: int
                    208: rf_UnbootRaidframe(void)
                    209: {
                    210:        int rc;
                    211:
                    212:        RF_LOCK_MUTEX(configureMutex);
                    213:        if (configureCount) {
                    214:                RF_UNLOCK_MUTEX(configureMutex);
                    215:                return (EBUSY);
                    216:        }
                    217:        raidframe_booted = 0;
                    218:        RF_UNLOCK_MUTEX(configureMutex);
                    219:        rc = rf_mutex_destroy(&configureMutex);
                    220:        if (rc) {
                    221:                RF_ERRORMSG3("Unable to destroy mutex file %s line %d"
                    222:                    " rc=%d.\n", __FILE__, __LINE__, rc);
                    223:                RF_PANIC();
                    224:        }
                    225:        return (0);
                    226: }
                    227:
                    228:
                    229: /*
                    230:  * Called whenever an array is shutdown.
                    231:  */
                    232: void
                    233: rf_UnconfigureArray(void)
                    234: {
                    235:        int rc;
                    236:
                    237:        RF_LOCK_MUTEX(configureMutex);
                    238:        if (--configureCount == 0) {    /*
                    239:                                         * If no active configurations, shut
                    240:                                         * everything down.
                    241:                                         */
                    242:                isconfigged = 0;
                    243:
                    244:                rc = rf_ShutdownList(&globalShutdown);
                    245:                if (rc) {
                    246:                        RF_ERRORMSG1("RAIDFRAME: unable to do global shutdown,"
                    247:                            " rc=%d.\n", rc);
                    248:                }
                    249:
                    250:                /*
                    251:                 * We must wait until now, because the AllocList module
                    252:                 * uses the DebugMem module.
                    253:                 */
                    254:                if (rf_memDebug)
                    255:                        rf_print_unfreed();
                    256:        }
                    257:        RF_UNLOCK_MUTEX(configureMutex);
                    258: }
                    259:
                    260:
                    261: /*
                    262:  * Called to shut down an array.
                    263:  */
                    264: int
                    265: rf_Shutdown(RF_Raid_t *raidPtr)
                    266: {
                    267:        if (!raidPtr->valid) {
                    268:                RF_ERRORMSG("Attempt to shut down unconfigured RAIDframe"
                    269:                    " driver. Aborting shutdown.\n");
                    270:                return (EINVAL);
                    271:        }
                    272:        /*
                    273:         * Wait for outstanding IOs to land.
                    274:         * As described in rf_raid.h, we use the rad_freelist lock
                    275:         * to protect the per-array info about outstanding descs,
                    276:         * since we need to do freelist locking anyway, and this
                    277:         * cuts down on the amount of serialization we've got going
                    278:         * on.
                    279:         */
                    280:        RF_FREELIST_DO_LOCK(rf_rad_freelist);
                    281:        if (raidPtr->waitShutdown) {
                    282:                RF_FREELIST_DO_UNLOCK(rf_rad_freelist);
                    283:                return (EBUSY);
                    284:        }
                    285:        raidPtr->waitShutdown = 1;
                    286:        while (raidPtr->nAccOutstanding) {
                    287:                RF_WAIT_COND(raidPtr->outstandingCond, RF_FREELIST_MUTEX_OF(rf_rad_freelist));
                    288:        }
                    289:        RF_FREELIST_DO_UNLOCK(rf_rad_freelist);
                    290:
                    291:        /* Wait for any parity re-writes to stop... */
                    292:        while (raidPtr->parity_rewrite_in_progress) {
                    293:                printf("Waiting for parity re-write to exit...\n");
                    294:                tsleep(&raidPtr->parity_rewrite_in_progress, PRIBIO,
                    295:                       "rfprwshutdown", 0);
                    296:        }
                    297:
                    298:        raidPtr->valid = 0;
                    299:
                    300:        rf_update_component_labels(raidPtr, RF_FINAL_COMPONENT_UPDATE);
                    301:
                    302:        rf_UnconfigureVnodes(raidPtr);
                    303:
                    304:        rf_ShutdownList(&raidPtr->shutdownList);
                    305:
                    306:        rf_UnconfigureArray();
                    307:
                    308:        return (0);
                    309: }
                    310:
                    311: #define        DO_INIT_CONFIGURE(f)                                            \
                    312: do {                                                                   \
                    313:        rc = f (&globalShutdown);                                       \
                    314:        if (rc) {                                                       \
                    315:                RF_ERRORMSG2("RAIDFRAME: failed %s with %d.\n",         \
                    316:                    RF_STRING(f), rc);                                  \
                    317:                rf_ShutdownList(&globalShutdown);                       \
                    318:                configureCount--;                                       \
                    319:                RF_UNLOCK_MUTEX(configureMutex);                        \
                    320:                return(rc);                                             \
                    321:        }                                                               \
                    322: } while (0)
                    323:
                    324: #define        DO_RAID_FAIL()                                                  \
                    325: do {                                                                   \
                    326:        rf_UnconfigureVnodes(raidPtr);                                  \
                    327:        rf_ShutdownList(&raidPtr->shutdownList);                        \
                    328:        rf_UnconfigureArray();                                          \
                    329: } while (0)
                    330:
                    331: #define        DO_RAID_INIT_CONFIGURE(f)                                       \
                    332: do {                                                                   \
                    333:        rc = (f)(&raidPtr->shutdownList, raidPtr, cfgPtr);              \
                    334:        if (rc) {                                                       \
                    335:                RF_ERRORMSG2("RAIDFRAME: failed %s with %d.\n",         \
                    336:                    RF_STRING(f), rc);                                  \
                    337:                DO_RAID_FAIL();                                         \
                    338:                return(rc);                                             \
                    339:        }                                                               \
                    340: } while (0)
                    341:
                    342: #define        DO_RAID_MUTEX(_m_)                                              \
                    343: do {                                                                   \
                    344:        rc = rf_create_managed_mutex(&raidPtr->shutdownList, (_m_));    \
                    345:        if (rc) {                                                       \
                    346:                RF_ERRORMSG3("Unable to init mutex file %s line %d"     \
                    347:                    " rc=%d.\n", __FILE__, __LINE__, rc);               \
                    348:                DO_RAID_FAIL();                                         \
                    349:                return(rc);                                             \
                    350:        }                                                               \
                    351: } while (0)
                    352:
                    353: #define        DO_RAID_COND(_c_)                                               \
                    354: do {                                                                   \
                    355:        rc = rf_create_managed_cond(&raidPtr->shutdownList, (_c_));     \
                    356:        if (rc) {                                                       \
                    357:                RF_ERRORMSG3("Unable to init cond file %s line %d"      \
                    358:                    " rc=%d.\n", __FILE__, __LINE__, rc);               \
                    359:                DO_RAID_FAIL();                                         \
                    360:                return(rc);                                             \
                    361:        }                                                               \
                    362: } while (0)
                    363:
                    364: int
                    365: rf_Configure(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr, RF_AutoConfig_t *ac)
                    366: {
                    367:        RF_RowCol_t row, col;
                    368:        int i, rc;
                    369:
                    370:        /*
                    371:         * XXX This check can probably be removed now, since
                    372:         * RAIDFRAME_CONFIGURE now checks to make sure that the
                    373:         * RAID set is not already valid.
                    374:         */
                    375:        if (raidPtr->valid) {
                    376:                RF_ERRORMSG("RAIDframe configuration not shut down."
                    377:                    " Aborting configure.\n");
                    378:                return (EINVAL);
                    379:        }
                    380:        RF_LOCK_MUTEX(configureMutex);
                    381:        configureCount++;
                    382:        if (isconfigged == 0) {
                    383:                rc = rf_create_managed_mutex(&globalShutdown, &rf_printf_mutex);
                    384:                if (rc) {
                    385:                        RF_ERRORMSG3("Unable to init mutex file %s line %d"
                    386:                            " rc=%d.\n", __FILE__, __LINE__, rc);
                    387:                        rf_ShutdownList(&globalShutdown);
                    388:                        return (rc);
                    389:                }
                    390:                /* Initialize globals. */
                    391: #ifdef RAIDDEBUG
                    392:                printf("RAIDFRAME: protectedSectors is %ld.\n",
                    393:                       rf_protectedSectors);
                    394: #endif /* RAIDDEBUG */
                    395:
                    396:                rf_clear_debug_print_buffer();
                    397:
                    398:                DO_INIT_CONFIGURE(rf_ConfigureAllocList);
                    399:
                    400:                /*
                    401:                 * Yes, this does make debugging general to the whole
                    402:                 * system instead of being array specific. Bummer, drag.
                    403:                 */
                    404:                rf_ConfigureDebug(cfgPtr);
                    405:                DO_INIT_CONFIGURE(rf_ConfigureDebugMem);
                    406:                DO_INIT_CONFIGURE(rf_ConfigureAccessTrace);
                    407:                DO_INIT_CONFIGURE(rf_ConfigureMapModule);
                    408:                DO_INIT_CONFIGURE(rf_ConfigureReconEvent);
                    409:                DO_INIT_CONFIGURE(rf_ConfigureCallback);
                    410:                DO_INIT_CONFIGURE(rf_ConfigureMemChunk);
                    411:                DO_INIT_CONFIGURE(rf_ConfigureRDFreeList);
                    412:                DO_INIT_CONFIGURE(rf_ConfigureNWayXor);
                    413:                DO_INIT_CONFIGURE(rf_ConfigureStripeLockFreeList);
                    414:                DO_INIT_CONFIGURE(rf_ConfigureMCPair);
                    415:                DO_INIT_CONFIGURE(rf_ConfigureDAGs);
                    416:                DO_INIT_CONFIGURE(rf_ConfigureDAGFuncs);
                    417:                DO_INIT_CONFIGURE(rf_ConfigureDebugPrint);
                    418:                DO_INIT_CONFIGURE(rf_ConfigureReconstruction);
                    419:                DO_INIT_CONFIGURE(rf_ConfigureCopyback);
                    420:                DO_INIT_CONFIGURE(rf_ConfigureDiskQueueSystem);
                    421:                isconfigged = 1;
                    422:        }
                    423:        RF_UNLOCK_MUTEX(configureMutex);
                    424:
                    425:        DO_RAID_MUTEX(&raidPtr->mutex);
                    426:        /*
                    427:         * Set up the cleanup list. Do this after ConfigureDebug so that
                    428:         * value of memDebug will be set.
                    429:         */
                    430:
                    431:        rf_MakeAllocList(raidPtr->cleanupList);
                    432:        if (raidPtr->cleanupList == NULL) {
                    433:                DO_RAID_FAIL();
                    434:                return (ENOMEM);
                    435:        }
                    436:        rc = rf_ShutdownCreate(&raidPtr->shutdownList,
                    437:            (void (*) (void *)) rf_FreeAllocList, raidPtr->cleanupList);
                    438:        if (rc) {
                    439:                RF_ERRORMSG3("Unable to add to shutdown list file %s line %d"
                    440:                    " rc=%d.\n", __FILE__, __LINE__, rc);
                    441:                DO_RAID_FAIL();
                    442:                return (rc);
                    443:        }
                    444:        raidPtr->numRow = cfgPtr->numRow;
                    445:        raidPtr->numCol = cfgPtr->numCol;
                    446:        raidPtr->numSpare = cfgPtr->numSpare;
                    447:
                    448:        /*
                    449:         * XXX We don't even pretend to support more than one row in the
                    450:         * kernel...
                    451:         */
                    452:        if (raidPtr->numRow != 1) {
                    453:                RF_ERRORMSG("Only one row supported in kernel.\n");
                    454:                DO_RAID_FAIL();
                    455:                return (EINVAL);
                    456:        }
                    457:        RF_CallocAndAdd(raidPtr->status, raidPtr->numRow,
                    458:            sizeof(RF_RowStatus_t), (RF_RowStatus_t *), raidPtr->cleanupList);
                    459:        if (raidPtr->status == NULL) {
                    460:                DO_RAID_FAIL();
                    461:                return (ENOMEM);
                    462:        }
                    463:        RF_CallocAndAdd(raidPtr->reconControl, raidPtr->numRow,
                    464:            sizeof(RF_ReconCtrl_t *), (RF_ReconCtrl_t **), raidPtr->cleanupList);
                    465:        if (raidPtr->reconControl == NULL) {
                    466:                DO_RAID_FAIL();
                    467:                return (ENOMEM);
                    468:        }
                    469:        for (i = 0; i < raidPtr->numRow; i++) {
                    470:                raidPtr->status[i] = rf_rs_optimal;
                    471:                raidPtr->reconControl[i] = NULL;
                    472:        }
                    473:
                    474:        DO_RAID_INIT_CONFIGURE(rf_ConfigureEngine);
                    475:        DO_RAID_INIT_CONFIGURE(rf_ConfigureStripeLocks);
                    476:
                    477:        DO_RAID_COND(&raidPtr->outstandingCond);
                    478:
                    479:        raidPtr->nAccOutstanding = 0;
                    480:        raidPtr->waitShutdown = 0;
                    481:
                    482:        DO_RAID_MUTEX(&raidPtr->access_suspend_mutex);
                    483:        DO_RAID_COND(&raidPtr->quiescent_cond);
                    484:
                    485:        DO_RAID_COND(&raidPtr->waitForReconCond);
                    486:
                    487:        DO_RAID_MUTEX(&raidPtr->recon_done_proc_mutex);
                    488:
                    489:        if (ac != NULL) {
                    490:                /*
                    491:                 * We have an AutoConfig structure... Don't do the
                    492:                 * normal disk configuration... call the auto config
                    493:                 * stuff.
                    494:                 */
                    495:                rf_AutoConfigureDisks(raidPtr, cfgPtr, ac);
                    496:        } else {
                    497:                DO_RAID_INIT_CONFIGURE(rf_ConfigureDisks);
                    498:                DO_RAID_INIT_CONFIGURE(rf_ConfigureSpareDisks);
                    499:        }
                    500:        /*
                    501:         * Do this after ConfigureDisks & ConfigureSpareDisks to be sure
                    502:         * devno is set.
                    503:         */
                    504:        DO_RAID_INIT_CONFIGURE(rf_ConfigureDiskQueues);
                    505:
                    506:        DO_RAID_INIT_CONFIGURE(rf_ConfigureLayout);
                    507:
                    508:        DO_RAID_INIT_CONFIGURE(rf_ConfigurePSStatus);
                    509:
                    510:        for (row = 0; row < raidPtr->numRow; row++) {
                    511:                for (col = 0; col < raidPtr->numCol; col++) {
                    512:                        /*
                    513:                         * XXX Better distribution.
                    514:                         */
                    515:                        raidPtr->hist_diskreq[row][col] = 0;
                    516:                }
                    517:        }
                    518:
                    519:        raidPtr->numNewFailures = 0;
                    520:        raidPtr->copyback_in_progress = 0;
                    521:        raidPtr->parity_rewrite_in_progress = 0;
                    522:        raidPtr->recon_in_progress = 0;
                    523:        raidPtr->maxOutstanding = cfgPtr->maxOutstandingDiskReqs;
                    524:
                    525:        /*
                    526:         * Autoconfigure and root_partition will actually get filled in
                    527:         * after the config is done.
                    528:         */
                    529:        raidPtr->autoconfigure = 0;
                    530:        raidPtr->root_partition = 0;
                    531:        raidPtr->last_unit = raidPtr->raidid;
                    532:        raidPtr->config_order = 0;
                    533:
                    534:        if (rf_keepAccTotals) {
                    535:                raidPtr->keep_acc_totals = 1;
                    536:        }
                    537:        rf_StartUserStats(raidPtr);
                    538:
                    539:        raidPtr->valid = 1;
                    540:        return (0);
                    541: }
                    542:
                    543: int
                    544: rf_init_rad(RF_RaidAccessDesc_t *desc)
                    545: {
                    546:        int rc;
                    547:
                    548:        rc = rf_mutex_init(&desc->mutex);
                    549:        if (rc) {
                    550:                RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d.\n", __FILE__,
                    551:                    __LINE__, rc);
                    552:                return (rc);
                    553:        }
                    554:        rc = rf_cond_init(&desc->cond);
                    555:        if (rc) {
                    556:                RF_ERRORMSG3("Unable to init cond file %s line %d rc=%d.\n", __FILE__,
                    557:                    __LINE__, rc);
                    558:                rf_mutex_destroy(&desc->mutex);
                    559:                return (rc);
                    560:        }
                    561:        return (0);
                    562: }
                    563:
                    564: void
                    565: rf_clean_rad(RF_RaidAccessDesc_t *desc)
                    566: {
                    567:        rf_mutex_destroy(&desc->mutex);
                    568:        rf_cond_destroy(&desc->cond);
                    569: }
                    570:
                    571: void
                    572: rf_ShutdownRDFreeList(void *ignored)
                    573: {
                    574:        RF_FREELIST_DESTROY_CLEAN(rf_rad_freelist, next,
                    575:            (RF_RaidAccessDesc_t *), rf_clean_rad);
                    576: }
                    577:
                    578: int
                    579: rf_ConfigureRDFreeList(RF_ShutdownList_t **listp)
                    580: {
                    581:        int rc;
                    582:
                    583:        RF_FREELIST_CREATE(rf_rad_freelist, RF_MAX_FREE_RAD,
                    584:            RF_RAD_INC, sizeof(RF_RaidAccessDesc_t));
                    585:        if (rf_rad_freelist == NULL) {
                    586:                return (ENOMEM);
                    587:        }
                    588:        rc = rf_ShutdownCreate(listp, rf_ShutdownRDFreeList, NULL);
                    589:        if (rc) {
                    590:                RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d.\n", __FILE__,
                    591:                    __LINE__, rc);
                    592:                rf_ShutdownRDFreeList(NULL);
                    593:                return (rc);
                    594:        }
                    595:        RF_FREELIST_PRIME_INIT(rf_rad_freelist, RF_RAD_INITIAL, next,
                    596:            (RF_RaidAccessDesc_t *), rf_init_rad);
                    597:        return (0);
                    598: }
                    599:
                    600: RF_RaidAccessDesc_t *
                    601: rf_AllocRaidAccDesc(
                    602:     RF_Raid_t                   *raidPtr,
                    603:     RF_IoType_t                          type,
                    604:     RF_RaidAddr_t                raidAddress,
                    605:     RF_SectorCount_t             numBlocks,
                    606:     caddr_t                      bufPtr,
                    607:     void                        *bp,
                    608:     RF_DagHeader_t             **paramDAG,
                    609:     RF_AccessStripeMapHeader_t **paramASM,
                    610:     RF_RaidAccessFlags_t         flags,
                    611:     void                       (*cbF) (struct buf *),
                    612:     void                        *cbA,
                    613:     RF_AccessState_t            *states
                    614: )
                    615: {
                    616:        RF_RaidAccessDesc_t *desc;
                    617:
                    618:        RF_FREELIST_GET_INIT_NOUNLOCK(rf_rad_freelist, desc, next,
                    619:            (RF_RaidAccessDesc_t *), rf_init_rad);
                    620:        if (raidPtr->waitShutdown) {
                    621:                /*
                    622:                 * Actually, we're shutting the array down. Free the desc
                    623:                 * and return NULL.
                    624:                 */
                    625:                RF_FREELIST_DO_UNLOCK(rf_rad_freelist);
                    626:                RF_FREELIST_FREE_CLEAN(rf_rad_freelist, desc, next,
                    627:                    rf_clean_rad);
                    628:                return (NULL);
                    629:        }
                    630:        raidPtr->nAccOutstanding++;
                    631:        RF_FREELIST_DO_UNLOCK(rf_rad_freelist);
                    632:
                    633:        desc->raidPtr = (void *) raidPtr;
                    634:        desc->type = type;
                    635:        desc->raidAddress = raidAddress;
                    636:        desc->numBlocks = numBlocks;
                    637:        desc->bufPtr = bufPtr;
                    638:        desc->bp = bp;
                    639:        desc->paramDAG = paramDAG;
                    640:        desc->paramASM = paramASM;
                    641:        desc->flags = flags;
                    642:        desc->states = states;
                    643:        desc->state = 0;
                    644:
                    645:        desc->status = 0;
                    646:        bzero((char *) &desc->tracerec, sizeof(RF_AccTraceEntry_t));
                    647:        desc->callbackFunc = (void (*) (RF_CBParam_t)) cbF;     /* XXX */
                    648:        desc->callbackArg = cbA;
                    649:        desc->next = NULL;
                    650:        desc->head = desc;
                    651:        desc->numPending = 0;
                    652:        desc->cleanupList = NULL;
                    653:        rf_MakeAllocList(desc->cleanupList);
                    654:        return (desc);
                    655: }
                    656:
                    657: void
                    658: rf_FreeRaidAccDesc(RF_RaidAccessDesc_t * desc)
                    659: {
                    660:        RF_Raid_t *raidPtr = desc->raidPtr;
                    661:
                    662:        RF_ASSERT(desc);
                    663:
                    664:        rf_FreeAllocList(desc->cleanupList);
                    665:        RF_FREELIST_FREE_CLEAN_NOUNLOCK(rf_rad_freelist, desc, next, rf_clean_rad);
                    666:        raidPtr->nAccOutstanding--;
                    667:        if (raidPtr->waitShutdown) {
                    668:                RF_SIGNAL_COND(raidPtr->outstandingCond);
                    669:        }
                    670:        RF_FREELIST_DO_UNLOCK(rf_rad_freelist);
                    671: }
                    672:
                    673:
                    674: /********************************************************************
                    675:  * Main routine for performing an access.
                    676:  * Accesses are retried until a DAG can not be selected. This occurs
                    677:  * when either the DAG library is incomplete or there are too many
                    678:  * failures in a parity group.
                    679:  ********************************************************************/
                    680: int
                    681: rf_DoAccess(
                    682:     RF_Raid_t                   *raidPtr,
                    683:     RF_IoType_t                          type,         /* Should be read or write. */
                    684:     int                                  async_flag,   /*
                    685:                                                 * Should be RF_TRUE
                    686:                                                 * or RF_FALSE.
                    687:                                                 */
                    688:     RF_RaidAddr_t                raidAddress,
                    689:     RF_SectorCount_t             numBlocks,
                    690:     caddr_t                      bufPtr,
                    691:     void                        *bp_in,        /*
                    692:                                                 * It's a buf pointer.
                    693:                                                 * void * to facilitate
                    694:                                                 * ignoring it outside
                    695:                                                 * the kernel.
                    696:                                                 */
                    697:     RF_DagHeader_t             **paramDAG,
                    698:     RF_AccessStripeMapHeader_t **paramASM,
                    699:     RF_RaidAccessFlags_t         flags,
                    700:     RF_RaidAccessDesc_t                **paramDesc,
                    701:     void                       (*cbF) (struct buf *),
                    702:     void                        *cbA
                    703: )
                    704: {
                    705:        RF_RaidAccessDesc_t *desc;
                    706:        caddr_t lbufPtr = bufPtr;
                    707:        struct buf *bp = (struct buf *) bp_in;
                    708:
                    709:        raidAddress += rf_raidSectorOffset;
                    710:
                    711:        if (!raidPtr->valid) {
                    712:                RF_ERRORMSG("RAIDframe driver not successfully configured."
                    713:                    " Rejecting access.\n");
                    714:                IO_BUF_ERR(bp, EINVAL);
                    715:                return (EINVAL);
                    716:        }
                    717:
                    718:        if (rf_accessDebug) {
                    719:
                    720:                printf("logBytes is: %d %d %d.\n", raidPtr->raidid,
                    721:                    raidPtr->logBytesPerSector,
                    722:                    (int) rf_RaidAddressToByte(raidPtr, numBlocks));
                    723:                printf("raid%d: %s raidAddr %d (stripeid %d-%d) numBlocks %d (%d bytes) buf 0x%lx.\n", raidPtr->raidid,
                    724:                    (type == RF_IO_TYPE_READ) ? "READ" : "WRITE", (int) raidAddress,
                    725:                    (int) rf_RaidAddressToStripeID(&raidPtr->Layout, raidAddress),
                    726:                    (int) rf_RaidAddressToStripeID(&raidPtr->Layout, raidAddress + numBlocks - 1),
                    727:                    (int) numBlocks,
                    728:                    (int) rf_RaidAddressToByte(raidPtr, numBlocks),
                    729:                    (long) bufPtr);
                    730:        }
                    731:        if (raidAddress + numBlocks > raidPtr->totalSectors) {
                    732:
                    733:                printf("DoAccess: raid addr %lu too large to access %lu sectors. Max legal addr is %lu.\n",
                    734:                    (u_long) raidAddress, (u_long) numBlocks, (u_long) raidPtr->totalSectors);
                    735:
                    736:                        IO_BUF_ERR(bp, ENOSPC);
                    737:                        return (ENOSPC);
                    738:        }
                    739:        desc = rf_AllocRaidAccDesc(raidPtr, type, raidAddress,
                    740:            numBlocks, lbufPtr, bp, paramDAG, paramASM,
                    741:            flags, cbF, cbA, raidPtr->Layout.map->states);
                    742:
                    743:        if (desc == NULL) {
                    744:                return (ENOMEM);
                    745:        }
                    746:        RF_ETIMER_START(desc->tracerec.tot_timer);
                    747:
                    748:        desc->async_flag = async_flag;
                    749:
                    750:        rf_ContinueRaidAccess(desc);
                    751:
                    752:        return (0);
                    753: }
                    754:
                    755:
                    756: /* Force the array into reconfigured mode without doing reconstruction. */
                    757: int
                    758: rf_SetReconfiguredMode(RF_Raid_t *raidPtr, int row, int col)
                    759: {
                    760:        if (!(raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE)) {
                    761:                printf("Can't set reconfigured mode in dedicated-spare"
                    762:                    " array.\n");
                    763:                RF_PANIC();
                    764:        }
                    765:        RF_LOCK_MUTEX(raidPtr->mutex);
                    766:        raidPtr->numFailures++;
                    767:        raidPtr->Disks[row][col].status = rf_ds_dist_spared;
                    768:        raidPtr->status[row] = rf_rs_reconfigured;
                    769:        rf_update_component_labels(raidPtr, RF_NORMAL_COMPONENT_UPDATE);
                    770:        /*
                    771:         * Install spare table only if declustering + distributed sparing
                    772:         * architecture.
                    773:         */
                    774:        if (raidPtr->Layout.map->flags & RF_BD_DECLUSTERED)
                    775:                rf_InstallSpareTable(raidPtr, row, col);
                    776:        RF_UNLOCK_MUTEX(raidPtr->mutex);
                    777:        return (0);
                    778: }
                    779:
                    780: extern int fail_row, fail_col, fail_time;
                    781: extern int delayed_recon;
                    782:
                    783: int
                    784: rf_FailDisk(RF_Raid_t *raidPtr, int frow, int fcol, int initRecon)
                    785: {
                    786:        printf("raid%d: Failing disk r%d c%d.\n", raidPtr->raidid, frow, fcol);
                    787:        RF_LOCK_MUTEX(raidPtr->mutex);
                    788:        raidPtr->numFailures++;
                    789:        raidPtr->Disks[frow][fcol].status = rf_ds_failed;
                    790:        raidPtr->status[frow] = rf_rs_degraded;
                    791:        rf_update_component_labels(raidPtr, RF_NORMAL_COMPONENT_UPDATE);
                    792:        RF_UNLOCK_MUTEX(raidPtr->mutex);
                    793:        if (initRecon)
                    794:                rf_ReconstructFailedDisk(raidPtr, frow, fcol);
                    795:        return (0);
                    796: }
                    797:
                    798:
                    799: /*
                    800:  * Releases a thread that is waiting for the array to become quiesced.
                    801:  * access_suspend_mutex should be locked upon calling this.
                    802:  */
                    803: void
                    804: rf_SignalQuiescenceLock(RF_Raid_t *raidPtr, RF_RaidReconDesc_t *reconDesc)
                    805: {
                    806:        if (rf_quiesceDebug) {
                    807:                printf("raid%d: Signalling quiescence lock.\n",
                    808:                       raidPtr->raidid);
                    809:        }
                    810:        raidPtr->access_suspend_release = 1;
                    811:
                    812:        if (raidPtr->waiting_for_quiescence) {
                    813:                SIGNAL_QUIESCENT_COND(raidPtr);
                    814:        }
                    815: }
                    816:
                    817:
                    818: /*
                    819:  * Suspends all new requests to the array. No effect on accesses that are
                    820:  * in flight.
                    821:  */
                    822: int
                    823: rf_SuspendNewRequestsAndWait(RF_Raid_t *raidPtr)
                    824: {
                    825:        if (rf_quiesceDebug)
                    826:                printf("Suspending new reqs.\n");
                    827:
                    828:        RF_LOCK_MUTEX(raidPtr->access_suspend_mutex);
                    829:        raidPtr->accesses_suspended++;
                    830:        raidPtr->waiting_for_quiescence = (raidPtr->accs_in_flight == 0) ? 0 : 1;
                    831:
                    832:        if (raidPtr->waiting_for_quiescence) {
                    833:                raidPtr->access_suspend_release = 0;
                    834:                while (!raidPtr->access_suspend_release) {
                    835:                        printf("Suspending: Waiting for Quiescence.\n");
                    836:                        WAIT_FOR_QUIESCENCE(raidPtr);
                    837:                        raidPtr->waiting_for_quiescence = 0;
                    838:                }
                    839:        }
                    840:        printf("Quiescence reached...\n");
                    841:
                    842:        RF_UNLOCK_MUTEX(raidPtr->access_suspend_mutex);
                    843:        return (raidPtr->waiting_for_quiescence);
                    844: }
                    845:
                    846:
                    847: /* Wake up everyone waiting for quiescence to be released. */
                    848: void
                    849: rf_ResumeNewRequests(RF_Raid_t *raidPtr)
                    850: {
                    851:        RF_CallbackDesc_t *t, *cb;
                    852:
                    853:        if (rf_quiesceDebug)
                    854:                printf("Resuming new reqs.\n");
                    855:
                    856:        RF_LOCK_MUTEX(raidPtr->access_suspend_mutex);
                    857:        raidPtr->accesses_suspended--;
                    858:        if (raidPtr->accesses_suspended == 0)
                    859:                cb = raidPtr->quiesce_wait_list;
                    860:        else
                    861:                cb = NULL;
                    862:        raidPtr->quiesce_wait_list = NULL;
                    863:        RF_UNLOCK_MUTEX(raidPtr->access_suspend_mutex);
                    864:
                    865:        while (cb) {
                    866:                t = cb;
                    867:                cb = cb->next;
                    868:                (t->callbackFunc) (t->callbackArg);
                    869:                rf_FreeCallbackDesc(t);
                    870:        }
                    871: }
                    872:
                    873:
                    874: /*****************************************************************************
                    875:  *
                    876:  * Debug routines.
                    877:  *
                    878:  *****************************************************************************/
                    879:
                    880: void
                    881: rf_set_debug_option(char *name, long val)
                    882: {
                    883:        RF_DebugName_t *p;
                    884:
                    885:        for (p = rf_debugNames; p->name; p++) {
                    886:                if (!strcmp(p->name, name)) {
                    887:                        *(p->ptr) = val;
                    888:                        printf("[Set debug variable %s to %ld]\n", name, val);
                    889:                        return;
                    890:                }
                    891:        }
                    892:        RF_ERRORMSG1("Unknown debug string \"%s\"\n", name);
                    893: }
                    894:
                    895:
                    896: /* Would like to use sscanf here, but apparently not available in kernel. */
                    897: /*ARGSUSED*/
                    898: void
                    899: rf_ConfigureDebug(RF_Config_t *cfgPtr)
                    900: {
                    901:        char *val_p, *name_p, *white_p;
                    902:        long val;
                    903:        int i;
                    904:
                    905:        rf_ResetDebugOptions();
                    906:        for (i = 0; cfgPtr->debugVars[i][0] && i < RF_MAXDBGV; i++) {
                    907:                name_p = rf_find_non_white(&cfgPtr->debugVars[i][0]);
                    908:                white_p = rf_find_white(name_p);        /*
                    909:                                                         * Skip to start of 2nd
                    910:                                                         * word.
                    911:                                                         */
                    912:                val_p = rf_find_non_white(white_p);
                    913:                if (*val_p == '0' && *(val_p + 1) == 'x')
                    914:                        val = rf_htoi(val_p + 2);
                    915:                else
                    916:                        val = rf_atoi(val_p);
                    917:                *white_p = '\0';
                    918:                rf_set_debug_option(name_p, val);
                    919:        }
                    920: }
                    921:
                    922:
                    923: /* Performance monitoring stuff. */
                    924:
                    925: #if    !defined(_KERNEL) && !defined(SIMULATE)
                    926:
                    927: /*
                    928:  * Throughput stats currently only used in user-level RAIDframe.
                    929:  */
                    930:
                    931: int
                    932: rf_InitThroughputStats(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
                    933:     RF_Config_t *cfgPtr)
                    934: {
                    935:        int rc;
                    936:
                    937:        /* These used by user-level RAIDframe only. */
                    938:        rc = rf_create_managed_mutex(listp, &raidPtr->throughputstats.mutex);
                    939:        if (rc) {
                    940:                RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d.\n",
                    941:                    __FILE__, __LINE__, rc);
                    942:                return (rc);
                    943:        }
                    944:        raidPtr->throughputstats.sum_io_us = 0;
                    945:        raidPtr->throughputstats.num_ios = 0;
                    946:        raidPtr->throughputstats.num_out_ios = 0;
                    947:        return (0);
                    948: }
                    949:
                    950: void
                    951: rf_StartThroughputStats(RF_Raid_t *raidPtr)
                    952: {
                    953:        RF_LOCK_MUTEX(raidPtr->throughputstats.mutex);
                    954:        raidPtr->throughputstats.num_ios++;
                    955:        raidPtr->throughputstats.num_out_ios++;
                    956:        if (raidPtr->throughputstats.num_out_ios == 1)
                    957:                RF_GETTIME(raidPtr->throughputstats.start);
                    958:        RF_UNLOCK_MUTEX(raidPtr->throughputstats.mutex);
                    959: }
                    960:
                    961: void
                    962: rf_StopThroughputStats(RF_Raid_t *raidPtr)
                    963: {
                    964:        struct timeval diff;
                    965:
                    966:        RF_LOCK_MUTEX(raidPtr->throughputstats.mutex);
                    967:        raidPtr->throughputstats.num_out_ios--;
                    968:        if (raidPtr->throughputstats.num_out_ios == 0) {
                    969:                RF_GETTIME(raidPtr->throughputstats.stop);
                    970:                RF_TIMEVAL_DIFF(&raidPtr->throughputstats.start,
                    971:                    &raidPtr->throughputstats.stop, &diff);
                    972:                raidPtr->throughputstats.sum_io_us += RF_TIMEVAL_TO_US(diff);
                    973:        }
                    974:        RF_UNLOCK_MUTEX(raidPtr->throughputstats.mutex);
                    975: }
                    976:
                    977: void
                    978: rf_PrintThroughputStats(RF_Raid_t *raidPtr)
                    979: {
                    980:        RF_ASSERT(raidPtr->throughputstats.num_out_ios == 0);
                    981:        if (raidPtr->throughputstats.sum_io_us != 0) {
                    982:                printf("[Througphut: %8.2f IOs/second]\n",
                    983:                    raidPtr->throughputstats.num_ios /
                    984:                    (raidPtr->throughputstats.sum_io_us / 1000000.0));
                    985:        }
                    986: }
                    987:
                    988: #endif /* !_KERNEL && !SIMULATE */
                    989:
                    990: void
                    991: rf_StartUserStats(RF_Raid_t *raidPtr)
                    992: {
                    993:        RF_GETTIME(raidPtr->userstats.start);
                    994:        raidPtr->userstats.sum_io_us = 0;
                    995:        raidPtr->userstats.num_ios = 0;
                    996:        raidPtr->userstats.num_sect_moved = 0;
                    997: }
                    998:
                    999: void
                   1000: rf_StopUserStats(RF_Raid_t *raidPtr)
                   1001: {
                   1002:        RF_GETTIME(raidPtr->userstats.stop);
                   1003: }
                   1004:
                   1005: void
                   1006: rf_UpdateUserStats(
                   1007:     RF_Raid_t  *raidPtr,
                   1008:     int                 rt,            /* Response time in us. */
                   1009:     int                 numsect        /* Number of sectors for this access. */
                   1010: )
                   1011: {
                   1012:        raidPtr->userstats.sum_io_us += rt;
                   1013:        raidPtr->userstats.num_ios++;
                   1014:        raidPtr->userstats.num_sect_moved += numsect;
                   1015: }
                   1016:
                   1017: void
                   1018: rf_PrintUserStats(RF_Raid_t *raidPtr)
                   1019: {
                   1020:        long    elapsed_us, mbs, mbs_frac;
                   1021:        struct timeval diff;
                   1022:
                   1023:        RF_TIMEVAL_DIFF(&raidPtr->userstats.start, &raidPtr->userstats.stop,
                   1024:            &diff);
                   1025:        elapsed_us = RF_TIMEVAL_TO_US(diff);
                   1026:
                   1027:        /* 2000 sectors per megabyte, 10000000 microseconds per second. */
                   1028:        if (elapsed_us)
                   1029:                mbs = (raidPtr->userstats.num_sect_moved / 2000) /
                   1030:                    (elapsed_us / 1000000);
                   1031:        else
                   1032:                mbs = 0;
                   1033:
                   1034:        /* This computes only the first digit of the fractional mb/s moved. */
                   1035:        if (elapsed_us) {
                   1036:                mbs_frac = ((raidPtr->userstats.num_sect_moved / 200) /
                   1037:                    (elapsed_us / 1000000)) - (mbs * 10);
                   1038:        } else {
                   1039:                mbs_frac = 0;
                   1040:        }
                   1041:
                   1042:        printf("Number of I/Os:             %ld\n",
                   1043:            raidPtr->userstats.num_ios);
                   1044:        printf("Elapsed time (us):          %ld\n",
                   1045:            elapsed_us);
                   1046:        printf("User I/Os per second:       %ld\n",
                   1047:            RF_DB0_CHECK(raidPtr->userstats.num_ios, (elapsed_us / 1000000)));
                   1048:        printf("Average user response time: %ld us\n",
                   1049:            RF_DB0_CHECK(raidPtr->userstats.sum_io_us,
                   1050:             raidPtr->userstats.num_ios));
                   1051:        printf("Total sectors moved:        %ld\n",
                   1052:            raidPtr->userstats.num_sect_moved);
                   1053:        printf("Average access size (sect): %ld\n",
                   1054:            RF_DB0_CHECK(raidPtr->userstats.num_sect_moved,
                   1055:            raidPtr->userstats.num_ios));
                   1056:        printf("Achieved data rate:         %ld.%ld MB/sec\n",
                   1057:            mbs, mbs_frac);
                   1058: }

CVSweb