Annotation of sys/dev/raidframe/rf_driver.c, Revision 1.1
1.1 ! nbrk 1: /* $OpenBSD: rf_driver.c,v 1.11 2002/12/16 07:01:03 tdeval Exp $ */
! 2: /* $NetBSD: rf_driver.c,v 1.37 2000/06/04 02:05:13 oster Exp $ */
! 3:
! 4: /*
! 5: * Copyright (c) 1999 The NetBSD Foundation, Inc.
! 6: * All rights reserved.
! 7: *
! 8: * This code is derived from software contributed to The NetBSD Foundation
! 9: * by Greg Oster
! 10: *
! 11: * Redistribution and use in source and binary forms, with or without
! 12: * modification, are permitted provided that the following conditions
! 13: * are met:
! 14: * 1. Redistributions of source code must retain the above copyright
! 15: * notice, this list of conditions and the following disclaimer.
! 16: * 2. Redistributions in binary form must reproduce the above copyright
! 17: * notice, this list of conditions and the following disclaimer in the
! 18: * documentation and/or other materials provided with the distribution.
! 19: * 3. All advertising materials mentioning features or use of this software
! 20: * must display the following acknowledgement:
! 21: * This product includes software developed by the NetBSD
! 22: * Foundation, Inc. and its contributors.
! 23: * 4. Neither the name of The NetBSD Foundation nor the names of its
! 24: * contributors may be used to endorse or promote products derived
! 25: * from this software without specific prior written permission.
! 26: *
! 27: * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
! 28: * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! 29: * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! 30: * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
! 31: * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! 32: * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! 33: * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! 34: * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! 35: * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! 36: * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! 37: * POSSIBILITY OF SUCH DAMAGE.
! 38: */
! 39:
! 40: /*
! 41: * Copyright (c) 1995 Carnegie-Mellon University.
! 42: * All rights reserved.
! 43: *
! 44: * Author: Mark Holland, Khalil Amiri, Claudson Bornstein,
! 45: * William V. Courtright II, Robby Findler, Daniel Stodolsky,
! 46: * Rachad Youssef, Jim Zelenka
! 47: *
! 48: * Permission to use, copy, modify and distribute this software and
! 49: * its documentation is hereby granted, provided that both the copyright
! 50: * notice and this permission notice appear in all copies of the
! 51: * software, derivative works or modified versions, and any portions
! 52: * thereof, and that both notices appear in supporting documentation.
! 53: *
! 54: * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
! 55: * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
! 56: * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
! 57: *
! 58: * Carnegie Mellon requests users of this software to return to
! 59: *
! 60: * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
! 61: * School of Computer Science
! 62: * Carnegie Mellon University
! 63: * Pittsburgh PA 15213-3890
! 64: *
! 65: * any improvements or extensions that they make and grant Carnegie the
! 66: * rights to redistribute these changes.
! 67: */
! 68:
! 69: /*****************************************************************************
! 70: *
! 71: * rf_driver.c -- Main setup, teardown, and access routines for the RAID
! 72: * driver
! 73: *
! 74: * All routines are prefixed with rf_ (RAIDframe), to avoid conflicts.
! 75: *
! 76: *****************************************************************************/
! 77:
! 78: #include <sys/types.h>
! 79: #include <sys/param.h>
! 80: #include <sys/systm.h>
! 81: #include <sys/ioctl.h>
! 82: #include <sys/fcntl.h>
! 83: #ifdef __NetBSD__
! 84: #include <sys/vnode.h>
! 85: #endif
! 86:
! 87:
! 88: #include "rf_archs.h"
! 89: #include "rf_threadstuff.h"
! 90:
! 91:
! 92: #include <sys/errno.h>
! 93:
! 94: #include "rf_raid.h"
! 95: #include "rf_dag.h"
! 96: #include "rf_aselect.h"
! 97: #include "rf_diskqueue.h"
! 98: #include "rf_parityscan.h"
! 99: #include "rf_alloclist.h"
! 100: #include "rf_dagutils.h"
! 101: #include "rf_utils.h"
! 102: #include "rf_etimer.h"
! 103: #include "rf_acctrace.h"
! 104: #include "rf_configure.h"
! 105: #include "rf_general.h"
! 106: #include "rf_desc.h"
! 107: #include "rf_states.h"
! 108: #include "rf_freelist.h"
! 109: #include "rf_decluster.h"
! 110: #include "rf_map.h"
! 111: #include "rf_revent.h"
! 112: #include "rf_callback.h"
! 113: #include "rf_engine.h"
! 114: #include "rf_memchunk.h"
! 115: #include "rf_mcpair.h"
! 116: #include "rf_nwayxor.h"
! 117: #include "rf_debugprint.h"
! 118: #include "rf_copyback.h"
! 119: #include "rf_driver.h"
! 120: #include "rf_options.h"
! 121: #include "rf_shutdown.h"
! 122: #include "rf_kintf.h"
! 123:
! 124: #include <sys/buf.h>
! 125:
! 126: /* rad == RF_RaidAccessDesc_t */
! 127: static RF_FreeList_t *rf_rad_freelist;
! 128: #define RF_MAX_FREE_RAD 128
! 129: #define RF_RAD_INC 16
! 130: #define RF_RAD_INITIAL 32
! 131:
! 132: /* Debug variables. */
! 133: char rf_panicbuf[2048]; /*
! 134: * A buffer to hold an error msg when we panic.
! 135: */
! 136:
! 137: /* Main configuration routines. */
! 138: static int raidframe_booted = 0;
! 139:
! 140: void rf_ConfigureDebug(RF_Config_t *);
! 141: void rf_set_debug_option(char *, long);
! 142: void rf_UnconfigureArray(void);
! 143: int rf_init_rad(RF_RaidAccessDesc_t *);
! 144: void rf_clean_rad(RF_RaidAccessDesc_t *);
! 145: void rf_ShutdownRDFreeList(void *);
! 146: int rf_ConfigureRDFreeList(RF_ShutdownList_t **);
! 147:
! 148: RF_DECLARE_MUTEX(rf_printf_mutex); /*
! 149: * Debug only: Avoids interleaved
! 150: * printfs by different stripes.
! 151: */
! 152:
! 153: #define SIGNAL_QUIESCENT_COND(_raid_) wakeup(&((_raid_)->accesses_suspended))
! 154: #define WAIT_FOR_QUIESCENCE(_raid_) \
! 155: tsleep(&((_raid_)->accesses_suspended), PRIBIO, "RAIDframe quiesce", 0);
! 156:
! 157: #define IO_BUF_ERR(bp, err) \
! 158: do { \
! 159: bp->b_flags |= B_ERROR; \
! 160: bp->b_resid = bp->b_bcount; \
! 161: bp->b_error = err; \
! 162: biodone(bp); \
! 163: } while (0)
! 164:
! 165: static int configureCount = 0; /* Number of active configurations. */
! 166: static int isconfigged = 0; /*
! 167: * Is basic RAIDframe (non per-array)
! 168: * stuff configured ?
! 169: */
! 170: RF_DECLARE_STATIC_MUTEX(configureMutex); /*
! 171: * Used to lock the
! 172: * configuration stuff.
! 173: */
! 174: static RF_ShutdownList_t *globalShutdown; /* Non array-specific stuff. */
! 175: int rf_ConfigureRDFreeList(RF_ShutdownList_t **);
! 176:
! 177:
! 178: /* Called at system boot time. */
! 179: int
! 180: rf_BootRaidframe(void)
! 181: {
! 182: int rc;
! 183:
! 184: if (raidframe_booted)
! 185: return (EBUSY);
! 186: raidframe_booted = 1;
! 187:
! 188: rc = rf_mutex_init(&configureMutex);
! 189: if (rc) {
! 190: RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d.\n",
! 191: __FILE__, __LINE__, rc);
! 192: RF_PANIC();
! 193: }
! 194: configureCount = 0;
! 195: isconfigged = 0;
! 196: globalShutdown = NULL;
! 197: return (0);
! 198: }
! 199:
! 200:
! 201: /*
! 202: * This function is really just for debugging user-level stuff: It
! 203: * frees up all memory, other RAIDframe resources that might otherwise
! 204: * be kept around. This is used with systems like "sentinel" to detect
! 205: * memory leaks.
! 206: */
! 207: int
! 208: rf_UnbootRaidframe(void)
! 209: {
! 210: int rc;
! 211:
! 212: RF_LOCK_MUTEX(configureMutex);
! 213: if (configureCount) {
! 214: RF_UNLOCK_MUTEX(configureMutex);
! 215: return (EBUSY);
! 216: }
! 217: raidframe_booted = 0;
! 218: RF_UNLOCK_MUTEX(configureMutex);
! 219: rc = rf_mutex_destroy(&configureMutex);
! 220: if (rc) {
! 221: RF_ERRORMSG3("Unable to destroy mutex file %s line %d"
! 222: " rc=%d.\n", __FILE__, __LINE__, rc);
! 223: RF_PANIC();
! 224: }
! 225: return (0);
! 226: }
! 227:
! 228:
! 229: /*
! 230: * Called whenever an array is shutdown.
! 231: */
! 232: void
! 233: rf_UnconfigureArray(void)
! 234: {
! 235: int rc;
! 236:
! 237: RF_LOCK_MUTEX(configureMutex);
! 238: if (--configureCount == 0) { /*
! 239: * If no active configurations, shut
! 240: * everything down.
! 241: */
! 242: isconfigged = 0;
! 243:
! 244: rc = rf_ShutdownList(&globalShutdown);
! 245: if (rc) {
! 246: RF_ERRORMSG1("RAIDFRAME: unable to do global shutdown,"
! 247: " rc=%d.\n", rc);
! 248: }
! 249:
! 250: /*
! 251: * We must wait until now, because the AllocList module
! 252: * uses the DebugMem module.
! 253: */
! 254: if (rf_memDebug)
! 255: rf_print_unfreed();
! 256: }
! 257: RF_UNLOCK_MUTEX(configureMutex);
! 258: }
! 259:
! 260:
! 261: /*
! 262: * Called to shut down an array.
! 263: */
! 264: int
! 265: rf_Shutdown(RF_Raid_t *raidPtr)
! 266: {
! 267: if (!raidPtr->valid) {
! 268: RF_ERRORMSG("Attempt to shut down unconfigured RAIDframe"
! 269: " driver. Aborting shutdown.\n");
! 270: return (EINVAL);
! 271: }
! 272: /*
! 273: * Wait for outstanding IOs to land.
! 274: * As described in rf_raid.h, we use the rad_freelist lock
! 275: * to protect the per-array info about outstanding descs,
! 276: * since we need to do freelist locking anyway, and this
! 277: * cuts down on the amount of serialization we've got going
! 278: * on.
! 279: */
! 280: RF_FREELIST_DO_LOCK(rf_rad_freelist);
! 281: if (raidPtr->waitShutdown) {
! 282: RF_FREELIST_DO_UNLOCK(rf_rad_freelist);
! 283: return (EBUSY);
! 284: }
! 285: raidPtr->waitShutdown = 1;
! 286: while (raidPtr->nAccOutstanding) {
! 287: RF_WAIT_COND(raidPtr->outstandingCond, RF_FREELIST_MUTEX_OF(rf_rad_freelist));
! 288: }
! 289: RF_FREELIST_DO_UNLOCK(rf_rad_freelist);
! 290:
! 291: /* Wait for any parity re-writes to stop... */
! 292: while (raidPtr->parity_rewrite_in_progress) {
! 293: printf("Waiting for parity re-write to exit...\n");
! 294: tsleep(&raidPtr->parity_rewrite_in_progress, PRIBIO,
! 295: "rfprwshutdown", 0);
! 296: }
! 297:
! 298: raidPtr->valid = 0;
! 299:
! 300: rf_update_component_labels(raidPtr, RF_FINAL_COMPONENT_UPDATE);
! 301:
! 302: rf_UnconfigureVnodes(raidPtr);
! 303:
! 304: rf_ShutdownList(&raidPtr->shutdownList);
! 305:
! 306: rf_UnconfigureArray();
! 307:
! 308: return (0);
! 309: }
! 310:
/*
 * Helper macros for rf_Configure().  They all expect an `int rc' in scope
 * and, except for DO_INIT_CONFIGURE, the `raidPtr' (and `cfgPtr') locals of
 * rf_Configure().  On failure each one returns `rc' from the calling
 * function.
 */

/*
 * Run the global configuration routine `f' against the global shutdown
 * list.  Must be used with configureMutex held: on failure the global
 * shutdown list is run, the configuration count is dropped again and the
 * mutex is released before returning.
 */
#define	DO_INIT_CONFIGURE(f)						\
do {									\
	rc = f (&globalShutdown);					\
	if (rc) {							\
		RF_ERRORMSG2("RAIDFRAME: failed %s with %d.\n",		\
		    RF_STRING(f), rc);					\
		rf_ShutdownList(&globalShutdown);			\
		configureCount--;					\
		RF_UNLOCK_MUTEX(configureMutex);			\
		return(rc);						\
	}								\
} while (0)

/*
 * Undo a partially configured array: release its vnodes, run its
 * shutdown list and drop its reference on the global configuration.
 */
#define	DO_RAID_FAIL()							\
do {									\
	rf_UnconfigureVnodes(raidPtr);					\
	rf_ShutdownList(&raidPtr->shutdownList);			\
	rf_UnconfigureArray();						\
} while (0)

/* Run the per-array configuration routine `f'; clean up on failure. */
#define	DO_RAID_INIT_CONFIGURE(f)					\
do {									\
	rc = (f)(&raidPtr->shutdownList, raidPtr, cfgPtr);		\
	if (rc) {							\
		RF_ERRORMSG2("RAIDFRAME: failed %s with %d.\n",		\
		    RF_STRING(f), rc);					\
		DO_RAID_FAIL();						\
		return(rc);						\
	}								\
} while (0)

/* Create mutex `_m_' on the array's shutdown list; clean up on failure. */
#define	DO_RAID_MUTEX(_m_)						\
do {									\
	rc = rf_create_managed_mutex(&raidPtr->shutdownList, (_m_));	\
	if (rc) {							\
		RF_ERRORMSG3("Unable to init mutex file %s line %d"	\
		    " rc=%d.\n", __FILE__, __LINE__, rc);		\
		DO_RAID_FAIL();						\
		return(rc);						\
	}								\
} while (0)

/* Create condition `_c_' on the array's shutdown list; clean up on failure. */
#define	DO_RAID_COND(_c_)						\
do {									\
	rc = rf_create_managed_cond(&raidPtr->shutdownList, (_c_));	\
	if (rc) {							\
		RF_ERRORMSG3("Unable to init cond file %s line %d"	\
		    " rc=%d.\n", __FILE__, __LINE__, rc);		\
		DO_RAID_FAIL();						\
		return(rc);						\
	}								\
} while (0)
! 363:
! 364: int
! 365: rf_Configure(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr, RF_AutoConfig_t *ac)
! 366: {
! 367: RF_RowCol_t row, col;
! 368: int i, rc;
! 369:
! 370: /*
! 371: * XXX This check can probably be removed now, since
! 372: * RAIDFRAME_CONFIGURE now checks to make sure that the
! 373: * RAID set is not already valid.
! 374: */
! 375: if (raidPtr->valid) {
! 376: RF_ERRORMSG("RAIDframe configuration not shut down."
! 377: " Aborting configure.\n");
! 378: return (EINVAL);
! 379: }
! 380: RF_LOCK_MUTEX(configureMutex);
! 381: configureCount++;
! 382: if (isconfigged == 0) {
! 383: rc = rf_create_managed_mutex(&globalShutdown, &rf_printf_mutex);
! 384: if (rc) {
! 385: RF_ERRORMSG3("Unable to init mutex file %s line %d"
! 386: " rc=%d.\n", __FILE__, __LINE__, rc);
! 387: rf_ShutdownList(&globalShutdown);
! 388: return (rc);
! 389: }
! 390: /* Initialize globals. */
! 391: #ifdef RAIDDEBUG
! 392: printf("RAIDFRAME: protectedSectors is %ld.\n",
! 393: rf_protectedSectors);
! 394: #endif /* RAIDDEBUG */
! 395:
! 396: rf_clear_debug_print_buffer();
! 397:
! 398: DO_INIT_CONFIGURE(rf_ConfigureAllocList);
! 399:
! 400: /*
! 401: * Yes, this does make debugging general to the whole
! 402: * system instead of being array specific. Bummer, drag.
! 403: */
! 404: rf_ConfigureDebug(cfgPtr);
! 405: DO_INIT_CONFIGURE(rf_ConfigureDebugMem);
! 406: DO_INIT_CONFIGURE(rf_ConfigureAccessTrace);
! 407: DO_INIT_CONFIGURE(rf_ConfigureMapModule);
! 408: DO_INIT_CONFIGURE(rf_ConfigureReconEvent);
! 409: DO_INIT_CONFIGURE(rf_ConfigureCallback);
! 410: DO_INIT_CONFIGURE(rf_ConfigureMemChunk);
! 411: DO_INIT_CONFIGURE(rf_ConfigureRDFreeList);
! 412: DO_INIT_CONFIGURE(rf_ConfigureNWayXor);
! 413: DO_INIT_CONFIGURE(rf_ConfigureStripeLockFreeList);
! 414: DO_INIT_CONFIGURE(rf_ConfigureMCPair);
! 415: DO_INIT_CONFIGURE(rf_ConfigureDAGs);
! 416: DO_INIT_CONFIGURE(rf_ConfigureDAGFuncs);
! 417: DO_INIT_CONFIGURE(rf_ConfigureDebugPrint);
! 418: DO_INIT_CONFIGURE(rf_ConfigureReconstruction);
! 419: DO_INIT_CONFIGURE(rf_ConfigureCopyback);
! 420: DO_INIT_CONFIGURE(rf_ConfigureDiskQueueSystem);
! 421: isconfigged = 1;
! 422: }
! 423: RF_UNLOCK_MUTEX(configureMutex);
! 424:
! 425: DO_RAID_MUTEX(&raidPtr->mutex);
! 426: /*
! 427: * Set up the cleanup list. Do this after ConfigureDebug so that
! 428: * value of memDebug will be set.
! 429: */
! 430:
! 431: rf_MakeAllocList(raidPtr->cleanupList);
! 432: if (raidPtr->cleanupList == NULL) {
! 433: DO_RAID_FAIL();
! 434: return (ENOMEM);
! 435: }
! 436: rc = rf_ShutdownCreate(&raidPtr->shutdownList,
! 437: (void (*) (void *)) rf_FreeAllocList, raidPtr->cleanupList);
! 438: if (rc) {
! 439: RF_ERRORMSG3("Unable to add to shutdown list file %s line %d"
! 440: " rc=%d.\n", __FILE__, __LINE__, rc);
! 441: DO_RAID_FAIL();
! 442: return (rc);
! 443: }
! 444: raidPtr->numRow = cfgPtr->numRow;
! 445: raidPtr->numCol = cfgPtr->numCol;
! 446: raidPtr->numSpare = cfgPtr->numSpare;
! 447:
! 448: /*
! 449: * XXX We don't even pretend to support more than one row in the
! 450: * kernel...
! 451: */
! 452: if (raidPtr->numRow != 1) {
! 453: RF_ERRORMSG("Only one row supported in kernel.\n");
! 454: DO_RAID_FAIL();
! 455: return (EINVAL);
! 456: }
! 457: RF_CallocAndAdd(raidPtr->status, raidPtr->numRow,
! 458: sizeof(RF_RowStatus_t), (RF_RowStatus_t *), raidPtr->cleanupList);
! 459: if (raidPtr->status == NULL) {
! 460: DO_RAID_FAIL();
! 461: return (ENOMEM);
! 462: }
! 463: RF_CallocAndAdd(raidPtr->reconControl, raidPtr->numRow,
! 464: sizeof(RF_ReconCtrl_t *), (RF_ReconCtrl_t **), raidPtr->cleanupList);
! 465: if (raidPtr->reconControl == NULL) {
! 466: DO_RAID_FAIL();
! 467: return (ENOMEM);
! 468: }
! 469: for (i = 0; i < raidPtr->numRow; i++) {
! 470: raidPtr->status[i] = rf_rs_optimal;
! 471: raidPtr->reconControl[i] = NULL;
! 472: }
! 473:
! 474: DO_RAID_INIT_CONFIGURE(rf_ConfigureEngine);
! 475: DO_RAID_INIT_CONFIGURE(rf_ConfigureStripeLocks);
! 476:
! 477: DO_RAID_COND(&raidPtr->outstandingCond);
! 478:
! 479: raidPtr->nAccOutstanding = 0;
! 480: raidPtr->waitShutdown = 0;
! 481:
! 482: DO_RAID_MUTEX(&raidPtr->access_suspend_mutex);
! 483: DO_RAID_COND(&raidPtr->quiescent_cond);
! 484:
! 485: DO_RAID_COND(&raidPtr->waitForReconCond);
! 486:
! 487: DO_RAID_MUTEX(&raidPtr->recon_done_proc_mutex);
! 488:
! 489: if (ac != NULL) {
! 490: /*
! 491: * We have an AutoConfig structure... Don't do the
! 492: * normal disk configuration... call the auto config
! 493: * stuff.
! 494: */
! 495: rf_AutoConfigureDisks(raidPtr, cfgPtr, ac);
! 496: } else {
! 497: DO_RAID_INIT_CONFIGURE(rf_ConfigureDisks);
! 498: DO_RAID_INIT_CONFIGURE(rf_ConfigureSpareDisks);
! 499: }
! 500: /*
! 501: * Do this after ConfigureDisks & ConfigureSpareDisks to be sure
! 502: * devno is set.
! 503: */
! 504: DO_RAID_INIT_CONFIGURE(rf_ConfigureDiskQueues);
! 505:
! 506: DO_RAID_INIT_CONFIGURE(rf_ConfigureLayout);
! 507:
! 508: DO_RAID_INIT_CONFIGURE(rf_ConfigurePSStatus);
! 509:
! 510: for (row = 0; row < raidPtr->numRow; row++) {
! 511: for (col = 0; col < raidPtr->numCol; col++) {
! 512: /*
! 513: * XXX Better distribution.
! 514: */
! 515: raidPtr->hist_diskreq[row][col] = 0;
! 516: }
! 517: }
! 518:
! 519: raidPtr->numNewFailures = 0;
! 520: raidPtr->copyback_in_progress = 0;
! 521: raidPtr->parity_rewrite_in_progress = 0;
! 522: raidPtr->recon_in_progress = 0;
! 523: raidPtr->maxOutstanding = cfgPtr->maxOutstandingDiskReqs;
! 524:
! 525: /*
! 526: * Autoconfigure and root_partition will actually get filled in
! 527: * after the config is done.
! 528: */
! 529: raidPtr->autoconfigure = 0;
! 530: raidPtr->root_partition = 0;
! 531: raidPtr->last_unit = raidPtr->raidid;
! 532: raidPtr->config_order = 0;
! 533:
! 534: if (rf_keepAccTotals) {
! 535: raidPtr->keep_acc_totals = 1;
! 536: }
! 537: rf_StartUserStats(raidPtr);
! 538:
! 539: raidPtr->valid = 1;
! 540: return (0);
! 541: }
! 542:
! 543: int
! 544: rf_init_rad(RF_RaidAccessDesc_t *desc)
! 545: {
! 546: int rc;
! 547:
! 548: rc = rf_mutex_init(&desc->mutex);
! 549: if (rc) {
! 550: RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d.\n", __FILE__,
! 551: __LINE__, rc);
! 552: return (rc);
! 553: }
! 554: rc = rf_cond_init(&desc->cond);
! 555: if (rc) {
! 556: RF_ERRORMSG3("Unable to init cond file %s line %d rc=%d.\n", __FILE__,
! 557: __LINE__, rc);
! 558: rf_mutex_destroy(&desc->mutex);
! 559: return (rc);
! 560: }
! 561: return (0);
! 562: }
! 563:
! 564: void
! 565: rf_clean_rad(RF_RaidAccessDesc_t *desc)
! 566: {
! 567: rf_mutex_destroy(&desc->mutex);
! 568: rf_cond_destroy(&desc->cond);
! 569: }
! 570:
! 571: void
! 572: rf_ShutdownRDFreeList(void *ignored)
! 573: {
! 574: RF_FREELIST_DESTROY_CLEAN(rf_rad_freelist, next,
! 575: (RF_RaidAccessDesc_t *), rf_clean_rad);
! 576: }
! 577:
! 578: int
! 579: rf_ConfigureRDFreeList(RF_ShutdownList_t **listp)
! 580: {
! 581: int rc;
! 582:
! 583: RF_FREELIST_CREATE(rf_rad_freelist, RF_MAX_FREE_RAD,
! 584: RF_RAD_INC, sizeof(RF_RaidAccessDesc_t));
! 585: if (rf_rad_freelist == NULL) {
! 586: return (ENOMEM);
! 587: }
! 588: rc = rf_ShutdownCreate(listp, rf_ShutdownRDFreeList, NULL);
! 589: if (rc) {
! 590: RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d.\n", __FILE__,
! 591: __LINE__, rc);
! 592: rf_ShutdownRDFreeList(NULL);
! 593: return (rc);
! 594: }
! 595: RF_FREELIST_PRIME_INIT(rf_rad_freelist, RF_RAD_INITIAL, next,
! 596: (RF_RaidAccessDesc_t *), rf_init_rad);
! 597: return (0);
! 598: }
! 599:
! 600: RF_RaidAccessDesc_t *
! 601: rf_AllocRaidAccDesc(
! 602: RF_Raid_t *raidPtr,
! 603: RF_IoType_t type,
! 604: RF_RaidAddr_t raidAddress,
! 605: RF_SectorCount_t numBlocks,
! 606: caddr_t bufPtr,
! 607: void *bp,
! 608: RF_DagHeader_t **paramDAG,
! 609: RF_AccessStripeMapHeader_t **paramASM,
! 610: RF_RaidAccessFlags_t flags,
! 611: void (*cbF) (struct buf *),
! 612: void *cbA,
! 613: RF_AccessState_t *states
! 614: )
! 615: {
! 616: RF_RaidAccessDesc_t *desc;
! 617:
! 618: RF_FREELIST_GET_INIT_NOUNLOCK(rf_rad_freelist, desc, next,
! 619: (RF_RaidAccessDesc_t *), rf_init_rad);
! 620: if (raidPtr->waitShutdown) {
! 621: /*
! 622: * Actually, we're shutting the array down. Free the desc
! 623: * and return NULL.
! 624: */
! 625: RF_FREELIST_DO_UNLOCK(rf_rad_freelist);
! 626: RF_FREELIST_FREE_CLEAN(rf_rad_freelist, desc, next,
! 627: rf_clean_rad);
! 628: return (NULL);
! 629: }
! 630: raidPtr->nAccOutstanding++;
! 631: RF_FREELIST_DO_UNLOCK(rf_rad_freelist);
! 632:
! 633: desc->raidPtr = (void *) raidPtr;
! 634: desc->type = type;
! 635: desc->raidAddress = raidAddress;
! 636: desc->numBlocks = numBlocks;
! 637: desc->bufPtr = bufPtr;
! 638: desc->bp = bp;
! 639: desc->paramDAG = paramDAG;
! 640: desc->paramASM = paramASM;
! 641: desc->flags = flags;
! 642: desc->states = states;
! 643: desc->state = 0;
! 644:
! 645: desc->status = 0;
! 646: bzero((char *) &desc->tracerec, sizeof(RF_AccTraceEntry_t));
! 647: desc->callbackFunc = (void (*) (RF_CBParam_t)) cbF; /* XXX */
! 648: desc->callbackArg = cbA;
! 649: desc->next = NULL;
! 650: desc->head = desc;
! 651: desc->numPending = 0;
! 652: desc->cleanupList = NULL;
! 653: rf_MakeAllocList(desc->cleanupList);
! 654: return (desc);
! 655: }
! 656:
! 657: void
! 658: rf_FreeRaidAccDesc(RF_RaidAccessDesc_t * desc)
! 659: {
! 660: RF_Raid_t *raidPtr = desc->raidPtr;
! 661:
! 662: RF_ASSERT(desc);
! 663:
! 664: rf_FreeAllocList(desc->cleanupList);
! 665: RF_FREELIST_FREE_CLEAN_NOUNLOCK(rf_rad_freelist, desc, next, rf_clean_rad);
! 666: raidPtr->nAccOutstanding--;
! 667: if (raidPtr->waitShutdown) {
! 668: RF_SIGNAL_COND(raidPtr->outstandingCond);
! 669: }
! 670: RF_FREELIST_DO_UNLOCK(rf_rad_freelist);
! 671: }
! 672:
! 673:
! 674: /********************************************************************
! 675: * Main routine for performing an access.
! 676: * Accesses are retried until a DAG can not be selected. This occurs
! 677: * when either the DAG library is incomplete or there are too many
! 678: * failures in a parity group.
! 679: ********************************************************************/
! 680: int
! 681: rf_DoAccess(
! 682: RF_Raid_t *raidPtr,
! 683: RF_IoType_t type, /* Should be read or write. */
! 684: int async_flag, /*
! 685: * Should be RF_TRUE
! 686: * or RF_FALSE.
! 687: */
! 688: RF_RaidAddr_t raidAddress,
! 689: RF_SectorCount_t numBlocks,
! 690: caddr_t bufPtr,
! 691: void *bp_in, /*
! 692: * It's a buf pointer.
! 693: * void * to facilitate
! 694: * ignoring it outside
! 695: * the kernel.
! 696: */
! 697: RF_DagHeader_t **paramDAG,
! 698: RF_AccessStripeMapHeader_t **paramASM,
! 699: RF_RaidAccessFlags_t flags,
! 700: RF_RaidAccessDesc_t **paramDesc,
! 701: void (*cbF) (struct buf *),
! 702: void *cbA
! 703: )
! 704: {
! 705: RF_RaidAccessDesc_t *desc;
! 706: caddr_t lbufPtr = bufPtr;
! 707: struct buf *bp = (struct buf *) bp_in;
! 708:
! 709: raidAddress += rf_raidSectorOffset;
! 710:
! 711: if (!raidPtr->valid) {
! 712: RF_ERRORMSG("RAIDframe driver not successfully configured."
! 713: " Rejecting access.\n");
! 714: IO_BUF_ERR(bp, EINVAL);
! 715: return (EINVAL);
! 716: }
! 717:
! 718: if (rf_accessDebug) {
! 719:
! 720: printf("logBytes is: %d %d %d.\n", raidPtr->raidid,
! 721: raidPtr->logBytesPerSector,
! 722: (int) rf_RaidAddressToByte(raidPtr, numBlocks));
! 723: printf("raid%d: %s raidAddr %d (stripeid %d-%d) numBlocks %d (%d bytes) buf 0x%lx.\n", raidPtr->raidid,
! 724: (type == RF_IO_TYPE_READ) ? "READ" : "WRITE", (int) raidAddress,
! 725: (int) rf_RaidAddressToStripeID(&raidPtr->Layout, raidAddress),
! 726: (int) rf_RaidAddressToStripeID(&raidPtr->Layout, raidAddress + numBlocks - 1),
! 727: (int) numBlocks,
! 728: (int) rf_RaidAddressToByte(raidPtr, numBlocks),
! 729: (long) bufPtr);
! 730: }
! 731: if (raidAddress + numBlocks > raidPtr->totalSectors) {
! 732:
! 733: printf("DoAccess: raid addr %lu too large to access %lu sectors. Max legal addr is %lu.\n",
! 734: (u_long) raidAddress, (u_long) numBlocks, (u_long) raidPtr->totalSectors);
! 735:
! 736: IO_BUF_ERR(bp, ENOSPC);
! 737: return (ENOSPC);
! 738: }
! 739: desc = rf_AllocRaidAccDesc(raidPtr, type, raidAddress,
! 740: numBlocks, lbufPtr, bp, paramDAG, paramASM,
! 741: flags, cbF, cbA, raidPtr->Layout.map->states);
! 742:
! 743: if (desc == NULL) {
! 744: return (ENOMEM);
! 745: }
! 746: RF_ETIMER_START(desc->tracerec.tot_timer);
! 747:
! 748: desc->async_flag = async_flag;
! 749:
! 750: rf_ContinueRaidAccess(desc);
! 751:
! 752: return (0);
! 753: }
! 754:
! 755:
! 756: /* Force the array into reconfigured mode without doing reconstruction. */
! 757: int
! 758: rf_SetReconfiguredMode(RF_Raid_t *raidPtr, int row, int col)
! 759: {
! 760: if (!(raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE)) {
! 761: printf("Can't set reconfigured mode in dedicated-spare"
! 762: " array.\n");
! 763: RF_PANIC();
! 764: }
! 765: RF_LOCK_MUTEX(raidPtr->mutex);
! 766: raidPtr->numFailures++;
! 767: raidPtr->Disks[row][col].status = rf_ds_dist_spared;
! 768: raidPtr->status[row] = rf_rs_reconfigured;
! 769: rf_update_component_labels(raidPtr, RF_NORMAL_COMPONENT_UPDATE);
! 770: /*
! 771: * Install spare table only if declustering + distributed sparing
! 772: * architecture.
! 773: */
! 774: if (raidPtr->Layout.map->flags & RF_BD_DECLUSTERED)
! 775: rf_InstallSpareTable(raidPtr, row, col);
! 776: RF_UNLOCK_MUTEX(raidPtr->mutex);
! 777: return (0);
! 778: }
! 779:
! 780: extern int fail_row, fail_col, fail_time;
! 781: extern int delayed_recon;
! 782:
! 783: int
! 784: rf_FailDisk(RF_Raid_t *raidPtr, int frow, int fcol, int initRecon)
! 785: {
! 786: printf("raid%d: Failing disk r%d c%d.\n", raidPtr->raidid, frow, fcol);
! 787: RF_LOCK_MUTEX(raidPtr->mutex);
! 788: raidPtr->numFailures++;
! 789: raidPtr->Disks[frow][fcol].status = rf_ds_failed;
! 790: raidPtr->status[frow] = rf_rs_degraded;
! 791: rf_update_component_labels(raidPtr, RF_NORMAL_COMPONENT_UPDATE);
! 792: RF_UNLOCK_MUTEX(raidPtr->mutex);
! 793: if (initRecon)
! 794: rf_ReconstructFailedDisk(raidPtr, frow, fcol);
! 795: return (0);
! 796: }
! 797:
! 798:
! 799: /*
! 800: * Releases a thread that is waiting for the array to become quiesced.
! 801: * access_suspend_mutex should be locked upon calling this.
! 802: */
! 803: void
! 804: rf_SignalQuiescenceLock(RF_Raid_t *raidPtr, RF_RaidReconDesc_t *reconDesc)
! 805: {
! 806: if (rf_quiesceDebug) {
! 807: printf("raid%d: Signalling quiescence lock.\n",
! 808: raidPtr->raidid);
! 809: }
! 810: raidPtr->access_suspend_release = 1;
! 811:
! 812: if (raidPtr->waiting_for_quiescence) {
! 813: SIGNAL_QUIESCENT_COND(raidPtr);
! 814: }
! 815: }
! 816:
! 817:
! 818: /*
! 819: * Suspends all new requests to the array. No effect on accesses that are
! 820: * in flight.
! 821: */
! 822: int
! 823: rf_SuspendNewRequestsAndWait(RF_Raid_t *raidPtr)
! 824: {
! 825: if (rf_quiesceDebug)
! 826: printf("Suspending new reqs.\n");
! 827:
! 828: RF_LOCK_MUTEX(raidPtr->access_suspend_mutex);
! 829: raidPtr->accesses_suspended++;
! 830: raidPtr->waiting_for_quiescence = (raidPtr->accs_in_flight == 0) ? 0 : 1;
! 831:
! 832: if (raidPtr->waiting_for_quiescence) {
! 833: raidPtr->access_suspend_release = 0;
! 834: while (!raidPtr->access_suspend_release) {
! 835: printf("Suspending: Waiting for Quiescence.\n");
! 836: WAIT_FOR_QUIESCENCE(raidPtr);
! 837: raidPtr->waiting_for_quiescence = 0;
! 838: }
! 839: }
! 840: printf("Quiescence reached...\n");
! 841:
! 842: RF_UNLOCK_MUTEX(raidPtr->access_suspend_mutex);
! 843: return (raidPtr->waiting_for_quiescence);
! 844: }
! 845:
! 846:
! 847: /* Wake up everyone waiting for quiescence to be released. */
! 848: void
! 849: rf_ResumeNewRequests(RF_Raid_t *raidPtr)
! 850: {
! 851: RF_CallbackDesc_t *t, *cb;
! 852:
! 853: if (rf_quiesceDebug)
! 854: printf("Resuming new reqs.\n");
! 855:
! 856: RF_LOCK_MUTEX(raidPtr->access_suspend_mutex);
! 857: raidPtr->accesses_suspended--;
! 858: if (raidPtr->accesses_suspended == 0)
! 859: cb = raidPtr->quiesce_wait_list;
! 860: else
! 861: cb = NULL;
! 862: raidPtr->quiesce_wait_list = NULL;
! 863: RF_UNLOCK_MUTEX(raidPtr->access_suspend_mutex);
! 864:
! 865: while (cb) {
! 866: t = cb;
! 867: cb = cb->next;
! 868: (t->callbackFunc) (t->callbackArg);
! 869: rf_FreeCallbackDesc(t);
! 870: }
! 871: }
! 872:
! 873:
! 874: /*****************************************************************************
! 875: *
! 876: * Debug routines.
! 877: *
! 878: *****************************************************************************/
! 879:
! 880: void
! 881: rf_set_debug_option(char *name, long val)
! 882: {
! 883: RF_DebugName_t *p;
! 884:
! 885: for (p = rf_debugNames; p->name; p++) {
! 886: if (!strcmp(p->name, name)) {
! 887: *(p->ptr) = val;
! 888: printf("[Set debug variable %s to %ld]\n", name, val);
! 889: return;
! 890: }
! 891: }
! 892: RF_ERRORMSG1("Unknown debug string \"%s\"\n", name);
! 893: }
! 894:
! 895:
! 896: /* Would like to use sscanf here, but apparently not available in kernel. */
! 897: /*ARGSUSED*/
! 898: void
! 899: rf_ConfigureDebug(RF_Config_t *cfgPtr)
! 900: {
! 901: char *val_p, *name_p, *white_p;
! 902: long val;
! 903: int i;
! 904:
! 905: rf_ResetDebugOptions();
! 906: for (i = 0; cfgPtr->debugVars[i][0] && i < RF_MAXDBGV; i++) {
! 907: name_p = rf_find_non_white(&cfgPtr->debugVars[i][0]);
! 908: white_p = rf_find_white(name_p); /*
! 909: * Skip to start of 2nd
! 910: * word.
! 911: */
! 912: val_p = rf_find_non_white(white_p);
! 913: if (*val_p == '0' && *(val_p + 1) == 'x')
! 914: val = rf_htoi(val_p + 2);
! 915: else
! 916: val = rf_atoi(val_p);
! 917: *white_p = '\0';
! 918: rf_set_debug_option(name_p, val);
! 919: }
! 920: }
! 921:
! 922:
! 923: /* Performance monitoring stuff. */
! 924:
! 925: #if !defined(_KERNEL) && !defined(SIMULATE)
! 926:
! 927: /*
! 928: * Throughput stats currently only used in user-level RAIDframe.
! 929: */
! 930:
! 931: int
! 932: rf_InitThroughputStats(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
! 933: RF_Config_t *cfgPtr)
! 934: {
! 935: int rc;
! 936:
! 937: /* These used by user-level RAIDframe only. */
! 938: rc = rf_create_managed_mutex(listp, &raidPtr->throughputstats.mutex);
! 939: if (rc) {
! 940: RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d.\n",
! 941: __FILE__, __LINE__, rc);
! 942: return (rc);
! 943: }
! 944: raidPtr->throughputstats.sum_io_us = 0;
! 945: raidPtr->throughputstats.num_ios = 0;
! 946: raidPtr->throughputstats.num_out_ios = 0;
! 947: return (0);
! 948: }
! 949:
! 950: void
! 951: rf_StartThroughputStats(RF_Raid_t *raidPtr)
! 952: {
! 953: RF_LOCK_MUTEX(raidPtr->throughputstats.mutex);
! 954: raidPtr->throughputstats.num_ios++;
! 955: raidPtr->throughputstats.num_out_ios++;
! 956: if (raidPtr->throughputstats.num_out_ios == 1)
! 957: RF_GETTIME(raidPtr->throughputstats.start);
! 958: RF_UNLOCK_MUTEX(raidPtr->throughputstats.mutex);
! 959: }
! 960:
! 961: void
! 962: rf_StopThroughputStats(RF_Raid_t *raidPtr)
! 963: {
! 964: struct timeval diff;
! 965:
! 966: RF_LOCK_MUTEX(raidPtr->throughputstats.mutex);
! 967: raidPtr->throughputstats.num_out_ios--;
! 968: if (raidPtr->throughputstats.num_out_ios == 0) {
! 969: RF_GETTIME(raidPtr->throughputstats.stop);
! 970: RF_TIMEVAL_DIFF(&raidPtr->throughputstats.start,
! 971: &raidPtr->throughputstats.stop, &diff);
! 972: raidPtr->throughputstats.sum_io_us += RF_TIMEVAL_TO_US(diff);
! 973: }
! 974: RF_UNLOCK_MUTEX(raidPtr->throughputstats.mutex);
! 975: }
! 976:
! 977: void
! 978: rf_PrintThroughputStats(RF_Raid_t *raidPtr)
! 979: {
! 980: RF_ASSERT(raidPtr->throughputstats.num_out_ios == 0);
! 981: if (raidPtr->throughputstats.sum_io_us != 0) {
! 982: printf("[Througphut: %8.2f IOs/second]\n",
! 983: raidPtr->throughputstats.num_ios /
! 984: (raidPtr->throughputstats.sum_io_us / 1000000.0));
! 985: }
! 986: }
! 987:
! 988: #endif /* !_KERNEL && !SIMULATE */
! 989:
! 990: void
! 991: rf_StartUserStats(RF_Raid_t *raidPtr)
! 992: {
! 993: RF_GETTIME(raidPtr->userstats.start);
! 994: raidPtr->userstats.sum_io_us = 0;
! 995: raidPtr->userstats.num_ios = 0;
! 996: raidPtr->userstats.num_sect_moved = 0;
! 997: }
! 998:
! 999: void
! 1000: rf_StopUserStats(RF_Raid_t *raidPtr)
! 1001: {
! 1002: RF_GETTIME(raidPtr->userstats.stop);
! 1003: }
! 1004:
! 1005: void
! 1006: rf_UpdateUserStats(
! 1007: RF_Raid_t *raidPtr,
! 1008: int rt, /* Response time in us. */
! 1009: int numsect /* Number of sectors for this access. */
! 1010: )
! 1011: {
! 1012: raidPtr->userstats.sum_io_us += rt;
! 1013: raidPtr->userstats.num_ios++;
! 1014: raidPtr->userstats.num_sect_moved += numsect;
! 1015: }
! 1016:
! 1017: void
! 1018: rf_PrintUserStats(RF_Raid_t *raidPtr)
! 1019: {
! 1020: long elapsed_us, mbs, mbs_frac;
! 1021: struct timeval diff;
! 1022:
! 1023: RF_TIMEVAL_DIFF(&raidPtr->userstats.start, &raidPtr->userstats.stop,
! 1024: &diff);
! 1025: elapsed_us = RF_TIMEVAL_TO_US(diff);
! 1026:
! 1027: /* 2000 sectors per megabyte, 10000000 microseconds per second. */
! 1028: if (elapsed_us)
! 1029: mbs = (raidPtr->userstats.num_sect_moved / 2000) /
! 1030: (elapsed_us / 1000000);
! 1031: else
! 1032: mbs = 0;
! 1033:
! 1034: /* This computes only the first digit of the fractional mb/s moved. */
! 1035: if (elapsed_us) {
! 1036: mbs_frac = ((raidPtr->userstats.num_sect_moved / 200) /
! 1037: (elapsed_us / 1000000)) - (mbs * 10);
! 1038: } else {
! 1039: mbs_frac = 0;
! 1040: }
! 1041:
! 1042: printf("Number of I/Os: %ld\n",
! 1043: raidPtr->userstats.num_ios);
! 1044: printf("Elapsed time (us): %ld\n",
! 1045: elapsed_us);
! 1046: printf("User I/Os per second: %ld\n",
! 1047: RF_DB0_CHECK(raidPtr->userstats.num_ios, (elapsed_us / 1000000)));
! 1048: printf("Average user response time: %ld us\n",
! 1049: RF_DB0_CHECK(raidPtr->userstats.sum_io_us,
! 1050: raidPtr->userstats.num_ios));
! 1051: printf("Total sectors moved: %ld\n",
! 1052: raidPtr->userstats.num_sect_moved);
! 1053: printf("Average access size (sect): %ld\n",
! 1054: RF_DB0_CHECK(raidPtr->userstats.num_sect_moved,
! 1055: raidPtr->userstats.num_ios));
! 1056: printf("Achieved data rate: %ld.%ld MB/sec\n",
! 1057: mbs, mbs_frac);
! 1058: }
CVSweb