Annotation of sys/dev/raidframe/rf_disks.c, Revision 1.1
1.1 ! nbrk 1: /* $OpenBSD: rf_disks.c,v 1.12 2007/06/05 00:38:22 deraadt Exp $ */
! 2: /* $NetBSD: rf_disks.c,v 1.31 2000/06/02 01:17:14 oster Exp $ */
! 3:
! 4: /*
! 5: * Copyright (c) 1999 The NetBSD Foundation, Inc.
! 6: * All rights reserved.
! 7: *
! 8: * This code is derived from software contributed to The NetBSD Foundation
! 9: * by Greg Oster
! 10: *
! 11: * Redistribution and use in source and binary forms, with or without
! 12: * modification, are permitted provided that the following conditions
! 13: * are met:
! 14: * 1. Redistributions of source code must retain the above copyright
! 15: * notice, this list of conditions and the following disclaimer.
! 16: * 2. Redistributions in binary form must reproduce the above copyright
! 17: * notice, this list of conditions and the following disclaimer in the
! 18: * documentation and/or other materials provided with the distribution.
! 19: * 3. All advertising materials mentioning features or use of this software
! 20: * must display the following acknowledgement:
! 21: * This product includes software developed by the NetBSD
! 22: * Foundation, Inc. and its contributors.
! 23: * 4. Neither the name of The NetBSD Foundation nor the names of its
! 24: * contributors may be used to endorse or promote products derived
! 25: * from this software without specific prior written permission.
! 26: *
! 27: * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
! 28: * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! 29: * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! 30: * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
! 31: * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! 32: * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! 33: * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! 34: * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! 35: * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! 36: * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! 37: * POSSIBILITY OF SUCH DAMAGE.
! 38: */
! 39: /*
! 40: * Copyright (c) 1995 Carnegie-Mellon University.
! 41: * All rights reserved.
! 42: *
! 43: * Author: Mark Holland
! 44: *
! 45: * Permission to use, copy, modify and distribute this software and
! 46: * its documentation is hereby granted, provided that both the copyright
! 47: * notice and this permission notice appear in all copies of the
! 48: * software, derivative works or modified versions, and any portions
! 49: * thereof, and that both notices appear in supporting documentation.
! 50: *
! 51: * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
! 52: * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
! 53: * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
! 54: *
! 55: * Carnegie Mellon requests users of this software to return to
! 56: *
! 57: * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
! 58: * School of Computer Science
! 59: * Carnegie Mellon University
! 60: * Pittsburgh PA 15213-3890
! 61: *
! 62: * any improvements or extensions that they make and grant Carnegie the
! 63: * rights to redistribute these changes.
! 64: */
! 65:
! 66: /***************************************************************
! 67: * rf_disks.c -- Code to perform operations on the actual disks.
! 68: ***************************************************************/
! 69:
! 70: #include "rf_types.h"
! 71: #include "rf_raid.h"
! 72: #include "rf_alloclist.h"
! 73: #include "rf_utils.h"
! 74: #include "rf_configure.h"
! 75: #include "rf_general.h"
! 76: #include "rf_options.h"
! 77: #include "rf_kintf.h"
! 78:
! 79: #if defined(__NetBSD__)
! 80: #include "rf_netbsd.h"
! 81: #elif defined(__OpenBSD__)
! 82: #include "rf_openbsd.h"
! 83: #endif
! 84:
! 85: #include <sys/types.h>
! 86: #include <sys/param.h>
! 87: #include <sys/systm.h>
! 88: #include <sys/proc.h>
! 89: #include <sys/ioctl.h>
! 90: #include <sys/fcntl.h>
! 91: #ifdef __NETBSD__
! 92: #include <sys/vnode.h>
! 93: #endif /* __NETBSD__ */
! 94:
! 95: int rf_AllocDiskStructures(RF_Raid_t *, RF_Config_t *);
! 96: void rf_print_label_status(RF_Raid_t *, int, int, char *,
! 97: RF_ComponentLabel_t *);
! 98: int rf_check_label_vitals(RF_Raid_t *, int, int, char *,
! 99: RF_ComponentLabel_t *, int, int);
! 100:
/*
 * Debug printf wrappers: the message is emitted only when rf_diskDebug is
 * non-zero.  Each macro expands to a single do { } while (0) statement so
 * that it cannot capture the "else" of an enclosing unbraced if and always
 * needs the trailing semicolon the call sites already supply.
 */
#define	DPRINTF6(a,b,c,d,e,f)						\
	do {								\
		if (rf_diskDebug)					\
			printf(a,b,c,d,e,f);				\
	} while (0)
#define	DPRINTF7(a,b,c,d,e,f,g)						\
	do {								\
		if (rf_diskDebug)					\
			printf(a,b,c,d,e,f,g);				\
	} while (0)
! 103:
! 104: /****************************************************************************
! 105: *
! 106: * Initialize the disks comprising the array.
! 107: *
! 108: * We want the spare disks to have regular row,col numbers so that we can
! 109: * easily substitute a spare for a failed disk. But, the driver code assumes
! 110: * throughout that the array contains numRow by numCol _non-spare_ disks, so
! 111: * it's not clear how to fit in the spares. This is an unfortunate holdover
! 112: * from raidSim. The quick and dirty fix is to make row zero bigger than the
! 113: * rest, and put all the spares in it. This probably needs to get changed
! 114: * eventually.
! 115: *
! 116: ****************************************************************************/
! 117: int
! 118: rf_ConfigureDisks(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
! 119: RF_Config_t *cfgPtr)
! 120: {
! 121: RF_RaidDisk_t **disks;
! 122: RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
! 123: RF_RowCol_t r, c;
! 124: int bs, ret;
! 125: unsigned i, count, foundone = 0, numFailuresThisRow;
! 126: int force;
! 127:
! 128: force = cfgPtr->force;
! 129:
! 130: ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
! 131: if (ret)
! 132: goto fail;
! 133:
! 134: disks = raidPtr->Disks;
! 135:
! 136: for (r = 0; r < raidPtr->numRow; r++) {
! 137: numFailuresThisRow = 0;
! 138: for (c = 0; c < raidPtr->numCol; c++) {
! 139: ret = rf_ConfigureDisk(raidPtr,
! 140: &cfgPtr->devnames[r][c][0], &disks[r][c], r, c);
! 141:
! 142: if (ret)
! 143: goto fail;
! 144:
! 145: if (disks[r][c].status == rf_ds_optimal) {
! 146: raidread_component_label(
! 147: raidPtr->raid_cinfo[r][c].ci_dev,
! 148: raidPtr->raid_cinfo[r][c].ci_vp,
! 149: &raidPtr->raid_cinfo[r][c].ci_label);
! 150: }
! 151:
! 152: if (disks[r][c].status != rf_ds_optimal) {
! 153: numFailuresThisRow++;
! 154: } else {
! 155: if (disks[r][c].numBlocks < min_numblks)
! 156: min_numblks = disks[r][c].numBlocks;
! 157: DPRINTF7("Disk at row %d col %d: dev %s"
! 158: " numBlocks %ld blockSize %d (%ld MB)\n",
! 159: r, c, disks[r][c].devname,
! 160: (long int) disks[r][c].numBlocks,
! 161: disks[r][c].blockSize,
! 162: (long int) disks[r][c].numBlocks *
! 163: disks[r][c].blockSize / 1024 / 1024);
! 164: }
! 165: }
! 166: /* XXX Fix for n-fault tolerant. */
! 167: /*
! 168: * XXX This should probably check to see how many failures
! 169: * we can handle for this configuration !
! 170: */
! 171: if (numFailuresThisRow > 0)
! 172: raidPtr->status[r] = rf_rs_degraded;
! 173: }
! 174: /*
! 175: * All disks must be the same size & have the same block size, bs must
! 176: * be a power of 2.
! 177: */
! 178: bs = 0;
! 179: for (foundone = r = 0; !foundone && r < raidPtr->numRow; r++) {
! 180: for (c = 0; !foundone && c < raidPtr->numCol; c++) {
! 181: if (disks[r][c].status == rf_ds_optimal) {
! 182: bs = disks[r][c].blockSize;
! 183: foundone = 1;
! 184: }
! 185: }
! 186: }
! 187: if (!foundone) {
! 188: RF_ERRORMSG("RAIDFRAME: Did not find any live disks in"
! 189: " the array.\n");
! 190: ret = EINVAL;
! 191: goto fail;
! 192: }
! 193: for (count = 0, i = 1; i; i <<= 1)
! 194: if (bs & i)
! 195: count++;
! 196: if (count != 1) {
! 197: RF_ERRORMSG1("Error: block size on disks (%d) must be a"
! 198: " power of 2.\n", bs);
! 199: ret = EINVAL;
! 200: goto fail;
! 201: }
! 202:
! 203: if (rf_CheckLabels(raidPtr, cfgPtr)) {
! 204: printf("raid%d: There were fatal errors\n", raidPtr->raidid);
! 205: if (force != 0) {
! 206: printf("raid%d: Fatal errors being ignored.\n",
! 207: raidPtr->raidid);
! 208: } else {
! 209: ret = EINVAL;
! 210: goto fail;
! 211: }
! 212: }
! 213:
! 214: for (r = 0; r < raidPtr->numRow; r++) {
! 215: for (c = 0; c < raidPtr->numCol; c++) {
! 216: if (disks[r][c].status == rf_ds_optimal) {
! 217: if (disks[r][c].blockSize != bs) {
! 218: RF_ERRORMSG2("Error: block size of"
! 219: " disk at r %d c %d different from"
! 220: " disk at r 0 c 0.\n", r, c);
! 221: ret = EINVAL;
! 222: goto fail;
! 223: }
! 224: if (disks[r][c].numBlocks != min_numblks) {
! 225: RF_ERRORMSG3("WARNING: truncating disk"
! 226: " at r %d c %d to %d blocks.\n",
! 227: r, c, (int) min_numblks);
! 228: disks[r][c].numBlocks = min_numblks;
! 229: }
! 230: }
! 231: }
! 232: }
! 233:
! 234: raidPtr->sectorsPerDisk = min_numblks;
! 235: raidPtr->logBytesPerSector = ffs(bs) - 1;
! 236: raidPtr->bytesPerSector = bs;
! 237: raidPtr->sectorMask = bs - 1;
! 238: return (0);
! 239:
! 240: fail:
! 241: rf_UnconfigureVnodes(raidPtr);
! 242:
! 243: return (ret);
! 244: }
! 245:
! 246:
! 247: /****************************************************************************
! 248: * Set up the data structures describing the spare disks in the array.
! 249: * Recall from the above comment that the spare disk descriptors are stored
! 250: * in row zero, which is specially expanded to hold them.
! 251: ****************************************************************************/
! 252: int
! 253: rf_ConfigureSpareDisks(RF_ShutdownList_t ** listp, RF_Raid_t * raidPtr,
! 254: RF_Config_t * cfgPtr)
! 255: {
! 256: int i, ret;
! 257: unsigned int bs;
! 258: RF_RaidDisk_t *disks;
! 259: int num_spares_done;
! 260:
! 261: num_spares_done = 0;
! 262:
! 263: /*
! 264: * The space for the spares should have already been allocated by
! 265: * ConfigureDisks().
! 266: */
! 267:
! 268: disks = &raidPtr->Disks[0][raidPtr->numCol];
! 269: for (i = 0; i < raidPtr->numSpare; i++) {
! 270: ret = rf_ConfigureDisk(raidPtr, &cfgPtr->spare_names[i][0],
! 271: &disks[i], 0, raidPtr->numCol + i);
! 272: if (ret)
! 273: goto fail;
! 274: if (disks[i].status != rf_ds_optimal) {
! 275: RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
! 276: &cfgPtr->spare_names[i][0]);
! 277: } else {
! 278: /* Change status to spare. */
! 279: disks[i].status = rf_ds_spare;
! 280: DPRINTF6("Spare Disk %d: dev %s numBlocks %ld"
! 281: " blockSize %d (%ld MB).\n", i, disks[i].devname,
! 282: (long int) disks[i].numBlocks, disks[i].blockSize,
! 283: (long int) disks[i].numBlocks *
! 284: disks[i].blockSize / 1024 / 1024);
! 285: }
! 286: num_spares_done++;
! 287: }
! 288:
! 289: /* Check sizes and block sizes on spare disks. */
! 290: bs = 1 << raidPtr->logBytesPerSector;
! 291: for (i = 0; i < raidPtr->numSpare; i++) {
! 292: if (disks[i].blockSize != bs) {
! 293: RF_ERRORMSG3("Block size of %d on spare disk %s is"
! 294: " not the same as on other disks (%d).\n",
! 295: disks[i].blockSize, disks[i].devname, bs);
! 296: ret = EINVAL;
! 297: goto fail;
! 298: }
! 299: if (disks[i].numBlocks < raidPtr->sectorsPerDisk) {
! 300: RF_ERRORMSG3("Spare disk %s (%llu blocks) is too small"
! 301: " to serve as a spare (need %llu blocks).\n",
! 302: disks[i].devname, disks[i].numBlocks,
! 303: raidPtr->sectorsPerDisk);
! 304: ret = EINVAL;
! 305: goto fail;
! 306: } else
! 307: if (disks[i].numBlocks > raidPtr->sectorsPerDisk) {
! 308: RF_ERRORMSG2("Warning: truncating spare disk"
! 309: " %s to %llu blocks.\n", disks[i].devname,
! 310: raidPtr->sectorsPerDisk);
! 311:
! 312: disks[i].numBlocks = raidPtr->sectorsPerDisk;
! 313: }
! 314: }
! 315:
! 316: return (0);
! 317:
! 318: fail:
! 319:
! 320: /*
! 321: * Release the hold on the main components. We've failed to allocate
! 322: * a spare, and since we're failing, we need to free things...
! 323: *
! 324: * XXX Failing to allocate a spare is *not* that big of a deal...
! 325: * We *can* survive without it, if need be, esp. if we get hot
! 326: * adding working.
! 327: * If we don't fail out here, then we need a way to remove this spare...
! 328: * That should be easier to do here than if we are "live"...
! 329: */
! 330:
! 331: rf_UnconfigureVnodes(raidPtr);
! 332:
! 333: return (ret);
! 334: }
! 335:
! 336: int
! 337: rf_AllocDiskStructures(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr)
! 338: {
! 339: RF_RaidDisk_t **disks;
! 340: int ret;
! 341: int r;
! 342:
! 343: RF_CallocAndAdd(disks, raidPtr->numRow, sizeof(RF_RaidDisk_t *),
! 344: (RF_RaidDisk_t **), raidPtr->cleanupList);
! 345: if (disks == NULL) {
! 346: ret = ENOMEM;
! 347: goto fail;
! 348: }
! 349: raidPtr->Disks = disks;
! 350: /* Get space for the device-specific stuff... */
! 351: RF_CallocAndAdd(raidPtr->raid_cinfo, raidPtr->numRow,
! 352: sizeof(struct raidcinfo *), (struct raidcinfo **),
! 353: raidPtr->cleanupList);
! 354: if (raidPtr->raid_cinfo == NULL) {
! 355: ret = ENOMEM;
! 356: goto fail;
! 357: }
! 358:
! 359: for (r = 0; r < raidPtr->numRow; r++) {
! 360: /*
! 361: * We allocate RF_MAXSPARE on the first row so that we
! 362: * have room to do hot-swapping of spares.
! 363: */
! 364: RF_CallocAndAdd(disks[r], raidPtr->numCol +
! 365: ((r == 0) ? RF_MAXSPARE : 0), sizeof(RF_RaidDisk_t),
! 366: (RF_RaidDisk_t *), raidPtr->cleanupList);
! 367: if (disks[r] == NULL) {
! 368: ret = ENOMEM;
! 369: goto fail;
! 370: }
! 371: /* Get more space for device specific stuff... */
! 372: RF_CallocAndAdd(raidPtr->raid_cinfo[r], raidPtr->numCol +
! 373: ((r == 0) ? raidPtr->numSpare : 0),
! 374: sizeof(struct raidcinfo), (struct raidcinfo *),
! 375: raidPtr->cleanupList);
! 376: if (raidPtr->raid_cinfo[r] == NULL) {
! 377: ret = ENOMEM;
! 378: goto fail;
! 379: }
! 380: }
! 381: return(0);
! 382: fail:
! 383: rf_UnconfigureVnodes(raidPtr);
! 384:
! 385: return(ret);
! 386: }
! 387:
! 388:
! 389: /* Configure a single disk during auto-configuration at boot. */
! 390: int
! 391: rf_AutoConfigureDisks(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr,
! 392: RF_AutoConfig_t *auto_config)
! 393: {
! 394: RF_RaidDisk_t **disks;
! 395: RF_RaidDisk_t *diskPtr;
! 396: RF_RowCol_t r, c;
! 397: RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
! 398: int bs, ret;
! 399: int numFailuresThisRow;
! 400: int force;
! 401: RF_AutoConfig_t *ac;
! 402: int parity_good;
! 403: int mod_counter;
! 404: int mod_counter_found;
! 405:
! 406: #if DEBUG
! 407: printf("Starting autoconfiguration of RAID set...\n");
! 408: #endif /* DEBUG */
! 409: force = cfgPtr->force;
! 410:
! 411: ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
! 412: if (ret)
! 413: goto fail;
! 414:
! 415: disks = raidPtr->Disks;
! 416:
! 417: /* Assume the parity will be fine... */
! 418: parity_good = RF_RAID_CLEAN;
! 419:
! 420: /* Check for mod_counters that are too low. */
! 421: mod_counter_found = 0;
! 422: ac = auto_config;
! 423: while(ac!=NULL) {
! 424: if (mod_counter_found == 0) {
! 425: mod_counter = ac->clabel->mod_counter;
! 426: mod_counter_found = 1;
! 427: } else {
! 428: if (ac->clabel->mod_counter > mod_counter) {
! 429: mod_counter = ac->clabel->mod_counter;
! 430: }
! 431: }
! 432: ac->flag = 0; /* Clear the general purpose flag. */
! 433: ac = ac->next;
! 434: }
! 435:
! 436: for (r = 0; r < raidPtr->numRow; r++) {
! 437: numFailuresThisRow = 0;
! 438: for (c = 0; c < raidPtr->numCol; c++) {
! 439: diskPtr = &disks[r][c];
! 440:
! 441: /* Find this row/col in the autoconfig. */
! 442: #if DEBUG
! 443: printf("Looking for %d,%d in autoconfig.\n", r, c);
! 444: #endif /* DEBUG */
! 445: ac = auto_config;
! 446: while(ac!=NULL) {
! 447: if (ac->clabel == NULL) {
! 448: /* Big-time bad news. */
! 449: goto fail;
! 450: }
! 451: if ((ac->clabel->row == r) &&
! 452: (ac->clabel->column == c) &&
! 453: (ac->clabel->mod_counter == mod_counter)) {
! 454: /* It's this one... */
! 455: /*
! 456: * Flag it as 'used', so we don't
! 457: * free it later.
! 458: */
! 459: ac->flag = 1;
! 460: #if DEBUG
! 461: printf("Found: %s at %d,%d.\n",
! 462: ac->devname, r, c);
! 463: #endif /* DEBUG */
! 464:
! 465: break;
! 466: }
! 467: ac = ac->next;
! 468: }
! 469:
! 470: if (ac == NULL) {
! 471: /*
! 472: * We didn't find an exact match with a
! 473: * correct mod_counter above... Can we
! 474: * find one with an incorrect mod_counter
! 475: * to use instead ? (This one, if we find
! 476: * it, will be marked as failed once the
! 477: * set configures)
! 478: */
! 479:
! 480: ac = auto_config;
! 481: while(ac!=NULL) {
! 482: if (ac->clabel == NULL) {
! 483: /* Big-time bad news. */
! 484: goto fail;
! 485: }
! 486: if ((ac->clabel->row == r) &&
! 487: (ac->clabel->column == c)) {
! 488: /*
! 489: * It's this one...
! 490: * Flag it as 'used', so we
! 491: * don't free it later.
! 492: */
! 493: ac->flag = 1;
! 494: #if DEBUG
! 495: printf("Found(low mod_counter)"
! 496: ": %s at %d,%d.\n",
! 497: ac->devname, r, c);
! 498: #endif /* DEBUG */
! 499:
! 500: break;
! 501: }
! 502: ac = ac->next;
! 503: }
! 504: }
! 505:
! 506:
! 507:
! 508: if (ac!=NULL) {
! 509: /* Found it. Configure it... */
! 510: diskPtr->blockSize = ac->clabel->blockSize;
! 511: diskPtr->numBlocks = ac->clabel->numBlocks;
! 512: /*
! 513: * Note: rf_protectedSectors is already
! 514: * factored into numBlocks here.
! 515: */
! 516: raidPtr->raid_cinfo[r][c].ci_vp = ac->vp;
! 517: raidPtr->raid_cinfo[r][c].ci_dev = ac->dev;
! 518:
! 519: memcpy(&raidPtr->raid_cinfo[r][c].ci_label,
! 520: ac->clabel, sizeof(*ac->clabel));
! 521: snprintf(diskPtr->devname,
! 522: sizeof diskPtr->devname, "/dev/%s",
! 523: ac->devname);
! 524:
! 525: /*
! 526: * Note the fact that this component was
! 527: * autoconfigured. You'll need this info
! 528: * later. Trust me :)
! 529: */
! 530: diskPtr->auto_configured = 1;
! 531: diskPtr->dev = ac->dev;
! 532:
! 533: /*
! 534: * We allow the user to specify that
! 535: * only a fraction of the disks should
! 536: * be used. This is just for debug: it
! 537: * speeds up the parity scan.
! 538: */
! 539:
! 540: diskPtr->numBlocks = diskPtr->numBlocks *
! 541: rf_sizePercentage / 100;
! 542:
! 543: /*
! 544: * XXX These will get set multiple times,
! 545: * but since we're autoconfiguring, they'd
! 546: * better be always the same each time !
! 547: * If not, this is the least of your worries.
! 548: */
! 549:
! 550: bs = diskPtr->blockSize;
! 551: min_numblks = diskPtr->numBlocks;
! 552:
! 553: /*
! 554: * This gets done multiple times, but that's
! 555: * fine -- the serial number will be the same
! 556: * for all components, guaranteed.
! 557: */
! 558: raidPtr->serial_number =
! 559: ac->clabel->serial_number;
! 560: /*
! 561: * Check the last time the label
! 562: * was modified.
! 563: */
! 564: if (ac->clabel->mod_counter != mod_counter) {
! 565: /*
! 566: * Even though we've filled in all
! 567: * of the above, we don't trust
! 568: * this component since it's
! 569: * modification counter is not
! 570: * in sync with the rest, and we really
! 571: * consider it to be failed.
! 572: */
! 573: disks[r][c].status = rf_ds_failed;
! 574: numFailuresThisRow++;
! 575: } else {
! 576: if (ac->clabel->clean != RF_RAID_CLEAN)
! 577: {
! 578: parity_good = RF_RAID_DIRTY;
! 579: }
! 580: }
! 581: } else {
! 582: /*
! 583: * Didn't find it at all !!!
! 584: * Component must really be dead.
! 585: */
! 586: disks[r][c].status = rf_ds_failed;
! 587: snprintf(disks[r][c].devname,
! 588: sizeof disks[r][c].devname, "component%d",
! 589: r * raidPtr->numCol + c);
! 590: numFailuresThisRow++;
! 591: }
! 592: }
! 593: /* XXX Fix for n-fault tolerant. */
! 594: /*
! 595: * XXX This should probably check to see how many failures
! 596: * we can handle for this configuration !
! 597: */
! 598: if (numFailuresThisRow > 0)
! 599: raidPtr->status[r] = rf_rs_degraded;
! 600: }
! 601:
! 602: /* Close the device for the ones that didn't get used. */
! 603:
! 604: ac = auto_config;
! 605: while(ac != NULL) {
! 606: if (ac->flag == 0) {
! 607: VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
! 608: vput(ac->vp);
! 609: ac->vp = NULL;
! 610: #if DEBUG
! 611: printf("Released %s from auto-config set.\n",
! 612: ac->devname);
! 613: #endif /* DEBUG */
! 614: }
! 615: ac = ac->next;
! 616: }
! 617:
! 618: raidPtr->mod_counter = mod_counter;
! 619:
! 620: /* Note the state of the parity, if any. */
! 621: raidPtr->parity_good = parity_good;
! 622: raidPtr->sectorsPerDisk = min_numblks;
! 623: raidPtr->logBytesPerSector = ffs(bs) - 1;
! 624: raidPtr->bytesPerSector = bs;
! 625: raidPtr->sectorMask = bs - 1;
! 626: return (0);
! 627:
! 628: fail:
! 629:
! 630: rf_UnconfigureVnodes(raidPtr);
! 631:
! 632: return (ret);
! 633:
! 634: }
! 635:
! 636: /* Configure a single disk in the array. */
! 637: int
! 638: rf_ConfigureDisk(RF_Raid_t *raidPtr, char *buf, RF_RaidDisk_t *diskPtr,
! 639: RF_RowCol_t row, RF_RowCol_t col)
! 640: {
! 641: char *p;
! 642: int retcode;
! 643:
! 644: struct partinfo dpart;
! 645: struct vnode *vp;
! 646: struct vattr va;
! 647: struct proc *proc;
! 648: int error;
! 649:
! 650: retcode = 0;
! 651: p = rf_find_non_white(buf);
! 652: if (*buf != '\0' && p[strlen(p) - 1] == '\n') {
! 653: /* Strip off the newline. */
! 654: p[strlen(p) - 1] = '\0';
! 655: }
! 656: (void) strlcpy(diskPtr->devname, p, sizeof diskPtr->devname);
! 657:
! 658: proc = raidPtr->engine_thread;
! 659:
! 660: /* Let's start by claiming the component is fine and well... */
! 661: diskPtr->status = rf_ds_optimal;
! 662:
! 663: raidPtr->raid_cinfo[row][col].ci_vp = NULL;
! 664: raidPtr->raid_cinfo[row][col].ci_dev = NULL;
! 665:
! 666: error = raidlookup(diskPtr->devname, curproc, &vp);
! 667: if (error) {
! 668: printf("raidlookup on device: %s failed !\n", diskPtr->devname);
! 669: if (error == ENXIO) {
! 670: /* The component isn't there... Must be dead :-( */
! 671: diskPtr->status = rf_ds_failed;
! 672: } else {
! 673: return (error);
! 674: }
! 675: }
! 676: if (diskPtr->status == rf_ds_optimal) {
! 677:
! 678: if ((error = VOP_GETATTR(vp, &va, proc->p_ucred, proc)) != 0) {
! 679: return (error);
! 680: }
! 681: error = VOP_IOCTL(vp, DIOCGPART, (caddr_t) & dpart, FREAD,
! 682: proc->p_ucred, proc);
! 683: if (error) {
! 684: return (error);
! 685: }
! 686: diskPtr->blockSize = dpart.disklab->d_secsize;
! 687:
! 688: diskPtr->numBlocks = DL_GETPSIZE(dpart.part) - rf_protectedSectors;
! 689: diskPtr->partitionSize = DL_GETPSIZE(dpart.part);
! 690:
! 691: raidPtr->raid_cinfo[row][col].ci_vp = vp;
! 692: raidPtr->raid_cinfo[row][col].ci_dev = va.va_rdev;
! 693:
! 694: /* This component was not automatically configured. */
! 695: diskPtr->auto_configured = 0;
! 696: diskPtr->dev = va.va_rdev;
! 697:
! 698: /*
! 699: * We allow the user to specify that only a fraction of the
! 700: * disks should be used. This is just for debug: it speeds up
! 701: * the parity scan.
! 702: */
! 703: diskPtr->numBlocks = diskPtr->numBlocks * rf_sizePercentage
! 704: / 100;
! 705: }
! 706: return (0);
! 707: }
! 708:
! 709: void
! 710: rf_print_label_status(RF_Raid_t *raidPtr, int row, int column, char *dev_name,
! 711: RF_ComponentLabel_t *ci_label)
! 712: {
! 713:
! 714: printf("raid%d: Component %s being configured at row: %d col: %d\n",
! 715: raidPtr->raidid, dev_name, row, column);
! 716: printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
! 717: ci_label->row, ci_label->column, ci_label->num_rows,
! 718: ci_label->num_columns);
! 719: printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
! 720: ci_label->version, ci_label->serial_number, ci_label->mod_counter);
! 721: printf(" Clean: %s Status: %d\n",
! 722: ci_label->clean ? "Yes" : "No", ci_label->status);
! 723: }
! 724:
! 725: int
! 726: rf_check_label_vitals(RF_Raid_t *raidPtr, int row, int column, char *dev_name,
! 727: RF_ComponentLabel_t *ci_label, int serial_number, int mod_counter)
! 728: {
! 729: int fatal_error = 0;
! 730:
! 731: if (serial_number != ci_label->serial_number) {
! 732: printf("%s has a different serial number: %d %d.\n",
! 733: dev_name, serial_number, ci_label->serial_number);
! 734: fatal_error = 1;
! 735: }
! 736: if (mod_counter != ci_label->mod_counter) {
! 737: printf("%s has a different modfication count: %d %d.\n",
! 738: dev_name, mod_counter, ci_label->mod_counter);
! 739: }
! 740:
! 741: if (row != ci_label->row) {
! 742: printf("Row out of alignment for: %s.\n", dev_name);
! 743: fatal_error = 1;
! 744: }
! 745: if (column != ci_label->column) {
! 746: printf("Column out of alignment for: %s.\n", dev_name);
! 747: fatal_error = 1;
! 748: }
! 749: if (raidPtr->numRow != ci_label->num_rows) {
! 750: printf("Number of rows do not match for: %s.\n", dev_name);
! 751: fatal_error = 1;
! 752: }
! 753: if (raidPtr->numCol != ci_label->num_columns) {
! 754: printf("Number of columns do not match for: %s.\n", dev_name);
! 755: fatal_error = 1;
! 756: }
! 757: if (ci_label->clean == 0) {
! 758: /* It's not clean, but that's not fatal. */
! 759: printf("%s is not clean !\n", dev_name);
! 760: }
! 761: return(fatal_error);
! 762: }
! 763:
! 764:
! 765: /*
! 766: *
! 767: * rf_CheckLabels() - Check all the component labels for consistency.
! 768: * Return an error if there is anything major amiss.
! 769: *
! 770: */
! 771:
! 772: int
! 773: rf_CheckLabels(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr)
! 774: {
! 775: int r, c;
! 776: char *dev_name;
! 777: RF_ComponentLabel_t *ci_label;
! 778: int serial_number = 0;
! 779: int mod_number = 0;
! 780: int fatal_error = 0;
! 781: int mod_values[4];
! 782: int mod_count[4];
! 783: int ser_values[4];
! 784: int ser_count[4];
! 785: int num_ser;
! 786: int num_mod;
! 787: int i;
! 788: int found;
! 789: int hosed_row;
! 790: int hosed_column;
! 791: int too_fatal;
! 792: int parity_good;
! 793: int force;
! 794:
! 795: hosed_row = -1;
! 796: hosed_column = -1;
! 797: too_fatal = 0;
! 798: force = cfgPtr->force;
! 799:
! 800: /*
! 801: * We're going to try to be a little intelligent here. If one
! 802: * component's label is bogus, and we can identify that it's the
! 803: * *only* one that's gone, we'll mark it as "failed" and allow
! 804: * the configuration to proceed. This will be the *only* case
! 805: * that we'll proceed if there would be (otherwise) fatal errors.
! 806: *
! 807: * Basically we simply keep a count of how many components had
! 808: * what serial number. If all but one agree, we simply mark
! 809: * the disagreeing component as being failed, and allow
! 810: * things to come up "normally".
! 811: *
! 812: * We do this first for serial numbers, and then for "mod_counter".
! 813: *
! 814: */
! 815:
! 816: num_ser = 0;
! 817: num_mod = 0;
! 818: for (r = 0; r < raidPtr->numRow && !fatal_error; r++) {
! 819: for (c = 0; c < raidPtr->numCol; c++) {
! 820: ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
! 821: found = 0;
! 822: for(i = 0; i < num_ser; i++) {
! 823: if (ser_values[i] == ci_label->serial_number) {
! 824: ser_count[i]++;
! 825: found = 1;
! 826: break;
! 827: }
! 828: }
! 829: if (!found) {
! 830: ser_values[num_ser] = ci_label->serial_number;
! 831: ser_count[num_ser] = 1;
! 832: num_ser++;
! 833: if (num_ser > 2) {
! 834: fatal_error = 1;
! 835: break;
! 836: }
! 837: }
! 838: found = 0;
! 839: for(i = 0; i < num_mod; i++) {
! 840: if (mod_values[i] == ci_label->mod_counter) {
! 841: mod_count[i]++;
! 842: found = 1;
! 843: break;
! 844: }
! 845: }
! 846: if (!found) {
! 847: mod_values[num_mod] = ci_label->mod_counter;
! 848: mod_count[num_mod] = 1;
! 849: num_mod++;
! 850: if (num_mod > 2) {
! 851: fatal_error = 1;
! 852: break;
! 853: }
! 854: }
! 855: }
! 856: }
! 857: #if DEBUG
! 858: printf("raid%d: Summary of serial numbers:\n", raidPtr->raidid);
! 859: for(i = 0; i < num_ser; i++) {
! 860: printf("%d %d\n", ser_values[i], ser_count[i]);
! 861: }
! 862: printf("raid%d: Summary of mod counters:\n", raidPtr->raidid);
! 863: for(i = 0; i < num_mod; i++) {
! 864: printf("%d %d\n", mod_values[i], mod_count[i]);
! 865: }
! 866: #endif /* DEBUG */
! 867: serial_number = ser_values[0];
! 868: if (num_ser == 2) {
! 869: if ((ser_count[0] == 1) || (ser_count[1] == 1)) {
! 870: /* Locate the maverick component. */
! 871: if (ser_count[1] > ser_count[0]) {
! 872: serial_number = ser_values[1];
! 873: }
! 874: for (r = 0; r < raidPtr->numRow; r++) {
! 875: for (c = 0; c < raidPtr->numCol; c++) {
! 876: ci_label =
! 877: &raidPtr->raid_cinfo[r][c].ci_label;
! 878: if (serial_number !=
! 879: ci_label->serial_number) {
! 880: hosed_row = r;
! 881: hosed_column = c;
! 882: break;
! 883: }
! 884: }
! 885: }
! 886: printf("Hosed component: %s.\n",
! 887: &cfgPtr->devnames[hosed_row][hosed_column][0]);
! 888: if (!force) {
! 889: /*
! 890: * We'll fail this component, as if there are
! 891: * other major errors, we aren't forcing things
! 892: * and we'll abort the config anyways.
! 893: */
! 894: raidPtr->Disks[hosed_row][hosed_column].status
! 895: = rf_ds_failed;
! 896: raidPtr->numFailures++;
! 897: raidPtr->status[hosed_row] = rf_rs_degraded;
! 898: }
! 899: } else {
! 900: too_fatal = 1;
! 901: }
! 902: if (cfgPtr->parityConfig == '0') {
! 903: /*
! 904: * We've identified two different serial numbers.
! 905: * RAID 0 can't cope with that, so we'll punt.
! 906: */
! 907: too_fatal = 1;
! 908: }
! 909:
! 910: }
! 911:
! 912: /*
! 913: * Record the serial number for later. If we bail later, setting
! 914: * this doesn't matter, otherwise we've got the best guess at the
! 915: * correct serial number.
! 916: */
! 917: raidPtr->serial_number = serial_number;
! 918:
! 919: mod_number = mod_values[0];
! 920: if (num_mod == 2) {
! 921: if ((mod_count[0] == 1) || (mod_count[1] == 1)) {
! 922: /* Locate the maverick component. */
! 923: if (mod_count[1] > mod_count[0]) {
! 924: mod_number = mod_values[1];
! 925: } else if (mod_count[1] < mod_count[0]) {
! 926: mod_number = mod_values[0];
! 927: } else {
! 928: /*
! 929: * Counts of different modification values
! 930: * are the same. Assume greater value is
! 931: * the correct one, all other things
! 932: * considered.
! 933: */
! 934: if (mod_values[0] > mod_values[1]) {
! 935: mod_number = mod_values[0];
! 936: } else {
! 937: mod_number = mod_values[1];
! 938: }
! 939:
! 940: }
! 941: for (r = 0; r < raidPtr->numRow && !too_fatal; r++) {
! 942: for (c = 0; c < raidPtr->numCol; c++) {
! 943: ci_label =
! 944: &raidPtr->raid_cinfo[r][c].ci_label;
! 945: if (mod_number !=
! 946: ci_label->mod_counter) {
! 947: if ((hosed_row == r) &&
! 948: (hosed_column == c)) {
! 949: /*
! 950: * Same one. Can
! 951: * deal with it.
! 952: */
! 953: } else {
! 954: hosed_row = r;
! 955: hosed_column = c;
! 956: if (num_ser != 1) {
! 957: too_fatal = 1;
! 958: break;
! 959: }
! 960: }
! 961: }
! 962: }
! 963: }
! 964: printf("Hosed component: %s.\n",
! 965: &cfgPtr->devnames[hosed_row][hosed_column][0]);
! 966: if (!force) {
! 967: /*
! 968: * We'll fail this component, as if there are
! 969: * other major errors, we aren't forcing things
! 970: * and we'll abort the config anyways.
! 971: */
! 972: if (raidPtr
! 973: ->Disks[hosed_row][hosed_column].status !=
! 974: rf_ds_failed) {
! 975: raidPtr->Disks[hosed_row]
! 976: [hosed_column].status =
! 977: rf_ds_failed;
! 978: raidPtr->numFailures++;
! 979: raidPtr->status[hosed_row] =
! 980: rf_rs_degraded;
! 981: }
! 982: }
! 983: } else {
! 984: too_fatal = 1;
! 985: }
! 986: if (cfgPtr->parityConfig == '0') {
! 987: /*
! 988: * We've identified two different mod counters.
! 989: * RAID 0 can't cope with that, so we'll punt.
! 990: */
! 991: too_fatal = 1;
! 992: }
! 993: }
! 994:
! 995: raidPtr->mod_counter = mod_number;
! 996:
! 997: if (too_fatal) {
! 998: /*
! 999: * We've had both a serial number mismatch, and a mod_counter
! 1000: * mismatch -- and they involved two different components !!!
! 1001: * Bail -- make things fail so that the user must force
! 1002: * the issue...
! 1003: */
! 1004: hosed_row = -1;
! 1005: hosed_column = -1;
! 1006: }
! 1007:
! 1008: if (num_ser > 2) {
! 1009: printf("raid%d: Too many different serial numbers !\n",
! 1010: raidPtr->raidid);
! 1011: }
! 1012:
! 1013: if (num_mod > 2) {
! 1014: printf("raid%d: Too many different mod counters !\n",
! 1015: raidPtr->raidid);
! 1016: }
! 1017:
! 1018: /*
! 1019: * We start by assuming the parity will be good, and flee from
! 1020: * that notion at the slightest sign of trouble.
! 1021: */
! 1022:
! 1023: parity_good = RF_RAID_CLEAN;
! 1024: for (r = 0; r < raidPtr->numRow; r++) {
! 1025: for (c = 0; c < raidPtr->numCol; c++) {
! 1026: dev_name = &cfgPtr->devnames[r][c][0];
! 1027: ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
! 1028:
! 1029: if ((r == hosed_row) && (c == hosed_column)) {
! 1030: printf("raid%d: Ignoring %s.\n",
! 1031: raidPtr->raidid, dev_name);
! 1032: } else {
! 1033: rf_print_label_status(raidPtr, r, c, dev_name,
! 1034: ci_label);
! 1035: if (rf_check_label_vitals(raidPtr, r, c,
! 1036: dev_name, ci_label, serial_number,
! 1037: mod_number)) {
! 1038: fatal_error = 1;
! 1039: }
! 1040: if (ci_label->clean != RF_RAID_CLEAN) {
! 1041: parity_good = RF_RAID_DIRTY;
! 1042: }
! 1043: }
! 1044: }
! 1045: }
! 1046: if (fatal_error) {
! 1047: parity_good = RF_RAID_DIRTY;
! 1048: }
! 1049:
! 1050: /* We note the state of the parity. */
! 1051: raidPtr->parity_good = parity_good;
! 1052:
! 1053: return(fatal_error);
! 1054: }
! 1055:
! 1056: int
! 1057: rf_add_hot_spare(RF_Raid_t *raidPtr, RF_SingleComponent_t *sparePtr)
! 1058: {
! 1059: RF_RaidDisk_t *disks;
! 1060: RF_DiskQueue_t *spareQueues;
! 1061: int ret;
! 1062: unsigned int bs;
! 1063: int spare_number;
! 1064:
! 1065: #if 0
! 1066: printf("Just in rf_add_hot_spare: %d.\n", raidPtr->numSpare);
! 1067: printf("Num col: %d.\n", raidPtr->numCol);
! 1068: #endif
! 1069: if (raidPtr->numSpare >= RF_MAXSPARE) {
! 1070: RF_ERRORMSG1("Too many spares: %d.\n", raidPtr->numSpare);
! 1071: return(EINVAL);
! 1072: }
! 1073:
! 1074: RF_LOCK_MUTEX(raidPtr->mutex);
! 1075:
! 1076: /* The beginning of the spares... */
! 1077: disks = &raidPtr->Disks[0][raidPtr->numCol];
! 1078:
! 1079: spare_number = raidPtr->numSpare;
! 1080:
! 1081: ret = rf_ConfigureDisk(raidPtr, sparePtr->component_name,
! 1082: &disks[spare_number], 0, raidPtr->numCol + spare_number);
! 1083:
! 1084: if (ret)
! 1085: goto fail;
! 1086: if (disks[spare_number].status != rf_ds_optimal) {
! 1087: RF_ERRORMSG1("Warning: spare disk %s failed TUR.\n",
! 1088: sparePtr->component_name);
! 1089: ret = EINVAL;
! 1090: goto fail;
! 1091: } else {
! 1092: disks[spare_number].status = rf_ds_spare;
! 1093: DPRINTF6("Spare Disk %d: dev %s numBlocks %ld blockSize %d"
! 1094: " (%ld MB).\n", spare_number, disks[spare_number].devname,
! 1095: (long int) disks[spare_number].numBlocks,
! 1096: disks[spare_number].blockSize,
! 1097: (long int) disks[spare_number].numBlocks *
! 1098: disks[spare_number].blockSize / 1024 / 1024);
! 1099: }
! 1100:
! 1101:
! 1102: /* Check sizes and block sizes on the spare disk. */
! 1103: bs = 1 << raidPtr->logBytesPerSector;
! 1104: if (disks[spare_number].blockSize != bs) {
! 1105: RF_ERRORMSG3("Block size of %d on spare disk %s is not"
! 1106: " the same as on other disks (%d).\n",
! 1107: disks[spare_number].blockSize,
! 1108: disks[spare_number].devname, bs);
! 1109: ret = EINVAL;
! 1110: goto fail;
! 1111: }
! 1112: if (disks[spare_number].numBlocks < raidPtr->sectorsPerDisk) {
! 1113: RF_ERRORMSG3("Spare disk %s (%llu blocks) is too small to serve"
! 1114: " as a spare (need %llu blocks).\n",
! 1115: disks[spare_number].devname, disks[spare_number].numBlocks,
! 1116: raidPtr->sectorsPerDisk);
! 1117: ret = EINVAL;
! 1118: goto fail;
! 1119: } else {
! 1120: if (disks[spare_number].numBlocks >
! 1121: raidPtr->sectorsPerDisk) {
! 1122: RF_ERRORMSG2("Warning: truncating spare disk %s to %llu"
! 1123: " blocks.\n", disks[spare_number].devname,
! 1124: raidPtr->sectorsPerDisk);
! 1125:
! 1126: disks[spare_number].numBlocks = raidPtr->sectorsPerDisk;
! 1127: }
! 1128: }
! 1129:
! 1130: spareQueues = &raidPtr->Queues[0][raidPtr->numCol];
! 1131: ret = rf_ConfigureDiskQueue(raidPtr, &spareQueues[spare_number],
! 1132: 0, raidPtr->numCol + spare_number, raidPtr->qType,
! 1133: raidPtr->sectorsPerDisk, raidPtr->Disks[0][raidPtr->numCol +
! 1134: spare_number].dev, raidPtr->maxOutstanding,
! 1135: &raidPtr->shutdownList, raidPtr->cleanupList);
! 1136:
! 1137:
! 1138: raidPtr->numSpare++;
! 1139: RF_UNLOCK_MUTEX(raidPtr->mutex);
! 1140: return (0);
! 1141:
! 1142: fail:
! 1143: RF_UNLOCK_MUTEX(raidPtr->mutex);
! 1144: return(ret);
! 1145: }
! 1146:
! 1147: int
! 1148: rf_remove_hot_spare(RF_Raid_t *raidPtr, RF_SingleComponent_t *sparePtr)
! 1149: {
! 1150: int spare_number;
! 1151:
! 1152: if (raidPtr->numSpare == 0) {
! 1153: printf("No spares to remove !\n");
! 1154: return(EINVAL);
! 1155: }
! 1156:
! 1157: spare_number = sparePtr->column;
! 1158:
! 1159: return(EINVAL); /* XXX Not implemented yet. */
! 1160: #if 0
! 1161: if (spare_number < 0 || spare_number > raidPtr->numSpare) {
! 1162: return(EINVAL);
! 1163: }
! 1164:
! 1165: /* Verify that this spare isn't in use... */
! 1166:
! 1167: /* It's gone... */
! 1168:
! 1169: raidPtr->numSpare--;
! 1170:
! 1171: return (0);
! 1172: #endif
! 1173: }
! 1174:
! 1175: int
! 1176: rf_delete_component(RF_Raid_t *raidPtr, RF_SingleComponent_t *component)
! 1177: {
! 1178: RF_RaidDisk_t *disks;
! 1179:
! 1180: if ((component->row < 0) ||
! 1181: (component->row >= raidPtr->numRow) ||
! 1182: (component->column < 0) ||
! 1183: (component->column >= raidPtr->numCol)) {
! 1184: return(EINVAL);
! 1185: }
! 1186:
! 1187: disks = &raidPtr->Disks[component->row][component->column];
! 1188:
! 1189: /* 1. This component must be marked as 'failed'. */
! 1190:
! 1191: return(EINVAL); /* Not implemented yet. */
! 1192: }
! 1193:
! 1194: int
! 1195: rf_incorporate_hot_spare(RF_Raid_t *raidPtr, RF_SingleComponent_t *component)
! 1196: {
! 1197:
! 1198: /*
! 1199: * Issues here include how to 'move' this in if there is IO
! 1200: * taking place (e.g. component queues and such).
! 1201: */
! 1202:
! 1203: return(EINVAL); /* Not implemented yet. */
! 1204: }
CVSweb