Annotation of sys/dev/raidframe/rf_copyback.c, Revision 1.1
1.1 ! nbrk 1: /* $OpenBSD: rf_copyback.c,v 1.8 2007/06/05 00:38:22 deraadt Exp $ */
! 2: /* $NetBSD: rf_copyback.c,v 1.14 2000/03/07 02:59:50 oster Exp $ */
! 3:
! 4: /*
! 5: * Copyright (c) 1995 Carnegie-Mellon University.
! 6: * All rights reserved.
! 7: *
! 8: * Author: Mark Holland
! 9: *
! 10: * Permission to use, copy, modify and distribute this software and
! 11: * its documentation is hereby granted, provided that both the copyright
! 12: * notice and this permission notice appear in all copies of the
! 13: * software, derivative works or modified versions, and any portions
! 14: * thereof, and that both notices appear in supporting documentation.
! 15: *
! 16: * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
! 17: * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
! 18: * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
! 19: *
! 20: * Carnegie Mellon requests users of this software to return to
! 21: *
! 22: * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
! 23: * School of Computer Science
! 24: * Carnegie Mellon University
! 25: * Pittsburgh PA 15213-3890
! 26: *
! 27: * any improvements or extensions that they make and grant Carnegie the
! 28: * rights to redistribute these changes.
! 29: */
! 30:
! 31:
! 32: /*****************************************************************************
! 33: *
! 34: * copyback.c -- Code to copy reconstructed data back from spare space to
! 35: * the replaced disk.
! 36: *
! 37: * The code operates using callbacks on the I/Os to continue with the next
! 38: * unit to be copied back. We do this because a simple loop containing
! 39: * blocking I/Os will not work in the simulator.
! 40: *
! 41: *****************************************************************************/
! 42:
! 43: #include "rf_types.h"
! 44:
! 45: #include <sys/time.h>
! 46: #include <sys/buf.h>
! 47: #include "rf_raid.h"
! 48: #include "rf_mcpair.h"
! 49: #include "rf_acctrace.h"
! 50: #include "rf_etimer.h"
! 51: #include "rf_general.h"
! 52: #include "rf_utils.h"
! 53: #include "rf_copyback.h"
! 54: #include "rf_decluster.h"
! 55: #include "rf_driver.h"
! 56: #include "rf_shutdown.h"
! 57: #include "rf_kintf.h"
! 58:
/* Kind of stripe unit handed to rf_CopybackOne(): data or parity. */
#define RF_COPYBACK_DATA 0
#define RF_COPYBACK_PARITY 1

/*
 * Set to 1 when a copyback starts and back to 0 when it completes
 * (described as "debug only" where it is set).
 */
int rf_copyback_in_progress;

/* Prototypes for routines defined further down in this file. */
int rf_CopybackReadDoneProc(RF_CopybackDesc_t *, int);
int rf_CopybackWriteDoneProc(RF_CopybackDesc_t *, int);
void rf_CopybackOne(RF_CopybackDesc_t *, int, RF_RaidAddr_t,
RF_RowCol_t, RF_RowCol_t, RF_SectorNum_t);
void rf_CopybackComplete(RF_CopybackDesc_t *, int);
! 69:
! 70: int
! 71: rf_ConfigureCopyback(RF_ShutdownList_t **listp)
! 72: {
! 73: rf_copyback_in_progress = 0;
! 74: return (0);
! 75: }
! 76:
! 77: #include <sys/types.h>
! 78: #include <sys/param.h>
! 79: #include <sys/systm.h>
! 80: #include <sys/proc.h>
! 81: #include <sys/ioctl.h>
! 82: #include <sys/fcntl.h>
! 83: #ifdef __NETBSD__
! 84: #include <sys/vnode.h>
! 85: #endif
! 86:
! 87:
! 88: /* Do a complete copyback. */
! 89: void
! 90: rf_CopybackReconstructedData(RF_Raid_t *raidPtr)
! 91: {
! 92: RF_ComponentLabel_t c_label;
! 93: int done, retcode;
! 94: RF_CopybackDesc_t *desc;
! 95: RF_RowCol_t frow, fcol;
! 96: RF_RaidDisk_t *badDisk;
! 97: char *databuf;
! 98:
! 99: struct partinfo dpart;
! 100: struct vnode *vp;
! 101: struct vattr va;
! 102: struct proc *proc;
! 103:
! 104: int ac;
! 105:
! 106: done = 0;
! 107: fcol = 0;
! 108: for (frow = 0; frow < raidPtr->numRow; frow++) {
! 109: for (fcol = 0; fcol < raidPtr->numCol; fcol++) {
! 110: if (raidPtr->Disks[frow][fcol].status ==
! 111: rf_ds_dist_spared ||
! 112: raidPtr->Disks[frow][fcol].status ==
! 113: rf_ds_spared) {
! 114: done = 1;
! 115: break;
! 116: }
! 117: }
! 118: if (done)
! 119: break;
! 120: }
! 121:
! 122: if (frow == raidPtr->numRow) {
! 123: printf("COPYBACK: No disks need copyback.\n");
! 124: return;
! 125: }
! 126: badDisk = &raidPtr->Disks[frow][fcol];
! 127:
! 128: proc = raidPtr->engine_thread;
! 129:
! 130: /*
! 131: * This device may have been opened successfully the first time.
! 132: * Close it before trying to open it again.
! 133: */
! 134:
! 135: if (raidPtr->raid_cinfo[frow][fcol].ci_vp != NULL) {
! 136: printf("Close the opened device: %s.\n",
! 137: raidPtr->Disks[frow][fcol].devname);
! 138: vp = raidPtr->raid_cinfo[frow][fcol].ci_vp;
! 139: ac = raidPtr->Disks[frow][fcol].auto_configured;
! 140: rf_close_component(raidPtr, vp, ac);
! 141: raidPtr->raid_cinfo[frow][fcol].ci_vp = NULL;
! 142:
! 143: }
! 144: /* Note that this disk was *not* auto_configured (any longer). */
! 145: raidPtr->Disks[frow][fcol].auto_configured = 0;
! 146:
! 147: printf("About to (re-)open the device: %s.\n",
! 148: raidPtr->Disks[frow][fcol].devname);
! 149:
! 150: retcode = raidlookup(raidPtr->Disks[frow][fcol].devname, proc, &vp);
! 151:
! 152: if (retcode) {
! 153: printf("COPYBACK: raidlookup on device: %s failed: %d !\n",
! 154: raidPtr->Disks[frow][fcol].devname, retcode);
! 155:
! 156: /*
! 157: * XXX The component isn't responding properly... Must be
! 158: * still dead :-(
! 159: */
! 160: return;
! 161:
! 162: } else {
! 163:
! 164: /*
! 165: * Ok, so we can at least do a lookup...
! 166: * How about actually getting a vp for it ?
! 167: */
! 168:
! 169: if ((retcode = VOP_GETATTR(vp, &va, proc->p_ucred, proc)) != 0)
! 170: {
! 171: return;
! 172: }
! 173: retcode = VOP_IOCTL(vp, DIOCGPART, (caddr_t) &dpart, FREAD,
! 174: proc->p_ucred, proc);
! 175: if (retcode) {
! 176: return;
! 177: }
! 178: raidPtr->Disks[frow][fcol].blockSize = dpart.disklab->d_secsize;
! 179:
! 180: raidPtr->Disks[frow][fcol].numBlocks = DL_GETPSIZE(dpart.part) -
! 181: rf_protectedSectors;
! 182:
! 183: raidPtr->raid_cinfo[frow][fcol].ci_vp = vp;
! 184: raidPtr->raid_cinfo[frow][fcol].ci_dev = va.va_rdev;
! 185:
! 186: /* XXX Or the above ? */
! 187: raidPtr->Disks[frow][fcol].dev = va.va_rdev;
! 188:
! 189: /*
! 190: * We allow the user to specify that only a fraction of the
! 191: * disks should be used this is just for debug: it speeds up
! 192: * the parity scan.
! 193: */
! 194: raidPtr->Disks[frow][fcol].numBlocks =
! 195: raidPtr->Disks[frow][fcol].numBlocks *
! 196: rf_sizePercentage / 100;
! 197: }
! 198: #if 0
! 199: /* This is the way it was done before the CAM stuff was removed. */
! 200:
! 201: if (rf_extract_ids(badDisk->devname, &bus, &targ, &lun)) {
! 202: printf("COPYBACK: unable to extract bus, target, lun from"
! 203: " devname %s.\n", badDisk->devname);
! 204: return;
! 205: }
! 206: /*
! 207: * TUR the disk that's marked as bad to be sure that it's actually
! 208: * alive.
! 209: */
! 210: rf_SCSI_AllocTUR(&tur_op);
! 211: retcode = rf_SCSI_DoTUR(tur_op, bus, targ, lun, badDisk->dev);
! 212: rf_SCSI_FreeDiskOp(tur_op, 0);
! 213: #endif
! 214:
! 215: if (retcode) {
! 216: printf("COPYBACK: target disk failed TUR.\n");
! 217: return;
! 218: }
! 219: /* Get a buffer to hold one SU. */
! 220: RF_Malloc(databuf, rf_RaidAddressToByte(raidPtr,
! 221: raidPtr->Layout.sectorsPerStripeUnit), (char *));
! 222:
! 223: /* Create a descriptor. */
! 224: RF_Malloc(desc, sizeof(*desc), (RF_CopybackDesc_t *));
! 225: desc->raidPtr = raidPtr;
! 226: desc->status = 0;
! 227: desc->frow = frow;
! 228: desc->fcol = fcol;
! 229: desc->spRow = badDisk->spareRow;
! 230: desc->spCol = badDisk->spareCol;
! 231: desc->stripeAddr = 0;
! 232: desc->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
! 233: desc->sectPerStripe = raidPtr->Layout.sectorsPerStripeUnit *
! 234: raidPtr->Layout.numDataCol;
! 235: desc->databuf = databuf;
! 236: desc->mcpair = rf_AllocMCPair();
! 237:
! 238: printf("COPYBACK: Quiescing the array.\n");
! 239: /*
! 240: * Quiesce the array, since we don't want to code support for user
! 241: * accs here.
! 242: */
! 243: rf_SuspendNewRequestsAndWait(raidPtr);
! 244:
! 245: /* Adjust state of the array and of the disks. */
! 246: RF_LOCK_MUTEX(raidPtr->mutex);
! 247: raidPtr->Disks[desc->frow][desc->fcol].status = rf_ds_optimal;
! 248: raidPtr->status[desc->frow] = rf_rs_optimal;
! 249: rf_copyback_in_progress = 1; /* Debug only. */
! 250: RF_UNLOCK_MUTEX(raidPtr->mutex);
! 251:
! 252: printf("COPYBACK: Beginning\n");
! 253: RF_GETTIME(desc->starttime);
! 254: rf_ContinueCopyback(desc);
! 255:
! 256: /*
! 257: * Data has been restored.
! 258: * Fix up the component label.
! 259: * Don't actually need the read here.
! 260: */
! 261: raidread_component_label(raidPtr->raid_cinfo[frow][fcol].ci_dev,
! 262: raidPtr->raid_cinfo[frow][fcol].ci_vp,
! 263: &c_label);
! 264:
! 265: raid_init_component_label(raidPtr, &c_label);
! 266:
! 267: c_label.row = frow;
! 268: c_label.column = fcol;
! 269:
! 270: raidwrite_component_label(raidPtr->raid_cinfo[frow][fcol].ci_dev,
! 271: raidPtr->raid_cinfo[frow][fcol].ci_vp,
! 272: &c_label);
! 273: }
! 274:
! 275:
! 276: /*
! 277: * Invoked via callback after a copyback I/O has completed to
! 278: * continue on with the next one.
! 279: */
! 280: void
! 281: rf_ContinueCopyback(RF_CopybackDesc_t *desc)
! 282: {
! 283: RF_SectorNum_t testOffs, stripeAddr;
! 284: RF_Raid_t *raidPtr = desc->raidPtr;
! 285: RF_RaidAddr_t addr;
! 286: RF_RowCol_t testRow, testCol;
! 287: int old_pctg, new_pctg, done;
! 288: struct timeval t, diff;
! 289:
! 290: old_pctg = (-1);
! 291: while (1) {
! 292: stripeAddr = desc->stripeAddr;
! 293: desc->raidPtr->copyback_stripes_done = stripeAddr /
! 294: desc->sectPerStripe;
! 295: if (rf_prReconSched) {
! 296: old_pctg = 100 * desc->stripeAddr /
! 297: raidPtr->totalSectors;
! 298: }
! 299: desc->stripeAddr += desc->sectPerStripe;
! 300: if (rf_prReconSched) {
! 301: new_pctg = 100 * desc->stripeAddr /
! 302: raidPtr->totalSectors;
! 303: if (new_pctg != old_pctg) {
! 304: RF_GETTIME(t);
! 305: RF_TIMEVAL_DIFF(&desc->starttime, &t, &diff);
! 306: printf("%d %d.%06d\n", new_pctg,
! 307: (int) diff.tv_sec, (int) diff.tv_usec);
! 308: }
! 309: }
! 310: if (stripeAddr >= raidPtr->totalSectors) {
! 311: rf_CopybackComplete(desc, 0);
! 312: return;
! 313: }
! 314: /* Walk through the current stripe, su-by-su. */
! 315: for (done = 0, addr = stripeAddr;
! 316: addr < stripeAddr + desc->sectPerStripe;
! 317: addr += desc->sectPerSU) {
! 318:
! 319: /* Map the SU, disallowing remap to spare space. */
! 320: (raidPtr->Layout.map->MapSector) (raidPtr, addr,
! 321: &testRow, &testCol, &testOffs, RF_DONT_REMAP);
! 322:
! 323: if (testRow == desc->frow && testCol == desc->fcol) {
! 324: rf_CopybackOne(desc, RF_COPYBACK_DATA, addr,
! 325: testRow, testCol, testOffs);
! 326: done = 1;
! 327: break;
! 328: }
! 329: }
! 330:
! 331: if (!done) {
! 332: /*
! 333: * We didn't find the failed disk in the data part,
! 334: * check parity.
! 335: */
! 336:
! 337: /*
! 338: * Map the parity for this stripe, disallowing remap
! 339: * to spare space.
! 340: */
! 341: (raidPtr->Layout.map->MapParity) (raidPtr, stripeAddr,
! 342: &testRow, &testCol, &testOffs, RF_DONT_REMAP);
! 343:
! 344: if (testRow == desc->frow && testCol == desc->fcol) {
! 345: rf_CopybackOne(desc, RF_COPYBACK_PARITY,
! 346: stripeAddr, testRow, testCol, testOffs);
! 347: }
! 348: }
! 349: /* Check to see if the last read/write pair failed. */
! 350: if (desc->status) {
! 351: rf_CopybackComplete(desc, 1);
! 352: return;
! 353: }
! 354: /*
! 355: * We didn't find any units to copy back in this stripe.
! 356: * Continue with the next one.
! 357: */
! 358: }
! 359: }
! 360:
! 361:
! 362: /* Copyback one unit. */
! 363: void
! 364: rf_CopybackOne(RF_CopybackDesc_t *desc, int typ, RF_RaidAddr_t addr,
! 365: RF_RowCol_t testRow, RF_RowCol_t testCol, RF_SectorNum_t testOffs)
! 366: {
! 367: RF_SectorCount_t sectPerSU = desc->sectPerSU;
! 368: RF_Raid_t *raidPtr = desc->raidPtr;
! 369: RF_RowCol_t spRow = desc->spRow;
! 370: RF_RowCol_t spCol = desc->spCol;
! 371: RF_SectorNum_t spOffs;
! 372:
! 373: /* Find the spare location for this SU. */
! 374: if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) {
! 375: if (typ == RF_COPYBACK_DATA)
! 376: raidPtr->Layout.map->MapSector(raidPtr, addr, &spRow,
! 377: &spCol, &spOffs, RF_REMAP);
! 378: else
! 379: raidPtr->Layout.map->MapParity(raidPtr, addr, &spRow,
! 380: &spCol, &spOffs, RF_REMAP);
! 381: } else {
! 382: spOffs = testOffs;
! 383: }
! 384:
! 385: /* Create reqs to read the old location & write the new. */
! 386: desc->readreq = rf_CreateDiskQueueData(RF_IO_TYPE_READ, spOffs,
! 387: sectPerSU, desc->databuf, 0L, 0, (int (*) (void *, int))
! 388: rf_CopybackReadDoneProc, desc, NULL, NULL, (void *) raidPtr,
! 389: RF_DISKQUEUE_DATA_FLAGS_NONE, NULL);
! 390: desc->writereq = rf_CreateDiskQueueData(RF_IO_TYPE_WRITE, testOffs,
! 391: sectPerSU, desc->databuf, 0L, 0, (int (*) (void *, int))
! 392: rf_CopybackWriteDoneProc, desc, NULL, NULL, (void *) raidPtr,
! 393: RF_DISKQUEUE_DATA_FLAGS_NONE, NULL);
! 394: desc->frow = testRow;
! 395: desc->fcol = testCol;
! 396:
! 397: /*
! 398: * Enqueue the read. The write will go out as part of the callback on
! 399: * the read. At user-level & in the kernel, wait for the read-write
! 400: * pair to complete. In the simulator, just return, since everything
! 401: * will happen as callbacks.
! 402: */
! 403:
! 404: RF_LOCK_MUTEX(desc->mcpair->mutex);
! 405: desc->mcpair->flag = 0;
! 406:
! 407: rf_DiskIOEnqueue(&raidPtr->Queues[spRow][spCol], desc->readreq,
! 408: RF_IO_NORMAL_PRIORITY);
! 409:
! 410: while (!desc->mcpair->flag) {
! 411: RF_WAIT_MCPAIR(desc->mcpair);
! 412: }
! 413: RF_UNLOCK_MUTEX(desc->mcpair->mutex);
! 414: rf_FreeDiskQueueData(desc->readreq);
! 415: rf_FreeDiskQueueData(desc->writereq);
! 416:
! 417: }
! 418:
! 419:
! 420: /*
! 421: * Called at interrupt context when the read has completed.
! 422: * Just send out the write.
! 423: */
! 424: int
! 425: rf_CopybackReadDoneProc(RF_CopybackDesc_t *desc, int status)
! 426: {
! 427: if (status) { /* Invoke the callback with bad status. */
! 428: printf("COPYBACK: copyback read failed. Aborting.\n");
! 429: (desc->writereq->CompleteFunc) (desc, -100);
! 430: } else {
! 431: rf_DiskIOEnqueue(&(desc->raidPtr
! 432: ->Queues[desc->frow][desc->fcol]),
! 433: desc->writereq, RF_IO_NORMAL_PRIORITY);
! 434: }
! 435: return (0);
! 436: }
! 437:
! 438:
! 439: /*
! 440: * Called at interrupt context when the write has completed.
! 441: * At user level & in the kernel, wake up the copyback thread.
! 442: * In the simulator, invoke the next copyback directly.
! 443: * Can't free diskqueuedata structs in the kernel because we're at
! 444: * interrupt context.
! 445: */
! 446: int
! 447: rf_CopybackWriteDoneProc(RF_CopybackDesc_t *desc, int status)
! 448: {
! 449: if (status && status != -100) {
! 450: printf("COPYBACK: copyback write failed. Aborting.\n");
! 451: }
! 452: desc->status = status;
! 453: rf_MCPairWakeupFunc(desc->mcpair);
! 454: return (0);
! 455: }
! 456:
! 457:
! 458: /* Invoked when the copyback has completed. */
! 459: void
! 460: rf_CopybackComplete(RF_CopybackDesc_t *desc, int status)
! 461: {
! 462: RF_Raid_t *raidPtr = desc->raidPtr;
! 463: struct timeval t, diff;
! 464:
! 465: if (!status) {
! 466: RF_LOCK_MUTEX(raidPtr->mutex);
! 467: if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) {
! 468: RF_ASSERT(raidPtr->Layout.map->parityConfig == 'D');
! 469: rf_FreeSpareTable(raidPtr);
! 470: } else {
! 471: raidPtr->Disks[desc->spRow][desc->spCol].status =
! 472: rf_ds_spare;
! 473: }
! 474: RF_UNLOCK_MUTEX(raidPtr->mutex);
! 475:
! 476: RF_GETTIME(t);
! 477: RF_TIMEVAL_DIFF(&desc->starttime, &t, &diff);
! 478: printf("Copyback time was %d.%06d seconds.\n",
! 479: (int) diff.tv_sec, (int) diff.tv_usec);
! 480: } else
! 481: printf("COPYBACK: Failure.\n");
! 482:
! 483: RF_Free(desc->databuf, rf_RaidAddressToByte(raidPtr, desc->sectPerSU));
! 484: rf_FreeMCPair(desc->mcpair);
! 485: RF_Free(desc, sizeof(*desc));
! 486:
! 487: rf_copyback_in_progress = 0;
! 488: rf_ResumeNewRequests(raidPtr);
! 489: }
CVSweb