Annotation of sys/dev/raidframe/rf_copyback.c, Revision 1.1.1.1
1.1 nbrk 1: /* $OpenBSD: rf_copyback.c,v 1.8 2007/06/05 00:38:22 deraadt Exp $ */
2: /* $NetBSD: rf_copyback.c,v 1.14 2000/03/07 02:59:50 oster Exp $ */
3:
4: /*
5: * Copyright (c) 1995 Carnegie-Mellon University.
6: * All rights reserved.
7: *
8: * Author: Mark Holland
9: *
10: * Permission to use, copy, modify and distribute this software and
11: * its documentation is hereby granted, provided that both the copyright
12: * notice and this permission notice appear in all copies of the
13: * software, derivative works or modified versions, and any portions
14: * thereof, and that both notices appear in supporting documentation.
15: *
16: * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
17: * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
18: * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
19: *
20: * Carnegie Mellon requests users of this software to return to
21: *
22: * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
23: * School of Computer Science
24: * Carnegie Mellon University
25: * Pittsburgh PA 15213-3890
26: *
27: * any improvements or extensions that they make and grant Carnegie the
28: * rights to redistribute these changes.
29: */
30:
31:
32: /*****************************************************************************
33: *
34: * copyback.c -- Code to copy reconstructed data back from spare space to
35: * the replaced disk.
36: *
37: * The code operates using callbacks on the I/Os to continue with the next
38: * unit to be copied back. We do this because a simple loop containing
39: * blocking I/Os will not work in the simulator.
40: *
41: *****************************************************************************/
42:
43: #include "rf_types.h"
44:
45: #include <sys/time.h>
46: #include <sys/buf.h>
47: #include "rf_raid.h"
48: #include "rf_mcpair.h"
49: #include "rf_acctrace.h"
50: #include "rf_etimer.h"
51: #include "rf_general.h"
52: #include "rf_utils.h"
53: #include "rf_copyback.h"
54: #include "rf_decluster.h"
55: #include "rf_driver.h"
56: #include "rf_shutdown.h"
57: #include "rf_kintf.h"
58:
59: #define RF_COPYBACK_DATA 0
60: #define RF_COPYBACK_PARITY 1
61:
62: int rf_copyback_in_progress;
63:
64: int rf_CopybackReadDoneProc(RF_CopybackDesc_t *, int);
65: int rf_CopybackWriteDoneProc(RF_CopybackDesc_t *, int);
66: void rf_CopybackOne(RF_CopybackDesc_t *, int, RF_RaidAddr_t,
67: RF_RowCol_t, RF_RowCol_t, RF_SectorNum_t);
68: void rf_CopybackComplete(RF_CopybackDesc_t *, int);
69:
70: int
71: rf_ConfigureCopyback(RF_ShutdownList_t **listp)
72: {
73: rf_copyback_in_progress = 0;
74: return (0);
75: }
76:
77: #include <sys/types.h>
78: #include <sys/param.h>
79: #include <sys/systm.h>
80: #include <sys/proc.h>
81: #include <sys/ioctl.h>
82: #include <sys/fcntl.h>
83: #ifdef __NETBSD__
84: #include <sys/vnode.h>
85: #endif
86:
87:
88: /* Do a complete copyback. */
89: void
90: rf_CopybackReconstructedData(RF_Raid_t *raidPtr)
91: {
92: RF_ComponentLabel_t c_label;
93: int done, retcode;
94: RF_CopybackDesc_t *desc;
95: RF_RowCol_t frow, fcol;
96: RF_RaidDisk_t *badDisk;
97: char *databuf;
98:
99: struct partinfo dpart;
100: struct vnode *vp;
101: struct vattr va;
102: struct proc *proc;
103:
104: int ac;
105:
106: done = 0;
107: fcol = 0;
108: for (frow = 0; frow < raidPtr->numRow; frow++) {
109: for (fcol = 0; fcol < raidPtr->numCol; fcol++) {
110: if (raidPtr->Disks[frow][fcol].status ==
111: rf_ds_dist_spared ||
112: raidPtr->Disks[frow][fcol].status ==
113: rf_ds_spared) {
114: done = 1;
115: break;
116: }
117: }
118: if (done)
119: break;
120: }
121:
122: if (frow == raidPtr->numRow) {
123: printf("COPYBACK: No disks need copyback.\n");
124: return;
125: }
126: badDisk = &raidPtr->Disks[frow][fcol];
127:
128: proc = raidPtr->engine_thread;
129:
130: /*
131: * This device may have been opened successfully the first time.
132: * Close it before trying to open it again.
133: */
134:
135: if (raidPtr->raid_cinfo[frow][fcol].ci_vp != NULL) {
136: printf("Close the opened device: %s.\n",
137: raidPtr->Disks[frow][fcol].devname);
138: vp = raidPtr->raid_cinfo[frow][fcol].ci_vp;
139: ac = raidPtr->Disks[frow][fcol].auto_configured;
140: rf_close_component(raidPtr, vp, ac);
141: raidPtr->raid_cinfo[frow][fcol].ci_vp = NULL;
142:
143: }
144: /* Note that this disk was *not* auto_configured (any longer). */
145: raidPtr->Disks[frow][fcol].auto_configured = 0;
146:
147: printf("About to (re-)open the device: %s.\n",
148: raidPtr->Disks[frow][fcol].devname);
149:
150: retcode = raidlookup(raidPtr->Disks[frow][fcol].devname, proc, &vp);
151:
152: if (retcode) {
153: printf("COPYBACK: raidlookup on device: %s failed: %d !\n",
154: raidPtr->Disks[frow][fcol].devname, retcode);
155:
156: /*
157: * XXX The component isn't responding properly... Must be
158: * still dead :-(
159: */
160: return;
161:
162: } else {
163:
164: /*
165: * Ok, so we can at least do a lookup...
166: * How about actually getting a vp for it ?
167: */
168:
169: if ((retcode = VOP_GETATTR(vp, &va, proc->p_ucred, proc)) != 0)
170: {
171: return;
172: }
173: retcode = VOP_IOCTL(vp, DIOCGPART, (caddr_t) &dpart, FREAD,
174: proc->p_ucred, proc);
175: if (retcode) {
176: return;
177: }
178: raidPtr->Disks[frow][fcol].blockSize = dpart.disklab->d_secsize;
179:
180: raidPtr->Disks[frow][fcol].numBlocks = DL_GETPSIZE(dpart.part) -
181: rf_protectedSectors;
182:
183: raidPtr->raid_cinfo[frow][fcol].ci_vp = vp;
184: raidPtr->raid_cinfo[frow][fcol].ci_dev = va.va_rdev;
185:
186: /* XXX Or the above ? */
187: raidPtr->Disks[frow][fcol].dev = va.va_rdev;
188:
189: /*
190: * We allow the user to specify that only a fraction of the
191: * disks should be used this is just for debug: it speeds up
192: * the parity scan.
193: */
194: raidPtr->Disks[frow][fcol].numBlocks =
195: raidPtr->Disks[frow][fcol].numBlocks *
196: rf_sizePercentage / 100;
197: }
198: #if 0
199: /* This is the way it was done before the CAM stuff was removed. */
200:
201: if (rf_extract_ids(badDisk->devname, &bus, &targ, &lun)) {
202: printf("COPYBACK: unable to extract bus, target, lun from"
203: " devname %s.\n", badDisk->devname);
204: return;
205: }
206: /*
207: * TUR the disk that's marked as bad to be sure that it's actually
208: * alive.
209: */
210: rf_SCSI_AllocTUR(&tur_op);
211: retcode = rf_SCSI_DoTUR(tur_op, bus, targ, lun, badDisk->dev);
212: rf_SCSI_FreeDiskOp(tur_op, 0);
213: #endif
214:
215: if (retcode) {
216: printf("COPYBACK: target disk failed TUR.\n");
217: return;
218: }
219: /* Get a buffer to hold one SU. */
220: RF_Malloc(databuf, rf_RaidAddressToByte(raidPtr,
221: raidPtr->Layout.sectorsPerStripeUnit), (char *));
222:
223: /* Create a descriptor. */
224: RF_Malloc(desc, sizeof(*desc), (RF_CopybackDesc_t *));
225: desc->raidPtr = raidPtr;
226: desc->status = 0;
227: desc->frow = frow;
228: desc->fcol = fcol;
229: desc->spRow = badDisk->spareRow;
230: desc->spCol = badDisk->spareCol;
231: desc->stripeAddr = 0;
232: desc->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
233: desc->sectPerStripe = raidPtr->Layout.sectorsPerStripeUnit *
234: raidPtr->Layout.numDataCol;
235: desc->databuf = databuf;
236: desc->mcpair = rf_AllocMCPair();
237:
238: printf("COPYBACK: Quiescing the array.\n");
239: /*
240: * Quiesce the array, since we don't want to code support for user
241: * accs here.
242: */
243: rf_SuspendNewRequestsAndWait(raidPtr);
244:
245: /* Adjust state of the array and of the disks. */
246: RF_LOCK_MUTEX(raidPtr->mutex);
247: raidPtr->Disks[desc->frow][desc->fcol].status = rf_ds_optimal;
248: raidPtr->status[desc->frow] = rf_rs_optimal;
249: rf_copyback_in_progress = 1; /* Debug only. */
250: RF_UNLOCK_MUTEX(raidPtr->mutex);
251:
252: printf("COPYBACK: Beginning\n");
253: RF_GETTIME(desc->starttime);
254: rf_ContinueCopyback(desc);
255:
256: /*
257: * Data has been restored.
258: * Fix up the component label.
259: * Don't actually need the read here.
260: */
261: raidread_component_label(raidPtr->raid_cinfo[frow][fcol].ci_dev,
262: raidPtr->raid_cinfo[frow][fcol].ci_vp,
263: &c_label);
264:
265: raid_init_component_label(raidPtr, &c_label);
266:
267: c_label.row = frow;
268: c_label.column = fcol;
269:
270: raidwrite_component_label(raidPtr->raid_cinfo[frow][fcol].ci_dev,
271: raidPtr->raid_cinfo[frow][fcol].ci_vp,
272: &c_label);
273: }
274:
275:
276: /*
277: * Invoked via callback after a copyback I/O has completed to
278: * continue on with the next one.
279: */
280: void
281: rf_ContinueCopyback(RF_CopybackDesc_t *desc)
282: {
283: RF_SectorNum_t testOffs, stripeAddr;
284: RF_Raid_t *raidPtr = desc->raidPtr;
285: RF_RaidAddr_t addr;
286: RF_RowCol_t testRow, testCol;
287: int old_pctg, new_pctg, done;
288: struct timeval t, diff;
289:
290: old_pctg = (-1);
291: while (1) {
292: stripeAddr = desc->stripeAddr;
293: desc->raidPtr->copyback_stripes_done = stripeAddr /
294: desc->sectPerStripe;
295: if (rf_prReconSched) {
296: old_pctg = 100 * desc->stripeAddr /
297: raidPtr->totalSectors;
298: }
299: desc->stripeAddr += desc->sectPerStripe;
300: if (rf_prReconSched) {
301: new_pctg = 100 * desc->stripeAddr /
302: raidPtr->totalSectors;
303: if (new_pctg != old_pctg) {
304: RF_GETTIME(t);
305: RF_TIMEVAL_DIFF(&desc->starttime, &t, &diff);
306: printf("%d %d.%06d\n", new_pctg,
307: (int) diff.tv_sec, (int) diff.tv_usec);
308: }
309: }
310: if (stripeAddr >= raidPtr->totalSectors) {
311: rf_CopybackComplete(desc, 0);
312: return;
313: }
314: /* Walk through the current stripe, su-by-su. */
315: for (done = 0, addr = stripeAddr;
316: addr < stripeAddr + desc->sectPerStripe;
317: addr += desc->sectPerSU) {
318:
319: /* Map the SU, disallowing remap to spare space. */
320: (raidPtr->Layout.map->MapSector) (raidPtr, addr,
321: &testRow, &testCol, &testOffs, RF_DONT_REMAP);
322:
323: if (testRow == desc->frow && testCol == desc->fcol) {
324: rf_CopybackOne(desc, RF_COPYBACK_DATA, addr,
325: testRow, testCol, testOffs);
326: done = 1;
327: break;
328: }
329: }
330:
331: if (!done) {
332: /*
333: * We didn't find the failed disk in the data part,
334: * check parity.
335: */
336:
337: /*
338: * Map the parity for this stripe, disallowing remap
339: * to spare space.
340: */
341: (raidPtr->Layout.map->MapParity) (raidPtr, stripeAddr,
342: &testRow, &testCol, &testOffs, RF_DONT_REMAP);
343:
344: if (testRow == desc->frow && testCol == desc->fcol) {
345: rf_CopybackOne(desc, RF_COPYBACK_PARITY,
346: stripeAddr, testRow, testCol, testOffs);
347: }
348: }
349: /* Check to see if the last read/write pair failed. */
350: if (desc->status) {
351: rf_CopybackComplete(desc, 1);
352: return;
353: }
354: /*
355: * We didn't find any units to copy back in this stripe.
356: * Continue with the next one.
357: */
358: }
359: }
360:
361:
362: /* Copyback one unit. */
363: void
364: rf_CopybackOne(RF_CopybackDesc_t *desc, int typ, RF_RaidAddr_t addr,
365: RF_RowCol_t testRow, RF_RowCol_t testCol, RF_SectorNum_t testOffs)
366: {
367: RF_SectorCount_t sectPerSU = desc->sectPerSU;
368: RF_Raid_t *raidPtr = desc->raidPtr;
369: RF_RowCol_t spRow = desc->spRow;
370: RF_RowCol_t spCol = desc->spCol;
371: RF_SectorNum_t spOffs;
372:
373: /* Find the spare location for this SU. */
374: if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) {
375: if (typ == RF_COPYBACK_DATA)
376: raidPtr->Layout.map->MapSector(raidPtr, addr, &spRow,
377: &spCol, &spOffs, RF_REMAP);
378: else
379: raidPtr->Layout.map->MapParity(raidPtr, addr, &spRow,
380: &spCol, &spOffs, RF_REMAP);
381: } else {
382: spOffs = testOffs;
383: }
384:
385: /* Create reqs to read the old location & write the new. */
386: desc->readreq = rf_CreateDiskQueueData(RF_IO_TYPE_READ, spOffs,
387: sectPerSU, desc->databuf, 0L, 0, (int (*) (void *, int))
388: rf_CopybackReadDoneProc, desc, NULL, NULL, (void *) raidPtr,
389: RF_DISKQUEUE_DATA_FLAGS_NONE, NULL);
390: desc->writereq = rf_CreateDiskQueueData(RF_IO_TYPE_WRITE, testOffs,
391: sectPerSU, desc->databuf, 0L, 0, (int (*) (void *, int))
392: rf_CopybackWriteDoneProc, desc, NULL, NULL, (void *) raidPtr,
393: RF_DISKQUEUE_DATA_FLAGS_NONE, NULL);
394: desc->frow = testRow;
395: desc->fcol = testCol;
396:
397: /*
398: * Enqueue the read. The write will go out as part of the callback on
399: * the read. At user-level & in the kernel, wait for the read-write
400: * pair to complete. In the simulator, just return, since everything
401: * will happen as callbacks.
402: */
403:
404: RF_LOCK_MUTEX(desc->mcpair->mutex);
405: desc->mcpair->flag = 0;
406:
407: rf_DiskIOEnqueue(&raidPtr->Queues[spRow][spCol], desc->readreq,
408: RF_IO_NORMAL_PRIORITY);
409:
410: while (!desc->mcpair->flag) {
411: RF_WAIT_MCPAIR(desc->mcpair);
412: }
413: RF_UNLOCK_MUTEX(desc->mcpair->mutex);
414: rf_FreeDiskQueueData(desc->readreq);
415: rf_FreeDiskQueueData(desc->writereq);
416:
417: }
418:
419:
420: /*
421: * Called at interrupt context when the read has completed.
422: * Just send out the write.
423: */
424: int
425: rf_CopybackReadDoneProc(RF_CopybackDesc_t *desc, int status)
426: {
427: if (status) { /* Invoke the callback with bad status. */
428: printf("COPYBACK: copyback read failed. Aborting.\n");
429: (desc->writereq->CompleteFunc) (desc, -100);
430: } else {
431: rf_DiskIOEnqueue(&(desc->raidPtr
432: ->Queues[desc->frow][desc->fcol]),
433: desc->writereq, RF_IO_NORMAL_PRIORITY);
434: }
435: return (0);
436: }
437:
438:
439: /*
440: * Called at interrupt context when the write has completed.
441: * At user level & in the kernel, wake up the copyback thread.
442: * In the simulator, invoke the next copyback directly.
443: * Can't free diskqueuedata structs in the kernel because we're at
444: * interrupt context.
445: */
446: int
447: rf_CopybackWriteDoneProc(RF_CopybackDesc_t *desc, int status)
448: {
449: if (status && status != -100) {
450: printf("COPYBACK: copyback write failed. Aborting.\n");
451: }
452: desc->status = status;
453: rf_MCPairWakeupFunc(desc->mcpair);
454: return (0);
455: }
456:
457:
458: /* Invoked when the copyback has completed. */
459: void
460: rf_CopybackComplete(RF_CopybackDesc_t *desc, int status)
461: {
462: RF_Raid_t *raidPtr = desc->raidPtr;
463: struct timeval t, diff;
464:
465: if (!status) {
466: RF_LOCK_MUTEX(raidPtr->mutex);
467: if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) {
468: RF_ASSERT(raidPtr->Layout.map->parityConfig == 'D');
469: rf_FreeSpareTable(raidPtr);
470: } else {
471: raidPtr->Disks[desc->spRow][desc->spCol].status =
472: rf_ds_spare;
473: }
474: RF_UNLOCK_MUTEX(raidPtr->mutex);
475:
476: RF_GETTIME(t);
477: RF_TIMEVAL_DIFF(&desc->starttime, &t, &diff);
478: printf("Copyback time was %d.%06d seconds.\n",
479: (int) diff.tv_sec, (int) diff.tv_usec);
480: } else
481: printf("COPYBACK: Failure.\n");
482:
483: RF_Free(desc->databuf, rf_RaidAddressToByte(raidPtr, desc->sectPerSU));
484: rf_FreeMCPair(desc->mcpair);
485: RF_Free(desc, sizeof(*desc));
486:
487: rf_copyback_in_progress = 0;
488: rf_ResumeNewRequests(raidPtr);
489: }
CVSweb