Annotation of sys/dev/raidframe/rf_disks.c, Revision 1.1.1.1
1.1 nbrk 1: /* $OpenBSD: rf_disks.c,v 1.12 2007/06/05 00:38:22 deraadt Exp $ */
2: /* $NetBSD: rf_disks.c,v 1.31 2000/06/02 01:17:14 oster Exp $ */
3:
4: /*
5: * Copyright (c) 1999 The NetBSD Foundation, Inc.
6: * All rights reserved.
7: *
8: * This code is derived from software contributed to The NetBSD Foundation
9: * by Greg Oster
10: *
11: * Redistribution and use in source and binary forms, with or without
12: * modification, are permitted provided that the following conditions
13: * are met:
14: * 1. Redistributions of source code must retain the above copyright
15: * notice, this list of conditions and the following disclaimer.
16: * 2. Redistributions in binary form must reproduce the above copyright
17: * notice, this list of conditions and the following disclaimer in the
18: * documentation and/or other materials provided with the distribution.
19: * 3. All advertising materials mentioning features or use of this software
20: * must display the following acknowledgement:
21: * This product includes software developed by the NetBSD
22: * Foundation, Inc. and its contributors.
23: * 4. Neither the name of The NetBSD Foundation nor the names of its
24: * contributors may be used to endorse or promote products derived
25: * from this software without specific prior written permission.
26: *
27: * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
28: * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
29: * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30: * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
31: * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32: * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33: * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34: * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35: * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36: * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37: * POSSIBILITY OF SUCH DAMAGE.
38: */
39: /*
40: * Copyright (c) 1995 Carnegie-Mellon University.
41: * All rights reserved.
42: *
43: * Author: Mark Holland
44: *
45: * Permission to use, copy, modify and distribute this software and
46: * its documentation is hereby granted, provided that both the copyright
47: * notice and this permission notice appear in all copies of the
48: * software, derivative works or modified versions, and any portions
49: * thereof, and that both notices appear in supporting documentation.
50: *
51: * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
52: * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
53: * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
54: *
55: * Carnegie Mellon requests users of this software to return to
56: *
57: * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
58: * School of Computer Science
59: * Carnegie Mellon University
60: * Pittsburgh PA 15213-3890
61: *
62: * any improvements or extensions that they make and grant Carnegie the
63: * rights to redistribute these changes.
64: */
65:
66: /***************************************************************
67: * rf_disks.c -- Code to perform operations on the actual disks.
68: ***************************************************************/
69:
70: #include "rf_types.h"
71: #include "rf_raid.h"
72: #include "rf_alloclist.h"
73: #include "rf_utils.h"
74: #include "rf_configure.h"
75: #include "rf_general.h"
76: #include "rf_options.h"
77: #include "rf_kintf.h"
78:
79: #if defined(__NetBSD__)
80: #include "rf_netbsd.h"
81: #elif defined(__OpenBSD__)
82: #include "rf_openbsd.h"
83: #endif
84:
85: #include <sys/types.h>
86: #include <sys/param.h>
87: #include <sys/systm.h>
88: #include <sys/proc.h>
89: #include <sys/ioctl.h>
90: #include <sys/fcntl.h>
91: #ifdef __NETBSD__
92: #include <sys/vnode.h>
93: #endif /* __NETBSD__ */
94:
95: int rf_AllocDiskStructures(RF_Raid_t *, RF_Config_t *);
96: void rf_print_label_status(RF_Raid_t *, int, int, char *,
97: RF_ComponentLabel_t *);
98: int rf_check_label_vitals(RF_Raid_t *, int, int, char *,
99: RF_ComponentLabel_t *, int, int);
100:
/*
 * Debug printf wrappers: the message is printed only when rf_diskDebug is
 * non-zero.  Each macro is wrapped in do { } while (0) so that it expands to
 * exactly one statement and cannot capture an `else' that follows the
 * invocation (the bare `if (...) printf(...)' form could).
 */
#define	DPRINTF6(a,b,c,d,e,f)						\
	do {								\
		if (rf_diskDebug)					\
			printf(a,b,c,d,e,f);				\
	} while (0)
#define	DPRINTF7(a,b,c,d,e,f,g)						\
	do {								\
		if (rf_diskDebug)					\
			printf(a,b,c,d,e,f,g);				\
	} while (0)
103:
104: /****************************************************************************
105: *
106: * Initialize the disks comprising the array.
107: *
108: * We want the spare disks to have regular row,col numbers so that we can
109: * easily substitue a spare for a failed disk. But, the driver code assumes
110: * throughout that the array contains numRow by numCol _non-spare_ disks, so
111: * it's not clear how to fit in the spares. This is an unfortunate holdover
112: * from raidSim. The quick and dirty fix is to make row zero bigger than the
113: * rest, and put all the spares in it. This probably needs to get changed
114: * eventually.
115: *
116: ****************************************************************************/
117: int
118: rf_ConfigureDisks(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
119: RF_Config_t *cfgPtr)
120: {
121: RF_RaidDisk_t **disks;
122: RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
123: RF_RowCol_t r, c;
124: int bs, ret;
125: unsigned i, count, foundone = 0, numFailuresThisRow;
126: int force;
127:
128: force = cfgPtr->force;
129:
130: ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
131: if (ret)
132: goto fail;
133:
134: disks = raidPtr->Disks;
135:
136: for (r = 0; r < raidPtr->numRow; r++) {
137: numFailuresThisRow = 0;
138: for (c = 0; c < raidPtr->numCol; c++) {
139: ret = rf_ConfigureDisk(raidPtr,
140: &cfgPtr->devnames[r][c][0], &disks[r][c], r, c);
141:
142: if (ret)
143: goto fail;
144:
145: if (disks[r][c].status == rf_ds_optimal) {
146: raidread_component_label(
147: raidPtr->raid_cinfo[r][c].ci_dev,
148: raidPtr->raid_cinfo[r][c].ci_vp,
149: &raidPtr->raid_cinfo[r][c].ci_label);
150: }
151:
152: if (disks[r][c].status != rf_ds_optimal) {
153: numFailuresThisRow++;
154: } else {
155: if (disks[r][c].numBlocks < min_numblks)
156: min_numblks = disks[r][c].numBlocks;
157: DPRINTF7("Disk at row %d col %d: dev %s"
158: " numBlocks %ld blockSize %d (%ld MB)\n",
159: r, c, disks[r][c].devname,
160: (long int) disks[r][c].numBlocks,
161: disks[r][c].blockSize,
162: (long int) disks[r][c].numBlocks *
163: disks[r][c].blockSize / 1024 / 1024);
164: }
165: }
166: /* XXX Fix for n-fault tolerant. */
167: /*
168: * XXX This should probably check to see how many failures
169: * we can handle for this configuration !
170: */
171: if (numFailuresThisRow > 0)
172: raidPtr->status[r] = rf_rs_degraded;
173: }
174: /*
175: * All disks must be the same size & have the same block size, bs must
176: * be a power of 2.
177: */
178: bs = 0;
179: for (foundone = r = 0; !foundone && r < raidPtr->numRow; r++) {
180: for (c = 0; !foundone && c < raidPtr->numCol; c++) {
181: if (disks[r][c].status == rf_ds_optimal) {
182: bs = disks[r][c].blockSize;
183: foundone = 1;
184: }
185: }
186: }
187: if (!foundone) {
188: RF_ERRORMSG("RAIDFRAME: Did not find any live disks in"
189: " the array.\n");
190: ret = EINVAL;
191: goto fail;
192: }
193: for (count = 0, i = 1; i; i <<= 1)
194: if (bs & i)
195: count++;
196: if (count != 1) {
197: RF_ERRORMSG1("Error: block size on disks (%d) must be a"
198: " power of 2.\n", bs);
199: ret = EINVAL;
200: goto fail;
201: }
202:
203: if (rf_CheckLabels(raidPtr, cfgPtr)) {
204: printf("raid%d: There were fatal errors\n", raidPtr->raidid);
205: if (force != 0) {
206: printf("raid%d: Fatal errors being ignored.\n",
207: raidPtr->raidid);
208: } else {
209: ret = EINVAL;
210: goto fail;
211: }
212: }
213:
214: for (r = 0; r < raidPtr->numRow; r++) {
215: for (c = 0; c < raidPtr->numCol; c++) {
216: if (disks[r][c].status == rf_ds_optimal) {
217: if (disks[r][c].blockSize != bs) {
218: RF_ERRORMSG2("Error: block size of"
219: " disk at r %d c %d different from"
220: " disk at r 0 c 0.\n", r, c);
221: ret = EINVAL;
222: goto fail;
223: }
224: if (disks[r][c].numBlocks != min_numblks) {
225: RF_ERRORMSG3("WARNING: truncating disk"
226: " at r %d c %d to %d blocks.\n",
227: r, c, (int) min_numblks);
228: disks[r][c].numBlocks = min_numblks;
229: }
230: }
231: }
232: }
233:
234: raidPtr->sectorsPerDisk = min_numblks;
235: raidPtr->logBytesPerSector = ffs(bs) - 1;
236: raidPtr->bytesPerSector = bs;
237: raidPtr->sectorMask = bs - 1;
238: return (0);
239:
240: fail:
241: rf_UnconfigureVnodes(raidPtr);
242:
243: return (ret);
244: }
245:
246:
247: /****************************************************************************
248: * Set up the data structures describing the spare disks in the array.
249: * Recall from the above comment that the spare disk descriptors are stored
250: * in row zero, which is specially expanded to hold them.
251: ****************************************************************************/
252: int
253: rf_ConfigureSpareDisks(RF_ShutdownList_t ** listp, RF_Raid_t * raidPtr,
254: RF_Config_t * cfgPtr)
255: {
256: int i, ret;
257: unsigned int bs;
258: RF_RaidDisk_t *disks;
259: int num_spares_done;
260:
261: num_spares_done = 0;
262:
263: /*
264: * The space for the spares should have already been allocated by
265: * ConfigureDisks().
266: */
267:
268: disks = &raidPtr->Disks[0][raidPtr->numCol];
269: for (i = 0; i < raidPtr->numSpare; i++) {
270: ret = rf_ConfigureDisk(raidPtr, &cfgPtr->spare_names[i][0],
271: &disks[i], 0, raidPtr->numCol + i);
272: if (ret)
273: goto fail;
274: if (disks[i].status != rf_ds_optimal) {
275: RF_ERRORMSG1("Warning: spare disk %s failed TUR\n",
276: &cfgPtr->spare_names[i][0]);
277: } else {
278: /* Change status to spare. */
279: disks[i].status = rf_ds_spare;
280: DPRINTF6("Spare Disk %d: dev %s numBlocks %ld"
281: " blockSize %d (%ld MB).\n", i, disks[i].devname,
282: (long int) disks[i].numBlocks, disks[i].blockSize,
283: (long int) disks[i].numBlocks *
284: disks[i].blockSize / 1024 / 1024);
285: }
286: num_spares_done++;
287: }
288:
289: /* Check sizes and block sizes on spare disks. */
290: bs = 1 << raidPtr->logBytesPerSector;
291: for (i = 0; i < raidPtr->numSpare; i++) {
292: if (disks[i].blockSize != bs) {
293: RF_ERRORMSG3("Block size of %d on spare disk %s is"
294: " not the same as on other disks (%d).\n",
295: disks[i].blockSize, disks[i].devname, bs);
296: ret = EINVAL;
297: goto fail;
298: }
299: if (disks[i].numBlocks < raidPtr->sectorsPerDisk) {
300: RF_ERRORMSG3("Spare disk %s (%llu blocks) is too small"
301: " to serve as a spare (need %llu blocks).\n",
302: disks[i].devname, disks[i].numBlocks,
303: raidPtr->sectorsPerDisk);
304: ret = EINVAL;
305: goto fail;
306: } else
307: if (disks[i].numBlocks > raidPtr->sectorsPerDisk) {
308: RF_ERRORMSG2("Warning: truncating spare disk"
309: " %s to %llu blocks.\n", disks[i].devname,
310: raidPtr->sectorsPerDisk);
311:
312: disks[i].numBlocks = raidPtr->sectorsPerDisk;
313: }
314: }
315:
316: return (0);
317:
318: fail:
319:
320: /*
321: * Release the hold on the main components. We've failed to allocate
322: * a spare, and since we're failing, we need to free things...
323: *
324: * XXX Failing to allocate a spare is *not* that big of a deal...
325: * We *can* survive without it, if need be, esp. if we get hot
326: * adding working.
327: * If we don't fail out here, then we need a way to remove this spare...
328: * That should be easier to do here than if we are "live"...
329: */
330:
331: rf_UnconfigureVnodes(raidPtr);
332:
333: return (ret);
334: }
335:
336: int
337: rf_AllocDiskStructures(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr)
338: {
339: RF_RaidDisk_t **disks;
340: int ret;
341: int r;
342:
343: RF_CallocAndAdd(disks, raidPtr->numRow, sizeof(RF_RaidDisk_t *),
344: (RF_RaidDisk_t **), raidPtr->cleanupList);
345: if (disks == NULL) {
346: ret = ENOMEM;
347: goto fail;
348: }
349: raidPtr->Disks = disks;
350: /* Get space for the device-specific stuff... */
351: RF_CallocAndAdd(raidPtr->raid_cinfo, raidPtr->numRow,
352: sizeof(struct raidcinfo *), (struct raidcinfo **),
353: raidPtr->cleanupList);
354: if (raidPtr->raid_cinfo == NULL) {
355: ret = ENOMEM;
356: goto fail;
357: }
358:
359: for (r = 0; r < raidPtr->numRow; r++) {
360: /*
361: * We allocate RF_MAXSPARE on the first row so that we
362: * have room to do hot-swapping of spares.
363: */
364: RF_CallocAndAdd(disks[r], raidPtr->numCol +
365: ((r == 0) ? RF_MAXSPARE : 0), sizeof(RF_RaidDisk_t),
366: (RF_RaidDisk_t *), raidPtr->cleanupList);
367: if (disks[r] == NULL) {
368: ret = ENOMEM;
369: goto fail;
370: }
371: /* Get more space for device specific stuff... */
372: RF_CallocAndAdd(raidPtr->raid_cinfo[r], raidPtr->numCol +
373: ((r == 0) ? raidPtr->numSpare : 0),
374: sizeof(struct raidcinfo), (struct raidcinfo *),
375: raidPtr->cleanupList);
376: if (raidPtr->raid_cinfo[r] == NULL) {
377: ret = ENOMEM;
378: goto fail;
379: }
380: }
381: return(0);
382: fail:
383: rf_UnconfigureVnodes(raidPtr);
384:
385: return(ret);
386: }
387:
388:
389: /* Configure a single disk during auto-configuration at boot. */
390: int
391: rf_AutoConfigureDisks(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr,
392: RF_AutoConfig_t *auto_config)
393: {
394: RF_RaidDisk_t **disks;
395: RF_RaidDisk_t *diskPtr;
396: RF_RowCol_t r, c;
397: RF_SectorCount_t min_numblks = (RF_SectorCount_t) 0x7FFFFFFFFFFFLL;
398: int bs, ret;
399: int numFailuresThisRow;
400: int force;
401: RF_AutoConfig_t *ac;
402: int parity_good;
403: int mod_counter;
404: int mod_counter_found;
405:
406: #if DEBUG
407: printf("Starting autoconfiguration of RAID set...\n");
408: #endif /* DEBUG */
409: force = cfgPtr->force;
410:
411: ret = rf_AllocDiskStructures(raidPtr, cfgPtr);
412: if (ret)
413: goto fail;
414:
415: disks = raidPtr->Disks;
416:
417: /* Assume the parity will be fine... */
418: parity_good = RF_RAID_CLEAN;
419:
420: /* Check for mod_counters that are too low. */
421: mod_counter_found = 0;
422: ac = auto_config;
423: while(ac!=NULL) {
424: if (mod_counter_found == 0) {
425: mod_counter = ac->clabel->mod_counter;
426: mod_counter_found = 1;
427: } else {
428: if (ac->clabel->mod_counter > mod_counter) {
429: mod_counter = ac->clabel->mod_counter;
430: }
431: }
432: ac->flag = 0; /* Clear the general purpose flag. */
433: ac = ac->next;
434: }
435:
436: for (r = 0; r < raidPtr->numRow; r++) {
437: numFailuresThisRow = 0;
438: for (c = 0; c < raidPtr->numCol; c++) {
439: diskPtr = &disks[r][c];
440:
441: /* Find this row/col in the autoconfig. */
442: #if DEBUG
443: printf("Looking for %d,%d in autoconfig.\n", r, c);
444: #endif /* DEBUG */
445: ac = auto_config;
446: while(ac!=NULL) {
447: if (ac->clabel == NULL) {
448: /* Big-time bad news. */
449: goto fail;
450: }
451: if ((ac->clabel->row == r) &&
452: (ac->clabel->column == c) &&
453: (ac->clabel->mod_counter == mod_counter)) {
454: /* It's this one... */
455: /*
456: * Flag it as 'used', so we don't
457: * free it later.
458: */
459: ac->flag = 1;
460: #if DEBUG
461: printf("Found: %s at %d,%d.\n",
462: ac->devname, r, c);
463: #endif /* DEBUG */
464:
465: break;
466: }
467: ac = ac->next;
468: }
469:
470: if (ac == NULL) {
471: /*
472: * We didn't find an exact match with a
473: * correct mod_counter above... Can we
474: * find one with an incorrect mod_counter
475: * to use instead ? (This one, if we find
476: * it, will be marked as failed once the
477: * set configures)
478: */
479:
480: ac = auto_config;
481: while(ac!=NULL) {
482: if (ac->clabel == NULL) {
483: /* Big-time bad news. */
484: goto fail;
485: }
486: if ((ac->clabel->row == r) &&
487: (ac->clabel->column == c)) {
488: /*
489: * It's this one...
490: * Flag it as 'used', so we
491: * don't free it later.
492: */
493: ac->flag = 1;
494: #if DEBUG
495: printf("Found(low mod_counter)"
496: ": %s at %d,%d.\n",
497: ac->devname, r, c);
498: #endif /* DEBUG */
499:
500: break;
501: }
502: ac = ac->next;
503: }
504: }
505:
506:
507:
508: if (ac!=NULL) {
509: /* Found it. Configure it... */
510: diskPtr->blockSize = ac->clabel->blockSize;
511: diskPtr->numBlocks = ac->clabel->numBlocks;
512: /*
513: * Note: rf_protectedSectors is already
514: * factored into numBlocks here.
515: */
516: raidPtr->raid_cinfo[r][c].ci_vp = ac->vp;
517: raidPtr->raid_cinfo[r][c].ci_dev = ac->dev;
518:
519: memcpy(&raidPtr->raid_cinfo[r][c].ci_label,
520: ac->clabel, sizeof(*ac->clabel));
521: snprintf(diskPtr->devname,
522: sizeof diskPtr->devname, "/dev/%s",
523: ac->devname);
524:
525: /*
526: * Note the fact that this component was
527: * autoconfigured. You'll need this info
528: * later. Trust me :)
529: */
530: diskPtr->auto_configured = 1;
531: diskPtr->dev = ac->dev;
532:
533: /*
534: * We allow the user to specify that
535: * only a fraction of the disks should
536: * be used. This is just for debug: it
537: * speeds up the parity scan.
538: */
539:
540: diskPtr->numBlocks = diskPtr->numBlocks *
541: rf_sizePercentage / 100;
542:
543: /*
544: * XXX These will get set multiple times,
545: * but since we're autoconfiguring, they'd
546: * better be always the same each time !
547: * If not, this is the least of your worries.
548: */
549:
550: bs = diskPtr->blockSize;
551: min_numblks = diskPtr->numBlocks;
552:
553: /*
554: * This gets done multiple times, but that's
555: * fine -- the serial number will be the same
556: * for all components, guaranteed.
557: */
558: raidPtr->serial_number =
559: ac->clabel->serial_number;
560: /*
561: * Check the last time the label
562: * was modified.
563: */
564: if (ac->clabel->mod_counter != mod_counter) {
565: /*
566: * Even though we've filled in all
567: * of the above, we don't trust
568: * this component since it's
569: * modification counter is not
570: * in sync with the rest, and we really
571: * consider it to be failed.
572: */
573: disks[r][c].status = rf_ds_failed;
574: numFailuresThisRow++;
575: } else {
576: if (ac->clabel->clean != RF_RAID_CLEAN)
577: {
578: parity_good = RF_RAID_DIRTY;
579: }
580: }
581: } else {
582: /*
583: * Didn't find it at all !!!
584: * Component must really be dead.
585: */
586: disks[r][c].status = rf_ds_failed;
587: snprintf(disks[r][c].devname,
588: sizeof disks[r][c].devname, "component%d",
589: r * raidPtr->numCol + c);
590: numFailuresThisRow++;
591: }
592: }
593: /* XXX Fix for n-fault tolerant. */
594: /*
595: * XXX This should probably check to see how many failures
596: * we can handle for this configuration !
597: */
598: if (numFailuresThisRow > 0)
599: raidPtr->status[r] = rf_rs_degraded;
600: }
601:
602: /* Close the device for the ones that didn't get used. */
603:
604: ac = auto_config;
605: while(ac != NULL) {
606: if (ac->flag == 0) {
607: VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
608: vput(ac->vp);
609: ac->vp = NULL;
610: #if DEBUG
611: printf("Released %s from auto-config set.\n",
612: ac->devname);
613: #endif /* DEBUG */
614: }
615: ac = ac->next;
616: }
617:
618: raidPtr->mod_counter = mod_counter;
619:
620: /* Note the state of the parity, if any. */
621: raidPtr->parity_good = parity_good;
622: raidPtr->sectorsPerDisk = min_numblks;
623: raidPtr->logBytesPerSector = ffs(bs) - 1;
624: raidPtr->bytesPerSector = bs;
625: raidPtr->sectorMask = bs - 1;
626: return (0);
627:
628: fail:
629:
630: rf_UnconfigureVnodes(raidPtr);
631:
632: return (ret);
633:
634: }
635:
636: /* Configure a single disk in the array. */
637: int
638: rf_ConfigureDisk(RF_Raid_t *raidPtr, char *buf, RF_RaidDisk_t *diskPtr,
639: RF_RowCol_t row, RF_RowCol_t col)
640: {
641: char *p;
642: int retcode;
643:
644: struct partinfo dpart;
645: struct vnode *vp;
646: struct vattr va;
647: struct proc *proc;
648: int error;
649:
650: retcode = 0;
651: p = rf_find_non_white(buf);
652: if (*buf != '\0' && p[strlen(p) - 1] == '\n') {
653: /* Strip off the newline. */
654: p[strlen(p) - 1] = '\0';
655: }
656: (void) strlcpy(diskPtr->devname, p, sizeof diskPtr->devname);
657:
658: proc = raidPtr->engine_thread;
659:
660: /* Let's start by claiming the component is fine and well... */
661: diskPtr->status = rf_ds_optimal;
662:
663: raidPtr->raid_cinfo[row][col].ci_vp = NULL;
664: raidPtr->raid_cinfo[row][col].ci_dev = NULL;
665:
666: error = raidlookup(diskPtr->devname, curproc, &vp);
667: if (error) {
668: printf("raidlookup on device: %s failed !\n", diskPtr->devname);
669: if (error == ENXIO) {
670: /* The component isn't there... Must be dead :-( */
671: diskPtr->status = rf_ds_failed;
672: } else {
673: return (error);
674: }
675: }
676: if (diskPtr->status == rf_ds_optimal) {
677:
678: if ((error = VOP_GETATTR(vp, &va, proc->p_ucred, proc)) != 0) {
679: return (error);
680: }
681: error = VOP_IOCTL(vp, DIOCGPART, (caddr_t) & dpart, FREAD,
682: proc->p_ucred, proc);
683: if (error) {
684: return (error);
685: }
686: diskPtr->blockSize = dpart.disklab->d_secsize;
687:
688: diskPtr->numBlocks = DL_GETPSIZE(dpart.part) - rf_protectedSectors;
689: diskPtr->partitionSize = DL_GETPSIZE(dpart.part);
690:
691: raidPtr->raid_cinfo[row][col].ci_vp = vp;
692: raidPtr->raid_cinfo[row][col].ci_dev = va.va_rdev;
693:
694: /* This component was not automatically configured. */
695: diskPtr->auto_configured = 0;
696: diskPtr->dev = va.va_rdev;
697:
698: /*
699: * We allow the user to specify that only a fraction of the
700: * disks should be used. This is just for debug: it speeds up
701: * the parity scan.
702: */
703: diskPtr->numBlocks = diskPtr->numBlocks * rf_sizePercentage
704: / 100;
705: }
706: return (0);
707: }
708:
709: void
710: rf_print_label_status(RF_Raid_t *raidPtr, int row, int column, char *dev_name,
711: RF_ComponentLabel_t *ci_label)
712: {
713:
714: printf("raid%d: Component %s being configured at row: %d col: %d\n",
715: raidPtr->raidid, dev_name, row, column);
716: printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
717: ci_label->row, ci_label->column, ci_label->num_rows,
718: ci_label->num_columns);
719: printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
720: ci_label->version, ci_label->serial_number, ci_label->mod_counter);
721: printf(" Clean: %s Status: %d\n",
722: ci_label->clean ? "Yes" : "No", ci_label->status);
723: }
724:
725: int
726: rf_check_label_vitals(RF_Raid_t *raidPtr, int row, int column, char *dev_name,
727: RF_ComponentLabel_t *ci_label, int serial_number, int mod_counter)
728: {
729: int fatal_error = 0;
730:
731: if (serial_number != ci_label->serial_number) {
732: printf("%s has a different serial number: %d %d.\n",
733: dev_name, serial_number, ci_label->serial_number);
734: fatal_error = 1;
735: }
736: if (mod_counter != ci_label->mod_counter) {
737: printf("%s has a different modfication count: %d %d.\n",
738: dev_name, mod_counter, ci_label->mod_counter);
739: }
740:
741: if (row != ci_label->row) {
742: printf("Row out of alignment for: %s.\n", dev_name);
743: fatal_error = 1;
744: }
745: if (column != ci_label->column) {
746: printf("Column out of alignment for: %s.\n", dev_name);
747: fatal_error = 1;
748: }
749: if (raidPtr->numRow != ci_label->num_rows) {
750: printf("Number of rows do not match for: %s.\n", dev_name);
751: fatal_error = 1;
752: }
753: if (raidPtr->numCol != ci_label->num_columns) {
754: printf("Number of columns do not match for: %s.\n", dev_name);
755: fatal_error = 1;
756: }
757: if (ci_label->clean == 0) {
758: /* It's not clean, but that's not fatal. */
759: printf("%s is not clean !\n", dev_name);
760: }
761: return(fatal_error);
762: }
763:
764:
765: /*
766: *
767: * rf_CheckLabels() - Check all the component labels for consistency.
768: * Return an error if there is anything major amiss.
769: *
770: */
771:
772: int
773: rf_CheckLabels(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr)
774: {
775: int r, c;
776: char *dev_name;
777: RF_ComponentLabel_t *ci_label;
778: int serial_number = 0;
779: int mod_number = 0;
780: int fatal_error = 0;
781: int mod_values[4];
782: int mod_count[4];
783: int ser_values[4];
784: int ser_count[4];
785: int num_ser;
786: int num_mod;
787: int i;
788: int found;
789: int hosed_row;
790: int hosed_column;
791: int too_fatal;
792: int parity_good;
793: int force;
794:
795: hosed_row = -1;
796: hosed_column = -1;
797: too_fatal = 0;
798: force = cfgPtr->force;
799:
800: /*
801: * We're going to try to be a little intelligent here. If one
802: * component's label is bogus, and we can identify that it's the
803: * *only* one that's gone, we'll mark it as "failed" and allow
804: * the configuration to proceed. This will be the *only* case
805: * that we'll proceed if there would be (otherwise) fatal errors.
806: *
807: * Basically we simply keep a count of how many components had
808: * what serial number. If all but one agree, we simply mark
809: * the disagreeing component as being failed, and allow
810: * things to come up "normally".
811: *
812: * We do this first for serial numbers, and then for "mod_counter".
813: *
814: */
815:
816: num_ser = 0;
817: num_mod = 0;
818: for (r = 0; r < raidPtr->numRow && !fatal_error; r++) {
819: for (c = 0; c < raidPtr->numCol; c++) {
820: ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
821: found = 0;
822: for(i = 0; i < num_ser; i++) {
823: if (ser_values[i] == ci_label->serial_number) {
824: ser_count[i]++;
825: found = 1;
826: break;
827: }
828: }
829: if (!found) {
830: ser_values[num_ser] = ci_label->serial_number;
831: ser_count[num_ser] = 1;
832: num_ser++;
833: if (num_ser > 2) {
834: fatal_error = 1;
835: break;
836: }
837: }
838: found = 0;
839: for(i = 0; i < num_mod; i++) {
840: if (mod_values[i] == ci_label->mod_counter) {
841: mod_count[i]++;
842: found = 1;
843: break;
844: }
845: }
846: if (!found) {
847: mod_values[num_mod] = ci_label->mod_counter;
848: mod_count[num_mod] = 1;
849: num_mod++;
850: if (num_mod > 2) {
851: fatal_error = 1;
852: break;
853: }
854: }
855: }
856: }
857: #if DEBUG
858: printf("raid%d: Summary of serial numbers:\n", raidPtr->raidid);
859: for(i = 0; i < num_ser; i++) {
860: printf("%d %d\n", ser_values[i], ser_count[i]);
861: }
862: printf("raid%d: Summary of mod counters:\n", raidPtr->raidid);
863: for(i = 0; i < num_mod; i++) {
864: printf("%d %d\n", mod_values[i], mod_count[i]);
865: }
866: #endif /* DEBUG */
867: serial_number = ser_values[0];
868: if (num_ser == 2) {
869: if ((ser_count[0] == 1) || (ser_count[1] == 1)) {
870: /* Locate the maverick component. */
871: if (ser_count[1] > ser_count[0]) {
872: serial_number = ser_values[1];
873: }
874: for (r = 0; r < raidPtr->numRow; r++) {
875: for (c = 0; c < raidPtr->numCol; c++) {
876: ci_label =
877: &raidPtr->raid_cinfo[r][c].ci_label;
878: if (serial_number !=
879: ci_label->serial_number) {
880: hosed_row = r;
881: hosed_column = c;
882: break;
883: }
884: }
885: }
886: printf("Hosed component: %s.\n",
887: &cfgPtr->devnames[hosed_row][hosed_column][0]);
888: if (!force) {
889: /*
890: * We'll fail this component, as if there are
891: * other major errors, we aren't forcing things
892: * and we'll abort the config anyways.
893: */
894: raidPtr->Disks[hosed_row][hosed_column].status
895: = rf_ds_failed;
896: raidPtr->numFailures++;
897: raidPtr->status[hosed_row] = rf_rs_degraded;
898: }
899: } else {
900: too_fatal = 1;
901: }
902: if (cfgPtr->parityConfig == '0') {
903: /*
904: * We've identified two different serial numbers.
905: * RAID 0 can't cope with that, so we'll punt.
906: */
907: too_fatal = 1;
908: }
909:
910: }
911:
912: /*
913: * Record the serial number for later. If we bail later, setting
914: * this doesn't matter, otherwise we've got the best guess at the
915: * correct serial number.
916: */
917: raidPtr->serial_number = serial_number;
918:
919: mod_number = mod_values[0];
920: if (num_mod == 2) {
921: if ((mod_count[0] == 1) || (mod_count[1] == 1)) {
922: /* Locate the maverick component. */
923: if (mod_count[1] > mod_count[0]) {
924: mod_number = mod_values[1];
925: } else if (mod_count[1] < mod_count[0]) {
926: mod_number = mod_values[0];
927: } else {
928: /*
929: * Counts of different modification values
930: * are the same. Assume greater value is
931: * the correct one, all other things
932: * considered.
933: */
934: if (mod_values[0] > mod_values[1]) {
935: mod_number = mod_values[0];
936: } else {
937: mod_number = mod_values[1];
938: }
939:
940: }
941: for (r = 0; r < raidPtr->numRow && !too_fatal; r++) {
942: for (c = 0; c < raidPtr->numCol; c++) {
943: ci_label =
944: &raidPtr->raid_cinfo[r][c].ci_label;
945: if (mod_number !=
946: ci_label->mod_counter) {
947: if ((hosed_row == r) &&
948: (hosed_column == c)) {
949: /*
950: * Same one. Can
951: * deal with it.
952: */
953: } else {
954: hosed_row = r;
955: hosed_column = c;
956: if (num_ser != 1) {
957: too_fatal = 1;
958: break;
959: }
960: }
961: }
962: }
963: }
964: printf("Hosed component: %s.\n",
965: &cfgPtr->devnames[hosed_row][hosed_column][0]);
966: if (!force) {
967: /*
968: * We'll fail this component, as if there are
969: * other major errors, we aren't forcing things
970: * and we'll abort the config anyways.
971: */
972: if (raidPtr
973: ->Disks[hosed_row][hosed_column].status !=
974: rf_ds_failed) {
975: raidPtr->Disks[hosed_row]
976: [hosed_column].status =
977: rf_ds_failed;
978: raidPtr->numFailures++;
979: raidPtr->status[hosed_row] =
980: rf_rs_degraded;
981: }
982: }
983: } else {
984: too_fatal = 1;
985: }
986: if (cfgPtr->parityConfig == '0') {
987: /*
988: * We've identified two different mod counters.
989: * RAID 0 can't cope with that, so we'll punt.
990: */
991: too_fatal = 1;
992: }
993: }
994:
995: raidPtr->mod_counter = mod_number;
996:
997: if (too_fatal) {
998: /*
999: * We've had both a serial number mismatch, and a mod_counter
1000: * mismatch -- and they involved two different components !!!
1001: * Bail -- make things fail so that the user must force
1002: * the issue...
1003: */
1004: hosed_row = -1;
1005: hosed_column = -1;
1006: }
1007:
1008: if (num_ser > 2) {
1009: printf("raid%d: Too many different serial numbers !\n",
1010: raidPtr->raidid);
1011: }
1012:
1013: if (num_mod > 2) {
1014: printf("raid%d: Too many different mod counters !\n",
1015: raidPtr->raidid);
1016: }
1017:
1018: /*
1019: * We start by assuming the parity will be good, and flee from
1020: * that notion at the slightest sign of trouble.
1021: */
1022:
1023: parity_good = RF_RAID_CLEAN;
1024: for (r = 0; r < raidPtr->numRow; r++) {
1025: for (c = 0; c < raidPtr->numCol; c++) {
1026: dev_name = &cfgPtr->devnames[r][c][0];
1027: ci_label = &raidPtr->raid_cinfo[r][c].ci_label;
1028:
1029: if ((r == hosed_row) && (c == hosed_column)) {
1030: printf("raid%d: Ignoring %s.\n",
1031: raidPtr->raidid, dev_name);
1032: } else {
1033: rf_print_label_status(raidPtr, r, c, dev_name,
1034: ci_label);
1035: if (rf_check_label_vitals(raidPtr, r, c,
1036: dev_name, ci_label, serial_number,
1037: mod_number)) {
1038: fatal_error = 1;
1039: }
1040: if (ci_label->clean != RF_RAID_CLEAN) {
1041: parity_good = RF_RAID_DIRTY;
1042: }
1043: }
1044: }
1045: }
1046: if (fatal_error) {
1047: parity_good = RF_RAID_DIRTY;
1048: }
1049:
1050: /* We note the state of the parity. */
1051: raidPtr->parity_good = parity_good;
1052:
1053: return(fatal_error);
1054: }
1055:
1056: int
1057: rf_add_hot_spare(RF_Raid_t *raidPtr, RF_SingleComponent_t *sparePtr)
1058: {
1059: RF_RaidDisk_t *disks;
1060: RF_DiskQueue_t *spareQueues;
1061: int ret;
1062: unsigned int bs;
1063: int spare_number;
1064:
1065: #if 0
1066: printf("Just in rf_add_hot_spare: %d.\n", raidPtr->numSpare);
1067: printf("Num col: %d.\n", raidPtr->numCol);
1068: #endif
1069: if (raidPtr->numSpare >= RF_MAXSPARE) {
1070: RF_ERRORMSG1("Too many spares: %d.\n", raidPtr->numSpare);
1071: return(EINVAL);
1072: }
1073:
1074: RF_LOCK_MUTEX(raidPtr->mutex);
1075:
1076: /* The beginning of the spares... */
1077: disks = &raidPtr->Disks[0][raidPtr->numCol];
1078:
1079: spare_number = raidPtr->numSpare;
1080:
1081: ret = rf_ConfigureDisk(raidPtr, sparePtr->component_name,
1082: &disks[spare_number], 0, raidPtr->numCol + spare_number);
1083:
1084: if (ret)
1085: goto fail;
1086: if (disks[spare_number].status != rf_ds_optimal) {
1087: RF_ERRORMSG1("Warning: spare disk %s failed TUR.\n",
1088: sparePtr->component_name);
1089: ret = EINVAL;
1090: goto fail;
1091: } else {
1092: disks[spare_number].status = rf_ds_spare;
1093: DPRINTF6("Spare Disk %d: dev %s numBlocks %ld blockSize %d"
1094: " (%ld MB).\n", spare_number, disks[spare_number].devname,
1095: (long int) disks[spare_number].numBlocks,
1096: disks[spare_number].blockSize,
1097: (long int) disks[spare_number].numBlocks *
1098: disks[spare_number].blockSize / 1024 / 1024);
1099: }
1100:
1101:
1102: /* Check sizes and block sizes on the spare disk. */
1103: bs = 1 << raidPtr->logBytesPerSector;
1104: if (disks[spare_number].blockSize != bs) {
1105: RF_ERRORMSG3("Block size of %d on spare disk %s is not"
1106: " the same as on other disks (%d).\n",
1107: disks[spare_number].blockSize,
1108: disks[spare_number].devname, bs);
1109: ret = EINVAL;
1110: goto fail;
1111: }
1112: if (disks[spare_number].numBlocks < raidPtr->sectorsPerDisk) {
1113: RF_ERRORMSG3("Spare disk %s (%llu blocks) is too small to serve"
1114: " as a spare (need %llu blocks).\n",
1115: disks[spare_number].devname, disks[spare_number].numBlocks,
1116: raidPtr->sectorsPerDisk);
1117: ret = EINVAL;
1118: goto fail;
1119: } else {
1120: if (disks[spare_number].numBlocks >
1121: raidPtr->sectorsPerDisk) {
1122: RF_ERRORMSG2("Warning: truncating spare disk %s to %llu"
1123: " blocks.\n", disks[spare_number].devname,
1124: raidPtr->sectorsPerDisk);
1125:
1126: disks[spare_number].numBlocks = raidPtr->sectorsPerDisk;
1127: }
1128: }
1129:
1130: spareQueues = &raidPtr->Queues[0][raidPtr->numCol];
1131: ret = rf_ConfigureDiskQueue(raidPtr, &spareQueues[spare_number],
1132: 0, raidPtr->numCol + spare_number, raidPtr->qType,
1133: raidPtr->sectorsPerDisk, raidPtr->Disks[0][raidPtr->numCol +
1134: spare_number].dev, raidPtr->maxOutstanding,
1135: &raidPtr->shutdownList, raidPtr->cleanupList);
1136:
1137:
1138: raidPtr->numSpare++;
1139: RF_UNLOCK_MUTEX(raidPtr->mutex);
1140: return (0);
1141:
1142: fail:
1143: RF_UNLOCK_MUTEX(raidPtr->mutex);
1144: return(ret);
1145: }
1146:
1147: int
1148: rf_remove_hot_spare(RF_Raid_t *raidPtr, RF_SingleComponent_t *sparePtr)
1149: {
1150: int spare_number;
1151:
1152: if (raidPtr->numSpare == 0) {
1153: printf("No spares to remove !\n");
1154: return(EINVAL);
1155: }
1156:
1157: spare_number = sparePtr->column;
1158:
1159: return(EINVAL); /* XXX Not implemented yet. */
1160: #if 0
1161: if (spare_number < 0 || spare_number > raidPtr->numSpare) {
1162: return(EINVAL);
1163: }
1164:
1165: /* Verify that this spare isn't in use... */
1166:
1167: /* It's gone... */
1168:
1169: raidPtr->numSpare--;
1170:
1171: return (0);
1172: #endif
1173: }
1174:
1175: int
1176: rf_delete_component(RF_Raid_t *raidPtr, RF_SingleComponent_t *component)
1177: {
1178: RF_RaidDisk_t *disks;
1179:
1180: if ((component->row < 0) ||
1181: (component->row >= raidPtr->numRow) ||
1182: (component->column < 0) ||
1183: (component->column >= raidPtr->numCol)) {
1184: return(EINVAL);
1185: }
1186:
1187: disks = &raidPtr->Disks[component->row][component->column];
1188:
1189: /* 1. This component must be marked as 'failed'. */
1190:
1191: return(EINVAL); /* Not implemented yet. */
1192: }
1193:
1194: int
1195: rf_incorporate_hot_spare(RF_Raid_t *raidPtr, RF_SingleComponent_t *component)
1196: {
1197:
1198: /*
1199: * Issues here include how to 'move' this in if there is IO
1200: * taking place (e.g. component queues and such).
1201: */
1202:
1203: return(EINVAL); /* Not implemented yet. */
1204: }
CVSweb