Annotation of sys/dev/raidframe/rf_openbsdkintf.c, Revision 1.1.1.1
1.1 nbrk 1: /* $OpenBSD: rf_openbsdkintf.c,v 1.42 2007/06/23 03:11:34 krw Exp $ */
2: /* $NetBSD: rf_netbsdkintf.c,v 1.109 2001/07/27 03:30:07 oster Exp $ */
3:
4: /*-
5: * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
6: * All rights reserved.
7: *
8: * This code is derived from software contributed to The NetBSD Foundation
9: * by Greg Oster; Jason R. Thorpe.
10: *
11: * Redistribution and use in source and binary forms, with or without
12: * modification, are permitted provided that the following conditions
13: * are met:
14: * 1. Redistributions of source code must retain the above copyright
15: * notice, this list of conditions and the following disclaimer.
16: * 2. Redistributions in binary form must reproduce the above copyright
17: * notice, this list of conditions and the following disclaimer in the
18: * documentation and/or other materials provided with the distribution.
19: * 3. All advertising materials mentioning features or use of this software
20: * must display the following acknowledgement:
21: * This product includes software developed by the NetBSD
22: * Foundation, Inc. and its contributors.
23: * 4. Neither the name of The NetBSD Foundation nor the names of its
24: * contributors may be used to endorse or promote products derived
25: * from this software without specific prior written permission.
26: *
27: * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
28: * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
29: * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30: * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
31: * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32: * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33: * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34: * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35: * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36: * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37: * POSSIBILITY OF SUCH DAMAGE.
38: */
39:
40: /*
41: * Copyright (c) 1988 University of Utah.
42: * Copyright (c) 1990, 1993
43: * The Regents of the University of California. All rights reserved.
44: *
45: * This code is derived from software contributed to Berkeley by
46: * the Systems Programming Group of the University of Utah Computer
47: * Science Department.
48: *
49: * Redistribution and use in source and binary forms, with or without
50: * modification, are permitted provided that the following conditions
51: * are met:
52: * 1. Redistributions of source code must retain the above copyright
53: * notice, this list of conditions and the following disclaimer.
54: * 2. Redistributions in binary form must reproduce the above copyright
55: * notice, this list of conditions and the following disclaimer in the
56: * documentation and/or other materials provided with the distribution.
57: * 3. Neither the name of the University nor the names of its contributors
58: * may be used to endorse or promote products derived from this software
59: * without specific prior written permission.
60: *
61: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
62: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
63: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
64: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
65: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
66: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
67: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
68: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
69: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
70: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
71: * SUCH DAMAGE.
72: *
73: * from: Utah $Hdr: cd.c 1.6 90/11/28$
74: *
75: * @(#)cd.c 8.2 (Berkeley) 11/16/93
76: */
77:
78: /*
79: * Copyright (c) 1995 Carnegie-Mellon University.
80: * All rights reserved.
81: *
82: * Authors: Mark Holland, Jim Zelenka
83: *
84: * Permission to use, copy, modify and distribute this software and
85: * its documentation is hereby granted, provided that both the copyright
86: * notice and this permission notice appear in all copies of the
87: * software, derivative works or modified versions, and any portions
88: * thereof, and that both notices appear in supporting documentation.
89: *
90: * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
91: * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
92: * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
93: *
94: * Carnegie Mellon requests users of this software to return to
95: *
96: * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
97: * School of Computer Science
98: * Carnegie Mellon University
99: * Pittsburgh PA 15213-3890
100: *
101: * any improvements or extensions that they make and grant Carnegie the
102: * rights to redistribute these changes.
103: */
104:
105: /*****************************************************************************
106: *
107: * rf_kintf.c -- The kernel interface routines for RAIDframe.
108: *
109: *****************************************************************************/
110:
111: #include <sys/errno.h>
112:
113: #include <sys/param.h>
114: #include <sys/pool.h>
115: #include <sys/malloc.h>
116: #include <sys/queue.h>
117: #include <sys/disk.h>
118: #include <sys/device.h>
119: #include <sys/stat.h>
120: #include <sys/ioctl.h>
121: #include <sys/fcntl.h>
122: #include <sys/systm.h>
123: #include <sys/namei.h>
124: #include <sys/conf.h>
125: #include <sys/lock.h>
126: #include <sys/buf.h>
127: #include <sys/user.h>
128: #include <sys/reboot.h>
129:
130: #include "raid.h"
131: #include "rf_raid.h"
132: #include "rf_raidframe.h"
133: #include "rf_copyback.h"
134: #include "rf_dag.h"
135: #include "rf_dagflags.h"
136: #include "rf_desc.h"
137: #include "rf_diskqueue.h"
138: #include "rf_engine.h"
139: #include "rf_acctrace.h"
140: #include "rf_etimer.h"
141: #include "rf_general.h"
142: #include "rf_debugMem.h"
143: #include "rf_kintf.h"
144: #include "rf_options.h"
145: #include "rf_driver.h"
146: #include "rf_parityscan.h"
147: #include "rf_debugprint.h"
148: #include "rf_threadstuff.h"
149: #include "rf_configure.h"
150:
151: int rf_kdebug_level = 0;
152:
153: #ifdef RAIDDEBUG
154: #define db1_printf(a) do { if (rf_kdebug_level > 0) printf a; } while(0)
155: #else /* RAIDDEBUG */
156: #define db1_printf(a) (void)0
157: #endif /* ! RAIDDEBUG */
158:
159: static RF_Raid_t **raidPtrs; /* Global raid device descriptors. */
160:
161: RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex);
162:
163: /* Requests to install a spare table. */
164: static RF_SparetWait_t *rf_sparet_wait_queue;
165:
166: /* Responses from installation process. */
167: static RF_SparetWait_t *rf_sparet_resp_queue;
168:
169: /* Prototypes. */
170: void rf_KernelWakeupFunc(struct buf *);
171: void rf_InitBP(struct buf *, struct vnode *, unsigned, dev_t, RF_SectorNum_t,
172: RF_SectorCount_t, caddr_t, void (*)(struct buf *), void *, int,
173: struct proc *);
174: void raidinit(RF_Raid_t *);
175:
176: void raidattach(int);
177: daddr64_t raidsize(dev_t);
178: int raidopen(dev_t, int, int, struct proc *);
179: int raidclose(dev_t, int, int, struct proc *);
180: int raidioctl(dev_t, u_long, caddr_t, int, struct proc *);
181: int raidwrite(dev_t, struct uio *, int);
182: int raidread(dev_t, struct uio *, int);
183: void raidstrategy(struct buf *);
184: int raiddump(dev_t, daddr64_t, caddr_t, size_t);
185:
186: /*
187: * Pilfered from ccd.c
188: */
struct raidbuf {
	struct buf rf_buf;	/* New I/O buf.  MUST BE FIRST!!!
				 * (so a struct raidbuf * can be used
				 * wherever a struct buf * is expected). */
	struct buf *rf_obp;	/* Ptr. to original I/O buf. */
	int rf_flags;		/* Miscellaneous flags. */
	RF_DiskQueueData_t *req;	/* The request that this was part of. */
};
195:
196: #define RAIDGETBUF(rs) pool_get(&(rs)->sc_cbufpool, PR_NOWAIT)
197: #define RAIDPUTBUF(rs, cbp) pool_put(&(rs)->sc_cbufpool, cbp)
198:
199: /*
200: * Some port (like i386) use a swapgeneric that wants to snoop around
201: * in this raid_cd structure. It is preserved (for now) to remain
202: * compatible with such practice.
203: */
struct cfdriver raid_cd = {
	NULL, "raid", DV_DISK	/* cd_devs is filled in by raidattach(). */
};
207:
208: /*
209: * XXX Not sure if the following should be replacing the raidPtrs above,
210: * or if it should be used in conjunction with that...
211: */
struct raid_softc {
	int sc_flags;		/* Flags (RAIDF_*, below). */
	int sc_cflags;		/* Configuration flags. */
	size_t sc_size;		/* Size of the raid device. */
	char sc_xname[20];	/* XXX external name. */
	struct disk sc_dkdev;	/* Generic disk device info. */
	struct pool sc_cbufpool;	/* Component buffer pool. */
	struct buf sc_q;	/* Used for the device queue. */
};
221:
222: /* sc_flags */
223: #define RAIDF_INITED 0x01 /* Unit has been initialized. */
224: #define RAIDF_WLABEL 0x02 /* Label area is writable. */
225: #define RAIDF_LABELLING 0x04 /* Unit is currently being labelled. */
226: #define RAIDF_WANTED 0x40 /* Someone is waiting to obtain a lock. */
227: #define RAIDF_LOCKED 0x80 /* Unit is locked. */
228:
229: int numraid = 0;
230:
231: /*
232: * Here we define a cfattach structure for inserting any new raid device
233: * into the device tree. This is needed by some archs that look for
234: * bootable devices in there.
235: */
236: int rf_probe(struct device *, void *, void *);
237: void rf_attach(struct device *, struct device *, void *);
238: int rf_detach(struct device *, int);
239: int rf_activate(struct device *, enum devact);
240:
struct cfattach raid_ca = {
	/* Softc size, then the probe/attach/detach/activate hooks above. */
	sizeof(struct raid_softc), rf_probe, rf_attach,
	rf_detach, rf_activate
};
245:
246: /*
247: * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
248: * Be aware that large numbers can allow the driver to consume a lot of
249: * kernel memory, especially on writes, and in degraded mode reads.
250: *
251: * For example: with a stripe width of 64 blocks (32k) and 5 disks,
252: * a single 64K write will typically require 64K for the old data,
253: * 64K for the old parity, and 64K for the new parity, for a total
254: * of 192K (if the parity buffer is not re-used immediately).
255: * Even it if is used immedately, that's still 128K, which when multiplied
256: * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
257: *
258: * Now in degraded mode, for example, a 64K read on the above setup may
259: * require data reconstruction, which will require *all* of the 4 remaining
260: * disks to participate -- 4 * 32K/disk == 128K again.
261: */
262:
263: #ifndef RAIDOUTSTANDING
264: #define RAIDOUTSTANDING 6
265: #endif
266:
267: /* Declared here, and made public, for the benefit of KVM stuff... */
268: struct raid_softc *raid_softc;
269: struct raid_softc **raid_scPtrs;
270:
271: void rf_shutdown_hook(RF_ThreadArg_t);
272: void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *, struct disklabel *);
273: void raidgetdisklabel(dev_t, struct disklabel *, int);
274:
275: int raidlock(struct raid_softc *);
276: void raidunlock(struct raid_softc *);
277:
278: void rf_markalldirty(RF_Raid_t *);
279:
280: struct device *raidrootdev;
281:
282: int findblkmajor(struct device *dv);
283: char *findblkname(int);
284:
285: void rf_ReconThread(struct rf_recon_req *);
286: /* XXX what I want is: */
287: /*void rf_ReconThread(RF_Raid_t *raidPtr);*/
288: void rf_RewriteParityThread(RF_Raid_t *raidPtr);
289: void rf_CopybackThread(RF_Raid_t *raidPtr);
290: void rf_ReconstructInPlaceThread(struct rf_recon_req *);
291: #ifdef RAID_AUTOCONFIG
292: void rf_buildroothack(void *);
293: int rf_reasonable_label(RF_ComponentLabel_t *);
294: #endif /* RAID_AUTOCONFIG */
295:
296: RF_AutoConfig_t *rf_find_raid_components(void);
297: RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
298: int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
299: void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *,
300: RF_Raid_t *);
301: int rf_set_autoconfig(RF_Raid_t *, int);
302: int rf_set_rootpartition(RF_Raid_t *, int);
303: void rf_release_all_vps(RF_ConfigSet_t *);
304: void rf_cleanup_config_set(RF_ConfigSet_t *);
305: int rf_have_enough_components(RF_ConfigSet_t *);
306: int rf_auto_config_set(RF_ConfigSet_t *, int *);
307:
308: #ifdef RAID_AUTOCONFIG
309: static int raidautoconfig = 0; /*
310: * Debugging, mostly. Set to 0 to not
311: * allow autoconfig to take place.
312: * Note that this is overridden by having
313: * RAID_AUTOCONFIG as an option in the
314: * kernel config file.
315: */
316: #endif /* RAID_AUTOCONFIG */
317:
/*
 * Autoconf match hook for the raid pseudo-device.  Never matches:
 * raid units are created by software configuration, not discovered.
 */
int
rf_probe(struct device *parent, void *match_, void *aux)
{
	return (0);
}
323:
/*
 * Autoconf attach hook for the raid pseudo-device.  All real setup is
 * done in raidattach()/raidinit(); nothing to do here.
 */
void
rf_attach(struct device *parent, struct device *self, void *aux)
{
}
329:
/*
 * Autoconf detach hook for the raid pseudo-device.  No resources are
 * held by the autoconf layer, so detaching always succeeds.
 */
int
rf_detach(struct device *self, int flags)
{
	return (0);
}
335:
336: int
337: rf_activate(struct device *self, enum devact act)
338: {
339: return 0;
340: }
341:
/*
 * raidattach: pseudo-device attach entry point, called once at boot
 * with the number of raid units to support.  Allocates and zeroes the
 * global per-unit tables (raidPtrs, raid_softc, raid_scPtrs,
 * raidrootdev), boots the RAIDframe core, and, when built with
 * RAID_AUTOCONFIG, locates on-disk components and hands the resulting
 * configuration sets to rf_buildroothack().
 */
void
raidattach(int num)
{
	int raidID;
	int i, rc;
#ifdef RAID_AUTOCONFIG
	RF_AutoConfig_t *ac_list;	/* Autoconfig list. */
	RF_ConfigSet_t *config_sets;
#endif	/* RAID_AUTOCONFIG */

	db1_printf(("raidattach: Asked for %d units\n", num));

	if (num <= 0) {
#ifdef DIAGNOSTIC
		panic("raidattach: count <= 0");
#endif	/* DIAGNOSTIC */
		return;
	}

	/* This is where all the initialization stuff gets done. */

	numraid = num;

	/* Make some space for requested number of units... */
	RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
	if (raidPtrs == NULL) {
		panic("raidPtrs is NULL!!");
	}

	rc = rf_mutex_init(&rf_sparet_wait_mutex);
	if (rc) {
		RF_PANIC();
	}

	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;

	for (i = 0; i < num; i++)
		raidPtrs[i] = NULL;

	/* Bring up the RAIDframe core; a failure here is unrecoverable. */
	rc = rf_BootRaidframe();
	if (rc == 0)
		printf("Kernelized RAIDframe activated\n");
	else
		panic("Serious error booting RAID !!!");

	/*
	 * Put together some datastructures like the CCD device does...
	 * This lets us lock the device and what-not when it gets opened.
	 */

	raid_softc = (struct raid_softc *)
	    malloc(num * sizeof(struct raid_softc), M_RAIDFRAME, M_NOWAIT);
	if (raid_softc == NULL) {
		printf("WARNING: no memory for RAIDframe driver\n");
		return;
	}

	bzero(raid_softc, num * sizeof (struct raid_softc));

	raid_scPtrs = (struct raid_softc **)
	    malloc(num * sizeof(struct raid_softc *), M_RAIDFRAME,
	    M_NOWAIT);
	if (raid_scPtrs == NULL) {
		printf("WARNING: no memory for RAIDframe driver\n");
		return;
	}

	bzero(raid_scPtrs, num * sizeof (struct raid_softc *));

	/*
	 * Fake up one struct device per unit so ports that search the
	 * device list for a boot device (swapgeneric) can find us.
	 */
	raidrootdev = (struct device *)malloc(num * sizeof(struct device),
	    M_RAIDFRAME, M_NOWAIT);
	if (raidrootdev == NULL) {
		panic("No memory for RAIDframe driver!!?!?!");
	}

	for (raidID = 0; raidID < num; raidID++) {
#if 0
		SIMPLEQ_INIT(&raid_softc[raidID].sc_q);
#endif

		raidrootdev[raidID].dv_class = DV_DISK;
		raidrootdev[raidID].dv_cfdata = NULL;
		raidrootdev[raidID].dv_unit = raidID;
		raidrootdev[raidID].dv_parent = NULL;
		raidrootdev[raidID].dv_flags = 0;
		snprintf(raidrootdev[raidID].dv_xname,
		    sizeof raidrootdev[raidID].dv_xname,"raid%d",raidID);

		RF_Calloc(raidPtrs[raidID], 1, sizeof (RF_Raid_t),
		    (RF_Raid_t *));
		if (raidPtrs[raidID] == NULL) {
			/*
			 * Shrink numraid so the already-initialized
			 * units remain usable and later loops never
			 * touch the dead ones.
			 */
			printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
			numraid = raidID;
			return;
		}
	}

	raid_cd.cd_devs = (void **) raid_scPtrs;
	raid_cd.cd_ndevs = num;

#ifdef RAID_AUTOCONFIG
	raidautoconfig = 1;

	if (raidautoconfig) {
		/* 1. Locate all RAID components on the system. */

#ifdef RAIDDEBUG
		printf("Searching for raid components...\n");
#endif	/* RAIDDEBUG */
		ac_list = rf_find_raid_components();

		/* 2. Sort them into their respective sets. */

		config_sets = rf_create_auto_sets(ac_list);

		/*
		 * 3. Evaluate each set and configure the valid ones
		 * This gets done in rf_buildroothack().
		 */

		/*
		 * Schedule the creation of the thread to do the
		 * "/ on RAID" stuff.
		 */

		rf_buildroothack(config_sets);

	}
#endif	/* RAID_AUTOCONFIG */

}
472:
473: #ifdef RAID_AUTOCONFIG
/*
 * rf_buildroothack: walk the list of detected configuration sets,
 * auto-configure every complete set whose component labels request it,
 * and release the resources of the rest.  If exactly one configured
 * set is flagged rootable (and the user didn't boot with RB_ASKNAME),
 * point rootdev at it so the kernel can mount / from RAID; with more
 * than one candidate, fall back to asking the user.
 */
void
rf_buildroothack(void *arg)
{
	RF_ConfigSet_t *config_sets = arg;
	RF_ConfigSet_t *cset;
	RF_ConfigSet_t *next_cset;
	int retcode;
	int raidID;
	int rootID;
	int num_root;
	int majdev;

	rootID = 0;
	num_root = 0;
	cset = config_sets;
	while (cset != NULL) {
		/* rf_cleanup_config_set() frees cset; grab next first. */
		next_cset = cset->next;
		if (rf_have_enough_components(cset) &&
		    cset->ac->clabel->autoconfigure == 1) {
			retcode = rf_auto_config_set(cset, &raidID);
			if (!retcode) {
				if (cset->rootable) {
					/* Remember the latest candidate. */
					rootID = raidID;
#ifdef RAIDDEBUG
					printf("eligible root device %d:"
					    " raid%d\n", num_root, rootID);
#endif	/* RAIDDEBUG */
					num_root++;
				}
			} else {
				/* The autoconfig didn't work :( */
#ifdef RAIDDEBUG
				printf("Autoconfig failed with code %d for"
				    " raid%d\n", retcode, raidID);
#endif	/* RAIDDEBUG */
				rf_release_all_vps(cset);
			}
		} else {
			/*
			 * We're not autoconfiguring this set...
			 * Release the associated resources.
			 */
			rf_release_all_vps(cset);
		}
		/* Cleanup. */
		rf_cleanup_config_set(cset);
		cset = next_cset;
	}
	if (boothowto & RB_ASKNAME) {
		/* We don't auto-config... */
	} else {
		/* They didn't ask, and we found something bootable... */

		if (num_root == 1) {
			majdev = findblkmajor(&raidrootdev[rootID]);
			if (majdev < 0)
				boothowto |= RB_ASKNAME;
			else {
				rootdev = MAKEDISKDEV(majdev, rootID, 0);
				boothowto |= RB_DFLTROOT;
			}
		} else if (num_root > 1) {
			/* We can't guess... Require the user to answer... */
			boothowto |= RB_ASKNAME;
		}
	}
}
541: #endif /* RAID_AUTOCONFIG */
542:
/*
 * rf_shutdown_hook: system-shutdown hook for one raid unit (arg is
 * that unit's RF_Raid_t).  Shuts the set down cleanly and detaches
 * the unit, unless the system is going down without syncing
 * (RB_NOSYNC), in which case nothing is touched.
 */
void
rf_shutdown_hook(RF_ThreadArg_t arg)
{
	int unit;
	struct raid_softc *rs;
	RF_Raid_t *raidPtr;

	/* Don't do it if we are not "safe". */
	if (boothowto & RB_NOSYNC)
		return;

	raidPtr = (RF_Raid_t *) arg;
	unit = raidPtr->raidid;
	rs = &raid_softc[unit];

	/* Shutdown the system. */

	/* This hook is running now; clear its cookie so it isn't reused. */
	if (rf_hook_cookies != NULL && rf_hook_cookies[unit] != NULL)
		rf_hook_cookies[unit] = NULL;

	rf_Shutdown(raidPtr);

	pool_destroy(&rs->sc_cbufpool);

	/* It's no longer initialized... */
	rs->sc_flags &= ~RAIDF_INITED;

	/* config_detach the device. */
	config_detach(device_lookup(&raid_cd, unit), 0);

	/* Detach the disk. */
	disk_detach(&rs->sc_dkdev);
}
576:
577: daddr64_t
578: raidsize(dev_t dev)
579: {
580: struct raid_softc *rs;
581: struct disklabel *lp;
582: int part, unit, omask, size;
583:
584: unit = DISKUNIT(dev);
585: if (unit >= numraid)
586: return (-1);
587: rs = &raid_softc[unit];
588:
589: if ((rs->sc_flags & RAIDF_INITED) == 0)
590: return (-1);
591:
592: part = DISKPART(dev);
593: omask = rs->sc_dkdev.dk_openmask & (1 << part);
594: lp = rs->sc_dkdev.dk_label;
595:
596: if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
597: return (-1);
598:
599: if (lp->d_partitions[part].p_fstype != FS_SWAP)
600: size = -1;
601: else
602: size = DL_GETPSIZE(&lp->d_partitions[part]) *
603: (lp->d_secsize / DEV_BSIZE);
604:
605: if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
606: return (-1);
607:
608: return (size);
609:
610: }
611:
612: int
613: raiddump(dev_t dev, daddr64_t blkno, caddr_t va, size_t size)
614: {
615: /* Not implemented. */
616: return (ENXIO);
617: }
618:
/* ARGSUSED */
/*
 * raidopen: block/character open entry point.  Validates the unit and
 * partition, (re)reads the disklabel on the first open of a configured
 * unit, records the open in the char/block open masks, and marks all
 * components dirty on the very first open so a crash is detectable.
 * The unit lock is held across the whole operation to serialize
 * against configuration/unconfiguration.
 */
int
raidopen(dev_t dev, int flags, int fmt, struct proc *p)
{
	int unit = DISKUNIT(dev);
	struct raid_softc *rs;
	int part, pmask;
	int error = 0;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	if ((error = raidlock(rs)) != 0)
		return (error);

	part = DISKPART(dev);
	pmask = (1 << part);

	db1_printf(
	    ("Opening raid device number: %d partition: %d\n", unit, part));


	/* First open of a configured unit: refresh the in-core label. */
	if ((rs->sc_flags & RAIDF_INITED) && (rs->sc_dkdev.dk_openmask == 0))
		raidgetdisklabel(dev, rs->sc_dkdev.dk_label, 0);

	/* Make sure that this partition exists. */

	if (part != RAW_PART) {
		db1_printf(("Not a raw partition..\n"));
		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
		    ((part >= rs->sc_dkdev.dk_label->d_npartitions) ||
		    (rs->sc_dkdev.dk_label->d_partitions[part].p_fstype ==
		    FS_UNUSED))) {
			error = ENXIO;
			raidunlock(rs);
			db1_printf(("Bailing out...\n"));
			return (error);
		}
	}

	/* Prevent this unit from being unconfigured while opened. */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask |= pmask;
		break;
	}

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/*
		 * First one... Mark things as dirty... Note that we *MUST*
		 * have done a configure before this.  I DO NOT WANT TO BE
		 * SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
		 * THAT THEY BELONG TOGETHER!!!!!
		 */
		/*
		 * XXX should check to see if we're only open for reading
		 * here... If so, we needn't do this, but then need some
		 * other way of keeping track of what's happened...
		 */

		rf_markalldirty( raidPtrs[unit] );
	}

	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

	raidunlock(rs);

	return (error);
}
695:
/* ARGSUSED */
/*
 * raidclose: block/character close entry point.  Clears this
 * partition from the appropriate open mask and, on the last close of
 * a configured unit, writes final (clean) component labels.  Always
 * returns 0 once the unit lock is acquired.
 */
int
raidclose(dev_t dev, int flags, int fmt, struct proc *p)
{
	int unit = DISKUNIT(dev);
	struct raid_softc *rs;
	int error = 0;
	int part;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	if ((error = raidlock(rs)) != 0)
		return (error);

	part = DISKPART(dev);

	/* ...that much closer to allowing unconfiguration... */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
		break;
	}
	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/*
		 * Last one... Device is not unconfigured yet.
		 * Device shutdown has taken care of setting the
		 * clean bits if RAIDF_INITED is not set.
		 * Mark things as clean...
		 */
		db1_printf(("Last one on raid%d. Updating status.\n",unit));
		rf_update_component_labels(raidPtrs[unit],
		    RF_FINAL_COMPONENT_UPDATE);
	}

	raidunlock(rs);
	return (0);
}
743:
744: void
745: raidstrategy(struct buf *bp)
746: {
747: int s;
748:
749: unsigned int raidID = DISKUNIT(bp->b_dev);
750: RF_Raid_t *raidPtr;
751: struct raid_softc *rs = &raid_softc[raidID];
752: struct disklabel *lp;
753: int wlabel;
754:
755: s = splbio();
756:
757: if ((rs->sc_flags & RAIDF_INITED) ==0) {
758: bp->b_error = ENXIO;
759: bp->b_flags |= B_ERROR;
760: bp->b_resid = bp->b_bcount;
761: biodone(bp);
762: goto raidstrategy_end;
763: }
764: if (raidID >= numraid || !raidPtrs[raidID]) {
765: bp->b_error = ENODEV;
766: bp->b_flags |= B_ERROR;
767: bp->b_resid = bp->b_bcount;
768: biodone(bp);
769: goto raidstrategy_end;
770: }
771: raidPtr = raidPtrs[raidID];
772: if (!raidPtr->valid) {
773: bp->b_error = ENODEV;
774: bp->b_flags |= B_ERROR;
775: bp->b_resid = bp->b_bcount;
776: biodone(bp);
777: goto raidstrategy_end;
778: }
779: if (bp->b_bcount == 0) {
780: db1_printf(("b_bcount is zero..\n"));
781: biodone(bp);
782: goto raidstrategy_end;
783: }
784: lp = rs->sc_dkdev.dk_label;
785:
786: /*
787: * Do bounds checking and adjust transfer. If there's an
788: * error, the bounds check will flag that for us.
789: */
790: wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
791: if (DISKPART(bp->b_dev) != RAW_PART)
792: if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
793: db1_printf(("Bounds check failed!!:%d %d\n",
794: (int)bp->b_blkno, (int)wlabel));
795: biodone(bp);
796: goto raidstrategy_end;
797: }
798:
799: bp->b_resid = 0;
800:
801: bp->b_actf = rs->sc_q.b_actf;
802: rs->sc_q.b_actf = bp;
803: rs->sc_q.b_active++;
804:
805: raidstart(raidPtrs[raidID]);
806:
807: raidstrategy_end:
808: splx(s);
809: }
810:
811: /* ARGSUSED */
812: int
813: raidread(dev_t dev, struct uio *uio, int flags)
814: {
815: int unit = DISKUNIT(dev);
816: struct raid_softc *rs;
817: int part;
818:
819: if (unit >= numraid)
820: return (ENXIO);
821: rs = &raid_softc[unit];
822:
823: if ((rs->sc_flags & RAIDF_INITED) == 0)
824: return (ENXIO);
825: part = DISKPART(dev);
826:
827: db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
828:
829: return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
830: }
831:
832: /* ARGSUSED */
833: int
834: raidwrite(dev_t dev, struct uio *uio, int flags)
835: {
836: int unit = DISKUNIT(dev);
837: struct raid_softc *rs;
838:
839: if (unit >= numraid)
840: return (ENXIO);
841: rs = &raid_softc[unit];
842:
843: if ((rs->sc_flags & RAIDF_INITED) == 0)
844: return (ENXIO);
845: db1_printf(("raidwrite\n"));
846: return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
847: }
848:
849: int
850: raidioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p)
851: {
852: int unit = DISKUNIT(dev);
853: int error = 0;
854: int part, pmask;
855: struct raid_softc *rs;
856: RF_Config_t *k_cfg, *u_cfg;
857: RF_Raid_t *raidPtr;
858: RF_RaidDisk_t *diskPtr;
859: RF_AccTotals_t *totals;
860: RF_DeviceConfig_t *d_cfg, **ucfgp;
861: u_char *specific_buf;
862: int retcode = 0;
863: int row;
864: int column;
865: struct rf_recon_req *rrcopy, *rr;
866: RF_ComponentLabel_t *clabel;
867: RF_ComponentLabel_t ci_label;
868: RF_ComponentLabel_t **clabel_ptr;
869: RF_SingleComponent_t *sparePtr,*componentPtr;
870: RF_SingleComponent_t hot_spare;
871: RF_SingleComponent_t component;
872: RF_ProgressInfo_t progressInfo, **progressInfoPtr;
873: int i, j, d;
874:
875: if (unit >= numraid)
876: return (ENXIO);
877: rs = &raid_softc[unit];
878: raidPtr = raidPtrs[unit];
879:
880: db1_printf(("raidioctl: %d %d %d %d\n", (int)dev, (int)DISKPART(dev),
881: (int)unit, (int)cmd));
882:
883: /* Must be open for writes for these commands... */
884: switch (cmd) {
885: case DIOCSDINFO:
886: case DIOCWDINFO:
887: case DIOCWLABEL:
888: if ((flag & FWRITE) == 0)
889: return (EBADF);
890: }
891:
892: /* Must be initialized for these... */
893: switch (cmd) {
894: case DIOCGDINFO:
895: case DIOCSDINFO:
896: case DIOCWDINFO:
897: case DIOCGPART:
898: case DIOCWLABEL:
899: case DIOCGPDINFO:
900: case RAIDFRAME_SHUTDOWN:
901: case RAIDFRAME_REWRITEPARITY:
902: case RAIDFRAME_GET_INFO:
903: case RAIDFRAME_RESET_ACCTOTALS:
904: case RAIDFRAME_GET_ACCTOTALS:
905: case RAIDFRAME_KEEP_ACCTOTALS:
906: case RAIDFRAME_GET_SIZE:
907: case RAIDFRAME_FAIL_DISK:
908: case RAIDFRAME_COPYBACK:
909: case RAIDFRAME_CHECK_RECON_STATUS:
910: case RAIDFRAME_CHECK_RECON_STATUS_EXT:
911: case RAIDFRAME_GET_COMPONENT_LABEL:
912: case RAIDFRAME_SET_COMPONENT_LABEL:
913: case RAIDFRAME_ADD_HOT_SPARE:
914: case RAIDFRAME_REMOVE_HOT_SPARE:
915: case RAIDFRAME_INIT_LABELS:
916: case RAIDFRAME_REBUILD_IN_PLACE:
917: case RAIDFRAME_CHECK_PARITY:
918: case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
919: case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
920: case RAIDFRAME_CHECK_COPYBACK_STATUS:
921: case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
922: case RAIDFRAME_SET_AUTOCONFIG:
923: case RAIDFRAME_SET_ROOT:
924: case RAIDFRAME_DELETE_COMPONENT:
925: case RAIDFRAME_INCORPORATE_HOT_SPARE:
926: if ((rs->sc_flags & RAIDF_INITED) == 0)
927: return (ENXIO);
928: }
929:
930: switch (cmd) {
931: /* Configure the system. */
932: case RAIDFRAME_CONFIGURE:
933:
934: if (raidPtr->valid) {
935: /* There is a valid RAID set running on this unit ! */
936: printf("raid%d: Device already configured!\n",unit);
937: return(EINVAL);
938: }
939:
940: /*
941: * Copy-in the configuration information.
942: * data points to a pointer to the configuration structure.
943: */
944: u_cfg = *((RF_Config_t **)data);
945: RF_Malloc(k_cfg, sizeof (RF_Config_t), (RF_Config_t *));
946: if (k_cfg == NULL) {
947: return (ENOMEM);
948: }
949: retcode = copyin((caddr_t)u_cfg, (caddr_t)k_cfg,
950: sizeof (RF_Config_t));
951: if (retcode) {
952: RF_Free(k_cfg, sizeof(RF_Config_t));
953: return (retcode);
954: }
955:
956: /*
957: * Allocate a buffer for the layout-specific data,
958: * and copy it in.
959: */
960: if (k_cfg->layoutSpecificSize) {
961: if (k_cfg->layoutSpecificSize > 10000) {
962: /* Sanity check. */
963: RF_Free(k_cfg, sizeof(RF_Config_t));
964: return (EINVAL);
965: }
966: RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
967: (u_char *));
968: if (specific_buf == NULL) {
969: RF_Free(k_cfg, sizeof (RF_Config_t));
970: return (ENOMEM);
971: }
972: retcode = copyin(k_cfg->layoutSpecific,
973: (caddr_t)specific_buf, k_cfg->layoutSpecificSize);
974: if (retcode) {
975: RF_Free(k_cfg, sizeof(RF_Config_t));
976: RF_Free(specific_buf,
977: k_cfg->layoutSpecificSize);
978: return (retcode);
979: }
980: } else
981: specific_buf = NULL;
982: k_cfg->layoutSpecific = specific_buf;
983:
984: /*
985: * We should do some kind of sanity check on the
986: * configuration.
987: * Store the sum of all the bytes in the last byte ?
988: */
989:
990: /*
991: * Clear the entire RAID descriptor, just to make sure
992: * there is no stale data left in the case of a
993: * reconfiguration.
994: */
995: bzero((char *) raidPtr, sizeof(RF_Raid_t));
996:
997: /* Configure the system. */
998: raidPtr->raidid = unit;
999:
1000: retcode = rf_Configure(raidPtr, k_cfg, NULL);
1001:
1002: if (retcode == 0) {
1003:
1004: /*
1005: * Allow this many simultaneous IO's to
1006: * this RAID device.
1007: */
1008: raidPtr->openings = RAIDOUTSTANDING;
1009:
1010: raidinit(raidPtr);
1011: rf_markalldirty(raidPtr);
1012: }
1013:
1014: /* Free the buffers. No return code here. */
1015: if (k_cfg->layoutSpecificSize) {
1016: RF_Free(specific_buf, k_cfg->layoutSpecificSize);
1017: }
1018: RF_Free(k_cfg, sizeof (RF_Config_t));
1019:
1020: return (retcode);
1021:
1022: case RAIDFRAME_SHUTDOWN:
1023: /* Shutdown the system. */
1024:
1025: if ((error = raidlock(rs)) != 0)
1026: return (error);
1027:
1028: /*
1029: * If somebody has a partition mounted, we shouldn't
1030: * shutdown.
1031: */
1032:
1033: part = DISKPART(dev);
1034: pmask = (1 << part);
1035: if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
1036: ((rs->sc_dkdev.dk_bopenmask & pmask) &&
1037: (rs->sc_dkdev.dk_copenmask & pmask))) {
1038: raidunlock(rs);
1039: return (EBUSY);
1040: }
1041:
1042: if ((retcode = rf_Shutdown(raidPtr)) == 0) {
1043:
1044: pool_destroy(&rs->sc_cbufpool);
1045:
1046: /* It's no longer initialized... */
1047: rs->sc_flags &= ~RAIDF_INITED;
1048:
1049: /* config_detach the device. */
1050: config_detach(device_lookup(&raid_cd, unit), 0);
1051:
1052: /* Detach the disk. */
1053: disk_detach(&rs->sc_dkdev);
1054: }
1055:
1056: raidunlock(rs);
1057:
1058: return (retcode);
1059:
1060: case RAIDFRAME_GET_COMPONENT_LABEL:
1061: clabel_ptr = (RF_ComponentLabel_t **) data;
1062: /*
1063: * We need to read the component label for the disk indicated
1064: * by row,column in clabel.
1065: */
1066:
1067: /*
1068: * For practice, let's get it directly from disk, rather
1069: * than from the in-core copy.
1070: */
1071: RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
1072: (RF_ComponentLabel_t *));
1073: if (clabel == NULL)
1074: return (ENOMEM);
1075:
1076: bzero((char *) clabel, sizeof(RF_ComponentLabel_t));
1077:
1078: retcode = copyin( *clabel_ptr, clabel,
1079: sizeof(RF_ComponentLabel_t));
1080:
1081: if (retcode) {
1082: RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1083: return(retcode);
1084: }
1085:
1086: row = clabel->row;
1087: column = clabel->column;
1088:
1089: if ((row < 0) || (row >= raidPtr->numRow) ||
1090: (column < 0) || (column >= raidPtr->numCol)) {
1091: RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1092: return(EINVAL);
1093: }
1094:
1095: raidread_component_label(raidPtr->Disks[row][column].dev,
1096: raidPtr->raid_cinfo[row][column].ci_vp, clabel );
1097:
1098: retcode = copyout((caddr_t) clabel,
1099: (caddr_t) *clabel_ptr,
1100: sizeof(RF_ComponentLabel_t));
1101: RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1102: return (retcode);
1103:
1104: case RAIDFRAME_SET_COMPONENT_LABEL:
1105: clabel = (RF_ComponentLabel_t *) data;
1106:
1107: /* XXX check the label for valid stuff... */
1108: /*
1109: * Note that some things *should not* get modified --
1110: * the user should be re-initing the labels instead of
1111: * trying to patch things.
1112: */
1113:
1114: #ifdef RAIDDEBUG
1115: printf("Got component label:\n");
1116: printf("Version: %d\n",clabel->version);
1117: printf("Serial Number: %d\n",clabel->serial_number);
1118: printf("Mod counter: %d\n",clabel->mod_counter);
1119: printf("Row: %d\n", clabel->row);
1120: printf("Column: %d\n", clabel->column);
1121: printf("Num Rows: %d\n", clabel->num_rows);
1122: printf("Num Columns: %d\n", clabel->num_columns);
1123: printf("Clean: %d\n", clabel->clean);
1124: printf("Status: %d\n", clabel->status);
1125: #endif /* RAIDDEBUG */
1126:
1127: row = clabel->row;
1128: column = clabel->column;
1129:
1130: if ((row < 0) || (row >= raidPtr->numRow) ||
1131: (column < 0) || (column >= raidPtr->numCol)) {
1132: return(EINVAL);
1133: }
1134:
1135: /* XXX this isn't allowed to do anything for now :-) */
1136: #if 0
1137: raidwrite_component_label(raidPtr->Disks[row][column].dev,
1138: raidPtr->raid_cinfo[row][column].ci_vp, clabel );
1139: #endif
1140: return (0);
1141:
1142: case RAIDFRAME_INIT_LABELS:
1143: clabel = (RF_ComponentLabel_t *) data;
1144: /*
1145: * We only want the serial number from the above.
1146: * We get all the rest of the information from
1147: * the config that was used to create this RAID
1148: * set.
1149: */
1150:
1151: raidPtr->serial_number = clabel->serial_number;
1152:
1153: raid_init_component_label(raidPtr, &ci_label);
1154: ci_label.serial_number = clabel->serial_number;
1155:
1156: for(row=0;row<raidPtr->numRow;row++) {
1157: ci_label.row = row;
1158: for(column=0;column<raidPtr->numCol;column++) {
1159: diskPtr = &raidPtr->Disks[row][column];
1160: if (!RF_DEAD_DISK(diskPtr->status)) {
1161: ci_label.partitionSize =
1162: diskPtr->partitionSize;
1163: ci_label.column = column;
1164: raidwrite_component_label(
1165: raidPtr->Disks[row][column].dev,
1166: raidPtr->raid_cinfo[row][column].ci_vp,
1167: &ci_label );
1168: }
1169: }
1170: }
1171:
1172: return (retcode);
1173:
1174: case RAIDFRAME_REWRITEPARITY:
1175:
1176: if (raidPtr->Layout.map->faultsTolerated == 0) {
1177: /* Parity for RAID 0 is trivially correct. */
1178: raidPtr->parity_good = RF_RAID_CLEAN;
1179: return(0);
1180: }
1181:
1182:
1183: if (raidPtr->parity_rewrite_in_progress == 1) {
1184: /* Re-write is already in progress ! */
1185: return(EINVAL);
1186: }
1187:
1188: retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1189: rf_RewriteParityThread,
1190: raidPtr,"raid_parity");
1191:
1192: return (retcode);
1193:
1194: case RAIDFRAME_SET_AUTOCONFIG:
1195: d = rf_set_autoconfig(raidPtr, *(int *) data);
1196: db1_printf(("New autoconfig value is: %d\n", d));
1197: *(int *) data = d;
1198: return (retcode);
1199:
1200: case RAIDFRAME_SET_ROOT:
1201: d = rf_set_rootpartition(raidPtr, *(int *) data);
1202: db1_printf(("New rootpartition value is: %d\n", d));
1203: *(int *) data = d;
1204: return (retcode);
1205:
1206:
1207: case RAIDFRAME_ADD_HOT_SPARE:
1208: sparePtr = (RF_SingleComponent_t *) data;
1209: memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
1210: retcode = rf_add_hot_spare(raidPtr, &hot_spare);
1211: return(retcode);
1212:
1213: case RAIDFRAME_REMOVE_HOT_SPARE:
1214: return(retcode);
1215:
1216: case RAIDFRAME_DELETE_COMPONENT:
1217: componentPtr = (RF_SingleComponent_t *)data;
1218: memcpy( &component, componentPtr,
1219: sizeof(RF_SingleComponent_t));
1220: retcode = rf_delete_component(raidPtr, &component);
1221: return(retcode);
1222:
1223: case RAIDFRAME_INCORPORATE_HOT_SPARE:
1224: componentPtr = (RF_SingleComponent_t *)data;
1225: memcpy( &component, componentPtr,
1226: sizeof(RF_SingleComponent_t));
1227: retcode = rf_incorporate_hot_spare(raidPtr, &component);
1228: return(retcode);
1229:
1230: case RAIDFRAME_REBUILD_IN_PLACE:
1231:
1232: if (raidPtr->Layout.map->faultsTolerated == 0) {
1233: /* Can't do this on a RAID 0 !! */
1234: return(EINVAL);
1235: }
1236:
1237: if (raidPtr->recon_in_progress == 1) {
1238: /* A reconstruct is already in progress ! */
1239: return(EINVAL);
1240: }
1241:
1242: componentPtr = (RF_SingleComponent_t *) data;
1243: memcpy( &component, componentPtr,
1244: sizeof(RF_SingleComponent_t));
1245: row = component.row;
1246: column = component.column;
1247: db1_printf(("Rebuild: %d %d\n",row, column));
1248: if ((row < 0) || (row >= raidPtr->numRow) ||
1249: (column < 0) || (column >= raidPtr->numCol)) {
1250: return(EINVAL);
1251: }
1252:
1253: RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1254: if (rrcopy == NULL)
1255: return(ENOMEM);
1256:
1257: rrcopy->raidPtr = (void *) raidPtr;
1258: rrcopy->row = row;
1259: rrcopy->col = column;
1260:
1261: retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1262: rf_ReconstructInPlaceThread,
1263: rrcopy,"raid_reconip");
1264:
1265: return (retcode);
1266:
1267: case RAIDFRAME_GET_INFO:
1268: if (!raidPtr->valid)
1269: return (ENODEV);
1270: ucfgp = (RF_DeviceConfig_t **) data;
1271: RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1272: (RF_DeviceConfig_t *));
1273: if (d_cfg == NULL)
1274: return (ENOMEM);
1275: bzero((char *) d_cfg, sizeof(RF_DeviceConfig_t));
1276: d_cfg->rows = raidPtr->numRow;
1277: d_cfg->cols = raidPtr->numCol;
1278: d_cfg->ndevs = raidPtr->numRow * raidPtr->numCol;
1279: if (d_cfg->ndevs >= RF_MAX_DISKS) {
1280: RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1281: return (ENOMEM);
1282: }
1283: d_cfg->nspares = raidPtr->numSpare;
1284: if (d_cfg->nspares >= RF_MAX_DISKS) {
1285: RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1286: return (ENOMEM);
1287: }
1288: d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1289: d = 0;
1290: for (i = 0; i < d_cfg->rows; i++) {
1291: for (j = 0; j < d_cfg->cols; j++) {
1292: d_cfg->devs[d] = raidPtr->Disks[i][j];
1293: d++;
1294: }
1295: }
1296: for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1297: d_cfg->spares[i] = raidPtr->Disks[0][j];
1298: }
1299: retcode = copyout((caddr_t) d_cfg, (caddr_t) * ucfgp,
1300: sizeof(RF_DeviceConfig_t));
1301: RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1302:
1303: return (retcode);
1304:
1305: case RAIDFRAME_CHECK_PARITY:
1306: *(int *) data = raidPtr->parity_good;
1307: return (0);
1308:
1309: case RAIDFRAME_RESET_ACCTOTALS:
1310: bzero(&raidPtr->acc_totals, sizeof(raidPtr->acc_totals));
1311: return (0);
1312:
1313: case RAIDFRAME_GET_ACCTOTALS:
1314: totals = (RF_AccTotals_t *) data;
1315: *totals = raidPtr->acc_totals;
1316: return (0);
1317:
1318: case RAIDFRAME_KEEP_ACCTOTALS:
1319: raidPtr->keep_acc_totals = *(int *)data;
1320: return (0);
1321:
1322: case RAIDFRAME_GET_SIZE:
1323: *(int *) data = raidPtr->totalSectors;
1324: return (0);
1325:
1326: /* Fail a disk & optionally start reconstruction. */
1327: case RAIDFRAME_FAIL_DISK:
1328: rr = (struct rf_recon_req *)data;
1329:
1330: if (rr->row < 0 || rr->row >= raidPtr->numRow ||
1331: rr->col < 0 || rr->col >= raidPtr->numCol)
1332: return (EINVAL);
1333:
1334: db1_printf(("raid%d: Failing the disk: row: %d col: %d\n",
1335: unit, rr->row, rr->col));
1336:
1337: /*
1338: * Make a copy of the recon request so that we don't
1339: * rely on the user's buffer.
1340: */
1341: RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1342: if (rrcopy == NULL)
1343: return(ENOMEM);
1344: bcopy(rr, rrcopy, sizeof(*rr));
1345: rrcopy->raidPtr = (void *)raidPtr;
1346:
1347: retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1348: rf_ReconThread,
1349: rrcopy,"raid_recon");
1350: return (0);
1351:
1352: /*
1353: * Invoke a copyback operation after recon on whatever
1354: * disk needs it, if any.
1355: */
1356: case RAIDFRAME_COPYBACK:
1357: if (raidPtr->Layout.map->faultsTolerated == 0) {
1358: /* This makes no sense on a RAID 0 !! */
1359: return(EINVAL);
1360: }
1361:
1362: if (raidPtr->copyback_in_progress == 1) {
1363: /* Copyback is already in progress ! */
1364: return(EINVAL);
1365: }
1366:
1367: retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1368: rf_CopybackThread,
1369: raidPtr,"raid_copyback");
1370: return (retcode);
1371:
1372: /* Return the percentage completion of reconstruction. */
1373: case RAIDFRAME_CHECK_RECON_STATUS:
1374: if (raidPtr->Layout.map->faultsTolerated == 0) {
1375: /*
1376: * This makes no sense on a RAID 0, so tell the
1377: * user it's done.
1378: */
1379: *(int *) data = 100;
1380: return(0);
1381: }
1382: row = 0; /* XXX we only consider a single row... */
1383: if (raidPtr->status[row] != rf_rs_reconstructing)
1384: *(int *)data = 100;
1385: else
1386: *(int *)data =
1387: raidPtr->reconControl[row]->percentComplete;
1388: return (0);
1389:
1390: case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1391: progressInfoPtr = (RF_ProgressInfo_t **) data;
1392: row = 0; /* XXX we only consider a single row... */
1393: if (raidPtr->status[row] != rf_rs_reconstructing) {
1394: progressInfo.remaining = 0;
1395: progressInfo.completed = 100;
1396: progressInfo.total = 100;
1397: } else {
1398: progressInfo.total =
1399: raidPtr->reconControl[row]->numRUsTotal;
1400: progressInfo.completed =
1401: raidPtr->reconControl[row]->numRUsComplete;
1402: progressInfo.remaining = progressInfo.total -
1403: progressInfo.completed;
1404: }
1405: retcode = copyout((caddr_t) &progressInfo,
1406: (caddr_t) *progressInfoPtr,
1407: sizeof(RF_ProgressInfo_t));
1408: return (retcode);
1409:
1410: case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1411: if (raidPtr->Layout.map->faultsTolerated == 0) {
1412: /*
1413: * This makes no sense on a RAID 0, so tell the
1414: * user it's done.
1415: */
1416: *(int *) data = 100;
1417: return(0);
1418: }
1419: if (raidPtr->parity_rewrite_in_progress == 1) {
1420: *(int *) data = 100 *
1421: raidPtr->parity_rewrite_stripes_done /
1422: raidPtr->Layout.numStripe;
1423: } else {
1424: *(int *) data = 100;
1425: }
1426: return (0);
1427:
1428: case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1429: progressInfoPtr = (RF_ProgressInfo_t **) data;
1430: if (raidPtr->parity_rewrite_in_progress == 1) {
1431: progressInfo.total = raidPtr->Layout.numStripe;
1432: progressInfo.completed =
1433: raidPtr->parity_rewrite_stripes_done;
1434: progressInfo.remaining = progressInfo.total -
1435: progressInfo.completed;
1436: } else {
1437: progressInfo.remaining = 0;
1438: progressInfo.completed = 100;
1439: progressInfo.total = 100;
1440: }
1441: retcode = copyout((caddr_t) &progressInfo,
1442: (caddr_t) *progressInfoPtr,
1443: sizeof(RF_ProgressInfo_t));
1444: return (retcode);
1445:
1446: case RAIDFRAME_CHECK_COPYBACK_STATUS:
1447: if (raidPtr->Layout.map->faultsTolerated == 0) {
1448: /* This makes no sense on a RAID 0 !! */
1449: *(int *) data = 100;
1450: return(0);
1451: }
1452: if (raidPtr->copyback_in_progress == 1) {
1453: *(int *) data = 100 * raidPtr->copyback_stripes_done /
1454: raidPtr->Layout.numStripe;
1455: } else {
1456: *(int *) data = 100;
1457: }
1458: return (0);
1459:
1460: case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1461: progressInfoPtr = (RF_ProgressInfo_t **) data;
1462: if (raidPtr->copyback_in_progress == 1) {
1463: progressInfo.total = raidPtr->Layout.numStripe;
1464: progressInfo.completed =
1465: raidPtr->copyback_stripes_done;
1466: progressInfo.remaining = progressInfo.total -
1467: progressInfo.completed;
1468: } else {
1469: progressInfo.remaining = 0;
1470: progressInfo.completed = 100;
1471: progressInfo.total = 100;
1472: }
1473: retcode = copyout((caddr_t) &progressInfo,
1474: (caddr_t) *progressInfoPtr,
1475: sizeof(RF_ProgressInfo_t));
1476: return (retcode);
1477:
1478: #if 0
1479: case RAIDFRAME_SPARET_WAIT:
1480: /*
1481: * The sparetable daemon calls this to wait for the
1482: * kernel to need a spare table.
1483: * This ioctl does not return until a spare table is needed.
1484: * XXX -- Calling mpsleep here in the ioctl code is almost
1485: * certainly wrong and evil. -- XXX
1486: * XXX -- I should either compute the spare table in the
1487: * kernel, or have a different. -- XXX
1488: * XXX -- Interface (a different character device) for
1489: * delivering the table. -- XXX
1490: */
1491: RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1492: while (!rf_sparet_wait_queue)
1493: mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH,
1494: "sparet wait", 0,
1495: (void *)simple_lock_addr(rf_sparet_wait_mutex),
1496: MS_LOCK_SIMPLE);
1497: waitreq = rf_sparet_wait_queue;
1498: rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1499: RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1500:
1501: *((RF_SparetWait_t *)data) = *waitreq;
1502:
1503: RF_Free(waitreq, sizeof *waitreq);
1504: return (0);
1505:
1506: case RAIDFRAME_ABORT_SPARET_WAIT:
1507: /*
1508: * Wakes up a process waiting on SPARET_WAIT and puts an
		 * error code in it that will cause the daemon to exit.
1510: */
1511: RF_Malloc(waitreq, sizeof (*waitreq), (RF_SparetWait_t *));
1512: waitreq->fcol = -1;
1513: RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1514: waitreq->next = rf_sparet_wait_queue;
1515: rf_sparet_wait_queue = waitreq;
1516: RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1517: wakeup(&rf_sparet_wait_queue);
1518: return (0);
1519:
1520: case RAIDFRAME_SEND_SPARET:
1521: /*
1522: * Used by the spare table daemon to deliver a spare table
1523: * into the kernel.
1524: */
1525:
1526: /* Install the spare table. */
1527: retcode = rf_SetSpareTable(raidPtr,*(void **)data);
1528:
1529: /*
1530: * Respond to the requestor. The return status of the
1531: * spare table installation is passed in the "fcol" field.
1532: */
1533: RF_Malloc(waitreq, sizeof *waitreq, (RF_SparetWait_t *));
1534: waitreq->fcol = retcode;
1535: RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1536: waitreq->next = rf_sparet_resp_queue;
1537: rf_sparet_resp_queue = waitreq;
1538: wakeup(&rf_sparet_resp_queue);
1539: RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1540:
1541: return (retcode);
1542: #endif
1543: /* Fall through to the os-specific code below. */
1544: default:
1545: break;
1546: }
1547:
1548: if (!raidPtr->valid)
1549: return (EINVAL);
1550:
1551: /*
1552: * Add support for "regular" device ioctls here.
1553: */
1554: switch (cmd) {
1555: case DIOCGDINFO:
1556: *(struct disklabel *)data = *(rs->sc_dkdev.dk_label);
1557: break;
1558:
1559: case DIOCGPART:
1560: ((struct partinfo *)data)->disklab = rs->sc_dkdev.dk_label;
1561: ((struct partinfo *)data)->part =
1562: &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1563: break;
1564:
1565: case DIOCWDINFO:
1566: case DIOCSDINFO:
1567: {
1568: struct disklabel *lp;
1569: lp = (struct disklabel *)data;
1570:
1571: if ((error = raidlock(rs)) != 0)
1572: return (error);
1573:
1574: rs->sc_flags |= RAIDF_LABELLING;
1575:
1576: error = setdisklabel(rs->sc_dkdev.dk_label, lp, 0);
1577: if (error == 0) {
1578: if (cmd == DIOCWDINFO)
1579: error = writedisklabel(DISKLABELDEV(dev),
1580: raidstrategy, rs->sc_dkdev.dk_label);
1581: }
1582:
1583: rs->sc_flags &= ~RAIDF_LABELLING;
1584:
1585: raidunlock(rs);
1586:
1587: if (error)
1588: return (error);
1589: break;
1590: }
1591:
1592: case DIOCWLABEL:
1593: if (*(int *)data != 0)
1594: rs->sc_flags |= RAIDF_WLABEL;
1595: else
1596: rs->sc_flags &= ~RAIDF_WLABEL;
1597: break;
1598:
1599: case DIOCGPDINFO:
1600: raidgetdisklabel(dev, (struct disklabel *)data, 1);
1601: break;
1602:
1603: default:
1604: retcode = ENOTTY;
1605: }
1606:
1607: return (retcode);
1608: }
1609:
1610: /*
1611: * raidinit -- Complete the rest of the initialization for the
1612: * RAIDframe device.
1613: */
void
raidinit(RF_Raid_t *raidPtr)
{
	struct raid_softc *rs;
	struct cfdata *cf;
	int unit;

	unit = raidPtr->raidid;

	rs = &raid_softc[unit];
	/* Per-unit pool of raidbuf shadow buffers used by rf_DispatchKernelIO. */
	pool_init(&rs->sc_cbufpool, sizeof(struct raidbuf), 0,
	    0, 0, "raidpl", NULL);

	/* XXX should check return code first... */
	rs->sc_flags |= RAIDF_INITED;

	/* XXX doesn't check bounds. */
	snprintf(rs->sc_xname, sizeof rs->sc_xname, "raid%d", unit);

	rs->sc_dkdev.dk_name = rs->sc_xname;

	/*
	 * disk_attach actually creates space for the CPU disklabel, among
	 * other things, so it's critical to call this *BEFORE* we try
	 * putzing with disklabels.
	 */
	disk_attach(&rs->sc_dkdev);

	/*
	 * XXX There may be a weird interaction here between this, and
	 * protectedSectors, as used in RAIDframe.
	 */
	rs->sc_size = raidPtr->totalSectors;

	/*
	 * config_attach the raid device into the device tree.
	 * For autoconf rootdev selection...
	 * NOTE(review): on allocation failure we return with RAIDF_INITED
	 * already set but no device-tree node attached -- confirm callers
	 * tolerate that half-configured state.
	 */
	cf = malloc(sizeof(struct cfdata), M_RAIDFRAME, M_NOWAIT);
	if (cf == NULL) {
		printf("WARNING: no memory for cfdata struct\n");
		return;
	}
	bzero(cf, sizeof(struct cfdata));

	cf->cf_attach = &raid_ca;
	cf->cf_driver = &raid_cd;
	cf->cf_unit = unit;

	/* Ownership of cf passes to autoconf here; it is never freed by us. */
	config_attach(NULL, cf, NULL, NULL);
}
1665:
1666: /*
1667: * Wake up the daemon & tell it to get us a spare table.
1668: * XXX
1669: * The entries in the queues should be tagged with the raidPtr so that
1670: * in the extremely rare case that two recons happen at once, we know
1671: * which devices were requesting a spare table.
1672: * XXX
1673: *
1674: * XXX This code is not currently used. GO
1675: */
int
rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
{
	int retcode;

	/* Post our request on the wait queue and wake the daemon. */
	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	wakeup(&rf_sparet_wait_queue);

	/*
	 * Block until a response shows up on the response queue.
	 * NOTE(review): an older comment here claimed "mpsleep unlocks the
	 * mutex", but the code now calls tsleep(9), which does NOT release
	 * rf_sparet_wait_mutex -- confirm RF_LOCK_MUTEX is safe (or a
	 * no-op) to hold across this sleep.
	 */
	while (!rf_sparet_resp_queue) {
		tsleep(&rf_sparet_resp_queue, PRIBIO,
		    "RAIDframe getsparetable", 0);
	}
	/* Dequeue the daemon's response; its fcol field carries the status. */
	req = rf_sparet_resp_queue;
	rf_sparet_resp_queue = req->next;
	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);

	retcode = req->fcol;
	/* This is not the same req as we alloc'd. */
	RF_Free(req, sizeof *req);
	return (retcode);
}
1700:
1701: /*
1702: * A wrapper around rf_DoAccess that extracts appropriate info from the
1703: * bp and passes it down.
1704: * Any calls originating in the kernel must use non-blocking I/O.
1705: * Do some extra sanity checking to return "appropriate" error values for
1706: * certain conditions (to make some standard utilities work).
1707: *
1708: * Formerly known as: rf_DoAccessKernel
1709: */
void
raidstart(RF_Raid_t *raidPtr)
{
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;
	int retcode;
	struct partition *pp;
	daddr64_t blocknum;
	int unit;
	struct raid_softc *rs;
	int do_async;
	struct buf *bp;

	unit = raidPtr->raidid;
	rs = &raid_softc[unit];

	/* Quick check to see if anything has died recently. */
	RF_LOCK_MUTEX(raidPtr->mutex);
	if (raidPtr->numNewFailures > 0) {
		rf_update_component_labels(raidPtr,
		    RF_NORMAL_COMPONENT_UPDATE);
		raidPtr->numNewFailures--;
	}
	RF_UNLOCK_MUTEX(raidPtr->mutex);

	/*
	 * Drain the softc's buf queue while we have openings.  The mutex
	 * is held only for the openings test; it is dropped for the body
	 * of each iteration and re-taken at the bottom (every `continue`
	 * path must re-lock before looping).
	 */
	RF_LOCK_MUTEX(raidPtr->mutex);
	while (raidPtr->openings > 0) {
		RF_UNLOCK_MUTEX(raidPtr->mutex);

		bp = rs->sc_q.b_actf;
		if (bp == NULL) {
			/* Nothing more to do. */
			return;
		}
		rs->sc_q.b_actf = bp->b_actf;

		/*
		 * Ok, for the bp we have here, bp->b_blkno is relative to the
		 * partition... We need to make it absolute to the underlying
		 * device...
		 */

		blocknum = bp->b_blkno;
		if (DISKPART(bp->b_dev) != RAW_PART) {
			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
			blocknum += DL_GETPOFFSET(pp);
		}

		db1_printf(("Blocks: %d, %lld\n", (int) bp->b_blkno,
		    blocknum));

		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

		/*
		 * *THIS* is where we adjust what block we're going to...
		 * But DO NOT TOUCH bp->b_blkno !!!
		 */
		raid_addr = blocknum;

		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
		/* pb accounts for a trailing partial sector, if any. */
		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
		sum = raid_addr + num_blocks + pb;
		if (1 || rf_debugKernelAccess) {
			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d)"
			    " (%d)\n", (int)raid_addr, (int)sum,
			    (int)num_blocks, (int)pb, (int)bp->b_resid));
		}
		/*
		 * Reject transfers that run off the end of the array; the
		 * (sum < x) comparisons also catch arithmetic wrap-around.
		 */
		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
		    || (sum < num_blocks) || (sum < pb)) {
			bp->b_error = ENOSPC;
			bp->b_flags |= B_ERROR;
			bp->b_resid = bp->b_bcount;
			/* db1_printf(("%s: Calling biodone on 0x%x\n",
			     __func__, bp)); */
			splassert(IPL_BIO);
			biodone(bp);
			RF_LOCK_MUTEX(raidPtr->mutex);
			continue;
		}
		/*
		 * XXX rf_DoAccess() should do this, not just DoAccessKernel().
		 */

		/* Reject transfers that are not a whole number of sectors. */
		if (bp->b_bcount & raidPtr->sectorMask) {
			bp->b_error = EINVAL;
			bp->b_flags |= B_ERROR;
			bp->b_resid = bp->b_bcount;
			/* db1_printf(("%s: Calling biodone on 0x%x\n",
			     __func__, bp)); */
			splassert(IPL_BIO);
			biodone(bp);
			RF_LOCK_MUTEX(raidPtr->mutex);
			continue;

		}
		db1_printf(("Calling DoAccess..\n"));


		/* Consume one opening for the I/O we are about to launch. */
		RF_LOCK_MUTEX(raidPtr->mutex);
		raidPtr->openings--;
		RF_UNLOCK_MUTEX(raidPtr->mutex);

		/*
		 * Everything is async.
		 */
		do_async = 1;

		disk_busy(&rs->sc_dkdev);

		/*
		 * XXX we're still at splbio() here... Do we *really*
		 * need to be ?
		 */

		/*
		 * Don't ever condition on bp->b_flags & B_WRITE.
		 * Always condition on B_READ instead.
		 */

		/* Non-blocking: completion arrives via rf_KernelWakeupFunc. */
		retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
		    RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
		    do_async, raid_addr, num_blocks,
		    bp->b_data, bp, NULL, NULL,
		    RF_DAG_NONBLOCKING_IO, NULL, NULL, NULL);

		/* Re-lock for the loop's openings test. */
		RF_LOCK_MUTEX(raidPtr->mutex);
	}
	RF_UNLOCK_MUTEX(raidPtr->mutex);
}
1841:
1842: /* Invoke an I/O from kernel mode. Disk queue should be locked upon entry. */
1843:
int
rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
{
	int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
	struct buf *bp;
	struct raidbuf *raidbp = NULL;
	struct raid_softc *rs;
	int unit;
	/*int s = splbio();*/ /* Want to test this. */

	/*
	 * XXX along with the vnode, we also need the softc associated with
	 * this device...
	 */
	req->queue = queue;

	unit = queue->raidPtr->raidid;

	db1_printf(("DispatchKernelIO unit: %d\n", unit));

	if (unit >= numraid) {
		printf("Invalid unit number: %d %d\n", unit, numraid);
		panic("Invalid Unit number in rf_DispatchKernelIO");
	}

	rs = &raid_softc[unit];

	bp = req->bp;

#if 1
	/*
	 * XXX When there is a physical disk failure, someone is passing
	 * us a buffer that contains old stuff !! Attempt to deal with
	 * this problem without taking a performance hit...
	 * (not sure where the real bug is; it's buried in RAIDframe
	 * somewhere) :-( GO )
	 */
	if (bp->b_flags & B_ERROR) {
		bp->b_flags &= ~B_ERROR;
	}
	if (bp->b_error!=0) {
		bp->b_error = 0;
	}
#endif

	/* Grab a shadow buf from the per-unit pool for the component I/O. */
	raidbp = RAIDGETBUF(rs);

	raidbp->rf_flags = 0; /* XXX not really used anywhere... */

	/*
	 * Context for raidiodone.
	 */
	raidbp->rf_obp = bp;
	raidbp->req = req;

	LIST_INIT(&raidbp->rf_buf.b_dep);

	switch (req->type) {
	case RF_IO_TYPE_NOP:
		/* Used primarily to unlock a locked queue. */

		db1_printf(("rf_DispatchKernelIO: NOP to r %d c %d\n",
		    queue->row, queue->col));

		/* XXX need to do something extra here... */

		/*
		 * I'm leaving this in, as I've never actually seen it
		 * used, and I'd like folks to report it... GO
		 */
		db1_printf(("WAKEUP CALLED\n"));
		queue->numOutstanding++;

		/* XXX need to glue the original buffer into this ?? */

		/* Complete immediately; no physical I/O is issued for a NOP. */
		rf_KernelWakeupFunc(&raidbp->rf_buf);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:
		if (req->tracerec) {
			RF_ETIMER_START(req->tracerec->timer);
		}

		/* Fill in the shadow buf; completion calls rf_KernelWakeupFunc. */
		rf_InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
		    op | bp->b_flags, queue->rf_cinfo->ci_dev,
		    req->sectorOffset, req->numSector,
		    req->buf, rf_KernelWakeupFunc, (void *)req,
		    queue->raidPtr->logBytesPerSector, req->b_proc);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
			    (long)bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;

		/*
		 * Acc wouldn't have been let in if there were any
		 * pending reqs at any other priority.
		 */
		queue->curPriority = req->priority;

		db1_printf(("Going for %c to unit %d row %d col %d\n",
		    req->type, unit, queue->row, queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
		    (int)req->sectorOffset, (int)req->numSector,
		    (int)(req->numSector << queue->raidPtr->logBytesPerSector),
		    (int)queue->raidPtr->logBytesPerSector));
		/* Writes must bump the vnode's output counter before strategy. */
		if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
			raidbp->rf_buf.b_vp->v_numoutput++;
		}

		VOP_STRATEGY(&raidbp->rf_buf);
		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));
	/*splx(s);*/ /* want to test this */
	return (0);
}
1967:
1968: /*
1969: * This is the callback function associated with a I/O invoked from
1970: * kernel code.
1971: */
void
rf_KernelWakeupFunc(struct buf *vbp)
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;
	/* vbp is really the rf_buf embedded at the start of a raidbuf. */
	struct raidbuf *raidbp = (struct raidbuf *)vbp;
	struct buf *bp;
	struct raid_softc *rs;
	int unit;
	int s;

	s = splbio();
	db1_printf(("recovering the request queue:\n"));
	req = raidbp->req;

	bp = raidbp->rf_obp;

	queue = (RF_DiskQueue_t *)req->queue;

	/* Propagate any component-level error to the original buf. */
	if (raidbp->rf_buf.b_flags & B_ERROR) {
		bp->b_flags |= B_ERROR;
		bp->b_error =
		    raidbp->rf_buf.b_error ? raidbp->rf_buf.b_error : EIO;
	}

#if 1
	/* XXX Methinks this could be wrong... */
	bp->b_resid = raidbp->rf_buf.b_resid;
#endif

	/* Fold the elapsed physical-I/O time into the trace record. */
	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		RF_LOCK_MUTEX(rf_tracing_mutex);
		req->tracerec->diskwait_us +=
		    RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us +=
		    RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		RF_UNLOCK_MUTEX(rf_tracing_mutex);
	}

	bp->b_bcount = raidbp->rf_buf.b_bcount; /* XXXX ?? */

	unit = queue->raidPtr->raidid; /* *Much* simpler :-> */

	/*
	 * XXX Ok, let's get aggressive... If B_ERROR is set, let's go
	 * ballistic, and mark the component as hosed...
	 */
	if (bp->b_flags & B_ERROR) {
		/* Mark the disk as dead but only mark it once... */
		if (queue->raidPtr->Disks[queue->row][queue->col].status ==
		    rf_ds_optimal) {
			printf("raid%d: IO Error. Marking %s as failed.\n",
			    unit,
			    queue->raidPtr->
			    Disks[queue->row][queue->col].devname);
			queue->raidPtr->Disks[queue->row][queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status[queue->row] = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			queue->raidPtr->numNewFailures++;
		} else {
			/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}
	}

	/* Return the shadow buf to the pool before completing upstream. */
	rs = &raid_softc[unit];
	RAIDPUTBUF(rs, raidbp);

	rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
	(req->CompleteFunc)(req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);

	splx(s);
}
2049:
2050: /*
2051: * Initialize a buf structure for doing an I/O in the kernel.
2052: */
2053: void
2054: rf_InitBP(
2055: struct buf *bp,
2056: struct vnode *b_vp,
2057: unsigned rw_flag,
2058: dev_t dev,
2059: RF_SectorNum_t startSect,
2060: RF_SectorCount_t numSect,
2061: caddr_t buf,
2062: void (*cbFunc)(struct buf *),
2063: void *cbArg,
2064: int logBytesPerSector,
2065: struct proc *b_proc
2066: )
2067: {
2068: /*bp->b_flags = B_PHYS | rw_flag;*/
2069: bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too ??? */
2070: bp->b_bcount = numSect << logBytesPerSector;
2071: bp->b_bufsize = bp->b_bcount;
2072: bp->b_error = 0;
2073: bp->b_dev = dev;
2074: bp->b_data = buf;
2075: bp->b_blkno = startSect;
2076: bp->b_resid = bp->b_bcount; /* XXX is this right !??!?!! */
2077: if (bp->b_bcount == 0) {
2078: panic("bp->b_bcount is zero in rf_InitBP!!");
2079: }
2080: bp->b_proc = b_proc;
2081: bp->b_iodone = cbFunc;
2082: bp->b_vp = b_vp;
2083: LIST_INIT(&bp->b_dep);
2084: }
2085:
2086: void
2087: raidgetdefaultlabel(RF_Raid_t *raidPtr, struct raid_softc *rs,
2088: struct disklabel *lp)
2089: {
2090: db1_printf(("Building a default label...\n"));
2091: bzero(lp, sizeof(*lp));
2092:
2093: /* Fabricate a label... */
2094: DL_SETDSIZE(lp, raidPtr->totalSectors);
2095: lp->d_secsize = raidPtr->bytesPerSector;
2096: lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
2097: lp->d_ntracks = 4 * raidPtr->numCol;
2098: lp->d_ncylinders = raidPtr->totalSectors /
2099: (lp->d_nsectors * lp->d_ntracks);
2100: lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
2101:
2102: strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
2103: lp->d_type = DTYPE_RAID;
2104: strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
2105: lp->d_rpm = 3600;
2106: lp->d_flags = 0;
2107: lp->d_interleave = 1;
2108: lp->d_version = 1;
2109:
2110: DL_SETPOFFSET(&lp->d_partitions[RAW_PART], 0);
2111: DL_SETPSIZE(&lp->d_partitions[RAW_PART], raidPtr->totalSectors);
2112: lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
2113: lp->d_npartitions = RAW_PART + 1;
2114:
2115: lp->d_magic = DISKMAGIC;
2116: lp->d_magic2 = DISKMAGIC;
2117: lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
2118: }
2119:
2120: /*
2121: * Read the disklabel from the raid device.
2122: * If one is not present, fake one up.
2123: */
2124: void
2125: raidgetdisklabel(dev_t dev, struct disklabel *lp, int spoofonly)
2126: {
2127: int unit = DISKUNIT(dev);
2128: struct raid_softc *rs = &raid_softc[unit];
2129: char *errstring;
2130: RF_Raid_t *raidPtr;
2131: int i;
2132: struct partition *pp;
2133:
2134: db1_printf(("Getting the disklabel...\n"));
2135:
2136: bzero(lp, sizeof(*lp));
2137:
2138: raidPtr = raidPtrs[unit];
2139:
2140: raidgetdefaultlabel(raidPtr, rs, lp);
2141:
2142: /*
2143: * Call the generic disklabel extraction routine.
2144: */
2145: errstring = readdisklabel(DISKLABELDEV(dev), raidstrategy, lp,
2146: spoofonly);
2147: if (errstring) {
2148: /*printf("%s: %s\n", rs->sc_xname, errstring);*/
2149: return;
2150: }
2151:
2152: /*
2153: * Sanity check whether the found disklabel is valid.
2154: *
2155: * This is necessary since total size of the raid device
2156: * may vary when an interleave is changed even though exactly
2157: * same componets are used, and old disklabel may used
2158: * if that is found.
2159: */
2160: #ifdef RAIDDEBUG
2161: if (DL_GETDSIZE(lp) != rs->sc_size)
2162: printf("WARNING: %s: "
2163: "total sector size in disklabel (%d) != "
2164: "the size of raid (%ld)\n", rs->sc_xname,
2165: DL_GETDSIZE(lp), (long) rs->sc_size);
2166: #endif /* RAIDDEBUG */
2167: for (i = 0; i < lp->d_npartitions; i++) {
2168: pp = &lp->d_partitions[i];
2169: if (DL_GETPOFFSET(pp) + DL_GETPSIZE(pp) > rs->sc_size)
2170: printf("WARNING: %s: end of partition `%c' "
2171: "exceeds the size of raid (%ld)\n",
2172: rs->sc_xname, 'a' + i, (long) rs->sc_size);
2173: }
2174: }
2175:
2176: /*
2177: * Lookup the provided name in the filesystem. If the file exists,
2178: * is a valid block device, and isn't being used by anyone else,
2179: * set *vpp to the file's vnode.
2180: * You'll find the original of this in ccd.c
2181: */
2182: int
2183: raidlookup(char *path, struct proc *p, struct vnode **vpp /* result */)
2184: {
2185: struct nameidata nd;
2186: struct vnode *vp;
2187: struct vattr va;
2188: int error;
2189:
2190: NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
2191: if ((error = vn_open(&nd, FREAD|FWRITE, 0)) != 0) {
2192: #ifdef RAIDDEBUG
2193: printf("RAIDframe: vn_open returned %d\n", error);
2194: #endif /* RAIDDEBUG */
2195: return (error);
2196: }
2197: vp = nd.ni_vp;
2198: if (vp->v_usecount > 1) {
2199: VOP_UNLOCK(vp, 0, p);
2200: (void)vn_close(vp, FREAD|FWRITE, p->p_ucred, p);
2201: return (EBUSY);
2202: }
2203: if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
2204: VOP_UNLOCK(vp, 0, p);
2205: (void)vn_close(vp, FREAD|FWRITE, p->p_ucred, p);
2206: return (error);
2207: }
2208: /* XXX: eventually we should handle VREG, too. */
2209: if (va.va_type != VBLK) {
2210: VOP_UNLOCK(vp, 0, p);
2211: (void)vn_close(vp, FREAD|FWRITE, p->p_ucred, p);
2212: return (ENOTBLK);
2213: }
2214: VOP_UNLOCK(vp, 0, p);
2215: *vpp = vp;
2216: return (0);
2217: }
2218:
2219: /*
2220: * Wait interruptibly for an exclusive lock.
2221: *
2222: * XXX
2223: * Several drivers do this; it should be abstracted and made MP-safe.
2224: * (Hmm... where have we seen this warning before :-> GO )
2225: */
2226: int
2227: raidlock(struct raid_softc *rs)
2228: {
2229: int error;
2230:
2231: while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2232: rs->sc_flags |= RAIDF_WANTED;
2233: if ((error = tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
2234: return (error);
2235: }
2236: rs->sc_flags |= RAIDF_LOCKED;
2237: return (0);
2238: }
2239:
2240: /*
2241: * Unlock and wake up any waiters.
2242: */
2243: void
2244: raidunlock(struct raid_softc *rs)
2245: {
2246: rs->sc_flags &= ~RAIDF_LOCKED;
2247: if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2248: rs->sc_flags &= ~RAIDF_WANTED;
2249: wakeup(rs);
2250: }
2251: }
2252:
2253:
2254: #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2255: #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2256:
2257: int
2258: raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
2259: {
2260: RF_ComponentLabel_t clabel;
2261: raidread_component_label(dev, b_vp, &clabel);
2262: clabel.mod_counter = mod_counter;
2263: clabel.clean = RF_RAID_CLEAN;
2264: raidwrite_component_label(dev, b_vp, &clabel);
2265: return(0);
2266: }
2267:
2268:
2269: int
2270: raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
2271: {
2272: RF_ComponentLabel_t clabel;
2273: raidread_component_label(dev, b_vp, &clabel);
2274: clabel.mod_counter = mod_counter;
2275: clabel.clean = RF_RAID_DIRTY;
2276: raidwrite_component_label(dev, b_vp, &clabel);
2277: return(0);
2278: }
2279:
2280: /* ARGSUSED */
/*
 * Read the RAIDframe component label from the reserved area
 * (RF_COMPONENT_INFO_OFFSET) of the component backing `dev'.
 * Returns 0 and fills *clabel on success, or an I/O error / EINVAL.
 */
int
raidread_component_label(dev_t dev, struct vnode *b_vp,
    RF_ComponentLabel_t *clabel)
{
	struct buf *bp;
	int error;

	/*
	 * XXX should probably ensure that we don't try to do this if
	 * someone has changed rf_protected_sectors.
	 */

	if (b_vp == NULL) {
		/*
		 * For whatever reason, this component is not valid.
		 * Don't try to read a component label from it.
		 */
		return(EINVAL);
	}

	/* Get a block of the appropriate size... */
	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
	bp->b_dev = dev;

	/* Get our ducks in a row for the read. */
	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
	bp->b_flags |= B_READ;
	/*
	 * NOTE(review): b_bcount is in bytes but this value is in
	 * DEV_BSIZE units -- the units look inconsistent; confirm
	 * against what the strategy routine expects.
	 */
	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;

	/* Hand the buf straight to the component's block driver. */
	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);

	/* Sleep until the I/O completes. */
	error = biowait(bp);

	if (!error) {
		memcpy(clabel, bp->b_data, sizeof(RF_ComponentLabel_t));
#if 0
		rf_print_component_label( clabel );
#endif
	} else {
		db1_printf(("Failed to read RAID component label!\n"));
	}

	brelse(bp);
	return(error);
}
2327:
2328: /* ARGSUSED */
/*
 * Write *clabel into the reserved component-label area of the
 * component backing `dev'.  The remainder of the 1K label block is
 * zero-filled.  Returns 0 or an I/O error from biowait().
 */
int
raidwrite_component_label(dev_t dev, struct vnode *b_vp,
    RF_ComponentLabel_t *clabel)
{
	struct buf *bp;
	int error;

	/* Get a block of the appropriate size... */
	bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
	bp->b_dev = dev;

	/* Get our ducks in a row for the write. */
	bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
	bp->b_bcount = RF_COMPONENT_INFO_SIZE;
	bp->b_flags |= B_WRITE;
	/* NOTE(review): bytes vs DEV_BSIZE units, as in the read path. */
	bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;

	/* Zero the block so stale data never hits the disk... */
	memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE );

	/* ...then drop the label at the front of it. */
	memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t));

	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);
	error = biowait(bp);
	brelse(bp);
	if (error) {
		printf("Failed to write RAID component info!\n");
	}

	return(error);
}
2359:
/*
 * Bump the array's modification counter and mark the component label
 * of every non-failed component dirty.  Done when the array starts
 * accepting writes, so an unclean shutdown is detectable later.
 */
void
rf_markalldirty(RF_Raid_t *raidPtr)
{
	RF_ComponentLabel_t clabel;
	int r,c;

	raidPtr->mod_counter++;
	for (r = 0; r < raidPtr->numRow; r++) {
		for (c = 0; c < raidPtr->numCol; c++) {
			/*
			 * We don't want to touch (at all) a disk that has
			 * failed.
			 */
			if (!RF_DEAD_DISK(raidPtr->Disks[r][c].status)) {
				raidread_component_label(
				    raidPtr->Disks[r][c].dev,
				    raidPtr->raid_cinfo[r][c].ci_vp, &clabel);
				if (clabel.status == rf_ds_spared) {
					/*
					 * XXX do something special...
					 * But whatever you do, don't
					 * try to access it !!!
					 */
				} else {
#if 0
				clabel.status =
				    raidPtr->Disks[r][c].status;
				raidwrite_component_label(
				    raidPtr->Disks[r][c].dev,
				    raidPtr->raid_cinfo[r][c].ci_vp,
				    &clabel);
#endif
				/* Rewrite the label with clean=DIRTY. */
				raidmarkdirty(
				    raidPtr->Disks[r][c].dev,
				    raidPtr->raid_cinfo[r][c].ci_vp,
				    raidPtr->mod_counter);
				}
			}
		}
	}
	/*printf("Component labels marked dirty.\n");*/
	/*
	 * NOTE(review): the disabled block below references variables
	 * (sparecol, srow, scol, i, j, and a stale `r') that are not
	 * declared in this function; it would not compile if enabled.
	 * Compare with the live spare handling in
	 * rf_update_component_labels().
	 */
#if 0
	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
			/*
			 * XXX This is where we get fancy and map this spare
			 * into it's correct spot in the array.
			 */
			/*
			 * We claim this disk is "optimal" if it's
			 * rf_ds_used_spare, as that means it should be
			 * directly substitutable for the disk it replaced.
			 * We note that too...
			 */

			for(i=0;i<raidPtr->numRow;i++) {
				for(j=0;j<raidPtr->numCol;j++) {
					if ((raidPtr->Disks[i][j].spareRow ==
					    r) &&
					    (raidPtr->Disks[i][j].spareCol ==
					    sparecol)) {
						srow = r;
						scol = sparecol;
						break;
					}
				}
			}

			raidread_component_label(
			    raidPtr->Disks[r][sparecol].dev,
			    raidPtr->raid_cinfo[r][sparecol].ci_vp, &clabel);
			/* Make sure status is noted. */
			clabel.version = RF_COMPONENT_LABEL_VERSION;
			clabel.mod_counter = raidPtr->mod_counter;
			clabel.serial_number = raidPtr->serial_number;
			clabel.row = srow;
			clabel.column = scol;
			clabel.num_rows = raidPtr->numRow;
			clabel.num_columns = raidPtr->numCol;
			clabel.clean = RF_RAID_DIRTY; /* Changed in a bit. */
			clabel.status = rf_ds_optimal;
			raidwrite_component_label(
			    raidPtr->Disks[r][sparecol].dev,
			    raidPtr->raid_cinfo[r][sparecol].ci_vp, &clabel);
			raidmarkclean( raidPtr->Disks[r][sparecol].dev,
			    raidPtr->raid_cinfo[r][sparecol].ci_vp);
		}
	}

#endif
}
2452:
2453:
/*
 * Refresh every optimal component's label with the new mod_counter,
 * map in-use spares back to the array slot they replace, and -- on a
 * final update (RF_FINAL_COMPONENT_UPDATE) with good parity -- mark
 * the labels clean.
 */
void
rf_update_component_labels(RF_Raid_t *raidPtr, int final)
{
	RF_ComponentLabel_t clabel;
	int sparecol;
	int r,c;
	int i,j;
	int srow, scol;

	srow = -1;
	scol = -1;

	/*
	 * XXX should do extra checks to make sure things really are clean,
	 * rather than blindly setting the clean bit...
	 */

	raidPtr->mod_counter++;

	/* Pass 1: refresh the label of every optimal data component. */
	for (r = 0; r < raidPtr->numRow; r++) {
		for (c = 0; c < raidPtr->numCol; c++) {
			if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
				raidread_component_label(
				    raidPtr->Disks[r][c].dev,
				    raidPtr->raid_cinfo[r][c].ci_vp,
				    &clabel);
				/* Make sure status is noted. */
				clabel.status = rf_ds_optimal;
				/* Bump the counter. */
				clabel.mod_counter = raidPtr->mod_counter;

				raidwrite_component_label(
				    raidPtr->Disks[r][c].dev,
				    raidPtr->raid_cinfo[r][c].ci_vp,
				    &clabel);
				if (final == RF_FINAL_COMPONENT_UPDATE) {
					if (raidPtr->parity_good ==
					    RF_RAID_CLEAN) {
						raidmarkclean(
						    raidPtr->Disks[r][c].dev,
						    raidPtr->
						    raid_cinfo[r][c].ci_vp,
						    raidPtr->mod_counter);
					}
				}
			}
			/* Else we don't touch it... */
		}
	}

	/* Pass 2: spares live in row 0 at columns >= numCol. */
	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
			/*
			 * We claim this disk is "optimal" if it's
			 * rf_ds_used_spare, as that means it should be
			 * directly substitutable for the disk it replaced.
			 * We note that too...
			 */

			/*
			 * Find which array slot (srow,scol) this spare
			 * is standing in for, by scanning for the disk
			 * whose spare pointer targets it.
			 */
			for(i=0;i<raidPtr->numRow;i++) {
				for(j=0;j<raidPtr->numCol;j++) {
					if ((raidPtr->Disks[i][j].spareRow ==
					    0) &&
					    (raidPtr->Disks[i][j].spareCol ==
					    sparecol)) {
						srow = i;
						scol = j;
						break;
					}
				}
			}

			/* XXX Shouldn't *really* need this... */
			raidread_component_label(
			    raidPtr->Disks[0][sparecol].dev,
			    raidPtr->raid_cinfo[0][sparecol].ci_vp, &clabel);
			/* Make sure status is noted. */

			raid_init_component_label(raidPtr, &clabel);

			/* Label the spare as the slot it replaced. */
			clabel.mod_counter = raidPtr->mod_counter;
			clabel.row = srow;
			clabel.column = scol;
			clabel.status = rf_ds_optimal;

			raidwrite_component_label(
			    raidPtr->Disks[0][sparecol].dev,
			    raidPtr->raid_cinfo[0][sparecol].ci_vp, &clabel);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr->
					    Disks[0][sparecol].dev,
					    raidPtr->
					    raid_cinfo[0][sparecol].ci_vp,
					    raidPtr->mod_counter);
				}
			}
		}
	}
	/*printf("Component labels updated\n");*/
}
2556:
2557: void
2558: rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
2559: {
2560: struct proc *p = curproc;
2561:
2562: if (vp != NULL) {
2563: if (auto_configured == 1) {
2564: /* component was opened by rf_find_raid_components() */
2565: VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, p);
2566: vrele(vp);
2567: } else {
2568: /* component was opened by raidlookup() */
2569: (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2570: }
2571: } else {
2572: printf("vnode was NULL\n");
2573: }
2574: }
2575:
2576: void
2577: rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
2578: {
2579: int r,c;
2580: struct vnode *vp;
2581: int acd;
2582:
2583:
2584: /* We take this opportunity to close the vnodes like we should... */
2585:
2586: for (r = 0; r < raidPtr->numRow; r++) {
2587: for (c = 0; c < raidPtr->numCol; c++) {
2588: db1_printf(("Closing vnode for row: %d col: %d\n",
2589: r, c));
2590: vp = raidPtr->raid_cinfo[r][c].ci_vp;
2591: acd = raidPtr->Disks[r][c].auto_configured;
2592: rf_close_component(raidPtr, vp, acd);
2593: raidPtr->raid_cinfo[r][c].ci_vp = NULL;
2594: raidPtr->Disks[r][c].auto_configured = 0;
2595: }
2596: }
2597: for (r = 0; r < raidPtr->numSpare; r++) {
2598: db1_printf(("Closing vnode for spare: %d\n", r));
2599: vp = raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp;
2600: acd = raidPtr->Disks[0][raidPtr->numCol + r].auto_configured;
2601: rf_close_component(raidPtr, vp, acd);
2602: raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp = NULL;
2603: raidPtr->Disks[0][raidPtr->numCol + r].auto_configured = 0;
2604: }
2605: }
2606:
2607:
2608: void
2609: rf_ReconThread(struct rf_recon_req *req)
2610: {
2611: int s;
2612: RF_Raid_t *raidPtr;
2613:
2614: s = splbio();
2615: raidPtr = (RF_Raid_t *) req->raidPtr;
2616: raidPtr->recon_in_progress = 1;
2617:
2618: rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
2619: ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2620:
2621: /* XXX Get rid of this! we don't need it at all... */
2622: RF_Free(req, sizeof(*req));
2623:
2624: raidPtr->recon_in_progress = 0;
2625: splx(s);
2626:
2627: /* That's all... */
2628: kthread_exit(0); /* Does not return. */
2629: }
2630:
2631: void
2632: rf_RewriteParityThread(RF_Raid_t *raidPtr)
2633: {
2634: int retcode;
2635: int s;
2636:
2637: s = splbio();
2638: raidPtr->parity_rewrite_in_progress = 1;
2639: retcode = rf_RewriteParity(raidPtr);
2640: if (retcode) {
2641: printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
2642: } else {
2643: /*
2644: * Set the clean bit ! If we shutdown correctly,
2645: * the clean bit on each component label will get
2646: * set.
2647: */
2648: raidPtr->parity_good = RF_RAID_CLEAN;
2649: }
2650: raidPtr->parity_rewrite_in_progress = 0;
2651: splx(s);
2652:
2653: /* Anyone waiting for us to stop ? If so, inform them... */
2654: if (raidPtr->waitShutdown) {
2655: wakeup(&raidPtr->parity_rewrite_in_progress);
2656: }
2657:
2658: /* That's all... */
2659: kthread_exit(0); /* Does not return. */
2660: }
2661:
2662:
2663: void
2664: rf_CopybackThread(RF_Raid_t *raidPtr)
2665: {
2666: int s;
2667:
2668: s = splbio();
2669: raidPtr->copyback_in_progress = 1;
2670: rf_CopybackReconstructedData(raidPtr);
2671: raidPtr->copyback_in_progress = 0;
2672: splx(s);
2673:
2674: /* That's all... */
2675: kthread_exit(0); /* Does not return. */
2676: }
2677:
2678:
2679: void
2680: rf_ReconstructInPlaceThread(struct rf_recon_req *req)
2681: {
2682: int retcode;
2683: int s;
2684: RF_Raid_t *raidPtr;
2685:
2686: s = splbio();
2687: raidPtr = req->raidPtr;
2688: raidPtr->recon_in_progress = 1;
2689: retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col);
2690: RF_Free(req, sizeof(*req));
2691: raidPtr->recon_in_progress = 0;
2692: splx(s);
2693:
2694: /* That's all... */
2695: kthread_exit(0); /* Does not return. */
2696: }
2697:
2698:
/*
 * Scan every disk device in the system for partitions carrying a
 * reasonable RAIDframe component label, and return them as a linked
 * RF_AutoConfig_t list (NULL if autoconfig is disabled or nothing is
 * found).  For each accepted component the vnode is left open and the
 * malloc'd label is owned by the returned list entry.
 */
RF_AutoConfig_t *
rf_find_raid_components(void)
{
#ifdef RAID_AUTOCONFIG
	int major;
	struct vnode *vp;
	struct disklabel label;
	struct device *dv;
	dev_t dev;
	int error;
	int i;
	int good_one;
	RF_ComponentLabel_t *clabel;
	RF_AutoConfig_t *ac;
#endif	/* RAID_AUTOCONFIG */
	RF_AutoConfig_t *ac_list;


	/* Initialize the AutoConfig list. */
	ac_list = NULL;

#ifdef RAID_AUTOCONFIG
	/* We begin by trolling through *all* the devices on the system. */

	TAILQ_FOREACH(dv, &alldevs, dv_list) {

		/* We are only interested in disks... */
		if (dv->dv_class != DV_DISK)
			continue;

		/* We don't care about floppies... */
		if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"fd")) {
			continue;
		}

		/*
		 * We need to find the device_name_to_block_device_major
		 * stuff.
		 */
		major = findblkmajor(dv);

		/* Get a vnode for the raw partition of this disk. */

		dev = MAKEDISKDEV(major, dv->dv_unit, RAW_PART);
		if (bdevvp(dev, &vp))
			panic("RAID can't alloc vnode");

		error = VOP_OPEN(vp, FREAD, NOCRED, 0);

		if (error) {
			/*
			 * "Who cares."  Continue looking
			 * for something that exists.
			 */
			vput(vp);
			continue;
		}

		/* Ok, the disk exists. Go get the disklabel. */
		error = VOP_IOCTL(vp, DIOCGDINFO, (caddr_t)&label,
		    FREAD, NOCRED, 0);
		if (error) {
			/*
			 * XXX can't happen - open() would
			 * have errored out (or faked up one).
			 *
			 * NOTE(review): if it ever does happen, `label'
			 * is used uninitialized by the partition loop
			 * below -- confirm the "can't happen" claim.
			 */
			printf("can't get label for dev %s%c (%d)!?!?\n",
			    dv->dv_xname, 'a' + RAW_PART, error);
		}

		/*
		 * We don't need this any more.  We'll allocate it again
		 * a little later if we really do...
		 *
		 * NOTE(review): the vnode was opened with FREAD only but
		 * is closed with FREAD|FWRITE -- flag mismatch; verify
		 * against VOP_CLOSE expectations.
		 */
		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
		vrele(vp);

		for (i=0; i < label.d_npartitions; i++) {
			/*
			 * We only support partitions marked as RAID.
			 * Except on sparc/sparc64 where FS_RAID doesn't
			 * fit in the SUN disklabel and we need to look
			 * into each and every partition !!!
			 */
#if !defined(__sparc__) && !defined(__sparc64__) && !defined(__sun3__)
			if (label.d_partitions[i].p_fstype != FS_RAID)
				continue;
#else	/* !__sparc__ && !__sparc64__ && !__sun3__ */
			if (label.d_partitions[i].p_fstype == FS_SWAP ||
			    label.d_partitions[i].p_fstype == FS_UNUSED)
				continue;
#endif	/* __sparc__ || __sparc64__ || __sun3__ */

			dev = MAKEDISKDEV(major, dv->dv_unit, i);
			if (bdevvp(dev, &vp))
				panic("RAID can't alloc vnode");

			error = VOP_OPEN(vp, FREAD, NOCRED, 0);
			if (error) {
				/* Whatever... */
				vput(vp);
				continue;
			}

			good_one = 0;

			clabel = (RF_ComponentLabel_t *)
			    malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME,
			    M_NOWAIT);
			if (clabel == NULL) {
				/* XXX CLEANUP HERE. */
				printf("RAID auto config: out of memory!\n");
				return(NULL); /* XXX probably should panic ? */
			}

			if (!raidread_component_label(dev, vp, clabel)) {
				/* Got the label.  Does it look reasonable ? */
				if (rf_reasonable_label(clabel) &&
				    (clabel->partitionSize <=
				    DL_GETPSIZE(&label.d_partitions[i]))) {
#ifdef RAIDDEBUG
					printf("Component on: %s%c: %d\n",
					    dv->dv_xname, 'a'+i,
					    DL_GETPSIZE(&label.d_partitions[i]));
					rf_print_component_label(clabel);
#endif	/* RAIDDEBUG */
					/*
					 * If it's reasonable, add it,
					 * else ignore it.
					 */
					ac = (RF_AutoConfig_t *)
					    malloc(sizeof(RF_AutoConfig_t),
					    M_RAIDFRAME, M_NOWAIT);
					if (ac == NULL) {
						/* XXX should panic ??? */
						return(NULL);
					}

					/*
					 * The list entry takes over both
					 * the open vnode and the label.
					 */
					snprintf(ac->devname,
					    sizeof ac->devname, "%s%c",
					    dv->dv_xname, 'a'+i);
					ac->dev = dev;
					ac->vp = vp;
					ac->clabel = clabel;
					ac->next = ac_list;
					ac_list = ac;
					good_one = 1;
				}
			}
			if (!good_one) {
				/* Cleanup.  (Same FREAD/FWRITE mismatch
				 * as above.) */
				free(clabel, M_RAIDFRAME);
				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
				vrele(vp);
			}
		}
	}
#endif	/* RAID_AUTOCONFIG */
	return(ac_list);
}
2859:
2860: #ifdef RAID_AUTOCONFIG
2861: int
2862: rf_reasonable_label(RF_ComponentLabel_t *clabel)
2863: {
2864:
2865: if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
2866: (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
2867: ((clabel->clean == RF_RAID_CLEAN) ||
2868: (clabel->clean == RF_RAID_DIRTY)) &&
2869: clabel->row >=0 &&
2870: clabel->column >= 0 &&
2871: clabel->num_rows > 0 &&
2872: clabel->num_columns > 0 &&
2873: clabel->row < clabel->num_rows &&
2874: clabel->column < clabel->num_columns &&
2875: clabel->blockSize > 0 &&
2876: clabel->numBlocks > 0) {
2877: /* Label looks reasonable enough... */
2878: return(1);
2879: }
2880: return(0);
2881: }
2882: #endif /* RAID_AUTOCONFIG */
2883:
/*
 * Dump a component label to the console in human-readable form
 * (debugging aid).
 */
void
rf_print_component_label(RF_ComponentLabel_t *clabel)
{
	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
	    clabel->row, clabel->column, clabel->num_rows, clabel->num_columns);
	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
	    clabel->version, clabel->serial_number, clabel->mod_counter);
	printf("   Clean: %s Status: %d\n", clabel->clean ? "Yes" : "No",
	    clabel->status );
	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
	    clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
	printf("   RAID Level: %c  blocksize: %d numBlocks: %d\n",
	    (char) clabel->parityConfig, clabel->blockSize, clabel->numBlocks);
	printf("   Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
	printf("   Contains root partition: %s\n", clabel->root_partition ?
	    "Yes" : "No" );
	printf("   Last configured as: raid%d\n", clabel->last_unit );
#if 0
	printf("   Config order: %d\n", clabel->config_order);
#endif
}
2905:
2906: RF_ConfigSet_t *
2907: rf_create_auto_sets(RF_AutoConfig_t *ac_list)
2908: {
2909: RF_AutoConfig_t *ac;
2910: RF_ConfigSet_t *config_sets;
2911: RF_ConfigSet_t *cset;
2912: RF_AutoConfig_t *ac_next;
2913:
2914:
2915: config_sets = NULL;
2916:
2917: /*
2918: * Go through the AutoConfig list, and figure out which components
2919: * belong to what sets.
2920: */
2921: ac = ac_list;
2922: while(ac!=NULL) {
2923: /*
2924: * We're going to putz with ac->next, so save it here
2925: * for use at the end of the loop.
2926: */
2927: ac_next = ac->next;
2928:
2929: if (config_sets == NULL) {
2930: /* We will need at least this one... */
2931: config_sets = (RF_ConfigSet_t *)
2932: malloc(sizeof(RF_ConfigSet_t), M_RAIDFRAME,
2933: M_NOWAIT);
2934: if (config_sets == NULL) {
2935: panic("rf_create_auto_sets: No memory!");
2936: }
2937: /* This one is easy :) */
2938: config_sets->ac = ac;
2939: config_sets->next = NULL;
2940: config_sets->rootable = 0;
2941: ac->next = NULL;
2942: } else {
2943: /* Which set does this component fit into ? */
2944: cset = config_sets;
2945: while(cset!=NULL) {
2946: if (rf_does_it_fit(cset, ac)) {
2947: /* Looks like it matches... */
2948: ac->next = cset->ac;
2949: cset->ac = ac;
2950: break;
2951: }
2952: cset = cset->next;
2953: }
2954: if (cset==NULL) {
2955: /* Didn't find a match above... new set... */
2956: cset = (RF_ConfigSet_t *)
2957: malloc(sizeof(RF_ConfigSet_t),
2958: M_RAIDFRAME, M_NOWAIT);
2959: if (cset == NULL) {
2960: panic("rf_create_auto_sets: No memory!");
2961: }
2962: cset->ac = ac;
2963: ac->next = NULL;
2964: cset->next = config_sets;
2965: cset->rootable = 0;
2966: config_sets = cset;
2967: }
2968: }
2969: ac = ac_next;
2970: }
2971:
2972:
2973: return(config_sets);
2974: }
2975:
2976: int
2977: rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
2978: {
2979: RF_ComponentLabel_t *clabel1, *clabel2;
2980:
2981: /*
2982: * If this one matches the *first* one in the set, that's good
2983: * enough, since the other members of the set would have been
2984: * through here too...
2985: */
2986: /*
2987: * Note that we are not checking partitionSize here...
2988: *
2989: * Note that we are also not checking the mod_counters here.
2990: * If everything else matches except the mod_counter, that's
2991: * good enough for this test. We will deal with the mod_counters
2992: * a little later in the autoconfiguration process.
2993: *
2994: * (clabel1->mod_counter == clabel2->mod_counter) &&
2995: *
2996: * The reason we don't check for this is that failed disks
2997: * will have lower modification counts. If those disks are
2998: * not added to the set they used to belong to, then they will
2999: * form their own set, which may result in 2 different sets,
3000: * for example, competing to be configured at raid0, and
3001: * perhaps competing to be the root filesystem set. If the
3002: * wrong ones get configured, or both attempt to become /,
3003: * weird behaviour and or serious lossage will occur. Thus we
3004: * need to bring them into the fold here, and kick them out at
3005: * a later point.
3006: */
3007:
3008: clabel1 = cset->ac->clabel;
3009: clabel2 = ac->clabel;
3010: if ((clabel1->version == clabel2->version) &&
3011: (clabel1->serial_number == clabel2->serial_number) &&
3012: (clabel1->num_rows == clabel2->num_rows) &&
3013: (clabel1->num_columns == clabel2->num_columns) &&
3014: (clabel1->sectPerSU == clabel2->sectPerSU) &&
3015: (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
3016: (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
3017: (clabel1->parityConfig == clabel2->parityConfig) &&
3018: (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
3019: (clabel1->blockSize == clabel2->blockSize) &&
3020: (clabel1->numBlocks == clabel2->numBlocks) &&
3021: (clabel1->autoconfigure == clabel2->autoconfigure) &&
3022: (clabel1->root_partition == clabel2->root_partition) &&
3023: (clabel1->last_unit == clabel2->last_unit) &&
3024: (clabel1->config_order == clabel2->config_order)) {
3025: /* If it get's here, it almost *has* to be a match. */
3026: } else {
3027: /* It's not consistent with somebody in the set... Punt. */
3028: return(0);
3029: }
3030: /* All was fine.. It must fit... */
3031: return(1);
3032: }
3033:
/*
 * Decide whether config set `cset' has enough live components (at the
 * set's highest mod_counter) to be configured.  Returns 1 if so, 0 if
 * too many components are missing for the parity type.  RAID 1 is
 * special-cased: the array survives as long as no even/odd component
 * pair is entirely missing.
 */
int
rf_have_enough_components(RF_ConfigSet_t *cset)
{
	RF_AutoConfig_t *ac;
	RF_AutoConfig_t *auto_config;
	RF_ComponentLabel_t *clabel;
	int r,c;
	int num_rows;
	int num_cols;
	int num_missing;
	int mod_counter;
	int mod_counter_found;
	int even_pair_failed;
	char parity_type;


	/*
	 * Check to see that we have enough 'live' components
	 * of this set.  If so, we can configure it if necessary.
	 */

	num_rows = cset->ac->clabel->num_rows;
	num_cols = cset->ac->clabel->num_columns;
	parity_type = cset->ac->clabel->parityConfig;

	/* XXX Check for duplicate components !?!?!? */

	/* Determine what the mod_counter is supposed to be for this set. */

	/*
	 * The correct counter is the maximum over the set; components
	 * with a lower counter are stale (e.g. previously failed).
	 */
	mod_counter_found = 0;
	mod_counter = 0;
	ac = cset->ac;
	while(ac!=NULL) {
		if (mod_counter_found==0) {
			mod_counter = ac->clabel->mod_counter;
			mod_counter_found = 1;
		} else {
			if (ac->clabel->mod_counter > mod_counter) {
				mod_counter = ac->clabel->mod_counter;
			}
		}
		ac = ac->next;
	}

	num_missing = 0;
	auto_config = cset->ac;

	/*
	 * For each (row, col) slot, look for a component that claims
	 * that slot *and* carries the current mod_counter.
	 */
	for(r=0; r<num_rows; r++) {
		even_pair_failed = 0;
		for(c=0; c<num_cols; c++) {
			ac = auto_config;
			while(ac!=NULL) {
				if ((ac->clabel->row == r) &&
				    (ac->clabel->column == c) &&
				    (ac->clabel->mod_counter == mod_counter)) {
					/* It's this one... */
#ifdef RAIDDEBUG
					printf("Found: %s at %d,%d\n",
					    ac->devname,r,c);
#endif	/* RAIDDEBUG */
					break;
				}
				ac=ac->next;
			}
			if (ac==NULL) {
				/* Didn't find one here! */
				/*
				 * Special case for RAID 1, especially
				 * where there are more than 2
				 * components (where RAIDframe treats
				 * things a little differently :( )
				 */
				if (parity_type == '1') {
					if (c%2 == 0) {	/* Even component. */
						even_pair_failed = 1;
					} else {	/*
							 * Odd component.
							 * If we're failed,
							 * and so is the even
							 * component, it's
							 * "Good Night, Charlie"
							 */
						if (even_pair_failed == 1) {
							return(0);
						}
					}
				} else {
					/* Normal accounting. */
					num_missing++;
				}
			}
			if ((parity_type == '1') && (c%2 == 1)) {
				/*
				 * Just did an even component, and we didn't
				 * bail... Reset the even_pair_failed flag,
				 * and go on to the next component...
				 */
				even_pair_failed = 0;
			}
		}
	}

	clabel = cset->ac->clabel;

	/* RAID 0 tolerates no loss; RAID 4/5 tolerate one component. */
	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
		/* XXX This needs to be made *much* more general. */
		/* Too many failures. */
		return(0);
	}
	/*
	 * Otherwise, all is well, and we've got enough to take a kick
	 * at autoconfiguring this set.
	 */
	return(1);
}
3151:
3152: void
3153: rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
3154: RF_Raid_t *raidPtr)
3155: {
3156: RF_ComponentLabel_t *clabel;
3157: int i;
3158:
3159: clabel = ac->clabel;
3160:
3161: /* 1. Fill in the common stuff. */
3162: config->numRow = clabel->num_rows;
3163: config->numCol = clabel->num_columns;
3164: config->numSpare = 0; /* XXX Should this be set here ? */
3165: config->sectPerSU = clabel->sectPerSU;
3166: config->SUsPerPU = clabel->SUsPerPU;
3167: config->SUsPerRU = clabel->SUsPerRU;
3168: config->parityConfig = clabel->parityConfig;
3169: /* XXX... */
3170: strlcpy(config->diskQueueType,"fifo", sizeof config->diskQueueType);
3171: config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3172: config->layoutSpecificSize = 0; /* XXX ?? */
3173:
3174: while(ac!=NULL) {
3175: /*
3176: * row/col values will be in range due to the checks
3177: * in reasonable_label().
3178: */
3179: strlcpy(config->devnames[ac->clabel->row][ac->clabel->column],
3180: ac->devname,
3181: sizeof config->devnames[ac->clabel->row][ac->clabel->column]);
3182: ac = ac->next;
3183: }
3184:
3185: for(i=0;i<RF_MAXDBGV;i++) {
3186: config->debugVars[i][0] = NULL;
3187: }
3188:
3189: #ifdef RAID_DEBUG_ALL
3190:
3191: #ifdef RF_DBG_OPTION
3192: #undef RF_DBG_OPTION
3193: #endif /* RF_DBG_OPTION */
3194:
3195: #ifdef __STDC__
3196: #define RF_DBG_OPTION(_option_,_val_) do { \
3197: snprintf(&(config->debugVars[i++][0]), 50, "%s %ld", \
3198: #_option_, _val_); \
3199: } while (0)
3200: #else /* __STDC__ */
3201: #define RF_DBG_OPTION(_option_,_val_) do { \
3202: snprintf(&(config->debugVars[i++][0]), 50, "%s %ld", \
3203: "/**/_option_/**/", _val_); \
3204: } while (0)
3205: #endif /* __STDC__ */
3206:
3207: i = 0;
3208:
3209: /* RF_DBG_OPTION(accessDebug, 0); */
3210: /* RF_DBG_OPTION(accessTraceBufSize, 0); */
3211: RF_DBG_OPTION(cscanDebug, 1); /* Debug CSCAN sorting. */
3212: RF_DBG_OPTION(dagDebug, 1);
3213: /* RF_DBG_OPTION(debugPrintUseBuffer, 0); */
3214: RF_DBG_OPTION(degDagDebug, 1);
3215: RF_DBG_OPTION(disableAsyncAccs, 1);
3216: RF_DBG_OPTION(diskDebug, 1);
3217: RF_DBG_OPTION(enableAtomicRMW, 0);
3218: /*
3219: * This debug variable enables locking of the
3220: * disk arm during small-write operations.
3221: * Setting this variable to anything other than
3222: * 0 will result in deadlock. (wvcii)
3223: */
3224: RF_DBG_OPTION(engineDebug, 1);
3225: RF_DBG_OPTION(fifoDebug, 1); /* Debug fifo queueing. */
3226: /* RF_DBG_OPTION(floatingRbufDebug, 1); */
3227: /* RF_DBG_OPTION(forceHeadSepLimit, -1); */
3228: /* RF_DBG_OPTION(forceNumFloatingReconBufs, -1); */
3229: /*
3230: * Wire down the number of extra recon buffers
3231: * to use.
3232: */
3233: /* RF_DBG_OPTION(keepAccTotals, 1); */
3234: /* Turn on keep_acc_totals. */
3235: RF_DBG_OPTION(lockTableSize, RF_DEFAULT_LOCK_TABLE_SIZE);
3236: RF_DBG_OPTION(mapDebug, 1);
3237: RF_DBG_OPTION(maxNumTraces, -1);
3238:
3239: /* RF_DBG_OPTION(memChunkDebug, 1); */
3240: /* RF_DBG_OPTION(memDebug, 1); */
3241: /* RF_DBG_OPTION(memDebugAddress, 1); */
3242: /* RF_DBG_OPTION(numBufsToAccumulate, 1); */
3243: /*
3244: * Number of buffers to accumulate before
3245: * doing XOR.
3246: */
3247: RF_DBG_OPTION(prReconSched, 0);
3248: RF_DBG_OPTION(printDAGsDebug, 1);
3249: RF_DBG_OPTION(printStatesDebug, 1);
3250: RF_DBG_OPTION(protectedSectors, 64L);
3251: /*
3252: * Number of sectors at start of disk to exclude
3253: * from RAID address space.
3254: */
3255: RF_DBG_OPTION(pssDebug, 1);
3256: RF_DBG_OPTION(queueDebug, 1);
3257: RF_DBG_OPTION(quiesceDebug, 1);
3258: RF_DBG_OPTION(raidSectorOffset, 0);
3259: /*
3260: * Value added to all incoming sectors to debug
3261: * alignment problems.
3262: */
3263: RF_DBG_OPTION(reconDebug, 1);
3264: RF_DBG_OPTION(reconbufferDebug, 1);
3265: RF_DBG_OPTION(scanDebug, 1); /* Debug SCAN sorting. */
3266: RF_DBG_OPTION(showXorCallCounts, 0);
3267: /* Show n-way Xor call counts. */
3268: RF_DBG_OPTION(shutdownDebug, 1); /* Show shutdown calls. */
3269: RF_DBG_OPTION(sizePercentage, 100);
3270: RF_DBG_OPTION(sstfDebug, 1);
3271: /* Turn on debugging info for sstf queueing. */
3272: RF_DBG_OPTION(stripeLockDebug, 1);
3273: RF_DBG_OPTION(suppressLocksAndLargeWrites, 0);
3274: RF_DBG_OPTION(suppressTraceDelays, 0);
3275: RF_DBG_OPTION(useMemChunks, 1);
3276: RF_DBG_OPTION(validateDAGDebug, 1);
3277: RF_DBG_OPTION(validateVisitedDebug, 1);
3278: /* XXX turn to zero by default ? */
3279: RF_DBG_OPTION(verifyParityDebug, 1);
3280: RF_DBG_OPTION(debugKernelAccess, 1);
3281: /* DoAccessKernel debugging. */
3282:
3283: #if RF_INCLUDE_PARITYLOGGING > 0
3284: RF_DBG_OPTION(forceParityLogReint, 0);
3285: RF_DBG_OPTION(numParityRegions, 0);
3286: /* Number of regions in the array. */
3287: RF_DBG_OPTION(numReintegrationThreads, 1);
3288: RF_DBG_OPTION(parityLogDebug, 1);
3289: /* If nonzero, enables debugging of parity logging. */
3290: RF_DBG_OPTION(totalInCoreLogCapacity, 1024 * 1024);
3291: /* Target bytes available for in-core logs. */
3292: #endif /* RF_INCLUDE_PARITYLOGGING > 0 */
3293:
3294: #endif /* RAID_DEBUG_ALL */
3295: }
3296:
3297: int
3298: rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
3299: {
3300: RF_ComponentLabel_t clabel;
3301: struct vnode *vp;
3302: dev_t dev;
3303: int row, column;
3304:
3305: raidPtr->autoconfigure = new_value;
3306: for(row=0; row<raidPtr->numRow; row++) {
3307: for(column=0; column<raidPtr->numCol; column++) {
3308: if (raidPtr->Disks[row][column].status ==
3309: rf_ds_optimal) {
3310: dev = raidPtr->Disks[row][column].dev;
3311: vp = raidPtr->raid_cinfo[row][column].ci_vp;
3312: raidread_component_label(dev, vp, &clabel);
3313: clabel.autoconfigure = new_value;
3314: raidwrite_component_label(dev, vp, &clabel);
3315: }
3316: }
3317: }
3318: return(new_value);
3319: }
3320:
3321: int
3322: rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
3323: {
3324: RF_ComponentLabel_t clabel;
3325: struct vnode *vp;
3326: dev_t dev;
3327: int row, column;
3328:
3329: raidPtr->root_partition = new_value;
3330: for(row=0; row<raidPtr->numRow; row++) {
3331: for(column=0; column<raidPtr->numCol; column++) {
3332: if (raidPtr->Disks[row][column].status ==
3333: rf_ds_optimal) {
3334: dev = raidPtr->Disks[row][column].dev;
3335: vp = raidPtr->raid_cinfo[row][column].ci_vp;
3336: raidread_component_label(dev, vp, &clabel);
3337: clabel.root_partition = new_value;
3338: raidwrite_component_label(dev, vp, &clabel);
3339: }
3340: }
3341: }
3342: return(new_value);
3343: }
3344:
3345: void
3346: rf_release_all_vps(RF_ConfigSet_t *cset)
3347: {
3348: RF_AutoConfig_t *ac;
3349:
3350: ac = cset->ac;
3351: while(ac!=NULL) {
3352: /* Close the vp, and give it back. */
3353: if (ac->vp) {
3354: VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
3355: vrele(ac->vp);
3356: ac->vp = NULL;
3357: }
3358: ac = ac->next;
3359: }
3360: }
3361:
3362:
3363: void
3364: rf_cleanup_config_set(RF_ConfigSet_t *cset)
3365: {
3366: RF_AutoConfig_t *ac;
3367: RF_AutoConfig_t *next_ac;
3368:
3369: ac = cset->ac;
3370: while(ac!=NULL) {
3371: next_ac = ac->next;
3372: /* Nuke the label. */
3373: free(ac->clabel, M_RAIDFRAME);
3374: /* Cleanup the config structure. */
3375: free(ac, M_RAIDFRAME);
3376: /* "next..." */
3377: ac = next_ac;
3378: }
3379: /* And, finally, nuke the config set. */
3380: free(cset, M_RAIDFRAME);
3381: }
3382:
3383:
/*
 * Fill in a component label from the current state of the array:
 * identity (serial number, mod counter), geometry (rows/columns,
 * stripe-unit layout, block size/count), and the configuration
 * flags (autoconfigure, root_partition, last unit, config order).
 * The caller is responsible for writing the label to disk.
 */
void
raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
{
	/* Current version number. */
	clabel->version = RF_COMPONENT_LABEL_VERSION;
	clabel->serial_number = raidPtr->serial_number;
	clabel->mod_counter = raidPtr->mod_counter;
	clabel->num_rows = raidPtr->numRow;
	clabel->num_columns = raidPtr->numCol;
	clabel->clean = RF_RAID_DIRTY;	/* Not clean. */
	clabel->status = rf_ds_optimal;	/* "It's good !" */

	/* Stripe-unit layout parameters. */
	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;

	/* Per-component size information. */
	clabel->blockSize = raidPtr->bytesPerSector;
	clabel->numBlocks = raidPtr->sectorsPerDisk;

	/* XXX Not portable. */
	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
	clabel->maxOutstanding = raidPtr->maxOutstanding;
	clabel->autoconfigure = raidPtr->autoconfigure;
	clabel->root_partition = raidPtr->root_partition;
	clabel->last_unit = raidPtr->raidid;
	clabel->config_order = raidPtr->config_order;
}
3411:
3412: int
3413: rf_auto_config_set(RF_ConfigSet_t *cset, int *unit)
3414: {
3415: RF_Raid_t *raidPtr;
3416: RF_Config_t *config;
3417: int raidID;
3418: int retcode;
3419:
3420: db1_printf(("RAID autoconfigure\n"));
3421:
3422: retcode = 0;
3423: *unit = -1;
3424:
3425: /* 1. Create a config structure. */
3426:
3427: config = (RF_Config_t *)malloc(sizeof(RF_Config_t), M_RAIDFRAME,
3428: M_NOWAIT);
3429: if (config==NULL) {
3430: printf("Out of mem!?!?\n");
3431: /* XXX Do something more intelligent here. */
3432: return(1);
3433: }
3434:
3435: memset(config, 0, sizeof(RF_Config_t));
3436:
3437: /* XXX raidID needs to be set correctly... */
3438:
3439: /*
3440: * 2. Figure out what RAID ID this one is supposed to live at.
3441: * See if we can get the same RAID dev that it was configured
3442: * on last time...
3443: */
3444:
3445: raidID = cset->ac->clabel->last_unit;
3446: if ((raidID < 0) || (raidID >= numraid)) {
3447: /* Let's not wander off into lala land. */
3448: raidID = numraid - 1;
3449: }
3450: if (raidPtrs[raidID]->valid != 0) {
3451:
3452: /*
3453: * Nope... Go looking for an alternative...
3454: * Start high so we don't immediately use raid0 if that's
3455: * not taken.
3456: */
3457:
3458: for(raidID = numraid - 1; raidID >= 0; raidID--) {
3459: if (raidPtrs[raidID]->valid == 0) {
3460: /* We can use this one ! */
3461: break;
3462: }
3463: }
3464: }
3465:
3466: if (raidID < 0) {
3467: /* Punt... */
3468: printf("Unable to auto configure this set!\n");
3469: printf("(Out of RAID devs!)\n");
3470: return(1);
3471: }
3472: raidPtr = raidPtrs[raidID];
3473:
3474: /* XXX All this stuff should be done SOMEWHERE ELSE ! */
3475: raidPtr->raidid = raidID;
3476: raidPtr->openings = RAIDOUTSTANDING;
3477:
3478: /* 3. Build the configuration structure. */
3479: rf_create_configuration(cset->ac, config, raidPtr);
3480:
3481: /* 4. Do the configuration. */
3482: retcode = rf_Configure(raidPtr, config, cset->ac);
3483:
3484: if (retcode == 0) {
3485:
3486: raidinit(raidPtrs[raidID]);
3487:
3488: rf_markalldirty(raidPtrs[raidID]);
3489: raidPtrs[raidID]->autoconfigure = 1; /* XXX Do this here ? */
3490: if (cset->ac->clabel->root_partition==1) {
3491: /*
3492: * Everything configured just fine. Make a note
3493: * that this set is eligible to be root.
3494: */
3495: cset->rootable = 1;
3496: /* XXX Do this here ? */
3497: raidPtrs[raidID]->root_partition = 1;
3498: }
3499: }
3500:
3501: printf(": (%s) total number of sectors is %lu (%lu MB)%s\n",
3502: (raidPtrs[raidID]->Layout).map->configName,
3503: (unsigned long) raidPtrs[raidID]->totalSectors,
3504: (unsigned long) (raidPtrs[raidID]->totalSectors / 1024 *
3505: (1 << raidPtrs[raidID]->logBytesPerSector) / 1024),
3506: raidPtrs[raidID]->root_partition ? " as root" : "");
3507:
3508: /* 5. Cleanup. */
3509: free(config, M_RAIDFRAME);
3510:
3511: *unit = raidID;
3512: return(retcode);
3513: }
3514:
3515: void
3516: rf_disk_unbusy(RF_RaidAccessDesc_t *desc)
3517: {
3518: struct buf *bp;
3519:
3520: bp = (struct buf *)desc->bp;
3521: disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev,
3522: (bp->b_bcount - bp->b_resid),
3523: (bp->b_flags & B_READ));
3524: }
CVSweb