Annotation of sys/dev/raidframe/rf_raid5.c, Revision 1.1.1.1
1.1 nbrk 1: /* $OpenBSD: rf_raid5.c,v 1.4 2002/12/16 07:01:04 tdeval Exp $ */
2: /* $NetBSD: rf_raid5.c,v 1.4 2000/01/08 22:57:30 oster Exp $ */
3:
4: /*
5: * Copyright (c) 1995 Carnegie-Mellon University.
6: * All rights reserved.
7: *
8: * Author: Mark Holland
9: *
10: * Permission to use, copy, modify and distribute this software and
11: * its documentation is hereby granted, provided that both the copyright
12: * notice and this permission notice appear in all copies of the
13: * software, derivative works or modified versions, and any portions
14: * thereof, and that both notices appear in supporting documentation.
15: *
16: * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
17: * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
18: * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
19: *
20: * Carnegie Mellon requests users of this software to return to
21: *
22: * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
23: * School of Computer Science
24: * Carnegie Mellon University
25: * Pittsburgh PA 15213-3890
26: *
27: * any improvements or extensions that they make and grant Carnegie the
28: * rights to redistribute these changes.
29: */
30:
31: /*****************************************************************************
32: *
33: * rf_raid5.c -- Implements RAID Level 5.
34: *
35: *****************************************************************************/
36:
37: #include "rf_types.h"
38: #include "rf_raid.h"
39: #include "rf_raid5.h"
40: #include "rf_dag.h"
41: #include "rf_dagffrd.h"
42: #include "rf_dagffwr.h"
43: #include "rf_dagdegrd.h"
44: #include "rf_dagdegwr.h"
45: #include "rf_dagutils.h"
46: #include "rf_general.h"
47: #include "rf_map.h"
48: #include "rf_utils.h"
49:
50: typedef struct RF_Raid5ConfigInfo_s {
51: RF_RowCol_t **stripeIdentifier; /*
52: * Filled in at config time and used
53: * by IdentifyStripe.
54: */
55: } RF_Raid5ConfigInfo_t;
56:
57:
58: int
59: rf_ConfigureRAID5(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
60: RF_Config_t *cfgPtr)
61: {
62: RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
63: RF_Raid5ConfigInfo_t *info;
64: RF_RowCol_t i, j, startdisk;
65:
66: /* Create a RAID level 5 configuration structure. */
67: RF_MallocAndAdd(info, sizeof(RF_Raid5ConfigInfo_t),
68: (RF_Raid5ConfigInfo_t *), raidPtr->cleanupList);
69: if (info == NULL)
70: return (ENOMEM);
71: layoutPtr->layoutSpecificInfo = (void *) info;
72:
73: RF_ASSERT(raidPtr->numRow == 1);
74:
75: /*
76: * The stripe identifier must identify the disks in each stripe, IN
77: * THE ORDER THAT THEY APPEAR IN THE STRIPE.
78: */
79: info->stripeIdentifier = rf_make_2d_array(raidPtr->numCol,
80: raidPtr->numCol, raidPtr->cleanupList);
81: if (info->stripeIdentifier == NULL)
82: return (ENOMEM);
83: startdisk = 0;
84: for (i = 0; i < raidPtr->numCol; i++) {
85: for (j = 0; j < raidPtr->numCol; j++) {
86: info->stripeIdentifier[i][j] = (startdisk + j) %
87: raidPtr->numCol;
88: }
89: if ((--startdisk) < 0)
90: startdisk = raidPtr->numCol - 1;
91: }
92:
93: /* Fill in the remaining layout parameters. */
94: layoutPtr->numStripe = layoutPtr->stripeUnitsPerDisk;
95: layoutPtr->bytesPerStripeUnit = layoutPtr->sectorsPerStripeUnit <<
96: raidPtr->logBytesPerSector;
97: layoutPtr->numDataCol = raidPtr->numCol - 1;
98: layoutPtr->dataSectorsPerStripe = layoutPtr->numDataCol *
99: layoutPtr->sectorsPerStripeUnit;
100: layoutPtr->numParityCol = 1;
101: layoutPtr->dataStripeUnitsPerDisk = layoutPtr->stripeUnitsPerDisk;
102:
103: raidPtr->totalSectors = layoutPtr->stripeUnitsPerDisk *
104: layoutPtr->numDataCol * layoutPtr->sectorsPerStripeUnit;
105:
106: return (0);
107: }
108:
109: int
110: rf_GetDefaultNumFloatingReconBuffersRAID5(RF_Raid_t *raidPtr)
111: {
112: return (20);
113: }
114:
115: RF_HeadSepLimit_t
116: rf_GetDefaultHeadSepLimitRAID5(RF_Raid_t *raidPtr)
117: {
118: return (10);
119: }
120:
121: #if !defined(__NetBSD__) && !defined(__OpenBSD__) && !defined(_KERNEL)
122: /* Not currently used. */
123: int
124: rf_ShutdownRAID5(RF_Raid_t *raidPtr)
125: {
126: return (0);
127: }
128: #endif
129:
130: void
131: rf_MapSectorRAID5(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector,
132: RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector, int remap)
133: {
134: RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit;
135: *row = 0;
136: *col = (SUID % raidPtr->numCol);
137: *diskSector = (SUID / (raidPtr->Layout.numDataCol)) *
138: raidPtr->Layout.sectorsPerStripeUnit +
139: (raidSector % raidPtr->Layout.sectorsPerStripeUnit);
140: }
141:
142: void
143: rf_MapParityRAID5(RF_Raid_t *raidPtr, RF_RaidAddr_t raidSector,
144: RF_RowCol_t *row, RF_RowCol_t *col, RF_SectorNum_t *diskSector, int remap)
145: {
146: RF_StripeNum_t SUID = raidSector / raidPtr->Layout.sectorsPerStripeUnit;
147:
148: *row = 0;
149: *col = raidPtr->Layout.numDataCol -
150: (SUID / raidPtr->Layout.numDataCol) % raidPtr->numCol;
151: *diskSector = (SUID / (raidPtr->Layout.numDataCol)) *
152: raidPtr->Layout.sectorsPerStripeUnit +
153: (raidSector % raidPtr->Layout.sectorsPerStripeUnit);
154: }
155:
156: void
157: rf_IdentifyStripeRAID5(RF_Raid_t *raidPtr, RF_RaidAddr_t addr,
158: RF_RowCol_t **diskids, RF_RowCol_t *outRow)
159: {
160: RF_StripeNum_t stripeID =
161: rf_RaidAddressToStripeID(&raidPtr->Layout, addr);
162: RF_Raid5ConfigInfo_t *info =
163: (RF_Raid5ConfigInfo_t *) raidPtr->Layout.layoutSpecificInfo;
164:
165: *outRow = 0;
166: *diskids = info->stripeIdentifier[stripeID % raidPtr->numCol];
167: }
168:
169: void
170: rf_MapSIDToPSIDRAID5(RF_RaidLayout_t *layoutPtr, RF_StripeNum_t stripeID,
171: RF_StripeNum_t *psID, RF_ReconUnitNum_t *which_ru)
172: {
173: *which_ru = 0;
174: *psID = stripeID;
175: }
176:
177:
178: /*
179: * Select an algorithm for performing an access. Returns two pointers,
180: * one to a function that will return information about the DAG, and
181: * another to a function that will create the dag.
182: */
183: void
184: rf_RaidFiveDagSelect(RF_Raid_t *raidPtr, RF_IoType_t type,
185: RF_AccessStripeMap_t *asmap, RF_VoidFuncPtr *createFunc)
186: {
187: RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
188: RF_PhysDiskAddr_t *failedPDA = NULL;
189: RF_RowCol_t frow, fcol;
190: RF_RowStatus_t rstat;
191: int prior_recon;
192:
193: RF_ASSERT(RF_IO_IS_R_OR_W(type));
194:
195: if (asmap->numDataFailed + asmap->numParityFailed > 1) {
196: RF_ERRORMSG("Multiple disks failed in a single group !"
197: " Aborting I/O operation.\n");
198: /* *infoFunc = */ *createFunc = NULL;
199: return;
200: } else
201: if (asmap->numDataFailed + asmap->numParityFailed == 1) {
202:
203: /*
204: * If under recon & already reconstructed, redirect
205: * the access to the spare drive and eliminate the
206: * failure indication.
207: */
208: failedPDA = asmap->failedPDAs[0];
209: frow = failedPDA->row;
210: fcol = failedPDA->col;
211: rstat = raidPtr->status[failedPDA->row];
212: prior_recon = (rstat == rf_rs_reconfigured) || (
213: (rstat == rf_rs_reconstructing) ?
214: rf_CheckRUReconstructed(raidPtr
215: ->reconControl[frow]->reconMap,
216: failedPDA->startSector) : 0);
217: if (prior_recon) {
218: RF_RowCol_t or = failedPDA->row;
219: RF_RowCol_t oc = failedPDA->col;
220: RF_SectorNum_t oo = failedPDA->startSector;
221:
222: if (layoutPtr->map->flags &
223: RF_DISTRIBUTE_SPARE) {
224: /* Redirect to dist spare space. */
225:
226: if (failedPDA == asmap->parityInfo) {
227:
228: /* Parity has failed. */
229: (layoutPtr->map->MapParity)
230: (raidPtr,
231: failedPDA->raidAddress,
232: &failedPDA->row,
233: &failedPDA->col,
234: &failedPDA->startSector,
235: RF_REMAP);
236:
237: if (asmap->parityInfo->next) {
238: /*
239: * Redir 2nd component,
240: * if any.
241: */
242: RF_PhysDiskAddr_t *p =
243: asmap
244: ->parityInfo->next;
245: RF_SectorNum_t SUoffs =
246: p->startSector %
247: layoutPtr->sectorsPerStripeUnit;
248: p->row = failedPDA->row;
249: p->col = failedPDA->col;
250: /*
251: * Cheating:
252: * startSector is not
253: * really a RAID
254: * address.
255: */
256: p->startSector =
257: rf_RaidAddressOfPrevStripeUnitBoundary(
258: layoutPtr, failedPDA->startSector) +
259: SUoffs;
260: }
261: } else
262: if (asmap->parityInfo->next &&
263: failedPDA ==
264: asmap->parityInfo->next) {
265: /*
266: * Should never happen.
267: */
268: RF_ASSERT(0);
269: } else {
270: /* Data has failed. */
271: (layoutPtr->map
272: ->MapSector) (raidPtr,
273: failedPDA->raidAddress,
274: &failedPDA->row,
275: &failedPDA->col,
276: &failedPDA->startSector,
277: RF_REMAP);
278: }
279:
280: } else {
281: /* Redirect to dedicated spare space. */
282:
283: failedPDA->row =
284: raidPtr->Disks[frow][fcol].spareRow;
285: failedPDA->col =
286: raidPtr->Disks[frow][fcol].spareCol;
287:
288: /*
289: * The parity may have two distinct
290: * components, both of which may need
291: * to be redirected.
292: */
293: if (asmap->parityInfo->next) {
294: if (failedPDA ==
295: asmap->parityInfo) {
296: failedPDA->next->row =
297: failedPDA->row;
298: failedPDA->next->col =
299: failedPDA->col;
300: } else {
301: if (failedPDA ==
302: asmap->parityInfo
303: ->next) {
304: /*
305: * Paranoid:
306: * Should never
307: * occur.
308: */
309: asmap
310: ->parityInfo
311: ->row =
312: failedPDA->row;
313: asmap
314: ->parityInfo
315: ->col =
316: failedPDA->col;
317: }
318: }
319: }
320: }
321:
322: RF_ASSERT(failedPDA->col != -1);
323:
324: if (rf_dagDebug || rf_mapDebug) {
325: printf("raid%d: Redirected type '%c'"
326: " r %d c %d o %ld -> r %d c %d"
327: " o %ld\n", raidPtr->raidid,
328: type, or, oc, (long) oo,
329: failedPDA->row, failedPDA->col,
330: (long) failedPDA->startSector);
331: }
332: asmap->numDataFailed = asmap->numParityFailed
333: = 0;
334: }
335: }
336: /*
337: * All DAGs begin/end with block/unblock node. Therefore, hdrSucc &
338: * termAnt counts should always be 1. Also, these counts should not be
339: * visible outside DAG creation routines - manipulating the counts
340: * here should be removed.
341: */
342: if (type == RF_IO_TYPE_READ) {
343: if (asmap->numDataFailed == 0)
344: *createFunc = (RF_VoidFuncPtr)
345: rf_CreateFaultFreeReadDAG;
346: else
347: *createFunc = (RF_VoidFuncPtr)
348: rf_CreateRaidFiveDegradedReadDAG;
349: } else {
350: /*
351: * If mirroring, always use large writes. If the access
352: * requires two distinct parity updates, always do a small
353: * write. If the stripe contains a failure but the access
354: * does not, do a small write. The first conditional
355: * (numStripeUnitsAccessed <= numDataCol/2) uses a
356: * less-than-or-equal rather than just a less-than because
357: * when G is 3 or 4, numDataCol/2 is 1, and I want
358: * single-stripe-unit updates to use just one disk.
359: */
360: if ((asmap->numDataFailed + asmap->numParityFailed) == 0) {
361: if (rf_suppressLocksAndLargeWrites ||
362: (((asmap->numStripeUnitsAccessed <=
363: (layoutPtr->numDataCol / 2)) &&
364: (layoutPtr->numDataCol != 1)) ||
365: (asmap->parityInfo->next != NULL) ||
366: rf_CheckStripeForFailures(raidPtr, asmap))) {
367: *createFunc = (RF_VoidFuncPtr)
368: rf_CreateSmallWriteDAG;
369: } else
370: *createFunc = (RF_VoidFuncPtr)
371: rf_CreateLargeWriteDAG;
372: } else {
373: if (asmap->numParityFailed == 1)
374: *createFunc = (RF_VoidFuncPtr)
375: rf_CreateNonRedundantWriteDAG;
376: else
377: if (asmap->numStripeUnitsAccessed != 1 &&
378: failedPDA->numSector !=
379: layoutPtr->sectorsPerStripeUnit)
380: *createFunc = NULL;
381: else
382: *createFunc = (RF_VoidFuncPtr)
383: rf_CreateDegradedWriteDAG;
384: }
385: }
386: }
CVSweb