Annotation of sys/dev/raidframe/rf_map.c, Revision 1.1.1.1
1.1 nbrk 1: /* $OpenBSD: rf_map.c,v 1.5 2002/12/16 07:01:04 tdeval Exp $ */
2: /* $NetBSD: rf_map.c,v 1.5 2000/06/29 00:22:27 oster Exp $ */
3:
4: /*
5: * Copyright (c) 1995 Carnegie-Mellon University.
6: * All rights reserved.
7: *
8: * Author: Mark Holland
9: *
10: * Permission to use, copy, modify and distribute this software and
11: * its documentation is hereby granted, provided that both the copyright
12: * notice and this permission notice appear in all copies of the
13: * software, derivative works or modified versions, and any portions
14: * thereof, and that both notices appear in supporting documentation.
15: *
16: * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
17: * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
18: * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
19: *
20: * Carnegie Mellon requests users of this software to return to
21: *
22: * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
23: * School of Computer Science
24: * Carnegie Mellon University
25: * Pittsburgh PA 15213-3890
26: *
27: * any improvements or extensions that they make and grant Carnegie the
28: * rights to redistribute these changes.
29: */
30:
31: /*****************************************************************************
32: *
33: * map.c -- Main code for mapping RAID addresses to physical disk addresses.
34: *
35: *****************************************************************************/
36:
37: #include "rf_types.h"
38: #include "rf_threadstuff.h"
39: #include "rf_raid.h"
40: #include "rf_general.h"
41: #include "rf_map.h"
42: #include "rf_freelist.h"
43: #include "rf_shutdown.h"
44:
45: void rf_FreePDAList(RF_PhysDiskAddr_t *, RF_PhysDiskAddr_t *, int);
46: void rf_FreeASMList(RF_AccessStripeMap_t *, RF_AccessStripeMap_t *, int);
47:
48: /*****************************************************************************
49: *
50: * MapAccess -- Main 1st order mapping routine.
51: *
52: * Maps an access in the RAID address space to the corresponding set of
53: * physical disk addresses. The result is returned as a list of
54: * AccessStripeMap structures, one per stripe accessed. Each ASM structure
55: * contains a pointer to a list of PhysDiskAddr structures, which describe
56: * the physical locations touched by the user access. Note that this routine
57: * returns only static mapping information, i.e. the list of physical
58: * addresses returned does not necessarily identify the set of physical
59: * locations that will actually be read or written.
60: *
61: * The routine also maps the parity. The physical disk location returned
62: * always indicates the entire parity unit, even when only a subset of it
63: * is being accessed. This is because an access that is not stripe unit
64: * aligned but that spans a stripe unit boundary may require access two
65: * distinct portions of the parity unit, and we can't yet tell which
66: * portion(s) we'll actually need. We leave it up to the algorithm
67: * selection code to decide what subset of the parity unit to access.
68: *
69: * Note that addresses in the RAID address space must always be maintained
70: * as longs, instead of ints.
71: *
72: * This routine returns NULL if numBlocks is 0.
73: *
74: *****************************************************************************/
75:
76: RF_AccessStripeMapHeader_t *
77: rf_MapAccess(
78: RF_Raid_t *raidPtr,
79: RF_RaidAddr_t raidAddress, /*
80: * Starting address in RAID address
81: * space.
82: */
83: RF_SectorCount_t numBlocks, /*
84: * Number of blocks in RAID address
85: * space to access.
86: */
87: caddr_t buffer, /* Buffer to supply/receive data. */
88: int remap /*
89: * 1 => remap addresses to spare space.
90: */
91: )
92: {
93: RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
94: RF_AccessStripeMapHeader_t *asm_hdr = NULL;
95: RF_AccessStripeMap_t *asm_list = NULL, *asm_p = NULL;
96: int faultsTolerated = layoutPtr->map->faultsTolerated;
97: /* We'll change raidAddress along the way. */
98: RF_RaidAddr_t startAddress = raidAddress;
99: RF_RaidAddr_t endAddress = raidAddress + numBlocks;
100: RF_RaidDisk_t **disks = raidPtr->Disks;
101:
102: RF_PhysDiskAddr_t *pda_p, *pda_q;
103: RF_StripeCount_t numStripes = 0;
104: RF_RaidAddr_t stripeRealEndAddress, stripeEndAddress;
105: RF_RaidAddr_t nextStripeUnitAddress;
106: RF_RaidAddr_t startAddrWithinStripe, lastRaidAddr;
107: RF_StripeCount_t totStripes;
108: RF_StripeNum_t stripeID, lastSID, SUID, lastSUID;
109: RF_AccessStripeMap_t *asmList, *t_asm;
110: RF_PhysDiskAddr_t *pdaList, *t_pda;
111:
112: /* Allocate all the ASMs and PDAs up front. */
113: lastRaidAddr = raidAddress + numBlocks - 1;
114: stripeID = rf_RaidAddressToStripeID(layoutPtr, raidAddress);
115: lastSID = rf_RaidAddressToStripeID(layoutPtr, lastRaidAddr);
116: totStripes = lastSID - stripeID + 1;
117: SUID = rf_RaidAddressToStripeUnitID(layoutPtr, raidAddress);
118: lastSUID = rf_RaidAddressToStripeUnitID(layoutPtr, lastRaidAddr);
119:
120: asmList = rf_AllocASMList(totStripes);
121: pdaList = rf_AllocPDAList(lastSUID - SUID + 1 +
122: faultsTolerated * totStripes); /*
123: * May also need pda(s)
124: * per stripe for parity.
125: */
126:
127: if (raidAddress + numBlocks > raidPtr->totalSectors) {
128: RF_ERRORMSG1("Unable to map access because offset (%d)"
129: " was invalid\n", (int) raidAddress);
130: return (NULL);
131: }
132: if (rf_mapDebug)
133: rf_PrintRaidAddressInfo(raidPtr, raidAddress, numBlocks);
134: for (; raidAddress < endAddress;) {
135: /* Make the next stripe structure. */
136: RF_ASSERT(asmList);
137: t_asm = asmList;
138: asmList = asmList->next;
139: bzero((char *) t_asm, sizeof(RF_AccessStripeMap_t));
140: if (!asm_p)
141: asm_list = asm_p = t_asm;
142: else {
143: asm_p->next = t_asm;
144: asm_p = asm_p->next;
145: }
146: numStripes++;
147:
148: /* Map SUs from current location to the end of the stripe. */
149: asm_p->stripeID =
150: /* rf_RaidAddressToStripeID(layoutPtr, raidAddress) */
151: stripeID++;
152: stripeRealEndAddress =
153: rf_RaidAddressOfNextStripeBoundary(layoutPtr, raidAddress);
154: stripeEndAddress = RF_MIN(endAddress, stripeRealEndAddress);
155: asm_p->raidAddress = raidAddress;
156: asm_p->endRaidAddress = stripeEndAddress;
157:
158: /* Map each stripe unit in the stripe. */
159: pda_p = NULL;
160: /*
161: * Raid addr of start of portion of access that is within this
162: * stripe.
163: */
164: startAddrWithinStripe = raidAddress;
165:
166: for (; raidAddress < stripeEndAddress;) {
167: RF_ASSERT(pdaList);
168: t_pda = pdaList;
169: pdaList = pdaList->next;
170: bzero((char *) t_pda, sizeof(RF_PhysDiskAddr_t));
171: if (!pda_p)
172: asm_p->physInfo = pda_p = t_pda;
173: else {
174: pda_p->next = t_pda;
175: pda_p = pda_p->next;
176: }
177:
178: pda_p->type = RF_PDA_TYPE_DATA;
179: (layoutPtr->map->MapSector) (raidPtr, raidAddress,
180: &(pda_p->row), &(pda_p->col),
181: &(pda_p->startSector), remap);
182:
183: /*
184: * Mark any failures we find.
185: * failedPDA is don't-care if there is more than
186: * one failure.
187: */
188: /*
189: * The RAID address corresponding to this physical
190: * disk address.
191: */
192: pda_p->raidAddress = raidAddress;
193: nextStripeUnitAddress =
194: rf_RaidAddressOfNextStripeUnitBoundary(layoutPtr,
195: raidAddress);
196: pda_p->numSector = RF_MIN(endAddress,
197: nextStripeUnitAddress) - raidAddress;
198: RF_ASSERT(pda_p->numSector != 0);
199: rf_ASMCheckStatus(raidPtr, pda_p, asm_p, disks, 0);
200: pda_p->bufPtr = buffer + rf_RaidAddressToByte(raidPtr,
201: (raidAddress - startAddress));
202: asm_p->totalSectorsAccessed += pda_p->numSector;
203: asm_p->numStripeUnitsAccessed++;
204: asm_p->origRow = pda_p->row; /*
205: * Redundant but
206: * harmless to do this
207: * in every loop
208: * iteration.
209: */
210:
211: raidAddress = RF_MIN(endAddress, nextStripeUnitAddress);
212: }
213:
214: /*
215: * Map the parity. At this stage, the startSector and
216: * numSector fields for the parity unit are always set to
217: * indicate the entire parity unit. We may modify this after
218: * mapping the data portion.
219: */
220: switch (faultsTolerated) {
221: case 0:
222: break;
223: case 1: /* Single fault tolerant. */
224: RF_ASSERT(pdaList);
225: t_pda = pdaList;
226: pdaList = pdaList->next;
227: bzero((char *) t_pda, sizeof(RF_PhysDiskAddr_t));
228: pda_p = asm_p->parityInfo = t_pda;
229: pda_p->type = RF_PDA_TYPE_PARITY;
230: (layoutPtr->map->MapParity) (raidPtr,
231: rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr,
232: startAddrWithinStripe), &(pda_p->row),
233: &(pda_p->col), &(pda_p->startSector), remap);
234: pda_p->numSector = layoutPtr->sectorsPerStripeUnit;
235: /*
236: * raidAddr may be needed to find unit to redirect to.
237: */
238: pda_p->raidAddress =
239: rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr,
240: startAddrWithinStripe);
241: rf_ASMCheckStatus(raidPtr, pda_p, asm_p, disks, 1);
242: rf_ASMParityAdjust(asm_p->parityInfo,
243: startAddrWithinStripe, endAddress,
244: layoutPtr, asm_p);
245:
246: break;
247: case 2: /* Two fault tolerant. */
248: RF_ASSERT(pdaList && pdaList->next);
249: t_pda = pdaList;
250: pdaList = pdaList->next;
251: bzero((char *) t_pda, sizeof(RF_PhysDiskAddr_t));
252: pda_p = asm_p->parityInfo = t_pda;
253: pda_p->type = RF_PDA_TYPE_PARITY;
254: t_pda = pdaList;
255: pdaList = pdaList->next;
256: bzero((char *) t_pda, sizeof(RF_PhysDiskAddr_t));
257: pda_q = asm_p->qInfo = t_pda;
258: pda_q->type = RF_PDA_TYPE_Q;
259: (layoutPtr->map->MapParity) (raidPtr,
260: rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr,
261: startAddrWithinStripe), &(pda_p->row),
262: &(pda_p->col), &(pda_p->startSector), remap);
263: (layoutPtr->map->MapQ) (raidPtr,
264: rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr,
265: startAddrWithinStripe), &(pda_q->row),
266: &(pda_q->col), &(pda_q->startSector), remap);
267: pda_q->numSector = pda_p->numSector =
268: layoutPtr->sectorsPerStripeUnit;
269: /*
270: * raidAddr may be needed to find unit to redirect to.
271: */
272: pda_p->raidAddress =
273: rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr,
274: startAddrWithinStripe);
275: pda_q->raidAddress =
276: rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr,
277: startAddrWithinStripe);
278: /* Failure mode stuff. */
279: rf_ASMCheckStatus(raidPtr, pda_p, asm_p, disks, 1);
280: rf_ASMCheckStatus(raidPtr, pda_q, asm_p, disks, 1);
281: rf_ASMParityAdjust(asm_p->parityInfo,
282: startAddrWithinStripe, endAddress,
283: layoutPtr, asm_p);
284: rf_ASMParityAdjust(asm_p->qInfo, startAddrWithinStripe,
285: endAddress, layoutPtr, asm_p);
286: break;
287: }
288: }
289: RF_ASSERT(asmList == NULL && pdaList == NULL);
290: /* Make the header structure. */
291: asm_hdr = rf_AllocAccessStripeMapHeader();
292: RF_ASSERT(numStripes == totStripes);
293: asm_hdr->numStripes = numStripes;
294: asm_hdr->stripeMap = asm_list;
295:
296: if (rf_mapDebug)
297: rf_PrintAccessStripeMap(asm_hdr);
298: return (asm_hdr);
299: }
300:
301: /*****************************************************************************
302: * This routine walks through an ASM list and marks the PDAs that have failed.
303: * It's called only when a disk failure causes an in-flight DAG to fail.
304: * The parity may consist of two components, but we want to use only one
305: * failedPDA pointer. Thus we set failedPDA to point to the first parity
306: * component, and rely on the rest of the code to do the right thing with this.
307: *****************************************************************************/
308: void
309: rf_MarkFailuresInASMList(RF_Raid_t *raidPtr, RF_AccessStripeMapHeader_t *asm_h)
310: {
311: RF_RaidDisk_t **disks = raidPtr->Disks;
312: RF_AccessStripeMap_t *asmap;
313: RF_PhysDiskAddr_t *pda;
314:
315: for (asmap = asm_h->stripeMap; asmap; asmap = asmap->next) {
316: asmap->numDataFailed = asmap->numParityFailed =
317: asmap->numQFailed = 0;
318: asmap->numFailedPDAs = 0;
319: bzero((char *) asmap->failedPDAs,
320: RF_MAX_FAILED_PDA * sizeof(RF_PhysDiskAddr_t *));
321: for (pda = asmap->physInfo; pda; pda = pda->next) {
322: if (RF_DEAD_DISK(disks[pda->row][pda->col].status)) {
323: asmap->numDataFailed++;
324: asmap->failedPDAs[asmap->numFailedPDAs] = pda;
325: asmap->numFailedPDAs++;
326: }
327: }
328: pda = asmap->parityInfo;
329: if (pda && RF_DEAD_DISK(disks[pda->row][pda->col].status)) {
330: asmap->numParityFailed++;
331: asmap->failedPDAs[asmap->numFailedPDAs] = pda;
332: asmap->numFailedPDAs++;
333: }
334: pda = asmap->qInfo;
335: if (pda && RF_DEAD_DISK(disks[pda->row][pda->col].status)) {
336: asmap->numQFailed++;
337: asmap->failedPDAs[asmap->numFailedPDAs] = pda;
338: asmap->numFailedPDAs++;
339: }
340: }
341: }
342:
343: /*****************************************************************************
344: *
345: * DuplicateASM -- Duplicates an ASM and returns the new one.
346: *
347: *****************************************************************************/
348: RF_AccessStripeMap_t *
349: rf_DuplicateASM(RF_AccessStripeMap_t *asmap)
350: {
351: RF_AccessStripeMap_t *new_asm;
352: RF_PhysDiskAddr_t *pda, *new_pda, *t_pda;
353:
354: new_pda = NULL;
355: new_asm = rf_AllocAccessStripeMapComponent();
356: bcopy((char *) asmap, (char *) new_asm, sizeof(RF_AccessStripeMap_t));
357: new_asm->numFailedPDAs = 0; /* ??? */
358: new_asm->failedPDAs[0] = NULL;
359: new_asm->physInfo = NULL;
360: new_asm->parityInfo = NULL;
361: new_asm->next = NULL;
362:
363: for (pda = asmap->physInfo; pda; pda = pda->next) {
364: /* Copy the physInfo list. */
365: t_pda = rf_AllocPhysDiskAddr();
366: bcopy((char *) pda, (char *) t_pda, sizeof(RF_PhysDiskAddr_t));
367: t_pda->next = NULL;
368: if (!new_asm->physInfo) {
369: new_asm->physInfo = t_pda;
370: new_pda = t_pda;
371: } else {
372: new_pda->next = t_pda;
373: new_pda = new_pda->next;
374: }
375: if (pda == asmap->failedPDAs[0])
376: new_asm->failedPDAs[0] = t_pda;
377: }
378: for (pda = asmap->parityInfo; pda; pda = pda->next) {
379: /* Copy the parityInfo list. */
380: t_pda = rf_AllocPhysDiskAddr();
381: bcopy((char *) pda, (char *) t_pda, sizeof(RF_PhysDiskAddr_t));
382: t_pda->next = NULL;
383: if (!new_asm->parityInfo) {
384: new_asm->parityInfo = t_pda;
385: new_pda = t_pda;
386: } else {
387: new_pda->next = t_pda;
388: new_pda = new_pda->next;
389: }
390: if (pda == asmap->failedPDAs[0])
391: new_asm->failedPDAs[0] = t_pda;
392: }
393: return (new_asm);
394: }
395:
396: /*****************************************************************************
397: *
398: * DuplicatePDA -- Duplicates a PDA and returns the new one.
399: *
400: *****************************************************************************/
401: RF_PhysDiskAddr_t *
402: rf_DuplicatePDA(RF_PhysDiskAddr_t *pda)
403: {
404: RF_PhysDiskAddr_t *new;
405:
406: new = rf_AllocPhysDiskAddr();
407: bcopy((char *) pda, (char *) new, sizeof(RF_PhysDiskAddr_t));
408: return (new);
409: }
410:
411: /*****************************************************************************
412: *
413: * Routines to allocate and free list elements. All allocation routines zero
414: * the structure before returning it.
415: *
416: * FreePhysDiskAddr is static. It should never be called directly, because
417: * FreeAccessStripeMap takes care of freeing the PhysDiskAddr list.
418: *
419: *****************************************************************************/
420:
421: static RF_FreeList_t *rf_asmhdr_freelist;
422: #define RF_MAX_FREE_ASMHDR 128
423: #define RF_ASMHDR_INC 16
424: #define RF_ASMHDR_INITIAL 32
425:
426: static RF_FreeList_t *rf_asm_freelist;
427: #define RF_MAX_FREE_ASM 192
428: #define RF_ASM_INC 24
429: #define RF_ASM_INITIAL 64
430:
431: static RF_FreeList_t *rf_pda_freelist;
432: #define RF_MAX_FREE_PDA 192
433: #define RF_PDA_INC 24
434: #define RF_PDA_INITIAL 64
435:
436: /*
437: * Called at shutdown time. So far, all that is necessary is to release
438: * all the free lists.
439: */
440: void rf_ShutdownMapModule(void *);
441: void
442: rf_ShutdownMapModule(void *ignored)
443: {
444: RF_FREELIST_DESTROY(rf_asmhdr_freelist, next,
445: (RF_AccessStripeMapHeader_t *));
446: RF_FREELIST_DESTROY(rf_pda_freelist, next, (RF_PhysDiskAddr_t *));
447: RF_FREELIST_DESTROY(rf_asm_freelist, next, (RF_AccessStripeMap_t *));
448: }
449:
450: int
451: rf_ConfigureMapModule(RF_ShutdownList_t **listp)
452: {
453: int rc;
454:
455: RF_FREELIST_CREATE(rf_asmhdr_freelist, RF_MAX_FREE_ASMHDR,
456: RF_ASMHDR_INC, sizeof(RF_AccessStripeMapHeader_t));
457: if (rf_asmhdr_freelist == NULL) {
458: return (ENOMEM);
459: }
460: RF_FREELIST_CREATE(rf_asm_freelist, RF_MAX_FREE_ASM,
461: RF_ASM_INC, sizeof(RF_AccessStripeMap_t));
462: if (rf_asm_freelist == NULL) {
463: RF_FREELIST_DESTROY(rf_asmhdr_freelist, next,
464: (RF_AccessStripeMapHeader_t *));
465: return (ENOMEM);
466: }
467: RF_FREELIST_CREATE(rf_pda_freelist, RF_MAX_FREE_PDA, RF_PDA_INC,
468: sizeof(RF_PhysDiskAddr_t));
469: if (rf_pda_freelist == NULL) {
470: RF_FREELIST_DESTROY(rf_asmhdr_freelist, next,
471: (RF_AccessStripeMapHeader_t *));
472: RF_FREELIST_DESTROY(rf_pda_freelist, next,
473: (RF_PhysDiskAddr_t *));
474: return (ENOMEM);
475: }
476: rc = rf_ShutdownCreate(listp, rf_ShutdownMapModule, NULL);
477: if (rc) {
478: RF_ERRORMSG3("Unable to add to shutdown list file %s line %d"
479: " rc=%d\n", __FILE__, __LINE__, rc);
480: rf_ShutdownMapModule(NULL);
481: return (rc);
482: }
483: RF_FREELIST_PRIME(rf_asmhdr_freelist, RF_ASMHDR_INITIAL, next,
484: (RF_AccessStripeMapHeader_t *));
485: RF_FREELIST_PRIME(rf_asm_freelist, RF_ASM_INITIAL, next,
486: (RF_AccessStripeMap_t *));
487: RF_FREELIST_PRIME(rf_pda_freelist, RF_PDA_INITIAL, next,
488: (RF_PhysDiskAddr_t *));
489:
490: return (0);
491: }
492:
493: RF_AccessStripeMapHeader_t *
494: rf_AllocAccessStripeMapHeader(void)
495: {
496: RF_AccessStripeMapHeader_t *p;
497:
498: RF_FREELIST_GET(rf_asmhdr_freelist, p, next,
499: (RF_AccessStripeMapHeader_t *));
500: bzero((char *) p, sizeof(RF_AccessStripeMapHeader_t));
501:
502: return (p);
503: }
504:
505: void
506: rf_FreeAccessStripeMapHeader(RF_AccessStripeMapHeader_t *p)
507: {
508: RF_FREELIST_FREE(rf_asmhdr_freelist, p, next);
509: }
510:
511: RF_PhysDiskAddr_t *
512: rf_AllocPhysDiskAddr(void)
513: {
514: RF_PhysDiskAddr_t *p;
515:
516: RF_FREELIST_GET(rf_pda_freelist, p, next, (RF_PhysDiskAddr_t *));
517: bzero((char *) p, sizeof(RF_PhysDiskAddr_t));
518:
519: return (p);
520: }
521:
522: /*
523: * Allocates a list of PDAs, locking the free list only once.
524: * When we have to call calloc, we do it one component at a time to simplify
525: * the process of freeing the list at program shutdown. This should not be
526: * much of a performance hit, because it should be very infrequently executed.
527: */
528: RF_PhysDiskAddr_t *
529: rf_AllocPDAList(int count)
530: {
531: RF_PhysDiskAddr_t *p = NULL;
532:
533: RF_FREELIST_GET_N(rf_pda_freelist, p, next, (RF_PhysDiskAddr_t *),
534: count);
535: return (p);
536: }
537:
538: void
539: rf_FreePhysDiskAddr(RF_PhysDiskAddr_t *p)
540: {
541: RF_FREELIST_FREE(rf_pda_freelist, p, next);
542: }
543:
544: void
545: rf_FreePDAList(
546: /* Pointers to start and end of list. */
547: RF_PhysDiskAddr_t *l_start,
548: RF_PhysDiskAddr_t *l_end,
549: int count /* Number of elements in list. */
550: )
551: {
552: RF_FREELIST_FREE_N(rf_pda_freelist, l_start, next,
553: (RF_PhysDiskAddr_t *), count);
554: }
555:
556: RF_AccessStripeMap_t *
557: rf_AllocAccessStripeMapComponent(void)
558: {
559: RF_AccessStripeMap_t *p;
560:
561: RF_FREELIST_GET(rf_asm_freelist, p, next, (RF_AccessStripeMap_t *));
562: bzero((char *) p, sizeof(RF_AccessStripeMap_t));
563:
564: return (p);
565: }
566:
567: /*
568: * This is essentially identical to AllocPDAList. I should combine the two.
569: * When we have to call calloc, we do it one component at a time to simplify
570: * the process of freeing the list at program shutdown. This should not be
571: * much of a performance hit, because it should be very infrequently executed.
572: */
573: RF_AccessStripeMap_t *
574: rf_AllocASMList(int count)
575: {
576: RF_AccessStripeMap_t *p = NULL;
577:
578: RF_FREELIST_GET_N(rf_asm_freelist, p, next, (RF_AccessStripeMap_t *),
579: count);
580: return (p);
581: }
582:
583: void
584: rf_FreeAccessStripeMapComponent(RF_AccessStripeMap_t *p)
585: {
586: RF_FREELIST_FREE(rf_asm_freelist, p, next);
587: }
588:
589: void
590: rf_FreeASMList(RF_AccessStripeMap_t *l_start, RF_AccessStripeMap_t *l_end,
591: int count)
592: {
593: RF_FREELIST_FREE_N(rf_asm_freelist, l_start, next,
594: (RF_AccessStripeMap_t *), count);
595: }
596:
597: void
598: rf_FreeAccessStripeMap(RF_AccessStripeMapHeader_t *hdr)
599: {
600: RF_AccessStripeMap_t *p, *pt = NULL;
601: RF_PhysDiskAddr_t *pdp, *trailer, *pdaList = NULL, *pdaEnd = NULL;
602: int count = 0, t, asm_count = 0;
603:
604: for (p = hdr->stripeMap; p; p = p->next) {
605:
606: /* Link the 3 pda lists into the accumulating pda list. */
607:
608: if (!pdaList)
609: pdaList = p->qInfo;
610: else
611: pdaEnd->next = p->qInfo;
612: for (trailer = NULL, pdp = p->qInfo; pdp;) {
613: trailer = pdp;
614: pdp = pdp->next;
615: count++;
616: }
617: if (trailer)
618: pdaEnd = trailer;
619:
620: if (!pdaList)
621: pdaList = p->parityInfo;
622: else
623: pdaEnd->next = p->parityInfo;
624: for (trailer = NULL, pdp = p->parityInfo; pdp;) {
625: trailer = pdp;
626: pdp = pdp->next;
627: count++;
628: }
629: if (trailer)
630: pdaEnd = trailer;
631:
632: if (!pdaList)
633: pdaList = p->physInfo;
634: else
635: pdaEnd->next = p->physInfo;
636: for (trailer = NULL, pdp = p->physInfo; pdp;) {
637: trailer = pdp;
638: pdp = pdp->next;
639: count++;
640: }
641: if (trailer)
642: pdaEnd = trailer;
643:
644: pt = p;
645: asm_count++;
646: }
647:
648: /* Debug only. */
649: for (t = 0, pdp = pdaList; pdp; pdp = pdp->next)
650: t++;
651: RF_ASSERT(t == count);
652:
653: if (pdaList)
654: rf_FreePDAList(pdaList, pdaEnd, count);
655: rf_FreeASMList(hdr->stripeMap, pt, asm_count);
656: rf_FreeAccessStripeMapHeader(hdr);
657: }
658:
659: /*
660: * We can't use the large write optimization if there are any failures in the
661: * stripe.
662: * In the declustered layout, there is no way to immediately determine what
663: * disks constitute a stripe, so we actually have to hunt through the stripe
664: * looking for failures.
665: * The reason we map the parity instead of just using asm->parityInfo->col is
666: * because the latter may have been already redirected to a spare drive, which
667: * would mess up the computation of the stripe offset.
668: *
669: * ASSUMES AT MOST ONE FAILURE IN THE STRIPE.
670: */
671: int
672: rf_CheckStripeForFailures(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap)
673: {
674: RF_RowCol_t trow, tcol, prow, pcol, *diskids, row, i;
675: RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
676: RF_StripeCount_t stripeOffset;
677: int numFailures;
678: RF_RaidAddr_t sosAddr;
679: RF_SectorNum_t diskOffset, poffset;
680: RF_RowCol_t testrow;
681:
682: /* Quick out in the fault-free case. */
683: RF_LOCK_MUTEX(raidPtr->mutex);
684: numFailures = raidPtr->numFailures;
685: RF_UNLOCK_MUTEX(raidPtr->mutex);
686: if (numFailures == 0)
687: return (0);
688:
689: sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr,
690: asmap->raidAddress);
691: row = asmap->physInfo->row;
692: (layoutPtr->map->IdentifyStripe) (raidPtr, asmap->raidAddress,
693: &diskids, &testrow);
694: (layoutPtr->map->MapParity) (raidPtr, asmap->raidAddress,
695: &prow, &pcol, &poffset, 0); /* get pcol */
696:
697: /*
698: * This needs not be true if we've redirected the access to a spare in
699: * another row.
700: * RF_ASSERT(row == testrow);
701: */
702: stripeOffset = 0;
703: for (i = 0; i < layoutPtr->numDataCol + layoutPtr->numParityCol; i++) {
704: if (diskids[i] != pcol) {
705: if (RF_DEAD_DISK(raidPtr
706: ->Disks[testrow][diskids[i]].status)) {
707: if (raidPtr->status[testrow] !=
708: rf_rs_reconstructing)
709: return (1);
710: RF_ASSERT(
711: raidPtr->reconControl[testrow]->fcol ==
712: diskids[i]);
713: layoutPtr->map->MapSector(raidPtr,
714: sosAddr + stripeOffset *
715: layoutPtr->sectorsPerStripeUnit,
716: &trow, &tcol, &diskOffset, 0);
717: RF_ASSERT((trow == testrow) &&
718: (tcol == diskids[i]));
719: if (!rf_CheckRUReconstructed(raidPtr
720: ->reconControl[testrow]->reconMap,
721: diskOffset))
722: return (1);
723: asmap->flags |= RF_ASM_REDIR_LARGE_WRITE;
724: return (0);
725: }
726: stripeOffset++;
727: }
728: }
729: return (0);
730: }
731:
732: /*
733: * Return the number of failed data units in the stripe.
734: */
735: int
736: rf_NumFailedDataUnitsInStripe(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap)
737: {
738: RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
739: RF_RowCol_t trow, tcol, row, i;
740: RF_SectorNum_t diskOffset;
741: RF_RaidAddr_t sosAddr;
742: int numFailures;
743:
744: /* Quick out in the fault-free case. */
745: RF_LOCK_MUTEX(raidPtr->mutex);
746: numFailures = raidPtr->numFailures;
747: RF_UNLOCK_MUTEX(raidPtr->mutex);
748: if (numFailures == 0)
749: return (0);
750: numFailures = 0;
751:
752: sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr,
753: asmap->raidAddress);
754: row = asmap->physInfo->row;
755: for (i = 0; i < layoutPtr->numDataCol; i++) {
756: (layoutPtr->map->MapSector) (raidPtr, sosAddr + i *
757: layoutPtr->sectorsPerStripeUnit,
758: &trow, &tcol, &diskOffset, 0);
759: if (RF_DEAD_DISK(raidPtr->Disks[trow][tcol].status))
760: numFailures++;
761: }
762:
763: return numFailures;
764: }
765:
766:
767: /*****************************************************************************
768: *
769: * Debug routines.
770: *
771: *****************************************************************************/
772:
773: void
774: rf_PrintAccessStripeMap(RF_AccessStripeMapHeader_t *asm_h)
775: {
776: rf_PrintFullAccessStripeMap(asm_h, 0);
777: }
778:
779: void
780: rf_PrintFullAccessStripeMap(RF_AccessStripeMapHeader_t *asm_h,
781: int prbuf /* Flag to print buffer pointers. */)
782: {
783: int i;
784: RF_AccessStripeMap_t *asmap = asm_h->stripeMap;
785: RF_PhysDiskAddr_t *p;
786: printf("%d stripes total\n", (int) asm_h->numStripes);
787: for (; asmap; asmap = asmap->next) {
788: /* printf("Num failures: %d\n", asmap->numDataFailed); */
789: /* printf("Num sectors: %d\n",
790: * (int)asmap->totalSectorsAccessed); */
791: printf("Stripe %d (%d sectors), failures: %d data, %d parity: ",
792: (int) asmap->stripeID,
793: (int) asmap->totalSectorsAccessed,
794: (int) asmap->numDataFailed,
795: (int) asmap->numParityFailed);
796: if (asmap->parityInfo) {
797: printf("Parity [r%d c%d s%d-%d", asmap->parityInfo->row,
798: asmap->parityInfo->col,
799: (int) asmap->parityInfo->startSector,
800: (int) (asmap->parityInfo->startSector +
801: asmap->parityInfo->numSector - 1));
802: if (prbuf)
803: printf(" b0x%lx",
804: (unsigned long) asmap->parityInfo->bufPtr);
805: if (asmap->parityInfo->next) {
806: printf(", r%d c%d s%d-%d",
807: asmap->parityInfo->next->row,
808: asmap->parityInfo->next->col,
809: (int) asmap->parityInfo->next->startSector,
810: (int) (asmap->parityInfo->next->startSector
811: + asmap->parityInfo->next->numSector - 1));
812: if (prbuf)
813: printf(" b0x%lx", (unsigned long)
814: asmap->parityInfo->next->bufPtr);
815: RF_ASSERT(asmap->parityInfo->next->next
816: == NULL);
817: }
818: printf("]\n\t");
819: }
820: for (i = 0, p = asmap->physInfo; p; p = p->next, i++) {
821: printf("SU r%d c%d s%d-%d ", p->row, p->col,
822: (int) p->startSector,
823: (int) (p->startSector + p->numSector - 1));
824: if (prbuf)
825: printf("b0x%lx ", (unsigned long) p->bufPtr);
826: if (i && !(i & 1))
827: printf("\n\t");
828: }
829: printf("\n");
830: p = asm_h->stripeMap->failedPDAs[0];
831: if (asm_h->stripeMap->numDataFailed +
832: asm_h->stripeMap->numParityFailed > 1)
833: printf("[multiple failures]\n");
834: else
835: if (asm_h->stripeMap->numDataFailed +
836: asm_h->stripeMap->numParityFailed > 0)
837: printf("\t[Failed PDA: r%d c%d s%d-%d]\n",
838: p->row, p->col, (int) p->startSector,
839: (int) (p->startSector + p->numSector - 1));
840: }
841: }
842:
843: void
844: rf_PrintRaidAddressInfo(RF_Raid_t *raidPtr, RF_RaidAddr_t raidAddr,
845: RF_SectorCount_t numBlocks)
846: {
847: RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
848: RF_RaidAddr_t ra, sosAddr =
849: rf_RaidAddressOfPrevStripeBoundary(layoutPtr, raidAddr);
850:
851: printf("Raid addrs of SU boundaries from start of stripe to end"
852: " of access:\n\t");
853: for (ra = sosAddr; ra <= raidAddr + numBlocks;
854: ra += layoutPtr->sectorsPerStripeUnit) {
855: printf("%d (0x%x), ", (int) ra, (int) ra);
856: }
857: printf("\n");
858: printf("Offset into stripe unit: %d (0x%x)\n",
859: (int) (raidAddr % layoutPtr->sectorsPerStripeUnit),
860: (int) (raidAddr % layoutPtr->sectorsPerStripeUnit));
861: }
862:
863: /*
864: * Given a parity descriptor and the starting address within a stripe,
865: * range restrict the parity descriptor to touch only the correct stuff.
866: */
867: void
868: rf_ASMParityAdjust(
869: RF_PhysDiskAddr_t *toAdjust,
870: RF_StripeNum_t startAddrWithinStripe,
871: RF_SectorNum_t endAddress,
872: RF_RaidLayout_t *layoutPtr,
873: RF_AccessStripeMap_t *asm_p
874: )
875: {
876: RF_PhysDiskAddr_t *new_pda;
877:
878: /*
879: * When we're accessing only a portion of one stripe unit, we want the
880: * parity descriptor to identify only the chunk of parity associated
881: * with the data. When the access spans exactly one stripe unit
882: * boundary and is less than a stripe unit in size, it uses two
883: * disjoint regions of the parity unit. When an access spans more
884: * than one stripe unit boundary, it uses all of the parity unit.
885: *
886: * To better handle the case where stripe units are small, we may
887: * eventually want to change the 2nd case so that if the SU size is
888: * below some threshold, we just read/write the whole thing instead of
889: * breaking it up into two accesses.
890: */
891: if (asm_p->numStripeUnitsAccessed == 1) {
892: int x = (startAddrWithinStripe %
893: layoutPtr->sectorsPerStripeUnit);
894: toAdjust->startSector += x;
895: toAdjust->raidAddress += x;
896: toAdjust->numSector = asm_p->physInfo->numSector;
897: RF_ASSERT(toAdjust->numSector != 0);
898: } else
899: if (asm_p->numStripeUnitsAccessed == 2 &&
900: asm_p->totalSectorsAccessed <
901: layoutPtr->sectorsPerStripeUnit) {
902: int x = (startAddrWithinStripe %
903: layoutPtr->sectorsPerStripeUnit);
904:
905: /*
906: * Create a second pda and copy the parity map info
907: * into it.
908: */
909: RF_ASSERT(toAdjust->next == NULL);
910: new_pda = toAdjust->next = rf_AllocPhysDiskAddr();
911: *new_pda = *toAdjust; /* Structure assignment. */
912: new_pda->next = NULL;
913:
914: /*
915: * Adjust the start sector & number of blocks for the
916: * first parity pda.
917: */
918: toAdjust->startSector += x;
919: toAdjust->raidAddress += x;
920: toAdjust->numSector =
921: rf_RaidAddressOfNextStripeUnitBoundary(layoutPtr,
922: startAddrWithinStripe) - startAddrWithinStripe;
923: RF_ASSERT(toAdjust->numSector != 0);
924:
925: /* Adjust the second pda. */
926: new_pda->numSector = endAddress -
927: rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr,
928: endAddress);
929: /* new_pda->raidAddress =
930: * rf_RaidAddressOfNextStripeUnitBoundary(layoutPtr,
931: * toAdjust->raidAddress); */
932: RF_ASSERT(new_pda->numSector != 0);
933: }
934: }
935:
936: /*
937: * Check if a disk has been spared or failed. If spared, redirect the I/O.
938: * If it has been failed, record it in the asm pointer.
939: * Fourth arg is whether data or parity.
940: */
941: void
942: rf_ASMCheckStatus(
943: RF_Raid_t *raidPtr,
944: RF_PhysDiskAddr_t *pda_p,
945: RF_AccessStripeMap_t *asm_p,
946: RF_RaidDisk_t **disks,
947: int parity
948: )
949: {
950: RF_DiskStatus_t dstatus;
951: RF_RowCol_t frow, fcol;
952:
953: dstatus = disks[pda_p->row][pda_p->col].status;
954:
955: if (dstatus == rf_ds_spared) {
956: /* If the disk has been spared, redirect access to the spare. */
957: frow = pda_p->row;
958: fcol = pda_p->col;
959: pda_p->row = disks[frow][fcol].spareRow;
960: pda_p->col = disks[frow][fcol].spareCol;
961: } else
962: if (dstatus == rf_ds_dist_spared) {
963: /* Ditto if disk has been spared to dist spare space. */
964: RF_RowCol_t or = pda_p->row, oc = pda_p->col;
965: RF_SectorNum_t oo = pda_p->startSector;
966:
967: if (pda_p->type == RF_PDA_TYPE_DATA)
968: raidPtr->Layout.map->MapSector(raidPtr,
969: pda_p->raidAddress, &pda_p->row,
970: &pda_p->col, &pda_p->startSector, RF_REMAP);
971: else
972: raidPtr->Layout.map->MapParity(raidPtr,
973: pda_p->raidAddress, &pda_p->row,
974: &pda_p->col, &pda_p->startSector, RF_REMAP);
975:
976: if (rf_mapDebug) {
977: printf("Redirected r %d c %d o %d -> r%d c %d"
978: " o %d\n", or, oc, (int) oo, pda_p->row,
979: pda_p->col, (int) pda_p->startSector);
980: }
981: } else
982: if (RF_DEAD_DISK(dstatus)) {
983: /*
984: * If the disk is inaccessible, mark the
985: * failure.
986: */
987: if (parity)
988: asm_p->numParityFailed++;
989: else {
990: asm_p->numDataFailed++;
991: #if 0
992: /*
993: * XXX Do we really want this spewing
994: * out on the console ? GO
995: */
996: printf("DATA_FAILED !\n");
997: #endif
998: }
999: asm_p->failedPDAs[asm_p->numFailedPDAs] = pda_p;
1000: asm_p->numFailedPDAs++;
1001: #if 0
1002: switch (asm_p->numParityFailed +
1003: asm_p->numDataFailed) {
1004: case 1:
1005: asm_p->failedPDAs[0] = pda_p;
1006: break;
1007: case 2:
1008: asm_p->failedPDAs[1] = pda_p;
1009: default:
1010: break;
1011: }
1012: #endif
1013: }
1014: /* The redirected access should never span a stripe unit boundary. */
1015: RF_ASSERT(rf_RaidAddressToStripeUnitID(&raidPtr->Layout,
1016: pda_p->raidAddress) ==
1017: rf_RaidAddressToStripeUnitID(&raidPtr->Layout, pda_p->raidAddress +
1018: pda_p->numSector - 1));
1019: RF_ASSERT(pda_p->col != -1);
1020: }
CVSweb