Annotation of sys/dev/raidframe/rf_map.c, Revision 1.1
1.1 ! nbrk 1: /* $OpenBSD: rf_map.c,v 1.5 2002/12/16 07:01:04 tdeval Exp $ */
! 2: /* $NetBSD: rf_map.c,v 1.5 2000/06/29 00:22:27 oster Exp $ */
! 3:
! 4: /*
! 5: * Copyright (c) 1995 Carnegie-Mellon University.
! 6: * All rights reserved.
! 7: *
! 8: * Author: Mark Holland
! 9: *
! 10: * Permission to use, copy, modify and distribute this software and
! 11: * its documentation is hereby granted, provided that both the copyright
! 12: * notice and this permission notice appear in all copies of the
! 13: * software, derivative works or modified versions, and any portions
! 14: * thereof, and that both notices appear in supporting documentation.
! 15: *
! 16: * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
! 17: * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
! 18: * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
! 19: *
! 20: * Carnegie Mellon requests users of this software to return to
! 21: *
! 22: * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
! 23: * School of Computer Science
! 24: * Carnegie Mellon University
! 25: * Pittsburgh PA 15213-3890
! 26: *
! 27: * any improvements or extensions that they make and grant Carnegie the
! 28: * rights to redistribute these changes.
! 29: */
! 30:
! 31: /*****************************************************************************
! 32: *
! 33: * map.c -- Main code for mapping RAID addresses to physical disk addresses.
! 34: *
! 35: *****************************************************************************/
! 36:
! 37: #include "rf_types.h"
! 38: #include "rf_threadstuff.h"
! 39: #include "rf_raid.h"
! 40: #include "rf_general.h"
! 41: #include "rf_map.h"
! 42: #include "rf_freelist.h"
! 43: #include "rf_shutdown.h"
! 44:
! 45: void rf_FreePDAList(RF_PhysDiskAddr_t *, RF_PhysDiskAddr_t *, int);
! 46: void rf_FreeASMList(RF_AccessStripeMap_t *, RF_AccessStripeMap_t *, int);
! 47:
! 48: /*****************************************************************************
! 49: *
! 50: * MapAccess -- Main 1st order mapping routine.
! 51: *
! 52: * Maps an access in the RAID address space to the corresponding set of
! 53: * physical disk addresses. The result is returned as a list of
! 54: * AccessStripeMap structures, one per stripe accessed. Each ASM structure
! 55: * contains a pointer to a list of PhysDiskAddr structures, which describe
! 56: * the physical locations touched by the user access. Note that this routine
! 57: * returns only static mapping information, i.e. the list of physical
! 58: * addresses returned does not necessarily identify the set of physical
! 59: * locations that will actually be read or written.
! 60: *
! 61: * The routine also maps the parity. The physical disk location returned
! 62: * always indicates the entire parity unit, even when only a subset of it
! 63: * is being accessed. This is because an access that is not stripe unit
! 64: * aligned but that spans a stripe unit boundary may require access to two
! 65: * distinct portions of the parity unit, and we can't yet tell which
! 66: * portion(s) we'll actually need. We leave it up to the algorithm
! 67: * selection code to decide what subset of the parity unit to access.
! 68: *
! 69: * Note that addresses in the RAID address space must always be maintained
! 70: * as longs, instead of ints.
! 71: *
! 72: * This routine returns NULL if numBlocks is 0.
! 73: *
! 74: *****************************************************************************/
! 75:
! 76: RF_AccessStripeMapHeader_t *
! 77: rf_MapAccess(
! 78: RF_Raid_t *raidPtr,
! 79: RF_RaidAddr_t raidAddress, /*
! 80: * Starting address in RAID address
! 81: * space.
! 82: */
! 83: RF_SectorCount_t numBlocks, /*
! 84: * Number of blocks in RAID address
! 85: * space to access.
! 86: */
! 87: caddr_t buffer, /* Buffer to supply/receive data. */
! 88: int remap /*
! 89: * 1 => remap addresses to spare space.
! 90: */
! 91: )
! 92: {
! 93: RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
! 94: RF_AccessStripeMapHeader_t *asm_hdr = NULL;
! 95: RF_AccessStripeMap_t *asm_list = NULL, *asm_p = NULL;
! 96: int faultsTolerated = layoutPtr->map->faultsTolerated;
! 97: /* We'll change raidAddress along the way. */
! 98: RF_RaidAddr_t startAddress = raidAddress;
! 99: RF_RaidAddr_t endAddress = raidAddress + numBlocks;
! 100: RF_RaidDisk_t **disks = raidPtr->Disks;
! 101:
! 102: RF_PhysDiskAddr_t *pda_p, *pda_q;
! 103: RF_StripeCount_t numStripes = 0;
! 104: RF_RaidAddr_t stripeRealEndAddress, stripeEndAddress;
! 105: RF_RaidAddr_t nextStripeUnitAddress;
! 106: RF_RaidAddr_t startAddrWithinStripe, lastRaidAddr;
! 107: RF_StripeCount_t totStripes;
! 108: RF_StripeNum_t stripeID, lastSID, SUID, lastSUID;
! 109: RF_AccessStripeMap_t *asmList, *t_asm;
! 110: RF_PhysDiskAddr_t *pdaList, *t_pda;
! 111:
! 112: /* Allocate all the ASMs and PDAs up front. */
! 113: lastRaidAddr = raidAddress + numBlocks - 1;
! 114: stripeID = rf_RaidAddressToStripeID(layoutPtr, raidAddress);
! 115: lastSID = rf_RaidAddressToStripeID(layoutPtr, lastRaidAddr);
! 116: totStripes = lastSID - stripeID + 1;
! 117: SUID = rf_RaidAddressToStripeUnitID(layoutPtr, raidAddress);
! 118: lastSUID = rf_RaidAddressToStripeUnitID(layoutPtr, lastRaidAddr);
! 119:
! 120: asmList = rf_AllocASMList(totStripes);
! 121: pdaList = rf_AllocPDAList(lastSUID - SUID + 1 +
! 122: faultsTolerated * totStripes); /*
! 123: * May also need pda(s)
! 124: * per stripe for parity.
! 125: */
! 126:
! 127: if (raidAddress + numBlocks > raidPtr->totalSectors) {
! 128: RF_ERRORMSG1("Unable to map access because offset (%d)"
! 129: " was invalid\n", (int) raidAddress);
! 130: return (NULL);
! 131: }
! 132: if (rf_mapDebug)
! 133: rf_PrintRaidAddressInfo(raidPtr, raidAddress, numBlocks);
! 134: for (; raidAddress < endAddress;) {
! 135: /* Make the next stripe structure. */
! 136: RF_ASSERT(asmList);
! 137: t_asm = asmList;
! 138: asmList = asmList->next;
! 139: bzero((char *) t_asm, sizeof(RF_AccessStripeMap_t));
! 140: if (!asm_p)
! 141: asm_list = asm_p = t_asm;
! 142: else {
! 143: asm_p->next = t_asm;
! 144: asm_p = asm_p->next;
! 145: }
! 146: numStripes++;
! 147:
! 148: /* Map SUs from current location to the end of the stripe. */
! 149: asm_p->stripeID =
! 150: /* rf_RaidAddressToStripeID(layoutPtr, raidAddress) */
! 151: stripeID++;
! 152: stripeRealEndAddress =
! 153: rf_RaidAddressOfNextStripeBoundary(layoutPtr, raidAddress);
! 154: stripeEndAddress = RF_MIN(endAddress, stripeRealEndAddress);
! 155: asm_p->raidAddress = raidAddress;
! 156: asm_p->endRaidAddress = stripeEndAddress;
! 157:
! 158: /* Map each stripe unit in the stripe. */
! 159: pda_p = NULL;
! 160: /*
! 161: * Raid addr of start of portion of access that is within this
! 162: * stripe.
! 163: */
! 164: startAddrWithinStripe = raidAddress;
! 165:
! 166: for (; raidAddress < stripeEndAddress;) {
! 167: RF_ASSERT(pdaList);
! 168: t_pda = pdaList;
! 169: pdaList = pdaList->next;
! 170: bzero((char *) t_pda, sizeof(RF_PhysDiskAddr_t));
! 171: if (!pda_p)
! 172: asm_p->physInfo = pda_p = t_pda;
! 173: else {
! 174: pda_p->next = t_pda;
! 175: pda_p = pda_p->next;
! 176: }
! 177:
! 178: pda_p->type = RF_PDA_TYPE_DATA;
! 179: (layoutPtr->map->MapSector) (raidPtr, raidAddress,
! 180: &(pda_p->row), &(pda_p->col),
! 181: &(pda_p->startSector), remap);
! 182:
! 183: /*
! 184: * Mark any failures we find.
! 185: * failedPDA is don't-care if there is more than
! 186: * one failure.
! 187: */
! 188: /*
! 189: * The RAID address corresponding to this physical
! 190: * disk address.
! 191: */
! 192: pda_p->raidAddress = raidAddress;
! 193: nextStripeUnitAddress =
! 194: rf_RaidAddressOfNextStripeUnitBoundary(layoutPtr,
! 195: raidAddress);
! 196: pda_p->numSector = RF_MIN(endAddress,
! 197: nextStripeUnitAddress) - raidAddress;
! 198: RF_ASSERT(pda_p->numSector != 0);
! 199: rf_ASMCheckStatus(raidPtr, pda_p, asm_p, disks, 0);
! 200: pda_p->bufPtr = buffer + rf_RaidAddressToByte(raidPtr,
! 201: (raidAddress - startAddress));
! 202: asm_p->totalSectorsAccessed += pda_p->numSector;
! 203: asm_p->numStripeUnitsAccessed++;
! 204: asm_p->origRow = pda_p->row; /*
! 205: * Redundant but
! 206: * harmless to do this
! 207: * in every loop
! 208: * iteration.
! 209: */
! 210:
! 211: raidAddress = RF_MIN(endAddress, nextStripeUnitAddress);
! 212: }
! 213:
! 214: /*
! 215: * Map the parity. At this stage, the startSector and
! 216: * numSector fields for the parity unit are always set to
! 217: * indicate the entire parity unit. We may modify this after
! 218: * mapping the data portion.
! 219: */
! 220: switch (faultsTolerated) {
! 221: case 0:
! 222: break;
! 223: case 1: /* Single fault tolerant. */
! 224: RF_ASSERT(pdaList);
! 225: t_pda = pdaList;
! 226: pdaList = pdaList->next;
! 227: bzero((char *) t_pda, sizeof(RF_PhysDiskAddr_t));
! 228: pda_p = asm_p->parityInfo = t_pda;
! 229: pda_p->type = RF_PDA_TYPE_PARITY;
! 230: (layoutPtr->map->MapParity) (raidPtr,
! 231: rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr,
! 232: startAddrWithinStripe), &(pda_p->row),
! 233: &(pda_p->col), &(pda_p->startSector), remap);
! 234: pda_p->numSector = layoutPtr->sectorsPerStripeUnit;
! 235: /*
! 236: * raidAddr may be needed to find unit to redirect to.
! 237: */
! 238: pda_p->raidAddress =
! 239: rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr,
! 240: startAddrWithinStripe);
! 241: rf_ASMCheckStatus(raidPtr, pda_p, asm_p, disks, 1);
! 242: rf_ASMParityAdjust(asm_p->parityInfo,
! 243: startAddrWithinStripe, endAddress,
! 244: layoutPtr, asm_p);
! 245:
! 246: break;
! 247: case 2: /* Two fault tolerant. */
! 248: RF_ASSERT(pdaList && pdaList->next);
! 249: t_pda = pdaList;
! 250: pdaList = pdaList->next;
! 251: bzero((char *) t_pda, sizeof(RF_PhysDiskAddr_t));
! 252: pda_p = asm_p->parityInfo = t_pda;
! 253: pda_p->type = RF_PDA_TYPE_PARITY;
! 254: t_pda = pdaList;
! 255: pdaList = pdaList->next;
! 256: bzero((char *) t_pda, sizeof(RF_PhysDiskAddr_t));
! 257: pda_q = asm_p->qInfo = t_pda;
! 258: pda_q->type = RF_PDA_TYPE_Q;
! 259: (layoutPtr->map->MapParity) (raidPtr,
! 260: rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr,
! 261: startAddrWithinStripe), &(pda_p->row),
! 262: &(pda_p->col), &(pda_p->startSector), remap);
! 263: (layoutPtr->map->MapQ) (raidPtr,
! 264: rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr,
! 265: startAddrWithinStripe), &(pda_q->row),
! 266: &(pda_q->col), &(pda_q->startSector), remap);
! 267: pda_q->numSector = pda_p->numSector =
! 268: layoutPtr->sectorsPerStripeUnit;
! 269: /*
! 270: * raidAddr may be needed to find unit to redirect to.
! 271: */
! 272: pda_p->raidAddress =
! 273: rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr,
! 274: startAddrWithinStripe);
! 275: pda_q->raidAddress =
! 276: rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr,
! 277: startAddrWithinStripe);
! 278: /* Failure mode stuff. */
! 279: rf_ASMCheckStatus(raidPtr, pda_p, asm_p, disks, 1);
! 280: rf_ASMCheckStatus(raidPtr, pda_q, asm_p, disks, 1);
! 281: rf_ASMParityAdjust(asm_p->parityInfo,
! 282: startAddrWithinStripe, endAddress,
! 283: layoutPtr, asm_p);
! 284: rf_ASMParityAdjust(asm_p->qInfo, startAddrWithinStripe,
! 285: endAddress, layoutPtr, asm_p);
! 286: break;
! 287: }
! 288: }
! 289: RF_ASSERT(asmList == NULL && pdaList == NULL);
! 290: /* Make the header structure. */
! 291: asm_hdr = rf_AllocAccessStripeMapHeader();
! 292: RF_ASSERT(numStripes == totStripes);
! 293: asm_hdr->numStripes = numStripes;
! 294: asm_hdr->stripeMap = asm_list;
! 295:
! 296: if (rf_mapDebug)
! 297: rf_PrintAccessStripeMap(asm_hdr);
! 298: return (asm_hdr);
! 299: }
! 300:
! 301: /*****************************************************************************
! 302: * This routine walks through an ASM list and marks the PDAs that have failed.
! 303: * It's called only when a disk failure causes an in-flight DAG to fail.
! 304: * The parity may consist of two components, but we want to use only one
! 305: * failedPDA pointer. Thus we set failedPDA to point to the first parity
! 306: * component, and rely on the rest of the code to do the right thing with this.
! 307: *****************************************************************************/
! 308: void
! 309: rf_MarkFailuresInASMList(RF_Raid_t *raidPtr, RF_AccessStripeMapHeader_t *asm_h)
! 310: {
! 311: RF_RaidDisk_t **disks = raidPtr->Disks;
! 312: RF_AccessStripeMap_t *asmap;
! 313: RF_PhysDiskAddr_t *pda;
! 314:
! 315: for (asmap = asm_h->stripeMap; asmap; asmap = asmap->next) {
! 316: asmap->numDataFailed = asmap->numParityFailed =
! 317: asmap->numQFailed = 0;
! 318: asmap->numFailedPDAs = 0;
! 319: bzero((char *) asmap->failedPDAs,
! 320: RF_MAX_FAILED_PDA * sizeof(RF_PhysDiskAddr_t *));
! 321: for (pda = asmap->physInfo; pda; pda = pda->next) {
! 322: if (RF_DEAD_DISK(disks[pda->row][pda->col].status)) {
! 323: asmap->numDataFailed++;
! 324: asmap->failedPDAs[asmap->numFailedPDAs] = pda;
! 325: asmap->numFailedPDAs++;
! 326: }
! 327: }
! 328: pda = asmap->parityInfo;
! 329: if (pda && RF_DEAD_DISK(disks[pda->row][pda->col].status)) {
! 330: asmap->numParityFailed++;
! 331: asmap->failedPDAs[asmap->numFailedPDAs] = pda;
! 332: asmap->numFailedPDAs++;
! 333: }
! 334: pda = asmap->qInfo;
! 335: if (pda && RF_DEAD_DISK(disks[pda->row][pda->col].status)) {
! 336: asmap->numQFailed++;
! 337: asmap->failedPDAs[asmap->numFailedPDAs] = pda;
! 338: asmap->numFailedPDAs++;
! 339: }
! 340: }
! 341: }
! 342:
! 343: /*****************************************************************************
! 344: *
! 345: * DuplicateASM -- Duplicates an ASM and returns the new one.
! 346: *
! 347: *****************************************************************************/
! 348: RF_AccessStripeMap_t *
! 349: rf_DuplicateASM(RF_AccessStripeMap_t *asmap)
! 350: {
! 351: RF_AccessStripeMap_t *new_asm;
! 352: RF_PhysDiskAddr_t *pda, *new_pda, *t_pda;
! 353:
! 354: new_pda = NULL;
! 355: new_asm = rf_AllocAccessStripeMapComponent();
! 356: bcopy((char *) asmap, (char *) new_asm, sizeof(RF_AccessStripeMap_t));
! 357: new_asm->numFailedPDAs = 0; /* ??? */
! 358: new_asm->failedPDAs[0] = NULL;
! 359: new_asm->physInfo = NULL;
! 360: new_asm->parityInfo = NULL;
! 361: new_asm->next = NULL;
! 362:
! 363: for (pda = asmap->physInfo; pda; pda = pda->next) {
! 364: /* Copy the physInfo list. */
! 365: t_pda = rf_AllocPhysDiskAddr();
! 366: bcopy((char *) pda, (char *) t_pda, sizeof(RF_PhysDiskAddr_t));
! 367: t_pda->next = NULL;
! 368: if (!new_asm->physInfo) {
! 369: new_asm->physInfo = t_pda;
! 370: new_pda = t_pda;
! 371: } else {
! 372: new_pda->next = t_pda;
! 373: new_pda = new_pda->next;
! 374: }
! 375: if (pda == asmap->failedPDAs[0])
! 376: new_asm->failedPDAs[0] = t_pda;
! 377: }
! 378: for (pda = asmap->parityInfo; pda; pda = pda->next) {
! 379: /* Copy the parityInfo list. */
! 380: t_pda = rf_AllocPhysDiskAddr();
! 381: bcopy((char *) pda, (char *) t_pda, sizeof(RF_PhysDiskAddr_t));
! 382: t_pda->next = NULL;
! 383: if (!new_asm->parityInfo) {
! 384: new_asm->parityInfo = t_pda;
! 385: new_pda = t_pda;
! 386: } else {
! 387: new_pda->next = t_pda;
! 388: new_pda = new_pda->next;
! 389: }
! 390: if (pda == asmap->failedPDAs[0])
! 391: new_asm->failedPDAs[0] = t_pda;
! 392: }
! 393: return (new_asm);
! 394: }
! 395:
! 396: /*****************************************************************************
! 397: *
! 398: * DuplicatePDA -- Duplicates a PDA and returns the new one.
! 399: *
! 400: *****************************************************************************/
! 401: RF_PhysDiskAddr_t *
! 402: rf_DuplicatePDA(RF_PhysDiskAddr_t *pda)
! 403: {
! 404: RF_PhysDiskAddr_t *new;
! 405:
! 406: new = rf_AllocPhysDiskAddr();
! 407: bcopy((char *) pda, (char *) new, sizeof(RF_PhysDiskAddr_t));
! 408: return (new);
! 409: }
! 410:
! 411: /*****************************************************************************
! 412: *
! 413: * Routines to allocate and free list elements. All allocation routines zero
! 414: * the structure before returning it.
! 415: *
! 416: * FreePhysDiskAddr is static. It should never be called directly, because
! 417: * FreeAccessStripeMap takes care of freeing the PhysDiskAddr list.
! 418: *
! 419: *****************************************************************************/
! 420:
! 421: static RF_FreeList_t *rf_asmhdr_freelist;
! 422: #define RF_MAX_FREE_ASMHDR 128
! 423: #define RF_ASMHDR_INC 16
! 424: #define RF_ASMHDR_INITIAL 32
! 425:
! 426: static RF_FreeList_t *rf_asm_freelist;
! 427: #define RF_MAX_FREE_ASM 192
! 428: #define RF_ASM_INC 24
! 429: #define RF_ASM_INITIAL 64
! 430:
! 431: static RF_FreeList_t *rf_pda_freelist;
! 432: #define RF_MAX_FREE_PDA 192
! 433: #define RF_PDA_INC 24
! 434: #define RF_PDA_INITIAL 64
! 435:
! 436: /*
! 437: * Called at shutdown time. So far, all that is necessary is to release
! 438: * all the free lists.
! 439: */
! 440: void rf_ShutdownMapModule(void *);
! 441: void
! 442: rf_ShutdownMapModule(void *ignored)
! 443: {
! 444: RF_FREELIST_DESTROY(rf_asmhdr_freelist, next,
! 445: (RF_AccessStripeMapHeader_t *));
! 446: RF_FREELIST_DESTROY(rf_pda_freelist, next, (RF_PhysDiskAddr_t *));
! 447: RF_FREELIST_DESTROY(rf_asm_freelist, next, (RF_AccessStripeMap_t *));
! 448: }
! 449:
! 450: int
! 451: rf_ConfigureMapModule(RF_ShutdownList_t **listp)
! 452: {
! 453: int rc;
! 454:
! 455: RF_FREELIST_CREATE(rf_asmhdr_freelist, RF_MAX_FREE_ASMHDR,
! 456: RF_ASMHDR_INC, sizeof(RF_AccessStripeMapHeader_t));
! 457: if (rf_asmhdr_freelist == NULL) {
! 458: return (ENOMEM);
! 459: }
! 460: RF_FREELIST_CREATE(rf_asm_freelist, RF_MAX_FREE_ASM,
! 461: RF_ASM_INC, sizeof(RF_AccessStripeMap_t));
! 462: if (rf_asm_freelist == NULL) {
! 463: RF_FREELIST_DESTROY(rf_asmhdr_freelist, next,
! 464: (RF_AccessStripeMapHeader_t *));
! 465: return (ENOMEM);
! 466: }
! 467: RF_FREELIST_CREATE(rf_pda_freelist, RF_MAX_FREE_PDA, RF_PDA_INC,
! 468: sizeof(RF_PhysDiskAddr_t));
! 469: if (rf_pda_freelist == NULL) {
! 470: RF_FREELIST_DESTROY(rf_asmhdr_freelist, next,
! 471: (RF_AccessStripeMapHeader_t *));
! 472: RF_FREELIST_DESTROY(rf_pda_freelist, next,
! 473: (RF_PhysDiskAddr_t *));
! 474: return (ENOMEM);
! 475: }
! 476: rc = rf_ShutdownCreate(listp, rf_ShutdownMapModule, NULL);
! 477: if (rc) {
! 478: RF_ERRORMSG3("Unable to add to shutdown list file %s line %d"
! 479: " rc=%d\n", __FILE__, __LINE__, rc);
! 480: rf_ShutdownMapModule(NULL);
! 481: return (rc);
! 482: }
! 483: RF_FREELIST_PRIME(rf_asmhdr_freelist, RF_ASMHDR_INITIAL, next,
! 484: (RF_AccessStripeMapHeader_t *));
! 485: RF_FREELIST_PRIME(rf_asm_freelist, RF_ASM_INITIAL, next,
! 486: (RF_AccessStripeMap_t *));
! 487: RF_FREELIST_PRIME(rf_pda_freelist, RF_PDA_INITIAL, next,
! 488: (RF_PhysDiskAddr_t *));
! 489:
! 490: return (0);
! 491: }
! 492:
! 493: RF_AccessStripeMapHeader_t *
! 494: rf_AllocAccessStripeMapHeader(void)
! 495: {
! 496: RF_AccessStripeMapHeader_t *p;
! 497:
! 498: RF_FREELIST_GET(rf_asmhdr_freelist, p, next,
! 499: (RF_AccessStripeMapHeader_t *));
! 500: bzero((char *) p, sizeof(RF_AccessStripeMapHeader_t));
! 501:
! 502: return (p);
! 503: }
! 504:
! 505: void
! 506: rf_FreeAccessStripeMapHeader(RF_AccessStripeMapHeader_t *p)
! 507: {
! 508: RF_FREELIST_FREE(rf_asmhdr_freelist, p, next);
! 509: }
! 510:
! 511: RF_PhysDiskAddr_t *
! 512: rf_AllocPhysDiskAddr(void)
! 513: {
! 514: RF_PhysDiskAddr_t *p;
! 515:
! 516: RF_FREELIST_GET(rf_pda_freelist, p, next, (RF_PhysDiskAddr_t *));
! 517: bzero((char *) p, sizeof(RF_PhysDiskAddr_t));
! 518:
! 519: return (p);
! 520: }
! 521:
! 522: /*
! 523: * Allocates a list of PDAs, locking the free list only once.
! 524: * When we have to call calloc, we do it one component at a time to simplify
! 525: * the process of freeing the list at program shutdown. This should not be
! 526: * much of a performance hit, because it should be very infrequently executed.
! 527: */
! 528: RF_PhysDiskAddr_t *
! 529: rf_AllocPDAList(int count)
! 530: {
! 531: RF_PhysDiskAddr_t *p = NULL;
! 532:
! 533: RF_FREELIST_GET_N(rf_pda_freelist, p, next, (RF_PhysDiskAddr_t *),
! 534: count);
! 535: return (p);
! 536: }
! 537:
! 538: void
! 539: rf_FreePhysDiskAddr(RF_PhysDiskAddr_t *p)
! 540: {
! 541: RF_FREELIST_FREE(rf_pda_freelist, p, next);
! 542: }
! 543:
! 544: void
! 545: rf_FreePDAList(
! 546: /* Pointers to start and end of list. */
! 547: RF_PhysDiskAddr_t *l_start,
! 548: RF_PhysDiskAddr_t *l_end,
! 549: int count /* Number of elements in list. */
! 550: )
! 551: {
! 552: RF_FREELIST_FREE_N(rf_pda_freelist, l_start, next,
! 553: (RF_PhysDiskAddr_t *), count);
! 554: }
! 555:
! 556: RF_AccessStripeMap_t *
! 557: rf_AllocAccessStripeMapComponent(void)
! 558: {
! 559: RF_AccessStripeMap_t *p;
! 560:
! 561: RF_FREELIST_GET(rf_asm_freelist, p, next, (RF_AccessStripeMap_t *));
! 562: bzero((char *) p, sizeof(RF_AccessStripeMap_t));
! 563:
! 564: return (p);
! 565: }
! 566:
! 567: /*
! 568: * This is essentially identical to AllocPDAList. I should combine the two.
! 569: * When we have to call calloc, we do it one component at a time to simplify
! 570: * the process of freeing the list at program shutdown. This should not be
! 571: * much of a performance hit, because it should be very infrequently executed.
! 572: */
! 573: RF_AccessStripeMap_t *
! 574: rf_AllocASMList(int count)
! 575: {
! 576: RF_AccessStripeMap_t *p = NULL;
! 577:
! 578: RF_FREELIST_GET_N(rf_asm_freelist, p, next, (RF_AccessStripeMap_t *),
! 579: count);
! 580: return (p);
! 581: }
! 582:
! 583: void
! 584: rf_FreeAccessStripeMapComponent(RF_AccessStripeMap_t *p)
! 585: {
! 586: RF_FREELIST_FREE(rf_asm_freelist, p, next);
! 587: }
! 588:
! 589: void
! 590: rf_FreeASMList(RF_AccessStripeMap_t *l_start, RF_AccessStripeMap_t *l_end,
! 591: int count)
! 592: {
! 593: RF_FREELIST_FREE_N(rf_asm_freelist, l_start, next,
! 594: (RF_AccessStripeMap_t *), count);
! 595: }
! 596:
! 597: void
! 598: rf_FreeAccessStripeMap(RF_AccessStripeMapHeader_t *hdr)
! 599: {
! 600: RF_AccessStripeMap_t *p, *pt = NULL;
! 601: RF_PhysDiskAddr_t *pdp, *trailer, *pdaList = NULL, *pdaEnd = NULL;
! 602: int count = 0, t, asm_count = 0;
! 603:
! 604: for (p = hdr->stripeMap; p; p = p->next) {
! 605:
! 606: /* Link the 3 pda lists into the accumulating pda list. */
! 607:
! 608: if (!pdaList)
! 609: pdaList = p->qInfo;
! 610: else
! 611: pdaEnd->next = p->qInfo;
! 612: for (trailer = NULL, pdp = p->qInfo; pdp;) {
! 613: trailer = pdp;
! 614: pdp = pdp->next;
! 615: count++;
! 616: }
! 617: if (trailer)
! 618: pdaEnd = trailer;
! 619:
! 620: if (!pdaList)
! 621: pdaList = p->parityInfo;
! 622: else
! 623: pdaEnd->next = p->parityInfo;
! 624: for (trailer = NULL, pdp = p->parityInfo; pdp;) {
! 625: trailer = pdp;
! 626: pdp = pdp->next;
! 627: count++;
! 628: }
! 629: if (trailer)
! 630: pdaEnd = trailer;
! 631:
! 632: if (!pdaList)
! 633: pdaList = p->physInfo;
! 634: else
! 635: pdaEnd->next = p->physInfo;
! 636: for (trailer = NULL, pdp = p->physInfo; pdp;) {
! 637: trailer = pdp;
! 638: pdp = pdp->next;
! 639: count++;
! 640: }
! 641: if (trailer)
! 642: pdaEnd = trailer;
! 643:
! 644: pt = p;
! 645: asm_count++;
! 646: }
! 647:
! 648: /* Debug only. */
! 649: for (t = 0, pdp = pdaList; pdp; pdp = pdp->next)
! 650: t++;
! 651: RF_ASSERT(t == count);
! 652:
! 653: if (pdaList)
! 654: rf_FreePDAList(pdaList, pdaEnd, count);
! 655: rf_FreeASMList(hdr->stripeMap, pt, asm_count);
! 656: rf_FreeAccessStripeMapHeader(hdr);
! 657: }
! 658:
! 659: /*
! 660: * We can't use the large write optimization if there are any failures in the
! 661: * stripe.
! 662: * In the declustered layout, there is no way to immediately determine what
! 663: * disks constitute a stripe, so we actually have to hunt through the stripe
! 664: * looking for failures.
! 665: * The reason we map the parity instead of just using asm->parityInfo->col is
! 666: * because the latter may have been already redirected to a spare drive, which
! 667: * would mess up the computation of the stripe offset.
! 668: *
! 669: * ASSUMES AT MOST ONE FAILURE IN THE STRIPE.
! 670: */
! 671: int
! 672: rf_CheckStripeForFailures(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap)
! 673: {
! 674: RF_RowCol_t trow, tcol, prow, pcol, *diskids, row, i;
! 675: RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
! 676: RF_StripeCount_t stripeOffset;
! 677: int numFailures;
! 678: RF_RaidAddr_t sosAddr;
! 679: RF_SectorNum_t diskOffset, poffset;
! 680: RF_RowCol_t testrow;
! 681:
! 682: /* Quick out in the fault-free case. */
! 683: RF_LOCK_MUTEX(raidPtr->mutex);
! 684: numFailures = raidPtr->numFailures;
! 685: RF_UNLOCK_MUTEX(raidPtr->mutex);
! 686: if (numFailures == 0)
! 687: return (0);
! 688:
! 689: sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr,
! 690: asmap->raidAddress);
! 691: row = asmap->physInfo->row;
! 692: (layoutPtr->map->IdentifyStripe) (raidPtr, asmap->raidAddress,
! 693: &diskids, &testrow);
! 694: (layoutPtr->map->MapParity) (raidPtr, asmap->raidAddress,
! 695: &prow, &pcol, &poffset, 0); /* get pcol */
! 696:
! 697: /*
! 698: * This needs not be true if we've redirected the access to a spare in
! 699: * another row.
! 700: * RF_ASSERT(row == testrow);
! 701: */
! 702: stripeOffset = 0;
! 703: for (i = 0; i < layoutPtr->numDataCol + layoutPtr->numParityCol; i++) {
! 704: if (diskids[i] != pcol) {
! 705: if (RF_DEAD_DISK(raidPtr
! 706: ->Disks[testrow][diskids[i]].status)) {
! 707: if (raidPtr->status[testrow] !=
! 708: rf_rs_reconstructing)
! 709: return (1);
! 710: RF_ASSERT(
! 711: raidPtr->reconControl[testrow]->fcol ==
! 712: diskids[i]);
! 713: layoutPtr->map->MapSector(raidPtr,
! 714: sosAddr + stripeOffset *
! 715: layoutPtr->sectorsPerStripeUnit,
! 716: &trow, &tcol, &diskOffset, 0);
! 717: RF_ASSERT((trow == testrow) &&
! 718: (tcol == diskids[i]));
! 719: if (!rf_CheckRUReconstructed(raidPtr
! 720: ->reconControl[testrow]->reconMap,
! 721: diskOffset))
! 722: return (1);
! 723: asmap->flags |= RF_ASM_REDIR_LARGE_WRITE;
! 724: return (0);
! 725: }
! 726: stripeOffset++;
! 727: }
! 728: }
! 729: return (0);
! 730: }
! 731:
! 732: /*
! 733: * Return the number of failed data units in the stripe.
! 734: */
! 735: int
! 736: rf_NumFailedDataUnitsInStripe(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap)
! 737: {
! 738: RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
! 739: RF_RowCol_t trow, tcol, row, i;
! 740: RF_SectorNum_t diskOffset;
! 741: RF_RaidAddr_t sosAddr;
! 742: int numFailures;
! 743:
! 744: /* Quick out in the fault-free case. */
! 745: RF_LOCK_MUTEX(raidPtr->mutex);
! 746: numFailures = raidPtr->numFailures;
! 747: RF_UNLOCK_MUTEX(raidPtr->mutex);
! 748: if (numFailures == 0)
! 749: return (0);
! 750: numFailures = 0;
! 751:
! 752: sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr,
! 753: asmap->raidAddress);
! 754: row = asmap->physInfo->row;
! 755: for (i = 0; i < layoutPtr->numDataCol; i++) {
! 756: (layoutPtr->map->MapSector) (raidPtr, sosAddr + i *
! 757: layoutPtr->sectorsPerStripeUnit,
! 758: &trow, &tcol, &diskOffset, 0);
! 759: if (RF_DEAD_DISK(raidPtr->Disks[trow][tcol].status))
! 760: numFailures++;
! 761: }
! 762:
! 763: return numFailures;
! 764: }
! 765:
! 766:
! 767: /*****************************************************************************
! 768: *
! 769: * Debug routines.
! 770: *
! 771: *****************************************************************************/
! 772:
! 773: void
! 774: rf_PrintAccessStripeMap(RF_AccessStripeMapHeader_t *asm_h)
! 775: {
! 776: rf_PrintFullAccessStripeMap(asm_h, 0);
! 777: }
! 778:
! 779: void
! 780: rf_PrintFullAccessStripeMap(RF_AccessStripeMapHeader_t *asm_h,
! 781: int prbuf /* Flag to print buffer pointers. */)
! 782: {
! 783: int i;
! 784: RF_AccessStripeMap_t *asmap = asm_h->stripeMap;
! 785: RF_PhysDiskAddr_t *p;
! 786: printf("%d stripes total\n", (int) asm_h->numStripes);
! 787: for (; asmap; asmap = asmap->next) {
! 788: /* printf("Num failures: %d\n", asmap->numDataFailed); */
! 789: /* printf("Num sectors: %d\n",
! 790: * (int)asmap->totalSectorsAccessed); */
! 791: printf("Stripe %d (%d sectors), failures: %d data, %d parity: ",
! 792: (int) asmap->stripeID,
! 793: (int) asmap->totalSectorsAccessed,
! 794: (int) asmap->numDataFailed,
! 795: (int) asmap->numParityFailed);
! 796: if (asmap->parityInfo) {
! 797: printf("Parity [r%d c%d s%d-%d", asmap->parityInfo->row,
! 798: asmap->parityInfo->col,
! 799: (int) asmap->parityInfo->startSector,
! 800: (int) (asmap->parityInfo->startSector +
! 801: asmap->parityInfo->numSector - 1));
! 802: if (prbuf)
! 803: printf(" b0x%lx",
! 804: (unsigned long) asmap->parityInfo->bufPtr);
! 805: if (asmap->parityInfo->next) {
! 806: printf(", r%d c%d s%d-%d",
! 807: asmap->parityInfo->next->row,
! 808: asmap->parityInfo->next->col,
! 809: (int) asmap->parityInfo->next->startSector,
! 810: (int) (asmap->parityInfo->next->startSector
! 811: + asmap->parityInfo->next->numSector - 1));
! 812: if (prbuf)
! 813: printf(" b0x%lx", (unsigned long)
! 814: asmap->parityInfo->next->bufPtr);
! 815: RF_ASSERT(asmap->parityInfo->next->next
! 816: == NULL);
! 817: }
! 818: printf("]\n\t");
! 819: }
! 820: for (i = 0, p = asmap->physInfo; p; p = p->next, i++) {
! 821: printf("SU r%d c%d s%d-%d ", p->row, p->col,
! 822: (int) p->startSector,
! 823: (int) (p->startSector + p->numSector - 1));
! 824: if (prbuf)
! 825: printf("b0x%lx ", (unsigned long) p->bufPtr);
! 826: if (i && !(i & 1))
! 827: printf("\n\t");
! 828: }
! 829: printf("\n");
! 830: p = asm_h->stripeMap->failedPDAs[0];
! 831: if (asm_h->stripeMap->numDataFailed +
! 832: asm_h->stripeMap->numParityFailed > 1)
! 833: printf("[multiple failures]\n");
! 834: else
! 835: if (asm_h->stripeMap->numDataFailed +
! 836: asm_h->stripeMap->numParityFailed > 0)
! 837: printf("\t[Failed PDA: r%d c%d s%d-%d]\n",
! 838: p->row, p->col, (int) p->startSector,
! 839: (int) (p->startSector + p->numSector - 1));
! 840: }
! 841: }
! 842:
! 843: void
! 844: rf_PrintRaidAddressInfo(RF_Raid_t *raidPtr, RF_RaidAddr_t raidAddr,
! 845: RF_SectorCount_t numBlocks)
! 846: {
! 847: RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
! 848: RF_RaidAddr_t ra, sosAddr =
! 849: rf_RaidAddressOfPrevStripeBoundary(layoutPtr, raidAddr);
! 850:
! 851: printf("Raid addrs of SU boundaries from start of stripe to end"
! 852: " of access:\n\t");
! 853: for (ra = sosAddr; ra <= raidAddr + numBlocks;
! 854: ra += layoutPtr->sectorsPerStripeUnit) {
! 855: printf("%d (0x%x), ", (int) ra, (int) ra);
! 856: }
! 857: printf("\n");
! 858: printf("Offset into stripe unit: %d (0x%x)\n",
! 859: (int) (raidAddr % layoutPtr->sectorsPerStripeUnit),
! 860: (int) (raidAddr % layoutPtr->sectorsPerStripeUnit));
! 861: }
! 862:
! 863: /*
! 864: * Given a parity descriptor and the starting address within a stripe,
! 865: * range restrict the parity descriptor to touch only the correct stuff.
! 866: */
! 867: void
! 868: rf_ASMParityAdjust(
! 869: RF_PhysDiskAddr_t *toAdjust,
! 870: RF_StripeNum_t startAddrWithinStripe,
! 871: RF_SectorNum_t endAddress,
! 872: RF_RaidLayout_t *layoutPtr,
! 873: RF_AccessStripeMap_t *asm_p
! 874: )
! 875: {
! 876: RF_PhysDiskAddr_t *new_pda;
! 877:
! 878: /*
! 879: * When we're accessing only a portion of one stripe unit, we want the
! 880: * parity descriptor to identify only the chunk of parity associated
! 881: * with the data. When the access spans exactly one stripe unit
! 882: * boundary and is less than a stripe unit in size, it uses two
! 883: * disjoint regions of the parity unit. When an access spans more
! 884: * than one stripe unit boundary, it uses all of the parity unit.
! 885: *
! 886: * To better handle the case where stripe units are small, we may
! 887: * eventually want to change the 2nd case so that if the SU size is
! 888: * below some threshold, we just read/write the whole thing instead of
! 889: * breaking it up into two accesses.
! 890: */
! 891: if (asm_p->numStripeUnitsAccessed == 1) {
! 892: int x = (startAddrWithinStripe %
! 893: layoutPtr->sectorsPerStripeUnit);
! 894: toAdjust->startSector += x;
! 895: toAdjust->raidAddress += x;
! 896: toAdjust->numSector = asm_p->physInfo->numSector;
! 897: RF_ASSERT(toAdjust->numSector != 0);
! 898: } else
! 899: if (asm_p->numStripeUnitsAccessed == 2 &&
! 900: asm_p->totalSectorsAccessed <
! 901: layoutPtr->sectorsPerStripeUnit) {
! 902: int x = (startAddrWithinStripe %
! 903: layoutPtr->sectorsPerStripeUnit);
! 904:
! 905: /*
! 906: * Create a second pda and copy the parity map info
! 907: * into it.
! 908: */
! 909: RF_ASSERT(toAdjust->next == NULL);
! 910: new_pda = toAdjust->next = rf_AllocPhysDiskAddr();
! 911: *new_pda = *toAdjust; /* Structure assignment. */
! 912: new_pda->next = NULL;
! 913:
! 914: /*
! 915: * Adjust the start sector & number of blocks for the
! 916: * first parity pda.
! 917: */
! 918: toAdjust->startSector += x;
! 919: toAdjust->raidAddress += x;
! 920: toAdjust->numSector =
! 921: rf_RaidAddressOfNextStripeUnitBoundary(layoutPtr,
! 922: startAddrWithinStripe) - startAddrWithinStripe;
! 923: RF_ASSERT(toAdjust->numSector != 0);
! 924:
! 925: /* Adjust the second pda. */
! 926: new_pda->numSector = endAddress -
! 927: rf_RaidAddressOfPrevStripeUnitBoundary(layoutPtr,
! 928: endAddress);
! 929: /* new_pda->raidAddress =
! 930: * rf_RaidAddressOfNextStripeUnitBoundary(layoutPtr,
! 931: * toAdjust->raidAddress); */
! 932: RF_ASSERT(new_pda->numSector != 0);
! 933: }
! 934: }
! 935:
! 936: /*
! 937: * Check if a disk has been spared or failed. If spared, redirect the I/O.
! 938: * If it has been failed, record it in the asm pointer.
! 939: * Fourth arg is whether data or parity.
! 940: */
! 941: void
! 942: rf_ASMCheckStatus(
! 943: RF_Raid_t *raidPtr,
! 944: RF_PhysDiskAddr_t *pda_p,
! 945: RF_AccessStripeMap_t *asm_p,
! 946: RF_RaidDisk_t **disks,
! 947: int parity
! 948: )
! 949: {
! 950: RF_DiskStatus_t dstatus;
! 951: RF_RowCol_t frow, fcol;
! 952:
! 953: dstatus = disks[pda_p->row][pda_p->col].status;
! 954:
! 955: if (dstatus == rf_ds_spared) {
! 956: /* If the disk has been spared, redirect access to the spare. */
! 957: frow = pda_p->row;
! 958: fcol = pda_p->col;
! 959: pda_p->row = disks[frow][fcol].spareRow;
! 960: pda_p->col = disks[frow][fcol].spareCol;
! 961: } else
! 962: if (dstatus == rf_ds_dist_spared) {
! 963: /* Ditto if disk has been spared to dist spare space. */
! 964: RF_RowCol_t or = pda_p->row, oc = pda_p->col;
! 965: RF_SectorNum_t oo = pda_p->startSector;
! 966:
! 967: if (pda_p->type == RF_PDA_TYPE_DATA)
! 968: raidPtr->Layout.map->MapSector(raidPtr,
! 969: pda_p->raidAddress, &pda_p->row,
! 970: &pda_p->col, &pda_p->startSector, RF_REMAP);
! 971: else
! 972: raidPtr->Layout.map->MapParity(raidPtr,
! 973: pda_p->raidAddress, &pda_p->row,
! 974: &pda_p->col, &pda_p->startSector, RF_REMAP);
! 975:
! 976: if (rf_mapDebug) {
! 977: printf("Redirected r %d c %d o %d -> r%d c %d"
! 978: " o %d\n", or, oc, (int) oo, pda_p->row,
! 979: pda_p->col, (int) pda_p->startSector);
! 980: }
! 981: } else
! 982: if (RF_DEAD_DISK(dstatus)) {
! 983: /*
! 984: * If the disk is inaccessible, mark the
! 985: * failure.
! 986: */
! 987: if (parity)
! 988: asm_p->numParityFailed++;
! 989: else {
! 990: asm_p->numDataFailed++;
! 991: #if 0
! 992: /*
! 993: * XXX Do we really want this spewing
! 994: * out on the console ? GO
! 995: */
! 996: printf("DATA_FAILED !\n");
! 997: #endif
! 998: }
! 999: asm_p->failedPDAs[asm_p->numFailedPDAs] = pda_p;
! 1000: asm_p->numFailedPDAs++;
! 1001: #if 0
! 1002: switch (asm_p->numParityFailed +
! 1003: asm_p->numDataFailed) {
! 1004: case 1:
! 1005: asm_p->failedPDAs[0] = pda_p;
! 1006: break;
! 1007: case 2:
! 1008: asm_p->failedPDAs[1] = pda_p;
! 1009: default:
! 1010: break;
! 1011: }
! 1012: #endif
! 1013: }
! 1014: /* The redirected access should never span a stripe unit boundary. */
! 1015: RF_ASSERT(rf_RaidAddressToStripeUnitID(&raidPtr->Layout,
! 1016: pda_p->raidAddress) ==
! 1017: rf_RaidAddressToStripeUnitID(&raidPtr->Layout, pda_p->raidAddress +
! 1018: pda_p->numSector - 1));
! 1019: RF_ASSERT(pda_p->col != -1);
! 1020: }
CVSweb