Annotation of sys/dev/raidframe/rf_paritylog.c, Revision 1.1.1.1
1.1 nbrk 1: /* $OpenBSD: rf_paritylog.c,v 1.5 2002/12/16 07:01:04 tdeval Exp $ */
2: /* $NetBSD: rf_paritylog.c,v 1.5 2000/01/07 03:41:01 oster Exp $ */
3:
4: /*
5: * Copyright (c) 1995 Carnegie-Mellon University.
6: * All rights reserved.
7: *
8: * Author: William V. Courtright II
9: *
10: * Permission to use, copy, modify and distribute this software and
11: * its documentation is hereby granted, provided that both the copyright
12: * notice and this permission notice appear in all copies of the
13: * software, derivative works or modified versions, and any portions
14: * thereof, and that both notices appear in supporting documentation.
15: *
16: * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
17: * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
18: * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
19: *
20: * Carnegie Mellon requests users of this software to return to
21: *
22: * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
23: * School of Computer Science
24: * Carnegie Mellon University
25: * Pittsburgh PA 15213-3890
26: *
27: * any improvements or extensions that they make and grant Carnegie the
28: * rights to redistribute these changes.
29: */
30:
31: /*
32: * Code for manipulating in-core parity logs.
33: */
34:
35: #include "rf_archs.h"
36:
37: #if RF_INCLUDE_PARITYLOGGING > 0
38:
39: /*
40: * Append-only log for recording parity "update" and "overwrite" records.
41: */
42:
43: #include "rf_types.h"
44: #include "rf_threadstuff.h"
45: #include "rf_mcpair.h"
46: #include "rf_raid.h"
47: #include "rf_dag.h"
48: #include "rf_dagfuncs.h"
49: #include "rf_desc.h"
50: #include "rf_layout.h"
51: #include "rf_diskqueue.h"
52: #include "rf_etimer.h"
53: #include "rf_paritylog.h"
54: #include "rf_general.h"
55: #include "rf_map.h"
56: #include "rf_paritylogging.h"
57: #include "rf_paritylogDiskMgr.h"
58:
59: RF_CommonLogData_t *rf_AllocParityLogCommonData(RF_Raid_t *);
60: void rf_FreeParityLogCommonData(RF_CommonLogData_t *);
61: RF_ParityLogData_t *rf_AllocParityLogData(RF_Raid_t *);
62: void rf_FreeParityLogData(RF_ParityLogData_t *);
63: void rf_EnqueueParityLogData(RF_ParityLogData_t *, RF_ParityLogData_t **,
64: RF_ParityLogData_t **);
65: RF_ParityLogData_t *rf_DequeueParityLogData(RF_Raid_t *, RF_ParityLogData_t **,
66: RF_ParityLogData_t **, int);
67: void rf_RequeueParityLogData(RF_ParityLogData_t *, RF_ParityLogData_t **,
68: RF_ParityLogData_t **);
69: RF_ParityLogData_t *rf_DequeueMatchingLogData(RF_Raid_t *,
70: RF_ParityLogData_t **, RF_ParityLogData_t **);
71: RF_ParityLog_t *rf_AcquireParityLog(RF_ParityLogData_t *, int);
72: void rf_ReintLog(RF_Raid_t *, int, RF_ParityLog_t *);
73: void rf_FlushLog(RF_Raid_t *, RF_ParityLog_t *);
74: int rf_DumpParityLogToDisk(int, RF_ParityLogData_t *);
75:
76: RF_CommonLogData_t *
77: rf_AllocParityLogCommonData(RF_Raid_t *raidPtr)
78: {
79: RF_CommonLogData_t *common = NULL;
80: int rc;
81:
82: /*
83: * Return a struct for holding common parity log information from the
84: * free list (rf_parityLogDiskQueue.freeCommonList). If the free list
85: * is empty, call RF_Malloc to create a new structure. NON-BLOCKING
86: */
87:
88: RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
89: if (raidPtr->parityLogDiskQueue.freeCommonList) {
90: common = raidPtr->parityLogDiskQueue.freeCommonList;
91: raidPtr->parityLogDiskQueue.freeCommonList =
92: raidPtr->parityLogDiskQueue.freeCommonList->next;
93: RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
94: } else {
95: RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
96: RF_Malloc(common, sizeof(RF_CommonLogData_t),
97: (RF_CommonLogData_t *));
98: rc = rf_mutex_init(&common->mutex);
99: if (rc) {
100: RF_ERRORMSG3("Unable to init mutex file %s line %d"
101: " rc=%d\n", __FILE__, __LINE__, rc);
102: RF_Free(common, sizeof(RF_CommonLogData_t));
103: common = NULL;
104: }
105: }
106: common->next = NULL;
107: return (common);
108: }
109:
110: void
111: rf_FreeParityLogCommonData(RF_CommonLogData_t *common)
112: {
113: RF_Raid_t *raidPtr;
114:
115: /*
116: * Insert a single struct for holding parity log information (data)
117: * into the free list (rf_parityLogDiskQueue.freeCommonList).
118: * NON-BLOCKING
119: */
120:
121: raidPtr = common->raidPtr;
122: RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
123: common->next = raidPtr->parityLogDiskQueue.freeCommonList;
124: raidPtr->parityLogDiskQueue.freeCommonList = common;
125: RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
126: }
127:
128: RF_ParityLogData_t *
129: rf_AllocParityLogData(RF_Raid_t *raidPtr)
130: {
131: RF_ParityLogData_t *data = NULL;
132:
133: /*
134: * Return a struct for holding parity log information from the free
135: * list (rf_parityLogDiskQueue.freeList). If the free list is empty,
136: * call RF_Malloc to create a new structure. NON-BLOCKING
137: */
138:
139: RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
140: if (raidPtr->parityLogDiskQueue.freeDataList) {
141: data = raidPtr->parityLogDiskQueue.freeDataList;
142: raidPtr->parityLogDiskQueue.freeDataList =
143: raidPtr->parityLogDiskQueue.freeDataList->next;
144: RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
145: } else {
146: RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
147: RF_Malloc(data, sizeof(RF_ParityLogData_t),
148: (RF_ParityLogData_t *));
149: }
150: data->next = NULL;
151: data->prev = NULL;
152: return (data);
153: }
154:
155:
156: void
157: rf_FreeParityLogData(RF_ParityLogData_t *data)
158: {
159: RF_ParityLogData_t *nextItem;
160: RF_Raid_t *raidPtr;
161:
162: /*
163: * Insert a linked list of structs for holding parity log information
164: * (data) into the free list (parityLogDiskQueue.freeList).
165: * NON-BLOCKING
166: */
167:
168: raidPtr = data->common->raidPtr;
169: RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
170: while (data) {
171: nextItem = data->next;
172: data->next = raidPtr->parityLogDiskQueue.freeDataList;
173: raidPtr->parityLogDiskQueue.freeDataList = data;
174: data = nextItem;
175: }
176: RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
177: }
178:
179:
180: void
181: rf_EnqueueParityLogData(RF_ParityLogData_t *data, RF_ParityLogData_t **head,
182: RF_ParityLogData_t **tail)
183: {
184: RF_Raid_t *raidPtr;
185:
186: /*
187: * Insert an in-core parity log (*data) into the head of a disk queue
188: * (*head, *tail). NON-BLOCKING
189: */
190:
191: raidPtr = data->common->raidPtr;
192: if (rf_parityLogDebug)
193: printf("[enqueueing parity log data, region %d,"
194: " raidAddress %d, numSector %d]\n", data->regionID,
195: (int) data->diskAddress.raidAddress,
196: (int) data->diskAddress.numSector);
197: RF_ASSERT(data->prev == NULL);
198: RF_ASSERT(data->next == NULL);
199: RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
200: if (*head) {
201: /* Insert into head of queue. */
202: RF_ASSERT((*head)->prev == NULL);
203: RF_ASSERT((*tail)->next == NULL);
204: data->next = *head;
205: (*head)->prev = data;
206: *head = data;
207: } else {
208: /* Insert into empty list. */
209: RF_ASSERT(*head == NULL);
210: RF_ASSERT(*tail == NULL);
211: *head = data;
212: *tail = data;
213: }
214: RF_ASSERT((*head)->prev == NULL);
215: RF_ASSERT((*tail)->next == NULL);
216: RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
217: }
218:
219: RF_ParityLogData_t *
220: rf_DequeueParityLogData(RF_Raid_t *raidPtr, RF_ParityLogData_t **head,
221: RF_ParityLogData_t **tail, int ignoreLocks)
222: {
223: RF_ParityLogData_t *data;
224:
225: /*
226: * Remove and return an in-core parity log from the tail of a disk
227: * queue (*head, *tail). NON-BLOCKING
228: */
229:
230: /* Remove from tail, preserving FIFO order. */
231: if (!ignoreLocks)
232: RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
233: data = *tail;
234: if (data) {
235: if (*head == *tail) {
236: /* Removing last item from queue. */
237: *head = NULL;
238: *tail = NULL;
239: } else {
240: *tail = (*tail)->prev;
241: (*tail)->next = NULL;
242: RF_ASSERT((*head)->prev == NULL);
243: RF_ASSERT((*tail)->next == NULL);
244: }
245: data->next = NULL;
246: data->prev = NULL;
247: if (rf_parityLogDebug)
248: printf("[dequeueing parity log data, region %d,"
249: " raidAddress %d, numSector %d]\n", data->regionID,
250: (int) data->diskAddress.raidAddress,
251: (int) data->diskAddress.numSector);
252: }
253: if (*head) {
254: RF_ASSERT((*head)->prev == NULL);
255: RF_ASSERT((*tail)->next == NULL);
256: }
257: if (!ignoreLocks)
258: RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
259: return (data);
260: }
261:
262:
263: void
264: rf_RequeueParityLogData(RF_ParityLogData_t *data, RF_ParityLogData_t **head,
265: RF_ParityLogData_t **tail)
266: {
267: RF_Raid_t *raidPtr;
268:
269: /*
270: * Insert an in-core parity log (*data) into the tail of a disk queue
271: * (*head, *tail). NON-BLOCKING
272: */
273:
274: raidPtr = data->common->raidPtr;
275: RF_ASSERT(data);
276: if (rf_parityLogDebug)
277: printf("[requeueing parity log data, region %d,"
278: " raidAddress %d, numSector %d]\n", data->regionID,
279: (int) data->diskAddress.raidAddress,
280: (int) data->diskAddress.numSector);
281: RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
282: if (*tail) {
283: /* Append to tail of list. */
284: data->prev = *tail;
285: data->next = NULL;
286: (*tail)->next = data;
287: *tail = data;
288: } else {
289: /* Inserting into an empty list. */
290: *head = data;
291: *tail = data;
292: (*head)->prev = NULL;
293: (*tail)->next = NULL;
294: }
295: RF_ASSERT((*head)->prev == NULL);
296: RF_ASSERT((*tail)->next == NULL);
297: RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
298: }
299:
300: RF_ParityLogData_t *
301: rf_CreateParityLogData(RF_ParityRecordType_t operation, RF_PhysDiskAddr_t *pda,
302: caddr_t bufPtr, RF_Raid_t *raidPtr,
303: int (*wakeFunc) (RF_DagNode_t * node, int status),
304: void *wakeArg, RF_AccTraceEntry_t *tracerec, RF_Etimer_t startTime)
305: {
306: RF_ParityLogData_t *data, *resultHead = NULL, *resultTail = NULL;
307: RF_CommonLogData_t *common;
308: RF_PhysDiskAddr_t *diskAddress;
309: int boundary, offset = 0;
310:
311: /*
312: * Return an initialized struct of info to be logged. Build one item
313: * per physical disk address, one item per region.
314: *
315: * NON-BLOCKING
316: */
317:
318: diskAddress = pda;
319: common = rf_AllocParityLogCommonData(raidPtr);
320: RF_ASSERT(common);
321:
322: common->operation = operation;
323: common->bufPtr = bufPtr;
324: common->raidPtr = raidPtr;
325: common->wakeFunc = wakeFunc;
326: common->wakeArg = wakeArg;
327: common->tracerec = tracerec;
328: common->startTime = startTime;
329: common->cnt = 0;
330:
331: if (rf_parityLogDebug)
332: printf("[entering CreateParityLogData]\n");
333: while (diskAddress) {
334: common->cnt++;
335: data = rf_AllocParityLogData(raidPtr);
336: RF_ASSERT(data);
337: data->common = common;
338: data->next = NULL;
339: data->prev = NULL;
340: data->regionID = rf_MapRegionIDParityLogging(raidPtr,
341: diskAddress->startSector);
342: if (data->regionID == rf_MapRegionIDParityLogging(raidPtr,
343: diskAddress->startSector + diskAddress->numSector - 1)) {
344: /* Disk address does not cross a region boundary. */
345: data->diskAddress = *diskAddress;
346: data->bufOffset = offset;
347: offset = offset + diskAddress->numSector;
348: rf_EnqueueParityLogData(data, &resultHead, &resultTail);
349: /* Adjust disk address. */
350: diskAddress = diskAddress->next;
351: } else {
352: /* Disk address crosses a region boundary. */
353: /* Find address where region is crossed. */
354: boundary = 0;
355: while (data->regionID ==
356: rf_MapRegionIDParityLogging(raidPtr,
357: diskAddress->startSector + boundary))
358: boundary++;
359:
360: /* Enter data before the boundary. */
361: data->diskAddress = *diskAddress;
362: data->diskAddress.numSector = boundary;
363: data->bufOffset = offset;
364: offset += boundary;
365: rf_EnqueueParityLogData(data, &resultHead, &resultTail);
366: /* Adjust disk address. */
367: diskAddress->startSector += boundary;
368: diskAddress->numSector -= boundary;
369: }
370: }
371: if (rf_parityLogDebug)
372: printf("[leaving CreateParityLogData]\n");
373: return (resultHead);
374: }
375:
376:
377: RF_ParityLogData_t *
378: rf_SearchAndDequeueParityLogData(RF_Raid_t *raidPtr, int regionID,
379: RF_ParityLogData_t **head, RF_ParityLogData_t **tail, int ignoreLocks)
380: {
381: RF_ParityLogData_t *w;
382:
383: /*
384: * Remove and return an in-core parity log from a specified region
385: * (regionID). If a matching log is not found, return NULL.
386: *
387: * NON-BLOCKING
388: */
389:
390: /*
391: * walk backward through a list, looking for an entry with a matching
392: * region ID.
393: */
394: if (!ignoreLocks)
395: RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
396: w = (*tail);
397: while (w) {
398: if (w->regionID == regionID) {
399: /* Remove an element from the list. */
400: if (w == *tail) {
401: if (*head == *tail) {
402: /* Removing only element in the list. */
403: *head = NULL;
404: *tail = NULL;
405: } else {
406: /* Removing last item in the list. */
407: *tail = (*tail)->prev;
408: (*tail)->next = NULL;
409: RF_ASSERT((*head)->prev == NULL);
410: RF_ASSERT((*tail)->next == NULL);
411: }
412: } else {
413: if (w == *head) {
414: /* Removing first item in the list. */
415: *head = (*head)->next;
416: (*head)->prev = NULL;
417: RF_ASSERT((*head)->prev == NULL);
418: RF_ASSERT((*tail)->next == NULL);
419: } else {
420: /*
421: * Removing an item from the middle of
422: * the list.
423: */
424: w->prev->next = w->next;
425: w->next->prev = w->prev;
426: RF_ASSERT((*head)->prev == NULL);
427: RF_ASSERT((*tail)->next == NULL);
428: }
429: }
430: w->prev = NULL;
431: w->next = NULL;
432: if (rf_parityLogDebug)
433: printf("[dequeueing parity log data,"
434: " region %d, raidAddress %d,"
435: " numSector %d]\n", w->regionID,
436: (int) w->diskAddress.raidAddress,
437: (int) w->diskAddress.numSector);
438: return (w);
439: } else
440: w = w->prev;
441: }
442: if (!ignoreLocks)
443: RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
444: return (NULL);
445: }
446:
447: RF_ParityLogData_t *
448: rf_DequeueMatchingLogData(RF_Raid_t *raidPtr, RF_ParityLogData_t **head,
449: RF_ParityLogData_t **tail)
450: {
451: RF_ParityLogData_t *logDataList, *logData;
452: int regionID;
453:
454: /*
455: * Remove and return an in-core parity log from the tail of a disk
456: * queue (*head, *tail). Then remove all matching (identical
457: * regionIDs) logData and return as a linked list.
458: *
459: * NON-BLOCKING
460: */
461:
462: logDataList = rf_DequeueParityLogData(raidPtr, head, tail, RF_TRUE);
463: if (logDataList) {
464: regionID = logDataList->regionID;
465: logData = logDataList;
466: logData->next = rf_SearchAndDequeueParityLogData(raidPtr,
467: regionID, head, tail, RF_TRUE);
468: while (logData->next) {
469: logData = logData->next;
470: logData->next =
471: rf_SearchAndDequeueParityLogData(raidPtr, regionID,
472: head, tail, RF_TRUE);
473: }
474: }
475: return (logDataList);
476: }
477:
478:
479: RF_ParityLog_t *
480: rf_AcquireParityLog(RF_ParityLogData_t *logData, int finish)
481: {
482: RF_ParityLog_t *log = NULL;
483: RF_Raid_t *raidPtr;
484:
485: /*
486: * Grab a log buffer from the pool and return it. If no buffers are
487: * available, return NULL. NON-BLOCKING
488: */
489: raidPtr = logData->common->raidPtr;
490: RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex);
491: if (raidPtr->parityLogPool.parityLogs) {
492: log = raidPtr->parityLogPool.parityLogs;
493: raidPtr->parityLogPool.parityLogs =
494: raidPtr->parityLogPool.parityLogs->next;
495: log->regionID = logData->regionID;
496: log->numRecords = 0;
497: log->next = NULL;
498: raidPtr->logsInUse++;
499: RF_ASSERT(raidPtr->logsInUse >= 0 &&
500: raidPtr->logsInUse <= raidPtr->numParityLogs);
501: } else {
502: /*
503: * No logs available, so place ourselves on the queue of work
504: * waiting on log buffers this is done while
505: * parityLogPool.mutex is held, to ensure synchronization with
506: * ReleaseParityLogs.
507: */
508: if (rf_parityLogDebug)
509: printf("[blocked on log, region %d, finish %d]\n",
510: logData->regionID, finish);
511: if (finish)
512: rf_RequeueParityLogData(logData,
513: &raidPtr->parityLogDiskQueue.logBlockHead,
514: &raidPtr->parityLogDiskQueue.logBlockTail);
515: else
516: rf_EnqueueParityLogData(logData,
517: &raidPtr->parityLogDiskQueue.logBlockHead,
518: &raidPtr->parityLogDiskQueue.logBlockTail);
519: }
520: RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex);
521: return (log);
522: }
523:
524: void
525: rf_ReleaseParityLogs(RF_Raid_t *raidPtr, RF_ParityLog_t *firstLog)
526: {
527: RF_ParityLogData_t *logDataList;
528: RF_ParityLog_t *log, *lastLog;
529: int cnt;
530:
531: /*
532: * Insert a linked list of parity logs (firstLog) to the free list
533: * (parityLogPool.parityLogPool)
534: *
535: * NON-BLOCKING
536: */
537:
538: RF_ASSERT(firstLog);
539:
540: /*
541: * Before returning logs to global free list, service all requests
542: * which are blocked on logs. Holding mutexes for parityLogPool and
543: * parityLogDiskQueue forces synchronization with rf_AcquireParityLog().
544: */
545: RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex);
546: RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
547: logDataList = rf_DequeueMatchingLogData(raidPtr,
548: &raidPtr->parityLogDiskQueue.logBlockHead,
549: &raidPtr->parityLogDiskQueue.logBlockTail);
550: log = firstLog;
551: if (firstLog)
552: firstLog = firstLog->next;
553: log->numRecords = 0;
554: log->next = NULL;
555: while (logDataList && log) {
556: RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex);
557: RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
558: rf_ParityLogAppend(logDataList, RF_TRUE, &log, RF_FALSE);
559: if (rf_parityLogDebug)
560: printf("[finishing up buf-blocked log data,"
561: " region %d]\n", logDataList->regionID);
562: if (log == NULL) {
563: log = firstLog;
564: if (firstLog) {
565: firstLog = firstLog->next;
566: log->numRecords = 0;
567: log->next = NULL;
568: }
569: }
570: RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex);
571: RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
572: if (log)
573: logDataList = rf_DequeueMatchingLogData(raidPtr,
574: &raidPtr->parityLogDiskQueue.logBlockHead,
575: &raidPtr->parityLogDiskQueue.logBlockTail);
576: }
577: /* Return remaining logs to pool. */
578: if (log) {
579: log->next = firstLog;
580: firstLog = log;
581: }
582: if (firstLog) {
583: lastLog = firstLog;
584: raidPtr->logsInUse--;
585: RF_ASSERT(raidPtr->logsInUse >= 0 &&
586: raidPtr->logsInUse <= raidPtr->numParityLogs);
587: while (lastLog->next) {
588: lastLog = lastLog->next;
589: raidPtr->logsInUse--;
590: RF_ASSERT(raidPtr->logsInUse >= 0 &&
591: raidPtr->logsInUse <= raidPtr->numParityLogs);
592: }
593: lastLog->next = raidPtr->parityLogPool.parityLogs;
594: raidPtr->parityLogPool.parityLogs = firstLog;
595: cnt = 0;
596: log = raidPtr->parityLogPool.parityLogs;
597: while (log) {
598: cnt++;
599: log = log->next;
600: }
601: RF_ASSERT(cnt + raidPtr->logsInUse == raidPtr->numParityLogs);
602: }
603: RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex);
604: RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
605: }
606:
607: void
608: rf_ReintLog(RF_Raid_t *raidPtr, int regionID, RF_ParityLog_t *log)
609: {
610: RF_ASSERT(log);
611:
612: /*
613: * Insert an in-core parity log (log) into the disk queue of
614: * reintegration work. Set the flag (reintInProgress) for the
615: * specified region (regionID) to indicate that reintegration is in
616: * progress for this region. NON-BLOCKING
617: */
618:
619: RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
620: /* Cleared when reint complete. */
621: raidPtr->regionInfo[regionID].reintInProgress = RF_TRUE;
622:
623: if (rf_parityLogDebug)
624: printf("[requesting reintegration of region %d]\n",
625: log->regionID);
626: /* Move record to reintegration queue. */
627: RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
628: log->next = raidPtr->parityLogDiskQueue.reintQueue;
629: raidPtr->parityLogDiskQueue.reintQueue = log;
630: RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
631: RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
632: RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond);
633: }
634:
635: void
636: rf_FlushLog(RF_Raid_t *raidPtr, RF_ParityLog_t *log)
637: {
638: /*
639: * Insert a core log (log) into a list of logs
640: * (parityLogDiskQueue.flushQueue) waiting to be written to disk.
641: * NON-BLOCKING
642: */
643:
644: RF_ASSERT(log);
645: RF_ASSERT(log->numRecords == raidPtr->numSectorsPerLog);
646: RF_ASSERT(log->next == NULL);
647: /* Move log to flush queue. */
648: RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
649: log->next = raidPtr->parityLogDiskQueue.flushQueue;
650: raidPtr->parityLogDiskQueue.flushQueue = log;
651: RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
652: RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond);
653: }
654:
655: int
656: rf_DumpParityLogToDisk(int finish, RF_ParityLogData_t *logData)
657: {
658: int i, diskCount, regionID = logData->regionID;
659: RF_ParityLog_t *log;
660: RF_Raid_t *raidPtr;
661:
662: raidPtr = logData->common->raidPtr;
663:
664: /*
665: * Move a core log to disk. If the log disk is full, initiate
666: * reintegration.
667: *
668: * Return (0) if we can enqueue the dump immediately, otherwise return
669: * (1) to indicate we are blocked on reintegration and control of the
670: * thread should be relinquished.
671: *
672: * Caller must hold regionInfo[regionID].mutex.
673: *
674: * NON-BLOCKING
675: */
676:
677: if (rf_parityLogDebug)
678: printf("[dumping parity log to disk, region %d]\n", regionID);
679: log = raidPtr->regionInfo[regionID].coreLog;
680: RF_ASSERT(log->numRecords == raidPtr->numSectorsPerLog);
681: RF_ASSERT(log->next == NULL);
682:
683: /* If reintegration is in progress, must queue work. */
684: RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
685: if (raidPtr->regionInfo[regionID].reintInProgress) {
686: /*
687: * Can not proceed since this region is currently being
688: * reintegrated. We can not block, so queue remaining work and
689: * return.
690: */
691: if (rf_parityLogDebug)
692: printf("[region %d waiting on reintegration]\n",
693: regionID);
694: /*
695: * XXX Not sure about the use of finish - shouldn't this
696: * always be "Enqueue" ?
697: */
698: if (finish)
699: rf_RequeueParityLogData(logData,
700: &raidPtr->parityLogDiskQueue.reintBlockHead,
701: &raidPtr->parityLogDiskQueue.reintBlockTail);
702: else
703: rf_EnqueueParityLogData(logData,
704: &raidPtr->parityLogDiskQueue.reintBlockHead,
705: &raidPtr->parityLogDiskQueue.reintBlockTail);
706: RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
707: return (1); /* Relenquish control of this thread. */
708: }
709: RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
710: raidPtr->regionInfo[regionID].coreLog = NULL;
711: if ((raidPtr->regionInfo[regionID].diskCount) <
712: raidPtr->regionInfo[regionID].capacity)
713: /*
714: * IMPORTANT !!! This loop bound assumes region disk holds an
715: * integral number of core logs.
716: */
717: {
718: /* Update disk map for this region. */
719: diskCount = raidPtr->regionInfo[regionID].diskCount;
720: for (i = 0; i < raidPtr->numSectorsPerLog; i++) {
721: raidPtr->regionInfo[regionID].diskMap[i + diskCount]
722: .operation = log->records[i].operation;
723: raidPtr->regionInfo[regionID].diskMap[i + diskCount]
724: .parityAddr = log->records[i].parityAddr;
725: }
726: log->diskOffset = diskCount;
727: raidPtr->regionInfo[regionID].diskCount +=
728: raidPtr->numSectorsPerLog;
729: rf_FlushLog(raidPtr, log);
730: } else {
731: /*
732: * No room for log on disk, send it to disk manager and
733: * request reintegration.
734: */
735: RF_ASSERT(raidPtr->regionInfo[regionID].diskCount ==
736: raidPtr->regionInfo[regionID].capacity);
737: rf_ReintLog(raidPtr, regionID, log);
738: }
739: if (rf_parityLogDebug)
740: printf("[finished dumping parity log to disk, region %d]\n",
741: regionID);
742: return (0);
743: }
744:
745: int
746: rf_ParityLogAppend(RF_ParityLogData_t *logData, int finish,
747: RF_ParityLog_t **incomingLog, int clearReintFlag)
748: {
749: int regionID, logItem, itemDone;
750: RF_ParityLogData_t *item;
751: int punt, done = RF_FALSE;
752: RF_ParityLog_t *log;
753: RF_Raid_t *raidPtr;
754: RF_Etimer_t timer;
755: int (*wakeFunc) (RF_DagNode_t * node, int status);
756: void *wakeArg;
757:
758: /*
759: * Add parity to the appropriate log, one sector at a time. This
760: * routine is called is called by dag functions ParityLogUpdateFunc
761: * and ParityLogOverwriteFunc and therefore MUST BE NONBLOCKING.
762: *
763: * Parity to be logged is contained in a linked-list (logData). When
764: * this routine returns, every sector in the list will be in one of
765: * three places: 1) entered into the parity log 2) queued, waiting on
766: * reintegration 3) queued, waiting on a core log.
767: *
768: * Blocked work is passed to the ParityLoggingDiskManager for
769: * completion. Later, as conditions which required the block are
770: * removed, the work reenters this routine with the "finish" parameter
771: * set to "RF_TRUE."
772: *
773: * NON-BLOCKING
774: */
775:
776: raidPtr = logData->common->raidPtr;
777: /* Lock the region for the first item in logData. */
778: RF_ASSERT(logData != NULL);
779: regionID = logData->regionID;
780: RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
781: RF_ASSERT(raidPtr->regionInfo[regionID].loggingEnabled);
782:
783: if (clearReintFlag) {
784: /*
785: * Enable flushing for this region. Holding both locks
786: * provides a synchronization barrier with
787: * rf_DumpParityLogToDisk.
788: */
789: RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
790: RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
791: RF_ASSERT(raidPtr->regionInfo[regionID].reintInProgress ==
792: RF_TRUE);
793: raidPtr->regionInfo[regionID].diskCount = 0;
794: raidPtr->regionInfo[regionID].reintInProgress = RF_FALSE;
795: /* Flushing is now enabled. */
796: RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
797: RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
798: }
799: /* Process each item in logData. */
800: while (logData) {
801: /* Remove an item from logData. */
802: item = logData;
803: logData = logData->next;
804: item->next = NULL;
805: item->prev = NULL;
806:
807: if (rf_parityLogDebug)
808: printf("[appending parity log data, region %d,"
809: " raidAddress %d, numSector %d]\n", item->regionID,
810: (int) item->diskAddress.raidAddress,
811: (int) item->diskAddress.numSector);
812:
813: /* See if we moved to a new region. */
814: if (regionID != item->regionID) {
815: RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
816: regionID = item->regionID;
817: RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
818: RF_ASSERT(raidPtr->regionInfo[regionID].loggingEnabled);
819: }
820: punt = RF_FALSE;/*
821: * Set to RF_TRUE if work is blocked. This
822: * can happen in one of two ways: 1) no core
823: * log (rf_AcquireParityLog) 2) waiting on
824: * reintegration (rf_DumpParityLogToDisk).
825: * If punt is RF_TRUE, the dataItem was queued,
826: * so skip to next item.
827: */
828:
829: /*
830: * Process item, one sector at a time, until all sectors
831: * processed or we punt.
832: */
833: if (item->diskAddress.numSector > 0)
834: done = RF_FALSE;
835: else
836: RF_ASSERT(0);
837: while (!punt && !done) {
838: /* Verify that a core log exists for this region. */
839: if (!raidPtr->regionInfo[regionID].coreLog) {
840: /*
841: * Attempt to acquire a parity log. If
842: * acquisition fails, queue remaining work in
843: * data item and move to nextItem.
844: */
845: if (incomingLog) {
846: if (*incomingLog) {
847: RF_ASSERT((*incomingLog)->next
848: == NULL);
849: raidPtr->regionInfo[regionID]
850: .coreLog = *incomingLog;
851: raidPtr->regionInfo[regionID]
852: .coreLog->regionID =
853: regionID;
854: *incomingLog = NULL;
855: } else
856: raidPtr->regionInfo[regionID]
857: .coreLog =
858: rf_AcquireParityLog(item,
859: finish);
860: } else
861: raidPtr->regionInfo[regionID].coreLog =
862: rf_AcquireParityLog(item, finish);
863: /*
864: * Note: rf_AcquireParityLog either returns
865: * a log or enqueues currentItem.
866: */
867: }
868: if (!raidPtr->regionInfo[regionID].coreLog)
869: punt = RF_TRUE; /* Failed to find a core log. */
870: else {
871: RF_ASSERT(raidPtr->regionInfo[regionID].coreLog
872: ->next == NULL);
873: /*
874: * Verify that the log has room for new
875: * entries.
876: */
877: /*
878: * If log is full, dump it to disk and grab a
879: * new log.
880: */
881: if (raidPtr->regionInfo[regionID].coreLog
882: ->numRecords == raidPtr->numSectorsPerLog)
883: {
884: /* Log is full, dump it to disk. */
885: if (rf_DumpParityLogToDisk(finish,
886: item))
887: /*
888: * Dump unsuccessful, blocked
889: * on reintegration.
890: */
891: punt = RF_TRUE;
892: else {
893: /* Dump was successful. */
894: if (incomingLog) {
895: if (*incomingLog) {
896: RF_ASSERT(
897: (*incomingLog)->next ==
898: NULL);
899: raidPtr->
900: regionInfo[regionID].coreLog =
901: *incomingLog;
902: raidPtr->
903: regionInfo[regionID].coreLog->
904: regionID = regionID;
905: *incomingLog =
906: NULL;
907: } else
908: raidPtr->
909: regionInfo[regionID].coreLog =
910: rf_AcquireParityLog(item,
911: finish);
912: } else
913: raidPtr->regionInfo
914: [regionID].coreLog =
915: rf_AcquireParityLog(item,
916: finish);
917: /*
918: * If a core log is not
919: * available, must queue work
920: * and return.
921: */
922: if (!raidPtr->regionInfo
923: [regionID].coreLog)
924: /*
925: * Blocked on log
926: * availability.
927: */
928: punt = RF_TRUE;
929: }
930: }
931: }
932: /*
933: * If we didn't punt on this item, attempt to add a
934: * sector to the core log.
935: */
936: if (!punt) {
937: RF_ASSERT(raidPtr->regionInfo[regionID].coreLog
938: ->next == NULL);
939: /*
940: * At this point, we have a core log with
941: * enough room for a sector.
942: */
943: /* Copy a sector into the log. */
944: log = raidPtr->regionInfo[regionID].coreLog;
945: RF_ASSERT(log->numRecords <
946: raidPtr->numSectorsPerLog);
947: logItem = log->numRecords++;
948: log->records[logItem].parityAddr =
949: item->diskAddress;
950: RF_ASSERT(log->records[logItem].parityAddr
951: .startSector >=
952: raidPtr->regionInfo[regionID]
953: .parityStartAddr);
954: RF_ASSERT(log->records[logItem].parityAddr
955: .startSector <
956: raidPtr->regionInfo[regionID]
957: .parityStartAddr +
958: raidPtr->regionInfo[regionID]
959: .numSectorsParity);
960: log->records[logItem].parityAddr.numSector = 1;
961: log->records[logItem].operation =
962: item->common->operation;
963: bcopy((item->common->bufPtr +
964: (item->bufOffset++ * (1 <<
965: item->common->raidPtr->logBytesPerSector))),
966: log->bufPtr + (logItem * (1 <<
967: item->common->raidPtr->logBytesPerSector)),
968: (1 << item->common->raidPtr
969: ->logBytesPerSector));
970: item->diskAddress.numSector--;
971: item->diskAddress.startSector++;
972: if (item->diskAddress.numSector == 0)
973: done = RF_TRUE;
974: }
975: }
976:
977: if (!punt) {
978: /*
979: * Processed this item completely, decrement count of
980: * items to be processed.
981: */
982: RF_ASSERT(item->diskAddress.numSector == 0);
983: RF_LOCK_MUTEX(item->common->mutex);
984: item->common->cnt--;
985: if (item->common->cnt == 0)
986: itemDone = RF_TRUE;
987: else
988: itemDone = RF_FALSE;
989: RF_UNLOCK_MUTEX(item->common->mutex);
990: if (itemDone) {
991: /*
992: * Finished processing all log data for this
993: * IO Return structs to free list and invoke
994: * wakeup function.
995: */
996: /* Grab initial value of timer. */
997: timer = item->common->startTime;
998: RF_ETIMER_STOP(timer);
999: RF_ETIMER_EVAL(timer);
1000: item->common->tracerec->plog_us +=
1001: RF_ETIMER_VAL_US(timer);
1002: if (rf_parityLogDebug)
1003: printf("[waking process for region"
1004: " %d]\n", item->regionID);
1005: wakeFunc = item->common->wakeFunc;
1006: wakeArg = item->common->wakeArg;
1007: rf_FreeParityLogCommonData(item->common);
1008: rf_FreeParityLogData(item);
1009: (wakeFunc) (wakeArg, 0);
1010: } else
1011: rf_FreeParityLogData(item);
1012: }
1013: }
1014: RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
1015: if (rf_parityLogDebug)
1016: printf("[exiting ParityLogAppend]\n");
1017: return (0);
1018: }
1019:
1020:
1021: void
1022: rf_EnableParityLogging(RF_Raid_t *raidPtr)
1023: {
1024: int regionID;
1025:
1026: for (regionID = 0; regionID < rf_numParityRegions; regionID++) {
1027: RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
1028: raidPtr->regionInfo[regionID].loggingEnabled = RF_TRUE;
1029: RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
1030: }
1031: if (rf_parityLogDebug)
1032: printf("[parity logging enabled]\n");
1033: }
1034: #endif /* RF_INCLUDE_PARITYLOGGING > 0 */
CVSweb