Annotation of sys/dev/raidframe/rf_paritylogDiskMgr.c, Revision 1.1.1.1
1.1 nbrk 1: /* $OpenBSD: rf_paritylogDiskMgr.c,v 1.6 2002/12/16 07:01:04 tdeval Exp $ */
2: /* $NetBSD: rf_paritylogDiskMgr.c,v 1.10 2000/01/15 01:57:57 oster Exp $ */
3:
4: /*
5: * Copyright (c) 1995 Carnegie-Mellon University.
6: * All rights reserved.
7: *
8: * Author: William V. Courtright II
9: *
10: * Permission to use, copy, modify and distribute this software and
11: * its documentation is hereby granted, provided that both the copyright
12: * notice and this permission notice appear in all copies of the
13: * software, derivative works or modified versions, and any portions
14: * thereof, and that both notices appear in supporting documentation.
15: *
16: * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
17: * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
18: * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
19: *
20: * Carnegie Mellon requests users of this software to return to
21: *
22: * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
23: * School of Computer Science
24: * Carnegie Mellon University
25: * Pittsburgh PA 15213-3890
26: *
27: * any improvements or extensions that they make and grant Carnegie the
28: * rights to redistribute these changes.
29: */
30: /*
31: * Code for flushing and reintegrating operations related to parity logging.
32: */
33:
34: #include "rf_archs.h"
35:
36: #if RF_INCLUDE_PARITYLOGGING > 0
37:
38: #include "rf_types.h"
39: #include "rf_threadstuff.h"
40: #include "rf_mcpair.h"
41: #include "rf_raid.h"
42: #include "rf_dag.h"
43: #include "rf_dagfuncs.h"
44: #include "rf_desc.h"
45: #include "rf_layout.h"
46: #include "rf_diskqueue.h"
47: #include "rf_paritylog.h"
48: #include "rf_general.h"
49: #include "rf_etimer.h"
50: #include "rf_paritylogging.h"
51: #include "rf_engine.h"
52: #include "rf_dagutils.h"
53: #include "rf_map.h"
54: #include "rf_parityscan.h"
55:
56: #include "rf_paritylogDiskMgr.h"
57:
58: caddr_t rf_AcquireReintBuffer(RF_RegionBufferQueue_t *);
59: void rf_ReleaseReintBuffer(RF_RegionBufferQueue_t *, caddr_t);
60: void rf_ReadRegionLog(RF_RegionId_t, RF_MCPair_t *, caddr_t, RF_Raid_t *,
61: RF_DagHeader_t **, RF_AllocListElem_t **, RF_PhysDiskAddr_t **);
62: void rf_WriteCoreLog(RF_ParityLog_t *, RF_MCPair_t *, RF_Raid_t *,
63: RF_DagHeader_t **, RF_AllocListElem_t **, RF_PhysDiskAddr_t **);
64: void rf_ReadRegionParity(RF_RegionId_t, RF_MCPair_t *, caddr_t, RF_Raid_t *,
65: RF_DagHeader_t **, RF_AllocListElem_t **, RF_PhysDiskAddr_t **);
66: void rf_WriteRegionParity(RF_RegionId_t, RF_MCPair_t *, caddr_t, RF_Raid_t *,
67: RF_DagHeader_t **, RF_AllocListElem_t **, RF_PhysDiskAddr_t **);
68: void rf_FlushLogsToDisk(RF_Raid_t *, RF_ParityLog_t *);
69: void rf_ReintegrateRegion(RF_Raid_t *, RF_RegionId_t, RF_ParityLog_t *);
70: void rf_ReintegrateLogs(RF_Raid_t *, RF_ParityLog_t *);
71:
72:
73: caddr_t
74: rf_AcquireReintBuffer(RF_RegionBufferQueue_t *pool)
75: {
76: caddr_t bufPtr = NULL;
77:
78: /*
79: * Return a region buffer from the free list (pool). If the free list
80: * is empty, WAIT. BLOCKING
81: */
82:
83: RF_LOCK_MUTEX(pool->mutex);
84: if (pool->availableBuffers > 0) {
85: bufPtr = pool->buffers[pool->availBuffersIndex];
86: pool->availableBuffers--;
87: pool->availBuffersIndex++;
88: if (pool->availBuffersIndex == pool->totalBuffers)
89: pool->availBuffersIndex = 0;
90: RF_UNLOCK_MUTEX(pool->mutex);
91: } else {
92: RF_PANIC(); /*
93: * Should never happen in correct config,
94: * single reint.
95: */
96: RF_WAIT_COND(pool->cond, pool->mutex);
97: }
98: return (bufPtr);
99: }
100:
101:
102: void
103: rf_ReleaseReintBuffer(RF_RegionBufferQueue_t *pool, caddr_t bufPtr)
104: {
105: /*
106: * Insert a region buffer (bufPtr) into the free list (pool).
107: * NON-BLOCKING
108: */
109:
110: RF_LOCK_MUTEX(pool->mutex);
111: pool->availableBuffers++;
112: pool->buffers[pool->emptyBuffersIndex] = bufPtr;
113: pool->emptyBuffersIndex++;
114: if (pool->emptyBuffersIndex == pool->totalBuffers)
115: pool->emptyBuffersIndex = 0;
116: RF_ASSERT(pool->availableBuffers <= pool->totalBuffers);
117: RF_UNLOCK_MUTEX(pool->mutex);
118: RF_SIGNAL_COND(pool->cond);
119: }
120:
121:
122: void
123: rf_ReadRegionLog(RF_RegionId_t regionID, RF_MCPair_t *rrd_mcpair,
124: caddr_t regionBuffer, RF_Raid_t *raidPtr, RF_DagHeader_t **rrd_dag_h,
125: RF_AllocListElem_t **rrd_alloclist, RF_PhysDiskAddr_t **rrd_pda)
126: {
127: /*
128: * Initiate the read a region log from disk. Once initiated, return
129: * to the calling routine.
130: *
131: * NON-BLOCKING
132: */
133:
134: RF_AccTraceEntry_t *tracerec;
135: RF_DagNode_t *rrd_rdNode;
136:
137: /* Create DAG to read region log from disk. */
138: rf_MakeAllocList(*rrd_alloclist);
139: *rrd_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, regionBuffer,
140: rf_DiskReadFunc, rf_DiskReadUndoFunc, "Rrl", *rrd_alloclist,
141: RF_DAG_FLAGS_NONE, RF_IO_NORMAL_PRIORITY);
142:
143: /* Create and initialize PDA for the core log. */
144: /* RF_Malloc(*rrd_pda, sizeof(RF_PhysDiskAddr_t),
145: * (RF_PhysDiskAddr_t *)); */
146: *rrd_pda = rf_AllocPDAList(1);
147: rf_MapLogParityLogging(raidPtr, regionID, 0, &((*rrd_pda)->row),
148: &((*rrd_pda)->col), &((*rrd_pda)->startSector));
149: (*rrd_pda)->numSector = raidPtr->regionInfo[regionID].capacity;
150:
151: if ((*rrd_pda)->next) {
152: (*rrd_pda)->next = NULL;
153: printf("set rrd_pda->next to NULL\n");
154: }
155: /* Initialize DAG parameters. */
156: RF_Malloc(tracerec, sizeof(RF_AccTraceEntry_t), (RF_AccTraceEntry_t *));
157: bzero((char *) tracerec, sizeof(RF_AccTraceEntry_t));
158: (*rrd_dag_h)->tracerec = tracerec;
159: rrd_rdNode = (*rrd_dag_h)->succedents[0]->succedents[0];
160: rrd_rdNode->params[0].p = *rrd_pda;
161: /* rrd_rdNode->params[1] = regionBuffer; */
162: rrd_rdNode->params[2].v = 0;
163: rrd_rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
164: 0, 0, 0);
165:
166: /* Launch region log read dag. */
167: rf_DispatchDAG(*rrd_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc,
168: (void *) rrd_mcpair);
169: }
170:
171:
172: void
173: rf_WriteCoreLog(RF_ParityLog_t *log, RF_MCPair_t *fwr_mcpair,
174: RF_Raid_t *raidPtr, RF_DagHeader_t **fwr_dag_h,
175: RF_AllocListElem_t **fwr_alloclist, RF_PhysDiskAddr_t **fwr_pda)
176: {
177: RF_RegionId_t regionID = log->regionID;
178: RF_AccTraceEntry_t *tracerec;
179: RF_SectorNum_t regionOffset;
180: RF_DagNode_t *fwr_wrNode;
181:
182: /*
183: * Initiate the write of a core log to a region log disk. Once
184: * initiated, return to the calling routine.
185: *
186: * NON-BLOCKING
187: */
188:
189: /* Create DAG to write a core log to a region log disk. */
190: rf_MakeAllocList(*fwr_alloclist);
191: *fwr_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, log->bufPtr,
192: rf_DiskWriteFunc, rf_DiskWriteUndoFunc, "Wcl", *fwr_alloclist,
193: RF_DAG_FLAGS_NONE, RF_IO_NORMAL_PRIORITY);
194:
195: /* Create and initialize PDA for the region log. */
196: /* RF_Malloc(*fwr_pda, sizeof(RF_PhysDiskAddr_t),
197: * (RF_PhysDiskAddr_t *)); */
198: *fwr_pda = rf_AllocPDAList(1);
199: regionOffset = log->diskOffset;
200: rf_MapLogParityLogging(raidPtr, regionID, regionOffset,
201: &((*fwr_pda)->row), &((*fwr_pda)->col), &((*fwr_pda)->startSector));
202: (*fwr_pda)->numSector = raidPtr->numSectorsPerLog;
203:
204: /* Initialize DAG parameters. */
205: RF_Malloc(tracerec, sizeof(RF_AccTraceEntry_t), (RF_AccTraceEntry_t *));
206: bzero((char *) tracerec, sizeof(RF_AccTraceEntry_t));
207: (*fwr_dag_h)->tracerec = tracerec;
208: fwr_wrNode = (*fwr_dag_h)->succedents[0]->succedents[0];
209: fwr_wrNode->params[0].p = *fwr_pda;
210: /* fwr_wrNode->params[1] = log->bufPtr; */
211: fwr_wrNode->params[2].v = 0;
212: fwr_wrNode->params[3].v =
213: RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, 0);
214:
215: /* Launch the dag to write the core log to disk. */
216: rf_DispatchDAG(*fwr_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc,
217: (void *) fwr_mcpair);
218: }
219:
220:
221: void
222: rf_ReadRegionParity(RF_RegionId_t regionID, RF_MCPair_t *prd_mcpair,
223: caddr_t parityBuffer, RF_Raid_t *raidPtr, RF_DagHeader_t **prd_dag_h,
224: RF_AllocListElem_t **prd_alloclist, RF_PhysDiskAddr_t **prd_pda)
225: {
226: /*
227: * Initiate the read region parity from disk. Once initiated, return
228: * to the calling routine.
229: *
230: * NON-BLOCKING
231: */
232:
233: RF_AccTraceEntry_t *tracerec;
234: RF_DagNode_t *prd_rdNode;
235:
236: /* Create DAG to read region parity from disk. */
237: rf_MakeAllocList(*prd_alloclist);
238: *prd_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, NULL, rf_DiskReadFunc,
239: rf_DiskReadUndoFunc, "Rrp", *prd_alloclist, RF_DAG_FLAGS_NONE,
240: RF_IO_NORMAL_PRIORITY);
241:
242: /* Create and initialize PDA for region parity. */
243: /* RF_Malloc(*prd_pda, sizeof(RF_PhysDiskAddr_t),
244: * (RF_PhysDiskAddr_t *)); */
245: *prd_pda = rf_AllocPDAList(1);
246: rf_MapRegionParity(raidPtr, regionID, &((*prd_pda)->row),
247: &((*prd_pda)->col), &((*prd_pda)->startSector),
248: &((*prd_pda)->numSector));
249: if (rf_parityLogDebug)
250: printf("[reading %d sectors of parity from region %d]\n",
251: (int) (*prd_pda)->numSector, regionID);
252: if ((*prd_pda)->next) {
253: (*prd_pda)->next = NULL;
254: printf("set prd_pda->next to NULL\n");
255: }
256: /* Initialize DAG parameters. */
257: RF_Malloc(tracerec, sizeof(RF_AccTraceEntry_t), (RF_AccTraceEntry_t *));
258: bzero((char *) tracerec, sizeof(RF_AccTraceEntry_t));
259: (*prd_dag_h)->tracerec = tracerec;
260: prd_rdNode = (*prd_dag_h)->succedents[0]->succedents[0];
261: prd_rdNode->params[0].p = *prd_pda;
262: prd_rdNode->params[1].p = parityBuffer;
263: prd_rdNode->params[2].v = 0;
264: prd_rdNode->params[3].v =
265: RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, 0);
266: if (rf_validateDAGDebug)
267: rf_ValidateDAG(*prd_dag_h);
268: /* Launch region parity read dag. */
269: rf_DispatchDAG(*prd_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc,
270: (void *) prd_mcpair);
271: }
272:
273: void
274: rf_WriteRegionParity(RF_RegionId_t regionID, RF_MCPair_t *pwr_mcpair,
275: caddr_t parityBuffer, RF_Raid_t *raidPtr, RF_DagHeader_t **pwr_dag_h,
276: RF_AllocListElem_t **pwr_alloclist, RF_PhysDiskAddr_t **pwr_pda)
277: {
278: /*
279: * Initiate the write of region parity to disk. Once initiated, return
280: * to the calling routine.
281: *
282: * NON-BLOCKING
283: */
284:
285: RF_AccTraceEntry_t *tracerec;
286: RF_DagNode_t *pwr_wrNode;
287:
288: /* Create DAG to write region log from disk. */
289: rf_MakeAllocList(*pwr_alloclist);
290: *pwr_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, parityBuffer,
291: rf_DiskWriteFunc, rf_DiskWriteUndoFunc, "Wrp", *pwr_alloclist,
292: RF_DAG_FLAGS_NONE, RF_IO_NORMAL_PRIORITY);
293:
294: /* Create and initialize PDA for region parity. */
295: /* RF_Malloc(*pwr_pda, sizeof(RF_PhysDiskAddr_t),
296: * (RF_PhysDiskAddr_t *)); */
297: *pwr_pda = rf_AllocPDAList(1);
298: rf_MapRegionParity(raidPtr, regionID, &((*pwr_pda)->row),
299: &((*pwr_pda)->col), &((*pwr_pda)->startSector),
300: &((*pwr_pda)->numSector));
301:
302: /* Initialize DAG parameters. */
303: RF_Malloc(tracerec, sizeof(RF_AccTraceEntry_t), (RF_AccTraceEntry_t *));
304: bzero((char *) tracerec, sizeof(RF_AccTraceEntry_t));
305: (*pwr_dag_h)->tracerec = tracerec;
306: pwr_wrNode = (*pwr_dag_h)->succedents[0]->succedents[0];
307: pwr_wrNode->params[0].p = *pwr_pda;
308: /* pwr_wrNode->params[1] = parityBuffer; */
309: pwr_wrNode->params[2].v = 0;
310: pwr_wrNode->params[3].v =
311: RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0, 0, 0);
312:
313: /* Launch the dag to write region parity to disk. */
314: rf_DispatchDAG(*pwr_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc,
315: (void *) pwr_mcpair);
316: }
317:
318: void
319: rf_FlushLogsToDisk(RF_Raid_t *raidPtr, RF_ParityLog_t *logList)
320: {
321: /*
322: * Flush a linked list of core logs to the log disk. Logs contain the
323: * disk location where they should be written. Logs were written in
324: * FIFO order and that order must be preserved.
325: *
326: * Recommended optimizations:
327: * 1) Allow multiple flushes to occur simultaneously.
328: * 2) Coalesce contiguous flush operations.
329: *
330: * BLOCKING
331: */
332:
333: RF_ParityLog_t *log;
334: RF_RegionId_t regionID;
335: RF_MCPair_t *fwr_mcpair;
336: RF_DagHeader_t *fwr_dag_h;
337: RF_AllocListElem_t *fwr_alloclist;
338: RF_PhysDiskAddr_t *fwr_pda;
339:
340: fwr_mcpair = rf_AllocMCPair();
341: RF_LOCK_MUTEX(fwr_mcpair->mutex);
342:
343: RF_ASSERT(logList);
344: log = logList;
345: while (log) {
346: regionID = log->regionID;
347:
348: /* Create and launch a DAG to write the core log. */
349: if (rf_parityLogDebug)
350: printf("[initiating write of core log for region"
351: " %d]\n", regionID);
352: fwr_mcpair->flag = RF_FALSE;
353: rf_WriteCoreLog(log, fwr_mcpair, raidPtr, &fwr_dag_h,
354: &fwr_alloclist, &fwr_pda);
355:
356: /* Wait for the DAG to complete. */
357: while (!fwr_mcpair->flag)
358: RF_WAIT_COND(fwr_mcpair->cond, fwr_mcpair->mutex);
359: if (fwr_dag_h->status != rf_enable) {
360: RF_ERRORMSG1("Unable to write core log to disk"
361: " (region %d)\n", regionID);
362: RF_ASSERT(0);
363: }
364: /* RF_Free(fwr_pda, sizeof(RF_PhysDiskAddr_t)); */
365: rf_FreePhysDiskAddr(fwr_pda);
366: rf_FreeDAG(fwr_dag_h);
367: rf_FreeAllocList(fwr_alloclist);
368:
369: log = log->next;
370: }
371: RF_UNLOCK_MUTEX(fwr_mcpair->mutex);
372: rf_FreeMCPair(fwr_mcpair);
373: rf_ReleaseParityLogs(raidPtr, logList);
374: }
375:
376: void
377: rf_ReintegrateRegion(RF_Raid_t *raidPtr, RF_RegionId_t regionID,
378: RF_ParityLog_t *coreLog)
379: {
380: RF_MCPair_t *rrd_mcpair = NULL, *prd_mcpair, *pwr_mcpair;
381: RF_DagHeader_t *rrd_dag_h, *prd_dag_h, *pwr_dag_h;
382: RF_AllocListElem_t *rrd_alloclist, *prd_alloclist, *pwr_alloclist;
383: RF_PhysDiskAddr_t *rrd_pda, *prd_pda, *pwr_pda;
384: caddr_t parityBuffer, regionBuffer = NULL;
385:
386: /*
387: * Reintegrate a region (regionID).
388: *
389: * 1. Acquire region and parity buffers.
390: * 2. Read log from disk.
391: * 3. Read parity from disk.
392: * 4. Apply log to parity.
393: * 5. Apply core log to parity.
394: * 6. Write new parity to disk.
395: *
396: * BLOCKING
397: */
398:
399: if (rf_parityLogDebug)
400: printf("[reintegrating region %d]\n", regionID);
401:
402: /* Initiate read of region parity. */
403: if (rf_parityLogDebug)
404: printf("[initiating read of parity for region %d]\n", regionID);
405: parityBuffer = rf_AcquireReintBuffer(&raidPtr->parityBufferPool);
406: prd_mcpair = rf_AllocMCPair();
407: RF_LOCK_MUTEX(prd_mcpair->mutex);
408: prd_mcpair->flag = RF_FALSE;
409: rf_ReadRegionParity(regionID, prd_mcpair, parityBuffer, raidPtr,
410: &prd_dag_h, &prd_alloclist, &prd_pda);
411:
412: /* If region log nonempty, initiate read. */
413: if (raidPtr->regionInfo[regionID].diskCount > 0) {
414: if (rf_parityLogDebug)
415: printf("[initiating read of disk log for region %d]\n",
416: regionID);
417: regionBuffer =
418: rf_AcquireReintBuffer(&raidPtr->regionBufferPool);
419: rrd_mcpair = rf_AllocMCPair();
420: RF_LOCK_MUTEX(rrd_mcpair->mutex);
421: rrd_mcpair->flag = RF_FALSE;
422: rf_ReadRegionLog(regionID, rrd_mcpair, regionBuffer, raidPtr,
423: &rrd_dag_h, &rrd_alloclist, &rrd_pda);
424: }
425: /* Wait on read of region parity to complete. */
426: while (!prd_mcpair->flag) {
427: RF_WAIT_COND(prd_mcpair->cond, prd_mcpair->mutex);
428: }
429: RF_UNLOCK_MUTEX(prd_mcpair->mutex);
430: if (prd_dag_h->status != rf_enable) {
431: RF_ERRORMSG("Unable to read parity from disk\n");
432: /* Add code to fail the parity disk. */
433: RF_ASSERT(0);
434: }
435: /* Apply core log to parity. */
436: /* if (coreLog) ApplyLogsToParity(coreLog, parityBuffer); */
437:
438: if (raidPtr->regionInfo[regionID].diskCount > 0) {
439: /* Wait on read of region log to complete. */
440: while (!rrd_mcpair->flag)
441: RF_WAIT_COND(rrd_mcpair->cond, rrd_mcpair->mutex);
442: RF_UNLOCK_MUTEX(rrd_mcpair->mutex);
443: if (rrd_dag_h->status != rf_enable) {
444: RF_ERRORMSG("Unable to read region log from disk\n");
445: /* Add code to fail the log disk. */
446: RF_ASSERT(0);
447: }
448: /* Apply region log to parity. */
449: /* ApplyRegionToParity(regionID, regionBuffer, parityBuffer); */
450: /* Release resources associated with region log. */
451: /* RF_Free(rrd_pda, sizeof(RF_PhysDiskAddr_t)); */
452: rf_FreePhysDiskAddr(rrd_pda);
453: rf_FreeDAG(rrd_dag_h);
454: rf_FreeAllocList(rrd_alloclist);
455: rf_FreeMCPair(rrd_mcpair);
456: rf_ReleaseReintBuffer(&raidPtr->regionBufferPool, regionBuffer);
457: }
458: /* Write reintegrated parity to disk. */
459: if (rf_parityLogDebug)
460: printf("[initiating write of parity for region %d]\n",
461: regionID);
462: pwr_mcpair = rf_AllocMCPair();
463: RF_LOCK_MUTEX(pwr_mcpair->mutex);
464: pwr_mcpair->flag = RF_FALSE;
465: rf_WriteRegionParity(regionID, pwr_mcpair, parityBuffer, raidPtr,
466: &pwr_dag_h, &pwr_alloclist, &pwr_pda);
467: while (!pwr_mcpair->flag)
468: RF_WAIT_COND(pwr_mcpair->cond, pwr_mcpair->mutex);
469: RF_UNLOCK_MUTEX(pwr_mcpair->mutex);
470: if (pwr_dag_h->status != rf_enable) {
471: RF_ERRORMSG("Unable to write parity to disk\n");
472: /* Add code to fail the parity disk. */
473: RF_ASSERT(0);
474: }
475: /* Release resources associated with read of old parity. */
476: /* RF_Free(prd_pda, sizeof(RF_PhysDiskAddr_t)); */
477: rf_FreePhysDiskAddr(prd_pda);
478: rf_FreeDAG(prd_dag_h);
479: rf_FreeAllocList(prd_alloclist);
480: rf_FreeMCPair(prd_mcpair);
481:
482: /* Release resources associated with write of new parity. */
483: rf_ReleaseReintBuffer(&raidPtr->parityBufferPool, parityBuffer);
484: /* RF_Free(pwr_pda, sizeof(RF_PhysDiskAddr_t)); */
485: rf_FreePhysDiskAddr(pwr_pda);
486: rf_FreeDAG(pwr_dag_h);
487: rf_FreeAllocList(pwr_alloclist);
488: rf_FreeMCPair(pwr_mcpair);
489:
490: if (rf_parityLogDebug)
491: printf("[finished reintegrating region %d]\n", regionID);
492: }
493:
494:
495: void
496: rf_ReintegrateLogs(RF_Raid_t *raidPtr, RF_ParityLog_t *logList)
497: {
498: RF_ParityLog_t *log, *freeLogList = NULL;
499: RF_ParityLogData_t *logData, *logDataList;
500: RF_RegionId_t regionID;
501:
502: RF_ASSERT(logList);
503: while (logList) {
504: log = logList;
505: logList = logList->next;
506: log->next = NULL;
507: regionID = log->regionID;
508: rf_ReintegrateRegion(raidPtr, regionID, log);
509: log->numRecords = 0;
510:
511: /*
512: * Remove all items which are blocked on reintegration of this
513: * region.
514: */
515: RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
516: logData = rf_SearchAndDequeueParityLogData(raidPtr, regionID,
517: &raidPtr->parityLogDiskQueue.reintBlockHead,
518: &raidPtr->parityLogDiskQueue.reintBlockTail, RF_TRUE);
519: logDataList = logData;
520: while (logData) {
521: logData->next =
522: rf_SearchAndDequeueParityLogData(raidPtr, regionID,
523: &raidPtr->parityLogDiskQueue.reintBlockHead,
524: &raidPtr->parityLogDiskQueue.reintBlockTail,
525: RF_TRUE);
526: logData = logData->next;
527: }
528: RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
529:
530: /*
531: * Process blocked log data and clear reintInProgress flag for
532: * this region.
533: */
534: if (logDataList)
535: rf_ParityLogAppend(logDataList, RF_TRUE, &log, RF_TRUE);
536: else {
537: /*
538: * Enable flushing for this region. Holding both
539: * locks provides a synchronization barrier with
540: * DumpParityLogToDisk.
541: */
542: RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
543: RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
544: RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
545: raidPtr->regionInfo[regionID].diskCount = 0;
546: raidPtr->regionInfo[regionID].reintInProgress =
547: RF_FALSE;
548: RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
549: RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID]
550: .reintMutex); /* Flushing is now enabled. */
551: RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
552: }
553: /*
554: * If log wasn't used, attach it to the list of logs to be
555: * returned.
556: */
557: if (log) {
558: log->next = freeLogList;
559: freeLogList = log;
560: }
561: }
562: if (freeLogList)
563: rf_ReleaseParityLogs(raidPtr, freeLogList);
564: }
565:
566: int
567: rf_ShutdownLogging(RF_Raid_t *raidPtr)
568: {
569: /*
570: * Shutdown parity logging:
571: * 1) Disable parity logging in all regions.
572: * 2) Reintegrate all regions.
573: */
574:
575: RF_SectorCount_t diskCount;
576: RF_RegionId_t regionID;
577: RF_ParityLog_t *log;
578:
579: if (rf_parityLogDebug)
580: printf("[shutting down parity logging]\n");
581: /*
582: * Since parity log maps are volatile, we must reintegrate all
583: * regions.
584: */
585: if (rf_forceParityLogReint) {
586: for (regionID = 0; regionID < rf_numParityRegions; regionID++) {
587: RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
588: raidPtr->regionInfo[regionID].loggingEnabled = RF_FALSE;
589: log = raidPtr->regionInfo[regionID].coreLog;
590: raidPtr->regionInfo[regionID].coreLog = NULL;
591: diskCount = raidPtr->regionInfo[regionID].diskCount;
592: RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
593: if (diskCount > 0 || log != NULL)
594: rf_ReintegrateRegion(raidPtr, regionID, log);
595: if (log != NULL)
596: rf_ReleaseParityLogs(raidPtr, log);
597: }
598: }
599: if (rf_parityLogDebug) {
600: printf("[parity logging disabled]\n");
601: printf("[should be done !]\n");
602: }
603: return (0);
604: }
605:
606: int
607: rf_ParityLoggingDiskManager(RF_Raid_t *raidPtr)
608: {
609: RF_ParityLog_t *reintQueue, *flushQueue;
610: int workNeeded, done = RF_FALSE;
611: int s;
612:
613: /*
614: * Main program for parity logging disk thread. This routine waits
615: * for work to appear in either the flush or reintegration queues and
616: * is responsible for flushing core logs to the log disk as well as
617: * reintegrating parity regions.
618: *
619: * BLOCKING
620: */
621:
622: s = splbio();
623:
624: RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
625:
626: /*
627: * Inform our creator that we're running. Don't bother doing the
628: * mutex lock/unlock dance: we locked above, and we'll unlock
629: * below with nothing to do, yet.
630: */
631: raidPtr->parityLogDiskQueue.threadState |= RF_PLOG_RUNNING;
632: RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond);
633:
634: /* Empty the work queues. */
635: flushQueue = raidPtr->parityLogDiskQueue.flushQueue;
636: raidPtr->parityLogDiskQueue.flushQueue = NULL;
637: reintQueue = raidPtr->parityLogDiskQueue.reintQueue;
638: raidPtr->parityLogDiskQueue.reintQueue = NULL;
639: workNeeded = (flushQueue || reintQueue);
640:
641: while (!done) {
642: while (workNeeded) {
643: /*
644: * First, flush all logs in the flush queue, freeing
645: * buffers. Second, reintegrate all regions that are
646: * reported as full. Third, append queued log data
647: * until blocked.
648: *
649: * Note: Incoming appends (ParityLogAppend) can block
650: * on either 1. empty buffer pool 2. region under
651: * reintegration. To preserve a global FIFO ordering of
652: * appends, buffers are not released to the world
653: * until those appends blocked on buffers are removed
654: * from the append queue. Similarly, regions that are
655: * reintegrated are not opened for general use until
656: * the append queue has been emptied.
657: */
658:
659: RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
660:
661: /*
662: * Empty flushQueue, using free'd log buffers to
663: * process bufTail.
664: */
665: if (flushQueue)
666: rf_FlushLogsToDisk(raidPtr, flushQueue);
667:
668: /*
669: * Empty reintQueue, flushing from reintTail as we go.
670: */
671: if (reintQueue)
672: rf_ReintegrateLogs(raidPtr, reintQueue);
673:
674: RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
675: flushQueue = raidPtr->parityLogDiskQueue.flushQueue;
676: raidPtr->parityLogDiskQueue.flushQueue = NULL;
677: reintQueue = raidPtr->parityLogDiskQueue.reintQueue;
678: raidPtr->parityLogDiskQueue.reintQueue = NULL;
679: workNeeded = (flushQueue || reintQueue);
680: }
681: /* No work is needed at this point. */
682: if (raidPtr->parityLogDiskQueue.threadState & RF_PLOG_TERMINATE) {
683: /*
684: * Shutdown parity logging:
685: * 1. Disable parity logging in all regions.
686: * 2. Reintegrate all regions.
687: */
688: done = RF_TRUE; /* Thread disabled, no work needed. */
689: RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
690: rf_ShutdownLogging(raidPtr);
691: }
692: if (!done) {
693: /* Thread enabled, no work needed, so sleep. */
694: if (rf_parityLogDebug)
695: printf("[parity logging disk manager"
696: " sleeping]\n");
697: RF_WAIT_COND(raidPtr->parityLogDiskQueue.cond,
698: raidPtr->parityLogDiskQueue.mutex);
699: if (rf_parityLogDebug)
700: printf("[parity logging disk manager just"
701: " woke up]\n");
702: flushQueue = raidPtr->parityLogDiskQueue.flushQueue;
703: raidPtr->parityLogDiskQueue.flushQueue = NULL;
704: reintQueue = raidPtr->parityLogDiskQueue.reintQueue;
705: raidPtr->parityLogDiskQueue.reintQueue = NULL;
706: workNeeded = (flushQueue || reintQueue);
707: }
708: }
709: /*
710: * Announce that we're done.
711: */
712: RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
713: raidPtr->parityLogDiskQueue.threadState |= RF_PLOG_SHUTDOWN;
714: RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
715: RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond);
716:
717: splx(s);
718:
719: /*
720: * In the Net- & OpenBSD kernel, the thread must exit; returning would
721: * cause the proc trampoline to attempt to return to userspace.
722: */
723: kthread_exit(0); /* does not return */
724: }
725: #endif /* RF_INCLUDE_PARITYLOGGING > 0 */
CVSweb