Annotation of sys/dev/raidframe/rf_reconbuffer.c, Revision 1.1.1.1
1.1 nbrk 1: /* $OpenBSD: rf_reconbuffer.c,v 1.4 2002/12/16 07:01:05 tdeval Exp $ */
2: /* $NetBSD: rf_reconbuffer.c,v 1.4 2000/03/13 23:52:36 soren Exp $ */
3:
4: /*
5: * Copyright (c) 1995 Carnegie-Mellon University.
6: * All rights reserved.
7: *
8: * Author: Mark Holland
9: *
10: * Permission to use, copy, modify and distribute this software and
11: * its documentation is hereby granted, provided that both the copyright
12: * notice and this permission notice appear in all copies of the
13: * software, derivative works or modified versions, and any portions
14: * thereof, and that both notices appear in supporting documentation.
15: *
16: * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
17: * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
18: * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
19: *
20: * Carnegie Mellon requests users of this software to return to
21: *
22: * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
23: * School of Computer Science
24: * Carnegie Mellon University
25: * Pittsburgh PA 15213-3890
26: *
27: * any improvements or extensions that they make and grant Carnegie the
28: * rights to redistribute these changes.
29: */
30:
31: /*****************************************************
32: *
33: * rf_reconbuffer.c -- Reconstruction buffer manager.
34: *
35: *****************************************************/
36:
37: #include "rf_raid.h"
38: #include "rf_reconbuffer.h"
39: #include "rf_acctrace.h"
40: #include "rf_etimer.h"
41: #include "rf_general.h"
42: #include "rf_debugprint.h"
43: #include "rf_revent.h"
44: #include "rf_reconutil.h"
45: #include "rf_nwayxor.h"
46:
/*
 * Debug printf wrappers, active only when rf_reconbufferDebug is set.
 * Wrapped in do { } while (0) so each expands to a single statement:
 * the original bare-"if" form made "if (x) Dprintf1(...); else ..."
 * bind the else to the macro's hidden if (dangling-else hazard).
 */
#define Dprintf1(s,a)		do { if (rf_reconbufferDebug) printf(s, a); } while (0)
#define Dprintf2(s,a,b)		do { if (rf_reconbufferDebug) printf(s, a, b); } while (0)
#define Dprintf3(s,a,b,c)	do { if (rf_reconbufferDebug) printf(s, a, b, c); } while (0)
#define Dprintf4(s,a,b,c,d)	do { if (rf_reconbufferDebug) printf(s, a, b, c, d); } while (0)
#define Dprintf5(s,a,b,c,d,e)	do { if (rf_reconbufferDebug) printf(s, a, b, c, d, e); } while (0)
52:
53: /*****************************************************************************
54: *
55: * Submit a reconstruction buffer to the manager for XOR.
56: * We can only submit a buffer if (1) we can xor into an existing buffer,
57: * which means we don't have to acquire a new one, (2) we can acquire a
58: * floating recon buffer, or (3) the caller has indicated that we are allowed
59: * to keep the submitted buffer.
60: *
61: * Returns non-zero if and only if we were not able to submit.
62: * In this case, we append the current disk ID to the wait list on the
63: * indicated RU, so that it will be re-enabled when we acquire a buffer for
64: * this RU.
65: *
66: *****************************************************************************/
67:
/* Just to make the code below more readable. */
/*
 * Prepend a callback descriptor for disk (_row_,_col_) to the buffer-wait
 * list of parity stripe status _pssPtr_, so that disk is re-enabled when a
 * recon buffer becomes available for this RU.
 * NOTE: arguments may be evaluated more than once -- pass only
 * side-effect-free expressions.
 * NOTE(review): not referenced in the visible portion of this file;
 * presumably used by layout-specific submit routines -- verify before
 * removing.
 */
#define BUFWAIT_APPEND(_cb_,_pssPtr_,_row_,_col_) \
do { \
	_cb_ = rf_AllocCallbackDesc(); \
	(_cb_)->row = (_row_); \
	(_cb_)->col = (_col_); \
	(_cb_)->next = (_pssPtr_)->bufWaitList; \
	(_pssPtr_)->bufWaitList = (_cb_); \
} while (0)
77:
/*
 * rf_nWayXorFuncs[i] is a pointer to a function that will xor "i"
 * bufs into the accumulating sum.
 *
 * Index 0 is a NULL sentinel (a 0-way XOR is meaningless); valid
 * dispatch indices are 1..9.  Callers must not index past 9 -- see the
 * bounds check in rf_MultiWayReconXor().
 */
static RF_VoidFuncPtr rf_nWayXorFuncs[] = {
	NULL,
	(RF_VoidFuncPtr) rf_nWayXor1,
	(RF_VoidFuncPtr) rf_nWayXor2,
	(RF_VoidFuncPtr) rf_nWayXor3,
	(RF_VoidFuncPtr) rf_nWayXor4,
	(RF_VoidFuncPtr) rf_nWayXor5,
	(RF_VoidFuncPtr) rf_nWayXor6,
	(RF_VoidFuncPtr) rf_nWayXor7,
	(RF_VoidFuncPtr) rf_nWayXor8,
	(RF_VoidFuncPtr) rf_nWayXor9
};
94:
95:
96: int
97: rf_SubmitReconBuffer(
98: RF_ReconBuffer_t *rbuf, /* The recon buffer to submit. */
99: int keep_it, /*
100: * Whether we can keep this buffer or
101: * we have to return it.
102: */
103: int use_committed /*
104: * Whether to use a committed or an
105: * available recon buffer.
106: */
107: )
108: {
109: RF_LayoutSW_t *lp;
110: int rc;
111:
112: lp = rbuf->raidPtr->Layout.map;
113: rc = lp->SubmitReconBuffer(rbuf, keep_it, use_committed);
114: return (rc);
115: }
116:
/*
 * Basic (non-layout-specific) recon buffer submission.
 *
 * Takes the PSS mutex for the buffer's parity stripe and the recon ctrl
 * rb_mutex; both are dropped on every exit path.  Three outcomes:
 *  (1) enough buffers have accumulated (or this is the stripe's last
 *      submission) and a destination buffer exists -> XOR now;
 *  (2) a committed/floating buffer (or the caller's own, if keep_it) can
 *      be installed for a later XOR -> retcode 0;
 *  (3) no buffer could be acquired -> the disk is queued on the recon
 *      ctrl bufferWaitList and retcode 1 (caller must stall this disk).
 */
int
rf_SubmitReconBufferBasic(
	RF_ReconBuffer_t *rbuf,	/* The recon buffer to submit. */
	int keep_it,	/*
			 * Whether we can keep this buffer
			 * or we have to return it.
			 */
	int use_committed	/*
				 * Whether to use a committed or
				 * an available recon buffer.
				 */
)
{
	RF_Raid_t *raidPtr = rbuf->raidPtr;
	RF_RaidLayout_t *layoutPtr = &raidPtr->Layout;
	RF_ReconCtrl_t *reconCtrlPtr = raidPtr->reconControl[rbuf->row];
	RF_ReconParityStripeStatus_t *pssPtr;
	/* Temporary rbuf pointers. */
	RF_ReconBuffer_t *targetRbuf, *t = NULL;
	/* Temporary data buffer pointer. */
	caddr_t ta;
	RF_CallbackDesc_t *cb, *p;
	int retcode = 0, created = 0;

	RF_Etimer_t timer;

	/* Makes no sense to have a submission from the failed disk. */
	RF_ASSERT(rbuf);
	RF_ASSERT(rbuf->col != reconCtrlPtr->fcol);

	Dprintf5("RECON: submission by row %d col %d for psid %ld ru %d"
	    " (failed offset %ld).\n", rbuf->row, rbuf->col,
	    (long) rbuf->parityStripeID, rbuf->which_ru,
	    (long) rbuf->failedDiskSectorOffset);

	/* Lock order: PSS mutex first, then rb_mutex. */
	RF_LOCK_PSS_MUTEX(raidPtr, rbuf->row, rbuf->parityStripeID);

	RF_LOCK_MUTEX(reconCtrlPtr->rb_mutex);

	pssPtr = rf_LookupRUStatus(raidPtr, reconCtrlPtr->pssTable,
	    rbuf->parityStripeID, rbuf->which_ru, RF_PSS_NONE, &created);
	RF_ASSERT(pssPtr);	/*
				 * If it didn't exist, we wouldn't have gotten
				 * an rbuf for it.
				 */

	/*
	 * Check to see if enough buffers have accumulated to do an XOR. If
	 * so, there's no need to acquire a floating rbuf. Before we can do
	 * any XORing, we must have acquired a destination buffer. If we
	 * have, then we can go ahead and do the XOR if (1) including this
	 * buffer, enough bufs have accumulated, or (2) this is the last
	 * submission for this stripe. Otherwise, we have to go acquire a
	 * floating rbuf.
	 */

	targetRbuf = (RF_ReconBuffer_t *) pssPtr->rbuf;
	if ((targetRbuf != NULL) &&
	    ((pssPtr->xorBufCount == rf_numBufsToAccumulate - 1) ||
	     (targetRbuf->count + pssPtr->xorBufCount + 1 ==
	      layoutPtr->numDataCol))) {
		/* Install this buffer. */
		pssPtr->rbufsForXor[pssPtr->xorBufCount++] = rbuf;
		Dprintf3("RECON: row %d col %d invoking a %d-way XOR.\n",
		    rbuf->row, rbuf->col, pssPtr->xorBufCount);
		/* Time the XOR for accounting (accumXorTimeUs). */
		RF_ETIMER_START(timer);
		rf_MultiWayReconXor(raidPtr, pssPtr);
		RF_ETIMER_STOP(timer);
		RF_ETIMER_EVAL(timer);
		raidPtr->accumXorTimeUs += RF_ETIMER_VAL_US(timer);
		if (!keep_it) {
			/* Charge the XOR to the submitting disk's trace. */
			raidPtr->recon_tracerecs[rbuf->col].xor_us =
			    RF_ETIMER_VAL_US(timer);
			RF_ETIMER_STOP(raidPtr->recon_tracerecs[rbuf->col]
			    .recon_timer);
			RF_ETIMER_EVAL(raidPtr->recon_tracerecs[rbuf->col]
			    .recon_timer);
			raidPtr->recon_tracerecs[rbuf->col]
			    .specific.recon.recon_return_to_submit_us +=
			    RF_ETIMER_VAL_US(raidPtr
			        ->recon_tracerecs[rbuf->col].recon_timer);
			RF_ETIMER_START(raidPtr->recon_tracerecs[rbuf->col]
			    .recon_timer);

			rf_LogTraceRec(raidPtr,
			    &raidPtr->recon_tracerecs[rbuf->col]);
		}
		rf_CheckForFullRbuf(raidPtr, reconCtrlPtr, pssPtr,
		    layoutPtr->numDataCol);

		/*
		 * If use_committed is on, we _must_ consume a buffer off the
		 * committed list.
		 */
		if (use_committed) {
			t = reconCtrlPtr->committedRbufs;
			RF_ASSERT(t);
			reconCtrlPtr->committedRbufs = t->next;
			rf_ReleaseFloatingReconBuffer(raidPtr, rbuf->row, t);
		}
		if (keep_it) {
			/*
			 * Caller's buffer was consumed by the XOR; free it
			 * after dropping both locks.
			 */
			RF_UNLOCK_PSS_MUTEX(raidPtr, rbuf->row,
			    rbuf->parityStripeID);
			RF_UNLOCK_MUTEX(reconCtrlPtr->rb_mutex);
			rf_FreeReconBuffer(rbuf);
			return (retcode);
		}
		goto out;
	}
	/* Set the value of "t", which we'll use as the rbuf from here on. */
	if (keep_it) {
		t = rbuf;
	} else {
		if (use_committed) {
			/* If a buffer has been committed to us, use it. */

			t = reconCtrlPtr->committedRbufs;
			RF_ASSERT(t);
			reconCtrlPtr->committedRbufs = t->next;
			t->next = NULL;
		} else
			if (reconCtrlPtr->floatingRbufs) {
				t = reconCtrlPtr->floatingRbufs;
				reconCtrlPtr->floatingRbufs = t->next;
				t->next = NULL;
			}
	}

	/*
	 * If we weren't able to acquire a buffer, append to the end of the
	 * buf list in the recon ctrl struct.
	 */
	if (!t) {
		RF_ASSERT(!keep_it && !use_committed);
		Dprintf2("RECON: row %d col %d failed to acquire floating"
		    " rbuf.\n", rbuf->row, rbuf->col);

		raidPtr->procsInBufWait++;
		/*
		 * All surviving disks waiting with no full buffer to drain
		 * means nobody can ever make progress: hard stop.
		 */
		if ((raidPtr->procsInBufWait == raidPtr->numCol - 1) &&
		    (raidPtr->numFullReconBuffers == 0)) {
			printf("Buffer wait deadlock detected. Exiting.\n");
			rf_PrintPSStatusTable(raidPtr, rbuf->row);
			RF_PANIC();
		}
		pssPtr->flags |= RF_PSS_BUFFERWAIT;
		/* Append to buf wait list in recon ctrl structure. */
		cb = rf_AllocCallbackDesc();
		cb->row = rbuf->row;
		cb->col = rbuf->col;
		cb->callbackArg.v = rbuf->parityStripeID;
		cb->callbackArg2.v = rbuf->which_ru;
		cb->next = NULL;
		if (!reconCtrlPtr->bufferWaitList)
			reconCtrlPtr->bufferWaitList = cb;
		else {
			/*
			 * Might want to maintain head/tail pointers
			 * here rather than search for end of list.
			 */
			for (p = reconCtrlPtr->bufferWaitList; p->next;
			    p = p->next);
			p->next = cb;
		}
		retcode = 1;
		goto out;
	}
	Dprintf2("RECON: row %d col %d acquired rbuf.\n", rbuf->row, rbuf->col);
	RF_ETIMER_STOP(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
	RF_ETIMER_EVAL(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
	raidPtr->recon_tracerecs[rbuf->col]
	    .specific.recon.recon_return_to_submit_us +=
	    RF_ETIMER_VAL_US(raidPtr->recon_tracerecs[rbuf->col].recon_timer);
	RF_ETIMER_START(raidPtr->recon_tracerecs[rbuf->col].recon_timer);

	rf_LogTraceRec(raidPtr, &raidPtr->recon_tracerecs[rbuf->col]);

	/* Initialize the buffer. */
	if (t != rbuf) {
		/*
		 * t is a committed/floating buffer: copy the submission's
		 * identity into it, then swap data pointers so the caller
		 * gets its rbuf back with a fresh (reusable) data buffer.
		 */
		t->row = rbuf->row;
		t->col = reconCtrlPtr->fcol;
		t->parityStripeID = rbuf->parityStripeID;
		t->which_ru = rbuf->which_ru;
		t->failedDiskSectorOffset = rbuf->failedDiskSectorOffset;
		t->spRow = rbuf->spRow;
		t->spCol = rbuf->spCol;
		t->spOffset = rbuf->spOffset;

		ta = t->buffer;
		t->buffer = rbuf->buffer;
		rbuf->buffer = ta;	/* Swap buffers. */
	}
	/*
	 * The first installation always gets installed as the destination
	 * buffer. Subsequent installations get stacked up to allow for
	 * multi-way XOR.
	 */
	if (!pssPtr->rbuf) {
		pssPtr->rbuf = t;
		t->count = 1;
	} else
		/* Install this buffer. */
		pssPtr->rbufsForXor[pssPtr->xorBufCount++] = t;

	/* The buffer is full if G=2. */
	rf_CheckForFullRbuf(raidPtr, reconCtrlPtr, pssPtr,
	    layoutPtr->numDataCol);

out:
	RF_UNLOCK_PSS_MUTEX(raidPtr, rbuf->row, rbuf->parityStripeID);
	RF_UNLOCK_MUTEX(reconCtrlPtr->rb_mutex);
	return (retcode);
}
329:
330: int
331: rf_MultiWayReconXor(
332: RF_Raid_t *raidPtr,
333: RF_ReconParityStripeStatus_t *pssPtr /*
334: * The pss descriptor for this
335: * parity stripe.
336: */
337: )
338: {
339: int i, numBufs = pssPtr->xorBufCount;
340: int numBytes = rf_RaidAddressToByte(raidPtr,
341: raidPtr->Layout.sectorsPerStripeUnit * raidPtr->Layout.SUsPerRU);
342: RF_ReconBuffer_t **rbufs = (RF_ReconBuffer_t **) pssPtr->rbufsForXor;
343: RF_ReconBuffer_t *targetRbuf = (RF_ReconBuffer_t *) pssPtr->rbuf;
344:
345: RF_ASSERT(pssPtr->rbuf != NULL);
346: RF_ASSERT(numBufs > 0 && numBufs < RF_PS_MAX_BUFS);
347: #ifdef _KERNEL
348: #if !defined(__NetBSD__) && !defined(__OpenBSD__)
349: /* Yield the processor before doing a big XOR. */
350: thread_block();
351: #endif
352: #endif /* _KERNEL */
353: /*
354: * XXX
355: *
356: * What if more than 9 bufs ?
357: */
358: rf_nWayXorFuncs[numBufs] (pssPtr->rbufsForXor, targetRbuf,
359: numBytes / sizeof(long));
360:
361: /*
362: * Release all the reconstruction buffers except the last one, which
363: * belongs to the disk whose submission caused this XOR to take place.
364: */
365: for (i = 0; i < numBufs - 1; i++) {
366: if (rbufs[i]->type == RF_RBUF_TYPE_FLOATING)
367: rf_ReleaseFloatingReconBuffer(raidPtr, rbufs[i]->row,
368: rbufs[i]);
369: else
370: if (rbufs[i]->type == RF_RBUF_TYPE_FORCED)
371: rf_FreeReconBuffer(rbufs[i]);
372: else
373: RF_ASSERT(0);
374: }
375: targetRbuf->count += pssPtr->xorBufCount;
376: pssPtr->xorBufCount = 0;
377: return (0);
378: }
379:
380:
381: /*
382: * Removes one full buffer from one of the full-buffer lists and returns it.
383: *
384: * ASSUMES THE RB_MUTEX IS UNLOCKED AT ENTRY.
385: */
386: RF_ReconBuffer_t *
387: rf_GetFullReconBuffer(RF_ReconCtrl_t *reconCtrlPtr)
388: {
389: RF_ReconBuffer_t *p;
390:
391: RF_LOCK_MUTEX(reconCtrlPtr->rb_mutex);
392:
393: if ((p = reconCtrlPtr->priorityList) != NULL) {
394: reconCtrlPtr->priorityList = p->next;
395: p->next = NULL;
396: goto out;
397: }
398: if ((p = reconCtrlPtr->fullBufferList) != NULL) {
399: reconCtrlPtr->fullBufferList = p->next;
400: p->next = NULL;
401: goto out;
402: }
403: out:
404: RF_UNLOCK_MUTEX(reconCtrlPtr->rb_mutex);
405: return (p);
406: }
407:
408:
/*
 * If the reconstruction buffer is full, move it to the full list, which
 * is maintained sorted by failed disk sector offset.
 *
 * ASSUMES THE RB_MUTEX IS LOCKED AT ENTRY.
 */
/*
 * "Full" means the destination buffer has absorbed contributions from all
 * numDataCol surviving data columns.  On fill: bump the full-buffer count,
 * insert into reconCtrl->fullBufferList in ascending failedDiskSectorOffset
 * order, detach it from the pss (pssPtr->rbuf = NULL), and post a
 * BUFREADY event so the writeback proceeds.  Always returns 0.
 */
int
rf_CheckForFullRbuf(RF_Raid_t *raidPtr, RF_ReconCtrl_t *reconCtrl,
    RF_ReconParityStripeStatus_t *pssPtr, int numDataCol)
{
	RF_ReconBuffer_t *p, *pt, *rbuf = (RF_ReconBuffer_t *) pssPtr->rbuf;

	if (rbuf->count == numDataCol) {
		raidPtr->numFullReconBuffers++;
		Dprintf2("RECON: rbuf for psid %ld ru %d has filled.\n",
		    (long) rbuf->parityStripeID, rbuf->which_ru);
		if (!reconCtrl->fullBufferList ||
		    (rbuf->failedDiskSectorOffset <
		     reconCtrl->fullBufferList->failedDiskSectorOffset)) {
			/* New head: list empty or rbuf sorts first. */
			Dprintf2("RECON: rbuf for psid %ld ru %d is head of"
			    " list.\n", (long) rbuf->parityStripeID,
			    rbuf->which_ru);
			rbuf->next = reconCtrl->fullBufferList;
			reconCtrl->fullBufferList = rbuf;
		} else {
			/* Walk to the first node that sorts after rbuf. */
			for (pt = reconCtrl->fullBufferList, p = pt->next;
			    p && p->failedDiskSectorOffset <
			    rbuf->failedDiskSectorOffset;
			    pt = p, p = p->next);
			rbuf->next = p;
			pt->next = rbuf;
			Dprintf2("RECON: rbuf for psid %ld ru %d is in list.\n",
			    (long) rbuf->parityStripeID, rbuf->which_ru);
		}
#if 0
		pssPtr->writeRbuf = pssPtr->rbuf;	/*
							 * DEBUG ONLY: We like
							 * to be able to find
							 * this rbuf while it's
							 * awaiting write.
							 */
#else
		/* Back-pointer so the writer can find the pss later. */
		rbuf->pssPtr = pssPtr;
#endif
		pssPtr->rbuf = NULL;
		rf_CauseReconEvent(raidPtr, rbuf->row, rbuf->col, NULL,
		    RF_REVENT_BUFREADY);
	}
	return (0);
}
459:
460:
461: /*
462: * Release a floating recon buffer for someone else to use.
463: * Assumes the rb_mutex is LOCKED at entry.
464: */
465: void
466: rf_ReleaseFloatingReconBuffer(RF_Raid_t *raidPtr, RF_RowCol_t row,
467: RF_ReconBuffer_t *rbuf)
468: {
469: RF_ReconCtrl_t *rcPtr = raidPtr->reconControl[row];
470: RF_CallbackDesc_t *cb;
471:
472: Dprintf2("RECON: releasing rbuf for psid %ld ru %d.\n",
473: (long) rbuf->parityStripeID, rbuf->which_ru);
474:
475: /*
476: * If anyone is waiting on buffers, wake one of them up. They will
477: * subsequently wake up anyone else waiting on their RU.
478: */
479: if (rcPtr->bufferWaitList) {
480: rbuf->next = rcPtr->committedRbufs;
481: rcPtr->committedRbufs = rbuf;
482: cb = rcPtr->bufferWaitList;
483: rcPtr->bufferWaitList = cb->next;
484: /* arg==1 => We've committed a buffer. */
485: rf_CauseReconEvent(raidPtr, cb->row, cb->col, (void *) 1,
486: RF_REVENT_BUFCLEAR);
487: rf_FreeCallbackDesc(cb);
488: raidPtr->procsInBufWait--;
489: } else {
490: rbuf->next = rcPtr->floatingRbufs;
491: rcPtr->floatingRbufs = rbuf;
492: }
493: }
494:
495:
496: /*
497: * Release any disk that is waiting on a buffer for the indicated RU.
498: * Assumes the rb_mutex is LOCKED at entry.
499: */
500: void
501: rf_ReleaseBufferWaiters(
502: RF_Raid_t *raidPtr,
503: RF_ReconParityStripeStatus_t *pssPtr
504: )
505: {
506: RF_CallbackDesc_t *cb1, *cb = pssPtr->bufWaitList;
507:
508: Dprintf2("RECON: releasing buf waiters for psid %ld ru %d.\n",
509: (long) pssPtr->parityStripeID, pssPtr->which_ru);
510: pssPtr->flags &= ~RF_PSS_BUFFERWAIT;
511: while (cb) {
512: cb1 = cb->next;
513: cb->next = NULL;
514: /* arg==0 => We haven't committed a buffer. */
515: rf_CauseReconEvent(raidPtr, cb->row, cb->col, (void *) 0,
516: RF_REVENT_BUFCLEAR);
517: rf_FreeCallbackDesc(cb);
518: cb = cb1;
519: }
520: pssPtr->bufWaitList = NULL;
521: }
522:
523:
524: /*
525: * When reconstruction is forced on an RU, there may be some disks waiting to
526: * acquire a buffer for that RU. Since we allocate a new buffer as part of
527: * the forced-reconstruction process, we no longer have to wait for any
528: * buffers, so we wakeup any waiter that we find in the bufferWaitList.
529: *
530: * Assumes the rb_mutex is LOCKED at entry.
531: */
532: void
533: rf_ReleaseBufferWaiter(RF_ReconCtrl_t *rcPtr, RF_ReconBuffer_t *rbuf)
534: {
535: RF_CallbackDesc_t *cb, *cbt;
536:
537: for (cbt = NULL, cb = rcPtr->bufferWaitList; cb;
538: cbt = cb, cb = cb->next) {
539: if ((cb->callbackArg.v == rbuf->parityStripeID) &&
540: (cb->callbackArg2.v == rbuf->which_ru)) {
541: Dprintf2("RECON: Dropping row %d col %d from buffer"
542: " wait list.\n", cb->row, cb->col);
543: if (cbt)
544: cbt->next = cb->next;
545: else
546: rcPtr->bufferWaitList = cb->next;
547:
548: /* arg==0 => No committed buffer. */
549: rf_CauseReconEvent((RF_Raid_t *) rbuf->raidPtr,
550: cb->row, cb->col, (void *) 0, RF_REVENT_BUFREADY);
551: rf_FreeCallbackDesc(cb);
552: return;
553: }
554: }
555: }
CVSweb