Annotation of sys/dev/raidframe/rf_pq.c, Revision 1.1.1.1
1.1 nbrk 1: /* $OpenBSD: rf_pq.c,v 1.6 2002/12/16 07:01:04 tdeval Exp $ */
2: /* $NetBSD: rf_pq.c,v 1.7 2000/01/07 03:41:02 oster Exp $ */
3:
4: /*
5: * Copyright (c) 1995 Carnegie-Mellon University.
6: * All rights reserved.
7: *
8: * Author: Daniel Stodolsky
9: *
10: * Permission to use, copy, modify and distribute this software and
11: * its documentation is hereby granted, provided that both the copyright
12: * notice and this permission notice appear in all copies of the
13: * software, derivative works or modified versions, and any portions
14: * thereof, and that both notices appear in supporting documentation.
15: *
16: * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
17: * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
18: * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
19: *
20: * Carnegie Mellon requests users of this software to return to
21: *
22: * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
23: * School of Computer Science
24: * Carnegie Mellon University
25: * Pittsburgh PA 15213-3890
26: *
27: * any improvements or extensions that they make and grant Carnegie the
28: * rights to redistribute these changes.
29: */
30:
31: /*
32: * Code for RAID level 6 (P + Q) disk array architecture.
33: */
34:
35: #include "rf_archs.h"
36: #include "rf_types.h"
37: #include "rf_raid.h"
38: #include "rf_dag.h"
39: #include "rf_dagffrd.h"
40: #include "rf_dagffwr.h"
41: #include "rf_dagdegrd.h"
42: #include "rf_dagdegwr.h"
43: #include "rf_dagutils.h"
44: #include "rf_dagfuncs.h"
45: #include "rf_etimer.h"
46: #include "rf_pqdeg.h"
47: #include "rf_general.h"
48: #include "rf_map.h"
49: #include "rf_pq.h"
50:
51: RF_RedFuncs_t rf_pFuncs = {
52: rf_RegularONPFunc, "Regular Old-New P",
53: rf_SimpleONPFunc, "Simple Old-New P"
54: };
55: RF_RedFuncs_t rf_pRecoveryFuncs = {
56: rf_RecoveryPFunc, "Recovery P Func",
57: rf_RecoveryPFunc, "Recovery P Func"
58: };
59:
60: int
61: rf_RegularONPFunc(RF_DagNode_t *node)
62: {
63: return (rf_RegularXorFunc(node));
64: }
65:
66:
67: /*
68: * Same as simpleONQ func, but the coefficient is always 1.
69: */
70:
71: int
72: rf_SimpleONPFunc(RF_DagNode_t *node)
73: {
74: return (rf_SimpleXorFunc(node));
75: }
76:
77: int
78: rf_RecoveryPFunc(RF_DagNode_t *node)
79: {
80: return (rf_RecoveryXorFunc(node));
81: }
82:
83: int
84: rf_RegularPFunc(RF_DagNode_t *node)
85: {
86: return (rf_RegularXorFunc(node));
87: }
88:
89:
90: #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
91:
92: void rf_QDelta(char *dest, char *obuf, char *nbuf, unsigned length,
93: unsigned char coeff);
94: void rf_InvertQ(unsigned long *qbuf, unsigned long *abuf, unsigned length,
95: unsigned coeff);
96:
97: RF_RedFuncs_t rf_qFuncs = {
98: rf_RegularONQFunc, "Regular Old-New Q",
99: rf_SimpleONQFunc, "Simple Old-New Q"
100: };
101: RF_RedFuncs_t rf_qRecoveryFuncs = {
102: rf_RecoveryQFunc, "Recovery Q Func",
103: rf_RecoveryQFunc, "Recovery Q Func"
104: };
105: RF_RedFuncs_t rf_pqRecoveryFuncs = {
106: rf_RecoveryPQFunc, "Recovery PQ Func",
107: rf_RecoveryPQFunc, "Recovery PQ Func"
108: };
109:
110: void
111: rf_PQDagSelect(RF_Raid_t *raidPtr, RF_IoType_t type,
112: RF_AccessStripeMap_t *asmap, RF_VoidFuncPtr *createFunc)
113: {
114: RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
115: unsigned ndfail = asmap->numDataFailed;
116: unsigned npfail = asmap->numParityFailed;
117: unsigned ntfail = npfail + ndfail;
118:
119: RF_ASSERT(RF_IO_IS_R_OR_W(type));
120: if (ntfail > 2) {
121: RF_ERRORMSG("more than two disks failed in a single group !"
122: " Aborting I/O operation.\n");
123: /* *infoFunc = */ *createFunc = NULL;
124: return;
125: }
126: /* Ok, we can do this I/O. */
127: if (type == RF_IO_TYPE_READ) {
128: switch (ndfail) {
129: case 0:
130: /* Fault free read. */
131: *createFunc = (RF_VoidFuncPtr)
132: rf_CreateFaultFreeReadDAG; /* Same as raid 5. */
133: break;
134: case 1:
135: /* Lost a single data unit. */
136: /*
137: * Two cases:
138: * (1) Parity is not lost. Do a normal raid 5
139: * reconstruct read.
140: * (2) Parity is lost. Do a reconstruct read using "q".
141: */
142: if (ntfail == 2) { /* Also lost redundancy. */
143: if (asmap->failedPDAs[1]->type ==
144: RF_PDA_TYPE_PARITY)
145: *createFunc = (RF_VoidFuncPtr)
146: rf_PQ_110_CreateReadDAG;
147: else
148: *createFunc = (RF_VoidFuncPtr)
149: rf_PQ_101_CreateReadDAG;
150: } else {
151: /*
152: * P and Q are ok. But is there a failure in
153: * some unaccessed data unit ?
154: */
155: if (rf_NumFailedDataUnitsInStripe(raidPtr,
156: asmap) == 2)
157: *createFunc = (RF_VoidFuncPtr)
158: rf_PQ_200_CreateReadDAG;
159: else
160: *createFunc = (RF_VoidFuncPtr)
161: rf_PQ_100_CreateReadDAG;
162: }
163: break;
164: case 2:
165: /* Lost two data units. */
166: /* *infoFunc = rf_PQOneTwo; */
167: *createFunc = (RF_VoidFuncPtr) rf_PQ_200_CreateReadDAG;
168: break;
169: }
170: return;
171: }
172: /* A write. */
173: switch (ntfail) {
174: case 0: /* Fault free. */
175: if (rf_suppressLocksAndLargeWrites ||
176: (((asmap->numStripeUnitsAccessed <=
177: (layoutPtr->numDataCol / 2)) &&
178: (layoutPtr->numDataCol != 1)) ||
179: (asmap->parityInfo->next != NULL) ||
180: (asmap->qInfo->next != NULL) ||
181: rf_CheckStripeForFailures(raidPtr, asmap))) {
182:
183: *createFunc = (RF_VoidFuncPtr) rf_PQCreateSmallWriteDAG;
184: } else {
185: *createFunc = (RF_VoidFuncPtr) rf_PQCreateLargeWriteDAG;
186: }
187: break;
188:
189: case 1: /* Single disk fault. */
190: if (npfail == 1) {
191: RF_ASSERT((asmap->failedPDAs[0]->type ==
192: RF_PDA_TYPE_PARITY) ||
193: (asmap->failedPDAs[0]->type == RF_PDA_TYPE_Q));
194: if (asmap->failedPDAs[0]->type == RF_PDA_TYPE_Q) {
195: /*
196: * Q died, treat like normal mode raid5 write.
197: */
198: if (((asmap->numStripeUnitsAccessed <=
199: (layoutPtr->numDataCol / 2)) ||
200: (asmap->numStripeUnitsAccessed == 1)) ||
201: rf_NumFailedDataUnitsInStripe(raidPtr,
202: asmap))
203: *createFunc = (RF_VoidFuncPtr)
204: rf_PQ_001_CreateSmallWriteDAG;
205: else
206: *createFunc = (RF_VoidFuncPtr)
207: rf_PQ_001_CreateLargeWriteDAG;
208: } else {/* Parity died, small write only updating Q. */
209: if (((asmap->numStripeUnitsAccessed <=
210: (layoutPtr->numDataCol / 2)) ||
211: (asmap->numStripeUnitsAccessed == 1)) ||
212: rf_NumFailedDataUnitsInStripe(raidPtr,
213: asmap))
214: *createFunc = (RF_VoidFuncPtr)
215: rf_PQ_010_CreateSmallWriteDAG;
216: else
217: *createFunc = (RF_VoidFuncPtr)
218: rf_PQ_010_CreateLargeWriteDAG;
219: }
220: } else { /*
221: * Data missing. Do a P reconstruct write if
222: * only a single data unit is lost in the
223: * stripe, otherwise a PQ reconstruct write.
224: */
225: if (rf_NumFailedDataUnitsInStripe(raidPtr, asmap) == 2)
226: *createFunc = (RF_VoidFuncPtr)
227: rf_PQ_200_CreateWriteDAG;
228: else
229: *createFunc = (RF_VoidFuncPtr)
230: rf_PQ_100_CreateWriteDAG;
231: }
232: break;
233:
234: case 2: /* Two disk faults. */
235: switch (npfail) {
236: case 2: /* Both p and q dead. */
237: *createFunc = (RF_VoidFuncPtr) rf_PQ_011_CreateWriteDAG;
238: break;
239: case 1: /* Either p or q and dead data. */
240: RF_ASSERT(asmap->failedPDAs[0]->type ==
241: RF_PDA_TYPE_DATA);
242: RF_ASSERT((asmap->failedPDAs[1]->type ==
243: RF_PDA_TYPE_PARITY) ||
244: (asmap->failedPDAs[1]->type ==
245: RF_PDA_TYPE_Q));
246: if (asmap->failedPDAs[1]->type == RF_PDA_TYPE_Q)
247: *createFunc = (RF_VoidFuncPtr)
248: rf_PQ_101_CreateWriteDAG;
249: else
250: *createFunc = (RF_VoidFuncPtr)
251: rf_PQ_110_CreateWriteDAG;
252: break;
253: case 0: /* Double data loss. */
254: *createFunc = (RF_VoidFuncPtr) rf_PQ_200_CreateWriteDAG;
255: break;
256: }
257: break;
258:
259: default: /* More than 2 disk faults. */
260: *createFunc = NULL;
261: RF_PANIC();
262: }
263: return;
264: }
265:
266:
267: /*
268: * Used as a stop gap info function.
269: */
#if 0
/* Disabled: reports one successor and one antecedent. */
void
rf_PQOne(RF_Raid_t *raidPtr, int *nSucc, int *nAnte,
    RF_AccessStripeMap_t *asmap)
{
	*nAnte = 1;
	*nSucc = 1;
}

/* Disabled: reports one successor and two antecedents. */
void
rf_PQOneTwo(RF_Raid_t *raidPtr, int *nSucc, int *nAnte,
    RF_AccessStripeMap_t *asmap)
{
	*nSucc = 1;
	*nAnte = 2;
}
#endif
286:
287: RF_CREATE_DAG_FUNC_DECL(rf_PQCreateLargeWriteDAG)
288: {
289: rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags,
290: allocList, 2, rf_RegularPQFunc, RF_FALSE);
291: }
292:
293: int
294: rf_RegularONQFunc(RF_DagNode_t *node)
295: {
296: int np = node->numParams;
297: int d;
298: RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
299: int i;
300: RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
301: RF_Etimer_t timer;
302: char *qbuf, *qpbuf;
303: char *obuf, *nbuf;
304: RF_PhysDiskAddr_t *old, *new;
305: unsigned long coeff;
306: unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit;
307:
308: RF_ETIMER_START(timer);
309:
310: d = (np - 3) / 4;
311: RF_ASSERT(4 * d + 3 == np);
312: qbuf = (char *) node->params[2 * d + 1].p; /* Q buffer. */
313: for (i = 0; i < d; i++) {
314: old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
315: obuf = (char *) node->params[2 * i + 1].p;
316: new = (RF_PhysDiskAddr_t *) node->params[2 * (d + 1 + i)].p;
317: nbuf = (char *) node->params[2 * (d + 1 + i) + 1].p;
318: RF_ASSERT(new->numSector == old->numSector);
319: RF_ASSERT(new->raidAddress == old->raidAddress);
320: /*
321: * The stripe unit within the stripe tells us the coefficient
322: * to use for the multiply.
323: */
324: coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout),
325: new->raidAddress);
326: /*
327: * Compute the data unit offset within the column, then add
328: * one.
329: */
330: coeff = (coeff % raidPtr->Layout.numDataCol);
331: qpbuf = qbuf + rf_RaidAddressToByte(raidPtr,
332: old->startSector % secPerSU);
333: rf_QDelta(qpbuf, obuf, nbuf, rf_RaidAddressToByte(raidPtr,
334: old->numSector), coeff);
335: }
336:
337: RF_ETIMER_STOP(timer);
338: RF_ETIMER_EVAL(timer);
339: tracerec->q_us += RF_ETIMER_VAL_US(timer);
340: rf_GenericWakeupFunc(node, 0); /*
341: * Call wake func explicitly since no
342: * I/O in this node.
343: */
344: return (0);
345: }
346:
347:
348: /*
349: * See the SimpleXORFunc for the difference between a simple and regular func.
350: * These Q functions should be used for
351: * new q = Q(data, old data, old q)
352: * style updates and not for
353: * q = (new data, new data, ...)
354: * computations.
355: *
356: * The simple q takes 2(2d+1)+1 params, where d is the number
357: * of stripes written. The order of params is
358: * old data pda_0, old data buffer_0, old data pda_1, old data buffer_1, ...
359: * old data pda_d, old data buffer_d
360: * [2d] old q pda_0, old q buffer
361: * [2d+2] new data pda_0, new data buffer_0, ...
362: * new data pda_d, new data buffer_d
363: * raidPtr
364: */
365:
366: int
367: rf_SimpleONQFunc(RF_DagNode_t *node)
368: {
369: int np = node->numParams;
370: int d;
371: RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
372: int i;
373: RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
374: RF_Etimer_t timer;
375: char *qbuf;
376: char *obuf, *nbuf;
377: RF_PhysDiskAddr_t *old, *new;
378: unsigned long coeff;
379:
380: RF_ETIMER_START(timer);
381:
382: d = (np - 3) / 4;
383: RF_ASSERT(4 * d + 3 == np);
384: qbuf = (char *) node->params[2 * d + 1].p; /* Q buffer. */
385: for (i = 0; i < d; i++) {
386: old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
387: obuf = (char *) node->params[2 * i + 1].p;
388: new = (RF_PhysDiskAddr_t *) node->params[2 * (d + 1 + i)].p;
389: nbuf = (char *) node->params[2 * (d + 1 + i) + 1].p;
390: RF_ASSERT(new->numSector == old->numSector);
391: RF_ASSERT(new->raidAddress == old->raidAddress);
392: /*
393: * The stripe unit within the stripe tells us the coefficient
394: * to use for the multiply.
395: */
396: coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout),
397: new->raidAddress);
398: /*
399: * Compute the data unit offset within the column, then add
400: * one.
401: */
402: coeff = (coeff % raidPtr->Layout.numDataCol);
403: rf_QDelta(qbuf, obuf, nbuf, rf_RaidAddressToByte(raidPtr,
404: old->numSector), coeff);
405: }
406:
407: RF_ETIMER_STOP(timer);
408: RF_ETIMER_EVAL(timer);
409: tracerec->q_us += RF_ETIMER_VAL_US(timer);
410: rf_GenericWakeupFunc(node, 0); /*
411: * Call wake func explicitly since no
412: * I/O in this node.
413: */
414: return (0);
415: }
416:
417: RF_CREATE_DAG_FUNC_DECL(rf_PQCreateSmallWriteDAG)
418: {
419: rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags,
420: allocList, &rf_pFuncs, &rf_qFuncs);
421: }
422:
423:
424: void rf_RegularQSubr(RF_DagNode_t *, char *);
425:
426: void
427: rf_RegularQSubr(RF_DagNode_t *node, char *qbuf)
428: {
429: int np = node->numParams;
430: int d;
431: RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
432: unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit;
433: int i;
434: RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
435: RF_Etimer_t timer;
436: char *obuf, *qpbuf;
437: RF_PhysDiskAddr_t *old;
438: unsigned long coeff;
439:
440: RF_ETIMER_START(timer);
441:
442: d = (np - 1) / 2;
443: RF_ASSERT(2 * d + 1 == np);
444: for (i = 0; i < d; i++) {
445: old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
446: obuf = (char *) node->params[2 * i + 1].p;
447: coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout),
448: old->raidAddress);
449: /*
450: * Compute the data unit offset within the column, then add
451: * one.
452: */
453: coeff = (coeff % raidPtr->Layout.numDataCol);
454: /*
455: * The input buffers may not all be aligned with the start of
456: * the stripe. So shift by their sector offset within the
457: * stripe unit.
458: */
459: qpbuf = qbuf + rf_RaidAddressToByte(raidPtr,
460: old->startSector % secPerSU);
461: rf_IncQ((unsigned long *) qpbuf, (unsigned long *) obuf,
462: rf_RaidAddressToByte(raidPtr, old->numSector), coeff);
463: }
464:
465: RF_ETIMER_STOP(timer);
466: RF_ETIMER_EVAL(timer);
467: tracerec->q_us += RF_ETIMER_VAL_US(timer);
468: }
469:
470:
471: /*
472: * Used in degraded writes.
473: */
474:
475: void rf_DegrQSubr(RF_DagNode_t *);
476:
477: void
478: rf_DegrQSubr(RF_DagNode_t *node)
479: {
480: int np = node->numParams;
481: int d;
482: RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
483: unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit;
484: RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
485: RF_Etimer_t timer;
486: char *qbuf = node->results[1];
487: char *obuf, *qpbuf;
488: RF_PhysDiskAddr_t *old;
489: unsigned long coeff;
490: unsigned fail_start;
491: int i, j;
492:
493: old = (RF_PhysDiskAddr_t *) node->params[np - 2].p;
494: fail_start = old->startSector % secPerSU;
495:
496: RF_ETIMER_START(timer);
497:
498: d = (np - 2) / 2;
499: RF_ASSERT(2 * d + 2 == np);
500: for (i = 0; i < d; i++) {
501: old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
502: obuf = (char *) node->params[2 * i + 1].p;
503: coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout),
504: old->raidAddress);
505: /*
506: * Compute the data unit offset within the column, then add
507: * one.
508: */
509: coeff = (coeff % raidPtr->Layout.numDataCol);
510: /*
511: * The input buffers may not all be aligned with the start of
512: * the stripe. So shift by their sector offset within the
513: * stripe unit.
514: */
515: j = old->startSector % secPerSU;
516: RF_ASSERT(j >= fail_start);
517: qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, j - fail_start);
518: rf_IncQ((unsigned long *) qpbuf, (unsigned long *) obuf,
519: rf_RaidAddressToByte(raidPtr, old->numSector), coeff);
520: }
521:
522: RF_ETIMER_STOP(timer);
523: RF_ETIMER_EVAL(timer);
524: tracerec->q_us += RF_ETIMER_VAL_US(timer);
525: }
526:
527:
528: /*
529: * Called by large write code to compute the new parity and the new q.
530: *
531: * Structure of the params:
532: *
533: * pda_0, buffer_0, pda_1 , buffer_1, ..., pda_d, buffer_d (d = numDataCol)
534: * raidPtr
535: *
536: * For a total of 2d+1 arguments.
537: * The result buffers results[0], results[1] are the buffers for the p and q,
538: * respectively.
539: *
540: * We compute Q first, then compute P. The P calculation may try to reuse
541: * one of the input buffers for its output, so if we computed P first, we would
542: * corrupt the input for the q calculation.
543: */
544:
545: int
546: rf_RegularPQFunc(RF_DagNode_t *node)
547: {
548: rf_RegularQSubr(node, node->results[1]);
549: return (rf_RegularXorFunc(node)); /* Does the wakeup. */
550: }
551:
552: int
553: rf_RegularQFunc(RF_DagNode_t *node)
554: {
555: /* Almost ... adjust Qsubr args. */
556: rf_RegularQSubr(node, node->results[0]);
557: rf_GenericWakeupFunc(node, 0); /*
558: * Call wake func explicitly since no
559: * I/O in this node.
560: */
561: return (0);
562: }
563:
564:
565: /*
566: * Called by singly degraded write code to compute the new parity and
567: * the new q.
568: *
569: * Structure of the params:
570: *
571: * pda_0, buffer_0, pda_1 , buffer_1, ..., pda_d, buffer_d
572: * failedPDA raidPtr
573: *
574: * for a total of 2d+2 arguments.
575: * The result buffers results[0], results[1] are the buffers for the parity
576: * and q, respectively.
577: *
578: * We compute Q first, then compute parity. The parity calculation may try
579: * to reuse one of the input buffers for its output, so if we computed parity
580: * first, we would corrupt the input for the q calculation.
581: *
582: * We treat this identically to the regularPQ case, ignoring the failedPDA
583: * extra argument.
584: */
585:
586: void
587: rf_Degraded_100_PQFunc(RF_DagNode_t *node)
588: {
589: int np = node->numParams;
590:
591: RF_ASSERT(np >= 2);
592: rf_DegrQSubr(node);
593: rf_RecoveryXorFunc(node);
594: }
595:
596:
597: /*
598: * The two below are used when reading a stripe with a single lost data unit.
599: * The parameters are
600: *
601: * pda_0, buffer_0, ..., pda_n, buffer_n, P pda, P buffer, failedPDA, raidPtr
602: *
603: * and results[0] contains the data buffer, which is originally zero-filled.
604: */
605:
606: /*
607: * This Q func is used by the degraded-mode dag functions to recover lost data.
608: * The second-to-last parameter is the PDA for the failed portion of the
609: * access. The code here looks at this PDA and assumes that the xor target
610: * buffer is equal in size to the number of sectors in the failed PDA. It then
611: * uses the other PDAs in the parameter list to determine where within the
612: * target buffer the corresponding data should be xored.
613: *
614: * Recall the basic equation is
615: *
616: * Q = (data_1 + 2 * data_2 ... + k * data_k) mod 256
617: *
618: * so to recover data_j we need
619: *
620: * J data_j = (Q - data_1 - 2 data_2 ... - k * data_k) mod 256
621: *
622: * So the coefficient for each buffer is (255 - data_col), and j should be
623: * initialized by copying Q into it. Then we need to do a table lookup to
624: * convert to solve
625: * data_j /= J
626: *
627: */
628:
629: int
630: rf_RecoveryQFunc(RF_DagNode_t *node)
631: {
632: RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
633: RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
634: RF_PhysDiskAddr_t *failedPDA =
635: (RF_PhysDiskAddr_t *) node->params[node->numParams - 2].p;
636: int i;
637: RF_PhysDiskAddr_t *pda;
638: RF_RaidAddr_t suoffset;
639: RF_RaidAddr_t failedSUOffset =
640: rf_StripeUnitOffset(layoutPtr, failedPDA->startSector);
641: char *srcbuf, *destbuf;
642: RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
643: RF_Etimer_t timer;
644: unsigned long coeff;
645:
646: RF_ETIMER_START(timer);
647: /* Start by copying Q into the buffer. */
648: bcopy(node->params[node->numParams - 3].p, node->results[0],
649: rf_RaidAddressToByte(raidPtr, failedPDA->numSector));
650: for (i = 0; i < node->numParams - 4; i += 2) {
651: RF_ASSERT(node->params[i + 1].p != node->results[0]);
652: pda = (RF_PhysDiskAddr_t *) node->params[i].p;
653: srcbuf = (char *) node->params[i + 1].p;
654: suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
655: destbuf = ((char *) node->results[0]) +
656: rf_RaidAddressToByte(raidPtr, suoffset - failedSUOffset);
657: coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout),
658: pda->raidAddress);
659: /* Compute the data unit offset within the column. */
660: coeff = (coeff % raidPtr->Layout.numDataCol);
661: rf_IncQ((unsigned long *) destbuf, (unsigned long *) srcbuf,
662: rf_RaidAddressToByte(raidPtr, pda->numSector), coeff);
663: }
664: /* Do the nasty inversion now. */
665: coeff = (rf_RaidAddressToStripeUnitID(&(raidPtr->Layout),
666: failedPDA->startSector) % raidPtr->Layout.numDataCol);
667: rf_InvertQ(node->results[0], node->results[0],
668: rf_RaidAddressToByte(raidPtr, pda->numSector), coeff);
669: RF_ETIMER_STOP(timer);
670: RF_ETIMER_EVAL(timer);
671: tracerec->q_us += RF_ETIMER_VAL_US(timer);
672: rf_GenericWakeupFunc(node, 0);
673: return (0);
674: }
675:
676: int
677: rf_RecoveryPQFunc(RF_DagNode_t *node)
678: {
679: RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
680: printf("raid%d: Recovery from PQ not implemented.\n", raidPtr->raidid);
681: return (1);
682: }
683:
684:
685: /*
686: * Degraded write Q subroutine.
687: * Used when P is dead.
688: * Large-write style Q computation.
689: * Parameters:
690: *
691: * (pda, buf), (pda, buf), ..., (failedPDA, bufPtr), failedPDA, raidPtr.
692: *
693: * We ignore failedPDA.
694: *
695: * This is a "simple style" recovery func.
696: */
697:
698: void
699: rf_PQ_DegradedWriteQFunc(RF_DagNode_t *node)
700: {
701: int np = node->numParams;
702: int d;
703: RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
704: unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit;
705: RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
706: RF_Etimer_t timer;
707: char *qbuf = node->results[0];
708: char *obuf, *qpbuf;
709: RF_PhysDiskAddr_t *old;
710: unsigned long coeff;
711: int fail_start, i, j;
712:
713: old = (RF_PhysDiskAddr_t *) node->params[np - 2].p;
714: fail_start = old->startSector % secPerSU;
715:
716: RF_ETIMER_START(timer);
717:
718: d = (np - 2) / 2;
719: RF_ASSERT(2 * d + 2 == np);
720:
721: for (i = 0; i < d; i++) {
722: old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
723: obuf = (char *) node->params[2 * i + 1].p;
724: coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout),
725: old->raidAddress);
726: /*
727: * Compute the data unit offset within the column, then add
728: * one.
729: */
730: coeff = (coeff % raidPtr->Layout.numDataCol);
731: j = old->startSector % secPerSU;
732: RF_ASSERT(j >= fail_start);
733: qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, j - fail_start);
734: rf_IncQ((unsigned long *) qpbuf, (unsigned long *) obuf,
735: rf_RaidAddressToByte(raidPtr, old->numSector), coeff);
736: }
737:
738: RF_ETIMER_STOP(timer);
739: RF_ETIMER_EVAL(timer);
740: tracerec->q_us += RF_ETIMER_VAL_US(timer);
741: rf_GenericWakeupFunc(node, 0);
742: }
743:
744:
745: /* Q computations. */
746:
747: /*
748: * Coeff - column;
749: *
750: * Compute dest ^= qfor[28-coeff][rn[coeff+1] a]
751: *
752: * On 5-bit basis;
753: * Length in bytes;
754: */
755:
756: void
757: rf_IncQ(unsigned long *dest, unsigned long *buf, unsigned length,
758: unsigned coeff)
759: {
760: unsigned long a, d, new;
761: unsigned long a1, a2;
762: unsigned int *q = &(rf_qfor[28 - coeff][0]);
763: unsigned r = rf_rn[coeff + 1];
764:
765: #define EXTRACT(a,i) ((a >> (5L*i)) & 0x1f)
766: #define INSERT(a,i) (a << (5L*i))
767:
768: length /= 8;
769: /* 13 5 bit quants in a 64 bit word. */
770: while (length) {
771: a = *buf++;
772: d = *dest;
773: a1 = EXTRACT(a, 0) ^ r;
774: a2 = EXTRACT(a, 1) ^ r;
775: new = INSERT(a2, 1) | a1;
776: a1 = EXTRACT(a, 2) ^ r;
777: a2 = EXTRACT(a, 3) ^ r;
778: a1 = q[a1];
779: a2 = q[a2];
780: new = new | INSERT(a1, 2) | INSERT(a2, 3);
781: a1 = EXTRACT(a, 4) ^ r;
782: a2 = EXTRACT(a, 5) ^ r;
783: a1 = q[a1];
784: a2 = q[a2];
785: new = new | INSERT(a1, 4) | INSERT(a2, 5);
786: a1 = EXTRACT(a, 5) ^ r;
787: a2 = EXTRACT(a, 6) ^ r;
788: a1 = q[a1];
789: a2 = q[a2];
790: new = new | INSERT(a1, 5) | INSERT(a2, 6);
791: #if RF_LONGSHIFT > 2
792: a1 = EXTRACT(a, 7) ^ r;
793: a2 = EXTRACT(a, 8) ^ r;
794: a1 = q[a1];
795: a2 = q[a2];
796: new = new | INSERT(a1, 7) | INSERT(a2, 8);
797: a1 = EXTRACT(a, 9) ^ r;
798: a2 = EXTRACT(a, 10) ^ r;
799: a1 = q[a1];
800: a2 = q[a2];
801: new = new | INSERT(a1, 9) | INSERT(a2, 10);
802: a1 = EXTRACT(a, 11) ^ r;
803: a2 = EXTRACT(a, 12) ^ r;
804: a1 = q[a1];
805: a2 = q[a2];
806: new = new | INSERT(a1, 11) | INSERT(a2, 12);
807: #endif /* RF_LONGSHIFT > 2 */
808: d ^= new;
809: *dest++ = d;
810: length--;
811: }
812: }
813:
814:
815: /*
816: * Compute.
817: *
818: * dest ^= rf_qfor[28-coeff][rf_rn[coeff+1] (old^new)]
819: *
820: * On a five bit basis.
821: * Optimization: compute old ^ new on 64 bit basis.
822: *
823: * Length in bytes.
824: */
825:
826: void
827: rf_QDelta(char *dest, char *obuf, char *nbuf, unsigned length,
828: unsigned char coeff)
829: {
830: unsigned long a, d, new;
831: unsigned long a1, a2;
832: unsigned int *q = &(rf_qfor[28 - coeff][0]);
833: unsigned int r = rf_rn[coeff + 1];
834:
835: r = a1 = a2 = new = d = a = 0; /* XXX For now... */
836: q = NULL; /* XXX For now */
837:
838: #ifdef _KERNEL
839: /*
840: * PQ in kernel currently not supported because the encoding/decoding
841: * table is not present.
842: */
843: bzero(dest, length);
844: #else /* _KERNEL */
845: /* This code probably doesn't work and should be rewritten. -wvcii */
846: /* 13 5 bit quants in a 64 bit word. */
847: length /= 8;
848: while (length) {
849: a = *obuf++; /*
850: * XXX Need to reorg to avoid cache conflicts.
851: */
852: a ^= *nbuf++;
853: d = *dest;
854: a1 = EXTRACT(a, 0) ^ r;
855: a2 = EXTRACT(a, 1) ^ r;
856: a1 = q[a1];
857: a2 = q[a2];
858: new = INSERT(a2, 1) | a1;
859: a1 = EXTRACT(a, 2) ^ r;
860: a2 = EXTRACT(a, 3) ^ r;
861: a1 = q[a1];
862: a2 = q[a2];
863: new = new | INSERT(a1, 2) | INSERT(a2, 3);
864: a1 = EXTRACT(a, 4) ^ r;
865: a2 = EXTRACT(a, 5) ^ r;
866: a1 = q[a1];
867: a2 = q[a2];
868: new = new | INSERT(a1, 4) | INSERT(a2, 5);
869: a1 = EXTRACT(a, 5) ^ r;
870: a2 = EXTRACT(a, 6) ^ r;
871: a1 = q[a1];
872: a2 = q[a2];
873: new = new | INSERT(a1, 5) | INSERT(a2, 6);
874: #if RF_LONGSHIFT > 2
875: a1 = EXTRACT(a, 7) ^ r;
876: a2 = EXTRACT(a, 8) ^ r;
877: a1 = q[a1];
878: a2 = q[a2];
879: new = new | INSERT(a1, 7) | INSERT(a2, 8);
880: a1 = EXTRACT(a, 9) ^ r;
881: a2 = EXTRACT(a, 10) ^ r;
882: a1 = q[a1];
883: a2 = q[a2];
884: new = new | INSERT(a1, 9) | INSERT(a2, 10);
885: a1 = EXTRACT(a, 11) ^ r;
886: a2 = EXTRACT(a, 12) ^ r;
887: a1 = q[a1];
888: a2 = q[a2];
889: new = new | INSERT(a1, 11) | INSERT(a2, 12);
890: #endif /* RF_LONGSHIFT > 2 */
891: d ^= new;
892: *dest++ = d;
893: length--;
894: }
895: #endif /* _KERNEL */
896: }
897:
898:
899: /*
900: * Recover columns a and b from the given p and q into
901: * bufs abuf and bbuf. All bufs are word aligned.
902: * Length is in bytes.
903: */
904:
905: /*
906: * XXX
907: *
908: * Everything about this seems wrong.
909: */
910:
911: void
912: rf_PQ_recover(unsigned long *pbuf, unsigned long *qbuf, unsigned long *abuf,
913: unsigned long *bbuf, unsigned length, unsigned coeff_a, unsigned coeff_b)
914: {
915: unsigned long p, q, a, a0, a1;
916: int col = (29 * coeff_a) + coeff_b;
917: unsigned char *q0 = &(rf_qinv[col][0]);
918:
919: length /= 8;
920: while (length) {
921: p = *pbuf++;
922: q = *qbuf++;
923: a0 = EXTRACT(p, 0);
924: a1 = EXTRACT(q, 0);
925: a = q0[a0 << 5 | a1];
926:
927: #define MF(i) \
928: do { \
929: a0 = EXTRACT(p, i); \
930: a1 = EXTRACT(q, i); \
931: a = a | INSERT(q0[a0<<5 | a1], i); \
932: } while (0)
933:
934: MF(1);
935: MF(2);
936: MF(3);
937: MF(4);
938: MF(5);
939: MF(6);
940: #if 0
941: MF(7);
942: MF(8);
943: MF(9);
944: MF(10);
945: MF(11);
946: MF(12);
947: #endif /* 0 */
948: *abuf++ = a;
949: *bbuf++ = a ^ p;
950: length--;
951: }
952: }
953:
954:
955: /*
956: * Lost parity and a data column. Recover that data column.
957: * Assume col coeff is lost. Let q the contents of Q after
958: * all surviving data columns have been q-xored out of it.
959: * Then we have the equation
960: *
961: * q[28-coeff][a_i ^ r_i+1] = q
962: *
963: * but q is cyclic with period 31.
964: * So q[3+coeff][q[28-coeff][a_i ^ r_{i+1}]] =
965: * q[31][a_i ^ r_{i+1}] = a_i ^ r_{i+1} .
966: *
967: * so a_i = r_{coeff+1} ^ q[3+coeff][q]
968: *
969: * The routine is passed q buffer and the buffer
970: * the data is to be recovered into. They can be the same.
971: */
972:
973: void
974: rf_InvertQ(unsigned long *qbuf, unsigned long *abuf, unsigned length,
975: unsigned coeff)
976: {
977: unsigned long a, new;
978: unsigned long a1, a2;
979: unsigned int *q = &(rf_qfor[3 + coeff][0]);
980: unsigned r = rf_rn[coeff + 1];
981:
982: /* 13 5 bit quants in a 64 bit word. */
983: length /= 8;
984: while (length) {
985: a = *qbuf++;
986: a1 = EXTRACT(a, 0);
987: a2 = EXTRACT(a, 1);
988: a1 = r ^ q[a1];
989: a2 = r ^ q[a2];
990: new = INSERT(a2, 1) | a1;
991:
992: #define M(i,j) \
993: do { \
994: a1 = EXTRACT(a, i); \
995: a2 = EXTRACT(a, j); \
996: a1 = r ^ q[a1]; \
997: a2 = r ^ q[a2]; \
998: new = new | INSERT(a1, i) | INSERT(a2, j); \
999: } while (0)
1000:
1001: M(2, 3);
1002: M(4, 5);
1003: M(5, 6);
1004: #if RF_LONGSHIFT > 2
1005: M(7, 8);
1006: M(9, 10);
1007: M(11, 12);
1008: #endif /* RF_LONGSHIFT > 2 */
1009: *abuf++ = new;
1010: length--;
1011: }
1012: }
1013: #endif /* (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) */
CVSweb