Annotation of sys/arch/m88k/m88k/m88100_fp.S, Revision 1.1.1.1
1.1 nbrk 1: /* $OpenBSD: m88100_fp.S,v 1.4 2004/08/09 20:52:11 miod Exp $ */
2: /*
3: * Mach Operating System
4: * Copyright (c) 1991 Carnegie Mellon University
5: * Copyright (c) 1991 OMRON Corporation
6: * All Rights Reserved.
7: *
8: * Permission to use, copy, modify and distribute this software and its
9: * documentation is hereby granted, provided that both the copyright
10: * notice and this permission notice appear in all copies of the
11: * software, derivative works or modified versions, and any portions
12: * thereof, and that both notices appear in supporting documentation.
13: *
14: * CARNEGIE MELLON AND OMRON ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15: * CONDITION. CARNEGIE MELLON AND OMRON DISCLAIM ANY LIABILITY OF ANY KIND
16: * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17: *
18: * Carnegie Mellon requests users of this software to return to
19: *
20: * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
21: * School of Computer Science
22: * Carnegie Mellon University
23: * Pittsburgh PA 15213-3890
24: *
25: * any improvements or extensions that they make and grant Carnegie the
26: * rights to redistribute these changes.
27: */
28:
29: /* Floating point trouble routines */
30: #include "assym.h"
31: #include <machine/trap.h>
32: #include <machine/asm.h>
33:
34: #define destsize 10
35:
36: /* Floating-Point Status Register bits */
37: #define inexact 0
38: #define overflow 1
39: #define underflow 2
40: #define divzero 3
41: #define oper 4
42:
43: #define sign 31
44: #define s1size 9
45: #define s2size 7
46: #define dsize 5
47:
48: #define FADDop 0x05
49: #define FSUBop 0x06
50: #define FCMPop 0x07
51: #define FMULop 0x00
52: #define FDIVop 0x0e
53: #define FSQRTop 0x0f
54: #define INTop 0x09
55: #define NINTop 0x0a
56: #define TRNCop 0x0b
57:
58: #define s1nan 7
59: #define s2nan 6
60: #define s1inf 5
61: #define s2inf 4
62: #define s1zero 3
63: #define s2zero 2
64: #define sigbit 19
65:
66: #define modehi 30
67: #define modelo 29
68: #define rndhi 15
69: #define rndlo 14
70: #define efunf 7
71: #define efovf 6
72: #define efinx 5
73:
74: ASENTRY(m88100_Xfp_precise) /* entry: r3 = E.F. pointer; loads the FP state and dispatches on FPECR bits */
75: or r29, r3, r0 /* r29 is now the E.F. */
76: subu r31, r31, 16 /* allocate 16 bytes of stack */
77: st r1, r31, 8 /* save return address at r31+8 (reloaded by wrapup) */
78: st r29, r31, 12 /* save E.F. pointer at r31+12 (reloaded by wrapup) */
79:
80: ld r2, r29, EF_FPSR * 4 /* r2 = FPSR image from the frame */
81: ld r3, r29, EF_FPCR * 4 /* r3 = FPCR image from the frame */
82: ld r4, r29, EF_FPECR * 4 /* r4 = FPECR, tested bit by bit below */
83: ld r5, r29, EF_FPHS1 * 4 /* r5 = S1 high word */
84: ld r6, r29, EF_FPLS1 * 4 /* r6 = S1 low word */
85: ld r7, r29, EF_FPHS2 * 4 /* r7 = S2 high word */
86: ld r8, r29, EF_FPLS2 * 4 /* r8 = S2 low word */
87: ld r9, r29, EF_FPPT * 4 /* r9 = FPPT: opcode, operand size bits and destination register are extracted from it later */
88:
89:
90: /*
91: * Load into r1 the return address for the zero handlers. Looking at
92: * FPECR, branch to the appropriate zero handler. However, if none of
93: * the zero bits are enabled, then a floating point instruction was
94: * issued with the floating point unit disabled. This will cause an
95: * unimplemented opcode 0.
96: */
97:
98: or.u r1,r0,hi16(wrapup) /* load return address of function */
99: or r1,r1,lo16(wrapup) /* r1 = wrapup; every handler returns there */
100:
101: bb0 6,r4, 3f /* FPECR bit 6 set: go to FPuimp */
102: br FPuimp
103: 3:
104: bb0 7,r4, 4f /* branch to FPintover if bit set */
105: br FPintover
106: 4:
107: #if 0
108: bb0 5,r4, 5f /* branch to FPpriviol if bit set */
109: br FPpriviol
110: #endif
111: 5:
112: bb0 4,r4, 6f /* branch to FPresoper if bit set */
113: br FPresoper
114: 6:
115: bb0 3,r4, 7f /* branch to FPdivzero if bit set */
116: br FPdivzero
117: 7:
118: or.u r4, r4, 0xffff /* no tested bit was set: set the upper half of r4 and fall through into FPuimp */
119:
120: ASLOCAL(FPuimp) /* hand the exception to m88100_trap with r2 = T_FPEPFLT, r3 = E.F. pointer */
121: subu r31,r31,16 /* allocate stack */
122: st r1,r31,0 /* save return address */
123: or r2,r0,T_FPEPFLT /* load trap type */
124: bsr.n _C_LABEL(m88100_trap) /* call the trap handler; next line is the delay slot */
125: or r3, r29, r0 /* r3 = E.F. pointer */
126: ld r1,r31,0 /* recover return address */
127: addu r31,r31,16 /* deallocate stack */
128: jmp r1 /* NOTE(review): r2 was overwritten with the trap type above, yet wrapup writes r2 to FPSR - confirm this is intended */
129:
130: /*
131: * To write back the results to the user registers, disable exceptions
132: * and the floating point unit. Write FPSR and FPCR and load the SNIP
133: * and SFIP.
134: * r5 will contain the upper word of the result
135: * r6 will contain the lower word of the result
136: */
137:
138: ASLOCAL(wrapup) /* common exit: write FPSR/FPCR from r2/r3 and store the result (r5:r6) into the frame's register slots */
139: tb1 0,r0,0 /* make sure all floating point operations */
140: /* have finished */
141: ldcr r10, cr1 /* load the PSR */
142: #if 0
143: set r10, r10, 1<PSR_FPU_DISABLE_BIT>
144: #endif
145: set r10, r10, 1<PSR_INTERRUPT_DISABLE_BIT> /* only the interrupt-disable bit is set; the FPU-disable step above is compiled out */
146: stcr r10, cr1 /* write the modified PSR back */
147:
148: ld r1, r31, 8 /* reload the return address stored at r31+8 */
149: ld r29, r31, 12 /* reload the E.F. pointer stored at r31+12 */
150: addu r31, r31, 16 /* release the 16-byte frame */
151:
152: fstcr r2, FPSR /* write revised value of FPSR */
153: fstcr r3, FPCR /* write revised value of FPCR */
154:
155: /* result writeback routine */
156: addu r3, r29, EF_R0 * 4 /* r3 = address of the EF_R0 slot in the frame */
157: extu r2, r9, 5<0> /* get 5 bits of destination register */
158: bb0 5, r9, writesingle /* branch if destination is single */
159:
160: /* writedouble here */
161: st r5, r3 [r2] /* write high word */
162: add r2, r2, 1 /* for double, the low word is the */
163: /* unspecified register */
164: clr r2, r2, 27<5> /* perform equivalent of mod 32 */
165: ASLOCAL(writesingle)
166: jmp.n r1 /* return; the store below executes in the delay slot */
167: st r6, r3 [r2] /* write low word into memory */
168:
169: /*
170: * Check if the numerator is zero. If the numerator is zero, then handle
171: * this instruction as you would a 0/0 invalid operation.
172: */
173:
174: ASLOCAL(FPdivzero) /* divide by zero: test whether the numerator S1 (r5:r6) is itself zero */
175: bb1.n s1size,r9,1f /* branch if numerator double */
176: st r1,r31,0 /* save return address (delay slot, runs on both paths) */
177: /* single number */
178: clr r10,r5,1<sign> /* clear sign bit */
179: extu r11,r6,3<29> /* grab upper bits of lower word */
180: or r10,r10,r11 /* combine ones of mantissa */
181: bcnd eq0,r10,resoper /* numerator is zero, handle reserved operand */
182: br setbit /* set divzero bit */
183: 1:
184: /* double number */
185: clr r10,r5,1<sign> /* clear sign bit */
186: or r10,r10,r6 /* or high and low words */
187: bcnd ne0,r10,setbit /* nonzero numerator: set divzero bit; otherwise fall through into resoper */
188:
189: /*
190: * The numerator is zero, so handle the invalid operation by setting the
191: * invalid operation bit and writing a quiet NaN to the destination.
192: */
193:
194: ASLOCAL(resoper) /* 0/0: flag the invalid operation and return a NaN in r5:r6 */
195: set r2,r2,1<oper> /* set the oper bit in the FPSR image */
196: set r5,r0,0<0> /* put a NaN in high word (presumably a 0-width field means all 32 bits - confirm) */
197: set r6,r0,0<0> /* put a NaN in low word */
198: br FP_div_return /* reload r1 and return */
199: /* writing to a word which may be ignored */
200: /* is just as quick as checking the precision */
201: /* of the destination */
202:
203: /*
204: * The operation is divide by zero, so set the divide by zero bit in the
205: * FPSR.
206: * Considering the sign of the numerator and zero, write a correctly
207: * signed infinity of the proper precision into the destination.
208: */
209:
210: setbit: /* x/0 with x != 0: flag divide-by-zero and build a signed infinity */
211: set r2,r2,1<divzero> /* set the divzero bit in the FPSR image */
212: bb1 dsize,r9,FPzero_double /* branch to handle double result */
213: FPzero_single:
214: clr r10,r5,31<0> /* clear all of S1HI except sign bit */
215: xor r10,r7,r10 /* xor the sign bits of the operands (all of r7 takes part; presumably only its sign bit is set for a zero divisor - confirm) */
216: or.u r6,r0,0x7f80 /* load single precision infinity */
217: br.n FP_div_return /* next line is the delay slot */
218: or r6,r6,r10 /* load correctly signed infinity */
219:
220: FPzero_double:
221: clr r10,r5,31<0> /* clear all of S1HI except sign bit */
222: xor r10,r7,r10 /* xor the sign bits of the operands */
223: or.u r5,r0,0x7ff0 /* load double precision infinity */
224: or r5,r5,r10 /* load correctly signed infinity */
225: or r6,r0,r0 /* clear lower word of double */
226:
227: FP_div_return: /* shared exit of the divide-by-zero handlers */
228: ld r1,r31,0 /* load return address */
229: jmp r1 /* return to the address saved by FPdivzero */
230:
231: /*
232: * Both NINT and TRNC require a certain rounding mode, so check which
233: * instruction caused the integer conversion overflow. Use a substitute
234: * FPCR in r1, and modify the rounding mode if the instruction is NINT
235: * or TRNC.
236: */
237: ASLOCAL(FPintover) /* integer conversion overflow: pick the rounding mode to use (kept in r1) */
238: extu r10,r9,5<11> /* extract opcode */
239: cmp r11,r10,INTop /* see if instruction is INT */
240: st r1,r31,0 /* save return address */
241: bb1.n eq,r11,checksize /* instruction is INT, do not modify */
242: /* rounding mode */
243: or r1,r0,r3 /* load FPCR into r1 (delay slot, runs on both paths) */
244: cmp r11,r10,NINTop /* see if instruction is NINT */
245: bb1 eq,r11,NINT /* instruction is NINT */
246: TRNC:
247: clr r1,r1,2<rndlo> /* clear rounding mode bits, */
248: /* instruction is TRNC */
249: br.n checksize /* branch to check size */
250: set r1,r1,1<rndlo> /* make rounding mode round towards */
251: /* zero */
252: NINT:
253: clr r1,r1,2<rndlo> /* make rounding mode round to */
254: /* nearest */
255:
256: /* See whether the source is single or double precision. */
257:
258: checksize: /* choose the single or double false-alarm check from the S2 size bit */
259: bb1 s2size,r9,checkdoub /* S2 is double, branch to see if */
260: /* there is a false alarm; otherwise fall through to checksing */
261:
262: /*
263: * An integer has more bits than the mantissa of a single precision floating
264: * point number, so to check for false alarms (i.e. valid conversion), simply
265: * check the exponents. False alarms are detected for 2**30 to (2**31) - 1
266: * and -2**30 to -2**31. Only seven bits need to be looked at since an
267: * exception will not occur for the other half of the numbering system.
268: * To speed up the processing, first check to see if the exponent is 32 or
269: * greater.
270: *
271: * This code was originally written for the exponent in the control
272: * register to have the most significant bit (8 - single, 11 - double)
273: * flipped and sign extended. For precise exceptions, however, the most
274: * significant bit is only sign extended. Therefore, the code was chopped
275: * up so that it would work for positive values of real exponent which were
276: * only sign extended.
277: */
278:
279: checksing: /* single precision source: screen the exponent held in r7 */
280: extu r10,r7,7<20> /* internal representation for single */
281: /* precision is IEEE 8 bits sign extended */
282: /* to 11 bits; for real exp. = 30, the */
283: /* above instruction gives a result exp. */
284: /* that has the MSB flipped and sign */
285: /* extended like in the IMPCR */
286: cmp r11,r10,31 /* compare to 32,but exp. off by 1 */
287: /* these 2 instructions to speed up valid */
288: /* execution of valid cases */
289: bb1 ge,r11,overflw /* valid case, perform overflow routine */
290: bb1 sign,r7,checksingn /* source operand is negative; positive falls through to checksingp */
291:
292: /*
293: * If the number is positive and the exponent is greater than 30, then it is
294: * overflow.
295: */
296: checksingp: /* positive single: overflow unless the exponent is small enough */
297: cmp r10,r10,29 /* compare to 30, but exp. off by 1 (r10 now holds the compare result) */
298: bb1 gt,r10,overflw /* no false alarm, its overflow */
299: br conversionsp /* finish single precision conversion */
300:
301: /*
302: * If the number is negative, and the exponent is 30, or 31 with a mantissa
303: * of 0, then it is a false alarm.
304: */
305: checksingn: /* negative single: r11 from this compare is tested again in conversionsn */
306: cmp r11,r10,30 /* compare to 31,but exp. off by 1 */
307: bb1 lt,r11,conversionsn /* exp. less than 31, so convert */
308: extu r10,r8,3<29> /* get upper three bits of lower */
309: /* mantissa */
310: mak r12,r7,20<3> /* get upper 20 bits of mantissa */
311: or r10,r10,r12 /* form complete mantissa */
312: bcnd eq0,r10,conversionsn /* complete conversion if mantissa */
313: /* is 0 */
314: br overflw /* no false alarm, its overflow */
315:
316: /*
317: * False alarms are detected for 2**30 to (2**31) - 1 and -2**30 to -2**31.
318: * Only seven bits need to be looked at since an exception will not occur
319: * for the other half of the numbering system.
320: * To speed up the processing, first check to see if the exponent is 32 or
321: * greater. Since there are more mantissa bits than integer bits, rounding
322: * could cause overflow. (2**31) - 1 needs to be checked so that it does
323: * not round to 2**31, and -2**31 needs to be checked in case it rounds to
324: * -((2**31) + 1).
325: */
326: checkdoub: /* double precision source: screen the exponent held in r7 */
327: extu r10,r7,10<20> /* internal representation for double */
328: /* precision is the same IEEE 11 bits */
329: /* for real exp. = 30, the */
330: /* above instruction gives a result exp. */
331: /* that has the MSB flipped and sign */
332: /* extended like in the IMPCR */
333: cmp r11,r10,31 /* compare to 32,but exp. off by 1 */
334: /* these 2 instructions to speed up valid */
335: /* execution of valid cases */
336: bb1 ge,r11,overflw /* valid case, perform overflow routine */
337: bb1 sign,r7,checkdoubn /* source operand is negative; positive falls through to checkdoubp */
338:
339: /*
340: * If the exponent is not 31, then the floating point number will be rounded
341: * before the conversion is done. A branch table is set up with bits 4 and 3
342: * being the rounding mode, and bits 2, 1, and 0 are the guard, round, and
343: * sticky bits.
344: */
345: checkdoubp: /* positive double: build a 5-bit index and dispatch through ptable */
346: cmp r11,r10,30 /* compare to 31, but exponent off by 1 */
347: bb1 eq,r11,overflw /* no false alarm, its overflow */
348: extu r12,r8,1<22> /* get LSB for integer with exp. = 30 */
349: mak r12,r12,1<2> /* start to set up field for branch table: integer LSB goes to index bit 2 */
350: extu r11,r8,1<21> /* get guard bit */
351: mak r11,r11,1<1> /* set up field for branch table: guard goes to index bit 1 */
352: or r12,r11,r12 /* set up field for branch table */
353: extu r11,r8,21<0> /* get bits for sticky bit */
354: bcnd eq0,r11,nostickyp /* do not set sticky */
355: set r12,r12,1<0> /* set sticky bit (index bit 0) */
356: nostickyp:
357: rot r11,r1,0<rndlo> /* shift rounding mode to 2 LSB''s */
358: mak r11,r11,2<3> /* set up field, clear other bits: rounding mode goes to index bits 4-3 */
359: or r12,r11,r12 /* set up field for branch table */
360: lda r12,r0[r12] /* scale r12 */
361: or.u r12,r12,hi16(ptable) /* load pointer into table */
362: addu r12,r12,lo16(ptable)
363: jmp r12 /* jump to the selected ptable entry */
364:
365: ptable: /* positive double; index = mode<<3 | LSB<<2 | guard<<1 | sticky; mode 0 = nearest, 1 = toward zero, 2 = negative, 3 = presumably positive */
366: br conversiondp /* mode 0, LGS=000 */
367: br conversiondp /* mode 0, LGS=001 */
368: br conversiondp /* mode 0, LGS=010 */
369: br paddone /* mode 0, LGS=011 */
370: br conversiondp /* mode 0, LGS=100 */
371: br conversiondp /* mode 0, LGS=101 */
372: br paddone /* mode 0, LGS=110 */
373: br paddone /* mode 0, LGS=111 */
374: br conversiondp /* mode 1, LGS=000 */
375: br conversiondp /* mode 1, LGS=001 */
376: br conversiondp /* mode 1, LGS=010 */
377: br conversiondp /* mode 1, LGS=011 */
378: br conversiondp /* mode 1, LGS=100 */
379: br conversiondp /* mode 1, LGS=101 */
380: br conversiondp /* mode 1, LGS=110 */
381: br conversiondp /* mode 1, LGS=111 */
382: br conversiondp /* mode 2, LGS=000 */
383: br conversiondp /* mode 2, LGS=001 */
384: br conversiondp /* mode 2, LGS=010 */
385: br conversiondp /* mode 2, LGS=011 */
386: br conversiondp /* mode 2, LGS=100 */
387: br conversiondp /* mode 2, LGS=101 */
388: br conversiondp /* mode 2, LGS=110 */
389: br conversiondp /* mode 2, LGS=111 */
390: br conversiondp /* mode 3, LGS=000 */
391: br paddone /* mode 3, LGS=001 */
392: br paddone /* mode 3, LGS=010 */
393: br paddone /* mode 3, LGS=011 */
394: br conversiondp /* mode 3, LGS=100 */
395: br paddone /* mode 3, LGS=101 */
396: br paddone /* mode 3, LGS=110 */
397: br paddone /* mode 3, LGS=111 */
398:
399: /*
400: * Add one to the bit of the mantissa which corresponds to the LSB of an
401: * integer. If the mantissa overflows, then there is a valid integer
402: * overflow conversion; otherwise, the mantissa can be converted to the
403: * integer.
404: */
405: paddone: /* positive double, round up: add 1 at mantissa bit 22 of the low word */
406: or r10,r0,r0 /* clear r10 */
407: set r10,r10,1<22> /* set LSB bit to 1 for adding */
408: addu.co r8,r8,r10 /* add the 1 obtained from rounding, producing a carry out */
409: clr r11,r7,12<20> /* clear exponent and sign */
410: addu.ci r11,r0,r11 /* add carry */
411: bb1 20,r11,overflw /* overflow to 2**31, abort the rest */
412: br.n conversiondp /* since the exp. was 30, and the exp. */
413: /* did not round up to 31, the largest */
414: /* number that S2 could become is 2**31-1 */
415: or r7,r0,r11 /* store r11 into r7 for conversion (delay slot) */
416:
417: /*
418: * Now check for negative double precision sources. If the exponent is 30,
419: * then convert the false alarm. If the exponent is 31, then check the
420: * mantissa bits which correspond to integer bits. If any of them are a one,
421: * then there is overflow. If they are zero, then check the guard, round,
422: * and sticky bits.
423: * Round toward zero and positive will not cause a roundup, but round toward
424: * nearest and negative may, so perform those roundings. If there is no
425: * overflow, then convert and return.
426: */
427: checkdoubn: /* negative double: exp. 30 converts; otherwise only an exact -2**31 after rounding is accepted */
428: cmp r11,r10,29 /* compare to 30, but exp. off by 1 */
429: bb1 eq,r11,conversiondn /* false alarm if exp. = 30 */
430: extu r10,r8,11<21> /* check upper bits of lower mantissa */
431: bcnd ne0,r10,overflw /* one of the bits is a 1, so oflow */
432: extu r10,r7,20<0> /* check upper bits of upper mantissa */
433: bcnd ne0,r10,overflw /* one of the bits is a 1, so oflow */
434: bb0 rndlo,r1,possround /* rounding mode is either round near */
435: /* or round negative, which may cause */
436: /* a round */
437: br.n FPintov_return /* round positive, which will not */
438: /* cause a round */
439: set r6,r0,1<sign> /* result = sign bit only (delay slot) */
440: possround:
441: extu r12,r8,1<20> /* get guard bit */
442: extu r11,r8,20<0> /* get bits for sticky bit */
443: bcnd.n eq0,r11,nostickyn /* do not set sticky */
444: mak r12,r12,1<1> /* set up field for branch table (delay slot: guard goes to bit 1) */
445: set r12,r12,1<0> /* set sticky bit */
446: nostickyn:
447: bb1 rndhi,r1,negative /* rounding mode is negative */
448: nearest:
449: cmp r12,r12,3 /* are both guard and sticky set */
450: bb1 eq,r12,overflw /* both guard and sticky are set, */
451: /* so signal overflow */
452: or r6,r0,r0 /* clear destination register r6 */
453: br.n FPintov_return
454: set r6,r6,1<sign> /* set the sign bit and take care of */
455: /* this special case */
456: negative:
457: bcnd ne0,r12,overflw /* -2**31 will be rounded to */
458: /* -(2**31+1), so signal overflow */
459: or r6,r0,r0 /* clear destination register r6 */
460: br.n FPintov_return
461: set r6,r6,1<sign> /* set the sign bit and take care of */
462: /* this special case */
462: /* this special case */
463:
464: /*
465: * Since the exp. was 30, and there was no round-up, the largest
466: * number that S2 could have been was 2**31 - 1
467: */
468:
469:
470: /* Convert the single precision positive floating point number. */
471:
472: conversionsp: /* positive single, exponent 30: assemble the integer in r6 */
473: extu r6,r8,3<29> /* extract lower bits of integer */
474: mak r6,r6,3<7> /* shift left to correct place in integer */
475: mak r10,r7,20<10> /* shift left upper bits of integer */
476: or r6,r6,r10 /* form most of integer */
477: br.n FPintov_return /* next line is the delay slot */
478: set r6,r6,1<30> /* set hidden one */
479:
480: /* Convert the single precision negative floating point number. */
481:
482: conversionsn: /* negative single: r11 still holds the compare result from checksingn */
483: bb1 eq,r11,exp31s /* use old r11 to see if exp. is 31 */
484: extu r6,r8,3<29> /* extract lower bits of mantissa */
485: mak r6,r6,3<7> /* shift left to correct place in integer */
486: mak r10,r7,20<10> /* shift left upper bits of integer */
487: or r6,r6,r10 /* form most of integer */
488: set r6,r6,1<30> /* set hidden one */
489: or.c r6,r0,r6 /* negate result (one's complement; the +1 follows) */
490: br.n FPintov_return
491: addu r6,r6,1 /* add 1 to get 2''s complement */
492: exp31s: /* exponent 31 with zero mantissa: result has only the sign bit set */
493: or r6,r0,r0 /* clear r6 */
494: br.n FPintov_return
495: set r6,r6,1<sign> /* set sign bit */
496:
497: /* Convert the double precision positive floating point number. */
498:
499: conversiondp: /* positive double, exponent 30: assemble the integer in r6 */
500: extu r6,r8,10<22> /* extract lower bits of integer */
501: mak r10,r7,20<10> /* shift left upper bits of integer */
502: or r6,r6,r10 /* form most of integer */
503: br.n FPintov_return /* next line is the delay slot */
504: set r6,r6,1<30> /* set hidden one */
505:
506: /*
507: * Convert the double precision negative floating point number.
508: * The number, whose exponent is 30, must be rounded before converting.
509: * Bits 4 and 3 are the rounding mode, and bits 2, 1, and 0 are the
510: * guard, round, and sticky bits for the branch table.
511: */
512:
513: conversiondn: /* negative double, exponent 30: build a 5-bit index and dispatch through ntable */
514: extu r12,r8,1<22> /* get LSB for integer with exp. = 30 */
515: mak r12,r12,1<2> /* start to set up field for branch table: integer LSB goes to index bit 2 */
516: extu r11,r8,1<21> /* get guard bit */
517: mak r11,r11,1<1> /* set up field for branch table: guard goes to index bit 1 */
518: or r12,r11,r12 /* set up field for branch table */
519: extu r11,r8,21<0> /* get bits for sticky bit */
520: bcnd eq0,r11,nostkyn /* do not set sticky */
521: set r12,r12,1<0> /* set sticky bit (index bit 0) */
522: nostkyn:
523: rot r11,r1,0<rndlo> /* shift rounding mode to 2 LSB''s */
524: mak r11,r11,2<3> /* set up field, clear other bits: rounding mode goes to index bits 4-3 */
525: or r12,r11,r12 /* set up field for branch table */
526: lda r12,r0[r12] /* scale r12 */
527: or.u r12,r12,hi16(ntable) /* load pointer into table */
528: addu r12,r12,lo16(ntable)
529: jmp r12 /* jump to the selected ntable entry */
530:
531: ntable: /* negative double; index = mode<<3 | LSB<<2 | guard<<1 | sticky; mode 0 = nearest, 1 = toward zero, 2 = negative, 3 = presumably positive */
532: br nnoaddone /* mode 0, LGS=000 */
533: br nnoaddone /* mode 0, LGS=001 */
534: br nnoaddone /* mode 0, LGS=010 */
535: br naddone /* mode 0, LGS=011 */
536: br nnoaddone /* mode 0, LGS=100 */
537: br nnoaddone /* mode 0, LGS=101 */
538: br naddone /* mode 0, LGS=110 */
539: br naddone /* mode 0, LGS=111 */
540: br nnoaddone /* mode 1, LGS=000 */
541: br nnoaddone /* mode 1, LGS=001 */
542: br nnoaddone /* mode 1, LGS=010 */
543: br nnoaddone /* mode 1, LGS=011 */
544: br nnoaddone /* mode 1, LGS=100 */
545: br nnoaddone /* mode 1, LGS=101 */
546: br nnoaddone /* mode 1, LGS=110 */
547: br nnoaddone /* mode 1, LGS=111 */
548: br nnoaddone /* mode 2, LGS=000 */
549: br naddone /* mode 2, LGS=001 */
550: br naddone /* mode 2, LGS=010 */
551: br naddone /* mode 2, LGS=011 */
552: br nnoaddone /* mode 2, LGS=100 */
553: br naddone /* mode 2, LGS=101 */
554: br naddone /* mode 2, LGS=110 */
555: br naddone /* mode 2, LGS=111 */
556: br nnoaddone /* mode 3, LGS=000 */
557: br nnoaddone /* mode 3, LGS=001 */
558: br nnoaddone /* mode 3, LGS=010 */
559: br nnoaddone /* mode 3, LGS=011 */
560: br nnoaddone /* mode 3, LGS=100 */
561: br nnoaddone /* mode 3, LGS=101 */
562: br nnoaddone /* mode 3, LGS=110 */
563: br nnoaddone /* mode 3, LGS=111 */
564:
565: /*
566: * Add one to the mantissa, and check to see if it overflows to -2**31.
567: * The conversion is done in nnoaddone.
568: */
569:
570: naddone: /* negative double, round magnitude up: add 1 at mantissa bit 22; falls through into nnoaddone */
571: or r10,r0,r0 /* clear r10 */
572: set r10,r10,1<22> /* set LSB bit to 1 for adding */
573: addu.co r8,r8,r10 /* add the 1 obtained from rounding; unsigned add (as in paddone) so a raw mantissa word cannot raise an integer overflow exception */
574: clr r7,r7,12<20> /* clear exponent and sign */
575: addu.ci r7,r0,r7 /* add carry (unsigned, matching paddone) */
576: bb1 20,r7,maxneg /* rounded to -2**31,handle separately */
577: /* the exponent was originally 30 */
578: nnoaddone: /* negative double, exponent 30: assemble the magnitude in r6, then negate it */
579: extu r6,r8,11<22> /* extract lower bits of integer */
580: mak r10,r7,20<10> /* shift left upper bits of integer */
581: or r6,r6,r10 /* form most of integer */
582: set r6,r6,1<30> /* set hidden one */
583: or.c r6,r0,r6 /* negate integer (one's complement; the +1 follows) */
584: br.n FPintov_return
585: addu r6,r6,1 /* add 1 to get 2''s complement */
586:
587: maxneg: /* rounding carried into bit 20: the result has only the sign bit set */
588: or r6,r0,r0 /* clear integer */
589: br.n FPintov_return /* next line is the delay slot */
590: set r6,r6,1<sign> /* set sign bit */
591:
592: /* For valid overflows, write the correctly signed largest integer. */
593: overflw: /* genuine overflow: flag it and return the largest integer of the source's sign; falls through into FPintov_return */
594: set r2,r2,1<oper> /* set the oper bit in the FPSR image */
595: bb0.n sign,r7,FPintov_return /* if positive then return */
596: set r6,r0,31<0> /* set result to largest positive int; built from r0 because r6 still holds the stale S1 low word and its bit 31 must not leak into the result */
597: or.c r6,r0,r6 /* complement r6, giving largest negative */
598: /* integer */
599:
600: FPintov_return: /* shared exit of the integer-overflow handlers; the result is in r6 */
601: ld r1,r31,0 /* load return address from memory */
602: jmp r1 /* return to the address saved by FPintover */
603:
604: /*
605: * Some instructions only have the S2 operations, so clear S1HI and S1LO
606: * for those instructions so that the previous contents of S1HI and S1LO
607: * do not influence this instruction.
608: */
609:
610: ASLOCAL(FPresoper) /* reserved operand: for INT/NINT/TRNC clear S1 first, then classify the operands */
611: st r1, r31, 0 /* save return address */
612: extu r10,r9,5<11> /* extract opcode */
613: #if 0
614: cmp r11,r10,FSQRTop /* compare to FSQRT */
615: bb1 eq,r11,S1clear /* clear S1 if instruction only had S2 operand */
616: #endif
617: cmp r11,r10,INTop /* compare to INT */
618: bb1 eq,r11,S1clear /* clear S1 if instruction only had S2 operand */
619: cmp r11,r10,NINTop /* compare to NINT */
620: bb1 eq,r11,S1clear /* clear S1 if instruction only had S2 operand */
621: cmp r11,r10,TRNCop /* compare to TRNC */
622: bb0 eq,r11,opercheck /* check for reserved operands; TRNC falls through into S1clear */
623:
624: ASLOCAL(S1clear) /* zero r5:r6 (S1), then fall through into opercheck */
625: or r5,r0,r0 /* clear any NaN''s, denorms, or infinities */
626: or r6,r0,r0 /* that may be left in S1HI,S1LO from a */
627: /* previous instruction */
628:
629: /*
630: * r12 contains the following flags:
631: * bit 9 -- s1sign
632: * bit 8 -- s2sign
633: * bit 7 -- s1nan
634: * bit 6 -- s2nan
635: * bit 5 -- s1inf
636: * bit 4 -- s2inf
637: * bit 3 -- s1zero
638: * bit 2 -- s2zero
639: * bit 1 -- s1denorm
640: * bit 0 -- s2denorm
641: */
642:
643: /*
644: * Using code for both single and double precision, check if S1 is either
645: * a NaN or infinity and set the appropriate flags in r12. Then check if
646: * S2 is a NaN or infinity. If it is a NaN, then branch to the NaN routine.
647: */
648:
649: ASLOCAL(opercheck) /* classify S1 (r5:r6) as NaN or infinity and record it in r12 */
650: extu r10,r5,11<20> /* internal representation for double */
651: bb1.n s1size,r9,S1NaNdoub /* S1 is double precision */
652: or r12,r0,r0 /* clear operand flag register (delay slot, runs on both paths) */
653: ASLOCAL(S1NaNsing)
654: xor r10,r10,0x0080 /* internal representation for single */
655: ext r10,r10,8<0> /* precision is IEEE 8 bits sign extended */
656: /* to 11 bits; for real exp. > 0, the */
657: /* above instructions gives a result exp. */
658: /* that has the MSB flipped and sign */
659: /* extended like in the IMPCR */
660: cmp r11,r10,127 /* Is exponent equal to IEEE 255 (here 127) */
661: bb1 ne,r11,S2NaN /* source 1 is not a NaN or infinity */
662: mak r10,r5,20<0> /* load r10 with upper bits of S1 mantissa */
663: extu r11,r6,3<29> /* get 3 upper bits of lower word */
664: or r11,r10,r11 /* combine any existing 1 */
665: bcnd eq0,r11,noS1NaNs /* since r11 can only hold 0 or a */
666: /* > 0 number, branch to noS1NaN when eq0 */
667: br.n S2NaN /* see if S2 has a NaN */
668: set r12,r12,1<s1nan> /* indicate that S1 has a NaN */
669: ASLOCAL(noS1NaNs)
670: br.n S2NaN /* check contents of S2 */
671: set r12,r0,1<s1inf> /* indicate that S1 has an infinity */
671: set r12,r0,1<s1inf> /* indicate that S1 has an infinity */
672:
673: ASLOCAL(S1NaNdoub) /* double precision S1: same NaN/infinity test on the 11-bit exponent */
674: xor r10,r10,0x0400 /* precision is the same IEEE 11 bits */
675: /* The above instructions gives a result exp. */
676: /* that has the MSB flipped and sign */
677: /* extended like in the IMPCR */
678: cmp r11,r10,1023 /* Is exp. equal to IEEE 2047 (internal 1023) */
679: bb1 ne,r11,S2NaN /* source 1 is not a NaN or infinity */
680: mak r10,r5,20<0> /* load r10 with upper bits of S1 mantissa */
681: or r11,r6,r10 /* combine existing 1''s of mantissa */
682: bcnd eq0,r11,noS1NaNd /* since r11 can only hold 0 or a > 0 */
683: /* number, branch to noS1NaN when eq0 */
684: br.n S2NaN /* see if S2 has a NaN */
685: set r12,r12,1<s1nan> /* indicate that S1 has a NaN */
686: ASLOCAL(noS1NaNd)
687: set r12,r0,1<s1inf> /* indicate that S1 has an infinity; falls through into S2NaN */
688:
689: ASLOCAL(S2NaN) /* classify S2 (r7:r8) as NaN or infinity and branch to the matching handler */
690: bb1.n s2size,r9,S2NaNdoub /* S2 is double precision */
691: extu r10,r7,11<20> /* internal representation for double */
692: ASLOCAL(S2NaNsing)
693: xor r10,r10,0x0080 /* internal representation for single */
694: ext r10,r10,8<0> /* precision is IEEE 8 bits sign extended */
695: /* to 11 bits; for real exp. > 0, the */
696: /* above instruction gives a result exp. */
697: /* that has the MSB flipped and sign */
698: /* extended like in the IMPCR */
699: cmp r11,r10,127 /* Is exponent equal to IEEE 255 (here 127) */
700: bb1 ne,r11,inf /* source 2 is not a NaN or infinity */
701: mak r10,r7,20<0> /* load r10 with upper bits of S2 mantissa */
702: extu r11,r8,3<29> /* get 3 upper bits of lower word */
703: or r11,r10,r11 /* combine any existing 1''s */
704: bcnd eq0,r11,noS2NaNs /* since r11 can only hold 0 or a > 0 */
705: /* number, branch to noS2NaNs when eq0 */
706: br.n _ASM_LABEL(NaN) /* branch to NaN routine */
707: set r12,r12,1<s2nan> /* indicate that s2 has a NaN */
708: ASLOCAL(noS2NaNs)
709: bb0 s1nan,r12, 1f /* branch to NaN if S1 is a NaN */
710: br _ASM_LABEL(NaN)
711: 1:
712: br.n _ASM_LABEL(infinity) /* If S1 had a NaN we would have */
713: /* already branched, and S2 does not have a */
714: /* NaN, but it does have an infinity, so */
715: /* branch to handle the infinity */
716: set r12,r12,1<s2inf> /* indicate that S2 has an infinity */
717:
718: ASLOCAL(S2NaNdoub) /* double precision S2: same NaN/infinity test on the 11-bit exponent */
719: xor r10,r10,0x0400 /* precision is the same IEEE 11 bits */
720: /* The above instruction gives a result exp. */
721: /* that has the MSB flipped and sign */
722: /* extended like in the IMPCR */
723: cmp r11,r10,1023 /* Is exp. equal to IEEE 2047 (internal 1023) */
724: bb1 ne,r11,inf /* source 2 is not a NaN or infinity */
725: mak r10,r7,20<0> /* load r10 with upper bits of S2 mantissa */
726: or r11,r8,r10 /* combine existing 1''s of mantissa */
727: bcnd eq0,r11,noS2NaNd /* since r11 can only hold 0 or a > 0 */
728: /* number, branch to noS2NaNd when eq0 */
729: br.n _ASM_LABEL(NaN) /* branch to NaN routine */
730: set r12,r12,1<s2nan> /* indicate that s2 has a NaN */
731: ASLOCAL(noS2NaNd)
732: bb0 s1nan,r12,1f /* branch to NaN if S1 is a NaN */
733: br _ASM_LABEL(NaN)
734: 1:
735: br.n _ASM_LABEL(infinity) /* If S1 had a NaN we would have */
736: /* already branched, and S2 does not have a */
737: /* NaN, but it does have an infinity, so */
738: /* branch to handle the infinity */
739: set r12,r12,1<s2inf> /* indicate that S2 has an infinity */
740:
741: /*
742: * If S2 was a NaN, the routine would have already branched to NaN. If S1
743: * is a NaN, then branch to NaN. If S1 is not a NaN and S2 is infinity, then
744: * we would have already branched to infinity. If S1 is infinity, then branch.
745: * If the routine still has not branched, then branch to denorm, the only
746: * reserved operand left.
747: */
748:
749: ASLOCAL(inf) /* S2 is neither NaN nor infinity: dispatch on the S1 flags in r12 */
750: bb0 s1nan,r12,1f /* skip ahead unless S1 has a NaN (S2 does not) */
751: br _ASM_LABEL(NaN)
752: 1:
753: bb0 s1inf,r12,2f /* Neither S1 or S2 has a NaN, and we would */
754: /* have branched already if S2 had an */
755: /* infinity, so branch if S1 is infinity */
756: br _ASM_LABEL(infinity)
757: 2:
758: br _ASM_LABEL(denorm) /* branch to denorm, the only */
759: /* remaining alternative */
760:
761: /*
762: * Branch to the routine to make a denormalized number.
763: */
764: ASLOCAL(FPunderflow)
765: st r1,r31,0 /* save return address */
766: set r2,r2,1<underflow>
767: set r2,r2,1<inexact>
768:
769: /*
770: * Now the floating point number, which has an exponent smaller than what
771: * IEEE allows, must be denormalized. Denormalization is done by calculating
772: * the difference between a denormalized exponent and an underflow exponent
773: * and shifting the mantissa by that amount. A one may need to be subtracted
774: * from the LSB if a one was added during rounding.
775: * r9 is used to contain the guard, round, sticky, and an inaccuracy bit in
776: * case some bits were shifted off the mantissa during denormalization.
777: * r9 will contain:
778: * bit 4 -- new addone if one added during rounding after denormalization
779: * bit 3 -- inaccuracy flag caused by denormalization or pre-denormalization
780: * inexactness
781: * bit 2 -- guard bit of result
782: * bit 1 -- round bit of result
783: * bit 0 -- sticky bit of result
784: */
785:
786: FPU_denorm:
787: bb1.n destsize,r12,Udouble /* denorm for double */
788: extu r9,r10,3<26> /* load r9 with grs */
789: Usingle:
790: mak r5,r10,21<3> /* extract high 21 bits of mantissa */
791: extu r6,r11,3<29> /* extract low 3 bits of mantissa */
792: or r11,r5,r6 /* form 24 bits of mantissa */
793:
794: /* See if the addone bit is set and unround if it is. */
795: bb0.n 25,r10,nounrounds /* do not unround if addone bit clear */
796: extu r6,r12,12<20> /* extract signed exponent from IMPCR */
797: unrounds:
798: subu r11,r11,1 /* subtract 1 from mantissa */
799:
800: /*
801: * If the hidden bit is cleared after subtracting the one, then the one added
802: * during the rounding must have propagated through the mantissa. The exponent
803: * will need to be decremented.
804: */
805: bb1 23,r11,nounrounds /* if hidden bit is set,then exponent */
806: /* does not need to be decremented */
807: decexps:
808: sub r6,r6,1 /* decrement exponent 1 */
809: set r11,r11,1<23> /* set the hidden bit */
810:
811: /*
812: * For both single and double precision, there are cases where it is easier
813: * and quicker to make a special case. Examples of this are if the shift
814: * amount is only 1 or 2, or all the mantissa is shifted off, or all the
815: * mantissa is shifted off and it is still shifting, or, in the case of
816: * doubles, if the shift amount is around the boundary of MANTLO and MANTHI.
817: */
818:
819: nounrounds:
820: or r8,r0,lo16(0x00000f81) /* load r8 with -127 in decimal */
821: /* for lowest 12 bits */
822: sub r7,r8,r6 /* find difference between two exponents, */
823: /* this amount is the shift amount */
824: cmp r6,r7,3 /* check to see if r7 contains 3 or more */
825: bb1 ge,r6,threesing /* br to code that handles shifts of >=3 */
826: cmp r6,r7,2 /* check to see if r7 contains 2 */
827: bb1 eq,r6,twosing /* br to code that handles shifts of 2 */
828: one:
829: rot r9,r9,0<1> /* rotate roundoff register once, this places */
830: /* guard in round and round in sticky */
831: bb0 31,r9,nosticky1s /* do not or round and sticky if sticky is */
832: /* 0, this lost bit will be cleared later */
833: set r9,r9,1<0> /* or round and sticky */
834: nosticky1s:
835: bb0 0,r11,guardclr1s /* do not set guard bit if LSB = 0 */
836: set r9,r9,1<2> /* set guard bit */
837: guardclr1s:
838: extu r11,r11,31<1> /* shift mantissa right 1 */
839: br.n round /* round result */
840: mak r9,r9,3<0> /* clear bits lost during rotation */
841:
842: twosing:
843: rot r9,r9,0<2> /* rotate roundoff register twice, this places */
844: /* old guard in sticky, old round in bit 31, old sticky in bit 30 */
845: bb0 30,r9,nosticky2s /* old sticky clear: go test the old round */
846: /* (bits 30 and 31 are cleared by the mak below) */
847: br.n noround2s /* skip or old guard and old round if old */
848: /* sticky set, new sticky is set in the delay slot */
849: set r9,r9,1<0> /* new sticky = old guard | old sticky */
850: nosticky2s:
851: bb0 31,r9,noround2s /* do not or guard and round if round is 0 */
852: /* this lost bit will be cleared later */
853: set r9,r9,1<0> /* new sticky = old guard | old round */
854: noround2s:
855: bb0 0,r11,roundclr2s /* do not set round bit if LSB = 0 */
856: set r9,r9,1<1> /* new round = mantissa bit 0 */
857: roundclr2s:
858: bb0 1,r11,guardclr2s /* do not set guard bit if LSB + 1 = 0 */
859: set r9,r9,1<2> /* new guard = mantissa bit 1 */
860: guardclr2s:
861: extu r11,r11,30<2> /* shift mantissa right 2 */
862: br.n round /* round result */
863: mak r9,r9,3<0> /* delay slot: keep bits 2..0, clear bits lost during rotation */
864:
865: threesing:
866: mask r6,r9,7 /* r6 = old guard|round|sticky; r6 is free here, */
867: /* the cmp at noguard3s reloads it */
868: nosticky3s:
869: bcnd.n eq0,r6,noguard3s /* nothing was lost so far: sticky stays clear */
870: clr r9,r9,2<1> /* delay slot: always drop the old guard and */
871: /* round so they cannot leak into the new ones */
872: noround3s:
873: /* a shift of 3 or more pushes every old */
874: /* guard, round and sticky bit below the new */
875: /* round bit, so they all fold into sticky */
876: set r9,r9,1<0> /* new sticky = old guard | round | sticky */
877: noguard3s:
878: cmp r6,r7,23 /* check if # of shifts is <=23 */
879: bb1 gt,r6,s24 /* more than 23: branch to see if shifts = 24 */
880: sub r6,r7,2 /* get number of bits to check for sticky */
881: mak r6,r6,5<5> /* shift width into width field */
882: mak r8,r11,r6 /* mask off shifted bits -2 */
883: ff1 r8,r8 /* see if r8 has any ones */
884: bb1 5,r8,nostky23 /* do not set sticky if no ones found */
885: set r9,r9,1<0> /* set sticky bit */
886: nostky23:
887: or r8,r0,34 /* start code to get new mantissa plus two */
888: /* extra bits for new round and new guard */
889: /* bits */
890: subu r8,r8,r7 /* r8 = 34 - shift amount, the width to keep */
891: mak r8,r8,5<5> /* shift field width into second five bits */
892: extu r6,r6,5<5> /* shift previous shifted -2 into offset field */
893: or r6,r6,r8 /* complete field */
894: extu r11,r11,r6 /* form new mantissa with two extra bits */
895:
896: bb0 0,r11,nornd3s /* do not set new round bit */
897: set r9,r9,1<1> /* set new round bit */
898: nornd3s:
899: bb0 1,r11,nogrd3s /* do not set new guard bit */
900: set r9,r9,1<2> /* set new guard bit */
901: nogrd3s:
902: br.n round /* round mantissa */
903: extu r11,r11,30<2> /* delay slot: shift off remaining two bits */
904:
905: s24:
906: cmp r6,r7,24 /* check to see if # of shifts is 24 */
907: bb1 gt,r6,s25 /* more than 24: branch to see if shifts = 25 */
908: bb1 0,r9,nostky24 /* skip checking if old sticky set */
909: extu r8,r11,22<0> /* r8 = mantissa bits 21..0, which will be shifted */
910: /* into the sticky */
911: ff1 r8,r8 /* see if there are any ones */
912: bb1 5,r8,nostky24 /* do not set sticky if no ones found */
913: set r9,r9,1<0> /* set sticky bit */
914: nostky24:
915: bb0 22,r11,nornd24 /* bit 22 clear: leave round alone. NOTE(review): round is never cleared here, assumes it is clear on entry - confirm */
916: set r9,r9,1<1> /* new round = mantissa bit 22 */
917: nornd24:
918: set r9,r9,1<2> /* set new guard bit,this is hidden bit */
919: br.n round /* round mantissa */
920: or r11,r0,r0 /* delay slot: clear r11, all of mantissa shifted off */
921:
922: s25:
923: cmp r6,r7,25 /* check to see if # of shifts is 25 */
924: bb1 gt,r6,s26 /* more than 25: branch to code for shifts >= 26 */
925: bb1 0,r9,nostky25 /* skip checking if old sticky set */
926: extu r8,r11,23<0> /* r8 = mantissa bits 22..0, which will be shifted */
927: /* into the sticky */
928: ff1 r8,r8 /* see if there are any ones */
929: bb1 5,r8,nostky25 /* do not set sticky if no ones found */
930: set r9,r9,1<0> /* set sticky bit */
931: nostky25:
932: set r9,r9,1<1> /* set new round bit,this is hidden bit */
933: clr r9,r9,1<2> /* clear guard bit since nothing shifted in */
934: br.n round /* round and assemble result */
935: or r11,r0,r0 /* delay slot: clear r11, all of mantissa shifted off */
936:
937: s26:
938: set r9,r9,1<0> /* set sticky bit,this contains hidden bit */
939: clr r9,r9,2<1> /* clear guard and round bits since nothing */
940: /* is shifted into them for shifts of 26 or more */
941: br.n round /* round and assemble result */
942: or r11,r0,r0 /* delay slot: clear mantissa */
943:
944: Udouble:
945: mak r5,r10,21<0> /* r5 = low 21 bits of r10, the upper bits of mantissa */
946: bb0.n 25,r10,nounroundd /* do not unround if addone bit clear */
947: extu r6,r12,12<20> /* delay slot: extract signed exponent from IMPCR */
948: unroundd:
949: or r8,r0,1 /* load a 1 into r8 so that subu.co can be used */
950: subu.co r11,r11,r8 /* subtract 1 from mantissa */
951: subu.ci r5,r5,r0 /* subtract borrow from upper word */
952: bb1 20,r5,nounroundd /* if hidden bit is set, then exponent does */
953: /* not need to be decremented */
954: decexpd:
955: sub r6,r6,1 /* decrement exponent 1 */
956: set r5,r5,1<20> /* set the hidden bit */
957:
958: nounroundd:
959: or r8,r0,lo16(0x00000c01) /* r8 = 0xc01, i.e. -1023 decimal expressed */
960: /* in the lowest 12 bits */
961: sub r7,r8,r6 /* r7 = r8 - r6, the difference between the two */
962: /* exponents; this amount is the shift amount */
963: cmp r6,r7,3 /* compare shift amount with 3 (r6 is reused for the result) */
964: bb1 ge,r6,threedoub /* br to code that handles shifts of >=3 */
965: cmp r6,r7,2 /* compare shift amount with 2 */
966: bb1 eq,r6,twodoub /* shift of 2 goes to twodoub, else fall into onedoub */
967:
968: onedoub:
969: rot r9,r9,0<1> /* rotate roundoff register once, this places */
970: /* guard in round, round in sticky, and the old sticky in bit 31 */
971: bb0 31,r9,nosticky1d/* do not or round and sticky if sticky is 0 */
972: /* this lost bit will be cleared by the mak below */
973: set r9,r9,1<0> /* or old round and old sticky into new sticky */
974: nosticky1d:
975: bb0 0,r11,guardclr1d /* do not set new guard bit if old LSB = 0 */
976: set r9,r9,1<2> /* set new guard bit */
977: guardclr1d:
978: extu r11,r11,31<1> /* shift lower mantissa over 1 */
979: mak r6,r5,1<31> /* move low bit of high mantissa to bit 31 of r6 */
980: or r11,r6,r11 /* load that bit onto the top of lower mantissa */
981: extu r5,r5,20<1> /* shift right once upper 20 bits of mantissa */
982: br.n round /* round mantissa and assemble result */
983: mak r9,r9,3<0> /* delay slot: keep bits 2..0, clear bits lost during rotation */
984:
985: twodoub:
986: rot r9,r9,0<2> /* rotate roundoff register twice, this places */
987: /* old guard into sticky, old round in bit 31, old sticky in bit 30 */
988: bb0 30,r9,nosticky2d /* do not or old guard and old sticky if */
989: /* old sticky is 0 */
990: br.n noround2d /* skip or of old guard and old round if old */
991: /* sticky set, new sticky is set in the delay slot */
992: set r9,r9,1<0> /* or old guard and old sticky into new sticky */
993: nosticky2d:
994: bb0 31,r9,noround2d /* do not or old guard and old round if */
995: /* old round is 0 */
996: set r9,r9,1<0> /* or old guard and old round into new sticky */
997: noround2d:
998: bb0 0,r11,roundclr2d /* do not set round bit if old LSB = 0 */
999: set r9,r9,1<1> /* set new round bit */
1000: roundclr2d:
1001: bb0 1,r11,guardclr2d /* do not set guard bit if old LSB + 1 = 0 */
1002: set r9,r9,1<2> /* set new guard bit */
1003: guardclr2d:
1004: extu r11,r11,30<2> /* shift lower mantissa over 2 */
1005: mak r6,r5,2<30> /* move low 2 bits of high mantissa to bits 31..30 of r6 */
1006: or r11,r6,r11 /* load those bits onto the top of lower mantissa */
1007: extu r5,r5,19<2> /* shift right twice upper 19 bits of mantissa */
1008: br.n round /* round mantissa and assemble result */
1009: mak r9,r9,3<0> /* delay slot: keep bits 2..0, clear bits lost during rotation */
1010:
1011: threedoub:
1012: mask r6,r9,7 /* r6 = old guard|round|sticky; r6 is free here, */
1013: /* the cmp at noguard3d reloads it */
1014: nosticky3d:
1015: bcnd.n eq0,r6,noguard3d /* nothing was lost so far: sticky stays clear */
1016: clr r9,r9,2<1> /* delay slot: always drop the old guard and */
1017: /* round so they cannot leak into the new ones */
1018: /* that are formed further down */
1019: noround3d:
1020: /* a shift of 3 or more pushes every old */
1021: /* guard, round and sticky bit below the new */
1022: /* round bit, so they all fold into sticky */
1023: set r9,r9,1<0> /* new sticky = old guard | round | sticky */
1024: noguard3d:
1025: cmp r6,r7,32 /* do I need to work with a 1 or 2 word mant. */
1026: /* when forming sticky, round and guard */
1027: bb1 gt,r6,d33 /* jump to code that handles 2 word mantissas */
1028: sub r6,r7,2 /* get number of bits to check for sticky */
1029: mak r6,r6,5<5> /* shift width into width field */
1030: mak r8,r11,r6 /* mask off shifted bits -2 */
1031: ff1 r8,r8 /* see if r8 has any ones */
1032: bb1 5,r8,nostky32 /* do not set sticky if no ones found */
1033: set r9,r9,1<0> /* set sticky bit */
1034: nostky32:
1035: or r8,r0,34 /* start code to get new mantissa plus two */
1036: /* extra bits for new round and new guard bits, */
1037: /* the upper word bits will be shifted after */
1038: /* the round and guard bits are handled */
1039: subu r8,r8,r7 /* r8 = 34 - shift amount, the width to keep */
1040: mak r8,r8,5<5> /* shift field width into second five bits */
1041: extu r6,r6,5<5> /* shift previous shifted -2 into offset field */
1042: or r6,r6,r8 /* complete bit field */
1043: extu r11,r11,r6 /* partially form new low mantissa with 2 more */
1044: /* bits */
1045: bb0 0,r11,nornd32d /* do not set new round bit */
1046: set r9,r9,1<1> /* set new round bit */
1047: nornd32d:
1048: bb0 1,r11,nogrd32d /* do not set new guard bit */
1049: set r9,r9,1<2> /* set new guard bit */
1050: nogrd32d:
1051: extu r11,r11,30<2> /* shift off remaining two bits */
1052: mak r6,r7,5<5> /* shift field width into second 5 bits, if the */
1053: /* width is 32, then these bits will be 0 */
1054: or r8,r0,32 /* load word length into r8 */
1055: sub r8,r8,r7 /* form offset for high bits moved to low word */
1056: or r6,r6,r8 /* form complete bit field */
1057: mak r6,r5,r6 /* get shifted bits of high word */
1058: or r11,r6,r11 /* form new low word of mantissa */
1059: bcnd ne0,r8,regular33 /* do not adjust for special case of r8 */
1060: br.n round /* containing zeros, which would cause */
1061: or r5,r0,r0 /* all of the bits to be extracted under */
1062: /* the regular method */
1063: regular33:
1064: mak r6,r7,5<0> /* place lower 5 bits of shift into r6 */
1065: mak r8,r8,5<5> /* shift r8 into width field */
1066: or r6,r6,r8 /* form field for shifting of upper bits */
1067: br.n round /* round and assemble result */
1068: extu r5,r5,r6 /* delay slot: form new high word mantissa */
1069:
1070: d33:
1071: cmp r6,r7,33 /* is the number of bits to be shifted 33? */
1072: bb1 gt,r6,d34 /* more than 33: check to see if # of bits is 34 */
1073: bb1 0,r9,nostky33 /* skip checking if old sticky set */
1074: mak r6,r11,31<0> /* r6 = low 31 bits of low word, all shifted into sticky */
1075: ff1 r8,r6 /* check r6 (not the stale r8) for ones */
1076: bb1 5,r8,nostky33 /* do not set sticky if there are no ones */
1077: set r9,r9,1<0> /* set new sticky bit */
1078: nostky33:
1079: bb0 31,r11,nornd33 /* do not set round if bit is not a 1 */
1080: set r9,r9,1<1> /* new round = bit 31 of low word */
1081: nornd33:
1082: bb0 0,r5,nogrd33 /* do not set guard bit if bit is not a 1 */
1083: set r9,r9,1<2> /* new guard = bit 0 of high word */
1084: nogrd33:
1085: extu r11,r5,31<1> /* shift high bits into low word */
1086: br.n round /* round and assemble result */
1087: or r5,r0,r0 /* delay slot: clear high word */
1088:
1089: d34:
1090: cmp r6,r7,34 /* is the number of bits to be shifted 34? */
1091: bb1 gt,r6,d35 /* more than 34: go to code for # of bits >= 35 */
1092: bb1 0,r9,nostky34 /* skip checking if old sticky set */
1093: ff1 r8,r11 /* the whole low word is shifted into sticky */
1094: bb1 5,r8,nostky34 /* do not set sticky if there are no ones */
1095: set r9,r9,1<0> /* set new sticky bit */
1096: nostky34:
1097: bb0 0,r5,nornd34 /* do not set round if bit is not a 1 */
1098: set r9,r9,1<1> /* new round = bit 0 of high word */
1099: nornd34:
1100: bb0 1,r5,nogrd34 /* do not set guard bit if bit is not a 1 */
1101: set r9,r9,1<2> /* new guard = bit 1 of high word */
1102: nogrd34:
1103: extu r11,r5,30<2> /* shift high bits into low word */
1104: br.n round /* round and assemble result */
1105: or r5,r0,r0 /* delay slot: clear high word */
1106:
1107: d35:
1108: cmp r6,r7,52 /* see if # of shifts is 35 <= X <= 52 */
1109: bb1 gt,r6,d53 /* more than 52: go see if # of shifts is 53 */
1110: bb1.n 0,r9,nostky35 /* skip checking if old sticky set */
1111: sub r7,r7,34 /* delay slot (always run): subtract 32 from # of shifts so that opera- */
1112: /* tions can be done on the upper word, and */
1113: /* then subtract two more for the new round */
1114: /* and guard bits */
1115: ff1 r8,r11 /* see if lower word has a bit for sticky */
1116: bb1 5,r8,stkycheck35 /* low word is all zero: see if upper word has any sticky bits */
1117: br.n nostky35 /* quit checking for sticky */
1118: set r9,r9,1<0> /* delay slot: set sticky bit */
1119: stkycheck35:
1120: mak r6,r7,5<5> /* place width into width field */
1121: mak r8,r5,r6 /* mask off shifted bits - 2 */
1122: ff1 r8,r8 /* see if r8 has any ones */
1123: bb1 5,r8,nostky35 /* do not set sticky if no ones found */
1124: set r9,r9,1<0> /* set sticky bit */
1125: nostky35:
1126: or r8,r0,32 /* look at what does not get shifted off plus */
1127: /* round and guard, remember that the r7 value */
1128: /* was adjusted so that it did not include */
1129: /* new round or new guard in shifted off bits */
1130: subu r8,r8,r7 /* complement width */
1131: mak r8,r8,5<5> /* shift width into width field */
1132: or r8,r7,r8 /* add offset field */
1133: extu r11,r5,r8 /* extract upper bits into low word */
1134: bb0 0,r11,nornd35 /* do not set new round bit */
1135: set r9,r9,1<1> /* set new round bit */
1136: nornd35:
1137: bb0 1,r11,nogrd35 /* do not set new guard bit */
1138: set r9,r9,1<2> /* set new guard bit */
1139: nogrd35:
1140: extu r11,r11,30<2> /* shift off remaining guard and round bits */
1141: br.n round /* round and assemble result */
1142: or r5,r0,r0 /* delay slot: clear high word */
1143:
1144: d53:
1145: cmp r6,r7,53 /* check to see if # of shifts is 53 */
1146: bb1 gt,r6,d54 /* more than 53: branch to see if shifts = 54 */
1147: bb1 0,r9,nostky53 /* skip checking if old sticky set */
1148: ff1 r8,r11 /* see if lower word has a bit for sticky */
1149: bb1 5,r8,stkycheck53 /* low word is all zero: see if upper word has any sticky bits */
1150: br.n nostky53 /* quit checking for sticky */
1151: set r9,r9,1<0> /* delay slot: set sticky bit */
1152: stkycheck53:
1153: mak r6,r5,19<0> /* r6 = high word bits 18..0, which are shifted into sticky */
1154: ff1 r8,r6 /* see if r6 has any ones */
1155: bb1 5,r8,nostky53 /* do not set sticky if no ones found */
1156: set r9,r9,1<0> /* set sticky bit */
1157: nostky53:
1158: bb0 19,r5,nornd53 /* bit 19 clear: leave round alone. NOTE(review): round is never cleared here, assumes it is clear on entry - confirm */
1159: set r9,r9,1<1> /* new round = bit 19 of high word */
1160: nornd53:
1161: set r9,r9,1<2> /* set new guard bit,this is hidden bit */
1162: or r5,r0,r0 /* clear high word */
1163: br.n round /* round and assemble result */
1164: or r11,r0,r0 /* delay slot: clear low word */
1165:
1166: d54:
1167: cmp r6,r7,54 /* check to see if # of shifts is 54 */
1168: bb1 gt,r6,d55 /* more than 54: branch to code for shifts >= 55 */
1169: bb1 0,r9,nostky54 /* skip checking if old sticky set */
1170: ff1 r8,r11 /* see if lower word has a bit for sticky */
1171: bb1 5,r8,stkycheck54 /* low word is all zero: see if upper word has any sticky bits */
1172: br.n nostky54 /* quit checking for sticky */
1173: set r9,r9,1<0> /* delay slot: set sticky bit */
1174: stkycheck54:
1175: mak r6,r5,20<0> /* r6 = high word bits 19..0, which are shifted into sticky */
1176: ff1 r8,r6 /* see if r6 has any ones */
1177: bb1 5,r8,nostky54 /* do not set sticky if no ones found */
1178: set r9,r9,1<0> /* set sticky bit */
1179: nostky54:
1180: set r9,r9,1<1> /* set new round bit,this is hidden bit */
1181: clr r9,r9,1<2> /* clear guard bit since nothing shifted in */
1182: or r5,r0,r0 /* clear high word */
1183: br.n round /* round and assemble result */
1184: or r11,r0,r0 /* delay slot: clear low word */
1185:
1186: d55:
1187: set r9,r9,1<0> /* set new sticky bit,this contains hidden bit */
1188: clr r9,r9,2<1> /* clear guard and round bits since nothing */
1189: /* is shifted into them for shifts of 55 or more */
1190: or r5,r0,r0 /* clear high word */
1191: or r11,r0,r0 /* clear low word, then fall through into round */
1192:
1193:
1194: /* The first thing the rounding code does is check whether guard, round, */
1195: /* or sticky is set. If all are clear, then there is no denormalization loss */
1196: /* and no need to round, so it branches to assemble the answer. */
1197: /* For rounding, a branch table is used. The two leftmost bits of the table */
1198: /* index are the rounding mode. The third bit is either the LSB of the */
1199: /* mantissa or the sign bit, depending on the rounding mode. The three LSBs */
1200: /* are the guard, round and sticky bits. */
1201:
1202: round:
1203: ff1 r8,r9 /* see if there is denormalization loss */
1204: bb1 5,r8,assemble /* r9 is zero: no denormalization loss or inexactness */
1205: extu r6,r10,2<modelo> /* extract rounding mode */
1206: bb1.n modehi,r10,signext /* use sign bit instead of LSB */
1207: mak r6,r6,2<4> /* delay slot: shift rounding mode to bits 5..4 */
1208: extu r7,r11,1<0> /* extract LSB */
1209: br.n grs /* skip sign extraction */
1210: mak r7,r7,1<3> /* delay slot: shift LSB over to bit 3 */
1211: signext:
1212: extu r7,r10,1<31> /* extract sign bit */
1213: mak r7,r7,1<3> /* shift sign bit over to bit 3 */
1214: grs:
1215: or r6,r6,r7 /* combine rounding mode with LSB or sign */
1216: or r6,r6,r9 /* or in guard, round, and sticky */
1217: or.u r1,r0,hi16(roundtable) /* form address of branch table */
1218: or r1,r1,lo16(roundtable)
1219: lda r6,r1[r6] /* scale offset into branch table */
1220: jmp.n r6 /* jump to branch table */
1221: set r9,r9,1<3> /* delay slot: set inexact flag in r9 */
1222:
1223: roundtable: /* index bits: 5..4 rounding mode, 3 LSB or sign, 2..0 guard round sticky */
1224: br noaddone /* nearest, LSB 0, grs 000 */
1225: br noaddone /* nearest, LSB 0, grs 001 */
1226: br noaddone /* nearest, LSB 0, grs 010 */
1227: br noaddone /* nearest, LSB 0, grs 011 */
1228: br noaddone /* nearest, LSB 0, grs 100: tie, LSB already even */
1229: br addone /* nearest, LSB 0, grs 101 */
1230: br addone /* nearest, LSB 0, grs 110 */
1231: br addone /* nearest, LSB 0, grs 111 */
1232: br noaddone /* nearest, LSB 1, grs 000 */
1233: br noaddone /* nearest, LSB 1, grs 001 */
1234: br noaddone /* nearest, LSB 1, grs 010 */
1235: br noaddone /* nearest, LSB 1, grs 011 */
1236: br addone /* nearest, LSB 1, grs 100: tie, round up to even */
1237: br addone /* nearest, LSB 1, grs 101 */
1238: br addone /* nearest, LSB 1, grs 110 */
1239: br addone /* nearest, LSB 1, grs 111 */
1240: br noaddone /* toward zero, LSB 0, grs 000: this mode never adds one */
1241: br noaddone /* toward zero, LSB 0, grs 001 */
1242: br noaddone /* toward zero, LSB 0, grs 010 */
1243: br noaddone /* toward zero, LSB 0, grs 011 */
1244: br noaddone /* toward zero, LSB 0, grs 100 */
1245: br noaddone /* toward zero, LSB 0, grs 101 */
1246: br noaddone /* toward zero, LSB 0, grs 110 */
1247: br noaddone /* toward zero, LSB 0, grs 111 */
1248: br noaddone /* toward zero, LSB 1, grs 000 */
1249: br noaddone /* toward zero, LSB 1, grs 001 */
1250: br noaddone /* toward zero, LSB 1, grs 010 */
1251: br noaddone /* toward zero, LSB 1, grs 011 */
1252: br noaddone /* toward zero, LSB 1, grs 100 */
1253: br noaddone /* toward zero, LSB 1, grs 101 */
1254: br noaddone /* toward zero, LSB 1, grs 110 */
1255: br noaddone /* toward zero, LSB 1, grs 111 */
1256: br noaddone /* toward negative, sign 0, grs 000: positive values truncate */
1257: br noaddone /* toward negative, sign 0, grs 001 */
1258: br noaddone /* toward negative, sign 0, grs 010 */
1259: br noaddone /* toward negative, sign 0, grs 011 */
1260: br noaddone /* toward negative, sign 0, grs 100 */
1261: br noaddone /* toward negative, sign 0, grs 101 */
1262: br noaddone /* toward negative, sign 0, grs 110 */
1263: br noaddone /* toward negative, sign 0, grs 111 */
1264: br noaddone /* toward negative, sign 1, grs 000 */
1265: br addone /* toward negative, sign 1, grs 001: negative values grow in magnitude */
1266: br addone /* toward negative, sign 1, grs 010 */
1267: br addone /* toward negative, sign 1, grs 011 */
1268: br addone /* toward negative, sign 1, grs 100 */
1269: br addone /* toward negative, sign 1, grs 101 */
1270: br addone /* toward negative, sign 1, grs 110 */
1271: br addone /* toward negative, sign 1, grs 111 */
1272: br noaddone /* toward positive, sign 0, grs 000 */
1273: br addone /* toward positive, sign 0, grs 001: positive values grow in magnitude */
1274: br addone /* toward positive, sign 0, grs 010 */
1275: br addone /* toward positive, sign 0, grs 011 */
1276: br addone /* toward positive, sign 0, grs 100 */
1277: br addone /* toward positive, sign 0, grs 101 */
1278: br addone /* toward positive, sign 0, grs 110 */
1279: br addone /* toward positive, sign 0, grs 111 */
1280: br noaddone /* toward positive, sign 1, grs 000: negative values truncate */
1281: br noaddone /* toward positive, sign 1, grs 001 */
1282: br noaddone /* toward positive, sign 1, grs 010 */
1283: br noaddone /* toward positive, sign 1, grs 011 */
1284: br noaddone /* toward positive, sign 1, grs 100 */
1285: br noaddone /* toward positive, sign 1, grs 101 */
1286: br noaddone /* toward positive, sign 1, grs 110 */
1287: br noaddone /* toward positive, sign 1, grs 111 */
1288:
1289: /* Round by adding a one to the LSB of the mantissa. */
1290: addone:
1291: or r6,r0,1 /* load a 1 into r6 so that add.co can be used */
1292: add.co r11,r11,r6 /* add a one to the lower word of result */
1293: bb0.n destsize,r12,noaddone /* single result,forget carry */
1294: set r9,r9,1<4> /* delay slot (always run): indicate that a 1 has been added */
1295: add.ci r5,r5,r0 /* double result: propagate carry into high word */
1296:
1297: noaddone:
1298: set r2,r2,1<inexact> /* set the inexact bit in r2 */
1299: set r2,r2,1<underflow> /* set the underflow bit in r2, then fall into assemble */
1300:
1301: /* Assemble the result of the denormalization routine for writeback to the */
1302: /* destination register. The exponent of a denormalized number is zero, */
1303: /* so simply assemble the sign and the new mantissa. */
1304:
1305: assemble:
1306: bb1 destsize,r12,doubassem /* assemble double result */
1307: bb0 sign,r10,exassems /* exit assemble if sign is zero */
1308: set r11,r11,1<sign> /* make the single result in r11 negative */
1309: exassems:
1310: br Ureturn /* single result is complete, return */
1311:
1312: doubassem:
1313: bb0.n sign,r10,signclr /* do not set sign in r10 */
1314: or r10,r5,r0 /* delay slot (always run): load high word from r5 into r10 */
1315: set r10,r10,1<sign> /* high word with sign loaded */
1316: signclr:
1317: /* FALLTHROUGH into Ureturn */
1318: /* br Ureturn */
1319:
1320: /* Return to fpui. */
1321: Ureturn:
1322: ld r1,r31,0 /* load return address from 0(r31) */
1323: jmp r1 /* return to the caller */
1324:
1325: /*
1326: * FPoverflow
1327: */
1328:
1329: ASLOCAL(FPoverflow)
1330: st r1,r31,0 /* save return address */
1331: set r2,r2,1<overflow> /* set the overflow bit in r2 */
1332: set r2,r2,1<inexact> /* set the inexact bit in r2 as well */
1333:
1334: /* Determine which rounding mode to use for the default procedure. */
1335:
1336: bb1 modehi,r10,signed /* mode is either round toward pos. or neg. */
1337: bb0 modelo,r10,OFnearest /* rounding mode is round nearest */
1338: br OFzero /* rounding mode is round zero */
1339: signed:
1340: bb0 modelo,r10,OFnegative /* rounding mode is round negative */
1341: br positive /* rounding mode is round positive */
1342:
1343:
1344: /* In the round toward nearest mode, positive values are rounded to */
1345: /* positive infinity and negative values are rounded to negative infinity. */
1346: /* The single or double precision value is built from immediate constants. */
1347:
1348: OFnearest:
1349: bb1.n destsize,r12,neardouble /* branch to neardouble if */
1350: /* double result */
1351: mask.u r5,r10,0x8000 /* delay slot (always run): r5 = sign bit of r10 only */
1352: or.u r11,r0,hi16(0x7f800000) /* load single infinity constant */
1353: or r11,r11,lo16(0x7f800000)
1354: br.n FPof_return /* return with result */
1355: or r11,r5,r11 /* delay slot: adjust sign */
1356: neardouble:
1357: or r11,r0,r0 /* load lower word of infinity */
1358: or.u r10,r0,hi16(0x7ff00000) /* load upper word of infinity */
1359: or r10,r10,lo16(0x7ff00000)
1360: br.n FPof_return /* return with result */
1361: or r10,r5,r10 /* delay slot: adjust sign */
1362:
1363:
1364: /* In the round toward zero mode, positive values are rounded to the largest */
1365: /* positive finite number and negative values are rounded to the largest */
1366: /* negative finite number. */
1367: /* The single or double precision value is built from immediate constants. */
1368:
1369: OFzero:
1370: bb1.n destsize,r12,zerodouble /* branch to zerodouble if */
1371: /* double result */
1372: mask.u r5,r10,0x8000 /* delay slot (always run): r5 = sign bit of r10 only */
1373: or.u r11,r0,hi16(0x7f7fffff) /* load single finite number constant */
1374: or r11,r11,lo16(0x7f7fffff)
1375: br.n FPof_return /* return with result */
1376: or r11,r5,r11 /* delay slot: adjust sign */
1377: zerodouble:
1378: set r11,r0,0<0> /* load lower word of finite number (presumably all ones - confirm) */
1379: or.u r10,r0,hi16(0x7fefffff) /* load upper word of finite number */
1380: or r10,r10,lo16(0x7fefffff)
1381: br.n FPof_return /* return with result */
1382: or r10,r5,r10 /* delay slot: adjust sign */
1383:
1384:
1385: /* In the round toward positive mode, positive values are rounded to */
1386: /* positive infinity and negative values are rounded to the largest */
1387: /* negative finite number. */
1388: /* The single or double precision value is built from immediate constants. */
1389:
1390: positive:
1391: bb1 destsize,r12,posdouble /* branch to section for double result */
1392: possingle:
1393: bb1 sign,r10,possingleneg /* branch to section for negatives */
1394: possinglepos:
1395: or.u r11,r0,hi16(0x7f800000) /* load single infinity constant */
1396: br.n FPof_return /* return with result */
1397: or r11,r11,lo16(0x7f800000) /* delay slot: low half of the constant */
1398: possingleneg:
1399: or.u r11,r0,hi16(0x7f7fffff) /* load single finite number constant */
1400: or r11,r11,lo16(0x7f7fffff)
1401: br.n FPof_return /* return with result */
1402: set r11,r11,1<sign> /* delay slot: set sign for negative */
1403: posdouble:
1404: bb1 sign,r10,posdoubleneg /* branch to negative double results */
1405: posdoublepos:
1406: or r11,r0,r0 /* load lower word of double infinity */
1407: or.u r10,r0,hi16(0x7ff00000) /* load upper word of infinity */
1408: br.n FPof_return /* return with result */
1409: or r10,r10,lo16(0x7ff00000) /* delay slot: low half of the constant */
1410: posdoubleneg:
1411: set r11,r0,0<0> /* load lower word of finite number (presumably all ones - confirm) */
1412: or.u r10,r0,hi16(0x7fefffff) /* load upper word of finite number */
1413: or r10,r10,lo16(0x7fefffff)
1414: br.n FPof_return /* return with result */
1415: set r10,r10,1<sign> /* delay slot: set sign for negative */
1416:
1417:
1418: /* In the round toward negative mode, positive values are rounded to the largest */
1419: /* positive finite number and negative values are rounded to negative infinity. */
1420: /* The single or double precision value is built from immediate constants. */
1421:
1422: OFnegative:
1423: bb1 destsize,r12,negdouble /* branch to section for double result */
1424: negsingle:
1425: bb1 sign,r10,negsingleneg /* branch to section for negatives */
1426: negsinglepos:
1427: or.u r11,r0,hi16(0x7f7fffff) /* load single finite number constant */
1428: br.n FPof_return /* return with result */
1429: or r11,r11,lo16(0x7f7fffff) /* delay slot: low half of the constant */
1430: negsingleneg:
1431: or.u r11,r0,hi16(0x7f800000) /* load single infinity constant */
1432: or r11,r11,lo16(0x7f800000)
1433: br.n FPof_return /* return with result */
1434: set r11,r11,1<sign> /* delay slot: set sign for negative */
1435: negdouble:
1436: bb1 sign,r10,negdoubleneg /* branch to negative double results */
1437: negdoublepos:
1438: set r11,r0,0<0> /* load lower word of finite number (presumably all ones - confirm) */
1439: or.u r10,r0,hi16(0x7fefffff) /* load upper word of finite number */
1440: br.n FPof_return /* return with result */
1441: or r10,r10,lo16(0x7fefffff) /* delay slot: low half of the constant */
1442: negdoubleneg:
1443: or r11,r0,r0 /* load lower word of double infinity */
1444: or.u r10,r0,hi16(0x7ff00000) /* load upper word of infinity */
1445: or r10,r10,lo16(0x7ff00000)
1446: set r10,r10,1<sign> /* set sign for negative, then fall into FPof_return */
1447:
1448: FPof_return:
1449: ld r1,r31,0 /* reload the return address saved at FPoverflow entry */
1450: jmp r1 /* return to the caller */
1451:
1452: /* If either S1 or S2 is a signalling NaN, then set the invalid operation */
1453: /* bit of the FPSR. */
1454: /* If S1 is the only NaN or one of two NaNs, then write */
1455: /* a quiet S1 to the result. A signalling NaN must be made quiet before */
1456: /* it can be written, but a signalling S2 is not modified in this routine */
1457: /* if S1 is a signalling NaN. */
1458: ASLOCAL(NaN)
1459: bb0.n s1nan,r12,S2sigcheck /* S1 is not a NaN */
1460: st r1,r31,0 /* delay slot (always run): save return address */
1461: bb1 sigbit,r5,S2sigcheck /* sigbit set: S1 is not a signaling NaN */
1462: set r2,r2,1<oper> /* signaling S1: set the oper bit in r2 */
1463: br.n S1write /* FPSR bit already set, S1 is made quiet, */
1464: /* and since we always write S1 if it is a */
1465: /* NaN, write S1 and skip rest of routine */
1466: set r5,r5,1<sigbit> /* delay slot: make S1 a quiet NaN */
1467:
1468: ASLOCAL(S2sigcheck)
1469: bb0 s2nan,r12,S1write /* S2 is not a NaN */
1470: bb1 sigbit,r7,S1write /* sigbit set: S2 is not a signaling NaN */
1471: set r2,r2,1<oper> /* signaling S2: set the oper bit in r2 */
1472: set r7,r7,1<sigbit> /* make S2 a quiet NaN, then fall into S1write */
1473:
1474:
1475: /* Write a single or double precision quiet NaN unless the operation is FCMP. */
1476: /* If the operation is FCMP, then set the not comparable bit in the result. */
1477:
1478: ASLOCAL(S1write)
1479: bb0 s1nan,r12,S2write /* do not write S1 if it is not a NaN */
1480: extu r10,r9,5<11> /* extract opcode (bits 15..11 of r9) */
1481: cmp r11,r10,FCMPop /* compare to FCMP */
1482: bb1 ne,r11,S1noFCMP /* operation is not FCMP */
1483: set r6,r0,1<nc> /* FCMP: result has only the not comparable bit */
1484: br.n FPnan_return
1485: set r6,r6,1<ne> /* delay slot: and the not equal bit */
1486: ASLOCAL(S1noFCMP)
1487: bb1.n dsize,r9,wrdoubS1 /* double destination */
1488: set r5,r5,11<20> /* delay slot (always run): set all exponent bits to 1 */
1489: /* The single result will be formed the same way whether S1 is a single or double */
1490: ASLOCAL(wrsingS1)
1491: mak r10,r5,28<3> /* shift r5 left 3, wipe out extra exponent bits */
1492: extu r11,r6,3<29> /* get the top three bits of r6 as the lower three bits of mantissa */
1493: or r10,r10,r11 /* combine all of result except sign */
1494: clr r6,r5,31<0> /* r6 = r5 with bits 30..0 cleared, i.e. the sign only */
1495: br.n FPnan_return
1496: or r6,r6,r10 /* delay slot: form result in r6 */
1497:
1498: ASLOCAL(wrdoubS1)
1499: set r6,r6,29<0> /* set the low 29 bits of the lower word */
1500: br FPnan_return /* no other modification necessary for writing */
1501: /* double to double, so return */
1502:
1503: ASLOCAL(S2write)
1504: extu r10,r9,5<11> /* extract opcode (bits 15..11 of r9) */
1505: cmp r11,r10,FCMPop /* compare to FCMP */
1506: bb1.n ne,r11,S2noFCMP /* operation is not FCMP */
1507: set r7,r7,11<20> /* delay slot (always run): set all exponent bits of S2 to 1 */
1508: set r6,r0,1<nc> /* FCMP: result has only the not comparable bit */
1509: br.n FPnan_return
1510: set r6,r6,1<ne> /* delay slot: and the not equal bit */
1511: ASLOCAL(S2noFCMP)
1512: bb1.n dsize,r9,wrdoubS2 /* double destination */
1513: set r5,r5,11<20> /* delay slot (always run): sets exponent bits of r5, not r7. NOTE(review): r7 was already done in S2write; looks copied from S1noFCMP - confirm r5 is intended */
1514: /* The single result will be formed the same way whether S2 is a single or double */
1515: ASLOCAL(wrsingS2)
1516: mak r10,r7,28<3> /* shift r7 left 3, wipe out extra exponent bits */
1517: extu r11,r8,3<29> /* get the top three bits of r8 as the lower three bits of mantissa */
1518: or r10,r10,r11 /* combine all of result except sign */
1519: clr r6,r7,31<0> /* r6 = r7 with bits 30..0 cleared, i.e. the sign only */
1520: br.n FPnan_return
1521: or r6,r6,r10 /* delay slot: form result in r6 */
1522:
1523: ASLOCAL(wrdoubS2)
1524: set r6,r8,29<0> /* r6 = r8 with the low 29 bits set. NOTE(review): unlike the S1 path nothing copies r7 into r5 here - confirm where the caller takes the high word from */
1525:
1526: /* Return from this subroutine with the result. */
1527:
1528: ASLOCAL(FPnan_return)
1529: /* common exit of the NaN handler, also reached by */
1530: /* fall through from wrdoubS2 */
1531: ld r1,r31, 0 /* retrieve return address */
1532: jmp r1 /* return to the caller */
1533:
1534: /*
1535: * infinity
1536: */
1537:
1538: /* Extract the opcode, compare to a constant, and branch to the code */
1539: /* for the instruction. */
1540:
1541: ASLOCAL(infinity)
1542: extu r10,r9,5<11> /* extract opcode (bits 15..11 of r9) */
1543: cmp r11,r10,FADDop /* compare to FADD */
1544: bb1.n eq,r11,FADD /* operation is FADD */
1545: st r1,r31,0 /* delay slot (always run): save return address */
1546: cmp r11,r10,FSUBop /* compare to FSUB */
1547: bb1 eq,r11,FSUB /* operation is FSUB */
1548: cmp r11,r10,FCMPop /* compare to FCMP */
1549: bb1 eq,r11,FCMP /* operation is FCMP */
1550: cmp r11,r10,FMULop /* compare to FMUL */
1551: bb1 eq,r11,FMUL /* operation is FMUL */
1552: cmp r11,r10,FDIVop /* compare to FDIV */
1553: bb1 eq,r11,FDIV /* operation is FDIV */
1554: #if 0
1555: cmp r11,r10,FSQRTop /* compare to FSQRT */
1556: bb1 eq,r11,FSQRT /* operation is FSQRT */
1557: #endif
1558: cmp r11,r10,INTop /* compare to INT */
1559: bb1 eq,r11,FP_inf_overflw /* operation is INT */
1560: cmp r11,r10,NINTop /* compare to NINT */
1561: bb1 eq,r11,FP_inf_overflw /* operation is NINT */
1562: cmp r11,r10,TRNCop /* compare to TRNC */
1563: bb1 eq,r11,FP_inf_overflw /* operation is TRNC; any other opcode falls into FADD */
1564:
1565:
1566: /* Adding infinities of opposite signs will cause an exception, */
1567: /* but all other operands will result in a correctly signed infinity. */
1568:
1569: FADD:
1570: bb0 s1inf,r12,addS2write /* branch if S1 not infinity */
1571: bb0 s2inf,r12,addS1write /* S2 is not inf., so branch to write S1 */
1572: bb1 sign,r5,addS1neg /* both are infinity: handle case of S1 negative */
1573: addS1pos:
1574: bb1 sign,r7,excpt /* adding infinities of different */
1575: /* signs causes an exception */
1576: br poswrinf /* branch to write positive infinity */
1577: addS1neg:
1578: bb0 sign,r7,excpt /* adding infinities of different */
1579: /* signs causes an exception */
1580: br negwrinf /* branch to write negative infinity */
1581: addS1write:
1582: bb0 sign,r5,poswrinf /* result takes the sign of S1: positive infinity */
1583: br negwrinf /* branch to write negative infinity */
1584: addS2write:
1585: bb0 sign,r7,poswrinf /* result takes the sign of S2: positive infinity */
1586: br negwrinf /* branch to write negative infinity */
1587:
1588:
1589: /* Subtracting infinities of the same sign will cause an exception, */
1590: /* but all other operands will result in a correctly signed infinity. */
1591:
1592: FSUB:
1593: bb0 s1inf,r12,subS2write /* branch if S1 not infinity */
1594: bb0 s2inf,r12,subS1write /* S2 is not inf., so branch to write S1 */
1595: bb1 sign,r5,subS1neg /* both are infinity: handle case of S1 negative */
1596: subS1pos:
1597: bb0 sign,r7,excpt /* subtracting infinities of the same */
1598: /* sign causes an exception */
1599: br poswrinf /* branch to write positive infinity */
1600: subS1neg:
1601: bb1 sign,r7,excpt /* subtracting infinities of the same */
1602: /* sign causes an exception */
1603: br negwrinf /* branch to write negative infinity */
1604: subS1write:
1605: bb0 sign,r5,poswrinf /* result takes the sign of S1: positive infinity */
1606: br negwrinf /* branch to write negative infinity */
1607: subS2write:
1608: bb1 sign,r7,poswrinf /* result takes the opposite sign of S2: positive infinity */
1609: br negwrinf /* branch to write negative infinity */
1610:
1611:
1612: /* Compare the operands, at least one of which is infinity, and set the */
1613: /* correct bits in the destination register. */
1614:
1615: FCMP:
1616: bb0.n s1inf,r12,FCMPS1f /* branch for finite S1 */
1617: set r4,r0,1<cp> /* delay slot (always run): since neither S1 or S2 is a NaN, */
1618: /* set cp */
1619: FCMPS1i:
1620: bb1 sign,r5,FCMPS1ni /* branch to negative S1i */
1621: FCMPS1pi:
1622: bb0 s2inf,r12,FCMPS1piS2f /* branch to finite S2 with S1pi */
1623: FCMPS1piS2i:
1624: bb1 sign,r7,FCMPS1piS2ni /* branch to negative S2i with S1pi */
1625: FCMPS1piS2pi:
1626: set r4,r4,1<eq> /* set eq bit */
1627: set r4,r4,1<le> /* set le bit */
1628: set r4,r4,1<ge> /* set ge bit */
1629: set r4,r4,1<ib> /* set ib bit */
1630: br.n move
1631: set r4,r4,1<ob> /* delay slot: set ob bit */
1632: FCMPS1piS2ni:
1633: set r4,r4,1<ne> /* set ne bit */
1634: set r4,r4,1<gt> /* set gt bit */
1635: br.n move
1636: set r4,r4,1<ge> /* set ge bit */
1637: FCMPS1piS2f:
1638: set r4,r4,1<ne> /* set ne bit */
1639: set r4,r4,1<gt> /* set gt bit */
1640: bsr.n _ASM_LABEL(zero) /* see if any of the operands are zero */
1641: set r4,r4,1<ge> /* set ge bit */
1642: bb0 s2zero,r12,FCMPS1piS2nz /* check for negative if s2 not zero */
1643: set r4,r4,1<ou> /* set ou bit */
1644: br.n move
1645: set r4,r4,1<ob> /* set ob bit */
1646: FCMPS1piS2nz:
1647: bb1 sign,r7,move /* return if s2 is negative */
1648: FCMPS1piS2pf:
1649: set r4,r4,1<ou> /* set ou bit */
1650: br.n move
1651: set r4,r4,1<ob> /* set ob bit */
1652: FCMPS1ni:
1653: bb0 s2inf,r12,FCMPS1niS2f /* branch to finite S2 with S1ni */
1654: FCMPS1niS2i:
1655: bb1 sign,r7,FCMPS1niS2ni /* branch to negative S2i with S1ni */
1656: FCMPS1niS2pi:
1657: set r4,r4,1<ne> /* set eq bit */
1658: set r4,r4,1<le> /* set le bit */
1659: set r4,r4,1<lt> /* set lt bit */
1660: set r4,r4,1<ou> /* set ou bit */
1661: br.n move
1662: set r4,r4,1<ob> /* set ob bit */
1663: FCMPS1niS2ni:
1664: set r4,r4,1<eq> /* set eq bit */
1665: set r4,r4,1<le> /* set le bit */
1666: br.n move
1667: set r4,r4,1<ge> /* set ge bit */
1668: FCMPS1niS2f:
1669: set r4,r4,1<ne> /* set eq bit */
1670: set r4,r4,1<le> /* set le bit */
1671: bsr.n _ASM_LABEL(zero) /* see if any of the operands are zero */
1672: set r4,r4,1<lt> /* set lt bit */
1673: bb0 s2zero,r12,FCMPS1niS2nz /* branch if s2 is not zero */
1674: set r4,r4,1<ou> /* set ou bit */
1675: br.n move
1676: set r4,r4,1<ob> /* set ob bit */
1677: FCMPS1niS2nz:
1678: bb1 sign,r7,move /* return if s2 is negative */
1679: set r4,r4,1<ou> /* set ou bit */
1680: br.n move
1681: set r4,r4,1<ob> /* set ob bit */
1682: FCMPS1f:
1683: bb1 sign,r5,FCMPS1nf /* branch to negative S1f */
1684: FCMPS1pf:
1685: bb1.n sign,r7,FCMPS1pfS2ni /* branch to negative S2i with S1pf */
1686: set r4,r4,1<ne> /* set ne bit */
1687: FCMPS1pfS2pi:
1688: set r4,r4,1<le> /* set le bit */
1689: set r4,r4,1<lt> /* set lt bit */
1690: bsr.n _ASM_LABEL(zero)
1691: set r4,r4,1<ib> /* set ib bit */
1692: bb0 s1zero,r12,FCMPS1pfS2pinozero
1693: FCMPS1pfS2pizero:
1694: br.n move
1695: set r4,r4,1<ob> /* set ob bit */
1696: FCMPS1pfS2pinozero:
1697: br.n move
1698: set r4,r4,1<in> /* set in bit */
1699: FCMPS1pfS2ni:
1700: set r4,r4,1<gt> /* set gt bit */
1701: br.n move
1702: set r4,r4,1<ge> /* set ge bit */
1703: FCMPS1nf:
1704: bb1.n sign,r7,FCMPS1nfS2ni /* branch to negative S2i with S1nf */
1705: set r4,r4,1<ne> /* set ne bit */
1706: set r4,r4,1<le> /* set gt bit */
1707: set r4,r4,1<lt> /* set ge bit */
1708: bsr.n _ASM_LABEL(zero) /* see which of the operands are zero */
1709: set r4,r4,1<ob> /* set ob bit */
1710: bb0 s1zero,r12,FCMPS1nfS2pinozero /* no ls and lo */
1711: FCMPS1nfS2pizero:
1712: br.n move
1713: set r4,r4,1<ib> /* set ib bit */
1714: FCMPS1nfS2pinozero:
1715: br.n move
1716: set r4,r4,1<ou> /* set ou bit */
1717: FCMPS1nfS2ni:
1718: set r4,r4,1<gt> /* set gt bit */
1719: set r4,r4,1<ge> /* set ge bit */
1720:
1721: move:
1722: br.n inf_return
1723: or r6,r0,r4 /* transfer answer to r6 */
1724:
1725:
1726: /* Multiplying infinity and zero causes an exception, but all other */
1727: /* operations produce a correctly signed infinity. */
1728:
/*
 * FMUL: S1 * S2 where at least one operand is an infinity.
 * Calls `zero' to flag a zero operand in r12; infinity times zero goes
 * to excpt.  Otherwise the result is an infinity whose sign is chosen
 * from the sign bits of r5 (S1) and r7 (S2).
 */
1729: FMUL:
1730: bsr _ASM_LABEL(zero) /* see if any of the operands are zero */
1731: bb1 s1zero,r12,excpt /* infinity X 0 causes an exception */
1732: bb1 s2zero,r12,excpt /* infinity X 0 causes an exception */
1733: bb1 sign,r5,FMULS1neg /* handle negative cases of S1 */
1734: bb0 sign,r7,poswrinf /* + X + = + */
1735: br negwrinf /* + X - = - */
1736: FMULS1neg:
1737: bb1 sign,r7,poswrinf /* - X - = + */
1738: br negwrinf /* - X + = - */
1739:
1740:
1741: /* Dividing infinity by infinity causes an exception, but dividing */
1742: /* infinity by a finite yields a correctly signed infinity, and */
1743: /* dividing a finite by an infinity produces a correctly signed zero. */
1744:
/*
 * FDIV: S1 / S2 where at least one operand is an infinity.
 * If S1 is not flagged infinite in r12, S2 is treated as the infinity
 * and the result is a signed zero; inf / inf goes to excpt; inf / finite
 * is a signed infinity.  Signs are read from r5 (S1) and r7 (S2).
 * Label naming: p/n = positive/negative finite, pi/mi = +/- infinity.
 */
1745: FDIV:
1746: bb1 s1inf,r12,FDIVS1inf /* handle case of S1 being infinity */
1747: bb1 sign,r5,FDIVS1nf /* handle cases of S1 being neg. non-inf. */
1748: bb1 sign,r7,FDIVS1pfS2mi /* handle case of negative S2 */
1749: FDIVS1pfS2pi:
1750: br poswrzero /* +f / +inf = +0 */
1751: FDIVS1pfS2mi:
1752: br negwrzero /* +f / -inf = -0 */
1753: FDIVS1nf:
1754: bb1 sign,r7,FDIVS1nfS2mi /* handle case of negative S2 */
1755: FDIVS1nfS2pi:
1756: br negwrzero /* -f / +inf = -0 */
1757: FDIVS1nfS2mi:
1758: br poswrzero /* -f / -inf = +0 */
1759: FDIVS1inf:
1760: bb1 s2inf,r12,excpt /* inf / inf = exception */
1761: bb1 sign,r5,FDIVS1mi /* handle cases of S1 being neg. inf. */
1762: bb1 sign,r7,FDIVS1piS2nf /* handle case of negative S2 */
1763: FDIVS1piS2pf:
1764: br poswrinf /* +inf / +f = +inf */
1765: FDIVS1piS2nf:
1766: br negwrinf /* +inf / -f = -inf */
1767: FDIVS1mi:
1768: bb1 sign,r7,FDIVS1miS2nf /* handle case of negative S2 */
1769: FDIVS1miS2pf:
1770: br negwrinf /* -inf / +f = -inf */
1771: FDIVS1miS2nf:
1772: br poswrinf /* -inf / -f = +inf */
1773:
1774:
1775: /* The square root of positive infinity is positive infinity, */
1776: /* but the square root of negative infinity is a NaN */
1777:
/* FSQRT handling is compiled out by the #if 0 below. */
1778: #if 0
1779: FSQRT:
1780: bb0 sign,r7,poswrinf /* write sqrt(inf) = inf */
1781: br excpt /* write sqrt(-inf) = NaN */
1782: #endif
1783:
/*
 * excpt: invalid operation on an infinity.  Sets the `oper' bit in r2
 * (presumably the FPSR image -- confirm against the caller), fills both
 * r5 and r6 with all-ones as the NaN result and leaves via inf_return.
 * The second `set' executes in the delay slot of br.n.
 */
1784: excpt:
1785: set r2,r2,1<oper>
1786: set r5,r0,0<0> /* write NaN into r5 */
1787: br.n inf_return
1788: set r6,r0,0<0> /* write NaN into r6, writing NaN''s into */
1789: /* both of these registers is quicker than */
1790: /* checking for single or double precision */
1791:
1792:
1793: /* Write positive infinity of the correct precision */
1794:
/*
 * poswrinf: produce +infinity.  The dsize bit of r9 selects the format:
 * single precision goes in r6, double precision in r5 (high) / r6 (low).
 */
1795: poswrinf:
1796: bb1 dsize,r9,poswrinfd /* branch to write double precision inf. */
1797: br.n inf_return
1798: or.u r6,r0,0x7f80 /* load r6 with single precision pos inf. */
1799: poswrinfd:
1800: or.u r5,r0,0x7ff0 /* load double precision pos inf. */
1801: br.n inf_return
1802: or r6,r0,r0 /* low word of the double is zero */
1803:
1804:
1805: /* Write negative infinity of the correct precision */
1806:
/*
 * negwrinf: produce -infinity.  The dsize bit of r9 selects the format:
 * single precision goes in r6, double precision in r5 (high) / r6 (low).
 */
1807: negwrinf:
1808: bb1 dsize,r9,negwrinfd /* branch to write double precision inf. */
1809: br.n inf_return
1810: or.u r6,r0,0xff80 /* load r6 with single precision neg inf. */
1811: negwrinfd:
1812: or.u r5,r0,0xfff0 /* load double precision neg inf. */
1813: br.n inf_return
1814: or r6,r0,r0 /* low word of the double is zero */
1815:
1816:
1817: /* Write a positive zero disregarding precision. */
1818:
/* poswrzero: clear both r5 and r6 and return through inf_return. */
1819: poswrzero:
1820: or r5,r0,r0 /* write to both high word and low word now */
1821: br.n inf_return /* it does not matter that both are written */
1822: or r6,r0,r0 /* delay slot: clear the low word */
1823:
1824:
1825: /* Write a negative zero of the correct precision. */
1826:
/*
 * negwrzero: produce -0.  r6 is cleared first; for a single precision
 * destination the sign bit is set in r6, for a double precision
 * destination (dsize bit of r9 set) it is set in r5 instead.
 */
1827: negwrzero:
1828: or r6,r0,r0 /* clear low word */
1829: bb1 dsize,r9,negwrzerod /* branch to write double precision zero */
1830: br.n inf_return
1831: set r6,r6,1<31> /* set sign bit */
1832: negwrzerod:
1833: or r5,r0,r0 /* clear high word */
1834: br.n inf_return
1835: set r5,r5,1<31> /* set sign bit */
1836:
/*
 * FP_inf_overflw: flags oper, overflow and inexact in r2, then produces
 * a saturated integer in r6 according to the sign bit of r7.
 * The `set r6,r6,31<0>' sits in the delay slot of bb0.n, so it runs on
 * both the positive (early return) and the negative path.
 * NOTE(review): that `set' sources r6 rather than r0, so bit 31 of the
 * incoming r6 is preserved; the result is only the "largest positive
 * integer" if bit 31 of r6 is clear on entry -- confirm with the caller.
 */
1837: FP_inf_overflw:
1838: set r2,r2,1<oper>
1839: set r2,r2,1<overflow>
1840: set r2,r2,1<inexact>
1841:
1842: bb0.n sign,r7,inf_return /* if positive then return */
1843:
1844: set r6,r6,31<0> /* set result to largest positive integer */
1845: or.c r6,r0,r6 /* complement r6, giving largest negative int. */
1846:
/* Common exit of the infinity handlers: return to the address stored at r31+0. */
1847: inf_return:
1848: ld r1,r31,0 /* load return address */
1849: jmp r1
1850:
1851: /*
1852: * denorm
1853: */
1854:
1855: /* Check to see if either S1 or S2 is a denormalized number. First */
1856: /* extract the exponent to see if it is zero, and then check to see if */
1857: /* the mantissa is not zero. If the number is denormalized, then set */
1858: /* bit 1 (for S1) or bit 0 (for S2) in r12. */
/* Bits 9 and 7 of r9 are tested for the precision of S1 and S2 */
/* (these are the values of the s1size and s2size defines). */
1859:
1860: ASLOCAL(denorm)
1861: st r1,r31,0 /* save return address */
1862: dnmcheckS1:
1863: extu r10,r5,11<20> /* extract exponent */
1864: bcnd ne0,r10,dnmsetS2 /* S1 is not a denorm, so S2 must be */
1865: bb1.n 9,r9,dnmcheckS1d /* S1 is double precision */
1866: mak r10,r5,20<3> /* mak field with only mantissa bits */
1867: /* into final result */
1868: dnmcheckS1s:
1869: extu r11,r6,3<29> /* get three low bits of mantissa */
1870: or r10,r10,r11 /* assemble all of the mantissa bits */
1871: bcnd eq0,r10,dnmsetS2 /* S1 is not a denorm, so S2 must be */
1872: br dnmsetS1 /* S1 is a denorm */
1873:
1874: dnmcheckS1d:
1875: or r10,r6,r10 /* or all of mantissa bits */
1876: bcnd eq0,r10,dnmsetS2 /* S1 is not a denorm, so S2 must be */
1877: dnmsetS1:
1878: set r12,r12,1<1> /* S1 is a denorm */
1879:
1880: dnmcheckS2:
1881: extu r10,r7,11<20> /* extract exponent */
1882: bcnd ne0,r10,S1form /* S2 is not a denorm */
1883: bb1.n 7,r9,dnmcheckS2d /* S2 is double precision */
1884: mak r10,r7,20<3> /* mak field with only mantissa bits */
1885: dnmcheckS2s:
1886: extu r11,r8,3<29> /* get three low bits of mantissa */
1887: or r10,r10,r11 /* assemble all of the mantissa bits */
1888: bcnd eq0,r10,S1form /* S2 is not a denorm */
1889: br dnmsetS2 /* S2 is a denorm */
1890: dnmcheckS2d:
1891: or r10,r8,r10 /* or all of mantissa bits */
1892: bcnd eq0,r10,S1form /* S2 is not a denorm */
1893: dnmsetS2:
1894: set r12,r12,1<0> /* S2 is a denorm */
1895:
1896:
1897: /* Since the operations are going to be reperformed with modified denorms, */
1898: /* the operands which were initially single precision need to be modified */
1899: /* back to single precision. */
1900:
/* S1form: if S1 is single precision (bit 9 of r9 clear), rebuild it in */
/* single format in r6 from the words held in r5 and r6. */
1901: S1form:
1902: bb1 9,r9,S2form /* S1 is double precision, so do not */
1903: /* modify S1 into single format */
1904: mak r11,r5,28<3> /* over final exponent and mantissa */
1905: /* eliminating extra 3 bits of exponent */
1906: extu r6,r6,3<29> /* get low 3 bits of mantissa */
1907: or r11,r6,r11 /* form complete mantissa and exponent */
1908: extu r10,r5,1<31> /* get the sign bit */
1909: mak r10,r10,1<31> /* place sign bit in correct position */
1910: or r6,r10,r11 /* or sign, exponent, and all of mantissa */
1911:
/* S2form: if S2 is single precision (bit 7 of r9 clear), rebuild it in */
/* single format in r8 from the words held in r7 and r8. */
1912: S2form:
1913: bb1 7,r9,checkop /* S2 is double precision, so do not */
1914: /* modify S2 into single format */
1915: mak r11,r7,28<3> /* over final exponent and mantissa */
1916: /* eliminating extra 3 bits of exponent */
1917: extu r8,r8,3<29> /* get low 3 bits of mantissa */
1918: or r11,r8,r11 /* form complete mantissa and exponent */
1919: extu r10,r7,1<31> /* get the sign bit */
1920: mak r10,r10,1<31> /* place sign bit in correct position */
1921: or r8,r10,r11 /* or sign, exponent, and all of mantissa */
1922:
1923:
1924: /* Extract the opcode, compare to a constant, and branch to the code that */
1925: /* deals with that opcode. */
1926:
/*
 * checkop: dispatch on the 5-bit opcode field taken from bits 11..15 of
 * r9.  The literal values match the FADDop ... TRNCop defines at the top
 * of the file.  Bit 2 of each cmp result is tested as the "equal" bit.
 * If no comparison matches, execution falls through into the code that
 * follows this block.
 */
1927: checkop:
1928: extu r10,r9,5<11> /* extract opcode */
1929: cmp r11,r10,0x05 /* compare to FADD */
1930: bb1 2,r11,denorm_FADD /* operation is FADD */
1931: cmp r11,r10,0x06 /* compare to FSUB */
1932: bb1 2,r11,denorm_FSUB /* operation is FSUB */
1933: cmp r11,r10,0x07 /* compare to FCMP */
1934: bb1 2,r11,denorm_FCMP /* operation is FCMP */
1935: cmp r11,r10,0x00 /* compare to FMUL */
1936: bb1 2,r11,denorm_FMUL /* operation is FMUL */
1937: cmp r11,r10,0x0e /* compare to FDIV */
1938: bb1 2,r11,denorm_FDIV /* operation is FDIV */
1939: #if 0
1940: cmp r11,r10,0x0f /* compare to FSQRT */
1941: bb1 2,r11,denorm_FSQRT /* operation is FSQRT */
1942: #endif
1943: cmp r11,r10,0x09 /* compare to INT */
1944: bb1 2,r11,denorm_INT /* operation is INT */
1945: cmp r11,r10,0x0a /* compare to NINT */
1946: bb1 2,r11,denorm_NINT /* operation is NINT */
1947: cmp r11,r10,0x0b /* compare to TRNC */
1948: bb1 2,r11,denorm_TRNC /* operation is TRNC */
1949:
1950:
1951: /* For all the following operations, the denormalized number is set to */
1952: /* zero and the operation is reperformed the correct destination and source */
1953: /* sizes. */
1954:
/*
 * denorm_FADD: replace each denormalized operand (bit 1 of r12 = S1,
 * bit 0 = S2) by zero, then redo the fadd with the size combination
 * given by bits 5 (destination), 9 (S1) and 7 (S2) of r9.
 * Single operands are taken from r6/r8, double operands from r5/r7.
 * Each fadd executes in the delay slot of the preceding br.n.
 * Label suffixes: sD/dD = single/double destination, sS1/dS1, sS2/dS2.
 */
1955: denorm_FADD:
1956: bb0 1,r12,FADDS2dnm /* S1 is not denorm, so S2 must be */
1957: or r5,r0,r0 /* set S1 to zero */
1958: or r6,r0,r0
1959: FADDS2chk:
1960: bb0 0,r12,FADDcalc /* S2 is not a denorm */
1961: FADDS2dnm:
1962: or r7,r0,r0 /* set S2 to zero */
1963: or r8,r0,r0
1964: FADDcalc:
1965: bb1 5,r9,FADDdD /* branch for double precision destination */
1966: FADDsD:
1967: bb1 9,r9,FADDsDdS1 /* branch for double precision S1 */
1968: FADDsDsS1:
1969: bb1 7,r9,FADDsDsS1dS2 /* branch for double precision S2 */
1970: FADDsDsS1sS2:
1971: br.n denorm_return
1972: fadd.sss r6,r6,r8 /* add the two sources and place result in r6 */
1973: FADDsDsS1dS2:
1974: br.n denorm_return
1975: fadd.ssd r6,r6,r7 /* add the two sources and place result in r6 */
1976: FADDsDdS1:
1977: bb1 7,r9,FADDsDdS1dS2 /* branch for double precision S2 */
1978: FADDsDdS1sS2:
1979: br.n denorm_return
1980: fadd.sds r6,r5,r8 /* add the two sources and place result in r6 */
1981: FADDsDdS1dS2:
1982: br.n denorm_return
1983: fadd.sdd r6,r5,r7 /* add the two sources and place result in r6 */
1984: FADDdD:
1985: bb1 9,r9,FADDdDdS1 /* branch for double precision S1 */
1986: FADDdDsS1:
1987: bb1 7,r9,FADDdDsS1dS2 /* branch for double precision S2 */
1988: FADDdDsS1sS2:
1989: br.n denorm_return
1990: fadd.dss r5,r6,r8 /* add the two sources and place result in r5 */
1991: FADDdDsS1dS2:
1992: br.n denorm_return
1993: fadd.dsd r5,r6,r7 /* add the two sources and place result in r5 */
1994: FADDdDdS1:
1995: bb1 7,r9,FADDdDdS1dS2 /* branch for double precision S2 */
1996: FADDdDdS1sS2:
1997: br.n denorm_return
1998: fadd.dds r5,r5,r8 /* add the two sources and place result in r5 */
1999: FADDdDdS1dS2:
2000: br.n denorm_return
2001: fadd.ddd r5,r5,r7 /* add the two sources and place result in r5 */
2002:
/*
 * denorm_FSUB: replace each denormalized operand (bit 1 of r12 = S1,
 * bit 0 = S2) by zero, then redo the fsub with the size combination
 * given by bits 5 (destination), 9 (S1) and 7 (S2) of r9.
 * Single operands are taken from r6/r8, double operands from r5/r7.
 * Each fsub executes in the delay slot of the preceding br.n.
 */
2003: denorm_FSUB:
2004: bb0 1,r12,FSUBS2dnm /* S1 is not denorm, so S2 must be */
2005: or r5,r0,r0 /* set S1 to zero */
2006: or r6,r0,r0
2007: FSUBS2chk:
2008: bb0 0,r12,FSUBcalc /* S2 is not a denorm */
2009: FSUBS2dnm:
2010: or r7,r0,r0 /* set S2 to zero */
2011: or r8,r0,r0
2012: FSUBcalc:
2013: bb1 5,r9,FSUBdD /* branch for double precision destination */
2014: FSUBsD:
2015: bb1 9,r9,FSUBsDdS1 /* branch for double precision S1 */
2016: FSUBsDsS1:
2017: bb1 7,r9,FSUBsDsS1dS2 /* branch for double precision S2 */
2018: FSUBsDsS1sS2:
2019: br.n denorm_return
2020: fsub.sss r6,r6,r8 /* subtract S2 from S1 and place result in r6 */
2021: FSUBsDsS1dS2:
2022: br.n denorm_return
2023: fsub.ssd r6,r6,r7 /* subtract S2 from S1 and place result in r6 */
2024: FSUBsDdS1:
2025: bb1 7,r9,FSUBsDdS1dS2 /* branch for double precision S2 */
2026: FSUBsDdS1sS2:
2027: br.n denorm_return
2028: fsub.sds r6,r5,r8 /* subtract S2 from S1 and place result in r6 */
2029: FSUBsDdS1dS2:
2030: br.n denorm_return
2031: fsub.sdd r6,r5,r7 /* subtract S2 from S1 and place result in r6 */
2032: FSUBdD:
2033: bb1 9,r9,FSUBdDdS1 /* branch for double precision S1 */
2034: FSUBdDsS1:
2035: bb1 7,r9,FSUBdDsS1dS2 /* branch for double precision S2 */
2036: FSUBdDsS1sS2:
2037: br.n denorm_return
2038: fsub.dss r5,r6,r8 /* subtract S2 from S1 and place result in r5 */
2039: FSUBdDsS1dS2:
2040: br.n denorm_return
2041: fsub.dsd r5,r6,r7 /* subtract S2 from S1 and place result in r5 */
2042: FSUBdDdS1:
2043: bb1 7,r9,FSUBdDdS1dS2 /* branch for double precision S2 */
2044: FSUBdDdS1sS2:
2045: br.n denorm_return
2046: fsub.dds r5,r5,r8 /* subtract S2 from S1 and place result in r5 */
2047: FSUBdDdS1dS2:
2048: br.n denorm_return
2049: fsub.ddd r5,r5,r7 /* subtract S2 from S1 and place result in r5 */
2050:
/*
 * denorm_FCMP: replace each denormalized operand (bit 1 of r12 = S1,
 * bit 0 = S2) by zero, then redo the fcmp with the source sizes given
 * by bits 9 (S1) and 7 (S2) of r9.  The comparison result always goes
 * to r6.  Each fcmp executes in the delay slot of the preceding br.n.
 */
2051: denorm_FCMP:
2052: bb0 1,r12,FCMPS2dnm /* S1 is not denorm, so S2 must be */
2053: or r5,r0,r0 /* set S1 to zero */
2054: or r6,r0,r0
2055: FCMPS2chk:
2056: bb0 0,r12,FCMPcalc /* S2 is not a denorm */
2057: FCMPS2dnm:
2058: or r7,r0,r0 /* set S2 to zero */
2059: or r8,r0,r0
2060: FCMPcalc:
2061: bb1 9,r9,FCMPdS1 /* branch for double precision S1 */
2062: FCMPsS1:
2063: bb1 7,r9,FCMPsS1dS2 /* branch for double precision S2 */
2064: FCMPsS1sS2:
2065: br.n denorm_return
2066: fcmp.sss r6,r6,r8 /* compare the two sources and place result in r6 */
2067: FCMPsS1dS2:
2068: br.n denorm_return
2069: fcmp.ssd r6,r6,r7 /* compare the two sources and place result in r6 */
2070: FCMPdS1:
2071: bb1 7,r9,FCMPdS1dS2 /* branch for double precision S2 */
2072: FCMPdS1sS2:
2073: br.n denorm_return
2074: fcmp.sds r6,r5,r8 /* compare the two sources and place result in r6 */
2075: FCMPdS1dS2:
2076: br.n denorm_return
2077: fcmp.sdd r6,r5,r7 /* compare the two sources and place result in r6 */
2078:
/*
 * denorm_FMUL: replace each denormalized operand (bit 1 of r12 = S1,
 * bit 0 = S2) by zero, then redo the fmul with the size combination
 * given by bits 5 (destination), 9 (S1) and 7 (S2) of r9.
 * Single operands are taken from r6/r8, double operands from r5/r7.
 * Each fmul executes in the delay slot of the preceding br.n.
 */
2079: denorm_FMUL:
2080: bb0 1,r12,FMULS2dnm /* S1 is not denorm, so S2 must be */
2081: or r5,r0,r0 /* set S1 to zero */
2082: or r6,r0,r0
2083: FMULS2chk:
2084: bb0 0,r12,FMULcalc /* S2 is not a denorm */
2085: FMULS2dnm:
2086: or r7,r0,r0 /* set S2 to zero */
2087: or r8,r0,r0
2088: FMULcalc:
2089: bb1 5,r9,FMULdD /* branch for double precision destination */
2090: FMULsD:
2091: bb1 9,r9,FMULsDdS1 /* branch for double precision S1 */
2092: FMULsDsS1:
2093: bb1 7,r9,FMULsDsS1dS2 /* branch for double precision S2 */
2094: FMULsDsS1sS2:
2095: br.n denorm_return
2096: fmul.sss r6,r6,r8 /* multiply the two sources and place result in r6 */
2097: FMULsDsS1dS2:
2098: br.n denorm_return
2099: fmul.ssd r6,r6,r7 /* multiply the two sources and place result in r6 */
2100: FMULsDdS1:
2101: bb1 7,r9,FMULsDdS1dS2 /* branch for double precision S2 */
2102: FMULsDdS1sS2:
2103: br.n denorm_return
2104: fmul.sds r6,r5,r8 /* multiply the two sources and place result in r6 */
2105: FMULsDdS1dS2:
2106: br.n denorm_return
2107: fmul.sdd r6,r5,r7 /* multiply the two sources and place result in r6 */
2108: FMULdD:
2109: bb1 9,r9,FMULdDdS1 /* branch for double precision S1 */
2110: FMULdDsS1:
2111: bb1 7,r9,FMULdDsS1dS2 /* branch for double precision S2 */
2112: FMULdDsS1sS2:
2113: br.n denorm_return
2114: fmul.dss r5,r6,r8 /* multiply the two sources and place result in r5 */
2115: FMULdDsS1dS2:
2116: br.n denorm_return
2117: fmul.dsd r5,r6,r7 /* multiply the two sources and place result in r5 */
2118: FMULdDdS1:
2119: bb1 7,r9,FMULdDdS1dS2 /* branch for double precision S2 */
2120: FMULdDdS1sS2:
2121: br.n denorm_return
2122: fmul.dds r5,r5,r8 /* multiply the two sources and place result in r5 */
2123: FMULdDdS1dS2:
2124: br.n denorm_return
2125: fmul.ddd r5,r5,r7 /* multiply the two sources and place result in r5 */
2126:
/*
 * denorm_FDIV: replace each denormalized operand (bit 1 of r12 = S1,
 * bit 0 = S2) by zero, then redo the fdiv with the size combination
 * given by bits 5 (destination), 9 (S1) and 7 (S2) of r9.
 * Single operands are taken from r6/r8, double operands from r5/r7.
 * Unlike the other denorm_* handlers, the fdiv is issued before a plain
 * `br' instead of sitting in the delay slot of a `br.n'.
 */
2127: denorm_FDIV:
2128: bb0 1,r12,FDIVS2dnm /* S1 is not denorm, so S2 must be */
2129: or r5,r0,r0 /* set S1 to zero */
2130: or r6,r0,r0
2131: FDIVS2chk:
2132: bb0 0,r12,FDIVcalc /* S2 is not a denorm */
2133: FDIVS2dnm:
2134: or r7,r0,r0 /* set S2 to zero */
2135: or r8,r0,r0
2136: FDIVcalc:
2137: bb1 5,r9,FDIVdD /* branch for double precision destination */
2138: FDIVsD:
2139: bb1 9,r9,FDIVsDdS1 /* branch for double precision S1 */
2140: FDIVsDsS1:
2141: bb1 7,r9,FDIVsDsS1dS2 /* branch for double precision S2 */
2142: FDIVsDsS1sS2:
2143: fdiv.sss r6,r6,r8 /* divide S1 by S2 and place result in r6 */
2144: br denorm_return
2145: FDIVsDsS1dS2:
2146: fdiv.ssd r6,r6,r7 /* divide S1 by S2 and place result in r6 */
2147: br denorm_return
2148: FDIVsDdS1:
2149: bb1 7,r9,FDIVsDdS1dS2 /* branch for double precision S2 */
2150: FDIVsDdS1sS2:
2151: fdiv.sds r6,r5,r8 /* divide S1 by S2 and place result in r6 */
2152: br denorm_return
2153: FDIVsDdS1dS2:
2154: fdiv.sdd r6,r5,r7 /* divide S1 by S2 and place result in r6 */
2155: br denorm_return
2156: FDIVdD:
2157: bb1 9,r9,FDIVdDdS1 /* branch for double precision S1 */
2158: FDIVdDsS1:
2159: bb1 7,r9,FDIVdDsS1dS2 /* branch for double precision S2 */
2160: FDIVdDsS1sS2:
2161: fdiv.dss r5,r6,r8 /* divide S1 by S2 and place result in r5 */
2162: br denorm_return
2163: FDIVdDsS1dS2:
2164: fdiv.dsd r5,r6,r7 /* divide S1 by S2 and place result in r5 */
2165: br denorm_return
2166: FDIVdDdS1:
2167: bb1 7,r9,FDIVdDdS1dS2 /* branch for double precision S2 */
2168: FDIVdDdS1sS2:
2169: fdiv.dds r5,r5,r8 /* divide S1 by S2 and place result in r5 */
2170: br denorm_return
2171: FDIVdDdS1dS2:
2172: fdiv.ddd r5,r5,r7 /* divide S1 by S2 and place result in r5 */
2173: br denorm_return
2174:
/* denorm_FSQRT is compiled out by the #if 0 below.  It zeroes S2 */
/* (r7/r8) and redoes fsqrt with the sizes from bits 5 and 7 of r9. */
2175: #if 0
2176: denorm_FSQRT:
2177: or r7,r0,r0 /* set S2 to zero */
2178: or r8,r0,r0
2179: FSQRTcalc:
2180: bb1 5,r9,FSQRTdD /* branch for double precision destination */
2181: FSQRTsD:
2182: bb1 7,r9,FSQRTsDdS2 /* branch for double precision S2 */
2183: FSQRTsDsS2:
2184: br.n denorm_return
2185: fsqrt.ss r6,r8 /* take the square root of S2, result in r6 */
2186: FSQRTsDdS2:
2187: br.n denorm_return
2188: fsqrt.sd r6,r7 /* take the square root of S2, result in r6 */
2189: FSQRTdD:
2190: bb1 7,r9,FSQRTdDdS2 /* branch for double precision S2 */
2191: FSQRTdDsS2:
2192: br.n denorm_return
2193: fsqrt.ds r5,r8 /* take the square root of S2, result in r5 */
2194: FSQRTdDdS2:
2195: br.n denorm_return
2196: fsqrt.dd r5,r7 /* take the square root of S2, result in r5 */
2197: #endif
2198:
/*
 * denorm_INT: zero S2 (r7/r8) unconditionally and redo the `int'
 * conversion; bit 7 of r9 selects a single (r8) or double (r7) source.
 * The result goes to r6.  Each `int' runs in the delay slot of br.n.
 */
2199: denorm_INT:
2200: or r7,r0,r0 /* set S2 to zero */
2201: or r8,r0,r0
2202: INTcalc:
2203: bb1 7,r9,INTdS2 /* branch for double precision S2 */
2204: INTsS2:
2205: br.n denorm_return
2206: int.ss r6,r8 /* convert S2 to integer and place result in r6 */
2207: INTdS2:
2208: br.n denorm_return
2209: int.sd r6,r7 /* convert S2 to integer and place result in r6 */
2210:
/*
 * denorm_NINT: zero S2 (r7/r8) unconditionally and redo the `nint'
 * conversion; bit 7 of r9 selects a single (r8) or double (r7) source.
 * The result goes to r6.  Each `nint' runs in the delay slot of br.n.
 */
2211: denorm_NINT:
2212: or r7,r0,r0 /* set S2 to zero */
2213: or r8,r0,r0
2214: NINTcalc:
2215: bb1 7,r9,NINTdS2 /* branch for double precision S2 */
2216: NINTsS2:
2217: br.n denorm_return
2218: nint.ss r6,r8 /* convert S2 with nint and place result in r6 */
2219: NINTdS2:
2220: br.n denorm_return
2221: nint.sd r6,r7 /* convert S2 with nint and place result in r6 */
2222:
/*
 * denorm_TRNC: zero S2 (r7/r8) unconditionally and redo the `trnc'
 * conversion; bit 7 of r9 selects a single (r8) or double (r7) source.
 * The result goes to r6.  The double-source case has no branch: it
 * falls through into the code that follows this block.
 */
2223: denorm_TRNC:
2224: or r7,r0,r0 /* set S2 to zero */
2225: or r8,r0,r0
2226: TRNCcalc:
2227: bb1 7,r9,TRNCdS2 /* branch for double precision S2 */
2228: TRNCsS2:
2229: br.n denorm_return
2230: trnc.ss r6,r8 /* convert S2 with trnc and place result in r6 */
2231: TRNCdS2:
2232: trnc.sd r6,r7 /* convert S2 with trnc and place result in r6 */
2233:
2234:
2235: /* Return to the routine that detected the reserved operand. */
2236:
/* Common exit of the denorm handlers: return to the address stored at r31+0. */
2237: denorm_return:
2238: ld r1,r31,0 /* load return address */
2239: jmp r1
2240:
2241: /* S1 and/or S2 is an infinity, and the other operand may be a zero. */
2242: /* Knowing which operands are infinity, check the remaining operands for zeros. */
2243:
/*
 * zero: subroutine, returns with jmp r1.  Checks whichever operand is
 * not flagged as infinity in r12 and sets s1zero or s2zero in r12 if
 * that operand is zero.  Uses r10 and r11 as scratch (in S1noinf/S2noinf).
 */
2244: ASLOCAL(zero)
2245: bb0 s1inf,r12,S1noinf /* S1 is not infinity: see if S1 is zero */
2246: bb0 s2inf,r12,S2noinf /* S2 is not infinity: see if S2 is zero */
2247: jmp r1 /* both are infinity: nothing to check */
2248:
2249: /* See if S1 is zero. Whether or not S1 is a zero, being in this routine */
2250: /* implies that S2 is infinity, so return to subroutine infinity after */
2251: /* completing this code. Set the s1zero flag in r12 if S1 is zero. */
2252:
/* S1noinf: test S1 (r5 high word, r6 low word) for zero, ignoring the */
/* sign bit.  The `set' after each jmp.n executes in its delay slot. */
2253: S1noinf:
2254: bb1 s1size,r9,S1noinfd /* work with double precision operand */
2255: S1noinfs:
2256: or r10,r0,r5 /* load high word into r10 */
2257: clr r10,r10,1<sign> /* clear the sign bit */
2258: extu r11,r6,3<29> /* extract lower 3 bits of mantissa */
2259: or r10,r10,r11 /* or these 3 bits with high word */
2260: bcnd ne0,r10,operation /* do not set zero flag */
2261: jmp.n r1 /* since this operand was not */
2262: /* infinity, S2 must have been, */
2263: /* so return */
2264: set r12,r12,1<s1zero> /* set zeroflag */
2265: S1noinfd:
2266: clr r10,r5,1<sign> /* clear the sign bit */
2267: or r10,r6,r10 /* or high and low word */
2268: bcnd ne0,r10,operation /* do not set zero flag */
2269: jmp.n r1 /* since this operand was not */
2270: /* infinity, S2 must have been, */
2271: /* so return */
2272: set r12,r12,1<s1zero> /* set zeroflag */
2273:
2274:
2275: /* Check S2 for zero. If it is zero, then set the s2zero flag in r12. */
2276:
/* S2noinf: test S2 (r7 high word, r8 low word) for zero, ignoring the */
/* sign bit.  `operation' is the shared return point of the zero checks. */
2277: S2noinf:
2278: bb1 s2size,r9,S2noinfd /* work with double precision operand */
2279: S2noinfs:
2280: or r10,r0,r7 /* load high word into r10 */
2281: clr r10,r10,1<sign> /* clear the sign bit */
2282: extu r11,r8,3<29> /* extract lower 3 bits of mantissa */
2283: or r10,r10,r11 /* or these 3 bits with high word */
2284: bcnd ne0,r10,operation /* do not set zero flag */
2285: jmp.n r1 /* since this operand was not */
2286: /* infinity, S1 must have been, */
2287: /* so return */
2288: set r12,r12,1<s2zero> /* set zeroflag */
2289: S2noinfd:
2290: clr r10,r7,1<sign> /* clear the sign bit */
2291: or r10,r8,r10 /* or high and low word */
2292: bcnd ne0,r10,operation /* do not set zero flag */
2293: set r12,r12,1<s2zero> /* set zeroflag */
2294: /* since this operand was not */
2295: /* infinity, S1 must have been, */
2296: /* so return */
2297: operation:
2298: jmp r1
2299:
/*
 * Xfp_imprecise: entry point taking the exception frame pointer in r3.
 * Reserves 16 bytes of stack, saves r1 and the frame pointer, loads the
 * FP state saved in the frame into r2 (FPSR), r3 (FPCR), r4 (FPECR),
 * r10/r11 (FPRH/FPRL) and r12 (FPIT), then branches to FPunderflow or
 * FPoverflow according to bits 2 and 1 of FPECR, with r1 pointing at
 * fpui_wrapup as their return address.  If neither bit is set, execution
 * falls through to the code after this block.
 */
2300: ASENTRY(Xfp_imprecise)
2301: /* input: r3 is the exception frame */
2302: or r29, r3, r0 /* r29 is now the E.F. */
2303: subu r31, r31, 16
2304: st r1, r31, 4
2305: st r29, r31, 8
2306:
2307: ld r2 , r29, EF_FPSR * 4
2308: ld r3 , r29, EF_FPCR * 4
2309: ld r4 , r29, EF_FPECR * 4
2310: ld r10, r29, EF_FPRH * 4
2311: ld r11, r29, EF_FPRL * 4
2312: ld r12, r29, EF_FPIT * 4
2313:
2314: /* Load into r1 the return address for the exception handlers. Looking */
2315: /* at FPECR, branch to the appropriate exception handler. */
2316:
2317: or.u r1,r0,hi16(fpui_wrapup)/* load return address of functions */
2318: or r1,r1,lo16(fpui_wrapup)
2319:
2320: bb0 2,r4,2f /* branch to FPunderflow if bit set */
2321: br _ASM_LABEL(FPunderflow)
2322: 2:
2323: bb0 1,r4,3f /* branch to FPoverflow if bit set */
2324: br _ASM_LABEL(FPoverflow)
2325: 3:
2326: /* XXX handle inexact!!! */
2327:
/*
 * fpui_wrapup: common tail of the imprecise exception handling.
 * Sets the interrupt-disable bit in the PSR (cr1), restores r1 and r29
 * from the stack, releases the 16-byte stack area, writes r2/r3 back to
 * FPSR/FPCR and stores the result (r10 high, r11 low) into the saved
 * general registers of the exception frame.  The destination register
 * number is the low 5 bits of r12; the destsize bit of r12 selects
 * whether a high word is written as well.
 */
2328: fpui_wrapup:
2329: tb1 0,r0,0 /* make sure all floating point operations */
2330: /* have finished */
2331: ldcr r4, cr1 /* load the PSR */
2332: #if 0
2333: set r4, r4, 1<PSR_FPU_DISABLE_BIT>
2334: #endif
2335: set r4, r4, 1<PSR_INTERRUPT_DISABLE_BIT>
2336: stcr r4, cr1
2337: ld r1, r31, 4
2338: ld r29,r31, 8
2339: addu r31, r31, 16
2340:
2341: fstcr r2, FPSR /* write revised value of FPSR */
2342: fstcr r3, FPCR /* write revised value of FPCR */
2343:
2344: /* write back the results */
2345: extu r2, r12, 5<0> /* r2 = destination register number */
2346: bb0.n destsize, r12, Iwritesingle
2347: addu r3, r29, EF_R0 * 4 /* delay slot: r3 = address of saved r0 in the frame */
2348: st r10, r3 [r2] /* double: store high word in saved register r2 */
2349: addu r2, r2, 1 /* low word goes in the next register */
2350: clr r2, r2, 27<5> /* keep only the low 5 bits of the register number */
2351: Iwritesingle:
2352: jmp.n r1
2353: st r11, r3 [r2] /* delay slot: store low (or only) result word */
CVSweb