Annotation of sys/arch/m88k/m88k/m88100_fp.S, Revision 1.1
1.1 ! nbrk 1: /* $OpenBSD: m88100_fp.S,v 1.4 2004/08/09 20:52:11 miod Exp $ */
! 2: /*
! 3: * Mach Operating System
! 4: * Copyright (c) 1991 Carnegie Mellon University
! 5: * Copyright (c) 1991 OMRON Corporation
! 6: * All Rights Reserved.
! 7: *
! 8: * Permission to use, copy, modify and distribute this software and its
! 9: * documentation is hereby granted, provided that both the copyright
! 10: * notice and this permission notice appear in all copies of the
! 11: * software, derivative works or modified versions, and any portions
! 12: * thereof, and that both notices appear in supporting documentation.
! 13: *
! 14: * CARNEGIE MELLON AND OMRON ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS IS"
! 15: * CONDITION. CARNEGIE MELLON AND OMRON DISCLAIM ANY LIABILITY OF ANY KIND
! 16: * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
! 17: *
! 18: * Carnegie Mellon requests users of this software to return to
! 19: *
! 20: * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
! 21: * School of Computer Science
! 22: * Carnegie Mellon University
! 23: * Pittsburgh PA 15213-3890
! 24: *
! 25: * any improvements or extensions that they make and grant Carnegie the
! 26: * rights to redistribute these changes.
! 27: */
! 28:
! 29: /* Floating point trouble routines */
! 30: #include "assym.h"
! 31: #include <machine/trap.h>
! 32: #include <machine/asm.h>
! 33:
! 34: #define destsize 10
! 35:
! 36: /* Floating-Point Status Register bits */
! 37: #define inexact 0
! 38: #define overflow 1
! 39: #define underflow 2
! 40: #define divzero 3
! 41: #define oper 4
! 42:
! 43: #define sign 31
! 44: #define s1size 9
! 45: #define s2size 7
! 46: #define dsize 5
! 47:
! 48: #define FADDop 0x05
! 49: #define FSUBop 0x06
! 50: #define FCMPop 0x07
! 51: #define FMULop 0x00
! 52: #define FDIVop 0x0e
! 53: #define FSQRTop 0x0f
! 54: #define INTop 0x09
! 55: #define NINTop 0x0a
! 56: #define TRNCop 0x0b
! 57:
! 58: #define s1nan 7
! 59: #define s2nan 6
! 60: #define s1inf 5
! 61: #define s2inf 4
! 62: #define s1zero 3
! 63: #define s2zero 2
! 64: #define sigbit 19
! 65:
! 66: #define modehi 30
! 67: #define modelo 29
! 68: #define rndhi 15
! 69: #define rndlo 14
! 70: #define efunf 7
! 71: #define efovf 6
! 72: #define efinx 5
! 73:
! 74: ASENTRY(m88100_Xfp_precise)
! 75: or r29, r3, r0 /* r29 = r3; r29 is now the E.F. (exception frame) */
! 76: subu r31, r31, 16 /* reserve 16 bytes of stack */
! 77: st r1, r31, 8 /* save our return address; wrapup reloads it */
! 78: st r29, r31, 12 /* save the frame pointer; wrapup reloads it */
! 79:
! 80: ld r2, r29, EF_FPSR * 4 /* r2 = FPSR word from the frame */
! 81: ld r3, r29, EF_FPCR * 4 /* r3 = FPCR word from the frame */
! 82: ld r4, r29, EF_FPECR * 4 /* r4 = FPECR word, tested bit by bit below */
! 83: ld r5, r29, EF_FPHS1 * 4 /* r5 = FPHS1 (S1 high word) */
! 84: ld r6, r29, EF_FPLS1 * 4 /* r6 = FPLS1 (S1 low word) */
! 85: ld r7, r29, EF_FPHS2 * 4 /* r7 = FPHS2 (S2 high word) */
! 86: ld r8, r29, EF_FPLS2 * 4 /* r8 = FPLS2 (S2 low word) */
! 87: ld r9, r29, EF_FPPT * 4 /* r9 = FPPT; later code reads opcode, sizes and dest reg from it */
! 88:
! 89:
! 90: /*
! 91: * Load into r1 the return address for the zero handlers. Looking at
! 92: * FPECR, branch to the appropriate zero handler. However, if none of
! 93: * the zero bits are enabled, then a floating point instruction was
! 94: * issued with the floating point unit disabled. This will cause an
! 95: * unimplemented opcode 0.
! 96: */
! 97:
! 98: or.u r1,r0,hi16(wrapup) /* load return address of function */
! 99: or r1,r1,lo16(wrapup) /* r1 = address of wrapup; every handler returns there */
! 100:
! 101: bb0 6,r4, 3f /* FPECR bit 6 clear: try the next bit */
! 102: br FPuimp /* bit 6 set: unimplemented opcode handler */
! 103: 3:
! 104: bb0 7,r4, 4f /* FPECR bit 7 clear: try the next bit */
! 105: br FPintover /* bit 7 set: integer conversion overflow handler */
! 106: 4:
! 107: #if 0
! 108: bb0 5,r4, 5f /* (disabled) FPECR bit 5 clear: try the next bit */
! 109: br FPpriviol
! 110: #endif
! 111: 5:
! 112: bb0 4,r4, 6f /* FPECR bit 4 clear: try the next bit */
! 113: br FPresoper /* bit 4 set: reserved operand handler */
! 114: 6:
! 115: bb0 3,r4, 7f /* FPECR bit 3 clear: no handled bit is set */
! 116: br FPdivzero /* bit 3 set: divide by zero handler */
! 117: 7:
! 118: or.u r4, r4, 0xffff /* no handled bit set: set the upper half of r4 and fall through into FPuimp */
! 119:
! 120: ASLOCAL(FPuimp)
! 121: subu r31,r31,16 /* allocate stack */
! 122: st r1,r31,0 /* save return address */
! 123: or r2,r0,T_FPEPFLT /* load trap type */
! 124: bsr.n _C_LABEL(m88100_trap) /* call m88100_trap; the next instruction runs in the delay slot */
! 125: or r3, r29, r0 /* delay slot: r3 = exception frame pointer */
! 126: ld r1,r31,0 /* recover return address */
! 127: addu r31,r31,16 /* deallocate stack */
! 128: jmp r1 /* return to the address that was in r1 on entry */
! 129:
! 130: /*
! 131: * To write back the results to the user registers, disable exceptions
! 132: * and the floating point unit. Write FPSR and FPCR and load the SNIP
! 133: * and SFIP.
! 134: * r5 will contain the upper word of the result
! 135: * r6 will contain the lower word of the result
! 136: */
! 137:
! 138: ASLOCAL(wrapup)
! 139: tb1 0,r0,0 /* make sure all floating point operations */
! 140: /* have finished */
! 141: ldcr r10, cr1 /* load the PSR */
! 142: #if 0
! 143: set r10, r10, 1<PSR_FPU_DISABLE_BIT>
! 144: #endif
! 145: set r10, r10, 1<PSR_INTERRUPT_DISABLE_BIT> /* set the interrupt-disable bit in the copy */
! 146: stcr r10, cr1 /* write the modified PSR back */
! 147:
! 148: ld r1, r31, 8 /* reload the return address saved by m88100_Xfp_precise */
! 149: ld r29, r31, 12 /* reload the exception frame pointer */
! 150: addu r31, r31, 16 /* release the 16 bytes reserved at entry */
! 151:
! 152: fstcr r2, FPSR /* write revised value of FPSR */
! 153: fstcr r3, FPCR /* write revised value of FPCR */
! 154:
! 155: /* result writeback routine */
! 156: addu r3, r29, EF_R0 * 4 /* r3 = address of the r0 slot in the frame */
! 157: extu r2, r9, 5<0> /* get 5 bits of destination register */
! 158: bb0 5, r9, writesingle /* branch if destination is single */
! 159:
! 160: /* writedouble here */
! 161: st r5, r3 [r2] /* write high word */
! 162: add r2, r2, 1 /* for double, the low word is the */
! 163: /* unspecified register */
! 164: clr r2, r2, 27<5> /* perform equivalent of mod 32 */
! 165: ASLOCAL(writesingle)
! 166: jmp.n r1 /* return; the store below runs in the delay slot */
! 167: st r6, r3 [r2] /* write low word into memory */
! 168:
! 169: /*
! 170: * Check if the numerator is zero. If the numerator is zero, then handle
! 171: * this instruction as you would a 0/0 invalid operation.
! 172: */
! 173:
! 174: ASLOCAL(FPdivzero)
! 175: bb1.n s1size,r9,1f /* branch if numerator double */
! 176: st r1,r31,0 /* delay slot: save return address (runs on both paths) */
! 177: /* single number */
! 178: clr r10,r5,1<sign> /* clear sign bit */
! 179: extu r11,r6,3<29> /* grab upper bits of lower word */
! 180: or r10,r10,r11 /* combine ones of mantissa */
! 181: bcnd eq0,r10,resoper /* numerator is zero, handle reserved operand */
! 182: br setbit /* numerator is non-zero: set divzero bit */
! 183: 1:
! 184: /* double number */
! 185: clr r10,r5,1<sign> /* clear sign bit */
! 186: or r10,r10,r6 /* or high and low words */
! 187: bcnd ne0,r10,setbit /* non-zero: set divzero bit; zero falls through into resoper */
! 188:
! 189: /*
! 190: * The numerator is zero, so handle the invalid operation by setting the
! 191: * invalid operation bit and writing a quiet NaN to the destination.
! 192: */
! 193:
! 194: ASLOCAL(resoper)
! 195: set r2,r2,1<oper> /* set the oper bit in the FPSR value that wrapup writes back */
! 196: set r5,r0,0<0> /* put a NaN in high word; NOTE(review): relies on width 0 meaning a full 32-bit field - confirm */
! 197: set r6,r0,0<0> /* put a NaN in low word */
! 198: br FP_div_return /* return through the shared divide exit */
! 199: /* writing to a word which may be ignored */
! 200: /* is just as quick as checking the precision */
! 201: /* of the destination */
! 202:
! 203: /*
! 204: * The operation is divide by zero, so set the divide by zero bit in the
! 205: * FPSR.
! 206: * Considering the sign of the numerator and zero, write a correctly
! 207: * signed infinity of the proper precision into the destination.
! 208: */
! 209:
! 210: setbit:
! 211: set r2,r2,1<divzero> /* set the divzero bit in the FPSR value that wrapup writes back */
! 212: bb1 dsize,r9,FPzero_double /* branch to handle double result */
! 213: FPzero_single:
! 214: clr r10,r5,31<0> /* clear all of S1HI except sign bit */
! 215: xor r10,r7,r10 /* xor the sign bits of the operands; NOTE(review): all of S2HI takes part, so this assumes its other bits are zero - confirm */
! 216: or.u r6,r0,0x7f80 /* load single precision infinity */
! 217: br.n FP_div_return /* the or below runs in the delay slot */
! 218: or r6,r6,r10 /* load correctly signed infinity */
! 219:
! 220: FPzero_double:
! 221: clr r10,r5,31<0> /* clear all of S1HI except sign bit */
! 222: xor r10,r7,r10 /* xor the sign bits of the operands (same caveat as the single case) */
! 223: or.u r5,r0,0x7ff0 /* load double precision infinity */
! 224: or r5,r5,r10 /* load correctly signed infinity */
! 225: or r6,r0,r0 /* clear lower word of double */
! 226:
! 227: FP_div_return:
! 228: ld r1,r31,0 /* load return address */
! 229: jmp r1 /* return to the address saved by FPdivzero */
! 230:
! 231: /*
! 232: * Both NINT and TRNC require a certain rounding mode, so check which
! 233: * instruction caused the integer conversion overflow. Use a substitute
! 234: * FPCR in r1, and modify the rounding mode if the instruction is NINT
! 235: * or TRNC.
! 236: */
! 237: ASLOCAL(FPintover)
! 238: extu r10,r9,5<11> /* extract opcode */
! 239: cmp r11,r10,INTop /* see if instruction is INT */
! 240: st r1,r31,0 /* save return address */
! 241: bb1.n eq,r11,checksize /* instruction is INT, do not modify */
! 242: /* rounding mode */
! 243: or r1,r0,r3 /* delay slot: load FPCR into r1 (runs on both paths) */
! 244: cmp r11,r10,NINTop /* see if instruction is NINT */
! 245: bb1 eq,r11,NINT /* instruction is NINT */
! 246: TRNC:
! 247: clr r1,r1,2<rndlo> /* clear rounding mode bits, */
! 248: /* instruction is TRNC */
! 249: br.n checksize /* branch to check size */
! 250: set r1,r1,1<rndlo> /* delay slot: make rounding mode round towards */
! 251: /* zero (rndhi = 0, rndlo = 1) */
! 252: NINT:
! 253: clr r1,r1,2<rndlo> /* make rounding mode round to */
! 254: /* nearest (rndhi = 0, rndlo = 0) */
! 255:
! 256: /* See whether the source is single or double precision. */
! 257:
! 258: checksize:
! 259: bb1 s2size,r9,checkdoub /* S2 is double, branch to see if */
! 260: /* there is a false alarm; single falls through */
! 261:
! 262: /*
! 263: * An integer has more bits than the mantissa of a single precision floating
! 264: * point number, so to check for false alarms (i.e. valid conversion), simply
! 265: * check the exponents. False alarms are detected for 2**30 to (2**31) - 1
! 266: * and -2**30 to -2**31. Only seven bits need to be looked at since an
! 267: * exception will not occur for the other half of the numbering system.
! 268: * To speed up the processing, first check to see if the exponent is 32 or
! 269: * greater.
! 270: *
! 271: * This code was originally written for the exponent in the control
! 272: * register to have the most significant bit (8 - single, 11 - double)
! 273: * flipped and sign extended. For precise exceptions, however, the most
! 274: * significant bit is only sign extended. Therefore, the code was chopped
! 275: * up so that it would work for positive values of real exponent which were
! 276: * only sign extended.
! 277: */
! 278:
! 279: checksing:
! 280: extu r10,r7,7<20> /* internal representation for single */
! 281: /* precision is IEEE 8 bits sign extended */
! 282: /* to 11 bits; for real exp. = 30, the */
! 283: /* above instruction gives a result exp. */
! 284: /* that has the MSB flipped and sign */
! 285: /* extended like in the IMPCR */
! 286: cmp r11,r10,31 /* compare to 32, but exp. off by 1 */
! 287: /* these 2 instructions speed up the */
! 288: /* handling of genuine overflows */
! 289: bb1 ge,r11,overflw /* valid case, perform overflow routine */
! 290: bb1 sign,r7,checksingn /* source operand is negative */
! 291:
! 292: /*
! 293: * If the number is positive and the exponent is greater than 30, then it is
! 294: * overflow.
! 295: */
! 296: checksingp:
! 297: cmp r10,r10,29 /* compare to 30, but exp. off by 1 */
! 298: bb1 gt,r10,overflw /* no false alarm, it is overflow */
! 299: br conversionsp /* finish single precision conversion */
! 300:
! 301: /*
! 302: * If the number is negative, and the exponent is 30, or 31 with a mantissa
! 303: * of 0, then it is a false alarm.
! 304: */
! 305: checksingn:
! 306: cmp r11,r10,30 /* compare to 31, but exp. off by 1; conversionsn re-tests r11 */
! 307: bb1 lt,r11,conversionsn /* exp. less than 31, so convert */
! 308: extu r10,r8,3<29> /* get upper three bits of lower */
! 309: /* mantissa */
! 310: mak r12,r7,20<3> /* get upper 20 bits of mantissa */
! 311: or r10,r10,r12 /* form complete mantissa */
! 312: bcnd eq0,r10,conversionsn /* complete conversion if mantissa */
! 313: /* is 0 */
! 314: br overflw /* no false alarm, it is overflow */
! 315:
! 316: /*
! 317: * False alarms are detected for 2**30 to (2**31) - 1 and -2**30 to -2**31.
! 318: * Only seven bits need to be looked at since an exception will not occur
! 319: * for the other half of the numbering system.
! 320: * To speed up the processing, first check to see if the exponent is 32 or
! 321: * greater. Since there are more mantissa bits than integer bits, rounding
! 322: * could cause overflow. (2**31) - 1 needs to be checked so that it does
! 323: * not round to 2**31, and -2**31 needs to be checked in case it rounds to
! 324: * -((2**31) + 1).
! 325: */
! 326: checkdoub:
! 327: extu r10,r7,10<20> /* internal representation for double */
! 328: /* precision is the same IEEE 11 bits */
! 329: /* for real exp. = 30, the */
! 330: /* above instruction gives a result exp. */
! 331: /* that has the MSB flipped and sign */
! 332: /* extended like in the IMPCR */
! 333: cmp r11,r10,31 /* compare to 32, but exp. off by 1 */
! 334: /* these 2 instructions speed up the */
! 335: /* handling of genuine overflows */
! 336: bb1 ge,r11,overflw /* valid case, perform overflow routine */
! 337: bb1 sign,r7,checkdoubn /* source operand is negative */
! 338:
! 339: /*
! 340: * If the exponent is not 31, then the floating point number will be rounded
! 341: * before the conversion is done. A branch table is set up with bits 4 and 3
! 342: * being the rounding mode, and bits 2, 1, and 0 are the integer LSB, guard,
! 343: * and sticky bits.
! 344: */
! 345: checkdoubp:
! 346: cmp r11,r10,30 /* compare to 31, but exponent off by 1 */
! 347: bb1 eq,r11,overflw /* no false alarm, it is overflow */
! 348: extu r12,r8,1<22> /* get LSB for integer with exp. = 30 */
! 349: mak r12,r12,1<2> /* start to set up field for branch table (index bit 2) */
! 350: extu r11,r8,1<21> /* get guard bit */
! 351: mak r11,r11,1<1> /* set up field for branch table (index bit 1) */
! 352: or r12,r11,r12 /* set up field for branch table */
! 353: extu r11,r8,21<0> /* get bits for sticky bit */
! 354: bcnd eq0,r11,nostickyp /* do not set sticky */
! 355: set r12,r12,1<0> /* set sticky bit (index bit 0) */
! 356: nostickyp:
! 357: rot r11,r1,0<rndlo> /* shift rounding mode to 2 LSB''s; r1 is the substitute FPCR from FPintover */
! 358: mak r11,r11,2<3> /* set up field, clear other bits (index bits 4-3) */
! 359: or r12,r11,r12 /* set up field for branch table */
! 360: lda r12,r0[r12] /* scale r12 */
! 361: or.u r12,r12,hi16(ptable) /* load pointer into table */
! 362: addu r12,r12,lo16(ptable) /* r12 = address of the selected ptable entry */
! 363: jmp r12 /* dispatch through ptable */
! 364:
! 365: ptable: /* index = mode<<3 | LSB<<2 | guard<<1 | sticky; LGS below lists those three bits */
! 366: br conversiondp /* 0: nearest, LGS=000 */
! 367: br conversiondp /* 1: nearest, LGS=001 */
! 368: br conversiondp /* 2: nearest, LGS=010 (tie, LSB even: no round up) */
! 369: br paddone /* 3: nearest, LGS=011 */
! 370: br conversiondp /* 4: nearest, LGS=100 */
! 371: br conversiondp /* 5: nearest, LGS=101 */
! 372: br paddone /* 6: nearest, LGS=110 (tie, LSB odd: round up) */
! 373: br paddone /* 7: nearest, LGS=111 */
! 374: br conversiondp /* 8: toward zero, LGS=000 */
! 375: br conversiondp /* 9: toward zero, LGS=001 */
! 376: br conversiondp /* 10: toward zero, LGS=010 */
! 377: br conversiondp /* 11: toward zero, LGS=011 */
! 378: br conversiondp /* 12: toward zero, LGS=100 */
! 379: br conversiondp /* 13: toward zero, LGS=101 */
! 380: br conversiondp /* 14: toward zero, LGS=110 */
! 381: br conversiondp /* 15: toward zero, LGS=111 */
! 382: br conversiondp /* 16: toward negative, LGS=000 */
! 383: br conversiondp /* 17: toward negative, LGS=001 */
! 384: br conversiondp /* 18: toward negative, LGS=010 */
! 385: br conversiondp /* 19: toward negative, LGS=011 */
! 386: br conversiondp /* 20: toward negative, LGS=100 */
! 387: br conversiondp /* 21: toward negative, LGS=101 */
! 388: br conversiondp /* 22: toward negative, LGS=110 */
! 389: br conversiondp /* 23: toward negative, LGS=111 */
! 390: br conversiondp /* 24: toward positive, LGS=000 */
! 391: br paddone /* 25: toward positive, LGS=001 */
! 392: br paddone /* 26: toward positive, LGS=010 */
! 393: br paddone /* 27: toward positive, LGS=011 */
! 394: br conversiondp /* 28: toward positive, LGS=100 */
! 395: br paddone /* 29: toward positive, LGS=101 */
! 396: br paddone /* 30: toward positive, LGS=110 */
! 397: br paddone /* 31: toward positive, LGS=111 */
! 398:
! 399: /*
! 400: * Add one to the bit of the mantissa which corresponds to the LSB of an
! 401: * integer. If the mantissa overflows, then there is a valid integer
! 402: * overflow conversion; otherwise, the mantissa can be converted to the
! 403: * integer.
! 404: */
! 405: paddone:
! 406: or r10,r0,r0 /* clear r10 */
! 407: set r10,r10,1<22> /* set LSB bit to 1 for adding */
! 408: addu.co r8,r8,r10 /* add the 1 obtained from rounding, keeping the carry out */
! 409: clr r11,r7,12<20> /* clear exponent and sign */
! 410: addu.ci r11,r0,r11 /* add carry into the upper mantissa bits */
! 411: bb1 20,r11,overflw /* overflow to 2**31, abort the rest */
! 412: br.n conversiondp /* since the exp. was 30, and the exp. */
! 413: /* did not round up to 31, the largest */
! 414: /* number that S2 could become is 2**31-1 */
! 415: or r7,r0,r11 /* delay slot: store r11 into r7 for conversion */
! 416:
! 417: /*
! 418: * Now check for negative double precision sources. If the exponent is 30,
! 419: * then convert the false alarm. If the exponent is 31, then check the
! 420: * mantissa bits which correspond to integer bits. If any of them are a one,
! 421: * then there is overflow. If they are zero, then check the guard, round,
! 422: * and sticky bits.
! 423: * Round toward zero and positive will not cause a roundup, but round toward
! 424: * nearest and negative may, so perform those roundings. If there is no
! 425: * overflow, then convert and return.
! 426: */
! 427: checkdoubn:
! 428: cmp r11,r10,29 /* compare to 30, but exp. off by 1 */
! 429: bb1 eq,r11,conversiondn /* false alarm if exp. = 30 */
! 430: extu r10,r8,11<21> /* check upper bits of lower mantissa */
! 431: bcnd ne0,r10,overflw /* one of the bits is a 1, so oflow */
! 432: extu r10,r7,20<0> /* check upper bits of upper mantissa */
! 433: bcnd ne0,r10,overflw /* one of the bits is a 1, so oflow */
! 434: bb0 rndlo,r1,possround /* rounding mode is either round near */
! 435: /* or round negative, which may cause */
! 436: /* a round */
! 437: br.n FPintov_return /* rndlo set: round toward zero or positive, */
! 438: /* which will not cause a round */
! 439: set r6,r0,1<sign> /* delay slot: result is -2**31 (sign bit only) */
! 440: possround:
! 441: extu r12,r8,1<20> /* get guard bit */
! 442: extu r11,r8,20<0> /* get bits for sticky bit */
! 443: bcnd.n eq0,r11,nostickyn /* do not set sticky */
! 444: mak r12,r12,1<1> /* delay slot: move guard to bit 1 (runs on both paths) */
! 445: set r12,r12,1<0> /* set sticky bit */
! 446: nostickyn:
! 447: bb1 rndhi,r1,negative /* rounding mode is negative */
! 448: nearest:
! 449: cmp r12,r12,3 /* are both guard and sticky set */
! 450: bb1 eq,r12,overflw /* both guard and sticky are set, */
! 451: /* so signal overflow */
! 452: or r6,r0,r0 /* clear destination register r6 */
! 453: br.n FPintov_return
! 454: set r6,r6,1<sign> /* delay slot: set the sign bit and take care of */
! 455: /* this special case (result is -2**31) */
! 456: negative:
! 457: bcnd ne0,r12,overflw /* -2**31 will be rounded to */
! 458: /* -(2**31+1), so signal overflow */
! 459: or r6,r0,r0 /* clear destination register r6 */
! 460: br.n FPintov_return
! 461: set r6,r6,1<sign> /* delay slot: set the sign bit and take care of */
! 462: /* this special case (result is -2**31) */
! 463:
! 464: /*
! 465: * Since the exp. was 30, and there was no round-up, the largest
! 466: * number that S2 could have been was 2**31 - 1
! 467: */
! 468:
! 469:
! 470: /* Convert the single precision positive floating point number. */
! 471:
! 472: conversionsp:
! 473: extu r6,r8,3<29> /* extract lower bits of integer */
! 474: mak r6,r6,3<7> /* shift left to correct place in integer */
! 475: mak r10,r7,20<10> /* shift left upper bits of integer */
! 476: or r6,r6,r10 /* form most of integer */
! 477: br.n FPintov_return /* r6 is the result word that wrapup stores */
! 478: set r6,r6,1<30> /* delay slot: set hidden one */
! 479:
! 480: /* Convert the single precision negative floating point number. */
! 481:
! 482: conversionsn:
! 483: bb1 eq,r11,exp31s /* use old r11 (set by the cmp in checksingn) to see if exp. is 31 */
! 484: extu r6,r8,3<29> /* extract lower bits of mantissa */
! 485: mak r6,r6,3<7> /* shift left to correct place in integer */
! 486: mak r10,r7,20<10> /* shift left upper bits of integer */
! 487: or r6,r6,r10 /* form most of integer */
! 488: set r6,r6,1<30> /* set hidden one */
! 489: or.c r6,r0,r6 /* complement the magnitude */
! 490: br.n FPintov_return
! 491: addu r6,r6,1 /* delay slot: add 1 to get 2''s complement */
! 492: exp31s:
! 493: or r6,r0,r0 /* clear r6 */
! 494: br.n FPintov_return
! 495: set r6,r6,1<sign> /* delay slot: set sign bit, giving -2**31 */
! 496:
! 497: /* Convert the double precision positive floating point number. */
! 498:
! 499: conversiondp:
! 500: extu r6,r8,10<22> /* extract lower bits of integer */
! 501: mak r10,r7,20<10> /* shift left upper bits of integer */
! 502: or r6,r6,r10 /* form most of integer */
! 503: br.n FPintov_return /* r6 is the result word that wrapup stores */
! 504: set r6,r6,1<30> /* delay slot: set hidden one */
! 505:
! 506: /*
! 507: * Convert the double precision negative floating point number.
! 508: * The number, whose exponent is 30, must be rounded before converting.
! 509: * Bits 4 and 3 are the rounding mode, and bits 2, 1, and 0 are the
! 510: * guard, round, and sticky bits for the branch table.
! 511: */
! 512:
! 513: conversiondn:
! 514: extu r12,r8,1<22> /* get LSB for integer with exp. = 30 */
! 515: mak r12,r12,1<2> /* start to set up field for branch table (index bit 2) */
! 516: extu r11,r8,1<21> /* get guard bit */
! 517: mak r11,r11,1<1> /* set up field for branch table (index bit 1) */
! 518: or r12,r11,r12 /* set up field for branch table */
! 519: extu r11,r8,21<0> /* get bits for sticky bit */
! 520: bcnd eq0,r11,nostkyn /* do not set sticky */
! 521: set r12,r12,1<0> /* set sticky bit (index bit 0) */
! 522: nostkyn:
! 523: rot r11,r1,0<rndlo> /* shift rounding mode to 2 LSB''s; r1 is the substitute FPCR from FPintover */
! 524: mak r11,r11,2<3> /* set up field, clear other bits (index bits 4-3) */
! 525: or r12,r11,r12 /* set up field for branch table */
! 526: lda r12,r0[r12] /* scale r12 */
! 527: or.u r12,r12,hi16(ntable) /* load pointer into table */
! 528: addu r12,r12,lo16(ntable) /* r12 = address of the selected ntable entry */
! 529: jmp r12 /* dispatch through ntable */
! 530:
! 531: ntable: /* index = mode<<3 | LSB<<2 | guard<<1 | sticky; LGS below lists those three bits */
! 532: br nnoaddone /* 0: nearest, LGS=000 */
! 533: br nnoaddone /* 1: nearest, LGS=001 */
! 534: br nnoaddone /* 2: nearest, LGS=010 (tie, LSB even: no round) */
! 535: br naddone /* 3: nearest, LGS=011 */
! 536: br nnoaddone /* 4: nearest, LGS=100 */
! 537: br nnoaddone /* 5: nearest, LGS=101 */
! 538: br naddone /* 6: nearest, LGS=110 (tie, LSB odd: round) */
! 539: br naddone /* 7: nearest, LGS=111 */
! 540: br nnoaddone /* 8: toward zero, LGS=000 */
! 541: br nnoaddone /* 9: toward zero, LGS=001 */
! 542: br nnoaddone /* 10: toward zero, LGS=010 */
! 543: br nnoaddone /* 11: toward zero, LGS=011 */
! 544: br nnoaddone /* 12: toward zero, LGS=100 */
! 545: br nnoaddone /* 13: toward zero, LGS=101 */
! 546: br nnoaddone /* 14: toward zero, LGS=110 */
! 547: br nnoaddone /* 15: toward zero, LGS=111 */
! 548: br nnoaddone /* 16: toward negative, LGS=000 */
! 549: br naddone /* 17: toward negative, LGS=001 */
! 550: br naddone /* 18: toward negative, LGS=010 */
! 551: br naddone /* 19: toward negative, LGS=011 */
! 552: br nnoaddone /* 20: toward negative, LGS=100 */
! 553: br naddone /* 21: toward negative, LGS=101 */
! 554: br naddone /* 22: toward negative, LGS=110 */
! 555: br naddone /* 23: toward negative, LGS=111 */
! 556: br nnoaddone /* 24: toward positive, LGS=000 */
! 557: br nnoaddone /* 25: toward positive, LGS=001 */
! 558: br nnoaddone /* 26: toward positive, LGS=010 */
! 559: br nnoaddone /* 27: toward positive, LGS=011 */
! 560: br nnoaddone /* 28: toward positive, LGS=100 */
! 561: br nnoaddone /* 29: toward positive, LGS=101 */
! 562: br nnoaddone /* 30: toward positive, LGS=110 */
! 563: br nnoaddone /* 31: toward positive, LGS=111 */
! 564:
! 565: /*
! 566: * Add one to the mantissa, and check to see if it overflows to -2**31.
! 567: * The conversion is done in nnoaddone.
! 568: */
! 569:
! 570: naddone:
! 571: or r10,r0,r0 /* clear r10 */
! 572: set r10,r10,1<22> /* set LSB bit to 1 for adding */
! 573: add.co r8,r8,r10 /* add the 1 obtained from rounding, keeping the carry out */
! 574: clr r7,r7,12<20> /* clear exponent and sign */
! 575: add.ci r7,r0,r7 /* add carry into the upper mantissa bits */
! 576: bb1 20,r7,maxneg /* rounded to -2**31,handle separately */
! 577: /* the exponent was originally 30 */
! 578: nnoaddone:
! 579: extu r6,r8,11<22> /* extract lower bits of integer; NOTE(review): width 11 at offset 22 runs past bit 31, while conversiondp uses 10<22> - confirm intended */
! 580: mak r10,r7,20<10> /* shift left upper bits of integer */
! 581: or r6,r6,r10 /* form most of integer */
! 582: set r6,r6,1<30> /* set hidden one */
! 583: or.c r6,r0,r6 /* complement the magnitude */
! 584: br.n FPintov_return
! 585: addu r6,r6,1 /* delay slot: add 1 to get 2''s complement */
! 586:
! 587: maxneg:
! 588: or r6,r0,r0 /* clear integer */
! 589: br.n FPintov_return
! 590: set r6,r6,1<sign> /* delay slot: set sign bit, giving -2**31 */
! 591:
! 592: /* For valid overflows, write the correctly signed largest integer. */
! 593: overflw: /* genuine integer-conversion overflow: flag it and saturate the result in r6 */
! 594: set r2,r2,1<oper> /* set the oper bit in the FPSR value that wrapup writes back */
! 595: bb0.n sign,r7,FPintov_return /* if positive then return */
! 596: set r6,r0,31<0> /* delay slot: r6 = 0x7fffffff, built from r0 so the stale S1 low word in r6 cannot leak into the result */
! 597: or.c r6,r0,r6 /* complement r6, giving 0x80000000, the largest negative */
! 598: /* integer */
! 599:
! 600: FPintov_return: /* common exit for every FPintover path */
! 601: ld r1,r31,0 /* load return address from memory */
! 602: jmp r1 /* return to the address saved by FPintover */
! 603:
! 604: /*
! 605: * Some instructions only have the S2 operations, so clear S1HI and S1LO
! 606: * for those instructions so that the previous contents of S1HI and S1LO
! 607: * do not influence this instruction.
! 608: */
! 609:
! 610: ASLOCAL(FPresoper)
! 611: st r1, r31, 0 /* save return address */
! 612: extu r10,r9,5<11> /* extract opcode */
! 613: #if 0
! 614: cmp r11,r10,FSQRTop /* compare to FSQRT */
! 615: bb1 eq,r11,S1clear /* clear S1 if instruction only had S2 operand */
! 616: #endif
! 617: cmp r11,r10,INTop /* compare to INT */
! 618: bb1 eq,r11,S1clear /* clear S1 if instruction only had S2 operand */
! 619: cmp r11,r10,NINTop /* compare to NINT */
! 620: bb1 eq,r11,S1clear /* clear S1 if instruction only had S2 operand */
! 621: cmp r11,r10,TRNCop /* compare to TRNC */
! 622: bb0 eq,r11,opercheck /* not TRNC either: keep S1 and check for reserved operands */
! 623:
! 624: ASLOCAL(S1clear)
! 625: or r5,r0,r0 /* clear any NaN''s, denorms, or infinities */
! 626: or r6,r0,r0 /* that may be left in S1HI,S1LO from a */
! 627: /* previous instruction; falls through to opercheck */
! 628:
! 629: /*
! 630: * r12 contains the following flags:
! 631: * bit 9 -- s1sign
! 632: * bit 8 -- s2sign
! 633: * bit 7 -- s1nan
! 634: * bit 6 -- s2nan
! 635: * bit 5 -- s1inf
! 636: * bit 4 -- s2inf
! 637: * bit 3 -- s1zero
! 638: * bit 2 -- s2zero
! 639: * bit 1 -- s1denorm
! 640: * bit 0 -- s2denorm
! 641: */
! 642:
! 643: /*
! 644: * Using code for both single and double precision, check if S1 is either
! 645: * a NaN or infinity and set the appropriate flags in r12. Then check if
! 646: * S2 is a NaN or infinity. If it is a NaN, then branch to the NaN routine.
! 647: */
! 648:
! 649: ASLOCAL(opercheck)
! 650: extu r10,r5,11<20> /* internal representation for double */
! 651: bb1.n s1size,r9,S1NaNdoub /* S1 is double precision */
! 652: or r12,r0,r0 /* delay slot: clear operand flag register (runs on both paths) */
! 653: ASLOCAL(S1NaNsing)
! 654: xor r10,r10,0x0080 /* internal representation for single */
! 655: ext r10,r10,8<0> /* precision is IEEE 8 bits sign extended */
! 656: /* to 11 bits; for real exp. > 0, the */
! 657: /* above instructions give a result exp. */
! 658: /* that has the MSB flipped and sign */
! 659: /* extended like in the IMPCR */
! 660: cmp r11,r10,127 /* Is exponent equal to IEEE 255 (here 127) */
! 661: bb1 ne,r11,S2NaN /* source 1 is not a NaN or infinity */
! 662: mak r10,r5,20<0> /* load r10 with upper bits of S1 mantissa */
! 663: extu r11,r6,3<29> /* get 3 upper bits of lower word */
! 664: or r11,r10,r11 /* combine any existing 1''s */
! 665: bcnd eq0,r11,noS1NaNs /* since r11 can only hold 0 or a */
! 666: /* > 0 number, branch to noS1NaNs when eq0 */
! 667: br.n S2NaN /* see if S2 has a NaN */
! 668: set r12,r12,1<s1nan> /* delay slot: indicate that S1 has a NaN */
! 669: ASLOCAL(noS1NaNs)
! 670: br.n S2NaN /* check contents of S2 */
! 671: set r12,r0,1<s1inf> /* delay slot: indicate that S1 has an infinity */
! 672:
! 673: ASLOCAL(S1NaNdoub)
! 674: xor r10,r10,0x0400 /* precision is the same IEEE 11 bits */
! 675: /* The above instruction gives a result exp. */
! 676: /* that has the MSB flipped and sign */
! 677: /* extended like in the IMPCR */
! 678: cmp r11,r10,1023 /* Is exp. equal to IEEE 2047 (internal 1023) */
! 679: bb1 ne,r11,S2NaN /* source 1 is not a NaN or infinity */
! 680: mak r10,r5,20<0> /* load r10 with upper bits of S1 mantissa */
! 681: or r11,r6,r10 /* combine existing 1''s of mantissa */
! 682: bcnd eq0,r11,noS1NaNd /* since r11 can only hold 0 or a > 0 */
! 683: /* number, branch to noS1NaNd when eq0 */
! 684: br.n S2NaN /* see if S2 has a NaN */
! 685: set r12,r12,1<s1nan> /* delay slot: indicate that S1 has a NaN */
! 686: ASLOCAL(noS1NaNd)
! 687: set r12,r0,1<s1inf> /* indicate that S1 has an infinity; falls through to S2NaN */
! 688:
! 689: ASLOCAL(S2NaN)
! 690: bb1.n s2size,r9,S2NaNdoub /* S2 is double precision */
! 691: extu r10,r7,11<20> /* delay slot: internal representation for double (runs on both paths) */
! 692: ASLOCAL(S2NaNsing)
! 693: xor r10,r10,0x0080 /* internal representation for single */
! 694: ext r10,r10,8<0> /* precision is IEEE 8 bits sign extended */
! 695: /* to 11 bits; for real exp. > 0, the */
! 696: /* above instructions give a result exp. */
! 697: /* that has the MSB flipped and sign */
! 698: /* extended like in the IMPCR */
! 699: cmp r11,r10,127 /* Is exponent equal to IEEE 255 (here 127) */
! 700: bb1 ne,r11,inf /* source 2 is not a NaN or infinity */
! 701: mak r10,r7,20<0> /* load r10 with upper bits of S2 mantissa */
! 702: extu r11,r8,3<29> /* get 3 upper bits of lower word */
! 703: or r11,r10,r11 /* combine any existing 1''s */
! 704: bcnd eq0,r11,noS2NaNs /* since r11 can only hold 0 or a > 0 */
! 705: /* number, branch to noS2NaNs when eq0 */
! 706: br.n _ASM_LABEL(NaN) /* branch to NaN routine */
! 707: set r12,r12,1<s2nan> /* delay slot: indicate that S2 has a NaN */
! 708: ASLOCAL(noS2NaNs)
! 709: bb0 s1nan,r12, 1f /* skip the NaN branch if S1 is not a NaN */
! 710: br _ASM_LABEL(NaN) /* S1 is a NaN */
! 711: 1:
! 712: br.n _ASM_LABEL(infinity) /* If S1 had a NaN we would have */
! 713: /* already branched, and S2 does not have a */
! 714: /* NaN, but it does have an infinity, so */
! 715: /* branch to handle the infinity */
! 716: set r12,r12,1<s2inf> /* delay slot: indicate that S2 has an infinity */
! 717:
! 718: ASLOCAL(S2NaNdoub)
! 719: xor r10,r10,0x0400 /* precision is the same IEEE 11 bits */
! 720: /* The above instruction gives a result exp. */
! 721: /* that has the MSB flipped and sign */
! 722: /* extended like in the IMPCR */
! 723: cmp r11,r10,1023 /* Is exp. equal to IEEE 2047 (internal 1023) */
! 724: bb1 ne,r11,inf /* source 2 is not a NaN or infinity */
! 725: mak r10,r7,20<0> /* load r10 with upper bits of S2 mantissa */
! 726: or r11,r8,r10 /* combine existing 1''s of mantissa */
! 727: bcnd eq0,r11,noS2NaNd /* since r11 can only hold 0 or a > 0 */
! 728: /* number, branch to noS2NaNd when eq0 */
! 729: br.n _ASM_LABEL(NaN) /* branch to NaN routine */
! 730: set r12,r12,1<s2nan> /* delay slot: indicate that S2 has a NaN */
! 731: ASLOCAL(noS2NaNd)
! 732: bb0 s1nan,r12,1f /* skip the NaN branch if S1 is not a NaN */
! 733: br _ASM_LABEL(NaN) /* S1 is a NaN */
! 734: 1:
! 735: br.n _ASM_LABEL(infinity) /* If S1 had a NaN we would have */
! 736: /* already branched, and S2 does not have a */
! 737: /* NaN, but it does have an infinity, so */
! 738: /* branch to handle the infinity */
! 739: set r12,r12,1<s2inf> /* delay slot: indicate that S2 has an infinity */
! 740:
! 741: /*
! 742: * If S2 was a NaN, the routine would have already branched to NaN. If S1
! 743: * is a NaN, then branch to NaN. If S1 is not a NaN and S2 is infinity, then
! 744: * we would have already branched to infinity. If S1 is infinity, then branch.
! 745: * If the routine still has not branched, then branch to denorm, the only
! 746: * reserved operand left.
! 747: */
! 748:
! 749: ASLOCAL(inf)
! 750: bb0 s1nan,r12,1f /* skip the NaN branch if S1 is not a NaN */
! 751: br _ASM_LABEL(NaN) /* S1 has a NaN and S2 does not */
! 752: 1:
! 753: bb0 s1inf,r12,2f /* Neither S1 nor S2 has a NaN, and we would */
! 754: /* have branched already if S2 had an */
! 755: /* infinity, so skip ahead unless S1 is infinity */
! 756: br _ASM_LABEL(infinity) /* S1 is an infinity */
! 757: 2:
! 758: br _ASM_LABEL(denorm) /* branch to denorm, the only */
! 759: /* remaining alternative */
! 760:
! 761: /*
! 762: * Branch to the routine to make a denormalized number.
! 763: */
! 764: ASLOCAL(FPunderflow)
! 765: st r1,r31,0 /* save return address */
! 766: set r2,r2,1<underflow> /* set the underflow bit in the FPSR value held in r2 */
! 767: set r2,r2,1<inexact> /* set the inexact bit as well; control continues into FPU_denorm */
! 768:
! 769: /*
! 770: * Now the floating point number, which has an exponent smaller than what
! 771: * IEEE allows, must be denormalized. Denormalization is done by calculating
! 772: * the difference between a denormalized exponent and an underflow exponent
! 773: * and shifting the mantissa by that amount. A one may need to be subtracted
! 774: * from the LSB if a one was added during rounding.
! 775: * r9 is used to contain the guard, round, sticky, and an inaccuracy bit in
! 776: * case some bits were shifted off the mantissa during denormalization.
! 777: * r9 will contain:
! 778: * bit 4 -- new addone if one added during rounding after denormalization
! 779: * bit 3 -- inaccuracy flag caused by denormalization or pre-denormalization
! 780: * inexactness
! 781: * bit 2 -- guard bit of result
! 782: * bit 1 -- round bit of result
! 783: * bit 0 -- sticky bit of result
! 784: */
! 785:
! 786: FPU_denorm:
! 787: bb1.n destsize,r12,Udouble /* denorm for double */
! 788: extu r9,r10,3<26> /* load r9 with grs */
! 789: Usingle:
! 790: mak r5,r10,21<3> /* extract high 21 bits of mantissa */
! 791: extu r6,r11,3<29> /* extract low 3 bits of mantissa */
! 792: or r11,r5,r6 /* form 24 bits of mantissa */
! 793:
! 794: /* See if the addone bit is set and unround if it is. */
! 795: bb0.n 25,r10,nounrounds /* do not unround if addone bit clear */
! 796: extu r6,r12,12<20> /* extract signed exponent from IMPCR */
! 797: unrounds:
! 798: subu r11,r11,1 /* subtract 1 from mantissa */
! 799:
! 800: /*
! 801: * If the hidden bit is cleared after subtracting the one, then the one added
! 802: * during the rounding must have propagated through the mantissa. The exponent
! 803: * will need to be decremented.
! 804: */
! 805: bb1 23,r11,nounrounds /* if hidden bit is set,then exponent */
! 806: /* does not need to be decremented */
! 807: decexps:
! 808: sub r6,r6,1 /* decrement exponent 1 */
! 809: set r11,r11,1<23> /* set the hidden bit */
! 810:
! 811: /*
! 812: * For both single and double precision, there are cases where it is easier
! 813: * and quicker to make a special case. Examples of this are if the shift
! 814: * amount is only 1 or 2, or all the mantissa is shifted off, or all the
! 815: * mantissa is shifted off and it is still shifting, or, in the case of
! 816: * doubles, if the shift amount is around the boundary of MANTLO and MANTHI.
! 817: */
! 818:
! 819: nounrounds: /* r6 = exponent, r11 = 24-bit mantissa, r9 = grs: choose a shift handler */
! 820: or r8,r0,lo16(0x00000f81) /* load r8 with -127 in decimal */
! 821: /* for lowest 12 bits */
! 822: sub r7,r8,r6 /* find difference between two exponents, */
! 823: /* r7 is the right-shift amount */
! 824: cmp r6,r7,3 /* check to see if r7 contains 3 or more */
! 825: bb1 ge,r6,threesing /* br to code that handles shifts of >=3 */
! 826: cmp r6,r7,2 /* check to see if r7 contains 2 */
! 827: bb1 eq,r6,twosing /* br to code that handles shifts of 2 */
! 828: one: /* remaining case: shift the mantissa right by one */
! 829: rot r9,r9,0<1> /* rotate roundoff register once, this places */
! 830: /* guard in round and round in sticky; old sticky lands in bit 31 */
! 831: bb0 31,r9,nosticky1s /* old sticky clear: new sticky is just old round */
! 832: /* (bit 31 itself is cleared by the final mak) */
! 833: set r9,r9,1<0> /* new sticky = old round OR old sticky */
! 834: nosticky1s:
! 835: bb0 0,r11,guardclr1s /* do not set guard bit if LSB = 0 */
! 836: set r9,r9,1<2> /* new guard = mantissa bit shifted out */
! 837: guardclr1s:
! 838: extu r11,r11,31<1> /* shift mantissa right 1 */
! 839: br.n round /* round result */
! 840: mak r9,r9,3<0> /* delay slot: keep only grs, dropping the rotated-out bit */
! 841:
! 842: twosing: /* single precision, shift the mantissa right by two */
! 843: rot r9,r9,0<2> /* rotate roundoff register twice: old guard -> bit 0, */
! 844: /* old round -> bit 31, old sticky -> bit 30 */
! 845: bb0 30,r9,nosticky2s /* old sticky clear: go look at old round */
! 846: /* (bits 30 and 31 are cleared by the final mak) */
! 847: br.n noround2s /* old sticky set: no need to test old round */
! 848: /* as well */
! 849: set r9,r9,1<0> /* delay slot: new sticky = 1 */
! 850: nosticky2s:
! 851: bb0 31,r9,noround2s /* old round clear: new sticky is just old guard */
! 852: /* (bit 31 is cleared by the final mak) */
! 853: set r9,r9,1<0> /* old round set: new sticky = 1 */
! 854: noround2s:
! 855: bb0 0,r11,roundclr2s /* do not set round bit if LSB = 0 */
! 856: set r9,r9,1<1> /* new round = mantissa bit 0 */
! 857: roundclr2s:
! 858: bb0 1,r11,guardclr2s /* do not set guard bit if LSB + 1 = 0 */
! 859: set r9,r9,1<2> /* new guard = mantissa bit 1 */
! 860: guardclr2s:
! 861: extu r11,r11,30<2> /* shift mantissa right 2 */
! 862: br.n round /* round result */
! 863: mak r9,r9,3<0> /* delay slot: keep only grs, dropping the rotated-out bits */
! 864:
! 865: threesing: /* single precision, shift of three or more */
! 866: /* Every old guard, round and sticky bit ends up below the new round bit, */
! 867: /* so all three fold into the new sticky bit. r9 holds only those three */
! 868: nosticky3s:
! 869: /* bits here (it was loaded with a 3-bit extu), so "any bit set" is simply */
! 870: /* r9 != 0. The old guard and round MUST be cleared on every path: the code */
! 871: /* after noguard3s only ever sets the new round/guard bits, never clears them. */
! 872: noround3s:
! 873: bcnd eq0,r9,noguard3s /* no old guard/round/sticky: nothing to fold in */
! 874: or r9,r0,1 /* new sticky = 1, old guard and round cleared */
! 875: /* (previously the old round/guard survived when old sticky or old round */
! 876: /* was set, which could turn a round-down into a round-up) */
! 877: noguard3s: /* r7 = shift amount (>= 3), r11 = 24-bit mantissa, r9 = folded sticky */
! 878: cmp r6,r7,23 /* check if # of shifts is <=23 */
! 879: bb1 gt,r6,s24 /* more than 23: handled by s24 and later */
! 880: sub r6,r7,2 /* number of mantissa bits that fall into sticky */
! 881: mak r6,r6,5<5> /* shift width into width field */
! 882: mak r8,r11,r6 /* r8 = the low (shift - 2) mantissa bits */
! 883: ff1 r8,r8 /* see if r8 has any ones */
! 884: bb1 5,r8,nostky23 /* do not set sticky if no ones found */
! 885: set r9,r9,1<0> /* set sticky bit */
! 886: nostky23:
! 887: or r8,r0,34 /* start code to get new mantissa plus two */
! 888: /* extra bits for new round and new guard */
! 889: /* bits */
! 890: subu r8,r8,r7 /* r8 = 34 - shift = width to extract */
! 891: mak r8,r8,5<5> /* shift field width into second five bits */
! 892: extu r6,r6,5<5> /* shift previous shifted -2 into offset field */
! 893: or r6,r6,r8 /* complete field */
! 894: extu r11,r11,r6 /* form new mantissa with two extra bits */
! 895:
! 896: bb0 0,r11,nornd3s /* do not set new round bit */
! 897: set r9,r9,1<1> /* set new round bit (only ever set here: r9 bit 1 must be clear on entry) */
! 898: nornd3s:
! 899: bb0 1,r11,nogrd3s /* do not set new guard bit */
! 900: set r9,r9,1<2> /* set new guard bit (only ever set here: r9 bit 2 must be clear on entry) */
! 901: nogrd3s:
! 902: br.n round /* round mantissa */
! 903: extu r11,r11,30<2> /* delay slot: shift off remaining two bits */
! 904:
! 905: s24: /* single precision, shift of exactly 24: the whole mantissa is shifted out */
! 906: cmp r6,r7,24 /* check to see if # of shifts is 24 */
! 907: bb1 gt,r6,s25 /* branch to see if shifts = 25 */
! 908: bb1 0,r9,nostky24 /* skip checking if old sticky set */
! 909: extu r8,r11,22<0> /* mantissa bits 0-21 are the ones shifted */
! 910: /* into the sticky */
! 911: ff1 r8,r8 /* see if there are any ones */
! 912: bb1 5,r8,nostky24 /* do not set sticky if no ones found */
! 913: set r9,r9,1<0> /* set sticky bit */
! 914: nostky24:
! 915: bb0 22,r11,nornd24 /* do not set new round bit */
! 916: set r9,r9,1<1> /* new round = mantissa bit 22 (only set, never cleared here) */
! 917: nornd24:
! 918: set r9,r9,1<2> /* set new guard bit,this is hidden bit */
! 919: br.n round /* round mantissa */
! 920: or r11,r0,r0 /* delay slot: clear r11, all of mantissa shifted off */
! 921:
! 922: s25: /* shift of exactly 25: the hidden bit becomes the round bit */
! 923: cmp r6,r7,25 /* check to see if # of shifts is 25 */
! 924: bb1 gt,r6,s26 /* branch to execute for shifts => 26 */
! 925: bb1 0,r9,nostky25 /* skip checking if old sticky set */
! 926: extu r8,r11,23<0> /* mantissa bits 0-22 are the ones shifted */
! 927: /* into the sticky */
! 928: ff1 r8,r8 /* see if there are any ones */
! 929: bb1 5,r8,nostky25 /* do not set sticky if no ones found */
! 930: set r9,r9,1<0> /* set sticky bit */
! 931: nostky25:
! 932: set r9,r9,1<1> /* set new round bit,this is hidden bit */
! 933: clr r9,r9,1<2> /* clear guard bit since nothing shifted in */
! 934: br.n round /* round and assemble result */
! 935: or r11,r0,r0 /* delay slot: clear r11, all of mantissa shifted off */
! 936:
! 937: s26: /* shift of 26 or more: everything, hidden bit included, is sticky */
! 938: set r9,r9,1<0> /* set sticky bit,this contains hidden bit */
! 939: clr r9,r9,2<1> /* clear guard and round bits since nothing */
! 940: /* shifted in */
! 941: br.n round /* round and assemble result */
! 942: or r11,r0,r0 /* delay slot: clear mantissa */
! 943:
! 944: Udouble: /* double precision: mantissa is r5 (high, hidden bit at 20) : r11 (low) */
! 945: mak r5,r10,21<0> /* low 21 bits of r10: upper bits of mantissa */
! 946: bb0.n 25,r10,nounroundd /* do not unround if addone bit clear */
! 947: extu r6,r12,12<20> /* delay slot: r6 = 12-bit exponent field from IMPCR (extracted unsigned) */
! 948: unroundd:
! 949: or r8,r0,1 /* r8 = 1 so that subu.co can be used */
! 950: subu.co r11,r11,r8 /* subtract 1 from low word, keeping the borrow */
! 951: subu.ci r5,r5,r0 /* subtract borrow from upper word */
! 952: bb1 20,r5,nounroundd /* if hidden bit is set, then exponent does */
! 953: /* not need to be decremented */
! 954: decexpd:
! 955: sub r6,r6,1 /* decrement exponent by 1 */
! 956: set r5,r5,1<20> /* set the hidden bit again */
! 957:
! 958: nounroundd: /* r6 = exponent, r5:r11 = mantissa, r9 = grs: choose a shift handler */
! 959: or r8,r0,lo16(0x00000c01) /* load r8 with -1023 in decimal */
! 960: /* for lowest 12 bits */
! 961: sub r7,r8,r6 /* find difference between two exponents, */
! 962: /* r7 is the right-shift amount */
! 963: cmp r6,r7,3 /* check to see if r7 contains 3 or more */
! 964: bb1 ge,r6,threedoub /* br to code that handles shifts of >=3 */
! 965: cmp r6,r7,2 /* check to see if r7 contains 2 */
! 966: bb1 eq,r6,twodoub /* br to code that handles shifts of 2 */
! 967:
! 968: onedoub: /* remaining case: shift the two-word mantissa right by one */
! 969: rot r9,r9,0<1> /* rotate roundoff register once, this places */
! 970: /* guard in round and round in sticky; old sticky lands in bit 31 */
! 971: bb0 31,r9,nosticky1d/* old sticky clear: new sticky is just old round */
! 972: /* (bit 31 is cleared by the final mak) */
! 973: set r9,r9,1<0> /* or old round and old sticky into new sticky */
! 974: nosticky1d:
! 975: bb0 0,r11,guardclr1d /* do not set new guard bit if old LSB = 0 */
! 976: set r9,r9,1<2> /* set new guard bit */
! 977: guardclr1d:
! 978: extu r11,r11,31<1> /* shift lower mantissa over 1 */
! 979: mak r6,r5,1<31> /* bit 0 of the high word, moved to bit 31 */
! 980: or r11,r6,r11 /* load high bit onto lower mantissa */
! 981: extu r5,r5,20<1> /* shift right once upper 20 bits of mantissa */
! 982: br.n round /* round mantissa and assemble result */
! 983: mak r9,r9,3<0> /* delay slot: keep only grs, dropping the rotated-out bit */
! 984:
! 985: twodoub: /* double precision, shift the two-word mantissa right by two */
! 986: rot r9,r9,0<2> /* rotate roundoff register twice: old guard -> bit 0, */
! 987: /* old round -> bit 31, old sticky -> bit 30 */
! 988: bb0 30,r9,nosticky2d /* old sticky clear: go look at old round */
! 989: /* instead */
! 990: br.n noround2d /* old sticky set: no need to test old round */
! 991: /* as well */
! 992: set r9,r9,1<0> /* delay slot: or old guard and old sticky into new sticky */
! 993: nosticky2d:
! 994: bb0 31,r9,noround2d /* do not or old guard and old round if */
! 995: /* old round is 0 */
! 996: set r9,r9,1<0> /* or old guard and old round into new sticky */
! 997: noround2d:
! 998: bb0 0,r11,roundclr2d /* do not set round bit if old LSB = 0 */
! 999: set r9,r9,1<1> /* set new round bit */
! 1000: roundclr2d:
! 1001: bb0 1,r11,guardclr2d /* do not set guard bit if old LSB + 1 = 0 */
! 1002: set r9,r9,1<2> /* set new guard bit */
! 1003: guardclr2d:
! 1004: extu r11,r11,30<2> /* shift lower mantissa over 2 */
! 1005: mak r6,r5,2<30> /* bits 0-1 of the high word, moved to bits 30-31 */
! 1006: or r11,r6,r11 /* load high bits onto lower mantissa */
! 1007: extu r5,r5,19<2> /* shift right twice upper 19 bits of mantissa */
! 1008: br.n round /* round mantissa and assemble result */
! 1009: mak r9,r9,3<0> /* delay slot: keep only grs, dropping the rotated-out bits */
! 1010:
! 1011: threedoub: /* double precision, shift of three or more */
! 1012: /* Every old guard, round and sticky bit ends up below the new round bit, */
! 1013: /* so all three fold into the new sticky bit. r9 holds only those three */
! 1014: nosticky3d:
! 1015: /* bits here (it was loaded with a 3-bit extu), so "any bit set" is simply */
! 1016: /* r9 != 0. The old guard and round MUST be cleared on every path: the */
! 1017: /* handlers after noguard3d only ever set the new round/guard bits. */
! 1018: /* (Previously they survived whenever old sticky or old round was set.) */
! 1019: noround3d:
! 1020: bcnd eq0,r9,noguard3d /* no old guard/round/sticky: nothing to fold in */
! 1021: or r9,r0,1 /* new sticky = 1, old guard and round cleared */
! 1022: /* so that the round section sees only the */
! 1023: /* freshly computed guard and round bits */
! 1024: noguard3d: /* r7 = shift amount (>= 3), r5:r11 = mantissa, r9 = folded sticky */
! 1025: cmp r6,r7,32 /* do I need to work with a 1 or 2 word mant. */
! 1026: /* when forming sticky, round and guard */
! 1027: bb1 gt,r6,d33 /* jump to code that handles 2 word mantissas */
! 1028: sub r6,r7,2 /* number of low-word bits that fall into sticky */
! 1029: mak r6,r6,5<5> /* shift width into width field */
! 1030: mak r8,r11,r6 /* r8 = the low (shift - 2) bits of the low word */
! 1031: ff1 r8,r8 /* see if r8 has any ones */
! 1032: bb1 5,r8,nostky32 /* do not set sticky if no ones found */
! 1033: set r9,r9,1<0> /* set sticky bit */
! 1034: nostky32:
! 1035: or r8,r0,34 /* start code to get new mantissa plus two */
! 1036: /* extra bits for new round and new guard bits, */
! 1037: /* the upper word bits will be shifted after */
! 1038: /* the round and guard bits are handled */
! 1039: subu r8,r8,r7 /* r8 = 34 - shift = width to extract */
! 1040: mak r8,r8,5<5> /* shift field width into second five bits */
! 1041: extu r6,r6,5<5> /* shift previous shifted -2 into offset field */
! 1042: or r6,r6,r8 /* complete bit field */
! 1043: extu r11,r11,r6 /* partially form new low mantissa with 2 more */
! 1044: /* bits */
! 1045: bb0 0,r11,nornd32d /* do not set new round bit */
! 1046: set r9,r9,1<1> /* set new round bit (only ever set here: r9 bit 1 must be clear on entry) */
! 1047: nornd32d:
! 1048: bb0 1,r11,nogrd32d /* do not set new guard bit */
! 1049: set r9,r9,1<2> /* set new guard bit (only ever set here: r9 bit 2 must be clear on entry) */
! 1050: nogrd32d:
! 1051: extu r11,r11,30<2> /* shift off remaining two bits */
! 1052: mak r6,r7,5<5> /* shift field width into second 5 bits, if the */
! 1053: /* width is 32, then these bits will be 0 */
! 1054: or r8,r0,32 /* load word length into r8 */
! 1055: sub r8,r8,r7 /* form offset for high bits moved to low word */
! 1056: or r6,r6,r8 /* form complete bit field */
! 1057: mak r6,r5,r6 /* get shifted bits of high word */
! 1058: or r11,r6,r11 /* form new low word of mantissa */
! 1059: bcnd ne0,r8,regular33 /* r8 != 0 (shift < 32): shift the high word normally */
! 1060: br.n round /* shift is exactly 32: a zero width field would */
! 1061: or r5,r0,r0 /* extract all 32 bits, so clear the high word */
! 1062: /* directly (delay slot of the br.n above) */
! 1063: regular33:
! 1064: mak r6,r7,5<0> /* place lower 5 bits of shift into r6 */
! 1065: mak r8,r8,5<5> /* shift r8 into width field */
! 1066: or r6,r6,r8 /* form field for shifting of upper bits */
! 1067: br.n round /* round and assemble result */
! 1068: extu r5,r5,r6 /* delay slot: form new high word mantissa */
! 1069:
! 1070: d33: /* double precision, shift of exactly 33 */
! 1071: cmp r6,r7,33 /* is the number of bits to be shifted is 33? */
! 1072: bb1 gt,r6,d34 /* check to see if # of bits is 34 */
! 1073: bb1 0,r9,nostky33 /* skip checking if old sticky set */
! 1074: mak r6,r11,31<0> /* r6 = low-word bits 0-30, which all fall into sticky */
! 1075: ff1 r8,r6 /* check them for ones (was ff1 r8,r8, which tested a stale constant and always set sticky) */
! 1076: bb1 5,r8,nostky33 /* do not set sticky if there are no ones */
! 1077: set r9,r9,1<0> /* set new sticky bit */
! 1078: nostky33:
! 1079: bb0 31,r11,nornd33 /* do not set round if bit is not a 1 */
! 1080: set r9,r9,1<1> /* new round = bit 31 of the low word */
! 1081: nornd33:
! 1082: bb0 0,r5,nogrd33 /* do not set guard bit if bit is not a 1 */
! 1083: set r9,r9,1<2> /* new guard = bit 0 of the high word */
! 1084: nogrd33:
! 1085: extu r11,r5,31<1> /* shift high bits into low word */
! 1086: br.n round /* round and assemble result */
! 1087: or r5,r0,r0 /* delay slot: clear high word */
! 1088:
! 1089: d34: /* double precision, shift of exactly 34: the whole low word is sticky */
! 1090: cmp r6,r7,34 /* is the number of bits to be shifted 34? */
! 1091: bb1 gt,r6,d35 /* more than 34: handled by d35 and later */
! 1092: bb1 0,r9,nostky34 /* skip checking if old sticky set */
! 1093: ff1 r8,r11 /* check bits that will be shifted into sticky */
! 1094: bb1 5,r8,nostky34 /* do not set sticky if there are no ones */
! 1095: set r9,r9,1<0> /* set new sticky bit */
! 1096: nostky34:
! 1097: bb0 0,r5,nornd34 /* do not set round if bit is not a 1 */
! 1098: set r9,r9,1<1> /* new round = bit 0 of the high word */
! 1099: nornd34:
! 1100: bb0 1,r5,nogrd34 /* do not set guard bit if bit is not a 1 */
! 1101: set r9,r9,1<2> /* new guard = bit 1 of the high word */
! 1102: nogrd34:
! 1103: extu r11,r5,30<2> /* shift high bits into low word */
! 1104: br.n round /* round and assemble result */
! 1105: or r5,r0,r0 /* delay slot: clear high word */
! 1106:
! 1107: d35: /* double precision, shift of 35 to 52 */
! 1108: cmp r6,r7,52 /* see if # of shifts is 35 <= X <= 52 */
! 1109: bb1 gt,r6,d53 /* more than 52: handled by d53 and later */
! 1110: bb1.n 0,r9,nostky35 /* skip checking if old sticky set */
! 1111: sub r7,r7,34 /* delay slot, runs on both paths: subtract 32 so that opera- */
! 1112: /* tions can be done on the upper word, and */
! 1113: /* then subtract two more for the new guard and */
! 1114: /* round bits */
! 1115: ff1 r8,r11 /* see if lower word has a bit for sticky */
! 1116: bb1 5,r8,stkycheck35 /* low word is all zero: check the upper word too */
! 1117: br.n nostky35 /* quit checking for sticky */
! 1118: set r9,r9,1<0> /* delay slot: set sticky bit */
! 1119: stkycheck35:
! 1120: mak r6,r7,5<5> /* place width into width field */
! 1121: mak r8,r5,r6 /* mask off shifted bits - 2 */
! 1122: ff1 r8,r8 /* see if r8 has any ones */
! 1123: bb1 5,r8,nostky35 /* do not set sticky if no ones found */
! 1124: set r9,r9,1<0> /* set sticky bit */
! 1125: nostky35:
! 1126: or r8,r0,32 /* look at what does not get shifted off plus */
! 1127: /* round and guard, remember that the r7 value */
! 1128: /* was adjusted so that it did not include */
! 1129: /* new round or new guard in shifted off bits */
! 1130: subu r8,r8,r7 /* complement width */
! 1131: mak r8,r8,5<5> /* shift width into width field */
! 1132: or r8,r7,r8 /* add offset field */
! 1133: extu r11,r5,r8 /* extract upper bits into low word */
! 1134: bb0 0,r11,nornd35 /* do not set new round bit */
! 1135: set r9,r9,1<1> /* set new round bit */
! 1136: nornd35:
! 1137: bb0 1,r11,nogrd35 /* do not set new guard bit */
! 1138: set r9,r9,1<2> /* set new guard bit */
! 1139: nogrd35:
! 1140: extu r11,r11,30<2> /* shift off remaining guard and round bits */
! 1141: br.n round /* round and assemble result */
! 1142: or r5,r0,r0 /* delay slot: clear high word */
! 1143:
! 1144: d53: /* double precision, shift of exactly 53: the whole mantissa is shifted out */
! 1145: cmp r6,r7,53 /* check to see if # of shifts is 53 */
! 1146: bb1 gt,r6,d54 /* branch to see if shifts = 54 */
! 1147: bb1 0,r9,nostky53 /* skip checking if old sticky set */
! 1148: ff1 r8,r11 /* see if lower word has a bit for sticky */
! 1149: bb1 5,r8,stkycheck53 /* low word is all zero: check the upper word too */
! 1150: br.n nostky53 /* quit checking for sticky */
! 1151: set r9,r9,1<0> /* delay slot: set sticky bit */
! 1152: stkycheck53:
! 1153: mak r6,r5,19<0> /* high-word bits 0-18 are shifted into sticky */
! 1154: ff1 r8,r6 /* see if r6 has any ones */
! 1155: bb1 5,r8,nostky53 /* do not set sticky if no ones found */
! 1156: set r9,r9,1<0> /* set sticky bit */
! 1157: nostky53:
! 1158: bb0 19,r5,nornd53 /* do not set new round bit */
! 1159: set r9,r9,1<1> /* new round = high-word bit 19 */
! 1160: nornd53:
! 1161: set r9,r9,1<2> /* set new guard bit,this is hidden bit */
! 1162: or r5,r0,r0 /* clear high word */
! 1163: br.n round /* round and assemble result */
! 1164: or r11,r0,r0 /* delay slot: clear low word */
! 1165:
! 1166: d54: /* shift of exactly 54: the hidden bit becomes the round bit */
! 1167: cmp r6,r7,54 /* check to see if # of shifts is 54 */
! 1168: bb1 gt,r6,d55 /* branch to execute for shifts =>55 */
! 1169: bb1 0,r9,nostky54 /* skip checking if old sticky set */
! 1170: ff1 r8,r11 /* see if lower word has a bit for sticky */
! 1171: bb1 5,r8,stkycheck54 /* low word is all zero: check the upper word too */
! 1172: br.n nostky54 /* quit checking for sticky */
! 1173: set r9,r9,1<0> /* delay slot: set sticky bit */
! 1174: stkycheck54:
! 1175: mak r6,r5,20<0> /* high-word bits 0-19 are shifted into sticky */
! 1176: ff1 r8,r6 /* see if r6 has any ones */
! 1177: bb1 5,r8,nostky54 /* do not set sticky if no ones found */
! 1178: set r9,r9,1<0> /* set sticky bit */
! 1179: nostky54:
! 1180: set r9,r9,1<1> /* set new round bit,this is hidden bit */
! 1181: clr r9,r9,1<2> /* clear guard bit since nothing shifted in */
! 1182: or r5,r0,r0 /* clear high word */
! 1183: br.n round /* round and assemble result */
! 1184: or r11,r0,r0 /* delay slot: clear low word */
! 1185:
! 1186: d55: /* shift of 55 or more: everything is sticky; falls through into round */
! 1187: set r9,r9,1<0> /* set new sticky bit,this contains hidden bit */
! 1188: clr r9,r9,2<1> /* clear guard and round bits since nothing */
! 1189: /* shifted in */
! 1190: or r5,r0,r0 /* clear high word */
! 1191: or r11,r0,r0 /* clear low word */
! 1192:
! 1193:
! 1194: /* The rounding code first checks whether any of guard, round, or sticky */
! 1195: /* is set. If all are clear, then there is no denormalization loss and no */
! 1196: /* need to round, so it branches straight to assembling the answer. */
! 1197: /* Otherwise a 64-entry branch table is indexed. The two most significant */
! 1198: /* index bits are the rounding mode. The next bit is either the LSB of the */
! 1199: /* mantissa or the sign bit, depending on the rounding mode. The three */
! 1200: /* least significant index bits are the guard, round and sticky bits. */
! 1201:
! 1202: round: /* r9 = grs, r5:r11 = shifted mantissa, r10 carries sign and rounding mode */
! 1203: ff1 r8,r9 /* see if there is denormalization loss */
! 1204: bb1 5,r8,assemble /* r9 == 0: no denormalization loss or inexactness */
! 1205: extu r6,r10,2<modelo> /* extract the 2-bit rounding mode */
! 1206: bb1.n modehi,r10,signext /* use sign bit instead of LSB */
! 1207: mak r6,r6,2<4> /* delay slot, runs on both paths: mode into index bits 4-5 */
! 1208: extu r7,r11,1<0> /* extract LSB */
! 1209: br.n grs /* skip sign extraction */
! 1210: mak r7,r7,1<3> /* delay slot: LSB into index bit 3 */
! 1211: signext:
! 1212: extu r7,r10,1<31> /* extract sign bit */
! 1213: mak r7,r7,1<3> /* sign into index bit 3 */
! 1214: grs:
! 1215: or r6,r6,r7 /* combine mode and LSB/sign */
! 1216: or r6,r6,r9 /* or in guard, round, and sticky */
! 1217: or.u r1,r0,hi16(roundtable) /* form address of branch table (r1 is free: the return address is reloaded from the stack at Ureturn) */
! 1218: or r1,r1,lo16(roundtable)
! 1219: lda r6,r1[r6] /* scale index by the word size and add the table base */
! 1220: jmp.n r6 /* jump into the branch table */
! 1221: set r9,r9,1<3> /* delay slot: set inexact flag in r9 */
! 1222:
! 1223: roundtable: /* index = mode<<4 | (LSB or sign)<<3 | grs; grs = 000 entries are never reached from round */
! 1224: br noaddone /* nearest, LSB=0, grs=000 */
! 1225: br noaddone /* nearest, LSB=0, grs=001 */
! 1226: br noaddone /* nearest, LSB=0, grs=010 */
! 1227: br noaddone /* nearest, LSB=0, grs=011 */
! 1228: br noaddone /* nearest, LSB=0, grs=100: tie, LSB already even */
! 1229: br addone /* nearest, LSB=0, grs=101 */
! 1230: br addone /* nearest, LSB=0, grs=110 */
! 1231: br addone /* nearest, LSB=0, grs=111 */
! 1232: br noaddone /* nearest, LSB=1, grs=000 */
! 1233: br noaddone /* nearest, LSB=1, grs=001 */
! 1234: br noaddone /* nearest, LSB=1, grs=010 */
! 1235: br noaddone /* nearest, LSB=1, grs=011 */
! 1236: br addone /* nearest, LSB=1, grs=100: tie, round to even */
! 1237: br addone /* nearest, LSB=1, grs=101 */
! 1238: br addone /* nearest, LSB=1, grs=110 */
! 1239: br addone /* nearest, LSB=1, grs=111 */
! 1240: br noaddone /* toward zero, LSB=0, grs=000 */
! 1241: br noaddone /* toward zero, LSB=0, grs=001 */
! 1242: br noaddone /* toward zero, LSB=0, grs=010 */
! 1243: br noaddone /* toward zero, LSB=0, grs=011 */
! 1244: br noaddone /* toward zero, LSB=0, grs=100 */
! 1245: br noaddone /* toward zero, LSB=0, grs=101 */
! 1246: br noaddone /* toward zero, LSB=0, grs=110 */
! 1247: br noaddone /* toward zero, LSB=0, grs=111 */
! 1248: br noaddone /* toward zero, LSB=1, grs=000 */
! 1249: br noaddone /* toward zero, LSB=1, grs=001 */
! 1250: br noaddone /* toward zero, LSB=1, grs=010 */
! 1251: br noaddone /* toward zero, LSB=1, grs=011 */
! 1252: br noaddone /* toward zero, LSB=1, grs=100 */
! 1253: br noaddone /* toward zero, LSB=1, grs=101 */
! 1254: br noaddone /* toward zero, LSB=1, grs=110 */
! 1255: br noaddone /* toward zero, LSB=1, grs=111 */
! 1256: br noaddone /* toward negative, sign=0, grs=000 */
! 1257: br noaddone /* toward negative, sign=0, grs=001 */
! 1258: br noaddone /* toward negative, sign=0, grs=010 */
! 1259: br noaddone /* toward negative, sign=0, grs=011 */
! 1260: br noaddone /* toward negative, sign=0, grs=100 */
! 1261: br noaddone /* toward negative, sign=0, grs=101 */
! 1262: br noaddone /* toward negative, sign=0, grs=110 */
! 1263: br noaddone /* toward negative, sign=0, grs=111 */
! 1264: br noaddone /* toward negative, sign=1, grs=000 */
! 1265: br addone /* toward negative, sign=1, grs=001 */
! 1266: br addone /* toward negative, sign=1, grs=010 */
! 1267: br addone /* toward negative, sign=1, grs=011 */
! 1268: br addone /* toward negative, sign=1, grs=100 */
! 1269: br addone /* toward negative, sign=1, grs=101 */
! 1270: br addone /* toward negative, sign=1, grs=110 */
! 1271: br addone /* toward negative, sign=1, grs=111 */
! 1272: br noaddone /* toward positive, sign=0, grs=000 */
! 1273: br addone /* toward positive, sign=0, grs=001 */
! 1274: br addone /* toward positive, sign=0, grs=010 */
! 1275: br addone /* toward positive, sign=0, grs=011 */
! 1276: br addone /* toward positive, sign=0, grs=100 */
! 1277: br addone /* toward positive, sign=0, grs=101 */
! 1278: br addone /* toward positive, sign=0, grs=110 */
! 1279: br addone /* toward positive, sign=0, grs=111 */
! 1280: br noaddone /* toward positive, sign=1, grs=000 */
! 1281: br noaddone /* toward positive, sign=1, grs=001 */
! 1282: br noaddone /* toward positive, sign=1, grs=010 */
! 1283: br noaddone /* toward positive, sign=1, grs=011 */
! 1284: br noaddone /* toward positive, sign=1, grs=100 */
! 1285: br noaddone /* toward positive, sign=1, grs=101 */
! 1286: br noaddone /* toward positive, sign=1, grs=110 */
! 1287: br noaddone /* toward positive, sign=1, grs=111 */
! 1288:
! 1289: /* Round by adding a one to the LSB of the mantissa. */
! 1290: addone: /* round up: add one to the LSB of r11 (single) or of r5:r11 (double) */
! 1291: or r6,r0,1 /* load a 1 into r6 so that addu.co can be used */
! 1292: addu.co r11,r11,r6 /* add a one to the lower word, keeping the carry; unsigned, because the low word of a double is an arbitrary 32-bit pattern and a signed add of 0x7fffffff + 1 would raise an integer overflow exception */
! 1293: bb0.n destsize,r12,noaddone /* single result,forget carry */
! 1294: set r9,r9,1<4> /* delay slot, runs on both paths: indicate that a 1 has been added */
! 1295: addu.ci r5,r5,r0 /* propagate carry into high word (unsigned, matching subu.co/subu.ci in unroundd) */
! 1296:
! 1297: noaddone: /* reached only when grs was non-zero: record the flags in r2 */
! 1298: set r2,r2,1<inexact> /* result is inexact */
! 1299: set r2,r2,1<underflow> /* and it underflowed */
! 1300:
! 1301: /* Assemble the result of the denormalization routine for writeback to the */
! 1302: /* destination register. The exponent of a denormalized number is zero, */
! 1303: /* so simply assemble the sign and the new mantissa. */
! 1304:
! 1305: assemble:
! 1306: bb1 destsize,r12,doubassem /* assemble double result */
! 1307: bb0 sign,r10,exassems /* exit assemble if sign is zero */
! 1308: set r11,r11,1<sign> /* make result negative */
! 1309: exassems:
! 1310: br Ureturn /* single result is in r11 */
! 1311:
! 1312: doubassem:
! 1313: bb0.n sign,r10,signclr /* do not set sign in r10 */
! 1314: or r10,r5,r0 /* delay slot, runs on both paths: load high word from r5 into r10 */
! 1315: set r10,r10,1<sign> /* high word with sign loaded */
! 1316: signclr:
! 1317: /* FALLTHROUGH */
! 1318: /* br Ureturn */
! 1319:
! 1320: /* Return to fpui. */
! 1321: Ureturn:
! 1322: ld r1,r31,0 /* reload the return address from the word at r31 */
! 1323: jmp r1
! 1324:
! 1325: /*
! 1326: * FPoverflow
! 1327: */
! 1328:
! 1329: ASLOCAL(FPoverflow) /* overflow handler: flag it in r2 and build the default result for the rounding mode */
! 1330: st r1,r31,0 /* save return address */
! 1331: set r2,r2,1<overflow> /* record overflow */
! 1332: set r2,r2,1<inexact> /* an overflowed result is also inexact */
! 1333:
! 1334: /* Determine which rounding mode to use for the default procedure. */
! 1335:
! 1336: bb1 modehi,r10,signed /* mode is either round toward pos. or neg. */
! 1337: bb0 modelo,r10,OFnearest /* rounding mode is round nearest */
! 1338: br OFzero /* rounding mode is round zero */
! 1339: signed:
! 1340: bb0 modelo,r10,OFnegative /* rounding mode is round negative */
! 1341: br positive /* rounding mode is round positive */
! 1342:
! 1343:
! 1344: /* In the round toward nearest mode, positive values are rounded to */
! 1345: /* positive infinity and negative values are rounded to negative infinity. */
! 1346: /* The single or double precision value is built from immediate constants. */
! 1347:
! 1348: OFnearest: /* round to nearest: the result is infinity carrying the sign from r10 */
! 1349: bb1.n destsize,r12,neardouble /* branch to neardouble if */
! 1350: /* double result */
! 1351: mask.u r5,r10,0x8000 /* delay slot, runs on both paths: r5 = only the sign bit of r10 */
! 1352: or.u r11,r0,hi16(0x7f800000) /* load single infinity constant */
! 1353: or r11,r11,lo16(0x7f800000)
! 1354: br.n FPof_return /* return with result */
! 1355: or r11,r5,r11 /* delay slot: adjust sign */
! 1356: neardouble:
! 1357: or r11,r0,r0 /* load lower word of infinity */
! 1358: or.u r10,r0,hi16(0x7ff00000) /* load upper word of infinity */
! 1359: or r10,r10,lo16(0x7ff00000)
! 1360: br.n FPof_return /* return with result */
! 1361: or r10,r5,r10 /* delay slot: adjust sign */
! 1362:
! 1363:
! 1364: /* In the round toward zero mode, positive values are rounded to the largest */
! 1365: /* positive finite number and negative values are rounded to the largest */
! 1366: /* negative finite number. */
! 1367: /* The single or double precision value is built from immediate constants. */
! 1368:
! 1369: OFzero: /* round toward zero: the result is the largest finite number carrying the sign from r10 */
! 1370: bb1.n destsize,r12,zerodouble /* branch to zerodouble if */
! 1371: /* double result */
! 1372: mask.u r5,r10,0x8000 /* delay slot, runs on both paths: r5 = only the sign bit of r10 */
! 1373: or.u r11,r0,hi16(0x7f7fffff) /* load single finite number constant */
! 1374: or r11,r11,lo16(0x7f7fffff)
! 1375: br.n FPof_return /* return with result */
! 1376: or r11,r5,r11 /* delay slot: adjust sign */
! 1377: zerodouble:
! 1378: set r11,r0,0<0> /* lower word of finite number: a zero width field sets all 32 bits */
! 1379: or.u r10,r0,hi16(0x7fefffff) /* load upper word of finite number */
! 1380: or r10,r10,lo16(0x7fefffff)
! 1381: br.n FPof_return /* return with result */
! 1382: or r10,r5,r10 /* delay slot: adjust sign */
! 1383:
! 1384:
! 1385: /* In the round toward positive mode, positive values are rounded to */
! 1386: /* positive infinity and negative values are rounded to the largest */
! 1387: /* negative finite number. */
! 1388: /* The single or double precision value is built from immediate constants. */
! 1389:
! 1390: positive: /* round toward positive: +infinity, or the largest negative finite number */
! 1391: bb1 destsize,r12,posdouble /* branch to section for double result */
! 1392: possingle:
! 1393: bb1 sign,r10,possingleneg /* branch to section for negatives */
! 1394: possinglepos:
! 1395: or.u r11,r0,hi16(0x7f800000) /* load single infinity constant */
! 1396: br.n FPof_return /* return with result */
! 1397: or r11,r11,lo16(0x7f800000) /* delay slot: low half of the constant */
! 1398: possingleneg:
! 1399: or.u r11,r0,hi16(0x7f7fffff) /* load single finite number constant */
! 1400: or r11,r11,lo16(0x7f7fffff)
! 1401: br.n FPof_return /* return with result */
! 1402: set r11,r11,1<sign> /* delay slot: set sign for negative */
! 1403: posdouble:
! 1404: bb1 sign,r10,posdoubleneg /* branch to negative double results */
! 1405: posdoublepos:
! 1406: or r11,r0,r0 /* load lower word of double infinity */
! 1407: or.u r10,r0,hi16(0x7ff00000) /* load upper word of infinity */
! 1408: br.n FPof_return /* return with result */
! 1409: or r10,r10,lo16(0x7ff00000) /* delay slot: low half of the constant */
! 1410: posdoubleneg:
! 1411: set r11,r0,0<0> /* lower word of finite number: a zero width field sets all 32 bits */
! 1412: or.u r10,r0,hi16(0x7fefffff) /* load upper word of finite number */
! 1413: or r10,r10,lo16(0x7fefffff)
! 1414: br.n FPof_return /* return with result */
! 1415: set r10,r10,1<sign> /* delay slot: set sign for negative */
! 1416:
! 1417:
! 1418: /* In the round toward negative mode, positive values are rounded to the largest */
! 1419: /* positive finite number and negative values are rounded to negative infinity. */
! 1420: /* The single or double precision value is built from immediate constants. */
! 1421:
! 1422: OFnegative: /* round toward negative: the largest positive finite number, or -infinity */
! 1423: bb1 destsize,r12,negdouble /* branch to section for double result */
! 1424: negsingle:
! 1425: bb1 sign,r10,negsingleneg /* branch to section for negatives */
! 1426: negsinglepos:
! 1427: or.u r11,r0,hi16(0x7f7fffff) /* load single finite number constant */
! 1428: br.n FPof_return /* return with result */
! 1429: or r11,r11,lo16(0x7f7fffff) /* delay slot: low half of the constant */
! 1430: negsingleneg:
! 1431: or.u r11,r0,hi16(0x7f800000) /* load single infinity constant */
! 1432: or r11,r11,lo16(0x7f800000)
! 1433: br.n FPof_return /* return with result */
! 1434: set r11,r11,1<sign> /* delay slot: set sign for negative */
! 1435: negdouble:
! 1436: bb1 sign,r10,negdoubleneg /* branch to negative double results */
! 1437: negdoublepos:
! 1438: set r11,r0,0<0> /* lower word of finite number: a zero width field sets all 32 bits */
! 1439: or.u r10,r0,hi16(0x7fefffff) /* load upper word of finite number */
! 1440: br.n FPof_return /* return with result */
! 1441: or r10,r10,lo16(0x7fefffff) /* delay slot: low half of the constant */
! 1442: negdoubleneg:
! 1443: or r11,r0,r0 /* load lower word of double infinity */
! 1444: or.u r10,r0,hi16(0x7ff00000) /* load upper word of infinity */
! 1445: or r10,r10,lo16(0x7ff00000)
! 1446: set r10,r10,1<sign> /* set sign for negative; falls through to FPof_return */
! 1447:
! 1448: FPof_return: /* common exit for every overflow case */
! 1449: ld r1,r31,0 /* reload the return address saved at FPoverflow */
! 1450: jmp r1
! 1451:
! 1452: /* If either S1 or S2 is a signalling NaN, then set the invalid operation */
! 1453: /* bit of the FPSR. */
! 1454: /* If S1 is the only NaN or one of two NaN''s, then write */
! 1455: /* a quiet S1 to the result. A signalling NaN must be made quiet before */
! 1456: /* it can be written, but a signalling S2 is not modified in this routine */
! 1457: /* if S1 is a NaN. */
! 1458: ASLOCAL(NaN) /* NaN operand handler: r5 = S1 high word, r7 = S2 high word, r12 holds the s1nan/s2nan flags */
! 1459: bb0.n s1nan,r12,S2sigcheck /* S1 is not a NaN */
! 1460: st r1,r31,0 /* delay slot, runs on both paths: save return address */
! 1461: bb1 sigbit,r5,S2sigcheck /* S1 is not a signaling NaN */
! 1462: set r2,r2,1<oper> /* signalling S1: record invalid operation */
! 1463: br.n S1write /* FPSR bit already set, S1 is made quiet, */
! 1464: /* and since we always write S1 if it is a */
! 1465: /* NaN, write S1 and skip rest of routine */
! 1466: set r5,r5,1<sigbit> /* delay slot: make S1 a quiet NaN */
! 1467:
! 1468: ASLOCAL(S2sigcheck)
! 1469: bb0 s2nan,r12,S1write /* S2 is not a NaN */
! 1470: bb1 sigbit,r7,S1write /* S2 is not a signaling NaN */
! 1471: set r2,r2,1<oper> /* signalling S2: record invalid operation */
! 1472: set r7,r7,1<sigbit> /* make S2 a quiet NaN; falls through to S1write */
! 1473:
! 1474:
! 1475: /* Write a single or double precision quiet NaN unless the operation is FCMP. */
! 1476: /* If the operation is FCMP, then set the not comparable bit in the result. */
! 1477:
! 1478: ASLOCAL(S1write) /* S1 is a NaN: build the result from r5:r6; r9 carries the opcode and dsize bit */
! 1479: bb0 s1nan,r12,S2write /* do not write S1 if it is not a NaN */
! 1480: extu r10,r9,5<11> /* extract opcode */
! 1481: cmp r11,r10,FCMPop /* compare to FCMP */
! 1482: bb1 ne,r11,S1noFCMP /* operation is not FCMP */
! 1483: set r6,r0,1<nc> /* FCMP result: set the not comparable bit */
! 1484: br.n FPnan_return
! 1485: set r6,r6,1<ne> /* delay slot: set the not equal bit */
! 1486: ASLOCAL(S1noFCMP)
! 1487: bb1.n dsize,r9,wrdoubS1 /* double destination */
! 1488: set r5,r5,11<20> /* delay slot, runs on both paths: set all exponent bits to 1 */
! 1489: /* The single result will be formed the same way whether S1 is a single or double */
! 1490: ASLOCAL(wrsingS1)
! 1491: mak r10,r5,28<3> /* wipe out extra exponent bits */
! 1492: extu r11,r6,3<29> /* get lower three bits of mantissa */
! 1493: or r10,r10,r11 /* combine all of result except sign */
! 1494: clr r6,r5,31<0> /* clear all but sign */
! 1495: br.n FPnan_return
! 1496: or r6,r6,r10 /* delay slot: form the single result in r6 */
! 1497:
! 1498: ASLOCAL(wrdoubS1)
! 1499: set r6,r6,29<0> /* set extra bits of lower word */
! 1500: br FPnan_return /* no modification necessary for writing */
! 1501: /* double to double, so return */
! 1501: /* double to double, so return */
! 1502:
! 1503: ASLOCAL(S2write) /* reached only when S1 is not a NaN: build the result from S2 in r7:r8 */
! 1504: extu r10,r9,5<11> /* extract opcode */
! 1505: cmp r11,r10,FCMPop /* compare to FCMP */
! 1506: bb1.n ne,r11,S2noFCMP /* operation is not FCMP */
! 1507: set r7,r7,11<20> /* delay slot, runs on both paths: set all exponent bits of S2 to 1 */
! 1508: set r6,r0,1<nc> /* FCMP result: set the not comparable bit */
! 1509: br.n FPnan_return
! 1510: set r6,r6,1<ne> /* delay slot: set the not equal bit */
! 1511: ASLOCAL(S2noFCMP)
! 1512: bb1.n dsize,r9,wrdoubS2 /* double destination */
! 1513: or r5,r0,r7 /* delay slot, runs on both paths: high result word = quiet S2 high word, exponent already forced above. This used to be "set r5,r5,11<20>", which returned S1's (non-NaN) high word. NOTE(review): takes the double result to be r5:r6 as in wrdoubS1 -- confirm against the caller */
! 1514: /* The single result will be formed the same way whether S2 is a single or double */
! 1515: ASLOCAL(wrsingS2)
! 1516: mak r10,r7,28<3> /* wipe out extra exponent bits */
! 1517: extu r11,r8,3<29> /* get lower three bits of mantissa */
! 1518: or r10,r10,r11 /* combine all of result except sign */
! 1519: clr r6,r7,31<0> /* clear all but sign */
! 1520: br.n FPnan_return
! 1521: or r6,r6,r10 /* delay slot: form the single result in r6 */
! 1522:
! 1523: ASLOCAL(wrdoubS2)
! 1524: set r6,r8,29<0> /* low result word = S2 low word with the extra bits set; falls through to FPnan_return */
! 1525:
! 1526: /* Return from this subroutine with the result. */
! 1527:
! 1528: ASLOCAL(FPnan_return) /* common exit for the NaN handler */
! 1529: /* the result registers are already set up by the */
! 1530: /* code that branched or fell through to here */
! 1531: ld r1,r31, 0 /* retrieve return address */
! 1532: jmp r1
! 1533:
! 1534: /*
! 1535: * infinity
! 1536: */
! 1537:
! 1538: /* Extract the opcode, compare to a constant, and branch to the code */
! 1539: /* for the instruction. */
! 1540:
! 1541: ASLOCAL(infinity) /* infinity operand handler: dispatch on the opcode field of r9 */
! 1542: extu r10,r9,5<11> /* extract opcode */
! 1543: cmp r11,r10,FADDop /* compare to FADD */
! 1544: bb1.n eq,r11,FADD /* operation is FADD */
! 1545: st r1,r31,0 /* delay slot, runs on both paths: save return address */
! 1546: cmp r11,r10,FSUBop /* compare to FSUB */
! 1547: bb1 eq,r11,FSUB /* operation is FSUB */
! 1548: cmp r11,r10,FCMPop /* compare to FCMP */
! 1549: bb1 eq,r11,FCMP /* operation is FCMP */
! 1550: cmp r11,r10,FMULop /* compare to FMUL */
! 1551: bb1 eq,r11,FMUL /* operation is FMUL */
! 1552: cmp r11,r10,FDIVop /* compare to FDIV */
! 1553: bb1 eq,r11,FDIV /* operation is FDIV */
! 1554: #if 0
! 1555: cmp r11,r10,FSQRTop /* compare to FSQRT */
! 1556: bb1 eq,r11,FSQRT /* operation is FSQRT */
! 1557: #endif
! 1558: cmp r11,r10,INTop /* compare to INT */
! 1559: bb1 eq,r11,FP_inf_overflw /* operation is INT */
! 1560: cmp r11,r10,NINTop /* compare to NINT */
! 1561: bb1 eq,r11,FP_inf_overflw /* operation is NINT */
! 1562: cmp r11,r10,TRNCop /* compare to TRNC */
! 1563: bb1 eq,r11,FP_inf_overflw /* operation is TRNC; any other opcode falls through into FADD */
! 1564:
! 1565:
! 1566: /* Adding infinities of opposite signs will cause an exception, */
! 1567: /* but all other operands will result in a correctly signed infinity. */
! 1568:
! 1569: FADD: /* FADD with at least one infinite operand; signs come from r5 (S1) and r7 (S2) */
! 1570: bb0 s1inf,r12,addS2write /* branch if S1 not infinity */
! 1571: bb0 s2inf,r12,addS1write /* S2 is not inf., so branch to write S1 */
! 1572: bb1 sign,r5,addS1neg /* both are infinities: handle case of S1 negative */
! 1573: addS1pos: /* S1 = +inf, S2 is an infinity */
! 1574: bb1 sign,r7,excpt /* adding infinities of different */
! 1575: /* signs causes an exception */
! 1576: br poswrinf /* branch to write positive infinity */
! 1577: addS1neg: /* S1 = -inf, S2 is an infinity */
! 1578: bb0 sign,r7,excpt /* adding infinities of different */
! 1579: /* signs causes an exception */
! 1580: br negwrinf /* branch to write negative infinity */
! 1581: addS1write: /* S2 is not an infinity: the result takes the sign of S1 */
! 1582: bb0 sign,r5,poswrinf /* branch to write positive infinity */
! 1583: br negwrinf /* branch to write negative infinity */
! 1584: addS2write: /* S1 is not an infinity: the result takes the sign of S2 */
! 1585: bb0 sign,r7,poswrinf /* branch to write positive infinity */
! 1586: br negwrinf /* branch to write negative infinity */
! 1587:
! 1588:
! 1589: /* Subtracting infinities of the same sign will cause an exception, */
! 1590: /* but all other operands will result in a correctly signed infinity. */
! 1591:
! 1592: FSUB: /* FSUB with at least one infinite operand; signs come from r5 (S1) and r7 (S2) */
! 1593: bb0 s1inf,r12,subS2write /* branch if S1 not infinity */
! 1594: bb0 s2inf,r12,subS1write /* S2 is not inf., so branch to write S1 */
! 1595: bb1 sign,r5,subS1neg /* both are infinities: handle case of S1 negative */
! 1596: subS1pos: /* S1 = +inf, S2 is an infinity */
! 1597: bb0 sign,r7,excpt /* subtracting infinities of the same */
! 1598: /* sign causes an exception */
! 1599: br poswrinf /* branch to write positive infinity */
! 1600: subS1neg: /* S1 = -inf, S2 is an infinity */
! 1601: bb1 sign,r7,excpt /* subtracting infinities of the same */
! 1602: /* sign causes an exception */
! 1603: br negwrinf /* branch to write negative infinity */
! 1604: subS1write: /* S2 is not an infinity: the result takes the sign of S1 */
! 1605: bb0 sign,r5,poswrinf /* branch to write positive infinity */
! 1606: br negwrinf /* branch to write negative infinity */
! 1607: subS2write: /* S1 is not an infinity: the result takes the opposite sign of S2 */
! 1608: bb1 sign,r7,poswrinf /* S2 negative: branch to write positive infinity */
! 1609: br negwrinf /* S2 positive: branch to write negative infinity */
! 1610:
! 1611:
! 1612: /* Compare the operands, at least one of which is infinity, and set the */
! 1613: /* correct bits in the destination register. */
! 1614:
! 1615: FCMP: /* FCMP with at least one infinite operand: build the condition bits in r4, then copy them to r6 at move */
! 1616: bb0.n s1inf,r12,FCMPS1f /* branch for finite S1 */
! 1617: set r4,r0,1<cp> /* delay slot: since neither S1 or S2 is a NaN, */
! 1618: /* set cp */
! 1619: FCMPS1i: /* S1 is an infinity */
! 1620: bb1 sign,r5,FCMPS1ni /* branch to negative S1i */
! 1621: FCMPS1pi: /* S1 = +inf */
! 1622: bb0 s2inf,r12,FCMPS1piS2f /* branch to finite S2 with S1pi */
! 1623: FCMPS1piS2i: /* S1 = +inf, S2 is an infinity */
! 1624: bb1 sign,r7,FCMPS1piS2ni /* branch to negative S2i with S1pi */
! 1625: FCMPS1piS2pi: /* S1 = +inf, S2 = +inf */
! 1626: set r4,r4,1<eq> /* set eq bit */
! 1627: set r4,r4,1<le> /* set le bit */
! 1628: set r4,r4,1<ge> /* set ge bit */
! 1629: set r4,r4,1<ib> /* set ib bit */
! 1630: br.n move
! 1631: set r4,r4,1<ob> /* delay slot: set ob bit */
! 1632: FCMPS1piS2ni: /* S1 = +inf, S2 = -inf */
! 1633: set r4,r4,1<ne> /* set ne bit */
! 1634: set r4,r4,1<gt> /* set gt bit */
! 1635: br.n move
! 1636: set r4,r4,1<ge> /* delay slot: set ge bit */
! 1637: FCMPS1piS2f: /* S1 = +inf, S2 finite */
! 1638: set r4,r4,1<ne> /* set ne bit */
! 1639: set r4,r4,1<gt> /* set gt bit */
! 1640: bsr.n _ASM_LABEL(zero) /* see if any of the operands are zero */
! 1641: set r4,r4,1<ge> /* delay slot: set ge bit */
! 1642: bb0 s2zero,r12,FCMPS1piS2nz /* check for negative if s2 not zero */
! 1643: set r4,r4,1<ou> /* S2 is zero: set ou bit */
! 1644: br.n move
! 1645: set r4,r4,1<ob> /* delay slot: set ob bit */
! 1646: FCMPS1piS2nz: /* S2 is finite and not zero */
! 1647: bb1 sign,r7,move /* return if s2 is negative */
! 1648: FCMPS1piS2pf: /* S2 is finite and positive */
! 1649: set r4,r4,1<ou> /* set ou bit */
! 1650: br.n move
! 1651: set r4,r4,1<ob> /* delay slot: set ob bit */
! 1652: FCMPS1ni: /* S1 = -inf */
! 1653: bb0 s2inf,r12,FCMPS1niS2f /* branch to finite S2 with S1ni */
! 1654: FCMPS1niS2i: /* S1 = -inf, S2 is an infinity */
! 1655: bb1 sign,r7,FCMPS1niS2ni /* branch to negative S2i with S1ni */
! 1656: FCMPS1niS2pi: /* S1 = -inf, S2 = +inf */
! 1657: set r4,r4,1<ne> /* set ne bit */
! 1658: set r4,r4,1<le> /* set le bit */
! 1659: set r4,r4,1<lt> /* set lt bit */
! 1660: set r4,r4,1<ou> /* set ou bit */
! 1661: br.n move
! 1662: set r4,r4,1<ob> /* delay slot: set ob bit */
! 1663: FCMPS1niS2ni: /* S1 = -inf, S2 = -inf */
! 1664: set r4,r4,1<eq> /* set eq bit */
! 1665: set r4,r4,1<le> /* set le bit */
! 1666: br.n move
! 1667: set r4,r4,1<ge> /* delay slot: set ge bit */
! 1668: FCMPS1niS2f: /* S1 = -inf, S2 finite */
! 1669: set r4,r4,1<ne> /* set ne bit */
! 1670: set r4,r4,1<le> /* set le bit */
! 1671: bsr.n _ASM_LABEL(zero) /* see if any of the operands are zero */
! 1672: set r4,r4,1<lt> /* delay slot: set lt bit */
! 1673: bb0 s2zero,r12,FCMPS1niS2nz /* branch if s2 is not zero */
! 1674: set r4,r4,1<ou> /* S2 is zero: set ou bit */
! 1675: br.n move
! 1676: set r4,r4,1<ob> /* delay slot: set ob bit */
! 1677: FCMPS1niS2nz: /* S2 is finite and not zero */
! 1678: bb1 sign,r7,move /* return if s2 is negative */
! 1679: set r4,r4,1<ou> /* S2 is positive: set ou bit */
! 1680: br.n move
! 1681: set r4,r4,1<ob> /* delay slot: set ob bit */
! 1682: FCMPS1f: /* S1 is finite, so the sign tests on r7 below are on an infinite S2 */
! 1683: bb1 sign,r5,FCMPS1nf /* branch to negative S1f */
! 1684: FCMPS1pf: /* S1 finite with sign bit clear */
! 1685: bb1.n sign,r7,FCMPS1pfS2ni /* branch to negative S2i with S1pf */
! 1686: set r4,r4,1<ne> /* delay slot, both paths: set ne bit */
! 1687: FCMPS1pfS2pi: /* S1 finite with sign bit clear, S2 = +inf */
! 1688: set r4,r4,1<le> /* set le bit */
! 1689: set r4,r4,1<lt> /* set lt bit */
! 1690: bsr.n _ASM_LABEL(zero) /* see whether S1 is zero */
! 1691: set r4,r4,1<ib> /* delay slot: set ib bit */
! 1692: bb0 s1zero,r12,FCMPS1pfS2pinozero /* branch if S1 is not zero */
! 1693: FCMPS1pfS2pizero: /* S1 is zero */
! 1694: br.n move
! 1695: set r4,r4,1<ob> /* delay slot: set ob bit */
! 1696: FCMPS1pfS2pinozero: /* S1 is not zero */
! 1697: br.n move
! 1698: set r4,r4,1<in> /* delay slot: set in bit */
! 1699: FCMPS1pfS2ni: /* S1 finite with sign bit clear, S2 = -inf */
! 1700: set r4,r4,1<gt> /* set gt bit */
! 1701: br.n move
! 1702: set r4,r4,1<ge> /* delay slot: set ge bit */
! 1703: FCMPS1nf: /* S1 finite with sign bit set */
! 1704: bb1.n sign,r7,FCMPS1nfS2ni /* branch to negative S2i with S1nf */
! 1705: set r4,r4,1<ne> /* delay slot, both paths: set ne bit */
! 1706: set r4,r4,1<le> /* S2 = +inf: set le bit */
! 1707: set r4,r4,1<lt> /* set lt bit */
! 1708: bsr.n _ASM_LABEL(zero) /* see which of the operands are zero */
! 1709: set r4,r4,1<ob> /* delay slot: set ob bit */
! 1710: bb0 s1zero,r12,FCMPS1nfS2pinozero /* branch if S1 is not zero */
! 1711: FCMPS1nfS2pizero: /* S1 is zero (sign bit set) */
! 1712: br.n move
! 1713: set r4,r4,1<ib> /* delay slot: set ib bit */
! 1714: FCMPS1nfS2pinozero: /* S1 is not zero */
! 1715: br.n move
! 1716: set r4,r4,1<ou> /* delay slot: set ou bit */
! 1717: FCMPS1nfS2ni: /* S1 finite with sign bit set, S2 = -inf; falls through into move */
! 1718: set r4,r4,1<gt> /* set gt bit */
! 1719: set r4,r4,1<ge> /* set ge bit */
! 1720:
! 1721: move: /* common FCMP exit */
! 1722: br.n inf_return
! 1723: or r6,r0,r4 /* delay slot: transfer answer to r6 */
! 1724:
! 1725:
! 1726: /* Multiplying infinity and zero causes an exception, but all other */
! 1727: /* operations produce a correctly signed infinity. */
! 1728:
! 1729: FMUL: /* FMUL with at least one infinite operand */
! 1730: bsr _ASM_LABEL(zero) /* see if any of the operands are zero (sets s1zero/s2zero in r12) */
! 1731: bb1 s1zero,r12,excpt /* infinity X 0 causes an exception */
! 1732: bb1 s2zero,r12,excpt /* infinity X 0 causes an exception */
! 1733: bb1 sign,r5,FMULS1neg /* handle negative cases of S1 */
! 1734: bb0 sign,r7,poswrinf /* + X + = + */
! 1735: br negwrinf /* + X - = - */
! 1736: FMULS1neg: /* S1 has its sign bit set */
! 1737: bb1 sign,r7,poswrinf /* - X - = + */
! 1738: br negwrinf /* - X + = - */
! 1739:
! 1740:
! 1741: /* Dividing infinity by infinity causes an exception, but dividing */
! 1742: /* infinity by a finite yields a correctly signed infinity, and */
! 1743: /* dividing a finite by an infinity produces a correctly signed zero. */
! 1744:
! 1745: FDIV: /* FDIV with at least one infinite operand; the result sign is derived from r5 and r7 */
! 1746: bb1 s1inf,r12,FDIVS1inf /* handle case of S1 being infinity */
! 1747: bb1 sign,r5,FDIVS1nf /* handle cases of S1 being neg. non-inf. */
! 1748: bb1 sign,r7,FDIVS1pfS2mi /* handle case of negative S2 */
! 1749: FDIVS1pfS2pi:
! 1750: br poswrzero /* +f / +inf = +0 */
! 1751: FDIVS1pfS2mi:
! 1752: br negwrzero /* +f / -inf = -0 */
! 1753: FDIVS1nf: /* S1 is finite with its sign bit set */
! 1754: bb1 sign,r7,FDIVS1nfS2mi /* handle case of negative S2 */
! 1755: FDIVS1nfS2pi:
! 1756: br negwrzero /* -f / +inf = -0 */
! 1757: FDIVS1nfS2mi:
! 1758: br poswrzero /* -f / -inf = +0 */
! 1759: FDIVS1inf: /* S1 is an infinity */
! 1760: bb1 s2inf,r12,excpt /* inf / inf = exception */
! 1761: bb1 sign,r5,FDIVS1mi /* handle cases of S1 being neg. inf. */
! 1762: bb1 sign,r7,FDIVS1piS2nf /* handle case of negative S2 */
! 1763: FDIVS1piS2pf:
! 1764: br poswrinf /* +inf / +f = +inf */
! 1765: FDIVS1piS2nf:
! 1766: br negwrinf /* +inf / -f = -inf */
! 1767: FDIVS1mi: /* S1 = -inf */
! 1768: bb1 sign,r7,FDIVS1miS2nf /* handle case of negative S2 */
! 1769: FDIVS1miS2pf:
! 1770: br negwrinf /* -inf / +f = -inf */
! 1771: FDIVS1miS2nf:
! 1772: br poswrinf /* -inf / -f = +inf */
! 1773:
! 1774:
! 1775: /* The square root of positive infinity is positive infinity, */
! 1776: /* but the square root of negative infinity is a NaN */
! 1777:
! 1778: #if 0 /* FSQRT handling is compiled out, as is its dispatch test in infinity */
! 1779: FSQRT:
! 1780: bb0 sign,r7,poswrinf /* write sqrt(inf) = inf */
! 1781: br excpt /* write sqrt(-inf) = NaN */
! 1782: #endif
! 1783:
! 1784: excpt: /* invalid combination of operands: flag it in r2 and return a NaN */
! 1785: set r2,r2,1<oper> /* set the oper bit in r2 */
! 1786: set r5,r0,0<0> /* write NaN into r5 (a width of 0 presumably selects the whole word -- confirm against the MC88100 manual) */
! 1787: br.n inf_return
! 1788: set r6,r0,0<0> /* delay slot: write NaN into r6, writing NaNs into */
! 1789: /* both of these registers is quicker than */
! 1790: /* checking for single or double precision */
! 1790: /* checking for single or double precision */
! 1791:
! 1792:
! 1793: /* Write positive infinity of the correct precision */
! 1794:
! 1795: poswrinf: /* result = +infinity, single in r6 or double in r5 (high) and r6 (low) */
! 1796: bb1 dsize,r9,poswrinfd /* branch to write double precision inf. */
! 1797: br.n inf_return
! 1798: or.u r6,r0,0x7f80 /* delay slot: load r6 with single precision pos inf. (0x7f800000) */
! 1799: poswrinfd:
! 1800: or.u r5,r0,0x7ff0 /* load double precision pos inf. high word (0x7ff00000) */
! 1801: br.n inf_return
! 1802: or r6,r0,r0 /* delay slot: low word is zero */
! 1803:
! 1804:
! 1805: /* Write negative infinity of the correct precision */
! 1806:
! 1807: negwrinf: /* result = -infinity, single in r6 or double in r5 (high) and r6 (low) */
! 1808: bb1 dsize,r9,negwrinfd /* branch to write double precision inf. */
! 1809: br.n inf_return
! 1810: or.u r6,r0,0xff80 /* delay slot: load r6 with single precision neg inf. (0xff800000) */
! 1811: negwrinfd:
! 1812: or.u r5,r0,0xfff0 /* load double precision neg inf. high word (0xfff00000) */
! 1813: br.n inf_return
! 1814: or r6,r0,r0 /* delay slot: low word is zero */
! 1815:
! 1816:
! 1817: /* Write a positive zero disregarding precision. */
! 1818:
! 1819: poswrzero: /* result = +0: both words are all zero bits, so no precision test is needed */
! 1820: or r5,r0,r0 /* write to both high word and low word now */
! 1821: br.n inf_return /* it does not matter that both are written */
! 1822: or r6,r0,r0 /* delay slot: clear low word */
! 1823:
! 1824:
! 1825: /* Write a negative zero of the correct precision. */
! 1826:
! 1827: negwrzero: /* result = -0: the sign bit goes in r6 for single, in r5 for double */
! 1828: or r6,r0,r0 /* clear low word */
! 1829: bb1 dsize,r9,negwrzerod /* branch to write double precision zero */
! 1830: br.n inf_return
! 1831: set r6,r6,1<31> /* delay slot: set sign bit of the single result */
! 1832: negwrzerod:
! 1833: or r5,r0,r0 /* clear high word */
! 1834: br.n inf_return
! 1835: set r5,r5,1<31> /* delay slot: set sign bit of the double result */
! 1836:
! 1837: FP_inf_overflw: /* INT/NINT/TRNC of an infinity: flag it in r2 and saturate the integer result in r6 */
! 1838: set r2,r2,1<oper> /* set the oper bit in r2 */
! 1839: set r2,r2,1<overflow> /* set the overflow bit in r2 */
! 1840: set r2,r2,1<inexact> /* set the inexact bit in r2 */
! 1841:
! 1842: bb0.n sign,r7,inf_return /* if positive then return */
! 1843:
! 1844: set r6,r0,31<0> /* delay slot, both paths: r6 = 0x7fffffff, the largest positive integer; built from r0 so a stale bit 31 in r6 cannot leak into the result */
! 1845: or.c r6,r0,r6 /* S2 negative: complement r6, giving 0x80000000, the largest negative int.; falls through into inf_return */
! 1846:
! 1847: inf_return: /* common exit of the infinity routine */
! 1848: ld r1,r31,0 /* load return address saved at entry to infinity */
! 1849: jmp r1
! 1850:
! 1851: /*
! 1852: * denorm
! 1853: */
! 1854:
! 1855: /* Check to see if either S1 or S2 is a denormalized number. First */
! 1856: /* extract the exponent to see if it is zero, and then check to see if */
! 1857: /* the mantissa is not zero. If the number is denormalized, then set the */
! 1858: /* 1 or 0 bit in r12 (bit 1 for S1, bit 0 for S2). */
! 1859:
! 1860: ASLOCAL(denorm) /* entry: record in r12 which source operand is denormalized */
! 1861: st r1,r31,0 /* save return address */
! 1862: dnmcheckS1:
! 1863: extu r10,r5,11<20> /* extract exponent */
! 1864: bcnd ne0,r10,dnmsetS2 /* S1 is not a denorm, so S2 must be */
! 1865: bb1.n 9,r9,dnmcheckS1d /* S1 is double precision */
! 1866: mak r10,r5,20<3> /* delay slot: make field with only the mantissa bits */
! 1867: /* of the high word, moved up by 3 */
! 1868: dnmcheckS1s:
! 1869: extu r11,r6,3<29> /* get three low bits of mantissa */
! 1870: or r10,r10,r11 /* assemble all of the mantissa bits */
! 1871: bcnd eq0,r10,dnmsetS2 /* mantissa is zero: S1 is not a denorm, so S2 must be */
! 1872: br dnmsetS1 /* S1 is a denorm */
! 1873:
! 1874: dnmcheckS1d:
! 1875: or r10,r6,r10 /* or all of mantissa bits */
! 1876: bcnd eq0,r10,dnmsetS2 /* mantissa is zero: S1 is not a denorm, so S2 must be */
! 1877: dnmsetS1:
! 1878: set r12,r12,1<1> /* S1 is a denorm */
! 1879:
! 1880: dnmcheckS2:
! 1881: extu r10,r7,11<20> /* extract exponent */
! 1882: bcnd ne0,r10,S1form /* S2 is not a denorm */
! 1883: bb1.n 7,r9,dnmcheckS2d /* S2 is double precision */
! 1884: mak r10,r7,20<3> /* delay slot: make field with only the mantissa bits */
! 1885: dnmcheckS2s:
! 1886: extu r11,r8,3<29> /* get three low bits of mantissa */
! 1887: or r10,r10,r11 /* assemble all of the mantissa bits */
! 1888: bcnd eq0,r10,S1form /* mantissa is zero: S2 is not a denorm */
! 1889: br dnmsetS2 /* S2 is a denorm */
! 1890: dnmcheckS2d:
! 1891: or r10,r8,r10 /* or all of mantissa bits */
! 1892: bcnd eq0,r10,S1form /* mantissa is zero: S2 is not a denorm */
! 1893: dnmsetS2:
! 1894: set r12,r12,1<0> /* S2 is a denorm */
! 1895:
! 1896:
! 1897: /* Since the operations are going to be reperformed with modified denorms, */
! 1898: /* the operands which were initially single precision need to be modified */
! 1899: /* back to single precision. */
! 1900:
! 1901: S1form: /* repack a single precision S1 from r5/r6 into single format in r6 */
! 1902: bb1 9,r9,S2form /* S1 is double precision, so do not */
! 1903: /* modify S1 into single format */
! 1904: mak r11,r5,28<3> /* move over final exponent and mantissa, */
! 1905: /* eliminating extra 3 bits of exponent */
! 1906: extu r6,r6,3<29> /* get low 3 bits of mantissa */
! 1907: or r11,r6,r11 /* form complete mantissa and exponent */
! 1908: extu r10,r5,1<31> /* get the sign bit (bit 31) */
! 1909: mak r10,r10,1<31> /* place sign bit in correct position */
! 1910: or r6,r10,r11 /* or sign, exponent, and all of mantissa */
! 1911:
! 1912: S2form: /* repack a single precision S2 from r7/r8 into single format in r8 */
! 1913: bb1 7,r9,checkop /* S2 is double precision, so do not */
! 1914: /* modify S2 into single format */
! 1915: mak r11,r7,28<3> /* move over final exponent and mantissa, */
! 1916: /* eliminating extra 3 bits of exponent */
! 1917: extu r8,r8,3<29> /* get low 3 bits of mantissa */
! 1918: or r11,r8,r11 /* form complete mantissa and exponent */
! 1919: extu r10,r7,1<31> /* get the sign bit (bit 31) */
! 1920: mak r10,r10,1<31> /* place sign bit in correct position */
! 1921: or r8,r10,r11 /* or sign, exponent, and all of mantissa */
! 1922:
! 1923:
! 1924: /* Extract the opcode, compare to a constant, and branch to the code that */
! 1925: /* deals with that opcode. */
! 1926:
! 1927: checkop: /* dispatch on the opcode; the constants match the FADDop..TRNCop defines at the top of the file */
! 1928: extu r10,r9,5<11> /* extract opcode */
! 1929: cmp r11,r10,0x05 /* compare to FADD */
! 1930: bb1 2,r11,denorm_FADD /* operation is FADD (bit 2 of the cmp result is presumably eq) */
! 1931: cmp r11,r10,0x06 /* compare to FSUB */
! 1932: bb1 2,r11,denorm_FSUB /* operation is FSUB */
! 1933: cmp r11,r10,0x07 /* compare to FCMP */
! 1934: bb1 2,r11,denorm_FCMP /* operation is FCMP */
! 1935: cmp r11,r10,0x00 /* compare to FMUL */
! 1936: bb1 2,r11,denorm_FMUL /* operation is FMUL */
! 1937: cmp r11,r10,0x0e /* compare to FDIV */
! 1938: bb1 2,r11,denorm_FDIV /* operation is FDIV */
! 1939: #if 0
! 1940: cmp r11,r10,0x0f /* compare to FSQRT */
! 1941: bb1 2,r11,denorm_FSQRT /* operation is FSQRT */
! 1942: #endif
! 1943: cmp r11,r10,0x09 /* compare to INT */
! 1944: bb1 2,r11,denorm_INT /* operation is INT */
! 1945: cmp r11,r10,0x0a /* compare to NINT */
! 1946: bb1 2,r11,denorm_NINT /* operation is NINT */
! 1947: cmp r11,r10,0x0b /* compare to TRNC */
! 1948: bb1 2,r11,denorm_TRNC /* operation is TRNC; any other opcode falls through into denorm_FADD */
! 1949:
! 1950:
! 1951: /* For all the following operations, the denormalized number is set to */
! 1952: /* zero and the operation is reperformed with the correct destination and source */
! 1953: /* sizes. */
! 1954:
! 1955: denorm_FADD: /* zero the denormalized source(s), then redo the fadd with the original operand sizes */
! 1956: bb0 1,r12,FADDS2dnm /* S1 is not denorm, so S2 must be */
! 1957: or r5,r0,r0 /* set S1 to zero */
! 1958: or r6,r0,r0
! 1959: FADDS2chk:
! 1960: bb0 0,r12,FADDcalc /* S2 is not a denorm */
! 1961: FADDS2dnm:
! 1962: or r7,r0,r0 /* set S2 to zero */
! 1963: or r8,r0,r0
! 1964: FADDcalc: /* pick the fadd variant from the size bits of r9: 5 = destination, 9 = S1, 7 = S2 */
! 1965: bb1 5,r9,FADDdD /* branch for double precision destination */
! 1966: FADDsD:
! 1967: bb1 9,r9,FADDsDdS1 /* branch for double precision S1 */
! 1968: FADDsDsS1:
! 1969: bb1 7,r9,FADDsDsS1dS2 /* branch for double precision S2 */
! 1970: FADDsDsS1sS2:
! 1971: br.n denorm_return
! 1972: fadd.sss r6,r6,r8 /* delay slot: add the two sources and place result in r6 */
! 1973: FADDsDsS1dS2:
! 1974: br.n denorm_return
! 1975: fadd.ssd r6,r6,r7 /* delay slot: add the two sources and place result in r6 */
! 1976: FADDsDdS1:
! 1977: bb1 7,r9,FADDsDdS1dS2 /* branch for double precision S2 */
! 1978: FADDsDdS1sS2:
! 1979: br.n denorm_return
! 1980: fadd.sds r6,r5,r8 /* delay slot: add the two sources and place result in r6 */
! 1981: FADDsDdS1dS2:
! 1982: br.n denorm_return
! 1983: fadd.sdd r6,r5,r7 /* delay slot: add the two sources and place result in r6 */
! 1984: FADDdD:
! 1985: bb1 9,r9,FADDdDdS1 /* branch for double precision S1 */
! 1986: FADDdDsS1:
! 1987: bb1 7,r9,FADDdDsS1dS2 /* branch for double precision S2 */
! 1988: FADDdDsS1sS2:
! 1989: br.n denorm_return
! 1990: fadd.dss r5,r6,r8 /* delay slot: add the two sources and place the double result in r5 */
! 1991: FADDdDsS1dS2:
! 1992: br.n denorm_return
! 1993: fadd.dsd r5,r6,r7 /* delay slot: add the two sources and place the double result in r5 */
! 1994: FADDdDdS1:
! 1995: bb1 7,r9,FADDdDdS1dS2 /* branch for double precision S2 */
! 1996: FADDdDdS1sS2:
! 1997: br.n denorm_return
! 1998: fadd.dds r5,r5,r8 /* delay slot: add the two sources and place the double result in r5 */
! 1999: FADDdDdS1dS2:
! 2000: br.n denorm_return
! 2001: fadd.ddd r5,r5,r7 /* delay slot: add the two sources and place the double result in r5 */
! 2002:
! 2003: denorm_FSUB: /* zero the denormalized source(s), then redo the fsub with the original operand sizes */
! 2004: bb0 1,r12,FSUBS2dnm /* S1 is not denorm, so S2 must be */
! 2005: or r5,r0,r0 /* set S1 to zero */
! 2006: or r6,r0,r0
! 2007: FSUBS2chk:
! 2008: bb0 0,r12,FSUBcalc /* S2 is not a denorm */
! 2009: FSUBS2dnm:
! 2010: or r7,r0,r0 /* set S2 to zero */
! 2011: or r8,r0,r0
! 2012: FSUBcalc: /* pick the fsub variant from the size bits of r9: 5 = destination, 9 = S1, 7 = S2 */
! 2013: bb1 5,r9,FSUBdD /* branch for double precision destination */
! 2014: FSUBsD:
! 2015: bb1 9,r9,FSUBsDdS1 /* branch for double precision S1 */
! 2016: FSUBsDsS1:
! 2017: bb1 7,r9,FSUBsDsS1dS2 /* branch for double precision S2 */
! 2018: FSUBsDsS1sS2:
! 2019: br.n denorm_return
! 2020: fsub.sss r6,r6,r8 /* delay slot: subtract S2 from S1 and place result in r6 */
! 2021: FSUBsDsS1dS2:
! 2022: br.n denorm_return
! 2023: fsub.ssd r6,r6,r7 /* delay slot: subtract S2 from S1 and place result in r6 */
! 2024: FSUBsDdS1:
! 2025: bb1 7,r9,FSUBsDdS1dS2 /* branch for double precision S2 */
! 2026: FSUBsDdS1sS2:
! 2027: br.n denorm_return
! 2028: fsub.sds r6,r5,r8 /* delay slot: subtract S2 from S1 and place result in r6 */
! 2029: FSUBsDdS1dS2:
! 2030: br.n denorm_return
! 2031: fsub.sdd r6,r5,r7 /* delay slot: subtract S2 from S1 and place result in r6 */
! 2032: FSUBdD:
! 2033: bb1 9,r9,FSUBdDdS1 /* branch for double precision S1 */
! 2034: FSUBdDsS1:
! 2035: bb1 7,r9,FSUBdDsS1dS2 /* branch for double precision S2 */
! 2036: FSUBdDsS1sS2:
! 2037: br.n denorm_return
! 2038: fsub.dss r5,r6,r8 /* delay slot: subtract S2 from S1 and place the double result in r5 */
! 2039: FSUBdDsS1dS2:
! 2040: br.n denorm_return
! 2041: fsub.dsd r5,r6,r7 /* delay slot: subtract S2 from S1 and place the double result in r5 */
! 2042: FSUBdDdS1:
! 2043: bb1 7,r9,FSUBdDdS1dS2 /* branch for double precision S2 */
! 2044: FSUBdDdS1sS2:
! 2045: br.n denorm_return
! 2046: fsub.dds r5,r5,r8 /* delay slot: subtract S2 from S1 and place the double result in r5 */
! 2047: FSUBdDdS1dS2:
! 2048: br.n denorm_return
! 2049: fsub.ddd r5,r5,r7 /* delay slot: subtract S2 from S1 and place the double result in r5 */
! 2050:
! 2051: denorm_FCMP: /* zero the denormalized source(s), then redo the fcmp; the result always goes to r6 */
! 2052: bb0 1,r12,FCMPS2dnm /* S1 is not denorm, so S2 must be */
! 2053: or r5,r0,r0 /* set S1 to zero */
! 2054: or r6,r0,r0
! 2055: FCMPS2chk:
! 2056: bb0 0,r12,FCMPcalc /* S2 is not a denorm */
! 2057: FCMPS2dnm:
! 2058: or r7,r0,r0 /* set S2 to zero */
! 2059: or r8,r0,r0
! 2060: FCMPcalc: /* only the source sizes are tested here (bits 9 and 7 of r9) */
! 2061: bb1 9,r9,FCMPdS1 /* branch for double precision S1 */
! 2062: FCMPsS1:
! 2063: bb1 7,r9,FCMPsS1dS2 /* branch for double precision S2 */
! 2064: FCMPsS1sS2:
! 2065: br.n denorm_return
! 2066: fcmp.sss r6,r6,r8 /* delay slot: compare the two sources and place result in r6 */
! 2067: FCMPsS1dS2:
! 2068: br.n denorm_return
! 2069: fcmp.ssd r6,r6,r7 /* delay slot: compare the two sources and place result in r6 */
! 2070: FCMPdS1:
! 2071: bb1 7,r9,FCMPdS1dS2 /* branch for double precision S2 */
! 2072: FCMPdS1sS2:
! 2073: br.n denorm_return
! 2074: fcmp.sds r6,r5,r8 /* delay slot: compare the two sources and place result in r6 */
! 2075: FCMPdS1dS2:
! 2076: br.n denorm_return
! 2077: fcmp.sdd r6,r5,r7 /* delay slot: compare the two sources and place result in r6 */
! 2078:
! 2079: denorm_FMUL: /* zero the denormalized source(s), then redo the fmul with the original operand sizes */
! 2080: bb0 1,r12,FMULS2dnm /* S1 is not denorm, so S2 must be */
! 2081: or r5,r0,r0 /* set S1 to zero */
! 2082: or r6,r0,r0
! 2083: FMULS2chk:
! 2084: bb0 0,r12,FMULcalc /* S2 is not a denorm */
! 2085: FMULS2dnm:
! 2086: or r7,r0,r0 /* set S2 to zero */
! 2087: or r8,r0,r0
! 2088: FMULcalc: /* pick the fmul variant from the size bits of r9: 5 = destination, 9 = S1, 7 = S2 */
! 2089: bb1 5,r9,FMULdD /* branch for double precision destination */
! 2090: FMULsD:
! 2091: bb1 9,r9,FMULsDdS1 /* branch for double precision S1 */
! 2092: FMULsDsS1:
! 2093: bb1 7,r9,FMULsDsS1dS2 /* branch for double precision S2 */
! 2094: FMULsDsS1sS2:
! 2095: br.n denorm_return
! 2096: fmul.sss r6,r6,r8 /* delay slot: multiply the two sources and place result in r6 */
! 2097: FMULsDsS1dS2:
! 2098: br.n denorm_return
! 2099: fmul.ssd r6,r6,r7 /* delay slot: multiply the two sources and place result in r6 */
! 2100: FMULsDdS1:
! 2101: bb1 7,r9,FMULsDdS1dS2 /* branch for double precision S2 */
! 2102: FMULsDdS1sS2:
! 2103: br.n denorm_return
! 2104: fmul.sds r6,r5,r8 /* delay slot: multiply the two sources and place result in r6 */
! 2105: FMULsDdS1dS2:
! 2106: br.n denorm_return
! 2107: fmul.sdd r6,r5,r7 /* delay slot: multiply the two sources and place result in r6 */
! 2108: FMULdD:
! 2109: bb1 9,r9,FMULdDdS1 /* branch for double precision S1 */
! 2110: FMULdDsS1:
! 2111: bb1 7,r9,FMULdDsS1dS2 /* branch for double precision S2 */
! 2112: FMULdDsS1sS2:
! 2113: br.n denorm_return
! 2114: fmul.dss r5,r6,r8 /* delay slot: multiply the two sources and place the double result in r5 */
! 2115: FMULdDsS1dS2:
! 2116: br.n denorm_return
! 2117: fmul.dsd r5,r6,r7 /* delay slot: multiply the two sources and place the double result in r5 */
! 2118: FMULdDdS1:
! 2119: bb1 7,r9,FMULdDdS1dS2 /* branch for double precision S2 */
! 2120: FMULdDdS1sS2:
! 2121: br.n denorm_return
! 2122: fmul.dds r5,r5,r8 /* delay slot: multiply the two sources and place the double result in r5 */
! 2123: FMULdDdS1dS2:
! 2124: br.n denorm_return
! 2125: fmul.ddd r5,r5,r7 /* delay slot: multiply the two sources and place the double result in r5 */
! 2126:
! 2127: denorm_FDIV: /* zero the denormalized source(s), then redo the fdiv; here the fdiv precedes a plain br, not a delay slot */
! 2128: bb0 1,r12,FDIVS2dnm /* S1 is not denorm, so S2 must be */
! 2129: or r5,r0,r0 /* set S1 to zero */
! 2130: or r6,r0,r0
! 2131: FDIVS2chk:
! 2132: bb0 0,r12,FDIVcalc /* S2 is not a denorm */
! 2133: FDIVS2dnm:
! 2134: or r7,r0,r0 /* set S2 to zero */
! 2135: or r8,r0,r0
! 2136: FDIVcalc: /* pick the fdiv variant from the size bits of r9: 5 = destination, 9 = S1, 7 = S2 */
! 2137: bb1 5,r9,FDIVdD /* branch for double precision destination */
! 2138: FDIVsD:
! 2139: bb1 9,r9,FDIVsDdS1 /* branch for double precision S1 */
! 2140: FDIVsDsS1:
! 2141: bb1 7,r9,FDIVsDsS1dS2 /* branch for double precision S2 */
! 2142: FDIVsDsS1sS2:
! 2143: fdiv.sss r6,r6,r8 /* divide S1 by S2 and place result in r6 */
! 2144: br denorm_return
! 2145: FDIVsDsS1dS2:
! 2146: fdiv.ssd r6,r6,r7 /* divide S1 by S2 and place result in r6 */
! 2147: br denorm_return
! 2148: FDIVsDdS1:
! 2149: bb1 7,r9,FDIVsDdS1dS2 /* branch for double precision S2 */
! 2150: FDIVsDdS1sS2:
! 2151: fdiv.sds r6,r5,r8 /* divide S1 by S2 and place result in r6 */
! 2152: br denorm_return
! 2153: FDIVsDdS1dS2:
! 2154: fdiv.sdd r6,r5,r7 /* divide S1 by S2 and place result in r6 */
! 2155: br denorm_return
! 2156: FDIVdD:
! 2157: bb1 9,r9,FDIVdDdS1 /* branch for double precision S1 */
! 2158: FDIVdDsS1:
! 2159: bb1 7,r9,FDIVdDsS1dS2 /* branch for double precision S2 */
! 2160: FDIVdDsS1sS2:
! 2161: fdiv.dss r5,r6,r8 /* divide S1 by S2 and place the double result in r5 */
! 2162: br denorm_return
! 2163: FDIVdDsS1dS2:
! 2164: fdiv.dsd r5,r6,r7 /* divide S1 by S2 and place the double result in r5 */
! 2165: br denorm_return
! 2166: FDIVdDdS1:
! 2167: bb1 7,r9,FDIVdDdS1dS2 /* branch for double precision S2 */
! 2168: FDIVdDdS1sS2:
! 2169: fdiv.dds r5,r5,r8 /* divide S1 by S2 and place the double result in r5 */
! 2170: br denorm_return
! 2171: FDIVdDdS1dS2:
! 2172: fdiv.ddd r5,r5,r7 /* divide S1 by S2 and place the double result in r5 */
! 2173: br denorm_return
! 2174:
! 2175: #if 0 /* FSQRT handling is compiled out, as is its dispatch test in checkop */
! 2176: denorm_FSQRT:
! 2177: or r7,r0,r0 /* set S2 to zero */
! 2178: or r8,r0,r0
! 2179: FSQRTcalc:
! 2180: bb1 5,r9,FSQRTdD /* branch for double precision destination */
! 2181: FSQRTsD:
! 2182: bb1 7,r9,FSQRTsDdS2 /* branch for double precision S2 */
! 2183: FSQRTsDsS2:
! 2184: br.n denorm_return
! 2185: fsqrt.ss r6,r8 /* delay slot: take the square root of S2 and place result in r6 */
! 2186: FSQRTsDdS2:
! 2187: br.n denorm_return
! 2188: fsqrt.sd r6,r7 /* delay slot: take the square root of S2 and place result in r6 */
! 2189: FSQRTdD:
! 2190: bb1 7,r9,FSQRTdDdS2 /* branch for double precision S2 */
! 2191: FSQRTdDsS2:
! 2192: br.n denorm_return
! 2193: fsqrt.ds r5,r8 /* delay slot: take the square root of S2 and place the double result in r5 */
! 2194: FSQRTdDdS2:
! 2195: br.n denorm_return
! 2196: fsqrt.dd r5,r7 /* delay slot: take the square root of S2 and place the double result in r5 */
! 2197: #endif
! 2198:
! 2199: denorm_INT: /* S2 is the only source: zero it and redo the int conversion */
! 2200: or r7,r0,r0 /* set S2 to zero */
! 2201: or r8,r0,r0
! 2202: INTcalc:
! 2203: bb1 7,r9,INTdS2 /* branch for double precision S2 */
! 2204: INTsS2:
! 2205: br.n denorm_return
! 2206: int.ss r6,r8 /* delay slot: convert S2 and place result in r6 */
! 2207: INTdS2:
! 2208: br.n denorm_return
! 2209: int.sd r6,r7 /* delay slot: convert S2 and place result in r6 */
! 2210:
! 2211: denorm_NINT: /* S2 is the only source: zero it and redo the nint conversion */
! 2212: or r7,r0,r0 /* set S2 to zero */
! 2213: or r8,r0,r0
! 2214: NINTcalc:
! 2215: bb1 7,r9,NINTdS2 /* branch for double precision S2 */
! 2216: NINTsS2:
! 2217: br.n denorm_return
! 2218: nint.ss r6,r8 /* delay slot: convert S2 and place result in r6 */
! 2219: NINTdS2:
! 2220: br.n denorm_return
! 2221: nint.sd r6,r7 /* delay slot: convert S2 and place result in r6 */
! 2222:
! 2223: denorm_TRNC: /* S2 is the only source: zero it and redo the trnc conversion */
! 2224: or r7,r0,r0 /* set S2 to zero */
! 2225: or r8,r0,r0
! 2226: TRNCcalc:
! 2227: bb1 7,r9,TRNCdS2 /* branch for double precision S2 */
! 2228: TRNCsS2:
! 2229: br.n denorm_return
! 2230: trnc.ss r6,r8 /* delay slot: convert S2 and place result in r6 */
! 2231: TRNCdS2:
! 2232: trnc.sd r6,r7 /* convert S2 and place result in r6; no branch, falls through into denorm_return */
! 2233:
! 2234:
! 2235: /* Return to the routine that detected the reserved operand. */
! 2236:
! 2237: denorm_return: /* common exit of the denorm routine */
! 2238: ld r1,r31,0 /* load return address saved at entry to denorm */
! 2239: jmp r1
! 2240:
! 2241: /* S1 and/or S2 is an infinity, and the other operand may be a zero. */
! 2242: /* Knowing which operands are infinity, check the remaining operands for zeros. */
! 2243:
! 2244: ASLOCAL(zero) /* leaf helper: set s1zero or s2zero in r12 if the non-infinite operand is zero; uses r10 and r11 as scratch and returns through r1 */
! 2245: bb0 s1inf,r12,S1noinf /* see if S1 is zero */
! 2246: bb0 s2inf,r12,S2noinf /* see if S2 is zero */
! 2247: jmp r1 /* both operands are infinities: nothing to check */
! 2248:
! 2249: /* See if S1 is zero. Whether or not S1 is a zero, being in this routine */
! 2250: /* implies that S2 is infinity, so return to subroutine infinity after */
! 2251: /* completing this code. Set the s1zero flag in r12 if S1 is zero. */
! 2252:
! 2253: S1noinf:
! 2254: bb1 s1size,r9,S1noinfd /* work with double precision operand */
! 2255: S1noinfs:
! 2256: or r10,r0,r5 /* load high word into r10 */
! 2257: clr r10,r10,1<sign> /* clear the sign bit */
! 2258: extu r11,r6,3<29> /* extract lower 3 bits of mantissa */
! 2259: or r10,r10,r11 /* or these 3 bits with high word */
! 2260: bcnd ne0,r10,operation /* do not set zero flag */
! 2261: jmp.n r1 /* since this operand was not */
! 2262: /* infinity, S2 must have been, */
! 2263: /* so return */
! 2264: set r12,r12,1<s1zero> /* delay slot: set zeroflag */
! 2265: S1noinfd:
! 2266: clr r10,r5,1<sign> /* clear the sign bit */
! 2267: or r10,r6,r10 /* or high and low word */
! 2268: bcnd ne0,r10,operation /* do not set zero flag */
! 2269: jmp.n r1 /* since this operand was not */
! 2270: /* infinity, S2 must have been, */
! 2271: /* so return */
! 2272: set r12,r12,1<s1zero> /* delay slot: set zeroflag */
! 2273:
! 2274:
! 2275: /* Check S2 for zero. If it is zero, then set the s2zero flag in r12. */
! 2276:
! 2277: S2noinf:
! 2278: bb1 s2size,r9,S2noinfd /* work with double precision operand */
! 2279: S2noinfs:
! 2280: or r10,r0,r7 /* load high word into r10 */
! 2281: clr r10,r10,1<sign> /* clear the sign bit */
! 2282: extu r11,r8,3<29> /* extract lower 3 bits of mantissa */
! 2283: or r10,r10,r11 /* or these 3 bits with high word */
! 2284: bcnd ne0,r10,operation /* do not set zero flag */
! 2285: jmp.n r1 /* since this operand was not */
! 2286: /* infinity, S1 must have been, */
! 2287: /* so return */
! 2288: set r12,r12,1<s2zero> /* delay slot: set zeroflag */
! 2289: S2noinfd:
! 2290: clr r10,r7,1<sign> /* clear the sign bit */
! 2291: or r10,r8,r10 /* or high and low word */
! 2292: bcnd ne0,r10,operation /* do not set zero flag */
! 2293: set r12,r12,1<s2zero> /* set zeroflag, then fall through to the return */
! 2294: /* since this operand was not */
! 2295: /* infinity, S1 must have been, */
! 2296: /* so return */
! 2297: operation: /* shared return for the operand-is-not-zero cases */
! 2298: jmp r1
! 2299:
! 2300: ASENTRY(Xfp_imprecise) /* imprecise FP exception entry: dispatch on FPECR, then write the result into the exception frame */
! 2301: /* input: r3 is the exception frame */
! 2302: or r29, r3, r0 /* r29 is now the E.F. */
! 2303: subu r31, r31, 16 /* reserve 16 bytes of stack */
! 2304: st r1, r31, 4 /* save our return address; offset 0 is left free */
! 2305: st r29, r31, 8 /* save the E.F. pointer */
! 2306:
! 2307: ld r2 , r29, EF_FPSR * 4 /* r2 = FPSR from the frame */
! 2308: ld r3 , r29, EF_FPCR * 4 /* r3 = FPCR from the frame */
! 2309: ld r4 , r29, EF_FPECR * 4 /* r4 = FPECR from the frame */
! 2310: ld r10, r29, EF_FPRH * 4 /* r10 = FPRH from the frame */
! 2311: ld r11, r29, EF_FPRL * 4 /* r11 = FPRL from the frame */
! 2312: ld r12, r29, EF_FPIT * 4 /* r12 = FPIT from the frame */
! 2313:
! 2314: /* Load into r1 the return address for the exception handlers. Looking */
! 2315: /* at FPECR, branch to the appropriate exception handler. */
! 2316:
! 2317: or.u r1,r0,hi16(fpui_wrapup)/* load return address of functions */
! 2318: or r1,r1,lo16(fpui_wrapup)
! 2319:
! 2320: bb0 2,r4,2f /* skip unless bit 2 of FPECR is set */
! 2321: br _ASM_LABEL(FPunderflow)
! 2322: 2:
! 2323: bb0 1,r4,3f /* skip unless bit 1 of FPECR is set */
! 2324: br _ASM_LABEL(FPoverflow)
! 2325: 3:
! 2326: /* XXX handle inexact!!! */
! 2327:
! 2328: fpui_wrapup: /* the handlers return here through r1; also reached by fall-through when neither bit is set */
! 2329: tb1 0,r0,0 /* make sure all floating point operations */
! 2330: /* have finished */
! 2331: ldcr r4, cr1 /* load the PSR */
! 2332: #if 0
! 2333: set r4, r4, 1<PSR_FPU_DISABLE_BIT>
! 2334: #endif
! 2335: set r4, r4, 1<PSR_INTERRUPT_DISABLE_BIT> /* set the interrupt disable bit */
! 2336: stcr r4, cr1 /* write the PSR back */
! 2337: ld r1, r31, 4 /* restore the return address saved at entry */
! 2338: ld r29,r31, 8 /* restore the E.F. pointer */
! 2339: addu r31, r31, 16 /* release the stack space */
! 2340:
! 2341: fstcr r2, FPSR /* write revised value of FPSR */
! 2342: fstcr r3, FPCR /* write revised value of FPCR */
! 2343:
! 2344: /* write back the results */
! 2345: extu r2, r12, 5<0> /* r2 = low 5 bits of r12 (FPIT), used below as a word index into the saved registers */
! 2346: bb0.n destsize, r12, Iwritesingle /* destsize bit clear: store one word only */
! 2347: addu r3, r29, EF_R0 * 4 /* delay slot: r3 = address of the r0 slot in the frame */
! 2348: st r10, r3 [r2] /* double: store r10 (FPRH) at index r2 */
! 2349: addu r2, r2, 1 /* advance to the next index ... */
! 2350: clr r2, r2, 27<5> /* ... keeping only bits 0-4, so it wraps past 31 */
! 2351: Iwritesingle:
! 2352: jmp.n r1 /* return to the caller */
! 2353: st r11, r3 [r2] /* delay slot: store r11 (FPRL) at index r2 */
CVSweb