[BACK]Return to round.sa CVS log [TXT][DIR] Up to [local] / sys / arch / m68k / fpsp

Annotation of sys/arch/m68k/fpsp/round.sa, Revision 1.1.1.1

1.1       nbrk        1: *      $OpenBSD: round.sa,v 1.3 2005/11/15 21:09:45 miod Exp $
                      2: *      $NetBSD: round.sa,v 1.3 1994/10/26 07:49:24 cgd Exp $
                      3:
                      4: *      MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
                      5: *      M68000 Hi-Performance Microprocessor Division
                      6: *      M68040 Software Package
                      7: *
                      8: *      M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc.
                      9: *      All rights reserved.
                     10: *
                     11: *      THE SOFTWARE is provided on an "AS IS" basis and without warranty.
                     12: *      To the maximum extent permitted by applicable law,
                     13: *      MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
                     14: *      INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
                     15: *      PARTICULAR PURPOSE and any warranty against infringement with
                     16: *      regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
                     17: *      and any accompanying written materials.
                     18: *
                     19: *      To the maximum extent permitted by applicable law,
                     20: *      IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
                     21: *      (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS
                     22: *      PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR
                     23: *      OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE
                     24: *      SOFTWARE.  Motorola assumes no responsibility for the maintenance
                     25: *      and support of the SOFTWARE.
                     26: *
                     27: *      You are hereby granted a copyright license to use, modify, and
                     28: *      distribute the SOFTWARE so long as this entire notice is retained
                     29: *      without alteration in any modified and/or redistributed versions,
                     30: *      and that such modified versions are clearly identified as such.
                     31: *      No licenses are granted by implication, estoppel or otherwise
                     32: *      under any patents or trademarks of Motorola, Inc.
                     33:
                     34: *
                     35: *      round.sa 3.4 7/29/91
                     36: *
                     37: *      handle rounding and normalization tasks
                     38: *
                     39:
                     40: ROUND  IDNT    2,1 Motorola 040 Floating Point Software Package
                     41:
                     42:        section 8
                     43:
                     44:        include fpsp.h
                     45:
                     46: *
                     47: *      round --- round result according to precision/mode
                     48: *
                     49: *      a0 points to the input operand in the internal extended format
                     50: *      d1(high word) contains rounding precision:
                     51: *              ext = $0000xxxx
                     52: *              sgl = $0001xxxx
                     53: *              dbl = $0002xxxx
                     54: *      d1(low word) contains rounding mode:
                     55: *              RN  = $xxxx0000
                     56: *              RZ  = $xxxx0001
                     57: *              RM  = $xxxx0002
                     58: *              RP  = $xxxx0003
                     59: *      d0{31:29} contains the g,r,s bits (extended)
                     60: *
                     61: *      On return the value pointed to by a0 is correctly rounded,
                     62: *      a0 is preserved and the g-r-s bits in d0 are cleared.
                     63: *      The result is not typed - the tag field is invalid.  The
                     64: *      result is still in the internal extended format.
                     65: *
                     66: *      The INEX bit of USER_FPSR will be set if the rounded result was
                     67: *      inexact (i.e. if any of the g-r-s bits were set).
                     68: *
                     69:
                     70:        xdef    round
                     71: round:
                     72: * If g=r=s=0 then result is exact and round is done, else set
                     73: * the inex flag in status reg and continue.
                     74: *
                     75:        bsr.b   ext_grs                 ;this subroutine looks at the
                     76: *                                      ;rounding precision and sets
                     77: *                                      ;the appropriate g-r-s bits.
                     78:        tst.l   d0                      ;any of g,r,s set?
                     79:        bne.w   rnd_cont                ;yes: inexact, round by mode
                     80:
                     81:        swap    d1                      ;exact: d1.w = round prec.
                     82:        bra.w   truncate                ;zero the bits below that prec.
                     83:
                     84: rnd_cont:
                     85: *
                     86: * Use rounding mode as an index into a jump table for these modes.
                     87: *
                     88:        or.l    #inx2a_mask,USER_FPSR(a6) ;set inex2/ainex
                     89:        lea     mode_tab,a1
                     90:        move.l  (a1,d1.w*4),a1          ;a1 = handler for mode in d1.w
                     91:        jmp     (a1)
                     92: *
                     93: * Jump table indexed by rounding mode in d1.w.  All following assumes
                     94: * grs != 0.
                     95: *
                     96: mode_tab:
                     97:        dc.l    rnd_near                ;mode 0: round to nearest
                     98:        dc.l    rnd_zero                ;mode 1: round toward zero
                     99:        dc.l    rnd_mnus                ;mode 2: round toward minus infinity
                    100:        dc.l    rnd_plus                ;mode 3: round toward plus infinity
                    101: *
                    102: *      ROUND PLUS INFINITY
                    103: *
                    104: *      If sign of fp number = 0 (positive), then add 1 to l.
                    105: *
                    106: rnd_plus:
                    107:        swap    d1                      ;set up d1 for round prec.
                    108:        tst.b   LOCAL_SGN(a0)           ;check for sign
                    109:        bmi.w   truncate                ;if negative then truncate
                    110:        move.l  #$ffffffff,d0           ;force g,r,s to be all f's so the
                    111:        lea     add_to_l,a1             ;add routine keeps the l-bit
                    112:        move.l  (a1,d1.w*4),a1          ;a1 = add routine for round prec.
                    113:        jmp     (a1)
                    114: *
                    115: *      ROUND MINUS INFINITY
                    116: *
                    117: *      If sign of fp number = 1 (negative), then add 1 to l.
                    118: *
                    119: rnd_mnus:
                    120:        swap    d1                      ;set up d1 for round prec.
                    121:        tst.b   LOCAL_SGN(a0)           ;check for sign
                    122:        bpl.w   truncate                ;if positive then truncate
                    123:        move.l  #$ffffffff,d0           ;force g,r,s to be all f's so the
                    124:        lea     add_to_l,a1             ;add routine keeps the l-bit
                    125:        move.l  (a1,d1.w*4),a1          ;a1 = add routine for round prec.
                    126:        jmp     (a1)
                    127: *
                    128: *      ROUND ZERO
                    129: *
                    130: *      Always truncate.
                    131: rnd_zero:
                    132:        swap    d1                      ;d1.w = round prec. (trunct index)
                    133:        bra.w   truncate                ;never adds 1 to l in this mode
                    134: *
                    135: *
                    136: *      ROUND NEAREST
                    137: *
                    138: *      If (g=1), then add 1 to l and if (r=s=0), then clear l
                    139: *      Note that this will round to even in case of a tie.
                    140: *
                    141: rnd_near:
                    142:        swap    d1                      ;set up d1 for round prec.
                    143:        add.l   d0,d0                   ;shift g-bit to c-bit; d0 = r,s
                    144:        bcc.w   truncate                ;g=0: just truncate
                    145:        lea     add_to_l,a1             ;g=1: add 1 to l; the add routine
                    146:        move.l  (a1,d1.w*4),a1          ;clears l again if d0 (r,s) is 0
                    147:        jmp     (a1)
                    148:
                    149: *
                    150: *      ext_grs --- extract guard, round and sticky bits
                    151: *
                    152: * Input:       d1 =            PREC:ROUND
                    153: * Output:      d0{31:29}=      guard, round, sticky
                    154: *
                    155: * ext_grs extracts the guard/round/sticky bits according to the
                    156: * selected rounding precision. It is called by the round subroutine
                    157: * only.  All registers except d0 are kept intact. d0 becomes an
                    158: * updated guard,round,sticky in d0{31:29}
                    159: *
                    160: * Notes: the ext_grs uses the round PREC, and therefore has to swap d1
                    161: *       prior to usage, and needs to restore d1 to original.
                    162: *       For ext precision d0 is returned exactly as it came in.
                    163: ext_grs:
                    164:        swap    d1                      ;have d1.w point to round precision
                    165:        tst.w   d1
                    166:        bne.b   sgl_or_dbl
                    167:        bra.b   end_ext_grs             ;ext prec.: d0 is left unchanged
                    168:
                    169: sgl_or_dbl:
                    170:        movem.l d2/d3,-(a7)             ;make some temp registers
                    171:        cmpi.w  #1,d1                   ;1 = sgl, anything else = dbl
                    172:        bne.b   grs_dbl
                    173: grs_sgl:
                    174:        bfextu  LOCAL_HI(a0){24:2},d3   ;sgl prec. g-r are 2 bits right
                    175:        move.l  #30,d2                  ;of the sgl prec. limits
                    176:        lsl.l   d2,d3                   ;shift g-r bits to MSB of d3
                    177:        move.l  LOCAL_HI(a0),d2         ;get ms mantissa for s-bit test
                    178:        andi.l  #$0000003f,d2           ;s bit is the or of all other
                    179:        bne.b   st_stky                 ;bits to the right of g-r
                    180:        tst.l   LOCAL_LO(a0)            ;test lower mantissa
                    181:        bne.b   st_stky                 ;if any are set, set sticky
                    182:        tst.l   d0                      ;test original g,r,s
                    183:        bne.b   st_stky                 ;if any are set, set sticky
                    184:        bra.b   end_sd                  ;nothing below g-r is set, exit
                    185: grs_dbl:
                    186:        bfextu  LOCAL_LO(a0){21:2},d3   ;dbl-prec. g-r are 2 bits right
                    187:        move.l  #30,d2                  ;of the dbl prec. limits
                    188:        lsl.l   d2,d3                   ;shift g-r bits to the MSB of d3
                    189:        move.l  LOCAL_LO(a0),d2         ;get lower mantissa  for s-bit test
                    190:        andi.l  #$000001ff,d2           ;s bit is the or-ing of all
                    191:        bne.b   st_stky                 ;other bits to the right of g-r
                    192:        tst.l   d0                      ;test original g,r,s
                    193:        bne.b   st_stky                 ;if any are set, set sticky
                    194:        bra.b   end_sd                  ;if clear, exit
                    195: st_stky:
                    196:        bset    #rnd_stky_bit,d3
                    197: end_sd:
                    198:        move.l  d3,d0                   ;return grs to d0
                    199:        movem.l (a7)+,d2/d3             ;restore scratch registers
                    200: end_ext_grs:
                    201:        swap    d1                      ;restore d1 to original
                    202:        rts
                    203:
                    204: ********************  Local Equates
                    205: ad_1_sgl equ   $00000100       constant to add 1 to l-bit in sgl prec
                    206: ad_1_dbl equ   $00000800       constant to add 1 to l-bit in dbl prec
                    207:
                    208:
                    209: *Jump table for adding 1 to the l-bit indexed by rnd prec
                    210:
                    211: add_to_l:
                    212:        dc.l    add_ext                 ;prec 0: extended
                    213:        dc.l    add_sgl                 ;prec 1: single
                    214:        dc.l    add_dbl                 ;prec 2: double
                    215:        dc.l    add_dbl                 ;prec 3: handled as double
                    216: *
                    217: *      ADD SINGLE
                    218: *      sgl_done doubles as the truncate-only entry (see trunct).
                    219: add_sgl:
                    220:        add.l   #ad_1_sgl,LOCAL_HI(a0)  ;add 1 to the sgl l-bit
                    221:        bcc.b   scc_clr                 ;no mantissa overflow
                    222:        roxr.w  LOCAL_HI(a0)            ;shift v-bit back in
                    223:        roxr.w  LOCAL_HI+2(a0)          ;shift v-bit back in
                    224:        add.w   #$1,LOCAL_EX(a0)        ;and incr exponent
                    225: scc_clr:
                    226:        tst.l   d0                      ;test for rs = 0
                    227:        bne.b   sgl_done                ;rs != 0: keep the l-bit
                    228:        andi.w  #$fe00,LOCAL_HI+2(a0)   ;clear the l-bit
                    229: sgl_done:
                    230:        andi.l  #$ffffff00,LOCAL_HI(a0) ;truncate bits beyond sgl limit
                    231:        clr.l   LOCAL_LO(a0)            ;clear ls mantissa
                    232:        rts
                    233:
                    234: *
                    235: *      ADD EXTENDED
                    236: *
                    237: add_ext:
                    238:        addq.l  #1,LOCAL_LO(a0)         ;add 1 to l-bit
                    239:        bcc.b   xcc_clr                 ;test for carry out
                    240:        addq.l  #1,LOCAL_HI(a0)         ;propagate carry
                    241:        bcc.b   xcc_clr
                    242:        roxr.w  LOCAL_HI(a0)            ;mant is 0 so restore v-bit
                    243:        roxr.w  LOCAL_HI+2(a0)          ;mant is 0 so restore v-bit
                    244:        roxr.w  LOCAL_LO(a0)            ;continue the 1-bit rotate
                    245:        roxr.w  LOCAL_LO+2(a0)          ;through the ls mantissa
                    246:        add.w   #$1,LOCAL_EX(a0)        ;and inc exp
                    247: xcc_clr:
                    248:        tst.l   d0                      ;test rs = 0
                    249:        bne.b   add_ext_done            ;rs != 0: keep the l-bit
                    250:        andi.b  #$fe,LOCAL_LO+3(a0)     ;clear the l bit
                    251: add_ext_done:
                    252:        rts
                    253: *
                    254: *      ADD DOUBLE
                    255: *      dbl_done doubles as the truncate-only entry (see trunct).
                    256: add_dbl:
                    257:        add.l   #ad_1_dbl,LOCAL_LO(a0)  ;add 1 to the dbl l-bit
                    258:        bcc.b   dcc_clr
                    259:        addq.l  #1,LOCAL_HI(a0)         ;propagate carry
                    260:        bcc.b   dcc_clr
                    261:        roxr.w  LOCAL_HI(a0)            ;mant is 0 so restore v-bit
                    262:        roxr.w  LOCAL_HI+2(a0)          ;mant is 0 so restore v-bit
                    263:        roxr.w  LOCAL_LO(a0)            ;continue the 1-bit rotate
                    264:        roxr.w  LOCAL_LO+2(a0)          ;through the ls mantissa
                    265:        add.w   #$1,LOCAL_EX(a0)        ;incr exponent
                    266: dcc_clr:
                    267:        tst.l   d0                      ;test for rs = 0
                    268:        bne.b   dbl_done                ;rs != 0: keep the l-bit
                    269:        andi.w  #$f000,LOCAL_LO+2(a0)   ;clear the l-bit (and bits below)
                    270:
                    271: dbl_done:
                    272:        andi.l  #$fffff800,LOCAL_LO(a0) ;truncate bits beyond dbl limit
                    273:        rts
                    274:
                    275: error:
                    276:        rts                     ;returns at once; NOTE(review): no use of this label is visible here
                    277: *
                    278: * Truncate all other bits
                    279: * (trunct is indexed by the rounding precision in d1.w)
                    280: trunct:
                    281:        dc.l    end_rnd                 ;prec 0: ext, nothing to clear
                    282:        dc.l    sgl_done                ;prec 1: sgl
                    283:        dc.l    dbl_done                ;prec 2: dbl
                    284:        dc.l    dbl_done                ;prec 3: handled as dbl
                    285:
                    286: truncate:
                    287:        lea     trunct,a1
                    288:        move.l  (a1,d1.w*4),a1          ;a1 = truncate routine for prec.
                    289:        jmp     (a1)                    ;jump, not call: its rts ends round
                    290:
                    291: end_rnd:
                    292:        rts
                    293:
                    294: *
                    295: *      NORMALIZE
                    296: *
                    297: * These routines (nrm_zero & nrm_set) normalize the unnorm.  This
                    298: * is done by shifting the mantissa left while decrementing the
                    299: * exponent.
                    300: *
                    301: * NRM_SET shifts and decrements until there is a 1 set in the integer
                    302: * bit of the mantissa (msb in d1).
                    303: *
                    304: * NRM_ZERO shifts and decrements until there is a 1 set in the integer
                    305: * bit of the mantissa (msb in d1) unless this would mean the exponent
                    306: * would go less than 0.  In that case the number becomes a denorm - the
                    307: * exponent (d0) is set to 0 and the mantissa (d1 & d2) is not
                    308: * normalized.
                    309: *
                    310: * Note that both routines have been optimized (for the worst case) and
                    311: * therefore do not have the easy to follow decrement/shift loop.
                    312: *
                    313: *      NRM_ZERO
                    314: *
                    315: *      Distance to first 1 bit in mantissa = X
                    316: *      Distance to 0 from exponent = Y
                    317: *      If X <= Y
                    318: *      Then
                    319: *        nrm_set
                    320: *      Else
                    321: *        shift mantissa by Y
                    322: *        set exponent = 0
                    323: *
                    324: *input:
                    325: *      FP_SCR1 = exponent, ms mantissa part, ls mantissa part
                    326: *output:
                    327: *      L_SCR1{4} = fpte15 or ete15 bit
                    328: *
                    329:        xdef    nrm_zero
                    330: nrm_zero:
                       : *
                       : * In:   a0 points to the operand (LOCAL_EX/LOCAL_HI/LOCAL_LO)
                       : * Out:  operand normalized in place, or turned into a denorm with
                       : *       exponent 0 when normalizing would take the exponent below 0
                       : * Uses: d0 and d1 are clobbered; d2/d3/d5/d6 are saved and restored
                       : *
                    331:        move.w  LOCAL_EX(a0),d0
                    332:        cmp.w   #64,d0          ;see if exp >= 64
                    333:        bmi.b   d0_less
                    334:        bsr     nrm_set         ;exp >= 64 so exp won't go below 0
                    335:        rts
                    336: d0_less:
                    337:        movem.l d2/d3/d5/d6,-(a7)
                    338:        move.l  LOCAL_HI(a0),d1
                    339:        move.l  LOCAL_LO(a0),d2
                    340:
                    341:        bfffo   d1{0:32},d3     ;get the distance to the first 1
                    342: *                              ;in ms mant
                    343:        beq.b   ms_clr          ;branch if no bits were set
                    344:        cmp.w   d3,d0           ;if X>Y
                    345:        bmi.b   greater         ;then exp will go past 0 (neg) if
                    346: *                              ;it is just shifted
                    347:        bsr     nrm_set         ;else exp won't go past 0
                    348:        movem.l (a7)+,d2/d3/d5/d6
                    349:        rts
                    350: greater:
                       : *
                       : * A count of 32..63 is only possible when we came through ms_clr,
                       : * i.e. the ms mant (d1) is zero.  Register shifts use the count
                       : * modulo 64, so two 32-bit shifts by such a count would zero both
                       : * halves and lose the mantissa.  Move the ls mant up a whole
                       : * longword instead and shift it by count-32.
                       : *
                       :        cmp.w   #32,d0          ;is the shift count >= 32?
                       :        blt.b   nz_lt32         ;no, use the two-longword shift
                       :        sub.w   #32,d0          ;d0 = count - 32
                       :        move.l  d2,d1           ;ls mant becomes the ms mant
                       :        lsl.l   d0,d1           ;shift it by the remaining count
                       :        clr.l   d2              ;nothing is left in the ls mant
                       :        bra.b   nz_store
                       : nz_lt32:
                    351:        move.l  d2,d6           ;save ls mant in d6
                    352:        lsl.l   d0,d2           ;shift ls mant by count
                    353:        lsl.l   d0,d1           ;shift ms mant by count
                    354:        move.l  #32,d5
                    355:        sub.l   d0,d5           ;make op a denorm by shifting bits
                    356:        lsr.l   d5,d6           ;by the number in the exp, then
                    357: *                              ;set exp = 0.
                    358:        or.l    d6,d1           ;shift the ls mant bits into the ms mant
                       : nz_store:
                    359:        clr.l   d0              ;same as if decremented exp to 0
                    360: *                              ;while shifting
                    361:        move.w  d0,LOCAL_EX(a0)
                    362:        move.l  d1,LOCAL_HI(a0)
                    363:        move.l  d2,LOCAL_LO(a0)
                    364:        movem.l (a7)+,d2/d3/d5/d6
                    365:        rts
                    366: ms_clr:
                    367:        bfffo   d2{0:32},d3     ;check if any bits set in ls mant
                    368:        beq.b   all_clr         ;branch if none set
                    369:        add.w   #32,d3          ;distance counted from top of ms mant
                    370:        cmp.w   d3,d0           ;if X>Y
                    371:        bmi.b   greater         ;then branch
                    372:        bsr     nrm_set         ;else exp won't go past 0
                    373:        movem.l (a7)+,d2/d3/d5/d6
                    374:        rts
                    375: all_clr:
                    376:        clr.w   LOCAL_EX(a0)    ;no mantissa bits set. Set exp = 0.
                    377:        movem.l (a7)+,d2/d3/d5/d6
                    378:        rts
                    379: *
                    380: *      NRM_SET
                    381: *      Clobbers d0 and d1; d6 and d7 are saved and restored.
                    382:        xdef    nrm_set
                    383: nrm_set:
                    384:        move.l  d7,-(a7)
                    385:        bfffo   LOCAL_HI(a0){0:32},d7 ;find first 1 in ms mant (count to d7)
                    386:        beq.b   lower           ;branch if ms mant is all 0's
                    387:
                    388:        move.l  d6,-(a7)
                    389:
                    390:        sub.w   d7,LOCAL_EX(a0) ;sub exponent by count
                    391:        move.l  LOCAL_HI(a0),d0 ;d0 has ms mant
                    392:        move.l  LOCAL_LO(a0),d1 ;d1 has ls mant
                    393:
                    394:        lsl.l   d7,d0           ;shift first 1 to j bit position
                    395:        move.l  d1,d6           ;copy ls mant into d6
                    396:        lsl.l   d7,d6           ;shift ls mant by count
                    397:        move.l  d6,LOCAL_LO(a0) ;store ls mant into memory
                    398:        moveq.l #32,d6
                    399:        sub.l   d7,d6           ;d6 = 32 - count
                    400:        lsr.l   d6,d1           ;shift off all bits but those that will
                    401: *                              ;be shifted into ms mant
                    402:        or.l    d1,d0           ;shift the ls mant bits into the ms mant
                    403:        move.l  d0,LOCAL_HI(a0) ;store ms mant into memory
                    404:        movem.l (a7)+,d7/d6     ;restore registers
                    405:        rts
                    406:
                    407: *
                    408: * We get here if ms mant was = 0, and we assume ls mant has bits
                    409: * set (otherwise this would have been tagged a zero not a denorm).
                    410: *
                    411: lower:
                    412:        move.w  LOCAL_EX(a0),d0 ;d0 has exponent
                    413:        move.l  LOCAL_LO(a0),d1 ;d1 has ls mant
                    414:        sub.w   #32,d0          ;account for ms mant being all zeros
                    415:        bfffo   d1{0:32},d7     ;find first 1 in ls mant (count to d7)
                    416:        sub.w   d7,d0           ;subtract shift count from exp
                    417:        lsl.l   d7,d1           ;shift first 1 to integer bit in ms mant
                    418:        move.w  d0,LOCAL_EX(a0) ;store exp
                    419:        move.l  d1,LOCAL_HI(a0) ;store ms mant
                    420:        clr.l   LOCAL_LO(a0)    ;clear ls mant
                    421:        move.l  (a7)+,d7        ;only d7 was pushed on this path
                    422:        rts
                    423: *
                    424: *      denorm --- denormalize an intermediate result
                    425: *
                    426: *      Used by underflow.
                    427: *
                    428: * Input:
                    429: *      a0       points to the operand to be denormalized
                    430: *               (in the internal extended format)
                    431: *
                    432: *      d0:      rounding precision
                    433: * Output:
                    434: *      a0       points to the denormalized result
                    435: *               (in the internal extended format)
                    436: *
                    437: *      d0      is guard,round,sticky
                    438: *
                    439: * d0 comes into this routine with the rounding precision. It
                    440: * is then loaded with the denormalized exponent threshold for the
                    441: * rounding precision.
                    442: * inex2/ainex are set in USER_FPSR(a6) when the result is inexact.
                    443:
                    444:        xdef    denorm
                    445: denorm:
                    446:        btst.b  #6,LOCAL_EX(a0) ;check for exponents between $7fff-$4000
                    447:        beq.b   no_sgn_ext
                    448:        bset.b  #7,LOCAL_EX(a0) ;sign extend if it is so
                    449: no_sgn_ext:
                    450:
                    451:        tst.b   d0              ;if 0 then extended precision
                    452:        bne.b   not_ext         ;else branch
                    453:
                    454:        clr.l   d1              ;load d1 with ext threshold
                    455:        clr.l   d0              ;clear the sticky flag
                    456:        bsr     dnrm_lp         ;denormalize the number
                    457:        tst.b   d1              ;check for inex
                    458:        beq.w   no_inex         ;if clr, no inex
                    459:        bra.b   dnrm_inex       ;if set, set inex
                    460:
                    461: not_ext:
                    462:        cmpi.l  #1,d0           ;if 1 then single precision
                    463:        beq.b   load_sgl        ;else must be 2, double prec
                    464:
                    465: load_dbl:
                    466:        move.w  #dbl_thresh,d1  ;put copy of threshold in d1
                    467:        move.l  d1,d0           ;copy d1 into d0
                    468:        sub.w   LOCAL_EX(a0),d0 ;diff = threshold - exp
                    469:        cmp.w   #67,d0          ;if diff >= 67 (mant + grs bits)
                    470:        bpl.b   chk_stky        ;then branch (all bits would be
                    471: *                              ; shifted off in denorm routine)
                    472:        clr.l   d0              ;else clear the sticky flag
                    473:        bsr     dnrm_lp         ;denormalize the number
                    474:        tst.b   d1              ;check flag
                    475:        beq.b   no_inex         ;if clr, no inex
                    476:        bra.b   dnrm_inex       ;if set, set inex
                    477:
                    478: load_sgl:
                    479:        move.w  #sgl_thresh,d1  ;put copy of threshold in d1
                    480:        move.l  d1,d0           ;copy d1 into d0
                    481:        sub.w   LOCAL_EX(a0),d0 ;diff = threshold - exp
                    482:        cmp.w   #67,d0          ;if diff >= 67 (mant + grs bits)
                    483:        bpl.b   chk_stky        ;then branch (all bits would be
                    484: *                              ; shifted off in denorm routine)
                    485:        clr.l   d0              ;else clear the sticky flag
                    486:        bsr     dnrm_lp         ;denormalize the number
                    487:        tst.b   d1              ;check flag
                    488:        beq.b   no_inex         ;if clr, no inex
                    489:        bra.b   dnrm_inex       ;if set, set inex
                    490:
                    491: chk_stky:
                    492:        tst.l   LOCAL_HI(a0)    ;check for any bits set
                    493:        bne.b   set_stky
                    494:        tst.l   LOCAL_LO(a0)    ;check for any bits set
                    495:        bne.b   set_stky
                    496:        bra.b   clr_mant        ;NOTE(review): d0 still holds diff here, not g,r,s - confirm
                    497: set_stky:
                    498:        or.l    #inx2a_mask,USER_FPSR(a6) ;set inex2/ainex
                    499:        move.l  #$20000000,d0   ;set sticky bit in return value
                    500: clr_mant:
                    501:        move.w  d1,LOCAL_EX(a0)         ;load exp with threshold
                    502:        clr.l   LOCAL_HI(a0)    ;set d1 = 0 (ms mantissa)
                    503:        clr.l   LOCAL_LO(a0)            ;set d2 = 0 (ls mantissa)
                    504:        rts
                    505: dnrm_inex:
                    506:        or.l    #inx2a_mask,USER_FPSR(a6) ;set inex2/ainex
                    507: no_inex:
                    508:        rts
                    509:
                    510: *
                    511: *      dnrm_lp --- normalize exponent/mantissa to specified threshold
                    512: *
                    513: * Input:
                    514: *      a0              points to the operand to be denormalized
                    515: *      d0{31:29}       initial guard,round,sticky
                    516: *      d1{15:0}        denormalization threshold
                    517: * Output:
                    518: *      a0              points to the denormalized operand
                    519: *      d0{31:29}       final guard,round,sticky
                    520: *      d1.b            inexact flag:  all ones means inexact result
                    521: *
                    522: * The LOCAL_LO and LOCAL_GRS parts of the value are copied to FP_SCR2
                    523: * so that bfext can be used to extract the new low part of the mantissa.
                    524: * Dnrm_lp can be called with a0 pointing to ETEMP or WBTEMP and there
                    525: * is no LOCAL_GRS scratch word following it on the fsave frame.
                    526: *
                    527:        xdef    dnrm_lp
                    528: dnrm_lp:
                    529:        move.l  d2,-(sp)                ;save d2 for temp use
                    530:        btst.b  #E3,E_BYTE(a6)          ;test for type E3 exception
                    531:        beq.b   not_E3                  ;not type E3: keep caller's d0 as g,r,s
                    532:        bfextu  WBTEMP_GRS(a6){6:3},d2  ;E3: extract 3-bit guard,round,sticky field
                    533:        move.l  #29,d0                  ;shift count: bits 2:0 -> bits 31:29
                    534:        lsl.l   d0,d2                   ;shift g,r,s to their positions
                    535:        move.l  d2,d0                   ;d0 = g,r,s taken from WBTEMP_GRS
                    536: not_E3:
                    537:        move.l  (sp)+,d2                ;restore d2
                    538:        move.l  LOCAL_LO(a0),FP_SCR2+LOCAL_LO(a6)       ;copy LOCAL_LO to FP_SCR2
                    539:        move.l  d0,FP_SCR2+LOCAL_GRS(a6)        ;save initial g,r,s in FP_SCR2
                    540:        move.l  d1,d0                   ;copy the denorm threshold
                    541:        sub.w   LOCAL_EX(a0),d1         ;d1 = threshold - uns exponent
                    542:        ble.b   no_lp                   ;d1 <= 0
                    543:        cmp.w   #32,d1
                    544:        blt.b   case_1                  ;0 < d1 < 32
                    545:        cmp.w   #64,d1
                    546:        blt.b   case_2                  ;32 <= d1 < 64
                    547:        bra.w   case_3                  ;d1 >= 64
                    548: *
                    549: * No denormalization necessary
                    550: *
                    551: no_lp:
                    552:        clr.b   d1                      ;d1.b = 0: no inex2 reported
                    553:        move.l  FP_SCR2+LOCAL_GRS(a6),d0        ;d0 = g,r,s saved at entry (d0 held the threshold)
                    554:        rts
                    555: *
                    556: * case (0<d1<32)
                    557: *
                    558: case_1:
                    559:        move.l  d2,-(sp)                ;save d2, used as scratch below
                    560:        move.w  d0,LOCAL_EX(a0)         ;exponent = denorm threshold
                    561:        move.l  #32,d0
                    562:        sub.w   d1,d0                   ;d0 = 32 - d1
                    563:        bfextu  LOCAL_EX(a0){d0:32},d2  ;fetch 32 bits at bit offset d0 from LOCAL_EX
                    564:        bfextu  d2{d1:d0},d2            ;low d0 bits of d2 = new LOCAL_HI
                    565:        bfextu  LOCAL_HI(a0){d0:32},d1  ;d1 = new LOCAL_LO
                    566:        bfextu  FP_SCR2+LOCAL_LO(a6){d0:32},d0  ;d0 = new G,R,S
                    567:        move.l  d2,LOCAL_HI(a0)         ;store new LOCAL_HI
                    568:        move.l  d1,LOCAL_LO(a0)         ;store new LOCAL_LO
                    569:        clr.b   d1                      ;d1.b = 0 unless bits are found below G,R
                    570:        bftst   d0{2:30}                ;any bits set below guard,round?
                    571:        beq.b   c1nstky                 ;no: leave d0 and d1.b as they are
                    572:        bset.l  #rnd_stky_bit,d0        ;yes: set rnd_stky_bit in d0
                    573:        st.b    d1                      ;d1.b = all ones: inexact
                    574: c1nstky:
                    575:        move.l  FP_SCR2+LOCAL_GRS(a6),d2        ;d2 = g,r,s saved at entry
                    576:        andi.l  #$e0000000,d2           ;clear all but G,R,S
                    577:        tst.l   d2                      ;test if original G,R,S are clear
                    578:        beq.b   grs_clear
                    579:        or.l    #$20000000,d0           ;set sticky bit in d0
                    580: grs_clear:
                    581:        andi.l  #$e0000000,d0           ;clear all but G,R,S
                    582:        move.l  (sp)+,d2                ;restore caller's d2
                    583:        rts
                    584: *
                    585: * case (32<=d1<64)
                    586: *
                    587: case_2:
                    588:        move.l  d2,-(sp)                ;save d2, used as scratch below
                    589:        move.w  d0,LOCAL_EX(a0)         ;unsigned exponent = threshold
                    590:        sub.w   #32,d1                  ;d1 = d1 - 32, now 0 <= d1 < 32
                    591:        move.l  #32,d0
                    592:        sub.w   d1,d0                   ;d0 = 32 - d1
                    593:        bfextu  LOCAL_EX(a0){d0:32},d2  ;fetch 32 bits at bit offset d0 from LOCAL_EX
                    594:        bfextu  d2{d1:d0},d2            ;d2 = new LOCAL_LO
                    595:        bfextu  LOCAL_HI(a0){d0:32},d1  ;d1 = new G,R,S
                    596:        bftst   d1{2:30}                ;any bits set below guard,round?
                    597:        bne.b   c2_sstky                ;bra if sticky bit to be set
                    598:        bftst   FP_SCR2+LOCAL_LO(a6){d0:32}     ;any bits set in the field at offset d0 of the LO copy?
                    599:        bne.b   c2_sstky                ;bra if sticky bit to be set
                    600:        move.l  d1,d0                   ;d0 = new g,r,s
                    601:        clr.b   d1                      ;d1.b = 0: no inexact flagged
                    602:        bra.b   end_c2
                    603: c2_sstky:
                    604:        move.l  d1,d0                   ;d0 = new g,r,s
                    605:        bset.l  #rnd_stky_bit,d0        ;set rnd_stky_bit in d0
                    606:        st.b    d1                      ;d1.b = all ones: inexact
                    607: end_c2:
                    608:        clr.l   LOCAL_HI(a0)            ;store LOCAL_HI = 0
                    609:        move.l  d2,LOCAL_LO(a0)         ;store LOCAL_LO
                    610:        move.l  FP_SCR2+LOCAL_GRS(a6),d2        ;d2 = g,r,s saved at entry
                    611:        andi.l  #$e0000000,d2           ;clear all but G,R,S
                    612:        tst.l   d2                      ;test if original G,R,S are clear
                    613:        beq.b   clear_grs
                    614:        or.l    #$20000000,d0           ;set sticky bit in d0
                    615: clear_grs:
                    616:        andi.l  #$e0000000,d0           ;get rid of all but G,R,S
                    617:        move.l  (sp)+,d2                ;restore caller's d2
                    618:        rts
                    619: *
                    620: * d1 >= 64 Force the exponent to be the denorm threshold with the
                    621: * correct sign.
                    622: *
                    623: case_3:
                    624:        move.w  d0,LOCAL_EX(a0)         ;exponent = denorm threshold
                    625:        tst.w   LOCAL_SGN(a0)           ;test the word at LOCAL_SGN
                    626:        bge.b   c3con                   ;not negative: leave LOCAL_EX as is
                    627: c3neg:
                    628:        or.l    #$80000000,LOCAL_EX(a0) ;negative: set the top bit of the LOCAL_EX longword
                    629: c3con:
                    630:        cmp.w   #64,d1
                    631:        beq.b   sixty_four              ;shift of exactly 64
                    632:        cmp.w   #65,d1
                    633:        beq.b   sixty_five              ;shift of exactly 65
                    634: *
                    635: * Shift value is out of range.  Set d1 for inex2 flag and
                    636: * return a zero with the given threshold.
                    637: *
                    638:        clr.l   LOCAL_HI(a0)            ;d1 > 65: mantissa becomes zero
                    639:        clr.l   LOCAL_LO(a0)
                    640:        move.l  #$20000000,d0           ;guard,round clear, sticky set
                    641:        st.b    d1                      ;d1.b = all ones: inexact
                    642:        rts
                    643:
                    644: sixty_four:
                    645:        move.l  LOCAL_HI(a0),d0
                    646:        bfextu  d0{2:30},d1             ;d1 = low 30 bits of LOCAL_HI
                    647:        andi.l  #$c0000000,d0           ;keep top two bits as guard,round
                    648:        bra.b   c3com
                    649:
                    650: sixty_five:
                    651:        move.l  LOCAL_HI(a0),d0
                    652:        bfextu  d0{1:31},d1             ;d1 = low 31 bits of LOCAL_HI
                    653:        andi.l  #$80000000,d0           ;keep only the top bit
                    654:        lsr.l   #1,d0                   ;shift high bit into R bit
                    655:
                    656: c3com:
                    657:        tst.l   d1                      ;any leftover LOCAL_HI bits?
                    658:        bne.b   c3ssticky
                    659:        tst.l   LOCAL_LO(a0)            ;any LOCAL_LO bits?
                    660:        bne.b   c3ssticky
                    661:        tst.b   FP_SCR2+LOCAL_GRS(a6)   ;any bits in the first byte of the saved g,r,s?
                    662:        bne.b   c3ssticky
                    663:        clr.b   d1                      ;d1.b = 0: no inexact flagged
                    664:        bra.b   c3end
                    665:
                    666: c3ssticky:
                    667:        bset.l  #rnd_stky_bit,d0        ;set rnd_stky_bit in d0
                    668:        st.b    d1                      ;d1.b = all ones: inexact
                    669: c3end:
                    670:        clr.l   LOCAL_HI(a0)            ;mantissa becomes zero
                    671:        clr.l   LOCAL_LO(a0)
                    672:        rts
                    673:
                    674:        end

CVSweb