sys/arch/m68k/060sp/ilsp.s - annotate

Return to ilsp.s CVS log
Up to [local] / sys / arch / m68k / 060sp
Annotation of sys/arch/m68k/060sp/ilsp.s, Revision 1.1.1.1

1.1       nbrk        1: #
                      2: # $OpenBSD: ilsp.s,v 1.2 1996/05/30 22:14:39 niklas Exp $
                      3: # $NetBSD: ilsp.s,v 1.2 1996/05/15 19:48:37 is Exp $
                      4: #
                      5:
                      6: #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                      7: # MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
                      8: # M68000 Hi-Performance Microprocessor Division
                      9: # M68060 Software Package Production Release
                     10: #
                     11: # M68060 Software Package Copyright (C) 1993, 1994, 1995, 1996 Motorola Inc.
                     12: # All rights reserved.
                     13: #
                     14: # THE SOFTWARE is provided on an "AS IS" basis and without warranty.
                     15: # To the maximum extent permitted by applicable law,
                     16: # MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
                     17: # INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS
                     18: # FOR A PARTICULAR PURPOSE and any warranty against infringement with
                     19: # regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
                     20: # and any accompanying written materials.
                     21: #
                     22: # To the maximum extent permitted by applicable law,
                     23: # IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
                     24: # (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS,
                     25: # BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY LOSS)
                     26: # ARISING OF THE USE OR INABILITY TO USE THE SOFTWARE.
                     27: #
                     28: # Motorola assumes no responsibility for the maintenance and support
                     29: # of the SOFTWARE.
                     30: #
                     31: # You are hereby granted a copyright license to use, modify, and distribute the
                     32: # SOFTWARE so long as this entire notice is retained without alteration
                     33: # in any modified and/or redistributed versions, and that such modified
                     34: # versions are clearly identified as such.
                     35: # No licenses are granted by implication, estoppel or otherwise under any
                     36: # patents or trademarks of Motorola, Inc.
                     37: #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                     38:
                     39: #
                     40: # litop.s:
                     41: #      This file is appended to the top of the 060FPLSP package
                     42: # and contains the entry points into the package. The user, in
                     43: # effect, branches to one of the branch table entries located here.
                     44: #
                     45:
                     46:        bra.l   _060LSP__idivs64_       # entry: 64-bit signed divide
                     47:        short   0x0000                  # zero pad word after the branch
                     48:        bra.l   _060LSP__idivu64_       # entry: 64-bit unsigned divide
                     49:        short   0x0000
                     50:
                     51:        bra.l   _060LSP__imuls64_       # presumably the signed counterpart of imulu64
                     52:        short   0x0000
                     53:        bra.l   _060LSP__imulu64_       # entry: 64-bit unsigned multiply
                     54:        short   0x0000
                     55:
                     56:        bra.l   _060LSP__cmp2_Ab_
                     57:        short   0x0000
                     58:        bra.l   _060LSP__cmp2_Aw_
                     59:        short   0x0000
                     60:        bra.l   _060LSP__cmp2_Al_
                     61:        short   0x0000
                     62:        bra.l   _060LSP__cmp2_Db_
                     63:        short   0x0000
                     64:        bra.l   _060LSP__cmp2_Dw_
                     65:        short   0x0000
                     66:        bra.l   _060LSP__cmp2_Dl_
                     67:        short   0x0000
                     68:
                     69: # leave room for possible future additions to the branch table.
                     70:        align   0x200
                     71:
                     72: #########################################################################
                     73: # XDEF ****************************************************************        #
                     74: #      _060LSP__idivu64_(): Emulate 64-bit unsigned div instruction.   #
                     75: #      _060LSP__idivs64_(): Emulate 64-bit signed div instruction.     #
                     76: #                                                                      #
                     77: #      This is the library version which is accessed as a subroutine   #
                     78: #      and therefore does not work exactly like the 680X0 div{s,u}.l   #
                     79: #      64-bit divide instruction.                                      #
                     80: #                                                                      #
                     81: # XREF ****************************************************************        #
                     82: #      None.                                                           #
                     83: #                                                                      #
                     84: # INPUT ***************************************************************        #
                     85: #      0x4(sp)  = divisor                                              #
                     86: #      0x8(sp)  = hi(dividend)                                         #
                     87: #      0xc(sp)  = lo(dividend)                                         #
                     88: #      0x10(sp) = pointer to location to place quotient/remainder      #
                     89: #                                                                      #
                     90: # OUTPUT **************************************************************        #
                     91: #      0x10(sp) = points to location of remainder/quotient.            #
                     92: #                 remainder is in first longword, quotient is in 2nd.  #
                     93: #                                                                      #
                     94: # ALGORITHM ***********************************************************        #
                     95: #      If the operands are signed, make them unsigned and save the     #
                     96: # sign info for later. Separate out special cases like divide-by-zero  #
                     97: # or 32-bit divides if possible. Else, use a special math algorithm    #
                     98: # to calculate the result.                                             #
                     99: #      Restore sign info if signed instruction. Set the condition      #
                    100: # codes before performing the final "rts". If the divisor was equal to #
                    101: # zero, then perform a divide-by-zero using a 16-bit implemented       #
                    102: # divide instruction. This way, the operating system can record that   #
                    103: # the event occurred even though it may not point to the correct place.        #
                    104: #                                                                      #
                    105: #########################################################################
                    106:
                    107: set    POSNEG,         -1      # byte: 0xff = signed divide (st), 0x00 = unsigned (sf)
                    108: set    NDIVISOR,       -2      # byte: set by slt when the divisor is negative
                    109: set    NDIVIDEND,      -3      # byte: set by slt when the dividend is negative
                    110: set    DDSECOND,       -4      # byte: set once the first quotient word is stored
                    111: set    DDNORMAL,       -8      # long: count of normalization shifts
                    112: set    DDQUOTIENT,     -12     # long: quotient, stored one word at a time
                    113: set    DIV64_CC,       -16     # word: condition codes saved on entry
                    114:
                    115: ##########
                    116: # divs.l #
                    117: ##########
                    118:        global          _060LSP__idivs64_
                    119: _060LSP__idivs64_:                              # signed 64-bit divide entry point
                    120: # PROLOGUE BEGIN ########################################################
                    121:        link.w          %a6,&-16                # 16 bytes of locals (POSNEG..DIV64_CC)
                    122:        movm.l          &0x3f00,-(%sp)          # save d2-d7
                    123: #      fmovm.l         &0x0,-(%sp)             # save no fpregs
                    124: # PROLOGUE END ##########################################################
                    125:
                    126:        mov.w           %cc,DIV64_CC(%a6)       # save the current condition codes
                    127:        st              POSNEG(%a6)             # flag = 0xff: signed operation
                    128:        bra.b           ldiv64_cont             # join code shared with _060LSP__idivu64_
                    129:
                    130: ##########
                    131: # divu.l #
                    132: ##########
                    133:        global          _060LSP__idivu64_
                    134: _060LSP__idivu64_:                              # unsigned 64-bit divide entry point
                    135: # PROLOGUE BEGIN ########################################################
                    136:        link.w          %a6,&-16                # 16 bytes of locals (POSNEG..DIV64_CC)
                    137:        movm.l          &0x3f00,-(%sp)          # save d2-d7
                    138: #      fmovm.l         &0x0,-(%sp)             # save no fpregs
                    139: # PROLOGUE END ##########################################################
                    140:
                    141:        mov.w           %cc,DIV64_CC(%a6)       # save the current condition codes
                    142:        sf              POSNEG(%a6)             # flag = 0x00: unsigned operation; falls through
                    143:
                    144: ldiv64_cont:
                    145:        mov.l           0x8(%a6),%d7            # fetch divisor (the move also sets 'Z' if it is zero)
                    146:
                    147:        beq.w           ldiv64eq0               # divisor is = 0!!!
                    148:
                    149:        mov.l           0xc(%a6), %d5           # get dividend hi
                    150:        mov.l           0x10(%a6), %d6          # get dividend lo
                    151:
                    152: # separate signed and unsigned divide
                    153:        tst.b           POSNEG(%a6)             # signed or unsigned?
                    154:        beq.b           ldspecialcases          # unsigned: skip the sign handling
                    155:
                    156: # save the sign of the divisor
                    157: # make divisor unsigned if it's negative
                    158:        tst.l           %d7                     # chk sign of divisor
                    159:        slt             NDIVISOR(%a6)           # save sign of divisor
                    160:        bpl.b           ldsgndividend           # already non-negative
                    161:        neg.l           %d7                     # negate (2's complement) negative divisor
                    162:
                    163: # save the sign of the dividend
                    164: # make dividend unsigned if it's negative
                    165: ldsgndividend:
                    166:        tst.l           %d5                     # chk sign of hi(dividend)
                    167:        slt             NDIVIDEND(%a6)          # save sign of dividend
                    168:        bpl.b           ldspecialcases          # already non-negative
                    169:
                    170:        mov.w           &0x0, %cc               # clear 'X' cc bit
                    171:        negx.l          %d6                     # negate 64-bit dividend: lo longword first,
                    172:        negx.l          %d5                     # then hi longword with the borrow in 'X'
                    173:
                    174: # extract some special cases:
                    175: #      - is (dividend == 0) ?
                    176: #      - is (hi(dividend) == 0 && (divisor <= lo(dividend))) ? (32-bit div)
                    177: ldspecialcases:
                    178:        tst.l           %d5                     # is (hi(dividend) == 0)
                    179:        bne.b           ldnormaldivide          # no, so try it the long way
                    180:
                    181:        tst.l           %d6                     # is (lo(dividend) == 0), too
                    182:        beq.w           lddone                  # yes, so (dividend == 0)
                    183:
                    184:        cmp.l           %d7,%d6                 # is (divisor <= lo(dividend))
                    185:        bls.b           ld32bitdivide           # yes, so use 32 bit divide
                    186:
                    187:        exg             %d5,%d6                 # q = 0 (in d6), r = dividend (in d5)
                    188:        bra.w           ldivfinish              # can't divide, we're done.
                    189:
                    190: ld32bitdivide:
                    191:        tdivu.l         %d7, %d5:%d6            # it's only a 32/32 bit div!
                    192:
                    193:        bra.b           ldivfinish              # go apply signs and check the quotient range
                    194:
                    195: ldnormaldivide:
                    196: # last special case:
                    197: #      - is hi(dividend) >= divisor ? if yes, then overflow
                    198:        cmp.l           %d7,%d5                 # divisor vs. hi(dividend)
                    199:        bls.b           lddovf                  # answer won't fit in 32 bits
                    200:
                    201: # perform the divide algorithm:
                    202:        bsr.l           ldclassical             # do int divide: rem -> d5, quot -> d6
                    203:
                    204: # separate into signed and unsigned finishes.
                    205: ldivfinish:
                    206:        tst.b           POSNEG(%a6)             # do divs, divu separately
                    207:        beq.b           lddone                  # divu has no processing!!!
                    208:
                    209: # it was a divs.l, so ccode setting is a little more complicated...
                    210:        tst.b           NDIVIDEND(%a6)          # remainder has same sign
                    211:        beq.b           ldcc                    # as dividend.
                    212:        neg.l           %d5                     # sgn(rem) = sgn(dividend)
                    213: ldcc:
                    214:        mov.b           NDIVISOR(%a6), %d0      # d0 = sign flag of the divisor
                    215:        eor.b           %d0, NDIVIDEND(%a6)     # signs differ => quotient is negative
                    216:        beq.b           ldqpos                  # branch to quot positive
                    217:
                    218: # 0x80000000 is the largest magnitude representable as a negative 32-bit
                    219: # number. the negative of 0x80000000 is 0x80000000.
                    220:        cmpi.l          %d6, &0x80000000        # will (-quot) fit in 32 bits?
                    221:        bhi.b           lddovf                  # no: magnitude exceeds 0x80000000
                    222:
                    223:        neg.l           %d6                     # make (-quot) 2's comp
                    224:
                    225:        bra.b           lddone
                    226:
                    227: ldqpos:
                    228:        btst            &0x1f, %d6              # will (+quot) fit in 32 bits?
                    229:        bne.b           lddovf                  # bit 31 set: overflow; else fall into lddone
                    230:
                    231: lddone:
                    232: # rebuild the condition codes: keep only the saved 'X' bit (mask 0x10),
                    233: # then let tst.l set the remaining bits from the quotient in d6.
                    234:        andi.w          &0x10,DIV64_CC(%a6)     # drop everything but 'X' from the saved ccodes
                    235:        mov.w           DIV64_CC(%a6),%cc
                    236:        tst.l           %d6                     # may set 'N' ccode bit
                    237:
                    238: # here, the result is in d5 (remainder) and d6 (quotient). it is saved
                    239: # at the location given by the pointer argument at 0x14(%a6).
                    240: # use movm here to not disturb the condition codes.
                    241: ldexit:
                    242:        movm.l          &0x0060,([0x14,%a6])    # save result: d5 first, then d6
                    243:
                    244: # EPILOGUE BEGIN ########################################################
                    245: #      fmovm.l         (%sp)+,&0x0             # restore no fpregs
                    246:        movm.l          (%sp)+,&0x00fc          # restore d2-d7
                    247:        unlk            %a6                     # release the local frame
                    248: # EPILOGUE END ##########################################################
                    249:
                    250:        rts
                    251:
                    252: # the result should be the unchanged dividend
                    253: lddovf:
                    254:        mov.l           0xc(%a6), %d5           # get dividend hi
                    255:        mov.l           0x10(%a6), %d6          # get dividend lo
                    256:
                    257:        andi.w          &0x1c,DIV64_CC(%a6)     # keep saved bits 4-2, clear 'V' and 'C'
                    258:        ori.w           &0x02,DIV64_CC(%a6)     # set 'V' ccode bit
                    259:        mov.w           DIV64_CC(%a6),%cc       # install the ccodes to be returned
                    260:
                    261:        bra.b           ldexit                  # store the unchanged dividend and return
                    262:
                    263: ldiv64eq0:
                    264:        mov.l           0xc(%a6),([0x14,%a6])   # result[0] = hi(dividend), unchanged
                    265:        mov.l           0x10(%a6),([0x14,%a6],0x4)      # result[1] = lo(dividend), unchanged
                    266:
                    267:        mov.w           DIV64_CC(%a6),%cc       # restore the ccodes saved on entry
                    268:
                    269: # EPILOGUE BEGIN ########################################################
                    270: #      fmovm.l         (%sp)+,&0x0             # restore no fpregs
                    271:        movm.l          (%sp)+,&0x00fc          # restore d2-d7
                    272:        unlk            %a6                     # release the local frame
                    273: # EPILOGUE END ##########################################################
                    274:
                    275:        divu.w          &0x0,%d0                # force a divbyzero exception
                    276:        rts                                     # presumably reached only if the handler returns here
                    277:
                    278: ###########################################################################
                    279: #########################################################################
                    280: # This routine uses the 'classical' Algorithm D from Donald Knuth's    #
                    281: # Art of Computer Programming, vol II, Seminumerical Algorithms.       #
                    282: # For this implementation b=2**16, and the target is U1U2U3U4/V1V2,    #
                    283: # where U,V are words of the quadword dividend and longword divisor,   #
                    284: # and U1, V1 are the most significant words.                           #
                    285: #                                                                      #
                    286: # The most sig. longword of the 64 bit dividend must be in %d5, least  #
                    287: # in %d6. The divisor must be in %d7 and must be larger than the most   #
                    288: # sig. longword of the dividend; operands are treated as unsigned.      #
                    289: # The quotient is returned in %d6, remainder in %d5, unless the                #
                    290: # v (overflow) bit is set in the saved %ccr. If overflow, the dividend #
                    291: # is unchanged.                                                                #
                    292: #########################################################################
                    293: ldclassical:
                    294: # if the divisor msw is 0, use a simpler algorithm than the full blown
                    295: # one at lddknuth:
                    296:
                    297:        cmpi.l          %d7, &0xffff            # does the divisor fit in one word?
                    298:        bhi.b           lddknuth                # go use D. Knuth algorithm
                    299:
                    300: # Since the divisor is only a word (and larger than the mslw of the dividend),
                    301: # a simpler algorithm may be used :
                    302: # In the general case, four quotient words would be created by
                    303: # dividing the divisor word into each dividend word. In this case,
                    304: # the first two quotient words must be zero, or overflow would occur.
                    305: # Since we already checked this case above, we can treat the most significant
                    306: # longword of the dividend as (0) remainder (see Knuth) and merely complete
                    307: # the last two divisions to get a quotient longword and word remainder:
                    308:
                    309:        clr.l           %d1                     # d1 collects the two quotient words
                    310:        swap            %d5                     # same as r*b if previous step rqd
                    311:        swap            %d6                     # get u3 to lsw position
                    312:        mov.w           %d6, %d5                # rb + u3
                    313:
                    314:        divu.w          %d7, %d5                # quotient word -> lsw, remainder -> msw
                    315:
                    316:        mov.w           %d5, %d1                # first quotient word
                    317:        swap            %d6                     # get u4
                    318:        mov.w           %d6, %d5                # rb + u4
                    319:
                    320:        divu.w          %d7, %d5                # quotient word -> lsw, remainder -> msw
                    321:
                    322:        swap            %d1                     # first quotient word up to the msw
                    323:        mov.w           %d5, %d1                # 2nd quotient 'digit'
                    324:        clr.w           %d5                     # discard the quotient word,
                    325:        swap            %d5                     # now remainder
                    326:        mov.l           %d1, %d6                # and quotient
                    327:
                    328:        rts
                    329:
                    330: lddknuth:
                    331: # In this algorithm, the divisor is treated as a 2 digit (word) number
                    332: # which is divided into a 3 digit (word) dividend to get one quotient
                    333: # digit (word). After subtraction, the dividend is shifted and the
                    334: # process repeated. Before beginning, the divisor and dividend are
                    335: # 'normalized' so that the process of estimating the quotient digit
                    336: # will yield verifiably correct results.
                    337:
                    338:        clr.l           DDNORMAL(%a6)           # count of shifts for normalization
                    339:        clr.b           DDSECOND(%a6)           # clear flag for quotient digits
                    340:        clr.l           %d1                     # %d1 will hold trial quotient
                    341: lddnchk:
                    342:        btst            &31, %d7                # must we normalize? first word of
                    343:        bne.b           lddnormalized           # divisor (V1) must be >= 65536/2
                    344:        addq.l          &0x1, DDNORMAL(%a6)     # count normalization shifts
                    345:        lsl.l           &0x1, %d7               # shift the divisor
                    346:        lsl.l           &0x1, %d6               # shift u4,u3 with overflow to u2
                    347:        roxl.l          &0x1, %d5               # shift u1,u2
                    348:        bra.w           lddnchk                 # repeat until bit 31 of the divisor is set
                    349: lddnormalized:
                    350:
                    351: # Now calculate an estimate of the quotient words (msw first, then lsw).
                    352: # The comments use subscripts for the first quotient digit determination.
                    353:        mov.l           %d7, %d3                # divisor
                    354:        mov.l           %d5, %d2                # dividend mslw
                    355:        swap            %d2                     # U1 to lsw position
                    356:        swap            %d3                     # V1 to lsw position
                    357:        cmp.w           %d2, %d3                # V1 = U1 ?
                    358:        bne.b           lddqcalc1               # no: estimate by division
                    359:        mov.w           &0xffff, %d1            # use max trial quotient word
                    360:        bra.b           lddadj0
                    361: lddqcalc1:
                    362:        mov.l           %d5, %d1                # d1 = U1U2
                    363:
                    364:        divu.w          %d3, %d1                # use quotient of mslw/msw
                    365:
                    366:        andi.l          &0x0000ffff, %d1        # zero any remainder
                    367: lddadj0:
                    368:
                    369: # now test the trial quotient and adjust. This step plus the
                    370: # normalization assures (according to Knuth) that the trial
                    371: # quotient will be at worst 1 too large.
                    372:        mov.l           %d6, -(%sp)             # save dividend lslw (popped again in lddadjd1)
                    373:        clr.w           %d6                     # word u3 left
                    374:        swap            %d6                     # in lsw position
                    375: lddadj1: mov.l         %d7, %d3                # d3 = divisor (V1V2)
                    376:        mov.l           %d1, %d2                # d2 = trial quotient q
                    377:        mulu.w          %d7, %d2                # V2q
                    378:        swap            %d3                     # V1 to lsw position
                    379:        mulu.w          %d1, %d3                # V1q
                    380:        mov.l           %d5, %d4                # U1U2
                    381:        sub.l           %d3, %d4                # U1U2 - V1q
                    382:
                    383:        swap            %d4                     # low word of the difference up to the msw
                    384:
                    385:        mov.w           %d4,%d0                 # d0 = upper word of (U1U2 - V1q)
                    386:        mov.w           %d6,%d4                 # insert lower word (U3)
                    387:
                    388:        tst.w           %d0                     # is upper word set?
                    389:        bne.w           lddadjd1                # yes: no further decrement of q here
                    390:
                    391: #      add.l           %d6, %d4                # (U1U2 - V1q) + U3
                    392:
                    393:        cmp.l           %d2, %d4                # V2q vs. ((U1U2 - V1q) * b + U3)
                    394:        bls.b           lddadjd1                # is V2q > (U1U2-V1q) + U3 ?
                    395:        subq.l          &0x1, %d1               # yes, decrement and recheck
                    396:        bra.b           lddadj1
                    397: lddadjd1:
                    398: # now test the word by multiplying it by the divisor (V1V2) and comparing
                    399: # the 3 digit (word) result with the current dividend words
                    400:        mov.l           %d5, -(%sp)             # save %d5 (%d6 already saved)
                    401:        mov.l           %d1, %d6                # d6 = trial quotient word
                    402:        swap            %d6                     # shift answer to ms 3 words
                    403:        mov.l           %d7, %d5                # d5 = divisor
                    404:        bsr.l           ldmm2                   # d5:d6 = d5 * d6 (clobbers d2-d4)
                    405:        mov.l           %d5, %d2                # now %d2,%d3 are trial*divisor
                    406:        mov.l           %d6, %d3
                    407:        mov.l           (%sp)+, %d5             # restore dividend
                    408:        mov.l           (%sp)+, %d6
                    409:        sub.l           %d3, %d6                # low longwords first,
                    410:        subx.l          %d2, %d5                # subtract double precision
                    411:        bcc             ldd2nd                  # no carry, do next quotient digit
                    412:        subq.l          &0x1, %d1               # q is one too large
                    413: # need to add back divisor longword to current ms 3 digits of dividend
                    414: # - according to Knuth, this is done only 2 out of 65536 times for random
                    415: # divisor, dividend selection.
                    416:        clr.l           %d2                     # d2 = 0, so addx below adds only the carry
                    417:        mov.l           %d7, %d3
                    418:        swap            %d3
                    419:        clr.w           %d3                     # %d3 now ls word of divisor
                    420:        add.l           %d3, %d6                # aligned with 3rd word of dividend
                    421:        addx.l          %d2, %d5                # carry into the ms longword
                    422:        mov.l           %d7, %d3
                    423:        clr.w           %d3                     # %d3 now ms word of divisor
                    424:        swap            %d3                     # aligned with 2nd word of dividend
                    425:        add.l           %d3, %d5
                    426: ldd2nd:
                    427:        tst.b           DDSECOND(%a6)   # both q words done?
                    428:        bne.b           lddremain               # yes: assemble quotient and remainder
                    429: # first quotient digit now correct. store digit and shift the
                    430: # (subtracted) dividend
                    431:        mov.w           %d1, DDQUOTIENT(%a6)    # upper word of the quotient
                    432:        clr.l           %d1                     # fresh trial quotient for the 2nd pass
                    433:        swap            %d5                     # shift d5:d6 left by one word:
                    434:        swap            %d6
                    435:        mov.w           %d6, %d5                # msw of d6 becomes lsw of d5
                    436:        clr.w           %d6                     # zero fills the vacated word
                    437:        st              DDSECOND(%a6)           # second digit
                    438:        bra.w           lddnormalized           # run the estimate again for the 2nd word
                    439: lddremain:
                    440: # add 2nd word to quotient, get the remainder.
                    441:        mov.w           %d1, DDQUOTIENT+2(%a6)
                    442: # shift down one word/digit to renormalize remainder.
                    443:        mov.w           %d5, %d6                # lsw of d5 replaces lsw of d6,
                    444:        swap            %d6                     # and moves up to the msw of d6
                    445:        swap            %d5
                    446:        mov.l           DDNORMAL(%a6), %d7      # get norm shift count
                    447:        beq.b           lddrn                   # no shifts were made: nothing to undo
                    448:        subq.l          &0x1, %d7               # set for loop count
                    449: lddnlp:
                    450:        lsr.l           &0x1, %d5               # shift into %d6
                    451:        roxr.l          &0x1, %d6
                    452:        dbf             %d7, lddnlp             # undo one normalization shift per pass
                    453: lddrn:
                    454:        mov.l           %d6, %d5                # remainder
                    455:        mov.l           DDQUOTIENT(%a6), %d6    # quotient
                    456:
                    457:        rts
                    458: ldmm2:
                    459: # factors for the 32X32->64 multiplication are in %d5 and %d6.
                    460: # returns 64 bit result in %d5 (hi) %d6(lo).
                    461: # destroys %d2,%d3,%d4.
                    462:
                    463: # multiply hi,lo words of each factor to get 4 intermediate products
                    464:        mov.l           %d6, %d2                # d2 = copy of the %d6 factor
                    465:        mov.l           %d6, %d3
                    466:        mov.l           %d5, %d4
                    467:        swap            %d3                     # lsw(d3) = msw of the %d6 factor
                    468:        swap            %d4                     # lsw(d4) = msw of the %d5 factor
                    469:        mulu.w          %d5, %d6                # %d6 <- lsw*lsw
                    470:        mulu.w          %d3, %d5                # %d5 <- msw-dest*lsw-source
                    471:        mulu.w          %d4, %d2                # %d2 <- msw-source*lsw-dest
                    472:        mulu.w          %d4, %d3                # %d3 <- msw*msw
                    473: # now use swap and addx to consolidate to two longwords
                    474:        clr.l           %d4                     # d4 = 0, so addx adds only the carry
                    475:        swap            %d6
                    476:        add.w           %d5, %d6                # add msw of l*l to lsw of m*l product
                    477:        addx.l          %d4, %d3                # add any carry to m*m product (.l: carry may pass bit 15)
                    478:        add.w           %d2, %d6                # add in lsw of other m*l product
                    479:        addx.l          %d4, %d3                # add any carry to m*m product (.l: carry may pass bit 15)
                    480:        swap            %d6                     # %d6 is low 32 bits of final product
                    481:        clr.w           %d5
                    482:        clr.w           %d2                     # lsw of two mixed products used,
                    483:        swap            %d5                     # now use msws of longwords
                    484:        swap            %d2
                    485:        add.l           %d2, %d5
                    486:        add.l           %d3, %d5        # %d5 now ms 32 bits of final product
                    487:        rts
                    488:
                    489: #########################################################################
                    490: # XDEF ****************************************************************        #
                    491: #      _060LSP__imulu64_(): Emulate 64-bit unsigned mul instruction    #
                    492: #      _060LSP__imuls64_(): Emulate 64-bit signed mul instruction.     #
                    493: #                                                                      #
                    494: #      This is the library version which is accessed as a subroutine   #
                    495: #      and therefore does not work exactly like the 680X0 mul{s,u}.l   #
                    496: #      64-bit multiply instruction.                                    #
                    497: #                                                                      #
                    498: # XREF ****************************************************************        #
                    499: #      None                                                            #
                    500: #                                                                      #
                    501: # INPUT ***************************************************************        #
                    502: #      0x4(sp) = multiplier                                            #
                    503: #      0x8(sp) = multiplicand                                          #
                    504: #      0xc(sp) = pointer to location to place 64-bit result            #
                    505: #                                                                      #
                    506: # OUTPUT **************************************************************        #
                    507: #      0xc(sp) = points to location of 64-bit result                   #
                    508: #                                                                      #
                    509: # ALGORITHM ***********************************************************        #
                    510: #      Perform the multiply in pieces using 16x16->32 unsigned         #
                    511: # multiplies and "add" instructions.                                   #
                    512: #      Set the condition codes as appropriate before performing an     #
                    513: # "rts".                                                               #
                    514: #                                                                      #
                    515: #########################################################################
                    516:
                    517: set MUL64_CC, -4
                    518:
                    519:        global          _060LSP__imulu64_
                    520: _060LSP__imulu64_:
                    521:
                    522: # PROLOGUE BEGIN ########################################################
                    523:        link.w          %a6,&-4                 # 4-byte local holds saved ccodes
                    524:        movm.l          &0x3800,-(%sp)          # save d2-d4
                    525: #      fmovm.l         &0x0,-(%sp)             # save no fpregs
                    526: # PROLOGUE END ##########################################################
                    527:
                    528:        mov.w           %cc,MUL64_CC(%a6)       # save incoming ccodes
                    529:
                    530:        mov.l           0x8(%a6),%d0            # store multiplier in d0
                    531:        beq.w           mulu64_zero             # handle zero separately
                    532:
                    533:        mov.l           0xc(%a6),%d1            # get multiplicand in d1
                    534:        beq.w           mulu64_zero             # handle zero separately
                    535:
                    536: #########################################################################
                    537: #      63                         32                           0       #
                    538: #      ----------------------------                                    #
                    539: #      | hi(mplier) * hi(mplicand)|                                    #
                    540: #      ----------------------------                                    #
                    541: #                   -----------------------------                      #
                    542: #                   | hi(mplier) * lo(mplicand) |                      #
                    543: #                   -----------------------------                      #
                    544: #                   -----------------------------                      #
                    545: #                   | lo(mplier) * hi(mplicand) |                      #
                    546: #                   -----------------------------                      #
                    547: #        |                        -----------------------------        #
                    548: #      --|--                      | lo(mplier) * lo(mplicand) |        #
                    549: #        |                        -----------------------------        #
                    550: #      ========================================================        #
                    551: #      --------------------------------------------------------        #
                    552: #      |       hi(result)         |        lo(result)         |        #
                    553: #      --------------------------------------------------------        #
                    554: #########################################################################
                    555: mulu64_alg:
                    556: # load temp registers with operands
                    557:        mov.l           %d0,%d2                 # mr in d2
                    558:        mov.l           %d0,%d3                 # mr in d3
                    559:        mov.l           %d1,%d4                 # md in d4
                    560:        swap            %d3                     # hi(mr) in lo d3
                    561:        swap            %d4                     # hi(md) in lo d4
                    562:
                    563: # complete necessary multiplies:
                    564:        mulu.w          %d1,%d0                 # [1] lo(mr) * lo(md)
                    565:        mulu.w          %d3,%d1                 # [2] hi(mr) * lo(md)
                    566:        mulu.w          %d4,%d2                 # [3] lo(mr) * hi(md)
                    567:        mulu.w          %d4,%d3                 # [4] hi(mr) * hi(md)
                    568:
                    569: # add lo portions of [2],[3] to hi portion of [1].
                    570: # add carries produced from these adds to [4].
                    571: # lo([1]) is the final lo 16 bits of the result.
                    572:        clr.l           %d4                     # load d4 w/ zero value
                    573:        swap            %d0                     # hi([1]) <==> lo([1])
                    574:        add.w           %d1,%d0                 # hi([1]) + lo([2])
                    575:        addx.l          %d4,%d3                 #    [4]  + carry
                    576:        add.w           %d2,%d0                 # hi([1]) + lo([3])
                    577:        addx.l          %d4,%d3                 #    [4]  + carry
                    578:        swap            %d0                     # lo([1]) <==> hi([1])
                    579:
                    580: # lo portions of [2],[3] have been added in to final result.
                    581: # now, clear lo, put hi in lo reg, and add to [4]
                    582:        clr.w           %d1                     # clear lo([2])
                    583:        clr.w           %d2                     # clear lo([3])
                    584:        swap            %d1                     # hi([2]) in lo d1
                    585:        swap            %d2                     # hi([3]) in lo d2
                    586:        add.l           %d2,%d1                 # hi([2]) + hi([3])
                    587:        add.l           %d3,%d1                 #   ... + [4] (incl. carries)
                    588:
                    589: # now, grab the condition codes. only one that can be set is 'N'.
                    590: # 'N' CAN be set if the operation is unsigned if bit 63 is set.
                    591:        mov.w           MUL64_CC(%a6),%d4
                    592:        andi.b          &0x10,%d4               # keep old 'X' bit
                    593:        tst.l           %d1                     # may set 'N' bit
                    594:        bpl.b           mulu64_ddone
                    595:        ori.b           &0x8,%d4                # set 'N' bit
                    596: mulu64_ddone:
                    597:        mov.w           %d4,%cc
                    598:
                    599: # here, the result is in d1 (hi) and d0 (lo). swap them so that movm
                    600: # writes hi first, then lo, at the address held in 0x10(%a6).
                    601: # use movm here to not disturb the condition codes.
                    602: mulu64_end:
                    603:        exg             %d1,%d0
                    604:        movm.l          &0x0003,([0x10,%a6])            # save result
                    605:
                    606: # EPILOGUE BEGIN ########################################################
                    607: #      fmovm.l         (%sp)+,&0x0             # restore no fpregs
                    608:        movm.l          (%sp)+,&0x001c          # restore d2-d4
                    609:        unlk            %a6
                    610: # EPILOGUE END ##########################################################
                    611:
                    612:        rts
                    613:
                    614: # one or both of the operands is zero so the result is also zero.
                    615: # build a zero result in d0/d1, set the 'Z' ccode bit, and store via mulu64_end.
                    616: mulu64_zero:
                    617:        clr.l           %d0
                    618:        clr.l           %d1
                    619:
                    620:        mov.w           MUL64_CC(%a6),%d4
                    621:        andi.b          &0x10,%d4               # keep old 'X' bit
                    622:        ori.b           &0x4,%d4
                    623:        mov.w           %d4,%cc                 # set 'Z' ccode bit
                    624:
                    625:        bra.b           mulu64_end
                    626:
                    627: ##########
                    628: # muls.l #
                    629: ##########
                    630:        global          _060LSP__imuls64_
                    631: _060LSP__imuls64_:
                    632:
                    633: # PROLOGUE BEGIN ########################################################
                    634:        link.w          %a6,&-4                 # 4-byte local holds saved ccodes
                    635:        movm.l          &0x3c00,-(%sp)          # save d2-d5
                    636: #      fmovm.l         &0x0,-(%sp)             # save no fpregs
                    637: # PROLOGUE END ##########################################################
                    638:
                    639:        mov.w           %cc,MUL64_CC(%a6)       # save incoming ccodes
                    640:
                    641:        mov.l           0x8(%a6),%d0            # store multiplier in d0
                    642:        beq.w           muls64_zero             # zero: exit via the d2-d5 epilogue
                    643:
                    644:        mov.l           0xc(%a6),%d1            # get multiplicand in d1
                    645:        beq.w           muls64_zero             # zero: exit via the d2-d5 epilogue
                    646:
                    647:        clr.b           %d5                     # clear sign tag
                    648:        tst.l           %d0                     # is multiplier negative?
                    649:        bge.b           muls64_chk_md_sgn       # no
                    650:        neg.l           %d0                     # make multiplier positive
                    651:
                    652:        ori.b           &0x1,%d5                # save multiplier sgn
                    653:
                    654: # the result sign is the exclusive or of the operand sign bits.
                    655: muls64_chk_md_sgn:
                    656:        tst.l           %d1                     # is multiplicand negative?
                    657:        bge.b           muls64_alg              # no
                    658:        neg.l           %d1                     # make multiplicand positive
                    659:
                    660:        eori.b          &0x1,%d5                # calculate correct sign
                    661:
                    662: #########################################################################
                    663: #      63                         32                           0       #
                    664: #      ----------------------------                                    #
                    665: #      | hi(mplier) * hi(mplicand)|                                    #
                    666: #      ----------------------------                                    #
                    667: #                   -----------------------------                      #
                    668: #                   | hi(mplier) * lo(mplicand) |                      #
                    669: #                   -----------------------------                      #
                    670: #                   -----------------------------                      #
                    671: #                   | lo(mplier) * hi(mplicand) |                      #
                    672: #                   -----------------------------                      #
                    673: #        |                        -----------------------------        #
                    674: #      --|--                      | lo(mplier) * lo(mplicand) |        #
                    675: #        |                        -----------------------------        #
                    676: #      ========================================================        #
                    677: #      --------------------------------------------------------        #
                    678: #      |       hi(result)         |        lo(result)         |        #
                    679: #      --------------------------------------------------------        #
                    680: #########################################################################
                    681: muls64_alg:
                    682: # load temp registers with operands
                    683:        mov.l           %d0,%d2                 # mr in d2
                    684:        mov.l           %d0,%d3                 # mr in d3
                    685:        mov.l           %d1,%d4                 # md in d4
                    686:        swap            %d3                     # hi(mr) in lo d3
                    687:        swap            %d4                     # hi(md) in lo d4
                    688:
                    689: # complete necessary multiplies:
                    690:        mulu.w          %d1,%d0                 # [1] lo(mr) * lo(md)
                    691:        mulu.w          %d3,%d1                 # [2] hi(mr) * lo(md)
                    692:        mulu.w          %d4,%d2                 # [3] lo(mr) * hi(md)
                    693:        mulu.w          %d4,%d3                 # [4] hi(mr) * hi(md)
                    694:
                    695: # add lo portions of [2],[3] to hi portion of [1].
                    696: # add carries produced from these adds to [4].
                    697: # lo([1]) is the final lo 16 bits of the result.
                    698:        clr.l           %d4                     # load d4 w/ zero value
                    699:        swap            %d0                     # hi([1]) <==> lo([1])
                    700:        add.w           %d1,%d0                 # hi([1]) + lo([2])
                    701:        addx.l          %d4,%d3                 #    [4]  + carry
                    702:        add.w           %d2,%d0                 # hi([1]) + lo([3])
                    703:        addx.l          %d4,%d3                 #    [4]  + carry
                    704:        swap            %d0                     # lo([1]) <==> hi([1])
                    705:
                    706: # lo portions of [2],[3] have been added in to final result.
                    707: # now, clear lo, put hi in lo reg, and add to [4]
                    708:        clr.w           %d1                     # clear lo([2])
                    709:        clr.w           %d2                     # clear lo([3])
                    710:        swap            %d1                     # hi([2]) in lo d1
                    711:        swap            %d2                     # hi([3]) in lo d2
                    712:        add.l           %d2,%d1                 # hi([2]) + hi([3])
                    713:        add.l           %d3,%d1                 #   ... + [4] (incl. carries)
                    714:
                    715:        tst.b           %d5                     # should result be signed?
                    716:        beq.b           muls64_done             # no
                    717:
                    718: # result should be a signed negative number.
                    719: # compute 2's complement of the unsigned number:
                    720: #   -negate all bits and add 1
                    721: muls64_neg:
                    722:        not.l           %d0                     # negate lo(result) bits
                    723:        not.l           %d1                     # negate hi(result) bits
                    724:        addq.l          &1,%d0                  # add 1 to lo(result)
                    725:        addx.l          %d4,%d1                 # add carry to hi(result)
                    726:
                    727: muls64_done:
                    728:        mov.w           MUL64_CC(%a6),%d4
                    729:        andi.b          &0x10,%d4               # keep old 'X' bit
                    730:        tst.l           %d1                     # may set 'N' bit
                    731:        bpl.b           muls64_ddone
                    732:        ori.b           &0x8,%d4                # set 'N' bit
                    733: muls64_ddone:
                    734:        mov.w           %d4,%cc
                    735:
                    736: # here, the result is in d1 (hi) and d0 (lo). swap them so that movm
                    737: # writes hi first, then lo, at the address held in 0x10(%a6).
                    738: # use movm here to not disturb the condition codes.
                    739: muls64_end:
                    740:        exg             %d1,%d0
                    741:        movm.l          &0x0003,([0x10,%a6])    # save result
                    742:
                    743: # EPILOGUE BEGIN ########################################################
                    744: #      fmovm.l         (%sp)+,&0x0             # restore no fpregs
                    745:        movm.l          (%sp)+,&0x003c          # restore d2-d5
                    746:        unlk            %a6
                    747: # EPILOGUE END ##########################################################
                    748:
                    749:        rts
                    750:
                    751: # one or both of the operands is zero so the result is also zero.
                    752: # build a zero result in d0/d1, set the 'Z' ccode bit, and store via muls64_end.
                    753: muls64_zero:
                    754:        clr.l           %d0
                    755:        clr.l           %d1
                    756:
                    757:        mov.w           MUL64_CC(%a6),%d4
                    758:        andi.b          &0x10,%d4               # keep old 'X' bit
                    759:        ori.b           &0x4,%d4
                    760:        mov.w           %d4,%cc                 # set 'Z' ccode bit
                    761:
                    762:        bra.b           muls64_end
                    763:
                    764: #########################################################################
                    765: # XDEF ****************************************************************        #
                    766: #      _060LSP__cmp2_Ab_(): Emulate "cmp2.b An,<ea>".                  #
                    767: #      _060LSP__cmp2_Aw_(): Emulate "cmp2.w An,<ea>".                  #
                    768: #      _060LSP__cmp2_Al_(): Emulate "cmp2.l An,<ea>".                  #
                    769: #      _060LSP__cmp2_Db_(): Emulate "cmp2.b Dn,<ea>".                  #
                    770: #      _060LSP__cmp2_Dw_(): Emulate "cmp2.w Dn,<ea>".                  #
                    771: #      _060LSP__cmp2_Dl_(): Emulate "cmp2.l Dn,<ea>".                  #
                    772: #                                                                      #
                    773: #      This is the library version which is accessed as a subroutine   #
                    774: #      and therefore does not work exactly like the 680X0 "cmp2"       #
                    775: #      instruction.                                                    #
                    776: #                                                                      #
                    777: # XREF ****************************************************************        #
                    778: #      None                                                            #
                    779: #                                                                      #
                    780: # INPUT ***************************************************************        #
                    781: #      0x4(sp) = Rn                                                    #
                    782: #      0x8(sp) = pointer to boundary pair                              #
                    783: #                                                                      #
                    784: # OUTPUT **************************************************************        #
                    785: #      cc = condition codes are set correctly                          #
                    786: #                                                                      #
                    787: # ALGORITHM ***********************************************************        #
                    788: #      In the interest of simplicity, all operands are converted to    #
                    789: # longword size whether the operation is byte, word, or long. The      #
                    790: # bounds are sign extended accordingly. If Rn is a data register, Rn is #
                    791: # also sign extended. If Rn is an address register, it need not be sign #
                    792: # extended since the full register is always used.                     #
                    793: #      The condition codes are set correctly before the final "rts".   #
                    794: #                                                                      #
                    795: #########################################################################
                    796:
                    797: set    CMP2_CC,        -4
                    798:
                    799:        global          _060LSP__cmp2_Ab_
                    800: _060LSP__cmp2_Ab_:
                    801:
                    802: # PROLOGUE BEGIN ########################################################
                    803:        link.w          %a6,&-4                 # 4-byte local holds saved ccodes
                    804:        movm.l          &0x3800,-(%sp)          # save d2-d4
                    805: #      fmovm.l         &0x0,-(%sp)             # save no fpregs
                    806: # PROLOGUE END ##########################################################
                    807:
                    808:        mov.w           %cc,CMP2_CC(%a6)        # save incoming ccodes
                    809:        mov.l           0x8(%a6), %d2           # get regval (An: used as-is, no extend)
                    810:
                    811:        mov.b           ([0xc,%a6],0x0),%d0     # lo bnd: byte 0 of boundary pair
                    812:        mov.b           ([0xc,%a6],0x1),%d1     # hi bnd: byte 1 of boundary pair
                    813:
                    814:        extb.l          %d0                     # sign extend lo bnd
                    815:        extb.l          %d1                     # sign extend hi bnd
                    816:        bra.w           l_cmp2_cmp              # go do the compare emulation
                    817:
                    818:        global          _060LSP__cmp2_Aw_
                    819: _060LSP__cmp2_Aw_:
                    820:
                    821: # PROLOGUE BEGIN ########################################################
                    822:        link.w          %a6,&-4                 # 4-byte local holds saved ccodes
                    823:        movm.l          &0x3800,-(%sp)          # save d2-d4
                    824: #      fmovm.l         &0x0,-(%sp)             # save no fpregs
                    825: # PROLOGUE END ##########################################################
                    826:
                    827:        mov.w           %cc,CMP2_CC(%a6)        # save incoming ccodes
                    828:        mov.l           0x8(%a6), %d2           # get regval (An: used as-is, no extend)
                    829:
                    830:        mov.w           ([0xc,%a6],0x0),%d0     # lo bnd: word at offset 0 of pair
                    831:        mov.w           ([0xc,%a6],0x2),%d1     # hi bnd: word at offset 2 of pair
                    832:
                    833:        ext.l           %d0                     # sign extend lo bnd
                    834:        ext.l           %d1                     # sign extend hi bnd
                    835:        bra.w           l_cmp2_cmp              # go do the compare emulation
                    836:
                    837:        global          _060LSP__cmp2_Al_
                    838: _060LSP__cmp2_Al_:
                    839:
                    840: # PROLOGUE BEGIN ########################################################
                    841:        link.w          %a6,&-4                 # 4-byte local holds saved ccodes
                    842:        movm.l          &0x3800,-(%sp)          # save d2-d4
                    843: #      fmovm.l         &0x0,-(%sp)             # save no fpregs
                    844: # PROLOGUE END ##########################################################
                    845:
                    846:        mov.w           %cc,CMP2_CC(%a6)        # save incoming ccodes
                    847:        mov.l           0x8(%a6), %d2           # get regval
                    848:
                    849:        mov.l           ([0xc,%a6],0x0),%d0     # lo bnd: long at offset 0 of pair
                    850:        mov.l           ([0xc,%a6],0x4),%d1     # hi bnd: long at offset 4 of pair
                    851:        bra.w           l_cmp2_cmp              # go do the compare emulation
                    852:
                    853:        global          _060LSP__cmp2_Db_
                    854: _060LSP__cmp2_Db_:
                    855:
                    856: # PROLOGUE BEGIN ########################################################
                    857:        link.w          %a6,&-4                 # 4-byte local holds saved ccodes
                    858:        movm.l          &0x3800,-(%sp)          # save d2-d4
                    859: #      fmovm.l         &0x0,-(%sp)             # save no fpregs
                    860: # PROLOGUE END ##########################################################
                    861:
                    862:        mov.w           %cc,CMP2_CC(%a6)        # save incoming ccodes
                    863:        mov.l           0x8(%a6), %d2           # get regval
                    864:
                    865:        mov.b           ([0xc,%a6],0x0),%d0     # lo bnd: byte 0 of boundary pair
                    866:        mov.b           ([0xc,%a6],0x1),%d1     # hi bnd: byte 1 of boundary pair
                    867:
                    868:        extb.l          %d0                     # sign extend lo bnd
                    869:        extb.l          %d1                     # sign extend hi bnd
                    870:
                    871: # operation is a data register compare.
                    872: # sign extend byte to long so we can do simple longword compares.
                    873:        extb.l          %d2                     # sign extend data byte
                    874:        bra.w           l_cmp2_cmp              # go do the compare emulation
                    875:
                    876:        global          _060LSP__cmp2_Dw_
                    877: _060LSP__cmp2_Dw_:
                    878:
                    879: # PROLOGUE BEGIN ########################################################
                    880:        link.w          %a6,&-4                 # 4-byte local holds saved ccodes
                    881:        movm.l          &0x3800,-(%sp)          # save d2-d4
                    882: #      fmovm.l         &0x0,-(%sp)             # save no fpregs
                    883: # PROLOGUE END ##########################################################
                    884:
                    885:        mov.w           %cc,CMP2_CC(%a6)        # save incoming ccodes
                    886:        mov.l           0x8(%a6), %d2           # get regval
                    887:
                    888:        mov.w           ([0xc,%a6],0x0),%d0     # lo bnd: word at offset 0 of pair
                    889:        mov.w           ([0xc,%a6],0x2),%d1     # hi bnd: word at offset 2 of pair
                    890:
                    891:        ext.l           %d0                     # sign extend lo bnd
                    892:        ext.l           %d1                     # sign extend hi bnd
                    893:
                    894: # operation is a data register compare.
                    895: # sign extend word to long so we can do simple longword compares.
                    896:        ext.l           %d2                     # sign extend data word
                    897:        bra.w           l_cmp2_cmp              # go emulate compare
                    898:
                    # _060LSP__cmp2_Dl_: longword-sized cmp2 emulation, data register form.
                    # Stack arguments as read below:
                    #      0x8(%a6) = register value to be checked (regval)
                    #      0xc(%a6) = pointer to the bounds pair; the lower bound is the
                    #                 longword at offset 0x0, the upper bound the one at 0x4
                    # No sign extension is needed for longword operands, so execution
                    # falls straight through into the shared tail l_cmp2_cmp.
                    # %d0 and %d1 are overwritten here and are not saved or restored.
                    899:        global          _060LSP__cmp2_Dl_
                    900: _060LSP__cmp2_Dl_:
                    901:
                    902: # PROLOGUE BEGIN ########################################################
                    903:        link.w          %a6,&-4                 # frame with 4 bytes of local space
                    904:        movm.l          &0x3800,-(%sp)          # save d2-d4
                    905: #      fmovm.l         &0x0,-(%sp)             # save no fpregs
                    906: # PROLOGUE END ##########################################################
                    907:
                    # The entry ccodes are captured first, before the moves below can
                    # change them. CMP2_CC is defined outside this view; presumably it
                    # names the local slot reserved by the link above - confirm.
                    908:        mov.w           %cc,CMP2_CC(%a6)
                    909:        mov.l           0x8(%a6), %d2           # get regval
                    910:
                    911:        mov.l           ([0xc,%a6],0x0),%d0     # lo bnd: longword at offset 0 of bounds ptr
                    912:        mov.l           ([0xc,%a6],0x4),%d1     # hi bnd: longword at offset 4 of bounds ptr
                    913:
                    914: #
                    915: # To set the ccodes correctly:
                    916: #      (1) save 'Z' bit from (Rn - lo)
                    917: #      (2) save 'Z' and 'C' bits from ((hi - lo) - (Rn - lo))
                    918: #      (3) keep 'X', 'N', and 'V' from before instruction
                    919: #      (4) combine ccodes
                    920: #
                    # Shared tail: the word-sized entry point above branches here too.
                    # On entry: %d0 = lo bound, %d1 = hi bound, %d2 = Rn (all longwords),
                    # CMP2_CC(%a6) holds the entry ccodes, and the frame plus saved d2-d4
                    # set up by the entry point's prologue are still in place.
                    921: l_cmp2_cmp:
                    922:        sub.l           %d0, %d2                # (Rn - lo)
                    923:        mov.w           %cc, %d3                # fetch resulting ccodes
                    924:        andi.b          &0x4, %d3               # keep 'Z' bit
                    925:        sub.l           %d0, %d1                # (hi - lo)
                    926:        cmp.l           %d1,%d2                 # ((hi - lo) - (Rn - lo))
                    927:
                    928:        mov.w           %cc, %d4                # fetch resulting ccodes
                    929:        or.b            %d4, %d3                # combine w/ earlier ccodes
                    930:        andi.b          &0x5, %d3               # keep 'Z' and 'C' (0x4 | 0x1)
                    931:
                    932:        mov.w           CMP2_CC(%a6), %d4       # fetch old ccodes
                    933:        andi.b          &0x1a, %d4              # keep 'X','N','V' bits
                    934:        or.b            %d3, %d4                # insert new ccodes
                    935:        mov.w           %d4,%cc                 # save new ccodes
                    936:
                    # The result is handed back to the caller in the ccodes written above.
                    937: # EPILOGUE BEGIN ########################################################
                    938: #      fmovm.l         (%sp)+,&0x0             # restore no fpregs
                    939:        movm.l          (%sp)+,&0x001c          # restore d2-d4
                    940:        unlk            %a6
                    941: # EPILOGUE END ##########################################################
                    942:
                    943:        rts
CVSweb