Annotation of sys/arch/m68k/060sp/ilsp.s, Revision 1.1.1.1
1.1 nbrk 1: #
2: # $OpenBSD: ilsp.s,v 1.2 1996/05/30 22:14:39 niklas Exp $
3: # $NetBSD: ilsp.s,v 1.2 1996/05/15 19:48:37 is Exp $
4: #
5:
6: #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
7: # MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
8: # M68000 Hi-Performance Microprocessor Division
9: # M68060 Software Package Production Release
10: #
11: # M68060 Software Package Copyright (C) 1993, 1994, 1995, 1996 Motorola Inc.
12: # All rights reserved.
13: #
14: # THE SOFTWARE is provided on an "AS IS" basis and without warranty.
15: # To the maximum extent permitted by applicable law,
16: # MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
17: # INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS
18: # FOR A PARTICULAR PURPOSE and any warranty against infringement with
19: # regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
20: # and any accompanying written materials.
21: #
22: # To the maximum extent permitted by applicable law,
23: # IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
24: # (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS,
25: # BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY LOSS)
26: # ARISING OF THE USE OR INABILITY TO USE THE SOFTWARE.
27: #
28: # Motorola assumes no responsibility for the maintenance and support
29: # of the SOFTWARE.
30: #
31: # You are hereby granted a copyright license to use, modify, and distribute the
32: # SOFTWARE so long as this entire notice is retained without alteration
33: # in any modified and/or redistributed versions, and that such modified
34: # versions are clearly identified as such.
35: # No licenses are granted by implication, estoppel or otherwise under any
36: # patents or trademarks of Motorola, Inc.
37: #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
38:
39: #
40: # litop.s:
41: # This file is appended to the top of the 060FPLSP package
42: # and contains the entry points into the package. The user, in
43: # effect, branches to one of the branch table entries located here.
44: #
45:
46: bra.l _060LSP__idivs64_ # entry 0: 64-bit signed divide
47: short 0x0000 # zero word after each branch (presumably pads the entry to a fixed size -- confirm)
48: bra.l _060LSP__idivu64_ # entry 1: 64-bit unsigned divide
49: short 0x0000
50:
51: bra.l _060LSP__imuls64_ # entry 2: 64-bit signed multiply
52: short 0x0000
53: bra.l _060LSP__imulu64_ # entry 3: 64-bit unsigned multiply
54: short 0x0000
55:
56: bra.l _060LSP__cmp2_Ab_ # entry 4: cmp2.b, Rn is an address register
57: short 0x0000
58: bra.l _060LSP__cmp2_Aw_ # entry 5: cmp2.w, Rn is an address register
59: short 0x0000
60: bra.l _060LSP__cmp2_Al_ # entry 6: cmp2.l, Rn is an address register
61: short 0x0000
62: bra.l _060LSP__cmp2_Db_ # entry 7: cmp2.b, Rn is a data register
63: short 0x0000
64: bra.l _060LSP__cmp2_Dw_ # entry 8: cmp2.w, Rn is a data register
65: short 0x0000
66: bra.l _060LSP__cmp2_Dl_ # entry 9: cmp2.l, Rn is a data register
67: short 0x0000
68:
69: # leave room for possible future additions.
70: align 0x200
71:
72: #########################################################################
73: # XDEF **************************************************************** #
74: # _060LSP__idivu64_(): Emulate 64-bit unsigned div instruction. #
75: # _060LSP__idivs64_(): Emulate 64-bit signed div instruction. #
76: # #
77: # This is the library version which is accessed as a subroutine #
78: # and therefore does not work exactly like the 680X0 div{s,u}.l #
79: # 64-bit divide instruction. #
80: # #
81: # XREF **************************************************************** #
82: # None. #
83: # #
84: # INPUT *************************************************************** #
85: # 0x4(sp) = divisor #
86: # 0x8(sp) = hi(dividend) #
87: # 0xc(sp) = lo(dividend) #
88: # 0x10(sp) = pointer to location to place quotient/remainder #
89: # #
90: # OUTPUT ************************************************************** #
91: # 0x10(sp) = points to location of remainder/quotient. #
92: # remainder is in first longword, quotient is in 2nd. #
93: # #
94: # ALGORITHM *********************************************************** #
95: # If the operands are signed, make them unsigned and save the #
96: # sign info for later. Separate out special cases like divide-by-zero #
97: # or 32-bit divides if possible. Else, use a special math algorithm #
98: # to calculate the result. #
99: # Restore sign info if signed instruction. Set the condition #
100: # codes before performing the final "rts". If the divisor was equal to #
101: # zero, then perform a divide-by-zero using a 16-bit implemented #
102: # divide instruction. This way, the operating system can record that #
103: # the event occurred even though it may not point to the correct place. #
104: # #
105: #########################################################################
106:
107: set POSNEG, -1 # byte off %a6: set (st) by the divs entry, cleared (sf) by the divu entry
108: set NDIVISOR, -2 # byte off %a6: set when the divisor was negative (signed path only)
109: set NDIVIDEND, -3 # byte off %a6: set when the dividend was negative (signed path only)
110: set DDSECOND, -4 # byte off %a6: clear while the first quotient word is computed, set for the second
111: set DDNORMAL, -8 # long off %a6: number of normalization shifts done in lddknuth
112: set DDQUOTIENT, -12 # long off %a6: quotient, stored one word at a time by lddknuth
113: set DIV64_CC, -16 # word off %a6: condition codes as they were on entry
114:
115: ##########
116: # divs.l #
117: ##########
118: global _060LSP__idivs64_
119: _060LSP__idivs64_:
120: # PROLOGUE BEGIN ########################################################
121: link.w %a6,&-16 # 16 bytes of locals (POSNEG .. DIV64_CC)
122: movm.l &0x3f00,-(%sp) # save d2-d7
123: # fmovm.l &0x0,-(%sp) # save no fpregs
124: # PROLOGUE END ##########################################################
125: # signed entry: remember the incoming ccodes, flag the call as signed, then join the shared code.
126: mov.w %cc,DIV64_CC(%a6) # save incoming ccodes before anything changes them
127: st POSNEG(%a6) # signed operation
128: bra.b ldiv64_cont # continue in the code shared with divu.l
129:
130: ##########
131: # divu.l #
132: ##########
133: global _060LSP__idivu64_
134: _060LSP__idivu64_:
135: # PROLOGUE BEGIN ########################################################
136: link.w %a6,&-16 # 16 bytes of locals (POSNEG .. DIV64_CC)
137: movm.l &0x3f00,-(%sp) # save d2-d7
138: # fmovm.l &0x0,-(%sp) # save no fpregs
139: # PROLOGUE END ##########################################################
140: # unsigned entry: remember the incoming ccodes, flag the call as unsigned, fall into ldiv64_cont.
141: mov.w %cc,DIV64_CC(%a6) # save incoming ccodes before anything changes them
142: sf POSNEG(%a6) # unsigned operation
143:
144: ldiv64_cont:
145: mov.l 0x8(%a6),%d7 # fetch divisor (first stack argument)
146:
147: beq.w ldiv64eq0 # the load above set 'Z': divisor is = 0!!!
148:
149: mov.l 0xc(%a6), %d5 # get dividend hi
150: mov.l 0x10(%a6), %d6 # get dividend lo
151:
152: # separate signed and unsigned divide
153: tst.b POSNEG(%a6) # signed or unsigned?
154: beq.b ldspecialcases # unsigned: operands are used as they are
155:
156: # save the sign of the divisor
157: # make divisor unsigned if it's negative
158: tst.l %d7 # chk sign of divisor
159: slt NDIVISOR(%a6) # save sign of divisor
160: bpl.b ldsgndividend # not negative: nothing to negate
161: neg.l %d7 # complement negative divisor
162:
163: # save the sign of the dividend
164: # make dividend unsigned if it's negative
165: ldsgndividend:
166: tst.l %d5 # chk sign of hi(dividend)
167: slt NDIVIDEND(%a6) # save sign of dividend
168: bpl.b ldspecialcases # not negative: nothing to negate
169:
170: mov.w &0x0, %cc # clear 'X' cc bit so the first negx has no borrow-in
171: negx.l %d6 # complement signed dividend (low half first)
172: negx.l %d5 # high half, including the borrow from the low half
173:
174: # extract some special cases (operands are unsigned magnitudes from here on):
175: # - is (dividend == 0) ?
176: # - is (hi(dividend) == 0 && (divisor <= lo(dividend))) ? (32-bit div)
177: ldspecialcases:
178: tst.l %d5 # is (hi(dividend) == 0)
179: bne.b ldnormaldivide # no, so try it the long way
180:
181: tst.l %d6 # is (lo(dividend) == 0), too
182: beq.w lddone # yes, so (dividend == 0): %d5 = %d6 = 0 is already the result
183:
184: cmp.l %d7,%d6 # is (divisor <= lo(dividend))
185: bls.b ld32bitdivide # yes, so use 32 bit divide
186:
187: exg %d5,%d6 # q = 0, r = dividend (%d5 was 0, %d6 held lo(dividend))
188: bra.w ldivfinish # can't divide, we're done.
189:
190: ld32bitdivide:
191: tdivu.l %d7, %d5:%d6 # it's only a 32/32 bit div! (remainder -> %d5, quotient -> %d6)
192:
193: bra.b ldivfinish
194:
195: ldnormaldivide:
196: # last special case:
197: # - is hi(dividend) >= divisor ? if yes, then overflow
198: cmp.l %d7,%d5 # divisor vs. hi(dividend)
199: bls.b lddovf # answer won't fit in 32 bits
200:
201: # perform the divide algorithm:
202: bsr.l ldclassical # do int divide; returns quotient in %d6, remainder in %d5
203:
204: # separate into signed and unsigned finishes. here %d6 = |quotient|, %d5 = |remainder|.
205: ldivfinish:
206: tst.b POSNEG(%a6) # do divs, divu separately
207: beq.b lddone # divu has no processing!!!
208:
209: # it was a divs.l, so ccode setting is a little more complicated...
210: tst.b NDIVIDEND(%a6) # remainder has same sign
211: beq.b ldcc # as dividend.
212: neg.l %d5 # sgn(rem) = sgn(dividend)
213: ldcc:
214: mov.b NDIVISOR(%a6), %d0 # %d0 is scratch here; it is not saved or restored
215: eor.b %d0, NDIVIDEND(%a6) # chk if quotient is negative (signs differ); overwrites NDIVIDEND
216: beq.b ldqpos # branch to quot positive
217:
218: # 0x80000000 is the largest number representable as a 32-bit negative
219: # number. the negative of 0x80000000 is 0x80000000.
220: cmpi.l %d6, &0x80000000 # will (-quot) fit in 32 bits?
221: bhi.b lddovf # magnitude above 0x80000000: overflow
222:
223: neg.l %d6 # make (-quot) 2's comp
224:
225: bra.b lddone
226:
227: ldqpos:
228: btst &0x1f, %d6 # will (+quot) fit in 32 bits?
229: bne.b lddovf # bit 31 set: too big for a positive signed long
230:
231: lddone:
232: # normal exit: build the final ccodes, then store remainder (%d5) first and quotient (%d6) second.
233: # NOTE(review): an earlier remark here about "register numbers being the same" does not apply; this version has no register operands.
234: andi.w &0x10,DIV64_CC(%a6) # keep only the caller's 'X' bit
235: mov.w DIV64_CC(%a6),%cc
236: tst.l %d6 # sets 'N'/'Z' from the quotient
237:
238: # here, the result is in %d5 (remainder) and %d6 (quotient). store both at
239: # the location whose address the caller passed as the last argument (0x14(%a6)).
240: # use movm here to not disturb the condition codes.
241: ldexit:
242: movm.l &0x0060,([0x14,%a6]) # save result: mask 0x0060 selects d5 and d6
243:
244: # EPILOGUE BEGIN ########################################################
245: # fmovm.l (%sp)+,&0x0 # restore no fpregs
246: movm.l (%sp)+,&0x00fc # restore d2-d7
247: unlk %a6
248: # EPILOGUE END ##########################################################
249:
250: rts
251:
252: # overflow exit: the result should be the unchanged dividend, with 'V' set
253: lddovf:
254: mov.l 0xc(%a6), %d5 # get dividend hi (reloaded from the caller's arguments)
255: mov.l 0x10(%a6), %d6 # get dividend lo
256:
257: andi.w &0x1c,DIV64_CC(%a6) # keep the caller's 'X','N','Z'; clear 'V' and the low bit
258: ori.w &0x02,DIV64_CC(%a6) # set 'V' ccode bit
259: mov.w DIV64_CC(%a6),%cc
260:
261: bra.b ldexit # store %d5/%d6 and return; movm leaves the ccodes alone
262:
263: ldiv64eq0:
264: mov.l 0xc(%a6),([0x14,%a6]) # divisor was zero: copy hi(dividend) to the first result longword
265: mov.l 0x10(%a6),([0x14,%a6],0x4) # and lo(dividend) to the second
266:
267: mov.w DIV64_CC(%a6),%cc # put back the ccodes saved on entry
268:
269: # EPILOGUE BEGIN ########################################################
270: # fmovm.l (%sp)+,&0x0 # restore no fpregs
271: movm.l (%sp)+,&0x00fc # restore d2-d7
272: unlk %a6
273: # EPILOGUE END ##########################################################
274: # the frame is already torn down, so the exception is taken with the caller's stack layout.
275: divu.w &0x0,%d0 # force a divbyzero exception
276: rts
277:
278: ###########################################################################
279: #########################################################################
280: # This routine uses the 'classical' Algorithm D from Donald Knuth's #
281: # Art of Computer Programming, vol II, Seminumerical Algorithms. #
282: # For this implementation b=2**16, and the target is U1U2U3U4/V1V2, #
283: # where U,V are words of the quadword dividend and longword divisor, #
284: # and U1, V1 are the most significant words. #
285: # #
286: # The most sig. longword of the 64 bit dividend must be in %d5, least #
287: # in %d6. The divisor must be in %d7 (the code visible here has no #
288: # ddivisor or ddusign variables; sign handling is done by the caller). #
289: # The quotient is returned in %d6, remainder in %d5. Overflow is not #
290: # checked here: the caller branches to lddovf before calling when #
291: # divisor <= hi(dividend). %d0-%d4 and %d7 are used as scratch. #
292: #########################################################################
293: ldclassical:
294: # if the divisor msw is 0, use a simpler algorithm than the full blown
295: # one at lddknuth:
296:
297: cmpi.l %d7, &0xffff # does the divisor fit in 16 bits?
298: bhi.b lddknuth # go use D. Knuth algorithm
299:
300: # Since the divisor is only a word (and larger than the mslw of the dividend),
301: # a simpler algorithm may be used :
302: # In the general case, four quotient words would be created by
303: # dividing the divisor word into each dividend word. In this case,
304: # the first two quotient words must be zero, or overflow would occur.
305: # Since we already checked this case above, we can treat the most significant
306: # longword of the dividend as (0) remainder (see Knuth) and merely complete
307: # the last two divisions to get a quotient longword and word remainder:
308:
309: clr.l %d1 # %d1 collects the two quotient words
310: swap %d5 # same as r*b if previous step rqd
311: swap %d6 # get u3 to lsw position
312: mov.w %d6, %d5 # rb + u3
313:
314: divu.w %d7, %d5 # low word = quotient word, high word = remainder
315:
316: mov.w %d5, %d1 # first quotient word
317: swap %d6 # get u4
318: mov.w %d6, %d5 # rb + u4 (remainder is still in the high word)
319:
320: divu.w %d7, %d5
321:
322: swap %d1 # first quotient word moves to the high half
323: mov.w %d5, %d1 # 2nd quotient 'digit'
324: clr.w %d5 # drop the quotient word, keep the remainder in the high word
325: swap %d5 # now remainder
326: mov.l %d1, %d6 # and quotient
327:
328: rts
329:
330: lddknuth:
331: # In this algorithm, the divisor is treated as a 2 digit (word) number
332: # which is divided into a 3 digit (word) dividend to get one quotient
333: # digit (word). After subtraction, the dividend is shifted and the
334: # process repeated. Before beginning, the divisor and dividend are
335: # 'normalized' (shifted left together) so that the process of estimating
336: # the quotient digit will yield verifiably correct results.
337:
338: clr.l DDNORMAL(%a6) # count of shifts for normalization
339: clr.b DDSECOND(%a6) # clear flag for quotient digits
340: clr.l %d1 # %d1 will hold trial quotient
341: lddnchk:
342: btst &31, %d7 # must we normalize? first word of
343: bne.b lddnormalized # divisor (V1) must be >= 65536/2
344: addq.l &0x1, DDNORMAL(%a6) # count normalization shifts
345: lsl.l &0x1, %d7 # shift the divisor
346: lsl.l &0x1, %d6 # shift u4,u3 with overflow to u2
347: roxl.l &0x1, %d5 # shift u1,u2 (takes in the bit shifted out of %d6)
348: bra.w lddnchk # repeat until bit 31 of the divisor is set
349: lddnormalized:
350: # (ldd2nd branches back here to compute the second quotient word)
351: # Now calculate an estimate of the quotient words (msw first, then lsw).
352: # The comments use subscripts for the first quotient digit determination.
353: mov.l %d7, %d3 # divisor
354: mov.l %d5, %d2 # dividend mslw
355: swap %d2 # U1 now in the low word of %d2
356: swap %d3 # V1 now in the low word of %d3
357: cmp.w %d2, %d3 # V1 = U1 ?
358: bne.b lddqcalc1
359: mov.w &0xffff, %d1 # use max trial quotient word
360: bra.b lddadj0
361: lddqcalc1:
362: mov.l %d5, %d1 # U1U2
363:
364: divu.w %d3, %d1 # use quotient of mslw/msw
365:
366: andi.l &0x0000ffff, %d1 # zero any remainder
367: lddadj0:
368:
369: # now test the trial quotient and adjust. This step plus the
370: # normalization assures (according to Knuth) that the trial
371: # quotient will be at worst 1 too large.
372: mov.l %d6, -(%sp) # save the low dividend longword; popped again in lddadjd1
373: clr.w %d6 # word u3 left
374: swap %d6 # in lsw position
375: lddadj1: mov.l %d7, %d3 # top of the adjust loop: %d3 = V1V2
376: mov.l %d1, %d2 # %d2 = trial quotient q
377: mulu.w %d7, %d2 # V2q
378: swap %d3 # V1 in the low word
379: mulu.w %d1, %d3 # V1q
380: mov.l %d5, %d4 # U1U2
381: sub.l %d3, %d4 # U1U2 - V1q
382:
383: swap %d4 # low word of the difference moves to the high half
384:
385: mov.w %d4,%d0 # %d0 = upper word of (U1U2 - V1q); %d0 is scratch
386: mov.w %d6,%d4 # insert lower word (U3)
387:
388: tst.w %d0 # is upper word set?
389: bne.w lddadjd1 # yes: skip the V2q comparison and keep this trial quotient
390:
391: # add.l %d6, %d4 # (U1U2 - V1q) + U3
392:
393: cmp.l %d2, %d4 # V2q against ((U1U2 - V1q) with U3 appended)
394: bls.b lddadjd1 # is V2q > (U1U2-V1q) + U3 ? no: trial quotient accepted
395: subq.l &0x1, %d1 # yes, decrement and recheck
396: bra.b lddadj1
397: lddadjd1:
398: # now test the word by multiplying it by the divisor (V1V2) and comparing
399: # the 3 digit (word) result with the current dividend words
400: mov.l %d5, -(%sp) # save %d5 (%d6 already saved)
401: mov.l %d1, %d6 # trial quotient
402: swap %d6 # shift answer to ms 3 words
403: mov.l %d7, %d5 # divisor is the other factor
404: bsr.l ldmm2 # %d5:%d6 = %d5 * %d6; destroys %d2-%d4
405: mov.l %d5, %d2 # now %d2,%d3 are trial*divisor
406: mov.l %d6, %d3
407: mov.l (%sp)+, %d5 # restore dividend
408: mov.l (%sp)+, %d6 # (low longword pushed at lddadj0)
409: sub.l %d3, %d6 # low longwords first ...
410: subx.l %d2, %d5 # subtract double precision
411: bcc ldd2nd # no carry, do next quotient digit
412: subq.l &0x1, %d1 # q is one too large
413: # need to add back divisor longword to current ms 3 digits of dividend
414: # - according to Knuth, this is done only 2 out of 65536 times for random
415: # divisor, dividend selection.
416: clr.l %d2 # zero operand so addx adds only the carry
417: mov.l %d7, %d3
418: swap %d3
419: clr.w %d3 # %d3 now ls word of divisor
420: add.l %d3, %d6 # aligned with 3rd word of dividend
421: addx.l %d2, %d5 # propagate the carry into the high longword
422: mov.l %d7, %d3
423: clr.w %d3 # %d3 now ms word of divisor
424: swap %d3 # aligned with 2nd word of dividend
425: add.l %d3, %d5
426: ldd2nd:
427: tst.b DDSECOND(%a6) # both q words done?
428: bne.b lddremain # yes: finish up
429: # first quotient digit now correct. store digit and shift the
430: # (subtracted) dividend
431: mov.w %d1, DDQUOTIENT(%a6) # high word of the quotient
432: clr.l %d1 # fresh trial quotient for the second word
433: swap %d5 # shift %d5:%d6 left by one word:
434: swap %d6
435: mov.w %d6, %d5 # old high word of %d6 becomes the low word of %d5
436: clr.w %d6 # nothing left below the old low word
437: st DDSECOND(%a6) # second digit
438: bra.w lddnormalized # go estimate the second quotient word
439: lddremain:
440: # add 2nd word to quotient, get the remainder.
441: mov.w %d1, DDQUOTIENT+2(%a6) # low word of the quotient
442: # shift down one word/digit to renormalize remainder.
443: mov.w %d5, %d6
444: swap %d6 # %d6 = low word of %d5 : old high word of %d6
445: swap %d5
446: mov.l DDNORMAL(%a6), %d7 # get norm shift count
447: beq.b lddrn # no normalization shifts were done: nothing to undo
448: subq.l &0x1, %d7 # set for loop count (dbf runs count+1 times)
449: lddnlp:
450: lsr.l &0x1, %d5 # shift into %d6
451: roxr.l &0x1, %d6
452: dbf %d7, lddnlp
453: lddrn:
454: mov.l %d6, %d5 # remainder
455: mov.l DDQUOTIENT(%a6), %d6 # quotient
456:
457: rts
458: ldmm2:
459: # factors for the 32X32->64 multiplication are in %d5 and %d6.
460: # returns 64 bit result in %d5 (hi) %d6(lo).
461: # destroys %d2,%d3,%d4.
462: # carries from the middle column are added to the msw*msw product as a full longword (addx.l).
463: # multiply hi,lo words of each factor to get 4 intermediate products
464: mov.l %d6, %d2
465: mov.l %d6, %d3
466: mov.l %d5, %d4
467: swap %d3 # msw of the %d6 factor in the low word
468: swap %d4 # msw of the %d5 factor in the low word
469: mulu.w %d5, %d6 # %d6 <- lsw*lsw
470: mulu.w %d3, %d5 # %d5 <- msw-dest*lsw-source
471: mulu.w %d4, %d2 # %d2 <- msw-source*lsw-dest
472: mulu.w %d4, %d3 # %d3 <- msw*msw
473: # now use swap and addx to consolidate to two longwords
474: clr.l %d4 # zero operand so addx adds only the carry
475: swap %d6
476: add.w %d5, %d6 # add msw of l*l to lsw of m*l product
477: addx.l %d4, %d3 # add any carry to m*m product (32 bits wide, so use .l)
478: add.w %d2, %d6 # add in lsw of other m*l product
479: addx.l %d4, %d3 # add any carry to m*m product (32 bits wide, so use .l)
480: swap %d6 # %d6 is low 32 bits of final product
481: clr.w %d5
482: clr.w %d2 # lsw of two mixed products used,
483: swap %d5 # now use msws of longwords
484: swap %d2
485: add.l %d2, %d5
486: add.l %d3, %d5 # %d5 now ms 32 bits of final product
487: rts
488:
489: #########################################################################
490: # XDEF **************************************************************** #
491: # _060LSP__imulu64_(): Emulate 64-bit unsigned mul instruction #
492: # _060LSP__imuls64_(): Emulate 64-bit signed mul instruction. #
493: # #
494: # This is the library version which is accessed as a subroutine #
495: # and therefore does not work exactly like the 680X0 mul{s,u}.l #
496: # 64-bit multiply instruction. #
497: # #
498: # XREF **************************************************************** #
499: # None #
500: # #
501: # INPUT *************************************************************** #
502: # 0x4(sp) = multiplier #
503: # 0x8(sp) = multiplicand #
504: # 0xc(sp) = pointer to location to place 64-bit result #
505: # #
506: # OUTPUT ************************************************************** #
507: # 0xc(sp) = points to location of 64-bit result #
508: # #
509: # ALGORITHM *********************************************************** #
510: # Perform the multiply in pieces using 16x16->32 unsigned #
511: # multiplies and "add" instructions. #
512: # Set the condition codes as appropriate before performing an #
513: # "rts". #
514: # #
515: #########################################################################
516:
517: set MUL64_CC, -4 # word off %a6: condition codes as they were on entry
518:
519: global _060LSP__imulu64_
520: _060LSP__imulu64_:
521:
522: # PROLOGUE BEGIN ########################################################
523: link.w %a6,&-4 # 4 bytes of locals (MUL64_CC)
524: movm.l &0x3800,-(%sp) # save d2-d4
525: # fmovm.l &0x0,-(%sp) # save no fpregs
526: # PROLOGUE END ##########################################################
527:
528: mov.w %cc,MUL64_CC(%a6) # save incoming ccodes
529:
530: mov.l 0x8(%a6),%d0 # store multiplier in d0
531: beq.w mulu64_zero # handle zero separately
532:
533: mov.l 0xc(%a6),%d1 # get multiplicand in d1
534: beq.w mulu64_zero # handle zero separately
535:
536: #########################################################################
537: # 63 32 0 #
538: # ---------------------------- #
539: # | hi(mplier) * hi(mplicand)| #
540: # ---------------------------- #
541: # ----------------------------- #
542: # | hi(mplier) * lo(mplicand) | #
543: # ----------------------------- #
544: # ----------------------------- #
545: # | lo(mplier) * hi(mplicand) | #
546: # ----------------------------- #
547: # | ----------------------------- #
548: # --|-- | lo(mplier) * lo(mplicand) | #
549: # | ----------------------------- #
550: # ======================================================== #
551: # -------------------------------------------------------- #
552: # | hi(result) | lo(result) | #
553: # -------------------------------------------------------- #
554: #########################################################################
555: mulu64_alg:
556: # load temp registers with operands
557: mov.l %d0,%d2 # mr in d2
558: mov.l %d0,%d3 # mr in d3
559: mov.l %d1,%d4 # md in d4
560: swap %d3 # hi(mr) in lo d3
561: swap %d4 # hi(md) in lo d4
562:
563: # complete necessary multiplies:
564: mulu.w %d1,%d0 # [1] lo(mr) * lo(md)
565: mulu.w %d3,%d1 # [2] hi(mr) * lo(md)
566: mulu.w %d4,%d2 # [3] lo(mr) * hi(md)
567: mulu.w %d4,%d3 # [4] hi(mr) * hi(md)
568:
569: # add lo portions of [2],[3] to hi portion of [1].
570: # add carries produced from these adds to [4].
571: # lo([1]) is the final lo 16 bits of the result.
572: clr.l %d4 # load d4 w/ zero value
573: swap %d0 # hi([1]) <==> lo([1])
574: add.w %d1,%d0 # hi([1]) + lo([2])
575: addx.l %d4,%d3 # [4] + carry
576: add.w %d2,%d0 # hi([1]) + lo([3])
577: addx.l %d4,%d3 # [4] + carry
578: swap %d0 # lo([1]) <==> hi([1])
579:
580: # lo portions of [2],[3] have been added in to final result.
581: # now, clear lo, put hi in lo reg, and add to [4]
582: clr.w %d1 # clear lo([2])
583: clr.w %d2 # clear lo([3])
584: swap %d1 # hi([2]) in lo d1
585: swap %d2 # hi([3]) in lo d2
586: add.l %d2,%d1 # hi([2]) + hi([3])
587: add.l %d3,%d1 # ... + [4] (with carries) = hi(result)
588:
589: # now, grab the condition codes. only one that can be set is 'N'.
590: # 'N' CAN be set if the operation is unsigned if bit 63 is set.
591: mov.w MUL64_CC(%a6),%d4
592: andi.b &0x10,%d4 # keep old 'X' bit
593: tst.l %d1 # may set 'N' bit
594: bpl.b mulu64_ddone
595: ori.b &0x8,%d4 # set 'N' bit
596: mulu64_ddone:
597: mov.w %d4,%cc
598:
599: # here, the result is in d1 (hi) and d0 (lo). it is stored at the location
600: # whose address the caller passed as the last argument (0x10(%a6)).
601: # use movm here to not disturb the condition codes.
602: mulu64_end:
603: exg %d1,%d0 # hi -> d0, lo -> d1 so that hi is stored first
604: movm.l &0x0003,([0x10,%a6]) # save result: mask 0x0003 selects d0 and d1
605:
606: # EPILOGUE BEGIN ########################################################
607: # fmovm.l (%sp)+,&0x0 # restore no fpregs
608: movm.l (%sp)+,&0x001c # restore d2-d4
609: unlk %a6
610: # EPILOGUE END ##########################################################
611:
612: rts
613:
614: # one or both of the operands is zero so the result is also zero.
615: # build a zero result in d0/d1 and set the 'Z' ccode bit; mulu64_end stores it.
616: mulu64_zero:
617: clr.l %d0
618: clr.l %d1
619:
620: mov.w MUL64_CC(%a6),%d4 # ccodes saved on entry
621: andi.b &0x10,%d4 # keep old 'X' bit
622: ori.b &0x4,%d4 # add 'Z'
623: mov.w %d4,%cc # set 'Z' ccode bit
624:
625: bra.b mulu64_end # store the result and return
626:
627: ##########
628: # muls.l #
629: ##########
630: global _060LSP__imuls64_
631: _060LSP__imuls64_:
632:
633: # PROLOGUE BEGIN ########################################################
634: link.w %a6,&-4 # 4 bytes of locals (MUL64_CC)
635: movm.l &0x3c00,-(%sp) # save d2-d5
636: # fmovm.l &0x0,-(%sp) # save no fpregs
637: # PROLOGUE END ##########################################################
638:
639: mov.w %cc,MUL64_CC(%a6) # save incoming ccodes
640: # zero operands leave through muls64_zero/muls64_end so the epilogue restores the same d2-d5 saved above.
641: mov.l 0x8(%a6),%d0 # store multiplier in d0
642: beq.w muls64_zero # handle zero separately
643:
644: mov.l 0xc(%a6),%d1 # get multiplicand in d1
645: beq.w muls64_zero # handle zero separately
646:
647: clr.b %d5 # clear sign tag
648: tst.l %d0 # is multiplier negative?
649: bge.b muls64_chk_md_sgn # no
650: neg.l %d0 # make multiplier positive
651:
652: ori.b &0x1,%d5 # save multiplier sgn
653:
654: # the result sign is the exclusive or of the operand sign bits.
655: muls64_chk_md_sgn:
656: tst.l %d1 # is multiplicand negative?
657: bge.b muls64_alg # no
658: neg.l %d1 # make multiplicand positive
659:
660: eori.b &0x1,%d5 # calculate correct sign
661:
662: #########################################################################
663: # 63 32 0 #
664: # ---------------------------- #
665: # | hi(mplier) * hi(mplicand)| #
666: # ---------------------------- #
667: # ----------------------------- #
668: # | hi(mplier) * lo(mplicand) | #
669: # ----------------------------- #
670: # ----------------------------- #
671: # | lo(mplier) * hi(mplicand) | #
672: # ----------------------------- #
673: # | ----------------------------- #
674: # --|-- | lo(mplier) * lo(mplicand) | #
675: # | ----------------------------- #
676: # ======================================================== #
677: # -------------------------------------------------------- #
678: # | hi(result) | lo(result) | #
679: # -------------------------------------------------------- #
680: #########################################################################
681: muls64_alg:
682: # load temp registers with operands (both are non-negative magnitudes here)
683: mov.l %d0,%d2 # mr in d2
684: mov.l %d0,%d3 # mr in d3
685: mov.l %d1,%d4 # md in d4
686: swap %d3 # hi(mr) in lo d3
687: swap %d4 # hi(md) in lo d4
688:
689: # complete necessary multiplies:
690: mulu.w %d1,%d0 # [1] lo(mr) * lo(md)
691: mulu.w %d3,%d1 # [2] hi(mr) * lo(md)
692: mulu.w %d4,%d2 # [3] lo(mr) * hi(md)
693: mulu.w %d4,%d3 # [4] hi(mr) * hi(md)
694:
695: # add lo portions of [2],[3] to hi portion of [1].
696: # add carries produced from these adds to [4].
697: # lo([1]) is the final lo 16 bits of the result.
698: clr.l %d4 # load d4 w/ zero value
699: swap %d0 # hi([1]) <==> lo([1])
700: add.w %d1,%d0 # hi([1]) + lo([2])
701: addx.l %d4,%d3 # [4] + carry
702: add.w %d2,%d0 # hi([1]) + lo([3])
703: addx.l %d4,%d3 # [4] + carry
704: swap %d0 # lo([1]) <==> hi([1])
705:
706: # lo portions of [2],[3] have been added in to final result.
707: # now, clear lo, put hi in lo reg, and add to [4]
708: clr.w %d1 # clear lo([2])
709: clr.w %d2 # clear lo([3])
710: swap %d1 # hi([2]) in lo d1
711: swap %d2 # hi([3]) in lo d2
712: add.l %d2,%d1 # hi([2]) + hi([3])
713: add.l %d3,%d1 # ... + [4] (with carries) = hi(result)
714:
715: tst.b %d5 # should result be signed?
716: beq.b muls64_done # no
717:
718: # result should be a signed negative number.
719: # compute 2's complement of the unsigned number:
720: # -negate all bits and add 1
721: muls64_neg:
722: not.l %d0 # negate lo(result) bits
723: not.l %d1 # negate hi(result) bits
724: addq.l &1,%d0 # add 1 to lo(result)
725: addx.l %d4,%d1 # add carry to hi(result) (d4 is still zero)
726:
727: muls64_done:
728: mov.w MUL64_CC(%a6),%d4
729: andi.b &0x10,%d4 # keep old 'X' bit
730: tst.l %d1 # may set 'N' bit
731: bpl.b muls64_ddone
732: ori.b &0x8,%d4 # set 'N' bit
733: muls64_ddone:
734: mov.w %d4,%cc
735:
736: # here, the result is in d1 (hi) and d0 (lo). it is stored at the location
737: # whose address the caller passed as the last argument (0x10(%a6)).
738: # use movm here to not disturb the condition codes.
739: muls64_end:
740: exg %d1,%d0 # hi -> d0, lo -> d1 so that hi is stored first
741: movm.l &0x0003,([0x10,%a6]) # save result: mask 0x0003 selects d0 and d1
742:
743: # EPILOGUE BEGIN ########################################################
744: # fmovm.l (%sp)+,&0x0 # restore no fpregs
745: movm.l (%sp)+,&0x003c # restore d2-d5
746: unlk %a6
747: # EPILOGUE END ##########################################################
748:
749: rts
750:
751: # one or both of the operands is zero so the result is also zero.
752: # build a zero result in d0/d1 and set the 'Z' ccode bit; muls64_end stores it.
753: muls64_zero:
754: clr.l %d0
755: clr.l %d1
756:
757: mov.w MUL64_CC(%a6),%d4 # ccodes saved on entry
758: andi.b &0x10,%d4 # keep old 'X' bit
759: ori.b &0x4,%d4 # add 'Z'
760: mov.w %d4,%cc # set 'Z' ccode bit
761:
762: bra.b muls64_end # store the result and return through the d2-d5 epilogue
763:
764: #########################################################################
765: # XDEF **************************************************************** #
766: # _060LSP__cmp2_Ab_(): Emulate "cmp2.b An,<ea>". #
767: # _060LSP__cmp2_Aw_(): Emulate "cmp2.w An,<ea>". #
768: # _060LSP__cmp2_Al_(): Emulate "cmp2.l An,<ea>". #
769: # _060LSP__cmp2_Db_(): Emulate "cmp2.b Dn,<ea>". #
770: # _060LSP__cmp2_Dw_(): Emulate "cmp2.w Dn,<ea>". #
771: # _060LSP__cmp2_Dl_(): Emulate "cmp2.l Dn,<ea>". #
772: # #
773: # This is the library version which is accessed as a subroutine #
774: # and therefore does not work exactly like the 680X0 "cmp2" #
775: # instruction. #
776: # #
777: # XREF **************************************************************** #
778: # None #
779: # #
780: # INPUT *************************************************************** #
781: # 0x4(sp) = Rn #
782: # 0x8(sp) = pointer to boundary pair #
783: # #
784: # OUTPUT ************************************************************** #
785: # cc = condition codes are set correctly #
786: # #
787: # ALGORITHM *********************************************************** #
788: # In the interest of simplicity, all operands are converted to #
789: # longword size whether the operation is byte, word, or long. The #
790: # bounds are sign extended accordingly. If Rn is a data register, Rn is #
791: # also sign extended. If Rn is an address register, it need not be sign #
792: # extended since the full register is always used. #
793: # The condition codes are set correctly before the final "rts". #
794: # #
795: #########################################################################
796:
797: set CMP2_CC, -4 # word off %a6: condition codes as they were on entry
798:
799: global _060LSP__cmp2_Ab_
800: _060LSP__cmp2_Ab_:
801:
802: # PROLOGUE BEGIN ########################################################
803: link.w %a6,&-4 # 4 bytes of locals (CMP2_CC)
804: movm.l &0x3800,-(%sp) # save d2-d4
805: # fmovm.l &0x0,-(%sp) # save no fpregs
806: # PROLOGUE END ##########################################################
807:
808: mov.w %cc,CMP2_CC(%a6) # save incoming ccodes
809: mov.l 0x8(%a6), %d2 # get regval
810:
811: mov.b ([0xc,%a6],0x0),%d0 # lo bound: first byte of the pair
812: mov.b ([0xc,%a6],0x1),%d1 # hi bound: second byte of the pair
813:
814: extb.l %d0 # sign extend lo bnd
815: extb.l %d1 # sign extend hi bnd
816: bra.w l_cmp2_cmp # go do the compare emulation (regval is used as a full longword)
817:
818: global _060LSP__cmp2_Aw_
819: _060LSP__cmp2_Aw_:
820:
821: # PROLOGUE BEGIN ########################################################
822: link.w %a6,&-4 # 4 bytes of locals (CMP2_CC)
823: movm.l &0x3800,-(%sp) # save d2-d4
824: # fmovm.l &0x0,-(%sp) # save no fpregs
825: # PROLOGUE END ##########################################################
826:
827: mov.w %cc,CMP2_CC(%a6) # save incoming ccodes
828: mov.l 0x8(%a6), %d2 # get regval
829:
830: mov.w ([0xc,%a6],0x0),%d0 # lo bound: first word of the pair
831: mov.w ([0xc,%a6],0x2),%d1 # hi bound: second word of the pair
832:
833: ext.l %d0 # sign extend lo bnd
834: ext.l %d1 # sign extend hi bnd
835: bra.w l_cmp2_cmp # go do the compare emulation (regval is used as a full longword)
836:
837: global _060LSP__cmp2_Al_
838: _060LSP__cmp2_Al_:
839:
840: # PROLOGUE BEGIN ########################################################
841: link.w %a6,&-4 # 4 bytes of locals (CMP2_CC)
842: movm.l &0x3800,-(%sp) # save d2-d4
843: # fmovm.l &0x0,-(%sp) # save no fpregs
844: # PROLOGUE END ##########################################################
845:
846: mov.w %cc,CMP2_CC(%a6) # save incoming ccodes
847: mov.l 0x8(%a6), %d2 # get regval
848:
849: mov.l ([0xc,%a6],0x0),%d0 # lo bound: first longword of the pair
850: mov.l ([0xc,%a6],0x4),%d1 # hi bound: second longword of the pair
851: bra.w l_cmp2_cmp # go do the compare emulation
852:
853: global _060LSP__cmp2_Db_
854: _060LSP__cmp2_Db_:
855:
856: # PROLOGUE BEGIN ########################################################
857: link.w %a6,&-4 # 4 bytes of locals (CMP2_CC)
858: movm.l &0x3800,-(%sp) # save d2-d4
859: # fmovm.l &0x0,-(%sp) # save no fpregs
860: # PROLOGUE END ##########################################################
861:
862: mov.w %cc,CMP2_CC(%a6) # save incoming ccodes
863: mov.l 0x8(%a6), %d2 # get regval
864:
865: mov.b ([0xc,%a6],0x0),%d0 # lo bound: first byte of the pair
866: mov.b ([0xc,%a6],0x1),%d1 # hi bound: second byte of the pair
867:
868: extb.l %d0 # sign extend lo bnd
869: extb.l %d1 # sign extend hi bnd
870:
871: # operation is a data register compare.
872: # sign extend byte to long so we can do simple longword compares.
873: extb.l %d2 # sign extend data byte
874: bra.w l_cmp2_cmp # go do the compare emulation
875:
876: global _060LSP__cmp2_Dw_
877: _060LSP__cmp2_Dw_:
878:
879: # PROLOGUE BEGIN ########################################################
880: link.w %a6,&-4 # 4 bytes of locals (CMP2_CC)
881: movm.l &0x3800,-(%sp) # save d2-d4
882: # fmovm.l &0x0,-(%sp) # save no fpregs
883: # PROLOGUE END ##########################################################
884:
885: mov.w %cc,CMP2_CC(%a6) # save incoming ccodes
886: mov.l 0x8(%a6), %d2 # get regval
887:
888: mov.w ([0xc,%a6],0x0),%d0 # lo bound: first word of the pair
889: mov.w ([0xc,%a6],0x2),%d1 # hi bound: second word of the pair
890:
891: ext.l %d0 # sign extend lo bnd
892: ext.l %d1 # sign extend hi bnd
893:
894: # operation is a data register compare.
895: # sign extend word to long so we can do simple longword compares.
896: ext.l %d2 # sign extend data word
897: bra.w l_cmp2_cmp # go emulate compare
898:
899: global _060LSP__cmp2_Dl_
900: _060LSP__cmp2_Dl_:
901:
902: # PROLOGUE BEGIN ########################################################
903: link.w %a6,&-4 # 4 bytes of locals (CMP2_CC)
904: movm.l &0x3800,-(%sp) # save d2-d4
905: # fmovm.l &0x0,-(%sp) # save no fpregs
906: # PROLOGUE END ##########################################################
907:
908: mov.w %cc,CMP2_CC(%a6) # save incoming ccodes
909: mov.l 0x8(%a6), %d2 # get regval
910:
911: mov.l ([0xc,%a6],0x0),%d0 # lo bound: first longword of the pair
912: mov.l ([0xc,%a6],0x4),%d1 # hi bound: second longword of the pair; falls through to l_cmp2_cmp
913:
914: # shared tail of all six cmp2 entries: %d2 = Rn, %d0 = lo bound, %d1 = hi bound (all longwords).
915: # To set the ccodes correctly:
916: # (1) save 'Z' bit from (Rn - lo)
917: # (2) save 'Z' and 'C' bits from ((hi - lo) - (Rn - lo))
918: # (3) keep 'X', 'N', and 'V' from before instruction
919: # (4) combine ccodes
920: #
921: l_cmp2_cmp:
922: sub.l %d0, %d2 # (Rn - lo)
923: mov.w %cc, %d3 # fetch resulting ccodes
924: andi.b &0x4, %d3 # keep 'Z' bit (set when Rn == lo)
925: sub.l %d0, %d1 # (hi - lo)
926: cmp.l %d1,%d2 # ((hi - lo) - (Rn - lo))
927:
928: mov.w %cc, %d4 # fetch resulting ccodes
929: or.b %d4, %d3 # combine w/ earlier ccodes
930: andi.b &0x5, %d3 # keep 'Z' and 'C' (mask 0x5 = bits 2 and 0)
931:
932: mov.w CMP2_CC(%a6), %d4 # fetch old ccodes
933: andi.b &0x1a, %d4 # keep 'X','N','V' bits
934: or.b %d3, %d4 # insert new ccodes
935: mov.w %d4,%cc # save new ccodes
936:
937: # EPILOGUE BEGIN ########################################################
938: # fmovm.l (%sp)+,&0x0 # restore no fpregs
939: movm.l (%sp)+,&0x001c # restore d2-d4 (movm and unlk leave the ccodes alone)
940: unlk %a6
941: # EPILOGUE END ##########################################################
942:
943: rts
CVSweb