Annotation of sys/lib/libkern/arch/sparc/divrem.m4, Revision 1.1.1.1
1.1 nbrk 1: /* $OpenBSD: divrem.m4,v 1.6 2003/06/02 23:28:09 millert Exp $ */
2: /* $NetBSD: divrem.m4,v 1.3 1995/04/22 09:37:39 pk Exp $ */
3:
4: /*
5: * Copyright (c) 1992, 1993
6: * The Regents of the University of California. All rights reserved.
7: *
8: * This software was developed by the Computer Systems Engineering group
9: * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
10: * contributed to Berkeley.
11: *
12: * Redistribution and use in source and binary forms, with or without
13: * modification, are permitted provided that the following conditions
14: * are met:
15: * 1. Redistributions of source code must retain the above copyright
16: * notice, this list of conditions and the following disclaimer.
17: * 2. Redistributions in binary form must reproduce the above copyright
18: * notice, this list of conditions and the following disclaimer in the
19: * documentation and/or other materials provided with the distribution.
20: * 3. Neither the name of the University nor the names of its contributors
21: * may be used to endorse or promote products derived from this software
22: * without specific prior written permission.
23: *
24: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34: * SUCH DAMAGE.
35: *
36: * Header: divrem.m4,v 1.4 92/06/25 13:23:57 torek Exp
37: */
38:
39: /*
40: * Division and remainder, from Appendix E of the Sparc Version 8
41: * Architecture Manual, with fixes from Gordon Irlam.
42: */
43:
44: #if defined(LIBC_SCCS) && !defined(lint)
45: #ifdef notdef
46: .asciz "@(#)divrem.m4 8.1 (Berkeley) 6/4/93"
47: #endif
48: .asciz "$OpenBSD: divrem.m4,v 1.6 2003/06/02 23:28:09 millert Exp $"
49: #endif /* LIBC_SCCS and not lint */
50:
51: /*
52: * Input: dividend and divisor in %o0 and %o1 respectively.
53: *
54: * m4 parameters:
55: * NAME name of function to generate
56: * NAME2 secondary name of function to generate
57: * OP OP=div => %o0 / %o1; OP=rem => %o0 % %o1
58: * S S=true => signed; S=false => unsigned
59: *
60: * Algorithm parameters:
61: * N how many bits per iteration we try to get (4)
62: * WORDSIZE total number of bits (32)
63: *
64: * Derived constants:
65: * TWOSUPN 2^N, for label generation (m4 exponentiation currently broken)
66: * TOPBITS number of bits in the top `decade' of a number
67: *
68: * Important variables:
69: * Q the partial quotient under development (initially 0)
70: * R the remainder so far, initially the dividend
71: * ITER number of main division loop iterations required;
72: * equal to ceil(log2(quotient) / N). Note that this
73: * is the log base (2^N) of the quotient.
74: * V the current comparand, initially divisor*2^(ITER*N-1)
75: *
76: * Cost:
77: * Current estimate for non-large dividend is
78: * ceil(log2(quotient) / N) * (10 + 7N/2) + C
79: * A large dividend is one greater than 2^(31-TOPBITS) and takes a
80: * different path, as the upper bits of the quotient must be developed
81: * one bit at a time.
82: */
83:
84: define(N, `4')
85: define(TWOSUPN, `16')
86: define(WORDSIZE, `32')
87: define(TOPBITS, eval(WORDSIZE - N*((WORDSIZE-1)/N)))
88:
89: define(dividend, `%o0')
90: define(divisor, `%o1')
91: define(Q, `%o2')
92: define(R, `%o3')
93: define(ITER, `%o4')
94: define(V, `%o5')
95:
96: /* m4 reminder: ifelse(a,b,c,d) => if a is b, then c, else d */
97: define(T, `%g1')
98: define(SC, `%g7')
99: ifelse(S, `true', `define(SIGN, `%g6')')
100:
101: /*
102: * This is the recursive definition for developing quotient digits.
103: *
104: * Parameters:
105: * $1 the current depth, 1 <= $1 <= N
106: * $2 the current accumulation of quotient bits
107: * N max depth
108: *
109: * We add a new bit to $2 and either recurse or insert the bits in
110: * the quotient. R, Q, and V are inputs and outputs as defined above;
111: * the condition codes are expected to reflect the input R, and are
112: * modified to reflect the output R.
113: */
114: define(DEVELOP_QUOTIENT_BITS,
115: ` ! depth $1, accumulated bits $2
116: bl L.$1.eval(TWOSUPN+$2)
117: srl V,1,V
118: ! remainder is positive
119: subcc R,V,R
120: ifelse($1, N,
121: ` b 9f
122: add Q, ($2*2+1), Q
123: ', ` DEVELOP_QUOTIENT_BITS(incr($1), `eval(2*$2+1)')')
124: L.$1.eval(TWOSUPN+$2):
125: ! remainder is negative
126: addcc R,V,R
127: ifelse($1, N,
128: ` b 9f
129: add Q, ($2*2-1), Q
130: ', ` DEVELOP_QUOTIENT_BITS(incr($1), `eval(2*$2-1)')')
131: ifelse($1, 1, `9:')')
132:
133: #include "DEFS.h"
134: #include <machine/trap.h>
135: #include <machine/asm.h>
136:
137: .globl NAME2
138: NAME2:
139: FUNC(NAME)
140: ifelse(S, `true',
141: ` ! compute sign of result; if neither is negative, no problem
142: orcc divisor, dividend, %g0 ! either negative?
143: bge 2f ! no, go do the divide
144: ifelse(OP, `div',
145: `xor divisor, dividend, SIGN',
146: `mov dividend, SIGN') ! compute sign in any case
147: tst divisor
148: bge 1f
149: tst dividend
150: ! divisor is definitely negative; dividend might also be negative
151: bge 2f ! if dividend not negative...
152: neg divisor ! in any case, make divisor nonneg
153: 1: ! dividend is negative, divisor is nonnegative
154: neg dividend ! make dividend nonnegative
155: 2:
156: ')
157: ! Ready to divide. Compute size of quotient; scale comparand.
158: orcc divisor, %g0, V
159: bnz 1f
160: mov dividend, R
161:
162: ! Divide by zero trap. If it returns, return 0 (about as
163: ! wrong as possible, but that is what SunOS does...).
164: t ST_DIV0
165: retl
166: clr %o0
167:
168: 1:
169: cmp R, V ! if divisor exceeds dividend, done
170: blu Lgot_result ! (and algorithm fails otherwise)
171: clr Q
172: sethi %hi(1 << (WORDSIZE - TOPBITS - 1)), T
173: cmp R, T
174: blu Lnot_really_big
175: clr ITER
176:
177: ! `Here the dividend is >= 2^(31-N) or so. We must be careful here,
178: ! as our usual N-at-a-shot divide step will cause overflow and havoc.
179: ! The number of bits in the result here is N*ITER+SC, where SC <= N.
180: ! Compute ITER in an unorthodox manner: know we need to shift V into
181: ! the top decade: so do not even bother to compare to R.'
182: 1:
183: cmp V, T
184: bgeu 3f
185: mov 1, SC
186: sll V, N, V
187: b 1b
188: inc ITER
189:
190: ! Now compute SC.
191: 2: addcc V, V, V
192: bcc Lnot_too_big
193: inc SC
194:
195: ! We get here if the divisor overflowed while shifting.
196: ! This means that R has the high-order bit set.
197: ! Restore V and subtract from R.
198: sll T, TOPBITS, T ! high order bit
199: srl V, 1, V ! rest of V
200: add V, T, V
201: b Ldo_single_div
202: dec SC
203:
204: Lnot_too_big:
205: 3: cmp V, R
206: blu 2b
207: nop
208: be Ldo_single_div
209: nop
210: /* NB: these are commented out in the V8-Sparc manual as well */
211: /* (I do not understand this) */
212: ! V > R: went too far: back up 1 step
213: ! srl V, 1, V
214: ! dec SC
215: ! do single-bit divide steps
216: !
217: ! We have to be careful here. We know that R >= V, so we can do the
218: ! first divide step without thinking. BUT, the others are conditional,
219: ! and are only done if R >= 0. Because both R and V may have the high-
220: ! order bit set in the first step, just falling into the regular
221: ! division loop will mess up the first time around.
222: ! So we unroll slightly...
223: Ldo_single_div:
224: deccc SC
225: bl Lend_regular_divide
226: nop
227: sub R, V, R
228: mov 1, Q
229: b Lend_single_divloop
230: nop
231: Lsingle_divloop:
232: sll Q, 1, Q
233: bl 1f
234: srl V, 1, V
235: ! R >= 0
236: sub R, V, R
237: b 2f
238: inc Q
239: 1: ! R < 0
240: add R, V, R
241: dec Q
242: 2:
243: Lend_single_divloop:
244: deccc SC
245: bge Lsingle_divloop
246: tst R
247: b,a Lend_regular_divide
248:
249: Lnot_really_big:
250: 1:
251: sll V, N, V
252: cmp V, R
253: bleu 1b
254: inccc ITER
255: be Lgot_result
256: dec ITER
257:
258: tst R ! set up for initial iteration
259: Ldivloop:
260: sll Q, N, Q
261: DEVELOP_QUOTIENT_BITS(1, 0)
262: Lend_regular_divide:
263: deccc ITER
264: bge Ldivloop
265: tst R
266: bl,a Lgot_result
267: ! non-restoring fixup here (one instruction only!)
268: ifelse(OP, `div',
269: ` dec Q
270: ', ` add R, divisor, R
271: ')
272:
273: Lgot_result:
274: ifelse(S, `true',
275: ` ! check to see if answer should be < 0
276: tst SIGN
277: bl,a 1f
278: ifelse(OP, `div', `neg Q', `neg R')
279: 1:')
280: retl
281: ifelse(OP, `div', `mov Q, %o0', `mov R, %o0')
CVSweb