[BACK]Return to divrem.m4 CVS log [TXT][DIR] Up to [local] / sys / lib / libkern / arch / sparc

Annotation of sys/lib/libkern/arch/sparc/divrem.m4, Revision 1.1.1.1

1.1       nbrk        1: /*     $OpenBSD: divrem.m4,v 1.6 2003/06/02 23:28:09 millert Exp $     */
                      2: /*     $NetBSD: divrem.m4,v 1.3 1995/04/22 09:37:39 pk Exp $   */
                      3:
                      4: /*
                      5:  * Copyright (c) 1992, 1993
                      6:  *     The Regents of the University of California.  All rights reserved.
                      7:  *
                      8:  * This software was developed by the Computer Systems Engineering group
                      9:  * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
                     10:  * contributed to Berkeley.
                     11:  *
                     12:  * Redistribution and use in source and binary forms, with or without
                     13:  * modification, are permitted provided that the following conditions
                     14:  * are met:
                     15:  * 1. Redistributions of source code must retain the above copyright
                     16:  *    notice, this list of conditions and the following disclaimer.
                     17:  * 2. Redistributions in binary form must reproduce the above copyright
                     18:  *    notice, this list of conditions and the following disclaimer in the
                     19:  *    documentation and/or other materials provided with the distribution.
                     20:  * 3. Neither the name of the University nor the names of its contributors
                     21:  *    may be used to endorse or promote products derived from this software
                     22:  *    without specific prior written permission.
                     23:  *
                     24:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
                     25:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     26:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     27:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
                     28:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
                     29:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
                     30:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                     31:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                     32:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                     33:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                     34:  * SUCH DAMAGE.
                     35:  *
                     36:  * Header: divrem.m4,v 1.4 92/06/25 13:23:57 torek Exp
                     37:  */
                     38:
                     39: /*
                     40:  * Division and remainder, from Appendix E of the Sparc Version 8
                     41:  * Architecture Manual, with fixes from Gordon Irlam.
                     42:  */
                     43:
                     44: #if defined(LIBC_SCCS) && !defined(lint)
                     45: #ifdef notdef
                     46:        .asciz "@(#)divrem.m4   8.1 (Berkeley) 6/4/93"
                     47: #endif
                     48:        .asciz "$OpenBSD: divrem.m4,v 1.6 2003/06/02 23:28:09 millert Exp $"
                     49: #endif /* LIBC_SCCS and not lint */
                     50:
                     51: /*
                     52:  * Input: dividend and divisor in %o0 and %o1 respectively.
                     53:  *
                     54:  * m4 parameters:
                     55:  *  NAME       name of function to generate
                     56:  *  NAME2      secondary name of function to generate
                     57:  *  OP         OP=div => %o0 / %o1; OP=rem => %o0 % %o1
                     58:  *  S          S=true => signed; S=false => unsigned
                     59:  *
                     60:  * Algorithm parameters:
                     61:  *  N          how many bits per iteration we try to get (4)
                     62:  *  WORDSIZE   total number of bits (32)
                     63:  *
                     64:  * Derived constants:
                     65:  *  TWOSUPN    2^N, for label generation (m4 exponentiation currently broken)
                     66:  *  TOPBITS    number of bits in the top `decade' of a number
                     67:  *
                     68:  * Important variables:
                     69:  *  Q          the partial quotient under development (initially 0)
                     70:  *  R          the remainder so far, initially the dividend
                     71:  *  ITER       number of main division loop iterations required;
                     72:  *             equal to ceil(log2(quotient) / N).  Note that this
                     73:  *             is the log base (2^N) of the quotient.
                     74:  *  V          the current comparand, initially divisor*2^(ITER*N-1)
                     75:  *
                     76:  * Cost:
                     77:  *  Current estimate for non-large dividend is
                     78:  *     ceil(log2(quotient) / N) * (10 + 7N/2) + C
                     79:  *  A large dividend is one greater than 2^(31-TOPBITS) and takes a
                     80:  *  different path, as the upper bits of the quotient must be developed
                     81:  *  one bit at a time.
                     82:  */
                     83:
                     84: define(N, `4')
                     85: define(TWOSUPN, `16')
                     86: define(WORDSIZE, `32')
                     87: define(TOPBITS, eval(WORDSIZE - N*((WORDSIZE-1)/N)))
                     88:
                     89: define(dividend, `%o0')
                     90: define(divisor, `%o1')
                     91: define(Q, `%o2')
                     92: define(R, `%o3')
                     93: define(ITER, `%o4')
                     94: define(V, `%o5')
                     95:
                     96: /* m4 reminder: ifelse(a,b,c,d) => if a is b, then c, else d */
                     97: define(T, `%g1')
                     98: define(SC, `%g7')
                     99: ifelse(S, `true', `define(SIGN, `%g6')')
                    100:
                    101: /*
                    102:  * This is the recursive definition for developing quotient digits.
                    103:  *
                    104:  * Parameters:
                    105:  *  $1 the current depth, 1 <= $1 <= N
                    106:  *  $2 the current accumulation of quotient bits
                    107:  *  N  max depth
                    108:  *
                    109:  * We add a new bit to $2 and either recurse or insert the bits in
                    110:  * the quotient.  R, Q, and V are inputs and outputs as defined above;
                    111:  * the condition codes are expected to reflect the input R, and are
                    112:  * modified to reflect the output R.
                    113:  */
                    114: define(DEVELOP_QUOTIENT_BITS,
                    115: `      ! depth $1, accumulated bits $2
                    116:        bl      L.$1.eval(TWOSUPN+$2)
                    117:        srl     V,1,V
                    118:        ! remainder is positive
                    119:        subcc   R,V,R
                    120:        ifelse($1, N,
                    121:        `       b       9f
                    122:                add     Q, ($2*2+1), Q
                    123:        ', `    DEVELOP_QUOTIENT_BITS(incr($1), `eval(2*$2+1)')')
                    124: L.$1.eval(TWOSUPN+$2):
                    125:        ! remainder is negative
                    126:        addcc   R,V,R
                    127:        ifelse($1, N,
                    128:        `       b       9f
                    129:                add     Q, ($2*2-1), Q
                    130:        ', `    DEVELOP_QUOTIENT_BITS(incr($1), `eval(2*$2-1)')')
                    131:        ifelse($1, 1, `9:')')
                    132:
                    133: #include "DEFS.h"
                    134: #include <machine/trap.h>
                    135: #include <machine/asm.h>
                    136:
                    137:        .globl NAME2
                    138: NAME2:
                    139: FUNC(NAME)
                    140: ifelse(S, `true',
                    141: `      ! compute sign of result; if neither is negative, no problem
                    142:        orcc    divisor, dividend, %g0  ! either negative?
                    143:        bge     2f                      ! no, go do the divide
                    144:        ifelse(OP, `div',
                    145:                `xor    divisor, dividend, SIGN',
                    146:                `mov    dividend, SIGN')        ! compute sign in any case
                    147:        tst     divisor
                    148:        bge     1f
                    149:        tst     dividend
                    150:        ! divisor is definitely negative; dividend might also be negative
                    151:        bge     2f                      ! if dividend not negative...
                    152:        neg     divisor                 ! in any case, make divisor nonneg
                    153: 1:     ! dividend is negative, divisor is nonnegative
                    154:        neg     dividend                ! make dividend nonnegative
                    155: 2:
                    156: ')
                    157:        ! Ready to divide.  Compute size of quotient; scale comparand.
                    158:        orcc    divisor, %g0, V
                    159:        bnz     1f
                    160:        mov     dividend, R
                    161:
                    162:                ! Divide by zero trap.  If it returns, return 0 (about as
                    163:                ! wrong as possible, but that is what SunOS does...).
                    164:                t       ST_DIV0
                    165:                retl
                    166:                clr     %o0
                    167:
                    168: 1:
                    169:        cmp     R, V                    ! if divisor exceeds dividend, done
                    170:        blu     Lgot_result             ! (and algorithm fails otherwise)
                    171:        clr     Q
                    172:        sethi   %hi(1 << (WORDSIZE - TOPBITS - 1)), T
                    173:        cmp     R, T
                    174:        blu     Lnot_really_big
                    175:        clr     ITER
                    176:
                    177:        ! `Here the dividend is >= 2^(31-N) or so.  We must be careful here,
                    178:        ! as our usual N-at-a-shot divide step will cause overflow and havoc.
                    179:        ! The number of bits in the result here is N*ITER+SC, where SC <= N.
                    180:        ! Compute ITER in an unorthodox manner: know we need to shift V into
                    181:        ! the top decade: so do not even bother to compare to R.'
                    182:        1:
                    183:                cmp     V, T
                    184:                bgeu    3f
                    185:                mov     1, SC
                    186:                sll     V, N, V
                    187:                b       1b
                    188:                inc     ITER
                    189:
                    190:        ! Now compute SC.
                    191:        2:      addcc   V, V, V
                    192:                bcc     Lnot_too_big
                    193:                inc     SC
                    194:
                    195:                ! We get here if the divisor overflowed while shifting.
                    196:                ! This means that R has the high-order bit set.
                    197:                ! Restore V and subtract from R.
                    198:                sll     T, TOPBITS, T   ! high order bit
                    199:                srl     V, 1, V         ! rest of V
                    200:                add     V, T, V
                    201:                b       Ldo_single_div
                    202:                dec     SC
                    203:
                    204:        Lnot_too_big:
                    205:        3:      cmp     V, R
                    206:                blu     2b
                    207:                nop
                    208:                be      Ldo_single_div
                    209:                nop
                    210:        /* NB: these are commented out in the V8-Sparc manual as well */
                    211:        /* (I do not understand this) */
                    212:        ! V > R: went too far: back up 1 step
                    213:        !       srl     V, 1, V
                    214:        !       dec     SC
                    215:        ! do single-bit divide steps
                    216:        !
                    217:        ! We have to be careful here.  We know that R >= V, so we can do the
                    218:        ! first divide step without thinking.  BUT, the others are conditional,
                    219:        ! and are only done if R >= 0.  Because both R and V may have the high-
                    220:        ! order bit set in the first step, just falling into the regular
                    221:        ! division loop will mess up the first time around.
                    222:        ! So we unroll slightly...
                    223:        Ldo_single_div:
                    224:                deccc   SC
                    225:                bl      Lend_regular_divide
                    226:                nop
                    227:                sub     R, V, R
                    228:                mov     1, Q
                    229:                b       Lend_single_divloop
                    230:                nop
                    231:        Lsingle_divloop:
                    232:                sll     Q, 1, Q
                    233:                bl      1f
                    234:                srl     V, 1, V
                    235:                ! R >= 0
                    236:                sub     R, V, R
                    237:                b       2f
                    238:                inc     Q
                    239:        1:      ! R < 0
                    240:                add     R, V, R
                    241:                dec     Q
                    242:        2:
                    243:        Lend_single_divloop:
                    244:                deccc   SC
                    245:                bge     Lsingle_divloop
                    246:                tst     R
                    247:                b,a     Lend_regular_divide
                    248:
                    249: Lnot_really_big:
                    250: 1:
                    251:        sll     V, N, V
                    252:        cmp     V, R
                    253:        bleu    1b
                    254:        inccc   ITER
                    255:        be      Lgot_result
                    256:        dec     ITER
                    257:
                    258:        tst     R       ! set up for initial iteration
                    259: Ldivloop:
                    260:        sll     Q, N, Q
                    261:        DEVELOP_QUOTIENT_BITS(1, 0)
                    262: Lend_regular_divide:
                    263:        deccc   ITER
                    264:        bge     Ldivloop
                    265:        tst     R
                    266:        bl,a    Lgot_result
                    267:        ! non-restoring fixup here (one instruction only!)
                    268: ifelse(OP, `div',
                    269: `      dec     Q
                    270: ', `   add     R, divisor, R
                    271: ')
                    272:
                    273: Lgot_result:
                    274: ifelse(S, `true',
                    275: `      ! check to see if answer should be < 0
                    276:        tst     SIGN
                    277:        bl,a    1f
                    278:        ifelse(OP, `div', `neg Q', `neg R')
                    279: 1:')
                    280:        retl
                    281:        ifelse(OP, `div', `mov Q, %o0', `mov R, %o0')

CVSweb