Annotation of sys/arch/hppa/spmath/impys.S, Revision 1.1
1.1 ! nbrk 1: /* $OpenBSD: impys.S,v 1.11 2005/01/23 18:01:30 mickey Exp $ */
! 2: /*
! 3: (c) Copyright 1986 HEWLETT-PACKARD COMPANY
! 4: To anyone who acknowledges that this file is provided "AS IS"
! 5: without any express or implied warranty:
! 6: permission to use, copy, modify, and distribute this file
! 7: for any purpose is hereby granted without fee, provided that
! 8: the above copyright notice and this notice appears in all
! 9: copies, and that the name of Hewlett-Packard Company not be
! 10: used in advertising or publicity pertaining to distribution
! 11: of the software without specific, written prior permission.
! 12: Hewlett-Packard Company makes no representations about the
! 13: suitability of this software for any purpose.
! 14: */
! 15: /* @(#)impys.s: Revision: 1.11.88.1 Date: 93/12/07 15:06:28 */
! 16:
! 17: #include <machine/asm.h>
! 18: #define _LOCORE
! 19: #include <machine/frame.h>
! 20:
! 21: ;****************************************************************************
! 22: ;
! 23: ;Implement an integer multiply routine for 32-bit operands and 64-bit product
! 24: ;with operand values of zero (multiplicand only) and -2**31 treated specially.
! 25: ;The algorithm uses the absolute value of the multiplier, four bits at a time,
! 26: ;from right to left, to generate partial products. Execution speed is more
! 27: ;important than program size in this implementation.
! 28: ;
! 29: ;****************************************************************************
! 30: ;
! 31: ; Definitions - General registers
! 32: ;
! 33: gr0 .reg %r0 ; General register zero
! 34: pu .reg %r3 ; upper part of product
! 35: pl .reg %r4 ; lower part of product (same register as op2)
! 36: op2 .reg %r4 ; multiplier; shares %r4 with pl, so every right shift of pl also moves the unread multiplier bits down
! 37: op1 .reg %r5 ; multiplicand
! 38: cnt .reg %r6 ; count in multiply (passes left, 4 multiplier bits each)
! 39: brindex .reg %r7 ; index into the br. table
! 40: sign .reg %r8 ; sign of product (only bit 0 is tested)
! 41: pc .reg %r9 ; carry bit of product, = 00...01
! 42: pm .reg %r10 ; value of -1 used in shifting
! 43:
! 44: ;*****************************************************************************
! 45: .text
! 46:
;
; s_xmpy -- signed 32 x 32 -> 64-bit integer multiply.
;
; Operands, as used by the code below:
;   arg0  address of the 32-bit multiplicand (loaded into op1)
;   arg1  address of the 32-bit multiplier   (loaded into op2)
;   arg2  address of the result; the high word is stored at 0(arg2)
;         and the low word at 4(arg2)
;
; pu, pl, op1, cnt, brindex, sign, pc and pm (%r3-%r10) are stored above
; sp on entry and reloaded in reverse order before returning through rp.
;
; Method: in the general path negative operands are replaced by their
; absolute values, the multiplier is consumed four bits per pass (cnt
; starts at 8) through the two-words-per-entry branch table at mtable,
; and the product in pu:pl is negated at the end when bit 0 of `sign'
; is set.  An operand equal to -2**31 bypasses the loop and is handled
; by shifting the other operand (see mmax).
;
; NOTE: most branches here have a live delay slot (some delay slots are
; themselves branches) and several targets are written as label+4 or
; label-4, so the order and spacing of instructions must be preserved.
;
! 47: LEAF_ENTRY(s_xmpy)
! 48: stws,ma pu,4(sp) ; save registers on stack (store, then sp += 4)
! 49: stws,ma pl,4(sp) ; save registers on stack
! 50: stws,ma op1,4(sp) ; save registers on stack
! 51: stws,ma cnt,4(sp) ; save registers on stack
! 52: stws,ma brindex,4(sp) ; save registers on stack
! 53: stws,ma sign,4(sp) ; save registers on stack
! 54: stws,ma pc,4(sp) ; save registers on stack
! 55: stws,ma pm,4(sp) ; save registers on stack
! 56: ;
! 57: ; Start multiply process
! 58: ;
! 59: ldws 0(arg1),op2 ; get multiplier
! 60: ldws 0(arg0),op1 ; get multiplicand
! 61: addi -1,gr0,pm ; initialize pm to 111...1
! 62: comb,< op2,gr0,mpyb ; br. if multiplier < 0
! 63: xor op2,op1,sign ; delay slot: sign(0) = sign of product
! 64: mpy1 comb,< op1,gr0,mpya ; br. if multiplicand < 0
! 65: addi 0,gr0,pu ; delay slot: clear upper part of product
! 66: addib,= 0,op1,fini0 ; op1 = 0, product = 0
! 67: mpy2 addi 1,gr0,pc ; initialize pc to 00...01
! 68: movib,tr 8,cnt,mloop ; set count for mpy loop (8 passes), always br.
! 69: extru op2,31,4,brindex ; delay slot: low 4 bits as first index into table
! 70: ; shared shift-by-4 tail; table entries enter it at sh4n-4, sh4n and sh4n+4
! 71: .align 8
! 72: ;
! 73: b sh4c ; (sh4n-4) br. if sign overflow, see entry 0011
! 74: sh4n shd pu,pl,4,pl ; shift product right 4 bits
! 75: addib,<= -1,cnt,mulend ; reduce count by 1, exit if
! 76: extru pu,27,28,pu ; <= zero (delay slot: pu >> 4 unsigned)
! 77: ;
! 78: mloop blr brindex,gr0 ; br. into table
! 79: ; entries of 2 words
! 80: extru op2,27,4,brindex ; delay slot: next 4 bits into index
! 81: ;
! 82: ;
! 83: ; branch table for the multiplication process with four multiplier bits
! 84: ;
! 85: mtable ; two words per entry
! 86: ;
! 87: ; ---- bits = 0000 ---- shift product 4 bits -------------------------------
! 88: ;
! 89: b sh4n+4 ; just shift partial
! 90: shd pu,pl,4,pl ; product right 4 bits
! 91: ;
! 92: ; ---- bits = 0001 ---- add op1, then shift 4 bits
! 93: ;
! 94: addb,tr op1,pu,sh4n+4 ; add op1 to product, to shift
! 95: shd pu,pl,4,pl ; product right 4 bits
! 96: ;
! 97: ; ---- bits = 0010 ---- add op1, add op1, then shift 4 bits
! 98: ;
! 99: addb,tr op1,pu,sh4n ; add 2*op1, to shift
! 100: addb,uv op1,pu,sh4c ; product right 4 bits (second add; to sh4c on unsigned overflow)
! 101: ;
! 102: ; ---- bits = 0011 ---- add op1, add 2*op1, shift 4 bits
! 103: ;
! 104: addb,tr op1,pu,sh4n-4 ; add op1 & 2*op1, shift
! 105: sh1add,nsv op1,pu,pu ; product right 4 bits (nullifies the b at sh4n-4 if no signed overflow)
! 106: ;
! 107: ; ---- bits = 0100 ---- shift 2, add op1, shift 2
! 108: ;
! 109: b sh2sa
! 110: shd pu,pl,2,pl ; shift product 2 bits
! 111: ;
! 112: ; ---- bits = 0101 ---- add op1, shift 2, add op1, and shift 2 again
! 113: ;
! 114: addb,tr op1,pu,sh2us ; add op1 to product
! 115: shd pu,pl,2,pl ; shift 2 bits
! 116: ;
! 117: ; ---- bits = 0110 ---- add op1, add op1, shift 2, add op1, and shift 2 again
! 118: ;
! 119: addb,tr op1,pu,sh2c ; add 2*op1, to shift 2 bits
! 120: addb,nuv op1,pu,sh2us ; br. if not overflow
! 121: ;
! 122: ; ---- bits = 0111 ---- subtract op1, shift 3, add op1, and shift 1
! 123: ;
! 124: b sh3s
! 125: sub pu,op1,pu ; subtract op1, br. to sh3s
! 126:
! 127: ;
! 128: ; ---- bits = 1000 ---- shift 3, add op1, shift 1
! 129: ;
! 130: b sh3sa
! 131: shd pu,pl,3,pl ; shift product right 3 bits
! 132: ;
! 133: ; ---- bits = 1001 ---- add op1, shift 3, add op1, shift 1
! 134: ;
! 135: addb,tr op1,pu,sh3us ; add op1, to shift 3, add op1,
! 136: shd pu,pl,3,pl ; and shift 1
! 137: ;
! 138: ; ---- bits = 1010 ---- add op1, add op1, shift 3, add op1, shift 1
! 139: ;
! 140: addb,tr op1,pu,sh3c ; add 2*op1, to shift 3 bits
! 141: addb,nuv op1,pu,sh3us ; br. if no overflow
! 142: ;
! 143: ; ---- bits = 1011 ---- add -op1, shift 2, add -op1, shift 2, inc. next index
! 144: ;
! 145: addib,tr 1,brindex,sh2s ; add 1 to index, subtract op1,
! 146: sub pu,op1,pu ; shift 2 with minus sign
! 147: ;
! 148: ; ---- bits = 1100 ---- shift 2, subtract op1, shift 2, increment next index
! 149: ;
! 150: addib,tr 1,brindex,sh2sb ; add 1 to index, to shift
! 151: shd pu,pl,2,pl ; shift right 2 bits signed
! 152: ;
! 153: ; ---- bits = 1101 ---- add op1, shift 2, add -op1, shift 2
! 154: ;
! 155: addb,tr op1,pu,sh2ns ; add op1, to shift 2
! 156: shd pu,pl,2,pl ; right 2 unsigned, etc.
! 157: ;
! 158: ; ---- bits = 1110 ---- shift 1 signed, add -op1, shift 3 signed
! 159: ;
! 160: addib,tr 1,brindex,sh1sa ; add 1 to index, to shift
! 161: shd pu,pl,1,pl ; shift 1 bit
! 162: ;
! 163: ; ---- bits = 1111 ---- add -op1, shift 4 signed
! 164: ;
! 165: addib,tr 1,brindex,sh4s ; add 1 to index, subtract op1,
! 166: sub pu,op1,pu ; to shift 4 signed
! 167:
! 168: ;
! 169: ; ---- bits = 10000 ---- shift 4 signed (index 16: bits 1111 plus the 1 added by the previous pass)
! 170: ;
! 171: addib,tr 1,brindex,sh4s+4 ; add 1 to index
! 172: shd pu,pl,4,pl ; shift 4 signed
! 173: ;
! 174: ; ---- end of table ---------------------------------------------------------
! 175: ;
! 176: sh4s shd pu,pl,4,pl ; low word: shift product right 4 bits
! 177: addib,tr -1,cnt,mloop ; loop (count > 0 always here)
! 178: shd pm,pu,4,pu ; shift 4, minus signed (ones from pm shifted in)
! 179: ;
! 180: sh4c addib,> -1,cnt,mloop ; decrement count, loop if > 0
! 181: shd pc,pu,4,pu ; shift 4 with overflow (carry bit from pc shifted in)
! 182: b signs ; end of multiply
! 183: bb,>=,n sign,0,fini ; test sign of product
! 184: ;
! 185: mpyb add,= op2,op2,gr0 ; if <> 0, back to main sect. (sum is 0 only for -2**31 here; then the b is nullified)
! 186: b mpy1
! 187: sub 0,op2,op2 ; op2 = |multiplier|
! 188: add,>= op1,gr0,gr0 ; if op1 < 0, invert sign,
! 189: xor pm,sign,sign ; for correct result
! 190: ;
! 191: ; special case for multiplier = -2**31, op1 = signed multiplicand
! 192: ; or multiplicand = -2**31, op1 = signed multiplier
! 193: ;
! 194: shd op1,0,1,pl ; shift op1 left 31 bits
! 195: mmax extrs op1,30,31,pu ; pu = op1 shifted right 1 bit, signed
! 196: b signs ; negate product (if needed)
! 197: bb,>=,n sign,0,fini ; test sign of product
! 198: ;
! 199: mpya add,= op1,op1,gr0 ; op1 = -2**31, special case
! 200: b mpy2
! 201: sub 0,op1,op1 ; op1 = |multiplicand|
! 202: add,>= op2,gr0,gr0 ; if op2 < 0, invert sign,
! 203: xor pm,sign,sign ; for correct result
! 204: movb,tr op2,op1,mmax ; use op2 as multiplicand
! 205: shd op1,0,1,pl ; shift it left 31 bits
! 206: ;
! 207: sh3c shd pu,pl,3,pl ; shift product 3 bits
! 208: shd pc,pu,3,pu ; shift 3 with carry bit from pc shifted in
! 209: addb,tr op1,pu,sh1 ; add op1, to shift 1 bit
! 210: shd pu,pl,1,pl
! 211: ;
! 212: sh3us extru pu,28,29,pu ; shift 3 unsigned
! 213: addb,tr op1,pu,sh1 ; add op1, to shift 1 bit
! 214: shd pu,pl,1,pl
! 215: ;
! 216: sh3sa extrs pu,28,29,pu ; shift 3 signed
! 217: addb,tr op1,pu,sh1 ; add op1, to shift 1 bit
! 218: shd pu,pl,1,pl
! 219: ;
! 220: sh3s shd pu,pl,3,pl ; shift 3 minus signed
! 221: shd pm,pu,3,pu
! 222: addb,tr op1,pu,sh1 ; add op1, to shift 1 bit
! 223: shd pu,pl,1,pl
! 224: ;
! 225: sh1 addib,> -1,cnt,mloop ; loop if count > 0
! 226: extru pu,30,31,pu ; delay slot: shift pu right 1 bit unsigned
! 227: b signs ; end of multiply
! 228: bb,>=,n sign,0,fini ; test sign of product
! 229: ;
! 230: sh2ns addib,tr 1,brindex,sh2sb+4 ; increment index
! 231: extru pu,29,30,pu ; shift unsigned
! 232: ;
! 233: sh2s shd pu,pl,2,pl ; shift with minus sign
! 234: shd pm,pu,2,pu ;
! 235: sub pu,op1,pu ; subtract op1
! 236: shd pu,pl,2,pl ; shift with minus sign
! 237: addib,tr -1,cnt,mloop ; decrement count, loop
! 238: shd pm,pu,2,pu ; shift with minus sign
! 239: ; count never reaches 0 here
! 240: ;
! 241: sh2sb extrs pu,29,30,pu ; shift 2 signed
! 242: sub pu,op1,pu ; subtract op1 from product
! 243: shd pu,pl,2,pl ; shift with minus sign
! 244: addib,tr -1,cnt,mloop ; decrement count, loop
! 245: shd pm,pu,2,pu ; shift with minus sign
! 246: ; count never reaches 0 here
! 247: ;
! 248: sh1sa extrs pu,30,31,pu ; signed
! 249: sub pu,op1,pu ; subtract op1 from product
! 250: shd pu,pl,3,pl ; shift 3 with minus sign
! 251: addib,tr -1,cnt,mloop ; dec. count, to loop
! 252: shd pm,pu,3,pu ; count never reaches 0 here
! 253: ;
! 254: fini0 movib,tr,n 0,pl,fini ; product = 0 as op1 = 0 (pu was already cleared after mpy1)
! 255: ;
! 256: sh2us extru pu,29,30,pu ; shift 2 unsigned
! 257: addb,tr op1,pu,sh2a ; add op1
! 258: shd pu,pl,2,pl ; shift 2 bits
! 259: ;
! 260: sh2c shd pu,pl,2,pl
! 261: shd pc,pu,2,pu ; shift with carry
! 262: addb,tr op1,pu,sh2a ; add op1 to product
! 263: shd pu,pl,2,pl ; br. to sh2a to shift pu
! 264: ;
! 265: sh2sa extrs pu,29,30,pu ; shift with sign
! 266: addb,tr op1,pu,sh2a ; add op1 to product
! 267: shd pu,pl,2,pl ; br. to sh2a to shift pu
! 268: ;
! 269: sh2a addib,> -1,cnt,mloop ; loop if count > 0
! 270: extru pu,29,30,pu ; delay slot: shift pu right 2 bits unsigned
! 271: ;
! 272: mulend bb,>=,n sign,0,fini ; test sign of product; skip the negation if bit 0 is clear
! 273: signs sub 0,pl,pl ; negate product if sign
! 274: subb 0,pu,pu ; is negative
! 275: ;
! 276: ; finish
! 277: ;
! 278: fini stws pu,0(arg2) ; save high part of result
! 279: stws pl,4(arg2) ; save low part of result
! 280:
! 281: ldws,mb -4(sp),pm ; restore registers (sp -= 4, then load)
! 282: ldws,mb -4(sp),pc ; restore registers
! 283: ldws,mb -4(sp),sign ; restore registers
! 284: ldws,mb -4(sp),brindex ; restore registers
! 285: ldws,mb -4(sp),cnt ; restore registers
! 286: ldws,mb -4(sp),op1 ; restore registers
! 287: ldws,mb -4(sp),pl ; restore registers
! 288: bv 0(rp) ; return
! 289: ldws,mb -4(sp),pu ; delay slot: restore last register
! 290: EXIT(s_xmpy)
! 291:
! 292: .end
CVSweb