Annotation of sys/lib/libkern/arch/sparc/mul.S, Revision 1.1.1.1
1.1 nbrk 1: /* $OpenBSD: mul.S,v 1.5 2003/06/02 23:28:09 millert Exp $ */
2: /* $NetBSD: mul.S,v 1.2 1994/10/26 06:40:01 cgd Exp $ */
3:
4: /*
5: * Copyright (c) 1992, 1993
6: * The Regents of the University of California. All rights reserved.
7: *
8: * This software was developed by the Computer Systems Engineering group
9: * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
10: * contributed to Berkeley.
11: *
12: * Redistribution and use in source and binary forms, with or without
13: * modification, are permitted provided that the following conditions
14: * are met:
15: * 1. Redistributions of source code must retain the above copyright
16: * notice, this list of conditions and the following disclaimer.
17: * 2. Redistributions in binary form must reproduce the above copyright
18: * notice, this list of conditions and the following disclaimer in the
19: * documentation and/or other materials provided with the distribution.
20: * 3. Neither the name of the University nor the names of its contributors
21: * may be used to endorse or promote products derived from this software
22: * without specific prior written permission.
23: *
24: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34: * SUCH DAMAGE.
35: *
36: * Header: mul.s,v 1.5 92/06/25 13:24:03 torek Exp
37: */
38:
39: #if defined(LIBC_SCCS) && !defined(lint)
40: #ifdef notdef
41: .asciz "@(#)mul.s 8.1 (Berkeley) 6/4/93"
42: #endif
43: .asciz "$OpenBSD: mul.S,v 1.5 2003/06/02 23:28:09 millert Exp $"
44: #endif /* LIBC_SCCS and not lint */
45:
46: /*
47: * Signed multiply, from Appendix E of the Sparc Version 8
48: * Architecture Manual.
49: *
50: * Returns %o0 * %o1 in %o1%o0 (i.e., %o1 holds the upper 32 bits of
51: * the 64-bit product).
52: *
53: * This code optimizes short multiplies (multiplier %o0 less than 2^12, i.e., bits 12..31 clear).
54: */
55:
56: #include "DEFS.h"
57: FUNC(.mul)
/*
 * Register use, as established by the instructions below:
 *   in:  %o0 = multiplier (copied into %y), %o1 = multiplicand
 *   out: %o0 = low 32 bits of the product, %o1 = high 32 bits
 *   also written: %o4 (partial product), %o5 (short path only), %y,
 *   and the integer condition codes.
 * Both paths return with retl.
 *
 * NOTE(review): writes to %y are delayed on SPARC V8; the three
 * instructions between the mov to %y and the first mulscc presumably
 * also serve to cover that delay -- confirm before reordering them.
 */
58: mov %o0, %y ! multiplier -> Y
59: andncc %o0, 0xfff, %g0 ! set Z iff bits 12..31 of %o0 are all zero (result discarded)
60: be Lmul_shortway ! if zero (%o0 in 0..4095), can do it the short way
61: andcc %g0, %g0, %o4 ! (delay slot, runs on both paths) zero the partial product and clear N and V
62:
63: /*
64: * Long multiply. 32 steps, followed by a final shift step.
 * Every step accumulates into %o4 with %o1 as the addend. When the
 * sequence is done, %o4 holds the upper 32 bits of the product and
 * %y the lower 32 bits (both are read out below).
65: */
66: mulscc %o4, %o1, %o4 ! 1
67: mulscc %o4, %o1, %o4 ! 2
68: mulscc %o4, %o1, %o4 ! 3
69: mulscc %o4, %o1, %o4 ! 4
70: mulscc %o4, %o1, %o4 ! 5
71: mulscc %o4, %o1, %o4 ! 6
72: mulscc %o4, %o1, %o4 ! 7
73: mulscc %o4, %o1, %o4 ! 8
74: mulscc %o4, %o1, %o4 ! 9
75: mulscc %o4, %o1, %o4 ! 10
76: mulscc %o4, %o1, %o4 ! 11
77: mulscc %o4, %o1, %o4 ! 12
78: mulscc %o4, %o1, %o4 ! 13
79: mulscc %o4, %o1, %o4 ! 14
80: mulscc %o4, %o1, %o4 ! 15
81: mulscc %o4, %o1, %o4 ! 16
82: mulscc %o4, %o1, %o4 ! 17
83: mulscc %o4, %o1, %o4 ! 18
84: mulscc %o4, %o1, %o4 ! 19
85: mulscc %o4, %o1, %o4 ! 20
86: mulscc %o4, %o1, %o4 ! 21
87: mulscc %o4, %o1, %o4 ! 22
88: mulscc %o4, %o1, %o4 ! 23
89: mulscc %o4, %o1, %o4 ! 24
90: mulscc %o4, %o1, %o4 ! 25
91: mulscc %o4, %o1, %o4 ! 26
92: mulscc %o4, %o1, %o4 ! 27
93: mulscc %o4, %o1, %o4 ! 28
94: mulscc %o4, %o1, %o4 ! 29
95: mulscc %o4, %o1, %o4 ! 30
96: mulscc %o4, %o1, %o4 ! 31
97: mulscc %o4, %o1, %o4 ! 32
98: mulscc %o4, %g0, %o4 ! final shift (addend is %g0, i.e. zero)
99:
100: ! If %o0 was negative, the result is
101: ! (%o0 * %o1) + (%o1 << 32)
102: ! (i.e., the multiplier was in effect taken as %o0 + 2^32). We fix that here.
103:
104: tst %o0 ! test the sign of the original multiplier before %o0 is overwritten
105: bge 1f ! non-negative: no correction needed
106: rd %y, %o0 ! (delay slot, runs on both paths) low 32 bits of product -> %o0
107:
108: ! %o0 was indeed negative; fix upper 32 bits of result by subtracting
109: ! %o1 (i.e., return %o4 - %o1 in %o1).
110: retl
111: sub %o4, %o1, %o1 ! (delay slot) corrected high word -> %o1
112:
113: 1:
114: retl
115: mov %o4, %o1 ! (delay slot) high word -> %o1, no correction
116:
117: Lmul_shortway:
118: /*
119: * Short multiply. 12 steps, followed by a final shift step.
120: * The resulting bits are off by 12 and (32-12) = 20 bit positions,
121: * but there is no problem with %o0 being negative (unlike above).
 * (This path is reached only when bits 12..31 of %o0 are zero, and
 * %o4 was already zeroed in the delay slot of the branch that came here.)
122: */
123: mulscc %o4, %o1, %o4 ! 1
124: mulscc %o4, %o1, %o4 ! 2
125: mulscc %o4, %o1, %o4 ! 3
126: mulscc %o4, %o1, %o4 ! 4
127: mulscc %o4, %o1, %o4 ! 5
128: mulscc %o4, %o1, %o4 ! 6
129: mulscc %o4, %o1, %o4 ! 7
130: mulscc %o4, %o1, %o4 ! 8
131: mulscc %o4, %o1, %o4 ! 9
132: mulscc %o4, %o1, %o4 ! 10
133: mulscc %o4, %o1, %o4 ! 11
134: mulscc %o4, %o1, %o4 ! 12
135: mulscc %o4, %g0, %o4 ! final shift (addend is %g0, i.e. zero)
136:
137: /*
138: * %o4 has 20 of the bits that should be in the low part of the
139: * result; %y has the bottom 12 (as %y's top 12). That is:
140: *
141: *        %o4               %y
142: * +----------------+----------------+
143: * | -12- |  -20-   | -12- |  -20-   |
144: * +------(---------+------)---------+
145: *  --hi--  ----low-part----
146: *
147: * The upper 12 bits of %o4 should be sign-extended to form the
148: * high part of the product (i.e., highpart = %o4 >> 20).
149: */
150:
151: rd %y, %o5 ! %o5 = %y; its top 12 bits are the bottom 12 bits of the product
152: sll %o4, 12, %o0 ! shift middle bits left 12
153: srl %o5, 20, %o5 ! shift low bits right 20, zero fill at left
154: or %o5, %o0, %o0 ! construct low part of result
155: retl
156: sra %o4, 20, %o1 ! (delay slot) ... and extract high part of result, sign-extended
CVSweb