Annotation of sys/lib/libkern/arch/sh/memset.S, Revision 1.1.1.1
1.1 nbrk 1: /* $NetBSD: memset.S,v 1.1 2005/12/20 19:28:50 christos Exp $ */
2:
3: /*-
4: * Copyright (c) 2002 SHIMIZU Ryo. All rights reserved.
5: *
6: * Redistribution and use in source and binary forms, with or without
7: * modification, are permitted provided that the following conditions
8: * are met:
9: * 1. Redistributions of source code must retain the above copyright
10: * notice, this list of conditions and the following disclaimer.
11: * 2. Redistributions in binary form must reproduce the above copyright
12: * notice, this list of conditions and the following disclaimer in the
13: * documentation and/or other materials provided with the distribution.
14: * 3. The name of the author may not be used to endorse or promote products
15: * derived from this software without specific prior written permission.
16: *
17: * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18: * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19: * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20: * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21: * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22: * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23: * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24: * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25: * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26: * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27: */
28:
29: #include <machine/asm.h>
30:
31: #if defined(LIBC_SCCS) && !defined(lint)
32: RCSID("$NetBSD: memset.S,v 1.1 2005/12/20 19:28:50 christos Exp $")
33: #endif
34:
/*
 * Register assignments.  REG_PTR (r0) and REG_TMP1 (r1) are scratch
 * registers in both builds.  The other names depend on whether the file
 * is built as bzero (BZERO defined) or as memset:
 *   bzero : REG_DST = r4, REG_LEN = r5; REG_C = r2 is loaded with 0 by
 *           the code itself.
 *   memset: REG_DST = r4, REG_C = r5, REG_LEN = r6; REG_DST0 = r3 keeps
 *           a copy of the original destination for the return value.
 */
35: #define REG_PTR r0
36: #define REG_TMP1 r1
37:
38: #ifdef BZERO
39: # define REG_C r2
40: # define REG_DST r4
41: # define REG_LEN r5
42: #else
43: # define REG_DST0 r3
44: # define REG_DST r4
45: # define REG_C r5
46: # define REG_LEN r6
47: #endif
48:
/*
 * Entry point: bzero when BZERO is defined, memset otherwise.
 * Dispatches on the length in REG_LEN (unsigned compares):
 *   len >= 28 -> large
 *   len >= 12 -> small
 *   otherwise -> the byte loop below (0 to 11 bytes).
 * The instruction that follows each bt/s, bf/s and rts is its delay
 * slot: it is executed whether or not the branch is taken, and the
 * branch decision uses the T flag as it was before the delay slot.
 */
49: #ifdef BZERO
50: ENTRY(bzero)
51: #else
52: ENTRY(memset)
53: mov REG_DST,REG_DST0 /* for return value */
54: #endif
55: /* small amount to fill ? */
56: mov #28,REG_TMP1
57: cmp/hs REG_TMP1,REG_LEN /* if (len >= 28) goto large; */
58: bt/s large
59: mov #12,REG_TMP1 /* if (len >= 12) goto small; */
60: cmp/hs REG_TMP1,REG_LEN
/*
 * Delay slot of the bt/s below: "mov #0,REG_C" in the bzero build,
 * the "tst REG_LEN,REG_LEN" further down in the memset build.
 */
61: bt/s small
62: #ifdef BZERO
63: mov #0,REG_C
64: #endif
65: /* very little fill (0 ~ 11 bytes) */
/* T = (len == 0); REG_LEN then becomes the end pointer dst + len. */
66: tst REG_LEN,REG_LEN
67: add REG_DST,REG_LEN
68: bt/s done
/* REG_DST = dst + 1, the value the end pointer has just before the last store. */
69: add #1,REG_DST
70:
/*
 * Bytes are stored from the end towards the start with pre-decrement.
 * Each cmp/eq runs before the store it guards (REG_LEN == dst + 1 means
 * the next store is the last one); the following bt/s or bf/s tests that
 * result while its delay slot already computes the compare for the next
 * store.
 */
71: /* unroll 4 loops */
72: cmp/eq REG_DST,REG_LEN
73: 1: mov.b REG_C,@-REG_LEN
74: bt/s done
75: cmp/eq REG_DST,REG_LEN
76: mov.b REG_C,@-REG_LEN
77: bt/s done
78: cmp/eq REG_DST,REG_LEN
79: mov.b REG_C,@-REG_LEN
80: bt/s done
81: cmp/eq REG_DST,REG_LEN
82: mov.b REG_C,@-REG_LEN
83: bf/s 1b
84: cmp/eq REG_DST,REG_LEN
85: done:
/* memset returns the original destination in r0 (set in the rts delay slot). */
86: #ifdef BZERO
87: rts
88: nop
89: #else
90: rts
91: mov REG_DST0,r0
92: #endif
93:
94:
/*
 * small: fill of 12 to 27 bytes (see the dispatch at the entry point).
 * If dst is even, control goes to small_aligned.  Otherwise the fill is
 * done by a computed jump into the table of byte stores below: REG_LEN
 * is doubled (shll) because every table entry is one 2-byte instruction,
 * so jumping to (1f - 2*len) executes exactly the last len entries,
 * which write dst+len-1 down to dst+0.
 */
95: small:
96: mov REG_DST,r0
97: tst #1,r0
98: bt/s small_aligned
99: mov REG_DST,REG_TMP1
100: shll REG_LEN
101: mova 1f,r0 /* 1f must be 4bytes aligned! */
102: add #16,REG_TMP1 /* REG_TMP1 = dst+16; */
103: sub REG_LEN,r0
104: jmp @r0
/* Delay slot of the jmp: the table stores from r0, so load the fill value into it. */
105: mov REG_C,r0
106:
/*
 * Offsets 16..31 are addressed as 0..15 from REG_TMP1 (= dst + 16),
 * offsets 0..15 directly from REG_DST.  Do not add or remove
 * instructions here: the jump target is computed from the distance to
 * label 1.
 */
107: .align 2
108: mov.b r0,@(15,REG_TMP1)
109: mov.b r0,@(14,REG_TMP1)
110: mov.b r0,@(13,REG_TMP1)
111: mov.b r0,@(12,REG_TMP1)
112: mov.b r0,@(11,REG_TMP1)
113: mov.b r0,@(10,REG_TMP1)
114: mov.b r0,@(9,REG_TMP1)
115: mov.b r0,@(8,REG_TMP1)
116: mov.b r0,@(7,REG_TMP1)
117: mov.b r0,@(6,REG_TMP1)
118: mov.b r0,@(5,REG_TMP1)
119: mov.b r0,@(4,REG_TMP1)
120: mov.b r0,@(3,REG_TMP1)
121: mov.b r0,@(2,REG_TMP1)
122: mov.b r0,@(1,REG_TMP1)
123: mov.b r0,@REG_TMP1
124: mov.b r0,@(15,REG_DST)
125: mov.b r0,@(14,REG_DST)
126: mov.b r0,@(13,REG_DST)
127: mov.b r0,@(12,REG_DST)
128: mov.b r0,@(11,REG_DST)
129: mov.b r0,@(10,REG_DST)
130: mov.b r0,@(9,REG_DST)
131: mov.b r0,@(8,REG_DST)
132: mov.b r0,@(7,REG_DST)
133: mov.b r0,@(6,REG_DST)
134: mov.b r0,@(5,REG_DST)
135: mov.b r0,@(4,REG_DST)
136: mov.b r0,@(3,REG_DST)
137: mov.b r0,@(2,REG_DST)
138: mov.b r0,@(1,REG_DST)
/*
 * bzero: the rts is placed before the final store, which then runs in
 * the rts delay slot; label 1 marks that slot.
 * memset: the final store comes first and the rts delay slot loads the
 * return value; label 1 marks the rts.
 */
139: #ifdef BZERO
140: rts
141: 1: mov.b r0,@REG_DST
142: #else
143: mov.b r0,@REG_DST
144: 1: rts
145: mov REG_DST0,r0
146: #endif
147:
148:
149: /* 2 bytes aligned small fill */
/*
 * Reached from small when dst is even.  The fill is done with halfword
 * stores selected by a computed jump: every table entry is a 2-byte
 * instruction that writes 2 bytes, so jumping to (1f - len) executes
 * len/2 stores covering dst+0 .. dst+len-1.  An odd len is first reduced
 * by one with a single byte store at the end of the buffer.
 */
150: small_aligned:
151: #ifndef BZERO
152: extu.b REG_C,REG_TMP1 /* REG_C = ??????xx, REG_TMP1 = ????00xx */
153: shll8 REG_C /* REG_C = ????xx00, REG_TMP1 = ????00xx */
154: or REG_TMP1,REG_C /* REG_C = ????xxxx */
155: #endif
156:
157: mov REG_LEN,r0
158: tst #1,r0 /* len is even? */
159: bt/s 1f
/* Delay slot: r0 = len - 1, used only when len is odd. */
160: add #-1,r0
161: mov.b REG_C,@(r0,REG_DST) /* fill the last byte */
162: mov r0,REG_LEN
163: 1:
164:
165: mova 1f,r0 /* 1f must be 4bytes aligned! */
166: sub REG_LEN,r0
167: jmp @r0
/* Delay slot of the jmp: the table stores from r0, so load the fill value into it. */
168: mov REG_C,r0
169:
/*
 * Do not add or remove instructions here: the jump target is computed
 * from the distance to label 1.
 */
170: .align 2
171: mov.w r0,@(30,REG_DST)
172: mov.w r0,@(28,REG_DST)
173: mov.w r0,@(26,REG_DST)
174: mov.w r0,@(24,REG_DST)
175: mov.w r0,@(22,REG_DST)
176: mov.w r0,@(20,REG_DST)
177: mov.w r0,@(18,REG_DST)
178: mov.w r0,@(16,REG_DST)
179: mov.w r0,@(14,REG_DST)
180: mov.w r0,@(12,REG_DST)
181: mov.w r0,@(10,REG_DST)
182: mov.w r0,@(8,REG_DST)
183: mov.w r0,@(6,REG_DST)
184: mov.w r0,@(4,REG_DST)
185: mov.w r0,@(2,REG_DST)
/*
 * bzero: the final store runs in the rts delay slot (label 1).
 * memset: the final store precedes the rts (label 1), whose delay slot
 * loads the return value.
 */
186: #ifdef BZERO
187: rts
188: 1: mov.w r0,@REG_DST
189: #else
190: mov.w r0,@REG_DST
191: 1: rts
192: mov REG_DST0,r0
193: #endif
194:
195:
196:
/*
 * large: fill of 28 bytes or more.  The fill byte is first replicated
 * into all four bytes of REG_C (bzero simply uses 0).  REG_PTR is set to
 * dst + len and the buffer is filled from the end towards the start.
 * If dst or len is not a multiple of 4, unaligned_dst / unaligned_len
 * store the odd leading/trailing bytes and then re-enter at "aligned"
 * with a recomputed REG_LEN.
 */
197: .align 2
198: large:
199: #ifdef BZERO
200: mov #0,REG_C
201: #else
202: extu.b REG_C,REG_TMP1 /* REG_C = ??????xx, REG_TMP1 = ????00xx */
203: shll8 REG_C /* REG_C = ????xx00, REG_TMP1 = ????00xx */
204: or REG_C,REG_TMP1 /* REG_C = ????xx00, REG_TMP1 = ????xxxx */
205: swap.w REG_TMP1,REG_C /* REG_C = xxxx????, REG_TMP1 = ????xxxx */
206: xtrct REG_TMP1,REG_C /* REG_C = xxxxxxxx */
207: #endif
208:
209: mov #3,REG_TMP1
210: tst REG_TMP1,REG_DST
211: mov REG_DST,REG_PTR
212: bf/s unaligned_dst
213: add REG_LEN,REG_PTR /* REG_PTR = dst + len; */
214: tst REG_TMP1,REG_LEN
/* The delay slot of this bf/s is the "mov #32,REG_TMP1" at aligned. */
215: bf/s unaligned_len
216:
217: aligned:
218: /* fill 32*n bytes */
219: mov #32,REG_TMP1
/* T = (32 > len), unsigned: skip the 32-byte loop when fewer than 32 bytes remain. */
220: cmp/hi REG_LEN,REG_TMP1
221: bt 9f
222: .align 2
/*
 * Each iteration moves REG_PTR down by 32 and stores eight longwords at
 * REG_PTR+0 .. REG_PTR+28.  The cmp/hi in the middle of the loop sets T
 * for the bf/s at the end; the last store sits in the bf/s delay slot.
 */
223: 1: sub REG_TMP1,REG_PTR
224: mov.l REG_C,@REG_PTR
225: sub REG_TMP1,REG_LEN
226: mov.l REG_C,@(4,REG_PTR)
227: cmp/hi REG_LEN,REG_TMP1
228: mov.l REG_C,@(8,REG_PTR)
229: mov.l REG_C,@(12,REG_PTR)
230: mov.l REG_C,@(16,REG_PTR)
231: mov.l REG_C,@(20,REG_PTR)
232: mov.l REG_C,@(24,REG_PTR)
233: bf/s 1b
234: mov.l REG_C,@(28,REG_PTR)
235: 9:
236:
237: /* fill the remaining 4*n bytes */
/* Nothing left when REG_PTR has already come down to REG_DST. */
238: cmp/eq REG_DST,REG_PTR
239: bt 9f
/*
 * REG_DST = dst + 4, so a compare made before a pre-decrement store is
 * true exactly when that store is the last one.  As in the byte loop at
 * the entry point, each branch tests the compare made before the store
 * while its delay slot computes the compare for the next store.
 */
240: add #4,REG_DST
241: cmp/eq REG_DST,REG_PTR
242: 1: mov.l REG_C,@-REG_PTR
243: bt/s 9f
244: cmp/eq REG_DST,REG_PTR
245: mov.l REG_C,@-REG_PTR
246: bt/s 9f
247: cmp/eq REG_DST,REG_PTR
248: mov.l REG_C,@-REG_PTR
249: bt/s 9f
250: cmp/eq REG_DST,REG_PTR
251: mov.l REG_C,@-REG_PTR
252: bf/s 1b
253: cmp/eq REG_DST,REG_PTR
254: 9:
/* memset returns the original destination in r0 (set in the rts delay slot). */
255: #ifdef BZERO
256: rts
257: nop
258: #else
259: rts
260: mov REG_DST0,r0
261: #endif
262:
263:
/*
 * unaligned_dst: reached from large when dst is not a multiple of 4.
 * Stores one byte and/or one halfword at REG_DST, advancing it until it
 * is 4-byte aligned, then falls into the end-pointer fix-up below.
 * unaligned_len: reached from large when dst is aligned but len is not
 * a multiple of 4.  Stores one byte and/or one halfword just below
 * REG_PTR (pre-decrement) until REG_PTR is 4-byte aligned.
 * Both paths finish by setting REG_LEN = REG_PTR - REG_DST and branching
 * back to aligned.
 */
264: unaligned_dst:
265: mov #1,REG_TMP1
266: tst REG_TMP1,REG_DST /* if (dst & 1) { */
/* REG_TMP1 = 2 for the bit-1 tests that follow; add does not touch T. */
267: add #1,REG_TMP1
268: bt/s 2f
/* Delay slot: T = ((dst & 2) == 0), consumed at label 2 when dst is even. */
269: tst REG_TMP1,REG_DST
270: mov.b REG_C,@REG_DST /* *dst++ = c; */
271: add #1,REG_DST
272: tst REG_TMP1,REG_DST
273: 2: /* } */
274: /* if (dst & 2) { */
275: bt 4f
276: mov.w REG_C,@REG_DST /* *(u_int16_t*)dst++ = c; */
277: add #2,REG_DST
278: 4: /* } */
279:
280:
281: tst #3,REG_PTR /* if (ptr & 3) { */
/*
 * The "tst #1,REG_PTR" at unaligned_len is the delay slot of this bt/s;
 * when the branch is taken its result is simply not used.
 */
282: bt/s 4f /* */
283: unaligned_len:
284: tst #1,REG_PTR /* if (ptr & 1) { */
285: bt/s 2f
/*
 * Delay slot: bit 1 is tested before the byte store below; decrementing
 * an odd REG_PTR by one does not change bit 1, so T stays valid.
 */
286: tst #2,REG_PTR
287: mov.b REG_C,@-REG_PTR /* *--ptr = c; */
288: 2: /* } */
289: /* if (ptr & 2) { */
290: bt 4f
291: mov.w REG_C,@-REG_PTR /* *--(u_int16_t*)ptr = c; */
292: 4: /* } */
293: /* } */
294:
/* Remaining length = end pointer - start pointer (sub runs in the bra delay slot). */
295: mov REG_PTR,REG_LEN
296: bra aligned
297: sub REG_DST,REG_LEN
298:
CVSweb