Annotation of sys/arch/arm/arm/bcopy_page.S, Revision 1.1.1.1
1.1 nbrk 1: /* $OpenBSD: bcopy_page.S,v 1.1 2004/02/01 05:09:48 drahn Exp $ */
2: /* $NetBSD: bcopy_page.S,v 1.7 2003/10/13 21:03:13 scw Exp $ */
3:
4:
5: /*
6: * Copyright (c) 1995 Scott Stevens
7: * All rights reserved.
8: *
9: * Redistribution and use in source and binary forms, with or without
10: * modification, are permitted provided that the following conditions
11: * are met:
12: * 1. Redistributions of source code must retain the above copyright
13: * notice, this list of conditions and the following disclaimer.
14: * 2. Redistributions in binary form must reproduce the above copyright
15: * notice, this list of conditions and the following disclaimer in the
16: * documentation and/or other materials provided with the distribution.
17: * 3. All advertising materials mentioning features or use of this software
18: * must display the following acknowledgement:
19: * This product includes software developed by Scott Stevens.
20: * 4. The name of the author may not be used to endorse or promote products
21: * derived from this software without specific prior written permission.
22: *
23: * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
24: * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
25: * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
26: * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
27: * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
28: * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29: * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30: * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31: * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
32: * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33: *
34: * RiscBSD kernel project
35: *
36: * bcopy_page.S
37: *
38: * page optimised bcopy and bzero routines
39: *
40: * Created : 08/04/95
41: */
42:
43: #include <machine/asm.h>
44:
45: #include "assym.h"
46:
47: #ifndef __XSCALE__
48:
49: /* #define BIG_LOOPS */
50:
/*
 * bcopy_page(src, dest)
 *
 * Copy one page (PAGE_SIZE bytes) from src to dest, moving the data in
 * 32-byte load-multiple / store-multiple bursts.
 *
 * On entry:
 *   r0 - src address
 *   r1 - dest address
 *
 * Register use:
 *   r2            - passes of the copy loop still to run
 *   r3-r8, ip, lr - the eight words of the chunk currently in flight
 *
 *   r4-r8 and lr are pushed on entry and popped on exit.  r0 and r1 are
 *   written back by every burst, so both have advanced by PAGE_SIZE when
 *   the routine returns.
 *
 * Requires:
 *   PAGE_SIZE is a multiple of the bytes moved per pass: 512 when
 *   BIG_LOOPS is defined, 128 otherwise.
 */

/* Bytes moved by one COPY_CHUNK: eight registers of four bytes each. */
#define CHUNK_SIZE		32

/* Prefetch hooks; both expand to nothing in this variant. */
#define PREFETCH_FIRST_CHUNK	/* nothing */
#define PREFETCH_NEXT_CHUNK	/* nothing */

#ifndef COPY_CHUNK
#define COPY_CHUNK				\
	PREFETCH_NEXT_CHUNK		;	\
	ldmia	r0!, {r3-r8,ip,lr}	;	\
	stmia	r1!, {r3-r8,ip,lr}
#endif /* ! COPY_CHUNK */

#ifndef SAVE_REGS
#define SAVE_REGS	stmfd	sp!, {r4-r8, lr}
#define RESTORE_REGS	ldmfd	sp!, {r4-r8, pc}
#endif

/*
 * Shape of the copy loop.  One pass moves BCOPY_PAGE_CHUNKS chunks, i.e.
 * (1 << BCOPY_PAGE_SHIFT) bytes.  There is little point making the loop
 * any larger than the BIG_LOOPS form; unless we are running with the
 * cache off, the load/store overheads will completely dominate it.
 */
#ifdef BIG_LOOPS
#define BCOPY_PAGE_CHUNKS	16	/* 16 * 32 = 512 bytes per pass */
#define BCOPY_PAGE_SHIFT	9
#else
#define BCOPY_PAGE_CHUNKS	4	/*  4 * 32 = 128 bytes per pass */
#define BCOPY_PAGE_SHIFT	7
#endif

ENTRY(bcopy_page)
	PREFETCH_FIRST_CHUNK
	SAVE_REGS
	mov	r2, #(PAGE_SIZE >> BCOPY_PAGE_SHIFT)	/* r2 = number of passes */

1:
	.rept	BCOPY_PAGE_CHUNKS
	COPY_CHUNK
	.endr

	subs	r2, r2, #1		/* one pass done */
	bne	1b			/* loop until r2 reaches zero */

	RESTORE_REGS			/* pops the saved lr into pc: return */
120:
/*
 * bzero_page(dest)
 *
 * Fill one page (PAGE_SIZE bytes) starting at dest with zeroes, using
 * 32-byte store-multiple bursts.
 *
 * On entry:
 *   r0 - dest address
 *
 * Register use:
 *   r2            - passes of the store loop still to run
 *   r3-r8, ip, lr - all hold zero; together they form one 32-byte burst
 *
 *   r4-r8 and lr are pushed on entry and popped on exit.  r0 is written
 *   back by every burst, so it has advanced by PAGE_SIZE on return.
 *
 * Requires:
 *   PAGE_SIZE is a multiple of the bytes stored per pass: 512 when
 *   BIG_LOOPS is defined, 128 otherwise.
 */

/*
 * Shape of the store loop.  One pass issues BZERO_PAGE_STORES bursts of
 * 32 bytes, i.e. (1 << BZERO_PAGE_SHIFT) bytes.  There is little point
 * making the loop any larger than the BIG_LOOPS form; unless we are
 * running with the cache off, the load/store overheads will completely
 * dominate it.
 */
#ifdef BIG_LOOPS
#define BZERO_PAGE_STORES	16	/* 16 * 32 = 512 bytes per pass */
#define BZERO_PAGE_SHIFT	9
#else
#define BZERO_PAGE_STORES	4	/*  4 * 32 = 128 bytes per pass */
#define BZERO_PAGE_SHIFT	7
#endif

ENTRY(bzero_page)
	stmfd	sp!, {r4-r8, lr}
	mov	r2, #(PAGE_SIZE >> BZERO_PAGE_SHIFT)	/* r2 = number of passes */

	/* Clear every register that takes part in the burst. */
	mov	r3, #0
	mov	r4, #0
	mov	r5, #0
	mov	r6, #0
	mov	r7, #0
	mov	r8, #0
	mov	ip, #0
	mov	lr, #0

1:
	.rept	BZERO_PAGE_STORES
	stmia	r0!, {r3-r8,ip,lr}
	.endr

	subs	r2, r2, #1		/* one pass done */
	bne	1b			/* loop until r2 reaches zero */

	ldmfd	sp!, {r4-r8, pc}	/* restore and return via saved lr */
181:
182: #else /* __XSCALE__ */
183:
/*
 * XSCALE version of bcopy_page
 *
 * bcopy_page(src, dest)
 *
 * Copy one page (PAGE_SIZE bytes) from src to dest.
 *
 * On entry:
 *   r0 - src address
 *   r1 - dest address
 *
 * Register use:
 *   ip      - passes of the copy loop still to run
 *   r2, r3  - even/odd data pair, stored with one strd
 *   r4, r5  - second data pair, stored with one strd
 *
 *   r4 and r5 are saved on the stack and restored before returning;
 *   r2, r3 and ip are used without being preserved.  r0 and r1 are
 *   post-incremented throughout and have both advanced by PAGE_SIZE on
 *   return.
 *
 * The loop is software pipelined: while one pair is being stored the
 * other pair is being loaded.  The first two words are loaded before the
 * loop, and the first two words of the *next* pass are loaded at the
 * bottom of the current one.  Those bottom loads are conditional (ldrgt)
 * so the final pass does not read beyond the end of the source page.
 * The instruction order is deliberate; do not reorder it casually.
 *
 * Each pass moves 0x80 (128) bytes: 16 strd of 8 bytes.  The offsets in
 * the trailing comments are relative to the start of the 128-byte block
 * of src handled by the current pass.  The pld tagged "Prefetch 0x80"
 * targets the start of the next block; on the last pass that address is
 * just past the page (pld is only a hint -- see the ARM ARM).
 *
 * Requires:
 *   PAGE_SIZE is a multiple of 128.
 *   NOTE(review): strd wants a doubleword-aligned address; callers are
 *   presumably passing page-aligned pointers -- confirm.
 */
ENTRY(bcopy_page)
	pld	[r0]
	stmfd	sp!, {r4, r5}
	/*
	 * Number of 128-byte passes per page, derived from PAGE_SIZE as in
	 * the other routines in this file (32 for a 4096-byte page).
	 */
	mov	ip, #(PAGE_SIZE >> 7)
	ldr	r2, [r0], #0x04		/* 0x00 */
	ldr	r3, [r0], #0x04		/* 0x04 */
1:	pld	[r0, #0x18]		/* Prefetch 0x20 */
	ldr	r4, [r0], #0x04		/* 0x08 */
	ldr	r5, [r0], #0x04		/* 0x0c */
	strd	r2, [r1], #0x08
	ldr	r2, [r0], #0x04		/* 0x10 */
	ldr	r3, [r0], #0x04		/* 0x14 */
	strd	r4, [r1], #0x08
	ldr	r4, [r0], #0x04		/* 0x18 */
	ldr	r5, [r0], #0x04		/* 0x1c */
	strd	r2, [r1], #0x08
	ldr	r2, [r0], #0x04		/* 0x20 */
	ldr	r3, [r0], #0x04		/* 0x24 */
	pld	[r0, #0x18]		/* Prefetch 0x40 */
	strd	r4, [r1], #0x08
	ldr	r4, [r0], #0x04		/* 0x28 */
	ldr	r5, [r0], #0x04		/* 0x2c */
	strd	r2, [r1], #0x08
	ldr	r2, [r0], #0x04		/* 0x30 */
	ldr	r3, [r0], #0x04		/* 0x34 */
	strd	r4, [r1], #0x08
	ldr	r4, [r0], #0x04		/* 0x38 */
	ldr	r5, [r0], #0x04		/* 0x3c */
	strd	r2, [r1], #0x08
	ldr	r2, [r0], #0x04		/* 0x40 */
	ldr	r3, [r0], #0x04		/* 0x44 */
	pld	[r0, #0x18]		/* Prefetch 0x60 */
	strd	r4, [r1], #0x08
	ldr	r4, [r0], #0x04		/* 0x48 */
	ldr	r5, [r0], #0x04		/* 0x4c */
	strd	r2, [r1], #0x08
	ldr	r2, [r0], #0x04		/* 0x50 */
	ldr	r3, [r0], #0x04		/* 0x54 */
	strd	r4, [r1], #0x08
	ldr	r4, [r0], #0x04		/* 0x58 */
	ldr	r5, [r0], #0x04		/* 0x5c */
	strd	r2, [r1], #0x08
	ldr	r2, [r0], #0x04		/* 0x60 */
	ldr	r3, [r0], #0x04		/* 0x64 */
	pld	[r0, #0x18]		/* Prefetch 0x80 */
	strd	r4, [r1], #0x08
	ldr	r4, [r0], #0x04		/* 0x68 */
	ldr	r5, [r0], #0x04		/* 0x6c */
	strd	r2, [r1], #0x08
	ldr	r2, [r0], #0x04		/* 0x70 */
	ldr	r3, [r0], #0x04		/* 0x74 */
	strd	r4, [r1], #0x08
	ldr	r4, [r0], #0x04		/* 0x78 */
	ldr	r5, [r0], #0x04		/* 0x7c */
	strd	r2, [r1], #0x08
	subs	ip, ip, #0x01		/* one pass done; flags steer the rest */
	ldrgt	r2, [r0], #0x04		/* 0x80 - only if another pass follows */
	ldrgt	r3, [r0], #0x04		/* 0x84 - only if another pass follows */
	strd	r4, [r1], #0x08		/* final store of this pass */
	bgt	1b
	ldmfd	sp!, {r4, r5}
	mov	pc, lr
249:
/*
 * XSCALE version of bzero_page
 *
 * bzero_page(dest)
 *
 * Fill one page (PAGE_SIZE bytes) starting at dest with zeroes, using
 * doubleword stores.
 *
 * On entry:
 *   r0 - dest address
 *
 * Register use:
 *   r1      - bytes of the page still to be cleared
 *   r2, r3  - both zero; the register pair written by each strd
 *
 *   No registers are saved; r1-r3 are used without being preserved.
 *   r0 is post-incremented by every store and has advanced by PAGE_SIZE
 *   on return.
 *
 * Requires:
 *   PAGE_SIZE is a multiple of 128 (the loop only exits when the byte
 *   count reaches exactly zero).
 */

/* Bytes cleared per pass of the loop: 16 strd of 8 bytes each. */
#define BZERO_PAGE_PASS_BYTES	128

ENTRY(bzero_page)
	mov	r1, #PAGE_SIZE		/* r1 = bytes left to clear */
	mov	r2, #0
	mov	r3, #0

1:
	.rept	(BZERO_PAGE_PASS_BYTES / 8)
	strd	r2, [r0], #8		/* store 8 zero bytes, advance r0 */
	.endr

	subs	r1, r1, #BZERO_PAGE_PASS_BYTES
	bne	1b			/* loop until the whole page is done */

	mov	pc, lr
276: #endif /* __XSCALE__ */
CVSweb