Annotation of sys/arch/arm/arm/blockio.S, Revision 1.1.1.1
1.1 nbrk 1: /* $OpenBSD: blockio.S,v 1.1 2004/02/01 05:09:48 drahn Exp $ */
2: /* $NetBSD: blockio.S,v 1.5 2002/08/15 01:38:16 briggs Exp $ */
3:
4: /*
5: * Copyright (c) 2001 Ben Harris.
6: * Copyright (c) 1994 Mark Brinicombe.
7: * Copyright (c) 1994 Brini.
8: * All rights reserved.
9: *
10: * This code is derived from software written for Brini by Mark Brinicombe
11: *
12: * Redistribution and use in source and binary forms, with or without
13: * modification, are permitted provided that the following conditions
14: * are met:
15: * 1. Redistributions of source code must retain the above copyright
16: * notice, this list of conditions and the following disclaimer.
17: * 2. Redistributions in binary form must reproduce the above copyright
18: * notice, this list of conditions and the following disclaimer in the
19: * documentation and/or other materials provided with the distribution.
20: * 3. All advertising materials mentioning features or use of this software
21: * must display the following acknowledgement:
22: * This product includes software developed by Brini.
23: * 4. The name of the company nor the name of the author may be used to
24: * endorse or promote products derived from this software without specific
25: * prior written permission.
26: *
27: * THIS SOFTWARE IS PROVIDED BY BRINI ``AS IS'' AND ANY EXPRESS OR IMPLIED
28: * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
29: * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
30: * IN NO EVENT SHALL BRINI OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
31: * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
32: * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
33: * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37: * SUCH DAMAGE.
38: *
39: * RiscBSD kernel project
40: *
41: * blockio.S
42: *
43: * optimised block read/write from/to IO routines.
44: *
45: * Created : 08/10/94
46: * Modified : 22/01/99 -- R.Earnshaw
47: * Faster, and small tweaks for StrongARM
48: */
49:
50: #include <machine/asm.h>
51:
52: RCSID("$NetBSD: blockio.S,v 1.4 2001/06/02 11:15:56 bjh21 Exp $")
53:
/*
 * read_multi_1: read bytes from one I/O address into a block of memory.
 *
 * r0 = address to read from (IO); never advanced, every byte is loaded
 *      from this same address
 * r1 = address to write to (memory); post-incremented as data is stored
 * r2 = length in bytes
 *
 * r3 and r12 are used as scratch.  A four-word frame {fp, ip, lr, pc} is
 * pushed on entry; every exit is an "ldmdb fp, {fp, sp, pc}", which
 * restores fp and sp and returns through the saved lr in one instruction.
 */

/* This code will look very familiar if you've read _memcpy(). */
ENTRY(read_multi_1)
	mov	ip, sp
	stmfd	sp!, {fp, ip, lr, pc}
	sub	fp, ip, #4		/* fp -> saved pc slot of the frame */
	subs	r2, r2, #4		/* r2 = length - 4 */
	blt	.Lrm1_l4		/* less than 4 bytes */
	ands	r12, r1, #3
	beq	.Lrm1_main		/* aligned destination */

	/* Copy 1-3 single bytes so that r1 becomes word aligned. */
	rsb	r12, r12, #4		/* r12 = bytes needed to align r1 */
	cmp	r12, #2
	ldrb	r3, [r0]		/* always at least one */
	strb	r3, [r1], #1
	ldrgeb	r3, [r0]		/* second byte if r12 >= 2 */
	strgeb	r3, [r1], #1
	ldrgtb	r3, [r0]		/* third byte if r12 == 3 */
	strgtb	r3, [r1], #1
	subs	r2, r2, r12
	blt	.Lrm1_l4

	/*
	 * Main loop: four byte reads from the I/O address are packed into
	 * one word (first byte read lands in bits 7:0, last in bits 31:24)
	 * and stored to the now word-aligned destination.
	 */
.Lrm1_main:
.Lrm1loop:
	ldrb	r3, [r0]
	ldrb	r12, [r0]
	orr	r3, r3, r12, lsl #8
	ldrb	r12, [r0]
	orr	r3, r3, r12, lsl #16
	ldrb	r12, [r0]
	orr	r3, r3, r12, lsl #24
	str	r3, [r1], #4
	subs	r2, r2, #4
	bge	.Lrm1loop

.Lrm1_l4:
	adds	r2, r2, #4		/* r2 = length again */
	ldmeqdb	fp, {fp, sp, pc}	/* nothing left: unwind and return */

	/*
	 * The ldmeqdb above loads pc itself, so execution only continues
	 * here when r2 != 0.  Copy the trailing bytes one at a time.
	 */
	cmp	r2, #2
	ldrb	r3, [r0]
	strb	r3, [r1], #1
	ldrgeb	r3, [r0]		/* second byte if r2 >= 2 */
	strgeb	r3, [r1], #1
	ldrgtb	r3, [r0]		/* third byte if r2 > 2 */
	strgtb	r3, [r1], #1
	ldmdb	fp, {fp, sp, pc}
105:
/*
 * write_multi_1: write a block of memory, one byte at a time, to a single
 * I/O address.
 *
 * r0 = address to write to (IO); never advanced, every byte is stored
 *      to this same address
 * r1 = address to read from (memory); post-incremented as data is loaded
 * r2 = length in bytes
 *
 * r3 and r12 are scratch.  The frame {fp, ip, lr, pc} pushed on entry is
 * unwound, and the function returns, via "ldmdb fp, {fp, sp, pc}".
 */

/* This code will look very familiar if you've read _memcpy(). */
ENTRY(write_multi_1)
	mov	r12, sp
	stmfd	sp!, {fp, r12, lr, pc}
	sub	fp, r12, #4		/* fp -> saved pc slot of the frame */
	subs	r2, r2, #4		/* r2 = length - 4 */
	blt	.Lwm1_tail		/* fewer than 4 bytes in total */
	ands	r12, r1, #3
	beq	.Lwm1_words		/* source already word aligned */

	/* Emit 1-3 leading bytes so that r1 becomes word aligned. */
	rsb	r12, r12, #4		/* r12 = bytes needed to align r1 */
	cmp	r12, #2
	ldrb	r3, [r1], #1		/* always at least one */
	strb	r3, [r0]
	ldrgeb	r3, [r1], #1		/* second byte if r12 >= 2 */
	strgeb	r3, [r0]
	ldrgtb	r3, [r1], #1		/* third byte if r12 == 3 */
	strgtb	r3, [r0]
	subs	r2, r2, r12
	blt	.Lwm1_tail

	/*
	 * Word loop: load one aligned word from memory and push it out as
	 * four byte stores, bits 7:0 first and bits 31:24 last.
	 */
.Lwm1_words:
	ldr	r3, [r1], #4
	strb	r3, [r0]
	mov	r3, r3, lsr #8
	strb	r3, [r0]
	mov	r3, r3, lsr #8
	strb	r3, [r0]
	mov	r3, r3, lsr #8
	strb	r3, [r0]
	subs	r2, r2, #4
	bge	.Lwm1_words

.Lwm1_tail:
	adds	r2, r2, #4		/* r2 = bytes still to write */
	ldmeqdb	fp, {fp, sp, pc}	/* none: unwind and return */

	/* Reached only with r2 != 0: write the trailing bytes singly. */
	cmp	r2, #2
	ldrb	r3, [r1], #1
	strb	r3, [r0]
	ldrgeb	r3, [r1], #1		/* second byte if r2 >= 2 */
	strgeb	r3, [r0]
	ldrgtb	r3, [r1], #1		/* third byte if r2 > 2 */
	strgtb	r3, [r0]
	ldmdb	fp, {fp, sp, pc}
156:
/*
 * insw: read short ints (16 bits) from an I/O address into a block of
 * memory.
 *
 * r0 = address to read from (IO); never advanced
 * r1 = address to write to (memory); post-incremented
 * r2 = length, counted in 16-bit items
 *
 * r3 and r12 are scratch.  Returns immediately when r2 <= 0.
 */

ENTRY(insw)
	/* Nothing to do unless the count is positive. */
	cmp	r2, #0
	movle	pc, lr

	/*
	 * The word-at-a-time path needs an even count and a word-aligned
	 * destination; tsteq only runs (and can only leave Z set) when the
	 * count test has already passed.
	 */
	tst	r2, #1
	tsteq	r1, #3
	beq	.Linsw_wordwise

	/*
	 * General path: one I/O word load per item, of which the low 16
	 * bits are stored as two separate bytes (bits 7:0 first).
	 */
.Linsw_bytewise:
	ldr	r3, [r0]
	subs	r2, r2, #1		/* loop test in load delay slot */
	strb	r3, [r1], #1
	mov	r3, r3, lsr #8
	strb	r3, [r1], #1
	bgt	.Linsw_bytewise

	mov	pc, lr

	/*
	 * Aligned path: two items per iteration.  The first load is made
	 * from r0 + 2 (the original author's note: "take advantage of
	 * nonaligned word accesses"); its upper halfword becomes the low
	 * half of the stored word, and the low halfword of the second load
	 * becomes the upper half.
	 */
.Linsw_wordwise:
	ldr	r3, [r0, #2]
	ldr	r12, [r0]
	mov	r3, r3, lsr #16		/* first item -> bits 15:0 */
	orr	r3, r3, r12, lsl #16	/* second item -> bits 31:16 */
	str	r3, [r1], #4
	subs	r2, r2, #2
	bgt	.Linsw_wordwise

	mov	pc, lr
203:
204:
/*
 * outsw: write short ints (16 bits) from a block of memory to an I/O
 * address.
 *
 * r0 = address to write to (IO); never advanced
 * r1 = address to read from (memory); post-incremented
 * r2 = length, counted in 16-bit items
 *
 * r3 and r12 are scratch.  Returns immediately when r2 <= 0.
 * Every store to the I/O address is a full word holding the same 16-bit
 * item in both its upper and lower halves.
 */

ENTRY(outsw)
	/* Nothing to do unless the count is positive. */
	cmp	r2, #0
	movle	pc, lr

	/*
	 * The word-at-a-time path needs an even count and a word-aligned
	 * source address.
	 */
	tst	r2, #1
	tsteq	r1, #3
	beq	.Loutsw_wordwise

	/*
	 * General path: build each item from two byte loads (first byte in
	 * bits 7:0), replicate it into the upper halfword, store the word.
	 */
.Loutsw_bytewise:
	ldrb	r3, [r1], #1
	ldrb	r12, [r1], #1
	subs	r2, r2, #1		/* loop test in load delay slot */
	orr	r3, r3, r12, lsl #8
	orr	r3, r3, r3, lsl #16
	str	r3, [r0]
	bgt	.Loutsw_bytewise

	mov	pc, lr

	/*
	 * Aligned path: one memory word = two items, H (bits 31:16) and
	 * L (bits 15:0).  Three EORs turn (H)(L) into (L)(L) and (H)(H);
	 * L is written out first.  None of the EORs sets flags, so the
	 * bgt still sees the result of the subs.
	 *
	 * The same two words could be built with mov/orr pairs instead:
	 *	mov ip, r3, lsl #16 ; orr ip, ip, ip, lsr #16	-> (L)(L)
	 *	mov ip, r3, lsr #16 ; orr ip, ip, ip, lsl #16	-> (H)(H)
	 */
.Loutsw_wordwise:
	ldr	r3, [r1], #4		/* r3  = (H)(L) */
	subs	r2, r2, #2		/* loop test in load delay slot */

	eor	r12, r3, r3, lsr #16	/* r12 = (H)(H^L) */
	eor	r3, r3, r12, lsl #16	/* r3  = (H^H^L)(L) = (L)(L) */
	eor	r12, r12, r3, lsr #16	/* r12 = (H)(H^L^L) = (H)(H) */

	str	r3, [r0]
	str	r12, [r0]

	bgt	.Loutsw_wordwise

	mov	pc, lr
264:
/*
 * insw16: read short ints (16 bits) from an I/O address into a block of
 * memory, eight items (16 bytes) per loop iteration.
 *
 * Intended for a length that is a multiple of 16 bytes and a word-aligned
 * destination; when either condition does not hold the call is handed
 * over to insw() unchanged.
 *
 * r0 = address to read from (IO); never advanced
 * r1 = address to write to (memory); advanced by the stmia write-back
 * r2 = length, counted in 16-bit items
 *
 * r3 and r12 are scratch; r4, r5 and lr are saved on the stack and
 * restored on return.
 */

ENTRY(insw16)
	/* Nothing to do unless the count is positive. */
	cmp	r2, #0
	movle	pc, lr

	/*
	 * Fast path only for a count that is a multiple of 8 and a
	 * word-aligned destination; otherwise branch (not call) to insw
	 * with r0-r2 and lr untouched.
	 */
	tst	r2, #7
	tsteq	r1, #3

	bne	_C_LABEL(insw)

	stmfd	sp!, {r4, r5, lr}

	/*
	 * Each group below performs the same two I/O loads as insw's
	 * aligned path (first from r0 + 2, then from r0) and packs the two
	 * items into one word; lr is the temporary for the second load.
	 */
.Linsw16_burst:
	ldr	r3, [r0, #2]
	ldr	lr, [r0]
	mov	r3, r3, lsr #16		/* items 1 and 2 -> r3 */
	orr	r3, r3, lr, lsl #16

	ldr	r4, [r0, #2]
	ldr	lr, [r0]
	mov	r4, r4, lsr #16		/* items 3 and 4 -> r4 */
	orr	r4, r4, lr, lsl #16

	ldr	r5, [r0, #2]
	ldr	lr, [r0]
	mov	r5, r5, lsr #16		/* items 5 and 6 -> r5 */
	orr	r5, r5, lr, lsl #16

	ldr	r12, [r0, #2]
	ldr	lr, [r0]
	mov	r12, r12, lsr #16	/* items 7 and 8 -> r12 */
	orr	r12, r12, lr, lsl #16

	stmia	r1!, {r3-r5, r12}	/* store all four words at once */
	subs	r2, r2, #8
	bgt	.Linsw16_burst

	ldmfd	sp!, {r4, r5, pc}	/* restore regs and return */
322:
323:
/*
 * outsw16: write short ints (16 bits) from a block of memory to an I/O
 * address, eight items (16 bytes) per loop iteration.
 *
 * The fast loop is used only when the count is a multiple of 8 and the
 * source address is word aligned; otherwise the call is handed over to
 * outsw() unchanged.
 *
 * r0 = address to write to (IO); never advanced
 * r1 = address to read from (memory); advanced by the ldmia write-back
 * r2 = length, counted in 16-bit items
 *
 * r3 and r12 are scratch; r4, r5 and lr are saved on the stack and
 * restored on return.
 */

ENTRY(outsw16)
	/* Nothing to do unless the count is positive. */
	cmp	r2, #0
	movle	pc, lr

	/* Count not a multiple of 8, or source misaligned: use outsw. */
	tst	r2, #7
	tsteq	r1, #3

	bne	_C_LABEL(outsw)

	stmfd	sp!, {r4, r5, lr}

	/*
	 * Four words (eight items) are fetched per iteration.  For each
	 * word (A)(B), A in bits 31:16 and B in bits 15:0, three EORs
	 * produce (B)(B) in r3 and (A)(A) in the source register; (B)(B)
	 * is written to the I/O address first.
	 *
	 * An equivalent mov/orr formulation for one word held in r4:
	 *	mov r3, r4, lsl #16 ; orr r3, r3, r3, lsr #16	-> (B)(B)
	 *	mov r3, r4, lsr #16 ; orr r3, r3, r3, lsl #16	-> (A)(A)
	 */
.Loutsw16_burst:
	ldmia	r1!, {r4, r5, r12, lr}

	eor	r3, r4, r4, lsl #16	/* r3 = (A^B)(B) */
	eor	r4, r4, r3, lsr #16	/* r4 = (A)(B^A^B) = (A)(A) */
	eor	r3, r3, r4, lsl #16	/* r3 = (A^B^A)(B) = (B)(B) */
	str	r3, [r0]
	str	r4, [r0]

	eor	r3, r5, r5, lsl #16	/* r3 = (A^B)(B) */
	eor	r5, r5, r3, lsr #16	/* r5 = (A)(B^A^B) = (A)(A) */
	eor	r3, r3, r5, lsl #16	/* r3 = (A^B^A)(B) = (B)(B) */
	str	r3, [r0]
	str	r5, [r0]

	eor	r3, r12, r12, lsl #16	/* r3  = (A^B)(B) */
	eor	r12, r12, r3, lsr #16	/* r12 = (A)(B^A^B) = (A)(A) */
	eor	r3, r3, r12, lsl #16	/* r3  = (A^B^A)(B) = (B)(B) */
	str	r3, [r0]
	str	r12, [r0]

	eor	r3, lr, lr, lsl #16	/* r3 = (A^B)(B) */
	eor	lr, lr, r3, lsr #16	/* lr = (A)(B^A^B) = (A)(A) */
	eor	r3, r3, lr, lsl #16	/* r3 = (A^B^A)(B) = (B)(B) */
	str	r3, [r0]
	str	lr, [r0]

	subs	r2, r2, #8
	bgt	.Loutsw16_burst

	ldmfd	sp!, {r4, r5, pc}	/* restore regs and return */
389:
/*
 * inswm8: read short ints (16 bits) from an I/O address into a block of
 * memory using multi-word loads.
 *
 * The I/O address is assumed to be mapped multiple times in a block of
 * 8 words: the code loads up to eight consecutive words starting at r0
 * and takes one 16-bit item from the low half of each.
 * The destination address should be word aligned; if it is not, the call
 * is handed over to insw() unchanged.
 *
 * r0 = address to read from (IO); never advanced
 * r1 = address to write to (memory); post-incremented
 * r2 = length, counted in 16-bit items
 *
 * r3 and r12 are scratch; r4-r9 and lr are saved on the stack and
 * restored on return.  lr holds the mask 0xffff0000 throughout.
 */

ENTRY(inswm8)
	/* Nothing to do unless the count is positive. */
	cmp	r2, #0
	movle	pc, lr

	/* Misaligned destination: let insw deal with it. */
	tst	r1, #3

	bne	_C_LABEL(insw)

	stmfd	sp!, {r4-r9, lr}

	mov	lr, #0xff000000
	orr	lr, lr, #0x00ff0000	/* lr = 0xffff0000 */

	/*
	 * Eight items per pass: each pair of loaded words is merged into
	 * one output word, the first word's low half staying in bits 15:0
	 * (bic clears its upper half) and the second word shifted into
	 * bits 31:16.
	 */
.Linswm8_x8:
	cmp	r2, #8
	bcc	.Linswm8_x4		/* fewer than 8 left */

	ldmia	r0, {r3-r9, r12}

	bic	r3, r3, lr
	orr	r3, r3, r4, lsl #16
	bic	r5, r5, lr
	orr	r4, r5, r6, lsl #16
	bic	r7, r7, lr
	orr	r5, r7, r8, lsl #16
	bic	r9, r9, lr
	orr	r6, r9, r12, lsl #16

	stmia	r1!, {r3-r6}

	subs	r2, r2, #8
	bne	.Linswm8_x8
	beq	.Linswm8_done

	/* Four items, if at least that many remain. */
.Linswm8_x4:
	cmp	r2, #4
	bcc	.Linswm8_x2

	ldmia	r0, {r3-r6}

	bic	r3, r3, lr
	orr	r3, r3, r4, lsl #16
	bic	r5, r5, lr
	orr	r4, r5, r6, lsl #16

	stmia	r1!, {r3, r4}

	subs	r2, r2, #4
	beq	.Linswm8_done

	/* Two items, if at least that many remain. */
.Linswm8_x2:
	cmp	r2, #2
	bcc	.Linswm8_x1

	ldmia	r0, {r3, r4}

	bic	r3, r3, lr
	orr	r3, r3, r4, lsl #16
	str	r3, [r1], #4

	subs	r2, r2, #2
	beq	.Linswm8_done

	/* A final single item is stored as two separate bytes. */
.Linswm8_x1:
	cmp	r2, #1
	bcc	.Linswm8_done

	ldr	r3, [r0]
	subs	r2, r2, #1		/* result is not tested afterwards */

	strb	r3, [r1], #1
	mov	r3, r3, lsr #8
	strb	r3, [r1], #1

.Linswm8_done:
	ldmfd	sp!, {r4-r9, pc}	/* restore regs and return */
485:
/*
 * outswm8: write short ints (16 bits) to an I/O address from a block of
 * memory using multi-word stores.
 *
 * The I/O address is assumed to be mapped multiple times in a block of
 * 8 words: the code stores up to eight consecutive words starting at r0,
 * one per 16-bit item.
 * The source address should be word aligned; if it is not, the call is
 * handed over to outsw() unchanged.
 *
 * r0 = address to write to (IO); never advanced
 * r1 = address to read from (memory); post-incremented
 * r2 = length, counted in 16-bit items
 *
 * r3 and r12 are scratch; r4-r8 and lr are saved on the stack and
 * restored on return.
 *
 * Notation: a memory word (A)(B) has item A in bits 31:16 and item B in
 * bits 15:0.  Each item is expanded to a word carrying it in both halves,
 * and (B)(B) is placed before (A)(A) in every store-multiple.
 */

ENTRY(outswm8)
	/* Nothing to do unless the count is positive. */
	cmp	r2, #0
	movle	pc, lr

	/* Misaligned source: let outsw deal with it. */
	tst	r1, #3

	bne	_C_LABEL(outsw)

	stmfd	sp!, {r4-r8, lr}

	/*
	 * Eight items per pass.  The four source words are loaded into
	 * every other register so that each (B)(B)/(A)(A) pair ends up in
	 * adjacent registers for the single stmia.
	 */
.Loutswm8_x8:
	cmp	r2, #8
	bcc	.Loutswm8_x4		/* fewer than 8 left */

	ldmia	r1!, {r3, r5, r7, r12}

	eor	r4, r3, r3, lsr #16	/* r4 = (A)(A^B) */
	eor	r3, r3, r4, lsl #16	/* r3 = (A^A^B)(B) = (B)(B) */
	eor	r4, r4, r3, lsr #16	/* r4 = (A)(B^A^B) = (A)(A) */

	eor	r6, r5, r5, lsr #16	/* r6 = (A)(A^B) */
	eor	r5, r5, r6, lsl #16	/* r5 = (A^A^B)(B) = (B)(B) */
	eor	r6, r6, r5, lsr #16	/* r6 = (A)(B^A^B) = (A)(A) */

	eor	r8, r7, r7, lsr #16	/* r8 = (A)(A^B) */
	eor	r7, r7, r8, lsl #16	/* r7 = (A^A^B)(B) = (B)(B) */
	eor	r8, r8, r7, lsr #16	/* r8 = (A)(B^A^B) = (A)(A) */

	eor	lr, r12, r12, lsr #16	/* lr  = (A)(A^B) */
	eor	r12, r12, lr, lsl #16	/* r12 = (A^A^B)(B) = (B)(B) */
	eor	lr, lr, r12, lsr #16	/* lr  = (A)(B^A^B) = (A)(A) */

	stmia	r0, {r3-r8, r12, lr}

	subs	r2, r2, #8
	bne	.Loutswm8_x8
	beq	.Loutswm8_done

	/* Four items (two source words), if at least that many remain. */
.Loutswm8_x4:
	cmp	r2, #4
	bcc	.Loutswm8_x2

	ldmia	r1!, {r3, r4}

	eor	r6, r3, r3, lsr #16	/* r6 = (A)(A^B) */
	eor	r5, r3, r6, lsl #16	/* r5 = (A^A^B)(B) = (B)(B) */
	eor	r6, r6, r5, lsr #16	/* r6 = (A)(B^A^B) = (A)(A) */

	eor	r8, r4, r4, lsr #16	/* r8 = (A)(A^B) */
	eor	r7, r4, r8, lsl #16	/* r7 = (A^A^B)(B) = (B)(B) */
	eor	r8, r8, r7, lsr #16	/* r8 = (A)(B^A^B) = (A)(A) */

	stmia	r0, {r5-r8}

	subs	r2, r2, #4
	beq	.Loutswm8_done

	/* Two items (one source word), if at least that many remain. */
.Loutswm8_x2:
	cmp	r2, #2
	bcc	.Loutswm8_x1

	ldr	r3, [r1], #4		/* r3 = (A)(B) */
	subs	r2, r2, #2		/* flags consumed by the beq below */

	eor	r5, r3, r3, lsr #16	/* r5 = (A)(A^B) */
	eor	r4, r3, r5, lsl #16	/* r4 = (A^A^B)(B) = (B)(B) */
	eor	r5, r5, r4, lsr #16	/* r5 = (A)(B^A^B) = (A)(A) */

	stmia	r0, {r4, r5}

	beq	.Loutswm8_done

	/*
	 * A final single item is assembled from two byte loads (first
	 * byte in bits 7:0) and replicated into the upper halfword.
	 */
.Loutswm8_x1:
	cmp	r2, #1
	bcc	.Loutswm8_done

	ldrb	r3, [r1], #1
	ldrb	r4, [r1], #1
	subs	r2, r2, #1		/* result is not tested afterwards */
	orr	r3, r3, r4, lsl #8
	orr	r3, r3, r3, lsl #16
	str	r3, [r0]

.Loutswm8_done:
	ldmfd	sp!, {r4-r8, pc}	/* restore regs and return */
CVSweb