Annotation of sys/arch/arm/arm/blockio.S, Revision 1.1
1.1 ! nbrk 1: /* $OpenBSD: blockio.S,v 1.1 2004/02/01 05:09:48 drahn Exp $ */
! 2: /* $NetBSD: blockio.S,v 1.5 2002/08/15 01:38:16 briggs Exp $ */
! 3:
! 4: /*
! 5: * Copyright (c) 2001 Ben Harris.
! 6: * Copyright (c) 1994 Mark Brinicombe.
! 7: * Copyright (c) 1994 Brini.
! 8: * All rights reserved.
! 9: *
! 10: * This code is derived from software written for Brini by Mark Brinicombe
! 11: *
! 12: * Redistribution and use in source and binary forms, with or without
! 13: * modification, are permitted provided that the following conditions
! 14: * are met:
! 15: * 1. Redistributions of source code must retain the above copyright
! 16: * notice, this list of conditions and the following disclaimer.
! 17: * 2. Redistributions in binary form must reproduce the above copyright
! 18: * notice, this list of conditions and the following disclaimer in the
! 19: * documentation and/or other materials provided with the distribution.
! 20: * 3. All advertising materials mentioning features or use of this software
! 21: * must display the following acknowledgement:
! 22: * This product includes software developed by Brini.
! 23: * 4. The name of the company nor the name of the author may be used to
! 24: * endorse or promote products derived from this software without specific
! 25: * prior written permission.
! 26: *
! 27: * THIS SOFTWARE IS PROVIDED BY BRINI ``AS IS'' AND ANY EXPRESS OR IMPLIED
! 28: * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
! 29: * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
! 30: * IN NO EVENT SHALL BRINI OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
! 31: * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
! 32: * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
! 33: * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
! 34: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
! 35: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
! 36: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
! 37: * SUCH DAMAGE.
! 38: *
! 39: * RiscBSD kernel project
! 40: *
! 41: * blockio.S
! 42: *
! 43: * optimised block read/write from/to IO routines.
! 44: *
! 45: * Created : 08/10/94
! 46: * Modified : 22/01/99 -- R.Earnshaw
! 47: * Faster, and small tweaks for StrongARM
! 48: */
! 49:
! 50: #include <machine/asm.h>
! 51:
! 52: RCSID("$NetBSD: blockio.S,v 1.4 2001/06/02 11:15:56 bjh21 Exp $")
! 53:
! 54: /*
! 55: * Read bytes from an I/O address into a block of memory
! 56: *
! 57: * r0 = address to read from (IO)
! 58: * r1 = address to write to (memory)
! 59: * r2 = length
! 60: */
! 61:
! 62: /* This code will look very familiar if you've read _memcpy(). */
! 63: ENTRY(read_multi_1)
! 64: mov ip, sp /* ip = sp on entry, saved in the frame record */
! 65: stmfd sp!, {fp, ip, lr, pc} /* push frame record */
! 66: sub fp, ip, #4 /* fp = address of the saved-pc slot */
! 67: subs r2, r2, #4 /* r2 = length - 4 */
! 68: blt .Lrm1_l4 /* less than 4 bytes */
! 69: ands r12, r1, #3 /* r12 = destination offset within its word */
! 70: beq .Lrm1_main /* aligned destination */
! 71: rsb r12, r12, #4 /* r12 = bytes (1-3) needed to align r1 */
! 72: cmp r12, #2 /* lt: copy 1 byte, eq: 2 bytes, gt: 3 bytes */
! 73: ldrb r3, [r0]
! 74: strb r3, [r1], #1
! 75: ldrgeb r3, [r0]
! 76: strgeb r3, [r1], #1
! 77: ldrgtb r3, [r0]
! 78: strgtb r3, [r1], #1
! 79: subs r2, r2, r12 /* account for the alignment bytes */
! 80: blt .Lrm1_l4 /* fewer than 4 bytes remain */
! 81: .Lrm1_main:
! 82: .Lrm1loop:
! 83: ldrb r3, [r0] /* 1st byte read -> bits 0-7 */
! 84: ldrb r12, [r0] /* r0 is never advanced: same I/O address */
! 85: orr r3, r3, r12, lsl #8 /* 2nd byte -> bits 8-15 */
! 86: ldrb r12, [r0]
! 87: orr r3, r3, r12, lsl #16 /* 3rd byte -> bits 16-23 */
! 88: ldrb r12, [r0]
! 89: orr r3, r3, r12, lsl #24 /* 4th byte -> bits 24-31 */
! 90: str r3, [r1], #4 /* one word store to the aligned destination */
! 91: subs r2, r2, #4
! 92: bge .Lrm1loop /* at least 4 more bytes to go */
! 93: .Lrm1_l4:
! 94: adds r2, r2, #4 /* r2 = bytes still to transfer */
! 95: ldmeqdb fp, {fp, sp, pc} /* none left: unwind the frame and return */
! 97: cmp r2, #2 /* lt: copy 1 byte, eq: 2 bytes, gt: 3 bytes */
! 98: ldrb r3, [r0]
! 99: strb r3, [r1], #1
! 100: ldrgeb r3, [r0]
! 101: strgeb r3, [r1], #1
! 102: ldrgtb r3, [r0]
! 103: strgtb r3, [r1], #1
! 104: ldmdb fp, {fp, sp, pc} /* restore fp and sp, return via saved lr */
! 105:
! 106: /*
! 107: * Write bytes to an I/O address from a block of memory
! 108: *
! 109: * r0 = address to write to (IO)
! 110: * r1 = address to read from (memory)
! 111: * r2 = length
! 112: */
! 113:
! 114: /* This code will look very familiar if you've read _memcpy(). */
! 115: ENTRY(write_multi_1)
! 116: mov ip, sp /* ip = sp on entry, saved in the frame record */
! 117: stmfd sp!, {fp, ip, lr, pc} /* push frame record */
! 118: sub fp, ip, #4 /* fp = address of the saved-pc slot */
! 119: subs r2, r2, #4 /* r2 = length - 4 */
! 120: blt .Lwm1_l4 /* less than 4 bytes */
! 121: ands r12, r1, #3 /* r12 = source offset within its word */
! 122: beq .Lwm1_main /* aligned source */
! 123: rsb r12, r12, #4 /* r12 = bytes (1-3) needed to align r1 */
! 124: cmp r12, #2 /* lt: copy 1 byte, eq: 2 bytes, gt: 3 bytes */
! 125: ldrb r3, [r1], #1
! 126: strb r3, [r0]
! 127: ldrgeb r3, [r1], #1
! 128: strgeb r3, [r0]
! 129: ldrgtb r3, [r1], #1
! 130: strgtb r3, [r0]
! 131: subs r2, r2, r12 /* account for the alignment bytes */
! 132: blt .Lwm1_l4 /* fewer than 4 bytes remain */
! 133: .Lwm1_main:
! 134: .Lwm1loop:
! 135: ldr r3, [r1], #4 /* fetch four source bytes as one word */
! 136: strb r3, [r0] /* write bits 0-7 */
! 137: mov r3, r3, lsr #8
! 138: strb r3, [r0] /* write bits 8-15 */
! 139: mov r3, r3, lsr #8
! 140: strb r3, [r0] /* write bits 16-23 */
! 141: mov r3, r3, lsr #8
! 142: strb r3, [r0] /* write bits 24-31; r0 is never advanced */
! 143: subs r2, r2, #4
! 144: bge .Lwm1loop /* at least 4 more bytes to go */
! 145: .Lwm1_l4:
! 146: adds r2, r2, #4 /* r2 = bytes still to transfer */
! 147: ldmeqdb fp, {fp, sp, pc} /* none left: unwind the frame and return */
! 148: cmp r2, #2 /* lt: copy 1 byte, eq: 2 bytes, gt: 3 bytes */
! 149: ldrb r3, [r1], #1
! 150: strb r3, [r0]
! 151: ldrgeb r3, [r1], #1
! 152: strgeb r3, [r0]
! 153: ldrgtb r3, [r1], #1
! 154: strgtb r3, [r0]
! 155: ldmdb fp, {fp, sp, pc} /* restore fp and sp, return via saved lr */
! 156:
! 157: /*
! 158: * Reads short ints (16 bits) from an I/O address into a block of memory
! 159: *
! 160: * r0 = address to read from (IO)
! 161: * r1 = address to write to (memory)
! 162: * r2 = length (number of 16-bit transfers)
! 163: */
! 164:
! 165: ENTRY(insw)
! 166: /* Make sure that we have a positive length */
! 167: cmp r2, #0x00000000
! 168: movle pc, lr /* count <= 0: return immediately */
! 169:
! 170: /* If the destination address is word aligned and the count is even, do it fast */
! 171:
! 172: tst r2, #0x00000001 /* Z set when the count is even */
! 173: tsteq r1, #0x00000003 /* only then: Z set when r1 is word aligned */
! 174: beq .Lfastinsw
! 175:
! 176: /* Non aligned insw: one I/O read and two byte stores per transfer */
! 177:
! 178: .Linswloop:
! 179: ldr r3, [r0] /* word read from the I/O address */
! 180: subs r2, r2, #0x00000001 /* Loop test in load delay slot */
! 181: strb r3, [r1], #0x0001 /* store bits 0-7 */
! 182: mov r3, r3, lsr #8
! 183: strb r3, [r1], #0x0001 /* store bits 8-15 */
! 184: bgt .Linswloop /* flags are still those of the subs above */
! 185:
! 186: mov pc, lr
! 187:
! 188: /* Word aligned insw: two I/O reads and one word store per iteration */
! 189:
! 190: .Lfastinsw:
! 191:
! 192: .Lfastinswloop:
! 193: ldr r3, [r0, #0x0002] /* take advantage of nonaligned
! 194: * word accesses */
! 195: ldr ip, [r0] /* second I/O read */
! 196: mov r3, r3, lsr #16 /* Put the two shorts together */
! 197: orr r3, r3, ip, lsl #16 /* top half of 1st load | low half of 2nd load << 16 */
! 198: str r3, [r1], #0x0004 /* Store */
! 199: subs r2, r2, #0x00000002 /* Next */
! 200: bgt .Lfastinswloop
! 201:
! 202: mov pc, lr
! 203:
! 204:
! 205: /*
! 206: * Writes short ints (16 bits) from a block of memory to an I/O address
! 207: *
! 208: * r0 = address to write to (IO)
! 209: * r1 = address to read from (memory)
! 210: * r2 = length (number of 16-bit transfers)
! 211: */
! 212:
! 213: ENTRY(outsw)
! 214: /* Make sure that we have a positive length */
! 215: cmp r2, #0x00000000
! 216: movle pc, lr /* count <= 0: return immediately */
! 217:
! 218: /* If the source address is word aligned and the count is even, do it fast */
! 219:
! 220: tst r2, #0x00000001 /* Z set when the count is even */
! 221: tsteq r1, #0x00000003 /* only then: Z set when r1 is word aligned */
! 222: beq .Lfastoutsw
! 223:
! 224: /* Non aligned outsw: two byte loads and one I/O write per transfer */
! 225:
! 226: .Loutswloop:
! 227: ldrb r3, [r1], #0x0001 /* low byte of the short */
! 228: ldrb ip, [r1], #0x0001 /* high byte of the short */
! 229: subs r2, r2, #0x00000001 /* Loop test in load delay slot */
! 230: orr r3, r3, ip, lsl #8 /* r3 = the 16-bit value */
! 231: orr r3, r3, r3, lsl #16 /* duplicate it into both halves of the word */
! 232: str r3, [r0]
! 233: bgt .Loutswloop /* flags are still those of the subs above */
! 234:
! 235: mov pc, lr
! 236:
! 237: /* Word aligned outsw: one word load and two I/O writes per iteration */
! 238:
! 239: .Lfastoutsw:
! 240:
! 241: .Lfastoutswloop:
! 242: ldr r3, [r1], #0x0004 /* r3 = (H)(L) */
! 243: subs r2, r2, #0x00000002 /* Loop test in load delay slot */
! 244:
! 245: eor ip, r3, r3, lsr #16 /* ip = (H)(H^L) */
! 246: eor r3, r3, ip, lsl #16 /* r3 = (H^H^L)(L) = (L)(L) */
! 247: eor ip, ip, r3, lsr #16 /* ip = (H)(H^L^L) = (H)(H) */
! 248:
! 249: str r3, [r0] /* low short first, duplicated in both halves */
! 250: str ip, [r0] /* then the high short, likewise duplicated */
! 251:
! 252: /* mov ip, r3, lsl #16
! 253: * orr ip, ip, ip, lsr #16
! 254: * str ip, [r0]
! 255: *
! 256: * mov ip, r3, lsr #16
! 257: * orr ip, ip, ip, lsl #16
! 258: * str ip, [r0]
! 259: */
! 260:
! 261: bgt .Lfastoutswloop /* the eor/str above leave the subs flags intact */
! 262:
! 263: mov pc, lr
! 264:
! 265: /*
! 266: * reads short ints (16 bits) from an I/O address into a block of memory
! 267: * with a length guaranteed to be a multiple of 16 bytes
! 268: * with a word aligned destination address
! 269: *
! 270: * r0 = address to read from (IO)
! 271: * r1 = address to write to (memory)
! 272: * r2 = length
! 273: */
! 274:
! 275: ENTRY(insw16)
! 276: /* Make sure that we have a positive length */
! 277: cmp r2, #0x00000000
! 278: movle pc, lr /* count <= 0: return immediately */
! 279:
! 280: /* If the destination address is word aligned and the count is a
! 281: multiple of 8, do it fast; otherwise hand over to insw */
! 282:
! 283: tst r2, #0x00000007 /* Z set when the count is a multiple of 8 */
! 284: tsteq r1, #0x00000003 /* only then: Z set when r1 is word aligned */
! 285:
! 286: bne _C_LABEL(insw) /* tail call: r0-r2 and lr are untouched */
! 287:
! 288: /* Word aligned insw: eight I/O reads, four words stored per iteration */
! 289:
! 290: stmfd sp!, {r4,r5,lr} /* r4, r5 and lr are used as scratch below */
! 291:
! 292: .Linsw16loop:
! 293: ldr r3, [r0, #0x0002] /* take advantage of nonaligned
! 294: * word accesses */
! 295: ldr lr, [r0]
! 296: mov r3, r3, lsr #16 /* Put the two shorts together */
! 297: orr r3, r3, lr, lsl #16 /* r3 = first output word */
! 298:
! 299: ldr r4, [r0, #0x0002] /* take advantage of nonaligned
! 300: * word accesses */
! 301: ldr lr, [r0]
! 302: mov r4, r4, lsr #16 /* Put the two shorts together */
! 303: orr r4, r4, lr, lsl #16 /* r4 = second output word */
! 304:
! 305: ldr r5, [r0, #0x0002] /* take advantage of nonaligned
! 306: * word accesses */
! 307: ldr lr, [r0]
! 308: mov r5, r5, lsr #16 /* Put the two shorts together */
! 309: orr r5, r5, lr, lsl #16 /* r5 = third output word */
! 310:
! 311: ldr ip, [r0, #0x0002] /* take advantage of nonaligned
! 312: * word accesses */
! 313: ldr lr, [r0]
! 314: mov ip, ip, lsr #16 /* Put the two shorts together */
! 315: orr ip, ip, lr, lsl #16 /* ip = fourth output word */
! 316:
! 317: stmia r1!, {r3-r5,ip} /* store all four words, advance r1 by 16 */
! 318: subs r2, r2, #0x00000008 /* Next */
! 319: bgt .Linsw16loop
! 320:
! 321: ldmfd sp!, {r4,r5,pc} /* Restore regs and go home */
! 322:
! 323:
! 324: /*
! 325: * Writes short ints (16 bits) from a block of memory to an I/O address
! 326: *
! 327: * r0 = address to write to (IO)
! 328: * r1 = address to read from (memory)
! 329: * r2 = length
! 330: */
! 331:
! 332: ENTRY(outsw16)
! 333: /* Make sure that we have a positive length */
! 334: cmp r2, #0x00000000
! 335: movle pc, lr /* count <= 0: return immediately */
! 336:
! 337: /* If the source address is word aligned and the count is a
! 338: multiple of 8, do it fast; otherwise hand over to outsw */
! 339:
! 340: tst r2, #0x00000007 /* Z set when the count is a multiple of 8 */
! 341: tsteq r1, #0x00000003 /* only then: Z set when r1 is word aligned */
! 342:
! 343: bne _C_LABEL(outsw) /* tail call: r0-r2 and lr are untouched */
! 344:
! 345: /* Word aligned outsw: four words loaded, eight I/O writes per iteration */
! 346:
! 347: stmfd sp!, {r4,r5,lr} /* r4, r5 and lr are used as scratch below */
! 348:
! 349: .Loutsw16loop:
! 350: ldmia r1!, {r4,r5,ip,lr} /* each register = (A)(B), two shorts */
! 351:
! 352: eor r3, r4, r4, lsl #16 /* r3 = (A^B)(B) */
! 353: eor r4, r4, r3, lsr #16 /* r4 = (A)(B^A^B) = (A)(A) */
! 354: eor r3, r3, r4, lsl #16 /* r3 = (A^B^A)(B) = (B)(B) */
! 355: str r3, [r0] /* low short first, duplicated in both halves */
! 356: str r4, [r0] /* then the high short */
! 357:
! 358: /* mov r3, r4, lsl #16
! 359: * orr r3, r3, r3, lsr #16
! 360: * str r3, [r0]
! 361: *
! 362: * mov r3, r4, lsr #16
! 363: * orr r3, r3, r3, lsl #16
! 364: * str r3, [r0]
! 365: */
! 366:
! 367: eor r3, r5, r5, lsl #16 /* r3 = (A^B)(B) */
! 368: eor r5, r5, r3, lsr #16 /* r5 = (A)(B^A^B) = (A)(A) */
! 369: eor r3, r3, r5, lsl #16 /* r3 = (A^B^A)(B) = (B)(B) */
! 370: str r3, [r0]
! 371: str r5, [r0]
! 372:
! 373: eor r3, ip, ip, lsl #16 /* r3 = (A^B)(B) */
! 374: eor ip, ip, r3, lsr #16 /* ip = (A)(B^A^B) = (A)(A) */
! 375: eor r3, r3, ip, lsl #16 /* r3 = (A^B^A)(B) = (B)(B) */
! 376: str r3, [r0]
! 377: str ip, [r0]
! 378:
! 379: eor r3, lr, lr, lsl #16 /* r3 = (A^B)(B) */
! 380: eor lr, lr, r3, lsr #16 /* lr = (A)(B^A^B) = (A)(A) */
! 381: eor r3, r3, lr, lsl #16 /* r3 = (A^B^A)(B) = (B)(B) */
! 382: str r3, [r0]
! 383: str lr, [r0]
! 384:
! 385: subs r2, r2, #0x00000008 /* eight shorts written this iteration */
! 386: bgt .Loutsw16loop
! 387:
! 388: ldmfd sp!, {r4,r5,pc} /* and go home */
! 389:
! 390: /*
! 391: * reads short ints (16 bits) from an I/O address into a block of memory
! 392: * The I/O address is assumed to be mapped multiple times in a block of
! 393: * 8 words.
! 394: * The destination address should be word aligned.
! 395: *
! 396: * r0 = address to read from (IO)
! 397: * r1 = address to write to (memory)
! 398: * r2 = length
! 399: */
! 400:
! 401: ENTRY(inswm8)
! 402: /* Make sure that we have a positive length */
! 403: cmp r2, #0x00000000
! 404: movle pc, lr /* count <= 0: return immediately */
! 405:
! 406: /* If the destination address is not word aligned, hand over to
! 407: insw; only the address is tested here, not the count */
! 408:
! 409: tst r1, #0x00000003
! 410:
! 411: bne _C_LABEL(insw) /* tail call: r0-r2 and lr are untouched */
! 412:
! 413: /* Word aligned insw */
! 414:
! 415: stmfd sp!, {r4-r9,lr} /* r4-r9 and lr are used as scratch below */
! 416:
! 417: mov lr, #0xff000000
! 418: orr lr, lr, #0x00ff0000 /* lr = 0xffff0000, mask of the upper halfword */
! 419:
! 420: .Linswm8_loop8:
! 421: cmp r2, #8
! 422: bcc .Linswm8_l8 /* fewer than 8 shorts left */
! 423:
! 424: ldmia r0, {r3-r9,ip} /* 8 word reads from r0 .. r0+28, no writeback */
! 425:
! 426: bic r3, r3, lr /* keep the low 16 bits of the even word... */
! 427: orr r3, r3, r4, lsl #16 /* ...and put the next word's low 16 bits on top */
! 428: bic r5, r5, lr
! 429: orr r4, r5, r6, lsl #16
! 430: bic r7, r7, lr
! 431: orr r5, r7, r8, lsl #16
! 432: bic r9, r9, lr
! 433: orr r6, r9, ip, lsl #16
! 434:
! 435: stmia r1!, {r3-r6} /* store 4 packed words, advance r1 by 16 */
! 436:
! 437: subs r2, r2, #0x00000008 /* Next */
! 438: bne .Linswm8_loop8
! 439: beq .Linswm8_l1 /* count exhausted: return */
! 440:
! 441: .Linswm8_l8:
! 442: cmp r2, #4
! 443: bcc .Linswm8_l4 /* fewer than 4 shorts left */
! 444:
! 445: ldmia r0, {r3-r6} /* 4 word reads, packed into 2 words below */
! 446:
! 447: bic r3, r3, lr
! 448: orr r3, r3, r4, lsl #16
! 449: bic r5, r5, lr
! 450: orr r4, r5, r6, lsl #16
! 451:
! 452: stmia r1!, {r3-r4}
! 453:
! 454: subs r2, r2, #0x00000004
! 455: beq .Linswm8_l1
! 456:
! 457: .Linswm8_l4:
! 458: cmp r2, #2
! 459: bcc .Linswm8_l2 /* fewer than 2 shorts left */
! 460:
! 461: ldmia r0, {r3-r4} /* 2 word reads, packed into 1 word below */
! 462:
! 463: bic r3, r3, lr
! 464: orr r3, r3, r4, lsl #16
! 465: str r3, [r1], #0x0004
! 466:
! 467: subs r2, r2, #0x00000002
! 468: beq .Linswm8_l1
! 469:
! 470: .Linswm8_l2:
! 471: cmp r2, #1
! 472: bcc .Linswm8_l1 /* nothing left */
! 473:
! 474: ldr r3, [r0] /* final single short */
! 475: subs r2, r2, #0x00000001 /* Test in load delay slot */
! 476: /* XXX, why don't we use result? */
! 477:
! 478: strb r3, [r1], #0x0001 /* store bits 0-7 */
! 479: mov r3, r3, lsr #8
! 480: strb r3, [r1], #0x0001 /* store bits 8-15 */
! 481:
! 482:
! 483: .Linswm8_l1:
! 484: ldmfd sp!, {r4-r9,pc} /* And go home */
! 485:
! 486: /*
! 487: * write short ints (16 bits) to an I/O address from a block of memory
! 488: * The I/O address is assumed to be mapped multiple times in a block of
! 489: * 8 words.
! 490: * The source address should be word aligned.
! 491: *
! 492: * r0 = address to write to (IO)
! 493: * r1 = address to read from (memory)
! 494: * r2 = length
! 495: */
! 496:
! 497: ENTRY(outswm8)
! 498: /* Make sure that we have a positive length */
! 499: cmp r2, #0x00000000
! 500: movle pc, lr /* count <= 0: return immediately */
! 501:
! 502: /* If the source address is not word aligned, hand over to
! 503: outsw; only the address is tested here, not the count */
! 504:
! 505: tst r1, #0x00000003
! 506:
! 507: bne _C_LABEL(outsw) /* tail call: r0-r2 and lr are untouched */
! 508:
! 509: /* Word aligned outsw */
! 510:
! 511: stmfd sp!, {r4-r8,lr} /* r4-r8 and lr are used as scratch below */
! 512:
! 513: .Loutswm8_loop8:
! 514: cmp r2, #8
! 515: bcc .Loutswm8_l8 /* fewer than 8 shorts left */
! 516:
! 517: ldmia r1!, {r3,r5,r7,ip} /* 4 source words, each (A)(B) = two shorts */
! 518:
! 519: eor r4, r3, r3, lsr #16 /* r4 = (A)(A^B) */
! 520: eor r3, r3, r4, lsl #16 /* r3 = (A^A^B)(B) = (B)(B) */
! 521: eor r4, r4, r3, lsr #16 /* r4 = (A)(B^A^B) = (A)(A) */
! 522:
! 523: eor r6, r5, r5, lsr #16 /* r6 = (A)(A^B) */
! 524: eor r5, r5, r6, lsl #16 /* r5 = (A^A^B)(B) = (B)(B) */
! 525: eor r6, r6, r5, lsr #16 /* r6 = (A)(B^A^B) = (A)(A) */
! 526:
! 527: eor r8, r7, r7, lsr #16 /* r8 = (A)(A^B) */
! 528: eor r7, r7, r8, lsl #16 /* r7 = (A^A^B)(B) = (B)(B) */
! 529: eor r8, r8, r7, lsr #16 /* r8 = (A)(B^A^B) = (A)(A) */
! 530:
! 531: eor lr, ip, ip, lsr #16 /* lr = (A)(A^B) */
! 532: eor ip, ip, lr, lsl #16 /* ip = (A^A^B)(B) = (B)(B) */
! 533: eor lr, lr, ip, lsr #16 /* lr = (A)(B^A^B) = (A)(A) */
! 534:
! 535: stmia r0, {r3-r8,ip,lr} /* 8 word writes to r0 .. r0+28, no writeback */
! 536:
! 537: subs r2, r2, #0x00000008 /* Next */
! 538: bne .Loutswm8_loop8
! 539: beq .Loutswm8_l1 /* count exhausted: return */
! 540:
! 541: .Loutswm8_l8:
! 542: cmp r2, #4
! 543: bcc .Loutswm8_l4 /* fewer than 4 shorts left */
! 544:
! 545: ldmia r1!, {r3-r4} /* 2 source words = 4 shorts */
! 546:
! 547: eor r6, r3, r3, lsr #16 /* r6 = (A)(A^B) */
! 548: eor r5, r3, r6, lsl #16 /* r5 = (A^A^B)(B) = (B)(B) */
! 549: eor r6, r6, r5, lsr #16 /* r6 = (A)(B^A^B) = (A)(A) */
! 550:
! 551: eor r8, r4, r4, lsr #16 /* r8 = (A)(A^B) */
! 552: eor r7, r4, r8, lsl #16 /* r7 = (A^A^B)(B) = (B)(B) */
! 553: eor r8, r8, r7, lsr #16 /* r8 = (A)(B^A^B) = (A)(A) */
! 554:
! 555: stmia r0, {r5-r8} /* 4 word writes, no writeback */
! 556:
! 557: subs r2, r2, #0x00000004
! 558: beq .Loutswm8_l1
! 559:
! 560: .Loutswm8_l4:
! 561: cmp r2, #2
! 562: bcc .Loutswm8_l2 /* fewer than 2 shorts left */
! 563:
! 564: ldr r3, [r1], #0x0004 /* r3 = (A)(B) */
! 565: subs r2, r2, #0x00000002 /* Done test in Load delay slot */
! 566:
! 567: eor r5, r3, r3, lsr #16 /* r5 = (A)(A^B)*/
! 568: eor r4, r3, r5, lsl #16 /* r4 = (A^A^B)(B) = (B)(B) */
! 569: eor r5, r5, r4, lsr #16 /* r5 = (A)(B^A^B) = (A)(A) */
! 570:
! 571: stmia r0, {r4, r5} /* 2 word writes, no writeback */
! 572:
! 573: beq .Loutswm8_l1 /* flags are still those of the subs above */
! 574:
! 575: .Loutswm8_l2:
! 576: cmp r2, #1
! 577: bcc .Loutswm8_l1 /* nothing left */
! 578:
! 579: ldrb r3, [r1], #0x0001 /* low byte of the final short */
! 580: ldrb r4, [r1], #0x0001 /* high byte of the final short */
! 581: subs r2, r2, #0x00000001 /* Done test in load delay slot */
! 582: /* XXX This test isn't used? */
! 583: orr r3, r3, r4, lsl #8 /* r3 = the 16-bit value */
! 584: orr r3, r3, r3, lsl #16 /* duplicate it into both halves of the word */
! 585: str r3, [r0]
! 586:
! 587: .Loutswm8_l1:
! 588: ldmfd sp!, {r4-r8,pc} /* And go home */
CVSweb