sys/arch/arm/arm/blockio.S - annotate

Return to blockio.S CVS log
Up to [local] / sys / arch / arm / arm
Annotation of sys/arch/arm/arm/blockio.S, Revision 1.1.1.1

1.1       nbrk        1: /*     $OpenBSD: blockio.S,v 1.1 2004/02/01 05:09:48 drahn Exp $       */
                      2: /*     $NetBSD: blockio.S,v 1.5 2002/08/15 01:38:16 briggs Exp $       */
                      3:
                      4: /*
                      5:  * Copyright (c) 2001 Ben Harris.
                      6:  * Copyright (c) 1994 Mark Brinicombe.
                      7:  * Copyright (c) 1994 Brini.
                      8:  * All rights reserved.
                      9:  *
                     10:  * This code is derived from software written for Brini by Mark Brinicombe
                     11:  *
                     12:  * Redistribution and use in source and binary forms, with or without
                     13:  * modification, are permitted provided that the following conditions
                     14:  * are met:
                     15:  * 1. Redistributions of source code must retain the above copyright
                     16:  *    notice, this list of conditions and the following disclaimer.
                     17:  * 2. Redistributions in binary form must reproduce the above copyright
                     18:  *    notice, this list of conditions and the following disclaimer in the
                     19:  *    documentation and/or other materials provided with the distribution.
                     20:  * 3. All advertising materials mentioning features or use of this software
                     21:  *    must display the following acknowledgement:
                     22:  *     This product includes software developed by Brini.
                     23:  * 4. The name of the company nor the name of the author may be used to
                     24:  *    endorse or promote products derived from this software without specific
                     25:  *    prior written permission.
                     26:  *
                     27:  * THIS SOFTWARE IS PROVIDED BY BRINI ``AS IS'' AND ANY EXPRESS OR IMPLIED
                     28:  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
                     29:  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
                     30:  * IN NO EVENT SHALL BRINI OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
                     31:  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
                     32:  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
                     33:  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                     34:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                     35:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                     36:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                     37:  * SUCH DAMAGE.
                     38:  *
                     39:  * RiscBSD kernel project
                     40:  *
                     41:  * blockio.S
                     42:  *
                     43:  * optimised block read/write from/to IO routines.
                     44:  *
                     45:  * Created      : 08/10/94
                     46:  * Modified    : 22/01/99  -- R.Earnshaw
                     47:  *                            Faster, and small tweaks for StrongARM
                     48:  */
                     49:
                     50: #include <machine/asm.h>
                     51:
                     52: RCSID("$NetBSD: blockio.S,v 1.4 2001/06/02 11:15:56 bjh21 Exp $")
                     53:
                     54: /*
                     55:  * Read bytes from an I/O address into a block of memory
                     56:  *
                     57:  * r0 = address to read from (IO)
                     58:  * r1 = address to write to (memory)
                     59:  * r2 = length
                     60:  */
                     61:
                     62: /* This code will look very familiar if you've read _memcpy(). */
                     63: ENTRY(read_multi_1)            /* copy r2 bytes, all read from the single I/O address r0, to memory at r1 */
                     64:        mov     ip, sp                  /* ip = sp on entry */
                     65:        stmfd   sp!, {fp, ip, lr, pc}   /* push a 4-word frame: fp, entry sp, lr, pc */
                     66:        sub     fp, ip, #4              /* fp -> highest word of that frame */
                     67:        subs    r2, r2, #4              /* r2 = length - 4 */
                     68:        blt     .Lrm1_l4                        /* less than 4 bytes */
                     69:        ands    r12, r1, #3             /* r12 = destination offset within its word */
                     70:        beq     .Lrm1_main              /* aligned destination */
                     71:        rsb     r12, r12, #4            /* r12 = 1..3 bytes needed to reach word alignment */
                     72:        cmp     r12, #2                 /* flags select how many of the byte copies below run */
                     73:        ldrb    r3, [r0]                /* always copy one byte (ldrb/strb leave the flags alone) */
                     74:        strb    r3, [r1], #1
                     75:        ldrgeb  r3, [r0]                /* r12 >= 2: second byte */
                     76:        strgeb  r3, [r1], #1
                     77:        ldrgtb  r3, [r0]                /* r12 == 3: third byte */
                     78:        strgtb  r3, [r1], #1
                     79:        subs    r2, r2, r12             /* account for the alignment bytes just copied */
                     80:        blt     .Lrm1_l4                /* fewer than 4 bytes left: finish in the tail */
                     81: .Lrm1_main:
                     82: .Lrm1loop:
                     83:        ldrb    r3, [r0]                /* 1st byte -> bits 0..7 */
                     84:        ldrb    r12, [r0]
                     85:        orr     r3, r3, r12, lsl #8     /* 2nd byte -> bits 8..15 */
                     86:        ldrb    r12, [r0]
                     87:        orr     r3, r3, r12, lsl #16    /* 3rd byte -> bits 16..23 */
                     88:        ldrb    r12, [r0]
                     89:        orr     r3, r3, r12, lsl #24    /* 4th byte -> bits 24..31 */
                     90:        str     r3, [r1], #4            /* one word store; NOTE(review): byte order in memory assumes little-endian - confirm */
                     91:        subs    r2, r2, #4
                     92:        bge     .Lrm1loop               /* loop while at least 4 more bytes remain */
                     93: .Lrm1_l4:
                     94:        adds    r2, r2, #4                      /* r2 = bytes still to copy */
                     95:        ldmeqdb fp, {fp, sp, pc}        /* nothing left: restore fp and sp, return via saved lr */
                     96:        /* only reached with r2 != 0 (1..3 bytes for a non-negative length) */
                     97:        cmp     r2, #2                  /* same conditional pattern as the alignment prologue */
                     98:        ldrb    r3, [r0]                /* always: first tail byte */
                     99:        strb    r3, [r1], #1
                    100:        ldrgeb  r3, [r0]                /* r2 >= 2: second tail byte */
                    101:        strgeb  r3, [r1], #1
                    102:        ldrgtb  r3, [r0]                /* r2 == 3: third tail byte */
                    103:        strgtb  r3, [r1], #1
                    104:        ldmdb   fp, {fp, sp, pc}        /* restore fp and sp, return via saved lr */
                    105:
                    106: /*
                    107:  * Write bytes to an I/O address from a block of memory
                    108:  *
                    109:  * r0 = address to write to (IO)
                    110:  * r1 = address to read from (memory)
                    111:  * r2 = length
                    112:  */
                    113:
                    114: /* This code will look very familiar if you've read _memcpy(). */
                    115: ENTRY(write_multi_1)           /* copy r2 bytes from memory at r1, all written to the single I/O address r0 */
                    116:        mov     ip, sp                  /* ip = sp on entry */
                    117:        stmfd   sp!, {fp, ip, lr, pc}   /* push a 4-word frame: fp, entry sp, lr, pc */
                    118:        sub     fp, ip, #4              /* fp -> highest word of that frame */
                    119:        subs    r2, r2, #4              /* r2 = length - 4 */
                    120:        blt     .Lwm1_l4                /* less than 4 bytes */
                    121:        ands    r12, r1, #3             /* r12 = source offset within its word */
                    122:        beq     .Lwm1_main              /* aligned source */
                    123:        rsb     r12, r12, #4            /* r12 = 1..3 bytes needed to reach word alignment */
                    124:        cmp     r12, #2                 /* flags select how many of the byte copies below run */
                    125:        ldrb    r3, [r1], #1            /* always copy one byte (ldrb/strb leave the flags alone) */
                    126:        strb    r3, [r0]
                    127:        ldrgeb  r3, [r1], #1            /* r12 >= 2: second byte */
                    128:        strgeb  r3, [r0]
                    129:        ldrgtb  r3, [r1], #1            /* r12 == 3: third byte */
                    130:        strgtb  r3, [r0]
                    131:        subs    r2, r2, r12             /* account for the alignment bytes just copied */
                    132:        blt     .Lwm1_l4                /* fewer than 4 bytes left: finish in the tail */
                    133: .Lwm1_main:
                    134: .Lwm1loop:
                    135:        ldr     r3, [r1], #4            /* fetch four source bytes with one aligned word load */
                    136:        strb    r3, [r0]                /* write bits 0..7 */
                    137:        mov     r3, r3, lsr #8
                    138:        strb    r3, [r0]                /* write bits 8..15 */
                    139:        mov     r3, r3, lsr #8
                    140:        strb    r3, [r0]                /* write bits 16..23 */
                    141:        mov     r3, r3, lsr #8
                    142:        strb    r3, [r0]                /* write bits 24..31 */
                    143:        subs    r2, r2, #4
                    144:        bge     .Lwm1loop               /* loop while at least 4 more bytes remain */
                    145: .Lwm1_l4:
                    146:        adds    r2, r2, #4                      /* r2 = bytes still to copy */
                    147:        ldmeqdb fp, {fp, sp, pc}        /* nothing left: restore fp and sp, return via saved lr */
                    148:        cmp     r2, #2                  /* same conditional pattern as the alignment prologue */
                    149:        ldrb    r3, [r1], #1            /* always: first tail byte */
                    150:        strb    r3, [r0]
                    151:        ldrgeb  r3, [r1], #1            /* r2 >= 2: second tail byte */
                    152:        strgeb  r3, [r0]
                    153:        ldrgtb  r3, [r1], #1            /* r2 == 3: third tail byte */
                    154:        strgtb  r3, [r0]
                    155:        ldmdb   fp, {fp, sp, pc}        /* restore fp and sp, return via saved lr */
                    156:
                    157: /*
                    158:  * Reads short ints (16 bits) from an I/O address into a block of memory
                    159:  *
                    160:  * r0 = address to read from (IO)
                    161:  * r1 = address to write to (memory)
                    162:  * r2 = length
                    163:  */
                    164:
                    165: ENTRY(insw)                    /* read r2 16-bit items from the I/O address r0 into memory at r1 */
                    166: /* Return immediately unless the item count in r2 is positive (signed test) */
                    167:        cmp     r2, #0x00000000
                    168:        movle   pc, lr
                    169:
                    170: /* If the count is even and the destination address is word aligned, do it fast */
                    171:
                    172:        tst     r2, #0x00000001         /* Z set when the count is even */
                    173:        tsteq   r1, #0x00000003         /* only then test alignment; Z stays clear for an odd count */
                    174:        beq     .Lfastinsw
                    175:
                    176: /* Non aligned insw: one item per iteration, stored as two bytes */
                    177:
                    178: .Linswloop:
                    179:        ldr     r3, [r0]                /* word read from the device; only bits 0..15 are stored */
                    180:        subs    r2, r2, #0x00000001     /* Loop test in load delay slot; flags are consumed by the bgt below */
                    181:        strb    r3, [r1], #0x0001       /* bits 0..7 */
                    182:        mov     r3, r3, lsr #8
                    183:        strb    r3, [r1], #0x0001       /* bits 8..15 */
                    184:        bgt     .Linswloop
                    185:
                    186:        mov     pc, lr
                    187:
                    188: /* Word aligned insw: two items per iteration, stored as one word */
                    189:
                    190: .Lfastinsw:
                    191:
                    192: .Lfastinswloop:
                    193:        ldr     r3, [r0, #0x0002]       /* take advantage of nonaligned
                    194:                                         * word accesses */
                    195:        ldr     ip, [r0]                /* second device read */
                    196:        mov     r3, r3, lsr #16         /* Put the two shorts together: top half of the first read goes low */
                    197:        orr     r3, r3, ip, lsl #16     /* low half of the second read goes high */
                    198:        str     r3, [r1], #0x0004       /* Store */
                    199:        subs    r2, r2, #0x00000002     /* Next: two items consumed */
                    200:        bgt     .Lfastinswloop
                    201:
                    202:        mov     pc, lr
                    203:
                    204:
                    205: /*
                    206:  * Writes short ints (16 bits) from a block of memory to an I/O address
                    207:  *
                    208:  * r0 = address to write to (IO)
                    209:  * r1 = address to read from (memory)
                    210:  * r2 = length
                    211:  */
                    212:
                    213: ENTRY(outsw)                   /* write r2 16-bit items from memory at r1 to the I/O address r0 */
                    214: /* Return immediately unless the item count in r2 is positive (signed test) */
                    215:        cmp     r2, #0x00000000
                    216:        movle   pc, lr
                    217:
                    218: /* If the count is even and the source address is word aligned, do it fast */
                    219:
                    220:        tst     r2, #0x00000001         /* Z set when the count is even */
                    221:        tsteq   r1, #0x00000003         /* only then test alignment; Z stays clear for an odd count */
                    222:        beq     .Lfastoutsw
                    223:
                    224: /* Non aligned outsw: one item per iteration, gathered from two bytes */
                    225:
                    226: .Loutswloop:
                    227:        ldrb    r3, [r1], #0x0001       /* low byte */
                    228:        ldrb    ip, [r1], #0x0001       /* high byte */
                    229:        subs    r2, r2, #0x00000001     /* Loop test in load delay slot; flags are consumed by the bgt below */
                    230:        orr     r3, r3, ip, lsl #8      /* r3 = 16-bit item */
                    231:        orr     r3, r3, r3, lsl #16     /* same item copied into both halves of the word */
                    232:        str     r3, [r0]
                    233:        bgt     .Loutswloop
                    234:
                    235:        mov     pc, lr
                    236:
                    237: /* Word aligned outsw: two items per iteration, fetched as one word */
                    238:
                    239: .Lfastoutsw:
                    240:
                    241: .Lfastoutswloop:
                    242:        ldr     r3, [r1], #0x0004       /* r3 = (H)(L) */
                    243:        subs    r2, r2, #0x00000002     /* Loop test in load delay slot; flags are consumed by the bgt below */
                    244:
                    245:        eor     ip, r3, r3, lsr #16     /* ip = (H)(H^L) */
                    246:        eor     r3, r3, ip, lsl #16     /* r3 = (H^H^L)(L) = (L)(L) */
                    247:        eor     ip, ip, r3, lsr #16     /* ip = (H)(H^L^L) = (H)(H) */
                    248:
                    249:        str     r3, [r0]                /* low item first ... */
                    250:        str     ip, [r0]                /* ... then the high item */
                    251:
                    252: /*     mov     ip, r3, lsl #16
                    253:  *     orr     ip, ip, ip, lsr #16
                    254:  *     str     ip, [r0]
                    255:  *
                    256:  *     mov     ip, r3, lsr #16
                    257:  *     orr     ip, ip, ip, lsl #16
                    258:  *     str     ip, [r0]
                    259:  */
                    260:
                    261:        bgt     .Lfastoutswloop
                    262:
                    263:        mov     pc, lr
                    264:
                    265: /*
                    266:  * reads short ints (16 bits) from an I/O address into a block of memory
                    267:  * with a length guaranteed to be a multiple of 16 bytes
                    268:  * with a word aligned destination address
                    269:  *
                    270:  * r0 = address to read from (IO)
                    271:  * r1 = address to write to (memory)
                    272:  * r2 = length
                    273:  */
                    274:
                    275: ENTRY(insw16)                  /* insw variant that moves eight 16-bit items (four words) per iteration */
                    276: /* Return immediately unless the item count in r2 is positive (signed test) */
                    277:        cmp     r2, #0x00000000
                    278:        movle   pc, lr
                    279:
                    280: /* If the destination address is word aligned and the size suitably
                    281:    aligned, do it fast */
                    282:
                    283:        tst     r2, #0x00000007         /* Z set when the count is a multiple of 8 */
                    284:        tsteq   r1, #0x00000003         /* only then test destination alignment */
                    285:
                    286:        bne     _C_LABEL(insw)          /* otherwise tail-call insw with r0-r2 and lr untouched */
                    287:
                    288: /* Word aligned insw */
                    289:
                    290:        stmfd   sp!, {r4,r5,lr}         /* r4, r5 and lr are used as scratch below */
                    291:
                    292: .Linsw16loop:
                    293:        ldr     r3, [r0, #0x0002]       /* take advantage of nonaligned
                    294:                                         * word accesses */
                    295:        ldr     lr, [r0]
                    296:        mov     r3, r3, lsr #16         /* Put the two shorts together */
                    297:        orr     r3, r3, lr, lsl #16     /* r3 = word 0 */
                    298:
                    299:        ldr     r4, [r0, #0x0002]       /* take advantage of nonaligned
                    300:                                         * word accesses */
                    301:        ldr     lr, [r0]
                    302:        mov     r4, r4, lsr #16         /* Put the two shorts together */
                    303:        orr     r4, r4, lr, lsl #16     /* r4 = word 1 */
                    304:
                    305:        ldr     r5, [r0, #0x0002]       /* take advantage of nonaligned
                    306:                                         * word accesses */
                    307:        ldr     lr, [r0]
                    308:        mov     r5, r5, lsr #16         /* Put the two shorts together */
                    309:        orr     r5, r5, lr, lsl #16     /* r5 = word 2 */
                    310:
                    311:        ldr     ip, [r0, #0x0002]       /* take advantage of nonaligned
                    312:                                         * word accesses */
                    313:        ldr     lr, [r0]
                    314:        mov     ip, ip, lsr #16         /* Put the two shorts together */
                    315:        orr     ip, ip, lr, lsl #16     /* ip = word 3 */
                    316:
                    317:        stmia   r1!, {r3-r5,ip}         /* store the four words and advance r1 by 16 bytes */
                    318:        subs    r2, r2, #0x00000008     /* Next: eight items consumed */
                    319:        bgt     .Linsw16loop
                    320:
                    321:        ldmfd   sp!, {r4,r5,pc}         /* Restore regs and go home */
                    322:
                    323:
                    324: /*
                    325:  * Writes short ints (16 bits) from a block of memory to an I/O address
                    326:  *
                    327:  * r0 = address to write to (IO)
                    328:  * r1 = address to read from (memory)
                    329:  * r2 = length
                    330:  */
                    331:
                    332: ENTRY(outsw16)                 /* outsw variant that moves eight 16-bit items (four words) per iteration */
                    333: /* Return immediately unless the item count in r2 is positive (signed test) */
                    334:        cmp     r2, #0x00000000
                    335:        movle   pc, lr
                    336:
                    337: /* If the source address is word aligned and the size suitably
                    338:    aligned, do it fast */
                    339:
                    340:        tst     r2, #0x00000007         /* Z set when the count is a multiple of 8 */
                    341:        tsteq   r1, #0x00000003         /* only then test source alignment */
                    342:
                    343:        bne     _C_LABEL(outsw)         /* otherwise tail-call outsw with r0-r2 and lr untouched */
                    344:
                    345: /* Word aligned outsw */
                    346:
                    347:        stmfd   sp!, {r4,r5,lr}         /* r4, r5 and lr are used as data registers below */
                    348:
                    349: .Loutsw16loop:
                    350:        ldmia   r1!, {r4,r5,ip,lr}      /* four source words, each (A)(B) = (high item)(low item) */
                    351:
                    352:        eor     r3, r4, r4, lsl #16     /* r3 = (A^B)(B) */
                    353:        eor     r4, r4, r3, lsr #16     /* r4 = (A)(B^A^B) = (A)(A) */
                    354:        eor     r3, r3, r4, lsl #16     /* r3 = (A^B^A)(B) = (B)(B) */
                    355:        str     r3, [r0]                /* low item first ... */
                    356:        str     r4, [r0]                /* ... then the high item */
                    357:
                    358: /*     mov     r3, r4, lsl #16
                    359:  *     orr     r3, r3, r3, lsr #16
                    360:  *     str     r3, [r0]
                    361:  *
                    362:  *     mov     r3, r4, lsr #16
                    363:  *     orr     r3, r3, r3, lsl #16
                    364:  *     str     r3, [r0]
                    365:  */
                    366:
                    367:        eor     r3, r5, r5, lsl #16     /* r3 = (A^B)(B) */
                    368:        eor     r5, r5, r3, lsr #16     /* r5 = (A)(B^A^B) = (A)(A) */
                    369:        eor     r3, r3, r5, lsl #16     /* r3 = (A^B^A)(B) = (B)(B) */
                    370:        str     r3, [r0]
                    371:        str     r5, [r0]
                    372:
                    373:        eor     r3, ip, ip, lsl #16     /* r3 = (A^B)(B) */
                    374:        eor     ip, ip, r3, lsr #16     /* ip = (A)(B^A^B) = (A)(A) */
                    375:        eor     r3, r3, ip, lsl #16     /* r3 = (A^B^A)(B) = (B)(B) */
                    376:        str     r3, [r0]
                    377:        str     ip, [r0]
                    378:
                    379:        eor     r3, lr, lr, lsl #16     /* r3 = (A^B)(B) */
                    380:        eor     lr, lr, r3, lsr #16     /* lr = (A)(B^A^B) = (A)(A) */
                    381:        eor     r3, r3, lr, lsl #16     /* r3 = (A^B^A)(B) = (B)(B) */
                    382:        str     r3, [r0]
                    383:        str     lr, [r0]
                    384:
                    385:        subs    r2, r2, #0x00000008     /* eight items consumed */
                    386:        bgt     .Loutsw16loop
                    387:
                    388:        ldmfd   sp!, {r4,r5,pc}         /* and go home */
                    389:
                    390: /*
                    391:  * reads short ints (16 bits) from an I/O address into a block of memory
                    392:  * The I/O address is assumed to be mapped multiple times in a block of
                    393:  * 8 words.
                    394:  * The destination address should be word aligned.
                    395:  *
                    396:  * r0 = address to read from (IO)
                    397:  * r1 = address to write to (memory)
                    398:  * r2 = length
                    399:  */
                    400:
                    401: ENTRY(inswm8)                  /* read r2 16-bit items into memory at r1 using multi-word loads from r0 */
                    402: /* Return immediately unless the item count in r2 is positive (signed test) */
                    403:        cmp     r2, #0x00000000
                    404:        movle   pc, lr
                    405:
                    406: /* If the destination address is not word aligned, hand the whole
                    407:    transfer to insw; no size check is made here */
                    408:
                    409:        tst     r1, #0x00000003
                    410:
                    411:        bne     _C_LABEL(insw)          /* tail call with r0-r2 and lr untouched */
                    412:
                    413: /* Word aligned insw */
                    414:
                    415:        stmfd   sp!, {r4-r9,lr}         /* r4-r9 hold data, lr holds the mask below */
                    416:
                    417:        mov     lr, #0xff000000
                    418:        orr     lr, lr, #0x00ff0000     /* lr = 0xffff0000: bic with it keeps bits 0..15 */
                    419:
                    420: .Linswm8_loop8:
                    421:        cmp     r2, #8
                    422:        bcc     .Linswm8_l8             /* fewer than 8 items left */
                    423:
                    424:        ldmia   r0, {r3-r9,ip}          /* eight words from r0 .. r0+28; r0 itself is not advanced */
                    425:
                    426:        bic     r3, r3, lr              /* each pair below: low half of one read | low half of the next << 16 */
                    427:        orr     r3, r3, r4, lsl #16
                    428:        bic     r5, r5, lr
                    429:        orr     r4, r5, r6, lsl #16
                    430:        bic     r7, r7, lr
                    431:        orr     r5, r7, r8, lsl #16
                    432:        bic     r9, r9, lr
                    433:        orr     r6, r9, ip, lsl #16
                    434:
                    435:        stmia   r1!, {r3-r6}            /* four packed words out, r1 += 16 */
                    436:
                    437:        subs    r2, r2, #0x00000008     /* Next */
                    438:        bne     .Linswm8_loop8
                    439:        beq     .Linswm8_l1             /* count reached zero (always taken when reached) */
                    440:
                    441: .Linswm8_l8:
                    442:        cmp     r2, #4
                    443:        bcc     .Linswm8_l4             /* fewer than 4 items left */
                    444:
                    445:        ldmia   r0, {r3-r6}             /* four words from r0 .. r0+12 */
                    446:
                    447:        bic     r3, r3, lr
                    448:        orr     r3, r3, r4, lsl #16
                    449:        bic     r5, r5, lr
                    450:        orr     r4, r5, r6, lsl #16
                    451:
                    452:        stmia   r1!, {r3-r4}            /* two packed words out, r1 += 8 */
                    453:
                    454:        subs    r2, r2, #0x00000004
                    455:        beq     .Linswm8_l1
                    456:
                    457: .Linswm8_l4:
                    458:        cmp     r2, #2
                    459:        bcc     .Linswm8_l2             /* fewer than 2 items left */
                    460:
                    461:        ldmia   r0, {r3-r4}             /* two words from r0 and r0+4 */
                    462:
                    463:        bic     r3, r3, lr
                    464:        orr     r3, r3, r4, lsl #16
                    465:        str     r3, [r1], #0x0004       /* one packed word out, r1 += 4 */
                    466:
                    467:        subs    r2, r2, #0x00000002
                    468:        beq     .Linswm8_l1
                    469:
                    470: .Linswm8_l2:
                    471:        cmp     r2, #1
                    472:        bcc     .Linswm8_l1             /* nothing left */
                    473:
                    474:        ldr     r3, [r0]                /* last single item: only bits 0..15 are stored */
                    475:        subs    r2, r2, #0x00000001     /* Test in load delay slot */
                    476:                                        /* XXX, why don't we use result?  */
                    477:
                    478:        strb    r3, [r1], #0x0001       /* bits 0..7 */
                    479:        mov     r3, r3, lsr #8
                    480:        strb    r3, [r1], #0x0001       /* bits 8..15 */
                    481:
                    482:
                    483: .Linswm8_l1:
                    484:        ldmfd   sp!, {r4-r9,pc}         /* And go home */
                    485:
                    486: /*
                    487:  * write short ints (16 bits) to an I/O address from a block of memory
                    488:  * The I/O address is assumed to be mapped multiple times in a block of
                    489:  * 8 words.
                    490:  * The source address should be word aligned.
                    491:  *
                    492:  * r0 = address to write to (IO)
                    493:  * r1 = address to read from (memory)
                    494:  * r2 = length
                    495:  */
                    496:
                    497: ENTRY(outswm8)                 /* write r2 16-bit items from memory at r1 using multi-word stores to r0 */
                    498: /* Return immediately unless the item count in r2 is positive (signed test) */
                    499:        cmp     r2, #0x00000000
                    500:        movle   pc, lr
                    501:
                    502: /* If the source address is not word aligned, hand the whole
                    503:    transfer to outsw; no size check is made here */
                    504:
                    505:        tst     r1, #0x00000003
                    506:
                    507:        bne     _C_LABEL(outsw)         /* tail call with r0-r2 and lr untouched */
                    508:
                    509: /* Word aligned outsw */
                    510:
                    511:        stmfd   sp!, {r4-r8,lr}         /* r4-r8 and lr are used as data registers below */
                    512:
                    513: .Loutswm8_loop8:
                    514:        cmp     r2, #8
                    515:        bcc     .Loutswm8_l8            /* fewer than 8 items left */
                    516:
                    517:        ldmia   r1!, {r3,r5,r7,ip}      /* four source words, each (A)(B) = (high item)(low item) */
                    518:
                    519:        eor     r4, r3, r3, lsr #16     /* r4 = (A)(A^B) */
                    520:        eor     r3, r3, r4, lsl #16     /* r3 = (A^A^B)(B) = (B)(B) */
                    521:        eor     r4, r4, r3, lsr #16     /* r4 = (A)(B^A^B) = (A)(A) */
                    522:
                    523:        eor     r6, r5, r5, lsr #16     /* r6 = (A)(A^B) */
                    524:        eor     r5, r5, r6, lsl #16     /* r5 = (A^A^B)(B) = (B)(B) */
                    525:        eor     r6, r6, r5, lsr #16     /* r6 = (A)(B^A^B) = (A)(A) */
                    526:
                    527:        eor     r8, r7, r7, lsr #16     /* r8 = (A)(A^B) */
                    528:        eor     r7, r7, r8, lsl #16     /* r7 = (A^A^B)(B) = (B)(B) */
                    529:        eor     r8, r8, r7, lsr #16     /* r8 = (A)(B^A^B) = (A)(A) */
                    530:
                    531:        eor     lr, ip, ip, lsr #16     /* lr = (A)(A^B) */
                    532:        eor     ip, ip, lr, lsl #16     /* ip = (A^A^B)(B) = (B)(B) */
                    533:        eor     lr, lr, ip, lsr #16     /* lr = (A)(B^A^B) = (A)(A) */
                    534:
                    535:        stmia   r0, {r3-r8,ip,lr}       /* eight words to r0 .. r0+28, low item of each pair first; r0 not advanced */
                    536:
                    537:        subs    r2, r2, #0x00000008     /* Next */
                    538:        bne     .Loutswm8_loop8
                    539:        beq     .Loutswm8_l1            /* count reached zero (always taken when reached) */
                    540:
                    541: .Loutswm8_l8:
                    542:        cmp     r2, #4
                    543:        bcc     .Loutswm8_l4            /* fewer than 4 items left */
                    544:
                    545:        ldmia   r1!, {r3-r4}            /* two source words */
                    546:
                    547:        eor     r6, r3, r3, lsr #16     /* r6 = (A)(A^B) */
                    548:        eor     r5, r3, r6, lsl #16     /* r5 = (A^A^B)(B) = (B)(B) */
                    549:        eor     r6, r6, r5, lsr #16     /* r6 = (A)(B^A^B) = (A)(A) */
                    550:
                    551:        eor     r8, r4, r4, lsr #16     /* r8 = (A)(A^B) */
                    552:        eor     r7, r4, r8, lsl #16     /* r7 = (A^A^B)(B) = (B)(B) */
                    553:        eor     r8, r8, r7, lsr #16     /* r8 = (A)(B^A^B) = (A)(A) */
                    554:
                    555:        stmia   r0, {r5-r8}             /* four words to r0 .. r0+12 */
                    556:
                    557:        subs    r2, r2, #0x00000004
                    558:        beq     .Loutswm8_l1
                    559:
                    560: .Loutswm8_l4:
                    561:        cmp     r2, #2
                    562:        bcc     .Loutswm8_l2            /* fewer than 2 items left */
                    563:
                    564:        ldr     r3, [r1], #0x0004       /* r3 = (A)(B) */
                    565:        subs    r2, r2, #0x00000002     /* Done test in Load delay slot; flags are consumed by the beq below */
                    566:
                    567:        eor     r5, r3, r3, lsr #16     /* r5 = (A)(A^B)*/
                    568:        eor     r4, r3, r5, lsl #16     /* r4 = (A^A^B)(B) = (B)(B) */
                    569:        eor     r5, r5, r4, lsr #16     /* r5 = (A)(B^A^B) = (A)(A) */
                    570:
                    571:        stmia   r0, {r4, r5}            /* two words to r0 and r0+4 */
                    572:
                    573:        beq     .Loutswm8_l1
                    574:
                    575: .Loutswm8_l2:
                    576:        cmp     r2, #1
                    577:        bcc     .Loutswm8_l1            /* nothing left */
                    578:
                    579:        ldrb    r3, [r1], #0x0001       /* last single item: low byte */
                    580:        ldrb    r4, [r1], #0x0001       /* high byte */
                    581:        subs    r2, r2, #0x00000001     /* Done test in load delay slot */
                    582:                                        /* XXX This test isn't used?  */
                    583:        orr     r3, r3, r4, lsl #8      /* r3 = 16-bit item */
                    584:        orr     r3, r3, r3, lsl #16     /* same item copied into both halves of the word */
                    585:        str     r3, [r0]
                    586:
                    587: .Loutswm8_l1:
                    588:        ldmfd   sp!, {r4-r8,pc}         /* And go home */
CVSweb