[BACK]Return to bcopyinout.S CVS log [TXT][DIR] Up to [local] / sys / arch / arm / arm

Annotation of sys/arch/arm/arm/bcopyinout.S, Revision 1.1

1.1     ! nbrk        1: /*     $OpenBSD: bcopyinout.S,v 1.1 2004/02/01 05:09:48 drahn Exp $    */
        !             2: /*     $NetBSD: bcopyinout.S,v 1.13 2003/10/31 16:54:05 scw Exp $      */
        !             3:
        !             4: /*
        !             5:  * Copyright (c) 2002 Wasabi Systems, Inc.
        !             6:  * All rights reserved.
        !             7:  *
        !             8:  * Written by Allen Briggs for Wasabi Systems, Inc.
        !             9:  *
        !            10:  * Redistribution and use in source and binary forms, with or without
        !            11:  * modification, are permitted provided that the following conditions
        !            12:  * are met:
        !            13:  * 1. Redistributions of source code must retain the above copyright
        !            14:  *    notice, this list of conditions and the following disclaimer.
        !            15:  * 2. Redistributions in binary form must reproduce the above copyright
        !            16:  *    notice, this list of conditions and the following disclaimer in the
        !            17:  *    documentation and/or other materials provided with the distribution.
        !            18:  * 3. All advertising materials mentioning features or use of this software
        !            19:  *    must display the following acknowledgement:
        !            20:  *      This product includes software developed for the NetBSD Project by
        !            21:  *      Wasabi Systems, Inc.
        !            22:  * 4. The name of Wasabi Systems, Inc. may not be used to endorse
        !            23:  *    or promote products derived from this software without specific prior
        !            24:  *    written permission.
        !            25:  *
        !            26:  * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
        !            27:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
        !            28:  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
        !            29:  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
        !            30:  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
        !            31:  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
        !            32:  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
        !            33:  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
        !            34:  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
        !            35:  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
        !            36:  * POSSIBILITY OF SUCH DAMAGE.
        !            37:  */
        !            38:
        !            39: #include "assym.h"
        !            40:
        !            41: #include <machine/asm.h>
        !            42:
        !            43: #ifdef __XSCALE__
        !            44: #include "bcopyinout_xscale.S"
        !            45: #else
        !            46:
        !            47:        .text
        !            48:        .align  0
        !            49: /* Literal word holding the address used by the copy routines to find the current pcb. */
        !            50: #ifdef MULTIPROCESSOR
        !            51: .Lcpu_info:    /* address of cpu_info; indexed with the cpu_number() result */
        !            52:        .word   _C_LABEL(cpu_info)
        !            53: #else
        !            54: .Lcurpcb:      /* address of the curpcb variable */
        !            55:        .word _C_LABEL(curpcb)
        !            56: #endif
        !            57: /* SAVE_REGS / RESTORE_REGS: push and pop the registers the copy routines use beyond r0-r3. */
        !            58: #ifdef __PROG32
        !            59: #define SAVE_REGS      stmfd   sp!, {r4-r11}
        !            60: #define RESTORE_REGS   ldmfd   sp!, {r4-r11}
        !            61: #else
        !            62: /* Need to save R14_svc because it'll get trampled if we take a page fault. */
        !            63: #define SAVE_REGS      stmfd   sp!, {r4-r11, r14}
        !            64: #define RESTORE_REGS   ldmfd   sp!, {r4-r11, r14}
        !            65: #endif
        !            66: /* PREFETCH(rx,o): emits "pld [rx, #o]" on XScale, nothing otherwise.  This code is inside the #else arm of the "#ifdef __XSCALE__" above, so only the empty definition is selected here. */
        !            67: #if defined(__XSCALE__)
        !            68: #define HELLOCPP #
        !            69: #define PREFETCH(rx,o) pld     [ rx , HELLOCPP (o) ]
        !            70: #else
        !            71: #define PREFETCH(rx,o)
        !            72: #endif
        !            73:
        !            74: /*
        !            75:  * r0 = user space address
        !            76:  * r1 = kernel space address
        !            77:  * r2 = length
        !            78:  *
        !            79:  * Copies bytes from user space to kernel space
        !            80:  * Returns 0 in r0 once every byte has been copied (or when length is 0).
        !            81:  * We save/restore r4-r11 (plus r14 when __PROG32 is not defined):
        !            82:  * r4 = current pcb, r5 = previous PCB_ONFAULT value, r6-r11 are scratch
        !            83:  */
        !            84: ENTRY(copyin)
        !            85:        /* Quick exit if length is zero */
        !            86:        teq     r2, #0
        !            87:        moveq   r0, #0
        !            88:        moveq   pc, lr                  /* nothing pushed yet, so return directly */
        !            89:
        !            90:        SAVE_REGS
        !            91: #ifdef MULTIPROCESSOR
        !            92:        /* XXX Probably not appropriate for non-Hydra SMPs */
        !            93:        stmfd   sp!, {r0-r2, r14}       /* keep the arguments and lr across the call */
        !            94:        bl      _C_LABEL(cpu_number)
        !            95:        ldr     r4, .Lcpu_info
        !            96:        ldr     r4, [r4, r0, lsl #2]    /* r4 = cpu_info[cpu_number()] */
        !            97:        ldr     r4, [r4, #CI_CURPCB]    /* r4 = pcb pointer stored in that entry */
        !            98:        ldmfd   sp!, {r0-r2, r14}
        !            99: #else
        !           100:        ldr     r4, .Lcurpcb            /* r4 = &curpcb */
        !           101:        ldr     r4, [r4]                /* r4 = curpcb */
        !           102: #endif
        !           103:
        !           104:        ldr     r5, [r4, #PCB_ONFAULT]  /* remember the previous handler for restore on exit */
        !           105:        adr     r3, .Lcopyfault
        !           106:        str     r3, [r4, #PCB_ONFAULT]  /* faults during the copy resume at .Lcopyfault */
        !           107:
        !           108:        PREFETCH(r0, 0)
        !           109:        PREFETCH(r1, 0)
        !           110:
        !           111:        /*
        !           112:         * If not too many bytes, take the slow path.
        !           113:         */
        !           114:        cmp     r2, #0x08
        !           115:        blt     .Licleanup
        !           116:
        !           117:        /*
        !           118:         * Align destination to word boundary.
        !           119:         */
        !           120:        and     r6, r1, #0x3            /* r6 = destination offset within its word */
        !           121:        ldr     pc, [pc, r6, lsl #2]    /* pc reads as this insn + 8, i.e. the first .word below */
        !           122:        b       .Lialend                /* fills the slot before the table; the ldr above always branches */
        !           123:        .word   .Lialend                /* offset 0: already aligned */
        !           124:        .word   .Lial3                  /* offset 1: copy 3 bytes */
        !           125:        .word   .Lial2                  /* offset 2: copy 2 bytes */
        !           126:        .word   .Lial1                  /* offset 3: copy 1 byte */
        !           127: .Lial3:        ldrbt   r6, [r0], #1    /* ldrbt: byte load performed with user-mode access */
        !           128:        sub     r2, r2, #1
        !           129:        strb    r6, [r1], #1
        !           130: .Lial2:        ldrbt   r7, [r0], #1
        !           131:        sub     r2, r2, #1
        !           132:        strb    r7, [r1], #1
        !           133: .Lial1:        ldrbt   r6, [r0], #1
        !           134:        sub     r2, r2, #1
        !           135:        strb    r6, [r1], #1
        !           136: .Lialend:
        !           137:
        !           138:        /*
        !           139:         * If few bytes left, finish slow.
        !           140:         */
        !           141:        cmp     r2, #0x08
        !           142:        blt     .Licleanup
        !           143:
        !           144:        /*
        !           145:         * If source is not aligned, finish slow.
        !           146:         */
        !           147:        ands    r3, r0, #0x03
        !           148:        bne     .Licleanup
        !           149:
        !           150:        cmp     r2, #0x60       /* Must be > 0x5f for unrolled cacheline */
        !           151:        blt     .Licleanup8
        !           152:
        !           153:        /*
        !           154:         * Align destination to cacheline boundary.
        !           155:         * If source and destination are nicely aligned, this can be a big
        !           156:         * win.  If not, it's still cheaper to copy in groups of 32 even if
        !           157:         * we don't get the nice cacheline alignment.
        !           158:         */
        !           159:        and     r6, r1, #0x1f           /* r1 is word aligned here, so r6 is 0, 4, ..., 28 */
        !           160:        ldr     pc, [pc, r6]            /* r6 is used directly as the byte offset into the table */
        !           161:        b       .Licaligned             /* fills the slot before the table; never executed */
        !           162:        .word   .Licaligned             /* offset 0: already on a 32-byte boundary */
        !           163:        .word   .Lical28                /* offset 4: copy 28 bytes (7 words) */
        !           164:        .word   .Lical24
        !           165:        .word   .Lical20
        !           166:        .word   .Lical16
        !           167:        .word   .Lical12
        !           168:        .word   .Lical8
        !           169:        .word   .Lical4                 /* offset 28: copy 4 bytes (1 word) */
        !           170: .Lical28:ldrt  r6, [r0], #4    /* ldrt: word load performed with user-mode access */
        !           171:        sub     r2, r2, #4
        !           172:        str     r6, [r1], #4
        !           173: .Lical24:ldrt  r7, [r0], #4
        !           174:        sub     r2, r2, #4
        !           175:        str     r7, [r1], #4
        !           176: .Lical20:ldrt  r6, [r0], #4
        !           177:        sub     r2, r2, #4
        !           178:        str     r6, [r1], #4
        !           179: .Lical16:ldrt  r7, [r0], #4
        !           180:        sub     r2, r2, #4
        !           181:        str     r7, [r1], #4
        !           182: .Lical12:ldrt  r6, [r0], #4
        !           183:        sub     r2, r2, #4
        !           184:        str     r6, [r1], #4
        !           185: .Lical8:ldrt   r7, [r0], #4
        !           186:        sub     r2, r2, #4
        !           187:        str     r7, [r1], #4
        !           188: .Lical4:ldrt   r6, [r0], #4
        !           189:        sub     r2, r2, #4
        !           190:        str     r6, [r1], #4
        !           191:
        !           192:        /*
        !           193:         * We start with > 0x40 bytes to copy (>= 0x60 got us into this
        !           194:         * part of the code, and we may have knocked that down by as much
        !           195:         * as 0x1c getting aligned).
        !           196:         *
        !           197:         * This loop basically works out to:
        !           198:         * do {
        !           199:         *      prefetch-next-cacheline(s)
        !           200:         *      bytes -= 0x20;
        !           201:         *      copy cacheline
        !           202:         * } while (bytes >= 0x40);
        !           203:         * bytes -= 0x20;
        !           204:         * copy cacheline
        !           205:         */
        !           206: .Licaligned:
        !           207:        PREFETCH(r0, 32)
        !           208:        PREFETCH(r1, 32)
        !           209:
        !           210:        sub     r2, r2, #0x20
        !           211:
        !           212:        /* Copy a cacheline */
        !           213:        ldrt    r10, [r0], #4           /* eight single-word user loads, two store-multiples */
        !           214:        ldrt    r11, [r0], #4
        !           215:        ldrt    r6, [r0], #4
        !           216:        ldrt    r7, [r0], #4
        !           217:        ldrt    r8, [r0], #4
        !           218:        ldrt    r9, [r0], #4
        !           219:        stmia   r1!, {r10-r11}          /* store words 0-1, freeing r10/r11 for words 6-7 */
        !           220:        ldrt    r10, [r0], #4
        !           221:        ldrt    r11, [r0], #4
        !           222:        stmia   r1!, {r6-r11}           /* store words 2-7 */
        !           223:
        !           224:        cmp     r2, #0x40
        !           225:        bge     .Licaligned
        !           226:
        !           227:        sub     r2, r2, #0x20
        !           228:
        !           229:        /* Copy a cacheline */
        !           230:        ldrt    r10, [r0], #4
        !           231:        ldrt    r11, [r0], #4
        !           232:        ldrt    r6, [r0], #4
        !           233:        ldrt    r7, [r0], #4
        !           234:        ldrt    r8, [r0], #4
        !           235:        ldrt    r9, [r0], #4
        !           236:        stmia   r1!, {r10-r11}
        !           237:        ldrt    r10, [r0], #4
        !           238:        ldrt    r11, [r0], #4
        !           239:        stmia   r1!, {r6-r11}
        !           240:
        !           241:        cmp     r2, #0x08
        !           242:        blt     .Liprecleanup
        !           243:
        !           244: .Licleanup8:                           /* copy 8 bytes per pass while at least 8 remain */
        !           245:        ldrt    r8, [r0], #4
        !           246:        ldrt    r9, [r0], #4
        !           247:        sub     r2, r2, #8
        !           248:        stmia   r1!, {r8, r9}
        !           249:        cmp     r2, #8
        !           250:        bge     .Licleanup8
        !           251:
        !           252: .Liprecleanup:
        !           253:        /*
        !           254:         * If we're done, bail.
        !           255:         */
        !           256:        cmp     r2, #0
        !           257:        beq     .Liout                  /* leave through copyin's own exit label */
        !           258:
        !           259: .Licleanup:                            /* byte-at-a-time tail; r2 is non-zero on entry */
        !           260:        and     r6, r2, #0x3
        !           261:        ldr     pc, [pc, r6, lsl #2]    /* dispatch on (remaining & 3) */
        !           262:        b       .Licend                 /* fills the slot before the table; never executed */
        !           263:        .word   .Lic4                   /* remainder 0: copy 4 bytes */
        !           264:        .word   .Lic1                   /* remainder 1: copy 1 byte */
        !           265:        .word   .Lic2                   /* remainder 2: copy 2 bytes */
        !           266:        .word   .Lic3                   /* remainder 3: copy 3 bytes */
        !           267: .Lic4: ldrbt   r6, [r0], #1
        !           268:        sub     r2, r2, #1
        !           269:        strb    r6, [r1], #1
        !           270: .Lic3: ldrbt   r7, [r0], #1
        !           271:        sub     r2, r2, #1
        !           272:        strb    r7, [r1], #1
        !           273: .Lic2: ldrbt   r6, [r0], #1
        !           274:        sub     r2, r2, #1
        !           275:        strb    r6, [r1], #1
        !           276: .Lic1: ldrbt   r7, [r0], #1
        !           277:        subs    r2, r2, #1              /* sets Z when the last byte has been counted */
        !           278:        strb    r7, [r1], #1            /* strb leaves the flags from subs intact */
        !           279: .Licend:
        !           280:        bne     .Licleanup              /* more bytes left: go round again */
        !           281:
        !           282: .Liout:
        !           283:        mov     r0, #0                  /* success */
        !           284:
        !           285:        str     r5, [r4, #PCB_ONFAULT]  /* put back the previous fault handler */
        !           286:        RESTORE_REGS
        !           287:
        !           288:        mov     pc, lr
        !           289:
        !           290: .Lcopyfault:                           /* installed in PCB_ONFAULT by copyin, copyout and kcopy */
        !           291:        str     r5, [r4, #PCB_ONFAULT]  /* put back the previous fault handler */
        !           292:        RESTORE_REGS
        !           293:                                        /* r0 is not written here. NOTE(review): presumably the abort handler has placed an error code in r0 - confirm */
        !           294:        mov     pc, lr
        !           295:
        !           296: /*
        !           297:  * r0 = kernel space address
        !           298:  * r1 = user space address
        !           299:  * r2 = length
        !           300:  *
        !           301:  * Copies bytes from kernel space to user space
        !           302:  * Returns 0 in r0 once every byte has been copied (or when length is 0).
        !           303:  * We save/restore r4-r11 (plus r14 when __PROG32 is not defined):
        !           304:  * r4 = current pcb, r5 = previous PCB_ONFAULT value, r6-r11 are scratch
        !           305:  */
        !           306:
        !           307: ENTRY(copyout)
        !           308:        /* Quick exit if length is zero */
        !           309:        teq     r2, #0
        !           310:        moveq   r0, #0
        !           311:        moveq   pc, lr                  /* nothing pushed yet, so return directly */
        !           312:
        !           313:        SAVE_REGS
        !           314: #ifdef MULTIPROCESSOR
        !           315:        /* XXX Probably not appropriate for non-Hydra SMPs */
        !           316:        stmfd   sp!, {r0-r2, r14}       /* keep the arguments and lr across the call */
        !           317:        bl      _C_LABEL(cpu_number)
        !           318:        ldr     r4, .Lcpu_info
        !           319:        ldr     r4, [r4, r0, lsl #2]    /* r4 = cpu_info[cpu_number()] */
        !           320:        ldr     r4, [r4, #CI_CURPCB]    /* r4 = pcb pointer stored in that entry */
        !           321:        ldmfd   sp!, {r0-r2, r14}
        !           322: #else
        !           323:        ldr     r4, .Lcurpcb            /* r4 = &curpcb */
        !           324:        ldr     r4, [r4]                /* r4 = curpcb */
        !           325: #endif
        !           326:
        !           327:        ldr     r5, [r4, #PCB_ONFAULT]  /* remember the previous handler for restore on exit */
        !           328:        adr     r3, .Lcopyfault
        !           329:        str     r3, [r4, #PCB_ONFAULT]  /* faults during the copy resume at .Lcopyfault */
        !           330:
        !           331:        PREFETCH(r0, 0)
        !           332:        PREFETCH(r1, 0)
        !           333:
        !           334:        /*
        !           335:         * If not too many bytes, take the slow path.
        !           336:         */
        !           337:        cmp     r2, #0x08
        !           338:        blt     .Lcleanup
        !           339:
        !           340:        /*
        !           341:         * Align destination to word boundary.
        !           342:         */
        !           343:        and     r6, r1, #0x3            /* r6 = destination offset within its word */
        !           344:        ldr     pc, [pc, r6, lsl #2]    /* pc reads as this insn + 8, i.e. the first .word below */
        !           345:        b       .Lalend                 /* fills the slot before the table; the ldr above always branches */
        !           346:        .word   .Lalend                 /* offset 0: already aligned */
        !           347:        .word   .Lal3                   /* offset 1: copy 3 bytes */
        !           348:        .word   .Lal2                   /* offset 2: copy 2 bytes */
        !           349:        .word   .Lal1                   /* offset 3: copy 1 byte */
        !           350: .Lal3: ldrb    r6, [r0], #1
        !           351:        sub     r2, r2, #1
        !           352:        strbt   r6, [r1], #1            /* strbt: byte store performed with user-mode access */
        !           353: .Lal2: ldrb    r7, [r0], #1
        !           354:        sub     r2, r2, #1
        !           355:        strbt   r7, [r1], #1
        !           356: .Lal1: ldrb    r6, [r0], #1
        !           357:        sub     r2, r2, #1
        !           358:        strbt   r6, [r1], #1
        !           359: .Lalend:
        !           360:
        !           361:        /*
        !           362:         * If few bytes left, finish slow.
        !           363:         */
        !           364:        cmp     r2, #0x08
        !           365:        blt     .Lcleanup
        !           366:
        !           367:        /*
        !           368:         * If source is not aligned, finish slow.
        !           369:         */
        !           370:        ands    r3, r0, #0x03
        !           371:        bne     .Lcleanup
        !           372:
        !           373:        cmp     r2, #0x60       /* Must be > 0x5f for unrolled cacheline */
        !           374:        blt     .Lcleanup8
        !           375:
        !           376:        /*
        !           377:         * Align destination to cacheline boundary (only r1 is examined; the source is known to be word aligned, nothing more).
        !           378:         */
        !           379:        and     r6, r1, #0x1f           /* r1 is word aligned here, so r6 is 0, 4, ..., 28 */
        !           380:        ldr     pc, [pc, r6]            /* r6 is used directly as the byte offset into the table */
        !           381:        b       .Lcaligned              /* fills the slot before the table; never executed */
        !           382:        .word   .Lcaligned              /* offset 0: already on a 32-byte boundary */
        !           383:        .word   .Lcal28                 /* offset 4: copy 28 bytes (7 words) */
        !           384:        .word   .Lcal24
        !           385:        .word   .Lcal20
        !           386:        .word   .Lcal16
        !           387:        .word   .Lcal12
        !           388:        .word   .Lcal8
        !           389:        .word   .Lcal4                  /* offset 28: copy 4 bytes (1 word) */
        !           390: .Lcal28:ldr    r6, [r0], #4
        !           391:        sub     r2, r2, #4
        !           392:        strt    r6, [r1], #4            /* strt: word store performed with user-mode access */
        !           393: .Lcal24:ldr    r7, [r0], #4
        !           394:        sub     r2, r2, #4
        !           395:        strt    r7, [r1], #4
        !           396: .Lcal20:ldr    r6, [r0], #4
        !           397:        sub     r2, r2, #4
        !           398:        strt    r6, [r1], #4
        !           399: .Lcal16:ldr    r7, [r0], #4
        !           400:        sub     r2, r2, #4
        !           401:        strt    r7, [r1], #4
        !           402: .Lcal12:ldr    r6, [r0], #4
        !           403:        sub     r2, r2, #4
        !           404:        strt    r6, [r1], #4
        !           405: .Lcal8:        ldr     r7, [r0], #4
        !           406:        sub     r2, r2, #4
        !           407:        strt    r7, [r1], #4
        !           408: .Lcal4:        ldr     r6, [r0], #4
        !           409:        sub     r2, r2, #4
        !           410:        strt    r6, [r1], #4
        !           411:
        !           412:        /*
        !           413:         * We start with > 0x40 bytes to copy (>= 0x60 got us into this
        !           414:         * part of the code, and we may have knocked that down by as much
        !           415:         * as 0x1c getting aligned).
        !           416:         *
        !           417:         * This loop basically works out to:
        !           418:         * do {
        !           419:         *      prefetch-next-cacheline(s)
        !           420:         *      bytes -= 0x20;
        !           421:         *      copy cacheline
        !           422:         * } while (bytes >= 0x40);
        !           423:         * bytes -= 0x20;
        !           424:         * copy cacheline
        !           425:         */
        !           426: .Lcaligned:
        !           427:        PREFETCH(r0, 32)
        !           428:        PREFETCH(r1, 32)
        !           429:
        !           430:        sub     r2, r2, #0x20
        !           431:
        !           432:        /* Copy a cacheline */
        !           433:        ldmia   r0!, {r6-r11}           /* load words 0-5 from the kernel source */
        !           434:        strt    r6, [r1], #4
        !           435:        strt    r7, [r1], #4
        !           436:        ldmia   r0!, {r6-r7}            /* r6/r7 already stored; reuse them for words 6-7 */
        !           437:        strt    r8, [r1], #4
        !           438:        strt    r9, [r1], #4
        !           439:        strt    r10, [r1], #4
        !           440:        strt    r11, [r1], #4
        !           441:        strt    r6, [r1], #4
        !           442:        strt    r7, [r1], #4
        !           443:
        !           444:        cmp     r2, #0x40
        !           445:        bge     .Lcaligned
        !           446:
        !           447:        sub     r2, r2, #0x20
        !           448:
        !           449:        /* Copy a cacheline */
        !           450:        ldmia   r0!, {r6-r11}
        !           451:        strt    r6, [r1], #4
        !           452:        strt    r7, [r1], #4
        !           453:        ldmia   r0!, {r6-r7}
        !           454:        strt    r8, [r1], #4
        !           455:        strt    r9, [r1], #4
        !           456:        strt    r10, [r1], #4
        !           457:        strt    r11, [r1], #4
        !           458:        strt    r6, [r1], #4
        !           459:        strt    r7, [r1], #4
        !           460:
        !           461:        cmp     r2, #0x08
        !           462:        blt     .Lprecleanup
        !           463:
        !           464: .Lcleanup8:                            /* copy 8 bytes per pass while at least 8 remain */
        !           465:        ldmia   r0!, {r8-r9}
        !           466:        sub     r2, r2, #8
        !           467:        strt    r8, [r1], #4
        !           468:        strt    r9, [r1], #4
        !           469:        cmp     r2, #8
        !           470:        bge     .Lcleanup8
        !           471:
        !           472: .Lprecleanup:
        !           473:        /*
        !           474:         * If we're done, bail.
        !           475:         */
        !           476:        cmp     r2, #0
        !           477:        beq     .Lout
        !           478:
        !           479: .Lcleanup:                             /* byte-at-a-time tail; r2 is non-zero on entry */
        !           480:        and     r6, r2, #0x3
        !           481:        ldr     pc, [pc, r6, lsl #2]    /* dispatch on (remaining & 3) */
        !           482:        b       .Lcend                  /* fills the slot before the table; never executed */
        !           483:        .word   .Lc4                    /* remainder 0: copy 4 bytes */
        !           484:        .word   .Lc1                    /* remainder 1: copy 1 byte */
        !           485:        .word   .Lc2                    /* remainder 2: copy 2 bytes */
        !           486:        .word   .Lc3                    /* remainder 3: copy 3 bytes */
        !           487: .Lc4:  ldrb    r6, [r0], #1
        !           488:        sub     r2, r2, #1
        !           489:        strbt   r6, [r1], #1
        !           490: .Lc3:  ldrb    r7, [r0], #1
        !           491:        sub     r2, r2, #1
        !           492:        strbt   r7, [r1], #1
        !           493: .Lc2:  ldrb    r6, [r0], #1
        !           494:        sub     r2, r2, #1
        !           495:        strbt   r6, [r1], #1
        !           496: .Lc1:  ldrb    r7, [r0], #1
        !           497:        subs    r2, r2, #1              /* sets Z when the last byte has been counted */
        !           498:        strbt   r7, [r1], #1            /* strbt leaves the flags from subs intact */
        !           499: .Lcend:
        !           500:        bne     .Lcleanup               /* more bytes left: go round again */
        !           501:
        !           502: .Lout:
        !           503:        mov     r0, #0                  /* success */
        !           504:
        !           505:        str     r5, [r4, #PCB_ONFAULT]  /* put back the previous fault handler */
        !           506:        RESTORE_REGS
        !           507:
        !           508:        mov     pc, lr
        !           509:
        !           510: /*
        !           511:  * r0 = kernel space source address
        !           512:  * r1 = kernel space destination address
        !           513:  * r2 = length
        !           514:  *
        !           515:  * Copies bytes from kernel space to kernel space, aborting on page fault
        !           516:  *
        !           517:  * Copy of copyout, but using plain str/strb/stmia stores instead of the user-mode strt/strbt.
        !           518:  */
        !           519:
        !           520: ENTRY(kcopy)
        !           521:        /* Quick exit if length is zero */
        !           522:        teq     r2, #0
        !           523:        moveq   r0, #0
        !           524:        moveq   pc, lr
        !           525:
        !           526:        SAVE_REGS
        !           527: #ifdef MULTIPROCESSOR
        !           528:        /* XXX Probably not appropriate for non-Hydra SMPs */
        !           529:        stmfd   sp!, {r0-r2, r14}
        !           530:        bl      _C_LABEL(cpu_number)
        !           531:        ldr     r4, .Lcpu_info
        !           532:        ldr     r4, [r4, r0, lsl #2]
        !           533:        ldr     r4, [r4, #CI_CURPCB]
        !           534:        ldmfd   sp!, {r0-r2, r14}
        !           535: #else
        !           536:        ldr     r4, .Lcurpcb
        !           537:        ldr     r4, [r4]
        !           538: #endif
        !           539:
        !           540:        ldr     r5, [r4, #PCB_ONFAULT]
        !           541:        adr     r3, .Lcopyfault
        !           542:        str     r3, [r4, #PCB_ONFAULT]
        !           543:
        !           544:        PREFETCH(r0, 0)
        !           545:        PREFETCH(r1, 0)
        !           546:
        !           547:        /*
        !           548:         * If not too many bytes, take the slow path.
        !           549:         */
        !           550:        cmp     r2, #0x08
        !           551:        blt     .Lkcleanup
        !           552:
        !           553:        /*
        !           554:         * Align destination to word boundary.
        !           555:         */
        !           556:        and     r6, r1, #0x3
        !           557:        ldr     pc, [pc, r6, lsl #2]
        !           558:        b       .Lkalend
        !           559:        .word   .Lkalend
        !           560:        .word   .Lkal3
        !           561:        .word   .Lkal2
        !           562:        .word   .Lkal1
        !           563: .Lkal3:        ldrb    r6, [r0], #1
        !           564:        sub     r2, r2, #1
        !           565:        strb    r6, [r1], #1
        !           566: .Lkal2:        ldrb    r7, [r0], #1
        !           567:        sub     r2, r2, #1
        !           568:        strb    r7, [r1], #1
        !           569: .Lkal1:        ldrb    r6, [r0], #1
        !           570:        sub     r2, r2, #1
        !           571:        strb    r6, [r1], #1
        !           572: .Lkalend:
        !           573:
        !           574:        /*
        !           575:         * If few bytes left, finish slow.
        !           576:         */
        !           577:        cmp     r2, #0x08
        !           578:        blt     .Lkcleanup
        !           579:
        !           580:        /*
        !           581:         * If source is not aligned, finish slow.
        !           582:         */
        !           583:        ands    r3, r0, #0x03
        !           584:        bne     .Lkcleanup
        !           585:
        !           586:        cmp     r2, #0x60       /* Must be > 0x5f for unrolled cacheline */
        !           587:        blt     .Lkcleanup8
        !           588:
        !           589:        /*
        !           590:         * Align source & destination to cacheline boundary.
        !           591:         */
        !           592:        and     r6, r1, #0x1f
        !           593:        ldr     pc, [pc, r6]
        !           594:        b       .Lkcaligned
        !           595:        .word   .Lkcaligned
        !           596:        .word   .Lkcal28
        !           597:        .word   .Lkcal24
        !           598:        .word   .Lkcal20
        !           599:        .word   .Lkcal16
        !           600:        .word   .Lkcal12
        !           601:        .word   .Lkcal8
        !           602:        .word   .Lkcal4
        !           603: .Lkcal28:ldr   r6, [r0], #4
        !           604:        sub     r2, r2, #4
        !           605:        str     r6, [r1], #4
        !           606: .Lkcal24:ldr   r7, [r0], #4
        !           607:        sub     r2, r2, #4
        !           608:        str     r7, [r1], #4
        !           609: .Lkcal20:ldr   r6, [r0], #4
        !           610:        sub     r2, r2, #4
        !           611:        str     r6, [r1], #4
        !           612: .Lkcal16:ldr   r7, [r0], #4
        !           613:        sub     r2, r2, #4
        !           614:        str     r7, [r1], #4
        !           615: .Lkcal12:ldr   r6, [r0], #4
        !           616:        sub     r2, r2, #4
        !           617:        str     r6, [r1], #4
        !           618: .Lkcal8:ldr    r7, [r0], #4
        !           619:        sub     r2, r2, #4
        !           620:        str     r7, [r1], #4
        !           621: .Lkcal4:ldr    r6, [r0], #4
        !           622:        sub     r2, r2, #4
        !           623:        str     r6, [r1], #4
        !           624:
        !           625:        /*
        !           626:         * We start with > 0x40 bytes to copy (>= 0x60 got us into this
        !           627:         * part of the code, and we may have knocked that down by as much
        !           628:         * as 0x1c getting aligned).
        !           629:         *
        !           630:         * This loop basically works out to:
        !           631:         * do {
        !           632:         *      prefetch-next-cacheline(s)
        !           633:         *      bytes -= 0x20;
        !           634:         *      copy cacheline
        !           635:         * } while (bytes >= 0x40);
        !           636:         * bytes -= 0x20;
        !           637:         * copy cacheline
        !           638:         */
        !           639: .Lkcaligned:
        !           640:        PREFETCH(r0, 32)
        !           641:        PREFETCH(r1, 32)
        !           642:
        !           643:        sub     r2, r2, #0x20
        !           644:
        !           645:        /* Copy a cacheline */
        !           646:        ldmia   r0!, {r6-r11}
        !           647:        stmia   r1!, {r6, r7}
        !           648:        ldmia   r0!, {r6, r7}
        !           649:        stmia   r1!, {r8-r11}
        !           650:        stmia   r1!, {r6, r7}
        !           651:
        !           652:        cmp     r2, #0x40
        !           653:        bge     .Lkcaligned
        !           654:
        !           655:        sub     r2, r2, #0x20
        !           656:
        !           657:        /* Copy a cacheline */
        !           658:        ldmia   r0!, {r6-r11}
        !           659:        stmia   r1!, {r6-r7}
        !           660:        ldmia   r0!, {r6-r7}
        !           661:        stmia   r1!, {r8-r11}
        !           662:        stmia   r1!, {r6-r7}
        !           663:
        !           664:        cmp     r2, #0x08
        !           665:        blt     .Lkprecleanup
        !           666:
        !           667: .Lkcleanup8:
        !           668:        ldmia   r0!, {r8-r9}
        !           669:        sub     r2, r2, #8
        !           670:        stmia   r1!, {r8-r9}
        !           671:        cmp     r2, #8
        !           672:        bge     .Lkcleanup8
        !           673:
        !           674: .Lkprecleanup:
        !           675:        /*
        !           676:         * If we're done, bail.
        !           677:         */
        !           678:        cmp     r2, #0
        !           679:        beq     .Lkout
        !           680:
        !           681: .Lkcleanup:
        !           682:        and     r6, r2, #0x3
        !           683:        ldr     pc, [pc, r6, lsl #2]
        !           684:        b       .Lkcend
        !           685:        .word   .Lkc4
        !           686:        .word   .Lkc1
        !           687:        .word   .Lkc2
        !           688:        .word   .Lkc3
        !           689: .Lkc4: ldrb    r6, [r0], #1
        !           690:        sub     r2, r2, #1
        !           691:        strb    r6, [r1], #1
        !           692: .Lkc3: ldrb    r7, [r0], #1
        !           693:        sub     r2, r2, #1
        !           694:        strb    r7, [r1], #1
        !           695: .Lkc2: ldrb    r6, [r0], #1
        !           696:        sub     r2, r2, #1
        !           697:        strb    r6, [r1], #1
        !           698: .Lkc1: ldrb    r7, [r0], #1
        !           699:        subs    r2, r2, #1
        !           700:        strb    r7, [r1], #1
        !           701: .Lkcend:
        !           702:        bne     .Lkcleanup
        !           703:
        !           704: .Lkout:
        !           705:        mov     r0, #0
        !           706:
        !           707:        str     r5, [r4, #PCB_ONFAULT]
        !           708:        RESTORE_REGS
        !           709:
        !           710:        mov     pc, lr
        !           711: #endif /* !__XSCALE__ */
        !           712:
        !           713: #ifdef __PROG32
        !           714: /*
        !           715:  * int badaddr_read_1(const uint8_t *src, uint8_t *dest)
        !           716:  *
        !           717:  * Copies a single 8-bit value from src to dest, returning 0 on success,
        !           718:  * else EFAULT if a page fault occurred.
        !           719:  */
        !           720: ENTRY(badaddr_read_1)
        !           721: #ifdef MULTIPROCESSOR
        !           722:        /* XXX Probably not appropriate for non-Hydra SMPs */
        !           723:        stmfd   sp!, {r0-r1, r14}       /* keep src, dest and lr across the call below */
        !           724:        bl      _C_LABEL(cpu_number)    /* r0 = value returned by cpu_number() */
        !           725:        ldr     r2, .Lcpu_info          /* r2 = word stored at .Lcpu_info (presumably the per-CPU table base -- confirm) */
        !           726:        ldr     r2, [r2, r0, lsl #2]    /* r2 = table entry r0 (4-byte entries) */
        !           727:        ldr     r2, [r2, #CI_CURPCB]    /* r2 = that entry's CI_CURPCB field */
        !           728:        ldmfd   sp!, {r0-r1, r14}       /* restore src, dest and lr */
        !           729: #else
        !           730:        ldr     r2, .Lcurpcb            /* r2 = word stored at .Lcurpcb ... */
        !           731:        ldr     r2, [r2]                /* ... dereferenced: r2 = current pcb pointer */
        !           732: #endif
        !           733:        ldr     ip, [r2, #PCB_ONFAULT]  /* ip = previous onfault value, restored at 1: */
        !           734:        adr     r3, 1f                  /* r3 = address of the local label 1: below */
        !           735:        str     r3, [r2, #PCB_ONFAULT]  /* install 1: as the onfault target while we touch src/dest */
        !           736:        nop                             /* NOTE(review): reason for the nop padding is not stated here; presumably isolates the access -- confirm */
        !           737:        nop
        !           738:        nop
        !           739:        ldrb    r3, [r0]                /* r3 = byte at src; this access may fault */
        !           740:        nop
        !           741:        nop
        !           742:        nop
        !           743:        strb    r3, [r1]                /* *dest = r3 */
        !           744:        mov     r0, #0          /* No fault */
        !           745: 1:     str     ip, [r2, #PCB_ONFAULT]  /* put back the previous onfault value; presumably also entered from the fault handler with r0 = error and r2/ip intact -- confirm */
        !           746:        mov     pc, lr                  /* return r0 to the caller */
        !           747:
        !           748: /*
        !           749:  * int badaddr_read_2(const uint16_t *src, uint16_t *dest)
        !           750:  *
        !           751:  * Copies a single 16-bit value from src to dest, returning 0 on success,
        !           752:  * else EFAULT if a page fault occurred.
        !           753:  */
        !           754: ENTRY(badaddr_read_2)
        !           755: #ifdef MULTIPROCESSOR
        !           756:        /* XXX Probably not appropriate for non-Hydra SMPs */
        !           757:        stmfd   sp!, {r0-r1, r14}       /* keep src, dest and lr across the call below */
        !           758:        bl      _C_LABEL(cpu_number)    /* r0 = value returned by cpu_number() */
        !           759:        ldr     r2, .Lcpu_info          /* r2 = word stored at .Lcpu_info (presumably the per-CPU table base -- confirm) */
        !           760:        ldr     r2, [r2, r0, lsl #2]    /* r2 = table entry r0 (4-byte entries) */
        !           761:        ldr     r2, [r2, #CI_CURPCB]    /* r2 = that entry's CI_CURPCB field */
        !           762:        ldmfd   sp!, {r0-r1, r14}       /* restore src, dest and lr */
        !           763: #else
        !           764:        ldr     r2, .Lcurpcb            /* r2 = word stored at .Lcurpcb ... */
        !           765:        ldr     r2, [r2]                /* ... dereferenced: r2 = current pcb pointer */
        !           766: #endif
        !           767:        ldr     ip, [r2, #PCB_ONFAULT]  /* ip = previous onfault value, restored at 1: */
        !           768:        adr     r3, 1f                  /* r3 = address of the local label 1: below */
        !           769:        str     r3, [r2, #PCB_ONFAULT]  /* install 1: as the onfault target while we touch src/dest */
        !           770:        nop                             /* NOTE(review): reason for the nop padding is not stated here; presumably isolates the access -- confirm */
        !           771:        nop
        !           772:        nop
        !           773:        ldrh    r3, [r0]                /* r3 = halfword at src; this access may fault */
        !           774:        nop
        !           775:        nop
        !           776:        nop
        !           777:        strh    r3, [r1]                /* *dest = r3 (16-bit store) */
        !           778:        mov     r0, #0          /* No fault */
        !           779: 1:     str     ip, [r2, #PCB_ONFAULT]  /* put back the previous onfault value; presumably also entered from the fault handler with r0 = error and r2/ip intact -- confirm */
        !           780:        mov     pc, lr                  /* return r0 to the caller */
        !           781:
        !           782: /*
        !           783:  * int badaddr_read_4(const uint32_t *src, uint32_t *dest)
        !           784:  *
        !           785:  * Copies a single 32-bit value from src to dest, returning 0 on success,
        !           786:  * else EFAULT if a page fault occurred.
        !           787:  */
        !           788: ENTRY(badaddr_read_4)
        !           789: #ifdef MULTIPROCESSOR
        !           790:        /* XXX Probably not appropriate for non-Hydra SMPs */
        !           791:        stmfd   sp!, {r0-r1, r14}       /* keep src, dest and lr across the call below */
        !           792:        bl      _C_LABEL(cpu_number)    /* r0 = value returned by cpu_number() */
        !           793:        ldr     r2, .Lcpu_info          /* r2 = word stored at .Lcpu_info (presumably the per-CPU table base -- confirm) */
        !           794:        ldr     r2, [r2, r0, lsl #2]    /* r2 = table entry r0 (4-byte entries) */
        !           795:        ldr     r2, [r2, #CI_CURPCB]    /* r2 = that entry's CI_CURPCB field */
        !           796:        ldmfd   sp!, {r0-r1, r14}       /* restore src, dest and lr */
        !           797: #else
        !           798:        ldr     r2, .Lcurpcb            /* r2 = word stored at .Lcurpcb ... */
        !           799:        ldr     r2, [r2]                /* ... dereferenced: r2 = current pcb pointer */
        !           800: #endif
        !           801:        ldr     ip, [r2, #PCB_ONFAULT]  /* ip = previous onfault value, restored at 1: */
        !           802:        adr     r3, 1f                  /* r3 = address of the local label 1: below */
        !           803:        str     r3, [r2, #PCB_ONFAULT]  /* install 1: as the onfault target while we touch src/dest */
        !           804:        nop                             /* NOTE(review): reason for the nop padding is not stated here; presumably isolates the access -- confirm */
        !           805:        nop
        !           806:        nop
        !           807:        ldr     r3, [r0]                /* r3 = word at src; this access may fault */
        !           808:        nop
        !           809:        nop
        !           810:        nop
        !           811:        str     r3, [r1]                /* *dest = r3 (32-bit store) */
        !           812:        mov     r0, #0          /* No fault */
        !           813: 1:     str     ip, [r2, #PCB_ONFAULT]  /* put back the previous onfault value; presumably also entered from the fault handler with r0 = error and r2/ip intact -- confirm */
        !           814:        mov     pc, lr                  /* return r0 to the caller */
        !           815: #endif /* __PROG32 */

CVSweb