sys/arch/arm/arm/bcopyinout.S - annotate

Return to bcopyinout.S CVS log
Up to [local] / sys / arch / arm / arm
Annotation of sys/arch/arm/arm/bcopyinout.S, Revision 1.1.1.1

1.1       nbrk        1: /*     $OpenBSD: bcopyinout.S,v 1.1 2004/02/01 05:09:48 drahn Exp $    */
                      2: /*     $NetBSD: bcopyinout.S,v 1.13 2003/10/31 16:54:05 scw Exp $      */
                      3:
                      4: /*
                      5:  * Copyright (c) 2002 Wasabi Systems, Inc.
                      6:  * All rights reserved.
                      7:  *
                      8:  * Written by Allen Briggs for Wasabi Systems, Inc.
                      9:  *
                     10:  * Redistribution and use in source and binary forms, with or without
                     11:  * modification, are permitted provided that the following conditions
                     12:  * are met:
                     13:  * 1. Redistributions of source code must retain the above copyright
                     14:  *    notice, this list of conditions and the following disclaimer.
                     15:  * 2. Redistributions in binary form must reproduce the above copyright
                     16:  *    notice, this list of conditions and the following disclaimer in the
                     17:  *    documentation and/or other materials provided with the distribution.
                     18:  * 3. All advertising materials mentioning features or use of this software
                     19:  *    must display the following acknowledgement:
                     20:  *      This product includes software developed for the NetBSD Project by
                     21:  *      Wasabi Systems, Inc.
                     22:  * 4. The name of Wasabi Systems, Inc. may not be used to endorse
                     23:  *    or promote products derived from this software without specific prior
                     24:  *    written permission.
                     25:  *
                     26:  * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
                     27:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
                     28:  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
                     29:  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
                     30:  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
                     31:  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
                     32:  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
                     33:  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
                     34:  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
                     35:  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
                     36:  * POSSIBILITY OF SUCH DAMAGE.
                     37:  */
                     38:
                     39: #include "assym.h"
                     40:
                     41: #include <machine/asm.h>
                     42:
                     43: #ifdef __XSCALE__
                     44: #include "bcopyinout_xscale.S"
                     45: #else
                     46:
                     47:        .text
                     48:        .align  0
                     49:
                     50: #ifdef MULTIPROCESSOR
                     51: .Lcpu_info:
                     52:        .word   _C_LABEL(cpu_info)      /* literal: address of cpu_info, indexed by the cpu_number() result */
                     53: #else
                     54: .Lcurpcb:
                     55:        .word _C_LABEL(curpcb)          /* literal: address of the curpcb variable */
                     56: #endif /* MULTIPROCESSOR */
                     57:
                     58: #ifdef __PROG32
                     59: #define SAVE_REGS      stmfd   sp!, {r4-r11}
                     60: #define RESTORE_REGS   ldmfd   sp!, {r4-r11}
                     61: #else
                     62: /* Not __PROG32: also save R14_svc because it'll get trampled if we take a page fault. */
                     63: #define SAVE_REGS      stmfd   sp!, {r4-r11, r14}
                     64: #define RESTORE_REGS   ldmfd   sp!, {r4-r11, r14}
                     65: #endif /* __PROG32 */
                     66:
                     67: #if defined(__XSCALE__) /* NOTE(review): this is inside the #else of #ifdef __XSCALE__ above, so this arm looks unreachable - confirm */
                     68: #define HELLOCPP #
                     69: #define PREFETCH(rx,o) pld     [ rx , HELLOCPP (o) ]
                     70: #else /* PREFETCH expands to nothing */
                     71: #define PREFETCH(rx,o)
                     72: #endif /* __XSCALE__ */
                     73:
                     74: /*
                     75:  * r0 = user space address (source, read with ldrt/ldrbt)
                     76:  * r1 = kernel space address (destination)
                     77:  * r2 = length in bytes
                     78:  *
                     79:  * Copies bytes from user space to kernel space; r0 = 0 on normal exit.
                     80:  *
                     81:  * We save/restore r4-r11 (plus r14 when __PROG32 is not defined):
                     82:  * r4 = pcb pointer, r5 = previous PCB_ONFAULT value, r6-r11 are scratch
                     83:  */
                     84: ENTRY(copyin)
                     85:        /* Quick exit if length is zero */
                     86:        teq     r2, #0
                     87:        moveq   r0, #0
                     88:        moveq   pc, lr
                     89:
                     90:        SAVE_REGS
                     91: #ifdef MULTIPROCESSOR
                     92:        /* XXX Probably not appropriate for non-Hydra SMPs */
                     93:        stmfd   sp!, {r0-r2, r14}       /* keep the arguments and lr across the call */
                     94:        bl      _C_LABEL(cpu_number)
                     95:        ldr     r4, .Lcpu_info
                     96:        ldr     r4, [r4, r0, lsl #2]    /* r4 = cpu_info[cpu_number()] */
                     97:        ldr     r4, [r4, #CI_CURPCB]    /* r4 = CI_CURPCB field of that entry */
                     98:        ldmfd   sp!, {r0-r2, r14}
                     99: #else
                    100:        ldr     r4, .Lcurpcb
                    101:        ldr     r4, [r4]                /* r4 = curpcb */
                    102: #endif
                    103:
                    104:        ldr     r5, [r4, #PCB_ONFAULT]  /* r5 = previous value, restored on every exit */
                    105:        adr     r3, .Lcopyfault
                    106:        str     r3, [r4, #PCB_ONFAULT]  /* .Lcopyfault is installed for the duration of the copy */
                    107:
                    108:        PREFETCH(r0, 0)
                    109:        PREFETCH(r1, 0)
                    110:
                    111:        /*
                    112:         * Fewer than 8 bytes: go straight to the byte-at-a-time path.
                    113:         */
                    114:        cmp     r2, #0x08
                    115:        blt     .Licleanup              /* NOTE(review): signed test; r2 >= 0x80000000 also takes the byte path */
                    116:
                    117:        /*
                    118:         * Align destination to word boundary.
                    119:         */
                    120:        and     r6, r1, #0x3
                    121:        ldr     pc, [pc, r6, lsl #2]    /* pc reads as . + 8, i.e. the first .word below */
                    122:        b       .Lialend                /* filler so the table starts at . + 8 */
                    123:        .word   .Lialend                /* r1 & 3 == 0: already aligned */
                    124:        .word   .Lial3                  /* r1 & 3 == 1: copy 3 bytes */
                    125:        .word   .Lial2                  /* r1 & 3 == 2: copy 2 bytes */
                    126:        .word   .Lial1                  /* r1 & 3 == 3: copy 1 byte */
                    127: .Lial3:        ldrbt   r6, [r0], #1
                    128:        sub     r2, r2, #1
                    129:        strb    r6, [r1], #1
                    130: .Lial2:        ldrbt   r7, [r0], #1
                    131:        sub     r2, r2, #1
                    132:        strb    r7, [r1], #1
                    133: .Lial1:        ldrbt   r6, [r0], #1
                    134:        sub     r2, r2, #1
                    135:        strb    r6, [r1], #1
                    136: .Lialend:
                    137:
                    138:        /*
                    139:         * If few bytes left, finish slow.
                    140:         */
                    141:        cmp     r2, #0x08
                    142:        blt     .Licleanup
                    143:
                    144:        /*
                    145:         * If source is not word aligned (destination now is), finish slow.
                    146:         */
                    147:        ands    r3, r0, #0x03
                    148:        bne     .Licleanup
                    149:
                    150:        cmp     r2, #0x60       /* Must be > 0x5f for unrolled cacheline */
                    151:        blt     .Licleanup8
                    152:
                    153:        /*
                    154:         * Align destination to cacheline boundary.
                    155:         * If source and destination are nicely aligned, this can be a big
                    156:         * win.  If not, it's still cheaper to copy in groups of 32 even if
                    157:         * we don't get the nice cacheline alignment.
                    158:         */
                    159:        and     r6, r1, #0x1f           /* a multiple of 4 here: r1 is word aligned */
                    160:        ldr     pc, [pc, r6]            /* byte offset into the table below */
                    161:        b       .Licaligned             /* filler so the table starts at . + 8 */
                    162:        .word   .Licaligned             /* offset 0x00: already aligned */
                    163:        .word   .Lical28                /* offset 0x04: copy 28 bytes */
                    164:        .word   .Lical24                /* offset 0x08: copy 24 bytes */
                    165:        .word   .Lical20                /* offset 0x0c: copy 20 bytes */
                    166:        .word   .Lical16                /* offset 0x10: copy 16 bytes */
                    167:        .word   .Lical12                /* offset 0x14: copy 12 bytes */
                    168:        .word   .Lical8                 /* offset 0x18: copy 8 bytes */
                    169:        .word   .Lical4                 /* offset 0x1c: copy 4 bytes */
                    170: .Lical28:ldrt  r6, [r0], #4
                    171:        sub     r2, r2, #4
                    172:        str     r6, [r1], #4
                    173: .Lical24:ldrt  r7, [r0], #4
                    174:        sub     r2, r2, #4
                    175:        str     r7, [r1], #4
                    176: .Lical20:ldrt  r6, [r0], #4
                    177:        sub     r2, r2, #4
                    178:        str     r6, [r1], #4
                    179: .Lical16:ldrt  r7, [r0], #4
                    180:        sub     r2, r2, #4
                    181:        str     r7, [r1], #4
                    182: .Lical12:ldrt  r6, [r0], #4
                    183:        sub     r2, r2, #4
                    184:        str     r6, [r1], #4
                    185: .Lical8:ldrt   r7, [r0], #4
                    186:        sub     r2, r2, #4
                    187:        str     r7, [r1], #4
                    188: .Lical4:ldrt   r6, [r0], #4
                    189:        sub     r2, r2, #4
                    190:        str     r6, [r1], #4
                    191:
                    192:        /*
                    193:         * We start with > 0x40 bytes to copy (>= 0x60 got us into this
                    194:         * part of the code, and we may have knocked that down by as much
                    195:         * as 0x1c getting aligned).
                    196:         *
                    197:         * This loop basically works out to:
                    198:         * do {
                    199:         *      prefetch-next-cacheline(s)
                    200:         *      bytes -= 0x20;
                    201:         *      copy cacheline
                    202:         * } while (bytes >= 0x40);
                    203:         * bytes -= 0x20;
                    204:         * copy cacheline
                    205:         */
                    206: .Licaligned:
                    207:        PREFETCH(r0, 32)
                    208:        PREFETCH(r1, 32)
                    209:
                    210:        sub     r2, r2, #0x20
                    211:
                    212:        /* Copy a cacheline (8 words; user loads are one ldrt per word) */
                    213:        ldrt    r10, [r0], #4
                    214:        ldrt    r11, [r0], #4
                    215:        ldrt    r6, [r0], #4
                    216:        ldrt    r7, [r0], #4
                    217:        ldrt    r8, [r0], #4
                    218:        ldrt    r9, [r0], #4
                    219:        stmia   r1!, {r10-r11}          /* words 0-1 out, so r10/r11 can be reloaded */
                    220:        ldrt    r10, [r0], #4
                    221:        ldrt    r11, [r0], #4
                    222:        stmia   r1!, {r6-r11}           /* words 2-7 out */
                    223:
                    224:        cmp     r2, #0x40
                    225:        bge     .Licaligned
                    226:
                    227:        sub     r2, r2, #0x20
                    228:
                    229:        /* Copy a cacheline (the final one after the loop) */
                    230:        ldrt    r10, [r0], #4
                    231:        ldrt    r11, [r0], #4
                    232:        ldrt    r6, [r0], #4
                    233:        ldrt    r7, [r0], #4
                    234:        ldrt    r8, [r0], #4
                    235:        ldrt    r9, [r0], #4
                    236:        stmia   r1!, {r10-r11}
                    237:        ldrt    r10, [r0], #4
                    238:        ldrt    r11, [r0], #4
                    239:        stmia   r1!, {r6-r11}
                    240:
                    241:        cmp     r2, #0x08
                    242:        blt     .Liprecleanup
                    243:
                    244: .Licleanup8:                   /* copy two words per pass while r2 >= 8 */
                    245:        ldrt    r8, [r0], #4
                    246:        ldrt    r9, [r0], #4
                    247:        sub     r2, r2, #8
                    248:        stmia   r1!, {r8, r9}
                    249:        cmp     r2, #8
                    250:        bge     .Licleanup8
                    251:
                    252: .Liprecleanup:
                    253:        /*
                    254:         * If we're done, bail.
                    255:         */
                    256:        cmp     r2, #0
                    257:        beq     .Liout                  /* copyin's own epilogue, not copyout's .Lout */
                    258:
                    259: .Licleanup:                    /* byte loop; r2 != 0 whenever control arrives here */
                    260:        and     r6, r2, #0x3
                    261:        ldr     pc, [pc, r6, lsl #2]    /* pc reads as . + 8, i.e. the first .word below */
                    262:        b       .Licend                 /* filler so the table starts at . + 8 */
                    263:        .word   .Lic4                   /* r2 & 3 == 0: copy 4 bytes */
                    264:        .word   .Lic1                   /* r2 & 3 == 1: copy 1 byte */
                    265:        .word   .Lic2                   /* r2 & 3 == 2: copy 2 bytes */
                    266:        .word   .Lic3                   /* r2 & 3 == 3: copy 3 bytes */
                    267: .Lic4: ldrbt   r6, [r0], #1
                    268:        sub     r2, r2, #1
                    269:        strb    r6, [r1], #1
                    270: .Lic3: ldrbt   r7, [r0], #1
                    271:        sub     r2, r2, #1
                    272:        strb    r7, [r1], #1
                    273: .Lic2: ldrbt   r6, [r0], #1
                    274:        sub     r2, r2, #1
                    275:        strb    r6, [r1], #1
                    276: .Lic1: ldrbt   r7, [r0], #1
                    277:        subs    r2, r2, #1              /* sets Z for the bne at .Licend */
                    278:        strb    r7, [r1], #1
                    279: .Licend:
                    280:        bne     .Licleanup              /* repeat until r2 reaches 0 */
                    281:
                    282: .Liout:
                    283:        mov     r0, #0                  /* normal exit: return 0 */
                    284:
                    285:        str     r5, [r4, #PCB_ONFAULT]  /* put back the previous PCB_ONFAULT value */
                    286:        RESTORE_REGS
                    287:
                    288:        mov     pc, lr
                    289:
                    290: .Lcopyfault:                   /* address stored in PCB_ONFAULT by copyin, copyout and kcopy */
                    291:        str     r5, [r4, #PCB_ONFAULT]  /* put back the PCB_ONFAULT value saved in r5 */
                    292:        RESTORE_REGS
                    293: /* r0 is not written here; NOTE(review): presumably the error code is already in r0 on arrival - confirm */
                    294:        mov     pc, lr
                    295:
                    296: /*
                    297:  * r0 = kernel space address (source)
                    298:  * r1 = user space address (destination, written with strt/strbt)
                    299:  * r2 = length in bytes
                    300:  *
                    301:  * Copies bytes from kernel space to user space; r0 = 0 on normal exit.
                    302:  *
                    303:  * We save/restore r4-r11 (plus r14 when __PROG32 is not defined):
                    304:  * r4 = pcb pointer, r5 = previous PCB_ONFAULT value, r6-r11 are scratch
                    305:  */
                    306:
                    307: ENTRY(copyout)
                    308:        /* Quick exit if length is zero */
                    309:        teq     r2, #0
                    310:        moveq   r0, #0
                    311:        moveq   pc, lr
                    312:
                    313:        SAVE_REGS
                    314: #ifdef MULTIPROCESSOR
                    315:        /* XXX Probably not appropriate for non-Hydra SMPs */
                    316:        stmfd   sp!, {r0-r2, r14}       /* keep the arguments and lr across the call */
                    317:        bl      _C_LABEL(cpu_number)
                    318:        ldr     r4, .Lcpu_info
                    319:        ldr     r4, [r4, r0, lsl #2]    /* r4 = cpu_info[cpu_number()] */
                    320:        ldr     r4, [r4, #CI_CURPCB]    /* r4 = CI_CURPCB field of that entry */
                    321:        ldmfd   sp!, {r0-r2, r14}
                    322: #else
                    323:        ldr     r4, .Lcurpcb
                    324:        ldr     r4, [r4]                /* r4 = curpcb */
                    325: #endif
                    326:
                    327:        ldr     r5, [r4, #PCB_ONFAULT]  /* r5 = previous value, restored on every exit */
                    328:        adr     r3, .Lcopyfault
                    329:        str     r3, [r4, #PCB_ONFAULT]  /* .Lcopyfault is installed for the duration of the copy */
                    330:
                    331:        PREFETCH(r0, 0)
                    332:        PREFETCH(r1, 0)
                    333:
                    334:        /*
                    335:         * Fewer than 8 bytes: go straight to the byte-at-a-time path.
                    336:         */
                    337:        cmp     r2, #0x08
                    338:        blt     .Lcleanup               /* NOTE(review): signed test; r2 >= 0x80000000 also takes the byte path */
                    339:
                    340:        /*
                    341:         * Align destination to word boundary.
                    342:         */
                    343:        and     r6, r1, #0x3
                    344:        ldr     pc, [pc, r6, lsl #2]    /* pc reads as . + 8, i.e. the first .word below */
                    345:        b       .Lalend                 /* filler so the table starts at . + 8 */
                    346:        .word   .Lalend                 /* r1 & 3 == 0: already aligned */
                    347:        .word   .Lal3                   /* r1 & 3 == 1: copy 3 bytes */
                    348:        .word   .Lal2                   /* r1 & 3 == 2: copy 2 bytes */
                    349:        .word   .Lal1                   /* r1 & 3 == 3: copy 1 byte */
                    350: .Lal3: ldrb    r6, [r0], #1
                    351:        sub     r2, r2, #1
                    352:        strbt   r6, [r1], #1
                    353: .Lal2: ldrb    r7, [r0], #1
                    354:        sub     r2, r2, #1
                    355:        strbt   r7, [r1], #1
                    356: .Lal1: ldrb    r6, [r0], #1
                    357:        sub     r2, r2, #1
                    358:        strbt   r6, [r1], #1
                    359: .Lalend:
                    360:
                    361:        /*
                    362:         * If few bytes left, finish slow.
                    363:         */
                    364:        cmp     r2, #0x08
                    365:        blt     .Lcleanup
                    366:
                    367:        /*
                    368:         * If source is not word aligned (destination now is), finish slow.
                    369:         */
                    370:        ands    r3, r0, #0x03
                    371:        bne     .Lcleanup
                    372:
                    373:        cmp     r2, #0x60       /* Must be > 0x5f for unrolled cacheline */
                    374:        blt     .Lcleanup8
                    375:
                    376:        /*
                    377:         * Align destination to cacheline boundary (only r1 is tested).
                    378:         */
                    379:        and     r6, r1, #0x1f           /* a multiple of 4 here: r1 is word aligned */
                    380:        ldr     pc, [pc, r6]            /* byte offset into the table below */
                    381:        b       .Lcaligned              /* filler so the table starts at . + 8 */
                    382:        .word   .Lcaligned              /* offset 0x00: already aligned */
                    383:        .word   .Lcal28                 /* offset 0x04: copy 28 bytes */
                    384:        .word   .Lcal24                 /* offset 0x08: copy 24 bytes */
                    385:        .word   .Lcal20                 /* offset 0x0c: copy 20 bytes */
                    386:        .word   .Lcal16                 /* offset 0x10: copy 16 bytes */
                    387:        .word   .Lcal12                 /* offset 0x14: copy 12 bytes */
                    388:        .word   .Lcal8                  /* offset 0x18: copy 8 bytes */
                    389:        .word   .Lcal4                  /* offset 0x1c: copy 4 bytes */
                    390: .Lcal28:ldr    r6, [r0], #4
                    391:        sub     r2, r2, #4
                    392:        strt    r6, [r1], #4
                    393: .Lcal24:ldr    r7, [r0], #4
                    394:        sub     r2, r2, #4
                    395:        strt    r7, [r1], #4
                    396: .Lcal20:ldr    r6, [r0], #4
                    397:        sub     r2, r2, #4
                    398:        strt    r6, [r1], #4
                    399: .Lcal16:ldr    r7, [r0], #4
                    400:        sub     r2, r2, #4
                    401:        strt    r7, [r1], #4
                    402: .Lcal12:ldr    r6, [r0], #4
                    403:        sub     r2, r2, #4
                    404:        strt    r6, [r1], #4
                    405: .Lcal8:        ldr     r7, [r0], #4
                    406:        sub     r2, r2, #4
                    407:        strt    r7, [r1], #4
                    408: .Lcal4:        ldr     r6, [r0], #4
                    409:        sub     r2, r2, #4
                    410:        strt    r6, [r1], #4
                    411:
                    412:        /*
                    413:         * We start with > 0x40 bytes to copy (>= 0x60 got us into this
                    414:         * part of the code, and we may have knocked that down by as much
                    415:         * as 0x1c getting aligned).
                    416:         *
                    417:         * This loop basically works out to:
                    418:         * do {
                    419:         *      prefetch-next-cacheline(s)
                    420:         *      bytes -= 0x20;
                    421:         *      copy cacheline
                    422:         * } while (bytes >= 0x40);
                    423:         * bytes -= 0x20;
                    424:         * copy cacheline
                    425:         */
                    426: .Lcaligned:
                    427:        PREFETCH(r0, 32)
                    428:        PREFETCH(r1, 32)
                    429:
                    430:        sub     r2, r2, #0x20
                    431:
                    432:        /* Copy a cacheline (8 words; user stores are one strt per word) */
                    433:        ldmia   r0!, {r6-r11}           /* words 0-5 in */
                    434:        strt    r6, [r1], #4
                    435:        strt    r7, [r1], #4
                    436:        ldmia   r0!, {r6-r7}            /* words 6-7 in, reusing r6/r7 */
                    437:        strt    r8, [r1], #4
                    438:        strt    r9, [r1], #4
                    439:        strt    r10, [r1], #4
                    440:        strt    r11, [r1], #4
                    441:        strt    r6, [r1], #4
                    442:        strt    r7, [r1], #4
                    443:
                    444:        cmp     r2, #0x40
                    445:        bge     .Lcaligned
                    446:
                    447:        sub     r2, r2, #0x20
                    448:
                    449:        /* Copy a cacheline (the final one after the loop) */
                    450:        ldmia   r0!, {r6-r11}
                    451:        strt    r6, [r1], #4
                    452:        strt    r7, [r1], #4
                    453:        ldmia   r0!, {r6-r7}
                    454:        strt    r8, [r1], #4
                    455:        strt    r9, [r1], #4
                    456:        strt    r10, [r1], #4
                    457:        strt    r11, [r1], #4
                    458:        strt    r6, [r1], #4
                    459:        strt    r7, [r1], #4
                    460:
                    461:        cmp     r2, #0x08
                    462:        blt     .Lprecleanup
                    463:
                    464: .Lcleanup8:                    /* copy two words per pass while r2 >= 8 */
                    465:        ldmia   r0!, {r8-r9}
                    466:        sub     r2, r2, #8
                    467:        strt    r8, [r1], #4
                    468:        strt    r9, [r1], #4
                    469:        cmp     r2, #8
                    470:        bge     .Lcleanup8
                    471:
                    472: .Lprecleanup:
                    473:        /*
                    474:         * If we're done, bail.
                    475:         */
                    476:        cmp     r2, #0
                    477:        beq     .Lout
                    478:
                    479: .Lcleanup:                     /* byte loop; r2 != 0 whenever control arrives here */
                    480:        and     r6, r2, #0x3
                    481:        ldr     pc, [pc, r6, lsl #2]    /* pc reads as . + 8, i.e. the first .word below */
                    482:        b       .Lcend                  /* filler so the table starts at . + 8 */
                    483:        .word   .Lc4                    /* r2 & 3 == 0: copy 4 bytes */
                    484:        .word   .Lc1                    /* r2 & 3 == 1: copy 1 byte */
                    485:        .word   .Lc2                    /* r2 & 3 == 2: copy 2 bytes */
                    486:        .word   .Lc3                    /* r2 & 3 == 3: copy 3 bytes */
                    487: .Lc4:  ldrb    r6, [r0], #1
                    488:        sub     r2, r2, #1
                    489:        strbt   r6, [r1], #1
                    490: .Lc3:  ldrb    r7, [r0], #1
                    491:        sub     r2, r2, #1
                    492:        strbt   r7, [r1], #1
                    493: .Lc2:  ldrb    r6, [r0], #1
                    494:        sub     r2, r2, #1
                    495:        strbt   r6, [r1], #1
                    496: .Lc1:  ldrb    r7, [r0], #1
                    497:        subs    r2, r2, #1              /* sets Z for the bne at .Lcend */
                    498:        strbt   r7, [r1], #1
                    499: .Lcend:
                    500:        bne     .Lcleanup               /* repeat until r2 reaches 0 */
                    501:
                    502: .Lout:
                    503:        mov     r0, #0                  /* normal exit: return 0 */
                    504:
                    505:        str     r5, [r4, #PCB_ONFAULT]  /* put back the previous PCB_ONFAULT value */
                    506:        RESTORE_REGS
                    507:
                    508:        mov     pc, lr
                    509:
                    510: /*
                    511:  * r0 = kernel space source address
                    512:  * r1 = kernel space destination address
                    513:  * r2 = length
                    514:  *
                    515:  * Copies bytes from kernel space to kernel space, aborting on page fault
                    516:  *
                    517:  * Copy of copyout, but without the ldrt/strt instructions.
                    518:  */
                    519:
                    520: ENTRY(kcopy)
                    521:        /* Quick exit if length is zero */
                    522:        teq     r2, #0
                    523:        moveq   r0, #0
                    524:        moveq   pc, lr
                    525:
                    526:        SAVE_REGS
                    527: #ifdef MULTIPROCESSOR
                    528:        /* XXX Probably not appropriate for non-Hydra SMPs */
                    529:        stmfd   sp!, {r0-r2, r14}
                    530:        bl      _C_LABEL(cpu_number)
                    531:        ldr     r4, .Lcpu_info
                    532:        ldr     r4, [r4, r0, lsl #2]
                    533:        ldr     r4, [r4, #CI_CURPCB]
                    534:        ldmfd   sp!, {r0-r2, r14}
                    535: #else
                    536:        ldr     r4, .Lcurpcb
                    537:        ldr     r4, [r4]
                    538: #endif
                    539:
                    540:        ldr     r5, [r4, #PCB_ONFAULT]
                    541:        adr     r3, .Lcopyfault
                    542:        str     r3, [r4, #PCB_ONFAULT]
                    543:
                    544:        PREFETCH(r0, 0)
                    545:        PREFETCH(r1, 0)
                    546:
                    547:        /*
                    548:         * If not too many bytes, take the slow path.
                    549:         */
                    550:        cmp     r2, #0x08
                    551:        blt     .Lkcleanup
                    552:
                    553:        /*
                    554:         * Align destination to word boundary.
                    555:         */
                    556:        and     r6, r1, #0x3
                    557:        ldr     pc, [pc, r6, lsl #2]
                    558:        b       .Lkalend
                    559:        .word   .Lkalend
                    560:        .word   .Lkal3
                    561:        .word   .Lkal2
                    562:        .word   .Lkal1
                    563: .Lkal3:        ldrb    r6, [r0], #1
                    564:        sub     r2, r2, #1
                    565:        strb    r6, [r1], #1
                    566: .Lkal2:        ldrb    r7, [r0], #1
                    567:        sub     r2, r2, #1
                    568:        strb    r7, [r1], #1
                    569: .Lkal1:        ldrb    r6, [r0], #1
                    570:        sub     r2, r2, #1
                    571:        strb    r6, [r1], #1
                    572: .Lkalend:
                    573:
                    574:        /*
                    575:         * If few bytes left, finish slow.
                    576:         */
                    577:        cmp     r2, #0x08
                    578:        blt     .Lkcleanup
                    579:
                    580:        /*
                    581:         * If source is not aligned, finish slow.
                    582:         */
                    583:        ands    r3, r0, #0x03
                    584:        bne     .Lkcleanup
                    585:
                    586:        cmp     r2, #0x60       /* Must be > 0x5f for unrolled cacheline */
                    587:        blt     .Lkcleanup8
                    588:
                    589:        /*
                    590:         * Align source & destination to cacheline boundary.
                    591:         */
                    592:        and     r6, r1, #0x1f
                    593:        ldr     pc, [pc, r6]
                    594:        b       .Lkcaligned
                    595:        .word   .Lkcaligned
                    596:        .word   .Lkcal28
                    597:        .word   .Lkcal24
                    598:        .word   .Lkcal20
                    599:        .word   .Lkcal16
                    600:        .word   .Lkcal12
                    601:        .word   .Lkcal8
                    602:        .word   .Lkcal4
                    603: .Lkcal28:ldr   r6, [r0], #4
                    604:        sub     r2, r2, #4
                    605:        str     r6, [r1], #4
                    606: .Lkcal24:ldr   r7, [r0], #4
                    607:        sub     r2, r2, #4
                    608:        str     r7, [r1], #4
                    609: .Lkcal20:ldr   r6, [r0], #4
                    610:        sub     r2, r2, #4
                    611:        str     r6, [r1], #4
                    612: .Lkcal16:ldr   r7, [r0], #4
                    613:        sub     r2, r2, #4
                    614:        str     r7, [r1], #4
                    615: .Lkcal12:ldr   r6, [r0], #4
                    616:        sub     r2, r2, #4
                    617:        str     r6, [r1], #4
                    618: .Lkcal8:ldr    r7, [r0], #4
                    619:        sub     r2, r2, #4
                    620:        str     r7, [r1], #4
                    621: .Lkcal4:ldr    r6, [r0], #4
                    622:        sub     r2, r2, #4
                    623:        str     r6, [r1], #4
                    624:
                    625:        /*
                    626:         * We start with > 0x40 bytes to copy (>= 0x60 got us into this
                    627:         * part of the code, and we may have knocked that down by as much
                    628:         * as 0x1c getting aligned).
                    629:         *
                    630:         * This loop basically works out to:
                    631:         * do {
                    632:         *      prefetch-next-cacheline(s)
                    633:         *      bytes -= 0x20;
                    634:         *      copy cacheline
                    635:         * } while (bytes >= 0x40);
                    636:         * bytes -= 0x20;
                    637:         * copy cacheline
                    638:         */
                    639: .Lkcaligned:
                    640:        PREFETCH(r0, 32)
                    641:        PREFETCH(r1, 32)
                    642:
                    643:        sub     r2, r2, #0x20
                    644:
                    645:        /* Copy a cacheline */
                    646:        ldmia   r0!, {r6-r11}
                    647:        stmia   r1!, {r6, r7}
                    648:        ldmia   r0!, {r6, r7}
                    649:        stmia   r1!, {r8-r11}
                    650:        stmia   r1!, {r6, r7}
                    651:
                    652:        cmp     r2, #0x40
                    653:        bge     .Lkcaligned
                    654:
                    655:        sub     r2, r2, #0x20
                    656:
                    657:        /* Copy a cacheline */
                    658:        ldmia   r0!, {r6-r11}
                    659:        stmia   r1!, {r6-r7}
                    660:        ldmia   r0!, {r6-r7}
                    661:        stmia   r1!, {r8-r11}
                    662:        stmia   r1!, {r6-r7}
                    663:
                    664:        cmp     r2, #0x08
                    665:        blt     .Lkprecleanup
                    666:
                    667: .Lkcleanup8:
                    668:        ldmia   r0!, {r8-r9}
                    669:        sub     r2, r2, #8
                    670:        stmia   r1!, {r8-r9}
                    671:        cmp     r2, #8
                    672:        bge     .Lkcleanup8
                    673:
                    674: .Lkprecleanup:
                    675:        /*
                    676:         * If we're done, bail.
                    677:         */
                    678:        cmp     r2, #0
                    679:        beq     .Lkout
                    680:
                    681: .Lkcleanup:
                    682:        and     r6, r2, #0x3
                    683:        ldr     pc, [pc, r6, lsl #2]
                    684:        b       .Lkcend
                    685:        .word   .Lkc4
                    686:        .word   .Lkc1
                    687:        .word   .Lkc2
                    688:        .word   .Lkc3
                    689: .Lkc4: ldrb    r6, [r0], #1
                    690:        sub     r2, r2, #1
                    691:        strb    r6, [r1], #1
                    692: .Lkc3: ldrb    r7, [r0], #1
                    693:        sub     r2, r2, #1
                    694:        strb    r7, [r1], #1
                    695: .Lkc2: ldrb    r6, [r0], #1
                    696:        sub     r2, r2, #1
                    697:        strb    r6, [r1], #1
                    698: .Lkc1: ldrb    r7, [r0], #1
                    699:        subs    r2, r2, #1
                    700:        strb    r7, [r1], #1
                    701: .Lkcend:
                    702:        bne     .Lkcleanup
                    703:
                    704: .Lkout:
                    705:        mov     r0, #0
                    706:
                    707:        str     r5, [r4, #PCB_ONFAULT]
                    708:        RESTORE_REGS
                    709:
                    710:        mov     pc, lr
                    711: #endif /* !__XSCALE__ */
                    712:
                    713: #ifdef __PROG32
                    714: /*
                    715:  * int badaddr_read_1(const uint8_t *src, uint8_t *dest)
                    716:  * In: r0 = src, r1 = dest.  Scratch: r2 (current PCB pointer), r3, ip.
                    717:  * Copies a single 8-bit value from src to dest, returning 0 on success,
                    718:  * else EFAULT if a page fault occurred.
                    719:  */
                    720: ENTRY(badaddr_read_1)
                    721: #ifdef MULTIPROCESSOR
                    722:        /* XXX Probably not appropriate for non-Hydra SMPs */
                    723:        stmfd   sp!, {r0-r1, r14}       /* keep src, dest and return address across the call */
                    724:        bl      _C_LABEL(cpu_number)
                    725:        ldr     r2, .Lcpu_info          /* r2 = word stored at .Lcpu_info */
                    726:        ldr     r2, [r2, r0, lsl #2]    /* r2 = 32-bit entry at index r0 (value left by cpu_number) */
                    727:        ldr     r2, [r2, #CI_CURPCB]    /* r2 = that entry's CI_CURPCB field */
                    728:        ldmfd   sp!, {r0-r1, r14}       /* get src, dest and return address back */
                    729: #else
                    730:        ldr     r2, .Lcurpcb            /* r2 = word stored at .Lcurpcb */
                    731:        ldr     r2, [r2]                /* r2 = value it points at (used below as the PCB base) */
                    732: #endif
                    733:        ldr     ip, [r2, #PCB_ONFAULT]  /* ip = previous onfault value, put back at 1: */
                    734:        adr     r3, 1f                  /* r3 = address of the local label 1 below */
                    735:        str     r3, [r2, #PCB_ONFAULT]  /* install label 1 as the onfault address */
                    736:        nop                             /* NOTE(review): reason for the nop padding is not stated here -- */
                    737:        nop                             /* presumably it isolates the guarded access; confirm */
                    738:        nop
                    739:        ldrb    r3, [r0]                /* guarded access: read one byte from src */
                    740:        nop
                    741:        nop
                    742:        nop
                    743:        strb    r3, [r1]                /* store the byte to dest */
                    744:        mov     r0, #0          /* No fault */
                    745: 1:     str     ip, [r2, #PCB_ONFAULT]  /* put back previous onfault value; this code does not set r0 on the fault path -- presumably the abort handler does, TODO confirm */
                    746:        mov     pc, lr                  /* return to caller */
                    747:
                    748: /*
                    749:  * int badaddr_read_2(const uint16_t *src, uint16_t *dest)
                    750:  * In: r0 = src, r1 = dest.  Scratch: r2 (current PCB pointer), r3, ip.
                    751:  * Copies a single 16-bit value from src to dest, returning 0 on success,
                    752:  * else EFAULT if a page fault occurred.
                    753:  */
                    754: ENTRY(badaddr_read_2)
                    755: #ifdef MULTIPROCESSOR
                    756:        /* XXX Probably not appropriate for non-Hydra SMPs */
                    757:        stmfd   sp!, {r0-r1, r14}       /* keep src, dest and return address across the call */
                    758:        bl      _C_LABEL(cpu_number)
                    759:        ldr     r2, .Lcpu_info          /* r2 = word stored at .Lcpu_info */
                    760:        ldr     r2, [r2, r0, lsl #2]    /* r2 = 32-bit entry at index r0 (value left by cpu_number) */
                    761:        ldr     r2, [r2, #CI_CURPCB]    /* r2 = that entry's CI_CURPCB field */
                    762:        ldmfd   sp!, {r0-r1, r14}       /* get src, dest and return address back */
                    763: #else
                    764:        ldr     r2, .Lcurpcb            /* r2 = word stored at .Lcurpcb */
                    765:        ldr     r2, [r2]                /* r2 = value it points at (used below as the PCB base) */
                    766: #endif
                    767:        ldr     ip, [r2, #PCB_ONFAULT]  /* ip = previous onfault value, put back at 1: */
                    768:        adr     r3, 1f                  /* r3 = address of the local label 1 below */
                    769:        str     r3, [r2, #PCB_ONFAULT]  /* install label 1 as the onfault address */
                    770:        nop                             /* NOTE(review): reason for the nop padding is not stated here -- */
                    771:        nop                             /* presumably it isolates the guarded access; confirm */
                    772:        nop
                    773:        ldrh    r3, [r0]                /* guarded access: read one halfword from src */
                    774:        nop
                    775:        nop
                    776:        nop
                    777:        strh    r3, [r1]                /* store the halfword to dest */
                    778:        mov     r0, #0          /* No fault */
                    779: 1:     str     ip, [r2, #PCB_ONFAULT]  /* put back previous onfault value; this code does not set r0 on the fault path -- presumably the abort handler does, TODO confirm */
                    780:        mov     pc, lr                  /* return to caller */
                    781:
                    782: /*
                    783:  * int badaddr_read_4(const uint32_t *src, uint32_t *dest)
                    784:  * In: r0 = src, r1 = dest.  Scratch: r2 (current PCB pointer), r3, ip.
                    785:  * Copies a single 32-bit value from src to dest, returning 0 on success,
                    786:  * else EFAULT if a page fault occurred.
                    787:  */
                    788: ENTRY(badaddr_read_4)
                    789: #ifdef MULTIPROCESSOR
                    790:        /* XXX Probably not appropriate for non-Hydra SMPs */
                    791:        stmfd   sp!, {r0-r1, r14}       /* keep src, dest and return address across the call */
                    792:        bl      _C_LABEL(cpu_number)
                    793:        ldr     r2, .Lcpu_info          /* r2 = word stored at .Lcpu_info */
                    794:        ldr     r2, [r2, r0, lsl #2]    /* r2 = 32-bit entry at index r0 (value left by cpu_number) */
                    795:        ldr     r2, [r2, #CI_CURPCB]    /* r2 = that entry's CI_CURPCB field */
                    796:        ldmfd   sp!, {r0-r1, r14}       /* get src, dest and return address back */
                    797: #else
                    798:        ldr     r2, .Lcurpcb            /* r2 = word stored at .Lcurpcb */
                    799:        ldr     r2, [r2]                /* r2 = value it points at (used below as the PCB base) */
                    800: #endif
                    801:        ldr     ip, [r2, #PCB_ONFAULT]  /* ip = previous onfault value, put back at 1: */
                    802:        adr     r3, 1f                  /* r3 = address of the local label 1 below */
                    803:        str     r3, [r2, #PCB_ONFAULT]  /* install label 1 as the onfault address */
                    804:        nop                             /* NOTE(review): reason for the nop padding is not stated here -- */
                    805:        nop                             /* presumably it isolates the guarded access; confirm */
                    806:        nop
                    807:        ldr     r3, [r0]                /* guarded access: read one word from src */
                    808:        nop
                    809:        nop
                    810:        nop
                    811:        str     r3, [r1]                /* store the word to dest */
                    812:        mov     r0, #0          /* No fault */
                    813: 1:     str     ip, [r2, #PCB_ONFAULT]  /* put back previous onfault value; this code does not set r0 on the fault path -- presumably the abort handler does, TODO confirm */
                    814:        mov     pc, lr                  /* return to caller */
                    815: #endif /* __PROG32 */
CVSweb