[BACK]Return to bcopy_page.S CVS log [TXT][DIR] Up to [local] / sys / arch / arm / arm

Annotation of sys/arch/arm/arm/bcopy_page.S, Revision 1.1.1.1

1.1       nbrk        1: /*     $OpenBSD: bcopy_page.S,v 1.1 2004/02/01 05:09:48 drahn Exp $    */
                      2: /*     $NetBSD: bcopy_page.S,v 1.7 2003/10/13 21:03:13 scw Exp $       */
                      3:
                      4:
                      5: /*
                      6:  * Copyright (c) 1995 Scott Stevens
                      7:  * All rights reserved.
                      8:  *
                      9:  * Redistribution and use in source and binary forms, with or without
                     10:  * modification, are permitted provided that the following conditions
                     11:  * are met:
                     12:  * 1. Redistributions of source code must retain the above copyright
                     13:  *    notice, this list of conditions and the following disclaimer.
                     14:  * 2. Redistributions in binary form must reproduce the above copyright
                     15:  *    notice, this list of conditions and the following disclaimer in the
                     16:  *    documentation and/or other materials provided with the distribution.
                     17:  * 3. All advertising materials mentioning features or use of this software
                     18:  *    must display the following acknowledgement:
                     19:  *     This product includes software developed by Scott Stevens.
                     20:  * 4. The name of the author may not be used to endorse or promote products
                     21:  *    derived from this software without specific prior written permission.
                     22:  *
                     23:  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
                     24:  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
                     25:  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
                     26:  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
                     27:  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
                     28:  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
                     29:  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
                     30:  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
                     31:  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
                     32:  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
                     33:  *
                     34:  * RiscBSD kernel project
                     35:  *
                     36:  * bcopy_page.S
                     37:  *
                     38:  * page optimised bcopy and bzero routines
                     39:  *
                     40:  * Created      : 08/04/95
                     41:  */
                     42:
                     43: #include <machine/asm.h>
                     44:
                     45: #include "assym.h"
                     46:
                     47: #ifndef __XSCALE__
                     48:
                     49: /* #define BIG_LOOPS */
                     50:
                     51: /*
                     52:  * bcopy_page(src, dest)
                     53:  *
                     54:  * Optimised copy page routine.
                     55:  *
                     56:  * On entry:
                     57:  *   r0 - src address
                     58:  *   r1 - dest address
                     59:  *
                     60:  * Requires:
                     61:  *   PAGE_SIZE (bytes per page) is a multiple of 512 when BIG_LOOPS is
                     62:  *   defined, or a multiple of 128 otherwise.
                     63:  */
                     64:
                         /*
                          * CHUNK_SIZE equals the number of bytes moved by one default
                          * COPY_CHUNK below (eight 4-byte registers).  It is not referenced
                          * in the rest of this listing.
                          */
                     65: #define        CHUNK_SIZE      32
                     66:
                         /* Prefetch hooks used by bcopy_page; both expand to nothing here. */
                     67: #define        PREFETCH_FIRST_CHUNK    /* nothing */
                     68: #define        PREFETCH_NEXT_CHUNK     /* nothing */
                     69:
                         /*
                          * COPY_CHUNK: load eight words from [r0] into r3-r8, ip and lr, then
                          * store them to [r1].  Both pointers use write-back, so each one
                          * advances by 32 bytes.  The #ifndef keeps any definition of
                          * COPY_CHUNK made before this point.
                          */
                     70: #ifndef COPY_CHUNK
                     71: #define        COPY_CHUNK \
                     72:        PREFETCH_NEXT_CHUNK ; \
                     73:        ldmia   r0!, {r3-r8,ip,lr} ; \
                     74:        stmia   r1!, {r3-r8,ip,lr}
                     75: #endif /* ! COPY_CHUNK */
                     76:
                         /*
                          * Prologue/epilogue used by bcopy_page.  SAVE_REGS pushes r4-r8 and
                          * lr (the default COPY_CHUNK overwrites all of them); RESTORE_REGS
                          * pops r4-r8 and loads the saved lr directly into pc, which returns
                          * to the caller.  Both are skipped if SAVE_REGS is already defined.
                          */
                     77: #ifndef SAVE_REGS
                     78: #define        SAVE_REGS       stmfd   sp!, {r4-r8, lr}
                     79: #define        RESTORE_REGS    ldmfd   sp!, {r4-r8, pc}
                     80: #endif
                     81:
                     82: ENTRY(bcopy_page)
                     83:        PREFETCH_FIRST_CHUNK
                     84:        SAVE_REGS
                         /*
                          * r2 = number of loop passes.  With the default 32-byte COPY_CHUNK
                          * one pass copies 16 chunks (512 bytes) under BIG_LOOPS and
                          * 4 chunks (128 bytes) otherwise.
                          */
                     85: #ifdef BIG_LOOPS
                     86:        mov     r2, #(PAGE_SIZE >> 9)
                     87: #else
                     88:        mov     r2, #(PAGE_SIZE >> 7)
                     89: #endif
                     90:
                         /* Main loop: r0 (src) and r1 (dest) are advanced by COPY_CHUNK. */
                     91: 1:
                     92:        COPY_CHUNK
                     93:        COPY_CHUNK
                     94:        COPY_CHUNK
                     95:        COPY_CHUNK
                     96:
                     97: #ifdef BIG_LOOPS
                     98:        /* There is little point making the loop any larger; unless we are
                     99:           running with the cache off, the load/store overheads will
                    100:           completely dominate this loop.  */
                    101:        COPY_CHUNK
                    102:        COPY_CHUNK
                    103:        COPY_CHUNK
                    104:        COPY_CHUNK
                    105:
                    106:        COPY_CHUNK
                    107:        COPY_CHUNK
                    108:        COPY_CHUNK
                    109:        COPY_CHUNK
                    110:
                    111:        COPY_CHUNK
                    112:        COPY_CHUNK
                    113:        COPY_CHUNK
                    114:        COPY_CHUNK
                    115: #endif
                    116:        subs    r2, r2, #1              /* one pass done; sets Z when r2 hits 0 */
                    117:        bne     1b                      /* repeat until the whole page is copied */
                    118:
                    119:        RESTORE_REGS            /* ...and return. */
                    120:
                    121: /*
                    122:  * bzero_page(dest)
                    123:  *
                    124:  * Optimised zero page routine.
                    125:  *
                    126:  * On entry:
                    127:  *   r0 - dest address
                    128:  *
                    129:  * Requires:
                    130:  *   PAGE_SIZE (bytes per page) is a multiple of 512 when BIG_LOOPS is
                    131:  *   defined, or a multiple of 128 otherwise.
                    132:  */
                    133:
                    134: ENTRY(bzero_page)
                    135:        stmfd   sp!, {r4-r8, lr}        /* r4-r8 and lr are overwritten below */
                         /*
                          * r2 = number of loop passes: one pass stores 512 bytes under
                          * BIG_LOOPS (16 stmia) and 128 bytes otherwise (4 stmia).
                          */
                    136: #ifdef BIG_LOOPS
                    137:        mov     r2, #(PAGE_SIZE >> 9)
                    138: #else
                    139:        mov     r2, #(PAGE_SIZE >> 7)
                    140: #endif
                         /* Zero the eight registers that each stmia writes out (32 bytes). */
                    141:        mov     r3, #0
                    142:        mov     r4, #0
                    143:        mov     r5, #0
                    144:        mov     r6, #0
                    145:        mov     r7, #0
                    146:        mov     r8, #0
                    147:        mov     ip, #0
                    148:        mov     lr, #0
                    149:
                         /* Main loop: every stmia stores 32 zero bytes and advances r0. */
                    150: 1:
                    151:        stmia   r0!, {r3-r8,ip,lr}
                    152:        stmia   r0!, {r3-r8,ip,lr}
                    153:        stmia   r0!, {r3-r8,ip,lr}
                    154:        stmia   r0!, {r3-r8,ip,lr}
                    155:
                    156: #ifdef BIG_LOOPS
                    157:        /* There is little point making the loop any larger; unless we are
                    158:           running with the cache off, the load/store overheads will
                    159:           completely dominate this loop.  */
                    160:        stmia   r0!, {r3-r8,ip,lr}
                    161:        stmia   r0!, {r3-r8,ip,lr}
                    162:        stmia   r0!, {r3-r8,ip,lr}
                    163:        stmia   r0!, {r3-r8,ip,lr}
                    164:
                    165:        stmia   r0!, {r3-r8,ip,lr}
                    166:        stmia   r0!, {r3-r8,ip,lr}
                    167:        stmia   r0!, {r3-r8,ip,lr}
                    168:        stmia   r0!, {r3-r8,ip,lr}
                    169:
                    170:        stmia   r0!, {r3-r8,ip,lr}
                    171:        stmia   r0!, {r3-r8,ip,lr}
                    172:        stmia   r0!, {r3-r8,ip,lr}
                    173:        stmia   r0!, {r3-r8,ip,lr}
                    174:
                    175: #endif
                    176:
                    177:        subs    r2, r2, #1              /* one pass done; sets Z when r2 hits 0 */
                    178:        bne     1b                      /* repeat until the whole page is zeroed */
                    179:
                    180:        ldmfd   sp!, {r4-r8, pc}        /* restore r4-r8; saved lr -> pc returns */
                    181:
                    182: #else  /* __XSCALE__ */
                    183:
                    184: /*
                    185:  * XSCALE version of bcopy_page
                          *
                          * bcopy_page(src, dest)
                          *
                          * On entry:
                          *   r0 - src address
                          *   r1 - dest address
                          *
                          * Register use:
                          *   r2/r3, r4/r5 - alternating data pairs, each written with one strd
                          *   ip           - remaining loop passes (128 bytes per pass)
                          *   r4 and r5 are saved on the stack and restored before returning.
                          *
                          * Requires:
                          *   PAGE_SIZE is a multiple of 128.  The loop count is derived from
                          *   PAGE_SIZE rather than hard-coded, matching the other routines in
                          *   this file.
                          *   NOTE(review): strd needs an 8-byte aligned dest; callers
                          *   presumably pass page-aligned addresses - confirm.
                          *
                          * The loop is software-pipelined: the first two words of each
                          * 128-byte block are loaded before the block's pass starts, and the
                          * source is prefetched 32 bytes at a time ahead of the loads.  The
                          * offsets in the comments are relative to the start of the block.
                    186:  */
                    187: ENTRY(bcopy_page)
                    188:        pld     [r0]                    /* prefetch the first source line */
                    189:        stmfd   sp!, {r4, r5}
                    190:        mov     ip, #(PAGE_SIZE >> 7)   /* passes = PAGE_SIZE / 128 */
                    191:        ldr     r2, [r0], #0x04         /* 0x00 */
                    192:        ldr     r3, [r0], #0x04         /* 0x04 */
                    193: 1:     pld     [r0, #0x18]             /* Prefetch 0x20 */
                    194:        ldr     r4, [r0], #0x04         /* 0x08 */
                    195:        ldr     r5, [r0], #0x04         /* 0x0c */
                    196:        strd    r2, [r1], #0x08
                    197:        ldr     r2, [r0], #0x04         /* 0x10 */
                    198:        ldr     r3, [r0], #0x04         /* 0x14 */
                    199:        strd    r4, [r1], #0x08
                    200:        ldr     r4, [r0], #0x04         /* 0x18 */
                    201:        ldr     r5, [r0], #0x04         /* 0x1c */
                    202:        strd    r2, [r1], #0x08
                    203:        ldr     r2, [r0], #0x04         /* 0x20 */
                    204:        ldr     r3, [r0], #0x04         /* 0x24 */
                    205:        pld     [r0, #0x18]             /* Prefetch 0x40 */
                    206:        strd    r4, [r1], #0x08
                    207:        ldr     r4, [r0], #0x04         /* 0x28 */
                    208:        ldr     r5, [r0], #0x04         /* 0x2c */
                    209:        strd    r2, [r1], #0x08
                    210:        ldr     r2, [r0], #0x04         /* 0x30 */
                    211:        ldr     r3, [r0], #0x04         /* 0x34 */
                    212:        strd    r4, [r1], #0x08
                    213:        ldr     r4, [r0], #0x04         /* 0x38 */
                    214:        ldr     r5, [r0], #0x04         /* 0x3c */
                    215:        strd    r2, [r1], #0x08
                    216:        ldr     r2, [r0], #0x04         /* 0x40 */
                    217:        ldr     r3, [r0], #0x04         /* 0x44 */
                    218:        pld     [r0, #0x18]             /* Prefetch 0x60 */
                    219:        strd    r4, [r1], #0x08
                    220:        ldr     r4, [r0], #0x04         /* 0x48 */
                    221:        ldr     r5, [r0], #0x04         /* 0x4c */
                    222:        strd    r2, [r1], #0x08
                    223:        ldr     r2, [r0], #0x04         /* 0x50 */
                    224:        ldr     r3, [r0], #0x04         /* 0x54 */
                    225:        strd    r4, [r1], #0x08
                    226:        ldr     r4, [r0], #0x04         /* 0x58 */
                    227:        ldr     r5, [r0], #0x04         /* 0x5c */
                    228:        strd    r2, [r1], #0x08
                    229:        ldr     r2, [r0], #0x04         /* 0x60 */
                    230:        ldr     r3, [r0], #0x04         /* 0x64 */
                    231:        pld     [r0, #0x18]             /* Prefetch 0x80 */
                    232:        strd    r4, [r1], #0x08
                    233:        ldr     r4, [r0], #0x04         /* 0x68 */
                    234:        ldr     r5, [r0], #0x04         /* 0x6c */
                    235:        strd    r2, [r1], #0x08
                    236:        ldr     r2, [r0], #0x04         /* 0x70 */
                    237:        ldr     r3, [r0], #0x04         /* 0x74 */
                    238:        strd    r4, [r1], #0x08
                    239:        ldr     r4, [r0], #0x04         /* 0x78 */
                    240:        ldr     r5, [r0], #0x04         /* 0x7c */
                    241:        strd    r2, [r1], #0x08
                         /*
                          * Count the pass.  The first two words of the next block are only
                          * loaded (ldrgt) when another pass follows, so the final pass does
                          * not read beyond the source page.  strd leaves the flags alone,
                          * so bgt still tests the result of the subs.
                          */
                    242:        subs    ip, ip, #0x01
                    243:        ldrgt   r2, [r0], #0x04         /* 0x80 */
                    244:        ldrgt   r3, [r0], #0x04         /* 0x84 */
                    245:        strd    r4, [r1], #0x08
                    246:        bgt     1b
                    247:        ldmfd   sp!, {r4, r5}
                    248:        mov     pc, lr                  /* return */
                    249:
                    250: /*
                    251:  * XSCALE version of bzero_page
                          *
                          * bzero_page(dest)
                          *
                          * On entry:
                          *   r0 - dest address
                          *
                          * Register use:
                          *   r1    - bytes still to clear, starts at PAGE_SIZE
                          *   r2/r3 - zero pair written by each strd (8 bytes per store)
                          *
                          * Requires:
                          *   PAGE_SIZE is a multiple of 128: each pass stores 16 x 8 bytes
                          *   and the loop only ends when r1 reaches exactly zero.
                          *
                          * The numeric comments in the loop give the running byte total
                          * reached at the end of each group of four stores.
                    252:  */
                    253: ENTRY(bzero_page)
                    254:        mov     r1, #PAGE_SIZE
                    255:        mov     r2, #0
                    256:        mov     r3, #0
                    257: 1:     strd    r2, [r0], #8            /* 32 */
                    258:        strd    r2, [r0], #8
                    259:        strd    r2, [r0], #8
                    260:        strd    r2, [r0], #8
                    261:        strd    r2, [r0], #8            /* 64 */
                    262:        strd    r2, [r0], #8
                    263:        strd    r2, [r0], #8
                    264:        strd    r2, [r0], #8
                    265:        strd    r2, [r0], #8            /* 96 */
                    266:        strd    r2, [r0], #8
                    267:        strd    r2, [r0], #8
                    268:        strd    r2, [r0], #8
                    269:        strd    r2, [r0], #8            /* 128 */
                    270:        strd    r2, [r0], #8
                    271:        strd    r2, [r0], #8
                    272:        strd    r2, [r0], #8
                    273:        subs    r1, r1, #128            /* 128 bytes cleared this pass */
                    274:        bne     1b                      /* loop until no bytes remain */
                    275:        mov     pc, lr                  /* return; no registers were saved */
                    276: #endif /* __XSCALE__ */

CVSweb