sys/arch/sparc/sparc/in_cksum.c - annotate

Return to in_cksum.c CVS log
Up to [local] / sys / arch / sparc / sparc
Annotation of sys/arch/sparc/sparc/in_cksum.c, Revision 1.1.1.1

1.1       nbrk        1: /*     $OpenBSD: in_cksum.c,v 1.11 2005/05/03 00:39:39 brad Exp $      */
                      2: /*     $NetBSD: in_cksum.c,v 1.7 1996/10/05 23:44:34 mrg Exp $ */
                      3:
                      4: /*
                      5:  * Copyright (c) 1995 Zubin Dittia.
                      6:  * Copyright (c) 1995 Matthew R. Green.
                      7:  * Copyright (c) 1994 Charles Hannum.
                      8:  * Copyright (c) 1992, 1993
                      9:  *     The Regents of the University of California.  All rights reserved.
                     10:  *
                     11:  * Redistribution and use in source and binary forms, with or without
                     12:  * modification, are permitted provided that the following conditions
                     13:  * are met:
                     14:  * 1. Redistributions of source code must retain the above copyright
                     15:  *    notice, this list of conditions and the following disclaimer.
                     16:  * 2. Redistributions in binary form must reproduce the above copyright
                     17:  *    notice, this list of conditions and the following disclaimer in the
                     18:  *    documentation and/or other materials provided with the distribution.
                     19:  * 3. Neither the name of the University nor the names of its contributors
                     20:  *    may be used to endorse or promote products derived from this software
                     21:  *    without specific prior written permission.
                     22:  *
                     23:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
                     24:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     25:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     26:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
                     27:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
                     28:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
                     29:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                     30:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                     31:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                     32:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                     33:  * SUCH DAMAGE.
                     34:  *
                     35:  *     @(#)in_cksum.c  8.1 (Berkeley) 6/11/93
                     36:  */
                     37:
                     38: #include <sys/param.h>
                     39: #include <sys/systm.h>
                     40: #include <sys/mbuf.h>
                     41: #include <sys/socketvar.h>
                     42: #include <netinet/in.h>
                     43: #include <netinet/in_systm.h>
                     44: #include <netinet/ip.h>
                     45: #include <netinet/ip_var.h>
                     46:
                     47: /*
                     48:  * Checksum routine for Internet Protocol family headers.
                     49:  *
                     50:  * This routine is very heavily used in the network
                     51:  * code and should be modified for each CPU to be as fast as possible.
                     52:  *
                     53:  * SPARC version.
                     54:  */
                     55:
                     56: /*
                     57:  * The checksum computation code here is significantly faster than its
                     58:  * vanilla C counterpart (by significantly, I mean 2-3 times faster if
                     59:  * the data is in cache, and 1.5-2 times faster if the data is not in
                     60:  * cache).
                     61:  * We optimize on three fronts:
                     62:  *     1. By using the add-with-carry (addxcc) instruction, we can use
                     63:  *        32-bit operations instead of 16-bit operations.
                     64:  *     2. By unrolling the main loop to reduce branch overheads.
                     65:  *     3. By doing a sequence of load,load,add,add,load,load,add,add,
                     66:  *        we can avoid the extra stall cycle which is incurred if the
                     67:  *        instruction immediately following a load tries to use the
                     68:  *        target register of the load.
                     69:  * Another possible optimization is to replace a pair of 32-bit loads
                     70:  * with a single 64-bit load (ldd) instruction, but I found that although
                     71:  * this improves performance somewhat on Sun4c machines, it actually
                     72:  * reduces performance considerably on Sun4m machines (I don't know why).
                     73:  * So I chose to leave it out.
                     74:  *
                     75:  * Zubin Dittia (zubin@dworkin.wustl.edu)
                     76:  */
                     77:
                     78: #define Asm    __asm __volatile
                     79: #define ADD64          Asm("   ld [%4+ 0],%1;   ld [%4+ 4],%2;         \
                     80:                                addcc  %0,%1,%0; addxcc %0,%2,%0;       \
                     81:                                ld [%4+ 8],%1;   ld [%4+12],%2;         \
                     82:                                addxcc %0,%1,%0; addxcc %0,%2,%0;       \
                     83:                                ld [%4+16],%1;   ld [%4+20],%2;         \
                     84:                                addxcc %0,%1,%0; addxcc %0,%2,%0;       \
                     85:                                ld [%4+24],%1;   ld [%4+28],%2;         \
                     86:                                addxcc %0,%1,%0; addxcc %0,%2,%0;       \
                     87:                                ld [%4+32],%1;   ld [%4+36],%2;         \
                     88:                                addxcc %0,%1,%0; addxcc %0,%2,%0;       \
                     89:                                ld [%4+40],%1;   ld [%4+44],%2;         \
                     90:                                addxcc %0,%1,%0; addxcc %0,%2,%0;       \
                     91:                                ld [%4+48],%1;   ld [%4+52],%2;         \
                     92:                                addxcc %0,%1,%0; addxcc %0,%2,%0;       \
                     93:                                ld [%4+56],%1;   ld [%4+60],%2;         \
                     94:                                addxcc %0,%1,%0; addxcc %0,%2,%0;       \
                     95:                                addxcc %0,0,%0"                         \
                     96:                                : "=r" (sum), "=&r" (tmp1), "=&r" (tmp2)\
                     97:                                : "0" (sum), "r" (w))
                     98: #define ADD32          Asm("   ld [%4+ 0],%1;   ld [%4+ 4],%2;         \
                     99:                                addcc  %0,%1,%0; addxcc %0,%2,%0;       \
                    100:                                ld [%4+ 8],%1;   ld [%4+12],%2;         \
                    101:                                addxcc %0,%1,%0; addxcc %0,%2,%0;       \
                    102:                                ld [%4+16],%1;   ld [%4+20],%2;         \
                    103:                                addxcc %0,%1,%0; addxcc %0,%2,%0;       \
                    104:                                ld [%4+24],%1;   ld [%4+28],%2;         \
                    105:                                addxcc %0,%1,%0; addxcc %0,%2,%0;       \
                    106:                                addxcc %0,0,%0"                         \
                    107:                                : "=r" (sum), "=&r" (tmp1), "=&r" (tmp2)\
                    108:                                : "0" (sum), "r" (w))
                    109: #define ADD16          Asm("   ld [%4+ 0],%1;   ld [%4+ 4],%2;         \
                    110:                                addcc  %0,%1,%0; addxcc %0,%2,%0;       \
                    111:                                ld [%4+ 8],%1;   ld [%4+12],%2;         \
                    112:                                addxcc %0,%1,%0; addxcc %0,%2,%0;       \
                    113:                                addxcc %0,0,%0"                         \
                    114:                                : "=r" (sum), "=&r" (tmp1), "=&r" (tmp2)\
                    115:                                : "0" (sum), "r" (w))
                    116: #define ADD8           Asm("   ld [%4+ 0],%1;   ld [%4+ 4],%2;         \
                    117:                                addcc  %0,%1,%0; addxcc %0,%2,%0;       \
                    118:                                addxcc %0,0,%0"                         \
                    119:                                : "=r" (sum), "=&r" (tmp1), "=&r" (tmp2)\
                    120:                                : "0" (sum), "r" (w))
                    121: #define ADD4           Asm("   ld [%3+ 0],%1;                          \
                    122:                                addcc  %0,%1,%0;                        \
                    123:                                addxcc %0,0,%0"                         \
                    124:                                : "=r" (sum), "=&r" (tmp1)              \
                    125:                                : "0" (sum), "r" (w))
                    126:
                    127: #define REDUCE         {sum = (sum & 0xffff) + (sum >> 16);}
                    128: #define ADDCARRY       {if (sum > 0xffff) sum -= 0xffff;}
                    129: #define ROL            {sum = sum << 8;}       /* depends on recent REDUCE */
                    130: #define ADDBYTE                {ROL; sum += *w; byte_swapped ^= 1;}
                    131: #define ADDSHORT       {sum += *(u_short *)w;}
                    132: #define ADVANCE(n)     {w += n; mlen -= n;}
                    133:
                    134: static __inline__ int
                    135: in_cksum_internal(struct mbuf *m, int off, int len, u_int sum)
                    136: {
                    137:        u_char *w;
                    138:        int mlen = 0;
                    139:        int byte_swapped = 0;
                    140:
                    141:        /*
                    142:         * Declare two temporary registers for use by the asm code.  We
                    143:         * allow the compiler to pick which specific machine registers to
                    144:         * use, instead of hard-coding this in the asm code above.
                    145:         */
                    146:        u_int tmp1, tmp2;
                    147:
                    148:        for (; m && len; m = m->m_next) {
                    149:                if (m->m_len == 0)
                    150:                        continue;
                    151:                w = mtod(m, u_char *) + off;
                    152:                mlen = m->m_len - off;
                    153:                off = 0;
                    154:                if (len < mlen)
                    155:                        mlen = len;
                    156:                len -= mlen;
                    157:
                    158:                /*
                    159:                 * Ensure that we're aligned on a word boundary here so
                    160:                 * that we can do 32 bit operations below.
                    161:                 */
                    162:                if ((3 & (long)w) != 0) {
                    163:                        REDUCE;
                    164:                        if ((1 & (long)w) != 0 && mlen >= 1) {
                    165:                                ADDBYTE;
                    166:                                ADVANCE(1);
                    167:                        }
                    168:                        if ((2 & (long)w) != 0 && mlen >= 2) {
                    169:                                ADDSHORT;
                    170:                                ADVANCE(2);
                    171:                        }
                    172:                }
                    173:
                    174:                /*
                    175:                 * Do as many 32 bit operations as possible using the
                    176:                 * 64/32/16/8/4 macro's above, using as many as possible of
                    177:                 * these.
                    178:                 */
                    179:                while (mlen >= 64) {
                    180:                        ADD64;
                    181:                        ADVANCE(64);
                    182:                }
                    183:                if (mlen >= 32) {
                    184:                        ADD32;
                    185:                        ADVANCE(32);
                    186:                }
                    187:                if (mlen >= 16) {
                    188:                        ADD16;
                    189:                        ADVANCE(16);
                    190:                }
                    191:                if (mlen >= 8) {
                    192:                        ADD8;
                    193:                        ADVANCE(8);
                    194:                }
                    195:                if (mlen >= 4) {
                    196:                        ADD4;
                    197:                        ADVANCE(4)
                    198:                }
                    199:                if (mlen == 0)
                    200:                        continue;
                    201:
                    202:                REDUCE;
                    203:                if (mlen >= 2) {
                    204:                        ADDSHORT;
                    205:                        ADVANCE(2);
                    206:                }
                    207:                if (mlen == 1) {
                    208:                        ADDBYTE;
                    209:                }
                    210:        }
                    211:        if (byte_swapped) {
                    212:                REDUCE;
                    213:                ROL;
                    214:        }
                    215:        REDUCE;
                    216:        ADDCARRY;
                    217:
                    218:        return (0xffff ^ sum);
                    219: }
                    220:
                    221: int
                    222: in_cksum(struct mbuf *m, int len)
                    223: {
                    224:
                    225:        return (in_cksum_internal(m, 0, len, 0));
                    226: }
                    227:
                    228: int
                    229: in4_cksum(struct mbuf *m, u_int8_t nxt, int off, int len)
                    230: {
                    231:        u_char *w;
                    232:        u_int sum = 0;
                    233:        struct ipovly ipov;
                    234:
                    235:        /*
                    236:         * Declare two temporary registers for use by the asm code.  We
                    237:         * allow the compiler to pick which specific machine registers to
                    238:         * use, instead of hard-coding this in the asm code above.
                    239:         */
                    240:        u_int tmp1, tmp2;
                    241:
                    242:        if (nxt != 0) {
                    243:                /* pseudo header */
                    244:                memset(&ipov, 0, sizeof(ipov));
                    245:                ipov.ih_len = htons(len);
                    246:                ipov.ih_pr = nxt;
                    247:                ipov.ih_src = mtod(m, struct ip *)->ip_src;
                    248:                ipov.ih_dst = mtod(m, struct ip *)->ip_dst;
                    249:                w = (u_char *)&ipov;
                    250:                /* assumes sizeof(ipov) == 20 */
                    251:                ADD16;
                    252:                w += 16;
                    253:                ADD4;
                    254:        }
                    255:
                    256:        /* skip unnecessary part */
                    257:        while (m && off > 0) {
                    258:                if (m->m_len > off)
                    259:                        break;
                    260:                off -= m->m_len;
                    261:                m = m->m_next;
                    262:        }
                    263:
                    264:        return (in_cksum_internal(m, off, len, sum));
                    265: }
CVSweb