sys/arch/sparc/sparc/in_cksum.c - annotate

Return to in_cksum.c CVS log
Up to [local] / sys / arch / sparc / sparc
Annotation of sys/arch/sparc/sparc/in_cksum.c, Revision 1.1

1.1     ! nbrk        1: /*     $OpenBSD: in_cksum.c,v 1.11 2005/05/03 00:39:39 brad Exp $      */
        !             2: /*     $NetBSD: in_cksum.c,v 1.7 1996/10/05 23:44:34 mrg Exp $ */
        !             3:
        !             4: /*
        !             5:  * Copyright (c) 1995 Zubin Dittia.
        !             6:  * Copyright (c) 1995 Matthew R. Green.
        !             7:  * Copyright (c) 1994 Charles Hannum.
        !             8:  * Copyright (c) 1992, 1993
        !             9:  *     The Regents of the University of California.  All rights reserved.
        !            10:  *
        !            11:  * Redistribution and use in source and binary forms, with or without
        !            12:  * modification, are permitted provided that the following conditions
        !            13:  * are met:
        !            14:  * 1. Redistributions of source code must retain the above copyright
        !            15:  *    notice, this list of conditions and the following disclaimer.
        !            16:  * 2. Redistributions in binary form must reproduce the above copyright
        !            17:  *    notice, this list of conditions and the following disclaimer in the
        !            18:  *    documentation and/or other materials provided with the distribution.
        !            19:  * 3. Neither the name of the University nor the names of its contributors
        !            20:  *    may be used to endorse or promote products derived from this software
        !            21:  *    without specific prior written permission.
        !            22:  *
        !            23:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
        !            24:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
        !            25:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
        !            26:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
        !            27:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
        !            28:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
        !            29:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
        !            30:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
        !            31:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
        !            32:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
        !            33:  * SUCH DAMAGE.
        !            34:  *
        !            35:  *     @(#)in_cksum.c  8.1 (Berkeley) 6/11/93
        !            36:  */
        !            37:
        !            38: #include <sys/param.h>
        !            39: #include <sys/systm.h>
        !            40: #include <sys/mbuf.h>
        !            41: #include <sys/socketvar.h>
        !            42: #include <netinet/in.h>
        !            43: #include <netinet/in_systm.h>
        !            44: #include <netinet/ip.h>
        !            45: #include <netinet/ip_var.h>
        !            46:
        !            47: /*
        !            48:  * Checksum routine for Internet Protocol family headers.
        !            49:  *
        !            50:  * This routine is very heavily used in the network
        !            51:  * code and should be modified for each CPU to be as fast as possible.
        !            52:  *
        !            53:  * SPARC version.
        !            54:  */
        !            55:
        !            56: /*
        !            57:  * The checksum computation code here is significantly faster than its
        !            58:  * vanilla C counterpart (by significantly, I mean 2-3 times faster if
        !            59:  * the data is in cache, and 1.5-2 times faster if the data is not in
        !            60:  * cache).
        !            61:  * We optimize on three fronts:
        !            62:  *     1. By using the add-with-carry (addxcc) instruction, we can use
        !            63:  *        32-bit operations instead of 16-bit operations.
        !            64:  *     2. By unrolling the main loop to reduce branch overheads.
        !            65:  *     3. By doing a sequence of load,load,add,add,load,load,add,add,
        !            66:  *        we can avoid the extra stall cycle which is incurred if the
        !            67:  *        instruction immediately following a load tries to use the
        !            68:  *        target register of the load.
        !            69:  * Another possible optimization is to replace a pair of 32-bit loads
        !            70:  * with a single 64-bit load (ldd) instruction, but I found that although
        !            71:  * this improves performance somewhat on Sun4c machines, it actually
        !            72:  * reduces performance considerably on Sun4m machines (I don't know why).
        !            73:  * So I chose to leave it out.
        !            74:  *
        !            75:  * Zubin Dittia (zubin@dworkin.wustl.edu)
        !            76:  */
        !            77:
        /*
         * Building blocks for the checksum routines in this file.
         *
         * All of these macros operate on locals of the *enclosing* function,
         * which must therefore use exactly these names:
         *
         *     u_int   sum;            running checksum accumulator
         *     u_int   tmp1, tmp2;     scratch registers for the asm (ADD4 uses
         *                             only tmp1)
         *     u_char *w;              current read position
         *     int     mlen;           bytes left in the current buffer
         *     int     byte_swapped;   toggled by every ADDBYTE
         *
         * ADD64/ADD32/ADD16/ADD8/ADD4 add that many bytes starting at w into
         * sum using 32-bit loads and add-with-carry; the trailing
         * "addxcc %0,0,%0" folds the final carry back into sum.  They do NOT
         * advance w or touch mlen (use ADVANCE for that), and w must be on a
         * word boundary because the data is fetched with 32-bit "ld".
         *
         * Clobbers:
         *   "cc"     - addcc/addxcc rewrite the integer condition codes.
         *   "memory" - the loads read the buffer through the pointer in w, an
         *              access the compiler cannot see from the operand list.
         *              Without it, earlier C stores into that buffer (for
         *              example a pseudo header built on the stack) could be
         *              treated as dead or moved past the asm.
         *
         * The C-level helpers expand to brace blocks rather than
         * do { } while (0), so they may be followed by an optional ';' but
         * must not be used as the body of an if that has an else.
         */
        #define Asm             __asm __volatile

        #define ADD64           Asm("ld [%4+ 0],%1; ld [%4+ 4],%2; "            \
                                    "addcc  %0,%1,%0; addxcc %0,%2,%0; "        \
                                    "ld [%4+ 8],%1; ld [%4+12],%2; "            \
                                    "addxcc %0,%1,%0; addxcc %0,%2,%0; "        \
                                    "ld [%4+16],%1; ld [%4+20],%2; "            \
                                    "addxcc %0,%1,%0; addxcc %0,%2,%0; "        \
                                    "ld [%4+24],%1; ld [%4+28],%2; "            \
                                    "addxcc %0,%1,%0; addxcc %0,%2,%0; "        \
                                    "ld [%4+32],%1; ld [%4+36],%2; "            \
                                    "addxcc %0,%1,%0; addxcc %0,%2,%0; "        \
                                    "ld [%4+40],%1; ld [%4+44],%2; "            \
                                    "addxcc %0,%1,%0; addxcc %0,%2,%0; "        \
                                    "ld [%4+48],%1; ld [%4+52],%2; "            \
                                    "addxcc %0,%1,%0; addxcc %0,%2,%0; "        \
                                    "ld [%4+56],%1; ld [%4+60],%2; "            \
                                    "addxcc %0,%1,%0; addxcc %0,%2,%0; "        \
                                    "addxcc %0,0,%0"                            \
                                    : "=r" (sum), "=&r" (tmp1), "=&r" (tmp2)    \
                                    : "0" (sum), "r" (w)                        \
                                    : "cc", "memory")

        #define ADD32           Asm("ld [%4+ 0],%1; ld [%4+ 4],%2; "            \
                                    "addcc  %0,%1,%0; addxcc %0,%2,%0; "        \
                                    "ld [%4+ 8],%1; ld [%4+12],%2; "            \
                                    "addxcc %0,%1,%0; addxcc %0,%2,%0; "        \
                                    "ld [%4+16],%1; ld [%4+20],%2; "            \
                                    "addxcc %0,%1,%0; addxcc %0,%2,%0; "        \
                                    "ld [%4+24],%1; ld [%4+28],%2; "            \
                                    "addxcc %0,%1,%0; addxcc %0,%2,%0; "        \
                                    "addxcc %0,0,%0"                            \
                                    : "=r" (sum), "=&r" (tmp1), "=&r" (tmp2)    \
                                    : "0" (sum), "r" (w)                        \
                                    : "cc", "memory")

        #define ADD16           Asm("ld [%4+ 0],%1; ld [%4+ 4],%2; "            \
                                    "addcc  %0,%1,%0; addxcc %0,%2,%0; "        \
                                    "ld [%4+ 8],%1; ld [%4+12],%2; "            \
                                    "addxcc %0,%1,%0; addxcc %0,%2,%0; "        \
                                    "addxcc %0,0,%0"                            \
                                    : "=r" (sum), "=&r" (tmp1), "=&r" (tmp2)    \
                                    : "0" (sum), "r" (w)                        \
                                    : "cc", "memory")

        #define ADD8            Asm("ld [%4+ 0],%1; ld [%4+ 4],%2; "            \
                                    "addcc  %0,%1,%0; addxcc %0,%2,%0; "        \
                                    "addxcc %0,0,%0"                            \
                                    : "=r" (sum), "=&r" (tmp1), "=&r" (tmp2)    \
                                    : "0" (sum), "r" (w)                        \
                                    : "cc", "memory")

        /* Only one scratch register here, so w is operand %3 instead of %4. */
        #define ADD4            Asm("ld [%3+ 0],%1; "                           \
                                    "addcc  %0,%1,%0; "                         \
                                    "addxcc %0,0,%0"                            \
                                    : "=r" (sum), "=&r" (tmp1)                  \
                                    : "0" (sum), "r" (w)                        \
                                    : "cc", "memory")

        /* Fold the upper 16 bits of sum into the lower 16 (result may be 17 bits). */
        #define REDUCE          {sum = (sum & 0xffff) + (sum >> 16);}
        /* Bring a value left slightly above 16 bits by REDUCE back into range. */
        #define ADDCARRY        {if (sum > 0xffff) sum -= 0xffff;}
        /* Shift sum up one byte; depends on a recent REDUCE so no bits are lost. */
        #define ROL             {sum = sum << 8;}
        /* Add the single byte at w and record that the byte lanes are now swapped. */
        #define ADDBYTE         {ROL; sum += *w; byte_swapped ^= 1;}
        /* Add the 16-bit quantity at w; w must be 2-byte aligned. */
        #define ADDSHORT        {sum += *(u_short *)w;}
        /* Consume n bytes of the current buffer. */
        #define ADVANCE(n)      {w += (n); mlen -= (n);}
        !           133:
        !           134: static __inline__ int
        !           135: in_cksum_internal(struct mbuf *m, int off, int len, u_int sum)
        !           136: {
        !           137:        u_char *w;
        !           138:        int mlen = 0;
        !           139:        int byte_swapped = 0;
        !           140:
        !           141:        /*
        !           142:         * Declare two temporary registers for use by the asm code.  We
        !           143:         * allow the compiler to pick which specific machine registers to
        !           144:         * use, instead of hard-coding this in the asm code above.
        !           145:         */
        !           146:        u_int tmp1, tmp2;
        !           147:
        !           148:        for (; m && len; m = m->m_next) {
        !           149:                if (m->m_len == 0)
        !           150:                        continue;
        !           151:                w = mtod(m, u_char *) + off;
        !           152:                mlen = m->m_len - off;
        !           153:                off = 0;
        !           154:                if (len < mlen)
        !           155:                        mlen = len;
        !           156:                len -= mlen;
        !           157:
        !           158:                /*
        !           159:                 * Ensure that we're aligned on a word boundary here so
        !           160:                 * that we can do 32 bit operations below.
        !           161:                 */
        !           162:                if ((3 & (long)w) != 0) {
        !           163:                        REDUCE;
        !           164:                        if ((1 & (long)w) != 0 && mlen >= 1) {
        !           165:                                ADDBYTE;
        !           166:                                ADVANCE(1);
        !           167:                        }
        !           168:                        if ((2 & (long)w) != 0 && mlen >= 2) {
        !           169:                                ADDSHORT;
        !           170:                                ADVANCE(2);
        !           171:                        }
        !           172:                }
        !           173:
        !           174:                /*
        !           175:                 * Do as many 32 bit operations as possible using the
        !           176:                 * 64/32/16/8/4 macro's above, using as many as possible of
        !           177:                 * these.
        !           178:                 */
        !           179:                while (mlen >= 64) {
        !           180:                        ADD64;
        !           181:                        ADVANCE(64);
        !           182:                }
        !           183:                if (mlen >= 32) {
        !           184:                        ADD32;
        !           185:                        ADVANCE(32);
        !           186:                }
        !           187:                if (mlen >= 16) {
        !           188:                        ADD16;
        !           189:                        ADVANCE(16);
        !           190:                }
        !           191:                if (mlen >= 8) {
        !           192:                        ADD8;
        !           193:                        ADVANCE(8);
        !           194:                }
        !           195:                if (mlen >= 4) {
        !           196:                        ADD4;
        !           197:                        ADVANCE(4)
        !           198:                }
        !           199:                if (mlen == 0)
        !           200:                        continue;
        !           201:
        !           202:                REDUCE;
        !           203:                if (mlen >= 2) {
        !           204:                        ADDSHORT;
        !           205:                        ADVANCE(2);
        !           206:                }
        !           207:                if (mlen == 1) {
        !           208:                        ADDBYTE;
        !           209:                }
        !           210:        }
        !           211:        if (byte_swapped) {
        !           212:                REDUCE;
        !           213:                ROL;
        !           214:        }
        !           215:        REDUCE;
        !           216:        ADDCARRY;
        !           217:
        !           218:        return (0xffff ^ sum);
        !           219: }
        !           220:
        !           221: int
        !           222: in_cksum(struct mbuf *m, int len)
        !           223: {
        !           224:
        !           225:        return (in_cksum_internal(m, 0, len, 0));
        !           226: }
        !           227:
        !           228: int
        !           229: in4_cksum(struct mbuf *m, u_int8_t nxt, int off, int len)
        !           230: {
        !           231:        u_char *w;
        !           232:        u_int sum = 0;
        !           233:        struct ipovly ipov;
        !           234:
        !           235:        /*
        !           236:         * Declare two temporary registers for use by the asm code.  We
        !           237:         * allow the compiler to pick which specific machine registers to
        !           238:         * use, instead of hard-coding this in the asm code above.
        !           239:         */
        !           240:        u_int tmp1, tmp2;
        !           241:
        !           242:        if (nxt != 0) {
        !           243:                /* pseudo header */
        !           244:                memset(&ipov, 0, sizeof(ipov));
        !           245:                ipov.ih_len = htons(len);
        !           246:                ipov.ih_pr = nxt;
        !           247:                ipov.ih_src = mtod(m, struct ip *)->ip_src;
        !           248:                ipov.ih_dst = mtod(m, struct ip *)->ip_dst;
        !           249:                w = (u_char *)&ipov;
        !           250:                /* assumes sizeof(ipov) == 20 */
        !           251:                ADD16;
        !           252:                w += 16;
        !           253:                ADD4;
        !           254:        }
        !           255:
        !           256:        /* skip unnecessary part */
        !           257:        while (m && off > 0) {
        !           258:                if (m->m_len > off)
        !           259:                        break;
        !           260:                off -= m->m_len;
        !           261:                m = m->m_next;
        !           262:        }
        !           263:
        !           264:        return (in_cksum_internal(m, off, len, sum));
        !           265: }
CVSweb