[BACK]Return to radix_mpath.c CVS log [TXT][DIR] Up to [local] / sys / net

Annotation of sys/net/radix_mpath.c, Revision 1.1

1.1     ! nbrk        1: /*     $OpenBSD: radix_mpath.c,v 1.7 2006/06/18 12:03:19 pascoe Exp $  */
        !             2: /*     $KAME: radix_mpath.c,v 1.13 2002/10/28 21:05:59 itojun Exp $    */
        !             3:
        !             4: /*
        !             5:  * Copyright (C) 2001 WIDE Project.
        !             6:  * All rights reserved.
        !             7:  *
        !             8:  * Redistribution and use in source and binary forms, with or without
        !             9:  * modification, are permitted provided that the following conditions
        !            10:  * are met:
        !            11:  * 1. Redistributions of source code must retain the above copyright
        !            12:  *    notice, this list of conditions and the following disclaimer.
        !            13:  * 2. Redistributions in binary form must reproduce the above copyright
        !            14:  *    notice, this list of conditions and the following disclaimer in the
        !            15:  *    documentation and/or other materials provided with the distribution.
        !            16:  * 3. Neither the name of the project nor the names of its contributors
        !            17:  *    may be used to endorse or promote products derived from this software
        !            18:  *    without specific prior written permission.
        !            19:  *
        !            20:  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
        !            21:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
        !            22:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
        !            23:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
        !            24:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
        !            25:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
        !            26:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
        !            27:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
        !            28:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
        !            29:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
        !            30:  * SUCH DAMAGE.
        !            31:  * THE AUTHORS DO NOT GUARANTEE THAT THIS SOFTWARE DOES NOT INFRINGE
        !            32:  * ANY OTHERS' INTELLECTUAL PROPERTIES. IN NO EVENT SHALL THE AUTHORS
        !            33:  * BE LIABLE FOR ANY INFRINGEMENT OF ANY OTHERS' INTELLECTUAL
        !            34:  * PROPERTIES.
        !            35:  */
        !            36:
        !            37: #include <sys/param.h>
        !            38: #include <sys/systm.h>
        !            39: #include <sys/malloc.h>
        !            40: #include <sys/socket.h>
        !            41: #define        M_DONTWAIT M_NOWAIT
        !            42: #include <sys/domain.h>
        !            43: #include <sys/syslog.h>
        !            44: #include <net/radix.h>
        !            45: #include <net/radix_mpath.h>
        !            46: #include <net/route.h>
        !            47: #include <dev/rndvar.h>
        !            48:
        !            49: #include <netinet/in.h>
        !            50:
        !            51: extern int ipmultipath;
        !            52: extern int ip6_multipath;
        !            53:
        !            54: u_int32_t rn_mpath_hash(struct route *, u_int32_t *);
        !            55:
        !            56: /*
        !            57:  * give some jitter to hash, to avoid synchronization between routers
        !            58:  */
        !            59: static u_int32_t hashjitter;
        !            60:
        !            61: int
        !            62: rn_mpath_capable(struct radix_node_head *rnh)
        !            63: {
        !            64:        return rnh->rnh_multipath;
        !            65: }
        !            66:
        !            67: struct radix_node *
        !            68: rn_mpath_next(struct radix_node *rn)
        !            69: {
        !            70:        struct radix_node *next;
        !            71:
        !            72:        if (!rn->rn_dupedkey)
        !            73:                return NULL;
        !            74:        next = rn->rn_dupedkey;
        !            75:        if (rn->rn_mask == next->rn_mask)
        !            76:                return next;
        !            77:        else
        !            78:                return NULL;
        !            79: }
        !            80:
        !            81: int
        !            82: rn_mpath_count(struct radix_node *rn)
        !            83: {
        !            84:        int i;
        !            85:
        !            86:        i = 1;
        !            87:        while ((rn = rn_mpath_next(rn)) != NULL)
        !            88:                i++;
        !            89:        return i;
        !            90: }
        !            91:
        !            92: struct rtentry *
        !            93: rt_mpath_matchgate(struct rtentry *rt, struct sockaddr *gate)
        !            94: {
        !            95:        struct radix_node *rn;
        !            96:
        !            97:        if (!rn_mpath_next((struct radix_node *)rt))
        !            98:                return rt;
        !            99:
        !           100:        if (!gate)
        !           101:                return NULL;
        !           102:        /* beyond here, we use rn as the master copy */
        !           103:        rn = (struct radix_node *)rt;
        !           104:        do {
        !           105:                rt = (struct rtentry *)rn;
        !           106:                if (rt->rt_gateway->sa_len == gate->sa_len &&
        !           107:                    !memcmp(rt->rt_gateway, gate, gate->sa_len))
        !           108:                        break;
        !           109:        } while ((rn = rn_mpath_next(rn)) != NULL);
        !           110:        if (!rn)
        !           111:                return NULL;
        !           112:
        !           113:        return (struct rtentry *)rn;
        !           114: }
        !           115:
        !           116: /*
        !           117:  * check if we have the same key/mask/gateway on the table already.
        !           118:  */
        !           119: int
        !           120: rt_mpath_conflict(struct radix_node_head *rnh, struct rtentry *rt,
        !           121:                   struct sockaddr *netmask, int mpathok)
        !           122: {
        !           123:        struct radix_node *rn, *rn1;
        !           124:        struct rtentry *rt1;
        !           125:        char *p, *q, *eq;
        !           126:        int same, l, skip;
        !           127:
        !           128:        rn = (struct radix_node *)rt;
        !           129:        rn1 = rnh->rnh_lookup(rt_key(rt), netmask, rnh);
        !           130:        if (!rn1 || rn1->rn_flags & RNF_ROOT)
        !           131:                return 0;
        !           132:
        !           133:        /*
        !           134:         * unlike other functions we have in this file, we have to check
        !           135:         * all key/mask/gateway as rnh_lookup can match less specific entry.
        !           136:         */
        !           137:        rt1 = (struct rtentry *)rn1;
        !           138:
        !           139:        /* compare key. */
        !           140:        if (rt_key(rt1)->sa_len != rt_key(rt)->sa_len ||
        !           141:            bcmp(rt_key(rt1), rt_key(rt), rt_key(rt1)->sa_len))
        !           142:                goto different;
        !           143:
        !           144:        /* key was the same.  compare netmask.  hairy... */
        !           145:        if (rt_mask(rt1) && netmask) {
        !           146:                skip = rnh->rnh_treetop->rn_off;
        !           147:                if (rt_mask(rt1)->sa_len > netmask->sa_len) {
        !           148:                        /*
        !           149:                         * as rt_mask(rt1) is made optimal by radix.c,
        !           150:                         * there must be some 1-bits on rt_mask(rt1)
        !           151:                         * after netmask->sa_len.  therefore, in
        !           152:                         * this case, the entries are different.
        !           153:                         */
        !           154:                        if (rt_mask(rt1)->sa_len > skip)
        !           155:                                goto different;
        !           156:                        else {
        !           157:                                /* no bits to compare, i.e. same*/
        !           158:                                goto maskmatched;
        !           159:                        }
        !           160:                }
        !           161:
        !           162:                l = rt_mask(rt1)->sa_len;
        !           163:                if (skip > l) {
        !           164:                        /* no bits to compare, i.e. same */
        !           165:                        goto maskmatched;
        !           166:                }
        !           167:                p = (char *)rt_mask(rt1);
        !           168:                q = (char *)netmask;
        !           169:                if (bcmp(p + skip, q + skip, l - skip))
        !           170:                        goto different;
        !           171:                /*
        !           172:                 * need to go through all the bit, as netmask is not
        !           173:                 * optimal and can contain trailing 0s
        !           174:                 */
        !           175:                eq = (char *)netmask + netmask->sa_len;
        !           176:                q += l;
        !           177:                same = 1;
        !           178:                while (eq > q)
        !           179:                        if (*q++) {
        !           180:                                same = 0;
        !           181:                                break;
        !           182:                        }
        !           183:                if (!same)
        !           184:                        goto different;
        !           185:        } else if (!rt_mask(rt1) && !netmask)
        !           186:                ; /* no mask to compare, i.e. same */
        !           187:        else {
        !           188:                /* one has mask and the other does not, different */
        !           189:                goto different;
        !           190:        }
        !           191:
        !           192:  maskmatched:
        !           193:        if (!mpathok)
        !           194:                return EEXIST;
        !           195:
        !           196:        /* key/mask were the same.  compare gateway for all multipaths */
        !           197:        do {
        !           198:                rt1 = (struct rtentry *)rn1;
        !           199:
        !           200:                /* sanity: no use in comparing the same thing */
        !           201:                if (rn1 == rn)
        !           202:                        continue;
        !           203:
        !           204:                if (rt1->rt_gateway->sa_len != rt->rt_gateway->sa_len ||
        !           205:                    bcmp(rt1->rt_gateway, rt->rt_gateway,
        !           206:                    rt1->rt_gateway->sa_len))
        !           207:                        continue;
        !           208:
        !           209:                /* all key/mask/gateway are the same.  conflicting entry. */
        !           210:                return EEXIST;
        !           211:        } while ((rn1 = rn_mpath_next(rn1)) != NULL);
        !           212:
        !           213:  different:
        !           214:        return 0;
        !           215: }
        !           216:
        !           217: /*
        !           218:  * allocate a route, potentially using multipath to select the peer.
        !           219:  */
        !           220: void
        !           221: rtalloc_mpath(struct route *ro, u_int32_t *srcaddrp, u_int tableid)
        !           222: {
        !           223: #if defined(INET) || defined(INET6)
        !           224:        struct radix_node *rn;
        !           225:        int hash, npaths, threshold;
        !           226: #endif
        !           227:
        !           228:        /*
        !           229:         * return a cached entry if it is still valid, otherwise we increase
        !           230:         * the risk of disrupting local flows.
        !           231:         */
        !           232:        if (ro->ro_rt && ro->ro_rt->rt_ifp && (ro->ro_rt->rt_flags & RTF_UP))
        !           233:                return;
        !           234:        ro->ro_rt = rtalloc1(&ro->ro_dst, 1, tableid);
        !           235:
        !           236:        /* if the route does not exist or it is not multipath, don't care */
        !           237:        if (!ro->ro_rt || !(ro->ro_rt->rt_flags & RTF_MPATH))
        !           238:                return;
        !           239:
        !           240:        /* check if multipath routing is enabled for the specified protocol */
        !           241:        if (!(0
        !           242: #ifdef INET
        !           243:            || (ipmultipath && ro->ro_dst.sa_family == AF_INET)
        !           244: #endif
        !           245: #ifdef INET6
        !           246:            || (ip6_multipath && ro->ro_dst.sa_family == AF_INET6)
        !           247: #endif
        !           248:            ))
        !           249:                return;
        !           250:
        !           251: #if defined(INET) || defined(INET6)
        !           252:        /* gw selection by Hash-Threshold (RFC 2992) */
        !           253:        rn = (struct radix_node *)ro->ro_rt;
        !           254:        npaths = rn_mpath_count(rn);
        !           255:        hash = rn_mpath_hash(ro, srcaddrp) & 0xffff;
        !           256:        threshold = 1 + (0xffff / npaths);
        !           257:        while (hash > threshold && rn) {
        !           258:                /* stay within the multipath routes */
        !           259:                if (rn->rn_dupedkey && rn->rn_mask != rn->rn_dupedkey->rn_mask)
        !           260:                        break;
        !           261:                rn = rn->rn_dupedkey;
        !           262:                hash -= threshold;
        !           263:        }
        !           264:
        !           265:        /* XXX try filling rt_gwroute and avoid unreachable gw  */
        !           266:
        !           267:        /* if gw selection fails, use the first match (default) */
        !           268:        if (!rn)
        !           269:                return;
        !           270:
        !           271:        rtfree(ro->ro_rt);
        !           272:        ro->ro_rt = (struct rtentry *)rn;
        !           273:        ro->ro_rt->rt_refcnt++;
        !           274: #endif
        !           275: }
        !           276:
        !           277: int
        !           278: rn_mpath_inithead(void **head, int off)
        !           279: {
        !           280:        struct radix_node_head *rnh;
        !           281:
        !           282:        while (hashjitter == 0)
        !           283:                hashjitter = arc4random();
        !           284:        if (rn_inithead(head, off) == 1) {
        !           285:                rnh = (struct radix_node_head *)*head;
        !           286:                rnh->rnh_multipath = 1;
        !           287:                return 1;
        !           288:        } else
        !           289:                return 0;
        !           290: }
        !           291:
        !           292: /*
        !           293:  * hash function based on pf_hash in pf.c
        !           294:  */
        !           295: #define mix(a,b,c) \
        !           296:        do {                                    \
        !           297:                a -= b; a -= c; a ^= (c >> 13); \
        !           298:                b -= c; b -= a; b ^= (a << 8);  \
        !           299:                c -= a; c -= b; c ^= (b >> 13); \
        !           300:                a -= b; a -= c; a ^= (c >> 12); \
        !           301:                b -= c; b -= a; b ^= (a << 16); \
        !           302:                c -= a; c -= b; c ^= (b >> 5);  \
        !           303:                a -= b; a -= c; a ^= (c >> 3);  \
        !           304:                b -= c; b -= a; b ^= (a << 10); \
        !           305:                c -= a; c -= b; c ^= (b >> 15); \
        !           306:        } while (0)
        !           307:
        !           308: u_int32_t
        !           309: rn_mpath_hash(struct route *ro, u_int32_t *srcaddrp)
        !           310: {
        !           311:        u_int32_t a, b, c;
        !           312:
        !           313:        a = b = 0x9e3779b9;
        !           314:        c = hashjitter;
        !           315:
        !           316:        switch (ro->ro_dst.sa_family) {
        !           317: #ifdef INET
        !           318:        case AF_INET:
        !           319:            {
        !           320:                struct sockaddr_in *sin_dst;
        !           321:
        !           322:                sin_dst = (struct sockaddr_in *)&ro->ro_dst;
        !           323:                a += sin_dst->sin_addr.s_addr;
        !           324:                b += srcaddrp ? srcaddrp[0] : 0;
        !           325:                mix(a, b, c);
        !           326:                break;
        !           327:            }
        !           328: #endif /* INET */
        !           329: #ifdef INET6
        !           330:        case AF_INET6:
        !           331:            {
        !           332:                struct sockaddr_in6 *sin6_dst;
        !           333:
        !           334:                sin6_dst = (struct sockaddr_in6 *)&ro->ro_dst;
        !           335:                a += sin6_dst->sin6_addr.s6_addr32[0];
        !           336:                b += sin6_dst->sin6_addr.s6_addr32[2];
        !           337:                c += srcaddrp ? srcaddrp[0] : 0;
        !           338:                mix(a, b, c);
        !           339:                a += sin6_dst->sin6_addr.s6_addr32[1];
        !           340:                b += sin6_dst->sin6_addr.s6_addr32[3];
        !           341:                c += srcaddrp ? srcaddrp[1] : 0;
        !           342:                mix(a, b, c);
        !           343:                a += sin6_dst->sin6_addr.s6_addr32[2];
        !           344:                b += sin6_dst->sin6_addr.s6_addr32[1];
        !           345:                c += srcaddrp ? srcaddrp[2] : 0;
        !           346:                mix(a, b, c);
        !           347:                a += sin6_dst->sin6_addr.s6_addr32[3];
        !           348:                b += sin6_dst->sin6_addr.s6_addr32[0];
        !           349:                c += srcaddrp ? srcaddrp[3] : 0;
        !           350:                mix(a, b, c);
        !           351:                break;
        !           352:            }
        !           353: #endif /* INET6 */
        !           354:        }
        !           355:
        !           356:        return c;
        !           357: }

CVSweb