[BACK]Return to altq_rio.c CVS log [TXT][DIR] Up to [local] / sys / altq

Annotation of sys/altq/altq_rio.c, Revision 1.1

1.1     ! nbrk        1: /*     $OpenBSD: altq_rio.c,v 1.10 2007/06/17 19:58:58 jasper Exp $    */
        !             2: /*     $KAME: altq_rio.c,v 1.8 2000/12/14 08:12:46 thorpej Exp $       */
        !             3:
        !             4: /*
        !             5:  * Copyright (C) 1998-2000
        !             6:  *     Sony Computer Science Laboratories Inc.  All rights reserved.
        !             7:  *
        !             8:  * Redistribution and use in source and binary forms, with or without
        !             9:  * modification, are permitted provided that the following conditions
        !            10:  * are met:
        !            11:  * 1. Redistributions of source code must retain the above copyright
        !            12:  *    notice, this list of conditions and the following disclaimer.
        !            13:  * 2. Redistributions in binary form must reproduce the above copyright
        !            14:  *    notice, this list of conditions and the following disclaimer in the
        !            15:  *    documentation and/or other materials provided with the distribution.
        !            16:  *
        !            17:  * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
        !            18:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
        !            19:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
        !            20:  * ARE DISCLAIMED.  IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
        !            21:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
        !            22:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
        !            23:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
        !            24:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
        !            25:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
        !            26:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
        !            27:  * SUCH DAMAGE.
        !            28:  */
        !            29: /*
        !            30:  * Copyright (c) 1990-1994 Regents of the University of California.
        !            31:  * All rights reserved.
        !            32:  *
        !            33:  * Redistribution and use in source and binary forms, with or without
        !            34:  * modification, are permitted provided that the following conditions
        !            35:  * are met:
        !            36:  * 1. Redistributions of source code must retain the above copyright
        !            37:  *    notice, this list of conditions and the following disclaimer.
        !            38:  * 2. Redistributions in binary form must reproduce the above copyright
        !            39:  *    notice, this list of conditions and the following disclaimer in the
        !            40:  *    documentation and/or other materials provided with the distribution.
        !            41:  * 3. All advertising materials mentioning features or use of this software
        !            42:  *    must display the following acknowledgement:
        !            43:  *     This product includes software developed by the Computer Systems
        !            44:  *     Engineering Group at Lawrence Berkeley Laboratory.
        !            45:  * 4. Neither the name of the University nor of the Laboratory may be used
        !            46:  *    to endorse or promote products derived from this software without
        !            47:  *    specific prior written permission.
        !            48:  *
        !            49:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
        !            50:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
        !            51:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
        !            52:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
        !            53:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
        !            54:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
        !            55:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
        !            56:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
        !            57:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
        !            58:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
        !            59:  * SUCH DAMAGE.
        !            60:  */
        !            61:
        !            62: #include <sys/param.h>
        !            63: #include <sys/malloc.h>
        !            64: #include <sys/mbuf.h>
        !            65: #include <sys/socket.h>
        !            66: #include <sys/systm.h>
        !            67: #include <sys/errno.h>
        !            68:
        !            69: #include <net/if.h>
        !            70: #include <net/if_types.h>
        !            71:
        !            72: #include <netinet/in.h>
        !            73: #include <netinet/in_systm.h>
        !            74: #include <netinet/ip.h>
        !            75: #ifdef INET6
        !            76: #include <netinet/ip6.h>
        !            77: #endif
        !            78:
        !            79: #include <net/pfvar.h>
        !            80: #include <altq/altq.h>
        !            81: #include <altq/altq_cdnr.h>
        !            82: #include <altq/altq_red.h>
        !            83: #include <altq/altq_rio.h>
        !            84:
        !            85: /*
        !            86:  * RIO: RED with IN/OUT bit
        !            87:  *   described in
        !            88:  *     "Explicit Allocation of Best Effort Packet Delivery Service"
        !            89:  *     David D. Clark and Wenjia Fang, MIT Lab for Computer Science
        !            90:  *     http://diffserv.lcs.mit.edu/Papers/exp-alloc-ddc-wf.{ps,pdf}
        !            91:  *
        !            92:  * this implementation is extended to support more than 2 drop precedence
        !            93:  * values as described in RFC2597 (Assured Forwarding PHB Group).
        !            94:  *
        !            95:  */
        !            96: /*
        !            97:  * AF DS (differentiated service) codepoints.
        !            98:  * (classes can be mapped to CBQ or H-FSC classes.)
        !            99:  *
        !           100:  *      0   1   2   3   4   5   6   7
        !           101:  *    +---+---+---+---+---+---+---+---+
        !           102:  *    |   CLASS   |DropPre| 0 |  CU   |
        !           103:  *    +---+---+---+---+---+---+---+---+
        !           104:  *
        !           105:  *    class 1: 001
        !           106:  *    class 2: 010
        !           107:  *    class 3: 011
        !           108:  *    class 4: 100
        !           109:  *
        !           110:  *    low drop prec:    01
        !           111:  *    medium drop prec: 10
        !           112:  *    high drop prec:   01
        !           113:  */
        !           114:
        !           115: /* normal red parameters */
        !           116: #define        W_WEIGHT        512     /* inverse of weight of EWMA (511/512) */
        !           117:                                /* q_weight = 0.00195 */
        !           118:
        !           119: /* red parameters for a slow link */
        !           120: #define        W_WEIGHT_1      128     /* inverse of weight of EWMA (127/128) */
        !           121:                                /* q_weight = 0.0078125 */
        !           122:
        !           123: /* red parameters for a very slow link (e.g., dialup) */
        !           124: #define        W_WEIGHT_2      64      /* inverse of weight of EWMA (63/64) */
        !           125:                                /* q_weight = 0.015625 */
        !           126:
        !           127: /* fixed-point uses 12-bit decimal places */
        !           128: #define        FP_SHIFT        12      /* fixed-point shift */
        !           129:
        !           130: /* red parameters for drop probability */
        !           131: #define        INV_P_MAX       10      /* inverse of max drop probability */
        !           132: #define        TH_MIN           5      /* min threshold */
        !           133: #define        TH_MAX          15      /* max threshold */
        !           134:
        !           135: #define        RIO_LIMIT       60      /* default max queue length */
        !           136: #define        RIO_STATS               /* collect statistics */
        !           137:
        !           138: #define        TV_DELTA(a, b, delta) {                                 \
        !           139:        int     xxs;                                    \
        !           140:                                                                \
        !           141:        delta = (a)->tv_usec - (b)->tv_usec;                    \
        !           142:        if ((xxs = (a)->tv_sec - (b)->tv_sec) != 0) {           \
        !           143:                if (xxs < 0) {                                  \
        !           144:                        delta = 60000000;                       \
        !           145:                } else if (xxs > 4)  {                          \
        !           146:                        if (xxs > 60)                           \
        !           147:                                delta = 60000000;               \
        !           148:                        else                                    \
        !           149:                                delta += xxs * 1000000;         \
        !           150:                } else while (xxs > 0) {                        \
        !           151:                        delta += 1000000;                       \
        !           152:                        xxs--;                                  \
        !           153:                }                                               \
        !           154:        }                                                       \
        !           155: }
        !           156:
        !           157: /* default rio parameter values */
        !           158: static struct redparams default_rio_params[RIO_NDROPPREC] = {
        !           159:   /* th_min,            th_max,     inv_pmax */
        !           160:   { TH_MAX * 2 + TH_MIN, TH_MAX * 3, INV_P_MAX }, /* low drop precedence */
        !           161:   { TH_MAX + TH_MIN,    TH_MAX * 2, INV_P_MAX }, /* medium drop precedence */
        !           162:   { TH_MIN,             TH_MAX,     INV_P_MAX }  /* high drop precedence */
        !           163: };
        !           164:
        !           165: /* internal function prototypes */
        !           166: static int dscp2index(u_int8_t);
        !           167:
        !           168: rio_t *
        !           169: rio_alloc(int weight, struct redparams *params, int flags, int pkttime)
        !           170: {
        !           171:        rio_t   *rp;
        !           172:        int      w, i;
        !           173:        int      npkts_per_sec;
        !           174:
        !           175:        MALLOC(rp, rio_t *, sizeof(rio_t), M_DEVBUF, M_WAITOK);
        !           176:        if (rp == NULL)
        !           177:                return (NULL);
        !           178:        bzero(rp, sizeof(rio_t));
        !           179:
        !           180:        rp->rio_flags = flags;
        !           181:        if (pkttime == 0)
        !           182:                /* default packet time: 1000 bytes / 10Mbps * 8 * 1000000 */
        !           183:                rp->rio_pkttime = 800;
        !           184:        else
        !           185:                rp->rio_pkttime = pkttime;
        !           186:
        !           187:        if (weight != 0)
        !           188:                rp->rio_weight = weight;
        !           189:        else {
        !           190:                /* use default */
        !           191:                rp->rio_weight = W_WEIGHT;
        !           192:
        !           193:                /* when the link is very slow, adjust red parameters */
        !           194:                npkts_per_sec = 1000000 / rp->rio_pkttime;
        !           195:                if (npkts_per_sec < 50) {
        !           196:                        /* up to about 400Kbps */
        !           197:                        rp->rio_weight = W_WEIGHT_2;
        !           198:                } else if (npkts_per_sec < 300) {
        !           199:                        /* up to about 2.4Mbps */
        !           200:                        rp->rio_weight = W_WEIGHT_1;
        !           201:                }
        !           202:        }
        !           203:
        !           204:        /* calculate wshift.  weight must be power of 2 */
        !           205:        w = rp->rio_weight;
        !           206:        for (i = 0; w > 1; i++)
        !           207:                w = w >> 1;
        !           208:        rp->rio_wshift = i;
        !           209:        w = 1 << rp->rio_wshift;
        !           210:        if (w != rp->rio_weight) {
        !           211:                printf("invalid weight value %d for red! use %d\n",
        !           212:                       rp->rio_weight, w);
        !           213:                rp->rio_weight = w;
        !           214:        }
        !           215:
        !           216:        /* allocate weight table */
        !           217:        rp->rio_wtab = wtab_alloc(rp->rio_weight);
        !           218:
        !           219:        for (i = 0; i < RIO_NDROPPREC; i++) {
        !           220:                struct dropprec_state *prec = &rp->rio_precstate[i];
        !           221:
        !           222:                prec->avg = 0;
        !           223:                prec->idle = 1;
        !           224:
        !           225:                if (params == NULL || params[i].inv_pmax == 0)
        !           226:                        prec->inv_pmax = default_rio_params[i].inv_pmax;
        !           227:                else
        !           228:                        prec->inv_pmax = params[i].inv_pmax;
        !           229:                if (params == NULL || params[i].th_min == 0)
        !           230:                        prec->th_min = default_rio_params[i].th_min;
        !           231:                else
        !           232:                        prec->th_min = params[i].th_min;
        !           233:                if (params == NULL || params[i].th_max == 0)
        !           234:                        prec->th_max = default_rio_params[i].th_max;
        !           235:                else
        !           236:                        prec->th_max = params[i].th_max;
        !           237:
        !           238:                /*
        !           239:                 * th_min_s and th_max_s are scaled versions of th_min
        !           240:                 * and th_max to be compared with avg.
        !           241:                 */
        !           242:                prec->th_min_s = prec->th_min << (rp->rio_wshift + FP_SHIFT);
        !           243:                prec->th_max_s = prec->th_max << (rp->rio_wshift + FP_SHIFT);
        !           244:
        !           245:                /*
        !           246:                 * precompute probability denominator
        !           247:                 *  probd = (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point
        !           248:                 */
        !           249:                prec->probd = (2 * (prec->th_max - prec->th_min)
        !           250:                               * prec->inv_pmax) << FP_SHIFT;
        !           251:
        !           252:                microtime(&prec->last);
        !           253:        }
        !           254:
        !           255:        return (rp);
        !           256: }
        !           257:
        !           258: void
        !           259: rio_destroy(rio_t *rp)
        !           260: {
        !           261:        wtab_destroy(rp->rio_wtab);
        !           262:        FREE(rp, M_DEVBUF);
        !           263: }
        !           264:
        !           265: void
        !           266: rio_getstats(rio_t *rp, struct redstats *sp)
        !           267: {
        !           268:        int     i;
        !           269:
        !           270:        for (i = 0; i < RIO_NDROPPREC; i++) {
        !           271:                bcopy(&rp->q_stats[i], sp, sizeof(struct redstats));
        !           272:                sp->q_avg = rp->rio_precstate[i].avg >> rp->rio_wshift;
        !           273:                sp++;
        !           274:        }
        !           275: }
        !           276:
        !           277: #if (RIO_NDROPPREC == 3)
        !           278: /*
        !           279:  * internally, a drop precedence value is converted to an index
        !           280:  * starting from 0.
        !           281:  */
        !           282: static int
        !           283: dscp2index(u_int8_t dscp)
        !           284: {
        !           285:        int     dpindex = dscp & AF_DROPPRECMASK;
        !           286:
        !           287:        if (dpindex == 0)
        !           288:                return (0);
        !           289:        return ((dpindex >> 3) - 1);
        !           290: }
        !           291: #endif
        !           292:
        !           293: #if 1
        !           294: /*
        !           295:  * kludge: when a packet is dequeued, we need to know its drop precedence
        !           296:  * in order to keep the queue length of each drop precedence.
        !           297:  * use m_pkthdr.rcvif to pass this info.
        !           298:  */
        !           299: #define        RIOM_SET_PRECINDEX(m, idx)      \
        !           300:        do { (m)->m_pkthdr.rcvif = (struct ifnet *)((long)(idx)); } while (0)
        !           301: #define        RIOM_GET_PRECINDEX(m)   \
        !           302:        ({ long idx; idx = (long)((m)->m_pkthdr.rcvif); \
        !           303:        (m)->m_pkthdr.rcvif = NULL; idx; })
        !           304: #endif
        !           305:
        !           306: int
        !           307: rio_addq(rio_t *rp, class_queue_t *q, struct mbuf *m,
        !           308:     struct altq_pktattr *pktattr)
        !           309: {
        !           310:        int                      avg, droptype;
        !           311:        u_int8_t                 dsfield, odsfield;
        !           312:        int                      dpindex, i, n, t;
        !           313:        struct timeval           now;
        !           314:        struct dropprec_state   *prec;
        !           315:
        !           316:        dsfield = odsfield = read_dsfield(m, pktattr);
        !           317:        dpindex = dscp2index(dsfield);
        !           318:
        !           319:        /*
        !           320:         * update avg of the precedence states whose drop precedence
        !           321:         * is larger than or equal to the drop precedence of the packet
        !           322:         */
        !           323:        now.tv_sec = 0;
        !           324:        for (i = dpindex; i < RIO_NDROPPREC; i++) {
        !           325:                prec = &rp->rio_precstate[i];
        !           326:                avg = prec->avg;
        !           327:                if (prec->idle) {
        !           328:                        prec->idle = 0;
        !           329:                        if (now.tv_sec == 0)
        !           330:                                microtime(&now);
        !           331:                        t = (now.tv_sec - prec->last.tv_sec);
        !           332:                        if (t > 60)
        !           333:                                avg = 0;
        !           334:                        else {
        !           335:                                t = t * 1000000 +
        !           336:                                        (now.tv_usec - prec->last.tv_usec);
        !           337:                                n = t / rp->rio_pkttime;
        !           338:                                /* calculate (avg = (1 - Wq)^n * avg) */
        !           339:                                if (n > 0)
        !           340:                                        avg = (avg >> FP_SHIFT) *
        !           341:                                                pow_w(rp->rio_wtab, n);
        !           342:                        }
        !           343:                }
        !           344:
        !           345:                /* run estimator. (avg is scaled by WEIGHT in fixed-point) */
        !           346:                avg += (prec->qlen << FP_SHIFT) - (avg >> rp->rio_wshift);
        !           347:                prec->avg = avg;                /* save the new value */
        !           348:                /*
        !           349:                 * count keeps a tally of arriving traffic that has not
        !           350:                 * been dropped.
        !           351:                 */
        !           352:                prec->count++;
        !           353:        }
        !           354:
        !           355:        prec = &rp->rio_precstate[dpindex];
        !           356:        avg = prec->avg;
        !           357:
        !           358:        /* see if we drop early */
        !           359:        droptype = DTYPE_NODROP;
        !           360:        if (avg >= prec->th_min_s && prec->qlen > 1) {
        !           361:                if (avg >= prec->th_max_s) {
        !           362:                        /* avg >= th_max: forced drop */
        !           363:                        droptype = DTYPE_FORCED;
        !           364:                } else if (prec->old == 0) {
        !           365:                        /* first exceeds th_min */
        !           366:                        prec->count = 1;
        !           367:                        prec->old = 1;
        !           368:                } else if (drop_early((avg - prec->th_min_s) >> rp->rio_wshift,
        !           369:                                      prec->probd, prec->count)) {
        !           370:                        /* unforced drop by red */
        !           371:                        droptype = DTYPE_EARLY;
        !           372:                }
        !           373:        } else {
        !           374:                /* avg < th_min */
        !           375:                prec->old = 0;
        !           376:        }
        !           377:
        !           378:        /*
        !           379:         * if the queue length hits the hard limit, it's a forced drop.
        !           380:         */
        !           381:        if (droptype == DTYPE_NODROP && qlen(q) >= qlimit(q))
        !           382:                droptype = DTYPE_FORCED;
        !           383:
        !           384:        if (droptype != DTYPE_NODROP) {
        !           385:                /* always drop incoming packet (as opposed to randomdrop) */
        !           386:                for (i = dpindex; i < RIO_NDROPPREC; i++)
        !           387:                        rp->rio_precstate[i].count = 0;
        !           388: #ifdef RIO_STATS
        !           389:                if (droptype == DTYPE_EARLY)
        !           390:                        rp->q_stats[dpindex].drop_unforced++;
        !           391:                else
        !           392:                        rp->q_stats[dpindex].drop_forced++;
        !           393:                PKTCNTR_ADD(&rp->q_stats[dpindex].drop_cnt, m_pktlen(m));
        !           394: #endif
        !           395:                m_freem(m);
        !           396:                return (-1);
        !           397:        }
        !           398:
        !           399:        for (i = dpindex; i < RIO_NDROPPREC; i++)
        !           400:                rp->rio_precstate[i].qlen++;
        !           401:
        !           402:        /* save drop precedence index in mbuf hdr */
        !           403:        RIOM_SET_PRECINDEX(m, dpindex);
        !           404:
        !           405:        if (rp->rio_flags & RIOF_CLEARDSCP)
        !           406:                dsfield &= ~DSCP_MASK;
        !           407:
        !           408:        if (dsfield != odsfield)
        !           409:                write_dsfield(m, pktattr, dsfield);
        !           410:
        !           411:        _addq(q, m);
        !           412:
        !           413: #ifdef RIO_STATS
        !           414:        PKTCNTR_ADD(&rp->q_stats[dpindex].xmit_cnt, m_pktlen(m));
        !           415: #endif
        !           416:        return (0);
        !           417: }
        !           418:
        !           419: struct mbuf *
        !           420: rio_getq(rio_t *rp, class_queue_t *q)
        !           421: {
        !           422:        struct mbuf     *m;
        !           423:        int              dpindex, i;
        !           424:
        !           425:        if ((m = _getq(q)) == NULL)
        !           426:                return NULL;
        !           427:
        !           428:        dpindex = RIOM_GET_PRECINDEX(m);
        !           429:        for (i = dpindex; i < RIO_NDROPPREC; i++) {
        !           430:                if (--rp->rio_precstate[i].qlen == 0) {
        !           431:                        if (rp->rio_precstate[i].idle == 0) {
        !           432:                                rp->rio_precstate[i].idle = 1;
        !           433:                                microtime(&rp->rio_precstate[i].last);
        !           434:                        }
        !           435:                }
        !           436:        }
        !           437:        return (m);
        !           438: }

CVSweb