Annotation of sys/altq/altq_rmclass.c, Revision 1.1
1.1 ! nbrk 1: /* $OpenBSD: altq_rmclass.c,v 1.12 2006/03/04 22:40:15 brad Exp $ */
! 2: /* $KAME: altq_rmclass.c,v 1.10 2001/02/09 07:20:40 kjc Exp $ */
! 3:
! 4: /*
! 5: * Copyright (c) 1991-1997 Regents of the University of California.
! 6: * All rights reserved.
! 7: *
! 8: * Redistribution and use in source and binary forms, with or without
! 9: * modification, are permitted provided that the following conditions
! 10: * are met:
! 11: * 1. Redistributions of source code must retain the above copyright
! 12: * notice, this list of conditions and the following disclaimer.
! 13: * 2. Redistributions in binary form must reproduce the above copyright
! 14: * notice, this list of conditions and the following disclaimer in the
! 15: * documentation and/or other materials provided with the distribution.
! 16: * 3. All advertising materials mentioning features or use of this software
! 17: * must display the following acknowledgement:
! 18: * This product includes software developed by the Network Research
! 19: * Group at Lawrence Berkeley Laboratory.
! 20: * 4. Neither the name of the University nor of the Laboratory may be used
! 21: * to endorse or promote products derived from this software without
! 22: * specific prior written permission.
! 23: *
! 24: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
! 25: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
! 26: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
! 27: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
! 28: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
! 29: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
! 30: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
! 31: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
! 32: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
! 33: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
! 34: * SUCH DAMAGE.
! 35: *
! 36: * LBL code modified by speer@eng.sun.com, May 1977.
! 37: * For questions and/or comments, please send mail to cbq@ee.lbl.gov
! 38: */
! 39:
! 40: #include <sys/param.h>
! 41: #include <sys/malloc.h>
! 42: #include <sys/mbuf.h>
! 43: #include <sys/socket.h>
! 44: #include <sys/systm.h>
! 45: #include <sys/errno.h>
! 46: #include <sys/time.h>
! 47:
! 48: #include <net/if.h>
! 49:
! 50: #include <altq/altq.h>
! 51: #include <altq/altq_rmclass.h>
! 52: #include <altq/altq_rmclass_debug.h>
! 53: #include <altq/altq_red.h>
! 54: #include <altq/altq_rio.h>
! 55:
! 56: /*
! 57: * Local Macros
! 58: */
! 59:
! 60: #define reset_cutoff(ifd) { ifd->cutoff_ = RM_MAXDEPTH; }
! 61:
! 62: /*
! 63: * Local routines.
! 64: */
! 65:
! 66: static int rmc_satisfied(struct rm_class *, struct timeval *);
! 67: static void rmc_wrr_set_weights(struct rm_ifdat *);
! 68: static void rmc_depth_compute(struct rm_class *);
! 69: static void rmc_depth_recompute(rm_class_t *);
! 70:
! 71: static mbuf_t *_rmc_wrr_dequeue_next(struct rm_ifdat *, int);
! 72: static mbuf_t *_rmc_prr_dequeue_next(struct rm_ifdat *, int);
! 73:
! 74: static int _rmc_addq(rm_class_t *, mbuf_t *);
! 75: static void _rmc_dropq(rm_class_t *);
! 76: static mbuf_t *_rmc_getq(rm_class_t *);
! 77: static mbuf_t *_rmc_pollq(rm_class_t *);
! 78:
! 79: static int rmc_under_limit(struct rm_class *, struct timeval *);
! 80: static void rmc_tl_satisfied(struct rm_ifdat *, struct timeval *);
! 81: static void rmc_drop_action(struct rm_class *);
! 82: static void rmc_restart(struct rm_class *);
! 83: static void rmc_root_overlimit(struct rm_class *, struct rm_class *);
! 84:
! 85: #define BORROW_OFFTIME
! 86: /*
! 87: * BORROW_OFFTIME (experimental):
! 88: * borrow the offtime of the class borrowing from.
! 89: * the reason is that when its own offtime is set, the class is unable
! 90: * to borrow much, especially when cutoff is taking effect.
! 91: * but when the borrowed class is overloaded (advidle is close to minidle),
! 92: * use the borrowing class's offtime to avoid overload.
! 93: */
! 94: #define ADJUST_CUTOFF
! 95: /*
! 96: * ADJUST_CUTOFF (experimental):
! 97: * if no underlimit class is found due to cutoff, increase cutoff and
! 98: * retry the scheduling loop.
! 99: * also, don't invoke delay_actions while cutoff is taking effect,
! 100: * since a sleeping class won't have a chance to be scheduled in the
! 101: * next loop.
! 102: *
! 103: * now heuristics for setting the top-level variable (cutoff_) becomes:
! 104: * 1. if a packet arrives for a not-overlimit class, set cutoff
! 105: * to the depth of the class.
! 106: * 2. if cutoff is i, and a packet arrives for an overlimit class
! 107: * with an underlimit ancestor at a lower level than i (say j),
! 108: * then set cutoff to j.
! 109: * 3. at scheduling a packet, if there is no underlimit class
! 110: * due to the current cutoff level, increase cutoff by 1 and
! 111: * then try to schedule again.
! 112: */
! 113:
! 114: /*
! 115: * rm_class_t *
! 116: * rmc_newclass(...) - Create a new resource management class at priority
! 117: * 'pri' on the interface given by 'ifd'.
! 118: *
! 119: * nsecPerByte is the data rate of the interface in nanoseconds/byte.
! 120: * E.g., 800 for a 10Mb/s ethernet. If the class gets less
! 121: * than 100% of the bandwidth, this number should be the
! 122: * 'effective' rate for the class. Let f be the
! 123: * bandwidth fraction allocated to this class, and let
! 124: * nsPerByte be the data rate of the output link in
! 125: * nanoseconds/byte. Then nsecPerByte is set to
! 126: * nsPerByte / f. E.g., 1600 (= 800 / .5)
! 127: * for a class that gets 50% of an ethernet's bandwidth.
! 128: *
! 129: * action the routine to call when the class is over limit.
! 130: *
! 131: * maxq max allowable queue size for class (in packets).
! 132: *
! 133: * parent parent class pointer.
! 134: *
! 135: * borrow class to borrow from (should be either 'parent' or null).
! 136: *
! 137: * maxidle max value allowed for class 'idle' time estimate (this
! 138: * parameter determines how large an initial burst of packets
! 139: * can be before overlimit action is invoked.
! 140: *
! 141: * offtime how long 'delay' action will delay when class goes over
! 142: * limit (this parameter determines the steady-state burst
! 143: * size when a class is running over its limit).
! 144: *
! 145: * Maxidle and offtime have to be computed from the following: If the
! 146: * average packet size is s, the bandwidth fraction allocated to this
! 147: * class is f, we want to allow b packet bursts, and the gain of the
! 148: * averaging filter is g (= 1 - 2^(-RM_FILTER_GAIN)), then:
! 149: *
! 150: * ptime = s * nsPerByte * (1 - f) / f
! 151: * maxidle = ptime * (1 - g^b) / g^b
! 152: * minidle = -ptime * (1 / (f - 1))
! 153: * offtime = ptime * (1 + 1/(1 - g) * (1 - g^(b - 1)) / g^(b - 1)
! 154: *
! 155: * Operationally, it's convenient to specify maxidle & offtime in units
! 156: * independent of the link bandwidth so the maxidle & offtime passed to
! 157: * this routine are the above values multiplied by 8*f/(1000*nsPerByte).
! 158: * (The constant factor is a scale factor needed to make the parameters
! 159: * integers. This scaling also means that the 'unscaled' values of
! 160: * maxidle*nsecPerByte/8 and offtime*nsecPerByte/8 will be in microseconds,
! 161: * not nanoseconds.) Also note that the 'idle' filter computation keeps
! 162: * an estimate scaled upward by 2^RM_FILTER_GAIN so the passed value of
! 163: * maxidle also must be scaled upward by this value. Thus, the passed
! 164: * values for maxidle and offtime can be computed as follows:
! 165: *
! 166: * maxidle = maxidle * 2^RM_FILTER_GAIN * 8 / (1000 * nsecPerByte)
! 167: * offtime = offtime * 8 / (1000 * nsecPerByte)
! 168: *
! 169: * When USE_HRTIME is employed, then maxidle and offtime become:
! 170: * maxidle = maxilde * (8.0 / nsecPerByte);
! 171: * offtime = offtime * (8.0 / nsecPerByte);
! 172: */
! 173:
! 174: struct rm_class *
! 175: rmc_newclass(int pri, struct rm_ifdat *ifd, u_int nsecPerByte,
! 176: void (*action)(rm_class_t *, rm_class_t *), int maxq,
! 177: struct rm_class *parent, struct rm_class *borrow, u_int maxidle,
! 178: int minidle, u_int offtime, int pktsize, int flags)
! 179: {
! 180: struct rm_class *cl;
! 181: struct rm_class *peer;
! 182: int s;
! 183:
! 184: if (pri >= RM_MAXPRIO)
! 185: return (NULL);
! 186: #ifndef ALTQ_RED
! 187: if (flags & RMCF_RED) {
! 188: #ifdef ALTQ_DEBUG
! 189: printf("rmc_newclass: RED not configured for CBQ!\n");
! 190: #endif
! 191: return (NULL);
! 192: }
! 193: #endif
! 194: #ifndef ALTQ_RIO
! 195: if (flags & RMCF_RIO) {
! 196: #ifdef ALTQ_DEBUG
! 197: printf("rmc_newclass: RIO not configured for CBQ!\n");
! 198: #endif
! 199: return (NULL);
! 200: }
! 201: #endif
! 202:
! 203: MALLOC(cl, struct rm_class *, sizeof(struct rm_class),
! 204: M_DEVBUF, M_WAITOK);
! 205: if (cl == NULL)
! 206: return (NULL);
! 207: bzero(cl, sizeof(struct rm_class));
! 208: CALLOUT_INIT(&cl->callout_);
! 209: MALLOC(cl->q_, class_queue_t *, sizeof(class_queue_t),
! 210: M_DEVBUF, M_WAITOK);
! 211: if (cl->q_ == NULL) {
! 212: FREE(cl, M_DEVBUF);
! 213: return (NULL);
! 214: }
! 215: bzero(cl->q_, sizeof(class_queue_t));
! 216:
! 217: /*
! 218: * Class initialization.
! 219: */
! 220: cl->children_ = NULL;
! 221: cl->parent_ = parent;
! 222: cl->borrow_ = borrow;
! 223: cl->leaf_ = 1;
! 224: cl->ifdat_ = ifd;
! 225: cl->pri_ = pri;
! 226: cl->allotment_ = RM_NS_PER_SEC / nsecPerByte; /* Bytes per sec */
! 227: cl->depth_ = 0;
! 228: cl->qthresh_ = 0;
! 229: cl->ns_per_byte_ = nsecPerByte;
! 230:
! 231: qlimit(cl->q_) = maxq;
! 232: qtype(cl->q_) = Q_DROPHEAD;
! 233: qlen(cl->q_) = 0;
! 234: cl->flags_ = flags;
! 235:
! 236: #if 1 /* minidle is also scaled in ALTQ */
! 237: cl->minidle_ = (minidle * (int)nsecPerByte) / 8;
! 238: if (cl->minidle_ > 0)
! 239: cl->minidle_ = 0;
! 240: #else
! 241: cl->minidle_ = minidle;
! 242: #endif
! 243: cl->maxidle_ = (maxidle * nsecPerByte) / 8;
! 244: if (cl->maxidle_ == 0)
! 245: cl->maxidle_ = 1;
! 246: #if 1 /* offtime is also scaled in ALTQ */
! 247: cl->avgidle_ = cl->maxidle_;
! 248: cl->offtime_ = ((offtime * nsecPerByte) / 8) >> RM_FILTER_GAIN;
! 249: if (cl->offtime_ == 0)
! 250: cl->offtime_ = 1;
! 251: #else
! 252: cl->avgidle_ = 0;
! 253: cl->offtime_ = (offtime * nsecPerByte) / 8;
! 254: #endif
! 255: cl->overlimit = action;
! 256:
! 257: #ifdef ALTQ_RED
! 258: if (flags & (RMCF_RED|RMCF_RIO)) {
! 259: int red_flags, red_pkttime;
! 260:
! 261: red_flags = 0;
! 262: if (flags & RMCF_ECN)
! 263: red_flags |= REDF_ECN;
! 264: if (flags & RMCF_FLOWVALVE)
! 265: red_flags |= REDF_FLOWVALVE;
! 266: #ifdef ALTQ_RIO
! 267: if (flags & RMCF_CLEARDSCP)
! 268: red_flags |= RIOF_CLEARDSCP;
! 269: #endif
! 270: red_pkttime = nsecPerByte * pktsize / 1000;
! 271:
! 272: if (flags & RMCF_RED) {
! 273: cl->red_ = red_alloc(0, 0,
! 274: qlimit(cl->q_) * 10/100,
! 275: qlimit(cl->q_) * 30/100,
! 276: red_flags, red_pkttime);
! 277: if (cl->red_ != NULL)
! 278: qtype(cl->q_) = Q_RED;
! 279: }
! 280: #ifdef ALTQ_RIO
! 281: else {
! 282: cl->red_ = (red_t *)rio_alloc(0, NULL,
! 283: red_flags, red_pkttime);
! 284: if (cl->red_ != NULL)
! 285: qtype(cl->q_) = Q_RIO;
! 286: }
! 287: #endif
! 288: }
! 289: #endif /* ALTQ_RED */
! 290:
! 291: /*
! 292: * put the class into the class tree
! 293: */
! 294: s = splnet();
! 295: if ((peer = ifd->active_[pri]) != NULL) {
! 296: /* find the last class at this pri */
! 297: cl->peer_ = peer;
! 298: while (peer->peer_ != ifd->active_[pri])
! 299: peer = peer->peer_;
! 300: peer->peer_ = cl;
! 301: } else {
! 302: ifd->active_[pri] = cl;
! 303: cl->peer_ = cl;
! 304: }
! 305:
! 306: if (cl->parent_) {
! 307: cl->next_ = parent->children_;
! 308: parent->children_ = cl;
! 309: parent->leaf_ = 0;
! 310: }
! 311:
! 312: /*
! 313: * Compute the depth of this class and its ancestors in the class
! 314: * hierarchy.
! 315: */
! 316: rmc_depth_compute(cl);
! 317:
! 318: /*
! 319: * If CBQ's WRR is enabled, then initialize the class WRR state.
! 320: */
! 321: if (ifd->wrr_) {
! 322: ifd->num_[pri]++;
! 323: ifd->alloc_[pri] += cl->allotment_;
! 324: rmc_wrr_set_weights(ifd);
! 325: }
! 326: splx(s);
! 327: return (cl);
! 328: }
! 329:
! 330: int
! 331: rmc_modclass(struct rm_class *cl, u_int nsecPerByte, int maxq, u_int maxidle,
! 332: int minidle, u_int offtime, int pktsize)
! 333: {
! 334: struct rm_ifdat *ifd;
! 335: u_int old_allotment;
! 336: int s;
! 337:
! 338: ifd = cl->ifdat_;
! 339: old_allotment = cl->allotment_;
! 340:
! 341: s = splnet();
! 342: cl->allotment_ = RM_NS_PER_SEC / nsecPerByte; /* Bytes per sec */
! 343: cl->qthresh_ = 0;
! 344: cl->ns_per_byte_ = nsecPerByte;
! 345:
! 346: qlimit(cl->q_) = maxq;
! 347:
! 348: #if 1 /* minidle is also scaled in ALTQ */
! 349: cl->minidle_ = (minidle * nsecPerByte) / 8;
! 350: if (cl->minidle_ > 0)
! 351: cl->minidle_ = 0;
! 352: #else
! 353: cl->minidle_ = minidle;
! 354: #endif
! 355: cl->maxidle_ = (maxidle * nsecPerByte) / 8;
! 356: if (cl->maxidle_ == 0)
! 357: cl->maxidle_ = 1;
! 358: #if 1 /* offtime is also scaled in ALTQ */
! 359: cl->avgidle_ = cl->maxidle_;
! 360: cl->offtime_ = ((offtime * nsecPerByte) / 8) >> RM_FILTER_GAIN;
! 361: if (cl->offtime_ == 0)
! 362: cl->offtime_ = 1;
! 363: #else
! 364: cl->avgidle_ = 0;
! 365: cl->offtime_ = (offtime * nsecPerByte) / 8;
! 366: #endif
! 367:
! 368: /*
! 369: * If CBQ's WRR is enabled, then initialize the class WRR state.
! 370: */
! 371: if (ifd->wrr_) {
! 372: ifd->alloc_[cl->pri_] += cl->allotment_ - old_allotment;
! 373: rmc_wrr_set_weights(ifd);
! 374: }
! 375: splx(s);
! 376: return (0);
! 377: }
! 378:
! 379: /*
! 380: * static void
! 381: * rmc_wrr_set_weights(struct rm_ifdat *ifdat) - This function computes
! 382: * the appropriate run robin weights for the CBQ weighted round robin
! 383: * algorithm.
! 384: *
! 385: * Returns: NONE
! 386: */
! 387:
! 388: static void
! 389: rmc_wrr_set_weights(struct rm_ifdat *ifd)
! 390: {
! 391: int i;
! 392: struct rm_class *cl, *clh;
! 393:
! 394: for (i = 0; i < RM_MAXPRIO; i++) {
! 395: /*
! 396: * This is inverted from that of the simulator to
! 397: * maintain precision.
! 398: */
! 399: if (ifd->num_[i] == 0)
! 400: ifd->M_[i] = 0;
! 401: else
! 402: ifd->M_[i] = ifd->alloc_[i] /
! 403: (ifd->num_[i] * ifd->maxpkt_);
! 404: /*
! 405: * Compute the weighted allotment for each class.
! 406: * This takes the expensive div instruction out
! 407: * of the main loop for the wrr scheduling path.
! 408: * These only get recomputed when a class comes or
! 409: * goes.
! 410: */
! 411: if (ifd->active_[i] != NULL) {
! 412: clh = cl = ifd->active_[i];
! 413: do {
! 414: /* safe-guard for slow link or alloc_ == 0 */
! 415: if (ifd->M_[i] == 0)
! 416: cl->w_allotment_ = 0;
! 417: else
! 418: cl->w_allotment_ = cl->allotment_ /
! 419: ifd->M_[i];
! 420: cl = cl->peer_;
! 421: } while ((cl != NULL) && (cl != clh));
! 422: }
! 423: }
! 424: }
! 425:
! 426: int
! 427: rmc_get_weight(struct rm_ifdat *ifd, int pri)
! 428: {
! 429: if ((pri >= 0) && (pri < RM_MAXPRIO))
! 430: return (ifd->M_[pri]);
! 431: else
! 432: return (0);
! 433: }
! 434:
! 435: /*
! 436: * static void
! 437: * rmc_depth_compute(struct rm_class *cl) - This function computes the
! 438: * appropriate depth of class 'cl' and its ancestors.
! 439: *
! 440: * Returns: NONE
! 441: */
! 442:
! 443: static void
! 444: rmc_depth_compute(struct rm_class *cl)
! 445: {
! 446: rm_class_t *t = cl, *p;
! 447:
! 448: /*
! 449: * Recompute the depth for the branch of the tree.
! 450: */
! 451: while (t != NULL) {
! 452: p = t->parent_;
! 453: if (p && (t->depth_ >= p->depth_)) {
! 454: p->depth_ = t->depth_ + 1;
! 455: t = p;
! 456: } else
! 457: t = NULL;
! 458: }
! 459: }
! 460:
! 461: /*
! 462: * static void
! 463: * rmc_depth_recompute(struct rm_class *cl) - This function re-computes
! 464: * the depth of the tree after a class has been deleted.
! 465: *
! 466: * Returns: NONE
! 467: */
! 468:
! 469: static void
! 470: rmc_depth_recompute(rm_class_t *cl)
! 471: {
! 472: #if 1 /* ALTQ */
! 473: rm_class_t *p, *t;
! 474:
! 475: p = cl;
! 476: while (p != NULL) {
! 477: if ((t = p->children_) == NULL) {
! 478: p->depth_ = 0;
! 479: } else {
! 480: int cdepth = 0;
! 481:
! 482: while (t != NULL) {
! 483: if (t->depth_ > cdepth)
! 484: cdepth = t->depth_;
! 485: t = t->next_;
! 486: }
! 487:
! 488: if (p->depth_ == cdepth + 1)
! 489: /* no change to this parent */
! 490: return;
! 491:
! 492: p->depth_ = cdepth + 1;
! 493: }
! 494:
! 495: p = p->parent_;
! 496: }
! 497: #else
! 498: rm_class_t *t;
! 499:
! 500: if (cl->depth_ >= 1) {
! 501: if (cl->children_ == NULL) {
! 502: cl->depth_ = 0;
! 503: } else if ((t = cl->children_) != NULL) {
! 504: while (t != NULL) {
! 505: if (t->children_ != NULL)
! 506: rmc_depth_recompute(t);
! 507: t = t->next_;
! 508: }
! 509: } else
! 510: rmc_depth_compute(cl);
! 511: }
! 512: #endif
! 513: }
! 514:
! 515: /*
! 516: * void
! 517: * rmc_delete_class(struct rm_ifdat *ifdat, struct rm_class *cl) - This
! 518: * function deletes a class from the link-sharing structure and frees
! 519: * all resources associated with the class.
! 520: *
! 521: * Returns: NONE
! 522: */
! 523:
! 524: void
! 525: rmc_delete_class(struct rm_ifdat *ifd, struct rm_class *cl)
! 526: {
! 527: struct rm_class *p, *head, *previous;
! 528: int s;
! 529:
! 530: ASSERT(cl->children_ == NULL);
! 531:
! 532: if (cl->sleeping_)
! 533: CALLOUT_STOP(&cl->callout_);
! 534:
! 535: s = splnet();
! 536: /*
! 537: * Free packets in the packet queue.
! 538: * XXX - this may not be a desired behavior. Packets should be
! 539: * re-queued.
! 540: */
! 541: rmc_dropall(cl);
! 542:
! 543: /*
! 544: * If the class has a parent, then remove the class from the
! 545: * class from the parent's children chain.
! 546: */
! 547: if (cl->parent_ != NULL) {
! 548: head = cl->parent_->children_;
! 549: p = previous = head;
! 550: if (head->next_ == NULL) {
! 551: ASSERT(head == cl);
! 552: cl->parent_->children_ = NULL;
! 553: cl->parent_->leaf_ = 1;
! 554: } else while (p != NULL) {
! 555: if (p == cl) {
! 556: if (cl == head)
! 557: cl->parent_->children_ = cl->next_;
! 558: else
! 559: previous->next_ = cl->next_;
! 560: cl->next_ = NULL;
! 561: p = NULL;
! 562: } else {
! 563: previous = p;
! 564: p = p->next_;
! 565: }
! 566: }
! 567: }
! 568:
! 569: /*
! 570: * Delete class from class priority peer list.
! 571: */
! 572: if ((p = ifd->active_[cl->pri_]) != NULL) {
! 573: /*
! 574: * If there is more than one member of this priority
! 575: * level, then look for class(cl) in the priority level.
! 576: */
! 577: if (p != p->peer_) {
! 578: while (p->peer_ != cl)
! 579: p = p->peer_;
! 580: p->peer_ = cl->peer_;
! 581:
! 582: if (ifd->active_[cl->pri_] == cl)
! 583: ifd->active_[cl->pri_] = cl->peer_;
! 584: } else {
! 585: ASSERT(p == cl);
! 586: ifd->active_[cl->pri_] = NULL;
! 587: }
! 588: }
! 589:
! 590: /*
! 591: * Recompute the WRR weights.
! 592: */
! 593: if (ifd->wrr_) {
! 594: ifd->alloc_[cl->pri_] -= cl->allotment_;
! 595: ifd->num_[cl->pri_]--;
! 596: rmc_wrr_set_weights(ifd);
! 597: }
! 598:
! 599: /*
! 600: * Re-compute the depth of the tree.
! 601: */
! 602: #if 1 /* ALTQ */
! 603: rmc_depth_recompute(cl->parent_);
! 604: #else
! 605: rmc_depth_recompute(ifd->root_);
! 606: #endif
! 607:
! 608: splx(s);
! 609:
! 610: /*
! 611: * Free the class structure.
! 612: */
! 613: if (cl->red_ != NULL) {
! 614: #ifdef ALTQ_RIO
! 615: if (q_is_rio(cl->q_))
! 616: rio_destroy((rio_t *)cl->red_);
! 617: #endif
! 618: #ifdef ALTQ_RED
! 619: if (q_is_red(cl->q_))
! 620: red_destroy(cl->red_);
! 621: #endif
! 622: }
! 623: FREE(cl->q_, M_DEVBUF);
! 624: FREE(cl, M_DEVBUF);
! 625: }
! 626:
! 627:
! 628: /*
! 629: * void
! 630: * rmc_init(...) - Initialize the resource management data structures
! 631: * associated with the output portion of interface 'ifp'. 'ifd' is
! 632: * where the structures will be built (for backwards compatibility, the
! 633: * structures aren't kept in the ifnet struct). 'nsecPerByte'
! 634: * gives the link speed (inverse of bandwidth) in nanoseconds/byte.
! 635: * 'restart' is the driver-specific routine that the generic 'delay
! 636: * until under limit' action will call to restart output. `maxq'
! 637: * is the queue size of the 'link' & 'default' classes. 'maxqueued'
! 638: * is the maximum number of packets that the resource management
! 639: * code will allow to be queued 'downstream' (this is typically 1).
! 640: *
! 641: * Returns: NONE
! 642: */
! 643:
! 644: void
! 645: rmc_init(struct ifaltq *ifq, struct rm_ifdat *ifd, u_int nsecPerByte,
! 646: void (*restart)(struct ifaltq *), int maxq, int maxqueued, u_int maxidle,
! 647: int minidle, u_int offtime, int flags)
! 648: {
! 649: int i, mtu;
! 650:
! 651: /*
! 652: * Initialize the CBQ tracing/debug facility.
! 653: */
! 654: CBQTRACEINIT();
! 655:
! 656: bzero((char *)ifd, sizeof (*ifd));
! 657: mtu = ifq->altq_ifp->if_mtu;
! 658: ifd->ifq_ = ifq;
! 659: ifd->restart = restart;
! 660: ifd->maxqueued_ = maxqueued;
! 661: ifd->ns_per_byte_ = nsecPerByte;
! 662: ifd->maxpkt_ = mtu;
! 663: ifd->wrr_ = (flags & RMCF_WRR) ? 1 : 0;
! 664: ifd->efficient_ = (flags & RMCF_EFFICIENT) ? 1 : 0;
! 665: #if 1
! 666: ifd->maxiftime_ = mtu * nsecPerByte / 1000 * 16;
! 667: if (mtu * nsecPerByte > 10 * 1000000)
! 668: ifd->maxiftime_ /= 4;
! 669: #endif
! 670:
! 671: reset_cutoff(ifd);
! 672: CBQTRACE(rmc_init, 'INIT', ifd->cutoff_);
! 673:
! 674: /*
! 675: * Initialize the CBQ's WRR state.
! 676: */
! 677: for (i = 0; i < RM_MAXPRIO; i++) {
! 678: ifd->alloc_[i] = 0;
! 679: ifd->M_[i] = 0;
! 680: ifd->num_[i] = 0;
! 681: ifd->na_[i] = 0;
! 682: ifd->active_[i] = NULL;
! 683: }
! 684:
! 685: /*
! 686: * Initialize current packet state.
! 687: */
! 688: ifd->qi_ = 0;
! 689: ifd->qo_ = 0;
! 690: for (i = 0; i < RM_MAXQUEUED; i++) {
! 691: ifd->class_[i] = NULL;
! 692: ifd->curlen_[i] = 0;
! 693: ifd->borrowed_[i] = NULL;
! 694: }
! 695:
! 696: /*
! 697: * Create the root class of the link-sharing structure.
! 698: */
! 699: if ((ifd->root_ = rmc_newclass(0, ifd,
! 700: nsecPerByte,
! 701: rmc_root_overlimit, maxq, 0, 0,
! 702: maxidle, minidle, offtime,
! 703: 0, 0)) == NULL) {
! 704: printf("rmc_init: root class not allocated\n");
! 705: return ;
! 706: }
! 707: ifd->root_->depth_ = 0;
! 708: }
! 709:
! 710: /*
! 711: * void
! 712: * rmc_queue_packet(struct rm_class *cl, mbuf_t *m) - Add packet given by
! 713: * mbuf 'm' to queue for resource class 'cl'. This routine is called
! 714: * by a driver's if_output routine. This routine must be called with
! 715: * output packet completion interrupts locked out (to avoid racing with
! 716: * rmc_dequeue_next).
! 717: *
! 718: * Returns: 0 on successful queueing
! 719: * -1 when packet drop occurs
! 720: */
! 721: int
! 722: rmc_queue_packet(struct rm_class *cl, mbuf_t *m)
! 723: {
! 724: struct timeval now;
! 725: struct rm_ifdat *ifd = cl->ifdat_;
! 726: int cpri = cl->pri_;
! 727: int is_empty = qempty(cl->q_);
! 728:
! 729: RM_GETTIME(now);
! 730: if (ifd->cutoff_ > 0) {
! 731: if (TV_LT(&cl->undertime_, &now)) {
! 732: if (ifd->cutoff_ > cl->depth_)
! 733: ifd->cutoff_ = cl->depth_;
! 734: CBQTRACE(rmc_queue_packet, 'ffoc', cl->depth_);
! 735: }
! 736: #if 1 /* ALTQ */
! 737: else {
! 738: /*
! 739: * the class is overlimit. if the class has
! 740: * underlimit ancestors, set cutoff to the lowest
! 741: * depth among them.
! 742: */
! 743: struct rm_class *borrow = cl->borrow_;
! 744:
! 745: while (borrow != NULL &&
! 746: borrow->depth_ < ifd->cutoff_) {
! 747: if (TV_LT(&borrow->undertime_, &now)) {
! 748: ifd->cutoff_ = borrow->depth_;
! 749: CBQTRACE(rmc_queue_packet, 'ffob', ifd->cutoff_);
! 750: break;
! 751: }
! 752: borrow = borrow->borrow_;
! 753: }
! 754: }
! 755: #else /* !ALTQ */
! 756: else if ((ifd->cutoff_ > 1) && cl->borrow_) {
! 757: if (TV_LT(&cl->borrow_->undertime_, &now)) {
! 758: ifd->cutoff_ = cl->borrow_->depth_;
! 759: CBQTRACE(rmc_queue_packet, 'ffob',
! 760: cl->borrow_->depth_);
! 761: }
! 762: }
! 763: #endif /* !ALTQ */
! 764: }
! 765:
! 766: if (_rmc_addq(cl, m) < 0)
! 767: /* failed */
! 768: return (-1);
! 769:
! 770: if (is_empty) {
! 771: CBQTRACE(rmc_queue_packet, 'ytpe', cl->stats_.handle);
! 772: ifd->na_[cpri]++;
! 773: }
! 774:
! 775: if (qlen(cl->q_) > qlimit(cl->q_)) {
! 776: /* note: qlimit can be set to 0 or 1 */
! 777: rmc_drop_action(cl);
! 778: return (-1);
! 779: }
! 780: return (0);
! 781: }
! 782:
! 783: /*
! 784: * void
! 785: * rmc_tl_satisfied(struct rm_ifdat *ifd, struct timeval *now) - Check all
! 786: * classes to see if they are satisfied.
! 787: */
! 788:
! 789: static void
! 790: rmc_tl_satisfied(struct rm_ifdat *ifd, struct timeval *now)
! 791: {
! 792: int i;
! 793: rm_class_t *p, *bp;
! 794:
! 795: for (i = RM_MAXPRIO - 1; i >= 0; i--) {
! 796: if ((bp = ifd->active_[i]) != NULL) {
! 797: p = bp;
! 798: do {
! 799: if (!rmc_satisfied(p, now)) {
! 800: ifd->cutoff_ = p->depth_;
! 801: return;
! 802: }
! 803: p = p->peer_;
! 804: } while (p != bp);
! 805: }
! 806: }
! 807:
! 808: reset_cutoff(ifd);
! 809: }
! 810:
! 811: /*
! 812: * rmc_satisfied - Return 1 of the class is satisfied. O, otherwise.
! 813: */
! 814:
! 815: static int
! 816: rmc_satisfied(struct rm_class *cl, struct timeval *now)
! 817: {
! 818: rm_class_t *p;
! 819:
! 820: if (cl == NULL)
! 821: return (1);
! 822: if (TV_LT(now, &cl->undertime_))
! 823: return (1);
! 824: if (cl->depth_ == 0) {
! 825: if (!cl->sleeping_ && (qlen(cl->q_) > cl->qthresh_))
! 826: return (0);
! 827: else
! 828: return (1);
! 829: }
! 830: if (cl->children_ != NULL) {
! 831: p = cl->children_;
! 832: while (p != NULL) {
! 833: if (!rmc_satisfied(p, now))
! 834: return (0);
! 835: p = p->next_;
! 836: }
! 837: }
! 838:
! 839: return (1);
! 840: }
! 841:
! 842: /*
! 843: * Return 1 if class 'cl' is under limit or can borrow from a parent,
! 844: * 0 if overlimit. As a side-effect, this routine will invoke the
! 845: * class overlimit action if the class if overlimit.
! 846: */
! 847:
! 848: static int
! 849: rmc_under_limit(struct rm_class *cl, struct timeval *now)
! 850: {
! 851: rm_class_t *p = cl;
! 852: rm_class_t *top;
! 853: struct rm_ifdat *ifd = cl->ifdat_;
! 854:
! 855: ifd->borrowed_[ifd->qi_] = NULL;
! 856: /*
! 857: * If cl is the root class, then always return that it is
! 858: * underlimit. Otherwise, check to see if the class is underlimit.
! 859: */
! 860: if (cl->parent_ == NULL)
! 861: return (1);
! 862:
! 863: if (cl->sleeping_) {
! 864: if (TV_LT(now, &cl->undertime_))
! 865: return (0);
! 866:
! 867: CALLOUT_STOP(&cl->callout_);
! 868: cl->sleeping_ = 0;
! 869: cl->undertime_.tv_sec = 0;
! 870: return (1);
! 871: }
! 872:
! 873: top = NULL;
! 874: while (cl->undertime_.tv_sec && TV_LT(now, &cl->undertime_)) {
! 875: if (((cl = cl->borrow_) == NULL) ||
! 876: (cl->depth_ > ifd->cutoff_)) {
! 877: #ifdef ADJUST_CUTOFF
! 878: if (cl != NULL)
! 879: /* cutoff is taking effect, just
! 880: return false without calling
! 881: the delay action. */
! 882: return (0);
! 883: #endif
! 884: #ifdef BORROW_OFFTIME
! 885: /*
! 886: * check if the class can borrow offtime too.
! 887: * borrow offtime from the top of the borrow
! 888: * chain if the top class is not overloaded.
! 889: */
! 890: if (cl != NULL) {
! 891: /* cutoff is taking effect, use this class as top. */
! 892: top = cl;
! 893: CBQTRACE(rmc_under_limit, 'ffou', ifd->cutoff_);
! 894: }
! 895: if (top != NULL && top->avgidle_ == top->minidle_)
! 896: top = NULL;
! 897: p->overtime_ = *now;
! 898: (p->overlimit)(p, top);
! 899: #else
! 900: p->overtime_ = *now;
! 901: (p->overlimit)(p, NULL);
! 902: #endif
! 903: return (0);
! 904: }
! 905: top = cl;
! 906: }
! 907:
! 908: if (cl != p)
! 909: ifd->borrowed_[ifd->qi_] = cl;
! 910: return (1);
! 911: }
! 912:
! 913: /*
! 914: * _rmc_wrr_dequeue_next() - This is scheduler for WRR as opposed to
! 915: * Packet-by-packet round robin.
! 916: *
! 917: * The heart of the weighted round-robin scheduler, which decides which
! 918: * class next gets to send a packet. Highest priority first, then
! 919: * weighted round-robin within priorites.
! 920: *
! 921: * Each able-to-send class gets to send until its byte allocation is
! 922: * exhausted. Thus, the active pointer is only changed after a class has
! 923: * exhausted its allocation.
! 924: *
! 925: * If the scheduler finds no class that is underlimit or able to borrow,
! 926: * then the first class found that had a nonzero queue and is allowed to
! 927: * borrow gets to send.
! 928: */
! 929:
! 930: static mbuf_t *
! 931: _rmc_wrr_dequeue_next(struct rm_ifdat *ifd, int op)
! 932: {
! 933: struct rm_class *cl = NULL, *first = NULL;
! 934: u_int deficit;
! 935: int cpri;
! 936: mbuf_t *m;
! 937: struct timeval now;
! 938:
! 939: RM_GETTIME(now);
! 940:
! 941: /*
! 942: * if the driver polls the top of the queue and then removes
! 943: * the polled packet, we must return the same packet.
! 944: */
! 945: if (op == ALTDQ_REMOVE && ifd->pollcache_) {
! 946: cl = ifd->pollcache_;
! 947: cpri = cl->pri_;
! 948: if (ifd->efficient_) {
! 949: /* check if this class is overlimit */
! 950: if (cl->undertime_.tv_sec != 0 &&
! 951: rmc_under_limit(cl, &now) == 0)
! 952: first = cl;
! 953: }
! 954: ifd->pollcache_ = NULL;
! 955: goto _wrr_out;
! 956: }
! 957: else {
! 958: /* mode == ALTDQ_POLL || pollcache == NULL */
! 959: ifd->pollcache_ = NULL;
! 960: ifd->borrowed_[ifd->qi_] = NULL;
! 961: }
! 962: #ifdef ADJUST_CUTOFF
! 963: _again:
! 964: #endif
! 965: for (cpri = RM_MAXPRIO - 1; cpri >= 0; cpri--) {
! 966: if (ifd->na_[cpri] == 0)
! 967: continue;
! 968: deficit = 0;
! 969: /*
! 970: * Loop through twice for a priority level, if some class
! 971: * was unable to send a packet the first round because
! 972: * of the weighted round-robin mechanism.
! 973: * During the second loop at this level, deficit==2.
! 974: * (This second loop is not needed if for every class,
! 975: * "M[cl->pri_])" times "cl->allotment" is greater than
! 976: * the byte size for the largest packet in the class.)
! 977: */
! 978: _wrr_loop:
! 979: cl = ifd->active_[cpri];
! 980: ASSERT(cl != NULL);
! 981: do {
! 982: if ((deficit < 2) && (cl->bytes_alloc_ <= 0))
! 983: cl->bytes_alloc_ += cl->w_allotment_;
! 984: if (!qempty(cl->q_)) {
! 985: if ((cl->undertime_.tv_sec == 0) ||
! 986: rmc_under_limit(cl, &now)) {
! 987: if (cl->bytes_alloc_ > 0 || deficit > 1)
! 988: goto _wrr_out;
! 989:
! 990: /* underlimit but no alloc */
! 991: deficit = 1;
! 992: #if 1
! 993: ifd->borrowed_[ifd->qi_] = NULL;
! 994: #endif
! 995: }
! 996: else if (first == NULL && cl->borrow_ != NULL)
! 997: first = cl; /* borrowing candidate */
! 998: }
! 999:
! 1000: cl->bytes_alloc_ = 0;
! 1001: cl = cl->peer_;
! 1002: } while (cl != ifd->active_[cpri]);
! 1003:
! 1004: if (deficit == 1) {
! 1005: /* first loop found an underlimit class with deficit */
! 1006: /* Loop on same priority level, with new deficit. */
! 1007: deficit = 2;
! 1008: goto _wrr_loop;
! 1009: }
! 1010: }
! 1011:
! 1012: #ifdef ADJUST_CUTOFF
! 1013: /*
! 1014: * no underlimit class found. if cutoff is taking effect,
! 1015: * increase cutoff and try again.
! 1016: */
! 1017: if (first != NULL && ifd->cutoff_ < ifd->root_->depth_) {
! 1018: ifd->cutoff_++;
! 1019: CBQTRACE(_rmc_wrr_dequeue_next, 'ojda', ifd->cutoff_);
! 1020: goto _again;
! 1021: }
! 1022: #endif /* ADJUST_CUTOFF */
! 1023: /*
! 1024: * If LINK_EFFICIENCY is turned on, then the first overlimit
! 1025: * class we encounter will send a packet if all the classes
! 1026: * of the link-sharing structure are overlimit.
! 1027: */
! 1028: reset_cutoff(ifd);
! 1029: CBQTRACE(_rmc_wrr_dequeue_next, 'otsr', ifd->cutoff_);
! 1030:
! 1031: if (!ifd->efficient_ || first == NULL)
! 1032: return (NULL);
! 1033:
! 1034: cl = first;
! 1035: cpri = cl->pri_;
! 1036: #if 0 /* too time-consuming for nothing */
! 1037: if (cl->sleeping_)
! 1038: CALLOUT_STOP(&cl->callout_);
! 1039: cl->sleeping_ = 0;
! 1040: cl->undertime_.tv_sec = 0;
! 1041: #endif
! 1042: ifd->borrowed_[ifd->qi_] = cl->borrow_;
! 1043: ifd->cutoff_ = cl->borrow_->depth_;
! 1044:
! 1045: /*
! 1046: * Deque the packet and do the book keeping...
! 1047: */
! 1048: _wrr_out:
! 1049: if (op == ALTDQ_REMOVE) {
! 1050: m = _rmc_getq(cl);
! 1051: if (m == NULL)
! 1052: panic("_rmc_wrr_dequeue_next");
! 1053: if (qempty(cl->q_))
! 1054: ifd->na_[cpri]--;
! 1055:
! 1056: /*
! 1057: * Update class statistics and link data.
! 1058: */
! 1059: if (cl->bytes_alloc_ > 0)
! 1060: cl->bytes_alloc_ -= m_pktlen(m);
! 1061:
! 1062: if ((cl->bytes_alloc_ <= 0) || first == cl)
! 1063: ifd->active_[cl->pri_] = cl->peer_;
! 1064: else
! 1065: ifd->active_[cl->pri_] = cl;
! 1066:
! 1067: ifd->class_[ifd->qi_] = cl;
! 1068: ifd->curlen_[ifd->qi_] = m_pktlen(m);
! 1069: ifd->now_[ifd->qi_] = now;
! 1070: ifd->qi_ = (ifd->qi_ + 1) % ifd->maxqueued_;
! 1071: ifd->queued_++;
! 1072: } else {
! 1073: /* mode == ALTDQ_PPOLL */
! 1074: m = _rmc_pollq(cl);
! 1075: ifd->pollcache_ = cl;
! 1076: }
! 1077: return (m);
! 1078: }
! 1079:
! 1080: /*
! 1081: * Dequeue & return next packet from the highest priority class that
! 1082: * has a packet to send & has enough allocation to send it. This
! 1083: * routine is called by a driver whenever it needs a new packet to
! 1084: * output.
! 1085: */
! 1086: static mbuf_t *
! 1087: _rmc_prr_dequeue_next(struct rm_ifdat *ifd, int op)
! 1088: {
! 1089: mbuf_t *m;
! 1090: int cpri;
! 1091: struct rm_class *cl, *first = NULL;
! 1092: struct timeval now;
! 1093:
! 1094: RM_GETTIME(now);
! 1095:
! 1096: /*
! 1097: * if the driver polls the top of the queue and then removes
! 1098: * the polled packet, we must return the same packet.
! 1099: */
! 1100: if (op == ALTDQ_REMOVE && ifd->pollcache_) {
! 1101: cl = ifd->pollcache_;
! 1102: cpri = cl->pri_;
! 1103: ifd->pollcache_ = NULL;
! 1104: goto _prr_out;
! 1105: } else {
! 1106: /* mode == ALTDQ_POLL || pollcache == NULL */
! 1107: ifd->pollcache_ = NULL;
! 1108: ifd->borrowed_[ifd->qi_] = NULL;
! 1109: }
! 1110: #ifdef ADJUST_CUTOFF
! 1111: _again:
! 1112: #endif
! 1113: for (cpri = RM_MAXPRIO - 1; cpri >= 0; cpri--) {
! 1114: if (ifd->na_[cpri] == 0)
! 1115: continue;
! 1116: cl = ifd->active_[cpri];
! 1117: ASSERT(cl != NULL);
! 1118: do {
! 1119: if (!qempty(cl->q_)) {
! 1120: if ((cl->undertime_.tv_sec == 0) ||
! 1121: rmc_under_limit(cl, &now))
! 1122: goto _prr_out;
! 1123: if (first == NULL && cl->borrow_ != NULL)
! 1124: first = cl;
! 1125: }
! 1126: cl = cl->peer_;
! 1127: } while (cl != ifd->active_[cpri]);
! 1128: }
! 1129:
! 1130: #ifdef ADJUST_CUTOFF
! 1131: /*
! 1132: * no underlimit class found. if cutoff is taking effect, increase
! 1133: * cutoff and try again.
! 1134: */
! 1135: if (first != NULL && ifd->cutoff_ < ifd->root_->depth_) {
! 1136: ifd->cutoff_++;
! 1137: goto _again;
! 1138: }
! 1139: #endif /* ADJUST_CUTOFF */
! 1140: /*
! 1141: * If LINK_EFFICIENCY is turned on, then the first overlimit
! 1142: * class we encounter will send a packet if all the classes
! 1143: * of the link-sharing structure are overlimit.
! 1144: */
! 1145: reset_cutoff(ifd);
! 1146: if (!ifd->efficient_ || first == NULL)
! 1147: return (NULL);
! 1148:
! 1149: cl = first;
! 1150: cpri = cl->pri_;
! 1151: #if 0 /* too time-consuming for nothing */
! 1152: if (cl->sleeping_)
! 1153: CALLOUT_STOP(&cl->callout_);
! 1154: cl->sleeping_ = 0;
! 1155: cl->undertime_.tv_sec = 0;
! 1156: #endif
! 1157: ifd->borrowed_[ifd->qi_] = cl->borrow_;
! 1158: ifd->cutoff_ = cl->borrow_->depth_;
! 1159:
! 1160: /*
! 1161: * Deque the packet and do the book keeping...
! 1162: */
! 1163: _prr_out:
! 1164: if (op == ALTDQ_REMOVE) {
! 1165: m = _rmc_getq(cl);
! 1166: if (m == NULL)
! 1167: panic("_rmc_prr_dequeue_next");
! 1168: if (qempty(cl->q_))
! 1169: ifd->na_[cpri]--;
! 1170:
! 1171: ifd->active_[cpri] = cl->peer_;
! 1172:
! 1173: ifd->class_[ifd->qi_] = cl;
! 1174: ifd->curlen_[ifd->qi_] = m_pktlen(m);
! 1175: ifd->now_[ifd->qi_] = now;
! 1176: ifd->qi_ = (ifd->qi_ + 1) % ifd->maxqueued_;
! 1177: ifd->queued_++;
! 1178: } else {
! 1179: /* mode == ALTDQ_POLL */
! 1180: m = _rmc_pollq(cl);
! 1181: ifd->pollcache_ = cl;
! 1182: }
! 1183: return (m);
! 1184: }
! 1185:
! 1186: /*
! 1187: * mbuf_t *
! 1188: * rmc_dequeue_next(struct rm_ifdat *ifd, struct timeval *now) - this function
! 1189: * is invoked by the packet driver to get the next packet to be
! 1190: * dequeued and output on the link. If WRR is enabled, then the
! 1191: * WRR dequeue next routine will determine the next packet to sent.
! 1192: * Otherwise, packet-by-packet round robin is invoked.
! 1193: *
! 1194: * Returns: NULL, if a packet is not available or if all
! 1195: * classes are overlimit.
! 1196: *
! 1197: * Otherwise, Pointer to the next packet.
! 1198: */
! 1199:
! 1200: mbuf_t *
! 1201: rmc_dequeue_next(struct rm_ifdat *ifd, int mode)
! 1202: {
! 1203: if (ifd->queued_ >= ifd->maxqueued_)
! 1204: return (NULL);
! 1205: else if (ifd->wrr_)
! 1206: return (_rmc_wrr_dequeue_next(ifd, mode));
! 1207: else
! 1208: return (_rmc_prr_dequeue_next(ifd, mode));
! 1209: }
! 1210:
! 1211: /*
! 1212: * Update the utilization estimate for the packet that just completed.
! 1213: * The packet's class & the parent(s) of that class all get their
! 1214: * estimators updated. This routine is called by the driver's output-
! 1215: * packet-completion interrupt service routine.
! 1216: */
! 1217:
! 1218: /*
! 1219: * a macro to approximate "divide by 1000" that gives 0.000999,
! 1220: * if a value has enough effective digits.
! 1221: * (on pentium, mul takes 9 cycles but div takes 46!)
! 1222: */
! 1223: #define NSEC_TO_USEC(t) (((t) >> 10) + ((t) >> 16) + ((t) >> 17))
! 1224: void
! 1225: rmc_update_class_util(struct rm_ifdat *ifd)
! 1226: {
! 1227: int idle, avgidle, pktlen;
! 1228: int pkt_time, tidle;
! 1229: rm_class_t *cl, *borrowed;
! 1230: rm_class_t *borrows;
! 1231: struct timeval *nowp;
! 1232:
! 1233: /*
! 1234: * Get the most recent completed class.
! 1235: */
! 1236: if ((cl = ifd->class_[ifd->qo_]) == NULL)
! 1237: return;
! 1238:
! 1239: pktlen = ifd->curlen_[ifd->qo_];
! 1240: borrowed = ifd->borrowed_[ifd->qo_];
! 1241: borrows = borrowed;
! 1242:
! 1243: PKTCNTR_ADD(&cl->stats_.xmit_cnt, pktlen);
! 1244:
! 1245: /*
! 1246: * Run estimator on class and its ancestors.
! 1247: */
! 1248: /*
! 1249: * rm_update_class_util is designed to be called when the
! 1250: * transfer is completed from a xmit complete interrupt,
! 1251: * but most drivers don't implement an upcall for that.
! 1252: * so, just use estimated completion time.
! 1253: * as a result, ifd->qi_ and ifd->qo_ are always synced.
! 1254: */
! 1255: nowp = &ifd->now_[ifd->qo_];
! 1256: /* get pkt_time (for link) in usec */
! 1257: #if 1 /* use approximation */
! 1258: pkt_time = ifd->curlen_[ifd->qo_] * ifd->ns_per_byte_;
! 1259: pkt_time = NSEC_TO_USEC(pkt_time);
! 1260: #else
! 1261: pkt_time = ifd->curlen_[ifd->qo_] * ifd->ns_per_byte_ / 1000;
! 1262: #endif
! 1263: #if 1 /* ALTQ4PPP */
! 1264: if (TV_LT(nowp, &ifd->ifnow_)) {
! 1265: int iftime;
! 1266:
! 1267: /*
! 1268: * make sure the estimated completion time does not go
! 1269: * too far. it can happen when the link layer supports
! 1270: * data compression or the interface speed is set to
! 1271: * a much lower value.
! 1272: */
! 1273: TV_DELTA(&ifd->ifnow_, nowp, iftime);
! 1274: if (iftime+pkt_time < ifd->maxiftime_) {
! 1275: TV_ADD_DELTA(&ifd->ifnow_, pkt_time, &ifd->ifnow_);
! 1276: } else {
! 1277: TV_ADD_DELTA(nowp, ifd->maxiftime_, &ifd->ifnow_);
! 1278: }
! 1279: } else {
! 1280: TV_ADD_DELTA(nowp, pkt_time, &ifd->ifnow_);
! 1281: }
! 1282: #else
! 1283: if (TV_LT(nowp, &ifd->ifnow_)) {
! 1284: TV_ADD_DELTA(&ifd->ifnow_, pkt_time, &ifd->ifnow_);
! 1285: } else {
! 1286: TV_ADD_DELTA(nowp, pkt_time, &ifd->ifnow_);
! 1287: }
! 1288: #endif
! 1289:
! 1290: while (cl != NULL) {
! 1291: TV_DELTA(&ifd->ifnow_, &cl->last_, idle);
! 1292: if (idle >= 2000000)
! 1293: /*
! 1294: * this class is idle enough, reset avgidle.
! 1295: * (TV_DELTA returns 2000000 us when delta is large.)
! 1296: */
! 1297: cl->avgidle_ = cl->maxidle_;
! 1298:
! 1299: /* get pkt_time (for class) in usec */
! 1300: #if 1 /* use approximation */
! 1301: pkt_time = pktlen * cl->ns_per_byte_;
! 1302: pkt_time = NSEC_TO_USEC(pkt_time);
! 1303: #else
! 1304: pkt_time = pktlen * cl->ns_per_byte_ / 1000;
! 1305: #endif
! 1306: idle -= pkt_time;
! 1307:
! 1308: avgidle = cl->avgidle_;
! 1309: avgidle += idle - (avgidle >> RM_FILTER_GAIN);
! 1310: cl->avgidle_ = avgidle;
! 1311:
! 1312: /* Are we overlimit ? */
! 1313: if (avgidle <= 0) {
! 1314: CBQTRACE(rmc_update_class_util, 'milo', cl->stats_.handle);
! 1315: #if 1 /* ALTQ */
! 1316: /*
! 1317: * need some lower bound for avgidle, otherwise
! 1318: * a borrowing class gets unbounded penalty.
! 1319: */
! 1320: if (avgidle < cl->minidle_)
! 1321: avgidle = cl->avgidle_ = cl->minidle_;
! 1322: #endif
! 1323: /* set next idle to make avgidle 0 */
! 1324: tidle = pkt_time +
! 1325: (((1 - RM_POWER) * avgidle) >> RM_FILTER_GAIN);
! 1326: TV_ADD_DELTA(nowp, tidle, &cl->undertime_);
! 1327: ++cl->stats_.over;
! 1328: } else {
! 1329: cl->avgidle_ =
! 1330: (avgidle > cl->maxidle_) ? cl->maxidle_ : avgidle;
! 1331: cl->undertime_.tv_sec = 0;
! 1332: if (cl->sleeping_) {
! 1333: CALLOUT_STOP(&cl->callout_);
! 1334: cl->sleeping_ = 0;
! 1335: }
! 1336: }
! 1337:
! 1338: if (borrows != NULL) {
! 1339: if (borrows != cl)
! 1340: ++cl->stats_.borrows;
! 1341: else
! 1342: borrows = NULL;
! 1343: }
! 1344: cl->last_ = ifd->ifnow_;
! 1345: cl->last_pkttime_ = pkt_time;
! 1346:
! 1347: #if 1
! 1348: if (cl->parent_ == NULL) {
! 1349: /* take stats of root class */
! 1350: PKTCNTR_ADD(&cl->stats_.xmit_cnt, pktlen);
! 1351: }
! 1352: #endif
! 1353:
! 1354: cl = cl->parent_;
! 1355: }
! 1356:
! 1357: /*
! 1358: * Check to see if cutoff needs to set to a new level.
! 1359: */
! 1360: cl = ifd->class_[ifd->qo_];
! 1361: if (borrowed && (ifd->cutoff_ >= borrowed->depth_)) {
! 1362: #if 1 /* ALTQ */
! 1363: if ((qlen(cl->q_) <= 0) || TV_LT(nowp, &borrowed->undertime_)) {
! 1364: rmc_tl_satisfied(ifd, nowp);
! 1365: CBQTRACE(rmc_update_class_util, 'broe', ifd->cutoff_);
! 1366: } else {
! 1367: ifd->cutoff_ = borrowed->depth_;
! 1368: CBQTRACE(rmc_update_class_util, 'ffob', borrowed->depth_);
! 1369: }
! 1370: #else /* !ALTQ */
! 1371: if ((qlen(cl->q_) <= 1) || TV_LT(&now, &borrowed->undertime_)) {
! 1372: reset_cutoff(ifd);
! 1373: #ifdef notdef
! 1374: rmc_tl_satisfied(ifd, &now);
! 1375: #endif
! 1376: CBQTRACE(rmc_update_class_util, 'broe', ifd->cutoff_);
! 1377: } else {
! 1378: ifd->cutoff_ = borrowed->depth_;
! 1379: CBQTRACE(rmc_update_class_util, 'ffob', borrowed->depth_);
! 1380: }
! 1381: #endif /* !ALTQ */
! 1382: }
! 1383:
! 1384: /*
! 1385: * Release class slot
! 1386: */
! 1387: ifd->borrowed_[ifd->qo_] = NULL;
! 1388: ifd->class_[ifd->qo_] = NULL;
! 1389: ifd->qo_ = (ifd->qo_ + 1) % ifd->maxqueued_;
! 1390: ifd->queued_--;
! 1391: }
! 1392:
! 1393: /*
! 1394: * void
! 1395: * rmc_drop_action(struct rm_class *cl) - Generic (not protocol-specific)
! 1396: * over-limit action routines. These get invoked by rmc_under_limit()
! 1397: * if a class with packets to send if over its bandwidth limit & can't
! 1398: * borrow from a parent class.
! 1399: *
! 1400: * Returns: NONE
! 1401: */
! 1402:
! 1403: static void
! 1404: rmc_drop_action(struct rm_class *cl)
! 1405: {
! 1406: struct rm_ifdat *ifd = cl->ifdat_;
! 1407:
! 1408: ASSERT(qlen(cl->q_) > 0);
! 1409: _rmc_dropq(cl);
! 1410: if (qempty(cl->q_))
! 1411: ifd->na_[cl->pri_]--;
! 1412: }
! 1413:
! 1414: void rmc_dropall(struct rm_class *cl)
! 1415: {
! 1416: struct rm_ifdat *ifd = cl->ifdat_;
! 1417:
! 1418: if (!qempty(cl->q_)) {
! 1419: _flushq(cl->q_);
! 1420:
! 1421: ifd->na_[cl->pri_]--;
! 1422: }
! 1423: }
! 1424:
! 1425: /*
! 1426: * void
! 1427: * rmc_delay_action(struct rm_class *cl) - This function is the generic CBQ
! 1428: * delay action routine. It is invoked via rmc_under_limit when the
! 1429: * packet is discovered to be overlimit.
! 1430: *
! 1431: * If the delay action is result of borrow class being overlimit, then
! 1432: * delay for the offtime of the borrowing class that is overlimit.
! 1433: *
! 1434: * Returns: NONE
! 1435: */
! 1436:
! 1437: void
! 1438: rmc_delay_action(struct rm_class *cl, struct rm_class *borrow)
! 1439: {
! 1440: int delay, t, extradelay;
! 1441:
! 1442: cl->stats_.overactions++;
! 1443: TV_DELTA(&cl->undertime_, &cl->overtime_, delay);
! 1444: #ifndef BORROW_OFFTIME
! 1445: delay += cl->offtime_;
! 1446: #endif
! 1447:
! 1448: if (!cl->sleeping_) {
! 1449: CBQTRACE(rmc_delay_action, 'yled', cl->stats_.handle);
! 1450: #ifdef BORROW_OFFTIME
! 1451: if (borrow != NULL)
! 1452: extradelay = borrow->offtime_;
! 1453: else
! 1454: #endif
! 1455: extradelay = cl->offtime_;
! 1456:
! 1457: #ifdef ALTQ
! 1458: /*
! 1459: * XXX recalculate suspend time:
! 1460: * current undertime is (tidle + pkt_time) calculated
! 1461: * from the last transmission.
! 1462: * tidle: time required to bring avgidle back to 0
! 1463: * pkt_time: target waiting time for this class
! 1464: * we need to replace pkt_time by offtime
! 1465: */
! 1466: extradelay -= cl->last_pkttime_;
! 1467: #endif
! 1468: if (extradelay > 0) {
! 1469: TV_ADD_DELTA(&cl->undertime_, extradelay, &cl->undertime_);
! 1470: delay += extradelay;
! 1471: }
! 1472:
! 1473: cl->sleeping_ = 1;
! 1474: cl->stats_.delays++;
! 1475:
! 1476: /*
! 1477: * Since packets are phased randomly with respect to the
! 1478: * clock, 1 tick (the next clock tick) can be an arbitrarily
! 1479: * short time so we have to wait for at least two ticks.
! 1480: * NOTE: If there's no other traffic, we need the timer as
! 1481: * a 'backstop' to restart this class.
! 1482: */
! 1483: if (delay > tick * 2) {
! 1484: #ifdef __FreeBSD__
! 1485: /* FreeBSD rounds up the tick */
! 1486: t = hzto(&cl->undertime_);
! 1487: #else
! 1488: /* other BSDs round down the tick */
! 1489: t = hzto(&cl->undertime_) + 1;
! 1490: #endif
! 1491: } else
! 1492: t = 2;
! 1493: CALLOUT_RESET(&cl->callout_, t,
! 1494: (timeout_t *)rmc_restart, (caddr_t)cl);
! 1495: }
! 1496: }
! 1497:
! 1498: /*
! 1499: * void
! 1500: * rmc_restart() - is just a helper routine for rmc_delay_action -- it is
! 1501: * called by the system timer code & is responsible checking if the
! 1502: * class is still sleeping (it might have been restarted as a side
! 1503: * effect of the queue scan on a packet arrival) and, if so, restarting
! 1504: * output for the class. Inspecting the class state & restarting output
! 1505: * require locking the class structure. In general the driver is
! 1506: * responsible for locking but this is the only routine that is not
! 1507: * called directly or indirectly from the interface driver so it has
! 1508: * know about system locking conventions. Under bsd, locking is done
! 1509: * by raising IPL to splnet so that's what's implemented here. On a
! 1510: * different system this would probably need to be changed.
! 1511: *
! 1512: * Returns: NONE
! 1513: */
! 1514:
! 1515: static void
! 1516: rmc_restart(struct rm_class *cl)
! 1517: {
! 1518: struct rm_ifdat *ifd = cl->ifdat_;
! 1519: int s;
! 1520:
! 1521: s = splnet();
! 1522: if (cl->sleeping_) {
! 1523: cl->sleeping_ = 0;
! 1524: cl->undertime_.tv_sec = 0;
! 1525:
! 1526: if (ifd->queued_ < ifd->maxqueued_ && ifd->restart != NULL) {
! 1527: CBQTRACE(rmc_restart, 'trts', cl->stats_.handle);
! 1528: (ifd->restart)(ifd->ifq_);
! 1529: }
! 1530: }
! 1531: splx(s);
! 1532: }
! 1533:
! 1534: /*
! 1535: * void
! 1536: * rmc_root_overlimit(struct rm_class *cl) - This the generic overlimit
! 1537: * handling routine for the root class of the link sharing structure.
! 1538: *
! 1539: * Returns: NONE
! 1540: */
! 1541:
! 1542: static void
! 1543: rmc_root_overlimit(struct rm_class *cl, struct rm_class *borrow)
! 1544: {
! 1545: panic("rmc_root_overlimit");
! 1546: }
! 1547:
! 1548: /*
! 1549: * Packet Queue handling routines. Eventually, this is to localize the
! 1550: * effects on the code whether queues are red queues or droptail
! 1551: * queues.
! 1552: */
! 1553:
! 1554: static int
! 1555: _rmc_addq(rm_class_t *cl, mbuf_t *m)
! 1556: {
! 1557: #ifdef ALTQ_RIO
! 1558: if (q_is_rio(cl->q_))
! 1559: return rio_addq((rio_t *)cl->red_, cl->q_, m, cl->pktattr_);
! 1560: #endif
! 1561: #ifdef ALTQ_RED
! 1562: if (q_is_red(cl->q_))
! 1563: return red_addq(cl->red_, cl->q_, m, cl->pktattr_);
! 1564: #endif /* ALTQ_RED */
! 1565:
! 1566: if (cl->flags_ & RMCF_CLEARDSCP)
! 1567: write_dsfield(m, cl->pktattr_, 0);
! 1568:
! 1569: _addq(cl->q_, m);
! 1570: return (0);
! 1571: }
! 1572:
! 1573: /* note: _rmc_dropq is not called for red */
! 1574: static void
! 1575: _rmc_dropq(rm_class_t *cl)
! 1576: {
! 1577: mbuf_t *m;
! 1578:
! 1579: if ((m = _getq(cl->q_)) != NULL)
! 1580: m_freem(m);
! 1581: }
! 1582:
! 1583: static mbuf_t *
! 1584: _rmc_getq(rm_class_t *cl)
! 1585: {
! 1586: #ifdef ALTQ_RIO
! 1587: if (q_is_rio(cl->q_))
! 1588: return rio_getq((rio_t *)cl->red_, cl->q_);
! 1589: #endif
! 1590: #ifdef ALTQ_RED
! 1591: if (q_is_red(cl->q_))
! 1592: return red_getq(cl->red_, cl->q_);
! 1593: #endif
! 1594: return _getq(cl->q_);
! 1595: }
! 1596:
! 1597: static mbuf_t *
! 1598: _rmc_pollq(rm_class_t *cl)
! 1599: {
! 1600: return qhead(cl->q_);
! 1601: }
! 1602:
! 1603: #ifdef CBQ_TRACE
! 1604:
! 1605: struct cbqtrace cbqtrace_buffer[NCBQTRACE+1];
! 1606: struct cbqtrace *cbqtrace_ptr = NULL;
! 1607: int cbqtrace_count;
! 1608:
! 1609: /*
! 1610: * DDB hook to trace cbq events:
! 1611: * the last 1024 events are held in a circular buffer.
! 1612: * use "call cbqtrace_dump(N)" to display 20 events from Nth event.
! 1613: */
! 1614: void cbqtrace_dump(int);
! 1615: static char *rmc_funcname(void *);
! 1616:
! 1617: static struct rmc_funcs {
! 1618: void *func;
! 1619: char *name;
! 1620: } rmc_funcs[] =
! 1621: {
! 1622: rmc_init, "rmc_init",
! 1623: rmc_queue_packet, "rmc_queue_packet",
! 1624: rmc_under_limit, "rmc_under_limit",
! 1625: rmc_update_class_util, "rmc_update_class_util",
! 1626: rmc_delay_action, "rmc_delay_action",
! 1627: rmc_restart, "rmc_restart",
! 1628: _rmc_wrr_dequeue_next, "_rmc_wrr_dequeue_next",
! 1629: NULL, NULL
! 1630: };
! 1631:
! 1632: static char *rmc_funcname(void *func)
! 1633: {
! 1634: struct rmc_funcs *fp;
! 1635:
! 1636: for (fp = rmc_funcs; fp->func != NULL; fp++)
! 1637: if (fp->func == func)
! 1638: return (fp->name);
! 1639: return ("unknown");
! 1640: }
! 1641:
! 1642: void cbqtrace_dump(int counter)
! 1643: {
! 1644: int i, *p;
! 1645: char *cp;
! 1646:
! 1647: counter = counter % NCBQTRACE;
! 1648: p = (int *)&cbqtrace_buffer[counter];
! 1649:
! 1650: for (i=0; i<20; i++) {
! 1651: printf("[0x%x] ", *p++);
! 1652: printf("%s: ", rmc_funcname((void *)*p++));
! 1653: cp = (char *)p++;
! 1654: printf("%c%c%c%c: ", cp[0], cp[1], cp[2], cp[3]);
! 1655: printf("%d\n",*p++);
! 1656:
! 1657: if (p >= (int *)&cbqtrace_buffer[NCBQTRACE])
! 1658: p = (int *)cbqtrace_buffer;
! 1659: }
! 1660: }
! 1661: #endif /* CBQ_TRACE */
! 1662:
! 1663: #if defined(ALTQ_CBQ) || defined(ALTQ_RED) || defined(ALTQ_RIO) || defined(ALTQ_HFSC) || defined(ALTQ_PRIQ)
! 1664: #if !defined(__GNUC__) || defined(ALTQ_DEBUG)
! 1665:
! 1666: void
! 1667: _addq(class_queue_t *q, mbuf_t *m)
! 1668: {
! 1669: mbuf_t *m0;
! 1670:
! 1671: if ((m0 = qtail(q)) != NULL)
! 1672: m->m_nextpkt = m0->m_nextpkt;
! 1673: else
! 1674: m0 = m;
! 1675: m0->m_nextpkt = m;
! 1676: qtail(q) = m;
! 1677: qlen(q)++;
! 1678: }
! 1679:
! 1680: mbuf_t *
! 1681: _getq(class_queue_t *q)
! 1682: {
! 1683: mbuf_t *m, *m0;
! 1684:
! 1685: if ((m = qtail(q)) == NULL)
! 1686: return (NULL);
! 1687: if ((m0 = m->m_nextpkt) != m)
! 1688: m->m_nextpkt = m0->m_nextpkt;
! 1689: else {
! 1690: ASSERT(qlen(q) == 1);
! 1691: qtail(q) = NULL;
! 1692: }
! 1693: qlen(q)--;
! 1694: m0->m_nextpkt = NULL;
! 1695: return (m0);
! 1696: }
! 1697:
! 1698: /* drop a packet at the tail of the queue */
! 1699: mbuf_t *
! 1700: _getq_tail(class_queue_t *q)
! 1701: {
! 1702: mbuf_t *m, *m0, *prev;
! 1703:
! 1704: if ((m = m0 = qtail(q)) == NULL)
! 1705: return NULL;
! 1706: do {
! 1707: prev = m0;
! 1708: m0 = m0->m_nextpkt;
! 1709: } while (m0 != m);
! 1710: prev->m_nextpkt = m->m_nextpkt;
! 1711: if (prev == m) {
! 1712: ASSERT(qlen(q) == 1);
! 1713: qtail(q) = NULL;
! 1714: } else
! 1715: qtail(q) = prev;
! 1716: qlen(q)--;
! 1717: m->m_nextpkt = NULL;
! 1718: return (m);
! 1719: }
! 1720:
! 1721: /* randomly select a packet in the queue */
! 1722: mbuf_t *
! 1723: _getq_random(class_queue_t *q)
! 1724: {
! 1725: struct mbuf *m;
! 1726: int i, n;
! 1727:
! 1728: if ((m = qtail(q)) == NULL)
! 1729: return NULL;
! 1730: if (m->m_nextpkt == m) {
! 1731: ASSERT(qlen(q) == 1);
! 1732: qtail(q) = NULL;
! 1733: } else {
! 1734: struct mbuf *prev = NULL;
! 1735:
! 1736: n = random() % qlen(q) + 1;
! 1737: for (i = 0; i < n; i++) {
! 1738: prev = m;
! 1739: m = m->m_nextpkt;
! 1740: }
! 1741: prev->m_nextpkt = m->m_nextpkt;
! 1742: if (m == qtail(q))
! 1743: qtail(q) = prev;
! 1744: }
! 1745: qlen(q)--;
! 1746: m->m_nextpkt = NULL;
! 1747: return (m);
! 1748: }
! 1749:
! 1750: void
! 1751: _removeq(class_queue_t *q, mbuf_t *m)
! 1752: {
! 1753: mbuf_t *m0, *prev;
! 1754:
! 1755: m0 = qtail(q);
! 1756: do {
! 1757: prev = m0;
! 1758: m0 = m0->m_nextpkt;
! 1759: } while (m0 != m);
! 1760: prev->m_nextpkt = m->m_nextpkt;
! 1761: if (prev == m)
! 1762: qtail(q) = NULL;
! 1763: else if (qtail(q) == m)
! 1764: qtail(q) = prev;
! 1765: qlen(q)--;
! 1766: }
! 1767:
! 1768: void
! 1769: _flushq(class_queue_t *q)
! 1770: {
! 1771: mbuf_t *m;
! 1772:
! 1773: while ((m = _getq(q)) != NULL)
! 1774: m_freem(m);
! 1775: ASSERT(qlen(q) == 0);
! 1776: }
! 1777:
! 1778: #endif /* !__GNUC__ || ALTQ_DEBUG */
! 1779: #endif /* ALTQ_CBQ || ALTQ_RED || ALTQ_RIO || ALTQ_HFSC || ALTQ_PRIQ */
CVSweb