Annotation of sys/altq/altq_rmclass.c, Revision 1.1.1.1
1.1 nbrk 1: /* $OpenBSD: altq_rmclass.c,v 1.12 2006/03/04 22:40:15 brad Exp $ */
2: /* $KAME: altq_rmclass.c,v 1.10 2001/02/09 07:20:40 kjc Exp $ */
3:
4: /*
5: * Copyright (c) 1991-1997 Regents of the University of California.
6: * All rights reserved.
7: *
8: * Redistribution and use in source and binary forms, with or without
9: * modification, are permitted provided that the following conditions
10: * are met:
11: * 1. Redistributions of source code must retain the above copyright
12: * notice, this list of conditions and the following disclaimer.
13: * 2. Redistributions in binary form must reproduce the above copyright
14: * notice, this list of conditions and the following disclaimer in the
15: * documentation and/or other materials provided with the distribution.
16: * 3. All advertising materials mentioning features or use of this software
17: * must display the following acknowledgement:
18: * This product includes software developed by the Network Research
19: * Group at Lawrence Berkeley Laboratory.
20: * 4. Neither the name of the University nor of the Laboratory may be used
21: * to endorse or promote products derived from this software without
22: * specific prior written permission.
23: *
24: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34: * SUCH DAMAGE.
35: *
36: * LBL code modified by speer@eng.sun.com, May 1977.
37: * For questions and/or comments, please send mail to cbq@ee.lbl.gov
38: */
39:
40: #include <sys/param.h>
41: #include <sys/malloc.h>
42: #include <sys/mbuf.h>
43: #include <sys/socket.h>
44: #include <sys/systm.h>
45: #include <sys/errno.h>
46: #include <sys/time.h>
47:
48: #include <net/if.h>
49:
50: #include <altq/altq.h>
51: #include <altq/altq_rmclass.h>
52: #include <altq/altq_rmclass_debug.h>
53: #include <altq/altq_red.h>
54: #include <altq/altq_rio.h>
55:
56: /*
57: * Local Macros
58: */
59:
60: #define reset_cutoff(ifd) { ifd->cutoff_ = RM_MAXDEPTH; }
61:
62: /*
63: * Local routines.
64: */
65:
66: static int rmc_satisfied(struct rm_class *, struct timeval *);
67: static void rmc_wrr_set_weights(struct rm_ifdat *);
68: static void rmc_depth_compute(struct rm_class *);
69: static void rmc_depth_recompute(rm_class_t *);
70:
71: static mbuf_t *_rmc_wrr_dequeue_next(struct rm_ifdat *, int);
72: static mbuf_t *_rmc_prr_dequeue_next(struct rm_ifdat *, int);
73:
74: static int _rmc_addq(rm_class_t *, mbuf_t *);
75: static void _rmc_dropq(rm_class_t *);
76: static mbuf_t *_rmc_getq(rm_class_t *);
77: static mbuf_t *_rmc_pollq(rm_class_t *);
78:
79: static int rmc_under_limit(struct rm_class *, struct timeval *);
80: static void rmc_tl_satisfied(struct rm_ifdat *, struct timeval *);
81: static void rmc_drop_action(struct rm_class *);
82: static void rmc_restart(struct rm_class *);
83: static void rmc_root_overlimit(struct rm_class *, struct rm_class *);
84:
85: #define BORROW_OFFTIME
86: /*
87: * BORROW_OFFTIME (experimental):
88: * borrow the offtime of the class borrowing from.
89: * the reason is that when its own offtime is set, the class is unable
90: * to borrow much, especially when cutoff is taking effect.
91: * but when the borrowed class is overloaded (advidle is close to minidle),
92: * use the borrowing class's offtime to avoid overload.
93: */
94: #define ADJUST_CUTOFF
95: /*
96: * ADJUST_CUTOFF (experimental):
97: * if no underlimit class is found due to cutoff, increase cutoff and
98: * retry the scheduling loop.
99: * also, don't invoke delay_actions while cutoff is taking effect,
100: * since a sleeping class won't have a chance to be scheduled in the
101: * next loop.
102: *
103: * now heuristics for setting the top-level variable (cutoff_) becomes:
104: * 1. if a packet arrives for a not-overlimit class, set cutoff
105: * to the depth of the class.
106: * 2. if cutoff is i, and a packet arrives for an overlimit class
107: * with an underlimit ancestor at a lower level than i (say j),
108: * then set cutoff to j.
109: * 3. at scheduling a packet, if there is no underlimit class
110: * due to the current cutoff level, increase cutoff by 1 and
111: * then try to schedule again.
112: */
113:
114: /*
115: * rm_class_t *
116: * rmc_newclass(...) - Create a new resource management class at priority
117: * 'pri' on the interface given by 'ifd'.
118: *
119: * nsecPerByte is the data rate of the interface in nanoseconds/byte.
120: * E.g., 800 for a 10Mb/s ethernet. If the class gets less
121: * than 100% of the bandwidth, this number should be the
122: * 'effective' rate for the class. Let f be the
123: * bandwidth fraction allocated to this class, and let
124: * nsPerByte be the data rate of the output link in
125: * nanoseconds/byte. Then nsecPerByte is set to
126: * nsPerByte / f. E.g., 1600 (= 800 / .5)
127: * for a class that gets 50% of an ethernet's bandwidth.
128: *
129: * action the routine to call when the class is over limit.
130: *
131: * maxq max allowable queue size for class (in packets).
132: *
133: * parent parent class pointer.
134: *
135: * borrow class to borrow from (should be either 'parent' or null).
136: *
137: * maxidle max value allowed for class 'idle' time estimate (this
138: * parameter determines how large an initial burst of packets
139: * can be before overlimit action is invoked.
140: *
141: * offtime how long 'delay' action will delay when class goes over
142: * limit (this parameter determines the steady-state burst
143: * size when a class is running over its limit).
144: *
145: * Maxidle and offtime have to be computed from the following: If the
146: * average packet size is s, the bandwidth fraction allocated to this
147: * class is f, we want to allow b packet bursts, and the gain of the
148: * averaging filter is g (= 1 - 2^(-RM_FILTER_GAIN)), then:
149: *
150: * ptime = s * nsPerByte * (1 - f) / f
151: * maxidle = ptime * (1 - g^b) / g^b
152: * minidle = -ptime * (1 / (f - 1))
153: * offtime = ptime * (1 + 1/(1 - g) * (1 - g^(b - 1)) / g^(b - 1)
154: *
155: * Operationally, it's convenient to specify maxidle & offtime in units
156: * independent of the link bandwidth so the maxidle & offtime passed to
157: * this routine are the above values multiplied by 8*f/(1000*nsPerByte).
158: * (The constant factor is a scale factor needed to make the parameters
159: * integers. This scaling also means that the 'unscaled' values of
160: * maxidle*nsecPerByte/8 and offtime*nsecPerByte/8 will be in microseconds,
161: * not nanoseconds.) Also note that the 'idle' filter computation keeps
162: * an estimate scaled upward by 2^RM_FILTER_GAIN so the passed value of
163: * maxidle also must be scaled upward by this value. Thus, the passed
164: * values for maxidle and offtime can be computed as follows:
165: *
166: * maxidle = maxidle * 2^RM_FILTER_GAIN * 8 / (1000 * nsecPerByte)
167: * offtime = offtime * 8 / (1000 * nsecPerByte)
168: *
169: * When USE_HRTIME is employed, then maxidle and offtime become:
170: * maxidle = maxilde * (8.0 / nsecPerByte);
171: * offtime = offtime * (8.0 / nsecPerByte);
172: */
173:
struct rm_class *
rmc_newclass(int pri, struct rm_ifdat *ifd, u_int nsecPerByte,
    void (*action)(rm_class_t *, rm_class_t *), int maxq,
    struct rm_class *parent, struct rm_class *borrow, u_int maxidle,
    int minidle, u_int offtime, int pktsize, int flags)
{
	struct rm_class *cl;
	struct rm_class *peer;
	int s;

	/* 'pri' indexes the fixed-size per-priority arrays in 'ifd'. */
	if (pri >= RM_MAXPRIO)
		return (NULL);
#ifndef ALTQ_RED
	/* RED requested but not compiled in: refuse to create the class. */
	if (flags & RMCF_RED) {
#ifdef ALTQ_DEBUG
		printf("rmc_newclass: RED not configured for CBQ!\n");
#endif
		return (NULL);
	}
#endif
#ifndef ALTQ_RIO
	/* likewise for RIO */
	if (flags & RMCF_RIO) {
#ifdef ALTQ_DEBUG
		printf("rmc_newclass: RIO not configured for CBQ!\n");
#endif
		return (NULL);
	}
#endif

	MALLOC(cl, struct rm_class *, sizeof(struct rm_class),
	       M_DEVBUF, M_WAITOK);
	if (cl == NULL)
		return (NULL);
	bzero(cl, sizeof(struct rm_class));
	CALLOUT_INIT(&cl->callout_);
	MALLOC(cl->q_, class_queue_t *, sizeof(class_queue_t),
	       M_DEVBUF, M_WAITOK);
	if (cl->q_ == NULL) {
		/* don't leak the class structure on partial failure */
		FREE(cl, M_DEVBUF);
		return (NULL);
	}
	bzero(cl->q_, sizeof(class_queue_t));

	/*
	 * Class initialization.
	 */
	cl->children_ = NULL;
	cl->parent_ = parent;
	cl->borrow_ = borrow;
	cl->leaf_ = 1;
	cl->ifdat_ = ifd;
	cl->pri_ = pri;
	cl->allotment_ = RM_NS_PER_SEC / nsecPerByte; /* Bytes per sec */
	cl->depth_ = 0;
	cl->qthresh_ = 0;
	cl->ns_per_byte_ = nsecPerByte;

	qlimit(cl->q_) = maxq;
	qtype(cl->q_) = Q_DROPHEAD;
	qlen(cl->q_) = 0;
	cl->flags_ = flags;

#if 1 /* minidle is also scaled in ALTQ */
	/*
	 * minidle is negative (or zero); the (int) cast keeps the
	 * scaling in signed arithmetic.  A positive result is clamped
	 * to zero.
	 */
	cl->minidle_ = (minidle * (int)nsecPerByte) / 8;
	if (cl->minidle_ > 0)
		cl->minidle_ = 0;
#else
	cl->minidle_ = minidle;
#endif
	/* scale maxidle into link time units (see block comment above) */
	cl->maxidle_ = (maxidle * nsecPerByte) / 8;
	if (cl->maxidle_ == 0)
		cl->maxidle_ = 1;	/* must be non-zero */
#if 1 /* offtime is also scaled in ALTQ */
	/* start with full idle credit so an initial burst is permitted */
	cl->avgidle_ = cl->maxidle_;
	cl->offtime_ = ((offtime * nsecPerByte) / 8) >> RM_FILTER_GAIN;
	if (cl->offtime_ == 0)
		cl->offtime_ = 1;	/* must be non-zero */
#else
	cl->avgidle_ = 0;
	cl->offtime_ = (offtime * nsecPerByte) / 8;
#endif
	cl->overlimit = action;

#ifdef ALTQ_RED
	if (flags & (RMCF_RED|RMCF_RIO)) {
		int red_flags, red_pkttime;

		red_flags = 0;
		if (flags & RMCF_ECN)
			red_flags |= REDF_ECN;
		if (flags & RMCF_FLOWVALVE)
			red_flags |= REDF_FLOWVALVE;
#ifdef ALTQ_RIO
		if (flags & RMCF_CLEARDSCP)
			red_flags |= RIOF_CLEARDSCP;
#endif
		/* mean packet transmission time, nsec/1000 (usec, presumably) */
		red_pkttime = nsecPerByte * pktsize / 1000;

		if (flags & RMCF_RED) {
			/* default RED thresholds: 10% / 30% of queue limit */
			cl->red_ = red_alloc(0, 0,
			    qlimit(cl->q_) * 10/100,
			    qlimit(cl->q_) * 30/100,
			    red_flags, red_pkttime);
			if (cl->red_ != NULL)
				qtype(cl->q_) = Q_RED;
		}
#ifdef ALTQ_RIO
		else {
			cl->red_ = (red_t *)rio_alloc(0, NULL,
			    red_flags, red_pkttime);
			if (cl->red_ != NULL)
				qtype(cl->q_) = Q_RIO;
		}
#endif
	}
#endif /* ALTQ_RED */

	/*
	 * put the class into the class tree
	 */
	s = splnet();
	if ((peer = ifd->active_[pri]) != NULL) {
		/* find the last class at this pri (peer_ is a circular list) */
		cl->peer_ = peer;
		while (peer->peer_ != ifd->active_[pri])
			peer = peer->peer_;
		peer->peer_ = cl;
	} else {
		/* first class at this priority: a one-element ring */
		ifd->active_[pri] = cl;
		cl->peer_ = cl;
	}

	if (cl->parent_) {
		/* push onto the head of the parent's children list */
		cl->next_ = parent->children_;
		parent->children_ = cl;
		parent->leaf_ = 0;
	}

	/*
	 * Compute the depth of this class and its ancestors in the class
	 * hierarchy.
	 */
	rmc_depth_compute(cl);

	/*
	 * If CBQ's WRR is enabled, then initialize the class WRR state.
	 */
	if (ifd->wrr_) {
		ifd->num_[pri]++;
		ifd->alloc_[pri] += cl->allotment_;
		rmc_wrr_set_weights(ifd);
	}
	splx(s);
	return (cl);
}
329:
330: int
331: rmc_modclass(struct rm_class *cl, u_int nsecPerByte, int maxq, u_int maxidle,
332: int minidle, u_int offtime, int pktsize)
333: {
334: struct rm_ifdat *ifd;
335: u_int old_allotment;
336: int s;
337:
338: ifd = cl->ifdat_;
339: old_allotment = cl->allotment_;
340:
341: s = splnet();
342: cl->allotment_ = RM_NS_PER_SEC / nsecPerByte; /* Bytes per sec */
343: cl->qthresh_ = 0;
344: cl->ns_per_byte_ = nsecPerByte;
345:
346: qlimit(cl->q_) = maxq;
347:
348: #if 1 /* minidle is also scaled in ALTQ */
349: cl->minidle_ = (minidle * nsecPerByte) / 8;
350: if (cl->minidle_ > 0)
351: cl->minidle_ = 0;
352: #else
353: cl->minidle_ = minidle;
354: #endif
355: cl->maxidle_ = (maxidle * nsecPerByte) / 8;
356: if (cl->maxidle_ == 0)
357: cl->maxidle_ = 1;
358: #if 1 /* offtime is also scaled in ALTQ */
359: cl->avgidle_ = cl->maxidle_;
360: cl->offtime_ = ((offtime * nsecPerByte) / 8) >> RM_FILTER_GAIN;
361: if (cl->offtime_ == 0)
362: cl->offtime_ = 1;
363: #else
364: cl->avgidle_ = 0;
365: cl->offtime_ = (offtime * nsecPerByte) / 8;
366: #endif
367:
368: /*
369: * If CBQ's WRR is enabled, then initialize the class WRR state.
370: */
371: if (ifd->wrr_) {
372: ifd->alloc_[cl->pri_] += cl->allotment_ - old_allotment;
373: rmc_wrr_set_weights(ifd);
374: }
375: splx(s);
376: return (0);
377: }
378:
379: /*
380: * static void
381: * rmc_wrr_set_weights(struct rm_ifdat *ifdat) - This function computes
382: * the appropriate round robin weights for the CBQ weighted round robin
383: * algorithm.
384: *
385: * Returns: NONE
386: */
387:
388: static void
389: rmc_wrr_set_weights(struct rm_ifdat *ifd)
390: {
391: int i;
392: struct rm_class *cl, *clh;
393:
394: for (i = 0; i < RM_MAXPRIO; i++) {
395: /*
396: * This is inverted from that of the simulator to
397: * maintain precision.
398: */
399: if (ifd->num_[i] == 0)
400: ifd->M_[i] = 0;
401: else
402: ifd->M_[i] = ifd->alloc_[i] /
403: (ifd->num_[i] * ifd->maxpkt_);
404: /*
405: * Compute the weighted allotment for each class.
406: * This takes the expensive div instruction out
407: * of the main loop for the wrr scheduling path.
408: * These only get recomputed when a class comes or
409: * goes.
410: */
411: if (ifd->active_[i] != NULL) {
412: clh = cl = ifd->active_[i];
413: do {
414: /* safe-guard for slow link or alloc_ == 0 */
415: if (ifd->M_[i] == 0)
416: cl->w_allotment_ = 0;
417: else
418: cl->w_allotment_ = cl->allotment_ /
419: ifd->M_[i];
420: cl = cl->peer_;
421: } while ((cl != NULL) && (cl != clh));
422: }
423: }
424: }
425:
426: int
427: rmc_get_weight(struct rm_ifdat *ifd, int pri)
428: {
429: if ((pri >= 0) && (pri < RM_MAXPRIO))
430: return (ifd->M_[pri]);
431: else
432: return (0);
433: }
434:
435: /*
436: * static void
437: * rmc_depth_compute(struct rm_class *cl) - This function computes the
438: * appropriate depth of class 'cl' and its ancestors.
439: *
440: * Returns: NONE
441: */
442:
443: static void
444: rmc_depth_compute(struct rm_class *cl)
445: {
446: rm_class_t *t = cl, *p;
447:
448: /*
449: * Recompute the depth for the branch of the tree.
450: */
451: while (t != NULL) {
452: p = t->parent_;
453: if (p && (t->depth_ >= p->depth_)) {
454: p->depth_ = t->depth_ + 1;
455: t = p;
456: } else
457: t = NULL;
458: }
459: }
460:
461: /*
462: * static void
463: * rmc_depth_recompute(struct rm_class *cl) - This function re-computes
464: * the depth of the tree after a class has been deleted.
465: *
466: * Returns: NONE
467: */
468:
static void
rmc_depth_recompute(rm_class_t *cl)
{
#if 1 /* ALTQ */
	rm_class_t *p, *t;

	/*
	 * Walk upward from 'cl', recomputing each ancestor's depth as
	 * 1 + max(children's depths).  Stop early as soon as a node's
	 * depth is unchanged, since no ancestor above it can change
	 * either.
	 */
	p = cl;
	while (p != NULL) {
		if ((t = p->children_) == NULL) {
			/* no children left: 'p' is now a leaf */
			p->depth_ = 0;
		} else {
			int cdepth = 0;

			/* find the deepest remaining child */
			while (t != NULL) {
				if (t->depth_ > cdepth)
					cdepth = t->depth_;
				t = t->next_;
			}

			if (p->depth_ == cdepth + 1)
				/* no change to this parent */
				return;

			p->depth_ = cdepth + 1;
		}

		p = p->parent_;
	}
#else
	/* NOTE(review): dead variant — the middle branch can never run
	 * after the children_ == NULL test, so this recursion never
	 * reaches rmc_depth_compute(). Kept for reference only. */
	rm_class_t	*t;

	if (cl->depth_ >= 1) {
		if (cl->children_ == NULL) {
			cl->depth_ = 0;
		} else if ((t = cl->children_) != NULL) {
			while (t != NULL) {
				if (t->children_ != NULL)
					rmc_depth_recompute(t);
				t = t->next_;
			}
		} else
			rmc_depth_compute(cl);
	}
#endif
}
514:
515: /*
516: * void
517: * rmc_delete_class(struct rm_ifdat *ifdat, struct rm_class *cl) - This
518: * function deletes a class from the link-sharing structure and frees
519: * all resources associated with the class.
520: *
521: * Returns: NONE
522: */
523:
void
rmc_delete_class(struct rm_ifdat *ifd, struct rm_class *cl)
{
	struct rm_class *p, *head, *previous;
	int s;

	/* only leaf classes may be deleted */
	ASSERT(cl->children_ == NULL);

	if (cl->sleeping_)
		CALLOUT_STOP(&cl->callout_);

	s = splnet();
	/*
	 * Free packets in the packet queue.
	 * XXX - this may not be a desired behavior.  Packets should be
	 * re-queued.
	 */
	rmc_dropall(cl);

	/*
	 * If the class has a parent, then remove the class from the
	 * parent's children chain.
	 */
	if (cl->parent_ != NULL) {
		head = cl->parent_->children_;
		p = previous = head;
		if (head->next_ == NULL) {
			/* only child: the parent becomes a leaf again */
			ASSERT(head == cl);
			cl->parent_->children_ = NULL;
			cl->parent_->leaf_ = 1;
		} else while (p != NULL) {
			if (p == cl) {
				if (cl == head)
					cl->parent_->children_ = cl->next_;
				else
					previous->next_ = cl->next_;
				cl->next_ = NULL;
				p = NULL;	/* terminate the scan */
			} else {
				previous = p;
				p = p->next_;
			}
		}
	}

	/*
	 * Delete class from class priority peer list (circular).
	 */
	if ((p = ifd->active_[cl->pri_]) != NULL) {
		/*
		 * If there is more than one member of this priority
		 * level, then look for class(cl) in the priority level.
		 */
		if (p != p->peer_) {
			while (p->peer_ != cl)
				p = p->peer_;
			p->peer_ = cl->peer_;

			if (ifd->active_[cl->pri_] == cl)
				ifd->active_[cl->pri_] = cl->peer_;
		} else {
			/* 'cl' was the only class at this priority */
			ASSERT(p == cl);
			ifd->active_[cl->pri_] = NULL;
		}
	}

	/*
	 * Recompute the WRR weights.
	 */
	if (ifd->wrr_) {
		ifd->alloc_[cl->pri_] -= cl->allotment_;
		ifd->num_[cl->pri_]--;
		rmc_wrr_set_weights(ifd);
	}

	/*
	 * Re-compute the depth of the tree.
	 */
#if 1 /* ALTQ */
	rmc_depth_recompute(cl->parent_);
#else
	rmc_depth_recompute(ifd->root_);
#endif

	splx(s);

	/*
	 * Free the class structure (and its RED/RIO state, if any).
	 */
	if (cl->red_ != NULL) {
#ifdef ALTQ_RIO
		if (q_is_rio(cl->q_))
			rio_destroy((rio_t *)cl->red_);
#endif
#ifdef ALTQ_RED
		if (q_is_red(cl->q_))
			red_destroy(cl->red_);
#endif
	}
	FREE(cl->q_, M_DEVBUF);
	FREE(cl, M_DEVBUF);
}
626:
627:
628: /*
629: * void
630: * rmc_init(...) - Initialize the resource management data structures
631: * associated with the output portion of interface 'ifp'. 'ifd' is
632: * where the structures will be built (for backwards compatibility, the
633: * structures aren't kept in the ifnet struct). 'nsecPerByte'
634: * gives the link speed (inverse of bandwidth) in nanoseconds/byte.
635: * 'restart' is the driver-specific routine that the generic 'delay
636: * until under limit' action will call to restart output. `maxq'
637: * is the queue size of the 'link' & 'default' classes. 'maxqueued'
638: * is the maximum number of packets that the resource management
639: * code will allow to be queued 'downstream' (this is typically 1).
640: *
641: * Returns: NONE
642: */
643:
644: void
645: rmc_init(struct ifaltq *ifq, struct rm_ifdat *ifd, u_int nsecPerByte,
646: void (*restart)(struct ifaltq *), int maxq, int maxqueued, u_int maxidle,
647: int minidle, u_int offtime, int flags)
648: {
649: int i, mtu;
650:
651: /*
652: * Initialize the CBQ tracing/debug facility.
653: */
654: CBQTRACEINIT();
655:
656: bzero((char *)ifd, sizeof (*ifd));
657: mtu = ifq->altq_ifp->if_mtu;
658: ifd->ifq_ = ifq;
659: ifd->restart = restart;
660: ifd->maxqueued_ = maxqueued;
661: ifd->ns_per_byte_ = nsecPerByte;
662: ifd->maxpkt_ = mtu;
663: ifd->wrr_ = (flags & RMCF_WRR) ? 1 : 0;
664: ifd->efficient_ = (flags & RMCF_EFFICIENT) ? 1 : 0;
665: #if 1
666: ifd->maxiftime_ = mtu * nsecPerByte / 1000 * 16;
667: if (mtu * nsecPerByte > 10 * 1000000)
668: ifd->maxiftime_ /= 4;
669: #endif
670:
671: reset_cutoff(ifd);
672: CBQTRACE(rmc_init, 'INIT', ifd->cutoff_);
673:
674: /*
675: * Initialize the CBQ's WRR state.
676: */
677: for (i = 0; i < RM_MAXPRIO; i++) {
678: ifd->alloc_[i] = 0;
679: ifd->M_[i] = 0;
680: ifd->num_[i] = 0;
681: ifd->na_[i] = 0;
682: ifd->active_[i] = NULL;
683: }
684:
685: /*
686: * Initialize current packet state.
687: */
688: ifd->qi_ = 0;
689: ifd->qo_ = 0;
690: for (i = 0; i < RM_MAXQUEUED; i++) {
691: ifd->class_[i] = NULL;
692: ifd->curlen_[i] = 0;
693: ifd->borrowed_[i] = NULL;
694: }
695:
696: /*
697: * Create the root class of the link-sharing structure.
698: */
699: if ((ifd->root_ = rmc_newclass(0, ifd,
700: nsecPerByte,
701: rmc_root_overlimit, maxq, 0, 0,
702: maxidle, minidle, offtime,
703: 0, 0)) == NULL) {
704: printf("rmc_init: root class not allocated\n");
705: return ;
706: }
707: ifd->root_->depth_ = 0;
708: }
709:
710: /*
711: * void
712: * rmc_queue_packet(struct rm_class *cl, mbuf_t *m) - Add packet given by
713: * mbuf 'm' to queue for resource class 'cl'. This routine is called
714: * by a driver's if_output routine. This routine must be called with
715: * output packet completion interrupts locked out (to avoid racing with
716: * rmc_dequeue_next).
717: *
718: * Returns: 0 on successful queueing
719: * -1 when packet drop occurs
720: */
int
rmc_queue_packet(struct rm_class *cl, mbuf_t *m)
{
	struct timeval now;
	struct rm_ifdat *ifd = cl->ifdat_;
	int cpri = cl->pri_;
	int is_empty = qempty(cl->q_);

	RM_GETTIME(now);
	if (ifd->cutoff_ > 0) {
		if (TV_LT(&cl->undertime_, &now)) {
			/*
			 * the class is underlimit: allow borrowing down
			 * to this class's depth (heuristic #1 in the
			 * cutoff comment near the top of the file).
			 */
			if (ifd->cutoff_ > cl->depth_)
				ifd->cutoff_ = cl->depth_;
			CBQTRACE(rmc_queue_packet, 'ffoc', cl->depth_);
		}
#if 1 /* ALTQ */
		else {
			/*
			 * the class is overlimit. if the class has
			 * underlimit ancestors, set cutoff to the lowest
			 * depth among them.
			 */
			struct rm_class *borrow = cl->borrow_;

			while (borrow != NULL &&
			       borrow->depth_ < ifd->cutoff_) {
				if (TV_LT(&borrow->undertime_, &now)) {
					ifd->cutoff_ = borrow->depth_;
					CBQTRACE(rmc_queue_packet, 'ffob', ifd->cutoff_);
					break;
				}
				borrow = borrow->borrow_;
			}
		}
#else /* !ALTQ */
		else if ((ifd->cutoff_ > 1) && cl->borrow_) {
			if (TV_LT(&cl->borrow_->undertime_, &now)) {
				ifd->cutoff_ = cl->borrow_->depth_;
				CBQTRACE(rmc_queue_packet, 'ffob',
					 cl->borrow_->depth_);
			}
		}
#endif /* !ALTQ */
	}

	if (_rmc_addq(cl, m) < 0)
		/* failed */
		return (-1);

	if (is_empty) {
		/* queue went non-empty: count it as active at this pri */
		CBQTRACE(rmc_queue_packet, 'ytpe', cl->stats_.handle);
		ifd->na_[cpri]++;
	}

	if (qlen(cl->q_) > qlimit(cl->q_)) {
		/* note: qlimit can be set to 0 or 1 */
		rmc_drop_action(cl);
		return (-1);
	}
	return (0);
}
782:
783: /*
784: * void
785: * rmc_tl_satisfied(struct rm_ifdat *ifd, struct timeval *now) - Check all
786: * classes to see if they are satisfied.
787: */
788:
789: static void
790: rmc_tl_satisfied(struct rm_ifdat *ifd, struct timeval *now)
791: {
792: int i;
793: rm_class_t *p, *bp;
794:
795: for (i = RM_MAXPRIO - 1; i >= 0; i--) {
796: if ((bp = ifd->active_[i]) != NULL) {
797: p = bp;
798: do {
799: if (!rmc_satisfied(p, now)) {
800: ifd->cutoff_ = p->depth_;
801: return;
802: }
803: p = p->peer_;
804: } while (p != bp);
805: }
806: }
807:
808: reset_cutoff(ifd);
809: }
810:
811: /*
812: * rmc_satisfied - Return 1 if the class is satisfied, 0 otherwise.
813: */
814:
815: static int
816: rmc_satisfied(struct rm_class *cl, struct timeval *now)
817: {
818: rm_class_t *p;
819:
820: if (cl == NULL)
821: return (1);
822: if (TV_LT(now, &cl->undertime_))
823: return (1);
824: if (cl->depth_ == 0) {
825: if (!cl->sleeping_ && (qlen(cl->q_) > cl->qthresh_))
826: return (0);
827: else
828: return (1);
829: }
830: if (cl->children_ != NULL) {
831: p = cl->children_;
832: while (p != NULL) {
833: if (!rmc_satisfied(p, now))
834: return (0);
835: p = p->next_;
836: }
837: }
838:
839: return (1);
840: }
841:
842: /*
843: * Return 1 if class 'cl' is under limit or can borrow from a parent,
844: * 0 if overlimit. As a side-effect, this routine will invoke the
845: * class overlimit action if the class if overlimit.
846: */
847:
static int
rmc_under_limit(struct rm_class *cl, struct timeval *now)
{
	rm_class_t *p = cl;
	rm_class_t *top;
	struct rm_ifdat *ifd = cl->ifdat_;

	ifd->borrowed_[ifd->qi_] = NULL;
	/*
	 * If cl is the root class, then always return that it is
	 * underlimit.  Otherwise, check to see if the class is underlimit.
	 */
	if (cl->parent_ == NULL)
		return (1);

	if (cl->sleeping_) {
		/* still serving its delay-action penalty? */
		if (TV_LT(now, &cl->undertime_))
			return (0);

		/* penalty expired: wake the class up */
		CALLOUT_STOP(&cl->callout_);
		cl->sleeping_ = 0;
		cl->undertime_.tv_sec = 0;
		return (1);
	}

	/*
	 * Walk up the borrow chain until an underlimit ancestor is
	 * found; 'p' keeps the original class for the overlimit action.
	 */
	top = NULL;
	while (cl->undertime_.tv_sec && TV_LT(now, &cl->undertime_)) {
		if (((cl = cl->borrow_) == NULL) ||
		    (cl->depth_ > ifd->cutoff_)) {
#ifdef ADJUST_CUTOFF
			if (cl != NULL)
				/* cutoff is taking effect, just
				   return false without calling
				   the delay action. */
				return (0);
#endif
#ifdef BORROW_OFFTIME
			/*
			 * check if the class can borrow offtime too.
			 * borrow offtime from the top of the borrow
			 * chain if the top class is not overloaded.
			 */
			if (cl != NULL) {
				/* cutoff is taking effect, use this class as top. */
				top = cl;
				CBQTRACE(rmc_under_limit, 'ffou', ifd->cutoff_);
			}
			/* avgidle == minidle means 'top' is overloaded */
			if (top != NULL && top->avgidle_ == top->minidle_)
				top = NULL;
			p->overtime_ = *now;
			(p->overlimit)(p, top);
#else
			p->overtime_ = *now;
			(p->overlimit)(p, NULL);
#endif
			return (0);
		}
		top = cl;
	}

	/* record which ancestor (if any) is being borrowed from */
	if (cl != p)
		ifd->borrowed_[ifd->qi_] = cl;
	return (1);
}
912:
913: /*
914: * _rmc_wrr_dequeue_next() - This is scheduler for WRR as opposed to
915: * Packet-by-packet round robin.
916: *
917: * The heart of the weighted round-robin scheduler, which decides which
918: * class next gets to send a packet. Highest priority first, then
919: * weighted round-robin within priorites.
920: *
921: * Each able-to-send class gets to send until its byte allocation is
922: * exhausted. Thus, the active pointer is only changed after a class has
923: * exhausted its allocation.
924: *
925: * If the scheduler finds no class that is underlimit or able to borrow,
926: * then the first class found that had a nonzero queue and is allowed to
927: * borrow gets to send.
928: */
929:
static mbuf_t *
_rmc_wrr_dequeue_next(struct rm_ifdat *ifd, int op)
{
	struct rm_class *cl = NULL, *first = NULL;
	u_int deficit;
	int cpri;
	mbuf_t *m;
	struct timeval now;

	RM_GETTIME(now);

	/*
	 * if the driver polls the top of the queue and then removes
	 * the polled packet, we must return the same packet.
	 */
	if (op == ALTDQ_REMOVE && ifd->pollcache_) {
		cl = ifd->pollcache_;
		cpri = cl->pri_;
		if (ifd->efficient_) {
			/* check if this class is overlimit */
			if (cl->undertime_.tv_sec != 0 &&
			    rmc_under_limit(cl, &now) == 0)
				first = cl;
		}
		ifd->pollcache_ = NULL;
		goto _wrr_out;
	}
	else {
		/* mode == ALTDQ_POLL || pollcache == NULL */
		ifd->pollcache_ = NULL;
		ifd->borrowed_[ifd->qi_] = NULL;
	}
#ifdef ADJUST_CUTOFF
 _again:
#endif
	/* scan priority levels from highest to lowest */
	for (cpri = RM_MAXPRIO - 1; cpri >= 0; cpri--) {
		if (ifd->na_[cpri] == 0)
			continue;	/* no active class at this level */
		deficit = 0;
		/*
		 * Loop through twice for a priority level, if some class
		 * was unable to send a packet the first round because
		 * of the weighted round-robin mechanism.
		 * During the second loop at this level, deficit==2.
		 * (This second loop is not needed if for every class,
		 * "M[cl->pri_])" times "cl->allotment" is greater than
		 * the byte size for the largest packet in the class.)
		 */
 _wrr_loop:
		cl = ifd->active_[cpri];
		ASSERT(cl != NULL);
		do {
			/* replenish exhausted byte credit (first pass only) */
			if ((deficit < 2) && (cl->bytes_alloc_ <= 0))
				cl->bytes_alloc_ += cl->w_allotment_;
			if (!qempty(cl->q_)) {
				if ((cl->undertime_.tv_sec == 0) ||
				    rmc_under_limit(cl, &now)) {
					/* second pass ignores the credit */
					if (cl->bytes_alloc_ > 0 || deficit > 1)
						goto _wrr_out;

					/* underlimit but no alloc */
					deficit = 1;
#if 1
					ifd->borrowed_[ifd->qi_] = NULL;
#endif
				}
				else if (first == NULL && cl->borrow_ != NULL)
					first = cl; /* borrowing candidate */
			}

			cl->bytes_alloc_ = 0;
			cl = cl->peer_;
		} while (cl != ifd->active_[cpri]);

		if (deficit == 1) {
			/* first loop found an underlimit class with deficit */
			/* Loop on same priority level, with new deficit.  */
			deficit = 2;
			goto _wrr_loop;
		}
	}

#ifdef ADJUST_CUTOFF
	/*
	 * no underlimit class found.  if cutoff is taking effect,
	 * increase cutoff and try again.
	 */
	if (first != NULL && ifd->cutoff_ < ifd->root_->depth_) {
		ifd->cutoff_++;
		CBQTRACE(_rmc_wrr_dequeue_next, 'ojda', ifd->cutoff_);
		goto _again;
	}
#endif /* ADJUST_CUTOFF */
	/*
	 * If LINK_EFFICIENCY is turned on, then the first overlimit
	 * class we encounter will send a packet if all the classes
	 * of the link-sharing structure are overlimit.
	 */
	reset_cutoff(ifd);
	CBQTRACE(_rmc_wrr_dequeue_next, 'otsr', ifd->cutoff_);

	if (!ifd->efficient_ || first == NULL)
		return (NULL);

	cl = first;
	cpri = cl->pri_;
#if 0	/* too time-consuming for nothing */
	if (cl->sleeping_)
		CALLOUT_STOP(&cl->callout_);
	cl->sleeping_ = 0;
	cl->undertime_.tv_sec = 0;
#endif
	/* 'first' is only set when cl->borrow_ != NULL, so this is safe */
	ifd->borrowed_[ifd->qi_] = cl->borrow_;
	ifd->cutoff_ = cl->borrow_->depth_;

	/*
	 * Deque the packet and do the book keeping...
	 */
 _wrr_out:
	if (op == ALTDQ_REMOVE) {
		m = _rmc_getq(cl);
		if (m == NULL)
			panic("_rmc_wrr_dequeue_next");
		if (qempty(cl->q_))
			ifd->na_[cpri]--;

		/*
		 * Update class statistics and link data.
		 */
		if (cl->bytes_alloc_ > 0)
			cl->bytes_alloc_ -= m_pktlen(m);

		/* advance the round-robin pointer once the credit is spent */
		if ((cl->bytes_alloc_ <= 0) || first == cl)
			ifd->active_[cl->pri_] = cl->peer_;
		else
			ifd->active_[cl->pri_] = cl;

		/* remember what was handed downstream, in slot qi_ */
		ifd->class_[ifd->qi_] = cl;
		ifd->curlen_[ifd->qi_] = m_pktlen(m);
		ifd->now_[ifd->qi_] = now;
		ifd->qi_ = (ifd->qi_ + 1) % ifd->maxqueued_;
		ifd->queued_++;
	} else {
		/* mode == ALTDQ_PPOLL */
		m = _rmc_pollq(cl);
		ifd->pollcache_ = cl;
	}
	return (m);
}
1079:
1080: /*
1081: * Dequeue & return next packet from the highest priority class that
1082: * has a packet to send & has enough allocation to send it. This
1083: * routine is called by a driver whenever it needs a new packet to
1084: * output.
1085: */
/*
 * _rmc_prr_dequeue_next(ifd, op) - packet-by-packet round-robin dequeue.
 *	Scans priorities from highest (RM_MAXPRIO - 1) down to 0 and returns
 *	a packet from the first backlogged class that is underlimit (or that
 *	may borrow).  'op' is ALTDQ_REMOVE to dequeue or ALTDQ_POLL to peek;
 *	a polled class is cached in ifd->pollcache_ so that the following
 *	REMOVE returns the very same packet.
 */
static mbuf_t *
_rmc_prr_dequeue_next(struct rm_ifdat *ifd, int op)
{
	mbuf_t *m;
	int cpri;
	struct rm_class *cl, *first = NULL;
	struct timeval now;

	RM_GETTIME(now);

	/*
	 * if the driver polls the top of the queue and then removes
	 * the polled packet, we must return the same packet.
	 */
	if (op == ALTDQ_REMOVE && ifd->pollcache_) {
		cl = ifd->pollcache_;
		cpri = cl->pri_;
		ifd->pollcache_ = NULL;
		goto _prr_out;
	} else {
		/* mode == ALTDQ_POLL || pollcache == NULL */
		ifd->pollcache_ = NULL;
		ifd->borrowed_[ifd->qi_] = NULL;
	}
#ifdef ADJUST_CUTOFF
_again:
#endif
	/* scan from the highest priority down */
	for (cpri = RM_MAXPRIO - 1; cpri >= 0; cpri--) {
		if (ifd->na_[cpri] == 0)
			continue;	/* no active class at this priority */
		cl = ifd->active_[cpri];
		ASSERT(cl != NULL);
		/* walk the circular list of peer classes exactly once */
		do {
			if (!qempty(cl->q_)) {
				if ((cl->undertime_.tv_sec == 0) ||
				    rmc_under_limit(cl, &now))
					goto _prr_out;
				/*
				 * remember the first overlimit class that
				 * could borrow, for the cutoff/efficient
				 * fallbacks below.
				 */
				if (first == NULL && cl->borrow_ != NULL)
					first = cl;
			}
			cl = cl->peer_;
		} while (cl != ifd->active_[cpri]);
	}

#ifdef ADJUST_CUTOFF
	/*
	 * no underlimit class found.  if cutoff is taking effect, increase
	 * cutoff and try again.
	 */
	if (first != NULL && ifd->cutoff_ < ifd->root_->depth_) {
		ifd->cutoff_++;
		goto _again;
	}
#endif /* ADJUST_CUTOFF */
	/*
	 * If LINK_EFFICIENCY is turned on, then the first overlimit
	 * class we encounter will send a packet if all the classes
	 * of the link-sharing structure are overlimit.
	 */
	reset_cutoff(ifd);
	if (!ifd->efficient_ || first == NULL)
		return (NULL);

	/* 'first' was only recorded with borrow_ != NULL, so deref is safe */
	cl = first;
	cpri = cl->pri_;
#if 0 /* too time-consuming for nothing */
	if (cl->sleeping_)
		CALLOUT_STOP(&cl->callout_);
	cl->sleeping_ = 0;
	cl->undertime_.tv_sec = 0;
#endif
	ifd->borrowed_[ifd->qi_] = cl->borrow_;
	ifd->cutoff_ = cl->borrow_->depth_;

	/*
	 * Deque the packet and do the book keeping...
	 */
_prr_out:
	if (op == ALTDQ_REMOVE) {
		m = _rmc_getq(cl);
		if (m == NULL)
			panic("_rmc_prr_dequeue_next");
		if (qempty(cl->q_))
			ifd->na_[cpri]--;	/* class went idle */

		/* plain round robin: always advance to the next peer */
		ifd->active_[cpri] = cl->peer_;

		/* record the in-flight packet for rmc_update_class_util() */
		ifd->class_[ifd->qi_] = cl;
		ifd->curlen_[ifd->qi_] = m_pktlen(m);
		ifd->now_[ifd->qi_] = now;
		ifd->qi_ = (ifd->qi_ + 1) % ifd->maxqueued_;
		ifd->queued_++;
	} else {
		/* mode == ALTDQ_POLL */
		m = _rmc_pollq(cl);
		ifd->pollcache_ = cl;
	}
	return (m);
}
1185:
1186: /*
1187: * mbuf_t *
1188: * rmc_dequeue_next(struct rm_ifdat *ifd, struct timeval *now) - this function
1189: * is invoked by the packet driver to get the next packet to be
1190: * dequeued and output on the link. If WRR is enabled, then the
1191: * WRR dequeue next routine will determine the next packet to sent.
1192: * Otherwise, packet-by-packet round robin is invoked.
1193: *
1194: * Returns: NULL, if a packet is not available or if all
1195: * classes are overlimit.
1196: *
1197: * Otherwise, Pointer to the next packet.
1198: */
1199:
1200: mbuf_t *
1201: rmc_dequeue_next(struct rm_ifdat *ifd, int mode)
1202: {
1203: if (ifd->queued_ >= ifd->maxqueued_)
1204: return (NULL);
1205: else if (ifd->wrr_)
1206: return (_rmc_wrr_dequeue_next(ifd, mode));
1207: else
1208: return (_rmc_prr_dequeue_next(ifd, mode));
1209: }
1210:
1211: /*
1212: * Update the utilization estimate for the packet that just completed.
1213: * The packet's class & the parent(s) of that class all get their
1214: * estimators updated. This routine is called by the driver's output-
1215: * packet-completion interrupt service routine.
1216: */
1217:
1218: /*
1219: * a macro to approximate "divide by 1000" that gives 0.000999,
1220: * if a value has enough effective digits.
1221: * (on pentium, mul takes 9 cycles but div takes 46!)
1222: */
1223: #define NSEC_TO_USEC(t) (((t) >> 10) + ((t) >> 16) + ((t) >> 17))
/*
 * rmc_update_class_util(ifd) - run the CBQ avgidle estimator for the class
 *	whose packet just completed (ifd->class_[ifd->qo_]) and for each of
 *	its ancestors, mark classes over/underlimit, and adjust the
 *	link-sharing cutoff.  Called once per transmitted packet.
 */
void
rmc_update_class_util(struct rm_ifdat *ifd)
{
	int idle, avgidle, pktlen;
	int pkt_time, tidle;
	rm_class_t *cl, *borrowed;
	rm_class_t *borrows;
	struct timeval *nowp;

	/*
	 * Get the most recent completed class.
	 */
	if ((cl = ifd->class_[ifd->qo_]) == NULL)
		return;

	pktlen = ifd->curlen_[ifd->qo_];
	borrowed = ifd->borrowed_[ifd->qo_];
	borrows = borrowed;

	PKTCNTR_ADD(&cl->stats_.xmit_cnt, pktlen);

	/*
	 * Run estimator on class and its ancestors.
	 */
	/*
	 * rm_update_class_util is designed to be called when the
	 * transfer is completed from a xmit complete interrupt,
	 * but most drivers don't implement an upcall for that.
	 * so, just use estimated completion time.
	 * as a result, ifd->qi_ and ifd->qo_ are always synced.
	 */
	nowp = &ifd->now_[ifd->qo_];
	/* get pkt_time (for link) in usec */
#if 1 /* use approximation */
	pkt_time = ifd->curlen_[ifd->qo_] * ifd->ns_per_byte_;
	pkt_time = NSEC_TO_USEC(pkt_time);
#else
	pkt_time = ifd->curlen_[ifd->qo_] * ifd->ns_per_byte_ / 1000;
#endif
#if 1 /* ALTQ4PPP */
	if (TV_LT(nowp, &ifd->ifnow_)) {
		int iftime;

		/*
		 * make sure the estimated completion time does not go
		 * too far.  it can happen when the link layer supports
		 * data compression or the interface speed is set to
		 * a much lower value.
		 */
		TV_DELTA(&ifd->ifnow_, nowp, iftime);
		if (iftime+pkt_time < ifd->maxiftime_) {
			TV_ADD_DELTA(&ifd->ifnow_, pkt_time, &ifd->ifnow_);
		} else {
			TV_ADD_DELTA(nowp, ifd->maxiftime_, &ifd->ifnow_);
		}
	} else {
		TV_ADD_DELTA(nowp, pkt_time, &ifd->ifnow_);
	}
#else
	if (TV_LT(nowp, &ifd->ifnow_)) {
		TV_ADD_DELTA(&ifd->ifnow_, pkt_time, &ifd->ifnow_);
	} else {
		TV_ADD_DELTA(nowp, pkt_time, &ifd->ifnow_);
	}
#endif

	/* walk from the completing class up to the root */
	while (cl != NULL) {
		TV_DELTA(&ifd->ifnow_, &cl->last_, idle);
		if (idle >= 2000000)
			/*
			 * this class is idle enough, reset avgidle.
			 * (TV_DELTA returns 2000000 us when delta is large.)
			 */
			cl->avgidle_ = cl->maxidle_;

		/* get pkt_time (for class) in usec */
#if 1 /* use approximation */
		pkt_time = pktlen * cl->ns_per_byte_;
		pkt_time = NSEC_TO_USEC(pkt_time);
#else
		pkt_time = pktlen * cl->ns_per_byte_ / 1000;
#endif
		idle -= pkt_time;

		/* exponentially weighted moving average of idle time */
		avgidle = cl->avgidle_;
		avgidle += idle - (avgidle >> RM_FILTER_GAIN);
		cl->avgidle_ = avgidle;

		/* Are we overlimit ? */
		if (avgidle <= 0) {
			CBQTRACE(rmc_update_class_util, 'milo', cl->stats_.handle);
#if 1 /* ALTQ */
			/*
			 * need some lower bound for avgidle, otherwise
			 * a borrowing class gets unbounded penalty.
			 */
			if (avgidle < cl->minidle_)
				avgidle = cl->avgidle_ = cl->minidle_;
#endif
			/* set next idle to make avgidle 0 */
			tidle = pkt_time +
				(((1 - RM_POWER) * avgidle) >> RM_FILTER_GAIN);
			TV_ADD_DELTA(nowp, tidle, &cl->undertime_);
			++cl->stats_.over;
		} else {
			/* underlimit: clamp avgidle and cancel any suspend */
			cl->avgidle_ =
			    (avgidle > cl->maxidle_) ? cl->maxidle_ : avgidle;
			cl->undertime_.tv_sec = 0;
			if (cl->sleeping_) {
				CALLOUT_STOP(&cl->callout_);
				cl->sleeping_ = 0;
			}
		}

		/* count a borrow for every class below the one lent from */
		if (borrows != NULL) {
			if (borrows != cl)
				++cl->stats_.borrows;
			else
				borrows = NULL;
		}
		cl->last_ = ifd->ifnow_;
		cl->last_pkttime_ = pkt_time;

#if 1
		if (cl->parent_ == NULL) {
			/* take stats of root class */
			PKTCNTR_ADD(&cl->stats_.xmit_cnt, pktlen);
		}
#endif

		cl = cl->parent_;
	}

	/*
	 * Check to see if cutoff needs to set to a new level.
	 */
	cl = ifd->class_[ifd->qo_];
	if (borrowed && (ifd->cutoff_ >= borrowed->depth_)) {
#if 1 /* ALTQ */
		if ((qlen(cl->q_) <= 0) || TV_LT(nowp, &borrowed->undertime_)) {
			rmc_tl_satisfied(ifd, nowp);
			CBQTRACE(rmc_update_class_util, 'broe', ifd->cutoff_);
		} else {
			ifd->cutoff_ = borrowed->depth_;
			CBQTRACE(rmc_update_class_util, 'ffob', borrowed->depth_);
		}
#else /* !ALTQ */
		if ((qlen(cl->q_) <= 1) || TV_LT(&now, &borrowed->undertime_)) {
			reset_cutoff(ifd);
#ifdef notdef
			rmc_tl_satisfied(ifd, &now);
#endif
			CBQTRACE(rmc_update_class_util, 'broe', ifd->cutoff_);
		} else {
			ifd->cutoff_ = borrowed->depth_;
			CBQTRACE(rmc_update_class_util, 'ffob', borrowed->depth_);
		}
#endif /* !ALTQ */
	}

	/*
	 * Release class slot
	 */
	ifd->borrowed_[ifd->qo_] = NULL;
	ifd->class_[ifd->qo_] = NULL;
	ifd->qo_ = (ifd->qo_ + 1) % ifd->maxqueued_;
	ifd->queued_--;
}
1392:
1393: /*
1394: * void
1395: * rmc_drop_action(struct rm_class *cl) - Generic (not protocol-specific)
1396: * over-limit action routines. These get invoked by rmc_under_limit()
1397: * if a class with packets to send if over its bandwidth limit & can't
1398: * borrow from a parent class.
1399: *
1400: * Returns: NONE
1401: */
1402:
1403: static void
1404: rmc_drop_action(struct rm_class *cl)
1405: {
1406: struct rm_ifdat *ifd = cl->ifdat_;
1407:
1408: ASSERT(qlen(cl->q_) > 0);
1409: _rmc_dropq(cl);
1410: if (qempty(cl->q_))
1411: ifd->na_[cl->pri_]--;
1412: }
1413:
1414: void rmc_dropall(struct rm_class *cl)
1415: {
1416: struct rm_ifdat *ifd = cl->ifdat_;
1417:
1418: if (!qempty(cl->q_)) {
1419: _flushq(cl->q_);
1420:
1421: ifd->na_[cl->pri_]--;
1422: }
1423: }
1424:
1425: /*
1426: * void
1427: * rmc_delay_action(struct rm_class *cl) - This function is the generic CBQ
1428: * delay action routine. It is invoked via rmc_under_limit when the
1429: * packet is discovered to be overlimit.
1430: *
1431: * If the delay action is result of borrow class being overlimit, then
1432: * delay for the offtime of the borrowing class that is overlimit.
1433: *
1434: * Returns: NONE
1435: */
1436:
void
rmc_delay_action(struct rm_class *cl, struct rm_class *borrow)
{
	int delay, t, extradelay;

	cl->stats_.overactions++;
	/* time remaining until the class becomes underlimit again */
	TV_DELTA(&cl->undertime_, &cl->overtime_, delay);
#ifndef BORROW_OFFTIME
	delay += cl->offtime_;
#endif

	if (!cl->sleeping_) {
		CBQTRACE(rmc_delay_action, 'yled', cl->stats_.handle);
#ifdef BORROW_OFFTIME
		/* use the overlimit ancestor's offtime when one exists */
		if (borrow != NULL)
			extradelay = borrow->offtime_;
		else
#endif
			extradelay = cl->offtime_;

#ifdef ALTQ
		/*
		 * XXX recalculate suspend time:
		 * current undertime is (tidle + pkt_time) calculated
		 * from the last transmission.
		 *	tidle: time required to bring avgidle back to 0
		 *	pkt_time: target waiting time for this class
		 * we need to replace pkt_time by offtime
		 */
		extradelay -= cl->last_pkttime_;
#endif
		if (extradelay > 0) {
			/* push undertime_ out by the extra offtime */
			TV_ADD_DELTA(&cl->undertime_, extradelay, &cl->undertime_);
			delay += extradelay;
		}

		cl->sleeping_ = 1;	/* suspended until the callout fires */
		cl->stats_.delays++;

		/*
		 * Since packets are phased randomly with respect to the
		 * clock, 1 tick (the next clock tick) can be an arbitrarily
		 * short time so we have to wait for at least two ticks.
		 * NOTE:  If there's no other traffic, we need the timer as
		 * a 'backstop' to restart this class.
		 */
		if (delay > tick * 2) {
#ifdef __FreeBSD__
			/* FreeBSD rounds up the tick */
			t = hzto(&cl->undertime_);
#else
			/* other BSDs round down the tick */
			t = hzto(&cl->undertime_) + 1;
#endif
		} else
			t = 2;
		CALLOUT_RESET(&cl->callout_, t,
			      (timeout_t *)rmc_restart, (caddr_t)cl);
	}
}
1497:
1498: /*
1499: * void
1500: * rmc_restart() - is just a helper routine for rmc_delay_action -- it is
1501: * called by the system timer code & is responsible checking if the
1502: * class is still sleeping (it might have been restarted as a side
1503: * effect of the queue scan on a packet arrival) and, if so, restarting
1504: * output for the class. Inspecting the class state & restarting output
1505: * require locking the class structure. In general the driver is
1506: * responsible for locking but this is the only routine that is not
1507: * called directly or indirectly from the interface driver so it has
1508: * know about system locking conventions. Under bsd, locking is done
1509: * by raising IPL to splnet so that's what's implemented here. On a
1510: * different system this would probably need to be changed.
1511: *
1512: * Returns: NONE
1513: */
1514:
1515: static void
1516: rmc_restart(struct rm_class *cl)
1517: {
1518: struct rm_ifdat *ifd = cl->ifdat_;
1519: int s;
1520:
1521: s = splnet();
1522: if (cl->sleeping_) {
1523: cl->sleeping_ = 0;
1524: cl->undertime_.tv_sec = 0;
1525:
1526: if (ifd->queued_ < ifd->maxqueued_ && ifd->restart != NULL) {
1527: CBQTRACE(rmc_restart, 'trts', cl->stats_.handle);
1528: (ifd->restart)(ifd->ifq_);
1529: }
1530: }
1531: splx(s);
1532: }
1533:
1534: /*
1535: * void
1536: * rmc_root_overlimit(struct rm_class *cl) - This the generic overlimit
1537: * handling routine for the root class of the link sharing structure.
1538: *
1539: * Returns: NONE
1540: */
1541:
/*
 * The root class represents the link itself and is never expected to go
 * overlimit; reaching this handler indicates a broken link-sharing
 * invariant, so panic rather than continue with corrupt state.
 */
static void
rmc_root_overlimit(struct rm_class *cl, struct rm_class *borrow)
{
	panic("rmc_root_overlimit");
}
1547:
1548: /*
1549: * Packet Queue handling routines. Eventually, this is to localize the
1550: * effects on the code whether queues are red queues or droptail
1551: * queues.
1552: */
1553:
/*
 * _rmc_addq(cl, m) - enqueue mbuf 'm' on class 'cl's queue.
 *	Dispatches to the RIO/RED enqueue routine when the queue is of that
 *	discipline (and returns its result); otherwise droptail-appends and
 *	returns 0.
 */
static int
_rmc_addq(rm_class_t *cl, mbuf_t *m)
{
#ifdef ALTQ_RIO
	if (q_is_rio(cl->q_))
		return rio_addq((rio_t *)cl->red_, cl->q_, m, cl->pktattr_);
#endif
#ifdef ALTQ_RED
	if (q_is_red(cl->q_))
		return red_addq(cl->red_, cl->q_, m, cl->pktattr_);
#endif /* ALTQ_RED */

	/* clear the DS field before queueing if the class requests it */
	if (cl->flags_ & RMCF_CLEARDSCP)
		write_dsfield(m, cl->pktattr_, 0);

	_addq(cl->q_, m);
	return (0);
}
1572:
1573: /* note: _rmc_dropq is not called for red */
1574: static void
1575: _rmc_dropq(rm_class_t *cl)
1576: {
1577: mbuf_t *m;
1578:
1579: if ((m = _getq(cl->q_)) != NULL)
1580: m_freem(m);
1581: }
1582:
/*
 * _rmc_getq(cl) - dequeue and return the packet at the head of the class
 *	queue, dispatching to the RIO/RED dequeue routine when the queue is
 *	of that discipline.
 */
static mbuf_t *
_rmc_getq(rm_class_t *cl)
{
#ifdef ALTQ_RIO
	if (q_is_rio(cl->q_))
		return rio_getq((rio_t *)cl->red_, cl->q_);
#endif
#ifdef ALTQ_RED
	if (q_is_red(cl->q_))
		return red_getq(cl->red_, cl->q_);
#endif
	return _getq(cl->q_);
}
1596:
/*
 * _rmc_pollq(cl) - return the packet at the head of the class queue
 *	without dequeueing it.
 */
static mbuf_t *
_rmc_pollq(rm_class_t *cl)
{
	return qhead(cl->q_);
}
1602:
1603: #ifdef CBQ_TRACE
1604:
/* circular buffer holding the most recent trace events (+1 guard slot) */
struct cbqtrace cbqtrace_buffer[NCBQTRACE+1];
/* next slot to fill; presumably set by the CBQTRACE macro — confirm there */
struct cbqtrace *cbqtrace_ptr = NULL;
/* running count of recorded events */
int cbqtrace_count;
1608:
1609: /*
1610: * DDB hook to trace cbq events:
1611: * the last 1024 events are held in a circular buffer.
1612: * use "call cbqtrace_dump(N)" to display 20 events from Nth event.
1613: */
1614: void cbqtrace_dump(int);
1615: static char *rmc_funcname(void *);
1616:
1617: static struct rmc_funcs {
1618: void *func;
1619: char *name;
1620: } rmc_funcs[] =
1621: {
1622: rmc_init, "rmc_init",
1623: rmc_queue_packet, "rmc_queue_packet",
1624: rmc_under_limit, "rmc_under_limit",
1625: rmc_update_class_util, "rmc_update_class_util",
1626: rmc_delay_action, "rmc_delay_action",
1627: rmc_restart, "rmc_restart",
1628: _rmc_wrr_dequeue_next, "_rmc_wrr_dequeue_next",
1629: NULL, NULL
1630: };
1631:
1632: static char *rmc_funcname(void *func)
1633: {
1634: struct rmc_funcs *fp;
1635:
1636: for (fp = rmc_funcs; fp->func != NULL; fp++)
1637: if (fp->func == func)
1638: return (fp->name);
1639: return ("unknown");
1640: }
1641:
1642: void cbqtrace_dump(int counter)
1643: {
1644: int i, *p;
1645: char *cp;
1646:
1647: counter = counter % NCBQTRACE;
1648: p = (int *)&cbqtrace_buffer[counter];
1649:
1650: for (i=0; i<20; i++) {
1651: printf("[0x%x] ", *p++);
1652: printf("%s: ", rmc_funcname((void *)*p++));
1653: cp = (char *)p++;
1654: printf("%c%c%c%c: ", cp[0], cp[1], cp[2], cp[3]);
1655: printf("%d\n",*p++);
1656:
1657: if (p >= (int *)&cbqtrace_buffer[NCBQTRACE])
1658: p = (int *)cbqtrace_buffer;
1659: }
1660: }
1661: #endif /* CBQ_TRACE */
1662:
1663: #if defined(ALTQ_CBQ) || defined(ALTQ_RED) || defined(ALTQ_RIO) || defined(ALTQ_HFSC) || defined(ALTQ_PRIQ)
1664: #if !defined(__GNUC__) || defined(ALTQ_DEBUG)
1665:
/*
 * _addq(q, m) - append mbuf 'm' to class queue 'q'.
 *	The queue is a circular singly-linked list of mbufs: qtail(q) points
 *	at the last packet and tail->m_nextpkt is the head.
 */
void
_addq(class_queue_t *q, mbuf_t *m)
{
	mbuf_t *m0;

	if ((m0 = qtail(q)) != NULL)
		m->m_nextpkt = m0->m_nextpkt;	/* new tail points at head */
	else
		m0 = m;				/* empty queue: self-link */
	m0->m_nextpkt = m;
	qtail(q) = m;
	qlen(q)++;
}
1679:
/*
 * _getq(q) - dequeue and return the packet at the head of 'q'.
 *	qtail(q) is the last mbuf of the circular list and tail->m_nextpkt
 *	is the head.  Returns NULL when the queue is empty.
 */
mbuf_t *
_getq(class_queue_t *q)
{
	mbuf_t *m, *m0;

	if ((m = qtail(q)) == NULL)
		return (NULL);
	if ((m0 = m->m_nextpkt) != m)		/* m0 is the head packet */
		m->m_nextpkt = m0->m_nextpkt;	/* unlink the head */
	else {
		/* head == tail: that was the only packet */
		ASSERT(qlen(q) == 1);
		qtail(q) = NULL;
	}
	qlen(q)--;
	m0->m_nextpkt = NULL;
	return (m0);
}
1697:
/*
 * _getq_tail(q) - remove and return the packet at the TAIL of the queue
 *	(used to drop the newest packet).  Walks the whole circular list to
 *	locate the tail's predecessor.  Returns NULL when the queue is empty.
 */
mbuf_t *
_getq_tail(class_queue_t *q)
{
	mbuf_t *m, *m0, *prev;

	if ((m = m0 = qtail(q)) == NULL)
		return NULL;
	/* advance m0 once around the circle; prev trails one behind */
	do {
		prev = m0;
		m0 = m0->m_nextpkt;
	} while (m0 != m);
	prev->m_nextpkt = m->m_nextpkt;
	if (prev == m) {
		/* the tail was the only packet */
		ASSERT(qlen(q) == 1);
		qtail(q) = NULL;
	} else
		qtail(q) = prev;	/* predecessor becomes the new tail */
	qlen(q)--;
	m->m_nextpkt = NULL;
	return (m);
}
1720:
/*
 * _getq_random(q) - remove and return a randomly selected packet from the
 *	queue.  Returns NULL when the queue is empty.
 */
mbuf_t *
_getq_random(class_queue_t *q)
{
	struct mbuf *m;
	int i, n;

	if ((m = qtail(q)) == NULL)
		return NULL;
	if (m->m_nextpkt == m) {
		/* single-packet queue */
		ASSERT(qlen(q) == 1);
		qtail(q) = NULL;
	} else {
		struct mbuf *prev = NULL;

		/* advance 1..qlen(q) steps from the tail; prev trails m */
		n = random() % qlen(q) + 1;
		for (i = 0; i < n; i++) {
			prev = m;
			m = m->m_nextpkt;
		}
		prev->m_nextpkt = m->m_nextpkt;
		if (m == qtail(q))
			qtail(q) = prev;
	}
	qlen(q)--;
	m->m_nextpkt = NULL;
	return (m);
}
1749:
/*
 * _removeq(q, m) - unlink mbuf 'm' from queue 'q' without freeing it.
 *	The caller must guarantee 'm' is actually on the queue: the list
 *	walk does not terminate otherwise.  m->m_nextpkt is left unchanged.
 */
void
_removeq(class_queue_t *q, mbuf_t *m)
{
	mbuf_t *m0, *prev;

	/* find m's predecessor on the circular list */
	m0 = qtail(q);
	do {
		prev = m0;
		m0 = m0->m_nextpkt;
	} while (m0 != m);
	prev->m_nextpkt = m->m_nextpkt;
	if (prev == m)
		qtail(q) = NULL;	/* m was the only packet */
	else if (qtail(q) == m)
		qtail(q) = prev;	/* m was the tail */
	qlen(q)--;
}
1767:
1768: void
1769: _flushq(class_queue_t *q)
1770: {
1771: mbuf_t *m;
1772:
1773: while ((m = _getq(q)) != NULL)
1774: m_freem(m);
1775: ASSERT(qlen(q) == 0);
1776: }
1777:
1778: #endif /* !__GNUC__ || ALTQ_DEBUG */
1779: #endif /* ALTQ_CBQ || ALTQ_RED || ALTQ_RIO || ALTQ_HFSC || ALTQ_PRIQ */
CVSweb