aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--sys/contrib/altq/altq/altq.h203
-rw-r--r--sys/contrib/altq/altq/altq_cbq.c1169
-rw-r--r--sys/contrib/altq/altq/altq_cbq.h221
-rw-r--r--sys/contrib/altq/altq/altq_cdnr.c1386
-rw-r--r--sys/contrib/altq/altq/altq_cdnr.h335
-rw-r--r--sys/contrib/altq/altq/altq_classq.h206
-rw-r--r--sys/contrib/altq/altq/altq_hfsc.c2256
-rw-r--r--sys/contrib/altq/altq/altq_hfsc.h320
-rw-r--r--sys/contrib/altq/altq/altq_priq.c1036
-rw-r--r--sys/contrib/altq/altq/altq_priq.h170
-rw-r--r--sys/contrib/altq/altq/altq_red.c1492
-rw-r--r--sys/contrib/altq/altq/altq_red.h198
-rw-r--r--sys/contrib/altq/altq/altq_rio.c843
-rw-r--r--sys/contrib/altq/altq/altq_rio.h144
-rw-r--r--sys/contrib/altq/altq/altq_rmclass.c1832
-rw-r--r--sys/contrib/altq/altq/altq_rmclass.h266
-rw-r--r--sys/contrib/altq/altq/altq_rmclass_debug.h112
-rw-r--r--sys/contrib/altq/altq/altq_subr.c1901
-rw-r--r--sys/contrib/altq/altq/altq_var.h264
-rw-r--r--sys/contrib/altq/altq/altqconf.h29
-rw-r--r--sys/contrib/altq/altq/if_altq.h184
21 files changed, 14567 insertions, 0 deletions
diff --git a/sys/contrib/altq/altq/altq.h b/sys/contrib/altq/altq/altq.h
new file mode 100644
index 000000000000..64ff22ed97ea
--- /dev/null
+++ b/sys/contrib/altq/altq/altq.h
@@ -0,0 +1,203 @@
+/* $KAME: altq.h,v 1.10 2003/07/10 12:07:47 kjc Exp $ */
+
+/*
+ * Copyright (C) 1998-2003
+ * Sony Computer Science Laboratories Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#ifndef _ALTQ_ALTQ_H_
+#define _ALTQ_ALTQ_H_
+
+#if 1
+/*
+ * allow altq-3 (altqd(8) and /dev/altq) to coexist with the new pf-based altq.
+ * altq3 is mainly for research experiments. pf-based altq is for daily use.
+ */
+#define ALTQ3_COMPAT /* for compatibility with altq-3 */
+#define ALTQ3_CLFIER_COMPAT /* for compatibility with altq-3 classifier */
+#endif
+
+#ifdef ALTQ3_COMPAT
+#include <sys/param.h>
+#include <sys/ioccom.h>
+#include <sys/queue.h>
+#include <netinet/in.h>
+
+#ifndef IFNAMSIZ
+#define IFNAMSIZ 16
+#endif
+#endif /* ALTQ3_COMPAT */
+
+/*
+ * altq discipline type
+ *
+ * One small integer identifier per queueing discipline.  Value 0 is
+ * reserved and 7 is set aside for local use.  ALTQT_MAX is not a
+ * discipline: it has to stay one greater than the largest identifier, so
+ * it must be bumped whenever a new discipline is appended.
+ */
+#define ALTQT_NONE 0 /* reserved */
+#define ALTQT_CBQ 1 /* cbq */
+#define ALTQT_WFQ 2 /* wfq */
+#define ALTQT_AFMAP 3 /* afmap */
+#define ALTQT_FIFOQ 4 /* fifoq */
+#define ALTQT_RED 5 /* red */
+#define ALTQT_RIO 6 /* rio */
+#define ALTQT_LOCALQ 7 /* local use */
+#define ALTQT_HFSC 8 /* hfsc */
+#define ALTQT_CDNR 9 /* traffic conditioner */
+#define ALTQT_BLUE 10 /* blue */
+#define ALTQT_PRIQ 11 /* priority queue */
+#define ALTQT_JOBS 12 /* JoBS */
+#define ALTQT_MAX 13 /* should be max discipline type + 1 */
+
+#ifdef ALTQ3_COMPAT
+/*
+ * generic altq-3 ioctl argument: names the target interface and carries
+ * a single request-specific value.  Only compiled for altq-3
+ * compatibility.
+ */
+struct altqreq {
+ char ifname[IFNAMSIZ]; /* if name, e.g. "en0" */
+ u_long arg; /* request-specific argument */
+};
+#endif
+
+/* simple token bucket meter profile */
+struct tb_profile {
+ u_int rate; /* rate in bit-per-sec */
+ u_int depth; /* depth in bytes */
+};
+
+#ifdef ALTQ3_COMPAT
+/*
+ * altq-3 request pairing an interface name with a token bucket profile.
+ */
+struct tbrreq {
+ char ifname[IFNAMSIZ]; /* if name, e.g. "en0" */
+ struct tb_profile tb_prof; /* token bucket profile */
+};
+
+#ifdef ALTQ3_CLFIER_COMPAT
+/*
+ * common network flow info structure
+ *
+ * Address-family independent view of a flow description: a two byte
+ * header (length, family) followed by an opaque, family-specific payload.
+ * The family-specific structures declared after this one start with the
+ * same two header bytes.
+ */
+struct flowinfo {
+ u_char fi_len; /* total length */
+ u_char fi_family; /* address family */
+ u_int8_t fi_data[46]; /* actually longer; address family
+ specific flow info. */
+};
+
+/*
+ * flow info structure for internet protocol family.
+ * (currently this is the only protocol family supported)
+ *
+ * The members before _pad add up to 20 bytes; the 28 bytes of padding
+ * bring the total to 48 bytes, the sum of the member sizes of
+ * struct flowinfo_in6.
+ */
+struct flowinfo_in {
+ u_char fi_len; /* sizeof(struct flowinfo_in) */
+ u_char fi_family; /* AF_INET */
+ u_int8_t fi_proto; /* IPPROTO_XXX */
+ u_int8_t fi_tos; /* type-of-service */
+ struct in_addr fi_dst; /* dest address */
+ struct in_addr fi_src; /* src address */
+ u_int16_t fi_dport; /* dest port */
+ u_int16_t fi_sport; /* src port */
+ u_int32_t fi_gpi; /* generalized port id for ipsec */
+ u_int8_t _pad[28]; /* make the size equal to
+ flowinfo_in6 */
+};
+
+/*
+ * flow info structure for IPv6.  Only declared when SIN6_LEN is defined.
+ */
+#ifdef SIN6_LEN
+struct flowinfo_in6 {
+ u_char fi6_len; /* sizeof(struct flowinfo_in6) */
+ u_char fi6_family; /* AF_INET6 */
+ u_int8_t fi6_proto; /* IPPROTO_XXX */
+ u_int8_t fi6_tclass; /* traffic class */
+ u_int32_t fi6_flowlabel; /* ipv6 flowlabel */
+ u_int16_t fi6_dport; /* dest port */
+ u_int16_t fi6_sport; /* src port */
+ u_int32_t fi6_gpi; /* generalized port id */
+ struct in6_addr fi6_dst; /* dest address */
+ struct in6_addr fi6_src; /* src address */
+};
+#endif /* SIN6_LEN */
+
+/*
+ * flow filters for AF_INET and AF_INET6
+ *
+ * A filter is a rule number, the flow description to match, and masks
+ * for the destination address, source address and TOS byte.
+ */
+struct flow_filter {
+ int ff_ruleno;
+ struct flowinfo_in ff_flow;
+ struct {
+ struct in_addr mask_dst;
+ struct in_addr mask_src;
+ u_int8_t mask_tos;
+ u_int8_t _pad[3];
+ } ff_mask;
+ u_int8_t _pad2[24]; /* make the size equal to flow_filter6 */
+};
+
+/*
+ * IPv6 counterpart of struct flow_filter: rule number, IPv6 flow
+ * description and masks for the addresses and the traffic class.
+ * Only declared when SIN6_LEN is defined.
+ */
+#ifdef SIN6_LEN
+struct flow_filter6 {
+ int ff_ruleno;
+ struct flowinfo_in6 ff_flow6;
+ struct {
+ struct in6_addr mask6_dst;
+ struct in6_addr mask6_src;
+ u_int8_t mask6_tclass;
+ u_int8_t _pad[3];
+ } ff_mask6;
+};
+#endif /* SIN6_LEN */
+#endif /* ALTQ3_CLFIER_COMPAT */
+#endif /* ALTQ3_COMPAT */
+
+/*
+ * generic packet counter: a packet count and a byte count, both 64 bits
+ * wide.
+ */
+struct pktcntr {
+ u_int64_t packets;
+ u_int64_t bytes;
+};
+
+/*
+ * PKTCNTR_ADD(cntr, len)
+ *	Account one more packet of `len' bytes in the struct pktcntr that
+ *	`cntr' points to.  `cntr' is expanded twice, so it must be free of
+ *	side effects.  `len' is parenthesized so that any expression can be
+ *	passed without precedence surprises.
+ */
+#define PKTCNTR_ADD(cntr, len) \
+	do { (cntr)->packets++; (cntr)->bytes += (len); } while (/*CONSTCOND*/ 0)
+
+#ifdef ALTQ3_COMPAT
+/*
+ * altq related ioctls
+ *
+ * All commands live in ioctl group 'q'.  Only ALTQGTYPE (0),
+ * ALTQTBRSET (14) and ALTQTBRGET (15) are actually defined; command
+ * numbers 1-13 are listed inside "#if 0" and therefore compiled out.
+ */
+#define ALTQGTYPE _IOWR('q', 0, struct altqreq) /* get queue type */
+#if 0
+/*
+ * these ioctls are currently discipline-specific but could be shared
+ * in the future.
+ */
+#define ALTQATTACH _IOW('q', 1, struct altqreq) /* attach discipline */
+#define ALTQDETACH _IOW('q', 2, struct altqreq) /* detach discipline */
+#define ALTQENABLE _IOW('q', 3, struct altqreq) /* enable discipline */
+#define ALTQDISABLE _IOW('q', 4, struct altqreq) /* disable discipline*/
+#define ALTQCLEAR _IOW('q', 5, struct altqreq) /* (re)initialize */
+#define ALTQCONFIG _IOWR('q', 6, struct altqreq) /* set config params */
+#define ALTQADDCLASS _IOWR('q', 7, struct altqreq) /* add a class */
+#define ALTQMODCLASS _IOWR('q', 8, struct altqreq) /* modify a class */
+#define ALTQDELCLASS _IOWR('q', 9, struct altqreq) /* delete a class */
+#define ALTQADDFILTER _IOWR('q', 10, struct altqreq) /* add a filter */
+#define ALTQDELFILTER _IOWR('q', 11, struct altqreq) /* delete a filter */
+#define ALTQGETSTATS _IOWR('q', 12, struct altqreq) /* get statistics */
+#define ALTQGETCNTR _IOWR('q', 13, struct altqreq) /* get a pkt counter */
+#endif /* 0 */
+#define ALTQTBRSET _IOW('q', 14, struct tbrreq) /* set tb regulator */
+#define ALTQTBRGET _IOWR('q', 15, struct tbrreq) /* get tb regulator */
+#endif /* ALTQ3_COMPAT */
+
+#ifdef _KERNEL
+#include <altq/altq_var.h>
+#endif
+
+#endif /* _ALTQ_ALTQ_H_ */
diff --git a/sys/contrib/altq/altq/altq_cbq.c b/sys/contrib/altq/altq/altq_cbq.c
new file mode 100644
index 000000000000..c12828da98f8
--- /dev/null
+++ b/sys/contrib/altq/altq/altq_cbq.c
@@ -0,0 +1,1169 @@
+/* $KAME: altq_cbq.c,v 1.19 2003/09/17 14:23:25 kjc Exp $ */
+
+/*
+ * Copyright (c) Sun Microsystems, Inc. 1993-1998 All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the SMCC Technology
+ * Development Group at Sun Microsystems, Inc.
+ *
+ * 4. The name of the Sun Microsystems, Inc nor may not be used to endorse or
+ * promote products derived from this software without specific prior
+ * written permission.
+ *
+ * SUN MICROSYSTEMS DOES NOT CLAIM MERCHANTABILITY OF THIS SOFTWARE OR THE
+ * SUITABILITY OF THIS SOFTWARE FOR ANY PARTICULAR PURPOSE. The software is
+ * provided "as is" without express or implied warranty of any kind.
+ *
+ * These notices must be retained in any copies of any part of this software.
+ */
+
+#if defined(__FreeBSD__) || defined(__NetBSD__)
+#include "opt_altq.h"
+#if (__FreeBSD__ != 2)
+#include "opt_inet.h"
+#ifdef __FreeBSD__
+#include "opt_inet6.h"
+#endif
+#endif
+#endif /* __FreeBSD__ || __NetBSD__ */
+#ifdef ALTQ_CBQ /* cbq is enabled by ALTQ_CBQ option in opt_altq.h */
+
+#include <sys/param.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/errno.h>
+#include <sys/time.h>
+#ifdef ALTQ3_COMPAT
+#include <sys/uio.h>
+#include <sys/kernel.h>
+#endif
+
+#include <net/if.h>
+#include <netinet/in.h>
+
+#include <net/pfvar.h>
+#include <altq/altq.h>
+#include <altq/altq_cbq.h>
+#ifdef ALTQ3_COMPAT
+#include <altq/altq_conf.h>
+#endif
+
+#ifdef ALTQ3_COMPAT
+/*
+ * Local Data structures.
+ */
+static cbq_state_t *cbq_list = NULL;
+#endif
+
+/*
+ * Forward Declarations.
+ */
+static int cbq_class_destroy(cbq_state_t *, struct rm_class *);
+static struct rm_class *clh_to_clp(cbq_state_t *, u_int32_t);
+static int cbq_clear_interface(cbq_state_t *);
+static int cbq_request(struct ifaltq *, int, void *);
+static int cbq_enqueue(struct ifaltq *, struct mbuf *,
+ struct altq_pktattr *);
+static struct mbuf *cbq_dequeue(struct ifaltq *, int);
+static void cbqrestart(struct ifaltq *);
+static void get_class_stats(class_stats_t *, struct rm_class *);
+static void cbq_purge(cbq_state_t *);
+#ifdef ALTQ3_COMPAT
+static int cbq_add_class(struct cbq_add_class *);
+static int cbq_delete_class(struct cbq_delete_class *);
+static int cbq_modify_class(struct cbq_modify_class *);
+static int cbq_class_create(cbq_state_t *, struct cbq_add_class *,
+ struct rm_class *, struct rm_class *);
+static int cbq_clear_hierarchy(struct cbq_interface *);
+static int cbq_set_enable(struct cbq_interface *, int);
+static int cbq_ifattach(struct cbq_interface *);
+static int cbq_ifdetach(struct cbq_interface *);
+static int cbq_getstats(struct cbq_getstats *);
+
+static int cbq_add_filter(struct cbq_add_filter *);
+static int cbq_delete_filter(struct cbq_delete_filter *);
+#endif /* ALTQ3_COMPAT */
+
+/*
+ * cbq_class_destroy(cbqp, cl)
+ *	Remove traffic class `cl' from the CBQ instance `cbqp'.  The class
+ *	is handed to rmc_delete_class() first; afterwards every reference
+ *	this module keeps to it (class table slots and the root/default/
+ *	control shortcuts in ifnp) is reset to NULL.
+ *
+ * Returns: always 0.
+ */
+static int
+cbq_class_destroy(cbq_state_t *cbqp, struct rm_class *cl)
+{
+	int slot;
+
+	rmc_delete_class(&cbqp->ifnp, cl);
+
+	/* release the class handle; `cl' is only compared from here on */
+	for (slot = 0; slot < CBQ_MAX_CLASSES; slot++) {
+		if (cbqp->cbq_class_tbl[slot] == cl) {
+			cbqp->cbq_class_tbl[slot] = NULL;
+		}
+	}
+
+	if (cbqp->ifnp.root_ == cl) {
+		cbqp->ifnp.root_ = NULL;
+	}
+	if (cbqp->ifnp.default_ == cl) {
+		cbqp->ifnp.default_ = NULL;
+	}
+#ifdef ALTQ3_COMPAT
+	if (cbqp->ifnp.ctl_ == cl) {
+		cbqp->ifnp.ctl_ = NULL;
+	}
+#endif
+	return (0);
+}
+
+/*
+ * clh_to_clp(cbqp, chandle)
+ *	Translate a class handle into the class pointer stored in the class
+ *	table of `cbqp'.
+ *
+ * Returns: the class whose stats_.handle equals `chandle', or NULL when
+ *	    the handle is 0 or no table entry carries it.
+ */
+static struct rm_class *
+clh_to_clp(cbq_state_t *cbqp, u_int32_t chandle)
+{
+	struct rm_class *candidate;
+	int slot;
+
+	if (chandle == 0)
+		return (NULL);
+
+	/* optimistic probe: the slot picked by the handle's low-order part */
+	candidate = cbqp->cbq_class_tbl[chandle % CBQ_MAX_CLASSES];
+	if (candidate != NULL && candidate->stats_.handle == chandle)
+		return (candidate);
+
+	/* fall back to scanning the whole table */
+	for (slot = 0; slot < CBQ_MAX_CLASSES; slot++) {
+		candidate = cbqp->cbq_class_tbl[slot];
+		if (candidate == NULL)
+			continue;
+		if (candidate->stats_.handle == chandle)
+			return (candidate);
+	}
+	return (NULL);
+}
+
+/*
+ * cbq_clear_interface(cbqp)
+ *	Destroy every class registered in the class table of `cbqp' (and,
+ *	with ALTQ3_CLFIER_COMPAT, discard the classifier's filters first).
+ *
+ *	Classes for which is_a_parent_class() is true are skipped and the
+ *	table is swept again, so leaves are destroyed before their parents.
+ *	All bookkeeping (table slot, root/default/control shortcuts) is left
+ *	to cbq_class_destroy(), which already resets every reference to the
+ *	class; it is not repeated here.
+ *
+ * Returns: always 0.
+ */
+static int
+cbq_clear_interface(cbq_state_t *cbqp)
+{
+	int again, i;
+	struct rm_class *cl;
+
+#ifdef ALTQ3_CLFIER_COMPAT
+	/* free the filters for this interface */
+	acc_discard_filters(&cbqp->cbq_classifier, NULL, 1);
+#endif
+
+	/* clear out the classes now */
+	do {
+		again = 0;
+		for (i = 0; i < CBQ_MAX_CLASSES; i++) {
+			if ((cl = cbqp->cbq_class_tbl[i]) == NULL)
+				continue;
+			if (is_a_parent_class(cl)) {
+				/* still has children: retry on the next sweep */
+				again++;
+				continue;
+			}
+			cbq_class_destroy(cbqp, cl);
+		}
+	} while (again);
+
+	return (0);
+}
+
+/*
+ * cbq_request(ifq, req, arg)
+ *	Request hook of the discipline (it is the function passed to
+ *	altq_attach() as request handler).  Only ALTRQ_PURGE is acted upon,
+ *	by calling cbq_purge(); any other request, and `arg', is ignored.
+ *
+ * Returns: always 0.
+ */
+static int
+cbq_request(struct ifaltq *ifq, int req, void *arg)
+{
+	cbq_state_t *state = (cbq_state_t *)ifq->altq_disc;
+
+	if (req == ALTRQ_PURGE)
+		cbq_purge(state);
+	return (0);
+}
+
+/*
+ * get_class_stats(statsp, cl)
+ *	Copy the statistics and parameters of class `cl' into the
+ *	class_stats_t that `statsp' points to.
+ *
+ *	The destination is zeroed first.  Both callers in this file pass a
+ *	stack variable and copyout() the whole structure to user space;
+ *	members this function does not assign (such as the handle, or the
+ *	red[] entries of a class that is neither RED nor RIO) would
+ *	otherwise leak stale kernel stack contents.  Callers that need the
+ *	handle set it after this call.
+ */
+static void
+get_class_stats(class_stats_t *statsp, struct rm_class *cl)
+{
+	bzero(statsp, sizeof(*statsp));
+
+	/* counters kept by the rm_class layer */
+	statsp->xmit_cnt = cl->stats_.xmit_cnt;
+	statsp->drop_cnt = cl->stats_.drop_cnt;
+	statsp->over = cl->stats_.over;
+	statsp->borrows = cl->stats_.borrows;
+	statsp->overactions = cl->stats_.overactions;
+	statsp->delays = cl->stats_.delays;
+
+	/* class parameters and current queue state */
+	statsp->depth = cl->depth_;
+	statsp->priority = cl->pri_;
+	statsp->maxidle = cl->maxidle_;
+	statsp->minidle = cl->minidle_;
+	statsp->offtime = cl->offtime_;
+	statsp->qmax = qlimit(cl->q_);
+	statsp->ns_per_byte = cl->ns_per_byte_;
+	statsp->wrr_allot = cl->w_allotment_;
+	statsp->qcnt = qlen(cl->q_);
+	statsp->avgidle = cl->avgidle_;
+
+	statsp->qtype = qtype(cl->q_);
+#ifdef ALTQ_RED
+	if (q_is_red(cl->q_))
+		red_getstats(cl->red_, &statsp->red[0]);
+#endif
+#ifdef ALTQ_RIO
+	if (q_is_rio(cl->q_))
+		rio_getstats((rio_t *)cl->red_, &statsp->red[0]);
+#endif
+}
+
+/*
+ * cbq_pfattach(a)
+ *	Install the CBQ state stored in a->altq_disc as the queueing
+ *	discipline of the send queue of interface a->ifname.  No classifier
+ *	is registered (the last two altq_attach() arguments are NULL).
+ *
+ * Returns: EINVAL when the interface does not exist or no state has been
+ *	    allocated yet, otherwise the result of altq_attach().
+ */
+int
+cbq_pfattach(struct pf_altq *a)
+{
+	struct ifnet *ifp;
+	int error, s;
+
+	ifp = ifunit(a->ifname);
+	if (ifp == NULL || a->altq_disc == NULL)
+		return (EINVAL);
+
+	/* altq_attach() runs at raised spl: splnet on NetBSD, else splimp */
+#ifdef __NetBSD__
+	s = splnet();
+#else
+	s = splimp();
+#endif
+	error = altq_attach(&ifp->if_snd, ALTQT_CBQ, a->altq_disc,
+	    cbq_enqueue, cbq_dequeue, cbq_request, NULL, NULL);
+	splx(s);
+
+	return (error);
+}
+
+/*
+ * cbq_add_altq(a)
+ *	Allocate a zeroed cbq_state_t for interface a->ifname, bind it to
+ *	the interface's send queue and store it in a->altq_disc.
+ *
+ * Returns: 0 on success, EINVAL when the interface is unknown, ENODEV
+ *	    when its send queue is not ALTQ-ready, ENOMEM when the
+ *	    allocation yields NULL.
+ */
+int
+cbq_add_altq(struct pf_altq *a)
+{
+	struct ifnet *ifp = ifunit(a->ifname);
+	cbq_state_t *state;
+
+	if (ifp == NULL)
+		return (EINVAL);
+	if (!ALTQ_IS_READY(&ifp->if_snd))
+		return (ENODEV);
+
+	/* allocate and initialize cbq_state_t */
+	MALLOC(state, cbq_state_t *, sizeof(cbq_state_t), M_DEVBUF, M_WAITOK);
+	if (state == NULL)
+		return (ENOMEM);
+	bzero(state, sizeof(cbq_state_t));
+	CALLOUT_INIT(&state->cbq_callout);
+	state->cbq_qlen = 0;
+	/* remember which send queue this state belongs to */
+	state->ifnp.ifq_ = &ifp->if_snd;
+
+	/* pf keeps the discipline state in pf_altq */
+	a->altq_disc = state;
+
+	return (0);
+}
+
+/*
+ * cbq_remove_altq(a)
+ *	Detach the CBQ state from `a', destroy all of its classes and free
+ *	it.
+ *
+ * Returns: 0 on success, EINVAL when `a' carries no CBQ state.
+ */
+int
+cbq_remove_altq(struct pf_altq *a)
+{
+	cbq_state_t *state = a->altq_disc;
+
+	if (state == NULL)
+		return (EINVAL);
+	a->altq_disc = NULL;
+
+	cbq_clear_interface(state);
+
+	/* destroy the default and root classes if they are still set */
+	if (state->ifnp.default_ != NULL)
+		cbq_class_destroy(state, state->ifnp.default_);
+	if (state->ifnp.root_ != NULL)
+		cbq_class_destroy(state, state->ifnp.root_);
+
+	/* deallocate cbq_state_t */
+	FREE(state, M_DEVBUF);
+
+	return (0);
+}
+
+/*
+ * cbq_add_queue(a)
+ *	Create the CBQ class described by the pf queue specification `a' and
+ *	register it in the class table under handle a->qid.
+ *
+ *	The class kind is selected by (flags & CBQCLF_CLASSMASK): a root
+ *	class initializes the interface through rmc_init(); any other class
+ *	is created with rmc_newclass() below the parent named by
+ *	a->parent_qid.  With CBQCLF_BORROW the class borrows from its
+ *	parent, otherwise it does not borrow at all.
+ *
+ * Returns: 0 on success;
+ *	    EINVAL for a missing CBQ state, qid 0, a full class table, a
+ *	    priority >= CBQ_MAXPRI, a non-root class without parent, a root
+ *	    class with a parent, a second root or default class, or more
+ *	    than one class-kind flag;
+ *	    ENOMEM when the class could not be created.
+ */
+int
+cbq_add_queue(struct pf_altq *a)
+{
+	struct rm_class *borrow, *parent;
+	cbq_state_t *cbqp;
+	struct rm_class *cl;
+	struct cbq_opts *opts;
+	int i;
+
+	if ((cbqp = a->altq_disc) == NULL)
+		return (EINVAL);
+	if (a->qid == 0)
+		return (EINVAL);
+
+	/*
+	 * find a free slot in the class table.  if the slot matching
+	 * the lower bits of qid is free, use this slot.  otherwise,
+	 * use the first free slot.
+	 */
+	i = a->qid % CBQ_MAX_CLASSES;
+	if (cbqp->cbq_class_tbl[i] != NULL) {
+		for (i = 0; i < CBQ_MAX_CLASSES; i++)
+			if (cbqp->cbq_class_tbl[i] == NULL)
+				break;
+		if (i == CBQ_MAX_CLASSES)
+			return (EINVAL);
+	}
+
+	opts = &a->pq_u.cbq_opts;
+	/* check parameters */
+	if (a->priority >= CBQ_MAXPRI)
+		return (EINVAL);
+
+	/*
+	 * Get pointers to parent and borrow classes.  A class either
+	 * borrows from its parent or does not borrow at all, so `borrow'
+	 * is always `parent' or NULL and needs no separate validation.
+	 */
+	parent = clh_to_clp(cbqp, a->parent_qid);
+	borrow = (opts->flags & CBQCLF_BORROW) ? parent : NULL;
+
+	if (parent == NULL && (opts->flags & CBQCLF_ROOTCLASS) == 0) {
+		printf("cbq_add_queue: no parent class!\n");
+		return (EINVAL);
+	}
+
+	/*
+	 * check parameters
+	 */
+	switch (opts->flags & CBQCLF_CLASSMASK) {
+	case CBQCLF_ROOTCLASS:
+		if (parent != NULL)
+			return (EINVAL);
+		if (cbqp->ifnp.root_)
+			return (EINVAL);
+		break;
+	case CBQCLF_DEFCLASS:
+		if (cbqp->ifnp.default_)
+			return (EINVAL);
+		break;
+	case 0:
+		/* ordinary class; qid 0 was already rejected above */
+		break;
+	default:
+		/* more than two flags bits set */
+		return (EINVAL);
+	}
+
+	/*
+	 * create a class.  if this is a root class, initialize the
+	 * interface.
+	 */
+	if ((opts->flags & CBQCLF_CLASSMASK) == CBQCLF_ROOTCLASS) {
+		rmc_init(cbqp->ifnp.ifq_, &cbqp->ifnp, opts->ns_per_byte,
+		    cbqrestart, a->qlimit, RM_MAXQUEUED,
+		    opts->maxidle, opts->minidle, opts->offtime,
+		    opts->flags);
+		/* rmc_init() returns nothing; the root is read back from ifnp */
+		cl = cbqp->ifnp.root_;
+	} else {
+		cl = rmc_newclass(a->priority,
+		    &cbqp->ifnp, opts->ns_per_byte,
+		    rmc_delay_action, a->qlimit, parent, borrow,
+		    opts->maxidle, opts->minidle, opts->offtime,
+		    opts->pktsize, opts->flags);
+	}
+	if (cl == NULL)
+		return (ENOMEM);
+
+	/* the pf queue id doubles as the class handle */
+	cl->stats_.handle = a->qid;
+	cl->stats_.depth = cl->depth_;
+
+	/* save the allocated class */
+	cbqp->cbq_class_tbl[i] = cl;
+
+	if ((opts->flags & CBQCLF_CLASSMASK) == CBQCLF_DEFCLASS)
+		cbqp->ifnp.default_ = cl;
+
+	return (0);
+}
+
+/*
+ * cbq_remove_queue(a)
+ *	Delete the class with handle a->qid from the CBQ state stored in
+ *	`a'.  The teardown is delegated to cbq_class_destroy() so that every
+ *	reference to the class is dropped in one place, including the
+ *	control-class shortcut that exists under ALTQ3_COMPAT.
+ *
+ * Returns: 0 on success, EINVAL when there is no CBQ state, the handle is
+ *	    unknown, or the class still is a parent of other classes.
+ */
+int
+cbq_remove_queue(struct pf_altq *a)
+{
+	struct rm_class *cl;
+	cbq_state_t *cbqp;
+
+	if ((cbqp = a->altq_disc) == NULL)
+		return (EINVAL);
+
+	if ((cl = clh_to_clp(cbqp, a->qid)) == NULL)
+		return (EINVAL);
+
+	/* if we are a parent class, then return an error. */
+	if (is_a_parent_class(cl))
+		return (EINVAL);
+
+	/* delete the class and free its handle */
+	return (cbq_class_destroy(cbqp, cl));
+}
+
+/*
+ * cbq_getqstats(a, ubuf, nbytes)
+ *	Copy the statistics of the class with handle a->qid on interface
+ *	a->ifname to the user buffer `ubuf'.
+ *
+ *	On entry *nbytes is the size of `ubuf'; on success it is set to the
+ *	number of bytes written, sizeof(class_stats_t).
+ *
+ * Returns: 0 on success, EBADF when no CBQ discipline is found for the
+ *	    interface, EINVAL when the class is unknown or the buffer size
+ *	    is negative or too small, or the error from copyout().
+ */
+int
+cbq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes)
+{
+	cbq_state_t *cbqp;
+	struct rm_class *cl;
+	class_stats_t stats;
+	int error = 0;
+
+	if ((cbqp = altq_lookup(a->ifname, ALTQT_CBQ)) == NULL)
+		return (EBADF);
+
+	if ((cl = clh_to_clp(cbqp, a->qid)) == NULL)
+		return (EINVAL);
+
+	/*
+	 * compare as signed values: a negative *nbytes would be converted
+	 * to a huge unsigned number by a comparison against sizeof and
+	 * slip through the check.
+	 */
+	if (*nbytes < (int)sizeof(stats))
+		return (EINVAL);
+
+	/* no uninitialized stack bytes may reach user space */
+	bzero(&stats, sizeof(stats));
+	get_class_stats(&stats, cl);
+
+	if ((error = copyout((caddr_t)&stats, ubuf, sizeof(stats))) != 0)
+		return (error);
+	*nbytes = sizeof(stats);
+	return (0);
+}
+
+/*
+ * int
+ * cbq_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pattr)
+ * - Queue data packets.
+ *
+ * cbq_enqueue is the enqueue routine registered with altq_attach().
+ * It picks the class for the packet and hands the packet to
+ * rmc_queue_packet(). This function itself does not call the driver's
+ * start routine.
+ *
+ * Class selection, in order: the class whose handle is stored in the
+ * packet's PACKET_TAG_PF_QID tag; with ALTQ3_COMPAT, the class found in
+ * pktattr when the queue has ALTQF_CLASSIFY set; the default class.
+ * A packet without pkthdr, or one for which no class can be found, is
+ * freed here.
+ *
+ * Assumptions: called in splimp
+ * Returns: 0 if the queueing is successful.
+ * ENOBUFS if a packet dropping occurred as a result of
+ * the queueing.
+ */
+
+static int
+cbq_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr)
+{
+ cbq_state_t *cbqp = (cbq_state_t *)ifq->altq_disc;
+ struct rm_class *cl;
+ struct m_tag *t;
+ int len;
+
+ /* grab class set by classifier */
+ if ((m->m_flags & M_PKTHDR) == 0) {
+ /* should not happen */
+#if defined(__NetBSD__) || defined(__OpenBSD__)
+ printf("altq: packet for %s does not have pkthdr\n",
+ ifq->altq_ifp->if_xname);
+#else
+ printf("altq: packet for %s%d does not have pkthdr\n",
+ ifq->altq_ifp->if_name, ifq->altq_ifp->if_unit);
+#endif
+ m_freem(m);
+ return (ENOBUFS);
+ }
+ cl = NULL;
+ /* the tag payload (a struct altq_tag) follows the m_tag header */
+ if ((t = m_tag_find(m, PACKET_TAG_PF_QID, NULL)) != NULL)
+ cl = clh_to_clp(cbqp, ((struct altq_tag *)(t+1))->qid);
+#ifdef ALTQ3_COMPAT
+ else if ((ifq->altq_flags & ALTQF_CLASSIFY) && pktattr != NULL)
+ cl = pktattr->pattr_class;
+#endif
+ if (cl == NULL) {
+ /* unclassified (or unknown handle): fall back to the default class */
+ cl = cbqp->ifnp.default_;
+ if (cl == NULL) {
+ m_freem(m);
+ return (ENOBUFS);
+ }
+ }
+ /*
+ * without ALTQ3_COMPAT only the assignment of NULL below is compiled;
+ * with it, that assignment is the else branch of the pktattr test.
+ */
+#ifdef ALTQ3_COMPAT
+ if (pktattr != NULL)
+ cl->pktattr_ = pktattr; /* save proto hdr used by ECN */
+ else
+#endif
+ cl->pktattr_ = NULL;
+ /* take the length first: the mbuf may be gone after a drop */
+ len = m_pktlen(m);
+ if (rmc_queue_packet(cl, m) != 0) {
+ /* drop occurred. some mbuf was freed in rmc_queue_packet. */
+ PKTCNTR_ADD(&cl->stats_.drop_cnt, len);
+ return (ENOBUFS);
+ }
+
+ /* successfully queued. */
+ ++cbqp->cbq_qlen;
+ IFQ_INC_LEN(ifq);
+ return (0);
+}
+
+/*
+ * cbq_dequeue(ifq, op)
+ *	Dequeue routine registered with altq_attach().  Asks
+ *	rmc_dequeue_next() for the next packet.  Only when a packet was
+ *	obtained and `op' is ALTDQ_REMOVE are the queue length counters
+ *	decremented and the class utilization updated.
+ *
+ * Returns: the packet, or NULL when rmc_dequeue_next() returned none.
+ */
+static struct mbuf *
+cbq_dequeue(struct ifaltq *ifq, int op)
+{
+	cbq_state_t *state = (cbq_state_t *)ifq->altq_disc;
+	struct mbuf *pkt;
+
+	pkt = rmc_dequeue_next(&state->ifnp, op);
+	if (pkt == NULL || op != ALTDQ_REMOVE)
+		return (pkt);
+
+	/* a packet really left the queue: one less in cbq and in the ifq */
+	--state->cbq_qlen;
+	IFQ_DEC_LEN(ifq);
+
+	/* Update the class. */
+	rmc_update_class_util(&state->ifnp);
+
+	return (pkt);
+}
+
+/*
+ * void
+ * cbqrestart(struct ifaltq *) - Restart sending of data.
+ *	According to the original author this is called from rmc_restart
+ *	in splimp via timeout after waking up a suspended class.  It is the
+ *	restart callback handed to rmc_init() for the root class.
+ *
+ *	The driver's start routine is invoked only when ALTQ is enabled on
+ *	the queue, a CBQ state is attached, the driver has a start routine,
+ *	cbq holds at least one packet and the interface is not marked
+ *	IFF_OACTIVE.
+ * Returns: NONE
+ */
+
+static void
+cbqrestart(struct ifaltq *ifq)
+{
+	cbq_state_t *state;
+	struct ifnet *ifp;
+
+	/* cbq must have been detached */
+	if (!ALTQ_IS_ENABLED(ifq))
+		return;
+
+	/* should not happen */
+	state = (cbq_state_t *)ifq->altq_disc;
+	if (state == NULL)
+		return;
+
+	ifp = ifq->altq_ifp;
+	if (ifp->if_start == NULL)
+		return;
+	if (!(state->cbq_qlen > 0))
+		return;
+	if ((ifp->if_flags & IFF_OACTIVE) != 0)
+		return;
+
+	(*ifp->if_start)(ifp);
+}
+
+/*
+ * cbq_purge(cbqp)
+ *	Drop everything queued in every class of `cbqp' via rmc_dropall()
+ *	and, when ALTQ is enabled on the send queue, reset its ifq_len to 0.
+ *
+ *	NOTE(review): cbqp->cbq_qlen is not reset here; confirm whether it
+ *	is meant to stay untouched after a purge.
+ */
+static void cbq_purge(cbq_state_t *cbqp)
+{
+	int slot;
+
+	for (slot = 0; slot < CBQ_MAX_CLASSES; slot++) {
+		struct rm_class *cl = cbqp->cbq_class_tbl[slot];
+
+		if (cl != NULL)
+			rmc_dropall(cl);
+	}
+
+	if (ALTQ_IS_ENABLED(cbqp->ifnp.ifq_))
+		cbqp->ifnp.ifq_->ifq_len = 0;
+}
+#ifdef ALTQ3_COMPAT
+
+/*
+ * cbq_add_class(acp)
+ *	altq-3 request: validate the class specification in `acp', resolve
+ *	the parent and borrow handles and let cbq_class_create() build the
+ *	class.
+ *
+ * Returns: EBADF when no CBQ discipline is found for the interface,
+ *	    EINVAL for an out-of-range priority or queue size, a non-root
+ *	    class without parent, or a borrow class that is not the parent;
+ *	    otherwise the result of cbq_class_create().
+ */
+static int
+cbq_add_class(struct cbq_add_class *acp)
+{
+	cbq_class_spec_t *spec = &acp->cbq_class;
+	struct rm_class *parent, *borrow;
+	cbq_state_t *cbqp;
+
+	cbqp = altq_lookup(acp->cbq_iface.cbq_ifacename, ALTQT_CBQ);
+	if (cbqp == NULL)
+		return (EBADF);
+
+	/* check parameters */
+	if (spec->priority >= CBQ_MAXPRI || spec->maxq > CBQ_MAXQSIZE)
+		return (EINVAL);
+
+	/* Get pointers to parent and borrow classes. */
+	parent = clh_to_clp(cbqp, spec->parent_class_handle);
+	borrow = clh_to_clp(cbqp, spec->borrow_class_handle);
+
+	/*
+	 * A class must borrow from its parent or it can not
+	 * borrow at all.  Hence, borrow can be null.
+	 */
+	if (parent == NULL && (spec->flags & CBQCLF_ROOTCLASS) == 0) {
+		printf("cbq_add_class: no parent class!\n");
+		return (EINVAL);
+	}
+	if (borrow != NULL && borrow != parent) {
+		printf("cbq_add_class: borrow class != parent\n");
+		return (EINVAL);
+	}
+
+	return (cbq_class_create(cbqp, acp, parent, borrow));
+}
+
+/*
+ * cbq_delete_class(dcp)
+ *	altq-3 request: delete the class with handle dcp->cbq_class_handle,
+ *	together with the filters referring to it.
+ *
+ * Returns: EBADF when no CBQ discipline is found for the interface,
+ *	    EINVAL when the handle is unknown or the class is a parent;
+ *	    otherwise the result of cbq_class_destroy().
+ */
+static int
+cbq_delete_class(struct cbq_delete_class *dcp)
+{
+	cbq_state_t *cbqp;
+	struct rm_class *victim;
+
+	cbqp = altq_lookup(dcp->cbq_iface.cbq_ifacename, ALTQT_CBQ);
+	if (cbqp == NULL)
+		return (EBADF);
+
+	victim = clh_to_clp(cbqp, dcp->cbq_class_handle);
+	if (victim == NULL)
+		return (EINVAL);
+
+	/* if we are a parent class, then return an error. */
+	if (is_a_parent_class(victim))
+		return (EINVAL);
+
+	/* if a filter has a reference to this class delete the filter */
+	acc_discard_filters(&cbqp->cbq_classifier, victim, 0);
+
+	return (cbq_class_destroy(cbqp, victim));
+}
+
+/*
+ * cbq_modify_class(acp)
+ *	altq-3 request: apply the parameters in acp->cbq_class to the
+ *	existing class with handle acp->cbq_class_handle via rmc_modclass().
+ *
+ * Returns: 0 on success, EBADF when no CBQ discipline is found for the
+ *	    interface, EINVAL when the handle is unknown or rmc_modclass()
+ *	    reports a negative result.
+ */
+static int
+cbq_modify_class(struct cbq_modify_class *acp)
+{
+	cbq_state_t *cbqp;
+	struct rm_class *target;
+	int rc;
+
+	cbqp = altq_lookup(acp->cbq_iface.cbq_ifacename, ALTQT_CBQ);
+	if (cbqp == NULL)
+		return (EBADF);
+
+	/* Get pointer to this class */
+	target = clh_to_clp(cbqp, acp->cbq_class_handle);
+	if (target == NULL)
+		return (EINVAL);
+
+	rc = rmc_modclass(target, acp->cbq_class.nano_sec_per_byte,
+	    acp->cbq_class.maxq, acp->cbq_class.maxidle,
+	    acp->cbq_class.minidle, acp->cbq_class.offtime,
+	    acp->cbq_class.pktsize);
+	if (rc < 0)
+		return (EINVAL);
+	return (0);
+}
+
+/*
+ * int
+ * cbq_class_create(cbq_state_t *cbqp, struct cbq_add_class *acp,
+ *	struct rm_class *parent, struct rm_class *borrow)
+ *
+ * This function creates a new traffic class in the CBQ class hierarchy
+ * from the given parameters.  A root class is created through rmc_init(),
+ * which initializes the interface; every other class is created through
+ * rmc_newclass().  The handle of the new class is the number of the
+ * table slot it occupies (slot 0 is never handed out) and is returned to
+ * the caller in acp->cbq_class_handle.
+ *
+ * Returns: 0 on success, EINVAL when the class table is full, ENOMEM
+ *	    when no class was created.
+ */
+static int
+cbq_class_create(cbq_state_t *cbqp, struct cbq_add_class *acp,
+    struct rm_class *parent, struct rm_class *borrow)
+{
+	cbq_class_spec_t *spec = &acp->cbq_class;
+	struct rm_class *cl;
+	u_int32_t chandle;
+	int slot;
+
+	/*
+	 * allocate class handle
+	 */
+	slot = 1;
+	while (slot < CBQ_MAX_CLASSES && cbqp->cbq_class_tbl[slot] != NULL)
+		slot++;
+	if (slot == CBQ_MAX_CLASSES)
+		return (EINVAL);
+	chandle = slot;	/* use the slot number as class handle */
+
+	/*
+	 * create a class.  if this is a root class, initialize the
+	 * interface.
+	 */
+	if ((spec->flags & CBQCLF_CLASSMASK) != CBQCLF_ROOTCLASS) {
+		cl = rmc_newclass(spec->priority,
+		    &cbqp->ifnp, spec->nano_sec_per_byte,
+		    rmc_delay_action, spec->maxq, parent, borrow,
+		    spec->maxidle, spec->minidle, spec->offtime,
+		    spec->pktsize, spec->flags);
+	} else {
+		rmc_init(cbqp->ifnp.ifq_, &cbqp->ifnp, spec->nano_sec_per_byte,
+		    cbqrestart, spec->maxq, RM_MAXQUEUED,
+		    spec->maxidle, spec->minidle, spec->offtime,
+		    spec->flags);
+		cl = cbqp->ifnp.root_;
+	}
+	if (cl == NULL)
+		return (ENOMEM);
+
+	/* return handle to user space. */
+	acp->cbq_class_handle = chandle;
+
+	cl->stats_.handle = chandle;
+	cl->stats_.depth = cl->depth_;
+
+	/* save the allocated class */
+	cbqp->cbq_class_tbl[slot] = cl;
+
+	/* remember the special classes */
+	if ((spec->flags & CBQCLF_CLASSMASK) == CBQCLF_DEFCLASS)
+		cbqp->ifnp.default_ = cl;
+	if ((spec->flags & CBQCLF_CLASSMASK) == CBQCLF_CTLCLASS)
+		cbqp->ifnp.ctl_ = cl;
+
+	return (0);
+}
+
+/*
+ * cbq_add_filter(afp)
+ *	altq-3 request: attach the filter afp->cbq_filter to the class with
+ *	handle afp->cbq_class_handle; acc_add_filter() stores the new filter
+ *	handle in afp->cbq_filter_handle.
+ *
+ * Returns: EBADF when no CBQ discipline is found for the interface,
+ *	    EINVAL when the class handle is unknown; otherwise the result
+ *	    of acc_add_filter().
+ */
+static int
+cbq_add_filter(struct cbq_add_filter *afp)
+{
+	cbq_state_t *cbqp;
+	struct rm_class *target;
+
+	cbqp = altq_lookup(afp->cbq_iface.cbq_ifacename, ALTQT_CBQ);
+	if (cbqp == NULL)
+		return (EBADF);
+
+	/* Get the pointer to class. */
+	target = clh_to_clp(cbqp, afp->cbq_class_handle);
+	if (target == NULL)
+		return (EINVAL);
+
+	return (acc_add_filter(&cbqp->cbq_classifier, &afp->cbq_filter,
+	    target, &afp->cbq_filter_handle));
+}
+
+/*
+ * cbq_delete_filter(dfp)
+ *	altq-3 request: remove the filter with handle
+ *	dfp->cbq_filter_handle from the interface's classifier.
+ *
+ * Returns: EBADF when no CBQ discipline is found for the interface;
+ *	    otherwise the result of acc_delete_filter().
+ */
+static int
+cbq_delete_filter(struct cbq_delete_filter *dfp)
+{
+	cbq_state_t *cbqp;
+
+	cbqp = altq_lookup(dfp->cbq_iface.cbq_ifacename, ALTQT_CBQ);
+	if (cbqp == NULL)
+		return (EBADF);
+
+	return (acc_delete_filter(&cbqp->cbq_classifier,
+	    dfp->cbq_filter_handle));
+}
+
+/*
+ * cbq_clear_hierarchy(ifacep)
+ *	Delete all classes and their filters on the given interface by
+ *	calling cbq_clear_interface() on its CBQ state.
+ *
+ * Returns: EBADF when no CBQ discipline is found for the interface;
+ *	    otherwise the result of cbq_clear_interface().
+ */
+static int
+cbq_clear_hierarchy(struct cbq_interface *ifacep)
+{
+	cbq_state_t *cbqp;
+
+	cbqp = altq_lookup(ifacep->cbq_ifacename, ALTQT_CBQ);
+	if (cbqp == NULL)
+		return (EBADF);
+
+	return (cbq_clear_interface(cbqp));
+}
+
+/*
+ * static int
+ * cbq_set_enable(struct cbq_interface *ep, int enable) - this function
+ *	processes the ioctl request to enable or disable class based
+ *	queueing.  It looks up the CBQ state of the named interface and
+ *	then enables or disables ALTQ on that interface's queue.
+ *
+ *	Enabling requires a root, a default and a control class; every
+ *	missing one is reported with printf and the request fails.  After
+ *	a successful enable the cbq packet count is reset to 0.  A value of
+ *	`enable' other than ENABLE or DISABLE does nothing.
+ *
+ * Returns: 0, for no error.
+ *	    EBADF, for specified interface not found.
+ *	    EINVAL, when enabling with a required class missing.
+ *	    Otherwise the error from altq_enable()/altq_disable().
+ */
+
+static int
+cbq_set_enable(struct cbq_interface *ep, int enable)
+{
+	char *ifacename = ep->cbq_ifacename;
+	cbq_state_t *cbqp;
+	int missing = 0;
+	int error = 0;
+
+	cbqp = altq_lookup(ifacename, ALTQT_CBQ);
+	if (cbqp == NULL)
+		return (EBADF);
+
+	if (enable == ENABLE) {
+		if (cbqp->ifnp.root_ == NULL) {
+			printf("No Root Class for %s\n", ifacename);
+			missing = 1;
+		}
+		if (cbqp->ifnp.default_ == NULL) {
+			printf("No Default Class for %s\n", ifacename);
+			missing = 1;
+		}
+		if (cbqp->ifnp.ctl_ == NULL) {
+			printf("No Control Class for %s\n", ifacename);
+			missing = 1;
+		}
+
+		if (missing) {
+			error = EINVAL;
+		} else {
+			error = altq_enable(cbqp->ifnp.ifq_);
+			if (error == 0)
+				cbqp->cbq_qlen = 0;
+		}
+	} else if (enable == DISABLE) {
+		error = altq_disable(cbqp->ifnp.ifq_);
+	}
+
+	return (error);
+}
+
+/*
+ * cbq_getstats(gsp)
+ * altq-3 request: copy the statistics of up to gsp->nclasses classes of
+ * the named interface to the user array gsp->stats, in class table
+ * order, skipping empty table slots. On return gsp->nclasses holds the
+ * number of entries written.
+ *
+ * Returns: 0 on success, EBADF when no CBQ discipline is found for the
+ * interface, EINVAL when gsp->nclasses is not positive, or the error
+ * from copyout() (gsp->nclasses is not updated in that case).
+ */
+static int
+cbq_getstats(gsp)
+ struct cbq_getstats *gsp;
+{
+ char *ifacename;
+ int i, n, nclasses;
+ cbq_state_t *cbqp;
+ struct rm_class *cl;
+ class_stats_t stats, *usp;
+ int error = 0;
+
+ ifacename = gsp->iface.cbq_ifacename;
+ nclasses = gsp->nclasses;
+ usp = gsp->stats;
+
+ if ((cbqp = altq_lookup(ifacename, ALTQT_CBQ)) == NULL)
+ return (EBADF);
+ if (nclasses <= 0)
+ return (EINVAL);
+
+ /* n counts entries written, i walks the class table */
+ for (n = 0, i = 0; n < nclasses && i < CBQ_MAX_CLASSES; n++, i++) {
+ /* skip empty slots; leave once the end of the table is reached */
+ while ((cl = cbqp->cbq_class_tbl[i]) == NULL)
+ if (++i >= CBQ_MAX_CLASSES)
+ goto out;
+
+ get_class_stats(&stats, cl);
+ /* get_class_stats() does not fill in the handle */
+ stats.handle = cl->stats_.handle;
+
+ if ((error = copyout((caddr_t)&stats, (caddr_t)usp++,
+ sizeof(stats))) != 0)
+ return (error);
+ }
+
+ out:
+ gsp->nclasses = n;
+ return (error);
+}
+
+/*
+ * cbq_ifattach(ifacep)
+ *	altq-3 request: allocate a zeroed cbq_state_t for the named
+ *	interface, attach it to the interface's send queue with
+ *	acc_classify as classifier and prepend it to cbq_list.
+ *
+ * Returns: 0 on success, ENXIO when the interface is unknown or its send
+ *	    queue is not ALTQ-ready, ENOMEM when the allocation yields
+ *	    NULL, or the error from altq_attach() (the state is freed
+ *	    again in that case).
+ */
+static int
+cbq_ifattach(struct cbq_interface *ifacep)
+{
+	struct ifnet *ifp;
+	cbq_state_t *state;
+	int error;
+
+	ifp = ifunit(ifacep->cbq_ifacename);
+	if (ifp == NULL)
+		return (ENXIO);
+	if (!ALTQ_IS_READY(&ifp->if_snd))
+		return (ENXIO);
+
+	/* allocate and initialize cbq_state_t */
+	MALLOC(state, cbq_state_t *, sizeof(cbq_state_t), M_DEVBUF, M_WAITOK);
+	if (state == NULL)
+		return (ENOMEM);
+	bzero(state, sizeof(cbq_state_t));
+	CALLOUT_INIT(&state->cbq_callout);
+
+	state->cbq_qlen = 0;
+	state->ifnp.ifq_ = &ifp->if_snd;	/* keep the ifq */
+
+	/*
+	 * set CBQ to this ifnet structure.
+	 */
+	error = altq_attach(&ifp->if_snd, ALTQT_CBQ, state,
+	    cbq_enqueue, cbq_dequeue, cbq_request,
+	    &state->cbq_classifier, acc_classify);
+	if (error != 0) {
+		FREE(state, M_DEVBUF);
+		return (error);
+	}
+
+	/* prepend to the list of cbq_state_t's. */
+	state->cbq_next = cbq_list;
+	cbq_list = state;
+
+	return (0);
+}
+
+/*
+ * cbq_ifdetach(ifacep)
+ *	altq-3 request: disable CBQ on the named interface, destroy its
+ *	classes, detach the discipline from the send queue, unlink the
+ *	state from cbq_list and free it.
+ *
+ * Returns: 0 on success, EBADF when no CBQ discipline is found for the
+ *	    interface.  Errors from disabling and detaching are ignored.
+ */
+static int
+cbq_ifdetach(struct cbq_interface *ifacep)
+{
+	cbq_state_t *cbqp, *prev;
+
+	cbqp = altq_lookup(ifacep->cbq_ifacename, ALTQT_CBQ);
+	if (cbqp == NULL)
+		return (EBADF);
+
+	(void)cbq_set_enable(ifacep, DISABLE);
+
+	cbq_clear_interface(cbqp);
+
+	/* remove CBQ from the ifnet structure. */
+	(void)altq_detach(cbqp->ifnp.ifq_);
+
+	/* remove from the list of cbq_state_t's. */
+	if (cbq_list != cbqp) {
+		/* find the predecessor of cbqp and bypass cbqp */
+		prev = cbq_list;
+		while (prev != NULL && prev->cbq_next != cbqp)
+			prev = prev->cbq_next;
+		if (prev != NULL)
+			prev->cbq_next = cbqp->cbq_next;
+		ASSERT(prev != NULL);
+	} else {
+		cbq_list = cbqp->cbq_next;
+	}
+
+	/* deallocate cbq_state_t */
+	FREE(cbqp, M_DEVBUF);
+
+	return (0);
+}
+
+/*
+ * cbq device interface
+ */
+
+altqdev_decl(cbq);
+
+/*
+ * cbqopen: open entry point of the cbq device.
+ * Nothing is set up here; it always succeeds.
+ */
+int
+cbqopen(dev, flag, fmt, p)
+	dev_t dev;
+	int flag, fmt;
+#if (__FreeBSD_version > 500000)
+	struct thread *p;
+#else
+	struct proc *p;
+#endif
+{
+	/* always succeeds */
+	return 0;
+}
+
+/*
+ * cbqclose: close entry point of the cbq device.
+ *
+ * Detaches every CBQ instance still on cbq_list by rebuilding the
+ * interface name and calling cbq_ifdetach() for it.  Returns the first
+ * error reported by cbq_ifdetach(), or 0 when all detaches succeeded.
+ *
+ * The name is formatted with snprintf() so that it can never overrun
+ * cbq_ifacename, and the loop stops as soon as a failed detach leaves
+ * the list head in place; retrying the same head would otherwise spin
+ * forever.
+ */
+int
+cbqclose(dev, flag, fmt, p)
+	dev_t dev;
+	int flag, fmt;
+#if (__FreeBSD_version > 500000)
+	struct thread *p;
+#else
+	struct proc *p;
+#endif
+{
+	struct ifnet *ifp;
+	struct cbq_interface iface;
+	cbq_state_t *head;
+	int err, error = 0;
+
+	while ((head = cbq_list) != NULL) {
+		ifp = head->ifnp.ifq_->altq_ifp;
+#if defined(__NetBSD__) || defined(__OpenBSD__)
+		snprintf(iface.cbq_ifacename, sizeof(iface.cbq_ifacename),
+		    "%s", ifp->if_xname);
+#else
+		snprintf(iface.cbq_ifacename, sizeof(iface.cbq_ifacename),
+		    "%s%d", ifp->if_name, ifp->if_unit);
+#endif
+		err = cbq_ifdetach(&iface);
+		if (err != 0 && error == 0)
+			error = err;
+		/* no progress was made: give up instead of looping forever */
+		if (err != 0 && cbq_list == head)
+			break;
+	}
+
+	return (error);
+}
+
+/*
+ * cbqioctl: ioctl entry point of the cbq device.
+ *
+ * Every command except CBQ_GETSTATS requires superuser privilege.  The
+ * command is then dispatched to its handler with addr cast to the
+ * request structure of that command.  Returns the handler's result, the
+ * suser() error, or EINVAL for an unknown command.
+ */
+int
+cbqioctl(dev, cmd, addr, flag, p)
+	dev_t dev;
+	ioctlcmd_t cmd;
+	caddr_t addr;
+	int flag;
+#if (__FreeBSD_version > 500000)
+	struct thread *p;
+#else
+	struct proc *p;
+#endif
+{
+	int error = 0;
+
+	/* CBQ_GETSTATS is the only command an ordinary user may issue */
+	if (cmd != CBQ_GETSTATS) {
+#if (__FreeBSD_version > 400000)
+		error = suser(p);
+#else
+		error = suser(p->p_ucred, &p->p_acflag);
+#endif
+		if (error)
+			return (error);
+	}
+
+	switch (cmd) {
+	/* interface level requests */
+	case CBQ_IF_ATTACH:
+		error = cbq_ifattach((struct cbq_interface *)addr);
+		break;
+	case CBQ_IF_DETACH:
+		error = cbq_ifdetach((struct cbq_interface *)addr);
+		break;
+	case CBQ_ENABLE:
+		error = cbq_set_enable((struct cbq_interface *)addr, ENABLE);
+		break;
+	case CBQ_DISABLE:
+		error = cbq_set_enable((struct cbq_interface *)addr, DISABLE);
+		break;
+	case CBQ_CLEAR_HIERARCHY:
+		error = cbq_clear_hierarchy((struct cbq_interface *)addr);
+		break;
+
+	/* class requests */
+	case CBQ_ADD_CLASS:
+		error = cbq_add_class((struct cbq_add_class *)addr);
+		break;
+	case CBQ_DEL_CLASS:
+		error = cbq_delete_class((struct cbq_delete_class *)addr);
+		break;
+	case CBQ_MODIFY_CLASS:
+		error = cbq_modify_class((struct cbq_modify_class *)addr);
+		break;
+
+	/* filter requests */
+	case CBQ_ADD_FILTER:
+		error = cbq_add_filter((struct cbq_add_filter *)addr);
+		break;
+	case CBQ_DEL_FILTER:
+		error = cbq_delete_filter((struct cbq_delete_filter *)addr);
+		break;
+
+	/* statistics */
+	case CBQ_GETSTATS:
+		error = cbq_getstats((struct cbq_getstats *)addr);
+		break;
+
+	default:
+		error = EINVAL;
+		break;
+	}
+
+	return error;
+}
+
+#if 0
+/* for debug */
+static void cbq_class_dump(int);
+
+/*
+ * cbq_class_dump: print the parameters, statistics and queue state of
+ * the class in slot i of the class table of the first cbq_state_t on
+ * cbq_list.  An out-of-range slot number is reported and ignored so the
+ * table is never indexed past CBQ_MAX_CLASSES.
+ */
+static void cbq_class_dump(i)
+	int i;
+{
+	struct rm_class *cl;
+	rm_class_stats_t *s;
+	struct _class_queue_ *q;
+
+	if (cbq_list == NULL) {
+		printf("cbq_class_dump: no cbq_state found\n");
+		return;
+	}
+	if (i < 0 || i >= CBQ_MAX_CLASSES) {
+		printf("cbq_class_dump: class index %d out of range\n", i);
+		return;
+	}
+	cl = cbq_list->cbq_class_tbl[i];
+
+	printf("class %d cl=%p\n", i, cl);
+	if (cl != NULL) {
+		s = &cl->stats_;
+		q = cl->q_;
+
+		printf("pri=%d, depth=%d, maxrate=%d, allotment=%d\n",
+		    cl->pri_, cl->depth_, cl->maxrate_, cl->allotment_);
+		printf("w_allotment=%d, bytes_alloc=%d, avgidle=%d, maxidle=%d\n",
+		    cl->w_allotment_, cl->bytes_alloc_, cl->avgidle_,
+		    cl->maxidle_);
+		printf("minidle=%d, offtime=%d, sleeping=%d, leaf=%d\n",
+		    cl->minidle_, cl->offtime_, cl->sleeping_, cl->leaf_);
+		printf("handle=%d, depth=%d, packets=%d, bytes=%d\n",
+		    s->handle, s->depth,
+		    (int)s->xmit_cnt.packets, (int)s->xmit_cnt.bytes);
+		/* the original format broke the line right after "over=%d" */
+		printf("over=%d, borrows=%d, drops=%d, overactions=%d, delays=%d\n",
+		    s->over, s->borrows, (int)s->drop_cnt.packets,
+		    s->overactions, s->delays);
+		printf("tail=%p, head=%p, qlen=%d, qlim=%d, qthresh=%d,qtype=%d\n",
+		    q->tail_, q->head_, q->qlen_, q->qlim_,
+		    q->qthresh_, q->qtype_);
+	}
+}
+#endif /* 0 */
+
+#ifdef KLD_MODULE
+
+static struct altqsw cbq_sw = /* switch entry for the cbq device: name plus the cbqopen/cbqclose/cbqioctl entry points of this file */
+ {"cbq", cbqopen, cbqclose, cbqioctl};
+
+ALTQ_MODULE(altq_cbq, ALTQT_CBQ, &cbq_sw); /* registers cbq_sw for discipline type ALTQT_CBQ */
+MODULE_DEPEND(altq_cbq, altq_red, 1, 1, 1); /* NOTE(review): presumably needed because classes may use RED (CBQCLF_RED) - confirm */
+MODULE_DEPEND(altq_cbq, altq_rio, 1, 1, 1); /* NOTE(review): presumably needed because classes may use RIO (CBQCLF_RIO) - confirm */
+
+#endif /* KLD_MODULE */
+#endif /* ALTQ3_COMPAT */
+
+#endif /* ALTQ_CBQ */
diff --git a/sys/contrib/altq/altq/altq_cbq.h b/sys/contrib/altq/altq/altq_cbq.h
new file mode 100644
index 000000000000..30a15c730242
--- /dev/null
+++ b/sys/contrib/altq/altq/altq_cbq.h
@@ -0,0 +1,221 @@
+/* $KAME: altq_cbq.h,v 1.12 2003/10/03 05:05:15 kjc Exp $ */
+
+/*
+ * Copyright (c) Sun Microsystems, Inc. 1993-1998 All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the SMCC Technology
+ * Development Group at Sun Microsystems, Inc.
+ *
+ * 4. The name of the Sun Microsystems, Inc nor may not be used to endorse or
+ * promote products derived from this software without specific prior
+ * written permission.
+ *
+ * SUN MICROSYSTEMS DOES NOT CLAIM MERCHANTABILITY OF THIS SOFTWARE OR THE
+ * SUITABILITY OF THIS SOFTWARE FOR ANY PARTICULAR PURPOSE. The software is
+ * provided "as is" without express or implied warranty of any kind.
+ *
+ * These notices must be retained in any copies of any part of this software.
+ */
+
+#ifndef _ALTQ_ALTQ_CBQ_H_
+#define _ALTQ_ALTQ_CBQ_H_
+
+#include <altq/altq.h>
+#include <altq/altq_rmclass.h>
+#include <altq/altq_red.h>
+#include <altq/altq_rio.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define NULL_CLASS_HANDLE 0
+
+/* class flags should be same as class flags in rm_class.h */
+#define CBQCLF_RED 0x0001 /* use RED */
+#define CBQCLF_ECN 0x0002 /* use RED/ECN */
+#define CBQCLF_RIO 0x0004 /* use RIO */
+#define CBQCLF_FLOWVALVE 0x0008 /* use flowvalve (aka penalty-box) */
+#define CBQCLF_CLEARDSCP 0x0010 /* clear diffserv codepoint */
+#define CBQCLF_BORROW 0x0020 /* borrow from parent */
+
+/* class flags only for root class */
+#define CBQCLF_WRR 0x0100 /* weighted-round robin */
+#define CBQCLF_EFFICIENT 0x0200 /* work-conserving */
+
+/* class flags for special classes */
+#define CBQCLF_ROOTCLASS 0x1000 /* root class */
+#define CBQCLF_DEFCLASS 0x2000 /* default class */
+#ifdef ALTQ3_COMPAT
+#define CBQCLF_CTLCLASS 0x4000 /* control class */
+#endif
+#define CBQCLF_CLASSMASK 0xf000 /* class mask */
+
+#define CBQ_MAXQSIZE 200
+#define CBQ_MAXPRI RM_MAXPRIO
+
+typedef struct _cbq_class_stats_ { /* statistics record for one CBQ class; struct cbq_getstats points at an array of these */
+ u_int32_t handle; /* class handle; cbq_getstats copies it from the class's stats_.handle */
+ u_int depth; /* NOTE(review): presumably the class's depth in the class hierarchy - confirm */
+
+ struct pktcntr xmit_cnt; /* packets sent in this class */
+ struct pktcntr drop_cnt; /* dropped packets */
+ u_int over; /* # times went over limit */
+ u_int borrows; /* # times tried to borrow */
+ u_int overactions; /* # times invoked overlimit action */
+ u_int delays; /* # times invoked delay actions */
+
+ /* other static class parameters useful for debugging */
+ int priority;
+ int maxidle;
+ int minidle;
+ int offtime;
+ int qmax;
+ int ns_per_byte;
+ int wrr_allot;
+
+ int qcnt; /* # packets in queue */
+ int avgidle;
+
+ /* red and rio related info */
+ int qtype;
+ struct redstats red[3]; /* NOTE(review): presumably one entry per RIO drop precedence - confirm */
+} class_stats_t;
+
+#ifdef ALTQ3_COMPAT
+/*
+ * Define structures associated with IOCTLS for cbq.
+ */
+
+/*
+ * Define the CBQ interface structure. This must be included in all
+ * IOCTL's such that the CBQ driver may find the appropriate CBQ module
+ * associated with the network interface to be affected.
+ */
+struct cbq_interface {
+ char cbq_ifacename[IFNAMSIZ]; /* interface name; the driver passes it to ifunit() and altq_lookup() */
+};
+
+typedef struct cbq_class_spec { /* class parameters embedded in the CBQ_ADD_CLASS and CBQ_MODIFY_CLASS requests */
+ u_int priority;
+ u_int nano_sec_per_byte;
+ u_int maxq;
+ u_int maxidle;
+ int minidle;
+ u_int offtime;
+ u_int32_t parent_class_handle; /* NOTE(review): presumably NULL_CLASS_HANDLE when there is no parent - confirm */
+ u_int32_t borrow_class_handle; /* NOTE(review): presumably the class to borrow from (cf. CBQCLF_BORROW) - confirm */
+
+ u_int pktsize;
+ int flags; /* NOTE(review): presumably a mask of the CBQCLF_* class flags - confirm */
+} cbq_class_spec_t;
+
+struct cbq_add_class { /* argument of the CBQ_ADD_CLASS ioctl */
+ struct cbq_interface cbq_iface; /* interface the request applies to */
+
+ cbq_class_spec_t cbq_class; /* parameters of the class */
+ u_int32_t cbq_class_handle; /* NOTE(review): presumably filled in for the caller (the ioctl is _IOWR) - confirm */
+};
+
+struct cbq_delete_class { /* argument of the CBQ_DEL_CLASS ioctl */
+ struct cbq_interface cbq_iface; /* interface the request applies to */
+ u_int32_t cbq_class_handle; /* handle of the class the request refers to */
+};
+
+struct cbq_modify_class { /* argument of the CBQ_MODIFY_CLASS ioctl */
+ struct cbq_interface cbq_iface; /* interface the request applies to */
+
+ cbq_class_spec_t cbq_class; /* class parameters carried by the request */
+ u_int32_t cbq_class_handle; /* handle of the class the request refers to */
+};
+
+struct cbq_add_filter { /* argument of the CBQ_ADD_FILTER ioctl */
+ struct cbq_interface cbq_iface; /* interface the request applies to */
+ u_int32_t cbq_class_handle; /* handle of the class the request refers to */
+ struct flow_filter cbq_filter; /* filter description carried by the request */
+
+ u_long cbq_filter_handle; /* NOTE(review): presumably filled in for the caller (the ioctl is _IOWR) - confirm */
+};
+
+struct cbq_delete_filter { /* argument of the CBQ_DEL_FILTER ioctl */
+ struct cbq_interface cbq_iface; /* interface the request applies to */
+ u_long cbq_filter_handle; /* handle of the filter the request refers to */
+};
+
+/* argument of the CBQ_GETSTATS ioctl; the number of classes reported is returned in the nclasses field */
+struct cbq_getstats {
+ struct cbq_interface iface; /* interface the request applies to */
+ int nclasses; /* set by cbq_getstats to the number of classes it reported */
+ class_stats_t *stats; /* NOTE(review): presumably the user-space array cbq_getstats fills with copyout() - confirm */
+};
+
+/*
+ * Define IOCTLs for CBQ.
+ */
+#define CBQ_IF_ATTACH _IOW('Q', 1, struct cbq_interface)
+#define CBQ_IF_DETACH _IOW('Q', 2, struct cbq_interface)
+#define CBQ_ENABLE _IOW('Q', 3, struct cbq_interface)
+#define CBQ_DISABLE _IOW('Q', 4, struct cbq_interface)
+#define CBQ_CLEAR_HIERARCHY _IOW('Q', 5, struct cbq_interface)
+#define CBQ_ADD_CLASS _IOWR('Q', 7, struct cbq_add_class)
+#define CBQ_DEL_CLASS _IOW('Q', 8, struct cbq_delete_class)
+#define CBQ_MODIFY_CLASS _IOWR('Q', 9, struct cbq_modify_class)
+#define CBQ_ADD_FILTER _IOWR('Q', 10, struct cbq_add_filter)
+#define CBQ_DEL_FILTER _IOW('Q', 11, struct cbq_delete_filter)
+#define CBQ_GETSTATS _IOWR('Q', 12, struct cbq_getstats)
+#endif /* ALTQ3_COMPAT */
+
+#ifdef _KERNEL
+/*
+ * Define macros only good for kernel drivers and modules.
+ */
+#define CBQ_WATCHDOG (hz / 20)
+#define CBQ_TIMEOUT 10
+#define CBQ_LS_TIMEOUT (20 * hz / 1000)
+
+#define CBQ_MAX_CLASSES 256
+
+#ifdef ALTQ3_COMPAT
+#define CBQ_MAX_FILTERS 256
+
+#define DISABLE 0x00
+#define ENABLE 0x01
+#endif /* ALTQ3_COMPAT */
+
+/*
+ * Define State structures.
+ *
+ * cbq_ifattach allocates one cbq_state_t for each interface that CBQ
+ * is attached to.
+ */
+typedef struct cbqstate {
+#ifdef ALTQ3_COMPAT
+ struct cbqstate *cbq_next; /* next instance on the global cbq_list */
+#endif
+ int cbq_qlen; /* # of packets in cbq */
+ struct rm_class *cbq_class_tbl[CBQ_MAX_CLASSES]; /* class table; cbq_getstats skips NULL slots */
+
+ struct rm_ifdat ifnp; /* ifnp.ifq_ is set to the interface send queue at attach time */
+ struct callout cbq_callout; /* for timeouts */
+#ifdef ALTQ3_CLFIER_COMPAT
+ struct acc_classifier cbq_classifier; /* classifier state handed to altq_attach() */
+#endif
+} cbq_state_t;
+
+#endif /* _KERNEL */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* !_ALTQ_ALTQ_CBQ_H_ */
diff --git a/sys/contrib/altq/altq/altq_cdnr.c b/sys/contrib/altq/altq/altq_cdnr.c
new file mode 100644
index 000000000000..ba61e7d93dde
--- /dev/null
+++ b/sys/contrib/altq/altq/altq_cdnr.c
@@ -0,0 +1,1386 @@
+/* $KAME: altq_cdnr.c,v 1.14 2003/09/05 22:40:36 itojun Exp $ */
+
+/*
+ * Copyright (C) 1999-2002
+ * Sony Computer Science Laboratories Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(__FreeBSD__) || defined(__NetBSD__)
+#include "opt_altq.h"
+#if (__FreeBSD__ != 2)
+#include "opt_inet.h"
+#ifdef __FreeBSD__
+#include "opt_inet6.h"
+#endif
+#endif
+#endif /* __FreeBSD__ || __NetBSD__ */
+
+#include <sys/param.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/sockio.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/errno.h>
+#include <sys/kernel.h>
+#include <sys/queue.h>
+
+#include <net/if.h>
+#include <net/if_types.h>
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#ifdef INET6
+#include <netinet/ip6.h>
+#endif
+
+#include <altq/altq.h>
+#include <altq/altq_conf.h>
+#include <altq/altq_cdnr.h>
+
+#ifdef ALTQ3_COMPAT
+/*
+ * diffserv traffic conditioning module
+ */
+
+int altq_cdnr_enabled = 0;
+
+/* traffic conditioner is enabled by ALTQ_CDNR option in opt_altq.h */
+#ifdef ALTQ_CDNR
+
+/* tcb_list keeps all top-level cdnr's allocated. */
+static LIST_HEAD(, top_cdnr) tcb_list;
+
+static int altq_cdnr_input(struct mbuf *, int);
+static struct top_cdnr *tcb_lookup(char *ifname);
+static struct cdnr_block *cdnr_handle2cb(u_long);
+static u_long cdnr_cb2handle(struct cdnr_block *);
+static void *cdnr_cballoc(struct top_cdnr *, int,
+ struct tc_action *(*)(struct cdnr_block *, struct cdnr_pktinfo *));
+static void cdnr_cbdestroy(void *);
+static int tca_verify_action(struct tc_action *);
+static void tca_import_action(struct tc_action *, struct tc_action *);
+static void tca_invalidate_action(struct tc_action *);
+
+static int generic_element_destroy(struct cdnr_block *);
+static struct top_cdnr *top_create(struct ifaltq *);
+static int top_destroy(struct top_cdnr *);
+static struct cdnr_block *element_create(struct top_cdnr *, struct tc_action *);
+static int element_destroy(struct cdnr_block *);
+static void tb_import_profile(struct tbe *, struct tb_profile *);
+static struct tbmeter *tbm_create(struct top_cdnr *, struct tb_profile *,
+ struct tc_action *, struct tc_action *);
+static int tbm_destroy(struct tbmeter *);
+static struct tc_action *tbm_input(struct cdnr_block *, struct cdnr_pktinfo *);
+static struct trtcm *trtcm_create(struct top_cdnr *,
+ struct tb_profile *, struct tb_profile *,
+ struct tc_action *, struct tc_action *, struct tc_action *,
+ int);
+static int trtcm_destroy(struct trtcm *);
+static struct tc_action *trtcm_input(struct cdnr_block *, struct cdnr_pktinfo *);
+static struct tswtcm *tswtcm_create(struct top_cdnr *,
+ u_int32_t, u_int32_t, u_int32_t,
+ struct tc_action *, struct tc_action *, struct tc_action *);
+static int tswtcm_destroy(struct tswtcm *);
+static struct tc_action *tswtcm_input(struct cdnr_block *, struct cdnr_pktinfo *);
+
+static int cdnrcmd_if_attach(char *);
+static int cdnrcmd_if_detach(char *);
+static int cdnrcmd_add_element(struct cdnr_add_element *);
+static int cdnrcmd_delete_element(struct cdnr_delete_element *);
+static int cdnrcmd_add_filter(struct cdnr_add_filter *);
+static int cdnrcmd_delete_filter(struct cdnr_delete_filter *);
+static int cdnrcmd_add_tbm(struct cdnr_add_tbmeter *);
+static int cdnrcmd_modify_tbm(struct cdnr_modify_tbmeter *);
+static int cdnrcmd_tbm_stats(struct cdnr_tbmeter_stats *);
+static int cdnrcmd_add_trtcm(struct cdnr_add_trtcm *);
+static int cdnrcmd_modify_trtcm(struct cdnr_modify_trtcm *);
+static int cdnrcmd_tcm_stats(struct cdnr_tcm_stats *);
+static int cdnrcmd_add_tswtcm(struct cdnr_add_tswtcm *);
+static int cdnrcmd_modify_tswtcm(struct cdnr_modify_tswtcm *);
+static int cdnrcmd_get_stats(struct cdnr_get_stats *);
+
+altqdev_decl(cdnr);
+
+/*
+ * top level input function called from ip_input.
+ * should be called before converting header fields to host-byte-order.
+ *
+ * m is the received packet and af its address family.  the packet is
+ * classified with acc_classify(); the action of the matching block (or
+ * the default action of the top level conditioner when nothing matches)
+ * is then followed, chaining through TCACODE_NEXT blocks until a
+ * terminal action is reached.
+ *
+ * returns 1 when the interface is not conditioning or the final action
+ * is pass, mark or none; returns 0 for drop (the mbuf is freed here)
+ * and for return.
+ */
+int
+altq_cdnr_input(m, af)
+ struct mbuf *m;
+ int af; /* address family */
+{
+ struct ifnet *ifp;
+ struct ip *ip;
+ struct top_cdnr *top;
+ struct tc_action *tca;
+ struct cdnr_block *cb;
+ struct cdnr_pktinfo pktinfo;
+
+ ifp = m->m_pkthdr.rcvif;
+ if (!ALTQ_IS_CNDTNING(&ifp->if_snd))
+ /* traffic conditioner is not enabled on this interface */
+ return (1);
+
+ top = ifp->if_snd.altq_cdnr; /* top level conditioner of the receiving interface */
+
+ ip = mtod(m, struct ip *);
+#ifdef INET6
+ if (af == AF_INET6) {
+ u_int32_t flowlabel;
+
+ flowlabel = ((struct ip6_hdr *)ip)->ip6_flow;
+ pktinfo.pkt_dscp = (ntohl(flowlabel) >> 20) & DSCP_MASK; /* dscp sits 20 bits up in the host-order ip6_flow word */
+ } else
+#endif
+ pktinfo.pkt_dscp = ip->ip_tos & DSCP_MASK;
+ pktinfo.pkt_len = m_pktlen(m);
+
+ tca = NULL;
+
+ cb = acc_classify(&top->tc_classifier, m, af);
+ if (cb != NULL)
+ tca = &cb->cb_action;
+
+ if (tca == NULL)
+ tca = &top->tc_block.cb_action; /* no filter matched: use the default action */
+
+ while (1) {
+ PKTCNTR_ADD(&top->tc_cnts[tca->tca_code], pktinfo.pkt_len); /* per action code counters */
+
+ switch (tca->tca_code) {
+ case TCACODE_PASS:
+ return (1);
+ case TCACODE_DROP:
+ m_freem(m);
+ return (0);
+ case TCACODE_RETURN:
+ return (0); /* NOTE(review): unlike TCACODE_DROP the mbuf is not freed here - confirm who owns it */
+ case TCACODE_MARK:
+#ifdef INET6
+ if (af == AF_INET6) {
+ struct ip6_hdr *ip6 = (struct ip6_hdr *)ip;
+ u_int32_t flowlabel;
+
+ flowlabel = ntohl(ip6->ip6_flow);
+ flowlabel = (tca->tca_dscp << 20) |
+ (flowlabel & ~(DSCP_MASK << 20)); /* replace only the dscp bits */
+ ip6->ip6_flow = htonl(flowlabel);
+ } else
+#endif
+ ip->ip_tos = tca->tca_dscp |
+ (ip->ip_tos & DSCP_CUMASK);
+ return (1);
+ case TCACODE_NEXT:
+ cb = tca->tca_next;
+ tca = (*cb->cb_input)(cb, &pktinfo); /* the block's input function yields the action to follow next */
+ break;
+ case TCACODE_NONE:
+ default:
+ return (1);
+ }
+ }
+}
+
+/*
+ * tcb_lookup: find the top level conditioner attached to the interface
+ * called ifname.  Returns NULL when ifunit() finds no such interface or
+ * no entry of tcb_list belongs to it.
+ */
+static struct top_cdnr *
+tcb_lookup(ifname)
+	char *ifname;
+{
+	struct ifnet *ifp;
+	struct top_cdnr *tc;
+
+	ifp = ifunit(ifname);
+	if (ifp == NULL)
+		return (NULL);
+
+	LIST_FOREACH(tc, &tcb_list, tc_next) {
+		if (tc->tc_ifq->altq_ifp == ifp)
+			return (tc);
+	}
+	return (NULL);
+}
+
+/*
+ * cdnr_handle2cb: translate a conditioner handle back into its block.
+ *
+ * A handle is the address of the block (see cdnr_cballoc), so the value
+ * is accepted only when it is non-zero, suitably aligned and the block
+ * it points at records the very same handle.  Returns NULL otherwise.
+ *
+ * NOTE(review): the handle is dereferenced after nothing more than the
+ * alignment test - confirm that every caller is trusted.
+ */
+static struct cdnr_block *
+cdnr_handle2cb(handle)
+	u_long handle;
+{
+	struct cdnr_block *cb = (struct cdnr_block *)handle;
+
+	if (cb == NULL)
+		return (NULL);
+	if (handle != ALIGN(cb))
+		return (NULL);
+	if (cb->cb_handle != handle)
+		return (NULL);
+
+	return (cb);
+}
+
+/*
+ * cdnr_cb2handle: return the handle recorded in a conditioner block.
+ */
+static u_long
+cdnr_cb2handle(cb)
+	struct cdnr_block *cb;
+{
+	u_long handle = cb->cb_handle;
+
+	return (handle);
+}
+
+/*
+ * cdnr_cballoc: allocate and initialize a conditioner block.
+ *
+ * type selects the concrete structure (and therefore the allocation
+ * size); an unknown type yields NULL.  top is the owning top level
+ * conditioner, or NULL when the block being created is itself the top.
+ * input_func, when given, becomes the block's input function and the
+ * block's own action is set up to call the block itself.
+ *
+ * Returns the zero-filled, initialized block, or NULL on failure.
+ */
+static void *
+cdnr_cballoc(top, type, input_func)
+	struct top_cdnr *top;
+	int type;
+	struct tc_action *(*input_func)(struct cdnr_block *,
+	    struct cdnr_pktinfo *);
+{
+	struct cdnr_block *blk;
+	int nbytes;
+
+	/* pick the size of the concrete structure */
+	switch (type) {
+	case TCETYPE_TOP:
+		nbytes = sizeof(struct top_cdnr);
+		break;
+	case TCETYPE_ELEMENT:
+		nbytes = sizeof(struct cdnr_block);
+		break;
+	case TCETYPE_TBMETER:
+		nbytes = sizeof(struct tbmeter);
+		break;
+	case TCETYPE_TRTCM:
+		nbytes = sizeof(struct trtcm);
+		break;
+	case TCETYPE_TSWTCM:
+		nbytes = sizeof(struct tswtcm);
+		break;
+	default:
+		return (NULL);
+	}
+
+	MALLOC(blk, struct cdnr_block *, nbytes, M_DEVBUF, M_WAITOK);
+	if (blk == NULL)
+		return (NULL);
+	bzero(blk, nbytes);
+
+	blk->cb_len = nbytes;
+	blk->cb_type = type;
+	blk->cb_ref = 0;
+	/* the handle is simply the address of the block */
+	blk->cb_handle = (u_long)blk;
+
+	if (input_func == NULL)
+		blk->cb_action.tca_code = TCACODE_NONE;
+	else {
+		/*
+		 * a block with an input function gets an action
+		 * that leads back to the block itself.
+		 */
+		blk->cb_action.tca_code = TCACODE_NEXT;
+		blk->cb_action.tca_next = blk;
+		blk->cb_input = input_func;
+	}
+
+	if (top == NULL) {
+		/* the block is a top level conditioner: it owns itself */
+		blk->cb_top = (struct top_cdnr *)blk;
+	} else {
+		/* ordinary element: register it with its top level cdnr */
+		blk->cb_top = top;
+		LIST_INSERT_HEAD(&top->tc_elements, blk, cb_next);
+	}
+
+	return ((void *)blk);
+}
+
+/*
+ * cdnr_cbdestroy: release a conditioner block obtained from
+ * cdnr_cballoc.  The filters that refer to the block are discarded, the
+ * block is taken off its top level conditioner's element list (unless
+ * it is the top itself) and its memory is freed.
+ */
+static void
+cdnr_cbdestroy(cblock)
+	void *cblock;
+{
+	struct cdnr_block *blk = cblock;
+	struct top_cdnr *owner = blk->cb_top;
+
+	/* drop the filters that point at this block */
+	acc_discard_filters(&owner->tc_classifier, blk, 0);
+
+	/* a top level cdnr is not on any element list */
+	if ((void *)owner != cblock)
+		LIST_REMOVE(blk, cb_next);
+
+	FREE(blk, M_DEVBUF);
+}
+
+/*
+ * generic_element_destroy: destroy a conditioner block of any type by
+ * handing it to the destroy routine that matches cb_type.  Returns that
+ * routine's result, or EINVAL for an unknown type.
+ */
+static int
+generic_element_destroy(cb)
+	struct cdnr_block *cb;
+{
+	switch (cb->cb_type) {
+	case TCETYPE_TOP:
+		return (top_destroy((struct top_cdnr *)cb));
+	case TCETYPE_ELEMENT:
+		return (element_destroy(cb));
+	case TCETYPE_TBMETER:
+		return (tbm_destroy((struct tbmeter *)cb));
+	case TCETYPE_TRTCM:
+		return (trtcm_destroy((struct trtcm *)cb));
+	case TCETYPE_TSWTCM:
+		return (tswtcm_destroy((struct tswtcm *)cb));
+	default:
+		break;
+	}
+	return (EINVAL);
+}
+
+/*
+ * tca_verify_action: check an action supplied by a user.
+ *
+ * pass, drop and mark are always acceptable; a handle action is
+ * acceptable when its handle maps to a block.  Every other code is
+ * rejected.  Returns 0 when the action is acceptable, -1 otherwise.
+ */
+static int
+tca_verify_action(utca)
+	struct tc_action *utca;
+{
+	switch (utca->tca_code) {
+	case TCACODE_PASS:
+	case TCACODE_DROP:
+	case TCACODE_MARK:
+		return (0);
+
+	case TCACODE_HANDLE:
+		/* the handle has to name an existing block */
+		return ((cdnr_handle2cb(utca->tca_handle) == NULL) ? -1 : 0);
+
+	case TCACODE_NONE:
+	case TCACODE_RETURN:
+	case TCACODE_NEXT:
+	default:
+		/* these codes must never come from a user */
+		break;
+	}
+	return (-1);
+}
+
+/*
+ * tca_import_action: copy the action utca into the kernel action ktca.
+ *
+ * A handle action is turned into a TCACODE_NEXT action that points at
+ * the block named by the handle, and that block's reference count is
+ * raised; when the handle does not map to a block the action becomes
+ * TCACODE_NONE.  A mark action has its dscp masked with DSCP_MASK.
+ */
+static void
+tca_import_action(ktca, utca)
+	struct tc_action *ktca, *utca;
+{
+	struct cdnr_block *target;
+
+	*ktca = *utca;
+
+	switch (ktca->tca_code) {
+	case TCACODE_HANDLE:
+		target = cdnr_handle2cb(ktca->tca_handle);
+		if (target == NULL) {
+			ktca->tca_code = TCACODE_NONE;
+			break;
+		}
+		ktca->tca_code = TCACODE_NEXT;
+		ktca->tca_next = target;
+		target->cb_ref++;
+		break;
+	case TCACODE_MARK:
+		ktca->tca_dscp &= DSCP_MASK;
+		break;
+	default:
+		break;
+	}
+}
+
+/*
+ * tca_invalidate_action: neutralize an action.
+ *
+ * For a TCACODE_NEXT action the reference held on the next block is
+ * dropped; if such an action has no next block it is left untouched.
+ * In every other case the action code ends up as TCACODE_NONE.
+ */
+static void
+tca_invalidate_action(tca)
+	struct tc_action *tca;
+{
+	if (tca->tca_code == TCACODE_NEXT) {
+		struct cdnr_block *next = tca->tca_next;
+
+		if (next == NULL)
+			return;
+		next->cb_ref--;
+	}
+	tca->tca_code = TCACODE_NONE;
+}
+
+/*
+ * top level traffic conditioner
+ *
+ * top_create: allocate the top level conditioner for the queue ifq.
+ * Its default action is set to pass, it is put on tcb_list and recorded
+ * in ifq->altq_cdnr.  Returns the new conditioner, or NULL when the
+ * allocation fails.
+ */
+static struct top_cdnr *
+top_create(ifq)
+	struct ifaltq *ifq;
+{
+	struct top_cdnr *tc;
+
+	tc = cdnr_cballoc(NULL, TCETYPE_TOP, NULL);
+	if (tc == NULL)
+		return (NULL);
+
+	tc->tc_ifq = ifq;
+	/* packets that match no filter are passed by default */
+	tc->tc_block.cb_action.tca_code = TCACODE_PASS;
+
+	LIST_INSERT_HEAD(&tcb_list, tc, tc_next);
+	ifq->altq_cdnr = tc;
+
+	return (tc);
+}
+
+/*
+ * top_destroy: tear down a top level conditioner.
+ *
+ * Conditioning is switched off on the queue, every element that belongs
+ * to the conditioner is destroyed (unreferenced elements first, which
+ * in turn releases the elements they refer to), the conditioner is
+ * taken off tcb_list and freed, and the altq_input hook is cleared when
+ * no remaining conditioner is active.  Always returns 0.
+ *
+ * NOTE(review): if every remaining element had cb_ref > 0 the element
+ * loop would make no progress - confirm that this cannot happen.
+ */
+static int
+top_destroy(top)
+	struct top_cdnr *top;
+{
+	struct cdnr_block *cb;
+	struct top_cdnr *other;
+
+	if (ALTQ_IS_CNDTNING(top->tc_ifq))
+		ALTQ_CLEAR_CNDTNING(top->tc_ifq);
+	top->tc_ifq->altq_cdnr = NULL;
+
+	/*
+	 * destroy the conditioner elements of this interface one by
+	 * one, always picking the first element nobody refers to.
+	 */
+	for (;;) {
+		cb = LIST_FIRST(&top->tc_elements);
+		if (cb == NULL)
+			break;
+
+		for (; cb != NULL; cb = LIST_NEXT(cb, cb_next))
+			if (!(cb->cb_ref > 0))
+				break;
+		if (cb != NULL)
+			generic_element_destroy(cb);
+	}
+
+	LIST_REMOVE(top, tc_next);
+
+	cdnr_cbdestroy(top);
+
+	/* remove the input hook once no conditioner is active any more */
+	if (altq_input != NULL) {
+		LIST_FOREACH(other, &tcb_list, tc_next)
+			if (ALTQ_IS_CNDTNING(other->tc_ifq))
+				break;
+		if (other == NULL)
+			altq_input = NULL;
+	}
+
+	return (0);
+}
+
+/*
+ * simple tc elements without input function (e.g., dropper and markers).
+ *
+ * element_create: create such an element under top with the given
+ * action.  Returns NULL when the action does not pass
+ * tca_verify_action() or the block cannot be allocated.
+ */
+static struct cdnr_block *
+element_create(top, action)
+	struct top_cdnr *top;
+	struct tc_action *action;
+{
+	struct cdnr_block *elem;
+
+	if (tca_verify_action(action) < 0)
+		return (NULL);
+
+	elem = cdnr_cballoc(top, TCETYPE_ELEMENT, NULL);
+	if (elem != NULL)
+		tca_import_action(&elem->cb_action, action);
+
+	return (elem);
+}
+
+/*
+ * element_destroy: destroy a simple element.  Returns EBUSY while other
+ * blocks still refer to it, 0 once it has been released.
+ */
+static int
+element_destroy(cb)
+	struct cdnr_block *cb;
+{
+	if (!(cb->cb_ref > 0)) {
+		/* drop the reference held by the element's own action */
+		tca_invalidate_action(&cb->cb_action);
+		cdnr_cbdestroy(cb);
+		return (0);
+	}
+	return (EBUSY);
+}
+
+/*
+ * internal representation of token bucket parameters
+ * rate: byte_per_unittime << 32
+ * (((bits_per_sec) / 8) << 32) / machclk_freq
+ * depth: byte << 32
+ *
+ */
+#define TB_SHIFT 32
+#define TB_SCALE(x) ((u_int64_t)(x) << TB_SHIFT)
+#define TB_UNSCALE(x) ((x) >> TB_SHIFT)
+
+/*
+ * tb_import_profile: load a token bucket from a user profile.
+ *
+ * rate and depth are stored in the scaled internal representation
+ * (TB_SCALE); filluptime is the number of clock units needed to fill an
+ * empty bucket, or the largest 64-bit value when the rate is zero.  The
+ * bucket starts out full and stamped with the current machine clock.
+ */
+static void
+tb_import_profile(tb, profile)
+	struct tbe *tb;
+	struct tb_profile *profile;
+{
+	tb->rate = TB_SCALE(profile->rate / 8) / machclk_freq;
+	tb->depth = TB_SCALE(profile->depth);
+
+	/* a bucket without rate never fills up */
+	tb->filluptime = 0xffffffffffffffffLL;
+	if (tb->rate > 0)
+		tb->filluptime = tb->depth / tb->rate;
+
+	tb->token = tb->depth;
+	tb->last = read_machclk();
+}
+
+/*
+ * simple token bucket meter
+ *
+ * tbm_create: create a meter under top from a token bucket profile, an
+ * action for in-profile packets and one for out-of-profile packets.
+ * Returns NULL when either action fails tca_verify_action() or the
+ * block cannot be allocated.
+ */
+static struct tbmeter *
+tbm_create(top, profile, in_action, out_action)
+	struct top_cdnr *top;
+	struct tb_profile *profile;
+	struct tc_action *in_action, *out_action;
+{
+	struct tbmeter *meter;
+
+	if (tca_verify_action(in_action) < 0)
+		return (NULL);
+	if (tca_verify_action(out_action) < 0)
+		return (NULL);
+
+	meter = cdnr_cballoc(top, TCETYPE_TBMETER, tbm_input);
+	if (meter == NULL)
+		return (NULL);
+
+	tb_import_profile(&meter->tb, profile);
+
+	tca_import_action(&meter->in_action, in_action);
+	tca_import_action(&meter->out_action, out_action);
+
+	return (meter);
+}
+
+/*
+ * tbm_destroy: destroy a token bucket meter.  Returns EBUSY while other
+ * blocks still refer to it, 0 once it has been released.
+ */
+static int
+tbm_destroy(tbm)
+	struct tbmeter *tbm;
+{
+	if (!(tbm->cdnrblk.cb_ref > 0)) {
+		/* give up the references held by both actions */
+		tca_invalidate_action(&tbm->in_action);
+		tca_invalidate_action(&tbm->out_action);
+
+		cdnr_cbdestroy(tbm);
+		return (0);
+	}
+	return (EBUSY);
+}
+
+/*
+ * tbm_input: input function of the token bucket meter.
+ *
+ * When the bucket holds fewer tokens than the packet needs it is
+ * refilled for the time that passed since the last refill (never beyond
+ * its depth).  A packet that still does not fit is counted as
+ * out-of-profile and gets out_action; otherwise its tokens are taken,
+ * it is counted as in-profile and gets in_action.
+ */
+static struct tc_action *
+tbm_input(cb, pktinfo)
+	struct cdnr_block *cb;
+	struct cdnr_pktinfo *pktinfo;
+{
+	struct tbmeter *tbm = (struct tbmeter *)cb;
+	struct tbe *bucket = &tbm->tb;
+	u_int64_t need, elapsed, now;
+
+	need = TB_SCALE(pktinfo->pkt_len);
+
+	/* refill lazily, only when the packet would not fit */
+	if (bucket->token < need) {
+		now = read_machclk();
+		elapsed = now - bucket->last;
+		if (elapsed < bucket->filluptime) {
+			bucket->token += elapsed * bucket->rate;
+			if (bucket->token > bucket->depth)
+				bucket->token = bucket->depth;
+		} else
+			bucket->token = bucket->depth;
+		bucket->last = now;
+	}
+
+	if (!(bucket->token < need)) {
+		/* in profile: consume the tokens */
+		bucket->token -= need;
+		PKTCNTR_ADD(&tbm->in_cnt, pktinfo->pkt_len);
+		return (&tbm->in_action);
+	}
+
+	/* out of profile */
+	PKTCNTR_ADD(&tbm->out_cnt, pktinfo->pkt_len);
+	return (&tbm->out_action);
+}
+
+/*
+ * two rate three color marker
+ * as described in draft-heinanen-diffserv-trtcm-01.txt
+ *
+ * trtcm_create: create a marker under top from a committed and a peak
+ * token bucket profile and one action per color.  The dscp used for a
+ * color is taken from its action when that is a mark action, otherwise
+ * AF11/AF12/AF13 are used for green/yellow/red.  Returns NULL when an
+ * action fails tca_verify_action() or the block cannot be allocated.
+ */
+static struct trtcm *
+trtcm_create(top, cmtd_profile, peak_profile,
+    green_action, yellow_action, red_action, coloraware)
+	struct top_cdnr *top;
+	struct tb_profile *cmtd_profile, *peak_profile;
+	struct tc_action *green_action, *yellow_action, *red_action;
+	int coloraware;
+{
+	struct trtcm *marker;
+
+	if (tca_verify_action(green_action) < 0)
+		return (NULL);
+	if (tca_verify_action(yellow_action) < 0)
+		return (NULL);
+	if (tca_verify_action(red_action) < 0)
+		return (NULL);
+
+	marker = cdnr_cballoc(top, TCETYPE_TRTCM, trtcm_input);
+	if (marker == NULL)
+		return (NULL);
+
+	tb_import_profile(&marker->cmtd_tb, cmtd_profile);
+	tb_import_profile(&marker->peak_tb, peak_profile);
+
+	tca_import_action(&marker->green_action, green_action);
+	tca_import_action(&marker->yellow_action, yellow_action);
+	tca_import_action(&marker->red_action, red_action);
+
+	/* choose the dscp that stands for each color */
+	marker->green_dscp =
+	    (marker->green_action.tca_code == TCACODE_MARK) ?
+	    (marker->green_action.tca_dscp & DSCP_MASK) : DSCP_AF11;
+	marker->yellow_dscp =
+	    (marker->yellow_action.tca_code == TCACODE_MARK) ?
+	    (marker->yellow_action.tca_dscp & DSCP_MASK) : DSCP_AF12;
+	marker->red_dscp =
+	    (marker->red_action.tca_code == TCACODE_MARK) ?
+	    (marker->red_action.tca_dscp & DSCP_MASK) : DSCP_AF13;
+
+	marker->coloraware = coloraware;
+
+	return (marker);
+}
+
+/*
+ * trtcm_destroy: destroy a two rate three color marker.  Returns EBUSY
+ * while other blocks still refer to it, 0 once it has been released.
+ */
+static int
+trtcm_destroy(tcm)
+	struct trtcm *tcm;
+{
+	if (!(tcm->cdnrblk.cb_ref > 0)) {
+		/* give up the references held by the three actions */
+		tca_invalidate_action(&tcm->green_action);
+		tca_invalidate_action(&tcm->yellow_action);
+		tca_invalidate_action(&tcm->red_action);
+
+		cdnr_cbdestroy(tcm);
+		return (0);
+	}
+	return (EBUSY);
+}
+
+/*
+ * trtcm_input: input function of the two rate three color marker.
+ *
+ * The packet starts out green unless the marker is color-aware and the
+ * packet already carries the yellow or red dscp.  Both buckets are
+ * refilled when they hold fewer tokens than the packet needs.  The
+ * packet becomes red when it was red or exceeds the peak bucket, yellow
+ * when it was yellow or exceeds the committed bucket (peak tokens are
+ * consumed), and green otherwise (tokens of both buckets are consumed).
+ * The chosen dscp is stored in pktinfo and the action of the chosen
+ * color is returned.
+ */
+static struct tc_action *
+trtcm_input(cb, pktinfo)
+	struct cdnr_block *cb;
+	struct cdnr_pktinfo *pktinfo;
+{
+	struct trtcm *tcm = (struct trtcm *)cb;
+	struct tbe *cmtd = &tcm->cmtd_tb;
+	struct tbe *peak = &tcm->peak_tb;
+	u_int64_t need, elapsed, now;
+	u_int8_t color;
+
+	need = TB_SCALE(pktinfo->pkt_len);
+
+	/* precolor: green unless color-aware and already yellow or red */
+	if (!tcm->coloraware)
+		color = tcm->green_dscp;
+	else {
+		color = pktinfo->pkt_dscp;
+		if (color != tcm->yellow_dscp && color != tcm->red_dscp)
+			color = tcm->green_dscp;
+	}
+
+	now = read_machclk();
+
+	/* refill the committed bucket if the packet would not fit */
+	if (cmtd->token < need) {
+		elapsed = now - cmtd->last;
+		if (elapsed < cmtd->filluptime) {
+			cmtd->token += elapsed * cmtd->rate;
+			if (cmtd->token > cmtd->depth)
+				cmtd->token = cmtd->depth;
+		} else
+			cmtd->token = cmtd->depth;
+		cmtd->last = now;
+	}
+
+	/* same for the peak bucket */
+	if (peak->token < need) {
+		elapsed = now - peak->last;
+		if (elapsed < peak->filluptime) {
+			peak->token += elapsed * peak->rate;
+			if (peak->token > peak->depth)
+				peak->token = peak->depth;
+		} else
+			peak->token = peak->depth;
+		peak->last = now;
+	}
+
+	/* red: precolored red or above the peak rate */
+	if (color == tcm->red_dscp || peak->token < need) {
+		pktinfo->pkt_dscp = tcm->red_dscp;
+		PKTCNTR_ADD(&tcm->red_cnt, pktinfo->pkt_len);
+		return (&tcm->red_action);
+	}
+
+	/* yellow: precolored yellow or above the committed rate */
+	if (color == tcm->yellow_dscp || cmtd->token < need) {
+		pktinfo->pkt_dscp = tcm->yellow_dscp;
+		peak->token -= need;
+		PKTCNTR_ADD(&tcm->yellow_cnt, pktinfo->pkt_len);
+		return (&tcm->yellow_action);
+	}
+
+	/* green: within both rates */
+	pktinfo->pkt_dscp = tcm->green_dscp;
+	cmtd->token -= need;
+	peak->token -= need;
+	PKTCNTR_ADD(&tcm->green_cnt, pktinfo->pkt_len);
+	return (&tcm->green_action);
+}
+
+/*
+ * time sliding window three color marker
+ * as described in draft-fang-diffserv-tc-tswtcm-00.txt
+ *
+ * tswtcm_create: create a marker under top.  cmtd_rate and peak_rate
+ * are given in bits/sec and avg_interval in msec.  The dscp used for a
+ * color is taken from its action when that is a mark action, otherwise
+ * AF11/AF12/AF13 are used for green/yellow/red.  Returns NULL when an
+ * action fails tca_verify_action() or the block cannot be allocated.
+ */
+static struct tswtcm *
+tswtcm_create(top, cmtd_rate, peak_rate, avg_interval,
+    green_action, yellow_action, red_action)
+	struct top_cdnr *top;
+	u_int32_t cmtd_rate, peak_rate, avg_interval;
+	struct tc_action *green_action, *yellow_action, *red_action;
+{
+	struct tswtcm *marker;
+
+	if (tca_verify_action(green_action) < 0)
+		return (NULL);
+	if (tca_verify_action(yellow_action) < 0)
+		return (NULL);
+	if (tca_verify_action(red_action) < 0)
+		return (NULL);
+
+	marker = cdnr_cballoc(top, TCETYPE_TSWTCM, tswtcm_input);
+	if (marker == NULL)
+		return (NULL);
+
+	tca_import_action(&marker->green_action, green_action);
+	tca_import_action(&marker->yellow_action, yellow_action);
+	tca_import_action(&marker->red_action, red_action);
+
+	/* choose the dscp that stands for each color */
+	marker->green_dscp =
+	    (marker->green_action.tca_code == TCACODE_MARK) ?
+	    (marker->green_action.tca_dscp & DSCP_MASK) : DSCP_AF11;
+	marker->yellow_dscp =
+	    (marker->yellow_action.tca_code == TCACODE_MARK) ?
+	    (marker->yellow_action.tca_dscp & DSCP_MASK) : DSCP_AF12;
+	marker->red_dscp =
+	    (marker->red_action.tca_code == TCACODE_MARK) ?
+	    (marker->red_action.tca_dscp & DSCP_MASK) : DSCP_AF13;
+
+	/* rates are kept in bytes/sec, the estimate starts at zero */
+	marker->cmtd_rate = cmtd_rate / 8;
+	marker->peak_rate = peak_rate / 8;
+	marker->avg_rate = 0;
+
+	/* the averaging window is kept in machine clock units */
+	marker->timewin = (u_int64_t)machclk_freq * avg_interval / 1000;
+
+	return (marker);
+}
+
+/*
+ * tswtcm_destroy: destroy a time sliding window marker.  Returns EBUSY
+ * while other blocks still refer to it, 0 once it has been released.
+ */
+static int
+tswtcm_destroy(tsw)
+	struct tswtcm *tsw;
+{
+	if (!(tsw->cdnrblk.cb_ref > 0)) {
+		/* give up the references held by the three actions */
+		tca_invalidate_action(&tsw->green_action);
+		tca_invalidate_action(&tsw->yellow_action);
+		tca_invalidate_action(&tsw->red_action);
+
+		cdnr_cbdestroy(tsw);
+		return (0);
+	}
+	return (EBUSY);
+}
+
+/*
+ * tswtcm_input: input function of the time sliding window three color
+ * marker.
+ *
+ * The average rate is re-estimated for every packet and the packet is
+ * then colored probabilistically: the further the estimate lies above
+ * the peak (committed) rate, the more likely the packet is marked red
+ * (yellow).  The chosen dscp is stored in pktinfo, the per-color
+ * counter is updated and the action of the chosen color is returned.
+ *
+ * The estimator is guarded against a zero denominator (a zero averaging
+ * window combined with two packets on the same clock tick) and the
+ * estimate saturates at the largest 32-bit value instead of wrapping.
+ */
+static struct tc_action *
+tswtcm_input(cb, pktinfo)
+	struct cdnr_block *cb;
+	struct cdnr_pktinfo *pktinfo;
+{
+	struct tswtcm *tsw = (struct tswtcm *)cb;
+	int len;
+	u_int32_t avg_rate;
+	u_int64_t interval, now, tmp, window;
+
+	/*
+	 * rate estimator
+	 */
+	len = pktinfo->pkt_len;
+	now = read_machclk();
+
+	interval = now - tsw->t_front;
+	/*
+	 * calculate average rate:
+	 *	avg = (avg * timewin + pkt_len)/(timewin + interval)
+	 * pkt_len needs to be multiplied by machclk_freq in order to
+	 * get (bytes/sec).
+	 * note: when avg_rate (bytes/sec) and timewin (machclk unit) are
+	 * less than 32 bits, the following 64-bit operation has enough
+	 * precision.
+	 */
+	window = tsw->timewin + interval;
+	if (window == 0) {
+		/* no time base at all: treat the rate as unbounded */
+		tmp = 0xffffffff;
+	} else {
+		tmp = ((u_int64_t)tsw->avg_rate * tsw->timewin
+		    + (u_int64_t)len * machclk_freq) / window;
+		/* saturate rather than wrap when narrowing to 32 bits */
+		if (tmp > 0xffffffff)
+			tmp = 0xffffffff;
+	}
+	tsw->avg_rate = avg_rate = (u_int32_t)tmp;
+	tsw->t_front = now;
+
+	/*
+	 * marker
+	 */
+	if (avg_rate > tsw->cmtd_rate) {
+		/* avg_rate is greater than cmtd_rate here, hence non-zero */
+		u_int32_t randval = arc4random() % avg_rate;
+
+		if (avg_rate > tsw->peak_rate &&
+		    randval < avg_rate - tsw->peak_rate) {
+			/* mark red */
+			pktinfo->pkt_dscp = tsw->red_dscp;
+			PKTCNTR_ADD(&tsw->red_cnt, len);
+			return (&tsw->red_action);
+		}
+		if (randval < avg_rate - tsw->cmtd_rate) {
+			/* mark yellow */
+			pktinfo->pkt_dscp = tsw->yellow_dscp;
+			PKTCNTR_ADD(&tsw->yellow_cnt, len);
+			return (&tsw->yellow_action);
+		}
+	}
+
+	/* mark green */
+	pktinfo->pkt_dscp = tsw->green_dscp;
+	PKTCNTR_ADD(&tsw->green_cnt, len);
+	return (&tsw->green_action);
+}
+
+/*
+ * ioctl requests
+ */
+/*
+ * cdnrcmd_if_attach: create the top level conditioner of an interface.
+ * Returns EBADF when the interface name is unknown, EBUSY when the
+ * interface already has a conditioner, ENOMEM when top_create() fails,
+ * and 0 on success.
+ */
+static int
+cdnrcmd_if_attach(char *ifname)
+{
+	struct ifnet *ifp = ifunit(ifname);
+
+	if (ifp == NULL)
+		return (EBADF);
+
+	/* only one conditioner per interface */
+	if (ifp->if_snd.altq_cdnr != NULL)
+		return (EBUSY);
+
+	if (top_create(&ifp->if_snd) == NULL)
+		return (ENOMEM);
+
+	return (0);
+}
+
+/*
+ * cdnrcmd_if_detach: destroy the top level conditioner found for
+ * ifname.  Returns EBADF when there is none, otherwise the result of
+ * top_destroy().
+ */
+static int
+cdnrcmd_if_detach(char *ifname)
+{
+	struct top_cdnr *top = tcb_lookup(ifname);
+
+	if (top == NULL)
+		return (EBADF);
+
+	return (top_destroy(top));
+}
+
+/*
+ * cdnrcmd_add_element: create a simple element carrying ap->action
+ * under the conditioner of ap->iface.  On success the new handle is
+ * stored in ap->cdnr_handle.  Returns EBADF for an unknown interface
+ * and EINVAL when element_create() fails.
+ */
+static int
+cdnrcmd_add_element(struct cdnr_add_element *ap)
+{
+	struct top_cdnr *top;
+	struct cdnr_block *elem;
+
+	top = tcb_lookup(ap->iface.cdnr_ifname);
+	if (top == NULL)
+		return (EBADF);
+
+	elem = element_create(top, &ap->action);
+	if (elem != NULL) {
+		/* return a class handle to the user */
+		ap->cdnr_handle = cdnr_cb2handle(elem);
+		return (0);
+	}
+
+	return (EINVAL);
+}
+
+/*
+ * cdnrcmd_delete_element: destroy the element named by
+ * ap->cdnr_handle.  Simple elements go through element_destroy(),
+ * every other type through generic_element_destroy().
+ * Returns EBADF for an unknown interface and EINVAL for an unknown
+ * handle.
+ */
+static int
+cdnrcmd_delete_element(struct cdnr_delete_element *ap)
+{
+	struct cdnr_block *cb;
+
+	/* the interface is only checked for existence */
+	if (tcb_lookup(ap->iface.cdnr_ifname) == NULL)
+		return (EBADF);
+
+	cb = cdnr_handle2cb(ap->cdnr_handle);
+	if (cb == NULL)
+		return (EINVAL);
+
+	if (cb->cb_type == TCETYPE_ELEMENT)
+		return (element_destroy(cb));
+
+	return (generic_element_destroy(cb));
+}
+
+/*
+ * cdnrcmd_add_filter: add ap->filter to the classifier of the
+ * interface's conditioner and bind it to the element named by
+ * ap->cdnr_handle; acc_add_filter() stores the new filter handle in
+ * ap->filter_handle.  Returns EBADF for an unknown interface and
+ * EINVAL for an unknown element handle.
+ */
+static int
+cdnrcmd_add_filter(struct cdnr_add_filter *ap)
+{
+	struct top_cdnr *top;
+	struct cdnr_block *target;
+
+	top = tcb_lookup(ap->iface.cdnr_ifname);
+	if (top == NULL)
+		return (EBADF);
+
+	target = cdnr_handle2cb(ap->cdnr_handle);
+	if (target == NULL)
+		return (EINVAL);
+
+	return (acc_add_filter(&top->tc_classifier, &ap->filter,
+	    target, &ap->filter_handle));
+}
+
+/*
+ * cdnrcmd_delete_filter: remove the filter ap->filter_handle from the
+ * classifier of the interface's conditioner.  Returns EBADF for an
+ * unknown interface, otherwise the result of acc_delete_filter().
+ */
+static int
+cdnrcmd_delete_filter(struct cdnr_delete_filter *ap)
+{
+	struct top_cdnr *top = tcb_lookup(ap->iface.cdnr_ifname);
+
+	if (top == NULL)
+		return (EBADF);
+
+	return (acc_delete_filter(&top->tc_classifier, ap->filter_handle));
+}
+
+/*
+ * cdnrcmd_add_tbm: create a token bucket meter from ap->profile with
+ * the given in/out actions.  On success the new handle is stored in
+ * ap->cdnr_handle.  Returns EBADF for an unknown interface and EINVAL
+ * when tbm_create() fails.
+ */
+static int
+cdnrcmd_add_tbm(struct cdnr_add_tbmeter *ap)
+{
+	struct top_cdnr *top;
+	struct tbmeter *meter;
+
+	top = tcb_lookup(ap->iface.cdnr_ifname);
+	if (top == NULL)
+		return (EBADF);
+
+	meter = tbm_create(top, &ap->profile, &ap->in_action,
+	    &ap->out_action);
+	if (meter != NULL) {
+		/* return a class handle to the user */
+		ap->cdnr_handle = cdnr_cb2handle(&meter->cdnrblk);
+		return (0);
+	}
+
+	return (EINVAL);
+}
+
+/*
+ * cdnrcmd_modify_tbm: replace the profile of an existing token bucket
+ * meter.
+ *
+ * Returns EINVAL when ap->cdnr_handle is unknown or does not name a
+ * token bucket meter, 0 on success.
+ */
+static int
+cdnrcmd_modify_tbm(ap)
+	struct cdnr_modify_tbmeter *ap;
+{
+	struct cdnr_block *cb;
+	struct tbmeter *tbm;
+
+	if ((cb = cdnr_handle2cb(ap->cdnr_handle)) == NULL)
+		return (EINVAL);
+
+	/*
+	 * a handle can name any element type; refuse anything that is
+	 * not a token bucket meter before treating the block as one.
+	 */
+	if (cb->cb_type != TCETYPE_TBMETER)
+		return (EINVAL);
+	tbm = (struct tbmeter *)cb;
+
+	tb_import_profile(&tbm->tb, &ap->profile);
+
+	return (0);
+}
+
+/*
+ * cdnrcmd_tbm_stats: copy the in/out packet counters of a token
+ * bucket meter into ap.
+ *
+ * Returns EINVAL when ap->cdnr_handle is unknown or does not name a
+ * token bucket meter, 0 on success.
+ */
+static int
+cdnrcmd_tbm_stats(ap)
+	struct cdnr_tbmeter_stats *ap;
+{
+	struct cdnr_block *cb;
+	struct tbmeter *tbm;
+
+	if ((cb = cdnr_handle2cb(ap->cdnr_handle)) == NULL)
+		return (EINVAL);
+
+	/* do not read meter counters out of a different element type */
+	if (cb->cb_type != TCETYPE_TBMETER)
+		return (EINVAL);
+	tbm = (struct tbmeter *)cb;
+
+	ap->in_cnt = tbm->in_cnt;
+	ap->out_cnt = tbm->out_cnt;
+
+	return (0);
+}
+
+/*
+ * cdnrcmd_add_trtcm: create a two-rate three color marker from the
+ * committed/peak profiles, the three color actions and the coloraware
+ * flag in ap.  On success the new handle is stored in ap->cdnr_handle.
+ * Returns EBADF for an unknown interface and EINVAL when
+ * trtcm_create() fails.
+ */
+static int
+cdnrcmd_add_trtcm(struct cdnr_add_trtcm *ap)
+{
+	struct top_cdnr *top;
+	struct trtcm *marker;
+
+	top = tcb_lookup(ap->iface.cdnr_ifname);
+	if (top == NULL)
+		return (EBADF);
+
+	marker = trtcm_create(top, &ap->cmtd_profile, &ap->peak_profile,
+	    &ap->green_action, &ap->yellow_action, &ap->red_action,
+	    ap->coloraware);
+	if (marker != NULL) {
+		/* return a class handle to the user */
+		ap->cdnr_handle = cdnr_cb2handle(&marker->cdnrblk);
+		return (0);
+	}
+
+	return (EINVAL);
+}
+
+/*
+ * cdnrcmd_modify_trtcm: replace the committed and peak profiles of an
+ * existing two-rate three color marker.
+ *
+ * Returns EINVAL when ap->cdnr_handle is unknown or does not name a
+ * trTCM, 0 on success.
+ *
+ * NOTE(review): ap->coloraware is not applied here; confirm whether
+ * that is intended.
+ */
+static int
+cdnrcmd_modify_trtcm(ap)
+	struct cdnr_modify_trtcm *ap;
+{
+	struct cdnr_block *cb;
+	struct trtcm *tcm;
+
+	if ((cb = cdnr_handle2cb(ap->cdnr_handle)) == NULL)
+		return (EINVAL);
+
+	/*
+	 * a handle can name any element type; refuse anything that is
+	 * not a trTCM before writing its token buckets.
+	 */
+	if (cb->cb_type != TCETYPE_TRTCM)
+		return (EINVAL);
+	tcm = (struct trtcm *)cb;
+
+	tb_import_profile(&tcm->cmtd_tb, &ap->cmtd_profile);
+	tb_import_profile(&tcm->peak_tb, &ap->peak_profile);
+
+	return (0);
+}
+
+/*
+ * cdnrcmd_tcm_stats: copy the green/yellow/red counters of a three
+ * color marker (trTCM or tswTCM) into ap.  Returns EINVAL when the
+ * handle is unknown or names another element type.
+ */
+static int
+cdnrcmd_tcm_stats(struct cdnr_tcm_stats *ap)
+{
+	struct cdnr_block *cb = cdnr_handle2cb(ap->cdnr_handle);
+
+	if (cb == NULL)
+		return (EINVAL);
+
+	switch (cb->cb_type) {
+	case TCETYPE_TRTCM: {
+		struct trtcm *tcm = (struct trtcm *)cb;
+
+		ap->green_cnt = tcm->green_cnt;
+		ap->yellow_cnt = tcm->yellow_cnt;
+		ap->red_cnt = tcm->red_cnt;
+		break;
+	}
+	case TCETYPE_TSWTCM: {
+		struct tswtcm *tsw = (struct tswtcm *)cb;
+
+		ap->green_cnt = tsw->green_cnt;
+		ap->yellow_cnt = tsw->yellow_cnt;
+		ap->red_cnt = tsw->red_cnt;
+		break;
+	}
+	default:
+		/* not a three color marker */
+		return (EINVAL);
+	}
+
+	return (0);
+}
+
+/*
+ * cdnrcmd_add_tswtcm: create a time sliding window three color marker.
+ * The committed rate must not exceed the peak rate.  On success the
+ * new handle is stored in ap->cdnr_handle.  Returns EBADF for an
+ * unknown interface and EINVAL for bad rates or when tswtcm_create()
+ * fails.
+ */
+static int
+cdnrcmd_add_tswtcm(struct cdnr_add_tswtcm *ap)
+{
+	struct top_cdnr *top;
+	struct tswtcm *marker;
+
+	top = tcb_lookup(ap->iface.cdnr_ifname);
+	if (top == NULL)
+		return (EBADF);
+
+	if (ap->peak_rate < ap->cmtd_rate)
+		return (EINVAL);
+
+	marker = tswtcm_create(top, ap->cmtd_rate, ap->peak_rate,
+	    ap->avg_interval, &ap->green_action, &ap->yellow_action,
+	    &ap->red_action);
+	if (marker != NULL) {
+		/* return a class handle to the user */
+		ap->cdnr_handle = cdnr_cb2handle(&marker->cdnrblk);
+		return (0);
+	}
+
+	return (EINVAL);
+}
+
+/*
+ * cdnrcmd_modify_tswtcm: change the committed rate, peak rate and
+ * averaging interval of an existing time sliding window three color
+ * marker and restart its rate estimate from 0.
+ *
+ * Returns EINVAL when ap->cdnr_handle is unknown or does not name a
+ * tswTCM, or when cmtd_rate exceeds peak_rate; 0 on success.
+ */
+static int
+cdnrcmd_modify_tswtcm(ap)
+	struct cdnr_modify_tswtcm *ap;
+{
+	struct cdnr_block *cb;
+	struct tswtcm *tsw;
+
+	if ((cb = cdnr_handle2cb(ap->cdnr_handle)) == NULL)
+		return (EINVAL);
+
+	/*
+	 * a handle can name any element type; refuse anything that is
+	 * not a tswTCM before writing its rate fields.
+	 */
+	if (cb->cb_type != TCETYPE_TSWTCM)
+		return (EINVAL);
+	tsw = (struct tswtcm *)cb;
+
+	if (ap->cmtd_rate > ap->peak_rate)
+		return (EINVAL);
+
+	/* convert rates from bits/sec to bytes/sec */
+	tsw->cmtd_rate = ap->cmtd_rate / 8;
+	tsw->peak_rate = ap->peak_rate / 8;
+	tsw->avg_rate = 0;
+
+	/* timewin is converted from msec to machine clock unit */
+	tsw->timewin = (u_int64_t)machclk_freq * ap->avg_interval / 1000;
+
+	return (0);
+}
+
+/*
+ * cdnrcmd_get_stats: export the statistics of an interface's
+ * conditioner.
+ *
+ * The per-action counters of the top conditioner are always copied
+ * into ap->cnts.  When ap->nelements is positive and ap->tce_stats is
+ * non-NULL, the first ap->nskip elements of the list are passed over
+ * and up to ap->nelements records are written with copyout() for the
+ * elements that keep counters (token bucket meter, trTCM, tswTCM);
+ * ap->nelements is then set to the number of records written.
+ *
+ * Returns EBADF for an unknown interface, the copyout() error if one
+ * occurs, 0 otherwise.
+ */
+static int
+cdnrcmd_get_stats(struct cdnr_get_stats *ap)
+{
+	struct top_cdnr *top;
+	struct cdnr_block *cb;
+	struct tce_stats rec, *dst;
+	int error, written, to_skip, wanted;
+
+	top = tcb_lookup(ap->iface.cdnr_ifname);
+	if (top == NULL)
+		return (EBADF);
+
+	/* copy action stats */
+	bcopy(top->tc_cnts, ap->cnts, sizeof(ap->cnts));
+
+	/* stats for each element */
+	wanted = ap->nelements;
+	dst = ap->tce_stats;
+	if (wanted <= 0 || dst == NULL)
+		return (0);
+
+	to_skip = ap->nskip;
+	written = 0;
+	LIST_FOREACH(cb, &top->tc_elements, cb_next) {
+		/* skipped entries are counted before looking at the type */
+		if (to_skip > 0) {
+			to_skip--;
+			continue;
+		}
+
+		bzero(&rec, sizeof(rec));
+		rec.tce_handle = cb->cb_handle;
+		rec.tce_type = cb->cb_type;
+
+		if (cb->cb_type == TCETYPE_TBMETER) {
+			struct tbmeter *tbm = (struct tbmeter *)cb;
+
+			rec.tce_cnts[0] = tbm->in_cnt;
+			rec.tce_cnts[1] = tbm->out_cnt;
+		} else if (cb->cb_type == TCETYPE_TRTCM) {
+			struct trtcm *tcm = (struct trtcm *)cb;
+
+			rec.tce_cnts[0] = tcm->green_cnt;
+			rec.tce_cnts[1] = tcm->yellow_cnt;
+			rec.tce_cnts[2] = tcm->red_cnt;
+		} else if (cb->cb_type == TCETYPE_TSWTCM) {
+			struct tswtcm *tsw = (struct tswtcm *)cb;
+
+			rec.tce_cnts[0] = tsw->green_cnt;
+			rec.tce_cnts[1] = tsw->yellow_cnt;
+			rec.tce_cnts[2] = tsw->red_cnt;
+		} else {
+			/* element type without counters: no record */
+			continue;
+		}
+
+		error = copyout((caddr_t)&rec, (caddr_t)dst, sizeof(rec));
+		if (error != 0)
+			return (error);
+		dst++;
+
+		written++;
+		if (written == wanted)
+			break;
+	}
+	ap->nelements = written;
+
+	return (0);
+}
+
+/*
+ * conditioner device interface
+ */
+/*
+ * cdnropen: open the conditioner device.  Initializes the machine
+ * clock if it is not set up yet and returns ENXIO when no clock
+ * frequency is available afterwards.
+ */
+int
+cdnropen(dev, flag, fmt, p)
+	dev_t dev;
+	int flag, fmt;
+#if (__FreeBSD_version > 500000)
+	struct thread *p;
+#else
+	struct proc *p;
+#endif
+{
+	if (machclk_freq == 0) {
+		init_machclk();
+
+		if (machclk_freq == 0) {
+			printf("cdnr: no cpu clock available!\n");
+			return (ENXIO);
+		}
+	}
+
+	/* everything will be done when the queueing scheme is attached. */
+	return 0;
+}
+
+/*
+ * cdnrclose: close the conditioner device.  Destroys every top level
+ * conditioner on tcb_list, clears the altq_input hook and returns the
+ * first non-zero error reported by top_destroy(), or 0.
+ */
+int
+cdnrclose(dev, flag, fmt, p)
+	dev_t dev;
+	int flag, fmt;
+#if (__FreeBSD_version > 500000)
+	struct thread *p;
+#else
+	struct proc *p;
+#endif
+{
+	struct top_cdnr *top;
+	int rc, first_error = 0;
+
+	/* destroy all */
+	for (top = LIST_FIRST(&tcb_list); top != NULL;
+	    top = LIST_FIRST(&tcb_list)) {
+		rc = top_destroy(top);
+		if (first_error == 0 && rc != 0)
+			first_error = rc;
+	}
+	altq_input = NULL;
+
+	return (first_error);
+}
+/*
+ * cdnrioctl: ioctl entry point of the conditioner device.
+ *
+ * Every request except CDNR_GETSTATS requires super-user privilege.
+ * The request is then dispatched, with the network interrupt level
+ * raised, to the matching cdnrcmd_*() handler; addr points to the
+ * request structure of the command.  Unknown commands yield EINVAL.
+ * Returns 0 or an errno value.
+ */
+int
+cdnrioctl(dev, cmd, addr, flag, p)
+ dev_t dev;
+ ioctlcmd_t cmd;
+ caddr_t addr;
+ int flag;
+#if (__FreeBSD_version > 500000)
+ struct thread *p;
+#else
+ struct proc *p;
+#endif
+{
+ struct top_cdnr *top;
+ struct cdnr_interface *ifacep;
+ int s, error = 0;
+
+ /* check super-user privilege (only CDNR_GETSTATS is exempt) */
+ switch (cmd) {
+ case CDNR_GETSTATS:
+ break;
+ default:
+#if (__FreeBSD_version > 400000)
+ if ((error = suser(p)) != 0)
+#else
+ if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
+#endif
+ return (error);
+ break;
+ }
+
+ /* raise the interrupt priority level; restored by splx() below */
+#ifdef __NetBSD__
+ s = splnet();
+#else
+ s = splimp();
+#endif
+ switch (cmd) {
+
+ case CDNR_IF_ATTACH:
+ ifacep = (struct cdnr_interface *)addr;
+ error = cdnrcmd_if_attach(ifacep->cdnr_ifname);
+ break;
+
+ case CDNR_IF_DETACH:
+ ifacep = (struct cdnr_interface *)addr;
+ error = cdnrcmd_if_detach(ifacep->cdnr_ifname);
+ break;
+
+ case CDNR_ENABLE:
+ case CDNR_DISABLE:
+ ifacep = (struct cdnr_interface *)addr;
+ if ((top = tcb_lookup(ifacep->cdnr_ifname)) == NULL) {
+ error = EBADF;
+ break;
+ }
+
+ switch (cmd) {
+
+ case CDNR_ENABLE:
+ ALTQ_SET_CNDTNING(top->tc_ifq);
+ /* install the input hook when the first conditioner is enabled */
+ if (altq_input == NULL)
+ altq_input = altq_cdnr_input;
+ break;
+
+ case CDNR_DISABLE:
+ ALTQ_CLEAR_CNDTNING(top->tc_ifq);
+ /*
+ * top is reused as the list cursor: it ends up NULL only
+ * when no conditioner on tcb_list is still enabled, in
+ * which case the input hook is removed.
+ */
+ LIST_FOREACH(top, &tcb_list, tc_next)
+ if (ALTQ_IS_CNDTNING(top->tc_ifq))
+ break;
+ if (top == NULL)
+ altq_input = NULL;
+ break;
+ }
+ break;
+
+ case CDNR_ADD_ELEM:
+ error = cdnrcmd_add_element((struct cdnr_add_element *)addr);
+ break;
+
+ case CDNR_DEL_ELEM:
+ error = cdnrcmd_delete_element((struct cdnr_delete_element *)addr);
+ break;
+
+ case CDNR_ADD_TBM:
+ error = cdnrcmd_add_tbm((struct cdnr_add_tbmeter *)addr);
+ break;
+
+ case CDNR_MOD_TBM:
+ error = cdnrcmd_modify_tbm((struct cdnr_modify_tbmeter *)addr);
+ break;
+
+ case CDNR_TBM_STATS:
+ error = cdnrcmd_tbm_stats((struct cdnr_tbmeter_stats *)addr);
+ break;
+
+ case CDNR_ADD_TCM:
+ error = cdnrcmd_add_trtcm((struct cdnr_add_trtcm *)addr);
+ break;
+
+ case CDNR_MOD_TCM:
+ error = cdnrcmd_modify_trtcm((struct cdnr_modify_trtcm *)addr);
+ break;
+
+ case CDNR_TCM_STATS:
+ error = cdnrcmd_tcm_stats((struct cdnr_tcm_stats *)addr);
+ break;
+
+ case CDNR_ADD_FILTER:
+ error = cdnrcmd_add_filter((struct cdnr_add_filter *)addr);
+ break;
+
+ case CDNR_DEL_FILTER:
+ error = cdnrcmd_delete_filter((struct cdnr_delete_filter *)addr);
+ break;
+
+ case CDNR_GETSTATS:
+ error = cdnrcmd_get_stats((struct cdnr_get_stats *)addr);
+ break;
+
+ case CDNR_ADD_TSW:
+ error = cdnrcmd_add_tswtcm((struct cdnr_add_tswtcm *)addr);
+ break;
+
+ case CDNR_MOD_TSW:
+ error = cdnrcmd_modify_tswtcm((struct cdnr_modify_tswtcm *)addr);
+ break;
+
+ default:
+ error = EINVAL;
+ break;
+ }
+ splx(s);
+
+ return error;
+}
+
+#ifdef KLD_MODULE
+/* discipline switch for the loadable module: name and open/close/ioctl entry points */
+static struct altqsw cdnr_sw =
+ {"cdnr", cdnropen, cdnrclose, cdnrioctl};
+
+ALTQ_MODULE(altq_cdnr, ALTQT_CDNR, &cdnr_sw);
+
+#endif /* KLD_MODULE */
+
+#endif /* ALTQ3_COMPAT */
+#endif /* ALTQ_CDNR */
diff --git a/sys/contrib/altq/altq/altq_cdnr.h b/sys/contrib/altq/altq/altq_cdnr.h
new file mode 100644
index 000000000000..d55402f4ef58
--- /dev/null
+++ b/sys/contrib/altq/altq/altq_cdnr.h
@@ -0,0 +1,335 @@
+/* $KAME: altq_cdnr.h,v 1.9 2003/07/10 12:07:48 kjc Exp $ */
+
+/*
+ * Copyright (C) 1999-2002
+ * Sony Computer Science Laboratories Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _ALTQ_ALTQ_CDNR_H_
+#define _ALTQ_ALTQ_CDNR_H_
+
+#include <altq/altq.h>
+
+/*
+ * traffic conditioner element types
+ */
+#define TCETYPE_NONE 0
+#define TCETYPE_TOP 1 /* top level conditioner */
+#define TCETYPE_ELEMENT 2 /* a simple tc element */
+#define TCETYPE_TBMETER 3 /* token bucket meter */
+#define TCETYPE_TRTCM 4 /* (two-rate) three color marker */
+#define TCETYPE_TSWTCM 5 /* time sliding window 3-color marker */
+
+/*
+ * traffic conditioner action
+ */
+struct cdnr_block;
+
+struct tc_action { /* what to do with a packet; tca_code selects the union member */
+ int tca_code; /* one of TCACODE_*, e.g., TCACODE_PASS */
+ /* tca_code dependent variable */
+ union {
+ u_long un_value; /* template */
+ u_int8_t un_dscp; /* diffserv code point (read for TCACODE_MARK) */
+ u_long un_handle; /* tc action handle */
+ struct cdnr_block *un_next; /* next tc element block */
+ } tca_un;
+};
+#define tca_value tca_un.un_value
+#define tca_dscp tca_un.un_dscp
+#define tca_handle tca_un.un_handle
+#define tca_next tca_un.un_next
+
+#define TCACODE_NONE 0 /* action is not set */
+#define TCACODE_PASS 1 /* pass this packet */
+#define TCACODE_DROP 2 /* discard this packet */
+#define TCACODE_RETURN 3 /* do not process this packet */
+#define TCACODE_MARK 4 /* mark dscp */
+#define TCACODE_HANDLE 5 /* take action specified by handle */
+#define TCACODE_NEXT 6 /* take action in the next tc element */
+#define TCACODE_MAX 6
+
+#define CDNR_NULL_HANDLE 0
+
+struct cdnr_interface { /* argument of CDNR_IF_ATTACH/IF_DETACH/ENABLE/DISABLE */
+ char cdnr_ifname[IFNAMSIZ]; /* interface name (e.g., fxp0) */
+};
+
+/* simple element operations */
+struct cdnr_add_element { /* argument of CDNR_ADD_ELEM */
+ struct cdnr_interface iface; /* interface owning the conditioner */
+ struct tc_action action; /* action of the new element */
+
+ u_long cdnr_handle; /* return value */
+};
+
+struct cdnr_delete_element { /* argument of CDNR_DEL_ELEM */
+ struct cdnr_interface iface; /* interface owning the conditioner */
+ u_long cdnr_handle; /* handle of the element to destroy */
+};
+
+/* token-bucket meter operations */
+struct cdnr_add_tbmeter { /* argument of CDNR_ADD_TBM */
+ struct cdnr_interface iface; /* interface owning the conditioner */
+ struct tb_profile profile; /* token bucket profile of the meter */
+ struct tc_action in_action; /* action for IN packets */
+ struct tc_action out_action; /* action for OUT packets */
+
+ u_long cdnr_handle; /* return value */
+};
+
+struct cdnr_modify_tbmeter { /* argument of CDNR_MOD_TBM */
+ struct cdnr_interface iface;
+ u_long cdnr_handle; /* handle of the meter to modify */
+ struct tb_profile profile; /* new token bucket profile */
+};
+
+struct cdnr_tbmeter_stats { /* argument of CDNR_TBM_STATS */
+ struct cdnr_interface iface;
+ u_long cdnr_handle; /* handle of the meter to query */
+ struct pktcntr in_cnt; /* returned: counter for IN packets */
+ struct pktcntr out_cnt; /* returned: counter for OUT packets */
+};
+
+/* two-rate three-color marker operations */
+struct cdnr_add_trtcm { /* argument of CDNR_ADD_TCM */
+ struct cdnr_interface iface; /* interface owning the conditioner */
+ struct tb_profile cmtd_profile; /* profile for committed tb */
+ struct tb_profile peak_profile; /* profile for peak tb */
+ struct tc_action green_action; /* action for green packets */
+ struct tc_action yellow_action; /* action for yellow packets */
+ struct tc_action red_action; /* action for red packets */
+ int coloraware; /* color-aware/color-blind */
+
+ u_long cdnr_handle; /* return value */
+};
+
+struct cdnr_modify_trtcm { /* argument of CDNR_MOD_TCM */
+ struct cdnr_interface iface;
+ u_long cdnr_handle; /* handle of the marker to modify */
+ struct tb_profile cmtd_profile; /* profile for committed tb */
+ struct tb_profile peak_profile; /* profile for peak tb */
+ int coloraware; /* color-aware/color-blind */
+};
+
+struct cdnr_tcm_stats { /* argument of CDNR_TCM_STATS (trTCM or tswTCM handle) */
+ struct cdnr_interface iface;
+ u_long cdnr_handle; /* handle of the marker to query */
+ struct pktcntr green_cnt; /* returned: green packets */
+ struct pktcntr yellow_cnt; /* returned: yellow packets */
+ struct pktcntr red_cnt; /* returned: red packets */
+};
+
+/* time sliding window three-color marker operations */
+struct cdnr_add_tswtcm { /* argument of CDNR_ADD_TSW; cmtd_rate must not exceed peak_rate */
+ struct cdnr_interface iface; /* interface owning the conditioner */
+ u_int32_t cmtd_rate; /* committed rate (bits/sec) */
+ u_int32_t peak_rate; /* peak rate (bits/sec) */
+ u_int32_t avg_interval; /* averaging interval (msec) */
+ struct tc_action green_action; /* action for green packets */
+ struct tc_action yellow_action; /* action for yellow packets */
+ struct tc_action red_action; /* action for red packets */
+
+ u_long cdnr_handle; /* return value */
+};
+
+struct cdnr_modify_tswtcm { /* argument of CDNR_MOD_TSW; cmtd_rate must not exceed peak_rate */
+ struct cdnr_interface iface;
+ u_long cdnr_handle; /* handle of the marker to modify */
+ u_int32_t cmtd_rate; /* committed rate (bits/sec) */
+ u_int32_t peak_rate; /* peak rate (bits/sec) */
+ u_int32_t avg_interval; /* averaging interval (msec) */
+};
+
+struct cdnr_add_filter { /* argument of CDNR_ADD_FILTER */
+ struct cdnr_interface iface; /* interface owning the classifier */
+ u_long cdnr_handle; /* element the filter is bound to */
+#ifdef ALTQ3_CLFIER_COMPAT
+ struct flow_filter filter; /* filter to add */
+#endif
+ u_long filter_handle; /* return value */
+};
+
+struct cdnr_delete_filter { /* argument of CDNR_DEL_FILTER */
+ struct cdnr_interface iface; /* interface owning the classifier */
+ u_long filter_handle; /* handle of the filter to delete */
+};
+
+struct tce_stats { /* one per-element record written by CDNR_GETSTATS */
+ u_long tce_handle; /* tc element handle */
+ int tce_type; /* e.g., TCETYPE_ELEMENT */
+ /* token bucket meter: [0]=in, [1]=out; tcm: [0]=green, [1]=yellow, [2]=red */
+ struct pktcntr tce_cnts[3]; /* tcm returns 3 counters */
+};
+
+struct cdnr_get_stats { /* argument of CDNR_GETSTATS */
+ struct cdnr_interface iface;
+ struct pktcntr cnts[TCACODE_MAX+1]; /* returned: counters of the top conditioner */
+
+ /* element stats */
+ int nskip; /* skip # of elements */
+ int nelements; /* # of element stats (WR): in = array capacity, out = records written */
+ struct tce_stats *tce_stats; /* pointer to stats array (filled with copyout) */
+};
+
+#define CDNR_IF_ATTACH _IOW('Q', 1, struct cdnr_interface)
+#define CDNR_IF_DETACH _IOW('Q', 2, struct cdnr_interface)
+#define CDNR_ENABLE _IOW('Q', 3, struct cdnr_interface)
+#define CDNR_DISABLE _IOW('Q', 4, struct cdnr_interface)
+#define CDNR_ADD_FILTER _IOWR('Q', 10, struct cdnr_add_filter)
+#define CDNR_DEL_FILTER _IOW('Q', 11, struct cdnr_delete_filter)
+#define CDNR_GETSTATS _IOWR('Q', 12, struct cdnr_get_stats)
+#define CDNR_ADD_ELEM _IOWR('Q', 30, struct cdnr_add_element)
+#define CDNR_DEL_ELEM _IOW('Q', 31, struct cdnr_delete_element)
+#define CDNR_ADD_TBM _IOWR('Q', 32, struct cdnr_add_tbmeter)
+#define CDNR_MOD_TBM _IOW('Q', 33, struct cdnr_modify_tbmeter)
+#define CDNR_TBM_STATS _IOWR('Q', 34, struct cdnr_tbmeter_stats)
+#define CDNR_ADD_TCM _IOWR('Q', 35, struct cdnr_add_trtcm)
+#define CDNR_MOD_TCM _IOWR('Q', 36, struct cdnr_modify_trtcm)
+#define CDNR_TCM_STATS _IOWR('Q', 37, struct cdnr_tcm_stats)
+#define CDNR_ADD_TSW _IOWR('Q', 38, struct cdnr_add_tswtcm)
+#define CDNR_MOD_TSW _IOWR('Q', 39, struct cdnr_modify_tswtcm)
+
+#ifndef DSCP_EF
+/* diffserv code points */
+#define DSCP_MASK 0xfc
+#define DSCP_CUMASK 0x03
+#define DSCP_EF 0xb8
+#define DSCP_AF11 0x28
+#define DSCP_AF12 0x30
+#define DSCP_AF13 0x38
+#define DSCP_AF21 0x48
+#define DSCP_AF22 0x50
+#define DSCP_AF23 0x58
+#define DSCP_AF31 0x68
+#define DSCP_AF32 0x70
+#define DSCP_AF33 0x78
+#define DSCP_AF41 0x88
+#define DSCP_AF42 0x90
+#define DSCP_AF43 0x98
+#define AF_CLASSMASK 0xe0
+#define AF_DROPPRECMASK 0x18
+#endif
+
+#ifdef _KERNEL
+
+/*
+ * packet information passed to the input function of tc elements
+ */
+struct cdnr_pktinfo {
+ int pkt_len; /* packet length */
+ u_int8_t pkt_dscp; /* diffserv code point (rewritten by markers) */
+};
+
+/*
+ * traffic conditioner control block common to all types of tc elements
+ */
+struct cdnr_block { /* first member of every element struct, so element pointers are cast to and from it */
+ LIST_ENTRY(cdnr_block) cb_next; /* link on top_cdnr.tc_elements */
+ int cb_len; /* size of this tc element */
+ int cb_type; /* cdnr block type (TCETYPE_*) */
+ int cb_ref; /* reference count of this element */
+ u_long cb_handle; /* handle of this tc element */
+ struct top_cdnr *cb_top; /* back pointer to top */
+ struct tc_action cb_action; /* top level action for this tcb */
+ /* per-type input handler; returns the action chosen for the packet */
+ struct tc_action *(*cb_input)(struct cdnr_block *,
+ struct cdnr_pktinfo *);
+};
+
+/*
+ * top level traffic conditioner structure for an interface
+ */
+struct top_cdnr {
+ struct cdnr_block tc_block; /* common conditioner block */
+
+ LIST_ENTRY(top_cdnr) tc_next; /* link on tcb_list */
+ struct ifaltq *tc_ifq; /* interface queue this conditioner belongs to */
+
+ LIST_HEAD(, cdnr_block) tc_elements; /* elements, linked through cb_next */
+#ifdef ALTQ3_CLFIER_COMPAT
+ struct acc_classifier tc_classifier; /* classifier holding the filters */
+#endif
+ struct pktcntr tc_cnts[TCACODE_MAX+1]; /* counters exported by CDNR_GETSTATS */
+};
+
+/* token bucket element */
+struct tbe { /* embedded in tbmeter (tb) and trtcm (cmtd_tb, peak_tb) */
+ u_int64_t rate;
+ u_int64_t depth;
+
+ /* NOTE(review): units and update rules of these fields are set by code not visible here */
+ u_int64_t token;
+ u_int64_t filluptime;
+ u_int64_t last;
+};
+
+/* token bucket meter structure */
+struct tbmeter { /* element of type TCETYPE_TBMETER */
+ struct cdnr_block cdnrblk; /* conditioner block */
+ struct tbe tb; /* token bucket */
+ struct tc_action in_action; /* actions for IN/OUT */
+ struct tc_action out_action; /* actions for IN/OUT */
+ struct pktcntr in_cnt; /* statistics for IN/OUT */
+ struct pktcntr out_cnt; /* statistics for IN/OUT */
+};
+
+/* two-rate three-color marker structure */
+struct trtcm { /* element of type TCETYPE_TRTCM */
+ struct cdnr_block cdnrblk; /* conditioner block */
+ struct tbe cmtd_tb; /* committed tb profile */
+ struct tbe peak_tb; /* peak tb profile */
+ struct tc_action green_action; /* action for green packets */
+ struct tc_action yellow_action; /* action for yellow packets */
+ struct tc_action red_action; /* action for red packets */
+ int coloraware; /* color-aware/color-blind */
+ u_int8_t green_dscp;
+ u_int8_t yellow_dscp;
+ u_int8_t red_dscp;
+ struct pktcntr green_cnt; /* per-color statistics */
+ struct pktcntr yellow_cnt;
+ struct pktcntr red_cnt;
+};
+
+/* time sliding window three-color marker structure */
+struct tswtcm { /* element of type TCETYPE_TSWTCM */
+ struct cdnr_block cdnrblk; /* conditioner block */
+
+ u_int32_t avg_rate; /* average rate (bytes/sec) */
+ u_int64_t t_front; /* timestamp of last update (machine clock) */
+
+ u_int64_t timewin; /* average interval (machine clock units) */
+ u_int32_t cmtd_rate; /* committed target rate (bytes/sec) */
+ u_int32_t peak_rate; /* peak target rate (bytes/sec) */
+ struct tc_action green_action; /* action for green packets */
+ struct tc_action yellow_action; /* action for yellow packets */
+ struct tc_action red_action; /* action for red packets */
+ u_int8_t green_dscp; /* dscp written to green packets */
+ u_int8_t yellow_dscp; /* dscp written to yellow packets */
+ u_int8_t red_dscp; /* dscp written to red packets */
+ struct pktcntr green_cnt; /* per-color statistics */
+ struct pktcntr yellow_cnt;
+ struct pktcntr red_cnt;
+};
+
+#endif /* _KERNEL */
+
+#endif /* _ALTQ_ALTQ_CDNR_H_ */
diff --git a/sys/contrib/altq/altq/altq_classq.h b/sys/contrib/altq/altq/altq_classq.h
new file mode 100644
index 000000000000..dc5c646f5281
--- /dev/null
+++ b/sys/contrib/altq/altq/altq_classq.h
@@ -0,0 +1,206 @@
+/* $KAME: altq_classq.h,v 1.6 2003/01/07 07:33:38 kjc Exp $ */
+
+/*
+ * Copyright (c) 1991-1997 Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the Network Research
+ * Group at Lawrence Berkeley Laboratory.
+ * 4. Neither the name of the University nor of the Laboratory may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+/*
+ * class queue definitions extracted from rm_class.h.
+ */
+#ifndef _ALTQ_ALTQ_CLASSQ_H_
+#define _ALTQ_ALTQ_CLASSQ_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Packet Queue types: DROPHEAD, RED, RIO or DROPTAIL.
+ */
+#define Q_DROPHEAD 0x00
+#define Q_RED 0x01
+#define Q_RIO 0x02
+#define Q_DROPTAIL 0x03
+
+#ifdef _KERNEL
+
+/*
+ * Packet Queue structures and macros to manipulate them.
+ */
+struct _class_queue_ { /* packets form a ring through m_nextpkt; tail_->m_nextpkt is the head */
+ struct mbuf *tail_; /* Tail of packet queue (NULL when empty) */
+ int qlen_; /* Queue length (in number of packets) */
+ int qlim_; /* Queue limit (in number of packets) */
+ int qtype_; /* Queue type */
+};
+
+typedef struct _class_queue_ class_queue_t;
+
+#define qtype(q) (q)->qtype_ /* Get queue type */
+#define qlimit(q) (q)->qlim_ /* Max packets to be queued */
+#define qlen(q) (q)->qlen_ /* Current queue length. */
+#define qtail(q) (q)->tail_ /* Tail of the queue */
+#define qhead(q) ((q)->tail_ ? (q)->tail_->m_nextpkt : NULL)
+
+#define qempty(q) ((q)->qlen_ == 0) /* Is the queue empty?? */
+#define q_is_red(q) ((q)->qtype_ == Q_RED) /* Is the queue a red queue */
+#define q_is_rio(q) ((q)->qtype_ == Q_RIO) /* Is the queue a rio queue */
+#define q_is_red_or_rio(q) ((q)->qtype_ == Q_RED || (q)->qtype_ == Q_RIO)
+
+#if !defined(__GNUC__) || defined(ALTQ_DEBUG)
+
+extern void _addq(class_queue_t *, struct mbuf *);
+extern struct mbuf *_getq(class_queue_t *);
+extern struct mbuf *_getq_tail(class_queue_t *);
+extern struct mbuf *_getq_random(class_queue_t *);
+extern void _removeq(class_queue_t *, struct mbuf *);
+extern void _flushq(class_queue_t *);
+
+#else /* __GNUC__ && !ALTQ_DEBUG */
+/*
+ * inlined versions
+ */
+/* append packet m at the tail of the ring and bump the queue length */
+static __inline void
+_addq(class_queue_t *q, struct mbuf *m)
+{
+	struct mbuf *tail = qtail(q);
+
+	if (tail == NULL) {
+		/* first packet: a ring of one */
+		m->m_nextpkt = m;
+	} else {
+		/* the new tail points at the head, the old tail at it */
+		m->m_nextpkt = tail->m_nextpkt;
+		tail->m_nextpkt = m;
+	}
+	qtail(q) = m;
+	qlen(q)++;
+}
+
+/* remove and return the packet at the head of the queue, NULL if empty */
+static __inline struct mbuf *
+_getq(class_queue_t *q)
+{
+	struct mbuf *tail = qtail(q);
+	struct mbuf *head;
+
+	if (tail == NULL)
+		return (NULL);
+
+	head = tail->m_nextpkt;
+	if (head == tail) {
+		/* last packet leaves: the queue becomes empty */
+		qtail(q) = NULL;
+	} else {
+		tail->m_nextpkt = head->m_nextpkt;
+	}
+	qlen(q)--;
+	head->m_nextpkt = NULL;
+	return (head);
+}
+
+/* remove and return the packet at the tail of the queue, NULL if empty */
+static __inline struct mbuf *
+_getq_tail(class_queue_t *q)
+{
+	struct mbuf *tail = qtail(q);
+	struct mbuf *before;
+
+	if (tail == NULL)
+		return NULL;
+
+	/* walk the ring to the packet that points at the tail */
+	before = tail;
+	while (before->m_nextpkt != tail)
+		before = before->m_nextpkt;
+
+	before->m_nextpkt = tail->m_nextpkt;
+	/* the tail was its own predecessor: it was the only packet */
+	qtail(q) = (before == tail) ? NULL : before;
+	qlen(q)--;
+	tail->m_nextpkt = NULL;
+	return (tail);
+}
+
+/* remove and return a randomly chosen packet of the queue, NULL if empty */
+static __inline struct mbuf *
+_getq_random(class_queue_t *q)
+{
+	struct mbuf *victim = qtail(q);
+
+	if (victim == NULL)
+		return NULL;
+
+	if (victim->m_nextpkt != victim) {
+		struct mbuf *before = NULL;
+		int steps = random() % qlen(q) + 1;
+
+		/* advance 1..qlen packets from the tail */
+		while (steps-- > 0) {
+			before = victim;
+			victim = victim->m_nextpkt;
+		}
+		before->m_nextpkt = victim->m_nextpkt;
+		if (victim == qtail(q))
+			qtail(q) = before;
+	} else {
+		/* single packet: the queue becomes empty */
+		qtail(q) = NULL;
+	}
+	qlen(q)--;
+	victim->m_nextpkt = NULL;
+	return (victim);
+}
+
+/*
+ * unlink packet m from the queue.  the ring is searched for the
+ * packet pointing at m, so m has to be queued on q.
+ */
+static __inline void
+_removeq(class_queue_t *q, struct mbuf *m)
+{
+	struct mbuf *before = qtail(q);
+
+	while (before->m_nextpkt != m)
+		before = before->m_nextpkt;
+
+	before->m_nextpkt = m->m_nextpkt;
+	if (before == m) {
+		/* m was the only packet */
+		qtail(q) = NULL;
+	} else if (qtail(q) == m) {
+		qtail(q) = before;
+	}
+	qlen(q)--;
+}
+
+/* dequeue and free every packet of the queue */
+static __inline void
+_flushq(class_queue_t *q)
+{
+	struct mbuf *m;
+
+	for (m = _getq(q); m != NULL; m = _getq(q))
+		m_freem(m);
+}
+
+#endif /* __GNUC__ && !ALTQ_DEBUG */
+
+#endif /* _KERNEL */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _ALTQ_ALTQ_CLASSQ_H_ */
diff --git a/sys/contrib/altq/altq/altq_hfsc.c b/sys/contrib/altq/altq/altq_hfsc.c
new file mode 100644
index 000000000000..4ac035ee9d04
--- /dev/null
+++ b/sys/contrib/altq/altq/altq_hfsc.c
@@ -0,0 +1,2256 @@
+/* $KAME: altq_hfsc.c,v 1.24 2003/12/05 05:40:46 kjc Exp $ */
+
+/*
+ * Copyright (c) 1997-1999 Carnegie Mellon University. All Rights Reserved.
+ *
+ * Permission to use, copy, modify, and distribute this software and
+ * its documentation is hereby granted (including for commercial or
+ * for-profit use), provided that both the copyright notice and this
+ * permission notice appear in all copies of the software, derivative
+ * works, or modified versions, and any portions thereof.
+ *
+ * THIS SOFTWARE IS EXPERIMENTAL AND IS KNOWN TO HAVE BUGS, SOME OF
+ * WHICH MAY HAVE SERIOUS CONSEQUENCES. CARNEGIE MELLON PROVIDES THIS
+ * SOFTWARE IN ITS ``AS IS'' CONDITION, AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+ * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * Carnegie Mellon encourages (but does not require) users of this
+ * software to return any improvements or extensions that they make,
+ * and to grant Carnegie Mellon the rights to redistribute these
+ * changes without encumbrance.
+ */
+/*
+ * H-FSC is described in Proceedings of SIGCOMM'97,
+ * "A Hierarchical Fair Service Curve Algorithm for Link-Sharing,
+ * Real-Time and Priority Service"
+ * by Ion Stoica, Hui Zhang, and T. S. Eugene Ng.
+ *
+ * Oleg Cherevko <olwi@aq.ml.com.ua> added the upperlimit for link-sharing.
+ * when a class has an upperlimit, the fit-time is computed from the
+ * upperlimit service curve. the link-sharing scheduler does not schedule
+ * a class whose fit-time exceeds the current time.
+ */
+
+#if defined(__FreeBSD__) || defined(__NetBSD__)
+#include "opt_altq.h"
+#if (__FreeBSD__ != 2)
+#include "opt_inet.h"
+#ifdef __FreeBSD__
+#include "opt_inet6.h"
+#endif
+#endif
+#endif /* __FreeBSD__ || __NetBSD__ */
+
+#ifdef ALTQ_HFSC /* hfsc is enabled by ALTQ_HFSC option in opt_altq.h */
+
+#include <sys/param.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/systm.h>
+#include <sys/errno.h>
+#include <sys/queue.h>
+#if 1 /* ALTQ3_COMPAT */
+#include <sys/sockio.h>
+#include <sys/proc.h>
+#include <sys/kernel.h>
+#endif /* ALTQ3_COMPAT */
+
+#include <net/if.h>
+#include <netinet/in.h>
+
+#include <net/pfvar.h>
+#include <altq/altq.h>
+#include <altq/altq_hfsc.h>
+#ifdef ALTQ3_COMPAT
+#include <altq/altq_conf.h>
+#endif
+
+/*
+ * function prototypes
+ */
+static int hfsc_clear_interface(struct hfsc_if *);
+static int hfsc_request(struct ifaltq *, int, void *);
+static void hfsc_purge(struct hfsc_if *);
+static struct hfsc_class *hfsc_class_create(struct hfsc_if *,
+ struct service_curve *, struct service_curve *, struct service_curve *,
+ struct hfsc_class *, int, int, int);
+static int hfsc_class_destroy(struct hfsc_class *);
+static struct hfsc_class *hfsc_nextclass(struct hfsc_class *);
+static int hfsc_enqueue(struct ifaltq *, struct mbuf *,
+ struct altq_pktattr *);
+static struct mbuf *hfsc_dequeue(struct ifaltq *, int);
+
+static int hfsc_addq(struct hfsc_class *, struct mbuf *);
+static struct mbuf *hfsc_getq(struct hfsc_class *);
+static struct mbuf *hfsc_pollq(struct hfsc_class *);
+static void hfsc_purgeq(struct hfsc_class *);
+
+static void update_cfmin(struct hfsc_class *);
+static void set_active(struct hfsc_class *, int);
+static void set_passive(struct hfsc_class *);
+
+static void init_ed(struct hfsc_class *, int);
+static void update_ed(struct hfsc_class *, int);
+static void update_d(struct hfsc_class *, int);
+static void init_vf(struct hfsc_class *, int);
+static void update_vf(struct hfsc_class *, int, u_int64_t);
+static ellist_t *ellist_alloc(void);
+static void ellist_destroy(ellist_t *);
+static void ellist_insert(struct hfsc_class *);
+static void ellist_remove(struct hfsc_class *);
+static void ellist_update(struct hfsc_class *);
+struct hfsc_class *ellist_get_mindl(ellist_t *, u_int64_t);
+static actlist_t *actlist_alloc(void);
+static void actlist_destroy(actlist_t *);
+static void actlist_insert(struct hfsc_class *);
+static void actlist_remove(struct hfsc_class *);
+static void actlist_update(struct hfsc_class *);
+
+static struct hfsc_class *actlist_firstfit(struct hfsc_class *,
+ u_int64_t);
+
+static __inline u_int64_t seg_x2y(u_int64_t, u_int64_t);
+static __inline u_int64_t seg_y2x(u_int64_t, u_int64_t);
+static __inline u_int64_t m2sm(u_int);
+static __inline u_int64_t m2ism(u_int);
+static __inline u_int64_t d2dx(u_int);
+static u_int sm2m(u_int64_t);
+static u_int dx2d(u_int64_t);
+
+static void sc2isc(struct service_curve *, struct internal_sc *);
+static void rtsc_init(struct runtime_sc *, struct internal_sc *,
+ u_int64_t, u_int64_t);
+static u_int64_t rtsc_y2x(struct runtime_sc *, u_int64_t);
+static u_int64_t rtsc_x2y(struct runtime_sc *, u_int64_t);
+static void rtsc_min(struct runtime_sc *, struct internal_sc *,
+ u_int64_t, u_int64_t);
+
+static void get_class_stats(struct hfsc_classstats *,
+ struct hfsc_class *);
+static struct hfsc_class *clh_to_clp(struct hfsc_if *, u_int32_t);
+
+
+#ifdef ALTQ3_COMPAT
+static struct hfsc_if *hfsc_attach(struct ifaltq *, u_int);
+static int hfsc_detach(struct hfsc_if *);
+static int hfsc_class_modify(struct hfsc_class *, struct service_curve *,
+ struct service_curve *, struct service_curve *);
+
+static int hfsccmd_if_attach(struct hfsc_attach *);
+static int hfsccmd_if_detach(struct hfsc_interface *);
+static int hfsccmd_add_class(struct hfsc_add_class *);
+static int hfsccmd_delete_class(struct hfsc_delete_class *);
+static int hfsccmd_modify_class(struct hfsc_modify_class *);
+static int hfsccmd_add_filter(struct hfsc_add_filter *);
+static int hfsccmd_delete_filter(struct hfsc_delete_filter *);
+static int hfsccmd_class_stats(struct hfsc_class_stats *);
+
+altqdev_decl(hfsc);
+#endif /* ALTQ3_COMPAT */
+
+/*
+ * macros
+ */
+/* true when the class has at least one child class */
+#define is_a_parent_class(cl) ((cl)->cl_children != NULL)
+
+/* all 64 bits set; used as the initial value when searching for a minimum */
+#define HT_INFINITY 0xffffffffffffffffLL /* infinite time value */
+
+#ifdef ALTQ3_COMPAT
+/* hif_list keeps all hfsc_if's allocated. */
+static struct hfsc_if *hif_list = NULL;
+#endif /* ALTQ3_COMPAT */
+
+/*
+ * Attach the HFSC discipline state kept in a->altq_disc to the send
+ * queue of the interface named a->ifname.
+ * Returns EINVAL if the interface does not exist or no discipline state
+ * has been set up, otherwise the result of altq_attach().
+ */
+int
+hfsc_pfattach(struct pf_altq *a)
+{
+ struct ifnet *ifp;
+ int s, error;
+
+ if ((ifp = ifunit(a->ifname)) == NULL || a->altq_disc == NULL)
+ return (EINVAL);
+ /* raise the interrupt priority level around the attach */
+#ifdef __NetBSD__
+ s = splnet();
+#else
+ s = splimp();
+#endif
+ /* register the HFSC enqueue/dequeue/request handlers on if_snd */
+ error = altq_attach(&ifp->if_snd, ALTQT_HFSC, a->altq_disc,
+ hfsc_enqueue, hfsc_dequeue, hfsc_request, NULL, NULL);
+ splx(s);
+ return (error);
+}
+
+/*
+ * Allocate the per-interface HFSC state (struct hfsc_if) for the
+ * interface named a->ifname and store it in a->altq_disc.
+ * Returns EINVAL if the interface does not exist, ENODEV if its send
+ * queue is not ALTQ-ready, ENOMEM on allocation failure, 0 on success.
+ */
+int
+hfsc_add_altq(struct pf_altq *a)
+{
+ struct hfsc_if *hif;
+ struct ifnet *ifp;
+
+ if ((ifp = ifunit(a->ifname)) == NULL)
+ return (EINVAL);
+ if (!ALTQ_IS_READY(&ifp->if_snd))
+ return (ENODEV);
+
+ MALLOC(hif, struct hfsc_if *, sizeof(struct hfsc_if),
+ M_DEVBUF, M_WAITOK);
+ if (hif == NULL)
+ return (ENOMEM);
+ bzero(hif, sizeof(struct hfsc_if));
+
+ /* the eligible list is allocated separately; undo hif if that fails */
+ hif->hif_eligible = ellist_alloc();
+ if (hif->hif_eligible == NULL) {
+ FREE(hif, M_DEVBUF);
+ return (ENOMEM);
+ }
+
+ hif->hif_ifq = &ifp->if_snd;
+
+ /* keep the state in pf_altq */
+ a->altq_disc = hif;
+
+ return (0);
+}
+
+/*
+ * Tear down the HFSC state referenced by a->altq_disc: destroy all
+ * classes, then the root class, then free the eligible list and the
+ * hfsc_if itself.  Returns EINVAL if no state is attached, else 0.
+ */
+int
+hfsc_remove_altq(struct pf_altq *a)
+{
+ struct hfsc_if *hif;
+
+ if ((hif = a->altq_disc) == NULL)
+ return (EINVAL);
+ a->altq_disc = NULL;
+
+ /* hfsc_clear_interface() leaves the root class; destroy it afterwards */
+ (void)hfsc_clear_interface(hif);
+ (void)hfsc_class_destroy(hif->hif_rootclass);
+
+ ellist_destroy(hif->hif_eligible);
+
+ FREE(hif, M_DEVBUF);
+
+ return (0);
+}
+
+/*
+ * Create the HFSC class described by a pf_altq entry.
+ * The parent is looked up by a->parent_qid; a NULL parent (root class)
+ * is only accepted while the interface has no root class yet.
+ * Returns EINVAL for missing state, unknown parent or qid 0, EBUSY if
+ * the qid is already in use, ENOMEM if the class cannot be created.
+ */
+int
+hfsc_add_queue(struct pf_altq *a)
+{
+	struct hfsc_if *hif;
+	struct hfsc_class *parent;
+	struct hfsc_opts *opts;
+	struct service_curve rtsc, lssc, ulsc;
+
+	hif = a->altq_disc;
+	if (hif == NULL)
+		return (EINVAL);
+
+	opts = &a->pq_u.hfsc_opts;
+
+	parent = NULL;
+	if (a->parent_qid != HFSC_NULLCLASS_HANDLE ||
+	    hif->hif_rootclass != NULL) {
+		parent = clh_to_clp(hif, a->parent_qid);
+		if (parent == NULL)
+			return (EINVAL);
+	}
+
+	if (a->qid == 0)
+		return (EINVAL);
+
+	if (clh_to_clp(hif, a->qid) != NULL)
+		return (EBUSY);
+
+	/* copy the three service curves out of the pf options */
+	rtsc.m1 = opts->rtsc_m1;
+	rtsc.d = opts->rtsc_d;
+	rtsc.m2 = opts->rtsc_m2;
+
+	lssc.m1 = opts->lssc_m1;
+	lssc.d = opts->lssc_d;
+	lssc.m2 = opts->lssc_m2;
+
+	ulsc.m1 = opts->ulsc_m1;
+	ulsc.d = opts->ulsc_d;
+	ulsc.m2 = opts->ulsc_m2;
+
+	if (hfsc_class_create(hif, &rtsc, &lssc, &ulsc,
+	    parent, a->qlimit, opts->flags, a->qid) == NULL)
+		return (ENOMEM);
+
+	return (0);
+}
+
+/*
+ * Destroy the class whose handle is a->qid.
+ * Returns EINVAL if no HFSC state is attached or the class is unknown,
+ * otherwise the result of hfsc_class_destroy().
+ */
+int
+hfsc_remove_queue(struct pf_altq *a)
+{
+	struct hfsc_if *hif = a->altq_disc;
+	struct hfsc_class *cl;
+
+	if (hif == NULL)
+		return (EINVAL);
+
+	cl = clh_to_clp(hif, a->qid);
+	if (cl == NULL)
+		return (EINVAL);
+
+	return (hfsc_class_destroy(cl));
+}
+
+/*
+ * Copy the statistics of the class with handle a->qid to the user
+ * buffer ubuf.
+ *  nbytes: in: size of ubuf; out: number of bytes copied.
+ * Returns EBADF if HFSC is not attached to a->ifname, EINVAL if the
+ * class is unknown or the buffer is too small (or its size negative),
+ * or the error from copyout().
+ */
+int
+hfsc_getqstats(struct pf_altq *a, void *ubuf, int *nbytes)
+{
+	struct hfsc_if *hif;
+	struct hfsc_class *cl;
+	struct hfsc_classstats stats;
+	int error = 0;
+
+	if ((hif = altq_lookup(a->ifname, ALTQT_HFSC)) == NULL)
+		return (EBADF);
+
+	if ((cl = clh_to_clp(hif, a->qid)) == NULL)
+		return (EINVAL);
+
+	/*
+	 * compare as signed: a negative *nbytes converted to an unsigned
+	 * type would compare larger than sizeof(stats) and slip through.
+	 */
+	if (*nbytes < (int)sizeof(stats))
+		return (EINVAL);
+
+	/* do not hand uninitialized stack bytes (e.g. padding) to userland */
+	bzero(&stats, sizeof(stats));
+	get_class_stats(&stats, cl);
+
+	if ((error = copyout((caddr_t)&stats, ubuf, sizeof(stats))) != 0)
+		return (error);
+	*nbytes = sizeof(stats);
+	return (0);
+}
+
+/*
+ * bring the interface back to the initial state by discarding
+ * all the filters and classes except the root class.
+ * always returns 0.
+ */
+static int
+hfsc_clear_interface(struct hfsc_if *hif)
+{
+ struct hfsc_class *cl;
+
+#ifdef ALTQ3_COMPAT
+ /* free the filters for this interface */
+ acc_discard_filters(&hif->hif_classifier, NULL, 1);
+#endif
+
+ /* clear out the classes */
+ /* each pass destroys one leaf; repeat until the root has no children */
+ while (hif->hif_rootclass != NULL &&
+ (cl = hif->hif_rootclass->cl_children) != NULL) {
+ /*
+ * remove the first leaf class found in the hierarchy
+ * then start over
+ */
+ for (; cl != NULL; cl = hfsc_nextclass(cl)) {
+ if (!is_a_parent_class(cl)) {
+ (void)hfsc_class_destroy(cl);
+ break;
+ }
+ }
+ }
+
+ return (0);
+}
+
+/*
+ * Request handler registered with altq_attach().
+ * Only ALTRQ_PURGE is acted upon; every request returns 0.
+ */
+static int
+hfsc_request(struct ifaltq *ifq, int req, void *arg)
+{
+	struct hfsc_if *hif;
+
+	hif = (struct hfsc_if *)ifq->altq_disc;
+
+	if (req == ALTRQ_PURGE)
+		hfsc_purge(hif);
+
+	return (0);
+}
+
+/*
+ * Discard all the queued packets on the interface: purge every class
+ * with a non-empty queue, then zero the interface queue length if
+ * ALTQ is enabled on it.
+ */
+static void
+hfsc_purge(struct hfsc_if *hif)
+{
+	struct hfsc_class *cl;
+
+	cl = hif->hif_rootclass;
+	while (cl != NULL) {
+		if (!qempty(cl->cl_q))
+			hfsc_purgeq(cl);
+		cl = hfsc_nextclass(cl);
+	}
+
+	if (ALTQ_IS_ENABLED(hif->hif_ifq))
+		hif->hif_ifq->ifq_len = 0;
+}
+
+/*
+ * Allocate and initialize a new HFSC class and link it into the class
+ * table and the class hierarchy of hif.
+ *  rsc, fsc, usc: service curves stored as cl_rsc, cl_fsc and cl_usc;
+ *	a NULL curve or one with m1 == 0 and m2 == 0 is not installed.
+ *  parent: parent class, or NULL to create the root class.
+ *  qlimit: queue limit; 0 selects the default of 50.
+ *  flags: HFCF_* class flags.
+ *  qid: handle stored in cl_handle and used to pick the table slot.
+ * Returns the new class, or NULL if the class limit is reached, RED was
+ * requested but is not compiled in, an allocation failed, or the class
+ * table has no free slot.
+ */
+struct hfsc_class *
+hfsc_class_create(struct hfsc_if *hif, struct service_curve *rsc,
+    struct service_curve *fsc, struct service_curve *usc,
+    struct hfsc_class *parent, int qlimit, int flags, int qid)
+{
+	struct hfsc_class *cl, *p;
+	int i, s;
+
+	if (hif->hif_classes >= HFSC_MAX_CLASSES)
+		return (NULL);
+
+#ifndef ALTQ_RED
+	if (flags & HFCF_RED) {
+#ifdef ALTQ_DEBUG
+		printf("hfsc_class_create: RED not configured for HFSC!\n");
+#endif
+		return (NULL);
+	}
+#endif
+
+	MALLOC(cl, struct hfsc_class *, sizeof(struct hfsc_class),
+	    M_DEVBUF, M_WAITOK);
+	if (cl == NULL)
+		return (NULL);
+	bzero(cl, sizeof(struct hfsc_class));
+
+	MALLOC(cl->cl_q, class_queue_t *, sizeof(class_queue_t),
+	    M_DEVBUF, M_WAITOK);
+	if (cl->cl_q == NULL)
+		goto err_ret;
+	bzero(cl->cl_q, sizeof(class_queue_t));
+
+	cl->cl_actc = actlist_alloc();
+	if (cl->cl_actc == NULL)
+		goto err_ret;
+
+	if (qlimit == 0)
+		qlimit = 50;  /* use default */
+	qlimit(cl->cl_q) = qlimit;
+	qtype(cl->cl_q) = Q_DROPTAIL;
+	qlen(cl->cl_q) = 0;
+	cl->cl_flags = flags;
+#ifdef ALTQ_RED
+	if (flags & (HFCF_RED|HFCF_RIO)) {
+		int red_flags, red_pkttime;
+		u_int m2;
+
+		/* use the largest m2 of the three curves for the packet time */
+		m2 = 0;
+		if (rsc != NULL && rsc->m2 > m2)
+			m2 = rsc->m2;
+		if (fsc != NULL && fsc->m2 > m2)
+			m2 = fsc->m2;
+		if (usc != NULL && usc->m2 > m2)
+			m2 = usc->m2;
+
+		red_flags = 0;
+		if (flags & HFCF_ECN)
+			red_flags |= REDF_ECN;
+#ifdef ALTQ_RIO
+		if (flags & HFCF_CLEARDSCP)
+			red_flags |= RIOF_CLEARDSCP;
+#endif
+		if (m2 < 8)
+			red_pkttime = 1000 * 1000 * 1000; /* 1 sec */
+		else
+			red_pkttime = (int64_t)hif->hif_ifq->altq_ifp->if_mtu
+				* 1000 * 1000 * 1000 / (m2 / 8);
+		/*
+		 * if the RED/RIO state cannot be allocated the queue
+		 * simply stays drop-tail.
+		 */
+		if (flags & HFCF_RED) {
+			cl->cl_red = red_alloc(0, 0,
+			    qlimit(cl->cl_q) * 10/100,
+			    qlimit(cl->cl_q) * 30/100,
+			    red_flags, red_pkttime);
+			if (cl->cl_red != NULL)
+				qtype(cl->cl_q) = Q_RED;
+		}
+#ifdef ALTQ_RIO
+		else {
+			cl->cl_red = (red_t *)rio_alloc(0, NULL,
+			    red_flags, red_pkttime);
+			if (cl->cl_red != NULL)
+				qtype(cl->cl_q) = Q_RIO;
+		}
+#endif
+	}
+#endif /* ALTQ_RED */
+
+	if (rsc != NULL && (rsc->m1 != 0 || rsc->m2 != 0)) {
+		MALLOC(cl->cl_rsc, struct internal_sc *,
+		    sizeof(struct internal_sc), M_DEVBUF, M_WAITOK);
+		if (cl->cl_rsc == NULL)
+			goto err_ret;
+		sc2isc(rsc, cl->cl_rsc);
+		rtsc_init(&cl->cl_deadline, cl->cl_rsc, 0, 0);
+		rtsc_init(&cl->cl_eligible, cl->cl_rsc, 0, 0);
+	}
+	if (fsc != NULL && (fsc->m1 != 0 || fsc->m2 != 0)) {
+		MALLOC(cl->cl_fsc, struct internal_sc *,
+		    sizeof(struct internal_sc), M_DEVBUF, M_WAITOK);
+		if (cl->cl_fsc == NULL)
+			goto err_ret;
+		sc2isc(fsc, cl->cl_fsc);
+		rtsc_init(&cl->cl_virtual, cl->cl_fsc, 0, 0);
+	}
+	if (usc != NULL && (usc->m1 != 0 || usc->m2 != 0)) {
+		MALLOC(cl->cl_usc, struct internal_sc *,
+		    sizeof(struct internal_sc), M_DEVBUF, M_WAITOK);
+		if (cl->cl_usc == NULL)
+			goto err_ret;
+		sc2isc(usc, cl->cl_usc);
+		rtsc_init(&cl->cl_ulimit, cl->cl_usc, 0, 0);
+	}
+
+	cl->cl_id = hif->hif_classid++;
+	cl->cl_handle = qid;
+	cl->cl_hif = hif;
+	cl->cl_parent = parent;
+
+#ifdef __NetBSD__
+	s = splnet();
+#else
+	s = splimp();
+#endif
+	hif->hif_classes++;
+
+	/*
+	 * find a free slot in the class table.  if the slot matching
+	 * the lower bits of qid is free, use this slot.  otherwise,
+	 * use the first free slot.
+	 * the modulo is taken on the unsigned value so that a negative
+	 * qid cannot produce a negative table index.
+	 */
+	i = (u_int)qid % HFSC_MAX_CLASSES;
+	if (hif->hif_class_tbl[i] == NULL)
+		hif->hif_class_tbl[i] = cl;
+	else {
+		for (i = 0; i < HFSC_MAX_CLASSES; i++)
+			if (hif->hif_class_tbl[i] == NULL) {
+				hif->hif_class_tbl[i] = cl;
+				break;
+			}
+		if (i == HFSC_MAX_CLASSES) {
+			/* no slot: take back the class count bumped above */
+			hif->hif_classes--;
+			splx(s);
+			goto err_ret;
+		}
+	}
+
+	if (flags & HFCF_DEFAULTCLASS)
+		hif->hif_defaultclass = cl;
+
+	if (parent == NULL) {
+		/* this is root class */
+		hif->hif_rootclass = cl;
+	} else {
+		/* add this class to the children list of the parent */
+		if ((p = parent->cl_children) == NULL)
+			parent->cl_children = cl;
+		else {
+			while (p->cl_siblings != NULL)
+				p = p->cl_siblings;
+			p->cl_siblings = cl;
+		}
+	}
+	splx(s);
+
+	return (cl);
+
+ err_ret:
+	/* release whatever was allocated before the failure */
+	if (cl->cl_actc != NULL)
+		actlist_destroy(cl->cl_actc);
+	if (cl->cl_red != NULL) {
+#ifdef ALTQ_RIO
+		if (q_is_rio(cl->cl_q))
+			rio_destroy((rio_t *)cl->cl_red);
+#endif
+#ifdef ALTQ_RED
+		if (q_is_red(cl->cl_q))
+			red_destroy(cl->cl_red);
+#endif
+	}
+	if (cl->cl_fsc != NULL)
+		FREE(cl->cl_fsc, M_DEVBUF);
+	if (cl->cl_rsc != NULL)
+		FREE(cl->cl_rsc, M_DEVBUF);
+	if (cl->cl_usc != NULL)
+		FREE(cl->cl_usc, M_DEVBUF);
+	if (cl->cl_q != NULL)
+		FREE(cl->cl_q, M_DEVBUF);
+	FREE(cl, M_DEVBUF);
+	return (NULL);
+}
+
+/*
+ * Unlink a leaf class from the hierarchy and the class table, drop its
+ * queued packets and free all memory owned by the class.
+ * Returns 0 on success (also for a NULL class) and EBUSY if the class
+ * still has children.
+ */
+static int
+hfsc_class_destroy(struct hfsc_class *cl)
+{
+	int i, s;
+
+	if (cl == NULL)
+		return (0);
+
+	if (is_a_parent_class(cl))
+		return (EBUSY);
+
+#ifdef __NetBSD__
+	s = splnet();
+#else
+	s = splimp();
+#endif
+
+#ifdef ALTQ3_COMPAT
+	/* delete filters referencing to this class */
+	acc_discard_filters(&cl->cl_hif->hif_classifier, cl, 0);
+#endif /* ALTQ3_COMPAT */
+
+	if (!qempty(cl->cl_q))
+		hfsc_purgeq(cl);
+
+	if (cl->cl_parent == NULL) {
+		/* this is root class */
+	} else {
+		/* take the class out of its parent's list of children */
+		struct hfsc_class *p = cl->cl_parent->cl_children;
+
+		if (p == cl)
+			cl->cl_parent->cl_children = cl->cl_siblings;
+		else do {
+			if (p->cl_siblings == cl) {
+				p->cl_siblings = cl->cl_siblings;
+				break;
+			}
+		} while ((p = p->cl_siblings) != NULL);
+		ASSERT(p != NULL);
+	}
+
+	for (i = 0; i < HFSC_MAX_CLASSES; i++)
+		if (cl->cl_hif->hif_class_tbl[i] == cl) {
+			cl->cl_hif->hif_class_tbl[i] = NULL;
+			break;
+		}
+
+	/*
+	 * forget a poll result that refers to this class so that a later
+	 * hfsc_dequeue(ALTDQ_REMOVE) does not use the freed class.
+	 */
+	if (cl->cl_hif->hif_pollcache == cl)
+		cl->cl_hif->hif_pollcache = NULL;
+
+	cl->cl_hif->hif_classes--;
+	splx(s);
+
+	actlist_destroy(cl->cl_actc);
+
+	if (cl->cl_red != NULL) {
+#ifdef ALTQ_RIO
+		if (q_is_rio(cl->cl_q))
+			rio_destroy((rio_t *)cl->cl_red);
+#endif
+#ifdef ALTQ_RED
+		if (q_is_red(cl->cl_q))
+			red_destroy(cl->cl_red);
+#endif
+	}
+
+	/* clear the interface's shortcuts to this class */
+	if (cl == cl->cl_hif->hif_rootclass)
+		cl->cl_hif->hif_rootclass = NULL;
+	if (cl == cl->cl_hif->hif_defaultclass)
+		cl->cl_hif->hif_defaultclass = NULL;
+
+	if (cl->cl_usc != NULL)
+		FREE(cl->cl_usc, M_DEVBUF);
+	if (cl->cl_fsc != NULL)
+		FREE(cl->cl_fsc, M_DEVBUF);
+	if (cl->cl_rsc != NULL)
+		FREE(cl->cl_rsc, M_DEVBUF);
+	FREE(cl->cl_q, M_DEVBUF);
+	FREE(cl, M_DEVBUF);
+
+	return (0);
+}
+
+/*
+ * hfsc_nextclass returns the class that follows cl when walking the
+ * tree (children first, then siblings, then the siblings of the
+ * closest ancestor that has one), or NULL when the walk is complete.
+ * usage:
+ *	for (cl = hif->hif_rootclass; cl != NULL; cl = hfsc_nextclass(cl))
+ *		do_something;
+ */
+static struct hfsc_class *
+hfsc_nextclass(struct hfsc_class *cl)
+{
+	if (cl->cl_children != NULL)
+		return (cl->cl_children);
+
+	if (cl->cl_siblings != NULL)
+		return (cl->cl_siblings);
+
+	/* climb until an ancestor with a sibling is found */
+	for (cl = cl->cl_parent; cl != NULL; cl = cl->cl_parent) {
+		if (cl->cl_siblings)
+			return (cl->cl_siblings);
+	}
+
+	return (NULL);
+}
+
+/*
+ * hfsc_enqueue is an enqueue function to be registered to
+ * (*altq_enqueue) in struct ifaltq.
+ *
+ * the packet is placed on the class named by its pf qid tag (or, with
+ * ALTQ3_COMPAT, by the classifier result in pktattr); packets without
+ * a usable leaf class go to the default class.
+ * returns 0 when the packet was queued and ENOBUFS when it was dropped;
+ * in the drop case the mbuf has already been freed.
+ */
+static int
+hfsc_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr)
+{
+ struct hfsc_if *hif = (struct hfsc_if *)ifq->altq_disc;
+ struct hfsc_class *cl;
+ struct m_tag *t;
+ int len;
+
+ /* grab class set by classifier */
+ if ((m->m_flags & M_PKTHDR) == 0) {
+ /* should not happen */
+#if defined(__NetBSD__) || defined(__OpenBSD__)
+ printf("altq: packet for %s does not have pkthdr\n",
+ ifq->altq_ifp->if_xname);
+#else
+ printf("altq: packet for %s%d does not have pkthdr\n",
+ ifq->altq_ifp->if_name, ifq->altq_ifp->if_unit);
+#endif
+ m_freem(m);
+ return (ENOBUFS);
+ }
+ cl = NULL;
+ /* the tag payload (struct altq_tag) follows the m_tag header */
+ if ((t = m_tag_find(m, PACKET_TAG_PF_QID, NULL)) != NULL)
+ cl = clh_to_clp(hif, ((struct altq_tag *)(t+1))->qid);
+#ifdef ALTQ3_COMPAT
+ else if ((ifq->altq_flags & ALTQF_CLASSIFY) && pktattr != NULL)
+ cl = pktattr->pattr_class;
+#endif
+ /* only leaf classes hold packets; otherwise use the default class */
+ if (cl == NULL || is_a_parent_class(cl)) {
+ cl = hif->hif_defaultclass;
+ if (cl == NULL) {
+ m_freem(m);
+ return (ENOBUFS);
+ }
+ }
+#ifdef ALTQ3_COMPAT
+ if (pktattr != NULL)
+ cl->cl_pktattr = pktattr; /* save proto hdr used by ECN */
+ else
+#endif
+ cl->cl_pktattr = NULL;
+ /* remember the length now: m is gone if hfsc_addq() drops it */
+ len = m_pktlen(m);
+ if (hfsc_addq(cl, m) != 0) {
+ /* drop occurred. mbuf was freed in hfsc_addq. */
+ PKTCNTR_ADD(&cl->cl_stats.drop_cnt, len);
+ return (ENOBUFS);
+ }
+ IFQ_INC_LEN(ifq);
+ cl->cl_hif->hif_packets++;
+
+ /* successfully queued. */
+ /* first packet of an empty queue: the class becomes active */
+ if (qlen(cl->cl_q) == 1)
+ set_active(cl, m_pktlen(m));
+
+ return (0);
+}
+
+/*
+ * hfsc_dequeue is a dequeue function to be registered to
+ * (*altq_dequeue) in struct ifaltq.
+ *
+ * note: ALTDQ_POLL returns the next packet without removing the packet
+ * from the queue. ALTDQ_REMOVE is a normal dequeue operation.
+ * ALTDQ_REMOVE must return the same packet if called immediately
+ * after ALTDQ_POLL.
+ *
+ * returns NULL when no packet is queued or when no class may be served
+ * at the current time.
+ */
+static struct mbuf *
+hfsc_dequeue(struct ifaltq *ifq, int op)
+{
+ struct hfsc_if *hif = (struct hfsc_if *)ifq->altq_disc;
+ struct hfsc_class *cl;
+ struct mbuf *m;
+ int len, next_len;
+ int realtime = 0;
+ u_int64_t cur_time;
+
+ if (hif->hif_packets == 0)
+ /* no packet in the tree */
+ return (NULL);
+
+ cur_time = read_machclk();
+
+ /* a preceding poll already selected the class: reuse that choice */
+ if (op == ALTDQ_REMOVE && hif->hif_pollcache != NULL) {
+
+ cl = hif->hif_pollcache;
+ hif->hif_pollcache = NULL;
+ /* check if the class was scheduled by real-time criteria */
+ if (cl->cl_rsc != NULL)
+ realtime = (cl->cl_e <= cur_time);
+ } else {
+ /*
+ * if there are eligible classes, use real-time criteria.
+ * find the class with the minimum deadline among
+ * the eligible classes.
+ */
+ if ((cl = ellist_get_mindl(hif->hif_eligible, cur_time))
+ != NULL) {
+ realtime = 1;
+ } else {
+#ifdef ALTQ_DEBUG
+ int fits = 0;
+#endif
+ /*
+ * use link-sharing criteria
+ * get the class with the minimum vt in the hierarchy
+ */
+ /* descend from the root, one level per iteration, to a leaf */
+ cl = hif->hif_rootclass;
+ while (is_a_parent_class(cl)) {
+
+ cl = actlist_firstfit(cl, cur_time);
+ if (cl == NULL) {
+#ifdef ALTQ_DEBUG
+ if (fits > 0)
+ printf("%d fit but none found\n",fits);
+#endif
+ return (NULL);
+ }
+ /*
+ * update parent's cl_cvtmin.
+ * don't update if the new vt is smaller.
+ */
+ if (cl->cl_parent->cl_cvtmin < cl->cl_vt)
+ cl->cl_parent->cl_cvtmin = cl->cl_vt;
+#ifdef ALTQ_DEBUG
+ fits++;
+#endif
+ }
+ }
+
+ /* poll: remember the selected class and return its head packet */
+ if (op == ALTDQ_POLL) {
+ hif->hif_pollcache = cl;
+ m = hfsc_pollq(cl);
+ return (m);
+ }
+ }
+
+ /* the selected class is expected to have a packet queued */
+ m = hfsc_getq(cl);
+ if (m == NULL)
+ panic("hfsc_dequeue:");
+ len = m_pktlen(m);
+ cl->cl_hif->hif_packets--;
+ IFQ_DEC_LEN(ifq);
+ PKTCNTR_ADD(&cl->cl_stats.xmit_cnt, len);
+
+ update_vf(cl, len, cur_time);
+ /* cl_cumul only counts bytes sent under the real-time criteria */
+ if (realtime)
+ cl->cl_cumul += len;
+
+ if (!qempty(cl->cl_q)) {
+ if (cl->cl_rsc != NULL) {
+ /* update ed */
+ next_len = m_pktlen(qhead(cl->cl_q));
+
+ if (realtime)
+ update_ed(cl, next_len);
+ else
+ update_d(cl, next_len);
+ }
+ } else {
+ /* the class becomes passive */
+ set_passive(cl);
+ }
+
+ return (m);
+}
+
+/*
+ * Append m to the class queue.  RIO and RED queues delegate to
+ * rio_addq()/red_addq(); a drop-tail queue frees the packet and returns
+ * -1 once the queue limit is reached.  Returns 0 when queued on the
+ * drop-tail path.
+ */
+static int
+hfsc_addq(struct hfsc_class *cl, struct mbuf *m)
+{
+
+#ifdef ALTQ_RIO
+ if (q_is_rio(cl->cl_q))
+ return rio_addq((rio_t *)cl->cl_red, cl->cl_q,
+ m, cl->cl_pktattr);
+#endif
+#ifdef ALTQ_RED
+ if (q_is_red(cl->cl_q))
+ return red_addq(cl->cl_red, cl->cl_q, m, cl->cl_pktattr);
+#endif
+ /* drop-tail: refuse the packet when the queue is full */
+ if (qlen(cl->cl_q) >= qlimit(cl->cl_q)) {
+ m_freem(m);
+ return (-1);
+ }
+
+ /* HFCF_CLEARDSCP: write 0 into the packet's DS field before queueing */
+ if (cl->cl_flags & HFCF_CLEARDSCP)
+ write_dsfield(m, cl->cl_pktattr, 0);
+
+ _addq(cl->cl_q, m);
+
+ return (0);
+}
+
+/*
+ * Remove and return the next packet of the class queue, going through
+ * rio_getq()/red_getq() for RIO/RED queues and _getq() otherwise.
+ */
+static struct mbuf *
+hfsc_getq(struct hfsc_class *cl)
+{
+#ifdef ALTQ_RIO
+ if (q_is_rio(cl->cl_q))
+ return rio_getq((rio_t *)cl->cl_red, cl->cl_q);
+#endif
+#ifdef ALTQ_RED
+ if (q_is_red(cl->cl_q))
+ return red_getq(cl->cl_red, cl->cl_q);
+#endif
+ return _getq(cl->cl_q);
+}
+
+/*
+ * Return the packet at the head of the class queue without removing it.
+ */
+static struct mbuf *
+hfsc_pollq(struct hfsc_class *cl)
+{
+	struct mbuf *head;
+
+	head = qhead(cl->cl_q);
+	return head;
+}
+
+/*
+ * Drop every packet queued on the class, counting each one as a drop
+ * and adjusting the interface packet counters, then take the class out
+ * of the active and eligible lists.
+ */
+static void
+hfsc_purgeq(struct hfsc_class *cl)
+{
+	struct mbuf *m;
+
+	if (qempty(cl->cl_q))
+		return;
+
+	for (m = _getq(cl->cl_q); m != NULL; m = _getq(cl->cl_q)) {
+		PKTCNTR_ADD(&cl->cl_stats.drop_cnt, m_pktlen(m));
+		m_freem(m);
+		cl->cl_hif->hif_packets--;
+		IFQ_DEC_LEN(cl->cl_hif->hif_ifq);
+	}
+	ASSERT(qlen(cl->cl_q) == 0);
+
+	/* remove cl from the actlist */
+	update_vf(cl, 0, 0);
+	set_passive(cl);
+}
+
+/*
+ * Called when a class receives its first packet: initialize the
+ * eligible/deadline state if the class has cl_rsc and the virtual time
+ * state if it has cl_fsc, then count a new active period.
+ */
+static void
+set_active(struct hfsc_class *cl, int len)
+{
+	if (cl->cl_rsc != NULL) {
+		init_ed(cl, len);
+	}
+	if (cl->cl_fsc != NULL) {
+		init_vf(cl, len);
+	}
+
+	++cl->cl_stats.period;
+}
+
+/*
+ * Take a class with cl_rsc off the eligible list.
+ *
+ * actlist is now handled in update_vf() so that update_vf(cl, 0, 0)
+ * needs to be called explicitly to remove a class from actlist
+ */
+static void
+set_passive(struct hfsc_class *cl)
+{
+	if (cl->cl_rsc == NULL)
+		return;
+
+	ellist_remove(cl);
+}
+
+/*
+ * Initialize the eligible time (cl_e) and deadline (cl_d) of a class
+ * that just became active and put it on the eligible list.
+ *  next_len: length of the packet at the head of the queue.
+ */
+static void
+init_ed(struct hfsc_class *cl, int next_len)
+{
+ u_int64_t cur_time;
+
+ cur_time = read_machclk();
+
+ /* update the deadline curve */
+ rtsc_min(&cl->cl_deadline, cl->cl_rsc, cur_time, cl->cl_cumul);
+
+ /*
+ * update the eligible curve.
+ * for concave, it is equal to the deadline curve.
+ * for convex, it is a linear curve with slope m2.
+ */
+ cl->cl_eligible = cl->cl_deadline;
+ /* sm1 <= sm2: drop the first segment so only the second slope remains */
+ if (cl->cl_rsc->sm1 <= cl->cl_rsc->sm2) {
+ cl->cl_eligible.dx = 0;
+ cl->cl_eligible.dy = 0;
+ }
+
+ /* compute e and d */
+ cl->cl_e = rtsc_y2x(&cl->cl_eligible, cl->cl_cumul);
+ cl->cl_d = rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len);
+
+ ellist_insert(cl);
+}
+
+/*
+ * Recompute the eligible time and the deadline of a class from its
+ * curves and cl_cumul, then reposition it on the eligible list.
+ *  next_len: length of the packet now at the head of the queue.
+ */
+static void
+update_ed(struct hfsc_class *cl, int next_len)
+{
+	u_int64_t eligible, deadline;
+
+	eligible = rtsc_y2x(&cl->cl_eligible, cl->cl_cumul);
+	cl->cl_e = eligible;
+
+	deadline = rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len);
+	cl->cl_d = deadline;
+
+	ellist_update(cl);
+}
+
+/*
+ * Recompute only the deadline of a class for the packet of length
+ * next_len at the head of its queue; the eligible time and the position
+ * on the eligible list are left as they are.
+ */
+static void
+update_d(struct hfsc_class *cl, int next_len)
+{
+	u_int64_t deadline;
+
+	deadline = rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len);
+	cl->cl_d = deadline;
+}
+
+/*
+ * Initialize the virtual time (vt) and fit-time (f) of a class that
+ * just became active, walking from the class up to (but excluding) the
+ * root.  An ancestor is initialized only while every class below it on
+ * the path went from 0 to 1 active children (go_active); cl_f is
+ * re-evaluated at every level.
+ * The len argument is not used in this function.
+ */
+static void
+init_vf(struct hfsc_class *cl, int len)
+{
+ struct hfsc_class *max_cl, *p;
+ u_int64_t vt, f, cur_time;
+ int go_active;
+
+ /* the clock is read lazily, only if an upper limit curve is met */
+ cur_time = 0;
+ go_active = 1;
+ for ( ; cl->cl_parent != NULL; cl = cl->cl_parent) {
+
+ /* cl_nactive is only incremented while go_active is still set */
+ if (go_active && cl->cl_nactive++ == 0)
+ go_active = 1;
+ else
+ go_active = 0;
+
+ if (go_active) {
+ /* the last entry of the parent's active list has the largest vt */
+ max_cl = actlist_last(cl->cl_parent->cl_actc);
+ if (max_cl != NULL) {
+ /*
+ * set vt to the average of the min and max
+ * classes. if the parent's period didn't
+ * change, don't decrease vt of the class.
+ */
+ vt = max_cl->cl_vt;
+ if (cl->cl_parent->cl_cvtmin != 0)
+ vt = (cl->cl_parent->cl_cvtmin + vt)/2;
+
+ if (cl->cl_parent->cl_vtperiod !=
+ cl->cl_parentperiod || vt > cl->cl_vt)
+ cl->cl_vt = vt;
+ } else {
+ /*
+ * first child for a new parent backlog period.
+ * add parent's cvtmax to vtoff of children
+ * to make a new vt (vtoff + vt) larger than
+ * the vt in the last period for all children.
+ */
+ vt = cl->cl_parent->cl_cvtmax;
+ for (p = cl->cl_parent->cl_children; p != NULL;
+ p = p->cl_siblings)
+ p->cl_vtoff += vt;
+ cl->cl_vt = 0;
+ cl->cl_parent->cl_cvtmax = 0;
+ cl->cl_parent->cl_cvtmin = 0;
+ }
+ cl->cl_initvt = cl->cl_vt;
+
+ /* update the virtual curve */
+ vt = cl->cl_vt + cl->cl_vtoff;
+ rtsc_min(&cl->cl_virtual, cl->cl_fsc, vt, cl->cl_total);
+ /* the curve now starts at vt: fold vtoff into it and reset vtoff */
+ if (cl->cl_virtual.x == vt) {
+ cl->cl_virtual.x -= cl->cl_vtoff;
+ cl->cl_vtoff = 0;
+ }
+ cl->cl_vtadj = 0;
+
+ cl->cl_vtperiod++; /* increment vt period */
+ cl->cl_parentperiod = cl->cl_parent->cl_vtperiod;
+ /* the parent is not active yet and will bump its period next round */
+ if (cl->cl_parent->cl_nactive == 0)
+ cl->cl_parentperiod++;
+ cl->cl_f = 0;
+
+ actlist_insert(cl);
+
+ if (cl->cl_usc != NULL) {
+ /* class has upper limit curve */
+ if (cur_time == 0)
+ cur_time = read_machclk();
+
+ /* update the ulimit curve */
+ rtsc_min(&cl->cl_ulimit, cl->cl_usc, cur_time,
+ cl->cl_total);
+ /* compute myf */
+ cl->cl_myf = rtsc_y2x(&cl->cl_ulimit,
+ cl->cl_total);
+ cl->cl_myfadj = 0;
+ }
+ }
+
+ /* cl_f is max(cl_myf, cl_cfmin); propagate a change to the parent */
+ if (cl->cl_myf > cl->cl_cfmin)
+ f = cl->cl_myf;
+ else
+ f = cl->cl_cfmin;
+ if (f != cl->cl_f) {
+ cl->cl_f = f;
+ update_cfmin(cl->cl_parent);
+ }
+ }
+}
+
+/*
+ * Account len bytes of service to the class and all its ancestors
+ * below the root, and update their virtual time and fit-time.
+ * When the class queue is empty on entry, the class (and each ancestor
+ * whose active count drops to 0) is removed from its parent's active
+ * list instead; hfsc_purgeq() calls update_vf(cl, 0, 0) for that purpose.
+ */
+static void
+update_vf(struct hfsc_class *cl, int len, u_int64_t cur_time)
+{
+ u_int64_t f, myf_bound, delta;
+ int go_passive;
+
+ go_passive = qempty(cl->cl_q);
+
+ for (; cl->cl_parent != NULL; cl = cl->cl_parent) {
+
+ cl->cl_total += len;
+
+ /* classes without cl_fsc or not active only accumulate cl_total */
+ if (cl->cl_fsc == NULL || cl->cl_nactive == 0)
+ continue;
+
+ /* cl_nactive is only decremented while go_passive is still set */
+ if (go_passive && --cl->cl_nactive == 0)
+ go_passive = 1;
+ else
+ go_passive = 0;
+
+ if (go_passive) {
+ /* no more active child, going passive */
+
+ /* update cvtmax of the parent class */
+ if (cl->cl_vt > cl->cl_parent->cl_cvtmax)
+ cl->cl_parent->cl_cvtmax = cl->cl_vt;
+
+ /* remove this class from the vt list */
+ actlist_remove(cl);
+
+ update_cfmin(cl->cl_parent);
+
+ continue;
+ }
+
+ /*
+ * update vt and f
+ */
+ cl->cl_vt = rtsc_y2x(&cl->cl_virtual, cl->cl_total)
+ - cl->cl_vtoff + cl->cl_vtadj;
+
+ /*
+ * if vt of the class is smaller than cvtmin,
+ * the class was skipped in the past due to non-fit.
+ * if so, we need to adjust vtadj.
+ */
+ if (cl->cl_vt < cl->cl_parent->cl_cvtmin) {
+ cl->cl_vtadj += cl->cl_parent->cl_cvtmin - cl->cl_vt;
+ cl->cl_vt = cl->cl_parent->cl_cvtmin;
+ }
+
+ /* update the vt list */
+ actlist_update(cl);
+
+ if (cl->cl_usc != NULL) {
+ cl->cl_myf = cl->cl_myfadj
+ + rtsc_y2x(&cl->cl_ulimit, cl->cl_total);
+
+ /*
+ * if myf lags behind by more than one clock tick
+ * from the current time, adjust myfadj to prevent
+ * a rate-limited class from going greedy.
+ * in a steady state under rate-limiting, myf
+ * fluctuates within one clock tick.
+ */
+ /* NOTE(review): unsigned arithmetic; wraps when cur_time is 0 - confirm intended */
+ myf_bound = cur_time - machclk_per_tick;
+ if (cl->cl_myf < myf_bound) {
+ delta = cur_time - cl->cl_myf;
+ cl->cl_myfadj += delta;
+ cl->cl_myf += delta;
+ }
+ }
+
+ /* cl_f is max(cl_myf, cl_cfmin) */
+ if (cl->cl_myf > cl->cl_cfmin)
+ f = cl->cl_myf;
+ else
+ f = cl->cl_cfmin;
+ if (f != cl->cl_f) {
+ cl->cl_f = f;
+ update_cfmin(cl->cl_parent);
+ }
+ }
+}
+
+/*
+ * Recompute cl_cfmin of a class as the smallest cl_f among its active
+ * children.  The result is 0 when there is no active child or when any
+ * active child has cl_f == 0.
+ */
+static void
+update_cfmin(struct hfsc_class *cl)
+{
+	struct hfsc_class *child;
+	u_int64_t lowest;
+
+	lowest = 0;
+	if (!TAILQ_EMPTY(cl->cl_actc)) {
+		lowest = HT_INFINITY;
+		TAILQ_FOREACH(child, cl->cl_actc, cl_actlist) {
+			if (child->cl_f == 0) {
+				/* nothing can be smaller: stop searching */
+				lowest = 0;
+				break;
+			}
+			if (child->cl_f < lowest)
+				lowest = child->cl_f;
+		}
+	}
+	cl->cl_cfmin = lowest;
+}
+
+/*
+ * TAILQ based ellist and actlist implementation
+ * (ion wanted to make a calendar queue based implementation)
+ */
+/*
+ * eligible list holds backlogged classes being sorted by their eligible times.
+ * there is one eligible list per interface.
+ */
+
+/*
+ * Allocate and initialize an empty eligible list.
+ * Returns NULL if the allocation fails; the caller checks for that.
+ */
+static ellist_t *
+ellist_alloc(void)
+{
+	ellist_t *head;
+
+	MALLOC(head, ellist_t *, sizeof(ellist_t), M_DEVBUF, M_WAITOK);
+	/* do not touch the list head if the allocation failed */
+	if (head != NULL)
+		TAILQ_INIT(head);
+	return (head);
+}
+
+/*
+ * Free an eligible list head obtained from ellist_alloc().
+ * Only the head is freed; list entries are not visited.
+ */
+static void
+ellist_destroy(ellist_t *head)
+{
+ FREE(head, M_DEVBUF);
+}
+
+/*
+ * Insert a class into the interface's eligible list, which is kept
+ * sorted by ascending eligible time (cl_e).  A class goes after
+ * existing entries with the same cl_e.
+ */
+static void
+ellist_insert(struct hfsc_class *cl)
+{
+	struct hfsc_if *hif = cl->cl_hif;
+	struct hfsc_class *last, *pos;
+
+	/* common case: the class belongs at the end of the list */
+	last = TAILQ_LAST(hif->hif_eligible, _eligible);
+	if (last == NULL || last->cl_e <= cl->cl_e) {
+		TAILQ_INSERT_TAIL(hif->hif_eligible, cl, cl_ellist);
+		return;
+	}
+
+	/* otherwise place it in front of the first entry with a larger cl_e */
+	TAILQ_FOREACH(pos, hif->hif_eligible, cl_ellist) {
+		if (cl->cl_e < pos->cl_e) {
+			TAILQ_INSERT_BEFORE(pos, cl, cl_ellist);
+			return;
+		}
+	}
+	ASSERT(0); /* should not reach here */
+}
+
+/*
+ * Remove a class from the eligible list of its interface.
+ */
+static void
+ellist_remove(struct hfsc_class *cl)
+{
+	TAILQ_REMOVE(cl->cl_hif->hif_eligible, cl, cl_ellist);
+}
+
+/*
+ * Move a class towards the tail of the eligible list after its
+ * eligible time (cl_e) has grown, keeping the list sorted by cl_e.
+ * The class is never moved towards the head.
+ */
+static void
+ellist_update(struct hfsc_class *cl)
+{
+ struct hfsc_if *hif = cl->cl_hif;
+ struct hfsc_class *p, *last;
+
+ /*
+ * the eligible time of a class increases monotonically.
+ * if the next entry has a larger eligible time, nothing to do.
+ */
+ p = TAILQ_NEXT(cl, cl_ellist);
+ if (p == NULL || cl->cl_e <= p->cl_e)
+ return;
+
+ /* check the last entry */
+ last = TAILQ_LAST(hif->hif_eligible, _eligible);
+ ASSERT(last != NULL);
+ if (last->cl_e <= cl->cl_e) {
+ TAILQ_REMOVE(hif->hif_eligible, cl, cl_ellist);
+ TAILQ_INSERT_TAIL(hif->hif_eligible, cl, cl_ellist);
+ return;
+ }
+
+ /*
+ * the new position must be between the next entry
+ * and the last entry
+ */
+ /* p starts at the entry after cl; insert before the first larger cl_e */
+ while ((p = TAILQ_NEXT(p, cl_ellist)) != NULL) {
+ if (cl->cl_e < p->cl_e) {
+ TAILQ_REMOVE(hif->hif_eligible, cl, cl_ellist);
+ TAILQ_INSERT_BEFORE(p, cl, cl_ellist);
+ return;
+ }
+ }
+ ASSERT(0); /* should not reach here */
+}
+
+/*
+ * find the class with the minimum deadline among the eligible classes.
+ * a class is eligible when its cl_e is not later than cur_time; the
+ * scan stops at the first entry that is not eligible yet.
+ * returns NULL if no class is eligible.
+ */
+struct hfsc_class *
+ellist_get_mindl(ellist_t *head, u_int64_t cur_time)
+{
+	struct hfsc_class *best = NULL;
+	struct hfsc_class *p;
+
+	TAILQ_FOREACH(p, head, cl_ellist) {
+		if (p->cl_e > cur_time)
+			return (best);
+		/* keep the earlier candidate on equal deadlines */
+		if (best != NULL && best->cl_d <= p->cl_d)
+			continue;
+		best = p;
+	}
+	return (best);
+}
+
+/*
+ * active children list holds backlogged child classes being sorted
+ * by their virtual time.
+ * each intermediate class has one active children list.
+ */
+/*
+ * Allocate and initialize an empty active children list.
+ * Returns NULL if the allocation fails; the caller checks for that.
+ */
+static actlist_t *
+actlist_alloc(void)
+{
+	actlist_t *head;
+
+	MALLOC(head, actlist_t *, sizeof(actlist_t), M_DEVBUF, M_WAITOK);
+	/* do not touch the list head if the allocation failed */
+	if (head != NULL)
+		TAILQ_INIT(head);
+	return (head);
+}
+
+/*
+ * Free an active children list head obtained from actlist_alloc().
+ * Only the head is freed; list entries are not visited.
+ */
+static void
+actlist_destroy(actlist_t *head)
+{
+ FREE(head, M_DEVBUF);
+}
+/*
+ * Insert a class into its parent's active children list, which is kept
+ * sorted by ascending virtual time (cl_vt).  A class goes after
+ * existing entries with the same cl_vt.
+ */
+static void
+actlist_insert(struct hfsc_class *cl)
+{
+	struct hfsc_class *last, *pos;
+
+	/* common case: the class belongs at the end of the list */
+	last = TAILQ_LAST(cl->cl_parent->cl_actc, _active);
+	if (last == NULL || last->cl_vt <= cl->cl_vt) {
+		TAILQ_INSERT_TAIL(cl->cl_parent->cl_actc, cl, cl_actlist);
+		return;
+	}
+
+	/* otherwise place it in front of the first entry with a larger cl_vt */
+	TAILQ_FOREACH(pos, cl->cl_parent->cl_actc, cl_actlist) {
+		if (cl->cl_vt < pos->cl_vt) {
+			TAILQ_INSERT_BEFORE(pos, cl, cl_actlist);
+			return;
+		}
+	}
+	ASSERT(0); /* should not reach here */
+}
+
+/*
+ * Remove a class from its parent's active children list.
+ */
+static void
+actlist_remove(struct hfsc_class *cl)
+{
+	actlist_t *head;
+
+	head = cl->cl_parent->cl_actc;
+	TAILQ_REMOVE(head, cl, cl_actlist);
+}
+
+/*
+ * Move a class towards the tail of its parent's active children list
+ * after its virtual time (cl_vt) has grown, keeping the list sorted by
+ * cl_vt.  The class is never moved towards the head.
+ */
+static void
+actlist_update(struct hfsc_class *cl)
+{
+ struct hfsc_class *p, *last;
+
+ /*
+ * the virtual time of a class increases monotonically during its
+ * backlogged period.
+ * if the next entry has a larger virtual time, nothing to do.
+ */
+ p = TAILQ_NEXT(cl, cl_actlist);
+ if (p == NULL || cl->cl_vt < p->cl_vt)
+ return;
+
+ /* check the last entry */
+ last = TAILQ_LAST(cl->cl_parent->cl_actc, _active);
+ ASSERT(last != NULL);
+ if (last->cl_vt <= cl->cl_vt) {
+ TAILQ_REMOVE(cl->cl_parent->cl_actc, cl, cl_actlist);
+ TAILQ_INSERT_TAIL(cl->cl_parent->cl_actc, cl, cl_actlist);
+ return;
+ }
+
+ /*
+ * the new position must be between the next entry
+ * and the last entry
+ */
+ /* p starts at the entry after cl; insert before the first larger cl_vt */
+ while ((p = TAILQ_NEXT(p, cl_actlist)) != NULL) {
+ if (cl->cl_vt < p->cl_vt) {
+ TAILQ_REMOVE(cl->cl_parent->cl_actc, cl, cl_actlist);
+ TAILQ_INSERT_BEFORE(p, cl, cl_actlist);
+ return;
+ }
+ }
+ ASSERT(0); /* should not reach here */
+}
+
+/*
+ * walk cl's active children list from the head and return the first
+ * child whose fit-time (cl_f) is not later than cur_time.
+ * returns NULL when no child fits.
+ */
+static struct hfsc_class *
+actlist_firstfit(struct hfsc_class *cl, u_int64_t cur_time)
+{
+	struct hfsc_class *child = TAILQ_FIRST(cl->cl_actc);
+
+	while (child != NULL && child->cl_f > cur_time)
+		child = TAILQ_NEXT(child, cl_actlist);
+	return (child);
+}
+
+/*
+ * service curve support functions
+ *
+ * external service curve parameters
+ * m: bits/sec
+ * d: msec
+ * internal service curve parameters
+ * sm: (bytes/tsc_interval) << SM_SHIFT
+ * ism: (tsc_count/byte) << ISM_SHIFT
+ * dx: tsc_count
+ *
+ * SM_SHIFT and ISM_SHIFT are scaled in order to keep effective digits.
+ * we should be able to handle 100K-1Gbps linkspeed with 200Hz-1GHz CPU
+ * speed. SM_SHIFT and ISM_SHIFT are selected to have at least 3 effective
+ * digits in decimal using the following table.
+ *
+ * bits/sec 100Kbps 1Mbps 10Mbps 100Mbps 1Gbps
+ * ----------+-------------------------------------------------------
+ * bytes/nsec 12.5e-6 125e-6 1250e-6 12500e-6 125000e-6
+ * sm(500MHz) 25.0e-6 250e-6 2500e-6 25000e-6 250000e-6
+ * sm(200MHz) 62.5e-6 625e-6 6250e-6 62500e-6 625000e-6
+ *
+ * nsec/byte 80000 8000 800 80 8
+ * ism(500MHz) 40000 4000 400 40 4
+ * ism(200MHz) 16000 1600 160 16 1.6
+ */
+#define	SM_SHIFT	24	/* fixed-point shift carried by slopes (sm) */
+#define	ISM_SHIFT	10	/* fixed-point shift carried by inverse slopes (ism) */
+
+#define	SM_MASK		((1LL << SM_SHIFT) - 1)	/* low SM_SHIFT bits, used by seg_x2y() */
+#define	ISM_MASK	((1LL << ISM_SHIFT) - 1)	/* low ISM_SHIFT bits, used by seg_y2x() */
+
+/*
+ * y = (x * sm) >> SM_SHIFT for one linear segment with scaled slope sm.
+ * x is split at bit SM_SHIFT and the two partial products are summed so
+ * that the intermediate product does not overflow as easily as a
+ * straight 64-bit multiply would.
+ */
+static __inline u_int64_t
+seg_x2y(u_int64_t x, u_int64_t sm)
+{
+	u_int64_t upper = x >> SM_SHIFT;
+	u_int64_t lower = x & SM_MASK;
+
+	return (upper * sm + ((lower * sm) >> SM_SHIFT));
+}
+
+/*
+ * x = (y * ism) >> ISM_SHIFT for one linear segment with scaled inverse
+ * slope ism, computed in two halves like seg_x2y().
+ * y == 0 always maps to 0; otherwise an inverse slope of HT_INFINITY
+ * (as m2ism() returns for a zero slope) maps to HT_INFINITY.
+ */
+static __inline u_int64_t
+seg_y2x(u_int64_t y, u_int64_t ism)
+{
+	if (y == 0)
+		return (0);
+	if (ism == HT_INFINITY)
+		return (HT_INFINITY);
+	return ((y >> ISM_SHIFT) * ism +
+	    (((y & ISM_MASK) * ism) >> ISM_SHIFT));
+}
+
+/*
+ * convert a slope m in bits/sec to the internal scaled slope:
+ * (m / 8 / machclk_freq) carried with SM_SHIFT fraction bits.
+ */
+static __inline u_int64_t
+m2sm(u_int m)
+{
+	u_int64_t scaled = (u_int64_t)m << SM_SHIFT;
+
+	return (scaled / 8 / machclk_freq);
+}
+
+/*
+ * convert a slope m in bits/sec to the internal scaled inverse slope:
+ * (machclk_freq * 8 / m) carried with ISM_SHIFT fraction bits.
+ * a zero slope has no finite inverse and yields HT_INFINITY.
+ */
+static __inline u_int64_t
+m2ism(u_int m)
+{
+	if (m == 0)
+		return (HT_INFINITY);
+	return (((u_int64_t)machclk_freq << ISM_SHIFT) * 8 / m);
+}
+
+/* convert a duration d in msec to machine clock counts */
+static __inline u_int64_t
+d2dx(u_int d)
+{
+	return (((u_int64_t)d * machclk_freq) / 1000);
+}
+
+/*
+ * inverse of m2sm(): turn an internal scaled slope back into bits/sec.
+ * the result is truncated to u_int.
+ */
+static u_int
+sm2m(u_int64_t sm)
+{
+	return ((u_int)((sm * 8 * machclk_freq) >> SM_SHIFT));
+}
+
+/*
+ * inverse of d2dx(): turn machine clock counts back into msec.
+ * the result is truncated to u_int.
+ */
+static u_int
+dx2d(u_int64_t dx)
+{
+	return ((u_int)(dx * 1000 / machclk_freq));
+}
+
+/*
+ * translate an external service curve (m1, d, m2) into the internal
+ * representation: scaled slopes and inverse slopes for both segments,
+ * plus the x- and y-projection (dx, dy) of the first segment.
+ */
+static void
+sc2isc(struct service_curve *sc, struct internal_sc *isc)
+{
+	u_int64_t slope1 = m2sm(sc->m1);
+	u_int64_t width1 = d2dx(sc->d);
+
+	/* first segment */
+	isc->sm1 = slope1;
+	isc->ism1 = m2ism(sc->m1);
+	isc->dx = width1;
+	isc->dy = seg_x2y(width1, slope1);
+
+	/* second segment */
+	isc->sm2 = m2sm(sc->m2);
+	isc->ism2 = m2ism(sc->m2);
+}
+
+/*
+ * set up a runtime service curve: copy the shape (slopes, inverse
+ * slopes and first-segment projections) from the internal service curve
+ * and anchor it at the point (x, y).
+ */
+static void
+rtsc_init(struct runtime_sc *rtsc, struct internal_sc * isc, u_int64_t x,
+    u_int64_t y)
+{
+	/* origin */
+	rtsc->x = x;
+	rtsc->y = y;
+
+	/* first segment */
+	rtsc->dx = isc->dx;
+	rtsc->dy = isc->dy;
+	rtsc->sm1 = isc->sm1;
+	rtsc->ism1 = isc->ism1;
+
+	/* second segment */
+	rtsc->sm2 = isc->sm2;
+	rtsc->ism2 = isc->ism2;
+}
+
+/*
+ * inverse lookup on a runtime service curve: return the x value at
+ * which the curve reaches the given y value.
+ * a y below the curve's origin maps to the origin's x.
+ */
+static u_int64_t
+rtsc_y2x(struct runtime_sc *rtsc, u_int64_t y)
+{
+	if (y < rtsc->y)
+		return (rtsc->x);
+
+	if (y > rtsc->y + rtsc->dy) {
+		/* x belongs to the 2nd segment */
+		return (rtsc->x + rtsc->dx +
+		    seg_y2x(y - rtsc->y - rtsc->dy, rtsc->ism2));
+	}
+
+	/* x belongs to the 1st segment */
+	if (rtsc->dy == 0) {
+		/* the 1st segment has no height: skip to its end */
+		return (rtsc->x + rtsc->dx);
+	}
+	return (rtsc->x + seg_y2x(y - rtsc->y, rtsc->ism1));
+}
+
+/*
+ * forward lookup on a runtime service curve: return the y value of the
+ * curve at the given x.  an x at or before the origin maps to the
+ * origin's y.
+ */
+static u_int64_t
+rtsc_x2y(struct runtime_sc *rtsc, u_int64_t x)
+{
+	if (x <= rtsc->x)
+		return (rtsc->y);
+
+	if (x <= rtsc->x + rtsc->dx) {
+		/* y belongs to the 1st segment */
+		return (rtsc->y + seg_x2y(x - rtsc->x, rtsc->sm1));
+	}
+
+	/* y belongs to the 2nd segment */
+	return (rtsc->y + rtsc->dy +
+	    seg_x2y(x - rtsc->x - rtsc->dx, rtsc->sm2));
+}
+
+/*
+ * replace the runtime service curve by the minimum of itself and the
+ * service curve isc anchored at (x, y).
+ * only the origin and the first-segment projections (dx, dy) of rtsc
+ * are rewritten; its slopes are left as they are.
+ */
+static void
+rtsc_min(struct runtime_sc *rtsc, struct internal_sc *isc, u_int64_t x,
+    u_int64_t y)
+{
+	u_int64_t cur_y, end_y, ndx, ndy;
+
+	/* value of the current runtime curve at x; both shapes need it */
+	cur_y = rtsc_x2y(rtsc, x);
+
+	if (isc->sm1 <= isc->sm2) {
+		/*
+		 * service curve is convex: move the origin unless the
+		 * current rtsc is already the smaller one at x
+		 */
+		if (cur_y >= y) {
+			rtsc->x = x;
+			rtsc->y = y;
+		}
+		return;
+	}
+
+	/* service curve is concave from here on */
+	if (cur_y <= y) {
+		/* rtsc is below isc, no change to rtsc */
+		return;
+	}
+
+	/* value of the current runtime curve at the end of isc's 1st segment */
+	end_y = rtsc_x2y(rtsc, x + isc->dx);
+	if (end_y >= y + isc->dy) {
+		/* rtsc is above isc, replace rtsc by isc */
+		ndx = isc->dx;
+		ndy = isc->dy;
+	} else {
+		/*
+		 * the two curves intersect: solve
+		 *	seg_x2y(dx, sm1) == seg_x2y(dx, sm2) + (cur_y - y)
+		 * for dx
+		 */
+		ndx = ((cur_y - y) << SM_SHIFT) / (isc->sm1 - isc->sm2);
+		/*
+		 * if (x, cur_y) still lies on the 1st segment of rtsc,
+		 * add what is left of that segment
+		 */
+		if (rtsc->x + rtsc->dx > x)
+			ndx += rtsc->x + rtsc->dx - x;
+		ndy = seg_x2y(ndx, isc->sm1);
+	}
+
+	rtsc->x = x;
+	rtsc->y = y;
+	rtsc->dx = ndx;
+	rtsc->dy = ndy;
+}
+
+/*
+ * fill *sp with a snapshot of class cl: its three service curves
+ * (converted back to external units), work counters, scheduling times,
+ * debugging values, queue state and, for RED/RIO queues, the RED
+ * statistics.
+ */
+static void
+get_class_stats(struct hfsc_classstats *sp, struct hfsc_class *cl)
+{
+	/*
+	 * start from an all-zero record.  hfsccmd_class_stats() passes a
+	 * stack structure and then copyout()s all of it; without this,
+	 * red[] (never written for queues that are neither RED nor RIO)
+	 * and any structure padding would carry stale kernel stack
+	 * contents to userland.  it also leaves the curves of absent
+	 * service curves at zero.
+	 */
+	bzero(sp, sizeof(*sp));
+
+	sp->class_id = cl->cl_id;
+	sp->class_handle = cl->cl_handle;
+
+	if (cl->cl_rsc != NULL) {
+		sp->rsc.m1 = sm2m(cl->cl_rsc->sm1);
+		sp->rsc.d = dx2d(cl->cl_rsc->dx);
+		sp->rsc.m2 = sm2m(cl->cl_rsc->sm2);
+	}
+	if (cl->cl_fsc != NULL) {
+		sp->fsc.m1 = sm2m(cl->cl_fsc->sm1);
+		sp->fsc.d = dx2d(cl->cl_fsc->dx);
+		sp->fsc.m2 = sm2m(cl->cl_fsc->sm2);
+	}
+	if (cl->cl_usc != NULL) {
+		sp->usc.m1 = sm2m(cl->cl_usc->sm1);
+		sp->usc.d = dx2d(cl->cl_usc->dx);
+		sp->usc.m2 = sm2m(cl->cl_usc->sm2);
+	}
+
+	sp->total = cl->cl_total;
+	sp->cumul = cl->cl_cumul;
+
+	sp->d = cl->cl_d;
+	sp->e = cl->cl_e;
+	sp->vt = cl->cl_vt;
+	sp->f = cl->cl_f;
+
+	sp->initvt = cl->cl_initvt;
+	sp->vtperiod = cl->cl_vtperiod;
+	sp->parentperiod = cl->cl_parentperiod;
+	sp->nactive = cl->cl_nactive;
+	sp->vtoff = cl->cl_vtoff;
+	sp->cvtmax = cl->cl_cvtmax;
+	sp->myf = cl->cl_myf;
+	sp->cfmin = cl->cl_cfmin;
+	sp->cvtmin = cl->cl_cvtmin;
+	sp->myfadj = cl->cl_myfadj;
+	sp->vtadj = cl->cl_vtadj;
+
+	/* timestamp and clock rate so the reader can interpret the times */
+	sp->cur_time = read_machclk();
+	sp->machclk_freq = machclk_freq;
+
+	sp->qlength = qlen(cl->cl_q);
+	sp->qlimit = qlimit(cl->cl_q);
+	sp->xmit_cnt = cl->cl_stats.xmit_cnt;
+	sp->drop_cnt = cl->cl_stats.drop_cnt;
+	sp->period = cl->cl_stats.period;
+
+	sp->qtype = qtype(cl->cl_q);
+#ifdef ALTQ_RED
+	if (q_is_red(cl->cl_q))
+		red_getstats(cl->cl_red, &sp->red[0]);
+#endif
+#ifdef ALTQ_RIO
+	/* for a RIO queue cl_red actually holds a rio_t */
+	if (q_is_rio(cl->cl_q))
+		rio_getstats((rio_t *)cl->cl_red, &sp->red[0]);
+#endif
+}
+
+/*
+ * map a class handle to its class pointer.
+ * handle 0 never names a class.  the slot selected by
+ * (chandle % HFSC_MAX_CLASSES) is tried first; if it does not hold the
+ * class, the whole table is searched.  returns NULL when not found.
+ */
+static struct hfsc_class *
+clh_to_clp(struct hfsc_if *hif, u_int32_t chandle)
+{
+	struct hfsc_class *cl;
+	int slot;
+
+	if (chandle == 0)
+		return (NULL);
+
+	/* optimistic guess */
+	slot = chandle % HFSC_MAX_CLASSES;
+	cl = hif->hif_class_tbl[slot];
+	if (cl != NULL && cl->cl_handle == chandle)
+		return (cl);
+
+	/* linear search */
+	for (slot = 0; slot < HFSC_MAX_CLASSES; slot++) {
+		cl = hif->hif_class_tbl[slot];
+		if (cl == NULL || cl->cl_handle != chandle)
+			continue;
+		return (cl);
+	}
+	return (NULL);
+}
+
+#ifdef ALTQ3_COMPAT
+/*
+ * allocate and zero the per-interface hfsc state for ifq, give it an
+ * eligible list and push it on the head of hif_list.
+ * returns the new state, or NULL when an allocation fails.
+ * the bandwidth argument is not used here.
+ */
+static struct hfsc_if *
+hfsc_attach(ifq, bandwidth)
+	struct ifaltq *ifq;
+	u_int bandwidth;
+{
+	struct hfsc_if *state;
+
+	MALLOC(state, struct hfsc_if *, sizeof(*state),
+	    M_DEVBUF, M_WAITOK);
+	if (state == NULL)
+		return (NULL);
+	bzero(state, sizeof(*state));
+
+	state->hif_ifq = ifq;
+	state->hif_eligible = ellist_alloc();
+	if (state->hif_eligible == NULL) {
+		FREE(state, M_DEVBUF);
+		return NULL;
+	}
+
+	/* add this state to the hfsc list */
+	state->hif_next = hif_list;
+	hif_list = state;
+
+	return (state);
+}
+
+/*
+ * tear down the per-interface hfsc state: clear the interface, destroy
+ * the root class, unlink hif from hif_list, then free the eligible
+ * list and hif itself.  always returns 0.
+ */
+static int
+hfsc_detach(hif)
+	struct hfsc_if *hif;
+{
+	struct hfsc_if *prev;
+
+	(void)hfsc_clear_interface(hif);
+	(void)hfsc_class_destroy(hif->hif_rootclass);
+
+	/* remove this interface from the hif list */
+	if (hif_list != hif) {
+		/* find the entry whose successor is hif */
+		prev = hif_list;
+		while (prev != NULL && prev->hif_next != hif)
+			prev = prev->hif_next;
+		if (prev != NULL)
+			prev->hif_next = hif->hif_next;
+		ASSERT(prev != NULL);
+	} else
+		hif_list = hif->hif_next;
+
+	ellist_destroy(hif->hif_eligible);
+
+	FREE(hif, M_DEVBUF);
+
+	return (0);
+}
+
+/*
+ * change the service curves of class cl.
+ *
+ * rsc, fsc and usc select the real-time, link-sharing and upper-limit
+ * curve respectively; a NULL pointer leaves that curve alone.  a curve
+ * whose m1 and m2 are both 0 removes the corresponding internal curve,
+ * any other curve installs or replaces it.
+ *
+ * returns 0, or ENOMEM when an internal curve cannot be allocated.  on
+ * ENOMEM the class is left unmodified and nothing is leaked.
+ */
+static int
+hfsc_class_modify(cl, rsc, fsc, usc)
+	struct hfsc_class *cl;
+	struct service_curve *rsc, *fsc, *usc;
+{
+	struct internal_sc *rsc_tmp, *fsc_tmp, *usc_tmp;
+	u_int64_t cur_time;
+	int s;
+
+	/*
+	 * allocate every internal curve that will be needed up front,
+	 * before interrupts are blocked below.
+	 */
+	rsc_tmp = fsc_tmp = usc_tmp = NULL;
+	if (rsc != NULL && (rsc->m1 != 0 || rsc->m2 != 0) &&
+	    cl->cl_rsc == NULL) {
+		MALLOC(rsc_tmp, struct internal_sc *,
+		    sizeof(struct internal_sc), M_DEVBUF, M_WAITOK);
+		if (rsc_tmp == NULL)
+			goto nomem;
+	}
+	if (fsc != NULL && (fsc->m1 != 0 || fsc->m2 != 0) &&
+	    cl->cl_fsc == NULL) {
+		MALLOC(fsc_tmp, struct internal_sc *,
+		    sizeof(struct internal_sc), M_DEVBUF, M_WAITOK);
+		if (fsc_tmp == NULL)
+			goto nomem;
+	}
+	if (usc != NULL && (usc->m1 != 0 || usc->m2 != 0) &&
+	    cl->cl_usc == NULL) {
+		MALLOC(usc_tmp, struct internal_sc *,
+		    sizeof(struct internal_sc), M_DEVBUF, M_WAITOK);
+		if (usc_tmp == NULL)
+			goto nomem;
+	}
+
+	cur_time = read_machclk();
+#ifdef __NetBSD__
+	s = splnet();
+#else
+	s = splimp();
+#endif
+
+	if (rsc != NULL) {
+		if (rsc->m1 == 0 && rsc->m2 == 0) {
+			/* drop the real-time curve; queued packets go with it */
+			if (cl->cl_rsc != NULL) {
+				if (!qempty(cl->cl_q))
+					hfsc_purgeq(cl);
+				FREE(cl->cl_rsc, M_DEVBUF);
+				cl->cl_rsc = NULL;
+			}
+		} else {
+			if (cl->cl_rsc == NULL)
+				cl->cl_rsc = rsc_tmp;
+			sc2isc(rsc, cl->cl_rsc);
+			rtsc_init(&cl->cl_deadline, cl->cl_rsc, cur_time,
+			    cl->cl_cumul);
+			/*
+			 * the eligible curve starts as a copy of the
+			 * deadline curve; for a convex curve its first
+			 * segment is flattened out.
+			 */
+			cl->cl_eligible = cl->cl_deadline;
+			if (cl->cl_rsc->sm1 <= cl->cl_rsc->sm2) {
+				cl->cl_eligible.dx = 0;
+				cl->cl_eligible.dy = 0;
+			}
+		}
+	}
+
+	if (fsc != NULL) {
+		if (fsc->m1 == 0 && fsc->m2 == 0) {
+			/* drop the link-sharing curve; queued packets go with it */
+			if (cl->cl_fsc != NULL) {
+				if (!qempty(cl->cl_q))
+					hfsc_purgeq(cl);
+				FREE(cl->cl_fsc, M_DEVBUF);
+				cl->cl_fsc = NULL;
+			}
+		} else {
+			if (cl->cl_fsc == NULL)
+				cl->cl_fsc = fsc_tmp;
+			sc2isc(fsc, cl->cl_fsc);
+			rtsc_init(&cl->cl_virtual, cl->cl_fsc, cl->cl_vt,
+			    cl->cl_total);
+		}
+	}
+
+	if (usc != NULL) {
+		if (usc->m1 == 0 && usc->m2 == 0) {
+			/* drop the upper limit and its fit-time */
+			if (cl->cl_usc != NULL) {
+				FREE(cl->cl_usc, M_DEVBUF);
+				cl->cl_usc = NULL;
+				cl->cl_myf = 0;
+			}
+		} else {
+			if (cl->cl_usc == NULL)
+				cl->cl_usc = usc_tmp;
+			sc2isc(usc, cl->cl_usc);
+			rtsc_init(&cl->cl_ulimit, cl->cl_usc, cur_time,
+			    cl->cl_total);
+		}
+	}
+
+	/* a still backlogged class needs its times recomputed */
+	if (!qempty(cl->cl_q)) {
+		if (cl->cl_rsc != NULL)
+			update_ed(cl, m_pktlen(qhead(cl->cl_q)));
+		if (cl->cl_fsc != NULL)
+			update_vf(cl, 0, cur_time);
+		/* is this enough? */
+	}
+
+	splx(s);
+
+	return (0);
+
+nomem:
+	/* release whatever was allocated before the failing request */
+	if (rsc_tmp != NULL)
+		FREE(rsc_tmp, M_DEVBUF);
+	if (fsc_tmp != NULL)
+		FREE(fsc_tmp, M_DEVBUF);
+	if (usc_tmp != NULL)
+		FREE(usc_tmp, M_DEVBUF);
+	return (ENOMEM);
+}
+
+/*
+ * hfsc device interface
+ */
+
+/*
+ * open: calls init_machclk() when machclk_freq is still 0 and fails
+ * with ENXIO if it is 0 afterwards.  nothing else is set up here; the
+ * real work happens when the queueing scheme is attached.
+ */
+int
+hfscopen(dev, flag, fmt, p)
+	dev_t dev;
+	int flag, fmt;
+#if (__FreeBSD_version > 500000)
+	struct thread *p;
+#else
+	struct proc *p;
+#endif
+{
+	if (machclk_freq == 0) {
+		init_machclk();
+		if (machclk_freq == 0) {
+			printf("hfsc: no cpu clock available!\n");
+			return (ENXIO);
+		}
+	}
+
+	/* everything will be done when the queueing scheme is attached. */
+	return 0;
+}
+
+/*
+ * close: disable, detach and destroy every hfsc interface state on
+ * hif_list.  returns 0, or the first error met along the way.
+ *
+ * the list is walked with a saved successor rather than by re-reading
+ * the list head: an entry for which altq_detach() fails stays on the
+ * list, and re-reading the head would then pick the same entry again
+ * and never terminate.
+ */
+int
+hfscclose(dev, flag, fmt, p)
+	dev_t dev;
+	int flag, fmt;
+#if (__FreeBSD_version > 500000)
+	struct thread *p;
+#else
+	struct proc *p;
+#endif
+{
+	struct hfsc_if *hif, *next;
+	int err, error = 0;
+
+	for (hif = hif_list; hif != NULL; hif = next) {
+		/* hfsc_detach() frees hif, so fetch the successor first */
+		next = hif->hif_next;
+
+		/* destroy all */
+		if (ALTQ_IS_ENABLED(hif->hif_ifq))
+			altq_disable(hif->hif_ifq);
+
+		err = altq_detach(hif->hif_ifq);
+		if (err == 0)
+			err = hfsc_detach(hif);
+		if (err != 0 && error == 0)
+			error = err;
+	}
+
+	return error;
+}
+
+/*
+ * ioctl entry point: check privilege, then hand the request to the
+ * matching hfsccmd_*() handler.  HFSC_ENABLE, HFSC_DISABLE and
+ * HFSC_CLEAR_HIERARCHY are handled inline after looking up the
+ * interface.  unknown commands yield EINVAL.
+ */
+int
+hfscioctl(dev, cmd, addr, flag, p)
+	dev_t dev;
+	ioctlcmd_t cmd;
+	caddr_t addr;
+	int flag;
+#if (__FreeBSD_version > 500000)
+	struct thread *p;
+#else
+	struct proc *p;
+#endif
+{
+	struct hfsc_if *hif;
+	struct hfsc_interface *ifacep;
+	int error = 0;
+
+	/* every command except HFSC_GETSTATS needs super-user privilege */
+	if (cmd != HFSC_GETSTATS) {
+#if (__FreeBSD_version > 400000)
+		error = suser(p);
+#else
+		error = suser(p->p_ucred, &p->p_acflag);
+#endif
+		if (error != 0)
+			return (error);
+	}
+
+	switch (cmd) {
+
+	case HFSC_IF_ATTACH:
+		error = hfsccmd_if_attach((struct hfsc_attach *)addr);
+		break;
+
+	case HFSC_IF_DETACH:
+		error = hfsccmd_if_detach((struct hfsc_interface *)addr);
+		break;
+
+	case HFSC_ENABLE:
+	case HFSC_DISABLE:
+	case HFSC_CLEAR_HIERARCHY:
+		ifacep = (struct hfsc_interface *)addr;
+		hif = altq_lookup(ifacep->hfsc_ifname, ALTQT_HFSC);
+		if (hif == NULL) {
+			error = EBADF;
+			break;
+		}
+
+		if (cmd == HFSC_CLEAR_HIERARCHY)
+			hfsc_clear_interface(hif);
+		else if (cmd == HFSC_DISABLE)
+			error = altq_disable(hif->hif_ifq);
+		else if (hif->hif_defaultclass != NULL)
+			error = altq_enable(hif->hif_ifq);
+		else {
+			/* HFSC_ENABLE is refused without a default class */
+#ifdef ALTQ_DEBUG
+			printf("hfsc: no default class\n");
+#endif
+			error = EINVAL;
+		}
+		break;
+
+	case HFSC_ADD_CLASS:
+		error = hfsccmd_add_class((struct hfsc_add_class *)addr);
+		break;
+
+	case HFSC_DEL_CLASS:
+		error = hfsccmd_delete_class((struct hfsc_delete_class *)addr);
+		break;
+
+	case HFSC_MOD_CLASS:
+		error = hfsccmd_modify_class((struct hfsc_modify_class *)addr);
+		break;
+
+	case HFSC_ADD_FILTER:
+		error = hfsccmd_add_filter((struct hfsc_add_filter *)addr);
+		break;
+
+	case HFSC_DEL_FILTER:
+		error = hfsccmd_delete_filter((struct hfsc_delete_filter *)addr);
+		break;
+
+	case HFSC_GETSTATS:
+		error = hfsccmd_class_stats((struct hfsc_class_stats *)addr);
+		break;
+
+	default:
+		error = EINVAL;
+		break;
+	}
+	return error;
+}
+
+/*
+ * HFSC_IF_ATTACH: create hfsc state for the named interface and
+ * register it on the interface's send queue.
+ * returns ENXIO for an unknown interface, ENOMEM when the state cannot
+ * be created, or the result of altq_attach(); when altq_attach() fails
+ * the freshly created state is destroyed again.
+ */
+static int
+hfsccmd_if_attach(ap)
+	struct hfsc_attach *ap;
+{
+	struct hfsc_if *hif;
+	struct ifnet *ifp;
+	int error;
+
+	ifp = ifunit(ap->iface.hfsc_ifname);
+	if (ifp == NULL)
+		return (ENXIO);
+
+	hif = hfsc_attach(&ifp->if_snd, ap->bandwidth);
+	if (hif == NULL)
+		return (ENOMEM);
+
+	/*
+	 * set HFSC to this ifnet structure.
+	 */
+	error = altq_attach(&ifp->if_snd, ALTQT_HFSC, hif,
+	    hfsc_enqueue, hfsc_dequeue, hfsc_request,
+	    &hif->hif_classifier, acc_classify);
+	if (error != 0)
+		(void)hfsc_detach(hif);
+
+	return (error);
+}
+
+/*
+ * HFSC_IF_DETACH: disable the discipline if it is enabled, detach it
+ * from the interface and destroy the hfsc state.
+ * returns EBADF when the interface has no hfsc state, otherwise the
+ * error from altq_detach() or the result of hfsc_detach().
+ */
+static int
+hfsccmd_if_detach(ap)
+	struct hfsc_interface *ap;
+{
+	struct hfsc_if *hif;
+	int error;
+
+	hif = altq_lookup(ap->hfsc_ifname, ALTQT_HFSC);
+	if (hif == NULL)
+		return (EBADF);
+
+	if (ALTQ_IS_ENABLED(hif->hif_ifq))
+		altq_disable(hif->hif_ifq);
+
+	error = altq_detach(hif->hif_ifq);
+	if (error != 0)
+		return (error);
+
+	return hfsc_detach(hif);
+}
+
+/*
+ * HFSC_ADD_CLASS: create a class under the given parent.
+ * a null parent handle is accepted only while no root class exists.
+ * the new handle is the number of the first free slot (starting at 1)
+ * in hif_class_tbl[] and is handed back in ap->class_handle.
+ * the supplied service curve is passed to hfsc_class_create() as its
+ * first curve argument, with NULL for the other two.
+ * returns EBADF, EINVAL (no such parent), EBUSY (no free slot),
+ * ENOMEM (creation failed) or 0.
+ */
+static int
+hfsccmd_add_class(ap)
+	struct hfsc_add_class *ap;
+{
+	struct hfsc_if *hif;
+	struct hfsc_class *parent;
+	int slot;
+
+	hif = altq_lookup(ap->iface.hfsc_ifname, ALTQT_HFSC);
+	if (hif == NULL)
+		return (EBADF);
+
+	if (ap->parent_handle == HFSC_NULLCLASS_HANDLE &&
+	    hif->hif_rootclass == NULL)
+		parent = NULL;
+	else {
+		parent = clh_to_clp(hif, ap->parent_handle);
+		if (parent == NULL)
+			return (EINVAL);
+	}
+
+	/* assign a class handle (use a free slot number for now) */
+	slot = 1;
+	while (slot < HFSC_MAX_CLASSES && hif->hif_class_tbl[slot] != NULL)
+		slot++;
+	if (slot == HFSC_MAX_CLASSES)
+		return (EBUSY);
+
+	if (hfsc_class_create(hif, &ap->service_curve, NULL, NULL,
+	    parent, ap->qlimit, ap->flags, slot) == NULL)
+		return (ENOMEM);
+
+	/* return a class handle to the user */
+	ap->class_handle = slot;
+
+	return (0);
+}
+
+/*
+ * HFSC_DEL_CLASS: destroy the class named by ap->class_handle.
+ * returns EBADF when the interface has no hfsc state, EINVAL for an
+ * unknown handle, otherwise the result of hfsc_class_destroy().
+ */
+static int
+hfsccmd_delete_class(ap)
+	struct hfsc_delete_class *ap;
+{
+	struct hfsc_if *hif;
+	struct hfsc_class *cl;
+
+	hif = altq_lookup(ap->iface.hfsc_ifname, ALTQT_HFSC);
+	if (hif == NULL)
+		return (EBADF);
+
+	cl = clh_to_clp(hif, ap->class_handle);
+	if (cl == NULL)
+		return (EINVAL);
+
+	return hfsc_class_destroy(cl);
+}
+
+/*
+ * HFSC_MOD_CLASS: apply ap->service_curve to each curve type selected
+ * in the ap->sctype bit mask (HFSC_REALTIMESC, HFSC_LINKSHARINGSC,
+ * HFSC_UPPERLIMITSC).  unselected curves are passed as NULL and stay
+ * untouched.
+ * returns EBADF, EINVAL (unknown handle) or the result of
+ * hfsc_class_modify().
+ */
+static int
+hfsccmd_modify_class(ap)
+	struct hfsc_modify_class *ap;
+{
+	struct hfsc_if *hif;
+	struct hfsc_class *cl;
+	struct service_curve *curve, *rsc, *fsc, *usc;
+
+	hif = altq_lookup(ap->iface.hfsc_ifname, ALTQT_HFSC);
+	if (hif == NULL)
+		return (EBADF);
+
+	cl = clh_to_clp(hif, ap->class_handle);
+	if (cl == NULL)
+		return (EINVAL);
+
+	/* the same curve is used for every selected type */
+	curve = &ap->service_curve;
+	rsc = (ap->sctype & HFSC_REALTIMESC) ? curve : NULL;
+	fsc = (ap->sctype & HFSC_LINKSHARINGSC) ? curve : NULL;
+	usc = (ap->sctype & HFSC_UPPERLIMITSC) ? curve : NULL;
+
+	return hfsc_class_modify(cl, rsc, fsc, usc);
+}
+
+/*
+ * HFSC_ADD_FILTER: attach a flow filter to a class.  classes for which
+ * is_a_parent_class() is true are rejected.
+ * returns EBADF, EINVAL (unknown handle or parent class) or the result
+ * of acc_add_filter(), which also receives &ap->filter_handle.
+ */
+static int
+hfsccmd_add_filter(ap)
+	struct hfsc_add_filter *ap;
+{
+	struct hfsc_if *hif;
+	struct hfsc_class *cl;
+
+	hif = altq_lookup(ap->iface.hfsc_ifname, ALTQT_HFSC);
+	if (hif == NULL)
+		return (EBADF);
+
+	cl = clh_to_clp(hif, ap->class_handle);
+	if (cl == NULL)
+		return (EINVAL);
+
+	if (!is_a_parent_class(cl))
+		return acc_add_filter(&hif->hif_classifier, &ap->filter,
+		    cl, &ap->filter_handle);
+
+#ifdef ALTQ_DEBUG
+	printf("hfsccmd_add_filter: not a leaf class!\n");
+#endif
+	return (EINVAL);
+}
+
+/*
+ * HFSC_DEL_FILTER: remove the filter named by ap->filter_handle from
+ * the interface's classifier.
+ * returns EBADF when the interface has no hfsc state, otherwise the
+ * result of acc_delete_filter().
+ */
+static int
+hfsccmd_delete_filter(ap)
+	struct hfsc_delete_filter *ap;
+{
+	struct hfsc_if *hif;
+
+	hif = altq_lookup(ap->iface.hfsc_ifname, ALTQT_HFSC);
+	if (hif == NULL)
+		return (EBADF);
+
+	return acc_delete_filter(&hif->hif_classifier, ap->filter_handle);
+}
+
+/*
+ * HFSC_GETSTATS: report interface-wide counters in *ap and copy the
+ * statistics of up to ap->nclasses classes out to the user array
+ * ap->stats, after skipping the first ap->nskip classes in
+ * hfsc_nextclass() order starting at the root.
+ * on success ap->nclasses is rewritten with the number of records
+ * actually copied.
+ * returns EBADF, EINVAL (fewer than nskip classes exist), a copyout()
+ * error or 0.
+ */
+static int
+hfsccmd_class_stats(ap)
+	struct hfsc_class_stats *ap;
+{
+	struct hfsc_if *hif;
+	struct hfsc_class *cl;
+	struct hfsc_classstats stats, *usp;
+	int n, limit, error;
+
+	hif = altq_lookup(ap->iface.hfsc_ifname, ALTQT_HFSC);
+	if (hif == NULL)
+		return (EBADF);
+
+	ap->cur_time = read_machclk();
+	ap->machclk_freq = machclk_freq;
+	ap->hif_classes = hif->hif_classes;
+	ap->hif_packets = hif->hif_packets;
+
+	/* skip the first N classes in the tree */
+	limit = ap->nskip;
+	cl = hif->hif_rootclass;
+	n = 0;
+	while (cl != NULL && n < limit) {
+		cl = hfsc_nextclass(cl);
+		n++;
+	}
+	if (n != limit)
+		return (EINVAL);
+
+	/* then, read the next N classes in the tree */
+	limit = ap->nclasses;
+	usp = ap->stats;
+	n = 0;
+	while (cl != NULL && n < limit) {
+		get_class_stats(&stats, cl);
+
+		error = copyout((caddr_t)&stats, (caddr_t)usp, sizeof(stats));
+		if (error != 0)
+			return (error);
+
+		usp++;
+		n++;
+		cl = hfsc_nextclass(cl);
+	}
+
+	ap->nclasses = n;
+
+	return (0);
+}
+
+#ifdef KLD_MODULE
+
+static struct altqsw hfsc_sw =
+	{"hfsc", hfscopen, hfscclose, hfscioctl};	/* NOTE(review): presumably name + open/close/ioctl entry points; confirm against struct altqsw */
+
+ALTQ_MODULE(altq_hfsc, ALTQT_HFSC, &hfsc_sw);
+MODULE_DEPEND(altq_hfsc, altq_red, 1, 1, 1);	/* get_class_stats() calls red_getstats() */
+MODULE_DEPEND(altq_hfsc, altq_rio, 1, 1, 1);	/* get_class_stats() calls rio_getstats() */
+
+#endif /* KLD_MODULE */
+#endif /* ALTQ3_COMPAT */
+
+#endif /* ALTQ_HFSC */
diff --git a/sys/contrib/altq/altq/altq_hfsc.h b/sys/contrib/altq/altq/altq_hfsc.h
new file mode 100644
index 000000000000..91ba3d184a46
--- /dev/null
+++ b/sys/contrib/altq/altq/altq_hfsc.h
@@ -0,0 +1,320 @@
+/* $KAME: altq_hfsc.h,v 1.12 2003/12/05 05:40:46 kjc Exp $ */
+
+/*
+ * Copyright (c) 1997-1999 Carnegie Mellon University. All Rights Reserved.
+ *
+ * Permission to use, copy, modify, and distribute this software and
+ * its documentation is hereby granted (including for commercial or
+ * for-profit use), provided that both the copyright notice and this
+ * permission notice appear in all copies of the software, derivative
+ * works, or modified versions, and any portions thereof.
+ *
+ * THIS SOFTWARE IS EXPERIMENTAL AND IS KNOWN TO HAVE BUGS, SOME OF
+ * WHICH MAY HAVE SERIOUS CONSEQUENCES. CARNEGIE MELLON PROVIDES THIS
+ * SOFTWARE IN ITS ``AS IS'' CONDITION, AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+ * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * Carnegie Mellon encourages (but does not require) users of this
+ * software to return any improvements or extensions that they make,
+ * and to grant Carnegie Mellon the rights to redistribute these
+ * changes without encumbrance.
+ */
+#ifndef _ALTQ_ALTQ_HFSC_H_
+#define _ALTQ_ALTQ_HFSC_H_
+
+#include <altq/altq.h>
+#include <altq/altq_classq.h>
+#include <altq/altq_red.h>
+#include <altq/altq_rio.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct service_curve {	/* two-segment linear service curve in external units */
+	u_int	m1;	/* slope of the first segment in bits/sec */
+	u_int	d;	/* the x-projection of the first segment in msec */
+	u_int	m2;	/* slope of the second segment in bits/sec */
+};
+
+/* special class handles */
+#define	HFSC_NULLCLASS_HANDLE	0	/* never names a class; used as the parent of the root */
+#define	HFSC_MAX_CLASSES	64	/* size of hif_class_tbl[] */
+
+/* hfsc class flags */
+#define	HFCF_RED		0x0001	/* use RED */
+#define	HFCF_ECN		0x0002	/* use RED/ECN */
+#define	HFCF_RIO		0x0004	/* use RIO */
+#define	HFCF_CLEARDSCP		0x0010	/* clear diffserv codepoint */
+#define	HFCF_DEFAULTCLASS	0x1000	/* default class */
+
+/* service curve types (bit mask, see hfsc_modify_class.sctype) */
+#define	HFSC_REALTIMESC		1	/* real-time service curve */
+#define	HFSC_LINKSHARINGSC	2	/* link-sharing service curve */
+#define	HFSC_UPPERLIMITSC	4	/* upper-limit service curve */
+#define	HFSC_DEFAULTSC		(HFSC_REALTIMESC|HFSC_LINKSHARINGSC)
+
+struct hfsc_classstats {	/* per-class snapshot exported to userland */
+	u_int			class_id;	/* cl_id */
+	u_int32_t		class_handle;	/* cl_handle */
+	struct service_curve	rsc;	/* real-time service curve (all 0 if none) */
+	struct service_curve	fsc;	/* link-sharing service curve (all 0 if none) */
+	struct service_curve	usc;	/* upper limit service curve (all 0 if none) */
+
+	u_int64_t	total;		/* total work in bytes */
+	u_int64_t	cumul;		/* cumulative work in bytes
+					   done by real-time criteria */
+	u_int64_t	d;		/* deadline */
+	u_int64_t	e;		/* eligible time */
+	u_int64_t	vt;		/* virtual time */
+	u_int64_t	f;		/* fit time for upper-limit */
+
+	/* info helpful for debugging */
+	u_int64_t	initvt;		/* init virtual time */
+	u_int64_t	vtoff;		/* cl_vtoff */
+	u_int64_t	cvtmax;		/* cl_cvtmax */
+	u_int64_t	myf;		/* cl_myf */
+	u_int64_t	cfmin;		/* cl_cfmin */
+	u_int64_t	cvtmin;		/* cl_cvtmin */
+	u_int64_t	myfadj;		/* cl_myfadj */
+	u_int64_t	vtadj;		/* cl_vtadj */
+	u_int64_t	cur_time;	/* machine clock when the snapshot was taken */
+	u_int32_t	machclk_freq;	/* machine clock frequency */
+
+	u_int		qlength;	/* current queue length */
+	u_int		qlimit;		/* queue limit */
+	struct pktcntr	xmit_cnt;	/* cl_stats.xmit_cnt */
+	struct pktcntr	drop_cnt;	/* cl_stats.drop_cnt */
+	u_int		period;		/* cl_stats.period */
+
+	u_int		vtperiod;	/* vt period sequence no */
+	u_int		parentperiod;	/* parent's vt period seqno */
+	int		nactive;	/* number of active children */
+
+	/* red and rio related info */
+	int		qtype;		/* queue type */
+	struct redstats	red[3];		/* filled in only for RED or RIO queues */
+};
+
+#ifdef ALTQ3_COMPAT
+struct hfsc_interface {	/* names the interface an ioctl request applies to */
+	char	hfsc_ifname[IFNAMSIZ];  /* interface name (e.g., fxp0) */
+};
+
+struct hfsc_attach {	/* argument of HFSC_IF_ATTACH */
+	struct hfsc_interface	iface;
+	u_int			bandwidth;  /* link bandwidth in bits/sec */
+};
+
+struct hfsc_add_class {	/* argument of HFSC_ADD_CLASS */
+	struct hfsc_interface	iface;
+	u_int32_t		parent_handle;	/* HFSC_NULLCLASS_HANDLE while there is no root class */
+	struct service_curve	service_curve;	/* curve for the new class */
+	int			qlimit;		/* passed on to hfsc_class_create() */
+	int			flags;		/* passed on to hfsc_class_create() */
+
+	u_int32_t		class_handle;  /* return value */
+};
+
+struct hfsc_delete_class {	/* argument of HFSC_DEL_CLASS */
+	struct hfsc_interface	iface;
+	u_int32_t		class_handle;	/* class to destroy */
+};
+
+struct hfsc_modify_class {	/* argument of HFSC_MOD_CLASS */
+	struct hfsc_interface	iface;
+	u_int32_t		class_handle;	/* class to modify */
+	struct service_curve	service_curve;	/* applied to every curve type selected in sctype */
+	int			sctype;		/* bit mask of HFSC_REALTIMESC, HFSC_LINKSHARINGSC, HFSC_UPPERLIMITSC */
+};
+
+struct hfsc_add_filter {	/* argument of HFSC_ADD_FILTER */
+	struct hfsc_interface	iface;
+	u_int32_t		class_handle;	/* class the filter maps to; parent classes are rejected */
+	struct flow_filter	filter;
+
+	u_long			filter_handle;  /* return value */
+};
+
+struct hfsc_delete_filter {	/* argument of HFSC_DEL_FILTER */
+	struct hfsc_interface	iface;
+	u_long			filter_handle;	/* filter to remove */
+};
+
+struct hfsc_class_stats {	/* argument of HFSC_GETSTATS */
+	struct hfsc_interface	iface;
+	int			nskip;		/* skip # of classes */
+	int			nclasses;	/* in: capacity of stats[]; out: # of records written (WR) */
+	u_int64_t		cur_time;	/* current time */
+	u_int32_t		machclk_freq;	/* machine clock frequency */
+	u_int			hif_classes;	/* # of classes in the tree */
+	u_int			hif_packets;	/* # of packets in the tree */
+	struct hfsc_classstats	*stats;		/* pointer to stats array (user space, written with copyout) */
+};
+
+#define	HFSC_IF_ATTACH		_IOW('Q', 1, struct hfsc_attach)	/* hfsccmd_if_attach() */
+#define	HFSC_IF_DETACH		_IOW('Q', 2, struct hfsc_interface)	/* hfsccmd_if_detach() */
+#define	HFSC_ENABLE		_IOW('Q', 3, struct hfsc_interface)	/* needs a default class */
+#define	HFSC_DISABLE		_IOW('Q', 4, struct hfsc_interface)
+#define	HFSC_CLEAR_HIERARCHY	_IOW('Q', 5, struct hfsc_interface)	/* hfsc_clear_interface() */
+#define	HFSC_ADD_CLASS		_IOWR('Q', 7, struct hfsc_add_class)	/* returns class_handle */
+#define	HFSC_DEL_CLASS		_IOW('Q', 8, struct hfsc_delete_class)
+#define	HFSC_MOD_CLASS		_IOW('Q', 9, struct hfsc_modify_class)
+#define	HFSC_ADD_FILTER		_IOWR('Q', 10, struct hfsc_add_filter)	/* returns filter_handle */
+#define	HFSC_DEL_FILTER		_IOW('Q', 11, struct hfsc_delete_filter)
+#define	HFSC_GETSTATS		_IOWR('Q', 12, struct hfsc_class_stats)	/* the only request without a super-user check */
+#endif /* ALTQ3_COMPAT */
+
+#ifdef _KERNEL
+/*
+ * kernel internal service curve representation
+ * coordinates are given by 64 bit unsigned integers.
+ * x-axis: unit is clock count. for the intel x86 architecture,
+ * the raw Pentium TSC (Timestamp Counter) value is used.
+ * virtual time is also calculated in this time scale.
+ * y-axis: unit is byte.
+ *
+ * the service curve parameters are converted to the internal
+ * representation.
+ * the slope values are scaled to avoid overflow.
+ * the inverse slope values as well as the y-projection of the 1st
+ *   segment are kept in order to avoid 64-bit divide operations
+ * that are expensive on 32-bit architectures.
+ *
+ * note: Intel Pentium TSC never wraps around in several thousands of years.
+ * x-axis doesn't wrap around for 1089 years with 1GHz clock.
+ * y-axis doesn't wrap around for 4358 years with 1Gbps bandwidth.
+ */
+
+/* kernel internal representation of a service curve (built by sc2isc()) */
+struct internal_sc {
+	u_int64_t	sm1;	/* scaled slope of the 1st segment */
+	u_int64_t	ism1;	/* scaled inverse-slope of the 1st segment */
+	u_int64_t	dx;	/* the x-projection of the 1st segment (clock counts) */
+	u_int64_t	dy;	/* the y-projection of the 1st segment (bytes) */
+	u_int64_t	sm2;	/* scaled slope of the 2nd segment */
+	u_int64_t	ism2;	/* scaled inverse-slope of the 2nd segment */
+};
+
+/* runtime service curve: an internal service curve anchored at (x, y) */
+struct runtime_sc {
+	u_int64_t	x;	/* current starting position on x-axis */
+	u_int64_t	y;	/* current starting position on y-axis */
+	u_int64_t	sm1;	/* scaled slope of the 1st segment */
+	u_int64_t	ism1;	/* scaled inverse-slope of the 1st segment */
+	u_int64_t	dx;	/* the x-projection of the 1st segment */
+	u_int64_t	dy;	/* the y-projection of the 1st segment */
+	u_int64_t	sm2;	/* scaled slope of the 2nd segment */
+	u_int64_t	ism2;	/* scaled inverse-slope of the 2nd segment */
+};
+
+/* for TAILQ based ellist and actlist implementation */
+struct hfsc_class;
+typedef TAILQ_HEAD(_eligible, hfsc_class) ellist_t;	/* head of the eligible list */
+typedef TAILQ_ENTRY(hfsc_class) elentry_t;		/* linkage used by cl_ellist */
+typedef TAILQ_HEAD(_active, hfsc_class) actlist_t;	/* head of an active children list */
+typedef TAILQ_ENTRY(hfsc_class) actentry_t;		/* linkage used by cl_actlist */
+#define	ellist_first(s)		TAILQ_FIRST(s)
+#define	actlist_first(s)	TAILQ_FIRST(s)
+#define	actlist_last(s)		TAILQ_LAST(s, _active)
+
+struct hfsc_class {	/* one node of the hfsc class hierarchy */
+	u_int		cl_id;		/* class id (just for debug) */
+	u_int32_t	cl_handle;	/* class handle (matched by clh_to_clp()) */
+	struct hfsc_if	*cl_hif;	/* back pointer to struct hfsc_if */
+	int		cl_flags;	/* misc flags */
+
+	struct hfsc_class *cl_parent;	/* parent class */
+	struct hfsc_class *cl_siblings;	/* sibling classes */
+	struct hfsc_class *cl_children;	/* child classes */
+
+	class_queue_t	*cl_q;		/* class queue structure */
+	struct red	*cl_red;	/* RED state (holds a rio_t when the queue is RIO) */
+	struct altq_pktattr *cl_pktattr; /* saved header used by ECN */
+
+	u_int64_t	cl_total;	/* total work in bytes */
+	u_int64_t	cl_cumul;	/* cumulative work in bytes
+					   done by real-time criteria */
+	u_int64_t	cl_d;		/* deadline */
+	u_int64_t	cl_e;		/* eligible time */
+	u_int64_t	cl_vt;		/* virtual time */
+	u_int64_t	cl_f;		/* time when this class will fit for
+					   link-sharing, max(myf, cfmin) */
+	u_int64_t	cl_myf;		/* my fit-time (as calculated from this
+					   class's own upperlimit curve) */
+	u_int64_t	cl_myfadj;	/* my fit-time adjustment
+					   (to cancel history dependence) */
+	u_int64_t	cl_cfmin;	/* earliest children's fit-time (used
+					   with cl_myf to obtain cl_f) */
+	u_int64_t	cl_cvtmin;	/* minimal virtual time among the
+					   children fit for link-sharing
+					   (monotonic within a period) */
+	u_int64_t	cl_vtadj;	/* intra-period cumulative vt
+					   adjustment */
+	u_int64_t	cl_vtoff;	/* inter-period cumulative vt offset */
+	u_int64_t	cl_cvtmax;	/* max child's vt in the last period */
+
+	u_int64_t	cl_initvt;	/* init virtual time (for debugging) */
+
+	struct internal_sc *cl_rsc;	/* internal real-time service curve (NULL if none) */
+	struct internal_sc *cl_fsc;	/* internal fair service curve (NULL if none) */
+	struct internal_sc *cl_usc;	/* internal upperlimit service curve (NULL if none) */
+	struct runtime_sc  cl_deadline;	/* deadline curve */
+	struct runtime_sc  cl_eligible;	/* eligible curve */
+	struct runtime_sc  cl_virtual;	/* virtual curve */
+	struct runtime_sc  cl_ulimit;	/* upperlimit curve */
+
+	u_int		cl_vtperiod;	/* vt period sequence no */
+	u_int		cl_parentperiod;  /* parent's vt period seqno */
+	int		cl_nactive;	/* number of active children */
+	actlist_t	*cl_actc;	/* active children list (sorted by cl_vt) */
+
+	actentry_t	cl_actlist;	/* active children list entry */
+	elentry_t	cl_ellist;	/* eligible list entry */
+
+	struct {	/* counters exported through struct hfsc_classstats */
+		struct pktcntr	xmit_cnt;
+		struct pktcntr	drop_cnt;
+		u_int period;
+	} cl_stats;
+};
+
+/*
+ * hfsc interface state
+ */
+struct hfsc_if {	/* one instance per interface running hfsc */
+	struct hfsc_if		*hif_next;	/* interface state list */
+	struct ifaltq		*hif_ifq;	/* backpointer to ifaltq */
+	struct hfsc_class	*hif_rootclass;		/* root class */
+	struct hfsc_class	*hif_defaultclass;	/* default class */
+	struct hfsc_class	*hif_class_tbl[HFSC_MAX_CLASSES];	/* class table searched by clh_to_clp() */
+	struct hfsc_class	*hif_pollcache;	/* cache for poll operation */
+
+	u_int	hif_classes;			/* # of classes in the tree */
+	u_int	hif_packets;			/* # of packets in the tree */
+	u_int	hif_classid;			/* class id sequence number */
+
+	ellist_t *hif_eligible;			/* eligible list */
+
+#ifdef ALTQ3_CLFIER_COMPAT
+	struct acc_classifier	hif_classifier;	/* flow filters added with HFSC_ADD_FILTER */
+#endif
+};
+
+#endif /* _KERNEL */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _ALTQ_ALTQ_HFSC_H_ */
diff --git a/sys/contrib/altq/altq/altq_priq.c b/sys/contrib/altq/altq/altq_priq.c
new file mode 100644
index 000000000000..7211277c4b5a
--- /dev/null
+++ b/sys/contrib/altq/altq/altq_priq.c
@@ -0,0 +1,1036 @@
+/* $KAME: altq_priq.c,v 1.11 2003/09/17 14:23:25 kjc Exp $ */
+/*
+ * Copyright (C) 2000-2003
+ * Sony Computer Science Laboratories Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+/*
+ * priority queue
+ */
+
+#if defined(__FreeBSD__) || defined(__NetBSD__)
+#include "opt_altq.h"
+#if (__FreeBSD__ != 2)
+#include "opt_inet.h"
+#ifdef __FreeBSD__
+#include "opt_inet6.h"
+#endif
+#endif
+#endif /* __FreeBSD__ || __NetBSD__ */
+
+#ifdef ALTQ_PRIQ /* priq is enabled by ALTQ_PRIQ option in opt_altq.h */
+
+#include <sys/param.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/sockio.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/errno.h>
+#include <sys/kernel.h>
+#include <sys/queue.h>
+
+#include <net/if.h>
+#include <netinet/in.h>
+
+#include <net/pfvar.h>
+#include <altq/altq.h>
+#include <altq/altq_conf.h>
+#include <altq/altq_priq.h>
+
+/*
+ * function prototypes
+ *
+ * Everything declared here is static, i.e. local to this file.  The
+ * prototypes wrapped in ALTQ3_COMPAT are only declared when that option
+ * is defined; their definitions are guarded the same way further down.
+ */
+#ifdef ALTQ3_COMPAT
+static struct priq_if *priq_attach(struct ifaltq *, u_int);
+static int priq_detach(struct priq_if *);
+#endif
+static int priq_clear_interface(struct priq_if *);
+static int priq_request(struct ifaltq *, int, void *);
+static void priq_purge(struct priq_if *);
+static struct priq_class *priq_class_create(struct priq_if *, int, int, int,
+ int);
+static int priq_class_destroy(struct priq_class *);
+static int priq_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *);
+static struct mbuf *priq_dequeue(struct ifaltq *, int);
+
+static int priq_addq(struct priq_class *, struct mbuf *);
+static struct mbuf *priq_getq(struct priq_class *);
+static struct mbuf *priq_pollq(struct priq_class *);
+static void priq_purgeq(struct priq_class *);
+
+#ifdef ALTQ3_COMPAT
+static int priqcmd_if_attach(struct priq_interface *);
+static int priqcmd_if_detach(struct priq_interface *);
+static int priqcmd_add_class(struct priq_add_class *);
+static int priqcmd_delete_class(struct priq_delete_class *);
+static int priqcmd_modify_class(struct priq_modify_class *);
+static int priqcmd_add_filter(struct priq_add_filter *);
+static int priqcmd_delete_filter(struct priq_delete_filter *);
+static int priqcmd_class_stats(struct priq_class_stats *);
+#endif /* ALTQ3_COMPAT */
+
+static void get_class_stats(struct priq_classstats *, struct priq_class *);
+static struct priq_class *clh_to_clp(struct priq_if *, u_int32_t);
+
+#ifdef ALTQ3_COMPAT
+altqdev_decl(priq);
+
+/*
+ * pif_list keeps all priq_if's allocated.
+ *
+ * It is a singly linked list threaded through pif_next: priq_attach()
+ * pushes new state onto the head and priq_detach() unlinks it again.
+ * State created through priq_add_altq() is not put on this list.
+ */
+static struct priq_if *pif_list = NULL;
+#endif /* ALTQ3_COMPAT */
+
+/*
+ * priq_pfattach: attach the PRIQ state held in a->altq_disc to the send
+ * queue of the interface named by a->ifname.
+ *
+ * Returns EINVAL when the interface is unknown or no PRIQ state has been
+ * set in a->altq_disc; otherwise returns whatever altq_attach() reports.
+ */
+int
+priq_pfattach(struct pf_altq *a)
+{
+	struct ifnet *ifp;
+	int error, s;
+
+	ifp = ifunit(a->ifname);
+	if (ifp == NULL)
+		return (EINVAL);
+	if (a->altq_disc == NULL)
+		return (EINVAL);
+
+	/* the attach itself runs between spl*() and splx() */
+#ifdef __NetBSD__
+	s = splnet();
+#else
+	s = splimp();
+#endif
+	error = altq_attach(&ifp->if_snd, ALTQT_PRIQ, a->altq_disc,
+	    priq_enqueue, priq_dequeue, priq_request, NULL, NULL);
+	splx(s);
+
+	return (error);
+}
+
+/*
+ * priq_add_altq: allocate and initialise the PRIQ state for the
+ * interface named by a->ifname and store it in a->altq_disc.
+ *
+ * Returns EINVAL for an unknown interface, ENODEV when the interface's
+ * send queue is not ALTQ-ready, ENOMEM when the allocation yields NULL
+ * and 0 on success.
+ */
+int
+priq_add_altq(struct pf_altq *a)
+{
+	struct ifnet *ifp;
+	struct priq_if *pif;
+
+	ifp = ifunit(a->ifname);
+	if (ifp == NULL)
+		return (EINVAL);
+	if (!ALTQ_IS_READY(&ifp->if_snd))
+		return (ENODEV);
+
+	MALLOC(pif, struct priq_if *, sizeof(struct priq_if),
+	    M_DEVBUF, M_WAITOK);
+	if (pif == NULL)
+		return (ENOMEM);
+
+	/* start from an all-zero state; -1 means "no class installed" */
+	bzero(pif, sizeof(struct priq_if));
+	pif->pif_ifq = &ifp->if_snd;
+	pif->pif_maxpri = -1;
+	pif->pif_bandwidth = a->ifbandwidth;
+
+	/* keep the state in pf_altq */
+	a->altq_disc = pif;
+	return (0);
+}
+
+/*
+ * priq_remove_altq: release the PRIQ state referenced by a->altq_disc.
+ * The pointer in pf_altq is cleared first, then every class is destroyed
+ * and the state itself is freed.
+ *
+ * Returns EINVAL when no state is attached, 0 otherwise.
+ */
+int
+priq_remove_altq(struct pf_altq *a)
+{
+	struct priq_if *pif = a->altq_disc;
+
+	if (pif == NULL)
+		return (EINVAL);
+
+	a->altq_disc = NULL;
+	(void)priq_clear_interface(pif);
+	FREE(pif, M_DEVBUF);
+
+	return (0);
+}
+
+/*
+ * priq_add_queue: create the class described by a pf_altq entry.
+ *
+ * Returns EINVAL when no PRIQ state is attached, the priority is not
+ * below PRIQ_MAXPRI or the qid is 0; EBUSY when the priority slot or the
+ * qid is already in use; ENOMEM when priq_class_create() fails; 0 on
+ * success.
+ */
+int
+priq_add_queue(struct pf_altq *a)
+{
+	struct priq_if *pif = a->altq_disc;
+
+	if (pif == NULL)
+		return (EINVAL);
+
+	/* check parameters */
+	if (a->priority >= PRIQ_MAXPRI || a->qid == 0)
+		return (EINVAL);
+
+	/* both the priority slot and the qid must still be free */
+	if (pif->pif_classes[a->priority] != NULL ||
+	    clh_to_clp(pif, a->qid) != NULL)
+		return (EBUSY);
+
+	if (priq_class_create(pif, a->priority, a->qlimit,
+	    a->pq_u.priq_opts.flags, a->qid) == NULL)
+		return (ENOMEM);
+
+	return (0);
+}
+
+/*
+ * priq_remove_queue: destroy the class whose handle equals a->qid.
+ *
+ * Returns EINVAL when no PRIQ state is attached or no class carries that
+ * handle; otherwise the result of priq_class_destroy().
+ */
+int
+priq_remove_queue(struct pf_altq *a)
+{
+	struct priq_if *pif = a->altq_disc;
+	struct priq_class *cl;
+
+	if (pif == NULL)
+		return (EINVAL);
+
+	cl = clh_to_clp(pif, a->qid);
+	if (cl == NULL)
+		return (EINVAL);
+
+	return (priq_class_destroy(cl));
+}
+
+/*
+ * priq_getqstats: copy the statistics of the class identified by a->qid
+ * out to the user buffer ubuf.
+ *
+ * On entry *nbytes is the size of ubuf; on success it is set to the
+ * number of bytes written, sizeof(struct priq_classstats).
+ *
+ * Returns EBADF when the interface has no PRIQ state, EINVAL when the
+ * class is unknown or the buffer is too small, the copyout() error if
+ * the copy fails, and 0 on success.
+ */
+int
+priq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes)
+{
+	struct priq_if *pif;
+	struct priq_class *cl;
+	struct priq_classstats stats;
+	int error;
+
+	if ((pif = altq_lookup(a->ifname, ALTQT_PRIQ)) == NULL)
+		return (EBADF);
+
+	if ((cl = clh_to_clp(pif, a->qid)) == NULL)
+		return (EINVAL);
+
+	/*
+	 * Compare as signed values.  sizeof yields an unsigned type, so
+	 * without the cast a negative *nbytes would be converted to a huge
+	 * unsigned number and slip past this check.
+	 */
+	if (*nbytes < (int)sizeof(stats))
+		return (EINVAL);
+
+	get_class_stats(&stats, cl);
+
+	if ((error = copyout((caddr_t)&stats, ubuf, sizeof(stats))) != 0)
+		return (error);
+	*nbytes = sizeof(stats);
+	return (0);
+}
+
+/*
+ * priq_clear_interface: return the interface to its initial state by
+ * discarding every filter (ALTQ3_CLFIER_COMPAT only) and every class.
+ * Always returns 0.
+ */
+static int
+priq_clear_interface(struct priq_if *pif)
+{
+	struct priq_class *cl;
+	int pri;
+
+#ifdef ALTQ3_CLFIER_COMPAT
+	/* free the filters for this interface */
+	acc_discard_filters(&pif->pif_classifier, NULL, 1);
+#endif
+
+	/*
+	 * Clear out the classes.  priq_class_destroy() lowers pif_maxpri
+	 * when the topmost class goes away, so the bound is re-read on
+	 * every pass.
+	 */
+	pri = 0;
+	while (pri <= pif->pif_maxpri) {
+		cl = pif->pif_classes[pri];
+		if (cl != NULL)
+			priq_class_destroy(cl);
+		pri++;
+	}
+
+	return (0);
+}
+
+/*
+ * priq_request: request handler passed to altq_attach().  Only
+ * ALTRQ_PURGE is acted upon (all queued packets are discarded); every
+ * other request is ignored.  arg is unused.  Always returns 0.
+ */
+static int
+priq_request(struct ifaltq *ifq, int req, void *arg)
+{
+	struct priq_if *pif = (struct priq_if *)ifq->altq_disc;
+
+	if (req == ALTRQ_PURGE)
+		priq_purge(pif);
+
+	return (0);
+}
+
+/*
+ * priq_purge: discard all the queued packets on the interface.  Each
+ * non-empty class queue is purged; if ALTQ is enabled on the interface
+ * queue its length counter is reset to 0 as well.
+ */
+static void
+priq_purge(struct priq_if *pif)
+{
+	struct priq_class *cl;
+	int pri;
+
+	for (pri = 0; pri <= pif->pif_maxpri; pri++) {
+		cl = pif->pif_classes[pri];
+		if (cl == NULL || qempty(cl->cl_q))
+			continue;
+		priq_purgeq(cl);
+	}
+
+	if (ALTQ_IS_ENABLED(pif->pif_ifq))
+		pif->pif_ifq->ifq_len = 0;
+}
+/*
+ * priq_class_create: create the class for priority slot pri of pif, or
+ * re-initialise the class that already occupies that slot.
+ *
+ * An existing class has its queue purged and its RED/RIO state destroyed
+ * before it is reconfigured; otherwise a new priq_class and its
+ * class_queue_t are allocated and zeroed.  The class is then given the
+ * queue limit (50 when qlimit is 0), flags, priority and handle (qid),
+ * starting out as a Q_DROPTAIL queue.  pif_default is updated when
+ * PRCF_DEFAULTCLASS is set and pif_maxpri is raised when needed.
+ *
+ * With ALTQ_RED, PRCF_RED/PRCF_RIO ask for RED or RIO state; if that
+ * allocation returns NULL the queue silently stays Q_DROPTAIL.
+ *
+ * Callers are expected to have range-checked pri; it is used as an index
+ * into pif_classes[] without further checks here.
+ *
+ * Returns the class, or NULL when an allocation fails or when PRCF_RED is
+ * requested in a build without ALTQ_RED.
+ */
+static struct priq_class *
+priq_class_create(struct priq_if *pif, int pri, int qlimit, int flags, int qid)
+{
+ struct priq_class *cl;
+ int s;
+
+#ifndef ALTQ_RED
+ if (flags & PRCF_RED) {
+#ifdef ALTQ_DEBUG
+ printf("priq_class_create: RED not configured for PRIQ!\n");
+#endif
+ return (NULL);
+ }
+#endif
+
+ if ((cl = pif->pif_classes[pri]) != NULL) {
+ /* modify the class instead of creating a new one */
+#ifdef __NetBSD__
+ s = splnet();
+#else
+ s = splimp();
+#endif
+ if (!qempty(cl->cl_q))
+ priq_purgeq(cl);
+ splx(s);
+#ifdef ALTQ_RIO
+ if (q_is_rio(cl->cl_q))
+ rio_destroy((rio_t *)cl->cl_red);
+#endif
+#ifdef ALTQ_RED
+ if (q_is_red(cl->cl_q))
+ red_destroy(cl->cl_red);
+#endif
+ /* NOTE(review): cl_red is not reset to NULL here; later uses are gated on the queue type */
+ } else {
+ MALLOC(cl, struct priq_class *, sizeof(struct priq_class),
+ M_DEVBUF, M_WAITOK);
+ if (cl == NULL)
+ return (NULL);
+ bzero(cl, sizeof(struct priq_class));
+
+ MALLOC(cl->cl_q, class_queue_t *, sizeof(class_queue_t),
+ M_DEVBUF, M_WAITOK);
+ if (cl->cl_q == NULL)
+ goto err_ret;
+ bzero(cl->cl_q, sizeof(class_queue_t));
+ }
+
+ pif->pif_classes[pri] = cl;
+ if (flags & PRCF_DEFAULTCLASS)
+ pif->pif_default = cl;
+ if (qlimit == 0)
+ qlimit = 50; /* use default */
+ qlimit(cl->cl_q) = qlimit;
+ qtype(cl->cl_q) = Q_DROPTAIL; /* upgraded to Q_RED/Q_RIO below if that state can be allocated */
+ qlen(cl->cl_q) = 0;
+ cl->cl_flags = flags;
+ cl->cl_pri = pri;
+ if (pri > pif->pif_maxpri)
+ pif->pif_maxpri = pri;
+ cl->cl_pif = pif;
+ cl->cl_handle = qid;
+
+#ifdef ALTQ_RED
+ if (flags & (PRCF_RED|PRCF_RIO)) {
+ int red_flags, red_pkttime;
+
+ red_flags = 0;
+ if (flags & PRCF_ECN)
+ red_flags |= REDF_ECN;
+#ifdef ALTQ_RIO
+ if (flags & PRCF_CLEARDSCP)
+ red_flags |= RIOF_CLEARDSCP;
+#endif
+ if (pif->pif_bandwidth < 8)
+ red_pkttime = 1000 * 1000 * 1000; /* 1 sec */
+ else
+ red_pkttime = (int64_t)pif->pif_ifq->altq_ifp->if_mtu
+ * 1000 * 1000 * 1000 / (pif->pif_bandwidth / 8); /* if_mtu * 10^9 / (bandwidth / 8), truncated to int */
+#ifdef ALTQ_RIO
+ if (flags & PRCF_RIO) {
+ cl->cl_red = (red_t *)rio_alloc(0, NULL,
+ red_flags, red_pkttime);
+ if (cl->cl_red != NULL)
+ qtype(cl->cl_q) = Q_RIO;
+ } else
+#endif
+ if (flags & PRCF_RED) {
+ cl->cl_red = red_alloc(0, 0,
+ qlimit(cl->cl_q) * 10/100,
+ qlimit(cl->cl_q) * 30/100,
+ red_flags, red_pkttime);
+ if (cl->cl_red != NULL)
+ qtype(cl->cl_q) = Q_RED;
+ }
+ }
+#endif /* ALTQ_RED */
+
+ return (cl);
+
+ err_ret: /* only reached when the cl_q allocation for a new class fails */
+ if (cl->cl_red != NULL) {
+#ifdef ALTQ_RIO
+ if (q_is_rio(cl->cl_q))
+ rio_destroy((rio_t *)cl->cl_red);
+#endif
+#ifdef ALTQ_RED
+ if (q_is_red(cl->cl_q))
+ red_destroy(cl->cl_red);
+#endif
+ }
+ if (cl->cl_q != NULL)
+ FREE(cl->cl_q, M_DEVBUF);
+ FREE(cl, M_DEVBUF);
+ return (NULL);
+}
+
+/*
+ * priq_class_destroy: unlink a class from its interface and free it.
+ *
+ * Between spl*() and splx() the class's filters are discarded
+ * (ALTQ3_CLFIER_COMPAT only), its queue is purged, its priority slot is
+ * cleared, pif_default is reset if it referred to this class, and
+ * pif_maxpri is recomputed if this was the highest priority in use
+ * (-1 when no class is left).  The RED/RIO state, the class queue and
+ * the class itself are freed afterwards.
+ *
+ * Always returns 0.
+ */
+static int
+priq_class_destroy(struct priq_class *cl)
+{
+	struct priq_if *pif;
+	int s, pri;
+
+#ifdef __NetBSD__
+	s = splnet();
+#else
+	s = splimp();
+#endif
+
+#ifdef ALTQ3_CLFIER_COMPAT
+	/* delete filters referencing to this class */
+	acc_discard_filters(&cl->cl_pif->pif_classifier, cl, 0);
+#endif
+
+	if (!qempty(cl->cl_q))
+		priq_purgeq(cl);
+
+	pif = cl->cl_pif;
+	pif->pif_classes[cl->cl_pri] = NULL;
+
+	/*
+	 * Do not leave pif_default pointing at memory that is about to be
+	 * freed: priq_enqueue() falls back to pif_default and would
+	 * otherwise dereference the stale pointer.
+	 */
+	if (pif->pif_default == cl)
+		pif->pif_default = NULL;
+
+	if (pif->pif_maxpri == cl->cl_pri) {
+		/* find the next lower priority that still has a class */
+		for (pri = cl->cl_pri; pri >= 0; pri--)
+			if (pif->pif_classes[pri] != NULL) {
+				pif->pif_maxpri = pri;
+				break;
+			}
+		if (pri < 0)
+			pif->pif_maxpri = -1;
+	}
+	splx(s);
+
+	if (cl->cl_red != NULL) {
+#ifdef ALTQ_RIO
+		if (q_is_rio(cl->cl_q))
+			rio_destroy((rio_t *)cl->cl_red);
+#endif
+#ifdef ALTQ_RED
+		if (q_is_red(cl->cl_q))
+			red_destroy(cl->cl_red);
+#endif
+	}
+	FREE(cl->cl_q, M_DEVBUF);
+	FREE(cl, M_DEVBUF);
+	return (0);
+}
+
+/*
+ * priq_enqueue is an enqueue function to be registered to
+ * (*altq_enqueue) in struct ifaltq.
+ *
+ * The class is looked up from the packet's PACKET_TAG_PF_QID mbuf tag
+ * when one is present.  Under ALTQ3_COMPAT a packet without that tag
+ * takes its class from pktattr when the queue has ALTQF_CLASSIFY set.
+ * If no class results, the interface's default class is used.
+ *
+ * Returns 0 when the packet was queued and ENOBUFS when it was dropped
+ * (no packet header, no usable class, or priq_addq() refused it).  The
+ * mbuf is freed on every drop path.
+ */
+static int
+priq_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr)
+{
+ struct priq_if *pif = (struct priq_if *)ifq->altq_disc;
+ struct priq_class *cl;
+ struct m_tag *t;
+ int len;
+
+ /* grab class set by classifier */
+ if ((m->m_flags & M_PKTHDR) == 0) {
+ /* should not happen */
+#if defined(__NetBSD__) || defined(__OpenBSD__)
+ printf("altq: packet for %s does not have pkthdr\n",
+ ifq->altq_ifp->if_xname);
+#else
+ printf("altq: packet for %s%d does not have pkthdr\n",
+ ifq->altq_ifp->if_name, ifq->altq_ifp->if_unit);
+#endif
+ m_freem(m);
+ return (ENOBUFS);
+ }
+ cl = NULL;
+ if ((t = m_tag_find(m, PACKET_TAG_PF_QID, NULL)) != NULL)
+ cl = clh_to_clp(pif, ((struct altq_tag *)(t+1))->qid); /* the altq_tag payload follows the m_tag header */
+#ifdef ALTQ3_COMPAT
+ else if ((ifq->altq_flags & ALTQF_CLASSIFY) && pktattr != NULL)
+ cl = pktattr->pattr_class;
+#endif
+ if (cl == NULL) {
+ cl = pif->pif_default; /* fall back to the default class */
+ if (cl == NULL) {
+ m_freem(m);
+ return (ENOBUFS);
+ }
+ }
+#ifdef ALTQ3_COMPAT
+ if (pktattr != NULL)
+ cl->cl_pktattr = pktattr; /* save proto hdr used by ECN */
+ else
+#endif
+ cl->cl_pktattr = NULL; /* without ALTQ3_COMPAT this assignment is unconditional */
+ len = m_pktlen(m); /* taken before priq_addq(), which may free m */
+ if (priq_addq(cl, m) != 0) {
+ /* drop occurred. mbuf was freed in priq_addq. */
+ PKTCNTR_ADD(&cl->cl_dropcnt, len);
+ return (ENOBUFS);
+ }
+ IFQ_INC_LEN(ifq);
+
+ /* successfully queued. */
+ return (0);
+}
+
+/*
+ * priq_dequeue is the dequeue function registered to (*altq_dequeue)
+ * in struct ifaltq.
+ *
+ * Classes are scanned from pif_maxpri downwards and the first class
+ * with a non-empty queue supplies the packet.
+ *
+ * note: ALTDQ_POLL returns the next packet without removing the packet
+ *	from the queue.  ALTDQ_REMOVE is a normal dequeue operation.
+ *	ALTDQ_REMOVE must return the same packet if called immediately
+ *	after ALTDQ_POLL.
+ */
+static struct mbuf *
+priq_dequeue(struct ifaltq *ifq, int op)
+{
+	struct priq_if *pif = (struct priq_if *)ifq->altq_disc;
+	struct priq_class *cl;
+	struct mbuf *m;
+	int pri;
+
+	/* no packet in the queue */
+	if (IFQ_IS_EMPTY(ifq))
+		return (NULL);
+
+	for (pri = pif->pif_maxpri; pri >= 0; pri--) {
+		cl = pif->pif_classes[pri];
+		if (cl == NULL || qempty(cl->cl_q))
+			continue;
+
+		if (op == ALTDQ_POLL)
+			return (priq_pollq(cl));
+
+		m = priq_getq(cl);
+		if (m == NULL)
+			return (NULL);
+
+		IFQ_DEC_LEN(ifq);
+		/* a queue that just drained ends a backlog period */
+		if (qempty(cl->cl_q))
+			cl->cl_period++;
+		PKTCNTR_ADD(&cl->cl_xmitcnt, m_pktlen(m));
+		return (m);
+	}
+
+	return (NULL);
+}
+
+/*
+ * priq_addq: append m to the class queue.  RIO and RED queues delegate
+ * to rio_addq()/red_addq(); a drop-tail queue frees the mbuf and returns
+ * -1 once the queue length has reached its limit, otherwise it queues
+ * the packet (clearing the DS field first when PRCF_CLEARDSCP is set)
+ * and returns 0.
+ */
+static int
+priq_addq(struct priq_class *cl, struct mbuf *m)
+{
+	class_queue_t *q = cl->cl_q;
+
+#ifdef ALTQ_RIO
+	if (q_is_rio(q))
+		return (rio_addq((rio_t *)cl->cl_red, q, m, cl->cl_pktattr));
+#endif
+#ifdef ALTQ_RED
+	if (q_is_red(q))
+		return (red_addq(cl->cl_red, q, m, cl->cl_pktattr));
+#endif
+
+	if (qlen(q) < qlimit(q)) {
+		if (cl->cl_flags & PRCF_CLEARDSCP)
+			write_dsfield(m, cl->cl_pktattr, 0);
+		_addq(q, m);
+		return (0);
+	}
+
+	/* queue is full: tail drop */
+	m_freem(m);
+	return (-1);
+}
+
+/*
+ * priq_getq: remove and return the next packet of the class queue,
+ * going through rio_getq()/red_getq() for RIO/RED queues and _getq()
+ * otherwise.
+ */
+static struct mbuf *
+priq_getq(struct priq_class *cl)
+{
+	class_queue_t *q = cl->cl_q;
+
+#ifdef ALTQ_RIO
+	if (q_is_rio(q))
+		return (rio_getq((rio_t *)cl->cl_red, q));
+#endif
+#ifdef ALTQ_RED
+	if (q_is_red(q))
+		return (red_getq(cl->cl_red, q));
+#endif
+	return (_getq(q));
+}
+
+/*
+ * priq_pollq: return the packet at the head of the class queue without
+ * removing it.
+ */
+static struct mbuf *
+priq_pollq(struct priq_class *cl)
+{
+	return (qhead(cl->cl_q));
+}
+
+/*
+ * priq_purgeq: drop every packet queued on the class.  Each packet is
+ * accounted in cl_dropcnt before it is freed.
+ */
+static void
+priq_purgeq(struct priq_class *cl)
+{
+	struct mbuf *m;
+
+	if (qempty(cl->cl_q))
+		return;
+
+	for (;;) {
+		m = _getq(cl->cl_q);
+		if (m == NULL)
+			break;
+		PKTCNTR_ADD(&cl->cl_dropcnt, m_pktlen(m));
+		m_freem(m);
+	}
+	ASSERT(qlen(cl->cl_q) == 0);
+}
+
+/*
+ * get_class_stats: fill *sp with a snapshot of the class: handle, queue
+ * length/limit/type, backlog period count, transmit and drop counters,
+ * plus the RED or RIO statistics when the queue is of that type.
+ */
+static void
+get_class_stats(struct priq_classstats *sp, struct priq_class *cl)
+{
+	class_queue_t *q = cl->cl_q;
+
+	/* identity and queue occupancy */
+	sp->class_handle = cl->cl_handle;
+	sp->qtype = qtype(q);
+	sp->qlength = qlen(q);
+	sp->qlimit = qlimit(q);
+
+	/* counters */
+	sp->period = cl->cl_period;
+	sp->xmitcnt = cl->cl_xmitcnt;
+	sp->dropcnt = cl->cl_dropcnt;
+
+#ifdef ALTQ_RED
+	if (q_is_red(q))
+		red_getstats(cl->cl_red, &sp->red[0]);
+#endif
+#ifdef ALTQ_RIO
+	if (q_is_rio(q))
+		rio_getstats((rio_t *)cl->cl_red, &sp->red[0]);
+#endif
+}
+
+/*
+ * clh_to_clp: convert a class handle to the corresponding class pointer.
+ * Handle 0 never matches.  The classes are searched from pif_maxpri
+ * downwards; NULL is returned when no class carries the handle.
+ */
+static struct priq_class *
+clh_to_clp(struct priq_if *pif, u_int32_t chandle)
+{
+	struct priq_class *cl;
+	int idx;
+
+	if (chandle != 0) {
+		idx = pif->pif_maxpri;
+		while (idx >= 0) {
+			cl = pif->pif_classes[idx];
+			if (cl != NULL && cl->cl_handle == chandle)
+				return (cl);
+			idx--;
+		}
+	}
+
+	return (NULL);
+}
+
+
+#ifdef ALTQ3_COMPAT
+
+/*
+ * priq_attach: allocate the PRIQ state for ifq, initialise it with the
+ * given bandwidth and link it at the head of pif_list.  Returns the new
+ * state, or NULL when the allocation yields NULL.
+ */
+static struct priq_if *
+priq_attach(struct ifaltq *ifq, u_int bandwidth)
+{
+	struct priq_if *pif;
+
+	MALLOC(pif, struct priq_if *, sizeof(struct priq_if),
+	    M_DEVBUF, M_WAITOK);
+	if (pif == NULL)
+		return (NULL);
+
+	bzero(pif, sizeof(struct priq_if));
+	pif->pif_ifq = ifq;
+	pif->pif_maxpri = -1;
+	pif->pif_bandwidth = bandwidth;
+
+	/* add this state to the priq list */
+	pif->pif_next = pif_list;
+	pif_list = pif;
+
+	return (pif);
+}
+
+/*
+ * priq_detach: destroy all classes of pif, unlink pif from pif_list and
+ * free it.  Always returns 0.
+ */
+static int
+priq_detach(struct priq_if *pif)
+{
+	(void)priq_clear_interface(pif);
+
+	/* remove this interface from the pif list */
+	if (pif_list == pif) {
+		pif_list = pif->pif_next;
+	} else {
+		struct priq_if *p = pif_list;
+
+		/* walk to the element that precedes pif */
+		while (p != NULL && p->pif_next != pif)
+			p = p->pif_next;
+		if (p != NULL)
+			p->pif_next = pif->pif_next;
+		ASSERT(p != NULL);
+	}
+
+	FREE(pif, M_DEVBUF);
+	return (0);
+}
+
+/*
+ * priq device interface
+ *
+ * priqopen: open entry point.  Nothing is set up here; the work happens
+ * when the queueing scheme is attached.  Always returns 0.
+ */
+int
+priqopen(dev, flag, fmt, p)
+	dev_t dev;
+	int flag, fmt;
+#if (__FreeBSD_version > 500000)
+	struct thread *p;
+#else
+	struct proc *p;
+#endif
+{
+
+	return (0);
+}
+
+/*
+ * priqclose: close entry point.  Every PRIQ state on pif_list is
+ * disabled (if enabled), detached from its interface queue and
+ * destroyed.
+ *
+ * Returns 0, or the first error reported by altq_detach() or
+ * priq_detach().  A state whose altq_detach() fails is left on the list
+ * and the walk moves on to the next one, so a failing entry cannot make
+ * this function loop forever.
+ */
+int
+priqclose(dev, flag, fmt, p)
+	dev_t dev;
+	int flag, fmt;
+#if (__FreeBSD_version > 500000)
+	struct thread *p;
+#else
+	struct proc *p;
+#endif
+{
+	struct priq_if *pif, *next;
+	int err, error = 0;
+
+	for (pif = pif_list; pif != NULL; pif = next) {
+		/* priq_detach() unlinks and frees pif; fetch the link first */
+		next = pif->pif_next;
+
+		/* destroy all */
+		if (ALTQ_IS_ENABLED(pif->pif_ifq))
+			altq_disable(pif->pif_ifq);
+
+		err = altq_detach(pif->pif_ifq);
+		if (err == 0)
+			err = priq_detach(pif);
+		if (err != 0 && error == 0)
+			error = err;
+	}
+
+	return error;
+}
+/*
+ * priqioctl: ioctl entry point of the priq device.
+ *
+ * Every command except PRIQ_GETSTATS first has to pass the suser()
+ * check.  The command is then dispatched to the matching priqcmd_*()
+ * helper; PRIQ_ENABLE, PRIQ_DISABLE and PRIQ_CLEAR are handled inline
+ * after looking up the PRIQ state by interface name.  addr is cast to
+ * the argument structure that belongs to the command.
+ *
+ * Returns 0 on success, the suser() error when the check fails, EBADF
+ * when the named interface has no PRIQ state, EINVAL for an unknown
+ * command or for PRIQ_ENABLE without a default class, or the error of
+ * the helper that was called.
+ */
+int
+priqioctl(dev, cmd, addr, flag, p)
+ dev_t dev;
+ ioctlcmd_t cmd;
+ caddr_t addr;
+ int flag;
+#if (__FreeBSD_version > 500000)
+ struct thread *p;
+#else
+ struct proc *p;
+#endif
+{
+ struct priq_if *pif;
+ struct priq_interface *ifacep;
+ int error = 0;
+
+ /* check super-user privilege */
+ switch (cmd) {
+ case PRIQ_GETSTATS:
+ break;
+ default:
+#if (__FreeBSD_version > 400000)
+ if ((error = suser(p)) != 0)
+ return (error);
+#else
+ if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
+ return (error);
+#endif
+ break;
+ }
+
+ switch (cmd) {
+
+ case PRIQ_IF_ATTACH:
+ error = priqcmd_if_attach((struct priq_interface *)addr);
+ break;
+
+ case PRIQ_IF_DETACH:
+ error = priqcmd_if_detach((struct priq_interface *)addr);
+ break;
+
+ case PRIQ_ENABLE:
+ case PRIQ_DISABLE:
+ case PRIQ_CLEAR:
+ ifacep = (struct priq_interface *)addr;
+ if ((pif = altq_lookup(ifacep->ifname,
+ ALTQT_PRIQ)) == NULL) {
+ error = EBADF;
+ break;
+ }
+
+ switch (cmd) {
+ case PRIQ_ENABLE:
+ if (pif->pif_default == NULL) { /* priq_enqueue() needs a default class to fall back on */
+#ifdef ALTQ_DEBUG
+ printf("priq: no default class\n");
+#endif
+ error = EINVAL;
+ break;
+ }
+ error = altq_enable(pif->pif_ifq);
+ break;
+
+ case PRIQ_DISABLE:
+ error = altq_disable(pif->pif_ifq);
+ break;
+
+ case PRIQ_CLEAR:
+ priq_clear_interface(pif);
+ break;
+ }
+ break;
+
+ case PRIQ_ADD_CLASS:
+ error = priqcmd_add_class((struct priq_add_class *)addr);
+ break;
+
+ case PRIQ_DEL_CLASS:
+ error = priqcmd_delete_class((struct priq_delete_class *)addr);
+ break;
+
+ case PRIQ_MOD_CLASS:
+ error = priqcmd_modify_class((struct priq_modify_class *)addr);
+ break;
+
+ case PRIQ_ADD_FILTER:
+ error = priqcmd_add_filter((struct priq_add_filter *)addr);
+ break;
+
+ case PRIQ_DEL_FILTER:
+ error = priqcmd_delete_filter((struct priq_delete_filter *)addr);
+ break;
+
+ case PRIQ_GETSTATS:
+ error = priqcmd_class_stats((struct priq_class_stats *)addr);
+ break;
+
+ default:
+ error = EINVAL;
+ break;
+ }
+ return error;
+}
+
+/*
+ * priqcmd_if_attach: PRIQ_IF_ATTACH handler.  Creates the PRIQ state
+ * for the named interface (ap->arg is passed as the bandwidth) and
+ * attaches it to the interface's send queue.
+ *
+ * Returns ENXIO for an unknown interface, ENOMEM when the state cannot
+ * be allocated, or the result of altq_attach(); when the attach fails
+ * the freshly created state is destroyed again.
+ */
+static int
+priqcmd_if_attach(struct priq_interface *ap)
+{
+	struct ifnet *ifp;
+	struct priq_if *pif;
+	int error;
+
+	ifp = ifunit(ap->ifname);
+	if (ifp == NULL)
+		return (ENXIO);
+
+	pif = priq_attach(&ifp->if_snd, ap->arg);
+	if (pif == NULL)
+		return (ENOMEM);
+
+	/*
+	 * set PRIQ to this ifnet structure.
+	 */
+	error = altq_attach(&ifp->if_snd, ALTQT_PRIQ, pif,
+	    priq_enqueue, priq_dequeue, priq_request,
+	    &pif->pif_classifier, acc_classify);
+	if (error != 0)
+		(void)priq_detach(pif);
+
+	return (error);
+}
+
+/*
+ * priqcmd_if_detach: PRIQ_IF_DETACH handler.  Disables ALTQ on the
+ * interface queue if it is enabled, detaches the discipline and frees
+ * the PRIQ state.
+ *
+ * Returns EBADF when the interface has no PRIQ state, the altq_detach()
+ * error if detaching fails, otherwise the result of priq_detach().
+ */
+static int
+priqcmd_if_detach(struct priq_interface *ap)
+{
+	struct priq_if *pif;
+	int error;
+
+	pif = altq_lookup(ap->ifname, ALTQT_PRIQ);
+	if (pif == NULL)
+		return (EBADF);
+
+	if (ALTQ_IS_ENABLED(pif->pif_ifq))
+		altq_disable(pif->pif_ifq);
+
+	error = altq_detach(pif->pif_ifq);
+	if (error)
+		return (error);
+
+	return (priq_detach(pif));
+}
+
+/*
+ * priqcmd_add_class: PRIQ_ADD_CLASS handler.  Creates a class at
+ * priority ap->pri; its handle is ap->pri + 1 and is handed back to the
+ * caller in ap->class_handle.
+ *
+ * Returns EBADF when the interface has no PRIQ state, EINVAL when the
+ * priority is outside [0, PRIQ_MAXPRI), EBUSY when the priority slot is
+ * taken, ENOMEM when priq_class_create() fails and 0 on success.
+ */
+static int
+priqcmd_add_class(struct priq_add_class *ap)
+{
+	struct priq_if *pif;
+	struct priq_class *cl;
+
+	pif = altq_lookup(ap->iface.ifname, ALTQT_PRIQ);
+	if (pif == NULL)
+		return (EBADF);
+
+	if (ap->pri < 0 || ap->pri >= PRIQ_MAXPRI)
+		return (EINVAL);
+	if (pif->pif_classes[ap->pri] != NULL)
+		return (EBUSY);
+
+	/* the handle is derived from the priority; 0 stays reserved */
+	cl = priq_class_create(pif, ap->pri, ap->qlimit, ap->flags,
+	    ap->pri + 1);
+	if (cl == NULL)
+		return (ENOMEM);
+
+	/* return a class handle to the user */
+	ap->class_handle = cl->cl_handle;
+	return (0);
+}
+
+/*
+ * priqcmd_delete_class: PRIQ_DEL_CLASS handler.  Destroys the class
+ * identified by ap->class_handle.
+ *
+ * Returns EBADF when the interface has no PRIQ state, EINVAL when the
+ * handle is unknown, otherwise the result of priq_class_destroy().
+ */
+static int
+priqcmd_delete_class(struct priq_delete_class *ap)
+{
+	struct priq_if *pif;
+	struct priq_class *cl;
+
+	pif = altq_lookup(ap->iface.ifname, ALTQT_PRIQ);
+	if (pif == NULL)
+		return (EBADF);
+
+	cl = clh_to_clp(pif, ap->class_handle);
+	if (cl == NULL)
+		return (EINVAL);
+
+	return (priq_class_destroy(cl));
+}
+
+/*
+ * priqcmd_modify_class: PRIQ_MOD_CLASS handler.  Moves the class to a
+ * different priority slot if requested and then re-initialises it with
+ * the new queue limit and flags through priq_class_create().
+ *
+ * Returns EBADF when the interface has no PRIQ state, EINVAL for a
+ * priority outside [0, PRIQ_MAXPRI) or an unknown handle, EEXIST when
+ * the target slot belongs to another class, ENOMEM when
+ * priq_class_create() fails and 0 on success.
+ */
+static int
+priqcmd_modify_class(struct priq_modify_class *ap)
+{
+	struct priq_if *pif;
+	struct priq_class *cl;
+
+	pif = altq_lookup(ap->iface.ifname, ALTQT_PRIQ);
+	if (pif == NULL)
+		return (EBADF);
+
+	if (ap->pri < 0 || ap->pri >= PRIQ_MAXPRI)
+		return (EINVAL);
+
+	cl = clh_to_clp(pif, ap->class_handle);
+	if (cl == NULL)
+		return (EINVAL);
+
+	/*
+	 * if priority is changed, move the class to the new priority
+	 */
+	if (pif->pif_classes[ap->pri] != cl) {
+		if (pif->pif_classes[ap->pri] != NULL)
+			return (EEXIST);
+		pif->pif_classes[cl->cl_pri] = NULL;
+		pif->pif_classes[ap->pri] = cl;
+		cl->cl_pri = ap->pri;
+	}
+
+	/* call priq_class_create to change class parameters */
+	cl = priq_class_create(pif, ap->pri, ap->qlimit, ap->flags,
+	    ap->class_handle);
+	if (cl == NULL)
+		return (ENOMEM);
+
+	return (0);
+}
+
+/*
+ * priqcmd_add_filter: PRIQ_ADD_FILTER handler.  Registers ap->filter
+ * for the class identified by ap->class_handle; acc_add_filter() is
+ * given &ap->filter_handle to report the new filter's handle.
+ *
+ * Returns EBADF when the interface has no PRIQ state, EINVAL when the
+ * class handle is unknown, otherwise the result of acc_add_filter().
+ */
+static int
+priqcmd_add_filter(struct priq_add_filter *ap)
+{
+	struct priq_if *pif;
+	struct priq_class *cl;
+
+	pif = altq_lookup(ap->iface.ifname, ALTQT_PRIQ);
+	if (pif == NULL)
+		return (EBADF);
+
+	cl = clh_to_clp(pif, ap->class_handle);
+	if (cl == NULL)
+		return (EINVAL);
+
+	return (acc_add_filter(&pif->pif_classifier, &ap->filter,
+	    cl, &ap->filter_handle));
+}
+
+/*
+ * priqcmd_delete_filter: PRIQ_DEL_FILTER handler.  Removes the filter
+ * identified by ap->filter_handle from the interface's classifier.
+ *
+ * Returns EBADF when the interface has no PRIQ state, otherwise the
+ * result of acc_delete_filter().
+ */
+static int
+priqcmd_delete_filter(struct priq_delete_filter *ap)
+{
+	struct priq_if *pif;
+
+	pif = altq_lookup(ap->iface.ifname, ALTQT_PRIQ);
+	if (pif == NULL)
+		return (EBADF);
+
+	return (acc_delete_filter(&pif->pif_classifier, ap->filter_handle));
+}
+
+/*
+ * priqcmd_class_stats: PRIQ_GETSTATS handler.  Stores pif_maxpri in
+ * ap->maxpri and copies one priq_classstats record per priority
+ * 0..pif_maxpri out to the user array ap->stats; a priority without a
+ * class yields an all-zero record.
+ *
+ * Returns EBADF when the interface has no PRIQ state, the copyout()
+ * error if a copy fails, and 0 on success.
+ */
+static int
+priqcmd_class_stats(struct priq_class_stats *ap)
+{
+	struct priq_if *pif;
+	struct priq_class *cl;
+	struct priq_classstats stats, *usp;
+	int error, pri;
+
+	pif = altq_lookup(ap->iface.ifname, ALTQT_PRIQ);
+	if (pif == NULL)
+		return (EBADF);
+
+	ap->maxpri = pif->pif_maxpri;
+
+	/* one record per priority slot, in ascending priority order */
+	usp = ap->stats;
+	for (pri = 0; pri <= pif->pif_maxpri; pri++, usp++) {
+		cl = pif->pif_classes[pri];
+		if (cl == NULL)
+			bzero(&stats, sizeof(stats));
+		else
+			get_class_stats(&stats, cl);
+
+		error = copyout((caddr_t)&stats, (caddr_t)usp, sizeof(stats));
+		if (error != 0)
+			return (error);
+	}
+
+	return (0);
+}
+
+#ifdef KLD_MODULE
+
+static struct altqsw priq_sw =
+ {"priq", priqopen, priqclose, priqioctl}; /* presumably name + open/close/ioctl entry points -- confirm against struct altqsw */
+
+ALTQ_MODULE(altq_priq, ALTQT_PRIQ, &priq_sw);
+MODULE_DEPEND(altq_priq, altq_red, 1, 1, 1); /* this file calls red_*() when ALTQ_RED is defined */
+MODULE_DEPEND(altq_priq, altq_rio, 1, 1, 1); /* this file calls rio_*() when ALTQ_RIO is defined */
+
+#endif /* KLD_MODULE */
+
+#endif /* ALTQ3_COMPAT */
+#endif /* ALTQ_PRIQ */
diff --git a/sys/contrib/altq/altq/altq_priq.h b/sys/contrib/altq/altq/altq_priq.h
new file mode 100644
index 000000000000..481d31b8a6be
--- /dev/null
+++ b/sys/contrib/altq/altq/altq_priq.h
@@ -0,0 +1,170 @@
+/* $KAME: altq_priq.h,v 1.7 2003/10/03 05:05:15 kjc Exp $ */
+/*
+ * Copyright (C) 2000-2003
+ * Sony Computer Science Laboratories Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _ALTQ_ALTQ_PRIQ_H_
+#define _ALTQ_ALTQ_PRIQ_H_
+
+#include <altq/altq.h>
+#include <altq/altq_classq.h>
+#include <altq/altq_red.h>
+#include <altq/altq_rio.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define PRIQ_MAXPRI 16 /* upper limit of the number of priorities */
+
+#ifdef ALTQ3_COMPAT
+struct priq_interface { /* names the interface a request applies to */
+ char ifname[IFNAMSIZ]; /* interface name (e.g., fxp0) */
+ u_long arg; /* request-specific argument (passed as the bandwidth by PRIQ_IF_ATTACH) */
+};
+
+struct priq_add_class { /* argument of PRIQ_ADD_CLASS */
+ struct priq_interface iface; /* interface to add the class to */
+ int pri; /* priority (0 is the lowest) */
+ int qlimit; /* queue size limit */
+ int flags; /* misc flags (see below) */
+
+ u_int32_t class_handle; /* return value */
+};
+#endif /* ALTQ3_COMPAT */
+
+/* priq class flags (the flags argument of priq_class_create(), kept in cl_flags) */
+#define PRCF_RED 0x0001 /* use RED */
+#define PRCF_ECN 0x0002 /* use RED/ECN */
+#define PRCF_RIO 0x0004 /* use RIO */
+#define PRCF_CLEARDSCP 0x0010 /* clear diffserv codepoint */
+#define PRCF_DEFAULTCLASS 0x1000 /* default class */
+
+/* special class handles */
+#define PRIQ_NULLCLASS_HANDLE 0 /* clh_to_clp() in altq_priq.c never resolves handle 0 */
+
+#ifdef ALTQ3_COMPAT
+struct priq_delete_class { /* argument of PRIQ_DEL_CLASS */
+ struct priq_interface iface; /* interface owning the class */
+ u_int32_t class_handle; /* handle of the class to delete */
+};
+
+struct priq_modify_class { /* argument of PRIQ_MOD_CLASS */
+ struct priq_interface iface; /* interface owning the class */
+ u_int32_t class_handle; /* handle of the class to modify */
+ int pri; /* new priority */
+ int qlimit; /* new queue size limit */
+ int flags; /* new class flags (PRCF_*) */
+};
+
+struct priq_add_filter { /* argument of PRIQ_ADD_FILTER */
+ struct priq_interface iface; /* interface owning the class */
+ u_int32_t class_handle; /* class the filter is added for */
+ struct flow_filter filter; /* filter passed to acc_add_filter() */
+
+ u_long filter_handle; /* return value */
+};
+
+struct priq_delete_filter { /* argument of PRIQ_DEL_FILTER */
+ struct priq_interface iface; /* interface owning the filter */
+ u_long filter_handle; /* handle of the filter to delete */
+};
+#endif /* ALTQ3_COMPAT */
+
+struct priq_classstats { /* per-class statistics record filled in by get_class_stats() */
+ u_int32_t class_handle; /* handle of the class described */
+
+ u_int qlength; /* current queue length */
+ u_int qlimit; /* queue size limit */
+ u_int period; /* backlog period count */
+ struct pktcntr xmitcnt; /* transmitted packet counter */
+ struct pktcntr dropcnt; /* dropped packet counter */
+
+ /* red and rio related info */
+ int qtype; /* queue type */
+ struct redstats red[3]; /* rio has 3 red stats */
+};
+
+#ifdef ALTQ3_COMPAT
+struct priq_class_stats { /* argument of PRIQ_GETSTATS */
+ struct priq_interface iface; /* interface to report on */
+ int maxpri; /* in/out; priqcmd_class_stats() stores pif_maxpri here */
+
+ struct priq_classstats *stats; /* pointer to stats array; one record is written per priority 0..maxpri */
+};
+
+#define PRIQ_IF_ATTACH _IOW('Q', 1, struct priq_interface) /* handled by priqcmd_if_attach() */
+#define PRIQ_IF_DETACH _IOW('Q', 2, struct priq_interface) /* handled by priqcmd_if_detach() */
+#define PRIQ_ENABLE _IOW('Q', 3, struct priq_interface) /* refused with EINVAL without a default class */
+#define PRIQ_DISABLE _IOW('Q', 4, struct priq_interface)
+#define PRIQ_CLEAR _IOW('Q', 5, struct priq_interface) /* discards all classes; command number 6 is not assigned */
+#define PRIQ_ADD_CLASS _IOWR('Q', 7, struct priq_add_class)
+#define PRIQ_DEL_CLASS _IOW('Q', 8, struct priq_delete_class)
+#define PRIQ_MOD_CLASS _IOW('Q', 9, struct priq_modify_class)
+#define PRIQ_ADD_FILTER _IOWR('Q', 10, struct priq_add_filter)
+#define PRIQ_DEL_FILTER _IOW('Q', 11, struct priq_delete_filter)
+#define PRIQ_GETSTATS _IOWR('Q', 12, struct priq_class_stats) /* the only command exempt from the suser() check in priqioctl() */
+
+#endif /* ALTQ3_COMPAT */
+
+#ifdef _KERNEL
+
+struct priq_class { /* one class, i.e. one priority level of an interface */
+ u_int32_t cl_handle; /* class handle */
+ class_queue_t *cl_q; /* class queue structure */
+ struct red *cl_red; /* RED state (cast to rio_t * when the queue type is RIO) */
+ int cl_pri; /* priority */
+ int cl_flags; /* class flags */
+ struct priq_if *cl_pif; /* back pointer to pif */
+ struct altq_pktattr *cl_pktattr; /* saved header used by ECN */
+
+ /* statistics */
+ u_int cl_period; /* backlog period */
+ struct pktcntr cl_xmitcnt; /* transmitted packet counter */
+ struct pktcntr cl_dropcnt; /* dropped packet counter */
+};
+
+/*
+ * priq interface state
+ *
+ * pif_classes[] is indexed by priority and holds at most one class per
+ * priority; unused slots are NULL.  pif_maxpri is -1 while no class
+ * exists.
+ */
+struct priq_if {
+ struct priq_if *pif_next; /* interface state list */
+ struct ifaltq *pif_ifq; /* backpointer to ifaltq */
+ u_int pif_bandwidth; /* link bandwidth in bps */
+ int pif_maxpri; /* max priority in use */
+ struct priq_class *pif_default; /* default class */
+ struct priq_class *pif_classes[PRIQ_MAXPRI]; /* classes */
+#ifdef ALTQ3_CLFIER_COMPAT
+ struct acc_classifier pif_classifier; /* classifier */
+#endif
+};
+
+#endif /* _KERNEL */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _ALTQ_ALTQ_PRIQ_H_ */
diff --git a/sys/contrib/altq/altq/altq_red.c b/sys/contrib/altq/altq/altq_red.c
new file mode 100644
index 000000000000..b4aa9d3bdbc9
--- /dev/null
+++ b/sys/contrib/altq/altq/altq_red.c
@@ -0,0 +1,1492 @@
+/* $KAME: altq_red.c,v 1.18 2003/09/05 22:40:36 itojun Exp $ */
+
+/*
+ * Copyright (C) 1997-2003
+ * Sony Computer Science Laboratories Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+/*
+ * Copyright (c) 1990-1994 Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the Computer Systems
+ * Engineering Group at Lawrence Berkeley Laboratory.
+ * 4. Neither the name of the University nor of the Laboratory may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(__FreeBSD__) || defined(__NetBSD__)
+#include "opt_altq.h"
+#if (__FreeBSD__ != 2)
+#include "opt_inet.h"
+#ifdef __FreeBSD__
+#include "opt_inet6.h"
+#endif
+#endif
+#endif /* __FreeBSD__ || __NetBSD__ */
+#ifdef ALTQ_RED /* red is enabled by ALTQ_RED option in opt_altq.h */
+
+#include <sys/param.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/systm.h>
+#include <sys/errno.h>
+#if 1 /* ALTQ3_COMPAT */
+#include <sys/sockio.h>
+#include <sys/proc.h>
+#include <sys/kernel.h>
+#ifdef ALTQ_FLOWVALVE
+#include <sys/queue.h>
+#include <sys/time.h>
+#endif
+#endif /* ALTQ3_COMPAT */
+
+#include <net/if.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#ifdef INET6
+#include <netinet/ip6.h>
+#endif
+
+#include <net/pfvar.h>
+#include <altq/altq.h>
+#include <altq/altq_red.h>
+#ifdef ALTQ3_COMPAT
+#include <altq/altq_conf.h>
+#ifdef ALTQ_FLOWVALVE
+#include <altq/altq_flowvalve.h>
+#endif
+#endif
+
+/*
+ * ALTQ/RED (Random Early Detection) implementation using 32-bit
+ * fixed-point calculation.
+ *
+ * written by kjc using the ns code as a reference.
+ * you can learn more about red and ns from Sally's home page at
+ * http://www-nrg.ee.lbl.gov/floyd/
+ *
+ * most of the red parameter values are fixed in this implementation
+ * to prevent fixed-point overflow/underflow.
+ * if you change the parameters, watch out for overflow/underflow!
+ *
+ * the parameters used are recommended values by Sally.
+ * the corresponding ns config looks:
+ * q_weight=0.00195
+ * minthresh=5 maxthresh=15 queue-size=60
+ * linterm=30
+ * dropmech=drop-tail
+ * bytes=false (can't be handled by 32-bit fixed-point)
+ * doubleq=false dqthresh=false
+ * wait=true
+ */
+/*
+ * alternative red parameters for a slow link.
+ *
+ * assume the queue length jumps from zero to L and then stays at L; it
+ * takes N packets for q_avg to reach 63% of L.
+ * when q_weight is 0.002, N is about 500 packets.
+ * for a slow link like dial-up, 500 packets takes more than 1 minute!
+ * when q_weight is 0.008, N is about 127 packets.
+ * when q_weight is 0.016, N is about 63 packets.
+ * bursts of 50 packets are allowed for 0.002, bursts of 25 packets
+ * are allowed for 0.016.
+ * see Sally's paper for more details.
+ */
+/* normal red parameters */
+#define W_WEIGHT 512 /* inverse of weight of EWMA (511/512) */
+ /* q_weight = 0.00195 */
+
+/* red parameters for a slow link */
+#define W_WEIGHT_1 128 /* inverse of weight of EWMA (127/128) */
+ /* q_weight = 0.0078125 */
+
+/* red parameters for a very slow link (e.g., dialup) */
+#define W_WEIGHT_2 64 /* inverse of weight of EWMA (63/64) */
+ /* q_weight = 0.015625 */
+
+/* fixed-point uses 12-bit decimal places */
+#define FP_SHIFT 12 /* fixed-point shift */
+
+/* red parameters for drop probability */
+#define INV_P_MAX 10 /* inverse of max drop probability */
+#define TH_MIN 5 /* min threshold */
+#define TH_MAX 15 /* max threshold */
+
+#define RED_LIMIT 60 /* default max queue length */
+#define RED_STATS /* collect statistics */
+
+/*
+ * our default policy for forced-drop is drop-tail.
+ * (in altq-1.1.2 or earlier, the default was random-drop.
+ * but it makes more sense to punish the cause of the surge.)
+ * to switch to the random-drop policy, define "RED_RANDOM_DROP".
+ */
+
+#ifdef ALTQ3_COMPAT
+#ifdef ALTQ_FLOWVALVE
+/*
+ * flow-valve is an extension to protect red from unresponsive flows
+ * and to promote end-to-end congestion control.
+ * flow-valve observes the average drop rates of the flows that have
+ * experienced packet drops in the recent past.
+ * when the average drop rate exceeds the threshold, the flow is
+ * blocked by the flow-valve. the trapped flow should back off
+ * exponentially to escape from the flow-valve.
+ */
+#ifdef RED_RANDOM_DROP
+#error "random-drop can't be used with flow-valve!"
+#endif
+#endif /* ALTQ_FLOWVALVE */
+
+/* red_list keeps all red_queue_t's allocated. */
+static red_queue_t *red_list = NULL;
+
+#endif /* ALTQ3_COMPAT */
+
+/*
+ * default red parameter values.
+ * red_alloc() substitutes these when the corresponding argument is 0.
+ * they start out as TH_MIN, TH_MAX and INV_P_MAX and are overwritten,
+ * without validation, by the RED_SETDEFAULTS ioctl.
+ */
+static int default_th_min = TH_MIN;
+static int default_th_max = TH_MAX;
+static int default_inv_pmax = INV_P_MAX;
+
+#ifdef ALTQ3_COMPAT
+/* internal function prototypes */
+static int red_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *);
+static struct mbuf *red_dequeue(struct ifaltq *, int);
+static int red_request(struct ifaltq *, int, void *);
+static void red_purgeq(red_queue_t *);
+static int red_detach(red_queue_t *);
+#ifdef ALTQ_FLOWVALVE
+static __inline struct fve *flowlist_lookup(struct flowvalve *,
+ struct altq_pktattr *, struct timeval *);
+static __inline struct fve *flowlist_reclaim(struct flowvalve *,
+ struct altq_pktattr *);
+static __inline void flowlist_move_to_head(struct flowvalve *, struct fve *);
+static __inline int fv_p2f(struct flowvalve *, int);
+static struct flowvalve *fv_alloc(struct red *);
+static void fv_destroy(struct flowvalve *);
+static int fv_checkflow(struct flowvalve *, struct altq_pktattr *,
+ struct fve **);
+static void fv_dropbyred(struct flowvalve *fv, struct altq_pktattr *,
+ struct fve *);
+#endif
+#endif /* ALTQ3_COMPAT */
+
+/*
+ * red support routines
+ */
+/*
+ * red_alloc: allocate and initialize a red state.
+ *
+ * a zero weight, inv_pmax, th_min, th_max or pkttime selects the
+ * default for that parameter.  when weight is zero, a smaller default
+ * weight is picked for slow links, judged from the packet time.
+ * a weight that is not a power of 2 is rounded down to one and a
+ * message is printed.
+ * returns the new state, or NULL when the allocation fails.
+ */
+red_t *
+red_alloc(int weight, int inv_pmax, int th_min, int th_max, int flags,
+    int pkttime)
+{
+	red_t *rp;
+	int shift, pow2, pps;
+
+	MALLOC(rp, red_t *, sizeof(red_t), M_DEVBUF, M_WAITOK);
+	if (rp == NULL)
+		return (NULL);
+	bzero(rp, sizeof(red_t));
+
+	rp->red_avg = 0;
+	rp->red_idle = 1;
+	rp->red_flags = flags;
+
+	/* a zero argument means "use the default" */
+	rp->red_weight = (weight != 0) ? weight : W_WEIGHT;
+	rp->red_inv_pmax = (inv_pmax != 0) ? inv_pmax : default_inv_pmax;
+	rp->red_thmin = (th_min != 0) ? th_min : default_th_min;
+	rp->red_thmax = (th_max != 0) ? th_max : default_th_max;
+	/* default packet time: 1000 bytes / 10Mbps * 8 * 1000000 */
+	rp->red_pkttime = (pkttime != 0) ? pkttime : 800;
+
+	if (weight == 0) {
+		/* when the link is very slow, adjust red parameters */
+		pps = 1000000 / rp->red_pkttime;
+		if (pps < 50)
+			rp->red_weight = W_WEIGHT_2;	/* up to about 400Kbps */
+		else if (pps < 300)
+			rp->red_weight = W_WEIGHT_1;	/* up to about 2.4Mbps */
+	}
+
+	/* calculate wshift.  weight must be power of 2 */
+	shift = 0;
+	for (pow2 = rp->red_weight; pow2 > 1; pow2 >>= 1)
+		shift++;
+	rp->red_wshift = shift;
+	pow2 = 1 << rp->red_wshift;
+	if (pow2 != rp->red_weight) {
+		printf("invalid weight value %d for red! use %d\n",
+		    rp->red_weight, pow2);
+		rp->red_weight = pow2;
+	}
+
+	/*
+	 * thmin_s and thmax_s are scaled versions of th_min and th_max
+	 * to be compared with avg.
+	 */
+	rp->red_thmin_s = rp->red_thmin << (rp->red_wshift + FP_SHIFT);
+	rp->red_thmax_s = rp->red_thmax << (rp->red_wshift + FP_SHIFT);
+
+	/*
+	 * precompute probability denominator
+	 *  probd = (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point
+	 */
+	rp->red_probd = (2 * (rp->red_thmax - rp->red_thmin)
+	    * rp->red_inv_pmax) << FP_SHIFT;
+
+	/* the weight table is shared among states using the same weight */
+	rp->red_wtab = wtab_alloc(rp->red_weight);
+
+	microtime(&rp->red_last);
+	return (rp);
+}
+
+/*
+ * red_destroy: release a red state allocated by red_alloc().
+ * destroys the attached flow-valve (when compiled in and present),
+ * drops the reference on the weight table and frees the state itself.
+ */
+void
+red_destroy(red_t *rp)
+{
+#if defined(ALTQ3_COMPAT) && defined(ALTQ_FLOWVALVE)
+	if (rp->red_flowvalve != NULL)
+		fv_destroy(rp->red_flowvalve);
+#endif /* ALTQ3_COMPAT && ALTQ_FLOWVALVE */
+	wtab_destroy(rp->red_wtab);
+	FREE(rp, M_DEVBUF);
+}
+
+/*
+ * red_getstats: copy the statistics kept in rp into sp.
+ * q_avg is reported with the weight scaling removed (shifted right by
+ * red_wshift); the counters are copied as they are.
+ */
+void
+red_getstats(red_t *rp, struct redstats *sp)
+{
+	/* counters */
+	sp->marked_packets = rp->red_stats.marked_packets;
+	sp->drop_unforced = rp->red_stats.drop_unforced;
+	sp->drop_forced = rp->red_stats.drop_forced;
+	sp->drop_cnt = rp->red_stats.drop_cnt;
+	sp->xmit_cnt = rp->red_stats.xmit_cnt;
+
+	/* average queue length */
+	sp->q_avg = rp->red_avg >> rp->red_wshift;
+}
+
+/*
+ * red_addq: hand an arriving packet m to the red-managed queue q.
+ *
+ * updates the average queue length estimate kept in rp, then decides
+ * whether the packet is queued, ECN-marked and queued, dropped early,
+ * or force-dropped.
+ * returns 0 when the packet was queued and -1 when a packet was
+ * dropped; on -1 the dropped mbuf has already been freed with
+ * m_freem(), so the caller must not touch m again.
+ */
+int
+red_addq(red_t *rp, class_queue_t *q, struct mbuf *m,
+ struct altq_pktattr *pktattr)
+{
+ int avg, droptype;
+ int n;
+#ifdef ALTQ3_COMPAT
+#ifdef ALTQ_FLOWVALVE
+ struct fve *fve = NULL;
+
+ /* let the flow-valve pre-drop the packet before red looks at it */
+ if (rp->red_flowvalve != NULL && rp->red_flowvalve->fv_flows > 0)
+ if (fv_checkflow(rp->red_flowvalve, pktattr, &fve)) {
+ m_freem(m);
+ return (-1);
+ }
+#endif
+#endif /* ALTQ3_COMPAT */
+
+ avg = rp->red_avg;
+
+ /*
+ * if we were idle, we pretend that n packets arrived during
+ * the idle period.
+ */
+ if (rp->red_idle) {
+ struct timeval now;
+ int t;
+
+ rp->red_idle = 0;
+ microtime(&now);
+ t = (now.tv_sec - rp->red_last.tv_sec);
+ if (t > 60) {
+ /*
+ * being idle for more than 1 minute, set avg to zero.
+ * this prevents t from overflow.
+ */
+ avg = 0;
+ } else {
+ /* t becomes the idle time in microseconds */
+ t = t * 1000000 + (now.tv_usec - rp->red_last.tv_usec);
+ n = t / rp->red_pkttime - 1;
+
+ /* the following line does (avg = (1 - Wq)^n * avg) */
+ if (n > 0)
+ avg = (avg >> FP_SHIFT) *
+ pow_w(rp->red_wtab, n);
+ }
+ }
+
+ /* run estimator. (note: avg is scaled by WEIGHT in fixed-point) */
+ avg += (qlen(q) << FP_SHIFT) - (avg >> rp->red_wshift);
+ rp->red_avg = avg; /* save the new value */
+
+ /*
+ * red_count keeps a tally of arriving traffic that has not
+ * been dropped.
+ */
+ rp->red_count++;
+
+ /* see if we drop early */
+ droptype = DTYPE_NODROP;
+ if (avg >= rp->red_thmin_s && qlen(q) > 1) {
+ if (avg >= rp->red_thmax_s) {
+ /* avg >= th_max: forced drop */
+ droptype = DTYPE_FORCED;
+ } else if (rp->red_old == 0) {
+ /* first exceeds th_min */
+ rp->red_count = 1;
+ rp->red_old = 1;
+ } else if (drop_early((avg - rp->red_thmin_s) >> rp->red_wshift,
+ rp->red_probd, rp->red_count)) {
+ /* mark or drop by red */
+ if ((rp->red_flags & REDF_ECN) &&
+ mark_ecn(m, pktattr, rp->red_flags)) {
+ /* successfully marked. do not drop. */
+ rp->red_count = 0;
+#ifdef RED_STATS
+ rp->red_stats.marked_packets++;
+#endif
+ } else {
+ /* unforced drop by red */
+ droptype = DTYPE_EARLY;
+ }
+ }
+ } else {
+ /* avg < th_min */
+ rp->red_old = 0;
+ }
+
+ /*
+ * if the queue length hits the hard limit, it's a forced drop.
+ */
+ if (droptype == DTYPE_NODROP && qlen(q) >= qlimit(q))
+ droptype = DTYPE_FORCED;
+
+#ifdef RED_RANDOM_DROP
+ /* if successful or forced drop, enqueue this packet. */
+ if (droptype != DTYPE_EARLY)
+ _addq(q, m);
+#else
+ /* if successful, enqueue this packet. */
+ if (droptype == DTYPE_NODROP)
+ _addq(q, m);
+#endif
+ if (droptype != DTYPE_NODROP) {
+ if (droptype == DTYPE_EARLY) {
+ /* drop the incoming packet */
+#ifdef RED_STATS
+ rp->red_stats.drop_unforced++;
+#endif
+ } else {
+ /* forced drop, select a victim packet in the queue. */
+#ifdef RED_RANDOM_DROP
+ /* from here on m is the victim, not the arriving packet */
+ m = _getq_random(q);
+#endif
+#ifdef RED_STATS
+ rp->red_stats.drop_forced++;
+#endif
+ }
+#ifdef RED_STATS
+ PKTCNTR_ADD(&rp->red_stats.drop_cnt, m_pktlen(m));
+#endif
+ rp->red_count = 0;
+#ifdef ALTQ3_COMPAT
+#ifdef ALTQ_FLOWVALVE
+ if (rp->red_flowvalve != NULL)
+ fv_dropbyred(rp->red_flowvalve, pktattr, fve);
+#endif
+#endif /* ALTQ3_COMPAT */
+ m_freem(m);
+ return (-1);
+ }
+ /* successfully queued */
+#ifdef RED_STATS
+ PKTCNTR_ADD(&rp->red_stats.xmit_cnt, m_pktlen(m));
+#endif
+ return (0);
+}
+
+/*
+ * early-drop probability is calculated as follows:
+ * prob = p_max * (avg - th_min) / (th_max - th_min)
+ * prob_a = prob / (2 - count*prob)
+ * = (avg-th_min) / (2*(th_max-th_min)*inv_p_max - count*(avg-th_min))
+ * here prob_a increases as successive undrop count increases.
+ * (prob_a starts from prob/2, becomes prob when (count == (1 / prob)),
+ * becomes 1 when (count >= (2 / prob))).
+ */
+/*
+ * drop_early: decide whether red should drop (or mark) a packet.
+ *
+ * fp_len is the fixed-point distance of avg above th_min, fp_probd
+ * the precomputed probability denominator, and count the number of
+ * packets accepted since the last drop/mark.
+ * returns 1 to drop or mark, 0 to let the packet pass.
+ */
+int
+drop_early(int fp_len, int fp_probd, int count)
+{
+	int denom;	/* denominator of drop-probability */
+
+	denom = fp_probd - count * fp_len;
+
+	/*
+	 * the packet passes only while the denominator is still positive
+	 * and the random draw in [0, denom) lands at or above fp_len,
+	 * i.e. drop probability = fp_len / denom.  a non-positive
+	 * denominator means count exceeded the hard limit.
+	 */
+	if (denom > 0 && (arc4random() % denom) >= fp_len)
+		return (0);	/* no drop/mark */
+
+	return (1);		/* drop or mark */
+}
+
+/*
+ * try to mark CE bit to the packet.
+ * returns 1 if successfully marked, 0 otherwise.
+ *
+ * the address family and header pointer are taken from the pf
+ * PACKET_TAG_PF_QID mbuf tag when one is attached, otherwise (only
+ * with ALTQ3_COMPAT) from pktattr.  flags selects which families may
+ * be marked (REDF_ECN4 / REDF_ECN6).  a packet that already carries
+ * CE counts as marked; a not-ECT packet is never marked.
+ * the IP header is modified in place.
+ */
+int
+mark_ecn(struct mbuf *m, struct altq_pktattr *pktattr, int flags)
+{
+ struct mbuf *m0;
+ struct m_tag *t;
+ struct altq_tag *at;
+ void *hdr;
+ int af;
+
+ t = m_tag_find(m, PACKET_TAG_PF_QID, NULL);
+ if (t != NULL) {
+ /* the altq_tag is read from right behind the m_tag header */
+ at = (struct altq_tag *)(t + 1);
+ /* NOTE(review): t is non-NULL here, so t + 1 cannot be NULL */
+ if (at == NULL)
+ return (0);
+ af = at->af;
+ hdr = at->hdr;
+#ifdef ALTQ3_COMPAT
+ } else if (pktattr != NULL) {
+ af = pktattr->pattr_af;
+ hdr = pktattr->pattr_hdr;
+#endif /* ALTQ3_COMPAT */
+ } else
+ return (0);
+
+ if (af != AF_INET && af != AF_INET6)
+ return (0);
+
+ /* verify that pattr_hdr is within the mbuf data */
+ for (m0 = m; m0 != NULL; m0 = m0->m_next)
+ if (((caddr_t)hdr >= m0->m_data) &&
+ ((caddr_t)hdr < m0->m_data + m0->m_len))
+ break;
+ if (m0 == NULL) {
+ /* ick, tag info is stale */
+ return (0);
+ }
+
+ switch (af) {
+ case AF_INET:
+ if (flags & REDF_ECN4) {
+ struct ip *ip = hdr;
+ u_int8_t otos;
+ int sum;
+
+ if (ip->ip_v != 4)
+ return (0); /* version mismatch! */
+
+ if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_NOTECT)
+ return (0); /* not-ECT */
+ if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_CE)
+ return (1); /* already marked */
+
+ /*
+ * ecn-capable but not marked,
+ * mark CE and update checksum
+ */
+ otos = ip->ip_tos;
+ ip->ip_tos |= IPTOS_ECN_CE;
+ /*
+ * update checksum (from RFC1624)
+ * HC' = ~(~HC + ~m + m')
+ */
+ sum = ~ntohs(ip->ip_sum) & 0xffff;
+ sum += (~otos & 0xffff) + ip->ip_tos;
+ /* fold the carries back into the low 16 bits */
+ sum = (sum >> 16) + (sum & 0xffff);
+ sum += (sum >> 16); /* add carry */
+ ip->ip_sum = htons(~sum & 0xffff);
+ return (1);
+ }
+ break;
+#ifdef INET6
+ case AF_INET6:
+ if (flags & REDF_ECN6) {
+ struct ip6_hdr *ip6 = hdr;
+ u_int32_t flowlabel;
+
+ /*
+ * work on the first 32-bit word of the header in host
+ * order; the ECN bits are tested at bit offset 20.
+ */
+ flowlabel = ntohl(ip6->ip6_flow);
+ if ((flowlabel >> 28) != 6)
+ return (0); /* version mismatch! */
+ if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
+ (IPTOS_ECN_NOTECT << 20))
+ return (0); /* not-ECT */
+ if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
+ (IPTOS_ECN_CE << 20))
+ return (1); /* already marked */
+ /*
+ * ecn-capable but not marked, mark CE
+ */
+ flowlabel |= (IPTOS_ECN_CE << 20);
+ ip6->ip6_flow = htonl(flowlabel);
+ return (1);
+ }
+ break;
+#endif /* INET6 */
+ }
+
+ /* not marked */
+ return (0);
+}
+
+/*
+ * red_getq: take the next packet off q.
+ * when the queue turns out to be empty, the red state enters the idle
+ * period (once) and the time is recorded in red_last; a successful
+ * dequeue clears the idle flag.
+ * returns the dequeued mbuf, or NULL when q is empty.
+ */
+struct mbuf *
+red_getq(red_t *rp, class_queue_t *q)
+{
+	struct mbuf *m;
+
+	m = _getq(q);
+	if (m != NULL) {
+		rp->red_idle = 0;
+		return (m);
+	}
+
+	/* nothing queued: start the idle period unless already idle */
+	if (!rp->red_idle) {
+		rp->red_idle = 1;
+		microtime(&rp->red_last);
+	}
+	return (NULL);
+}
+
+/*
+ * helper routine to calibrate avg during idle.
+ * pow_w(wtab, n) returns (1 - Wq)^n in fixed-point
+ * here Wq = 1/weight and the code assumes Wq is close to zero.
+ *
+ * w_tab[n] holds ((1 - Wq)^(2^n)) in fixed-point.
+ */
+static struct wtab *wtab_list = NULL; /* pointer to wtab list */
+
+/*
+ * wtab_alloc: get the weight table for the given weight.
+ * tables are shared: when one for this weight is already on
+ * wtab_list its reference count is bumped and it is returned;
+ * otherwise a new table is allocated, filled in and put at the head
+ * of the list.  panics when the allocation fails.
+ */
+struct wtab *
+wtab_alloc(int weight)
+{
+	struct wtab *w;
+	int i;
+
+	/* look for an existing table first */
+	w = wtab_list;
+	while (w != NULL && w->w_weight != weight)
+		w = w->w_next;
+	if (w != NULL) {
+		w->w_refcount++;
+		return (w);
+	}
+
+	MALLOC(w, struct wtab *, sizeof(struct wtab), M_DEVBUF, M_WAITOK);
+	if (w == NULL)
+		panic("wtab_alloc: malloc failed!");
+	bzero(w, sizeof(struct wtab));
+	w->w_refcount = 1;
+	w->w_weight = weight;
+	w->w_next = wtab_list;
+	wtab_list = w;
+
+	/*
+	 * initialize the weight table: each entry is the square of the
+	 * previous one.  w_param_max records 2^i for the first entry
+	 * that underflows to zero.
+	 */
+	w->w_tab[0] = ((weight - 1) << FP_SHIFT) / weight;
+	for (i = 1; i < 32; i++) {
+		w->w_tab[i] = (w->w_tab[i-1] * w->w_tab[i-1]) >> FP_SHIFT;
+		if (w->w_param_max == 0 && w->w_tab[i] == 0)
+			w->w_param_max = 1 << i;
+	}
+
+	return (w);
+}
+
+/*
+ * wtab_destroy: drop one reference on a weight table.
+ * the table is unlinked from wtab_list and freed when the last
+ * reference goes away.  always returns 0.
+ */
+int
+wtab_destroy(struct wtab *w)
+{
+	struct wtab **linkp;
+
+	w->w_refcount--;
+	if (w->w_refcount > 0)
+		return (0);
+
+	/* find the link that points at w and bypass it */
+	for (linkp = &wtab_list; *linkp != NULL; linkp = &(*linkp)->w_next) {
+		if (*linkp == w) {
+			*linkp = w->w_next;
+			break;
+		}
+	}
+
+	FREE(w, M_DEVBUF);
+	return (0);
+}
+
+/*
+ * pow_w: return (1 - Wq)^n in fixed-point, using the table w.
+ * returns 0 when n is at or beyond w_param_max, and 1.0
+ * (1 << FP_SHIFT) when n is not positive.  otherwise the table
+ * entries selected by the set bits of n are multiplied together.
+ */
+int32_t
+pow_w(struct wtab *w, int n)
+{
+	int32_t val;
+	int i;
+
+	if (n >= w->w_param_max)
+		return (0);
+
+	val = 1 << FP_SHIFT;
+	if (n <= 0)
+		return (val);
+
+	/* n is positive here: consume it one bit at a time */
+	for (i = 0; n != 0; i++, n >>= 1) {
+		if (n & 1)
+			val = (val * w->w_tab[i]) >> FP_SHIFT;
+	}
+	return (val);
+}
+
+#ifdef ALTQ3_COMPAT
+/*
+ * red device interface
+ */
+altqdev_decl(red);
+
+/*
+ * redopen: open entry point of the red device.
+ * does nothing and always returns 0; none of the arguments is used.
+ * the type of p depends on __FreeBSD_version.
+ */
+int
+redopen(dev, flag, fmt, p)
+ dev_t dev;
+ int flag, fmt;
+#if (__FreeBSD_version > 500000)
+ struct thread *p;
+#else
+ struct proc *p;
+#endif
+{
+ /* everything will be done when the queueing scheme is attached. */
+ return 0;
+}
+
+/*
+ * redclose: close entry point of the red device.
+ * detaches and destroys every red state on red_list.
+ * returns 0, or the first error reported by red_detach().
+ *
+ * the list is walked exactly once.  red_detach() leaves a state on
+ * red_list when it fails, so looping "while the list is not empty"
+ * would spin forever on a state that cannot be detached.
+ */
+int
+redclose(dev, flag, fmt, p)
+	dev_t dev;
+	int flag, fmt;
+#if (__FreeBSD_version > 500000)
+	struct thread *p;
+#else
+	struct proc *p;
+#endif
+{
+	red_queue_t *rqp, *next;
+	int err, error = 0;
+
+	for (rqp = red_list; rqp != NULL; rqp = next) {
+		/* fetch the successor first: rqp is freed on success */
+		next = rqp->rq_next;
+
+		/* destroy all */
+		err = red_detach(rqp);
+		if (err != 0 && error == 0)
+			error = err;
+	}
+
+	return error;
+}
+
+/*
+ * redioctl: ioctl entry point of the red device.
+ *
+ * every command except RED_GETSTATS requires super-user privilege.
+ * addr points at the command's argument structure; several commands
+ * write results back into it.
+ * returns 0 on success or an errno value: EBADF when no red state is
+ * attached to the named interface, ENXIO when the interface does not
+ * exist, ENOMEM on allocation failure, EINVAL for an unknown command,
+ * or whatever suser()/altq_*() report.
+ */
+int
+redioctl(dev, cmd, addr, flag, p)
+ dev_t dev;
+ ioctlcmd_t cmd;
+ caddr_t addr;
+ int flag;
+#if (__FreeBSD_version > 500000)
+ struct thread *p;
+#else
+ struct proc *p;
+#endif
+{
+ red_queue_t *rqp;
+ struct red_interface *ifacep;
+ struct ifnet *ifp;
+ int error = 0;
+
+ /* check super-user privilege */
+ switch (cmd) {
+ case RED_GETSTATS:
+ break;
+ default:
+#if (__FreeBSD_version > 400000)
+ if ((error = suser(p)) != 0)
+#else
+ if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
+#endif
+ return (error);
+ break;
+ }
+
+ switch (cmd) {
+
+ case RED_ENABLE:
+ ifacep = (struct red_interface *)addr;
+ if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
+ error = EBADF;
+ break;
+ }
+ error = altq_enable(rqp->rq_ifq);
+ break;
+
+ case RED_DISABLE:
+ ifacep = (struct red_interface *)addr;
+ if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
+ error = EBADF;
+ break;
+ }
+ error = altq_disable(rqp->rq_ifq);
+ break;
+
+ case RED_IF_ATTACH:
+ ifp = ifunit(((struct red_interface *)addr)->red_ifname);
+ if (ifp == NULL) {
+ error = ENXIO;
+ break;
+ }
+
+ /* allocate and initialize red_queue_t */
+ MALLOC(rqp, red_queue_t *, sizeof(red_queue_t), M_DEVBUF, M_WAITOK);
+ if (rqp == NULL) {
+ error = ENOMEM;
+ break;
+ }
+ bzero(rqp, sizeof(red_queue_t));
+
+ MALLOC(rqp->rq_q, class_queue_t *, sizeof(class_queue_t),
+ M_DEVBUF, M_WAITOK);
+ if (rqp->rq_q == NULL) {
+ FREE(rqp, M_DEVBUF);
+ error = ENOMEM;
+ break;
+ }
+ bzero(rqp->rq_q, sizeof(class_queue_t));
+
+ /* all-zero arguments: start with the default red parameters */
+ rqp->rq_red = red_alloc(0, 0, 0, 0, 0, 0);
+ if (rqp->rq_red == NULL) {
+ FREE(rqp->rq_q, M_DEVBUF);
+ FREE(rqp, M_DEVBUF);
+ error = ENOMEM;
+ break;
+ }
+
+ rqp->rq_ifq = &ifp->if_snd;
+ qtail(rqp->rq_q) = NULL;
+ qlen(rqp->rq_q) = 0;
+ qlimit(rqp->rq_q) = RED_LIMIT;
+ qtype(rqp->rq_q) = Q_RED;
+
+ /*
+ * set RED to this ifnet structure.
+ */
+ error = altq_attach(rqp->rq_ifq, ALTQT_RED, rqp,
+ red_enqueue, red_dequeue, red_request,
+ NULL, NULL);
+ if (error) {
+ /* undo everything allocated above */
+ red_destroy(rqp->rq_red);
+ FREE(rqp->rq_q, M_DEVBUF);
+ FREE(rqp, M_DEVBUF);
+ break;
+ }
+
+ /* add this state to the red list */
+ rqp->rq_next = red_list;
+ red_list = rqp;
+ break;
+
+ case RED_IF_DETACH:
+ ifacep = (struct red_interface *)addr;
+ if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
+ error = EBADF;
+ break;
+ }
+ error = red_detach(rqp);
+ break;
+
+ case RED_GETSTATS:
+ /* do { } while (0) gives the error path a "break" target */
+ do {
+ struct red_stats *q_stats;
+ red_t *rp;
+
+ q_stats = (struct red_stats *)addr;
+ if ((rqp = altq_lookup(q_stats->iface.red_ifname,
+ ALTQT_RED)) == NULL) {
+ error = EBADF;
+ break;
+ }
+
+ q_stats->q_len = qlen(rqp->rq_q);
+ q_stats->q_limit = qlimit(rqp->rq_q);
+
+ rp = rqp->rq_red;
+ q_stats->q_avg = rp->red_avg >> rp->red_wshift;
+ q_stats->xmit_cnt = rp->red_stats.xmit_cnt;
+ q_stats->drop_cnt = rp->red_stats.drop_cnt;
+ q_stats->drop_forced = rp->red_stats.drop_forced;
+ q_stats->drop_unforced = rp->red_stats.drop_unforced;
+ q_stats->marked_packets = rp->red_stats.marked_packets;
+
+ q_stats->weight = rp->red_weight;
+ q_stats->inv_pmax = rp->red_inv_pmax;
+ q_stats->th_min = rp->red_thmin;
+ q_stats->th_max = rp->red_thmax;
+
+ /* flow-valve counters read as zero when no flow-valve exists */
+#ifdef ALTQ_FLOWVALVE
+ if (rp->red_flowvalve != NULL) {
+ struct flowvalve *fv = rp->red_flowvalve;
+ q_stats->fv_flows = fv->fv_flows;
+ q_stats->fv_pass = fv->fv_stats.pass;
+ q_stats->fv_predrop = fv->fv_stats.predrop;
+ q_stats->fv_alloc = fv->fv_stats.alloc;
+ q_stats->fv_escape = fv->fv_stats.escape;
+ } else {
+#endif /* ALTQ_FLOWVALVE */
+ q_stats->fv_flows = 0;
+ q_stats->fv_pass = 0;
+ q_stats->fv_predrop = 0;
+ q_stats->fv_alloc = 0;
+ q_stats->fv_escape = 0;
+#ifdef ALTQ_FLOWVALVE
+ }
+#endif /* ALTQ_FLOWVALVE */
+ } while (/*CONSTCOND*/ 0);
+ break;
+
+ case RED_CONFIG:
+ do {
+ struct red_conf *fc;
+ red_t *new;
+ int s, limit;
+
+ fc = (struct red_conf *)addr;
+ if ((rqp = altq_lookup(fc->iface.red_ifname,
+ ALTQT_RED)) == NULL) {
+ error = EBADF;
+ break;
+ }
+ /* build the replacement state before touching the old one */
+ new = red_alloc(fc->red_weight,
+ fc->red_inv_pmax,
+ fc->red_thmin,
+ fc->red_thmax,
+ fc->red_flags,
+ fc->red_pkttime);
+ if (new == NULL) {
+ error = ENOMEM;
+ break;
+ }
+
+#ifdef __NetBSD__
+ s = splnet();
+#else
+ s = splimp();
+#endif
+ red_purgeq(rqp);
+ /* the queue limit is raised to at least the requested th_max */
+ limit = fc->red_limit;
+ if (limit < fc->red_thmax)
+ limit = fc->red_thmax;
+ qlimit(rqp->rq_q) = limit;
+ fc->red_limit = limit; /* write back the new value */
+
+ red_destroy(rqp->rq_red);
+ rqp->rq_red = new;
+
+ splx(s);
+
+ /* write back new values */
+ fc->red_limit = limit;
+ fc->red_inv_pmax = rqp->rq_red->red_inv_pmax;
+ fc->red_thmin = rqp->rq_red->red_thmin;
+ fc->red_thmax = rqp->rq_red->red_thmax;
+
+ } while (/*CONSTCOND*/ 0);
+ break;
+
+ case RED_SETDEFAULTS:
+ /* the new defaults are stored as given, without range checks */
+ do {
+ struct redparams *rp;
+
+ rp = (struct redparams *)addr;
+
+ default_th_min = rp->th_min;
+ default_th_max = rp->th_max;
+ default_inv_pmax = rp->inv_pmax;
+ } while (/*CONSTCOND*/ 0);
+ break;
+
+ default:
+ error = EINVAL;
+ break;
+ }
+ return error;
+}
+
+/*
+ * red_detach: detach a red state from its interface and destroy it.
+ * the discipline is disabled first when it is still enabled.  when
+ * altq_detach() fails its error is returned and the state is left
+ * untouched (still on red_list).  otherwise the state is removed from
+ * red_list, everything it owns is freed, and 0 is returned.
+ */
+static int
+red_detach(red_queue_t *rqp)
+{
+	red_queue_t **linkp;
+	int error;
+
+	if (ALTQ_IS_ENABLED(rqp->rq_ifq))
+		altq_disable(rqp->rq_ifq);
+
+	error = altq_detach(rqp->rq_ifq);
+	if (error != 0)
+		return (error);
+
+	/* locate the link pointing at rqp and bypass it */
+	for (linkp = &red_list; *linkp != NULL; linkp = &(*linkp)->rq_next)
+		if (*linkp == rqp)
+			break;
+	if (*linkp != NULL)
+		*linkp = rqp->rq_next;
+	else
+		printf("red_detach: no state found in red_list!\n");
+
+	red_destroy(rqp->rq_red);
+	FREE(rqp->rq_q, M_DEVBUF);
+	FREE(rqp, M_DEVBUF);
+	return (error);
+}
+
+/*
+ * enqueue routine:
+ *
+ * returns: 0 when successfully queued.
+ * ENOBUFS when drop occurs.
+ */
+/*
+ * red_enqueue: altq enqueue hook for red.
+ * passes the packet to red_addq() and counts it in ifq_len when it
+ * was accepted; ENOBUFS is returned when red_addq() dropped a packet.
+ */
+static int
+red_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr)
+{
+	red_queue_t *rqp;
+	int queued;
+
+	rqp = (red_queue_t *)ifq->altq_disc;
+	queued = (red_addq(rqp->rq_red, rqp->rq_q, m, pktattr) >= 0);
+	if (!queued)
+		return ENOBUFS;
+
+	ifq->ifq_len++;
+	return 0;
+}
+
+/*
+ * dequeue routine:
+ * must be called in splimp.
+ *
+ * returns: mbuf dequeued.
+ * NULL when no packet is available in the queue.
+ */
+
+/*
+ * red_dequeue: altq dequeue hook for red.
+ * ALTDQ_POLL returns the head of the queue without removing it; any
+ * other op removes a packet through red_getq() and decrements ifq_len
+ * when one was obtained.
+ */
+static struct mbuf *
+red_dequeue(struct ifaltq *ifq, int op)
+{
+	red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
+	struct mbuf *m;
+
+	if (op != ALTDQ_POLL) {
+		/* op == ALTDQ_REMOVE */
+		m = red_getq(rqp->rq_red, rqp->rq_q);
+		if (m != NULL)
+			ifq->ifq_len--;
+	} else
+		m = qhead(rqp->rq_q);
+
+	return (m);
+}
+
+/*
+ * red_request: altq request hook for red.
+ * only ALTRQ_PURGE is acted upon (the queue is purged); every other
+ * request is ignored.  arg is not used.  always returns 0.
+ */
+static int
+red_request(struct ifaltq *ifq, int req, void *arg)
+{
+	red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
+
+	if (req == ALTRQ_PURGE)
+		red_purgeq(rqp);
+
+	return (0);
+}
+
+/*
+ * red_purgeq: discard every packet held in the red queue.
+ * the interface queue length is reset to 0 only while the discipline
+ * is enabled on the interface.
+ */
+static void
+red_purgeq(red_queue_t *rqp)
+{
+	_flushq(rqp->rq_q);
+
+	if (!ALTQ_IS_ENABLED(rqp->rq_ifq))
+		return;
+	rqp->rq_ifq->ifq_len = 0;
+}
+
+#ifdef ALTQ_FLOWVALVE
+
+#define FV_PSHIFT 7 /* weight of average drop rate -- 1/128 */
+#define FV_PSCALE(x) ((x) << FV_PSHIFT)
+#define FV_PUNSCALE(x) ((x) >> FV_PSHIFT)
+#define FV_FSHIFT 5 /* weight of average fraction -- 1/32 */
+#define FV_FSCALE(x) ((x) << FV_FSHIFT)
+#define FV_FUNSCALE(x) ((x) >> FV_FSHIFT)
+
+#define FV_TIMER (3 * hz) /* timer value for garbage collector */
+#define FV_FLOWLISTSIZE 64 /* how many flows in flowlist */
+
+#define FV_N 10 /* update fve_f every FV_N packets */
+
+#define FV_BACKOFFTHRESH 1 /* backoff threshold interval in second */
+#define FV_TTHRESH 3 /* time threshold to delete fve */
+#define FV_ALPHA 5 /* extra packet count */
+
+#define FV_STATS
+
+#if (__FreeBSD_version > 300000)
+#define FV_TIMESTAMP(tp) getmicrotime(tp)
+#else
+#define FV_TIMESTAMP(tp) { (*(tp)) = time; }
+#endif
+
+/*
+ * Brtt table: 128 entry table (entry 0 is a placeholder) to convert drop rate (p) to
+ * the corresponding bandwidth fraction (f)
+ * the following equation is implemented to use scaled values,
+ * fve_p and fve_f, in the fixed point format.
+ *
+ * Brtt(p) = 1 /(sqrt(4*p/3) + min(1,3*sqrt(p*6/8)) * p * (1+32 * p*p))
+ * f = Brtt(p) / (max_th + alpha)
+ */
+#define BRTT_SIZE 128
+#define BRTT_SHIFT 12
+#define BRTT_MASK 0x0007f000
+#define BRTT_PMAX (1 << (FV_PSHIFT + FP_SHIFT))
+
+const int brtt_tab[BRTT_SIZE] = {
+ 0, 1262010, 877019, 703694, 598706, 525854, 471107, 427728,
+ 392026, 361788, 335598, 312506, 291850, 273158, 256081, 240361,
+ 225800, 212247, 199585, 187788, 178388, 169544, 161207, 153333,
+ 145888, 138841, 132165, 125836, 119834, 114141, 108739, 103612,
+ 98747, 94129, 89746, 85585, 81637, 77889, 74333, 70957,
+ 67752, 64711, 61824, 59084, 56482, 54013, 51667, 49440,
+ 47325, 45315, 43406, 41591, 39866, 38227, 36667, 35184,
+ 33773, 32430, 31151, 29933, 28774, 27668, 26615, 25611,
+ 24653, 23740, 22868, 22035, 21240, 20481, 19755, 19062,
+ 18399, 17764, 17157, 16576, 16020, 15487, 14976, 14487,
+ 14017, 13567, 13136, 12721, 12323, 11941, 11574, 11222,
+ 10883, 10557, 10243, 9942, 9652, 9372, 9103, 8844,
+ 8594, 8354, 8122, 7898, 7682, 7474, 7273, 7079,
+ 6892, 6711, 6536, 6367, 6204, 6046, 5893, 5746,
+ 5603, 5464, 5330, 5201, 5075, 4954, 4836, 4722,
+ 4611, 4504, 4400, 4299, 4201, 4106, 4014, 3924
+};
+
+/*
+ * flowlist_lookup: find the flow entry matching the packet.
+ *
+ * walks the flow list from its head and compares the source and
+ * destination addresses of the header in pktattr with each entry of
+ * the same address family.  the walk stops at the first entry whose
+ * fve_lastdrop.tv_sec is 0, or whose last drop is more than
+ * FV_TTHRESH seconds before now (that entry's tv_sec is then reset
+ * to 0).
+ * returns the matching entry, or NULL when pktattr is NULL, the
+ * address family is not handled, or no entry matches.  on a miss for
+ * a handled family, the number of entries walked is saved in
+ * fv->fv_flows.
+ */
+static __inline struct fve *
+flowlist_lookup(fv, pktattr, now)
+ struct flowvalve *fv;
+ struct altq_pktattr *pktattr;
+ struct timeval *now;
+{
+ struct fve *fve;
+ int flows;
+ struct ip *ip;
+#ifdef INET6
+ struct ip6_hdr *ip6;
+#endif
+ struct timeval tthresh;
+
+ if (pktattr == NULL)
+ return (NULL);
+
+ /* only tv_sec of tthresh is set and used */
+ tthresh.tv_sec = now->tv_sec - FV_TTHRESH;
+ flows = 0;
+ /*
+ * search the flow list
+ */
+ switch (pktattr->pattr_af) {
+ case AF_INET:
+ ip = (struct ip *)pktattr->pattr_hdr;
+ TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){
+ if (fve->fve_lastdrop.tv_sec == 0)
+ break;
+ if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) {
+ fve->fve_lastdrop.tv_sec = 0;
+ break;
+ }
+ if (fve->fve_flow.flow_af == AF_INET &&
+ fve->fve_flow.flow_ip.ip_src.s_addr ==
+ ip->ip_src.s_addr &&
+ fve->fve_flow.flow_ip.ip_dst.s_addr ==
+ ip->ip_dst.s_addr)
+ return (fve);
+ flows++;
+ }
+ break;
+#ifdef INET6
+ case AF_INET6:
+ ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
+ TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){
+ if (fve->fve_lastdrop.tv_sec == 0)
+ break;
+ if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) {
+ fve->fve_lastdrop.tv_sec = 0;
+ break;
+ }
+ if (fve->fve_flow.flow_af == AF_INET6 &&
+ IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_src,
+ &ip6->ip6_src) &&
+ IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_dst,
+ &ip6->ip6_dst))
+ return (fve);
+ flows++;
+ }
+ break;
+#endif /* INET6 */
+
+ default:
+ /* unknown protocol. no drop. */
+ return (NULL);
+ }
+ fv->fv_flows = flows; /* save the number of active fve's */
+ return (NULL);
+}
+
+/*
+ * flowlist_reclaim: recycle the entry at the tail of the flow list
+ * for the flow of the given packet.
+ *
+ * the flow addresses are copied from the header in pktattr, the entry
+ * state is reset, and fv_flows (and the alloc counter with FV_STATS)
+ * is incremented.  pktattr must not be NULL.
+ * returns the recycled entry; the entry is not moved within the list.
+ * NOTE(review): for an address family other than AF_INET/AF_INET6 the
+ * flow key of the entry is left as it was -- confirm callers never
+ * get here with such a packet.
+ */
+static __inline struct fve *
+flowlist_reclaim(fv, pktattr)
+	struct flowvalve *fv;
+	struct altq_pktattr *pktattr;
+{
+	struct fve *fve;
+	struct ip *ip;
+#ifdef INET6
+	struct ip6_hdr *ip6;
+#endif
+
+	/*
+	 * get an entry from the tail of the LRU list.
+	 */
+	fve = TAILQ_LAST(&fv->fv_flowlist, fv_flowhead);
+
+	switch (pktattr->pattr_af) {
+	case AF_INET:
+		ip = (struct ip *)pktattr->pattr_hdr;
+		fve->fve_flow.flow_af = AF_INET;
+		fve->fve_flow.flow_ip.ip_src = ip->ip_src;
+		fve->fve_flow.flow_ip.ip_dst = ip->ip_dst;
+		break;
+#ifdef INET6
+	case AF_INET6:
+		ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
+		fve->fve_flow.flow_af = AF_INET6;
+		fve->fve_flow.flow_ip6.ip6_src = ip6->ip6_src;
+		fve->fve_flow.flow_ip6.ip6_dst = ip6->ip6_dst;
+		break;
+#endif
+	}
+
+	fve->fve_state = Green;
+	/*
+	 * fve_p and fve_f are scaled fixed-point values; reset them with
+	 * integer constants, not floating-point literals.
+	 */
+	fve->fve_p = 0;
+	fve->fve_f = 0;
+	fve->fve_ifseq = fv->fv_ifseq - 1;
+	fve->fve_count = 0;
+
+	fv->fv_flows++;
+#ifdef FV_STATS
+	fv->fv_stats.alloc++;
+#endif
+	return (fve);
+}
+
+/*
+ * flowlist_move_to_head: make fve the first entry of the flow list.
+ * nothing is done when it already is the first entry.
+ */
+static __inline void
+flowlist_move_to_head(struct flowvalve *fv, struct fve *fve)
+{
+	if (TAILQ_FIRST(&fv->fv_flowlist) == fve)
+		return;
+
+	TAILQ_REMOVE(&fv->fv_flowlist, fve, fve_lru);
+	TAILQ_INSERT_HEAD(&fv->fv_flowlist, fve, fve_lru);
+}
+
+/*
+ * allocate and initialize a flowvalve structure for the red state rp.
+ *
+ * three allocations are made: the flowvalve itself, the array of
+ * FV_FLOWLISTSIZE flow entries (all linked on the LRU list) and the
+ * drop-rate-to-fraction (p2f) table.  when any of them fails, whatever
+ * was allocated so far is released and NULL is returned.
+ */
+static struct flowvalve *
+fv_alloc(rp)
+	struct red *rp;
+{
+	struct flowvalve *fv;
+	int idx, nentries;
+
+	nentries = FV_FLOWLISTSIZE;
+
+	MALLOC(fv, struct flowvalve *, sizeof(struct flowvalve),
+	    M_DEVBUF, M_WAITOK);
+	if (fv == NULL)
+		return (NULL);
+	bzero(fv, sizeof(struct flowvalve));
+
+	/* the flow entries live in one contiguous array */
+	MALLOC(fv->fv_fves, struct fve *, sizeof(struct fve) * nentries,
+	    M_DEVBUF, M_WAITOK);
+	if (fv->fv_fves == NULL)
+		goto fail_fv;
+	bzero(fv->fv_fves, sizeof(struct fve) * nentries);
+
+	/* put every entry on the LRU list, marked as unused */
+	fv->fv_flows = 0;
+	TAILQ_INIT(&fv->fv_flowlist);
+	for (idx = 0; idx < nentries; idx++) {
+		struct fve *entry = &fv->fv_fves[idx];
+
+		entry->fve_lastdrop.tv_sec = 0;
+		TAILQ_INSERT_TAIL(&fv->fv_flowlist, entry, fve_lru);
+	}
+
+	/* initialize drop rate threshold in scaled fixed-point */
+	fv->fv_pthresh = (FV_PSCALE(1) << FP_SHIFT) / rp->red_inv_pmax;
+
+	/* initialize drop rate to fraction table */
+	MALLOC(fv->fv_p2ftab, int *, sizeof(int) * BRTT_SIZE,
+	    M_DEVBUF, M_WAITOK);
+	if (fv->fv_p2ftab == NULL)
+		goto fail_fves;
+
+	/*
+	 * fill in the p2f table, starting at index 1.
+	 * (the value is shifted up before the division and back down
+	 * afterwards to keep the precision)
+	 */
+	for (idx = 1; idx < BRTT_SIZE; idx++) {
+		int scaled;
+
+		scaled = brtt_tab[idx] << 8;
+		fv->fv_p2ftab[idx] =
+		    (scaled / (rp->red_thmax + FV_ALPHA)) >> 8;
+	}
+
+	return (fv);
+
+fail_fves:
+	FREE(fv->fv_fves, M_DEVBUF);
+fail_fv:
+	FREE(fv, M_DEVBUF);
+	return (NULL);
+}
+
+/*
+ * release a flowvalve structure: its p2f table, its array of flow
+ * entries and finally the structure itself.
+ * fv must not be NULL, it is dereferenced unconditionally.
+ */
+static void fv_destroy(fv)
+ struct flowvalve *fv;
+{
+ FREE(fv->fv_p2ftab, M_DEVBUF);
+ FREE(fv->fv_fves, M_DEVBUF);
+ FREE(fv, M_DEVBUF);
+}
+
+/*
+ * look up the fraction threshold for drop rate p in the p2f table.
+ *   - p at or above BRTT_PMAX uses the last table slot.
+ *   - otherwise the bits of p selected by BRTT_MASK, shifted down by
+ *     BRTT_SHIFT, are the table index.
+ *   - when none of those bits is set, slot 1 is used.
+ */
+static __inline int
+fv_p2f(fv, p)
+	struct flowvalve *fv;
+	int p;
+{
+	int bits;
+
+	if (p >= BRTT_PMAX)
+		return (fv->fv_p2ftab[BRTT_SIZE-1]);
+
+	bits = p & BRTT_MASK;
+	if (bits != 0)
+		return (fv->fv_p2ftab[bits >> BRTT_SHIFT]);
+
+	return (fv->fv_p2ftab[1]);
+}
+
+/*
+ * check if an arriving packet should be pre-dropped.
+ * called from red_addq() when a packet arrives.
+ * returns 1 when the packet should be pre-dropped.
+ * should be called in splimp.
+ *
+ * fv:      flowvalve state; its packet sequence number fv_ifseq is
+ *          advanced on every call.
+ * pktattr: attributes of the arriving packet, used for the flow lookup.
+ * fcache:  out parameter; set to the matching flow entry when one is
+ *          found.  it is left untouched when no entry matches, so the
+ *          caller has to initialize it.
+ */
+static int
+fv_checkflow(fv, pktattr, fcache)
+ struct flowvalve *fv;
+ struct altq_pktattr *pktattr;
+ struct fve **fcache;
+{
+ struct fve *fve;
+ struct timeval now;
+
+ fv->fv_ifseq++;
+ FV_TIMESTAMP(&now);
+
+ if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL)
+ /* no matching entry in the flowlist */
+ return (0);
+
+ *fcache = fve;
+
+ /* update fraction f for every FV_N packets */
+ if (++fve->fve_count == FV_N) {
+ /*
+ * f = Wf * N / (fv_ifseq - fve_ifseq) + (1 - Wf) * f
+ */
+ fve->fve_f =
+ (FV_N << FP_SHIFT) / (fv->fv_ifseq - fve->fve_ifseq)
+ + fve->fve_f - FV_FUNSCALE(fve->fve_f);
+ /* restart the measurement interval at the current packet */
+ fve->fve_ifseq = fv->fv_ifseq;
+ fve->fve_count = 0;
+ }
+
+ /*
+ * overpumping test
+ * (a Green flow turns Red when its drop rate exceeds the threshold
+ * and its fraction exceeds the value looked up for that drop rate)
+ */
+ if (fve->fve_state == Green && fve->fve_p > fv->fv_pthresh) {
+ int fthresh;
+
+ /* calculate a threshold */
+ fthresh = fv_p2f(fv, fve->fve_p);
+ if (fve->fve_f > fthresh)
+ fve->fve_state = Red;
+ }
+
+ if (fve->fve_state == Red) {
+ /*
+ * backoff test
+ */
+ if (now.tv_sec - fve->fve_lastdrop.tv_sec > FV_BACKOFFTHRESH) {
+ /* no drop for at least FV_BACKOFFTHRESH sec */
+ fve->fve_p = 0;
+ fve->fve_state = Green;
+#ifdef FV_STATS
+ fv->fv_stats.escape++;
+#endif
+ } else {
+ /* block this flow */
+ flowlist_move_to_head(fv, fve);
+ fve->fve_lastdrop = now;
+#ifdef FV_STATS
+ fv->fv_stats.predrop++;
+#endif
+ return (1);
+ }
+ }
+
+ /*
+ * p = (1 - Wp) * p
+ */
+ fve->fve_p -= FV_PUNSCALE(fve->fve_p);
+ /* never let the decayed drop rate go negative */
+ if (fve->fve_p < 0)
+ fve->fve_p = 0;
+#ifdef FV_STATS
+ fv->fv_stats.pass++;
+#endif
+ return (0);
+}
+
+/*
+ * account a packet dropped by red to its flow.
+ * called from red_addq when a packet is dropped by red.
+ * should be called in splimp.
+ *
+ * fcache is the flow entry already found for this packet, or NULL.
+ * without a cached entry the flow is looked up and, when it is not
+ * in the list, the least recently used entry is reclaimed for it.
+ * nothing is done when pktattr is NULL.
+ */
+static void
+fv_dropbyred(fv, pktattr, fcache)
+	struct flowvalve *fv;
+	struct altq_pktattr *pktattr;
+	struct fve *fcache;
+{
+	struct fve *entry;
+	struct timeval now;
+
+	if (pktattr == NULL)
+		return;
+	FV_TIMESTAMP(&now);
+
+	/* prefer the entry cached for this packet */
+	entry = fcache;
+	if (entry == NULL) {
+		entry = flowlist_lookup(fv, pktattr, &now);
+		if (entry == NULL)
+			entry = flowlist_reclaim(fv, pktattr);
+	}
+
+	flowlist_move_to_head(fv, entry);
+
+	/*
+	 * update p: the following line cancels the update
+	 *	     in fv_checkflow() and calculate
+	 *	p = Wp + (1 - Wp) * p
+	 */
+	entry->fve_p += (1 << FP_SHIFT);
+
+	entry->fve_lastdrop = now;
+}
+
+#endif /* ALTQ_FLOWVALVE */
+
+#ifdef KLD_MODULE
+
+/*
+ * module glue, only built for KLD_MODULE: the switch entry names the
+ * discipline "red" and lists its redopen/redclose/redioctl entry points.
+ */
+static struct altqsw red_sw =
+ {"red", redopen, redclose, redioctl};
+
+ALTQ_MODULE(altq_red, ALTQT_RED, &red_sw);
+MODULE_VERSION(altq_red, 1);
+
+#endif /* KLD_MODULE */
+#endif /* ALTQ3_COMPAT */
+
+#endif /* ALTQ_RED */
diff --git a/sys/contrib/altq/altq/altq_red.h b/sys/contrib/altq/altq/altq_red.h
new file mode 100644
index 000000000000..dc8ea0ace312
--- /dev/null
+++ b/sys/contrib/altq/altq/altq_red.h
@@ -0,0 +1,198 @@
+/* $KAME: altq_red.h,v 1.8 2003/07/10 12:07:49 kjc Exp $ */
+
+/*
+ * Copyright (C) 1997-2003
+ * Sony Computer Science Laboratories Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _ALTQ_ALTQ_RED_H_
+#define _ALTQ_ALTQ_RED_H_
+
+#include <altq/altq_classq.h>
+
+#ifdef ALTQ3_COMPAT
+/*
+ * interface name argument of the RED_IF_ATTACH, RED_IF_DETACH,
+ * RED_ENABLE and RED_DISABLE ioctls; also embedded in red_stats
+ * and red_conf.
+ */
+struct red_interface {
+ char red_ifname[IFNAMSIZ];
+};
+
+/*
+ * argument of the RED_GETSTATS ioctl: iface selects the interface,
+ * the remaining members carry the queue state, the packet counters,
+ * the configured parameters and the flowvalve counters.
+ */
+struct red_stats {
+ struct red_interface iface;
+ int q_len;
+ int q_avg;
+
+ struct pktcntr xmit_cnt;
+ struct pktcntr drop_cnt;
+ u_int drop_forced;
+ u_int drop_unforced;
+ u_int marked_packets;
+
+ /* static red parameters */
+ int q_limit;
+ int weight;
+ int inv_pmax;
+ int th_min;
+ int th_max;
+
+ /* flowvalve related stuff */
+ u_int fv_flows;
+ u_int fv_pass;
+ u_int fv_predrop;
+ u_int fv_alloc;
+ u_int fv_escape;
+};
+
+/*
+ * argument of the RED_CONFIG ioctl (declared _IOWR, i.e. the structure
+ * is copied in both directions): iface selects the interface, the
+ * remaining members are the red parameters.
+ */
+struct red_conf {
+ struct red_interface iface;
+ int red_weight; /* weight for EWMA */
+ int red_inv_pmax; /* inverse of max drop probability */
+ int red_thmin; /* red min threshold */
+ int red_thmax; /* red max threshold */
+ int red_limit; /* max queue length */
+ int red_pkttime; /* average packet time in usec */
+ int red_flags; /* see below */
+};
+#endif /* ALTQ3_COMPAT */
+
+/* red flags */
+#define REDF_ECN4 0x01 /* use packet marking for IPv4 packets */
+#define REDF_ECN6 0x02 /* use packet marking for IPv6 packets */
+#define REDF_ECN (REDF_ECN4 | REDF_ECN6)
+#define REDF_FLOWVALVE 0x04 /* use flowvalve (aka penalty-box) */
+
+/*
+ * simpler versions of red parameters and statistics used by other
+ * disciplines (e.g., CBQ)
+ */
+struct redparams {
+ int th_min; /* red min threshold */
+ int th_max; /* red max threshold */
+ int inv_pmax; /* inverse of max drop probability */
+};
+
+/* statistics part of the simplified interface shared with other disciplines */
+struct redstats {
+ int q_avg; /* queue length average */
+ struct pktcntr xmit_cnt; /* counter of queued packets */
+ struct pktcntr drop_cnt; /* counter of dropped packets */
+ u_int drop_forced; /* number of forced drops */
+ u_int drop_unforced; /* number of unforced (early) drops */
+ u_int marked_packets; /* presumably ECN-marked packets; confirm */
+};
+
+#ifdef ALTQ3_COMPAT
+/*
+ * IOCTLs for RED
+ */
+#define RED_IF_ATTACH _IOW('Q', 1, struct red_interface)
+#define RED_IF_DETACH _IOW('Q', 2, struct red_interface)
+#define RED_ENABLE _IOW('Q', 3, struct red_interface)
+#define RED_DISABLE _IOW('Q', 4, struct red_interface)
+#define RED_CONFIG _IOWR('Q', 6, struct red_conf)
+#define RED_GETSTATS _IOWR('Q', 12, struct red_stats)
+#define RED_SETDEFAULTS _IOW('Q', 30, struct redparams)
+#endif /* ALTQ3_COMPAT */
+
+#ifdef _KERNEL
+
+#ifdef ALTQ3_COMPAT
+struct flowvalve;
+#endif
+
+/*
+ * weight table structure for idle time calibration.
+ * allocated with wtab_alloc(), released with wtab_destroy() and
+ * consulted through pow_w().
+ */
+struct wtab {
+ struct wtab *w_next; /* next table (tables form a linked list) */
+ int w_weight; /* presumably the weight this table is for */
+ int w_param_max; /* NOTE(review): meaning not visible here */
+ int w_refcount; /* presumably a reference count; confirm */
+ int32_t w_tab[32]; /* table values, presumably read by pow_w() */
+};
+
+/*
+ * state of one red instance: the configured parameters, values derived
+ * from them, the running average and idle bookkeeping, an optional
+ * flowvalve (ALTQ3_COMPAT only) and the packet/drop counters.
+ */
+typedef struct red {
+ int red_pkttime; /* average packet time in micro sec
+ used for idle calibration */
+ int red_flags; /* red flags */
+
+ /* red parameters */
+ int red_weight; /* weight for EWMA */
+ int red_inv_pmax; /* inverse of max drop probability */
+ int red_thmin; /* red min threshold */
+ int red_thmax; /* red max threshold */
+
+ /* variables for internal use */
+ int red_wshift; /* log(red_weight) */
+ int red_thmin_s; /* th_min scaled by avgshift */
+ int red_thmax_s; /* th_max scaled by avgshift */
+ int red_probd; /* drop probability denominator */
+
+ int red_avg; /* queue len avg scaled by avgshift */
+ int red_count; /* packet count since last dropped/
+ marked packet */
+ int red_idle; /* queue was empty */
+ int red_old; /* avg is above th_min */
+ struct wtab *red_wtab; /* weight table */
+ struct timeval red_last; /* time when the queue becomes idle */
+
+#ifdef ALTQ3_COMPAT
+ struct flowvalve *red_flowvalve; /* flowvalve state */
+#endif
+
+ /* counters; same members as the tail of struct redstats */
+ struct {
+ struct pktcntr xmit_cnt;
+ struct pktcntr drop_cnt;
+ u_int drop_forced;
+ u_int drop_unforced;
+ u_int marked_packets;
+ } red_stats;
+} red_t;
+
+#ifdef ALTQ3_COMPAT
+/*
+ * per-interface red instance (ALTQ3_COMPAT only): ties an ifaltq to
+ * its packet queue and its red state.
+ */
+typedef struct red_queue {
+ struct red_queue *rq_next; /* next red_state in the list */
+ struct ifaltq *rq_ifq; /* backpointer to ifaltq */
+
+ class_queue_t *rq_q; /* the packet queue */
+
+ red_t *rq_red; /* red state for rq_q */
+} red_queue_t;
+#endif /* ALTQ3_COMPAT */
+
+/* red drop types */
+#define DTYPE_NODROP 0 /* no drop */
+#define DTYPE_FORCED 1 /* a "forced" drop */
+#define DTYPE_EARLY 2 /* an "unforced" (early) drop */
+
+extern red_t *red_alloc(int, int, int, int, int, int);
+extern void red_destroy(red_t *);
+extern void red_getstats(red_t *, struct redstats *);
+extern int red_addq(red_t *, class_queue_t *, struct mbuf *,
+ struct altq_pktattr *);
+extern struct mbuf *red_getq(red_t *, class_queue_t *);
+extern int drop_early(int, int, int);
+extern int mark_ecn(struct mbuf *, struct altq_pktattr *, int);
+extern struct wtab *wtab_alloc(int);
+extern int wtab_destroy(struct wtab *);
+extern int32_t pow_w(struct wtab *, int);
+
+#endif /* _KERNEL */
+
+#endif /* _ALTQ_ALTQ_RED_H_ */
diff --git a/sys/contrib/altq/altq/altq_rio.c b/sys/contrib/altq/altq/altq_rio.c
new file mode 100644
index 000000000000..56c8ee84a2bc
--- /dev/null
+++ b/sys/contrib/altq/altq/altq_rio.c
@@ -0,0 +1,843 @@
+/* $KAME: altq_rio.c,v 1.17 2003/07/10 12:07:49 kjc Exp $ */
+
+/*
+ * Copyright (C) 1998-2003
+ * Sony Computer Science Laboratories Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+/*
+ * Copyright (c) 1990-1994 Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the Computer Systems
+ * Engineering Group at Lawrence Berkeley Laboratory.
+ * 4. Neither the name of the University nor of the Laboratory may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(__FreeBSD__) || defined(__NetBSD__)
+#include "opt_altq.h"
+#if (__FreeBSD__ != 2)
+#include "opt_inet.h"
+#ifdef __FreeBSD__
+#include "opt_inet6.h"
+#endif
+#endif
+#endif /* __FreeBSD__ || __NetBSD__ */
+#ifdef ALTQ_RIO /* rio is enabled by ALTQ_RIO option in opt_altq.h */
+
+#include <sys/param.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/systm.h>
+#include <sys/errno.h>
+#if 1 /* ALTQ3_COMPAT */
+#include <sys/proc.h>
+#include <sys/sockio.h>
+#include <sys/kernel.h>
+#endif
+
+#include <net/if.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#ifdef INET6
+#include <netinet/ip6.h>
+#endif
+
+#include <net/pfvar.h>
+#include <altq/altq.h>
+#include <altq/altq_cdnr.h>
+#include <altq/altq_red.h>
+#include <altq/altq_rio.h>
+#ifdef ALTQ3_COMPAT
+#include <altq/altq_conf.h>
+#endif
+
+/*
+ * RIO: RED with IN/OUT bit
+ * described in
+ * "Explicit Allocation of Best Effort Packet Delivery Service"
+ * David D. Clark and Wenjia Fang, MIT Lab for Computer Science
+ * http://diffserv.lcs.mit.edu/Papers/exp-alloc-ddc-wf.{ps,pdf}
+ *
+ * this implementation is extended to support more than 2 drop precedence
+ * values as described in RFC2597 (Assured Forwarding PHB Group).
+ *
+ */
+/*
+ * AF DS (differentiated service) codepoints.
+ * (classes can be mapped to CBQ or H-FSC classes.)
+ *
+ * 0 1 2 3 4 5 6 7
+ * +---+---+---+---+---+---+---+---+
+ * | CLASS |DropPre| 0 | CU |
+ * +---+---+---+---+---+---+---+---+
+ *
+ * class 1: 001
+ * class 2: 010
+ * class 3: 011
+ * class 4: 100
+ *
+ * low drop prec: 01
+ * medium drop prec: 10
+ * high drop prec: 11
+ */
+
+/* normal red parameters */
+#define W_WEIGHT 512 /* inverse of weight of EWMA (511/512) */
+ /* q_weight = 0.00195 */
+
+/* red parameters for a slow link */
+#define W_WEIGHT_1 128 /* inverse of weight of EWMA (127/128) */
+ /* q_weight = 0.0078125 */
+
+/* red parameters for a very slow link (e.g., dialup) */
+#define W_WEIGHT_2 64 /* inverse of weight of EWMA (63/64) */
+ /* q_weight = 0.015625 */
+
+/* fixed-point uses 12-bit decimal places */
+#define FP_SHIFT 12 /* fixed-point shift */
+
+/* red parameters for drop probability */
+#define INV_P_MAX 10 /* inverse of max drop probability */
+#define TH_MIN 5 /* min threshold */
+#define TH_MAX 15 /* max threshold */
+
+#define RIO_LIMIT 60 /* default max queue length */
+#define RIO_STATS /* collect statistics */
+
+/*
+ * TV_DELTA(a, b, delta): store in delta the time (a - b) in
+ * microseconds, where a and b point to structures with tv_sec and
+ * tv_usec members.
+ * the result is pinned to 60000000 (60 sec) when a is more than
+ * 60 seconds ahead of b, and also when a's tv_sec is smaller than b's.
+ * differences of up to 4 seconds are accumulated by repeated addition
+ * instead of a multiplication.
+ * the expansion is a bare { } block (not do/while) and assigns to
+ * delta several times, so delta must be a plain lvalue.
+ */
+#define TV_DELTA(a, b, delta) { \
+ register int xxs; \
+ \
+ delta = (a)->tv_usec - (b)->tv_usec; \
+ if ((xxs = (a)->tv_sec - (b)->tv_sec) != 0) { \
+ if (xxs < 0) { \
+ delta = 60000000; \
+ } else if (xxs > 4) { \
+ if (xxs > 60) \
+ delta = 60000000; \
+ else \
+ delta += xxs * 1000000; \
+ } else while (xxs > 0) { \
+ delta += 1000000; \
+ xxs--; \
+ } \
+ } \
+}
+
+#ifdef ALTQ3_COMPAT
+/* rio_list keeps all rio_queue_t's allocated. */
+static rio_queue_t *rio_list = NULL;
+#endif
+/*
+ * default rio parameter values, one entry per drop precedence.
+ * rio_alloc() falls back to these for every parameter that is not
+ * supplied (NULL params or a zero member); the RIO_SETDEFAULTS ioctl
+ * overwrites the whole table.
+ * with TH_MIN 5 and TH_MAX 15 the threshold ranges are 35-45 (low),
+ * 20-30 (medium) and 5-15 (high).
+ */
+static struct redparams default_rio_params[RIO_NDROPPREC] = {
+ /* th_min, th_max, inv_pmax */
+ { TH_MAX * 2 + TH_MIN, TH_MAX * 3, INV_P_MAX }, /* low drop precedence */
+ { TH_MAX + TH_MIN, TH_MAX * 2, INV_P_MAX }, /* medium drop precedence */
+ { TH_MIN, TH_MAX, INV_P_MAX } /* high drop precedence */
+};
+
+/* internal function prototypes */
+static int dscp2index(u_int8_t);
+#ifdef ALTQ3_COMPAT
+static int rio_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *);
+static struct mbuf *rio_dequeue(struct ifaltq *, int);
+static int rio_request(struct ifaltq *, int, void *);
+static int rio_detach(rio_queue_t *);
+
+/*
+ * rio device interface
+ */
+altqdev_decl(rio);
+
+#endif /* ALTQ3_COMPAT */
+
+/*
+ * allocate and initialize a rio state.
+ *
+ * weight:  EWMA weight; 0 selects a default that depends on the
+ *          packet time.  a value that is not a power of 2 is replaced
+ *          by the largest power of 2 not above it (1 for values
+ *          below 1) and a message is printed.
+ * params:  RIO_NDROPPREC parameter sets, or NULL.  every member that
+ *          is 0 (or all of them for NULL) is taken from
+ *          default_rio_params.
+ * flags:   stored as rio_flags.
+ * pkttime: average packet time in usec; 0 selects 800.
+ *
+ * returns the new state, or NULL when the allocation fails.
+ */
+rio_t *
+rio_alloc(int weight, struct redparams *params, int flags, int pkttime)
+{
+	rio_t *rp;
+	int pow2, shift, idx;
+	int npkts_per_sec;
+
+	MALLOC(rp, rio_t *, sizeof(rio_t), M_DEVBUF, M_WAITOK);
+	if (rp == NULL)
+		return (NULL);
+	bzero(rp, sizeof(rio_t));
+
+	rp->rio_flags = flags;
+
+	/* default packet time: 1000 bytes / 10Mbps * 8 * 1000000 */
+	rp->rio_pkttime = (pkttime != 0) ? pkttime : 800;
+
+	if (weight == 0) {
+		/* use default; when the link is slow, adjust it */
+		npkts_per_sec = 1000000 / rp->rio_pkttime;
+		if (npkts_per_sec < 50)
+			/* up to about 400Kbps */
+			rp->rio_weight = W_WEIGHT_2;
+		else if (npkts_per_sec < 300)
+			/* up to about 2.4Mbps */
+			rp->rio_weight = W_WEIGHT_1;
+		else
+			rp->rio_weight = W_WEIGHT;
+	} else
+		rp->rio_weight = weight;
+
+	/* calculate wshift.  weight must be power of 2 */
+	shift = 0;
+	for (pow2 = rp->rio_weight; pow2 > 1; pow2 >>= 1)
+		shift++;
+	rp->rio_wshift = shift;
+	pow2 = 1 << rp->rio_wshift;
+	if (pow2 != rp->rio_weight) {
+		printf("invalid weight value %d for red! use %d\n",
+		    rp->rio_weight, pow2);
+		rp->rio_weight = pow2;
+	}
+
+	/* allocate weight table */
+	rp->rio_wtab = wtab_alloc(rp->rio_weight);
+
+	for (idx = 0; idx < RIO_NDROPPREC; idx++) {
+		struct dropprec_state *prec = &rp->rio_precstate[idx];
+		struct redparams *dflt = &default_rio_params[idx];
+		struct redparams *given = (params != NULL) ? &params[idx] : NULL;
+
+		prec->avg = 0;
+		prec->idle = 1;
+
+		/* a missing or zero parameter selects the default */
+		prec->inv_pmax = (given != NULL && given->inv_pmax != 0) ?
+		    given->inv_pmax : dflt->inv_pmax;
+		prec->th_min = (given != NULL && given->th_min != 0) ?
+		    given->th_min : dflt->th_min;
+		prec->th_max = (given != NULL && given->th_max != 0) ?
+		    given->th_max : dflt->th_max;
+
+		/*
+		 * th_min_s and th_max_s are scaled versions of th_min
+		 * and th_max to be compared with avg.
+		 */
+		prec->th_min_s = prec->th_min << (rp->rio_wshift + FP_SHIFT);
+		prec->th_max_s = prec->th_max << (rp->rio_wshift + FP_SHIFT);
+
+		/*
+		 * precompute probability denominator
+		 *  probd = (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point
+		 */
+		prec->probd = (2 * (prec->th_max - prec->th_min)
+		    * prec->inv_pmax) << FP_SHIFT;
+
+		microtime(&prec->last);
+	}
+
+	return (rp);
+}
+
+/*
+ * release a rio state obtained from rio_alloc(): hand its weight table
+ * to wtab_destroy() and free the state itself.
+ * rp must not be NULL, it is dereferenced unconditionally.
+ */
+void
+rio_destroy(rio_t *rp)
+{
+ wtab_destroy(rp->rio_wtab);
+ FREE(rp, M_DEVBUF);
+}
+
+/*
+ * copy the statistics of all drop precedences into sp.
+ * sp must point to an array of RIO_NDROPPREC struct redstats; q_avg of
+ * each element is set to the precedence's average shifted down by
+ * rio_wshift.
+ */
+void
+rio_getstats(rio_t *rp, struct redstats *sp)
+{
+	int prec;
+
+	for (prec = 0; prec < RIO_NDROPPREC; prec++) {
+		struct redstats *out = &sp[prec];
+
+		bcopy(&rp->q_stats[prec], out, sizeof(struct redstats));
+		out->q_avg = rp->rio_precstate[prec].avg >> rp->rio_wshift;
+	}
+}
+
+#if (RIO_NDROPPREC == 3)
+/*
+ * internally, a drop precedence value is converted to an index
+ * starting from 0.
+ * the drop precedence bits are selected with AF_DROPPRECMASK and
+ * shifted down by 3; a field without drop precedence bits maps to
+ * index 0, like the lowest drop precedence.
+ */
+static int
+dscp2index(u_int8_t dscp)
+{
+	int dropprec = dscp & AF_DROPPRECMASK;
+
+	return (dropprec == 0 ? 0 : (dropprec >> 3) - 1);
+}
+#endif
+
+#if 1
+/*
+ * kludge: when a packet is dequeued, we need to know its drop precedence
+ * in order to keep the queue length of each drop precedence.
+ * use m_pkthdr.rcvif to pass this info.
+ *
+ * RIOM_SET_PRECINDEX(m, idx) overwrites the rcvif pointer of m with
+ * idx cast to a pointer.
+ * RIOM_GET_PRECINDEX(m) yields the stored index as a long and resets
+ * rcvif to NULL, so it can only be read once per packet.  it is
+ * written as a ({ ... }) statement expression, a compiler extension.
+ */
+#define RIOM_SET_PRECINDEX(m, idx) \
+ do { (m)->m_pkthdr.rcvif = (struct ifnet *)((long)(idx)); } while (0)
+#define RIOM_GET_PRECINDEX(m) \
+ ({ long idx; idx = (long)((m)->m_pkthdr.rcvif); \
+ (m)->m_pkthdr.rcvif = NULL; idx; })
+#endif
+
+/*
+ * enqueue packet m on queue q under rio control.
+ *
+ * the drop precedence index is derived from the packet's ds field.
+ * the averages of that precedence and of all higher-indexed ones are
+ * updated, then the packet's own precedence decides on an early or
+ * forced drop.  the hard queue limit of q forces a drop as well.
+ *
+ * returns 0 when the packet was queued, -1 when it was dropped
+ * (in which case m has been freed).
+ */
+int
+rio_addq(rio_t *rp, class_queue_t *q, struct mbuf *m,
+ struct altq_pktattr *pktattr)
+{
+ int avg, droptype;
+ u_int8_t dsfield, odsfield;
+ int dpindex, i, n, t;
+ struct timeval now;
+ struct dropprec_state *prec;
+
+ dsfield = odsfield = read_dsfield(m, pktattr);
+ dpindex = dscp2index(dsfield);
+
+ /*
+ * update avg of the precedence states whose drop precedence
+ * is larger than or equal to the drop precedence of the packet
+ */
+ /* tv_sec == 0 means "now not fetched yet"; microtime is called lazily */
+ now.tv_sec = 0;
+ for (i = dpindex; i < RIO_NDROPPREC; i++) {
+ prec = &rp->rio_precstate[i];
+ avg = prec->avg;
+ if (prec->idle) {
+ /* the queue was idle: age the average by the idle time */
+ prec->idle = 0;
+ if (now.tv_sec == 0)
+ microtime(&now);
+ t = (now.tv_sec - prec->last.tv_sec);
+ if (t > 60)
+ avg = 0;
+ else {
+ t = t * 1000000 +
+ (now.tv_usec - prec->last.tv_usec);
+ /* n: number of packet times spent idle */
+ n = t / rp->rio_pkttime;
+ /* calculate (avg = (1 - Wq)^n * avg) */
+ if (n > 0)
+ avg = (avg >> FP_SHIFT) *
+ pow_w(rp->rio_wtab, n);
+ }
+ }
+
+ /* run estimator. (avg is scaled by WEIGHT in fixed-point) */
+ avg += (prec->qlen << FP_SHIFT) - (avg >> rp->rio_wshift);
+ prec->avg = avg; /* save the new value */
+ /*
+ * count keeps a tally of arriving traffic that has not
+ * been dropped.
+ */
+ prec->count++;
+ }
+
+ /* from here on only the packet's own precedence is examined */
+ prec = &rp->rio_precstate[dpindex];
+ avg = prec->avg;
+
+ /* see if we drop early */
+ droptype = DTYPE_NODROP;
+ if (avg >= prec->th_min_s && prec->qlen > 1) {
+ if (avg >= prec->th_max_s) {
+ /* avg >= th_max: forced drop */
+ droptype = DTYPE_FORCED;
+ } else if (prec->old == 0) {
+ /* first exceeds th_min */
+ prec->count = 1;
+ prec->old = 1;
+ } else if (drop_early((avg - prec->th_min_s) >> rp->rio_wshift,
+ prec->probd, prec->count)) {
+ /* unforced drop by red */
+ droptype = DTYPE_EARLY;
+ }
+ } else {
+ /* avg < th_min */
+ prec->old = 0;
+ }
+
+ /*
+ * if the queue length hits the hard limit, it's a forced drop.
+ */
+ if (droptype == DTYPE_NODROP && qlen(q) >= qlimit(q))
+ droptype = DTYPE_FORCED;
+
+ if (droptype != DTYPE_NODROP) {
+ /* always drop incoming packet (as opposed to randomdrop) */
+ for (i = dpindex; i < RIO_NDROPPREC; i++)
+ rp->rio_precstate[i].count = 0;
+#ifdef RIO_STATS
+ if (droptype == DTYPE_EARLY)
+ rp->q_stats[dpindex].drop_unforced++;
+ else
+ rp->q_stats[dpindex].drop_forced++;
+ PKTCNTR_ADD(&rp->q_stats[dpindex].drop_cnt, m_pktlen(m));
+#endif
+ m_freem(m);
+ return (-1);
+ }
+
+ /* the packet counts towards its own and all higher-indexed precedences */
+ for (i = dpindex; i < RIO_NDROPPREC; i++)
+ rp->rio_precstate[i].qlen++;
+
+ /* save drop precedence index in mbuf hdr */
+ RIOM_SET_PRECINDEX(m, dpindex);
+
+ if (rp->rio_flags & RIOF_CLEARDSCP)
+ dsfield &= ~DSCP_MASK;
+
+ /* rewrite the ds field only when it actually changed */
+ if (dsfield != odsfield)
+ write_dsfield(m, pktattr, dsfield);
+
+ _addq(q, m);
+
+#ifdef RIO_STATS
+ PKTCNTR_ADD(&rp->q_stats[dpindex].xmit_cnt, m_pktlen(m));
+#endif
+ return (0);
+}
+
+/*
+ * dequeue a packet from q and update the per-precedence queue lengths.
+ * the drop precedence index saved by rio_addq() is read back from the
+ * mbuf header.  a precedence whose queue length drops to 0 is marked
+ * idle and the time is recorded.
+ * returns the packet, or NULL when the queue is empty.
+ */
+struct mbuf *
+rio_getq(rio_t *rp, class_queue_t *q)
+{
+	struct mbuf *m;
+	int dpindex, idx;
+
+	m = _getq(q);
+	if (m == NULL)
+		return NULL;
+
+	dpindex = RIOM_GET_PRECINDEX(m);
+	for (idx = dpindex; idx < RIO_NDROPPREC; idx++) {
+		struct dropprec_state *prec = &rp->rio_precstate[idx];
+
+		if (--prec->qlen == 0 && prec->idle == 0) {
+			prec->idle = 1;
+			microtime(&prec->last);
+		}
+	}
+	return (m);
+}
+
+#ifdef ALTQ3_COMPAT
+/*
+ * open entry point of the rio device.
+ * no state is set up here and all arguments are ignored; always
+ * returns 0.
+ */
+int
+rioopen(dev, flag, fmt, p)
+ dev_t dev;
+ int flag, fmt;
+#if (__FreeBSD_version > 500000)
+ struct thread *p;
+#else
+ struct proc *p;
+#endif
+{
+ /* everything will be done when the queueing scheme is attached. */
+ return 0;
+}
+
+/*
+ * close entry point of the rio device: detach and destroy every rio
+ * instance on rio_list.
+ *
+ * a failing rio_detach() leaves its instance on the list.  the list
+ * is therefore walked with a saved next pointer rather than by
+ * re-reading the list head, which would retry the same instance
+ * forever.
+ *
+ * returns 0, or the first error reported by rio_detach().
+ */
+int
+rioclose(dev, flag, fmt, p)
+	dev_t dev;
+	int flag, fmt;
+#if (__FreeBSD_version > 500000)
+	struct thread *p;
+#else
+	struct proc *p;
+#endif
+{
+	rio_queue_t *rqp, *next;
+	int err, error = 0;
+
+	for (rqp = rio_list; rqp != NULL; rqp = next) {
+		/* rqp is freed by a successful detach: fetch next first */
+		next = rqp->rq_next;
+
+		/* destroy all */
+		err = rio_detach(rqp);
+		if (err != 0 && error == 0)
+			error = err;
+	}
+
+	return error;
+}
+
+/*
+ * ioctl entry point of the rio device.
+ *
+ * every command except RIO_GETSTATS requires super-user privilege.
+ * commands that name an interface return EBADF when no rio instance
+ * is found for it; unknown commands return EINVAL.
+ *
+ *   RIO_ENABLE / RIO_DISABLE  enable or disable altq on the interface
+ *   RIO_IF_ATTACH             create a rio instance with default
+ *                             parameters and attach it
+ *   RIO_IF_DETACH             detach and destroy the instance
+ *   RIO_GETSTATS              report parameters and statistics
+ *   RIO_CONFIG                replace the rio state by one built from
+ *                             the given parameters; queued packets
+ *                             are flushed
+ *   RIO_SETDEFAULTS           overwrite default_rio_params
+ */
+int
+rioioctl(dev, cmd, addr, flag, p)
+ dev_t dev;
+ ioctlcmd_t cmd;
+ caddr_t addr;
+ int flag;
+#if (__FreeBSD_version > 500000)
+ struct thread *p;
+#else
+ struct proc *p;
+#endif
+{
+ rio_queue_t *rqp;
+ struct rio_interface *ifacep;
+ struct ifnet *ifp;
+ int error = 0;
+
+ /* check super-user privilege */
+ switch (cmd) {
+ case RIO_GETSTATS:
+ break;
+ default:
+#if (__FreeBSD_version > 400000)
+ if ((error = suser(p)) != 0)
+ return (error);
+#else
+ if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
+ return (error);
+#endif
+ break;
+ }
+
+ switch (cmd) {
+
+ case RIO_ENABLE:
+ ifacep = (struct rio_interface *)addr;
+ if ((rqp = altq_lookup(ifacep->rio_ifname, ALTQT_RIO)) == NULL) {
+ error = EBADF;
+ break;
+ }
+ error = altq_enable(rqp->rq_ifq);
+ break;
+
+ case RIO_DISABLE:
+ ifacep = (struct rio_interface *)addr;
+ if ((rqp = altq_lookup(ifacep->rio_ifname, ALTQT_RIO)) == NULL) {
+ error = EBADF;
+ break;
+ }
+ error = altq_disable(rqp->rq_ifq);
+ break;
+
+ case RIO_IF_ATTACH:
+ ifp = ifunit(((struct rio_interface *)addr)->rio_ifname);
+ if (ifp == NULL) {
+ error = ENXIO;
+ break;
+ }
+
+ /* allocate and initialize rio_queue_t */
+ MALLOC(rqp, rio_queue_t *, sizeof(rio_queue_t), M_DEVBUF, M_WAITOK);
+ if (rqp == NULL) {
+ error = ENOMEM;
+ break;
+ }
+ bzero(rqp, sizeof(rio_queue_t));
+
+ MALLOC(rqp->rq_q, class_queue_t *, sizeof(class_queue_t),
+ M_DEVBUF, M_WAITOK);
+ if (rqp->rq_q == NULL) {
+ FREE(rqp, M_DEVBUF);
+ error = ENOMEM;
+ break;
+ }
+ bzero(rqp->rq_q, sizeof(class_queue_t));
+
+ /* all-zero arguments select the defaults of rio_alloc() */
+ rqp->rq_rio = rio_alloc(0, NULL, 0, 0);
+ if (rqp->rq_rio == NULL) {
+ FREE(rqp->rq_q, M_DEVBUF);
+ FREE(rqp, M_DEVBUF);
+ error = ENOMEM;
+ break;
+ }
+
+ rqp->rq_ifq = &ifp->if_snd;
+ qtail(rqp->rq_q) = NULL;
+ qlen(rqp->rq_q) = 0;
+ qlimit(rqp->rq_q) = RIO_LIMIT;
+ qtype(rqp->rq_q) = Q_RIO;
+
+ /*
+ * set RIO to this ifnet structure.
+ */
+ error = altq_attach(rqp->rq_ifq, ALTQT_RIO, rqp,
+ rio_enqueue, rio_dequeue, rio_request,
+ NULL, NULL);
+ if (error) {
+ /* attach failed: undo all three allocations */
+ rio_destroy(rqp->rq_rio);
+ FREE(rqp->rq_q, M_DEVBUF);
+ FREE(rqp, M_DEVBUF);
+ break;
+ }
+
+ /* add this state to the rio list */
+ rqp->rq_next = rio_list;
+ rio_list = rqp;
+ break;
+
+ case RIO_IF_DETACH:
+ ifacep = (struct rio_interface *)addr;
+ if ((rqp = altq_lookup(ifacep->rio_ifname, ALTQT_RIO)) == NULL) {
+ error = EBADF;
+ break;
+ }
+ error = rio_detach(rqp);
+ break;
+
+ case RIO_GETSTATS:
+ /* do { } while (0) so that "break" can leave the block early */
+ do {
+ struct rio_stats *q_stats;
+ rio_t *rp;
+ int i;
+
+ q_stats = (struct rio_stats *)addr;
+ if ((rqp = altq_lookup(q_stats->iface.rio_ifname,
+ ALTQT_RIO)) == NULL) {
+ error = EBADF;
+ break;
+ }
+
+ rp = rqp->rq_rio;
+
+ q_stats->q_limit = qlimit(rqp->rq_q);
+ q_stats->weight = rp->rio_weight;
+ q_stats->flags = rp->rio_flags;
+
+ for (i = 0; i < RIO_NDROPPREC; i++) {
+ q_stats->q_len[i] = rp->rio_precstate[i].qlen;
+ bcopy(&rp->q_stats[i], &q_stats->q_stats[i],
+ sizeof(struct redstats));
+ q_stats->q_stats[i].q_avg =
+ rp->rio_precstate[i].avg >> rp->rio_wshift;
+
+ q_stats->q_params[i].inv_pmax
+ = rp->rio_precstate[i].inv_pmax;
+ q_stats->q_params[i].th_min
+ = rp->rio_precstate[i].th_min;
+ q_stats->q_params[i].th_max
+ = rp->rio_precstate[i].th_max;
+ }
+ } while (/*CONSTCOND*/ 0);
+ break;
+
+ case RIO_CONFIG:
+ do {
+ struct rio_conf *fc;
+ rio_t *new;
+ int s, limit, i;
+
+ fc = (struct rio_conf *)addr;
+ if ((rqp = altq_lookup(fc->iface.rio_ifname,
+ ALTQT_RIO)) == NULL) {
+ error = EBADF;
+ break;
+ }
+
+ /* build the new state first; the old one stays in use on failure */
+ new = rio_alloc(fc->rio_weight, &fc->q_params[0],
+ fc->rio_flags, fc->rio_pkttime);
+ if (new == NULL) {
+ error = ENOMEM;
+ break;
+ }
+
+#ifdef __NetBSD__
+ s = splnet();
+#else
+ s = splimp();
+#endif
+ /*
+ * NOTE(review): unlike the ALTRQ_PURGE handling in
+ * rio_request(), ifq_len of the interface queue is not
+ * reset after this flush -- confirm this is intended.
+ */
+ _flushq(rqp->rq_q);
+ /* the limit is raised to the requested th_max of the last precedence */
+ limit = fc->rio_limit;
+ if (limit < fc->q_params[RIO_NDROPPREC-1].th_max)
+ limit = fc->q_params[RIO_NDROPPREC-1].th_max;
+ qlimit(rqp->rq_q) = limit;
+
+ rio_destroy(rqp->rq_rio);
+ rqp->rq_rio = new;
+
+ splx(s);
+
+ /* write back new values */
+ fc->rio_limit = limit;
+ for (i = 0; i < RIO_NDROPPREC; i++) {
+ fc->q_params[i].inv_pmax =
+ rqp->rq_rio->rio_precstate[i].inv_pmax;
+ fc->q_params[i].th_min =
+ rqp->rq_rio->rio_precstate[i].th_min;
+ fc->q_params[i].th_max =
+ rqp->rq_rio->rio_precstate[i].th_max;
+ }
+ } while (/*CONSTCOND*/ 0);
+ break;
+
+ case RIO_SETDEFAULTS:
+ do {
+ struct redparams *rp;
+ int i;
+
+ /* addr is read as an array of RIO_NDROPPREC parameter sets */
+ rp = (struct redparams *)addr;
+ for (i = 0; i < RIO_NDROPPREC; i++)
+ default_rio_params[i] = rp[i];
+ } while (/*CONSTCOND*/ 0);
+ break;
+
+ default:
+ error = EINVAL;
+ break;
+ }
+
+ return error;
+}
+
+/*
+ * detach a rio instance from its interface and destroy it.
+ * altq is disabled first when it is enabled.  when altq_detach()
+ * fails its error is returned and the instance is left untouched,
+ * still on rio_list.  otherwise the instance is unlinked from
+ * rio_list (a message is printed when it is not found there) and
+ * freed, and 0 is returned.
+ */
+static int
+rio_detach(rqp)
+	rio_queue_t *rqp;
+{
+	rio_queue_t **link;
+	int error = 0;
+
+	if (ALTQ_IS_ENABLED(rqp->rq_ifq))
+		altq_disable(rqp->rq_ifq);
+
+	error = altq_detach(rqp->rq_ifq);
+	if (error != 0)
+		return (error);
+
+	/* find the pointer that refers to rqp: the head or a rq_next */
+	for (link = &rio_list; *link != NULL; link = &(*link)->rq_next)
+		if (*link == rqp)
+			break;
+
+	if (*link != NULL)
+		*link = rqp->rq_next;
+	else
+		printf("rio_detach: no state found in rio_list!\n");
+
+	rio_destroy(rqp->rq_rio);
+	FREE(rqp->rq_q, M_DEVBUF);
+	FREE(rqp, M_DEVBUF);
+	return (error);
+}
+
+/*
+ * rio support routines
+ */
+
+/*
+ * request handler registered with altq_attach().
+ * only ALTRQ_PURGE is acted upon: the rio queue is flushed and, when
+ * altq is enabled on ifq, its length is reset to 0.  every other
+ * request is ignored.  arg is unused.  always returns 0.
+ */
+static int
+rio_request(ifq, req, arg)
+	struct ifaltq *ifq;
+	int req;
+	void *arg;
+{
+	rio_queue_t *rqp = (rio_queue_t *)ifq->altq_disc;
+
+	if (req == ALTRQ_PURGE) {
+		_flushq(rqp->rq_q);
+		if (ALTQ_IS_ENABLED(ifq))
+			ifq->ifq_len = 0;
+	}
+	return (0);
+}
+
+/*
+ * enqueue routine:
+ *	hands the packet to rio_addq() and counts it in ifq_len when it
+ *	was accepted.
+ *
+ *	returns: 0 when successfully queued.
+ *		 ENOBUFS when drop occurs.
+ */
+static int
+rio_enqueue(ifq, m, pktattr)
+	struct ifaltq *ifq;
+	struct mbuf *m;
+	struct altq_pktattr *pktattr;
+{
+	rio_queue_t *rqp = (rio_queue_t *)ifq->altq_disc;
+
+	if (rio_addq(rqp->rq_rio, rqp->rq_q, m, pktattr) != 0)
+		return ENOBUFS;
+
+	ifq->ifq_len++;
+	return 0;
+}
+
+/*
+ * dequeue routine:
+ *	must be called in splimp.
+ *	for ALTDQ_POLL the head of the queue is returned without
+ *	removing it; otherwise a packet is taken off through rio_getq()
+ *	and ifq_len is decremented.
+ *
+ *	returns: mbuf dequeued.
+ *		 NULL when no packet is available in the queue.
+ */
+
+static struct mbuf *
+rio_dequeue(ifq, op)
+	struct ifaltq *ifq;
+	int op;
+{
+	rio_queue_t *rqp = (rio_queue_t *)ifq->altq_disc;
+	struct mbuf *m;
+
+	if (op == ALTDQ_POLL)
+		return qhead(rqp->rq_q);
+
+	m = rio_getq(rqp->rq_rio, rqp->rq_q);
+	if (m == NULL)
+		return NULL;
+
+	ifq->ifq_len--;
+	return m;
+}
+
+#ifdef KLD_MODULE
+
+/*
+ * module glue, only built for KLD_MODULE: the switch entry names the
+ * discipline "rio" and lists its rioopen/rioclose/rioioctl entry points.
+ * the module declares a dependency on altq_red.
+ */
+static struct altqsw rio_sw =
+ {"rio", rioopen, rioclose, rioioctl};
+
+ALTQ_MODULE(altq_rio, ALTQT_RIO, &rio_sw);
+MODULE_VERSION(altq_rio, 1);
+MODULE_DEPEND(altq_rio, altq_red, 1, 1, 1);
+
+#endif /* KLD_MODULE */
+#endif /* ALTQ3_COMPAT */
+
+#endif /* ALTQ_RIO */
diff --git a/sys/contrib/altq/altq/altq_rio.h b/sys/contrib/altq/altq/altq_rio.h
new file mode 100644
index 000000000000..83210f235e76
--- /dev/null
+++ b/sys/contrib/altq/altq/altq_rio.h
@@ -0,0 +1,144 @@
+/* $KAME: altq_rio.h,v 1.9 2003/07/10 12:07:49 kjc Exp $ */
+
+/*
+ * Copyright (C) 1998-2003
+ * Sony Computer Science Laboratories Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _ALTQ_ALTQ_RIO_H_
+#define _ALTQ_ALTQ_RIO_H_
+
+#include <altq/altq_classq.h>
+
+/*
+ * RIO: RED with IN/OUT bit
+ * (extended to support more than 2 drop precedence values)
+ */
+#define RIO_NDROPPREC 3 /* number of drop precedence values */
+
+#ifdef ALTQ3_COMPAT
+struct rio_interface {
+ char rio_ifname[IFNAMSIZ]; /* interface name buffer (IFNAMSIZ bytes) */
+};
+
+struct rio_stats {
+ struct rio_interface iface; /* presumably selects the interface being queried -- confirm against the ioctl handler */
+ int q_len[RIO_NDROPPREC]; /* one entry per drop precedence */
+ struct redstats q_stats[RIO_NDROPPREC]; /* one redstats per drop precedence */
+
+ /* static red parameters */
+ int q_limit;
+ int weight;
+ int flags;
+ struct redparams q_params[RIO_NDROPPREC]; /* one redparams per drop precedence */
+};
+
+struct rio_conf {
+ struct rio_interface iface; /* presumably selects the interface being configured -- confirm against the ioctl handler */
+ struct redparams q_params[RIO_NDROPPREC]; /* one redparams per drop precedence */
+ int rio_weight; /* weight for EWMA */
+ int rio_limit; /* max queue length */
+ int rio_pkttime; /* average packet time in usec */
+ int rio_flags; /* RIOF_* flags, see below */
+};
+#endif /* ALTQ3_COMPAT */
+
+/* rio flags (bit values, may be OR'ed together) */
+#define RIOF_ECN4 0x01 /* use packet marking for IPv4 packets */
+#define RIOF_ECN6 0x02 /* use packet marking for IPv6 packets */
+#define RIOF_ECN (RIOF_ECN4 | RIOF_ECN6) /* both marking bits above */
+#define RIOF_CLEARDSCP 0x200 /* clear diffserv codepoint */
+
+#ifdef ALTQ3_COMPAT
+/*
+ * IOCTLs for RIO
+ *
+ * All requests use the 'Q' group.  The attach/detach/enable/disable
+ * requests carry a struct rio_interface, RIO_CONFIG a struct rio_conf,
+ * RIO_GETSTATS a struct rio_stats, and RIO_SETDEFAULTS an array of
+ * RIO_NDROPPREC struct redparams.
+ */
+#define RIO_IF_ATTACH _IOW('Q', 1, struct rio_interface)
+#define RIO_IF_DETACH _IOW('Q', 2, struct rio_interface)
+#define RIO_ENABLE _IOW('Q', 3, struct rio_interface)
+#define RIO_DISABLE _IOW('Q', 4, struct rio_interface)
+#define RIO_CONFIG _IOWR('Q', 6, struct rio_conf)
+#define RIO_GETSTATS _IOWR('Q', 12, struct rio_stats)
+#define RIO_SETDEFAULTS _IOW('Q', 30, struct redparams[RIO_NDROPPREC])
+#endif /* ALTQ3_COMPAT */
+
+#ifdef _KERNEL
+
+typedef struct rio {
+ /* per drop precedence structure */
+ struct dropprec_state {
+ /* red parameters */
+ int inv_pmax; /* inverse of max drop probability */
+ int th_min; /* red min threshold */
+ int th_max; /* red max threshold */
+
+ /* variables for internal use */
+ int th_min_s; /* th_min scaled by avgshift */
+ int th_max_s; /* th_max scaled by avgshift */
+ int probd; /* drop probability denominator */
+
+ int qlen; /* queue length */
+ int avg; /* (scaled) queue length average */
+ int count; /* packet count since the last dropped/
+ marked packet */
+ int idle; /* queue was empty */
+ int old; /* avg is above th_min */
+ struct timeval last; /* timestamp when queue becomes idle */
+ } rio_precstate[RIO_NDROPPREC]; /* one state block per drop precedence */
+
+ int rio_wshift; /* log(red_weight) */
+ int rio_weight; /* weight for EWMA */
+ struct wtab *rio_wtab; /* weight table */
+
+ int rio_pkttime; /* average packet time in micro sec
+ used for idle calibration */
+ int rio_flags; /* rio flags (RIOF_*) */
+
+ u_int8_t rio_codepoint; /* codepoint value to tag packets */
+ u_int8_t rio_codepointmask; /* codepoint mask bits */
+
+ struct redstats q_stats[RIO_NDROPPREC]; /* statistics, one per drop precedence */
+} rio_t;
+
+#ifdef ALTQ3_COMPAT
+typedef struct rio_queue {
+ struct rio_queue *rq_next; /* next rio_queue in the list */
+ struct ifaltq *rq_ifq; /* backpointer to ifaltq */
+
+ class_queue_t *rq_q; /* packet queue passed to rio_addq()/rio_getq() */
+
+ rio_t *rq_rio; /* RIO state passed to rio_addq()/rio_getq() */
+} rio_queue_t;
+#endif /* ALTQ3_COMPAT */
+
+extern rio_t *rio_alloc(int, struct redparams *, int, int);
+extern void rio_destroy(rio_t *);
+extern void rio_getstats(rio_t *, struct redstats *);
+extern int rio_addq(rio_t *, class_queue_t *, struct mbuf *,
+ struct altq_pktattr *);
+extern struct mbuf *rio_getq(rio_t *, class_queue_t *);
+
+#endif /* _KERNEL */
+
+#endif /* _ALTQ_ALTQ_RIO_H_ */
diff --git a/sys/contrib/altq/altq/altq_rmclass.c b/sys/contrib/altq/altq/altq_rmclass.c
new file mode 100644
index 000000000000..70f7926c325c
--- /dev/null
+++ b/sys/contrib/altq/altq/altq_rmclass.c
@@ -0,0 +1,1832 @@
+/* $KAME: altq_rmclass.c,v 1.18 2003/11/06 06:32:53 kjc Exp $ */
+
+/*
+ * Copyright (c) 1991-1997 Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the Network Research
+ * Group at Lawrence Berkeley Laboratory.
+ * 4. Neither the name of the University nor of the Laboratory may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * LBL code modified by speer@eng.sun.com, May 1977.
+ * For questions and/or comments, please send mail to cbq@ee.lbl.gov
+ */
+
+#ident "@(#)rm_class.c 1.48 97/12/05 SMI"
+
+#if defined(__FreeBSD__) || defined(__NetBSD__)
+#include "opt_altq.h"
+#if (__FreeBSD__ != 2)
+#include "opt_inet.h"
+#ifdef __FreeBSD__
+#include "opt_inet6.h"
+#endif
+#endif
+#endif /* __FreeBSD__ || __NetBSD__ */
+#ifdef ALTQ_CBQ /* cbq is enabled by ALTQ_CBQ option in opt_altq.h */
+
+#include <sys/param.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/systm.h>
+#include <sys/errno.h>
+#include <sys/time.h>
+#ifdef ALTQ3_COMPAT
+#include <sys/kernel.h>
+#endif
+
+#include <net/if.h>
+#ifdef ALTQ3_COMPAT
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#endif
+
+#include <altq/altq.h>
+#include <altq/altq_rmclass.h>
+#include <altq/altq_rmclass_debug.h>
+#include <altq/altq_red.h>
+#include <altq/altq_rio.h>
+
+/*
+ * Local Macros
+ */
+
+#define reset_cutoff(ifd) { ifd->cutoff_ = RM_MAXDEPTH; }
+
+/*
+ * Local routines.
+ */
+
+static int rmc_satisfied(struct rm_class *, struct timeval *);
+static void rmc_wrr_set_weights(struct rm_ifdat *);
+static void rmc_depth_compute(struct rm_class *);
+static void rmc_depth_recompute(rm_class_t *);
+
+static mbuf_t *_rmc_wrr_dequeue_next(struct rm_ifdat *, int);
+static mbuf_t *_rmc_prr_dequeue_next(struct rm_ifdat *, int);
+
+static int _rmc_addq(rm_class_t *, mbuf_t *);
+static void _rmc_dropq(rm_class_t *);
+static mbuf_t *_rmc_getq(rm_class_t *);
+static mbuf_t *_rmc_pollq(rm_class_t *);
+
+static int rmc_under_limit(struct rm_class *, struct timeval *);
+static void rmc_tl_satisfied(struct rm_ifdat *, struct timeval *);
+static void rmc_drop_action(struct rm_class *);
+static void rmc_restart(struct rm_class *);
+static void rmc_root_overlimit(struct rm_class *, struct rm_class *);
+
+#define BORROW_OFFTIME
+/*
+ * BORROW_OFFTIME (experimental):
+ * borrow the offtime of the class borrowing from.
+ * the reason is that when its own offtime is set, the class is unable
+ * to borrow much, especially when cutoff is taking effect.
+ * but when the borrowed class is overloaded (advidle is close to minidle),
+ * use the borrowing class's offtime to avoid overload.
+ */
+#define ADJUST_CUTOFF
+/*
+ * ADJUST_CUTOFF (experimental):
+ * if no underlimit class is found due to cutoff, increase cutoff and
+ * retry the scheduling loop.
+ * also, don't invoke delay_actions while cutoff is taking effect,
+ * since a sleeping class won't have a chance to be scheduled in the
+ * next loop.
+ *
+ * now heuristics for setting the top-level variable (cutoff_) becomes:
+ * 1. if a packet arrives for a not-overlimit class, set cutoff
+ * to the depth of the class.
+ * 2. if cutoff is i, and a packet arrives for an overlimit class
+ * with an underlimit ancestor at a lower level than i (say j),
+ * then set cutoff to j.
+ * 3. at scheduling a packet, if there is no underlimit class
+ * due to the current cutoff level, increase cutoff by 1 and
+ * then try to schedule again.
+ */
+
+/*
+ * rm_class_t *
+ * rmc_newclass(...) - Create a new resource management class at priority
+ * 'pri' on the interface given by 'ifd'.
+ *
+ * nsecPerByte is the data rate of the interface in nanoseconds/byte.
+ * E.g., 800 for a 10Mb/s ethernet. If the class gets less
+ * than 100% of the bandwidth, this number should be the
+ * 'effective' rate for the class. Let f be the
+ * bandwidth fraction allocated to this class, and let
+ * nsPerByte be the data rate of the output link in
+ * nanoseconds/byte. Then nsecPerByte is set to
+ * nsPerByte / f. E.g., 1600 (= 800 / .5)
+ * for a class that gets 50% of an ethernet's bandwidth.
+ *
+ * action the routine to call when the class is over limit.
+ *
+ * maxq max allowable queue size for class (in packets).
+ *
+ * parent parent class pointer.
+ *
+ * borrow class to borrow from (should be either 'parent' or null).
+ *
+ * maxidle max value allowed for class 'idle' time estimate (this
+ * parameter determines how large an initial burst of packets
+ * can be before overlimit action is invoked).
+ *
+ * offtime how long 'delay' action will delay when class goes over
+ * limit (this parameter determines the steady-state burst
+ * size when a class is running over its limit).
+ *
+ * Maxidle and offtime have to be computed from the following: If the
+ * average packet size is s, the bandwidth fraction allocated to this
+ * class is f, we want to allow b packet bursts, and the gain of the
+ * averaging filter is g (= 1 - 2^(-RM_FILTER_GAIN)), then:
+ *
+ * ptime = s * nsPerByte * (1 - f) / f
+ * maxidle = ptime * (1 - g^b) / g^b
+ * minidle = -ptime * (1 / (f - 1))
+ * offtime = ptime * (1 + 1/(1 - g) * (1 - g^(b - 1)) / g^(b - 1))
+ *
+ * Operationally, it's convenient to specify maxidle & offtime in units
+ * independent of the link bandwidth so the maxidle & offtime passed to
+ * this routine are the above values multiplied by 8*f/(1000*nsPerByte).
+ * (The constant factor is a scale factor needed to make the parameters
+ * integers. This scaling also means that the 'unscaled' values of
+ * maxidle*nsecPerByte/8 and offtime*nsecPerByte/8 will be in microseconds,
+ * not nanoseconds.) Also note that the 'idle' filter computation keeps
+ * an estimate scaled upward by 2^RM_FILTER_GAIN so the passed value of
+ * maxidle also must be scaled upward by this value. Thus, the passed
+ * values for maxidle and offtime can be computed as follows:
+ *
+ * maxidle = maxidle * 2^RM_FILTER_GAIN * 8 / (1000 * nsecPerByte)
+ * offtime = offtime * 8 / (1000 * nsecPerByte)
+ *
+ * When USE_HRTIME is employed, then maxidle and offtime become:
+ * maxidle = maxidle * (8.0 / nsecPerByte);
+ * offtime = offtime * (8.0 / nsecPerByte);
+ */
+struct rm_class *
+rmc_newclass(int pri, struct rm_ifdat *ifd, u_int nsecPerByte,
+    void (*action)(rm_class_t *, rm_class_t *), int maxq,
+    struct rm_class *parent, struct rm_class *borrow, u_int maxidle,
+    int minidle, u_int offtime, int pktsize, int flags)
+{
+	struct rm_class *cl;
+	struct rm_class *peer;
+	int s;
+
+	/*
+	 * 'pri' indexes the per-priority arrays of 'ifd' (active_[],
+	 * num_[], alloc_[]), so it must be rejected on both sides of
+	 * the valid range, as rmc_get_weight() already does.
+	 */
+	if (pri < 0 || pri >= RM_MAXPRIO)
+		return (NULL);
+	/* nsecPerByte is the divisor of the allotment computation below */
+	if (nsecPerByte == 0)
+		return (NULL);
+#ifndef ALTQ_RED
+	if (flags & RMCF_RED) {
+#ifdef ALTQ_DEBUG
+		printf("rmc_newclass: RED not configured for CBQ!\n");
+#endif
+		return (NULL);
+	}
+#endif
+#ifndef ALTQ_RIO
+	if (flags & RMCF_RIO) {
+#ifdef ALTQ_DEBUG
+		printf("rmc_newclass: RIO not configured for CBQ!\n");
+#endif
+		return (NULL);
+	}
+#endif
+
+	/*
+	 * Allocate and zero the class and its packet queue; the class is
+	 * released again if the queue cannot be obtained.
+	 */
+	MALLOC(cl, struct rm_class *, sizeof(struct rm_class),
+	       M_DEVBUF, M_WAITOK);
+	if (cl == NULL)
+		return (NULL);
+	bzero(cl, sizeof(struct rm_class));
+	CALLOUT_INIT(&cl->callout_);
+	MALLOC(cl->q_, class_queue_t *, sizeof(class_queue_t),
+	       M_DEVBUF, M_WAITOK);
+	if (cl->q_ == NULL) {
+		FREE(cl, M_DEVBUF);
+		return (NULL);
+	}
+	bzero(cl->q_, sizeof(class_queue_t));
+
+	/*
+	 * Class initialization.
+	 */
+	cl->children_ = NULL;
+	cl->parent_ = parent;
+	cl->borrow_ = borrow;
+	cl->leaf_ = 1;
+	cl->ifdat_ = ifd;
+	cl->pri_ = pri;
+	cl->allotment_ = RM_NS_PER_SEC / nsecPerByte; /* Bytes per sec */
+	cl->depth_ = 0;
+	cl->qthresh_ = 0;
+	cl->ns_per_byte_ = nsecPerByte;
+
+	qlimit(cl->q_) = maxq;
+	qtype(cl->q_) = Q_DROPHEAD;
+	qlen(cl->q_) = 0;
+	cl->flags_ = flags;
+
+#if 1 /* minidle is also scaled in ALTQ */
+	/* signed arithmetic: minidle may be negative */
+	cl->minidle_ = (minidle * (int)nsecPerByte) / 8;
+	if (cl->minidle_ > 0)
+		cl->minidle_ = 0;
+#else
+	cl->minidle_ = minidle;
+#endif
+	cl->maxidle_ = (maxidle * nsecPerByte) / 8;
+	if (cl->maxidle_ == 0)
+		cl->maxidle_ = 1;
+#if 1 /* offtime is also scaled in ALTQ */
+	cl->avgidle_ = cl->maxidle_;
+	cl->offtime_ = ((offtime * nsecPerByte) / 8) >> RM_FILTER_GAIN;
+	if (cl->offtime_ == 0)
+		cl->offtime_ = 1;
+#else
+	cl->avgidle_ = 0;
+	cl->offtime_ = (offtime * nsecPerByte) / 8;
+#endif
+	cl->overlimit = action;
+
+#ifdef ALTQ_RED
+	if (flags & (RMCF_RED|RMCF_RIO)) {
+		int red_flags, red_pkttime;
+
+		/* translate the RMCF_* class flags into RED/RIO flags */
+		red_flags = 0;
+		if (flags & RMCF_ECN)
+			red_flags |= REDF_ECN;
+		if (flags & RMCF_FLOWVALVE)
+			red_flags |= REDF_FLOWVALVE;
+#ifdef ALTQ_RIO
+		if (flags & RMCF_CLEARDSCP)
+			red_flags |= RIOF_CLEARDSCP;
+#endif
+		red_pkttime = nsecPerByte * pktsize / 1000;
+
+		/*
+		 * The queue type is switched only when the allocation
+		 * succeeds; otherwise the class stays Q_DROPHEAD.
+		 */
+		if (flags & RMCF_RED) {
+			cl->red_ = red_alloc(0, 0,
+			    qlimit(cl->q_) * 10/100,
+			    qlimit(cl->q_) * 30/100,
+			    red_flags, red_pkttime);
+			if (cl->red_ != NULL)
+				qtype(cl->q_) = Q_RED;
+		}
+#ifdef ALTQ_RIO
+		else {
+			cl->red_ = (red_t *)rio_alloc(0, NULL,
+			    red_flags, red_pkttime);
+			if (cl->red_ != NULL)
+				qtype(cl->q_) = Q_RIO;
+		}
+#endif
+	}
+#endif /* ALTQ_RED */
+
+	/*
+	 * put the class into the class tree
+	 */
+#ifdef __NetBSD__
+	s = splnet();
+#else
+	s = splimp();
+#endif
+	if ((peer = ifd->active_[pri]) != NULL) {
+		/* find the last class at this pri */
+		cl->peer_ = peer;
+		while (peer->peer_ != ifd->active_[pri])
+			peer = peer->peer_;
+		peer->peer_ = cl;
+	} else {
+		/* first class at this priority: a ring of one */
+		ifd->active_[pri] = cl;
+		cl->peer_ = cl;
+	}
+
+	if (cl->parent_) {
+		cl->next_ = parent->children_;
+		parent->children_ = cl;
+		parent->leaf_ = 0;
+	}
+
+	/*
+	 * Compute the depth of this class and its ancestors in the class
+	 * hierarchy.
+	 */
+	rmc_depth_compute(cl);
+
+	/*
+	 * If CBQ's WRR is enabled, then initialize the class WRR state.
+	 */
+	if (ifd->wrr_) {
+		ifd->num_[pri]++;
+		ifd->alloc_[pri] += cl->allotment_;
+		rmc_wrr_set_weights(ifd);
+	}
+	splx(s);
+	return (cl);
+}
+
+/*
+ * int
+ * rmc_modclass(...) - Change the rate parameters of an existing class.
+ *
+ *	The allotment, ns-per-byte, queue limit, minidle, maxidle, avgidle
+ *	and offtime of 'cl' are recomputed from the arguments using the
+ *	same scaling as rmc_newclass().  When WRR is enabled the priority
+ *	level's allocation total is adjusted by the change in allotment
+ *	and the weights are recomputed.  'pktsize' is not used here.
+ *
+ *	Returns:	0 (always)
+ */
+int
+rmc_modclass(struct rm_class *cl, u_int nsecPerByte, int maxq, u_int maxidle,
+    int minidle, u_int offtime, int pktsize)
+{
+	struct rm_ifdat *ifd;
+	u_int old_allotment;
+	int s;
+
+	ifd = cl->ifdat_;
+	old_allotment = cl->allotment_;
+
+#ifdef __NetBSD__
+	s = splnet();
+#else
+	s = splimp();
+#endif
+	cl->allotment_ = RM_NS_PER_SEC / nsecPerByte; /* Bytes per sec */
+	cl->qthresh_ = 0;
+	cl->ns_per_byte_ = nsecPerByte;
+
+	qlimit(cl->q_) = maxq;
+
+#if 1 /* minidle is also scaled in ALTQ */
+	/*
+	 * The multiplication must be signed, as in rmc_newclass():
+	 * without the cast a negative minidle is converted to unsigned,
+	 * the quotient comes out positive and is then clamped to 0.
+	 */
+	cl->minidle_ = (minidle * (int)nsecPerByte) / 8;
+	if (cl->minidle_ > 0)
+		cl->minidle_ = 0;
+#else
+	cl->minidle_ = minidle;
+#endif
+	cl->maxidle_ = (maxidle * nsecPerByte) / 8;
+	if (cl->maxidle_ == 0)
+		cl->maxidle_ = 1;
+#if 1 /* offtime is also scaled in ALTQ */
+	cl->avgidle_ = cl->maxidle_;
+	cl->offtime_ = ((offtime * nsecPerByte) / 8) >> RM_FILTER_GAIN;
+	if (cl->offtime_ == 0)
+		cl->offtime_ = 1;
+#else
+	cl->avgidle_ = 0;
+	cl->offtime_ = (offtime * nsecPerByte) / 8;
+#endif
+
+	/*
+	 * If CBQ's WRR is enabled, then initialize the class WRR state.
+	 */
+	if (ifd->wrr_) {
+		ifd->alloc_[cl->pri_] += cl->allotment_ - old_allotment;
+		rmc_wrr_set_weights(ifd);
+	}
+	splx(s);
+	return (0);
+}
+
+/*
+ * static void
+ * rmc_wrr_set_weights(struct rm_ifdat *ifd) - Recompute the weighted
+ *	round-robin parameters for every priority level of 'ifd'.
+ *
+ *	For each priority, M_[] becomes alloc_ / (num_ * maxpkt_), or 0
+ *	when the level has no classes.  Every class on the level's peer
+ *	ring then gets w_allotment_ = allotment_ / M_[] (0 when M_[] is 0).
+ *
+ *	Returns:	NONE
+ */
+
+static void
+rmc_wrr_set_weights(struct rm_ifdat *ifd)
+{
+	struct rm_class *head, *cur;
+	int pri;
+
+	for (pri = 0; pri < RM_MAXPRIO; pri++) {
+		/*
+		 * This is inverted from that of the simulator to
+		 * maintain precision.
+		 */
+		if (ifd->num_[pri] != 0)
+			ifd->M_[pri] = ifd->alloc_[pri] /
+			    (ifd->num_[pri] * ifd->maxpkt_);
+		else
+			ifd->M_[pri] = 0;
+
+		head = ifd->active_[pri];
+		if (head == NULL)
+			continue;
+
+		/*
+		 * Precompute the weighted allotment of each class so the
+		 * division stays out of the wrr scheduling path.  These
+		 * only get recomputed when a class comes or goes.
+		 */
+		cur = head;
+		do {
+			/* safe-guard for slow link or alloc_ == 0 */
+			if (ifd->M_[pri] != 0)
+				cur->w_allotment_ = cur->allotment_ /
+				    ifd->M_[pri];
+			else
+				cur->w_allotment_ = 0;
+			cur = cur->peer_;
+		} while ((cur != NULL) && (cur != head));
+	}
+}
+
+/*
+ * int
+ * rmc_get_weight(struct rm_ifdat *ifd, int pri) - Return the WRR value
+ *	M_[pri] of 'ifd', or 0 when 'pri' is outside [0, RM_MAXPRIO).
+ */
+int
+rmc_get_weight(struct rm_ifdat *ifd, int pri)
+{
+	if ((pri < 0) || (pri >= RM_MAXPRIO))
+		return (0);
+
+	return (ifd->M_[pri]);
+}
+
+/*
+ * static void
+ * rmc_depth_compute(struct rm_class *cl) - Propagate the depth of
+ *	class 'cl' to its ancestors.
+ *
+ *	Starting at 'cl', each parent whose depth_ is not already greater
+ *	than its child's is set to the child's depth_ + 1.  The walk ends
+ *	at the first class without a parent or whose parent is already
+ *	deep enough.
+ *
+ *	Returns:	NONE
+ */
+
+static void
+rmc_depth_compute(struct rm_class *cl)
+{
+	rm_class_t *node, *up;
+
+	for (node = cl; node != NULL; node = up) {
+		up = node->parent_;
+		if (up == NULL || node->depth_ < up->depth_)
+			break;
+		up->depth_ = node->depth_ + 1;
+	}
+}
+
+/*
+ * static void
+ * rmc_depth_recompute(struct rm_class *cl) - This function re-computes
+ * the depth of the tree after a class has been deleted.
+ *
+ * In the compiled (#if 1) variant the walk starts at 'cl' and follows
+ * parent_ links upward. A node without children gets depth 0; otherwise
+ * its depth becomes one more than the largest depth_ among its children.
+ * The walk stops early as soon as a node with children already holds the
+ * computed depth. A NULL 'cl' is accepted and does nothing.
+ *
+ * Returns: NONE
+ */
+
+static void
+rmc_depth_recompute(rm_class_t *cl)
+{
+#if 1 /* ALTQ */
+ rm_class_t *p, *t;
+
+ p = cl;
+ while (p != NULL) {
+ if ((t = p->children_) == NULL) {
+ p->depth_ = 0;
+ } else {
+ int cdepth = 0; /* largest child depth seen so far */
+
+ while (t != NULL) {
+ if (t->depth_ > cdepth)
+ cdepth = t->depth_;
+ t = t->next_;
+ }
+
+ if (p->depth_ == cdepth + 1)
+ /* no change to this parent, so none to its ancestors */
+ return;
+
+ p->depth_ = cdepth + 1;
+ }
+
+ p = p->parent_;
+ }
+#else
+ rm_class_t *t;
+
+ if (cl->depth_ >= 1) {
+ if (cl->children_ == NULL) {
+ cl->depth_ = 0;
+ } else if ((t = cl->children_) != NULL) {
+ while (t != NULL) {
+ if (t->children_ != NULL)
+ rmc_depth_recompute(t);
+ t = t->next_;
+ }
+ } else
+ rmc_depth_compute(cl);
+ }
+#endif
+}
+
+/*
+ * void
+ * rmc_delete_class(struct rm_ifdat *ifdat, struct rm_class *cl) - This
+ * function deletes a class from the link-sharing structure and frees
+ * all resources associated with the class.
+ *
+ * The class must have no children (asserted). In order, the routine
+ * stops the class callout if the class is sleeping, drops the queued
+ * packets with rmc_dropall(), unlinks the class from its parent's
+ * children list and from the peer ring of its priority, updates the
+ * WRR totals when WRR is enabled, recomputes depths starting at the
+ * parent, and finally destroys any RED/RIO state and frees the queue
+ * and the class itself.
+ *
+ * Returns: NONE
+ */
+
+void
+rmc_delete_class(struct rm_ifdat *ifd, struct rm_class *cl)
+{
+ struct rm_class *p, *head, *previous;
+ int s;
+
+ ASSERT(cl->children_ == NULL);
+
+ if (cl->sleeping_)
+ CALLOUT_STOP(&cl->callout_);
+
+#ifdef __NetBSD__
+ s = splnet();
+#else
+ s = splimp();
+#endif
+ /*
+ * Free packets in the packet queue.
+ * XXX - this may not be a desired behavior. Packets should be
+ * re-queued.
+ */
+ rmc_dropall(cl);
+
+ /*
+ * If the class has a parent, then remove the class from the
+ * parent's children chain.
+ */
+ if (cl->parent_ != NULL) {
+ head = cl->parent_->children_;
+ p = previous = head;
+ if (head->next_ == NULL) { /* cl is the only child: parent becomes a leaf */
+ ASSERT(head == cl);
+ cl->parent_->children_ = NULL;
+ cl->parent_->leaf_ = 1;
+ } else while (p != NULL) {
+ if (p == cl) {
+ if (cl == head)
+ cl->parent_->children_ = cl->next_;
+ else
+ previous->next_ = cl->next_;
+ cl->next_ = NULL;
+ p = NULL; /* found and unlinked: end the search */
+ } else {
+ previous = p;
+ p = p->next_;
+ }
+ }
+ }
+
+ /*
+ * Delete class from class priority peer list.
+ */
+ if ((p = ifd->active_[cl->pri_]) != NULL) {
+ /*
+ * If there is more than one member of this priority
+ * level, then look for class(cl) in the priority level.
+ */
+ if (p != p->peer_) {
+ while (p->peer_ != cl) /* find the predecessor of cl on the ring */
+ p = p->peer_;
+ p->peer_ = cl->peer_;
+
+ if (ifd->active_[cl->pri_] == cl)
+ ifd->active_[cl->pri_] = cl->peer_;
+ } else {
+ ASSERT(p == cl);
+ ifd->active_[cl->pri_] = NULL;
+ }
+ }
+
+ /*
+ * Recompute the WRR weights.
+ */
+ if (ifd->wrr_) {
+ ifd->alloc_[cl->pri_] -= cl->allotment_;
+ ifd->num_[cl->pri_]--;
+ rmc_wrr_set_weights(ifd);
+ }
+
+ /*
+ * Re-compute the depth of the tree.
+ */
+#if 1 /* ALTQ */
+ rmc_depth_recompute(cl->parent_);
+#else
+ rmc_depth_recompute(ifd->root_);
+#endif
+
+ splx(s);
+
+ /*
+ * Free the class structure.
+ */
+ if (cl->red_ != NULL) {
+#ifdef ALTQ_RIO
+ if (q_is_rio(cl->q_))
+ rio_destroy((rio_t *)cl->red_);
+#endif
+#ifdef ALTQ_RED
+ if (q_is_red(cl->q_))
+ red_destroy(cl->red_);
+#endif
+ }
+ FREE(cl->q_, M_DEVBUF);
+ FREE(cl, M_DEVBUF);
+}
+
+
+/*
+ * void
+ * rmc_init(...) - Initialize the resource management data structures
+ * associated with the output portion of the interface behind 'ifq'.
+ * 'ifd' is where the structures will be built (for backwards
+ * compatibility, the structures aren't kept in the ifnet struct).
+ * 'nsecPerByte' gives the link speed (inverse of bandwidth) in
+ * nanoseconds/byte.
+ * 'restart' is the driver-specific routine that the generic 'delay
+ * until under limit' action will call to restart output. `maxq'
+ * is the queue size of the 'link' & 'default' classes. 'maxqueued'
+ * is the maximum number of packets that the resource management
+ * code will allow to be queued 'downstream' (this is typically 1).
+ * 'maxidle', 'minidle' and 'offtime' are passed on to rmc_newclass()
+ * for the root class. Of 'flags' only RMCF_WRR and RMCF_EFFICIENT are
+ * examined here.
+ *
+ * 'ifd' is zeroed first, so any previous contents are lost. If the
+ * root class cannot be created a message is printed and the function
+ * returns with ifd->root_ set to NULL.
+ *
+ * Returns: NONE
+ */
+
+void
+rmc_init(struct ifaltq *ifq, struct rm_ifdat *ifd, u_int nsecPerByte,
+ void (*restart)(struct ifaltq *), int maxq, int maxqueued, u_int maxidle,
+ int minidle, u_int offtime, int flags)
+{
+ int i, mtu;
+
+ /*
+ * Initialize the CBQ tracing/debug facility.
+ */
+ CBQTRACEINIT();
+
+ bzero((char *)ifd, sizeof (*ifd));
+ mtu = ifq->altq_ifp->if_mtu;
+ ifd->ifq_ = ifq;
+ ifd->restart = restart;
+ ifd->maxqueued_ = maxqueued;
+ ifd->ns_per_byte_ = nsecPerByte;
+ ifd->maxpkt_ = mtu; /* largest packet size taken to be the interface MTU */
+ ifd->wrr_ = (flags & RMCF_WRR) ? 1 : 0;
+ ifd->efficient_ = (flags & RMCF_EFFICIENT) ? 1 : 0;
+#if 1
+ ifd->maxiftime_ = mtu * nsecPerByte / 1000 * 16;
+ if (mtu * nsecPerByte > 10 * 1000000)
+ ifd->maxiftime_ /= 4;
+#endif
+
+ reset_cutoff(ifd);
+ CBQTRACE(rmc_init, 'INIT', ifd->cutoff_);
+
+ /*
+ * Initialize the CBQ's WRR state.
+ */
+ for (i = 0; i < RM_MAXPRIO; i++) {
+ ifd->alloc_[i] = 0;
+ ifd->M_[i] = 0;
+ ifd->num_[i] = 0;
+ ifd->na_[i] = 0;
+ ifd->active_[i] = NULL;
+ }
+
+ /*
+ * Initialize current packet state.
+ */
+ ifd->qi_ = 0;
+ ifd->qo_ = 0;
+ for (i = 0; i < RM_MAXQUEUED; i++) {
+ ifd->class_[i] = NULL;
+ ifd->curlen_[i] = 0;
+ ifd->borrowed_[i] = NULL;
+ }
+
+ /*
+ * Create the root class of the link-sharing structure.
+ * It has priority 0, no parent, no class to borrow from,
+ * pktsize 0 and no flags.
+ */
+ if ((ifd->root_ = rmc_newclass(0, ifd,
+ nsecPerByte,
+ rmc_root_overlimit, maxq, 0, 0,
+ maxidle, minidle, offtime,
+ 0, 0)) == NULL) {
+ printf("rmc_init: root class not allocated\n");
+ return ;
+ }
+ ifd->root_->depth_ = 0;
+}
+
+/*
+ * int
+ * rmc_queue_packet(struct rm_class *cl, mbuf_t *m) - Add packet given by
+ * mbuf 'm' to queue for resource class 'cl'. This routine is called
+ * by a driver's if_output routine. This routine must be called with
+ * output packet completion interrupts locked out (to avoid racing with
+ * rmc_dequeue_next).
+ *
+ * Before queueing, the top-level cutoff of the interface may be
+ * lowered: to the depth of 'cl' when its undertime_ is already in the
+ * past, otherwise to the depth of the first class on its borrow chain
+ * that is below the current cutoff and whose undertime_ is in the past.
+ *
+ * Returns: 0 on successful queueing
+ * -1 when packet drop occurs
+ */
+int
+rmc_queue_packet(struct rm_class *cl, mbuf_t *m)
+{
+ struct timeval now;
+ struct rm_ifdat *ifd = cl->ifdat_;
+ int cpri = cl->pri_;
+ int is_empty = qempty(cl->q_); /* sampled before the packet is added */
+
+ RM_GETTIME(now);
+ if (ifd->cutoff_ > 0) {
+ if (TV_LT(&cl->undertime_, &now)) {
+ if (ifd->cutoff_ > cl->depth_)
+ ifd->cutoff_ = cl->depth_;
+ CBQTRACE(rmc_queue_packet, 'ffoc', cl->depth_);
+ }
+#if 1 /* ALTQ */
+ else {
+ /*
+ * the class is overlimit. if the class has
+ * underlimit ancestors, set cutoff to the lowest
+ * depth among them.
+ */
+ struct rm_class *borrow = cl->borrow_;
+
+ while (borrow != NULL &&
+ borrow->depth_ < ifd->cutoff_) {
+ if (TV_LT(&borrow->undertime_, &now)) {
+ ifd->cutoff_ = borrow->depth_;
+ CBQTRACE(rmc_queue_packet, 'ffob', ifd->cutoff_);
+ break;
+ }
+ borrow = borrow->borrow_;
+ }
+ }
+#else /* !ALTQ */
+ else if ((ifd->cutoff_ > 1) && cl->borrow_) {
+ if (TV_LT(&cl->borrow_->undertime_, &now)) {
+ ifd->cutoff_ = cl->borrow_->depth_;
+ CBQTRACE(rmc_queue_packet, 'ffob',
+ cl->borrow_->depth_);
+ }
+ }
+#endif /* !ALTQ */
+ }
+
+ if (_rmc_addq(cl, m) < 0)
+ /* failed */
+ return (-1);
+
+ if (is_empty) { /* queue went from empty to non-empty: bump na_[] for this priority */
+ CBQTRACE(rmc_queue_packet, 'ytpe', cl->stats_.handle);
+ ifd->na_[cpri]++;
+ }
+
+ if (qlen(cl->q_) > qlimit(cl->q_)) {
+ /* note: qlimit can be set to 0 or 1 */
+ rmc_drop_action(cl);
+ return (-1);
+ }
+ return (0);
+}
+
+/*
+ * static void
+ * rmc_tl_satisfied(struct rm_ifdat *ifd, struct timeval *now) - Check all
+ *	classes to see if they are satisfied.
+ *
+ *	Priority levels are scanned from highest to lowest and each
+ *	level's peer ring is walked once.  The first class for which
+ *	rmc_satisfied() returns 0 sets the cutoff to its depth and ends
+ *	the scan; if every class is satisfied the cutoff is reset.
+ */
+
+static void
+rmc_tl_satisfied(struct rm_ifdat *ifd, struct timeval *now)
+{
+	rm_class_t *head, *cur;
+	int pri;
+
+	for (pri = RM_MAXPRIO - 1; pri >= 0; pri--) {
+		head = ifd->active_[pri];
+		if (head == NULL)
+			continue;
+
+		cur = head;
+		do {
+			if (rmc_satisfied(cur, now) == 0) {
+				ifd->cutoff_ = cur->depth_;
+				return;
+			}
+			cur = cur->peer_;
+		} while (cur != head);
+	}
+
+	reset_cutoff(ifd);
+}
+
+/*
+ * rmc_satisfied - Return 1 if the class is satisfied, 0 otherwise.
+ *
+ *	A NULL class, or one whose undertime_ is still in the future, is
+ *	satisfied.  A depth-0 class is unsatisfied only when it is not
+ *	sleeping and its queue length exceeds qthresh_.  Any other class
+ *	is satisfied when all of its children are.
+ */
+
+static int
+rmc_satisfied(struct rm_class *cl, struct timeval *now)
+{
+	rm_class_t *child;
+
+	if (cl == NULL)
+		return (1);
+	if (TV_LT(now, &cl->undertime_))
+		return (1);
+
+	if (cl->depth_ == 0) {
+		if (cl->sleeping_ || (qlen(cl->q_) <= cl->qthresh_))
+			return (1);
+		return (0);
+	}
+
+	for (child = cl->children_; child != NULL; child = child->next_) {
+		if (rmc_satisfied(child, now) == 0)
+			return (0);
+	}
+
+	return (1);
+}
+
+/*
+ * Return 1 if class 'cl' is under limit or can borrow from a parent,
+ * 0 if overlimit. As a side-effect, this routine will invoke the
+ * class overlimit action if the class is overlimit.
+ *
+ * The borrowed_[] slot of the current packet index (qi_) is cleared on
+ * entry and, on success, set to the class borrowed from when that is
+ * not 'cl' itself. A sleeping class whose undertime_ has passed is
+ * woken up: its callout is stopped and sleeping_ and undertime_.tv_sec
+ * are cleared. With ADJUST_CUTOFF defined, running into the cutoff
+ * returns 0 without invoking the overlimit action.
+ */
+
+static int
+rmc_under_limit(struct rm_class *cl, struct timeval *now)
+{
+ rm_class_t *p = cl; /* remembers the class originally asked about */
+ rm_class_t *top;
+ struct rm_ifdat *ifd = cl->ifdat_;
+
+ ifd->borrowed_[ifd->qi_] = NULL;
+ /*
+ * If cl is the root class, then always return that it is
+ * underlimit. Otherwise, check to see if the class is underlimit.
+ */
+ if (cl->parent_ == NULL)
+ return (1);
+
+ if (cl->sleeping_) {
+ if (TV_LT(now, &cl->undertime_))
+ return (0);
+
+ CALLOUT_STOP(&cl->callout_);
+ cl->sleeping_ = 0;
+ cl->undertime_.tv_sec = 0;
+ return (1);
+ }
+
+ top = NULL;
+ while (cl->undertime_.tv_sec && TV_LT(now, &cl->undertime_)) { /* cl is overlimit: try the next class on the borrow chain */
+ if (((cl = cl->borrow_) == NULL) ||
+ (cl->depth_ > ifd->cutoff_)) {
+#ifdef ADJUST_CUTOFF
+ if (cl != NULL)
+ /* cutoff is taking effect, just
+ return false without calling
+ the delay action. */
+ return (0);
+#endif
+#ifdef BORROW_OFFTIME
+ /*
+ * check if the class can borrow offtime too.
+ * borrow offtime from the top of the borrow
+ * chain if the top class is not overloaded.
+ */
+ if (cl != NULL) { /* not reached with a non-NULL cl when ADJUST_CUTOFF is defined */
+ /* cutoff is taking effect, use this class as top. */
+ top = cl;
+ CBQTRACE(rmc_under_limit, 'ffou', ifd->cutoff_);
+ }
+ if (top != NULL && top->avgidle_ == top->minidle_)
+ top = NULL;
+ p->overtime_ = *now;
+ (p->overlimit)(p, top);
+#else
+ p->overtime_ = *now;
+ (p->overlimit)(p, NULL);
+#endif
+ return (0);
+ }
+ top = cl;
+ }
+
+ if (cl != p)
+ ifd->borrowed_[ifd->qi_] = cl;
+ return (1);
+}
+
+/*
+ * _rmc_wrr_dequeue_next() - This is scheduler for WRR as opposed to
+ * Packet-by-packet round robin.
+ *
+ * The heart of the weighted round-robin scheduler, which decides which
+ * class next gets to send a packet. Highest priority first, then
+ * weighted round-robin within priorites.
+ *
+ * Each able-to-send class gets to send until its byte allocation is
+ * exhausted. Thus, the active pointer is only changed after a class has
+ * exhausted its allocation.
+ *
+ * If the scheduler finds no class that is underlimit or able to borrow,
+ * then the first class found that had a nonzero queue and is allowed to
+ * borrow gets to send.
+ */
+
+static mbuf_t *
+_rmc_wrr_dequeue_next(struct rm_ifdat *ifd, int op)
+{
+ struct rm_class *cl = NULL, *first = NULL;
+ u_int deficit;
+ int cpri;
+ mbuf_t *m;
+ struct timeval now;
+
+ RM_GETTIME(now);
+
+ /*
+ * if the driver polls the top of the queue and then removes
+ * the polled packet, we must return the same packet.
+ */
+ if (op == ALTDQ_REMOVE && ifd->pollcache_) {
+ cl = ifd->pollcache_;
+ cpri = cl->pri_;
+ if (ifd->efficient_) {
+ /* check if this class is overlimit */
+ if (cl->undertime_.tv_sec != 0 &&
+ rmc_under_limit(cl, &now) == 0)
+ first = cl;
+ }
+ ifd->pollcache_ = NULL;
+ goto _wrr_out;
+ }
+ else {
+ /* mode == ALTDQ_POLL || pollcache == NULL */
+ ifd->pollcache_ = NULL;
+ ifd->borrowed_[ifd->qi_] = NULL;
+ }
+#ifdef ADJUST_CUTOFF
+ _again:
+#endif
+ for (cpri = RM_MAXPRIO - 1; cpri >= 0; cpri--) {
+ if (ifd->na_[cpri] == 0)
+ continue;
+ deficit = 0;
+ /*
+ * Loop through twice for a priority level, if some class
+ * was unable to send a packet the first round because
+ * of the weighted round-robin mechanism.
+ * During the second loop at this level, deficit==2.
+ * (This second loop is not needed if for every class,
+ * "M[cl->pri_])" times "cl->allotment" is greater than
+ * the byte size for the largest packet in the class.)
+ */
+ _wrr_loop:
+ cl = ifd->active_[cpri];
+ ASSERT(cl != NULL);
+ do {
+ if ((deficit < 2) && (cl->bytes_alloc_ <= 0))
+ cl->bytes_alloc_ += cl->w_allotment_;
+ if (!qempty(cl->q_)) {
+ if ((cl->undertime_.tv_sec == 0) ||
+ rmc_under_limit(cl, &now)) {
+ if (cl->bytes_alloc_ > 0 || deficit > 1)
+ goto _wrr_out;
+
+ /* underlimit but no alloc */
+ deficit = 1;
+#if 1
+ ifd->borrowed_[ifd->qi_] = NULL;
+#endif
+ }
+ else if (first == NULL && cl->borrow_ != NULL)
+ first = cl; /* borrowing candidate */
+ }
+
+ cl->bytes_alloc_ = 0;
+ cl = cl->peer_;
+ } while (cl != ifd->active_[cpri]);
+
+ if (deficit == 1) {
+ /* first loop found an underlimit class with deficit */
+ /* Loop on same priority level, with new deficit. */
+ deficit = 2;
+ goto _wrr_loop;
+ }
+ }
+
+#ifdef ADJUST_CUTOFF
+ /*
+ * no underlimit class found. if cutoff is taking effect,
+ * increase cutoff and try again.
+ */
+ if (first != NULL && ifd->cutoff_ < ifd->root_->depth_) {
+ ifd->cutoff_++;
+ CBQTRACE(_rmc_wrr_dequeue_next, 'ojda', ifd->cutoff_);
+ goto _again;
+ }
+#endif /* ADJUST_CUTOFF */
+ /*
+ * If LINK_EFFICIENCY is turned on, then the first overlimit
+ * class we encounter will send a packet if all the classes
+ * of the link-sharing structure are overlimit.
+ */
+ reset_cutoff(ifd);
+ CBQTRACE(_rmc_wrr_dequeue_next, 'otsr', ifd->cutoff_);
+
+ if (!ifd->efficient_ || first == NULL)
+ return (NULL);
+
+ cl = first;
+ cpri = cl->pri_;
+#if 0 /* too time-consuming for nothing */
+ if (cl->sleeping_)
+ CALLOUT_STOP(&cl->callout_);
+ cl->sleeping_ = 0;
+ cl->undertime_.tv_sec = 0;
+#endif
+ ifd->borrowed_[ifd->qi_] = cl->borrow_;
+ ifd->cutoff_ = cl->borrow_->depth_;
+
+ /*
+ * Deque the packet and do the book keeping...
+ */
+ _wrr_out:
+ if (op == ALTDQ_REMOVE) {
+ m = _rmc_getq(cl);
+ if (m == NULL)
+ panic("_rmc_wrr_dequeue_next");
+ if (qempty(cl->q_))
+ ifd->na_[cpri]--;
+
+ /*
+ * Update class statistics and link data.
+ */
+ if (cl->bytes_alloc_ > 0)
+ cl->bytes_alloc_ -= m_pktlen(m);
+
+ if ((cl->bytes_alloc_ <= 0) || first == cl)
+ ifd->active_[cl->pri_] = cl->peer_;
+ else
+ ifd->active_[cl->pri_] = cl;
+
+ ifd->class_[ifd->qi_] = cl;
+ ifd->curlen_[ifd->qi_] = m_pktlen(m);
+ ifd->now_[ifd->qi_] = now;
+ ifd->qi_ = (ifd->qi_ + 1) % ifd->maxqueued_;
+ ifd->queued_++;
+ } else {
+ /* mode == ALTDQ_POLL */
+ m = _rmc_pollq(cl);
+ ifd->pollcache_ = cl;
+ }
+ return (m);
+}
+
+/*
+ * Packet-by-packet round-robin dequeue.
+ *
+ * Scans the priority levels from RM_MAXPRIO - 1 down to 0 and, within a
+ * level, walks the ring of classes starting at ifd->active_[cpri]. The
+ * first class that has a packet queued and is underlimit (undertime_.tv_sec
+ * is 0, or rmc_under_limit() returns non-zero) is selected. Unlike the
+ * WRR variant, no per-class byte allocation is consulted here.
+ *
+ * If no underlimit class is found and ifd->efficient_ is set, the first
+ * overlimit class seen that has a borrow_ class is allowed to send.
+ *
+ * op == ALTDQ_REMOVE: the packet is dequeued and the class, packet length
+ * and timestamp are recorded in the downstream slot ifd->qi_. If a
+ * previous poll left a class in ifd->pollcache_, that class is used
+ * so that the driver gets the packet it polled.
+ * Any other op (poll): the head packet is returned without being removed
+ * and the class is remembered in ifd->pollcache_.
+ *
+ * Returns NULL when no class is eligible to send. Called from
+ * rmc_dequeue_next() when ifd->wrr_ is not set.
+ */
+static mbuf_t *
+_rmc_prr_dequeue_next(struct rm_ifdat *ifd, int op)
+{
+ mbuf_t *m;
+ int cpri;
+ struct rm_class *cl, *first = NULL;
+ struct timeval now;
+
+ RM_GETTIME(now);
+
+ /*
+ * if the driver polls the top of the queue and then removes
+ * the polled packet, we must return the same packet.
+ */
+ if (op == ALTDQ_REMOVE && ifd->pollcache_) {
+ cl = ifd->pollcache_;
+ cpri = cl->pri_;
+ ifd->pollcache_ = NULL;
+ goto _prr_out;
+ } else {
+ /* mode == ALTDQ_POLL || pollcache == NULL */
+ ifd->pollcache_ = NULL;
+ ifd->borrowed_[ifd->qi_] = NULL;
+ }
+#ifdef ADJUST_CUTOFF
+ _again:
+#endif
+ for (cpri = RM_MAXPRIO - 1; cpri >= 0; cpri--) {
+ if (ifd->na_[cpri] == 0)
+ continue;
+ cl = ifd->active_[cpri];
+ ASSERT(cl != NULL);
+ do {
+ if (!qempty(cl->q_)) {
+ if ((cl->undertime_.tv_sec == 0) ||
+ rmc_under_limit(cl, &now))
+ goto _prr_out;
+ if (first == NULL && cl->borrow_ != NULL)
+ first = cl; /* overlimit class kept as fallback sender */
+ }
+ cl = cl->peer_;
+ } while (cl != ifd->active_[cpri]);
+ }
+
+#ifdef ADJUST_CUTOFF
+ /*
+ * no underlimit class found. if cutoff is taking effect, increase
+ * cutoff and try again.
+ */
+ if (first != NULL && ifd->cutoff_ < ifd->root_->depth_) {
+ ifd->cutoff_++;
+ goto _again;
+ }
+#endif /* ADJUST_CUTOFF */
+ /*
+ * If LINK_EFFICIENCY is turned on, then the first overlimit
+ * class we encounter will send a packet if all the classes
+ * of the link-sharing structure are overlimit.
+ */
+ reset_cutoff(ifd);
+ if (!ifd->efficient_ || first == NULL)
+ return (NULL);
+
+ cl = first;
+ cpri = cl->pri_;
+#if 0 /* too time-consuming for nothing */
+ if (cl->sleeping_)
+ CALLOUT_STOP(&cl->callout_);
+ cl->sleeping_ = 0;
+ cl->undertime_.tv_sec = 0;
+#endif
+ ifd->borrowed_[ifd->qi_] = cl->borrow_; /* first is only set when borrow_ != NULL */
+ ifd->cutoff_ = cl->borrow_->depth_;
+
+ /*
+ * Dequeue the packet and do the bookkeeping...
+ */
+ _prr_out:
+ if (op == ALTDQ_REMOVE) {
+ m = _rmc_getq(cl);
+ if (m == NULL)
+ panic("_rmc_prr_dequeue_next");
+ if (qempty(cl->q_))
+ ifd->na_[cpri]--;
+
+ ifd->active_[cpri] = cl->peer_; /* next scan at this level starts with the peer */
+
+ ifd->class_[ifd->qi_] = cl;
+ ifd->curlen_[ifd->qi_] = m_pktlen(m);
+ ifd->now_[ifd->qi_] = now;
+ ifd->qi_ = (ifd->qi_ + 1) % ifd->maxqueued_;
+ ifd->queued_++;
+ } else {
+ /* mode == ALTDQ_POLL */
+ m = _rmc_pollq(cl);
+ ifd->pollcache_ = cl;
+ }
+ return (m);
+}
+
+/*
+ * mbuf_t *
+ * rmc_dequeue_next(struct rm_ifdat *ifd, int mode) - this function
+ *	is invoked by the packet driver to get the next packet to be
+ *	dequeued and output on the link.  `mode' is handed through
+ *	unchanged to the selected dequeue routine.  If WRR is enabled
+ *	(ifd->wrr_), the WRR dequeue routine determines the next packet
+ *	to send.  Otherwise, packet-by-packet round robin is used.
+ *
+ *	Returns:	NULL, if the number of packets already queued
+ *			downstream has reached ifd->maxqueued_, if no
+ *			packet is available or if all classes are overlimit.
+ *
+ *			Otherwise, pointer to the next packet.
+ */
+
+mbuf_t *
+rmc_dequeue_next(struct rm_ifdat *ifd, int mode)
+{
+	/* every downstream slot is in use: hand nothing out */
+	if (ifd->queued_ >= ifd->maxqueued_)
+		return (NULL);
+
+	return (ifd->wrr_ ? _rmc_wrr_dequeue_next(ifd, mode) :
+	    _rmc_prr_dequeue_next(ifd, mode));
+}
+
+/*
+ * Update the utilization estimate for the packet that just completed.
+ * The packet's class & the parent(s) of that class all get their
+ * estimators updated. This routine is called by the driver's output-
+ * packet-completion interrupt service routine.
+ *
+ * The completed packet is the one recorded in slot ifd->qo_ (class_,
+ * curlen_, borrowed_, now_). If that slot holds no class the routine
+ * returns without doing anything. Otherwise it:
+ * - advances ifd->ifnow_, the estimated transmit completion time;
+ * - for the class and each ancestor, folds the idle time into avgidle_
+ * and either sets undertime_ (avgidle <= 0, i.e. overlimit) or clears
+ * it and stops a pending sleep callout;
+ * - re-evaluates ifd->cutoff_ when the packet was sent by borrowing;
+ * - releases the slot, advances ifd->qo_ and decrements ifd->queued_.
+ */
+
+/*
+ * a macro to approximate "divide by 1000" that gives 0.000999,
+ * if a value has enough effective digits.
+ * (on pentium, mul takes 9 cycles but div takes 46!)
+ */
+#define NSEC_TO_USEC(t) (((t) >> 10) + ((t) >> 16) + ((t) >> 17))
+void
+rmc_update_class_util(struct rm_ifdat *ifd)
+{
+ int idle, avgidle, pktlen;
+ int pkt_time, tidle;
+ rm_class_t *cl, *borrowed;
+ rm_class_t *borrows;
+ struct timeval *nowp;
+
+ /*
+ * Get the most recent completed class.
+ */
+ if ((cl = ifd->class_[ifd->qo_]) == NULL)
+ return;
+
+ pktlen = ifd->curlen_[ifd->qo_];
+ borrowed = ifd->borrowed_[ifd->qo_];
+ borrows = borrowed;
+
+ PKTCNTR_ADD(&cl->stats_.xmit_cnt, pktlen);
+
+ /*
+ * Run estimator on class and its ancestors.
+ */
+ /*
+ * rmc_update_class_util is designed to be called when the
+ * transfer is completed from a xmit complete interrupt,
+ * but most drivers don't implement an upcall for that.
+ * so, just use estimated completion time.
+ * as a result, ifd->qi_ and ifd->qo_ are always synced.
+ */
+ nowp = &ifd->now_[ifd->qo_];
+ /* get pkt_time (for link) in usec */
+#if 1 /* use approximation */
+ pkt_time = ifd->curlen_[ifd->qo_] * ifd->ns_per_byte_;
+ pkt_time = NSEC_TO_USEC(pkt_time);
+#else
+ pkt_time = ifd->curlen_[ifd->qo_] * ifd->ns_per_byte_ / 1000;
+#endif
+#if 1 /* ALTQ4PPP */
+ if (TV_LT(nowp, &ifd->ifnow_)) {
+ int iftime;
+
+ /*
+ * make sure the estimated completion time does not go
+ * too far. it can happen when the link layer supports
+ * data compression or the interface speed is set to
+ * a much lower value.
+ */
+ TV_DELTA(&ifd->ifnow_, nowp, iftime);
+ if (iftime+pkt_time < ifd->maxiftime_) {
+ TV_ADD_DELTA(&ifd->ifnow_, pkt_time, &ifd->ifnow_);
+ } else {
+ TV_ADD_DELTA(nowp, ifd->maxiftime_, &ifd->ifnow_);
+ }
+ } else {
+ TV_ADD_DELTA(nowp, pkt_time, &ifd->ifnow_);
+ }
+#else
+ if (TV_LT(nowp, &ifd->ifnow_)) {
+ TV_ADD_DELTA(&ifd->ifnow_, pkt_time, &ifd->ifnow_);
+ } else {
+ TV_ADD_DELTA(nowp, pkt_time, &ifd->ifnow_);
+ }
+#endif
+
+ while (cl != NULL) {
+ TV_DELTA(&ifd->ifnow_, &cl->last_, idle);
+ if (idle >= 2000000)
+ /*
+ * this class is idle enough, reset avgidle.
+ * (TV_DELTA returns 2000000 us when delta is large.)
+ */
+ cl->avgidle_ = cl->maxidle_;
+
+ /* get pkt_time (for class) in usec */
+#if 1 /* use approximation */
+ pkt_time = pktlen * cl->ns_per_byte_;
+ pkt_time = NSEC_TO_USEC(pkt_time);
+#else
+ pkt_time = pktlen * cl->ns_per_byte_ / 1000;
+#endif
+ idle -= pkt_time;
+
+ avgidle = cl->avgidle_;
+ avgidle += idle - (avgidle >> RM_FILTER_GAIN);
+ cl->avgidle_ = avgidle;
+
+ /* Are we overlimit ? */
+ if (avgidle <= 0) {
+ CBQTRACE(rmc_update_class_util, 'milo', cl->stats_.handle);
+#if 1 /* ALTQ */
+ /*
+ * need some lower bound for avgidle, otherwise
+ * a borrowing class gets unbounded penalty.
+ */
+ if (avgidle < cl->minidle_)
+ avgidle = cl->avgidle_ = cl->minidle_;
+#endif
+ /* set next idle to make avgidle 0 */
+ tidle = pkt_time +
+ (((1 - RM_POWER) * avgidle) >> RM_FILTER_GAIN);
+ TV_ADD_DELTA(nowp, tidle, &cl->undertime_);
+ ++cl->stats_.over;
+ } else {
+ cl->avgidle_ =
+ (avgidle > cl->maxidle_) ? cl->maxidle_ : avgidle;
+ cl->undertime_.tv_sec = 0;
+ if (cl->sleeping_) {
+ CALLOUT_STOP(&cl->callout_);
+ cl->sleeping_ = 0;
+ }
+ }
+
+ if (borrows != NULL) {
+ if (borrows != cl)
+ ++cl->stats_.borrows; /* cl sits below the lending class */
+ else
+ borrows = NULL; /* reached the lender: stop counting */
+ }
+ cl->last_ = ifd->ifnow_;
+ cl->last_pkttime_ = pkt_time;
+
+#if 1
+ if (cl->parent_ == NULL) {
+ /* take stats of root class */
+ PKTCNTR_ADD(&cl->stats_.xmit_cnt, pktlen);
+ }
+#endif
+
+ cl = cl->parent_;
+ }
+
+ /*
+ * Check to see if cutoff needs to set to a new level.
+ */
+ cl = ifd->class_[ifd->qo_];
+ if (borrowed && (ifd->cutoff_ >= borrowed->depth_)) {
+#if 1 /* ALTQ */
+ if ((qlen(cl->q_) <= 0) || TV_LT(nowp, &borrowed->undertime_)) {
+ rmc_tl_satisfied(ifd, nowp);
+ CBQTRACE(rmc_update_class_util, 'broe', ifd->cutoff_);
+ } else {
+ ifd->cutoff_ = borrowed->depth_;
+ CBQTRACE(rmc_update_class_util, 'ffob', borrowed->depth_);
+ }
+#else /* !ALTQ: compiled out. NOTE(review): refers to 'now', which is not a local of this function */
+ if ((qlen(cl->q_) <= 1) || TV_LT(&now, &borrowed->undertime_)) {
+ reset_cutoff(ifd);
+#ifdef notdef
+ rmc_tl_satisfied(ifd, &now);
+#endif
+ CBQTRACE(rmc_update_class_util, 'broe', ifd->cutoff_);
+ } else {
+ ifd->cutoff_ = borrowed->depth_;
+ CBQTRACE(rmc_update_class_util, 'ffob', borrowed->depth_);
+ }
+#endif /* !ALTQ */
+ }
+
+ /*
+ * Release class slot
+ */
+ ifd->borrowed_[ifd->qo_] = NULL;
+ ifd->class_[ifd->qo_] = NULL;
+ ifd->qo_ = (ifd->qo_ + 1) % ifd->maxqueued_;
+ ifd->queued_--;
+}
+
+/*
+ * void
+ * rmc_drop_action(struct rm_class *cl) - Generic (not protocol-specific)
+ *	drop action.  Discards one packet from the class queue (the queue
+ *	must not be empty) and, if that leaves the queue empty, decrements
+ *	the count of active classes at the class's priority.
+ *
+ *	Returns:	NONE
+ */
+
+static void
+rmc_drop_action(struct rm_class *cl)
+{
+	ASSERT(qlen(cl->q_) > 0);
+
+	/* throw away one queued packet */
+	_rmc_dropq(cl);
+
+	/* the class just ran dry: one active class less at this priority */
+	if (qempty(cl->q_))
+		cl->ifdat_->na_[cl->pri_]--;
+}
+
+/*
+ * Discard every packet queued on the class.  A class whose queue is
+ * already empty is left untouched; otherwise the queue is flushed and
+ * the count of active classes at the class's priority is decremented.
+ */
+void rmc_dropall(struct rm_class *cl)
+{
+	if (qempty(cl->q_))
+		return;
+
+	_flushq(cl->q_);
+	cl->ifdat_->na_[cl->pri_]--;
+}
+
+#if (__FreeBSD_version > 300000)
+/*
+ * hzto() is removed from FreeBSD-3.0; this replacement converts the
+ * absolute time *tv into a tick count relative to the current time by
+ * handing the field-wise difference (tv - now) to tvtohz().
+ * The difference is not normalized here, so tv_usec may be negative
+ * when it reaches tvtohz().
+ */
+static int hzto(struct timeval *);
+
+static int
+hzto(struct timeval *tv)
+{
+	struct timeval now, left;
+
+	getmicrotime(&now);
+	left.tv_sec = tv->tv_sec - now.tv_sec;
+	left.tv_usec = tv->tv_usec - now.tv_usec;
+	return (tvtohz(&left));
+}
+#endif /* __FreeBSD_version > 300000 */
+
+/*
+ * void
+ * rmc_delay_action(struct rm_class *cl, struct rm_class *borrow) - This
+ * function is the generic CBQ delay action routine. It is invoked via
+ * rmc_under_limit when the packet is discovered to be overlimit.
+ *
+ * Every call counts as an overlimit action in cl->stats_. If the class
+ * is not already sleeping, its undertime_ is pushed out by the offtime
+ * (less the last packet time when ALTQ is defined, and only if the
+ * result is positive), the class is marked sleeping and a callout is
+ * armed to run rmc_restart(). When the computed delay is no more than
+ * two ticks the callout is set for exactly 2 ticks.
+ *
+ * If the delay action is the result of the borrow class being overlimit,
+ * then delay for the offtime of that borrow class. This applies only
+ * when BORROW_OFFTIME is defined; otherwise `borrow' is not used.
+ *
+ * Returns: NONE
+ */
+
+void
+rmc_delay_action(struct rm_class *cl, struct rm_class *borrow)
+{
+ int delay, t, extradelay;
+
+ cl->stats_.overactions++;
+ TV_DELTA(&cl->undertime_, &cl->overtime_, delay);
+#ifndef BORROW_OFFTIME
+ delay += cl->offtime_;
+#endif
+
+ if (!cl->sleeping_) {
+ CBQTRACE(rmc_delay_action, 'yled', cl->stats_.handle);
+#ifdef BORROW_OFFTIME
+ if (borrow != NULL)
+ extradelay = borrow->offtime_;
+ else
+#endif
+ extradelay = cl->offtime_;
+
+#ifdef ALTQ
+ /*
+ * XXX recalculate suspend time:
+ * current undertime is (tidle + pkt_time) calculated
+ * from the last transmission.
+ * tidle: time required to bring avgidle back to 0
+ * pkt_time: target waiting time for this class
+ * we need to replace pkt_time by offtime
+ */
+ extradelay -= cl->last_pkttime_;
+#endif
+ if (extradelay > 0) {
+ TV_ADD_DELTA(&cl->undertime_, extradelay, &cl->undertime_);
+ delay += extradelay;
+ }
+
+ cl->sleeping_ = 1;
+ cl->stats_.delays++;
+
+ /*
+ * Since packets are phased randomly with respect to the
+ * clock, 1 tick (the next clock tick) can be an arbitrarily
+ * short time so we have to wait for at least two ticks.
+ * NOTE: If there's no other traffic, we need the timer as
+ * a 'backstop' to restart this class.
+ */
+ if (delay > tick * 2) {
+#ifdef __FreeBSD__
+ /* FreeBSD rounds up the tick */
+ t = hzto(&cl->undertime_);
+#else
+ /* other BSDs round down the tick */
+ t = hzto(&cl->undertime_) + 1;
+#endif
+ } else
+ t = 2;
+ CALLOUT_RESET(&cl->callout_, t,
+ (timeout_t *)rmc_restart, (caddr_t)cl);
+ }
+}
+
+/*
+ * void
+ * rmc_restart() - helper routine for rmc_delay_action, which arms it
+ *	as a callout.  It checks whether the class is still sleeping (it
+ *	might have been restarted as a side effect of the queue scan on
+ *	a packet arrival) and, if so, clears the sleeping state and the
+ *	undertime, then calls the interface restart hook, provided one is
+ *	set and the downstream queue has room.
+ *
+ *	Inspecting the class state & restarting output require locking
+ *	the class structure.  In general the driver is responsible for
+ *	locking but this is the only routine that is not called directly
+ *	or indirectly from the interface driver so it has to know about
+ *	system locking conventions.  Under bsd, locking is done by raising
+ *	IPL to splimp (splnet on NetBSD) so that's what's implemented
+ *	here.  On a different system this would probably need to be
+ *	changed.
+ *
+ *	Returns:	NONE
+ */
+
+static void
+rmc_restart(struct rm_class *cl)
+{
+	struct rm_ifdat	*ifd = cl->ifdat_;
+	int		 ipl;
+
+#ifdef __NetBSD__
+	ipl = splnet();
+#else
+	ipl = splimp();
+#endif
+	if (cl->sleeping_) {
+		/* wake the class and lift its send-time restriction */
+		cl->sleeping_ = 0;
+		cl->undertime_.tv_sec = 0;
+
+		if (ifd->restart != NULL && ifd->queued_ < ifd->maxqueued_) {
+			CBQTRACE(rmc_restart, 'trts', cl->stats_.handle);
+			(*ifd->restart)(ifd->ifq_);
+		}
+	}
+	splx(ipl);
+}
+
+/*
+ * void
+ * rmc_root_overlimit(struct rm_class *cl, struct rm_class *borrow) - This
+ * is the generic overlimit handling routine for the root class of the
+ * link sharing structure. Neither argument is used: the routine
+ * unconditionally calls panic().
+ *
+ * Returns: NONE
+ */
+
+static void
+rmc_root_overlimit(struct rm_class *cl, struct rm_class *borrow)
+{
+ panic("rmc_root_overlimit");
+}
+
+/*
+ * Packet Queue handling routines. Eventually, this is to localize the
+ * effects on the code whether queues are red queues or droptail
+ * queues.
+ *
+ * _rmc_addq() adds m to the queue of class cl. RIO and RED queues
+ * (when those disciplines are compiled in) are handed to rio_addq() /
+ * red_addq() and their result is returned. For any other queue,
+ * write_dsfield() is called with 0 first if the class has
+ * RMCF_CLEARDSCP set, the packet is appended with _addq() and 0 is
+ * returned.
+ */
+
+static int
+_rmc_addq(rm_class_t *cl, mbuf_t *m)
+{
+#ifdef ALTQ_RIO
+ if (q_is_rio(cl->q_))
+ return rio_addq((rio_t *)cl->red_, cl->q_, m, cl->pktattr_);
+#endif
+#ifdef ALTQ_RED
+ if (q_is_red(cl->q_))
+ return red_addq(cl->red_, cl->q_, m, cl->pktattr_);
+#endif /* ALTQ_RED */
+
+ if (cl->flags_ & RMCF_CLEARDSCP)
+ write_dsfield(m, cl->pktattr_, 0);
+
+ _addq(cl->q_, m);
+ return (0);
+}
+
+/*
+ * Take one packet off the class queue with _getq() and free it; an
+ * empty queue is left alone.
+ * note: _rmc_dropq is not called for red
+ */
+static void
+_rmc_dropq(rm_class_t *cl)
+{
+	mbuf_t *victim = _getq(cl->q_);
+
+	if (victim != NULL)
+		m_freem(victim);
+}
+
+/*
+ * Dequeue and return the next packet of the class, going through the
+ * RIO or RED dequeue routine when the queue is of that kind (and the
+ * discipline is compiled in), and through plain _getq() otherwise.
+ */
+static mbuf_t *
+_rmc_getq(rm_class_t *cl)
+{
+	class_queue_t *q = cl->q_;
+
+#ifdef ALTQ_RIO
+	if (q_is_rio(q))
+		return (rio_getq((rio_t *)cl->red_, q));
+#endif
+#ifdef ALTQ_RED
+	if (q_is_red(q))
+		return (red_getq(cl->red_, q));
+#endif
+	return (_getq(q));
+}
+
+/*
+ * Peek at the class queue: return qhead() of the queue without
+ * dequeuing anything.
+ */
+static mbuf_t *
+_rmc_pollq(rm_class_t *cl)
+{
+	mbuf_t *head = qhead(cl->q_);
+
+	return (head);
+}
+
+#ifdef CBQ_TRACE
+
+struct cbqtrace cbqtrace_buffer[NCBQTRACE+1];
+struct cbqtrace *cbqtrace_ptr = NULL;
+int cbqtrace_count;
+
+/*
+ * DDB hook to trace cbq events:
+ * the most recent events are held in a circular buffer sized by
+ * NCBQTRACE (cbqtrace_buffer above).
+ * use "call cbqtrace_dump(N)" to display 20 events from Nth event.
+ *
+ * rmc_funcs maps function addresses to printable names for the dump;
+ * the table is terminated by an entry whose func is NULL, and
+ * rmc_funcname() reports "unknown" for addresses that are not listed.
+ */
+void cbqtrace_dump(int);
+static char *rmc_funcname(void *);
+
+static struct rmc_funcs {
+ void *func;
+ char *name;
+} rmc_funcs[] =
+{
+ rmc_init, "rmc_init",
+ rmc_queue_packet, "rmc_queue_packet",
+ rmc_under_limit, "rmc_under_limit",
+ rmc_update_class_util, "rmc_update_class_util",
+ rmc_delay_action, "rmc_delay_action",
+ rmc_restart, "rmc_restart",
+ _rmc_wrr_dequeue_next, "_rmc_wrr_dequeue_next",
+ NULL, NULL
+};
+
+/*
+ * Look up the printable name recorded in rmc_funcs for a function
+ * address; returns "unknown" if the address is not in the table.
+ */
+static char *rmc_funcname(void *func)
+{
+	struct rmc_funcs *entry = rmc_funcs;
+
+	while (entry->func != NULL) {
+		if (entry->func == func)
+			return (entry->name);
+		entry++;
+	}
+	return ("unknown");
+}
+
+/*
+ * cbqtrace_dump(counter) - print 20 trace records starting at record
+ * `counter' (taken modulo NCBQTRACE), wrapping around at the end of
+ * the circular buffer.  Each record is read as four consecutive ints:
+ * sequence count, function address, four action characters, object.
+ *
+ * A negative counter is folded into range too: the result of % takes
+ * the sign of the dividend, so without the adjustment a negative
+ * argument would index in front of cbqtrace_buffer.
+ */
+void cbqtrace_dump(int counter)
+{
+	int i, *p;
+	char *cp;
+
+	counter = counter % NCBQTRACE;
+	if (counter < 0)
+		counter += NCBQTRACE;
+	p = (int *)&cbqtrace_buffer[counter];
+
+	for (i=0; i<20; i++) {
+		printf("[0x%x] ", *p++);
+		printf("%s: ", rmc_funcname((void *)*p++));
+		cp = (char *)p++;
+		printf("%c%c%c%c: ", cp[0], cp[1], cp[2], cp[3]);
+		printf("%d\n",*p++);
+
+		/* wrap to the first record once the last one was printed */
+		if (p >= (int *)&cbqtrace_buffer[NCBQTRACE])
+			p = (int *)cbqtrace_buffer;
+	}
+}
+#endif /* CBQ_TRACE */
+#endif /* ALTQ_CBQ */
+
+#if defined(ALTQ_CBQ) || defined(ALTQ_RED) || defined(ALTQ_RIO) || defined(ALTQ_HFSC) || defined(ALTQ_PRIQ)
+#if !defined(__GNUC__) || defined(ALTQ_DEBUG)
+
+/*
+ * Append m to the queue.  Packets are kept on a circular list linked
+ * through m_nextpkt; qtail(q) is the newest packet and its m_nextpkt
+ * is the oldest one (the head).
+ */
+void
+_addq(class_queue_t *q, mbuf_t *m)
+{
+	mbuf_t *tail = qtail(q);
+
+	if (tail == NULL) {
+		/* first packet: it is its own successor */
+		m->m_nextpkt = m;
+	} else {
+		/* new tail points at the head, old tail points at new tail */
+		m->m_nextpkt = tail->m_nextpkt;
+		tail->m_nextpkt = m;
+	}
+	qtail(q) = m;
+	qlen(q)++;
+}
+
+/*
+ * Remove and return the packet at the head of the queue (the successor
+ * of qtail(q) on the circular list), or NULL if the queue is empty.
+ * The returned packet's m_nextpkt is cleared.
+ */
+mbuf_t *
+_getq(class_queue_t *q)
+{
+	mbuf_t *tail, *head;
+
+	tail = qtail(q);
+	if (tail == NULL)
+		return (NULL);
+
+	head = tail->m_nextpkt;
+	if (head == tail) {
+		/* last packet: the queue becomes empty */
+		ASSERT(qlen(q) == 1);
+		qtail(q) = NULL;
+	} else
+		tail->m_nextpkt = head->m_nextpkt;
+	qlen(q)--;
+	head->m_nextpkt = NULL;
+	return (head);
+}
+
+/*
+ * Remove the packet at the tail of the queue and return it to the
+ * caller (nothing is freed here); NULL if the queue is empty.
+ * The list is singly linked, so the whole ring is walked to find the
+ * packet in front of the tail.
+ */
+mbuf_t *
+_getq_tail(class_queue_t *q)
+{
+	mbuf_t *tail, *prev;
+
+	if ((tail = qtail(q)) == NULL)
+		return NULL;
+
+	/* find the predecessor of the tail */
+	for (prev = tail; prev->m_nextpkt != tail; prev = prev->m_nextpkt)
+		continue;
+
+	prev->m_nextpkt = tail->m_nextpkt;
+	if (prev == tail) {
+		/* the tail was the only packet */
+		ASSERT(qlen(q) == 1);
+		qtail(q) = NULL;
+	} else
+		qtail(q) = prev;
+	qlen(q)--;
+	tail->m_nextpkt = NULL;
+	return (tail);
+}
+
+/*
+ * Remove and return a packet picked at random from the queue, or NULL
+ * if the queue is empty.  The walk starts at the tail and advances
+ * between 1 and qlen(q) links, so any packet (the tail included) may
+ * be selected.
+ */
+mbuf_t *
+_getq_random(class_queue_t *q)
+{
+	struct mbuf *m, *prev;
+	int steps;
+
+	if ((m = qtail(q)) == NULL)
+		return NULL;
+	if (m->m_nextpkt == m) {
+		/* single packet: no choice to make */
+		ASSERT(qlen(q) == 1);
+		qtail(q) = NULL;
+	} else {
+		prev = NULL;
+		steps = arc4random() % qlen(q) + 1;
+		do {
+			prev = m;
+			m = m->m_nextpkt;
+		} while (--steps > 0);
+
+		/* unlink the chosen packet, moving the tail back if needed */
+		prev->m_nextpkt = m->m_nextpkt;
+		if (m == qtail(q))
+			qtail(q) = prev;
+	}
+	qlen(q)--;
+	m->m_nextpkt = NULL;
+	return (m);
+}
+
+/*
+ * Unlink packet m from the queue.  m's own m_nextpkt is left as it
+ * was; the packet is neither freed nor returned.
+ *
+ * An empty queue, or a packet that is not on this queue, is ignored:
+ * the walk stops after one full turn of the ring instead of
+ * dereferencing a NULL tail or spinning forever.
+ */
+void
+_removeq(class_queue_t *q, mbuf_t *m)
+{
+	mbuf_t *m0, *prev;
+
+	if ((m0 = qtail(q)) == NULL)
+		return;			/* nothing queued */
+
+	/* walk from the head until m is found or we are back at the tail */
+	do {
+		prev = m0;
+		m0 = m0->m_nextpkt;
+	} while (m0 != m && m0 != qtail(q));
+	if (m0 != m)
+		return;			/* m is not on this queue */
+
+	prev->m_nextpkt = m->m_nextpkt;
+	if (prev == m)
+		qtail(q) = NULL;	/* m was the only packet */
+	else if (qtail(q) == m)
+		qtail(q) = prev;	/* m was the tail */
+	qlen(q)--;
+}
+
+/*
+ * Empty the queue, freeing every packet that was on it.
+ */
+void
+_flushq(class_queue_t *q)
+{
+	mbuf_t *m;
+
+	for (m = _getq(q); m != NULL; m = _getq(q))
+		m_freem(m);
+	ASSERT(qlen(q) == 0);
+}
+
+#endif /* !__GNUC__ || ALTQ_DEBUG */
+#endif /* ALTQ_CBQ || ALTQ_RED || ALTQ_RIO || ALTQ_HFSC || ALTQ_PRIQ */
diff --git a/sys/contrib/altq/altq/altq_rmclass.h b/sys/contrib/altq/altq/altq_rmclass.h
new file mode 100644
index 000000000000..cf0ddf48e20f
--- /dev/null
+++ b/sys/contrib/altq/altq/altq_rmclass.h
@@ -0,0 +1,266 @@
+/* $KAME: altq_rmclass.h,v 1.10 2003/08/20 23:30:23 itojun Exp $ */
+
+/*
+ * Copyright (c) 1991-1997 Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the Network Research
+ * Group at Lawrence Berkeley Laboratory.
+ * 4. Neither the name of the University nor of the Laboratory may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _ALTQ_ALTQ_RMCLASS_H_
+#define _ALTQ_ALTQ_RMCLASS_H_
+
+#include <altq/altq_classq.h>
+
+/* #pragma ident "@(#)rm_class.h 1.20 97/10/23 SMI" */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define RM_MAXPRIO 8 /* Max priority */
+
+#ifdef _KERNEL
+
+typedef struct mbuf mbuf_t;
+typedef struct rm_ifdat rm_ifdat_t;
+typedef struct rm_class rm_class_t;
+
+struct red;
+
+/*
+ * Macros for dealing with time values. We assume all times are
+ * 'timevals'. `microtime' is used to get the best available clock
+ * resolution. If `microtime' *doesn't* return a value that's about
+ * ten times smaller than the average packet time on the fastest
+ * link that will use these routines, a slightly different clock
+ * scheme than this one should be used.
+ * (Bias due to truncation error in this scheme will overestimate utilization
+ * and discriminate against high bandwidth classes. To remove this bias an
+ * integrator needs to be added. The simplest integrator uses a history of
+ * 10 * avg.packet.time / min.tick.time packet completion entries. This is
+ * straight forward to add but we don't want to pay the extra memory
+ * traffic to maintain it if it's not necessary (occasionally a vendor
+ * accidentally builds a workstation with a decent clock - e.g., Sun & HP).)
+ *
+ * TV_LT(a, b): true if timeval *a is earlier than *b.
+ * TV_DELTA(a, b, delta): delta = *a - *b in microseconds. The result is
+ * exact only when the seconds differ by 0, 1 or 2; for any other
+ * difference (a negative one included) delta is set to 2000000.
+ * TV_ADD_DELTA(a, delta, res): *res = *a + delta microseconds, carrying
+ * whole seconds out of tv_usec. Only a sum of 1000000 or more is
+ * normalized; a negative sum is stored in tv_usec as is.
+ *
+ * All three evaluate their arguments more than once. TV_DELTA and
+ * TV_ADD_DELTA expand to brace blocks, not expressions.
+ */
+
+#define RM_GETTIME(now) microtime(&now)
+
+#define TV_LT(a, b) (((a)->tv_sec < (b)->tv_sec) || \
+ (((a)->tv_usec < (b)->tv_usec) && ((a)->tv_sec <= (b)->tv_sec)))
+
+#define TV_DELTA(a, b, delta) { \
+ register int xxs; \
+ \
+ delta = (a)->tv_usec - (b)->tv_usec; \
+ if ((xxs = (a)->tv_sec - (b)->tv_sec)) { \
+ switch (xxs) { \
+ default: \
+ /* if (xxs < 0) \
+ printf("rm_class: bogus time values\n"); */ \
+ delta = 0; \
+ /* fall through */ \
+ case 2: \
+ delta += 1000000; \
+ /* fall through */ \
+ case 1: \
+ delta += 1000000; \
+ break; \
+ } \
+ } \
+}
+
+#define TV_ADD_DELTA(a, delta, res) { \
+ register int xxus = (a)->tv_usec + (delta); \
+ \
+ (res)->tv_sec = (a)->tv_sec; \
+ while (xxus >= 1000000) { \
+ ++((res)->tv_sec); \
+ xxus -= 1000000; \
+ } \
+ (res)->tv_usec = xxus; \
+}
+
+#define RM_TIMEOUT 2 /* NOTE(review): value is 2 but was annotated "1 Clock tick" -- confirm */
+
+#if 1
+#define RM_MAXQUEUED 1 /* this isn't used in ALTQ/CBQ */
+#else
+#define RM_MAXQUEUED 16 /* Max number of packets downstream of CBQ */
+#endif
+#define RM_MAXQUEUE 64 /* Max queue length */
+#define RM_FILTER_GAIN 5 /* log2 of gain, e.g., 5 => 31/32 */
+#define RM_POWER (1 << RM_FILTER_GAIN) /* the gain itself, 2^RM_FILTER_GAIN */
+#define RM_MAXDEPTH 32
+#define RM_NS_PER_SEC (1000000000) /* nanoseconds in one second */
+
+typedef struct _rm_class_stats_ {
+ u_int handle; /* passed as the traced object by CBQTRACE callers; presumably the class handle */
+ u_int depth;
+
+ struct pktcntr xmit_cnt; /* packets sent in this class */
+ struct pktcntr drop_cnt; /* dropped packets */
+ u_int over; /* # times went over limit (avgidle <= 0 after a packet) */
+ u_int borrows; /* # packets sent on bandwidth borrowed from an ancestor */
+ u_int overactions; /* # times invoked overlimit action */
+ u_int delays; /* # times the delay action put the class to sleep */
+} rm_class_stats_t;
+
+/*
+ * CBQ Class state structure
+ *
+ * Classes of one priority are linked into a ring through peer_;
+ * parent_ links a class to the class above it, up to the root class,
+ * whose parent_ is NULL.
+ */
+struct rm_class {
+ class_queue_t *q_; /* Queue of packets */
+ rm_ifdat_t *ifdat_; /* interface state this class belongs to */
+ int pri_; /* Class priority. */
+ int depth_; /* Class depth */
+ u_int ns_per_byte_; /* NanoSeconds per byte. */
+ u_int maxrate_; /* Bytes per second for this class. */
+ u_int allotment_; /* Fraction of link bandwidth. */
+ u_int w_allotment_; /* Weighted allotment for WRR */
+ int bytes_alloc_; /* Allocation for round of WRR */
+
+ int avgidle_; /* filtered idle estimate, kept within [minidle_, maxidle_] */
+ int maxidle_;
+ int minidle_;
+ int offtime_; /* extra suspend time applied by the delay action */
+ int sleeping_; /* != 0 if delaying */
+ int qthresh_; /* Queue threshold for formal link sharing */
+ int leaf_; /* Note whether leaf class or not.*/
+
+ rm_class_t *children_; /* Children of this class */
+ rm_class_t *next_; /* Next pointer, used if child */
+
+ rm_class_t *peer_; /* Peer class */
+ rm_class_t *borrow_; /* Borrow class */
+ rm_class_t *parent_; /* Parent class */
+
+ void (*overlimit)(struct rm_class *, struct rm_class *); /* Class overlimit action. */
+ void (*drop)(struct rm_class *); /* Class drop action. */
+
+ struct red *red_; /* RED state pointer */
+ struct altq_pktattr *pktattr_; /* saved hdr used by RED/ECN */
+ int flags_; /* RMCF_* class flags */
+
+ int last_pkttime_; /* saved pkt_time */
+ struct timeval undertime_; /* time can next send */
+ struct timeval last_; /* time last packet sent */
+ struct timeval overtime_;
+ struct callout callout_; /* for timeout() calls */
+
+ rm_class_stats_t stats_; /* Class Statistics */
+};
+
+/*
+ * CBQ Interface state
+ *
+ * The arrays dimensioned by RM_MAXQUEUED describe the packets that
+ * have been handed to the driver and not yet accounted for; qi_ is the
+ * slot filled by the next dequeue and qo_ the slot consumed by the
+ * next utilization update.
+ */
+struct rm_ifdat {
+ int queued_; /* # pkts queued downstream */
+ int efficient_; /* Link efficiency bit */
+ int wrr_; /* Enable Weighted Round-Robin */
+ u_long ns_per_byte_; /* Link byte speed. */
+ int maxqueued_; /* Max packets to queue */
+ int maxpkt_; /* Max packet size. */
+ int qi_; /* In/out pointers for downstream */
+ int qo_; /* packets */
+
+ /*
+ * Active class state and WRR state.
+ */
+ rm_class_t *active_[RM_MAXPRIO]; /* Active cl's in each pri */
+ int na_[RM_MAXPRIO]; /* # of active cl's in a pri */
+ int num_[RM_MAXPRIO]; /* # of cl's per pri */
+ int alloc_[RM_MAXPRIO]; /* Byte Allocation */
+ u_long M_[RM_MAXPRIO]; /* WRR weights. */
+
+ /*
+ * Network Interface/Solaris Queue state pointer.
+ */
+ struct ifaltq *ifq_;
+ rm_class_t *default_; /* Default Pkt class, BE */
+ rm_class_t *root_; /* Root Link class. */
+ rm_class_t *ctl_; /* Control Traffic class. */
+ void (*restart)(struct ifaltq *); /* Restart routine. */
+
+ /*
+ * Current packet downstream packet state and dynamic state.
+ */
+ rm_class_t *borrowed_[RM_MAXQUEUED]; /* Class borrowed last */
+ rm_class_t *class_[RM_MAXQUEUED]; /* class sending */
+ int curlen_[RM_MAXQUEUED]; /* Current pktlen */
+ struct timeval now_[RM_MAXQUEUED]; /* Current packet time. */
+ int is_overlimit_[RM_MAXQUEUED];/* NOTE(review): was annotated "Current packet time.", apparently copied from now_; purpose not evident here */
+
+ int cutoff_; /* Cut-off depth for borrowing */
+
+ struct timeval ifnow_; /* expected xmit completion time */
+#if 1 /* ALTQ4PPP */
+ int maxiftime_; /* max delay inside interface */
+#endif
+ rm_class_t *pollcache_; /* cached rm_class by poll operation */
+};
+
+/* flags for rmc_init and rmc_newclass */
+/* class flags */
+#define RMCF_RED 0x0001
+#define RMCF_ECN 0x0002
+#define RMCF_RIO 0x0004
+#define RMCF_FLOWVALVE 0x0008 /* use flowvalve (aka penalty-box) */
+#define RMCF_CLEARDSCP 0x0010 /* clear diffserv codepoint */
+
+/* flags for rmc_init */
+#define RMCF_WRR 0x0100
+#define RMCF_EFFICIENT 0x0200
+
+#define is_a_parent_class(cl) ((cl)->children_ != NULL)
+
+extern rm_class_t *rmc_newclass(int, struct rm_ifdat *, u_int,
+ void (*)(struct rm_class *, struct rm_class *),
+ int, struct rm_class *, struct rm_class *,
+ u_int, int, u_int, int, int);
+extern void rmc_delete_class(struct rm_ifdat *, struct rm_class *);
+extern int rmc_modclass(struct rm_class *, u_int, int,
+ u_int, int, u_int, int);
+extern void rmc_init(struct ifaltq *, struct rm_ifdat *, u_int,
+ void (*)(struct ifaltq *),
+ int, int, u_int, int, u_int, int);
+extern int rmc_queue_packet(struct rm_class *, mbuf_t *);
+extern mbuf_t *rmc_dequeue_next(struct rm_ifdat *, int);
+extern void rmc_update_class_util(struct rm_ifdat *);
+extern void rmc_delay_action(struct rm_class *, struct rm_class *);
+extern void rmc_dropall(struct rm_class *);
+extern int rmc_get_weight(struct rm_ifdat *, int);
+
+#endif /* _KERNEL */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _ALTQ_ALTQ_RMCLASS_H_ */
diff --git a/sys/contrib/altq/altq/altq_rmclass_debug.h b/sys/contrib/altq/altq/altq_rmclass_debug.h
new file mode 100644
index 000000000000..8f471b2f9ddc
--- /dev/null
+++ b/sys/contrib/altq/altq/altq_rmclass_debug.h
@@ -0,0 +1,112 @@
+/* $KAME: altq_rmclass_debug.h,v 1.3 2002/11/29 04:36:24 kjc Exp $ */
+
+/*
+ * Copyright (c) Sun Microsystems, Inc. 1998 All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the SMCC Technology
+ * Development Group at Sun Microsystems, Inc.
+ *
+ * 4. The name of the Sun Microsystems, Inc nor may not be used to endorse or
+ * promote products derived from this software without specific prior
+ * written permission.
+ *
+ * SUN MICROSYSTEMS DOES NOT CLAIM MERCHANTABILITY OF THIS SOFTWARE OR THE
+ * SUITABILITY OF THIS SOFTWARE FOR ANY PARTICULAR PURPOSE. The software is
+ * provided "as is" without express or implied warranty of any kind.
+ *
+ * These notices must be retained in any copies of any part of this software.
+ */
+
+#ifndef _ALTQ_ALTQ_RMCLASS_DEBUG_H_
+#define _ALTQ_ALTQ_RMCLASS_DEBUG_H_
+
+/* #pragma ident "@(#)rm_class_debug.h 1.7 98/05/04 SMI" */
+
+/*
+ * Cbq debugging macros
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef CBQ_TRACE
+#ifndef NCBQTRACE
+#define NCBQTRACE (16 * 1024)
+#endif
+
+/*
+ * To view the trace output, using adb, type:
+ * adb -k /dev/ksyms /dev/mem <cr>, then type
+ * cbqtrace_count/D to get the count, then type
+ * cbqtrace_buffer,0tcount/Dp4C" "Xn
+ * This will dump the trace buffer from 0 to count.
+ */
+/*
+ * in ALTQ, "call cbqtrace_dump(N)" from DDB to display 20 events
+ * from Nth event in the circular buffer.
+ *
+ * CBQTRACEINIT() points cbqtrace_ptr at the start of cbqtrace_buffer;
+ * if cbqtrace_ptr was already set it also zeroes the buffer and resets
+ * cbqtrace_count.
+ * CBQTRACE(func, act, obj) stores one record (sequence count, func, act,
+ * obj -- each as an int) at cbqtrace_ptr between LOCK_TRACE() (splimp())
+ * and UNLOCK_TRACE() (splx()), then advances cbqtrace_ptr, wrapping to
+ * the start once it reaches &cbqtrace_buffer[NCBQTRACE].
+ * NOTE(review): func is cast to int, which presumably truncates the
+ * address where pointers are wider than int -- confirm.
+ * NOTE(review): CBQTRACE dereferences cbqtrace_ptr unconditionally, so
+ * it appears to rely on CBQTRACEINIT() having run first -- confirm.
+ */
+
+struct cbqtrace {
+ int count; /* value of cbqtrace_count when the record was written */
+ int function; /* address of function */
+ int trace_action; /* descriptive 4 characters */
+ int object; /* object operated on */
+};
+
+extern struct cbqtrace cbqtrace_buffer[];
+extern struct cbqtrace *cbqtrace_ptr;
+extern int cbqtrace_count;
+
+#define CBQTRACEINIT() { \
+ if (cbqtrace_ptr == NULL) \
+ cbqtrace_ptr = cbqtrace_buffer; \
+ else { \
+ cbqtrace_ptr = cbqtrace_buffer; \
+ bzero((void *)cbqtrace_ptr, sizeof(cbqtrace_buffer)); \
+ cbqtrace_count = 0; \
+ } \
+}
+
+#define LOCK_TRACE() splimp()
+#define UNLOCK_TRACE(x) splx(x)
+
+#define CBQTRACE(func, act, obj) { \
+ int __s = LOCK_TRACE(); \
+ int *_p = &cbqtrace_ptr->count; \
+ *_p++ = ++cbqtrace_count; \
+ *_p++ = (int)(func); \
+ *_p++ = (int)(act); \
+ *_p++ = (int)(obj); \
+ if ((struct cbqtrace *)(void *)_p >= &cbqtrace_buffer[NCBQTRACE])\
+ cbqtrace_ptr = cbqtrace_buffer; \
+ else \
+ cbqtrace_ptr = (struct cbqtrace *)(void *)_p; \
+ UNLOCK_TRACE(__s); \
+ }
+#else
+
+/* If no tracing, define no-ops */
+#define CBQTRACEINIT()
+#define CBQTRACE(a, b, c)
+
+#endif /* !CBQ_TRACE */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _ALTQ_ALTQ_RMCLASS_DEBUG_H_ */
diff --git a/sys/contrib/altq/altq/altq_subr.c b/sys/contrib/altq/altq/altq_subr.c
new file mode 100644
index 000000000000..09482ceaab11
--- /dev/null
+++ b/sys/contrib/altq/altq/altq_subr.c
@@ -0,0 +1,1901 @@
+/* $KAME: altq_subr.c,v 1.21 2003/11/06 06:32:53 kjc Exp $ */
+
+/*
+ * Copyright (C) 1997-2003
+ * Sony Computer Science Laboratories Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(__FreeBSD__) || defined(__NetBSD__)
+#include "opt_altq.h"
+#if (__FreeBSD__ != 2)
+#include "opt_inet.h"
+#ifdef __FreeBSD__
+#include "opt_inet6.h"
+#endif
+#endif
+#endif /* __FreeBSD__ || __NetBSD__ */
+
+#include <sys/param.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/kernel.h>
+#include <sys/errno.h>
+#include <sys/syslog.h>
+#include <sys/sysctl.h>
+#include <sys/queue.h>
+
+#include <net/if.h>
+#include <net/if_dl.h>
+#include <net/if_types.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#ifdef INET6
+#include <netinet/ip6.h>
+#endif
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+
+#include <net/pfvar.h>
+#include <altq/altq.h>
+#ifdef ALTQ3_COMPAT
+#include <altq/altq_conf.h>
+#endif
+
+/* machine dependent clock related includes */
+#ifdef __FreeBSD__
+#include "opt_cpu.h" /* for FreeBSD-2.2.8 to get i586_ctr_freq */
+#include <machine/clock.h>
+#endif
+#if defined(__i386__)
+#include <machine/cpufunc.h> /* for pentium tsc */
+#include <machine/specialreg.h> /* for CPUID_TSC */
+#ifdef __FreeBSD__
+#include <machine/md_var.h> /* for cpu_feature */
+#elif defined(__NetBSD__) || defined(__OpenBSD__)
+#include <machine/cpu.h> /* for cpu_feature */
+#endif
+#endif /* __i386__ */
+
+/*
+ * internal function prototypes
+ */
+static void tbr_timeout(void *);
+int (*altq_input)(struct mbuf *, int) = NULL;
+static int tbr_timer = 0; /* token bucket regulator timer */
+static struct callout tbr_callout = CALLOUT_INITIALIZER;
+
+#ifdef ALTQ3_CLFIER_COMPAT
+static int extract_ports4(struct mbuf *, struct ip *, struct flowinfo_in *);
+#ifdef INET6
+static int extract_ports6(struct mbuf *, struct ip6_hdr *,
+ struct flowinfo_in6 *);
+#endif
+static int apply_filter4(u_int32_t, struct flow_filter *,
+ struct flowinfo_in *);
+static int apply_ppfilter4(u_int32_t, struct flow_filter *,
+ struct flowinfo_in *);
+#ifdef INET6
+static int apply_filter6(u_int32_t, struct flow_filter6 *,
+ struct flowinfo_in6 *);
+#endif
+static int apply_tosfilter4(u_int32_t, struct flow_filter *,
+ struct flowinfo_in *);
+static u_long get_filt_handle(struct acc_classifier *, int);
+static struct acc_filter *filth_to_filtp(struct acc_classifier *, u_long);
+static u_int32_t filt2fibmask(struct flow_filter *);
+
+static void ip4f_cache(struct ip *, struct flowinfo_in *);
+static int ip4f_lookup(struct ip *, struct flowinfo_in *);
+static int ip4f_init(void);
+static struct ip4_frag *ip4f_alloc(void);
+static void ip4f_free(struct ip4_frag *);
+#endif /* ALTQ3_CLFIER_COMPAT */
+
+/*
+ * alternate queueing support routines
+ */
+
+/*
+ * return the discipline state attached to the send queue of the interface
+ * called name when its queueing type equals type; NULL otherwise.
+ * ALTQT_NONE never matches.
+ */
+void *
+altq_lookup(char *name, int type)
+{
+	struct ifnet *ifp = ifunit(name);
+
+	if (ifp == NULL || type == ALTQT_NONE)
+		return NULL;
+	if (ifp->if_snd.altq_type != type)
+		return NULL;
+
+	return (ifp->if_snd.altq_disc);
+}
+
+/*
+ * install a discipline and its enqueue/dequeue/request/classify hooks on
+ * ifq.  returns ENXIO when the queue is not ALTQ-ready.  with ALTQ3_COMPAT
+ * a non-NULL clfier is refused with EBUSY while the queue is enabled and
+ * with EEXIST while a discipline is attached.
+ */
+int
+altq_attach(struct ifaltq *ifq, int type, void *discipline,
+    int (*enqueue)(struct ifaltq *, struct mbuf *, struct altq_pktattr *),
+    struct mbuf *(*dequeue)(struct ifaltq *, int),
+    int (*request)(struct ifaltq *, int, void *),
+    void *clfier,
+    void *(*classify)(void *, struct mbuf *, int))
+{
+	if (!ALTQ_IS_READY(ifq))
+		return ENXIO;
+
+#ifdef ALTQ3_COMPAT
+	/*
+	 * a classifier is only handed in by altq3, which (unlike pfaltq)
+	 * must not replace a discipline that is enabled or attached.
+	 */
+	if (clfier != NULL && ALTQ_IS_ENABLED(ifq))
+		return EBUSY;
+	if (clfier != NULL && ALTQ_IS_ATTACHED(ifq))
+		return EEXIST;
+#endif
+
+	/* only the CANTCHANGE and ENABLED bits survive a (re)attach */
+	ifq->altq_flags &= (ALTQF_CANTCHANGE|ALTQF_ENABLED);
+
+	ifq->altq_type = type;
+	ifq->altq_disc = discipline;
+	ifq->altq_clfier = clfier;
+	ifq->altq_classify = classify;
+	ifq->altq_enqueue = enqueue;
+	ifq->altq_dequeue = dequeue;
+	ifq->altq_request = request;
+#if defined(ALTQ3_COMPAT) && defined(ALTQ_KLD)
+	altq_module_incref(type);
+#endif
+	return 0;
+}
+
+/*
+ * remove the discipline from ifq.  returns ENXIO when the queue is not
+ * ALTQ-ready, EBUSY while it is still enabled, and 0 otherwise (also when
+ * nothing was attached).
+ */
+int
+altq_detach(struct ifaltq *ifq)
+{
+	if (!ALTQ_IS_READY(ifq))
+		return ENXIO;
+	if (ALTQ_IS_ENABLED(ifq))
+		return EBUSY;
+
+	if (ALTQ_IS_ATTACHED(ifq)) {
+#if defined(ALTQ3_COMPAT) && defined(ALTQ_KLD)
+		/* must run before altq_type is reset below */
+		altq_module_declref(ifq->altq_type);
+#endif
+		/* forget everything except the CANTCHANGE bit */
+		ifq->altq_flags &= ALTQF_CANTCHANGE;
+		ifq->altq_classify = NULL;
+		ifq->altq_clfier = NULL;
+		ifq->altq_request = NULL;
+		ifq->altq_dequeue = NULL;
+		ifq->altq_enqueue = NULL;
+		ifq->altq_disc = NULL;
+		ifq->altq_type = ALTQT_NONE;
+	}
+	return 0;
+}
+
+/*
+ * switch the attached discipline on.  the queue is purged first; the
+ * CLASSIFY flag is raised together with ENABLED when a classifier is set.
+ * returns ENXIO when the queue is not ALTQ-ready, 0 otherwise.
+ */
+int
+altq_enable(struct ifaltq *ifq)
+{
+	int s;
+
+	if (!ALTQ_IS_READY(ifq))
+		return ENXIO;
+
+	if (!ALTQ_IS_ENABLED(ifq)) {
+#ifdef __NetBSD__
+		s = splnet();
+#else
+		s = splimp();
+#endif
+		IFQ_PURGE(ifq);
+		ASSERT(ifq->ifq_len == 0);
+		if (ifq->altq_clfier != NULL)
+			ifq->altq_flags |= (ALTQF_ENABLED|ALTQF_CLASSIFY);
+		else
+			ifq->altq_flags |= ALTQF_ENABLED;
+		splx(s);
+	}
+
+	return 0;
+}
+
+/*
+ * switch the discipline off: purge the queue and clear the ENABLED and
+ * CLASSIFY flags.  a queue that is not enabled is left untouched.
+ * always returns 0.
+ */
+int
+altq_disable(struct ifaltq *ifq)
+{
+	int s;
+
+	if (ALTQ_IS_ENABLED(ifq)) {
+#ifdef __NetBSD__
+		s = splnet();
+#else
+		s = splimp();
+#endif
+		IFQ_PURGE(ifq);
+		ASSERT(ifq->ifq_len == 0);
+		ifq->altq_flags &= ~(ALTQF_ENABLED|ALTQF_CLASSIFY);
+		splx(s);
+	}
+	return 0;
+}
+
+#ifdef ALTQ_DEBUG
+/*
+ * report a failed altq assertion (expression text, file and line) and
+ * panic.
+ */
+void
+altq_assert(const char *file, int line, const char *failedexpr)
+{
+	(void)printf("altq assertion \"%s\" failed: file \"%s\", line %d\n",
+	    failedexpr, file, line);
+	panic("altq assertion");
+	/* NOTREACHED */
+}
+#endif
+
+/*
+ * internal representation of token bucket parameters
+ * rate: byte_per_unittime << 32
+ * (((bits_per_sec) / 8) << 32) / machclk_freq
+ * depth: byte << 32
+ *
+ */
+#define TBR_SHIFT 32
+#define TBR_SCALE(x) ((int64_t)(x) << TBR_SHIFT)
+#define TBR_UNSCALE(x) ((x) >> TBR_SHIFT)
+
+/*
+ * dequeue (ALTDQ_REMOVE) or peek at (ALTDQ_POLL) the next packet of ifq
+ * through its token bucket regulator.  returns NULL when the bucket holds
+ * no tokens or the underlying queue yields no packet.
+ */
+struct mbuf *
+tbr_dequeue(struct ifaltq *ifq, int op)
+{
+	struct tb_regulator *tbr = ifq->altq_tbr;
+	struct mbuf *m;
+	int64_t elapsed;
+	u_int64_t now;
+
+	/* a remove that directly follows a poll bypasses the token check */
+	if (op != ALTDQ_REMOVE || tbr->tbr_lastop != ALTDQ_POLL) {
+		/* refill only once the bucket has run dry (token <= 0) */
+		if (tbr->tbr_token <= 0) {
+			now = read_machclk();
+			elapsed = now - tbr->tbr_last;
+			if (elapsed < tbr->tbr_filluptime) {
+				tbr->tbr_token += elapsed * tbr->tbr_rate;
+				if (tbr->tbr_token > tbr->tbr_depth)
+					tbr->tbr_token = tbr->tbr_depth;
+			} else
+				tbr->tbr_token = tbr->tbr_depth;
+			tbr->tbr_last = now;
+		}
+		/* still nothing to spend: refuse the dequeue */
+		if (tbr->tbr_token <= 0)
+			return (NULL);
+	}
+
+	if (!ALTQ_IS_ENABLED(ifq)) {
+		if (op == ALTDQ_POLL)
+			IF_POLL(ifq, m);
+		else
+			IF_DEQUEUE(ifq, m);
+	} else
+		m = (*ifq->altq_dequeue)(ifq, op);
+
+	/* only an actual removal is charged against the bucket */
+	if (op == ALTDQ_REMOVE && m != NULL)
+		tbr->tbr_token -= TBR_SCALE(m_pktlen(m));
+	tbr->tbr_lastop = op;
+	return (m);
+}
+
+/*
+ * set a token bucket regulator.
+ * if the specified rate is zero, the token bucket regulator is deleted.
+ */
+int
+tbr_set(struct ifaltq *ifq, struct tb_profile *profile)
+{
+	struct tb_regulator *tbr, *otbr;
+
+	/* the regulator needs the machine clock; set it up on first use */
+	if (machclk_freq == 0) {
+		init_machclk();
+		if (machclk_freq == 0) {
+			printf("tbr_set: no cpu clock available!\n");
+			return (ENXIO);
+		}
+	}
+
+	if (profile->rate == 0) {
+		/* a zero rate removes the regulator, if there is one */
+		tbr = ifq->altq_tbr;
+		if (tbr == NULL)
+			return (ENOENT);
+		ifq->altq_tbr = NULL;
+		FREE(tbr, M_DEVBUF);
+		return (0);
+	}
+
+	MALLOC(tbr, struct tb_regulator *, sizeof(struct tb_regulator),
+	    M_DEVBUF, M_WAITOK);
+	if (tbr == NULL)
+		return (ENOMEM);
+	bzero(tbr, sizeof(struct tb_regulator));
+
+	/* convert the profile to the scaled internal representation */
+	tbr->tbr_depth = TBR_SCALE(profile->depth);
+	tbr->tbr_rate = TBR_SCALE(profile->rate / 8) / machclk_freq;
+	if (tbr->tbr_rate > 0)
+		tbr->tbr_filluptime = tbr->tbr_depth / tbr->tbr_rate;
+	else
+		tbr->tbr_filluptime = 0xffffffffffffffffLL;
+
+	/* start out with a full bucket */
+	tbr->tbr_token = tbr->tbr_depth;
+	tbr->tbr_last = read_machclk();
+	tbr->tbr_lastop = ALTDQ_REMOVE;
+
+	/* publish the new regulator, then dispose of the old one */
+	otbr = ifq->altq_tbr;
+	ifq->altq_tbr = tbr;
+
+	if (otbr != NULL)
+		FREE(otbr, M_DEVBUF);
+	else if (tbr_timer == 0) {
+		/* first regulator: start the periodic kick */
+		CALLOUT_RESET(&tbr_callout, 1, tbr_timeout, (void *)0);
+		tbr_timer = 1;
+	}
+	return (0);
+}
+
+/*
+ * tbr_timeout goes through the interface list, and kicks the drivers
+ * if necessary.
+ */
+static void
+tbr_timeout(arg)
+ void *arg;
+{
+ struct ifnet *ifp;
+ int active, s;
+
+ /* arg is not referenced in this function */
+ active = 0; /* interfaces for which TBR_IS_ENABLED() holds */
+#ifdef __NetBSD__
+ s = splnet();
+#else
+ s = splimp();
+#endif
+ for (ifp = TAILQ_FIRST(&ifnet); ifp; ifp = TAILQ_NEXT(ifp, if_list)) {
+ if (!TBR_IS_ENABLED(&ifp->if_snd))
+ continue;
+ active++;
+ /* call the driver's start routine when packets are waiting */
+ if (!IFQ_IS_EMPTY(&ifp->if_snd) && ifp->if_start != NULL)
+ (*ifp->if_start)(ifp);
+ }
+ splx(s);
+ /*
+ * re-arm the callout while at least one interface was counted;
+ * otherwise clear tbr_timer so that tbr_set() starts it again.
+ */
+ if (active > 0)
+ CALLOUT_RESET(&tbr_callout, 1, tbr_timeout, (void *)0);
+ else
+ tbr_timer = 0; /* don't need tbr_timer anymore */
+#if defined(__alpha__) && !defined(ALTQ_NOPCC)
+ {
+ /*
+ * XXX read out the machine dependent clock once a second
+ * to detect counter wrap-around.
+ */
+ static u_int cnt;
+
+ /* cnt counts invocations; every hz-th one reads the clock */
+ if (++cnt >= hz) {
+ (void)read_machclk();
+ cnt = 0;
+ }
+ }
+#endif /* __alpha__ && !ALTQ_NOPCC */
+}
+
+/*
+ * get token bucket regulator profile
+ */
+int
+tbr_get(struct ifaltq *ifq, struct tb_profile *profile)
+{
+	struct tb_regulator *tbr = ifq->altq_tbr;
+
+	if (tbr != NULL) {
+		/* convert the scaled internal values back to profile units */
+		profile->depth = (u_int)TBR_UNSCALE(tbr->tbr_depth);
+		profile->rate =
+		    (u_int)TBR_UNSCALE(tbr->tbr_rate * 8 * machclk_freq);
+	} else {
+		/* no regulator configured: report an all-zero profile */
+		profile->depth = 0;
+		profile->rate = 0;
+	}
+	return (0);
+}
+
+/*
+ * attach a discipline to the interface. if one already exists, it is
+ * overridden.
+ */
+int
+altq_pfattach(struct pf_altq *a)
+{
+	/* hand over to the attach routine of the selected scheduler */
+	switch (a->scheduler) {
+	case ALTQT_NONE:
+		return (0);
+#ifdef ALTQ_CBQ
+	case ALTQT_CBQ:
+		return (cbq_pfattach(a));
+#endif
+#ifdef ALTQ_PRIQ
+	case ALTQT_PRIQ:
+		return (priq_pfattach(a));
+#endif
+#ifdef ALTQ_HFSC
+	case ALTQT_HFSC:
+		return (hfsc_pfattach(a));
+#endif
+	default:
+		break;
+	}
+
+	/* scheduler unknown or not compiled in */
+	return (ENXIO);
+}
+
+/*
+ * detach a discipline from the interface.
+ * it is possible that the discipline was already overridden by another
+ * discipline.
+ */
+int
+altq_pfdetach(struct pf_altq *a)
+{
+	struct ifnet *ifp;
+	int s, error;
+
+	ifp = ifunit(a->ifname);
+	if (ifp == NULL)
+		return (EINVAL);
+
+	/* nothing to do unless this discipline is the one still installed */
+	if (a->altq_disc == NULL)
+		return (0);
+	if (a->altq_disc != ifp->if_snd.altq_disc)
+		return (0);
+
+#ifdef __NetBSD__
+	s = splnet();
+#else
+	s = splimp();
+#endif
+	/* disable first (when enabled); detach only if that succeeded */
+	error = 0;
+	if (ALTQ_IS_ENABLED(&ifp->if_snd))
+		error = altq_disable(&ifp->if_snd);
+	if (error == 0)
+		error = altq_detach(&ifp->if_snd);
+	splx(s);
+
+	return (error);
+}
+
+/*
+ * add a discipline or a queue
+ */
+int
+altq_add(struct pf_altq *a)
+{
+	/* a non-empty queue name selects the per-queue operation */
+	if (a->qname[0] != 0)
+		return (altq_add_queue(a));
+
+	/* disciplines need the machine clock; set it up on first use */
+	if (machclk_freq == 0) {
+		init_machclk();
+		if (machclk_freq == 0)
+			panic("altq_add: no cpu clock");
+	}
+
+	switch (a->scheduler) {
+#ifdef ALTQ_CBQ
+	case ALTQT_CBQ:
+		return (cbq_add_altq(a));
+#endif
+#ifdef ALTQ_PRIQ
+	case ALTQT_PRIQ:
+		return (priq_add_altq(a));
+#endif
+#ifdef ALTQ_HFSC
+	case ALTQT_HFSC:
+		return (hfsc_add_altq(a));
+#endif
+	default:
+		break;
+	}
+
+	/* scheduler unknown or not compiled in */
+	return (ENXIO);
+}
+
+/*
+ * remove a discipline or a queue
+ */
+int
+altq_remove(struct pf_altq *a)
+{
+	/* a non-empty queue name selects the per-queue operation */
+	if (a->qname[0] != 0)
+		return (altq_remove_queue(a));
+
+	switch (a->scheduler) {
+#ifdef ALTQ_CBQ
+	case ALTQT_CBQ:
+		return (cbq_remove_altq(a));
+#endif
+#ifdef ALTQ_PRIQ
+	case ALTQT_PRIQ:
+		return (priq_remove_altq(a));
+#endif
+#ifdef ALTQ_HFSC
+	case ALTQT_HFSC:
+		return (hfsc_remove_altq(a));
+#endif
+	default:
+		break;
+	}
+
+	/* scheduler unknown or not compiled in */
+	return (ENXIO);
+}
+
+/*
+ * add a queue to the discipline
+ */
+int
+altq_add_queue(struct pf_altq *a)
+{
+	/* hand over to the add-queue routine of the selected scheduler */
+	switch (a->scheduler) {
+#ifdef ALTQ_CBQ
+	case ALTQT_CBQ:
+		return (cbq_add_queue(a));
+#endif
+#ifdef ALTQ_PRIQ
+	case ALTQT_PRIQ:
+		return (priq_add_queue(a));
+#endif
+#ifdef ALTQ_HFSC
+	case ALTQT_HFSC:
+		return (hfsc_add_queue(a));
+#endif
+	default:
+		break;
+	}
+
+	/* scheduler unknown or not compiled in */
+	return (ENXIO);
+}
+
+/*
+ * remove a queue from the discipline
+ */
+int
+altq_remove_queue(struct pf_altq *a)
+{
+	/* hand over to the remove-queue routine of the selected scheduler */
+	switch (a->scheduler) {
+#ifdef ALTQ_CBQ
+	case ALTQT_CBQ:
+		return (cbq_remove_queue(a));
+#endif
+#ifdef ALTQ_PRIQ
+	case ALTQT_PRIQ:
+		return (priq_remove_queue(a));
+#endif
+#ifdef ALTQ_HFSC
+	case ALTQT_HFSC:
+		return (hfsc_remove_queue(a));
+#endif
+	default:
+		break;
+	}
+
+	/* scheduler unknown or not compiled in */
+	return (ENXIO);
+}
+
+/*
+ * get queue statistics
+ */
+int
+altq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes)
+{
+	/* hand over to the statistics routine of the selected scheduler */
+	switch (a->scheduler) {
+#ifdef ALTQ_CBQ
+	case ALTQT_CBQ:
+		return (cbq_getqstats(a, ubuf, nbytes));
+#endif
+#ifdef ALTQ_PRIQ
+	case ALTQT_PRIQ:
+		return (priq_getqstats(a, ubuf, nbytes));
+#endif
+#ifdef ALTQ_HFSC
+	case ALTQT_HFSC:
+		return (hfsc_getqstats(a, ubuf, nbytes));
+#endif
+	default:
+		break;
+	}
+
+	/* scheduler unknown or not compiled in */
+	return (ENXIO);
+}
+
+/*
+ * read and write diffserv field in IPv4 or IPv6 header
+ */
+u_int8_t
+read_dsfield(struct mbuf *m, struct altq_pktattr *pktattr)
+{
+	struct mbuf *mp;
+
+	if (pktattr == NULL)
+		return ((u_int8_t)0);
+	if (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6)
+		return ((u_int8_t)0);
+
+	/* find the mbuf whose data area contains pattr_hdr */
+	mp = m;
+	while (mp != NULL) {
+		if ((pktattr->pattr_hdr >= mp->m_data) &&
+		    (pktattr->pattr_hdr < mp->m_data + mp->m_len))
+			break;
+		mp = mp->m_next;
+	}
+	if (mp == NULL) {
+		/* the header pointer is not inside this chain: invalidate */
+		pktattr->pattr_af = AF_UNSPEC;
+#ifdef ALTQ_DEBUG
+		printf("read_dsfield: can't locate header!\n");
+#endif
+		return ((u_int8_t)0);
+	}
+
+	if (pktattr->pattr_af == AF_INET) {
+		struct ip *ip = (struct ip *)pktattr->pattr_hdr;
+
+		/* anything but version 4 is a mismatch and yields 0 */
+		if (ip->ip_v != 4)
+			return ((u_int8_t)0);
+		return (ip->ip_tos);
+	}
+#ifdef INET6
+	if (pktattr->pattr_af == AF_INET6) {
+		struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
+		u_int32_t flowlabel = ntohl(ip6->ip6_flow);
+
+		/* top 4 bits carry the version, the next 8 the ds field */
+		if ((flowlabel >> 28) != 6)
+			return ((u_int8_t)0);
+		return ((flowlabel >> 20) & 0xff);
+	}
+#endif
+	return ((u_int8_t)0);
+}
+
+/*
+ * store dsfield into the IPv4 tos byte or the IPv6 traffic class of the
+ * header that pktattr->pattr_hdr points at.  nothing is written when
+ * pktattr is NULL, the address family is neither AF_INET nor AF_INET6,
+ * the header pointer is not inside the mbuf chain m, or the version
+ * field does not match the address family.
+ */
+void
+write_dsfield(m, pktattr, dsfield)
+ struct mbuf *m;
+ struct altq_pktattr *pktattr;
+ u_int8_t dsfield;
+{
+ struct mbuf *m0;
+
+ if (pktattr == NULL ||
+ (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6))
+ return;
+
+ /* verify that pattr_hdr is within the mbuf data */
+ for (m0 = m; m0 != NULL; m0 = m0->m_next)
+ if ((pktattr->pattr_hdr >= m0->m_data) &&
+ (pktattr->pattr_hdr < m0->m_data + m0->m_len))
+ break;
+ if (m0 == NULL) {
+ /* ick, pattr_hdr is stale */
+ pktattr->pattr_af = AF_UNSPEC;
+#ifdef ALTQ_DEBUG
+ printf("write_dsfield: can't locate header!\n");
+#endif
+ return;
+ }
+
+ if (pktattr->pattr_af == AF_INET) {
+ struct ip *ip = (struct ip *)pktattr->pattr_hdr;
+ u_int8_t old;
+ int32_t sum;
+
+ if (ip->ip_v != 4)
+ return; /* version mismatch! */
+ old = ip->ip_tos;
+ dsfield |= old & 3; /* leave CU bits */
+ /* unchanged value: header and checksum stay as they are */
+ if (old == dsfield)
+ return;
+ ip->ip_tos = dsfield;
+ /*
+ * update checksum (from RFC1624)
+ * HC' = ~(~HC + ~m + m')
+ */
+ sum = ~ntohs(ip->ip_sum) & 0xffff;
+ /*
+ * only the low byte of the 16-bit word changes; the unchanged
+ * high byte contributes 0xff00 to ~m + m'.
+ */
+ sum += 0xff00 + (~old & 0xff) + dsfield;
+ sum = (sum >> 16) + (sum & 0xffff);
+ sum += (sum >> 16); /* add carry */
+
+ ip->ip_sum = htons(~sum & 0xffff);
+ }
+#ifdef INET6
+ else if (pktattr->pattr_af == AF_INET6) {
+ struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
+ u_int32_t flowlabel;
+
+ flowlabel = ntohl(ip6->ip6_flow);
+ if ((flowlabel >> 28) != 6)
+ return; /* version mismatch! */
+ /*
+ * the mask clears bits 22-27 and keeps bits 20-21, so the two
+ * low bits already in the header are or'ed with those of dsfield.
+ */
+ flowlabel = (flowlabel & 0xf03fffff) | (dsfield << 20);
+ ip6->ip6_flow = htonl(flowlabel);
+ }
+#endif
+ return;
+}
+
+
+/*
+ * high resolution clock support taking advantage of a machine dependent
+ * high resolution time counter (e.g., timestamp counter of intel pentium).
+ * we assume
+ * - 64-bit-long monotonically-increasing counter
+ * - frequency range is 100M-4GHz (CPU speed)
+ */
+/* if pcc is not available or disabled, emulate 256MHz using microtime() */
+#define MACHCLK_SHIFT 8
+
+int machclk_usepcc;
+u_int32_t machclk_freq = 0;
+u_int32_t machclk_per_tick = 0;
+
+#ifdef __alpha__
+#ifdef __FreeBSD__
+extern u_int32_t cycles_per_sec; /* alpha cpu clock frequency */
+#elif defined(__NetBSD__) || defined(__OpenBSD__)
+extern u_int64_t cycles_per_usec; /* alpha cpu clock frequency */
+#endif
+#endif /* __alpha__ */
+#if defined(__i386__) && defined(__NetBSD__)
+extern u_int64_t cpu_tsc_freq;
+#endif /* __i386__ && __NetBSD__ */
+
+/*
+ * decide whether the machine dependent cycle counter is used
+ * (machclk_usepcc) and set machclk_freq and machclk_per_tick.
+ * without a usable counter a 1000000 << MACHCLK_SHIFT Hz clock based on
+ * microtime() is emulated; with a counter of unknown frequency the
+ * frequency is measured across a tsleep() of hz ticks.
+ */
+void
+init_machclk(void)
+{
+ machclk_usepcc = 1;
+
+ /* compile-time conditions under which the counter is not used */
+#if (!defined(__i386__) && !defined(__alpha__)) || defined(ALTQ_NOPCC)
+ machclk_usepcc = 0;
+#endif
+#if defined(__FreeBSD__) && defined(SMP)
+ machclk_usepcc = 0;
+#endif
+#if defined(__NetBSD__) && defined(MULTIPROCESSOR)
+ machclk_usepcc = 0;
+#endif
+#ifdef __i386__
+ /* check if TSC is available */
+ if (machclk_usepcc == 1 && (cpu_feature & CPUID_TSC) == 0)
+ machclk_usepcc = 0;
+#endif
+
+ if (machclk_usepcc == 0) {
+ /* emulate 256MHz using microtime() */
+ machclk_freq = 1000000 << MACHCLK_SHIFT;
+ machclk_per_tick = machclk_freq / hz;
+#ifdef ALTQ_DEBUG
+ printf("altq: emulate %uHz cpu clock\n", machclk_freq);
+#endif
+ return;
+ }
+
+ /*
+ * if the clock frequency (of Pentium TSC or Alpha PCC) is
+ * accessible, just use it.
+ */
+#ifdef __i386__
+#ifdef __FreeBSD__
+#if (__FreeBSD_version > 300000)
+ machclk_freq = tsc_freq;
+#else
+ machclk_freq = i586_ctr_freq;
+#endif
+#elif defined(__NetBSD__)
+ machclk_freq = (u_int32_t)cpu_tsc_freq;
+#elif defined(__OpenBSD__) && (defined(I586_CPU) || defined(I686_CPU))
+ machclk_freq = pentium_mhz * 1000000;
+#endif
+#elif defined(__alpha__)
+#ifdef __FreeBSD__
+ machclk_freq = cycles_per_sec;
+#elif defined(__NetBSD__) || defined(__OpenBSD__)
+ machclk_freq = (u_int32_t)(cycles_per_usec * 1000000);
+#endif
+#endif /* __alpha__ */
+
+ /*
+ * if we don't know the clock frequency, measure it.
+ */
+ if (machclk_freq == 0) {
+ static int wait;
+ struct timeval tv_start, tv_end;
+ u_int64_t start, end, diff;
+ int timo;
+
+ /* sample wall clock and counter on both sides of the sleep */
+ microtime(&tv_start);
+ start = read_machclk();
+ timo = hz; /* 1 sec */
+ (void)tsleep(&wait, PWAIT | PCATCH, "init_machclk", timo);
+ microtime(&tv_end);
+ end = read_machclk();
+ /* diff is the elapsed wall clock time in microseconds */
+ diff = (u_int64_t)(tv_end.tv_sec - tv_start.tv_sec) * 1000000
+ + tv_end.tv_usec - tv_start.tv_usec;
+ /* counter ticks per second; left at 0 when no time elapsed */
+ if (diff != 0)
+ machclk_freq = (u_int)((end - start) * 1000000 / diff);
+ }
+
+ machclk_per_tick = machclk_freq / hz;
+
+#ifdef ALTQ_DEBUG
+ printf("altq: CPU clock: %uHz\n", machclk_freq);
+#endif
+}
+
+#if defined(__OpenBSD__) && defined(__i386__)
+/*
+ * return the 64-bit value produced by the instruction encoded as the
+ * bytes 0x0f 0x31 (the RDTSC opcode); the "=A" constraint binds the
+ * result to rv.  only compiled for OpenBSD/i386.
+ */
+static __inline u_int64_t
+rdtsc(void)
+{
+ u_int64_t rv;
+ __asm __volatile(".byte 0x0f, 0x31" : "=A" (rv));
+ return (rv);
+}
+#endif /* __OpenBSD__ && __i386__ */
+
+/*
+ * return the current value of the 64-bit machine clock: the cycle
+ * counter when machclk_usepcc is set, otherwise the microseconds since
+ * boottime shifted left by MACHCLK_SHIFT.
+ */
+u_int64_t
+read_machclk(void)
+{
+ u_int64_t val;
+
+ if (machclk_usepcc) {
+#if defined(__i386__)
+ val = rdtsc();
+#elif defined(__alpha__)
+ /* last_pcc and upper keep their values between calls */
+ static u_int32_t last_pcc, upper;
+ u_int32_t pcc;
+
+ /*
+ * for alpha, make a 64bit counter value out of the 32bit
+ * alpha processor cycle counter.
+ * read_machclk must be called within a half of its
+ * wrap-around cycle (about 5 sec for 400MHz cpu) to properly
+ * detect a counter wrap-around.
+ * tbr_timeout calls read_machclk once a second.
+ */
+ pcc = (u_int32_t)alpha_rpcc();
+ /* a value not above the previous one is counted as a wrap */
+ if (pcc <= last_pcc)
+ upper++;
+ last_pcc = pcc;
+ val = ((u_int64_t)upper << 32) + pcc;
+#else
+ /* machclk_usepcc set on a platform without counter support */
+ panic("read_machclk");
+#endif
+ } else {
+ struct timeval tv;
+
+ microtime(&tv);
+ val = (((u_int64_t)(tv.tv_sec - boottime.tv_sec) * 1000000
+ + tv.tv_usec) << MACHCLK_SHIFT);
+ }
+ return (val);
+}
+
+#ifdef ALTQ3_CLFIER_COMPAT
+
+#ifndef IPPROTO_ESP
+#define IPPROTO_ESP 50 /* encapsulating security payload */
+#endif
+#ifndef IPPROTO_AH
+#define IPPROTO_AH 51 /* authentication header */
+#endif
+
+/*
+ * extract flow information from a given packet.
+ * filt_bmask shows flowinfo fields required.
+ * we assume the ip header is in one mbuf, and addresses and ports are
+ * in network byte order.
+ */
+int
+altq_extractflow(struct mbuf *m, int af, struct flowinfo *flow,
+    u_int32_t filt_bmask)
+{
+	if (af == PF_INET) {
+		struct ip *ip = mtod(m, struct ip *);
+
+		/* a header that is not version 4 falls through to failure */
+		if (ip->ip_v == 4) {
+			struct flowinfo_in *fin = (struct flowinfo_in *)flow;
+
+			fin->fi_len = sizeof(struct flowinfo_in);
+			fin->fi_family = AF_INET;
+
+			fin->fi_proto = ip->ip_p;
+			fin->fi_tos = ip->ip_tos;
+
+			fin->fi_src.s_addr = ip->ip_src.s_addr;
+			fin->fi_dst.s_addr = ip->ip_dst.s_addr;
+
+			if ((filt_bmask & FIMB4_PORTS) == 0) {
+				/* ports are not needed by any filter */
+				fin->fi_sport = 0;
+				fin->fi_dport = 0;
+				fin->fi_gpi = 0;
+			} else
+				extract_ports4(m, ip, fin);
+			return (1);
+		}
+	}
+#ifdef INET6
+	else if (af == PF_INET6) {
+		struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
+		struct flowinfo_in6 *fin6 = (struct flowinfo_in6 *)flow;
+		/* should we check the ip version? */
+
+		fin6->fi6_len = sizeof(struct flowinfo_in6);
+		fin6->fi6_family = AF_INET6;
+
+		fin6->fi6_proto = ip6->ip6_nxt;
+		fin6->fi6_tclass = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
+
+		fin6->fi6_flowlabel = ip6->ip6_flow & htonl(0x000fffff);
+		fin6->fi6_src = ip6->ip6_src;
+		fin6->fi6_dst = ip6->ip6_dst;
+
+		/*
+		 * walk the header chain when ports are required, or when
+		 * the protocol is required and option headers are present.
+		 */
+		if ((filt_bmask & FIMB6_PORTS) ||
+		    ((filt_bmask & FIMB6_PROTO)
+		    && ip6->ip6_nxt > IPPROTO_IPV6))
+			extract_ports6(m, ip6, fin6);
+		else {
+			fin6->fi6_sport = 0;
+			fin6->fi6_dport = 0;
+			fin6->fi6_gpi = 0;
+		}
+		return (1);
+	}
+#endif /* INET6 */
+
+	/* failed: hand back an empty, unspecified flowinfo */
+	flow->fi_len = sizeof(struct flowinfo);
+	flow->fi_family = AF_UNSPEC;
+	return (0);
+}
+
+/*
+ * helper routine to extract port numbers
+ */
+/* structure for ipsec and ipv6 option header template */
+/*
+ * overlay used by extract_ports4/extract_ports6 to read the next-header
+ * and length bytes of an option header, and the SPI of an AH.
+ */
+struct _opt6 {
+ u_int8_t opt6_nxt; /* next header */
+ u_int8_t opt6_hlen; /* header extension length */
+ u_int16_t _pad; /* bytes 2-3, not read by the extract routines */
+ u_int32_t ah_spi; /* security parameter index
+ for authentication header */
+};
+
+/*
+ * extract port numbers from an ipv4 packet.
+ *
+ * fills in fi_sport, fi_dport and fi_gpi of fin (and fi_proto once the
+ * transport protocol is known) from the header that follows the ipv4
+ * header ip inside the mbuf chain m.
+ * non-first fragments are looked up in the fragment cache and always
+ * return 1.  otherwise returns 1 when TCP/UDP ports (or, with ALTQ_IPSEC,
+ * an ESP SPI) were read, and 0 when the header could not be located, the
+ * data is too short, or the protocol is not handled.
+ */
+static int
+extract_ports4(m, ip, fin)
+	struct mbuf *m;
+	struct ip *ip;
+	struct flowinfo_in *fin;
+{
+	struct mbuf *m0;
+	u_short ip_off;
+	u_int8_t proto;
+	int off;
+
+	fin->fi_sport = 0;
+	fin->fi_dport = 0;
+	fin->fi_gpi = 0;
+
+	ip_off = ntohs(ip->ip_off);
+	/* if it is a fragment, try cached fragment info */
+	if (ip_off & IP_OFFMASK) {
+		ip4f_lookup(ip, fin);
+		return (1);
+	}
+
+	/* locate the mbuf containing the protocol header */
+	for (m0 = m; m0 != NULL; m0 = m0->m_next)
+		if (((caddr_t)ip >= m0->m_data) &&
+		    ((caddr_t)ip < m0->m_data + m0->m_len))
+			break;
+	if (m0 == NULL) {
+#ifdef ALTQ_DEBUG
+		printf("extract_ports4: can't locate header! ip=%p\n", ip);
+#endif
+		return (0);
+	}
+	/* offset of the first byte after the ip header within m0 */
+	off = ((caddr_t)ip - m0->m_data) + (ip->ip_hl << 2);
+	proto = ip->ip_p;
+
+#ifdef ALTQ_IPSEC
+ again:
+#endif
+	/* step to the mbuf that holds offset off */
+	while (off >= m0->m_len) {
+		off -= m0->m_len;
+		m0 = m0->m_next;
+		if (m0 == NULL)
+			return (0);	/* bogus ip_hl! */
+	}
+	/* every case below reads at least 4 bytes at off */
+	if (m0->m_len < off + 4)
+		return (0);
+
+	switch (proto) {
+	case IPPROTO_TCP:
+	case IPPROTO_UDP: {
+		struct udphdr *udp;
+
+		/* both port fields are read through the udp header layout */
+		udp = (struct udphdr *)(mtod(m0, caddr_t) + off);
+		fin->fi_sport = udp->uh_sport;
+		fin->fi_dport = udp->uh_dport;
+		fin->fi_proto = proto;
+		}
+		break;
+
+#ifdef ALTQ_IPSEC
+	case IPPROTO_ESP:
+		if (fin->fi_gpi == 0){
+			u_int32_t *gpi;
+
+			gpi = (u_int32_t *)(mtod(m0, caddr_t) + off);
+			fin->fi_gpi = *gpi;
+		}
+		fin->fi_proto = proto;
+		break;
+
+	case IPPROTO_AH: {
+		/* get next header and header length */
+		struct _opt6 *opt6;
+
+		opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off);
+		/*
+		 * the SPI occupies bytes 4-7 of the AH that starts at
+		 * the current offset, so its bounds must be checked
+		 * before off is advanced past the header (same order
+		 * as extract_ports6).
+		 */
+		if (fin->fi_gpi == 0 && m0->m_len >= off + 8)
+			fin->fi_gpi = opt6->ah_spi;
+		proto = opt6->opt6_nxt;
+		off += 8 + (opt6->opt6_hlen * 4);
+		}
+		/* goto the next header */
+		goto again;
+#endif /* ALTQ_IPSEC */
+
+	default:
+		fin->fi_proto = proto;
+		return (0);
+	}
+
+	/* if this is a first fragment, cache it. */
+	if (ip_off & IP_MF)
+		ip4f_cache(ip, fin);
+
+	return (1);
+}
+
+#ifdef INET6
+/*
+ * extract port numbers (or an ESP/AH SPI) from an ipv6 packet by walking
+ * the chain of headers that follows ip6 inside the mbuf chain m.
+ * returns 1 once TCP/UDP ports or an ESP SPI were read; returns 0 when
+ * the header cannot be located, the data is too short, or a fragment
+ * header or an unhandled protocol is met.
+ */
+static int
+extract_ports6(m, ip6, fin6)
+ struct mbuf *m;
+ struct ip6_hdr *ip6;
+ struct flowinfo_in6 *fin6;
+{
+ struct mbuf *m0;
+ int off;
+ u_int8_t proto;
+
+ fin6->fi6_gpi = 0;
+ fin6->fi6_sport = 0;
+ fin6->fi6_dport = 0;
+
+ /* locate the mbuf containing the protocol header */
+ for (m0 = m; m0 != NULL; m0 = m0->m_next)
+ if (((caddr_t)ip6 >= m0->m_data) &&
+ ((caddr_t)ip6 < m0->m_data + m0->m_len))
+ break;
+ if (m0 == NULL) {
+#ifdef ALTQ_DEBUG
+ printf("extract_ports6: can't locate header! ip6=%p\n", ip6);
+#endif
+ return (0);
+ }
+ /* offset of the first byte after the fixed ipv6 header within m0 */
+ off = ((caddr_t)ip6 - m0->m_data) + sizeof(struct ip6_hdr);
+
+ proto = ip6->ip6_nxt;
+ /*
+ * each pass handles the header at off; a break out of the switch
+ * continues the loop with the next header, every other path returns.
+ */
+ do {
+ /* step to the mbuf that holds offset off */
+ while (off >= m0->m_len) {
+ off -= m0->m_len;
+ m0 = m0->m_next;
+ if (m0 == NULL)
+ return (0);
+ }
+ /* every case below reads at least 4 bytes at off */
+ if (m0->m_len < off + 4)
+ return (0);
+
+ switch (proto) {
+ case IPPROTO_TCP:
+ case IPPROTO_UDP: {
+ struct udphdr *udp;
+
+ udp = (struct udphdr *)(mtod(m0, caddr_t) + off);
+ fin6->fi6_sport = udp->uh_sport;
+ fin6->fi6_dport = udp->uh_dport;
+ fin6->fi6_proto = proto;
+ }
+ return (1);
+
+ case IPPROTO_ESP:
+ if (fin6->fi6_gpi == 0) {
+ u_int32_t *gpi;
+
+ gpi = (u_int32_t *)(mtod(m0, caddr_t) + off);
+ fin6->fi6_gpi = *gpi;
+ }
+ fin6->fi6_proto = proto;
+ return (1);
+
+ case IPPROTO_AH: {
+ /* get next header and header length */
+ struct _opt6 *opt6;
+
+ opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off);
+ /* the SPI is only read when 8 bytes at off are in this mbuf */
+ if (fin6->fi6_gpi == 0 && m0->m_len >= off + 8)
+ fin6->fi6_gpi = opt6->ah_spi;
+ proto = opt6->opt6_nxt;
+ off += 8 + (opt6->opt6_hlen * 4);
+ /* goto the next header */
+ break;
+ }
+
+ case IPPROTO_HOPOPTS:
+ case IPPROTO_ROUTING:
+ case IPPROTO_DSTOPTS: {
+ /* get next header and header length */
+ struct _opt6 *opt6;
+
+ opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off);
+ proto = opt6->opt6_nxt;
+ off += (opt6->opt6_hlen + 1) * 8;
+ /* goto the next header */
+ break;
+ }
+
+ case IPPROTO_FRAGMENT:
+ /* ipv6 fragmentations are not supported yet */
+ default:
+ fin6->fi6_proto = proto;
+ return (0);
+ }
+ } while (1);
+ /*NOTREACHED*/
+}
+#endif /* INET6 */
+
+/*
+ * altq common classifier
+ */
+/*
+ * add filter to classifier, bound to class, and hand the new filter
+ * handle back through phandle.
+ * returns EINVAL for an address family other than AF_INET (or AF_INET6
+ * with INET6), ENOMEM when the allocation yields NULL, 0 on success.
+ * the caller's filter is copied; address/mask normalization below is
+ * applied to the copy only.
+ */
+int
+acc_add_filter(classifier, filter, class, phandle)
+ struct acc_classifier *classifier;
+ struct flow_filter *filter;
+ void *class;
+ u_long *phandle;
+{
+ struct acc_filter *afp, *prev, *tmp;
+ int i, s;
+
+#ifdef INET6
+ if (filter->ff_flow.fi_family != AF_INET &&
+ filter->ff_flow.fi_family != AF_INET6)
+ return (EINVAL);
+#else
+ if (filter->ff_flow.fi_family != AF_INET)
+ return (EINVAL);
+#endif
+
+ MALLOC(afp, struct acc_filter *, sizeof(struct acc_filter),
+ M_DEVBUF, M_WAITOK);
+ if (afp == NULL)
+ return (ENOMEM);
+ bzero(afp, sizeof(struct acc_filter));
+
+ afp->f_filter = *filter;
+ afp->f_class = class;
+
+ /* i ends up as the index of the filter list the entry goes into */
+ i = ACC_WILDCARD_INDEX;
+ if (filter->ff_flow.fi_family == AF_INET) {
+ struct flow_filter *filter4 = &afp->f_filter;
+
+ /*
+ * if address is 0, it's a wildcard. if address mask
+ * isn't set, use full mask.
+ */
+ if (filter4->ff_flow.fi_dst.s_addr == 0)
+ filter4->ff_mask.mask_dst.s_addr = 0;
+ else if (filter4->ff_mask.mask_dst.s_addr == 0)
+ filter4->ff_mask.mask_dst.s_addr = 0xffffffff;
+ if (filter4->ff_flow.fi_src.s_addr == 0)
+ filter4->ff_mask.mask_src.s_addr = 0;
+ else if (filter4->ff_mask.mask_src.s_addr == 0)
+ filter4->ff_mask.mask_src.s_addr = 0xffffffff;
+
+ /* clear extra bits in addresses */
+ filter4->ff_flow.fi_dst.s_addr &=
+ filter4->ff_mask.mask_dst.s_addr;
+ filter4->ff_flow.fi_src.s_addr &=
+ filter4->ff_mask.mask_src.s_addr;
+
+ /*
+ * if dst address is a wildcard, use hash-entry
+ * ACC_WILDCARD_INDEX.
+ */
+ /* any dst mask other than all-ones selects the wildcard entry */
+ if (filter4->ff_mask.mask_dst.s_addr != 0xffffffff)
+ i = ACC_WILDCARD_INDEX;
+ else
+ i = ACC_GET_HASH_INDEX(filter4->ff_flow.fi_dst.s_addr);
+ }
+#ifdef INET6
+ else if (filter->ff_flow.fi_family == AF_INET6) {
+ struct flow_filter6 *filter6 =
+ (struct flow_filter6 *)&afp->f_filter;
+ /* local mask constants, declared only when IN6MASK0 is not defined */
+#ifndef IN6MASK0 /* taken from kame ipv6 */
+#define IN6MASK0 {{{ 0, 0, 0, 0 }}}
+#define IN6MASK128 {{{ 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }}}
+ const struct in6_addr in6mask0 = IN6MASK0;
+ const struct in6_addr in6mask128 = IN6MASK128;
+#endif
+
+ /* same wildcard/full-mask defaults as for ipv4 above */
+ if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_flow6.fi6_dst))
+ filter6->ff_mask6.mask6_dst = in6mask0;
+ else if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_mask6.mask6_dst))
+ filter6->ff_mask6.mask6_dst = in6mask128;
+ if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_flow6.fi6_src))
+ filter6->ff_mask6.mask6_src = in6mask0;
+ else if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_mask6.mask6_src))
+ filter6->ff_mask6.mask6_src = in6mask128;
+
+ /* clear extra bits in addresses */
+ for (i = 0; i < 16; i++)
+ filter6->ff_flow6.fi6_dst.s6_addr[i] &=
+ filter6->ff_mask6.mask6_dst.s6_addr[i];
+ for (i = 0; i < 16; i++)
+ filter6->ff_flow6.fi6_src.s6_addr[i] &=
+ filter6->ff_mask6.mask6_src.s6_addr[i];
+
+ /* ipv6 filters are hashed by flowlabel; 0 selects the wildcard entry */
+ if (filter6->ff_flow6.fi6_flowlabel == 0)
+ i = ACC_WILDCARD_INDEX;
+ else
+ i = ACC_GET_HASH_INDEX(filter6->ff_flow6.fi6_flowlabel);
+ }
+#endif /* INET6 */
+
+ afp->f_handle = get_filt_handle(classifier, i);
+
+ /* update filter bitmask */
+ /* note: computed from the caller's filter, not the normalized copy */
+ afp->f_fbmask = filt2fibmask(filter);
+ classifier->acc_fbmask |= afp->f_fbmask;
+
+ /*
+ * add this filter to the filter list.
+ * filters are ordered from the highest rule number.
+ */
+#ifdef __NetBSD__
+ s = splnet();
+#else
+ s = splimp();
+#endif
+ /* prev becomes the last entry with a strictly greater rule number */
+ prev = NULL;
+ LIST_FOREACH(tmp, &classifier->acc_filters[i], f_chain) {
+ if (tmp->f_filter.ff_ruleno > afp->f_filter.ff_ruleno)
+ prev = tmp;
+ else
+ break;
+ }
+ if (prev == NULL)
+ LIST_INSERT_HEAD(&classifier->acc_filters[i], afp, f_chain);
+ else
+ LIST_INSERT_AFTER(prev, afp, f_chain);
+ splx(s);
+
+ *phandle = afp->f_handle;
+ return (0);
+}
+
+/*
+ * unlink and free the filter identified by handle.
+ * returns EINVAL when the handle does not resolve to a filter.
+ */
+int
+acc_delete_filter(struct acc_classifier *classifier, u_long handle)
+{
+	struct acc_filter *afp = filth_to_filtp(classifier, handle);
+	int s;
+
+	if (afp == NULL)
+		return (EINVAL);
+
+#ifdef __NetBSD__
+	s = splnet();
+#else
+	s = splimp();
+#endif
+	LIST_REMOVE(afp, f_chain);
+	splx(s);
+
+	/* the entry is off the list; release it */
+	FREE(afp, M_DEVBUF);
+
+	/* todo: update filt_bmask */
+
+	return (0);
+}
+
+/*
+ * delete filters referencing the specified class.
+ * if the all flag is not 0, delete all the filters.
+ */
+int
+acc_discard_filters(classifier, class, all)
+ struct acc_classifier *classifier;
+ void *class;
+ int all;
+{
+ struct acc_filter *afp;
+ int i, s;
+
+#ifdef __NetBSD__
+ s = splnet();
+#else
+ s = splimp();
+#endif
+ for (i = 0; i < ACC_FILTER_TABLESIZE; i++) {
+ do {
+ LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
+ if (all || afp->f_class == class) {
+ LIST_REMOVE(afp, f_chain);
+ FREE(afp, M_DEVBUF);
+ /* start again from the head */
+ break;
+ }
+ } while (afp != NULL);
+ }
+ splx(s);
+
+ if (all)
+ classifier->acc_fbmask = 0;
+
+ return (0);
+}
+
+/*
+ * find the class for a packet.
+ * the flow info is pulled out of the mbuf using the classifier-wide
+ * bitmask and compared against the filters in list order.  the class of
+ * the first matching filter is returned, or NULL when nothing matches.
+ */
+void *
+acc_classify(clfier, m, af)
+	void *clfier;
+	struct mbuf *m;
+	int af;
+{
+	struct acc_classifier *classifier = (struct acc_classifier *)clfier;
+	struct acc_filter *afp;
+	struct flowinfo flow;
+	int bucket;
+
+	altq_extractflow(m, af, &flow, classifier->acc_fbmask);
+
+	if (flow.fi_family == AF_INET) {
+		struct flowinfo_in *fp = (struct flowinfo_in *)&flow;
+		u_int32_t v4bits = classifier->acc_fbmask & FIMB4_ALL;
+
+		if (v4bits == FIMB4_TOS) {
+			/* tos is the only IPv4 field any filter looks at */
+			LIST_FOREACH(afp,
+			    &classifier->acc_filters[ACC_WILDCARD_INDEX],
+			    f_chain) {
+				if (apply_tosfilter4(afp->f_fbmask,
+				    &afp->f_filter, fp))
+					return (afp->f_class);
+			}
+		} else if ((v4bits &
+		    ~(FIMB4_PROTO|FIMB4_SPORT|FIMB4_DPORT)) == 0) {
+			/* nothing besides proto and ports is in use */
+			LIST_FOREACH(afp,
+			    &classifier->acc_filters[ACC_WILDCARD_INDEX],
+			    f_chain) {
+				if (apply_ppfilter4(afp->f_fbmask,
+				    &afp->f_filter, fp))
+					return (afp->f_class);
+			}
+		} else {
+			/*
+			 * general case: search the bucket hashed from the
+			 * destination address first, then the wildcard
+			 * bucket, which holds filters with a dst addr
+			 * wildcard (daddr == 0 || dmask != 0xffffffff).
+			 */
+			bucket = ACC_GET_HASH_INDEX(fp->fi_dst.s_addr);
+			for (;;) {
+				LIST_FOREACH(afp,
+				    &classifier->acc_filters[bucket],
+				    f_chain) {
+					if (apply_filter4(afp->f_fbmask,
+					    &afp->f_filter, fp))
+						return (afp->f_class);
+				}
+				if (bucket == ACC_WILDCARD_INDEX)
+					break;
+				bucket = ACC_WILDCARD_INDEX;
+			}
+		}
+	}
+#ifdef INET6
+	else if (flow.fi_family == AF_INET6) {
+		struct flowinfo_in6 *fp6 = (struct flowinfo_in6 *)&flow;
+
+		/*
+		 * pick the bucket from the flow label; a zero flow label
+		 * goes straight to the wildcard bucket.
+		 */
+		if (fp6->fi6_flowlabel == 0)
+			bucket = ACC_WILDCARD_INDEX;
+		else
+			bucket = ACC_GET_HASH_INDEX(fp6->fi6_flowlabel);
+
+		/* flow-label bucket first, wildcard bucket second */
+		for (;;) {
+			LIST_FOREACH(afp, &classifier->acc_filters[bucket],
+			    f_chain) {
+				if (apply_filter6(afp->f_fbmask,
+				    (struct flow_filter6 *)&afp->f_filter,
+				    fp6))
+					return (afp->f_class);
+			}
+			if (bucket == ACC_WILDCARD_INDEX)
+				break;
+			bucket = ACC_WILDCARD_INDEX;
+		}
+	}
+#endif /* INET6 */
+
+	/* no filter matched */
+	return (NULL);
+}
+
+/*
+ * full IPv4 filter match.
+ * only the fields selected in fbmask are compared; addresses and tos are
+ * compared after applying the filter's masks to the packet value.
+ * returns 1 on match, 0 otherwise.
+ */
+static int
+apply_filter4(fbmask, filt, pkt)
+	u_int32_t fbmask;
+	struct flow_filter *filt;
+	struct flowinfo_in *pkt;
+{
+	if (filt->ff_flow.fi_family != AF_INET)
+		return (0);
+
+	/* exact-match fields */
+	if (fbmask & FIMB4_SPORT) {
+		if (pkt->fi_sport != filt->ff_flow.fi_sport)
+			return (0);
+	}
+	if (fbmask & FIMB4_DPORT) {
+		if (pkt->fi_dport != filt->ff_flow.fi_dport)
+			return (0);
+	}
+	if (fbmask & FIMB4_PROTO) {
+		if (pkt->fi_proto != filt->ff_flow.fi_proto)
+			return (0);
+	}
+	if (fbmask & FIMB4_GPI) {
+		if (pkt->fi_gpi != filt->ff_flow.fi_gpi)
+			return (0);
+	}
+
+	/* masked fields */
+	if (fbmask & FIMB4_DADDR) {
+		if ((pkt->fi_dst.s_addr & filt->ff_mask.mask_dst.s_addr) !=
+		    filt->ff_flow.fi_dst.s_addr)
+			return (0);
+	}
+	if (fbmask & FIMB4_SADDR) {
+		if ((pkt->fi_src.s_addr & filt->ff_mask.mask_src.s_addr) !=
+		    filt->ff_flow.fi_src.s_addr)
+			return (0);
+	}
+	if (fbmask & FIMB4_TOS) {
+		if ((pkt->fi_tos & filt->ff_mask.mask_tos) !=
+		    filt->ff_flow.fi_tos)
+			return (0);
+	}
+
+	/* every selected field agreed */
+	return (1);
+}
+
+/*
+ * reduced IPv4 match for the common case in which filters look at
+ * nothing but the protocol and the port numbers.
+ * returns 1 on match, 0 otherwise.
+ */
+static int
+apply_ppfilter4(fbmask, filt, pkt)
+	u_int32_t fbmask;
+	struct flow_filter *filt;
+	struct flowinfo_in *pkt;
+{
+	if (filt->ff_flow.fi_family != AF_INET)
+		return (0);
+
+	/* a field passes when it is not selected or when it is equal */
+	return (((fbmask & FIMB4_SPORT) == 0 ||
+	    filt->ff_flow.fi_sport == pkt->fi_sport) &&
+	    ((fbmask & FIMB4_DPORT) == 0 ||
+	    filt->ff_flow.fi_dport == pkt->fi_dport) &&
+	    ((fbmask & FIMB4_PROTO) == 0 ||
+	    filt->ff_flow.fi_proto == pkt->fi_proto));
+}
+
+/*
+ * reduced IPv4 match that looks at the tos field only.
+ * returns 1 on match, 0 otherwise.
+ */
+static int
+apply_tosfilter4(fbmask, filt, pkt)
+	u_int32_t fbmask;
+	struct flow_filter *filt;
+	struct flowinfo_in *pkt;
+{
+	if (filt->ff_flow.fi_family != AF_INET)
+		return (0);
+
+	/* a filter that does not select tos matches any IPv4 packet */
+	if ((fbmask & FIMB4_TOS) == 0)
+		return (1);
+
+	return (filt->ff_flow.fi_tos ==
+	    (pkt->fi_tos & filt->ff_mask.mask_tos));
+}
+
+#ifdef INET6
+/*
+ * full IPv6 filter match.
+ * only the fields selected in fbmask are compared; addresses and the
+ * traffic class are compared after applying the filter's masks to the
+ * packet value.  returns 1 on match, 0 otherwise.
+ */
+static int
+apply_filter6(fbmask, filt, pkt)
+	u_int32_t fbmask;
+	struct flow_filter6 *filt;
+	struct flowinfo_in6 *pkt;
+{
+	u_int32_t masked;
+	int w;
+
+	if (filt->ff_flow6.fi6_family != AF_INET6)
+		return (0);
+
+	/* exact-match fields */
+	if (fbmask & FIMB6_FLABEL) {
+		if (pkt->fi6_flowlabel != filt->ff_flow6.fi6_flowlabel)
+			return (0);
+	}
+	if (fbmask & FIMB6_PROTO) {
+		if (pkt->fi6_proto != filt->ff_flow6.fi6_proto)
+			return (0);
+	}
+	if (fbmask & FIMB6_SPORT) {
+		if (pkt->fi6_sport != filt->ff_flow6.fi6_sport)
+			return (0);
+	}
+	if (fbmask & FIMB6_DPORT) {
+		if (pkt->fi6_dport != filt->ff_flow6.fi6_dport)
+			return (0);
+	}
+	if (fbmask & FIMB6_GPI) {
+		if (pkt->fi6_gpi != filt->ff_flow6.fi6_gpi)
+			return (0);
+	}
+
+	/* addresses are compared one 32-bit word at a time, under the mask */
+	if (fbmask & FIMB6_SADDR) {
+		for (w = 0; w < 4; w++) {
+			masked = pkt->fi6_src.s6_addr32[w] &
+			    filt->ff_mask6.mask6_src.s6_addr32[w];
+			if (masked != filt->ff_flow6.fi6_src.s6_addr32[w])
+				return (0);
+		}
+	}
+	if (fbmask & FIMB6_DADDR) {
+		for (w = 0; w < 4; w++) {
+			masked = pkt->fi6_dst.s6_addr32[w] &
+			    filt->ff_mask6.mask6_dst.s6_addr32[w];
+			if (masked != filt->ff_flow6.fi6_dst.s6_addr32[w])
+				return (0);
+		}
+	}
+
+	if (fbmask & FIMB6_TCLASS) {
+		if ((pkt->fi6_tclass & filt->ff_mask6.mask6_tclass) !=
+		    filt->ff_flow6.fi6_tclass)
+			return (0);
+	}
+
+	/* every selected field agreed */
+	return (1);
+}
+#endif /* INET6 */
+
+/*
+ * filter handle:
+ *	bit 20-28: index to the filter hash table
+ *	bit  0-19: unique id in the hash bucket.
+ *
+ * ids come from a counter shared by all classifiers; a candidate is
+ * retried until no filter already chained in bucket i carries it.
+ */
+static u_long
+get_filt_handle(classifier, i)
+	struct acc_classifier *classifier;
+	int i;
+{
+	static u_long handle_number = 1;
+	struct acc_filter *afp;
+	u_long id;
+
+	do {
+		/* next candidate, truncated to the low 20 bits */
+		id = handle_number++ & 0x000fffff;
+
+		/* afp is left NULL when nothing in the bucket uses id */
+		LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain) {
+			if ((afp->f_handle & 0x000fffff) == id)
+				break;
+		}
+	} while (afp != NULL);
+
+	return ((i << 20) | id);
+}
+
+/*
+ * convert filter handle to filter pointer.
+ * the bucket index is taken from the upper bits of the handle.  a handle
+ * whose index lies outside the filter table cannot name a filter and
+ * yields NULL, as does a handle that is not found in its bucket.
+ */
+static struct acc_filter *
+filth_to_filtp(classifier, handle)
+	struct acc_classifier *classifier;
+	u_long handle;
+{
+	struct acc_filter *afp;
+	u_long i;
+
+	i = ACC_GET_HINDEX(handle);
+	/* the handle is supplied by the caller: never index past the table */
+	if (i >= ACC_FILTER_TABLESIZE)
+		return (NULL);
+
+	LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
+		if (afp->f_handle == handle)
+			return (afp);
+
+	return (NULL);
+}
+
+/*
+ * create flowinfo bitmask.
+ * a FIMB4_ or FIMB6_ bit is set for each field of the filter's flow that
+ * is non-zero (not the unspecified address, for IPv6 addresses).
+ * filters of any other address family produce an empty mask.
+ */
+static u_int32_t
+filt2fibmask(filt)
+	struct flow_filter *filt;
+{
+	u_int32_t mask = 0;
+
+	if (filt->ff_flow.fi_family == AF_INET) {
+		mask |= (filt->ff_flow.fi_proto != 0) ? FIMB4_PROTO : 0;
+		mask |= (filt->ff_flow.fi_tos != 0) ? FIMB4_TOS : 0;
+		mask |= (filt->ff_flow.fi_dst.s_addr != 0) ? FIMB4_DADDR : 0;
+		mask |= (filt->ff_flow.fi_src.s_addr != 0) ? FIMB4_SADDR : 0;
+		mask |= (filt->ff_flow.fi_sport != 0) ? FIMB4_SPORT : 0;
+		mask |= (filt->ff_flow.fi_dport != 0) ? FIMB4_DPORT : 0;
+		mask |= (filt->ff_flow.fi_gpi != 0) ? FIMB4_GPI : 0;
+	}
+#ifdef INET6
+	else if (filt->ff_flow.fi_family == AF_INET6) {
+		/* the same storage viewed as an IPv6 filter */
+		struct flow_filter6 *filt6 = (struct flow_filter6 *)filt;
+
+		mask |= (filt6->ff_flow6.fi6_proto != 0) ? FIMB6_PROTO : 0;
+		mask |= (filt6->ff_flow6.fi6_tclass != 0) ? FIMB6_TCLASS : 0;
+		mask |= IN6_IS_ADDR_UNSPECIFIED(&filt6->ff_flow6.fi6_dst) ?
+		    0 : FIMB6_DADDR;
+		mask |= IN6_IS_ADDR_UNSPECIFIED(&filt6->ff_flow6.fi6_src) ?
+		    0 : FIMB6_SADDR;
+		mask |= (filt6->ff_flow6.fi6_sport != 0) ? FIMB6_SPORT : 0;
+		mask |= (filt6->ff_flow6.fi6_dport != 0) ? FIMB6_DPORT : 0;
+		mask |= (filt6->ff_flow6.fi6_gpi != 0) ? FIMB6_GPI : 0;
+		mask |= (filt6->ff_flow6.fi6_flowlabel != 0) ?
+		    FIMB6_FLABEL : 0;
+	}
+#endif /* INET6 */
+
+	return (mask);
+}
+
+
+/*
+ * helper functions to handle IPv4 fragments.
+ * currently only in-sequence fragments are handled.
+ * - fragment info is cached in a LRU list.
+ * - when a first fragment is found, cache its flow info.
+ * - when a non-first fragment is found, lookup the cache.
+ */
+
+/*
+ * one entry of the IPv4 fragment cache.
+ * entries in use sit at the head of ip4f_list (ip4f_alloc() moves an
+ * entry there), unused ones at the tail (ip4f_free() moves it back).
+ */
+struct ip4_frag {
+ TAILQ_ENTRY(ip4_frag) ip4f_chain;
+ char ip4f_valid; /* 1 while the entry is in use, 0 otherwise */
+ u_short ip4f_id; /* ip_id copied from the cached packet */
+ struct flowinfo_in ip4f_info; /* proto, addresses, ports and gpi */
+};
+
+static TAILQ_HEAD(ip4f_list, ip4_frag) ip4f_list; /* IPv4 fragment cache */
+
+/* number of entries ip4f_init() tries to allocate */
+#define IP4F_TABSIZE 16 /* IPv4 fragment cache size */
+
+
+/*
+ * remember the flow info of a fragmented packet so that later fragments
+ * can be given the same ports and gpi by ip4f_lookup().
+ * silently does nothing when the cache cannot be set up.
+ */
+static void
+ip4f_cache(ip, fin)
+	struct ip *ip;
+	struct flowinfo_in *fin;
+{
+	struct ip4_frag *ent;
+
+	/* the cache entries are allocated on first use */
+	if (TAILQ_EMPTY(&ip4f_list) && ip4f_init() < 0)
+		return;		/* allocation failed! */
+
+	ent = ip4f_alloc();
+
+	/* key: id, protocol and both addresses from the IP header */
+	ent->ip4f_id = ip->ip_id;
+	ent->ip4f_info.fi_proto = ip->ip_p;
+	ent->ip4f_info.fi_src.s_addr = ip->ip_src.s_addr;
+	ent->ip4f_info.fi_dst.s_addr = ip->ip_dst.s_addr;
+
+	/* payload: what ip4f_lookup() hands back */
+	ent->ip4f_info.fi_sport = fin->fi_sport;
+	ent->ip4f_info.fi_dport = fin->fi_dport;
+	ent->ip4f_info.fi_gpi = fin->fi_gpi;
+}
+
+/*
+ * look for a cached entry with the same id, addresses and protocol as
+ * the given IP header.  on a hit the cached ports and gpi are copied
+ * into fin and 1 is returned; the entry is released when the packet has
+ * IP_MF clear.  returns 0 when no entry matches.
+ */
+static int
+ip4f_lookup(ip, fin)
+	struct ip *ip;
+	struct flowinfo_in *fin;
+{
+	struct ip4_frag *ent;
+
+	for (ent = TAILQ_FIRST(&ip4f_list); ent != NULL;
+	    ent = TAILQ_NEXT(ent, ip4f_chain)) {
+		/* entries in use are at the head; stop at the first free one */
+		if (!ent->ip4f_valid)
+			break;
+
+		if (ip->ip_id != ent->ip4f_id ||
+		    ip->ip_src.s_addr != ent->ip4f_info.fi_src.s_addr ||
+		    ip->ip_dst.s_addr != ent->ip4f_info.fi_dst.s_addr ||
+		    ip->ip_p != ent->ip4f_info.fi_proto)
+			continue;
+
+		/* found the matching entry */
+		fin->fi_sport = ent->ip4f_info.fi_sport;
+		fin->fi_dport = ent->ip4f_info.fi_dport;
+		fin->fi_gpi = ent->ip4f_info.fi_gpi;
+
+		/* the last fragment releases the entry */
+		if ((ntohs(ip->ip_off) & IP_MF) == 0)
+			ip4f_free(ent);
+
+		return (1);
+	}
+
+	/* no matching entry found */
+	return (0);
+}
+
+/*
+ * set up the fragment cache with up to IP4F_TABSIZE unused entries.
+ * returns -1 only when not even the first entry could be allocated;
+ * a partially populated cache is accepted and reported as 0.
+ */
+static int
+ip4f_init(void)
+{
+	struct ip4_frag *ent;
+	int n;
+
+	TAILQ_INIT(&ip4f_list);
+	for (n = 0; n < IP4F_TABSIZE; n++) {
+		MALLOC(ent, struct ip4_frag *, sizeof(struct ip4_frag),
+		    M_DEVBUF, M_NOWAIT);
+		if (ent == NULL) {
+			printf("ip4f_init: can't alloc %dth entry!\n", n);
+			/* carry on with whatever was allocated so far */
+			return ((n == 0) ? -1 : 0);
+		}
+		ent->ip4f_valid = 0;
+		TAILQ_INSERT_TAIL(&ip4f_list, ent, ip4f_chain);
+	}
+	return (0);
+}
+
+/*
+ * take the entry at the tail of the cache, mark it in use and move it
+ * to the head.  the tail entry is taken whether or not it is currently
+ * in use.  the list must not be empty.
+ */
+static struct ip4_frag *
+ip4f_alloc(void)
+{
+	struct ip4_frag *victim = TAILQ_LAST(&ip4f_list, ip4f_list);
+
+	victim->ip4f_valid = 1;
+	TAILQ_REMOVE(&ip4f_list, victim, ip4f_chain);
+	TAILQ_INSERT_HEAD(&ip4f_list, victim, ip4f_chain);
+	return (victim);
+}
+
+/*
+ * release a cache entry: mark it unused and move it to the tail of the
+ * list, where ip4f_alloc() picks up its next entry.
+ */
+static void
+ip4f_free(fp)
+	struct ip4_frag *fp;
+{
+	fp->ip4f_valid = 0;
+	TAILQ_REMOVE(&ip4f_list, fp, ip4f_chain);
+	TAILQ_INSERT_TAIL(&ip4f_list, fp, ip4f_chain);
+}
+
+#endif /* ALTQ3_CLFIER_COMPAT */
diff --git a/sys/contrib/altq/altq/altq_var.h b/sys/contrib/altq/altq/altq_var.h
new file mode 100644
index 000000000000..dff9e5c5db11
--- /dev/null
+++ b/sys/contrib/altq/altq/altq_var.h
@@ -0,0 +1,264 @@
+/* $KAME: altq_var.h,v 1.16 2003/10/03 05:05:15 kjc Exp $ */
+
+/*
+ * Copyright (C) 1998-2003
+ * Sony Computer Science Laboratories Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#ifndef _ALTQ_ALTQ_VAR_H_
+#define _ALTQ_ALTQ_VAR_H_
+
+#ifdef _KERNEL
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/queue.h>
+
+#ifdef ALTQ3_CLFIER_COMPAT
+/*
+ * filter structure for altq common classifier
+ *
+ * each filter is chained into exactly one bucket of
+ * acc_classifier.acc_filters[].
+ */
+struct acc_filter {
+ LIST_ENTRY(acc_filter) f_chain;
+ void *f_class; /* pointer to the class */
+ u_long f_handle; /* filter id */
+ u_int32_t f_fbmask; /* filter bitmask */
+ struct flow_filter f_filter; /* filter value */
+};
+
+/*
+ * XXX ACC_FILTER_TABLESIZE can't be larger than 2048 unless we fix
+ * the handle assignment.
+ *
+ * the table has 256 hashed buckets plus one extra bucket at the end
+ * (ACC_WILDCARD_INDEX); ACC_FILTER_MASK keeps hash values inside the
+ * hashed part.
+ */
+#define ACC_FILTER_TABLESIZE (256+1)
+#define ACC_FILTER_MASK (ACC_FILTER_TABLESIZE - 2)
+#define ACC_WILDCARD_INDEX (ACC_FILTER_TABLESIZE - 1)
+/*
+ * fold a 32-bit value into a bucket index.
+ * NOTE(review): the two variants add the bytes with different carry
+ * handling, so they need not give the same index for the same input;
+ * the non-GNUC variant also evaluates addr several times.
+ */
+#ifdef __GNUC__
+#define ACC_GET_HASH_INDEX(addr) \
+ ({int x = (addr) + ((addr) >> 16); (x + (x >> 8)) & ACC_FILTER_MASK;})
+#else
+#define ACC_GET_HASH_INDEX(addr) \
+ (((addr) + ((addr) >> 8) + ((addr) >> 16) + ((addr) >> 24)) \
+ & ACC_FILTER_MASK)
+#endif
+/* bucket index stored above bit 20 of a filter handle; not range-checked */
+#define ACC_GET_HINDEX(handle) ((handle) >> 20)
+
+/*
+ * classifier locking: backed by the acc_mtx mutex when
+ * __FreeBSD_version > 500000, empty macros otherwise.
+ */
+#if (__FreeBSD_version > 500000)
+#define ACC_LOCK_INIT(ac) mtx_init(&(ac)->acc_mtx, "classifier", MTX_DEF)
+#define ACC_LOCK_DESTROY(ac) mtx_destroy(&(ac)->acc_mtx)
+#define ACC_LOCK(ac) mtx_lock(&(ac)->acc_mtx)
+#define ACC_UNLOCK(ac) mtx_unlock(&(ac)->acc_mtx)
+#else
+#define ACC_LOCK_INIT(ac)
+#define ACC_LOCK_DESTROY(ac)
+#define ACC_LOCK(ac)
+#define ACC_UNLOCK(ac)
+#endif
+
+/*
+ * common classifier state: the union of the FIMB bits of its filters
+ * and the filter hash table (last bucket is the wildcard bucket).
+ */
+struct acc_classifier {
+ u_int32_t acc_fbmask;
+ LIST_HEAD(filt, acc_filter) acc_filters[ACC_FILTER_TABLESIZE];
+
+#if (__FreeBSD_version > 500000)
+ struct mtx acc_mtx; /* used by the ACC_LOCK macros */
+#endif
+};
+
+/*
+ * flowinfo mask bits used by classifier
+ *
+ * the IPv4 bits occupy the low byte and the IPv6 bits the next byte, so
+ * a single mask word can carry both sets at once.
+ */
+/* for ipv4 */
+#define FIMB4_PROTO 0x0001
+#define FIMB4_TOS 0x0002
+#define FIMB4_DADDR 0x0004
+#define FIMB4_SADDR 0x0008
+#define FIMB4_DPORT 0x0010
+#define FIMB4_SPORT 0x0020
+#define FIMB4_GPI 0x0040
+#define FIMB4_ALL 0x007f
+/* for ipv6 */
+#define FIMB6_PROTO 0x0100
+#define FIMB6_TCLASS 0x0200
+#define FIMB6_DADDR 0x0400
+#define FIMB6_SADDR 0x0800
+#define FIMB6_DPORT 0x1000
+#define FIMB6_SPORT 0x2000
+#define FIMB6_GPI 0x4000
+#define FIMB6_FLABEL 0x8000
+#define FIMB6_ALL 0xff00
+
+#define FIMB_ALL (FIMB4_ALL|FIMB6_ALL)
+
+/* port-like fields: both ports plus gpi */
+#define FIMB4_PORTS (FIMB4_DPORT|FIMB4_SPORT|FIMB4_GPI)
+#define FIMB6_PORTS (FIMB6_DPORT|FIMB6_SPORT|FIMB6_GPI)
+#endif /* ALTQ3_CLFIER_COMPAT */
+#endif /* ALTQ3_CLFIER_COMPAT */
+
+/*
+ * machine dependent clock
+ * a 64bit high resolution time counter.
+ *
+ * NOTE(review): only declared here; the definitions and the meaning of
+ * machclk_usepcc are not visible in this header.
+ */
+extern int machclk_usepcc;
+extern u_int32_t machclk_freq;
+extern u_int32_t machclk_per_tick;
+extern void init_machclk(void);
+extern u_int64_t read_machclk(void);
+
+/*
+ * debug support
+ *
+ * with ALTQ_DEBUG, a false expression calls altq_assert() with the file,
+ * line and expression text.  without it, ASSERT expands to a no-op and
+ * the expression is not evaluated at all.
+ */
+#ifdef ALTQ_DEBUG
+#ifdef __STDC__
+#define ASSERT(e) ((e) ? (void)0 : altq_assert(__FILE__, __LINE__, #e))
+#else /* PCC */
+/* presumably relies on pre-ANSI substitution inside string literals */
+#define ASSERT(e) ((e) ? (void)0 : altq_assert(__FILE__, __LINE__, "e"))
+#endif
+#else
+#define ASSERT(e) ((void)0)
+#endif
+
+/*
+ * misc stuff for compatibility
+ */
+/* ioctl cmd type */
+#if defined(__FreeBSD__) && (__FreeBSD__ < 3)
+typedef int ioctlcmd_t;
+#else
+typedef u_long ioctlcmd_t;
+#endif
+
+/*
+ * queue macros:
+ * the interface of TAILQ_LAST macro changed after the introduction
+ * of softupdate. redefine it here to make it work with pre-2.2.7.
+ *
+ * note that the system definition is replaced unconditionally; the
+ * second argument is the struct tag given to TAILQ_HEAD.
+ */
+#undef TAILQ_LAST
+#define TAILQ_LAST(head, headname) \
+ (*(((struct headname *)((head)->tqh_last))->tqh_last))
+
+/* the following are supplied only when <sys/queue.h> lacks them */
+#ifndef TAILQ_EMPTY
+#define TAILQ_EMPTY(head) ((head)->tqh_first == NULL)
+#endif
+#ifndef TAILQ_FOREACH
+#define TAILQ_FOREACH(var, head, field) \
+ for (var = TAILQ_FIRST(head); var; var = TAILQ_NEXT(var, field))
+#endif
+
+/*
+ * macro for timeout/untimeout
+ *
+ * CALLOUT_INIT/RESET/STOP and CALLOUT_INITIALIZER give one interface
+ * over three backends: the native callout API, OpenBSD's struct timeout,
+ * and old-style timeout()/untimeout().
+ */
+#if (__FreeBSD_version > 300000) || defined(__NetBSD__)
+/* use callout */
+#include <sys/callout.h>
+
+#if (__FreeBSD_version > 500000)
+#define CALLOUT_INIT(c) callout_init((c), 0)
+#else
+#define CALLOUT_INIT(c) callout_init((c))
+#endif
+#define CALLOUT_RESET(c,t,f,a) callout_reset((c),(t),(f),(a))
+#define CALLOUT_STOP(c) callout_stop((c))
+#ifndef CALLOUT_INITIALIZER
+#define CALLOUT_INITIALIZER { { { NULL } }, 0, NULL, NULL, 0 }
+#endif
+#elif defined(__OpenBSD__)
+#include <sys/timeout.h>
+/* callout structure as a wrapper of struct timeout */
+struct callout {
+ struct timeout c_to;
+};
+#define CALLOUT_INIT(c) do { bzero((c), sizeof(*(c))); } while (/*CONSTCOND*/ 0)
+/* the timeout is bound to (f, a) on first use only, then (re)armed */
+#define CALLOUT_RESET(c,t,f,a) do { if (!timeout_initialized(&(c)->c_to)) \
+ timeout_set(&(c)->c_to, (f), (a)); \
+ timeout_add(&(c)->c_to, (t)); } while (/*CONSTCOND*/ 0)
+#define CALLOUT_STOP(c) timeout_del(&(c)->c_to)
+#define CALLOUT_INITIALIZER { { { NULL }, NULL, NULL, 0, 0 } }
+#else
+/* use old-style timeout/untimeout */
+/* dummy callout structure */
+struct callout {
+ void *c_arg; /* function argument */
+ void (*c_func)(void *); /* function to call */
+};
+#define CALLOUT_INIT(c) do { bzero((c), sizeof(*(c))); } while (/*CONSTCOND*/ 0)
+/* function and argument are recorded so that CALLOUT_STOP can pass them */
+#define CALLOUT_RESET(c,t,f,a) do { (c)->c_arg = (a); \
+ (c)->c_func = (f); \
+ timeout((f),(a),(t)); } while (/*CONSTCOND*/ 0)
+#define CALLOUT_STOP(c) untimeout((c)->c_func,(c)->c_arg)
+#define CALLOUT_INITIALIZER { NULL, NULL }
+#endif
+#if !defined(__FreeBSD__)
+typedef void (timeout_t)(void *);
+#endif
+
+/* total packet length as recorded in the mbuf packet header */
+#define m_pktlen(m) ((m)->m_pkthdr.len)
+
+/* forward declarations: only pointers to these types are used below */
+struct ifnet; struct mbuf;
+struct pf_altq;
+#ifdef ALTQ3_CLFIER_COMPAT
+struct flowinfo;
+#endif
+
+void *altq_lookup(char *, int);
+#ifdef ALTQ3_CLFIER_COMPAT
+/* flow extraction and the common classifier (filter add/delete/match) */
+int altq_extractflow(struct mbuf *, int, struct flowinfo *, u_int32_t);
+int acc_add_filter(struct acc_classifier *, struct flow_filter *,
+ void *, u_long *);
+int acc_delete_filter(struct acc_classifier *, u_long);
+int acc_discard_filters(struct acc_classifier *, void *, int);
+void *acc_classify(void *, struct mbuf *, int);
+#endif
+u_int8_t read_dsfield(struct mbuf *, struct altq_pktattr *);
+void write_dsfield(struct mbuf *, struct altq_pktattr *, u_int8_t);
+void altq_assert(const char *, int, const char *);
+int tbr_set(struct ifaltq *, struct tb_profile *);
+int tbr_get(struct ifaltq *, struct tb_profile *);
+
+/* generic entry points taking a struct pf_altq */
+int altq_pfattach(struct pf_altq *);
+int altq_pfdetach(struct pf_altq *);
+int altq_add(struct pf_altq *);
+int altq_remove(struct pf_altq *);
+int altq_add_queue(struct pf_altq *);
+int altq_remove_queue(struct pf_altq *);
+int altq_getqstats(struct pf_altq *, void *, int *);
+
+/* the same set of operations, once per discipline: cbq, priq, hfsc */
+int cbq_pfattach(struct pf_altq *);
+int cbq_add_altq(struct pf_altq *);
+int cbq_remove_altq(struct pf_altq *);
+int cbq_add_queue(struct pf_altq *);
+int cbq_remove_queue(struct pf_altq *);
+int cbq_getqstats(struct pf_altq *, void *, int *);
+
+int priq_pfattach(struct pf_altq *);
+int priq_add_altq(struct pf_altq *);
+int priq_remove_altq(struct pf_altq *);
+int priq_add_queue(struct pf_altq *);
+int priq_remove_queue(struct pf_altq *);
+int priq_getqstats(struct pf_altq *, void *, int *);
+
+int hfsc_pfattach(struct pf_altq *);
+int hfsc_add_altq(struct pf_altq *);
+int hfsc_remove_altq(struct pf_altq *);
+int hfsc_add_queue(struct pf_altq *);
+int hfsc_remove_queue(struct pf_altq *);
+int hfsc_getqstats(struct pf_altq *, void *, int *);
+
+#endif /* _KERNEL */
+#endif /* _ALTQ_ALTQ_VAR_H_ */
diff --git a/sys/contrib/altq/altq/altqconf.h b/sys/contrib/altq/altq/altqconf.h
new file mode 100644
index 000000000000..4d3921ca2bfa
--- /dev/null
+++ b/sys/contrib/altq/altq/altqconf.h
@@ -0,0 +1,29 @@
+/* $OpenBSD: altqconf.h,v 1.1 2001/06/27 05:28:36 kjc Exp $ */
+/* $NetBSD: altqconf.h,v 1.2 2001/05/30 11:57:16 mrg Exp $ */
+
+/* the whole header is empty unless _KERNEL_OPT or __OpenBSD__ is defined */
+#if defined(_KERNEL_OPT) || defined(__OpenBSD__)
+
+#if defined(_KERNEL_OPT)
+#include "opt_altq_enabled.h"
+#endif
+
+#include <sys/conf.h>
+
+/* NALTQ: 1 when ALTQ is configured, 0 otherwise */
+#ifdef ALTQ
+#define NALTQ 1
+#else
+#define NALTQ 0
+#endif
+
+cdev_decl(altq);
+
+/*
+ * character device switch initializer for the altq device.
+ * in the OpenBSD form only open, close and ioctl are taken from the
+ * driver; read, write, stop, select and mmap are all set to enodev.
+ */
+#ifdef __OpenBSD__
+#define cdev_altq_init(c,n) { \
+ dev_init(c,n,open), dev_init(c,n,close), (dev_type_read((*))) enodev, \
+ (dev_type_write((*))) enodev, dev_init(c,n,ioctl), \
+ (dev_type_stop((*))) enodev, 0, (dev_type_select((*))) enodev, \
+ (dev_type_mmap((*))) enodev }
+#else
+#define cdev_altq_init(x,y) cdev__oci_init(x,y)
+#endif
+#endif /* defined(_KERNEL_OPT) || defined(__OpenBSD__) */
diff --git a/sys/contrib/altq/altq/if_altq.h b/sys/contrib/altq/altq/if_altq.h
new file mode 100644
index 000000000000..8abb6a6057c6
--- /dev/null
+++ b/sys/contrib/altq/altq/if_altq.h
@@ -0,0 +1,184 @@
+/* $KAME: if_altq.h,v 1.11 2003/07/10 12:07:50 kjc Exp $ */
+
+/*
+ * Copyright (C) 1997-2003
+ * Sony Computer Science Laboratories Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#ifndef _ALTQ_IF_ALTQ_H_
+#define _ALTQ_IF_ALTQ_H_
+
+#if (defined(__FreeBSD__) && __FreeBSD_version >= 500000)
+#include <sys/lock.h> /* XXX */
+#include <sys/mutex.h> /* XXX */
+#include <sys/event.h> /* XXX */
+#endif
+
+#ifdef _KERNEL_OPT
+#include <altq/altqconf.h>
+#endif
+
+struct altq_pktattr; struct tb_regulator; struct top_cdnr;
+
+/*
+ * Structure defining a queue for a network interface.
+ *
+ * The function pointers are not called directly by users of this header
+ * but through the ALTQ_ENQUEUE, ALTQ_DEQUEUE, ALTQ_POLL and ALTQ_PURGE
+ * macros defined further down.
+ */
+struct ifaltq {
+ /* fields compatible with struct ifqueue */
+ struct mbuf *ifq_head;
+ struct mbuf *ifq_tail;
+ int ifq_len;
+ int ifq_maxlen;
+ int ifq_drops;
+#if (defined(__FreeBSD__) && __FreeBSD_version >= 500000)
+ struct mtx ifq_mtx;
+#endif
+
+ /* alternate queueing related fields */
+ int altq_type; /* discipline type */
+ int altq_flags; /* flags (e.g. ready, in-use) */
+ void *altq_disc; /* for discipline-specific use */
+ struct ifnet *altq_ifp; /* back pointer to interface */
+
+ int (*altq_enqueue)(struct ifaltq *, struct mbuf *,
+ struct altq_pktattr *);
+ /* second argument is ALTDQ_REMOVE or ALTDQ_POLL */
+ struct mbuf *(*altq_dequeue)(struct ifaltq *, int);
+ /* second argument is an ALTRQ_ request type */
+ int (*altq_request)(struct ifaltq *, int, void *);
+
+ /* classifier fields */
+ void *altq_clfier; /* classifier-specific use */
+ void *(*altq_classify)(void *, struct mbuf *, int);
+
+ /* token bucket regulator */
+ struct tb_regulator *altq_tbr;
+
+ /* input traffic conditioner (doesn't belong to the output queue...) */
+ struct top_cdnr *altq_cdnr;
+};
+
+
+#ifdef _KERNEL
+
+/*
+ * packet attributes used by queueing disciplines.
+ * pattr_class is a discipline-dependent scheduling class that is
+ * set by a classifier.
+ * pattr_hdr and pattr_af may be used by a discipline to access
+ * the header within a mbuf. (e.g. ECN needs to update the CE bit)
+ * note that pattr_hdr could be stale after m_pullup, though link
+ * layer output routines usually don't use m_pullup. link-level
+ * compression also invalidates these fields. thus, pattr_hdr needs
+ * to be verified when a discipline touches the header.
+ *
+ * a pointer to this structure is the third argument of the
+ * altq_enqueue hook in struct ifaltq.
+ */
+struct altq_pktattr {
+ void *pattr_class; /* sched class set by classifier */
+ int pattr_af; /* address family */
+ caddr_t pattr_hdr; /* saved header position in mbuf */
+};
+
+/*
+ * mbuf tag to carry a queue id (and hints for ECN).
+ *
+ * af and hdr appear to play the role that pattr_af and pattr_hdr play
+ * in struct altq_pktattr -- presumably the same staleness caveat
+ * applies to hdr; confirm against the code that fills the tag.
+ */
+struct altq_tag {
+ u_int32_t qid; /* queue id */
+ /* hints for ecn */
+ int af; /* address family */
+ void *hdr; /* saved header position in mbuf */
+};
+
+/*
+ * a token-bucket regulator limits the rate that a network driver can
+ * dequeue packets from the output queue.
+ * modern cards are able to buffer a large amount of packets and dequeue
+ * too many packets at a time. this bursty dequeue behavior makes it
+ * impossible to schedule packets by queueing disciplines.
+ * a token-bucket is used to control the burst size in a device
+ * independent manner.
+ *
+ * a queue has a regulator when the altq_tbr pointer in its struct ifaltq
+ * is non-NULL (see TBR_IS_ENABLED).
+ */
+struct tb_regulator {
+ int64_t tbr_rate; /* (scaled) token bucket rate */
+ int64_t tbr_depth; /* (scaled) token bucket depth */
+
+ int64_t tbr_token; /* (scaled) current token */
+ int64_t tbr_filluptime; /* (scaled) time to fill up bucket */
+ u_int64_t tbr_last; /* last time token was updated */
+
+ int tbr_lastop; /* last dequeue operation type
+ needed for poll-and-dequeue */
+};
+
+/* if_altqflags */
+#define ALTQF_READY 0x01 /* driver supports alternate queueing */
+#define ALTQF_ENABLED 0x02 /* altq is in use */
+#define ALTQF_CLASSIFY 0x04 /* classify packets */
+#define ALTQF_CNDTNING 0x08 /* altq traffic conditioning is enabled */
+#define ALTQF_DRIVER1 0x40 /* driver specific */
+
+/* if_altqflags set internally only: */
+#define ALTQF_CANTCHANGE (ALTQF_READY)
+
+/* altq_dequeue 2nd arg */
+#define ALTDQ_REMOVE 1 /* dequeue mbuf from the queue */
+#define ALTDQ_POLL 2 /* don't dequeue mbuf from the queue */
+
+/* altq request types (currently only purge is defined) */
+#define ALTRQ_PURGE 1 /* purge all packets */
+
+/*
+ * flag tests: these yield the masked flag bit itself (zero or non-zero),
+ * not a normalized 0/1 value.
+ */
+#define ALTQ_IS_READY(ifq) ((ifq)->altq_flags & ALTQF_READY)
+#define ALTQ_IS_ENABLED(ifq) ((ifq)->altq_flags & ALTQF_ENABLED)
+#define ALTQ_NEEDS_CLASSIFY(ifq) ((ifq)->altq_flags & ALTQF_CLASSIFY)
+#define ALTQ_IS_CNDTNING(ifq) ((ifq)->altq_flags & ALTQF_CNDTNING)
+
+#define ALTQ_SET_CNDTNING(ifq) ((ifq)->altq_flags |= ALTQF_CNDTNING)
+#define ALTQ_CLEAR_CNDTNING(ifq) ((ifq)->altq_flags &= ~ALTQF_CNDTNING)
+#define ALTQ_IS_ATTACHED(ifq) ((ifq)->altq_disc != NULL)
+
+/*
+ * dispatch through the hooks in struct ifaltq.
+ * the result is stored in the last macro argument (err or m).  the
+ * expansions are bare assignments without enclosing parentheses, and
+ * ifq is evaluated twice.
+ */
+#define ALTQ_ENQUEUE(ifq, m, pa, err) \
+ (err) = (*(ifq)->altq_enqueue)((ifq),(m),(pa))
+#define ALTQ_DEQUEUE(ifq, m) \
+ (m) = (*(ifq)->altq_dequeue)((ifq), ALTDQ_REMOVE)
+#define ALTQ_POLL(ifq, m) \
+ (m) = (*(ifq)->altq_dequeue)((ifq), ALTDQ_POLL)
+#define ALTQ_PURGE(ifq) \
+ (void)(*(ifq)->altq_request)((ifq), ALTRQ_PURGE, (void *)0)
+#define ALTQ_IS_EMPTY(ifq) ((ifq)->ifq_len == 0)
+#define TBR_IS_ENABLED(ifq) ((ifq)->altq_tbr != NULL)
+
+/*
+ * the function-pointer parameters of altq_attach have the same types as
+ * the altq_enqueue, altq_dequeue, altq_request and altq_classify members
+ * of struct ifaltq, and the two void * parameters match altq_disc and
+ * altq_clfier -- presumably they are stored there; confirm against the
+ * definition.
+ */
+extern int altq_attach(struct ifaltq *, int, void *,
+ int (*)(struct ifaltq *, struct mbuf *,
+ struct altq_pktattr *),
+ struct mbuf *(*)(struct ifaltq *, int),
+ int (*)(struct ifaltq *, int, void *),
+ void *,
+ void *(*)(void *, struct mbuf *, int));
+extern int altq_detach(struct ifaltq *);
+extern int altq_enable(struct ifaltq *);
+extern int altq_disable(struct ifaltq *);
+extern struct mbuf *tbr_dequeue(struct ifaltq *, int);
+extern int (*altq_input)(struct mbuf *, int);
+/* declared unconditionally: the ALTQ3_CLFIER_COMPAT test is stubbed to 1 */
+#if 1 /* ALTQ3_CLFIER_COMPAT */
+void altq_etherclassify(struct ifaltq *, struct mbuf *, struct altq_pktattr *);
+#endif
+#endif /* _KERNEL */
+
+#endif /* _ALTQ_IF_ALTQ_H_ */